[deal.ii] 01/03: Imported Upstream version 8.4.1

Sun Jul 17 17:46:52 UTC 2016

This is an automated email from the git hooks/post-receive script.

tamiko-guest pushed a commit to branch master
in repository deal.ii.

commit 25012ffe62cd71020baef2089960115981e4de2b
Author: Matthias Maier <tamiko+DEBIAN at kyomu.43-1.org>
Date:   Fri Jul 15 12:52:12 2016 +0000

    Imported Upstream version 8.4.1
---
 AUTHORS                                            |   160 +
 CMakeLists.txt                                     |   163 +
 CTestConfig.cmake                                  |    50 +
 LICENSE                                            |   523 +
 README.md                                          |    56 +
 VERSION                                            |     1 +
 cmake/README.md                                    |    47 +
 cmake/checks/check_01_compiler_features.cmake      |   382 +
 cmake/checks/check_01_cpu_features.cmake           |   257 +
 cmake/checks/check_01_cxx_features.cmake           |   550 +
 cmake/checks/check_02_system_features.cmake        |   146 +
 cmake/checks/check_03_compiler_bugs.cmake          |   444 +
 cmake/checks/check_03_generator_bugs.cmake         |    33 +
 cmake/config/CMakeLists.txt                        |   242 +
 cmake/config/Config.cmake.in                       |   189 +
 cmake/config/ConfigVersion.cmake.in                |    17 +
 cmake/config/Make.global_options.in                |   136 +
 cmake/config/template-arguments.in                 |   108 +
 cmake/configure/configure_1_bzip2.cmake            |    20 +
 cmake/configure/configure_1_lapack.cmake           |    83 +
 cmake/configure/configure_1_mpi.cmake              |    55 +
 cmake/configure/configure_1_threads.cmake          |   214 +
 cmake/configure/configure_1_zlib.cmake             |    20 +
 cmake/configure/configure_2_metis.cmake            |    43 +
 cmake/configure/configure_2_trilinos.cmake         |   220 +
 cmake/configure/configure_2_umfpack.cmake          |    46 +
 cmake/configure/configure_3_petsc.cmake            |   129 +
 cmake/configure/configure_arpack.cmake             |    23 +
 cmake/configure/configure_boost.cmake              |    87 +
 cmake/configure/configure_hdf5.cmake               |    48 +
 cmake/configure/configure_muparser.cmake           |    25 +
 cmake/configure/configure_netcdf.cmake             |    20 +
 cmake/configure/configure_opencascade.cmake        |    20 +
 cmake/configure/configure_p4est.cmake              |    63 +
 cmake/configure/configure_slepc.cmake              |    80 +
 cmake/cpack-mac-bundle/Info.plist.in               |    30 +
 cmake/cpack-mac-bundle/dealii-icon.icns            |   Bin 0 -> 3000 bytes
 cmake/cpack-mac-bundle/dealii-terminal.in          |    67 +
 cmake/cpack-mac-bundle/dealii.conf.in              |    88 +
 cmake/cpack-mac-bundle/mac_startup_script.sh.in    |    26 +
 cmake/macros/macro_add_flags.cmake                 |    31 +
 cmake/macros/macro_check_cxx_compiler_bug.cmake    |    46 +
 cmake/macros/macro_check_mpi_interface.cmake       |    70 +
 cmake/macros/macro_clear_cmake_required.cmake      |    28 +
 cmake/macros/macro_configure_feature.cmake         |   297 +
 cmake/macros/macro_deal_ii_add_definitions.cmake   |    33 +
 cmake/macros/macro_deal_ii_add_dependencies.cmake  |    31 +
 cmake/macros/macro_deal_ii_add_library.cmake       |    52 +
 cmake/macros/macro_deal_ii_add_test.cmake          |   324 +
 cmake/macros/macro_deal_ii_find_file.cmake         |    30 +
 cmake/macros/macro_deal_ii_find_library.cmake      |    30 +
 cmake/macros/macro_deal_ii_find_path.cmake         |    30 +
 ...macro_deal_ii_initialize_cached_variables.cmake |    95 +
 .../macro_deal_ii_insource_setup_target.cmake      |    50 +
 cmake/macros/macro_deal_ii_invoke_autopilot.cmake  |   249 +
 cmake/macros/macro_deal_ii_package_handle.cmake    |   156 +
 cmake/macros/macro_deal_ii_pickup_tests.cmake      |   237 +
 .../macro_deal_ii_query_git_information.cmake      |   113 +
 cmake/macros/macro_deal_ii_setup_target.cmake      |   124 +
 cmake/macros/macro_enable_if_links.cmake           |    45 +
 cmake/macros/macro_enable_if_supported.cmake       |    65 +
 cmake/macros/macro_enable_language_optional.cmake  |    77 +
 cmake/macros/macro_expand_instantiations.cmake     |    78 +
 cmake/macros/macro_filter_system_libraries.cmake   |    42 +
 cmake/macros/macro_find_package.cmake              |    33 +
 cmake/macros/macro_find_system_library.cmake       |    49 +
 cmake/macros/macro_item_matches.cmake              |    34 +
 cmake/macros/macro_purge_feature.cmake             |    47 +
 cmake/macros/macro_push_cmake_required.cmake       |    30 +
 cmake/macros/macro_register_feature.cmake          |    72 +
 cmake/macros/macro_remove_duplicates.cmake         |    34 +
 cmake/macros/macro_replace_flag.cmake              |    36 +
 cmake/macros/macro_reset_cmake_required.cmake      |    29 +
 cmake/macros/macro_set_if_empty.cmake              |    23 +
 cmake/macros/macro_strip_flag.cmake                |    31 +
 cmake/macros/macro_switch_library_preference.cmake |    32 +
 cmake/macros/macro_to_string.cmake                 |    30 +
 cmake/macros/macro_to_string_and_add_prefix.cmake  |    31 +
 cmake/macros/macro_verbose_include.cmake           |    20 +
 cmake/modules/FindARPACK.cmake                     |    87 +
 cmake/modules/FindBOOST.cmake                      |    91 +
 cmake/modules/FindBZIP2.cmake                      |    46 +
 cmake/modules/FindHDF5.cmake                       |    72 +
 cmake/modules/FindLAPACK.cmake                     |    91 +
 cmake/modules/FindMETIS.cmake                      |   109 +
 cmake/modules/FindMPI.cmake                        |   198 +
 cmake/modules/FindMUPARSER.cmake                   |    71 +
 cmake/modules/FindNETCDF.cmake                     |    54 +
 cmake/modules/FindOPENCASCADE.cmake                |    78 +
 cmake/modules/FindP4EST.cmake                      |   170 +
 cmake/modules/FindPETSC.cmake                      |   235 +
 cmake/modules/FindSLEPC.cmake                      |   100 +
 cmake/modules/FindTBB.cmake                        |    86 +
 cmake/modules/FindTRILINOS.cmake                   |   236 +
 cmake/modules/FindUMFPACK.cmake                    |   172 +
 cmake/modules/FindZLIB.cmake                       |    43 +
 cmake/scripts/CMakeLists.txt                       |    52 +
 cmake/scripts/expand_instantiations.cc             |   532 +
 cmake/scripts/normalize.pl                         |   103 +
 cmake/scripts/run_test.cmake                       |   143 +
 cmake/scripts/run_test.sh                          |   151 +
 cmake/setup_cached_variables.cmake                 |   395 +
 cmake/setup_compiler_flags.cmake                   |   153 +
 cmake/setup_compiler_flags_gnu.cmake               |   189 +
 cmake/setup_compiler_flags_intel.cmake             |   197 +
 cmake/setup_compiler_flags_msvc.cmake              |   103 +
 cmake/setup_cpack.cmake                            |    87 +
 cmake/setup_custom_targets.cmake                   |   136 +
 cmake/setup_deal_ii.cmake                          |   164 +
 cmake/setup_external_macros.cmake                  |    27 +
 cmake/setup_finalize.cmake                         |    85 +
 cmake/setup_write_config.cmake                     |   262 +
 contrib/CMakeLists.txt                             |    21 +
 contrib/README.md                                  |    21 +
 contrib/header-templates/header-template           |    15 +
 contrib/header-templates/header-template.cmake     |    14 +
 contrib/parameter_gui/CMakeLists.txt               |    69 +
 contrib/parameter_gui/README                       |     5 +
 contrib/parameter_gui/application.qrc              |     7 +
 contrib/parameter_gui/browse_lineedit.cpp          |   126 +
 contrib/parameter_gui/browse_lineedit.h            |   121 +
 contrib/parameter_gui/dealii_parameter_gui.pro     |    25 +
 contrib/parameter_gui/images/logo_dealii.png       |   Bin 0 -> 34400 bytes
 contrib/parameter_gui/images/logo_dealii_64.png    |   Bin 0 -> 1374 bytes
 contrib/parameter_gui/images/logo_dealii_gui.png   |   Bin 0 -> 24271 bytes
 .../parameter_gui/images/logo_dealii_gui_128.png   |   Bin 0 -> 3481 bytes
 contrib/parameter_gui/info_message.cpp             |   104 +
 contrib/parameter_gui/info_message.h               |   113 +
 contrib/parameter_gui/lgpl-2.1.txt                 |   502 +
 contrib/parameter_gui/main.cpp                     |    71 +
 contrib/parameter_gui/mainwindow.cpp               |   385 +
 contrib/parameter_gui/mainwindow.h                 |   183 +
 contrib/parameter_gui/parameter_delegate.cpp       |   329 +
 contrib/parameter_gui/parameter_delegate.h         |   122 +
 contrib/parameter_gui/parameters.xml               |     2 +
 contrib/parameter_gui/xml_parameter_reader.cpp     |   368 +
 contrib/parameter_gui/xml_parameter_reader.h       |   128 +
 contrib/parameter_gui/xml_parameter_writer.cpp     |   103 +
 contrib/parameter_gui/xml_parameter_writer.h       |   100 +
 contrib/utilities/astyle.rc                        |    33 +
 contrib/utilities/check_indentation.sh             |    11 +
 contrib/utilities/dotgdbinit.py                    |   210 +
 contrib/utilities/embedding.cc                     |   227 +
 contrib/utilities/fe_table.pl                      |    84 +
 contrib/utilities/gridio.cc                        |    80 +
 contrib/utilities/indent                           |    48 +
 contrib/utilities/interpolation.cc                 |   344 +
 contrib/utilities/lagrange_basis                   |    80 +
 contrib/utilities/makeofflinedoc.sh                |    42 +
 contrib/utilities/setup_astyle.sh                  |    20 +
 contrib/utilities/simplify.pl                      |    89 +
 contrib/utilities/update-copyright                 |    67 +
 contrib/utilities/wrapcomments.py                  |   555 +
 doc/CMakeLists.txt                                 |    73 +
 doc/README.md                                      |    10 +
 doc/deal.ico                                       |   Bin 0 -> 35402 bytes
 doc/developers/Toolchain-x86_64-w64-mingw32.sample |    16 +
 doc/developers/Toolchain.sample                    |    14 +
 doc/developers/cmake-internals.html                |   714 +
 doc/developers/porting.html                        |   166 +
 doc/developers/testsuite.html                      |   863 ++
 doc/developers/writing-documentation.html          |   464 +
 doc/documentation.html                             |    74 +
 doc/doxygen/CMakeLists.txt                         |   229 +
 doc/doxygen/DoxygenLayout.xml                      |   197 +
 doc/doxygen/code-gallery/CMakeLists.txt            |   149 +
 doc/doxygen/code-gallery/code-gallery.h.in         |    40 +
 doc/doxygen/code-gallery/no-code-gallery.h         |    27 +
 doc/doxygen/deal.II/images/step-1.grid-1.png       |   Bin 0 -> 9111 bytes
 doc/doxygen/deal.II/images/step-1.grid-2.png       |   Bin 0 -> 29392 bytes
 doc/doxygen/deal.II/images/step-1.grid-2r2.png     |   Bin 0 -> 20513 bytes
 .../images/step-10.ball_mapping_q1_ref0.png        |   Bin 0 -> 13197 bytes
 .../images/step-10.ball_mapping_q1_ref1.png        |   Bin 0 -> 28324 bytes
 .../images/step-10.ball_mapping_q2_ref0.png        |   Bin 0 -> 27199 bytes
 .../images/step-10.ball_mapping_q2_ref1.png        |   Bin 0 -> 27650 bytes
 .../images/step-10.ball_mapping_q3_ref0.png        |   Bin 0 -> 24393 bytes
 .../images/step-10.ball_mapping_q3_ref1.png        |   Bin 0 -> 27619 bytes
 doc/doxygen/deal.II/images/step-10.quarter-q1.png  |   Bin 0 -> 16587 bytes
 doc/doxygen/deal.II/images/step-10.quarter-q2.png  |   Bin 0 -> 18701 bytes
 doc/doxygen/deal.II/images/step-10.quarter-q3.png  |   Bin 0 -> 18122 bytes
 doc/doxygen/deal.II/images/step-12.3d-solution.png |   Bin 0 -> 68881 bytes
 doc/doxygen/deal.II/images/step-12.cg.sol-0.png    |   Bin 0 -> 20332 bytes
 doc/doxygen/deal.II/images/step-12.cg.sol-1.png    |   Bin 0 -> 21664 bytes
 doc/doxygen/deal.II/images/step-12.cg.sol-2.png    |   Bin 0 -> 21920 bytes
 doc/doxygen/deal.II/images/step-12.cg.sol-3.png    |   Bin 0 -> 21668 bytes
 doc/doxygen/deal.II/images/step-12.cg.sol-4.png    |   Bin 0 -> 27187 bytes
 doc/doxygen/deal.II/images/step-12.cg.sol-5.png    |   Bin 0 -> 23387 bytes
 doc/doxygen/deal.II/images/step-12.grid-5.png      |   Bin 0 -> 2462 bytes
 doc/doxygen/deal.II/images/step-12.sol-0.png       |   Bin 0 -> 5147 bytes
 doc/doxygen/deal.II/images/step-12.sol-2.png       |   Bin 0 -> 6886 bytes
 doc/doxygen/deal.II/images/step-12.sol-5-3d.png    |   Bin 0 -> 50842 bytes
 doc/doxygen/deal.II/images/step-12.sol-5.png       |   Bin 0 -> 7730 bytes
 doc/doxygen/deal.II/images/step-13.error.png       |   Bin 0 -> 5967 bytes
 .../deal.II/images/step-13.grid-kelly-8.png        |   Bin 0 -> 3297 bytes
 .../deal.II/images/step-13.solution-kelly-0.png    |   Bin 0 -> 44786 bytes
 .../deal.II/images/step-13.solution-kelly-1.png    |   Bin 0 -> 54252 bytes
 .../deal.II/images/step-13.solution-kelly-2.png    |   Bin 0 -> 57033 bytes
 .../deal.II/images/step-13.solution-kelly-3.png    |   Bin 0 -> 59988 bytes
 .../deal.II/images/step-13.solution-kelly-4.png    |   Bin 0 -> 64059 bytes
 .../deal.II/images/step-13.solution-kelly-5.png    |   Bin 0 -> 67991 bytes
 .../deal.II/images/step-13.solution-kelly-6.png    |   Bin 0 -> 71160 bytes
 .../deal.II/images/step-13.solution-kelly-7.png    |   Bin 0 -> 72836 bytes
 .../deal.II/images/step-13.solution-kelly-8.png    |   Bin 0 -> 74323 bytes
 .../deal.II/images/step-13.solution-kelly-9.png    |   Bin 0 -> 74784 bytes
 .../step-14.point-derivative.error-estimation.png  |   Bin 0 -> 3796 bytes
 .../images/step-14.point-derivative.error.png      |   Bin 0 -> 4334 bytes
 .../images/step-14.point-derivative.grid-0.png     |   Bin 0 -> 1068 bytes
 .../images/step-14.point-derivative.grid-5.png     |   Bin 0 -> 2771 bytes
 .../images/step-14.point-derivative.grid-6.png     |   Bin 0 -> 3991 bytes
 .../images/step-14.point-derivative.grid-7.png     |   Bin 0 -> 5718 bytes
 .../images/step-14.point-derivative.grid-8.png     |   Bin 0 -> 7737 bytes
 .../images/step-14.point-derivative.grid-9.png     |   Bin 0 -> 10121 bytes
 .../step-14.point-derivative.solution-5-dual.png   |   Bin 0 -> 8089 bytes
 .../step-14.point-value.error-comparison.png       |   Bin 0 -> 17080 bytes
 .../step-14.point-value.error-estimation.png       |   Bin 0 -> 5544 bytes
 .../deal.II/images/step-14.point-value.error.png   |   Bin 0 -> 6266 bytes
 .../deal.II/images/step-14.point-value.grid-0.png  |   Bin 0 -> 1068 bytes
 .../deal.II/images/step-14.point-value.grid-2.png  |   Bin 0 -> 1363 bytes
 .../deal.II/images/step-14.point-value.grid-4.png  |   Bin 0 -> 2240 bytes
 .../deal.II/images/step-14.point-value.grid-5.png  |   Bin 0 -> 3158 bytes
 .../deal.II/images/step-14.point-value.grid-7.png  |   Bin 0 -> 6213 bytes
 .../deal.II/images/step-14.point-value.grid-8.png  |   Bin 0 -> 9589 bytes
 .../images/step-14.point-value.solution-5-dual.png |   Bin 0 -> 7923 bytes
 .../images/step-14.point-value.solution-5.png      |   Bin 0 -> 10146 bytes
 .../images/step-14.step-13.error-comparison.png    |   Bin 0 -> 14968 bytes
 .../deal.II/images/step-14.step-13.grid-10.png     |   Bin 0 -> 17122 bytes
 .../deal.II/images/step-14.step-13.grid-9.png      |   Bin 0 -> 11097 bytes
 .../deal.II/images/step-14.step-13.solution-7.png  |   Bin 0 -> 10806 bytes
 doc/doxygen/deal.II/images/step-15.grid.png        |   Bin 0 -> 189872 bytes
 doc/doxygen/deal.II/images/step-15.solution-0.png  |   Bin 0 -> 83447 bytes
 doc/doxygen/deal.II/images/step-15.solution-1.png  |   Bin 0 -> 85272 bytes
 doc/doxygen/deal.II/images/step-15.solution-2.png  |   Bin 0 -> 86436 bytes
 doc/doxygen/deal.II/images/step-15.solution-3.png  |   Bin 0 -> 86298 bytes
 doc/doxygen/deal.II/images/step-15.solution-4.png  |   Bin 0 -> 85938 bytes
 doc/doxygen/deal.II/images/step-15.solution-5.png  |   Bin 0 -> 85071 bytes
 doc/doxygen/deal.II/images/step-15.solution-6.png  |   Bin 0 -> 84284 bytes
 doc/doxygen/deal.II/images/step-15.solution-7.png  |   Bin 0 -> 83812 bytes
 doc/doxygen/deal.II/images/step-16.solution.png    |   Bin 0 -> 54411 bytes
 doc/doxygen/deal.II/images/step-17.12-grid.png     |   Bin 0 -> 53688 bytes
 .../deal.II/images/step-17.12-partition.png        |   Bin 0 -> 18093 bytes
 doc/doxygen/deal.II/images/step-17.12-ux.png       |   Bin 0 -> 43842 bytes
 doc/doxygen/deal.II/images/step-17.12-uy.png       |   Bin 0 -> 42640 bytes
 .../deal.II/images/step-17.4-3d-partition.png      |   Bin 0 -> 77778 bytes
 doc/doxygen/deal.II/images/step-17.4-3d-ux.png     |   Bin 0 -> 79647 bytes
 .../deal.II/images/step-18.parallel-0002.p.png     |   Bin 0 -> 26006 bytes
 .../deal.II/images/step-18.parallel-0002.s.png     |   Bin 0 -> 26330 bytes
 .../deal.II/images/step-18.parallel-0005.s.png     |   Bin 0 -> 40976 bytes
 .../deal.II/images/step-18.parallel-0007.s.png     |   Bin 0 -> 52911 bytes
 .../deal.II/images/step-18.parallel-0008.s.png     |   Bin 0 -> 52230 bytes
 .../deal.II/images/step-18.parallel-0009.s.png     |   Bin 0 -> 44588 bytes
 .../deal.II/images/step-18.parallel-000mesh.png    |   Bin 0 -> 96596 bytes
 .../deal.II/images/step-18.parallel-0010.s.png     |   Bin 0 -> 40507 bytes
 .../images/step-18.sequential-0002.0000.png        |   Bin 0 -> 102319 bytes
 .../images/step-18.sequential-0005.0000.png        |   Bin 0 -> 100532 bytes
 .../images/step-18.sequential-0007.0000.png        |   Bin 0 -> 103357 bytes
 .../images/step-18.sequential-0008.0000.png        |   Bin 0 -> 96823 bytes
 .../images/step-18.sequential-0009.0000.png        |   Bin 0 -> 91393 bytes
 .../images/step-18.sequential-0010.0000.png        |   Bin 0 -> 86572 bytes
 .../deal.II/images/step-19.solution-0005.png       |   Bin 0 -> 5388 bytes
 doc/doxygen/deal.II/images/step-2.sparsity-1.png   |   Bin 0 -> 3423 bytes
 doc/doxygen/deal.II/images/step-2.sparsity-1.svg   |  1183 ++
 doc/doxygen/deal.II/images/step-2.sparsity-2.png   |   Bin 0 -> 5109 bytes
 doc/doxygen/deal.II/images/step-2.sparsity-2.svg   |  1183 ++
 doc/doxygen/deal.II/images/step-20.k-random.png    |   Bin 0 -> 78609 bytes
 doc/doxygen/deal.II/images/step-20.p-random.png    |   Bin 0 -> 21071 bytes
 doc/doxygen/deal.II/images/step-20.p.png           |   Bin 0 -> 42693 bytes
 doc/doxygen/deal.II/images/step-20.u-random.png    |   Bin 0 -> 28239 bytes
 doc/doxygen/deal.II/images/step-20.u-wiggle.png    |   Bin 0 -> 24683 bytes
 doc/doxygen/deal.II/images/step-20.u.png           |   Bin 0 -> 38361 bytes
 doc/doxygen/deal.II/images/step-20.v-wiggle.png    |   Bin 0 -> 21443 bytes
 doc/doxygen/deal.II/images/step-20.v.png           |   Bin 0 -> 36218 bytes
 doc/doxygen/deal.II/images/step-21.centerline.gif  |   Bin 0 -> 1286721 bytes
 doc/doxygen/deal.II/images/step-21.random2d.gif    |   Bin 0 -> 1516967 bytes
 doc/doxygen/deal.II/images/step-21.random3d.gif    |   Bin 0 -> 1627829 bytes
 doc/doxygen/deal.II/images/step-22.2d.mesh-0.png   |   Bin 0 -> 716 bytes
 doc/doxygen/deal.II/images/step-22.2d.mesh-1.png   |   Bin 0 -> 1350 bytes
 doc/doxygen/deal.II/images/step-22.2d.mesh-2.png   |   Bin 0 -> 1916 bytes
 doc/doxygen/deal.II/images/step-22.2d.mesh-3.png   |   Bin 0 -> 2620 bytes
 doc/doxygen/deal.II/images/step-22.2d.mesh-4.png   |   Bin 0 -> 3954 bytes
 doc/doxygen/deal.II/images/step-22.2d.mesh-5.png   |   Bin 0 -> 5070 bytes
 doc/doxygen/deal.II/images/step-22.2d.solution.png |   Bin 0 -> 14598 bytes
 .../deal.II/images/step-22.2d.sparsity-nor.png     |   Bin 0 -> 110959 bytes
 .../deal.II/images/step-22.2d.sparsity-ren.png     |   Bin 0 -> 87478 bytes
 .../deal.II/images/step-22.3d-extension.png        |   Bin 0 -> 511097 bytes
 .../deal.II/images/step-22.3d-grid-extension.png   |   Bin 0 -> 463131 bytes
 doc/doxygen/deal.II/images/step-22.3d.mesh-0.png   |   Bin 0 -> 57749 bytes
 doc/doxygen/deal.II/images/step-22.3d.mesh-1.png   |   Bin 0 -> 72786 bytes
 doc/doxygen/deal.II/images/step-22.3d.mesh-2.png   |   Bin 0 -> 102384 bytes
 doc/doxygen/deal.II/images/step-22.3d.mesh-3.png   |   Bin 0 -> 120247 bytes
 doc/doxygen/deal.II/images/step-22.3d.mesh-4.png   |   Bin 0 -> 131028 bytes
 doc/doxygen/deal.II/images/step-22.3d.mesh-5.png   |   Bin 0 -> 140620 bytes
 doc/doxygen/deal.II/images/step-22.3d.solution.png |   Bin 0 -> 285786 bytes
 .../deal.II/images/step-22.3d.sparsity_uu-ren.png  |   Bin 0 -> 105287 bytes
 .../deal.II/images/step-22.profile-3.original.png  |   Bin 0 -> 94866 bytes
 doc/doxygen/deal.II/images/step-22.profile-3.png   |   Bin 0 -> 101411 bytes
 doc/doxygen/deal.II/images/step-23.movie.gif       |   Bin 0 -> 3662392 bytes
 doc/doxygen/deal.II/images/step-24.multi.png       |   Bin 0 -> 41427 bytes
 doc/doxygen/deal.II/images/step-24.multi_movie.gif |   Bin 0 -> 4404094 bytes
 doc/doxygen/deal.II/images/step-24.multi_s.png     |   Bin 0 -> 27762 bytes
 doc/doxygen/deal.II/images/step-24.multi_s2.png    |   Bin 0 -> 29963 bytes
 doc/doxygen/deal.II/images/step-24.multi_sf.png    |   Bin 0 -> 29056 bytes
 doc/doxygen/deal.II/images/step-24.one.png         |   Bin 0 -> 38358 bytes
 doc/doxygen/deal.II/images/step-24.one_movie.gif   |   Bin 0 -> 5337113 bytes
 doc/doxygen/deal.II/images/step-24.one_s.png       |   Bin 0 -> 42393 bytes
 doc/doxygen/deal.II/images/step-24.one_s2.png      |   Bin 0 -> 31151 bytes
 doc/doxygen/deal.II/images/step-24.one_sf.png      |   Bin 0 -> 24575 bytes
 doc/doxygen/deal.II/images/step-24.traces.png      |   Bin 0 -> 9122 bytes
 doc/doxygen/deal.II/images/step-25.1d-breather.gif |   Bin 0 -> 69649 bytes
 .../deal.II/images/step-25.1d-breather_stp.png     |   Bin 0 -> 75606 bytes
 .../deal.II/images/step-25.2d-angled_kink.gif      |   Bin 0 -> 4032149 bytes
 doc/doxygen/deal.II/images/step-25.2d-kink.gif     |   Bin 0 -> 1270707 bytes
 doc/doxygen/deal.II/images/step-25.2d-kink.png     |   Bin 0 -> 81913 bytes
 .../deal.II/images/step-25.2d-pseudobreather.1.png |   Bin 0 -> 25691 bytes
 .../deal.II/images/step-25.2d-pseudobreather.2.png |   Bin 0 -> 33783 bytes
 .../deal.II/images/step-25.2d-pseudobreather.gif   |   Bin 0 -> 860268 bytes
 doc/doxygen/deal.II/images/step-26.movie.gif       |   Bin 0 -> 9971683 bytes
 doc/doxygen/deal.II/images/step-26.surface.png     |   Bin 0 -> 174915 bytes
 doc/doxygen/deal.II/images/step-26.volume.png      |   Bin 0 -> 120919 bytes
 doc/doxygen/deal.II/images/step-27.fe_degree-0.png |   Bin 0 -> 28909 bytes
 doc/doxygen/deal.II/images/step-27.fe_degree-1.png |   Bin 0 -> 47835 bytes
 doc/doxygen/deal.II/images/step-27.fe_degree-2.png |   Bin 0 -> 54178 bytes
 doc/doxygen/deal.II/images/step-27.fe_degree-3.png |   Bin 0 -> 63487 bytes
 doc/doxygen/deal.II/images/step-27.fe_degree-4.png |   Bin 0 -> 70226 bytes
 doc/doxygen/deal.II/images/step-27.fe_degree-5.png |   Bin 0 -> 75838 bytes
 doc/doxygen/deal.II/images/step-27.mesh-0.png      |   Bin 0 -> 5833 bytes
 doc/doxygen/deal.II/images/step-27.mesh-1.png      |   Bin 0 -> 7654 bytes
 doc/doxygen/deal.II/images/step-27.mesh-2.png      |   Bin 0 -> 8939 bytes
 doc/doxygen/deal.II/images/step-27.mesh-3.png      |   Bin 0 -> 9813 bytes
 doc/doxygen/deal.II/images/step-27.mesh-4.png      |   Bin 0 -> 10400 bytes
 doc/doxygen/deal.II/images/step-27.mesh-5.png      |   Bin 0 -> 12012 bytes
 .../deal.II/images/step-27.smoothness-0.png        |   Bin 0 -> 78594 bytes
 .../deal.II/images/step-27.smoothness-1.png        |   Bin 0 -> 79951 bytes
 .../deal.II/images/step-27.smoothness-2.png        |   Bin 0 -> 80470 bytes
 .../deal.II/images/step-27.smoothness-3.png        |   Bin 0 -> 81121 bytes
 .../deal.II/images/step-27.smoothness-4.png        |   Bin 0 -> 81507 bytes
 .../deal.II/images/step-27.smoothness-5.png        |   Bin 0 -> 82866 bytes
 doc/doxygen/deal.II/images/step-27.solution.png    |   Bin 0 -> 177783 bytes
 doc/doxygen/deal.II/images/step-28.convergence.png |   Bin 0 -> 12445 bytes
 .../deal.II/images/step-28.error-vs-dofs.png       |   Bin 0 -> 5494 bytes
 .../deal.II/images/step-28.error-vs-time.png       |   Bin 0 -> 5413 bytes
 .../deal.II/images/step-28.grid-0.9.order2.png     |   Bin 0 -> 33134 bytes
 .../deal.II/images/step-28.grid-1.9.order2.png     |   Bin 0 -> 40513 bytes
 .../deal.II/images/step-28.solution-0.9.order2.png |   Bin 0 -> 16578 bytes
 .../deal.II/images/step-28.solution-1.9.order2.png |   Bin 0 -> 14160 bytes
 doc/doxygen/deal.II/images/step-29.contours.png    |   Bin 0 -> 438857 bytes
 doc/doxygen/deal.II/images/step-29.intensity.png   |   Bin 0 -> 70326 bytes
 doc/doxygen/deal.II/images/step-29.surface.png     |   Bin 0 -> 304334 bytes
 doc/doxygen/deal.II/images/step-29.v.png           |   Bin 0 -> 89945 bytes
 doc/doxygen/deal.II/images/step-29.w.png           |   Bin 0 -> 89823 bytes
 doc/doxygen/deal.II/images/step-3.solution-1.png   |   Bin 0 -> 24423 bytes
 doc/doxygen/deal.II/images/step-3.solution-2.png   |   Bin 0 -> 23047 bytes
 .../deal.II/images/step-30.grid-3.aniso.png        |   Bin 0 -> 1354 bytes
 doc/doxygen/deal.II/images/step-30.sol-1.aniso.png |   Bin 0 -> 2335 bytes
 doc/doxygen/deal.II/images/step-30.sol-1.iso.png   |   Bin 0 -> 2390 bytes
 doc/doxygen/deal.II/images/step-30.sol-5.aniso.png |   Bin 0 -> 2254 bytes
 doc/doxygen/deal.II/images/step-30.sol-5.iso.png   |   Bin 0 -> 2112 bytes
 doc/doxygen/deal.II/images/step-31.2d.grid.00.png  |   Bin 0 -> 50532 bytes
 doc/doxygen/deal.II/images/step-31.2d.grid.01.png  |   Bin 0 -> 71276 bytes
 doc/doxygen/deal.II/images/step-31.2d.grid.02.png  |   Bin 0 -> 93659 bytes
 doc/doxygen/deal.II/images/step-31.2d.grid.03.png  |   Bin 0 -> 109770 bytes
 doc/doxygen/deal.II/images/step-31.2d.grid.04.png  |   Bin 0 -> 135218 bytes
 doc/doxygen/deal.II/images/step-31.2d.grid.05.png  |   Bin 0 -> 145506 bytes
 doc/doxygen/deal.II/images/step-31.2d.grid.06.png  |   Bin 0 -> 160584 bytes
 doc/doxygen/deal.II/images/step-31.2d.grid.07.png  |   Bin 0 -> 171903 bytes
 .../deal.II/images/step-31.2d.solution.00.png      |   Bin 0 -> 6587 bytes
 .../deal.II/images/step-31.2d.solution.01.png      |   Bin 0 -> 16188 bytes
 .../deal.II/images/step-31.2d.solution.02.png      |   Bin 0 -> 26758 bytes
 .../deal.II/images/step-31.2d.solution.03.png      |   Bin 0 -> 36056 bytes
 .../deal.II/images/step-31.2d.solution.04.png      |   Bin 0 -> 43289 bytes
 .../deal.II/images/step-31.2d.solution.05.png      |   Bin 0 -> 59270 bytes
 .../deal.II/images/step-31.2d.solution.06.png      |   Bin 0 -> 74005 bytes
 .../deal.II/images/step-31.2d.solution.07.png      |   Bin 0 -> 85101 bytes
 .../deal.II/images/step-31.3d.solution.00.png      |   Bin 0 -> 58959 bytes
 .../deal.II/images/step-31.3d.solution.01.png      |   Bin 0 -> 60384 bytes
 .../deal.II/images/step-31.3d.solution.02.png      |   Bin 0 -> 65433 bytes
 .../deal.II/images/step-31.3d.solution.03.png      |   Bin 0 -> 71168 bytes
 .../deal.II/images/step-31.3d.solution.04.png      |   Bin 0 -> 77819 bytes
 .../deal.II/images/step-31.3d.solution.05.png      |   Bin 0 -> 90633 bytes
 .../deal.II/images/step-31.3d.solution.06.png      |   Bin 0 -> 100880 bytes
 .../deal.II/images/step-31.3d.solution.07.png      |   Bin 0 -> 107200 bytes
 .../deal.II/images/step-31.3d.solution.08.png      |   Bin 0 -> 112965 bytes
 .../deal.II/images/step-31.3d.solution.09.png      |   Bin 0 -> 118321 bytes
 .../deal.II/images/step-31.3d.solution.10.png      |   Bin 0 -> 122924 bytes
 .../images/step-31.timestep.q1.beta=0.01.png       |   Bin 0 -> 5361 bytes
 .../images/step-31.timestep.q1.beta=0.03.png       |   Bin 0 -> 5079 bytes
 .../images/step-31.timestep.q1.beta=0.1.png        |   Bin 0 -> 5405 bytes
 .../images/step-31.timestep.q1.beta=0.5.png        |   Bin 0 -> 4623 bytes
 .../images/step-31.timestep.q2.beta=0.01.png       |   Bin 0 -> 4725 bytes
 .../images/step-31.timestep.q2.beta=0.03.png       |   Bin 0 -> 4743 bytes
 .../images/step-31.timestep.q2.beta=0.1.png        |   Bin 0 -> 5162 bytes
 doc/doxygen/deal.II/images/step-32.2d-initial.png  |   Bin 0 -> 79831 bytes
 .../deal.II/images/step-32.2d.grid.2100.png        |   Bin 0 -> 119333 bytes
 .../deal.II/images/step-32.2d.partition.2100.png   |   Bin 0 -> 44170 bytes
 .../deal.II/images/step-32.2d.t_vs_vmax.png        |   Bin 0 -> 2730 bytes
 .../deal.II/images/step-32.2d.temperature.0000.png |   Bin 0 -> 70932 bytes
 .../deal.II/images/step-32.2d.temperature.0100.png |   Bin 0 -> 89552 bytes
 .../deal.II/images/step-32.2d.temperature.0200.png |   Bin 0 -> 87794 bytes
 .../deal.II/images/step-32.2d.temperature.0300.png |   Bin 0 -> 80723 bytes
 .../deal.II/images/step-32.2d.temperature.0400.png |   Bin 0 -> 81426 bytes
 .../deal.II/images/step-32.2d.temperature.0500.png |   Bin 0 -> 82853 bytes
 .../deal.II/images/step-32.2d.temperature.0600.png |   Bin 0 -> 82363 bytes
 .../deal.II/images/step-32.2d.temperature.0700.png |   Bin 0 -> 76506 bytes
 .../deal.II/images/step-32.2d.temperature.0800.png |   Bin 0 -> 74571 bytes
 .../deal.II/images/step-32.2d.temperature.0900.png |   Bin 0 -> 72754 bytes
 .../deal.II/images/step-32.2d.temperature.1000.png |   Bin 0 -> 71755 bytes
 .../deal.II/images/step-32.2d.temperature.1100.png |   Bin 0 -> 72929 bytes
 .../deal.II/images/step-32.2d.temperature.1200.png |   Bin 0 -> 72826 bytes
 .../deal.II/images/step-32.2d.temperature.1300.png |   Bin 0 -> 72743 bytes
 .../deal.II/images/step-32.2d.temperature.1400.png |   Bin 0 -> 72622 bytes
 .../deal.II/images/step-32.2d.temperature.1500.png |   Bin 0 -> 71767 bytes
 .../deal.II/images/step-32.2d.temperature.1600.png |   Bin 0 -> 71434 bytes
 .../deal.II/images/step-32.2d.temperature.1700.png |   Bin 0 -> 70017 bytes
 .../deal.II/images/step-32.2d.temperature.1800.png |   Bin 0 -> 69702 bytes
 .../deal.II/images/step-32.2d.temperature.1900.png |   Bin 0 -> 70283 bytes
 .../deal.II/images/step-32.2d.temperature.2000.png |   Bin 0 -> 71753 bytes
 .../deal.II/images/step-32.2d.temperature.2100.png |   Bin 0 -> 72013 bytes
 .../deal.II/images/step-32.3d-sphere.partition.png |   Bin 0 -> 180865 bytes
 .../deal.II/images/step-32.3d-sphere.solution.png  |   Bin 0 -> 225178 bytes
 doc/doxygen/deal.II/images/step-32.3d.cube.0.png   |   Bin 0 -> 118060 bytes
 doc/doxygen/deal.II/images/step-32.3d.cube.1.png   |   Bin 0 -> 140052 bytes
 doc/doxygen/deal.II/images/step-32.beta.2d.png     |   Bin 0 -> 2770 bytes
 doc/doxygen/deal.II/images/step-32.beta_cr.2d.png  |   Bin 0 -> 3492 bytes
 doc/doxygen/deal.II/images/step-33.oscillation.gif |   Bin 0 -> 622213 bytes
 .../deal.II/images/step-33.slide.adapt.ed2.gif     |   Bin 0 -> 31029675 bytes
 doc/doxygen/deal.II/images/step-33.slide.ed2.gif   |   Bin 0 -> 19825790 bytes
 doc/doxygen/deal.II/images/step-33.slide.gif       |   Bin 0 -> 4295977 bytes
 doc/doxygen/deal.II/images/step-33.slide_adapt.gif |   Bin 0 -> 14070347 bytes
 doc/doxygen/deal.II/images/step-34_2d.png          |   Bin 0 -> 130745 bytes
 doc/doxygen/deal.II/images/step-34_3d-2.png        |   Bin 0 -> 143583 bytes
 doc/doxygen/deal.II/images/step-34_3d.png          |   Bin 0 -> 108434 bytes
 .../deal.II/images/step-35.Re_100.velocity.0.png   |   Bin 0 -> 131867 bytes
 .../deal.II/images/step-35.Re_100.velocity.1.png   |   Bin 0 -> 130714 bytes
 .../deal.II/images/step-35.Re_100.velocity.2.png   |   Bin 0 -> 134987 bytes
 .../deal.II/images/step-35.Re_100.velocity.3.png   |   Bin 0 -> 136051 bytes
 .../deal.II/images/step-35.Re_100.velocity.4.png   |   Bin 0 -> 130938 bytes
 .../deal.II/images/step-35.Re_100.vorticity.0.png  |   Bin 0 -> 19625 bytes
 .../deal.II/images/step-35.Re_100.vorticity.1.png  |   Bin 0 -> 40131 bytes
 .../deal.II/images/step-35.Re_100.vorticity.2.png  |   Bin 0 -> 71943 bytes
 .../deal.II/images/step-35.Re_100.vorticity.3.png  |   Bin 0 -> 103986 bytes
 .../deal.II/images/step-35.Re_100.vorticity.4.png  |   Bin 0 -> 111408 bytes
 .../deal.II/images/step-35.Re_500.velocity.0.png   |   Bin 0 -> 165919 bytes
 .../deal.II/images/step-35.Re_500.velocity.1.png   |   Bin 0 -> 147256 bytes
 .../deal.II/images/step-35.Re_500.vorticity.0.png  |   Bin 0 -> 189403 bytes
 .../deal.II/images/step-35.Re_500.vorticity.1.png  |   Bin 0 -> 169207 bytes
 doc/doxygen/deal.II/images/step-35.Re_500.zoom.png |   Bin 0 -> 434862 bytes
 .../deal.II/images/step-35.Re_500.zoom_2.png       |   Bin 0 -> 166330 bytes
 doc/doxygen/deal.II/images/step-35.geometry.png    |   Bin 0 -> 9184 bytes
 .../images/step-36.default.eigenfunction.0.png     |   Bin 0 -> 27877 bytes
 .../images/step-36.default.eigenfunction.1.png     |   Bin 0 -> 18552 bytes
 .../images/step-36.default.eigenfunction.2.png     |   Bin 0 -> 26823 bytes
 .../images/step-36.default.eigenfunction.3.png     |   Bin 0 -> 24359 bytes
 .../images/step-36.default.eigenfunction.4.png     |   Bin 0 -> 26702 bytes
 .../deal.II/images/step-36.mod.eigenfunction.0.png |   Bin 0 -> 31693 bytes
 .../deal.II/images/step-36.mod.eigenfunction.1.png |   Bin 0 -> 23881 bytes
 .../deal.II/images/step-36.mod.eigenfunction.2.png |   Bin 0 -> 26899 bytes
 .../deal.II/images/step-36.mod.eigenfunction.3.png |   Bin 0 -> 24886 bytes
 .../deal.II/images/step-36.mod.eigenfunction.4.png |   Bin 0 -> 25609 bytes
 .../deal.II/images/step-36.mod.potential.png       |   Bin 0 -> 35683 bytes
 doc/doxygen/deal.II/images/step-37.solution.png    |   Bin 0 -> 147968 bytes
 doc/doxygen/deal.II/images/step-38.solution-2d.png |   Bin 0 -> 7521 bytes
 doc/doxygen/deal.II/images/step-38.solution-3d.png |   Bin 0 -> 96645 bytes
 doc/doxygen/deal.II/images/step-38.warp-1.png      |   Bin 0 -> 64369 bytes
 doc/doxygen/deal.II/images/step-38.warp-2.png      |   Bin 0 -> 67535 bytes
 doc/doxygen/deal.II/images/step-39-convergence.png |   Bin 0 -> 4223 bytes
 doc/doxygen/deal.II/images/step-4.contours-3d.png  |   Bin 0 -> 184071 bytes
 doc/doxygen/deal.II/images/step-4.solution-2d.png  |   Bin 0 -> 85927 bytes
 doc/doxygen/deal.II/images/step-4.solution-3d.png  |   Bin 0 -> 172244 bytes
 doc/doxygen/deal.II/images/step-40.256.png         |   Bin 0 -> 11910 bytes
 doc/doxygen/deal.II/images/step-40.4096.png        |   Bin 0 -> 12158 bytes
 doc/doxygen/deal.II/images/step-40.mesh.png        |   Bin 0 -> 11346 bytes
 doc/doxygen/deal.II/images/step-40.solution.png    |   Bin 0 -> 91586 bytes
 doc/doxygen/deal.II/images/step-40.strong.png      |   Bin 0 -> 11211 bytes
 doc/doxygen/deal.II/images/step-40.strong2.png     |   Bin 0 -> 12277 bytes
 .../deal.II/images/step-41.active-set.00.png       |   Bin 0 -> 4908 bytes
 .../deal.II/images/step-41.active-set.03.png       |   Bin 0 -> 4625 bytes
 .../deal.II/images/step-41.active-set.06.png       |   Bin 0 -> 4470 bytes
 .../deal.II/images/step-41.active-set.09.png       |   Bin 0 -> 3886 bytes
 .../deal.II/images/step-41.active-set.12.png       |   Bin 0 -> 2949 bytes
 .../deal.II/images/step-41.active-set.15.png       |   Bin 0 -> 3719 bytes
 .../deal.II/images/step-41.active-set.18.png       |   Bin 0 -> 3918 bytes
 doc/doxygen/deal.II/images/step-41.active-set.png  |   Bin 0 -> 50948 bytes
 .../deal.II/images/step-41.displacement.00.png     |   Bin 0 -> 16732 bytes
 .../deal.II/images/step-41.displacement.03.png     |   Bin 0 -> 16515 bytes
 .../deal.II/images/step-41.displacement.06.png     |   Bin 0 -> 18525 bytes
 .../deal.II/images/step-41.displacement.09.png     |   Bin 0 -> 20177 bytes
 .../deal.II/images/step-41.displacement.12.png     |   Bin 0 -> 21344 bytes
 .../deal.II/images/step-41.displacement.15.png     |   Bin 0 -> 21006 bytes
 .../deal.II/images/step-41.displacement.18.png     |   Bin 0 -> 21123 bytes
 .../deal.II/images/step-41.displacement.3d.00.png  |   Bin 0 -> 57041 bytes
 .../deal.II/images/step-41.displacement.3d.03.png  |   Bin 0 -> 57437 bytes
 .../deal.II/images/step-41.displacement.3d.06.png  |   Bin 0 -> 57537 bytes
 .../deal.II/images/step-41.displacement.3d.09.png  |   Bin 0 -> 57905 bytes
 .../deal.II/images/step-41.displacement.3d.12.png  |   Bin 0 -> 57784 bytes
 .../deal.II/images/step-41.displacement.3d.15.png  |   Bin 0 -> 58614 bytes
 .../deal.II/images/step-41.displacement.3d.18.png  |   Bin 0 -> 58602 bytes
 .../deal.II/images/step-41.displacement.png        |   Bin 0 -> 79173 bytes
 doc/doxygen/deal.II/images/step-41.forces.01.png   |   Bin 0 -> 18455 bytes
 doc/doxygen/deal.II/images/step-41.forces.09.png   |   Bin 0 -> 16895 bytes
 doc/doxygen/deal.II/images/step-41.forces.18.png   |   Bin 0 -> 18946 bytes
 .../deal.II/images/step-42.CellConstitution.png    |   Bin 0 -> 52558 bytes
 .../images/step-42.CellConstitutionBall.png        |   Bin 0 -> 52558 bytes
 .../images/step-42.CellConstitutionBall2.png       |   Bin 0 -> 25917 bytes
 .../images/step-42.CellConstitutionColorbar.png    |   Bin 0 -> 6614 bytes
 .../deal.II/images/step-42.CellConstitutionLi2.png |   Bin 0 -> 48974 bytes
 .../deal.II/images/step-42.adaptive-contact.png    |   Bin 0 -> 201969 bytes
 doc/doxygen/deal.II/images/step-42.character.png   |   Bin 0 -> 3333 bytes
 .../deal.II/images/step-42.starke-skalierung.png   |   Bin 0 -> 19167 bytes
 doc/doxygen/deal.II/images/step-43.3d.mesh.png     |   Bin 0 -> 114927 bytes
 .../deal.II/images/step-43.3d.saturation.png       |   Bin 0 -> 244053 bytes
 .../deal.II/images/step-43.3d.streamlines.png      |   Bin 0 -> 138629 bytes
 doc/doxygen/deal.II/images/step-43.3d.velocity.png |   Bin 0 -> 250628 bytes
 doc/doxygen/deal.II/images/step-43.spe10.1.png     |   Bin 0 -> 161477 bytes
 doc/doxygen/deal.II/images/step-43.spe10.2.png     |   Bin 0 -> 145238 bytes
 doc/doxygen/deal.II/images/step-44.2d-gr_2.png     |   Bin 0 -> 82462 bytes
 doc/doxygen/deal.II/images/step-44.2d-gr_5.png     |   Bin 0 -> 211350 bytes
 .../deal.II/images/step-44.Normalised_runtime.png  |   Bin 0 -> 48456 bytes
 .../deal.II/images/step-44.Q1-P0_convergence.png   |   Bin 0 -> 37185 bytes
 .../step-44.Q1-P0_gr_1_p_ratio_80-dilatation.png   |   Bin 0 -> 34841 bytes
 .../step-44.Q1-P0_gr_1_p_ratio_80-displacement.png |   Bin 0 -> 82934 bytes
 .../step-44.Q1-P0_gr_1_p_ratio_80-pressure.png     |   Bin 0 -> 36900 bytes
 .../deal.II/images/step-44.Q2-P1_convergence.png   |   Bin 0 -> 34859 bytes
 .../step-44.Q2-P1_gr_3_p_ratio_80-dilatation.png   |   Bin 0 -> 206263 bytes
 .../step-44.Q2-P1_gr_3_p_ratio_80-displacement.png |   Bin 0 -> 211126 bytes
 .../step-44.Q2-P1_gr_3_p_ratio_80-pressure.png     |   Bin 0 -> 186746 bytes
 doc/doxygen/deal.II/images/step-44.setup.png       |   Bin 0 -> 129238 bytes
 .../deal.II/images/step-45.non_periodic.png        |   Bin 0 -> 69788 bytes
 doc/doxygen/deal.II/images/step-45.periodic.png    |   Bin 0 -> 86866 bytes
 .../deal.II/images/step-45.periodic_cells.png      |   Bin 0 -> 3855 bytes
 doc/doxygen/deal.II/images/step-45.solution.png    |   Bin 0 -> 85370 bytes
 .../deal.II/images/step-45_non_periodic.png        |   Bin 0 -> 98565 bytes
 doc/doxygen/deal.II/images/step-45_periodic.png    |   Bin 0 -> 88670 bytes
 .../deal.II/images/step-46.3d.displacement.png     |   Bin 0 -> 196692 bytes
 .../deal.II/images/step-46.3d.streamlines.png      |   Bin 0 -> 283366 bytes
 doc/doxygen/deal.II/images/step-46.3d.velocity.png |   Bin 0 -> 224229 bytes
 .../deal.II/images/step-46.displacement.png        |   Bin 0 -> 47058 bytes
 doc/doxygen/deal.II/images/step-46.layout.png      |   Bin 0 -> 3864 bytes
 doc/doxygen/deal.II/images/step-46.pressure.png    |   Bin 0 -> 63190 bytes
 .../deal.II/images/step-46.velocity-magnitude.png  |   Bin 0 -> 72842 bytes
 doc/doxygen/deal.II/images/step-46.velocity.png    |   Bin 0 -> 82566 bytes
 .../deal.II/images/step-49.gmsh_picture.png        |   Bin 0 -> 19230 bytes
 doc/doxygen/deal.II/images/step-49.grid-1.png      |   Bin 0 -> 18208 bytes
 doc/doxygen/deal.II/images/step-49.grid-2.png      |   Bin 0 -> 1843 bytes
 doc/doxygen/deal.II/images/step-49.grid-2a.png     |   Bin 0 -> 3373 bytes
 doc/doxygen/deal.II/images/step-49.grid-2b.png     |   Bin 0 -> 974 bytes
 .../deal.II/images/step-49.grid-2d-refined.png     |   Bin 0 -> 51179 bytes
 doc/doxygen/deal.II/images/step-49.grid-3.png      |   Bin 0 -> 19847 bytes
 doc/doxygen/deal.II/images/step-49.grid-3a.png     |   Bin 0 -> 15793 bytes
 doc/doxygen/deal.II/images/step-49.grid-4.png      |   Bin 0 -> 14348 bytes
 doc/doxygen/deal.II/images/step-49.grid-4base.png  |   Bin 0 -> 3373 bytes
 doc/doxygen/deal.II/images/step-49.grid-5.png      |   Bin 0 -> 3607 bytes
 doc/doxygen/deal.II/images/step-49.grid-5a.png     |   Bin 0 -> 659 bytes
 doc/doxygen/deal.II/images/step-49.grid-6.png      |   Bin 0 -> 1762 bytes
 doc/doxygen/deal.II/images/step-49.grid-6a.png     |   Bin 0 -> 1427 bytes
 doc/doxygen/deal.II/images/step-49.grid-7.png      |   Bin 0 -> 17355 bytes
 doc/doxygen/deal.II/images/step-49.grid-7a.png     |   Bin 0 -> 1582 bytes
 doc/doxygen/deal.II/images/step-49.yuhan.1.png     |   Bin 0 -> 18190 bytes
 doc/doxygen/deal.II/images/step-49.yuhan.2.png     |   Bin 0 -> 15218 bytes
 doc/doxygen/deal.II/images/step-49.yuhan.3.png     |   Bin 0 -> 24774 bytes
 doc/doxygen/deal.II/images/step-49.yuhan.4.png     |   Bin 0 -> 26928 bytes
 doc/doxygen/deal.II/images/step-49.yuhan.5.png     |   Bin 0 -> 25282 bytes
 doc/doxygen/deal.II/images/step-49.yuhan.6.png     |   Bin 0 -> 25270 bytes
 doc/doxygen/deal.II/images/step-49.yuhan.7.png     |   Bin 0 -> 68167 bytes
 doc/doxygen/deal.II/images/step-5.solution-0.png   |   Bin 0 -> 45503 bytes
 doc/doxygen/deal.II/images/step-5.solution-1.png   |   Bin 0 -> 88359 bytes
 doc/doxygen/deal.II/images/step-5.solution-2.png   |   Bin 0 -> 122929 bytes
 doc/doxygen/deal.II/images/step-5.solution-3.png   |   Bin 0 -> 144386 bytes
 doc/doxygen/deal.II/images/step-5.solution-4.png   |   Bin 0 -> 154978 bytes
 doc/doxygen/deal.II/images/step-5.solution-5.png   |   Bin 0 -> 138354 bytes
 doc/doxygen/deal.II/images/step-51.2d_plain.png    |   Bin 0 -> 34377 bytes
 doc/doxygen/deal.II/images/step-51.2d_post.png     |   Bin 0 -> 35107 bytes
 doc/doxygen/deal.II/images/step-51.2d_postb.png    |   Bin 0 -> 35539 bytes
 doc/doxygen/deal.II/images/step-51.2dt_plain.png   |   Bin 0 -> 33413 bytes
 doc/doxygen/deal.II/images/step-51.2dt_post.png    |   Bin 0 -> 33701 bytes
 doc/doxygen/deal.II/images/step-51.2dt_postb.png   |   Bin 0 -> 35425 bytes
 doc/doxygen/deal.II/images/step-51.3d_plain.png    |   Bin 0 -> 34617 bytes
 doc/doxygen/deal.II/images/step-51.3d_post.png     |   Bin 0 -> 35568 bytes
 doc/doxygen/deal.II/images/step-51.3d_postb.png    |   Bin 0 -> 35452 bytes
 doc/doxygen/deal.II/images/step-51.3dt_plain.png   |   Bin 0 -> 33296 bytes
 doc/doxygen/deal.II/images/step-51.3dt_post.png    |   Bin 0 -> 34440 bytes
 doc/doxygen/deal.II/images/step-51.3dt_postb.png   |   Bin 0 -> 35594 bytes
 doc/doxygen/deal.II/images/step-51.post_2.png      |   Bin 0 -> 80789 bytes
 doc/doxygen/deal.II/images/step-51.post_3.png      |   Bin 0 -> 79260 bytes
 doc/doxygen/deal.II/images/step-51.post_4.png      |   Bin 0 -> 82404 bytes
 doc/doxygen/deal.II/images/step-51.post_8.png      |   Bin 0 -> 106136 bytes
 doc/doxygen/deal.II/images/step-51.post_q3_2.png   |   Bin 0 -> 72562 bytes
 doc/doxygen/deal.II/images/step-51.sol_2.png       |   Bin 0 -> 70450 bytes
 doc/doxygen/deal.II/images/step-51.sol_3.png       |   Bin 0 -> 68551 bytes
 doc/doxygen/deal.II/images/step-51.sol_4.png       |   Bin 0 -> 70897 bytes
 doc/doxygen/deal.II/images/step-51.sol_8.png       |   Bin 0 -> 97074 bytes
 doc/doxygen/deal.II/images/step-51.sol_q3_2.png    |   Bin 0 -> 80153 bytes
 doc/doxygen/deal.II/images/step-53.mesh.png        |   Bin 0 -> 171777 bytes
 .../deal.II/images/step-53.smooth-geometry.png     |   Bin 0 -> 244865 bytes
 doc/doxygen/deal.II/images/step-53.topo.png        |   Bin 0 -> 176238 bytes
 doc/doxygen/deal.II/images/step-53.topozoom.png    |   Bin 0 -> 381790 bytes
 doc/doxygen/deal.II/images/step-54.CurveSplit.png  |   Bin 0 -> 29532 bytes
 .../images/step-54.DirectionalProjection.png       |   Bin 0 -> 28672 bytes
 .../deal.II/images/step-54.NormalProjection.png    |   Bin 0 -> 27079 bytes
 .../images/step-54.NormalProjectionEdge.png        |   Bin 0 -> 13400 bytes
 .../images/step-54.ProjectionComparisons.png       |   Bin 0 -> 84131 bytes
 doc/doxygen/deal.II/images/step-54.bare.png        |   Bin 0 -> 95098 bytes
 doc/doxygen/deal.II/images/step-54.common_0.png    |   Bin 0 -> 79579 bytes
 .../deal.II/images/step-54.directional_1.png       |   Bin 0 -> 71461 bytes
 .../deal.II/images/step-54.directional_2.png       |   Bin 0 -> 71642 bytes
 .../deal.II/images/step-54.directional_3.png       |   Bin 0 -> 76482 bytes
 .../deal.II/images/step-54.directional_4.png       |   Bin 0 -> 101575 bytes
 .../deal.II/images/step-54.directional_5.png       |   Bin 0 -> 140548 bytes
 .../deal.II/images/step-54.directional_front_3.png |   Bin 0 -> 58879 bytes
 .../deal.II/images/step-54.directional_front_4.png |   Bin 0 -> 74872 bytes
 .../deal.II/images/step-54.directional_front_5.png |   Bin 0 -> 98967 bytes
 doc/doxygen/deal.II/images/step-54.normal_1.png    |   Bin 0 -> 71779 bytes
 doc/doxygen/deal.II/images/step-54.normal_2.png    |   Bin 0 -> 70912 bytes
 doc/doxygen/deal.II/images/step-54.normal_3.png    |   Bin 0 -> 77487 bytes
 doc/doxygen/deal.II/images/step-54.normal_4.png    |   Bin 0 -> 101206 bytes
 doc/doxygen/deal.II/images/step-54.normal_5.png    |   Bin 0 -> 135074 bytes
 .../deal.II/images/step-54.normal_front_3.png      |   Bin 0 -> 64040 bytes
 .../deal.II/images/step-54.normal_front_4.png      |   Bin 0 -> 80283 bytes
 .../deal.II/images/step-54.normal_front_5.png      |   Bin 0 -> 103818 bytes
 .../deal.II/images/step-54.normal_to_mesh_1.png    |   Bin 0 -> 71785 bytes
 .../deal.II/images/step-54.normal_to_mesh_2.png    |   Bin 0 -> 70931 bytes
 .../deal.II/images/step-54.normal_to_mesh_3.png    |   Bin 0 -> 74469 bytes
 .../deal.II/images/step-54.normal_to_mesh_4.png    |   Bin 0 -> 96372 bytes
 .../deal.II/images/step-54.normal_to_mesh_5.png    |   Bin 0 -> 131538 bytes
 .../images/step-54.normal_to_mesh_front_3.png      |   Bin 0 -> 58209 bytes
 .../images/step-54.normal_to_mesh_front_4.png      |   Bin 0 -> 75952 bytes
 .../images/step-54.normal_to_mesh_front_5.png      |   Bin 0 -> 99084 bytes
 doc/doxygen/deal.II/images/step-6.grid-0.png       |   Bin 0 -> 13379 bytes
 doc/doxygen/deal.II/images/step-6.grid-1.png       |   Bin 0 -> 20547 bytes
 doc/doxygen/deal.II/images/step-6.grid-2.png       |   Bin 0 -> 27097 bytes
 doc/doxygen/deal.II/images/step-6.grid-3.png       |   Bin 0 -> 32110 bytes
 doc/doxygen/deal.II/images/step-6.grid-4.png       |   Bin 0 -> 42263 bytes
 doc/doxygen/deal.II/images/step-6.grid-5.png       |   Bin 0 -> 47887 bytes
 doc/doxygen/deal.II/images/step-6.grid-6.png       |   Bin 0 -> 49599 bytes
 doc/doxygen/deal.II/images/step-6.grid-7.png       |   Bin 0 -> 50987 bytes
 .../deal.II/images/step-6.manifold-grid-0.png      |   Bin 0 -> 6458 bytes
 .../deal.II/images/step-6.manifold-grid-1.png      |   Bin 0 -> 9046 bytes
 .../deal.II/images/step-6.manifold-grid-2.png      |   Bin 0 -> 12233 bytes
 .../deal.II/images/step-6.manifold-grid-3.png      |   Bin 0 -> 15946 bytes
 .../deal.II/images/step-6.manifold-grid-4-bad.png  |   Bin 0 -> 22830 bytes
 .../deal.II/images/step-6.manifold-grid-4.png      |   Bin 0 -> 20578 bytes
 .../deal.II/images/step-6.manifold-grid-5.png      |   Bin 0 -> 24817 bytes
 .../deal.II/images/step-6.manifold-grid-6.png      |   Bin 0 -> 30538 bytes
 .../deal.II/images/step-6.manifold-grid-7.png      |   Bin 0 -> 32068 bytes
 .../images/step-6.q1.dofs_vs_iterations.png        |   Bin 0 -> 3727 bytes
 .../deal.II/images/step-6.q1.dofs_vs_time.png      |   Bin 0 -> 3345 bytes
 .../images/step-6.q2.dofs_vs_iterations.png        |   Bin 0 -> 3782 bytes
 .../deal.II/images/step-6.q2.dofs_vs_time.png      |   Bin 0 -> 3657 bytes
 doc/doxygen/deal.II/images/step-6.solution.png     |   Bin 0 -> 107198 bytes
 doc/doxygen/deal.II/images/step-7.solution.png     |   Bin 0 -> 84740 bytes
 doc/doxygen/deal.II/images/step-8.grid.png         |   Bin 0 -> 14804 bytes
 doc/doxygen/deal.II/images/step-8.vectors.png      |   Bin 0 -> 19083 bytes
 doc/doxygen/deal.II/images/step-8.x.png            |   Bin 0 -> 39599 bytes
 doc/doxygen/deal.II/images/step-8.y.png            |   Bin 0 -> 38983 bytes
 doc/doxygen/deal.II/images/step-9.grid.png         |   Bin 0 -> 161688 bytes
 doc/doxygen/deal.II/images/step-9.solution.png     |   Bin 0 -> 135961 bytes
 doc/doxygen/headers/boundary.h                     |   136 +
 doc/doxygen/headers/c++11.h                        |   115 +
 doc/doxygen/headers/coding_conventions.h           |   404 +
 doc/doxygen/headers/concepts.h                     |   270 +
 doc/doxygen/headers/constraints.h                  |   493 +
 doc/doxygen/headers/distributed.h                  |   436 +
 doc/doxygen/headers/dofs.h                         |    63 +
 doc/doxygen/headers/exceptions.h                   |   355 +
 doc/doxygen/headers/fe.h                           |   203 +
 doc/doxygen/headers/fe_vs_mapping_vs_fevalues.h    |   333 +
 doc/doxygen/headers/functions.h                    |    71 +
 doc/doxygen/headers/geodynamics.h                  |   145 +
 doc/doxygen/headers/geometry_and_primitives.h      |    52 +
 doc/doxygen/headers/global_dof_index.h             |    88 +
 doc/doxygen/headers/glossary.h                     |  1708 +++
 doc/doxygen/headers/grid.h                         |   117 +
 doc/doxygen/headers/hp.h                           |   104 +
 doc/doxygen/headers/instantiations.h               |   127 +
 doc/doxygen/headers/integrators.h                  |    60 +
 doc/doxygen/headers/io.h                           |   134 +
 doc/doxygen/headers/iterators.h                    |   446 +
 doc/doxygen/headers/lac.h                          |    30 +
 doc/doxygen/headers/laoperators.h                  |   185 +
 doc/doxygen/headers/main.h                         |   259 +
 doc/doxygen/headers/manifold.h                     |   267 +
 doc/doxygen/headers/matrices.h                     |    72 +
 doc/doxygen/headers/memory.h                       |    40 +
 doc/doxygen/headers/mesh_worker.h                  |    27 +
 doc/doxygen/headers/mg.h                           |    83 +
 doc/doxygen/headers/multithreading.h               |  1233 ++
 doc/doxygen/headers/namespace_dealii.h             |    27 +
 doc/doxygen/headers/numerical_algorithms.h         |    39 +
 doc/doxygen/headers/parallel.h                     |    39 +
 doc/doxygen/headers/petsc.h                        |    34 +
 doc/doxygen/headers/polynomials.h                  |    25 +
 doc/doxygen/headers/preconditioners.h              |   106 +
 doc/doxygen/headers/quadrature.h                   |   101 +
 doc/doxygen/headers/slepc.h                        |    32 +
 doc/doxygen/headers/solvers.h                      |    35 +
 doc/doxygen/headers/sparsity.h                     |   131 +
 doc/doxygen/headers/std_cxx11.h                    |    32 +
 doc/doxygen/headers/trilinos.h                     |    34 +
 doc/doxygen/headers/update_flags.h                 |   256 +
 doc/doxygen/headers/utilities.h                    |    42 +
 doc/doxygen/headers/vector_memory.h                |    30 +
 doc/doxygen/headers/vector_valued.h                |  1109 ++
 doc/doxygen/headers/vectors.h                      |    25 +
 doc/doxygen/images/cg-monitor-smoothing-0.png      |   Bin 0 -> 106547 bytes
 doc/doxygen/images/cg-monitor-smoothing-1.png      |   Bin 0 -> 86065 bytes
 doc/doxygen/images/cg-monitor-smoothing-2.png      |   Bin 0 -> 77339 bytes
 doc/doxygen/images/cg-monitor-smoothing-3.png      |   Bin 0 -> 71136 bytes
 doc/doxygen/images/cg-monitor-smoothing-4.png      |   Bin 0 -> 70150 bytes
 doc/doxygen/images/cg-monitor-smoothing-5.png      |   Bin 0 -> 68709 bytes
 doc/doxygen/images/cheese_2d.png                   |   Bin 0 -> 3361 bytes
 doc/doxygen/images/collaboration.eps               |   258 +
 doc/doxygen/images/collaboration.fig               |    72 +
 doc/doxygen/images/collaboration.png               |   Bin 0 -> 19409 bytes
 doc/doxygen/images/cone_2d.png                     |   Bin 0 -> 4547 bytes
 doc/doxygen/images/cone_3d.png                     |   Bin 0 -> 7590 bytes
 doc/doxygen/images/conflicting_constraints.fig     |    29 +
 doc/doxygen/images/conflicting_constraints.png     |   Bin 0 -> 2436 bytes
 doc/doxygen/images/cubes_hole.png                  |   Bin 0 -> 58571 bytes
 doc/doxygen/images/dgp_doesnt_contain_p.fig        |    20 +
 doc/doxygen/images/dgp_doesnt_contain_p.png        |   Bin 0 -> 3429 bytes
 doc/doxygen/images/direction_flag.fig              |    16 +
 doc/doxygen/images/direction_flag.png              |   Bin 0 -> 3502 bytes
 doc/doxygen/images/direction_flag_normals.fig      |    31 +
 doc/doxygen/images/direction_flag_normals.png      |   Bin 0 -> 4436 bytes
 doc/doxygen/images/distorted_2d.fig                |    15 +
 doc/doxygen/images/distorted_2d.png                |   Bin 0 -> 1708 bytes
 doc/doxygen/images/distorted_2d_refinement_01.fig  |    18 +
 doc/doxygen/images/distorted_2d_refinement_01.png  |   Bin 0 -> 1491 bytes
 doc/doxygen/images/distorted_2d_refinement_02.fig  |    22 +
 doc/doxygen/images/distorted_2d_refinement_02.png  |   Bin 0 -> 1727 bytes
 doc/doxygen/images/distorted_2d_refinement_03.fig  |    27 +
 doc/doxygen/images/distorted_2d_refinement_03.png  |   Bin 0 -> 1809 bytes
 doc/doxygen/images/distorted_3d.fig                |    30 +
 doc/doxygen/images/distorted_3d.png                |   Bin 0 -> 3836 bytes
 doc/doxygen/images/distributed_mesh_0.png          |   Bin 0 -> 41469 bytes
 doc/doxygen/images/distributed_mesh_1.png          |   Bin 0 -> 52087 bytes
 doc/doxygen/images/distributed_mesh_2.png          |   Bin 0 -> 56663 bytes
 doc/doxygen/images/distributed_mesh_3.png          |   Bin 0 -> 37749 bytes
 doc/doxygen/images/fe_q_bubbles_conditioning.png   |   Bin 0 -> 8062 bytes
 doc/doxygen/images/hanging_nodes.png               |   Bin 0 -> 2374 bytes
 doc/doxygen/images/hp-refinement-simple.png        |   Bin 0 -> 2067 bytes
 doc/doxygen/images/hyper_cross_2d.png              |   Bin 0 -> 973 bytes
 doc/doxygen/images/hyper_cross_3d.png              |   Bin 0 -> 7396 bytes
 doc/doxygen/images/hyper_cubes.png                 |   Bin 0 -> 881 bytes
 doc/doxygen/images/hyper_l.png                     |   Bin 0 -> 1040 bytes
 doc/doxygen/images/hyper_shell_12_cut.png          |   Bin 0 -> 24187 bytes
 doc/doxygen/images/hyper_shell_6_cross_plane.png   |   Bin 0 -> 1476 bytes
 doc/doxygen/images/hyper_shell_96_cut.png          |   Bin 0 -> 26070 bytes
 doc/doxygen/images/hypershell-all-3.png            |   Bin 0 -> 15618 bytes
 doc/doxygen/images/hypershell-all.png              |   Bin 0 -> 23984 bytes
 doc/doxygen/images/hypershell-boundary-only-3.png  |   Bin 0 -> 17431 bytes
 doc/doxygen/images/hypershell-boundary-only-4.png  |   Bin 0 -> 18134 bytes
 doc/doxygen/images/hypershell-boundary-only.png    |   Bin 0 -> 24795 bytes
 doc/doxygen/images/hypershell-nothing.png          |   Bin 0 -> 22298 bytes
 doc/doxygen/images/hypershell3d-12.png             |   Bin 0 -> 41992 bytes
 doc/doxygen/images/hypershell3d-6.png              |   Bin 0 -> 36600 bytes
 .../images/limit_level_difference_at_vertices.fig  |    27 +
 .../images/limit_level_difference_at_vertices.png  |   Bin 0 -> 1808 bytes
 doc/doxygen/images/logo200.png                     |   Bin 0 -> 13396 bytes
 doc/doxygen/images/multigrid.png                   |   Bin 0 -> 4761 bytes
 doc/doxygen/images/no_normal_flux_1.fig            |    25 +
 doc/doxygen/images/no_normal_flux_1.png            |   Bin 0 -> 3760 bytes
 doc/doxygen/images/no_normal_flux_2.fig            |    24 +
 doc/doxygen/images/no_normal_flux_2.png            |   Bin 0 -> 3782 bytes
 doc/doxygen/images/no_normal_flux_3.fig            |    28 +
 doc/doxygen/images/no_normal_flux_3.png            |   Bin 0 -> 4230 bytes
 doc/doxygen/images/no_normal_flux_4.fig            |    28 +
 doc/doxygen/images/no_normal_flux_4.png            |   Bin 0 -> 3715 bytes
 doc/doxygen/images/no_normal_flux_5.png            |   Bin 0 -> 161890 bytes
 doc/doxygen/images/no_normal_flux_6.png            |   Bin 0 -> 169290 bytes
 doc/doxygen/images/parameter_gui.png               |   Bin 0 -> 64956 bytes
 doc/doxygen/images/parameter_handler.fig           |    27 +
 doc/doxygen/images/parameter_handler.png           |   Bin 0 -> 28060 bytes
 .../images/parameter_handler_background.png        |   Bin 0 -> 22309 bytes
 .../images/reorder_sparsity_step_31_boost_cmk.png  |   Bin 0 -> 57509 bytes
 .../images/reorder_sparsity_step_31_boost_king.png |   Bin 0 -> 52614 bytes
 .../images/reorder_sparsity_step_31_boost_md.png   |   Bin 0 -> 116521 bytes
 .../images/reorder_sparsity_step_31_deal_cmk.png   |   Bin 0 -> 62688 bytes
 .../images/reorder_sparsity_step_31_downstream.png |   Bin 0 -> 69685 bytes
 .../images/reorder_sparsity_step_31_original.png   |   Bin 0 -> 67101 bytes
 .../images/reorder_sparsity_step_31_random.png     |   Bin 0 -> 28217 bytes
 doc/doxygen/images/simplex_2d.png                  |   Bin 0 -> 2813 bytes
 doc/doxygen/images/simplex_3d.png                  |   Bin 0 -> 1264 bytes
 doc/doxygen/images/sphere.png                      |   Bin 0 -> 39488 bytes
 doc/doxygen/images/sphere_section.png              |   Bin 0 -> 21744 bytes
 doc/doxygen/options.dox.in                         |   235 +
 doc/doxygen/scripts/code-gallery.pl                |    68 +
 doc/doxygen/scripts/create_anchors                 |    31 +
 doc/doxygen/scripts/filter                         |   125 +
 doc/doxygen/scripts/intro2toc                      |    67 +
 doc/doxygen/scripts/make_gallery.pl                |   164 +
 doc/doxygen/scripts/make_step.pl                   |    70 +
 doc/doxygen/scripts/mod_footer.pl.in               |    15 +
 doc/doxygen/scripts/mod_header.pl.in               |    15 +
 doc/doxygen/scripts/program2doxygen                |   121 +
 doc/doxygen/scripts/program2doxyplain              |    51 +
 doc/doxygen/scripts/program2html                   |    99 +
 doc/doxygen/scripts/program2plain                  |    28 +
 doc/doxygen/scripts/program2toc                    |    63 +
 doc/doxygen/scripts/steps.pl                       |   230 +
 doc/doxygen/scripts/validate-xrefs.pl              |   157 +
 doc/doxygen/stylesheet.css                         |    21 +
 doc/doxygen/tutorial/CMakeLists.txt                |   131 +
 doc/doxygen/tutorial/tutorial.h.in                 |   994 ++
 doc/external-libs/arpack.html                      |   110 +
 doc/external-libs/opencascade.html                 |    84 +
 doc/external-libs/p4est-setup.sh                   |   133 +
 doc/external-libs/p4est.html                       |    66 +
 doc/external-libs/petsc.html                       |   156 +
 doc/external-libs/slepc.html                       |   116 +
 doc/external-libs/trilinos.html                    |   165 +
 doc/index.html                                     |    39 +
 doc/navbar.html                                    |    51 +
 doc/news/1.0.0-vs-2.0.0.h                          |    36 +
 doc/news/2.0.0-vs-3.0.0.h                          |    82 +
 doc/news/3.0.0-vs-3.0.1.h                          |    76 +
 doc/news/3.0.0-vs-3.1.0.h                          |  1113 ++
 doc/news/3.1.0-vs-3.1.1.h                          |   181 +
 doc/news/3.1.0-vs-3.2.0.h                          |  1242 ++
 doc/news/3.1.1-vs-3.1.2.h                          |    40 +
 doc/news/3.2.0-vs-3.2.1.h                          |   137 +
 doc/news/3.2.0-vs-3.3.0.h                          |   543 +
 doc/news/3.3.0-vs-3.3.1.h                          |    44 +
 doc/news/3.3.0-vs-3.4.0.h                          |   380 +
 doc/news/3.4.0-vs-4.0.0.h                          |  1378 ++
 doc/news/4.0.0-vs-5.0.0.h                          |   872 ++
 doc/news/5.0.0-vs-5.1.0.h                          |   568 +
 doc/news/5.1.0-vs-5.2.0.h                          |  1013 ++
 doc/news/5.2.0-vs-6.0.0.h                          |  1728 +++
 doc/news/6.0.0-vs-6.1.0.h                          |   563 +
 doc/news/6.1.0-vs-6.2.0.h                          |  1384 ++
 doc/news/6.2.0-vs-6.2.1.h                          |    59 +
 doc/news/6.2.0-vs-6.3.0.h                          |  1101 ++
 doc/news/6.3.0-vs-6.3.1.h                          |   180 +
 doc/news/6.3.0-vs-7.0.0.h                          |   689 +
 doc/news/7.0.0-vs-7.1.0.h                          |   769 ++
 doc/news/7.1.0-vs-7.2.0.h                          |   764 ++
 doc/news/7.2.0-vs-7.3.0.h                          |   533 +
 doc/news/7.3.0-vs-8.0.0.h                          |   641 +
 doc/news/8.0.0-vs-8.1.0.h                          |   750 +
 doc/news/8.1.0-vs-8.2.0.h                          |  1254 ++
 doc/news/8.2.0-vs-8.2.1.h                          |    43 +
 doc/news/8.2.1-vs-8.3.0.h                          |  1150 ++
 doc/news/8.3.0-vs-8.4.0.h                          |  1168 ++
 doc/news/8.4.0-vs-8.4.1.h                          |    39 +
 doc/news/changes.h                                 |    62 +
 doc/pictures/background-grid.jpg                   |   Bin 0 -> 3530 bytes
 doc/pictures/deal.II-text.jpg                      |   Bin 0 -> 2226 bytes
 doc/pictures/fail.gif                              |   Bin 0 -> 88 bytes
 doc/pictures/grid.1.gif                            |   Bin 0 -> 3316 bytes
 doc/pictures/grid.4.gif                            |   Bin 0 -> 11406 bytes
 doc/pictures/hex.fig                               |   123 +
 doc/pictures/ok.gif                                |   Bin 0 -> 82 bytes
 doc/pictures/quad.fig                              |    39 +
 doc/pictures/title-background.jpg                  |   Bin 0 -> 1341 bytes
 doc/readme.html                                    |   831 ++
 doc/screen.css                                     |   336 +
 doc/title.html.in                                  |    17 +
 doc/users/CMakeLists.txt.sample                    |    11 +
 doc/users/CMakeLists.txt.sample2                   |    28 +
 doc/users/CMakeLists.txt.sample3                   |    41 +
 doc/users/cmake.html                               |  1022 ++
 doc/users/cmakelists.html                          |   887 ++
 doc/users/config.sample                            |   664 +
 doc/users/doxygen.html                             |    63 +
 doc/users/gdb.html                                 |    96 +
 doc/users/testsuite.html                           |   291 +
 examples/CMakeLists.txt                            |    50 +
 examples/README.md                                 |     5 +
 examples/doxygen/CMakeLists.txt                    |    29 +
 examples/doxygen/block_dynamic_sparsity_pattern.cc |    83 +
 examples/doxygen/block_matrix_array.cc             |   130 +
 examples/doxygen/product_matrix.cc                 |    64 +
 examples/doxygen/theta_timestepping.cc             |   132 +
 examples/step-1/CMakeLists.txt                     |    39 +
 examples/step-1/doc/builds-on                      |     1 +
 examples/step-1/doc/intro.dox                      |   193 +
 examples/step-1/doc/kind                           |     1 +
 examples/step-1/doc/results.dox                    |    82 +
 examples/step-1/doc/tooltip                        |     1 +
 examples/step-1/step-1.cc                          |   264 +
 examples/step-10/CMakeLists.txt                    |    39 +
 examples/step-10/doc/builds-on                     |     1 +
 examples/step-10/doc/intro.dox                     |    71 +
 examples/step-10/doc/kind                          |     1 +
 examples/step-10/doc/results.dox                   |   176 +
 examples/step-10/doc/tooltip                       |     1 +
 examples/step-10/step-10.cc                        |   457 +
 examples/step-11/CMakeLists.txt                    |    39 +
 examples/step-11/doc/builds-on                     |     1 +
 examples/step-11/doc/intro.dox                     |   110 +
 examples/step-11/doc/kind                          |     1 +
 examples/step-11/doc/results.dox                   |    44 +
 examples/step-11/doc/tooltip                       |     2 +
 examples/step-11/step-11.cc                        |   479 +
 examples/step-12/CMakeLists.txt                    |    39 +
 examples/step-12/doc/builds-on                     |     1 +
 examples/step-12/doc/intro.dox                     |    76 +
 examples/step-12/doc/kind                          |     1 +
 examples/step-12/doc/results.dox                   |   188 +
 examples/step-12/doc/tooltip                       |     1 +
 examples/step-12/step-12.cc                        |   663 +
 examples/step-13/CMakeLists.txt                    |    39 +
 examples/step-13/doc/builds-on                     |     1 +
 examples/step-13/doc/intro.dox                     |   194 +
 examples/step-13/doc/kind                          |     1 +
 examples/step-13/doc/results.dox                   |   189 +
 examples/step-13/doc/tooltip                       |     1 +
 examples/step-13/step-13.cc                        |  1552 +++
 examples/step-14/CMakeLists.txt                    |    39 +
 examples/step-14/doc/builds-on                     |     1 +
 examples/step-14/doc/intro.dox                     |   408 +
 examples/step-14/doc/kind                          |     1 +
 examples/step-14/doc/results.dox                   |   389 +
 examples/step-14/doc/tooltip                       |     1 +
 examples/step-14/step-14.cc                        |  3088 +++++
 examples/step-15/CMakeLists.txt                    |    39 +
 examples/step-15/doc/builds-on                     |     1 +
 examples/step-15/doc/intro.dox                     |   304 +
 examples/step-15/doc/kind                          |     1 +
 examples/step-15/doc/results.dox                   |   184 +
 examples/step-15/doc/tooltip                       |     1 +
 examples/step-15/step-15.cc                        |   752 +
 examples/step-16/CMakeLists.txt                    |    39 +
 examples/step-16/doc/builds-on                     |     1 +
 examples/step-16/doc/intro.dox                     |    65 +
 examples/step-16/doc/kind                          |     1 +
 examples/step-16/doc/results.dox                   |    83 +
 examples/step-16/doc/tooltip                       |     1 +
 examples/step-16/step-16.cc                        |   693 +
 examples/step-17/CMakeLists.txt                    |    51 +
 examples/step-17/doc/builds-on                     |     1 +
 examples/step-17/doc/intro.dox                     |   196 +
 examples/step-17/doc/kind                          |     1 +
 examples/step-17/doc/results.dox                   |   248 +
 examples/step-17/doc/tooltip                       |     1 +
 examples/step-17/step-17.cc                        |  1103 ++
 examples/step-18/CMakeLists.txt                    |    51 +
 examples/step-18/doc/builds-on                     |     1 +
 examples/step-18/doc/intro.dox                     |   601 +
 examples/step-18/doc/kind                          |     1 +
 examples/step-18/doc/results.dox                   |   548 +
 examples/step-18/doc/tooltip                       |     1 +
 examples/step-18/step-18.cc                        |  1939 +++
 examples/step-19/CMakeLists.txt                    |    42 +
 examples/step-19/doc/builds-on                     |     1 +
 examples/step-19/doc/intro.dox                     |   121 +
 examples/step-19/doc/kind                          |     1 +
 examples/step-19/doc/results.dox                   |   259 +
 examples/step-19/doc/tooltip                       |     1 +
 examples/step-19/step-19.cc                        |   534 +
 examples/step-2/CMakeLists.txt                     |    39 +
 examples/step-2/doc/builds-on                      |     1 +
 examples/step-2/doc/intro.dox                      |    99 +
 examples/step-2/doc/kind                           |     1 +
 examples/step-2/doc/results.dox                    |    75 +
 examples/step-2/doc/tooltip                        |     1 +
 examples/step-2/step-2.cc                          |   311 +
 examples/step-20/CMakeLists.txt                    |    39 +
 examples/step-20/doc/builds-on                     |     1 +
 examples/step-20/doc/intro.dox                     |   712 +
 examples/step-20/doc/kind                          |     1 +
 examples/step-20/doc/results.dox                   |   323 +
 examples/step-20/doc/tooltip                       |     1 +
 examples/step-20/step-20.cc                        |   968 ++
 examples/step-21/CMakeLists.txt                    |    39 +
 examples/step-21/doc/builds-on                     |     1 +
 examples/step-21/doc/intro.dox                     |   532 +
 examples/step-21/doc/kind                          |     1 +
 examples/step-21/doc/results.dox                   |   212 +
 examples/step-21/doc/tooltip                       |     1 +
 examples/step-21/step-21.cc                        |  1313 ++
 examples/step-22/CMakeLists.txt                    |    50 +
 examples/step-22/doc/builds-on                     |     1 +
 examples/step-22/doc/intro.dox                     |   786 ++
 examples/step-22/doc/kind                          |     1 +
 examples/step-22/doc/results.dox                   |   804 ++
 examples/step-22/doc/tooltip                       |     1 +
 examples/step-22/step-22.cc                        |  1032 ++
 examples/step-23/CMakeLists.txt                    |    39 +
 examples/step-23/doc/builds-on                     |     1 +
 examples/step-23/doc/intro.dox                     |   418 +
 examples/step-23/doc/kind                          |     1 +
 examples/step-23/doc/results.dox                   |   194 +
 examples/step-23/doc/tooltip                       |     1 +
 examples/step-23/step-23.cc                        |   672 +
 examples/step-24/CMakeLists.txt                    |    39 +
 examples/step-24/doc/builds-on                     |     1 +
 examples/step-24/doc/intro.dox                     |   322 +
 examples/step-24/doc/kind                          |     1 +
 examples/step-24/doc/project-1.tex                 |    91 +
 examples/step-24/doc/results.dox                   |   144 +
 examples/step-24/doc/tooltip                       |     1 +
 examples/step-24/step-24.cc                        |   595 +
 examples/step-25/CMakeLists.txt                    |    39 +
 examples/step-25/doc/animate.sh                    |    57 +
 examples/step-25/doc/builds-on                     |     1 +
 examples/step-25/doc/intro.dox                     |   275 +
 examples/step-25/doc/kind                          |     1 +
 examples/step-25/doc/plot.plt                      |    20 +
 examples/step-25/doc/results.dox                   |   143 +
 examples/step-25/doc/tooltip                       |     1 +
 examples/step-25/step-25.cc                        |   757 ++
 examples/step-26/CMakeLists.txt                    |    39 +
 examples/step-26/doc/builds-on                     |     1 +
 examples/step-26/doc/intro.dox                     |   439 +
 examples/step-26/doc/kind                          |     1 +
 examples/step-26/doc/results.dox                   |   122 +
 examples/step-26/doc/tooltip                       |     1 +
 examples/step-26/step-26.cc                        |   692 +
 examples/step-27/CMakeLists.txt                    |    39 +
 examples/step-27/doc/builds-on                     |     1 +
 examples/step-27/doc/intro.dox                     |   724 +
 examples/step-27/doc/kind                          |     1 +
 examples/step-27/doc/results.dox                   |   144 +
 examples/step-27/doc/tooltip                       |     1 +
 examples/step-27/step-27.cc                        |   880 ++
 examples/step-28/CMakeLists.txt                    |    39 +
 examples/step-28/doc/builds-on                     |     1 +
 examples/step-28/doc/data-q1                       |    10 +
 examples/step-28/doc/data-q2                       |    10 +
 examples/step-28/doc/data-q3                       |    10 +
 examples/step-28/doc/data-q4                       |    10 +
 examples/step-28/doc/data-q5                       |    10 +
 examples/step-28/doc/data-q6                       |    15 +
 examples/step-28/doc/gnuplot.1                     |    26 +
 examples/step-28/doc/intro.dox                     |   645 +
 examples/step-28/doc/kind                          |     1 +
 examples/step-28/doc/results.dox                   |    80 +
 examples/step-28/doc/tooltip                       |     1 +
 examples/step-28/step-28.cc                        |  1807 +++
 examples/step-29/CMakeLists.txt                    |    50 +
 examples/step-29/doc/builds-on                     |     1 +
 examples/step-29/doc/intro.dox                     |   196 +
 examples/step-29/doc/kind                          |     1 +
 examples/step-29/doc/results.dox                   |   188 +
 examples/step-29/doc/tooltip                       |     1 +
 examples/step-29/step-29.cc                        |   974 ++
 examples/step-29/step-29.prm                       |    28 +
 examples/step-3/CMakeLists.txt                     |    39 +
 examples/step-3/doc/builds-on                      |     1 +
 examples/step-3/doc/intro.dox                      |   387 +
 examples/step-3/doc/kind                           |     1 +
 examples/step-3/doc/results.dox                    |   218 +
 examples/step-3/doc/tooltip                        |     1 +
 examples/step-3/step-3.cc                          |   660 +
 examples/step-30/CMakeLists.txt                    |    39 +
 examples/step-30/doc/builds-on                     |     1 +
 examples/step-30/doc/intro.dox                     |   490 +
 examples/step-30/doc/kind                          |     1 +
 examples/step-30/doc/results.dox                   |   151 +
 examples/step-30/doc/tooltip                       |     1 +
 examples/step-30/step-30.cc                        |  1018 ++
 examples/step-31/CMakeLists.txt                    |    50 +
 examples/step-31/doc/builds-on                     |     1 +
 examples/step-31/doc/intro.dox                     |  1016 ++
 examples/step-31/doc/kind                          |     1 +
 examples/step-31/doc/results.dox                   |   614 +
 examples/step-31/doc/tooltip                       |     1 +
 examples/step-31/step-31.cc                        |  2247 +++
 examples/step-32/CMakeLists.txt                    |    57 +
 examples/step-32/doc/builds-on                     |     1 +
 examples/step-32/doc/intro.dox                     |  1310 ++
 examples/step-32/doc/kind                          |     1 +
 examples/step-32/doc/results.dox                   |   492 +
 examples/step-32/doc/tooltip                       |     1 +
 examples/step-32/step-32.cc                        |  3778 +++++
 examples/step-32/step-32.prm                       |    54 +
 examples/step-33/CMakeLists.txt                    |    53 +
 examples/step-33/doc/builds-on                     |     1 +
 examples/step-33/doc/intro.dox                     |   363 +
 examples/step-33/doc/kind                          |     1 +
 examples/step-33/doc/results.dox                   |   291 +
 examples/step-33/doc/tooltip                       |     1 +
 examples/step-33/input.prm                         |    84 +
 examples/step-33/slide.inp                         |  3560 +++++
 examples/step-33/step-33.cc                        |  2550 ++++
 examples/step-34/CMakeLists.txt                    |    50 +
 examples/step-34/coarse_circle.inp                 |    21 +
 examples/step-34/coarse_sphere.inp                 |    15 +
 examples/step-34/doc/builds-on                     |     1 +
 examples/step-34/doc/intro.dox                     |   687 +
 examples/step-34/doc/kind                          |     1 +
 examples/step-34/doc/results.dox                   |   219 +
 examples/step-34/doc/tooltip                       |     1 +
 examples/step-34/parameters.prm                    |    81 +
 examples/step-34/step-34.cc                        |  1131 ++
 examples/step-35/CMakeLists.txt                    |    50 +
 examples/step-35/doc/builds-on                     |     1 +
 examples/step-35/doc/intro.dox                     |   236 +
 examples/step-35/doc/kind                          |     1 +
 examples/step-35/doc/results.dox                   |   176 +
 examples/step-35/doc/tooltip                       |     1 +
 examples/step-35/nsbench2.inp                      |   187 +
 examples/step-35/parameter-file.prm                |    42 +
 examples/step-35/step-35.cc                        |  1455 ++
 examples/step-35/zigzag.inp                        |   102 +
 examples/step-36/CMakeLists.txt                    |    51 +
 examples/step-36/doc/builds-on                     |     1 +
 examples/step-36/doc/intro.dox                     |   269 +
 examples/step-36/doc/kind                          |     1 +
 examples/step-36/doc/results.dox                   |   252 +
 examples/step-36/doc/tooltip                       |     1 +
 examples/step-36/step-36.cc                        |   524 +
 examples/step-36/step-36.prm                       |    11 +
 examples/step-37/CMakeLists.txt                    |    50 +
 examples/step-37/doc/builds-on                     |     1 +
 examples/step-37/doc/intro.dox                     |   475 +
 examples/step-37/doc/kind                          |     1 +
 examples/step-37/doc/results.dox                   |   239 +
 examples/step-37/doc/tooltip                       |     1 +
 examples/step-37/step-37.cc                        |  1149 ++
 examples/step-38/CMakeLists.txt                    |    39 +
 examples/step-38/doc/builds-on                     |     1 +
 examples/step-38/doc/intro.dox                     |   233 +
 examples/step-38/doc/kind                          |     1 +
 examples/step-38/doc/results.dox                   |   161 +
 examples/step-38/doc/tooltip                       |     1 +
 examples/step-38/step-38.cc                        |   590 +
 examples/step-39/CMakeLists.txt                    |    39 +
 examples/step-39/doc/builds-on                     |     1 +
 examples/step-39/doc/intro.dox                     |    89 +
 examples/step-39/doc/kind                          |     1 +
 examples/step-39/doc/results.dox                   |    93 +
 examples/step-39/doc/tooltip                       |     1 +
 examples/step-39/output.reference.dat              |    13 +
 examples/step-39/postprocess.pl                    |    41 +
 examples/step-39/step-39.cc                        |   970 ++
 examples/step-4/CMakeLists.txt                     |    39 +
 examples/step-4/doc/builds-on                      |     1 +
 examples/step-4/doc/intro.dox                      |   141 +
 examples/step-4/doc/kind                           |     1 +
 examples/step-4/doc/results.dox                    |   110 +
 examples/step-4/doc/tooltip                        |     1 +
 examples/step-4/step-4.cc                          |   540 +
 examples/step-40/CMakeLists.txt                    |    55 +
 examples/step-40/doc/builds-on                     |     1 +
 examples/step-40/doc/intro.dox                     |   148 +
 examples/step-40/doc/kind                          |     1 +
 examples/step-40/doc/results.dox                   |   174 +
 examples/step-40/doc/tooltip                       |     1 +
 examples/step-40/step-40.cc                        |   743 +
 examples/step-41/CMakeLists.txt                    |    50 +
 examples/step-41/doc/builds-on                     |     1 +
 examples/step-41/doc/intro.dox                     |   418 +
 examples/step-41/doc/kind                          |     1 +
 examples/step-41/doc/results.dox                   |   255 +
 examples/step-41/doc/tooltip                       |     1 +
 examples/step-41/step-41.cc                        |   706 +
 examples/step-42/CMakeLists.txt                    |    58 +
 examples/step-42/doc/builds-on                     |     1 +
 examples/step-42/doc/intro.dox                     |   425 +
 examples/step-42/doc/kind                          |     1 +
 examples/step-42/doc/results.dox                   |   221 +
 examples/step-42/doc/tooltip                       |     1 +
 examples/step-42/obstacle.pbm                      | 13602 +++++++++++++++++++
 examples/step-42/p1_adaptive.prm                   |     6 +
 examples/step-42/p1_chinese.prm                    |     6 +
 examples/step-42/p1_global.prm                     |     6 +
 examples/step-42/p2_adaptive.prm                   |    25 +
 examples/step-42/p2_global.prm                     |     6 +
 examples/step-42/step-42.cc                        |  2248 +++
 examples/step-43/CMakeLists.txt                    |    50 +
 examples/step-43/doc/builds-on                     |     1 +
 examples/step-43/doc/intro.dox                     |   609 +
 examples/step-43/doc/kind                          |     1 +
 examples/step-43/doc/results.dox                   |   111 +
 examples/step-43/doc/tooltip                       |     1 +
 examples/step-43/step-43.cc                        |  2285 ++++
 examples/step-44/CMakeLists.txt                    |    39 +
 examples/step-44/doc/builds-on                     |     1 +
 examples/step-44/doc/intro.dox                     |   681 +
 examples/step-44/doc/kind                          |     1 +
 examples/step-44/doc/results.dox                   |   279 +
 examples/step-44/doc/tooltip                       |     1 +
 examples/step-44/parameters.prm                    |    71 +
 examples/step-44/step-44.cc                        |  3205 +++++
 examples/step-45/CMakeLists.txt                    |    54 +
 examples/step-45/doc/builds-on                     |     1 +
 examples/step-45/doc/intro.dox                     |   149 +
 examples/step-45/doc/kind                          |     1 +
 examples/step-45/doc/results.dox                   |    10 +
 examples/step-45/doc/tooltip                       |     1 +
 examples/step-45/step-45.cc                        |   797 ++
 examples/step-46/CMakeLists.txt                    |    50 +
 examples/step-46/doc/builds-on                     |     1 +
 examples/step-46/doc/intro.dox                     |   685 +
 examples/step-46/doc/kind                          |     1 +
 examples/step-46/doc/results.dox                   |   322 +
 examples/step-46/doc/step-46.layout.fig            |    43 +
 examples/step-46/doc/tooltip                       |     1 +
 examples/step-46/step-46.cc                        |  1133 ++
 examples/step-48/CMakeLists.txt                    |    39 +
 examples/step-48/doc/builds-on                     |     1 +
 examples/step-48/doc/intro.dox                     |   176 +
 examples/step-48/doc/kind                          |     1 +
 examples/step-48/doc/results.dox                   |   174 +
 examples/step-48/doc/tooltip                       |     1 +
 examples/step-48/step-48.cc                        |   664 +
 examples/step-49/CMakeLists.txt                    |    39 +
 examples/step-49/doc/builds-on                     |     1 +
 examples/step-49/doc/intro.dox                     |   294 +
 examples/step-49/doc/kind                          |     1 +
 examples/step-49/doc/results.dox                   |   382 +
 examples/step-49/doc/tooltip                       |     1 +
 examples/step-49/step-49.cc                        |   358 +
 examples/step-49/untitled.geo                      |    56 +
 examples/step-49/untitled.msh                      |  1150 ++
 examples/step-5/CMakeLists.txt                     |    39 +
 examples/step-5/TODO                               |     1 +
 examples/step-5/circle-grid.inp                    |    46 +
 examples/step-5/doc/builds-on                      |     1 +
 examples/step-5/doc/intro.dox                      |    74 +
 examples/step-5/doc/kind                           |     1 +
 examples/step-5/doc/results.dox                    |   162 +
 examples/step-5/doc/tooltip                        |     1 +
 examples/step-5/step-5.cc                          |   654 +
 examples/step-51/CMakeLists.txt                    |    39 +
 examples/step-51/doc/builds-on                     |     1 +
 examples/step-51/doc/intro.dox                     |   350 +
 examples/step-51/doc/kind                          |     1 +
 examples/step-51/doc/results.dox                   |   384 +
 examples/step-51/doc/tooltip                       |     1 +
 examples/step-51/step-51.cc                        |  1449 ++
 examples/step-52/CMakeLists.txt                    |    42 +
 examples/step-52/doc/builds-on                     |     1 +
 examples/step-52/doc/intro.dox                     |   251 +
 examples/step-52/doc/kind                          |     1 +
 examples/step-52/doc/results.dox                   |    40 +
 examples/step-52/doc/tooltip                       |     1 +
 examples/step-52/step-52.cc                        |   764 ++
 examples/step-53/CMakeLists.txt                    |    50 +
 examples/step-53/doc/builds-on                     |     1 +
 examples/step-53/doc/intro.dox                     |   371 +
 examples/step-53/doc/kind                          |     1 +
 examples/step-53/doc/results.dox                   |   282 +
 examples/step-53/doc/tooltip                       |     1 +
 examples/step-53/step-53.cc                        |   480 +
 examples/step-53/topography.license                |   227 +
 examples/step-53/topography.txt.gz                 |   Bin 0 -> 332893 bytes
 examples/step-54/CMakeLists.txt                    |    50 +
 examples/step-54/DTMB-5415_bulbous_bow.iges        |   549 +
 examples/step-54/doc/builds-on                     |     1 +
 examples/step-54/doc/intro.dox                     |   190 +
 examples/step-54/doc/kind                          |     1 +
 examples/step-54/doc/results.dox                   |    92 +
 examples/step-54/doc/tooltip                       |     1 +
 examples/step-54/input/initial_mesh_3d.vtk         |    17 +
 examples/step-54/step-54.cc                        |   472 +
 examples/step-6/CMakeLists.txt                     |    39 +
 examples/step-6/doc/builds-on                      |     1 +
 examples/step-6/doc/intro.dox                      |    72 +
 examples/step-6/doc/kind                           |     1 +
 examples/step-6/doc/results.dox                    |   488 +
 examples/step-6/doc/tooltip                        |     1 +
 examples/step-6/step-6.cc                          |   808 ++
 examples/step-7/CMakeLists.txt                     |    39 +
 examples/step-7/doc/builds-on                      |     1 +
 examples/step-7/doc/intro.dox                      |   221 +
 examples/step-7/doc/kind                           |     1 +
 examples/step-7/doc/results.dox                    |   218 +
 examples/step-7/doc/tooltip                        |     1 +
 examples/step-7/step-7.cc                          |  1404 ++
 examples/step-8/CMakeLists.txt                     |    39 +
 examples/step-8/doc/builds-on                      |     1 +
 examples/step-8/doc/intro.dox                      |   355 +
 examples/step-8/doc/kind                           |     1 +
 examples/step-8/doc/results.dox                    |    58 +
 examples/step-8/doc/tooltip                        |     1 +
 examples/step-8/step-8.cc                          |   810 ++
 examples/step-9/CMakeLists.txt                     |    39 +
 examples/step-9/doc/builds-on                      |     1 +
 examples/step-9/doc/intro.dox                      |   292 +
 examples/step-9/doc/kind                           |     1 +
 examples/step-9/doc/results.dox                    |    49 +
 examples/step-9/doc/tooltip                        |     1 +
 examples/step-9/step-9.cc                          |  1363 ++
 include/CMakeLists.txt                             |    45 +
 include/deal.II/algorithms/any_data.h              |   480 +
 include/deal.II/algorithms/named_selection.h       |   119 +
 include/deal.II/algorithms/newton.h                |   174 +
 include/deal.II/algorithms/newton.templates.h      |   206 +
 include/deal.II/algorithms/operator.h              |   163 +
 include/deal.II/algorithms/operator.templates.h    |    74 +
 include/deal.II/algorithms/theta_timestepping.h    |   395 +
 .../algorithms/theta_timestepping.templates.h      |   135 +
 include/deal.II/algorithms/timestep_control.h      |   297 +
 include/deal.II/base/aligned_vector.h              |   888 ++
 include/deal.II/base/array_view.h                  |   441 +
 include/deal.II/base/auto_derivative_function.h    |   233 +
 include/deal.II/base/complex_overloads.h           |    84 +
 include/deal.II/base/conditional_ostream.h         |   180 +
 include/deal.II/base/config.h.in                   |   334 +
 include/deal.II/base/convergence_table.h           |   225 +
 include/deal.II/base/data_out_base.h               |  2756 ++++
 include/deal.II/base/derivative_form.h             |   484 +
 include/deal.II/base/event.h                       |   300 +
 include/deal.II/base/exceptions.h                  |  1103 ++
 include/deal.II/base/flow_function.h               |   291 +
 include/deal.II/base/function.h                    |   866 ++
 include/deal.II/base/function.templates.h          |   815 ++
 include/deal.II/base/function_bessel.h             |    60 +
 include/deal.II/base/function_derivative.h         |   138 +
 include/deal.II/base/function_lib.h                |  1342 ++
 include/deal.II/base/function_parser.h             |   384 +
 include/deal.II/base/function_time.h               |   124 +
 include/deal.II/base/function_time.templates.h     |    56 +
 include/deal.II/base/geometry_info.h               |  2663 ++++
 include/deal.II/base/graph_coloring.h              |   553 +
 include/deal.II/base/index_set.h                   |  1610 +++
 include/deal.II/base/iterator_range.h              |   330 +
 include/deal.II/base/job_identifier.h              |    81 +
 include/deal.II/base/logstream.h                   |   670 +
 include/deal.II/base/memory_consumption.h          |   389 +
 include/deal.II/base/mg_level_object.h             |   212 +
 include/deal.II/base/mpi.h                         |   763 ++
 include/deal.II/base/multithread_info.h            |   136 +
 include/deal.II/base/numbers.h                     |   360 +
 include/deal.II/base/parallel.h                    |   785 ++
 include/deal.II/base/parameter_handler.h           |  2444 ++++
 include/deal.II/base/parsed_function.h             |   216 +
 include/deal.II/base/partitioner.h                 |   552 +
 include/deal.II/base/path_search.h                 |   280 +
 include/deal.II/base/point.h                       |   546 +
 include/deal.II/base/polynomial.h                  |   677 +
 include/deal.II/base/polynomial_space.h            |   433 +
 include/deal.II/base/polynomials_abf.h             |   188 +
 include/deal.II/base/polynomials_adini.h           |   135 +
 include/deal.II/base/polynomials_bdm.h             |   231 +
 include/deal.II/base/polynomials_bernstein.h       |    64 +
 include/deal.II/base/polynomials_nedelec.h         |   154 +
 include/deal.II/base/polynomials_p.h               |   112 +
 include/deal.II/base/polynomials_piecewise.h       |   227 +
 include/deal.II/base/polynomials_rannacher_turek.h |   208 +
 include/deal.II/base/polynomials_raviart_thomas.h  |   160 +
 include/deal.II/base/qprojector.h                  |   428 +
 include/deal.II/base/quadrature.h                  |   409 +
 include/deal.II/base/quadrature_lib.h              |   659 +
 include/deal.II/base/quadrature_selector.h         |   104 +
 include/deal.II/base/revision.h.in                 |    34 +
 include/deal.II/base/sacado_product_type.h         |   166 +
 include/deal.II/base/signaling_nan.h               |   209 +
 include/deal.II/base/smartpointer.h                |   469 +
 include/deal.II/base/std_cxx11/array.h             |    49 +
 include/deal.II/base/std_cxx11/bind.h              |    92 +
 .../deal.II/base/std_cxx11/condition_variable.h    |    53 +
 include/deal.II/base/std_cxx11/function.h          |    49 +
 include/deal.II/base/std_cxx11/iterator.h          |    44 +
 include/deal.II/base/std_cxx11/mutex.h             |    49 +
 include/deal.II/base/std_cxx11/shared_ptr.h        |    52 +
 include/deal.II/base/std_cxx11/thread.h            |    52 +
 include/deal.II/base/std_cxx11/tuple.h             |    72 +
 include/deal.II/base/std_cxx11/type_traits.h       |    91 +
 include/deal.II/base/std_cxx11/unique_ptr.h        |    79 +
 include/deal.II/base/std_cxx1x/array.h             |    22 +
 include/deal.II/base/std_cxx1x/bind.h              |    22 +
 .../deal.II/base/std_cxx1x/condition_variable.h    |    22 +
 include/deal.II/base/std_cxx1x/function.h          |    22 +
 include/deal.II/base/std_cxx1x/mutex.h             |    22 +
 include/deal.II/base/std_cxx1x/shared_ptr.h        |    22 +
 include/deal.II/base/std_cxx1x/thread.h            |    22 +
 include/deal.II/base/std_cxx1x/tuple.h             |    22 +
 include/deal.II/base/std_cxx1x/type_traits.h       |    22 +
 include/deal.II/base/subscriptor.h                 |   215 +
 include/deal.II/base/symmetric_tensor.h            |  3156 +++++
 include/deal.II/base/synchronous_iterator.h        |   272 +
 include/deal.II/base/table.h                       |  3187 +++++
 include/deal.II/base/table_handler.h               |   883 ++
 include/deal.II/base/table_indices.h               |   281 +
 include/deal.II/base/template_constraints.h        |   626 +
 include/deal.II/base/tensor.h                      |  1983 +++
 include/deal.II/base/tensor_accessors.h            |   772 ++
 include/deal.II/base/tensor_base.h                 |     6 +
 include/deal.II/base/tensor_deprecated.h           |   522 +
 include/deal.II/base/tensor_function.h             |   171 +
 include/deal.II/base/tensor_function.templates.h   |   164 +
 include/deal.II/base/tensor_product_polynomials.h  |   659 +
 .../base/tensor_product_polynomials_bubbles.h      |   350 +
 .../base/tensor_product_polynomials_const.h        |   214 +
 include/deal.II/base/thread_local_storage.h        |   283 +
 include/deal.II/base/thread_management.h           |  4030 ++++++
 include/deal.II/base/time_stepping.h               |   591 +
 include/deal.II/base/time_stepping.templates.h     |   837 ++
 include/deal.II/base/timer.h                       |   749 +
 include/deal.II/base/types.h                       |   261 +
 include/deal.II/base/utilities.h                   |   710 +
 include/deal.II/base/vector_slice.h                |   279 +
 include/deal.II/base/vectorization.h               |  2670 ++++
 include/deal.II/base/work_stream.h                 |  1269 ++
 include/deal.II/distributed/grid_refinement.h      |   111 +
 include/deal.II/distributed/shared_tria.h          |   206 +
 include/deal.II/distributed/solution_transfer.h    |   252 +
 include/deal.II/distributed/tria.h                 |  1132 ++
 include/deal.II/distributed/tria_base.h            |   225 +
 include/deal.II/dofs/block_info.h                  |   316 +
 include/deal.II/dofs/dof_accessor.h                |  1708 +++
 include/deal.II/dofs/dof_accessor.templates.h      |  3584 +++++
 include/deal.II/dofs/dof_faces.h                   |   196 +
 include/deal.II/dofs/dof_handler.h                 |  1344 ++
 include/deal.II/dofs/dof_handler_policy.h          |   228 +
 include/deal.II/dofs/dof_iterator_selector.h       |   175 +
 include/deal.II/dofs/dof_levels.h                  |   155 +
 include/deal.II/dofs/dof_objects.h                 |   222 +
 include/deal.II/dofs/dof_renumbering.h             |  1130 ++
 include/deal.II/dofs/dof_tools.h                   |  2450 ++++
 include/deal.II/dofs/function_map.h                |    86 +
 include/deal.II/dofs/number_cache.h                |   113 +
 include/deal.II/fe/block_mask.h                    |   413 +
 include/deal.II/fe/component_mask.h                |   430 +
 include/deal.II/fe/fe.h                            |  2983 ++++
 include/deal.II/fe/fe_abf.h                        |   255 +
 include/deal.II/fe/fe_base.h                       |   683 +
 include/deal.II/fe/fe_bdm.h                        |   123 +
 include/deal.II/fe/fe_bernstein.h                  |   192 +
 include/deal.II/fe/fe_dg_vector.h                  |   244 +
 include/deal.II/fe/fe_dg_vector.templates.h        |   128 +
 include/deal.II/fe/fe_dgp.h                        |   541 +
 include/deal.II/fe/fe_dgp_monomial.h               |   473 +
 include/deal.II/fe/fe_dgp_nonparametric.h          |   667 +
 include/deal.II/fe/fe_dgq.h                        |   402 +
 include/deal.II/fe/fe_face.h                       |   463 +
 include/deal.II/fe/fe_nedelec.h                    |   340 +
 include/deal.II/fe/fe_nothing.h                    |   268 +
 include/deal.II/fe/fe_poly.h                       |   451 +
 include/deal.II/fe/fe_poly.templates.h             |   536 +
 include/deal.II/fe/fe_poly_face.h                  |   224 +
 include/deal.II/fe/fe_poly_face.templates.h        |   196 +
 include/deal.II/fe/fe_poly_tensor.h                |   437 +
 include/deal.II/fe/fe_q.h                          |   573 +
 include/deal.II/fe/fe_q_base.h                     |   342 +
 include/deal.II/fe/fe_q_bubbles.h                  |   205 +
 include/deal.II/fe/fe_q_dg0.h                      |   351 +
 include/deal.II/fe/fe_q_hierarchical.h             |   838 ++
 include/deal.II/fe/fe_q_iso_q1.h                   |   159 +
 include/deal.II/fe/fe_rannacher_turek.h            |   109 +
 include/deal.II/fe/fe_raviart_thomas.h             |   348 +
 include/deal.II/fe/fe_system.h                     |  1088 ++
 include/deal.II/fe/fe_tools.h                      |  1061 ++
 include/deal.II/fe/fe_trace.h                      |   163 +
 include/deal.II/fe/fe_update_flags.h               |   584 +
 include/deal.II/fe/fe_values.h                     |  4704 +++++++
 include/deal.II/fe/fe_values_extractors.h          |   285 +
 include/deal.II/fe/mapping.h                       |  1181 ++
 include/deal.II/fe/mapping_c1.h                    |   133 +
 include/deal.II/fe/mapping_cartesian.h             |   268 +
 include/deal.II/fe/mapping_fe_field.h              |   610 +
 include/deal.II/fe/mapping_q.h                     |   375 +
 include/deal.II/fe/mapping_q1.h                    |   107 +
 include/deal.II/fe/mapping_q1_eulerian.h           |   190 +
 include/deal.II/fe/mapping_q_eulerian.h            |   279 +
 include/deal.II/fe/mapping_q_generic.h             |   782 ++
 include/deal.II/grid/cell_id.h                     |   178 +
 include/deal.II/grid/filtered_iterator.h           |  1227 ++
 include/deal.II/grid/grid_generator.h              |  1138 ++
 include/deal.II/grid/grid_in.h                     |   634 +
 include/deal.II/grid/grid_out.h                    |  1629 +++
 include/deal.II/grid/grid_refinement.h             |   362 +
 include/deal.II/grid/grid_reordering.h             |   699 +
 include/deal.II/grid/grid_reordering_internal.h    |   635 +
 include/deal.II/grid/grid_tools.h                  |  1688 +++
 include/deal.II/grid/intergrid_map.h               |   210 +
 include/deal.II/grid/magic_numbers.h               |    45 +
 include/deal.II/grid/manifold.h                    |   587 +
 include/deal.II/grid/manifold_lib.h                |   296 +
 include/deal.II/grid/persistent_tria.h             |   260 +
 include/deal.II/grid/tria.h                        |  3633 +++++
 include/deal.II/grid/tria_accessor.h               |  3191 +++++
 include/deal.II/grid/tria_accessor.templates.h     |  3559 +++++
 include/deal.II/grid/tria_boundary.h               |   489 +
 include/deal.II/grid/tria_boundary_lib.h           |   766 ++
 include/deal.II/grid/tria_faces.h                  |   182 +
 include/deal.II/grid/tria_iterator.h               |  1240 ++
 include/deal.II/grid/tria_iterator.templates.h     |   575 +
 include/deal.II/grid/tria_iterator_base.h          |    51 +
 include/deal.II/grid/tria_iterator_selector.h      |   208 +
 include/deal.II/grid/tria_levels.h                 |   317 +
 include/deal.II/grid/tria_object.h                 |   212 +
 include/deal.II/grid/tria_objects.h                |   874 ++
 include/deal.II/hp/dof_faces.h                     |   639 +
 include/deal.II/hp/dof_handler.h                   |  1082 ++
 include/deal.II/hp/dof_level.h                     |   422 +
 include/deal.II/hp/fe_collection.h                 |   622 +
 include/deal.II/hp/fe_values.h                     |   648 +
 include/deal.II/hp/mapping_collection.h            |   168 +
 include/deal.II/hp/q_collection.h                  |   230 +
 include/deal.II/integrators/advection.h            |   732 +
 include/deal.II/integrators/divergence.h           |   629 +
 include/deal.II/integrators/elasticity.h           |   417 +
 include/deal.II/integrators/l2.h                   |   260 +
 include/deal.II/integrators/laplace.h              |   635 +
 include/deal.II/integrators/local_integrators.h    |   153 +
 include/deal.II/integrators/maxwell.h              |   462 +
 include/deal.II/integrators/patches.h              |    65 +
 include/deal.II/lac/arpack_solver.h                |   553 +
 include/deal.II/lac/block_indices.h                |   449 +
 include/deal.II/lac/block_linear_operator.h        |   850 ++
 include/deal.II/lac/block_matrix.h                 |   132 +
 include/deal.II/lac/block_matrix_array.h           |   634 +
 include/deal.II/lac/block_matrix_base.h            |  2677 ++++
 include/deal.II/lac/block_sparse_matrix.h          |   532 +
 .../deal.II/lac/block_sparse_matrix.templates.h    |   212 +
 include/deal.II/lac/block_sparse_matrix_ez.h       |   541 +
 .../deal.II/lac/block_sparse_matrix_ez.templates.h |   183 +
 include/deal.II/lac/block_sparsity_pattern.h       |   999 ++
 include/deal.II/lac/block_vector.h                 |   537 +
 include/deal.II/lac/block_vector.templates.h       |   221 +
 include/deal.II/lac/block_vector_base.h            |  2243 +++
 include/deal.II/lac/chunk_sparse_matrix.h          |  2085 +++
 .../deal.II/lac/chunk_sparse_matrix.templates.h    |  1639 +++
 include/deal.II/lac/chunk_sparsity_pattern.h       |  1205 ++
 .../deal.II/lac/compressed_set_sparsity_pattern.h  |    38 +
 .../lac/compressed_simple_sparsity_pattern.h       |    51 +
 include/deal.II/lac/compressed_sparsity_pattern.h  |    38 +
 include/deal.II/lac/constrained_linear_operator.h  |   321 +
 include/deal.II/lac/constraint_matrix.h            |  1808 +++
 include/deal.II/lac/constraint_matrix.templates.h  |  2733 ++++
 include/deal.II/lac/dynamic_sparsity_pattern.h     |  1106 ++
 include/deal.II/lac/eigen.h                        |   433 +
 include/deal.II/lac/exceptions.h                   |    72 +
 include/deal.II/lac/filtered_matrix.h              |  1019 ++
 include/deal.II/lac/full_matrix.h                  |  1566 +++
 include/deal.II/lac/full_matrix.templates.h        |  1865 +++
 include/deal.II/lac/generic_linear_algebra.h       |   227 +
 include/deal.II/lac/householder.h                  |   321 +
 include/deal.II/lac/identity_matrix.h              |   260 +
 include/deal.II/lac/iterative_inverse.h            |   180 +
 include/deal.II/lac/lapack_full_matrix.h           |   837 ++
 include/deal.II/lac/lapack_support.h               |   162 +
 include/deal.II/lac/lapack_templates.h             |  1016 ++
 include/deal.II/lac/linear_operator.h              |  1121 ++
 include/deal.II/lac/matrix_block.h                 |  1118 ++
 include/deal.II/lac/matrix_iterator.h              |   227 +
 include/deal.II/lac/matrix_lib.h                   |   754 +
 include/deal.II/lac/matrix_lib.templates.h         |   208 +
 include/deal.II/lac/matrix_out.h                   |   412 +
 include/deal.II/lac/packaged_operation.h           |   835 ++
 include/deal.II/lac/parallel_block_vector.h        |  1012 ++
 include/deal.II/lac/parallel_vector.h              |  2438 ++++
 include/deal.II/lac/parallel_vector.templates.h    |   810 ++
 include/deal.II/lac/parpack_solver.h               |   870 ++
 include/deal.II/lac/petsc_block_sparse_matrix.h    |   341 +
 include/deal.II/lac/petsc_block_vector.h           |   466 +
 include/deal.II/lac/petsc_full_matrix.h            |   111 +
 include/deal.II/lac/petsc_matrix_base.h            |  1559 +++
 include/deal.II/lac/petsc_matrix_free.h            |   311 +
 .../lac/petsc_parallel_block_sparse_matrix.h       |   358 +
 include/deal.II/lac/petsc_parallel_block_vector.h  |   545 +
 include/deal.II/lac/petsc_parallel_sparse_matrix.h |   457 +
 include/deal.II/lac/petsc_parallel_vector.h        |   600 +
 include/deal.II/lac/petsc_precondition.h           |   950 ++
 include/deal.II/lac/petsc_solver.h                 |  1012 ++
 include/deal.II/lac/petsc_sparse_matrix.h          |   278 +
 include/deal.II/lac/petsc_vector.h                 |   401 +
 include/deal.II/lac/petsc_vector_base.h            |  1274 ++
 include/deal.II/lac/pointer_matrix.h               |   900 ++
 include/deal.II/lac/precondition.h                 |  2001 +++
 include/deal.II/lac/precondition_block.h           |  1102 ++
 include/deal.II/lac/precondition_block.templates.h |  1067 ++
 include/deal.II/lac/precondition_block_base.h      |   679 +
 include/deal.II/lac/precondition_selector.h        |   297 +
 include/deal.II/lac/relaxation_block.h             |   477 +
 include/deal.II/lac/relaxation_block.templates.h   |   299 +
 include/deal.II/lac/schur_complement.h             |   503 +
 include/deal.II/lac/schur_matrix.h                 |   344 +
 include/deal.II/lac/shifted_matrix.h               |   279 +
 include/deal.II/lac/slepc_solver.h                 |   825 ++
 .../deal.II/lac/slepc_spectral_transformation.h    |   300 +
 include/deal.II/lac/solver.h                       |   547 +
 include/deal.II/lac/solver_bicgstab.h              |   502 +
 include/deal.II/lac/solver_cg.h                    |   662 +
 include/deal.II/lac/solver_control.h               |   633 +
 include/deal.II/lac/solver_gmres.h                 |  1223 ++
 include/deal.II/lac/solver_minres.h                |   387 +
 include/deal.II/lac/solver_qmrs.h                  |   429 +
 include/deal.II/lac/solver_relaxation.h            |   164 +
 include/deal.II/lac/solver_richardson.h            |   385 +
 include/deal.II/lac/solver_selector.h              |   361 +
 include/deal.II/lac/sparse_decomposition.h         |   424 +
 .../deal.II/lac/sparse_decomposition.templates.h   |   235 +
 include/deal.II/lac/sparse_direct.h                |   352 +
 include/deal.II/lac/sparse_ilu.h                   |   165 +
 include/deal.II/lac/sparse_ilu.templates.h         |   303 +
 include/deal.II/lac/sparse_matrix.h                |  2425 ++++
 include/deal.II/lac/sparse_matrix.templates.h      |  2000 +++
 include/deal.II/lac/sparse_matrix_ez.h             |  1592 +++
 include/deal.II/lac/sparse_matrix_ez.templates.h   |   615 +
 include/deal.II/lac/sparse_mic.h                   |   181 +
 include/deal.II/lac/sparse_mic.templates.h         |   216 +
 include/deal.II/lac/sparse_vanka.h                 |   604 +
 include/deal.II/lac/sparse_vanka.templates.h       |   671 +
 include/deal.II/lac/sparsity_pattern.h             |  1569 +++
 include/deal.II/lac/sparsity_tools.h               |   260 +
 include/deal.II/lac/swappable_vector.h             |   218 +
 include/deal.II/lac/swappable_vector.templates.h   |   258 +
 include/deal.II/lac/transpose_matrix.h             |   208 +
 include/deal.II/lac/tridiagonal_matrix.h           |   401 +
 include/deal.II/lac/trilinos_block_sparse_matrix.h |   573 +
 include/deal.II/lac/trilinos_block_vector.h        |   501 +
 .../deal.II/lac/trilinos_parallel_block_vector.h   |   525 +
 include/deal.II/lac/trilinos_precondition.h        |  1998 +++
 include/deal.II/lac/trilinos_solver.h              |   660 +
 include/deal.II/lac/trilinos_sparse_matrix.h       |  2704 ++++
 include/deal.II/lac/trilinos_sparsity_pattern.h    |  1529 +++
 include/deal.II/lac/trilinos_vector.h              |  1046 ++
 include/deal.II/lac/trilinos_vector_base.h         |  2019 +++
 include/deal.II/lac/vector.h                       |  1465 ++
 include/deal.II/lac/vector.templates.h             |  2078 +++
 include/deal.II/lac/vector_memory.h                |   396 +
 include/deal.II/lac/vector_memory.templates.h      |   210 +
 include/deal.II/lac/vector_view.h                  |   313 +
 include/deal.II/matrix_free/dof_info.h             |   558 +
 include/deal.II/matrix_free/dof_info.templates.h   |  2009 +++
 include/deal.II/matrix_free/fe_evaluation.h        |  6757 +++++++++
 include/deal.II/matrix_free/helper_functions.h     |   182 +
 .../deal.II/matrix_free/mapping_data_on_the_fly.h  |   340 +
 include/deal.II/matrix_free/mapping_info.h         |   380 +
 .../deal.II/matrix_free/mapping_info.templates.h   |   926 ++
 include/deal.II/matrix_free/matrix_free.h          |  2551 ++++
 .../deal.II/matrix_free/matrix_free.templates.h    |  1078 ++
 include/deal.II/matrix_free/operators.h            |   213 +
 include/deal.II/matrix_free/shape_info.h           |   257 +
 include/deal.II/matrix_free/shape_info.templates.h |   419 +
 include/deal.II/meshworker/assembler.h             |  1237 ++
 include/deal.II/meshworker/dof_info.h              |   478 +
 include/deal.II/meshworker/dof_info.templates.h    |    60 +
 include/deal.II/meshworker/functional.h            |   272 +
 include/deal.II/meshworker/integration_info.h      |   867 ++
 .../meshworker/integration_info.templates.h        |   231 +
 include/deal.II/meshworker/local_integrator.h      |   146 +
 include/deal.II/meshworker/local_results.h         |   673 +
 include/deal.II/meshworker/loop.h                  |   455 +
 include/deal.II/meshworker/output.h                |   240 +
 include/deal.II/meshworker/simple.h                |  1163 ++
 include/deal.II/meshworker/vector_selector.h       |   538 +
 .../deal.II/meshworker/vector_selector.templates.h |   246 +
 include/deal.II/multigrid/mg_base.h                |   235 +
 include/deal.II/multigrid/mg_block_smoother.h      |   313 +
 include/deal.II/multigrid/mg_coarse.h              |   357 +
 include/deal.II/multigrid/mg_constrained_dofs.h    |   228 +
 include/deal.II/multigrid/mg_dof_accessor.h        |    22 +
 include/deal.II/multigrid/mg_matrix.h              |   349 +
 include/deal.II/multigrid/mg_smoother.h            |  1057 ++
 include/deal.II/multigrid/mg_tools.h               |   228 +
 include/deal.II/multigrid/mg_transfer.h            |   557 +
 include/deal.II/multigrid/mg_transfer.templates.h  |   551 +
 include/deal.II/multigrid/mg_transfer_block.h      |   461 +
 .../multigrid/mg_transfer_block.templates.h        |   151 +
 include/deal.II/multigrid/mg_transfer_component.h  |   378 +
 .../multigrid/mg_transfer_component.templates.h    |   223 +
 .../deal.II/multigrid/mg_transfer_matrix_free.h    |   237 +
 include/deal.II/multigrid/multigrid.h              |   554 +
 include/deal.II/multigrid/multigrid.templates.h    |   424 +
 .../deal.II/multigrid/sparse_matrix_collection.h   |   123 +
 .../numerics/data_component_interpretation.h       |    68 +
 include/deal.II/numerics/data_out.h                |   309 +
 include/deal.II/numerics/data_out_dof_data.h       |   971 ++
 include/deal.II/numerics/data_out_faces.h          |   238 +
 include/deal.II/numerics/data_out_rotation.h       |   209 +
 include/deal.II/numerics/data_out_stack.h          |   350 +
 include/deal.II/numerics/data_postprocessor.h      |   354 +
 .../deal.II/numerics/derivative_approximation.h    |   310 +
 include/deal.II/numerics/dof_output_operator.h     |    80 +
 .../numerics/dof_output_operator.templates.h       |    69 +
 include/deal.II/numerics/dof_print_solver_step.h   |   131 +
 include/deal.II/numerics/error_estimator.h         |   777 ++
 .../deal.II/numerics/error_estimator.templates.h   |  1292 ++
 include/deal.II/numerics/fe_field_function.h       |   468 +
 .../deal.II/numerics/fe_field_function.templates.h |   595 +
 include/deal.II/numerics/histogram.h               |   231 +
 .../deal.II/numerics/matrix_creator.templates.h    |  1894 +++
 include/deal.II/numerics/matrix_tools.h            |   957 ++
 include/deal.II/numerics/point_value_history.h     |   661 +
 include/deal.II/numerics/solution_transfer.h       |   505 +
 include/deal.II/numerics/time_dependent.h          |  1586 +++
 include/deal.II/numerics/vector_tools.h            |  2423 ++++
 include/deal.II/numerics/vector_tools.templates.h  |  7276 ++++++++++
 include/deal.II/opencascade/boundary_lib.h         |   333 +
 include/deal.II/opencascade/utilities.h            |   379 +
 source/CMakeLists.txt                              |   144 +
 source/algorithms/CMakeLists.txt                   |    32 +
 source/algorithms/operator.cc                      |    57 +
 source/algorithms/operator.inst.in                 |    22 +
 source/algorithms/timestep_control.cc              |   144 +
 source/base/CMakeLists.txt                         |    88 +
 source/base/auto_derivative_function.cc            |   347 +
 source/base/conditional_ostream.cc                 |    40 +
 source/base/config.cc                              |    41 +
 source/base/convergence_table.cc                   |   247 +
 source/base/data_out_base.cc                       |  7512 ++++++++++
 source/base/data_out_base.inst.in                  |   142 +
 source/base/event.cc                               |    74 +
 source/base/exceptions.cc                          |   375 +
 source/base/flow_function.cc                       |   778 ++
 source/base/function.cc                            |    23 +
 source/base/function.inst.in                       |    37 +
 source/base/function_derivative.cc                 |   225 +
 source/base/function_lib.cc                        |  2760 ++++
 source/base/function_lib_cutoff.cc                 |   327 +
 source/base/function_parser.cc                     |   475 +
 source/base/function_time.cc                       |    24 +
 source/base/function_time.inst.in                  |    24 +
 source/base/geometry_info.cc                       |  1972 +++
 source/base/index_set.cc                           |   558 +
 source/base/job_identifier.cc                      |    71 +
 source/base/logstream.cc                           |   528 +
 source/base/mpi.cc                                 |   549 +
 source/base/multithread_info.cc                    |   222 +
 source/base/named_selection.cc                     |    30 +
 source/base/parallel.cc                            |    60 +
 source/base/parameter_handler.cc                   |  3054 +++++
 source/base/parsed_function.cc                     |   185 +
 source/base/partitioner.cc                         |   394 +
 source/base/path_search.cc                         |   256 +
 source/base/polynomial.cc                          |  1375 ++
 source/base/polynomial_space.cc                    |   410 +
 source/base/polynomials_abf.cc                     |   155 +
 source/base/polynomials_adini.cc                   |   246 +
 source/base/polynomials_bdm.cc                     |   423 +
 source/base/polynomials_bernstein.cc               |    58 +
 source/base/polynomials_bernstein.inst             |     2 +
 source/base/polynomials_nedelec.cc                 |  1415 ++
 source/base/polynomials_p.cc                       |   114 +
 source/base/polynomials_piecewise.cc               |   146 +
 source/base/polynomials_rannacher_turek.cc         |   186 +
 source/base/polynomials_rannacher_turek.inst.in    |    22 +
 source/base/polynomials_raviart_thomas.cc          |   174 +
 source/base/quadrature.cc                          |  1782 +++
 source/base/quadrature_lib.cc                      |  1379 ++
 source/base/quadrature_selector.cc                 |    79 +
 source/base/subscriptor.cc                         |   227 +
 source/base/symmetric_tensor.cc                    |   129 +
 source/base/table_handler.cc                       |   749 +
 source/base/tensor_function.cc                     |    23 +
 source/base/tensor_function.inst.in                |    29 +
 source/base/tensor_product_polynomials.cc          |   679 +
 source/base/tensor_product_polynomials_bubbles.cc  |   294 +
 source/base/tensor_product_polynomials_const.cc    |   160 +
 source/base/thread_management.cc                   |   275 +
 source/base/time_stepping.cc                       |    34 +
 source/base/time_stepping.inst.in                  |    47 +
 source/base/timer.cc                               |   610 +
 source/base/utilities.cc                           |   849 ++
 source/distributed/CMakeLists.txt                  |    39 +
 source/distributed/grid_refinement.cc              |   627 +
 source/distributed/grid_refinement.inst.in         |    79 +
 source/distributed/shared_tria.cc                  |   192 +
 source/distributed/shared_tria.inst.in             |    36 +
 source/distributed/solution_transfer.cc            |   268 +
 source/distributed/solution_transfer.inst.in       |    59 +
 source/distributed/tria.cc                         |  5188 +++++++
 source/distributed/tria.inst.in                    |    70 +
 source/distributed/tria_base.cc                    |   248 +
 source/distributed/tria_base.inst.in               |    33 +
 source/dofs/CMakeLists.txt                         |    53 +
 source/dofs/block_info.cc                          |    77 +
 source/dofs/block_info.inst.in                     |    32 +
 source/dofs/dof_accessor.cc                        |    91 +
 source/dofs/dof_accessor.inst.in                   |   166 +
 source/dofs/dof_accessor_get.cc                    |   176 +
 source/dofs/dof_accessor_get.inst.in               |    77 +
 source/dofs/dof_accessor_set.cc                    |   125 +
 source/dofs/dof_accessor_set.inst.in               |    77 +
 source/dofs/dof_faces.cc                           |    50 +
 source/dofs/dof_handler.cc                         |  1713 +++
 source/dofs/dof_handler.inst.in                    |   164 +
 source/dofs/dof_handler_policy.cc                  |  2901 ++++
 source/dofs/dof_handler_policy.inst.in             |    47 +
 source/dofs/dof_objects.cc                         |    73 +
 source/dofs/dof_objects.inst.in                    |    84 +
 source/dofs/dof_renumbering.cc                     |  1953 +++
 source/dofs/dof_renumbering.inst.in                |   323 +
 source/dofs/dof_tools.cc                           |  2366 ++++
 source/dofs/dof_tools.inst.in                      |   836 ++
 source/dofs/dof_tools_constraints.cc               |  3350 +++++
 source/dofs/dof_tools_constraints.inst.in          |   130 +
 source/dofs/dof_tools_sparsity.cc                  |  1189 ++
 source/dofs/dof_tools_sparsity.inst.in             |   307 +
 source/dofs/number_cache.cc                        |    54 +
 source/dummy.cc                                    |    24 +
 source/fe/CMakeLists.txt                           |   111 +
 source/fe/block_mask.cc                            |    53 +
 source/fe/component_mask.cc                        |    53 +
 source/fe/fe.cc                                    |  1252 ++
 source/fe/fe.inst.in                               |    24 +
 source/fe/fe_abf.cc                                |   657 +
 source/fe/fe_abf.inst.in                           |    22 +
 source/fe/fe_bdm.cc                                |   392 +
 source/fe/fe_bdm.inst.in                           |    22 +
 source/fe/fe_bernstein.cc                          |   312 +
 source/fe/fe_bernstein.inst.in                     |    23 +
 source/fe/fe_data.cc                               |    88 +
 source/fe/fe_dg_vector.cc                          |   106 +
 source/fe/fe_dg_vector.inst.in                     |    29 +
 source/fe/fe_dgp.cc                                |   269 +
 source/fe/fe_dgp.inst.in                           |    25 +
 source/fe/fe_dgp_monomial.cc                       |   458 +
 source/fe/fe_dgp_monomial.inst.in                  |    22 +
 source/fe/fe_dgp_nonparametric.cc                  |   597 +
 source/fe/fe_dgp_nonparametric.inst.in             |    22 +
 source/fe/fe_dgq.cc                                |   894 ++
 source/fe/fe_dgq.inst.in                           |    35 +
 source/fe/fe_face.cc                               |   783 ++
 source/fe/fe_face.inst.in                          |    28 +
 source/fe/fe_nedelec.cc                            |  5597 ++++++++
 source/fe/fe_nedelec.inst.in                       |    22 +
 source/fe/fe_nothing.cc                            |   287 +
 source/fe/fe_nothing.inst.in                       |    24 +
 source/fe/fe_poly.cc                               |   266 +
 source/fe/fe_poly.inst.in                          |    30 +
 source/fe/fe_poly_tensor.cc                        |  1811 +++
 source/fe/fe_poly_tensor.inst.in                   |    25 +
 source/fe/fe_q.cc                                  |   137 +
 source/fe/fe_q.inst.in                             |    25 +
 source/fe/fe_q_base.cc                             |  1575 +++
 source/fe/fe_q_base.inst.in                        |    28 +
 source/fe/fe_q_bubbles.cc                          |   527 +
 source/fe/fe_q_bubbles.inst.in                     |    24 +
 source/fe/fe_q_dg0.cc                              |   339 +
 source/fe/fe_q_dg0.inst.in                         |    25 +
 source/fe/fe_q_hierarchical.cc                     |  2133 +++
 source/fe/fe_q_hierarchical.inst.in                |    22 +
 source/fe/fe_q_iso_q1.cc                           |   123 +
 source/fe/fe_q_iso_q1.inst.in                      |    25 +
 source/fe/fe_rannacher_turek.cc                    |   175 +
 source/fe/fe_rannacher_turek.inst.in               |    22 +
 source/fe/fe_raviart_thomas.cc                     |   566 +
 source/fe/fe_raviart_thomas.inst.in                |    22 +
 source/fe/fe_raviart_thomas_nodal.cc               |   783 ++
 source/fe/fe_raviart_thomas_nodal.inst.in          |    22 +
 source/fe/fe_system.cc                             |  3041 +++++
 source/fe/fe_system.inst.in                        |    24 +
 source/fe/fe_tools.cc                              |  2111 +++
 source/fe/fe_tools.inst.in                         |   185 +
 source/fe/fe_tools_interpolate.cc                  |   835 ++
 source/fe/fe_tools_interpolate.inst.in             |   106 +
 source/fe/fe_trace.cc                              |   252 +
 source/fe/fe_trace.inst.in                         |    19 +
 source/fe/fe_values.cc                             |  4259 ++++++
 source/fe/fe_values.decl.1.inst.in                 |    31 +
 source/fe/fe_values.decl.2.inst.in                 |    31 +
 source/fe/fe_values.impl.1.inst.in                 |    28 +
 source/fe/fe_values.impl.2.inst.in                 |    27 +
 source/fe/fe_values.inst.in                        |   558 +
 source/fe/fe_values_inst2.cc                       |    20 +
 source/fe/mapping.cc                               |   111 +
 source/fe/mapping.inst.in                          |    25 +
 source/fe/mapping_c1.cc                            |   221 +
 source/fe/mapping_c1.inst.in                       |    22 +
 source/fe/mapping_cartesian.cc                     |  1075 ++
 source/fe/mapping_cartesian.inst.in                |    22 +
 source/fe/mapping_fe_field.cc                      |  1965 +++
 source/fe/mapping_fe_field.inst.in                 |    25 +
 source/fe/mapping_q.cc                             |   516 +
 source/fe/mapping_q.inst.in                        |    25 +
 source/fe/mapping_q1.cc                            |    71 +
 source/fe/mapping_q1.inst.in                       |    24 +
 source/fe/mapping_q1_eulerian.cc                   |   149 +
 source/fe/mapping_q1_eulerian.inst.in              |    40 +
 source/fe/mapping_q_eulerian.cc                    |   265 +
 source/fe/mapping_q_eulerian.inst.in               |    42 +
 source/fe/mapping_q_generic.cc                     |  3894 ++++++
 source/fe/mapping_q_generic.inst.in                |    23 +
 source/grid/CMakeLists.txt                         |    60 +
 source/grid/cell_id.cc                             |    30 +
 source/grid/grid_generator.cc                      |  4336 ++++++
 source/grid/grid_generator.inst.in                 |   175 +
 source/grid/grid_in.cc                             |  3432 +++++
 source/grid/grid_in.inst.in                        |    23 +
 source/grid/grid_out.cc                            |  3977 ++++++
 source/grid/grid_out.inst.in                       |    97 +
 source/grid/grid_refinement.cc                     |   588 +
 source/grid/grid_refinement.inst.in                |   125 +
 source/grid/grid_reordering.cc                     |  1718 +++
 source/grid/grid_tools.cc                          |  3769 +++++
 source/grid/grid_tools.inst.in                     |   374 +
 source/grid/intergrid_map.cc                       |   215 +
 source/grid/intergrid_map.inst.in                  |    23 +
 source/grid/manifold.cc                            |   318 +
 source/grid/manifold.inst.in                       |    31 +
 source/grid/manifold_lib.cc                        |   297 +
 source/grid/manifold_lib.inst.in                   |    30 +
 source/grid/persistent_tria.cc                     |   255 +
 source/grid/tria.cc                                | 13297 ++++++++++++++++++
 source/grid/tria.inst.in                           |    26 +
 source/grid/tria_accessor.cc                       |  2281 ++++
 source/grid/tria_accessor.inst.in                  |    93 +
 source/grid/tria_boundary.cc                       |   907 ++
 source/grid/tria_boundary.inst.in                  |    27 +
 source/grid/tria_boundary_lib.cc                   |  1493 ++
 source/grid/tria_boundary_lib.inst.in              |    34 +
 source/grid/tria_faces.cc                          |    52 +
 source/grid/tria_levels.cc                         |   211 +
 source/grid/tria_objects.cc                        |   472 +
 source/grid/tria_objects.inst.in                   |    23 +
 source/hp/CMakeLists.txt                           |    39 +
 source/hp/dof_faces.cc                             |    78 +
 source/hp/dof_handler.cc                           |  3389 +++++
 source/hp/dof_handler.inst.in                      |    89 +
 source/hp/dof_level.cc                             |   244 +
 source/hp/fe_collection.cc                         |   420 +
 source/hp/fe_collection.inst.in                    |    30 +
 source/hp/fe_values.cc                             |   476 +
 source/hp/fe_values.inst.in                        |   102 +
 source/hp/mapping_collection.cc                    |   117 +
 source/hp/mapping_collection.inst.in               |    34 +
 source/integrators/CMakeLists.txt                  |    25 +
 source/lac/CMakeLists.txt                          |   134 +
 source/lac/block_matrix_array.cc                   |   447 +
 source/lac/block_sparse_matrix.cc                  |    23 +
 source/lac/block_sparse_matrix.inst.in             |    19 +
 source/lac/block_sparse_matrix_ez.cc               |    26 +
 source/lac/block_sparsity_pattern.cc               |   695 +
 source/lac/block_vector.cc                         |    33 +
 source/lac/block_vector.inst.in                    |    39 +
 source/lac/chunk_sparse_matrix.cc                  |    21 +
 source/lac/chunk_sparse_matrix.inst.in             |   228 +
 source/lac/chunk_sparsity_pattern.cc               |   641 +
 source/lac/constraint_matrix.cc                    |  1414 ++
 source/lac/constraint_matrix.inst.in               |    69 +
 source/lac/dynamic_sparsity_pattern.cc             |   494 +
 source/lac/full_matrix.cc                          |    57 +
 source/lac/full_matrix.inst.in                     |   237 +
 source/lac/lapack_full_matrix.cc                   |  1057 ++
 source/lac/lapack_full_matrix.inst.in              |    31 +
 source/lac/matrix_lib.cc                           |   169 +
 source/lac/matrix_out.cc                           |    51 +
 source/lac/parallel_vector.cc                      |    24 +
 source/lac/parallel_vector.inst.in                 |    39 +
 source/lac/petsc_block_sparse_matrix.cc            |    82 +
 source/lac/petsc_full_matrix.cc                    |    80 +
 source/lac/petsc_matrix_base.cc                    |   663 +
 source/lac/petsc_matrix_free.cc                    |   291 +
 source/lac/petsc_parallel_block_sparse_matrix.cc   |   144 +
 source/lac/petsc_parallel_block_vector.cc          |    64 +
 source/lac/petsc_parallel_sparse_matrix.cc         |   899 ++
 source/lac/petsc_parallel_vector.cc                |   450 +
 source/lac/petsc_precondition.cc                   |   755 +
 source/lac/petsc_solver.cc                         |   917 ++
 source/lac/petsc_sparse_matrix.cc                  |   356 +
 source/lac/petsc_vector.cc                         |   131 +
 source/lac/petsc_vector_base.cc                    |  1203 ++
 source/lac/precondition_block.cc                   |    21 +
 source/lac/precondition_block.inst.in              |    89 +
 source/lac/precondition_block_ez.cc                |   204 +
 source/lac/relaxation_block.cc                     |    21 +
 source/lac/relaxation_block.inst.in                |    53 +
 source/lac/slepc_solver.cc                         |   457 +
 source/lac/slepc_spectral_transformation.cc        |   172 +
 source/lac/solver.cc                               |    33 +
 source/lac/solver.inst.in                          |    21 +
 source/lac/solver_control.cc                       |   334 +
 source/lac/sparse_decomposition.cc                 |    41 +
 source/lac/sparse_direct.cc                        |   514 +
 source/lac/sparse_ilu.cc                           |    51 +
 source/lac/sparse_matrix.cc                        |    26 +
 source/lac/sparse_matrix.inst.in                   |   294 +
 source/lac/sparse_matrix_ez.cc                     |    21 +
 source/lac/sparse_matrix_ez.inst.in                |    57 +
 source/lac/sparse_matrix_inst2.cc                  |    27 +
 source/lac/sparse_mic.cc                           |    53 +
 source/lac/sparse_vanka.cc                         |    39 +
 source/lac/sparsity_pattern.cc                     |  1023 ++
 source/lac/sparsity_tools.cc                       |   776 ++
 source/lac/swappable_vector.cc                     |    24 +
 source/lac/tridiagonal_matrix.cc                   |   272 +
 source/lac/trilinos_block_sparse_matrix.cc         |   444 +
 source/lac/trilinos_block_vector.cc                |   428 +
 source/lac/trilinos_parallel_block_vector.cc       |    52 +
 source/lac/trilinos_precondition.cc                |   722 +
 source/lac/trilinos_precondition_ml.cc             |   345 +
 source/lac/trilinos_precondition_muelu.cc          |   330 +
 source/lac/trilinos_solver.cc                      |   569 +
 source/lac/trilinos_sparse_matrix.cc               |  2546 ++++
 source/lac/trilinos_sparse_matrix.inst.in          |    49 +
 source/lac/trilinos_sparsity_pattern.cc            |  1199 ++
 source/lac/trilinos_vector.cc                      |   757 ++
 source/lac/trilinos_vector_base.cc                 |   552 +
 source/lac/trilinos_vector_base.inst.in            |    23 +
 source/lac/vector.cc                               |    98 +
 source/lac/vector.inst.in                          |    79 +
 source/lac/vector_memory.cc                        |    34 +
 source/lac/vector_memory.inst.in                   |    31 +
 source/lac/vector_view.cc                          |    22 +
 source/lac/vector_view.inst.in                     |    26 +
 source/matrix_free/CMakeLists.txt                  |    31 +
 source/matrix_free/matrix_free.cc                  |    31 +
 source/matrix_free/matrix_free.inst.in             |    46 +
 source/meshworker/CMakeLists.txt                   |    34 +
 source/meshworker/mesh_worker.cc                   |   123 +
 source/meshworker/mesh_worker_info.cc              |    34 +
 source/meshworker/mesh_worker_info.inst.in         |    58 +
 source/meshworker/mesh_worker_vector_selector.cc   |    33 +
 .../meshworker/mesh_worker_vector_selector.inst.in |    35 +
 source/multigrid/CMakeLists.txt                    |    45 +
 source/multigrid/mg_base.cc                        |    54 +
 source/multigrid/mg_base.inst.in                   |    23 +
 source/multigrid/mg_level_global_transfer.cc       |   588 +
 source/multigrid/mg_level_global_transfer.inst.in  |    80 +
 source/multigrid/mg_tools.cc                       |  1575 +++
 source/multigrid/mg_tools.inst.in                  |   128 +
 source/multigrid/mg_transfer_block.cc              |   630 +
 source/multigrid/mg_transfer_block.inst.in         |   134 +
 source/multigrid/mg_transfer_component.cc          |   689 +
 source/multigrid/mg_transfer_component.inst.in     |   100 +
 source/multigrid/mg_transfer_matrix_free.cc        |   947 ++
 source/multigrid/mg_transfer_matrix_free.inst.in   |    21 +
 source/multigrid/mg_transfer_prebuilt.cc           |   297 +
 source/multigrid/mg_transfer_prebuilt.inst.in      |    28 +
 source/multigrid/multigrid.cc                      |   296 +
 source/multigrid/multigrid.inst.in                 |    20 +
 source/numerics/CMakeLists.txt                     |    87 +
 source/numerics/data_out.cc                        |   539 +
 source/numerics/data_out.inst.in                   |    40 +
 source/numerics/data_out_dof_data.cc               |  1369 ++
 source/numerics/data_out_dof_data.inst.in          |   430 +
 source/numerics/data_out_faces.cc                  |   430 +
 source/numerics/data_out_faces.inst.in             |    24 +
 source/numerics/data_out_rotation.cc               |   512 +
 source/numerics/data_out_rotation.inst.in          |    22 +
 source/numerics/data_out_stack.cc                  |   478 +
 source/numerics/data_out_stack.inst.in             |    53 +
 source/numerics/data_postprocessor.cc              |   166 +
 source/numerics/data_postprocessor.inst.in         |    22 +
 source/numerics/derivative_approximation.cc        |  1143 ++
 source/numerics/derivative_approximation.inst.in   |   133 +
 source/numerics/dof_output_operator.cc             |    40 +
 source/numerics/dof_output_operator.inst.in        |    20 +
 source/numerics/error_estimator.cc                 |    24 +
 source/numerics/error_estimator.inst.in            |   157 +
 source/numerics/error_estimator_1d.cc              |   456 +
 source/numerics/error_estimator_1d.inst.in         |   142 +
 source/numerics/error_estimator_inst2.cc           |    24 +
 source/numerics/fe_field_function.cc               |    38 +
 source/numerics/fe_field_function.inst.in          |    27 +
 source/numerics/histogram.cc                       |   348 +
 source/numerics/matrix_creator.cc                  |    28 +
 source/numerics/matrix_creator.inst.in             |   315 +
 source/numerics/matrix_creator_inst2.cc            |    20 +
 source/numerics/matrix_creator_inst3.cc            |    20 +
 source/numerics/matrix_tools.cc                    |   615 +
 source/numerics/matrix_tools.inst.in               |    35 +
 source/numerics/matrix_tools_once.cc               |   523 +
 source/numerics/point_value_history.cc             |  1358 ++
 source/numerics/point_value_history.inst.in        |    59 +
 source/numerics/solution_transfer.cc               |   576 +
 source/numerics/solution_transfer.inst.in          |    26 +
 source/numerics/solution_transfer_inst2.cc         |    20 +
 source/numerics/solution_transfer_inst3.cc         |    20 +
 source/numerics/solution_transfer_inst4.cc         |    20 +
 source/numerics/time_dependent.cc                  |  1242 ++
 source/numerics/time_dependent.inst.in             |    26 +
 source/numerics/vector_tools_boundary.cc           |    24 +
 source/numerics/vector_tools_boundary.inst.in      |   187 +
 source/numerics/vector_tools_constraints.cc        |    24 +
 source/numerics/vector_tools_constraints.inst.in   |    94 +
 .../numerics/vector_tools_integrate_difference.cc  |    24 +
 .../vector_tools_integrate_difference.inst.in      |   116 +
 source/numerics/vector_tools_interpolate.cc        |    24 +
 source/numerics/vector_tools_interpolate.inst.in   |   120 +
 source/numerics/vector_tools_mean_value.cc         |    24 +
 source/numerics/vector_tools_mean_value.inst.in    |    49 +
 source/numerics/vector_tools_point_gradient.cc     |    24 +
 .../numerics/vector_tools_point_gradient.inst.in   |    80 +
 source/numerics/vector_tools_point_value.cc        |    24 +
 source/numerics/vector_tools_point_value.inst.in   |   164 +
 source/numerics/vector_tools_project.cc            |    24 +
 source/numerics/vector_tools_project.inst.in       |    69 +
 source/numerics/vector_tools_rhs.cc                |    85 +
 source/numerics/vector_tools_rhs.inst.in           |   107 +
 source/opencascade/CMakeLists.txt                  |    33 +
 source/opencascade/boundary_lib.cc                 |   263 +
 source/opencascade/boundary_lib.inst.in            |    28 +
 source/opencascade/utilities.cc                    |   638 +
 tests/CMakeLists.txt                               |   145 +
 tests/quick_tests/CMakeLists.txt                   |   121 +
 tests/quick_tests/affinity.cc                      |   114 +
 tests/quick_tests/mpi.cc                           |    71 +
 tests/quick_tests/p4est.cc                         |    83 +
 tests/quick_tests/run.cmake                        |    64 +
 tests/quick_tests/step-petsc.cc                    |   218 +
 tests/quick_tests/step-slepc.cc                    |   237 +
 tests/quick_tests/step.cc                          |   252 +
 tests/quick_tests/tbb.cc                           |    87 +
 2081 files changed, 632802 insertions(+)

diff --git a/AUTHORS b/AUTHORS
new file mode 100644
index 0000000..9d41856
--- /dev/null
+++ b/AUTHORS
@@ -0,0 +1,160 @@
+deal.II Authors
+===============
+
+   deal.II is a global project. It is administered by a group of principal
+   developers. Technical decisions are made by the principal developers
+   and a group of developers consisting of long-term contributors with a
+   global overview of the library. A large number of contributors have
+   provided substantial patches over the years. These three groups are
+   listed below.
+
+Principal developers
+--------------------
+
+     * Wolfgang Bangerth, Texas A&M University, TX, USA
+     * Timo Heister, Clemson University, SC, USA
+     * Guido Kanschat, Universität Heidelberg, Germany
+     * Matthias Maier, University of Minnesota, Minneapolis, MN, USA
+
+Developers
+----------
+
+     * Denis Davydov, University of Erlangen-Nuremberg, Germany
+     * Luca Heltai, SISSA, Trieste, Italy
+     * Martin Kronbichler, Technische Universität München, Germany
+     * Bruno Turcksin, Texas A&M University, TX, USA
+     * David Wells, Rensselaer Polytechnic Institute, NY, USA
+     * Toby D. Young, Polish Academy of Sciences, Poland
+
+Contributors
+------------
+
+   Many people have contributed to deal.II over the years, some of them
+   very substantial parts of the library. Their work is greatly
+   appreciated: no open source project can survive without a community.
+   The following people contributed major parts of the library (in
+   alphabetical order), with many more that have sent in fixes and small
+   enhancements:
+
+Mihai Alexe
+Moritz Allmaras
+Fahad Alrashed
+Michael Anderson
+Juan Carlos Araujo Cabarcas
+Nicola Cavallini
+Daniel Arndt
+Andrew Baker
+Mauro Bardelloni
+Alistair Bentley
+Andrea Bonito
+Marco Borelli
+Benjamin Brands
+Claire Bruna-Rosso
+Markus Bürg
+John Burnell
+Krysztof Bzowski
+Praveen C
+Brian Carnes
+Jordan Cazamias
+Conrad Clevenger
+Ivan Christov
+Chih-Che Chueh
+Minh Do-Quang
+Kevin Drzycimski
+Kevin Dugan
+Marco Engelhard
+Patrick Esser
+Anton Evgrafov
+Menno Fraters
+Jörg Frohne
+Thomas Geenen
+Rene Gassmoeller
+Martin Genet
+Arezou Ghesmati
+Nicola Giuliani
+Christian Goll
+Alexander Grayver
+Ryan Grove
+Felix Gruber
+Maien Hamed
+Ralf Hartmann
+Eric Heien
+Christoph Heiniger
+Michael F. Herbst
+Dakshina Ilangovan
+Bärbel Janssen
+Armin Ghajar Jazi
+Xing Jin
+Oliver Kayser-Herold
+Seungil Kim
+Benjamin Shelton Kirk
+Angela Klewinghaus
+Uwe Köcher
+Aslan Kosakian
+Adam Kosik
+Katharina Kormann
+Lukas Korous
+Giorgos Kourakos
+Oleh Krehel
+Ross Kynch
+Konstantin Ladutenko
+Damien Lebrun-Grandie
+Tobias Leicht
+Yan Li
+Manuel Quezada de Luna
+Vijay Mahadevan
+Cataldo Manigrasso
+Andre Massing
+Andrew McBride
+Craig Michoski
+Scott Miller
+Andrea Mola
+Helmut Müller
+Jose Javier Munoz Criollo
+Stefan Nauber
+David Neckels
+M. Sebastian Pauletti
+Jean-Paul Pelteret
+Jonathan Pitt
+Guiseppe Pitton
+Fernando Posada
+Adam Powell IV
+Florian Prill
+Andreas Putz
+Lei Qiao
+Daniel Castanon Quiroz
+Michael Rapson
+Thomas Richter
+Gennadiy Rishin
+Angel Rodriguez
+Ian Rose
+Shiva Rudraraju
+Mayank Sabharwal
+Abner Salgado-Gonzalez
+Alberto Sartori
+Anna Schneebeli
+Jan Schrage
+Ralf B. Schulz
+Daniel Shapero
+Jason Sheldon
+Florian Sonner
+Michael Stadler
+Jan Stebel
+Martin Steigemann
+Simon Sticko
+Franz-Theo Suttmeier
+Zhen Tao
+Marco Tezzele
+Habib Talavatifard
+Ben Thompson
+Christophe Trophime
+Heikki Virtanen
+Kainan Wang
+Yaqi Wang
+Sven Wetterauer
+Daniel Weygand
+Joshua White
+Michal Wichrowski
+Christian Wülker
+Yuhan Zhou
+Valentin Zingan
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..b425ee1
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,163 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+
+##                                                                    ##
+#            The cmake build system for the deal.II project            #
+#                                                                      #
+#    See doc/readme.html and doc/development/cmake.html for further    #
+#    details on how to use the cmake build system of deal.II.          #
+##                                                                    ##
+
+########################################################################
+#                                                                      #
+#                            Configuration:                            #
+#                                                                      #
+########################################################################
+
+#
+# General configuration for cmake:
+#
+MESSAGE(STATUS "This is CMake ${CMAKE_VERSION}")
+MESSAGE(STATUS "")
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+#
+# We support all policy changes up to version 3.1.0. Thus, explicitly set
+# all policies CMP0001 - CMP0054 to new for version 3.1 (and later) to
+# avoid some unnecessary warnings.
+#
+IF( "${CMAKE_VERSION}" VERSION_EQUAL "3.1" OR
+    "${CMAKE_VERSION}" VERSION_GREATER "3.1" )
+  CMAKE_POLICY(VERSION 3.1.0)
+ENDIF()
+
+IF(POLICY CMP0037)
+  # allow overriding the "test" target for quick tests
+  CMAKE_POLICY(SET CMP0037 OLD)
+ENDIF()
+
+LIST(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/modules/)
+
+#
+# Load all macros:
+#
+FILE(GLOB _macro_files "cmake/macros/*.cmake")
+MESSAGE(STATUS "Include ${CMAKE_SOURCE_DIR}/cmake/setup_external_macros.cmake")
+INCLUDE(${CMAKE_SOURCE_DIR}/cmake/setup_external_macros.cmake)
+FOREACH(_file ${_macro_files})
+  MESSAGE(STATUS "Include ${_file}")
+  INCLUDE(${_file})
+ENDFOREACH()
+
+#
+# Check for the existence of various optional folders:
+#
+IF(EXISTS ${CMAKE_SOURCE_DIR}/bundled/CMakeLists.txt)
+  SET(DEAL_II_HAVE_BUNDLED_DIRECTORY TRUE)
+ENDIF()
+
+IF(EXISTS ${CMAKE_SOURCE_DIR}/doc/CMakeLists.txt)
+  SET(DEAL_II_HAVE_DOC_DIRECTORY TRUE)
+ENDIF()
+
+IF(EXISTS ${CMAKE_SOURCE_DIR}/tests/CMakeLists.txt)
+  SET(DEAL_II_HAVE_TESTS_DIRECTORY TRUE)
+ENDIF()
+
+#
+# We have to initialize some cached variables before PROJECT is called, so
+# do it at this point:
+#
+VERBOSE_INCLUDE(${CMAKE_SOURCE_DIR}/cmake/setup_cached_variables.cmake)
+
+#
+# Now, set the project and set up the rest:
+#
+PROJECT(deal.II CXX C)
+ENABLE_LANGUAGE_OPTIONAL(Fortran)
+
+VERBOSE_INCLUDE(${CMAKE_SOURCE_DIR}/cmake/setup_deal_ii.cmake)
+
+VERBOSE_INCLUDE(${CMAKE_SOURCE_DIR}/cmake/setup_compiler_flags.cmake)
+
+#
+# Include information about bundled libraries:
+#
+IF(DEAL_II_HAVE_BUNDLED_DIRECTORY)
+  VERBOSE_INCLUDE(${CMAKE_SOURCE_DIR}/bundled/setup_bundled.cmake)
+ENDIF()
+
+#
+# Run all system checks:
+#
+FILE(GLOB _check_files "cmake/checks/*.cmake")
+LIST(SORT _check_files)
+FOREACH(_file ${_check_files})
+  VERBOSE_INCLUDE(${_file})
+ENDFOREACH()
+
+#
+# Feature configuration:
+#
+FILE(GLOB _configure_files "cmake/configure/configure_*.cmake")
+LIST(SORT _configure_files) # make sure to include in alphabetical order
+FOREACH(_file ${_configure_files})
+  VERBOSE_INCLUDE(${_file})
+ENDFOREACH()
+
+#
+# Finalize the configuration:
+#
+VERBOSE_INCLUDE(${CMAKE_SOURCE_DIR}/cmake/setup_cpack.cmake)
+VERBOSE_INCLUDE(${CMAKE_SOURCE_DIR}/cmake/setup_custom_targets.cmake)
+VERBOSE_INCLUDE(${CMAKE_SOURCE_DIR}/cmake/setup_finalize.cmake)
+VERBOSE_INCLUDE(${CMAKE_SOURCE_DIR}/cmake/setup_write_config.cmake)
+
+########################################################################
+#                                                                      #
+#                     Compilation and installation:                    #
+#                                                                      #
+########################################################################
+
+MESSAGE(STATUS "")
+MESSAGE(STATUS "Configuring done. Proceed to target definitions now.")
+
+ADD_SUBDIRECTORY(cmake/scripts)
+ADD_SUBDIRECTORY(include)
+
+IF(DEAL_II_HAVE_DOC_DIRECTORY)
+  ADD_SUBDIRECTORY(doc) # has to be included after include
+ENDIF()
+
+IF(DEAL_II_HAVE_BUNDLED_DIRECTORY)
+  ADD_SUBDIRECTORY(bundled)
+ENDIF()
+ADD_SUBDIRECTORY(source) # has to be included after bundled
+
+ADD_SUBDIRECTORY(cmake/config) # has to be included after source
+ADD_SUBDIRECTORY(contrib) # has to be included after source
+ADD_SUBDIRECTORY(examples)
+
+IF(DEAL_II_HAVE_TESTS_DIRECTORY)
+  ADD_SUBDIRECTORY(tests)
+ENDIF()
+
+#
+# And finally, print the configuration:
+#
+FILE(READ ${CMAKE_BINARY_DIR}/summary.log DEAL_II_LOG_SUMMARY)
+MESSAGE("${DEAL_II_LOG_SUMMARY}")
diff --git a/CTestConfig.cmake b/CTestConfig.cmake
new file mode 100644
index 0000000..6a8dc24
--- /dev/null
+++ b/CTestConfig.cmake
@@ -0,0 +1,50 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2013 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Dashboard configuration:
+#
+
+SET(CTEST_PROJECT_NAME "deal.II")
+
+SET(CTEST_DROP_METHOD "http")
+SET(CTEST_DROP_SITE "cdash.kyomu.43-1.org")
+SET(CTEST_DROP_LOCATION "/submit.php?project=deal.II")
+SET(CTEST_DROP_SITE_CDASH TRUE)
+
+SET(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_ERRORS   100)
+SET(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_WARNINGS 300)
+
+# number of lines to submit before an error:
+SET(CTEST_CUSTOM_ERROR_PRE_CONTEXT            5)
+# number of lines to submit after an error:
+SET(CTEST_CUSTOM_ERROR_POST_CONTEXT          20)
+
+#
+# Coverage options:
+#
+
+SET(CTEST_EXTRA_COVERAGE_GLOB
+  # These files should have executable lines and therefore coverage:
+  # source/**/*.cc
+  )
+
+SET(CTEST_CUSTOM_COVERAGE_EXCLUDE
+  "/bundled"
+  "/cmake/scripts/"
+  "/contrib"
+  "/examples"
+  "/tests"
+  )
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..5f5a596
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,523 @@
+
+This directory contains the deal.II library.
+
+The deal.II library is copyrighted by the deal.II authors. This term
+refers to the people listed in the file AUTHORS.
+
+The deal.II library is free software; you can use it, redistribute it,
+and/or modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either version
+2.1 of the License, or (at your option) any later version.
+
+The full text of the GNU Lesser General Public License version 2.1 is
+quoted below.
+
+The subdirectories "bundled/" and "contrib/" contain third party software.
+PLEASE NOTE THAT THE SOFTWARE THERE IS COPYRIGHTED BY OTHERS THAN THE
+deal.II AUTHORS, but is included by permission. For details, consult the
+stated licenses there.
+
+----------------------------------------------------------------------
+
+                  GNU LESSER GENERAL PUBLIC LICENSE
+                       Version 2.1, February 1999
+
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+ 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL.  It also counts
+ as the successor of the GNU Library Public License, version 2, hence
+ the version number 2.1.]
+
+                            Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+  This license, the Lesser General Public License, applies to some
+specially designated software packages--typically libraries--of the
+Free Software Foundation and other authors who decide to use it.  You
+can use it too, but we suggest you first think carefully about whether
+this license or the ordinary General Public License is the better
+strategy to use in any particular case, based on the explanations below.
+
+  When we speak of free software, we are referring to freedom of use,
+not price.  Our General Public Licenses are designed to make sure that
+you have the freedom to distribute copies of free software (and charge
+for this service if you wish); that you receive source code or can get
+it if you want it; that you can change the software and use pieces of
+it in new free programs; and that you are informed that you can do
+these things.
+
+  To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights.  These restrictions translate to certain responsibilities for
+you if you distribute copies of the library or if you modify it.
+
+  For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you.  You must make sure that they, too, receive or can get the source
+code.  If you link other code with the library, you must provide
+complete object files to the recipients, so that they can relink them
+with the library after making changes to the library and recompiling
+it.  And you must show them these terms so they know their rights.
+
+  We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+  To protect each distributor, we want to make it very clear that
+there is no warranty for the free library.  Also, if the library is
+modified by someone else and passed on, the recipients should know
+that what they have is not the original version, so that the original
+author's reputation will not be affected by problems that might be
+introduced by others.
+
+  Finally, software patents pose a constant threat to the existence of
+any free program.  We wish to make sure that a company cannot
+effectively restrict the users of a free program by obtaining a
+restrictive license from a patent holder.  Therefore, we insist that
+any patent license obtained for a version of the library must be
+consistent with the full freedom of use specified in this license.
+
+  Most GNU software, including some libraries, is covered by the
+ordinary GNU General Public License.  This license, the GNU Lesser
+General Public License, applies to certain designated libraries, and
+is quite different from the ordinary General Public License.  We use
+this license for certain libraries in order to permit linking those
+libraries into non-free programs.
+
+  When a program is linked with a library, whether statically or using
+a shared library, the combination of the two is legally speaking a
+combined work, a derivative of the original library.  The ordinary
+General Public License therefore permits such linking only if the
+entire combination fits its criteria of freedom.  The Lesser General
+Public License permits more lax criteria for linking other code with
+the library.
+
+  We call this license the "Lesser" General Public License because it
+does Less to protect the user's freedom than the ordinary General
+Public License.  It also provides other free software developers Less
+of an advantage over competing non-free programs.  These disadvantages
+are the reason we use the ordinary General Public License for many
+libraries.  However, the Lesser license provides advantages in certain
+special circumstances.
+
+  For example, on rare occasions, there may be a special need to
+encourage the widest possible use of a certain library, so that it becomes
+a de-facto standard.  To achieve this, non-free programs must be
+allowed to use the library.  A more frequent case is that a free
+library does the same job as widely used non-free libraries.  In this
+case, there is little to gain by limiting the free library to free
+software only, so we use the Lesser General Public License.
+
+  In other cases, permission to use a particular library in non-free
+programs enables a greater number of people to use a large body of
+free software.  For example, permission to use the GNU C Library in
+non-free programs enables many more people to use the whole GNU
+operating system, as well as its variant, the GNU/Linux operating
+system.
+
+  Although the Lesser General Public License is Less protective of the
+users' freedom, it does ensure that the user of a program that is
+linked with the Library has the freedom and the wherewithal to run
+that program using a modified version of the Library.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.  Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library".  The
+former contains code derived from the library, whereas the latter must
+be combined with the library in order to run.
+
+                  GNU LESSER GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License Agreement applies to any software library or other
+program which contains a notice placed by the copyright holder or
+other authorized party saying it may be distributed under the terms of
+this Lesser General Public License (also called "this License").
+Each licensee is addressed as "you".
+
+  A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+  The "Library", below, refers to any such software library or work
+which has been distributed under these terms.  A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language.  (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+  "Source code" for a work means the preferred form of the work for
+making modifications to it.  For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control compilation
+and installation of the library.
+
+  Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it).  Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+
+  1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+  You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+
+  2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) The modified work must itself be a software library.
+
+    b) You must cause the files modified to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    c) You must cause the whole of the work to be licensed at no
+    charge to all third parties under the terms of this License.
+
+    d) If a facility in the modified Library refers to a function or a
+    table of data to be supplied by an application program that uses
+    the facility, other than as an argument passed when the facility
+    is invoked, then you must make a good faith effort to ensure that,
+    in the event an application does not supply such function or
+    table, the facility still operates, and performs whatever part of
+    its purpose remains meaningful.
+
+    (For example, a function in a library to compute square roots has
+    a purpose that is entirely well-defined independent of the
+    application.  Therefore, Subsection 2d requires that any
+    application-supplied function or table used by this function must
+    be optional: if the application does not supply it, the square
+    root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library.  To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License.  (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.)  Do not make any other change in
+these notices.
+
+  Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+  This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+  4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+  If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library".  Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+  However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library".  The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+  When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library.  The
+threshold for this to be true is not precisely defined by law.
+
+  If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work.  (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+  Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+
+  6. As an exception to the Sections above, you may also combine or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+  You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License.  You must supply a copy of this License.  If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License.  Also, you must do one
+of these things:
+
+    a) Accompany the work with the complete corresponding
+    machine-readable source code for the Library including whatever
+    changes were used in the work (which must be distributed under
+    Sections 1 and 2 above); and, if the work is an executable linked
+    with the Library, with the complete machine-readable "work that
+    uses the Library", as object code and/or source code, so that the
+    user can modify the Library and then relink to produce a modified
+    executable containing the modified Library.  (It is understood
+    that the user who changes the contents of definitions files in the
+    Library will not necessarily be able to recompile the application
+    to use the modified definitions.)
+
+    b) Use a suitable shared library mechanism for linking with the
+    Library.  A suitable mechanism is one that (1) uses at run time a
+    copy of the library already present on the user's computer system,
+    rather than copying library functions into the executable, and (2)
+    will operate properly with a modified version of the library, if
+    the user installs one, as long as the modified version is
+    interface-compatible with the version that the work was made with.
+
+    c) Accompany the work with a written offer, valid for at
+    least three years, to give the same user the materials
+    specified in Subsection 6a, above, for a charge no more
+    than the cost of performing this distribution.
+
+    d) If distribution of the work is made by offering access to copy
+    from a designated place, offer equivalent access to copy the above
+    specified materials from the same place.
+
+    e) Verify that the user has already received a copy of these
+    materials or that you have already sent this user a copy.
+
+  For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it.  However, as a special exception,
+the materials to be distributed need not include anything that is
+normally distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+  It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system.  Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+
+  7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+    a) Accompany the combined library with a copy of the same work
+    based on the Library, uncombined with any other library
+    facilities.  This must be distributed under the terms of the
+    Sections above.
+
+    b) Give prominent notice with the combined library of the fact
+    that part of it is a work based on the Library, and explaining
+    where to find the accompanying uncombined form of the same work.
+
+  8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License.  Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License.  However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+  9. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Library or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+  10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties with
+this License.
+
+  11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under any
+particular circumstance, the balance of the section is intended to apply,
+and the section as a whole is intended to apply in other circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License may add
+an explicit geographical distribution limitation excluding those countries,
+so that distribution is permitted only in or among countries not thus
+excluded.  In such case, this License incorporates the limitation as if
+written in the body of this License.
+
+  13. The Free Software Foundation may publish revised and/or new
+versions of the Lesser General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation.  If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+
+  14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission.  For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this.  Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+                            NO WARRANTY
+
+  15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU.  SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+                     END OF TERMS AND CONDITIONS
+
+           How to Apply These Terms to Your New Libraries
+
+  If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change.  You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms of the
+ordinary General Public License).
+
+  To apply these terms, attach the following notices to the library.  It is
+safest to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the library's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+
+Also add information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the library, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the
+  library `Frob' (a library for tweaking knobs) written by James Random Hacker.
+
+  <signature of Ty Coon>, 1 April 1990
+  Ty Coon, President of Vice
+
+That's all there is to it!
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..bd1dc1d
--- /dev/null
+++ b/README.md
@@ -0,0 +1,56 @@
+What is deal.II?
+================
+
+deal.II is a C++ program library targeted at the computational solution
+of partial differential equations using adaptive finite elements. It uses
+state-of-the-art programming techniques to offer you a modern interface
+to the complex data structures and algorithms required.
+
+For the impatient:
+------------------
+
+Let's say you've unpacked the .tar.gz file into a directory /path/to/dealii/sources. 
+Then configure, compile, and install the deal.II library with:
+
+    $ mkdir build
+    $ cd build
+    $ cmake -DCMAKE_INSTALL_PREFIX=/path/where/dealii/should/be/installed/to /path/to/dealii/sources
+    $ make install    (alternatively $ make -j<N> install)
+    $ make test
+
+To build from the repository, execute the following commands first:
+
+    $ git clone https://github.com/dealii/dealii
+    $ cd dealii
+
+Then continue as before.
+
+A detailed *ReadME* can be found at ./doc/readme.html and
+./doc/users/cmake.html or at http://www.dealii.org/.
+
+Getting started:
+----------------
+
+The tutorial steps are located under examples/ of the installation.
+Information about the tutorial steps can be found at
+doc/doxygen/tutorial/index.html or at http://www.dealii.org/.
+
+deal.II includes support for pretty-printing deal.II objects inside GDB. See
+`contrib/utilities/dotgdbinit.py` or the new documentation page (under
+'information for users') for instructions on how to set this up.
+
+License:
+--------
+
+Please see the file ./LICENSE for details.
+
+Further information:
+--------------------
+
+For further information have a look at ./doc/index.html or at
+http://www.dealii.org.
+
+Continuous Integration Status:
+------------------------
+
+[![Build Status](https://travis-ci.org/dealii/dealii.png)](https://travis-ci.org/dealii/dealii)
diff --git a/VERSION b/VERSION
new file mode 100644
index 0000000..6da4de5
--- /dev/null
+++ b/VERSION
@@ -0,0 +1 @@
+8.4.1
diff --git a/cmake/README.md b/cmake/README.md
new file mode 100644
index 0000000..a840fb6
--- /dev/null
+++ b/cmake/README.md
@@ -0,0 +1,47 @@
+This folder contains the deal.II CMake build system
+===================================================
+
+Extensive documentation can be found at
+`/doc/development/cmake-internals.html`
+
+It is structured as follows:
+
+checks
+------
+
+Contains checks for platform features and compiler bugs and features
+
+config
+------
+
+Contains configuration templates for
+
+  * the project configuration (deal.IIConfig.cmake)
+  * the legacy Make.global_options mechanism
+  * the C++ template expansion mechanism (template-arguments)
+
+configure
+---------
+
+Contains files `configure_<feature>.cmake` for configuration and setup of
+all features the deal.II library supports
+
+macros
+------
+
+CMake script macros for several purposes
+
+modules
+-------
+
+Contains `Find<Library>.cmake` modules for finding external libraries
+
+scripts
+-------
+
+Contains script files needed for the build system, notably expand_instantiations
+
+setup_*.cmake
+-------------
+
+Setup files included by the top level `CMakeLists.txt` file
diff --git a/cmake/checks/check_01_compiler_features.cmake b/cmake/checks/check_01_compiler_features.cmake
new file mode 100644
index 0000000..62f6e0d
--- /dev/null
+++ b/cmake/checks/check_01_compiler_features.cmake
@@ -0,0 +1,382 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2016 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+########################################################################
+#                                                                      #
+#                 Check for various compiler features:                 #
+#                                                                      #
+########################################################################
+
+#
+# This file sets up:
+#
+#   DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
+#   DEAL_II_VECTOR_ITERATOR_IS_POINTER
+#   DEAL_II_HAVE_BUILTIN_EXPECT
+#   DEAL_II_HAVE_VERBOSE_TERMINATE
+#   DEAL_II_HAVE_GLIBC_STACKTRACE
+#   DEAL_II_HAVE_LIBSTDCXX_DEMANGLER
+#   DEAL_II_COMPILER_HAS_ATTRIBUTE_PRETTY_FUNCTION
+#   DEAL_II_COMPILER_HAS_ATTRIBUTE_DEPRECATED
+#   DEAL_II_COMPILER_HAS_ATTRIBUTE_ALWAYS_INLINE
+#   DEAL_II_DEPRECATED
+#   DEAL_II_ALWAYS_INLINE
+#   DEAL_II_COMPILER_HAS_DIAGNOSTIC_PRAGMA
+#   DEAL_II_COMPILER_HAS_FUSE_LD_GOLD
+#
+
+
+#
+# Check whether the compiler allows the use of arithmetic operations
+# +-*/ on vectorized data types or whether we need to use
+# _mm_add_pd for addition and so on. +-*/ is preferred because
+# it allows the compiler to choose other optimizations like
+# fused multiply add, whereas _mm_add_pd explicitly enforces the
+# assembler command.
+#
+# - Matthias Maier, rewritten 2012
+#
+CHECK_CXX_SOURCE_COMPILES(
+  "
+  #include <emmintrin.h>
+  int main()
+  {
+    __m128d a, b;
+    a = _mm_set_sd (1.0);
+    b = _mm_set1_pd (2.1);
+    __m128d c = a + b;
+    __m128d d = b - c;
+    __m128d e = c * a + d;
+    __m128d f = e/a;
+    (void)f;
+  }
+  "
+  DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS)
+
+
+#
+# Check whether the std::vector::iterator is just a plain pointer
+#
+# (Yes. It is not a bug. But the logic is the same.)
+#
+# - Matthias Maier, rewritten 2012
+#
+CHECK_CXX_COMPILER_BUG(
+  "
+  #include <vector>
+  template <typename T> void f(T) {}
+  template void f(int *);
+  template void f(std::vector<int>::iterator);
+  int main(){return 0;}
+  "
+  DEAL_II_VECTOR_ITERATOR_IS_POINTER)
+
+
+#
+# Check for existence of the __builtin_expect facility of newer
+# GCC compilers. This can be used to hint the compiler's branch
+# prediction unit in some cases. We use it in the AssertThrow
+# macros.
+#
+# - Matthias Maier, rewritten 2012
+#
+CHECK_CXX_SOURCE_COMPILES(
+  "
+  bool f() {}
+  int main(){ if (__builtin_expect(f(),false)) ; }
+  "
+  DEAL_II_HAVE_BUILTIN_EXPECT)
+
+
+#
+# Newer versions of GCC have a very nice feature: you can set
+# a verbose terminate handler, that not only aborts a program
+# when an exception is thrown and not caught somewhere, but
+# before aborting it prints that an exception has been thrown,
+# and possibly what the std::exception::what() function has to
+# say. Since many people run into the trap of not having a
+# catch clause in main(), they wonder where that abort may be
+# coming from.  The terminate handler then at least says what is
+# missing in their program.
+#
+# This test checks whether this feature is available.
+#
+# - Matthias Maier, rewritten 2012
+#
+CHECK_CXX_SOURCE_COMPILES(
+  "
+  #include <exception>
+  namespace __gnu_cxx
+  {
+    extern void __verbose_terminate_handler ();
+  }
+  struct preload_terminate_dummy
+  {
+    preload_terminate_dummy()
+    {
+      std::set_terminate (__gnu_cxx::__verbose_terminate_handler);
+    }
+  };
+  static preload_terminate_dummy dummy;
+  int main() { throw 1; return 0; }
+  "
+  DEAL_II_HAVE_VERBOSE_TERMINATE)
+
+
+#
+# Check whether glibc-like stacktrace information is available
+# for the Exception class. If it is, then try to also determine
+# whether the compiler accepts the -rdynamic flag, since that is
+# recommended for linking if one wants to have meaningful
+# backtraces.
+#
+# - Matthias Maier, rewritten 2012
+#
+CHECK_CXX_SOURCE_COMPILES(
+  "
+  #include <execinfo.h>
+  #include <stdlib.h>
+  void * array[25];
+  int nSize = backtrace(array, 25);
+  char ** symbols = backtrace_symbols(array, nSize);
+  int main(){ free(symbols); return 0; }
+  "
+  DEAL_II_HAVE_GLIBC_STACKTRACE)
+
+IF(DEAL_II_HAVE_GLIBC_STACKTRACE AND NOT DEAL_II_STATIC_EXECUTABLE)
+  ENABLE_IF_LINKS(DEAL_II_LINKER_FLAGS "-rdynamic")
+ENDIF()
+
+
+#
+# Check whether the compiler offers a way to demangle symbols
+# from within the program. Used inside the exception stacktrace
+# mechanism.
+#
+# The example code is taken from
+#   http://gcc.gnu.org/onlinedocs/libstdc++/18_support/howto.html#6
+#
+# - Matthias Maier, rewritten 2012
+#
+CHECK_CXX_SOURCE_COMPILES(
+  "
+  #include <exception>
+  #include <iostream>
+  #include <cxxabi.h>
+  #include <cstdlib>
+
+  struct empty { };
+
+  template <typename T, int N>
+  struct bar { };
+
+  int     status;
+  char   *realname;
+
+  int main()
+  {
+    // exception classes not in <stdexcept>, thrown by the implementation
+    // instead of the user
+    std::bad_exception  e;
+    realname = abi::__cxa_demangle(e.what(), 0, 0, &status);
+    free(realname);
+
+
+    // typeid
+    bar<empty,17>          u;
+    const std::type_info  &ti = typeid(u);
+
+    realname = abi::__cxa_demangle(ti.name(), 0, 0, &status);
+    free(realname);
+
+      return 0;
+  }
+  "
+  DEAL_II_HAVE_LIBSTDCXX_DEMANGLER)
+
+
+#
+# GCC and some other compilers have __PRETTY_FUNCTION__, showing
+# an unmangled version of the function we are presently in,
+# while __FUNCTION__ (or __func__ in ISO C99) simply gives the
+# function name which would not include the arguments of that
+# function, leading to problems in C++ with overloaded function
+# names.
+#
+# If __PRETTY_FUNCTION__ is not available, try to find out whether
+# __func__ is available and use the preprocessor to set the first
+# thing to the second. If this is also not the case, then set it
+# to something indicating non-availability.
+#
+# - Matthias Maier, rewritten 2012
+#
+
+CHECK_CXX_SOURCE_COMPILES(
+  "
+  #include <iostream>
+  int main()
+  {
+    std::cout << __PRETTY_FUNCTION__ << std::endl;
+    return 0;
+  }
+  "
+  DEAL_II_COMPILER_HAS_ATTRIBUTE_PRETTY_FUNCTION)
+
+IF(NOT DEAL_II_COMPILER_HAS_ATTRIBUTE_PRETTY_FUNCTION)
+  CHECK_CXX_SOURCE_COMPILES(
+    "
+    #include <iostream>
+    int main()
+    {
+      std::cout << __func__ << std::endl;
+      return 0;
+    }
+    "
+    DEAL_II_COMPILER_HAS_ATTRIBUTE_FUNC)
+
+  IF(DEAL_II_COMPILER_HAS_ATTRIBUTE_FUNC)
+    SET(__PRETTY_FUNCTION__ "__func__")
+  ELSE()
+    SET(__PRETTY_FUNCTION__ "\"(not available)\"")
+  ENDIF()
+
+ENDIF()
+
+
+#
+# Newer versions of GCC can pass a flag to the assembler to
+# compress debug sections. At the time of writing this test,
+# this can save around 230 MB of disk space on the object
+# files we produce (810MB down to 570MB for the debug versions
+# of object files). Unfortunately, the sections have to be
+# unpacked again when they are put into the shared libs, so
+# no savings there.
+#
+# The flag also doesn't appear to be working on Cygwin, as
+# per email by John Fowkes on the mailing list in Feb 2012,
+# so don't run the test on cygwin.
+#
+# Finally, Intel's icpc compiler complains about the flag
+# but apparently only if the file to be compiled contains
+# particular content. See bug #46 in the Google Code bug
+# data base (http://code.google.com/p/dealii/issues/detail?id=46).
+# It proved impossible to track down under which circumstances
+# this happens, and so it was disabled for icpc.
+#
+# - Matthias Maier, rewritten 2012, 2013
+#
+IF( (NOT CMAKE_SYSTEM_NAME MATCHES "CYGWIN") AND
+    (NOT CMAKE_SYSTEM_NAME MATCHES "Windows") AND
+    (NOT CMAKE_CXX_COMPILER_ID MATCHES "Intel") )
+  ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS_DEBUG "-Wa,--compress-debug-sections")
+ENDIF()
+
+
+#
+# GCC and some other compilers have an attribute of the form
+# __attribute__((deprecated)) that can be used to make the
+# compiler warn whenever a deprecated function is used. See
+# if this attribute is available.
+#
+# If it is, set the variable DEAL_II_DEPRECATED to its value. If
+# it isn't, set it to an empty string (actually, to a single
+# space, since the empty string causes CMAKE to #undef the
+# variable in config.h), i.e., to something the compiler will
+# ignore
+#
+# - Wolfgang Bangerth, 2012
+#
+
+# first see if the compiler accepts the attribute
+CHECK_CXX_SOURCE_COMPILES(
+  "
+          int old_fn () __attribute__((deprecated));
+          int old_fn () { return 0; }
+          int (*fn_ptr)() = old_fn;
+
+          int main () {}
+  "
+  DEAL_II_COMPILER_HAS_ATTRIBUTE_DEPRECATED
+  )
+
+IF(DEAL_II_COMPILER_HAS_ATTRIBUTE_DEPRECATED)
+  SET(DEAL_II_DEPRECATED "__attribute__((deprecated))")
+ELSE()
+  SET(DEAL_II_DEPRECATED " ")
+ENDIF()
+
+
+#
+# Do a similar check with the always_inline attribute on functions.
+#
+CHECK_CXX_SOURCE_COMPILES(
+  "
+          __attribute__((always_inline)) int fn () { return 0; }
+          int main () { return fn(); }
+  "
+  DEAL_II_COMPILER_HAS_ATTRIBUTE_ALWAYS_INLINE
+  )
+
+IF(DEAL_II_COMPILER_HAS_ATTRIBUTE_ALWAYS_INLINE)
+  SET(DEAL_II_ALWAYS_INLINE "__attribute__((always_inline))")
+ELSE()
+  SET(DEAL_II_ALWAYS_INLINE " ")
+ENDIF()
+
+
+#
+# GCC and Clang allow fine grained control of diagnostics via the "GCC
+# diagnostic" pragma. Check whether the compiler supports the "push" and
+# "pop" mechanism and the "ignored" toggle. Further, test for the
+# alternative "_Pragma(...)" variant (and that it does not emit a warning).
+#
+# - Matthias Maier, 2015
+#
+PUSH_CMAKE_REQUIRED("-Werror")
+CHECK_CXX_SOURCE_COMPILES(
+  "
+  _Pragma(\"GCC diagnostic push\")
+  _Pragma(\"GCC diagnostic ignored \\\\\\\"-Wextra\\\\\\\"\")
+  _Pragma(\"GCC diagnostic ignored \\\\\\\"-Wunknown-pragmas\\\\\\\"\")
+  _Pragma(\"GCC diagnostic ignored \\\\\\\"-Wpragmas\\\\\\\"\")
+  int main() { return 0; }
+  _Pragma(\"GCC diagnostic pop\")
+  "
+  DEAL_II_COMPILER_HAS_DIAGNOSTIC_PRAGMA)
+RESET_CMAKE_REQUIRED()
+
+
+#
+# Use the 'gold' linker if possible, given that it's substantially faster.
+#
+# We have to try to link a full executable with -fuse-ld=gold to check
+# whether "ld.gold" is actually available. gcc has the bad habit of
+# accepting the flag without emitting an error.
+#
+# Wolfgang Bangerth, Matthias Maier, 2015
+#
+PUSH_CMAKE_REQUIRED("-Werror")
+PUSH_CMAKE_REQUIRED("-fuse-ld=gold")
+CHECK_CXX_SOURCE_COMPILES(
+  "
+  int main() { return 0; }
+  "
+  DEAL_II_COMPILER_HAS_FUSE_LD_GOLD)
+RESET_CMAKE_REQUIRED()
+
+IF(DEAL_II_COMPILER_HAS_FUSE_LD_GOLD)
+  ADD_FLAGS(DEAL_II_LINKER_FLAGS "-fuse-ld=gold")
+ENDIF()
+
diff --git a/cmake/checks/check_01_cpu_features.cmake b/cmake/checks/check_01_cpu_features.cmake
new file mode 100644
index 0000000..37b6e2e
--- /dev/null
+++ b/cmake/checks/check_01_cpu_features.cmake
@@ -0,0 +1,257 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+
+########################################################################
+#                                                                      #
+#                   Platform and CPU specific tests:                   #
+#                                                                      #
+########################################################################
+
+#
+# This file sets up
+#
+#   DEAL_II_WORDS_BIGENDIAN
+#   DEAL_II_HAVE_SSE2                    *)
+#   DEAL_II_HAVE_AVX                     *)
+#   DEAL_II_HAVE_AVX512                  *)
+#   DEAL_II_COMPILER_VECTORIZATION_LEVEL
+#   DEAL_II_HAVE_OPENMP_SIMD             *)
+#   DEAL_II_OPENMP_SIMD_PRAGMA
+#
+# *)
+# It is possible to manually set the above variables to their appropriate
+# values when platform introspection is disabled with
+# DEAL_II_ALLOW_PLATFORM_INTROSPECTION=OFF.
+#
+
+
+#
+# Determine the endianness of the platform:
+#
+IF(CMAKE_C_COMPILER_WORKS)
+  INCLUDE(TestBigEndian)
+
+  CLEAR_CMAKE_REQUIRED()
+  TEST_BIG_ENDIAN(DEAL_II_WORDS_BIGENDIAN)
+  RESET_CMAKE_REQUIRED()
+ELSE()
+  MESSAGE(STATUS
+    "No suitable C compiler was found! Assuming little endian platform."
+    )
+  SET(DEAL_II_WORDS_BIGENDIAN "0")
+ENDIF()
+
+
+#
+# Check whether the compiler allows for vectorization and that
+# vectorization actually works on the given CPU. For this test, we use
+# compiler intrinsics similar to what is used in the deal.II library and
+# check whether the arithmetic operations are correctly performed on
+# examples where all numbers are exactly represented as floating point
+# numbers.
+#
+# - Matthias Maier, rewritten 2012
+#
+
+IF(DEAL_II_ALLOW_PLATFORM_INTROSPECTION)
+  #
+  # Take care that the following tests are rerun if CMAKE_REQUIRED_FLAGS
+  # changes.
+  #
+  IF(NOT "${CMAKE_REQUIRED_FLAGS}" STREQUAL "${DEAL_II_CHECK_CPU_FEATURES_SAVED}")
+    UNSET(DEAL_II_HAVE_SSE2 CACHE)
+    UNSET(DEAL_II_HAVE_AVX CACHE)
+    UNSET(DEAL_II_HAVE_AVX512 CACHE)
+  ENDIF()
+  SET(DEAL_II_CHECK_CPU_FEATURES_SAVED
+    "${CMAKE_REQUIRED_FLAGS}" CACHE INTERNAL "" FORCE
+    )
+
+  CHECK_CXX_SOURCE_RUNS(
+    "
+    #include <emmintrin.h>
+    int main()
+    {
+    __m128d a, b;
+    const unsigned int vector_bytes = sizeof(__m128d);
+    const int n_vectors = vector_bytes/sizeof(double);
+    __m128d * data =
+      reinterpret_cast<__m128d*>(_mm_malloc (2*vector_bytes, vector_bytes));
+    double * ptr = reinterpret_cast<double*>(&a);
+    ptr[0] = (volatile double)(1.0);
+    for (int i=1; i<n_vectors; ++i)
+      ptr[i] = 0.0;
+    b = _mm_set1_pd ((volatile double)(2.25));
+    data[0] = _mm_add_pd (a, b);
+    data[1] = _mm_mul_pd (b, data[0]);
+    ptr = reinterpret_cast<double*>(&data[1]);
+    unsigned int return_value = 0;
+    if (ptr[0] != 7.3125)
+      return_value = 1;
+    for (int i=1; i<n_vectors; ++i)
+      if (ptr[i] != 5.0625)
+        return_value = 1;
+    _mm_free (data);
+    return return_value;
+    }
+    "
+    DEAL_II_HAVE_SSE2)
+
+  #
+  # clang-3.6.0 has a bug in operator+ on two VectorizedArray components as
+  # defined in deal.II. Therefore, the test for AVX needs to also test for
+  # operator+ to be correctly implemented.
+  #
+  CHECK_CXX_SOURCE_RUNS(
+    "
+    #include <immintrin.h>
+    class VectorizedArray
+    {
+    public:
+      VectorizedArray &
+      operator += (const VectorizedArray &vec)
+      {
+        data = _mm256_add_pd (data, vec.data);
+        return *this;
+      }
+      __m256d data;
+    };
+    inline
+    VectorizedArray
+    operator + (const VectorizedArray &u, const VectorizedArray &v)
+    {
+      VectorizedArray tmp = u;
+      return tmp+=v;
+    }
+    int main()
+    {
+      __m256d a, b;
+      const unsigned int vector_bytes = sizeof(__m256d);
+      const int n_vectors = vector_bytes/sizeof(double);
+      __m256d * data =
+        reinterpret_cast<__m256d*>(_mm_malloc (2*vector_bytes, vector_bytes));
+      double * ptr = reinterpret_cast<double*>(&a);
+      ptr[0] = (volatile double)(1.0);
+      for (int i=1; i<n_vectors; ++i)
+        ptr[i] = 0.0;
+      b = _mm256_set1_pd ((volatile double)(2.25));
+      data[0] = _mm256_add_pd (a, b);
+      data[1] = _mm256_mul_pd (b, data[0]);
+      ptr = reinterpret_cast<double*>(&data[1]);
+      unsigned int return_value = 0;
+      if (ptr[0] != 7.3125)
+        return_value = 1;
+      for (int i=1; i<n_vectors; ++i)
+        if (ptr[i] != 5.0625)
+          return_value = 1;
+      VectorizedArray c, d, e;
+      c.data = b;
+      d.data = b;
+      e = c + d;
+      ptr = reinterpret_cast<double*>(&e.data);
+      for (int i=0; i<n_vectors; ++i)
+        if (ptr[i] != 4.5)
+          return_value = 1;
+      _mm_free (data);
+      return return_value;
+    }
+    "
+    DEAL_II_HAVE_AVX)
+
+  CHECK_CXX_SOURCE_RUNS(
+    "
+    #include <immintrin.h>
+    int main()
+    {
+      __m512d a, b;
+      const unsigned int vector_bytes = sizeof(__m512d);
+      const int n_vectors = vector_bytes/sizeof(double);
+      __m512d * data =
+        reinterpret_cast<__m512d*>(_mm_malloc (2*vector_bytes, vector_bytes));
+      double * ptr = reinterpret_cast<double*>(&a);
+      ptr[0] = (volatile double)(1.0);
+      for (int i=1; i<n_vectors; ++i)
+        ptr[i] = 0.0;
+      const volatile double x = 2.25;
+      b = _mm512_set1_pd(x);
+      data[0] = _mm512_add_pd (a, b);
+      data[1] = _mm512_mul_pd (b, data[0]);
+      ptr = reinterpret_cast<double*>(&data[1]);
+      unsigned int return_value = 0;
+      if (ptr[0] != 7.3125)
+        return_value = 1;
+      for (int i=1; i<n_vectors; ++i)
+        if (ptr[i] != 5.0625)
+          return_value = 1;
+      _mm_free (data);
+      return return_value;
+    }
+    "
+    DEAL_II_HAVE_AVX512)
+ENDIF()
+
+# AVX512 takes precedence over AVX, AVX over SSE2; 0 if none was detected.
+SET(DEAL_II_COMPILER_VECTORIZATION_LEVEL 0)
+IF(DEAL_II_HAVE_AVX512)
+  SET(DEAL_II_COMPILER_VECTORIZATION_LEVEL 3)
+ELSEIF(DEAL_II_HAVE_AVX)
+  SET(DEAL_II_COMPILER_VECTORIZATION_LEVEL 2)
+ELSEIF(DEAL_II_HAVE_SSE2)
+  SET(DEAL_II_COMPILER_VECTORIZATION_LEVEL 1)
+ENDIF()
+
+
+#
+# OpenMP 4.0 can be used for vectorization (supported by gcc-4.9.1 and
+# later). Only the vectorization instructions are allowed; the threading
+# must be done through TBB.
+#
+
+# Pick up the correct candidate keyword for the current compiler:
+SET(_keyword "")
+IF(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
+  IF(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "15" )
+    SET(_keyword "qopenmp")
+  ELSEIF(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "14" )
+    SET(_keyword "openmp")
+  ENDIF()
+
+ELSEIF(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+  # clang-3.6.1 or newer, or XCode version 6.3, or newer.
+  IF( ( CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "3.6"
+        AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0" )
+      OR CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "6.2")
+    SET(_keyword "openmp")
+  ENDIF()
+
+ELSE()
+  SET(_keyword "fopenmp")
+
+ENDIF()
+
+IF(NOT "${_keyword}" STREQUAL "")
+  CHECK_CXX_COMPILER_FLAG("-${_keyword}-simd" DEAL_II_HAVE_OPENMP_SIMD)
+ENDIF()
+
+SET(DEAL_II_OPENMP_SIMD_PRAGMA " ")
+IF(DEAL_II_HAVE_OPENMP_SIMD)
+  ADD_FLAGS(DEAL_II_CXX_FLAGS "-${_keyword}-simd")
+  # Intel is special:
+  IF(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
+    ADD_FLAGS(DEAL_II_LINKER_FLAGS "-${_keyword}")
+  ENDIF()
+  SET(DEAL_II_OPENMP_SIMD_PRAGMA "_Pragma(\"omp simd\")")
+ENDIF()
diff --git a/cmake/checks/check_01_cxx_features.cmake b/cmake/checks/check_01_cxx_features.cmake
new file mode 100644
index 0000000..6d94ea7
--- /dev/null
+++ b/cmake/checks/check_01_cxx_features.cmake
@@ -0,0 +1,550 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Check for various C++ language features
+#
+# This file sets up
+#
+#   DEAL_II_WITH_CXX11
+#   DEAL_II_WITH_CXX14
+#
+#   DEAL_II_HAVE_CXX11_IS_TRIVIALLY_COPYABLE
+#   DEAL_II_HAVE_ISNAN
+#   DEAL_II_HAVE_UNDERSCORE_ISNAN
+#   DEAL_II_HAVE_ISFINITE
+#   DEAL_II_HAVE_FP_EXCEPTIONS
+#   DEAL_II_HAVE_COMPLEX_OPERATOR_OVERLOADS
+#
+
+
+########################################################################
+#                                                                      #
+#                         C++ Version Support:                         #
+#                                                                      #
+########################################################################
+
+#
+# backwards compatibility with the old DEAL_II_CXX11_FLAG option
+#
+SET_IF_EMPTY(DEAL_II_CXX_VERSION_FLAG "${DEAL_II_CXX11_FLAG}")
+
+IF(DEAL_II_WITH_CXX14 AND DEFINED DEAL_II_WITH_CXX11 AND NOT DEAL_II_WITH_CXX11)
+  MESSAGE(FATAL_ERROR
+    "Compiling deal.II with C++14 support (i.e., DEAL_II_WITH_CXX14=ON) requires"
+    " that C++11 support not be explicitly disabled (i.e., DEAL_II_WITH_CXX11 may"
+    " not be set to a logically false value)."
+    )
+ENDIF()
+
+IF(DEFINED DEAL_II_WITH_CXX11 AND NOT DEAL_II_WITH_CXX11)
+  SET(DEAL_II_WITH_CXX14 OFF CACHE STRING "" FORCE)
+ENDIF()
+
+
+#
+# Check the user supplied DEAL_II_CXX_VERSION_FLAG
+#
+
+IF(NOT "${DEAL_II_CXX_VERSION_FLAG}" STREQUAL "")
+  CHECK_CXX_COMPILER_FLAG(${DEAL_II_CXX_VERSION_FLAG} DEAL_II_CXX_VERSION_FLAG_VALID)
+  IF(NOT DEAL_II_CXX_VERSION_FLAG_VALID)
+    MESSAGE(FATAL_ERROR
+      "The supplied flag \"${DEAL_II_CXX_VERSION_FLAG}\" was not recognized "
+      "by the compiler."
+      )
+  ENDIF()
+
+  SET(_user_provided_cxx_version_flag TRUE)
+ENDIF()
+
+# Helper macros: _check_cxx_flag(<suffix>) adopts "-std=c++<suffix>" as
+# DEAL_II_CXX_VERSION_FLAG if none is set yet and the compiler accepts it.
+# _check_version(<version> <symbolic>) tries both suffixes and aborts if
+# DEAL_II_WITH_CXX<version> is enabled but still no flag was found.
+MACRO(_check_cxx_flag _suffix)
+  IF("${DEAL_II_CXX_VERSION_FLAG}" STREQUAL "")
+    CHECK_CXX_COMPILER_FLAG("-std=c++${_suffix}" DEAL_II_HAVE_FLAG_stdcxx${_suffix})
+    IF(DEAL_II_HAVE_FLAG_stdcxx${_suffix})
+      SET(DEAL_II_CXX_VERSION_FLAG "-std=c++${_suffix}")
+    ENDIF()
+  ENDIF()
+ENDMACRO()
+
+MACRO(_check_version _version _symbolic)
+  _check_cxx_flag("${_version}")
+  _check_cxx_flag("${_symbolic}")
+  IF(DEAL_II_WITH_CXX${_version} AND "${DEAL_II_CXX_VERSION_FLAG}" STREQUAL "")
+    MESSAGE(FATAL_ERROR
+      "C++${_version} support was requested but CMake was not able to find a valid C++${_version}"
+      " flag. Try to manually specify DEAL_II_CXX_VERSION_FLAG and rerun CMake."
+      )
+  ENDIF()
+ENDMACRO()
+
+#
+# Check for proper C++14 support and set up DEAL_II_HAVE_CXX14:
+#
+IF(NOT DEFINED DEAL_II_WITH_CXX14 OR DEAL_II_WITH_CXX14)
+  _check_version("14" "1y")
+
+  IF(NOT "${DEAL_II_CXX_VERSION_FLAG}" STREQUAL "")
+    # Set CMAKE_REQUIRED_FLAGS for the unit tests
+    MESSAGE(STATUS "Using C++ version flag \"${DEAL_II_CXX_VERSION_FLAG}\"")
+    PUSH_CMAKE_REQUIRED("${DEAL_II_CXX_VERSION_FLAG}")
+
+    #
+    # This test does not guarantee full C++14 support, but virtually every
+    # compiler with some C++14 support implements this.
+    #
+    CHECK_CXX_SOURCE_COMPILES(
+      "
+      #include <memory>
+      int main()
+      {
+          auto ptr = std::make_unique<int>(42);
+          return 0;
+      }
+      "
+      DEAL_II_HAVE_CXX14_MAKE_UNIQUE)
+
+    #
+    # Clang-3.5* and older bail out with a spurious error message in case
+    # of an undeduced auto return type.
+    #
+    # https://llvm.org/bugs/show_bug.cgi?id=16876
+    #
+    PUSH_CMAKE_REQUIRED("${DEAL_II_CXX_FLAGS_DEBUG}")
+    CHECK_CXX_SOURCE_COMPILES(
+      "
+      struct foo
+      {
+        auto func();
+      };
+      int main()
+      {
+        foo bar;
+      }
+      "
+      DEAL_II_HAVE_CXX14_CLANGAUTODEBUG_BUG_OK)
+
+    RESET_CMAKE_REQUIRED()
+  ENDIF()
+
+  IF( DEAL_II_HAVE_CXX14_MAKE_UNIQUE AND
+      DEAL_II_HAVE_CXX14_CLANGAUTODEBUG_BUG_OK )
+    SET(DEAL_II_HAVE_CXX14 TRUE)
+  ELSE()
+    IF(NOT _user_provided_cxx_version_flag)
+      SET(DEAL_II_CXX_VERSION_FLAG "")
+    ENDIF()
+  ENDIF()
+ENDIF()
+
+
+#
+# Check for proper C++11 support and set up DEAL_II_HAVE_CXX11:
+#
+IF(NOT DEFINED DEAL_II_WITH_CXX11 OR DEAL_II_WITH_CXX11)
+
+  IF("${DEAL_II_CXX_VERSION_FLAG}" STREQUAL "")
+    _check_version("11" "0x")
+  ENDIF()
+
+  IF(NOT "${DEAL_II_CXX_VERSION_FLAG}" STREQUAL "")
+    # Set CMAKE_REQUIRED_FLAGS for the unit tests
+    MESSAGE(STATUS "Using C++ version flag \"${DEAL_II_CXX_VERSION_FLAG}\"")
+    PUSH_CMAKE_REQUIRED("${DEAL_II_CXX_VERSION_FLAG}")
+
+    CHECK_CXX_SOURCE_COMPILES(
+      "
+      #include <array>
+      std::array<int,3> p;
+      int main(){  p[0]; return 0; }
+      "
+      DEAL_II_HAVE_CXX11_ARRAY)
+
+    CHECK_CXX_SOURCE_COMPILES(
+      "
+      #include <condition_variable>
+      std::condition_variable c;
+      int main(){ c.notify_all(); return 0; }
+      "
+      DEAL_II_HAVE_CXX11_CONDITION_VARIABLE)
+
+    CHECK_CXX_SOURCE_COMPILES(
+      "
+      #include <functional>
+      void f(int, double){}
+      std::function<void (int)> g = std::bind (f, std::placeholders::_1,1.1);
+      int main(){ return 0; }
+      "
+      DEAL_II_HAVE_CXX11_FUNCTIONAL)
+
+    # Make sure we don't run into GCC bug 35569
+    CHECK_CXX_SOURCE_COMPILES(
+      "
+      #include <functional>
+      void f(int){}
+      using namespace std;
+      using namespace std::placeholders;
+      int main(){ bind(multiplies<int>(),4,_1)(5); return 0; }
+      "
+      DEAL_II_HAVE_CXX11_FUNCTIONAL_GCCBUG35569_OK)
+
+    # clang libc++ bug, see https://llvm.org/bugs/show_bug.cgi?id=20084
+    CHECK_CXX_SOURCE_COMPILES(
+      "
+      #include <functional>
+      struct A { void foo() const {} };
+      int main() { A a; std::bind(&A::foo,a)(); return 0; }
+      "
+      DEAL_II_HAVE_CXX11_FUNCTIONAL_LLVMBUG20084_OK)
+
+    CHECK_CXX_SOURCE_COMPILES(
+      "
+      #include <memory>
+      std::shared_ptr<int> p(new int(3));
+      int main(){ return 0; }
+      "
+      DEAL_II_HAVE_CXX11_SHARED_PTR)
+
+    PUSH_CMAKE_REQUIRED("-pthread")
+    CHECK_CXX_SOURCE_COMPILES(
+      "
+      #include <thread>
+      void f(int){}
+      int main(){ std::thread t(f,1); t.join(); return 0; }
+      "
+      DEAL_II_HAVE_CXX11_THREAD)
+    RESET_CMAKE_REQUIRED()
+    PUSH_CMAKE_REQUIRED("${DEAL_II_CXX_VERSION_FLAG}")
+
+    CHECK_CXX_SOURCE_COMPILES(
+      "
+      #include <mutex>
+      std::mutex m;
+      int main(){ m.lock(); return 0; }
+      "
+      DEAL_II_HAVE_CXX11_MUTEX)
+
+    CHECK_CXX_SOURCE_COMPILES(
+      "
+      #include <tuple>
+      std::tuple<int,double,char> p(1,1.1,'a');
+      int main(){ return 0; }
+      "
+      DEAL_II_HAVE_CXX11_TUPLE)
+
+    CHECK_CXX_SOURCE_COMPILES(
+      "
+      #include <type_traits>
+      const bool m0 = std::is_trivial<double>::value;
+      const bool m1 = std::is_standard_layout<double>::value;
+      const bool m2 = std::is_pod<double>::value;
+      int main(){ return 0; }
+      "
+      DEAL_II_HAVE_CXX11_TYPE_TRAITS)
+
+    #
+    # On Mac OS-X 10.9 with recent gcc compilers in C++11 mode, linking to
+    # some standard C library functions, notably toupper and tolower, fails
+    # due to unresolved references to these functions.
+    #
+    # Thanks to Denis Davydov for the testcase.
+    #
+    # Matthias Maier, 2013
+    #
+    CHECK_CXX_SOURCE_COMPILES(
+      "
+      #include <ctype.h>
+      int main ()
+      {
+        char c = toupper('a');
+      }
+      "
+      DEAL_II_HAVE_CXX11_MACOSXC99BUG_OK)
+
+
+    #
+    # icc-13 triggers an internal compiler error when compiling
+    # std::numeric_limits<...>::min() with -std=c++0x [1].
+    #
+    # Reported by Ted Kord.
+    #
+    # - Matthias Maier, 2013
+    #
+    # [1] http://software.intel.com/en-us/forums/topic/328902
+    #
+    CHECK_CXX_SOURCE_COMPILES(
+      "
+      #include <limits>
+      struct Integer
+      {
+        static const int min_int_value;
+        static const int max_int_value;
+      };
+      const int Integer::min_int_value = std::numeric_limits<int>::min();
+      const int Integer::max_int_value = std::numeric_limits<int>::max();
+      int main() { return 0; }
+      "
+      DEAL_II_HAVE_CXX11_ICCNUMERICLIMITSBUG_OK)
+
+
+    #
+    # icc-14.0.0 has an astonishing bug [1] where it hits an internal compiler
+    # error when run in C++11 mode with libstdc++-4.7 (from gcc).
+    #
+    # We just disable C++11 mode in this case
+    #
+    # [1] http://software.intel.com/en-us/forums/topic/472385
+    #
+    # - Matthias Maier, 2013
+    #
+    CHECK_CXX_SOURCE_COMPILES(
+      "
+      #include <vector>
+      template<typename T> void foo()
+      {
+        std::vector<double> data(100);
+      }
+      int main()
+      {
+        foo<int>();
+      }
+      "
+      DEAL_II_HAVE_CXX11_ICCLIBSTDCPP47CXX11BUG_OK)
+
+    RESET_CMAKE_REQUIRED()
+  ENDIF()
+
+  IF( DEAL_II_HAVE_CXX11_ARRAY AND
+      DEAL_II_HAVE_CXX11_CONDITION_VARIABLE AND
+      DEAL_II_HAVE_CXX11_FUNCTIONAL AND
+      DEAL_II_HAVE_CXX11_FUNCTIONAL_GCCBUG35569_OK AND
+      DEAL_II_HAVE_CXX11_FUNCTIONAL_LLVMBUG20084_OK AND
+      DEAL_II_HAVE_CXX11_SHARED_PTR AND
+      DEAL_II_HAVE_CXX11_THREAD AND
+      DEAL_II_HAVE_CXX11_MUTEX AND
+      DEAL_II_HAVE_CXX11_TUPLE AND
+      DEAL_II_HAVE_CXX11_TYPE_TRAITS AND
+      DEAL_II_HAVE_CXX11_MACOSXC99BUG_OK AND
+      DEAL_II_HAVE_CXX11_ICCNUMERICLIMITSBUG_OK AND
+      DEAL_II_HAVE_CXX11_ICCLIBSTDCPP47CXX11BUG_OK )
+      SET(DEAL_II_HAVE_CXX11 TRUE)
+  ENDIF()
+ENDIF()
+
+
+#
+# Finally disable cxx14 if cxx11 detection failed for whatever reason. This
+# can happen if any of our compile checks above fails, for example threading
+# support.
+# 
+IF (DEAL_II_HAVE_CXX14 AND NOT DEAL_II_HAVE_CXX11)
+  MESSAGE(STATUS "Disabling CXX14 support because CXX11 detection failed.")
+  SET(DEAL_II_HAVE_CXX14 FALSE)
+ENDIF()
+
+#
+# Set up configuration options for C++11 and C++14 support:
+#
+
+OPTION(DEAL_II_WITH_CXX11
+  "Compile deal.II using C++11 language standard."
+  ${DEAL_II_HAVE_CXX11}
+  )
+
+OPTION(DEAL_II_WITH_CXX14
+  "Compile deal.II using C++14 language standard."
+  ${DEAL_II_HAVE_CXX14}
+  )
+
+#
+# Bail out if C++<version> support was requested (DEAL_II_WITH_CXX<version>)
+# but the tests above did not establish it (DEAL_II_HAVE_CXX<version>):
+#
+
+MACRO(_bailout _version)
+  IF(DEAL_II_WITH_CXX${_version} AND NOT DEAL_II_HAVE_CXX${_version})
+    MESSAGE(FATAL_ERROR "\n"
+      "C++${_version} support was requested (DEAL_II_WITH_CXX${_version}=${DEAL_II_WITH_CXX${_version}}) but is not "
+      "supported by the current compiler.\n"
+      "Please disable C++${_version} support, i.e. configure with\n"
+      "    -DDEAL_II_WITH_CXX${_version}=FALSE,\n"
+      "or use a different compiler, instead. If the compiler flag for C++${_version} "
+      "support differs from \"-std=c++0x\" or \"-std=c++${_version}\", a suitable "
+      "compiler flag has to be specified manually via\n"
+      "    -DDEAL_II_CXX_VERSION_FLAG=\"...\"\n\n"
+      )
+  ENDIF()
+ENDMACRO()
+
+_bailout("11")
+_bailout("14")
+
+IF (DEAL_II_WITH_CXX14)
+  ADD_FLAGS(DEAL_II_CXX_FLAGS "${DEAL_II_CXX_VERSION_FLAG}")
+  MESSAGE(STATUS "DEAL_II_WITH_CXX11 successfully set up")
+  MESSAGE(STATUS "DEAL_II_WITH_CXX14 successfully set up")
+ELSEIF(DEAL_II_WITH_CXX11)
+  ADD_FLAGS(DEAL_II_CXX_FLAGS "${DEAL_II_CXX_VERSION_FLAG}")
+  MESSAGE(STATUS "DEAL_II_WITH_CXX11 successfully set up")
+ELSE()
+  MESSAGE(STATUS "DEAL_II_WITH_CXX14 and DEAL_II_WITH_CXX11 are both disabled")
+ENDIF()
+
+
+########################################################################
+#                                                                      #
+#                   Check for various C++ features:                    #
+#                                                                      #
+########################################################################
+
+#
+# Some compilers (such as Intel 15.3 and GCC 4.9.2) support the flags
+# "-std=c++11" and "-std=c++14" but do not support
+# 'std::is_trivially_copyable', so check for support in C++11 or newer.
+#
+IF(DEAL_II_WITH_CXX11)
+  PUSH_CMAKE_REQUIRED("${DEAL_II_CXX_VERSION_FLAG}")
+  CHECK_CXX_SOURCE_COMPILES(
+    "
+  #include <type_traits>
+  int main(){ std::is_trivially_copyable<int> bob; }
+  "
+    DEAL_II_HAVE_CXX11_IS_TRIVIALLY_COPYABLE)
+  RESET_CMAKE_REQUIRED()
+ELSE()
+  SET(DEAL_II_HAVE_CXX11_IS_TRIVIALLY_COPYABLE FALSE)
+ENDIF()
+
+CHECK_CXX_SOURCE_COMPILES(
+  "
+  #include <cmath>
+  int main(){ double d=0; std::isnan (d); return 0; }
+  "
+  DEAL_II_HAVE_STD_ISNAN)
+
+
+CHECK_CXX_SOURCE_COMPILES(
+  "
+  #include <cmath>
+  int main(){ double d=0; isnan (d); return 0; }
+  "
+  DEAL_II_HAVE_ISNAN)
+
+
+CHECK_CXX_SOURCE_COMPILES(
+  "
+  #include <cmath>
+  int main(){ double d=0; _isnan (d); return 0; }
+  "
+  DEAL_II_HAVE_UNDERSCORE_ISNAN)
+
+
+CHECK_CXX_SOURCE_COMPILES(
+  "
+  #include <cmath>
+  int main(){ double d=0; std::isfinite (d); return 0; }
+  "
+  DEAL_II_HAVE_ISFINITE)
+
+
+#
+# Check that we can use feenableexcept through the C++11 header file cfenv:
+#
+# The test is a bit more complicated because we also check that no garbage
+# exception is thrown if we convert -std::numeric_limits<double>::max to a
+# string. This sadly happens with some compiler support libraries :-(
+#
+# - Timo Heister, 2015
+#
+
+# This test requires C++11
+IF(DEAL_II_WITH_CXX11)
+  PUSH_CMAKE_REQUIRED("${DEAL_II_CXX_VERSION_FLAG}")
+  IF(DEAL_II_ALLOW_PLATFORM_INTROSPECTION)
+    CHECK_CXX_SOURCE_RUNS(
+      "
+      #include <cfenv>
+      #include <limits>
+      #include <sstream>
+
+      int main()
+      {
+        feenableexcept(FE_DIVBYZERO|FE_INVALID);
+        std::ostringstream description;
+        const double lower_bound = -std::numeric_limits<double>::max();
+
+        description << lower_bound;
+
+        return 0;
+      }
+      "
+       DEAL_II_HAVE_FP_EXCEPTIONS)
+  ELSE()
+    #
+    # If we are not allowed to do platform introspection, just test whether
+    # we can compile the above code.
+    #
+    CHECK_CXX_SOURCE_COMPILES(
+      "
+      #include <cfenv>
+      #include <limits>
+      #include <sstream>
+
+      int main()
+      {
+        feenableexcept(FE_DIVBYZERO|FE_INVALID);
+        std::ostringstream description;
+        const double lower_bound = -std::numeric_limits<double>::max();
+
+        description << lower_bound;
+
+        return 0;
+      }
+      "
+       DEAL_II_HAVE_FP_EXCEPTIONS)
+  ENDIF()
+  RESET_CMAKE_REQUIRED()
+ELSE()
+  SET(DEAL_II_HAVE_FP_EXCEPTIONS FALSE)
+ENDIF()
+
+#
+# Check whether the standard library provides operator* overloads for mixed
+# floating point multiplication of complex and real valued numbers.
+#
+# - Matthias Maier, 2015
+#
+CHECK_CXX_SOURCE_COMPILES(
+  "
+  #include <complex>
+
+  int main()
+  {
+    double() * std::complex<float>();
+    std::complex<float>() * double();
+    float() * std::complex<double>();
+    std::complex<double>() * float();
+    std::complex<double>() * std::complex<float>();
+    std::complex<float>() * std::complex<double>();
+
+    return 0;
+  }
+  "
+  DEAL_II_HAVE_COMPLEX_OPERATOR_OVERLOADS)
+
diff --git a/cmake/checks/check_02_system_features.cmake b/cmake/checks/check_02_system_features.cmake
new file mode 100644
index 0000000..655e5eb
--- /dev/null
+++ b/cmake/checks/check_02_system_features.cmake
@@ -0,0 +1,146 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# This file sets up:
+#
+#   DEAL_II_HAVE_GETHOSTNAME
+#   DEAL_II_HAVE_GETPID
+#   DEAL_II_HAVE_JN
+#   DEAL_II_HAVE_SYS_RESOURCE_H
+#   DEAL_II_HAVE_SYS_TIME_H
+#   DEAL_II_HAVE_SYS_TIMES_H
+#   DEAL_II_HAVE_SYS_TYPES_H
+#   DEAL_II_HAVE_TIMES
+#   DEAL_II_HAVE_UNISTD_H
+#   DEAL_II_MSVC
+#
+
+
+########################################################################
+#                                                                      #
+#                    POSIX and Linux specific tests:                   #
+#                                                                      #
+########################################################################
+
+#
+# Check for various posix (and linux) specific header files and symbols
+#
+CHECK_INCLUDE_FILE_CXX("sys/resource.h" DEAL_II_HAVE_SYS_RESOURCE_H)
+
+CHECK_INCLUDE_FILE_CXX("sys/time.h" DEAL_II_HAVE_SYS_TIME_H)
+
+CHECK_INCLUDE_FILE_CXX("sys/times.h" DEAL_II_HAVE_SYS_TIMES_H)
+CHECK_CXX_SYMBOL_EXISTS("times" "sys/times.h" DEAL_II_HAVE_TIMES)
+
+CHECK_INCLUDE_FILE_CXX("sys/types.h" DEAL_II_HAVE_SYS_TYPES_H)
+
+CHECK_INCLUDE_FILE_CXX("unistd.h" DEAL_II_HAVE_UNISTD_H)
+CHECK_CXX_SYMBOL_EXISTS("gethostname" "unistd.h" DEAL_II_HAVE_GETHOSTNAME)
+CHECK_CXX_SYMBOL_EXISTS("getpid" "unistd.h" DEAL_II_HAVE_GETPID)
+
+#
+# Do we have the Bessel function jn?
+#
+FIND_SYSTEM_LIBRARY(m_LIBRARY NAMES m)
+MARK_AS_ADVANCED(m_LIBRARY)
+
+IF(NOT m_LIBRARY MATCHES "-NOTFOUND")
+  LIST(APPEND CMAKE_REQUIRED_LIBRARIES ${m_LIBRARY})
+  CHECK_CXX_SYMBOL_EXISTS("jn" "math.h" DEAL_II_HAVE_JN)
+  RESET_CMAKE_REQUIRED()
+  IF(DEAL_II_HAVE_JN)
+    LIST(APPEND DEAL_II_LIBRARIES ${m_LIBRARY})
+  ENDIF()
+ENDIF()
+
+
+
+########################################################################
+#                                                                      #
+#                        Mac OSX specific setup:                       #
+#                                                                      #
+########################################################################
+
+IF(CMAKE_SYSTEM_NAME MATCHES "Darwin")
+  #
+  # Use -Wno-long-double on Apple Darwin to avoid some unnecessary
+  # warnings. However, newer gccs on that platform do not have
+  # this flag any more, so check whether we can indeed do this
+  #
+  ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-Wno-long-double")
+
+  #
+  # On Mac OS X, -rdynamic is accepted by the compiler (i.e.
+  # it doesn't produce an error) but we always get a warning
+  # that it isn't supported.
+  #
+  # TODO: MM: Check whether this is still necessary...
+  #
+  STRIP_FLAG(DEAL_II_LINKER_FLAGS "-rdynamic")
+
+  #
+  # At least on Clang 5.0.0 the template depth is set to 128, which is too low
+  # to compile parts of the library. Fix this by setting a large value.
+  #
+  ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-ftemplate-depth=1024")
+ENDIF()
+
+
+
+########################################################################
+#                                                                      #
+#                   Windows and CYGWIN specific setup:                 #
+#                                                                      #
+########################################################################
+
+#
+# Put an end to user's suffering from cygwin's defects
+#
+IF( CMAKE_SYSTEM_NAME MATCHES "CYGWIN" OR
+    CMAKE_SYSTEM_NAME MATCHES "Windows" )
+  IF(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
+    MESSAGE(FATAL_ERROR
+      "\nCygwin and forks such as MinGW and MinGW-64 are unsupported due to "
+      "multiple unresolved miscompilation issues.\n\n"
+      )
+  ENDIF()
+ENDIF()
+
+IF(CMAKE_SYSTEM_NAME MATCHES "Windows")
+
+  #
+  # Export DEAL_II_MSVC if we are on a Windows platform:
+  #
+  SET(DEAL_II_MSVC TRUE)
+
+  #
+  # Shared library handling:
+  #
+
+  IF(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
+    # With MinGW we're lucky:
+    ENABLE_IF_LINKS(DEAL_II_LINKER_FLAGS "-Wl,--export-all-symbols")
+    ENABLE_IF_LINKS(DEAL_II_LINKER_FLAGS "-Wl,--enable-auto-import")
+    ENABLE_IF_LINKS(DEAL_II_LINKER_FLAGS "-Wl,--allow-multiple-definition")
+  ELSE()
+    # Otherwise disable shared libraries:
+    MESSAGE(WARNING "\n"
+      "BUILD_SHARED_LIBS forced to OFF\n\n"
+      )
+    SET(BUILD_SHARED_LIBS OFF CACHE BOOL "" FORCE)
+  ENDIF()
+
+ENDIF()
diff --git a/cmake/checks/check_03_compiler_bugs.cmake b/cmake/checks/check_03_compiler_bugs.cmake
new file mode 100644
index 0000000..fa88b64
--- /dev/null
+++ b/cmake/checks/check_03_compiler_bugs.cmake
@@ -0,0 +1,444 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+########################################################################
+#                                                                      #
+#                   Check for various compiler bugs:                   #
+#                                                                      #
+########################################################################
+
+
+#
+# On some gcc 4.3 snapshots, a 'const' qualifier on a return type triggers a
+# warning. This is unfortunate, since we happen to stumble on this
+# in some of our template trickery with iterator classes. If necessary,
+# do not use the relevant warning flag
+#
+# - Wolfgang Bangerth, Matthias Maier, rewritten 2012
+#
+PUSH_CMAKE_REQUIRED("-Wreturn-type")
+PUSH_CMAKE_REQUIRED("-Werror")
+CHECK_CXX_COMPILER_BUG(
+  "
+  const double foo() { return 1.; }
+  int main() { return 0; }
+  "
+  DEAL_II_WRETURN_TYPE_CONST_QUALIFIER_BUG
+  )
+RESET_CMAKE_REQUIRED()
+
+IF(DEAL_II_WRETURN_TYPE_CONST_QUALIFIER_BUG)
+  ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS -Wno-return-type)
+ENDIF()
+
+
+#
+# gcc 4.4 has an interesting problem in that it doesn't care for one of
+# BOOST signals2's header files and produces dozens of pages of error
+# messages of the form
+#   warning: invoking macro BOOST_PP_CAT argument 1: \
+#   empty macro arguments are undefined in ISO C90 and ISO C++98
+# This can be avoided by not using -pedantic for this compiler; all other
+# versions keep it. The version regex is anchored so only 4.4.x matches.
+#
+# - Wolfgang Bangerth, Matthias Maier, rewritten 2012
+#
+IF(CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND
+   CMAKE_CXX_COMPILER_VERSION MATCHES "^4\\.4\\.")
+  STRIP_FLAG(DEAL_II_CXX_FLAGS "-pedantic")
+ENDIF()
+
+
+#
+# Newer gcc versions generate a large number of warnings inside boost if we
+# are compiling without cxx11 but with -pedantic and there is no way to
+# silence them.
+#
+IF(CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND
+   NOT DEAL_II_WITH_CXX11)
+  STRIP_FLAG(DEAL_II_CXX_FLAGS "-pedantic")
+ENDIF()
+
+
+#
+# In some cases, we would like to name partial specializations
+# as friends. However, the standard forbids us to do so. But
+# then, we can declare the general template as a friend, and
+# at least gcc extends the friendship to all specializations
+# of the templates, which is not what the standard says.
+#
+# With other compilers, most notably cxx, this does not work.
+# In this case, we can make individual specializations friends,
+# which in turn gcc rejects. So check, whether this is possible.
+#
+# The respective clause in the standard is 14.5.3.1, which gives
+# this example:
+#   template<class T> class task {
+#     friend class task<int>;
+#   };
+#
+# - Wolfgang Bangerth, Matthias Maier, rewritten 2012
+#
+CHECK_CXX_COMPILER_BUG(
+  "
+  template <int N, typename T> class X;
+  template <typename T>        class X<1,T>;
+
+  template <typename P> class Y {
+      static int i;
+      template <int N, typename T> friend class X;
+      friend class X<1,P>;
+  };
+
+  template <typename T> class X<1,T> {
+      int f () { return Y<T>::i; };     // access private field
+  };
+  int main() { return 0; }
+  "
+  DEAL_II_TEMPL_SPEC_FRIEND_BUG)
+
+
+#
+# This is a variant of the previous test. Some icc 11.0
+# builds (sub-releases) on Windows apparently don't allow
+# the declaration of an explicit specialization of member
+# arrays of templates:
+#
+# template <int dim>
+# struct X
+# {
+#    static const int N = 2*dim;
+#    static const int x[N];
+# };
+# template <> const int X<2>::x[N];
+#
+# That version of icc requests that there be an initialization,
+# i.e. it thinks that this is the *definition*, not merely a
+# *declaration* of an explicit specialization. This is wrong,
+# however.
+#
+# - Wolfgang Bangerth, Matthias Maier, rewritten 2012
+#
+CHECK_CXX_COMPILER_BUG(
+  "
+  template <int dim>
+  struct X
+  {
+    static const int N = 2*dim;
+    static const int x[N];
+  };
+  template <> const int X<2>::x[N];
+  int main() { return 0; }
+  "
+  DEAL_II_MEMBER_ARRAY_SPECIALIZATION_BUG
+  )
+
+
+#
+# Many compilers get this wrong (see Section 14.7.3.1, number (4)):
+#
+#   template <int dim> struct T {
+#     static const int i;
+#   };
+#
+#   template <> const int T<1>::i;
+#   template <> const int T<1>::i = 1;
+#
+# First, by Section 14.7.3.14 of the standard, the first template<>
+# line must necessarily be the _declaration_ of a specialization,
+# and the second is then its definition. There is therefore no
+# reason to report a doubly defined variable (Intel ICC 6.0), or
+# to choke on these lines at all (Sun Forte)
+#
+# - Wolfgang Bangerth, Matthias Maier, rewritten 2012
+#
+CHECK_CXX_COMPILER_BUG(
+  "
+  template <int dim> struct T
+  {
+    static const int i;
+  };
+  template <> const int T<1>::i;
+  template <> const int T<1>::i = 1;
+  int main() {return 0;}
+  "
+  DEAL_II_MEMBER_VAR_SPECIALIZATION_BUG
+  )
+
+
+#
+# Some older versions of gcc compile this, despite the 'explicit'
+# keyword:
+#
+# struct X {
+#     template <typename T>
+#     explicit X(T);
+# };
+# void f(X);
+# int main () { f(1); }
+#
+# Check for this misfeature.
+#
+# - Wolfgang Bangerth, Matthias Maier, rewritten 2012
+#
+CHECK_CXX_SOURCE_COMPILES(
+  "
+  struct X {
+    template <typename T>
+    explicit X(T) {}
+  };
+  void f(X) {}
+  int main() { f(1); }
+  "
+  DEAL_II_EXPLICIT_CONSTRUCTOR_BUG
+  )
+
+
+#
+# Some older versions of gcc deduce pointers to const functions in
+# template contexts to pointer-to-function of const objects.
+# This is not correct.
+#
+# Check for this misfeature.
+#
+# - Wolfgang Bangerth, Matthias Maier, rewritten 2012
+#
+CHECK_CXX_COMPILER_BUG(
+  "
+  template <typename T> struct identity { typedef T type; };
+  template <typename C> void new_thread (void (C::*fun_ptr)(),
+                typename identity<C>::type &c) {}
+  template <typename C> void new_thread (void (C::*fun_ptr)() const,
+                const typename identity<C>::type &c) {}
+  struct X { void f() const{} };
+
+  int main()
+  {
+    X x;
+    new_thread (&X::f, x);
+  }
+  "
+  DEAL_II_CONST_MEMBER_DEDUCTION_BUG
+  )
+
+
+#
+# Check for GCC bug 36052, see
+#   http://gcc.gnu.org/bugzilla/show_bug.cgi?id=36052
+#
+# - Wolfgang Bangerth, Matthias Maier, rewritten 2012
+#
+
+CHECK_CXX_COMPILER_BUG(
+  "
+  struct S {
+      typedef double value_type;
+  };
+
+  template <typename T> struct Traits {
+      typedef const typename T::value_type dereference_type;
+  };
+
+  template <class BlockVectorType> struct ConstIterator {
+      typedef typename Traits<BlockVectorType>::dereference_type dereference_type;
+
+      dereference_type operator * () const  { return 0; }
+  };
+  template class ConstIterator<S>;
+  int main(){return 0;}
+  "
+  DEAL_II_TYPE_QUALIFIER_BUG)
+
+IF(DEAL_II_TYPE_QUALIFIER_BUG)
+  ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS -Wno-ignored-qualifiers)
+ENDIF()
+
+
+#
+# On Mac OS X, gcc appears to have a bug that prevents us from
+# compiling a bit of code that involves boost::bind. Check for
+# that.
+#
+# - Wolfgang Bangerth, Matthias Maier, rewritten 2012
+#
+IF(DEAL_II_HAVE_BUNDLED_DIRECTORY)
+  CHECK_CXX_COMPILER_BUG(
+    "
+    #include <complex>
+    #include <iostream>
+    #include \"${BOOST_FOLDER}/include/boost/bind.hpp\"
+
+    template<typename number>
+    void bug_function (number test)
+    {
+      std::cout << test << std::endl;
+    }
+    int main()
+    {
+      std::complex<float> float_val (1., 2.);
+      boost::bind(&bug_function<std::complex<float> >,
+                  float_val)();
+      return 0;
+    }
+    "
+    DEAL_II_BOOST_BIND_COMPILER_BUG
+    )
+ENDIF()
+
+
+#
+# Microsoft Visual C++ has a bug where the resulting object
+# from calling std::bind does not have a const operator(),
+# so we cannot pass such objects as const references as we
+# usually do with input arguments of other functions.
+#
+# - Wolfgang Bangerth, 2014
+#
+IF(DEAL_II_WITH_CXX11)
+  PUSH_CMAKE_REQUIRED("${DEAL_II_CXX_VERSION_FLAG}")
+  CHECK_CXX_COMPILER_BUG(
+    "
+    #include <functional>
+
+    void f(int, int) {}
+
+    template <typename F>
+    void g(const F &func)
+    {
+      func(1);
+    }
+
+    int main ()
+    {
+      g (std::bind(&f, std::placeholders::_1, 1));
+    }
+    "
+    DEAL_II_BIND_NO_CONST_OP_PARENTHESES
+    )
+  RESET_CMAKE_REQUIRED()
+ELSE()
+  CHECK_CXX_COMPILER_BUG(
+    "
+    #include <functional>
+    #include \"${BOOST_FOLDER}/include/boost/bind.hpp\"
+
+    void f(int, int) {}
+
+    template <typename F>
+    void g(const F &func)
+    {
+      func(1);
+    }
+
+    int main ()
+    {
+      using boost::bind;
+      using boost::reference_wrapper;
+  
+      // now also import the _1, _2 placeholders from the global namespace
+      // into the current one as suggested above
+      using ::_1;
+
+      g (boost::bind(&f, boost::_1, 1));
+    }
+    "
+    DEAL_II_BIND_NO_CONST_OP_PARENTHESES
+    )
+ENDIF()
+
+
+#
+# In intel (at least 13.1 and 14), vectorization causes
+# wrong code. See https://code.google.com/p/dealii/issues/detail?id=156
+# or tests/hp/solution_transfer.cc
+# The work-around disables vectorization in release mode before 15.0.3.
+#
+# - Timo Heister, 2013, 2015
+#
+IF(CMAKE_CXX_COMPILER_ID MATCHES "Intel" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "15.0.3" )
+  ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS_RELEASE "-no-vec")
+ENDIF()
+
+
+#
+# gcc-4.8.1 has some problems with the constexpr "vertices_per_cell" in the
+# definition of alternating_form_at_vertices.
+#
+# TODO: Write a unit test.
+#
+# For now, just enable the workaround for Windows targets
+#
+# - Matthias Maier, 2013
+#
+IF( CMAKE_SYSTEM_NAME MATCHES "CYGWIN"
+    OR CMAKE_SYSTEM_NAME MATCHES "Windows" )
+  SET(DEAL_II_CONSTEXPR_BUG TRUE)
+ENDIF()
+
+
+#
+# Intel (at least 14, 15) has a bug where it incorrectly detects multiple
+# matching function candidates and dies during type resolution in a
+# perfectly valid SFINAE scenario. This seems to happen because the templated
+# variant is not discarded (where it should be):
+#
+# error: more than one instance of overloaded function
+#     "has_vmult_add<Range, T>::test [with Range=double, T=MyMatrix]"
+# matches the argument list:
+#     function template "void has_vmult_add<Range, T>::test<C>(decltype((<expression>))) [with Range=double, T=MyMatrix]"
+#     function template "void has_vmult_add<Range, T>::test<C>(decltype((&C::vmult_add<double>))) [with Range=double, T=MyMatrix]"
+# [...]
+#
+# - Matthias Maier, 2015
+#
+
+IF(DEAL_II_WITH_CXX11)
+  PUSH_CMAKE_REQUIRED("${DEAL_II_CXX_VERSION_FLAG}")
+  CHECK_CXX_COMPILER_BUG(
+    "
+    template <typename Range, typename T> struct has_vmult_add
+    {
+      template <typename C>
+      static void test(decltype(&C::vmult_add));
+
+      template <typename C>
+      static void test(decltype(&C::template vmult_add<Range>));
+
+      typedef decltype(test<T>(0)) type;
+    };
+
+    struct MyMatrix
+    {
+      void vmult_add() const;
+    };
+
+    int main()
+    {
+      typedef has_vmult_add<double, MyMatrix>::type test;
+    }
+    "
+    DEAL_II_ICC_SFINAE_BUG
+    )
+  RESET_CMAKE_REQUIRED()
+ENDIF()
+
+#
+# Intel 16.0.1 produces wrong code that creates a race condition in
+# tests/fe/curl_curl_01.debug but 16.0.2 is known to work. Blacklist this
+# version. Also see github.com/dealii/dealii/issues/2203
+#
+IF(CMAKE_CXX_COMPILER_ID MATCHES "Intel" AND CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL "16.0.1" )
+  MESSAGE(FATAL_ERROR "Intel compiler version 16.0.1 is not supported, please update to 16.0.2 or newer!")
+ENDIF()
diff --git a/cmake/checks/check_03_generator_bugs.cmake b/cmake/checks/check_03_generator_bugs.cmake
new file mode 100644
index 0000000..b7ca155
--- /dev/null
+++ b/cmake/checks/check_03_generator_bugs.cmake
@@ -0,0 +1,33 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2013 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+
+#
+# CMake's Ninja generator is currently incompatible with everything but gcc
+# and clang.
+#
+# - Matthias Maier, 2013
+#
+
+IF( CMAKE_GENERATOR MATCHES "Ninja" AND NOT
+    ( CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR
+      CMAKE_CXX_COMPILER_ID MATCHES "Clang"  ) )
+  MESSAGE(FATAL_ERROR "\n"
+    "Error!\n"
+    "The CMAKE_GENERATOR \"${CMAKE_GENERATOR}\" "
+    "currently only supports the GNU and Clang C++ compilers, but "
+    "\"${CMAKE_CXX_COMPILER_ID}\" was found.\n\n"
+    )
+ENDIF()
diff --git a/cmake/config/CMakeLists.txt b/cmake/config/CMakeLists.txt
new file mode 100644
index 0000000..61cbfd4
--- /dev/null
+++ b/cmake/config/CMakeLists.txt
@@ -0,0 +1,242 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# This file sets up the project configuration consisting of
+#
+#   deal.IIConfig.cmake
+#   deal.IIConfigVersion.cmake
+#   Make.global_options
+#
+# We support two configurations out of which deal.II can be used - directly
+# from the build directory or after installation. So we have to prepare
+# two distinct setups.
+#
+
+MESSAGE(STATUS "Setting up project configuration")
+
+#
+# Configure the template-arguments file
+#
+CONFIGURE_FILE( # for binary dir:
+  ${CMAKE_CURRENT_SOURCE_DIR}/template-arguments.in
+  ${CMAKE_BINARY_DIR}/${DEAL_II_SHARE_RELDIR}/template-arguments
+  )
+
+
+########################################################################
+#                                                                      #
+#                   Setup and install cmake macros:                    #
+#                                                                      #
+########################################################################
+
+SET(_macros
+  ${CMAKE_SOURCE_DIR}/cmake/macros/macro_deal_ii_initialize_cached_variables.cmake
+  ${CMAKE_SOURCE_DIR}/cmake/macros/macro_deal_ii_invoke_autopilot.cmake
+  ${CMAKE_SOURCE_DIR}/cmake/macros/macro_deal_ii_setup_target.cmake
+  ${CMAKE_SOURCE_DIR}/cmake/macros/macro_deal_ii_query_git_information.cmake
+  ${CMAKE_SOURCE_DIR}/cmake/macros/macro_deal_ii_add_test.cmake
+  ${CMAKE_SOURCE_DIR}/cmake/macros/macro_deal_ii_pickup_tests.cmake
+  )
+FILE(COPY ${_macros}
+  DESTINATION ${CMAKE_BINARY_DIR}/${DEAL_II_SHARE_RELDIR}/macros
+  )
+INSTALL(FILES ${_macros}
+  DESTINATION ${DEAL_II_SHARE_RELDIR}/macros
+  COMPONENT library
+  )
+
+
+########################################################################
+#                                                                      #
+#        Configure and install the cmake project configuration:        #
+#                                                                      #
+########################################################################
+
+#
+# Do not force --as-needed for executables on user side:
+#
+
+STRIP_FLAG(DEAL_II_LINKER_FLAGS "-Wl,--as-needed")
+
+#
+# Strip -Wno-deprecated-declarations from DEAL_II_CXX_FLAGS so that
+# deprecation warnings are actually shown for user code:
+#
+STRIP_FLAG(DEAL_II_CXX_FLAGS "-Wno-deprecated-declarations")
+
+#
+# Populate a bunch of CONFIG_* variables with useful information:
+#
+
+FOREACH(_build ${DEAL_II_BUILD_TYPES})
+
+  IF(NOT "${CMAKE_BUILD_TYPE}" STREQUAL "DebugRelease")
+    SET(_keyword "general")
+  ELSE()
+    IF(_build MATCHES DEBUG)
+      SET(_keyword "debug")
+    ELSE()
+      SET(_keyword "optimized")
+    ENDIF()
+  ENDIF()
+
+  #
+  # Build up library name depending on link type and platform:
+  #
+
+  IF(BUILD_SHARED_LIBS)
+    SET(_type "SHARED")
+  ELSE()
+    SET(_type "STATIC")
+  ENDIF()
+
+  SET(_name "${CMAKE_${_type}_LIBRARY_PREFIX}${DEAL_II_BASE_NAME}${DEAL_II_${_build}_SUFFIX}${CMAKE_${_type}_LIBRARY_SUFFIX}")
+  SET(CONFIG_LIBRARIES_${_build}
+    "\${DEAL_II_PATH}/${DEAL_II_LIBRARY_RELDIR}/${_name}"
+    ${DEAL_II_LIBRARIES_${_build}}
+    ${DEAL_II_LIBRARIES}
+    )
+  TO_STRING(MAKEFILE_LIBRARIES_${_build} ${CONFIG_LIBRARIES_${_build}})
+  TO_STRING(MAKEFILE_USER_DEFINITIONS_${_build} ${DEAL_II_USER_DEFINITIONS_${_build}})
+  LIST(APPEND CONFIG_LIBRARIES ${_keyword} \${DEAL_II_LIBRARIES_${_build}})
+
+  SET(CONFIG_TARGET_${_build} ${DEAL_II_BASE_NAME}${DEAL_II_${_build}_SUFFIX})
+  LIST(APPEND CONFIG_TARGET ${_keyword} \${DEAL_II_TARGET_${_build}})
+ENDFOREACH()
+
+TO_STRING(MAKEFILE_BUILD_TYPES ${DEAL_II_BUILD_TYPES})
+TO_STRING(MAKEFILE_USER_DEFINITIONS ${DEAL_II_USER_DEFINITIONS})
+
+IF(DEAL_II_STATIC_EXECUTABLE)
+  SET(MAKEFILE_STATIC_EXECUTABLE "true")
+ELSE()
+  SET(MAKEFILE_STATIC_EXECUTABLE "false")
+ENDIF()
+
+#
+# For binary dir:
+#
+
+SET(CONFIG_BUILD_DIR TRUE)
+SET(MAKEFILE_BUILD_DIR "true")
+SET(CONFIG_INCLUDE_DIRS
+  \${DEAL_II_PATH}/include
+  ${CMAKE_SOURCE_DIR}/include/
+  ${DEAL_II_BUNDLED_INCLUDE_DIRS}
+  ${DEAL_II_USER_INCLUDE_DIRS}
+  )
+CONFIGURE_FILE(
+  ${CMAKE_CURRENT_SOURCE_DIR}/Config.cmake.in
+  ${CMAKE_BINARY_DIR}/${DEAL_II_PROJECT_CONFIG_RELDIR}/${DEAL_II_PROJECT_CONFIG_NAME}Config.cmake
+  @ONLY
+  )
+CONFIGURE_FILE(
+  ${CMAKE_CURRENT_SOURCE_DIR}/ConfigVersion.cmake.in
+  ${CMAKE_BINARY_DIR}/${DEAL_II_PROJECT_CONFIG_RELDIR}/${DEAL_II_PROJECT_CONFIG_NAME}ConfigVersion.cmake
+  @ONLY
+  )
+TO_STRING(MAKEFILE_INCLUDE_DIRS ${CONFIG_INCLUDE_DIRS})
+CONFIGURE_FILE(
+  ${CMAKE_CURRENT_SOURCE_DIR}/Make.global_options.in
+  ${CMAKE_BINARY_DIR}/${DEAL_II_SHARE_RELDIR}/Make.global_options
+  @ONLY
+  )
+
+#
+# For installation:
+#
+
+SET(CONFIG_BUILD_DIR FALSE)
+SET(MAKEFILE_BUILD_DIR "false")
+SET(CONFIG_INCLUDE_DIRS
+  \${DEAL_II_PATH}/\${DEAL_II_INCLUDE_RELDIR}
+  \${DEAL_II_PATH}/\${DEAL_II_INCLUDE_RELDIR}/deal.II/bundled
+  ${DEAL_II_USER_INCLUDE_DIRS}
+  )
+CONFIGURE_FILE(
+  ${CMAKE_CURRENT_SOURCE_DIR}/Config.cmake.in
+  ${CMAKE_CURRENT_BINARY_DIR}/${DEAL_II_PROJECT_CONFIG_NAME}Config.cmake
+  @ONLY
+  )
+CONFIGURE_FILE(
+  ${CMAKE_CURRENT_SOURCE_DIR}/ConfigVersion.cmake.in
+  ${CMAKE_CURRENT_BINARY_DIR}/${DEAL_II_PROJECT_CONFIG_NAME}ConfigVersion.cmake
+  @ONLY
+  )
+TO_STRING(MAKEFILE_INCLUDE_DIRS ${CONFIG_INCLUDE_DIRS})
+CONFIGURE_FILE(
+  ${CMAKE_CURRENT_SOURCE_DIR}/Make.global_options.in
+  ${CMAKE_CURRENT_BINARY_DIR}/Make.global_options
+  @ONLY
+  )
+INSTALL(FILES
+  ${CMAKE_CURRENT_BINARY_DIR}/${DEAL_II_PROJECT_CONFIG_NAME}Config.cmake
+  ${CMAKE_CURRENT_BINARY_DIR}/${DEAL_II_PROJECT_CONFIG_NAME}ConfigVersion.cmake
+  DESTINATION ${DEAL_II_PROJECT_CONFIG_RELDIR}
+  COMPONENT library
+  )
+INSTALL(FILES
+  ${CMAKE_CURRENT_BINARY_DIR}/Make.global_options
+  DESTINATION ${DEAL_II_SHARE_RELDIR}
+  COMPONENT library
+  )
+
+#
+# Append feature configuration to all configuration files:
+#
+
+SET(_files
+  ${CMAKE_BINARY_DIR}/${DEAL_II_PROJECT_CONFIG_RELDIR}/${DEAL_II_PROJECT_CONFIG_NAME}Config.cmake
+  ${CMAKE_CURRENT_BINARY_DIR}/${DEAL_II_PROJECT_CONFIG_NAME}Config.cmake
+  )
+SET(_makefiles
+  ${CMAKE_BINARY_DIR}/${DEAL_II_SHARE_RELDIR}/Make.global_options
+  ${CMAKE_CURRENT_BINARY_DIR}/Make.global_options
+  )
+FOREACH(_file ${_files} ${_makefiles})
+  FILE(APPEND ${_file} "\n\n#\n# Feature configuration:\n#\n\n")
+ENDFOREACH()
+
+GET_CMAKE_PROPERTY(res VARIABLES)
+FOREACH(var ${res})
+  IF(var MATCHES "DEAL_II_WITH")
+    STRING(REPLACE "DEAL_II_WITH_" "" _name ${var})
+
+    FOREACH(_file ${_files})
+      FILE(APPEND ${_file} "SET(${var} ${${var}})\n")
+      IF(${var} AND NOT "${${_name}_VERSION}" STREQUAL "")
+        FILE(APPEND ${_file}
+          "SET(DEAL_II_${_name}_VERSION \"${${_name}_VERSION}\")\n"
+          )
+      ENDIF()
+    ENDFOREACH()
+
+    FOREACH(_file ${_makefiles})
+      IF(${var})
+        FILE(APPEND ${_file} "${var} = true\n")
+        IF(NOT "${${_name}_VERSION}" STREQUAL "")
+          FILE(APPEND ${_file}
+            "DEAL_II_${_name}_VERSION = ${${_name}_VERSION}\n"
+            )
+        ENDIF()
+      ELSE()
+        FILE(APPEND ${_file} "${var} = false\n")
+      ENDIF()
+    ENDFOREACH()
+  ENDIF()
+ENDFOREACH()
+
+MESSAGE(STATUS "Setting up project configuration - Done")
diff --git a/cmake/config/Config.cmake.in b/cmake/config/Config.cmake.in
new file mode 100644
index 0000000..095349b
--- /dev/null
+++ b/cmake/config/Config.cmake.in
@@ -0,0 +1,189 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+
+########################################################################
+##                                                                    ##
+##               The deal.II project configuration file               ##
+##                                                                    ##
+########################################################################
+
+
+#
+# General information
+#
+
+SET(DEAL_II_PACKAGE_NAME "@DEAL_II_PACKAGE_NAME@")
+SET(DEAL_II_PACKAGE_VERSION "@DEAL_II_PACKAGE_VERSION@")
+SET(DEAL_II_PACKAGE_VENDOR "@DEAL_II_PACKAGE_VENDOR@")
+SET(DEAL_II_PACKAGE_DESCRIPTION "@DEAL_II_PACKAGE_DESCRIPTION@")
+
+SET(DEAL_II_VERSION_MAJOR "@DEAL_II_VERSION_MAJOR@")
+SET(DEAL_II_VERSION_MINOR "@DEAL_II_VERSION_MINOR@")
+SET(DEAL_II_VERSION_SUBMINOR "@DEAL_II_VERSION_SUBMINOR@")
+SET(DEAL_II_VERSION "@DEAL_II_VERSION@")
+
+SET(DEAL_II_GIT_BRANCH "@DEAL_II_GIT_BRANCH@")
+SET(DEAL_II_GIT_REVISION "@DEAL_II_GIT_REVISION@")
+SET(DEAL_II_GIT_SHORTREV "@DEAL_II_GIT_SHORTREV@")
+
+SET(DEAL_II_PROJECT_CONFIG_NAME "@DEAL_II_PROJECT_CONFIG_NAME@")
+
+SET(DEAL_II_BUILD_TYPE "@CMAKE_BUILD_TYPE@")
+SET(DEAL_II_BUILD_TYPES "@DEAL_II_BUILD_TYPES@")
+
+
+#
+# Information about the project location
+#
+
+SET(DEAL_II_DOCHTML_RELDIR "@DEAL_II_DOCHTML_RELDIR@")
+SET(DEAL_II_DOCREADME_RELDIR "@DEAL_II_DOCREADME_RELDIR@")
+SET(DEAL_II_EXAMPLES_RELDIR "@DEAL_II_EXAMPLES_RELDIR@")
+SET(DEAL_II_EXECUTABLE_RELDIR "@DEAL_II_EXECUTABLE_RELDIR@")
+SET(DEAL_II_INCLUDE_RELDIR "@DEAL_II_INCLUDE_RELDIR@")
+SET(DEAL_II_LIBRARY_RELDIR "@DEAL_II_LIBRARY_RELDIR@")
+SET(DEAL_II_PROJECT_CONFIG_RELDIR "@DEAL_II_PROJECT_CONFIG_RELDIR@")
+SET(DEAL_II_SHARE_RELDIR "@DEAL_II_SHARE_RELDIR@")
+
+#
+# Determine DEAL_II_PATH from CMAKE_CURRENT_LIST_DIR:
+#
+
+SET(DEAL_II_PATH "${CMAKE_CURRENT_LIST_DIR}")
+SET(_path "${DEAL_II_PROJECT_CONFIG_RELDIR}")
+WHILE(NOT "${_path}" STREQUAL "")
+  GET_FILENAME_COMPONENT(DEAL_II_PATH "${DEAL_II_PATH}" PATH)
+  GET_FILENAME_COMPONENT(_path "${_path}" PATH)
+ENDWHILE()
+
+#
+# Print a message after inclusion of this file:
+#
+
+SET(DEAL_II_PROJECT_CONFIG_INCLUDED TRUE)
+
+SET(DEAL_II_BUILD_DIR @CONFIG_BUILD_DIR@)
+
+IF(NOT ${DEAL_II_PACKAGE_NAME}_FIND_QUIETLY)
+  IF(DEAL_II_BUILD_DIR)
+    MESSAGE(STATUS
+      "Using the ${DEAL_II_PACKAGE_NAME}-${DEAL_II_PACKAGE_VERSION} build directory found at ${DEAL_II_PATH}"
+      )
+  ELSE()
+    MESSAGE(STATUS
+      "Using the ${DEAL_II_PACKAGE_NAME}-${DEAL_II_PACKAGE_VERSION} installation found at ${DEAL_II_PATH}"
+      )
+  ENDIF()
+ENDIF()
+
+
+#
+# Include all convenience macros:
+#
+
+FILE(GLOB _macro_files
+  "${DEAL_II_PATH}/${DEAL_II_SHARE_RELDIR}/macros/*.cmake"
+  )
+FOREACH(file ${_macro_files})
+  IF(NOT ${DEAL_II_PACKAGE_NAME}_FIND_QUIETLY)
+    MESSAGE(STATUS "Include macro ${file}")
+  ENDIF()
+  INCLUDE(${file})
+ENDFOREACH()
+
+
+#
+# Compiler and linker configuration
+#
+
+SET(DEAL_II_CXX_COMPILER "@CMAKE_CXX_COMPILER@")
+SET(DEAL_II_C_COMPILER "@CMAKE_C_COMPILER@")
+
+# used for all targets:
+SET(DEAL_II_CXX_FLAGS "@DEAL_II_CXX_FLAGS@")
+
+# _additionally_ used for debug targets:
+SET(DEAL_II_CXX_FLAGS_DEBUG "@DEAL_II_CXX_FLAGS_DEBUG@")
+
+# _additionally_ used for release targets:
+SET(DEAL_II_CXX_FLAGS_RELEASE "@DEAL_II_CXX_FLAGS_RELEASE@")
+
+# used for all targets:
+SET(DEAL_II_LINKER_FLAGS "@DEAL_II_LINKER_FLAGS@")
+
+# _additionally_ used for debug targets:
+SET(DEAL_II_LINKER_FLAGS_DEBUG "@DEAL_II_LINKER_FLAGS_DEBUG@")
+
+# _additionally_ used for release targets:
+SET(DEAL_II_LINKER_FLAGS_RELEASE "@DEAL_II_LINKER_FLAGS_RELEASE@")
+
+# used for all targets:
+SET(DEAL_II_USER_DEFINITIONS "@DEAL_II_USER_DEFINITIONS@")
+
+# _additionally_ used for debug targets:
+SET(DEAL_II_USER_DEFINITIONS_DEBUG "@DEAL_II_USER_DEFINITIONS_DEBUG@")
+
+# _additionally_ used for release targets:
+SET(DEAL_II_USER_DEFINITIONS_RELEASE "@DEAL_II_USER_DEFINITIONS_RELEASE@")
+
+#
+# MPI runtime:
+#
+
+SET(DEAL_II_MPIEXEC "@MPIEXEC@")
+SET(DEAL_II_MPIEXEC_NUMPROC_FLAG "@MPIEXEC_NUMPROC_FLAG@")
+SET(DEAL_II_MPIEXEC_PREFLAGS "@MPIEXEC_PREFLAGS@")
+SET(DEAL_II_MPIEXEC_POSTFLAGS "@MPIEXEC_POSTFLAGS@")
+
+#
+# Build a static executable:
+#
+
+SET(DEAL_II_STATIC_EXECUTABLE "@DEAL_II_STATIC_EXECUTABLE@")
+
+
+#
+# Information about include directories and libraries
+#
+
+# Full list of include directories:
+SET(DEAL_II_INCLUDE_DIRS "@CONFIG_INCLUDE_DIRS@")
+
+# Full list of libraries for the debug target:
+SET(DEAL_II_LIBRARIES_DEBUG "@CONFIG_LIBRARIES_DEBUG@")
+
+# Full list of libraries for the release target:
+SET(DEAL_II_LIBRARIES_RELEASE "@CONFIG_LIBRARIES_RELEASE@")
+
+# Full list of libraries with "debug" and "optimized" keywords for easy use with TARGET_LINK_LIBRARIES:
+SET(DEAL_II_LIBRARIES "@CONFIG_LIBRARIES@")
+
+
+#
+# Information about library targets
+#
+
+# The library targets file:
+SET(DEAL_II_TARGET_CONFIG "${DEAL_II_PATH}/${DEAL_II_PROJECT_CONFIG_RELDIR}/${DEAL_II_PROJECT_CONFIG_NAME}Targets.cmake")
+
+# The Debug target:
+SET(DEAL_II_TARGET_DEBUG "@CONFIG_TARGET_DEBUG@")
+
+# The Release target:
+SET(DEAL_II_TARGET_RELEASE "@CONFIG_TARGET_RELEASE@")
+
+# Full list of targets with "debug" and "optimized" keywords for easy use with TARGET_LINK_LIBRARIES:
+SET(DEAL_II_TARGET "@CONFIG_TARGET@")
diff --git a/cmake/config/ConfigVersion.cmake.in b/cmake/config/ConfigVersion.cmake.in
new file mode 100644
index 0000000..627c1be
--- /dev/null
+++ b/cmake/config/ConfigVersion.cmake.in
@@ -0,0 +1,17 @@
+
+#
+# Package version file for FIND_PACKAGE, as suggested by the cmake documentation
+#
+
+set(PACKAGE_VERSION "@DEAL_II_VERSION_MAJOR@.@DEAL_II_VERSION_MINOR@.@DEAL_II_VERSION_SUBMINOR@")
+
+# Compatible if our version is not older than the requested PACKAGE_FIND_VERSION
+if("${PACKAGE_VERSION}" VERSION_LESS "${PACKAGE_FIND_VERSION}")
+  set(PACKAGE_VERSION_COMPATIBLE FALSE)
+else()
+  set(PACKAGE_VERSION_COMPATIBLE TRUE)
+  if ("${PACKAGE_VERSION}" VERSION_EQUAL "${PACKAGE_FIND_VERSION}")
+    set(PACKAGE_VERSION_EXACT TRUE)
+  endif()
+endif()
+
diff --git a/cmake/config/Make.global_options.in b/cmake/config/Make.global_options.in
new file mode 100644
index 0000000..e27daf7
--- /dev/null
+++ b/cmake/config/Make.global_options.in
@@ -0,0 +1,136 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2014 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+
+########################################################################
+##                                                                    ##
+##               The deal.II project configuration file               ##
+##                                                                    ##
+########################################################################
+
+
+#
+# General information
+#
+
+DEAL_II_PACKAGE_NAME = @DEAL_II_PACKAGE_NAME@
+DEAL_II_PACKAGE_VERSION = @DEAL_II_PACKAGE_VERSION@
+DEAL_II_PACKAGE_VENDOR = @DEAL_II_PACKAGE_VENDOR@
+DEAL_II_PACKAGE_DESCRIPTION = @DEAL_II_PACKAGE_DESCRIPTION@
+
+DEAL_II_VERSION_MAJOR = @DEAL_II_VERSION_MAJOR@
+DEAL_II_VERSION_MINOR = @DEAL_II_VERSION_MINOR@
+DEAL_II_VERSION_SUBMINOR = @DEAL_II_VERSION_SUBMINOR@
+DEAL_II_VERSION = @DEAL_II_VERSION@
+
+DEAL_II_GIT_BRANCH = @DEAL_II_GIT_BRANCH@
+DEAL_II_GIT_REVISION = @DEAL_II_GIT_REVISION@
+DEAL_II_GIT_SHORTREV =  @DEAL_II_GIT_SHORTREV@
+
+DEAL_II_PROJECT_CONFIG_NAME = @DEAL_II_PROJECT_CONFIG_NAME@
+
+DEAL_II_BUILD_TYPE = @CMAKE_BUILD_TYPE@
+DEAL_II_BUILD_TYPES = @MAKEFILE_BUILD_TYPES@
+
+#
+# Information about the project location
+#
+
+DEAL_II_SHARE_RELDIR = @DEAL_II_SHARE_RELDIR@
+DEAL_II_DOCREADME_RELDIR = @DEAL_II_DOCREADME_RELDIR@
+DEAL_II_DOCHTML_RELDIR = @DEAL_II_DOCHTML_RELDIR@
+DEAL_II_EXAMPLES_RELDIR = @DEAL_II_EXAMPLES_RELDIR@
+DEAL_II_EXECUTABLE_RELDIR = @DEAL_II_EXECUTABLE_RELDIR@
+DEAL_II_INCLUDE_RELDIR = @DEAL_II_INCLUDE_RELDIR@
+DEAL_II_LIBRARY_RELDIR = @DEAL_II_LIBRARY_RELDIR@
+DEAL_II_PROJECT_CONFIG_RELDIR = @DEAL_II_PROJECT_CONFIG_RELDIR@
+
+#
+# Determine DEAL_II_PATH from MAKEFILE_LIST:
+#
+DEAL_II_MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
+DEAL_II_PATH := $(subst /${DEAL_II_SHARE_RELDIR}/Make.global_options,,${DEAL_II_MAKEFILE_PATH})
+
+#
+# Information about configuration:
+#
+
+DEAL_II_PROJECT_CONFIG_INCLUDED = true
+DEAL_II_BUILD_DIR = @MAKEFILE_BUILD_DIR@
+
+
+#
+# Compiler and linker configuration
+#
+
+DEAL_II_CXX_COMPILER = @CMAKE_CXX_COMPILER@
+
+# used for all targets:
+DEAL_II_CXX_FLAGS = @DEAL_II_CXX_FLAGS@
+
+# _additionally_ used for debug targets:
+DEAL_II_CXX_FLAGS_DEBUG = @DEAL_II_CXX_FLAGS_DEBUG@
+
+# _additionally_ used for release targets:
+DEAL_II_CXX_FLAGS_RELEASE = @DEAL_II_CXX_FLAGS_RELEASE@
+
+# used for all targets:
+DEAL_II_LINKER_FLAGS = @DEAL_II_LINKER_FLAGS@
+
+# _additionally_ used for debug targets:
+DEAL_II_LINKER_FLAGS_DEBUG = @DEAL_II_LINKER_FLAGS_DEBUG@
+
+# _additionally_ used for release targets:
+DEAL_II_LINKER_FLAGS_RELEASE = @DEAL_II_LINKER_FLAGS_RELEASE@
+
+# used for all targets:
+DEAL_II_USER_DEFINITIONS = @MAKEFILE_USER_DEFINITIONS@
+
+# _additionally_ used for debug targets:
+DEAL_II_USER_DEFINITIONS_DEBUG = @MAKEFILE_USER_DEFINITIONS_DEBUG@
+
+# _additionally_ used for release targets:
+DEAL_II_USER_DEFINITIONS_RELEASE = @MAKEFILE_USER_DEFINITIONS_RELEASE@
+
+
+#
+# MPI runtime:
+#
+
+DEAL_II_MPIEXEC = @MPIEXEC@
+DEAL_II_MPIEXEC_NUMPROC_FLAG = @MPIEXEC_NUMPROC_FLAG@
+DEAL_II_MPIEXEC_PREFLAGS = @MPIEXEC_PREFLAGS@
+DEAL_II_MPIEXEC_POSTFLAGS = @MPIEXEC_POSTFLAGS@
+
+
+#
+# Build a static executable:
+#
+
+DEAL_II_STATIC_EXECUTABLE = @MAKEFILE_STATIC_EXECUTABLE@
+
+
+#
+# Information about include directories and libraries
+#
+
+# Full list of include directories:
+DEAL_II_INCLUDE_DIRS = ${DEAL_II_PATH}@MAKEFILE_INCLUDE_DIRS@
+
+# Full list of libraries for the debug target:
+DEAL_II_LIBRARIES_DEBUG = ${DEAL_II_PATH}@MAKEFILE_LIBRARIES_DEBUG@
+
+# Full list of libraries for the release target:
+DEAL_II_LIBRARIES_RELEASE = ${DEAL_II_PATH}@MAKEFILE_LIBRARIES_RELEASE@
diff --git a/cmake/config/template-arguments.in b/cmake/config/template-arguments.in
new file mode 100644
index 0000000..15e46dc
--- /dev/null
+++ b/cmake/config/template-arguments.in
@@ -0,0 +1,108 @@
+BOOL            := { true; false }
+REAL_SCALARS    := { double; float; long double }
+COMPLEX_SCALARS := { std::complex<double>;
+                     std::complex<float>;
+                     std::complex<long double> }
+
+DERIVATIVE_TENSORS := { double;
+                        Tensor<1,deal_II_dimension>;
+                        Tensor<2,deal_II_dimension> }
+
+DEAL_II_VEC_TEMPLATES := { Vector; BlockVector }
+
+SERIAL_VECTORS := { Vector<double>;
+                    Vector<float> ;
+                    Vector<long double>;
+
+                    BlockVector<double>;
+                    BlockVector<float>;
+                    BlockVector<long double>;
+
+                    parallel::distributed::Vector<double>;
+                    parallel::distributed::Vector<float> ;
+                    parallel::distributed::Vector<long double>;
+
+                    parallel::distributed::BlockVector<double>;
+                    parallel::distributed::BlockVector<float> ;
+                    parallel::distributed::BlockVector<long double>;
+
+                    @DEAL_II_EXPAND_TRILINOS_VECTOR@;
+                    @DEAL_II_EXPAND_TRILINOS_MPI_VECTOR@;
+                    @DEAL_II_EXPAND_PETSC_VECTOR@;
+                    @DEAL_II_EXPAND_PETSC_MPI_VECTOR@;
+
+                    @DEAL_II_EXPAND_TRILINOS_BLOCKVECTOR@;
+                    @DEAL_II_EXPAND_TRILINOS_MPI_BLOCKVECTOR@;
+                    @DEAL_II_EXPAND_PETSC_BLOCKVECTOR@;
+                    @DEAL_II_EXPAND_PETSC_MPI_BLOCKVECTOR@;
+                  }
+
+EXTERNAL_SEQUENTIAL_VECTORS := { @DEAL_II_EXPAND_TRILINOS_VECTOR@;
+                                 @DEAL_II_EXPAND_TRILINOS_BLOCKVECTOR@;
+                                 @DEAL_II_EXPAND_PETSC_VECTOR@;
+                                 @DEAL_II_EXPAND_PETSC_BLOCKVECTOR@
+                               }
+
+EXTERNAL_PARALLEL_VECTORS := { @DEAL_II_EXPAND_TRILINOS_MPI_VECTOR@;
+                               @DEAL_II_EXPAND_TRILINOS_MPI_BLOCKVECTOR@;
+                               @DEAL_II_EXPAND_PETSC_MPI_VECTOR@;
+                               @DEAL_II_EXPAND_PETSC_MPI_BLOCKVECTOR@
+                             }
+
+VECTORS_WITH_MATRIX := { Vector<double>;
+                    Vector<float> ;
+                    Vector<long double>;
+
+                    BlockVector<double>;
+                    BlockVector<float>;
+                    BlockVector<long double>;
+
+                    parallel::distributed::Vector<double>;
+
+                    @DEAL_II_EXPAND_TRILINOS_VECTOR@;
+                    @DEAL_II_EXPAND_TRILINOS_MPI_VECTOR@;
+                  }
+
+DOFHANDLERS := { DoFHandler<deal_II_dimension>;
+                 hp::DoFHandler<deal_II_dimension> }
+
+DOFHANDLER_TEMPLATES := { DoFHandler;
+                          hp::DoFHandler }
+
+TRIANGULATION_AND_DOFHANDLER_TEMPLATES := { Triangulation;
+                                            parallel::shared::Triangulation;
+                                            parallel::distributed::Triangulation;
+                                            DoFHandler;
+                                            hp::DoFHandler }
+
+SEQUENTIAL_TRIANGULATION_AND_DOFHANDLERS := { Triangulation<deal_II_dimension, deal_II_space_dimension>;
+                                              DoFHandler<deal_II_dimension, deal_II_space_dimension>;
+                                              hp::DoFHandler<deal_II_dimension, deal_II_space_dimension> }
+
+TRIANGULATION_AND_DOFHANDLERS := { Triangulation<deal_II_dimension, deal_II_space_dimension>;
+                                   parallel::shared::Triangulation<deal_II_dimension, deal_II_space_dimension>;
+                                   parallel::distributed::Triangulation<deal_II_dimension, deal_II_space_dimension>;
+                                   DoFHandler<deal_II_dimension, deal_II_space_dimension>;
+                                   hp::DoFHandler<deal_II_dimension, deal_II_space_dimension> }
+
+
+FEVALUES_BASES := { FEValuesBase<deal_II_dimension>;
+                    FEFaceValuesBase<deal_II_dimension> }
+
+SPARSITY_PATTERNS := { SparsityPattern;
+                       DynamicSparsityPattern;
+                       @DEAL_II_EXPAND_TRILINOS_SPARSITY_PATTERN@;
+
+                       BlockSparsityPattern;
+                       BlockDynamicSparsityPattern;
+                       @DEAL_II_EXPAND_TRILINOS_BLOCK_SPARSITY_PATTERN@; }
+
+DIMENSIONS := { 1; 2; 3 }
+
+SPACE_DIMENSIONS := { 1; 2; 3 }
+
+RANKS := { 1; 2; 3; 4 }
+
+OUTPUT_FLAG_TYPES := { DXFlags; UcdFlags; GnuplotFlags; PovrayFlags; EpsFlags;
+                       GmvFlags; TecplotFlags; VtkFlags; SvgFlags;
+                       Deal_II_IntermediateFlags }
diff --git a/cmake/configure/configure_1_bzip2.cmake b/cmake/configure/configure_1_bzip2.cmake
new file mode 100644
index 0000000..ae86e44
--- /dev/null
+++ b/cmake/configure/configure_1_bzip2.cmake
@@ -0,0 +1,20 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Configuration for the bzip2 library. This file defines no FEATURE_BZIP2_*
+# macros of its own; it only invokes the generic CONFIGURE_FEATURE macro:
+#
+CONFIGURE_FEATURE(BZIP2)
diff --git a/cmake/configure/configure_1_lapack.cmake b/cmake/configure/configure_1_lapack.cmake
new file mode 100644
index 0000000..629b1d8
--- /dev/null
+++ b/cmake/configure/configure_1_lapack.cmake
@@ -0,0 +1,83 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Configuration for the lapack library: FEATURE_LAPACK_FIND_EXTERNAL(var)
+# sets ${var} to TRUE if LAPACK is found, and to FALSE if the symbol check fails.
+#
+MACRO(FEATURE_LAPACK_FIND_EXTERNAL var)
+  FIND_PACKAGE(LAPACK)
+
+  #
+  # We do a check for availability of every single LAPACK function we use.
+  #
+  IF(LAPACK_FOUND)
+    SET(${var} TRUE)
+    #
+    # Clear the test flags because the following test will use a C compiler;
+    # they are reset again with RESET_CMAKE_REQUIRED() right after the test:
+    #
+    CLEAR_CMAKE_REQUIRED()
+    SET(CMAKE_REQUIRED_FLAGS "${LAPACK_LINKER_FLAGS}")
+    SET(CMAKE_REQUIRED_LIBRARIES ${LAPACK_LIBRARIES})
+    # Push -pthread as well:
+    ENABLE_IF_SUPPORTED(CMAKE_REQUIRED_FLAGS "-pthread")
+
+    CHECK_C_SOURCE_COMPILES("
+      char daxpy_(); char dgeev_(); char dgeevx_(); char dgelsd_(); char
+      dgemm_(); char dgemv_(); char dgeqrf_(); char dgesdd_(); char
+      dgesvd_(); char dgetrf_(); char dgetri_(); char dgetrs_(); char
+      dorgqr_(); char dormqr_(); char dstev_(); char dsyevx_(); char
+      dsygv_(); char dsygvx_(); char dtrtrs_(); char saxpy_(); char
+      sgeev_(); char sgeevx_(); char sgelsd_(); char sgemm_(); char
+      sgemv_(); char sgeqrf_(); char sgesdd_(); char sgesvd_(); char
+      sgetrf_(); char sgetri_(); char sgetrs_(); char sorgqr_(); char
+      sormqr_(); char sstev_(); char ssyevx_(); char ssygv_(); char
+      ssygvx_(); char strtrs_();
+      int main(){
+        daxpy_ (); dgeev_ (); dgeevx_ (); dgelsd_ (); dgemm_ (); dgemv_ ();
+        dgeqrf_ (); dgesdd_ (); dgesvd_ (); dgetrf_ (); dgetri_ (); dgetrs_
+        (); dorgqr_ (); dormqr_ (); dstev_ (); dsyevx_ (); dsygv_ ();
+        dsygvx_ (); dtrtrs_ (); saxpy_ (); sgeev_ (); sgeevx_ (); sgelsd_
+        (); sgemm_ (); sgemv_ (); sgeqrf_ (); sgesdd_ (); sgesvd_ ();
+        sgetrf_ (); sgetri_ (); sgetrs_ (); sorgqr_ (); sormqr_ (); sstev_
+        (); ssyevx_ (); ssygv_ (); ssygvx_ (); strtrs_ ();
+
+        return 0;
+      }"
+      LAPACK_SYMBOL_CHECK)
+    RESET_CMAKE_REQUIRED()
+    IF(NOT LAPACK_SYMBOL_CHECK)
+      MESSAGE(STATUS
+        "Could not find a sufficient BLAS/LAPACK installation: "
+        "BLAS/LAPACK symbol check failed! Consult CMakeFiles/CMakeError.log "
+        "for further information."
+        )
+      SET(LAPACK_ADDITIONAL_ERROR_STRING
+        ${LAPACK_ADDITIONAL_ERROR_STRING}
+        "Could not find a sufficient BLAS/LAPACK installation: \n"
+        "BLAS/LAPACK symbol check failed! This usually means that your "
+        "BLAS/LAPACK installation is incomplete or the link line is "
+        "broken. Consult\n"
+        "  CMakeFiles/CMakeError.log\n"
+        "for further information.\n"
+        )
+      SET(${var} FALSE)
+    ENDIF()
+  ENDIF()
+ENDMACRO()
+
+
+CONFIGURE_FEATURE(LAPACK)
diff --git a/cmake/configure/configure_1_mpi.cmake b/cmake/configure/configure_1_mpi.cmake
new file mode 100644
index 0000000..781c47c
--- /dev/null
+++ b/cmake/configure/configure_1_mpi.cmake
@@ -0,0 +1,55 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Configuration for mpi support:
+#
+
+MACRO(FEATURE_MPI_FIND_EXTERNAL var)
+  # ${var} becomes TRUE for an MPI that defines MPI_SEEK_SET and FALSE for
+  # one that does not; it is left untouched if no MPI is found at all.
+  FIND_PACKAGE(MPI)
+  IF(MPI_FOUND AND MPI_HAVE_MPI_SEEK_SET)
+    SET(${var} TRUE)
+  ELSEIF(MPI_FOUND)
+    SET(${var} FALSE)
+    SET(MPI_ADDITIONAL_ERROR_STRING
+      "Your MPI implementation must define MPI_SEEK_SET.")
+    MESSAGE(STATUS
+      "Could not find a sufficient MPI version: "
+      "Your MPI implementation must define MPI_SEEK_SET."
+      )
+  ENDIF()
+ENDMACRO()
+# Abort configuration with hints (CMake reads the Fortran compiler from FC):
+MACRO(FEATURE_MPI_ERROR_MESSAGE)
+  MESSAGE(FATAL_ERROR "\n"
+    "Could not find any suitable mpi library!\n"
+    ${MPI_ADDITIONAL_ERROR_STRING}
+    "\nPlease ensure that an mpi library is installed on your computer\n"
+    "and set CMAKE_CXX_COMPILER to the appropriate mpi wrappers:\n"
+    "    $ CXX=\".../mpicxx\" cmake <...>\n"
+    "    $ cmake -DCMAKE_CXX_COMPILER=\".../mpicxx\" <...>\n"
+    "Or with additional C and Fortran wrappers (recommended!):\n"
+    "    $ CC=\".../mpicc\" CXX=\".../mpicxx\" FC=\".../mpif90\" cmake <...>\n"
+    "    $ cmake -DCMAKE_C_COMPILER=\".../mpicc\"\\\n"
+    "            -DCMAKE_CXX_COMPILER=\".../mpicxx\"\\\n"
+    "            -DCMAKE_Fortran_COMPILER=\".../mpif90\"\\\n"
+    "            <...>\n"
+    )
+ENDMACRO()
+
+
+CONFIGURE_FEATURE(MPI)
diff --git a/cmake/configure/configure_1_threads.cmake b/cmake/configure/configure_1_threads.cmake
new file mode 100644
index 0000000..afd8adb
--- /dev/null
+++ b/cmake/configure/configure_1_threads.cmake
@@ -0,0 +1,214 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Configuration for thread support in deal.II with the help of the tbb
+# library:
+#
+
+#
+# Set up general threading:
+# The macro is invoked from FEATURE_THREADS_CONFIGURE_EXTERNAL/BUNDLED.
+#
+MACRO(SETUP_THREADING)
+  #
+  # Unfortunately the FindThreads macro needs a working C compiler
+  #
+  IF(CMAKE_C_COMPILER_WORKS)
+    #
+    # Clear the test flags because FindThreads.cmake will use a C compiler:
+    #
+    CLEAR_CMAKE_REQUIRED()
+
+    SWITCH_LIBRARY_PREFERENCE()
+    FIND_PACKAGE(Threads)
+    SWITCH_LIBRARY_PREFERENCE()
+
+    RESET_CMAKE_REQUIRED()
+
+  ELSE()
+
+    #
+    # We have no way to query for thread support. Just assume that it is
+    # provided by Pthreads...
+    #
+    MESSAGE(STATUS
+      "No suitable C compiler was found! Assuming threading is provided by Pthreads."
+      )
+    SET_IF_EMPTY(Threads_FOUND TRUE)
+    SET_IF_EMPTY(CMAKE_THREAD_LIBS_INIT "-lpthread")
+    SET_IF_EMPTY(CMAKE_USE_PTHREADS_INIT TRUE)
+  ENDIF()
+
+  IF(NOT Threads_FOUND)
+    #
+    # TODO: This is a dead end. Threading might be set up with internal TBB
+    # so we have no way of returning unsuccessfully...
+    #
+    MESSAGE(FATAL_ERROR
+      "\nInternal configuration error: No Threading support found\n\n"
+      )
+  ENDIF()
+
+  MARK_AS_ADVANCED(pthread_LIBRARY)
+
+  #
+  # Change -lpthread to -pthread (if the compiler accepts that flag) for
+  # better compatibility on non linux platforms:
+  #
+  IF("${CMAKE_THREAD_LIBS_INIT}" MATCHES "-lpthread")
+    CHECK_CXX_COMPILER_FLAG("-pthread"
+      DEAL_II_HAVE_FLAG_pthread
+      )
+    IF(DEAL_II_HAVE_FLAG_pthread)
+      STRING(REPLACE "-lpthread" "-pthread" CMAKE_THREAD_LIBS_INIT
+        "${CMAKE_THREAD_LIBS_INIT}"
+        )
+    ENDIF()
+  ENDIF()
+
+  ADD_FLAGS(THREADS_LINKER_FLAGS "${CMAKE_THREAD_LIBS_INIT}")
+
+  #
+  # Set up some posix thread specific configuration toggles:
+  #
+  IF(NOT CMAKE_SYSTEM_NAME MATCHES "Windows")
+
+    IF(NOT CMAKE_USE_PTHREADS_INIT)
+      MESSAGE(FATAL_ERROR
+        "\nInternal configuration error: Not on Windows but posix thread support unavailable\n\n"
+        )
+    ENDIF()
+
+    SET(DEAL_II_USE_MT_POSIX TRUE)
+
+    #
+    # Check whether posix thread barriers are available (the test program
+    # is compiled with the thread flags added to CMAKE_REQUIRED_FLAGS):
+    #
+    ADD_FLAGS(CMAKE_REQUIRED_FLAGS "${CMAKE_THREAD_LIBS_INIT}")
+    CHECK_CXX_SOURCE_COMPILES(
+    "
+    #include <pthread.h>
+    int main()
+    {
+      pthread_barrier_t pb;
+      pthread_barrier_init (&pb, 0, 1);
+      pthread_barrier_wait (&pb);
+      pthread_barrier_destroy (&pb);
+      return 0;
+    }
+    "
+    DEAL_II_HAVE_MT_POSIX_BARRIERS)
+    RESET_CMAKE_REQUIRED()
+    IF(NOT DEAL_II_HAVE_MT_POSIX_BARRIERS)
+      SET(DEAL_II_USE_MT_POSIX_NO_BARRIERS TRUE)
+    ENDIF()
+
+  ELSE()
+
+    #
+    # Poor Windows:
+    #
+    SET(DEAL_II_USE_MT_POSIX FALSE)
+    SET(DEAL_II_USE_MT_POSIX_NO_BARRIERS TRUE)
+  ENDIF()
+
+ENDMACRO()
+
+
+#
+# Set up the tbb library:
+#
+
+MACRO(FEATURE_THREADS_FIND_EXTERNAL var)
+  FIND_PACKAGE(TBB)
+  # Set ${var} to TRUE if TBB was found; otherwise ${var} is left untouched.
+  IF(TBB_FOUND)
+    SET(${var} TRUE)
+  ENDIF()
+ENDMACRO()
+
+
+MACRO(FEATURE_THREADS_CONFIGURE_EXTERNAL)
+  #
+  # Workaround for C++11 mode on non-GNU compilers that lack
+  # template<typename T> std::is_trivially_copyable<T>:
+  #
+  IF(DEAL_II_WITH_CXX11
+     AND NOT DEAL_II_HAVE_CXX11_IS_TRIVIALLY_COPYABLE
+     AND NOT CMAKE_CXX_COMPILER_ID MATCHES "GNU")
+    LIST(APPEND THREADS_USER_DEFINITIONS "TBB_IMPLEMENT_CPP0X=1")
+    LIST(APPEND THREADS_DEFINITIONS "TBB_IMPLEMENT_CPP0X=1")
+  ENDIF()
+
+  #
+  # Debug builds with TBB_WITH_DEBUG_LIB set additionally get the tbb
+  # debug and assert definitions:
+  #
+  IF(CMAKE_BUILD_TYPE MATCHES "Debug" AND TBB_WITH_DEBUG_LIB)
+    LIST(APPEND THREADS_USER_DEFINITIONS_DEBUG "TBB_USE_DEBUG" "TBB_DO_ASSERT=1")
+    LIST(APPEND THREADS_DEFINITIONS_DEBUG "TBB_USE_DEBUG" "TBB_DO_ASSERT=1")
+  ENDIF()
+
+  SETUP_THREADING()
+
+  LIST(APPEND THREADS_USER_INCLUDE_DIRS ${TBB_USER_INCLUDE_DIRS})
+  LIST(APPEND THREADS_INCLUDE_DIRS ${TBB_INCLUDE_DIRS})
+  LIST(APPEND THREADS_LIBRARIES ${TBB_LIBRARIES})
+ENDMACRO()
+
+
+MACRO(FEATURE_THREADS_CONFIGURE_BUNDLED)
+  #
+  # Threading has to be set up first, before our build is configured:
+  #
+  SETUP_THREADING()
+
+  #
+  # Some warnings have to be disabled:
+  #
+  ENABLE_IF_SUPPORTED(THREADS_CXX_FLAGS "-Wno-parentheses")
+
+  LIST(APPEND THREADS_BUNDLED_INCLUDE_DIRS ${TBB_FOLDER}/include)
+
+  #
+  # tbb uses dlopen/dlclose, so libdl.so has to be linked as well:
+  #
+  # TODO: Also necessary for external lib, use preference toggle
+  #
+  LIST(APPEND THREADS_LIBRARIES ${CMAKE_DL_LIBS})
+
+  #
+  # Workaround for C++11 mode on non-GNU compilers that lack
+  # template<typename T> std::is_trivially_copyable<T>:
+  #
+  IF(DEAL_II_WITH_CXX11
+     AND NOT DEAL_II_HAVE_CXX11_IS_TRIVIALLY_COPYABLE
+     AND NOT CMAKE_CXX_COMPILER_ID MATCHES "GNU")
+    LIST(APPEND THREADS_USER_DEFINITIONS "TBB_IMPLEMENT_CPP0X=1")
+    LIST(APPEND THREADS_DEFINITIONS "TBB_IMPLEMENT_CPP0X=1")
+  ENDIF()
+
+  #
+  # Debug builds use the tbb header files in debug mode:
+  #
+  IF(CMAKE_BUILD_TYPE MATCHES "Debug")
+    LIST(APPEND THREADS_USER_DEFINITIONS_DEBUG "TBB_USE_DEBUG" "TBB_DO_ASSERT=1")
+    LIST(APPEND THREADS_DEFINITIONS_DEBUG "TBB_USE_DEBUG" "TBB_DO_ASSERT=1")
+  ENDIF()
+ENDMACRO()
+
+
+CONFIGURE_FEATURE(THREADS)
diff --git a/cmake/configure/configure_1_zlib.cmake b/cmake/configure/configure_1_zlib.cmake
new file mode 100644
index 0000000..c554311
--- /dev/null
+++ b/cmake/configure/configure_1_zlib.cmake
@@ -0,0 +1,20 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Configuration for the zlib library. This file defines no FEATURE_ZLIB_*
+# macros of its own; it only invokes the generic CONFIGURE_FEATURE macro:
+#
+CONFIGURE_FEATURE(ZLIB)
diff --git a/cmake/configure/configure_2_metis.cmake b/cmake/configure/configure_2_metis.cmake
new file mode 100644
index 0000000..fc55a58
--- /dev/null
+++ b/cmake/configure/configure_2_metis.cmake
@@ -0,0 +1,43 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+SET(FEATURE_METIS_AFTER MPI)
+
+#
+# Configuration for the metis library:
+#
+
+MACRO(FEATURE_METIS_FIND_EXTERNAL var)
+  FIND_PACKAGE(METIS)
+
+  IF(METIS_FOUND)
+    # Accept the library only if its major version is greater than 4:
+    IF(METIS_VERSION_MAJOR GREATER 4)
+      SET(${var} TRUE)
+    ELSE()
+      SET(${var} FALSE)
+      SET(METIS_ADDITIONAL_ERROR_STRING
+        "Could not find a sufficient modern metis installation: "
+        "Version 5.x required!\n")
+      MESSAGE(STATUS "Insufficient metis installation found: "
+        "Version 5.x required!")
+    ENDIF()
+
+    # CHECK_MPI_INTERFACE is passed the result variable as well:
+    CHECK_MPI_INTERFACE(METIS ${var})
+  ENDIF()
+ENDMACRO()
+
+CONFIGURE_FEATURE(METIS)
diff --git a/cmake/configure/configure_2_trilinos.cmake b/cmake/configure/configure_2_trilinos.cmake
new file mode 100644
index 0000000..45d50c2
--- /dev/null
+++ b/cmake/configure/configure_2_trilinos.cmake
@@ -0,0 +1,220 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Configuration for the trilinos library:
+#
+# FEATURE_TRILINOS_FIND_EXTERNAL(var): set ${var} to TRUE if a Trilinos
+# installation is found, and back to FALSE if any check below rejects it.
+MACRO(FEATURE_TRILINOS_FIND_EXTERNAL var)
+  FIND_PACKAGE(TRILINOS)
+
+  IF(TRILINOS_FOUND)
+    #
+    # So, we have a library. Let's see whether we can use it:
+    #
+    SET(${var} TRUE)
+
+    #
+    # Set TRILINOS_DIR to something meaningful if empty
+    #
+    IF("${TRILINOS_DIR}" STREQUAL "")
+      SET(TRILINOS_DIR "<system location>")
+    ENDIF()
+
+    #
+    # Check whether all required modules of trilinos are installed:
+    #
+    MESSAGE(STATUS
+      "Check whether the found trilinos package contains all required modules:"
+      )
+
+    FOREACH(_module
+      Amesos Epetra Ifpack AztecOO Sacado Teuchos ML
+      )
+      ITEM_MATCHES(_module_found ${_module} ${Trilinos_PACKAGE_LIST})
+      IF(_module_found)
+        MESSAGE(STATUS "Found ${_module}")
+      ELSE()
+        MESSAGE(STATUS "Module ${_module} not found!")
+        SET(_modules_missing "${_modules_missing} ${_module}")
+        SET(${var} FALSE)
+      ENDIF()
+    ENDFOREACH()
+
+    IF((TRILINOS_VERSION_MAJOR EQUAL 11 AND
+        NOT (TRILINOS_VERSION_MINOR LESS 14))
+       OR
+       (NOT (TRILINOS_VERSION_MAJOR LESS 12)))
+      ITEM_MATCHES(_module_found MueLu ${Trilinos_PACKAGE_LIST})
+      IF(_module_found)
+        MESSAGE(STATUS "Found MueLu")
+      ELSE()
+        MESSAGE(STATUS "Module MueLu not found!")
+        SET(_modules_missing "${_modules_missing} MueLu")
+        SET(${var} FALSE)
+      ENDIF()
+    ENDIF()
+
+    IF(NOT ${var})
+      MESSAGE(STATUS "Could not find a sufficient Trilinos installation: "
+        "Missing ${_modules_missing}"
+        )
+      SET(TRILINOS_ADDITIONAL_ERROR_STRING
+        "The Trilinos installation (found at \"${TRILINOS_DIR}\")\n"
+        "is missing one or more modules necessary for the deal.II Trilinos interfaces:\n"
+        "  ${_modules_missing}\n\n"
+        "Please re-install Trilinos with the missing Trilinos subpackages enabled.\n\n"
+        )
+    ENDIF()
+
+    #
+    # We require at least Trilinos 11.2
+    #
+    IF(TRILINOS_VERSION VERSION_LESS 11.2)
+
+      MESSAGE(STATUS "Could not find a sufficient Trilinos installation: "
+        "deal.II requires at least version 11.2, but version ${TRILINOS_VERSION} was found."
+        )
+      SET(TRILINOS_ADDITIONAL_ERROR_STRING
+        ${TRILINOS_ADDITIONAL_ERROR_STRING}
+        "The Trilinos installation (found at \"${TRILINOS_DIR}\")\n"
+        "with version ${TRILINOS_VERSION} is too old.\n"
+        "deal.II requires at least version 11.2.\n\n"
+        )
+      SET(${var} FALSE)
+    ENDIF()
+
+    #
+    # Trilinos has to be configured with the same MPI configuration as
+    # deal.II.
+    #
+    IF( (TRILINOS_WITH_MPI AND NOT DEAL_II_WITH_MPI)
+         OR
+         (NOT TRILINOS_WITH_MPI AND DEAL_II_WITH_MPI))
+      MESSAGE(STATUS "Could not find a sufficient Trilinos installation: "
+        "Trilinos has to be configured with the same MPI configuration as deal.II."
+        )
+      SET(TRILINOS_ADDITIONAL_ERROR_STRING
+        ${TRILINOS_ADDITIONAL_ERROR_STRING}
+        "The Trilinos installation (found at \"${TRILINOS_DIR}\")\n"
+        "has to be configured with the same MPI configuration as deal.II, but found:\n"
+        "  DEAL_II_WITH_MPI = ${DEAL_II_WITH_MPI}\n"
+        "  TRILINOS_WITH_MPI = ${TRILINOS_WITH_MPI}\n"
+        )
+      SET(${var} FALSE)
+    ENDIF()
+
+    #
+    # Trilinos has to be configured with 32bit indices if deal.II uses
+    # unsigned int.
+    #
+    IF(TRILINOS_WITH_NO_32BIT_INDICES AND NOT DEAL_II_WITH_64BIT_INDICES)
+      MESSAGE(STATUS "Could not find a sufficient Trilinos installation: "
+        "deal.II was configured to use 32bit global indices but "
+        "Trilinos was not."
+        )
+      SET(TRILINOS_ADDITIONAL_ERROR_STRING
+        ${TRILINOS_ADDITIONAL_ERROR_STRING}
+        "The Trilinos installation (found at \"${TRILINOS_DIR}\")\n"
+        "has to be configured to use the same number of bits as deal.II, but "
+        "found:\n"
+        "  DEAL_II_WITH_64BIT_INDICES = ${DEAL_II_WITH_64BIT_INDICES}\n"
+        "  TRILINOS_WITH_NO_32BIT_INDICES = ${TRILINOS_WITH_NO_32BIT_INDICES}\n"
+        )
+      SET(${var} FALSE)
+    ENDIF()
+
+    #
+    # Trilinos has to be configured with 64bit indices if deal.II uses
+    # unsigned long long int.
+    #
+    IF(TRILINOS_WITH_NO_64BIT_INDICES AND DEAL_II_WITH_64BIT_INDICES)
+      MESSAGE(STATUS "Could not find a sufficient Trilinos installation: "
+        "deal.II was configured to use 64bit global indices but "
+        "Trilinos was not."
+        )
+      SET(TRILINOS_ADDITIONAL_ERROR_STRING
+        ${TRILINOS_ADDITIONAL_ERROR_STRING}
+        "The Trilinos installation (found at \"${TRILINOS_DIR}\")\n"
+        "has to be configured to use the same number of bits as deal.II, but "
+        "found:\n"
+        "  DEAL_II_WITH_64BIT_INDICES = ${DEAL_II_WITH_64BIT_INDICES}\n"
+        "  TRILINOS_WITH_NO_64BIT_INDICES = ${TRILINOS_WITH_NO_64BIT_INDICES}\n"
+        )
+      SET(${var} FALSE)
+    ENDIF()
+
+    #
+    # Some versions of Sacado_cmath.hpp do things that aren't compatible
+    # with the -std=c++0x flag of GCC, see deal.II FAQ.
+    # Test whether that is indeed the case
+    #
+    IF(DEAL_II_WITH_CXX11 AND NOT TRILINOS_SUPPORTS_CPP11)
+
+      IF(TRILINOS_HAS_C99_TR1_WORKAROUND)
+        LIST(APPEND TRILINOS_DEFINITIONS "HAS_C99_TR1_CMATH")
+        LIST(APPEND TRILINOS_USER_DEFINITIONS "HAS_C99_TR1_CMATH")
+      ELSE()
+        MESSAGE(STATUS "Could not find a sufficient Trilinos installation: "
+          "The installation is not compatible with the C++ standard selected for "
+          "this compiler."
+          )
+        SET(TRILINOS_ADDITIONAL_ERROR_STRING
+          ${TRILINOS_ADDITIONAL_ERROR_STRING}
+          "The Trilinos installation (found at \"${TRILINOS_DIR}\")\n"
+          "is not compatible with the C++ standard selected for\n"
+          "this compiler. See the deal.II FAQ page for a solution.\n\n"
+          )
+        SET(${var} FALSE)
+      ENDIF()
+    ENDIF()
+
+    #
+    # Newer Trilinos versions (12.0.1 or newer) require a matching C++11
+    # support. I.e., if Trilinos is configured with C++11 support, deal.II
+    # also has to be configured with C++11 support:
+    #
+    IF(TRILINOS_WITH_MANDATORY_CXX11 AND NOT DEAL_II_WITH_CXX11)
+      MESSAGE(STATUS "Could not find a sufficient Trilinos installation: "
+        "Trilinos was compiled with C++11 support, but C++11 support is "
+        "disabled (DEAL_II_WITH_CXX11=off)."
+        )
+      SET(TRILINOS_ADDITIONAL_ERROR_STRING
+        ${TRILINOS_ADDITIONAL_ERROR_STRING}
+        "The Trilinos installation (found at \"${TRILINOS_DIR}\")\n"
+        "requires C++11 support, but C++11 support is disabled:\n"
+        "  DEAL_II_WITH_CXX11 = ${DEAL_II_WITH_CXX11}\n"
+        )
+      SET(${var} FALSE)
+
+    ENDIF()
+
+    CHECK_MPI_INTERFACE(TRILINOS ${var})
+  ENDIF()
+ENDMACRO()
+
+# Values for the @DEAL_II_EXPAND_TRILINOS_*@ placeholders of template-arguments.in:
+MACRO(FEATURE_TRILINOS_CONFIGURE_EXTERNAL)
+  SET(DEAL_II_EXPAND_TRILINOS_VECTOR "TrilinosWrappers::Vector")
+  SET(DEAL_II_EXPAND_TRILINOS_MPI_VECTOR "TrilinosWrappers::MPI::Vector")
+  SET(DEAL_II_EXPAND_TRILINOS_BLOCKVECTOR "TrilinosWrappers::BlockVector")
+  SET(DEAL_II_EXPAND_TRILINOS_MPI_BLOCKVECTOR "TrilinosWrappers::MPI::BlockVector")
+  SET(DEAL_II_EXPAND_TRILINOS_SPARSITY_PATTERN "TrilinosWrappers::SparsityPattern")
+  SET(DEAL_II_EXPAND_TRILINOS_BLOCK_SPARSITY_PATTERN "TrilinosWrappers::BlockSparsityPattern")
+ENDMACRO()
+
+
+CONFIGURE_FEATURE(TRILINOS)
diff --git a/cmake/configure/configure_2_umfpack.cmake b/cmake/configure/configure_2_umfpack.cmake
new file mode 100644
index 0000000..e731d8c
--- /dev/null
+++ b/cmake/configure/configure_2_umfpack.cmake
@@ -0,0 +1,46 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Configuration for the umfpack library:
+#
+
+SET(FEATURE_UMFPACK_DEPENDS LAPACK)
+# Set the include directories of the bundled UMFPACK and AMD sources:
+MACRO(FEATURE_UMFPACK_CONFIGURE_BUNDLED)
+  SET(UMFPACK_BUNDLED_INCLUDE_DIRS
+    ${UMFPACK_FOLDER}/UMFPACK/Include
+    ${UMFPACK_FOLDER}/AMD/Include
+    )
+ENDMACRO()
+# Abort with a FATAL_ERROR message listing hints for locating UMFPACK:
+MACRO(FEATURE_UMFPACK_ERROR_MESSAGE)
+  MESSAGE(FATAL_ERROR "\n"
+    "Could not find umfpack and supporting libraries!\n"
+    "Please ensure that the libraries are installed on your computer.\n"
+    "If the libraries are not at a default location, either provide some hints\n"
+    "for the autodetection:\n"
+    "    $ UMFPACK_DIR=\"...\" cmake <...>\n"
+    "    $ cmake -DUMFPACK_DIR=\"...\" <...>\n"
+    "or set the relevant variables by hand in ccmake.\n"
+    "Relevant hints for UMFPACK are SUITESPARSE_DIR, UMFPACK_DIR\n"
+    "(AMD_DIR, CHOLMOD_DIR, COLAMD_DIR, SUITESPARSECONFIG_DIR.)\n"
+    "Alternatively you may choose to compile the bundled libraries\n"
+    "by setting DEAL_II_ALLOW_BUNDLED=ON or DEAL_II_FORCE_BUNDLED_UMFPACK=ON.\n"
+    "(BLAS and LAPACK have to be installed for bundled UMFPACK to be available)\n\n"
+    )
+ENDMACRO()
+
+CONFIGURE_FEATURE(UMFPACK)
diff --git a/cmake/configure/configure_3_petsc.cmake b/cmake/configure/configure_3_petsc.cmake
new file mode 100644
index 0000000..3bec8a9
--- /dev/null
+++ b/cmake/configure/configure_3_petsc.cmake
@@ -0,0 +1,129 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Configuration for the petsc library:
+#
+
+SET(FEATURE_PETSC_AFTER MPI)
+
+
+MACRO(FEATURE_PETSC_FIND_EXTERNAL var)
+  FIND_PACKAGE(PETSC)
+
+  IF(PETSC_FOUND)
+    #
+    # A petsc library was found: accept it unless a check below fails.
+    #
+    SET(${var} TRUE)
+
+    #
+    # Only petsc version 3.x.x and newer is supported:
+    #
+    IF(PETSC_VERSION_MAJOR LESS 3)
+      MESSAGE(STATUS "Could not find a sufficient modern PETSc installation: "
+        "Version >=3.0.0 required!"
+        )
+      SET(PETSC_ADDITIONAL_ERROR_STRING
+        "Could not find a sufficient modern PETSc installation: "
+        "Version >=3.0.0 required!\n"
+        )
+      SET(${var} FALSE)
+    ENDIF()
+
+    #
+    # PETSc has to be configured with the same MPI configuration as
+    # deal.II.
+    #
+    # petscconf.h should export PETSC_HAVE_MPIUNI 1 in case MPI support is
+    # _NOT_ enabled (presumably mirrored into PETSC_WITH_MPIUNI by
+    # FIND_PACKAGE(PETSC) -- confirm). Reject a mismatch in either direction:
+    #
+    IF( (PETSC_WITH_MPIUNI AND DEAL_II_WITH_MPI)
+         OR
+         (NOT PETSC_WITH_MPIUNI AND NOT DEAL_II_WITH_MPI))
+      MESSAGE(STATUS "Could not find a sufficient PETSc installation: "
+        "PETSc has to be configured with the same MPI configuration as deal.II."
+        )
+      SET(PETSC_ADDITIONAL_ERROR_STRING
+        ${PETSC_ADDITIONAL_ERROR_STRING}
+        "Could not find a sufficient PETSc installation:\n"
+        "PETSc has to be configured with the same MPI configuration as deal.II, but found:\n"
+        "  DEAL_II_WITH_MPI = ${DEAL_II_WITH_MPI}\n"
+        "  PETSC_WITH_MPI   = (NOT ${PETSC_WITH_MPIUNI})\n"
+        )
+      SET(${var} FALSE)
+    ENDIF()
+
+    #
+    # PETSc has to be configured with the same number of bits for indices as
+    # deal.II.
+    #
+    # petscconf.h should export PETSC_USE_64BIT_INDICES 1 in case 64 bit
+    # indices support is enabled (presumably mirrored into
+    # PETSC_WITH_64BIT_INDICES by FIND_PACKAGE(PETSC) -- confirm):
+    #
+    IF( (NOT PETSC_WITH_64BIT_INDICES AND DEAL_II_WITH_64BIT_INDICES)
+         OR
+         (PETSC_WITH_64BIT_INDICES AND NOT DEAL_II_WITH_64BIT_INDICES))
+      MESSAGE(STATUS "Could not find a sufficient PETSc installation: "
+        "PETSc has to be configured to use the same number of bits for the "
+        "global indices as deal.II."
+        )
+      SET(PETSC_ADDITIONAL_ERROR_STRING
+        ${PETSC_ADDITIONAL_ERROR_STRING}
+        "Could not find a sufficient PETSc installation:\n"
+        "PETSc has to be configured to use the same number of bits for the "
+        "global indices as deal.II, but found:\n"
+        "  DEAL_II_WITH_64BIT_INDICES = ${DEAL_II_WITH_64BIT_INDICES}\n"
+        "  PETSC_WITH_64BIT_INDICES = (${PETSC_WITH_64BIT_INDICES})\n"
+        )
+      SET(${var} FALSE)
+    ENDIF()
+
+    CHECK_MPI_INTERFACE(PETSC ${var})
+  ENDIF()
+ENDMACRO()
+
+# Publish the names of the PETSc wrapper vector classes as DEAL_II_EXPAND_* variables:
+MACRO(FEATURE_PETSC_CONFIGURE_EXTERNAL)
+  SET(DEAL_II_EXPAND_PETSC_MPI_BLOCKVECTOR "PETScWrappers::MPI::BlockVector")
+  SET(DEAL_II_EXPAND_PETSC_MPI_VECTOR "PETScWrappers::MPI::Vector")
+  SET(DEAL_II_EXPAND_PETSC_BLOCKVECTOR "PETScWrappers::BlockVector")
+  SET(DEAL_II_EXPAND_PETSC_VECTOR "PETScWrappers::Vector")
+ENDMACRO()
+
+# Abort with a fatal error; includes PETSC_ADDITIONAL_ERROR_STRING if it was set:
+MACRO(FEATURE_PETSC_ERROR_MESSAGE)
+  MESSAGE(FATAL_ERROR "\n"
+    "Could not find the petsc library!\n"
+    ${PETSC_ADDITIONAL_ERROR_STRING}
+    "\nPlease ensure that the petsc library version 3.0.0 or newer is "
+    "installed on your computer and is configured with the same mpi options "
+    "as deal.II\n"
+    "If the library is not at a default location, either provide some hints\n"
+    "for the autodetection:\n"
+    "PETSc installed with --prefix=<...> to a destination:\n"
+    "    $ PETSC_DIR=\"...\" cmake <...>\n"
+    "    $ cmake -DPETSC_DIR=\"...\" <...>\n"
+    "PETSc compiled in source tree:\n"
+    "    $ PETSC_DIR=\"...\"  PETSC_ARCH=\"...\" cmake <...>\n"
+    "    $ cmake -DPETSC_DIR=\"...\" -DPETSC_ARCH=\"...\" <...>\n"
+    "or set the relevant variables by hand in ccmake.\n\n"
+    )
+ENDMACRO()
+
+
+CONFIGURE_FEATURE(PETSC)
diff --git a/cmake/configure/configure_arpack.cmake b/cmake/configure/configure_arpack.cmake
new file mode 100644
index 0000000..fa0e449
--- /dev/null
+++ b/cmake/configure/configure_arpack.cmake
@@ -0,0 +1,23 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Configuration for the ARPACK library (FEATURE_ARPACK_DEPENDS lists LAPACK).
+# ARPACK_WITH_PARPACK is re-exported as DEAL_II_ARPACK_WITH_PARPACK below.
+#
+SET(FEATURE_ARPACK_DEPENDS LAPACK)
+
+CONFIGURE_FEATURE(ARPACK)
+SET(DEAL_II_ARPACK_WITH_PARPACK ${ARPACK_WITH_PARPACK})
diff --git a/cmake/configure/configure_boost.cmake b/cmake/configure/configure_boost.cmake
new file mode 100644
index 0000000..a097da7
--- /dev/null
+++ b/cmake/configure/configure_boost.cmake
@@ -0,0 +1,87 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2016 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Configuration for the boost library:
+#
+
+SET(DEAL_II_WITH_BOOST ON # Always ON and forced into the cache: boost is mandatory.
+  CACHE BOOL "Build deal.II with support for boost." FORCE
+  )
+
+
+MACRO(FEATURE_BOOST_CONFIGURE_BUNDLED)
+  SET(BOOST_BUNDLED_INCLUDE_DIRS ${BOOST_FOLDER}/include)
+
+  #
+  # boost/chrono needs librt; add it to the link interface when available.
+  #
+  FIND_SYSTEM_LIBRARY(rt_LIBRARY NAMES rt)
+  MARK_AS_ADVANCED(rt_LIBRARY)
+  IF(NOT rt_LIBRARY MATCHES "-NOTFOUND")
+    SET(BOOST_LIBRARIES ${rt_LIBRARY})
+  ENDIF()
+
+  ENABLE_IF_SUPPORTED(BOOST_CXX_FLAGS "-Wno-unused-local-typedefs")
+
+  IF(CMAKE_SYSTEM_NAME MATCHES "Windows")
+    #
+    # On Windows the bundled boost tries to (dl)open itself as a dynamic
+    # library. Export BOOST_ALL_NO_LIB (for bundled boost only) to switch
+    # this undesired behavior off.
+    #
+    LIST(APPEND BOOST_USER_DEFINITIONS "BOOST_ALL_NO_LIB")
+    LIST(APPEND BOOST_DEFINITIONS "BOOST_ALL_NO_LIB")
+  ENDIF()
+ENDMACRO()
+
+MACRO(FEATURE_BOOST_FIND_EXTERNAL var)
+  FIND_PACKAGE(BOOST)
+
+  IF(BOOST_FOUND)
+    #
+    # Accept the external boost unless it is version 1.58, which is
+    # blacklisted because we get serialization errors with it. At least
+    # version 1.56 and 1.59 are known to work.
+    #
+    SET(${var} TRUE)
+    IF("${BOOST_VERSION_MAJOR}.${BOOST_VERSION_MINOR}" STREQUAL "1.58")
+      MESSAGE(STATUS "Boost version 1.58 is not compatible with deal.II!")
+      SET(${var} FALSE)
+    ENDIF()
+  ENDIF()
+ENDMACRO()
+# External boost: only request -Wno-unused-local-typedefs (presumably added if the compiler supports it).
+MACRO(FEATURE_BOOST_CONFIGURE_EXTERNAL)
+  ENABLE_IF_SUPPORTED(BOOST_CXX_FLAGS "-Wno-unused-local-typedefs")
+ENDMACRO()
+
+
+CONFIGURE_FEATURE(BOOST)
+
+
+#
+# boost is mandatory: stop if DEAL_II_WITH_BOOST is not set after the
+# feature configuration. With autodetection enabled the feature error
+# message is used, otherwise a generic fatal error is raised.
+#
+IF(NOT DEAL_II_WITH_BOOST AND DEAL_II_FEATURE_AUTODETECTION)
+  FEATURE_ERROR_MESSAGE("BOOST")
+ELSEIF(NOT DEAL_II_WITH_BOOST)
+  MESSAGE(FATAL_ERROR "\n"
+    "Unmet configuration requirements: "
+    "DEAL_II_WITH_BOOST required, but set to OFF!.\n\n"
+    )
+ENDIF()
diff --git a/cmake/configure/configure_hdf5.cmake b/cmake/configure/configure_hdf5.cmake
new file mode 100644
index 0000000..5666a51
--- /dev/null
+++ b/cmake/configure/configure_hdf5.cmake
@@ -0,0 +1,48 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Configuration for the hdf5 library:
+#
+
+SET(FEATURE_HDF5_AFTER MPI)
+
+
+MACRO(FEATURE_HDF5_FIND_EXTERNAL var)
+  FIND_PACKAGE(HDF5)
+
+  IF(HDF5_FOUND)
+    SET(${var} TRUE)
+    # hdf5 and deal.II have to agree on whether MPI is used:
+    IF( (NOT HDF5_WITH_MPI AND DEAL_II_WITH_MPI) OR
+        (HDF5_WITH_MPI AND NOT DEAL_II_WITH_MPI) )
+      SET(${var} FALSE)
+      MESSAGE(STATUS "Insufficient hdf5 installation found: "
+        "hdf5 has to be configured with the same MPI configuration as deal.II."
+        )
+      SET(HDF5_ADDITIONAL_ERROR_STRING
+        "Insufficient hdf5 installation found!\n"
+        "hdf5 has to be configured with the same MPI configuration as deal.II, but found:\n"
+        "  DEAL_II_WITH_MPI = ${DEAL_II_WITH_MPI}\n"
+        "  HDF5_WITH_MPI    = ${HDF5_WITH_MPI}\n"
+        )
+    ENDIF()
+
+    CHECK_MPI_INTERFACE(HDF5 ${var})
+  ENDIF()
+ENDMACRO()
+
+
+CONFIGURE_FEATURE(HDF5)
diff --git a/cmake/configure/configure_muparser.cmake b/cmake/configure/configure_muparser.cmake
new file mode 100644
index 0000000..0e74e3c
--- /dev/null
+++ b/cmake/configure/configure_muparser.cmake
@@ -0,0 +1,25 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2013 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Configuration for the MUPARSER library. The only feature specific setup
+# in this file is the include directory of the bundled muparser.
+#
+MACRO(FEATURE_MUPARSER_CONFIGURE_BUNDLED)
+  SET(MUPARSER_BUNDLED_INCLUDE_DIRS ${MUPARSER_FOLDER}/include)
+ENDMACRO()
+
+
+CONFIGURE_FEATURE(MUPARSER)
diff --git a/cmake/configure/configure_netcdf.cmake b/cmake/configure/configure_netcdf.cmake
new file mode 100644
index 0000000..710f1ca
--- /dev/null
+++ b/cmake/configure/configure_netcdf.cmake
@@ -0,0 +1,20 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Configuration for the netcdf library. No feature specific macros are
+# defined in this file; everything is left to CONFIGURE_FEATURE:
+#
+CONFIGURE_FEATURE(NETCDF)
diff --git a/cmake/configure/configure_opencascade.cmake b/cmake/configure/configure_opencascade.cmake
new file mode 100644
index 0000000..090cc02
--- /dev/null
+++ b/cmake/configure/configure_opencascade.cmake
@@ -0,0 +1,20 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Configuration for the OpenCASCADE library. No feature specific macros
+# are defined in this file; everything is left to CONFIGURE_FEATURE:
+#
+CONFIGURE_FEATURE(OPENCASCADE)
diff --git a/cmake/configure/configure_p4est.cmake b/cmake/configure/configure_p4est.cmake
new file mode 100644
index 0000000..cfc0afc
--- /dev/null
+++ b/cmake/configure/configure_p4est.cmake
@@ -0,0 +1,63 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Configuration for the p4est and sc libraries:
+#
+
+SET(FEATURE_P4EST_DEPENDS MPI)
+
+
+MACRO(FEATURE_P4EST_FIND_EXTERNAL var)
+  FIND_PACKAGE(P4EST)
+
+  IF(P4EST_FOUND)
+    SET(${var} TRUE)
+
+    #
+    # Versions older than 0.3.4.1 are rejected:
+    #
+    IF(P4EST_VERSION VERSION_LESS "0.3.4.1")
+      SET(${var} FALSE)
+      MESSAGE(STATUS "Insufficient p4est installation found: "
+        "At least version 0.3.4.1 is required."
+        )
+      SET(P4EST_ADDITIONAL_ERROR_STRING
+        "Insufficient p4est installation found!\n"
+        "At least version 0.3.4.1 is required.\n"
+        )
+    ENDIF()
+
+    #
+    # A p4est that was configured without MPI is rejected as well:
+    #
+    IF(NOT P4EST_WITH_MPI)
+      SET(${var} FALSE)
+      MESSAGE(STATUS "Insufficient p4est installation found: "
+        "p4est has to be configured with MPI enabled."
+        )
+      SET(P4EST_ADDITIONAL_ERROR_STRING
+        ${P4EST_ADDITIONAL_ERROR_STRING}
+        "Insufficient p4est installation found!\n"
+        "p4est has to be configured with MPI enabled.\n"
+        )
+    ENDIF()
+
+    CHECK_MPI_INTERFACE(P4EST ${var})
+  ENDIF()
+ENDMACRO()
+
+
+CONFIGURE_FEATURE(P4EST)
diff --git a/cmake/configure/configure_slepc.cmake b/cmake/configure/configure_slepc.cmake
new file mode 100644
index 0000000..35d139e
--- /dev/null
+++ b/cmake/configure/configure_slepc.cmake
@@ -0,0 +1,80 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Configuration for the SLEPC library:
+#
+
+SET(FEATURE_SLEPC_DEPENDS PETSC)
+
+
+MACRO(FEATURE_SLEPC_FIND_EXTERNAL var)
+  FIND_PACKAGE(SLEPC)
+
+  IF(SLEPC_FOUND)
+    #
+    # SLEPc's compatibility rule: SLEPc and PETSc fit together if their
+    # VERSION_MAJOR and VERSION_MINOR agree; the VERSION_SUBMINORs are
+    # allowed to differ. Everything else is treated as an unusable
+    # installation.
+    #
+    IF(NOT ( ("${SLEPC_VERSION_MAJOR}" STREQUAL "${PETSC_VERSION_MAJOR}")
+             AND
+             ("${SLEPC_VERSION_MINOR}" STREQUAL "${PETSC_VERSION_MINOR}") ))
+      SET(${var} FALSE)
+
+      MESSAGE(STATUS "Could not find a sufficient SLEPc installation: "
+        "The SLEPc library must have the same version as the PETSc library."
+        )
+      SET(SLEPC_ADDITIONAL_ERROR_STRING
+        "Could not find a sufficient SLEPc installation: "
+        "The SLEPc library must have the same version as the PETSc library.\n"
+        )
+      # Drop the cached search results and show SLEPC_DIR again (presumably
+      # so that a different installation can be selected on the next run):
+      UNSET(SLEPC_INCLUDE_DIR_ARCH CACHE)
+      UNSET(SLEPC_INCLUDE_DIR_COMMON CACHE)
+      UNSET(SLEPC_LIBRARY CACHE)
+      SET(SLEPC_DIR "" CACHE PATH
+        "An optional hint to a SLEPc directory"
+        )
+      MARK_AS_ADVANCED(CLEAR SLEPC_DIR)
+    ELSE()
+      SET(${var} TRUE)
+    ENDIF()
+  ENDIF()
+ENDMACRO()
+
+# Abort with a fatal error; includes SLEPC_ADDITIONAL_ERROR_STRING if it was set:
+MACRO(FEATURE_SLEPC_ERROR_MESSAGE)
+  MESSAGE(FATAL_ERROR "\n"
+    "Could not find the SLEPc library!\n"
+    ${SLEPC_ADDITIONAL_ERROR_STRING}
+    "Please ensure that the SLEPc library version 3.0.0 or newer is installed on your computer\n"
+    "and the version is the same as the one of the installed PETSc library.\n"
+    "If the library is not at a default location, either provide some hints\n"
+    "for the autodetection:\n"
+    "SLEPc installed with --prefix=<...> to a destination:\n"
+    "    $ SLEPC_DIR=\"...\" cmake <...>\n"
+    "    $ cmake -DSLEPC_DIR=\"...\" <...>\n"
+    "SLEPc compiled in source tree:\n"
+    "    $ SLEPC_DIR=\"...\"\n"
+    "    $ cmake -DSLEPC_DIR=\"...\"\n"
+    "or set the relevant variables by hand in ccmake.\n\n"
+    )
+ENDMACRO()
+
+
+CONFIGURE_FEATURE(SLEPC)
diff --git a/cmake/cpack-mac-bundle/Info.plist.in b/cmake/cpack-mac-bundle/Info.plist.in
new file mode 100644
index 0000000..d3332c0
--- /dev/null
+++ b/cmake/cpack-mac-bundle/Info.plist.in
@@ -0,0 +1,30 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>CFBundleDevelopmentRegion</key>
+	<string>English</string>
+	<key>CFBundleExecutable</key>
+	<string>@DEAL_II_PACKAGE_NAME@</string>
+	<key>CFBundleGetInfoString</key>
+	<string>@DEAL_II_PACKAGE_VERSION@</string>
+	<key>CFBundleIconFile</key>
+	<string>deal.II</string>
+	<key>CFBundleIdentifier</key>
+	<string>org.dealii</string>
+	<key>CFBundleInfoDictionaryVersion</key>
+	<string>6.0</string>
+	<key>CFBundlePackageType</key>
+	<string>APPL</string>
+	<key>CFBundleShortVersionString</key>
+	<string>@DEAL_II_PACKAGE_VERSION@</string>
+	<key>CFBundleSignature</key>
+	<string>@DEAL_II_PACKAGE_NAME@</string>
+	<key>CFBundleVersion</key>
+	<string>@DEAL_II_PACKAGE_VERSION@</string>
+	<key>NSHumanReadableCopyright</key>
+	<string>GPL</string>
+	<key>LSMinimumSystemVersion</key>
+	<string>10.9</string>
+</dict>
+</plist>
diff --git a/cmake/cpack-mac-bundle/dealii-icon.icns b/cmake/cpack-mac-bundle/dealii-icon.icns
new file mode 100644
index 0000000..cda670c
Binary files /dev/null and b/cmake/cpack-mac-bundle/dealii-icon.icns differ
diff --git a/cmake/cpack-mac-bundle/dealii-terminal.in b/cmake/cpack-mac-bundle/dealii-terminal.in
new file mode 100755
index 0000000..71b887d
--- /dev/null
+++ b/cmake/cpack-mac-bundle/dealii-terminal.in
@@ -0,0 +1,67 @@
+#!/bin/bash
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2014 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library. It is a modified version
+## of the file FEniCS terminal from the FEniCS project.
+##
+## This library is free software; you can redistribute it and/or
+## modify it under the terms of the GNU Lesser General Public
+## License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+##
+## ---------------------------------------------------------------------
+
+if [ "$BASH_SOURCE" == "$0" ]   # executed directly
+then
+  export DEAL_II_BUNDLE="$(echo "$0" | sed -e 's|/Contents/.*||')"
+  export DEAL_II_RESOURCES="$DEAL_II_BUNDLE/Contents/Resources"
+else                            # sourced: $0 is the calling shell, use BASH_SOURCE
+  export DEAL_II_BUNDLE="$(echo "$BASH_SOURCE" | sed -e 's|/Contents/.*||')"
+  export DEAL_II_RESOURCES="$DEAL_II_BUNDLE/Contents/Resources"
+fi
+
+# Fall back to the default install location if the bundle path came out empty
+if [ -z "$DEAL_II_BUNDLE" ]; then
+    export DEAL_II_BUNDLE=/Applications/deal.II.app
+    if [ ! -d "$DEAL_II_BUNDLE" ]; then
+        return=`/usr/bin/osascript <<EOF
+tell app "System Events"
+    Activate
+    display dialog "This commands need to know where you installed the deal.II application. If you moved it or renamed it, and you want to run this script from the command line, export the variable DEAL_II_BUNDLE to the correct path." buttons "OK" default button 1 with title "Location of deal.II changed." with icon 0
+end tell
+EOF`
+    exit 1
+    fi
+fi
+
+if [ -z "$DEAL_II_RESOURCES" ]; then
+    export DEAL_II_RESOURCES="$DEAL_II_BUNDLE/Contents/Resources"
+fi
+
+# Make sure XCode is installed
+if ! [[ -x /usr/bin/xcodebuild ]]; then
+    return=`/usr/bin/osascript <<EOF
+tell app "System Events"
+    Activate
+    display dialog "This package requires XCode to be installed in order to run. Please install XCode from the OS X install disc and try again." buttons "OK" default button 1 with title "XCode Missing" with icon 0
+end tell
+EOF`
+    exit 1
+fi
+
+# Locate the shell configuration; if it exists, start an interactive bash
+# that reads it as its rc file (quoted: the bundle path may contain spaces).
+CONF="$DEAL_II_RESOURCES/@DEAL_II_SHARE_RELDIR@/dealii.conf"
+
+if ! [[ -f "$CONF" ]]; then
+    return=`/usr/bin/osascript <<EOF
+tell app "System Events"
+    Activate
+    display dialog "I could not find the file $CONF. Make sure your deal.II app is installed correctly." buttons "OK" default button 1 with title "deal.II configuration missing" with icon 0
+end tell
+EOF`
+    exit 1
+fi
+/bin/bash --rcfile "$CONF" -i
diff --git a/cmake/cpack-mac-bundle/dealii.conf.in b/cmake/cpack-mac-bundle/dealii.conf.in
new file mode 100644
index 0000000..3cc47d4
--- /dev/null
+++ b/cmake/cpack-mac-bundle/dealii.conf.in
@@ -0,0 +1,88 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2014 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library. It is a modified version
+## of the file FEniCS terminal from the FEniCS project.
+##
+## This library is free software; you can redistribute it and/or
+## modify it under the terms of the GNU Lesser General Public
+## License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+##
+## ---------------------------------------------------------------------
+
+# Fall back to the default install location if DEAL_II_BUNDLE is not set
+if [ -z "$DEAL_II_BUNDLE" ]; then
+    export DEAL_II_BUNDLE=/Applications/deal.II.app
+    if [ ! -d "$DEAL_II_BUNDLE" ]; then
+    cat << EOF
+*** Warning: I did not find the deal.II installation in $DEAL_II_BUNDLE.
+    Could not set the environment to work with deal.II. Either reinstall
+    or set the environment variable DEAL_II_BUNDLE to the correct value.
+    Things may not work as you expect if you continue.
+EOF
+    fi
+fi
+
+# Check if DEAL_II_RESOURCES is set, otherwise set it to the default
+if [ -z "$DEAL_II_RESOURCES" ]; then
+    export DEAL_II_RESOURCES="$DEAL_II_BUNDLE/Contents/Resources"
+fi
+
+# Put the bundled bin directory first in PATH and point DEAL_II_DIR at the bundle
+export PATH="$DEAL_II_RESOURCES/bin:$PATH"
+export DEAL_II_DIR="$DEAL_II_RESOURCES"
+
+# Source the settings of the external libraries, if present (quoted: path may contain spaces)
+if [ -f "$DEAL_II_RESOURCES/opt/external.conf" ]; then
+    . "$DEAL_II_RESOURCES/opt/external.conf"
+fi
+
+if [ -z "$DEAL_II_CONF_SILENT" ]; then  # banner is skipped when DEAL_II_CONF_SILENT is set
+    # Clear screen
+    clear
+
+    # Set prompt (disabled)
+    # export PS1="\[\033[01;32m\]\u@\h:\[\033[01;34m\]\W\$ \[\033[00m\]"
+
+    # Set terminal title (disabled)
+    # echo -n -e "\033]0;deal.II - an open source finite element library DEAL_II \007"
+
+    cat << EOF
+        ______           _  _____ _____
+        |  _  \         | ||_   _|_   _|
+        | | | |___  __ _| |  | |   | |
+        | | | / _ \/ _| | |  | |   | |
+        | |/ /  __/ (_| | |__| |_ _| |_
+        |___/ \___|\__,_|_(_)___/ \___/
+
+This is a shell with PATHs and \${EXTERNAL_LIB}_DIR setup to work with Deal.II.
+All external libraries are located in 
+
+    $DEAL_II_RESOURCES/opt/
+
+If you are new to Deal.II you probably want to have a look at
+
+    $DEAL_II_RESOURCES/examples/
+
+For more information please visit the project website:
+
+http://www.dealii.org
+
+If you want to set up your daily Terminal to work with deal.II, add
+these lines to your ~/.profile file (the first line turns off this message):
+
+   export DEAL_II_CONF_SILENT=ON
+   . $DEAL_II_RESOURCES/@DEAL_II_SHARE_RELDIR@/dealii.conf
+
+EOF
+fi
+
+if [[ ! -x /usr/bin/xcodebuild ]]; then  # warn only, the shell setup continues
+    cat << EOF
+*** Warning: This package requires XCode to be installed in order to run.
+    Please install XCode from the OS X install disc before you continue.
+
+EOF
+fi
diff --git a/cmake/cpack-mac-bundle/mac_startup_script.sh.in b/cmake/cpack-mac-bundle/mac_startup_script.sh.in
new file mode 100755
index 0000000..060f6ac
--- /dev/null
+++ b/cmake/cpack-mac-bundle/mac_startup_script.sh.in
@@ -0,0 +1,26 @@
+#!/bin/bash
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2014 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library. It is a modified version
+## of the file FEniCS terminal from the FEniCS project.
+##
+## This library is free software; you can redistribute it and/or
+## modify it under the terms of the GNU Lesser General Public
+## License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+##
+## ---------------------------------------------------------------------
+# Executed directly: open the dealii-terminal script in the Terminal application.
+# Sourced: only set up the current shell by reading dealii.conf.
+if [ "$BASH_SOURCE" == "$0" ]
+then
+  export DEAL_II_BUNDLE="$(echo "$0" | sed -e 's|/Contents/MacOS/.*||')"
+  export DEAL_II_RESOURCES="$DEAL_II_BUNDLE/Contents/Resources"
+  open -a Terminal "$DEAL_II_RESOURCES/@DEAL_II_EXECUTABLE_RELDIR@/dealii-terminal"
+else
+  export DEAL_II_BUNDLE="$(echo "$BASH_SOURCE" | sed -e 's|/Contents/MacOS/.*||')"
+  export DEAL_II_RESOURCES="$DEAL_II_BUNDLE/Contents/Resources"
+  source "$DEAL_II_RESOURCES/@DEAL_II_SHARE_RELDIR@/dealii.conf"
+fi
diff --git a/cmake/macros/macro_add_flags.cmake b/cmake/macros/macro_add_flags.cmake
new file mode 100644
index 0000000..0f21bf2
--- /dev/null
+++ b/cmake/macros/macro_add_flags.cmake
@@ -0,0 +1,31 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# A small macro used for (string-)appending a string "${flags}" to a
+# string "${variable}"
+#
+# Usage:
+#     ADD_FLAGS(variable flags)
+#
+
+MACRO(ADD_FLAGS _variable _flags)
+  # Flags that consist of whitespace only leave the variable untouched.
+  STRING(STRIP "${_flags}" _flags_stripped)
+  IF(NOT "${_flags_stripped}" STREQUAL "")
+    STRING(STRIP "${${_variable}} ${_flags}" ${_variable})
+  ENDIF()
+ENDMACRO()
+
diff --git a/cmake/macros/macro_check_cxx_compiler_bug.cmake b/cmake/macros/macro_check_cxx_compiler_bug.cmake
new file mode 100644
index 0000000..270912f
--- /dev/null
+++ b/cmake/macros/macro_check_cxx_compiler_bug.cmake
@@ -0,0 +1,46 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Check for a compiler bug.
+#
+# Usage:
+#     CHECK_CXX_COMPILER_BUG(source var),
+#
+# where source is a snippet of source code and var is a variable that will
+# be set to true if the source could not be compiled and linked successfully.
+# (This just inverts the logic of CHECK_CXX_SOURCE_COMPILES.)
+#
+
+MACRO(CHECK_CXX_COMPILER_BUG _source _var)
+  # Run (and report) the compile test only while ${_var}_OK is undefined.
+  IF(NOT DEFINED ${_var}_OK)
+    CHECK_CXX_SOURCE_COMPILES("${_source}" ${_var}_OK)
+    IF(NOT ${_var}_OK)
+      MESSAGE(STATUS "Test unsuccessful, define ${_var}")
+    ELSE()
+      MESSAGE(STATUS "Test successful, do not define ${_var}")
+    ENDIF()
+  ENDIF()
+
+  # Inverted logic: ${_var} is set to TRUE if the test failed and is
+  # unset otherwise.
+  IF(NOT ${_var}_OK)
+    SET(${_var} TRUE)
+  ELSE()
+    SET(${_var})
+  ENDIF()
+ENDMACRO()
+
diff --git a/cmake/macros/macro_check_mpi_interface.cmake b/cmake/macros/macro_check_mpi_interface.cmake
new file mode 100644
index 0000000..2d80db3
--- /dev/null
+++ b/cmake/macros/macro_check_mpi_interface.cmake
@@ -0,0 +1,70 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Check whether a feature is compiled against the same MPI library as the
+# one deal.II picked up
+#
+# Usage:
+#     CHECK_MPI_INTERFACE(_feature _var),
+#
+
+MACRO(CHECK_MPI_INTERFACE _feature _var)
+  IF(DEAL_II_WITH_MPI)
+    # _nope becomes TRUE if ${_feature} links an MPI library deal.II does not use.
+    SET(_nope FALSE)
+
+    FOREACH(_library ${${_feature}_LIBRARIES})
+      IF( _library MATCHES "/libmpi(|_cxx)\\.(a|so)[^/]*$")
+        # Compare by real path so that symlinked library names still match:
+        GET_FILENAME_COMPONENT(_file1 ${_library} REALPATH)
+
+        SET(_not_found TRUE)
+        FOREACH(_mpi_library ${MPI_LIBRARIES})
+          GET_FILENAME_COMPONENT(_file2 ${_mpi_library} REALPATH)
+          IF("${_file1}" STREQUAL "${_file2}")
+            SET(_not_found FALSE)
+            BREAK()
+          ENDIF()
+        ENDFOREACH()
+        # The first MPI library that is not in MPI_LIBRARIES ends the search:
+        IF(_not_found)
+          SET(_nope TRUE)
+          SET(_spurious_library ${_library})
+          BREAK()
+        ENDIF()
+      ENDIF()
+    ENDFOREACH()
+    # Record the mismatch in the error string of the checked feature and reject it:
+    IF(_nope)
+      MESSAGE(STATUS "Could not find a sufficient ${_feature} installation: "
+        "${_feature} is compiled against a different MPI library than the one "
+        "deal.II picked up."
+        )
+      TO_STRING(_str ${MPI_LIBRARIES})
+      SET(${_feature}_ADDITIONAL_ERROR_STRING
+        ${${_feature}_ADDITIONAL_ERROR_STRING}
+        "Could not find a sufficient ${_feature} installation:\n"
+        "${_feature} has to be compiled against the same MPI library as deal.II "
+        "but the link line of ${_feature} contains:\n"
+        "  ${_spurious_library}\n"
+        "which is not listed in MPI_LIBRARIES:\n"
+        "  MPI_LIBRARIES = \"${_str}\"\n"
+        )
+      SET(${_var} FALSE)
+    ENDIF()
+  ENDIF()
+ENDMACRO()
+
diff --git a/cmake/macros/macro_clear_cmake_required.cmake b/cmake/macros/macro_clear_cmake_required.cmake
new file mode 100644
index 0000000..f178854
--- /dev/null
+++ b/cmake/macros/macro_clear_cmake_required.cmake
@@ -0,0 +1,28 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2013 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Unset CMAKE_REQUIRED_FLAGS, CMAKE_REQUIRED_INCLUDES, CMAKE_REQUIRED_LIBRARIES.
+#
+# Usage:
+#     CLEAR_CMAKE_REQUIRED()
+#
+
+MACRO(CLEAR_CMAKE_REQUIRED)
+  UNSET(CMAKE_REQUIRED_FLAGS)
+  UNSET(CMAKE_REQUIRED_INCLUDES)
+  UNSET(CMAKE_REQUIRED_LIBRARIES)
+ENDMACRO()
+
diff --git a/cmake/macros/macro_configure_feature.cmake b/cmake/macros/macro_configure_feature.cmake
new file mode 100644
index 0000000..c5cbf15
--- /dev/null
+++ b/cmake/macros/macro_configure_feature.cmake
@@ -0,0 +1,297 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# This macro is used for the feature configuration in deal.II
+#
+# Usage:
+#     CONFIGURE_FEATURE(feature)
+#
+#
+# This macro uses the following optional variables and macros:
+#
+# FEATURE_${feature}_DEPENDS    (a variable)
+#    a variable which contains an optional list of other features
+#    this feature depends on (and which have to be enabled for this feature
+#    to work.)
+#    Features must be given with short name, i.e. without DEAL_II_WITH_
+#
+# FEATURE_${feature}_AFTER      (a variable)
+#    a variable which contains an optional list of other features
+#    that have to be configured prior to this feature
+#    Features must be given with short name, i.e. without DEAL_II_WITH_
+#
+# FEATURE_${feature}_HAVE_BUNDLED   (a variable)
+#    which should either be set to TRUE if all necessary libraries of the
+#    feature come bundled with deal.II and hence can be supported
+#    without external dependencies, or unset.
+#
+# FEATURE_${feature}_CONFIGURE_BUNDLED()   (a macro)
+#    which should setup all necessary configuration for the feature with
+#    bundled source dependencies. If something goes wrong this macro must
+#    issue a FATAL_ERROR.
+#
+# FEATURE_${feature}_FIND_EXTERNAL(var)   (a macro)
+#    which should set var to TRUE if all dependencies for the feature are
+#    fulfilled. In this case all necessary variables for
+#    FEATURE_${feature}_CONFIGURE_EXTERNAL must be set. Otherwise
+#    var should remain unset.
+#    If not defined, FIND_PACKAGE(${feature}) is called.
+#
+# FEATURE_${feature}_CONFIGURE_EXTERNAL()   (macro)
+#    which should setup all necessary configuration for the feature with
+#    external dependencies.
+#
+# FEATURE_${feature}_ERROR_MESSAGE()  (macro)
+#    which should print a meaningful error message (with FATAL_ERROR) for
+#    the case that no usable library was found.
+#    If not defined, a suitable default error message will be printed.
+#
+
+
+########################################################################
+#                                                                      #
+#                            Helper Macros:                            #
+#                                                                      #
+########################################################################
+
+#
+# Some black magic to have substitution in command names: the command string
+# is written to a temporary file that is then INCLUDE()d, i.e. run as CMake code:
+MACRO(RUN_COMMAND _the_command)
+  FILE(WRITE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/macro_configure_feature.tmp"
+    "${_the_command}")
+  INCLUDE("${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/macro_configure_feature.tmp")
+ENDMACRO()
+
+
+#
+# Force the cache entry DEAL_II_WITH_${_str} to the boolean value ${_value}
+# (its help text mentions the lower-cased feature name):
+#
+MACRO(SET_CACHED_OPTION _str _value)
+  STRING(TOLOWER "${_str}" _str_lower)
+  SET(DEAL_II_WITH_${_str} ${_value} CACHE BOOL
+    "Build deal.II with support for ${_str_lower}."
+    FORCE
+    )
+ENDMACRO()
+
+
+#
+# Post the default FATAL_ERROR message for a feature whose library was not
+# found (includes ${_feature}_ADDITIONAL_ERROR_STRING and configuration hints):
+MACRO(FEATURE_ERROR_MESSAGE _feature)
+  STRING(TOLOWER ${_feature} _feature_lowercase)
+
+  IF(DEFINED ${_feature}_DIR)
+    SET(_hint_snippet "
+    $ ${_feature}_DIR=\"...\" cmake <...>
+    $ cmake -D${_feature}_DIR=\"...\" <...>
+or set the relevant variables by hand in ccmake."
+      )
+  ELSE()
+    SET(_hint_snippet " or set the relevant variables by hand in ccmake.")
+  ENDIF()
+
+  IF(FEATURE_${_feature}_HAVE_BUNDLED)
+    SET(_bundled_snippet
+      "\nAlternatively you may choose to compile the bundled library of "
+      "${_feature_lowercase} by setting DEAL_II_ALLOW_BUNDLED=on or "
+      "DEAL_II_FORCE_BUNDLED_${_feature}=on.\n"
+      )
+  ELSE()
+    SET(_bundled_snippet "\n")
+  ENDIF()
+
+  # _bundled_snippet is a list of string fragments: expand it unquoted,
+  # otherwise the list separators (";") would show up in the message.
+  MESSAGE(FATAL_ERROR "\n"
+    "Could not find the ${_feature_lowercase} library!\n"
+    ${${_feature}_ADDITIONAL_ERROR_STRING}
+    "Please ensure that a suitable ${_feature_lowercase} library is installed on your computer.\n"
+    "If the library is not at a default location, either provide some hints "
+    "for autodetection,${_hint_snippet}" ${_bundled_snippet}
+    )
+ENDMACRO()
+
+
+# Default macro for finding an external library: call
+# FIND_PACKAGE(${_feature}) and set ${_var} to TRUE if ${_feature}_FOUND
+# evaluates to true afterwards (otherwise ${_var} is left untouched).
+MACRO(FEATURE_FIND_EXTERNAL _feature _var)
+  FIND_PACKAGE(${_feature})
+  IF(${_feature}_FOUND)
+    SET(${_var} TRUE)
+  ENDIF()
+ENDMACRO()
+
+
+########################################################################
+#                                                                      #
+#                          CONFIGURE_FEATURE:                          #
+#                                                                      #
+########################################################################
+
+MACRO(CONFIGURE_FEATURE _feature)
+  #
+  # This script is arcane black magic. But at least for the greater good: We
+  # don't have to copy the configuration logic to every single
+  # configure_<feature>.cmake script...
+  #
+
+  #
+  # Check for correct include order of the configure_*.cmake files:
+  # If feature B explicitly states to come after feature A, or if feature B
+  # depends on feature A, configure_A.cmake has to be included before
+  # configure_B.cmake:
+  #
+  FOREACH(_dependency
+      ${FEATURE_${_feature}_AFTER}
+      ${FEATURE_${_feature}_DEPENDS}
+      )
+    IF(NOT FEATURE_${_dependency}_PROCESSED)
+      MESSAGE(FATAL_ERROR "\n"
+        "Internal build system error: The configuration of "
+        "DEAL_II_WITH_${_feature} depends on "
+        "DEAL_II_WITH_${_dependency}, but CONFIGURE_FEATURE(${_feature}) "
+        "was called before CONFIGURE_FEATURE(${_dependency}).\n\n"
+        )
+    ENDIF()
+  ENDFOREACH()
+
+  # Obey the user overrides: With autodetection disabled, a feature that is
+  # not explicitly requested (DEAL_II_WITH_${_feature} undefined) is
+  # purged and switched off:
+  IF( (NOT DEAL_II_ALLOW_AUTODETECTION) AND
+      (NOT DEFINED DEAL_II_WITH_${_feature}) )
+    PURGE_FEATURE(${_feature})
+    SET_CACHED_OPTION(${_feature} OFF)
+  ENDIF()
+
+  #
+  # Only try to configure ${_feature} if we have to, i.e.
+  # DEAL_II_WITH_${_feature} is set to true or not set at all.
+  #
+  IF((NOT DEFINED DEAL_II_WITH_${_feature}) OR
+     DEAL_II_WITH_${_feature})
+
+    #
+    # Are all dependencies fulfilled?
+    #
+    SET(_dependencies_ok TRUE)
+    FOREACH(_dependency ${FEATURE_${_feature}_DEPENDS})
+      IF(NOT DEAL_II_WITH_${_dependency})
+        IF(DEAL_II_WITH_${_feature})
+          MESSAGE(FATAL_ERROR "\n"
+            "DEAL_II_WITH_${_feature} has unmet configuration requirements: "
+            "DEAL_II_WITH_${_dependency} has to be set to \"ON\".\n\n"
+            )
+        ELSE()
+          MESSAGE(STATUS
+            "DEAL_II_WITH_${_feature} has unmet configuration requirements: "
+            "DEAL_II_WITH_${_dependency} has to be set to \"ON\"."
+            )
+          PURGE_FEATURE(${_feature})
+          SET_CACHED_OPTION(${_feature} OFF)
+        ENDIF()
+        SET(_dependencies_ok FALSE)
+      ENDIF()
+    ENDFOREACH()
+
+    IF(_dependencies_ok)
+      IF(DEAL_II_FORCE_BUNDLED_${_feature})
+        #
+        # First case: DEAL_II_FORCE_BUNDLED_${_feature} is set to true:
+        #
+
+        PURGE_FEATURE(${_feature})
+
+        IF(FEATURE_${_feature}_HAVE_BUNDLED)
+          RUN_COMMAND("FEATURE_${_feature}_CONFIGURE_BUNDLED()")
+          MESSAGE(STATUS "DEAL_II_WITH_${_feature} successfully set up with bundled packages.")
+          LIST(APPEND DEAL_II_FEATURES ${_feature})
+          SET(FEATURE_${_feature}_BUNDLED_CONFIGURED TRUE)
+          SET_CACHED_OPTION(${_feature} ON)
+        ELSE()
+          MESSAGE(FATAL_ERROR "\n"
+            "Internal build system error: DEAL_II_FORCE_BUNDLED_${_feature} "
+            "defined, but FEATURE_${_feature}_HAVE_BUNDLED not present.\n"
+            )
+        ENDIF()
+
+      ELSE(DEAL_II_FORCE_BUNDLED_${_feature})
+        #
+        # Second case: We are allowed to search for an external library
+        #
+        IF(COMMAND FEATURE_${_feature}_FIND_EXTERNAL)
+          RUN_COMMAND("FEATURE_${_feature}_FIND_EXTERNAL(FEATURE_${_feature}_EXTERNAL_FOUND)")
+        ELSE()
+          FEATURE_FIND_EXTERNAL(${_feature} FEATURE_${_feature}_EXTERNAL_FOUND)
+        ENDIF()
+
+        IF(FEATURE_${_feature}_EXTERNAL_FOUND)
+          IF(COMMAND FEATURE_${_feature}_CONFIGURE_EXTERNAL)
+            RUN_COMMAND("FEATURE_${_feature}_CONFIGURE_EXTERNAL()")
+          ENDIF()
+
+          MESSAGE(STATUS "DEAL_II_WITH_${_feature} successfully set up with external dependencies.")
+          LIST(APPEND DEAL_II_FEATURES ${_feature})
+          SET(FEATURE_${_feature}_EXTERNAL_CONFIGURED TRUE)
+          SET_CACHED_OPTION(${_feature} ON)
+
+        ELSE(FEATURE_${_feature}_EXTERNAL_FOUND)
+
+          PURGE_FEATURE(${_feature})
+
+          MESSAGE(STATUS "DEAL_II_WITH_${_feature} has unmet external dependencies.")
+
+          IF(FEATURE_${_feature}_HAVE_BUNDLED AND DEAL_II_ALLOW_BUNDLED)
+            RUN_COMMAND("FEATURE_${_feature}_CONFIGURE_BUNDLED()")
+
+            MESSAGE(STATUS "DEAL_II_WITH_${_feature} successfully set up with bundled packages.")
+            LIST(APPEND DEAL_II_FEATURES ${_feature})
+            SET(FEATURE_${_feature}_BUNDLED_CONFIGURED TRUE)
+            SET_CACHED_OPTION(${_feature} ON)
+
+          ELSE()
+            IF(DEAL_II_WITH_${_feature})
+              IF(COMMAND FEATURE_${_feature}_ERROR_MESSAGE)
+                RUN_COMMAND("FEATURE_${_feature}_ERROR_MESSAGE()")
+              ELSE()
+                FEATURE_ERROR_MESSAGE(${_feature})
+              ENDIF()
+            ELSE()
+              SET_CACHED_OPTION(${_feature} OFF)
+            ENDIF()
+          ENDIF()
+
+        ENDIF(FEATURE_${_feature}_EXTERNAL_FOUND)
+
+      ENDIF()
+    ENDIF()
+  ELSE()
+    #
+    # DEAL_II_WITH_${_feature} is defined and set to OFF, promote it to
+    # cache nevertheless:
+    #
+    MESSAGE(STATUS "DEAL_II_WITH_${_feature} is set to off.")
+    PURGE_FEATURE(${_feature})
+    SET_CACHED_OPTION(${_feature} OFF)
+  ENDIF()
+
+  SET(FEATURE_${_feature}_PROCESSED TRUE)
+
+ENDMACRO()
diff --git a/cmake/macros/macro_deal_ii_add_definitions.cmake b/cmake/macros/macro_deal_ii_add_definitions.cmake
new file mode 100644
index 0000000..86319ef
--- /dev/null
+++ b/cmake/macros/macro_deal_ii_add_definitions.cmake
@@ -0,0 +1,33 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# A small wrapper around
+# SET_PROPERTY(TARGET ... APPEND PROPERTY COMPILE_DEFINITIONS ...)
+# to _add_ compile definitions to ${_name}.<build> for every build type.
+#
+
+MACRO(DEAL_II_ADD_DEFINITIONS _name)
+
+  FOREACH(_build ${DEAL_II_BUILD_TYPES})
+    STRING(TOLOWER ${_build} _build_lowercase)
+
+    SET_PROPERTY(TARGET ${_name}.${_build_lowercase}
+      APPEND PROPERTY COMPILE_DEFINITIONS "${ARGN}"
+      )
+  ENDFOREACH()
+
+ENDMACRO()
+
diff --git a/cmake/macros/macro_deal_ii_add_dependencies.cmake b/cmake/macros/macro_deal_ii_add_dependencies.cmake
new file mode 100644
index 0000000..6a8062a
--- /dev/null
+++ b/cmake/macros/macro_deal_ii_add_dependencies.cmake
@@ -0,0 +1,31 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# A small wrapper around ADD_DEPENDENCIES that makes ${_name}.${build} depend
+# on ${_target}.${build}, where build runs through all (lower-cased) build
+# types specified in DEAL_II_BUILD_TYPES
+#
+
+MACRO(DEAL_II_ADD_DEPENDENCIES _name _target)
+
+  FOREACH(_build ${DEAL_II_BUILD_TYPES})
+    STRING(TOLOWER ${_build} _build_lowercase)
+    ADD_DEPENDENCIES(${_name}.${_build_lowercase}
+      ${_target}.${_build_lowercase}
+      )
+  ENDFOREACH()
+
+ENDMACRO()
diff --git a/cmake/macros/macro_deal_ii_add_library.cmake b/cmake/macros/macro_deal_ii_add_library.cmake
new file mode 100644
index 0000000..02f4ed9
--- /dev/null
+++ b/cmake/macros/macro_deal_ii_add_library.cmake
@@ -0,0 +1,52 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# A small wrapper around ADD_LIBRARY that defines a target
+# ${_library}.<build> for each build type in DEAL_II_BUILD_TYPES and appends
+# its $<TARGET_OBJECTS:...> to the global property DEAL_II_OBJECTS_${build}.
+#
+# It is assumed that the desired compilation configuration is set via
+#   DEAL_II_LINKER_FLAGS_${build}, DEAL_II_CXX_FLAGS_${build},
+#   DEAL_II_DEFINITIONS_${build}
+#
+# as well as the global (for all build types)
+#   DEAL_II_LINKER_FLAGS
+#   DEAL_II_CXX_FLAGS
+#   DEAL_II_DEFINITIONS
+#
+
+MACRO(DEAL_II_ADD_LIBRARY _library)
+
+  FOREACH(_build ${DEAL_II_BUILD_TYPES})
+    STRING(TOLOWER ${_build} _build_lowercase)
+
+    ADD_LIBRARY(${_library}.${_build_lowercase}
+      ${ARGN}
+      )
+
+    SET_TARGET_PROPERTIES(${_library}.${_build_lowercase} PROPERTIES
+      LINK_FLAGS "${DEAL_II_LINKER_FLAGS} ${DEAL_II_LINKER_FLAGS_${_build}}"
+      COMPILE_DEFINITIONS "${DEAL_II_DEFINITIONS};${DEAL_II_DEFINITIONS_${_build}}"
+      COMPILE_FLAGS "${DEAL_II_CXX_FLAGS} ${DEAL_II_CXX_FLAGS_${_build}}"
+      LINKER_LANGUAGE "CXX"
+      )
+
+    SET_PROPERTY(GLOBAL APPEND PROPERTY DEAL_II_OBJECTS_${_build}
+      "$<TARGET_OBJECTS:${_library}.${_build_lowercase}>"
+      )
+  ENDFOREACH()
+
+ENDMACRO()
diff --git a/cmake/macros/macro_deal_ii_add_test.cmake b/cmake/macros/macro_deal_ii_add_test.cmake
new file mode 100644
index 0000000..cdfaaf1
--- /dev/null
+++ b/cmake/macros/macro_deal_ii_add_test.cmake
@@ -0,0 +1,324 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2013 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# A Macro to set up tests for the testsuite
+#
+#
+# The testsuite distinguishes two different kinds of tests:
+#
+# - A combination of a source file "${test_name}.cc" (containing a main
+#   function) with a file "${comparison_file}" defines an executable that
+#   is compiled and linked against deal.II. Its output is compared with the
+#   comparison file. Additional libraries (like a library from a user
+#   project with code to test) the target should be linked against can be
+#   specified by
+#
+#     TEST_LIBRARIES
+#     TEST_LIBRARIES_DEBUG
+#     TEST_LIBRARIES_RELEASE
+#
+# - A combination of a parameter file "${test_name}.prm" with a file
+#   "${comparison_file}" describes the configuration of an already compiled
+#   executable that should just be run with its output being compared with
+#   the file "${comparison_file}". The executable is defined by
+#
+#     TEST_TARGET or
+#     TEST_TARGET_DEBUG and TEST_TARGET_RELEASE
+#
+# For every deal.II build type (given by the variable DEAL_II_BUILD_TYPES)
+# that is a (case insensitive) substring of CMAKE_BUILD_TYPE a test is
+# defined.
+#
+# This macro gets the following options from the comparison file name (have
+# a look at the testsuite documentation for details):
+#  - usage of mpirun and number of simultaneous processes
+#  - valid build configurations
+#  - expected test stage
+#
+# The following variables must be set:
+#
+#   NUMDIFF_EXECUTABLE, DIFF_EXECUTABLE
+#     - pointing to valid diff executables. If NUMDIFF_EXECUTABLE is not
+#       "numdiff" it will be ignored and DIFF_EXECUTABLE is used instead.
+#
+#   TEST_TIME_LIMIT
+#     - specifying the maximal wall clock time in seconds a test is allowed
+#       to run
+#
+# Usage:
+#     DEAL_II_ADD_TEST(category test_name comparison_file)
+#
+
+MACRO(DEAL_II_ADD_TEST _category _test_name _comparison_file)
+
+  IF(NOT DEAL_II_PROJECT_CONFIG_INCLUDED)
+    MESSAGE(FATAL_ERROR
+      "\nDEAL_II_ADD_TEST can only be called in external (test sub-) projects after "
+      "the inclusion of deal.IIConfig.cmake. It is not intended for "
+      "internal use.\n\n"
+      )
+  ENDIF()
+
+  GET_FILENAME_COMPONENT(_file ${_comparison_file} NAME)
+
+  #
+  # Determine valid build configurations for this test:
+  #
+  SET(_configuration)
+  IF(_file MATCHES "\\.debug\\.")
+    SET(_configuration DEBUG)
+  ELSEIF(_file MATCHES "\\.release\\.")
+    SET(_configuration RELEASE)
+  ENDIF()
+
+  #
+  # A "binary" in the output file indicates binary output. In this case we
+  # have to switch to plain diff instead of (possibly) numdiff, which can
+  # only work on plain text files.
+  #
+  IF(_file MATCHES "\\.binary\\.")
+    SET(_test_diff ${DIFF_EXECUTABLE})
+  ELSE()
+    SET(_test_diff ${NUMDIFF_EXECUTABLE})
+  ENDIF()
+
+  #
+  # Determine whether the test should be run with mpirun:
+  #
+  STRING(REGEX MATCH "mpirun=([0-9]*)" _n_cpu ${_file})
+  IF("${_n_cpu}" STREQUAL "")
+    SET(_n_cpu 0) # 0 indicates that no mpirun should be used
+  ELSE()
+    STRING(REGEX REPLACE "^mpirun=([0-9]*)$" "\\1" _n_cpu ${_n_cpu})
+  ENDIF()
+
+  #
+  # Determine the expected build stage of this test:
+  #
+  STRING(REGEX MATCH "expect=([a-z]*)" _expect ${_file})
+  IF("${_expect}" STREQUAL "")
+    SET(_expect "PASSED")
+  ELSE()
+    STRING(REGEX REPLACE "^expect=([a-z]*)$" "\\1" _expect ${_expect})
+    STRING(TOUPPER ${_expect} _expect)
+  ENDIF()
+
+  #
+  # Determine for which build types a test should be defined: every deal.II
+  # build type (list DEAL_II_BUILD_TYPES) that is a case insensitive substring
+  # of CMAKE_BUILD_TYPE (quoted below, so that an empty value is not an error):
+  #
+  SET(_build_types "")
+  STRING(TOLOWER "${CMAKE_BUILD_TYPE}" _cmake_build_type)
+  FOREACH(_build ${DEAL_II_BUILD_TYPES})
+    STRING(TOLOWER ${_build} _build_lowercase)
+    IF("${_cmake_build_type}" MATCHES "${_build_lowercase}")
+      LIST(APPEND _build_types ${_build})
+    ENDIF()
+  ENDFOREACH()
+
+  FOREACH(_build ${_build_types})
+
+    #
+    # Obey "debug" and "release" keywords in the output file:
+    #
+    ITEM_MATCHES(_match "${_build}" ${_configuration})
+    IF(_match OR "${_configuration}" STREQUAL "")
+
+      STRING(TOLOWER ${_build} _build_lowercase)
+
+      #
+      # Select a suitable target:
+      #
+      IF(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/${_test_name}.cc")
+
+        SET(_target ${_test_name}.${_build_lowercase}) # target name
+        SET(_run_command "$<TARGET_FILE:${_target}>") # the command to issue
+
+      ELSEIF(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/${_test_name}.prm")
+
+        IF(NOT "${TEST_TARGET_${_build}}" STREQUAL "")
+          SET(_target ${TEST_TARGET_${_build}})
+        ELSEIF(NOT "${TEST_TARGET}" STREQUAL "")
+          SET(_target ${TEST_TARGET})
+        ELSE()
+          MESSAGE(FATAL_ERROR
+            "\nFor ${_comparison_file}: \"${_test_name}.prm\" provided, but "
+            "neither \"\${TEST_TARGET}\", nor \"\${TEST_TARGET_${_build}}"
+            "\" is defined.\n\n"
+            )
+        ENDIF()
+        SET(_run_command "$<TARGET_FILE:${_target}> ${CMAKE_CURRENT_SOURCE_DIR}/${_test_name}.prm")
+
+      ELSE()
+        MESSAGE(FATAL_ERROR
+          "\nFor ${_comparison_file}: Neither \"${_test_name}.cc\", "
+          "nor \"${_test_name}.prm\" could be found!\n\n"
+          )
+      ENDIF()
+
+      #
+      # Set up a bunch of variables describing this particular test:
+      #
+
+      # If _n_cpu is equal to "0", a normal, sequential test will be run,
+      # otherwise run the test with mpirun:
+      IF("${_n_cpu}" STREQUAL "0")
+
+        SET(_diff_target ${_test_name}.${_build_lowercase}.diff) # diff target name
+        SET(_test_full ${_category}/${_test_name}.${_build_lowercase}) # full test name
+        SET(_test_directory ${CMAKE_CURRENT_BINARY_DIR}/${_test_name}.${_build_lowercase}) # directory to run the test in
+
+      ELSE()
+
+        SET(_diff_target ${_test_name}.mpirun${_n_cpu}.${_build_lowercase}.diff) # diff target name
+        SET(_test_full ${_category}/${_test_name}.mpirun=${_n_cpu}.${_build_lowercase}) # full test name
+        SET(_test_directory ${CMAKE_CURRENT_BINARY_DIR}/${_test_name}.${_build_lowercase}/mpirun=${_n_cpu}) # directory to run the test in
+        SET(_run_command "${DEAL_II_MPIEXEC} ${DEAL_II_MPIEXEC_NUMPROC_FLAG} ${_n_cpu} ${DEAL_II_MPIEXEC_PREFLAGS} ${_run_command} ${DEAL_II_MPIEXEC_POSTFLAGS}")
+      ENDIF()
+
+      FILE(MAKE_DIRECTORY ${_test_directory})
+
+      #
+      # Add an executable (for the first type of tests) and set up compile
+      # definitions and the full link interface. Only add the target once.
+      #
+
+      IF(NOT TARGET ${_target})
+        #
+        # Add a "guard file" rule: The purpose of interrupt_guard.cc is to
+        # force a complete rerun of this test (BUILD, RUN and DIFF stage)
+        # if interrupt_guard.cc is removed by run_test.cmake due to an
+        # interruption.
+        #
+        ADD_CUSTOM_COMMAND(
+          OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${_target}/interrupt_guard.cc
+          COMMAND touch ${CMAKE_CURRENT_BINARY_DIR}/${_target}/interrupt_guard.cc
+          )
+
+        ADD_EXECUTABLE(${_target} EXCLUDE_FROM_ALL
+          ${_test_name}.cc
+          ${CMAKE_CURRENT_BINARY_DIR}/${_target}/interrupt_guard.cc
+          )
+
+        DEAL_II_SETUP_TARGET(${_target} ${_build})
+        TARGET_LINK_LIBRARIES(${_target}
+          ${TEST_LIBRARIES} ${TEST_LIBRARIES_${_build}}
+          )
+
+        SET_PROPERTY(TARGET ${_target} APPEND PROPERTY
+          COMPILE_DEFINITIONS SOURCE_DIR="${CMAKE_CURRENT_SOURCE_DIR}"
+          )
+        SET_PROPERTY(TARGET ${_target} PROPERTY
+          RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${_target}"
+          )
+
+      ENDIF()
+
+      #
+      # Add a top level target to run and compare the test:
+      #
+
+      ADD_CUSTOM_COMMAND(OUTPUT ${_test_directory}/output
+        COMMAND sh ${DEAL_II_PATH}/${DEAL_II_SHARE_RELDIR}/scripts/run_test.sh
+          run "${_test_full}" "${_run_command}" "${_test_diff}"
+          "${DIFF_EXECUTABLE}" "${_comparison_file}"
+        COMMAND ${PERL_EXECUTABLE}
+          -pi ${DEAL_II_PATH}/${DEAL_II_SHARE_RELDIR}/scripts/normalize.pl
+          ${_test_directory}/output
+        WORKING_DIRECTORY
+          ${_test_directory}
+        DEPENDS
+          ${_target}
+          ${DEAL_II_PATH}/${DEAL_II_SHARE_RELDIR}/scripts/normalize.pl
+        VERBATIM
+        )
+
+      FILE(GLOB _comparison_files ${_comparison_file} ${_comparison_file}.*)
+
+      ADD_CUSTOM_COMMAND(OUTPUT ${_test_directory}/diff
+        COMMAND sh ${DEAL_II_PATH}/${DEAL_II_SHARE_RELDIR}/scripts/run_test.sh
+          diff "${_test_full}" "${_run_command}" "${_test_diff}"
+          "${DIFF_EXECUTABLE}" "${_comparison_file}"
+        WORKING_DIRECTORY
+          ${_test_directory}
+        DEPENDS
+          ${_test_directory}/output
+          ${_comparison_files}
+        VERBATIM
+        )
+
+      ADD_CUSTOM_TARGET(${_diff_target}
+        COMMAND echo "${_test_full}: BUILD successful."
+        COMMAND echo "${_test_full}: RUN successful."
+        COMMAND echo "${_test_full}: DIFF successful."
+        COMMAND echo "${_test_full}: PASSED."
+        DEPENDS ${_test_directory}/diff
+        )
+
+      #
+      # And finally define the test:
+      #
+
+      ADD_TEST(NAME ${_test_full}
+        COMMAND ${CMAKE_COMMAND}
+          -DTRGT=${_diff_target}
+          -DTEST=${_test_full}
+          -DEXPECT=${_expect}
+          -DBINARY_DIR=${CMAKE_BINARY_DIR}
+          -DGUARD_FILE=${CMAKE_CURRENT_BINARY_DIR}/${_test_name}.${_build_lowercase}/interrupt_guard.cc
+          -P ${DEAL_II_PATH}/${DEAL_II_SHARE_RELDIR}/scripts/run_test.cmake
+        WORKING_DIRECTORY ${_test_directory}
+        )
+      SET_TESTS_PROPERTIES(${_test_full} PROPERTIES
+        LABEL "${_category}"
+        TIMEOUT ${TEST_TIME_LIMIT}
+        )
+
+      #
+      # Limit concurrency of mpi tests. We can only set concurrency
+      # for the entire test, which includes the compiling and linking
+      # stages that are purely sequential. There is no good way to model
+      # this without unnecessarily restricting concurrency. Consequently,
+      # we just choose to model an "average" concurrency as one half of
+      # the number of MPI jobs.
+      #
+      IF(_n_cpu GREATER 2)
+        MATH(EXPR _slots "${_n_cpu} / 2")
+        SET_TESTS_PROPERTIES(${_test_full} PROPERTIES PROCESSORS ${_slots})
+      ENDIF()
+
+      IF(NOT "${_n_cpu}" STREQUAL "0")
+        #
+        # We have to be careful not to run different mpirun settings for the
+        # same executable in parallel because this triggers a race condition
+        # when compiling the not yet existent executable that is shared
+        # between the different tests.
+        #
+        # Luckily CMake has a mechanism to force a test to be run after
+        # another has finished (and both are scheduled):
+        #
+        IF(DEFINED TEST_DEPENDENCIES_${_target})
+          SET_TESTS_PROPERTIES(${_test_full} PROPERTIES
+            DEPENDS ${TEST_DEPENDENCIES_${_target}}
+            )
+        ENDIF()
+        SET(TEST_DEPENDENCIES_${_target} ${_test_full})
+      ENDIF()
+
+    ENDIF()
+  ENDFOREACH()
+ENDMACRO()
diff --git a/cmake/macros/macro_deal_ii_find_file.cmake b/cmake/macros/macro_deal_ii_find_file.cmake
new file mode 100644
index 0000000..8a4774d
--- /dev/null
+++ b/cmake/macros/macro_deal_ii_find_file.cmake
@@ -0,0 +1,30 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# A verbose wrapper around FIND_FILE: the outcome is reported via MESSAGE(STATUS)
+#
+
+MACRO(DEAL_II_FIND_FILE _file_name)
+  FIND_FILE(${_file_name} ${ARGN})
+
+  IF(NOT ${_file_name} MATCHES "-NOTFOUND")
+    MESSAGE(STATUS "Found ${_file_name}")
+  ELSE()
+    MESSAGE(STATUS "${_file_name} not found! Call:")
+    TO_STRING(_str ${ARGN})
+    MESSAGE(STATUS "    FIND_FILE(${_file_name} ${_str})")
+  ENDIF()
+ENDMACRO()
diff --git a/cmake/macros/macro_deal_ii_find_library.cmake b/cmake/macros/macro_deal_ii_find_library.cmake
new file mode 100644
index 0000000..9204edd
--- /dev/null
+++ b/cmake/macros/macro_deal_ii_find_library.cmake
@@ -0,0 +1,30 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# A verbose wrapper around FIND_LIBRARY: the outcome is reported via MESSAGE(STATUS)
+#
+
+MACRO(DEAL_II_FIND_LIBRARY _library_name)
+  FIND_LIBRARY(${_library_name} ${ARGN})
+
+  IF(NOT ${_library_name} MATCHES "-NOTFOUND")
+    MESSAGE(STATUS "Found ${_library_name}")
+  ELSE()
+    MESSAGE(STATUS "${_library_name} not found! Call:")
+    TO_STRING(_str ${ARGN})
+    MESSAGE(STATUS "    FIND_LIBRARY(${_library_name} ${_str})")
+  ENDIF()
+ENDMACRO()
diff --git a/cmake/macros/macro_deal_ii_find_path.cmake b/cmake/macros/macro_deal_ii_find_path.cmake
new file mode 100644
index 0000000..9427dc9
--- /dev/null
+++ b/cmake/macros/macro_deal_ii_find_path.cmake
@@ -0,0 +1,30 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Verbose FIND_PATH wrapper: forwards its arguments and reports the outcome
+#
+
+MACRO(DEAL_II_FIND_PATH _path_name)
+  FIND_PATH(${_path_name} ${ARGN})
+
+  IF(NOT ${_path_name} MATCHES "-NOTFOUND")
+    MESSAGE(STATUS "Found ${_path_name}")
+  ELSE()
+    MESSAGE(STATUS "${_path_name} not found! Call:")
+    TO_STRING(_str ${ARGN})
+    MESSAGE(STATUS "    FIND_PATH(${_path_name} ${_str})")
+  ENDIF()
+ENDMACRO()
diff --git a/cmake/macros/macro_deal_ii_initialize_cached_variables.cmake b/cmake/macros/macro_deal_ii_initialize_cached_variables.cmake
new file mode 100644
index 0000000..6f406c6
--- /dev/null
+++ b/cmake/macros/macro_deal_ii_initialize_cached_variables.cmake
@@ -0,0 +1,95 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# This file implements the DEAL_II_INITIALIZE_CACHED_VARIABLES macro, which
+# is part of the deal.II library.
+#
+# Usage:
+#       DEAL_II_INITIALIZE_CACHED_VARIABLES()
+#
+# This sets some cached variables to the values used for compiling the
+# deal.II library.
+#
+# This macro has to be called before PROJECT()!
+#
+
+MACRO(DEAL_II_INITIALIZE_CACHED_VARIABLES)
+
+  #
+  # Only meaningful once deal.IIConfig.cmake has been included:
+  #
+  IF(NOT DEAL_II_PROJECT_CONFIG_INCLUDED)
+    MESSAGE(FATAL_ERROR
+      "\nDEAL_II_INITIALIZE_CACHED_VARIABLES can only be called in external "
+      "projects after the inclusion of deal.IIConfig.cmake. It is not "
+      "intended for internal use.\n\n")
+  ENDIF()
+
+  #
+  # Default the build type to the one deal.II itself was built with.
+  # Without FORCE an already cached value is left untouched.
+  #
+  SET(CMAKE_BUILD_TYPE ${DEAL_II_BUILD_TYPE}
+    CACHE STRING
+    "Choose the type of build, options are: Debug, Release, DebugRelease"
+    )
+
+  #
+  # Fall back to the build type of deal.II if the requested one is not
+  # (case insensitively) equal to Debug, Release or DebugRelease:
+  #
+  STRING(TOLOWER "${CMAKE_BUILD_TYPE}" _cmake_build_type)
+  IF(NOT "${_cmake_build_type}" MATCHES "^(debug|release|debugrelease)$")
+
+    MESSAGE(
+"###
+#
+#  WARNING:
+#
+#  CMAKE_BUILD_TYPE \"${CMAKE_BUILD_TYPE}\" unsupported by current installation!
+#  deal.II was built with CMAKE_BUILD_TYPE \"${DEAL_II_BUILD_TYPE}\".
+#
+#  CMAKE_BUILD_TYPE is forced to \"${DEAL_II_BUILD_TYPE}\".
+#
+###"
+      )
+
+    SET(CMAKE_BUILD_TYPE ${DEAL_II_BUILD_TYPE}
+      CACHE STRING
+      "Choose the type of build, options are: Debug, Release, DebugRelease"
+      FORCE
+      )
+
+  ENDIF()
+
+  #
+  # Compilers default to the ones recorded for the deal.II library:
+  #
+  SET(CMAKE_CXX_COMPILER ${DEAL_II_CXX_COMPILER} CACHE STRING "CXX Compiler.")
+  SET(CMAKE_C_COMPILER ${DEAL_II_C_COMPILER} CACHE STRING "C Compiler.")
+
+  # The compiler flag entries are created empty:
+  SET(CMAKE_CXX_FLAGS "" CACHE STRING
+    "Flags used by the compiler during all build types.")
+  SET(CMAKE_CXX_FLAGS_DEBUG "" CACHE STRING
+    "Flags used by the compiler during debug builds.")
+  SET(CMAKE_CXX_FLAGS_RELEASE "" CACHE STRING
+    "Flags used by the compiler during release builds.")
+
+  MARK_AS_ADVANCED(CMAKE_INSTALL_PREFIX)
+
+ENDMACRO()
+
diff --git a/cmake/macros/macro_deal_ii_insource_setup_target.cmake b/cmake/macros/macro_deal_ii_insource_setup_target.cmake
new file mode 100644
index 0000000..786c45c
--- /dev/null
+++ b/cmake/macros/macro_deal_ii_insource_setup_target.cmake
@@ -0,0 +1,50 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# In-source counterpart of the DEAL_II_SETUP_TARGET macro.
+#
+# Usage:
+#       DEAL_II_INSOURCE_SETUP_TARGET(target build)
+#
+# Sets linker flags, compile flags and compile definitions for <build> on
+# the target, appends the in-tree include directories and links every target
+# but object libraries to ${DEAL_II_BASE_NAME}${DEAL_II_<build>_SUFFIX}.
+#
+
+MACRO(DEAL_II_INSOURCE_SETUP_TARGET _target _build)
+  # NOTE(review): _test_short is not a parameter of this macro; it is
+  # presumably provided by the calling scope -- confirm.
+  SET_TARGET_PROPERTIES(${_target} PROPERTIES
+    COMPILE_DEFINITIONS "${DEAL_II_DEFINITIONS};${DEAL_II_DEFINITIONS_${_build}}"
+    COMPILE_FLAGS "${DEAL_II_CXX_FLAGS} ${DEAL_II_CXX_FLAGS_${_build}}"
+    LINK_FLAGS "${DEAL_II_LINKER_FLAGS} ${DEAL_II_LINKER_FLAGS_${_build}}"
+    LINKER_LANGUAGE "CXX"
+    RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${_test_short}"
+    )
+
+  SET_PROPERTY(TARGET ${_target} APPEND PROPERTY INCLUDE_DIRECTORIES
+    "${CMAKE_BINARY_DIR}/include"
+    "${CMAKE_SOURCE_DIR}/include"
+    "${CMAKE_SOURCE_DIR}/include/deal.II/"
+    )
+
+  GET_PROPERTY(_type TARGET ${_target} PROPERTY TYPE)
+  IF(NOT "${_type}" STREQUAL "OBJECT_LIBRARY")
+    TARGET_LINK_LIBRARIES(${_target}
+      ${DEAL_II_BASE_NAME}${DEAL_II_${_build}_SUFFIX})
+  ENDIF()
+
+ENDMACRO()
diff --git a/cmake/macros/macro_deal_ii_invoke_autopilot.cmake b/cmake/macros/macro_deal_ii_invoke_autopilot.cmake
new file mode 100644
index 0000000..48bed62
--- /dev/null
+++ b/cmake/macros/macro_deal_ii_invoke_autopilot.cmake
@@ -0,0 +1,249 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# This file implements the DEAL_II_INVOKE_AUTOPILOT macro, which is
+# part of the deal.II library.
+#
+# Usage:
+#       DEAL_II_INVOKE_AUTOPILOT()
+#
+# where it is assumed that the following variables are defined:
+#
+#       TARGET         -  a string used for the project and target name
+#       TARGET_SRC     -  a list of source files to compile for target
+#                         ${TARGET}
+#       TARGET_RUN     -  (optional) the command line that should be
+#                         invoked by "make run", will be set to default
+#                         values if undefined. If no run target should be
+#                         created, set it to an empty string.
+#       CLEAN_UP_FILES -  (optional) a list of files (globs) that will be
+#                         removed with "make runclean" and "make
+#                         distclean", will be set to default values if
+#                         empty
+#
+
+MACRO(DEAL_II_INVOKE_AUTOPILOT)
+
+  # Set CMAKE_BUILD_TYPE=Debug if both
+  # Debug and Release mode are given
+  IF("${CMAKE_BUILD_TYPE}" STREQUAL "DebugRelease")
+    SET(CMAKE_BUILD_TYPE "Debug" CACHE STRING
+      "Choose the type of build, options are: Debug, Release"
+      FORCE)
+  ENDIF()
+
+  # Generator specific values:
+  # (both strings have the same width, which keeps the usage text aligned)
+  IF(CMAKE_GENERATOR MATCHES "Ninja")
+    SET(_make_command "$ ninja")
+  ELSE()
+    SET(_make_command " $ make")
+  ENDIF()
+
+  # Define and setup a compilation target:
+  ADD_EXECUTABLE(${TARGET} ${TARGET_SRC})
+  DEAL_II_SETUP_TARGET(${TARGET})
+
+  MESSAGE(STATUS "Autopilot invoked")
+
+  # Define a custom target to easily run the program:
+
+  IF(NOT DEFINED TARGET_RUN)
+    SET(TARGET_RUN ${TARGET})
+  ENDIF()
+
+  IF(CMAKE_SYSTEM_NAME MATCHES "(CYGWIN|Windows)")
+    #
+    # Hack for Cygwin and Windows targets: Export PATH to point to the
+    # dynamic library.
+    #
+    SET(_delim ":")
+    IF(CMAKE_SYSTEM_NAME MATCHES "Windows")
+      SET(_delim ";")
+    ENDIF()
+    FILE(WRITE ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/run_target.cmake
+      "SET(ENV{PATH} \"${CMAKE_CURRENT_BINARY_DIR}${_delim}${DEAL_II_PATH}/${DEAL_II_EXECUTABLE_RELDIR}${_delim}\$ENV{PATH}\")\n"
+      "EXECUTE_PROCESS(COMMAND ${CMAKE_BUILD_TYPE}\\\\${TARGET_RUN}\n"
+      "  RESULT_VARIABLE _return_value\n"
+      "  )\n"
+      "IF(NOT \"\${_return_value}\" STREQUAL \"0\")\n"
+      "  MESSAGE(SEND_ERROR \"\nProgram terminated with exit code: \${_return_value}\")\n"
+      "ENDIF()\n"
+      )
+    SET(_command
+      ${CMAKE_COMMAND} -P ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/run_target.cmake
+      )
+  ELSE()
+
+    SET(_command ${TARGET_RUN})
+  ENDIF()
+
+  IF(NOT "${TARGET_RUN}" STREQUAL "")
+    ADD_CUSTOM_TARGET(run
+      COMMAND ${_command}
+      DEPENDS ${TARGET}
+      COMMENT "Run ${TARGET} with ${CMAKE_BUILD_TYPE} configuration"
+      )
+    SET(_run_targets
+      "#      ${_make_command} run            - to (compile, link and) run the program\n"
+      )
+  ENDIF()
+
+  #
+  # Provide a target to sign the generated executable with a Mac OSX
+  # developer key. This avoids problems with an enabled firewall and MPI
+  # tasks that need networking.
+  #
+
+  IF(CMAKE_SYSTEM_NAME MATCHES "Darwin")
+    IF(DEFINED OSX_CERTIFICATE_NAME)
+      ADD_CUSTOM_COMMAND(OUTPUT ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/${TARGET}.signed
+        COMMAND codesign -f -s ${OSX_CERTIFICATE_NAME} ${TARGET}
+        COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/${TARGET}.signed
+        COMMENT "Digitally signing ${TARGET}"
+        DEPENDS ${TARGET}
+        VERBATIM
+        )
+      ADD_CUSTOM_TARGET(sign ALL
+        DEPENDS ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/${TARGET}.signed
+        )
+      # The "run" target only exists if TARGET_RUN is a non-empty string:
+      IF(TARGET run)
+        ADD_DEPENDENCIES(run sign)
+      ENDIF()
+    ELSE()
+      ADD_CUSTOM_TARGET(sign
+        COMMAND
+           ${CMAKE_COMMAND} -E echo ''
+        && ${CMAKE_COMMAND} -E echo '***************************************************************************'
+        && ${CMAKE_COMMAND} -E echo '**  Error: No Mac OSX developer certificate specified'
+        && ${CMAKE_COMMAND} -E echo '**  Please reconfigure with -DOSX_CERTIFICATE_NAME="<...>"'
+        && ${CMAKE_COMMAND} -E echo '***************************************************************************'
+        && ${CMAKE_COMMAND} -E echo ''
+        COMMENT "Digitally signing ${TARGET}"
+        )
+    ENDIF()
+
+    SET(_run_targets
+      "${_run_targets}#\n#      ${_make_command} sign           - to sign the executable with the supplied OSX developer key\n"
+      )
+  ENDIF()
+
+  # Define custom targets to easily switch the build type:
+  ADD_CUSTOM_TARGET(debug
+    COMMAND ${CMAKE_COMMAND} -DCMAKE_BUILD_TYPE=Debug ${CMAKE_SOURCE_DIR}
+    COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR} --target all
+    COMMENT "Switch CMAKE_BUILD_TYPE to Debug"
+    )
+
+  ADD_CUSTOM_TARGET(release
+    COMMAND ${CMAKE_COMMAND} -DCMAKE_BUILD_TYPE=Release ${CMAKE_SOURCE_DIR}
+    COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR} --target all
+    COMMENT "Switch CMAKE_BUILD_TYPE to Release"
+    )
+
+  # Only mention release and debug targets if it is actually possible to
+  # switch between them:
+  IF(${DEAL_II_BUILD_TYPE} MATCHES "DebugRelease")
+    SET(_switch_targets
+"#      ${_make_command} debug          - to switch the build type to 'Debug'
+#      ${_make_command} release        - to switch the build type to 'Release'\n"
+      )
+  ENDIF()
+
+  # And another custom target to clean up all files generated by the program:
+  IF("${CLEAN_UP_FILES}" STREQUAL "")
+    SET(CLEAN_UP_FILES *.log *.gmv *.gnuplot *.gpl *.eps *.pov *.vtk *.ucd *.d2)
+  ENDIF()
+  ADD_CUSTOM_TARGET(runclean
+    COMMAND ${CMAKE_COMMAND} -E remove ${CLEAN_UP_FILES}
+    COMMENT "runclean invoked"
+    )
+
+  # Define a distclean target to remove every generated file:
+  ADD_CUSTOM_TARGET(distclean
+    COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR} --target clean
+    COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR} --target runclean
+    COMMAND ${CMAKE_COMMAND} -E remove_directory CMakeFiles
+    COMMAND ${CMAKE_COMMAND} -E remove CMakeCache.txt cmake_install.cmake Makefile
+    COMMENT "distclean invoked"
+    )
+
+  # Define a strip-comments target:
+  FIND_PACKAGE(Perl QUIET)
+  IF(PERL_FOUND)
+    ADD_CUSTOM_TARGET(strip_comments
+      COMMAND ${PERL_EXECUTABLE} -pi -e 's\#^[ \\t]*//.*\\n\#\#g;' ${TARGET_SRC}
+      COMMENT "strip comments"
+      )
+  ENDIF()
+
+  # Print out some usage information to file:
+  FILE(WRITE ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/print_usage.cmake
+"MESSAGE(
+\"###
+#
+#  Project  ${TARGET}  set up with  ${DEAL_II_PACKAGE_NAME}-${DEAL_II_PACKAGE_VERSION}  found at
+#      ${DEAL_II_PATH}
+#
+#  CMAKE_BUILD_TYPE:          ${CMAKE_BUILD_TYPE}
+#
+#  You can now run
+#      ${_make_command}                - to compile and link the program
+${_run_targets}#
+${_switch_targets}#
+")
+  IF(NOT CMAKE_GENERATOR MATCHES "Ninja")
+    FILE(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/print_usage.cmake
+"#      ${_make_command} edit_cache     - to change (cached) configuration variables
+#                               and rerun the configure and generate phases of CMake
+#
+")
+  ENDIF()
+  IF(PERL_FOUND)
+    FILE(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/print_usage.cmake
+"#      ${_make_command} strip_comments - to strip the source files in this
+#                               directory off the documentation comments
+")
+  ENDIF()
+  FILE(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/print_usage.cmake
+"#      ${_make_command} clean          - to remove the generated executable as well as
+#                               all intermediate compilation files
+#      ${_make_command} runclean       - to remove all output generated by the program
+#      ${_make_command} distclean      - to clean the directory from _all_ generated
+#                               files (includes clean, runclean and the removal
+#                               of the generated build system)
+#      ${_make_command} info           - to view this message again
+#
+#  Have a nice day!
+#
+###\")"
+     )
+
+  ADD_CUSTOM_TARGET(info
+    COMMAND ${CMAKE_COMMAND} -P ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/print_usage.cmake
+    )
+
+  # Print this message once:
+  IF(NOT USAGE_PRINTED)
+    INCLUDE(${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/print_usage.cmake)
+    SET(USAGE_PRINTED TRUE CACHE INTERNAL "")
+  ELSE()
+    MESSAGE(STATUS "Run  ${_make_command} info  to print a detailed help message")
+  ENDIF()
+
+ENDMACRO()
+
diff --git a/cmake/macros/macro_deal_ii_package_handle.cmake b/cmake/macros/macro_deal_ii_package_handle.cmake
new file mode 100644
index 0000000..7c8ae71
--- /dev/null
+++ b/cmake/macros/macro_deal_ii_package_handle.cmake
@@ -0,0 +1,156 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# DEAL_II_PACKAGE_HANDLE(<feature>
+#  {<conf. variable> {(REQUIRED|OPTIONAL) <variables>}}
+#  [CLEAR <variables>]
+#  )
+#
+# This macro is an alternative implementation of the
+# FIND_PACKAGE_HANDLE_STANDARD_ARGS macro shipped with CMake - aka do
+# everything that was expected from CMake in the first place *sigh*
+#
+# Its usage is best explained with an example:
+#
+#   DEAL_II_PACKAGE_HANDLE(PETSC
+#     LIBRARIES
+#       REQUIRED PETSC_LIBRARY
+#       OPTIONAL _petsc_libraries
+#     INCLUDE_DIRS
+#       REQUIRED PETSC_INCLUDE_DIR_COMMON PETSC_INCLUDE_DIR_ARCH
+#       OPTIONAL _petsc_includes
+#     CLEAR PETSC_LIBRARY PETSC_INCLUDE_DIR_COMMON PETSC_INCLUDE_DIR_ARCH
+#     )
+#
+# This will check whether all REQUIRED variables are non-empty and
+# different from "-NOTFOUND". If so, PETSC_LIBRARIES and PETSC_INCLUDE_DIRS
+# are defined and populated with the contents of all specified variables.
+# Optional variables with no content or whose content is "-NOTFOUND" are
+# filtered out.
+# After the 'CLEAR' statement all internally cached variables should be
+# listed - this is used to provide a possibility to undo a feature
+# search.
+#
+
+MACRO(DEAL_II_PACKAGE_HANDLE _feature _var)
+
+  IF(DEFINED ${_feature}_VERSION)
+    MESSAGE(STATUS "  ${_feature}_VERSION: ${${_feature}_VERSION}")
+  ENDIF()
+
+  SET(${_feature}_FOUND TRUE)
+
+  SET(_variable ${_var})
+  SET(${_feature}_${_variable} "")
+  SET(_required TRUE)
+  SET(_fine TRUE)
+  SET(_fill_clear FALSE)
+  SET(_clear "")
+
+  FOREACH(_arg ${ARGN})
+    IF(_arg MATCHES "^LIBRARIES(|_DEBUG|_RELEASE)$"
+       OR _arg MATCHES "^(|BUNDLED_|USER_)INCLUDE_DIRS$"
+       OR _arg MATCHES "^(|USER_)DEFINITIONS(|_DEBUG|_RELEASE)$"
+       OR _arg MATCHES "^CXX_FLAGS(|_DEBUG|_RELEASE)$"
+       OR _arg MATCHES "^LINKER_FLAGS(|_DEBUG|_RELEASE)$")
+
+      IF(_fine)
+        IF(_variable MATCHES "^CXX_FLAGS(|_DEBUG|_RELEASE)$"
+           OR _variable MATCHES "^LINKER_FLAGS(|_DEBUG|_RELEASE)$")
+          TO_STRING(${_feature}_${_variable} ${${_feature}_${_variable}})
+        ENDIF()
+        MESSAGE(STATUS "  ${_feature}_${_variable}: ${${_feature}_${_variable}}")
+      ENDIF()
+
+      #
+      # A new keyword starts the next <feature>_<keyword> variable:
+      #
+      SET(_variable ${_arg})
+      SET(${_feature}_${_variable} "")
+      SET(_required TRUE)
+      SET(_fine TRUE)
+
+    ELSEIF("${_arg}" STREQUAL "REQUIRED")
+      SET(_required TRUE)
+    ELSEIF("${_arg}" STREQUAL "OPTIONAL")
+      SET(_required FALSE)
+    ELSEIF(_arg MATCHES "^(optimized|debug|general)$"
+            AND "${_variable}" STREQUAL "LIBRARIES")
+      #
+      # Keywords are special...
+      #
+      LIST(APPEND ${_feature}_${_variable} ${_arg})
+    ELSEIF("${_arg}" STREQUAL "CLEAR")
+      SET(_fill_clear TRUE)
+    ELSE()
+      MARK_AS_ADVANCED(${_arg})
+      IF(_fill_clear)
+        IF(NOT _arg MATCHES "^(optimized|debug|general)$")
+          LIST(APPEND _clear ${_arg})
+        ENDIF()
+      ELSE()
+        # NB: CMake regular expressions have no "\s" class (it would match a
+        # literal "s"), so the whitespace characters are spelled out.
+        IF("${${_arg}}" MATCHES "^[ \t\r\n]*$" OR "${${_arg}}" MATCHES "-NOTFOUND")
+          IF(_required AND _fine)
+            IF("${${_arg}}" MATCHES "^[ \t\r\n]*$")
+              MESSAGE(STATUS
+                "  ${_feature}_${_variable}: *** Required variable \"${_arg}\" empty ***"
+                )
+            ELSE()
+              MESSAGE(STATUS
+                "  ${_feature}_${_variable}: *** Required variable \"${_arg}\" set to NOTFOUND ***"
+                )
+            ENDIF()
+            SET(${_feature}_FOUND FALSE)
+            SET(_fine FALSE)
+          ENDIF()
+        ELSE()
+          LIST(APPEND ${_feature}_${_variable} ${${_arg}})
+        ENDIF()
+      ENDIF()
+    ENDIF()
+  ENDFOREACH()
+
+  SET(${_feature}_CLEAR_VARIABLES ${_clear} CACHE INTERNAL "")
+
+  IF(_fine)
+    IF(_variable MATCHES "^CXX_FLAGS(|_DEBUG|_RELEASE)$"
+       OR _variable MATCHES "^LINKER_FLAGS(|_DEBUG|_RELEASE)$")
+      TO_STRING(${_feature}_${_variable} ${${_feature}_${_variable}})
+    ENDIF()
+    MESSAGE(STATUS "  ${_feature}_${_variable}: ${${_feature}_${_variable}}")
+  ENDIF()
+
+  IF(${_feature}_FOUND)
+    #
+    # Deduplicate entries:
+    #
+    FOREACH(_suffix ${DEAL_II_LIST_SUFFIXES})
+      IF(_suffix MATCHES "INCLUDE_DIRS$")
+        REMOVE_DUPLICATES(${_feature}_${_suffix})
+      ELSE()
+        REMOVE_DUPLICATES(${_feature}_${_suffix} REVERSE)
+      ENDIF()
+    ENDFOREACH()
+
+    MESSAGE(STATUS "Found ${_feature}")
+
+    MARK_AS_ADVANCED(${_feature}_DIR ${_feature}_ARCH)
+  ELSE()
+    MESSAGE(STATUS "Could NOT find ${_feature}")
+  ENDIF()
+ENDMACRO()
diff --git a/cmake/macros/macro_deal_ii_pickup_tests.cmake b/cmake/macros/macro_deal_ii_pickup_tests.cmake
new file mode 100644
index 0000000..6b76286
--- /dev/null
+++ b/cmake/macros/macro_deal_ii_pickup_tests.cmake
@@ -0,0 +1,237 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2013 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# A macro to set up testing and pick up all tests in the current
+# subdirectory.
+#
+# If TEST_PICKUP_REGEX is set, only tests matching the regex will be
+# processed.
+#
+# Furthermore, the macro sets up (if necessary) deal.II, perl, a diff tool
+# and the following variables, that can be overwritten by environment or
+# command line:
+#
+#     TEST_LIBRARIES
+#     TEST_LIBRARIES_DEBUG
+#     TEST_LIBRARIES_RELEASE
+#       - specifying additional libraries (and targets) to link against.
+#
+#     TEST_TARGET or
+#     TEST_TARGET_DEBUG and TEST_TARGET_RELEASE
+#       - specifying a test target to be executed for a parameter run.
+#
+#     TEST_TIME_LIMIT
+#       - specifying the maximal wall clock time in seconds a test is
+#         allowed to run
+#
+# Either numdiff (if available), or diff are used for the comparison of
+# test results. Their location can be specified with NUMDIFF_DIR and
+# DIFF_DIR.
+#
+# Usage:
+#     DEAL_II_PICKUP_TESTS()
+#
+
+
+#
+# Two very small macros that are used below:
+#
+
+MACRO(SET_IF_EMPTY _variable)  # assign the remaining arguments if empty
+  IF("" STREQUAL "${${_variable}}")
+    SET(${_variable} ${ARGN})
+  ENDIF()
+ENDMACRO()
+
+MACRO(ITEM_MATCHES _var _regex)  # _var := TRUE if any item matches _regex
+  UNSET(${_var})
+  FOREACH(_item ${ARGN})
+    IF("${_item}" MATCHES ${_regex})
+      SET(${_var} TRUE)
+      BREAK()
+    ENDIF()
+  ENDFOREACH()
+ENDMACRO()
+
+
+MACRO(DEAL_II_PICKUP_TESTS)
+
+  IF(NOT DEAL_II_PROJECT_CONFIG_INCLUDED)
+    MESSAGE(FATAL_ERROR
+      "\nDEAL_II_PICKUP_TESTS can only be called in external (test sub-) "
+      "projects after the inclusion of deal.IIConfig.cmake. It is not "
+      "intended for internal use.\n\n"
+      )
+  ENDIF()
+
+  #
+  # Necessary external interpreters and programs:
+  #
+
+  FIND_PACKAGE(Perl REQUIRED)
+
+  FIND_PROGRAM(DIFF_EXECUTABLE
+    NAMES diff
+    HINTS ${DIFF_DIR}
+    PATH_SUFFIXES bin
+    )
+
+  FIND_PROGRAM(NUMDIFF_EXECUTABLE
+    NAMES numdiff
+    HINTS ${NUMDIFF_DIR}
+    PATH_SUFFIXES bin
+    )
+
+  MARK_AS_ADVANCED(DIFF_EXECUTABLE NUMDIFF_EXECUTABLE)
+
+  IF( NUMDIFF_EXECUTABLE MATCHES "-NOTFOUND" AND
+      DIFF_EXECUTABLE MATCHES "-NOTFOUND" )
+    MESSAGE(FATAL_ERROR
+      "Could not find diff or numdiff. One of those are required for running the testsuite.\n"
+      "Please specify DIFF_DIR or NUMDIFF_DIR to a location containing the binaries."
+      )
+  ENDIF()
+
+  IF(DIFF_EXECUTABLE MATCHES "-NOTFOUND")
+    SET(DIFF_EXECUTABLE ${NUMDIFF_EXECUTABLE})
+  ENDIF()
+
+  IF(NUMDIFF_EXECUTABLE MATCHES "-NOTFOUND")
+    SET(NUMDIFF_EXECUTABLE ${DIFF_EXECUTABLE})
+  ENDIF()
+
+  #
+  # Set time limit:
+  #
+
+  SET_IF_EMPTY(TEST_TIME_LIMIT "$ENV{TEST_TIME_LIMIT}")
+  SET_IF_EMPTY(TEST_TIME_LIMIT 600)
+
+  #
+  # ... and finally pick up tests:
+  #
+
+  ENABLE_TESTING()
+
+  SET_IF_EMPTY(TEST_PICKUP_REGEX "$ENV{TEST_PICKUP_REGEX}")
+  GET_FILENAME_COMPONENT(_category ${CMAKE_CURRENT_SOURCE_DIR} NAME)
+
+  SET(DEAL_II_SOURCE_DIR) # avoid a bogus warning
+
+  FILE(GLOB _tests "*.output")
+  FOREACH(_test ${_tests})
+    SET(_comparison ${_test})
+    GET_FILENAME_COMPONENT(_test ${_test} NAME)
+
+    #
+    # Respect TEST_PICKUP_REGEX:
+    #
+
+    IF( "${TEST_PICKUP_REGEX}" STREQUAL "" OR
+        "${_category}/${_test}" MATCHES "${TEST_PICKUP_REGEX}" )
+      SET(_define_test TRUE)
+    ELSE()
+      SET(_define_test FALSE)
+    ENDIF()
+
+    # Disable tests using mpirun if MPI is not enabled
+    STRING(REGEX MATCH "mpirun=" _matches ${_test})
+    IF (_matches AND NOT DEAL_II_WITH_MPI)
+      SET(_define_test FALSE)
+    ENDIF()
+
+    #
+    # Query configuration and check whether we support it. Otherwise
+    # set _define_test to FALSE:
+    #
+
+    SET(_op_regex "=|\\.geq\\.|\\.leq\\.|\\.ge\\.|\\.le\\.")
+
+    STRING(REGEX MATCHALL
+      "with_([0-9]|[a-z]|_)*(${_op_regex})(on|off|yes|no|true|false|[0-9]+(\\.[0-9]+)*)"
+      _matches ${_test}
+      )
+
+    FOREACH(_match ${_matches})
+      #
+      # Extract feature name, comparison operator, (a possible) boolean and
+      # (a possible) version number from the feature constraint:
+      #
+      STRING(REGEX REPLACE "^with_(([0-9]|[a-z]|_)*)(${_op_regex}).*" "\\1" _feature ${_match})
+      STRING(TOUPPER ${_feature} _feature)
+      STRING(REGEX REPLACE "^with_(([0-9]|[a-z]|_)*)(${_op_regex}).*$" "\\3" _operator ${_match})
+      STRING(REGEX MATCH "(on|off|yes|no|true|false)$" _boolean ${_match})
+      STRING(REGEX MATCH "([0-9]+(\\.[0-9]+)*)$" _version ${_match})
+
+      #
+      # Valid feature?
+      #
+      IF(NOT DEFINED DEAL_II_WITH_${_feature})
+        MESSAGE(FATAL_ERROR "
+Invalid feature constraint \"${_match}\" in file
+\"${_comparison}\":
+The feature \"DEAL_II_WITH_${_feature}\" does not exist.\n"
+          )
+      ENDIF()
+
+      #
+      # First process simple yes/no feature constraints:
+      #
+      IF(NOT "${_boolean}" STREQUAL "")
+        IF(NOT "${_operator}" STREQUAL "=")
+          MESSAGE(FATAL_ERROR "
+Invalid syntax in constraint \"${_match}\" in file
+\"${_comparison}\":
+Comparison operator \"=\" expected for boolean match.\n"
+            )
+        ENDIF()
+
+        # This is why I hate CMake :-/
+        IF( (DEAL_II_WITH_${_feature} AND NOT ${_boolean}) OR
+            (NOT DEAL_II_WITH_${_feature} AND ${_boolean}) )
+          SET(_define_test FALSE)
+        ENDIF()
+      ENDIF()
+
+      #
+      # Process version constraints:
+      #
+      IF(NOT "${_version}" STREQUAL "")
+
+        # A disabled feature never satisfies a version constraint:
+        IF( ( NOT DEAL_II_WITH_${_feature} ) OR
+            ( "${_operator}" STREQUAL "=" AND
+              NOT "${DEAL_II_${_feature}_VERSION}" VERSION_EQUAL "${_version}" ) OR
+            ( "${_operator}" STREQUAL ".ge." AND
+              NOT "${DEAL_II_${_feature}_VERSION}" VERSION_GREATER "${_version}" ) OR
+            ( "${_operator}" STREQUAL ".le." AND
+              NOT "${DEAL_II_${_feature}_VERSION}" VERSION_LESS "${_version}" ) OR
+            ( "${_operator}" STREQUAL ".geq." AND
+              "${DEAL_II_${_feature}_VERSION}" VERSION_LESS "${_version}" ) OR
+            ( "${_operator}" STREQUAL ".leq." AND
+              "${DEAL_II_${_feature}_VERSION}" VERSION_GREATER "${_version}" ) )
+          SET(_define_test FALSE)
+        ENDIF()
+      ENDIF()
+    ENDFOREACH()
+
+    IF(_define_test)
+      STRING(REGEX REPLACE "\\..*" "" _test ${_test})
+      DEAL_II_ADD_TEST(${_category} ${_test} ${_comparison})
+    ENDIF()
+
+  ENDFOREACH()
+ENDMACRO()
diff --git a/cmake/macros/macro_deal_ii_query_git_information.cmake b/cmake/macros/macro_deal_ii_query_git_information.cmake
new file mode 100644
index 0000000..aa536b9
--- /dev/null
+++ b/cmake/macros/macro_deal_ii_query_git_information.cmake
@@ -0,0 +1,113 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# This file implements the DEAL_II_QUERY_GIT_INFORMATION macro, which is
+# part of the deal.II library.
+#
+# Usage:
+#       DEAL_II_QUERY_GIT_INFORMATION()
+#
+# This will try to gather information about current branch, as well as
+# short and long revision. If ${CMAKE_SOURCE_DIR} is the root of a git
+# repository the following variables will be populated:
+#
+#       GIT_BRANCH
+#       GIT_REVISION
+#       GIT_SHORTREV
+#
+# If this macro is called within the deal.II build system the variables are
+# prefixed with DEAL_II_:
+#
+#       DEAL_II_GIT_BRANCH
+#       DEAL_II_GIT_REVISION
+#       DEAL_II_GIT_SHORTREV
+#
+
+MACRO(DEAL_II_QUERY_GIT_INFORMATION)
+
+  MESSAGE(STATUS "Query git repository information.")
+
+  #
+  # Inside the deal.II build system (DEAL_II_BASE_NAME is defined while
+  # DEAL_II_PROJECT_CONFIG_INCLUDED is not) all result variables carry
+  # the prefix "DEAL_II_"; everywhere else they are left unprefixed.
+  #
+  SET(_prefix "")
+  IF( DEFINED DEAL_II_BASE_NAME AND
+      NOT DEFINED DEAL_II_PROJECT_CONFIG_INCLUDED )
+    SET(_prefix "DEAL_II_")
+  ENDIF()
+
+  FIND_PACKAGE(Git)
+
+  #
+  # Everything below requires a git executable and a .git/HEAD file in
+  # the top-level source directory.
+  #
+  IF(GIT_FOUND AND EXISTS ${CMAKE_SOURCE_DIR}/.git/HEAD)
+
+    #
+    # Bogus copies of HEAD (and of the file it refers to, if there is
+    # one): they trigger a reconfigure, and thus an update of branch and
+    # commit information, every time one of these files has changed.
+    #
+    CONFIGURE_FILE(${CMAKE_SOURCE_DIR}/.git/HEAD
+      ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/HEAD)
+
+    # First line of .git/HEAD with any "ref: " removed:
+    FILE(STRINGS ${CMAKE_SOURCE_DIR}/.git/HEAD _head_ref LIMIT_COUNT 1)
+    STRING(REPLACE "ref: " "" _head_ref ${_head_ref})
+    IF(EXISTS ${CMAKE_SOURCE_DIR}/.git/${_head_ref})
+      CONFIGURE_FILE(${CMAKE_SOURCE_DIR}/.git/${_head_ref}
+        ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/HEAD_REF)
+    ENDIF()
+
+    #
+    # Long and short revision of the latest commit. The regular
+    # expressions below expect the output to be enclosed in the double
+    # quotes that are part of the format string.
+    #
+    EXECUTE_PROCESS(
+      COMMAND ${GIT_EXECUTABLE} log -n 1 --pretty=format:"%H %h"
+      WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
+      RESULT_VARIABLE _result
+      OUTPUT_VARIABLE _info
+      OUTPUT_STRIP_TRAILING_WHITESPACE
+      )
+    IF(${_result} EQUAL 0)
+      STRING(REGEX REPLACE "^\"([^ ]+) ([^ ]+)\"$" "\\1"
+        ${_prefix}GIT_REVISION "${_info}")
+      STRING(REGEX REPLACE "^\"([^ ]+) ([^ ]+)\"$" "\\2"
+        ${_prefix}GIT_SHORTREV "${_info}")
+    ENDIF()
+
+    #
+    # Branch: the symbolic ref of HEAD with "refs/heads/" removed.
+    #
+    EXECUTE_PROCESS(
+      COMMAND ${GIT_EXECUTABLE} symbolic-ref HEAD
+      WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
+      RESULT_VARIABLE _result
+      OUTPUT_VARIABLE _branch
+      OUTPUT_STRIP_TRAILING_WHITESPACE
+      )
+    IF(${_result} EQUAL 0)
+      STRING(REGEX REPLACE "refs/heads/" "" ${_prefix}GIT_BRANCH "${_branch}")
+    ENDIF()
+
+  ENDIF()
+
+ENDMACRO()
diff --git a/cmake/macros/macro_deal_ii_setup_target.cmake b/cmake/macros/macro_deal_ii_setup_target.cmake
new file mode 100644
index 0000000..f971888
--- /dev/null
+++ b/cmake/macros/macro_deal_ii_setup_target.cmake
@@ -0,0 +1,124 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# This file implements the DEAL_II_SETUP_TARGET macro, which is
+# part of the deal.II library.
+#
+# Usage:
+#       DEAL_II_SETUP_TARGET(target)
+#       DEAL_II_SETUP_TARGET(target DEBUG|RELEASE)
+#
+# This appends necessary include directories, linker flags, compile flags
+# and compile definitions and the deal.II library link interface to the
+# given target. In particular:
+#
+# INCLUDE_DIRECTORIES is appended with
+#   "${DEAL_II_INCLUDE_DIRS}"
+#
+# COMPILE_FLAGS is appended with
+#   "${DEAL_II_CXX_FLAGS} ${DEAL_II_CXX_FLAGS_<build type>}"
+#
+# LINK_FLAGS is appended with
+#   "${DEAL_II_LINKER_FLAGS} ${DEAL_II_LINKER_FLAGS_<build type>}"
+#
+# COMPILE_DEFINITIONS is appended with
+#   "${DEAL_II_USER_DEFINITIONS};${DEAL_II_USER_DEFINITIONS_<build type>}"
+#
+# If no "DEBUG" or "RELEASE" keyword is specified after the target, the
+# current CMAKE_BUILD_TYPE determines which compiler and linker flags as
+# well as compile definitions to use and which deal.II library the target
+# is linked against.
+#
+# If the requested build type is not available (e.g. DEBUG is requested but
+# deal.II was compiled in release mode only), the other available build
+# type will be used instead.
+#
+
+MACRO(DEAL_II_SETUP_TARGET _target)
+
+  IF(NOT DEAL_II_PROJECT_CONFIG_INCLUDED)
+    MESSAGE(FATAL_ERROR
+      "\nDEAL_II_SETUP_TARGET can only be called in external projects after "
+      "the inclusion of deal.IIConfig.cmake. It is not intended for "
+      "internal use.\n\n"
+      )
+  ENDIF()
+
+  IF(NOT DEAL_II_TARGET_CONFIG_INCLUDED)
+    INCLUDE(${DEAL_II_TARGET_CONFIG})
+    SET(DEAL_II_TARGET_CONFIG_INCLUDED TRUE)
+  ENDIF()
+
+  # Necessary for setting INCLUDE_DIRECTORIES via SET_PROPERTY
+  CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+  #
+  # Every build type that (case insensitively) matches "debug" is
+  # considered a debug build:
+  #
+  SET(_build "RELEASE")
+  STRING(TOLOWER "${CMAKE_BUILD_TYPE}" _cmake_build_type)
+  IF("${_cmake_build_type}" MATCHES "debug")
+    SET(_build "DEBUG")
+  ENDIF()
+
+  #
+  # An explicit DEBUG or RELEASE keyword after the target overrides _build:
+  #
+  IF("${ARGN}" MATCHES "^(DEBUG|RELEASE)$")
+    SET(_build "${ARGN}")
+  ENDIF()
+
+  # Flags, definitions and link target only exist for the flavours deal.II
+  # was configured with; fall back to the other one if ours is missing:
+  IF("${_build}" STREQUAL "DEBUG" AND NOT DEAL_II_BUILD_TYPE MATCHES "Debug")
+    SET(_build "RELEASE")
+  ELSEIF("${_build}" STREQUAL "RELEASE" AND NOT DEAL_II_BUILD_TYPE MATCHES "Release")
+    SET(_build "DEBUG")
+  ENDIF()
+
+  SET_PROPERTY(TARGET ${_target} APPEND PROPERTY
+    INCLUDE_DIRECTORIES "${DEAL_II_INCLUDE_DIRS}"
+    )
+  SET_PROPERTY(TARGET ${_target} APPEND_STRING PROPERTY # no separator is inserted, hence the leading blank
+    COMPILE_FLAGS " ${DEAL_II_CXX_FLAGS} ${DEAL_II_CXX_FLAGS_${_build}}"
+    )
+  SET_PROPERTY(TARGET ${_target} APPEND_STRING PROPERTY
+    LINK_FLAGS " ${DEAL_II_LINKER_FLAGS} ${DEAL_II_LINKER_FLAGS_${_build}}"
+    )
+  SET_PROPERTY(TARGET ${_target} APPEND PROPERTY
+    COMPILE_DEFINITIONS "${DEAL_II_USER_DEFINITIONS};${DEAL_II_USER_DEFINITIONS_${_build}}"
+    )
+
+  #
+  # Set up the link interface (object libraries are not linked):
+  #
+  GET_PROPERTY(_type TARGET ${_target} PROPERTY TYPE)
+  IF(NOT "${_type}" STREQUAL "OBJECT_LIBRARY")
+    TARGET_LINK_LIBRARIES(${_target} ${DEAL_II_TARGET_${_build}})
+  ENDIF()
+
+  #
+  # If DEAL_II_STATIC_EXECUTABLE is set, switch the final link type to
+  # static:
+  #
+  IF(DEAL_II_STATIC_EXECUTABLE)
+    SET_PROPERTY(TARGET ${_target} PROPERTY
+      LINK_SEARCH_END_STATIC TRUE
+      )
+  ENDIF()
+
+ENDMACRO()
diff --git a/cmake/macros/macro_enable_if_links.cmake b/cmake/macros/macro_enable_if_links.cmake
new file mode 100644
index 0000000..fd2aa9b
--- /dev/null
+++ b/cmake/macros/macro_enable_if_links.cmake
@@ -0,0 +1,45 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Tests whether it is possible to compile and link a dummy program with a
+# given flag.
+# If so, add it to variable.
+#
+# Usage:
+#     ENABLE_IF_LINKS(variable flag)
+#
+
+MACRO(ENABLE_IF_LINKS _variable _flag)
+  STRING(STRIP "${_flag}" _flag_stripped)
+  IF(NOT "${_flag_stripped}" STREQUAL "")
+    #
+    # Name of the result variable: the flag without its leading dash and
+    # without commas, with every remaining "-" and "+" turned into "_":
+    #
+    STRING(REGEX REPLACE "^-" "" _flag_name "${_flag_stripped}")
+    STRING(REPLACE "," "" _flag_name "${_flag_name}")
+    STRING(REGEX REPLACE "[+-]" "_" _flag_name "${_flag_name}")
+    # The flag is part of CMAKE_REQUIRED_LIBRARIES only while the check runs:
+    SET(_backup ${CMAKE_REQUIRED_LIBRARIES})
+    LIST(APPEND CMAKE_REQUIRED_LIBRARIES "${_flag_stripped}")
+    CHECK_CXX_COMPILER_FLAG("" DEAL_II_HAVE_FLAG_${_flag_name})
+    SET(CMAKE_REQUIRED_LIBRARIES ${_backup})
+
+    IF(DEAL_II_HAVE_FLAG_${_flag_name})
+      STRING(STRIP "${${_variable}} ${_flag_stripped}" ${_variable})
+    ENDIF()
+  ENDIF()
+ENDMACRO()
diff --git a/cmake/macros/macro_enable_if_supported.cmake b/cmake/macros/macro_enable_if_supported.cmake
new file mode 100644
index 0000000..f180dbd
--- /dev/null
+++ b/cmake/macros/macro_enable_if_supported.cmake
@@ -0,0 +1,65 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Tests whether the cxx compiler understands a flag.
+# If so, add it to 'variable'.
+#
+# Usage:
+#     ENABLE_IF_SUPPORTED(variable flag)
+#
+
+MACRO(ENABLE_IF_SUPPORTED _variable _flag)
+  STRING(STRIP "${_flag}" _flag_stripped)
+  SET(_flag_stripped_orig "${_flag_stripped}")
+
+  #
+  # Clang only warns about a compiler flag it does not support, so for
+  # Clang the check is run with -Werror in front to make that an error:
+  #
+  SET(_werror_string "")
+  IF(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+    SET(_werror_string "-Werror ")
+  ENDIF()
+
+  #
+  # Gcc does not emit a warning when testing -Wno-... flags (a false
+  # positive), but later complains about the unknown option as soon as
+  # another warning is emitted in the same compilation unit. For Gcc the
+  # positive form -W... is therefore tested instead; the original spelling
+  # is what gets appended to 'variable':
+  #
+  IF(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
+    STRING(REPLACE "-Wno-" "-W" _flag_stripped "${_flag_stripped}")
+  ENDIF()
+
+  IF(NOT "${_flag_stripped}" STREQUAL "")
+    #
+    # Name of the result variable: the tested flag without its leading dash
+    # and without commas, with every remaining "-" and "+" turned into "_":
+    #
+    STRING(REGEX REPLACE "^-" "" _flag_name "${_flag_stripped}")
+    STRING(REPLACE "," "" _flag_name "${_flag_name}")
+    STRING(REGEX REPLACE "[+-]" "_" _flag_name "${_flag_name}")
+
+    CHECK_CXX_COMPILER_FLAG(
+      "${_werror_string}${_flag_stripped}" DEAL_II_HAVE_FLAG_${_flag_name}
+      )
+    IF(DEAL_II_HAVE_FLAG_${_flag_name})
+      STRING(STRIP "${${_variable}} ${_flag_stripped_orig}" ${_variable})
+    ENDIF()
+  ENDIF()
+ENDMACRO()
+
diff --git a/cmake/macros/macro_enable_language_optional.cmake b/cmake/macros/macro_enable_language_optional.cmake
new file mode 100644
index 0000000..c774e88
--- /dev/null
+++ b/cmake/macros/macro_enable_language_optional.cmake
@@ -0,0 +1,77 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2013 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+
+#
+# Test whether a usable language compiler is available and if yes, call
+# ENABLE_LANGUAGE(language)
+#
+# This works around a severe bug [1] in
+#
+#   ENABLE_LANGUAGE(Fortran OPTIONAL)
+#
+# [1] http://public.kitware.com/Bug/view.php?id=9220
+#
+# Usage:
+#     ENABLE_LANGUAGE_OPTIONAL(language)
+#
+# where language is either C or Fortran
+#
+
+MACRO(ENABLE_LANGUAGE_OPTIONAL _language)
+  IF(NOT ${_language}_CHECKED)
+    #
+    # Run this check exactly once (the marker is stored in the cache):
+    #
+    SET(${_language}_CHECKED TRUE CACHE INTERNAL "" FORCE)
+
+    SET(_tmp ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/${_language}_test)
+    FILE(REMOVE_RECURSE ${_tmp}) # _tmp is a directory; plain REMOVE would leave a stale tree behind
+    FILE(WRITE ${_tmp}/CMakeLists.txt
+      "PROJECT(foobar ${_language})"
+      )
+
+    # Forward an explicitly set compiler and toolchain file to the test project:
+    SET(_hint)
+    IF(DEFINED CMAKE_${_language}_COMPILER)
+      LIST(APPEND _hint "-DCMAKE_${_language}_COMPILER=${CMAKE_${_language}_COMPILER}")
+    ENDIF()
+    IF(NOT "${CMAKE_TOOLCHAIN_FILE}" STREQUAL "")
+      LIST(APPEND _hint "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}")
+    ENDIF()
+
+    EXECUTE_PROCESS(
+      COMMAND ${CMAKE_COMMAND} -G${CMAKE_GENERATOR} ${_hint} .
+      WORKING_DIRECTORY ${_tmp}
+      RESULT_VARIABLE _result
+      OUTPUT_QUIET
+      ERROR_QUIET
+      )
+
+    IF("${_result}" STREQUAL "0")
+      SET(DEAL_II_${_language}_COMPILER_WORKS TRUE CACHE INTERNAL "" FORCE)
+      ENABLE_LANGUAGE(${_language})
+    ELSE()
+      MESSAGE(STATUS "No working ${_language} compiler found, disabling ${_language}")
+    ENDIF()
+  ELSE()
+    #
+    # Enable the language depending on the cached result from a former run:
+    #
+    IF(DEAL_II_${_language}_COMPILER_WORKS)
+      ENABLE_LANGUAGE(${_language})
+    ENDIF()
+  ENDIF()
+ENDMACRO()
diff --git a/cmake/macros/macro_expand_instantiations.cmake b/cmake/macros/macro_expand_instantiations.cmake
new file mode 100644
index 0000000..d6bb62a
--- /dev/null
+++ b/cmake/macros/macro_expand_instantiations.cmake
@@ -0,0 +1,78 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# A macro for the inst.in file expansion
+#
+# Usage:
+#     EXPAND_INSTANTIATIONS(target inst_in_files)
+#
+# Options:
+#
+# target
+#
+#    where target.${build_type} will depend on the generation of all .inst
+#    files, to ensure that all .inst files are generated prior to
+#    compiling.
+#
+# inst_in_files
+#
+#    a list of inst.in files that will be expanded
+#
+
+MACRO(EXPAND_INSTANTIATIONS _target _inst_in_files)
+  # Generator to run and, unless cross compiling, the target it is built by:
+  IF(NOT CMAKE_CROSSCOMPILING)
+    SET(_command expand_instantiations_exe)
+    SET(_dependency expand_instantiations_exe)
+  ELSE()
+    SET(_command expand_instantiations)
+    SET(_dependency)
+  ENDIF()
+
+  # Start from an empty list: a macro has no variable scope of its own
+  SET(_inst_targets)
+  FOREACH (_inst_in_file ${_inst_in_files})
+    STRING(REGEX REPLACE "\\.in$" "" _inst_file "${_inst_in_file}" )
+    ADD_CUSTOM_COMMAND(
+      OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${_inst_file}
+      DEPENDS ${_dependency}
+              ${CMAKE_BINARY_DIR}/${DEAL_II_SHARE_RELDIR}/template-arguments
+              ${CMAKE_CURRENT_SOURCE_DIR}/${_inst_in_file}
+      COMMAND ${_command}
+      ARGS ${CMAKE_BINARY_DIR}/${DEAL_II_SHARE_RELDIR}/template-arguments
+           < ${CMAKE_CURRENT_SOURCE_DIR}/${_inst_in_file}
+           > ${CMAKE_CURRENT_BINARY_DIR}/${_inst_file}
+      )
+    LIST(APPEND _inst_targets ${CMAKE_CURRENT_BINARY_DIR}/${_inst_file})
+  ENDFOREACH()
+
+  #
+  # Define a custom target that depends on the generation of all inst.in
+  # files.
+  #
+  ADD_CUSTOM_TARGET(${_target}.inst ALL DEPENDS ${_inst_targets})
+
+  #
+  # Add a dependency to all target.${build_type} so that target.inst is
+  # fully generated before target will be processed.
+  #
+  FOREACH(_build ${DEAL_II_BUILD_TYPES})
+    STRING(TOLOWER ${_build} _build_lowercase)
+    ADD_DEPENDENCIES(${_target}.${_build_lowercase} ${_target}.inst)
+  ENDFOREACH()
+
+ENDMACRO()
+
diff --git a/cmake/macros/macro_filter_system_libraries.cmake b/cmake/macros/macro_filter_system_libraries.cmake
new file mode 100644
index 0000000..4261e49
--- /dev/null
+++ b/cmake/macros/macro_filter_system_libraries.cmake
@@ -0,0 +1,42 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2014 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# This macro replaces absolute paths to system libraries with the
+# corresponding short name within the FEATURE_LIBRARIES(|_DEBUG|_RELEASE)
+# variables
+#
+# Usage:
+#     FILTER_SYSTEM_LIBRARIES(feature)
+#
+
+MACRO(FILTER_SYSTEM_LIBRARIES _feature)
+  FOREACH(_variable # the three library lists of the feature are filtered one after another
+    ${_feature}_LIBRARIES
+    ${_feature}_LIBRARIES_DEBUG
+    ${_feature}_LIBRARIES_RELEASE
+    )
+    IF(DEFINED ${_variable}) # undefined lists are left undefined
+      SET(_tmp_${_variable} ${${_variable}}) # keep a copy and rebuild the list from scratch
+      SET(${_variable} "")
+      FOREACH(_lib ${_tmp_${_variable}})
+        IF(_lib MATCHES "lib(bfd|c|dl|gfortran|iberty|m|nsl|opcodes|pthread|quadmath|rt)\\.(a|so)$") # entry ends in one of the listed system library file names
+          string(REGEX REPLACE ".*lib([a-z]+).so$" "\\1" _lib ${_lib}) # NOTE(review): only *.so is shortened; a matching *.a entry passes through unchanged - confirm this is intended
+        ENDIF()
+        LIST(APPEND ${_variable} ${_lib}) # every entry is kept, shortened or not, in its original position
+      ENDFOREACH()
+    ENDIF()
+  ENDFOREACH()
+ENDMACRO()
diff --git a/cmake/macros/macro_find_package.cmake b/cmake/macros/macro_find_package.cmake
new file mode 100644
index 0000000..7b5cb1a
--- /dev/null
+++ b/cmake/macros/macro_find_package.cmake
@@ -0,0 +1,33 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2013 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# A small wrapper around FIND_PACKAGE.
+# We guard the invocation of FIND_PACKAGE(package <...>) by
+# ${package}_FOUND and ${package}_LIBRARIES to allow easy custom overrides
+#
+
+MACRO(FIND_PACKAGE _package_name)
+  STRING(TOUPPER ${_package_name} _package_name_uppercase)
+  # A preset <PACKAGE>_FOUND or <PACKAGE>_LIBRARIES takes the place of the search:
+  IF( DEFINED ${_package_name_uppercase}_FOUND OR
+      DEFINED ${_package_name_uppercase}_LIBRARIES )
+    IF(NOT DEFINED ${_package_name_uppercase}_FOUND) # only the libraries were given
+      SET(${_package_name_uppercase}_FOUND TRUE)
+    ENDIF()
+  ELSE()
+    _FIND_PACKAGE (${_package_name} ${ARGN})
+  ENDIF()
+ENDMACRO()
diff --git a/cmake/macros/macro_find_system_library.cmake b/cmake/macros/macro_find_system_library.cmake
new file mode 100644
index 0000000..e17bb28
--- /dev/null
+++ b/cmake/macros/macro_find_system_library.cmake
@@ -0,0 +1,49 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2013 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Search for a system library. In contrast to normal libraries we do this
+# purely via "-l<library name>" instead of selecting a full library path.
+#
+# USAGE:
+#   FIND_SYSTEM_LIBRARY(variable NAMES [list of possible names])
+#
+
+MACRO(FIND_SYSTEM_LIBRARY)
+  SET(_argn ${ARGN})
+  LIST(GET _argn 0 _variable)
+  LIST(REMOVE_AT _argn 0 1) # first two arguments: per the usage, the variable and the NAMES keyword
+  # Search only if the variable is not set yet (by the user, the cache or an earlier call):
+  IF(NOT DEFINED ${_variable})
+    FOREACH(_arg ${_argn})
+      LIST(APPEND CMAKE_REQUIRED_LIBRARIES "-l${_arg}")
+      CHECK_CXX_COMPILER_FLAG("" ${_variable})
+      RESET_CMAKE_REQUIRED()
+      # The check stored its result in the cache; replace it by the library name:
+      IF(${_variable})
+        UNSET(${_variable} CACHE)
+        SET(${_variable} ${_arg} CACHE STRING "A system library.")
+        SET(${_variable} ${_arg})
+        BREAK()
+      ELSE()
+        UNSET(${_variable} CACHE)
+      ENDIF()
+    ENDFOREACH()
+
+    IF(NOT ${_variable})
+      SET(${_variable} "${_variable}-NOTFOUND")
+    ENDIF()
+  ENDIF()
+ENDMACRO()
diff --git a/cmake/macros/macro_item_matches.cmake b/cmake/macros/macro_item_matches.cmake
new file mode 100644
index 0000000..de973c1
--- /dev/null
+++ b/cmake/macros/macro_item_matches.cmake
@@ -0,0 +1,34 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# A small macro to test whether a given list contains an element.
+#
+# Usage:
+#     ITEM_MATCHES(var regex list)
+#
+# var is set to true if list contains an item that matches regex.
+#
+
+MACRO(ITEM_MATCHES _var _regex)
+  UNSET(${_var}) # the result stays undefined unless some item matches
+  FOREACH (_item ${ARGN}) # the list is everything after the regex
+    IF("${_item}" MATCHES ${_regex})
+      SET(${_var} TRUE)
+      BREAK() # the first match decides
+    ENDIF()
+  ENDFOREACH()
+ENDMACRO()
+
diff --git a/cmake/macros/macro_purge_feature.cmake b/cmake/macros/macro_purge_feature.cmake
new file mode 100644
index 0000000..6614d7f
--- /dev/null
+++ b/cmake/macros/macro_purge_feature.cmake
@@ -0,0 +1,47 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Remove all cached and non cached variables associated with a feature.
+#
+# Usage:
+#     PURGE_FEATURE(feature)
+#
+
+MACRO(PURGE_FEATURE _feature)
+  #
+  # uncached: the status pair and one variable per known suffix (suffixes
+  # that match BUNDLED are left alone)
+  #
+  UNSET(${_feature}_FOUND)
+  UNSET(${_feature}_VERSION)
+  FOREACH(_var ${DEAL_II_LIST_SUFFIXES} ${DEAL_II_STRING_SUFFIXES})
+    IF(NOT _var MATCHES BUNDLED)
+      UNSET(${_feature}_${_var})
+    ENDIF()
+  ENDFOREACH()
+
+  #
+  # cached: every variable listed in <FEATURE>_CLEAR_VARIABLES is removed
+  # as a normal variable and as a cache entry, then the list itself
+  #
+  FOREACH(_var ${${_feature}_CLEAR_VARIABLES})
+    UNSET(${_var})
+    UNSET(${_var} CACHE)
+  ENDFOREACH()
+  UNSET(${_feature}_CLEAR_VARIABLES CACHE)
+
+  MARK_AS_ADVANCED(CLEAR ${_feature}_DIR ${_feature}_ARCH)
+ENDMACRO()
diff --git a/cmake/macros/macro_push_cmake_required.cmake b/cmake/macros/macro_push_cmake_required.cmake
new file mode 100644
index 0000000..1101a56
--- /dev/null
+++ b/cmake/macros/macro_push_cmake_required.cmake
@@ -0,0 +1,30 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# A small macro used in the platform checks to easily add a flag to
+# CMAKE_REQUIRED_FLAGS
+#
+# Usage:
+#     PUSH_CMAKE_REQUIRED("flag")
+#
+
+MACRO(PUSH_CMAKE_REQUIRED _flag)
+  #
+  # Append the flag after a single blank; stripping removes the blank again
+  # if CMAKE_REQUIRED_FLAGS or the flag turns out to be empty:
+  STRING(STRIP "${CMAKE_REQUIRED_FLAGS} ${_flag}" CMAKE_REQUIRED_FLAGS)
+ENDMACRO()
+
diff --git a/cmake/macros/macro_register_feature.cmake b/cmake/macros/macro_register_feature.cmake
new file mode 100644
index 0000000..9a83467
--- /dev/null
+++ b/cmake/macros/macro_register_feature.cmake
@@ -0,0 +1,72 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2013 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# This macro is used for the feature configuration in deal.II. It adds
+# individual FEATURE_* configuration variables to the corresponding
+# DEAL_II_* variables
+#
+# Usage:
+#     REGISTER_FEATURE(feature)
+#
+# This macro will add
+#
+#   <FEATURE>_LIBRARIES (respecting general, optimized, debug keyword)
+#
+# and all other suffixes defined in DEAL_II_LIST_SUFFIXES and
+# DEAL_II_STRING_SUFFIXES to the corresponding DEAL_II_* variables
+#
+
+MACRO(REGISTER_FEATURE _feature)
+
+  IF(DEFINED ${_feature}_LIBRARIES)
+    #
+    # Distribute the entries of ${_feature}_LIBRARIES over
+    #   DEAL_II_LIBRARIES          ("general", also the initial state)
+    #   DEAL_II_LIBRARIES_DEBUG    ("debug")
+    #   DEAL_II_LIBRARIES_RELEASE  ("optimized")
+    # A keyword selects the destination of all entries up to the next keyword.
+    #
+    SET(_toggle "general")
+    FOREACH(_tmp ${${_feature}_LIBRARIES})
+      IF( "${_tmp}" STREQUAL "debug" OR
+          "${_tmp}" STREQUAL "optimized" OR
+          "${_tmp}" STREQUAL "general" )
+        SET(_toggle "${_tmp}")
+      ELSEIF("${_toggle}" STREQUAL "general")
+        LIST(APPEND DEAL_II_LIBRARIES ${_tmp})
+      ELSEIF("${_toggle}" STREQUAL "debug")
+        LIST(APPEND DEAL_II_LIBRARIES_DEBUG ${_tmp})
+      ELSEIF("${_toggle}" STREQUAL "optimized")
+        LIST(APPEND DEAL_II_LIBRARIES_RELEASE ${_tmp})
+      ENDIF()
+    ENDFOREACH()
+  ENDIF()
+
+  # List suffixes other than LIBRARIES are appended element-wise:
+  FOREACH(_var ${DEAL_II_LIST_SUFFIXES})
+    IF(NOT "${_var}" STREQUAL "LIBRARIES" AND DEFINED ${_feature}_${_var})
+      LIST(APPEND DEAL_II_${_var} ${${_feature}_${_var}})
+    ENDIF()
+  ENDFOREACH()
+
+  # String suffixes are handed to ADD_FLAGS as a single string:
+  FOREACH(_var ${DEAL_II_STRING_SUFFIXES})
+    IF(DEFINED ${_feature}_${_var})
+      ADD_FLAGS(DEAL_II_${_var} "${${_feature}_${_var}}")
+    ENDIF()
+  ENDFOREACH()
+
+ENDMACRO()
diff --git a/cmake/macros/macro_remove_duplicates.cmake b/cmake/macros/macro_remove_duplicates.cmake
new file mode 100644
index 0000000..c9c7d58
--- /dev/null
+++ b/cmake/macros/macro_remove_duplicates.cmake
@@ -0,0 +1,34 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Remove duplicate entries from a list. Optionally do this in reverse
+# order, keeping the rightmost element
+#
+# Usage:
+#     REMOVE_DUPLICATES(list [REVERSE])
+#
+
+MACRO(REMOVE_DUPLICATES _list)
+  IF(NOT "${${_list}}" STREQUAL "") # an empty list is left untouched
+    IF("${ARGN}" STREQUAL "REVERSE")
+      LIST(REVERSE ${_list}) # deduplicate back to front, so the rightmost copy survives
+      LIST(REMOVE_DUPLICATES ${_list})
+      LIST(REVERSE ${_list})
+    ELSE()
+      LIST(REMOVE_DUPLICATES ${_list}) # the leftmost copy survives
+    ENDIF()
+  ENDIF()
+ENDMACRO()
diff --git a/cmake/macros/macro_replace_flag.cmake b/cmake/macros/macro_replace_flag.cmake
new file mode 100644
index 0000000..80eb4f4
--- /dev/null
+++ b/cmake/macros/macro_replace_flag.cmake
@@ -0,0 +1,36 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Replace all occurrences of "${flag}" with "${replacement}" in the string
+# variable.
+#
+# Usage:
+#     REPLACE_FLAG(variable flag replacement)
+#
+
+MACRO(REPLACE_FLAG _variable _flag _replacement)
+  STRING(STRIP "${_replacement}" _replacement_stripped)
+  STRING(REPLACE " " "  " ${_variable} "${${_variable}}") # double every blank ...
+  SET(${_variable} " ${${_variable}} ") # ... and pad both ends: each word now has a leading and a trailing blank of its own
+  STRING(REPLACE " " "  " _flag2 "${_flag}") # same doubling for a flag that itself contains blanks
+  IF(NOT "${_replacement_stripped}" STREQUAL "")
+    STRING(REPLACE " ${_flag2} " " ${_replacement_stripped} " ${_variable} "${${_variable}}") # the surrounding blanks restrict the match to whole words
+  ELSE() # empty replacement: a single blank takes the place of the flag
+    STRING(REPLACE " ${_flag2} " " " ${_variable} "${${_variable}}")
+  ENDIF()
+  STRING(REPLACE "  " " " ${_variable} "${${_variable}}") # turn pairs of blanks back into single ones
+  STRING(STRIP "${${_variable}}" ${_variable}) # remove the padding at both ends
+ENDMACRO()
diff --git a/cmake/macros/macro_reset_cmake_required.cmake b/cmake/macros/macro_reset_cmake_required.cmake
new file mode 100644
index 0000000..82b3192
--- /dev/null
+++ b/cmake/macros/macro_reset_cmake_required.cmake
@@ -0,0 +1,29 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2013 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# A small macro to reset the CMAKE_REQUIRED_* variables to their default
+# values
+#
+# Usage:
+#     RESET_CMAKE_REQUIRED()
+#
+
+MACRO(RESET_CMAKE_REQUIRED)
+  UNSET(CMAKE_REQUIRED_INCLUDES) # no extra include directories by default
+  SET(CMAKE_REQUIRED_FLAGS ${DEAL_II_CXX_FLAGS_SAVED}) # the two saved values are the defaults
+  SET(CMAKE_REQUIRED_LIBRARIES ${DEAL_II_LINKER_FLAGS_SAVED})
+ENDMACRO()
+
diff --git a/cmake/macros/macro_set_if_empty.cmake b/cmake/macros/macro_set_if_empty.cmake
new file mode 100644
index 0000000..05e0aeb
--- /dev/null
+++ b/cmake/macros/macro_set_if_empty.cmake
@@ -0,0 +1,23 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# If 'variable' is empty it will be set to 'value'
+#
+MACRO(SET_IF_EMPTY _variable)
+  IF("${${_variable}}" STREQUAL "") # an undefined variable expands to the empty string as well
+    SET(${_variable} ${ARGN}) # everything after the variable name becomes the value (a list if several arguments are given)
+  ENDIF()
+ENDMACRO()
diff --git a/cmake/macros/macro_strip_flag.cmake b/cmake/macros/macro_strip_flag.cmake
new file mode 100644
index 0000000..5ac2e6b
--- /dev/null
+++ b/cmake/macros/macro_strip_flag.cmake
@@ -0,0 +1,31 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Remove all occurrences of "${flag}" in the string variable.
+#
+# Usage:
+#     STRIP_FLAG(variable flag)
+#
+
+MACRO(STRIP_FLAG _variable _flag)
+  STRING(REPLACE " " "  " ${_variable} "${${_variable}}") # double every blank ...
+  SET(${_variable} " ${${_variable}} ") # ... and pad both ends: each word now has a leading and a trailing blank of its own
+  STRING(REPLACE " " "  " _flag2 "${_flag}") # same doubling for a flag that itself contains blanks
+  STRING(REPLACE " ${_flag2} " " " ${_variable} "${${_variable}}") # the surrounding blanks restrict the match to whole words
+  STRING(REPLACE "  " " " ${_variable} "${${_variable}}") # turn pairs of blanks back into single ones
+  STRING(STRIP "${${_variable}}" ${_variable}) # remove the padding at both ends
+ENDMACRO()
+
diff --git a/cmake/macros/macro_switch_library_preference.cmake b/cmake/macros/macro_switch_library_preference.cmake
new file mode 100644
index 0000000..499b78c
--- /dev/null
+++ b/cmake/macros/macro_switch_library_preference.cmake
@@ -0,0 +1,32 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2013 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# This macro toggles the preference for static/shared libraries if
+# DEAL_II_PREFER_STATIC_LIBS=TRUE but the final executable will still be
+# dynamically linked, i.e. DEAL_II_STATIC_EXECUTABLE=OFF. As the list of
+# suffixes is merely reversed, a second call restores the original order.
+# Usage:
+#     SWITCH_LIBRARY_PREFERENCE()
+#
+
+MACRO(SWITCH_LIBRARY_PREFERENCE)
+  IF(DEAL_II_PREFER_STATIC_LIBS AND NOT DEAL_II_STATIC_EXECUTABLE)
+    #
+    # Invert the search order for libraries when DEAL_II_PREFER_STATIC_LIBS
+    # is set. This will prefer static archives instead of shared libraries:
+    LIST(REVERSE CMAKE_FIND_LIBRARY_SUFFIXES)
+  ENDIF()
+ENDMACRO()
diff --git a/cmake/macros/macro_to_string.cmake b/cmake/macros/macro_to_string.cmake
new file mode 100644
index 0000000..4565380
--- /dev/null
+++ b/cmake/macros/macro_to_string.cmake
@@ -0,0 +1,30 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# A small macro used for converting a list into a space
+# separated string. The result is stored in the variable "string" with
+# leading and trailing whitespace stripped.
+# Usage:
+#     TO_STRING(string ${list1} ${list2} ...)
+#
+
+MACRO(TO_STRING _variable)
+  SET(${_variable} "")
+  FOREACH(_var  ${ARGN})
+    SET(${_variable} "${${_variable}} ${_var}")
+  ENDFOREACH()
+  STRING(STRIP "${${_variable}}" ${_variable})
+ENDMACRO()
diff --git a/cmake/macros/macro_to_string_and_add_prefix.cmake b/cmake/macros/macro_to_string_and_add_prefix.cmake
new file mode 100644
index 0000000..709e0ca
--- /dev/null
+++ b/cmake/macros/macro_to_string_and_add_prefix.cmake
@@ -0,0 +1,31 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# A small macro used for converting a cmake list into a space
+# separated string. This macro adds the string "prefix" in front of each
+# element of the list.
+# No separator is inserted between the prefix and the element itself.
+# Usage:
+#     TO_STRING_AND_ADD_PREFIX(string "prefix" ${list1} ${list2} ...)
+#
+
+MACRO(TO_STRING_AND_ADD_PREFIX _variable _prefix)
+  SET(${_variable} "")
+  FOREACH(_var ${ARGN})
+    SET(${_variable} "${${_variable}} ${_prefix}${_var}")
+  ENDFOREACH()
+  STRING(STRIP "${${_variable}}" ${_variable})
+ENDMACRO()
diff --git a/cmake/macros/macro_verbose_include.cmake b/cmake/macros/macro_verbose_include.cmake
new file mode 100644
index 0000000..58790cf
--- /dev/null
+++ b/cmake/macros/macro_verbose_include.cmake
@@ -0,0 +1,20 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2014 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+# VERBOSE_INCLUDE(file): print an empty status line and "Include <file>", then INCLUDE() the file.
+MACRO(VERBOSE_INCLUDE _file)
+  MESSAGE(STATUS "")
+  MESSAGE(STATUS "Include ${_file}")
+  INCLUDE(${_file})
+ENDMACRO()
diff --git a/cmake/modules/FindARPACK.cmake b/cmake/modules/FindARPACK.cmake
new file mode 100644
index 0000000..7d923e5
--- /dev/null
+++ b/cmake/modules/FindARPACK.cmake
@@ -0,0 +1,87 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+# Try to find the ARPACK library and, if DEAL_II_WITH_MPI is set, the
+# PARPACK library next to it. ARPACK_DIR (CMake variable or environment
+# variable of the same name) serves as a search hint.
+# This module exports
+#
+#   ARPACK_LIBRARIES
+#   ARPACK_LINKER_FLAGS
+#   ARPACK_WITH_PARPACK
+#
+
+SET(ARPACK_DIR "" CACHE PATH "An optional hint to an ARPACK installation")
+SET_IF_EMPTY(ARPACK_DIR "$ENV{ARPACK_DIR}")
+
+DEAL_II_FIND_LIBRARY(ARPACK_LIBRARY
+  NAMES arpack
+  HINTS ${ARPACK_DIR}
+  PATH_SUFFIXES lib${LIB_SUFFIX} lib64 lib
+  )
+
+IF(DEAL_II_WITH_MPI)
+  #
+  # Sanity check: Only search the parpack library in the same directory as
+  # the arpack library...
+  #
+  GET_FILENAME_COMPONENT(_path "${ARPACK_LIBRARY}" PATH)
+  DEAL_II_FIND_LIBRARY(PARPACK_LIBRARY
+    NAMES parpack
+    HINTS ${_path}
+    NO_DEFAULT_PATH
+    NO_CMAKE_ENVIRONMENT_PATH
+    NO_CMAKE_PATH
+    NO_SYSTEM_ENVIRONMENT_PATH
+    NO_CMAKE_SYSTEM_PATH
+    NO_CMAKE_FIND_ROOT_PATH
+    )
+ELSE()
+  SET(PARPACK_LIBRARY "PARPACK_LIBRARY-NOTFOUND")
+ENDIF()
+
+IF(NOT DEAL_II_ARPACK_WITH_PARPACK)
+  #
+  # We have to avoid an unfortunate symbol clash with "libscalapack.so" -
+  # arpack happened to blindly copy a symbol name...
+  #   https://github.com/opencollab/arpack-ng/issues/18
+  #   https://github.com/opencollab/arpack-ng/pull/21
+  #
+  # Just disable parpack support if scalapack is present in Trilinos' or
+  # PETSc's link interface. This can be overridden by manually setting
+  # DEAL_II_ARPACK_WITH_PARPACK to true.
+  #
+  FOREACH(_libraries ${TRILINOS_LIBRARIES} ${PETSC_LIBRARIES})
+    IF("${_libraries}" MATCHES "scalapack")
+      SET(PARPACK_LIBRARY "PARPACK_LIBRARY-NOTFOUND")
+    ENDIF()
+  ENDFOREACH()
+ENDIF()
+
+
+IF(NOT PARPACK_LIBRARY MATCHES "-NOTFOUND")
+  SET(ARPACK_WITH_PARPACK TRUE)
+ELSE()
+  SET(ARPACK_WITH_PARPACK FALSE)
+ENDIF()
+
+DEAL_II_PACKAGE_HANDLE(ARPACK
+  LIBRARIES
+    OPTIONAL PARPACK_LIBRARY
+    REQUIRED ARPACK_LIBRARY LAPACK_LIBRARIES
+    OPTIONAL MPI_C_LIBRARIES
+  LINKER_FLAGS OPTIONAL LAPACK_LINKER_FLAGS
+  CLEAR ARPACK_LIBRARY PARPACK_LIBRARY
+  )
diff --git a/cmake/modules/FindBOOST.cmake b/cmake/modules/FindBOOST.cmake
new file mode 100644
index 0000000..632483f
--- /dev/null
+++ b/cmake/modules/FindBOOST.cmake
@@ -0,0 +1,91 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2014 - 2016 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+# Try to find the boost libraries (version 1.48 or newer; components
+# iostreams, serialization, system and thread). BOOST_DIR (CMake or
+# environment variable) is forwarded to FIND_PACKAGE(Boost) as BOOST_ROOT.
+# This module exports:
+#
+#     BOOST_FOUND
+#     BOOST_LIBRARIES
+#     BOOST_INCLUDE_DIRS
+#     BOOST_VERSION
+#     BOOST_VERSION_MAJOR
+#     BOOST_VERSION_MINOR
+#     BOOST_VERSION_SUBMINOR
+#
+
+SET(BOOST_DIR "" CACHE PATH "An optional hint to a BOOST installation")
+SET_IF_EMPTY(BOOST_DIR "$ENV{BOOST_DIR}")
+
+IF(NOT "${BOOST_DIR}" STREQUAL "")
+  SET(BOOST_ROOT "${BOOST_DIR}")
+ENDIF()
+
+#
+# Prefer static libs if BUILD_SHARED_LIBS=OFF:
+#
+IF(NOT BUILD_SHARED_LIBS)
+  SET(Boost_USE_STATIC_LIBS TRUE)
+ENDIF()
+
+# temporarily disable ${CMAKE_SOURCE_DIR}/cmake/modules for module lookup
+LIST(REMOVE_ITEM CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/modules/)
+FIND_PACKAGE(Boost 1.48 COMPONENTS iostreams serialization system thread)
+LIST(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/modules/)
+
+#
+# Fall back to dynamic libraries if no static libraries could be found:
+#
+IF(NOT Boost_FOUND AND Boost_USE_STATIC_LIBS)
+  SET(Boost_USE_STATIC_LIBS FALSE)
+
+  # temporarily disable ${CMAKE_SOURCE_DIR}/cmake/modules for module lookup
+  LIST(REMOVE_ITEM CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/modules/)
+  FIND_PACKAGE(Boost 1.48 COMPONENTS iostreams serialization system thread)
+  LIST(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/modules/)
+ENDIF()
+
+
+
+IF(Boost_FOUND)
+  #
+  # Remove "pthread" from Boost_LIBRARIES. Threading, if necessary, is
+  # already set up via configure_1_threads.cmake.
+  #
+  LIST(REMOVE_ITEM Boost_LIBRARIES "pthread")
+
+  SET(BOOST_VERSION_MAJOR "${Boost_MAJOR_VERSION}")
+  SET(BOOST_VERSION_MINOR "${Boost_MINOR_VERSION}")
+  SET(BOOST_VERSION_SUBMINOR "${Boost_SUBMINOR_VERSION}")
+  SET(BOOST_VERSION
+    "${BOOST_VERSION_MAJOR}.${BOOST_VERSION_MINOR}.${BOOST_VERSION_SUBMINOR}"
+    )
+ENDIF()
+
+DEAL_II_PACKAGE_HANDLE(BOOST
+  LIBRARIES REQUIRED Boost_LIBRARIES
+  INCLUDE_DIRS REQUIRED Boost_INCLUDE_DIRS
+  USER_INCLUDE_DIRS Boost_INCLUDE_DIRS
+  CLEAR
+    Boost_DIR Boost_INCLUDE_DIR Boost_IOSTREAMS_LIBRARY_DEBUG
+    Boost_IOSTREAMS_LIBRARY_RELEASE Boost_LIBRARY_DIR
+    Boost_SERIALIZATION_LIBRARY_DEBUG Boost_SERIALIZATION_LIBRARY_RELEASE
+    Boost_SYSTEM_LIBRARY_DEBUG Boost_SYSTEM_LIBRARY_RELEASE
+    Boost_THREAD_LIBRARY_DEBUG Boost_THREAD_LIBRARY_RELEASE
+    Boost_LIBRARY_DIR_DEBUG Boost_LIBRARY_DIR_RELEASE
+    _Boost_COMPONENTS_SEARCHED _Boost_INCLUDE_DIR_LAST
+    _Boost_LIBRARY_DIR_LAST _Boost_USE_MULTITHREADED_LAST
+  )
diff --git a/cmake/modules/FindBZIP2.cmake b/cmake/modules/FindBZIP2.cmake
new file mode 100644
index 0000000..e09106c
--- /dev/null
+++ b/cmake/modules/FindBZIP2.cmake
@@ -0,0 +1,46 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2014 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+# Try to find the BZIP2 library by wrapping the FindBZip2 module that
+# ships with CMake. BZIP2_VERSION is a copy of BZIP2_VERSION_STRING
+# (presumably set by that module - confirm against the CMake version used).
+# This module exports
+#
+#   BZIP2_LIBRARIES
+#   BZIP2_INCLUDE_DIRS
+#   BZIP2_VERSION
+#
+
+#
+# Houston, we have a problem: CMake ships its own FindBZip2.cmake module.
+# Unfortunately we want to call DEAL_II_PACKAGE_HANDLE. Therefore, use the
+# original find module and do a dummy call to DEAL_II_PACKAGE_HANDLE:
+#
+
+# temporarily disable ${CMAKE_SOURCE_DIR}/cmake/modules for module lookup
+LIST(REMOVE_ITEM CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/modules/)
+FIND_PACKAGE(BZip2)
+LIST(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/modules/)
+
+SET(BZIP2_VERSION ${BZIP2_VERSION_STRING})
+SET(_bzip2_libraries ${BZIP2_LIBRARIES})
+
+DEAL_II_PACKAGE_HANDLE(BZIP2
+  LIBRARIES REQUIRED _bzip2_libraries
+  INCLUDE_DIRS REQUIRED BZIP2_INCLUDE_DIR
+  CLEAR
+    BZIP2_INCLUDE_DIR BZIP2_LIBRARY_DEBUG BZIP2_LIBRARY_RELEASE
+    BZIP2_NEED_PREFIX
+  )
diff --git a/cmake/modules/FindHDF5.cmake b/cmake/modules/FindHDF5.cmake
new file mode 100644
index 0000000..d87526d
--- /dev/null
+++ b/cmake/modules/FindHDF5.cmake
@@ -0,0 +1,72 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+# Try to find the HDF5 library (hdf5 and hdf5_hl). HDF5_WITH_MPI is only
+# set if H5pubconf.h (or H5pubconf-64.h) is found; it is TRUE if that file
+# contains a line matching "#define.*H5_HAVE_PARALLEL 1", FALSE otherwise.
+# This module exports
+#
+#   HDF5_LIBRARIES
+#   HDF5_INCLUDE_DIRS
+#   HDF5_WITH_MPI
+#
+
+SET(HDF5_DIR "" CACHE PATH "An optional hint to an hdf5 directory")
+SET_IF_EMPTY(HDF5_DIR "$ENV{HDF5_DIR}")
+
+DEAL_II_FIND_PATH(HDF5_INCLUDE_DIR hdf5.h
+  HINTS ${HDF5_DIR}
+  PATH_SUFFIXES hdf5 hdf5/include include/hdf5 include
+  )
+
+DEAL_II_FIND_LIBRARY(HDF5_LIBRARY NAMES hdf5
+  HINTS ${HDF5_DIR}
+  PATH_SUFFIXES hdf5/lib lib${LIB_SUFFIX} lib64 lib
+  )
+
+DEAL_II_FIND_LIBRARY(HDF5_HL_LIBRARY NAMES hdf5_hl
+  HINTS ${HDF5_DIR}
+  PATH_SUFFIXES lib${LIB_SUFFIX} lib64 lib
+  )
+
+DEAL_II_FIND_FILE(HDF5_PUBCONF NAMES H5pubconf.h H5pubconf-64.h
+  HINTS ${HDF5_INCLUDE_DIR} ${HDF5_DIR}
+  PATH_SUFFIXES hdf5 hdf5/include include/hdf5 include
+  )
+
+IF(EXISTS ${HDF5_PUBCONF})
+  #
+  # Is hdf5 compiled with support for mpi?
+  #
+  FILE(STRINGS ${HDF5_PUBCONF} HDF5_MPI_STRING
+    REGEX "#define.*H5_HAVE_PARALLEL 1"
+    )
+  IF("${HDF5_MPI_STRING}" STREQUAL "")
+    SET(HDF5_WITH_MPI FALSE)
+  ELSE()
+    SET(HDF5_WITH_MPI TRUE)
+  ENDIF()
+ENDIF()
+
+DEAL_II_PACKAGE_HANDLE(HDF5
+  LIBRARIES
+    REQUIRED HDF5_HL_LIBRARY HDF5_LIBRARY
+    OPTIONAL MPI_C_LIBRARIES
+  INCLUDE_DIRS
+    REQUIRED HDF5_INCLUDE_DIR
+  USER_INCLUDE_DIRS
+    REQUIRED HDF5_INCLUDE_DIR
+  CLEAR HDF5_HL_LIBRARY HDF5_LIBRARY HDF5_INCLUDE_DIR HDF5_PUBCONF
+  )
diff --git a/cmake/modules/FindLAPACK.cmake b/cmake/modules/FindLAPACK.cmake
new file mode 100644
index 0000000..001e5ce
--- /dev/null
+++ b/cmake/modules/FindLAPACK.cmake
@@ -0,0 +1,91 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2013 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# This module is a wrapper around the FindLAPACK.cmake and FindBLAS.cmake
+# modules provided by CMake.
+#
+# This module exports
+#
+#   LAPACK_FOUND
+#   LAPACK_LIBRARIES
+#   LAPACK_LINKER_FLAGS
+#   BLAS_FOUND
+#   BLAS_LIBRARIES
+#   BLAS_LINKER_FLAGS
+#
+
+#
+# We have to use a trick with CMAKE_PREFIX_PATH to make LAPACK_DIR and
+# BLAS_DIR work: both are temporarily prepended to CMAKE_PREFIX_PATH,
+# which is restored from a backup after the two FIND_PACKAGE calls.
+SET(LAPACK_DIR "" CACHE PATH "An optional hint to a LAPACK installation")
+SET(BLAS_DIR "" CACHE PATH "An optional hint to a BLAS installation")
+SET_IF_EMPTY(BLAS_DIR "$ENV{BLAS_DIR}")
+SET_IF_EMPTY(LAPACK_DIR "$ENV{LAPACK_DIR}")
+
+SET(_cmake_prefix_path_backup "${CMAKE_PREFIX_PATH}")
+
+# temporarily disable ${CMAKE_SOURCE_DIR}/cmake/modules for module lookup
+LIST(REMOVE_ITEM CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/modules/)
+
+SET(CMAKE_PREFIX_PATH ${BLAS_DIR} ${LAPACK_DIR} ${_cmake_prefix_path_backup})
+FIND_PACKAGE(BLAS)
+
+SET(CMAKE_PREFIX_PATH ${LAPACK_DIR} ${_cmake_prefix_path_backup})
+FIND_PACKAGE(LAPACK)
+
+SET(CMAKE_PREFIX_PATH ${_cmake_prefix_path_backup})
+LIST(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/modules/)
+
+#
+# Filter out spurious "FALSE" in the library lists:
+#
+IF(DEFINED BLAS_LIBRARIES)
+  LIST(REMOVE_ITEM BLAS_LIBRARIES "FALSE")
+ENDIF()
+IF(DEFINED LAPACK_LIBRARIES)
+  LIST(REMOVE_ITEM LAPACK_LIBRARIES "FALSE")
+ENDIF()
+
+#
+# Well, in case of static archives we have to manually pick up the
+# complete link interface. *sigh*
+#
+# If CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES is not available, do it
+# unconditionally for the most common case (gfortran).
+#
+SET(_fortran_libs ${CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES})
+SET_IF_EMPTY(_fortran_libs gfortran m quadmath c)
+FOREACH(_lib ${_fortran_libs})
+  FIND_SYSTEM_LIBRARY(${_lib}_LIBRARY NAMES ${_lib})
+  LIST(APPEND _additional_libraries ${_lib}_LIBRARY)
+ENDFOREACH()
+
+
+SET(_lapack_libraries ${LAPACK_LIBRARIES})
+SET(_lapack_linker_flags ${LAPACK_LINKER_FLAGS})
+DEAL_II_PACKAGE_HANDLE(LAPACK
+  LIBRARIES
+    REQUIRED _lapack_libraries
+    OPTIONAL BLAS_LIBRARIES ${_additional_libraries}
+  LINKER_FLAGS OPTIONAL _lapack_linker_flags BLAS_LINKER_FLAGS
+  CLEAR
+    atlas_LIBRARY atlcblas_LIBRARY atllapack_LIBRARY blas_LIBRARY
+    eigen_blas_LIBRARY f77blas_LIBRARY gslcblas_LIBRARY lapack_LIBRARY
+    m_LIBRARY ptf77blas_LIBRARY ptlapack_LIBRARY refblas_LIBRARY
+    reflapack_LIBRARY BLAS_LIBRARIES ${_additional_libraries}
+    LAPACK_SYMBOL_CHECK # Cleanup check in configure_1_lapack.cmake
+  )
diff --git a/cmake/modules/FindMETIS.cmake b/cmake/modules/FindMETIS.cmake
new file mode 100644
index 0000000..e6d4ecc
--- /dev/null
+++ b/cmake/modules/FindMETIS.cmake
@@ -0,0 +1,109 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2016 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+# Try to find the (serial) METIS library. A parmetis library located in
+# the same directory as the metis library is passed on as an optional
+# library. The version numbers are extracted from metis.h.
+# This module exports
+#
+#   METIS_LIBRARIES
+#   METIS_INCLUDE_DIRS
+#   METIS_VERSION
+#   METIS_VERSION_MAJOR
+#   METIS_VERSION_MINOR
+#   METIS_VERSION_SUBMINOR
+#
+
+SET(METIS_DIR "" CACHE PATH "An optional hint to a metis directory")
+SET_IF_EMPTY(METIS_DIR "$ENV{METIS_DIR}")
+
+#
+# Metis is usually pretty self contained. So no external dependencies
+# so far. But there could be dependencies on pcre and mpi...
+#
+# Link in MPI unconditionally (if found).
+#
+
+DEAL_II_FIND_LIBRARY(METIS_LIBRARY
+  NAMES metis
+  HINTS ${METIS_DIR}
+  PATH_SUFFIXES
+    lib${LIB_SUFFIX} lib64 lib
+    # This is a hint, isn't it?
+    build/${CMAKE_CXX_PLATFORM_ID}-${CMAKE_SYSTEM_PROCESSOR}/libmetis
+  )
+
+#
+# Sanity check: Only search the parmetis library in the same directory as
+# the metis library...
+#
+GET_FILENAME_COMPONENT(_path "${METIS_LIBRARY}" PATH)
+DEAL_II_FIND_LIBRARY(PARMETIS_LIBRARY
+  NAMES parmetis
+  HINTS ${_path}
+  NO_DEFAULT_PATH
+  NO_CMAKE_ENVIRONMENT_PATH
+  NO_CMAKE_PATH
+  NO_SYSTEM_ENVIRONMENT_PATH
+  NO_CMAKE_SYSTEM_PATH
+  NO_CMAKE_FIND_ROOT_PATH
+  )
+
+DEAL_II_FIND_PATH(METIS_INCLUDE_DIR metis.h
+  HINTS ${METIS_DIR}
+  PATH_SUFFIXES metis include/metis include
+  )
+
+IF(EXISTS ${METIS_INCLUDE_DIR}/metis.h)
+  #
+  # Extract the version number out of metis.h
+  #
+  FILE(STRINGS "${METIS_INCLUDE_DIR}/metis.h" _metis_major_string
+    REGEX "METIS_VER_MAJOR"
+    )
+  STRING(REGEX REPLACE "^.*METIS_VER_MAJOR.* ([0-9]+).*" "\\1"
+    METIS_VERSION_MAJOR "${_metis_major_string}"
+    )
+  FILE(STRINGS "${METIS_INCLUDE_DIR}/metis.h" _metis_minor_string
+    REGEX "METIS_VER_MINOR"
+    )
+  STRING(REGEX REPLACE "^.*METIS_VER_MINOR.* ([0-9]+).*" "\\1"
+    METIS_VERSION_MINOR "${_metis_minor_string}"
+    )
+  FILE(STRINGS "${METIS_INCLUDE_DIR}/metis.h" _metis_subminor_string
+    REGEX "METIS_VER_SUBMINOR"
+    )
+  STRING(REGEX REPLACE "^.*METIS_VER_SUBMINOR.* ([0-9]+).*" "\\1"
+    METIS_VERSION_SUBMINOR "${_metis_subminor_string}"
+    )
+  SET(METIS_VERSION
+    "${METIS_VERSION_MAJOR}.${METIS_VERSION_MINOR}.${METIS_VERSION_SUBMINOR}"
+    )
+  IF("${METIS_VERSION}" STREQUAL "..")
+    SET(METIS_VERSION)
+  ENDIF()
+ENDIF()
+
+DEAL_II_PACKAGE_HANDLE(METIS
+  LIBRARIES
+    OPTIONAL PARMETIS_LIBRARY
+    REQUIRED METIS_LIBRARY
+    OPTIONAL MPI_C_LIBRARIES
+  INCLUDE_DIRS
+    REQUIRED METIS_INCLUDE_DIR
+  USER_INCLUDE_DIRS
+    REQUIRED METIS_INCLUDE_DIR 
+  CLEAR METIS_LIBRARY PARMETIS_LIBRARY METIS_INCLUDE_DIR
+  )
diff --git a/cmake/modules/FindMPI.cmake b/cmake/modules/FindMPI.cmake
new file mode 100644
index 0000000..7f1d396
--- /dev/null
+++ b/cmake/modules/FindMPI.cmake
@@ -0,0 +1,198 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2016 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+# Find MPI. This is a wrapper around the FindMPI module that is picked up
+# once cmake/modules has been removed from CMAKE_MODULE_PATH; afterwards
+# the availability of MPI_SEEK_SET is checked with a small test program.
+# This module exports:
+#   MPI_LIBRARIES
+#   MPI_INCLUDE_DIRS
+#   MPI_CXX_FLAGS
+#   MPI_LINKER_FLAGS
+#   MPI_VERSION
+#   OMPI_VERSION
+#   MPI_HAVE_MPI_SEEK_SET
+#
+
+#
+# Configuration for mpi support:
+#
+# We look for the C and Fortran libraries as well because they are needed
+# by some external libraries for the link interface.
+#
+
+IF(MPI_CXX_FOUND)
+  SET(MPI_FOUND TRUE)
+ENDIF()
+
+#
+# If CMAKE_CXX_COMPILER is already an MPI wrapper, use it to determine
+# the mpi implementation. If MPI_CXX_COMPILER is defined use the value
+# directly.
+#
+SET_IF_EMPTY(MPI_CXX_COMPILER ${CMAKE_CXX_COMPILER})
+IF(CMAKE_C_COMPILER_WORKS)
+  SET_IF_EMPTY(MPI_C_COMPILER ${CMAKE_C_COMPILER}) # for good measure
+ELSE()
+  MESSAGE(STATUS
+    "No suitable C compiler was found! MPI C interface can not be "
+    "autodetected"
+    )
+ENDIF()
+IF(CMAKE_Fortran_COMPILER_WORKS)
+  SET_IF_EMPTY(MPI_Fortran_COMPILER ${CMAKE_Fortran_COMPILER}) # for good measure
+ELSE()
+  MESSAGE(STATUS
+    "No suitable Fortran compiler was found! MPI Fortran interface can "
+    "not be autodetected"
+    )
+ENDIF()
+
+#
+# Call the system FindMPI.cmake module:
+#
+
+# If the environment variable MPIEXEC is set, call FIND_PROGRAM() first so that,
+# on success, later lookups inside FIND_PACKAGE(MPI) do not alter the result.
+IF(DEFINED ENV{MPIEXEC})
+  FIND_PROGRAM(MPIEXEC $ENV{MPIEXEC})
+ENDIF()
+
+# temporarily disable ${CMAKE_SOURCE_DIR}/cmake/modules for module lookup
+LIST(REMOVE_ITEM CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/modules/)
+FIND_PACKAGE(MPI)
+
+IF(NOT MPI_CXX_FOUND AND DEAL_II_WITH_MPI)
+  #
+  # CMAKE_CXX_COMPILER is apparently not an mpi wrapper.
+  # So, let's be a bit more aggressive in finding MPI (and if
+  # DEAL_II_WITH_MPI is set).
+  #
+  MESSAGE(STATUS
+    "MPI not found but DEAL_II_WITH_MPI is set to TRUE."
+    " Try again with more aggressive search paths:"
+    )
+  # Clear variables so that FIND_PACKAGE runs again:
+  SET(MPI_FOUND)
+  SET(MPI_CXX_COMPILER)
+  SET(MPI_C_COMPILER)
+  SET(MPI_Fortran_COMPILER)
+  UNSET(MPI_CXX_COMPILER CACHE)
+  UNSET(MPI_C_COMPILER CACHE)
+  UNSET(MPI_Fortran_COMPILER CACHE)
+  FIND_PACKAGE(MPI)
+ENDIF()
+LIST(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/modules/)
+
+#
+# Older versions of MPI may not have MPI_SEEK_SET, which we
+# require. Strangely, unlike MPICH, OpenMPI needs the correct link libraries
+# for this to compile, not *just* the correct include directories.
+#
+
+CLEAR_CMAKE_REQUIRED()
+SET(CMAKE_REQUIRED_FLAGS ${MPI_CXX_COMPILE_FLAGS} ${MPI_CXX_LINK_FLAGS})
+SET(CMAKE_REQUIRED_INCLUDES ${MPI_CXX_INCLUDE_PATH})
+SET(CMAKE_REQUIRED_LIBRARIES ${MPI_LIBRARIES})
+CHECK_CXX_SOURCE_COMPILES(
+  "
+  #include <mpi.h>
+  #ifndef MPI_SEEK_SET
+  #  error
+  #endif
+  int main() {}
+  "
+  MPI_HAVE_MPI_SEEK_SET
+  )
+RESET_CMAKE_REQUIRED()
+
+# Manually assemble some version information by parsing mpi.h. The version
+# macros must be followed by whitespace and the number, so that multi-digit
+# components (e.g. Open MPI 1.10) are captured completely.
+
+DEAL_II_FIND_FILE(MPI_MPI_H
+  NAMES mpi.h
+  HINTS ${MPI_CXX_INCLUDE_PATH} ${MPI_C_INCLUDE_PATH}
+  )
+IF(NOT MPI_MPI_H MATCHES "-NOTFOUND" AND NOT DEFINED MPI_VERSION)
+  FILE(STRINGS "${MPI_MPI_H}" MPI_VERSION_MAJOR_STRING
+    REGEX "#define.*MPI_VERSION")
+  STRING(REGEX REPLACE "^.*MPI_VERSION[ \t]+([0-9]+).*" "\\1"
+    MPI_VERSION_MAJOR "${MPI_VERSION_MAJOR_STRING}"
+    )
+  FILE(STRINGS "${MPI_MPI_H}" MPI_VERSION_MINOR_STRING
+    REGEX "#define.*MPI_SUBVERSION")
+  STRING(REGEX REPLACE "^.*MPI_SUBVERSION[ \t]+([0-9]+).*" "\\1"
+    MPI_VERSION_MINOR "${MPI_VERSION_MINOR_STRING}"
+    )
+  SET(MPI_VERSION "${MPI_VERSION_MAJOR}.${MPI_VERSION_MINOR}")
+  IF("${MPI_VERSION}" STREQUAL ".")
+    SET(MPI_VERSION)
+    SET(MPI_VERSION_MAJOR)
+    SET(MPI_VERSION_MINOR)
+  ENDIF()
+
+  # OMPI specific version number (unset again if the macros are absent):
+  FILE(STRINGS "${MPI_MPI_H}" OMPI_VERSION_MAJOR_STRING
+    REGEX "#define.*OMPI_MAJOR_VERSION")
+  STRING(REGEX REPLACE "^.*OMPI_MAJOR_VERSION[ \t]+([0-9]+).*" "\\1"
+    OMPI_VERSION_MAJOR "${OMPI_VERSION_MAJOR_STRING}"
+    )
+  FILE(STRINGS "${MPI_MPI_H}" OMPI_VERSION_MINOR_STRING
+    REGEX "#define.*OMPI_MINOR_VERSION")
+  STRING(REGEX REPLACE "^.*OMPI_MINOR_VERSION[ \t]+([0-9]+).*" "\\1"
+    OMPI_VERSION_MINOR "${OMPI_VERSION_MINOR_STRING}"
+    )
+  FILE(STRINGS "${MPI_MPI_H}" OMPI_VERSION_RELEASE_STRING
+    REGEX "#define.*OMPI_RELEASE_VERSION")
+  STRING(REGEX REPLACE "^.*OMPI_RELEASE_VERSION[ \t]+([0-9]+).*" "\\1"
+    OMPI_VERSION_SUBMINOR "${OMPI_VERSION_RELEASE_STRING}"
+    )
+  SET(OMPI_VERSION
+    "${OMPI_VERSION_MAJOR}.${OMPI_VERSION_MINOR}.${OMPI_VERSION_SUBMINOR}"
+    )
+  IF("${OMPI_VERSION}" STREQUAL "..")
+    SET(OMPI_VERSION)
+    SET(OMPI_VERSION_MAJOR)
+    SET(OMPI_VERSION_MINOR)
+    SET(OMPI_VERSION_SUBMINOR)
+  ENDIF()
+ENDIF()
+
+DEAL_II_PACKAGE_HANDLE(MPI
+  LIBRARIES
+    REQUIRED MPI_CXX_LIBRARIES
+    OPTIONAL MPI_Fortran_LIBRARIES MPI_C_LIBRARIES
+  INCLUDE_DIRS
+    REQUIRED MPI_CXX_INCLUDE_PATH
+    OPTIONAL MPI_C_INCLUDE_PATH
+  USER_INCLUDE_DIRS
+    REQUIRED MPI_CXX_INCLUDE_PATH
+    OPTIONAL MPI_C_INCLUDE_PATH
+  CXX_FLAGS OPTIONAL MPI_CXX_COMPILE_FLAGS
+  LINKER_FLAGS OPTIONAL MPI_CXX_LINK_FLAGS
+  CLEAR
+    MPI_C_COMPILER
+    MPI_CXX_COMPILER
+    MPIEXEC
+    MPI_EXTRA_LIBRARY
+    MPI_Fortran_COMPILER
+    MPI_HEADER_PATH
+    MPI_LIB
+    MPI_LIBRARY
+    MPI_MPI_H
+    MPI_HAVE_MPI_SEEK_SET
+  )
+
diff --git a/cmake/modules/FindMUPARSER.cmake b/cmake/modules/FindMUPARSER.cmake
new file mode 100644
index 0000000..22991f6
--- /dev/null
+++ b/cmake/modules/FindMUPARSER.cmake
@@ -0,0 +1,71 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2013 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+# Try to find the MUPARSER library. The version is parsed from the
+# MUP_VERSION define in muParserDef.h; if it cannot be determined the
+# MUPARSER_VERSION* variables are left untouched.
+# This module exports
+#
+#   MUPARSER_LIBRARIES
+#   MUPARSER_INCLUDE_DIRS
+#   MUPARSER_VERSION
+#   MUPARSER_VERSION_MAJOR
+#   MUPARSER_VERSION_MINOR
+#   MUPARSER_VERSION_SUBMINOR
+#
+
+SET(MUPARSER_DIR "" CACHE PATH "An optional hint to a MUPARSER installation")
+SET_IF_EMPTY(MUPARSER_DIR "$ENV{MUPARSER_DIR}")
+
+DEAL_II_FIND_LIBRARY(MUPARSER_LIBRARY
+  NAMES muparser muparserd
+  HINTS ${MUPARSER_DIR}
+  PATH_SUFFIXES lib${LIB_SUFFIX} lib64 lib
+  )
+
+DEAL_II_FIND_PATH(MUPARSER_INCLUDE_DIR muParserDef.h
+  HINTS ${MUPARSER_DIR}
+  PATH_SUFFIXES include
+  )
+
+IF(EXISTS ${MUPARSER_INCLUDE_DIR}/muParserDef.h)
+  FILE(STRINGS "${MUPARSER_INCLUDE_DIR}/muParserDef.h" MUPARSER_VERSION_STRING_LINE
+    REGEX "#define MUP_VERSION _T"
+    )
+  STRING(REGEX REPLACE ".*\"(.*)\".*" "\\1"
+    _VERSION_STRING "${MUPARSER_VERSION_STRING_LINE}"
+    )
+  # Quote the input: an empty version string must not abort the configure run.
+  STRING(REPLACE "." ";" _VERSION_LIST "${_VERSION_STRING}")
+  LIST(LENGTH _VERSION_LIST _LISTLEN)
+  IF(${_LISTLEN} GREATER 1)
+    LIST(GET _VERSION_LIST 0 MUPARSER_VERSION_MAJOR)
+    LIST(GET _VERSION_LIST 1 MUPARSER_VERSION_MINOR)
+    IF(${_LISTLEN} GREATER 2)
+      LIST(GET _VERSION_LIST 2 MUPARSER_VERSION_SUBMINOR)
+    ELSE()
+      SET(MUPARSER_VERSION_SUBMINOR "0")
+    ENDIF()
+    SET(MUPARSER_VERSION
+      "${MUPARSER_VERSION_MAJOR}.${MUPARSER_VERSION_MINOR}.${MUPARSER_VERSION_SUBMINOR}"
+      )
+  ENDIF()
+ENDIF()
+
+DEAL_II_PACKAGE_HANDLE(MUPARSER
+  LIBRARIES REQUIRED MUPARSER_LIBRARY
+  INCLUDE_DIRS REQUIRED MUPARSER_INCLUDE_DIR
+  CLEAR MUPARSER_LIBRARY MUPARSER_INCLUDE_DIR
+  )
diff --git a/cmake/modules/FindNETCDF.cmake b/cmake/modules/FindNETCDF.cmake
new file mode 100644
index 0000000..f5b8bc2
--- /dev/null
+++ b/cmake/modules/FindNETCDF.cmake
@@ -0,0 +1,54 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+# Try to find the NETCDF C and C++ libraries (netcdf and netcdf_c++ or
+# netcdf_cpp) together with the C++ header netcdfcpp.h. NETCDF_DIR (CMake
+# or environment variable) serves as a search hint.
+# This module exports
+#
+#   NETCDF_LIBRARIES
+#   NETCDF_INCLUDE_DIRS
+#
+
+SET(NETCDF_DIR "" CACHE PATH "An optional hint to a NETCDF installation")
+SET_IF_EMPTY(NETCDF_DIR "$ENV{NETCDF_DIR}")
+
+DEAL_II_FIND_PATH(NETCDF_INCLUDE_DIR netcdfcpp.h
+  HINTS ${NETCDF_DIR}
+  PATH_SUFFIXES netcdf include
+  )
+
+#
+# TODO:
+#
+# - netcdf might externally depend on hdf5. Check and fix this.
+# - separate C++ and C library search
+#
+
+DEAL_II_FIND_LIBRARY(NETCDF_CPLUSPLUS_LIBRARY NAMES netcdf_c++ netcdf_cpp
+  HINTS ${NETCDF_DIR}
+  PATH_SUFFIXES lib${LIB_SUFFIX} lib64 lib
+  )
+
+DEAL_II_FIND_LIBRARY(NETCDF_C_LIBRARY NAMES netcdf
+  HINTS ${NETCDF_DIR}
+  PATH_SUFFIXES lib${LIB_SUFFIX} lib64 lib
+  )
+
+DEAL_II_PACKAGE_HANDLE(NETCDF
+  LIBRARIES REQUIRED NETCDF_CPLUSPLUS_LIBRARY NETCDF_C_LIBRARY
+  INCLUDE_DIRS REQUIRED NETCDF_INCLUDE_DIR
+  CLEAR NETCDF_CPLUSPLUS_LIBRARY NETCDF_C_LIBRARY NETCDF_INCLUDE_DIR
+  )
diff --git a/cmake/modules/FindOPENCASCADE.cmake b/cmake/modules/FindOPENCASCADE.cmake
new file mode 100644
index 0000000..a37bf28
--- /dev/null
+++ b/cmake/modules/FindOPENCASCADE.cmake
@@ -0,0 +1,78 @@
+## ---------------------------------------------------------------------
+## $Id$
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+
+#
+# Try to find the OpenCASCADE (OCC) library. This script supports the
+# OpenCASCADE Community Edition (OCE) library, which is a cmake based
+# OCC library. You might try the original OpenCASCADE library, but your
+# mileage may vary.
+#
+# This module exports:
+#
+#   OPENCASCADE_DIR
+#   OPENCASCADE_INCLUDE_DIRS
+#   OPENCASCADE_LIBRARIES
+#   OPENCASCADE_VERSION
+#
+
+
+SET(OPENCASCADE_DIR "" CACHE PATH "An optional hint to a OpenCASCADE installation")
+SET_IF_EMPTY(OPENCASCADE_DIR "$ENV{OPENCASCADE_DIR}")
+SET_IF_EMPTY(OPENCASCADE_DIR "$ENV{OCC_DIR}")
+SET_IF_EMPTY(OPENCASCADE_DIR "$ENV{OCE_DIR}")
+SET_IF_EMPTY(OPENCASCADE_DIR "$ENV{CASROOT}")
+
+
+DEAL_II_FIND_PATH(OPENCASCADE_INCLUDE_DIR Standard_Version.hxx
+  HINTS ${OPENCASCADE_DIR}
+  PATH_SUFFIXES include include/oce inc
+  )
+
+IF(EXISTS ${OPENCASCADE_INCLUDE_DIR}/Standard_Version.hxx)
+  FILE(STRINGS "${OPENCASCADE_INCLUDE_DIR}/Standard_Version.hxx" OPENCASCADE_VERSION
+    REGEX "#define OCC_VERSION _T"
+    )
+ENDIF()
+
+# These seem to be pretty much the only required ones.
+SET(_opencascade_libraries
+  TKBO TKBool TKBRep TKernel TKFeat TKFillet TKG2d TKG3d TKGeomAlgo
+  TKGeomBase TKHLR TKIGES TKMath TKMesh TKOffset TKPrim TKShHealing TKSTEP
+  TKSTEPAttr TKSTEPBase TKSTL TKTopAlgo TKXSBase
+  )
+
+SET(_libraries "")
+FOREACH(_library ${_opencascade_libraries})
+  LIST(APPEND _libraries OPENCASCADE_${_library})
+  DEAL_II_FIND_LIBRARY(OPENCASCADE_${_library}
+    NAMES ${_library}
+    HINTS ${OPENCASCADE_DIR}
+    PATH_SUFFIXES lib${LIB_SUFFIX} lib64 lib mac64/clang/lib mac32/clang/lib lin64/gcc/lib lin32/gcc/lib
+    )
+ENDFOREACH()
+
+# CLEAR lists the lookup result variables created above.
+DEAL_II_PACKAGE_HANDLE(OPENCASCADE
+  LIBRARIES
+    REQUIRED ${_libraries}
+  INCLUDE_DIRS
+    REQUIRED OPENCASCADE_INCLUDE_DIR
+  USER_INCLUDE_DIRS
+    REQUIRED OPENCASCADE_INCLUDE_DIR
+  CLEAR
+    _opencascade_libraries ${_libraries} OPENCASCADE_INCLUDE_DIR
+  )
diff --git a/cmake/modules/FindP4EST.cmake b/cmake/modules/FindP4EST.cmake
new file mode 100644
index 0000000..5fa4567
--- /dev/null
+++ b/cmake/modules/FindP4EST.cmake
@@ -0,0 +1,170 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Try to find the P4EST library
+#
+# This module exports:
+#   P4EST_LIBRARIES
+#   P4EST_INCLUDE_DIRS
+#   P4EST_WITH_MPI
+#   P4EST_VERSION
+#   P4EST_VERSION_MAJOR
+#   P4EST_VERSION_MINOR
+#   P4EST_VERSION_SUBMINOR
+#   P4EST_VERSION_PATCH
+#
+
+SET(P4EST_DIR "" CACHE PATH
+  "An optional hint to a p4est installation/directory"
+  )
+SET_IF_EMPTY(P4EST_DIR "$ENV{P4EST_DIR}")
+SET_IF_EMPTY(SC_DIR "$ENV{SC_DIR}")
+
+#
+# Search for the sc library, usually bundled with p4est. If no SC_DIR was
+# given, take what we chose for p4est.
+#
+
+DEAL_II_FIND_PATH(SC_INCLUDE_DIR sc.h
+  HINTS
+    ${SC_DIR}/FAST
+    ${SC_DIR}/DEBUG
+    ${SC_DIR}
+    ${P4EST_DIR}/FAST
+    ${P4EST_DIR}/DEBUG
+    ${P4EST_DIR}
+  PATH_SUFFIXES
+    sc include/p4est include src sc/src
+  )
+
+DEAL_II_FIND_LIBRARY(P4EST_LIBRARY_OPTIMIZED
+  NAMES p4est
+  HINTS ${P4EST_DIR}/FAST ${P4EST_DIR}/DEBUG ${P4EST_DIR}
+  PATH_SUFFIXES lib${LIB_SUFFIX} lib64 lib src
+  )
+
+DEAL_II_FIND_LIBRARY(SC_LIBRARY_OPTIMIZED
+  NAMES sc
+  HINTS
+    ${SC_DIR}/FAST
+    ${SC_DIR}/DEBUG
+    ${SC_DIR}
+    ${P4EST_DIR}/FAST
+    ${P4EST_DIR}/DEBUG
+    ${P4EST_DIR}
+  PATH_SUFFIXES
+    lib${LIB_SUFFIX} lib64 lib src sc/src
+  )
+
+#
+# Support debug variants as well:
+#
+
+DEAL_II_FIND_LIBRARY(P4EST_LIBRARY_DEBUG
+  NAMES p4est
+  HINTS ${P4EST_DIR}/DEBUG
+  PATH_SUFFIXES lib${LIB_SUFFIX} lib64 lib src
+  )
+
+DEAL_II_FIND_LIBRARY(SC_LIBRARY_DEBUG
+  NAMES sc
+  HINTS ${SC_DIR}/DEBUG ${P4EST_DIR}/DEBUG
+  PATH_SUFFIXES lib${LIB_SUFFIX} lib64 lib src sc/src
+  )
+
+IF( ( "${P4EST_LIBRARY_OPTIMIZED}" STREQUAL "${P4EST_LIBRARY_DEBUG}"
+      AND "${SC_LIBRARY_OPTIMIZED}" STREQUAL "${SC_LIBRARY_DEBUG}" )
+    OR P4EST_LIBRARY_DEBUG MATCHES "-NOTFOUND"
+    OR SC_LIBRARY_DEBUG MATCHES "-NOTFOUND" )
+  SET(_libraries P4EST_LIBRARY_OPTIMIZED SC_LIBRARY_OPTIMIZED)
+ELSE()
+  SET(_libraries
+    optimized P4EST_LIBRARY_OPTIMIZED SC_LIBRARY_OPTIMIZED
+    debug P4EST_LIBRARY_DEBUG SC_LIBRARY_DEBUG
+    general
+    )
+ENDIF()
+
+
+DEAL_II_FIND_PATH(P4EST_INCLUDE_DIR p4est_config.h
+  HINTS ${P4EST_DIR}/FAST ${P4EST_DIR}/DEBUG ${P4EST_DIR}
+  PATH_SUFFIXES p4est include/p4est include src
+  )
+
+IF(EXISTS ${P4EST_INCLUDE_DIR}/p4est_config.h)
+  #
+  # Determine mpi support of p4est:
+  #
+  FILE(STRINGS "${P4EST_INCLUDE_DIR}/p4est_config.h" P4EST_MPI_STRING
+    REGEX "#define.*P4EST_MPI 1")
+  IF("${P4EST_MPI_STRING}" STREQUAL "")
+    SET(P4EST_WITH_MPI FALSE)
+  ELSE()
+    SET(P4EST_WITH_MPI TRUE)
+  ENDIF()
+
+  #
+  # Extract version numbers:
+  #
+  FILE(STRINGS "${P4EST_INCLUDE_DIR}/p4est_config.h" P4EST_VERSION
+    REGEX "#define P4EST_VERSION \"")
+  STRING(REGEX REPLACE "^.*P4EST_VERSION.*\"([0-9]+.*)\".*" "\\1"
+    P4EST_VERSION "${P4EST_VERSION}"
+    )
+  STRING(REGEX REPLACE
+    "^([0-9]+).*$" "\\1"
+    P4EST_VERSION_MAJOR "${P4EST_VERSION}")
+  STRING(REGEX REPLACE
+    "^[0-9]+\\.([0-9]+).*$" "\\1"
+    P4EST_VERSION_MINOR "${P4EST_VERSION}")
+  STRING(REGEX REPLACE
+    "^[0-9]+\\.[0-9]+\\.([0-9]+).*$" "\\1"
+    P4EST_VERSION_SUBMINOR "${P4EST_VERSION}")
+  STRING(REGEX REPLACE
+    "^[0-9]+\\.[0-9]+\\.[0-9]+\\.([0-9]+).*$" "\\1"
+    P4EST_VERSION_PATCH "${P4EST_VERSION}")
+
+  #
+  # We cannot rely on the fact that SUBMINOR or PATCH are defined.
+  # Nevertheless, we need a full version number for our preprocessor macros
+  # to work. If the p4est version number is only of the form x.y instead of
+  # a.b.c.d, then the last two REGEX_REPLACE calls above will have failed
+  # because the regular expression didn't match the version string,
+  # and P4EST_VERSION_SUBMINOR and P4EST_VERSION_PATCH will either be
+  # empty or be the full version string. In those cases, set those numbers
+  # to 0 if necessary.
+  #
+  IF("${P4EST_VERSION_SUBMINOR}" MATCHES "^(|${P4EST_VERSION})$")
+    SET(P4EST_VERSION_SUBMINOR "0")
+  ENDIF()
+
+  IF("${P4EST_VERSION_PATCH}" MATCHES "^(|${P4EST_VERSION})$")
+    SET(P4EST_VERSION_PATCH "0")
+  ENDIF()
+ENDIF()
+
+DEAL_II_PACKAGE_HANDLE(P4EST
+  LIBRARIES
+    REQUIRED ${_libraries}
+    OPTIONAL LAPACK_LIBRARIES MPI_C_LIBRARIES
+  INCLUDE_DIRS
+    REQUIRED P4EST_INCLUDE_DIR SC_INCLUDE_DIR
+  USER_INCLUDE_DIRS
+    REQUIRED P4EST_INCLUDE_DIR SC_INCLUDE_DIR
+  CLEAR
+    SC_INCLUDE_DIR P4EST_LIBRARY_OPTIMIZED SC_LIBRARY_OPTIMIZED
+    P4EST_LIBRARY_DEBUG SC_LIBRARY_DEBUG P4EST_INCLUDE_DIR
+  )
diff --git a/cmake/modules/FindPETSC.cmake b/cmake/modules/FindPETSC.cmake
new file mode 100644
index 0000000..172ffb1
--- /dev/null
+++ b/cmake/modules/FindPETSC.cmake
@@ -0,0 +1,235 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Try to find the petsc library
+#
+# This module exports:
+#
+#     PETSC_FOUND
+#     PETSC_LIBRARIES
+#     PETSC_INCLUDE_DIRS
+#     PETSC_VERSION
+#     PETSC_VERSION_MAJOR
+#     PETSC_VERSION_MINOR
+#     PETSC_VERSION_SUBMINOR
+#     PETSC_VERSION_PATCH
+#     PETSC_WITH_MPIUNI
+#     PETSC_WITH_64BIT_INDICES
+#     PETSC_WITH_COMPLEX
+#
+
+SET(PETSC_DIR "" CACHE PATH "An optional hint to a PETSc directory")
+SET(PETSC_ARCH "" CACHE STRING "An optional hint to a PETSc arch")
+SET_IF_EMPTY(PETSC_DIR "$ENV{PETSC_DIR}")
+SET_IF_EMPTY(PETSC_ARCH "$ENV{PETSC_ARCH}")
+
+DEAL_II_FIND_LIBRARY(PETSC_LIBRARY
+  NAMES petsc
+  HINTS ${PETSC_DIR} ${PETSC_DIR}/${PETSC_ARCH}
+  PATH_SUFFIXES lib${LIB_SUFFIX} lib64 lib
+  )
+
+#
+# Search for the first part of the includes:
+#
+
+DEAL_II_FIND_PATH(PETSC_INCLUDE_DIR_ARCH petscconf.h
+  HINTS ${PETSC_DIR} ${PETSC_DIR}/${PETSC_ARCH} ${PETSC_INCLUDE_DIRS}
+  PATH_SUFFIXES petsc include include/petsc
+)
+
+SET(PETSC_PETSCCONF_H "${PETSC_INCLUDE_DIR_ARCH}/petscconf.h")
+IF(EXISTS ${PETSC_PETSCCONF_H})
+  #
+  # Is petsc compiled with support for MPIUNI?
+  #
+  FILE(STRINGS "${PETSC_PETSCCONF_H}" PETSC_MPIUNI_STRING
+    REGEX "#define.*PETSC_HAVE_MPIUNI 1")
+  IF("${PETSC_MPIUNI_STRING}" STREQUAL "")
+    SET(PETSC_WITH_MPIUNI FALSE)
+  ELSE()
+    SET(PETSC_WITH_MPIUNI TRUE)
+  ENDIF()
+
+  #
+  # Is petsc compiled with support for 64BIT_INDICES?
+  #
+  FILE(STRINGS "${PETSC_PETSCCONF_H}" PETSC_64BIT_INDICES_STRING
+    REGEX "#define.*PETSC_USE_64BIT_INDICES 1")
+  IF("${PETSC_64BIT_INDICES_STRING}" STREQUAL "")
+    SET(PETSC_WITH_64BIT_INDICES FALSE)
+  ELSE()
+    SET(PETSC_WITH_64BIT_INDICES TRUE)
+  ENDIF()
+
+  #
+  # Is petsc compiled with support for COMPLEX numbers?
+  #
+  FILE(STRINGS "${PETSC_PETSCCONF_H}" PETSC_COMPLEX_STRING
+    REGEX "#define.*PETSC_USE_COMPLEX 1")
+  IF("${PETSC_COMPLEX_STRING}" STREQUAL "")
+    SET(PETSC_WITH_COMPLEX FALSE)
+  ELSE()
+    SET(PETSC_WITH_COMPLEX TRUE)
+  ENDIF()
+ENDIF()
+
+#
+# Sometimes, this is not enough...
+# If petsc is not installed but in source tree layout, there will be
+#   ${PETSC_DIR}/${PETSC_ARCH}/include - which we should have found by now.
+#   ${PETSC_DIR}/include               - which we still have to find.
+#
+# Or it is installed in a non standard layout in the system (e.g. in
+# Gentoo), where there will be
+#   ${PETSC_DIR}/${PETSC_ARCH}/include
+#   /usr/include/petsc ...
+#
+# Either way, we must be able to find petscversion.h:
+#
+
+DEAL_II_FIND_PATH(PETSC_INCLUDE_DIR_COMMON petscversion.h
+  HINTS ${PETSC_DIR} ${PETSC_DIR}/${PETSC_ARCH} ${PETSC_INCLUDE_DIRS}
+  PATH_SUFFIXES petsc include include/petsc
+)
+
+SET(PETSC_PETSCVERSION_H "${PETSC_INCLUDE_DIR_COMMON}/petscversion.h")
+IF(EXISTS ${PETSC_PETSCVERSION_H})
+  # Pull each version component out of its "#define PETSC_VERSION_<X> <n>"
+  # line in petscversion.h. The number is anchored to the whitespace that
+  # follows the macro name so that multi-digit values such as "10" are
+  # captured completely instead of only their last digit.
+  FILE(STRINGS "${PETSC_PETSCVERSION_H}" PETSC_VERSION_MAJOR_STRING
+    REGEX "#define.*PETSC_VERSION_MAJOR")
+  STRING(REGEX REPLACE "^.*PETSC_VERSION_MAJOR[ \t]+([0-9]+).*" "\\1"
+    PETSC_VERSION_MAJOR "${PETSC_VERSION_MAJOR_STRING}")
+  FILE(STRINGS "${PETSC_PETSCVERSION_H}" PETSC_VERSION_MINOR_STRING
+    REGEX "#define.*PETSC_VERSION_MINOR")
+  STRING(REGEX REPLACE "^.*PETSC_VERSION_MINOR[ \t]+([0-9]+).*" "\\1"
+    PETSC_VERSION_MINOR "${PETSC_VERSION_MINOR_STRING}")
+  FILE(STRINGS "${PETSC_PETSCVERSION_H}" PETSC_VERSION_SUBMINOR_STRING
+    REGEX "#define.*PETSC_VERSION_SUBMINOR")
+  STRING(REGEX REPLACE "^.*PETSC_VERSION_SUBMINOR[ \t]+([0-9]+).*" "\\1"
+    PETSC_VERSION_SUBMINOR "${PETSC_VERSION_SUBMINOR_STRING}")
+  FILE(STRINGS "${PETSC_PETSCVERSION_H}" PETSC_VERSION_PATCH_STRING
+    REGEX "#define.*PETSC_VERSION_PATCH")
+  STRING(REGEX REPLACE "^.*PETSC_VERSION_PATCH[ \t]+([0-9]+).*" "\\1"
+    PETSC_VERSION_PATCH "${PETSC_VERSION_PATCH_STRING}")
+  SET(PETSC_VERSION
+    "${PETSC_VERSION_MAJOR}.${PETSC_VERSION_MINOR}.${PETSC_VERSION_SUBMINOR}.${PETSC_VERSION_PATCH}"
+    )
+ENDIF()
+
+#
+# So, up to this point it was easy. Now, the tricky part. Search for
+# petscvariables and determine the includes and the link interface from
+# that file:
+#
+
+DEAL_II_FIND_FILE(PETSC_PETSCVARIABLES
+  NAMES petscvariables
+  HINTS ${PETSC_DIR}/${PETSC_ARCH} ${PETSC_DIR}
+  PATH_SUFFIXES conf
+  )
+
+IF(NOT PETSC_PETSCVARIABLES MATCHES "-NOTFOUND")
+  #
+  # Includes:
+  #
+
+  FILE(STRINGS "${PETSC_PETSCVARIABLES}" _external_includes
+    REGEX "^PETSC_CC_INCLUDES =.*")
+  SEPARATE_ARGUMENTS(_external_includes)
+
+  SET(_petsc_includes)
+  FOREACH(_token ${_external_includes})
+    #
+    # workaround: Do not pull in scotch include directory. It clashes with
+    # our use of the metis headers...
+    #
+    IF(_token MATCHES "^-I" AND NOT _token MATCHES "scotch$")
+      STRING(REGEX REPLACE "^-I" "" _token "${_token}")
+      LIST(APPEND _petsc_includes ${_token})
+    ENDIF()
+  ENDFOREACH()
+
+  # Remove petsc's own include directories:
+  IF(NOT "${_petsc_includes}" STREQUAL "")
+    LIST(REMOVE_AT _petsc_includes 0 1)
+  ENDIF()
+
+  #
+  # Link line:
+  #
+
+  FILE(STRINGS "${PETSC_PETSCVARIABLES}" PETSC_EXTERNAL_LINK_LINE
+    REGEX "^PETSC_WITH_EXTERNAL_LIB =.*")
+
+  SEPARATE_ARGUMENTS(PETSC_EXTERNAL_LINK_LINE)
+
+  SET(_hints)
+  SET(_petsc_libraries)
+  SET(_cleanup_variables)
+  FOREACH(_token ${PETSC_EXTERNAL_LINK_LINE})
+    IF(_token MATCHES "^-L")
+      # Build up hints with the help of all tokens passed with -L:
+      STRING(REGEX REPLACE "^-L" "" _token "${_token}")
+      LIST(APPEND _hints ${_token})
+    ELSEIF(_token MATCHES "^-l")
+      # Search for every library that was specified with -l:
+      STRING(REGEX REPLACE "^-l" "" _token "${_token}")
+
+      IF(NOT _token MATCHES "(petsc|stdc\\+\\+|gcc_s|clang_rt)")
+        LIST(APPEND _cleanup_variables PETSC_LIBRARY_${_token})
+
+        IF(_token MATCHES "^(c|quadmath|gfortran|m|rt|nsl|dl|pthread)$")
+          FIND_SYSTEM_LIBRARY(PETSC_LIBRARY_${_token} NAMES ${_token})
+        ELSE()
+          DEAL_II_FIND_LIBRARY(PETSC_LIBRARY_${_token}
+            NAMES ${_token}
+            HINTS ${_hints}
+            )
+        ENDIF()
+        IF(NOT PETSC_LIBRARY_${_token} MATCHES "-NOTFOUND")
+          LIST(APPEND _petsc_libraries ${PETSC_LIBRARY_${_token}})
+        ENDIF()
+
+      ENDIF()
+
+    ENDIF()
+  ENDFOREACH()
+ENDIF()
+
+DEAL_II_PACKAGE_HANDLE(PETSC
+  LIBRARIES
+    REQUIRED PETSC_LIBRARY
+    OPTIONAL _petsc_libraries
+  INCLUDE_DIRS
+    REQUIRED PETSC_INCLUDE_DIR_COMMON PETSC_INCLUDE_DIR_ARCH
+    OPTIONAL _petsc_includes
+  USER_INCLUDE_DIRS
+    REQUIRED PETSC_INCLUDE_DIR_COMMON PETSC_INCLUDE_DIR_ARCH
+    OPTIONAL _petsc_includes
+  CLEAR
+    PETSC_LIBRARY PETSC_INCLUDE_DIR_COMMON PETSC_INCLUDE_DIR_ARCH
+    PETSC_PETSCVARIABLES ${_cleanup_variables}
+  )
+
+IF(PETSC_FOUND)
+  MARK_AS_ADVANCED(PETSC_ARCH)
+ELSE()
+  MARK_AS_ADVANCED(CLEAR PETSC_ARCH)
+ENDIF()
diff --git a/cmake/modules/FindSLEPC.cmake b/cmake/modules/FindSLEPC.cmake
new file mode 100644
index 0000000..2df9257
--- /dev/null
+++ b/cmake/modules/FindSLEPC.cmake
@@ -0,0 +1,100 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Try to find the SLEPC library
+#
+# This module exports:
+#
+#     SLEPC_FOUND
+#     SLEPC_LIBRARIES
+#     SLEPC_INCLUDE_DIRS
+#     SLEPC_VERSION
+#     SLEPC_VERSION_MAJOR
+#     SLEPC_VERSION_MINOR
+#     SLEPC_VERSION_SUBMINOR
+#     SLEPC_VERSION_PATCH
+#
+
+SET(SLEPC_DIR "" CACHE PATH "An optional hint to a SLEPC directory")
+SET_IF_EMPTY(SLEPC_DIR "$ENV{SLEPC_DIR}")
+SET_IF_EMPTY(PETSC_DIR "$ENV{PETSC_DIR}")
+SET_IF_EMPTY(PETSC_ARCH "$ENV{PETSC_ARCH}")
+
+#
+# Luckily, SLEPc wants the same insanity as PETSc, so we can just copy the
+# mechanism.
+#
+
+DEAL_II_FIND_LIBRARY(SLEPC_LIBRARY
+  NAMES slepc
+  HINTS ${SLEPC_DIR} ${SLEPC_DIR}/${PETSC_ARCH} ${PETSC_DIR}
+  PATH_SUFFIXES lib${LIB_SUFFIX} lib64 lib
+)
+
+DEAL_II_FIND_PATH(SLEPC_INCLUDE_DIR_ARCH slepcconf.h
+  HINTS
+    ${SLEPC_DIR}
+    ${SLEPC_DIR}/${PETSC_ARCH}
+    ${SLEPC_INCLUDE_DIRS}
+    ${PETSC_DIR}
+  PATH_SUFFIXES slepc include include/slepc
+)
+
+DEAL_II_FIND_PATH(SLEPC_INCLUDE_DIR_COMMON slepcversion.h
+  HINTS
+    ${SLEPC_DIR}
+    ${SLEPC_DIR}/${PETSC_ARCH}
+    ${SLEPC_INCLUDE_DIRS}
+    ${PETSC_DIR}
+  PATH_SUFFIXES slepc include include/slepc
+)
+
+SET(SLEPC_SLEPCVERSION_H "${SLEPC_INCLUDE_DIR_COMMON}/slepcversion.h")
+IF(EXISTS ${SLEPC_SLEPCVERSION_H})
+  # Pull each version component out of its "#define SLEPC_VERSION_<X> <n>"
+  # line in slepcversion.h. The number is anchored to the whitespace that
+  # follows the macro name so that multi-digit values such as "10" are
+  # captured completely instead of only their last digit.
+  FILE(STRINGS "${SLEPC_SLEPCVERSION_H}" SLEPC_VERSION_MAJOR_STRING
+    REGEX "#define.*SLEPC_VERSION_MAJOR")
+  STRING(REGEX REPLACE "^.*SLEPC_VERSION_MAJOR[ \t]+([0-9]+).*" "\\1"
+    SLEPC_VERSION_MAJOR "${SLEPC_VERSION_MAJOR_STRING}")
+  FILE(STRINGS "${SLEPC_SLEPCVERSION_H}" SLEPC_VERSION_MINOR_STRING
+    REGEX "#define.*SLEPC_VERSION_MINOR")
+  STRING(REGEX REPLACE "^.*SLEPC_VERSION_MINOR[ \t]+([0-9]+).*" "\\1"
+    SLEPC_VERSION_MINOR "${SLEPC_VERSION_MINOR_STRING}")
+  FILE(STRINGS "${SLEPC_SLEPCVERSION_H}" SLEPC_VERSION_SUBMINOR_STRING
+    REGEX "#define.*SLEPC_VERSION_SUBMINOR")
+  STRING(REGEX REPLACE "^.*SLEPC_VERSION_SUBMINOR[ \t]+([0-9]+).*" "\\1"
+    SLEPC_VERSION_SUBMINOR "${SLEPC_VERSION_SUBMINOR_STRING}")
+  FILE(STRINGS "${SLEPC_SLEPCVERSION_H}" SLEPC_VERSION_PATCH_STRING
+    REGEX "#define.*SLEPC_VERSION_PATCH")
+  STRING(REGEX REPLACE "^.*SLEPC_VERSION_PATCH[ \t]+([0-9]+).*" "\\1"
+    SLEPC_VERSION_PATCH "${SLEPC_VERSION_PATCH_STRING}")
+  SET(SLEPC_VERSION
+    "${SLEPC_VERSION_MAJOR}.${SLEPC_VERSION_MINOR}.${SLEPC_VERSION_SUBMINOR}.${SLEPC_VERSION_PATCH}"
+    )
+ENDIF()
+
+DEAL_II_PACKAGE_HANDLE(SLEPC
+  LIBRARIES
+    REQUIRED SLEPC_LIBRARY PETSC_LIBRARIES
+  INCLUDE_DIRS
+    REQUIRED SLEPC_INCLUDE_DIR_ARCH SLEPC_INCLUDE_DIR_COMMON
+  USER_INCLUDE_DIRS
+    REQUIRED SLEPC_INCLUDE_DIR_ARCH SLEPC_INCLUDE_DIR_COMMON
+  CLEAR SLEPC_LIBRARY SLEPC_INCLUDE_DIR_ARCH SLEPC_INCLUDE_DIR_COMMON
+  )
diff --git a/cmake/modules/FindTBB.cmake b/cmake/modules/FindTBB.cmake
new file mode 100644
index 0000000..26b582c
--- /dev/null
+++ b/cmake/modules/FindTBB.cmake
@@ -0,0 +1,86 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Try to find the Threading Building Blocks library
+#
+# This module exports
+#
+#   TBB_LIBRARIES
+#   TBB_INCLUDE_DIRS
+#   TBB_WITH_DEBUGLIB
+#   TBB_VERSION
+#   TBB_VERSION_MAJOR
+#   TBB_VERSION_MINOR
+#
+
+SET(TBB_DIR "" CACHE PATH "An optional hint to a TBB installation")
+SET_IF_EMPTY(TBB_DIR "$ENV{TBB_DIR}")
+
+FILE(GLOB _path ${TBB_DIR}/build/*_release)
+DEAL_II_FIND_LIBRARY(TBB_LIBRARY
+  NAMES tbb
+  HINTS
+    ${_path}
+    ${TBB_DIR}
+  PATH_SUFFIXES lib${LIB_SUFFIX} lib64 lib
+  )
+
+#
+# Also search for the debug library:
+#
+FILE(GLOB _path ${TBB_DIR}/build/*_debug)
+DEAL_II_FIND_LIBRARY(TBB_DEBUG_LIBRARY
+  NAMES tbb_debug
+  HINTS
+    ${_path}
+    ${TBB_DIR}
+  PATH_SUFFIXES lib${LIB_SUFFIX} lib64 lib
+  )
+IF(NOT TBB_DEBUG_LIBRARY MATCHES "-NOTFOUND")
+  SET(TBB_WITH_DEBUGLIB TRUE)
+  SET(_libraries debug TBB_DEBUG_LIBRARY optimized TBB_LIBRARY)
+ELSE()
+  SET(_libraries TBB_LIBRARY)
+ENDIF()
+
+DEAL_II_FIND_PATH(TBB_INCLUDE_DIR tbb/tbb_stddef.h
+  HINTS
+    ${TBB_DIR}
+  PATH_SUFFIXES include include/tbb tbb
+  )
+
+IF(EXISTS ${TBB_INCLUDE_DIR}/tbb/tbb_stddef.h)
+  # Read TBB_VERSION_MAJOR/MINOR from tbb_stddef.h. The number is anchored to
+  # the whitespace after the macro name so multi-digit values stay intact.
+  FILE(STRINGS "${TBB_INCLUDE_DIR}/tbb/tbb_stddef.h" TBB_VERSION_MAJOR_STRING
+    REGEX "#define.*TBB_VERSION_MAJOR")
+  STRING(REGEX REPLACE "^.*TBB_VERSION_MAJOR[ \t]+([0-9]+).*" "\\1"
+    TBB_VERSION_MAJOR "${TBB_VERSION_MAJOR_STRING}")
+  FILE(STRINGS "${TBB_INCLUDE_DIR}/tbb/tbb_stddef.h" TBB_VERSION_MINOR_STRING
+    REGEX "#define.*TBB_VERSION_MINOR")
+  STRING(REGEX REPLACE "^.*TBB_VERSION_MINOR[ \t]+([0-9]+).*" "\\1"
+    TBB_VERSION_MINOR "${TBB_VERSION_MINOR_STRING}")
+  SET(TBB_VERSION
+    "${TBB_VERSION_MAJOR}.${TBB_VERSION_MINOR}"
+    )
+ENDIF()
+
+DEAL_II_PACKAGE_HANDLE(TBB
+  LIBRARIES REQUIRED ${_libraries}
+  INCLUDE_DIRS REQUIRED TBB_INCLUDE_DIR
+  USER_INCLUDE_DIRS REQUIRED TBB_INCLUDE_DIR
+  CLEAR TBB_DEBUG_LIBRARY TBB_LIBRARY TBB_INCLUDE_DIR
+  )
diff --git a/cmake/modules/FindTRILINOS.cmake b/cmake/modules/FindTRILINOS.cmake
new file mode 100644
index 0000000..0e10112
--- /dev/null
+++ b/cmake/modules/FindTRILINOS.cmake
@@ -0,0 +1,236 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Try to find the Trilinos library
+#
+# This module exports:
+#
+#   TRILINOS_DIR
+#   TRILINOS_INCLUDE_DIRS
+#   TRILINOS_LIBRARIES
+#   TRILINOS_VERSION
+#   TRILINOS_VERSION_MAJOR
+#   TRILINOS_VERSION_MINOR
+#   TRILINOS_VERSION_SUBMINOR
+#   TRILINOS_WITH_MANDATORY_CXX11
+#   TRILINOS_WITH_MPI
+#   TRILINOS_SUPPORTS_CPP11
+#   TRILINOS_HAS_C99_TR1_WORKAROUND
+#
+
+SET(TRILINOS_DIR "" CACHE PATH "An optional hint to a Trilinos installation")
+SET_IF_EMPTY(TRILINOS_DIR "$ENV{TRILINOS_DIR}")
+
+#
+# Include the trilinos package configuration:
+#
+FIND_PACKAGE(TRILINOS_CONFIG
+  CONFIG QUIET
+  NAMES Trilinos TRILINOS
+  HINTS
+    ${TRILINOS_DIR}/lib/cmake/Trilinos
+    ${TRILINOS_DIR}
+  PATH_SUFFIXES
+    lib64/cmake/Trilinos
+    lib/cmake/Trilinos
+    lib${LIB_SUFFIX}/cmake/Trilinos
+  NO_SYSTEM_ENVIRONMENT_PATH
+  )
+
+
+IF(DEFINED Trilinos_VERSION)
+  #
+  # Extract version numbers:
+  #
+  SET(TRILINOS_VERSION "${Trilinos_VERSION}")
+
+  STRING(REGEX REPLACE
+    "^([0-9]+).*$" "\\1"
+    TRILINOS_VERSION_MAJOR "${Trilinos_VERSION}")
+
+  STRING(REGEX REPLACE
+    "^[0-9]+\\.([0-9]+).*$" "\\1"
+    TRILINOS_VERSION_MINOR "${Trilinos_VERSION}")
+
+  # If there is no subminor number, 
+  # TRILINOS_VERSION_SUBMINOR is set to an empty string. 
+  # If that is the case, set the subminor number to zero
+  STRING(REGEX REPLACE
+    "^[0-9]+\\.[0-9]+\\.?(([0-9]+)?).*$" "\\1"
+    TRILINOS_VERSION_SUBMINOR "${Trilinos_VERSION}")
+  IF("${TRILINOS_VERSION_SUBMINOR}" STREQUAL "")
+    SET(TRILINOS_VERSION_SUBMINOR "0")
+  ENDIF()  
+ENDIF()
+
+#
+# Look for Epetra_config.h - we'll query it to determine MPI and 64bit
+# indices support:
+#
+DEAL_II_FIND_FILE(EPETRA_CONFIG_H Epetra_config.h
+  HINTS ${Trilinos_INCLUDE_DIRS}
+  NO_DEFAULT_PATH NO_CMAKE_ENVIRONMENT_PATH NO_CMAKE_PATH
+  NO_SYSTEM_ENVIRONMENT_PATH NO_CMAKE_SYSTEM_PATH NO_CMAKE_FIND_ROOT_PATH
+  )
+
+IF(EXISTS ${EPETRA_CONFIG_H})
+  # Query Epetra_config.h for MPI support and for the macros that disable
+  # 32bit / 64bit global indices; each result is tested via its own variable.
+  # NOTE(review): the *_NO_*BITS_INDICES flags are TRUE when the macro is
+  # absent - confirm that this polarity is what the consumers expect.
+  FILE(STRINGS "${EPETRA_CONFIG_H}" EPETRA_MPI_STRING REGEX "#define HAVE_MPI")
+  IF("${EPETRA_MPI_STRING}" STREQUAL "")
+    SET(TRILINOS_WITH_MPI FALSE)
+  ELSE()
+    SET(TRILINOS_WITH_MPI TRUE)
+  ENDIF()
+  FILE(STRINGS "${EPETRA_CONFIG_H}" EPETRA_32BIT_STRING
+    REGEX "#define EPETRA_NO_32BIT_GLOBAL_INDICES")
+  IF("${EPETRA_32BIT_STRING}" STREQUAL "")
+    SET(TRILINOS_WITH_NO_32BITS_INDICES TRUE)
+  ELSE()
+    SET(TRILINOS_WITH_NO_32BITS_INDICES FALSE)
+  ENDIF()
+  FILE(STRINGS "${EPETRA_CONFIG_H}" EPETRA_64BIT_STRING
+    REGEX "#define EPETRA_NO_64BIT_GLOBAL_INDICES")
+  IF("${EPETRA_64BIT_STRING}" STREQUAL "")
+    SET(TRILINOS_WITH_NO_64BITS_INDICES TRUE)
+  ELSE()
+    SET(TRILINOS_WITH_NO_64BITS_INDICES FALSE)
+  ENDIF()
+ENDIF()
+
+#
+# Look for Sacado_config.h - we'll query it to determine C++11 support:
+#
+DEAL_II_FIND_FILE(SACADO_CONFIG_H Sacado_config.h
+  HINTS ${Trilinos_INCLUDE_DIRS}
+  NO_DEFAULT_PATH NO_CMAKE_ENVIRONMENT_PATH NO_CMAKE_PATH
+  NO_SYSTEM_ENVIRONMENT_PATH NO_CMAKE_SYSTEM_PATH NO_CMAKE_FIND_ROOT_PATH
+  )
+
+SET(TRILINOS_WITH_MANDATORY_CXX11 FALSE)
+IF(EXISTS ${SACADO_CONFIG_H})
+  #
+  # Determine whether Trilinos was configured with C++11 support and
+  # enabling C++11 in deal.II is mandatory (Trilinos 12.0.1 and later).
+  #
+  FILE(STRINGS "${SACADO_CONFIG_H}" SACADO_CXX11_STRING
+    REGEX "#define HAVE_SACADO_CXX11")
+  IF(NOT "${SACADO_CXX11_STRING}" STREQUAL "")
+    SET(TRILINOS_WITH_MANDATORY_CXX11 TRUE)
+  ENDIF()
+ENDIF()
+
+#
+# Some versions of Sacado_cmath.hpp do things that aren't compatible
+# with the -std=c++0x flag of GCC, see deal.II FAQ.
+# Test whether that is indeed the case:
+#
+
+DEAL_II_FIND_FILE(SACADO_CMATH_HPP Sacado_cmath.hpp
+  HINTS ${Trilinos_INCLUDE_DIRS}
+  NO_DEFAULT_PATH NO_CMAKE_ENVIRONMENT_PATH NO_CMAKE_PATH
+  NO_SYSTEM_ENVIRONMENT_PATH NO_CMAKE_SYSTEM_PATH NO_CMAKE_FIND_ROOT_PATH
+  )
+
+IF(EXISTS ${SACADO_CMATH_HPP})
+  LIST(APPEND CMAKE_REQUIRED_INCLUDES ${Trilinos_INCLUDE_DIRS})
+  PUSH_CMAKE_REQUIRED("${DEAL_II_CXX_VERSION_FLAG}")
+
+  CHECK_CXX_SOURCE_COMPILES(
+    "
+    #include <Sacado_cmath.hpp>
+    int main(){ return 0; }
+    "
+    TRILINOS_SUPPORTS_CPP11
+    )
+
+  #
+  # Try whether exporting HAS_C99_TR1_CMATH helps:
+  #
+  PUSH_CMAKE_REQUIRED("-DHAS_C99_TR1_CMATH")
+  CHECK_CXX_SOURCE_COMPILES(
+    "
+    #include <Sacado_cmath.hpp>
+    int main(){ return 0; }
+    "
+    TRILINOS_HAS_C99_TR1_WORKAROUND
+    )
+  RESET_CMAKE_REQUIRED()
+ENDIF()
+
+#
+# *Boy* Sanitize variables that are exported by TrilinosConfig.cmake...
+#
+# Especially deduplicate stuff...
+#
+REMOVE_DUPLICATES(Trilinos_LIBRARIES REVERSE)
+REMOVE_DUPLICATES(Trilinos_TPL_LIBRARIES REVERSE)
+
+REMOVE_DUPLICATES(Trilinos_INCLUDE_DIRS)
+STRING(REGEX REPLACE
+  "(lib64|lib)\\/cmake\\/Trilinos\\/\\.\\.\\/\\.\\.\\/\\.\\.\\/" ""
+  Trilinos_INCLUDE_DIRS "${Trilinos_INCLUDE_DIRS}"
+  )
+
+REMOVE_DUPLICATES(Trilinos_TPL_INCLUDE_DIRS)
+
+#
+# workaround: Do not pull in scotch include directory. It clashes with
+# our use of the metis headers...
+#
+FOREACH(_item ${Trilinos_TPL_INCLUDE_DIRS})
+  IF("${_item}" MATCHES "scotch$")
+    LIST(REMOVE_ITEM Trilinos_TPL_INCLUDE_DIRS ${_item})
+  ENDIF()
+ENDFOREACH()
+
+#
+# We'd like to have the full library names but the Trilinos package only
+# exports a list with short names...
+# So we check again for every lib and store the full path:
+#
+SET(_libraries "")
+FOREACH(_library ${Trilinos_LIBRARIES})
+  LIST(APPEND _libraries TRILINOS_LIBRARY_${_library})
+  DEAL_II_FIND_LIBRARY(TRILINOS_LIBRARY_${_library}
+    NAMES ${_library}
+    HINTS ${Trilinos_LIBRARY_DIRS}
+    NO_DEFAULT_PATH
+    NO_CMAKE_ENVIRONMENT_PATH
+    NO_CMAKE_PATH
+    NO_SYSTEM_ENVIRONMENT_PATH
+    NO_CMAKE_SYSTEM_PATH
+    NO_CMAKE_FIND_ROOT_PATH
+    )
+ENDFOREACH()
+
+# CLEAR lists all lookup and compile-check result variables created above.
+DEAL_II_PACKAGE_HANDLE(TRILINOS
+  LIBRARIES
+    REQUIRED ${_libraries}
+    OPTIONAL Trilinos_TPL_LIBRARIES MPI_CXX_LIBRARIES
+  INCLUDE_DIRS
+    REQUIRED Trilinos_INCLUDE_DIRS
+    OPTIONAL Trilinos_TPL_INCLUDE_DIRS
+  USER_INCLUDE_DIRS
+    REQUIRED Trilinos_INCLUDE_DIRS
+    OPTIONAL Trilinos_TPL_INCLUDE_DIRS
+  CLEAR
+    TRILINOS_CONFIG_DIR EPETRA_CONFIG_H SACADO_CONFIG_H SACADO_CMATH_HPP
+    ${_libraries} TRILINOS_SUPPORTS_CPP11 TRILINOS_HAS_C99_TR1_WORKAROUND
+  )
diff --git a/cmake/modules/FindUMFPACK.cmake b/cmake/modules/FindUMFPACK.cmake
new file mode 100644
index 0000000..bd26c93
--- /dev/null
+++ b/cmake/modules/FindUMFPACK.cmake
@@ -0,0 +1,172 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Try to find the UMFPACK library
+#
+# This module exports
+#
+#   UMFPACK_LIBRARIES
+#   UMFPACK_INCLUDE_DIRS
+#   UMFPACK_LINKER_FLAGS
+#   UMFPACK_VERSION
+#   UMFPACK_VERSION_MAJOR
+#   UMFPACK_VERSION_MINOR
+#   UMFPACK_VERSION_SUBMINOR
+#
+
+SET(UMFPACK_DIR "" CACHE PATH "An optional hint to an UMFPACK directory")
+SET(SUITESPARSE_DIR "" CACHE PATH
+  "An optional hint to a SUITESPARSE directory"
+  )
+FOREACH(_comp SUITESPARSE SUITESPARSE_CONFIG UMFPACK AMD CHOLMOD COLAMD)
+  SET_IF_EMPTY(${_comp}_DIR "$ENV{${_comp}_DIR}")
+ENDFOREACH()
+
+# FIND_UMFPACK_PATH(<comp> <file>): call DEAL_II_FIND_PATH for the header
+# <file> with result variable <comp>_INCLUDE_DIR. Hints are <COMP>_DIR (the
+# component name uppercased), SUITESPARSE_DIR/<comp>, UMFPACK_DIR/../<comp>,
+# UMFPACK_DIR/<comp> and UMFPACK_DIR itself.
+MACRO(FIND_UMFPACK_PATH _comp _file)
+  STRING(TOLOWER ${_comp} _comp_lowercase)
+  STRING(TOUPPER ${_comp} _comp_uppercase)
+  DEAL_II_FIND_PATH(${_comp}_INCLUDE_DIR ${_file}
+    HINTS
+      ${${_comp_uppercase}_DIR}
+      ${SUITESPARSE_DIR}/${_comp}
+      ${UMFPACK_DIR}/../${_comp}
+      ${UMFPACK_DIR}/${_comp}
+      ${UMFPACK_DIR}
+    PATH_SUFFIXES
+      ${_comp_lowercase} include/${_comp_lowercase} include Include ${_comp}/Include suitesparse
+    )
+ENDMACRO()
+# FIND_UMFPACK_LIBRARY(<comp> <name>): find <name>/lib<name> as <comp>_LIBRARY.
+MACRO(FIND_UMFPACK_LIBRARY _comp _name)
+  STRING(TOUPPER ${_comp} _comp_uppercase)
+  DEAL_II_FIND_LIBRARY(${_comp}_LIBRARY
+    NAMES ${_name} lib${_name}
+    HINTS
+      ${${_comp_uppercase}_DIR}
+      ${SUITESPARSE_DIR}/${_comp}
+      ${UMFPACK_DIR}/../${_comp}
+      ${UMFPACK_DIR}/${_comp}
+      ${UMFPACK_DIR}
+    PATH_SUFFIXES
+    lib${LIB_SUFFIX} lib64 lib Lib ${_comp}/Lib
+    )
+ENDMACRO()
+
+
+#
+# Search for include directories:
+#
+FIND_UMFPACK_PATH(UMFPACK umfpack.h)
+FIND_UMFPACK_PATH(AMD amd.h)
+
+IF(EXISTS ${UMFPACK_INCLUDE_DIR}/umfpack.h)
+  #
+  # Recent versions of UMFPACK include SuiteSparse_config.h; if so,
+  # ensure that we'll find these headers as well.
+  #
+  FILE(STRINGS "${UMFPACK_INCLUDE_DIR}/umfpack.h" UMFPACK_SUITESPARSE_STRING
+    REGEX "#include \"SuiteSparse_config.h\"")
+  IF(NOT "${UMFPACK_SUITESPARSE_STRING}" STREQUAL "")
+    FIND_UMFPACK_PATH(SuiteSparse_config SuiteSparse_config.h)
+  ENDIF()
+
+  # Read the MAIN/SUB/SUBSUB version macros from umfpack.h. The number is
+  # anchored to the whitespace after the macro name so that multi-digit
+  # values such as "10" are captured completely.
+  FILE(STRINGS "${UMFPACK_INCLUDE_DIR}/umfpack.h" UMFPACK_VERSION_MAJOR_STRING
+    REGEX "#define.*UMFPACK_MAIN_VERSION")
+  STRING(REGEX REPLACE "^.*UMFPACK_MAIN_VERSION[ \t]+([0-9]+).*" "\\1"
+    UMFPACK_VERSION_MAJOR "${UMFPACK_VERSION_MAJOR_STRING}")
+  FILE(STRINGS "${UMFPACK_INCLUDE_DIR}/umfpack.h" UMFPACK_VERSION_MINOR_STRING
+    REGEX "#define.*UMFPACK_SUB_VERSION")
+  STRING(REGEX REPLACE "^.*UMFPACK_SUB_VERSION[ \t]+([0-9]+).*" "\\1"
+    UMFPACK_VERSION_MINOR "${UMFPACK_VERSION_MINOR_STRING}")
+  FILE(STRINGS "${UMFPACK_INCLUDE_DIR}/umfpack.h" UMFPACK_VERSION_SUBMINOR_STRING
+    REGEX "#define.*UMFPACK_SUBSUB_VERSION")
+  STRING(REGEX REPLACE "^.*UMFPACK_SUBSUB_VERSION[ \t]+([0-9]+).*" "\\1"
+    UMFPACK_VERSION_SUBMINOR "${UMFPACK_VERSION_SUBMINOR_STRING}")
+  SET(UMFPACK_VERSION
+    "${UMFPACK_VERSION_MAJOR}.${UMFPACK_VERSION_MINOR}.${UMFPACK_VERSION_SUBMINOR}"
+    )
+ENDIF()
+
+#
+# Link against everything we can find to avoid underlinkage:
+#
+FIND_UMFPACK_LIBRARY(UMFPACK umfpack)
+FIND_UMFPACK_LIBRARY(AMD amd)
+FIND_UMFPACK_LIBRARY(CHOLMOD cholmod)
+FIND_UMFPACK_LIBRARY(COLAMD colamd)
+FIND_UMFPACK_LIBRARY(CCOLAMD ccolamd)
+FIND_UMFPACK_LIBRARY(CAMD camd)
+FIND_UMFPACK_LIBRARY(SuiteSparse_config suitesparseconfig)
+
+#
+# Test whether libsuitesparseconfig.xxx can be used for shared library
+# linkage. If not, exclude it from the command line.
+#
+LIST(APPEND CMAKE_REQUIRED_LIBRARIES
+  "-shared"
+  ${SuiteSparse_config_LIBRARY}
+  )
+CHECK_CXX_SOURCE_COMPILES("extern int SuiteSparse_version (int[3]);
+  void foo(int bar[3]) { SuiteSparse_version(bar);}"
+  LAPACK_SUITESPARSECONFIG_WITH_PIC
+  )
+RESET_CMAKE_REQUIRED()
+
+IF(LAPACK_SUITESPARSECONFIG_WITH_PIC OR NOT BUILD_SHARED_LIBS)
+  SET(_suitesparse_config SuiteSparse_config_LIBRARY)
+ENDIF()
+
+#
+# Add rt to the link interface as well (for whatever reason,
+# libsuitesparse.so depends on clock_gettime but the shared
+# lib does not record its dependence on librt.so as evidenced
+# by ldd :-( ):
+#
+FIND_SYSTEM_LIBRARY(rt_LIBRARY NAMES rt)
+MARK_AS_ADVANCED(rt_LIBRARY)
+
+DEAL_II_PACKAGE_HANDLE(UMFPACK
+  LIBRARIES
+    REQUIRED UMFPACK_LIBRARY
+    OPTIONAL CHOLMOD_LIBRARY CCOLAMD_LIBRARY COLAMD_LIBRARY CAMD_LIBRARY ${_suitesparse_config}
+    REQUIRED AMD_LIBRARY
+    OPTIONAL METIS_LIBRARIES LAPACK_LIBRARIES rt_LIBRARY
+  INCLUDE_DIRS
+    REQUIRED UMFPACK_INCLUDE_DIR AMD_INCLUDE_DIR
+    OPTIONAL SuiteSparse_config_INCLUDE_DIR
+  USER_INCLUDE_DIRS
+    REQUIRED UMFPACK_INCLUDE_DIR AMD_INCLUDE_DIR
+    OPTIONAL SuiteSparse_config_INCLUDE_DIR
+  LINKER_FLAGS
+    OPTIONAL LAPACK_LINKER_FLAGS
+  CLEAR
+    UMFPACK_LIBRARY CHOLMOD_LIBRARY CCOLAMD_LIBRARY COLAMD_LIBRARY
+    CAMD_LIBRARY SuiteSparse_config_LIBRARY AMD_LIBRARY UMFPACK_INCLUDE_DIR
+    AMD_INCLUDE_DIR SuiteSparse_config_INCLUDE_DIR
+  )
+
+IF(UMFPACK_FOUND)
+  MARK_AS_ADVANCED(SUITESPARSE_DIR)
+ELSE()
+  MARK_AS_ADVANCED(CLEAR SUITESPARSE_DIR)
+ENDIF()
diff --git a/cmake/modules/FindZLIB.cmake b/cmake/modules/FindZLIB.cmake
new file mode 100644
index 0000000..17da5f4
--- /dev/null
+++ b/cmake/modules/FindZLIB.cmake
@@ -0,0 +1,43 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2014 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Try to find the ZLIB library
+#
+# This module exports
+#
+#   ZLIB_LIBRARIES
+#   ZLIB_INCLUDE_DIRS
+#   ZLIB_VERSION
+#
+
+SET(ZLIB_DIR "" CACHE PATH "An optional hint to a ZLIB installation")
+SET_IF_EMPTY(ZLIB_DIR "$ENV{ZLIB_DIR}")
+
+IF(NOT "${ZLIB_DIR}" STREQUAL "")
+  SET(ZLIB_ROOT ${ZLIB_DIR})
+ENDIF()
+# temporarily disable ${CMAKE_SOURCE_DIR}/cmake/modules for module lookup
+LIST(REMOVE_ITEM CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/modules/)
+FIND_PACKAGE(ZLIB)
+LIST(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/modules/)
+
+SET(ZLIB_VERSION ${ZLIB_VERSION_STRING})
+
+DEAL_II_PACKAGE_HANDLE(ZLIB
+  LIBRARIES REQUIRED ZLIB_LIBRARY
+  INCLUDE_DIRS REQUIRED ZLIB_INCLUDE_DIR
+  CLEAR ZLIB_INCLUDE_DIR ZLIB_LIBRARY
+  )
diff --git a/cmake/scripts/CMakeLists.txt b/cmake/scripts/CMakeLists.txt
new file mode 100644
index 0000000..5db06b7
--- /dev/null
+++ b/cmake/scripts/CMakeLists.txt
@@ -0,0 +1,52 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+ADD_EXECUTABLE(expand_instantiations_exe expand_instantiations.cc)
+
+#
+# We run into a bug when compiling expand_instantiations with
+# -fuse-ld=gold -pthreads, see https://github.com/dealii/dealii/issues/1798
+# Work around this by stripping -fuse-ld=gold out of the build flags
+# for the script. This does little harm because linking this one file
+# is so exceedingly cheap that there is no speed difference between
+# using gold or the old BFD ld linker.
+#
+# If we are on a system where -fuse-ld=gold simply isn't part of the
+# linker flag, filtering this one command out of the list of flags
+# does not harm either.
+#
+STRING(REPLACE "-fuse-ld=gold" "" _expand_instantiations_link_flags "${DEAL_II_LINKER_FLAGS}" )
+
+SET_TARGET_PROPERTIES(expand_instantiations_exe PROPERTIES
+  LINK_FLAGS "${_expand_instantiations_link_flags}"
+  LINKER_LANGUAGE "CXX"
+  COMPILE_DEFINITIONS "${DEAL_II_DEFINITIONS}"
+  COMPILE_FLAGS "${DEAL_II_CXX_FLAGS}"
+  OUTPUT_NAME expand_instantiations
+  RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${DEAL_II_EXECUTABLE_RELDIR}
+  )
+
+SET(_scripts
+  ${CMAKE_CURRENT_SOURCE_DIR}/normalize.pl
+  ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cmake
+  ${CMAKE_CURRENT_SOURCE_DIR}/run_test.sh
+  )
+FILE(COPY ${_scripts}
+  DESTINATION ${CMAKE_BINARY_DIR}/${DEAL_II_SHARE_RELDIR}/scripts
+  )
+INSTALL(FILES ${_scripts}
+  DESTINATION ${DEAL_II_SHARE_RELDIR}/scripts
+  COMPONENT library
+  )
diff --git a/cmake/scripts/expand_instantiations.cc b/cmake/scripts/expand_instantiations.cc
new file mode 100644
index 0000000..d63669d
--- /dev/null
+++ b/cmake/scripts/expand_instantiations.cc
@@ -0,0 +1,532 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2007 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+// This is the program that we use to generate explicit instantiations for a
+// variety of template arguments. It takes two kinds of input files. The first
+// is given as arguments on the command line and contains entries of the
+// following form:
+// --------------------
+// REAL_SCALARS    := { double; float; long double }
+// COMPLEX_SCALARS := { std::complex<double>;
+//                      std::complex<float>;
+//                      std::complex<long double> }
+// VECTORS := { Vector<double>; Vector<float>; Vector<long double> }
+// --------------------
+//
+// The input file of this form is typically located in the common/ directory
+// and is generated by ./configure to contain the list of vectors etc that
+// make sense for the current configuration. For example, the list of VECTORS
+// is going to contain PETSc vectors if so configured.
+//
+// The second input is read from standard input and consists of a sequence
+// of statements of the following form:
+// --------------------
+// for (u,v:VECTORS; z:SCALARS) { f(u, z, const v &); }
+// --------------------
+// Here, everything between {...} will be copied as many times as there are
+// combinations of arguments u,v in the list of substitutions given by
+// VECTORS. For each copy, the arguments u,v will be replaced by one of these
+// combinations.
+
+// Author: Wolfgang Bangerth, 2007
+
+
+#include <iostream>
+#include <fstream>
+#include <map>
+#include <list>
+#include <cstdlib>
+#include <string>
+
+// a map from the keys in the expansion lists to the list itself. For
+// instance, the example above will lead to the entry
+//      expansion_lists[REAL_SCALARS] = (double, float, long double)
+// in this map, among others
+std::map<std::string, std::list<std::string> >  expansion_lists;
+
+
+
+/* ======================== auxiliary functions ================= */
+
+
+// Replace every occurrence of 'pattern' in 'in' by 'substitute' and
+// return the result. The input is scanned once from left to right and the
+// search resumes directly behind each inserted 'substitute', so the
+// function terminates even if 'substitute' itself contains 'pattern'. An
+// empty 'pattern' matches nothing; 'in' is then returned unchanged.
+std::string
+replace_all (const std::string &in,
+             const std::string &pattern,
+             const std::string &substitute)
+{
+  std::string x = in;
+
+  // an empty pattern would match at every position and never make
+  // progress, so treat it as "nothing to replace"
+  if (pattern.empty())
+    return x;
+
+  std::string::size_type pos = 0;
+  while ((pos = x.find (pattern, pos)) != std::string::npos)
+    {
+      x.replace (pos, pattern.size(), substitute);
+      // continue behind the replacement so its text is never rescanned
+      pos += substitute.size();
+    }
+  return x;
+}
+
+
+// Cut off and return the leading part of #in that extends up to, but does
+// not include, the first character that occurs in #delim_list. The
+// returned part is removed from #in, so that #in afterwards starts with
+// the delimiter (or is empty if no delimiter was found). A delimiter that
+// is directly preceded by a backslash is treated as ordinary text and
+// skipped.
+std::string
+get_substring_with_delim (std::string       &in,
+                          const std::string &delim_list)
+{
+  // locate the first unescaped delimiter. the prefix is then removed with
+  // a single erase() rather than one erase() per character, which keeps
+  // the running time linear in the length of the prefix
+  std::string::size_type end = 0;
+  for (; end < in.size(); ++end)
+    {
+      const bool is_delimiter
+        = (delim_list.find (in[end]) != std::string::npos);
+      const bool is_escaped
+        = (end > 0) && (in[end-1] == '\\');
+
+      if (is_delimiter && !is_escaped)
+        break;
+    }
+
+  std::string x = in.substr (0, end);
+  in.erase (0, end);
+  return x;
+}
+
+
+// strip leading blanks, tabs and newlines from the given string, in place
+void
+skip_space (std::string &in)
+{
+  // index of the first character to keep. this is npos if the string is
+  // empty or consists of whitespace only, in which case everything goes
+  const std::string::size_type first_kept = in.find_first_not_of (" \t\n");
+  in.erase (0, first_kept);
+}
+
+
+// Strip C++ comments from a single line of input and return what is left.
+// First, everything from the first "//" to the end of the line is dropped.
+// After that, every "/* ... */" block that opens and closes on this line
+// is cut out. A block comment that is not closed on the same line is not
+// supported: an error message is printed and the program exits with
+// status 1.
+std::string remove_comments (std::string line)
+{
+  const std::string::size_type double_slash_comment = line.find ("//");
+  if (double_slash_comment != std::string::npos)
+    line.erase (double_slash_comment, std::string::npos);
+
+  // remove all block comments, not just the first one on the line
+  std::string::size_type slash_star_comment_begin = 0;
+  while ((slash_star_comment_begin = line.find ("/*", slash_star_comment_begin))
+         != std::string::npos)
+    {
+      // look for the closing marker only behind the opening one, so that
+      // neither "/*/" nor an unrelated earlier "*/" is taken as the end
+      const std::string::size_type slash_star_comment_end
+        = line.find ("*/", slash_star_comment_begin + 2);
+      if (slash_star_comment_end == std::string::npos)
+        {
+          std::cerr << "The program can currently only handle /* block */ "
+                    << "comments that start and end within the same line."
+                    << std::endl;
+          std::exit (1);
+        }
+      line.erase (slash_star_comment_begin,
+                  slash_star_comment_end - slash_star_comment_begin + 2);
+    }
+
+  return line;
+}
+
+
+// Slurp the complete contents of the stream 'in' into one string and
+// return it. Every line is passed through remove_comments() and terminated
+// by a newline character; afterwards tabs are turned into blanks and each
+// run of blanks is squeezed into a single one.
+std::string read_whole_file (std::istream &in)
+{
+  std::string raw;
+  for (std::string line; in; line.clear())
+    {
+      // note that the stream state is tested before the read, so the
+      // failing read at end-of-file still contributes one empty line
+      std::getline (in, line);
+      raw += remove_comments (line);
+      raw += '\n';
+    }
+
+  // single pass: map tabs to blanks and drop a blank that directly
+  // follows another blank
+  std::string squeezed;
+  squeezed.reserve (raw.size());
+  for (std::string::size_type i=0; i<raw.size(); ++i)
+    {
+      const char c = (raw[i] == '\t' ? ' ' : raw[i]);
+      const bool follows_blank
+        = (squeezed.size() != 0) && (squeezed[squeezed.size()-1] == ' ');
+
+      if (!((c == ' ') && follows_blank))
+        squeezed += c;
+    }
+
+  return squeezed;
+}
+
+
+
+// Split the string #s, assumed to be a list of substrings separated by
+// the character #delimiter, into its components and return them in order.
+// Blanks, tabs and newlines are removed from both ends of every component.
+// A component that is empty after trimming is kept as an empty string;
+// however, nothing is appended for the (empty) remainder behind a
+// delimiter that is the very last character of #s.
+std::list<std::string>
+split_string_list (const std::string &s,
+                   const char         delimiter)
+{
+  // the same set of characters is stripped at the front and at the back.
+  // trimming only blanks at the back would leave e.g. a trailing newline
+  // attached to the last component
+  static const char *const whitespace = " \t\n";
+
+  std::list<std::string> split_list;
+
+  std::string::size_type start = 0;
+  while (start < s.size())
+    {
+      const std::string::size_type end = s.find (delimiter, start);
+      const std::string raw
+        = s.substr (start,
+                    (end == std::string::npos ? std::string::npos : end-start));
+
+      const std::string::size_type first = raw.find_first_not_of (whitespace);
+      if (first == std::string::npos)
+        split_list.push_back (std::string());
+      else
+        {
+          const std::string::size_type last = raw.find_last_not_of (whitespace);
+          split_list.push_back (raw.substr (first, last-first+1));
+        }
+
+      // no further delimiter: this was the last component
+      if (end == std::string::npos)
+        break;
+      start = end+1;
+    }
+
+  return split_list;
+}
+
+
+
+// return a copy of the given list from which all empty strings have been
+// removed; the order of the remaining entries is unchanged
+std::list<std::string>
+delete_empty_entries (const std::list<std::string> &list)
+{
+  std::list<std::string> non_empty (list);
+  non_empty.remove (std::string());
+  return non_empty;
+}
+
+
+
+// Tell whether the substring of #text that starts at #pos and is #length
+// characters long stands on its own as a token. This is the case if
+// neither the character directly in front of it nor the one directly
+// behind it (where they exist) is a letter, a digit or an underscore.
+bool is_real_token (const std::string &text,
+                    const std::string::size_type pos,
+                    const std::string::size_type length)
+{
+  static const std::string token_chars ("abcdefghijklmnopqrstuvwxyz"
+                                        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                                        "0123456789"
+                                        "_");
+
+  // is the substring glued to an identifier character on the left?
+  const bool glued_in_front
+    = (pos != 0) &&
+      (token_chars.find (text[pos-1]) != std::string::npos);
+
+  // ...or on the right?
+  const bool glued_behind
+    = (pos+length < text.size()) &&
+      (token_chars.find (text[pos+length]) != std::string::npos);
+
+  return !(glued_in_front || glued_behind);
+}
+
+
+// Return a copy of #text in which every stand-alone occurrence of #token
+// (as judged by is_real_token()) is replaced by #substitute. Because the
+// replacement may be a templated class like std::complex<double>, and
+// because the token to be substituted may itself be a template argument,
+// the replacement is surrounded by one space on either side, which
+// shouldn't matter in C++.
+std::string substitute_tokens (const std::string &text,
+                               const std::string &token,
+                               const std::string &substitute)
+{
+  const std::string padded_substitute = " " + substitute + " ";
+
+  std::string result = text;
+  std::string::size_type pos = result.find (token);
+  while (pos != std::string::npos)
+    {
+      if (is_real_token (result, pos, token.size()))
+        {
+          result.replace (pos, token.size(), padded_substitute);
+          // resume behind the text that was just inserted
+          pos += padded_substitute.size();
+        }
+      else
+        // part of a longer word: step over this hit
+        ++pos;
+
+      pos = result.find (token, pos);
+    }
+
+  return result;
+}
+
+
+
+/* ======================== the main functions ================= */
+
+
+// read the file #filename and parse the expansion lists in it, which look like
+//   REAL_SCALARS    := { double; float; long double }
+// as specified at the top of the file, and store them in the global
+// expansion_lists variable; exits with status 1 on unreadable or malformed input
+void read_expansion_lists (const std::string &filename)
+{
+  std::ifstream in (filename.c_str());
+
+  if (! in)
+    {
+      std::cerr << "Instantiation list file can not be read!"
+                << std::endl;
+      std::exit (1);
+    }
+
+  // read the entire file into a string for simpler processing. comments
+  // are stripped and runs of blanks are collapsed by read_whole_file()
+  std::string whole_file = read_whole_file (in);
+
+  // now process entries of the form
+  //   NAME := { class1; class2; ...}.
+  while (whole_file.size() != 0)
+    {
+      const std::string
+      name = get_substring_with_delim (whole_file, " :");
+
+      skip_space (whole_file);
+      if (whole_file.find (":=") != 0)
+        {
+          std::cerr << "Invalid entry <" << name << '>' << std::endl;
+          std::exit (1);
+        }
+      whole_file.erase (0, 2);
+      skip_space (whole_file);
+      if (whole_file.find ("{") != 0)
+        {
+          std::cerr << "Invalid entry <" << name << '>' << std::endl;
+          std::exit (1);
+        }
+      whole_file.erase (0, 1);
+      skip_space (whole_file);
+
+      std::string
+      expansion = get_substring_with_delim (whole_file, "}");
+
+      if (whole_file.find ("}") != 0)
+        {
+          std::cerr << "Invalid entry <" << name << '>' << std::endl;
+          std::exit (1);
+        }
+      whole_file.erase (0, 1);
+      skip_space (whole_file);
+
+      // assign but remove empty entries; this may happen if an expansion
+      // list ends in a semicolon (then we get an empty entry at the end),
+      // or if there are multiple semicolons after each other (this may
+      // happen if, for example, we have "Vector<double>; TRILINOS_VECTOR;"
+      // and if TRILINOS_VECTOR is an empty expansion after running
+      // ./configure). an earlier list with the same name is overwritten
+      expansion_lists[name]
+        = delete_empty_entries (split_string_list (expansion, ';'));
+    }
+}
+
+
+
+// Write to std::cout one copy of #text for every combination of
+// replacements described by #substitutions. Each entry of that list pairs
+// the name of a token in #text with the name of the expansion list (a key
+// of the global expansion_lists map) whose entries are to be put in its
+// place. With an empty list, #text is written unchanged.
+void substitute (const std::string &text,
+                 const std::list<std::pair<std::string, std::string> > &substitutions)
+{
+  typedef std::list<std::pair<std::string, std::string> > substitution_list;
+
+  // nothing left to substitute: emit the text as is
+  if (substitutions.empty())
+    {
+      std::cout << text
+                << std::endl;
+      return;
+    }
+
+  // the token handled at this level and all of its possible replacements
+  const std::string name    = substitutions.front().first,
+                    pattern = substitutions.front().second;
+  const std::list<std::string> &expansions = expansion_lists[pattern];
+
+  if (substitutions.size() == 1)
+    {
+      // last token: every replacement yields one finished block of output
+      for (std::list<std::string>::const_iterator
+           expansion = expansions.begin();
+           expansion != expansions.end();
+           ++expansion)
+        {
+          // surround each block with an if-def hack that allows us to
+          // split instantiation files into several chunks to be used in
+          // different .cc files (to reduce compiler memory usage). Just
+          // define SPLIT_INSTANTIATIONS_COUNT to a positive number (the
+          // number of sections to split the definitions into) and
+          // SPLIT_INSTANTIATIONS_INDEX as a number between 0 and
+          // SPLIT_INSTANTIATIONS_COUNT-1 to get the instantiations of
+          // that particular chunk. The counter runs across all calls.
+          static unsigned int counter = 0;
+          std::cout << "#if (SPLIT_INSTANTIATIONS_CHECK("
+                    << counter++
+                    << "))" << std::endl;
+          std::cout << substitute_tokens (text, name, *expansion)
+                    << std::endl;
+          std::cout << "#endif" << std::endl;
+        }
+      return;
+    }
+
+  // more than one token: replace the first one here and hand the
+  // remaining ones to a recursive call
+  const substitution_list
+  rest_of_substitutions (++substitutions.begin(),
+                         substitutions.end());
+
+  for (std::list<std::string>::const_iterator
+       expansion = expansions.begin();
+       expansion != expansions.end();
+       ++expansion)
+    substitute (substitute_tokens (text, name, *expansion),
+                rest_of_substitutions);
+}
+
+
+
+// Read the list of instantiations from standard input, given in the form
+//   for (u,v:VECTORS; z:SCALARS) { f(u, z, const v &); }
+// and write the expanded text of every such statement to std::cout via
+// substitute(). Malformed input leads to an error message on std::cerr
+// and the program exits with status 1.
+void process_instantiations ()
+{
+  std::string whole_file = read_whole_file (std::cin);
+
+  // process entries of the form
+  //   for (X:Y; A:B) { INST }
+  while (whole_file.size() != 0)
+    {
+      skip_space (whole_file);
+      if (whole_file.find ("for") != 0)
+        {
+          std::cerr << "Invalid instantiation list: missing 'for'" << std::endl;
+          std::exit (1);
+        }
+      whole_file.erase (0, 3);
+      skip_space (whole_file);
+      if (whole_file.find ("(") != 0)
+        {
+          std::cerr << "Invalid instantiation list: missing '('" << std::endl;
+          std::exit (1);
+        }
+      whole_file.erase (0, 1);
+      skip_space (whole_file);
+
+      // the header between (...), split at the semicolons
+      const std::list<std::string>
+      substitutions_list
+        = split_string_list (get_substring_with_delim (whole_file,
+                                                       ")"),
+                             ';');
+      if (whole_file.find (")") != 0)
+        {
+          std::cerr << "Invalid instantiation list: missing ')'" << std::endl;
+          std::exit (1);
+        }
+      whole_file.erase (0, 1);
+      skip_space (whole_file);
+
+      // process the header: every part has the form "name1,name2:LIST"
+      // and yields one (name, LIST) pair per name
+      std::list<std::pair<std::string, std::string> >
+      substitutions;
+
+      for (std::list<std::string>::const_iterator
+           s = substitutions_list.begin();
+           s != substitutions_list.end(); ++s)
+        {
+          const std::list<std::string>
+          names_and_type = split_string_list (*s, ':');
+          if (names_and_type.size() != 2)
+            {
+              std::cerr << "Invalid instantiation header" << std::endl;
+              std::exit (1);
+            }
+
+          const std::list<std::string>
+          names = split_string_list (names_and_type.front(), ',');
+
+          for (std::list<std::string>::const_iterator
+               x = names.begin(); x != names.end(); ++x)
+            substitutions.push_back (std::make_pair (*x,
+                                                     names_and_type.back()));
+        }
+
+      // now read the part in {...}
+      skip_space (whole_file);
+      if (whole_file.find ("{") != 0)
+        {
+          std::cerr << "Invalid substitution text" << std::endl;
+          std::exit (1);
+        }
+      whole_file.erase (0, 1);
+      skip_space (whole_file);
+      const std::string text_to_substitute
+        = get_substring_with_delim (whole_file, "}");
+
+      // as for the expansion lists, insist on the closing brace instead
+      // of silently accepting a block that runs to the end of the input
+      if (whole_file.find ("}") != 0)
+        {
+          std::cerr << "Invalid substitution text: missing '}'" << std::endl;
+          std::exit (1);
+        }
+      whole_file.erase (0,1);
+      skip_space (whole_file);
+
+      // now produce the substitutions. first replace all occurrences of
+      // "\{" by "{" and of "\}" by "}"
+      substitute (replace_all(replace_all(text_to_substitute, "\\{", "{"),
+                              "\\}", "}"),
+                  substitutions);
+    }
+}
+
+
+
+// Entry point: every command line argument names a file with expansion
+// lists; the instantiation statements are then read from standard input
+// and the expanded text is written to standard output.
+int main (int argc, char **argv)
+{
+  const bool have_list_files = (argc >= 2);
+  if (!have_list_files)
+    {
+      std::cerr << "Usage: " << std::endl;
+      std::cerr << "  expand_instantiations class_list_files < in_file > out_file"
+                << std::endl;
+      std::exit (1);
+    }
+
+  for (char **list_file = argv+1; list_file != argv+argc; ++list_file)
+    read_expansion_lists (*list_file);
+
+  // write header:
+  std::cout << "// This file is automatically generated from corresponding .inst.in, do not edit."
+            << std::endl;
+  std::cout << std::endl;
+
+  // Make sure SPLIT_INSTANTIATIONS_* is working correctly if the user doesn't
+  // use it. The defaults will not split the instantiations. This logic is
+  // somewhat tricky to get right for two reasons: 1. icc 14 will not allow an
+  // expression like "#if !defined(B) || (A % B == C)" 2. we have .cc files
+  // where more than one .inst is included and splitting is only required in
+  // one of them. So we need to handle the case where _COUNT is undefined but
+  // _INDEX is defined, which might be needed later.
+  std::cout << "#ifdef SPLIT_INSTANTIATIONS_COUNT" << std::endl;
+  std::cout << "  #define SPLIT_INSTANTIATIONS_CHECK(C) (((C) % SPLIT_INSTANTIATIONS_COUNT) == SPLIT_INSTANTIATIONS_INDEX)" << std::endl;
+  std::cout << "#else" << std::endl;
+  std::cout << "  #define SPLIT_INSTANTIATIONS_CHECK(C) (1)" << std::endl;
+  std::cout << "#endif" << std::endl;
+  std::cout << std::endl;
+
+  process_instantiations ();
+
+  // undefine the macro to avoid issues when more than one .inst file is
+  // included in a single .cc
+  std::cout << std::endl;
+  std::cout << "#undef SPLIT_INSTANTIATIONS_CHECK" << std::endl;
+}
diff --git a/cmake/scripts/normalize.pl b/cmake/scripts/normalize.pl
new file mode 100644
index 0000000..f2c33eb
--- /dev/null
+++ b/cmake/scripts/normalize.pl
@@ -0,0 +1,103 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2001 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Remove insignificant volatile data from output files of tests
+#
+# Data affected:
+#  JobID line (containing date)
+#  line number of exceptions
+#  start and final residual in iterations
+#  small doubles
+#
+
+# Convert windows to unix line endings. This is necessary to be able to run
+# the testsuite on windows (using cygwin's diff/perl)
+s/\r$//;
+
+# Remove JobID
+
+s/JobId.*//;
+
+# Remove Input File Name:
+
+s/# Input file name:.*//;
+
+# Several date and time strings
+
+s/%%Creation Date:.*//;
+s/\"created\".*//;
+s/# Time =.*//;
+s/# Date =.*//;
+s/^\s+Time =.*//;
+s/^\s+Date =.*//;
+s/Time tag:.*//g;
+s/by the deal.II library on.*//;
+
+# Exceptions
+
+s/line <\d+> of file <.*\//file </;
+
+# See if we have a -0.0... (not followed by any other digit) and replace it
+# by the same number without the negative sign
+s/-0\.(0+)(?!\d)/0.\1/g;
+
+# remove deal.II debug output
+s/^DEAL.*::_.*\n//g;
+
+# Normalize version string by replacing (for example) 'written by
+# deal.II 8.1.0-pre' by 'written by deal.II x.y.z'
+s/written by deal\.II \d+\.\d+\.\d+(-pre|-rc\d*|)/written by deal.II x.y.z/;
+
+
+# different p4est versions output different text in VTU output. For
+# example, we get these kinds of differences:
+# ***************
+# *** 6,14 ****
+#       <PPoints>
+#         <PDataArray type="Float32" Name="Position" NumberOfComponents="3" format="ascii"/>
+#       </PPoints>
+# !     <PCellData Scalars="mpirank,treeid">
+# !       <PDataArray type="Int32" Name="mpirank" format="ascii"/>
+#         <PDataArray type="Int32" Name="treeid" format="ascii"/>
+#       </PCellData>
+#       <PPointData>
+#       </PPointData>
+# --- 6,15 ----
+#       <PPoints>
+#         <PDataArray type="Float32" Name="Position" NumberOfComponents="3" format="ascii"/>
+#       </PPoints>
+# !     <PCellData Scalars="treeid,level,mpirank">
+#         <PDataArray type="Int32" Name="treeid" format="ascii"/>
+# +       <PDataArray type="UInt8" Name="level" format="ascii"/>
+# +       <PDataArray type="Int32" Name="mpirank" format="ascii"/>
+#       </PCellData>
+#       <PPointData>
+#       </PPointData>
+#
+# To deal with these issues, we simply delete these lines
+s/.*<PCellData Scalars.*\n//g;
+s/.*<PDataArray type.*(mpirank|level).*\n//g;
+
+#
+# Different boost versions output the opening bracket for json
+# output on a new line. Thus always transform
+#     "label": {
+#
+# into
+#     "label":
+#     {
+#
+s/^(\s*)(".*":) \{$/\1\2\n\1\{/;
diff --git a/cmake/scripts/run_test.cmake b/cmake/scripts/run_test.cmake
new file mode 100644
index 0000000..3c378ca
--- /dev/null
+++ b/cmake/scripts/run_test.cmake
@@ -0,0 +1,143 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2013 - 2016 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# This is a small worker to run a single test in the testsuite
+#
+# The following variables must be set:
+#
+#   TRGT - the name of the target that should be invoked
+#   TEST - the test name (used for status messages)
+#   BINARY_DIR - the build directory that contains the target
+#
+# Optional options:
+#   EXPECT - the stage this test must reach to be considered successful
+#            (return value 0)
+#            Possible values are CONFIGURE, BUILD, RUN, DIFF, PASSED
+#
+#   GUARD_FILE - used to detect a forced interruption of this script: On
+#                startup the backed up file ${GUARD_FILE}_bck is put back
+#                in place as ${GUARD_FILE} and on exit ${GUARD_FILE} is
+#                backed up as ${GUARD_FILE}_bck. If on startup a stale
+#                ${GUARD_FILE} is found, it is deleted.
+#
+
+IF(NOT "${GUARD_FILE}" STREQUAL "" AND EXISTS ${GUARD_FILE})
+  #
+  # Guard file still exists, so this script must have been interrupted.
+  # Remove guard file to force a complete rerun:
+  #
+  EXECUTE_PROCESS(COMMAND rm -f ${GUARD_FILE})
+ELSEIF(NOT "${GUARD_FILE}" STREQUAL "" AND EXISTS ${GUARD_FILE}_bck)
+  #
+  # A backed up guard file exists. Put it back in place:
+  #
+  EXECUTE_PROCESS(COMMAND mv ${GUARD_FILE}_bck ${GUARD_FILE})
+ENDIF()
+
+
+IF("${EXPECT}" STREQUAL "")
+  SET(EXPECT "PASSED")
+ENDIF()
+
+EXECUTE_PROCESS(COMMAND ${CMAKE_COMMAND}
+  --build . --target ${TRGT}
+  WORKING_DIRECTORY ${BINARY_DIR}
+  RESULT_VARIABLE _result_code # ignored ;-)
+  OUTPUT_VARIABLE _output
+  )
+
+#
+# Determine the last successful stage of the test:
+# (Possible values are CONFIGURE, BUILD, RUN, DIFF, PASSED)
+#
+
+STRING(REGEX MATCH "${TEST}: CONFIGURE failed\\." _configure_regex "${_output}")
+STRING(REGEX MATCH "${TEST}: BUILD failed\\." _build_regex "${_output}")
+STRING(REGEX MATCH "${TEST}: RUN failed\\." _run_regex "${_output}")
+STRING(REGEX MATCH "${TEST}: DIFF failed\\." _diff_regex "${_output}")
+STRING(REGEX MATCH "${TEST}: PASSED\\." _passed_regex "${_output}")
+
+IF(NOT "${_passed_regex}" STREQUAL "")
+  SET(_stage PASSED)
+ELSEIF(NOT "${_diff_regex}" STREQUAL "")
+  SET(_stage DIFF)
+ELSEIF(NOT "${_run_regex}" STREQUAL "")
+  SET(_stage RUN)
+ELSEIF(NOT "${_configure_regex}" STREQUAL "")
+  SET(_stage CONFIGURE)
+ELSE() # unconditionally, because "BUILD failed." doesn't have to be printed...
+  SET(_stage BUILD)
+ENDIF()
+
+#
+# Print out the test result:
+#
+
+MESSAGE("Test ${TEST}: ${_stage}")
+
+MESSAGE("===============================   OUTPUT BEGIN  ===============================")
+
+IF("${_stage}" STREQUAL "PASSED")
+  STRING(REGEX REPLACE ".*\\/" "" _test ${TEST})
+  #
+  # MPI tests have a special runtime directory so rename:
+  # test.mpirun=X.BUILD -> test.BUILD/mpirun=X
+  #
+  STRING(REGEX REPLACE "\\.(mpirun=[0-9]+)(\\..*)" "\\2/\\1" _test ${_test})
+  #
+  # Also output the diff file if we guessed the location correctly. This is
+  # solely for cosmetic reasons: The diff file is either empty (if
+  # comparison against the main comparison file was successful) or contains
+  # a string explaining which comparison file variant succeeded.
+  #
+  SET(_diff "")
+  IF(EXISTS ${BINARY_DIR}/${_test}/diff)
+    FILE(READ ${BINARY_DIR}/${_test}/diff _diff)
+  ENDIF()
+  MESSAGE("${_diff}${TEST}: PASSED.")
+
+ELSE()
+
+  IF( "${_stage}" STREQUAL "BUILD" AND "${_build_regex}" STREQUAL "" )
+    # Some special output in case the BUILD stage failed in a regression test:
+    MESSAGE("${TEST}: BUILD failed. Output:")
+  ENDIF()
+  MESSAGE("${_output}")
+  MESSAGE("")
+  MESSAGE("${TEST}: ******    ${_stage} failed    *******")
+  MESSAGE("")
+ENDIF()
+
+MESSAGE("===============================    OUTPUT END   ===============================")
+
+#
+# Back up guard file:
+#
+
+IF(NOT "${GUARD_FILE}" STREQUAL "" AND EXISTS ${GUARD_FILE})
+  EXECUTE_PROCESS(COMMAND mv ${GUARD_FILE} ${GUARD_FILE}_bck)
+ENDIF()
+
+#
+# Bail out:
+#
+
+IF(NOT "${_stage}" STREQUAL "${EXPECT}")
+  MESSAGE("Expected stage ${EXPECT} - aborting")
+  MESSAGE(FATAL_ERROR "*** abort")
+ELSEIF(NOT "${_stage}" STREQUAL "PASSED")
+  MESSAGE("Expected stage ${EXPECT} - test considered successful.")
+ENDIF()
diff --git a/cmake/scripts/run_test.sh b/cmake/scripts/run_test.sh
new file mode 100644
index 0000000..897e4ca
--- /dev/null
+++ b/cmake/scripts/run_test.sh
@@ -0,0 +1,151 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Helper script used in testsuite targets to run ("run") and compare
+# ("diff") individual tests.
+#
+# Usage:
+#   run_test.sh <run|diff> TEST_FULL RUN_COMMAND \
+#               NUMDIFF_EXECUTABLE DIFF_EXECUTABLE COMPARISON_FILE
+#
+# All six positional arguments are read regardless of the stage.
+
+set -u
+
+STAGE="$1"
+TEST_FULL="$2"
+RUN_COMMAND="$3"
+NUMDIFF_EXECUTABLE="$4"
+DIFF_EXECUTABLE="$5"
+COMPARISON_FILE="$6"
+
+# Ensure uniform sorting for pathname expansion
+export LC_ALL=C
+
+#
+# Stage implementations: run the test ("run") and compare its output ("diff"):
+#
+
+run(){
+  # Start from a clean slate, then run the test with all console output captured:
+  rm -f failing_output output stdout
+  ${RUN_COMMAND} > stdout 2>&1
+  RETURN_VALUE=$?
+
+  # If the test did not create "output" itself, its console output takes that role:
+  if [ ! -f output ]; then
+    mv stdout output
+  fi
+  if [ $RETURN_VALUE -ne 0 ]; then
+    mv output failing_output
+    echo "${TEST_FULL}: BUILD successful."
+    echo "${TEST_FULL}: RUN failed. ------ Return code $RETURN_VALUE"
+    echo "${TEST_FULL}: RUN failed. ------ Result: $(pwd)/failing_output"
+    echo "${TEST_FULL}: RUN failed. ------ Partial output:"
+    cat failing_output
+    if [ -f stdout ]; then
+      echo ""
+      echo "${TEST_FULL}: RUN failed. ------ Additional output on stdout/stderr:"
+      echo ""
+      cat stdout
+    fi
+    exit 1
+  fi
+}
+
+diff() {
+  rm -f failing_diff*
+  rm -f diff*
+  touch diff
+
+  test_successful=false
+
+  # Compare the file "output" against the comparison file(s):
+  # Pick up main comparison file and all variants. A valid variant name is
+  # of the form [...].output.[STRING]
+  # (the glob matches every path that starts with COMPARISON_FILE).
+  for file in "${COMPARISON_FILE}"*; do
+    # variant = what follows the first ".output" in the path (empty for the main file):
+    variant=${file#*.output}
+
+    #
+    # Run numdiff (if NUMDIFF_EXECUTABLE contains "numdiff") or diff to
+    # determine whether files are the same. Create a diff file
+    # "diff${variant}" for each comparison file that is tried.
+    #
+    case ${NUMDIFF_EXECUTABLE} in
+      *numdiff*)
+        ${NUMDIFF_EXECUTABLE} -a 1e-6 -r 1e-8 -s ' \t\n:<>=,;' \
+                              "${file}" output > diff${variant}
+        ;;
+      *)
+        "${DIFF_EXECUTABLE}" "${file}" output > diff${variant}
+        ;;
+    esac
+    # $? below is the exit status of the comparison command that just ran.
+    if [ $? -eq 0 ]; then
+      #
+      # Ensure that only a single diff file with no contents remains
+      # (numdiff has the bad habit of being very verbose...):
+      #
+      rm -f diff*
+      touch diff
+
+      if [ -n "${variant}" ]; then
+        #
+        # In case of a successful comparison against a variant, store the
+        # fact that we compared against a variant in the diff file.
+        #
+        echo "${TEST_FULL}: DIFF successful. - Variant: ${file}" > diff
+      fi
+
+      test_successful=true
+      break
+    fi
+  done
+
+  #
+  # If none of the diffs succeeded, rename all diff files to failing_diff*,
+  # print the first lines of the diff against the main comparison file,
+  # followed by a regular context diff (more readable), and exit with 1.
+  #
+  if [ $test_successful = false ] ; then
+    for file in diff*; do
+      mv "$file" failing_"$file"
+    done
+    echo "${TEST_FULL}: BUILD successful."
+    echo "${TEST_FULL}: RUN successful."
+    echo "${TEST_FULL}: DIFF failed. ------ Source: ${COMPARISON_FILE}"
+    echo "${TEST_FULL}: DIFF failed. ------ Result: `pwd`/output"
+    echo "Check `pwd`/output ${COMPARISON_FILE}"
+    echo "${TEST_FULL}: DIFF failed. ------ Diff:   `pwd`/failing_diff"
+    echo "${TEST_FULL}: DIFF failed. ------ First 8 lines of numdiff/diff output:"
+    cat failing_diff | head -n 8
+    echo "${TEST_FULL}: DIFF failed. ------ First 50 lines diff output:"
+    "${DIFF_EXECUTABLE}" -c "${COMPARISON_FILE}" output | head -n 50
+    exit 1
+  fi
+  exit 0
+}
+
+case $STAGE in
+  run)
+    run;;
+  diff)
+    diff;;
+  *)
+    exit 1;;
+esac
diff --git a/cmake/setup_cached_variables.cmake b/cmake/setup_cached_variables.cmake
new file mode 100644
index 0000000..8dff4ca
--- /dev/null
+++ b/cmake/setup_cached_variables.cmake
@@ -0,0 +1,395 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Set up cached variables (prior to the PROJECT(deal.II) call)
+#
+# This file sets up the following cached Options:
+#
+# General configuration options:
+#
+#     DEAL_II_ALLOW_AUTODETECTION
+#     DEAL_II_ALLOW_BUNDLED
+#     DEAL_II_COMPONENT_DOCUMENTATION
+#     DEAL_II_COMPONENT_EXAMPLES
+#     DEAL_II_COMPONENT_PARAMETER_GUI
+#     DEAL_II_COMPONENT_PACKAGE
+#     DEAL_II_FORCE_AUTODETECTION
+#
+# Options regarding compilation and linking:
+#
+#     CMAKE_BUILD_TYPE
+#     DEAL_II_ALLOW_PLATFORM_INTROSPECTION
+#     DEAL_II_SETUP_DEFAULT_COMPILER_FLAGS
+#     DEAL_II_SETUP_COVERAGE
+#     BUILD_SHARED_LIBS
+#     DEAL_II_PREFER_STATIC_LIBS
+#     DEAL_II_STATIC_EXECUTABLE
+#     CMAKE_INSTALL_RPATH_USE_LINK_PATH
+#     DEAL_II_CXX_FLAGS                    *)
+#     DEAL_II_CXX_FLAGS_DEBUG
+#     DEAL_II_CXX_FLAGS_RELEASE
+#     DEAL_II_LINKER_FLAGS                 *)
+#     DEAL_II_LINKER_FLAGS_DEBUG
+#     DEAL_II_LINKER_FLAGS_RELEASE
+#
+# Components and miscellaneous options:
+#
+#     DEAL_II_WITH_64BIT_INDICES
+#     DEAL_II_DOXYGEN_USE_MATHJAX
+#     DEAL_II_CPACK_EXTERNAL_LIBS_TREE
+#
+#
+# *)  May also be set via environment variable (CXXFLAGS, LDFLAGS)
+#     (a nonempty cached variable has precedence and will not be
+#     overwritten by environment)
+#
+
+
+########################################################################
+#                                                                      #
+#                    General configuration options:                    #
+#                                                                      #
+########################################################################
+
+# Offered only when DEAL_II_HAVE_BUNDLED_DIRECTORY is set (default: ON):
+IF(DEAL_II_HAVE_BUNDLED_DIRECTORY)
+  OPTION(DEAL_II_ALLOW_BUNDLED
+    "Allow the use of libraries bundled with the source tarball. (DEAL_II_FORCE_BUNDLED* will overwrite this option.)"
+    ON)
+ENDIF()
+
+# Offered only when DEAL_II_HAVE_DOC_DIRECTORY is set (default: OFF):
+IF(DEAL_II_HAVE_DOC_DIRECTORY)
+  OPTION(DEAL_II_COMPONENT_DOCUMENTATION
+    "Enable configuration, build and installation of the documentation. This adds a COMPONENT \"documentation\" to the build system."
+    OFF)
+ENDIF()
+
+OPTION(DEAL_II_COMPONENT_EXAMPLES
+  "Enable configuration and installation of the example steps. This adds a COMPONENT \"examples\" to the build system."
+  ON
+  )
+
+OPTION(DEAL_II_COMPONENT_PARAMETER_GUI
+  "Build and install the parameter_gui. This adds a COMPONENT \"parameter_gui\" to the build system."
+  OFF
+  )
+
+OPTION(DEAL_II_ALLOW_AUTODETECTION
+  "Allow to automatically set up features by setting all undefined DEAL_II_WITH_* variables to ON or OFF"
+  ON
+  )
+
+OPTION(DEAL_II_FORCE_AUTODETECTION
+  "Force feature autodetection by undefining all DEAL_II_WITH_* variables prior to configure"
+  OFF
+  )
+
+OPTION(DEAL_II_COMPONENT_PACKAGE
+  "Generates additional targets for packaging deal.II"
+  OFF
+  )
+
+
+########################################################################
+#                                                                      #
+#                       Compilation and linking:                       #
+#                                                                      #
+########################################################################
+
+#
+# Setup CMAKE_BUILD_TYPE:
+#
+
+SET(CMAKE_BUILD_TYPE
+  "DebugRelease"
+  CACHE STRING
+  "Choose the type of build, options are: Debug, Release and DebugRelease."
+  )
+
+# Reject every CMAKE_BUILD_TYPE other than the three supported values so
+# that the rest of the configuration only has to deal with known cases:
+IF(NOT ( "${CMAKE_BUILD_TYPE}" STREQUAL "Release" OR
+         "${CMAKE_BUILD_TYPE}" STREQUAL "Debug" OR
+         "${CMAKE_BUILD_TYPE}" STREQUAL "DebugRelease" ))
+  MESSAGE(FATAL_ERROR
+    "CMAKE_BUILD_TYPE does neither match Release, Debug, nor DebugRelease!"
+    )
+ENDIF()
+
+#
+# Configuration behaviour:
+#
+
+OPTION(DEAL_II_ALLOW_PLATFORM_INTROSPECTION
+  "Allow platform introspection for CPU command sets, SSE and AVX"
+  ON
+  )
+MARK_AS_ADVANCED(DEAL_II_ALLOW_PLATFORM_INTROSPECTION)
+
+OPTION(DEAL_II_SETUP_DEFAULT_COMPILER_FLAGS
+  "Configure sensible default CFLAGS and CXXFLAGS depending on platform, compiler and build target."
+  ON
+  )
+MARK_AS_ADVANCED(DEAL_II_SETUP_DEFAULT_COMPILER_FLAGS)
+
+OPTION(DEAL_II_SETUP_COVERAGE
+  "Setup debug compiler flags to provide additional test coverage information. Currently only gprof is supported."
+  OFF
+  )
+MARK_AS_ADVANCED(DEAL_II_SETUP_COVERAGE)
+
+SET(BUILD_SHARED_LIBS "ON" CACHE BOOL
+  "Build a shared library"
+  )
+
+OPTION(DEAL_II_PREFER_STATIC_LIBS
+  "Prefer static libraries over dynamic libraries when searching for features and corresponding link interface"
+  OFF
+  )
+MARK_AS_ADVANCED(DEAL_II_PREFER_STATIC_LIBS)
+
+OPTION(DEAL_II_STATIC_EXECUTABLE
+  "Provide a link interface that is suitable for static linkage of executables. Enabling this option forces BUILD_SHARED_LIBS=OFF and DEAL_II_PREFER_STATIC_LIBS=ON"
+  OFF
+  )
+MARK_AS_ADVANCED(DEAL_II_STATIC_EXECUTABLE)
+
+IF(DEAL_II_STATIC_EXECUTABLE)
+  SET(BUILD_SHARED_LIBS "OFF" CACHE BOOL
+    "Build a shared library"
+    FORCE
+    )
+  SET(DEAL_II_PREFER_STATIC_LIBS "ON" CACHE BOOL
+    "Prefer static libraries over dynamic libraries when searching for features and corresponding link interface"
+    FORCE
+    )
+ENDIF()
+
+SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH "ON" CACHE BOOL
+  "Set the rpath of the library to the external link paths on installation"
+  )
+MARK_AS_ADVANCED(CMAKE_INSTALL_RPATH_USE_LINK_PATH)
+
+
+#
+# Translate CMake specific variables to deal.II naming:
+#
+
+FOREACH(_flag CXX_FLAGS CXX_FLAGS_RELEASE CXX_FLAGS_DEBUG)
+  IF(NOT "${CMAKE_${_flag}}" STREQUAL "")
+    MESSAGE(STATUS
+      "Prepending \${CMAKE_${_flag}} to \${DEAL_II_${_flag}}"
+      )
+    SET(DEAL_II_${_flag} "${CMAKE_${_flag}} ${DEAL_II_${_flag}}")
+  ENDIF()
+ENDFOREACH()
+
+FOREACH(_flag LINKER_FLAGS LINKER_FLAGS_DEBUG LINKER_FLAGS_RELEASE)
+  # Prepend the CMAKE_SHARED_LINKER_FLAGS* value (the same variable that is
+  # tested and reported here) to the matching DEAL_II_LINKER_FLAGS* variable:
+  IF(NOT "${CMAKE_SHARED_${_flag}}" STREQUAL "")
+    MESSAGE(STATUS "Prepending \${CMAKE_SHARED_${_flag}} to \${DEAL_II_${_flag}}")
+    SET(DEAL_II_${_flag} "${CMAKE_SHARED_${_flag}} ${DEAL_II_${_flag}}")
+  ENDIF()
+ENDFOREACH()
+
+#
+# Hide all unused CMake variables:
+#
+
+SET(DEAL_II_REMOVED_FLAGS
+  CMAKE_CXX_FLAGS
+  CMAKE_CXX_FLAGS_RELEASE
+  CMAKE_CXX_FLAGS_DEBUG
+  CMAKE_CXX_FLAGS_MINSIZEREL
+  CMAKE_CXX_FLAGS_RELWITHDEBINFO
+  CMAKE_C_FLAGS
+  CMAKE_C_FLAGS_RELEASE
+  CMAKE_C_FLAGS_DEBUG
+  CMAKE_C_FLAGS_MINSIZEREL
+  CMAKE_C_FLAGS_RELWITHDEBINFO
+  CMAKE_Fortran_FLAGS
+  CMAKE_Fortran_FLAGS_RELEASE
+  CMAKE_Fortran_FLAGS_DEBUG
+  CMAKE_Fortran_FLAGS_MINSIZEREL
+  CMAKE_Fortran_FLAGS_RELWITHDEBINFO
+  CMAKE_SHARED_LINKER_FLAGS
+  CMAKE_SHARED_LINKER_FLAGS_DEBUG
+  CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL
+  CMAKE_SHARED_LINKER_FLAGS_RELEASE
+  CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO
+  )
+FOREACH(_flag ${DEAL_II_REMOVED_FLAGS})
+  # Go away...
+  SET(${_flag} ${${_flag}} CACHE INTERNAL "" FORCE)
+  # Also set it to an empty string for the configuration run so that it
+  # does not confuse the build system (to unset is not an option - it is
+  # cached...)
+  SET(${_flag} "")
+ENDFOREACH()
+
+#
+# Promote our configuration variables to cache:
+#
+
+SET(DEAL_II_USED_FLAGS
+  DEAL_II_CXX_FLAGS
+  DEAL_II_CXX_FLAGS_DEBUG
+  DEAL_II_CXX_FLAGS_RELEASE
+  DEAL_II_LINKER_FLAGS
+  DEAL_II_LINKER_FLAGS_DEBUG
+  DEAL_II_LINKER_FLAGS_RELEASE
+  )
+FOREACH(_flag ${DEAL_II_USED_FLAGS})
+  #
+  # Promote to cache:
+  #
+  SET(${_flag} "${${_flag}}" CACHE STRING
+    "The user supplied cache variable will be appended _at the end_ of the configuration step to the auto generated ${_flag} variable"
+    )
+  MARK_AS_ADVANCED(${_flag})
+
+  #
+  # The order of compiler and linker flags is important. In order to
+  # provide an override mechanism we have to save the initial (cached)
+  # variable at this point and clear it.
+  # ${flags}_SAVED will be appended to ${flags} again in
+  # setup_finalize.cmake (called at the end of the main CMakeLists.txt
+  # file).
+  #
+  SET(${_flag}_SAVED ${${_flag}})
+  SET(${_flag} "")
+ENDFOREACH()
+
+FOREACH(_variable
+  DEAL_II_DEFINITIONS
+  DEAL_II_DEFINITIONS_DEBUG
+  DEAL_II_DEFINITIONS_RELEASE
+  )
+  #
+  # Promote to cache:
+  #
+  SET(${_variable} ${${_variable}} CACHE STRING
+    "Additional, user supplied compile definitions"
+    )
+  MARK_AS_ADVANCED(${_variable})
+ENDFOREACH()
+
+
+#
+# Finally, read in CXXFLAGS and LDFLAGS from environment and prepend them
+# to the saved variables:
+#
+# Also strip leading and trailing whitespace from linker flags to make
+# old cmake versions happy
+#
+SET(DEAL_II_CXX_FLAGS_SAVED "$ENV{CXXFLAGS} ${DEAL_II_CXX_FLAGS_SAVED}")
+STRING(STRIP "${DEAL_II_CXX_FLAGS_SAVED}" DEAL_II_CXX_FLAGS_SAVED)
+SET(DEAL_II_LINKER_FLAGS_SAVED "$ENV{LDFLAGS} ${DEAL_II_LINKER_FLAGS_SAVED}")
+STRING(STRIP "${DEAL_II_LINKER_FLAGS_SAVED}" DEAL_II_LINKER_FLAGS_SAVED)
+UNSET(ENV{CXXFLAGS})
+UNSET(ENV{LDFLAGS})
+
+
+########################################################################
+#                                                                      #
+#                Components and miscellaneous setup:                   #
+#                                                                      #
+########################################################################
+
+OPTION(DEAL_II_WITH_64BIT_INDICES
+  "If set to ON, then use 64-bit data types to represent global degree of freedom indices. The default is to OFF. You only want to set this to ON if you will solve problems with more than 2^31 (approximately 2 billion) unknowns. If set to ON, you also need to ensure that both Trilinos and/or PETSc support 64-bit indices."
+  OFF
+  )
+
+OPTION(DEAL_II_DOXYGEN_USE_MATHJAX
+  "If set to ON, doxygen documentation is generated using mathjax"
+  OFF
+  )
+MARK_AS_ADVANCED(DEAL_II_DOXYGEN_USE_MATHJAX)
+
+SET(DEAL_II_CPACK_EXTERNAL_LIBS_TREE "" CACHE PATH
+    "Path to tree of external libraries that will be installed in bundle package."
+  )
+MARK_AS_ADVANCED(DEAL_II_CPACK_EXTERNAL_LIBS_TREE)
+
+
+########################################################################
+#                                                                      #
+#                               Finalize:                              #
+#                                                                      #
+########################################################################
+
+#
+# We do not support installation into the binary directory any more ("too
+# much pain, not enough profit"):
+#
+
+IF("${CMAKE_BINARY_DIR}" STREQUAL "${CMAKE_INSTALL_PREFIX}")
+  MESSAGE(FATAL_ERROR "
+Error CMAKE_INSTALL_PREFIX is equal to CMAKE_BINARY_DIR.
+It is not possible to install into the build directory. Please set
+CMAKE_INSTALL_PREFIX to a designated install directory different than
+CMAKE_BINARY_DIR.
+(Please note that you can use deal.II directly out of a build directory
+without the need to install it, if this is what you tried to do.)
+"
+    )
+ENDIF()
+
+#
+# Compatibility renaming:
+#
+
+IF(DEFINED DEAL_II_HAVE_CXX11_FLAG AND NOT DEAL_II_HAVE_CXX11_FLAG)
+  SET(DEAL_II_WITH_CXX11 FALSE CACHE BOOL "" FORCE)
+ENDIF()
+
+#
+# Miscellaneous renaming:
+#
+
+GET_CMAKE_PROPERTY(_res VARIABLES)
+FOREACH(_var ${_res})
+  # _var runs over the names of all variables defined at this point.
+  # Rename (ALLOW|WITH|FORCE|COMPONENT)_* to DEAL_II_(ALLOW|WITH|FORCE|COMPONENT)_*
+  # (value is force-set in the cache as BOOL, the old cache entry is removed):
+  FOREACH(_match ALLOW_ WITH_ FORCE_ COMPONENT_)
+    IF(_var MATCHES "^${_match}")
+      SET(DEAL_II_${_var} ${${_var}} CACHE BOOL "" FORCE)
+      UNSET(${_var} CACHE)
+    ENDIF()
+  ENDFOREACH()
+
+  #
+  # Same for components: every variable whose name starts with one of the
+  # component names below is renamed to DEAL_II_COMPONENT_<name>:
+  IF(_var MATCHES "^(DOCUMENTATION|EXAMPLES|PACKAGE|PARAMETER_GUI)")
+    SET(DEAL_II_COMPONENT_${_var} ${${_var}} CACHE BOOL "" FORCE)
+    UNSET(${_var} CACHE)
+  ENDIF()
+
+  #
+  # If DEAL_II_FORCE_AUTODETECTION is set undefine all feature toggles
+  # DEAL_II_WITH_* prior to configure:
+  #
+  IF(DEAL_II_FORCE_AUTODETECTION AND _var MATCHES "^DEAL_II_WITH_"
+     # Exclude FEATURES that do not represent external libraries:
+     AND NOT _var MATCHES "^DEAL_II_WITH_64BIT_INDICES" )
+    UNSET(${_var} CACHE)
+  ENDIF()
+ENDFOREACH()
diff --git a/cmake/setup_compiler_flags.cmake b/cmake/setup_compiler_flags.cmake
new file mode 100644
index 0000000..3dc72be
--- /dev/null
+++ b/cmake/setup_compiler_flags.cmake
@@ -0,0 +1,153 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Setup default compiler flags: This file sets up sensible default compiler
+# flags for the various platforms, compilers and build targets supported by
+# the deal.II library.
+#
+#
+# ####################
+# #     FAT NOTE:    #
+# ####################
+#
+# All configuration in setup_compiler_flags.cmake and
+# setup_compiler_flags_<compiler>.cmake shall ONLY modify:
+#
+#   DEAL_II_CXX_FLAGS
+#   DEAL_II_CXX_FLAGS_DEBUG
+#   DEAL_II_CXX_FLAGS_RELEASE
+#   DEAL_II_LINKER_FLAGS
+#   DEAL_II_LINKER_FLAGS_DEBUG
+#   DEAL_II_LINKER_FLAGS_RELEASE
+#   DEAL_II_DEFINITIONS
+#   DEAL_II_DEFINITIONS_DEBUG
+#   DEAL_II_DEFINITIONS_RELEASE
+#   DEAL_II_USER_DEFINITIONS
+#   DEAL_II_USER_DEFINITIONS_DEBUG
+#   DEAL_II_USER_DEFINITIONS_RELEASE
+#
+# All modifications shall be guarded with the ENABLE_IF_SUPPORTED
+# or ENABLE_IF_LINKS macro, e.g.
+#
+#   ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-fpic")
+#   ENABLE_IF_LINKS(DEAL_II_LINKER_FLAGS "-Wl,--as-needed")
+#
+# Checks for compiler features (such as C++11 support) and compiler
+# specific bugs that
+#   - usually set up further configuration (such as preprocessor
+#     definitions)
+#   - disable a specific flag for a specific compiler version.
+#
+# belong in the corresponding file:
+#
+#   ./cmake/checks/check_01_compiler_features.cmake
+#   ./cmake/checks/check_01_cpu_features.cmake
+#   ./cmake/checks/check_01_cxx_features.cmake
+#   ./cmake/checks/check_02_system_features.cmake
+#   ./cmake/checks/check_03_compiler_bugs.cmake
+#
+
+
+########################################################################
+#                                                                      #
+#                            Sanity checks:                            #
+#                                                                      #
+########################################################################
+
+#
+# Check the user provided CXX flags:
+#
+
+IF(NOT "${DEAL_II_CXX_FLAGS_SAVED}" STREQUAL "${CACHED_DEAL_II_CXX_FLAGS_SAVED}"
+   OR NOT "${DEAL_II_LINKER_FLAGS_SAVED}" STREQUAL "${CACHED_DEAL_II_LINKER_FLAGS_SAVED}")
+  # Rerun this test if cxx flags changed:
+  UNSET(DEAL_II_HAVE_USABLE_CXX_FLAGS CACHE)
+ELSE()
+  SET(DEAL_II_HAVE_USABLE_CXX_FLAGS TRUE CACHE INTERNAL "")
+ENDIF()
+SET(CACHED_DEAL_II_CXX_FLAGS_SAVED "${DEAL_II_CXX_FLAGS_SAVED}" CACHE INTERNAL "" FORCE)
+SET(CACHED_DEAL_II_LINKER_FLAGS_SAVED "${DEAL_II_LINKER_FLAGS_SAVED}" CACHE INTERNAL "" FORCE)
+
+# Initialize all CMAKE_REQUIRED_* variables at this point:
+RESET_CMAKE_REQUIRED()
+
+CHECK_CXX_SOURCE_COMPILES(
+  "int main(){ return 0; }"
+  DEAL_II_HAVE_USABLE_CXX_FLAGS)
+
+IF(NOT DEAL_II_HAVE_USABLE_CXX_FLAGS)
+  UNSET(DEAL_II_HAVE_USABLE_CXX_FLAGS CACHE)
+  MESSAGE(FATAL_ERROR "
+Configuration error: Cannot compile with the user supplied flags:
+CXX flags: ${DEAL_II_CXX_FLAGS_SAVED}
+LD flags: ${DEAL_II_LINKER_FLAGS_SAVED}
+Please check the CMake variables DEAL_II_CXX_FLAGS, DEAL_II_LINKER_FLAGS
+and the environment variables CXXFLAGS, LDFLAGS.\n\n"
+    )
+ENDIF()
+
+
+########################################################################
+#                                                                      #
+#                           Compiler setup:                            #
+#                                                                      #
+########################################################################
+
+IF(DEAL_II_SETUP_DEFAULT_COMPILER_FLAGS)
+  # Default compiler flags are requested: include the setup file of every
+  # compiler family whose ID matches and record that one was recognized.
+
+  #
+  # GCC and compilers sufficiently close to GCC:
+  #
+  IF(CMAKE_CXX_COMPILER_ID MATCHES "GNU"
+     OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+    VERBOSE_INCLUDE(${CMAKE_SOURCE_DIR}/cmake/setup_compiler_flags_gnu.cmake)
+    SET(DEAL_II_KNOWN_COMPILER TRUE)
+  ENDIF()
+
+  #
+  # ICC compiler (version >= 10):
+  #
+  IF(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
+    VERBOSE_INCLUDE(${CMAKE_SOURCE_DIR}/cmake/setup_compiler_flags_intel.cmake)
+    SET(DEAL_II_KNOWN_COMPILER TRUE)
+  ENDIF()
+
+  #
+  # MSVC compiler (version >= 2012):
+  #
+  IF(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
+    VERBOSE_INCLUDE(${CMAKE_SOURCE_DIR}/cmake/setup_compiler_flags_msvc.cmake)
+    SET(DEAL_II_KNOWN_COMPILER TRUE)
+  ENDIF()
+
+  # Abort if DEAL_II_KNOWN_COMPILER is still unset/false at this point:
+  IF(NOT DEAL_II_KNOWN_COMPILER)
+    MESSAGE(FATAL_ERROR "\n"
+      "Unknown compiler!\n"
+      "If you're serious about it, set DEAL_II_SETUP_DEFAULT_COMPILER_FLAGS=OFF "
+      "and set the relevant compiler options by hand.\n\n"
+      )
+  ENDIF()
+
+ELSE()
+
+  MESSAGE(STATUS
+    "Skipped setup of default compiler flags "
+    "(DEAL_II_SETUP_DEFAULT_COMPILER_FLAGS=OFF)"
+    )
+ENDIF()
diff --git a/cmake/setup_compiler_flags_gnu.cmake b/cmake/setup_compiler_flags_gnu.cmake
new file mode 100644
index 0000000..4cccd53
--- /dev/null
+++ b/cmake/setup_compiler_flags_gnu.cmake
@@ -0,0 +1,189 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# General setup for GCC and compilers sufficiently close to GCC
+#
+# Please read the fat note in setup_compiler_flags.cmake prior to
+# editing this file.
+#
+
+IF( CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND
+    CMAKE_CXX_COMPILER_VERSION VERSION_LESS "3.4" )
+  MESSAGE(WARNING "\n"
+    "You're using an old version of the GNU Compiler Collection (gcc/g++)!\n"
+    "It is strongly recommended to use at least version 3.4.\n"
+    )
+ENDIF()
+
+
+########################
+#                      #
+#    General setup:    #
+#                      #
+########################
+
+#
+# Set -pedantic if the compiler supports it.
+#
+IF(NOT (CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND
+        CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.4"))
+  ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-pedantic")
+ENDIF()
+
+#
+# Set the pic flag.
+#
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-fpic")
+
+#
+# Check whether the -as-needed flag is available. If so set it to link
+# the deal.II library with it.
+#
+ENABLE_IF_LINKS(DEAL_II_LINKER_FLAGS "-Wl,--as-needed")
+
+#
+# Setup various warnings:
+#
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-Wall")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-Wextra")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-Wpointer-arith")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-Wwrite-strings")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-Wsynth")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-Wsign-compare")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-Wswitch")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-Woverloaded-virtual")
+
+#
+# Disable Wlong-long that will trigger a lot of warnings when compiling
+# with disabled C++11 support:
+#
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-Wno-long-long")
+
+#
+# Disable deprecation warnings
+#
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-Wno-deprecated-declarations")
+
+#
+# Disable warning generated by Debian version of openmpi
+#
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-Wno-literal-suffix")
+
+IF(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+  #
+  # Silence Clang warnings about unused compiler parameters (works around a
+  # regression in the clang driver frontend of certain versions):
+  #
+  ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-Qunused-arguments")
+
+  #
+  # Clang verbosely warns about not supporting all our friend declarations
+  # (and consequently removing access control altogether)
+  #
+  ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-Wno-unsupported-friend")
+
+  ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-Wno-unused-parameter")
+  ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-Wno-unused-variable")
+
+  # without c++11 enabled, clang produces a ton of warnings in boost:
+  ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-Wno-c99-extensions")
+  ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-Wno-variadic-macros")
+  ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-Wno-c++11-extensions")
+
+  #
+  # Clang versions prior to 3.6 emit a lot of false positives wrt
+  # "-Wunused-function". Also suppress warnings for Xcode older than 6.3
+  # (which is equivalent to clang < 3.6).
+  #
+  # FIXME: I wait for the day with a clang version "4.0"... and I will
+  # curse the person that thought it is a _great_ idea to come up with
+  # independent version numbers for clang on Mac...
+  #
+  IF( CMAKE_CXX_COMPILER_VERSION VERSION_LESS "3.6" OR
+      ( NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.0" AND
+        CMAKE_CXX_COMPILER_VERSION VERSION_LESS "6.3") )
+    ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-Wno-unused-function")
+  ENDIF()
+ENDIF()
+
+
+IF(DEAL_II_STATIC_EXECUTABLE)
+  #
+  # To produce a static executable, we have to statically link libstdc++
+  # and gcc's support libraries and glibc:
+  #
+  ENABLE_IF_SUPPORTED(DEAL_II_LINKER_FLAGS "-static")
+  ENABLE_IF_SUPPORTED(DEAL_II_LINKER_FLAGS "-pthread")
+ENDIF()
+
+
+#############################
+#                           #
+#    For Release target:    #
+#                           #
+#############################
+
+IF (CMAKE_BUILD_TYPE MATCHES "Release")
+  #
+  # General optimization flags:
+  #
+  ADD_FLAGS(DEAL_II_CXX_FLAGS_RELEASE "-O2")
+
+  ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS_RELEASE "-funroll-loops")
+  ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS_RELEASE "-funroll-all-loops")
+  ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS_RELEASE "-fstrict-aliasing")
+ENDIF()
+
+
+###########################
+#                         #
+#    For Debug target:    #
+#                         #
+###########################
+
+IF (CMAKE_BUILD_TYPE MATCHES "Debug")
+
+  LIST(APPEND DEAL_II_DEFINITIONS_DEBUG "DEBUG")
+  LIST(APPEND DEAL_II_USER_DEFINITIONS_DEBUG "DEBUG")
+
+  ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS_DEBUG "-Og")
+  #
+  # If -Og is not available, fall back to -O0:
+  #
+  IF(NOT DEAL_II_HAVE_FLAG_Og)
+    ADD_FLAGS(DEAL_II_CXX_FLAGS_DEBUG "-O0")
+  ENDIF()
+
+  ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS_DEBUG "-ggdb")
+  ENABLE_IF_SUPPORTED(DEAL_II_LINKER_FLAGS_DEBUG "-ggdb")
+  #
+  # If -ggdb is not available, fall back to -g:
+  #
+  IF(NOT DEAL_II_HAVE_FLAG_ggdb)
+    ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS_DEBUG "-g")
+    ENABLE_IF_SUPPORTED(DEAL_II_LINKER_FLAGS_DEBUG "-g")
+  ENDIF()
+
+  IF(DEAL_II_SETUP_COVERAGE)
+    #
+    # Enable test coverage
+    #
+    ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS_DEBUG "-fno-elide-constructors")
+    ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS_DEBUG "-ftest-coverage -fprofile-arcs")
+    ENABLE_IF_SUPPORTED(DEAL_II_LINKER_FLAGS_DEBUG "-ftest-coverage -fprofile-arcs")
+  ENDIF()
+
+ENDIF()
diff --git a/cmake/setup_compiler_flags_intel.cmake b/cmake/setup_compiler_flags_intel.cmake
new file mode 100644
index 0000000..2232a64
--- /dev/null
+++ b/cmake/setup_compiler_flags_intel.cmake
@@ -0,0 +1,197 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# General setup for the Intel C++ Compiler
+#
+# Please read the fat note in setup_compiler_flags.cmake prior to
+# editing this file.
+#
+
+IF(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "10.0" )
+  MESSAGE(WARNING "\n"
+    "You're using an old version of the Intel C++ Compiler (icc/icpc)!\n"
+    "It is strongly recommended to use at least version 10.\n"
+    )
+ENDIF()
+
+
+########################
+#                      #
+#    General setup:    #
+#                      #
+########################
+
+#
+# Generate position-independent code:
+#
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-fpic")
+
+#
+# Check whether the linker accepts the --as-needed flag. If so, use it
+# when linking the deal.II library.
+#
+ENABLE_IF_LINKS(DEAL_II_LINKER_FLAGS "-Wl,--as-needed")
+
+#
+# Set ansi mode:
+#
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-ansi")
+
+#
+# Enable verbose warnings:
+#
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-w2")
+
+#
+# Disable some warnings that lead to a lot of false positives:
+#
+#   -w68    integer conversion resulted in a change of sign
+#           (triggers a lot in functionparser)
+#   -w175   subscript out of range
+#   -w135   class template "dealii::FE_Q_Base<POLY, dim, spacedim>"
+#           has no member "Implementation"
+#           (the compiler is objectively wrong since the warning
+#            triggers also on code of the form
+#            class FE_Q_Base {
+#              struct Implementation; // forward declaration
+#              friend struct Implementation;
+#            };)
+#   -w177   declared but not referenced
+#   -w191   type qualifier is meaningless on cast type
+#           This warns about code such as
+#              static_cast<T const * const>(p)
+#           There are many places in boost that do this kind of thing.
+#   -w193   zero used for undefined preprocessing identifier "..."
+#           This happens when using undefined preprocessor names in
+#           conditions such as
+#             #if (abc && def)
+#           instead of
+#             #if (defined(abc) && defined(def))
+#           The standard says that in such cases, the undefined symbol
+#           is assumed to be zero. The warning is in principle
+#           useful, but the pattern appears exceedingly often in the TBB
+#   -w279   controlling expression is constant
+#   -w327   NULL reference is not allowed
+#           (the compiler is correct here in that statements like
+#            *static_cast<int*>(0) are not allowed to initialize
+#            references; however, it's the only useful way to do
+#            so if you need an invalid value for a reference)
+#   -w383   value copied to temporary, reference to temporary used
+#   -w981   operands are evaluated in unspecified order
+#   -w1418  external function definition with no prior declaration
+#           (happens in boost)
+#   -w1478  deprecation warning
+#   -w1572  floating-point equality and inequality comparisons are unreliable
+#   -w2259  non-pointer conversion from "double" to "float" may
+#           lose significant bits
+#   -w21    type qualifiers are meaningless in this declaration
+#   -w2536  type qualifiers are meaningless here
+#   -w15531 A portion of SIMD loop is serialized
+#
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-wd68")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-wd135")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-wd175")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-wd177")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-wd191")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-wd193")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-wd279")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-wd327")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-wd383")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-wd981")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-wd1418")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-wd1478")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-wd1572")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-wd2259")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-wd21")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-wd2536")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-wd15531")
+
+
+#
+# Also disable the following warnings that we frequently
+# trigger when writing dimension-independent code:
+#   -w111 statement is unreachable
+#         Happens in code that is guarded by a check on 'dim'
+#   -w128 loop is not reachable from preceding code
+#         Same as above
+#   -w185 dynamic initialization in unreachable code
+#         When initializing a local variable in code
+#         that is executed only for one specific dimension
+#   -w280 selector expression is constant
+#         When writing 'switch(dim)'
+#
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-wd111")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-wd128")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-wd185")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-wd280")
+
+
+IF(DEAL_II_STATIC_EXECUTABLE)
+  #
+  # To produce a static executable, we have to statically link Intel's
+  # support libraries:
+  #
+  ENABLE_IF_SUPPORTED(DEAL_II_LINKER_FLAGS "-static")
+  ENABLE_IF_SUPPORTED(DEAL_II_LINKER_FLAGS "-static-intel")
+  ENABLE_IF_SUPPORTED(DEAL_II_LINKER_FLAGS "-static-gcc")
+  ENABLE_IF_SUPPORTED(DEAL_II_LINKER_FLAGS "-pthread")
+ELSE()
+  #
+  # Explicitly link Intel's support libraries dynamically:
+  #
+  ENABLE_IF_SUPPORTED(DEAL_II_LINKER_FLAGS "-shared-intel")
+ENDIF()
+
+
+#############################
+#                           #
+#    For Release target:    #
+#                           #
+#############################
+
+IF (CMAKE_BUILD_TYPE MATCHES "Release")
+  #
+  # General optimization flags (-O2 is added directly, the remaining
+  # flags go through ENABLE_IF_SUPPORTED):
+
+  ADD_FLAGS(DEAL_II_CXX_FLAGS_RELEASE "-O2")
+
+  # Disable ANSI aliasing assumptions (equivalent to -fno-strict-aliasing):
+  ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS_RELEASE "-no-ansi-alias")
+
+  ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS_RELEASE "-ip")
+
+  ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS_RELEASE "-funroll-loops")
+ENDIF()
+
+
+###########################
+#                         #
+#    For Debug target:    #
+#                         #
+###########################
+
+IF (CMAKE_BUILD_TYPE MATCHES "Debug")
+  LIST(APPEND DEAL_II_DEFINITIONS_DEBUG "DEBUG")
+  LIST(APPEND DEAL_II_USER_DEFINITIONS_DEBUG "DEBUG")
+  # Disable optimization and request debug information:
+  ADD_FLAGS(DEAL_II_CXX_FLAGS_DEBUG "-O0")
+
+  ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS_DEBUG "-g")
+  ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS_DEBUG "-gdwarf-2")
+  ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS_DEBUG "-grecord-gcc-switches")
+ENDIF()
+
diff --git a/cmake/setup_compiler_flags_msvc.cmake b/cmake/setup_compiler_flags_msvc.cmake
new file mode 100644
index 0000000..a533697
--- /dev/null
+++ b/cmake/setup_compiler_flags_msvc.cmake
@@ -0,0 +1,103 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2016 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# General setup for the Microsoft Visual Studio C++ Compiler (Windows)
+#
+# Please read the fat note in setup_compiler_flags.cmake prior to
+# editing this file.
+#
+
+
+########################
+#                      #
+#    General setup:    #
+#                      #
+########################
+
+# Enable C++ exception handling:
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "/EHsc")
+
+# Globally disable some legacy min and max macros that cause problems.
+# NOTE(review): confirm cl accepts "/NOMINMAX"; the definitions below may be what takes effect:
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "/NOMINMAX")
+LIST(APPEND DEAL_II_DEFINITIONS "NOMINMAX")
+LIST(APPEND DEAL_II_USER_DEFINITIONS "NOMINMAX")
+
+# Fix "fatal error C1128: number of sections exceeded object file format limit"
+# happening in debug mode with Visual Studio 2015:
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "/bigobj")
+
+ADD_FLAGS(DEAL_II_CXX_FLAGS "/W3")
+
+#
+# Selectively disable a bunch of warnings (one /wd<number> flag each):
+#
+# 4068 - unknown pragmas
+# 4244 - implied downcasting from double to float
+# 4267 - implied downcasting from size_t to unsigned int
+# 4996 - unsafe functions, such as strcat and sprintf
+# 4355 - 'this' : used in base member initializer list
+# 4661 - no suitable definition provided for explicit template instantiation request
+# 4800 - forcing value to bool 'true' or 'false' (performance warning)
+# 4146 - unary minus operator applied to unsigned type, result still unsigned
+# 4667 - no function template defined that matches forced instantiation
+# 4520 - multiple default constructors specified
+# 4700 - uninitialized local variable
+# 4789 - destination of memory copy is too small
+# 4808 - case 'value' is not a valid value for switch condition of type 'bool'
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "/wd4068")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "/wd4244")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "/wd4267")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "/wd4996")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "/wd4355")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "/wd4661")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "/wd4800")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "/wd4146")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "/wd4667")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "/wd4520")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "/wd4700")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "/wd4789")
+ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "/wd4808")
+
+
+#############################
+#                           #
+#    For Release target:    #
+#                           #
+#############################
+
+IF (CMAKE_BUILD_TYPE MATCHES "Release")
+  #
+  # General optimization flags (very basic for now, only /O2):
+  #
+  ADD_FLAGS(DEAL_II_CXX_FLAGS_RELEASE "/O2")
+ENDIF()
+
+
+###########################
+#                         #
+#    For Debug target:    #
+#                         #
+###########################
+
+IF (CMAKE_BUILD_TYPE MATCHES "Debug")
+  LIST(APPEND DEAL_II_DEFINITIONS_DEBUG "DEBUG")
+  LIST(APPEND DEAL_II_USER_DEFINITIONS_DEBUG "DEBUG")
+
+  # Generate debug info; the three options are checked and added as one string:
+  ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS_DEBUG "/Zi /MDd /Od")
+ENDIF()
+
diff --git a/cmake/setup_cpack.cmake b/cmake/setup_cpack.cmake
new file mode 100644
index 0000000..4da4543
--- /dev/null
+++ b/cmake/setup_cpack.cmake
@@ -0,0 +1,87 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2014 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+IF(DEAL_II_COMPONENT_PACKAGE)
+  MESSAGE(STATUS "Setting up CPack")
+  SET(CPACK_GENERATOR "Bundle")
+  # Configure the bundle helper files (@ONLY) into ${CMAKE_BINARY_DIR}/cpack:
+  CONFIGURE_FILE(
+    ${CMAKE_SOURCE_DIR}/cmake/cpack-mac-bundle/mac_startup_script.sh.in
+    ${CMAKE_BINARY_DIR}/cpack/mac_startup_script.sh
+    @ONLY
+    )
+
+  CONFIGURE_FILE(
+    ${CMAKE_SOURCE_DIR}/cmake/cpack-mac-bundle/dealii-terminal.in
+    ${CMAKE_BINARY_DIR}/cpack/dealii-terminal
+    @ONLY
+    )
+
+  CONFIGURE_FILE(
+    ${CMAKE_SOURCE_DIR}/cmake/cpack-mac-bundle/dealii.conf.in
+    ${CMAKE_BINARY_DIR}/cpack/dealii.conf
+    @ONLY
+    )
+
+  CONFIGURE_FILE(
+    ${CMAKE_SOURCE_DIR}/cmake/cpack-mac-bundle/Info.plist.in
+    ${CMAKE_BINARY_DIR}/cpack/Info.plist
+    @ONLY
+    )
+  # Package and bundle metadata for CPack:
+  SET(CPACK_PACKAGE_ICON
+    "${CMAKE_SOURCE_DIR}/cmake/cpack-mac-bundle/dealii-icon.icns"
+    )
+
+  SET(CPACK_PACKAGE_FILE_NAME
+    "${DEAL_II_PACKAGE_NAME}-${DEAL_II_PACKAGE_VERSION}"
+    )
+
+  SET(CPACK_BUNDLE_NAME
+    "${DEAL_II_PACKAGE_NAME}"
+    )
+
+  SET(CPACK_BUNDLE_ICON
+    "${CMAKE_SOURCE_DIR}/cmake/cpack-mac-bundle/dealii-icon.icns"
+    )
+
+  SET(CPACK_BUNDLE_PLIST
+    "${CMAKE_BINARY_DIR}/cpack/Info.plist"
+    )
+
+  SET(CPACK_BUNDLE_STARTUP_COMMAND
+    "${CMAKE_BINARY_DIR}/cpack/mac_startup_script.sh"
+    )
+  # Install the generated configuration file and the terminal launcher:
+  INSTALL(FILES
+    ${CMAKE_BINARY_DIR}/cpack/dealii.conf
+    DESTINATION ${DEAL_II_SHARE_RELDIR}
+    )
+
+  INSTALL(PROGRAMS
+    ${CMAKE_BINARY_DIR}/cpack/dealii-terminal
+    DESTINATION ${DEAL_II_EXECUTABLE_RELDIR}
+    )
+  # Optionally install a tree of external libraries into "opt":
+  IF(NOT "${DEAL_II_CPACK_EXTERNAL_LIBS_TREE}" STREQUAL "")
+    INSTALL(DIRECTORY ${DEAL_II_CPACK_EXTERNAL_LIBS_TREE}/
+      DESTINATION opt
+      USE_SOURCE_PERMISSIONS
+      )
+  ENDIF()
+  # Include the CPack module only after all CPACK_* variables are set:
+  INCLUDE(CPack)
+  MESSAGE(STATUS "Setting up CPack - Done")
+ENDIF()
diff --git a/cmake/setup_custom_targets.cmake b/cmake/setup_custom_targets.cmake
new file mode 100644
index 0000000..38daa74
--- /dev/null
+++ b/cmake/setup_custom_targets.cmake
@@ -0,0 +1,136 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2013 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Add convenience targets that build and install only a specific component:
+#
+#   library
+#   compat_files
+#   documentation
+#   examples
+#   parameter_gui
+#
+
+
+IF("${CMAKE_INSTALL_PREFIX}" STREQUAL "/usr/local")
+  #
+  # If CMAKE_INSTALL_PREFIX still has the value "/usr/local" we assume that
+  # it wasn't set and that the user doesn't actually want to install but
+  # just use deal.II in the build directory. In this case, do not add the
+  # "install" phase to the convenience targets.
+  MACRO(_add_custom_target _name)
+    ADD_CUSTOM_TARGET(${_name})
+  ENDMACRO()
+
+  # Description used in messages about the convenience targets:
+  SET(_description_string "build")
+ELSE()
+  MACRO(_add_custom_target _name)
+    ADD_CUSTOM_TARGET(${_name}
+      COMMAND ${CMAKE_COMMAND}
+        -DCOMPONENT="${_name}" -P cmake_install.cmake
+      COMMENT "Build and install component \"${_name}\"."
+      WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
+      )
+  ENDMACRO()
+
+  # Description used in messages about the convenience targets:
+  SET(_description_string "build and install")
+ENDIF()
+
+# The library can always be compiled and/or installed unconditionally ;-)
+_add_custom_target(library)
+
+FOREACH(_component compat_files documentation examples parameter_gui)
+  STRING(TOUPPER "${_component}" _component_uppercase)
+  IF(DEAL_II_COMPONENT_${_component_uppercase})
+    _add_custom_target(${_component})
+  ELSE()
+    # Disabled component: the target only prints an error message and fails.
+    ADD_CUSTOM_TARGET(${_component}
+      COMMAND
+           ${CMAKE_COMMAND} -E echo ''
+        && ${CMAKE_COMMAND} -E echo ''
+        && ${CMAKE_COMMAND} -E echo '***************************************************************************'
+        && ${CMAKE_COMMAND} -E echo "**  Error: Could not ${_description_string} disabled component \"${_component}\"."
+        && ${CMAKE_COMMAND} -E echo "**  Please reconfigure with -DDEAL_II_COMPONENT_${_component_uppercase}=ON"
+        && ${CMAKE_COMMAND} -E echo '***************************************************************************'
+        && ${CMAKE_COMMAND} -E echo ''
+        && ${CMAKE_COMMAND} -E echo ''
+        && false
+      )
+  ENDIF()
+ENDFOREACH()
+# Without DEAL_II_COMPONENT_PACKAGE, "package" also just prints an error and fails:
+IF(NOT DEAL_II_COMPONENT_PACKAGE)
+  ADD_CUSTOM_TARGET(package
+    COMMAND
+         ${CMAKE_COMMAND} -E echo ''
+      && ${CMAKE_COMMAND} -E echo ''
+      && ${CMAKE_COMMAND} -E echo '***************************************************************************'
+      && ${CMAKE_COMMAND} -E echo "**  Error: Could not generate binary package. The component is disabled."
+      && ${CMAKE_COMMAND} -E echo "**  Please reconfigure with -DDEAL_II_COMPONENT_PACKAGE=ON"
+      && ${CMAKE_COMMAND} -E echo '***************************************************************************'
+      && ${CMAKE_COMMAND} -E echo ''
+      && ${CMAKE_COMMAND} -E echo ''
+      && false
+    )
+ENDIF()
+
+#
+# Provide an "info" target to print a help message. The help text names
+# "ninja" as the build command for Ninja generators and "make" otherwise:
+IF(CMAKE_GENERATOR MATCHES "Ninja")
+  SET(_make_command "ninja")
+ELSE()
+  SET(_make_command "make")
+ENDIF()
+# Write the help text as a CMake script (run with "cmake -P" by the target below):
+FILE(WRITE ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/print_info.cmake
+"MESSAGE(
+\"###
+#
+#  The following targets are available (invoke by $ ${_make_command} <target>):
+#
+#    all            - compile the library and all enabled components
+#    clean          - remove all generated files
+#    install        - install into CMAKE_INSTALL_PREFIX
+#
+#    info           - print this help message
+#    help           - print a list of valid top level targets
+#
+#    edit_cache     - run ccmake for changing (cached) configuration variables
+#                     and reruns the configure and generate phases of CMake
+#    rebuild_cache  - rerun the configure and generate phases of CMake
+#
+#    compat_files   - ${_description_string} component 'compat_files'
+#    documentation  - ${_description_string} component 'documentation'
+#    examples       - ${_description_string} component 'examples'
+#    library        - ${_description_string} component 'library'
+#    parameter_gui  - ${_description_string} component 'parameter_gui'
+#    package        - build binary package
+#
+#    test           - run a minimal set of tests
+#
+#    setup_tests    - set up testsuite subprojects
+#    prune_tests    - remove all testsuite subprojects
+#
+###\")"
+  )
+
+ADD_CUSTOM_TARGET(info
+  COMMAND ${CMAKE_COMMAND} -P ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/print_info.cmake
+  )
diff --git a/cmake/setup_deal_ii.cmake b/cmake/setup_deal_ii.cmake
new file mode 100644
index 0000000..014c9e8
--- /dev/null
+++ b/cmake/setup_deal_ii.cmake
@@ -0,0 +1,164 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Set up deal.II specific definitions
+#
+# This file defines a long list of uncached variables, used throughout the
+# configuration to determine paths, locations and names. Some linkage and
+# cross-compilation setup also happens in here.
+#
+# Definitions marked with *) can be overridden by defining them in the
+# cache before this file is included. This is done with the help of the
+# SET_IF_EMPTY macro.
+#
+# General information about deal.II:
+#
+#     DEAL_II_PACKAGE_NAME            *)
+#     DEAL_II_PACKAGE_VERSION         *)
+#     DEAL_II_PACKAGE_VENDOR          *)
+#     DEAL_II_PACKAGE_DESCRIPTION     *)
+#     DEAL_II_VERSION_MAJOR
+#     DEAL_II_VERSION_MINOR
+#     DEAL_II_VERSION_SUBMINOR
+#     DEAL_II_VERSION
+#
+# Information about paths, install locations and names:
+#
+#     DEAL_II_PROJECT_CONFIG_NAME     *)
+#     DEAL_II_BASE_NAME               *)
+#     DEAL_II_DEBUG_SUFFIX            *)
+#     DEAL_II_RELEASE_SUFFIX          *)
+#
+#     DEAL_II_EXECUTABLE_RELDIR       *)
+#     DEAL_II_INCLUDE_RELDIR          *)
+#     DEAL_II_LIBRARY_RELDIR          *)
+#     DEAL_II_PROJECT_CONFIG_RELDIR   *)
+#     DEAL_II_SHARE_RELDIR            *)
+#     DEAL_II_DOCREADME_RELDIR        *)
+#     DEAL_II_DOCHTML_RELDIR          *)
+#     DEAL_II_EXAMPLES_RELDIR         *)
+#
+#     DEAL_II_BUILD_TYPES
+#     DEAL_II_LIST_SUFFIXES
+#     DEAL_II_STRING_SUFFIXES
+#
+# *)  Can be overwritten by the command line via -D<...>
+#
+
+########################################################################
+#                                                                      #
+#                  General information about deal.II:                  #
+#                                                                      #
+########################################################################
+
+SET_IF_EMPTY(DEAL_II_PACKAGE_NAME "deal.II")
+
+SET_IF_EMPTY(DEAL_II_PACKAGE_VENDOR
+  "The deal.II Authors <http://www.dealii.org/>"
+  )
+SET_IF_EMPTY(DEAL_II_PACKAGE_DESCRIPTION
+  "Library for solving partial differential equations with the finite element method"
+  )
+# The default package version is the first line of the top-level VERSION file:
+FILE(STRINGS "${CMAKE_SOURCE_DIR}/VERSION" _version LIMIT_COUNT 1)
+SET_IF_EMPTY(DEAL_II_PACKAGE_VERSION "${_version}")
+
+#
+# We expect a version number of the form "X.Y.Z" or "X.Y.Z-bla", where X, Y, Z
+# are always numbers and bla is a short string ("pre", "rc0", "rc1", etc.).
+# The regular expressions below extract X, Y and Z; the suffix is not kept:
+STRING(REGEX REPLACE "^([0-9]+)\\..*" "\\1"
+  DEAL_II_VERSION_MAJOR "${DEAL_II_PACKAGE_VERSION}"
+  )
+STRING(REGEX REPLACE "^[0-9]+\\.([0-9]+).*" "\\1"
+  DEAL_II_VERSION_MINOR "${DEAL_II_PACKAGE_VERSION}"
+  )
+STRING(REGEX REPLACE "^[0-9]+\\.[0-9]+\\.([0-9]+).*" "\\1"
+  DEAL_II_VERSION_SUBMINOR "${DEAL_II_PACKAGE_VERSION}"
+  )
+
+SET(DEAL_II_VERSION ${DEAL_II_VERSION_MAJOR}.${DEAL_II_VERSION_MINOR}.${DEAL_II_VERSION_SUBMINOR})
+
+
+########################################################################
+#                                                                      #
+#         Information about paths, install locations and names:        #
+#                                                                      #
+########################################################################
+
+SET(DEAL_II_PROJECT_CONFIG_NAME "${DEAL_II_PACKAGE_NAME}")
+# NOTE(review): set unconditionally above although the file header marks it as overridable.
+STRING(REPLACE "." "_" _base_name "${DEAL_II_PACKAGE_NAME}")
+SET_IF_EMPTY(DEAL_II_BASE_NAME "${_base_name}")
+SET_IF_EMPTY(DEAL_II_DEBUG_SUFFIX ".g")
+SET_IF_EMPTY(DEAL_II_RELEASE_SUFFIX "")
+
+#
+# Try to obey the FHS (Filesystem Hierarchy Standard) as closely as possible ...
+#
+SET_IF_EMPTY(DEAL_II_EXECUTABLE_RELDIR "bin")
+SET_IF_EMPTY(DEAL_II_INCLUDE_RELDIR "include")
+SET_IF_EMPTY(DEAL_II_LIBRARY_RELDIR "lib${LIB_SUFFIX}")
+SET_IF_EMPTY(DEAL_II_PROJECT_CONFIG_RELDIR "${DEAL_II_LIBRARY_RELDIR}/cmake/${DEAL_II_PROJECT_CONFIG_NAME}")
+SET_IF_EMPTY(DEAL_II_SHARE_RELDIR "share/${DEAL_II_PACKAGE_NAME}")
+#
+# ... but install the documentation into prominent places:
+#
+SET_IF_EMPTY(DEAL_II_DOCREADME_RELDIR "./")
+SET_IF_EMPTY(DEAL_II_DOCHTML_RELDIR "doc")
+SET_IF_EMPTY(DEAL_II_EXAMPLES_RELDIR "examples")
+# Collect the build types; MATCHES is a regex match, so both tests can succeed:
+IF(CMAKE_BUILD_TYPE MATCHES "Debug")
+  LIST(APPEND DEAL_II_BUILD_TYPES "DEBUG")
+ENDIF()
+
+IF(CMAKE_BUILD_TYPE MATCHES "Release")
+  LIST(APPEND DEAL_II_BUILD_TYPES "RELEASE")
+ENDIF()
+# Suffixes <S> of the DEAL_II_<S> configuration variables (list-valued, then string-valued):
+SET(DEAL_II_LIST_SUFFIXES
+  DEFINITIONS DEFINITIONS_RELEASE DEFINITIONS_DEBUG
+  USER_DEFINITIONS USER_DEFINITIONS_RELEASE USER_DEFINITIONS_DEBUG
+  INCLUDE_DIRS USER_INCLUDE_DIRS BUNDLED_INCLUDE_DIRS
+  LIBRARIES LIBRARIES_RELEASE LIBRARIES_DEBUG
+  )
+
+SET(DEAL_II_STRING_SUFFIXES
+  CXX_FLAGS CXX_FLAGS_RELEASE CXX_FLAGS_DEBUG
+  LINKER_FLAGS LINKER_FLAGS_RELEASE LINKER_FLAGS_DEBUG
+  )
+
+
+########################################################################
+#                                                                      #
+#              Setup static linkage and crosscompilation:              #
+#                                                                      #
+########################################################################
+
+IF(DEAL_II_PREFER_STATIC_LIBS)
+  #
+  # Invert the search order for libraries when DEAL_II_PREFER_STATIC_LIBS
+  # is set, so that static archives are preferred over shared libraries:
+  #
+  LIST(REVERSE CMAKE_FIND_LIBRARY_SUFFIXES)
+ENDIF()
+
+IF(CMAKE_CROSSCOMPILING)
+  #
+  # Disable platform introspection when cross compiling (forced into the cache):
+  #
+  SET(DEAL_II_ALLOW_PLATFORM_INTROSPECTION OFF CACHE BOOL "" FORCE)
+ENDIF()
diff --git a/cmake/setup_external_macros.cmake b/cmake/setup_external_macros.cmake
new file mode 100644
index 0000000..e625f77
--- /dev/null
+++ b/cmake/setup_external_macros.cmake
@@ -0,0 +1,27 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Include all external (cmake) macros that we will use:
+#
+
+INCLUDE(CheckCXXCompilerFlag)
+INCLUDE(CheckCXXSourceCompiles)
+INCLUDE(CheckCXXSourceRuns)
+INCLUDE(CheckCXXSymbolExists)
+INCLUDE(CheckIncludeFileCXX)
+
+INCLUDE(CheckCSourceCompiles)
+INCLUDE(CheckFunctionExists)
diff --git a/cmake/setup_finalize.cmake b/cmake/setup_finalize.cmake
new file mode 100644
index 0000000..6dd72d4
--- /dev/null
+++ b/cmake/setup_finalize.cmake
@@ -0,0 +1,85 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+
+########################################################################
+#                                                                      #
+#                      Finalize the configuration:                     #
+#                                                                      #
+########################################################################
+
+#
+# Hide some cmake specific cached variables: mark "file_cmd" as advanced.
+#
+MARK_AS_ADVANCED(file_cmd)
+
+#
+# Append the saved initial (cached) variables ${flags}_SAVED at the end of
+# ${flags}, see setup_cached_compiler_flags.cmake and the main
+# CMakeLists.txt for details.
+#
+FOREACH(_flags ${DEAL_II_USED_FLAGS})
+  # Join with a single space, then strip leading and trailing whitespace:
+  STRING(STRIP "${${_flags}} ${${_flags}_SAVED}" ${_flags})
+ENDFOREACH()
+
+#
+# Sanity check: The variables listed in DEAL_II_REMOVED_FLAGS must not be
+# used during the configuration stage. Abort with a fatal error if one of
+# them is non-empty at this point:
+FOREACH(_flag ${DEAL_II_REMOVED_FLAGS})
+  IF(NOT "${${_flag}}" STREQUAL "")
+    MESSAGE(FATAL_ERROR
+      "\nInternal configuration error: The variable ${_flag} was set to a "
+      "non empty value during the configuration! (The corresponding "
+      "DEAL_II_* variable should have been used.)\n"
+      "${_flag}=\"${${_flag}}\"\n"
+      )
+  ENDIF()
+ENDFOREACH()
+
+#
+# Save base configuration into variables BASE_* for later use in
+# setup_write_config.cmake. This copy is taken before the features are
+# registered below:
+FOREACH(_suffix ${DEAL_II_STRING_SUFFIXES} ${DEAL_II_LIST_SUFFIXES})
+  SET(BASE_${_suffix} ${DEAL_II_${_suffix}})
+ENDFOREACH()
+
+#
+# Register every feature listed in DEAL_II_FEATURES:
+#
+FOREACH(_feature ${DEAL_II_FEATURES})
+  FILTER_SYSTEM_LIBRARIES(${_feature}) # TODO, remove here
+  REGISTER_FEATURE(${_feature})
+ENDFOREACH()
+
+#
+# Deduplicate entries one more time. Lists whose suffix ends in
+# INCLUDE_DIRS are handled without, all other lists with the REVERSE option:
+FOREACH(_suffix ${DEAL_II_LIST_SUFFIXES})
+  IF(_suffix MATCHES "INCLUDE_DIRS$")
+    REMOVE_DUPLICATES(DEAL_II_${_suffix})
+  ELSE()
+    REMOVE_DUPLICATES(DEAL_II_${_suffix} REVERSE)
+  ENDIF()
+ENDFOREACH()
+
+#
+# Clean up deal.IITargets.cmake in the build directory:
+#
+FILE(REMOVE
+  ${CMAKE_BINARY_DIR}/${DEAL_II_PROJECT_CONFIG_RELDIR}/${DEAL_II_PROJECT_CONFIG_NAME}Targets.cmake
+  )
diff --git a/cmake/setup_write_config.cmake b/cmake/setup_write_config.cmake
new file mode 100644
index 0000000..c458b72
--- /dev/null
+++ b/cmake/setup_write_config.cmake
@@ -0,0 +1,262 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2014 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+
+########################################################################
+#                                                                      #
+#                Query for git repository information:                 #
+#                                                                      #
+########################################################################
+
+DEAL_II_QUERY_GIT_INFORMATION()
+
+FILE(WRITE ${CMAKE_BINARY_DIR}/revision.log
+"###
+#
+#  Git information:
+#        Branch:   ${DEAL_II_GIT_BRANCH}
+#        Revision: ${DEAL_II_GIT_REVISION}
+#
+###"
+  )
+
+
+########################################################################
+#                                                                      #
+#              Write a nice configuration summary to file:             #
+#                                                                      #
+########################################################################
+
+SET(_log_detailed "${CMAKE_BINARY_DIR}/detailed.log")
+SET(_log_summary  "${CMAKE_BINARY_DIR}/summary.log")
+FILE(REMOVE ${_log_detailed} ${_log_summary})
+
+MACRO(_both)
+  # Write to both log files:
+  FILE(APPEND ${_log_detailed} "${ARGN}")
+  FILE(APPEND ${_log_summary} "${ARGN}")
+ENDMACRO()
+MACRO(_detailed)
+  # Only write to detailed.log:
+  FILE(APPEND ${_log_detailed} "${ARGN}")
+ENDMACRO()
+MACRO(_summary)
+  # Only write to summary.log:
+  FILE(APPEND ${_log_summary} "${ARGN}")
+ENDMACRO()
+
+_both(
+"###
+#
+#  ${DEAL_II_PACKAGE_NAME} configuration:
+#        CMAKE_BUILD_TYPE:       ${CMAKE_BUILD_TYPE}
+#        BUILD_SHARED_LIBS:      ${BUILD_SHARED_LIBS}
+#        CMAKE_INSTALL_PREFIX:   ${CMAKE_INSTALL_PREFIX}
+#        CMAKE_SOURCE_DIR:       ${CMAKE_SOURCE_DIR}
+"
+  )
+IF("${DEAL_II_GIT_SHORTREV}" STREQUAL "")
+  _both("#                                (version ${DEAL_II_PACKAGE_VERSION})\n")
+ELSE()
+  _both("#                                (version ${DEAL_II_PACKAGE_VERSION}, shortrev ${DEAL_II_GIT_SHORTREV})\n")
+ENDIF()
+_both(
+"#        CMAKE_BINARY_DIR:       ${CMAKE_BINARY_DIR}
+#        CMAKE_CXX_COMPILER:     ${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION} on platform ${CMAKE_SYSTEM_NAME} ${CMAKE_SYSTEM_PROCESSOR}
+#                                ${CMAKE_CXX_COMPILER}
+"
+  )
+
+IF(CMAKE_C_COMPILER_WORKS)
+  _detailed("#        CMAKE_C_COMPILER:       ${CMAKE_C_COMPILER}\n")
+ENDIF()
+IF(CMAKE_Fortran_COMPILER_WORKS)
+  _detailed("#        CMAKE_Fortran_COMPILER: ${CMAKE_Fortran_COMPILER}\n")
+ENDIF()
+_detailed("#        CMAKE_GENERATOR:        ${CMAKE_GENERATOR}\n")
+
+IF(CMAKE_CROSSCOMPILING)
+  _both(
+    "#\n#        CROSSCOMPILING!\n"
+    )
+ENDIF()
+
+IF(DEAL_II_STATIC_EXECUTABLE)
+  _both(
+    "#\n#        STATIC LINKAGE!\n"
+    )
+ENDIF()
+
+_both("#\n")
+# detailed.log only: dump the saved BASE_* values; per-build-type rows are gated on CMAKE_BUILD_TYPE.
+_detailed(
+"#  Base configuration (prior to feature configuration):
+#        DEAL_II_CXX_FLAGS:            ${BASE_CXX_FLAGS}
+"
+  )
+IF(CMAKE_BUILD_TYPE MATCHES "Release")
+  _detailed("#        DEAL_II_CXX_FLAGS_RELEASE:    ${BASE_CXX_FLAGS_RELEASE}\n")
+ENDIF()
+IF(CMAKE_BUILD_TYPE MATCHES "Debug")
+  _detailed("#        DEAL_II_CXX_FLAGS_DEBUG:      ${BASE_CXX_FLAGS_DEBUG}\n")
+ENDIF()
+
+_detailed("#        DEAL_II_LINKER_FLAGS:         ${BASE_LINKER_FLAGS}\n")
+IF(CMAKE_BUILD_TYPE MATCHES "Release")
+  _detailed("#        DEAL_II_LINKER_FLAGS_RELEASE: ${BASE_LINKER_FLAGS_RELEASE}\n")
+ENDIF()
+IF(CMAKE_BUILD_TYPE MATCHES "Debug")
+  _detailed("#        DEAL_II_LINKER_FLAGS_DEBUG:   ${BASE_LINKER_FLAGS_DEBUG}\n")
+ENDIF()
+
+_detailed("#        DEAL_II_DEFINITIONS:          ${BASE_DEFINITIONS}\n")
+IF(CMAKE_BUILD_TYPE MATCHES "Release")
+  _detailed("#        DEAL_II_DEFINITIONS_RELEASE:  ${BASE_DEFINITIONS_RELEASE}\n")
+ENDIF()
+IF(CMAKE_BUILD_TYPE MATCHES "Debug")
+  _detailed("#        DEAL_II_DEFINITIONS_DEBUG:    ${BASE_DEFINITIONS_DEBUG}\n")
+ENDIF()
+# User definitions are read from BASE_USER_DEFINITIONS* (NOTE(review): assumes USER_DEFINITIONS* are among the saved suffixes).
+_detailed("#        DEAL_II_USER_DEFINITIONS:     ${BASE_USER_DEFINITIONS}\n")
+IF(CMAKE_BUILD_TYPE MATCHES "Release")
+  _detailed("#        DEAL_II_USER_DEFINITIONS_REL: ${BASE_USER_DEFINITIONS_RELEASE}\n")
+ENDIF()
+IF(CMAKE_BUILD_TYPE MATCHES "Debug")
+  _detailed("#        DEAL_II_USER_DEFINITIONS_DEB: ${BASE_USER_DEFINITIONS_DEBUG}\n")
+ENDIF()
+
+_detailed("#        DEAL_II_INCLUDE_DIRS:         ${BASE_INCLUDE_DIRS}\n")
+_detailed("#        DEAL_II_USER_INCLUDE_DIRS:    ${BASE_USER_INCLUDE_DIRS}\n")
+_detailed("#        DEAL_II_BUNDLED_INCLUDE_DIRS: ${BASE_BUNDLED_INCLUDE_DIRS}\n")
+
+_detailed("#        DEAL_II_LIBRARIES:            ${BASE_LIBRARIES}\n")
+IF(CMAKE_BUILD_TYPE MATCHES "Release")
+  _detailed("#        DEAL_II_LIBRARIES_RELEASE:    ${BASE_LIBRARIES_RELEASE}\n")
+ENDIF()
+IF(CMAKE_BUILD_TYPE MATCHES "Debug")
+  _detailed("#        DEAL_II_LIBRARIES_DEBUG:      ${BASE_LIBRARIES_DEBUG}\n")
+ENDIF()
+
+_detailed("#\n")
+
+IF(NOT DEAL_II_SETUP_DEFAULT_COMPILER_FLAGS)
+  _both("#  WARNING: DEAL_II_SETUP_DEFAULT_COMPILER_FLAGS is set to OFF\n")
+ENDIF()
+_both("#  Configured Features (")
+IF(DEFINED DEAL_II_ALLOW_BUNDLED)
+  _both("DEAL_II_ALLOW_BUNDLED = ${DEAL_II_ALLOW_BUNDLED}, ")
+ENDIF()
+IF(DEAL_II_FORCE_AUTODETECTION)
+  _both("!!! DEAL_II_FORCE_AUTODETECTION=ON !!!, ")
+ENDIF()
+_both("DEAL_II_ALLOW_AUTODETECTION = ${DEAL_II_ALLOW_AUTODETECTION}):\n")
+
+
+GET_CMAKE_PROPERTY(_variables VARIABLES)
+FOREACH(_var ${_variables})
+  IF(_var MATCHES "DEAL_II_WITH")
+    LIST(APPEND _features "${_var}")
+  ELSEIF(_var MATCHES "DEAL_II_COMPONENT")
+    LIST(APPEND _components "${_var}")
+  ENDIF()
+ENDFOREACH()
+
+FOREACH(_var ${_features})
+  IF(${${_var}})
+
+    #
+    # The feature is enabled:
+    #
+    STRING(REGEX REPLACE "^DEAL_II_WITH_" "" _feature ${_var})
+    IF(FEATURE_${_feature}_EXTERNAL_CONFIGURED)
+      _both("#        ${_var} set up with external dependencies\n")
+    ELSEIF(FEATURE_${_feature}_BUNDLED_CONFIGURED)
+      IF(DEAL_II_FORCE_BUNDLED_${_feature})
+        _both("#        ${_var} set up with bundled packages (forced)\n")
+      ELSE()
+        _both("#        ${_var} set up with bundled packages\n")
+      ENDIF()
+    ELSE()
+     _both("#        ${_var} = ${${_var}}\n")
+    ENDIF()
+
+    #
+    # Print out version number:
+    #
+    IF(DEFINED ${_feature}_VERSION)
+      _detailed("#            ${_feature}_VERSION = ${${_feature}_VERSION}\n")
+    ENDIF()
+
+    #
+    # Special version numbers:
+    #
+    IF(_feature MATCHES "THREADS" AND DEFINED TBB_VERSION)
+      _detailed("#            TBB_VERSION = ${TBB_VERSION}\n")
+    ENDIF()
+    IF(_feature MATCHES "MPI" AND DEFINED OMPI_VERSION)
+      _detailed("#            OMPI_VERSION = ${OMPI_VERSION}\n")
+    ENDIF()
+
+    #
+    # Print out ${_feature}_DIR:
+    #
+    IF(NOT "${${_feature}_DIR}" STREQUAL "")
+      _detailed("#            ${_feature}_DIR = ${${_feature}_DIR}\n")
+    ENDIF()
+
+    #
+    # Print the feature configuration:
+    #
+    FOREACH(_var2
+      C_COMPILER CXX_COMPILER Fortran_COMPILER
+      ${DEAL_II_STRING_SUFFIXES} ${DEAL_II_LIST_SUFFIXES}
+      )
+      IF(DEFINED ${_feature}_${_var2})
+        _detailed("#            ${_feature}_${_var2} = ${${_feature}_${_var2}}\n")
+      ENDIF()
+    ENDFOREACH()
+  ELSE()
+    # FEATURE is disabled
+    _both("#      ( ${_var} = ${${_var}} )\n")
+  ENDIF()
+ENDFOREACH()
+
+_both(
+  "#\n#  Component configuration:\n"
+  )
+FOREACH(_var ${_components})
+  IF(_var MATCHES "DEAL_II_COMPONENT")
+    IF(${${_var}})
+      _both("#        ${_var}\n")
+      STRING(REPLACE "DEAL_II_COMPONENT_" "" _component ${_var})
+      LIST(APPEND _components ${_component})
+    ELSE()
+      _both("#      ( ${_var} = ${${_var}} )\n")
+    ENDIF()
+  ENDIF()
+ENDFOREACH()
+
+_summary(
+"#\n#  Detailed information (compiler flags, feature configuration) can be found in detailed.log
+#\n#  Run  $ "
+  )
+IF(CMAKE_GENERATOR MATCHES "Ninja")
+  _summary("ninja ")
+ELSE()
+_summary("make ")
+ENDIF()
+_summary("info  to print a help message with a list of top level targets\n")
+
+_both("#\n###")
diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt
new file mode 100644
index 0000000..1a5aae3
--- /dev/null
+++ b/contrib/CMakeLists.txt
@@ -0,0 +1,21 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+IF(DEAL_II_COMPONENT_PARAMETER_GUI)
+  MESSAGE(STATUS "Setting up parameter_gui")
+  ADD_SUBDIRECTORY(parameter_gui)
+  MESSAGE(STATUS "Setting up parameter_gui - Done")
+ENDIF()
+
diff --git a/contrib/README.md b/contrib/README.md
new file mode 100644
index 0000000..2298b2e
--- /dev/null
+++ b/contrib/README.md
@@ -0,0 +1,21 @@
+This folder contains contributions to the deal.II library
+=========================================================
+
+**PLEASE NOTE THAT THESE PROJECTS MAY BE COPYRIGHTED BY OTHERS THAN THE
+deal.II AUTHORS, but are included by permission. For details, consult the
+stated licenses below.**
+
+
+parameter_gui
+-------------
+
+Is a program written by Martin Steigemann and Wolfgang Bangerth and
+licensed under the GNU Lesser General Public License (LGPL) Version 2.1 or
+later. See `parameter_gui/README`, `parameter_gui/lgpl-2.1.txt`.
+
+
+utilities
+---------
+
+A collection of small programs for various purposes. Distributed under the
+same license as the deal.II library.
diff --git a/contrib/header-templates/header-template b/contrib/header-templates/header-template
new file mode 100644
index 0000000..34d9127
--- /dev/null
+++ b/contrib/header-templates/header-template
@@ -0,0 +1,15 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) @YEAR@ by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
diff --git a/contrib/header-templates/header-template.cmake b/contrib/header-templates/header-template.cmake
new file mode 100644
index 0000000..fb3e3aa
--- /dev/null
+++ b/contrib/header-templates/header-template.cmake
@@ -0,0 +1,14 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2013 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
diff --git a/contrib/parameter_gui/CMakeLists.txt b/contrib/parameter_gui/CMakeLists.txt
new file mode 100644
index 0000000..4b20ceb
--- /dev/null
+++ b/contrib/parameter_gui/CMakeLists.txt
@@ -0,0 +1,69 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2014 by Martin Steigemann and Wolfgang Bangerth
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
+#PROJECT(parameter_gui)
+
+IF("${DEAL_II_EXECUTABLE_RELDIR}" STREQUAL "")
+  SET(DEAL_II_EXECUTABLE_RELDIR "bin")
+ENDIF()
+
+FIND_PACKAGE(Qt4 REQUIRED QtCore QtGui QtXml)
+MARK_AS_ADVANCED(QT_QMAKE_EXECUTABLE)
+
+INCLUDE(${QT_USE_FILE})
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+
+QT4_WRAP_CPP(SOURCE_MOC
+  browse_lineedit.h
+  info_message.h
+  mainwindow.h
+  parameter_delegate.h
+  )
+
+QT4_ADD_RESOURCES(SOURCE_RCC
+  application.qrc
+  )
+
+ADD_EXECUTABLE(parameter_gui_exe
+  browse_lineedit.cpp
+  info_message.cpp
+  main.cpp
+  mainwindow.cpp
+  parameter_delegate.cpp
+  xml_parameter_reader.cpp
+  xml_parameter_writer.cpp
+  ${SOURCE_MOC}
+  ${SOURCE_RCC}
+  )
+SET_TARGET_PROPERTIES(parameter_gui_exe
+  PROPERTIES
+  OUTPUT_NAME parameter_gui
+  RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/${DEAL_II_EXECUTABLE_RELDIR}"
+  )
+TARGET_LINK_LIBRARIES(parameter_gui_exe ${QT_LIBRARIES})
+
+ADD_DEPENDENCIES(parameter_gui parameter_gui_exe)
+
+INSTALL(TARGETS parameter_gui_exe
+  RUNTIME DESTINATION ${DEAL_II_EXECUTABLE_RELDIR}
+  COMPONENT parameter_gui
+  )
+EXPORT(TARGETS parameter_gui_exe
+  FILE
+  ${CMAKE_BINARY_DIR}/${DEAL_II_PROJECT_CONFIG_RELDIR}/${DEAL_II_PROJECT_CONFIG_NAME}Targets.cmake
+  APPEND
+  )
diff --git a/contrib/parameter_gui/README b/contrib/parameter_gui/README
new file mode 100644
index 0000000..963da45
--- /dev/null
+++ b/contrib/parameter_gui/README
@@ -0,0 +1,5 @@
+This subdirectory contains the parameter_gui project.
+
+It is copyrighted by Martin Steigemann and Wolfgang Bangerth and
+distributed under the same license as the deal.II library, i.e.,
+the GNU Lesser General Public License (LGPL) version 2.1 or later.
diff --git a/contrib/parameter_gui/application.qrc b/contrib/parameter_gui/application.qrc
new file mode 100644
index 0000000..ef2d9e8
--- /dev/null
+++ b/contrib/parameter_gui/application.qrc
@@ -0,0 +1,7 @@
+<!DOCTYPE RCC><RCC version="1.0">
+ <qresource>
+     <file>images/logo_dealii_64.png</file>
+     <file>images/logo_dealii_gui.png</file>
+     <file>images/logo_dealii_gui_128.png</file>
+ </qresource>
+ </RCC>
diff --git a/contrib/parameter_gui/browse_lineedit.cpp b/contrib/parameter_gui/browse_lineedit.cpp
new file mode 100644
index 0000000..d2890b5
--- /dev/null
+++ b/contrib/parameter_gui/browse_lineedit.cpp
@@ -0,0 +1,126 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2013 by Martin Steigemann and Wolfgang Bangerth
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <QtGui>
+
+#include "browse_lineedit.h"
+
+
+namespace dealii
+{
+  namespace ParameterGui
+  {
+    BrowseLineEdit::BrowseLineEdit(const BrowseType  type, QWidget *parent)
+                  : QFrame(parent, 0),
+                    browse_type(type)
+    {
+      // child widgets; each one forwards its signal to a slot of this frame
+      line_editor   = new QLineEdit;
+      browse_button = new QPushButton("&Browse...");
+      connect(line_editor, SIGNAL(editingFinished()), this, SLOT(editing_finished()));
+      connect(browse_button, SIGNAL(clicked()), this, SLOT(browse()));
+
+      // horizontal row: the editor is added first, then the button
+      QHBoxLayout *row = new QHBoxLayout;
+      row->addWidget(line_editor);
+      row->addWidget(browse_button);
+      setLayout(row);
+
+      // focus policy and background of the frame itself
+      setFocusPolicy (Qt::StrongFocus);
+      setAutoFillBackground(true);
+      setBackgroundRole(QPalette::Highlight);
+    }
+
+
+
+
+    QSize BrowseLineEdit::sizeHint() const
+    {
+      // combined hint: sum of both widths, larger of both heights
+      const QSize  editor_hint = line_editor->sizeHint();
+      const QSize  button_hint = browse_button->sizeHint();
+
+      const int total_width = editor_hint.width() + button_hint.width();
+      const int max_height  = qMax(editor_hint.height(), button_hint.height());
+      return QSize (total_width, max_height);
+    }
+
+
+
+    QSize BrowseLineEdit::minimumSizeHint() const
+    {
+      // combined minimum hint: sum of both widths, larger of both heights
+      const QSize  editor_min = line_editor->minimumSizeHint();
+      const QSize  button_min = browse_button->minimumSizeHint();
+
+      const int total_width = editor_min.width() + button_min.width();
+      const int max_height  = qMax(editor_min.height(), button_min.height());
+      return QSize (total_width, max_height);
+    }
+
+
+
+    QString BrowseLineEdit::text() const
+    {
+      return line_editor->text();
+    }
+
+
+
+    void BrowseLineEdit::setText(const QString &str)
+    {
+      line_editor->setText(str);
+    }
+
+
+
+    void BrowseLineEdit::editing_finished()
+    {
+      emit editingFinished();
+    }
+
+
+
+    void BrowseLineEdit::browse()
+    {
+      // result of the dialog; starts out empty
+      QString  chosen = "";
+
+      if (browse_type == file)
+        {
+          // file mode: the dialog starts in the current path
+          chosen = QFileDialog::getOpenFileName(this, tr("Open File"),
+                                                QDir::currentPath(),
+                                                tr("All Files (*.*)"));
+        }
+      else if (browse_type == directory)
+        {
+          // directory mode: the dialog starts in the home path
+          chosen = QFileDialog::getExistingDirectory(this, tr("Open Directory"),
+                                                     QDir::homePath(),
+                                                     QFileDialog::ShowDirsOnly |
+                                                     QFileDialog::DontResolveSymlinks);
+        }
+
+      // an empty result leaves the text of the line editor untouched
+      if (chosen.isEmpty() || chosen.isNull())
+        return;
+      line_editor->setText(chosen);
+    }
+  }
+}
+
diff --git a/contrib/parameter_gui/browse_lineedit.h b/contrib/parameter_gui/browse_lineedit.h
new file mode 100644
index 0000000..1c14ec6
--- /dev/null
+++ b/contrib/parameter_gui/browse_lineedit.h
@@ -0,0 +1,121 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2013 by Martin Steigemann and Wolfgang Bangerth
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef BROWSELINEEDIT_H
+#define BROWSELINEEDIT_H
+
+#include <QFrame>
+#include <QLineEdit>
+#include <QFileDialog>
+#include <QPushButton>
+
+
+namespace dealii
+{
+/*! @addtogroup ParameterGui
+ *@{
+ */
+  namespace ParameterGui
+  {
+/**
+ * The BrowseLineEdit class provides a special line editor for the parameterGUI.
+ * While editing file- or directory names it is much easier to have a file-dialog
+ * and just click on existing files or directories. This editor provides a simple QLineEdit
+ * and a browse-button which opens a file- or a directory dialog. Clicking on existing files or directories
+ * copies the path to the line editor. Depending on the <tt>BrowseType</tt> given in the constructor
+ * the browse button opens a <tt>file</tt> or a <tt>directory</tt> dialog.
+ *
+ * @note This class is used in the graphical user interface for the @ref ParameterHandler class.
+ *       It is not compiled into the deal.II libraries and can not be used by applications using deal.II.
+ *
+ * @ingroup ParameterGui
+ * @author Martin Steigemann, Wolfgang Bangerth, 2010
+ */
+    class BrowseLineEdit : public QFrame
+    {
+      Q_OBJECT
+
+      public:
+				     /**
+				      * The browse button opens a <tt>file</tt> or
+				      * a <tt>directory</tt> dialog. This can be specified
+				      * in the constructor by setting this flag <tt>BrowseType</tt>.
+				      */
+        enum BrowseType {file = 0, directory = 1};
+				     /**
+				      * Constructor. The type of the browse dialog can be specified
+				      * by the flag <tt>BrowseType</tt>, the default is <tt>file</tt>.
+				      */
+        BrowseLineEdit (const BrowseType  type = file,
+                        QWidget          *parent = 0);
+
+				     /**
+				      * Reimplemented from the QWidget class.
+				      * Returns the size of the editor.
+				      */
+        QSize  sizeHint() const;
+				     /**
+				      * Reimplemented from the QWidget class.
+				      */
+        QSize  minimumSizeHint() const;
+				     /**
+				      * Returns the text of the line editor.
+				      */
+        QString  text() const;
+				     /**
+				      * This pattern stores the type of the browse dialog.
+				      */
+        BrowseType  browse_type;
+
+      public slots:
+				     /**
+				      * A <tt>slot</tt> to set @p str as text of the line editor.
+				      */
+        void setText(const QString &str);
+
+      signals:
+				     /**
+				      * This <tt>signal</tt> will be emitted, if editing is finished.
+				      */
+        void editingFinished();
+
+      private slots:
+				     /**
+				      * This <tt>slot</tt> should be always called, if editing is finished.
+				      */
+        void editing_finished();
+				     /**
+				      * This function opens a file- or a directory dialog as specified in the
+				      * constructor.
+				      */
+        void browse();
+
+      private:
+				     /**
+				      * The line editor.
+				      */
+        QLineEdit * line_editor;
+				     /**
+				      * The browse button.
+				      */
+        QPushButton * browse_button;
+    };
+  }
+/**@}*/
+}
+
+
+#endif
diff --git a/contrib/parameter_gui/dealii_parameter_gui.pro b/contrib/parameter_gui/dealii_parameter_gui.pro
new file mode 100644
index 0000000..834545d
--- /dev/null
+++ b/contrib/parameter_gui/dealii_parameter_gui.pro
@@ -0,0 +1,25 @@
+######################################################################
+# Automatically generated by qmake (2.01a) So. Dez 12 15:30:12 2010
+######################################################################
+
+TEMPLATE = app
+TARGET = 
+DEPENDPATH += .
+INCLUDEPATH += .
+DESTDIR = ../../lib/bin
+
+# Input
+HEADERS += browse_lineedit.h \
+           info_message.h \
+           mainwindow.h \
+           parameter_delegate.h \
+           xml_parameter_reader.h \
+           xml_parameter_writer.h
+SOURCES += browse_lineedit.cpp \
+           info_message.cpp \
+           main.cpp \
+           mainwindow.cpp \
+           parameter_delegate.cpp \
+           xml_parameter_reader.cpp \
+           xml_parameter_writer.cpp
+RESOURCES += application.qrc
diff --git a/contrib/parameter_gui/images/logo_dealii.png b/contrib/parameter_gui/images/logo_dealii.png
new file mode 100644
index 0000000..3b7c7d6
Binary files /dev/null and b/contrib/parameter_gui/images/logo_dealii.png differ
diff --git a/contrib/parameter_gui/images/logo_dealii_64.png b/contrib/parameter_gui/images/logo_dealii_64.png
new file mode 100644
index 0000000..0ecf03f
Binary files /dev/null and b/contrib/parameter_gui/images/logo_dealii_64.png differ
diff --git a/contrib/parameter_gui/images/logo_dealii_gui.png b/contrib/parameter_gui/images/logo_dealii_gui.png
new file mode 100644
index 0000000..546b453
Binary files /dev/null and b/contrib/parameter_gui/images/logo_dealii_gui.png differ
diff --git a/contrib/parameter_gui/images/logo_dealii_gui_128.png b/contrib/parameter_gui/images/logo_dealii_gui_128.png
new file mode 100644
index 0000000..98e3cf7
Binary files /dev/null and b/contrib/parameter_gui/images/logo_dealii_gui_128.png differ
diff --git a/contrib/parameter_gui/info_message.cpp b/contrib/parameter_gui/info_message.cpp
new file mode 100644
index 0000000..b5207d3
--- /dev/null
+++ b/contrib/parameter_gui/info_message.cpp
@@ -0,0 +1,104 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2013 by Martin Steigemann and Wolfgang Bangerth
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <QtGui>
+
+#include "info_message.h"
+
+
+namespace dealii
+{
+  namespace ParameterGui
+  {
+    // Builds the dialog widgets and reads the stored "showInformation" flag from settings.ini.
+    InfoMessage::InfoMessage(QWidget *parent) : QDialog(parent, 0)
+    {
+      show_again = true;                        // default; replaced by the stored value
+                                                // at the end of this constructor
+      QGridLayout * grid = new QGridLayout(this);
+
+      icon = new QLabel(this);                  // set an icon
+#ifndef QT_NO_MESSAGEBOX
+      icon->setPixmap(QMessageBox::standardIcon(QMessageBox::Information));
+      icon->setAlignment(Qt::AlignHCenter | Qt::AlignTop);
+#endif
+      grid->addWidget(icon, 0, 0, Qt::AlignTop);        // add the icon in the upper left corner
+
+      message = new QTextEdit(this);            // read-only view for the message text
+      message->setReadOnly(true);
+      grid->addWidget(message, 0, 1);           // and add the message on the right
+
+      again = new QCheckBox(this);              // add a check box, checked by default
+      again->setChecked(true);
+      again->setText(QErrorMessage::tr("&Show this message again"));
+      grid->addWidget(again, 1, 1, Qt::AlignTop);
+
+      ok = new QPushButton(this);               // and finally an OK button
+      ok->setText(QErrorMessage::tr("&OK"));
+#ifdef QT_SOFTKEYS_ENABLED
+      ok_action = new QAction(ok);              // define the action for the button
+      ok_action->setSoftKeyRole(QAction::PositiveSoftKey);
+      ok_action->setText(ok->text());
+      connect(ok_action, SIGNAL(triggered()), this, SLOT(accept()));
+      addAction(ok_action);
+#endif
+      connect(ok, SIGNAL(clicked()), this, SLOT(accept()));
+      ok->setFocus();                           // and set the focus on the button
+      grid->addWidget(ok, 2, 0, 1, 2, Qt::AlignCenter);
+
+      grid->setColumnStretch(1, 42);
+      grid->setRowStretch(0, 42);
+                                                // load settings from an ini-file
+      QString  settings_file = QDir::currentPath() + "/settings.ini";
+      // parented to this dialog so the object is deleted with it instead of leaking
+      settings = new QSettings (settings_file, QSettings::IniFormat, this);
+
+      settings->beginGroup("infoMessage");      // settings of this class live in the
+      show_again = settings->value("showInformation", true).toBool();   // group infoMessage
+      settings->endGroup();
+    }
+
+
+
+    void InfoMessage::setInfoMessage(const QString &message)
+    {
+      this->message->setText(message);					// set the message
+    }
+
+
+
+    void InfoMessage::showMessage()
+    {
+      if (!show_again) return;          // flag cleared: the dialog is not shown
+      show();
+    }
+
+
+
+    void InfoMessage::done(int r)
+    {
+      const bool keep_showing = again->isChecked();
+      if (keep_showing == false)        // box unchecked: write that to the settings
+        {
+          settings->beginGroup("infoMessage");
+          settings->setValue("showInformation", false);
+          settings->endGroup();
+        }
+      QDialog::done(r);                 // continue with the base class implementation
+    }
+  }
+}
+
diff --git a/contrib/parameter_gui/info_message.h b/contrib/parameter_gui/info_message.h
new file mode 100644
index 0000000..0dd1024
--- /dev/null
+++ b/contrib/parameter_gui/info_message.h
@@ -0,0 +1,113 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2013 by Martin Steigemann and Wolfgang Bangerth
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef INFOMESSAGE_H
+#define INFOMESSAGE_H
+
+#include <QDialog>
+#include <QSettings>
+#include <QCheckBox>
+#include <QTextEdit>
+#include <QLabel>
+
+
+namespace dealii
+{
+/*! @addtogroup ParameterGui
+ *@{
+ */
+  namespace ParameterGui
+  {
+/**
+ * The InfoMessage class implements a special info message for the parameterGUI.
+ * Besides showing an info message itself, the dialog shows a checkbox "Show this message again".
+ * If the user unchecks this box, this is stored in the "settings.ini" file and will be reloaded
+ * the next time the user opens the parameterGUI. The intention of such an info message is the following.
+ * The user should have some information on how to use the GUI "at hand"
+ * such as "how to edit parameter values" for example. But after reading this message, the user knows
+ * it and the message should not appear permanently.
+ *
+ * @note This class is used in the graphical user interface for the @ref ParameterHandler class.
+ *       It is not compiled into the deal.II libraries and can not be used by applications using deal.II.
+ *
+ * @ingroup ParameterGui
+ * @author Martin Steigemann, Wolfgang Bangerth, 2010
+ */
+    class InfoMessage : public QDialog
+    {
+      Q_OBJECT
+
+      public:
+				     /**
+				      * Constructor
+				      */
+        InfoMessage (QWidget *parent = 0);
+				     /**
+				      * With this function the @p message which will be shown in the
+				      * dialog can be set.
+				      */
+        void setInfoMessage(const QString &message);
+
+      public slots:
+				     /**
+				      * Show the dialog with the <tt>message</tt>.
+				      */
+        void showMessage();
+
+      protected:
+				     /**
+				      * Reimplemented from QDialog.
+				      */
+        void done(int r);
+
+      private:
+				     /**
+				      * This variable stores, if the <tt>message</tt> should be shown again the next time.
+				      */
+        bool show_again;
+				     /**
+				      * The <tt>Ok</tt> button.
+				      */
+        QPushButton * ok;
+				     /**
+				      * The checkbox<tt>Show this message again</tt>.
+				      */
+        QCheckBox * again;
+				     /**
+				      * The <tt>message</tt> editor.
+				      */
+        QTextEdit * message;
+				     /**
+				      * An <tt>icon</tt> for the dialog.
+				      */
+        QLabel * icon;
+#ifdef QT_SOFTKEYS_ENABLED
+				     /**
+				      * A action for pressing the <tt>Ok</tt> button.
+				      */
+        QAction * ok_action;
+#endif
+				     /**
+				      * An object for storing <tt>settings</tt> in a file.
+				      */
+        QSettings * settings;
+    };
+  }
+/**@}*/
+}
+
+
+#endif
diff --git a/contrib/parameter_gui/lgpl-2.1.txt b/contrib/parameter_gui/lgpl-2.1.txt
new file mode 100644
index 0000000..4362b49
--- /dev/null
+++ b/contrib/parameter_gui/lgpl-2.1.txt
@@ -0,0 +1,502 @@
+                  GNU LESSER GENERAL PUBLIC LICENSE
+                       Version 2.1, February 1999
+
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+ 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL.  It also counts
+ as the successor of the GNU Library Public License, version 2, hence
+ the version number 2.1.]
+
+                            Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+  This license, the Lesser General Public License, applies to some
+specially designated software packages--typically libraries--of the
+Free Software Foundation and other authors who decide to use it.  You
+can use it too, but we suggest you first think carefully about whether
+this license or the ordinary General Public License is the better
+strategy to use in any particular case, based on the explanations below.
+
+  When we speak of free software, we are referring to freedom of use,
+not price.  Our General Public Licenses are designed to make sure that
+you have the freedom to distribute copies of free software (and charge
+for this service if you wish); that you receive source code or can get
+it if you want it; that you can change the software and use pieces of
+it in new free programs; and that you are informed that you can do
+these things.
+
+  To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights.  These restrictions translate to certain responsibilities for
+you if you distribute copies of the library or if you modify it.
+
+  For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you.  You must make sure that they, too, receive or can get the source
+code.  If you link other code with the library, you must provide
+complete object files to the recipients, so that they can relink them
+with the library after making changes to the library and recompiling
+it.  And you must show them these terms so they know their rights.
+
+  We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+  To protect each distributor, we want to make it very clear that
+there is no warranty for the free library.  Also, if the library is
+modified by someone else and passed on, the recipients should know
+that what they have is not the original version, so that the original
+author's reputation will not be affected by problems that might be
+introduced by others.
+

+  Finally, software patents pose a constant threat to the existence of
+any free program.  We wish to make sure that a company cannot
+effectively restrict the users of a free program by obtaining a
+restrictive license from a patent holder.  Therefore, we insist that
+any patent license obtained for a version of the library must be
+consistent with the full freedom of use specified in this license.
+
+  Most GNU software, including some libraries, is covered by the
+ordinary GNU General Public License.  This license, the GNU Lesser
+General Public License, applies to certain designated libraries, and
+is quite different from the ordinary General Public License.  We use
+this license for certain libraries in order to permit linking those
+libraries into non-free programs.
+
+  When a program is linked with a library, whether statically or using
+a shared library, the combination of the two is legally speaking a
+combined work, a derivative of the original library.  The ordinary
+General Public License therefore permits such linking only if the
+entire combination fits its criteria of freedom.  The Lesser General
+Public License permits more lax criteria for linking other code with
+the library.
+
+  We call this license the "Lesser" General Public License because it
+does Less to protect the user's freedom than the ordinary General
+Public License.  It also provides other free software developers Less
+of an advantage over competing non-free programs.  These disadvantages
+are the reason we use the ordinary General Public License for many
+libraries.  However, the Lesser license provides advantages in certain
+special circumstances.
+
+  For example, on rare occasions, there may be a special need to
+encourage the widest possible use of a certain library, so that it becomes
+a de-facto standard.  To achieve this, non-free programs must be
+allowed to use the library.  A more frequent case is that a free
+library does the same job as widely used non-free libraries.  In this
+case, there is little to gain by limiting the free library to free
+software only, so we use the Lesser General Public License.
+
+  In other cases, permission to use a particular library in non-free
+programs enables a greater number of people to use a large body of
+free software.  For example, permission to use the GNU C Library in
+non-free programs enables many more people to use the whole GNU
+operating system, as well as its variant, the GNU/Linux operating
+system.
+
+  Although the Lesser General Public License is Less protective of the
+users' freedom, it does ensure that the user of a program that is
+linked with the Library has the freedom and the wherewithal to run
+that program using a modified version of the Library.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.  Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library".  The
+former contains code derived from the library, whereas the latter must
+be combined with the library in order to run.
+

+                  GNU LESSER GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License Agreement applies to any software library or other
+program which contains a notice placed by the copyright holder or
+other authorized party saying it may be distributed under the terms of
+this Lesser General Public License (also called "this License").
+Each licensee is addressed as "you".
+
+  A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+  The "Library", below, refers to any such software library or work
+which has been distributed under these terms.  A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language.  (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+  "Source code" for a work means the preferred form of the work for
+making modifications to it.  For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control compilation
+and installation of the library.
+
+  Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it).  Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+
+  1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+  You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+

+  2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) The modified work must itself be a software library.
+
+    b) You must cause the files modified to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    c) You must cause the whole of the work to be licensed at no
+    charge to all third parties under the terms of this License.
+
+    d) If a facility in the modified Library refers to a function or a
+    table of data to be supplied by an application program that uses
+    the facility, other than as an argument passed when the facility
+    is invoked, then you must make a good faith effort to ensure that,
+    in the event an application does not supply such function or
+    table, the facility still operates, and performs whatever part of
+    its purpose remains meaningful.
+
+    (For example, a function in a library to compute square roots has
+    a purpose that is entirely well-defined independent of the
+    application.  Therefore, Subsection 2d requires that any
+    application-supplied function or table used by this function must
+    be optional: if the application does not supply it, the square
+    root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library.  To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License.  (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.)  Do not make any other change in
+these notices.
+

+  Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+  This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+  4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+  If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library".  Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+  However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library".  The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+  When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library.  The
+threshold for this to be true is not precisely defined by law.
+
+  If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work.  (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+  Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+

+  6. As an exception to the Sections above, you may also combine or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+  You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License.  You must supply a copy of this License.  If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License.  Also, you must do one
+of these things:
+
+    a) Accompany the work with the complete corresponding
+    machine-readable source code for the Library including whatever
+    changes were used in the work (which must be distributed under
+    Sections 1 and 2 above); and, if the work is an executable linked
+    with the Library, with the complete machine-readable "work that
+    uses the Library", as object code and/or source code, so that the
+    user can modify the Library and then relink to produce a modified
+    executable containing the modified Library.  (It is understood
+    that the user who changes the contents of definitions files in the
+    Library will not necessarily be able to recompile the application
+    to use the modified definitions.)
+
+    b) Use a suitable shared library mechanism for linking with the
+    Library.  A suitable mechanism is one that (1) uses at run time a
+    copy of the library already present on the user's computer system,
+    rather than copying library functions into the executable, and (2)
+    will operate properly with a modified version of the library, if
+    the user installs one, as long as the modified version is
+    interface-compatible with the version that the work was made with.
+
+    c) Accompany the work with a written offer, valid for at
+    least three years, to give the same user the materials
+    specified in Subsection 6a, above, for a charge no more
+    than the cost of performing this distribution.
+
+    d) If distribution of the work is made by offering access to copy
+    from a designated place, offer equivalent access to copy the above
+    specified materials from the same place.
+
+    e) Verify that the user has already received a copy of these
+    materials or that you have already sent this user a copy.
+
+  For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it.  However, as a special exception,
+the materials to be distributed need not include anything that is
+normally distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+  It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system.  Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+

+  7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+    a) Accompany the combined library with a copy of the same work
+    based on the Library, uncombined with any other library
+    facilities.  This must be distributed under the terms of the
+    Sections above.
+
+    b) Give prominent notice with the combined library of the fact
+    that part of it is a work based on the Library, and explaining
+    where to find the accompanying uncombined form of the same work.
+
+  8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License.  Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License.  However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+  9. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Library or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+  10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties with
+this License.
+

+  11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under any
+particular circumstance, the balance of the section is intended to apply,
+and the section as a whole is intended to apply in other circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License may add
+an explicit geographical distribution limitation excluding those countries,
+so that distribution is permitted only in or among countries not thus
+excluded.  In such case, this License incorporates the limitation as if
+written in the body of this License.
+
+  13. The Free Software Foundation may publish revised and/or new
+versions of the Lesser General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation.  If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+

+  14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission.  For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this.  Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+                            NO WARRANTY
+
+  15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU.  SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+                     END OF TERMS AND CONDITIONS
+

+           How to Apply These Terms to Your New Libraries
+
+  If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change.  You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms of the
+ordinary General Public License).
+
+  To apply these terms, attach the following notices to the library.  It is
+safest to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the library's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+
+Also add information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the library, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the
+  library `Frob' (a library for tweaking knobs) written by James Random Hacker.
+
+  <signature of Ty Coon>, 1 April 1990
+  Ty Coon, President of Vice
+
+That's all there is to it!
diff --git a/contrib/parameter_gui/main.cpp b/contrib/parameter_gui/main.cpp
new file mode 100644
index 0000000..6229d7e
--- /dev/null
+++ b/contrib/parameter_gui/main.cpp
@@ -0,0 +1,71 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013 by Martin Steigemann and Wolfgang Bangerth
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <QApplication>
+#include <QSplashScreen>
+#include <QTimer>
+
+#include "mainwindow.h"
+
+/*! @addtogroup ParameterGui
+ *@{
+ */
+
+/**
+ * Main function for the parameterGUI.
+ * The parameterGUI is a graphical user interface for editing parameter files based on the XML format,
+ * created by the ParameterHandler::print_parameters() function with ParameterHandler::XML as second argument.
+ *
+ * @image html logo_dealii_gui.png
+ *
+ * @note This program is the graphical user interface for the @ref ParameterHandler class.
+ *       It is not compiled into the deal.II libraries and can not be used by applications using deal.II.
+ *
+ *
+ * <p>This program uses Qt version > 4.3. Qt is licensed under the GNU General Public License
+ * version 3.0. Please see http://qt.nokia.com/products/licensing for an overview of Qt licensing.
+ * Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies). Qt is a Nokia product.
+ * See http://qt.nokia.com/ for more information.</p>
+ *
+ *
+ * @ingroup ParameterGui
+ * @author Martin Steigemann, Wolfgang Bangerth, 2010
+ */
+int main(int argc, char *argv[])
+{
+  Q_INIT_RESOURCE(application);						// init resources such as icons or graphics
+
+  QApplication app(argc, argv);
+
+  QSplashScreen * splash = new QSplashScreen;				// setup a splash screen
+  splash->setPixmap(QPixmap(":/images/logo_dealii_gui.png"));
+  splash->show();
+
+  QTimer::singleShot(3000, splash, SLOT(close()));			// and close it after 3000 ms
+
+  app.setApplicationName("parameterGUI for deal.II");			// setup the application name
+
+  const QString filename =						// if a parameter file is specified at the command line,
+    (argc > 1) ? QString::fromLocal8Bit(argv[1]) : QString();		// decode it with the local 8-bit codec (a null name otherwise);
+  dealii::ParameterGui::MainWindow * main_win =				// Qt 4's implicit QString(const char *) conversion assumes
+    new dealii::ParameterGui::MainWindow (filename);			// Latin-1 by default and would mangle other file names.
+
+  QTimer::singleShot(1500, main_win, SLOT(show()));			// show the main window with a short delay
+									// so we can see the splash screen
+  return app.exec();
+}
+/**@}*/
+
diff --git a/contrib/parameter_gui/mainwindow.cpp b/contrib/parameter_gui/mainwindow.cpp
new file mode 100644
index 0000000..533fa8b
--- /dev/null
+++ b/contrib/parameter_gui/mainwindow.cpp
@@ -0,0 +1,385 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2013 by Martin Steigemann and Wolfgang Bangerth
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <QtGui>
+
+#include "mainwindow.h"
+#include "parameter_delegate.h"
+#include "xml_parameter_reader.h"
+#include "xml_parameter_writer.h"
+
+
+namespace dealii
+{
+  namespace ParameterGui
+  {
+    MainWindow::MainWindow(const QString  &filename)
+    {										// build the window and load filename, if one is given
+      QString  settings_file = QDir::currentPath() + "/settings.ini";		// a file for user settings
+
+      gui_settings = new QSettings (settings_file, QSettings::IniFormat, this);	// load settings; parented to the window so that
+										// Qt frees the object. Up to now, we do not read any settings,
+										// but this can be used in the future for customizing the GUI.
+
+      tree_widget = new QTreeWidget;						// tree for showing XML tags
+
+										// Setup the tree and the window first:
+      tree_widget->header()->setResizeMode(QHeaderView::ResizeToContents);	// behavior of the header sections:
+										// "Interactive: User can resize sections"
+										// "Fixed: User cannot resize sections"
+										// "Stretch: Qt will automatically resize sections to fill available space"
+										// "ResizeToContents: Qt will automatically resize sections to optimal size"
+      tree_widget->setHeaderLabels(QStringList() << tr("(Sub)Sections/Parameters")
+                                                 << tr("Value"));
+      tree_widget->setMouseTracking(true);					// enables mouse events e.g. showing ToolTips
+										// and documentation in the StatusLine
+      tree_widget->setEditTriggers(QAbstractItemView::DoubleClicked|
+                                   QAbstractItemView::SelectedClicked|
+                                   QAbstractItemView::EditKeyPressed);
+										// set which actions will initiate item editing: Editing starts when:
+										// DoubleClicked: an item is double clicked
+										// SelectedClicked: clicking on an already selected item
+										// EditKeyPressed: the platform edit key has been pressed over an item
+										// (AnyKeyPressed, any key pressed over an item, is not enabled)
+
+      tree_widget->setItemDelegate(new ParameterDelegate(1, tree_widget));	// delegate for editing column 1; the view does not own it, so parent it
+      setCentralWidget(tree_widget);
+										// connect: if the tree changes, the window will know
+      connect(tree_widget, SIGNAL(itemChanged(QTreeWidgetItem*, int)), this, SLOT(tree_was_modified()));
+
+      create_actions();								// create window actions as "Open",...
+      create_menus();								// and menus
+      statusBar()->showMessage(tr("Ready, start editing by double-clicking or hitting F2!"));
+      setWindowTitle(tr("[*]parameterGUI"));					// set window title
+
+      resize(800, 600);								// set window width and height
+
+      if (filename.size() > 3)							// if there is a file name, try to load the file.
+        load_file(filename);							// a valid file has the xml extension, so we require size() > 3
+    }
+
+
+
+    void MainWindow::open()
+    {
+      if (!maybe_save())							// pending changes were not resolved,
+        return;									// so keep the current content
+      const QString  selected =							// ask the user for an XML file
+        QFileDialog::getOpenFileName(this, tr("Open XML Parameter File"),
+                                     QDir::currentPath(),
+                                     tr("XML Files (*.xml)"));
+
+      if (!selected.isEmpty())							// an empty name means nothing was selected
+        load_file(selected);							// otherwise load the content
+    }
+
+
+
+    bool MainWindow::save()
+    {
+										// without a current file we first have to
+										// ask for a name via save_as(), otherwise
+										// we write to the file we are working on
+      return current_file.isEmpty() ? save_as() : save_file(current_file);
+    }
+
+
+
+    bool MainWindow::save_as()
+    {
+      const QString  target =							// ask the user where to save
+        QFileDialog::getSaveFileName(this, tr("Save XML Parameter File"),
+                                     QDir::currentPath(),
+                                     tr("XML Files (*.xml)"));
+
+      if (!target.isEmpty())							// a name was chosen:
+        return save_file(target);						// write the content to that file
+
+      return false;								// no file was selected
+    }
+
+
+
+    void MainWindow::about()
+    {										// show the "About parameterGUI" message box
+#ifdef Q_WS_MAC
+      static QPointer<QMessageBox> old_msg_box;
+      // Mac only: if the box stored by an earlier call still exists, bring it to the front instead of creating another one
+      if (old_msg_box)
+        {
+          old_msg_box->show();
+          old_msg_box->raise();
+          old_msg_box->activateWindow();
+          return;
+        };
+#endif
+      // window title, caption (with the Qt version inserted for %1) and body text of the box
+      QString title = "About parameterGUI";
+
+      QString trAboutparameterGUIcaption;
+      trAboutparameterGUIcaption = QMessageBox::tr(
+        "<h3>parameterGUI: A GraphicalUserInterface for parameter handling in deal.II</h3>"
+        "<p>This program uses Qt version %1.</p>"
+        ).arg(QLatin1String(QT_VERSION_STR));
+
+      QString trAboutparameterGUItext;
+      trAboutparameterGUItext = QMessageBox::tr(
+        "<p>The parameterGUI is a graphical user interface for editing XML parameter files "
+        "created by the ParameterHandler class of deal.II. Please see "
+        "<a href=\"http://www.dealii.org/7.0.0/doxygen/deal.II/classParameterHandler.html\">dealii.org/doc</a> for more information. "
+        "The parameterGUI parses XML files into a tree structure and provides "
+        " special editors for different types of parameters.</p>"
+
+        "<p><b>Editing parameter values:</b><br>"
+        "Parameters can be edited by (double-)clicking on the value or "
+        "by pressing the platform edit key (F2 on Linux) over an parameter item.</p>"
+
+        "<p><b>Editors for parameter values:</b>"
+        " <ul>"
+        "  <li>Integer- and Double-type parameters: SpinBox</li>"
+        "  <li>Booleans: ComboBox</li>"
+        "  <li>Selection: ComboBox</li>"
+        "  <li>File- and DirectoryName parameters: BrowseLineEditor</li>"
+        "  <li>Anything|MultipleSelection|List: LineEditor</li>"
+        " </ul>"
+        "</p>"
+
+        "<p>Please see <a href=\"http://www.dealii.org\">dealii.org</a> for more information</p>"
+        "<p><b>Authors:</b><br> "
+        "Martin Steigemann,  <a href=\"mailto:martin.steigemann at mathematik.uni-kassel.de\">martin.steigemann at mathematik.uni-kassel.de</a><br>"
+        "Wolfgang Bangerth,  <a href=\"mailto:bangerth at math.tamu.edu\">bangerth at math.tamu.edu</a></p>"
+        );
+      // the box is created on the heap and marked WA_DeleteOnClose
+      QMessageBox *msg_box = new QMessageBox;
+      msg_box->setAttribute(Qt::WA_DeleteOnClose);
+      msg_box->setWindowTitle(title);
+      msg_box->setText(trAboutparameterGUIcaption);
+      msg_box->setInformativeText(trAboutparameterGUItext);
+
+      QPixmap pm(QLatin1String(":/images/logo_dealii_gui_128.png"));
+      // use the logo as icon only if the pixmap could be loaded
+      if (!pm.isNull())
+        msg_box->setIconPixmap(pm);
+      // Mac: remember the box and show() it; elsewhere: run it with exec()
+#ifdef Q_WS_MAC
+      old_msg_box = msg_box;
+      msg_box->show();
+#else
+      msg_box->exec();
+#endif
+    }
+
+
+
+    void MainWindow::tree_was_modified()
+    {
+      setWindowModified(true);							// remember that the content was modified;
+										// we use the inherited windowModified mechanism
+										// to show a "*" at the "[*]" placeholder of
+										// the window title while there are unsaved changes
+    }
+
+
+
+    void MainWindow::show_message ()
+    {
+										// create an info dialog as a child of this window
+      info_message = new InfoMessage(this);
+      info_message->setWindowTitle(QString("parameterGUI"));
+      const QString  hint =							// explain how values can be edited
+        tr("Start Editing by double-clicking on the parameter value or"
+           " by hitting the platform edit key. For example, on Linux this is the F2-key!");
+      info_message->setInfoMessage(hint);
+      info_message->showMessage();
+    }
+
+
+
+    void MainWindow::closeEvent(QCloseEvent *event)
+    {										// reimplemented from the QMainWindow class
+      if (!maybe_save())							// modified content was not saved or discarded:
+        event->ignore();							// keep the window open
+      else									// nothing can get lost any more,
+        event->accept();							// so let the window close
+    }
+
+
+
+    void MainWindow::create_actions()
+    {										// create all actions used by the menus
+      QStyle * style = tree_widget->style();					// source of the standard icons
+
+      open_act = new QAction(tr("&Open..."), this);				// create actions
+      open_act->setIcon(style->standardPixmap(QStyle::SP_DialogOpenButton));    // and set icons
+      open_act->setShortcut(Qt::CTRL + Qt::Key_O);				// set a short cut
+      open_act->setStatusTip(tr("Open a XML file"));				// set a status tip
+      connect(open_act, SIGNAL(triggered()), this, SLOT(open()));		// and connect
+
+      save_act = new QAction(tr("&Save ..."), this);
+      save_act->setIcon(style->standardPixmap(QStyle::SP_DialogSaveButton));
+      save_act->setShortcut(Qt::CTRL + Qt::Key_S);
+      save_act->setStatusTip(tr("Save the current XML file"));
+      connect(save_act, SIGNAL(triggered()), this, SLOT(save()));
+
+      save_as_act = new QAction(tr("Save &As..."), this);			// "A" as mnemonic: "S" is taken by "Save"
+      save_as_act->setIcon(style->standardPixmap(QStyle::SP_DialogSaveButton));
+      save_as_act->setShortcut(Qt::CTRL + Qt::SHIFT + Qt::Key_S);		// the usual "Save As" key, not a variant of the Exit key
+      save_as_act->setStatusTip(tr("Save the current XML file as"));
+      connect(save_as_act, SIGNAL(triggered()), this, SLOT(save_as()));
+
+      exit_act = new QAction(tr("E&xit"), this);
+      exit_act->setIcon(style->standardPixmap(QStyle::SP_DialogCloseButton));
+      exit_act->setShortcut(Qt::CTRL + Qt::Key_Q);
+      exit_act->setStatusTip(tr("Exit the parameterGUI application"));
+      connect(exit_act, SIGNAL(triggered()), this, SLOT(close()));		// close() ends up in closeEvent()
+
+      about_act = new QAction(tr("&About"), this);
+      about_act->setIcon(style->standardPixmap(QStyle::SP_FileDialogInfoView));
+      about_act->setStatusTip(tr("Show the parameterGUI About box"));
+      connect(about_act, SIGNAL(triggered()), this, SLOT(about()));
+
+      about_qt_act = new QAction(tr("About &Qt"), this);
+      about_qt_act->setStatusTip(tr("Show the Qt library's About box"));
+      connect(about_qt_act, SIGNAL(triggered()), qApp, SLOT(aboutQt()));	// handled by the application object
+    }
+
+
+
+    void MainWindow::create_menus()
+    {
+      QMenuBar * bar = menuBar();						// the menu bar of this window
+      file_menu = bar->addMenu(tr("&File"));					// file menu with its four actions
+      file_menu->addAction(open_act);
+      file_menu->addAction(save_act);
+      file_menu->addAction(save_as_act);
+      file_menu->addAction(exit_act);
+
+      bar->addSeparator();
+      help_menu = bar->addMenu(tr("&Help"));					// help menu with both about boxes
+      help_menu->addAction(about_act);
+      help_menu->addAction(about_qt_act);
+    }
+
+
+
+    bool MainWindow::maybe_save()
+    {
+      if (!isWindowModified())							// nothing was changed, nothing to save
+        return true;
+
+      const QMessageBox::StandardButton answer =				// ask what to do with the changes
+        QMessageBox::warning(this, tr("parameterGUI"),
+                             tr("The content has been modified.\n"
+                                "Do you want to save your changes?"),
+                             QMessageBox::Save | QMessageBox::Discard | QMessageBox::Cancel);
+
+      switch (answer)
+        {
+        case QMessageBox::Save:   return save();				// go on only if saving worked
+        case QMessageBox::Cancel: return false;				// the user does not want to go on
+        default:                  return true;				// the changes are discarded
+        }
+    }
+
+
+
+    bool MainWindow::save_file(const QString &filename)
+    {										// write the tree to filename, return true on success
+      QFile  file(filename);
+      bool  written = file.open(QFile::WriteOnly | QFile::Text);		// open the file for writing
+
+      if (written)
+        {
+          XMLParameterWriter  xml_writer(tree_widget);			// create a xml_writer
+          written = xml_writer.write_xml_file(&file);				// and write the xml file
+        };
+      if (!written)								// opening or writing failed: tell the user,
+        {									// instead of returning false silently
+          QMessageBox::warning(this, tr("parameterGUI"),
+                                     tr("Cannot write file %1:\n%2.")
+                                     .arg(filename)
+                                     .arg(file.errorString()));
+          return false;
+        };
+
+      statusBar()->showMessage(tr("File saved"), 2000);				// if we succeed, show a message
+      set_current_file(filename);						// and reset the window title and modified state
+      return true;
+    }
+
+
+
+    void MainWindow::load_file(const QString &filename)
+    {										// replace the tree content by the content of filename
+      QFile  file(filename);
+
+      if (!file.open(QFile::ReadOnly | QFile::Text))				// open the file
+        {
+          QMessageBox::warning(this, tr("parameterGUI"),
+                                     tr("Cannot read file %1:\n%2.")
+                                     .arg(filename)
+                                     .arg(file.errorString()));
+          return;								// the current content stays untouched
+        };
+
+      tree_widget->clear();							// clear the tree
+      XMLParameterReader  xml_reader(tree_widget);				// and read the xml file
+
+      if (!xml_reader.read_xml_file(&file))
+        {
+          QMessageBox::warning(this, tr("parameterGUI"),
+                                     tr("Parse error in file %1:\n\n%2")
+                                     .arg(filename)
+                                     .arg(xml_reader.error_string()));
+          tree_widget->clear();						// drop what was read so far and forget the previous
+          set_current_file(QString());					// file, so that a later "Save" cannot overwrite it
+        }
+      else
+        {
+          statusBar()->showMessage(tr("File loaded - Start editing by double-clicking or hitting F2"), 25000);
+          set_current_file(filename);						// show a message and set current file
+          show_message ();							// show some information on how values can be edited
+        };
+    }
+
+
+
+    void MainWindow::set_current_file(const QString  &filename)
+    {
+										// We use the windowModified mechanism from the
+										// QMainWindow class to indicate in the window title,
+										// if the content was modified.
+										// If there is "[*]" in the window title, a * will be
+										// added automatically at this position, if the
+										// window was modified.
+										// We set the window title to
+										// filename[*] - parameterGUI
+
+      current_file = filename;							// remember the file we are working on
+
+      QString  win_title;							// build the title as a QString: the file name is
+										// neither squeezed through an 8-bit string nor tr()
+      if (current_file.isEmpty())						// if filename is empty
+        win_title = tr("[*]parameterGUI");					// set the title to our application name,
+      else
+        win_title = filename + tr("[*] - parameterGUI");			// otherwise put the file name in front;
+										// only the fixed part of the title is translated
+
+      setWindowTitle(win_title);						// set the window title
+      setWindowModified(false);							// and reset window modified
+    }
+  }
+}
diff --git a/contrib/parameter_gui/mainwindow.h b/contrib/parameter_gui/mainwindow.h
new file mode 100644
index 0000000..c9c49e5
--- /dev/null
+++ b/contrib/parameter_gui/mainwindow.h
@@ -0,0 +1,183 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2013 by Martin Steigemann and Wolfgang Bangerth
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef MAINWINDOW_H
+#define MAINWINDOW_H
+
+#include <QMainWindow>
+#include <QTreeWidget>
+#include <QDialog>
+#include <QSettings>
+
+#include "info_message.h"
+
+
+namespace dealii
+{
+/*! @addtogroup ParameterGui
+ *@{
+ */
+  namespace ParameterGui
+  {
+/**
+ * The MainWindow class of the the parameterGUI.
+ * The parameterGUI is a graphical user interface for editing parameter files based on the XML format,
+ * created by the @ref ParameterHandler::print_parameters() function with @ref ParameterHandler::XML as second argument.
+ * Please see <tt>Representation of Parameters</tt> in the documentation of the @ref ParameterHandler class for more details.
+ * The MainWindow class provides the basic functionality of the GUI as save- and load-file-actions and documentation.
+ * The parameterGUI provides special editors for the different types of parameters defined in the ParameterHandler class.
+ *
+ * @note This class is used in the graphical user interface for the @ref ParameterHandler class.
+ *       It is not compiled into the deal.II libraries and can not be used by applications using deal.II.
+ *
+ * @ingroup ParameterGui
+ * @author Martin Steigemann, Wolfgang Bangerth, 2010
+ */
+    class MainWindow : public QMainWindow
+    {
+      Q_OBJECT
+
+      public:
+				     /**
+				      * Constructor.
+				      * If a @p filename is given,
+				      * the MainWindow tries to open
+				      * and parse the file.
+				      */
+        MainWindow(const QString  &filename = "");
+
+      protected:
+				     /**
+				      * Reimplemented from QMainWindow.
+				      * We ask whether changes should be saved.
+				      */
+        void closeEvent(QCloseEvent *event);
+
+      private slots:
+
+				     /**
+				      * Open a parameter file.
+				      */
+        void open();
+				     /**
+				      * Save the parameter file.
+				      */
+        bool save();
+				     /**
+				      * Open a file dialog to save the parameter file.
+				      */
+        bool save_as();
+				     /**
+				      * Show some information on the parameterGUI.
+				      */
+        void about();
+
+				     /**
+				      * A <tt>slot</tt> that should always be called
+				      * when parameter values were changed.
+				      */
+        void tree_was_modified();
+
+      private:
+				     /**
+				      * Show an information dialog explaining how
+				      * parameters can be edited.
+				      */
+        void show_message ();
+				     /**
+				      * This function creates all actions.
+				      */
+        void create_actions();
+				     /**
+				      * This function creates all menus.
+				      */
+        void create_menus();
+				     /**
+				      * This function checks whether parameters were changed
+				      * and, if so, shows a dialog asking whether changes should be saved.
+				      * This function should always be called
+				      * before opening a new parameter file or before closing the GUI.
+				      */
+        bool maybe_save ();
+				     /**
+				      * Save parameters to @p filename in XML format.
+				      */
+        bool save_file (const QString &filename);
+				     /**
+				      * Load parameters from @p filename in XML format.
+				      */
+        void load_file (const QString &filename);
+				     /**
+				      * This function writes the current @p filename to the window title.
+				      */
+        void set_current_file (const QString  &filename);
+				     /**
+				      * This is the tree structure in which we store all parameters.
+				      */
+        QTreeWidget * tree_widget;
+				     /**
+				      * This menu provides all file actions such as <tt>open</tt>, <tt>save</tt>, <tt>save as</tt>
+				      * and <tt>exit</tt>.
+				      */
+        QMenu * file_menu;
+				     /**
+				      * This menu provides some information <tt>about</tt> the parameterGUI
+				      * and <tt>about Qt</tt>.
+				      */
+        QMenu * help_menu;
+				     /**
+				      * QAction <tt>open</tt> a file.
+				      */
+        QAction * open_act;
+				     /**
+				      * QAction <tt>save</tt> a file.
+				      */
+        QAction * save_act;
+				     /**
+				      * QAction <tt>save as</tt> a file.
+				      */
+        QAction * save_as_act;
+				     /**
+				      * QAction <tt>exit</tt> the GUI.
+				      */
+        QAction * exit_act;
+				     /**
+				      * QAction <tt>about</tt> the parameterGUI.
+				      */
+        QAction * about_act;
+				     /**
+				      * QAction <tt>about</tt> Qt.
+				      */
+        QAction * about_qt_act;
+				     /**
+				      * This value stores the current <tt>filename</tt> we work on.
+				      */
+        QString  current_file;
+				     /**
+				      * This dialog shows a short information message after loading a file.
+				      */
+        InfoMessage * info_message;
+				     /**
+				      * An object for storing user settings.
+				      */
+        QSettings * gui_settings;
+    };
+  }
+/**@}*/
+}
+
+
+#endif
diff --git a/contrib/parameter_gui/parameter_delegate.cpp b/contrib/parameter_gui/parameter_delegate.cpp
new file mode 100644
index 0000000..462486a
--- /dev/null
+++ b/contrib/parameter_gui/parameter_delegate.cpp
@@ -0,0 +1,329 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2013 by Martin Steigemann and Wolfgang Bangerth
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <QtGui>
+
+#include "parameter_delegate.h"
+
+#include <limits>
+
+
+namespace dealii
+{
+  namespace ParameterGui
+  {
+    ParameterDelegate::ParameterDelegate(const int value_column, QObject *parent)
+                     : QItemDelegate(parent),
+                       value_column(value_column),	// the column this delegate provides editors for
+                       double_steps(0.1),		// a click in a double editor changes the value by this amount
+                       double_decimals(14),		// number of decimals shown in a double editor
+                       int_steps(1)			// step value for increasing or decreasing integers
+    {
+					// nothing left to do here, all members
+					// are set in the initializer list above
+    }
+
+
+
+    QSize ParameterDelegate::sizeHint(const QStyleOptionViewItem &option, const QModelIndex &index) const
+    {
+      if (index.column() != value_column)		// all other columns keep the standard size
+        return QItemDelegate::sizeHint(option, index);
+
+					// Disabled alternative, kept for reference: enlarge only
+					// the lines of FileName and DirectoryName parameters.
+/*
+      QString pattern_description = index.data(Qt::StatusTipRole).toString();	// load pattern description
+										// stored in the StatusLine
+      QRegExp  rx_string("\\b(FileName|DirectoryName)\\b");
+
+      if (rx_string.indexIn (pattern_description) != -1)
+        {
+          return QSize(400,35);					// we increase the height for FileName and
+        }							// DirectoryName to show a "browse" button
+      else
+        return QItemDelegate::sizeHint(option, index);
+*/
+
+					// every line of the value column gets room for an editor
+      return QSize(400,30);
+    }
+
+
+
+    void ParameterDelegate::paint(QPainter *painter, const QStyleOptionViewItem &option, const QModelIndex &index) const
+    {
+      bool  is_path_value = false;						// FileName or DirectoryName value?
+
+      if (index.column() == value_column)
+        {
+          QString pattern_description = index.data(Qt::StatusTipRole).toString();	// the pattern description
+											// is stored as status tip
+          QRegExp  rx_string("\\b(FileName|DirectoryName)\\b");
+          is_path_value = (rx_string.indexIn (pattern_description) != -1);
+        }
+
+      if (!is_path_value)							// everything else is painted
+        {									// by the standard delegate
+          QItemDelegate::paint(painter, option, index);
+          return;
+        }
+
+      QString value = index.model()->data(index, Qt::DisplayRole).toString();	// take the value
+
+      QStyleOptionViewItem my_option = option;					// copy the options and
+      my_option.displayAlignment = Qt::AlignLeft | Qt::AlignVCenter;		// change the alignment
+
+      drawDisplay(painter, my_option, my_option.rect, value);			// print the text in the display
+      drawFocus(painter, my_option, my_option.rect);				// and the focus rectangle, if needed
+    }
+
+
+
+    QWidget *ParameterDelegate::createEditor(QWidget *parent,
+                                             const QStyleOptionViewItem &option,
+                                             const QModelIndex &index) const
+    {											// create an editor matching the parameter type
+      if (index.column() == value_column)
+        {
+          QString pattern_description = index.data(Qt::StatusTipRole).toString();	// load pattern description
+											// stored in the StatusLine
+          QRegExp  rx_string("\\b(Anything|MultipleSelection|List|Map)\\b"),
+                   rx_filename("\\b(FileName)\\b"),
+                   rx_dirname("\\b(DirectoryName)\\b"),
+                   rx_integer("\\b(Integer)\\b"),
+                   rx_double("\\b(Double|Float|Floating)\\b"),
+                   rx_selection("\\b(Selection)\\b"),
+                   rx_bool("\\b(Bool)\\b");
+
+          if (rx_string.indexIn (pattern_description) != -1)				// if the type is "Anything"
+            {
+              QLineEdit * line_editor = new QLineEdit(parent);				// choose a LineEditor
+
+              connect(line_editor, SIGNAL(editingFinished()),				// and connect editors signal
+                      this, SLOT(commit_and_close_editor()));				// to the closer function
+
+              return line_editor;
+            }
+          else if (rx_filename.indexIn (pattern_description) != -1)			// if the type is "FileName"
+            {
+              BrowseLineEdit * filename_editor =					// choose a BrowseLineEditor
+                                 new BrowseLineEdit(BrowseLineEdit::file, parent);
+
+              connect(filename_editor, SIGNAL(editingFinished()),
+                      this, SLOT(commit_and_close_editor()));
+
+              return filename_editor;
+            }
+          else if (rx_dirname.indexIn (pattern_description) != -1)			// if the type is "DirectoryName"
+            {
+              BrowseLineEdit * dirname_editor =						// choose a BrowseLineEditor
+                                 new BrowseLineEdit(BrowseLineEdit::directory, parent);
+
+              connect(dirname_editor, SIGNAL(editingFinished()),
+                      this, SLOT(commit_and_close_editor()));
+
+              return dirname_editor;
+            }
+          else if (rx_integer.indexIn (pattern_description) != -1)		// if the type is "Integer"
+            {
+              QSpinBox * spin_box = new QSpinBox(parent);			// choose a spin box
+
+              const int min_int_value = std::numeric_limits<int>::min();
+              const int max_int_value = std::numeric_limits<int>::max();
+
+              spin_box->setMaximum(max_int_value);				// set max and min from std::numeric_limits
+              spin_box->setMinimum(min_int_value);
+              spin_box->setSingleStep(int_steps);				// and every click is a SingleStep
+
+              connect(spin_box, SIGNAL(editingFinished()),			// connect editors signal to the closer function
+                      this, SLOT(commit_and_close_editor()));
+
+              return spin_box;
+            }
+          else if (rx_double.indexIn (pattern_description) != -1)		// the same with "Double"
+            {
+              QDoubleSpinBox * double_spin_box = new QDoubleSpinBox(parent);	// choose a spin box
+
+              const double min_double_value = -std::numeric_limits<double>::max();
+              const double max_double_value = std::numeric_limits<double>::max();
+
+              double_spin_box->setMaximum(max_double_value);		// set max and min from std::numeric_limits
+              double_spin_box->setMinimum(min_double_value);
+              double_spin_box->setDecimals(double_decimals);		// show "double_decimals" decimals
+              double_spin_box->setSingleStep(double_steps);		// and every click is a SingleStep
+
+              connect(double_spin_box, SIGNAL(editingFinished()),		// connect editors signal to the closer function
+                      this, SLOT(commit_and_close_editor()));
+
+              return double_spin_box;
+            }
+          else if (rx_selection.indexIn (pattern_description) != -1)		// and selections
+            {
+              QComboBox * combo_box = new QComboBox(parent);			// we assume that pattern_description is of the form
+										// "Type: [Selection item1|item2| ....|item ]    "
+              std::vector<std::string> choices;					// list with the different items
+              std::string  tmp(pattern_description.toStdString());
+
+              if (tmp.find("[") != std::string::npos)				// delete all char before [
+                tmp.erase (0, tmp.find("[")+1);
+
+              if (tmp.find("]") != std::string::npos)				// delete all char after ]
+                tmp.erase (tmp.find("]"),tmp.length());
+
+              if (tmp.find(" ") != std::string::npos)				// delete the leading word "Selection "
+                tmp.erase (0, tmp.find(" ")+1);
+
+              while (tmp.find('|') != std::string::npos)			// extract items
+                {
+                  choices.push_back(std::string(tmp, 0, tmp.find('|')));
+                  tmp.erase (0, tmp.find('|')+1);
+                };
+
+              tmp.erase (tmp.find_last_not_of(' ')+1);				// strip trailing blanks only, so that a last
+										// item containing blanks is not cut off
+
+              choices.push_back(tmp);						// add last item
+
+              for (unsigned int i=0; i<choices.size(); ++i)			// add items verbatim: they are values, not GUI text to translate
+                combo_box->addItem (QString::fromStdString(choices[i]), QString::fromStdString(choices[i]));
+
+              combo_box->setEditable(false);
+
+              connect(combo_box, SIGNAL(currentIndexChanged(int)),		// connect editors signal to the closer function
+                      this, SLOT(commit_and_close_editor()));
+
+              return combo_box;
+           }
+          else if (rx_bool.indexIn (pattern_description) != -1)			// and booleans
+            {
+              QComboBox * combo_box = new QComboBox(parent);
+
+              std::vector<std::string> choices;					// list with the different items
+              choices.push_back(std::string("true"));				// add true
+              choices.push_back(std::string("false"));				// and false
+
+              for (unsigned int i=0; i<choices.size(); ++i)			// add items verbatim, as for selections
+                combo_box->addItem (QString::fromStdString(choices[i]), QString::fromStdString(choices[i]));
+
+              combo_box->setEditable(false);
+
+              connect(combo_box, SIGNAL(currentIndexChanged(int)),		// connect editors signal to the closer function
+                      this, SLOT(commit_and_close_editor()));
+
+              return combo_box;
+            }
+          else
+            {
+              return QItemDelegate::createEditor(parent, option, index);	// unknown type: use the standard editor
+            };
+        };
+
+      return 0;				// if it is not the column "parameter values", do nothing
+    }
+
+
+
+    void ParameterDelegate::setEditorData(QWidget *editor, const QModelIndex &index) const
+    {											// copy the value of the model into the editor
+      if (index.column() == value_column)
+        {
+          QString pattern_description = index.data(Qt::StatusTipRole).toString();	// load pattern description
+											// stored in the StatusLine
+          QRegExp  rx_filename("\\b(FileName)\\b"),
+                   rx_dirname("\\b(DirectoryName)\\b"),
+                   rx_selection("\\b(Selection)\\b");
+
+          if (rx_filename.indexIn (pattern_description) != -1)				// if the type is "FileName"
+            {
+              QString  file_name = index.data(Qt::DisplayRole).toString();
+
+              BrowseLineEdit *filename_editor = qobject_cast<BrowseLineEdit *>(editor);	// set the text of the editor
+              filename_editor->setText(file_name);
+            }
+          else if (rx_dirname.indexIn (pattern_description) != -1)			// if the type is "DirectoryName"
+            {
+              QString  dir_name = index.data(Qt::DisplayRole).toString();
+
+              BrowseLineEdit *dirname_editor = qobject_cast<BrowseLineEdit *>(editor);	// set the text of the editor
+              dirname_editor->setText(dir_name);
+            }
+          else if (rx_selection.indexIn (pattern_description) != -1)			// if we have a combo box,
+            {
+              const QString  value = index.data(Qt::DisplayRole).toString();
+
+              QComboBox * combo_box = qobject_cast<QComboBox *>(editor);
+
+              const int  item = combo_box->findText(value);				// look up the value literally: used as a regular
+              if (item != -1)								// expression, a value with special characters
+                combo_box->setCurrentIndex(item);					// would select a wrong item or none
+            }
+          else
+            QItemDelegate::setEditorData(editor, index);				// if it is not FileName,
+											// DirectoryName or Selection
+											// use the standard delegate
+        };
+    }
+
+
+
+    void ParameterDelegate::commit_and_close_editor()
+    {											// slot connected to the editors made in createEditor()
+      QWidget * editor = qobject_cast<QWidget *>(sender());			// the object whose signal called this slot
+      emit commitData(editor);							// ask for the editor data to be written to the model
+      emit closeEditor(editor);							// and announce that editing is finished
+    }
+
+
+
+    void ParameterDelegate::setModelData(QWidget *editor, QAbstractItemModel *model,
+                                         const QModelIndex &index) const
+    {
+										// write the value of the editor back to the model
+      if (index.column() != value_column)					// only the value column is handled
+        return;
+
+      QString  description = index.data(Qt::StatusTipRole).toString();		// the pattern description
+										// is stored as status tip
+      QRegExp  rx_filename("\\b(FileName)\\b"),
+               rx_dirname("\\b(DirectoryName)\\b"),
+               rx_selection("\\b(Selection)\\b");
+
+      const bool  uses_browse_editor =						// FileName and DirectoryName share one editor type
+        (rx_filename.indexIn (description) != -1) ||
+        (rx_dirname.indexIn (description) != -1);
+
+										// pick the way the value is read from the editor
+      if (uses_browse_editor)
+        {
+										// take the text of the BrowseLineEdit
+          BrowseLineEdit * browse_editor = qobject_cast<BrowseLineEdit *>(editor);
+          model->setData(index, browse_editor->text());
+        }
+      else if (rx_selection.indexIn (description) != -1)
+        {
+										// take the selected item of the combo box
+          QComboBox * combo_box = qobject_cast<QComboBox *>(editor);
+          model->setData(index, combo_box->currentText());
+        }
+      else
+        {									// all other types: use the standard delegate
+          QItemDelegate::setModelData(editor, model, index);
+        }
+    }
+  }
+}
+
diff --git a/contrib/parameter_gui/parameter_delegate.h b/contrib/parameter_gui/parameter_delegate.h
new file mode 100644
index 0000000..ddf401b
--- /dev/null
+++ b/contrib/parameter_gui/parameter_delegate.h
@@ -0,0 +1,122 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2013 by Martin Steigemann and Wolfgang Bangerth
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef PARAMETERDELEGATE_H
+#define PARAMETERDELEGATE_H
+
+#include <QItemDelegate>
+#include <QModelIndex>
+#include <QObject>
+#include <QLineEdit>
+#include <QComboBox>
+#include <QFileDialog>
+
+#include "browse_lineedit.h"
+
+
+namespace dealii
+{
+/*! @addtogroup ParameterGui
+ *@{
+ */
+  namespace ParameterGui
+  {
+/**
+ * The ParameterDelegate class implements special delegates for the QTreeWidget class used in the parameterGUI.
+ * The QTreeWidget class provides several standard delegates for editing parameters shown in the
+ * tree structure. The ParameterDelegate class provides special editors for the different types of parameters defined in
+ * the ParameterHandler class. For all parameter types based on strings, such as "Anything", "MultipleSelection", "Map" and
+ * "List", a simple line editor is shown. In the case of integer and double type parameters the editor is a spin box, and for
+ * "Selection" type parameters a combo box is shown. For parameters of type "FileName" and "DirectoryName"
+ * the delegate shows a @ref BrowseLineEdit editor. The column of the tree structure with the parameter values has to be set
+ * in the constructor.
+ *
+ * @note This class is used in the graphical user interface for the @ref ParameterHandler class.
+ *       It is not compiled into the deal.II libraries and can not be used by applications using deal.II.
+ *
+ * @ingroup ParameterGui
+ * @author Martin Steigemann, Wolfgang Bangerth, 2010
+ */
+    class ParameterDelegate : public QItemDelegate
+    {
+      Q_OBJECT
+
+      public:
+				     /**
+				      * Constructor. @p value_column specifies the column
+				      * of the parameter tree this delegate will be used on,
+				      * @p parent is the optional parent object.
+				      */
+        ParameterDelegate (const int value_column, QObject *parent = 0);
+				     /**
+				      * Reimplemented from QItemDelegate.
+				      * This function creates the appropriate editor for the parameter
+				      * based on the <tt>index</tt>.
+				      */
+        QWidget * createEditor(QWidget *parent, const QStyleOptionViewItem &option,
+                               const QModelIndex &index) const;
+				     /**
+				      * Reimplemented from QItemDelegate.
+				      */
+        QSize sizeHint(const QStyleOptionViewItem &option, const QModelIndex &index) const;
+				     /**
+				      * Reimplemented from QItemDelegate.
+				      */
+        void paint(QPainter *painter, const QStyleOptionViewItem &option, const QModelIndex &index) const;
+				     /**
+				      * Reimplemented from QItemDelegate.
+				      */
+        void setEditorData(QWidget *editor, const QModelIndex &index) const;
+				     /**
+				      * Reimplemented from QItemDelegate.
+				      * Writes the content of @p editor back to @p model
+				      * for indices in the value column.
+				      */
+        void setModelData(QWidget *editor, QAbstractItemModel *model,
+                          const QModelIndex &index) const;
+
+      private slots:
+				     /**
+				      * Private slot of this class; it is not part of the
+				      * QItemDelegate interface.
+				      * NOTE(review): judging by its name, it presumably commits
+				      * the data of the sending editor and closes it -- confirm
+				      * against the implementation.
+				      */
+        void commit_and_close_editor();
+
+      private:
+				     /**
+				      * The column this delegate will be used on.
+				      */
+        int value_column;
+				     /**
+				      * For parameters of type <tt>double</tt> a spin box
+				      * will be shown as editor. Any click on the spin box
+				      * will change the value by <tt>double_steps</tt>.
+				      */
+        double  double_steps;
+				     /**
+				      * For parameters of type <tt>integer</tt> a spin box
+				      * will be shown as editor. Any click on the spin box
+				      * will change the value by <tt>int_steps</tt>.
+				      */
+        unsigned int  int_steps;
+				     /**
+				      * For parameters of type <tt>double</tt> a spin box
+				      * will be shown as editor. The spin box will show
+				      * parameters with a precision of <tt>double_decimals</tt>.
+				      */
+        unsigned int  double_decimals;
+    };
+  }
+/**@}*/
+}
+
+
+#endif
diff --git a/contrib/parameter_gui/parameters.xml b/contrib/parameter_gui/parameters.xml
new file mode 100644
index 0000000..bdf3b3d
--- /dev/null
+++ b/contrib/parameter_gui/parameters.xml
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="utf-8"?>
+<ParameterHandler><BasicControl><Equation><value>Elasticity</value><default_value>Elasticity</default_value><documentation>Basic equation to solve</documentation><pattern>0</pattern><pattern_description>[Anything]</pattern_description></Equation><Domain><value>CtsSpecimen</value><default_value>Specimen</default_value><documentation>Domain</documentation><pattern>1</pattern><pattern_description>[Selection CtSpecimen|CtsSpecimen|SENBeam ]</pattern_description></Domain><AsymptoticOrder><val [...]
\ No newline at end of file
diff --git a/contrib/parameter_gui/xml_parameter_reader.cpp b/contrib/parameter_gui/xml_parameter_reader.cpp
new file mode 100644
index 0000000..a608ea4
--- /dev/null
+++ b/contrib/parameter_gui/xml_parameter_reader.cpp
@@ -0,0 +1,368 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2013 by Martin Steigemann and Wolfgang Bangerth
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <QtGui>
+
+#include "xml_parameter_reader.h"
+
+
+namespace dealii
+{
+  namespace ParameterGui
+  {
+    // Remember the tree widget that will receive the parameters and build the
+    // icons for subsections (closed/open folder) and for parameters (file)
+    // from the standard pixmaps of the style of that widget.
+    XMLParameterReader::XMLParameterReader(QTreeWidget *tree_widget)
+      :
+      tree_widget(tree_widget)
+    {
+      QStyle *widget_style = tree_widget->style();
+
+      const QPixmap closed_folder = widget_style->standardPixmap(QStyle::SP_DirClosedIcon);
+      const QPixmap open_folder   = widget_style->standardPixmap(QStyle::SP_DirOpenIcon);
+      const QPixmap file          = widget_style->standardPixmap(QStyle::SP_FileIcon);
+
+      // a collapsed subsection shows the closed folder, an expanded one the open folder
+      subsection_icon.addPixmap(closed_folder, QIcon::Normal, QIcon::Off);
+      subsection_icon.addPixmap(open_folder, QIcon::Normal, QIcon::On);
+
+      parameter_icon.addPixmap(file);
+    }
+
+
+
+    // Read the XML document provided by @p device. Everything in front of the
+    // root element
+    //   <ParameterHandler>
+    //     <subsection> ... </subsection>
+    //   </ParameterHandler>
+    // is skipped; the content of the root element is handed over to
+    // parse_parameters(). Returns true if the XML reader reports no error.
+    bool XMLParameterReader::read_xml_file(QIODevice *device)
+    {
+      xml.setDevice(device);
+
+      QXmlStreamReader::TokenType token = xml.readNext();
+
+      while (token != QXmlStreamReader::Invalid)
+        {
+          const bool found_root = xml.isStartElement() && xml.name() == "ParameterHandler";
+
+          if (found_root)
+            {
+              parse_parameters();
+
+              return !xml.error();
+            }
+
+          token = xml.readNext();
+        }
+
+      // the reader stopped before a root element showed up
+      xml.raiseError(QObject::tr("The file is not an ParameterHandler XML file."));
+
+      return !xml.error();
+    }
+
+
+
+    QString XMLParameterReader::error_string() const
+    {
+      return QObject::tr("%1\nLine %2, column %3")
+             .arg(xml.errorString())
+             .arg(xml.lineNumber())
+             .arg(xml.columnNumber());
+    }
+
+
+
+    // Walk over the content of the <ParameterHandler> element the reader is
+    // positioned on. Every start element found on the way is a top-level
+    // subsection or parameter and is read by read_subsection_element(). The
+    // walk ends at </ParameterHandler> or when the reader reports an invalid
+    // token.
+    void XMLParameterReader::parse_parameters()
+    {
+      Q_ASSERT(xml.isStartElement() && xml.name() == "ParameterHandler");
+
+      for (QXmlStreamReader::TokenType token = xml.readNext();
+           token != QXmlStreamReader::Invalid;
+           token = xml.readNext())
+        {
+          const bool closes_root = xml.isEndElement() && xml.name() == "ParameterHandler";
+
+          if (closes_root)
+            return;
+
+          // top-level items have no parent item in the tree
+          if (xml.isStartElement())
+            read_subsection_element(0);
+        }
+    }
+
+
+
+    // Read the element the reader is currently positioned on and add it as a
+    // new item below @p parent (or as a top-level item if @p parent is 0).
+    //
+    // The structure of the parameter file is assumed to be of the form
+    //
+    //  <subsection>
+    //    <subsection>
+    //      ...
+    //        <parameter>
+    //          <value> ... </value>
+    //          ...
+    //          <pattern_description> ... </pattern_description>
+    //        </parameter>
+    //        ...
+    //    </subsection>
+    //    ...
+    //  </subsection>
+    //
+    // Subsections and parameters both carry user-specified names, so the name
+    // of an element does not tell which of the two it is. The decision is
+    // made by looking at the first start element inside: if it is <value>,
+    // the element is a parameter and is completed by read_parameter_element(),
+    // otherwise it is a nested subsection and this function calls itself.
+    void XMLParameterReader::read_subsection_element(QTreeWidgetItem *parent)
+    {
+      Q_ASSERT(xml.isStartElement());
+
+      QTreeWidgetItem *subsection = create_child_item(parent);
+
+      subsection->setIcon(0, subsection_icon);
+      subsection->setText(0, demangle(xml.name().toString()));
+
+      // the folder is not expanded initially
+      tree_widget->setItemExpanded(subsection, false);
+
+      while (xml.readNext() != QXmlStreamReader::Invalid)
+        {
+          // the end element closes this subsection or parameter
+          if (xml.isEndElement())
+            break;
+
+          if (!xml.isStartElement())
+            continue;
+
+          if (xml.name() == "value")
+            {
+              // we have found a parameter, its value can be edited
+              subsection->setFlags(subsection->flags() | Qt::ItemIsEditable);
+              read_parameter_element (subsection);
+            }
+          else
+            {
+              // a nested subsection: the item itself must not be editable.
+              // OR-ing with Qt::NoItemFlags would leave the flags untouched,
+              // so the editable bit is cleared explicitly.
+              subsection->setFlags(subsection->flags() & ~Qt::ItemIsEditable);
+              read_subsection_element (subsection);
+            }
+        }
+    }
+
+
+
+    // Fill the tree item @p parent with the parameter whose <value> element
+    // the reader is currently positioned on. The columns of the item are
+    // used as follows:
+    //   1: value, 2: default value, 3: documentation,
+    //   4: pattern, 5: pattern description.
+    // The elements following <value> are consumed up to and including
+    // <pattern_description>. Any other element, or a value that cannot be
+    // converted to the type named in the pattern description, raises an
+    // error on the XML reader.
+    void XMLParameterReader::read_parameter_element(QTreeWidgetItem *parent)
+    {
+      Q_ASSERT(xml.isStartElement() && xml.name() == "value");
+
+      parent->setText(1, xml.readElementText());
+      parent->setIcon(0, parameter_icon);
+
+      while (xml.readNext() != QXmlStreamReader::Invalid)
+        {
+          if (!xml.isStartElement())
+            continue;
+
+          if (xml.name() == "default_value")
+            {
+              parent->setText(2, xml.readElementText());
+            }
+          else if (xml.name() == "documentation")
+            {
+              const QString documentation = xml.readElementText();
+              parent->setText(3, documentation);
+
+              if (!documentation.isEmpty())
+                {
+                  // tool tip for both visible columns,
+                  // status tip for the name column
+                  parent->setToolTip(0, "Documentation: " + documentation);
+                  parent->setToolTip(1, "Documentation: " + documentation);
+                  parent->setStatusTip(0, "Documentation: " + documentation);
+                }
+            }
+          else if (xml.name() == "pattern")
+            {
+              // only needed for writing back to XML later
+              parent->setText(4, xml.readElementText());
+            }
+          else if (xml.name() == "pattern_description")
+            {
+              const QString pattern_description = xml.readElementText();
+              parent->setText(5, pattern_description);
+
+              // show the type and the default value in the status line
+              parent->setStatusTip(1, "Type: " + pattern_description + "   Default: " + parent->text(2));
+
+              // In order to store values with the correct data type, look
+              // for the type names in the pattern description. The '|' of
+              // the boolean selection has to be escaped, otherwise it acts
+              // as an alternation and every description containing the
+              // word "false" would be treated as a boolean.
+              QRegExp  rx_string("\\b(Anything|FileName|DirectoryName|Selection|List|MultipleSelection)\\b"),
+                       rx_integer("\\b(Integer)\\b"),
+                       rx_double("\\b(Float|Floating|Double)\\b"),
+                       rx_bool("\\b(Selection true\\|false)\\b");
+
+              if (rx_string.indexIn (pattern_description) != -1)
+                {
+                  // string-like types are stored as QString
+                  const QString value = parent->text(1);
+
+                  parent->setData(1, Qt::EditRole, value);
+                  parent->setData(1, Qt::DisplayRole, value);
+                }
+              else if (rx_integer.indexIn (pattern_description) != -1)
+                {
+                  bool ok = true;
+                  const int value = parent->text(1).toInt(&ok);
+
+                  if (ok)
+                    {
+                      parent->setData(1, Qt::EditRole, value);
+                      parent->setData(1, Qt::DisplayRole, value);
+                    }
+                  else
+                    xml.raiseError(QObject::tr("Cannot convert integer type to integer!"));
+                }
+              else if (rx_double.indexIn (pattern_description) != -1)
+                {
+                  bool ok = true;
+                  const double value = parent->text(1).toDouble(&ok);
+
+                  if (ok)
+                    {
+                      parent->setData(1, Qt::EditRole, value);
+                      parent->setData(1, Qt::DisplayRole, value);
+                    }
+                  else
+                    xml.raiseError(QObject::tr("Cannot convert double type to double!"));
+                }
+
+              // Booleans are a special kind of selection, so this check is
+              // done in addition to the string case above.
+              if (rx_bool.indexIn (pattern_description) != -1)
+                {
+                  // compare the value as plain text; it must not be
+                  // interpreted as a regular expression
+                  const QString text = parent->text(1);
+
+                  const bool is_true  = (text == QLatin1String("true"));
+                  const bool is_false = (text == QLatin1String("false"));
+
+                  if (is_true || is_false)
+                    {
+                      // clearing the text is needed because the standard
+                      // delegate is used for booleans
+                      parent->setText(1, "");
+                      parent->setData(1, Qt::EditRole, is_true);
+                      parent->setData(1, Qt::DisplayRole, is_true);
+                    }
+                  else
+                    xml.raiseError(QObject::tr("Cannot convert boolen type to boolean!"));
+                }
+
+              // <pattern_description> is the last entry of a parameter
+              break;
+            }
+          else
+            {
+              // any other element violates the assumed structure
+              // of the parameter file
+              xml.raiseError(QObject::tr("Incomplete or unknown Parameter!"));
+              break;
+            }
+        }
+    }
+
+
+
+    // Create a new tree item named after the XML element the reader is
+    // currently positioned on. The new item becomes a child of @p item, or a
+    // top-level item of the tree widget if @p item is 0.
+    QTreeWidgetItem *XMLParameterReader::create_child_item(QTreeWidgetItem *item)
+    {
+      QTreeWidgetItem *const new_item = (item != 0
+                                         ?
+                                         new QTreeWidgetItem(item)
+                                         :
+                                         new QTreeWidgetItem(tree_widget));
+
+      // the tag name is used both as data and as text of the first column
+      new_item->setData(0, Qt::DisplayRole, xml.name().toString());
+      new_item->setText(0, xml.name().toString());
+
+      return new_item;
+    }
+
+
+
+    namespace
+    {
+      // Return the value (0...15) of the lower-case hexadecimal digit @p c,
+      // or -1 if @p c is not such a digit.
+      int hex_digit_value (const char c)
+      {
+        if (c >= '0' && c <= '9')
+          return c - '0';
+        if (c >= 'a' && c <= 'f')
+          return 10 + (c - 'a');
+        return -1;
+      }
+    }
+
+
+
+    // Undo the name mangling of the ParameterHandler class (this function is
+    // copied from there and adapted to QString): every sequence "_xy", where
+    // x and y are lower-case hexadecimal digits, is replaced by the character
+    // with code 0xxy. All other characters are copied unchanged.
+    //
+    // A truncated or malformed escape sequence triggers a Q_ASSERT. Where
+    // assertions are disabled, the underscore is copied verbatim instead of
+    // reading beyond the end of the string.
+    QString XMLParameterReader::demangle (const QString &s)
+    {
+      const std::string  s_temp (s.toStdString());
+
+      std::string u;
+      u.reserve (s_temp.size());
+
+      for (std::string::size_type i=0; i<s_temp.size(); ++i)
+        {
+          if (s_temp[i] != '_')
+            {
+              u.push_back (s_temp[i]);
+              continue;
+            }
+
+          // an escape sequence needs two more characters
+          const bool complete = (i+2 < s_temp.size());
+          const int  high     = (complete ? hex_digit_value (s_temp[i+1]) : -1);
+          const int  low      = (complete ? hex_digit_value (s_temp[i+2]) : -1);
+
+          Q_ASSERT (high >= 0 && low >= 0);
+
+          if (high < 0 || low < 0)
+            {
+              // only reachable without assertions: keep the character as it is
+              u.push_back ('_');
+              continue;
+            }
+
+          u.push_back (static_cast<char>(static_cast<unsigned char>(high * 16 + low)));
+
+          // skip the two characters
+          i += 2;
+        }
+
+      QString  v (u.c_str());
+
+      return v;
+    }
+  }
+}
+
diff --git a/contrib/parameter_gui/xml_parameter_reader.h b/contrib/parameter_gui/xml_parameter_reader.h
new file mode 100644
index 0000000..41e8a9f
--- /dev/null
+++ b/contrib/parameter_gui/xml_parameter_reader.h
@@ -0,0 +1,128 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2013 by Martin Steigemann and Wolfgang Bangerth
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef XMLPARAMETERREADER_H
+#define XMLPARAMETERREADER_H
+
+#include <QIcon>
+#include <QXmlStreamReader>
+#include <QTreeWidget>
+#include <QTreeWidgetItem>
+
+
+namespace dealii
+{
+/*! @addtogroup ParameterGui
+ *@{
+ */
+  namespace ParameterGui
+  {
+/**
+ * The XMLParameterReader class provides an interface to parse parameters from XML files to a QTreeWidget.
+ * This class makes extensive use of the QXmlStreamReader class, which implements the basic functionalities
+ * for parsing XML files.
+ *
+ * @note This class is used in the graphical user interface for the @ref ParameterHandler class.
+ *       It is not compiled into the deal.II libraries and can not be used by applications using deal.II.
+ *
+ * @ingroup ParameterGui
+ * @author Martin Steigemann, Wolfgang Bangerth, 2010
+ */
+    class XMLParameterReader
+    {
+      public:
+				     /**
+				      * Constructor.
+				      * The parameter values will be stored in @p tree_widget.
+				      */
+        XMLParameterReader (QTreeWidget *tree_widget);
+				     /**
+				      * This function reads the parameters from @p device into the <tt>tree_widget</tt>.
+				      * We use the QXmlStreamReader class for this.
+				      * There must be a start element
+				      * <code><ParameterHandler></code>;
+				      * if none is found, an error is raised on the XML reader
+				      * and <tt>false</tt> is returned. No exception is thrown.
+				      * The function returns <tt>true</tt> if the XML reader
+				      * reports no error.
+				      */
+        bool read_xml_file (QIODevice *device);
+				     /**
+				      * This function returns the error message of the XML reader
+				      * together with the current line and column number.
+				      */
+        QString error_string () const;
+
+      private:
+				     /**
+				      * This function implements a loop over the XML file
+				      * and parses XML elements. It calls @ref read_subsection_element
+				      * for every start element until the <code></ParameterHandler></code>
+				      * element is found or the XML reader returns an invalid token.
+				      * No exception is thrown.
+				      */
+        void parse_parameters ();
+				     /**
+				      * This function parses a <tt>subsection</tt>
+				      * and adds it as a child to @p parent.
+				      * If the next element is <code><value></code>,
+				      * this function calls @ref read_parameter_element,
+				      * otherwise it calls itself recursively.
+				      */
+        void read_subsection_element (QTreeWidgetItem *parent);
+				     /**
+				      * This function parses a <tt>parameter</tt> and
+				      * stores its entries in the columns of @p parent.
+				      * A <tt>parameter</tt> description consists of five elements:
+				      * @code
+				      *   <value>value</value>
+				      *   <default_value>default_value</default_value>
+				      *   <documentation>documentation</documentation>
+				      *   <pattern>pattern</pattern>
+				      *   <pattern_description>[pattern_description]</pattern_description>
+				      * @endcode
+				      * If an unexpected element is found, an error is raised
+				      * on the XML reader. No exception is thrown.
+				      */
+        void read_parameter_element (QTreeWidgetItem *parent);
+				     /**
+				      * Copied from the @ref ParameterHandler class.
+				      * Unmangle a string @p s into its original form.
+				      */
+        QString  demangle (const QString &s);
+				     /**
+				      * This helper function creates a new child of @p item in the tree,
+				      * or a new top-level item of the tree if @p item is 0. The new
+				      * item is named after the current XML element.
+				      */
+        QTreeWidgetItem * create_child_item(QTreeWidgetItem *item);
+				     /**
+				      * The QXmlStreamReader object for reading XML elements.
+				      */
+        QXmlStreamReader  xml;
+				     /**
+				      * A pointer to the tree structure.
+				      */
+        QTreeWidget * tree_widget;
+				     /**
+				      * An icon for subsections in the tree structure.
+				      */
+        QIcon  subsection_icon;
+				     /**
+				      * An icon for parameters in the tree structure.
+				      */
+        QIcon  parameter_icon;
+    };
+  }
+/**@}*/
+}
+
+
+#endif
diff --git a/contrib/parameter_gui/xml_parameter_writer.cpp b/contrib/parameter_gui/xml_parameter_writer.cpp
new file mode 100644
index 0000000..4e33312
--- /dev/null
+++ b/contrib/parameter_gui/xml_parameter_writer.cpp
@@ -0,0 +1,103 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2013 by Martin Steigemann and Wolfgang Bangerth
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <QtGui>
+
+#include "xml_parameter_writer.h"
+
+
+namespace dealii
+{
+  namespace ParameterGui
+  {
+    // Remember the tree widget whose items will be written and let the XML
+    // writer format its output automatically.
+    XMLParameterWriter::XMLParameterWriter(QTreeWidget *tree_widget)
+      :
+      tree_widget(tree_widget)
+    {
+      const bool auto_formatting = true;
+
+      xml.setAutoFormatting(auto_formatting);
+    }
+
+
+
+    // Write all items of the tree widget to @p device as an XML document
+    // with the root element <ParameterHandler>.
+    //
+    // Returns true if the XML writer reported no error, and false if writing
+    // to the device failed or the data contained characters that cannot be
+    // written.
+    bool XMLParameterWriter::write_xml_file(QIODevice *device)
+    {
+      xml.setDevice(device);
+      xml.writeStartDocument();                       // the head <?xml ... ?>
+      xml.writeStartElement("ParameterHandler");      // the root element
+
+      const int n_top_level_items = tree_widget->topLevelItemCount();
+
+      for (int i = 0; i < n_top_level_items; ++i)
+        write_item(tree_widget->topLevelItem(i));
+
+      // closes all elements that are still open, i.e. the root element
+      xml.writeEndDocument();
+
+      return !xml.hasError();
+    }
+
+
+
+    // Write @p item and, recursively, all of its children to the XML stream.
+    //
+    // The mangled text of column 0 is used as element name. If the item is a
+    // parameter, its five entries (columns 1 to 5) are written as
+    // <value>, <default_value>, <documentation>, <pattern> and
+    // <pattern_description>.
+    //
+    // An item counts as a parameter if it has a value, a pattern or a pattern
+    // description. Looking at the value alone is not sufficient: a parameter
+    // whose value is the empty string would be written as an empty subsection
+    // and lose all of its entries.
+    void XMLParameterWriter::write_item(QTreeWidgetItem *item)
+    {
+      const QString tag_name = mangle(item->text(0));
+
+      xml.writeStartElement(tag_name);
+
+      const bool is_parameter = (!item->text(1).isEmpty()
+                                 || !item->text(4).isEmpty()
+                                 || !item->text(5).isEmpty());
+
+      if (is_parameter)
+        {
+          xml.writeTextElement("value", item->data(1,Qt::EditRole).toString());
+          xml.writeTextElement("default_value", item->text(2));
+          xml.writeTextElement("documentation", item->text(3));
+          xml.writeTextElement("pattern", item->text(4));
+          xml.writeTextElement("pattern_description", item->text(5));
+        }
+
+      // go over the children recursively
+      const int n_children = item->childCount();
+
+      for (int i = 0; i < n_children; ++i)
+        write_item(item->child(i));
+
+      // closing </tag_name>
+      xml.writeEndElement();
+    }
+
+
+
+    // Mangle the string @p s so that it only consists of the letters a-z, A-Z
+    // and the digits 0-9 plus escape sequences: every other character is
+    // replaced by an underscore followed by its code as two lower-case
+    // hexadecimal digits. This function is copied from the ParameterHandler
+    // class and adapted to QString.
+    QString XMLParameterWriter::mangle (const QString &s)
+    {
+      static const std::string allowed_characters
+      ("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789");
+      static const char hex_digits[] = "0123456789abcdef";
+
+      const std::string plain (s.toStdString());
+
+      std::string mangled;
+      mangled.reserve (plain.size());
+
+      for (std::string::const_iterator c = plain.begin(); c != plain.end(); ++c)
+        {
+          // allowed characters are taken over as they are
+          if (allowed_characters.find (*c) != std::string::npos)
+            {
+              mangled += *c;
+              continue;
+            }
+
+          // everything else becomes '_' plus high and low half of its code
+          const unsigned char code = static_cast<unsigned char>(*c);
+
+          mangled += '_';
+          mangled += hex_digits[code >> 4];
+          mangled += hex_digits[code & 0x0f];
+        }
+
+      return QString (mangled.c_str());
+    }
+  }
+}
diff --git a/contrib/parameter_gui/xml_parameter_writer.h b/contrib/parameter_gui/xml_parameter_writer.h
new file mode 100644
index 0000000..de2f425
--- /dev/null
+++ b/contrib/parameter_gui/xml_parameter_writer.h
@@ -0,0 +1,100 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2013 by Martin Steigemann and Wolfgang Bangerth
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef XMLPARAMETERWRITER_H
+#define XMLPARAMETERWRITER_H
+
+#include <QXmlStreamWriter>
+#include <QTreeWidget>
+#include <QTreeWidgetItem>
+
+
+namespace dealii
+{
+/*! @addtogroup ParameterGui
+ *@{
+ */
+  namespace ParameterGui
+  {
+/**
+ * The XMLParameterWriter class provides an interface to write parameters stored in a QTreeWidget to a file in XML format.
+ * This class makes extensive use of the QXmlStreamWriter class, which implements the basic functionalities for writing
+ * XML files.
+ *
+ * @note This class is used in the graphical user interface for the @ref ParameterHandler class.
+ *       It is not compiled into the deal.II libraries and can not be used by applications using deal.II.
+ *
+ * @ingroup ParameterGui
+ * @author Martin Steigemann, Wolfgang Bangerth, 2010
+ */
+    class XMLParameterWriter
+    {
+      public:
+				     /**
+				      * Constructor.
+				      * Parameter values from @p tree_widget will be written.
+				      */
+        XMLParameterWriter (QTreeWidget *tree_widget);
+				     /**
+				      * This function writes the parameter values stored in <tt>tree_widget</tt>
+				      * to @p device in XML format. We use the QXmlStreamWriter class
+				      * for this. The root element is
+				      * <code><ParameterHandler></code>
+				      */
+        bool write_xml_file (QIODevice *device);
+
+      private:
+				     /**
+				      * This function writes a given @p item of <tt>tree_widget</tt>
+				      * to a file in XML format. For this the QXmlStreamWriter class is used.
+				      * For every @p item a start element with the mangled name of the
+				      * item is written. If the @p item is a parameter, the elements
+				      * that describe this parameter are written:
+				      * @code
+				      *   <value>value</value>
+				      *   <default_value>default_value</default_value>
+				      *   <documentation>documentation</documentation>
+				      *   <pattern>pattern</pattern>
+				      *   <pattern_description>[pattern_description]</pattern_description>
+				      * @endcode
+				      * Then <tt>write_item</tt> is called recursively for all children
+				      * of the @p item, and finally the element is closed.
+				      */
+        void write_item (QTreeWidgetItem *item);
+				     /**
+				      * Copied from the @ref ParameterHandler class.
+				      * Mangle a string @p s so that it
+				      * doesn't contain any special
+				      * characters or spaces: every character
+				      * other than a letter a-z, A-Z or a digit
+				      * is replaced by an underscore followed by
+				      * its code as two hexadecimal digits.
+				      */
+        QString  mangle (const QString &s);
+				     /**
+				      * A QXmlStreamWriter object
+				      * which implements the functionalities
+				      * we need for writing parameters to XML files.
+				      */
+        QXmlStreamWriter  xml;
+				     /**
+				      * A pointer to the QTreeWidget structure
+				      * which stores the parameters.
+				      */
+        QTreeWidget * tree_widget;
+    };
+  }
+/**@}*/
+}
+
+
+#endif
diff --git a/contrib/utilities/astyle.rc b/contrib/utilities/astyle.rc
new file mode 100644
index 0000000..91c5c92
--- /dev/null
+++ b/contrib/utilities/astyle.rc
@@ -0,0 +1,33 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2013 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+# astyle indentation style for deal.II (contrib/utilities/indent requires astyle 2.04)
+
+--style=gnu
+
+--convert-tabs
+--indent-preprocessor
+--indent=spaces=2
+--indent-namespaces
+--min-conditional-indent=0
+--pad-header
+
+# write things as 'char *p'
+--align-pointer=name
+--align-reference=name
+
+--max-instatement-indent=60
+--suffix=none
+--quiet
diff --git a/contrib/utilities/check_indentation.sh b/contrib/utilities/check_indentation.sh
new file mode 100755
index 0000000..7a859c9
--- /dev/null
+++ b/contrib/utilities/check_indentation.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+#
+# CI helper: re-indent the source tree and fail if doing so modified any
+# tracked file. The paths below are relative, so this has to be started
+# from the top-level directory of the repository.
+
+if [ "${TRAVIS_PULL_REQUEST}" = "false" ]; then
+	echo "Running indentation test on master merge."
+else
+	echo "Running indentation test on Pull Request #${TRAVIS_PULL_REQUEST}"
+fi
+
+# Stop right away if the indentation script itself fails. Otherwise a tree
+# that was never re-indented yields an empty diff and looks like a pass.
+./contrib/utilities/indent || exit 1
+
+# Show the changes in the log ...
+git diff
+# ... and report through the exit status whether there were any.
+git diff-files --quiet
diff --git a/contrib/utilities/dotgdbinit.py b/contrib/utilities/dotgdbinit.py
new file mode 100644
index 0000000..ff0a049
--- /dev/null
+++ b/contrib/utilities/dotgdbinit.py
@@ -0,0 +1,210 @@
+# ---------------------------------------------------------------------
+#
+# Copyright (C) 2015 by the deal.II authors
+#
+# This file is part of the deal.II library.
+#
+# The deal.II library is free software; you can use it, redistribute
+# it, and/or modify it under the terms of the GNU Lesser General
+# Public License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+# The full text of the license can be found in the file LICENSE at
+# the top level of the deal.II distribution.
+#
+# ---------------------------------------------------------------------
+
+#
+# Instructions: Place a copy of this file, renamed as ".gdbinit", in your home
+# directory to enable pretty-printing of various deal.II objects. If you already
+# have a ".gdbinit" file or would like to manage multiple sets of pretty
+# printers, then see the directions included in the Documentation, in the
+# "Configuration for debugging via GDB" section in the "Information for users"
+# category.
+#
+# This has only been tested with GDB 7.7.1 and newer, but it should work with
+# slightly older versions of GDB (the Python interface was added in 7.0,
+# released in 2009).
+#
+# Authors: Wolfgang Bangerth, 2015, David Wells, 2015
+#
+set print pretty 1
+
+python
+
+import gdb
+import re
+
+
+def build_output_string(keys, accessor):
+    """Build an output string of the form
+    {
+      a = foo,
+      b = bar
+    }
+    where a and b are elements of keys and foo and bar are values of
+    accessor (e.g., accessor['a'] = foo).
+
+    Note that accessor need not be a dictionary (i.e., gdb.Values
+    redefines __getitem__)."""
+    return ("{\n" +
+            ",\n".join(["  {} = {}".format(key, accessor[key])
+                        for key in keys]) +
+            "\n}")
+
+
+class PointPrinter(object):
+    """Print a deal.II Point instance."""
+    def __init__(self, typename, val):
+        self.typename = typename
+        self.val = val
+
+    def to_string(self):
+        return self.val['values']
+
+
+class TensorPrinter(object):
+    """Print a deal.II Tensor instance."""
+    def __init__(self, typename, val):
+        self.typename = typename
+        self.val = val
+
+    def to_string(self):
+        if int(self.val.type.template_argument(0)) == 0:
+            return self.val['value']
+        else:
+            return self.val['values']
+
+
+class TriaIteratorPrinter(object):
+    """Print a deal.II TriaIterator instance."""
+    def __init__(self, typename, val):
+        self.typename = typename
+        self.val = val
+
+    def to_string(self):
+        keys = ['tria', 'present_level', 'present_index']
+        if 'DoFHandler' in str(self.val.type.template_argument(0)):
+            keys.insert(1, 'dof_handler')
+
+        return build_output_string(keys, self.val['accessor'])
+
+
+class VectorPrinter(object):
+    """Print a deal.II Vector instance."""
+    def __init__(self, typename, val):
+        self.typename = typename
+        self.val = val
+
+    def children(self):
+        # The first entry (see the "Pretty Printing API" documentation of GDB)
+        # in the tuple should be a name for the child, which should be nothing
+        # (an empty string) here.
+        return (("", (self.val['val'] + count).dereference())
+                for count in range(int(self.val['vec_size'])))
+
+    def to_string(self):
+        return "{}[{}]".format(self.val.type.template_argument(0),
+                               self.val['vec_size'])
+
+    @staticmethod
+    def display_hint():
+        """Provide a hint to GDB that this is an array-like container
+        (so print values in sequence)."""
+        return "array"
+
+
+class QuadraturePrinter(object):
+    """Print a deal.II Quadrature instance."""
+    def __init__(self, typename, val):
+        self.typename = typename
+        self.val = val
+
+    def to_string(self):
+        return build_output_string(['quadrature_points', 'weights'], self.val)
+
+
+class RxPrinter(object):
+    """A "regular expression" printer which conforms to the
+    "SubPrettyPrinter" protocol from gdb.printing."""
+    def __init__(self, name, function):
+        self.name = name
+        self.function = function
+        self.enabled = True
+
+    def __call__(self, value):
+        if self.enabled:
+            return self.function(self.name, value)
+        else:
+            return None
+
+
+class Printer(object):
+    """A pretty-printer that conforms to the "PrettyPrinter" protocol
+    from gdb.printing. It can also be used directly as an old-style
+    printer."""
+    def __init__(self, name):
+        self.name = name
+        self.subprinters = list()
+        self.lookup = dict()
+        self.enabled = True
+        self.compiled_rx = re.compile('^([a-zA-Z0-9_:]+)<.*>$')
+
+    def add(self, name, function):
+        printer = RxPrinter(name, function)
+        self.subprinters.append(printer)
+        self.lookup[name] = printer
+
+    @staticmethod
+    def get_basic_type(object_type):
+        # If it points to a reference, then get the reference.
+        if object_type.code == gdb.TYPE_CODE_REF:
+            object_type = object_type.target()
+
+        object_type = object_type.unqualified().strip_typedefs()
+
+        return object_type.tag
+
+    def __call__(self, val):
+        typename = self.get_basic_type(val.type)
+        if typename:
+            # All the types we match are template types, so we can use a
+            # dictionary.
+            match = self.compiled_rx.match(typename)
+            if match:
+                basename = match.group(1)
+                if basename in self.lookup:
+                    return self.lookup[basename](val)
+
+        return None
+
+
+dealii_printer = Printer("deal.II")
+
+
+def register_dealii_printers():
+    """Register deal.II pretty-printers with gdb."""
+    printers = {
+        PointPrinter: ['Point'],
+        TensorPrinter: ['Tensor'],
+        VectorPrinter: ['Vector'],
+        TriaIteratorPrinter:
+        ['TriaRawIterator', 'TriaIterator', 'TriaActiveIterator'],
+        QuadraturePrinter:
+        ['Quadrature', 'QGauss', 'QGaussLobatto', 'QMidpoint', 'QSimpson',
+         'QTrapez', 'QMilne', 'QWeddle', 'QGaussLog', 'QGaussLogR',
+         'QGaussOneOverR', 'QSorted', 'QTelles', 'QGaussChebyshev',
+         'QGaussRadauChebyshev', 'QIterated', 'QAnisotropic']
+    }
+    for printer, class_names in printers.items():
+        for class_name in class_names:
+            dealii_printer.add('dealii::' + class_name, printer)
+    try:
+        from gdb import printing
+        printing.register_pretty_printer(gdb, dealii_printer)
+    except ImportError:
+        gdb.pretty_printers.append(dealii_printer)
+
+
+register_dealii_printers()
+
+end
diff --git a/contrib/utilities/embedding.cc b/contrib/utilities/embedding.cc
new file mode 100644
index 0000000..b1998ee
--- /dev/null
+++ b/contrib/utilities/embedding.cc
@@ -0,0 +1,227 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// Author: Guido Kanschat
+
+#include <base/quadrature_lib.h>
+#include <lac/vector.h>
+#include <lac/full_matrix.h>
+#include <lac/full_matrix.templates.h>
+#include <lac/solver_richardson.h>
+#include <lac/vector_memory.h>
+#include <grid/tria.h>
+#include <grid/tria_iterator.h>
+#include <dofs/dof_accessor.h>
+#include <grid/grid_generator.h>
+#include <fe/fe_q.h>
+#include <fe/fe_dgq.h>
+#include <fe/fe_dgp.h>
+#include <fe/fe_system.h>
+#include <fe/mapping_cartesian.h>
+#include <fe/fe_values.h>
+#include <vector>
+#include <iomanip>
+#include <fstream>
+#include <strstream>
+#include <string>
+
+/*
+ * Enter name of the finite element family here.
+ */
+
+#define ELNAME "FE_DGP"
+#define PUSH(k) FE_DGP<dim> q ## k (k);\
+ elements.push_back (ElPair(&q ## k, "dgp" # k))
+
+
+
+/**
+ * Compute and print the matrices that embed the space spanned by
+ * @p fe_coarse on @p tr_coarse into the space spanned by @p fe_fine on
+ * @p tr_fine.
+ *
+ * For every coarse shape function the mass matrix of the fine space and
+ * the right hand side (fine shape functions integrated against that
+ * coarse shape function) are assembled and the resulting linear system
+ * is solved. The coefficients are written to cout as one
+ * <tt>static const double</tt> array per active cell of @p tr_fine.
+ *
+ * @param degree    The quadrature formula used is QGauss<dim>(degree+1).
+ * @param tr_coarse Triangulation of the coarse space; only its first
+ *                  active cell is used.
+ * @param tr_fine   Triangulation of the fine space.
+ * @param fe_coarse Finite element of the coarse space.
+ * @param fe_fine   Finite element of the fine space.
+ * @param name      Prefix of the generated array names; it is also
+ *                  pushed as deallog prefix while the function runs.
+ */
+template <int dim>
+void compute_embedding (unsigned int degree,
+                        Triangulation<dim>& tr_coarse,
+                        Triangulation<dim>& tr_fine,
+                        const FiniteElement<dim>& fe_coarse,
+                        const FiniteElement<dim>& fe_fine,
+                        const char* name)
+{
+  deallog.push(name);
+
+  DoFHandler<dim> dof_coarse(tr_coarse);
+  dof_coarse.distribute_dofs(fe_coarse);
+  DoFHandler<dim> dof_fine(tr_fine);
+  dof_fine.distribute_dofs(fe_fine);
+
+  FullMatrix<long double> A(dof_fine.n_dofs());
+  Vector<long double> f(dof_fine.n_dofs());
+  Vector<long double> u(dof_fine.n_dofs());
+
+                                   // result[fine cell][coarse dof][fine dof].
+                                   // The first index runs over all
+                                   // active cells of tr_fine below, so
+                                   // size it by that count rather than
+                                   // by a dimension dependent constant.
+  vector<vector<vector<double> > >
+    result(tr_fine.n_active_cells(),
+           vector<vector<double> >(fe_coarse.dofs_per_cell,
+                                   vector<double>(fe_fine.dofs_per_cell, 0.)));
+
+  DoFHandler<dim>::active_cell_iterator coarse_cell
+    = dof_coarse.begin_active();
+  vector<unsigned int> indices(fe_fine.dofs_per_cell);
+
+  MappingCartesian<dim> mapping;
+  QGauss<dim> q_fine(degree+1);
+
+  FEValues<dim> fine (mapping, fe_fine, q_fine,
+                      update_q_points
+                      | update_JxW_values
+                      | update_values);
+
+
+  DoFHandler<dim>::active_cell_iterator c;
+
+                                   // One linear system per coarse
+                                   // shape function.
+  for (unsigned int coarse_no=0;coarse_no<fe_coarse.dofs_per_cell;
+       ++coarse_no)
+    {
+      A.clear();
+      f.clear();
+      u.clear();
+
+      for (c = dof_fine.begin_active();
+           c != dof_fine.end();
+           ++c)
+        {
+          c->get_dof_indices(indices);
+          fine.reinit(c);
+                                           // Build mass matrix and RHS
+
+                                           // Evaluate the coarse element
+                                           // on the coarse cell at the
+                                           // quadrature points of the
+                                           // current fine cell.
+          Quadrature<dim> q_coarse (fine.get_quadrature_points(),
+                                    fine.get_JxW_values());
+          FEValues<dim> coarse (mapping, fe_coarse, q_coarse,
+                                update_values);
+          coarse.reinit(coarse_cell);
+
+          for (unsigned int k=0;k<fine.n_quadrature_points;++k)
+            {
+              double dx = fine.JxW(k);
+              for (unsigned int i=0;i<fe_fine.dofs_per_cell;++i)
+                {
+                  double v = fine.shape_value(i,k);
+                  f(indices[i]) += dx *
+                    v * coarse.shape_value(coarse_no, k);
+                  for (unsigned int j=0;j<fe_fine.dofs_per_cell;++j)
+                    A(indices[i],indices[j]) += dx*v*fine.shape_value(j,k);
+                }
+            }
+        }
+//      A.print_formatted(cout, 2, false, 4, "~", 9);
+                                       // Solve A u = f by a Richardson
+                                       // iteration preconditioned with
+                                       // the inverse of A.
+      FullMatrix<double> P(A.n());
+      P.invert(A);
+      SolverControl control (100, 1.e-24, true, false);
+      PrimitiveVectorMemory<Vector<long double> > mem;
+      SolverRichardson<Vector<long double> > solver(control, mem);
+      solver.solve(A,u,f,P);
+
+                                       // Entries are stored multiplied
+                                       // by 27 and printed with a
+                                       // trailing "/27." below, so the
+                                       // generated expression evaluates
+                                       // to the computed coefficient.
+                                       // NOTE(review): why 27 was chosen
+                                       // is not visible here -- confirm.
+      unsigned int cell_i=0;
+      for (c = dof_fine.begin_active();
+           c != dof_fine.end();
+           ++c, ++cell_i)
+        {
+          c->get_dof_indices(indices);
+          for (unsigned int i=0;i<fe_fine.dofs_per_cell;++i)
+            result[cell_i][coarse_no][i] = (fabs(u(indices[i])) > 1.e-16)
+              ? (27.*u(indices[i])) : 0.;
+        }
+    }
+
+                                   // Emit one array per fine cell, with
+                                   // one output row per fine dof.
+  for (unsigned int cell=0;cell<tr_fine.n_active_cells();++cell)
+    {
+      cout << "static const double "
+           << name << "_"
+           << cell << "[] =" << endl << '{' << endl;
+      for (unsigned int i=0;i<fe_fine.dofs_per_cell;++i)
+        {
+          for (unsigned int j=0;j<fe_coarse.dofs_per_cell;++j)
+            {
+              double s = result[cell][j][i];
+              if (fabs(s) < 1.e-13)
+                cout << " 0,";
+              else
+                cout << ' ' << setprecision(10) << s << "/27.,";
+            }
+          cout << endl;
+        }
+      cout << "};" << endl << endl;
+    }
+  deallog.pop();
+}
+
+
+/**
+ * Generate the embedding matrices in space dimension @p dim for all
+ * elements listed via PUSH below. The output goes to cout and is
+ * wrapped in a namespace whose name is built from ELNAME and @p dim.
+ */
+template <int dim>
+void loop ()
+{
+                                   // "<dim>d" plus the terminating
+                                   // zero; large enough for
+                                   // single-digit dimensions only.
+  char prefix[3];
+  sprintf(prefix, "%dd", dim);
+  deallog.push(prefix);
+
+  cout << "namespace " << ELNAME << "_" << dim << "d\n{";
+
+  Triangulation<dim> tr_coarse;
+  Triangulation<dim> tr_fine;
+  GridGenerator::hyper_cube (tr_coarse, 0, 1);
+  GridGenerator::hyper_cube (tr_fine, 0, 1);
+  tr_fine.refine_global(1);
+
+  typedef pair<const FiniteElement<dim>*, const char*> ElPair ;
+  vector <ElPair> elements(0);
+
+                                   /*
+                                    * List element degrees for
+                                    * computation here.
+                                    */
+  PUSH(0);
+  PUSH(1);
+  PUSH(2);
+  PUSH(3);
+  PUSH(4);
+  PUSH(5);
+  PUSH(6);
+
+                                   // Buffer for the generated array
+                                   // names. It lives on the stack so
+                                   // nothing has to be freed, and it is
+                                   // zero-filled so the name stays
+                                   // terminated even if the stream
+                                   // below runs out of space.
+  char name[100] = "";
+
+                                   // Embed all lower spaces into
+                                   // higher or just the same degree
+                                   // on different grids.
+  const bool same_degree_only = true;
+
+  const unsigned int n = elements.size();
+  for (unsigned int i=0;i<n;++i)
+    for (unsigned int j=((same_degree_only) ? i : 0);j<=i;++j)
+      {
+        ostrstream os (name, 99);
+        os << elements[i].second << "_into_"
+           << elements[j].second << "_refined" << ends;
+
+                                         // The index i doubles as the
+                                         // polynomial degree because the
+                                         // elements were pushed in order
+                                         // of degree, starting at zero.
+        compute_embedding (i, tr_coarse, tr_fine,
+                           *elements[i].first,
+                           *elements[j].first,
+                           name);
+      }
+
+  cout << "};\n";
+  deallog.pop();
+
+}
+
+// Generate the tables for one, two and three space dimensions in turn.
+int main ()
+{
+  loop<1> ();
+  loop<2> ();
+  loop<3> ();
+  return 0;
+}
diff --git a/contrib/utilities/fe_table.pl b/contrib/utilities/fe_table.pl
new file mode 100644
index 0000000..6ae8abf
--- /dev/null
+++ b/contrib/utilities/fe_table.pl
@@ -0,0 +1,84 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2005 - 2013 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+# Author: Guido Kanschat
+
+# Create a table of finite element capabilities out of
+# tests/fe/fe_data_test.output
+
+use strict;
+
+print <<'EOF'
+<html>
+<head>
+<title>Finite element capabilities</title>
+</head>
+<body>
+<table border="1">
+<tr>
+<th rowspan="2">Element</th>
+<th rowspan="2">degree</th>
+<th rowspan="2">DoFs</th>
+<th colspan="4">DoFs on</th>
+<th rowspan="2">conforms</th>
+<th rowspan="2">components</th>
+<th colspan="4">Support points</th>
+</tr>
+<tr>
+<th>V</th><th>L</th><th>Q</th><th>H</th>
+<th>uc</th><th>uf</th><th>gc</th><th>gf</th>
+</tr>
+EOF
+    ;
+
+my @field = ('degree', 'dofs_per_cell',
+	     'dofs_per_vertex', 'dofs_per_line', 'dofs_per_quad', 'dofs_per_hex',
+	     'conformity', 'components',
+	     'unit_support_points', 'unit_face_support_points',
+	     'generalized_support_points', 'generalized_face_support_points');
+
+# Maps an element name to a hash of its properties (keyed by the names in
+# @field). FESystem elements are deliberately kept out of this table.
+my %fe;
+# Property hash of the element whose fe_data block is currently being read.
+my $hashref;
+
+while(<>)
+{
+    if (/DEAL::fe_data.*:(.*)/)
+    {
+	# Save the capture right away: a later successful match would
+	# replace the contents of $1.
+	my $name = $1;
+	if ($name =~ m/FESystem/)
+	{
+	    # Collect the properties in a scratch hash that is never
+	    # stored in %fe, so the element does not show up in the table.
+	    $hashref = {};
+	}
+	else
+	{
+	    $hashref = $fe{$name} = { 'set' => 't' };
+	}
+    }
+    foreach my $entry (@field)
+    {
+	$hashref->{$entry} = $1 if /DEAL::$entry=(.*)/;
+    }
+}
+
+# One table row per element, sorted by name: the name first, then one
+# cell per entry of @field.
+foreach my $name (sort keys %fe)
+{
+    print '<tr><td>',$name,"</td>\n";
+    $hashref = $fe{$name};
+    print "<td>", $hashref->{$_}, "</td>\n" foreach (@field);
+    print "</tr>\n";
+}
+
+print <<'EOF'
+</table>
+</body>
+</html>
+EOF
+    ;
diff --git a/contrib/utilities/gridio.cc b/contrib/utilities/gridio.cc
new file mode 100644
index 0000000..5940b74
--- /dev/null
+++ b/contrib/utilities/gridio.cc
@@ -0,0 +1,80 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// Author: Guido Kanschat
+
+// A little program reading a grid *.inp and writing it to *.eps.
+// Some more functionality should be added some time.
+
+#include <grid/tria.h>
+#include <grid/grid_in.h>
+#include <grid/grid_out.h>
+
+#include <iostream>
+#include <fstream>
+#include <string>
+
+#include <unistd.h>
+
+using namespace std;
+
+// Read the grid stored in infile into a triangulation of dimension dim
+// and write it to outfile in the format oformat.
+template <int dim>
+void convert(const char* infile,
+             const char* outfile,
+             GridOut::OutputFormat oformat)
+{
+  Triangulation<dim> triangulation;
+  GridIn<dim> grid_reader;
+  grid_reader.attach_triangulation(triangulation);
+  grid_reader.read(infile);
+
+  // The DX flags are always set on the writer, whatever format is
+  // requested.
+  GridOut grid_writer;
+  GridOutFlags::DX dx_output_flags(true, true, true, false, true);
+  grid_writer.set_flags(dx_output_flags);
+
+  ofstream output_stream(outfile);
+  grid_writer.write(triangulation, output_stream, oformat);
+}
+
+
+// Usage: <program> dim infile outfile
+//
+// Reads the grid in infile as a dim-dimensional triangulation (dim must
+// be 2 or 3) and writes it to outfile. The output format is derived from
+// the extension of outfile ("inp" is treated as "ucd"); without an
+// extension, eps is written.
+//
+// Returns 0 on success and 1 if the arguments are unusable.
+int main(int argc, char** argv)
+{
+  if (argc<4)
+    {
+      cerr << "Usage: " << argv[0]
+           << " dim infile outfile" << endl;
+      return 1;
+    }
+
+  const int d = atoi(argv[1]);
+  if ((d != 2) && (d != 3))
+    {
+      // Previously an unsupported dimension did nothing at all and
+      // still reported success.
+      cerr << argv[0] << ": dim must be 2 or 3, got '" << argv[1] << "'"
+           << endl;
+      return 1;
+    }
+
+  std::string outstring(argv[3]);
+  GridOut::OutputFormat oformat = GridOut::eps;
+
+  // Keep the full width of the result and compare against npos instead
+  // of relying on a truncated value being larger than the string length.
+  const std::string::size_type dotpos = outstring.find_last_of('.');
+  if (dotpos != std::string::npos)
+    {
+      std::string ext = outstring.substr(dotpos+1);
+      if (ext == "inp")
+        ext = "ucd";
+
+      oformat = GridOut::parse_output_format(ext);
+    }
+
+  if (d == 2)
+    convert<2>(argv[2], argv[3], oformat);
+  else
+    convert<3>(argv[2], argv[3], oformat);
+
+  return 0;
+}
diff --git a/contrib/utilities/indent b/contrib/utilities/indent
new file mode 100755
index 0000000..ca5c34f
--- /dev/null
+++ b/contrib/utilities/indent
@@ -0,0 +1,48 @@
+#!/bin/bash
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2013, 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+# Every failed precondition below ends the script with a non-zero exit
+# status so that callers can tell that nothing was indented.
+
+if test ! -d source -o ! -d include -o ! -d examples ; then
+  echo "*** This script must be run from the top-level directory of deal.II."
+  exit 1
+fi
+
+if test ! -f contrib/utilities/astyle.rc ; then
+  echo "*** No style file contrib/utilities/astyle.rc found."
+  exit 1
+fi
+
+if ! command -v astyle >/dev/null 2>&1 ; then
+  echo "*** No astyle program found."
+  echo "***"
+  echo "*** You can download astyle from http://astyle.sourceforge.net/"
+  echo "*** Note that you will need exactly version 2.04 (no newer or"
+  echo "*** older version will yield the correct indentation)."
+  exit 1
+fi
+
+if test "`astyle --version 2>&1`" != "Artistic Style Version 2.04" ; then
+  echo "*** Found a version of astyle different than the required version 2.04."
+  exit 1
+fi
+
+
+
+# collect all header and source files and process them in batches of 50 files
+# with up to 10 in parallel; file names are passed NUL-separated so that
+# paths containing whitespace are handled correctly
+echo "--- Indenting all deal.II header and source files"
+
+find tests include source examples \( -name '*.cc' -o -name '*.h' \) -print0 | xargs -0 -n 50 -P 10 astyle --options=contrib/utilities/astyle.rc
+
diff --git a/contrib/utilities/interpolation.cc b/contrib/utilities/interpolation.cc
new file mode 100644
index 0000000..db25977
--- /dev/null
+++ b/contrib/utilities/interpolation.cc
@@ -0,0 +1,344 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// Author: Guido Kanschat
+
+#include <base/quadrature_lib.h>
+#include <lac/vector.h>
+#include <lac/full_matrix.h>
+#include <lac/full_matrix.templates.h>
+#include <grid/tria.h>
+#include <grid/tria_iterator.h>
+#include <dofs/dof_accessor.h>
+#include <grid/grid_generator.h>
+#include <fe/fe_q.h>
+#include <fe/fe_dgq.h>
+#include <fe/fe_system.h>
+#include <fe/mapping_q1.h>
+#include <fe/fe_values.h>
+#include <vector>
+#include <iomanip>
+#include <fstream>
+#include <strstream>
+#include <string>
+
+
+// Return whether every coordinate of p (at most the first three are
+// looked at) lies within [-1e-10, 1.000000001], i.e. whether p is inside
+// the unit cube up to a small tolerance.
+template <int dim>
+inline bool in_cube (const Point<dim>& p)
+{
+  const double lower_bound = -1e-10;
+  const double upper_bound = 1.000000001;
+
+  // The first coordinate is always checked, the second and third only
+  // if the dimension provides them.
+  const unsigned int n_checked = (dim>2) ? 3 : ((dim>1) ? 2 : 1);
+  for (unsigned int d=0;d<n_checked;++d)
+    if ((p(d) < lower_bound) || (p(d) > upper_bound))
+      return false;
+
+  return true;
+}
+
+// Shift all points in v by an offset that depends on the child number i:
+// each coordinate direction is either left alone or moved by -1.
+// NOTE(review): relies on a default-constructed Point being the origin
+// -- confirm.
+template <int dim>
+inline void
+move_points (vector<Point<dim> >& v, unsigned int i)
+{
+  Point<dim> shift;
+  switch (dim)
+    {
+    case 1:
+      {
+        if (i != 0)
+          shift(0) = -1.;
+        break;
+      }
+    case 2:
+      {
+        const bool shift_0 = (i==1) || (i==2);
+        const bool shift_1 = (i==2) || (i==3);
+        if (shift_0)
+          shift(0) = -1.;
+        if (shift_1)
+          shift(1) = -1.;
+        break;
+      }
+    case 3:
+      {
+        const bool shift_0 = (i==1) || (i==2) || (i==5) || (i==6);
+        const bool shift_1 = (i==4) || (i==5) || (i==6) || (i==7);
+        const bool shift_2 = (i==2) || (i==3) || (i==6) || (i==7);
+        if (shift_0)
+          shift(0) = -1.;
+        if (shift_1)
+          shift(1) = -1.;
+        if (shift_2)
+          shift(2) = -1.;
+        break;
+      }
+    default:
+      Assert(false, ExcNotImplemented());
+    }
+
+  const unsigned int n_points = v.size();
+  for (unsigned int point=0;point<n_points;++point)
+    v[point] += shift;
+}
+
+
+// Evaluate the shape functions of fe_fine on each child of the coarsest
+// cell of tr_fine at the unit support points of fe_coarse (scaled by two
+// and shifted per child by move_points) and print the non-zero values to
+// cout as assignments of the form "restriction[cell](i,j) = value;".
+//
+// name is used as prefix of the identifier printed in front of each
+// block. dof_coarse is only set up, and indices is filled but never read.
+template <int dim>
+void compute_interpolation (Triangulation<dim>& tr_coarse,
+			    Triangulation<dim>& tr_fine,
+			    const FiniteElement<dim>& fe_coarse,
+			    const FiniteElement<dim>& fe_fine,
+			    const char* name)
+{
+  DoFHandler<dim> dof_coarse(tr_coarse);
+  dof_coarse.distribute_dofs(fe_coarse);
+  DoFHandler<dim> dof_fine(tr_fine);
+  dof_fine.distribute_dofs(fe_fine);
+  
+				   // result[child][support point][fine dof]
+  vector<vector<vector<double> > >
+    result(2<<dim,
+	   vector<vector<double> >(fe_coarse.dofs_per_cell,
+				   vector<double>(fe_fine.dofs_per_cell, 0.)));
+
+  vector<unsigned int> indices(fe_fine.dofs_per_cell);
+
+  MappingQ1<dim> mapping;
+
+  vector<Point<dim> > points;
+  fe_coarse.get_unit_support_points (points);
+  vector<Point<dim> > q_points(points.size());
+				   // Only shape values are requested
+				   // below, so the weights are
+				   // irrelevant.
+  vector<double> dummy_weights(points.size(), 0.);
+  
+				   // Coarse level cell of fine grid
+  DoFHandler<dim>::cell_iterator father = dof_fine.begin (0);
+
+  for (unsigned int child_no = 0; child_no < GeometryInfo<dim>::children_per_cell;
+       ++child_no)
+    {
+      DoFHandler<dim>::active_cell_iterator c = father->child(child_no);
+      
+      c->get_dof_indices(indices);
+				       // Evaluate at support points
+				       // of father cell.
+      for (unsigned int i=0;i<points.size();++i)
+	q_points[i] = 2.*points[i];
+      move_points (q_points, child_no);
+      
+      Quadrature<dim> q_fine (q_points, dummy_weights);
+      
+      FEValues<dim> fine (mapping, fe_fine, q_fine,
+			  update_values);
+      fine.reinit(c);
+					   // Store the fine shape values at
+					   // those points that lie inside
+					   // the unit cube
+	  
+	  for (unsigned int k=0;k<fine.n_quadrature_points;++k)
+	    {
+//	      cerr << k << '\t' << q_points[k];
+	      if (in_cube(q_points[k]))
+		{
+		  for (unsigned int i=0;i<fe_fine.dofs_per_cell;++i)
+		    {
+		      double v = fine.shape_value(i,k);
+//		      cerr << '[' << i << ' ' << v << ']';
+		      result[child_no][k][i] = v;
+		    }
+		}
+//	      cerr << endl;      
+	    }
+    }
+  
+				   // Print the collected values, one
+				   // block per active cell of tr_fine;
+				   // entries smaller than 1e-14 in
+				   // absolute value are skipped.
+  for (unsigned int cell=0;cell<tr_fine.n_active_cells();++cell)
+    {
+      cout << "static double[] "
+	   << name << "_"
+	   << dim << "d_"
+	   << cell << " =" << endl << '{' << endl;
+      for (unsigned int i=0;i<fe_coarse.dofs_per_cell;++i)
+	{
+	  for (unsigned int j=0;j<fe_fine.dofs_per_cell;++j)
+	    {
+	      double a = result[cell][i][j];
+	      if ((a<1.e-14) && (a>-1.e-14))
+		a = 0.;
+//	      cout << ' ' << setprecision(10) << a << ",";
+	      if (a != 0.)
+		cout << "restriction[" << cell << "]("
+		     << i << ',' << j << ") = "
+		     << a << ';' << endl;
+	    }
+	  cout << endl;
+	}
+      cout << "};" << endl << endl;
+    }
+}
+
+
+// Compute the coefficients, with respect to fe_coarse on the first
+// active cell of tr_coarse, of the L2 projection of every shape function
+// of fe_fine living on child 0 of the coarsest cell of tr_fine, and
+// print them to cout as a single "static double" array.
+//
+// name is used as prefix of the name of the generated array.
+template <int dim>
+void compute_projection (Triangulation<dim>& tr_coarse,
+			 Triangulation<dim>& tr_fine,
+			 const FiniteElement<dim>& fe_coarse,
+			 const FiniteElement<dim>& fe_fine,
+			 const char* name)
+{
+  DoFHandler<dim> dof_coarse(tr_coarse);
+  dof_coarse.distribute_dofs(fe_coarse);
+  DoFHandler<dim> dof_fine(tr_fine);
+  dof_fine.distribute_dofs(fe_fine);
+  DoFHandler<dim>::cell_iterator cell_coarse = dof_coarse.begin_active();
+
+				   // result[cell][coarse dof][fine dof];
+				   // only the slab for cell 0 is
+				   // filled and printed.
+  vector<vector<vector<double> > >
+    result(2<<dim,
+	   vector<vector<double> >(fe_coarse.dofs_per_cell,
+				   vector<double>(fe_fine.dofs_per_cell, 0.)));
+
+  vector<unsigned int> indices(fe_fine.dofs_per_cell);
+
+  MappingQ1<dim> mapping;
+  QGauss<dim> quadrature (9);
+  FullMatrix<double> mass (fe_coarse.dofs_per_cell);
+  
+  FEValues<dim> coarse (mapping, fe_coarse, quadrature,
+			update_values
+			| update_JxW_values);
+  coarse.reinit (cell_coarse);
+
+				   // Build coarse level mass matrix
+  for (unsigned int k=0;k<coarse.n_quadrature_points;++k)
+    {
+      for (unsigned int i=0;i<fe_coarse.dofs_per_cell;++i)
+	for (unsigned int j=0;j<fe_coarse.dofs_per_cell;++j)
+	  mass(i,j) += coarse.JxW(k)
+	    * coarse.shape_value(i,k) * coarse.shape_value(j,k);
+    }
+  
+  FullMatrix<double> inverse (fe_coarse.dofs_per_cell);
+  inverse.invert(mass);
+  Vector<double> rhs (fe_coarse.dofs_per_cell);
+  Vector<double> u (fe_coarse.dofs_per_cell);
+
+				   // Coarse level cell of fine grid
+  DoFHandler<dim>::cell_iterator father = dof_fine.begin (0);
+
+  FEValues<dim> fine (mapping, fe_fine, quadrature,
+		      update_values
+		      | update_JxW_values
+		      | update_q_points);
+
+				   // loop over all fine shape functions
+  for (unsigned int n=0;n<fe_fine.dofs_per_cell;++n)
+    {
+      rhs = 0.;
+      DoFHandler<dim>::active_cell_iterator c = father->child(0);
+      
+      c->get_dof_indices(indices);
+
+      fine.reinit(c);
+
+				       // Build a quadrature formula
+				       // for the coarse cell
+				       // from the quadrature points and
+				       // weights of the fine cell. Note
+				       // that this FEValues object hides
+				       // the one of the same name
+				       // declared outside the loop.
+      Quadrature<dim> q_coarse (fine.get_quadrature_points(),
+				fine.get_JxW_values());
+      FEValues<dim> coarse (mapping, fe_coarse, q_coarse,
+			    update_values);
+      coarse.reinit(cell_coarse);
+					   // Build RHS
+	  
+      for (unsigned int k=0;k<fine.n_quadrature_points;++k)
+	{
+	  for (unsigned int i=0;i<fe_coarse.dofs_per_cell;++i)
+	    {
+	      double v = coarse.shape_value(i,k);
+	      double f = fine.shape_value(n,k);
+	      rhs(i) += fine.JxW(k) * v*f;
+	    }
+	}
+				       // Apply the inverse coarse mass
+				       // matrix to obtain the projection
+				       // coefficients.
+      inverse.vmult(u,rhs);
+      for (unsigned int i=0;i<fe_coarse.dofs_per_cell;++i)
+	result[0][i][n] = u(i);
+    }
+  
+				   // Print the table for cell 0; entries
+				   // smaller than 1e-14 in absolute
+				   // value are written as zero.
+  for (unsigned int cell=0;cell<1;++cell)
+    {
+      cout << "static double "
+	   << name << "_"
+	   << dim << "d[] =" << endl << '{' << endl;
+      for (unsigned int i=0;i<fe_coarse.dofs_per_cell;++i)
+	{
+	  for (unsigned int j=0;j<fe_fine.dofs_per_cell;++j)
+	    {
+	      double a = result[cell][i][j];
+	      if ((a<1.e-14) && (a>-1.e-14)) a = 0.;
+	      cout << ' ' << setprecision(8) << a << ",";
+	    }
+	  cout << endl;
+	}
+      cout << "};" << endl << endl;
+    }
+}
+
+// Helper macros for loop(): each declares a local element of degree k
+// and appends a pointer to it, together with a label built from the
+// element family and k, to the vector `elements` (PUSH_Q) or
+// `dgelements` (PUSH_DGQ). Both vectors and the typedef ElPair must be
+// in scope where the macros are expanded.
+//
+// The label of the continuous elements is "q<k>"; it used to be
+// "dgq<k>", copied from the discontinuous variant.
+#define PUSH_Q(k) FE_Q<dim> q ## k(k);\
+ elements.push_back (ElPair(&q ## k, "q" # k))
+#define PUSH_DGQ(k) FE_DGQ<dim> dgq ## k(k);\
+ dgelements.push_back (ElPair(&dgq ## k, "dgq" # k))
+
+/**
+ * Print, for space dimension @p dim, the projection matrices of all
+ * discontinuous elements listed via PUSH_DGQ below. The continuous
+ * elements listed via PUSH_Q are set up as well, but the computation
+ * using them is currently disabled.
+ */
+template <int dim>
+void loop ()
+{
+  Triangulation<dim> tr_coarse;
+  Triangulation<dim> tr_fine;
+  GridGenerator::hyper_cube (tr_coarse, 0, 1);
+  GridGenerator::hyper_cube (tr_fine, 0, 1);
+  tr_fine.refine_global(1);
+
+  typedef pair<const FiniteElement<dim>*, const char*> ElPair ;
+  vector <ElPair> elements(0);
+  vector <ElPair> dgelements(0);
+
+  PUSH_Q(1);
+  PUSH_Q(2);
+  PUSH_Q(3);
+  PUSH_Q(4);
+  PUSH_DGQ(0);
+  PUSH_DGQ(1);
+  PUSH_DGQ(2);
+  PUSH_DGQ(3);
+  PUSH_DGQ(4);
+  PUSH_DGQ(5);
+  PUSH_DGQ(6);
+  PUSH_DGQ(7);
+
+                                   // Buffer for the generated array
+                                   // names. It lives on the stack so
+                                   // nothing has to be freed, and it is
+                                   // zero-filled so the name stays
+                                   // terminated even if the stream
+                                   // below runs out of space.
+  char name[100] = "";
+
+                                   // Disabled: interpolation between
+                                   // the continuous elements. For each
+                                   // i in [0, elements.size()) and j=i
+                                   // this used to run
+//  	ostrstream os (name, 99);
+//  	os << "interpolate " << elements[j].second << " refined onto "
+//  	   << elements[i].second << ends;
+//
+//  	compute_interpolation (tr_coarse, tr_fine,
+//  			       *elements[i].first,
+//  			       *elements[j].first,
+//  			       name);
+
+                                   // Project each discontinuous
+                                   // element on the refined grid onto
+                                   // the same element on the coarse
+                                   // grid.
+  const unsigned int n = dgelements.size();
+  for (unsigned int i=0;i<n;++i)
+    for (unsigned int j=i;j<=i;++j)
+      {
+        ostrstream os (name, 99);
+        os << "project_" << dgelements[j].second << "_refined_onto_"
+           << dgelements[i].second << ends;
+
+        compute_projection (tr_coarse, tr_fine,
+                            *dgelements[i].first,
+                            *dgelements[j].first,
+                            name);
+      }
+}
+
+// Print the tables for one, two and three space dimensions in turn.
+int main ()
+{
+  loop<1> ();
+  loop<2> ();
+  loop<3> ();
+  return 0;
+}
diff --git a/contrib/utilities/lagrange_basis b/contrib/utilities/lagrange_basis
new file mode 100644
index 0000000..0d1de98
--- /dev/null
+++ b/contrib/utilities/lagrange_basis
@@ -0,0 +1,80 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2001 - 2013 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Author: Ralf Hartmann, 2001
+#
+
+#
+# Maple script to compute the coefficients of the LagrangeEquidistant
+# basis functions of degree p. These are used as shape functions for
+# Qp elements. For higher p just change the variable p below.
+# Call
+#   perl -p -e 's/ *t0 = (.*);\n/ $1/g;' lagrange_txt
+# to get a c-code ready to be copied into the source codes.
+#
+
+  p := 10:
+
+  n_functions := p+1:
+
+  # first compute the support points
+  support_points := array(0..n_functions-1):
+  for i from 0 to n_functions-1 do
+    support_points[i] := i/(n_functions-1):
+  od;
+
+  poly := array(0..n_functions-1):
+
+  for i from 0 to n_functions-1 do
+    # note that the interp function wants vectors indexed from
+    #   one and not from zero.
+    values := array(1..n_functions):
+    for j from 1 to n_functions do
+      values[j] := 0:
+    od:
+    values[i+1] := 1:
+
+    shifted_support_points := array (1..n_functions):
+    for j from 1 to n_functions do
+      shifted_support_points[j] := support_points[j-1]:
+    od:
+
+    poly[i] := interp (shifted_support_points, values, x):
+  od:
+
+  readlib(C):
+  writeto(lagrange_output):
+  printf(`      case %d:\n      {\n		static const double x%d[%d]=\n	{`, p,p,(p+1)*(p+1)):
+  a := array(0..n_functions-1, 0..n_functions-1):
+  b := array(0..n_functions-1):
+  # a[i,j] is the jth coefficient of the ith base function.
+  for i from 0 to n_functions-1 do
+    for j from 0 to n_functions-1 do
+      b[j] := coeff(poly[i], x, j):
+    od:
+    C(b[0]):
+    for j from 1 to n_functions-1 do
+      printf(`,`):
+      C(b[j]):
+    od:
+    if (i<n_functions-1) then
+      printf(`,`):
+    fi:
+  od:
+  printf(`};\n		 x=&x%d[0];\n	 break;\n         }\n`, p):
+
+
+
diff --git a/contrib/utilities/makeofflinedoc.sh b/contrib/utilities/makeofflinedoc.sh
new file mode 100755
index 0000000..9a5b299
--- /dev/null
+++ b/contrib/utilities/makeofflinedoc.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2013 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+# This script downloads images referenced in the tutorial steps and patches
+# the URLs to point to the local files. To be run in the doc/doxygen/deal.II
+# directory of an installed deal.II documentation.
+
+if [ ! -f Tutorial.html ]
+then
+  echo "Please run this script in the doc output directory (<install>/doc/doxygen/deal.II)"
+  exit 1
+fi
+
+mkdir -p images || exit 1
+
+echo "Downloading images (press ctrl-c to cancel) ..."
+cd images || exit 1  # never download into, or delete from, the wrong directory
+{
+trap "echo \"(skipping)\"" SIGINT
+wget -q -nd -A svg,png,gif -m -l 1 -np http://www.dealii.org/images/steps/developer/
+}
+rm -f robots.txt*  # there may be no robots.txt to clean up
+cd ..
+
+echo "Patching html files ..."  # [^"]* keeps each match inside one quoted URL
+sed -i 's#"http://www.dealii.org/images/steps/developer/\(step-[^"]*\)"#"images/\1"#g' step_*.html
+sed -i 's#"https://www.dealii.org/images/steps/developer/\(step-[^"]*\)"#"images/\1"#g' step_*.html
+
+echo "all done!"
diff --git a/contrib/utilities/setup_astyle.sh b/contrib/utilities/setup_astyle.sh
new file mode 100755
index 0000000..4f0f53d
--- /dev/null
+++ b/contrib/utilities/setup_astyle.sh
@@ -0,0 +1,20 @@
+#!/bin/sh
+
+PRG="$PWD/programs"
+
+if [ ! -d "$PRG" ]
+then
+    echo "create folder '$PRG'"  # plain quotes: backticks would try to execute $PRG
+    mkdir "$PRG"
+fi
+
+# astyle: download the 2.04 tarball, unpack it below $PRG and build it
+if [ ! -d "$PRG/astyle" ]
+then
+    echo "Downloading and installing astyle."
+    mkdir "$PRG/astyle"
+    wget http://downloads.sourceforge.net/project/astyle/astyle/astyle%202.04/astyle_2.04_linux.tar.gz  > /dev/null
+    tar xfz astyle_2.04_linux.tar.gz -C "$PRG" > /dev/null
+    cd "$PRG/astyle/build/gcc" && \
+    make -j4 > /dev/null
+fi
diff --git a/contrib/utilities/simplify.pl b/contrib/utilities/simplify.pl
new file mode 100644
index 0000000..f352086
--- /dev/null
+++ b/contrib/utilities/simplify.pl
@@ -0,0 +1,89 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2002 - 2013 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+s! -?\d\.\d+e-1[0123456789]/27.! 0.!g;
+s! 0/27.! 0.!g;
+s! (-?)27/27.! ${1}1.!g;
+s! (-?)13\.5/27.! ${1}.5!g;
+s! (-?)20\.25/27.! ${1}.75!g;
+s! (-?)6\.75/27.! ${1}.25!g;
+s! (-?)3\.375/27.! ${1}.125!g;
+s! (-?)10\.125/27.! ${1}.375!g;
+s! (-?)16\.875/27.! ${1}.625!g;
+s! (-?)23\.625/27.! ${1}.875!g;
+s! (-?)1\.6875/27.! ${1}1./16.!g;
+s! (-?)5\.0625/27.! ${1}3./16.!g;
+s! (-?)8\.4375/27.! ${1}5./16.!g;
+s! (-?)11\.8125/27.! ${1}7./16.!g;
+s! (-?)15\.1875/27.! ${1}9./16.!g;
+s! (-?)18\.5625/27.! ${1}11./16.!g;
+s! (-?)21\.9375/27.! ${1}13./16.!g;
+s! (-?)25\.3125/27.! ${1}15./16.!g;
+s! (-?)0\.84375/27.! ${1}1./32.!g;
+s! (-?)2\.53125/27.! ${1}3./32.!g;
+s! (-?)4\.21875/27.! ${1}5./32.!g;
+s! (-?)5\.90625/27.! ${1}7./32.!g;
+s! (-?)7\.59375/27.! ${1}9./32.!g;
+s! (-?)9\.28125/27.! ${1}11./32.!g;
+s! (-?)10\.96875/27.! ${1}13./32.!g;
+s! (-?)12\.65625/27.! ${1}15./32.!g;
+s! (-?)14\.34375/27.! ${1}17./32.!g;
+s! (-?)16\.03125/27.! ${1}19./32.!g;
+s! (-?)17\.71875/27.! ${1}21./32.!g;
+s! (-?)19\.40625/27.! ${1}23./32.!g;
+s! (-?)21\.09375/27.! ${1}25./32.!g;
+s! (-?)22\.78125/27.! ${1}27./32.!g;
+s! (-?)24\.46875/27.! ${1}29./32.!g;
+s! (-?)26\.15625/27.! ${1}31./32.!g;
+s! (-?)0\.421875/27.! ${1}1./64.!g;
+s! (-?)1\.265625/27.! ${1}3./64.!g;
+s! (-?)2\.109375/27.! ${1}5./64.!g;
+s! (-?)2\.953125/27.! ${1}7./64.!g;
+s! (-?)3\.796875/27.! ${1}9./64.!g;
+s! (-?)4\.640625/27.! ${1}11./64.!g;
+s! (-?)5\.484375/27.! ${1}13./64.!g;
+s! (-?)6\.328125/27.! ${1}15./64.!g;
+s! (-?)7\.171875/27.! ${1}17./64.!g;
+s! (-?)8\.015625/27.! ${1}19./64.!g;
+s! (-?)8\.859375/27.! ${1}21./64.!g;
+s! (-?)9\.703125/27.! ${1}23./64.!g;
+s! (-?)10\.546875/27.! ${1}25./64.!g;
+s! (-?)11\.390625/27.! ${1}27./64.!g;
+s! (-?)12\.234375/27.! ${1}29./64.!g;
+s! (-?)13\.078125/27.! ${1}31./64.!g;
+s! (-?)13\.921875/27.! ${1}33./64.!g;
+s! (-?)14\.765625/27.! ${1}35./64.!g;
+s! (-?)15\.609375/27.! ${1}37./64.!g;
+s! (-?)16\.453125/27.! ${1}39./64.!g;
+s! (-?)17\.296875/27.! ${1}41./64.!g;
+s! (-?)18\.140625/27.! ${1}43./64.!g;
+s! (-?)18\.984375/27.! ${1}45./64.!g;
+s! (-?)19\.828125/27.! ${1}47./64.!g;
+s! (-?)20\.671875/27.! ${1}49./64.!g;
+s! (-?)21\.515625/27.! ${1}51./64.!g;
+s! (-?)22\.359375/27.! ${1}53./64.!g;
+s! (-?)23\.203125/27.! ${1}55./64.!g;
+s! (-?)24\.046875/27.! ${1}57./64.!g;
+s! (-?)24\.890625/27.! ${1}59./64.!g;
+s! (-?)25\.734375/27.! ${1}61./64.!g;
+s! (-?)26\.578125/27.! ${1}63./64.!g;
+s! (-?)23\.3826859/27.! ${1}SQRT3/2.!g;
+s! (-?)11\.69134295/27.! ${1}SQRT3/4.!g;
+s! (-?)5\.845671476/27.! ${1}SQRT3/8.!g;
+s! (-?)2\.922835738/27.! ${1}SQRT3/16.!g;
+s! (-?)1\.461417869/27.! ${1}SQRT3/32.!g;
+s! (-?)0\.7307089344/27.! ${1}SQRT3/64.!g;
+s! (-?)4\.384253607/27.! ${1}SQRT3*3./32.!g;
+s! (-?)17\.53701443/27.! ${1}SQRT3*3./8.!g;
diff --git a/contrib/utilities/update-copyright b/contrib/utilities/update-copyright
new file mode 100755
index 0000000..a758599
--- /dev/null
+++ b/contrib/utilities/update-copyright
@@ -0,0 +1,67 @@
+#!/bin/bash
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+
+# Purpose: Update the copyright year of every file based on the last
+#          modification recorded in the git logs
+
+
+if test ! -d source -o ! -d include -o ! -d examples ; then
+  echo "*** This script must be run from the top-level directory of deal.II."
+  exit 1
+fi
+
+
+files="`echo include/deal.II/*/*h \
+             source/*/*cc \
+             source/*/*in \
+             examples/*/*.cc`
+       `find cmake/ | egrep '\.(cmake|in|cc)$'`
+       `find . -name CMakeLists.txt`
+       `find tests/ | egrep '\.(h|cc)$'`
+       "
+
+for i in $files ; do
+  # get the last year this file was modified from the git log.
+  # we don't want to see patches that just updated the copyright
+  # year, so output the dates and log messages of the last 3
+  # commits, throw away all that mention both the words
+  # "update" and "copyright", and take the year of the first
+  # message that remains
+  #
+  # (it should be enough to look at the last 2 messages since
+  # ideally no two successive commits should have updated the
+  # copyright year. let's err on the safe side and take the last
+  # 3 commits.)
+  last_year=`git log -n 3 --date=short --format="format:%cd %s" "$i" | \
+             egrep -i -v "update.*copyright|copyright.*update" | \
+             head -n 1 | \
+             perl -p -e 's/^(\d\d\d\d)-.*/\1/g;'`
+
+  # get the first year this file was modified from the actual
+  # file. this may predate the git log if the file was copied
+  # from elsewhere
+  first_year=`cat "$i" | egrep 'Copyright \(C\) [0-9]{4}' | \
+              perl -p -e "s/.*Copyright \(C\) (\d{4}).*/\1/g;"`
+
+  # print a status message. we really only have to update
+  # the copyright year if the first and last year are different.
+  # skip files for which no year came out of the git log, since
+  # an empty year would leave a dangling "YYYY - " in the header
+  echo "Processing $i: ${first_year} - ${last_year}"
+  if test -n "${last_year}" && test ! "${first_year}" = "${last_year}" ; then
+    perl -pi -e "s/(Copyright \(C\) \d{4})( - \d{4})?(, \d{4}( - \d{4})?)*/\1 - ${last_year}/g;" "$i"
+  fi
+done
diff --git a/contrib/utilities/wrapcomments.py b/contrib/utilities/wrapcomments.py
new file mode 100755
index 0000000..8a0d93f
--- /dev/null
+++ b/contrib/utilities/wrapcomments.py
@@ -0,0 +1,555 @@
+#!/usr/bin/python
+
+# run this script on all headers of deal.II to fix comment line wrapping for
+# doxygen comments.
+# Example:
+# cd include
+# find . -name "*h" -print | while read file;do ../contrib/utilities/wrapcomments.py $file >temp;mv temp $file;done
+
+from __future__ import print_function
+import textwrap
+import sys, re
+wrapper = textwrap.TextWrapper()
+
+# take an array of lines and wrap them to 78 columns and let each line start
+# with @p startwith
+def wrap_block(lines, startwith):
+    longline = " ".join(lines)
+    wrapper.initial_indent = startwith
+    wrapper.subsequent_indent = startwith
+    wrapper.break_long_words = False
+    wrapper.width = 78
+    return wrapper.wrap(longline)
+
+# strips whitespace and leading "*" from each lines
+def remove_junk(lines):
+    out = []
+    for line in lines:
+        line = line.strip()
+        if line.startswith("*"):
+            line = line[1:].strip()
+        out.append(line)
+    return out
+
+# returns True if at least one entry in @p list is contained in @p str
+def one_in(list, str):
+    for li in list:
+        if li in str:
+            return True
+    return False
+
+# returns True if @p str starts with one of the entries in @p list
+def starts_with_one(list, str):
+    for li in list:
+        if str.startswith(li):
+            return True
+    return False
+
+# take a doxygen comment block "/** bla ... */" given as a list of lines and
+# format it in a pretty way rewrapping lines while taking care to keep certain
+# structure.
+def format_block(lines, infostr=""):
+    if len(lines)==1 and "/*" in lines[0] and "*/" in lines[0]:
+        return lines
+
+    if (not "/**" in lines[0]
+          or not "*/" in lines[-1]):
+        print("%s not a code block"%infostr, file=sys.stderr)
+        return lines
+
+    for line in lines[1:-1]:
+        assert(not "/*" in line)
+        assert(not "*/" in line)
+
+    lines[-1]=lines[-1].replace("**/","*/")
+
+    if not lines[0].strip().startswith("/**"):
+        print ("%s error, ignoring code block with junk in same line before"%infostr, file=sys.stderr)
+        return lines
+    if not lines[-1].strip().endswith("*/"):
+        print ("%s error, ignoring code block not ending at end of line"%infostr, file=sys.stderr)
+        return lines
+
+    if lines[0].strip()!="/**":
+        #print ("%s warning code block not starting in separate line"%infostr, file=sys.stderr)
+        idx = lines[0].find("/**")
+        temp = [lines[0][0:idx+3], lines[0][idx+3:]]
+        temp.extend(lines[1:])
+        lines = temp
+    if lines[-1].strip()!="*/":
+        #print ("%s warning code block not ending in separate line"%infostr, file=sys.stderr)
+        idx = lines[-1].find("*/")
+        temp = lines[0:-1]
+        temp.append(lines[-1][0:idx])
+        temp.append(lines[-1][idx:])
+        lines = temp
+
+    idx = lines[0].find("/**")
+    start = lines[0][:idx]+" * "
+    
+    out = [lines[0].rstrip()]
+    idx = 1
+    endidx = len(lines)-1
+    curlines = []
+
+    ops_startline = ["<li>", "@param", "@returns", "@warning", "@ingroup", "@author", "@date", "@related", "@relates", "@relatesalso", "@deprecated", "@image", "@return", "@brief", "@attention", "@copydoc", "@addtogroup", "@todo", "@tparam", "@see", "@note", "@skip", "@skipline", "@until", "@line", "@dontinclude", "@include"]
+
+    # subset of ops_startline that does not want stuff from the next line appended
+    # to this.
+    ops_also_end_paragraph = ["@image", "@skip", "@skipline", "@until", "@line", "@dontinclude", "@include"]
+
+    # stuff handled in the while loop down: @code, @verbatim, @f @ref
+
+    ops_separate_line = ["<ol>", "</ol>", "<ul>", "</ul>", "@{", "@}", "<br>"]
+
+
+    #todo:
+    #  @arg @c @cond  @em @endcond @f{ @internal @name @post @pre  @sa 
+
+    while idx<endidx:
+        if one_in(ops_separate_line, lines[idx]):
+            if curlines!=[]:
+                out.extend(wrap_block(remove_junk(curlines), start))
+                curlines=[]
+            thisline = remove_junk([lines[idx]])[0]
+            for it in ops_separate_line:
+                if it in thisline and thisline!=it:
+                    print ("%s warning %s not in separate line"%(infostr, it), file=sys.stderr)
+            out.append(start + thisline)
+        elif re.match(r'\*\s+- ',lines[idx].strip()):
+            # bullet ('-') list
+            if curlines!=[]:
+                out.extend(wrap_block(remove_junk(curlines), start))
+                curlines=[]
+            thisline = lines[idx].strip()[2:]
+            out.append(start + thisline)
+        elif lines[idx].strip().startswith("* ") and re.match(r'\s*\d+.',lines[idx][3:]):
+            # numbered list
+            if curlines!=[]:
+                out.extend(wrap_block(remove_junk(curlines), start))
+                curlines=[]
+            thisline = lines[idx].strip()[2:]
+            out.append(start + thisline)
+            
+        elif "@page" in lines[idx]:
+            # do not break @page
+            if curlines!=[]:
+                out.extend(wrap_block(remove_junk(curlines), start))
+                curlines=[]
+            thisline = remove_junk([lines[idx]])[0]
+            if not thisline.startswith("@page") and not thisline.startswith("(@page"):
+                print ("%s warning %s not at start of line"%(infostr, "@page"), file=sys.stderr)
+            out.append(start + thisline.strip())
+
+        elif "@ref" in lines[idx]:
+            # @ref link "some long description"
+            # is special, and we mustn't break it
+            if curlines!=[]:
+                out.extend(wrap_block(remove_junk(curlines), start))
+                curlines=[]
+            thisline = remove_junk([lines[idx]])[0]
+            if not thisline.startswith("@ref") and not thisline.startswith("(@ref"):
+                print ("%s warning %s not at start of line"%(infostr, "@ref"), file=sys.stderr)
+
+            # format:
+            # @ref name "some text" blub
+            # or @ref name blurb
+            withquotes = thisline.split('"')
+            if len(withquotes)==3:
+                thisline = withquotes[0]+'"'+withquotes[1]+'"'
+                remain = withquotes[2]
+                for st in [')', '.', ',', ':']:
+                    if remain.startswith(st):
+                        thisline = thisline + st
+                        remain = remain[1:]
+
+                # do not wrap the @ref line:
+                out.append(start + thisline.strip())
+                if len(withquotes[0].strip().split(' '))!=2:
+                    print ("%s warning @ref line looks broken"%(infostr), file=sys.stderr)     
+            elif len(withquotes)==1:
+                words = thisline.strip().split(" ")
+                if len(words)<2 or len(words[0])==0 or len(words[1])==0:
+                    print ("%s warning @ref line looks broken"%(infostr), file=sys.stderr)     
+                thisline = words[0] + ' ' + words[1]
+                out.append(start + thisline.strip())
+                remain = " ".join(words[2:])
+            else:
+                print ("%s warning @ref quotes are not in single line"%(infostr), file=sys.stderr)
+                remain = ''
+
+            if len(remain)>0:
+                curlines.append(remain)
+            
+        elif one_in(ops_startline, lines[idx]):
+            if curlines!=[]:
+                out.extend(wrap_block(remove_junk(curlines), start))
+                curlines=[]
+            thisline = remove_junk([lines[idx]])[0]
+            if not starts_with_one(ops_startline, thisline):
+                for it in ops_startline:
+                    if it in thisline:
+                        print ("%s warning %s not at start of line"%(infostr, it), file=sys.stderr)
+            if one_in(ops_also_end_paragraph, lines[idx]):
+                out.append(lines[idx].rstrip())
+            else:
+                curlines.append(lines[idx])
+        elif one_in(["@code", "@verbatim", "@f[", "@f{"], lines[idx]):
+            if "@f{" in lines[idx]:
+                if not lines[idx].endswith("}{"):
+                    print ("%s warning malformed @f{*}{"%(infostr), file=sys.stderr)
+            if curlines!=[]:
+                out.extend(wrap_block(remove_junk(curlines), start))
+                curlines=[]
+            while True:
+                thisline = lines[idx].rstrip()
+                if thisline.strip()=="*":
+                    thisline = start
+                elif thisline.strip()=="@code" or thisline.strip()=="@endcode":
+                    thisline = start + thisline.strip()
+                elif thisline.strip()[0:2]!="* ":
+                    if thisline[0:len(start)].strip()=="":
+                        # just a missing *, so keep old indentation
+                        thisline = start + thisline[len(start):]
+                    else:
+                        # no way to recover indentation:
+                        print ("%s Error: wrong formatting inside @code block"%infostr, file=sys.stderr)
+                        thisline = start + thisline.strip()
+                else:
+                    thisline = start + thisline.strip()[2:]
+                out.append(thisline.rstrip())
+                if one_in(["@endcode", "@endverbatim", "@f]", "@f}"], lines[idx]):
+                    break
+                idx += 1
+        elif lines[idx].strip()=="*":
+            if curlines!=[]:
+                out.extend(wrap_block(remove_junk(curlines), start))
+                curlines=[]
+            out.append(start[:-1]) #skip whitespace at the end
+        else:
+            curlines.append(lines[idx])
+        idx += 1
+
+    if curlines!=[]:
+        out.extend(wrap_block(remove_junk(curlines), start))        
+
+    out.append(start[0:-2] + lines[-1].strip())
+
+    return out
+
+
+# test the routines:
+lineI = ["   * blub", \
+         "   * two three ", \
+         "   * four"]
+lineO = ["blub", \
+         "two three", \
+         "four"]
+assert(remove_junk(lineI)==lineO)
+
+lineI = ["blub", \
+         "two three", \
+         "four"]
+lineO = ["   * blub two three four"]
+assert(wrap_block(lineI,"   * ")==lineO)
+
+lineI = [" * 1 2 3 4 5 6 7 9 0 1 2 3 4 5 6 7 9 0 1 2 3 4 5 6 7 9 0 1 2 3 4 5 6 7 9 0 1 2",\
+         " * A 4 5 6 7 9 0 1 2 3 4 5 6 7 9 0"]
+assert(wrap_block(remove_junk(lineI)," * ")==lineI)
+
+lineI = [" * Structure which is passed to Triangulation::create_triangulation. It",\
+         " * contains all data needed to construct a cell, namely the indices of the",\
+         " * vertices and the material indicator."]
+assert(wrap_block(remove_junk(lineI)," * ")==lineI)
+
+lineI = ["  /**", \
+         "   * blub", \
+         "   * two three", \
+         "   * four", \
+         "   */"]
+lineO = ["  /**", \
+         "   * blub two three four", \
+         "   */"]
+assert(format_block(lineI)==lineO)
+
+lineI = ["  /**", \
+         "   * blub", \
+         "   * two three", \
+         "   * ", \
+         "   * four", \
+         "   */"]
+lineO = ["  /**", \
+         "   * blub two three", \
+         "   *", \
+         "   * four", \
+         "   */"]
+assert(format_block(lineI)==lineO)
+
+lineI = ["  /**", \
+         "   * blub", \
+         "   * @code", \
+         "   *   two three", \
+         "   * @endcode", \
+         "   * four", \
+         "   */"]
+assert(format_block(lineI)==lineI)
+
+lineI = ["  /**", \
+         "   * blub", \
+         "   * @code ", \
+         "   *   two three", \
+         "   *   two three  ", \
+         "   * ", \
+         "   *   two three", \
+         "   * @endcode ", \
+         "   * four", \
+         "   */"]
+lineO = ["  /**", \
+         "   * blub", \
+         "   * @code", \
+         "   *   two three", \
+         "   *   two three", \
+         "   *", \
+         "   *   two three", \
+         "   * @endcode", \
+         "   * four", \
+         "   */"]
+assert(format_block(lineI)==lineO)
+ 
+lineI = ["  /**", \
+         "   * blub", \
+         "       @code ", \
+         "       two three", \
+         "      @endcode ", \
+         "   * four", \
+         "   */"]
+lineO = ["  /**", \
+         "   * blub", \
+         "   * @code", \
+         "   *   two three", \
+         "   * @endcode", \
+         "   * four", \
+         "   */"]
+assert(format_block(lineI)==lineO)
+
+
+lineI = ["  /**", \
+         "   * blub", \
+         "   * @code ", \
+         "    *   two three", \
+         "   *   two three  ", \
+         " * ", \
+         "       two three", \
+         "    ", \
+         "   * @endcode ", \
+         "   * four", \
+         "   */"]
+lineO = ["  /**", \
+         "   * blub", \
+         "   * @code", \
+         "   *   two three", \
+         "   *   two three", \
+         "   *", \
+         "   *   two three", \
+         "   *", \
+         "   * @endcode", \
+         "   * four", \
+         "   */"]
+assert(format_block(lineI)==lineO)
+
+
+
+lineI = ["  /**", \
+         "   * blub", \
+         "   * <ul>", \
+         "   * <li> bla", \
+         "   * <li> blub", \
+         "   * </ul>", \
+         "   */"]
+assert(format_block(lineI)==lineI)
+ 
+lineI = ["    /** @addtogroup Exceptions", \
+         "     * @{ */"]
+lineO = ["    /**", \
+         "     * @addtogroup Exceptions", \
+         "     * @{", \
+         "     */"]
+assert(format_block(lineI)==lineO)
+
+lineI = [" /** ", \
+         "  *   bla", \
+         "  * @image testing.png", \
+         "  *  blub", \
+         "  */"]
+lineO = [" /**", \
+         "  * bla", \
+         "  * @image testing.png", \
+         "  * blub", \
+         "  */"]
+assert(format_block(lineI)==lineO)
+
+lineI = [" /** ", \
+         "  * @ref a b c d e", \
+         "  * @ref a \"b c\" d dd", \
+         "  */"]
+lineO = [" /**", \
+         "  * @ref a", \
+         "  * b c d e", \
+         "  * @ref a \"b c\"", \
+         "  * d dd", \
+         "  */"]
+assert(format_block(lineI)==lineO)
+
+long = "long "*20
+lineI = [" /** ", \
+         "  * @ref a \""+long+"\" d", \
+         "  */"]
+lineO = [" /**", \
+         "  * @ref a \""+long+"\"", \
+         "  * d", \
+         "  */"]
+assert(format_block(lineI)==lineO)
+
+lineI = [" /** ", \
+         "  * @ref a. c", \
+         "  * @ref a \"b c\". c2", \
+         "  */"]
+lineO = [" /**", \
+         "  * @ref a.", \
+         "  * c", \
+         "  * @ref a \"b c\".", \
+         "  * c2", \
+         "  */"]
+assert(format_block(lineI)==lineO)
+
+# do not break @page:
+longtext = "bla bla"*20
+lineI = [" /**", \
+         "  * @page " + longtext, \
+         "  * hello", \
+         "  */"]
+assert(format_block(lineI)==lineI)
+
+# do not break $very_long_formula_without_spacing$:
+longtext = "blabla"*20
+lineI = [" /**", \
+         "  * a $" + longtext + "$", \
+         "  */"]
+lineO = [" /**", \
+         "  * a", \
+         "  * $" + longtext + "$", \
+         "  */"]
+assert(format_block(lineI)==lineO)
+
+# nested lists
+lineI = [" /**", \
+         "  * Hello:", \
+         "  * - A", \
+         "  * - B", \
+         "  *   - C", \
+         "  *   - D", \
+         "  * - E", \
+         "  *         - very indented", \
+         "  */"]
+lineO = lineI
+assert(format_block(lineI)==lineO)
+
+# @f{}
+lineI = [" /**", \
+         "  * Hello:", \
+         "  * @f{aligned*}{", \
+         "  *   A\\\\", \
+         "  *   B", \
+         "  * @f}", \
+         "  * bla", \
+         "  */"]
+lineO = lineI
+assert(format_block(lineI)==lineO)
+
+# @until 
+lineI = [" /**", \
+         "  * Hello:", \
+         "  * @include a", \
+         "  * bla", \
+         "  * @dontinclude a", \
+         "  * bla", \
+         "  * @line a", \
+         "  * bla", \
+         "  * @skip a", \
+         "  * bla", \
+         "  * @until a", \
+         "  * bla", \
+         "  */"]
+lineO = lineI
+assert(format_block(lineI)==lineO)
+
+# lists
+lineI = [" /**", \
+         "  * Hello:", \
+         "  *  - a", \
+         "  *  - b", \
+         "  * the end.", \
+         "  */"]
+lineO = lineI
+assert(format_block(lineI)==lineO)
+
+# numbered lists
+lineI = [" /**", \
+         "  * Hello:", \
+         "  * 1. a", \
+         "  * 2. b", \
+         "  * the end.", \
+         "  */"]
+lineO = lineI
+assert(format_block(lineI)==lineO)
+
+
+
+#print (lineI)
+#print (format_block(lineI))
+
+
+
+
+# now open the file and do the work
+
+
+args=sys.argv
+args.pop(0)
+
+if len(args)!=1:
+    print("Usage: wrapcomments.py infile >outfile")
+    exit(0)
+
+f = open(args[0])
+lines = f.readlines()
+f.close()
+
+out = []
+cur = []
+inblock = False
+lineidx = 0
+for line in lines:
+    lineidx += 1
+    line = line.replace("\n","")
+    if not inblock and "/**" in line:
+        inblock = True
+        cur = []
+    if inblock:
+        cur.append(line)
+    else:
+        out.append(line)
+    if inblock and "*/" in line:
+        out.extend(format_block(cur, args[0]+":%d"%lineidx))
+        cur = []
+        inblock = False
+
+assert(cur==[])
+
+for line in out:
+    print (line)
+
+
+
+
+
diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt
new file mode 100644
index 0000000..36fc773
--- /dev/null
+++ b/doc/CMakeLists.txt
@@ -0,0 +1,73 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II Authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Set up all necessary bits for the documentation
+#
+IF(DEAL_II_COMPONENT_DOCUMENTATION)
+
+  MESSAGE(STATUS "")
+  MESSAGE(STATUS "Setting up documentation")
+
+  ADD_SUBDIRECTORY(doxygen)
+
+  #
+  # Install the static elements of the html documentation:
+  #
+  INSTALL(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/
+    DESTINATION ${DEAL_II_DOCHTML_RELDIR}
+    COMPONENT documentation
+    PATTERN "CMakeLists.txt" EXCLUDE
+    PATTERN "doxygen" EXCLUDE
+    PATTERN "*.in" EXCLUDE
+    )
+
+  CONFIGURE_FILE(
+    ${CMAKE_CURRENT_SOURCE_DIR}/title.html.in
+    ${CMAKE_CURRENT_BINARY_DIR}/title.html
+    )
+  INSTALL(FILES
+    ${CMAKE_CURRENT_BINARY_DIR}/title.html
+    DESTINATION ${DEAL_II_DOCHTML_RELDIR}
+    COMPONENT documentation
+    )
+
+  MESSAGE(STATUS "Setting up documentation - Done")
+  MESSAGE(STATUS "")
+
+ENDIF(DEAL_II_COMPONENT_DOCUMENTATION)
+
+#
+# Always install a minimalistic README and LICENSE file:
+#
+
+INSTALL(FILES
+  ${CMAKE_SOURCE_DIR}/README.md
+  ${CMAKE_SOURCE_DIR}/LICENSE
+  DESTINATION ${DEAL_II_DOCREADME_RELDIR}
+  COMPONENT library
+  )
+
+#
+# Add a dummy target to make documentation files known to IDEs.
+#
+
+FILE(GLOB _misc
+  ${CMAKE_CURRENT_SOURCE_DIR}/doxygen/headers/*.h
+  ${CMAKE_CURRENT_SOURCE_DIR}/news/changes.h
+  )
+
+ADD_LIBRARY(doxygen_headers OBJECT ${_misc})
+SET_TARGET_PROPERTIES(doxygen_headers PROPERTIES LINKER_LANGUAGE C)
diff --git a/doc/README.md b/doc/README.md
new file mode 100644
index 0000000..33f0385
--- /dev/null
+++ b/doc/README.md
@@ -0,0 +1,10 @@
+This folder contains the html documentation of deal.II
+======================================================
+
+The documentation is generally covered by the same license as the deal.II
+library itself, namely LGPL-2.1+.
+
+Exceptions:
+
+  `external-libraries/p4est-setup.sh` - coming from the p4est project, GPL-2+
+
diff --git a/doc/deal.ico b/doc/deal.ico
new file mode 100644
index 0000000..b7cfd41
Binary files /dev/null and b/doc/deal.ico differ
diff --git a/doc/developers/Toolchain-x86_64-w64-mingw32.sample b/doc/developers/Toolchain-x86_64-w64-mingw32.sample
new file mode 100644
index 0000000..a26bf5a
--- /dev/null
+++ b/doc/developers/Toolchain-x86_64-w64-mingw32.sample
@@ -0,0 +1,16 @@
+#
+# Example Toolchain file for a MinGW32 cross compiler for Windows64
+#
+
+SET(CMAKE_SYSTEM_NAME Windows)
+SET(CMAKE_SYSTEM_PROCESSOR "x86_64")
+
+SET(CMAKE_RC_COMPILER "x86_64-w64-mingw32-windres")
+SET(CMAKE_CXX_COMPILER "x86_64-w64-mingw32-g++")
+SET(CMAKE_C_COMPILER "x86_64-w64-mingw32-gcc")
+SET(CMAKE_Fortran_COMPILER "x86_64-w64-mingw32-gfortran")
+
+SET(CMAKE_FIND_ROOT_PATH "/usr/x86_64-w64-mingw32/")
+SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
diff --git a/doc/developers/Toolchain.sample b/doc/developers/Toolchain.sample
new file mode 100644
index 0000000..d78bd7c
--- /dev/null
+++ b/doc/developers/Toolchain.sample
@@ -0,0 +1,14 @@
+#
+# Example Toolchain file
+#
+
+SET(CMAKE_SYSTEM_NAME Linux)
+SET(CMAKE_SYSTEM_PROCESSOR "x86_64")
+
+SET(CMAKE_C_COMPILER "/usr/bin/gcc")
+SET(CMAKE_CXX_COMPILER "/usr/bin/g++")
+
+SET(CMAKE_FIND_ROOT_PATH "/")
+SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
diff --git a/doc/developers/cmake-internals.html b/doc/developers/cmake-internals.html
new file mode 100644
index 0000000..4c29722
--- /dev/null
+++ b/doc/developers/cmake-internals.html
@@ -0,0 +1,714 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+	  "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+  <head>
+    <title>Build system internals</title>
+    <link href="../screen.css" rel="StyleSheet">
+    <meta name="copyright" content="Copyright (C) 2013, 2015 by the deal.II Authors">
+    <meta name="keywords" content="deal.II">
+  </head>
+
+<body>
+<h1>Build system internals</h1>
+
+<p>
+  This page provides details about the CMake build system. Files
+  processed by the top level <code>CMakeLists.txt</code> script are
+  listed in the TOC in chronological order.
+</p>
+
+<div class="toc">
+  <ol>
+    <li><a href="#codingstyle">Coding convention</a></li>
+    <li>Configuration
+      <ol>
+	<li><a href="#setup"><code>./CMakeLists.txt</code> and
+	    <code>./cmake/setup_*.cmake</code></a></li>
+	<li><a href="#checks"><code>./cmake/checks/check_*.cmake</code></a></li>
+        <li><a href="#findmodules"><code>./cmake/modules/Find*.cmake</code></a></li>
+	<li><a href="#configure"><code>./cmake/configure/configure_*.cmake</code></a></li>
+	<li><a href="#variables">Global variables controlling the build process</a></li>
+      </ol>
+    </li>
+    <li>Target definition and installation
+      <ol>
+	<li><a href="#config.h.in"><code>./include/deal.II/base/config.h.in</code></a></li>
+        <li><a href="#source"><code>./source/CMakeLists.txt</code></a></li>
+        <li><a href="#projectconfig"><code>./cmake/config/CMakeLists.txt</code></a></li>
+      </ol>
+    </li>
+  </ol>
+</div>
+
+<a name="codingstyle"></a>
+<h2> Coding convention </h2>
+<p>
+  Coding conventions are always a matter of choice. Nevertheless, the
+  following rules should be considered:
+  <ul>
+    <li>
+      Statements and keywords are written in all caps.
+    <li>
+      Indenting is done by two spaces; the usual indenting rules apply.
+    <li>
+      The <code>ELSE()</code>, <code>ENDIF()</code>,
+      <code>ENDFOREACH()</code>, etc. statements shall not repeat the
+      corresponding condition in <code>IF()</code>,
+      <code>FOREACH()</code>, etc.
+    <li>
+      To emphasize a comment it may be enclosed by a leading and
+      trailing empty comment line.
+  </ul>
+  An example:
+<pre class="cmake">
+FOREACH(_build ${DEAL_II_BUILD_TYPES})
+  #
+  # Set an appropriate keyword depending on target and build type:
+  #
+  IF(NOT "${CMAKE_BUILD_TYPE}" STREQUAL "DebugRelease")
+    SET(_keyword "general")
+  ELSE()
+    IF(_build MATCHES DEBUG)
+      SET(_keyword "debug")
+    ELSE()
+      SET(_keyword "optimized")
+    ENDIF()
+  ENDIF()
+ENDFOREACH()
+</pre>
+
+<ul>
+  <li>
+    Line break is at 78 characters and should be obeyed whenever
+    reasonable.
+  <li>
+    Long statements should be broken into several lines at reasonable
+    places. Additional lines for a statement are indented by 2
+    spaces.
+  <li>
+    Multiline statements must end with the closing bracket at a
+    single line:
+</ul>
+<pre class="cmake">
+LIST(APPEND CONFIG_LIBRARIES
+  ${_keyword}
+  ${CONFIG_LIBRARIES_${_build}}
+  )
+
+SET_TARGET_PROPERTIES(${DEAL_II_BASE_NAME}${DEAL_II_${build}_SUFFIX}
+  PROPERTIES
+  VERSION ${VERSION}
+  SOVERSION ${VERSION}
+  LINK_FLAGS "${DEAL_II_LINKER_FLAGS_${build}}"
+  COMPILE_DEFINITIONS "${DEAL_II_DEFINITIONS};${DEAL_II_DEFINITIONS_${build}}"
+  COMPILE_FLAGS "${DEAL_II_CXX_FLAGS_${build}}"
+  )
+</pre>
+
+<p>
+  CMake operates almost always with variables in global state. To
+  guard against accidental overwrite of variables the following naming
+  conventions must be followed at all times:
+</p>
+
+<ul>
+  <li>
+    Global (configuration) variables are written in all caps. When
+    introducing a new one, ensure that the name isn't already used
+    somewhere else. Unrelated global variables must never be
+    overwritten.
+  <li>
+    Global variables can be prefixed by <code>DEAL_II_</code>.
+    (Global variables defined by CMake are usually prefixed by
+    <code>CMAKE_</code>.)
+  <li>
+    Local variables should always be named in all lowercase with a
+    leading "_". Local variables cannot be assumed to remain valid.
+    They may get overwritten at any time.
+</ul>
+</p>
+
+<h2>Configuration</h2>
+
+<a name="setup"></a>
+<h3> <code>./CMakeLists.txt</code> and <code>./cmake/setup_*.cmake</code> </h3>
+
+<p>
+  The very first configuration steps after some initial setup in
+  <code>./CMakeLists.txt</code> take place in some
+  <code>./cmake/setup_*.cmake</code> files:
+  <ul>
+    <li> <code>setup_cached_variables.cmake</code>:
+      This sets up all cached variables prior to the call to
+      <code>PROJECT(deal.II)</code>. For details see the comment at the
+      top. Furthermore, some bookkeeping for compiler and linker flags
+      takes place, see <a href="../users/cmake.html#configurebuild">the section
+        about compile flags</a>.
+    <li> <code>setup_deal_ii.cmake</code>:
+      This file is included immediately after the call to
+      <code>PROJECT(deal.II)</code> and will set up all <i>magic
+        numbers</i> such as names, definitions, relative and absolute
+      paths used in the build system. Most of the definitions are
+      guarded with the help of the <code>SET_IF_EMPTY</code> macro so
+      that it is possible to override the values from the command line.
+    <li> <code>setup_compiler_flags.cmake</code>
+      sets up a suitable set of default compile flags for a known
+      compiler by including the appropriate
+      <code>setup_compiler_flags_*.cmake</code> file. When adding new
+      flags or compiler support, please respect the following note
+
+<pre>
+#
+# (./cmake/setup_compiler_flags.cmake)
+#
+# ####################
+# #     FAT NOTE:    #
+# ####################
+#
+# All configuration in setup_compiler_flags.cmake and
+# setup_compiler_flags_&lt;compiler&gt;.cmake shall ONLY modify:
+#
+#   DEAL_II_CXX_FLAGS
+#   DEAL_II_CXX_FLAGS_DEBUG
+#   DEAL_II_CXX_FLAGS_RELEASE
+#   DEAL_II_LINKER_FLAGS
+#   DEAL_II_LINKER_FLAGS_DEBUG
+#   DEAL_II_LINKER_FLAGS_RELEASE
+#
+# All modifications shall be guarded with the ENABLE_IF_SUPPORTED
+# or ENABLE_IF_LINKS macro, e.g.
+#
+#   ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-fpic")
+#   ENABLE_IF_LINKS(DEAL_II_LINKER_FLAGS "-Wl,--as-needed")
+#
+# Compiler flags for platform dependent optimization (such as
+# -march=native) must always be guarded with
+# DEAL_II_ALLOW_PLATFORM_INTROSPECTION:
+#
+#   IF(DEAL_II_ALLOW_PLATFORM_INTROSPECTION)
+#     ENABLE_IF_SUPPORTED(DEAL_II_CXX_FLAGS "-march=native")
+#   ENDIF()
+#
+# Checks for compiler features (such as C++11 support) and compiler
+# specific bugs that
+#   - usually set up further configuration (such as preprocessor
+#     definitions)
+#   - disable a specific flag for a specific compiler version.
+#
+# belong to the corresponding file:
+#
+#   ./cmake/checks/check_01_compiler_features.cmake
+#   ./cmake/checks/check_01_cpu_features.cmake
+#   ./cmake/checks/check_01_cxx_features.cmake
+#   ./cmake/checks/check_02_system_features.cmake
+#   ./cmake/checks/check_03_compiler_bugs.cmake
+#
+</pre>
+  </ul>
+</p>
+
+
+<a name="checks"></a>
+<h3> <code>./cmake/checks/check_*.cmake</code> </h3>
+
+<p>
+  The next step in the configuration process is to include all
+  checks residing under <code>./cmake/checks</code>. Currently
+  there are:
+
+<pre>
+./cmake/checks/check_01_compiler_features.cmake
+  - Search for support for compiler dependent features such as stack
+    trace support, demangler support, etc.
+
+./cmake/checks/check_01_cpu_features.cmake
+  - Platform introspection for CPU features goes here and must be
+    guarded with DEAL_II_ALLOW_PLATFORM_INTROSPECTION
+
+./cmake/checks/check_01_cxx_features.cmake
+  - Check for supported C++ language features such as sufficient C++11
+    support
+
+./cmake/checks/check_02_system_features.cmake
+  - Checks for specific platform (Linux/Darwin/CYGWIN/Windows..)
+    features and support
+
+./cmake/checks/check_03_compiler_bugs.cmake
+  - Check for compiler bugs
+</pre>
+
+<ul>
+  <li>
+    A check usually consists of a call to one of the macros below
+    that will set up a global variable. Please stick to the naming
+    convention <code>HAVE_&lt;..&gt;</code>, resp.
+    <code>DEAL_II_(HAVE|USE)_&lt;..&gt;</code>. <b>It is forbidden to
+      use a variable name starting with
+      <code>DEAL_II_WITH_&lt;..&gt;</code> because this prefix is
+      exclusively reserved for the feature mechanism described
+      below.</b> For some tests it might be necessary to manipulate
+    <a href="#variables">global variables</a>.
+  <li>
+    A platform check should have a prominent comment explaining what
+    it does and why it is there, and should state author and year.
+  <li>
+    There are a number of readily available platform check macros:
+
+<pre>
+CHECK_CXX_SOURCE_COMPILES(source variable)
+  - Checks whether it is possible to compile _and_ link the code snippet
+    &lt;source&gt;. If successful, variable is set to 1.
+
+CHECK_CXX_SOURCE_RUNS(source variable)
+  - variable is set to 1 if &lt;source&gt; could be successfully compiled and
+    linked and the resulting program ran and exited without error.
+    Avoid this macro outside of a DEAL_II_ALLOW_PLATFORM_INTROSPECTION
+    guard. A sensible fallback should be provided if the check cannot
+    be run (e.g. when cross compiling).
+
+CHECK_CXX_COMPILER_BUG(source variable)
+  - Inverts the logic of CHECK_CXX_SOURCE_COMPILES(), i.e. variable is
+    set to 1 if it was not possible to compile and link &lt;source&gt;.
+
+CHECK_INCLUDE_FILE_CXX(header variable)
+  - Check whether it is possible to compile and link a dummy program
+    including &lt;header&gt;.
+
+CHECK_FUNCTION_EXISTS(function variable)
+  - Check for the existence of a function prototype with name
+    &lt;function&gt;. (Don't forget to specify the link libraries, see
+    below.) Use CHECK_CXX_SYMBOL_EXISTS to search for C++ function
+    definitions instead, if possible.
+
+CHECK_CXX_SYMBOL_EXISTS(symbol header_file variable)
+  - Check for the existence of a symbol definition in the header_file
+    as well as for the presence in the current link interface
+    (Don't forget to specify the link libraries, see below.)
+
+CHECK_CXX_COMPILER_FLAG(flag variable)
+  - Sets the variable to 1 if the compiler understands the flag.
+</pre>
+
+  <li> Necessary compiler flags can easily be set in the string variable
+    <code>CMAKE_REQUIRED_FLAGS</code>. There is a small macro that does this
+    job nicely:
+
+<pre class="cmake">
+PUSH_CMAKE_REQUIRED("-Werror")
+CHECK_CXX_SOURCE_COMPILES(...)
+RESET_CMAKE_REQUIRED()
+</pre>
+
+  <li> Include directories and libraries necessary for
+    linkage can be set in the list variables
+    <code>CMAKE_REQUIRED_INCLUDES</code> and
+    <code>CMAKE_REQUIRED_LIBRARIES</code>. It is best to append these
+    lists and later on reset <code>CMAKE_REQUIRED_*</code> (including
+    <code>CMAKE_REQUIRED_FLAGS</code>) to their default values:
+
+<pre class="cmake">
+LIST(APPEND CMAKE_REQUIRED_INCLUDES &lt;a list of includes&gt;)
+LIST(APPEND CMAKE_REQUIRED_LIBRARIES &lt;a list of libraries&gt;)
+CHECK_CXX_SOURCE_COMPILES(...)
+RESET_CMAKE_REQUIRED()
+</pre>
+</ul>
+</p>
+
+<a name="findmodules"></a>
+<h3> <code>./cmake/modules/Find*.cmake</code> </h3>
+
+<p>
+  These are find modules for the <code>configure_*.cmake</code> files
+  and the <code>CONFIGURE_FEATURE</code> macro as will be explained later.
+  It is crucial that a find module behaves correctly. Therefore, the
+  following rules are mandatory:
+  <ul>
+    <li>
+      The <i>sole</i> purpose of a find module is to find an external
+      library (no deal.II specific dependency checking, no
+      compatibility checking).
+    <li>
+      It should do so by appropriate <code>DEAL_II_FIND_LIBRARY</code>,
+      <code>DEAL_II_FIND_PATH</code> and <code>DEAL_II_FIND_FILE</code>
+      calls (same syntax as the native CMake functions; just a small
+      wrapper to provide some useful output). The results of these calls
+      should be the only cached variables.
+    <li>
+      A <code>WARNING</code>, <code>SEND_ERROR</code> or
+      <code>FATAL_ERROR</code> must be avoided (the only exception is
+      the <code>REQUIRED</code> keyword).
+    <li>
+      The following uncached variables are recognized by the feature
+      configuration mechanism:
+<pre class="cmake">
+FEATURE_FOUND
+FEATURE_LIBRARIES (with optimized, debug, release keywords)
+FEATURE_LIBRARIES(_DEBUG|_RELEASE)
+FEATURE_INCLUDE_DIRS FEATURE_USER_INCLUDE_DIRS
+FEATURE_LINKER_FLAGS(|_DEBUG|_RELEASE)
+FEATURE_CXX_FLAGS(|_DEBUG|_RELEASE)
+FEATURE_DEFINITIONS(|_DEBUG|_RELEASE)
+FEATURE_VERSION
+FEATURE_VERSION(_MAJOR|_MINOR|_SUBMINOR)
+</pre>
+      The <code>DEAL_II_PACKAGE_HANDLE</code> macro should be exclusively
+      used for setting up these variables (except the version variants). An
+      example invocation is
+<pre class="cmake">
+DEAL_II_PACKAGE_HANDLE(UMFPACK
+  LIBRARIES
+    REQUIRED UMFPACK_LIBRARY
+    OPTIONAL CHOLMOD_LIBRARY CCOLAMD_LIBRARY COLAMD_LIBRARY CAMD_LIBRARY ${_suitesparse_config}
+    REQUIRED AMD_LIBRARY
+    OPTIONAL METIS_LIBRARIES LAPACK_LIBRARIES rt_LIBRARY
+  INCLUDE_DIRS
+    REQUIRED UMFPACK_INCLUDE_DIR AMD_INCLUDE_DIR
+    OPTIONAL SuiteSparse_config_INCLUDE_DIR
+  LINKER_FLAGS
+    OPTIONAL LAPACK_LINKER_FLAGS
+  CLEAR
+    UMFPACK_LIBRARY CHOLMOD_LIBRARY CCOLAMD_LIBRARY COLAMD_LIBRARY
+    CAMD_LIBRARY SuiteSparse_config_LIBRARY AMD_LIBRARY UMFPACK_INCLUDE_DIR
+    AMD_INCLUDE_DIR SuiteSparse_config_INCLUDE_DIR
+  )
+</pre>
+      The macro concatenates all specified variables into the final
+      <code>FEATURE_SUFFIX</code> variable. Hereby, a feature is
+      successfully found if all <code>REQUIRED</code> variables are
+      non-empty and not set to <code>"-NOTFOUND"</code>.
+      <code>OPTIONAL</code> variables are just filtered out in this case.
+      As a last set of parameters the full list of cached search result
+      variables must be specified after the <code>CLEAR</code> keyword -
+      this is used to provide a possibility to undo a feature search.
+    <li>
+      Only "local" variables "<code>_&lt;all lowercase&gt;</code>" or
+      "global" variables prefixed by <code>FEATURE_</code> may be
+      altered. Do not set <code>DEAL_II_*</code> or <code>CMAKE_*</code>
+      variables directly!
+    <li>
+      A hint with <code>FEATURE_DIR</code> can be set up for
+      convenience. It is best to start the <code>Find</code> module by
+<pre class="cmake">
+SET(FEATURE_DIR "" CACHE PATH "short description")
+SET_IF_EMPTY(FEATURE_DIR "$ENV{FEATURE_DIR}")
+</pre>
+      and use <code>FEATURE_DIR</code> as a hint.
+</ul>
+
+
+<a name="configure"></a>
+<h3> <code>./cmake/configure/configure_*.cmake</code> </h3>
+
+<p>
+  The final step in the configuration phase is the setup of features
+  (which refer to external or bundled libraries
+  <acronym>deal.II</acronym> can optionally interface with.)
+</p>
+
+<p>
+  At bare minimum, a <code>configure_&lt;feature&gt;.cmake</code>
+  file for a feature just consists of a call to the
+  <code>CONFIGURE_FEATURE(&lt;FEATURE&gt;)</code> macro which is
+  implemented in
+  <code>./cmake/macros/macro_configure_feature.cmake</code>.
+  In this case the corresponding <code>Find&lt;FEATURE&gt;.cmake</code>
+  module is used to determine whether an external dependency can be
+  resolved or not. Depending on the current state of
+  <code>DEAL_II_WITH_&lt;FEATURE&gt;</code> (see
+  <a href="../users/cmake.html#configurefeature">here</a>) the
+  configuration variables
+<pre>
+FEATURE_LIBRARIES
+FEATURE_LIBRARIES(|_DEBUG|_RELEASE)
+FEATURE_(|USER_|BUNDLED_)INCLUDE_DIRS
+FEATURE_LINKER_FLAGS(|_DEBUG|_RELEASE)
+FEATURE_CXX_FLAGS(|_DEBUG|_RELEASE)
+FEATURE_DEFINITIONS(|_DEBUG|_RELEASE)
+</pre>
+  are appended to the set of <a href="#variables">global variables</a>
+  and <code>DEAL_II_WITH_&lt;FEATURE&gt;</code> is set to
+  <code>TRUE</code>.
+</p>
+
+<p>
+  It is possible to override this default behaviour with the following
+  variables and macros (all of them are optional and will be replaced
+  by an appropriate default action if unset):
+  <ul>
+    <li>
+     <code>&lt;FEATURE&gt;</code> means all caps,
+     <code>&lt;feature&gt;</code> means all lowercase
+
+    <li>
+      In <code>./cmake/configure/configure_&lt;feature&gt;.cmake</code>:
+<pre>
+FEATURE_&lt;FEATURE&gt;_DEPENDS              (a variable)
+  - a variable which contains an optional list of other features
+    this feature depends on (and which have to be enabled for this feature
+    to work.)
+    Features must be given with short name, i.e. without DEAL_II_WITH_
+
+FEATURE_&lt;FEATURE&gt;_AFTER                (a variable)
+  - a variable which contains an optional list of other features
+    that have to be configured prior to this feature
+    Features must be given with short name, i.e. without DEAL_II_WITH_
+
+FEATURE_&lt;FEATURE&gt;_FIND_EXTERNAL(var)   (a macro)
+  - which should set var to TRUE if all dependencies for the feature are
+    fulfilled. In this case all necessary variables for
+    FEATURE_&lt;FEATURE&gt;_CONFIGURE_EXTERNAL must be set.
+    Otherwise var should remain unset.
+    If this macro is undefined, FIND_PACKAGE(&lt;FEATURE&gt;) is
+    called directly instead.
+
+FEATURE_&lt;FEATURE&gt;_CONFIGURE_EXTERNAL()  (a macro)
+  - which should set up all necessary configuration for the feature with
+    external dependencies. If something goes wrong this macro must
+    issue a FATAL_ERROR.
+    If this macro is undefined, the information provided in
+    &lt;FEATURE&gt;_LIBRARIES, &lt;FEATURE&gt;_INCLUDE_DIRS and
+    &lt;FEATURE&gt;_LINKER_FLAGS is used for the build.
+
+FEATURE_&lt;FEATURE&gt;_CONFIGURE_BUNDLED()  (a macro)
+  - which should setup all necessary configuration for the feature with
+    bundled source dependencies. If something goes wrong this macro must
+    issue a FATAL_ERROR.
+
+FEATURE_&lt;FEATURE&gt;_ERROR_MESSAGE()      (a macro)
+  - which should print a meaningful error message (with FATAL_ERROR) for
+    the case that no external library was found (and bundled is not
+    allowed to be used.) If not defined, a suitable default error message
+    will be printed.
+</pre>
+
+    <li>
+      In <code>./bundled/configure_bundled.cmake</code>:
+<pre>
+FEATURE_&lt;FEATURE&gt;_HAVE_BUNDLED         (a boolean)
+  - which should either be set to TRUE if all necessary libraries of the
+    feature come bundled with deal.II and hence can be supported
+    without external dependencies, or unset.
+
+DEAL_II_FORCE_BUNDLED_&lt;FEATURE&gt;        (an option)
+  - If &lt;feature&gt; can be set up by bundled libraries, this
+    configuration option must be present to force a use of bundled
+    dependencies
+</pre>
+
+    <li>
+      Furthermore, if
+      <code>FEATURE_&lt;FEATURE&gt;_BUNDLED_CONFIGURED</code> is set to
+      <code>TRUE</code> the file <code>./bundled/CMakeLists.txt</code>
+      must compile and install the bundled package appropriately.
+  </ul>
+</p>
+
+
+
+<a name="variables"></a>
+<h3> Global variables controlling the build process </h3>
+
+<p>
+  The following list describes all global variables controlling the
+  build process and the visibility associated with it (internal use for
+  compiling deal.II, externally used variables will get exported in
+  deal.IIConfig.cmake). Lists should be manipulated with
+  <code>LIST(APPEND ...)</code>, flags with <code>ADD_FLAGS(...)</code>
+  (or if it is necessary to guard them with
+  <code>ENABLE_IF_SUPPORTED(...)</code>.)
+</p>
+<p>
+  <b>Feature configuration must not be added directly to these variables but
+    to corresponding <code>&lt;FEATURE&gt;_*</code> variables, instead.
+  Feature configuration variables get appended to the below list of global
+  configuration variables automatically.</b>
+</p>
+
+
+  <ul>
+    <li>
+      The general (internal) logic for variables applies:
+      <ul>
+        <li>A variable name without <code>_DEBUG</code> or
+          <code>_RELEASE</code>: Used for all targets
+        <li> <code>&lt;...&gt;_DEBUG</code>: <i>additionally</i> used for debug targets
+        <li> <code>&lt;...&gt;_RELEASE</code>: <i>additionally</i> used for release targets
+      </ul>
+
+    <li>
+      For internal and external use, used to keep track of external
+      libraries, the <acronym>deal.II</acronym> library and user
+      programs have to be linked against:
+      <ul>
+        <li> <code>DEAL_II_LIBRARIES</code>
+        <li> <code>DEAL_II_LIBRARIES_DEBUG</code>
+        <li> <code>DEAL_II_LIBRARIES_RELEASE</code>
+      </ul>
+
+    <li>
+      For internal use, for setting necessary include dirs for the compilation of the
+      <acronym>deal.II</acronym> library:
+      <ul>
+        <li> <code>DEAL_II_INCLUDE_DIRS</code>
+      </ul>
+    <li>
+      Used to keep track of external include dirs, necessary for the
+      compilation of user programs:
+      <ul>
+        <li> <code>DEAL_II_USER_INCLUDE_DIRS</code>
+      </ul>
+    <li>
+      Include dirs from bundled packages necessary for the compilation of
+      the library and user projects out of the build directory:
+      <ul>
+        <li> <code>DEAL_II_BUNDLED_INCLUDE_DIRS</code>
+      </ul>
+
+    <li>
+      For internal use, for setting necessary preprocessor definitions
+      (<code>-D&lt;...&gt;</code>) for the compilation of the
+      deal.II library:
+      <ul>
+        <li> <code>DEAL_II_DEFINITIONS</code>
+        <li> <code>DEAL_II_DEFINITIONS_DEBUG</code>
+        <li> <code>DEAL_II_DEFINITIONS_RELEASE</code>
+      </ul>
+    <li>
+      For external use, used to keep track of external preprocessor
+      definitions, necessary for the compilation of user programs:
+      <ul>
+        <li> <code>DEAL_II_USER_DEFINITIONS</code>
+        <li> <code>DEAL_II_USER_DEFINITIONS_DEBUG</code>
+        <li> <code>DEAL_II_USER_DEFINITIONS_RELEASE</code>
+      </ul>
+
+    <li>
+      For internal and external use, for setting necessary compiler flags,
+      e.g. <code>-std=c++11</code> (if available):
+      <ul>
+        <li> <code>DEAL_II_CXX_FLAGS</code>
+        <li> <code>DEAL_II_CXX_FLAGS_DEBUG</code>
+        <li> <code>DEAL_II_CXX_FLAGS_RELEASE</code>
+      </ul>
+
+    <li>
+      For internal and external use, for setting necessary linker flags for
+      the deal.II library and user programs:
+      <ul>
+        <li> <code>DEAL_II_LINKER_FLAGS</code>
+        <li> <code>DEAL_II_LINKER_FLAGS_DEBUG</code>
+        <li> <code>DEAL_II_LINKER_FLAGS_RELEASE</code>
+      </ul>
+
+  </ul>
+</p>
+
+
+<h2>Target definition and installation</h2>
+<a name="config.h.in"></a>
+<h3> <code>./include/deal.II/base/config.h.in</code> </h3>
+
+In contrast to autoconf there is no intermediate step any more that
+automatically generates config.h.in. The setup in this file has to be
+done by hand. Please note:
+  <ul>
+    <li> <code>config.h.in</code> should only contain a minimum of
+    necessary compile definitions to avoid unnecessary recompilation if
+    configuration changes.
+    <li> Definition toggles in <code>config.h.in</code> should have a
+      prominent comment explaining it and should be grouped by file
+      exporting the definition.
+  </ul>
+
+
+<a name="source"></a>
+<h3><code>./source/CMakeLists.txt</code></h3>
+
+<p>
+  All parts of the library are organized into logical object libraries
+  with their respective sources lying under
+  <code>./source/&lt;foo&gt;</code>, or
+  <code>./bundled/&lt;foo&gt;/&lt;...&gt;</code>. The actual setup of
+  an object library happens within those subdirectories with the help of
+  two macros:
+<pre class="cmake">
+#
+# Glob for all header files associated with the object target:
+# As this list is only for cosmetic reasons, so that associated header
+# files show up in IDEs, we don't manage an explicit list (with the
+# trade-off to have to run "make rebuild_cache" when a new header file
+# emerges...)
+#
+FILE(GLOB _header
+  ${CMAKE_SOURCE_DIR}/include/deal.II/dofs/*.h
+  )
+
+#
+# A list of source files forming the object target:
+#
+SET(_src
+  ...
+  dof_tools.cc
+  )
+
+#
+# A list of instantiations that must be expanded:
+#
+SET(_inst
+  ...
+  dof_tools.inst.in
+  )
+
+#
+# The following macro will set up an obj_dofs.debug and
+# obj_dofs.release target  with appropriate compile flags and
+# definitions for a simultaneous build of debug and release library.
+# Furthermore, the object name will be stored in
+#   ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/deal_ii_objects_(debug/release)
+# so that it is available in global scope.
+#
+# Header files and instantiation files (${_header}, ${_inst}) are added
+# for cosmetic reasons, so that they show up in IDEs.
+#
+DEAL_II_ADD_LIBRARY(obj_dofs OBJECT ${_src} ${_header} ${_inst})
+
+#
+# This macro will set up an obj_dofs.inst target for expanding all
+# files listed in ${_inst}. Appropriate target dependencies
+# will be added to obj_dofs.debug and obj_dofs.release.
+#
+EXPAND_INSTANTIATIONS(obj_dofs "${_inst}")
+</pre>
+</p>
+
+<p>
+  Later, all object targets are collected in
+  <code>./source/CMakeLists.txt</code> to define the actual debug and
+  release libraries. For further details, see
+  <code>./source/CMakeLists.txt</code> and
+  <code>./cmake/macros/macro_deal_ii_add_library.cmake</code>.
+</p>
+
+<a name="projectconfig"></a>
+<h3><code>./cmake/config/CMakeLists.txt</code></h3>
+
+<p>
+  The final bits of configuration happen in
+  <code>./cmake/config/CMakeLists.txt</code> where the templates for
+  the project configuration <code>deal.IIConfig.cmake</code> and the
+  compatibility file <code>Make.global_options</code> get expanded.
+  Furthermore, the configuration for the template expansion mechanism
+  resides under <code>./cmake/config/template-arguments.in</code>.
+</p>
+
+<hr />
+<div class="right">
+  <a href="http://validator.w3.org/check?uri=referer" target="_top">
+    <img style="border:0" src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+  <a href="http://jigsaw.w3.org/css-validator/check/referer" target="_top">
+    <img style="border:0;width:88px;height:31px" src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+</div>
+
+</body>
+</html>
diff --git a/doc/developers/porting.html b/doc/developers/porting.html
new file mode 100644
index 0000000..7635de0
--- /dev/null
+++ b/doc/developers/porting.html
@@ -0,0 +1,166 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+	  "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+  <head>
+    <title>Porting deal.II</title>
+    <link href="../screen.css" rel="StyleSheet" media="screen">
+    <meta name="copyright" content="Copyright (C) 2000, 2001, 2002, 2005, 2010, 2012, 2013, 2015 by the deal.II Authors">
+    <meta name="keywords" content="deal.II porting">
+  </head>
+
+  <body>
+
+
+    <h1>Porting <acronym>deal.II</acronym> to new systems</h1>
+
+    <p>
+      <acronym>deal.II</acronym> uses very few
+      <a href="http://www.opengroup.org/austin/">POSIX</a> specific system
+      features and is otherwise fairly ISO (1998) C++ Standard compliant.
+
+      Consequently, there is a good chance that <acronym>deal.II</acronym>
+      will run on a reasonably well behaved system besides the ones listed
+      in the <a href="../readme.html" target="body">ReadMe</a>. Nevertheless,
+      there are cases where some adjustments are necessary.
+    </p>
+
+    <h2>Unknown compiler</h2>
+
+    <p>
+      Currently, the <acronym>deal.II</acronym> CMake build system
+      recognizes
+      <a href="http://gcc.gnu.org/">gcc</a>,
+      <a href="http://clang.llvm.org/">clang</a>, as well as
+      <a href="http://software.intel.com/en-us/intel-compilers">icc</a>, and
+      sets up reasonable default compiler flags.
+      <ul>
+        <li>
+          To start porting to an unknown compiler, specify
+          <code>-DDEAL_II_SETUP_DEFAULT_COMPILER_FLAGS=OFF</code> and set all
+          necessary compiler flags by hand via
+<pre>
+DEAL_II_CXX_FLAGS         - used during all builds
+DEAL_II_CXX_FLAGS_DEBUG   - additional flags for the debug library
+DEAL_II_CXX_FLAGS_RELEASE - additional flags for the release library
+</pre>
+          After that try to compile the library with minimal external
+          dependencies (<code>-DDEAL_II_ALLOW_AUTODETECTION=OFF</code>, for
+          further information see the <a href="../users/cmake.html">deal.II CMake
+            documentation</a>).
+        <li>
+          For adding permanent support for the unknown compiler to the
+          build system, have a look at
+<pre>
+cmake/setup_compiler_flags.cmake
+cmake/setup_compiler_flags_gnu.cmake
+cmake/setup_compiler_flags_icc.cmake
+</pre>
+          Patches are highly welcome! See <a href="http://www.dealii.org/participate.html">here</a>
+          for information on how to get in contact with us.
+        <li>
+          You might want to have a look at
+<pre>
+cmake/checks/check_01_compiler_features.cmake
+cmake/checks/check_01_cxx_features.cmake
+cmake/checks/check_03_compiler_bugs.cmake
+include/deal.II/base/config.h.in
+</pre>
+          to see how compiler specific checks are done.
+      </ul>
+    </p>
+
+    <h2>Porting to a new platform</h2>
+
+    <p>
+      <acronym>deal.II</acronym> should support almost all reasonably
+      <a href="http://www.opengroup.org/austin/">POSIX</a> compliant
+      platforms out of the box. Nevertheless, the following bits of
+      information might help:
+      <ul>
+        <li>
+          The build system of <acronym>deal.II</acronym> uses <a
+          href="http://www.cmake.org/" target="_top">CMake</a>.  So,
+          in order to port <acronym>deal.II</acronym> to a new platform,
+          it is obviously necessary that <a href="http://www.cmake.org/"
+          target="_top">CMake</a> supports the platform in question
+          with at least one generator for a native build tool, see <a
+          href="http://www.cmake.org/cmake/help/documentation.html">here</a>.
+        <li>
+          <acronym>deal.II</acronym> is mainly developed with <a
+          href="http://gcc.gnu.org/">gcc</a> on GNU/Linux, so it is
+          best to begin porting to a new platform with the help of <a
+          href="http://gcc.gnu.org/">gcc</a>.
+          After that a platform specific compiler might be tried.
+        <li>
+          Almost all <a href="http://www.opengroup.org/austin/">POSIX</a>
+          specific code is well guarded with fall-back code in case the
+          specific POSIX function is not available. There is (currently)
+          one exception, though: Certain routines in
+          <code>source/base/timer.cc</code> have implementations for POSIX
+          and Windows and throw a compile time error on other platforms.
+        <li>
+          Have a look at
+<pre>
+cmake/checks/check_03_compiler_bugs.cmake
+cmake/checks/check_01_compiler_features.cmake
+cmake/checks/check_01_cxx_features.cmake
+cmake/checks/check_02_system_features.cmake
+include/deal.II/base/config.h.in
+</pre>
+          to see how platform and compiler specific checks are done.
+	<li>
+          Of course, we would be happy to hear about the changes you made
+          for your system, so that we can include them into the next version
+          of the library!
+      </ul>
+    </p>
+
+    <h2>Cross compiling</h2>
+
+    <p>
+      It is possible to use <a href="http://www.cmake.org/"
+      target="_top">CMake</a> to cross compile
+      <acronym>deal.II</acronym> for a foreign platform.
+      Further information on that topic can be found at the <a
+      href="http://www.cmake.org/Wiki/CMake_Cross_Compiling">CMake
+      wiki</a>.
+    </p>
+
+    <p>
+      You have to set up a native deal.II build directory first and run
+      <code>make expand_instantiations_exe</code> in it. The executable is
+      needed for the build system (and obviously the cross compiled version
+      cannot be used). Locate the <code>expand_instantiations</code>
+      executable (it usually resides under
+      <code>${CMAKE_BINARY_DIR}/bin</code>) and export its location with the
+      <code>PATH</code> environment variable.
+    </p>
+
+    <p>
+      Assuming you have a working cross compilation toolchain, the next
+      step is to set up a
+      <a href="Toolchain.sample"><i>toolchain file</i></a>
+      (or for
+      <a href="Toolchain-x86_64-w64-mingw32.sample">Windows64 using MinGW</a>).
+      After that invoke <code>cmake</code> with something like:
+<pre>
+$ cmake -DCMAKE_TOOLCHAIN_FILE=&lt;...&gt;/Toolchain.sample \
+        -DDEAL_II_FORCE_BUNDLED_BOOST=ON \
+        -DDEAL_II_ALLOW_AUTODETECTION=OFF \
+        ../deal.II
+</pre>
+      where <code>CMAKE_TOOLCHAIN_FILE</code> points to the toolchain file.
+      The remaining configuration can be adjusted at will, see <a
+      href="../users/cmake.html">the documentation</a>.
+    </p>
+
+    <hr />
+    <div class="right">
+      <a href="http://validator.w3.org/check?uri=referer" target="_top">
+        <img style="border:0" src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+      <a href="http://jigsaw.w3.org/css-validator/check/referer" target="_top">
+        <img style="border:0;width:88px;height:31px" src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+    </div>
+
+  </body>
+</html>
diff --git a/doc/developers/testsuite.html b/doc/developers/testsuite.html
new file mode 100644
index 0000000..d34da0b
--- /dev/null
+++ b/doc/developers/testsuite.html
@@ -0,0 +1,863 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+                 "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+  <meta http-equiv="Content-type" content="text/html;charset=UTF-8">
+  <title>The deal.II Testsuite</title>
+  <link href="../screen.css" rel="StyleSheet">
+  <meta name="copyright" content="Copyright (C) 1998 - 2015 by the deal.II Authors">
+  <meta name="keywords" content="deal dealii finite elements fem triangulation">
+  <meta http-equiv="content-language" content="en">
+</head>
+<body>
+
+
+    <h1>The deal.II Testsuite</h1>
+
+    <p>
+      deal.II has a testsuite that has, at the time this article was last
+      updated (April 2015), some 3,600 small programs (growing by roughly
+      one per day) that we run every time we make a change to make sure
+      that no existing functionality is broken. The expected output for
+      every test is stored in an <code>*.output</code> file, and when you run a
+      test you are notified if a test produces different output.
+    </p>
+
+    <p>
+      These days, every time we add a significant piece of functionality,
+      we add at least one new test to the testsuite, and we also do so if
+      we fix a bug, in both cases to make sure that future changes do not
+      break this functionality (again). Machines running the testsuite
+      submit the results to
+      <a href="http://cdash.kyomu.43-1.org/index.php?project=deal.II"
+      target="body">a webpage showing the status of our regression tests</a>.
+    </p>
+
+    <div class="toc">
+      <ol>
+        <li><a href="#setup">Setting up the testsuite</a></li>
+        <ol>
+          <li><a href="#setupbuild">For a build directory</a></li>
+          <li><a href="#setupinstalled">For an already installed library</a></li>
+        </ol>
+        <li><a href="#run">Running the testsuite</a></li>
+        <ol>
+          <li><a href="#runoutput">How to interpret the output</a></li>
+          <li><a href="#coverage">Generating coverage information</a></li>
+        </ol>
+        <li><a href="#layout">Testsuite development</a></li>
+        <ol>
+          <li><a href="#layoutgeneral">General layout</a></li>
+          <li><a href="#restrictbuild">Restricting tests to build configurations</a></li>
+          <li><a href="#restrictfeature">Restricting tests to feature configurations</a></li>
+          <li><a href="#mpi">Running tests with MPI</a></li>
+          <li><a href="#binary">Tests with binary output</a></li>
+          <li><a href="#variants">Tests with multiple comparison files</a></li>
+          <li><a href="#expect">Changing condition for success</a></li>
+          <li><a href="#layoutaddtests">Adding new tests</a></li>
+          <li><a href="#layoutaddcategory">Adding new categories</a></li>
+        </ol>
+        <li><a href="#submit">Submitting test results</a></li>
+        <li><a href="#build_tests">Build tests</a></li>
+          <ol>
+            <li><a href="#dedicatedbuilds">Dedicated build tests</a></li>
+          </ol>
+      </ol>
+    </div>
+
+    <a name="setup"></a>
+    <h2>Setting up the testsuite</h2>
+
+    <p>
+      The testsuite is part of the development sources of deal.II and located
+      under the <code>tests</code> subdirectory. The easiest way to obtain
+      both of them is to check out the current development sources via git:
+<pre>
+$ git clone https://github.com/dealii/dealii
+</pre>
+    </p>
+
+    <a name="setupbuild"></a>
+    <h3>For a build directory</h3>
+
+    <p>
+      To enable the testsuite for a given build directory, ensure that
+      deal.II is successfully configured and built (installation is not
+      necessary). After that you can set up the testsuite via the
+      "setup_tests" target:
+<pre>
+$ make setup_tests
+</pre>
+      This will set up all tests supported by the current configuration.
+      The testsuite can now be run in the current <i>build directory</i> as
+      described below.
+    </p>
+
+    <p>
+      The setup can be fine-tuned using the following commands:
+<pre>
+
+$ make prune_tests - removes all testsuite subprojects
+</pre>
+
+    <p>
+      In addition, when setting up the testsuite, the following environment
+      variables can be used to override default behavior when
+      calling <code>make setup_tests</code>:
+<pre>
+TEST_TIME_LIMIT
+  - The time limit (in seconds) a single test is allowed to take. Defaults
+    to 180 seconds
+
+TEST_PICKUP_REGEX
+  - A regular expression to select only a subset of tests during setup.
+    An empty string is interpreted as a catchall (this is the default).
+</pre>
+For example,
+<pre>
+TEST_PICKUP_REGEX="umfpack" make setup_tests
+</pre>
+will only enable tests which match the string "umfpack" in category or
+name.
+    </p>
+
+    <a name="setupinstalled"></a>
+    <h3>For an already installed library</h3>
+
+    <p>
+      The testsuite can also be set up for an already installed library
+      (starting with version 8.3). For this, create a build directory for
+      the testsuite and run cmake pointing to the <code>tests</code>
+      subdirectory, e.g.,
+<pre>
+$ mkdir tests/build
+$ cd tests/build
+$ cmake ..
+</pre>
+      After that the same configuration targets as described <a
+      href="#setupbuild">above</a> are available.
+    </p>
+
+    <a name="run"></a>
+    <h2>Running the testsuite</h2>
+
+    <p>
+      The testsuite can now be run in the <i>build directory</i> via
+<pre>
+$ ctest [-j N]
+</pre>
+      Here, <code>N</code> is the number of concurrent tests that should be
+      run, in the same way as you can say <code>make -jN</code>. The testsuite
+      is huge and will need around 12h on current computers
+      running single threaded.
+    </p>
+
+    <p>
+      If you only want to run a subset of tests
+      matching a regular expression, or if you want to exclude tests matching
+      a regular expression, you can use
+<pre>
+$ ctest [-j N] -R '&lt;positive regular expression&gt;'
+$ ctest [-j N] -E '&lt;negative regular expression&gt;'
+</pre>
+    </p>
+
+    <p>
+      <b>Note:</b>
+      Not all tests succeed on every machine even if all computations are
+      correct, because your machine generates slightly different floating
+      point outputs. To increase the number of tests that work correctly,
+      install the
+      <a href="http://www.nongnu.org/numdiff/">numdiff</a> tool that compares
+      stored and newly created output files based on floating point
+      tolerances. To use it, simply add the directory containing the
+      <code>numdiff</code> executable to the <code>PATH</code>
+      environment variable so that it can be found during
+      <code>make setup_tests</code>.
+    </p>
+
+    <a name="runoutput"></a>
+    <h3>How to interpret the output</h3>
+
+    <p>
+      A typical output of a <code>ctest</code> invocation looks like:
+<pre>
+$ ctest -j4 -R "base/thread_validity"
+Test project /tmp/trunk/build
+      Start 747: base/thread_validity_01.debug
+      Start 748: base/thread_validity_01.release
+      Start 775: base/thread_validity_05.debug
+      Start 776: base/thread_validity_05.release
+ 1/24 Test #776: base/thread_validity_05.release ...   Passed    1.89 sec
+ 2/24 Test #748: base/thread_validity_01.release ...   Passed    1.89 sec
+      Start 839: base/thread_validity_03.debug
+      Start 840: base/thread_validity_03.release
+ 3/24 Test #747: base/thread_validity_01.debug .....   Passed    2.68 sec
+[...]
+      Start 1077: base/thread_validity_08.debug
+      Start 1078: base/thread_validity_08.release
+16/24 Test #1078: base/thread_validity_08.release ...***Failed    2.86 sec
+18/24 Test #1077: base/thread_validity_08.debug .....***Failed    3.97 sec
+[...]
+
+92% tests passed, 2 tests failed out of 24
+
+Total Test time (real) =  20.43 sec
+
+The following tests FAILED:
+        1077 - base/thread_validity_08.debug (Failed)
+        1078 - base/thread_validity_08.release (Failed)
+Errors while running CTest
+</pre>
+      If a test failed (like <code>base/thread_validity_08.debug</code> in the above
+      example output), you might want to find out what exactly went wrong. To
+      this end, you can search
+      through <code>Testing/Temporary/LastTest.log</code> for the exact output
+      of the test, or you can rerun this one test, specifying <code>-V</code>
+      to select verbose output of tests:
+<pre>
+$ ctest -V -R "base/thread_validity_08.debug"
+[...]
+test 1077
+    Start 1077: base/thread_validity_08.debug
+
+1077: Test command: [...]
+1077: Test timeout computed to be: 600
+1077: Test base/thread_validity_08.debug: RUN
+1077: ===============================   OUTPUT BEGIN  ===============================
+1077: Built target thread_validity_08.debug
+1077: Generating thread_validity_08.debug/output
+1077: terminate called without an active exception
+1077: /bin/sh: line 1: 18030 Aborted [...]/thread_validity_08.debug
+1077: base/thread_validity_08.debug: BUILD successful.
+1077: base/thread_validity_08.debug: RUN failed. Output:
+1077: DEAL::OK.
+1077: gmake[3]: *** [thread_validity_08.debug/output] Error 1
+1077: gmake[2]: *** [CMakeFiles/thread_validity_08.debug.diff.dir/all] Error 2
+1077: gmake[1]: *** [CMakeFiles/thread_validity_08.debug.diff.dir/rule] Error 2
+1077: gmake: *** [thread_validity_08.debug.diff] Error 2
+1077:
+1077:
+1077: base/thread_validity_08.debug: ******    RUN failed    *******
+1077:
+1077: ===============================    OUTPUT END   ===============================
+</pre>
+      So this specific test aborted in the <code>RUN</code> stage.
+    </p>
+
+    <p>
+      The general output for a successful test <code>&lt;test&gt;</code> in
+      category <code>&lt;category&gt;</code> for build type
+      <code>&lt;build&gt;</code> is
+<pre>
+xx: Test &lt;category&gt;/&lt;test&gt;.&lt;build&gt;: PASSED
+xx: ===============================   OUTPUT BEGIN  ===============================
+xx: [...]
+xx: &lt;category&gt;/&lt;test&gt;.&lt;build&gt;: PASSED.
+xx: ===============================    OUTPUT END   ===============================
+</pre>
+      And for a test that fails in stage <code>&lt;stage&gt;</code>:
+<pre>
+xx: Test &lt;category&gt;/&lt;test&gt;.&lt;build&gt;: &lt;stage&gt;
+xx: ===============================   OUTPUT BEGIN  ===============================
+xx: [...]
+xx: &lt;category&gt;/&lt;test&gt;.&lt;build&gt;: &lt;stage&gt; failed. [...]
+xx:
+xx: &lt;category&gt;/&lt;test&gt;.&lt;build&gt;: ******    &lt;stage&gt; failed    *******
+xx: ===============================    OUTPUT END   ===============================
+</pre>
+      Here, <code>&lt;stage&gt;</code> indicates the stage in which the
+      test failed:
+      <ul>
+        <li>
+          <code>CONFIGURE</code>: only for tests in the "build_tests"
+          category: The test project failed in the configuration stage
+        </li>
+        <li>
+          <code>BUILD</code>: compilation error occurred
+        </li>
+        <li>
+          <code>RUN</code>: the test executable could not be run / aborted
+        </li>
+        <li>
+          <code>DIFF</code>: the test output differs from the reference output
+        </li>
+        <li>
+          <code>PASSED</code>: the test ran successfully
+        </li>
+      </ul>
+      Typically, tests fail because the output has changed, and you will see
+      this in the DIFF phase of the test.
+    </p>
+
+
+    <a name="coverage"></a>
+    <h3>Generating coverage information</h3>
+
+    <p>
+      The testsuite can also be used to provide <i>coverage</i> information,
+      i.e., data that shows which lines of the library are executed how many
+      times by running through all of the tests in the testsuite. This is of
+      interest in finding places in the library that are not covered by
+      the testsuite and, consequently, are prone to the inadvertent
+      introduction of bugs since existing functionality is not subject to
+      existing tests.
+    </p>
+
+    <p>
+      To run the testsuite in this mode, essentially, you have to do three
+      things:
+      <ol>
+	<li>
+	  build the library with appropriate profiling flags
+	</li>
+	<li>
+	  run all or some tests (built with the same profiling flags)
+	</li>
+	<li>
+	  gather all information and convert them to a viewable format.
+	</li>
+      </ol>
+      In order to achieve the first two, configure the library with
+<pre>
+cmake -DCMAKE_BUILD_TYPE=Debug -DDEAL_II_SETUP_COVERAGE=ON <...>
+</pre>
+      You can then build the library and run the tests as usual.
+    </p>
+
+    <p>
+      For the last point, one can in principle use whatever tool one
+      wants. That said, the deal.II ctest driver already has builtin
+      functionality to gather all profiling files and submit them to cdash
+      where we already gather testsuite results
+      (see <a href="#submit">below</a>). You can do so by invoking
+<pre>
+ctest -DCOVERAGE=ON <...> -S ../tests/run_testsuite.cmake
+</pre>
+      when running the testsuite, or directly by
+<pre>
+ctest <...> -S ../tests/run_coverage.cmake
+</pre>
+    </p>
+
+    <p>
+      At the end of all of this, results will be shown in a separate section
+      "Coverage" at the
+      <a href="http://cdash.kyomu.43-1.org/index.php?project=deal.II&display=project"
+      target="_top">deal.II cdash site</a>.
+    </p>
+
+
+    <a name="layout"></a>
+    <h2>Testsuite development</h2>
+
+    <p>
+      The following outlines what you need to know if you want to understand
+      how the testsuite actually works, for example because you may want to
+      add tests along with the functionality you are currently developing.
+    </p>
+
+
+
+    <a name="layoutgeneral"></a>
+    <h3>General layout</h3>
+
+    <p>
+      A test usually consists of a source file and an output file for
+      comparison (under the testsuite directory <code>tests</code>):
+<pre>
+category/test.cc
+category/test.output
+</pre>
+      <code>category</code> will be one of the existing subdirectories
+      under <code>tests/</code>, e.g., <code>lac/</code>, <code>base/</code>,
+      or <code>mpi/</code>. Historically, we have grouped tests into the
+      directories <code>base/, lac/, deal.II/</code> depending on their
+      functionality, and <code>bits/</code> if they were small unit tests, but
+      in practice we have not always followed this rigidly. There are also
+      more specialized directories <code>trilinos/, petsc/,
+      serialization/, mpi/</code> etc, whose meaning is more obvious.
+      <code>test.cc</code> must be a regular executable (i.e. having an
+      <code>int main()</code> routine). It will be compiled, linked and
+      run. The executable should not output anything to <code>cout</code>
+      (at least under normal circumstances, i.e. no error condition),
+      instead the executable should output to a file <code>output</code>
+      in the current working directory. In practice, we rarely write the
+      source files completely from scratch, but we find an existing test that
+      already does something similar and copy/modify it to fit our needs.
+    </p>
+    <p>
+      For a normal test, <code>ctest</code> will typically run the following 3
+      stages:
+      <ul>
+        <li>
+          <code>BUILD</code>: The build stage generates an executable in
+          <code>BUILD_DIR/tests/&lt;category&gt;/&lt;test&gt;</code>.
+        </li>
+        <li>
+          <code>RUN</code>: The run stage then invokes the executable in
+          the directory where it is located. By convention, each test
+          either prints its test results directly to <code>stdout</code>,
+          or directly into a file called <code>output</code> (in the
+          current working directory). The latter takes precedence.
+          The output (via stdout or file) will then be located in
+          <code>BUILD_DIR/tests/&lt;category&gt;/&lt;test&gt;/output</code>.
+          If the run fails (e.g. because the program aborts with an error
+          code) the file <code>output</code> is renamed to
+          <code>failing_output</code>.
+        </li>
+        <li>
+          <code>DIFF</code>: As a last stage the generated output file will
+          be compared to
+          <code>SOURCE_DIR/tests/&lt;category&gt;/&lt;test&gt;[...].output</code>
+          and the result stored in
+          <code>BUILD_DIR/tests/&lt;category&gt;/&lt;test&gt;/diff</code>.
+          If the diff fails, the file <code>diff</code> is renamed to
+          <code>failing_diff</code>.
+        </li>
+      </ul>
+    </p>
+
+
+    <a name="restrictbuild"></a>
+    <h3>Restricting tests to build configurations</h3>
+
+    <p>
+      Comparison files can actually be named in a more complex way than
+      just <code>category/test.output</code>. In pseudo code:
+<pre>
+category/test.[with_&lt;string&gt;(&lt;=|&gt;=|=|&lt;|&gt;)&lt;on|off|version&gt;.]*
+              [mpirun=&lt;x&gt;.][expect=&lt;y&gt;.][binary.][&lt;debug|release&gt;.]output
+</pre>
+      Normally, a test will be set up so that it runs twice, once in debug and
+      once in release configuration.
+      If a specific test can only be run in debug or release configurations but
+      not in both it is possible to restrict the setup by prepending
+      <code>.debug</code> or <code>.release</code> directly before
+      <code>.output</code>, e.g.:
+<pre>
+category/test.debug.output
+</pre>
+      This way, the test will only be set up to build and run against the debug
+      library. If a test should run in both configurations but, for some
+      reason, produces different output (e.g., because it triggers an
+      assertion in debug mode), then you can just provide two different output
+      files:
+<pre>
+category/test.debug.output
+category/test.release.output
+</pre>
+    </p>
+
+    <a name="restrictfeature"></a>
+    <h3>Restricting tests to feature configurations</h3>
+    <p>
+      In a similar vein as for build configurations, it is possible to restrict
+      tests to specific feature configurations, e.g.,
+<pre>
+category/test.with_umfpack=on.output, or
+category/test.with_zlib=off.output
+</pre>
+      These tests will only be set up if the specified feature was configured.
+      It is possible to provide different output files for disabled/enabled
+      features, e.g.,
+<pre>
+category/test.with_64bit_indices=on.output
+category/test.with_64bit_indices=off.output
+</pre>
+      Furthermore, a test can be restricted to be run only if specific
+      versions of a feature are available. For example
+<pre>
+category/test.with_trilinos.geq.11.14.1.output
+</pre>
+      will only be run if (a) trilinos is available, i.e.,
+      <code>DEAL_II_WITH_TRILINOS=TRUE</code> and (b) if trilinos is at least
+      of version 11.14.1. Supported operators are
+      <code>=</code>, <code>.le.</code>, <code>.ge.</code>, <code>.leq.</code>,
+      <code>.geq.</code>.
+    </p>
+    <p>
+      It is also possible to declare multiple constraints subsequently, e.g.
+<pre>
+category/test.with_umfpack=on.with_zlib=on.output
+</pre>
+    </p>
+    <p>
+      <b>Note:</b> The tests in some subdirectories of <code>tests/</code> are
+      automatically run only if some feature is enabled. In this case a
+      feature constraint encoded in the output file name is redundant and
+      should be avoided. In particular, this holds for subdirectories
+      <code>distributed_grids</code>, <code>lapack</code>,
+      <code>metis</code>, <code>petsc</code>, <code>slepc</code>,
+      <code>trilinos</code>, <code>umfpack</code>, <code>gla</code>, and
+      <code>mpi</code>.
+    </p>
+
+
+    <a name="mpi"></a>
+    <h3>Running tests with MPI</h3>
+    <p>
+      If a test should be run with MPI in parallel, the number of MPI
+      processes <code>N</code> with which a program needs to be run for
+      comparison with a given output file is specified as follows:
+<pre>
+category/test.mpirun=N.output
+</pre>
+      It is quite typical for an MPI-enabled test to have multiple output
+      files for different numbers of MPI processes.
+    </p>
+
+
+    <a name="binary"></a>
+    <h3>Tests with binary output</h3>
+    <p>
+      If a test produces binary output add <code>binary</code> to the
+      output file to indicate this:
+<pre>
+category/test.binary.output
+</pre>
+      The testsuite ensures that a diff tool suitable for comparing binary
+      output files is used instead of the default diff tool, which (as in
+      the case of <code>numdiff</code>) might be unable to compare binary
+      files.
+    </p>
+
+
+    <a name="variants"></a>
+    <h3>Tests with multiple comparison files</h3>
+    <p>
+      Sometimes it is necessary to provide multiple comparison files for a
+      single test, for example because you want to test code on multiple
+      platforms that produce different output files that, nonetheless, all
+      should be considered correct. An example would be tests that use the
+      <code>rand()</code> function that is implemented differently on
+      different platforms. Additional comparison files have the same path
+      as the main comparison file (in this case <code>test.output</code>)
+      followed by a dot and a variant description:
+<pre>
+category/test.output
+category/test.output.2
+category/test.output.3
+category/test.output.4
+</pre>
+      The testsuite will try to match the output against all variants in
+      alphabetical order starting with the main output file.
+    </p>
+
+    <p>
+      <b>Warning:</b> This mechanism is only meant as a last resort for
+      tests where <i>no</i> alternative approach is viable. Especially,
+      consider first to
+      <ol>
+        <li> make the test more robust such that differences can be
+          expressed in round-off errors detectable by numdiff.
+        <li> restrict comparison files to specific
+          <a href="#restrictfeature">versions of an external feature</a>.
+      </ol>
+    </p>
+
+    <p>
+      <b>Note:</b> The main comparison file (i.e., the one ending in
+      <code>output</code>) is mandatory. Otherwise, no test will be
+      configured.
+    </p>
+
+
+    <a name="expect"></a>
+    <h3>Changing condition for success</h3>
+    <p>
+      Normally a test is considered to be successful if all test stages
+      could be run and the test reached the <code>PASSED</code> stage (see
+      the <a href="#runoutput">output description</a> section for details).
+      If (for some reason) the test should succeed ending at a specific
+      test stage different than <code>PASSED</code> you can specify it via
+      <code>expect=&lt;stage&gt;</code>, e.g.:
+<pre>
+category/test.expect=run.output
+</pre>
+    </p>
+
+
+    <a name="layoutaddtests"></a>
+    <h3>Adding new tests</h3>
+
+    <p>
+      We typically add one or more new tests every
+      time we add new functionality to the library or fix a bug. If you
+      want to contribute code to the library, you should do this
+      as well. Here's how: you need a testcase and a file with the
+      expected output.
+    </p>
+
+    <h4>The testcase</h4>
+    <p>
+      For the testcase, we usually start from one of the existing tests, copy
+      and modify it to where it does what we'd like to test. Alternatively,
+      you can also start from a template like this:
+<pre>
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2015 by the deal.II Authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// a short (a few lines) description of what the program does
+
+#include "../tests.h"
+
+// all include files you need here
+
+int main ()
+{
+  // Initialize deallog for test output.
+  // This also reroutes deallog output to a file "output".
+  initlog();
+
+  // your testcode here:
+  int i = 0;
+  deallog << i << std::endl;
+
+  return 0;
+}
+</pre>
+
+    <p>This code opens an output file <code>output</code> in the current working
+    directory and then writes all output you generate to it, through the
+    <code>deallog</code> stream. The <code>deallog</code> stream works like
+    any other <code>std::ostream</code> except that it does a few more
+    things behind the scenes that are helpful in this context. In the above
+    case, we only write a zero to the output file. Most tests of course
+    write computed data to the output file to make sure that whatever we
+    compute is what we got when the test was first written.
+    </p>
+
+    <p>
+    There are a number of directories where you can put a new test.
+    Extensive tests of individual classes or groups of classes
+    have traditionally been put into the <code>base/</code>,
+    <code>lac/</code>, <code>deal.II/</code>, <code>fe/</code>,
+    <code>hp/</code>, or <code>multigrid/</code> directories, depending on
+    where the classes that are tested are located. More atomic tests often go
+    into <code>bits/</code>. There are also
+    directories for PETSc and Trilinos wrapper functionality.
+    </p>
+
+    <h4>An expected output</h4>
+
+    <p>
+      In order to run your new test, copy it to an appropriate category and
+      create an empty comparison file for it:
+<pre>
+category/my_new_test.cc
+category/my_new_test.output
+</pre>
+      Now, rerun
+<pre>
+$ make setup_tests
+</pre>
+      so that your new test is picked up. After that it is possible to
+      invoke it with
+<pre>
+$ ctest -V -R "category/my_new_test"
+</pre>
+    </p>
+
+    <p>
+      If you run your new test executable this way, the test should compile
+      and run successfully but fail in the diff stage (because of the empty
+      comparison file). You will get an output file
+      <code>BUILD_DIR/category/my_new_test/output</code>. Take a look at it to
+      make sure that the output is what you had expected. (For complex tests,
+      it may sometimes be impossible to say whether the output is correct, and
+      in this case we sometimes just take it to make
+      sure that future invocations of the test yield the same results.)
+    </p>
+
+    <p>
+      The next step is to copy and rename this output file to the source
+      directory and replace the original comparison file with it:
+<pre>
+category/my_new_test.output
+</pre>
+      At this point running the test again should be successful:
+<pre>
+$ ctest -V -R "category/my_new_test"
+</pre>
+    </p>
+
+    <a name="layoutaddcategory"></a>
+    <h3>Adding new categories</h3>
+
+    <p>
+      If you want to create a new category in the testsuite, create a new
+      folder under <code>./tests</code> that is named accordingly and put
+      a <code>CMakeLists.txt</code> file into it containing
+    </p>
+<pre>
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+INCLUDE(../setup_testsubproject.cmake)
+PROJECT(testsuite CXX)
+INCLUDE(${DEAL_II_TARGET_CONFIG})
+DEAL_II_PICKUP_TESTS()
+</pre>
+
+    <a name="submit"></a>
+    <h2>Submitting test results</h2>
+
+    <p>
+      To submit test results to our <a
+        href="http://cdash.kyomu.43-1.org/index.php?project=deal.II">CDash</a>
+      instance just invoke ctest within a build directory (or designated build
+      directory) with the <code>-S</code> option pointing to <code>run_testsuite.cmake</code>:
+<pre>
+$ ctest [...] -V -S ../tests/run_testsuite.cmake
+</pre>
+      The script will run configure, build and ctest and submit the results
+      to the CDash server. It does not matter whether the configure, build
+      or ctest stages were run before that. Also in script mode, you can
+      specify the same options for <code>ctest</code> as explained above.
+    </p>
+
+    <p>
+      <b>Note:</b> The default output in script mode is very minimal.
+      Therefore, it is recommended to specify <code>-V</code> which will
+      give the same level of verbosity as the non-script mode.
+    </p>
+
+    <p>
+      <b>Note:</b> The following variables can be set via
+<pre>
+ctest -D&lt;variable&gt;=&lt;value&gt; [...]
+</pre>
+      to control the behaviour of the <code>run_testsuite.cmake</code>
+      script:
+<pre>
+CTEST_SOURCE_DIRECTORY
+  - The source directory of deal.II
+  - If unspecified, "../deal.II" and "../../" relative to the location
+    of this script is used. If this is not a source directory, an error
+    is thrown.
+
+CTEST_BINARY_DIRECTORY
+  - The designated build directory (already configured, empty, or non
+    existent - see the information about TRACKs for what will happen)
+  - If unspecified the current directory is used. If the current
+    directory is equal to CTEST_SOURCE_DIRECTORY or the "tests"
+    directory, an error is thrown.
+
+CTEST_CMAKE_GENERATOR
+  - The CMake Generator to use (e.g. "Unix Makefiles", or "Ninja", see
+    $ man cmake)
+  - If unspecified the current generator of a configured build directory
+    will be used, otherwise "Unix Makefiles".
+
+TRACK
+  - The track the test should be submitted to. Defaults to "Experimental".
+    Possible values are:
+
+    "Experimental"     - all tests that are not specifically "build" or
+                         "regression" tests should go into this track
+
+    "Build Tests"      - Build tests that configure and build in a
+                         clean directory and run the build tests
+                         "build_tests/*"
+
+    "Nightly"          - Reserved for nightly regression tests for
+                         build bots on various architectures
+
+    "Regression Tests" - Reserved for the regression tester
+
+CONFIG_FILE
+  - A configuration file (see doc/users/config.sample)
+    that will be used during the configuration stage (invokes
+    $ cmake -C ${CONFIG_FILE}). This only has an effect if
+    CTEST_BINARY_DIRECTORY is empty.
+
+DESCRIPTION
+  - A string that is appended to CTEST_BUILD_NAME
+
+COVERAGE
+  - If set to ON deal.II will be configured with
+    DEAL_II_SETUP_COVERAGE=ON, CMAKE_BUILD_TYPE=Debug and the
+    CTEST_COVERAGE() stage will be run. Test results must go into the
+    "Experimental" section.
+
+MAKEOPTS
+  - Additional options that will be passed directly to make (or ninja).
+</pre>
+      Furthermore, the variables described <a href="#setup">above</a> can also be
+      set and will be handed automatically down to <code>cmake</code>.
+    </p>
+
+
+
+    <a name="build_tests"></a>
+    <h2>Build tests</h2>
+
+    <p>
+      Build tests are used to check that deal.II can be compiled on
+      different systems and with different compilers as well as
+      different configuration options. Results are collected in the "Build
+      Tests" track in <a
+      href="http://cdash.kyomu.43-1.org/index.php?project=deal.II">CDash</a>.</p>
+
+      <p>Running the build test suite is simple and we encourage deal.II
+      users with configurations not found on the <a
+      href="http://cdash.kyomu.43-1.org/index.php?project=deal.II">CDash page</a> to
+      participate. Assuming you checked out deal.II into the directory
+      <code>dealii</code>, running it is as simple as:
+<pre>
+mkdir dealii/build
+cd dealii/build
+ctest -j4 -S ../tests/run_buildtest.cmake
+</pre>
+    </p>
+
+    <p>
+      What this does is to compile and build deal.II in the directory
+      <code>build</code>, try to configure and build (and run a subset of) all
+      tutorial programs supported by the current configuration and send the
+      results to the CDash instance.
+    </p>
+
+    <p>
+      <b>Note:</b> Build tests require the designated build directory to be
+      completely empty. If you want to specify a build configuration for
+      cmake use a <a href="../users/config.sample">configuration file</a>
+      to preseed the cache as explained <a href="#submit">above</a>:
+<pre>
+$ ctest -DCONFIG_FILE="[...]/config.sample" [...]
+</pre>
+    </p>
+
+
+
+    <a name="dedicatedbuilds"></a>
+    <h3>Dedicated build tests</h3>
+
+    <p>
+      Build tests work best if they run automatically and periodically.
+      There is a detailed example for such dedicated build tests on the <a
+        href="https://github.com/dealii/dealii/wiki/Build-Tests">wiki</a>.
+    </p>
+
+
+    <hr />
+    <div class="right">
+      <a href="http://validator.w3.org/check?uri=referer" target="_top">
+        <img style="border:0" src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+      <a href="http://jigsaw.w3.org/css-validator/check/referer" target="_top">
+        <img style="border:0;width:88px;height:31px" src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+    </div>
+
+  </body>
+</html>
diff --git a/doc/developers/writing-documentation.html b/doc/developers/writing-documentation.html
new file mode 100644
index 0000000..17e6235
--- /dev/null
+++ b/doc/developers/writing-documentation.html
@@ -0,0 +1,464 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+                 "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+  <meta http-equiv="Content-type" content="text/html;charset=UTF-8">
+  <title>Writing documentation</title>
+  <link href="../screen.css" rel="StyleSheet">
+  <meta name="copyright" content="Copyright (C) 1998 - 2015 by the deal.II Authors">
+  <meta name="keywords" content="deal dealii finite elements fem triangulation">
+  <meta http-equiv="content-language" content="en">
+</head>
+<body>
+
+    <h1>Writing Documentation</h1>
+
+    <p>
+    It is our firm belief that software can only be successful if it
+    is properly documented. Too many academic software projects die
+    prematurely once their creators leave the university or the
+    workgroup in which the software was developed, since with their
+    creators also knowledge of internal structures, interfaces, and
+    the valuable bag of tricks leaves, a gap that can not be closed by
+    reading sources, trial-and-error, and guessing.
+    </p>
+
+    <p>
+    The <acronym>deal.II</acronym> project has therefore from its
+    infancy adopted a policy that every aspect of the interface
+    needs to be well-documented before its inclusion into the
+    source tree. Since we have found that it is impossible to keep
+    documentation up-to-date if it is not written directly into the
+    program code, we write the documentation directly at the place of
+    declaration of a function or class and use automatic tools to
+    extract this information from the files and process it into HTML
+    for our web-pages, or LaTeX for printing.
+    </p>
+
+    <p>
+    In addition to the API documentation, we maintain a series of
+    <a href="../doxygen/deal.II/Tutorial.html" target="_top">
+    well-documented example programs</a>, which also follow a certain
+    &ldquo;literate programming&rdquo; style in that the explanations of what
+    is happening are integrated into the program source by means of
+    comments, and are extracted by small scripts.
+    </p>
+
+    <p>
+    This document first explains the basics of
+    <a href="#API" target="body">documenting the API</a> and then of
+    <a href="#examples" target="body">writing example programs</a>.
+    </p>
+
+
+    <a name="API"></a>
+    <h2>Writing API documentation</h2>
+
+    <p>
+    In order to extract documentation from the header files of the
+    project, we use
+    <a href="http://www.doxygen.org/" target="_top">doxygen</a>.
+    It requires that documentation is written in a form which
+    closely follows the
+    <a href="http://java.sun.com/products/jdk/javadoc/index.html" target="_top">
+    JavaDoc</a> standard.
+    </p>
+
+
+    <h3>Basic layout of documentation</h3>
+
+    <p>
+    Basically, every declaration, whether class or member
+    function/variable declaration, global function or namespace, may
+    be preceded by a comment of the following form:
+    </p>
+<pre>
+/**
+ * This is an example documentation.
+ *
+ * @author Wolfgang Bangerth, 2000
+ */
+class TestClass
+{
+  public:
+           /**
+            * Constructor
+            */
+    TestClass ();
+
+           /**
+            * Example function
+            */
+    virtual void test () const = 0;
+
+           /**
+            * Member variable
+            */
+    const unsigned int abc;
+};
+</pre>
+    <p>
+    <acronym>doxygen</acronym> will then generate a page for the class
+    <code>TestClass</code> and document each of the member functions
+    and variables. The content of the <code>@author</code> tag will be
+    included into the online documentation of the class.
+    </p>
+
+
+    <h3>Extended Layout</h3>
+
+    <p>
+    In order to allow better structured output for long comments,
+    doxygen supports a great number of tags for enumerations,
+    sectioning, markup, and other fields. We encourage you to take a
+    look at the <a href="http://www.doxygen.org/"
+    target="_top">doxygen webpage</a> to get an overview. However,
+    here is a brief summary of the most often used features:
+    </p>
+    <ul>
+    <li> <em>Virtual functions:</em>
+        Do not document virtual functions in derived classes, if there
+        is nothing different from the base class. If a virtual
+        function is documented in a base class, doxygen will
+        automatically copy this documentation to the derived
+        classes. This mechanism is interrupted, if a documentation
+        block is found in the derived class.
+
+    <li> <em>Itemized lists:</em>
+         By writing comments like the following,
+<pre>
+/**
+ * &lt;ul&gt;
+ *   &lt;li&gt; foo
+ *   &lt;li&gt; bar
+ * &lt;/ul&gt;
+ */
+</pre>
+         you can get itemized lists both in the online and printed
+         documentation:
+         <ul>
+         <li> foo
+         <li> bar
+         </ul>
+
+         <p>
+         In other words, one can use standard HTML tags for this
+         task. Likewise, you can get numbered lists by using the
+         respective HTML tags <code>&lt;ol&gt;</code>.
+         </p>
+
+    <li> <em>Verbatim output:</em>
+         <p>
+         If you write comments like this,
+         </p>
+<pre>
+/**
+ * @verbatim
+ *   void foobar ()
+ *   {
+ *     i = 0;
+ *   }
+ * @endverbatim
+ */
+</pre>
+         you will get the lines between the verbatim environment with
+         the same formatting and in typewriter font:
+<pre>
+void foobar ()
+{
+  i = 0;
+}
+</pre>
+         This is useful if you want to include small sample code snippets
+         into your documentation. In particular, it is important that
+         the formatting is preserved, which is not the case for all
+         other text.
+
+    <li> <em>Typewriter font:</em>
+         <p>In order to use typewriter font for instance for function
+         arguments or variables, use the <code>&lt;code&gt;</code> HTML
+         tag. For a single word, you can also use the form <code>@p
+         one_word_without_spaces</code>. The <code>&lt;tt&gt;</code> tag is obsolete in HTML5.</p>
+
+         <p>
+         If you refer to member variables and member functions
+         <acronym>doxygen</acronym> has better options than this: use
+         <tt>function_name()</tt> to reference member functions and
+         <tt>#variable_name</tt> for member variables to create links
+         automatically. Refer to the documentation of <a
+         href="http://www.doxygen.org"><acronym>doxygen</acronym></a> to
+         get even more options for global variables.
+         </p>
+
+    <li> <em>Emphasis:</em>
+         <p>
+         To generate output in italics, use the <code>@em
+         one_word_without_spaces</code> tag or the <tt>&lt;em&gt;</tt>
+         HTML tag. To generate boldface, use <tt>&lt;b&gt;</tt>.
+         </p>
+
+    <li> <em>Formulæ:</em>
+         <p>
+         For simple and short formulæ use the <tt>&lt;i&gt;</tt>
+         HTML tag. Note that you can use <tt>&lt;sub&gt;</tt> and
+         <tt>&lt;sup&gt;</tt> to get subscripts and superscripts,
+         respectively. Only for longer formulæ use
+         <tt>$formula$</tt> to generate a LaTeX formula which will then be
+         included as a graphical image.
+         </p>
+
+    <li> <em>Sections:</em>
+         <p>
+         Sections in class and function documentations can be
+         generated using the <tt>&lt;hN&gt;</tt> HTML headline
+         tags. Headlines inside class documentation should start at
+         level 3 (<tt>&lt;h3&gt;</tt>) to stay consistent with the
+         structure of the <acronym>doxygen</acronym> output.
+         </p>
+
+         <p>
+         Sections cannot be referenced, unless you add a <tt>&lt;A
+         NAME="..."&gt;</tt> name anchor to them. If you really have
+         to do this, please make sure the name does not interfere with
+         <acronym>doxygen</acronym> generated anchors.
+         </p>
+
+    <li> <em>Exclusion from documentation:</em>
+         <p> <acronym>doxygen</acronym> sometimes has problems with inlined
+         functions of template classes. For these cases (and other cases of
+         parts of the code to be excluded from documentation), we define a
+         preprocessor symbol <tt>DOXYGEN</tt> when running
+         <acronym>doxygen</acronym>. Therefore, the following template can be
+         used to avoid documentation:</p>
+<pre>
+/* documented code here */
+
+#ifndef DOXYGEN
+
+/* code here is compiled, but ignored by doxygen */
+
+#endif // DOXYGEN
+</pre>
+    </ul>
+
+    <h3>Code examples for the usage of single classes</h3>
+    <p>
+    Writing example files for classes is supported by
+    <acronym>doxygen</acronym>. These example files go into
+    <tt>deal.II/examples/doxygen</tt>. If they are short,
+    documentation should be inside and they are included into the
+    documentation with <code>@include filename</code>. Take a look how
+    the class <code>BlockMatrixArray</code> does this.
+    </p>
+
+    <p>
+    Larger example files should be documented using the
+    <acronym>doxygen</acronym> command <code>@dotinclude</code> and
+    related commands. However, if these programs do something
+    reasonable and do not only demonstrate a single topic, you should
+    consider converting them to a complete example program in the
+    <code>step-XX</code> series.
+    </p>
+
+
+
+    <a name="examples"></a>
+    <h2>Writing example programs for the tutorial</h2>
+
+    <p>
+    Tutorial programs consist of an introduction, a well documented
+    code, and a section that shows the output and numerical results
+    of running the program. These three parts are written in separate
+    files: for the <code>step-xx</code> program, for example, they
+    would be in the
+    files <code>examples/step-xx/doc/intro.dox</code>,
+    <code>examples/step-xx/step-xx.cc</code> and
+    <code>examples/step-xx/doc/results.dox</code>. There are a
+    number of scripts that then process and concatenate these three
+    different files and send them through doxygen for generation of
+    HTML output. In general, if you want to see how certain markup
+    features can be used, it is worthwhile looking at the existing
+    tutorial program pages and the files they are generated from.
+    </p>
+
+    <h3>The introduction</h3>
+    <p>
+      The introduction, as well as the results section, will be
+      processed as if they were doxygen comments. In other words, all
+      the usual doxygen markup will work in these sections, including
+      latex formulas, though the format for the formula environment is
+      a bit awkward. Since it takes much longer to run doxygen for all
+      of deal.II than to run latex, most of the lengthier
+      introductions are just written in latex (with a minimal amount
+      of markup) and later converted into doxygen format. One thing to
+      be aware of is that you cannot reference formulas in doxygen, so
+      you have to work around that using text rather than formula
+      numbers.
+    </p>
+
+    <p>
+      More important is what goes into the introduction. Typically,
+      this would first be a statement of the problem that we want to
+      solve. Take a look, for example, at
+      the <a href="../doxygen/deal.II/step_22.html"
+      target="_top">step-22</a>
+      or <a href="../doxygen/deal.II/step_31.html"
+      target="_top">step-31</a> tutorial programs. Then come a few
+      sections in which we would discuss in mathematical terms the
+      algorithms that we want to use; this could, for example, include
+      the time stepping, discretization, or solver
+      approaches. <a href="../doxygen/deal.II/step_22.html"
+      target="_top">step-22</a>
+      and <a href="../doxygen/deal.II/step_31.html"
+      target="_top">step-31</a> are again good, if lengthy, examples
+      for this.
+    </p>
+
+    <p>
+      On the other hand, if a program is an extension of a previous
+      program, these things need not be repeated: you would just
+      reference the previous program. For
+      example, <a href="../doxygen/deal.II/step_16.html"
+      target="_top">step-16</a> does not talk about adaptive meshes
+      any more — it
+      extends <a href="../doxygen/deal.II/step_6.html"
+      target="_top">step-6</a> and simply refers there for
+      details. Likewise, <a href="../doxygen/deal.II/step_32.html"
+      target="_top">step-32</a> simply refers
+      to <a href="../doxygen/deal.II/step_31.html"
+      target="_top">step-31</a> for the problem statement and basic
+      algorithm and simply focuses on those parts that are new
+      compared to
+      <a href="../doxygen/deal.II/step_31.html" target="_top">step-31</a>.
+    </p>
+
+    <p>
+      The purpose of the introduction is to explain what the program
+      is doing. It should set the mindset so that when you read
+      through the code you already know <i>why</i> we are doing
+      something. You may not yet know <i>how</i> this is done, but this
+      is what the documentation within the code is doing. At least you
+      don't have to wonder any more why we are building up this
+      complicated preconditioner — we've already discussed this
+      in the introduction.
+    </p>
+
+    <p>
+      If it helps the understanding, the introduction can refer to
+      particular pieces of code (but doesn't have to). For example,
+      the introduction to <a href="../doxygen/deal.II/step_20.html"
+      target="_top">step-20</a> has pretty lengthy code snippets that
+      explain how to implement a general interface of operators that
+      may or may not be matrices. This would be awkward to do within
+      the code since in the code the view is somewhat smaller (you
+      have to have complete parameter lists, follow the syntax of the
+      programming language, etc, all of which obscures the things one
+      wants to discuss when giving a broad overview related to
+      particular C++ constructs). On the other hand, showing code
+      snippets in the introduction risks duplicating code in two
+      places, which will eventually get out of synch. Consequently,
+      this instrument should only be used sparingly.
+    </p>
+
+
+    <h3>The actual code</h3>
+    <p>
+      At present, the tools that extract information from the actual example
+      programs code are rather dumb. They are, to be precise, three Perl
+      scripts located in the directory of the
+      <code>deal.II/doc/doxygen/tutorial</code> tree, where
+      the <code>.cc</code> files of the tutorial programs are converted
+      into doxygen input files. In essence, what these scripts do is to
+      create doxygen input that contains the comments of the program as
+      text, and the actual code of the programs as code snippets. You
+      can see this when you look at the pages for each of the tutorials
+      where the code is indented relative to the text.
+    </p>
+
+    <p>
+      The whole thing being interpreted by doxygen means that you can
+      put anything doxygen understands into comments. This includes,
+      for example references to classes or members in the library (in
+      fact, you just need to write their name out and doxygen will
+      automatically link them), formulas, lists, etc. It all will come
+      out as if you had written comments for doxygen in the first
+      place.
+    </p>
+
+    <p>
+      The bigger question is <i>how</i> to write the comments that
+      explain what's going on in individual code blocks. Many years
+      back we wrote them so that every line or every two lines had
+      their own comment. You can still see this in some of the older
+      tutorial programs, though many of them have in the meantime been
+      converted to a newer style: it turns out that if you have
+      comments so frequently, it becomes hard to follow the flow of an
+      algorithm. In essence, you know exactly what each line does, but
+      you can't get an overview of what the function as a whole
+      does. But that's exactly the point of the tutorial programs, of
+      course!
+    </p>
+
+    <p>
+      So the way we now believe tutorial programs should be written is
+      to have comments for each logical block. For example,
+      the <code>solve()</code> function in many of the programs is
+      relatively straightforward and has at most a dozen lines of
+      code. So put a comment in front of the function that explains
+      all the things that are going on in the function, and then show
+      the function without comments in it — this way, a reader
+      will read through the half or full page of documentation
+      understanding the big picture, and can then see the whole
+      function all at once on a single screen without having to scroll
+      up and down. In the old way, the code would be spread out over a
+      couple pages, with comments between almost any two lines, making
+      it hard to see how it all fits together.
+    </p>
+
+    <p>
+      It is somewhat subjective how much code you should leave in each
+      block that you document separately. It might be a single line if
+      something really important and difficult happens there, but most
+      of the time it's probably more along the lines of 6 to 12 lines
+      — a small enough part of the code so that it's easy enough
+      to grasp by looking at it all at once, but large enough that it
+      contributes a significant part or all of an algorithm.
+    </p>
+
+
+    <h3>The results section</h3>
+
+    <p>
+      The results section should show (some of) the output of a
+      program, such as the console output and/or a visualization of
+      graphical output. It should also contain a brief discussion of
+      this output. It is intended to demonstrate what the program
+      does, so that a reader can see what happens if the program were
+      executed without actually running it. It helps to show a few
+      nice graphics there.
+    </p>
+
+    <p>
+      This section need not be overly comprehensive. If the program
+      is the implementation of a method that's discussed in an
+      accompanying paper, it's entirely ok to say "for further
+      numerical results, see ...".
+    </p>
+
+    <p>
+      Like the introduction, the results section file is copied
+      verbatim into input for doxygen, so all doxygen markup is
+      possible there.
+    </p>
+
+
+    <hr />
+    <div class="right">
+      <a href="http://validator.w3.org/check?uri=referer" target="_top">
+        <img style="border:0" src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+      <a href="http://jigsaw.w3.org/css-validator/check/referer" target="_top">
+        <img style="border:0;width:88px;height:31px" src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+    </div>
+
+  </body>
+</html>
diff --git a/doc/documentation.html b/doc/documentation.html
new file mode 100644
index 0000000..e70238a
--- /dev/null
+++ b/doc/documentation.html
@@ -0,0 +1,74 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+          "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+  <head>
+    <title>The deal.II Online Documentation</title>
+    <meta http-equiv="Content-Type" content="text/html;charset=utf-8" >
+    <link href="screen.css" rel="StyleSheet" type="text/css">
+    <meta name="copyright" content="Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 by the deal.II Authors">
+    <meta name="date" content="2006/02/07 22:49:01">
+    <meta name="keywords" content="deal.II">
+  </head>
+<body>
+
+<p>
+  <br />
+  <acronym>deal.II</acronym> comes with extensive online
+  documentation that can be grouped into the following categories:
+</p>
+
+<div style="text-align: center;">
+  <div style="clear: both; text-align: center; max-width: 45em; margin: auto;">
+    <div class="infobox">
+      <p>
+        <b>Information for users</b>
+      </p>
+      <div style="border:1px solid #aaa; background-color: #f9f9f9;
+                  padding: 5px; font-size: 88%; text-align: left;">
+        <ol>
+          <li><a href="readme.html" target="body">README and
+          installation instructions</a></li>
+          <li><a href="users/cmake.html" target="body">CMake documentation</a></li>
+          <li><a href="users/cmakelists.html" target="body">CMake in user projects</a></li>
+          <li><a href="users/testsuite.html" target="body">Setting up testsuite in user projects</a></li>
+          <li><a href="users/gdb.html" target="body">Configuration for debugging via GDB</a></li>
+          <li><a href="doxygen/deal.II/Tutorial.html" target="_top">Tutorial</a></li>
+          <li><a href="doxygen/deal.II/index.html" target="_top">Manual</a></li>
+          <li><a href="http://www.math.tamu.edu/~bangerth/videos.html" target="_top">Wolfgang's lectures</a></li>
+          <li><a href="http://www.dealii.org/reports.html" target="body">Technical reports</a></li>
+          <li><a href="http://www.dealii.org/publications.html" target="_top">Publications</a></li>
+        </ol>
+      </div>
+    </div>
+
+    <div class="infobox">
+      <p>
+        <b>Information for developers</b>
+      </p>
+      <div style="border:1px solid #aaa; background-color: #f9f9f9;
+                  padding: 5px; font-size: 88%; text-align: left;">
+        <ol>
+          <li><a href="developers/cmake-internals.html" target="body">CMake internals</a></li>
+          <li><a href="developers/writing-documentation.html" target="body">Writing documentation</a></li>
+          <li><a href="developers/porting.html" target="body">Porting</a></li>
+          <li><a href="doxygen/deal.II/CodingConventions.html" target="body">Coding conventions</a></li>
+          <li><a href="developers/testsuite.html" target="body">Testsuite</a></li>
+        </ol>
+      </div>
+    </div>
+  </div>
+</div>
+
+<div style="clear: both;">
+  <br>
+  <hr />
+  <div class="right">
+    <a href="http://validator.w3.org/check?uri=referer" target="_top">
+      <img style="border:0" src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+    <a href="http://jigsaw.w3.org/css-validator/check/referer" target="_top">
+      <img style="border:0;width:88px;height:31px" src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+  </div>
+</div>
+
+</body>
+</html>
diff --git a/doc/doxygen/CMakeLists.txt b/doc/doxygen/CMakeLists.txt
new file mode 100644
index 0000000..48d9dee
--- /dev/null
+++ b/doc/doxygen/CMakeLists.txt
@@ -0,0 +1,229 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2016 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Generate the source documentation via doxygen. Perl is required as well,
+# because perl scripts post-process the generated header.html/footer.html.
+
+FIND_PACKAGE(Perl)
+FIND_PACKAGE(Doxygen)
+
+#
+# Do we have all necessary dependencies? If not, abort the configuration:
+#
+IF(NOT PERL_FOUND)
+  MESSAGE(FATAL_ERROR
+    "Could not find a perl installation which is required for building the documentation"
+    )
+ENDIF()
+IF(NOT DOXYGEN_FOUND)
+  MESSAGE(FATAL_ERROR
+    "Could not find doxygen which is required for building the documentation"
+    )
+ENDIF()
+
+########################################################################
+#
+# Process the tutorial and code-gallery files into inputs for doxygen
+#
+########################################################################
+
+ADD_SUBDIRECTORY(tutorial)
+ADD_SUBDIRECTORY(code-gallery)
+
+
+########################################################################
+#
+# Set up all of the other input pieces we want to give to doxygen
+#
+########################################################################
+
+#
+# Prepare auxiliary files for doxygen. _use_mathjax is presumably
+# substituted into options.dox.in by CONFIGURE_FILE -- TODO confirm.
+IF(DEAL_II_DOXYGEN_USE_MATHJAX)
+  SET(_use_mathjax YES)
+ELSE()
+  SET(_use_mathjax NO)
+ENDIF()
+CONFIGURE_FILE(
+  ${CMAKE_CURRENT_SOURCE_DIR}/options.dox.in
+  ${CMAKE_CURRENT_BINARY_DIR}/options.dox
+  )
+
+CONFIGURE_FILE(
+  ${CMAKE_CURRENT_SOURCE_DIR}/scripts/mod_header.pl.in
+  ${CMAKE_CURRENT_BINARY_DIR}/scripts/mod_header.pl
+  )
+
+CONFIGURE_FILE(
+  ${CMAKE_CURRENT_SOURCE_DIR}/scripts/mod_footer.pl.in
+  ${CMAKE_CURRENT_BINARY_DIR}/scripts/mod_footer.pl
+  )
+
+
+# Generate header, footer and style files for doxygen: let doxygen write
+# its default templates (-w html), then patch header.html and footer.html
+# in place with the configured perl scripts (-i~ keeps "~" backup copies).
+ADD_CUSTOM_COMMAND(
+  OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/header.html
+    ${CMAKE_CURRENT_BINARY_DIR}/footer.html
+  COMMAND ${DOXYGEN_EXECUTABLE} -w html header.html footer.html style.css options.dox
+  COMMAND ${PERL_EXECUTABLE} -pi~ ${CMAKE_CURRENT_BINARY_DIR}/scripts/mod_header.pl header.html
+  COMMAND ${PERL_EXECUTABLE} -pi~ ${CMAKE_CURRENT_BINARY_DIR}/scripts/mod_footer.pl footer.html
+  WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+  DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/options.dox
+    ${CMAKE_CURRENT_BINARY_DIR}/scripts/mod_header.pl
+    ${CMAKE_CURRENT_BINARY_DIR}/scripts/mod_footer.pl
+  )
+
+# Finalize the doxygen configuration: _doxygen_input collects what doxygen
+# reads, _doxygen_image_path where it finds images, and _doxygen_depend the
+# files that the doxygen custom command below depends on.
+
+SET(_doxygen_input
+  ${CMAKE_CURRENT_SOURCE_DIR}/headers/
+)
+
+LIST(APPEND _doxygen_input
+  ${CMAKE_SOURCE_DIR}/include/
+  ${CMAKE_SOURCE_DIR}/source/
+  ${CMAKE_BINARY_DIR}/include/
+  ${CMAKE_SOURCE_DIR}/doc/news/
+  ${CMAKE_CURRENT_BINARY_DIR}/tutorial/tutorial.h
+  ${CMAKE_SOURCE_DIR}/contrib/parameter_gui/
+  )
+
+# Add other directories in which to find images
+SET(_doxygen_image_path
+  ${CMAKE_CURRENT_SOURCE_DIR}/images
+  ${CMAKE_SOURCE_DIR}/contrib/parameter_gui/images
+  )
+
+file(GLOB _doxygen_depend
+  ${CMAKE_CURRENT_SOURCE_DIR}/headers/*.h
+  ${CMAKE_SOURCE_DIR}/contrib/parameter_gui/*.h
+  ${CMAKE_SOURCE_DIR}/contrib/parameter_gui/main.cpp
+  ${CMAKE_SOURCE_DIR}/doc/news/*.h
+  ${CMAKE_SOURCE_DIR}/include/deal.II/**/*.h
+  ) # NOTE(review): FILE(GLOB) is not recursive; "**" presumably matches like "*" -- confirm
+LIST(APPEND _doxygen_depend
+  ${CMAKE_BINARY_DIR}/include/deal.II/base/config.h
+  ${CMAKE_CURRENT_BINARY_DIR}/tutorial/tutorial.h
+  )
+
+# Find all tutorial programs so we can add dependencies as appropriate
+FILE(GLOB _deal_ii_steps
+  ${CMAKE_SOURCE_DIR}/examples/step-*
+  )
+FOREACH(_step ${_deal_ii_steps})
+  GET_FILENAME_COMPONENT(_step "${_step}" NAME)
+  LIST(APPEND _doxygen_depend
+    ${CMAKE_CURRENT_BINARY_DIR}/tutorial/${_step}.h
+    )
+  LIST(APPEND _doxygen_input
+    ${CMAKE_CURRENT_BINARY_DIR}/tutorial/${_step}.h
+    )
+ENDFOREACH()
+
+# Also find all code gallery programs (if available) for the same reason.
+# The logic here follows the same as in code-gallery/CMakeLists.txt
+SET_IF_EMPTY(DEAL_II_CODE_GALLERY_DIRECTORY ${CMAKE_SOURCE_DIR}/code-gallery)
+IF (EXISTS ${DEAL_II_CODE_GALLERY_DIRECTORY}/README.md)
+  FILE(GLOB _code_gallery_names
+       "${DEAL_II_CODE_GALLERY_DIRECTORY}/*/doc/author")
+  STRING(REGEX REPLACE "/+doc/+author" "" _code_gallery_names "${_code_gallery_names}")
+
+  FOREACH(_step ${_code_gallery_names})
+    GET_FILENAME_COMPONENT(_step "${_step}" NAME)
+
+    LIST(APPEND _doxygen_depend
+      ${CMAKE_CURRENT_BINARY_DIR}/code-gallery/${_step}.h
+      )
+    LIST(APPEND _doxygen_input
+      ${CMAKE_CURRENT_BINARY_DIR}/code-gallery/${_step}.h
+      )
+  ENDFOREACH()
+ENDIF()
+
+
+# always make the doxygen run depend on the code-gallery.h file
+# (whether generated from the code gallery or copied from
+# no-code-gallery.h; both happen in code-gallery/CMakeLists.txt)
+LIST(APPEND _doxygen_input
+  ${CMAKE_CURRENT_BINARY_DIR}/code-gallery/code-gallery.h
+)
+LIST(APPEND _doxygen_depend
+  ${CMAKE_CURRENT_BINARY_DIR}/code-gallery/code-gallery.h
+)
+
+# Append the INPUT and IMAGE_PATH settings to the configured options.dox:
+TO_STRING(_doxygen_image_path_string ${_doxygen_image_path})
+TO_STRING(_doxygen_input_string ${_doxygen_input})
+
+FILE(APPEND "${CMAKE_CURRENT_BINARY_DIR}/options.dox"
+  "
+  INPUT=${_doxygen_input_string}
+  IMAGE_PATH=${_doxygen_image_path_string}
+  "
+  )
+
+########################################################################
+#
+# And, finally, call doxygen; doxygen.log is both the captured output and
+# the file the "doxygen" target depends on:
+########################################################################
+
+ADD_CUSTOM_COMMAND(
+  OUTPUT
+    ${CMAKE_BINARY_DIR}/doxygen.log
+  COMMAND ${DOXYGEN_EXECUTABLE}
+    ${CMAKE_CURRENT_BINARY_DIR}/options.dox
+    > ${CMAKE_BINARY_DIR}/doxygen.log 2>&1 # *pssst*: stdout and stderr go to doxygen.log
+  WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+  DEPENDS
+    tutorial
+    code-gallery
+    ${CMAKE_CURRENT_BINARY_DIR}/options.dox
+    ${CMAKE_CURRENT_BINARY_DIR}/header.html
+    ${CMAKE_CURRENT_BINARY_DIR}/footer.html
+    ${CMAKE_CURRENT_SOURCE_DIR}/DoxygenLayout.xml
+    ${_doxygen_depend}
+  COMMENT "Generating documentation via doxygen."
+  VERBATIM
+  )
+ADD_CUSTOM_TARGET(doxygen ALL
+  DEPENDS ${CMAKE_BINARY_DIR}/doxygen.log
+  )
+ADD_DEPENDENCIES(documentation doxygen)
+
+INSTALL(FILES
+  ${CMAKE_CURRENT_BINARY_DIR}/deal.tag
+  DESTINATION ${DEAL_II_DOCHTML_RELDIR}/doxygen
+  COMPONENT documentation
+  )
+
+INSTALL(FILES
+  ${CMAKE_SOURCE_DIR}/doc/deal.ico
+  DESTINATION ${DEAL_II_DOCHTML_RELDIR}/doxygen/deal.II
+  COMPONENT documentation
+  )
+
+INSTALL(DIRECTORY
+  ${CMAKE_CURRENT_BINARY_DIR}/deal.II
+  DESTINATION ${DEAL_II_DOCHTML_RELDIR}/doxygen
+  COMPONENT documentation
+  )
+
diff --git a/doc/doxygen/DoxygenLayout.xml b/doc/doxygen/DoxygenLayout.xml
new file mode 100644
index 0000000..c2ba417
--- /dev/null
+++ b/doc/doxygen/DoxygenLayout.xml
@@ -0,0 +1,197 @@
+<doxygenlayout version="1.0">
+  <!-- Generated by doxygen 1.8.6 -->
+  <!-- Navigation index tabs for HTML output -->
+  <navindex>
+    <tab type="mainpage" visible="yes" title=""/>
+    <tab type="user" visible="yes" title="Tutorial" url="@ref Tutorial" />
+    <tab type="user" visible="yes" title="Code gallery" url="@ref CodeGallery" />
+    <tab type="modules" visible="yes" title="" intro=""/>
+    <tab type="namespaces" visible="yes" title="">
+      <tab type="namespacelist" visible="yes" title="" intro=""/>
+      <tab type="namespacemembers" visible="yes" title="" intro=""/>
+    </tab>
+    <tab type="classes" visible="yes" title="">
+      <tab type="classlist" visible="yes" title="" intro=""/>
+      <tab type="classindex" visible="$ALPHABETICAL_INDEX" title=""/> 
+      <tab type="hierarchy" visible="yes" title="" intro=""/>
+      <tab type="classmembers" visible="yes" title="" intro=""/>
+    </tab>
+    <tab type="pages" visible="yes" title="" intro=""/>
+    <tab type="files" visible="yes" title="">
+      <tab type="filelist" visible="yes" title="" intro=""/>
+      <tab type="globals" visible="yes" title="" intro=""/>
+    </tab>
+    <tab type="examples" visible="yes" title="" intro=""/>  
+    <tab type="user" visible="yes" title="dealii.org" url="http://www.dealii.org/" />
+  </navindex>
+
+  <!-- Layout definition for a class page -->
+  <class>
+    <briefdescription visible="yes"/>
+    <includes visible="$SHOW_INCLUDE_FILES"/>
+    <inheritancegraph visible="$CLASS_GRAPH"/>
+    <collaborationgraph visible="$COLLABORATION_GRAPH"/>
+    <memberdecl>
+      <nestedclasses visible="yes" title=""/>
+      <publictypes title=""/>
+      <services title=""/>
+      <interfaces title=""/>
+      <publicslots title=""/>
+      <signals title=""/>
+      <publicmethods title=""/>
+      <publicstaticmethods title=""/>
+      <publicattributes title=""/>
+      <publicstaticattributes title=""/>
+      <protectedtypes title=""/>
+      <protectedslots title=""/>
+      <protectedmethods title=""/>
+      <protectedstaticmethods title=""/>
+      <protectedattributes title=""/>
+      <protectedstaticattributes title=""/>
+      <packagetypes title=""/>
+      <packagemethods title=""/>
+      <packagestaticmethods title=""/>
+      <packageattributes title=""/>
+      <packagestaticattributes title=""/>
+      <properties title=""/>
+      <events title=""/>
+      <privatetypes title=""/>
+      <privateslots title=""/>
+      <privatemethods title=""/>
+      <privatestaticmethods title=""/>
+      <privateattributes title=""/>
+      <privatestaticattributes title=""/>
+      <friends title=""/>
+      <related title="" subtitle=""/>
+      <membergroups visible="yes"/>
+    </memberdecl>
+    <detaileddescription title=""/>
+    <memberdef>
+      <inlineclasses title=""/>
+      <typedefs title=""/>
+      <enums title=""/>
+      <services title=""/>
+      <interfaces title=""/>
+      <constructors title=""/>
+      <functions title=""/>
+      <related title=""/>
+      <variables title=""/>
+      <properties title=""/>
+      <events title=""/>
+    </memberdef>
+    <allmemberslink visible="yes"/>
+    <usedfiles visible="$SHOW_USED_FILES"/>
+    <authorsection visible="yes"/>
+  </class>
+
+  <!-- Layout definition for a namespace page -->
+  <namespace>
+    <briefdescription visible="yes"/>
+    <memberdecl>
+      <nestednamespaces visible="yes" title=""/>
+      <constantgroups visible="yes" title=""/>
+      <classes visible="yes" title=""/>
+      <typedefs title=""/>
+      <enums title=""/>
+      <functions title=""/>
+      <variables title=""/>
+      <membergroups visible="yes"/>
+    </memberdecl>
+    <detaileddescription title=""/>
+    <memberdef>
+      <inlineclasses title=""/>
+      <typedefs title=""/>
+      <enums title=""/>
+      <functions title=""/>
+      <variables title=""/>
+    </memberdef>
+    <authorsection visible="yes"/>
+  </namespace>
+
+  <!-- Layout definition for a file page -->
+  <file>
+    <briefdescription visible="yes"/>
+    <includes visible="$SHOW_INCLUDE_FILES"/>
+    <includegraph visible="$INCLUDE_GRAPH"/>
+    <includedbygraph visible="$INCLUDED_BY_GRAPH"/>
+    <sourcelink visible="yes"/>
+    <memberdecl>
+      <classes visible="yes" title=""/>
+      <namespaces visible="yes" title=""/>
+      <constantgroups visible="yes" title=""/>
+      <defines title=""/>
+      <typedefs title=""/>
+      <enums title=""/>
+      <functions title=""/>
+      <variables title=""/>
+      <membergroups visible="yes"/>
+    </memberdecl>
+    <detaileddescription title=""/>
+    <memberdef>
+      <inlineclasses title=""/>
+      <defines title=""/>
+      <typedefs title=""/>
+      <enums title=""/>
+      <functions title=""/>
+      <variables title=""/>
+    </memberdef>
+    <authorsection/>
+  </file>
+
+  <!-- Layout definition for a group page -->
+  <group>
+    <briefdescription visible="yes"/>
+    <groupgraph visible="$GROUP_GRAPHS"/>
+    <memberdecl>
+      <nestedgroups visible="yes" title=""/>
+      <dirs visible="yes" title=""/>
+      <files visible="yes" title=""/>
+      <namespaces visible="yes" title=""/>
+      <classes visible="yes" title=""/>
+      <defines title=""/>
+      <typedefs title=""/>
+      <enums title=""/>
+      <enumvalues title=""/>
+      <functions title=""/>
+      <variables title=""/>
+      <signals title=""/>
+      <publicslots title=""/>
+      <protectedslots title=""/>
+      <privateslots title=""/>
+      <events title=""/>
+      <properties title=""/>
+      <friends title=""/>
+      <membergroups visible="yes"/>
+    </memberdecl>
+    <detaileddescription title=""/>
+    <memberdef>
+      <pagedocs/>
+      <inlineclasses title=""/>
+      <defines title=""/>
+      <typedefs title=""/>
+      <enums title=""/>
+      <enumvalues title=""/>
+      <functions title=""/>
+      <variables title=""/>
+      <signals title=""/>
+      <publicslots title=""/>
+      <protectedslots title=""/>
+      <privateslots title=""/>
+      <events title=""/>
+      <properties title=""/>
+      <friends title=""/>
+    </memberdef>
+    <authorsection visible="yes"/>
+  </group>
+
+  <!-- Layout definition for a directory page -->
+  <directory>
+    <briefdescription visible="yes"/>
+    <directorygraph visible="yes"/>
+    <memberdecl>
+      <dirs visible="yes"/>
+      <files visible="yes"/>
+    </memberdecl>
+    <detaileddescription title=""/>
+  </directory>
+</doxygenlayout>
diff --git a/doc/doxygen/code-gallery/CMakeLists.txt b/doc/doxygen/code-gallery/CMakeLists.txt
new file mode 100644
index 0000000..9c30487
--- /dev/null
+++ b/doc/doxygen/code-gallery/CMakeLists.txt
@@ -0,0 +1,149 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2015 - 2016 by the deal.II Authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+
+
+#
+# A target for the preparation of all the stuff happening in here...
+#
+
+ADD_CUSTOM_TARGET(code-gallery)
+
+
+#
+# Check whether someone has either specified a global variable that
+# points to a code gallery directory, or whether it has been put into
+# a top-level code-gallery/ directory alongside the tutorial/
+# directory
+#
+SET_IF_EMPTY(DEAL_II_CODE_GALLERY_DIRECTORY ${CMAKE_SOURCE_DIR}/code-gallery)
+IF (EXISTS ${DEAL_II_CODE_GALLERY_DIRECTORY}/README.md)
+
+  MESSAGE(STATUS "Setting up code gallery documentation from ${DEAL_II_CODE_GALLERY_DIRECTORY}.")
+
+  # Collect the names of all code gallery projects. To
+  # do so, find all 'author' files, then strip the last two
+  # levels of these paths.
+  #
+  # For unclear reasons, the glob returns these files as
+  # "/a/b/c/name//doc//author", so make sure we eat the
+  # double slashes in the second step
+  FILE(GLOB _code_gallery_names
+       "${DEAL_II_CODE_GALLERY_DIRECTORY}/*/doc/author")
+  STRING(REGEX REPLACE "/+doc/+author" "" _code_gallery_names "${_code_gallery_names}")
+
+  FOREACH(_step ${_code_gallery_names})
+    GET_FILENAME_COMPONENT(_step "${_step}" NAME)
+    LIST(APPEND _code_gallery_names_sans_dir "${_step}")
+  ENDFOREACH()
+
+  # Describe how to build code-gallery.h:
+  ADD_CUSTOM_COMMAND(
+    OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/code-gallery.h
+    COMMAND ${PERL_EXECUTABLE}
+    ARGS
+      ${CMAKE_SOURCE_DIR}/doc/doxygen/scripts/code-gallery.pl
+      ${CMAKE_CURRENT_SOURCE_DIR}/code-gallery.h.in
+      ${DEAL_II_CODE_GALLERY_DIRECTORY}
+      ${_code_gallery_names_sans_dir}
+      > ${CMAKE_CURRENT_BINARY_DIR}/code-gallery.h
+    DEPENDS
+      ${CMAKE_SOURCE_DIR}/doc/doxygen/scripts/code-gallery.pl
+      ${CMAKE_CURRENT_SOURCE_DIR}/code-gallery.h.in
+      ${_code_gallery_names}
+    )
+  ADD_CUSTOM_TARGET(build_code-gallery_h
+    DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/code-gallery.h)
+  ADD_DEPENDENCIES(code-gallery build_code-gallery_h)
+
+
+  # Now set up targets for each of the code gallery programs
+  FOREACH(_step ${_code_gallery_names})
+    GET_FILENAME_COMPONENT(_step "${_step}" NAME)
+    MESSAGE(STATUS "  Setting up ${_step}")
+
+    # Get all source files so we can let the perl script work on
+    # them and so we properly describe the dependencies. Exclude the
+    # meta-files that only describe each code gallery project.
+    FILE(GLOB_RECURSE _src_files
+         ${DEAL_II_CODE_GALLERY_DIRECTORY}/${_step}/*)
+    STRING(REPLACE "${DEAL_II_CODE_GALLERY_DIRECTORY}/${_step}/" "" _relative_src_files
+           "${_src_files}")
+    LIST(REMOVE_ITEM _relative_src_files doc/author)
+    LIST(REMOVE_ITEM _relative_src_files doc/tooltip)
+    LIST(REMOVE_ITEM _relative_src_files doc/dependencies)
+    LIST(REMOVE_ITEM _relative_src_files doc/builds-on)
+
+    ADD_CUSTOM_COMMAND(
+      OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${_step}.h
+      COMMAND ${PERL_EXECUTABLE}
+      ARGS
+        ${CMAKE_SOURCE_DIR}/doc/doxygen/scripts/make_gallery.pl
+        ${CMAKE_SOURCE_DIR} 
+        ${_step} 
+        ${DEAL_II_CODE_GALLERY_DIRECTORY}/${_step}
+        ${_relative_src_files} 
+        > ${CMAKE_CURRENT_BINARY_DIR}/${_step}.h
+      WORKING_DIRECTORY
+        ${CMAKE_CURRENT_BINARY_DIR}
+      DEPENDS
+        ${CMAKE_SOURCE_DIR}/doc/doxygen/scripts/make_gallery.pl
+        ${CMAKE_SOURCE_DIR}/doc/doxygen/scripts/program2doxygen
+        ${_src_files}
+      )
+
+    # Copy files of interest (non-metadata) to the build directory
+    # so we can link to them, and schedule them for installation
+    FILE(COPY ${DEAL_II_CODE_GALLERY_DIRECTORY}/${_step}
+         DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/
+         PATTERN REGEX "doc/tooltip|doc/dependencies|doc/builds-on" EXCLUDE)
+    INSTALL(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${_step}
+            DESTINATION ${DEAL_II_DOCHTML_RELDIR}/doxygen/code-gallery
+            COMPONENT documentation
+           )
+
+    # Create a target for this program and add it to the top-level
+    # target of this directory
+    ADD_CUSTOM_TARGET(code-gallery_${_step}
+      DEPENDS
+        ${CMAKE_CURRENT_BINARY_DIR}/${_step}.h
+      )
+    ADD_DEPENDENCIES(code-gallery code-gallery_${_step})
+
+
+  ENDFOREACH()
+
+ELSE()
+
+  # No copy of the code gallery is available. Say so, but also
+  # copy in a file that creates a doxygen page we can link to
+  # nonetheless, so we don't get bad doxygen references.
+  MESSAGE(STATUS "Setting up code gallery documentation.")
+  MESSAGE(STATUS "  Skipping as no code gallery exists in ${DEAL_II_CODE_GALLERY_DIRECTORY}.")
+
+  ADD_CUSTOM_COMMAND(
+    OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/code-gallery.h
+    COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/no-code-gallery.h ${CMAKE_CURRENT_BINARY_DIR}/code-gallery.h
+    DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/no-code-gallery.h
+	)
+
+  # Make the custom command for code-gallery.h visible to the parent
+  # CMakeLists.txt by attaching it to the code-gallery custom target:
+  ADD_CUSTOM_TARGET(build_code-gallery_h
+    DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/code-gallery.h)
+  ADD_DEPENDENCIES(code-gallery build_code-gallery_h)
+
+ENDIF()
+
diff --git a/doc/doxygen/code-gallery/code-gallery.h.in b/doc/doxygen/code-gallery/code-gallery.h.in
new file mode 100644
index 0000000..13d5b69
--- /dev/null
+++ b/doc/doxygen/code-gallery/code-gallery.h.in
@@ -0,0 +1,40 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+ * @page CodeGallery The deal.II code gallery
+ *
+ * The deal.II code gallery contains a collection of programs based on deal.II
+ * that were contributed by others to serve as starting points for more complex
+ * applications. The code gallery is an extension of the @ref Tutorial "tutorial"
+ * in that the programs are intended to show how applications can be
+ * implemented with deal.II, but without the requirement to have this code
+ * documented at the same, extensive level as used in the tutorial.
+ * Instructions for obtaining the code gallery programs can be found at
+ * http://dealii.org/code-gallery.html .
+ *
+ * @warning The programs that form part of the code gallery are contributed by
+ *   others and are not part of deal.II itself. The deal.II authors make
+ *   no assurances that these programs are documented in any reasonable way,
+ *   nor that the programs are in fact correct. Please contact the authors
+ *   of the code gallery programs if you have questions.
+ *
+ * The code gallery currently consists of the following programs (with
+ * connections to other programs shown in the graph on the
+ * @ref TutorialConnectionGraph "tutorial page"):
+ *
+@@GALLERY_LIST@@
+ */
diff --git a/doc/doxygen/code-gallery/no-code-gallery.h b/doc/doxygen/code-gallery/no-code-gallery.h
new file mode 100644
index 0000000..4d93ccd
--- /dev/null
+++ b/doc/doxygen/code-gallery/no-code-gallery.h
@@ -0,0 +1,27 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+ * @page CodeGallery The deal.II code gallery
+ *
+ * In order to create pages for the code gallery, there needs to be a
+ * version of the code gallery checked out alongside the other
+ * top-level directories in the <code>deal.II/</code> directory. No
+ * such copy was found when configuring your version of deal.II.
+ *
+ * Instructions for obtaining the code gallery can be found at
+ * http://dealii.org/code-gallery.html .
+ */
diff --git a/doc/doxygen/deal.II/images/step-1.grid-1.png b/doc/doxygen/deal.II/images/step-1.grid-1.png
new file mode 100644
index 0000000..e7edc25
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-1.grid-1.png differ
diff --git a/doc/doxygen/deal.II/images/step-1.grid-2.png b/doc/doxygen/deal.II/images/step-1.grid-2.png
new file mode 100644
index 0000000..8dc1d31
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-1.grid-2.png differ
diff --git a/doc/doxygen/deal.II/images/step-1.grid-2r2.png b/doc/doxygen/deal.II/images/step-1.grid-2r2.png
new file mode 100644
index 0000000..7024ebd
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-1.grid-2r2.png differ
diff --git a/doc/doxygen/deal.II/images/step-10.ball_mapping_q1_ref0.png b/doc/doxygen/deal.II/images/step-10.ball_mapping_q1_ref0.png
new file mode 100644
index 0000000..37bfb68
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-10.ball_mapping_q1_ref0.png differ
diff --git a/doc/doxygen/deal.II/images/step-10.ball_mapping_q1_ref1.png b/doc/doxygen/deal.II/images/step-10.ball_mapping_q1_ref1.png
new file mode 100644
index 0000000..132fe88
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-10.ball_mapping_q1_ref1.png differ
diff --git a/doc/doxygen/deal.II/images/step-10.ball_mapping_q2_ref0.png b/doc/doxygen/deal.II/images/step-10.ball_mapping_q2_ref0.png
new file mode 100644
index 0000000..c46928c
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-10.ball_mapping_q2_ref0.png differ
diff --git a/doc/doxygen/deal.II/images/step-10.ball_mapping_q2_ref1.png b/doc/doxygen/deal.II/images/step-10.ball_mapping_q2_ref1.png
new file mode 100644
index 0000000..34287d0
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-10.ball_mapping_q2_ref1.png differ
diff --git a/doc/doxygen/deal.II/images/step-10.ball_mapping_q3_ref0.png b/doc/doxygen/deal.II/images/step-10.ball_mapping_q3_ref0.png
new file mode 100644
index 0000000..8b28048
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-10.ball_mapping_q3_ref0.png differ
diff --git a/doc/doxygen/deal.II/images/step-10.ball_mapping_q3_ref1.png b/doc/doxygen/deal.II/images/step-10.ball_mapping_q3_ref1.png
new file mode 100644
index 0000000..4de0229
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-10.ball_mapping_q3_ref1.png differ
diff --git a/doc/doxygen/deal.II/images/step-10.quarter-q1.png b/doc/doxygen/deal.II/images/step-10.quarter-q1.png
new file mode 100644
index 0000000..c41bd99
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-10.quarter-q1.png differ
diff --git a/doc/doxygen/deal.II/images/step-10.quarter-q2.png b/doc/doxygen/deal.II/images/step-10.quarter-q2.png
new file mode 100644
index 0000000..b72444e
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-10.quarter-q2.png differ
diff --git a/doc/doxygen/deal.II/images/step-10.quarter-q3.png b/doc/doxygen/deal.II/images/step-10.quarter-q3.png
new file mode 100644
index 0000000..013117e
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-10.quarter-q3.png differ
diff --git a/doc/doxygen/deal.II/images/step-12.3d-solution.png b/doc/doxygen/deal.II/images/step-12.3d-solution.png
new file mode 100644
index 0000000..5520d74
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-12.3d-solution.png differ
diff --git a/doc/doxygen/deal.II/images/step-12.cg.sol-0.png b/doc/doxygen/deal.II/images/step-12.cg.sol-0.png
new file mode 100644
index 0000000..e7c7afc
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-12.cg.sol-0.png differ
diff --git a/doc/doxygen/deal.II/images/step-12.cg.sol-1.png b/doc/doxygen/deal.II/images/step-12.cg.sol-1.png
new file mode 100644
index 0000000..5be106b
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-12.cg.sol-1.png differ
diff --git a/doc/doxygen/deal.II/images/step-12.cg.sol-2.png b/doc/doxygen/deal.II/images/step-12.cg.sol-2.png
new file mode 100644
index 0000000..39c8fc4
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-12.cg.sol-2.png differ
diff --git a/doc/doxygen/deal.II/images/step-12.cg.sol-3.png b/doc/doxygen/deal.II/images/step-12.cg.sol-3.png
new file mode 100644
index 0000000..d52f8e8
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-12.cg.sol-3.png differ
diff --git a/doc/doxygen/deal.II/images/step-12.cg.sol-4.png b/doc/doxygen/deal.II/images/step-12.cg.sol-4.png
new file mode 100644
index 0000000..f5c3079
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-12.cg.sol-4.png differ
diff --git a/doc/doxygen/deal.II/images/step-12.cg.sol-5.png b/doc/doxygen/deal.II/images/step-12.cg.sol-5.png
new file mode 100644
index 0000000..182462b
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-12.cg.sol-5.png differ
diff --git a/doc/doxygen/deal.II/images/step-12.grid-5.png b/doc/doxygen/deal.II/images/step-12.grid-5.png
new file mode 100644
index 0000000..a2383b9
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-12.grid-5.png differ
diff --git a/doc/doxygen/deal.II/images/step-12.sol-0.png b/doc/doxygen/deal.II/images/step-12.sol-0.png
new file mode 100644
index 0000000..16ae3fb
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-12.sol-0.png differ
diff --git a/doc/doxygen/deal.II/images/step-12.sol-2.png b/doc/doxygen/deal.II/images/step-12.sol-2.png
new file mode 100644
index 0000000..081f3af
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-12.sol-2.png differ
diff --git a/doc/doxygen/deal.II/images/step-12.sol-5-3d.png b/doc/doxygen/deal.II/images/step-12.sol-5-3d.png
new file mode 100644
index 0000000..6f80120
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-12.sol-5-3d.png differ
diff --git a/doc/doxygen/deal.II/images/step-12.sol-5.png b/doc/doxygen/deal.II/images/step-12.sol-5.png
new file mode 100644
index 0000000..d9cfd87
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-12.sol-5.png differ
diff --git a/doc/doxygen/deal.II/images/step-13.error.png b/doc/doxygen/deal.II/images/step-13.error.png
new file mode 100644
index 0000000..3752c0d
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-13.error.png differ
diff --git a/doc/doxygen/deal.II/images/step-13.grid-kelly-8.png b/doc/doxygen/deal.II/images/step-13.grid-kelly-8.png
new file mode 100644
index 0000000..0e36185
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-13.grid-kelly-8.png differ
diff --git a/doc/doxygen/deal.II/images/step-13.solution-kelly-0.png b/doc/doxygen/deal.II/images/step-13.solution-kelly-0.png
new file mode 100644
index 0000000..f993cb7
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-13.solution-kelly-0.png differ
diff --git a/doc/doxygen/deal.II/images/step-13.solution-kelly-1.png b/doc/doxygen/deal.II/images/step-13.solution-kelly-1.png
new file mode 100644
index 0000000..f657aca
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-13.solution-kelly-1.png differ
diff --git a/doc/doxygen/deal.II/images/step-13.solution-kelly-2.png b/doc/doxygen/deal.II/images/step-13.solution-kelly-2.png
new file mode 100644
index 0000000..18c5e74
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-13.solution-kelly-2.png differ
diff --git a/doc/doxygen/deal.II/images/step-13.solution-kelly-3.png b/doc/doxygen/deal.II/images/step-13.solution-kelly-3.png
new file mode 100644
index 0000000..241a58d
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-13.solution-kelly-3.png differ
diff --git a/doc/doxygen/deal.II/images/step-13.solution-kelly-4.png b/doc/doxygen/deal.II/images/step-13.solution-kelly-4.png
new file mode 100644
index 0000000..9c7b797
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-13.solution-kelly-4.png differ
diff --git a/doc/doxygen/deal.II/images/step-13.solution-kelly-5.png b/doc/doxygen/deal.II/images/step-13.solution-kelly-5.png
new file mode 100644
index 0000000..ad841b8
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-13.solution-kelly-5.png differ
diff --git a/doc/doxygen/deal.II/images/step-13.solution-kelly-6.png b/doc/doxygen/deal.II/images/step-13.solution-kelly-6.png
new file mode 100644
index 0000000..fc0d2cd
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-13.solution-kelly-6.png differ
diff --git a/doc/doxygen/deal.II/images/step-13.solution-kelly-7.png b/doc/doxygen/deal.II/images/step-13.solution-kelly-7.png
new file mode 100644
index 0000000..1701b06
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-13.solution-kelly-7.png differ
diff --git a/doc/doxygen/deal.II/images/step-13.solution-kelly-8.png b/doc/doxygen/deal.II/images/step-13.solution-kelly-8.png
new file mode 100644
index 0000000..22ec43e
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-13.solution-kelly-8.png differ
diff --git a/doc/doxygen/deal.II/images/step-13.solution-kelly-9.png b/doc/doxygen/deal.II/images/step-13.solution-kelly-9.png
new file mode 100644
index 0000000..d5ac791
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-13.solution-kelly-9.png differ
diff --git a/doc/doxygen/deal.II/images/step-14.point-derivative.error-estimation.png b/doc/doxygen/deal.II/images/step-14.point-derivative.error-estimation.png
new file mode 100644
index 0000000..f9d87a2
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-14.point-derivative.error-estimation.png differ
diff --git a/doc/doxygen/deal.II/images/step-14.point-derivative.error.png b/doc/doxygen/deal.II/images/step-14.point-derivative.error.png
new file mode 100644
index 0000000..8195cf8
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-14.point-derivative.error.png differ
diff --git a/doc/doxygen/deal.II/images/step-14.point-derivative.grid-0.png b/doc/doxygen/deal.II/images/step-14.point-derivative.grid-0.png
new file mode 100644
index 0000000..b6c9a00
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-14.point-derivative.grid-0.png differ
diff --git a/doc/doxygen/deal.II/images/step-14.point-derivative.grid-5.png b/doc/doxygen/deal.II/images/step-14.point-derivative.grid-5.png
new file mode 100644
index 0000000..4d7842e
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-14.point-derivative.grid-5.png differ
diff --git a/doc/doxygen/deal.II/images/step-14.point-derivative.grid-6.png b/doc/doxygen/deal.II/images/step-14.point-derivative.grid-6.png
new file mode 100644
index 0000000..2e9574b
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-14.point-derivative.grid-6.png differ
diff --git a/doc/doxygen/deal.II/images/step-14.point-derivative.grid-7.png b/doc/doxygen/deal.II/images/step-14.point-derivative.grid-7.png
new file mode 100644
index 0000000..bfc8943
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-14.point-derivative.grid-7.png differ
diff --git a/doc/doxygen/deal.II/images/step-14.point-derivative.grid-8.png b/doc/doxygen/deal.II/images/step-14.point-derivative.grid-8.png
new file mode 100644
index 0000000..8dc4427
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-14.point-derivative.grid-8.png differ
diff --git a/doc/doxygen/deal.II/images/step-14.point-derivative.grid-9.png b/doc/doxygen/deal.II/images/step-14.point-derivative.grid-9.png
new file mode 100644
index 0000000..5de3f40
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-14.point-derivative.grid-9.png differ
diff --git a/doc/doxygen/deal.II/images/step-14.point-derivative.solution-5-dual.png b/doc/doxygen/deal.II/images/step-14.point-derivative.solution-5-dual.png
new file mode 100644
index 0000000..29f7640
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-14.point-derivative.solution-5-dual.png differ
diff --git a/doc/doxygen/deal.II/images/step-14.point-value.error-comparison.png b/doc/doxygen/deal.II/images/step-14.point-value.error-comparison.png
new file mode 100644
index 0000000..627e084
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-14.point-value.error-comparison.png differ
diff --git a/doc/doxygen/deal.II/images/step-14.point-value.error-estimation.png b/doc/doxygen/deal.II/images/step-14.point-value.error-estimation.png
new file mode 100644
index 0000000..5f41e34
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-14.point-value.error-estimation.png differ
diff --git a/doc/doxygen/deal.II/images/step-14.point-value.error.png b/doc/doxygen/deal.II/images/step-14.point-value.error.png
new file mode 100644
index 0000000..9f48c91
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-14.point-value.error.png differ
diff --git a/doc/doxygen/deal.II/images/step-14.point-value.grid-0.png b/doc/doxygen/deal.II/images/step-14.point-value.grid-0.png
new file mode 100644
index 0000000..b6c9a00
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-14.point-value.grid-0.png differ
diff --git a/doc/doxygen/deal.II/images/step-14.point-value.grid-2.png b/doc/doxygen/deal.II/images/step-14.point-value.grid-2.png
new file mode 100644
index 0000000..61d9036
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-14.point-value.grid-2.png differ
diff --git a/doc/doxygen/deal.II/images/step-14.point-value.grid-4.png b/doc/doxygen/deal.II/images/step-14.point-value.grid-4.png
new file mode 100644
index 0000000..14669eb
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-14.point-value.grid-4.png differ
diff --git a/doc/doxygen/deal.II/images/step-14.point-value.grid-5.png b/doc/doxygen/deal.II/images/step-14.point-value.grid-5.png
new file mode 100644
index 0000000..f53aa60
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-14.point-value.grid-5.png differ
diff --git a/doc/doxygen/deal.II/images/step-14.point-value.grid-7.png b/doc/doxygen/deal.II/images/step-14.point-value.grid-7.png
new file mode 100644
index 0000000..1807fda
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-14.point-value.grid-7.png differ
diff --git a/doc/doxygen/deal.II/images/step-14.point-value.grid-8.png b/doc/doxygen/deal.II/images/step-14.point-value.grid-8.png
new file mode 100644
index 0000000..c40d61f
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-14.point-value.grid-8.png differ
diff --git a/doc/doxygen/deal.II/images/step-14.point-value.solution-5-dual.png b/doc/doxygen/deal.II/images/step-14.point-value.solution-5-dual.png
new file mode 100644
index 0000000..b6b7f51
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-14.point-value.solution-5-dual.png differ
diff --git a/doc/doxygen/deal.II/images/step-14.point-value.solution-5.png b/doc/doxygen/deal.II/images/step-14.point-value.solution-5.png
new file mode 100644
index 0000000..f002b72
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-14.point-value.solution-5.png differ
diff --git a/doc/doxygen/deal.II/images/step-14.step-13.error-comparison.png b/doc/doxygen/deal.II/images/step-14.step-13.error-comparison.png
new file mode 100644
index 0000000..5f02de6
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-14.step-13.error-comparison.png differ
diff --git a/doc/doxygen/deal.II/images/step-14.step-13.grid-10.png b/doc/doxygen/deal.II/images/step-14.step-13.grid-10.png
new file mode 100644
index 0000000..eedfcda
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-14.step-13.grid-10.png differ
diff --git a/doc/doxygen/deal.II/images/step-14.step-13.grid-9.png b/doc/doxygen/deal.II/images/step-14.step-13.grid-9.png
new file mode 100644
index 0000000..67f5f12
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-14.step-13.grid-9.png differ
diff --git a/doc/doxygen/deal.II/images/step-14.step-13.solution-7.png b/doc/doxygen/deal.II/images/step-14.step-13.solution-7.png
new file mode 100644
index 0000000..c3e637f
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-14.step-13.solution-7.png differ
diff --git a/doc/doxygen/deal.II/images/step-15.grid.png b/doc/doxygen/deal.II/images/step-15.grid.png
new file mode 100644
index 0000000..e7df8ef
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-15.grid.png differ
diff --git a/doc/doxygen/deal.II/images/step-15.solution-0.png b/doc/doxygen/deal.II/images/step-15.solution-0.png
new file mode 100644
index 0000000..e259bbd
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-15.solution-0.png differ
diff --git a/doc/doxygen/deal.II/images/step-15.solution-1.png b/doc/doxygen/deal.II/images/step-15.solution-1.png
new file mode 100644
index 0000000..ed17392
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-15.solution-1.png differ
diff --git a/doc/doxygen/deal.II/images/step-15.solution-2.png b/doc/doxygen/deal.II/images/step-15.solution-2.png
new file mode 100644
index 0000000..a5fd11c
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-15.solution-2.png differ
diff --git a/doc/doxygen/deal.II/images/step-15.solution-3.png b/doc/doxygen/deal.II/images/step-15.solution-3.png
new file mode 100644
index 0000000..9521a33
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-15.solution-3.png differ
diff --git a/doc/doxygen/deal.II/images/step-15.solution-4.png b/doc/doxygen/deal.II/images/step-15.solution-4.png
new file mode 100644
index 0000000..4e0f376
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-15.solution-4.png differ
diff --git a/doc/doxygen/deal.II/images/step-15.solution-5.png b/doc/doxygen/deal.II/images/step-15.solution-5.png
new file mode 100644
index 0000000..2011553
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-15.solution-5.png differ
diff --git a/doc/doxygen/deal.II/images/step-15.solution-6.png b/doc/doxygen/deal.II/images/step-15.solution-6.png
new file mode 100644
index 0000000..d2c858b
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-15.solution-6.png differ
diff --git a/doc/doxygen/deal.II/images/step-15.solution-7.png b/doc/doxygen/deal.II/images/step-15.solution-7.png
new file mode 100644
index 0000000..3a7a0f0
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-15.solution-7.png differ
diff --git a/doc/doxygen/deal.II/images/step-16.solution.png b/doc/doxygen/deal.II/images/step-16.solution.png
new file mode 100644
index 0000000..ac8440a
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-16.solution.png differ
diff --git a/doc/doxygen/deal.II/images/step-17.12-grid.png b/doc/doxygen/deal.II/images/step-17.12-grid.png
new file mode 100644
index 0000000..3150eda
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-17.12-grid.png differ
diff --git a/doc/doxygen/deal.II/images/step-17.12-partition.png b/doc/doxygen/deal.II/images/step-17.12-partition.png
new file mode 100644
index 0000000..a496ec2
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-17.12-partition.png differ
diff --git a/doc/doxygen/deal.II/images/step-17.12-ux.png b/doc/doxygen/deal.II/images/step-17.12-ux.png
new file mode 100644
index 0000000..7dfb0b2
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-17.12-ux.png differ
diff --git a/doc/doxygen/deal.II/images/step-17.12-uy.png b/doc/doxygen/deal.II/images/step-17.12-uy.png
new file mode 100644
index 0000000..1b759fd
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-17.12-uy.png differ
diff --git a/doc/doxygen/deal.II/images/step-17.4-3d-partition.png b/doc/doxygen/deal.II/images/step-17.4-3d-partition.png
new file mode 100644
index 0000000..45b25f1
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-17.4-3d-partition.png differ
diff --git a/doc/doxygen/deal.II/images/step-17.4-3d-ux.png b/doc/doxygen/deal.II/images/step-17.4-3d-ux.png
new file mode 100644
index 0000000..a4bd3b1
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-17.4-3d-ux.png differ
diff --git a/doc/doxygen/deal.II/images/step-18.parallel-0002.p.png b/doc/doxygen/deal.II/images/step-18.parallel-0002.p.png
new file mode 100644
index 0000000..69ab3cc
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-18.parallel-0002.p.png differ
diff --git a/doc/doxygen/deal.II/images/step-18.parallel-0002.s.png b/doc/doxygen/deal.II/images/step-18.parallel-0002.s.png
new file mode 100644
index 0000000..d1618ea
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-18.parallel-0002.s.png differ
diff --git a/doc/doxygen/deal.II/images/step-18.parallel-0005.s.png b/doc/doxygen/deal.II/images/step-18.parallel-0005.s.png
new file mode 100644
index 0000000..2b240f3
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-18.parallel-0005.s.png differ
diff --git a/doc/doxygen/deal.II/images/step-18.parallel-0007.s.png b/doc/doxygen/deal.II/images/step-18.parallel-0007.s.png
new file mode 100644
index 0000000..dfcf9fc
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-18.parallel-0007.s.png differ
diff --git a/doc/doxygen/deal.II/images/step-18.parallel-0008.s.png b/doc/doxygen/deal.II/images/step-18.parallel-0008.s.png
new file mode 100644
index 0000000..c87f944
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-18.parallel-0008.s.png differ
diff --git a/doc/doxygen/deal.II/images/step-18.parallel-0009.s.png b/doc/doxygen/deal.II/images/step-18.parallel-0009.s.png
new file mode 100644
index 0000000..983d86d
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-18.parallel-0009.s.png differ
diff --git a/doc/doxygen/deal.II/images/step-18.parallel-000mesh.png b/doc/doxygen/deal.II/images/step-18.parallel-000mesh.png
new file mode 100644
index 0000000..426a2ba
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-18.parallel-000mesh.png differ
diff --git a/doc/doxygen/deal.II/images/step-18.parallel-0010.s.png b/doc/doxygen/deal.II/images/step-18.parallel-0010.s.png
new file mode 100644
index 0000000..d39ca4b
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-18.parallel-0010.s.png differ
diff --git a/doc/doxygen/deal.II/images/step-18.sequential-0002.0000.png b/doc/doxygen/deal.II/images/step-18.sequential-0002.0000.png
new file mode 100644
index 0000000..3fbdc77
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-18.sequential-0002.0000.png differ
diff --git a/doc/doxygen/deal.II/images/step-18.sequential-0005.0000.png b/doc/doxygen/deal.II/images/step-18.sequential-0005.0000.png
new file mode 100644
index 0000000..df8662d
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-18.sequential-0005.0000.png differ
diff --git a/doc/doxygen/deal.II/images/step-18.sequential-0007.0000.png b/doc/doxygen/deal.II/images/step-18.sequential-0007.0000.png
new file mode 100644
index 0000000..e531014
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-18.sequential-0007.0000.png differ
diff --git a/doc/doxygen/deal.II/images/step-18.sequential-0008.0000.png b/doc/doxygen/deal.II/images/step-18.sequential-0008.0000.png
new file mode 100644
index 0000000..331620f
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-18.sequential-0008.0000.png differ
diff --git a/doc/doxygen/deal.II/images/step-18.sequential-0009.0000.png b/doc/doxygen/deal.II/images/step-18.sequential-0009.0000.png
new file mode 100644
index 0000000..a7e2a93
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-18.sequential-0009.0000.png differ
diff --git a/doc/doxygen/deal.II/images/step-18.sequential-0010.0000.png b/doc/doxygen/deal.II/images/step-18.sequential-0010.0000.png
new file mode 100644
index 0000000..b3a6072
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-18.sequential-0010.0000.png differ
diff --git a/doc/doxygen/deal.II/images/step-19.solution-0005.png b/doc/doxygen/deal.II/images/step-19.solution-0005.png
new file mode 100644
index 0000000..f9d9b5f
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-19.solution-0005.png differ
diff --git a/doc/doxygen/deal.II/images/step-2.sparsity-1.png b/doc/doxygen/deal.II/images/step-2.sparsity-1.png
new file mode 100644
index 0000000..364651e
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-2.sparsity-1.png differ
diff --git a/doc/doxygen/deal.II/images/step-2.sparsity-1.svg b/doc/doxygen/deal.II/images/step-2.sparsity-1.svg
new file mode 100644
index 0000000..232cc6a
--- /dev/null
+++ b/doc/doxygen/deal.II/images/step-2.sparsity-1.svg
@@ -0,0 +1,1183 @@
+<svg xmlns="http://www.w3.org/2000/svg" version="1.1" viewBox="0 0 152 152 ">
+<style type="text/css" >
+     <![CDATA[
+      rect.pixel {
+          fill:   #ff0000;
+      }
+    ]]>
+  </style>
+
+   <rect width="152" height="152" fill="rgb(128, 128, 128)"/>
+   <rect x="1" y="1" width="150" height="150" fill="rgb(255, 255, 255)"/>
+
+  <rect class="pixel" x="1" y="1" width=".9" height=".9"/>
+  <rect class="pixel" x="2" y="1" width=".9" height=".9"/>
+  <rect class="pixel" x="3" y="1" width=".9" height=".9"/>
+  <rect class="pixel" x="4" y="1" width=".9" height=".9"/>
+  <rect class="pixel" x="19" y="1" width=".9" height=".9"/>
+  <rect class="pixel" x="20" y="1" width=".9" height=".9"/>
+  <rect class="pixel" x="2" y="2" width=".9" height=".9"/>
+  <rect class="pixel" x="1" y="2" width=".9" height=".9"/>
+  <rect class="pixel" x="3" y="2" width=".9" height=".9"/>
+  <rect class="pixel" x="4" y="2" width=".9" height=".9"/>
+  <rect class="pixel" x="5" y="2" width=".9" height=".9"/>
+  <rect class="pixel" x="6" y="2" width=".9" height=".9"/>
+  <rect class="pixel" x="3" y="3" width=".9" height=".9"/>
+  <rect class="pixel" x="1" y="3" width=".9" height=".9"/>
+  <rect class="pixel" x="2" y="3" width=".9" height=".9"/>
+  <rect class="pixel" x="4" y="3" width=".9" height=".9"/>
+  <rect class="pixel" x="19" y="3" width=".9" height=".9"/>
+  <rect class="pixel" x="20" y="3" width=".9" height=".9"/>
+  <rect class="pixel" x="21" y="3" width=".9" height=".9"/>
+  <rect class="pixel" x="22" y="3" width=".9" height=".9"/>
+  <rect class="pixel" x="23" y="3" width=".9" height=".9"/>
+  <rect class="pixel" x="49" y="3" width=".9" height=".9"/>
+  <rect class="pixel" x="50" y="3" width=".9" height=".9"/>
+  <rect class="pixel" x="4" y="4" width=".9" height=".9"/>
+  <rect class="pixel" x="1" y="4" width=".9" height=".9"/>
+  <rect class="pixel" x="2" y="4" width=".9" height=".9"/>
+  <rect class="pixel" x="3" y="4" width=".9" height=".9"/>
+  <rect class="pixel" x="5" y="4" width=".9" height=".9"/>
+  <rect class="pixel" x="6" y="4" width=".9" height=".9"/>
+  <rect class="pixel" x="21" y="4" width=".9" height=".9"/>
+  <rect class="pixel" x="23" y="4" width=".9" height=".9"/>
+  <rect class="pixel" x="24" y="4" width=".9" height=".9"/>
+  <rect class="pixel" x="25" y="4" width=".9" height=".9"/>
+  <rect class="pixel" x="26" y="4" width=".9" height=".9"/>
+  <rect class="pixel" x="5" y="5" width=".9" height=".9"/>
+  <rect class="pixel" x="2" y="5" width=".9" height=".9"/>
+  <rect class="pixel" x="4" y="5" width=".9" height=".9"/>
+  <rect class="pixel" x="6" y="5" width=".9" height=".9"/>
+  <rect class="pixel" x="7" y="5" width=".9" height=".9"/>
+  <rect class="pixel" x="8" y="5" width=".9" height=".9"/>
+  <rect class="pixel" x="6" y="6" width=".9" height=".9"/>
+  <rect class="pixel" x="2" y="6" width=".9" height=".9"/>
+  <rect class="pixel" x="4" y="6" width=".9" height=".9"/>
+  <rect class="pixel" x="5" y="6" width=".9" height=".9"/>
+  <rect class="pixel" x="7" y="6" width=".9" height=".9"/>
+  <rect class="pixel" x="8" y="6" width=".9" height=".9"/>
+  <rect class="pixel" x="25" y="6" width=".9" height=".9"/>
+  <rect class="pixel" x="26" y="6" width=".9" height=".9"/>
+  <rect class="pixel" x="27" y="6" width=".9" height=".9"/>
+  <rect class="pixel" x="28" y="6" width=".9" height=".9"/>
+  <rect class="pixel" x="29" y="6" width=".9" height=".9"/>
+  <rect class="pixel" x="7" y="7" width=".9" height=".9"/>
+  <rect class="pixel" x="5" y="7" width=".9" height=".9"/>
+  <rect class="pixel" x="6" y="7" width=".9" height=".9"/>
+  <rect class="pixel" x="8" y="7" width=".9" height=".9"/>
+  <rect class="pixel" x="9" y="7" width=".9" height=".9"/>
+  <rect class="pixel" x="10" y="7" width=".9" height=".9"/>
+  <rect class="pixel" x="8" y="8" width=".9" height=".9"/>
+  <rect class="pixel" x="5" y="8" width=".9" height=".9"/>
+  <rect class="pixel" x="6" y="8" width=".9" height=".9"/>
+  <rect class="pixel" x="7" y="8" width=".9" height=".9"/>
+  <rect class="pixel" x="9" y="8" width=".9" height=".9"/>
+  <rect class="pixel" x="10" y="8" width=".9" height=".9"/>
+  <rect class="pixel" x="28" y="8" width=".9" height=".9"/>
+  <rect class="pixel" x="29" y="8" width=".9" height=".9"/>
+  <rect class="pixel" x="30" y="8" width=".9" height=".9"/>
+  <rect class="pixel" x="31" y="8" width=".9" height=".9"/>
+  <rect class="pixel" x="32" y="8" width=".9" height=".9"/>
+  <rect class="pixel" x="9" y="9" width=".9" height=".9"/>
+  <rect class="pixel" x="7" y="9" width=".9" height=".9"/>
+  <rect class="pixel" x="8" y="9" width=".9" height=".9"/>
+  <rect class="pixel" x="10" y="9" width=".9" height=".9"/>
+  <rect class="pixel" x="11" y="9" width=".9" height=".9"/>
+  <rect class="pixel" x="12" y="9" width=".9" height=".9"/>
+  <rect class="pixel" x="10" y="10" width=".9" height=".9"/>
+  <rect class="pixel" x="7" y="10" width=".9" height=".9"/>
+  <rect class="pixel" x="8" y="10" width=".9" height=".9"/>
+  <rect class="pixel" x="9" y="10" width=".9" height=".9"/>
+  <rect class="pixel" x="11" y="10" width=".9" height=".9"/>
+  <rect class="pixel" x="12" y="10" width=".9" height=".9"/>
+  <rect class="pixel" x="31" y="10" width=".9" height=".9"/>
+  <rect class="pixel" x="32" y="10" width=".9" height=".9"/>
+  <rect class="pixel" x="33" y="10" width=".9" height=".9"/>
+  <rect class="pixel" x="34" y="10" width=".9" height=".9"/>
+  <rect class="pixel" x="35" y="10" width=".9" height=".9"/>
+  <rect class="pixel" x="11" y="11" width=".9" height=".9"/>
+  <rect class="pixel" x="9" y="11" width=".9" height=".9"/>
+  <rect class="pixel" x="10" y="11" width=".9" height=".9"/>
+  <rect class="pixel" x="12" y="11" width=".9" height=".9"/>
+  <rect class="pixel" x="13" y="11" width=".9" height=".9"/>
+  <rect class="pixel" x="14" y="11" width=".9" height=".9"/>
+  <rect class="pixel" x="12" y="12" width=".9" height=".9"/>
+  <rect class="pixel" x="9" y="12" width=".9" height=".9"/>
+  <rect class="pixel" x="10" y="12" width=".9" height=".9"/>
+  <rect class="pixel" x="11" y="12" width=".9" height=".9"/>
+  <rect class="pixel" x="13" y="12" width=".9" height=".9"/>
+  <rect class="pixel" x="14" y="12" width=".9" height=".9"/>
+  <rect class="pixel" x="34" y="12" width=".9" height=".9"/>
+  <rect class="pixel" x="35" y="12" width=".9" height=".9"/>
+  <rect class="pixel" x="36" y="12" width=".9" height=".9"/>
+  <rect class="pixel" x="37" y="12" width=".9" height=".9"/>
+  <rect class="pixel" x="38" y="12" width=".9" height=".9"/>
+  <rect class="pixel" x="13" y="13" width=".9" height=".9"/>
+  <rect class="pixel" x="11" y="13" width=".9" height=".9"/>
+  <rect class="pixel" x="12" y="13" width=".9" height=".9"/>
+  <rect class="pixel" x="14" y="13" width=".9" height=".9"/>
+  <rect class="pixel" x="15" y="13" width=".9" height=".9"/>
+  <rect class="pixel" x="16" y="13" width=".9" height=".9"/>
+  <rect class="pixel" x="14" y="14" width=".9" height=".9"/>
+  <rect class="pixel" x="11" y="14" width=".9" height=".9"/>
+  <rect class="pixel" x="12" y="14" width=".9" height=".9"/>
+  <rect class="pixel" x="13" y="14" width=".9" height=".9"/>
+  <rect class="pixel" x="15" y="14" width=".9" height=".9"/>
+  <rect class="pixel" x="16" y="14" width=".9" height=".9"/>
+  <rect class="pixel" x="37" y="14" width=".9" height=".9"/>
+  <rect class="pixel" x="38" y="14" width=".9" height=".9"/>
+  <rect class="pixel" x="39" y="14" width=".9" height=".9"/>
+  <rect class="pixel" x="40" y="14" width=".9" height=".9"/>
+  <rect class="pixel" x="41" y="14" width=".9" height=".9"/>
+  <rect class="pixel" x="15" y="15" width=".9" height=".9"/>
+  <rect class="pixel" x="13" y="15" width=".9" height=".9"/>
+  <rect class="pixel" x="14" y="15" width=".9" height=".9"/>
+  <rect class="pixel" x="16" y="15" width=".9" height=".9"/>
+  <rect class="pixel" x="17" y="15" width=".9" height=".9"/>
+  <rect class="pixel" x="18" y="15" width=".9" height=".9"/>
+  <rect class="pixel" x="16" y="16" width=".9" height=".9"/>
+  <rect class="pixel" x="13" y="16" width=".9" height=".9"/>
+  <rect class="pixel" x="14" y="16" width=".9" height=".9"/>
+  <rect class="pixel" x="15" y="16" width=".9" height=".9"/>
+  <rect class="pixel" x="17" y="16" width=".9" height=".9"/>
+  <rect class="pixel" x="18" y="16" width=".9" height=".9"/>
+  <rect class="pixel" x="40" y="16" width=".9" height=".9"/>
+  <rect class="pixel" x="41" y="16" width=".9" height=".9"/>
+  <rect class="pixel" x="42" y="16" width=".9" height=".9"/>
+  <rect class="pixel" x="43" y="16" width=".9" height=".9"/>
+  <rect class="pixel" x="44" y="16" width=".9" height=".9"/>
+  <rect class="pixel" x="17" y="17" width=".9" height=".9"/>
+  <rect class="pixel" x="15" y="17" width=".9" height=".9"/>
+  <rect class="pixel" x="16" y="17" width=".9" height=".9"/>
+  <rect class="pixel" x="18" y="17" width=".9" height=".9"/>
+  <rect class="pixel" x="19" y="17" width=".9" height=".9"/>
+  <rect class="pixel" x="20" y="17" width=".9" height=".9"/>
+  <rect class="pixel" x="18" y="18" width=".9" height=".9"/>
+  <rect class="pixel" x="15" y="18" width=".9" height=".9"/>
+  <rect class="pixel" x="16" y="18" width=".9" height=".9"/>
+  <rect class="pixel" x="17" y="18" width=".9" height=".9"/>
+  <rect class="pixel" x="19" y="18" width=".9" height=".9"/>
+  <rect class="pixel" x="20" y="18" width=".9" height=".9"/>
+  <rect class="pixel" x="43" y="18" width=".9" height=".9"/>
+  <rect class="pixel" x="44" y="18" width=".9" height=".9"/>
+  <rect class="pixel" x="45" y="18" width=".9" height=".9"/>
+  <rect class="pixel" x="46" y="18" width=".9" height=".9"/>
+  <rect class="pixel" x="47" y="18" width=".9" height=".9"/>
+  <rect class="pixel" x="19" y="19" width=".9" height=".9"/>
+  <rect class="pixel" x="1" y="19" width=".9" height=".9"/>
+  <rect class="pixel" x="3" y="19" width=".9" height=".9"/>
+  <rect class="pixel" x="17" y="19" width=".9" height=".9"/>
+  <rect class="pixel" x="18" y="19" width=".9" height=".9"/>
+  <rect class="pixel" x="20" y="19" width=".9" height=".9"/>
+  <rect class="pixel" x="20" y="20" width=".9" height=".9"/>
+  <rect class="pixel" x="1" y="20" width=".9" height=".9"/>
+  <rect class="pixel" x="3" y="20" width=".9" height=".9"/>
+  <rect class="pixel" x="17" y="20" width=".9" height=".9"/>
+  <rect class="pixel" x="18" y="20" width=".9" height=".9"/>
+  <rect class="pixel" x="19" y="20" width=".9" height=".9"/>
+  <rect class="pixel" x="46" y="20" width=".9" height=".9"/>
+  <rect class="pixel" x="47" y="20" width=".9" height=".9"/>
+  <rect class="pixel" x="48" y="20" width=".9" height=".9"/>
+  <rect class="pixel" x="49" y="20" width=".9" height=".9"/>
+  <rect class="pixel" x="50" y="20" width=".9" height=".9"/>
+  <rect class="pixel" x="21" y="21" width=".9" height=".9"/>
+  <rect class="pixel" x="3" y="21" width=".9" height=".9"/>
+  <rect class="pixel" x="4" y="21" width=".9" height=".9"/>
+  <rect class="pixel" x="22" y="21" width=".9" height=".9"/>
+  <rect class="pixel" x="23" y="21" width=".9" height=".9"/>
+  <rect class="pixel" x="24" y="21" width=".9" height=".9"/>
+  <rect class="pixel" x="22" y="22" width=".9" height=".9"/>
+  <rect class="pixel" x="3" y="22" width=".9" height=".9"/>
+  <rect class="pixel" x="21" y="22" width=".9" height=".9"/>
+  <rect class="pixel" x="23" y="22" width=".9" height=".9"/>
+  <rect class="pixel" x="49" y="22" width=".9" height=".9"/>
+  <rect class="pixel" x="50" y="22" width=".9" height=".9"/>
+  <rect class="pixel" x="51" y="22" width=".9" height=".9"/>
+  <rect class="pixel" x="52" y="22" width=".9" height=".9"/>
+  <rect class="pixel" x="53" y="22" width=".9" height=".9"/>
+  <rect class="pixel" x="148" y="22" width=".9" height=".9"/>
+  <rect class="pixel" x="149" y="22" width=".9" height=".9"/>
+  <rect class="pixel" x="23" y="23" width=".9" height=".9"/>
+  <rect class="pixel" x="3" y="23" width=".9" height=".9"/>
+  <rect class="pixel" x="4" y="23" width=".9" height=".9"/>
+  <rect class="pixel" x="21" y="23" width=".9" height=".9"/>
+  <rect class="pixel" x="22" y="23" width=".9" height=".9"/>
+  <rect class="pixel" x="24" y="23" width=".9" height=".9"/>
+  <rect class="pixel" x="51" y="23" width=".9" height=".9"/>
+  <rect class="pixel" x="53" y="23" width=".9" height=".9"/>
+  <rect class="pixel" x="54" y="23" width=".9" height=".9"/>
+  <rect class="pixel" x="58" y="23" width=".9" height=".9"/>
+  <rect class="pixel" x="59" y="23" width=".9" height=".9"/>
+  <rect class="pixel" x="24" y="24" width=".9" height=".9"/>
+  <rect class="pixel" x="4" y="24" width=".9" height=".9"/>
+  <rect class="pixel" x="21" y="24" width=".9" height=".9"/>
+  <rect class="pixel" x="23" y="24" width=".9" height=".9"/>
+  <rect class="pixel" x="25" y="24" width=".9" height=".9"/>
+  <rect class="pixel" x="26" y="24" width=".9" height=".9"/>
+  <rect class="pixel" x="58" y="24" width=".9" height=".9"/>
+  <rect class="pixel" x="59" y="24" width=".9" height=".9"/>
+  <rect class="pixel" x="60" y="24" width=".9" height=".9"/>
+  <rect class="pixel" x="63" y="24" width=".9" height=".9"/>
+  <rect class="pixel" x="64" y="24" width=".9" height=".9"/>
+  <rect class="pixel" x="25" y="25" width=".9" height=".9"/>
+  <rect class="pixel" x="4" y="25" width=".9" height=".9"/>
+  <rect class="pixel" x="6" y="25" width=".9" height=".9"/>
+  <rect class="pixel" x="24" y="25" width=".9" height=".9"/>
+  <rect class="pixel" x="26" y="25" width=".9" height=".9"/>
+  <rect class="pixel" x="27" y="25" width=".9" height=".9"/>
+  <rect class="pixel" x="26" y="26" width=".9" height=".9"/>
+  <rect class="pixel" x="4" y="26" width=".9" height=".9"/>
+  <rect class="pixel" x="6" y="26" width=".9" height=".9"/>
+  <rect class="pixel" x="24" y="26" width=".9" height=".9"/>
+  <rect class="pixel" x="25" y="26" width=".9" height=".9"/>
+  <rect class="pixel" x="27" y="26" width=".9" height=".9"/>
+  <rect class="pixel" x="63" y="26" width=".9" height=".9"/>
+  <rect class="pixel" x="64" y="26" width=".9" height=".9"/>
+  <rect class="pixel" x="65" y="26" width=".9" height=".9"/>
+  <rect class="pixel" x="68" y="26" width=".9" height=".9"/>
+  <rect class="pixel" x="69" y="26" width=".9" height=".9"/>
+  <rect class="pixel" x="27" y="27" width=".9" height=".9"/>
+  <rect class="pixel" x="6" y="27" width=".9" height=".9"/>
+  <rect class="pixel" x="25" y="27" width=".9" height=".9"/>
+  <rect class="pixel" x="26" y="27" width=".9" height=".9"/>
+  <rect class="pixel" x="28" y="27" width=".9" height=".9"/>
+  <rect class="pixel" x="29" y="27" width=".9" height=".9"/>
+  <rect class="pixel" x="68" y="27" width=".9" height=".9"/>
+  <rect class="pixel" x="69" y="27" width=".9" height=".9"/>
+  <rect class="pixel" x="70" y="27" width=".9" height=".9"/>
+  <rect class="pixel" x="73" y="27" width=".9" height=".9"/>
+  <rect class="pixel" x="74" y="27" width=".9" height=".9"/>
+  <rect class="pixel" x="28" y="28" width=".9" height=".9"/>
+  <rect class="pixel" x="6" y="28" width=".9" height=".9"/>
+  <rect class="pixel" x="8" y="28" width=".9" height=".9"/>
+  <rect class="pixel" x="27" y="28" width=".9" height=".9"/>
+  <rect class="pixel" x="29" y="28" width=".9" height=".9"/>
+  <rect class="pixel" x="30" y="28" width=".9" height=".9"/>
+  <rect class="pixel" x="29" y="29" width=".9" height=".9"/>
+  <rect class="pixel" x="6" y="29" width=".9" height=".9"/>
+  <rect class="pixel" x="8" y="29" width=".9" height=".9"/>
+  <rect class="pixel" x="27" y="29" width=".9" height=".9"/>
+  <rect class="pixel" x="28" y="29" width=".9" height=".9"/>
+  <rect class="pixel" x="30" y="29" width=".9" height=".9"/>
+  <rect class="pixel" x="73" y="29" width=".9" height=".9"/>
+  <rect class="pixel" x="74" y="29" width=".9" height=".9"/>
+  <rect class="pixel" x="75" y="29" width=".9" height=".9"/>
+  <rect class="pixel" x="78" y="29" width=".9" height=".9"/>
+  <rect class="pixel" x="79" y="29" width=".9" height=".9"/>
+  <rect class="pixel" x="30" y="30" width=".9" height=".9"/>
+  <rect class="pixel" x="8" y="30" width=".9" height=".9"/>
+  <rect class="pixel" x="28" y="30" width=".9" height=".9"/>
+  <rect class="pixel" x="29" y="30" width=".9" height=".9"/>
+  <rect class="pixel" x="31" y="30" width=".9" height=".9"/>
+  <rect class="pixel" x="32" y="30" width=".9" height=".9"/>
+  <rect class="pixel" x="78" y="30" width=".9" height=".9"/>
+  <rect class="pixel" x="79" y="30" width=".9" height=".9"/>
+  <rect class="pixel" x="80" y="30" width=".9" height=".9"/>
+  <rect class="pixel" x="83" y="30" width=".9" height=".9"/>
+  <rect class="pixel" x="84" y="30" width=".9" height=".9"/>
+  <rect class="pixel" x="31" y="31" width=".9" height=".9"/>
+  <rect class="pixel" x="8" y="31" width=".9" height=".9"/>
+  <rect class="pixel" x="10" y="31" width=".9" height=".9"/>
+  <rect class="pixel" x="30" y="31" width=".9" height=".9"/>
+  <rect class="pixel" x="32" y="31" width=".9" height=".9"/>
+  <rect class="pixel" x="33" y="31" width=".9" height=".9"/>
+  <rect class="pixel" x="32" y="32" width=".9" height=".9"/>
+  <rect class="pixel" x="8" y="32" width=".9" height=".9"/>
+  <rect class="pixel" x="10" y="32" width=".9" height=".9"/>
+  <rect class="pixel" x="30" y="32" width=".9" height=".9"/>
+  <rect class="pixel" x="31" y="32" width=".9" height=".9"/>
+  <rect class="pixel" x="33" y="32" width=".9" height=".9"/>
+  <rect class="pixel" x="83" y="32" width=".9" height=".9"/>
+  <rect class="pixel" x="84" y="32" width=".9" height=".9"/>
+  <rect class="pixel" x="85" y="32" width=".9" height=".9"/>
+  <rect class="pixel" x="88" y="32" width=".9" height=".9"/>
+  <rect class="pixel" x="89" y="32" width=".9" height=".9"/>
+  <rect class="pixel" x="33" y="33" width=".9" height=".9"/>
+  <rect class="pixel" x="10" y="33" width=".9" height=".9"/>
+  <rect class="pixel" x="31" y="33" width=".9" height=".9"/>
+  <rect class="pixel" x="32" y="33" width=".9" height=".9"/>
+  <rect class="pixel" x="34" y="33" width=".9" height=".9"/>
+  <rect class="pixel" x="35" y="33" width=".9" height=".9"/>
+  <rect class="pixel" x="88" y="33" width=".9" height=".9"/>
+  <rect class="pixel" x="89" y="33" width=".9" height=".9"/>
+  <rect class="pixel" x="90" y="33" width=".9" height=".9"/>
+  <rect class="pixel" x="93" y="33" width=".9" height=".9"/>
+  <rect class="pixel" x="94" y="33" width=".9" height=".9"/>
+  <rect class="pixel" x="34" y="34" width=".9" height=".9"/>
+  <rect class="pixel" x="10" y="34" width=".9" height=".9"/>
+  <rect class="pixel" x="12" y="34" width=".9" height=".9"/>
+  <rect class="pixel" x="33" y="34" width=".9" height=".9"/>
+  <rect class="pixel" x="35" y="34" width=".9" height=".9"/>
+  <rect class="pixel" x="36" y="34" width=".9" height=".9"/>
+  <rect class="pixel" x="35" y="35" width=".9" height=".9"/>
+  <rect class="pixel" x="10" y="35" width=".9" height=".9"/>
+  <rect class="pixel" x="12" y="35" width=".9" height=".9"/>
+  <rect class="pixel" x="33" y="35" width=".9" height=".9"/>
+  <rect class="pixel" x="34" y="35" width=".9" height=".9"/>
+  <rect class="pixel" x="36" y="35" width=".9" height=".9"/>
+  <rect class="pixel" x="93" y="35" width=".9" height=".9"/>
+  <rect class="pixel" x="94" y="35" width=".9" height=".9"/>
+  <rect class="pixel" x="95" y="35" width=".9" height=".9"/>
+  <rect class="pixel" x="98" y="35" width=".9" height=".9"/>
+  <rect class="pixel" x="99" y="35" width=".9" height=".9"/>
+  <rect class="pixel" x="36" y="36" width=".9" height=".9"/>
+  <rect class="pixel" x="12" y="36" width=".9" height=".9"/>
+  <rect class="pixel" x="34" y="36" width=".9" height=".9"/>
+  <rect class="pixel" x="35" y="36" width=".9" height=".9"/>
+  <rect class="pixel" x="37" y="36" width=".9" height=".9"/>
+  <rect class="pixel" x="38" y="36" width=".9" height=".9"/>
+  <rect class="pixel" x="98" y="36" width=".9" height=".9"/>
+  <rect class="pixel" x="99" y="36" width=".9" height=".9"/>
+  <rect class="pixel" x="100" y="36" width=".9" height=".9"/>
+  <rect class="pixel" x="103" y="36" width=".9" height=".9"/>
+  <rect class="pixel" x="104" y="36" width=".9" height=".9"/>
+  <rect class="pixel" x="37" y="37" width=".9" height=".9"/>
+  <rect class="pixel" x="12" y="37" width=".9" height=".9"/>
+  <rect class="pixel" x="14" y="37" width=".9" height=".9"/>
+  <rect class="pixel" x="36" y="37" width=".9" height=".9"/>
+  <rect class="pixel" x="38" y="37" width=".9" height=".9"/>
+  <rect class="pixel" x="39" y="37" width=".9" height=".9"/>
+  <rect class="pixel" x="38" y="38" width=".9" height=".9"/>
+  <rect class="pixel" x="12" y="38" width=".9" height=".9"/>
+  <rect class="pixel" x="14" y="38" width=".9" height=".9"/>
+  <rect class="pixel" x="36" y="38" width=".9" height=".9"/>
+  <rect class="pixel" x="37" y="38" width=".9" height=".9"/>
+  <rect class="pixel" x="39" y="38" width=".9" height=".9"/>
+  <rect class="pixel" x="103" y="38" width=".9" height=".9"/>
+  <rect class="pixel" x="104" y="38" width=".9" height=".9"/>
+  <rect class="pixel" x="105" y="38" width=".9" height=".9"/>
+  <rect class="pixel" x="108" y="38" width=".9" height=".9"/>
+  <rect class="pixel" x="109" y="38" width=".9" height=".9"/>
+  <rect class="pixel" x="39" y="39" width=".9" height=".9"/>
+  <rect class="pixel" x="14" y="39" width=".9" height=".9"/>
+  <rect class="pixel" x="37" y="39" width=".9" height=".9"/>
+  <rect class="pixel" x="38" y="39" width=".9" height=".9"/>
+  <rect class="pixel" x="40" y="39" width=".9" height=".9"/>
+  <rect class="pixel" x="41" y="39" width=".9" height=".9"/>
+  <rect class="pixel" x="108" y="39" width=".9" height=".9"/>
+  <rect class="pixel" x="109" y="39" width=".9" height=".9"/>
+  <rect class="pixel" x="110" y="39" width=".9" height=".9"/>
+  <rect class="pixel" x="113" y="39" width=".9" height=".9"/>
+  <rect class="pixel" x="114" y="39" width=".9" height=".9"/>
+  <rect class="pixel" x="40" y="40" width=".9" height=".9"/>
+  <rect class="pixel" x="14" y="40" width=".9" height=".9"/>
+  <rect class="pixel" x="16" y="40" width=".9" height=".9"/>
+  <rect class="pixel" x="39" y="40" width=".9" height=".9"/>
+  <rect class="pixel" x="41" y="40" width=".9" height=".9"/>
+  <rect class="pixel" x="42" y="40" width=".9" height=".9"/>
+  <rect class="pixel" x="41" y="41" width=".9" height=".9"/>
+  <rect class="pixel" x="14" y="41" width=".9" height=".9"/>
+  <rect class="pixel" x="16" y="41" width=".9" height=".9"/>
+  <rect class="pixel" x="39" y="41" width=".9" height=".9"/>
+  <rect class="pixel" x="40" y="41" width=".9" height=".9"/>
+  <rect class="pixel" x="42" y="41" width=".9" height=".9"/>
+  <rect class="pixel" x="113" y="41" width=".9" height=".9"/>
+  <rect class="pixel" x="114" y="41" width=".9" height=".9"/>
+  <rect class="pixel" x="115" y="41" width=".9" height=".9"/>
+  <rect class="pixel" x="118" y="41" width=".9" height=".9"/>
+  <rect class="pixel" x="119" y="41" width=".9" height=".9"/>
+  <rect class="pixel" x="42" y="42" width=".9" height=".9"/>
+  <rect class="pixel" x="16" y="42" width=".9" height=".9"/>
+  <rect class="pixel" x="40" y="42" width=".9" height=".9"/>
+  <rect class="pixel" x="41" y="42" width=".9" height=".9"/>
+  <rect class="pixel" x="43" y="42" width=".9" height=".9"/>
+  <rect class="pixel" x="44" y="42" width=".9" height=".9"/>
+  <rect class="pixel" x="118" y="42" width=".9" height=".9"/>
+  <rect class="pixel" x="119" y="42" width=".9" height=".9"/>
+  <rect class="pixel" x="120" y="42" width=".9" height=".9"/>
+  <rect class="pixel" x="123" y="42" width=".9" height=".9"/>
+  <rect class="pixel" x="124" y="42" width=".9" height=".9"/>
+  <rect class="pixel" x="43" y="43" width=".9" height=".9"/>
+  <rect class="pixel" x="16" y="43" width=".9" height=".9"/>
+  <rect class="pixel" x="18" y="43" width=".9" height=".9"/>
+  <rect class="pixel" x="42" y="43" width=".9" height=".9"/>
+  <rect class="pixel" x="44" y="43" width=".9" height=".9"/>
+  <rect class="pixel" x="45" y="43" width=".9" height=".9"/>
+  <rect class="pixel" x="44" y="44" width=".9" height=".9"/>
+  <rect class="pixel" x="16" y="44" width=".9" height=".9"/>
+  <rect class="pixel" x="18" y="44" width=".9" height=".9"/>
+  <rect class="pixel" x="42" y="44" width=".9" height=".9"/>
+  <rect class="pixel" x="43" y="44" width=".9" height=".9"/>
+  <rect class="pixel" x="45" y="44" width=".9" height=".9"/>
+  <rect class="pixel" x="123" y="44" width=".9" height=".9"/>
+  <rect class="pixel" x="124" y="44" width=".9" height=".9"/>
+  <rect class="pixel" x="125" y="44" width=".9" height=".9"/>
+  <rect class="pixel" x="128" y="44" width=".9" height=".9"/>
+  <rect class="pixel" x="129" y="44" width=".9" height=".9"/>
+  <rect class="pixel" x="45" y="45" width=".9" height=".9"/>
+  <rect class="pixel" x="18" y="45" width=".9" height=".9"/>
+  <rect class="pixel" x="43" y="45" width=".9" height=".9"/>
+  <rect class="pixel" x="44" y="45" width=".9" height=".9"/>
+  <rect class="pixel" x="46" y="45" width=".9" height=".9"/>
+  <rect class="pixel" x="47" y="45" width=".9" height=".9"/>
+  <rect class="pixel" x="128" y="45" width=".9" height=".9"/>
+  <rect class="pixel" x="129" y="45" width=".9" height=".9"/>
+  <rect class="pixel" x="130" y="45" width=".9" height=".9"/>
+  <rect class="pixel" x="133" y="45" width=".9" height=".9"/>
+  <rect class="pixel" x="134" y="45" width=".9" height=".9"/>
+  <rect class="pixel" x="46" y="46" width=".9" height=".9"/>
+  <rect class="pixel" x="18" y="46" width=".9" height=".9"/>
+  <rect class="pixel" x="20" y="46" width=".9" height=".9"/>
+  <rect class="pixel" x="45" y="46" width=".9" height=".9"/>
+  <rect class="pixel" x="47" y="46" width=".9" height=".9"/>
+  <rect class="pixel" x="48" y="46" width=".9" height=".9"/>
+  <rect class="pixel" x="47" y="47" width=".9" height=".9"/>
+  <rect class="pixel" x="18" y="47" width=".9" height=".9"/>
+  <rect class="pixel" x="20" y="47" width=".9" height=".9"/>
+  <rect class="pixel" x="45" y="47" width=".9" height=".9"/>
+  <rect class="pixel" x="46" y="47" width=".9" height=".9"/>
+  <rect class="pixel" x="48" y="47" width=".9" height=".9"/>
+  <rect class="pixel" x="133" y="47" width=".9" height=".9"/>
+  <rect class="pixel" x="134" y="47" width=".9" height=".9"/>
+  <rect class="pixel" x="135" y="47" width=".9" height=".9"/>
+  <rect class="pixel" x="138" y="47" width=".9" height=".9"/>
+  <rect class="pixel" x="139" y="47" width=".9" height=".9"/>
+  <rect class="pixel" x="48" y="48" width=".9" height=".9"/>
+  <rect class="pixel" x="20" y="48" width=".9" height=".9"/>
+  <rect class="pixel" x="46" y="48" width=".9" height=".9"/>
+  <rect class="pixel" x="47" y="48" width=".9" height=".9"/>
+  <rect class="pixel" x="49" y="48" width=".9" height=".9"/>
+  <rect class="pixel" x="50" y="48" width=".9" height=".9"/>
+  <rect class="pixel" x="138" y="48" width=".9" height=".9"/>
+  <rect class="pixel" x="139" y="48" width=".9" height=".9"/>
+  <rect class="pixel" x="140" y="48" width=".9" height=".9"/>
+  <rect class="pixel" x="143" y="48" width=".9" height=".9"/>
+  <rect class="pixel" x="144" y="48" width=".9" height=".9"/>
+  <rect class="pixel" x="49" y="49" width=".9" height=".9"/>
+  <rect class="pixel" x="3" y="49" width=".9" height=".9"/>
+  <rect class="pixel" x="20" y="49" width=".9" height=".9"/>
+  <rect class="pixel" x="22" y="49" width=".9" height=".9"/>
+  <rect class="pixel" x="48" y="49" width=".9" height=".9"/>
+  <rect class="pixel" x="50" y="49" width=".9" height=".9"/>
+  <rect class="pixel" x="50" y="50" width=".9" height=".9"/>
+  <rect class="pixel" x="3" y="50" width=".9" height=".9"/>
+  <rect class="pixel" x="20" y="50" width=".9" height=".9"/>
+  <rect class="pixel" x="22" y="50" width=".9" height=".9"/>
+  <rect class="pixel" x="48" y="50" width=".9" height=".9"/>
+  <rect class="pixel" x="49" y="50" width=".9" height=".9"/>
+  <rect class="pixel" x="143" y="50" width=".9" height=".9"/>
+  <rect class="pixel" x="144" y="50" width=".9" height=".9"/>
+  <rect class="pixel" x="145" y="50" width=".9" height=".9"/>
+  <rect class="pixel" x="148" y="50" width=".9" height=".9"/>
+  <rect class="pixel" x="149" y="50" width=".9" height=".9"/>
+  <rect class="pixel" x="51" y="51" width=".9" height=".9"/>
+  <rect class="pixel" x="22" y="51" width=".9" height=".9"/>
+  <rect class="pixel" x="23" y="51" width=".9" height=".9"/>
+  <rect class="pixel" x="52" y="51" width=".9" height=".9"/>
+  <rect class="pixel" x="53" y="51" width=".9" height=".9"/>
+  <rect class="pixel" x="54" y="51" width=".9" height=".9"/>
+  <rect class="pixel" x="52" y="52" width=".9" height=".9"/>
+  <rect class="pixel" x="22" y="52" width=".9" height=".9"/>
+  <rect class="pixel" x="51" y="52" width=".9" height=".9"/>
+  <rect class="pixel" x="53" y="52" width=".9" height=".9"/>
+  <rect class="pixel" x="55" y="52" width=".9" height=".9"/>
+  <rect class="pixel" x="56" y="52" width=".9" height=".9"/>
+  <rect class="pixel" x="148" y="52" width=".9" height=".9"/>
+  <rect class="pixel" x="149" y="52" width=".9" height=".9"/>
+  <rect class="pixel" x="150" y="52" width=".9" height=".9"/>
+  <rect class="pixel" x="53" y="53" width=".9" height=".9"/>
+  <rect class="pixel" x="22" y="53" width=".9" height=".9"/>
+  <rect class="pixel" x="23" y="53" width=".9" height=".9"/>
+  <rect class="pixel" x="51" y="53" width=".9" height=".9"/>
+  <rect class="pixel" x="52" y="53" width=".9" height=".9"/>
+  <rect class="pixel" x="54" y="53" width=".9" height=".9"/>
+  <rect class="pixel" x="55" y="53" width=".9" height=".9"/>
+  <rect class="pixel" x="56" y="53" width=".9" height=".9"/>
+  <rect class="pixel" x="57" y="53" width=".9" height=".9"/>
+  <rect class="pixel" x="54" y="54" width=".9" height=".9"/>
+  <rect class="pixel" x="23" y="54" width=".9" height=".9"/>
+  <rect class="pixel" x="51" y="54" width=".9" height=".9"/>
+  <rect class="pixel" x="53" y="54" width=".9" height=".9"/>
+  <rect class="pixel" x="56" y="54" width=".9" height=".9"/>
+  <rect class="pixel" x="57" y="54" width=".9" height=".9"/>
+  <rect class="pixel" x="58" y="54" width=".9" height=".9"/>
+  <rect class="pixel" x="59" y="54" width=".9" height=".9"/>
+  <rect class="pixel" x="61" y="54" width=".9" height=".9"/>
+  <rect class="pixel" x="55" y="55" width=".9" height=".9"/>
+  <rect class="pixel" x="52" y="55" width=".9" height=".9"/>
+  <rect class="pixel" x="53" y="55" width=".9" height=".9"/>
+  <rect class="pixel" x="56" y="55" width=".9" height=".9"/>
+  <rect class="pixel" x="149" y="55" width=".9" height=".9"/>
+  <rect class="pixel" x="150" y="55" width=".9" height=".9"/>
+  <rect class="pixel" x="56" y="56" width=".9" height=".9"/>
+  <rect class="pixel" x="52" y="56" width=".9" height=".9"/>
+  <rect class="pixel" x="53" y="56" width=".9" height=".9"/>
+  <rect class="pixel" x="54" y="56" width=".9" height=".9"/>
+  <rect class="pixel" x="55" y="56" width=".9" height=".9"/>
+  <rect class="pixel" x="57" y="56" width=".9" height=".9"/>
+  <rect class="pixel" x="57" y="57" width=".9" height=".9"/>
+  <rect class="pixel" x="53" y="57" width=".9" height=".9"/>
+  <rect class="pixel" x="54" y="57" width=".9" height=".9"/>
+  <rect class="pixel" x="56" y="57" width=".9" height=".9"/>
+  <rect class="pixel" x="59" y="57" width=".9" height=".9"/>
+  <rect class="pixel" x="61" y="57" width=".9" height=".9"/>
+  <rect class="pixel" x="58" y="58" width=".9" height=".9"/>
+  <rect class="pixel" x="23" y="58" width=".9" height=".9"/>
+  <rect class="pixel" x="24" y="58" width=".9" height=".9"/>
+  <rect class="pixel" x="54" y="58" width=".9" height=".9"/>
+  <rect class="pixel" x="59" y="58" width=".9" height=".9"/>
+  <rect class="pixel" x="60" y="58" width=".9" height=".9"/>
+  <rect class="pixel" x="59" y="59" width=".9" height=".9"/>
+  <rect class="pixel" x="23" y="59" width=".9" height=".9"/>
+  <rect class="pixel" x="24" y="59" width=".9" height=".9"/>
+  <rect class="pixel" x="54" y="59" width=".9" height=".9"/>
+  <rect class="pixel" x="57" y="59" width=".9" height=".9"/>
+  <rect class="pixel" x="58" y="59" width=".9" height=".9"/>
+  <rect class="pixel" x="60" y="59" width=".9" height=".9"/>
+  <rect class="pixel" x="61" y="59" width=".9" height=".9"/>
+  <rect class="pixel" x="62" y="59" width=".9" height=".9"/>
+  <rect class="pixel" x="60" y="60" width=".9" height=".9"/>
+  <rect class="pixel" x="24" y="60" width=".9" height=".9"/>
+  <rect class="pixel" x="58" y="60" width=".9" height=".9"/>
+  <rect class="pixel" x="59" y="60" width=".9" height=".9"/>
+  <rect class="pixel" x="61" y="60" width=".9" height=".9"/>
+  <rect class="pixel" x="62" y="60" width=".9" height=".9"/>
+  <rect class="pixel" x="63" y="60" width=".9" height=".9"/>
+  <rect class="pixel" x="64" y="60" width=".9" height=".9"/>
+  <rect class="pixel" x="66" y="60" width=".9" height=".9"/>
+  <rect class="pixel" x="61" y="61" width=".9" height=".9"/>
+  <rect class="pixel" x="54" y="61" width=".9" height=".9"/>
+  <rect class="pixel" x="57" y="61" width=".9" height=".9"/>
+  <rect class="pixel" x="59" y="61" width=".9" height=".9"/>
+  <rect class="pixel" x="60" y="61" width=".9" height=".9"/>
+  <rect class="pixel" x="62" y="61" width=".9" height=".9"/>
+  <rect class="pixel" x="62" y="62" width=".9" height=".9"/>
+  <rect class="pixel" x="59" y="62" width=".9" height=".9"/>
+  <rect class="pixel" x="60" y="62" width=".9" height=".9"/>
+  <rect class="pixel" x="61" y="62" width=".9" height=".9"/>
+  <rect class="pixel" x="64" y="62" width=".9" height=".9"/>
+  <rect class="pixel" x="66" y="62" width=".9" height=".9"/>
+  <rect class="pixel" x="63" y="63" width=".9" height=".9"/>
+  <rect class="pixel" x="24" y="63" width=".9" height=".9"/>
+  <rect class="pixel" x="26" y="63" width=".9" height=".9"/>
+  <rect class="pixel" x="60" y="63" width=".9" height=".9"/>
+  <rect class="pixel" x="64" y="63" width=".9" height=".9"/>
+  <rect class="pixel" x="65" y="63" width=".9" height=".9"/>
+  <rect class="pixel" x="64" y="64" width=".9" height=".9"/>
+  <rect class="pixel" x="24" y="64" width=".9" height=".9"/>
+  <rect class="pixel" x="26" y="64" width=".9" height=".9"/>
+  <rect class="pixel" x="60" y="64" width=".9" height=".9"/>
+  <rect class="pixel" x="62" y="64" width=".9" height=".9"/>
+  <rect class="pixel" x="63" y="64" width=".9" height=".9"/>
+  <rect class="pixel" x="65" y="64" width=".9" height=".9"/>
+  <rect class="pixel" x="66" y="64" width=".9" height=".9"/>
+  <rect class="pixel" x="67" y="64" width=".9" height=".9"/>
+  <rect class="pixel" x="65" y="65" width=".9" height=".9"/>
+  <rect class="pixel" x="26" y="65" width=".9" height=".9"/>
+  <rect class="pixel" x="63" y="65" width=".9" height=".9"/>
+  <rect class="pixel" x="64" y="65" width=".9" height=".9"/>
+  <rect class="pixel" x="66" y="65" width=".9" height=".9"/>
+  <rect class="pixel" x="67" y="65" width=".9" height=".9"/>
+  <rect class="pixel" x="68" y="65" width=".9" height=".9"/>
+  <rect class="pixel" x="69" y="65" width=".9" height=".9"/>
+  <rect class="pixel" x="71" y="65" width=".9" height=".9"/>
+  <rect class="pixel" x="66" y="66" width=".9" height=".9"/>
+  <rect class="pixel" x="60" y="66" width=".9" height=".9"/>
+  <rect class="pixel" x="62" y="66" width=".9" height=".9"/>
+  <rect class="pixel" x="64" y="66" width=".9" height=".9"/>
+  <rect class="pixel" x="65" y="66" width=".9" height=".9"/>
+  <rect class="pixel" x="67" y="66" width=".9" height=".9"/>
+  <rect class="pixel" x="67" y="67" width=".9" height=".9"/>
+  <rect class="pixel" x="64" y="67" width=".9" height=".9"/>
+  <rect class="pixel" x="65" y="67" width=".9" height=".9"/>
+  <rect class="pixel" x="66" y="67" width=".9" height=".9"/>
+  <rect class="pixel" x="69" y="67" width=".9" height=".9"/>
+  <rect class="pixel" x="71" y="67" width=".9" height=".9"/>
+  <rect class="pixel" x="68" y="68" width=".9" height=".9"/>
+  <rect class="pixel" x="26" y="68" width=".9" height=".9"/>
+  <rect class="pixel" x="27" y="68" width=".9" height=".9"/>
+  <rect class="pixel" x="65" y="68" width=".9" height=".9"/>
+  <rect class="pixel" x="69" y="68" width=".9" height=".9"/>
+  <rect class="pixel" x="70" y="68" width=".9" height=".9"/>
+  <rect class="pixel" x="69" y="69" width=".9" height=".9"/>
+  <rect class="pixel" x="26" y="69" width=".9" height=".9"/>
+  <rect class="pixel" x="27" y="69" width=".9" height=".9"/>
+  <rect class="pixel" x="65" y="69" width=".9" height=".9"/>
+  <rect class="pixel" x="67" y="69" width=".9" height=".9"/>
+  <rect class="pixel" x="68" y="69" width=".9" height=".9"/>
+  <rect class="pixel" x="70" y="69" width=".9" height=".9"/>
+  <rect class="pixel" x="71" y="69" width=".9" height=".9"/>
+  <rect class="pixel" x="72" y="69" width=".9" height=".9"/>
+  <rect class="pixel" x="70" y="70" width=".9" height=".9"/>
+  <rect class="pixel" x="27" y="70" width=".9" height=".9"/>
+  <rect class="pixel" x="68" y="70" width=".9" height=".9"/>
+  <rect class="pixel" x="69" y="70" width=".9" height=".9"/>
+  <rect class="pixel" x="71" y="70" width=".9" height=".9"/>
+  <rect class="pixel" x="72" y="70" width=".9" height=".9"/>
+  <rect class="pixel" x="73" y="70" width=".9" height=".9"/>
+  <rect class="pixel" x="74" y="70" width=".9" height=".9"/>
+  <rect class="pixel" x="76" y="70" width=".9" height=".9"/>
+  <rect class="pixel" x="71" y="71" width=".9" height=".9"/>
+  <rect class="pixel" x="65" y="71" width=".9" height=".9"/>
+  <rect class="pixel" x="67" y="71" width=".9" height=".9"/>
+  <rect class="pixel" x="69" y="71" width=".9" height=".9"/>
+  <rect class="pixel" x="70" y="71" width=".9" height=".9"/>
+  <rect class="pixel" x="72" y="71" width=".9" height=".9"/>
+  <rect class="pixel" x="72" y="72" width=".9" height=".9"/>
+  <rect class="pixel" x="69" y="72" width=".9" height=".9"/>
+  <rect class="pixel" x="70" y="72" width=".9" height=".9"/>
+  <rect class="pixel" x="71" y="72" width=".9" height=".9"/>
+  <rect class="pixel" x="74" y="72" width=".9" height=".9"/>
+  <rect class="pixel" x="76" y="72" width=".9" height=".9"/>
+  <rect class="pixel" x="73" y="73" width=".9" height=".9"/>
+  <rect class="pixel" x="27" y="73" width=".9" height=".9"/>
+  <rect class="pixel" x="29" y="73" width=".9" height=".9"/>
+  <rect class="pixel" x="70" y="73" width=".9" height=".9"/>
+  <rect class="pixel" x="74" y="73" width=".9" height=".9"/>
+  <rect class="pixel" x="75" y="73" width=".9" height=".9"/>
+  <rect class="pixel" x="74" y="74" width=".9" height=".9"/>
+  <rect class="pixel" x="27" y="74" width=".9" height=".9"/>
+  <rect class="pixel" x="29" y="74" width=".9" height=".9"/>
+  <rect class="pixel" x="70" y="74" width=".9" height=".9"/>
+  <rect class="pixel" x="72" y="74" width=".9" height=".9"/>
+  <rect class="pixel" x="73" y="74" width=".9" height=".9"/>
+  <rect class="pixel" x="75" y="74" width=".9" height=".9"/>
+  <rect class="pixel" x="76" y="74" width=".9" height=".9"/>
+  <rect class="pixel" x="77" y="74" width=".9" height=".9"/>
+  <rect class="pixel" x="75" y="75" width=".9" height=".9"/>
+  <rect class="pixel" x="29" y="75" width=".9" height=".9"/>
+  <rect class="pixel" x="73" y="75" width=".9" height=".9"/>
+  <rect class="pixel" x="74" y="75" width=".9" height=".9"/>
+  <rect class="pixel" x="76" y="75" width=".9" height=".9"/>
+  <rect class="pixel" x="77" y="75" width=".9" height=".9"/>
+  <rect class="pixel" x="78" y="75" width=".9" height=".9"/>
+  <rect class="pixel" x="79" y="75" width=".9" height=".9"/>
+  <rect class="pixel" x="81" y="75" width=".9" height=".9"/>
+  <rect class="pixel" x="76" y="76" width=".9" height=".9"/>
+  <rect class="pixel" x="70" y="76" width=".9" height=".9"/>
+  <rect class="pixel" x="72" y="76" width=".9" height=".9"/>
+  <rect class="pixel" x="74" y="76" width=".9" height=".9"/>
+  <rect class="pixel" x="75" y="76" width=".9" height=".9"/>
+  <rect class="pixel" x="77" y="76" width=".9" height=".9"/>
+  <rect class="pixel" x="77" y="77" width=".9" height=".9"/>
+  <rect class="pixel" x="74" y="77" width=".9" height=".9"/>
+  <rect class="pixel" x="75" y="77" width=".9" height=".9"/>
+  <rect class="pixel" x="76" y="77" width=".9" height=".9"/>
+  <rect class="pixel" x="79" y="77" width=".9" height=".9"/>
+  <rect class="pixel" x="81" y="77" width=".9" height=".9"/>
+  <rect class="pixel" x="78" y="78" width=".9" height=".9"/>
+  <rect class="pixel" x="29" y="78" width=".9" height=".9"/>
+  <rect class="pixel" x="30" y="78" width=".9" height=".9"/>
+  <rect class="pixel" x="75" y="78" width=".9" height=".9"/>
+  <rect class="pixel" x="79" y="78" width=".9" height=".9"/>
+  <rect class="pixel" x="80" y="78" width=".9" height=".9"/>
+  <rect class="pixel" x="79" y="79" width=".9" height=".9"/>
+  <rect class="pixel" x="29" y="79" width=".9" height=".9"/>
+  <rect class="pixel" x="30" y="79" width=".9" height=".9"/>
+  <rect class="pixel" x="75" y="79" width=".9" height=".9"/>
+  <rect class="pixel" x="77" y="79" width=".9" height=".9"/>
+  <rect class="pixel" x="78" y="79" width=".9" height=".9"/>
+  <rect class="pixel" x="80" y="79" width=".9" height=".9"/>
+  <rect class="pixel" x="81" y="79" width=".9" height=".9"/>
+  <rect class="pixel" x="82" y="79" width=".9" height=".9"/>
+  <rect class="pixel" x="80" y="80" width=".9" height=".9"/>
+  <rect class="pixel" x="30" y="80" width=".9" height=".9"/>
+  <rect class="pixel" x="78" y="80" width=".9" height=".9"/>
+  <rect class="pixel" x="79" y="80" width=".9" height=".9"/>
+  <rect class="pixel" x="81" y="80" width=".9" height=".9"/>
+  <rect class="pixel" x="82" y="80" width=".9" height=".9"/>
+  <rect class="pixel" x="83" y="80" width=".9" height=".9"/>
+  <rect class="pixel" x="84" y="80" width=".9" height=".9"/>
+  <rect class="pixel" x="86" y="80" width=".9" height=".9"/>
+  <rect class="pixel" x="81" y="81" width=".9" height=".9"/>
+  <rect class="pixel" x="75" y="81" width=".9" height=".9"/>
+  <rect class="pixel" x="77" y="81" width=".9" height=".9"/>
+  <rect class="pixel" x="79" y="81" width=".9" height=".9"/>
+  <rect class="pixel" x="80" y="81" width=".9" height=".9"/>
+  <rect class="pixel" x="82" y="81" width=".9" height=".9"/>
+  <rect class="pixel" x="82" y="82" width=".9" height=".9"/>
+  <rect class="pixel" x="79" y="82" width=".9" height=".9"/>
+  <rect class="pixel" x="80" y="82" width=".9" height=".9"/>
+  <rect class="pixel" x="81" y="82" width=".9" height=".9"/>
+  <rect class="pixel" x="84" y="82" width=".9" height=".9"/>
+  <rect class="pixel" x="86" y="82" width=".9" height=".9"/>
+  <rect class="pixel" x="83" y="83" width=".9" height=".9"/>
+  <rect class="pixel" x="30" y="83" width=".9" height=".9"/>
+  <rect class="pixel" x="32" y="83" width=".9" height=".9"/>
+  <rect class="pixel" x="80" y="83" width=".9" height=".9"/>
+  <rect class="pixel" x="84" y="83" width=".9" height=".9"/>
+  <rect class="pixel" x="85" y="83" width=".9" height=".9"/>
+  <rect class="pixel" x="84" y="84" width=".9" height=".9"/>
+  <rect class="pixel" x="30" y="84" width=".9" height=".9"/>
+  <rect class="pixel" x="32" y="84" width=".9" height=".9"/>
+  <rect class="pixel" x="80" y="84" width=".9" height=".9"/>
+  <rect class="pixel" x="82" y="84" width=".9" height=".9"/>
+  <rect class="pixel" x="83" y="84" width=".9" height=".9"/>
+  <rect class="pixel" x="85" y="84" width=".9" height=".9"/>
+  <rect class="pixel" x="86" y="84" width=".9" height=".9"/>
+  <rect class="pixel" x="87" y="84" width=".9" height=".9"/>
+  <rect class="pixel" x="85" y="85" width=".9" height=".9"/>
+  <rect class="pixel" x="32" y="85" width=".9" height=".9"/>
+  <rect class="pixel" x="83" y="85" width=".9" height=".9"/>
+  <rect class="pixel" x="84" y="85" width=".9" height=".9"/>
+  <rect class="pixel" x="86" y="85" width=".9" height=".9"/>
+  <rect class="pixel" x="87" y="85" width=".9" height=".9"/>
+  <rect class="pixel" x="88" y="85" width=".9" height=".9"/>
+  <rect class="pixel" x="89" y="85" width=".9" height=".9"/>
+  <rect class="pixel" x="91" y="85" width=".9" height=".9"/>
+  <rect class="pixel" x="86" y="86" width=".9" height=".9"/>
+  <rect class="pixel" x="80" y="86" width=".9" height=".9"/>
+  <rect class="pixel" x="82" y="86" width=".9" height=".9"/>
+  <rect class="pixel" x="84" y="86" width=".9" height=".9"/>
+  <rect class="pixel" x="85" y="86" width=".9" height=".9"/>
+  <rect class="pixel" x="87" y="86" width=".9" height=".9"/>
+  <rect class="pixel" x="87" y="87" width=".9" height=".9"/>
+  <rect class="pixel" x="84" y="87" width=".9" height=".9"/>
+  <rect class="pixel" x="85" y="87" width=".9" height=".9"/>
+  <rect class="pixel" x="86" y="87" width=".9" height=".9"/>
+  <rect class="pixel" x="89" y="87" width=".9" height=".9"/>
+  <rect class="pixel" x="91" y="87" width=".9" height=".9"/>
+  <rect class="pixel" x="88" y="88" width=".9" height=".9"/>
+  <rect class="pixel" x="32" y="88" width=".9" height=".9"/>
+  <rect class="pixel" x="33" y="88" width=".9" height=".9"/>
+  <rect class="pixel" x="85" y="88" width=".9" height=".9"/>
+  <rect class="pixel" x="89" y="88" width=".9" height=".9"/>
+  <rect class="pixel" x="90" y="88" width=".9" height=".9"/>
+  <rect class="pixel" x="89" y="89" width=".9" height=".9"/>
+  <rect class="pixel" x="32" y="89" width=".9" height=".9"/>
+  <rect class="pixel" x="33" y="89" width=".9" height=".9"/>
+  <rect class="pixel" x="85" y="89" width=".9" height=".9"/>
+  <rect class="pixel" x="87" y="89" width=".9" height=".9"/>
+  <rect class="pixel" x="88" y="89" width=".9" height=".9"/>
+  <rect class="pixel" x="90" y="89" width=".9" height=".9"/>
+  <rect class="pixel" x="91" y="89" width=".9" height=".9"/>
+  <rect class="pixel" x="92" y="89" width=".9" height=".9"/>
+  <rect class="pixel" x="90" y="90" width=".9" height=".9"/>
+  <rect class="pixel" x="33" y="90" width=".9" height=".9"/>
+  <rect class="pixel" x="88" y="90" width=".9" height=".9"/>
+  <rect class="pixel" x="89" y="90" width=".9" height=".9"/>
+  <rect class="pixel" x="91" y="90" width=".9" height=".9"/>
+  <rect class="pixel" x="92" y="90" width=".9" height=".9"/>
+  <rect class="pixel" x="93" y="90" width=".9" height=".9"/>
+  <rect class="pixel" x="94" y="90" width=".9" height=".9"/>
+  <rect class="pixel" x="96" y="90" width=".9" height=".9"/>
+  <rect class="pixel" x="91" y="91" width=".9" height=".9"/>
+  <rect class="pixel" x="85" y="91" width=".9" height=".9"/>
+  <rect class="pixel" x="87" y="91" width=".9" height=".9"/>
+  <rect class="pixel" x="89" y="91" width=".9" height=".9"/>
+  <rect class="pixel" x="90" y="91" width=".9" height=".9"/>
+  <rect class="pixel" x="92" y="91" width=".9" height=".9"/>
+  <rect class="pixel" x="92" y="92" width=".9" height=".9"/>
+  <rect class="pixel" x="89" y="92" width=".9" height=".9"/>
+  <rect class="pixel" x="90" y="92" width=".9" height=".9"/>
+  <rect class="pixel" x="91" y="92" width=".9" height=".9"/>
+  <rect class="pixel" x="94" y="92" width=".9" height=".9"/>
+  <rect class="pixel" x="96" y="92" width=".9" height=".9"/>
+  <rect class="pixel" x="93" y="93" width=".9" height=".9"/>
+  <rect class="pixel" x="33" y="93" width=".9" height=".9"/>
+  <rect class="pixel" x="35" y="93" width=".9" height=".9"/>
+  <rect class="pixel" x="90" y="93" width=".9" height=".9"/>
+  <rect class="pixel" x="94" y="93" width=".9" height=".9"/>
+  <rect class="pixel" x="95" y="93" width=".9" height=".9"/>
+  <rect class="pixel" x="94" y="94" width=".9" height=".9"/>
+  <rect class="pixel" x="33" y="94" width=".9" height=".9"/>
+  <rect class="pixel" x="35" y="94" width=".9" height=".9"/>
+  <rect class="pixel" x="90" y="94" width=".9" height=".9"/>
+  <rect class="pixel" x="92" y="94" width=".9" height=".9"/>
+  <rect class="pixel" x="93" y="94" width=".9" height=".9"/>
+  <rect class="pixel" x="95" y="94" width=".9" height=".9"/>
+  <rect class="pixel" x="96" y="94" width=".9" height=".9"/>
+  <rect class="pixel" x="97" y="94" width=".9" height=".9"/>
+  <rect class="pixel" x="95" y="95" width=".9" height=".9"/>
+  <rect class="pixel" x="35" y="95" width=".9" height=".9"/>
+  <rect class="pixel" x="93" y="95" width=".9" height=".9"/>
+  <rect class="pixel" x="94" y="95" width=".9" height=".9"/>
+  <rect class="pixel" x="96" y="95" width=".9" height=".9"/>
+  <rect class="pixel" x="97" y="95" width=".9" height=".9"/>
+  <rect class="pixel" x="98" y="95" width=".9" height=".9"/>
+  <rect class="pixel" x="99" y="95" width=".9" height=".9"/>
+  <rect class="pixel" x="101" y="95" width=".9" height=".9"/>
+  <rect class="pixel" x="96" y="96" width=".9" height=".9"/>
+  <rect class="pixel" x="90" y="96" width=".9" height=".9"/>
+  <rect class="pixel" x="92" y="96" width=".9" height=".9"/>
+  <rect class="pixel" x="94" y="96" width=".9" height=".9"/>
+  <rect class="pixel" x="95" y="96" width=".9" height=".9"/>
+  <rect class="pixel" x="97" y="96" width=".9" height=".9"/>
+  <rect class="pixel" x="97" y="97" width=".9" height=".9"/>
+  <rect class="pixel" x="94" y="97" width=".9" height=".9"/>
+  <rect class="pixel" x="95" y="97" width=".9" height=".9"/>
+  <rect class="pixel" x="96" y="97" width=".9" height=".9"/>
+  <rect class="pixel" x="99" y="97" width=".9" height=".9"/>
+  <rect class="pixel" x="101" y="97" width=".9" height=".9"/>
+  <rect class="pixel" x="98" y="98" width=".9" height=".9"/>
+  <rect class="pixel" x="35" y="98" width=".9" height=".9"/>
+  <rect class="pixel" x="36" y="98" width=".9" height=".9"/>
+  <rect class="pixel" x="95" y="98" width=".9" height=".9"/>
+  <rect class="pixel" x="99" y="98" width=".9" height=".9"/>
+  <rect class="pixel" x="100" y="98" width=".9" height=".9"/>
+  <rect class="pixel" x="99" y="99" width=".9" height=".9"/>
+  <rect class="pixel" x="35" y="99" width=".9" height=".9"/>
+  <rect class="pixel" x="36" y="99" width=".9" height=".9"/>
+  <rect class="pixel" x="95" y="99" width=".9" height=".9"/>
+  <rect class="pixel" x="97" y="99" width=".9" height=".9"/>
+  <rect class="pixel" x="98" y="99" width=".9" height=".9"/>
+  <rect class="pixel" x="100" y="99" width=".9" height=".9"/>
+  <rect class="pixel" x="101" y="99" width=".9" height=".9"/>
+  <rect class="pixel" x="102" y="99" width=".9" height=".9"/>
+  <rect class="pixel" x="100" y="100" width=".9" height=".9"/>
+  <rect class="pixel" x="36" y="100" width=".9" height=".9"/>
+  <rect class="pixel" x="98" y="100" width=".9" height=".9"/>
+  <rect class="pixel" x="99" y="100" width=".9" height=".9"/>
+  <rect class="pixel" x="101" y="100" width=".9" height=".9"/>
+  <rect class="pixel" x="102" y="100" width=".9" height=".9"/>
+  <rect class="pixel" x="103" y="100" width=".9" height=".9"/>
+  <rect class="pixel" x="104" y="100" width=".9" height=".9"/>
+  <rect class="pixel" x="106" y="100" width=".9" height=".9"/>
+  <rect class="pixel" x="101" y="101" width=".9" height=".9"/>
+  <rect class="pixel" x="95" y="101" width=".9" height=".9"/>
+  <rect class="pixel" x="97" y="101" width=".9" height=".9"/>
+  <rect class="pixel" x="99" y="101" width=".9" height=".9"/>
+  <rect class="pixel" x="100" y="101" width=".9" height=".9"/>
+  <rect class="pixel" x="102" y="101" width=".9" height=".9"/>
+  <rect class="pixel" x="102" y="102" width=".9" height=".9"/>
+  <rect class="pixel" x="99" y="102" width=".9" height=".9"/>
+  <rect class="pixel" x="100" y="102" width=".9" height=".9"/>
+  <rect class="pixel" x="101" y="102" width=".9" height=".9"/>
+  <rect class="pixel" x="104" y="102" width=".9" height=".9"/>
+  <rect class="pixel" x="106" y="102" width=".9" height=".9"/>
+  <rect class="pixel" x="103" y="103" width=".9" height=".9"/>
+  <rect class="pixel" x="36" y="103" width=".9" height=".9"/>
+  <rect class="pixel" x="38" y="103" width=".9" height=".9"/>
+  <rect class="pixel" x="100" y="103" width=".9" height=".9"/>
+  <rect class="pixel" x="104" y="103" width=".9" height=".9"/>
+  <rect class="pixel" x="105" y="103" width=".9" height=".9"/>
+  <rect class="pixel" x="104" y="104" width=".9" height=".9"/>
+  <rect class="pixel" x="36" y="104" width=".9" height=".9"/>
+  <rect class="pixel" x="38" y="104" width=".9" height=".9"/>
+  <rect class="pixel" x="100" y="104" width=".9" height=".9"/>
+  <rect class="pixel" x="102" y="104" width=".9" height=".9"/>
+  <rect class="pixel" x="103" y="104" width=".9" height=".9"/>
+  <rect class="pixel" x="105" y="104" width=".9" height=".9"/>
+  <rect class="pixel" x="106" y="104" width=".9" height=".9"/>
+  <rect class="pixel" x="107" y="104" width=".9" height=".9"/>
+  <rect class="pixel" x="105" y="105" width=".9" height=".9"/>
+  <rect class="pixel" x="38" y="105" width=".9" height=".9"/>
+  <rect class="pixel" x="103" y="105" width=".9" height=".9"/>
+  <rect class="pixel" x="104" y="105" width=".9" height=".9"/>
+  <rect class="pixel" x="106" y="105" width=".9" height=".9"/>
+  <rect class="pixel" x="107" y="105" width=".9" height=".9"/>
+  <rect class="pixel" x="108" y="105" width=".9" height=".9"/>
+  <rect class="pixel" x="109" y="105" width=".9" height=".9"/>
+  <rect class="pixel" x="111" y="105" width=".9" height=".9"/>
+  <rect class="pixel" x="106" y="106" width=".9" height=".9"/>
+  <rect class="pixel" x="100" y="106" width=".9" height=".9"/>
+  <rect class="pixel" x="102" y="106" width=".9" height=".9"/>
+  <rect class="pixel" x="104" y="106" width=".9" height=".9"/>
+  <rect class="pixel" x="105" y="106" width=".9" height=".9"/>
+  <rect class="pixel" x="107" y="106" width=".9" height=".9"/>
+  <rect class="pixel" x="107" y="107" width=".9" height=".9"/>
+  <rect class="pixel" x="104" y="107" width=".9" height=".9"/>
+  <rect class="pixel" x="105" y="107" width=".9" height=".9"/>
+  <rect class="pixel" x="106" y="107" width=".9" height=".9"/>
+  <rect class="pixel" x="109" y="107" width=".9" height=".9"/>
+  <rect class="pixel" x="111" y="107" width=".9" height=".9"/>
+  <rect class="pixel" x="108" y="108" width=".9" height=".9"/>
+  <rect class="pixel" x="38" y="108" width=".9" height=".9"/>
+  <rect class="pixel" x="39" y="108" width=".9" height=".9"/>
+  <rect class="pixel" x="105" y="108" width=".9" height=".9"/>
+  <rect class="pixel" x="109" y="108" width=".9" height=".9"/>
+  <rect class="pixel" x="110" y="108" width=".9" height=".9"/>
+  <rect class="pixel" x="109" y="109" width=".9" height=".9"/>
+  <rect class="pixel" x="38" y="109" width=".9" height=".9"/>
+  <rect class="pixel" x="39" y="109" width=".9" height=".9"/>
+  <rect class="pixel" x="105" y="109" width=".9" height=".9"/>
+  <rect class="pixel" x="107" y="109" width=".9" height=".9"/>
+  <rect class="pixel" x="108" y="109" width=".9" height=".9"/>
+  <rect class="pixel" x="110" y="109" width=".9" height=".9"/>
+  <rect class="pixel" x="111" y="109" width=".9" height=".9"/>
+  <rect class="pixel" x="112" y="109" width=".9" height=".9"/>
+  <rect class="pixel" x="110" y="110" width=".9" height=".9"/>
+  <rect class="pixel" x="39" y="110" width=".9" height=".9"/>
+  <rect class="pixel" x="108" y="110" width=".9" height=".9"/>
+  <rect class="pixel" x="109" y="110" width=".9" height=".9"/>
+  <rect class="pixel" x="111" y="110" width=".9" height=".9"/>
+  <rect class="pixel" x="112" y="110" width=".9" height=".9"/>
+  <rect class="pixel" x="113" y="110" width=".9" height=".9"/>
+  <rect class="pixel" x="114" y="110" width=".9" height=".9"/>
+  <rect class="pixel" x="116" y="110" width=".9" height=".9"/>
+  <rect class="pixel" x="111" y="111" width=".9" height=".9"/>
+  <rect class="pixel" x="105" y="111" width=".9" height=".9"/>
+  <rect class="pixel" x="107" y="111" width=".9" height=".9"/>
+  <rect class="pixel" x="109" y="111" width=".9" height=".9"/>
+  <rect class="pixel" x="110" y="111" width=".9" height=".9"/>
+  <rect class="pixel" x="112" y="111" width=".9" height=".9"/>
+  <rect class="pixel" x="112" y="112" width=".9" height=".9"/>
+  <rect class="pixel" x="109" y="112" width=".9" height=".9"/>
+  <rect class="pixel" x="110" y="112" width=".9" height=".9"/>
+  <rect class="pixel" x="111" y="112" width=".9" height=".9"/>
+  <rect class="pixel" x="114" y="112" width=".9" height=".9"/>
+  <rect class="pixel" x="116" y="112" width=".9" height=".9"/>
+  <rect class="pixel" x="113" y="113" width=".9" height=".9"/>
+  <rect class="pixel" x="39" y="113" width=".9" height=".9"/>
+  <rect class="pixel" x="41" y="113" width=".9" height=".9"/>
+  <rect class="pixel" x="110" y="113" width=".9" height=".9"/>
+  <rect class="pixel" x="114" y="113" width=".9" height=".9"/>
+  <rect class="pixel" x="115" y="113" width=".9" height=".9"/>
+  <rect class="pixel" x="114" y="114" width=".9" height=".9"/>
+  <rect class="pixel" x="39" y="114" width=".9" height=".9"/>
+  <rect class="pixel" x="41" y="114" width=".9" height=".9"/>
+  <rect class="pixel" x="110" y="114" width=".9" height=".9"/>
+  <rect class="pixel" x="112" y="114" width=".9" height=".9"/>
+  <rect class="pixel" x="113" y="114" width=".9" height=".9"/>
+  <rect class="pixel" x="115" y="114" width=".9" height=".9"/>
+  <rect class="pixel" x="116" y="114" width=".9" height=".9"/>
+  <rect class="pixel" x="117" y="114" width=".9" height=".9"/>
+  <rect class="pixel" x="115" y="115" width=".9" height=".9"/>
+  <rect class="pixel" x="41" y="115" width=".9" height=".9"/>
+  <rect class="pixel" x="113" y="115" width=".9" height=".9"/>
+  <rect class="pixel" x="114" y="115" width=".9" height=".9"/>
+  <rect class="pixel" x="116" y="115" width=".9" height=".9"/>
+  <rect class="pixel" x="117" y="115" width=".9" height=".9"/>
+  <rect class="pixel" x="118" y="115" width=".9" height=".9"/>
+  <rect class="pixel" x="119" y="115" width=".9" height=".9"/>
+  <rect class="pixel" x="121" y="115" width=".9" height=".9"/>
+  <rect class="pixel" x="116" y="116" width=".9" height=".9"/>
+  <rect class="pixel" x="110" y="116" width=".9" height=".9"/>
+  <rect class="pixel" x="112" y="116" width=".9" height=".9"/>
+  <rect class="pixel" x="114" y="116" width=".9" height=".9"/>
+  <rect class="pixel" x="115" y="116" width=".9" height=".9"/>
+  <rect class="pixel" x="117" y="116" width=".9" height=".9"/>
+  <rect class="pixel" x="117" y="117" width=".9" height=".9"/>
+  <rect class="pixel" x="114" y="117" width=".9" height=".9"/>
+  <rect class="pixel" x="115" y="117" width=".9" height=".9"/>
+  <rect class="pixel" x="116" y="117" width=".9" height=".9"/>
+  <rect class="pixel" x="119" y="117" width=".9" height=".9"/>
+  <rect class="pixel" x="121" y="117" width=".9" height=".9"/>
+  <rect class="pixel" x="118" y="118" width=".9" height=".9"/>
+  <rect class="pixel" x="41" y="118" width=".9" height=".9"/>
+  <rect class="pixel" x="42" y="118" width=".9" height=".9"/>
+  <rect class="pixel" x="115" y="118" width=".9" height=".9"/>
+  <rect class="pixel" x="119" y="118" width=".9" height=".9"/>
+  <rect class="pixel" x="120" y="118" width=".9" height=".9"/>
+  <rect class="pixel" x="119" y="119" width=".9" height=".9"/>
+  <rect class="pixel" x="41" y="119" width=".9" height=".9"/>
+  <rect class="pixel" x="42" y="119" width=".9" height=".9"/>
+  <rect class="pixel" x="115" y="119" width=".9" height=".9"/>
+  <rect class="pixel" x="117" y="119" width=".9" height=".9"/>
+  <rect class="pixel" x="118" y="119" width=".9" height=".9"/>
+  <rect class="pixel" x="120" y="119" width=".9" height=".9"/>
+  <rect class="pixel" x="121" y="119" width=".9" height=".9"/>
+  <rect class="pixel" x="122" y="119" width=".9" height=".9"/>
+  <rect class="pixel" x="120" y="120" width=".9" height=".9"/>
+  <rect class="pixel" x="42" y="120" width=".9" height=".9"/>
+  <rect class="pixel" x="118" y="120" width=".9" height=".9"/>
+  <rect class="pixel" x="119" y="120" width=".9" height=".9"/>
+  <rect class="pixel" x="121" y="120" width=".9" height=".9"/>
+  <rect class="pixel" x="122" y="120" width=".9" height=".9"/>
+  <rect class="pixel" x="123" y="120" width=".9" height=".9"/>
+  <rect class="pixel" x="124" y="120" width=".9" height=".9"/>
+  <rect class="pixel" x="126" y="120" width=".9" height=".9"/>
+  <rect class="pixel" x="121" y="121" width=".9" height=".9"/>
+  <rect class="pixel" x="115" y="121" width=".9" height=".9"/>
+  <rect class="pixel" x="117" y="121" width=".9" height=".9"/>
+  <rect class="pixel" x="119" y="121" width=".9" height=".9"/>
+  <rect class="pixel" x="120" y="121" width=".9" height=".9"/>
+  <rect class="pixel" x="122" y="121" width=".9" height=".9"/>
+  <rect class="pixel" x="122" y="122" width=".9" height=".9"/>
+  <rect class="pixel" x="119" y="122" width=".9" height=".9"/>
+  <rect class="pixel" x="120" y="122" width=".9" height=".9"/>
+  <rect class="pixel" x="121" y="122" width=".9" height=".9"/>
+  <rect class="pixel" x="124" y="122" width=".9" height=".9"/>
+  <rect class="pixel" x="126" y="122" width=".9" height=".9"/>
+  <rect class="pixel" x="123" y="123" width=".9" height=".9"/>
+  <rect class="pixel" x="42" y="123" width=".9" height=".9"/>
+  <rect class="pixel" x="44" y="123" width=".9" height=".9"/>
+  <rect class="pixel" x="120" y="123" width=".9" height=".9"/>
+  <rect class="pixel" x="124" y="123" width=".9" height=".9"/>
+  <rect class="pixel" x="125" y="123" width=".9" height=".9"/>
+  <rect class="pixel" x="124" y="124" width=".9" height=".9"/>
+  <rect class="pixel" x="42" y="124" width=".9" height=".9"/>
+  <rect class="pixel" x="44" y="124" width=".9" height=".9"/>
+  <rect class="pixel" x="120" y="124" width=".9" height=".9"/>
+  <rect class="pixel" x="122" y="124" width=".9" height=".9"/>
+  <rect class="pixel" x="123" y="124" width=".9" height=".9"/>
+  <rect class="pixel" x="125" y="124" width=".9" height=".9"/>
+  <rect class="pixel" x="126" y="124" width=".9" height=".9"/>
+  <rect class="pixel" x="127" y="124" width=".9" height=".9"/>
+  <rect class="pixel" x="125" y="125" width=".9" height=".9"/>
+  <rect class="pixel" x="44" y="125" width=".9" height=".9"/>
+  <rect class="pixel" x="123" y="125" width=".9" height=".9"/>
+  <rect class="pixel" x="124" y="125" width=".9" height=".9"/>
+  <rect class="pixel" x="126" y="125" width=".9" height=".9"/>
+  <rect class="pixel" x="127" y="125" width=".9" height=".9"/>
+  <rect class="pixel" x="128" y="125" width=".9" height=".9"/>
+  <rect class="pixel" x="129" y="125" width=".9" height=".9"/>
+  <rect class="pixel" x="131" y="125" width=".9" height=".9"/>
+  <rect class="pixel" x="126" y="126" width=".9" height=".9"/>
+  <rect class="pixel" x="120" y="126" width=".9" height=".9"/>
+  <rect class="pixel" x="122" y="126" width=".9" height=".9"/>
+  <rect class="pixel" x="124" y="126" width=".9" height=".9"/>
+  <rect class="pixel" x="125" y="126" width=".9" height=".9"/>
+  <rect class="pixel" x="127" y="126" width=".9" height=".9"/>
+  <rect class="pixel" x="127" y="127" width=".9" height=".9"/>
+  <rect class="pixel" x="124" y="127" width=".9" height=".9"/>
+  <rect class="pixel" x="125" y="127" width=".9" height=".9"/>
+  <rect class="pixel" x="126" y="127" width=".9" height=".9"/>
+  <rect class="pixel" x="129" y="127" width=".9" height=".9"/>
+  <rect class="pixel" x="131" y="127" width=".9" height=".9"/>
+  <rect class="pixel" x="128" y="128" width=".9" height=".9"/>
+  <rect class="pixel" x="44" y="128" width=".9" height=".9"/>
+  <rect class="pixel" x="45" y="128" width=".9" height=".9"/>
+  <rect class="pixel" x="125" y="128" width=".9" height=".9"/>
+  <rect class="pixel" x="129" y="128" width=".9" height=".9"/>
+  <rect class="pixel" x="130" y="128" width=".9" height=".9"/>
+  <rect class="pixel" x="129" y="129" width=".9" height=".9"/>
+  <rect class="pixel" x="44" y="129" width=".9" height=".9"/>
+  <rect class="pixel" x="45" y="129" width=".9" height=".9"/>
+  <rect class="pixel" x="125" y="129" width=".9" height=".9"/>
+  <rect class="pixel" x="127" y="129" width=".9" height=".9"/>
+  <rect class="pixel" x="128" y="129" width=".9" height=".9"/>
+  <rect class="pixel" x="130" y="129" width=".9" height=".9"/>
+  <rect class="pixel" x="131" y="129" width=".9" height=".9"/>
+  <rect class="pixel" x="132" y="129" width=".9" height=".9"/>
+  <rect class="pixel" x="130" y="130" width=".9" height=".9"/>
+  <rect class="pixel" x="45" y="130" width=".9" height=".9"/>
+  <rect class="pixel" x="128" y="130" width=".9" height=".9"/>
+  <rect class="pixel" x="129" y="130" width=".9" height=".9"/>
+  <rect class="pixel" x="131" y="130" width=".9" height=".9"/>
+  <rect class="pixel" x="132" y="130" width=".9" height=".9"/>
+  <rect class="pixel" x="133" y="130" width=".9" height=".9"/>
+  <rect class="pixel" x="134" y="130" width=".9" height=".9"/>
+  <rect class="pixel" x="136" y="130" width=".9" height=".9"/>
+  <rect class="pixel" x="131" y="131" width=".9" height=".9"/>
+  <rect class="pixel" x="125" y="131" width=".9" height=".9"/>
+  <rect class="pixel" x="127" y="131" width=".9" height=".9"/>
+  <rect class="pixel" x="129" y="131" width=".9" height=".9"/>
+  <rect class="pixel" x="130" y="131" width=".9" height=".9"/>
+  <rect class="pixel" x="132" y="131" width=".9" height=".9"/>
+  <rect class="pixel" x="132" y="132" width=".9" height=".9"/>
+  <rect class="pixel" x="129" y="132" width=".9" height=".9"/>
+  <rect class="pixel" x="130" y="132" width=".9" height=".9"/>
+  <rect class="pixel" x="131" y="132" width=".9" height=".9"/>
+  <rect class="pixel" x="134" y="132" width=".9" height=".9"/>
+  <rect class="pixel" x="136" y="132" width=".9" height=".9"/>
+  <rect class="pixel" x="133" y="133" width=".9" height=".9"/>
+  <rect class="pixel" x="45" y="133" width=".9" height=".9"/>
+  <rect class="pixel" x="47" y="133" width=".9" height=".9"/>
+  <rect class="pixel" x="130" y="133" width=".9" height=".9"/>
+  <rect class="pixel" x="134" y="133" width=".9" height=".9"/>
+  <rect class="pixel" x="135" y="133" width=".9" height=".9"/>
+  <rect class="pixel" x="134" y="134" width=".9" height=".9"/>
+  <rect class="pixel" x="45" y="134" width=".9" height=".9"/>
+  <rect class="pixel" x="47" y="134" width=".9" height=".9"/>
+  <rect class="pixel" x="130" y="134" width=".9" height=".9"/>
+  <rect class="pixel" x="132" y="134" width=".9" height=".9"/>
+  <rect class="pixel" x="133" y="134" width=".9" height=".9"/>
+  <rect class="pixel" x="135" y="134" width=".9" height=".9"/>
+  <rect class="pixel" x="136" y="134" width=".9" height=".9"/>
+  <rect class="pixel" x="137" y="134" width=".9" height=".9"/>
+  <rect class="pixel" x="135" y="135" width=".9" height=".9"/>
+  <rect class="pixel" x="47" y="135" width=".9" height=".9"/>
+  <rect class="pixel" x="133" y="135" width=".9" height=".9"/>
+  <rect class="pixel" x="134" y="135" width=".9" height=".9"/>
+  <rect class="pixel" x="136" y="135" width=".9" height=".9"/>
+  <rect class="pixel" x="137" y="135" width=".9" height=".9"/>
+  <rect class="pixel" x="138" y="135" width=".9" height=".9"/>
+  <rect class="pixel" x="139" y="135" width=".9" height=".9"/>
+  <rect class="pixel" x="141" y="135" width=".9" height=".9"/>
+  <rect class="pixel" x="136" y="136" width=".9" height=".9"/>
+  <rect class="pixel" x="130" y="136" width=".9" height=".9"/>
+  <rect class="pixel" x="132" y="136" width=".9" height=".9"/>
+  <rect class="pixel" x="134" y="136" width=".9" height=".9"/>
+  <rect class="pixel" x="135" y="136" width=".9" height=".9"/>
+  <rect class="pixel" x="137" y="136" width=".9" height=".9"/>
+  <rect class="pixel" x="137" y="137" width=".9" height=".9"/>
+  <rect class="pixel" x="134" y="137" width=".9" height=".9"/>
+  <rect class="pixel" x="135" y="137" width=".9" height=".9"/>
+  <rect class="pixel" x="136" y="137" width=".9" height=".9"/>
+  <rect class="pixel" x="139" y="137" width=".9" height=".9"/>
+  <rect class="pixel" x="141" y="137" width=".9" height=".9"/>
+  <rect class="pixel" x="138" y="138" width=".9" height=".9"/>
+  <rect class="pixel" x="47" y="138" width=".9" height=".9"/>
+  <rect class="pixel" x="48" y="138" width=".9" height=".9"/>
+  <rect class="pixel" x="135" y="138" width=".9" height=".9"/>
+  <rect class="pixel" x="139" y="138" width=".9" height=".9"/>
+  <rect class="pixel" x="140" y="138" width=".9" height=".9"/>
+  <rect class="pixel" x="139" y="139" width=".9" height=".9"/>
+  <rect class="pixel" x="47" y="139" width=".9" height=".9"/>
+  <rect class="pixel" x="48" y="139" width=".9" height=".9"/>
+  <rect class="pixel" x="135" y="139" width=".9" height=".9"/>
+  <rect class="pixel" x="137" y="139" width=".9" height=".9"/>
+  <rect class="pixel" x="138" y="139" width=".9" height=".9"/>
+  <rect class="pixel" x="140" y="139" width=".9" height=".9"/>
+  <rect class="pixel" x="141" y="139" width=".9" height=".9"/>
+  <rect class="pixel" x="142" y="139" width=".9" height=".9"/>
+  <rect class="pixel" x="140" y="140" width=".9" height=".9"/>
+  <rect class="pixel" x="48" y="140" width=".9" height=".9"/>
+  <rect class="pixel" x="138" y="140" width=".9" height=".9"/>
+  <rect class="pixel" x="139" y="140" width=".9" height=".9"/>
+  <rect class="pixel" x="141" y="140" width=".9" height=".9"/>
+  <rect class="pixel" x="142" y="140" width=".9" height=".9"/>
+  <rect class="pixel" x="143" y="140" width=".9" height=".9"/>
+  <rect class="pixel" x="144" y="140" width=".9" height=".9"/>
+  <rect class="pixel" x="146" y="140" width=".9" height=".9"/>
+  <rect class="pixel" x="141" y="141" width=".9" height=".9"/>
+  <rect class="pixel" x="135" y="141" width=".9" height=".9"/>
+  <rect class="pixel" x="137" y="141" width=".9" height=".9"/>
+  <rect class="pixel" x="139" y="141" width=".9" height=".9"/>
+  <rect class="pixel" x="140" y="141" width=".9" height=".9"/>
+  <rect class="pixel" x="142" y="141" width=".9" height=".9"/>
+  <rect class="pixel" x="142" y="142" width=".9" height=".9"/>
+  <rect class="pixel" x="139" y="142" width=".9" height=".9"/>
+  <rect class="pixel" x="140" y="142" width=".9" height=".9"/>
+  <rect class="pixel" x="141" y="142" width=".9" height=".9"/>
+  <rect class="pixel" x="144" y="142" width=".9" height=".9"/>
+  <rect class="pixel" x="146" y="142" width=".9" height=".9"/>
+  <rect class="pixel" x="143" y="143" width=".9" height=".9"/>
+  <rect class="pixel" x="48" y="143" width=".9" height=".9"/>
+  <rect class="pixel" x="50" y="143" width=".9" height=".9"/>
+  <rect class="pixel" x="140" y="143" width=".9" height=".9"/>
+  <rect class="pixel" x="144" y="143" width=".9" height=".9"/>
+  <rect class="pixel" x="145" y="143" width=".9" height=".9"/>
+  <rect class="pixel" x="144" y="144" width=".9" height=".9"/>
+  <rect class="pixel" x="48" y="144" width=".9" height=".9"/>
+  <rect class="pixel" x="50" y="144" width=".9" height=".9"/>
+  <rect class="pixel" x="140" y="144" width=".9" height=".9"/>
+  <rect class="pixel" x="142" y="144" width=".9" height=".9"/>
+  <rect class="pixel" x="143" y="144" width=".9" height=".9"/>
+  <rect class="pixel" x="145" y="144" width=".9" height=".9"/>
+  <rect class="pixel" x="146" y="144" width=".9" height=".9"/>
+  <rect class="pixel" x="147" y="144" width=".9" height=".9"/>
+  <rect class="pixel" x="145" y="145" width=".9" height=".9"/>
+  <rect class="pixel" x="50" y="145" width=".9" height=".9"/>
+  <rect class="pixel" x="143" y="145" width=".9" height=".9"/>
+  <rect class="pixel" x="144" y="145" width=".9" height=".9"/>
+  <rect class="pixel" x="146" y="145" width=".9" height=".9"/>
+  <rect class="pixel" x="147" y="145" width=".9" height=".9"/>
+  <rect class="pixel" x="148" y="145" width=".9" height=".9"/>
+  <rect class="pixel" x="149" y="145" width=".9" height=".9"/>
+  <rect class="pixel" x="150" y="145" width=".9" height=".9"/>
+  <rect class="pixel" x="146" y="146" width=".9" height=".9"/>
+  <rect class="pixel" x="140" y="146" width=".9" height=".9"/>
+  <rect class="pixel" x="142" y="146" width=".9" height=".9"/>
+  <rect class="pixel" x="144" y="146" width=".9" height=".9"/>
+  <rect class="pixel" x="145" y="146" width=".9" height=".9"/>
+  <rect class="pixel" x="147" y="146" width=".9" height=".9"/>
+  <rect class="pixel" x="147" y="147" width=".9" height=".9"/>
+  <rect class="pixel" x="144" y="147" width=".9" height=".9"/>
+  <rect class="pixel" x="145" y="147" width=".9" height=".9"/>
+  <rect class="pixel" x="146" y="147" width=".9" height=".9"/>
+  <rect class="pixel" x="149" y="147" width=".9" height=".9"/>
+  <rect class="pixel" x="150" y="147" width=".9" height=".9"/>
+  <rect class="pixel" x="148" y="148" width=".9" height=".9"/>
+  <rect class="pixel" x="22" y="148" width=".9" height=".9"/>
+  <rect class="pixel" x="50" y="148" width=".9" height=".9"/>
+  <rect class="pixel" x="52" y="148" width=".9" height=".9"/>
+  <rect class="pixel" x="145" y="148" width=".9" height=".9"/>
+  <rect class="pixel" x="149" y="148" width=".9" height=".9"/>
+  <rect class="pixel" x="149" y="149" width=".9" height=".9"/>
+  <rect class="pixel" x="22" y="149" width=".9" height=".9"/>
+  <rect class="pixel" x="50" y="149" width=".9" height=".9"/>
+  <rect class="pixel" x="52" y="149" width=".9" height=".9"/>
+  <rect class="pixel" x="55" y="149" width=".9" height=".9"/>
+  <rect class="pixel" x="145" y="149" width=".9" height=".9"/>
+  <rect class="pixel" x="147" y="149" width=".9" height=".9"/>
+  <rect class="pixel" x="148" y="149" width=".9" height=".9"/>
+  <rect class="pixel" x="150" y="149" width=".9" height=".9"/>
+  <rect class="pixel" x="150" y="150" width=".9" height=".9"/>
+  <rect class="pixel" x="52" y="150" width=".9" height=".9"/>
+  <rect class="pixel" x="55" y="150" width=".9" height=".9"/>
+  <rect class="pixel" x="145" y="150" width=".9" height=".9"/>
+  <rect class="pixel" x="147" y="150" width=".9" height=".9"/>
+  <rect class="pixel" x="149" y="150" width=".9" height=".9"/>
+</svg>
diff --git a/doc/doxygen/deal.II/images/step-2.sparsity-2.png b/doc/doxygen/deal.II/images/step-2.sparsity-2.png
new file mode 100644
index 0000000..50e7754
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-2.sparsity-2.png differ
diff --git a/doc/doxygen/deal.II/images/step-2.sparsity-2.svg b/doc/doxygen/deal.II/images/step-2.sparsity-2.svg
new file mode 100644
index 0000000..01ebb93
--- /dev/null
+++ b/doc/doxygen/deal.II/images/step-2.sparsity-2.svg
@@ -0,0 +1,1183 @@
+<svg xmlns="http://www.w3.org/2000/svg" version="1.1" viewBox="0 0 152 152 ">
+<style type="text/css" >
+     <![CDATA[
+      rect.pixel {
+          fill:   #ff0000;
+      }
+    ]]>
+  </style>
+
+   <rect width="152" height="152" fill="rgb(128, 128, 128)"/>
+   <rect x="1" y="1" width="150" height="150" fill="rgb(255, 255, 255)"/>
+
+  <rect class="pixel" x="1" y="1" width=".9" height=".9"/>
+  <rect class="pixel" x="2" y="1" width=".9" height=".9"/>
+  <rect class="pixel" x="3" y="1" width=".9" height=".9"/>
+  <rect class="pixel" x="4" y="1" width=".9" height=".9"/>
+  <rect class="pixel" x="5" y="1" width=".9" height=".9"/>
+  <rect class="pixel" x="6" y="1" width=".9" height=".9"/>
+  <rect class="pixel" x="2" y="2" width=".9" height=".9"/>
+  <rect class="pixel" x="1" y="2" width=".9" height=".9"/>
+  <rect class="pixel" x="4" y="2" width=".9" height=".9"/>
+  <rect class="pixel" x="5" y="2" width=".9" height=".9"/>
+  <rect class="pixel" x="7" y="2" width=".9" height=".9"/>
+  <rect class="pixel" x="13" y="2" width=".9" height=".9"/>
+  <rect class="pixel" x="3" y="3" width=".9" height=".9"/>
+  <rect class="pixel" x="1" y="3" width=".9" height=".9"/>
+  <rect class="pixel" x="4" y="3" width=".9" height=".9"/>
+  <rect class="pixel" x="6" y="3" width=".9" height=".9"/>
+  <rect class="pixel" x="8" y="3" width=".9" height=".9"/>
+  <rect class="pixel" x="14" y="3" width=".9" height=".9"/>
+  <rect class="pixel" x="4" y="4" width=".9" height=".9"/>
+  <rect class="pixel" x="1" y="4" width=".9" height=".9"/>
+  <rect class="pixel" x="2" y="4" width=".9" height=".9"/>
+  <rect class="pixel" x="3" y="4" width=".9" height=".9"/>
+  <rect class="pixel" x="5" y="4" width=".9" height=".9"/>
+  <rect class="pixel" x="6" y="4" width=".9" height=".9"/>
+  <rect class="pixel" x="9" y="4" width=".9" height=".9"/>
+  <rect class="pixel" x="12" y="4" width=".9" height=".9"/>
+  <rect class="pixel" x="15" y="4" width=".9" height=".9"/>
+  <rect class="pixel" x="16" y="4" width=".9" height=".9"/>
+  <rect class="pixel" x="21" y="4" width=".9" height=".9"/>
+  <rect class="pixel" x="5" y="5" width=".9" height=".9"/>
+  <rect class="pixel" x="1" y="5" width=".9" height=".9"/>
+  <rect class="pixel" x="2" y="5" width=".9" height=".9"/>
+  <rect class="pixel" x="4" y="5" width=".9" height=".9"/>
+  <rect class="pixel" x="7" y="5" width=".9" height=".9"/>
+  <rect class="pixel" x="9" y="5" width=".9" height=".9"/>
+  <rect class="pixel" x="10" y="5" width=".9" height=".9"/>
+  <rect class="pixel" x="13" y="5" width=".9" height=".9"/>
+  <rect class="pixel" x="16" y="5" width=".9" height=".9"/>
+  <rect class="pixel" x="17" y="5" width=".9" height=".9"/>
+  <rect class="pixel" x="18" y="5" width=".9" height=".9"/>
+  <rect class="pixel" x="6" y="6" width=".9" height=".9"/>
+  <rect class="pixel" x="1" y="6" width=".9" height=".9"/>
+  <rect class="pixel" x="3" y="6" width=".9" height=".9"/>
+  <rect class="pixel" x="4" y="6" width=".9" height=".9"/>
+  <rect class="pixel" x="8" y="6" width=".9" height=".9"/>
+  <rect class="pixel" x="11" y="6" width=".9" height=".9"/>
+  <rect class="pixel" x="12" y="6" width=".9" height=".9"/>
+  <rect class="pixel" x="14" y="6" width=".9" height=".9"/>
+  <rect class="pixel" x="19" y="6" width=".9" height=".9"/>
+  <rect class="pixel" x="20" y="6" width=".9" height=".9"/>
+  <rect class="pixel" x="21" y="6" width=".9" height=".9"/>
+  <rect class="pixel" x="7" y="7" width=".9" height=".9"/>
+  <rect class="pixel" x="2" y="7" width=".9" height=".9"/>
+  <rect class="pixel" x="5" y="7" width=".9" height=".9"/>
+  <rect class="pixel" x="13" y="7" width=".9" height=".9"/>
+  <rect class="pixel" x="22" y="7" width=".9" height=".9"/>
+  <rect class="pixel" x="49" y="7" width=".9" height=".9"/>
+  <rect class="pixel" x="8" y="8" width=".9" height=".9"/>
+  <rect class="pixel" x="3" y="8" width=".9" height=".9"/>
+  <rect class="pixel" x="6" y="8" width=".9" height=".9"/>
+  <rect class="pixel" x="14" y="8" width=".9" height=".9"/>
+  <rect class="pixel" x="23" y="8" width=".9" height=".9"/>
+  <rect class="pixel" x="50" y="8" width=".9" height=".9"/>
+  <rect class="pixel" x="9" y="9" width=".9" height=".9"/>
+  <rect class="pixel" x="4" y="9" width=".9" height=".9"/>
+  <rect class="pixel" x="5" y="9" width=".9" height=".9"/>
+  <rect class="pixel" x="15" y="9" width=".9" height=".9"/>
+  <rect class="pixel" x="16" y="9" width=".9" height=".9"/>
+  <rect class="pixel" x="17" y="9" width=".9" height=".9"/>
+  <rect class="pixel" x="10" y="10" width=".9" height=".9"/>
+  <rect class="pixel" x="5" y="10" width=".9" height=".9"/>
+  <rect class="pixel" x="13" y="10" width=".9" height=".9"/>
+  <rect class="pixel" x="17" y="10" width=".9" height=".9"/>
+  <rect class="pixel" x="18" y="10" width=".9" height=".9"/>
+  <rect class="pixel" x="51" y="10" width=".9" height=".9"/>
+  <rect class="pixel" x="11" y="11" width=".9" height=".9"/>
+  <rect class="pixel" x="6" y="11" width=".9" height=".9"/>
+  <rect class="pixel" x="14" y="11" width=".9" height=".9"/>
+  <rect class="pixel" x="19" y="11" width=".9" height=".9"/>
+  <rect class="pixel" x="20" y="11" width=".9" height=".9"/>
+  <rect class="pixel" x="54" y="11" width=".9" height=".9"/>
+  <rect class="pixel" x="12" y="12" width=".9" height=".9"/>
+  <rect class="pixel" x="4" y="12" width=".9" height=".9"/>
+  <rect class="pixel" x="6" y="12" width=".9" height=".9"/>
+  <rect class="pixel" x="15" y="12" width=".9" height=".9"/>
+  <rect class="pixel" x="20" y="12" width=".9" height=".9"/>
+  <rect class="pixel" x="21" y="12" width=".9" height=".9"/>
+  <rect class="pixel" x="13" y="13" width=".9" height=".9"/>
+  <rect class="pixel" x="2" y="13" width=".9" height=".9"/>
+  <rect class="pixel" x="5" y="13" width=".9" height=".9"/>
+  <rect class="pixel" x="7" y="13" width=".9" height=".9"/>
+  <rect class="pixel" x="10" y="13" width=".9" height=".9"/>
+  <rect class="pixel" x="18" y="13" width=".9" height=".9"/>
+  <rect class="pixel" x="22" y="13" width=".9" height=".9"/>
+  <rect class="pixel" x="24" y="13" width=".9" height=".9"/>
+  <rect class="pixel" x="49" y="13" width=".9" height=".9"/>
+  <rect class="pixel" x="51" y="13" width=".9" height=".9"/>
+  <rect class="pixel" x="52" y="13" width=".9" height=".9"/>
+  <rect class="pixel" x="14" y="14" width=".9" height=".9"/>
+  <rect class="pixel" x="3" y="14" width=".9" height=".9"/>
+  <rect class="pixel" x="6" y="14" width=".9" height=".9"/>
+  <rect class="pixel" x="8" y="14" width=".9" height=".9"/>
+  <rect class="pixel" x="11" y="14" width=".9" height=".9"/>
+  <rect class="pixel" x="19" y="14" width=".9" height=".9"/>
+  <rect class="pixel" x="23" y="14" width=".9" height=".9"/>
+  <rect class="pixel" x="25" y="14" width=".9" height=".9"/>
+  <rect class="pixel" x="50" y="14" width=".9" height=".9"/>
+  <rect class="pixel" x="53" y="14" width=".9" height=".9"/>
+  <rect class="pixel" x="54" y="14" width=".9" height=".9"/>
+  <rect class="pixel" x="15" y="15" width=".9" height=".9"/>
+  <rect class="pixel" x="4" y="15" width=".9" height=".9"/>
+  <rect class="pixel" x="9" y="15" width=".9" height=".9"/>
+  <rect class="pixel" x="12" y="15" width=".9" height=".9"/>
+  <rect class="pixel" x="16" y="15" width=".9" height=".9"/>
+  <rect class="pixel" x="21" y="15" width=".9" height=".9"/>
+  <rect class="pixel" x="26" y="15" width=".9" height=".9"/>
+  <rect class="pixel" x="33" y="15" width=".9" height=".9"/>
+  <rect class="pixel" x="34" y="15" width=".9" height=".9"/>
+  <rect class="pixel" x="35" y="15" width=".9" height=".9"/>
+  <rect class="pixel" x="48" y="15" width=".9" height=".9"/>
+  <rect class="pixel" x="16" y="16" width=".9" height=".9"/>
+  <rect class="pixel" x="4" y="16" width=".9" height=".9"/>
+  <rect class="pixel" x="5" y="16" width=".9" height=".9"/>
+  <rect class="pixel" x="9" y="16" width=".9" height=".9"/>
+  <rect class="pixel" x="15" y="16" width=".9" height=".9"/>
+  <rect class="pixel" x="17" y="16" width=".9" height=".9"/>
+  <rect class="pixel" x="26" y="16" width=".9" height=".9"/>
+  <rect class="pixel" x="27" y="16" width=".9" height=".9"/>
+  <rect class="pixel" x="35" y="16" width=".9" height=".9"/>
+  <rect class="pixel" x="36" y="16" width=".9" height=".9"/>
+  <rect class="pixel" x="37" y="16" width=".9" height=".9"/>
+  <rect class="pixel" x="17" y="17" width=".9" height=".9"/>
+  <rect class="pixel" x="5" y="17" width=".9" height=".9"/>
+  <rect class="pixel" x="9" y="17" width=".9" height=".9"/>
+  <rect class="pixel" x="10" y="17" width=".9" height=".9"/>
+  <rect class="pixel" x="16" y="17" width=".9" height=".9"/>
+  <rect class="pixel" x="18" y="17" width=".9" height=".9"/>
+  <rect class="pixel" x="27" y="17" width=".9" height=".9"/>
+  <rect class="pixel" x="28" y="17" width=".9" height=".9"/>
+  <rect class="pixel" x="37" y="17" width=".9" height=".9"/>
+  <rect class="pixel" x="38" y="17" width=".9" height=".9"/>
+  <rect class="pixel" x="39" y="17" width=".9" height=".9"/>
+  <rect class="pixel" x="18" y="18" width=".9" height=".9"/>
+  <rect class="pixel" x="5" y="18" width=".9" height=".9"/>
+  <rect class="pixel" x="10" y="18" width=".9" height=".9"/>
+  <rect class="pixel" x="13" y="18" width=".9" height=".9"/>
+  <rect class="pixel" x="17" y="18" width=".9" height=".9"/>
+  <rect class="pixel" x="28" y="18" width=".9" height=".9"/>
+  <rect class="pixel" x="29" y="18" width=".9" height=".9"/>
+  <rect class="pixel" x="39" y="18" width=".9" height=".9"/>
+  <rect class="pixel" x="40" y="18" width=".9" height=".9"/>
+  <rect class="pixel" x="41" y="18" width=".9" height=".9"/>
+  <rect class="pixel" x="51" y="18" width=".9" height=".9"/>
+  <rect class="pixel" x="19" y="19" width=".9" height=".9"/>
+  <rect class="pixel" x="6" y="19" width=".9" height=".9"/>
+  <rect class="pixel" x="11" y="19" width=".9" height=".9"/>
+  <rect class="pixel" x="14" y="19" width=".9" height=".9"/>
+  <rect class="pixel" x="20" y="19" width=".9" height=".9"/>
+  <rect class="pixel" x="30" y="19" width=".9" height=".9"/>
+  <rect class="pixel" x="31" y="19" width=".9" height=".9"/>
+  <rect class="pixel" x="42" y="19" width=".9" height=".9"/>
+  <rect class="pixel" x="43" y="19" width=".9" height=".9"/>
+  <rect class="pixel" x="44" y="19" width=".9" height=".9"/>
+  <rect class="pixel" x="54" y="19" width=".9" height=".9"/>
+  <rect class="pixel" x="20" y="20" width=".9" height=".9"/>
+  <rect class="pixel" x="6" y="20" width=".9" height=".9"/>
+  <rect class="pixel" x="11" y="20" width=".9" height=".9"/>
+  <rect class="pixel" x="12" y="20" width=".9" height=".9"/>
+  <rect class="pixel" x="19" y="20" width=".9" height=".9"/>
+  <rect class="pixel" x="21" y="20" width=".9" height=".9"/>
+  <rect class="pixel" x="31" y="20" width=".9" height=".9"/>
+  <rect class="pixel" x="32" y="20" width=".9" height=".9"/>
+  <rect class="pixel" x="44" y="20" width=".9" height=".9"/>
+  <rect class="pixel" x="45" y="20" width=".9" height=".9"/>
+  <rect class="pixel" x="46" y="20" width=".9" height=".9"/>
+  <rect class="pixel" x="21" y="21" width=".9" height=".9"/>
+  <rect class="pixel" x="4" y="21" width=".9" height=".9"/>
+  <rect class="pixel" x="6" y="21" width=".9" height=".9"/>
+  <rect class="pixel" x="12" y="21" width=".9" height=".9"/>
+  <rect class="pixel" x="15" y="21" width=".9" height=".9"/>
+  <rect class="pixel" x="20" y="21" width=".9" height=".9"/>
+  <rect class="pixel" x="32" y="21" width=".9" height=".9"/>
+  <rect class="pixel" x="33" y="21" width=".9" height=".9"/>
+  <rect class="pixel" x="46" y="21" width=".9" height=".9"/>
+  <rect class="pixel" x="47" y="21" width=".9" height=".9"/>
+  <rect class="pixel" x="48" y="21" width=".9" height=".9"/>
+  <rect class="pixel" x="22" y="22" width=".9" height=".9"/>
+  <rect class="pixel" x="7" y="22" width=".9" height=".9"/>
+  <rect class="pixel" x="13" y="22" width=".9" height=".9"/>
+  <rect class="pixel" x="49" y="22" width=".9" height=".9"/>
+  <rect class="pixel" x="55" y="22" width=".9" height=".9"/>
+  <rect class="pixel" x="88" y="22" width=".9" height=".9"/>
+  <rect class="pixel" x="23" y="23" width=".9" height=".9"/>
+  <rect class="pixel" x="8" y="23" width=".9" height=".9"/>
+  <rect class="pixel" x="14" y="23" width=".9" height=".9"/>
+  <rect class="pixel" x="50" y="23" width=".9" height=".9"/>
+  <rect class="pixel" x="56" y="23" width=".9" height=".9"/>
+  <rect class="pixel" x="89" y="23" width=".9" height=".9"/>
+  <rect class="pixel" x="24" y="24" width=".9" height=".9"/>
+  <rect class="pixel" x="13" y="24" width=".9" height=".9"/>
+  <rect class="pixel" x="49" y="24" width=".9" height=".9"/>
+  <rect class="pixel" x="51" y="24" width=".9" height=".9"/>
+  <rect class="pixel" x="52" y="24" width=".9" height=".9"/>
+  <rect class="pixel" x="90" y="24" width=".9" height=".9"/>
+  <rect class="pixel" x="25" y="25" width=".9" height=".9"/>
+  <rect class="pixel" x="14" y="25" width=".9" height=".9"/>
+  <rect class="pixel" x="50" y="25" width=".9" height=".9"/>
+  <rect class="pixel" x="53" y="25" width=".9" height=".9"/>
+  <rect class="pixel" x="54" y="25" width=".9" height=".9"/>
+  <rect class="pixel" x="93" y="25" width=".9" height=".9"/>
+  <rect class="pixel" x="26" y="26" width=".9" height=".9"/>
+  <rect class="pixel" x="15" y="26" width=".9" height=".9"/>
+  <rect class="pixel" x="16" y="26" width=".9" height=".9"/>
+  <rect class="pixel" x="34" y="26" width=".9" height=".9"/>
+  <rect class="pixel" x="35" y="26" width=".9" height=".9"/>
+  <rect class="pixel" x="36" y="26" width=".9" height=".9"/>
+  <rect class="pixel" x="27" y="27" width=".9" height=".9"/>
+  <rect class="pixel" x="16" y="27" width=".9" height=".9"/>
+  <rect class="pixel" x="17" y="27" width=".9" height=".9"/>
+  <rect class="pixel" x="36" y="27" width=".9" height=".9"/>
+  <rect class="pixel" x="37" y="27" width=".9" height=".9"/>
+  <rect class="pixel" x="38" y="27" width=".9" height=".9"/>
+  <rect class="pixel" x="28" y="28" width=".9" height=".9"/>
+  <rect class="pixel" x="17" y="28" width=".9" height=".9"/>
+  <rect class="pixel" x="18" y="28" width=".9" height=".9"/>
+  <rect class="pixel" x="38" y="28" width=".9" height=".9"/>
+  <rect class="pixel" x="39" y="28" width=".9" height=".9"/>
+  <rect class="pixel" x="40" y="28" width=".9" height=".9"/>
+  <rect class="pixel" x="29" y="29" width=".9" height=".9"/>
+  <rect class="pixel" x="18" y="29" width=".9" height=".9"/>
+  <rect class="pixel" x="40" y="29" width=".9" height=".9"/>
+  <rect class="pixel" x="41" y="29" width=".9" height=".9"/>
+  <rect class="pixel" x="51" y="29" width=".9" height=".9"/>
+  <rect class="pixel" x="80" y="29" width=".9" height=".9"/>
+  <rect class="pixel" x="30" y="30" width=".9" height=".9"/>
+  <rect class="pixel" x="19" y="30" width=".9" height=".9"/>
+  <rect class="pixel" x="42" y="30" width=".9" height=".9"/>
+  <rect class="pixel" x="43" y="30" width=".9" height=".9"/>
+  <rect class="pixel" x="54" y="30" width=".9" height=".9"/>
+  <rect class="pixel" x="87" y="30" width=".9" height=".9"/>
+  <rect class="pixel" x="31" y="31" width=".9" height=".9"/>
+  <rect class="pixel" x="19" y="31" width=".9" height=".9"/>
+  <rect class="pixel" x="20" y="31" width=".9" height=".9"/>
+  <rect class="pixel" x="43" y="31" width=".9" height=".9"/>
+  <rect class="pixel" x="44" y="31" width=".9" height=".9"/>
+  <rect class="pixel" x="45" y="31" width=".9" height=".9"/>
+  <rect class="pixel" x="32" y="32" width=".9" height=".9"/>
+  <rect class="pixel" x="20" y="32" width=".9" height=".9"/>
+  <rect class="pixel" x="21" y="32" width=".9" height=".9"/>
+  <rect class="pixel" x="45" y="32" width=".9" height=".9"/>
+  <rect class="pixel" x="46" y="32" width=".9" height=".9"/>
+  <rect class="pixel" x="47" y="32" width=".9" height=".9"/>
+  <rect class="pixel" x="33" y="33" width=".9" height=".9"/>
+  <rect class="pixel" x="15" y="33" width=".9" height=".9"/>
+  <rect class="pixel" x="21" y="33" width=".9" height=".9"/>
+  <rect class="pixel" x="34" y="33" width=".9" height=".9"/>
+  <rect class="pixel" x="47" y="33" width=".9" height=".9"/>
+  <rect class="pixel" x="48" y="33" width=".9" height=".9"/>
+  <rect class="pixel" x="34" y="34" width=".9" height=".9"/>
+  <rect class="pixel" x="15" y="34" width=".9" height=".9"/>
+  <rect class="pixel" x="26" y="34" width=".9" height=".9"/>
+  <rect class="pixel" x="33" y="34" width=".9" height=".9"/>
+  <rect class="pixel" x="35" y="34" width=".9" height=".9"/>
+  <rect class="pixel" x="48" y="34" width=".9" height=".9"/>
+  <rect class="pixel" x="59" y="34" width=".9" height=".9"/>
+  <rect class="pixel" x="60" y="34" width=".9" height=".9"/>
+  <rect class="pixel" x="79" y="34" width=".9" height=".9"/>
+  <rect class="pixel" x="35" y="35" width=".9" height=".9"/>
+  <rect class="pixel" x="15" y="35" width=".9" height=".9"/>
+  <rect class="pixel" x="16" y="35" width=".9" height=".9"/>
+  <rect class="pixel" x="26" y="35" width=".9" height=".9"/>
+  <rect class="pixel" x="34" y="35" width=".9" height=".9"/>
+  <rect class="pixel" x="36" y="35" width=".9" height=".9"/>
+  <rect class="pixel" x="59" y="35" width=".9" height=".9"/>
+  <rect class="pixel" x="60" y="35" width=".9" height=".9"/>
+  <rect class="pixel" x="61" y="35" width=".9" height=".9"/>
+  <rect class="pixel" x="36" y="36" width=".9" height=".9"/>
+  <rect class="pixel" x="16" y="36" width=".9" height=".9"/>
+  <rect class="pixel" x="26" y="36" width=".9" height=".9"/>
+  <rect class="pixel" x="27" y="36" width=".9" height=".9"/>
+  <rect class="pixel" x="35" y="36" width=".9" height=".9"/>
+  <rect class="pixel" x="37" y="36" width=".9" height=".9"/>
+  <rect class="pixel" x="60" y="36" width=".9" height=".9"/>
+  <rect class="pixel" x="61" y="36" width=".9" height=".9"/>
+  <rect class="pixel" x="62" y="36" width=".9" height=".9"/>
+  <rect class="pixel" x="37" y="37" width=".9" height=".9"/>
+  <rect class="pixel" x="16" y="37" width=".9" height=".9"/>
+  <rect class="pixel" x="17" y="37" width=".9" height=".9"/>
+  <rect class="pixel" x="27" y="37" width=".9" height=".9"/>
+  <rect class="pixel" x="36" y="37" width=".9" height=".9"/>
+  <rect class="pixel" x="38" y="37" width=".9" height=".9"/>
+  <rect class="pixel" x="61" y="37" width=".9" height=".9"/>
+  <rect class="pixel" x="62" y="37" width=".9" height=".9"/>
+  <rect class="pixel" x="63" y="37" width=".9" height=".9"/>
+  <rect class="pixel" x="38" y="38" width=".9" height=".9"/>
+  <rect class="pixel" x="17" y="38" width=".9" height=".9"/>
+  <rect class="pixel" x="27" y="38" width=".9" height=".9"/>
+  <rect class="pixel" x="28" y="38" width=".9" height=".9"/>
+  <rect class="pixel" x="37" y="38" width=".9" height=".9"/>
+  <rect class="pixel" x="39" y="38" width=".9" height=".9"/>
+  <rect class="pixel" x="62" y="38" width=".9" height=".9"/>
+  <rect class="pixel" x="63" y="38" width=".9" height=".9"/>
+  <rect class="pixel" x="64" y="38" width=".9" height=".9"/>
+  <rect class="pixel" x="39" y="39" width=".9" height=".9"/>
+  <rect class="pixel" x="17" y="39" width=".9" height=".9"/>
+  <rect class="pixel" x="18" y="39" width=".9" height=".9"/>
+  <rect class="pixel" x="28" y="39" width=".9" height=".9"/>
+  <rect class="pixel" x="38" y="39" width=".9" height=".9"/>
+  <rect class="pixel" x="40" y="39" width=".9" height=".9"/>
+  <rect class="pixel" x="63" y="39" width=".9" height=".9"/>
+  <rect class="pixel" x="64" y="39" width=".9" height=".9"/>
+  <rect class="pixel" x="65" y="39" width=".9" height=".9"/>
+  <rect class="pixel" x="40" y="40" width=".9" height=".9"/>
+  <rect class="pixel" x="18" y="40" width=".9" height=".9"/>
+  <rect class="pixel" x="28" y="40" width=".9" height=".9"/>
+  <rect class="pixel" x="29" y="40" width=".9" height=".9"/>
+  <rect class="pixel" x="39" y="40" width=".9" height=".9"/>
+  <rect class="pixel" x="41" y="40" width=".9" height=".9"/>
+  <rect class="pixel" x="64" y="40" width=".9" height=".9"/>
+  <rect class="pixel" x="65" y="40" width=".9" height=".9"/>
+  <rect class="pixel" x="66" y="40" width=".9" height=".9"/>
+  <rect class="pixel" x="41" y="41" width=".9" height=".9"/>
+  <rect class="pixel" x="18" y="41" width=".9" height=".9"/>
+  <rect class="pixel" x="29" y="41" width=".9" height=".9"/>
+  <rect class="pixel" x="40" y="41" width=".9" height=".9"/>
+  <rect class="pixel" x="51" y="41" width=".9" height=".9"/>
+  <rect class="pixel" x="65" y="41" width=".9" height=".9"/>
+  <rect class="pixel" x="66" y="41" width=".9" height=".9"/>
+  <rect class="pixel" x="67" y="41" width=".9" height=".9"/>
+  <rect class="pixel" x="80" y="41" width=".9" height=".9"/>
+  <rect class="pixel" x="42" y="42" width=".9" height=".9"/>
+  <rect class="pixel" x="19" y="42" width=".9" height=".9"/>
+  <rect class="pixel" x="30" y="42" width=".9" height=".9"/>
+  <rect class="pixel" x="43" y="42" width=".9" height=".9"/>
+  <rect class="pixel" x="54" y="42" width=".9" height=".9"/>
+  <rect class="pixel" x="72" y="42" width=".9" height=".9"/>
+  <rect class="pixel" x="73" y="42" width=".9" height=".9"/>
+  <rect class="pixel" x="74" y="42" width=".9" height=".9"/>
+  <rect class="pixel" x="87" y="42" width=".9" height=".9"/>
+  <rect class="pixel" x="43" y="43" width=".9" height=".9"/>
+  <rect class="pixel" x="19" y="43" width=".9" height=".9"/>
+  <rect class="pixel" x="30" y="43" width=".9" height=".9"/>
+  <rect class="pixel" x="31" y="43" width=".9" height=".9"/>
+  <rect class="pixel" x="42" y="43" width=".9" height=".9"/>
+  <rect class="pixel" x="44" y="43" width=".9" height=".9"/>
+  <rect class="pixel" x="73" y="43" width=".9" height=".9"/>
+  <rect class="pixel" x="74" y="43" width=".9" height=".9"/>
+  <rect class="pixel" x="75" y="43" width=".9" height=".9"/>
+  <rect class="pixel" x="44" y="44" width=".9" height=".9"/>
+  <rect class="pixel" x="19" y="44" width=".9" height=".9"/>
+  <rect class="pixel" x="20" y="44" width=".9" height=".9"/>
+  <rect class="pixel" x="31" y="44" width=".9" height=".9"/>
+  <rect class="pixel" x="43" y="44" width=".9" height=".9"/>
+  <rect class="pixel" x="45" y="44" width=".9" height=".9"/>
+  <rect class="pixel" x="74" y="44" width=".9" height=".9"/>
+  <rect class="pixel" x="75" y="44" width=".9" height=".9"/>
+  <rect class="pixel" x="76" y="44" width=".9" height=".9"/>
+  <rect class="pixel" x="45" y="45" width=".9" height=".9"/>
+  <rect class="pixel" x="20" y="45" width=".9" height=".9"/>
+  <rect class="pixel" x="31" y="45" width=".9" height=".9"/>
+  <rect class="pixel" x="32" y="45" width=".9" height=".9"/>
+  <rect class="pixel" x="44" y="45" width=".9" height=".9"/>
+  <rect class="pixel" x="46" y="45" width=".9" height=".9"/>
+  <rect class="pixel" x="75" y="45" width=".9" height=".9"/>
+  <rect class="pixel" x="76" y="45" width=".9" height=".9"/>
+  <rect class="pixel" x="77" y="45" width=".9" height=".9"/>
+  <rect class="pixel" x="46" y="46" width=".9" height=".9"/>
+  <rect class="pixel" x="20" y="46" width=".9" height=".9"/>
+  <rect class="pixel" x="21" y="46" width=".9" height=".9"/>
+  <rect class="pixel" x="32" y="46" width=".9" height=".9"/>
+  <rect class="pixel" x="45" y="46" width=".9" height=".9"/>
+  <rect class="pixel" x="47" y="46" width=".9" height=".9"/>
+  <rect class="pixel" x="76" y="46" width=".9" height=".9"/>
+  <rect class="pixel" x="77" y="46" width=".9" height=".9"/>
+  <rect class="pixel" x="78" y="46" width=".9" height=".9"/>
+  <rect class="pixel" x="47" y="47" width=".9" height=".9"/>
+  <rect class="pixel" x="21" y="47" width=".9" height=".9"/>
+  <rect class="pixel" x="32" y="47" width=".9" height=".9"/>
+  <rect class="pixel" x="33" y="47" width=".9" height=".9"/>
+  <rect class="pixel" x="46" y="47" width=".9" height=".9"/>
+  <rect class="pixel" x="48" y="47" width=".9" height=".9"/>
+  <rect class="pixel" x="77" y="47" width=".9" height=".9"/>
+  <rect class="pixel" x="78" y="47" width=".9" height=".9"/>
+  <rect class="pixel" x="79" y="47" width=".9" height=".9"/>
+  <rect class="pixel" x="48" y="48" width=".9" height=".9"/>
+  <rect class="pixel" x="15" y="48" width=".9" height=".9"/>
+  <rect class="pixel" x="21" y="48" width=".9" height=".9"/>
+  <rect class="pixel" x="33" y="48" width=".9" height=".9"/>
+  <rect class="pixel" x="34" y="48" width=".9" height=".9"/>
+  <rect class="pixel" x="47" y="48" width=".9" height=".9"/>
+  <rect class="pixel" x="59" y="48" width=".9" height=".9"/>
+  <rect class="pixel" x="78" y="48" width=".9" height=".9"/>
+  <rect class="pixel" x="79" y="48" width=".9" height=".9"/>
+  <rect class="pixel" x="49" y="49" width=".9" height=".9"/>
+  <rect class="pixel" x="7" y="49" width=".9" height=".9"/>
+  <rect class="pixel" x="13" y="49" width=".9" height=".9"/>
+  <rect class="pixel" x="22" y="49" width=".9" height=".9"/>
+  <rect class="pixel" x="24" y="49" width=".9" height=".9"/>
+  <rect class="pixel" x="52" y="49" width=".9" height=".9"/>
+  <rect class="pixel" x="55" y="49" width=".9" height=".9"/>
+  <rect class="pixel" x="57" y="49" width=".9" height=".9"/>
+  <rect class="pixel" x="88" y="49" width=".9" height=".9"/>
+  <rect class="pixel" x="90" y="49" width=".9" height=".9"/>
+  <rect class="pixel" x="91" y="49" width=".9" height=".9"/>
+  <rect class="pixel" x="50" y="50" width=".9" height=".9"/>
+  <rect class="pixel" x="8" y="50" width=".9" height=".9"/>
+  <rect class="pixel" x="14" y="50" width=".9" height=".9"/>
+  <rect class="pixel" x="23" y="50" width=".9" height=".9"/>
+  <rect class="pixel" x="25" y="50" width=".9" height=".9"/>
+  <rect class="pixel" x="53" y="50" width=".9" height=".9"/>
+  <rect class="pixel" x="56" y="50" width=".9" height=".9"/>
+  <rect class="pixel" x="58" y="50" width=".9" height=".9"/>
+  <rect class="pixel" x="89" y="50" width=".9" height=".9"/>
+  <rect class="pixel" x="92" y="50" width=".9" height=".9"/>
+  <rect class="pixel" x="93" y="50" width=".9" height=".9"/>
+  <rect class="pixel" x="51" y="51" width=".9" height=".9"/>
+  <rect class="pixel" x="10" y="51" width=".9" height=".9"/>
+  <rect class="pixel" x="13" y="51" width=".9" height=".9"/>
+  <rect class="pixel" x="18" y="51" width=".9" height=".9"/>
+  <rect class="pixel" x="24" y="51" width=".9" height=".9"/>
+  <rect class="pixel" x="29" y="51" width=".9" height=".9"/>
+  <rect class="pixel" x="41" y="51" width=".9" height=".9"/>
+  <rect class="pixel" x="52" y="51" width=".9" height=".9"/>
+  <rect class="pixel" x="68" y="51" width=".9" height=".9"/>
+  <rect class="pixel" x="80" y="51" width=".9" height=".9"/>
+  <rect class="pixel" x="81" y="51" width=".9" height=".9"/>
+  <rect class="pixel" x="52" y="52" width=".9" height=".9"/>
+  <rect class="pixel" x="13" y="52" width=".9" height=".9"/>
+  <rect class="pixel" x="24" y="52" width=".9" height=".9"/>
+  <rect class="pixel" x="49" y="52" width=".9" height=".9"/>
+  <rect class="pixel" x="51" y="52" width=".9" height=".9"/>
+  <rect class="pixel" x="68" y="52" width=".9" height=".9"/>
+  <rect class="pixel" x="69" y="52" width=".9" height=".9"/>
+  <rect class="pixel" x="81" y="52" width=".9" height=".9"/>
+  <rect class="pixel" x="82" y="52" width=".9" height=".9"/>
+  <rect class="pixel" x="83" y="52" width=".9" height=".9"/>
+  <rect class="pixel" x="90" y="52" width=".9" height=".9"/>
+  <rect class="pixel" x="53" y="53" width=".9" height=".9"/>
+  <rect class="pixel" x="14" y="53" width=".9" height=".9"/>
+  <rect class="pixel" x="25" y="53" width=".9" height=".9"/>
+  <rect class="pixel" x="50" y="53" width=".9" height=".9"/>
+  <rect class="pixel" x="54" y="53" width=".9" height=".9"/>
+  <rect class="pixel" x="70" y="53" width=".9" height=".9"/>
+  <rect class="pixel" x="71" y="53" width=".9" height=".9"/>
+  <rect class="pixel" x="84" y="53" width=".9" height=".9"/>
+  <rect class="pixel" x="85" y="53" width=".9" height=".9"/>
+  <rect class="pixel" x="86" y="53" width=".9" height=".9"/>
+  <rect class="pixel" x="93" y="53" width=".9" height=".9"/>
+  <rect class="pixel" x="54" y="54" width=".9" height=".9"/>
+  <rect class="pixel" x="11" y="54" width=".9" height=".9"/>
+  <rect class="pixel" x="14" y="54" width=".9" height=".9"/>
+  <rect class="pixel" x="19" y="54" width=".9" height=".9"/>
+  <rect class="pixel" x="25" y="54" width=".9" height=".9"/>
+  <rect class="pixel" x="30" y="54" width=".9" height=".9"/>
+  <rect class="pixel" x="42" y="54" width=".9" height=".9"/>
+  <rect class="pixel" x="53" y="54" width=".9" height=".9"/>
+  <rect class="pixel" x="71" y="54" width=".9" height=".9"/>
+  <rect class="pixel" x="86" y="54" width=".9" height=".9"/>
+  <rect class="pixel" x="87" y="54" width=".9" height=".9"/>
+  <rect class="pixel" x="55" y="55" width=".9" height=".9"/>
+  <rect class="pixel" x="22" y="55" width=".9" height=".9"/>
+  <rect class="pixel" x="49" y="55" width=".9" height=".9"/>
+  <rect class="pixel" x="88" y="55" width=".9" height=".9"/>
+  <rect class="pixel" x="94" y="55" width=".9" height=".9"/>
+  <rect class="pixel" x="117" y="55" width=".9" height=".9"/>
+  <rect class="pixel" x="56" y="56" width=".9" height=".9"/>
+  <rect class="pixel" x="23" y="56" width=".9" height=".9"/>
+  <rect class="pixel" x="50" y="56" width=".9" height=".9"/>
+  <rect class="pixel" x="89" y="56" width=".9" height=".9"/>
+  <rect class="pixel" x="94" y="56" width=".9" height=".9"/>
+  <rect class="pixel" x="117" y="56" width=".9" height=".9"/>
+  <rect class="pixel" x="57" y="57" width=".9" height=".9"/>
+  <rect class="pixel" x="49" y="57" width=".9" height=".9"/>
+  <rect class="pixel" x="88" y="57" width=".9" height=".9"/>
+  <rect class="pixel" x="90" y="57" width=".9" height=".9"/>
+  <rect class="pixel" x="91" y="57" width=".9" height=".9"/>
+  <rect class="pixel" x="118" y="57" width=".9" height=".9"/>
+  <rect class="pixel" x="58" y="58" width=".9" height=".9"/>
+  <rect class="pixel" x="50" y="58" width=".9" height=".9"/>
+  <rect class="pixel" x="89" y="58" width=".9" height=".9"/>
+  <rect class="pixel" x="92" y="58" width=".9" height=".9"/>
+  <rect class="pixel" x="93" y="58" width=".9" height=".9"/>
+  <rect class="pixel" x="121" y="58" width=".9" height=".9"/>
+  <rect class="pixel" x="59" y="59" width=".9" height=".9"/>
+  <rect class="pixel" x="34" y="59" width=".9" height=".9"/>
+  <rect class="pixel" x="35" y="59" width=".9" height=".9"/>
+  <rect class="pixel" x="48" y="59" width=".9" height=".9"/>
+  <rect class="pixel" x="60" y="59" width=".9" height=".9"/>
+  <rect class="pixel" x="79" y="59" width=".9" height=".9"/>
+  <rect class="pixel" x="60" y="60" width=".9" height=".9"/>
+  <rect class="pixel" x="34" y="60" width=".9" height=".9"/>
+  <rect class="pixel" x="35" y="60" width=".9" height=".9"/>
+  <rect class="pixel" x="36" y="60" width=".9" height=".9"/>
+  <rect class="pixel" x="59" y="60" width=".9" height=".9"/>
+  <rect class="pixel" x="61" y="60" width=".9" height=".9"/>
+  <rect class="pixel" x="61" y="61" width=".9" height=".9"/>
+  <rect class="pixel" x="35" y="61" width=".9" height=".9"/>
+  <rect class="pixel" x="36" y="61" width=".9" height=".9"/>
+  <rect class="pixel" x="37" y="61" width=".9" height=".9"/>
+  <rect class="pixel" x="60" y="61" width=".9" height=".9"/>
+  <rect class="pixel" x="62" y="61" width=".9" height=".9"/>
+  <rect class="pixel" x="62" y="62" width=".9" height=".9"/>
+  <rect class="pixel" x="36" y="62" width=".9" height=".9"/>
+  <rect class="pixel" x="37" y="62" width=".9" height=".9"/>
+  <rect class="pixel" x="38" y="62" width=".9" height=".9"/>
+  <rect class="pixel" x="61" y="62" width=".9" height=".9"/>
+  <rect class="pixel" x="63" y="62" width=".9" height=".9"/>
+  <rect class="pixel" x="63" y="63" width=".9" height=".9"/>
+  <rect class="pixel" x="37" y="63" width=".9" height=".9"/>
+  <rect class="pixel" x="38" y="63" width=".9" height=".9"/>
+  <rect class="pixel" x="39" y="63" width=".9" height=".9"/>
+  <rect class="pixel" x="62" y="63" width=".9" height=".9"/>
+  <rect class="pixel" x="64" y="63" width=".9" height=".9"/>
+  <rect class="pixel" x="64" y="64" width=".9" height=".9"/>
+  <rect class="pixel" x="38" y="64" width=".9" height=".9"/>
+  <rect class="pixel" x="39" y="64" width=".9" height=".9"/>
+  <rect class="pixel" x="40" y="64" width=".9" height=".9"/>
+  <rect class="pixel" x="63" y="64" width=".9" height=".9"/>
+  <rect class="pixel" x="65" y="64" width=".9" height=".9"/>
+  <rect class="pixel" x="65" y="65" width=".9" height=".9"/>
+  <rect class="pixel" x="39" y="65" width=".9" height=".9"/>
+  <rect class="pixel" x="40" y="65" width=".9" height=".9"/>
+  <rect class="pixel" x="41" y="65" width=".9" height=".9"/>
+  <rect class="pixel" x="64" y="65" width=".9" height=".9"/>
+  <rect class="pixel" x="66" y="65" width=".9" height=".9"/>
+  <rect class="pixel" x="66" y="66" width=".9" height=".9"/>
+  <rect class="pixel" x="40" y="66" width=".9" height=".9"/>
+  <rect class="pixel" x="41" y="66" width=".9" height=".9"/>
+  <rect class="pixel" x="65" y="66" width=".9" height=".9"/>
+  <rect class="pixel" x="67" y="66" width=".9" height=".9"/>
+  <rect class="pixel" x="80" y="66" width=".9" height=".9"/>
+  <rect class="pixel" x="67" y="67" width=".9" height=".9"/>
+  <rect class="pixel" x="41" y="67" width=".9" height=".9"/>
+  <rect class="pixel" x="66" y="67" width=".9" height=".9"/>
+  <rect class="pixel" x="80" y="67" width=".9" height=".9"/>
+  <rect class="pixel" x="81" y="67" width=".9" height=".9"/>
+  <rect class="pixel" x="97" y="67" width=".9" height=".9"/>
+  <rect class="pixel" x="68" y="68" width=".9" height=".9"/>
+  <rect class="pixel" x="51" y="68" width=".9" height=".9"/>
+  <rect class="pixel" x="52" y="68" width=".9" height=".9"/>
+  <rect class="pixel" x="80" y="68" width=".9" height=".9"/>
+  <rect class="pixel" x="81" y="68" width=".9" height=".9"/>
+  <rect class="pixel" x="82" y="68" width=".9" height=".9"/>
+  <rect class="pixel" x="69" y="69" width=".9" height=".9"/>
+  <rect class="pixel" x="52" y="69" width=".9" height=".9"/>
+  <rect class="pixel" x="82" y="69" width=".9" height=".9"/>
+  <rect class="pixel" x="83" y="69" width=".9" height=".9"/>
+  <rect class="pixel" x="90" y="69" width=".9" height=".9"/>
+  <rect class="pixel" x="109" y="69" width=".9" height=".9"/>
+  <rect class="pixel" x="70" y="70" width=".9" height=".9"/>
+  <rect class="pixel" x="53" y="70" width=".9" height=".9"/>
+  <rect class="pixel" x="84" y="70" width=".9" height=".9"/>
+  <rect class="pixel" x="85" y="70" width=".9" height=".9"/>
+  <rect class="pixel" x="93" y="70" width=".9" height=".9"/>
+  <rect class="pixel" x="116" y="70" width=".9" height=".9"/>
+  <rect class="pixel" x="71" y="71" width=".9" height=".9"/>
+  <rect class="pixel" x="53" y="71" width=".9" height=".9"/>
+  <rect class="pixel" x="54" y="71" width=".9" height=".9"/>
+  <rect class="pixel" x="85" y="71" width=".9" height=".9"/>
+  <rect class="pixel" x="86" y="71" width=".9" height=".9"/>
+  <rect class="pixel" x="87" y="71" width=".9" height=".9"/>
+  <rect class="pixel" x="72" y="72" width=".9" height=".9"/>
+  <rect class="pixel" x="42" y="72" width=".9" height=".9"/>
+  <rect class="pixel" x="73" y="72" width=".9" height=".9"/>
+  <rect class="pixel" x="86" y="72" width=".9" height=".9"/>
+  <rect class="pixel" x="87" y="72" width=".9" height=".9"/>
+  <rect class="pixel" x="108" y="72" width=".9" height=".9"/>
+  <rect class="pixel" x="73" y="73" width=".9" height=".9"/>
+  <rect class="pixel" x="42" y="73" width=".9" height=".9"/>
+  <rect class="pixel" x="43" y="73" width=".9" height=".9"/>
+  <rect class="pixel" x="72" y="73" width=".9" height=".9"/>
+  <rect class="pixel" x="74" y="73" width=".9" height=".9"/>
+  <rect class="pixel" x="87" y="73" width=".9" height=".9"/>
+  <rect class="pixel" x="74" y="74" width=".9" height=".9"/>
+  <rect class="pixel" x="42" y="74" width=".9" height=".9"/>
+  <rect class="pixel" x="43" y="74" width=".9" height=".9"/>
+  <rect class="pixel" x="44" y="74" width=".9" height=".9"/>
+  <rect class="pixel" x="73" y="74" width=".9" height=".9"/>
+  <rect class="pixel" x="75" y="74" width=".9" height=".9"/>
+  <rect class="pixel" x="75" y="75" width=".9" height=".9"/>
+  <rect class="pixel" x="43" y="75" width=".9" height=".9"/>
+  <rect class="pixel" x="44" y="75" width=".9" height=".9"/>
+  <rect class="pixel" x="45" y="75" width=".9" height=".9"/>
+  <rect class="pixel" x="74" y="75" width=".9" height=".9"/>
+  <rect class="pixel" x="76" y="75" width=".9" height=".9"/>
+  <rect class="pixel" x="76" y="76" width=".9" height=".9"/>
+  <rect class="pixel" x="44" y="76" width=".9" height=".9"/>
+  <rect class="pixel" x="45" y="76" width=".9" height=".9"/>
+  <rect class="pixel" x="46" y="76" width=".9" height=".9"/>
+  <rect class="pixel" x="75" y="76" width=".9" height=".9"/>
+  <rect class="pixel" x="77" y="76" width=".9" height=".9"/>
+  <rect class="pixel" x="77" y="77" width=".9" height=".9"/>
+  <rect class="pixel" x="45" y="77" width=".9" height=".9"/>
+  <rect class="pixel" x="46" y="77" width=".9" height=".9"/>
+  <rect class="pixel" x="47" y="77" width=".9" height=".9"/>
+  <rect class="pixel" x="76" y="77" width=".9" height=".9"/>
+  <rect class="pixel" x="78" y="77" width=".9" height=".9"/>
+  <rect class="pixel" x="78" y="78" width=".9" height=".9"/>
+  <rect class="pixel" x="46" y="78" width=".9" height=".9"/>
+  <rect class="pixel" x="47" y="78" width=".9" height=".9"/>
+  <rect class="pixel" x="48" y="78" width=".9" height=".9"/>
+  <rect class="pixel" x="77" y="78" width=".9" height=".9"/>
+  <rect class="pixel" x="79" y="78" width=".9" height=".9"/>
+  <rect class="pixel" x="79" y="79" width=".9" height=".9"/>
+  <rect class="pixel" x="34" y="79" width=".9" height=".9"/>
+  <rect class="pixel" x="47" y="79" width=".9" height=".9"/>
+  <rect class="pixel" x="48" y="79" width=".9" height=".9"/>
+  <rect class="pixel" x="59" y="79" width=".9" height=".9"/>
+  <rect class="pixel" x="78" y="79" width=".9" height=".9"/>
+  <rect class="pixel" x="80" y="80" width=".9" height=".9"/>
+  <rect class="pixel" x="29" y="80" width=".9" height=".9"/>
+  <rect class="pixel" x="41" y="80" width=".9" height=".9"/>
+  <rect class="pixel" x="51" y="80" width=".9" height=".9"/>
+  <rect class="pixel" x="66" y="80" width=".9" height=".9"/>
+  <rect class="pixel" x="67" y="80" width=".9" height=".9"/>
+  <rect class="pixel" x="68" y="80" width=".9" height=".9"/>
+  <rect class="pixel" x="81" y="80" width=".9" height=".9"/>
+  <rect class="pixel" x="97" y="80" width=".9" height=".9"/>
+  <rect class="pixel" x="81" y="81" width=".9" height=".9"/>
+  <rect class="pixel" x="51" y="81" width=".9" height=".9"/>
+  <rect class="pixel" x="52" y="81" width=".9" height=".9"/>
+  <rect class="pixel" x="67" y="81" width=".9" height=".9"/>
+  <rect class="pixel" x="68" y="81" width=".9" height=".9"/>
+  <rect class="pixel" x="80" y="81" width=".9" height=".9"/>
+  <rect class="pixel" x="82" y="81" width=".9" height=".9"/>
+  <rect class="pixel" x="97" y="81" width=".9" height=".9"/>
+  <rect class="pixel" x="98" y="81" width=".9" height=".9"/>
+  <rect class="pixel" x="82" y="82" width=".9" height=".9"/>
+  <rect class="pixel" x="52" y="82" width=".9" height=".9"/>
+  <rect class="pixel" x="68" y="82" width=".9" height=".9"/>
+  <rect class="pixel" x="69" y="82" width=".9" height=".9"/>
+  <rect class="pixel" x="81" y="82" width=".9" height=".9"/>
+  <rect class="pixel" x="83" y="82" width=".9" height=".9"/>
+  <rect class="pixel" x="97" y="82" width=".9" height=".9"/>
+  <rect class="pixel" x="98" y="82" width=".9" height=".9"/>
+  <rect class="pixel" x="99" y="82" width=".9" height=".9"/>
+  <rect class="pixel" x="83" y="83" width=".9" height=".9"/>
+  <rect class="pixel" x="52" y="83" width=".9" height=".9"/>
+  <rect class="pixel" x="69" y="83" width=".9" height=".9"/>
+  <rect class="pixel" x="82" y="83" width=".9" height=".9"/>
+  <rect class="pixel" x="90" y="83" width=".9" height=".9"/>
+  <rect class="pixel" x="98" y="83" width=".9" height=".9"/>
+  <rect class="pixel" x="99" y="83" width=".9" height=".9"/>
+  <rect class="pixel" x="100" y="83" width=".9" height=".9"/>
+  <rect class="pixel" x="109" y="83" width=".9" height=".9"/>
+  <rect class="pixel" x="84" y="84" width=".9" height=".9"/>
+  <rect class="pixel" x="53" y="84" width=".9" height=".9"/>
+  <rect class="pixel" x="70" y="84" width=".9" height=".9"/>
+  <rect class="pixel" x="85" y="84" width=".9" height=".9"/>
+  <rect class="pixel" x="93" y="84" width=".9" height=".9"/>
+  <rect class="pixel" x="105" y="84" width=".9" height=".9"/>
+  <rect class="pixel" x="106" y="84" width=".9" height=".9"/>
+  <rect class="pixel" x="107" y="84" width=".9" height=".9"/>
+  <rect class="pixel" x="116" y="84" width=".9" height=".9"/>
+  <rect class="pixel" x="85" y="85" width=".9" height=".9"/>
+  <rect class="pixel" x="53" y="85" width=".9" height=".9"/>
+  <rect class="pixel" x="70" y="85" width=".9" height=".9"/>
+  <rect class="pixel" x="71" y="85" width=".9" height=".9"/>
+  <rect class="pixel" x="84" y="85" width=".9" height=".9"/>
+  <rect class="pixel" x="86" y="85" width=".9" height=".9"/>
+  <rect class="pixel" x="106" y="85" width=".9" height=".9"/>
+  <rect class="pixel" x="107" y="85" width=".9" height=".9"/>
+  <rect class="pixel" x="108" y="85" width=".9" height=".9"/>
+  <rect class="pixel" x="86" y="86" width=".9" height=".9"/>
+  <rect class="pixel" x="53" y="86" width=".9" height=".9"/>
+  <rect class="pixel" x="54" y="86" width=".9" height=".9"/>
+  <rect class="pixel" x="71" y="86" width=".9" height=".9"/>
+  <rect class="pixel" x="72" y="86" width=".9" height=".9"/>
+  <rect class="pixel" x="85" y="86" width=".9" height=".9"/>
+  <rect class="pixel" x="87" y="86" width=".9" height=".9"/>
+  <rect class="pixel" x="107" y="86" width=".9" height=".9"/>
+  <rect class="pixel" x="108" y="86" width=".9" height=".9"/>
+  <rect class="pixel" x="87" y="87" width=".9" height=".9"/>
+  <rect class="pixel" x="30" y="87" width=".9" height=".9"/>
+  <rect class="pixel" x="42" y="87" width=".9" height=".9"/>
+  <rect class="pixel" x="54" y="87" width=".9" height=".9"/>
+  <rect class="pixel" x="71" y="87" width=".9" height=".9"/>
+  <rect class="pixel" x="72" y="87" width=".9" height=".9"/>
+  <rect class="pixel" x="73" y="87" width=".9" height=".9"/>
+  <rect class="pixel" x="86" y="87" width=".9" height=".9"/>
+  <rect class="pixel" x="108" y="87" width=".9" height=".9"/>
+  <rect class="pixel" x="88" y="88" width=".9" height=".9"/>
+  <rect class="pixel" x="22" y="88" width=".9" height=".9"/>
+  <rect class="pixel" x="49" y="88" width=".9" height=".9"/>
+  <rect class="pixel" x="55" y="88" width=".9" height=".9"/>
+  <rect class="pixel" x="57" y="88" width=".9" height=".9"/>
+  <rect class="pixel" x="91" y="88" width=".9" height=".9"/>
+  <rect class="pixel" x="94" y="88" width=".9" height=".9"/>
+  <rect class="pixel" x="95" y="88" width=".9" height=".9"/>
+  <rect class="pixel" x="117" y="88" width=".9" height=".9"/>
+  <rect class="pixel" x="118" y="88" width=".9" height=".9"/>
+  <rect class="pixel" x="119" y="88" width=".9" height=".9"/>
+  <rect class="pixel" x="89" y="89" width=".9" height=".9"/>
+  <rect class="pixel" x="23" y="89" width=".9" height=".9"/>
+  <rect class="pixel" x="50" y="89" width=".9" height=".9"/>
+  <rect class="pixel" x="56" y="89" width=".9" height=".9"/>
+  <rect class="pixel" x="58" y="89" width=".9" height=".9"/>
+  <rect class="pixel" x="92" y="89" width=".9" height=".9"/>
+  <rect class="pixel" x="94" y="89" width=".9" height=".9"/>
+  <rect class="pixel" x="96" y="89" width=".9" height=".9"/>
+  <rect class="pixel" x="117" y="89" width=".9" height=".9"/>
+  <rect class="pixel" x="120" y="89" width=".9" height=".9"/>
+  <rect class="pixel" x="121" y="89" width=".9" height=".9"/>
+  <rect class="pixel" x="90" y="90" width=".9" height=".9"/>
+  <rect class="pixel" x="24" y="90" width=".9" height=".9"/>
+  <rect class="pixel" x="49" y="90" width=".9" height=".9"/>
+  <rect class="pixel" x="52" y="90" width=".9" height=".9"/>
+  <rect class="pixel" x="57" y="90" width=".9" height=".9"/>
+  <rect class="pixel" x="69" y="90" width=".9" height=".9"/>
+  <rect class="pixel" x="83" y="90" width=".9" height=".9"/>
+  <rect class="pixel" x="91" y="90" width=".9" height=".9"/>
+  <rect class="pixel" x="101" y="90" width=".9" height=".9"/>
+  <rect class="pixel" x="109" y="90" width=".9" height=".9"/>
+  <rect class="pixel" x="110" y="90" width=".9" height=".9"/>
+  <rect class="pixel" x="91" y="91" width=".9" height=".9"/>
+  <rect class="pixel" x="49" y="91" width=".9" height=".9"/>
+  <rect class="pixel" x="57" y="91" width=".9" height=".9"/>
+  <rect class="pixel" x="88" y="91" width=".9" height=".9"/>
+  <rect class="pixel" x="90" y="91" width=".9" height=".9"/>
+  <rect class="pixel" x="101" y="91" width=".9" height=".9"/>
+  <rect class="pixel" x="102" y="91" width=".9" height=".9"/>
+  <rect class="pixel" x="110" y="91" width=".9" height=".9"/>
+  <rect class="pixel" x="111" y="91" width=".9" height=".9"/>
+  <rect class="pixel" x="112" y="91" width=".9" height=".9"/>
+  <rect class="pixel" x="118" y="91" width=".9" height=".9"/>
+  <rect class="pixel" x="92" y="92" width=".9" height=".9"/>
+  <rect class="pixel" x="50" y="92" width=".9" height=".9"/>
+  <rect class="pixel" x="58" y="92" width=".9" height=".9"/>
+  <rect class="pixel" x="89" y="92" width=".9" height=".9"/>
+  <rect class="pixel" x="93" y="92" width=".9" height=".9"/>
+  <rect class="pixel" x="103" y="92" width=".9" height=".9"/>
+  <rect class="pixel" x="104" y="92" width=".9" height=".9"/>
+  <rect class="pixel" x="113" y="92" width=".9" height=".9"/>
+  <rect class="pixel" x="114" y="92" width=".9" height=".9"/>
+  <rect class="pixel" x="115" y="92" width=".9" height=".9"/>
+  <rect class="pixel" x="121" y="92" width=".9" height=".9"/>
+  <rect class="pixel" x="93" y="93" width=".9" height=".9"/>
+  <rect class="pixel" x="25" y="93" width=".9" height=".9"/>
+  <rect class="pixel" x="50" y="93" width=".9" height=".9"/>
+  <rect class="pixel" x="53" y="93" width=".9" height=".9"/>
+  <rect class="pixel" x="58" y="93" width=".9" height=".9"/>
+  <rect class="pixel" x="70" y="93" width=".9" height=".9"/>
+  <rect class="pixel" x="84" y="93" width=".9" height=".9"/>
+  <rect class="pixel" x="92" y="93" width=".9" height=".9"/>
+  <rect class="pixel" x="104" y="93" width=".9" height=".9"/>
+  <rect class="pixel" x="115" y="93" width=".9" height=".9"/>
+  <rect class="pixel" x="116" y="93" width=".9" height=".9"/>
+  <rect class="pixel" x="94" y="94" width=".9" height=".9"/>
+  <rect class="pixel" x="55" y="94" width=".9" height=".9"/>
+  <rect class="pixel" x="56" y="94" width=".9" height=".9"/>
+  <rect class="pixel" x="88" y="94" width=".9" height=".9"/>
+  <rect class="pixel" x="89" y="94" width=".9" height=".9"/>
+  <rect class="pixel" x="117" y="94" width=".9" height=".9"/>
+  <rect class="pixel" x="95" y="95" width=".9" height=".9"/>
+  <rect class="pixel" x="88" y="95" width=".9" height=".9"/>
+  <rect class="pixel" x="117" y="95" width=".9" height=".9"/>
+  <rect class="pixel" x="118" y="95" width=".9" height=".9"/>
+  <rect class="pixel" x="119" y="95" width=".9" height=".9"/>
+  <rect class="pixel" x="142" y="95" width=".9" height=".9"/>
+  <rect class="pixel" x="96" y="96" width=".9" height=".9"/>
+  <rect class="pixel" x="89" y="96" width=".9" height=".9"/>
+  <rect class="pixel" x="117" y="96" width=".9" height=".9"/>
+  <rect class="pixel" x="120" y="96" width=".9" height=".9"/>
+  <rect class="pixel" x="121" y="96" width=".9" height=".9"/>
+  <rect class="pixel" x="142" y="96" width=".9" height=".9"/>
+  <rect class="pixel" x="97" y="97" width=".9" height=".9"/>
+  <rect class="pixel" x="67" y="97" width=".9" height=".9"/>
+  <rect class="pixel" x="80" y="97" width=".9" height=".9"/>
+  <rect class="pixel" x="81" y="97" width=".9" height=".9"/>
+  <rect class="pixel" x="82" y="97" width=".9" height=".9"/>
+  <rect class="pixel" x="98" y="97" width=".9" height=".9"/>
+  <rect class="pixel" x="98" y="98" width=".9" height=".9"/>
+  <rect class="pixel" x="81" y="98" width=".9" height=".9"/>
+  <rect class="pixel" x="82" y="98" width=".9" height=".9"/>
+  <rect class="pixel" x="83" y="98" width=".9" height=".9"/>
+  <rect class="pixel" x="97" y="98" width=".9" height=".9"/>
+  <rect class="pixel" x="99" y="98" width=".9" height=".9"/>
+  <rect class="pixel" x="99" y="99" width=".9" height=".9"/>
+  <rect class="pixel" x="82" y="99" width=".9" height=".9"/>
+  <rect class="pixel" x="83" y="99" width=".9" height=".9"/>
+  <rect class="pixel" x="98" y="99" width=".9" height=".9"/>
+  <rect class="pixel" x="100" y="99" width=".9" height=".9"/>
+  <rect class="pixel" x="109" y="99" width=".9" height=".9"/>
+  <rect class="pixel" x="100" y="100" width=".9" height=".9"/>
+  <rect class="pixel" x="83" y="100" width=".9" height=".9"/>
+  <rect class="pixel" x="99" y="100" width=".9" height=".9"/>
+  <rect class="pixel" x="109" y="100" width=".9" height=".9"/>
+  <rect class="pixel" x="110" y="100" width=".9" height=".9"/>
+  <rect class="pixel" x="122" y="100" width=".9" height=".9"/>
+  <rect class="pixel" x="101" y="101" width=".9" height=".9"/>
+  <rect class="pixel" x="90" y="101" width=".9" height=".9"/>
+  <rect class="pixel" x="91" y="101" width=".9" height=".9"/>
+  <rect class="pixel" x="109" y="101" width=".9" height=".9"/>
+  <rect class="pixel" x="110" y="101" width=".9" height=".9"/>
+  <rect class="pixel" x="111" y="101" width=".9" height=".9"/>
+  <rect class="pixel" x="102" y="102" width=".9" height=".9"/>
+  <rect class="pixel" x="91" y="102" width=".9" height=".9"/>
+  <rect class="pixel" x="111" y="102" width=".9" height=".9"/>
+  <rect class="pixel" x="112" y="102" width=".9" height=".9"/>
+  <rect class="pixel" x="118" y="102" width=".9" height=".9"/>
+  <rect class="pixel" x="134" y="102" width=".9" height=".9"/>
+  <rect class="pixel" x="103" y="103" width=".9" height=".9"/>
+  <rect class="pixel" x="92" y="103" width=".9" height=".9"/>
+  <rect class="pixel" x="113" y="103" width=".9" height=".9"/>
+  <rect class="pixel" x="114" y="103" width=".9" height=".9"/>
+  <rect class="pixel" x="121" y="103" width=".9" height=".9"/>
+  <rect class="pixel" x="141" y="103" width=".9" height=".9"/>
+  <rect class="pixel" x="104" y="104" width=".9" height=".9"/>
+  <rect class="pixel" x="92" y="104" width=".9" height=".9"/>
+  <rect class="pixel" x="93" y="104" width=".9" height=".9"/>
+  <rect class="pixel" x="114" y="104" width=".9" height=".9"/>
+  <rect class="pixel" x="115" y="104" width=".9" height=".9"/>
+  <rect class="pixel" x="116" y="104" width=".9" height=".9"/>
+  <rect class="pixel" x="105" y="105" width=".9" height=".9"/>
+  <rect class="pixel" x="84" y="105" width=".9" height=".9"/>
+  <rect class="pixel" x="106" y="105" width=".9" height=".9"/>
+  <rect class="pixel" x="115" y="105" width=".9" height=".9"/>
+  <rect class="pixel" x="116" y="105" width=".9" height=".9"/>
+  <rect class="pixel" x="133" y="105" width=".9" height=".9"/>
+  <rect class="pixel" x="106" y="106" width=".9" height=".9"/>
+  <rect class="pixel" x="84" y="106" width=".9" height=".9"/>
+  <rect class="pixel" x="85" y="106" width=".9" height=".9"/>
+  <rect class="pixel" x="105" y="106" width=".9" height=".9"/>
+  <rect class="pixel" x="107" y="106" width=".9" height=".9"/>
+  <rect class="pixel" x="116" y="106" width=".9" height=".9"/>
+  <rect class="pixel" x="107" y="107" width=".9" height=".9"/>
+  <rect class="pixel" x="84" y="107" width=".9" height=".9"/>
+  <rect class="pixel" x="85" y="107" width=".9" height=".9"/>
+  <rect class="pixel" x="86" y="107" width=".9" height=".9"/>
+  <rect class="pixel" x="106" y="107" width=".9" height=".9"/>
+  <rect class="pixel" x="108" y="107" width=".9" height=".9"/>
+  <rect class="pixel" x="108" y="108" width=".9" height=".9"/>
+  <rect class="pixel" x="72" y="108" width=".9" height=".9"/>
+  <rect class="pixel" x="85" y="108" width=".9" height=".9"/>
+  <rect class="pixel" x="86" y="108" width=".9" height=".9"/>
+  <rect class="pixel" x="87" y="108" width=".9" height=".9"/>
+  <rect class="pixel" x="107" y="108" width=".9" height=".9"/>
+  <rect class="pixel" x="109" y="109" width=".9" height=".9"/>
+  <rect class="pixel" x="69" y="109" width=".9" height=".9"/>
+  <rect class="pixel" x="83" y="109" width=".9" height=".9"/>
+  <rect class="pixel" x="90" y="109" width=".9" height=".9"/>
+  <rect class="pixel" x="99" y="109" width=".9" height=".9"/>
+  <rect class="pixel" x="100" y="109" width=".9" height=".9"/>
+  <rect class="pixel" x="101" y="109" width=".9" height=".9"/>
+  <rect class="pixel" x="110" y="109" width=".9" height=".9"/>
+  <rect class="pixel" x="122" y="109" width=".9" height=".9"/>
+  <rect class="pixel" x="110" y="110" width=".9" height=".9"/>
+  <rect class="pixel" x="90" y="110" width=".9" height=".9"/>
+  <rect class="pixel" x="91" y="110" width=".9" height=".9"/>
+  <rect class="pixel" x="100" y="110" width=".9" height=".9"/>
+  <rect class="pixel" x="101" y="110" width=".9" height=".9"/>
+  <rect class="pixel" x="109" y="110" width=".9" height=".9"/>
+  <rect class="pixel" x="111" y="110" width=".9" height=".9"/>
+  <rect class="pixel" x="122" y="110" width=".9" height=".9"/>
+  <rect class="pixel" x="123" y="110" width=".9" height=".9"/>
+  <rect class="pixel" x="111" y="111" width=".9" height=".9"/>
+  <rect class="pixel" x="91" y="111" width=".9" height=".9"/>
+  <rect class="pixel" x="101" y="111" width=".9" height=".9"/>
+  <rect class="pixel" x="102" y="111" width=".9" height=".9"/>
+  <rect class="pixel" x="110" y="111" width=".9" height=".9"/>
+  <rect class="pixel" x="112" y="111" width=".9" height=".9"/>
+  <rect class="pixel" x="122" y="111" width=".9" height=".9"/>
+  <rect class="pixel" x="123" y="111" width=".9" height=".9"/>
+  <rect class="pixel" x="124" y="111" width=".9" height=".9"/>
+  <rect class="pixel" x="112" y="112" width=".9" height=".9"/>
+  <rect class="pixel" x="91" y="112" width=".9" height=".9"/>
+  <rect class="pixel" x="102" y="112" width=".9" height=".9"/>
+  <rect class="pixel" x="111" y="112" width=".9" height=".9"/>
+  <rect class="pixel" x="118" y="112" width=".9" height=".9"/>
+  <rect class="pixel" x="123" y="112" width=".9" height=".9"/>
+  <rect class="pixel" x="124" y="112" width=".9" height=".9"/>
+  <rect class="pixel" x="125" y="112" width=".9" height=".9"/>
+  <rect class="pixel" x="134" y="112" width=".9" height=".9"/>
+  <rect class="pixel" x="113" y="113" width=".9" height=".9"/>
+  <rect class="pixel" x="92" y="113" width=".9" height=".9"/>
+  <rect class="pixel" x="103" y="113" width=".9" height=".9"/>
+  <rect class="pixel" x="114" y="113" width=".9" height=".9"/>
+  <rect class="pixel" x="121" y="113" width=".9" height=".9"/>
+  <rect class="pixel" x="130" y="113" width=".9" height=".9"/>
+  <rect class="pixel" x="131" y="113" width=".9" height=".9"/>
+  <rect class="pixel" x="132" y="113" width=".9" height=".9"/>
+  <rect class="pixel" x="141" y="113" width=".9" height=".9"/>
+  <rect class="pixel" x="114" y="114" width=".9" height=".9"/>
+  <rect class="pixel" x="92" y="114" width=".9" height=".9"/>
+  <rect class="pixel" x="103" y="114" width=".9" height=".9"/>
+  <rect class="pixel" x="104" y="114" width=".9" height=".9"/>
+  <rect class="pixel" x="113" y="114" width=".9" height=".9"/>
+  <rect class="pixel" x="115" y="114" width=".9" height=".9"/>
+  <rect class="pixel" x="131" y="114" width=".9" height=".9"/>
+  <rect class="pixel" x="132" y="114" width=".9" height=".9"/>
+  <rect class="pixel" x="133" y="114" width=".9" height=".9"/>
+  <rect class="pixel" x="115" y="115" width=".9" height=".9"/>
+  <rect class="pixel" x="92" y="115" width=".9" height=".9"/>
+  <rect class="pixel" x="93" y="115" width=".9" height=".9"/>
+  <rect class="pixel" x="104" y="115" width=".9" height=".9"/>
+  <rect class="pixel" x="105" y="115" width=".9" height=".9"/>
+  <rect class="pixel" x="114" y="115" width=".9" height=".9"/>
+  <rect class="pixel" x="116" y="115" width=".9" height=".9"/>
+  <rect class="pixel" x="132" y="115" width=".9" height=".9"/>
+  <rect class="pixel" x="133" y="115" width=".9" height=".9"/>
+  <rect class="pixel" x="116" y="116" width=".9" height=".9"/>
+  <rect class="pixel" x="70" y="116" width=".9" height=".9"/>
+  <rect class="pixel" x="84" y="116" width=".9" height=".9"/>
+  <rect class="pixel" x="93" y="116" width=".9" height=".9"/>
+  <rect class="pixel" x="104" y="116" width=".9" height=".9"/>
+  <rect class="pixel" x="105" y="116" width=".9" height=".9"/>
+  <rect class="pixel" x="106" y="116" width=".9" height=".9"/>
+  <rect class="pixel" x="115" y="116" width=".9" height=".9"/>
+  <rect class="pixel" x="133" y="116" width=".9" height=".9"/>
+  <rect class="pixel" x="117" y="117" width=".9" height=".9"/>
+  <rect class="pixel" x="55" y="117" width=".9" height=".9"/>
+  <rect class="pixel" x="56" y="117" width=".9" height=".9"/>
+  <rect class="pixel" x="88" y="117" width=".9" height=".9"/>
+  <rect class="pixel" x="89" y="117" width=".9" height=".9"/>
+  <rect class="pixel" x="94" y="117" width=".9" height=".9"/>
+  <rect class="pixel" x="95" y="117" width=".9" height=".9"/>
+  <rect class="pixel" x="96" y="117" width=".9" height=".9"/>
+  <rect class="pixel" x="119" y="117" width=".9" height=".9"/>
+  <rect class="pixel" x="120" y="117" width=".9" height=".9"/>
+  <rect class="pixel" x="142" y="117" width=".9" height=".9"/>
+  <rect class="pixel" x="118" y="118" width=".9" height=".9"/>
+  <rect class="pixel" x="57" y="118" width=".9" height=".9"/>
+  <rect class="pixel" x="88" y="118" width=".9" height=".9"/>
+  <rect class="pixel" x="91" y="118" width=".9" height=".9"/>
+  <rect class="pixel" x="95" y="118" width=".9" height=".9"/>
+  <rect class="pixel" x="102" y="118" width=".9" height=".9"/>
+  <rect class="pixel" x="112" y="118" width=".9" height=".9"/>
+  <rect class="pixel" x="119" y="118" width=".9" height=".9"/>
+  <rect class="pixel" x="126" y="118" width=".9" height=".9"/>
+  <rect class="pixel" x="134" y="118" width=".9" height=".9"/>
+  <rect class="pixel" x="135" y="118" width=".9" height=".9"/>
+  <rect class="pixel" x="119" y="119" width=".9" height=".9"/>
+  <rect class="pixel" x="88" y="119" width=".9" height=".9"/>
+  <rect class="pixel" x="95" y="119" width=".9" height=".9"/>
+  <rect class="pixel" x="117" y="119" width=".9" height=".9"/>
+  <rect class="pixel" x="118" y="119" width=".9" height=".9"/>
+  <rect class="pixel" x="126" y="119" width=".9" height=".9"/>
+  <rect class="pixel" x="127" y="119" width=".9" height=".9"/>
+  <rect class="pixel" x="135" y="119" width=".9" height=".9"/>
+  <rect class="pixel" x="136" y="119" width=".9" height=".9"/>
+  <rect class="pixel" x="137" y="119" width=".9" height=".9"/>
+  <rect class="pixel" x="142" y="119" width=".9" height=".9"/>
+  <rect class="pixel" x="120" y="120" width=".9" height=".9"/>
+  <rect class="pixel" x="89" y="120" width=".9" height=".9"/>
+  <rect class="pixel" x="96" y="120" width=".9" height=".9"/>
+  <rect class="pixel" x="117" y="120" width=".9" height=".9"/>
+  <rect class="pixel" x="121" y="120" width=".9" height=".9"/>
+  <rect class="pixel" x="128" y="120" width=".9" height=".9"/>
+  <rect class="pixel" x="129" y="120" width=".9" height=".9"/>
+  <rect class="pixel" x="138" y="120" width=".9" height=".9"/>
+  <rect class="pixel" x="139" y="120" width=".9" height=".9"/>
+  <rect class="pixel" x="140" y="120" width=".9" height=".9"/>
+  <rect class="pixel" x="142" y="120" width=".9" height=".9"/>
+  <rect class="pixel" x="121" y="121" width=".9" height=".9"/>
+  <rect class="pixel" x="58" y="121" width=".9" height=".9"/>
+  <rect class="pixel" x="89" y="121" width=".9" height=".9"/>
+  <rect class="pixel" x="92" y="121" width=".9" height=".9"/>
+  <rect class="pixel" x="96" y="121" width=".9" height=".9"/>
+  <rect class="pixel" x="103" y="121" width=".9" height=".9"/>
+  <rect class="pixel" x="113" y="121" width=".9" height=".9"/>
+  <rect class="pixel" x="120" y="121" width=".9" height=".9"/>
+  <rect class="pixel" x="129" y="121" width=".9" height=".9"/>
+  <rect class="pixel" x="140" y="121" width=".9" height=".9"/>
+  <rect class="pixel" x="141" y="121" width=".9" height=".9"/>
+  <rect class="pixel" x="122" y="122" width=".9" height=".9"/>
+  <rect class="pixel" x="100" y="122" width=".9" height=".9"/>
+  <rect class="pixel" x="109" y="122" width=".9" height=".9"/>
+  <rect class="pixel" x="110" y="122" width=".9" height=".9"/>
+  <rect class="pixel" x="111" y="122" width=".9" height=".9"/>
+  <rect class="pixel" x="123" y="122" width=".9" height=".9"/>
+  <rect class="pixel" x="123" y="123" width=".9" height=".9"/>
+  <rect class="pixel" x="110" y="123" width=".9" height=".9"/>
+  <rect class="pixel" x="111" y="123" width=".9" height=".9"/>
+  <rect class="pixel" x="112" y="123" width=".9" height=".9"/>
+  <rect class="pixel" x="122" y="123" width=".9" height=".9"/>
+  <rect class="pixel" x="124" y="123" width=".9" height=".9"/>
+  <rect class="pixel" x="124" y="124" width=".9" height=".9"/>
+  <rect class="pixel" x="111" y="124" width=".9" height=".9"/>
+  <rect class="pixel" x="112" y="124" width=".9" height=".9"/>
+  <rect class="pixel" x="123" y="124" width=".9" height=".9"/>
+  <rect class="pixel" x="125" y="124" width=".9" height=".9"/>
+  <rect class="pixel" x="134" y="124" width=".9" height=".9"/>
+  <rect class="pixel" x="125" y="125" width=".9" height=".9"/>
+  <rect class="pixel" x="112" y="125" width=".9" height=".9"/>
+  <rect class="pixel" x="124" y="125" width=".9" height=".9"/>
+  <rect class="pixel" x="134" y="125" width=".9" height=".9"/>
+  <rect class="pixel" x="135" y="125" width=".9" height=".9"/>
+  <rect class="pixel" x="143" y="125" width=".9" height=".9"/>
+  <rect class="pixel" x="126" y="126" width=".9" height=".9"/>
+  <rect class="pixel" x="118" y="126" width=".9" height=".9"/>
+  <rect class="pixel" x="119" y="126" width=".9" height=".9"/>
+  <rect class="pixel" x="134" y="126" width=".9" height=".9"/>
+  <rect class="pixel" x="135" y="126" width=".9" height=".9"/>
+  <rect class="pixel" x="136" y="126" width=".9" height=".9"/>
+  <rect class="pixel" x="127" y="127" width=".9" height=".9"/>
+  <rect class="pixel" x="119" y="127" width=".9" height=".9"/>
+  <rect class="pixel" x="136" y="127" width=".9" height=".9"/>
+  <rect class="pixel" x="137" y="127" width=".9" height=".9"/>
+  <rect class="pixel" x="142" y="127" width=".9" height=".9"/>
+  <rect class="pixel" x="150" y="127" width=".9" height=".9"/>
+  <rect class="pixel" x="128" y="128" width=".9" height=".9"/>
+  <rect class="pixel" x="120" y="128" width=".9" height=".9"/>
+  <rect class="pixel" x="138" y="128" width=".9" height=".9"/>
+  <rect class="pixel" x="139" y="128" width=".9" height=".9"/>
+  <rect class="pixel" x="142" y="128" width=".9" height=".9"/>
+  <rect class="pixel" x="150" y="128" width=".9" height=".9"/>
+  <rect class="pixel" x="129" y="129" width=".9" height=".9"/>
+  <rect class="pixel" x="120" y="129" width=".9" height=".9"/>
+  <rect class="pixel" x="121" y="129" width=".9" height=".9"/>
+  <rect class="pixel" x="139" y="129" width=".9" height=".9"/>
+  <rect class="pixel" x="140" y="129" width=".9" height=".9"/>
+  <rect class="pixel" x="141" y="129" width=".9" height=".9"/>
+  <rect class="pixel" x="130" y="130" width=".9" height=".9"/>
+  <rect class="pixel" x="113" y="130" width=".9" height=".9"/>
+  <rect class="pixel" x="131" y="130" width=".9" height=".9"/>
+  <rect class="pixel" x="140" y="130" width=".9" height=".9"/>
+  <rect class="pixel" x="141" y="130" width=".9" height=".9"/>
+  <rect class="pixel" x="149" y="130" width=".9" height=".9"/>
+  <rect class="pixel" x="131" y="131" width=".9" height=".9"/>
+  <rect class="pixel" x="113" y="131" width=".9" height=".9"/>
+  <rect class="pixel" x="114" y="131" width=".9" height=".9"/>
+  <rect class="pixel" x="130" y="131" width=".9" height=".9"/>
+  <rect class="pixel" x="132" y="131" width=".9" height=".9"/>
+  <rect class="pixel" x="141" y="131" width=".9" height=".9"/>
+  <rect class="pixel" x="132" y="132" width=".9" height=".9"/>
+  <rect class="pixel" x="113" y="132" width=".9" height=".9"/>
+  <rect class="pixel" x="114" y="132" width=".9" height=".9"/>
+  <rect class="pixel" x="115" y="132" width=".9" height=".9"/>
+  <rect class="pixel" x="131" y="132" width=".9" height=".9"/>
+  <rect class="pixel" x="133" y="132" width=".9" height=".9"/>
+  <rect class="pixel" x="133" y="133" width=".9" height=".9"/>
+  <rect class="pixel" x="105" y="133" width=".9" height=".9"/>
+  <rect class="pixel" x="114" y="133" width=".9" height=".9"/>
+  <rect class="pixel" x="115" y="133" width=".9" height=".9"/>
+  <rect class="pixel" x="116" y="133" width=".9" height=".9"/>
+  <rect class="pixel" x="132" y="133" width=".9" height=".9"/>
+  <rect class="pixel" x="134" y="134" width=".9" height=".9"/>
+  <rect class="pixel" x="102" y="134" width=".9" height=".9"/>
+  <rect class="pixel" x="112" y="134" width=".9" height=".9"/>
+  <rect class="pixel" x="118" y="134" width=".9" height=".9"/>
+  <rect class="pixel" x="124" y="134" width=".9" height=".9"/>
+  <rect class="pixel" x="125" y="134" width=".9" height=".9"/>
+  <rect class="pixel" x="126" y="134" width=".9" height=".9"/>
+  <rect class="pixel" x="135" y="134" width=".9" height=".9"/>
+  <rect class="pixel" x="143" y="134" width=".9" height=".9"/>
+  <rect class="pixel" x="135" y="135" width=".9" height=".9"/>
+  <rect class="pixel" x="118" y="135" width=".9" height=".9"/>
+  <rect class="pixel" x="119" y="135" width=".9" height=".9"/>
+  <rect class="pixel" x="125" y="135" width=".9" height=".9"/>
+  <rect class="pixel" x="126" y="135" width=".9" height=".9"/>
+  <rect class="pixel" x="134" y="135" width=".9" height=".9"/>
+  <rect class="pixel" x="136" y="135" width=".9" height=".9"/>
+  <rect class="pixel" x="143" y="135" width=".9" height=".9"/>
+  <rect class="pixel" x="144" y="135" width=".9" height=".9"/>
+  <rect class="pixel" x="136" y="136" width=".9" height=".9"/>
+  <rect class="pixel" x="119" y="136" width=".9" height=".9"/>
+  <rect class="pixel" x="126" y="136" width=".9" height=".9"/>
+  <rect class="pixel" x="127" y="136" width=".9" height=".9"/>
+  <rect class="pixel" x="135" y="136" width=".9" height=".9"/>
+  <rect class="pixel" x="137" y="136" width=".9" height=".9"/>
+  <rect class="pixel" x="143" y="136" width=".9" height=".9"/>
+  <rect class="pixel" x="144" y="136" width=".9" height=".9"/>
+  <rect class="pixel" x="145" y="136" width=".9" height=".9"/>
+  <rect class="pixel" x="137" y="137" width=".9" height=".9"/>
+  <rect class="pixel" x="119" y="137" width=".9" height=".9"/>
+  <rect class="pixel" x="127" y="137" width=".9" height=".9"/>
+  <rect class="pixel" x="136" y="137" width=".9" height=".9"/>
+  <rect class="pixel" x="142" y="137" width=".9" height=".9"/>
+  <rect class="pixel" x="144" y="137" width=".9" height=".9"/>
+  <rect class="pixel" x="145" y="137" width=".9" height=".9"/>
+  <rect class="pixel" x="146" y="137" width=".9" height=".9"/>
+  <rect class="pixel" x="150" y="137" width=".9" height=".9"/>
+  <rect class="pixel" x="138" y="138" width=".9" height=".9"/>
+  <rect class="pixel" x="120" y="138" width=".9" height=".9"/>
+  <rect class="pixel" x="128" y="138" width=".9" height=".9"/>
+  <rect class="pixel" x="139" y="138" width=".9" height=".9"/>
+  <rect class="pixel" x="142" y="138" width=".9" height=".9"/>
+  <rect class="pixel" x="146" y="138" width=".9" height=".9"/>
+  <rect class="pixel" x="147" y="138" width=".9" height=".9"/>
+  <rect class="pixel" x="148" y="138" width=".9" height=".9"/>
+  <rect class="pixel" x="150" y="138" width=".9" height=".9"/>
+  <rect class="pixel" x="139" y="139" width=".9" height=".9"/>
+  <rect class="pixel" x="120" y="139" width=".9" height=".9"/>
+  <rect class="pixel" x="128" y="139" width=".9" height=".9"/>
+  <rect class="pixel" x="129" y="139" width=".9" height=".9"/>
+  <rect class="pixel" x="138" y="139" width=".9" height=".9"/>
+  <rect class="pixel" x="140" y="139" width=".9" height=".9"/>
+  <rect class="pixel" x="147" y="139" width=".9" height=".9"/>
+  <rect class="pixel" x="148" y="139" width=".9" height=".9"/>
+  <rect class="pixel" x="149" y="139" width=".9" height=".9"/>
+  <rect class="pixel" x="140" y="140" width=".9" height=".9"/>
+  <rect class="pixel" x="120" y="140" width=".9" height=".9"/>
+  <rect class="pixel" x="121" y="140" width=".9" height=".9"/>
+  <rect class="pixel" x="129" y="140" width=".9" height=".9"/>
+  <rect class="pixel" x="130" y="140" width=".9" height=".9"/>
+  <rect class="pixel" x="139" y="140" width=".9" height=".9"/>
+  <rect class="pixel" x="141" y="140" width=".9" height=".9"/>
+  <rect class="pixel" x="148" y="140" width=".9" height=".9"/>
+  <rect class="pixel" x="149" y="140" width=".9" height=".9"/>
+  <rect class="pixel" x="141" y="141" width=".9" height=".9"/>
+  <rect class="pixel" x="103" y="141" width=".9" height=".9"/>
+  <rect class="pixel" x="113" y="141" width=".9" height=".9"/>
+  <rect class="pixel" x="121" y="141" width=".9" height=".9"/>
+  <rect class="pixel" x="129" y="141" width=".9" height=".9"/>
+  <rect class="pixel" x="130" y="141" width=".9" height=".9"/>
+  <rect class="pixel" x="131" y="141" width=".9" height=".9"/>
+  <rect class="pixel" x="140" y="141" width=".9" height=".9"/>
+  <rect class="pixel" x="149" y="141" width=".9" height=".9"/>
+  <rect class="pixel" x="142" y="142" width=".9" height=".9"/>
+  <rect class="pixel" x="95" y="142" width=".9" height=".9"/>
+  <rect class="pixel" x="96" y="142" width=".9" height=".9"/>
+  <rect class="pixel" x="117" y="142" width=".9" height=".9"/>
+  <rect class="pixel" x="119" y="142" width=".9" height=".9"/>
+  <rect class="pixel" x="120" y="142" width=".9" height=".9"/>
+  <rect class="pixel" x="127" y="142" width=".9" height=".9"/>
+  <rect class="pixel" x="128" y="142" width=".9" height=".9"/>
+  <rect class="pixel" x="137" y="142" width=".9" height=".9"/>
+  <rect class="pixel" x="138" y="142" width=".9" height=".9"/>
+  <rect class="pixel" x="150" y="142" width=".9" height=".9"/>
+  <rect class="pixel" x="143" y="143" width=".9" height=".9"/>
+  <rect class="pixel" x="125" y="143" width=".9" height=".9"/>
+  <rect class="pixel" x="134" y="143" width=".9" height=".9"/>
+  <rect class="pixel" x="135" y="143" width=".9" height=".9"/>
+  <rect class="pixel" x="136" y="143" width=".9" height=".9"/>
+  <rect class="pixel" x="144" y="143" width=".9" height=".9"/>
+  <rect class="pixel" x="144" y="144" width=".9" height=".9"/>
+  <rect class="pixel" x="135" y="144" width=".9" height=".9"/>
+  <rect class="pixel" x="136" y="144" width=".9" height=".9"/>
+  <rect class="pixel" x="137" y="144" width=".9" height=".9"/>
+  <rect class="pixel" x="143" y="144" width=".9" height=".9"/>
+  <rect class="pixel" x="145" y="144" width=".9" height=".9"/>
+  <rect class="pixel" x="145" y="145" width=".9" height=".9"/>
+  <rect class="pixel" x="136" y="145" width=".9" height=".9"/>
+  <rect class="pixel" x="137" y="145" width=".9" height=".9"/>
+  <rect class="pixel" x="144" y="145" width=".9" height=".9"/>
+  <rect class="pixel" x="146" y="145" width=".9" height=".9"/>
+  <rect class="pixel" x="150" y="145" width=".9" height=".9"/>
+  <rect class="pixel" x="146" y="146" width=".9" height=".9"/>
+  <rect class="pixel" x="137" y="146" width=".9" height=".9"/>
+  <rect class="pixel" x="138" y="146" width=".9" height=".9"/>
+  <rect class="pixel" x="145" y="146" width=".9" height=".9"/>
+  <rect class="pixel" x="147" y="146" width=".9" height=".9"/>
+  <rect class="pixel" x="150" y="146" width=".9" height=".9"/>
+  <rect class="pixel" x="147" y="147" width=".9" height=".9"/>
+  <rect class="pixel" x="138" y="147" width=".9" height=".9"/>
+  <rect class="pixel" x="139" y="147" width=".9" height=".9"/>
+  <rect class="pixel" x="146" y="147" width=".9" height=".9"/>
+  <rect class="pixel" x="148" y="147" width=".9" height=".9"/>
+  <rect class="pixel" x="150" y="147" width=".9" height=".9"/>
+  <rect class="pixel" x="148" y="148" width=".9" height=".9"/>
+  <rect class="pixel" x="138" y="148" width=".9" height=".9"/>
+  <rect class="pixel" x="139" y="148" width=".9" height=".9"/>
+  <rect class="pixel" x="140" y="148" width=".9" height=".9"/>
+  <rect class="pixel" x="147" y="148" width=".9" height=".9"/>
+  <rect class="pixel" x="149" y="148" width=".9" height=".9"/>
+  <rect class="pixel" x="149" y="149" width=".9" height=".9"/>
+  <rect class="pixel" x="130" y="149" width=".9" height=".9"/>
+  <rect class="pixel" x="139" y="149" width=".9" height=".9"/>
+  <rect class="pixel" x="140" y="149" width=".9" height=".9"/>
+  <rect class="pixel" x="141" y="149" width=".9" height=".9"/>
+  <rect class="pixel" x="148" y="149" width=".9" height=".9"/>
+  <rect class="pixel" x="150" y="150" width=".9" height=".9"/>
+  <rect class="pixel" x="127" y="150" width=".9" height=".9"/>
+  <rect class="pixel" x="128" y="150" width=".9" height=".9"/>
+  <rect class="pixel" x="137" y="150" width=".9" height=".9"/>
+  <rect class="pixel" x="138" y="150" width=".9" height=".9"/>
+  <rect class="pixel" x="142" y="150" width=".9" height=".9"/>
+  <rect class="pixel" x="145" y="150" width=".9" height=".9"/>
+  <rect class="pixel" x="146" y="150" width=".9" height=".9"/>
+  <rect class="pixel" x="147" y="150" width=".9" height=".9"/>
+</svg>
diff --git a/doc/doxygen/deal.II/images/step-20.k-random.png b/doc/doxygen/deal.II/images/step-20.k-random.png
new file mode 100644
index 0000000..b9858cc
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-20.k-random.png differ
diff --git a/doc/doxygen/deal.II/images/step-20.p-random.png b/doc/doxygen/deal.II/images/step-20.p-random.png
new file mode 100644
index 0000000..af0677e
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-20.p-random.png differ
diff --git a/doc/doxygen/deal.II/images/step-20.p.png b/doc/doxygen/deal.II/images/step-20.p.png
new file mode 100644
index 0000000..d6802ed
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-20.p.png differ
diff --git a/doc/doxygen/deal.II/images/step-20.u-random.png b/doc/doxygen/deal.II/images/step-20.u-random.png
new file mode 100644
index 0000000..0492c40
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-20.u-random.png differ
diff --git a/doc/doxygen/deal.II/images/step-20.u-wiggle.png b/doc/doxygen/deal.II/images/step-20.u-wiggle.png
new file mode 100644
index 0000000..e1b5bf1
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-20.u-wiggle.png differ
diff --git a/doc/doxygen/deal.II/images/step-20.u.png b/doc/doxygen/deal.II/images/step-20.u.png
new file mode 100644
index 0000000..5549c25
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-20.u.png differ
diff --git a/doc/doxygen/deal.II/images/step-20.v-wiggle.png b/doc/doxygen/deal.II/images/step-20.v-wiggle.png
new file mode 100644
index 0000000..815483a
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-20.v-wiggle.png differ
diff --git a/doc/doxygen/deal.II/images/step-20.v.png b/doc/doxygen/deal.II/images/step-20.v.png
new file mode 100644
index 0000000..d3a23d3
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-20.v.png differ
diff --git a/doc/doxygen/deal.II/images/step-21.centerline.gif b/doc/doxygen/deal.II/images/step-21.centerline.gif
new file mode 100644
index 0000000..d7bc81e
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-21.centerline.gif differ
diff --git a/doc/doxygen/deal.II/images/step-21.random2d.gif b/doc/doxygen/deal.II/images/step-21.random2d.gif
new file mode 100644
index 0000000..c5d025e
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-21.random2d.gif differ
diff --git a/doc/doxygen/deal.II/images/step-21.random3d.gif b/doc/doxygen/deal.II/images/step-21.random3d.gif
new file mode 100644
index 0000000..94d8fb8
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-21.random3d.gif differ
diff --git a/doc/doxygen/deal.II/images/step-22.2d.mesh-0.png b/doc/doxygen/deal.II/images/step-22.2d.mesh-0.png
new file mode 100644
index 0000000..ddf20d8
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-22.2d.mesh-0.png differ
diff --git a/doc/doxygen/deal.II/images/step-22.2d.mesh-1.png b/doc/doxygen/deal.II/images/step-22.2d.mesh-1.png
new file mode 100644
index 0000000..c651723
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-22.2d.mesh-1.png differ
diff --git a/doc/doxygen/deal.II/images/step-22.2d.mesh-2.png b/doc/doxygen/deal.II/images/step-22.2d.mesh-2.png
new file mode 100644
index 0000000..b4619e2
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-22.2d.mesh-2.png differ
diff --git a/doc/doxygen/deal.II/images/step-22.2d.mesh-3.png b/doc/doxygen/deal.II/images/step-22.2d.mesh-3.png
new file mode 100644
index 0000000..fd8c36e
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-22.2d.mesh-3.png differ
diff --git a/doc/doxygen/deal.II/images/step-22.2d.mesh-4.png b/doc/doxygen/deal.II/images/step-22.2d.mesh-4.png
new file mode 100644
index 0000000..0aaf74b
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-22.2d.mesh-4.png differ
diff --git a/doc/doxygen/deal.II/images/step-22.2d.mesh-5.png b/doc/doxygen/deal.II/images/step-22.2d.mesh-5.png
new file mode 100644
index 0000000..357bc6a
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-22.2d.mesh-5.png differ
diff --git a/doc/doxygen/deal.II/images/step-22.2d.solution.png b/doc/doxygen/deal.II/images/step-22.2d.solution.png
new file mode 100644
index 0000000..f21edb5
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-22.2d.solution.png differ
diff --git a/doc/doxygen/deal.II/images/step-22.2d.sparsity-nor.png b/doc/doxygen/deal.II/images/step-22.2d.sparsity-nor.png
new file mode 100644
index 0000000..6df2b17
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-22.2d.sparsity-nor.png differ
diff --git a/doc/doxygen/deal.II/images/step-22.2d.sparsity-ren.png b/doc/doxygen/deal.II/images/step-22.2d.sparsity-ren.png
new file mode 100644
index 0000000..8ceb12e
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-22.2d.sparsity-ren.png differ
diff --git a/doc/doxygen/deal.II/images/step-22.3d-extension.png b/doc/doxygen/deal.II/images/step-22.3d-extension.png
new file mode 100644
index 0000000..186fc41
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-22.3d-extension.png differ
diff --git a/doc/doxygen/deal.II/images/step-22.3d-grid-extension.png b/doc/doxygen/deal.II/images/step-22.3d-grid-extension.png
new file mode 100644
index 0000000..04d9398
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-22.3d-grid-extension.png differ
diff --git a/doc/doxygen/deal.II/images/step-22.3d.mesh-0.png b/doc/doxygen/deal.II/images/step-22.3d.mesh-0.png
new file mode 100644
index 0000000..b0e918a
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-22.3d.mesh-0.png differ
diff --git a/doc/doxygen/deal.II/images/step-22.3d.mesh-1.png b/doc/doxygen/deal.II/images/step-22.3d.mesh-1.png
new file mode 100644
index 0000000..48244e7
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-22.3d.mesh-1.png differ
diff --git a/doc/doxygen/deal.II/images/step-22.3d.mesh-2.png b/doc/doxygen/deal.II/images/step-22.3d.mesh-2.png
new file mode 100644
index 0000000..e24523a
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-22.3d.mesh-2.png differ
diff --git a/doc/doxygen/deal.II/images/step-22.3d.mesh-3.png b/doc/doxygen/deal.II/images/step-22.3d.mesh-3.png
new file mode 100644
index 0000000..76533a5
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-22.3d.mesh-3.png differ
diff --git a/doc/doxygen/deal.II/images/step-22.3d.mesh-4.png b/doc/doxygen/deal.II/images/step-22.3d.mesh-4.png
new file mode 100644
index 0000000..3636de6
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-22.3d.mesh-4.png differ
diff --git a/doc/doxygen/deal.II/images/step-22.3d.mesh-5.png b/doc/doxygen/deal.II/images/step-22.3d.mesh-5.png
new file mode 100644
index 0000000..4d98dd7
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-22.3d.mesh-5.png differ
diff --git a/doc/doxygen/deal.II/images/step-22.3d.solution.png b/doc/doxygen/deal.II/images/step-22.3d.solution.png
new file mode 100644
index 0000000..86c7a55
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-22.3d.solution.png differ
diff --git a/doc/doxygen/deal.II/images/step-22.3d.sparsity_uu-ren.png b/doc/doxygen/deal.II/images/step-22.3d.sparsity_uu-ren.png
new file mode 100644
index 0000000..7df5e59
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-22.3d.sparsity_uu-ren.png differ
diff --git a/doc/doxygen/deal.II/images/step-22.profile-3.original.png b/doc/doxygen/deal.II/images/step-22.profile-3.original.png
new file mode 100644
index 0000000..1bea1c4
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-22.profile-3.original.png differ
diff --git a/doc/doxygen/deal.II/images/step-22.profile-3.png b/doc/doxygen/deal.II/images/step-22.profile-3.png
new file mode 100644
index 0000000..7aaa57d
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-22.profile-3.png differ
diff --git a/doc/doxygen/deal.II/images/step-23.movie.gif b/doc/doxygen/deal.II/images/step-23.movie.gif
new file mode 100644
index 0000000..0ed0afa
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-23.movie.gif differ
diff --git a/doc/doxygen/deal.II/images/step-24.multi.png b/doc/doxygen/deal.II/images/step-24.multi.png
new file mode 100644
index 0000000..509201d
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-24.multi.png differ
diff --git a/doc/doxygen/deal.II/images/step-24.multi_movie.gif b/doc/doxygen/deal.II/images/step-24.multi_movie.gif
new file mode 100644
index 0000000..c922c67
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-24.multi_movie.gif differ
diff --git a/doc/doxygen/deal.II/images/step-24.multi_s.png b/doc/doxygen/deal.II/images/step-24.multi_s.png
new file mode 100644
index 0000000..9bbd989
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-24.multi_s.png differ
diff --git a/doc/doxygen/deal.II/images/step-24.multi_s2.png b/doc/doxygen/deal.II/images/step-24.multi_s2.png
new file mode 100644
index 0000000..77ddcf5
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-24.multi_s2.png differ
diff --git a/doc/doxygen/deal.II/images/step-24.multi_sf.png b/doc/doxygen/deal.II/images/step-24.multi_sf.png
new file mode 100644
index 0000000..55b0c09
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-24.multi_sf.png differ
diff --git a/doc/doxygen/deal.II/images/step-24.one.png b/doc/doxygen/deal.II/images/step-24.one.png
new file mode 100644
index 0000000..f63dac9
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-24.one.png differ
diff --git a/doc/doxygen/deal.II/images/step-24.one_movie.gif b/doc/doxygen/deal.II/images/step-24.one_movie.gif
new file mode 100644
index 0000000..edccbbf
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-24.one_movie.gif differ
diff --git a/doc/doxygen/deal.II/images/step-24.one_s.png b/doc/doxygen/deal.II/images/step-24.one_s.png
new file mode 100644
index 0000000..934bf6a
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-24.one_s.png differ
diff --git a/doc/doxygen/deal.II/images/step-24.one_s2.png b/doc/doxygen/deal.II/images/step-24.one_s2.png
new file mode 100644
index 0000000..784fb79
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-24.one_s2.png differ
diff --git a/doc/doxygen/deal.II/images/step-24.one_sf.png b/doc/doxygen/deal.II/images/step-24.one_sf.png
new file mode 100644
index 0000000..f94482d
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-24.one_sf.png differ
diff --git a/doc/doxygen/deal.II/images/step-24.traces.png b/doc/doxygen/deal.II/images/step-24.traces.png
new file mode 100644
index 0000000..b57091f
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-24.traces.png differ
diff --git a/doc/doxygen/deal.II/images/step-25.1d-breather.gif b/doc/doxygen/deal.II/images/step-25.1d-breather.gif
new file mode 100644
index 0000000..63b0980
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-25.1d-breather.gif differ
diff --git a/doc/doxygen/deal.II/images/step-25.1d-breather_stp.png b/doc/doxygen/deal.II/images/step-25.1d-breather_stp.png
new file mode 100644
index 0000000..09a8728
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-25.1d-breather_stp.png differ
diff --git a/doc/doxygen/deal.II/images/step-25.2d-angled_kink.gif b/doc/doxygen/deal.II/images/step-25.2d-angled_kink.gif
new file mode 100644
index 0000000..5fe6b63
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-25.2d-angled_kink.gif differ
diff --git a/doc/doxygen/deal.II/images/step-25.2d-kink.gif b/doc/doxygen/deal.II/images/step-25.2d-kink.gif
new file mode 100644
index 0000000..d2df430
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-25.2d-kink.gif differ
diff --git a/doc/doxygen/deal.II/images/step-25.2d-kink.png b/doc/doxygen/deal.II/images/step-25.2d-kink.png
new file mode 100644
index 0000000..2782629
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-25.2d-kink.png differ
diff --git a/doc/doxygen/deal.II/images/step-25.2d-pseudobreather.1.png b/doc/doxygen/deal.II/images/step-25.2d-pseudobreather.1.png
new file mode 100644
index 0000000..2059fb3
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-25.2d-pseudobreather.1.png differ
diff --git a/doc/doxygen/deal.II/images/step-25.2d-pseudobreather.2.png b/doc/doxygen/deal.II/images/step-25.2d-pseudobreather.2.png
new file mode 100644
index 0000000..b5c66dd
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-25.2d-pseudobreather.2.png differ
diff --git a/doc/doxygen/deal.II/images/step-25.2d-pseudobreather.gif b/doc/doxygen/deal.II/images/step-25.2d-pseudobreather.gif
new file mode 100644
index 0000000..966335a
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-25.2d-pseudobreather.gif differ
diff --git a/doc/doxygen/deal.II/images/step-26.movie.gif b/doc/doxygen/deal.II/images/step-26.movie.gif
new file mode 100644
index 0000000..101f077
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-26.movie.gif differ
diff --git a/doc/doxygen/deal.II/images/step-26.surface.png b/doc/doxygen/deal.II/images/step-26.surface.png
new file mode 100644
index 0000000..8502bbc
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-26.surface.png differ
diff --git a/doc/doxygen/deal.II/images/step-26.volume.png b/doc/doxygen/deal.II/images/step-26.volume.png
new file mode 100644
index 0000000..37d843f
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-26.volume.png differ
diff --git a/doc/doxygen/deal.II/images/step-27.fe_degree-0.png b/doc/doxygen/deal.II/images/step-27.fe_degree-0.png
new file mode 100644
index 0000000..c650243
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-27.fe_degree-0.png differ
diff --git a/doc/doxygen/deal.II/images/step-27.fe_degree-1.png b/doc/doxygen/deal.II/images/step-27.fe_degree-1.png
new file mode 100644
index 0000000..6bbda6f
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-27.fe_degree-1.png differ
diff --git a/doc/doxygen/deal.II/images/step-27.fe_degree-2.png b/doc/doxygen/deal.II/images/step-27.fe_degree-2.png
new file mode 100644
index 0000000..57eba26
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-27.fe_degree-2.png differ
diff --git a/doc/doxygen/deal.II/images/step-27.fe_degree-3.png b/doc/doxygen/deal.II/images/step-27.fe_degree-3.png
new file mode 100644
index 0000000..bfa7a42
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-27.fe_degree-3.png differ
diff --git a/doc/doxygen/deal.II/images/step-27.fe_degree-4.png b/doc/doxygen/deal.II/images/step-27.fe_degree-4.png
new file mode 100644
index 0000000..fe086b0
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-27.fe_degree-4.png differ
diff --git a/doc/doxygen/deal.II/images/step-27.fe_degree-5.png b/doc/doxygen/deal.II/images/step-27.fe_degree-5.png
new file mode 100644
index 0000000..e05df76
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-27.fe_degree-5.png differ
diff --git a/doc/doxygen/deal.II/images/step-27.mesh-0.png b/doc/doxygen/deal.II/images/step-27.mesh-0.png
new file mode 100644
index 0000000..f53953a
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-27.mesh-0.png differ
diff --git a/doc/doxygen/deal.II/images/step-27.mesh-1.png b/doc/doxygen/deal.II/images/step-27.mesh-1.png
new file mode 100644
index 0000000..1e86419
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-27.mesh-1.png differ
diff --git a/doc/doxygen/deal.II/images/step-27.mesh-2.png b/doc/doxygen/deal.II/images/step-27.mesh-2.png
new file mode 100644
index 0000000..92da56d
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-27.mesh-2.png differ
diff --git a/doc/doxygen/deal.II/images/step-27.mesh-3.png b/doc/doxygen/deal.II/images/step-27.mesh-3.png
new file mode 100644
index 0000000..525a68c
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-27.mesh-3.png differ
diff --git a/doc/doxygen/deal.II/images/step-27.mesh-4.png b/doc/doxygen/deal.II/images/step-27.mesh-4.png
new file mode 100644
index 0000000..bfc9d66
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-27.mesh-4.png differ
diff --git a/doc/doxygen/deal.II/images/step-27.mesh-5.png b/doc/doxygen/deal.II/images/step-27.mesh-5.png
new file mode 100644
index 0000000..37fd476
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-27.mesh-5.png differ
diff --git a/doc/doxygen/deal.II/images/step-27.smoothness-0.png b/doc/doxygen/deal.II/images/step-27.smoothness-0.png
new file mode 100644
index 0000000..3dd1154
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-27.smoothness-0.png differ
diff --git a/doc/doxygen/deal.II/images/step-27.smoothness-1.png b/doc/doxygen/deal.II/images/step-27.smoothness-1.png
new file mode 100644
index 0000000..7114778
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-27.smoothness-1.png differ
diff --git a/doc/doxygen/deal.II/images/step-27.smoothness-2.png b/doc/doxygen/deal.II/images/step-27.smoothness-2.png
new file mode 100644
index 0000000..621077d
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-27.smoothness-2.png differ
diff --git a/doc/doxygen/deal.II/images/step-27.smoothness-3.png b/doc/doxygen/deal.II/images/step-27.smoothness-3.png
new file mode 100644
index 0000000..a55a03b
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-27.smoothness-3.png differ
diff --git a/doc/doxygen/deal.II/images/step-27.smoothness-4.png b/doc/doxygen/deal.II/images/step-27.smoothness-4.png
new file mode 100644
index 0000000..4117b20
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-27.smoothness-4.png differ
diff --git a/doc/doxygen/deal.II/images/step-27.smoothness-5.png b/doc/doxygen/deal.II/images/step-27.smoothness-5.png
new file mode 100644
index 0000000..0f0ab78
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-27.smoothness-5.png differ
diff --git a/doc/doxygen/deal.II/images/step-27.solution.png b/doc/doxygen/deal.II/images/step-27.solution.png
new file mode 100644
index 0000000..317aa88
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-27.solution.png differ
diff --git a/doc/doxygen/deal.II/images/step-28.convergence.png b/doc/doxygen/deal.II/images/step-28.convergence.png
new file mode 100644
index 0000000..65619bf
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-28.convergence.png differ
diff --git a/doc/doxygen/deal.II/images/step-28.error-vs-dofs.png b/doc/doxygen/deal.II/images/step-28.error-vs-dofs.png
new file mode 100644
index 0000000..0240f37
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-28.error-vs-dofs.png differ
diff --git a/doc/doxygen/deal.II/images/step-28.error-vs-time.png b/doc/doxygen/deal.II/images/step-28.error-vs-time.png
new file mode 100644
index 0000000..f3f8924
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-28.error-vs-time.png differ
diff --git a/doc/doxygen/deal.II/images/step-28.grid-0.9.order2.png b/doc/doxygen/deal.II/images/step-28.grid-0.9.order2.png
new file mode 100644
index 0000000..439a916
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-28.grid-0.9.order2.png differ
diff --git a/doc/doxygen/deal.II/images/step-28.grid-1.9.order2.png b/doc/doxygen/deal.II/images/step-28.grid-1.9.order2.png
new file mode 100644
index 0000000..7212932
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-28.grid-1.9.order2.png differ
diff --git a/doc/doxygen/deal.II/images/step-28.solution-0.9.order2.png b/doc/doxygen/deal.II/images/step-28.solution-0.9.order2.png
new file mode 100644
index 0000000..a01e238
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-28.solution-0.9.order2.png differ
diff --git a/doc/doxygen/deal.II/images/step-28.solution-1.9.order2.png b/doc/doxygen/deal.II/images/step-28.solution-1.9.order2.png
new file mode 100644
index 0000000..aa54318
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-28.solution-1.9.order2.png differ
diff --git a/doc/doxygen/deal.II/images/step-29.contours.png b/doc/doxygen/deal.II/images/step-29.contours.png
new file mode 100644
index 0000000..251091a
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-29.contours.png differ
diff --git a/doc/doxygen/deal.II/images/step-29.intensity.png b/doc/doxygen/deal.II/images/step-29.intensity.png
new file mode 100644
index 0000000..c6d9609
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-29.intensity.png differ
diff --git a/doc/doxygen/deal.II/images/step-29.surface.png b/doc/doxygen/deal.II/images/step-29.surface.png
new file mode 100644
index 0000000..32b0a19
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-29.surface.png differ
diff --git a/doc/doxygen/deal.II/images/step-29.v.png b/doc/doxygen/deal.II/images/step-29.v.png
new file mode 100644
index 0000000..2b7dc06
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-29.v.png differ
diff --git a/doc/doxygen/deal.II/images/step-29.w.png b/doc/doxygen/deal.II/images/step-29.w.png
new file mode 100644
index 0000000..ad29d9d
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-29.w.png differ
diff --git a/doc/doxygen/deal.II/images/step-3.solution-1.png b/doc/doxygen/deal.II/images/step-3.solution-1.png
new file mode 100644
index 0000000..b278161
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-3.solution-1.png differ
diff --git a/doc/doxygen/deal.II/images/step-3.solution-2.png b/doc/doxygen/deal.II/images/step-3.solution-2.png
new file mode 100644
index 0000000..e0c82ad
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-3.solution-2.png differ
diff --git a/doc/doxygen/deal.II/images/step-30.grid-3.aniso.png b/doc/doxygen/deal.II/images/step-30.grid-3.aniso.png
new file mode 100644
index 0000000..89c9aaf
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-30.grid-3.aniso.png differ
diff --git a/doc/doxygen/deal.II/images/step-30.sol-1.aniso.png b/doc/doxygen/deal.II/images/step-30.sol-1.aniso.png
new file mode 100644
index 0000000..5d82ebd
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-30.sol-1.aniso.png differ
diff --git a/doc/doxygen/deal.II/images/step-30.sol-1.iso.png b/doc/doxygen/deal.II/images/step-30.sol-1.iso.png
new file mode 100644
index 0000000..1d9e828
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-30.sol-1.iso.png differ
diff --git a/doc/doxygen/deal.II/images/step-30.sol-5.aniso.png b/doc/doxygen/deal.II/images/step-30.sol-5.aniso.png
new file mode 100644
index 0000000..140d092
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-30.sol-5.aniso.png differ
diff --git a/doc/doxygen/deal.II/images/step-30.sol-5.iso.png b/doc/doxygen/deal.II/images/step-30.sol-5.iso.png
new file mode 100644
index 0000000..0174c28
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-30.sol-5.iso.png differ
diff --git a/doc/doxygen/deal.II/images/step-31.2d.grid.00.png b/doc/doxygen/deal.II/images/step-31.2d.grid.00.png
new file mode 100644
index 0000000..03af310
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-31.2d.grid.00.png differ
diff --git a/doc/doxygen/deal.II/images/step-31.2d.grid.01.png b/doc/doxygen/deal.II/images/step-31.2d.grid.01.png
new file mode 100644
index 0000000..f021152
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-31.2d.grid.01.png differ
diff --git a/doc/doxygen/deal.II/images/step-31.2d.grid.02.png b/doc/doxygen/deal.II/images/step-31.2d.grid.02.png
new file mode 100644
index 0000000..091d282
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-31.2d.grid.02.png differ
diff --git a/doc/doxygen/deal.II/images/step-31.2d.grid.03.png b/doc/doxygen/deal.II/images/step-31.2d.grid.03.png
new file mode 100644
index 0000000..99ee500
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-31.2d.grid.03.png differ
diff --git a/doc/doxygen/deal.II/images/step-31.2d.grid.04.png b/doc/doxygen/deal.II/images/step-31.2d.grid.04.png
new file mode 100644
index 0000000..97bd392
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-31.2d.grid.04.png differ
diff --git a/doc/doxygen/deal.II/images/step-31.2d.grid.05.png b/doc/doxygen/deal.II/images/step-31.2d.grid.05.png
new file mode 100644
index 0000000..93b4b20
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-31.2d.grid.05.png differ
diff --git a/doc/doxygen/deal.II/images/step-31.2d.grid.06.png b/doc/doxygen/deal.II/images/step-31.2d.grid.06.png
new file mode 100644
index 0000000..d954f86
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-31.2d.grid.06.png differ
diff --git a/doc/doxygen/deal.II/images/step-31.2d.grid.07.png b/doc/doxygen/deal.II/images/step-31.2d.grid.07.png
new file mode 100644
index 0000000..9e53aa3
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-31.2d.grid.07.png differ
diff --git a/doc/doxygen/deal.II/images/step-31.2d.solution.00.png b/doc/doxygen/deal.II/images/step-31.2d.solution.00.png
new file mode 100644
index 0000000..7275164
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-31.2d.solution.00.png differ
diff --git a/doc/doxygen/deal.II/images/step-31.2d.solution.01.png b/doc/doxygen/deal.II/images/step-31.2d.solution.01.png
new file mode 100644
index 0000000..0c25403
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-31.2d.solution.01.png differ
diff --git a/doc/doxygen/deal.II/images/step-31.2d.solution.02.png b/doc/doxygen/deal.II/images/step-31.2d.solution.02.png
new file mode 100644
index 0000000..b27d0e6
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-31.2d.solution.02.png differ
diff --git a/doc/doxygen/deal.II/images/step-31.2d.solution.03.png b/doc/doxygen/deal.II/images/step-31.2d.solution.03.png
new file mode 100644
index 0000000..2f8cd67
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-31.2d.solution.03.png differ
diff --git a/doc/doxygen/deal.II/images/step-31.2d.solution.04.png b/doc/doxygen/deal.II/images/step-31.2d.solution.04.png
new file mode 100644
index 0000000..8c62608
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-31.2d.solution.04.png differ
diff --git a/doc/doxygen/deal.II/images/step-31.2d.solution.05.png b/doc/doxygen/deal.II/images/step-31.2d.solution.05.png
new file mode 100644
index 0000000..51c83c3
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-31.2d.solution.05.png differ
diff --git a/doc/doxygen/deal.II/images/step-31.2d.solution.06.png b/doc/doxygen/deal.II/images/step-31.2d.solution.06.png
new file mode 100644
index 0000000..c73b385
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-31.2d.solution.06.png differ
diff --git a/doc/doxygen/deal.II/images/step-31.2d.solution.07.png b/doc/doxygen/deal.II/images/step-31.2d.solution.07.png
new file mode 100644
index 0000000..1ce8fdd
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-31.2d.solution.07.png differ
diff --git a/doc/doxygen/deal.II/images/step-31.3d.solution.00.png b/doc/doxygen/deal.II/images/step-31.3d.solution.00.png
new file mode 100644
index 0000000..68d0e08
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-31.3d.solution.00.png differ
diff --git a/doc/doxygen/deal.II/images/step-31.3d.solution.01.png b/doc/doxygen/deal.II/images/step-31.3d.solution.01.png
new file mode 100644
index 0000000..b0d571b
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-31.3d.solution.01.png differ
diff --git a/doc/doxygen/deal.II/images/step-31.3d.solution.02.png b/doc/doxygen/deal.II/images/step-31.3d.solution.02.png
new file mode 100644
index 0000000..bd9a473
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-31.3d.solution.02.png differ
diff --git a/doc/doxygen/deal.II/images/step-31.3d.solution.03.png b/doc/doxygen/deal.II/images/step-31.3d.solution.03.png
new file mode 100644
index 0000000..99ae0f4
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-31.3d.solution.03.png differ
diff --git a/doc/doxygen/deal.II/images/step-31.3d.solution.04.png b/doc/doxygen/deal.II/images/step-31.3d.solution.04.png
new file mode 100644
index 0000000..64c7c60
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-31.3d.solution.04.png differ
diff --git a/doc/doxygen/deal.II/images/step-31.3d.solution.05.png b/doc/doxygen/deal.II/images/step-31.3d.solution.05.png
new file mode 100644
index 0000000..8a88da8
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-31.3d.solution.05.png differ
diff --git a/doc/doxygen/deal.II/images/step-31.3d.solution.06.png b/doc/doxygen/deal.II/images/step-31.3d.solution.06.png
new file mode 100644
index 0000000..85595e3
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-31.3d.solution.06.png differ
diff --git a/doc/doxygen/deal.II/images/step-31.3d.solution.07.png b/doc/doxygen/deal.II/images/step-31.3d.solution.07.png
new file mode 100644
index 0000000..ae850cf
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-31.3d.solution.07.png differ
diff --git a/doc/doxygen/deal.II/images/step-31.3d.solution.08.png b/doc/doxygen/deal.II/images/step-31.3d.solution.08.png
new file mode 100644
index 0000000..29cf7b7
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-31.3d.solution.08.png differ
diff --git a/doc/doxygen/deal.II/images/step-31.3d.solution.09.png b/doc/doxygen/deal.II/images/step-31.3d.solution.09.png
new file mode 100644
index 0000000..bc5fa63
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-31.3d.solution.09.png differ
diff --git a/doc/doxygen/deal.II/images/step-31.3d.solution.10.png b/doc/doxygen/deal.II/images/step-31.3d.solution.10.png
new file mode 100644
index 0000000..4d18f51
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-31.3d.solution.10.png differ
diff --git a/doc/doxygen/deal.II/images/step-31.timestep.q1.beta=0.01.png b/doc/doxygen/deal.II/images/step-31.timestep.q1.beta=0.01.png
new file mode 100644
index 0000000..2eae67f
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-31.timestep.q1.beta=0.01.png differ
diff --git a/doc/doxygen/deal.II/images/step-31.timestep.q1.beta=0.03.png b/doc/doxygen/deal.II/images/step-31.timestep.q1.beta=0.03.png
new file mode 100644
index 0000000..7bda4fa
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-31.timestep.q1.beta=0.03.png differ
diff --git a/doc/doxygen/deal.II/images/step-31.timestep.q1.beta=0.1.png b/doc/doxygen/deal.II/images/step-31.timestep.q1.beta=0.1.png
new file mode 100644
index 0000000..702bcf9
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-31.timestep.q1.beta=0.1.png differ
diff --git a/doc/doxygen/deal.II/images/step-31.timestep.q1.beta=0.5.png b/doc/doxygen/deal.II/images/step-31.timestep.q1.beta=0.5.png
new file mode 100644
index 0000000..1cff898
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-31.timestep.q1.beta=0.5.png differ
diff --git a/doc/doxygen/deal.II/images/step-31.timestep.q2.beta=0.01.png b/doc/doxygen/deal.II/images/step-31.timestep.q2.beta=0.01.png
new file mode 100644
index 0000000..4e58fcd
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-31.timestep.q2.beta=0.01.png differ
diff --git a/doc/doxygen/deal.II/images/step-31.timestep.q2.beta=0.03.png b/doc/doxygen/deal.II/images/step-31.timestep.q2.beta=0.03.png
new file mode 100644
index 0000000..9517f80
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-31.timestep.q2.beta=0.03.png differ
diff --git a/doc/doxygen/deal.II/images/step-31.timestep.q2.beta=0.1.png b/doc/doxygen/deal.II/images/step-31.timestep.q2.beta=0.1.png
new file mode 100644
index 0000000..0c56926
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-31.timestep.q2.beta=0.1.png differ
diff --git a/doc/doxygen/deal.II/images/step-32.2d-initial.png b/doc/doxygen/deal.II/images/step-32.2d-initial.png
new file mode 100644
index 0000000..312ca24
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-32.2d-initial.png differ
diff --git a/doc/doxygen/deal.II/images/step-32.2d.grid.2100.png b/doc/doxygen/deal.II/images/step-32.2d.grid.2100.png
new file mode 100644
index 0000000..ffa5bf7
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-32.2d.grid.2100.png differ
diff --git a/doc/doxygen/deal.II/images/step-32.2d.partition.2100.png b/doc/doxygen/deal.II/images/step-32.2d.partition.2100.png
new file mode 100644
index 0000000..4199c68
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-32.2d.partition.2100.png differ
diff --git a/doc/doxygen/deal.II/images/step-32.2d.t_vs_vmax.png b/doc/doxygen/deal.II/images/step-32.2d.t_vs_vmax.png
new file mode 100644
index 0000000..34c2d28
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-32.2d.t_vs_vmax.png differ
diff --git a/doc/doxygen/deal.II/images/step-32.2d.temperature.0000.png b/doc/doxygen/deal.II/images/step-32.2d.temperature.0000.png
new file mode 100644
index 0000000..be461bd
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-32.2d.temperature.0000.png differ
diff --git a/doc/doxygen/deal.II/images/step-32.2d.temperature.0100.png b/doc/doxygen/deal.II/images/step-32.2d.temperature.0100.png
new file mode 100644
index 0000000..42c67b3
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-32.2d.temperature.0100.png differ
diff --git a/doc/doxygen/deal.II/images/step-32.2d.temperature.0200.png b/doc/doxygen/deal.II/images/step-32.2d.temperature.0200.png
new file mode 100644
index 0000000..80b11d1
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-32.2d.temperature.0200.png differ
diff --git a/doc/doxygen/deal.II/images/step-32.2d.temperature.0300.png b/doc/doxygen/deal.II/images/step-32.2d.temperature.0300.png
new file mode 100644
index 0000000..9d01075
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-32.2d.temperature.0300.png differ
diff --git a/doc/doxygen/deal.II/images/step-32.2d.temperature.0400.png b/doc/doxygen/deal.II/images/step-32.2d.temperature.0400.png
new file mode 100644
index 0000000..27820e5
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-32.2d.temperature.0400.png differ
diff --git a/doc/doxygen/deal.II/images/step-32.2d.temperature.0500.png b/doc/doxygen/deal.II/images/step-32.2d.temperature.0500.png
new file mode 100644
index 0000000..2e017d3
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-32.2d.temperature.0500.png differ
diff --git a/doc/doxygen/deal.II/images/step-32.2d.temperature.0600.png b/doc/doxygen/deal.II/images/step-32.2d.temperature.0600.png
new file mode 100644
index 0000000..7df31de
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-32.2d.temperature.0600.png differ
diff --git a/doc/doxygen/deal.II/images/step-32.2d.temperature.0700.png b/doc/doxygen/deal.II/images/step-32.2d.temperature.0700.png
new file mode 100644
index 0000000..a6a923c
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-32.2d.temperature.0700.png differ
diff --git a/doc/doxygen/deal.II/images/step-32.2d.temperature.0800.png b/doc/doxygen/deal.II/images/step-32.2d.temperature.0800.png
new file mode 100644
index 0000000..558b941
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-32.2d.temperature.0800.png differ
diff --git a/doc/doxygen/deal.II/images/step-32.2d.temperature.0900.png b/doc/doxygen/deal.II/images/step-32.2d.temperature.0900.png
new file mode 100644
index 0000000..b28d46c
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-32.2d.temperature.0900.png differ
diff --git a/doc/doxygen/deal.II/images/step-32.2d.temperature.1000.png b/doc/doxygen/deal.II/images/step-32.2d.temperature.1000.png
new file mode 100644
index 0000000..deeb2d1
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-32.2d.temperature.1000.png differ
diff --git a/doc/doxygen/deal.II/images/step-32.2d.temperature.1100.png b/doc/doxygen/deal.II/images/step-32.2d.temperature.1100.png
new file mode 100644
index 0000000..86b922c
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-32.2d.temperature.1100.png differ
diff --git a/doc/doxygen/deal.II/images/step-32.2d.temperature.1200.png b/doc/doxygen/deal.II/images/step-32.2d.temperature.1200.png
new file mode 100644
index 0000000..0580858
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-32.2d.temperature.1200.png differ
diff --git a/doc/doxygen/deal.II/images/step-32.2d.temperature.1300.png b/doc/doxygen/deal.II/images/step-32.2d.temperature.1300.png
new file mode 100644
index 0000000..f4ce086
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-32.2d.temperature.1300.png differ
diff --git a/doc/doxygen/deal.II/images/step-32.2d.temperature.1400.png b/doc/doxygen/deal.II/images/step-32.2d.temperature.1400.png
new file mode 100644
index 0000000..4b3bf93
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-32.2d.temperature.1400.png differ
diff --git a/doc/doxygen/deal.II/images/step-32.2d.temperature.1500.png b/doc/doxygen/deal.II/images/step-32.2d.temperature.1500.png
new file mode 100644
index 0000000..8b9f7bb
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-32.2d.temperature.1500.png differ
diff --git a/doc/doxygen/deal.II/images/step-32.2d.temperature.1600.png b/doc/doxygen/deal.II/images/step-32.2d.temperature.1600.png
new file mode 100644
index 0000000..c8ca268
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-32.2d.temperature.1600.png differ
diff --git a/doc/doxygen/deal.II/images/step-32.2d.temperature.1700.png b/doc/doxygen/deal.II/images/step-32.2d.temperature.1700.png
new file mode 100644
index 0000000..d29c757
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-32.2d.temperature.1700.png differ
diff --git a/doc/doxygen/deal.II/images/step-32.2d.temperature.1800.png b/doc/doxygen/deal.II/images/step-32.2d.temperature.1800.png
new file mode 100644
index 0000000..1f1eaed
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-32.2d.temperature.1800.png differ
diff --git a/doc/doxygen/deal.II/images/step-32.2d.temperature.1900.png b/doc/doxygen/deal.II/images/step-32.2d.temperature.1900.png
new file mode 100644
index 0000000..f95b93e
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-32.2d.temperature.1900.png differ
diff --git a/doc/doxygen/deal.II/images/step-32.2d.temperature.2000.png b/doc/doxygen/deal.II/images/step-32.2d.temperature.2000.png
new file mode 100644
index 0000000..2a5b3fc
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-32.2d.temperature.2000.png differ
diff --git a/doc/doxygen/deal.II/images/step-32.2d.temperature.2100.png b/doc/doxygen/deal.II/images/step-32.2d.temperature.2100.png
new file mode 100644
index 0000000..c0c5413
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-32.2d.temperature.2100.png differ
diff --git a/doc/doxygen/deal.II/images/step-32.3d-sphere.partition.png b/doc/doxygen/deal.II/images/step-32.3d-sphere.partition.png
new file mode 100644
index 0000000..8f3636b
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-32.3d-sphere.partition.png differ
diff --git a/doc/doxygen/deal.II/images/step-32.3d-sphere.solution.png b/doc/doxygen/deal.II/images/step-32.3d-sphere.solution.png
new file mode 100644
index 0000000..6a8e796
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-32.3d-sphere.solution.png differ
diff --git a/doc/doxygen/deal.II/images/step-32.3d.cube.0.png b/doc/doxygen/deal.II/images/step-32.3d.cube.0.png
new file mode 100644
index 0000000..38d61ad
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-32.3d.cube.0.png differ
diff --git a/doc/doxygen/deal.II/images/step-32.3d.cube.1.png b/doc/doxygen/deal.II/images/step-32.3d.cube.1.png
new file mode 100644
index 0000000..f347f1d
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-32.3d.cube.1.png differ
diff --git a/doc/doxygen/deal.II/images/step-32.beta.2d.png b/doc/doxygen/deal.II/images/step-32.beta.2d.png
new file mode 100644
index 0000000..afeb806
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-32.beta.2d.png differ
diff --git a/doc/doxygen/deal.II/images/step-32.beta_cr.2d.png b/doc/doxygen/deal.II/images/step-32.beta_cr.2d.png
new file mode 100644
index 0000000..56cac1f
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-32.beta_cr.2d.png differ
diff --git a/doc/doxygen/deal.II/images/step-33.oscillation.gif b/doc/doxygen/deal.II/images/step-33.oscillation.gif
new file mode 100644
index 0000000..c17e80b
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-33.oscillation.gif differ
diff --git a/doc/doxygen/deal.II/images/step-33.slide.adapt.ed2.gif b/doc/doxygen/deal.II/images/step-33.slide.adapt.ed2.gif
new file mode 100644
index 0000000..eb69ae8
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-33.slide.adapt.ed2.gif differ
diff --git a/doc/doxygen/deal.II/images/step-33.slide.ed2.gif b/doc/doxygen/deal.II/images/step-33.slide.ed2.gif
new file mode 100644
index 0000000..5d59939
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-33.slide.ed2.gif differ
diff --git a/doc/doxygen/deal.II/images/step-33.slide.gif b/doc/doxygen/deal.II/images/step-33.slide.gif
new file mode 100644
index 0000000..1b35b6b
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-33.slide.gif differ
diff --git a/doc/doxygen/deal.II/images/step-33.slide_adapt.gif b/doc/doxygen/deal.II/images/step-33.slide_adapt.gif
new file mode 100644
index 0000000..afefcac
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-33.slide_adapt.gif differ
diff --git a/doc/doxygen/deal.II/images/step-34_2d.png b/doc/doxygen/deal.II/images/step-34_2d.png
new file mode 100644
index 0000000..879a746
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-34_2d.png differ
diff --git a/doc/doxygen/deal.II/images/step-34_3d-2.png b/doc/doxygen/deal.II/images/step-34_3d-2.png
new file mode 100644
index 0000000..cbbadc6
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-34_3d-2.png differ
diff --git a/doc/doxygen/deal.II/images/step-34_3d.png b/doc/doxygen/deal.II/images/step-34_3d.png
new file mode 100644
index 0000000..ea18666
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-34_3d.png differ
diff --git a/doc/doxygen/deal.II/images/step-35.Re_100.velocity.0.png b/doc/doxygen/deal.II/images/step-35.Re_100.velocity.0.png
new file mode 100644
index 0000000..1925724
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-35.Re_100.velocity.0.png differ
diff --git a/doc/doxygen/deal.II/images/step-35.Re_100.velocity.1.png b/doc/doxygen/deal.II/images/step-35.Re_100.velocity.1.png
new file mode 100644
index 0000000..8a8a0ac
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-35.Re_100.velocity.1.png differ
diff --git a/doc/doxygen/deal.II/images/step-35.Re_100.velocity.2.png b/doc/doxygen/deal.II/images/step-35.Re_100.velocity.2.png
new file mode 100644
index 0000000..dcd6653
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-35.Re_100.velocity.2.png differ
diff --git a/doc/doxygen/deal.II/images/step-35.Re_100.velocity.3.png b/doc/doxygen/deal.II/images/step-35.Re_100.velocity.3.png
new file mode 100644
index 0000000..ca4c69a
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-35.Re_100.velocity.3.png differ
diff --git a/doc/doxygen/deal.II/images/step-35.Re_100.velocity.4.png b/doc/doxygen/deal.II/images/step-35.Re_100.velocity.4.png
new file mode 100644
index 0000000..1dd22a5
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-35.Re_100.velocity.4.png differ
diff --git a/doc/doxygen/deal.II/images/step-35.Re_100.vorticity.0.png b/doc/doxygen/deal.II/images/step-35.Re_100.vorticity.0.png
new file mode 100644
index 0000000..7345da8
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-35.Re_100.vorticity.0.png differ
diff --git a/doc/doxygen/deal.II/images/step-35.Re_100.vorticity.1.png b/doc/doxygen/deal.II/images/step-35.Re_100.vorticity.1.png
new file mode 100644
index 0000000..f21824c
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-35.Re_100.vorticity.1.png differ
diff --git a/doc/doxygen/deal.II/images/step-35.Re_100.vorticity.2.png b/doc/doxygen/deal.II/images/step-35.Re_100.vorticity.2.png
new file mode 100644
index 0000000..908f795
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-35.Re_100.vorticity.2.png differ
diff --git a/doc/doxygen/deal.II/images/step-35.Re_100.vorticity.3.png b/doc/doxygen/deal.II/images/step-35.Re_100.vorticity.3.png
new file mode 100644
index 0000000..7307df6
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-35.Re_100.vorticity.3.png differ
diff --git a/doc/doxygen/deal.II/images/step-35.Re_100.vorticity.4.png b/doc/doxygen/deal.II/images/step-35.Re_100.vorticity.4.png
new file mode 100644
index 0000000..6b571d7
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-35.Re_100.vorticity.4.png differ
diff --git a/doc/doxygen/deal.II/images/step-35.Re_500.velocity.0.png b/doc/doxygen/deal.II/images/step-35.Re_500.velocity.0.png
new file mode 100644
index 0000000..94bc72c
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-35.Re_500.velocity.0.png differ
diff --git a/doc/doxygen/deal.II/images/step-35.Re_500.velocity.1.png b/doc/doxygen/deal.II/images/step-35.Re_500.velocity.1.png
new file mode 100644
index 0000000..59ac23a
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-35.Re_500.velocity.1.png differ
diff --git a/doc/doxygen/deal.II/images/step-35.Re_500.vorticity.0.png b/doc/doxygen/deal.II/images/step-35.Re_500.vorticity.0.png
new file mode 100644
index 0000000..714cdfb
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-35.Re_500.vorticity.0.png differ
diff --git a/doc/doxygen/deal.II/images/step-35.Re_500.vorticity.1.png b/doc/doxygen/deal.II/images/step-35.Re_500.vorticity.1.png
new file mode 100644
index 0000000..b1905f0
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-35.Re_500.vorticity.1.png differ
diff --git a/doc/doxygen/deal.II/images/step-35.Re_500.zoom.png b/doc/doxygen/deal.II/images/step-35.Re_500.zoom.png
new file mode 100644
index 0000000..50f0f05
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-35.Re_500.zoom.png differ
diff --git a/doc/doxygen/deal.II/images/step-35.Re_500.zoom_2.png b/doc/doxygen/deal.II/images/step-35.Re_500.zoom_2.png
new file mode 100644
index 0000000..2c14ccd
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-35.Re_500.zoom_2.png differ
diff --git a/doc/doxygen/deal.II/images/step-35.geometry.png b/doc/doxygen/deal.II/images/step-35.geometry.png
new file mode 100644
index 0000000..2326ae9
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-35.geometry.png differ
diff --git a/doc/doxygen/deal.II/images/step-36.default.eigenfunction.0.png b/doc/doxygen/deal.II/images/step-36.default.eigenfunction.0.png
new file mode 100644
index 0000000..83ed98d
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-36.default.eigenfunction.0.png differ
diff --git a/doc/doxygen/deal.II/images/step-36.default.eigenfunction.1.png b/doc/doxygen/deal.II/images/step-36.default.eigenfunction.1.png
new file mode 100644
index 0000000..378aa78
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-36.default.eigenfunction.1.png differ
diff --git a/doc/doxygen/deal.II/images/step-36.default.eigenfunction.2.png b/doc/doxygen/deal.II/images/step-36.default.eigenfunction.2.png
new file mode 100644
index 0000000..b0b2824
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-36.default.eigenfunction.2.png differ
diff --git a/doc/doxygen/deal.II/images/step-36.default.eigenfunction.3.png b/doc/doxygen/deal.II/images/step-36.default.eigenfunction.3.png
new file mode 100644
index 0000000..93aa5d8
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-36.default.eigenfunction.3.png differ
diff --git a/doc/doxygen/deal.II/images/step-36.default.eigenfunction.4.png b/doc/doxygen/deal.II/images/step-36.default.eigenfunction.4.png
new file mode 100644
index 0000000..fc2f1d8
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-36.default.eigenfunction.4.png differ
diff --git a/doc/doxygen/deal.II/images/step-36.mod.eigenfunction.0.png b/doc/doxygen/deal.II/images/step-36.mod.eigenfunction.0.png
new file mode 100644
index 0000000..0257a58
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-36.mod.eigenfunction.0.png differ
diff --git a/doc/doxygen/deal.II/images/step-36.mod.eigenfunction.1.png b/doc/doxygen/deal.II/images/step-36.mod.eigenfunction.1.png
new file mode 100644
index 0000000..29412ed
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-36.mod.eigenfunction.1.png differ
diff --git a/doc/doxygen/deal.II/images/step-36.mod.eigenfunction.2.png b/doc/doxygen/deal.II/images/step-36.mod.eigenfunction.2.png
new file mode 100644
index 0000000..52ac33a
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-36.mod.eigenfunction.2.png differ
diff --git a/doc/doxygen/deal.II/images/step-36.mod.eigenfunction.3.png b/doc/doxygen/deal.II/images/step-36.mod.eigenfunction.3.png
new file mode 100644
index 0000000..eb30f08
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-36.mod.eigenfunction.3.png differ
diff --git a/doc/doxygen/deal.II/images/step-36.mod.eigenfunction.4.png b/doc/doxygen/deal.II/images/step-36.mod.eigenfunction.4.png
new file mode 100644
index 0000000..03eb4aa
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-36.mod.eigenfunction.4.png differ
diff --git a/doc/doxygen/deal.II/images/step-36.mod.potential.png b/doc/doxygen/deal.II/images/step-36.mod.potential.png
new file mode 100644
index 0000000..33974de
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-36.mod.potential.png differ
diff --git a/doc/doxygen/deal.II/images/step-37.solution.png b/doc/doxygen/deal.II/images/step-37.solution.png
new file mode 100644
index 0000000..9a30a53
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-37.solution.png differ
diff --git a/doc/doxygen/deal.II/images/step-38.solution-2d.png b/doc/doxygen/deal.II/images/step-38.solution-2d.png
new file mode 100644
index 0000000..732fcac
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-38.solution-2d.png differ
diff --git a/doc/doxygen/deal.II/images/step-38.solution-3d.png b/doc/doxygen/deal.II/images/step-38.solution-3d.png
new file mode 100644
index 0000000..f1a5e87
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-38.solution-3d.png differ
diff --git a/doc/doxygen/deal.II/images/step-38.warp-1.png b/doc/doxygen/deal.II/images/step-38.warp-1.png
new file mode 100644
index 0000000..8e3cc43
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-38.warp-1.png differ
diff --git a/doc/doxygen/deal.II/images/step-38.warp-2.png b/doc/doxygen/deal.II/images/step-38.warp-2.png
new file mode 100644
index 0000000..8df5989
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-38.warp-2.png differ
diff --git a/doc/doxygen/deal.II/images/step-39-convergence.png b/doc/doxygen/deal.II/images/step-39-convergence.png
new file mode 100644
index 0000000..3f85cdf
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-39-convergence.png differ
diff --git a/doc/doxygen/deal.II/images/step-4.contours-3d.png b/doc/doxygen/deal.II/images/step-4.contours-3d.png
new file mode 100644
index 0000000..cc59f3c
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-4.contours-3d.png differ
diff --git a/doc/doxygen/deal.II/images/step-4.solution-2d.png b/doc/doxygen/deal.II/images/step-4.solution-2d.png
new file mode 100644
index 0000000..97a6320
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-4.solution-2d.png differ
diff --git a/doc/doxygen/deal.II/images/step-4.solution-3d.png b/doc/doxygen/deal.II/images/step-4.solution-3d.png
new file mode 100644
index 0000000..5abb0e4
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-4.solution-3d.png differ
diff --git a/doc/doxygen/deal.II/images/step-40.256.png b/doc/doxygen/deal.II/images/step-40.256.png
new file mode 100644
index 0000000..e71abac
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-40.256.png differ
diff --git a/doc/doxygen/deal.II/images/step-40.4096.png b/doc/doxygen/deal.II/images/step-40.4096.png
new file mode 100644
index 0000000..c55295f
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-40.4096.png differ
diff --git a/doc/doxygen/deal.II/images/step-40.mesh.png b/doc/doxygen/deal.II/images/step-40.mesh.png
new file mode 100644
index 0000000..7668eee
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-40.mesh.png differ
diff --git a/doc/doxygen/deal.II/images/step-40.solution.png b/doc/doxygen/deal.II/images/step-40.solution.png
new file mode 100644
index 0000000..a9e261c
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-40.solution.png differ
diff --git a/doc/doxygen/deal.II/images/step-40.strong.png b/doc/doxygen/deal.II/images/step-40.strong.png
new file mode 100644
index 0000000..2cae6b5
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-40.strong.png differ
diff --git a/doc/doxygen/deal.II/images/step-40.strong2.png b/doc/doxygen/deal.II/images/step-40.strong2.png
new file mode 100644
index 0000000..3b8cdbd
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-40.strong2.png differ
diff --git a/doc/doxygen/deal.II/images/step-41.active-set.00.png b/doc/doxygen/deal.II/images/step-41.active-set.00.png
new file mode 100644
index 0000000..33b1e04
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-41.active-set.00.png differ
diff --git a/doc/doxygen/deal.II/images/step-41.active-set.03.png b/doc/doxygen/deal.II/images/step-41.active-set.03.png
new file mode 100644
index 0000000..6b05065
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-41.active-set.03.png differ
diff --git a/doc/doxygen/deal.II/images/step-41.active-set.06.png b/doc/doxygen/deal.II/images/step-41.active-set.06.png
new file mode 100644
index 0000000..9cbcda2
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-41.active-set.06.png differ
diff --git a/doc/doxygen/deal.II/images/step-41.active-set.09.png b/doc/doxygen/deal.II/images/step-41.active-set.09.png
new file mode 100644
index 0000000..febeb7f
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-41.active-set.09.png differ
diff --git a/doc/doxygen/deal.II/images/step-41.active-set.12.png b/doc/doxygen/deal.II/images/step-41.active-set.12.png
new file mode 100644
index 0000000..ea614e6
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-41.active-set.12.png differ
diff --git a/doc/doxygen/deal.II/images/step-41.active-set.15.png b/doc/doxygen/deal.II/images/step-41.active-set.15.png
new file mode 100644
index 0000000..fe8ec86
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-41.active-set.15.png differ
diff --git a/doc/doxygen/deal.II/images/step-41.active-set.18.png b/doc/doxygen/deal.II/images/step-41.active-set.18.png
new file mode 100644
index 0000000..29b2929
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-41.active-set.18.png differ
diff --git a/doc/doxygen/deal.II/images/step-41.active-set.png b/doc/doxygen/deal.II/images/step-41.active-set.png
new file mode 100644
index 0000000..6eaaa2c
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-41.active-set.png differ
diff --git a/doc/doxygen/deal.II/images/step-41.displacement.00.png b/doc/doxygen/deal.II/images/step-41.displacement.00.png
new file mode 100644
index 0000000..5192fda
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-41.displacement.00.png differ
diff --git a/doc/doxygen/deal.II/images/step-41.displacement.03.png b/doc/doxygen/deal.II/images/step-41.displacement.03.png
new file mode 100644
index 0000000..16b72f7
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-41.displacement.03.png differ
diff --git a/doc/doxygen/deal.II/images/step-41.displacement.06.png b/doc/doxygen/deal.II/images/step-41.displacement.06.png
new file mode 100644
index 0000000..439537d
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-41.displacement.06.png differ
diff --git a/doc/doxygen/deal.II/images/step-41.displacement.09.png b/doc/doxygen/deal.II/images/step-41.displacement.09.png
new file mode 100644
index 0000000..cebf2ad
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-41.displacement.09.png differ
diff --git a/doc/doxygen/deal.II/images/step-41.displacement.12.png b/doc/doxygen/deal.II/images/step-41.displacement.12.png
new file mode 100644
index 0000000..f194552
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-41.displacement.12.png differ
diff --git a/doc/doxygen/deal.II/images/step-41.displacement.15.png b/doc/doxygen/deal.II/images/step-41.displacement.15.png
new file mode 100644
index 0000000..fce1a33
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-41.displacement.15.png differ
diff --git a/doc/doxygen/deal.II/images/step-41.displacement.18.png b/doc/doxygen/deal.II/images/step-41.displacement.18.png
new file mode 100644
index 0000000..95587e0
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-41.displacement.18.png differ
diff --git a/doc/doxygen/deal.II/images/step-41.displacement.3d.00.png b/doc/doxygen/deal.II/images/step-41.displacement.3d.00.png
new file mode 100644
index 0000000..d5e6600
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-41.displacement.3d.00.png differ
diff --git a/doc/doxygen/deal.II/images/step-41.displacement.3d.03.png b/doc/doxygen/deal.II/images/step-41.displacement.3d.03.png
new file mode 100644
index 0000000..eea99e1
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-41.displacement.3d.03.png differ
diff --git a/doc/doxygen/deal.II/images/step-41.displacement.3d.06.png b/doc/doxygen/deal.II/images/step-41.displacement.3d.06.png
new file mode 100644
index 0000000..d4f790c
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-41.displacement.3d.06.png differ
diff --git a/doc/doxygen/deal.II/images/step-41.displacement.3d.09.png b/doc/doxygen/deal.II/images/step-41.displacement.3d.09.png
new file mode 100644
index 0000000..8dfc4b9
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-41.displacement.3d.09.png differ
diff --git a/doc/doxygen/deal.II/images/step-41.displacement.3d.12.png b/doc/doxygen/deal.II/images/step-41.displacement.3d.12.png
new file mode 100644
index 0000000..42f788b
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-41.displacement.3d.12.png differ
diff --git a/doc/doxygen/deal.II/images/step-41.displacement.3d.15.png b/doc/doxygen/deal.II/images/step-41.displacement.3d.15.png
new file mode 100644
index 0000000..9453509
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-41.displacement.3d.15.png differ
diff --git a/doc/doxygen/deal.II/images/step-41.displacement.3d.18.png b/doc/doxygen/deal.II/images/step-41.displacement.3d.18.png
new file mode 100644
index 0000000..6e2febe
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-41.displacement.3d.18.png differ
diff --git a/doc/doxygen/deal.II/images/step-41.displacement.png b/doc/doxygen/deal.II/images/step-41.displacement.png
new file mode 100644
index 0000000..2336716
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-41.displacement.png differ
diff --git a/doc/doxygen/deal.II/images/step-41.forces.01.png b/doc/doxygen/deal.II/images/step-41.forces.01.png
new file mode 100644
index 0000000..46421de
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-41.forces.01.png differ
diff --git a/doc/doxygen/deal.II/images/step-41.forces.09.png b/doc/doxygen/deal.II/images/step-41.forces.09.png
new file mode 100644
index 0000000..c1b6d87
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-41.forces.09.png differ
diff --git a/doc/doxygen/deal.II/images/step-41.forces.18.png b/doc/doxygen/deal.II/images/step-41.forces.18.png
new file mode 100644
index 0000000..3c3ed64
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-41.forces.18.png differ
diff --git a/doc/doxygen/deal.II/images/step-42.CellConstitution.png b/doc/doxygen/deal.II/images/step-42.CellConstitution.png
new file mode 100644
index 0000000..be03288
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-42.CellConstitution.png differ
diff --git a/doc/doxygen/deal.II/images/step-42.CellConstitutionBall.png b/doc/doxygen/deal.II/images/step-42.CellConstitutionBall.png
new file mode 100644
index 0000000..be03288
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-42.CellConstitutionBall.png differ
diff --git a/doc/doxygen/deal.II/images/step-42.CellConstitutionBall2.png b/doc/doxygen/deal.II/images/step-42.CellConstitutionBall2.png
new file mode 100644
index 0000000..e450980
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-42.CellConstitutionBall2.png differ
diff --git a/doc/doxygen/deal.II/images/step-42.CellConstitutionColorbar.png b/doc/doxygen/deal.II/images/step-42.CellConstitutionColorbar.png
new file mode 100644
index 0000000..ec9790f
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-42.CellConstitutionColorbar.png differ
diff --git a/doc/doxygen/deal.II/images/step-42.CellConstitutionLi2.png b/doc/doxygen/deal.II/images/step-42.CellConstitutionLi2.png
new file mode 100644
index 0000000..2326e7a
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-42.CellConstitutionLi2.png differ
diff --git a/doc/doxygen/deal.II/images/step-42.adaptive-contact.png b/doc/doxygen/deal.II/images/step-42.adaptive-contact.png
new file mode 100644
index 0000000..fe4cedc
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-42.adaptive-contact.png differ
diff --git a/doc/doxygen/deal.II/images/step-42.character.png b/doc/doxygen/deal.II/images/step-42.character.png
new file mode 100644
index 0000000..0400056
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-42.character.png differ
diff --git a/doc/doxygen/deal.II/images/step-42.starke-skalierung.png b/doc/doxygen/deal.II/images/step-42.starke-skalierung.png
new file mode 100644
index 0000000..2ef735f
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-42.starke-skalierung.png differ
diff --git a/doc/doxygen/deal.II/images/step-43.3d.mesh.png b/doc/doxygen/deal.II/images/step-43.3d.mesh.png
new file mode 100644
index 0000000..9a3494a
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-43.3d.mesh.png differ
diff --git a/doc/doxygen/deal.II/images/step-43.3d.saturation.png b/doc/doxygen/deal.II/images/step-43.3d.saturation.png
new file mode 100644
index 0000000..51d1d3e
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-43.3d.saturation.png differ
diff --git a/doc/doxygen/deal.II/images/step-43.3d.streamlines.png b/doc/doxygen/deal.II/images/step-43.3d.streamlines.png
new file mode 100644
index 0000000..698660d
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-43.3d.streamlines.png differ
diff --git a/doc/doxygen/deal.II/images/step-43.3d.velocity.png b/doc/doxygen/deal.II/images/step-43.3d.velocity.png
new file mode 100644
index 0000000..4d51e65
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-43.3d.velocity.png differ
diff --git a/doc/doxygen/deal.II/images/step-43.spe10.1.png b/doc/doxygen/deal.II/images/step-43.spe10.1.png
new file mode 100644
index 0000000..d97433e
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-43.spe10.1.png differ
diff --git a/doc/doxygen/deal.II/images/step-43.spe10.2.png b/doc/doxygen/deal.II/images/step-43.spe10.2.png
new file mode 100644
index 0000000..23f4de5
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-43.spe10.2.png differ
diff --git a/doc/doxygen/deal.II/images/step-44.2d-gr_2.png b/doc/doxygen/deal.II/images/step-44.2d-gr_2.png
new file mode 100644
index 0000000..cc32797
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-44.2d-gr_2.png differ
diff --git a/doc/doxygen/deal.II/images/step-44.2d-gr_5.png b/doc/doxygen/deal.II/images/step-44.2d-gr_5.png
new file mode 100644
index 0000000..5017218
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-44.2d-gr_5.png differ
diff --git a/doc/doxygen/deal.II/images/step-44.Normalised_runtime.png b/doc/doxygen/deal.II/images/step-44.Normalised_runtime.png
new file mode 100644
index 0000000..5b067c0
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-44.Normalised_runtime.png differ
diff --git a/doc/doxygen/deal.II/images/step-44.Q1-P0_convergence.png b/doc/doxygen/deal.II/images/step-44.Q1-P0_convergence.png
new file mode 100644
index 0000000..c1fb0ea
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-44.Q1-P0_convergence.png differ
diff --git a/doc/doxygen/deal.II/images/step-44.Q1-P0_gr_1_p_ratio_80-dilatation.png b/doc/doxygen/deal.II/images/step-44.Q1-P0_gr_1_p_ratio_80-dilatation.png
new file mode 100644
index 0000000..f230612
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-44.Q1-P0_gr_1_p_ratio_80-dilatation.png differ
diff --git a/doc/doxygen/deal.II/images/step-44.Q1-P0_gr_1_p_ratio_80-displacement.png b/doc/doxygen/deal.II/images/step-44.Q1-P0_gr_1_p_ratio_80-displacement.png
new file mode 100644
index 0000000..95b1f48
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-44.Q1-P0_gr_1_p_ratio_80-displacement.png differ
diff --git a/doc/doxygen/deal.II/images/step-44.Q1-P0_gr_1_p_ratio_80-pressure.png b/doc/doxygen/deal.II/images/step-44.Q1-P0_gr_1_p_ratio_80-pressure.png
new file mode 100644
index 0000000..12f766c
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-44.Q1-P0_gr_1_p_ratio_80-pressure.png differ
diff --git a/doc/doxygen/deal.II/images/step-44.Q2-P1_convergence.png b/doc/doxygen/deal.II/images/step-44.Q2-P1_convergence.png
new file mode 100644
index 0000000..15f0fb3
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-44.Q2-P1_convergence.png differ
diff --git a/doc/doxygen/deal.II/images/step-44.Q2-P1_gr_3_p_ratio_80-dilatation.png b/doc/doxygen/deal.II/images/step-44.Q2-P1_gr_3_p_ratio_80-dilatation.png
new file mode 100644
index 0000000..1fc7389
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-44.Q2-P1_gr_3_p_ratio_80-dilatation.png differ
diff --git a/doc/doxygen/deal.II/images/step-44.Q2-P1_gr_3_p_ratio_80-displacement.png b/doc/doxygen/deal.II/images/step-44.Q2-P1_gr_3_p_ratio_80-displacement.png
new file mode 100644
index 0000000..de5715d
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-44.Q2-P1_gr_3_p_ratio_80-displacement.png differ
diff --git a/doc/doxygen/deal.II/images/step-44.Q2-P1_gr_3_p_ratio_80-pressure.png b/doc/doxygen/deal.II/images/step-44.Q2-P1_gr_3_p_ratio_80-pressure.png
new file mode 100644
index 0000000..a286d7e
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-44.Q2-P1_gr_3_p_ratio_80-pressure.png differ
diff --git a/doc/doxygen/deal.II/images/step-44.setup.png b/doc/doxygen/deal.II/images/step-44.setup.png
new file mode 100644
index 0000000..d31b495
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-44.setup.png differ
diff --git a/doc/doxygen/deal.II/images/step-45.non_periodic.png b/doc/doxygen/deal.II/images/step-45.non_periodic.png
new file mode 100644
index 0000000..db0d421
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-45.non_periodic.png differ
diff --git a/doc/doxygen/deal.II/images/step-45.periodic.png b/doc/doxygen/deal.II/images/step-45.periodic.png
new file mode 100644
index 0000000..7315753
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-45.periodic.png differ
diff --git a/doc/doxygen/deal.II/images/step-45.periodic_cells.png b/doc/doxygen/deal.II/images/step-45.periodic_cells.png
new file mode 100644
index 0000000..3e19509
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-45.periodic_cells.png differ
diff --git a/doc/doxygen/deal.II/images/step-45.solution.png b/doc/doxygen/deal.II/images/step-45.solution.png
new file mode 100644
index 0000000..d66d77a
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-45.solution.png differ
diff --git a/doc/doxygen/deal.II/images/step-45_non_periodic.png b/doc/doxygen/deal.II/images/step-45_non_periodic.png
new file mode 100644
index 0000000..534a9e2
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-45_non_periodic.png differ
diff --git a/doc/doxygen/deal.II/images/step-45_periodic.png b/doc/doxygen/deal.II/images/step-45_periodic.png
new file mode 100644
index 0000000..5acd01f
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-45_periodic.png differ
diff --git a/doc/doxygen/deal.II/images/step-46.3d.displacement.png b/doc/doxygen/deal.II/images/step-46.3d.displacement.png
new file mode 100644
index 0000000..bd0861f
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-46.3d.displacement.png differ
diff --git a/doc/doxygen/deal.II/images/step-46.3d.streamlines.png b/doc/doxygen/deal.II/images/step-46.3d.streamlines.png
new file mode 100644
index 0000000..bab1ec3
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-46.3d.streamlines.png differ
diff --git a/doc/doxygen/deal.II/images/step-46.3d.velocity.png b/doc/doxygen/deal.II/images/step-46.3d.velocity.png
new file mode 100644
index 0000000..b763608
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-46.3d.velocity.png differ
diff --git a/doc/doxygen/deal.II/images/step-46.displacement.png b/doc/doxygen/deal.II/images/step-46.displacement.png
new file mode 100644
index 0000000..9d8119f
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-46.displacement.png differ
diff --git a/doc/doxygen/deal.II/images/step-46.layout.png b/doc/doxygen/deal.II/images/step-46.layout.png
new file mode 100644
index 0000000..59ea2c2
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-46.layout.png differ
diff --git a/doc/doxygen/deal.II/images/step-46.pressure.png b/doc/doxygen/deal.II/images/step-46.pressure.png
new file mode 100644
index 0000000..59f865c
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-46.pressure.png differ
diff --git a/doc/doxygen/deal.II/images/step-46.velocity-magnitude.png b/doc/doxygen/deal.II/images/step-46.velocity-magnitude.png
new file mode 100644
index 0000000..66c6bda
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-46.velocity-magnitude.png differ
diff --git a/doc/doxygen/deal.II/images/step-46.velocity.png b/doc/doxygen/deal.II/images/step-46.velocity.png
new file mode 100644
index 0000000..1d61cbb
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-46.velocity.png differ
diff --git a/doc/doxygen/deal.II/images/step-49.gmsh_picture.png b/doc/doxygen/deal.II/images/step-49.gmsh_picture.png
new file mode 100644
index 0000000..0e6860b
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-49.gmsh_picture.png differ
diff --git a/doc/doxygen/deal.II/images/step-49.grid-1.png b/doc/doxygen/deal.II/images/step-49.grid-1.png
new file mode 100644
index 0000000..684f168
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-49.grid-1.png differ
diff --git a/doc/doxygen/deal.II/images/step-49.grid-2.png b/doc/doxygen/deal.II/images/step-49.grid-2.png
new file mode 100644
index 0000000..c97fee5
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-49.grid-2.png differ
diff --git a/doc/doxygen/deal.II/images/step-49.grid-2a.png b/doc/doxygen/deal.II/images/step-49.grid-2a.png
new file mode 100644
index 0000000..8f62699
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-49.grid-2a.png differ
diff --git a/doc/doxygen/deal.II/images/step-49.grid-2b.png b/doc/doxygen/deal.II/images/step-49.grid-2b.png
new file mode 100644
index 0000000..e0b8722
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-49.grid-2b.png differ
diff --git a/doc/doxygen/deal.II/images/step-49.grid-2d-refined.png b/doc/doxygen/deal.II/images/step-49.grid-2d-refined.png
new file mode 100644
index 0000000..55f337e
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-49.grid-2d-refined.png differ
diff --git a/doc/doxygen/deal.II/images/step-49.grid-3.png b/doc/doxygen/deal.II/images/step-49.grid-3.png
new file mode 100644
index 0000000..34f1b34
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-49.grid-3.png differ
diff --git a/doc/doxygen/deal.II/images/step-49.grid-3a.png b/doc/doxygen/deal.II/images/step-49.grid-3a.png
new file mode 100644
index 0000000..f9914ac
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-49.grid-3a.png differ
diff --git a/doc/doxygen/deal.II/images/step-49.grid-4.png b/doc/doxygen/deal.II/images/step-49.grid-4.png
new file mode 100644
index 0000000..ff8d5ce
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-49.grid-4.png differ
diff --git a/doc/doxygen/deal.II/images/step-49.grid-4base.png b/doc/doxygen/deal.II/images/step-49.grid-4base.png
new file mode 100644
index 0000000..f18de2c
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-49.grid-4base.png differ
diff --git a/doc/doxygen/deal.II/images/step-49.grid-5.png b/doc/doxygen/deal.II/images/step-49.grid-5.png
new file mode 100644
index 0000000..5b0920d
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-49.grid-5.png differ
diff --git a/doc/doxygen/deal.II/images/step-49.grid-5a.png b/doc/doxygen/deal.II/images/step-49.grid-5a.png
new file mode 100644
index 0000000..3257a9a
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-49.grid-5a.png differ
diff --git a/doc/doxygen/deal.II/images/step-49.grid-6.png b/doc/doxygen/deal.II/images/step-49.grid-6.png
new file mode 100644
index 0000000..ba416ad
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-49.grid-6.png differ
diff --git a/doc/doxygen/deal.II/images/step-49.grid-6a.png b/doc/doxygen/deal.II/images/step-49.grid-6a.png
new file mode 100644
index 0000000..34240c3
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-49.grid-6a.png differ
diff --git a/doc/doxygen/deal.II/images/step-49.grid-7.png b/doc/doxygen/deal.II/images/step-49.grid-7.png
new file mode 100644
index 0000000..d56dfe6
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-49.grid-7.png differ
diff --git a/doc/doxygen/deal.II/images/step-49.grid-7a.png b/doc/doxygen/deal.II/images/step-49.grid-7a.png
new file mode 100644
index 0000000..96f8f3b
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-49.grid-7a.png differ
diff --git a/doc/doxygen/deal.II/images/step-49.yuhan.1.png b/doc/doxygen/deal.II/images/step-49.yuhan.1.png
new file mode 100644
index 0000000..22b04c5
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-49.yuhan.1.png differ
diff --git a/doc/doxygen/deal.II/images/step-49.yuhan.2.png b/doc/doxygen/deal.II/images/step-49.yuhan.2.png
new file mode 100644
index 0000000..c65c659
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-49.yuhan.2.png differ
diff --git a/doc/doxygen/deal.II/images/step-49.yuhan.3.png b/doc/doxygen/deal.II/images/step-49.yuhan.3.png
new file mode 100644
index 0000000..b59e259
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-49.yuhan.3.png differ
diff --git a/doc/doxygen/deal.II/images/step-49.yuhan.4.png b/doc/doxygen/deal.II/images/step-49.yuhan.4.png
new file mode 100644
index 0000000..7e06efa
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-49.yuhan.4.png differ
diff --git a/doc/doxygen/deal.II/images/step-49.yuhan.5.png b/doc/doxygen/deal.II/images/step-49.yuhan.5.png
new file mode 100644
index 0000000..bd9e2e2
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-49.yuhan.5.png differ
diff --git a/doc/doxygen/deal.II/images/step-49.yuhan.6.png b/doc/doxygen/deal.II/images/step-49.yuhan.6.png
new file mode 100644
index 0000000..7cc279e
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-49.yuhan.6.png differ
diff --git a/doc/doxygen/deal.II/images/step-49.yuhan.7.png b/doc/doxygen/deal.II/images/step-49.yuhan.7.png
new file mode 100644
index 0000000..1998228
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-49.yuhan.7.png differ
diff --git a/doc/doxygen/deal.II/images/step-5.solution-0.png b/doc/doxygen/deal.II/images/step-5.solution-0.png
new file mode 100644
index 0000000..63ed70d
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-5.solution-0.png differ
diff --git a/doc/doxygen/deal.II/images/step-5.solution-1.png b/doc/doxygen/deal.II/images/step-5.solution-1.png
new file mode 100644
index 0000000..03f1d15
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-5.solution-1.png differ
diff --git a/doc/doxygen/deal.II/images/step-5.solution-2.png b/doc/doxygen/deal.II/images/step-5.solution-2.png
new file mode 100644
index 0000000..0a4a9a1
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-5.solution-2.png differ
diff --git a/doc/doxygen/deal.II/images/step-5.solution-3.png b/doc/doxygen/deal.II/images/step-5.solution-3.png
new file mode 100644
index 0000000..3333499
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-5.solution-3.png differ
diff --git a/doc/doxygen/deal.II/images/step-5.solution-4.png b/doc/doxygen/deal.II/images/step-5.solution-4.png
new file mode 100644
index 0000000..87805a7
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-5.solution-4.png differ
diff --git a/doc/doxygen/deal.II/images/step-5.solution-5.png b/doc/doxygen/deal.II/images/step-5.solution-5.png
new file mode 100644
index 0000000..f3f3aac
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-5.solution-5.png differ
diff --git a/doc/doxygen/deal.II/images/step-51.2d_plain.png b/doc/doxygen/deal.II/images/step-51.2d_plain.png
new file mode 100644
index 0000000..4202ca1
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-51.2d_plain.png differ
diff --git a/doc/doxygen/deal.II/images/step-51.2d_post.png b/doc/doxygen/deal.II/images/step-51.2d_post.png
new file mode 100644
index 0000000..c139f9d
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-51.2d_post.png differ
diff --git a/doc/doxygen/deal.II/images/step-51.2d_postb.png b/doc/doxygen/deal.II/images/step-51.2d_postb.png
new file mode 100644
index 0000000..1ffb972
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-51.2d_postb.png differ
diff --git a/doc/doxygen/deal.II/images/step-51.2dt_plain.png b/doc/doxygen/deal.II/images/step-51.2dt_plain.png
new file mode 100644
index 0000000..84519c4
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-51.2dt_plain.png differ
diff --git a/doc/doxygen/deal.II/images/step-51.2dt_post.png b/doc/doxygen/deal.II/images/step-51.2dt_post.png
new file mode 100644
index 0000000..50e4a67
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-51.2dt_post.png differ
diff --git a/doc/doxygen/deal.II/images/step-51.2dt_postb.png b/doc/doxygen/deal.II/images/step-51.2dt_postb.png
new file mode 100644
index 0000000..0b1e910
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-51.2dt_postb.png differ
diff --git a/doc/doxygen/deal.II/images/step-51.3d_plain.png b/doc/doxygen/deal.II/images/step-51.3d_plain.png
new file mode 100644
index 0000000..90a1c1b
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-51.3d_plain.png differ
diff --git a/doc/doxygen/deal.II/images/step-51.3d_post.png b/doc/doxygen/deal.II/images/step-51.3d_post.png
new file mode 100644
index 0000000..599a939
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-51.3d_post.png differ
diff --git a/doc/doxygen/deal.II/images/step-51.3d_postb.png b/doc/doxygen/deal.II/images/step-51.3d_postb.png
new file mode 100644
index 0000000..e96b793
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-51.3d_postb.png differ
diff --git a/doc/doxygen/deal.II/images/step-51.3dt_plain.png b/doc/doxygen/deal.II/images/step-51.3dt_plain.png
new file mode 100644
index 0000000..03a63dc
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-51.3dt_plain.png differ
diff --git a/doc/doxygen/deal.II/images/step-51.3dt_post.png b/doc/doxygen/deal.II/images/step-51.3dt_post.png
new file mode 100644
index 0000000..d3b15da
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-51.3dt_post.png differ
diff --git a/doc/doxygen/deal.II/images/step-51.3dt_postb.png b/doc/doxygen/deal.II/images/step-51.3dt_postb.png
new file mode 100644
index 0000000..f17e9bc
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-51.3dt_postb.png differ
diff --git a/doc/doxygen/deal.II/images/step-51.post_2.png b/doc/doxygen/deal.II/images/step-51.post_2.png
new file mode 100644
index 0000000..872d307
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-51.post_2.png differ
diff --git a/doc/doxygen/deal.II/images/step-51.post_3.png b/doc/doxygen/deal.II/images/step-51.post_3.png
new file mode 100644
index 0000000..48fc153
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-51.post_3.png differ
diff --git a/doc/doxygen/deal.II/images/step-51.post_4.png b/doc/doxygen/deal.II/images/step-51.post_4.png
new file mode 100644
index 0000000..2d05d83
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-51.post_4.png differ
diff --git a/doc/doxygen/deal.II/images/step-51.post_8.png b/doc/doxygen/deal.II/images/step-51.post_8.png
new file mode 100644
index 0000000..e0ce3e6
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-51.post_8.png differ
diff --git a/doc/doxygen/deal.II/images/step-51.post_q3_2.png b/doc/doxygen/deal.II/images/step-51.post_q3_2.png
new file mode 100644
index 0000000..97af395
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-51.post_q3_2.png differ
diff --git a/doc/doxygen/deal.II/images/step-51.sol_2.png b/doc/doxygen/deal.II/images/step-51.sol_2.png
new file mode 100644
index 0000000..73f7bff
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-51.sol_2.png differ
diff --git a/doc/doxygen/deal.II/images/step-51.sol_3.png b/doc/doxygen/deal.II/images/step-51.sol_3.png
new file mode 100644
index 0000000..7d10e3c
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-51.sol_3.png differ
diff --git a/doc/doxygen/deal.II/images/step-51.sol_4.png b/doc/doxygen/deal.II/images/step-51.sol_4.png
new file mode 100644
index 0000000..e6a76c7
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-51.sol_4.png differ
diff --git a/doc/doxygen/deal.II/images/step-51.sol_8.png b/doc/doxygen/deal.II/images/step-51.sol_8.png
new file mode 100644
index 0000000..ab97d47
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-51.sol_8.png differ
diff --git a/doc/doxygen/deal.II/images/step-51.sol_q3_2.png b/doc/doxygen/deal.II/images/step-51.sol_q3_2.png
new file mode 100644
index 0000000..3c6971b
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-51.sol_q3_2.png differ
diff --git a/doc/doxygen/deal.II/images/step-53.mesh.png b/doc/doxygen/deal.II/images/step-53.mesh.png
new file mode 100644
index 0000000..71d7821
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-53.mesh.png differ
diff --git a/doc/doxygen/deal.II/images/step-53.smooth-geometry.png b/doc/doxygen/deal.II/images/step-53.smooth-geometry.png
new file mode 100644
index 0000000..e940ebe
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-53.smooth-geometry.png differ
diff --git a/doc/doxygen/deal.II/images/step-53.topo.png b/doc/doxygen/deal.II/images/step-53.topo.png
new file mode 100644
index 0000000..e537f84
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-53.topo.png differ
diff --git a/doc/doxygen/deal.II/images/step-53.topozoom.png b/doc/doxygen/deal.II/images/step-53.topozoom.png
new file mode 100644
index 0000000..ce6511b
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-53.topozoom.png differ
diff --git a/doc/doxygen/deal.II/images/step-54.CurveSplit.png b/doc/doxygen/deal.II/images/step-54.CurveSplit.png
new file mode 100644
index 0000000..8ea7b1f
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-54.CurveSplit.png differ
diff --git a/doc/doxygen/deal.II/images/step-54.DirectionalProjection.png b/doc/doxygen/deal.II/images/step-54.DirectionalProjection.png
new file mode 100644
index 0000000..70fbd3f
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-54.DirectionalProjection.png differ
diff --git a/doc/doxygen/deal.II/images/step-54.NormalProjection.png b/doc/doxygen/deal.II/images/step-54.NormalProjection.png
new file mode 100644
index 0000000..effc0a6
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-54.NormalProjection.png differ
diff --git a/doc/doxygen/deal.II/images/step-54.NormalProjectionEdge.png b/doc/doxygen/deal.II/images/step-54.NormalProjectionEdge.png
new file mode 100644
index 0000000..3717baa
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-54.NormalProjectionEdge.png differ
diff --git a/doc/doxygen/deal.II/images/step-54.ProjectionComparisons.png b/doc/doxygen/deal.II/images/step-54.ProjectionComparisons.png
new file mode 100644
index 0000000..7ce4b26
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-54.ProjectionComparisons.png differ
diff --git a/doc/doxygen/deal.II/images/step-54.bare.png b/doc/doxygen/deal.II/images/step-54.bare.png
new file mode 100644
index 0000000..ca91065
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-54.bare.png differ
diff --git a/doc/doxygen/deal.II/images/step-54.common_0.png b/doc/doxygen/deal.II/images/step-54.common_0.png
new file mode 100644
index 0000000..3376218
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-54.common_0.png differ
diff --git a/doc/doxygen/deal.II/images/step-54.directional_1.png b/doc/doxygen/deal.II/images/step-54.directional_1.png
new file mode 100644
index 0000000..c782da8
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-54.directional_1.png differ
diff --git a/doc/doxygen/deal.II/images/step-54.directional_2.png b/doc/doxygen/deal.II/images/step-54.directional_2.png
new file mode 100644
index 0000000..2b7fe5a
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-54.directional_2.png differ
diff --git a/doc/doxygen/deal.II/images/step-54.directional_3.png b/doc/doxygen/deal.II/images/step-54.directional_3.png
new file mode 100644
index 0000000..6b44b0c
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-54.directional_3.png differ
diff --git a/doc/doxygen/deal.II/images/step-54.directional_4.png b/doc/doxygen/deal.II/images/step-54.directional_4.png
new file mode 100644
index 0000000..7a1cb73
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-54.directional_4.png differ
diff --git a/doc/doxygen/deal.II/images/step-54.directional_5.png b/doc/doxygen/deal.II/images/step-54.directional_5.png
new file mode 100644
index 0000000..21094bb
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-54.directional_5.png differ
diff --git a/doc/doxygen/deal.II/images/step-54.directional_front_3.png b/doc/doxygen/deal.II/images/step-54.directional_front_3.png
new file mode 100644
index 0000000..81975ad
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-54.directional_front_3.png differ
diff --git a/doc/doxygen/deal.II/images/step-54.directional_front_4.png b/doc/doxygen/deal.II/images/step-54.directional_front_4.png
new file mode 100644
index 0000000..8987363
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-54.directional_front_4.png differ
diff --git a/doc/doxygen/deal.II/images/step-54.directional_front_5.png b/doc/doxygen/deal.II/images/step-54.directional_front_5.png
new file mode 100644
index 0000000..5807c50
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-54.directional_front_5.png differ
diff --git a/doc/doxygen/deal.II/images/step-54.normal_1.png b/doc/doxygen/deal.II/images/step-54.normal_1.png
new file mode 100644
index 0000000..9c2a7b3
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-54.normal_1.png differ
diff --git a/doc/doxygen/deal.II/images/step-54.normal_2.png b/doc/doxygen/deal.II/images/step-54.normal_2.png
new file mode 100644
index 0000000..945e2bb
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-54.normal_2.png differ
diff --git a/doc/doxygen/deal.II/images/step-54.normal_3.png b/doc/doxygen/deal.II/images/step-54.normal_3.png
new file mode 100644
index 0000000..8ae1c07
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-54.normal_3.png differ
diff --git a/doc/doxygen/deal.II/images/step-54.normal_4.png b/doc/doxygen/deal.II/images/step-54.normal_4.png
new file mode 100644
index 0000000..a85ccd2
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-54.normal_4.png differ
diff --git a/doc/doxygen/deal.II/images/step-54.normal_5.png b/doc/doxygen/deal.II/images/step-54.normal_5.png
new file mode 100644
index 0000000..679d264
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-54.normal_5.png differ
diff --git a/doc/doxygen/deal.II/images/step-54.normal_front_3.png b/doc/doxygen/deal.II/images/step-54.normal_front_3.png
new file mode 100644
index 0000000..1de6be7
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-54.normal_front_3.png differ
diff --git a/doc/doxygen/deal.II/images/step-54.normal_front_4.png b/doc/doxygen/deal.II/images/step-54.normal_front_4.png
new file mode 100644
index 0000000..cec0300
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-54.normal_front_4.png differ
diff --git a/doc/doxygen/deal.II/images/step-54.normal_front_5.png b/doc/doxygen/deal.II/images/step-54.normal_front_5.png
new file mode 100644
index 0000000..8b066e8
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-54.normal_front_5.png differ
diff --git a/doc/doxygen/deal.II/images/step-54.normal_to_mesh_1.png b/doc/doxygen/deal.II/images/step-54.normal_to_mesh_1.png
new file mode 100644
index 0000000..e813ec7
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-54.normal_to_mesh_1.png differ
diff --git a/doc/doxygen/deal.II/images/step-54.normal_to_mesh_2.png b/doc/doxygen/deal.II/images/step-54.normal_to_mesh_2.png
new file mode 100644
index 0000000..4796a26
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-54.normal_to_mesh_2.png differ
diff --git a/doc/doxygen/deal.II/images/step-54.normal_to_mesh_3.png b/doc/doxygen/deal.II/images/step-54.normal_to_mesh_3.png
new file mode 100644
index 0000000..7d5e3c7
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-54.normal_to_mesh_3.png differ
diff --git a/doc/doxygen/deal.II/images/step-54.normal_to_mesh_4.png b/doc/doxygen/deal.II/images/step-54.normal_to_mesh_4.png
new file mode 100644
index 0000000..3b65a51
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-54.normal_to_mesh_4.png differ
diff --git a/doc/doxygen/deal.II/images/step-54.normal_to_mesh_5.png b/doc/doxygen/deal.II/images/step-54.normal_to_mesh_5.png
new file mode 100644
index 0000000..38bf0bc
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-54.normal_to_mesh_5.png differ
diff --git a/doc/doxygen/deal.II/images/step-54.normal_to_mesh_front_3.png b/doc/doxygen/deal.II/images/step-54.normal_to_mesh_front_3.png
new file mode 100644
index 0000000..7ebd53a
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-54.normal_to_mesh_front_3.png differ
diff --git a/doc/doxygen/deal.II/images/step-54.normal_to_mesh_front_4.png b/doc/doxygen/deal.II/images/step-54.normal_to_mesh_front_4.png
new file mode 100644
index 0000000..50b5b96
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-54.normal_to_mesh_front_4.png differ
diff --git a/doc/doxygen/deal.II/images/step-54.normal_to_mesh_front_5.png b/doc/doxygen/deal.II/images/step-54.normal_to_mesh_front_5.png
new file mode 100644
index 0000000..58e8b86
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-54.normal_to_mesh_front_5.png differ
diff --git a/doc/doxygen/deal.II/images/step-6.grid-0.png b/doc/doxygen/deal.II/images/step-6.grid-0.png
new file mode 100644
index 0000000..acb2908
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-6.grid-0.png differ
diff --git a/doc/doxygen/deal.II/images/step-6.grid-1.png b/doc/doxygen/deal.II/images/step-6.grid-1.png
new file mode 100644
index 0000000..2620ac1
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-6.grid-1.png differ
diff --git a/doc/doxygen/deal.II/images/step-6.grid-2.png b/doc/doxygen/deal.II/images/step-6.grid-2.png
new file mode 100644
index 0000000..eed733f
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-6.grid-2.png differ
diff --git a/doc/doxygen/deal.II/images/step-6.grid-3.png b/doc/doxygen/deal.II/images/step-6.grid-3.png
new file mode 100644
index 0000000..fb729be
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-6.grid-3.png differ
diff --git a/doc/doxygen/deal.II/images/step-6.grid-4.png b/doc/doxygen/deal.II/images/step-6.grid-4.png
new file mode 100644
index 0000000..4e97a09
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-6.grid-4.png differ
diff --git a/doc/doxygen/deal.II/images/step-6.grid-5.png b/doc/doxygen/deal.II/images/step-6.grid-5.png
new file mode 100644
index 0000000..fc17a70
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-6.grid-5.png differ
diff --git a/doc/doxygen/deal.II/images/step-6.grid-6.png b/doc/doxygen/deal.II/images/step-6.grid-6.png
new file mode 100644
index 0000000..d2c8e9d
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-6.grid-6.png differ
diff --git a/doc/doxygen/deal.II/images/step-6.grid-7.png b/doc/doxygen/deal.II/images/step-6.grid-7.png
new file mode 100644
index 0000000..512dafb
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-6.grid-7.png differ
diff --git a/doc/doxygen/deal.II/images/step-6.manifold-grid-0.png b/doc/doxygen/deal.II/images/step-6.manifold-grid-0.png
new file mode 100644
index 0000000..ba40dc2
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-6.manifold-grid-0.png differ
diff --git a/doc/doxygen/deal.II/images/step-6.manifold-grid-1.png b/doc/doxygen/deal.II/images/step-6.manifold-grid-1.png
new file mode 100644
index 0000000..1d4210d
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-6.manifold-grid-1.png differ
diff --git a/doc/doxygen/deal.II/images/step-6.manifold-grid-2.png b/doc/doxygen/deal.II/images/step-6.manifold-grid-2.png
new file mode 100644
index 0000000..7a1c62a
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-6.manifold-grid-2.png differ
diff --git a/doc/doxygen/deal.II/images/step-6.manifold-grid-3.png b/doc/doxygen/deal.II/images/step-6.manifold-grid-3.png
new file mode 100644
index 0000000..6bc7824
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-6.manifold-grid-3.png differ
diff --git a/doc/doxygen/deal.II/images/step-6.manifold-grid-4-bad.png b/doc/doxygen/deal.II/images/step-6.manifold-grid-4-bad.png
new file mode 100644
index 0000000..d4fcd12
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-6.manifold-grid-4-bad.png differ
diff --git a/doc/doxygen/deal.II/images/step-6.manifold-grid-4.png b/doc/doxygen/deal.II/images/step-6.manifold-grid-4.png
new file mode 100644
index 0000000..47345e5
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-6.manifold-grid-4.png differ
diff --git a/doc/doxygen/deal.II/images/step-6.manifold-grid-5.png b/doc/doxygen/deal.II/images/step-6.manifold-grid-5.png
new file mode 100644
index 0000000..db827ca
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-6.manifold-grid-5.png differ
diff --git a/doc/doxygen/deal.II/images/step-6.manifold-grid-6.png b/doc/doxygen/deal.II/images/step-6.manifold-grid-6.png
new file mode 100644
index 0000000..31b2d42
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-6.manifold-grid-6.png differ
diff --git a/doc/doxygen/deal.II/images/step-6.manifold-grid-7.png b/doc/doxygen/deal.II/images/step-6.manifold-grid-7.png
new file mode 100644
index 0000000..10365ba
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-6.manifold-grid-7.png differ
diff --git a/doc/doxygen/deal.II/images/step-6.q1.dofs_vs_iterations.png b/doc/doxygen/deal.II/images/step-6.q1.dofs_vs_iterations.png
new file mode 100644
index 0000000..717f00e
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-6.q1.dofs_vs_iterations.png differ
diff --git a/doc/doxygen/deal.II/images/step-6.q1.dofs_vs_time.png b/doc/doxygen/deal.II/images/step-6.q1.dofs_vs_time.png
new file mode 100644
index 0000000..a6bfac3
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-6.q1.dofs_vs_time.png differ
diff --git a/doc/doxygen/deal.II/images/step-6.q2.dofs_vs_iterations.png b/doc/doxygen/deal.II/images/step-6.q2.dofs_vs_iterations.png
new file mode 100644
index 0000000..1a9a511
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-6.q2.dofs_vs_iterations.png differ
diff --git a/doc/doxygen/deal.II/images/step-6.q2.dofs_vs_time.png b/doc/doxygen/deal.II/images/step-6.q2.dofs_vs_time.png
new file mode 100644
index 0000000..7a48576
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-6.q2.dofs_vs_time.png differ
diff --git a/doc/doxygen/deal.II/images/step-6.solution.png b/doc/doxygen/deal.II/images/step-6.solution.png
new file mode 100644
index 0000000..0c156d9
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-6.solution.png differ
diff --git a/doc/doxygen/deal.II/images/step-7.solution.png b/doc/doxygen/deal.II/images/step-7.solution.png
new file mode 100644
index 0000000..e4988cc
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-7.solution.png differ
diff --git a/doc/doxygen/deal.II/images/step-8.grid.png b/doc/doxygen/deal.II/images/step-8.grid.png
new file mode 100644
index 0000000..ce3c0c8
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-8.grid.png differ
diff --git a/doc/doxygen/deal.II/images/step-8.vectors.png b/doc/doxygen/deal.II/images/step-8.vectors.png
new file mode 100644
index 0000000..2a08574
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-8.vectors.png differ
diff --git a/doc/doxygen/deal.II/images/step-8.x.png b/doc/doxygen/deal.II/images/step-8.x.png
new file mode 100644
index 0000000..1310b95
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-8.x.png differ
diff --git a/doc/doxygen/deal.II/images/step-8.y.png b/doc/doxygen/deal.II/images/step-8.y.png
new file mode 100644
index 0000000..4eb027d
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-8.y.png differ
diff --git a/doc/doxygen/deal.II/images/step-9.grid.png b/doc/doxygen/deal.II/images/step-9.grid.png
new file mode 100644
index 0000000..2c28509
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-9.grid.png differ
diff --git a/doc/doxygen/deal.II/images/step-9.solution.png b/doc/doxygen/deal.II/images/step-9.solution.png
new file mode 100644
index 0000000..e5164f3
Binary files /dev/null and b/doc/doxygen/deal.II/images/step-9.solution.png differ
diff --git a/doc/doxygen/headers/boundary.h b/doc/doxygen/headers/boundary.h
new file mode 100644
index 0000000..b6d9bf2
--- /dev/null
+++ b/doc/doxygen/headers/boundary.h
@@ -0,0 +1,136 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+ * @defgroup boundary Boundary and manifold description for triangulations
+ *
+ * The classes in this module are concerned with the description of the
+ * geometry of a domain in which a Triangulation lives. This geometry
+ * description is necessary in three contexts:
+ * <ul>
+ *
+ *   <li> Mesh refinement: Whenever a cell is refined, it is necessary
+ *   to introduce at least one new vertex. In the simplest case, one
+ *   assumes that the cells and their faces consist of straight line
+ *   segments, bilinear surfaces or trilinear volumes between the
+ *   vertices of the original, coarsest mesh, and the next vertex is
+ *   simply put into the middle of the old ones. This is the default
+ *   behavior of the Triangulation class, and is described by the
+ *   StraightBoundary and FlatManifold classes.
+ *
+ *   On the other hand, if one deals with curved geometries and
+ *   boundaries, this is not the appropriate thing to do. The classes
+ *   derived from the Manifold and Boundary base classes describe the
+ *   geometry of a domain. One can then attach an object of a class
+ *   derived from these base classes to the Triangulation object using
+ *   the Triangulation::set_boundary() or
+ *   Triangulation::set_manifold() functions, and the Triangulation
+ *   will ask the manifold object where a new vertex should be located
+ *   upon mesh refinement. Several classes already exist to support
+ *   the boundary of the most common geometries, e.g.,
+ *   CylinderBoundary, HyperBallBoundary, or HyperShellBoundary.
+ *
+ *   <li> Integration: When using higher order finite element methods,
+ *   it is often necessary to compute cell terms (like cell
+ *   contributions to the matrix and right hand side of the linear
+ *   system) using curved approximations of the geometry, rather than
+ *   the straight line approximation. The actual implementation of
+ *   such curved elements happens in the Mapping class (see the @ref
+ *   mapping module), which however obtains its information about the
+ *   manifold description from the classes described here. The same
+ *   is, of course, true when integrating boundary terms (e.g.,
+ *   inhomogeneous Neumann boundary conditions).
+ *
+ *   <li> In cases where a Triangulation is embedded into a higher
+ *   dimensional space, i.e., whenever the second template argument of
+ *   the Triangulation class is explicitly specified and larger than
+ *   the first (for an example, see step-34), the manifold description
+ *   objects serve as a tool to describe the geometry not only of the
+ *   boundary of the domain but of the domain itself, in case the
+ *   domain is a manifold that is in fact curved. In these cases, one
+ *   can use the Triangulation::set_manifold() function to indicate
+ *   what manifold description to use when refining the curve, or when
+ *   computing integrals using high order mappings.
+ *
+ * </ul>
+ *
+ * In the context of triangulations, each object stores a number
+ * called <tt>manifold_id</tt>, and each face of a cell that is
+ * located at the boundary of the domain stores a number called
+ * <tt>boundary_id</tt> that uniquely identifies which part of the
+ * boundary this face is on. If nothing is specified at creation time,
+ * each boundary face has a zero boundary id and each triangulation
+ * object has an invalid manifold id. On the other hand, the boundary
+ * id of faces and the manifold id of objects can be set either at
+ * creation time or later by looping over all cells and querying their
+ * faces.
+ *
+ * It is then possible to associate objects describing the geometry to
+ * certain boundary_id values used in a triangulation and to certain
+ * manifold_id values.
+ *
+ * Before version 8.2, the library allowed only boundary faces to
+ * follow a curved geometric description. Since version 8.2 this has
+ * been introduced also for interior faces and cells, and the
+ * boundary_id has been separated from the manifold_id.
+ *
+ * Although the old behavior is still supported, one should use the
+ * boundary indicator only for the physical meaning associated, for
+ * example, to boundary conditions, and revert to manifold_ids to
+ * describe the geometry of the triangulation.
+ *
+ * The behavior of the Triangulation class w.r.t. geometry
+ * descriptions is the following: Triangulation::set_boundary() and
+ * Triangulation::set_manifold() do the exact same thing: they attach
+ * a manifold descriptor to the specified id. The first function
+ * expects a Boundary descriptor (which is a specialization of a
+ * Manifold description) and is provided mainly for backward
+ * compatibility, while the second function expects a Manifold
+ * descriptor. Notice that the Triangulation class only uses the
+ * Manifold interface, and you could describe both the interior and
+ * the boundary of the domain using the same object. The additional
+ * information contained in the Boundary interface is related to the
+ * computation of the exact normals. 
+ *
+ * Whenever a new vertex is needed in an object, the Triangulation
+ * queries the manifold_id of the object which needs refinement. If
+ * the manifold_id is set to numbers::invalid_manifold_id, then the
+ * Triangulation queries the boundary_id (if the face is on the
+ * boundary) or the material_id (if the Triangulation is of
+ * codimension one and the object is a cell). If the previous queries
+ * resulted in a number different from numbers::invalid_manifold_id,
+ * then the Triangulation looks whether a previous call to
+ * Triangulation::set_manifold() (or set_boundary()) was performed
+ * with the given id, and if yes, it uses the stored object to obtain
+ * new vertices, otherwise it uses a FlatManifold or StraightBoundary
+ * object.
+ *
+ * @note This behavior is backward compatible to that of deal.II versions
+ * prior to 8.2. If one ignores the manifold_id of an object (i.e., if it has
+ * never been set), by default it is and remains set to
+ * numbers::invalid_manifold_id. In that case, the first query above will
+ * trigger a query to the old style boundary_id. This behavior will be
+ * maintained for a while, but might eventually be changed. The suggested
+ * strategy is to use manifold_ids to describe the geometry, and boundary_ids
+ * to describe boundary conditions.
+ * 
+ *
+ * @see @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+ * @see @ref GlossManifoldIndicator "Glossary entry on manifold indicators"
+ *
+ * @ingroup grid
+ * @author Wolfgang Bangerth, Luca Heltai, 1998-2014
+ */
diff --git a/doc/doxygen/headers/c++11.h b/doc/doxygen/headers/c++11.h
new file mode 100644
index 0000000..1445a26
--- /dev/null
+++ b/doc/doxygen/headers/c++11.h
@@ -0,0 +1,115 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2014, 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+ * @defgroup CPP11 deal.II and the C++11 standard
+ *
+ * At present, deal.II only requires a compiler that conforms to the
+ * <a href="http://en.wikipedia.org/wiki/C%2B%2B#Standardization">C++98</a>
+ * standard and does not rely on compilers to either
+ * provide the features introduced in
+ * <a href="http://en.wikipedia.org/wiki/C%2B%2B03">C++03</a> or
+ * <a href="http://en.wikipedia.org/wiki/C%2B%2B11">C++11</a>.
+ * 
+ * That said, deal.II interfaces with C++11 in several ways as
+ * outlined below.
+ * 
+ * 
+ * <h3>Use of C++11 classes and substitution by BOOST</h3>
+ *
+ * deal.II makes use of many of the classes that were only
+ * added as part of C++11. This includes std::shared_ptr,
+ * std::function, std::bind, std::tuple and a number of others.
+ * Because we do not assume that the compiler actually supports
+ * C++11, there needs to be a way to ensure that these classes
+ * are available also for pre-C++11 compilers. This is done using
+ * the following approach:
+ *
+ * - We create a namespace std_cxx11.
+ * - If the compiler supports C++11, we import the relevant classes
+ *   and functions into this namespace using statements such as
+ *   @code
+ *     namespace std_cxx11 {  using std::shared_ptr;  }
+ *   @endcode
+ * - If the compiler does not support C++11, if its support for
+ *   C++11 is incomplete, or if it is buggy, then we use as a fallback
+ *   the corresponding classes and functions provided by the
+ *   <a href="http://www.boost.org">BOOST library</a> through
+ *   statements such as
+ *   @code
+ *     namespace std_cxx11 {  using boost::shared_ptr;  }
+ *   @endcode
+ *
+ * Consequently, namespace std_cxx11 contains all of the symbols
+ * we require. The classes that can be used this way are obviously
+ * a subset of the intersection between C++11 and what BOOST provides.
+ *
+ *
+ * <h3>Support for C++11 range-based for loops</h3>
+ *
+ * C++11 provides many new core language features, such as
+ * rvalue references and move semantics, initializer lists, tuples,
+ * variadic templates and
+ * others. For a complete list, see  http://en.wikipedia.org/wiki/C++11 .
+ * We cannot use most of these in deal.II itself because we cannot rely
+ * on compilers supporting them.
+ *
+ * However, this does not preclude users from using such features in their
+ * own applications if they can be reasonably sure that the compilers on
+ * all of the systems they will work on do support C++11. An example are
+ * <a href="http://en.wikipedia.org/wiki/C++11#Type_inference">automatically
+ * typed variables</a>.
+ *
+ * deal.II does provide some features that make programming simpler when using
+ * C++11. This is true, in particular, for
+ * <a href="http://en.wikipedia.org/wiki/C++11#Range-based_for_loop">range-based
+ * for loops</a>. deal.II-based codes often have many loops of the kind
+ * @code
+ *   Triangulation<dim> triangulation;
+ *   ...
+ *   typename Triangulation<dim>::active_cell_iterator
+ *     cell = triangulation.begin_active(),
+ *     endc = triangulation.end();
+ *   for (; cell!=endc; ++cell)
+ *     cell->set_refine_flag();
+ * @endcode
+ * Using C++11's range-based for loops, you can now write this as follows:
+ * @code
+ *   Triangulation<dim> triangulation;
+ *   ...
+ *   for (auto cell : triangulation.active_cell_iterators())
+ *     cell->set_refine_flag();
+ * @endcode
+ * This relies on functions such as Triangulation::active_cell_iterators(),
+ * and equivalents in the DoF handler classes,
+ * DoFHandler::active_cell_iterators(), hp::DoFHandler::active_cell_iterators().
+ * There are variants of these functions that provide iterator ranges
+ * for all cells (not just the active ones) and for cells on individual
+ * levels.
+ *
+ *
+ * <h3>Things that are only enabled if your compiler supports C++11</h3>
+ *
+ * There is a small number of places inside deal.II where we allow ourselves
+ * the use of C++11 because it makes things so much simpler. These features
+ * are simply not available if your compiler does not support C++11, but this
+ * does not affect the usability of the remainder of deal.II.
+ *
+ * Specifically, these places are:
+ * - The LinearOperator class and all associated functions such as
+ *   linear_operator(), null_operator(), and everything else that is part of
+ *   the LAOperators documentation module.
+ */
diff --git a/doc/doxygen/headers/coding_conventions.h b/doc/doxygen/headers/coding_conventions.h
new file mode 100644
index 0000000..cf8d091
--- /dev/null
+++ b/doc/doxygen/headers/coding_conventions.h
@@ -0,0 +1,404 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+ * @page CodingConventions Coding conventions used throughout deal.II
+
+Throughout deal.II, we strive to keep our programming style and the kind of
+interfaces we provide as consistent as possible. To this end, we have adopted
+a set of coding conventions that we attempt to follow wherever possible. They
+have two parts: style issues, and something we call "defensive programming",
+the latter being an attempt to let our code help us find bugs.  When reading
+through them, it is important to remember that styles are not god given or
+better than any other set of conventions; their purpose is merely to keep
+deal.II as uniform as possible. Uniformity reduces the number of bugs we
+produce because we can, for example, always assume that input arguments come
+before output arguments of a function call. They also simplify reading code
+because some things become clear already by looking at the style a piece of
+code is written, without having to look up the exact definition of something.
+
+<h3>Notes on deal.II indentation</h3>
+
+<p>deal.II uses <code>astyle</code> to normalize indentation. A
+style file is provided at
+<code>
+<pre>
+  ./contrib/utilities/astyle.rc
+</pre>
+</code>
+
+<p>Before a commit, you should run
+<code>
+<pre>
+  astyle --options=<SOURCE DIRECTORY>/contrib/utilities/astyle.rc <file>
+</pre>
+</code>
+on each of your files. This will make sure indentation is conforming to the
+style guidelines outlined in this page.</p>
+
+<h3>Style issues</h3>
+
+<ol>
+<li> %Functions which return the number of something (number of cells,
+  degrees of freedom, etc) should start with <code>n_*</code>. Example:
+  SparsityPattern::n_nonzero_entries().</li>
+
+<li> %Functions which set a bit or flag should start with <code>set_*</code>;
+  functions which clear bits or flags should be named <code>clear_*</code>.
+  Example: CellIterator::set_refine_flag().</li>
+
+<li> Traditional logical operators should be used instead of their English
+  equivalents (i.e., use <code>&&</code>, <code>||</code>, and <code>!</code>
+  instead of <code>and</code>, <code>or</code>, and <code>not</code>).</li>
+
+<li> In the implementation files, after each function, at least three
+  empty lines are expected to
+  enable better readability. One empty line occurs in functions to
+  group blocks of code, two empty lines are not enough to visibly
+  distinguish sufficiently that the code belongs to two different functions.</li>
+
+<li> Whenever an integer variable can only assume nonnegative values,
+  it is marked as unsigned. The same applies to functions that can only
+  return positive or zero values. Example: Triangulation::n_active_cells().</li>
+
+<li> Whenever an argument to a function will not be changed, it should be marked
+  const, even if passed by value. Generally, we mark input parameters as
+  const. This aids as an additional documentation tool to clarify the
+  intent of a parameter (input, output, or both)
+  and lets the compiler issue warnings if such a parameter is
+  changed, which is often either involuntary or poor style.</li>
+
+<li> Whenever a function does not change any of the member variables of
+  the embedding class/object, it should be marked as const.</li>
+
+<li> %Function and variable names may not consist of only one or two
+  letters, unless the variable is a pure counting index.</li>
+
+<li> Use the geometry information in GeometryInfo to get the
+  number of faces per cell, the number of children per cell, the
+  child indices of the child cells adjacent to face 3, etc, rather
+  than writing them into the code directly as <code>2*dim</code>,
+  <code>(1@<@<dim)</code> and
+  <code>{0,3}</code>. This reduces the possibilities for errors and enhances
+  readability of code.</li>
+
+<li> The layout of class declarations is the following: first the
+  block of public functions, beginning with the constructors, then
+  the destructors. If there are public member variables, these have
+  to occur before the constructor. Public variables shall only be
+  used if constant (in particular if they are static and constant)
+  or unavoidable.
+  <br>
+  After the public members, the protected and finally the private
+  members are to be listed. The order is as above: first variables
+  then functions.
+  <br>
+  Exceptions shall be declared at the end of the public section
+  before the non-public sections start.</li>
+
+<li> If a function has both input and output parameters, usually the
+  input parameters shall precede the output parameters, unless there
+  are good reasons to change this order. (The most common reason is trailing
+  input parameters with default values.) </li>
+
+<li> Exceptions are used for %internal parameter checking and for
+  consistency checks through the Assert macro. Exception handling
+  like done by the C++ language (<code>try/throw/catch</code>, and using the
+  AssertThrow macro) is used to
+  handle run time errors (like I/O failures) which must be on
+  in any case, not only in debug mode.</li>
+
+<li> Classes and types generally are named using uppercase letters to denote
+  word beginnings (e.g. TriaIterator) — sometimes called
+  <a href="http://en.wikipedia.org/wiki/Camel_case"><i>camel
+  case</i></a> — while functions and variables
+  use lowercase letters and underscores to separate words.
+  The only exceptions are the iterator typedefs in Triangulation
+  and DoFHandler (named cell_iterator, active_line_iterator, etc)
+  to make the connection to the standard library container classes clear.</li>
+
+<li> For classes with multiple template arguments, the dimension is usually
+  put before the data type specifier, i.e., we use Point<dim,number> and not
+  Point<number,dim>.</li>
+
+<li> Each class has to have at least 200 pages of documentation ;-)</li>
+
+</ol>
+
+
+<h3>Defensive programming</h3>
+
+<p> Defensive programming is a term that we use frequently when we talk about
+writing code while in the mindset that errors will happen. Here, errors can
+come in two ways: first, I can make a mistake myself while writing a
+function; and secondly, someone else can make a mistake while calling my
+function. In either case, I would like to write my code in such a way that
+errors are (i) as unlikely as possible, (ii) that the compiler can already
+find some of the mistakes, and (iii) that the remaining mistakes are
+relatively easy to find, for example because the program aborts. Defensive
+programming is then a set of strategies that make these goals more likely.
+</p>
+
+<p>
+Over time, we have learned a number of techniques to this end, some of which
+we list here:
+<ol>
+<li> <i>Assert preconditions on parameters:</i> People call functions with wrong
+  or nonsensical parameters, all the time. As the prototypical example,
+  consider a trivial implementation of vector addition:
+  <code>
+  <pre>
+    Vector &
+    operator += (Vector       &lhs,
+                 const Vector &rhs)
+    {
+      for (unsigned int i=0; i<lhs.size(); ++i)
+        lhs(i) += rhs(i);
+      return lhs;
+    }
+  </pre>
+  </code>
+  While correct, this function will get into trouble if the two vectors
+  do not have the same size. You think it is silly to call this function
+  with vectors of different size? Yes, of course it is. But it happens
+  all the time: people forget to reinitialize a vector, or it is reset in
+  a different function, etc. It happens. So if you are in such an unlucky
+  case, it can take a long time to figure out what's going on because
+  you are likely to just read uninitialized memory, or maybe you are
+  writing to memory the <code>lhs</code> vector doesn't actually own.
+  Neither is going to lead to immediate termination of the program,
+  but you'll probably get random errors at a later time. It would be
+  much easier if the program just stopped here right away. The following
+  implementation will do exactly this:
+  <code>
+  <pre>
+    Vector &
+    operator += (Vector       &lhs,
+                 const Vector &rhs)
+    {
+      Assert (lhs.size() == rhs.size(),
+              ExcDimensionMismatch (lhs.size(), rhs.size()));
+      for (unsigned int i=0; i<lhs.size(); ++i)
+        lhs(i) += rhs(i);
+      return lhs;
+    }
+  </pre>
+  </code>
+  The <code>Assert</code> macro ensures that the condition is true
+  at run time, and otherwise prints a string containing information
+  encoded by the second argument and aborts the program. This way,
+  when you write a new program that happens to call this function,
+  you will learn of your error right away and have the opportunity
+  to fix it without ever having to seriously debug anything.
+  <p>
+  As a general guideline, whenever you implement a new function,
+  think about the <i>preconditions</i> on parameters, i.e. what does the
+  function expect to be true about each of them, or their combination.
+  Then write assertions for all of these preconditions. This may be
+  half a dozen assertions in some cases but remember that each assertion
+  is a potential bug already found through trivial means.
+  <p>
+  In a final note, let us remark that assertions are of course expensive:
+  they may make a program 3 or 5 times slower when you link it against
+  the debug version of the library. But if you consider your <i>overall</i>
+  development time, the ability to find bugs quickly probably far outweighs
+  the time you spend waiting for your program to finish. Furthermore,
+  calls to the Assert macro are removed from the program in optimized mode
+  (which you presumably only use once you know that everything runs just
+  fine in debug mode). The optimized libraries are faster by a factor of
+  3-5 than the debug libraries, at the price that it's much harder to find
+  bugs.
+  </li>
+
+<li> <i>Assert postconditions:</i> If a function computes something
+  non-trivial there may be a bug in the code. To find these, use
+  postconditions: just like you have certain knowledge about useful values
+  for input parameters, you have knowledge about what you expect possible
+  return values to be. For example, a function that computes the norm of
+  a vector would expect the norm to be positive. You can write this as
+  follows:
+  <code>
+  <pre>
+    double norm (const Vector &v)
+    {
+      double s = 0;
+      for (unsigned int i=0; i<v.size(); ++i)
+        s += v(i) * v(i);
+
+      Assert (s >= 0, ExcInternalError());
+      return std::sqrt(s);
+    }
+  </pre>
+  </code>
+  This function is too simple to really justify this assertion, but imagine
+  the computation to be lengthier and you can see how the assertion helps
+  you ensure (or <i>hedge</i>) yourself against mistakes. Note that one
+  could argue that the assertion should be removed once we've run the program
+  a number of times and found that the condition never triggers. But it's
+  better to leave it right where it is: it encodes for the future (and for
+  readers) knowledge you have about the function; if someone comes along
+  and replaces the implementation of the function by a more efficient
+  algorithm, the assertion can help make sure that the function continues
+  to do what it is supposed to do.
+  </li>
+
+<li> <i>Assert internal states:</i> In a similar vein, if you have a
+  complex algorithm, use assertions to ensure that your mental model
+  of what is going on matches what is indeed true. For example, assume
+  you are writing a function that ensures that mesh sizes do not change
+  too much locally. You may end up with a code of the following kind:
+  <code>
+  <pre>
+    for (cell=triangulation.begin(); ...)
+      for (face=0; ...)
+        {
+          if (something)
+            { ... }
+          else
+            {
+                // we have a cell whose neighbor must
+                // be at the boundary if we got here
+            }
+        }
+  </pre>
+  </code>
+  The conditions that got us into the else-branch may be
+  complicated, and while it may be true that we believed that the
+  only possibility we got here is that the neighbor is at the boundary,
+  there may have been a bug in our implementation. There may also have been
+  a bug in our thinking, or someone changes the code way above in the same
+  function and forgets about the issue here, or a change at a completely
+  different location in the library makes the assumption untenable. In
+  all of these cases, the explicit statement of our assertion makes sure
+  that these problems are easily found.
+  </li>
+
+<li> <i>Initialize variables at the point of their declaration if they
+  live on the stack:</i>
+  Traditional C required that variables are declared at the beginning of
+  the function even if they are only used further below. This leads to
+  code like this that we may imagine in a 1d code:
+  <code>
+  <pre>
+    template @<int dim@>
+    void foo ()
+    {
+      Point<dim> cell_center;
+      ... // something lengthy and complicated
+      for (cell = dof_handler.begin_active(); ...)
+        {
+          cell_center = (cell->vertex(0) + cell->vertex(1)) / 2;
+          ...
+        }
+  </pre>
+  </code>
+  The problem is that if the code between the declaration and initialization
+  is long and complicated, you can't look up on one page what the type of
+  a variable is and what its value may be. In fact, it may not even be
+  quite clear that the variable is used initialized at all, or whether it
+  is accidentally left uninitialized.
+  <p>
+  A better way to do this would be as follows:
+  <code>
+  <pre>
+    template @<int dim@>
+    void foo ()
+    {
+      ... // something lengthy and complicated
+      for (cell = dof_handler.begin_active(); ...)
+        {
+          Point<dim> cell_center = (cell->vertex(0) + cell->vertex(1)) / 2;
+          ...
+        }
+  </pre>
+  </code>
+  This makes it much clearer what the type of the variable is
+  and that it is in fact only ever used when initialized. Furthermore,
+  if someone wants to read the code to see what the variable is in fact
+  doing, declaring and initializing it in the innermost possible scope
+  makes this task easier: we don't have to look upwards for it beyond
+  the declaration, and we don't have to look downward beyond the end
+  of the current scope since this is where the variable dies.
+  <p>
+  As a final note, it is clear that you can only do this sort of stuff
+  for variables that completely live on the stack without allocating memory
+  on the heap. Within deal.II, this is only true for builtin types like
+  <code>int, double, char</code>, etc, as well as the Point and Tensor
+  classes. Everything else has something like a <code>std::vector</code>
+  as a member variable, which requires memory allocation — you don't
+  want to declare these inside loops, at least not if the loop is
+  traversed frequently.
+  </li>
+
+<li> <i>Make variables const:</i> To pick up on the example above, note
+  that in most cases we will never change the variable so initialized
+  any more. In other words, if this is the case, we may as well write
+  things as follows:
+  <code>
+  <pre>
+    template @<int dim@>
+    void foo ()
+    {
+      ... // something lengthy and complicated
+      for (cell = dof_handler.begin_active(); ...)
+        {
+          <b>const</b> Point<dim> cell_center = (cell->vertex(0) + cell->vertex(1)) / 2;
+          ...
+        }
+  </pre>
+  </code>
+  By marking the variable as constant we make sure that we don't accidentally
+  change it. For example, the compiler could catch code like this:
+  <code>
+  <pre>
+        if (cell_center[0] = 0)
+          ...
+  </pre>
+  </code>
+  This was most likely meant to be a <code>==</code> rather than an
+  assignment. By marking the variable as const, the compiler would have
+  told us about this bug. Maybe equally importantly, human readers of the
+  code need not look further down whether the value of the variable may
+  actually be changed somewhere between declaration and use — it
+  can't be if it is marked as const.
+  </li>
+
+<li> <i>Make input arguments of functions const:</i> The same essentially
+  holds true as well for function arguments: If you have no intention
+  of changing a variable (which is typically the case for input arguments),
+  then mark it as constant. For example, the following function should take
+  its argument as a constant value:
+  <code>
+  <pre>
+     template @<int dim@>
+     typename Triangulation<dim>::cell_iterator
+     CellAccessor<dim>::child (const unsigned int child_no)
+     {
+       ...;
+       return something;
+     }
+  </pre>
+  </code>
+  Here, the user calls <code>cell-@>child(3)</code>, for example. There really
+  is no reason why the function would ever want to change the value of
+  the <code>child_no</code> argument — so mark it as constant:
+  this both helps the reader of the code understand that this is an
+  input argument of the function for which we need not search below whether
+  it is ever changed, and it helps the compiler help us find bugs if
+  we ever accidentally change the value.</li>
+</ol>
+
+ */
diff --git a/doc/doxygen/headers/concepts.h b/doc/doxygen/headers/concepts.h
new file mode 100644
index 0000000..3273aa0
--- /dev/null
+++ b/doc/doxygen/headers/concepts.h
@@ -0,0 +1,270 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+ * @defgroup Concepts Concepts, or expectations on template parameters
+ *
+ * Sometimes imposing constraints on the type of an object without requiring
+ * it to belong to a specific inheritance hierarchy is useful. These are
+ * usually referred to as <em>concepts</em> in the C++ community. This module
+ * lists the concepts commonly used in deal.II with brief descriptions of
+ * their intent. The convention in deal.II for listing constraints on a type
+ * is to provide the name of the concept as a <code>typename</code> in a
+ * template: for example, the type of a Vector depends on the type of the
+ * underlying field, and so it is defined as a
+ * template:
+ * @code
+ * template <typename Number>
+ * class Vector;
+ * @endcode
+ * The point here is that you are creating a vector that can store
+ * elements of type @p Number. But there are some underlying
+ * assumptions on this. For example, the deal.II Vector class is not
+ * intended to be used just as a collection (unlike
+ * <code>std::vector</code>) but defines vector space operations such
+ * as addition of vectors, or the norm of vectors. Consequently, the
+ * data type users can specify for @p Number must satisfy certain
+ * conditions (i.e., it must conform to or "model" a "concept"):
+ * Specifically, the type must denote objects that represent the
+ * elements of what mathematicians call a "field" (which you can think
+ * of as, well, "numbers": things we can add, multiply, divide, take
+ * the absolute value of, etc). The point of a concept is then to
+ * describe <em>what conditions a type must satisfy</em> to be a valid
+ * template argument in a given context.
+ *
+ * This page describes these conditions for a number of concepts used
+ * throughout deal.II. Specifically, in the example above, the
+ * @ref ConceptNumber "Number concept" discussed below describes the
+ * types that could be used as argument for the Vector class.
+ *
+ * <dl>
+ *
+ * <dt class="concepts">@anchor ConceptDoFHandlerType <b>DoFHandlerType</b></dt>
+ *
+ * <dd>
+ * deal.II includes both DoFHandler and hp::DoFHandler as objects which manage
+ * degrees of freedom on a mesh. Though the two do not share any sort of
+ * inheritance relationship, they are similar enough that many functions just
+ * need something which resembles a DoFHandler to work correctly.
+ * </dd>
+ *
+ * <dt class="concepts">@anchor ConceptMatrixType <b>MatrixType</b></dt>
+ *
+ * <dd>
+ * Many functions and classes in deal.II require an object which knows how to
+ * calculate matrix-vector products (the member function <code>vmult</code>),
+ * transposed matrix-vector products (the member function
+ * <code>Tvmult</code>), as well as the `multiply and add' equivalents
+ * <code>vmult_add</code> and <code>Tvmult_add</code>. Some functions only
+ * require <code>vmult</code> and <code>Tvmult</code>, but an object should
+ * implement all four member functions if the template requires a MatrixType
+ * argument. Writing classes that satisfy these conditions is a sufficiently
+ * common occurrence that the LinearOperator class was written to make things
+ * easier; see @ref LAOperators for more information.
+ *
+ * One way to think of <code>MatrixType</code> is to pretend it is a base
+ * class with the following signature (this is nearly the interface provided
+ * by SparseMatrix):
+ *
+ * @code
+ * class MatrixType
+ * {
+ * public:
+ *   template <typename VectorType>
+ *   virtual void vmult(VectorType &u, const VectorType &v) const =0;
+ *
+ *   template <typename VectorType>
+ *   virtual void Tvmult(VectorType &u, const VectorType &v) const =0;
+ *
+ *   template <typename VectorType>
+ *   virtual void vmult_add(VectorType &u, const VectorType &v) const =0;
+ *
+ *   template <typename VectorType>
+ *   virtual void Tvmult_add(VectorType &u, const VectorType &v) const =0;
+ * };
+ * @endcode
+ *
+ * Template functions in C++ cannot be virtual (which is the main reason why
+ * this approach is not used in deal.II), so implementing this interface with
+ * inheritance will not work, but it is still a good way to think about this
+ * template concept. One can use the PointerMatrixAux class to implement
+ * <code>vmult_add</code> and <code>Tvmult_add</code> instead of implementing
+ * them manually.
+ * </dd>
+ *
+ * <dt class="concepts">@anchor ConceptMeshType <b>MeshType</b></dt>
+ *
+ * <dd>
+ * Meshes can be thought of as arrays of vertices and connectivities, but a
+ * more fruitful view is to consider them as <i>collections of cells</i>. In
+ * C++, collections are often called <i>containers</i> (typical containers are
+ * std::vector, std::list, etc.) and they are characterized by the ability to
+ * iterate over the elements of the collection. The <tt>MeshType</tt> concept
+ * refers to any container which defines appropriate methods (such as
+ * DoFHandler::begin_active()) and <tt>typedefs</tt> (such as
+ * DoFHandler::active_cell_iterator) for managing collections of cells.
+ *
+ * Instances of Triangulation, DoFHandler, and hp::DoFHandler may
+ * all be considered as containers of cells. In fact, the most important parts
+ * of the public interface of these classes consists simply of the ability to
+ * get iterators to their elements. Since these parts of the interface are
+ * generic, i.e., the functions have the same name in all classes, it is
+ * possible to write operations that do not actually care whether they work on
+ * a triangulation or a DoF handler object. Examples abound, for example, in
+ * the GridTools namespace, underlining the power of the abstraction that
+ * meshes and DoF handlers can all be considered simply as collections
+ * (containers) of cells.
+ *
+ * On the other hand, meshes are non-standard containers unlike std::vector or
+ * std::list in that they can be sliced several ways. For example, one can
+ * iterate over the subset of active cells or over all cells; likewise, cells
+ * are organized into levels and one can get iterator ranges for only the
+ * cells on one level. Generally, however, all classes that implement the
+ * containers-of-cells concept use the same function names to provide the same
+ * functionality.
+ *
+ * %Functions that may be called with either class indicate this by accepting
+ * a template parameter like
+ * @code
+ * template <template <int, int> class MeshType>
+ * @endcode
+ * or
+ * @code
+ * template <typename MeshType>
+ * @endcode
+ * The classes that satisfy this concept are collectively referred to as
+ * <em>mesh classes</em>. The exact definition of <tt>MeshType</tt> relies a
+ * lot on library internals, but it can be summarized as any class with the
+ * following properties:
+ * <ol>
+ *   <li>A <tt>typedef</tt> named <tt>active_cell_iterator</tt>.
+ *   </li>
+ *   <li>A method <tt>get_triangulation()</tt> which returns a reference to
+ *   the underlying geometrical description (one of the Triangulation classes)
+ *   of the collection of cells. If the mesh happens to be a Triangulation,
+ *   then the mesh just returns a reference to itself.
+ *   </li>
+ *   <li>A method <tt>begin_active()</tt> which returns an iterator pointing
+ *   to the first active cell.
+ *   </li>
+ *   <li>A static member value <tt>dimension</tt> containing the dimension of
+ *       the object itself.
+ *   </li>
+ *   <li>A static member value <tt>space_dimension</tt> containing the dimension
+ *       of the space in which the object lives (e.g., a 2D surface in a 3D
+ *       setting would have <tt>dimension = 2, space_dimension = 3</tt>).
+ *   </li>
+ * </ol>
+ * </dd>
+ *
+ * <dt class="concepts">@anchor ConceptNumber <b>Number</b></dt>
+ *
+ * <dd>
+ * This concept describes scalars which make sense as vector or matrix
+ * entries, which is usually some finite precision approximation of a field
+ * element. The canonical examples are <code>double</code> and
+ * <code>float</code>, but deal.II supports <code>std::complex<T></code>
+ * for floating point type <code>T</code> in many places as well.
+ * </dd>
+ *
+ * <dt class="concepts">@anchor ConceptPolynomialType <b>PolynomialType</b></dt>
+ *
+ * <dd>
+ * See the description in @ref Polynomials for more information. In some
+ * contexts, anything that satisfies the interface resembling
+ * @code
+ * template <int dim>
+ * class PolynomialType
+ * {
+ *   virtual void compute (const Point<dim>            &unit_point,
+ *                         std::vector<Tensor<1,dim> > &values,
+ *                         std::vector<Tensor<2,dim> > &grads,
+ *                         std::vector<Tensor<3,dim> > &grad_grads) const =0;
+ * };
+ * @endcode
+ *
+ * may be considered as a polynomial for the sake of implementing finite
+ * elements.
+ * </dd>
+ *
+ * <dt class="concepts">@anchor ConceptPreconditionerType <b>PreconditionerType</b></dt>
+ *
+ * <dd>
+ * This is essentially a synonym for <code>MatrixType</code>, but usually only
+ * requires that <code>vmult()</code> and <code>Tvmult()</code> be
+ * defined. Most of the time defining <code>Tvmult()</code> is not
+ * necessary. One should think of <code>vmult()</code> as applying some
+ * approximation of the inverse of a linear operator to a vector, instead of
+ * the action of a linear operator to a vector, for the preconditioner
+ * classes.
+ * </dd>
+ *
+ * <dt class="concepts">@anchor ConceptRelaxationType <b>RelaxationType</b></dt>
+ *
+ * <dd>
+ * This is an object capable of relaxation for multigrid methods. One can
+ * think of an object satisfying this constraint as having the following
+ * interface as well as the constraints required by
+ * @ref ConceptMatrixType "MatrixType":
+ * @code
+ * class RelaxationType
+ * {
+ * public:
+ *   template <typename VectorType>
+ *   virtual void step(VectorType &u, const VectorType &v) const =0;
+ *
+ *   template <typename VectorType>
+ *   virtual void Tstep(VectorType &u, const VectorType &v) const =0;
+ * };
+ * @endcode
+ * where these two member functions perform one step (or the transpose of such
+ * a step) of the smoothing scheme. In other words, the operations performed by
+ * these functions are
+ * $u = u - P^{-1} (A u - v)$ and $u = u - P^{-T} (A u - v)$.
+ * </dd>
+ *
+ * <dt class="concepts">@anchor ConceptSparsityPatternType <b>SparsityPatternType</b></dt>
+ *
+ * <dd>
+ * Almost all functions (with the notable exception of
+ * SparsityTools::distribute_sparsity_pattern) which take a sparsity pattern
+ * as an argument can take either a regular SparsityPattern or a
+ * DynamicSparsityPattern, or even one of the block sparsity patterns. See
+ * @ref Sparsity for more information.
+ * </dd>
+ *
+ * <dt class="concepts">@anchor ConceptStreamType <b>StreamType</b></dt>
+ *
+ * <dd>
+ * Deriving new stream classes in C++ is well-known to be difficult. To get
+ * around this, some functions accept a parameter which defines
+ * <code>operator<<</code>, which allows for easy output to any kind of
+ * output stream.
+ * </dd>
+ *
+ * <dt class="concepts">@anchor ConceptVectorType <b>VectorType</b></dt>
+ *
+ * <dd>
+ * deal.II supports many different vector classes, including bindings to
+ * vectors in other libraries. These are similar to standard library vectors
+ * (i.e., they define <code>begin()</code>, <code>end()</code>,
+ * <code>operator[]</code>, and <code>size()</code>) but also define numerical
+ * operations like <code>add()</code>. Some examples of VectorType include
+ * Vector, TrilinosWrappers::MPI::Vector, and BlockVector.
+ * </dd>
+ *
+ * </dl>
+ */
diff --git a/doc/doxygen/headers/constraints.h b/doc/doxygen/headers/constraints.h
new file mode 100644
index 0000000..5e1f794
--- /dev/null
+++ b/doc/doxygen/headers/constraints.h
@@ -0,0 +1,493 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+ * @defgroup constraints Constraints on degrees of freedom
+ * @ingroup dofs
+ *
+ * This module deals with constraints on degrees of
+ * freedom. The central class to deal with constraints is the ConstraintMatrix
+ * class.
+ *
+ * Constraints typically come from several sources, for example:
+ * - If you have Dirichlet-type boundary conditions, $u|_{\partial\Omega}=g$,
+ *   one usually enforces
+ *   them by requiring that degrees of freedom on the boundary have
+ *   particular values, for example $x_{12}=42$ if the boundary condition
+ *   $g(\mathbf x)$ requires that the finite element solution $u(\mathbf x)$
+ *   at the location of degree
+ *   of freedom 12 has the value 42. Such constraints are generated by
+ *   those versions of the VectorTools::interpolate_boundary_values
+ *   function that take a ConstraintMatrix argument (though there are
+ *   also other ways of dealing with Dirichlet conditions, using
+ *   MatrixTools::apply_boundary_values, see for example step-3 and step-4).
+ * - If you have boundary conditions that set a certain part of the
+ *   solution's value, for example no normal flux, $\mathbf n \cdot
+ *   \mathbf u=0$ (as happens in flow problems and is handled by the
+ *   VectorTools::compute_no_normal_flux_constraints function) or
+ *   prescribed tangential components, $\mathbf{n}\times\mathbf{u}=
+ *   \mathbf{n}\times\mathbf{f}$ (as happens in electromagnetic problems and
+ *   is handled by the VectorTools::project_boundary_values_curl_conforming
+ *   function). For the former case, imagine for example that we are at
+ *   a vertex where the normal vector has the form $\frac 1{\sqrt{14}}
+ *   (1,2,3)^T$ and that the $x$-, $y$- and $z$-components of the flow
+ *   field at this vertex are associated with degrees of freedom 12, 28,
+ *   and 40. Then the no-normal-flux condition means that we need to have
+ *   the condition $\frac 1{\sqrt{14}} (x_{12}+2x_{28}+3x_{40})=0$.
+ *   The prescribed tangential component leads to similar constraints
+ *   though there is often something on the right hand side.
+ * - If you have hanging node constraints, for example in a mesh like this:
+ *        @image html hanging_nodes.png ""
+ *   Let's assume the bottom right one of the two red degrees of freedom
+ *   is $x_{12}$ and that the two yellow neighbors on its left and right
+ *   are $x_{28}$ and $x_{40}$. Then, requiring that the finite element
+ *   function be continuous is equivalent to requiring that $x_{12}=
+ *   \frac 12 (x_{28}+x_{40})$. A similar situation occurs in the
+ *   context of hp adaptive finite element methods.
+ *   For example, when using Q1 and Q2 elements (i.e. using
+ *   FE_Q(1) and FE_Q(2)) on the two marked cells of the mesh
+ *       @image html hp-refinement-simple.png
+ *   there are three constraints: first $x_2=\frac 12 x_0 + \frac 12 x_1$,
+ *   then $x_4=\frac 14 x_0 + \frac 34 x_1$, and finally the identity
+ *   $x_3=x_1$. Similar constraints occur as hanging nodes even if all
+ *   cells used the same finite elements. In all of these cases, you
+ *   would use the DoFTools::make_hanging_node_constraints function to
+ *   compute such constraints.
+ * - Other linear constraints, for example when you try to impose a certain
+ *   average value for a problem that would otherwise not have a unique
+ *   solution. An example of this is given in the step-11 tutorial program.
+ *
+ * In all of these examples, constraints on degrees of freedom are linear
+ * and possibly inhomogeneous. In other words, they always have
+ * the form $x_{i_1} = \sum_{j=2}^M a_{i_j} x_{i_j} + b_i$. The deal.II
+ * class that deals with storing and using these constraints is
+ * ConstraintMatrix. The naming stems from the fact that the class
+ * originally only stored the (sparse) matrix $a_{i_j}$. The class name
+ * component "matrix" no longer makes much sense today since the class has
+ * learned to also deal with inhomogeneities $b_i$.
+ *
+ *
+ * <h3>Eliminating constraints</h3>
+ *
+ * When building the global system matrix and the right hand sides, one can
+ * build them without taking care of the constraints, i.e. by simply looping
+ * over cells and adding the local contributions to the global matrix and
+ * right hand side objects. In order to do actual calculations, you have to
+ * 'condense' the linear system: eliminate constrained degrees of freedom and
+ * distribute the appropriate values to the unconstrained dofs. This changes
+ * the sparsity pattern of the sparse matrices used in finite element
+ * calculations and is thus a quite expensive operation. The general scheme of
+ * things is then that you build your system, you eliminate (condense) away
+ * constrained nodes using the ConstraintMatrix::condense() functions, then
+ * you solve the remaining system, and finally you compute the values of
+ * constrained nodes from the values of the unconstrained ones using the
+ * ConstraintMatrix::distribute() function. Note that the
+ * ConstraintMatrix::condense() function is applied to matrix and right hand
+ * side of the linear system, while the ConstraintMatrix::distribute()
+ * function is applied to the solution vector. This is the method used in
+ * the first few tutorial programs, see for example step-6.
+ *
+ * This scheme of first building a linear system and then eliminating
+ * constrained degrees of freedom is inefficient, and a bottleneck if there
+ * are many constraints and matrices are full, i.e. especially for 3d and/or
+ * higher order or hp finite elements. Furthermore, it is impossible to
+ * implement for %parallel computations where a process may not have access
+ * to elements of the matrix. We therefore offer a second way of
+ * building linear systems, using the
+ * ConstraintMatrix::add_entries_local_to_global() and
+ * ConstraintMatrix::distribute_local_to_global() functions discussed
+ * below. The resulting linear systems are equivalent to those one gets after
+ * calling the ConstraintMatrix::condense() functions.
+ *
+ * @note Both ways of applying constraints set the value of the matrix
+ * diagonals to constrained entries to a <i>positive</i> entry of the same
+ * magnitude as the other entries in the matrix. As a consequence, you need to
+ * set up your problem such that the weak form describing the main matrix
+ * contribution is not <i>negative definite</i>. Otherwise, iterative solvers
+ * such as CG will break down or, as in the case of GMRES, be considerably slower.
+ *
+ * @note While these two ways are <i>equivalent</i>, i.e., the solution of
+ * linear systems computed via either approach is the same, the linear
+ * systems themselves do not necessarily have the same matrix and right
+ * hand side vector entries. Specifically, the matrix diagonal and right hand
+ * side entries corresponding to constrained degrees of freedom may be different
+ * as a result of the way in which we compute them; they are, however, always
+ * chosen in such a way that the solution to the linear system is the same.
+ *
+ * <h4>Condensing matrices and sparsity patterns</h4>
+ *
+ * As mentioned above, the first way of using constraints is to build linear
+ * systems without regard to constraints and then "condensing" them away.
+ * Condensation of a matrix is done in four steps:
+ * - first one builds the
+ *   sparsity pattern (e.g. using DoFTools::make_sparsity_pattern());
+ * - then the sparsity pattern of the condensed matrix is made out of
+ *   the original sparsity pattern and the constraints;
+ * - third, the global matrix is assembled;
+ * - and fourth, the matrix is finally condensed.
+ *
+ * In the condensation process, we are not actually changing the number of
+ * rows or columns of the sparsity pattern, matrix, and vectors. Instead, the
+ * condense functions add nonzero entries to the sparsity pattern of the
+ * matrix (with constrained nodes in it) where the condensation process of the
+ * matrix will create additional nonzero elements. In the condensation process
+ * itself, rows and columns subject to constraints are distributed to the rows
+ * and columns of unconstrained nodes. The constrained degrees of freedom
+ * remain in place. In order not to
+ * disturb the solution process, these rows and columns are filled with zeros
+ * and an appropriate positive value on the main diagonal (we choose an
+ * average of the magnitudes of the other diagonal elements, so as to make
+ * sure that the new diagonal entry has the same order of magnitude as the
+ * other entries; this preserves the scaling properties of the matrix). The
+ * corresponding value in the right hand sides is set to zero. This way, the
+ * constrained node will always get the value zero upon solution of the
+ * equation system and will not couple to other nodes any more.
+ *
+ * Keeping the entries in the matrix has the advantage over creating a new and
+ * smaller matrix that only one matrix and sparsity pattern is
+ * needed, and thus less memory is required. Additionally, the condensation process is
+ * less expensive, since not all but only constrained values in the matrix
+ * have to be copied. On the other hand, the solution process will take a bit
+ * longer, since matrix vector multiplications will incur multiplications with
+ * zeroes in the lines subject to constraints. Additionally, the vector size
+ * is larger, resulting in more memory
+ * consumption for those iterative solution methods using a larger number of
+ * auxiliary vectors (e.g. methods using explicit orthogonalization
+ * procedures).
+ * Nevertheless, this process is more efficient due to its lower
+ * memory consumption and is discussed in the first few programs
+ * of the @ref Tutorial , for example in step-6.
+ *
+ * The condensation functions exist for different argument types: SparsityPattern,
+ * SparseMatrix and
+ * BlockSparseMatrix. Note that there are no versions for arguments of type
+ * PETScWrappers::SparseMatrix() or any of the other PETSc or Trilinos
+ * matrix wrapper classes. This is due to the fact that it is relatively
+ * hard to get a representation of the sparsity structure of PETSc matrices,
+ * and to modify them efficiently; this holds in particular if the matrix is actually
+ * distributed across a cluster of computers. If you want to use
+ * PETSc/Trilinos matrices, you can either copy an already condensed deal.II
+ * matrix, or assemble the PETSc/Trilinos matrix in the already condensed form,
+ * see the discussion below.
+ *
+ *
+ * <h5>Condensing vectors</h5>
+ *
+ * Condensing vectors works exactly as described above for matrices. Note that
+ * condensation is an idempotent operation, i.e. doing it more than once on a
+ * vector or matrix yields the same result as doing it only once: once an
+ * object has been condensed, further condensation operations don't change it
+ * any more.
+ *
+ * In contrast to the matrix condensation functions, the vector condensation
+ * functions exist in variants for PETSc and Trilinos vectors. However,
+ * using them is typically expensive, and should be avoided. You should use
+ * the same techniques as mentioned above to avoid their use.
+ *
+ *
+ * <h4>Avoiding explicit condensation</h4>
+ *
+ * Sometimes, one wants to avoid explicit condensation of a linear system
+ * after it has been built at all. There are two main reasons for wanting to
+ * do so:
+ *
+ * <ul>
+ * <li>
+ * Condensation is an expensive operation, in particular if there
+ * are many constraints and/or if the matrix has many nonzero entries. Both
+ * are typically the case for 3d or high polynomial degree computations, as
+ * well as for hp finite element methods, see for example the @ref hp_paper
+ * "hp paper". This is the case discussed in the hp tutorial program, @ref
+ * step_27 "step-27", as well as in step-22 and @ref step_31
+ * "step-31".
+ *
+ * <li>
+ * There may not be a ConstraintMatrix::condense() function for the matrix
+ * you use (this is, for example, the case for the PETSc and Trilinos
+ * wrapper classes where we have no access to the underlying representation
+ * of the matrix, and therefore cannot efficiently implement the
+ * ConstraintMatrix::condense() operation). This is the case discussed
+ * in step-17, step-18, step-31, and step-32.
+ * </ul>
+ *
+ * In this case, one possibility is to distribute local entries to the final
+ * destinations right at the moment of transferring them into the global
+ * matrices and vectors, and similarly build a sparsity pattern in the
+ * condensed form at the time it is set up originally.
+ *
+ * The ConstraintMatrix class offers support for these operations as well. For
+ * example, the ConstraintMatrix::add_entries_local_to_global() function adds
+ * nonzero entries to a sparsity pattern object. It not only adds a given
+ * entry, but also all entries that we will have to write to if the current
+ * entry corresponds to a constrained degree of freedom that will later be
+ * eliminated. Similarly, one can use the
+ * ConstraintMatrix::distribute_local_to_global() functions to directly
+ * distribute entries in vectors and matrices when copying local contributions
+ * into a global matrix or vector. These calls make a subsequent call to
+ * ConstraintMatrix::condense() unnecessary. For examples of their use see the
+ * tutorial programs referenced above.
+ *
+ * Note that, despite their name which describes what the function really
+ * does, the ConstraintMatrix::distribute_local_to_global() functions have to
+ * be applied to matrices and right hand side vectors, whereas the
+ * ConstraintMatrix::distribute() function discussed below is applied to the
+ * solution vector after solving the linear system.
+ *
+ *
+ * <h3>Distributing constraints</h3>
+ *
+ * After solving the condensed system of equations, the solution vector has to
+ * be "distributed": the modification to the original linear system that
+ * results from calling ConstraintMatrix::condense leads to a linear system
+ * that solves correctly for all degrees of freedom that are unconstrained but
+ * leaves the values of constrained degrees of freedom undefined. To get the
+ * correct values also for these degrees of freedom, you need to "distribute"
+ * the unconstrained values also to their constrained colleagues. This is done
+ * by the ConstraintMatrix::distribute() function.
+ * The operation of distribution undoes the
+ * condensation process in some sense, but it should be noted that it is not
+ * the inverse operation. Basically, distribution sets the values of the
+ * constrained nodes to the value that is computed from the constraint given
+ * the values of the unconstrained nodes plus possible inhomogeneities.
+ *
+ *
+ * <h3>Treatment of inhomogeneous constraints</h3>
+ *
+ * In case some constraint lines have inhomogeneities (which is typically the
+ * case if the constraint comes from implementation of inhomogeneous boundary
+ * conditions), the situation is a bit more complicated than if the only
+ * constraints were due to hanging nodes alone. This is because the
+ * elimination of the non-diagonal values in the matrix generates contributions
+ * in the eliminated rows in the vector. This means that inhomogeneities can
+ * only be handled with functions that act simultaneously on a matrix and a
+ * vector. This means that all inhomogeneities are ignored in case the
+ * respective condense function is called without any matrix (or if the matrix
+ * has already been condensed before).
+ *
+ * The use of ConstraintMatrix for implementing Dirichlet boundary conditions
+ * is discussed in the step-22 tutorial program. A further example that applies
+ * the ConstraintMatrix is step-41. The situation here is a little more complicated,
+ * because there we have some constraints which are not at the boundary.
+ * There are two ways to apply inhomogeneous constraints after creating the
+ * ConstraintMatrix:
+ *
+ * First approach:
+ * - Apply the ConstraintMatrix::distribute_local_to_global() function to the
+ *   system matrix and the right-hand-side with the parameter
+ *   use_inhomogeneities_for_rhs = false (i.e., the default)
+ * - Set the solution to zero in the inhomogeneous constrained components
+ *   using the ConstraintMatrix::set_zero() function (or start with a solution
+ *   vector equal to zero)
+ * - solve() the linear system
+ * - Apply ConstraintMatrix::distribute() to the solution
+ *
+ * Second approach:
+ * - Use the ConstraintMatrix::distribute_local_to_global() function with the parameter
+ *   use_inhomogeneities_for_rhs = true and apply it to
+ *   the system matrix and the right-hand-side
+ * - Set the corresponding components of the solution to the inhomogeneous
+ *   constrained values (for example using ConstraintMatrix::distribute())
+ * - solve() the linear system
+ * - Depending on the solver now you have to apply the ConstraintMatrix::distribute()
+ *   function to the solution, because the solver could change the constrained
+ *   values in the solution. For a Krylov based solver this should not be strictly
+ *   necessary, but it is still possible that there is a difference between the
+ *   inhomogeneous value and the solution value in the order of machine precision,
+ *   and you may want to call ConstraintMatrix::distribute() anyway if you have
+ *   additional constraints such as from hanging nodes.
+ *
+ * Of course, both approaches lead to the same final answer but in different
+ * ways. Using the first approach (i.e., when using use_inhomogeneities_for_rhs = false
+ * in ConstraintMatrix::distribute_local_to_global()), the linear system we
+ * build has zero entries in the right hand side in all those places where a
+ * degree of freedom is constrained, and some positive value on the matrix
+ * diagonal of these lines. Consequently, the solution vector of the linear
+ * system will have a zero value for inhomogeneously constrained degrees of
+ * freedom and we need to call ConstraintMatrix::distribute() to give these
+ * degrees of freedom their correct nonzero values.
+ *
+ * On the other hand, in the second approach, the matrix diagonal element and
+ * corresponding right hand side entry for inhomogeneously constrained degrees
+ * of freedom are chosen so that the solution of the linear system already has the
+ * correct value (e.g., if the constraint is that $x_{13}=42$ then row $13$ of
+ * the matrix is empty with the exception of the diagonal entry, and
+ * $b_{13}/A_{13,13}=42$ so that the solution of $Ax=b$ must satisfy
+ * $x_{13}=42$ as desired). As a consequence, we do not need to call
+ * ConstraintMatrix::distribute() after solving to fix up inhomogeneously
+ * constrained components of the solution, though there is also no harm in
+ * doing so.
+ *
+ * There remains the question of which of the approaches to take and why we
+ * need to set to zero the values of the solution vector in the first
+ * approach. The answer to both questions has to do with how iterative solvers
+ * solve the linear system. To this end, consider that we typically stop
+ * iterations when the residual has dropped below a certain fraction of the
+ * norm of the right hand side, or, alternatively, a certain fraction of the
+ * norm of the initial residual. Now consider this:
+ *
+ * - In the first approach, the right hand side entries for constrained
+ *   degrees of freedom are zero, i.e., the norm of the right hand side
+ *   really only consists of those parts that we care about. On the other
+ *   hand, if we start with a solution vector that is not zero in
+ *   constrained entries, then the initial residual is very large because
+ *   the value that is currently in the solution vector does not match the
+ *   solution of the linear system (which is zero in these components).
+ *   Thus, if we stop iterations once we have reduced the initial residual
+ *   by a certain factor, we may reach the threshold after a single
+ *   iteration because constrained degrees of freedom are resolved by
+ *   iterative solvers in just one iteration. If the initial residual
+ *   was dominated by these degrees of freedom, then we see a steep
+ *   reduction in the first step although we did not really make much
+ *   progress on the remainder of the linear system in this just one
+ *   iteration. We can avoid this problem by either stopping iterations
+ *   once the norm of the residual reaches a certain fraction of the
+ *   <i>norm of the right hand side</i>, or we can set the solution
+ *   components to zero (thus reducing the initial residual) and iterating
+ *   until we hit a certain fraction of the <i>norm of the initial
+ *   residual</i>.
+ * - In the second approach, we get the same problem if the starting vector
+ *   in the iteration is zero, since then the residual may be
+ *   dominated by constrained degrees of freedom having values that do not
+ *   match the values we want for them at the solution. We can again
+ *   circumvent this problem by setting the corresponding elements of the
+ *   solution vector to their correct values, by calling
+ *   ConstraintMatrix::distribute() <i>before</i> solving the linear system
+ *   (and then, as necessary, a second time after solving).
+ *
+ * In addition to these considerations, consider the case where we have
+ * inhomogeneous constraints of the kind $x_{3}=\tfrac 12 x_1 + \tfrac 12$,
+ * e.g., from a hanging node constraint of the form $x_{3}=\tfrac 12 (x_1 +
+ * x_2)$ where $x_2$ is itself constrained by boundary values to $x_2=1$.
+ * In this case, the ConstraintMatrix can of course not figure out what
+ * the final value of $x_3$ should be and, consequently, can not set the
+ * solution vector's third component correctly. Thus, the second approach will
+ * not work and you should take the first.
+ *
+ *
+ * <h3>Dealing with conflicting constraints</h3>
+ *
+ * There are situations where degrees of freedom are constrained in more
+ * than one way, and sometimes in conflicting ways. Consider, for example
+ * the following situation:
+ *     @image html conflicting_constraints.png ""
+ * Here, degree of freedom $x_0$ marked in blue is a hanging node. If we
+ * used trilinear finite elements, i.e. FE_Q(1), then it would carry the
+ * constraint $x_0=\frac 12 (x_{1}+x_{2})$. On the other hand, it is at
+ * the boundary, and if we have imposed boundary conditions
+ * $u|_{\partial\Omega}=g$ then we will have the constraint $x_0=g_0$
+ * where $g_0$ is the value of the boundary function $g(\mathbf x)$ at
+ * the location of this degree of freedom.
+ *
+ * So, which one will win? Or maybe: which one <i>should</i> win? There is
+ * no good answer to this question:
+ * - If the hanging node constraint is the one that is ultimately enforced,
+ *   then the resulting solution does not satisfy boundary
+ *   conditions any more for general boundary functions $g$.
+ * - If it had been done the other way around, the solution would not satisfy
+ *   hanging node constraints at this point and consequently would not
+ *   satisfy the regularity properties of the element chosen (e.g. would not
+ *   be continuous despite using a $Q_1$ element).
+ * - The situation becomes completely hopeless if you consider
+ *   curved boundaries since then the edge midpoint (i.e. the hanging node)
+ *   does in general not lie on the mother edge. Consequently, the solution
+ *   will not be $H^1$ conforming anyway, regardless of the priority of
+ *   the two competing constraints. If the hanging node constraint wins, then
+ *   the solution will be neither conforming, nor have the right boundary
+ *   values.
+ * In other words, it is not entirely clear what the "correct" solution would
+ * be. In most cases, it will not matter much: in either case, the error
+ * introduced either by the non-conformity or the incorrect boundary values
+ * will be at worst at the same order as the discretization's overall error.
+ *
+ * That said, what you should do if you know what you want is this:
+ * - If you want the hanging node constraints to win, then first build
+ *   these through the DoFTools::make_hanging_node_constraints() function.
+ *   Then interpolate the boundary values using
+ *   VectorTools::interpolate_boundary_values() into the same ConstraintMatrix
+ *   object. If the latter function encounters a boundary node that already
+ *   is constrained, it will simply ignore the boundary values at this
+ *   node and leave the constraint untouched.
+ * - If you want the boundary value constraint to win, build the hanging
+ *   node constraints as above and use these to assemble the matrix using
+ *   the ConstraintMatrix::distribute_local_to_global() function (or,
+ *   alternatively, assemble the matrix and then use
+ *   ConstraintMatrix::condense() on it). In a second step, use the
+ *   VectorTools::interpolate_boundary_values() function that returns
+ *   a std::map and use it as input for MatrixTools::apply_boundary_values()
+ *   to set boundary nodes to their correct value.
+ *
+ * Either behavior can also be achieved by building two separate
+ * ConstraintMatrix objects and calling the ConstraintMatrix::merge() function with
+ * a particular second argument.
+ *
+ *
+ * <h3>Applying constraints indirectly with a LinearOperator</h3>
+ *
+ * Sometimes it is either not desirable or not possible to directly
+ * condense, or eliminate, constraints from a system of linear equations, in
+ * particular if there is no underlying matrix object that could be condensed
+ * (or for which constraints could be taken care of during assembly). This is
+ * usually the case if the system is described by a LinearOperator.
+ *
+ * In this case we can solve the modified system
+ * @f[
+ *   (C^T A C + Id_c) \tilde x = C^T (b - A\,k)
+ * @f]
+ * instead [1] (M. S. Shephard. Linear multipoint constraints applied via
+ * transformation as part of a direct stiffness assembly process.
+ * <i>International Journal for Numerical Methods in Engineering</i>
+ * 20(11):2107-2112, 1985).
+ *
+ * Here, $A$ is a given (unconstrained) system matrix for which we only
+ * assume that we can apply it to a vector but can not necessarily access
+ * individual matrix entries. $b$ is the corresponding right hand side of a
+ * system of linear equations $A\,x=b$. The matrix $C$ describes the
+ * homogeneous part of the linear constraints stored in a ConstraintMatrix
+ * and the vector $k$ is the vector of corresponding inhomogeneities. More
+ * precisely, the ConstraintMatrix::distribute() operation applied on a
+ * vector $x$ is the operation
+ * @f[
+ *   x \leftarrow C\,x+k.
+ * @f]
+ * And finally, $Id_c$ denotes the identity on the subspace of constrained
+ * degrees of freedom.
+ *
+ * The corresponding solution of $A\,x=b$ that obeys these constraints is
+ * then recovered by distributing constraints: $x=C\tilde x+k$.
+ *
+ * The whole system can be set up and solved with the following snippet of
+ * code:
+ * @code
+ * #include <deal.II/lac/constrained_linear_operator.h>
+ *
+ * // ...
+ *
+ * // system_matrix     - unconstrained and assembled system matrix
+ * // right_hand_side   - unconstrained and assembled right hand side
+ * // constraint_matrix - a ConstraintMatrix object
+ * // solver            - an appropriate, iterative solver
+ * // preconditioner    - a preconditioner
+ *
+ * const auto op_a = linear_operator(system_matrix);
+ * const auto op_amod = constrained_linear_operator(constraint_matrix, op_a);
+ * Vector<double> rhs_mod = constrained_right_hand_side(constraint_matrix,
+ *                                                      op_a,
+ *                                                      right_hand_side);
+ *
+ * solver.solve(op_amod, solution, rhs_mod, preconditioner);
+ * constraint_matrix.distribute(solution);
+ * @endcode
+ */
diff --git a/doc/doxygen/headers/distributed.h b/doc/doxygen/headers/distributed.h
new file mode 100644
index 0000000..bc550c2
--- /dev/null
+++ b/doc/doxygen/headers/distributed.h
@@ -0,0 +1,436 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+ * @defgroup distributed Parallel computing with multiple processors using distributed memory
+ * @ingroup Parallel
+ *
+ * @brief A module discussing the use of parallelism on distributed memory
+ * clusters.
+ *
+ * @dealiiVideoLecture{39,41,41.25,41.5}
+ *
+ * <h3>Overview</h3>
+ *
+ * deal.II can use multiple machines connected via MPI to parallelize
+ * computations, in addition to the parallelization within a shared
+ * memory machine discussed in the @ref threads module. There are
+ * essentially two ways to utilize multiple machines:
+ *
+ * - Each machine keeps the entire mesh and DoF handler locally, but
+ *   only a share of the global matrix, sparsity pattern, and solution
+ *   vector is stored on each machine.
+ * - The mesh and DoF handler are also distributed, i.e. each processor
+ *   stores only a share of the cells and degrees of freedom. No
+ *   processor has knowledge of the entire mesh, matrix, or solution,
+ *   and in fact problems solved in this mode are usually so large
+ *   (say, 100s of millions to billions of degrees of freedom) that no
+ *   processor can or should store even a single solution vector.
+ *
+ * The first of these two options is relatively straightforward
+ * because most of the things one wants to do in a finite element
+ * program still work in essentially the same way, and handling
+ * distributed matrices, vectors, and linear solvers is something for
+ * which good external libraries such as Trilinos or
+ * PETSc exist that can make things look almost exactly the
+ * same as they would if everything was available locally. The use of
+ * this mode of parallelization is explained in the tutorial programs
+ * step-17, and step-18 and will not be discussed here in more detail.
+ *
+ * The use of truly distributed meshes is somewhat more complex because it
+ * changes or makes impossible some of the things that can otherwise be done
+ * with deal.II triangulations, DoF handlers, etc. This module documents these
+ * issues with a vantage point at 50,000 ft above ground without going into
+ * too many details. All the algorithms described below are implemented in
+ * classes and functions in namespace parallel::distributed.
+ *
+ * One important aspect in parallel computations using MPI is that write
+ * access to matrix and vector elements requires a call to compress() after
+ * the operation is finished and before the object is used (for example read
+ * from). Also see @ref GlossCompress.
+ *
+ * <h4>Other resources</h4>
+ *
+ * A complete discussion of the algorithms used in this namespace, as well as
+ * a thorough description of many of the terms used here, can be found in the
+ * @ref distributed_paper "Distributed Computing paper". In particular, the
+ * paper shows that the methods discussed in this module scale to thousands of
+ * processors and well over a billion degrees of freedom (they may scale to
+ * even bigger problems but at the time of writing this, we do not have
+ * solvers that are capable of more than $2^{31}$ degrees of freedom due to
+ * the use of <code>signed int</code> as index). The paper also gives a
+ * concise definition of many of the terms that are used here and in other
+ * places of the library related to distributed computing.  The step-40
+ * tutorial program shows an application of the classes and methods of this
+ * namespace to the Laplace equation, while step-32 extends the step-31
+ * program to massively parallel computations and thereby explains the use of
+ * the topic discussed here to more complicated applications.
+ *
+ * For a discussion of what we consider "scalable" programs, see
+ * @ref GlossParallelScaling "this glossary entry".
+ *
+ *
+ * <h4>Distributed triangulations</h4>
+ *
+ * In %parallel %distributed mode, objects of type
+ * parallel::distributed::Triangulation on each processor only store
+ * a subset of cells. In particular, the global mesh can be thought of
+ * as decomposed so that each MPI process "owns" a number of
+ * cells. The mesh each process then stores locally consists of
+ * exactly those cells that it owns, as well as one layer of @ref
+ * GlossGhostCell "ghost cells" around the ones it locally owns, and a
+ * number of cells we call @ref GlossArtificialCell "artificial". The
+ * latter are cells that ensure that each processor has a mesh that
+ * has all the coarse level cells and that respects the invariant that
+ * neighboring cells can not differ by more than one level of
+ * refinement. The following pictures show such a mesh, %distributed
+ * across four processors, and the collection of cells each of these
+ * processors stores locally:
+ *
+ * <table align="center">
+ *   <tr>
+ *     <td> @image html distributed_mesh_0.png </td>
+ *     <td> @image html distributed_mesh_1.png </td>
+ *   </tr>
+ *   <tr>
+ *     <td> @image html distributed_mesh_2.png </td>
+ *     <td> @image html distributed_mesh_3.png </td>
+ *   </tr>
+ * </table>
+ *
+ * The cells are colored based on the @ref GlossSubdomainId "subdomain id",
+ * which identifies which processor owns a cell: turquoise for
+ * processor 0, green for processor 1, yellow for processor 2, and red
+ * for processor 3. As can be seen, each process has one layer of
+ * ghost cells around its own cells, which are correctly colored by
+ * the subdomain id that identifies the processor that owns each of
+ * these cells. Note also how each processor stores a number of
+ * artificial cells, indicated in blue, that only exist to ensure that
+ * each processor knows about all coarse grid cells and that the
+ * meshes have the 2:1 refinement property; however, in the area
+ * occupied by these artificial cells, a processor has no knowledge
+ * how refined the mesh there really is, as these are areas that are
+ * owned by other processors. As a consequence, all algorithms we will
+ * develop can only run over the locally owned cells and if necessary
+ * the ghost cells; trying to access data on any of the artificial
+ * cells is most likely an error. Note that we can determine whether
+ * we own a cell by testing that <code>cell-@>subdomain_id() ==
+ * triangulation.locally_owned_subdomain()</code>.
+ *
+ * The "real" mesh one has to think of here is the one that would
+ * result from forming the union of cells each of the processes owns,
+ * i.e. from the overlap of the turquoise, green, yellow and red
+ * areas, disregarding the blue areas.
+ *
+ * @note The decomposition of this "real" mesh into the pieces stored
+ *   by each process is provided by the <a href="http://www.p4est.org">p4est</a>
+ *   library. p4est stores the complete mesh in a distributed data structure
+ *   called a parallel forest (thus the name). A parallel forest consists of
+ *   quad-trees (in 2d) or oct-trees (in 3d) originating in each
+ *   coarse mesh cell and representing the refinement structure
+ *   from parent cells to their four (in 2d) or eight (in 3d)
+ *   children. Internally, this parallel forest is represented by
+ *   a (distributed) linear array of cells that corresponds to a
+ *   depth-first traverse of each tree, and each process then stores
+ *   a contiguous section of this linear array of cells. This results
+ *   in partitions such as the one shown above that are not optimal
+ *   in the sense that they do not minimize the length of the
+ *   interface between subdomains (and consequently do not minimize
+ *   the amount of communication) but that in practice are very
+ *   good and can be manipulated with exceedingly fast algorithms.
+ *   The efficiency of storing and manipulating cells in this way
+ *   therefore often outweighs the loss in optimality of communication.
+ *   (The individual subdomains resulting from this method of
+ *   partitioning may also sometimes consist of disconnected
+ *   parts, such as shown at the top right. However, it can be
+ *   proven that each subdomain consists of at most two disconnected
+ *   pieces; see C. Burstedde, T. Isaac: "Morton curve segments produce
+ *   no more than two distinct face-connected subdomains",
+ *   <a href="http://arxiv.org/abs/1505.05055">arXiv 1505.05055</a>,
+ *   2015.)
+ *
+ *
+ * <h4>Distributed degree of freedom handler</h4>
+ *
+ * The DoFHandler class builds on the Triangulation class, but it can
+ * detect whenever we actually use an object of type
+ * parallel::distributed::Triangulation as triangulation. In that
+ * case, it assigns global %numbers for all degrees of freedom that
+ * exist, given a finite element, on the global mesh, but each
+ * processor will only know about those that are defined on locally
+ * relevant cells (i.e. cells either locally owned or that are ghost
+ * cells). Internally, the algorithm essentially works by just looping
+ * over all cells we own locally and assigning DoF indices to the
+ * degrees of freedom defined on them and, in the case of degrees of
+ * freedom at the interface between subdomains owned by different
+ * processors, that are not owned by the neighboring processor. All
+ * processors then exchange how many degrees of freedom they locally
+ * own and shift their own indices in such a way that every degree of
+ * freedom on all subdomains is uniquely identified by an index
+ * between zero and DoFHandler::n_dofs() (this function returns the
+ * global number of degrees of freedom, accumulated over all
+ * processors). Note that after this step, the degrees of freedom
+ * owned by each process form a contiguous range that can, for
+ * example, be obtained by the contiguous index set returned by
+ * DoFHandler::locally_owned_dofs(). After
+ * assigning unique indices to all degrees of freedom, the
+ * DoFHandler::distribute_dofs() function then
+ * loops over all ghost cells and communicates with neighboring
+ * processors to ensure that the global indices of degrees of freedom
+ * on these ghost cells match the ones that the neighbor has assigned
+ * to them.
+ *
+ * Through this scheme, we can make sure that each cell we locally own
+ * as well as all the ghost cells can be asked to yield the globally
+ * correct indices for the degrees of freedom defined on
+ * them. However, asking for degrees of freedom on artificial cells is
+ * likely going to lead to nothing good, as no information is
+ * available for these cells (in fact, it isn't even known whether
+ * these cells are active on the global mesh, or are further refined).
+ *
+ * As usual, degrees of freedom can be renumbered after being enumerated,
+ * using the functions in namespace DoFRenumbering.
+ *
+ *
+ * <h4>Linear systems for %distributed computations</h4>
+ *
+ * One thing one learns very quickly when working with very large
+ * numbers of processors is that one can not store information about
+ * every degree of freedom on each processor, even if this information
+ * is "this degree of freedom doesn't live here". An example for this
+ * is that we can create an object for a (compressed) sparsity pattern
+ * that has DoFHandler::n_dofs() rows,
+ * but for which we fill only those rows that correspond to the
+ * DoFHandler::n_locally_owned_dofs() locally
+ * owned degrees of freedom. The reason is simple: for the sake of
+ * example, let's assume we have 1 billion degrees of freedom
+ * distributed across 100 processors; if we even only hold 16 bytes
+ * per line in this sparsity pattern (whether we own the corresponding
+ * DoF or not), we'll need 16 GB for this object even if every single
+ * line is empty. Of course, only 10 million lines will be non-empty,
+ * for which we need 160 MB plus whatever is necessary to store the
+ * actual column indices of nonzero entries. Let's say we have a
+ * moderately complex problem with 50 entries per row, for each of
+ * which we store the column index worth 4 bytes, then we'll need 216
+ * bytes for each of the 10 million lines that correspond to the
+ * degrees of freedom we own, for a total of 2.16 GB. And we'll need
+ * 16 bytes for each of the 990 million lines that we don't own, for a
+ * total of 15.840 GB. It is clear that this ratio doesn't become any
+ * better if we go to even higher %numbers of processors.
+ *
+ * The solution to this problem is to really only use any memory at
+ * all for those parts of the linear system that we own, or need for
+ * some other reason. For all other parts, we must know that they
+ * exist, but we can not set up any part of our data structure. To
+ * this end, there exists a class called IndexSet that denotes a set
+ * of indices which we care for, and for which we may have to allocate
+ * memory. The data structures for sparsity patterns, constraint
+ * matrices, matrices, and vectors can be initialized with these
+ * IndexSet objects to really only care for those rows or entries that
+ * correspond to indices in the index set, and not care about all
+ * others. These objects will then ask how many indices exist in the
+ * set, allocate memory for each one of them (e.g. initialize the data
+ * structures for a line of a sparsity pattern), and when you want to
+ * access data for global degree of freedom <code>i</code> you will be
+ * redirected to the result of calling IndexSet::index_within_set()
+ * with index <code>i</code> instead. Accessing data for elements
+ * <code>i</code> for which IndexSet::is_element() is false will yield
+ * an error.
+ *
+ * The remaining question is how to identify the set of indices that
+ * correspond to degrees of freedom we need to worry about on each
+ * processor. To this end, you can use the
+ * DoFTools::extract_locally_owned_dofs() function to get at all the
+ * indices a processor owns. Note that this is a subset of the degrees
+ * of freedom that are defined on the locally owned cells (since some
+ * of the degrees of freedom at the interface between two different
+ * subdomains may be owned by the neighbor). This set of degrees of
+ * freedom defined on cells we own can be obtained using the function
+ * DoFTools::extract_locally_active_dofs(). Finally, one
+ * sometimes needs the set of all degrees of freedom on the locally
+ * owned subdomain as well as the adjacent ghost cells. This
+ * information is provided by the
+ * DoFTools::extract_locally_relevant_dofs() function.
+ *
+ *
+ * <h5>Vectors with ghost elements</h5>
+ *
+ * A typical parallel application is dealing with two different kinds
+ * of parallel vectors: vectors with ghost elements (also called
+ * ghosted vectors) and vectors without ghost elements.  (Both
+ * kinds can typically be represented by the same data type, but there
+ * are of course different vector types that can each represent both flavors:
+ * for example TrilinosWrappers::MPI::Vector, PETScWrappers::Vector, and
+ * BlockVector objects built on these).
+ * You can find a discussion of what distinguishes these kinds of vectors
+ * in the @ref GlossGhostedVector "glossary entry on ghosted vectors".
+ *
+ * From a usage point of view, ghosted vectors are typically used for
+ * data output, postprocessing, error estimation, input in
+ * integration. This is because in these operations, one typically
+ * needs access not only to @ref GlossLocallyOwnedDof "locally owned dofs"
+ * but also to @ref GlossLocallyActiveDof "locally active dofs"
+ * and sometimes to @ref GlossLocallyRelevantDof "locally relevant dofs",
+ * and their values may not be stored in non-ghosted vectors on the
+ * processor that needs them. The operations listed above also only
+ * require read-only access to vectors, and ghosted vectors are therefore
+ * usable in these contexts.
+ *
+ * On the other hand, vectors without ghost entries are used in all
+ * other places like assembling, solving, or any other form of
+ * manipulation. These are typically write-only operations and
+ * therefore need not have read access to vector elements that may be
+ * owned by another processor.
+ *
+ * You can copy between vectors with and without ghost
+ * elements (you can see this in step-40 and step-32) using operator=.
+ *
+ *
+ * <h5>Sparsity patterns</h5>
+ *
+ * At the time of writing this, the only class equipped to deal with the
+ * situation just explained is DynamicSparsityPattern. A version of
+ * the function DynamicSparsityPattern::reinit() exists that takes an
+ * IndexSet argument that indicates which lines of the sparsity pattern to
+ * allocate memory for. In other words, it is safe to create such an object
+ * that will report as its size 1 billion, but in fact stores only as
+ * many rows as the index set has elements. You can then use the usual
+ * function DoFTools::make_sparsity_pattern to build the sparsity pattern that
+ * results from assembling on the locally owned portion of the mesh. The
+ * resulting object can be used to initialize a PETSc or Trilinos matrix which
+ * supports very large object sizes through completely distributed storage. The
+ * matrix can then be assembled by only looping over those cells owned by the
+ * current processor.
+ *
+ * The only thing to pay attention to is for which degrees of freedom the
+ * sparsity needs to store entries. These are, in essence, the ones we could
+ * possibly store values to in the matrix upon assembly. It is clear that
+ * these are certainly the locally active degrees of freedom (which live on
+ * the cells we locally own) but through constraints, it may also be possible
+ * to write to entries that are located on ghost cells. Consequently, you need
+ * to pass the index set that results from
+ * DoFTools::extract_locally_relevant_dofs() upon initializing the sparsity
+ * pattern.
+ *
+ *
+ * <h4>Constraints on degrees of freedom</h4>
+ *
+ * When creating the sparsity pattern as well as when assembling the linear
+ * system, we need to know about constraints on degrees of freedom, for
+ * example resulting from hanging nodes or boundary conditions. Like the
+ * DynamicSparsityPattern class, the ConstraintMatrix can also take
+ * an IndexSet upon construction that indicates for which of the possibly very
+ * large number of degrees of freedom it should actually store
+ * constraints. Unlike for the sparsity pattern, these are now only those
+ * degrees of freedom which we work on locally when assembling, namely those
+ * returned by DoFTools::extract_locally_active_dofs() (a superset of the
+ * locally owned ones).
+ *
+ * There are, however, situations where more complicated constraints appear in
+ * finite element programs. An example is in $hp$ adaptive computations where
+ * degrees of freedom can be constrained against other degrees of freedom that
+ * are themselves constrained. In a case like this, in order to fully resolve
+ * this chain of constraints, it may not be sufficient to only store
+ * constraints on locally active degrees of freedom but one may also need to
+ * have constraints available on locally relevant ones. In that case, the
+ * ConstraintMatrix object needs to be initialized with the IndexSet produced
+ * by DoFTools::extract_locally_relevant_dofs() .
+ *
+ * In general, your program will continue to do something if you happen to not
+ * store all necessary constraints on each processor: you will just generate
+ * wrong matrix entries, but the program will not abort. This is opposed to
+ * the situation of the sparsity pattern: there, if the IndexSet passed to the
+ * DynamicSparsityPattern indicates that it should store too few rows
+ * of the matrix, the program will either abort when you attempt to write into
+ * matrix entries that do not exist or the matrix class will silently allocate
+ * more memory to accommodate them. As a consequence, it is useful to err on
+ * the side of caution when indicating which constraints to store and use the
+ * result of DoFTools::extract_locally_relevant_dofs() rather than
+ * DoFTools::extract_locally_active_dofs() . This is also affordable since the
+ * set of locally relevant degrees of freedom is only marginally larger than
+ * the set of locally active degrees of freedom. We choose this strategy in
+ * both step-32 and step-40.
+ *
+ *
+ * <h4>Postprocessing</h4>
+ *
+ * Like everything else, you can only do postprocessing on cells a
+ * local processor owns. The DataOut and KellyErrorEstimator classes
+ * do this automatically: they only operate on locally owned cells
+ * without the need to do anything in particular. At least for large
+ * computations, there is also no way to merge the results of all
+ * these local computations on a single machine, i.e. each processor
+ * has to be self-sufficient. For example, each processor has to
+ * generate its own parallel output files that have to be visualized
+ * by a program that can deal with multiple input files rather than
+ * merging the results of calling DataOut to a single processor before
+ * generating a single output file. The former can be achieved, for
+ * example, using the DataOutBase::write_vtu() and
+ * DataOutBase::write_pvtu_record() functions.
+ *
+ * These same considerations hold for all other postprocessing actions
+ * as well: while it is, for example, possible to compute a global
+ * energy dissipation rate by doing the computations locally and
+ * accumulating the resulting single number per processor into a single
+ * number for the entire computation, it is in general not possible
+ * to do the same if the volume of data produced by every processor is
+ * significant.
+ *
+ * There is one particular consideration for postprocessing, however: whatever
+ * you do on each cell a processor owns, you need access to at least all those
+ * values of the solution vector that are active on these cells (i.e. to the
+ * set of all <i>locally active degrees of freedom</i>, in the language of the
+ * @ref distributed_paper "Distributed Computing paper"), which is a superset
+ * of the degrees of freedom this processor actually owns (because it may not
+ * own all degrees of freedom on the interface between its own cells and those
+ * cells owned by other processors). Sometimes, however, you need even more
+ * information: for example, to compute the KellyErrorEstimator results, one
+ * needs to evaluate the gradient at the interface on the current as well as
+ * its neighbor cell; the latter may be owned by another processor, so we need
+ * those degrees of freedom as well. In general, therefore, one needs access
+ * to the solution values for all degrees of freedom that are <i>locally
+ * relevant</i>. On the other hand, both of the packages we can use for
+ * parallel linear algebra (PETSc and Trilinos) subdivide vectors into chunks
+ * each processor owns and chunks stored on other processors. To postprocess
+ * stuff therefore means that we have to tell PETSc or Trilinos that it should
+ * also import <i>ghost elements</i>, i.e. additional vector elements of the
+ * solution vector other than the ones we store locally. Both the
+ * PETScWrappers::MPI::Vector and TrilinosWrappers::MPI::Vector class support
+ * specifying this information (see step-40 and step-32, respectively) through
+ * the PETScWrappers::MPI::Vector::update_ghost_values() function or, in the
+ * case of Trilinos, construction of a vector with the locally relevant
+ * degrees of freedom index set.
+ */
+
+
+
+namespace parallel
+{
+  /**
+   * A namespace for classes and
+   * functions that support %parallel
+   * computing on %distributed memory
+   * machines. See the @ref
+   * distributed module for an
+   * overview of the facilities this
+   * namespace offers.
+   *
+   * @ingroup distributed
+   */
+  namespace distributed
+  {
+  }
+}
diff --git a/doc/doxygen/headers/dofs.h b/doc/doxygen/headers/dofs.h
new file mode 100644
index 0000000..36f5e9c
--- /dev/null
+++ b/doc/doxygen/headers/dofs.h
@@ -0,0 +1,63 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+ * @defgroup dofs Degrees of Freedom
+ *
+ * This module groups classes and namespaces that have to do with
+ * handling degrees of freedom. The central class of this group is the
+ * DoFHandler class: it is built on top of a triangulation and a
+ * finite element class and allocates degrees of freedom on each cell
+ * of the triangulation as required for the finite element space
+ * described by the finite element object. There are other variants of
+ * the DoFHandler class such as hp::DoFHandler that do similar
+ * things for more special cases.
+ *
+ * DoFHandler objects are used together with objects of type FiniteElement
+ * (or hp::FECollection in the case of hp::DoFHandler) to enumerate all the
+ * degrees of freedom that exist in a triangulation for this particular
+ * finite element. As such, the combination of mesh, finite element, and
+ * DoF handler object can be thought of as providing a <i>basis</i> of
+ * the finite element space: the mesh provides the locations at which basis
+ * functions are defined; the finite element describes what kinds of basis
+ * functions exist; and the DoF handler object provides an enumeration of
+ * the basis, i.e., it provides a concrete structure of the space so that
+ * we can describe functions in this finite dimensional space by vectors
+ * of coefficients.
+ *
+ * DoFHandlers extend Triangulation objects (and the other classes in the @ref
+ * grid module) in that they, too, offer iterators that run over all cells,
+ * faces, or other geometric objects that make up a triangulation. These
+ * iterators are derived from the triangulation iterators and therefore offer
+ * the same functionality, but they also offer additional functions. For
+ * example, they allow to query the indices of the degrees of freedom
+ * associated with the present cell. Note that DoFHandler classes are <i>not
+ * derived</i> from Triangulation, though they use Triangulation objects;
+ * the reason is that there can be more than one DoFHandler object that works
+ * on the same Triangulation object.
+ *
+ * In addition to the DoF handler classes, this module holds a number of
+ * auxiliary classes not commonly used in application programs, as well as
+ * three classes that are not directly associated with the data structures of
+ * the DoFHandler class. The first of these is the ConstraintMatrix class that
+ * stores and treats the constraints associated with hanging nodes. Secondly,
+ * the DoFRenumbering namespace offers functions that can reorder degrees of
+ * freedom; among its functions are ones that sort degrees of freedom in
+ * downstream direction, for example, and ones that sort degrees of freedom in
+ * such a way that the bandwidth of associated matrices is minimized. Finally,
+ * the DoFTools namespace offers a variety of algorithms around handling
+ * degrees of freedom.
+ */
diff --git a/doc/doxygen/headers/exceptions.h b/doc/doxygen/headers/exceptions.h
new file mode 100644
index 0000000..5cc0239
--- /dev/null
+++ b/doc/doxygen/headers/exceptions.h
@@ -0,0 +1,355 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+ * @defgroup Exceptions Exceptions and assertions
+ *
+ * This module contains classes that are used in the exception mechanism of
+ * deal.II.
+ *
+ * <h2>Brief overview</h2>
+ * 
+ * Exceptions are used in two different ways:
+ * <ul>
+ * 
+ *   <li> Static assertions: These are checks that are only enabled in debug
+ *   mode, not in optimized (or production) mode. They are meant to check that
+ *   parameters to functions satisfy certain properties and similar
+ *   assertions. For example, static assertions are used to make sure that two
+ *   vectors that are added together have the same number of components --
+ *   everything else would not make any sense anyway.
+ *
+ *   Such checks are performed by the Assert macro in several thousand places
+ *   within the library. Also, several tutorial programs starting with step-5
+ *   show how to do this.
+ *
+ *   If a static assertion is violated, the exception mechanism generates an
+ *   exception of a type that indicates what exactly goes wrong, displays
+ *   appropriate information, and then aborts the program -- if you try to add
+ *   two vectors of different length, there is nothing that can be done within
+ *   the program to cope with the situation, you have to go fix the program
+ *   code instead. The exceptions of this module are used to indicate the
+ *   reason for the failure.
+ *
+ *
+ *   <li> Dynamic assertions: These are used to check dynamic features, such
+ *   as whether an output file can be written to. These are things that can't
+ *   be checked statically, i.e. they may change from program run to program
+ *   run. It is therefore insufficient to only check these situations in debug
+ *   mode.
+ *
+ *   Rather, one has to check them every time during execution of a
+ *   program. Within deal.II, this is done using the AssertThrow macro
+ *   introduced in step-9, step-13, and
+ *   following tutorial programs. The macro checks a condition, and if
+ *   violated throws an exception of one of the types declared in this
+ *   module, using the C++ <code>throw</code> mechanism. Since these
+ *   are run-time exceptions, this gives the program the chance to
+ *   catch the exception and, for example, write the output to a
+ *   writable file instead.
+ * </ul>
+ *
+ *
+ * <h2>Detailed description</h2>
+ *
+ *  The error handling mechanism in <tt>deal.II</tt> is generally used in two ways.
+ *  The first uses error checking in debug mode only and is useful for programs
+ *  which are not fully tested. When the program shows no errors anymore, one may
+ *  switch off error handling and get better performance by this, since checks
+ *  for errors are done quite frequently in the library (a typical speed up is
+ *  a factor of four!). This mode of exception generation is most useful for
+ *  internal consistency checks such as range checking or checking of the
+ *  validity of function arguments. Errors of this kind usually are programming
+ *  errors and the program should abort with as detailed a message as possible,
+ *  including location and reason for the generation of the exception.
+ *
+ *  The second mode is for error checks which should always be on, such as for
+ *  I/O errors, failing memory requests and the like. It does not make much
+ *  sense to turn this mode off, since this kind of errors may happen in tested
+ *  and untested programs likewise. Exceptions of this kind do not terminate the
+ *  program, rather they throw exceptions in the <tt>C++</tt> manner, allowing the
+ *  program to catch them and eventually do something about it. As it may be
+ *  useful to have some information printed out if an exception could not be
+ *  handled properly, additional information is passed along as for the first
+ *  mode. The latter makes it necessary to provide a family of macros which
+ *  enter this additional information into the exception class; this could
+ *  in principle be done by the programmer himself each time by hand, but since
+ *  the information can be obtained automatically, a macro is provided for
+ *  this.
+ *
+ *  Both modes use exception classes, which need to have special features
+ *  in addition to the <tt>C++</tt> standard's <tt>std::exception</tt> class.
+ *  Such a class is declared by the following lines of code:
+ *  @code
+ *     DeclException2 (ExcDomain, int, int,
+ *                     << "Index= " << arg1 << "Upper Bound= " << arg2);
+ *  @endcode
+ *  
+ *  This declares an exception class named <tt>ExcDomain</tt>, which
+ *  has two variables as additional information (named <tt>arg1</tt>
+ *  and <tt>arg2</tt> by default) and which outputs the given sequence
+ *  (which is appended to an <tt>std::ostream</tt> variable's name,
+ *  thus the weird syntax). There are other <tt>DeclExceptionN</tt>
+ *  macros for exception classes with more or no parameters. By
+ *  convention, the name of all exception classes starts with
+ *  <tt>Exc...</tt> and most of them are declared locally to the class
+ *  it is to be used in (a few very frequently found ones are also
+ *  declared in the StandardExceptions namespace and are available
+ *  everywhere). Declaring exceptions globally is possible but
+ *  pollutes the global namespace, is less readable and most of the time
+ *  unnecessary.
+ *
+ *  Since exception classes are declared the same way for both modes
+ *  of error checking, it is possible to use an exception declared
+ *  through the <tt>DeclExceptionN(...)</tt> macro family for both
+ *  static as well as dynamic checks.
+ *
+ *
+ *  <h3>Use of the debug mode exceptions (static checks)</h3>
+ *
+ *  To use the exception mechanism for debug mode error checking, write lines
+ *  like the following in your source code:
+ *  @code
+ *    Assert (n<dim, ExcDomain(n,dim));
+ *  @endcode
+ *  which by macro expansion does essentially the following:
+ *  @code
+ *    #ifdef DEBUG
+ *        if (!(cond))
+ *              issue error of class ExcDomain(n,dim)
+ *    #else
+ *        do nothing
+ *    #endif
+ *  @endcode
+ *  i.e. it issues an error only if the preprocessor variable
+ *  <tt>DEBUG</tt> is set and if the given condition (in this case
+ *  <tt>n < dim</tt>) is violated.
+ *
+ *  If the exception was declared using the <tt>DeclException0 (...)</tt>
+ *  macro, i.e. without any additional parameters, its name has
+ *  nonetheless to be given with parentheses:
+ *  <tt>Assert (i>m, ExcSomewhat());</tt>
+ *
+ *  <h4>How it works internally</h4>
+ *
+ *  If the <tt>DEBUG</tt> preprocessor directive is set, the call <tt>Assert
+ *  (cond, exc);</tt> is basically converted by the preprocessor into the
+ *  following sequence:
+ *  @code
+ *    if (!(cond))
+ *      deal_II_exceptions::internals::issue_error_assert_1
+ *             (__FILE__,
+ *              __LINE__,
+ *              __PRETTY_FUNCTION__,
+ *              #cond,
+ *              #exc,
+ *              &exc);
+ *  @endcode
+ *  
+ *  (Note that function names and exact calling sequences may change
+ *  over time, but the general principle remains the same.) I.e., if
+ *  the given condition is violated, then the file and line in which
+ *  the exception occurred as well as the condition itself and the call
+ *  sequence of the exception object is passed to the
+ *  deal_II_exceptions::internals::issue_error_assert_1()
+ *  function. Additionally an object of the form given by <tt>exc</tt>
+ *  is created (this is normally an unnamed object like in
+ *  <tt>ExcDomain (n, dim)</tt> of class <tt>ExcDomain</tt>) and
+ *  transferred to this function.
+ *
+ *  <tt>__PRETTY_FUNCTION__</tt> is a macro defined by some compilers and
+ *  gives the name of the function. If another compiler is used, we
+ *  try to set this macro to something reasonable, if the compiler
+ *  provides us with that, and <tt>"(not available)"</tt> otherwise.
+ *
+ *  In <tt>issue_error_assert</tt>, the given data is transferred into
+ *  the <tt>exc</tt> object by calling the set_fields() function;
+ *  after that, the general error info is printed onto
+ *  <tt>std::cerr</tt> using the PrintError() function of <tt>exc</tt>
+ *  and finally the exception specific data is printed using the user
+ *  defined function PrintError() (which is normally created using the
+ *  <tt>DeclException (...)</tt> macro family). If it can be obtained
+ *  from the operating system, the output may also contain a
+ *  stacktrace to show where the error happened. Several of the
+ *  @ref Tutorial programs show a typical output.
+ *
+ *  After printing all this information,
+ *  deal_II_exceptions::internals::abort() is called (with one
+ *  exception, see the end of this section). This terminates the
+ *  program, which is the right thing to do for this kind of error
+ *  checking since it is used to detect programming errors rather than
+ *  run-time errors; a program can, by definition, not recover from
+ *  programming errors.
+ *
+ *  If the preprocessor variable <tt>DEBUG</tt> is not set, then nothing
+ *  happens, i.e. the <tt>Assert</tt> macro is expanded to <tt>{}</tt>.
+ *
+ *  Sometimes, there is no useful condition for an exception other
+ *  than that the program flow should not have reached a certain point,
+ *  e.g. a <tt>default</tt> section of a <tt>switch</tt> statement. In this case,
+ *  raise the exception by the following construct:
+ *  @code
+ *    Assert (false, ExcInternalError());
+ *  @endcode
+ *  See step-7 and several other tutorial programs for
+ *  a use of this construct.
+ *
+ *  As mentioned above, the program is terminated once a call to
+ *  <tt>Assert</tt> fails. However, there is one case where we do not want
+ *  to do this, namely when a C++ exception is active. The usual case
+ *  where this happens is that someone throws an exception through the
+ *  <tt>AssertThrow</tt> mechanism (see below) which, while the stack is
+ *  unwound, leads to the destruction of other objects in stack frames
+ *  above. If other objects refer to the objects being thus destroyed,
+ *  some destructors raise an exception through <tt>Assert</tt>. If we
+ *  would abort the program then, we would only ever see the message
+ *  that an object is being destroyed which is still referenced from
+ *  somewhere, but we would never see the original exception that
+ *  triggered this. (You can see it in the debugger by putting a break
+ *  point on the function <tt>__throw</tt>, but you cannot see it from the
+ *  program itself.) In that case, we use a C++ standard library
+ *  function to detect the presence of another active exception and do
+ *  not terminate the program to allow that the thrown exception
+ *  propagates to some place where its message can be displayed.
+ *
+ *  Since it is common that one failed assertion leads to a whole
+ *  chain of others, we only ever print the very first message. If the
+ *  program is then aborted, that is no problem. If it is not (since a
+ *  C++ exception is active), only the first is displayed and a
+ *  message about suppressed follow-up messages is shown.
+ *
+ *
+ *  <h3>Use of run-time exceptions</h3>
+ *
+ *  For this mode, the standard <tt>C++</tt> <tt>throw</tt> and <tt>catch</tt>
+ *  concept exists. We
+ *  want to keep to this, but want to extend it a bit. In general, the
+ *  structure is the same, i.e. you normally raise an exception by
+ *  @code
+ *    if (!(cond))
+ *      throw ExcSomething();
+ *  @endcode
+ *  and catch it using the statement
+ *  @code
+ *    try {
+ *      do_something ();
+ *    }
+ *    catch (exception &e) {
+ *      std::cerr << "Exception occurred:" << std::endl
+ *           << e.what ()
+ *           << std::endl;
+ *      do_something_to_recover ();
+ *    };
+ *  @endcode
+ *  <tt>exception</tt> is a standard <tt>C++</tt> class providing basic functionality for
+ *  exceptions, such as the virtual function <tt>what()</tt> which returns some
+ *  information on the exception itself. This information is useful if an
+ *  exception can't be handled properly, in which case as precise a description
+ *  as possible should be printed.
+ *
+ *  The problem here is that to get significant and useful information out
+ *  of <tt>what()</tt>, it is necessary to overload this function in our exception
+ *  class and call the <tt>throw</tt> operator with additional arguments to the
+ *  exception class. The first thing, overloading the <tt>what</tt> function is
+ *  done using the <tt>DeclExceptionN</tt> macros, but putting the right information,
+ *  which is the same as explained above for the <tt>Assert</tt> expansion, requires
+ *  some work if one would want to write it down each time:
+ *  @code
+ *    if (!(cond))
+ *      {
+ *        ExcSomething e(additional information);
+ *        e.set_fields (__FILE__, __LINE__, __PRETTY_FUNCTION__,
+ *                      "condition as a string",
+ *                      "name of condition as a string");
+ *        throw e;
+ *      };
+ *  @endcode
+ *
+ *  For this purpose, the macro <tt>AssertThrow</tt> was invented. It does
+ *  mainly the same job as does the <tt>Assert</tt> macro, but it does not
+ *  kill the program, it rather throws an exception as shown above. The mode
+ *  of usage is
+ *  @code
+ *    AssertThrow (cond, ExcSomething(additional information));
+ *  @endcode
+ *
+ *  The condition to be checked is incorporated into the macro in order to
+ *  allow passing the violated condition as a string. The expansion of the
+ *  <tt>AssertThrow</tt> macro is not affected by the <tt>DEBUG</tt>
+ *  preprocessor variable.
+ *
+ *
+ *  <h3>Description of the DeclExceptionN macro family</h3>
+ *
+ *  There is a whole family of <tt>DeclExceptionX</tt> macros
+ *  where <tt>X</tt> is to be replaced by the number of additional
+ *  parameters (0 to 5 presently).
+ *  These macros are used to declare exception classes in the following
+ *  way:
+ *  @code
+ *    DeclException2 (MyExc2,
+ *                    int,
+ *                    int,
+ *                    << " i=" << arg1 << ", m=" << arg2);
+ *  @endcode
+ *  The first argument denotes the name of the exception class to be created.
+ *  The next arguments are the types of the parameters (in this
+ *  case there are two types, corresponding to the <tt>X</tt> in
+ *  <tt>DeclExceptionX</tt>) and finally the output
+ *  sequence with which you can print additional information.
+ *  
+ *  The syntax of the output sequence is a bit weird but gets
+ *  clearer once you see how this macro is defined (again schematically, actual
+ *  function names and definitions may change over time and be different):
+ *  @code
+ *  class name : public ExceptionBase {
+ *    public:
+ *      name (const type1 a1, const type2 a2) :
+ *                     arg1 (a1), arg2(a2) {};
+ *      virtual void print_info (std::ostream &out) const {
+ *        out outsequence << std::endl;
+ *      };
+ *    private:
+ *      type1 arg1;
+ *      type2 arg2;
+ *  };
+ *  @endcode
+ *   
+ *  If declared as specified, you can later use this exception class
+ *  in the following manner:
+ *  @code
+ *    int i=5;
+ *    int m=3;
+ *    Assert (i<m, MyExc2(i,m));
+ *  @endcode
+ *  and the output if the condition fails will be
+ *  @code
+ *    --------------------------------------------------------
+ *    An error occurred in line <301> of file <exc-test.cc>.
+ *    The violated condition was: 
+ *      i<m
+ *    The name and call sequence of the exception was:
+ *      MyExc2(i,m)
+ *    Additional Information: 
+ *      i=5, m=3
+ *    --------------------------------------------------------
+ *  @endcode
+ *  
+ *  Obviously for the <tt>DeclException0(name)</tt> macro, no types and
+ *  also no output sequence is allowed.
+ *
+ * @author Wolfgang Bangerth, 1998-2006
+ */
diff --git a/doc/doxygen/headers/fe.h b/doc/doxygen/headers/fe.h
new file mode 100644
index 0000000..af7c19d
--- /dev/null
+++ b/doc/doxygen/headers/fe.h
@@ -0,0 +1,203 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+ * @defgroup feall Finite elements
+ *
+ * All classes related to shape functions and to access to shape
+ * functions.  This concerns the actual values of finite elements. For
+ * the numbering of degrees of freedom refer to the module on @ref dofs.
+ *
+ * The classes and functions of this module fall into several sub-groups that
+ * are discussed in their respective sub-modules listed above. In addition,
+ * the FETools class provides functions that provide information on finite
+ * elements, transformations between elements, etc.
+ */
+
+
+/**
+ * @defgroup febase Base classes
+ *
+ * The members of this sub-module describe the implementational mechanics of
+ * finite element classes, without actually implementing a concrete
+ * element. For example, the FiniteElement base class declares the virtual
+ * functions a derived class has to implement if it wants to describe a finite
+ * element space. Likewise, the FiniteElementData holds variables that
+ * describe certain values characterizing a finite element, such as the number
+ * of degrees of freedom per vertex, line, or face.
+ *
+ * On the other hand, classes like FE_Poly and FE_PolyTensor are higher
+ * abstractions. They describe finite elements that are built atop polynomial
+ * descriptions of the shape functions on the unit cell. Classes derived from
+ * them then only have to provide a description of the particular polynomial
+ * from which a finite element is built. For example, the FE_Q class that
+ * implements the usual Lagrange elements uses the FE_Poly base class to
+ * generate a finite element by providing it with a set of Lagrange
+ * interpolation polynomials corresponding to an equidistant subdivision of
+ * interpolation points.
+ *
+ * Finally, the FESystem class is used for vector-valued problems. There, one
+ * may want to couple a number of scalar (or also vector-valued) base elements
+ * together to form the joint finite element of a vector-valued operator. As
+ * an example, for 3d Navier-Stokes flow, one may want to use three Q1
+ * elements for the three components of the velocity, and a piecewise constant
+ * Q0 element for the pressure. The FESystem class can be used to couple these
+ * four base elements together into a single, vector-valued element with 4
+ * vector components. The step-8, step-17, and step-18 tutorial programs give
+ * an introduction into the use of this class in the context of the
+ * vector-valued elasticity (Lamé) equations. step-20 discusses a mixed
+ * Laplace discretization that also uses vector-valued elements.
+ * 
+ * @ingroup feall
+ */
+
+
+/**
+ * @defgroup feaccess Finite element access/FEValues classes
+ *
+ * The classes in this module are used when one wants to assemble matrices or
+ * vectors. They link finite elements, quadrature objects, and mappings: the
+ * finite element classes describe a finite element space on a unit cell
+ * (i.e. the unit line segment, square, or cube <tt>[0,1]^d</tt>), the
+ * quadrature classes describe where quadrature points are located and what
+ * weight they have, and the mapping classes describe how to map a point from
+ * the unit cell to a real cell and back. Integration happens at
+ * quadrature points on the real cell, and one needs to know their location as
+ * well as the values and gradients of finite element shape functions at these
+ * points; the FEValues class coordinates getting this information. For
+ * integrations on faces (for example for integration on the boundary, or
+ * interfaces between cells), the FEFaceValues class offers similar
+ * functionality as the FEValues class does for cells. Finally, the
+ * FESubfaceValues class offers the possibility to integrate on parts of faces
+ * if the neighboring cell is refined and the present cell shares only a part
+ * of its face with the neighboring cell. If vector-valued elements are used,
+ * the FEValues and related classes allow access to all vector components; if
+ * one wants to pick individual components, there are extractor classes that
+ * make this task simpler, as described in the @ref vector_valued module.
+ *
+ * The last member of this group, the UpdateFlags enumeration, is used as an
+ * optimization: instead of letting the FEValues class compute every possible
+ * piece of data relating to a given finite element on a cell, you have to
+ * specify up front which information you are actually interested in. The
+ * UpdateFlags enumeration is used to offer symbolic names denoting what you
+ * want the FEValues class to compute.
+ * 
+ * All these classes are used in all @ref Tutorial "tutorial programs" from
+ * step-3 onward, and are described there in significant detail.
+ *
+ * The actual workings of the FEValues class and friends are
+ * complicated because they have to be general yet efficient. The page on
+ * @ref UpdateFlags attempts to give an overview of how this
+ * works.
+ *
+ * @ingroup feall
+ */
+
+
+/**
+ * @defgroup fe Finite element space descriptions
+ *
+ * The classes here describe finite element spaces, such as the simplest Q1
+ * (bi-/trilinear) spaces, and higher order Lagrangian spaces Qp, but also
+ * more specialized spaces such as Nedelec or Raviart-Thomas ones. Concrete
+ * implementations are derived from the abstract FiniteElement base class.
+ *
+ * In essence, the functions these classes have to implement provide the
+ * ability to query the value or derivatives of a shape function at a given
+ * point on the unit cell. To be useful in integrating matrix and right hand
+ * side entries, one has to have the ability to map these shape functions and
+ * gradients to the real cell. This is done using classes derived from the
+ * Mapping base class (see the @ref mapping module) in conjunction with the
+ * FEValues class (see the @ref feaccess module).
+ *
+ * <h3>Vector-valued finite elements</h3>
+ *
+ * deal.II provides two different kinds of vector valued
+ * elements. First, there is a group of genuine vector elements,
+ * usually distinguished by the fact, that each vector component
+ * consists of a different set of anisotropic polynomials. These
+ * elements are typically associated with differential
+ * forms. Currently, they are
+ *
+ * <ul>
+ * <li> FE_ABF
+ * <li> FE_BDM, FE_DGBDM
+ * <li> FE_Nedelec, FE_DGNedelec
+ * <li> FE_RaviartThomas, FE_DGRaviartThomas
+ * </ul>
+ *
+ * Additionally, deal.II offers a mechanism to create a vector element
+ * from existing scalar or vector elements. The FESystem class is
+ * responsible for this: it doesn't describe shape functions itself, but
+ * assembles a vector-valued finite element from other finite element
+ * objects. This functionality is described in step-8, step-17 and other
+ * tutorial programs after that.
+ *
+ * @note Support  for the implementation of  vector-valued elements is
+ * provided  by  the  class  FE_PolyTensor. Typically,  a  new  vector
+ * element should be derived from this class.
+ *
+ * <h3>Discontinuous Galerkin</h3>
+ *
+ * For each finite element conforming to any space of weakly
+ * differentiable functions like <i>H<sup>1</sup></i> or
+ * <i>H<sup>curl</sup></i>, we can define an analogous DG space by
+ * simply assigning all degrees of freedom on vertices, edges or faces
+ * to the interior of the cell. This is to be understood in the
+ * topological sense. The interpolation operator for such a degree of
+ * freedom would still be on the boundary.  While not done so
+ * consistently, we provide quite a few of these elements, plus those,
+ * which have no conforming counterparts, like FE_DGP. Here is a list of the current DG elements:
+ * <ul>
+ * <li> scalar: FE_DGP, FE_DGQ
+ * <li> scalar, different shape functions: FE_DGPMonomial, FE_DGPNonparametric, FE_DGQArbitraryNodes
+ * <li> vector-valued:  FE_DGBDM, FE_DGNedelec, FE_DGRaviartThomas
+ * </ul> 
+ *
+ * @note The implementation of vector valued DG elements is supported
+ * by the class FE_DGVector, in the way, that only the vector
+ * polynomial space has to be provided. The actual class derived from
+ * this only has to implement a constructor and
+ * FiniteElement::get_name().
+ *
+ *  @ingroup feall
+ */
+
+
+/**
+ * @defgroup mapping Mappings between reference and real cell
+ *
+ * The classes in this module are used to map from unit coordinates to the
+ * coordinates of a cell in real space. Most commonly, one uses the MappingQ1
+ * class that provides a Q1 (bi-/trilinear) mapping (i.e. a mapping that is
+ * isoparametric for the usual Q1 elements). However, there are other classes
+ * that implement higher-order mappings as well to provide for curvilinear
+ * elements. These are discussed in the step-11 and step-12 tutorial programs.
+ *
+ * The MappingQ1Eulerian class is an extension to the MappingQ1 class in that
+ * it accepts a vector that describes a displacement field for each position
+ * of the domain. This is used in Eulerian computations without the need to
+ * actually move vertices after each time step.
+ * 
+ * In addition, the MappingC1 class provides for a boundary of the
+ * computational domain that is not only curved, but also has a continuous
+ * derivative at the interface between two cells on the boundary.
+ * 
+ * Finally, the MappingCartesian class is an optimization for elements that
+ * are brick-shaped and with edges parallel to the coordinate axes.
+ * 
+ * @ingroup feall
+ */
diff --git a/doc/doxygen/headers/fe_vs_mapping_vs_fevalues.h b/doc/doxygen/headers/fe_vs_mapping_vs_fevalues.h
new file mode 100644
index 0000000..b31f1e8
--- /dev/null
+++ b/doc/doxygen/headers/fe_vs_mapping_vs_fevalues.h
@@ -0,0 +1,333 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+ * @defgroup FE_vs_Mapping_vs_FEValues How Mapping, FiniteElement, and FEValues work together
+ * 
+ * <h2>Introduction</h2>
+ * 
+ * Most people create finite element (and, potentially, mapping) objects once
+ * but then never actually call any member functions on them -- they simply
+ * use them for assembly via the FEValues interface. The only other interaction
+ * most will have is by reading the FiniteElementData::dofs_per_cell variable,
+ * but that is also just set during construction time. In other words, people
+ * never observe FiniteElement or Mapping objects actually <i>do</i>
+ * anything -- and that is completely by design.
+ *
+ * This document is therefore for those who are interested in writing finite
+ * element or mapping classes and want to understand how FEValues works and
+ * interacts with the FiniteElement and Mapping classes. In the following,
+ * we will not make a distinction between FEValues (which acts on cells),
+ * FEFaceValues (which acts on faces), and FESubfaceValues (which acts on the
+ * children of a face of a cell) as they conceptually all work the same.
+ * Consequently, the term "FEValues" will be used generally for all three of
+ * these classes in the text below.
+ *
+ *
+ * <h2>Who is responsible for what?</h2>
+ *
+ * Before going into detail about data and control flow, let us define which
+ * class is responsible for providing what kind of information.
+ *
+ * <h3>%FEValues objects</h3>
+ *
+ * FEValues is an abstraction that is derived from the observation that almost
+ * everything one ever does in finite element codes only requires the
+ * evaluation of finite element shape functions at quadrature points. This
+ * could be, for example, the approximation of an integral of the form
+ *   @f[
+ *     A^K_{ij} = \int_K \nabla \varphi_i(\mathbf x) \cdot \nabla \varphi_j(\mathbf x) \; dx
+ *   @f]
+ * by quadrature
+ *   @f[
+ *     A^K_{ij} = \sum_q \nabla \varphi_i(\mathbf x_q) \cdot \nabla \varphi_j(\mathbf x_q) \;
+ *     |\text{det}\; J(\mathbf x_q)| w_q,
+ *   @f]
+ * but it is equally valid when wanting to generate graphical output: there we
+ * only need to know the values of a finite element field at the vertices
+ * of a mesh, and this too can be written as evaluating everything at
+ * quadrature points -- these quadrature points are then simply the vertices
+ * of the cells (provided, for example, by QTrapez).
+ *
+ * FEValues's role is to provide a user with the values of shape functions, their
+ * gradients, etc, at quadrature points. The same is true with some geometric
+ * information, e.g., the normal vectors at the quadrature points. To this end,
+ * it provides a large number of member functions in the FEValuesBase base
+ * class that allow a user to query basically everything one can ask for in
+ * regard to shape functions and geometry information, but only at the
+ * quadrature points for which the FEValues object was initialized.
+ *
+ * FEValues does not actually compute this information itself. It really only
+ * provides a place to store it, and then orchestrates the interaction
+ * between mapping and finite element classes to have them compute what
+ * is requested and store the result in the locations provided by
+ * FEValues.
+ *
+ * As a final note, recall that FEValues can provide an incredible array
+ * of information, but that almost all of it is not necessary in any given
+ * context. For example, to compute the integral above, it is not necessary
+ * to know the second derivatives of the shape functions, or to know the
+ * normal vectors at quadrature points. To this end, FEValues uses
+ * UpdateFlags in its interactions with the Mapping and FiniteElement
+ * class to determine what actually needs to be computed. This is discussed
+ * in slightly more detail in @ref UpdateFlags.
+ *
+ *
+ * <h3>Mappings</h3>
+ *
+ * Mappings (i.e., classes derived from the Mapping base class) are responsible
+ * for everything that has to do with the mapping from the reference (unit) cell
+ * $[0,1]^\text{dim}$ to each of the actual cells
+ * $K\subset{\mathbb R}^\text{spacedim}$. This is facilitated by a mapping function
+ * $\mathbf F_K:[0,1]^\text{dim} \mapsto K$. The mapping classes therefore
+ * implement interfaces that allow evaluating $\mathbf F_K$ to map forward
+ * points $\hat{\mathbf x}$ from the reference cell to $K$, and to map backward
+ * from the real cell to the reference cell using $\mathbf F_K^{-1}$.
+ * Another common operation that mappings provide is to map vectors (which you
+ * can think of as vectors attached to a point $\hat{\mathbf x}$ on the
+ * reference cell and pointing in certain directions) to their equivalent
+ * vectors on the real cell. This is, for example, what one needs to do
+ * for the gradients of shape functions: these are vectors defined on the
+ * reference cell, and we need to map these gradients to the real cell $K$.
+ * Similar operations can also be defined for matrices (tensors of rank 2,
+ * as opposed to vectors which are tensors of rank 1) and higher order tensors.
+ *
+ * Many of these mappings do not only need the map $\mathbf F_K$ itself,
+ * but also the gradients of this mapping, typically referred to as the
+ * Jacobian $J_K=\hat\nabla \mathbf F_K$, as well as higher derivatives.
+ *
+ * Since FEValues only ever needs to evaluate these things at quadrature
+ * points, mappings do not in general need to provide the ability to
+ * evaluate at <i>arbitrary</i> points. Rather, as we will see below, they will
+ * be initialized to use a set of quadrature points defined on the
+ * reference cell, will then be "re-initialized" for a particular cell,
+ * and all further operations will then only require
+ * the evaluation of $\mathbf F_K$ at these quadrature points on the
+ * real cell.
+ *
+ * The mapping classes then have the dual role to (i) compute geometric
+ * information (e.g., the normal vectors, determinants of the Jacobians, etc)
+ * and put them into the data structures from which FEValues can
+ * provide them to the user, and (ii) to provide the support finite
+ * elements need to map shape functions and their derivatives from
+ * the reference cell to the real cell.
+ *
+ *
+ * <h3>Finite elements</h3>
+ *
+ * Finite element classes (i.e., classes derived from FiniteElement) are
+ * responsible for defining their shape functions, derivatives, and many
+ * other aspects on the reference cell, but also for computing the mapped
+ * values and derivatives on actual cells (obviously with the help of a
+ * mapping object). For the current discussion, only the latter role is
+ * important.
+ * 
+ * As with mappings, all that is important for us here is that the finite
+ * element classes can provide this information at given quadrature points,
+ * and that they can put the computed information into structures provided
+ * by FEValues and from which FEValues member functions can then pass
+ * it on to the user through the member functions in FEValuesBase.
+ *
+ *
+ * <h2>What to compute?</h2>
+ *
+ * Let's say a user wants to compute the gradients of shape functions,
+ * for example to compute the integral above. Then she would initialize
+ * an FEValues object by giving the update_gradients flag (as is done
+ * in basically every tutorial program, starting with step-3). What
+ * this indicates is that the user expects the FEValues object to be
+ * able to provide the gradients of shape functions on the real cell,
+ * but expresses no expectation of any other information.
+ *
+ * FEValues will then first have to find out what the mapping and
+ * finite element objects actually require of each other to make this happen.
+ * This already happens at the time the FEValues constructor is run.
+ * Because the mapping does not depend on the finite element (though the
+ * latter does depend on the former), FEValues first asks the finite
+ * element via FiniteElement::requires_update_flags() which <i>other</i>
+ * pieces of information it also requires to make the user request
+ * happen. As an example, if the finite element were of type
+ * FE_Q, then it would determine that in order to compute the
+ * gradients of the shape functions on the real cell $K$, it will
+ * need to compute the gradients of the shape functions on the
+ * reference cell (something it can do on its own, without any
+ * external help) but that these reference gradients will then have
+ * to be multiplied by the inverse of the Jacobian of the mapping,
+ * $J^{-1}_K$, at each of the quadrature points. This multiplication
+ * is typically referred to as a <i>covariant transformation</i>,
+ * and so FE_Q's implementation of FiniteElement::requires_update_flags()
+ * function (provided in the intermediate class FE_Poly) will return
+ * both the original update_gradients flag as well as
+ * update_covariant_transformation.
+ *
+ * In a second step, the FEValues object will then call the corresponding
+ * function in the mapping, Mapping::requires_update_flags() to determine
+ * what is required to provide both update_gradients and
+ * update_covariant_transformation. The former is not within the realm
+ * of the mapping, so is ignored. The latter will typically require
+ * the computation of the Jacobian matrix $J_K$ first, which a typical
+ * mapping class will indicate by adding update_contravariant_transformation
+ * to the list.
+ *
+ * 
+ * <h2>Pre-computing things</h2>
+ *
+ * At this point, the FEValues object has found out the complete
+ * set of flags indicating what everyone has to compute to satisfy
+ * the user request. The next step, still during the construction
+ * of the FEValues object, stems from the realization that
+ * many things could be pre-computed once and then re-used every time
+ * we move to a real cell. An example would be the fact that to
+ * compute the gradients of the shape functions on the real cell,
+ * we need to know the gradients of the shape functions on the
+ * reference cell (at the quadrature points on the reference cell)
+ * and that these will always be the same: every time we visit
+ * a new cell, these values will remain the same, so it would be
+ * inefficient to re-compute them every time. Similar arguments
+ * can be made for some of the information computed by some of
+ * the mapping classes.
+ *
+ * The FEValues object therefore initializes both the mapping and
+ * the finite element object it points to, using both the
+ * quadrature object and the final set of update flags computed
+ * as described in the previous section. This initialization
+ * involves pre-computing as much as these classes can already
+ * pre-compute given the set of update flags, and then storing
+ * this information for later use.
+ *
+ * The question then arises: where to store this information. In
+ * practice, we do not want to store this information in the mapping
+ * or finite element object itself, because this would mean that
+ * (i) only one FEValues object could use any given mapping or finite
+ * element object at a time, and (ii) that these objects could not
+ * be used in a multithreaded context.
+ *
+ * Rather, the approach works like this:
+ * - FEValues calls Mapping::get_data() (and FEFaceValues calls
+ *   Mapping::get_face_data(), and FESubfaceValues calls
+ *   Mapping::get_subface_data()) with the quadrature object and
+ *   the final set of update flags. The implementation of these
+ *   functions in the classes derived from Mapping will then
+ *   allocate an object of a type derived from
+ *   Mapping::InternalDataBase where they can store essentially whatever
+ *   it is they find useful for later re-use. Mapping::InternalDataBase
+ *   itself does not actually provide any member variables of significance,
+ *   but it is really left to derived classes what they think they can
+ *   usefully pre-compute and store already at this time. If a mapping
+ *   has nothing to pre-compute (or the author of the mapping class is
+ *   lazy and does not want to think about what could possibly be
+ *   pre-computed), then such a class would simply derive its
+ *   own InternalData object from Mapping::InternalDataBase without
+ *   actually adding any member variables.
+ *
+ *   The object so produced is then returned to the calling site
+ *   in FEValues and stored by the FEValues object. It will be handed
+ *   back every time later on the FEValues object wants any information
+ *   from the mapping, thereby providing the mapping object the
+ *   ability to read the data it had previously stored.
+ *
+ * - Secondly, FEValues also calls FiniteElement::get_data() (and FEFaceValues
+ *   calls FiniteElement::get_face_data(), and FESubfaceValues calls
+ *   FiniteElement::get_subface_data()), again with the quadrature object and
+ *   the final set of update flags. These functions do essentially the
+ *   same as their counterparts in the mappings, and again the object
+ *   so initialized, this time of a type derived from
+ *   FiniteElement::InternalDataBase, will always be given back to the finite
+ *   element whenever the FEValues object wants something from the finite
+ *   element object at a later time.
+ *
+ * This approach allows us to use finite element and mapping objects from
+ * multiple FEValues objects at the same time, and possibly from multiple
+ * threads at the same time. The point is simply that every user of a
+ * finite element or mapping object would hold their own, unique, object
+ * returned from the <code>get_data()</code> functions, and that everything
+ * that ever happens happens on these objects, rather than on the member
+ * variables of the mapping or finite element object itself.
+ *
+ *
+ * <h2>Computing on a given cell</h2>
+ *
+ * All of the previous steps happened at the time the FEValues object
+ * was created. Up to this point, all we did was set up data structures,
+ * but nothing useful has been computed so far from the perspective of
+ * the user. This only happens when FEValues::reinit() is called on
+ * a concrete cell $K$.
+ *
+ * The things FEValues then does are, in this order:
+ *
+ * - FEValues figures out whether the cell is a translation
+ *   or other similarly simple transformation of the previous cell for which
+ *   FEValues::reinit() was called. The result of this, stored in a
+ *   CellSimilarity::Similarity object, will then be passed to mapping and
+ *   finite element to potentially simplify some computations. For example,
+ *   if the current cell is simply a translation of the previous one, then
+ *   there is no need to re-compute the Jacobian matrix $J_K$ of the
+ *   mapping (or its inverse) because it will be the same as for the
+ *   previous cell.
+ *
+ * - Next, FEValues::reinit() calls
+ *   Mapping::fill_fe_values() (and, obviously,
+ *   FEFaceValues calls Mapping::fill_fe_face_values() and
+ *   FESubfaceValues calls Mapping::fill_fe_subface_values()). The arguments
+ *   to this function include the cell (or face, or subface) which we are
+ *   asked to visit, as well as the cell similarity argument from
+ *   above, a reference to the object we had previously obtained from
+ *   Mapping::get_data(), and a reference to an object of type
+ *   internal::FEValues::MappingRelatedData into which the mapping is
+ *   supposed to write its results. In particular, it will need to
+ *   compute all mapping related information previously specified by
+ *   the update flags, and then write them into the output object.
+ *   Examples of fields in the output object that the mapping needs
+ *   to fill are the computation of JxW values, the computation of
+ *   Jacobian matrices and their inverses, and the normal vectors to
+ *   cells (if dim is less than spacedim) and faces.
+ *
+ * - Finally, FEValues::reinit() calls
+ *   FiniteElement::fill_fe_values() (and, obviously,
+ *   FEFaceValues calls FiniteElement::fill_fe_face_values() and
+ *   FESubfaceValues calls FiniteElement::fill_fe_subface_values()). The arguments
+ *   to this function include the cell (or face, or subface) which we are
+ *   asked to visit, as well as the cell similarity argument from
+ *   above, a reference to the object we had previously obtained from
+ *   FiniteElement::get_data(), and a reference to an object of type
+ *   internal::FEValues::FiniteElementRelatedData into which the finite element
+ *   is supposed to write its results.
+ *
+ *   In addition to these, the FiniteElement::fill_fe_values() function
+ *   also receives references to the mapping object in use, as well as the
+ *   Mapping::InternalDataBase object we had previously received from
+ *   Mapping::get_data(). The reason is that typically, the finite
+ *   element wants to map values or gradients of shape functions from the reference
+ *   cell to the actual cell, and these mappings are facilitated by the
+ *   various Mapping::transform() functions -- which all require a reference
+ *   to the internal object that the FEValues object had previously acquired
+ *   from the mapping. This is probably best understood by looking at actual code,
+ *   and a simple yet instructive example can be found in
+ *   FE_Poly::fill_fe_values(), a function that works on general scalar,
+ *   polynomial finite element bases.
+ *
+ *   As with the mapping, the FiniteElement::fill_fe_values() functions then
+ *   use whatever information they had previously computed upon construction
+ *   of the FEValues object (i.e., when it called FiniteElement::get_data()),
+ *   and use this and the functions in the mapping to compute whatever was
+ *   requested as specified by the update flags.
+ *
+ * This all done, we are finally in a position to offer the owner of the
+ * FEValues access to the fields originally requested via the update
+ * flags.
+ *
+ * @ingroup feall
+ */
diff --git a/doc/doxygen/headers/functions.h b/doc/doxygen/headers/functions.h
new file mode 100644
index 0000000..1be1efb
--- /dev/null
+++ b/doc/doxygen/headers/functions.h
@@ -0,0 +1,71 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+ * @defgroup functions Functions
+ *
+ * Functions are used in various places in deal.II, for example to
+ * describe boundary conditions, coefficients in equations, forcing
+ * terms, or exact solutions. Since closed form expressions for
+ * equations are often hard to pass along as function arguments,
+ * deal.II uses the Function base class to describe these
+ * objects. Essentially, the interface of this base class requires
+ * derived classes to implement the ability to return the value of a
+ * function at one or a list of particular locations, and possibly (if
+ * needed) of gradients or second derivatives of the function. With
+ * this, function objects can then be used by algorithms like
+ * VectorTools::interpolate, VectorTools::project_boundary_values, and
+ * other functions.
+ *
+ * Some functions are needed again and again, and are therefore
+ * already provided in deal.II. This includes a function with a
+ * constant value, a function that is zero everywhere, or a
+ * vector-valued function for which only one vector component has a
+ * particular value and all other components are zero. Some more
+ * specialized functions are also defined in the Functions namespace.
+ *
+ *
+ * <h3>Time dependent functions</h3>
+ * 
+ * For time dependent computations, boundary conditions and/or right
+ * hand side functions may also change with time. Since at a given
+ * time step one is usually only interested in the spatial dependence
+ * of a function, it would be awkward if one had to pass a value for
+ * the time variable to all methods that use function objects. For
+ * example, the VectorTools::interpolate_boundary_values function
+ * would have to take a time argument which it can use when it wants
+ * to query the value of the boundary function at a given time
+ * step. However, it would also have to do so if we are considering a
+ * stationary problem, for which there is nothing like a time
+ * variable.
+ *
+ * To circumvent this problem, function objects are always considered
+ * spatial functions only. However, the Function class is derived from
+ * the FunctionTime base class that stores a value for a time
+ * variable, if so necessary. This way, one can define a function
+ * object that acts as a spatial function but can do so internally by
+ * referencing a particular time. In the above example, one would set the
+ * time of the function object to the present time step before handing
+ * it off to the VectorTools::interpolate_boundary_values method.
+ *
+ * 
+ * <h3>Tensor-valued functions</h3>
+ *
+ * The Function class is the most frequently used, but sometimes one needs a
+ * function the values of which are tensors, rather than scalars. The
+ * TensorFunction template can do this for you. Apart from the return type,
+ * the interface is mostly the same as that of the Function class.
+ */
diff --git a/doc/doxygen/headers/geodynamics.h b/doc/doxygen/headers/geodynamics.h
new file mode 100644
index 0000000..9079094
--- /dev/null
+++ b/doc/doxygen/headers/geodynamics.h
@@ -0,0 +1,145 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+@defgroup geodynamics The geodynamics demonstration suite
+
+deal.II's @ref Tutorial "tutorial" contains a set of programs that together
+form the geodynamics demonstration suite. The idea of these programs is to
+demonstrate techniques for advanced finite element software using
+applications from geodynamics, i.e. the investigation of processes in the
+solid earth. By doing so, these programs are supposed to provide a basis
+for more specialized, dedicated programs that can solve actual geodynamics
+problems, for example as part of the work of graduate students or
+postdocs. A more thorough discussion of the motivation for these programs
+follows below.
+
+Currently, the geodynamics testsuite contains the following
+programs:
+
+- step-8: Elasticity
+- step-16: A %parallel elasticity solver
+- step-20: Porous media flow
+- step-21: Multiphase flow through porous media
+- step-22: Stokes flow
+- step-31: Thermal convection (Boussinesq flow)
+- step-32: A %parallel Boussinesq solver for mantle convection
+
+Some of these programs were developed under contract from the California
+Institute of Technology with support by the National Science Foundation
+under Award No. EAR-0426271, the grant that funded the <a target="_top"
+href="http://www.geodynamics.org">Computational Infrastructure in
+Geodynamics</a> initiative. The recipient, Wolfgang Bangerth, gratefully
+acknowledges this source of support.
+
+
+<h3>Rationale</h3>
+
+Adaptive mesh refinement (AMR) has long been identified as a key technology
+that would aid in the accurate and efficient numerical solution of a number of
+geodynamics applications. It has been discussed in the geodynamics community
+for several years and has been a continuous topic on the task list of CIG
+since its inception. Yet, relatively little has happened in this direction so
+far. Only recently have there been attempts to use AMR in geodynamics: CIG
+sponsored a workshop on AMR techniques in Boulder in October 2007, and a
+collaboration between George Biros, Omar Ghattas, Mike Gurnis, and Shijie
+Zhong's groups is currently developing a %parallel adaptive mantle convection
+solver.
+
+One of the reasons for the slow adoption of AMR techniques in geodynamics is
+the relatively steep initial hurdle: codes have to provide the data structures
+and algorithms to deal with adaptive meshes, finite elements have to be able
+to deal with hanging nodes, etc. To do so efficiently and in sufficient
+generality adds tens of thousands of lines of code to finite element programs, too
+much for the average student to do within the time frame of a dissertation. On
+the other hand, there are libraries that provide the infrastructure code on
+which applications supporting AMR can rapidly be built. deal.II
+of course provides exactly this infrastructure.
+
+The goal of the geodynamics testsuite is to write programs for a variety of
+topics relevant to geodynamics. Continuing in the style of the existing tutorial
+programs -- an extensive introduction explaining the background and
+formulation of an application as well as the concepts of the numerical scheme
+used in its solution; detailed comments throughout the code explaining
+implementation details; and a section showing numerical results -- we intend to
+provide the resulting programs as well-documented applications solving model
+problems. In particular, they are aimed at the following goals:
+<ul>
+<li> <i>Starting points:</i> The existing tutorial of deal.II has proven to
+  be an excellent starting point for graduate students and researchers to
+  jump-start developing their own applications. By providing programs that are
+  already close to the targeted application, first results can often be
+  obtained very quickly, both maintaining the initial enthusiasm during
+  development and allowing one to spend research time on implementing
+  application specific behavior rather than using months of work on basic
+  infrastructure code supporting AMR.
+
+  Supporting this point is the fact that although there are currently at least
+  170 publications presenting results obtained with deal.II, we are aware of
+  only a handful of applications that have been built with deal.II from
+  scratch; all others have started as modifications of one of the tutorial
+  programs.
+
+<li> <i>Training:</i> The tutorial programs we propose to write will
+  provide students and researchers with a reference implementation of current
+  numerical technology such as AMR, higher order elements, sophisticated
+  linear and nonlinear solvers, stabilization techniques, etc. Providing these
+  as starting points for further development by others will also serve the
+  goal of training a new generation of geodynamicists in modern numerical
+  algorithms.
+
+<li> <i>Extending equations and formulations:</i> In deal.II, it is fairly
+  simple to extend a set of equations by another equation, for example an
+  additional advected quantity that enters the existing equations as a right
+  hand side or in one of the coefficients. Since applications typically use
+  blocked matrices rather than the one-big-matrix-for-everything approach, it
+  is also not complicated to find suitable linear solvers for augmented
+  equations. Consequently, deal.II is a good tool for trying out more complex
+  formulations of problems, or more complete models and their effects on the
+  accuracy of solutions.
+
+<li> <i>Rapid prototyping and benchmarking:</i> deal.II provides many
+  interchangeable components that allow rapid prototyping of finite element
+  kinds and orders, stabilization techniques, or linear solvers. For example,
+  typically only a few lines of code have to be changed to replace low-order
+  by high-order elements. Through this, it becomes relatively simple to try
+  out higher order elements, a different block elimination solver, or a
+  different stabilization technique. In turn, this may help in benchmarking
+  applications both regarding computing times to solve as well as concerning
+  the accuracy of numerical solutions.
+
+  The applications in this module will already have been benchmarked for
+  correctness. Existing tutorial programs typically employ simpler rather than
+  more complicated solver schemes for exposition but frequently suggest more
+  complicated schemes including hints on how they might be implemented in an
+  appendix. 
+
+<li> <i>Try algorithms:</i> The rapid prototyping abilities of deal.II may
+  also help in determining best algorithms on the scale of programs to which
+  deal.II is applicable, and then to implement this particular algorithm
+  (without the ability to change it easily) in a dedicated program that can
+  run on larger scale machines. For example, a small mantle convection code
+  built on deal.II may be used to determine whether second order elements are
+  useful for this purpose (see, for example, the results shown in
+  step-31). If so, then one may use this result to implement
+  second, rather than first, order elements in dedicated, large-scale mantle
+  convection codes such as that which
+  Ghattas and Zhong are building and that may run on 10,000s of processors, a
+  range currently unattainable by deal.II.
+</ul>
+
+
+*/
diff --git a/doc/doxygen/headers/geometry_and_primitives.h b/doc/doxygen/headers/geometry_and_primitives.h
new file mode 100644
index 0000000..e6644bf
--- /dev/null
+++ b/doc/doxygen/headers/geometry_and_primitives.h
@@ -0,0 +1,52 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+ * @defgroup geomprimitives Geometric and other primitives
+ *
+ * This group contains a number of classes that act as geometric
+ * primitives or primitives for other mathematical objects. For
+ * example, the Tensor @<rank,dim@> class provides tensors of rank
+ * <code>rank</code> in <code>dim</code> space dimensions. Likewise,
+ * the SymmetricTensor offers symmetric tensors.
+ *
+ * Geometrically, the Point class is the foundation of all geometric
+ * descriptions in the deal.II library. It denotes a geometric point
+ * in <code>dim</code> dimensional space. One can view a point as a
+ * vector with <code>dim</code> coordinates that connects the
+ * origin with that particular point; as such, the Point class is
+ * derived from tensors of rank 1 (i.e. vectors), but in contrast to
+ * arbitrary tensors, points have the special connotation of points in
+ * space, and therefore have some additional properties.
+ *
+ * In deal.II, meshes are built from line segments, quadrilaterals, or
+ * hexahedra (depending on the space dimension). The GeometryInfo
+ * class is used to describe properties of these basic objects in unit
+ * space (i.e. for the unit line, unit square, and unit cube). It
+ * offers static data members denoting the number of vertices per
+ * cell, lines per face, or where which vertex is located. This
+ * abstraction allows one to write applications mostly independently of
+ * the actual space dimension: loops over all vertices would simply
+ * run from zero to GeometryInfo<dim>::vertices_per_cell instead of
+ * from 0 to 4 (in 2d) or 0 to 8 (in 3d). In this way, the program
+ * will be correct in 2d as well as 3d, and one can run a program in a
+ * different space dimension simply by recompilation instead of having
+ * to change a significant portion of the code. These
+ * dimension-independent programming techniques are extensively
+ * discussed in the first few tutorial programs and are used
+ * throughout deal.II.
+ */
+
diff --git a/doc/doxygen/headers/global_dof_index.h b/doc/doxygen/headers/global_dof_index.h
new file mode 100644
index 0000000..66a4876
--- /dev/null
+++ b/doc/doxygen/headers/global_dof_index.h
@@ -0,0 +1,88 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+ * @page GlobalDoFIndex When to use types::global_dof_index instead of unsigned int
+ *
+ * deal.II can be configured to use 64-bit indices for degrees of freedom,
+ * rather than the usual unsigned integers that default to 32-bit on most
+ * current systems. This is necessary since we want to be able to solve
+ * problems with more than 4 billion unknowns (the limit of what can be
+ * represented with 32-bit unsigned integers). At the same time, we do not
+ * want to indiscriminately replace all integers in deal.II with 64-bit
+ * versions, since this would increase memory use in many places where we
+ * represent quantities that will most definitely not be larger than 4 billion.
+ *
+ * The data type we define for these indices to keep the bulk
+ * of the code base free of <code>\#ifdef</code>s is types::global_dof_index.
+ * If deal.II is configured as normal, this type is <code>unsigned int</code>,
+ * but can be switched to <code>unsigned long long int</code> if the right
+ * flag is provided (see the ReadMe file). This page is intended to clarify
+ * when types::global_dof_index must be used and when one can use a regular
+ * unsigned integer:
+ *
+ * <dl>
+ *
+ * <dt class="glossary">@anchor GlobalDoFIndexBlockIndices
+ * <b>BlockIndices</b></dt>
+ * <dd>
+ * The number of blocks is an unsigned int because the number is expected to
+ * be low, i.e. less than four billion. However, the size of the block is a
+ * types::global_dof_index because each block can be arbitrarily large.
+ * </dd>
+ *
+ * <dt class="glossary">@anchor GlobalDoFIndexCell <b>Cell</b></dt>
+ * <dd>
+ * The ID of a cell is not unique: Cells with different levels of refinement
+ * and/or on different processors can have the same ID. Thus, all the data
+ * associated to cells can be unsigned int because on a single processor,
+ * on one mesh level, there will definitely not be more than 4 billion
+ * cells.
+ * </dd>
+ *
+ * <dt class="glossary">@anchor GlobalDoFIndexDoFHandler
+ * <b>DoFHandler</b></dt>
+ * <dd>
+ * The ID of each degree of freedom is unique in a parallel computation.
+ * Therefore, degrees of freedom
+ * are types::global_dof_index.
+ * </dd>
+ *
+ * <dt class="glossary">@anchor GlobalDoFIndexFullMatrix
+ * <b>FullMatrix</b></dt>
+ * <dd>
+ * The numbers of rows and columns are types::global_dof_index even if it is not
+ * expected that someone will create a FullMatrix with so many entries.
+ * However, some functions of ConstraintMatrix are templated on the matrix
+ * type and thus, the
+ * size of a FullMatrix has to be of the same type as the size of
+ * SparseMatrix.
+ * </dd>
+ *
+ * <dt class="glossary">@anchor GlobalDoFIndexSparseMatrix
+ * <b>SparseMatrix</b></dt>
+ * <dd>
+ * The size of SparseMatrix can be arbitrarily large and it is conceivable that
+ * with sufficient memory on a single node, one may generate a matrix with
+ * more than 4 billion rows or columns. Therefore, types::global_dof_index is
+ * used. However, even for a large complex problem we can solve now, it is not
+ * reasonable to expect the number of non-zero entries in a sparse matrix to
+ * go over four billion. Thus, we still use unsigned int for, e.g.,
+ * SparsityPattern::row_lengths and similar functions.
+ * </dd>
+ *
+ * </dl>
+ */
diff --git a/doc/doxygen/headers/glossary.h b/doc/doxygen/headers/glossary.h
new file mode 100644
index 0000000..59b441a
--- /dev/null
+++ b/doc/doxygen/headers/glossary.h
@@ -0,0 +1,1708 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+ * @page DEALGlossary Glossary
+ *
+ * This glossary explains a few terms that are frequently used in the
+ * documentation of classes of deal.II. The glossary often only gives
+ * a microscopic view of a particular concept; if you struggle with
+ * the bigger picture, it may therefore also be worth consulting the
+ * global overview of classes on the @ref index page.
+ *
+ * <dl>
+ *
+ * <dt class="glossary">@anchor GlossActive <b>Active cells</b></dt>
+ * <dd>A cell, face or edge is defined as <i>active</i> if it is not
+ * refined any further, i.e., if it does not have children. Unless
+ * working with a multigrid algorithm, active cells are the only
+ * ones carrying degrees of freedom.</dd>
+ *
+ *
+ *
+ * <dt class="glossary">@anchor GlossArtificialCell <b>Artificial cells</b></dt>
+ * <dd>
+ * If a mesh is distributed across multiple MPI processes using the
+ * parallel::distributed::Triangulation class, each processor stores
+ * only the cells it owns, one layer of adjacent cells that are owned
+ * by other processors (called @ref GlossGhostCell "ghost cells"), all coarse level
+ * cells, and all cells that are necessary to maintain the invariant
+ * that adjacent cells must differ by at most one refinement
+ * level. The cells stored on each process that are not owned by this
+ * process and that are not ghost cells are called "artificial cells",
+ * and for these cells the predicate
+ * <code>cell-@>is_artificial()</code> returns true. Artificial cells
+ * are guaranteed to exist in the globally distributed mesh but they
+ * may be further refined on other processors. See the
+ * @ref distributed_paper "Distributed Computing paper" for more
+ * information.
+ *
+ * The concept of artificial cells has no meaning for triangulations
+ * that store the entire mesh on each processor, i.e. the
+ * dealii::Triangulation class.  </dd>
+ *
+ *
+ * <dt class="glossary">@anchor GlossBlockLA <b>Block (linear algebra)</b></dt>
+
+ * <dd>It is often convenient to treat a matrix or vector as a collection of
+ * individual blocks. For example, in step-20 (and other tutorial
+ * programs), we want to consider the global linear system $Ax=b$ in
+ * the form
+ * @f{eqnarray*}
+  \left(\begin{array}{cc}
+    M & B^T \\ B & 0
+  \end{array}\right)
+  \left(\begin{array}{cc}
+    U \\ P
+  \end{array}\right)
+  =
+  \left(\begin{array}{cc}
+    F \\ G
+  \end{array}\right),
+ * @f}
+ * where $U,P$ are the values of velocity and pressure degrees of freedom,
+ * respectively, $M$ is the mass matrix on the velocity space, $B$ corresponds to
+ * the negative divergence operator, and $B^T$ is its transpose and corresponds
+ * to the negative gradient.
+ *
+ * Using such a decomposition into blocks, one can then define
+ * preconditioners that are based on the individual operators that are
+ * present in a system of equations (for example the Schur complement,
+ * in the case of step-20), rather than the entire matrix. In essence,
+ * blocks are used to reflect the structure of a PDE system in linear
+ * algebra, in particular allowing for modular solvers for problems
+ * with multiple solution components. On the other hand, the matrix
+ * and right hand side vector can also be treated as a unit, which is
+ * convenient for example during assembly of the linear system when
+ * one may not want to make a distinction between the individual
+ * components, or for an outer Krylov space solver that doesn't care
+ * about the block structure (e.g. if only the preconditioner needs
+ * the block structure).
+ *
+ * Splitting matrices and vectors into blocks is supported by the
+ * BlockSparseMatrix, BlockVector, and related classes. See the
+ * overview of the various linear algebra classes in the @ref LAC
+ * module. The objects present two interfaces: one that makes the
+ * object look like a matrix or vector with global indexing
+ * operations, and one that makes the object look like a collection of
+ * sub-blocks that can be individually addressed. Depending on
+ * context, one may wish to use one or the other interface.
+ *
+ * Typically, one defines the sub-structure of a matrix or vector by
+ * grouping the degrees of freedom that make up groups of physical
+ * quantities (for example all velocities) into individual blocks of
+ * the linear system. This is defined in more detail below in the
+ * glossary entry on @ref GlossBlock "Block (finite element)".
+ * </dd>
+ *
+ *
+ * <dt class="glossary">@anchor GlossBlock <b>Block (finite element)</b></dt>
+ * <dd>
+ * <i>Intent:</i>
+ * Blocks are a generalization of @ref GlossComponent "components" in that
+ * they group together one or more components of a vector-valued finite
+ * element that one would like to consider jointly. One often wants to do this
+ * to define operators that correspond to the structure of a (part of a)
+ * differential operator acting on the vector-valued solution, such as the
+ * Schur complement solver in step-20, or the block solvers and
+ * preconditioners of step-22.
+ *
+ * For the purpose of a discretization, blocks are the better concept to use
+ * since it is not always possible to address individual components of a
+ * solution. This is, in particular, the case for non-@ref
+ * GlossPrimitive "primitive"
+ * elements. Take for instance the solution of the mixed Laplacian
+ * system with the FE_RaviartThomas element (see step-20). There, the first
+ * <tt>dim</tt> components are the directional velocities. Since the shape
+ * functions are linear combinations of those, these <tt>dim</tt> components
+ * constitute only a single block. On the other hand, the pressure variable is
+ * scalar and would form the second block, but in the <tt>dim+1</tt>st
+ * component.
+ *
+ * The minimal size of each block is dictated by the underlying finite element
+ * (a block consists of a single component for scalar elements, but in the
+ * case of the FE_RaviartThomas, for example, a block consists of <tt>dim</tt>
+ * components). However, several such minimal blocks can be grouped together
+ * into user defined blocks at will, and in accordance with the
+ * application. For instance, for the
+ * <b>Q</b><sub>2</sub><sup><i>d</i></sup>-<b>Q</b><sub>1</sub> (Taylor-Hood) Stokes
+ * element, there are <i>d</i>+1 components each of which could in principle form
+ * its own block. But we are typically more interested in having only two
+ * blocks, one of which consists of all the velocity vector components
+ * (i.e. this block would have <i>d</i> components) and the other having only the
+ * single pressure component.
+ *
+ * <i>Implementation:</i>
+ * deal.II has a number of different finite element classes, all of which are
+ * derived from the FiniteElement base class
+ * (see the @ref feall "module on finite element classes").
+ * With one exception, whether they are scalar or
+ * vector valued, they all define a single block: all vector components the
+ * finite element defines through its FiniteElement::n_components() function
+ * form a single block, i.e. FiniteElement::n_blocks() returns one.
+ *
+ * The exception is the FESystem class that takes multiple simpler elements
+ * and connects them into more complicated ones. Consequently, it can have
+ * more than one block. A FESystem has as many blocks as it has base elements
+ * times their multiplicity (see the constructors of FESystem to understand
+ * this statement). In other words, it does not care how many blocks each base
+ * element has, and consequently you can produce a Stokes element that has
+ * only two blocks by creating the object
+ * @code
+ *    FESystem<dim> (FESystem<dim> (FE_Q<dim>(2), dim), 1,
+ *                   FE_Q<dim>(1), 1);
+ * @endcode
+ * On the other hand, we could have produced a similar object with dim+1
+ * blocks using
+ * @code
+ *    FESystem<dim> (FE_Q<dim>(2), dim,
+ *                   FE_Q<dim>(1), 1);
+ * @endcode
+ * With the exception of the number of blocks, the two objects are the
+ * same for all practical purposes, however.
+ *
+ * <i>Global degrees of freedom:</i>
+ * While we have defined blocks above in terms of the vector components of a
+ * vector-valued solution function (or, equivalently, in terms of the
+ * vector-valued finite element space), every shape function of a finite
+ * element is part of one block or another. Consequently, we can partition all
+ * degrees of freedom defined on a DoFHandler into individual blocks. Since by
+ * default the DoFHandler class enumerates degrees of freedom in a more or
+ * less random way, you will first want to call the
+ * DoFRenumbering::component_wise function to make sure that all degrees of
+ * freedom that correspond to a single block are enumerated consecutively.
+ *
+ * If you do this, you naturally partition matrices and vectors into blocks as
+ * well (see @ref GlossBlockLA "block (linear algebra)").  In most cases, when
+ * you subdivide a matrix or vector into blocks, you do so by creating one
+ * block for each block defined by the finite element (i.e. in most practical
+ * cases the FESystem object). However, this need not be so: the
+ * DoFRenumbering::component_wise function allows to group several vector
+ * components or finite element blocks into the same logical block (see, for
+ * example, the @ref step_22 "step-22" or step-31 tutorial programs, as
+ * opposed to step-20). As a consequence, using this feature, we can achieve
+ * the same result, i.e. subdividing matrices into $2\times 2$ blocks and
+ * vectors into 2 blocks, for the second way of creating a Stokes element
+ * outlined above using an extra argument as we would have using the first way
+ * of creating the Stokes element with two blocks right away.
+ *
+ * More information on this topic can be found in the documentation of
+ * FESystem, the @ref vector_valued module and the tutorial programs
+ * referenced therein.
+ *
+ * <i>Selecting blocks:</i>
+ * Many functions allow you to restrict their operation to certain
+ * vector components or blocks. For example, this is the case for
+ * the functions that interpolate boundary values: one may want
+ * to only interpolate the boundary values for the velocity block of
+ * a finite element field but not the pressure block. The way to do
+ * this is by passing a BlockMask argument to such functions, see the
+ * @ref GlossBlockMask "block mask entry of this glossary".
+ * </dd>
+ *
+ *
+ * <dt class="glossary">@anchor GlossBlockMask <b>Block mask</b></dt>
+ *
+ * <dd>
+ * In much the same way as one can think of elements as being composed
+ * of physical vector components (see @ref GlossComponent) or logical
+ * blocks (see @ref GlossBlock), there is frequently a need to select
+ * a set of such blocks for operations that are not intended to be run
+ * on <i>all</i> blocks of a finite element space. Selecting which blocks
+ * to work on happens using the BlockMask class.
+ *
+ * Block masks work in much the same way as component masks, including the
+ * fact that the BlockMask class has similar semantics to the ComponentMask
+ * class. See @ref GlossComponentMask "the glossary entry on component masks"
+ * for more information.
+ *
+ * @note While components and blocks provide two alternate but equally valid
+ * viewpoints on finite elements with multiple vector components, the fact
+ * is that throughout the library there are far more places where you can
+ * pass a ComponentMask argument rather than a BlockMask argument. Fortunately,
+ * one can be converted into the other, using the syntax
+ * <code>fe.component_mask(block_mask)</code> where <code>block_mask</code>
+ * is a variable of type BlockMask. In other words, if you have a block
+ * mask but need to call a function that only accepts a component mask, this
+ * syntax can be used to obtain the necessary component mask.
+ *
+ * <b>Creation of block masks:</b>
+ * Block masks are typically created by asking the finite element
+ * to generate a block mask from certain selected vector components using
+ * code such as this where we create a mask that only denotes the
+ * pressure component of a Stokes element (see @ref vector_valued):
+ * @code
+ *   FESystem<dim> stokes_fe (FESystem<dim>(FE_Q<dim>(2), dim), 1,    // Q2 element for the velocities
+ *                            FE_Q<dim>(1),                     1);     // Q1 element for the pressure
+ *   FEValuesExtractors::Scalar pressure(dim);
+ *   BlockMask pressure_mask = stokes_fe.block_mask (pressure);
+ * @endcode
+ * The result is a block mask that, in 1d as well as 2d and 3d, would have values
+ * <code>[false, true]</code>. Similarly, using
+ * @code
+ *   FEValuesExtractors::Vector velocities(0);
+ *   BlockMask velocity_mask = stokes_fe.block_mask (velocities);
+ * @endcode
+ * would result in a mask <code>[true, false]</code> in any dimension.
+ *
+ * Note, however, that if we had defined the finite element in the following
+ * way:
+ * @code
+ *   FESystem<dim> stokes_fe (FE_Q<dim>(2), dim,    // Q2 element for the velocities
+ *                            FE_Q<dim>(1), 1);     // Q1 element for the pressure
+ * @endcode
+ * then the code
+ * @code
+ *   FEValuesExtractors::Scalar pressure(dim);
+ *   BlockMask pressure_mask = stokes_fe.block_mask (pressure);
+ * @endcode
+ * would yield a block mask that in 2d has elements
+ * <code>[false, false, true]</code> because the element has
+ * <code>dim+1</code> components and equally many blocks. See the
+ * discussion on what a block represents exactly in the
+ * @ref GlossBlock "block entry of this glossary".
+ * </dd>
+ *
+ *
+ * <dt class="glossary">@anchor GlossBoundaryForm <b>%Boundary form</b></dt>
+ *
+ * <dd>For a dim-dimensional triangulation in dim-dimensional space,
+ * the boundary form is a vector defined on faces. It is the vector
+ * product of the image of coordinate vectors on the surface of the
+ * unit cell. It is a vector normal to the surface, pointing outwards
+ * and having the length of the surface element.
+ *
+ * A more general definition would be that (at least up to the length
+ * of this vector) it is exactly that vector that is necessary when
+ * considering integration by parts, i.e. equalities of the form
+ * $\int_\Omega \text{div} \vec \phi = \int_{\partial\Omega} \vec n
+ * \cdot \vec \phi$. Using this definition then also explains what
+ * this vector should be in the case of domains (and corresponding
+ * triangulations) of dimension <code>dim</code> that are embedded in
+ * a space <code>spacedim</code>: in that case, the boundary form is
+ * still a vector defined on the faces of the triangulation; it is
+ * orthogonal to all tangent directions of the boundary and within the
+ * tangent plane of the domain. Note that this is compatible with the case
+ * <code>dim==spacedim</code> since there the tangent plane is the
+ * entire space ${\mathbb R}^\text{dim}$.
+ *
+ * In either case, the length of the vector equals the determinant of
+ * the transformation of the reference face to the face of the current
+ * cell.  </dd>
+ *
+ *
+ * <dt class="glossary">@anchor GlossBoundaryIndicator <b>%Boundary indicator</b></dt>
+ *
+ * <dd> In a Triangulation object, every part of the boundary may be
+ * associated with a unique number (of type types::boundary_id) that
+ * is used to determine what kinds of boundary conditions are to be
+ * applied to a particular part of a boundary. The boundary is
+ * composed of the faces of the cells and, in 3d, the edges of these
+ * faces.
+ *
+ * By default, all boundary indicators of a mesh are zero, unless you are
+ * reading from a mesh file that specifically sets them to something different,
+ * or unless you use one of the mesh generation functions in namespace GridGenerator
+ * that have a 'colorize' option. A typical piece of code that sets the boundary
+ * indicator on part of the boundary to something else would look like
+ * this, here setting the boundary indicator to 42 for all faces located at
+ * $x=-1$:
+ * @code
+ *   for (typename Triangulation<dim>::active_cell_iterator
+ *          cell = triangulation.begin_active();
+ *        cell != triangulation.end();
+ *        ++cell)
+ *     for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+ *       if (cell->face(f)->at_boundary())
+ *         if (cell->face(f)->center()[0] == -1)
+ *           cell->face(f)->set_boundary_id (42);
+ * @endcode
+ * This calls the function TriaAccessor::set_boundary_id. In 3d, it may
+ * also be appropriate to call TriaAccessor::set_all_boundary_ids instead
+ * on each of the selected faces. To query the boundary indicator of a particular
+ * face or edge, use TriaAccessor::boundary_id.
+ *
+ * In older versions of the library (prior to 8.2), if you wanted also
+ * to change the way the Triangulation class treated the boundary for
+ * the purposes of mesh refinement, you could call
+ * Triangulation::set_boundary to associate a boundary object with a
+ * particular boundary indicator. This method is still supported, and
+ * it allows the Triangulation object to use a different method of
+ * finding new points on faces and edges to be refined; the default is
+ * to use a StraightBoundary object for all faces and edges. The
+ * results section of step-49 has a worked example that shows all of
+ * this in action.
+ *
+ * The suggested method from version 8.2 onwards is to split the
+ * geometrical description of the boundary from its physical meaning,
+ * by using separately manifold_ids and boundary_ids. The former are
+ * used to describe how the geometry changes, and the latter are used
+ * to identify the boundary conditions.
+ *
+ * Many of the functions in namespaces DoFTools and VectorTools take
+ * arguments that specify which part of the boundary to work on, and
+ * they specifically refer to boundary_ids. Examples are
+ * DoFTools::make_periodicity_constraints,
+ * DoFTools::extract_boundary_dofs,
+ * DoFTools::make_zero_boundary_constraints,
+ * VectorTools::interpolate_boundary_values, and
+ * VectorTools::compute_no_normal_flux_constraints.
+ *
+ * @note Boundary indicators are inherited from mother faces and edges to
+ * their children upon mesh refinement. Some more information about boundary
+ * indicators is also presented in a section of the documentation of the
+ * Triangulation class.
+ *
+ * @note For parallel triangulations of type parallel::distributed::Triangulation,
+ * it is not enough to set boundary indicators only once at the beginning. See
+ * the long discussion on this topic in the class documentation of
+ * parallel::distributed::Triangulation .
+ * </dd>
+ *
+ * @see @ref boundary "The module on boundaries"
+ *
+ *
+ * <dt class="glossary">@anchor GlossComponent <b>Component</b></dt>
+ *
+ * <dd> When considering systems of equations in which the solution is not
+ * just a single scalar function, we say that we have a <i>vector system</i>
+ * with a <i>vector-valued solution</i>. For example, the vector solution in
+ * the elasticity equation considered in step-8 is $u=(u_x,u_y,u_z)^T$
+ * consisting of the displacements in each of the three coordinate
+ * directions. The solution then has three elements. Similarly, the 3d Stokes
+ * equation considered in step-22 has four elements: $u=(v_x,v_y,v_z,p)^T$. We
+ * call the elements of the vector-valued solution <i>components</i> in
+ * deal.II. To be well-posed, for the solution to have $n$ components, there
+ * need to be $n$ partial differential equations to describe them. This
+ * concept is discussed in great detail in the @ref vector_valued module.
+ *
+ * In finite element programs, one frequently wants to address individual
+ * elements (components) of this vector-valued solution, or sets of
+ * components. For example, we do this extensively in step-8, and a lot
+ * of documentation is also provided in the module on
+ * @ref vector_valued "Handling vector valued problems". If you are thinking
+ * only in terms of the partial differential equation (not in terms of
+ * its discretization), then the concept of <i>components</i> is the natural
+ * one.
+ *
+ * On the other hand, when talking about finite elements and degrees of
+ * freedom, <i>components</i> are not always the correct concept because
+ * components are not always individually addressable. In particular, this is
+ * the case for @ref GlossPrimitive "non-primitive finite elements". Similarly,
+ * one may not always <i>want</i> to address individual components but rather
+ * sets of components — e.g. all velocity components together, and
+ * separate from the pressure in the Stokes system, without further splitting
+ * the velocities into their individual components. In either case, the
+ * correct concept to think in is that of a @ref GlossBlock "block".  Since
+ * each component, if individually addressable, is also a block, thinking in
+ * terms of blocks is most frequently the better strategy.
+ *
+ * For a given finite element, the number of components can be queried using
+ * the FiniteElementData::n_components() function, and you can find out
+ * which vector components are nonzero for a given finite element shape
+ * function using FiniteElement::get_nonzero_components(). The values and
+ * gradients of individual components of a
+ * shape function (if the element is primitive) can be queried using the
+ * FiniteElement::shape_value_component() and
+ * FiniteElement::shape_grad_component() functions on the reference cell. The
+ * FEValues::shape_value_component() and FEValues::shape_grad_component()
+ * functions do the same on a real cell. See also the documentation of the
+ * FiniteElement and FEValues classes.
+  *
+ * <i>Selecting components:</i>
+ * Many functions allow you to restrict their operation to certain
+ * vector components or blocks. For example, this is the case for
+ * the functions that interpolate boundary values: one may want
+ * to only interpolate the boundary values for the velocity components of
+ * a finite element field but not the pressure component. The way to do
+ * this is by passing a ComponentMask argument to such functions, see the
+ * @ref GlossComponentMask "component mask entry of this glossary".
+ * </dd>
+ *
+ *
+ * <dt class="glossary">@anchor GlossComponentMask <b>Component mask</b></dt>
+ *
+ * <dd>
+ * When using vector-valued elements (see @ref vector_valued) to solve systems
+ * of equations, one frequently wants to restrict some operations to only certain
+ * solution variables. For example, when solving the Stokes equations, one may
+ * wish to only interpolate boundary values for the velocity components
+ * but not the pressure. In deal.II, this is typically done by passing functions
+ * a <i>component mask</i>. Component masks are always specified as a
+ * ComponentMask object which one can think of as an array with
+ * as many entries as the finite element has components (e.g., in the Stokes case, there are
+ * <code>dim+1</code> components) and where each entry is either true or false.
+ * In the example where we would like to interpolate boundary values only for
+ * the velocity components of the Stokes system, this component mask would then
+ * be <code>[true, true, false]</code> in 2d and <code>[true, true, true, false]</code>
+ * in 3d to indicate that no boundary values shall be set for the pressure variable
+ * (the last of the <code>dim+1</code> vector components of the solution).
+ *
+ * There are many functions that take such component masks, for example
+ * DoFTools::make_zero_boundary_constraints,
+ * VectorTools::interpolate_boundary_values,
+ * KellyErrorEstimator::estimate, etc. In some cases, there are multiple
+ * functions with these names but only some of them have a component mask
+ * argument.
+ *
+ * <b>Semantics of component masks:</b>
+ * Many of the functions that take a component mask interpret an object that has
+ * been default constructed as indicating <i>all components</i>, i.e., as if the
+ * vector had the correct length and was filled with only <code>true</code>
+ * values. The reason
+ * is that default initialized objects can be constructed in place using the code snippet
+ * <code>ComponentMask()</code> and can thus be used as a default
+ * argument in function signatures.
+ *
+ * In other words, ComponentMask objects can be in one of two states: They can have
+ * been initialized by a vector of booleans with a nonzero length; in that case,
+ * they represent a mask of a particular length where some elements may be true
+ * and others may be false. Or, the ComponentMask may have been default initialized
+ * (using the default constructor) in which case it represents an array of indefinite
+ * length (i.e., a length appropriate to the circumstances) in which <i>every entry</i>
+ * is true.
+ *
+ * <b>Creation of component masks:</b>
+ * Component masks are typically created by asking the finite element
+ * to generate a component mask from certain selected components using
+ * code such as this where we create a mask that only denotes the
+ * pressure component of a Stokes element (see @ref vector_valued):
+ * @code
+ *   FESystem<dim> stokes_fe (FE_Q<dim>(2), dim,    // Q2 element for the velocities
+ *                            FE_Q<dim>(1), 1);     // Q1 element for the pressure
+ *   FEValuesExtractors::Scalar pressure(dim);
+ *   ComponentMask pressure_mask = stokes_fe.component_mask (pressure);
+ * @endcode
+ * The result is a component mask that, in 2d, would have values
+ * <code>[false, false, true]</code>. Similarly, using
+ * @code
+ *   FEValuesExtractors::Vector velocities(0);
+ *   ComponentMask velocity_mask = stokes_fe.component_mask (velocities);
+ * @endcode
+ * would result in a mask <code>[true, true, false]</code> in 2d. Of
+ * course, in 3d, the result would be <code>[true, true, true, false]</code>.
+ *
+ * @note Just as one can think of composed elements as being made up of
+ * @ref GlossComponent "components" or @ref GlossBlock "blocks", there are
+ * component masks (represented by the ComponentMask class) and
+ * @ref GlossBlockMask "block masks" (represented by the BlockMask class).
+ * The FiniteElement class has functions that convert between the two kinds of
+ * objects.
+ *
+ * @note Not all component masks actually make sense. For example, if you have
+ * a FE_RaviartThomas object in 2d, then it doesn't make any sense to have a
+ * component mask of the form <code>[true, false]</code> because you try to
+ * select individual vector components of a finite element where each shape
+ * function has both $x$ and $y$ velocities. In essence, while you can of
+ * course create such a component mask, there is nothing you can do with it.
+ * </dd>
+ *
+ *
+ *
+ * <dt class="glossary">@anchor GlossCompress <b>Compressing distributed
+ *                                              vectors and matrices</b></dt>
+ *
+ * <dd>
+ * For %parallel computations, deal.II uses the vector and matrix
+ * classes defined in the PETScWrappers and TrilinosWrappers
+ * namespaces. When running programs in %parallel using MPI, these
+ * classes only store a certain number of rows or elements on the
+ * current processor, whereas the rest of the vector or matrix is
+ * stored on the other processors that belong to our MPI
+ * universe. This presents a certain problem when you assemble linear
+ * systems: we add elements to the matrix and right hand side vectors
+ * that may or may not be stored locally. Sometimes, we may also want
+ * to just <i>set</i> an element, not add to it.
+ *
+ * Both PETSc and Trilinos allow adding to or setting elements that
+ * are not locally stored. In that case, they write the value that we
+ * want to store or add into a cache, and we need to call one of the
+ * functions TrilinosWrappers::VectorBase::compress(),
+ * TrilinosWrappers::SparseMatrix::compress(),
+ * PETScWrappers::VectorBase::compress() or
+ * PETScWrappers::MatrixBase::compress() which will then ship the
+ * values in the cache to the MPI process that owns the element to
+ * which it is supposed to be added or written to. Because the MPI
+ * model only allows communication to be initiated from the sender
+ * side (i.e. in particular, it is not a remote procedure call), these
+ * functions are collective, i.e. they need to be called by all
+ * processors.
+ *
+ * There is one snag, however: both PETSc and Trilinos need to know whether
+ * the operation that these <code>compress()</code> functions invoke applies
+ * to adding elements or setting them.  In some cases, not all processors may
+ * be adding elements, for example if a processor does not own any cells when
+ * using a very coarse (initial) mesh.  For this reason, compress() takes an
+ * argument of type VectorOperation, which can be either ::%add, or ::%insert.
+ * This argument is required for vectors and matrices starting with the 7.3
+ * release.
+ *
+ * In short, you need to call compress() in the following cases (and only in
+ * those cases, though calling compress() in other cases just costs some
+ * performance):
+ *
+ * 1. At the end of your assembly loop on matrices and vectors. This needs to
+ * be done if you write entries directly or if you use
+ * ConstraintMatrix::distribute_local_to_global. Use VectorOperation::add.
+ *
+ * 2. When you are done setting individual elements in a matrix/vector before
+ * any other operations are done (adding to elements, other operations like
+ * scaling, solving, reading, etc.). Use VectorOperation::insert.
+ *
+ * 3. Like in 2., but for adding values to individual elements. Use
+ * VectorOperation::add.
+ *
+ * All other operations like scaling or adding vectors, assignments, calls
+ * into deal.II (VectorTools, ConstraintMatrix, ...) or solvers do not require
+ * calls to compress().
+ * </dd>
+ *
+ * @note Compressing is an operation that only applies to vectors whose
+ * elements are uniquely owned by one and only one processor in a parallel
+ * MPI universe. It does not apply to
+ * @ref GlossGhostedVector "vectors with ghost elements".
+ *
+ *
+ * <dt class="glossary">@anchor GlossConcept <b>Concepts in deal.II</b></dt>
+ *
+ * <dd> There are several places in deal.II where we require that a type in a
+ * template match a certain interface or behave in a certain way: such
+ * constraints are called <em>concepts</em> in C++. See the discussion in
+ * @ref Concepts for more information and a list of concepts in deal.II.
+ * </dd>
+ *
+ * <dt class="glossary">@anchor GlossDoF <b>Degree of freedom</b></dt>
+ *
+ * <dd> The term "degree of freedom" (often abbreviated as "DoF") is commonly
+ * used in the finite element community to indicate two slightly different,
+ * but related things. The first is that we'd like to represent the finite
+ * element solution as a linear combination of shape functions, in the form
+ * $u_h(\mathbf x) = \sum_{j=0}^{N-1} U_j \varphi_j(\mathbf x)$. Here, $U_j$
+ * is a vector of expansion coefficients. Because we don't know their values
+ * yet (we will compute them as the solution of a linear or nonlinear system),
+ * they are called "unknowns" or "degrees of freedom". The second meaning of
+ * the term can be explained as follows: A mathematical description of a finite
+ * element problem is often to say that we are looking for a finite
+ * dimensional function $u_h \in V_h$ that satisfies some set of equations
+ * (e.g. $a(u_h,\varphi_h)=(f,\varphi_h)$ for all test functions $\varphi_h\in
+ * V_h$). In other words, all we say here is that the solution needs to lie in
+ * some space $V_h$. However, to actually solve this problem on a computer we
+ * need to choose a basis of this space; this is the set of shape functions
+ * $\varphi_j(\mathbf x)$ we have used above in the expansion of $u_h(\mathbf
+ * x)$ with coefficients $U_j$. There are of course many bases of the space
+ * $V_h$, but we will specifically choose the one that is described by the
+ * finite element functions that are traditionally defined locally on the
+ * cells of the mesh. Describing "degrees of freedom" in this context requires
+ * us to simply <i>enumerate</i> the basis functions of the space $V_h$. For
+ * $Q_1$ elements this means simply enumerating the vertices of the mesh in
+ * some way, but for higher elements one also has to enumerate the shape
+ * functions that are associated with edges, faces, or cell interiors of the
+ * mesh. The class that provides this enumeration of the basis functions of
+ * $V_h$ is called DoFHandler.  The process of enumerating degrees of freedom
+ * is referred to as "distributing DoFs" in deal.II.
+ * </dd>
+ *
+ * <dt class="glossary">@anchor GlossDirectionFlag <b>Direction flags</b></dt>
+ *
+ * <dd>The <i>direction flag</i> is used in triangulations embedded in a
+ * higher dimensional space to denote the orientation of cells and make the
+ * manifold oriented. It is accessed using CellAccessor::direction_flag()
+ * and set by the Triangulation class upon creation of a triangulation. You
+ * can change all direction flags of a triangulation using the
+ * Triangulation::flip_all_direction_flags() function.
+ *
+ * The flag is necessary to make cases like this work: assume we have a
+ * one-dimensional mesh embedded in a two-dimensional space,
+ *
+ *   @image html direction_flag.png "One dimensional mesh in two dimensions"
+ *
+ * In one dimensional meshes in one dimensional space, we can always make sure
+ * that the location of the left vertex of a cell has a smaller value than the
+ * location of the right vertex. However, if we embed a mesh in a higher
+ * dimensional space, we can no longer do this. For example, the cells in the
+ * mesh above may be described by the following vertex sets: <code>(0,1),
+ * (1,2), (3,2), (4,3), (4,5)</code>. (As a side remark, note that here we
+ * have vertices -- e.g. vertex 2 -- that are the right end points of more
+ * than one cell.) If we define the normal to each cell as that unit vector
+ * that is right perpendicular to the vector that connects the first to the
+ * second vertex of the line, then we would end up with the following picture:
+ *
+ *   @image html direction_flag_normals.png "Normal vectors"
+ *
+ * In other words, this one-dimensional manifold is not oriented. We could in
+ * principle reverse the order of vertices when creating such a mesh (though
+ * there are good reasons not to do so, for example because this mesh may have
+ * resulted from extracting the surface mesh of a two dimensional mesh, and we
+ * want to preserve the order of vertices of each line segment because they
+ * currently match the order of vertices of the faces of the 2d cells). An
+ * alternative strategy, chosen in deal.II, is to simply associate with each
+ * cell whether the normal should be the left or right normal to the
+ * cell. (The default is right normals.) In the example above, the flags for
+ * the five cells would be <code>true, true, false, false,
+ * true</code>. Multiplying the right normal with plus or minus one, depending
+ * on the value of the flag on each cell, yields a set of normal vectors that
+ * orient the manifold.
+ *
+ * Similar issues happen with two-dimensional meshes in three space
+ * dimensions. We note that it would not be possible to find consistent
+ * direction flags if the two-dimensional manifold is not orientable; such
+ * manifolds are not currently supported by deal.II.
+ * </dd>
+ *
+ *
+ * <dt class="glossary">@anchor GlossDistorted <b>Distorted cells</b></dt>
+ *
+ * <dd>A <i>distorted cell</i> is a cell for which the mapping from
+ * the reference cell to real cell has a Jacobian whose determinant is
+ * non-positive somewhere in the cell. Typically, we only check the sign
+ * of this determinant at the vertices of the cell. The function
+ * GeometryInfo::alternating_form_at_vertices computes these
+ * determinants at the vertices.
+ *
+ * By way of example, if all of the determinants are of roughly equal value
+ * and on the order of $h^\text{dim}$ then the cell is well-shaped. For
+ * example, a square cell or face has determinants equal to $h^\text{dim}$
+ * whereas a strongly sheared parallelogram has a determinant much
+ * smaller. Similarly, a cell with very unequal edge lengths will have widely
+ * varying determinants. Conversely, a pinched cell in which the location of
+ * two or more vertices is collapsed to a single point has a zero determinant
+ * at this location. Finally, an inverted or twisted cell in which the
+ * location of two vertices is out of order will have negative determinants.
+ *
+ * The following two images show a well-formed, a pinched, and a twisted cell
+ * for both 2d and 3d:
+ *
+ * @image html distorted_2d.png "A well-formed, a pinched, and a twisted cell in 2d."
+ *
+ * @image html distorted_3d.png "A well-formed, a pinched, and a twisted cell in 3d."
+ * </dd>
+ *
+ * Distorted cells can appear in two different ways: The original
+ * coarse mesh can already contain such cells, or they can be created
+ * as the result of mesh refinement if the boundary description in use
+ * is sufficiently irregular.
+ *
+ * If the appropriate flag is given upon creation of a triangulation,
+ * the function Triangulation::create_triangulation, which is called
+ * by the various functions in GridGenerator and GridIn (but can also
+ * be called from user code, see step-14) will signal
+ * the creation of coarse meshes with distorted cells by throwing an
+ * exception of type Triangulation::DistortedCellList. There are
+ * legitimate cases for creating meshes with distorted cells (in
+ * particular collapsed/pinched cells) if you don't intend to assemble
+ * anything on these cells. For example, consider a case where one
+ * would like to simulate the behavior of an elastic material with a
+ * fluid-filled crack such as an oil reservoir. If the pressure
+ * becomes too large, the crack is closed -- and the cells that
+ * discretize the crack volume are collapsed to zero volume. As long
+ * as you don't integrate over these cells to simulate the behavior of
+ * the fluid (of which there isn't any if the crack has zero volume),
+ * such meshes are perfectly legitimate. As a consequence,
+ * Triangulation::create_triangulation does not simply abort the
+ * program, but throws an exception that contains a list of cells that
+ * are distorted; this exception can be caught and, if you believe
+ * that you can ignore this condition, you can react by doing nothing
+ * with the caught exception.
+ *
+ * The second case in which distorted cells can appear is through mesh
+ * refinement when we have curved boundaries. Consider, for example, the
+ * following case where the dashed line shows the exact boundary that the
+ * lower edge of the cell is supposed to approximate (let's assume for
+ * simplicity that the left, top and right edges are interior edges and
+ * therefore will be considered as straight; in fact, for this particular case
+ * in 2d where only one side of a cell is at the boundary we have special code
+ * that avoids the situation depicted, but you will get the general idea of
+ * the problem that holds in 3d or if more than one side of the cell is at the
+ * boundary):
+ *
+ * @image html distorted_2d_refinement_01.png "One cell with an edge approximating a curved boundary"
+ *
+ * Now, if this cell is refined, we first split all edges and place
+ * new mid-points on them. For the left, top and right edge, this is
+ * trivial: because they are considered straight, we just take the
+ * point in the middle between the two vertices. For the lower edge,
+ * the Triangulation class asks the Boundary object associated with
+ * this boundary (and in particular the Boundary::new_point_on_line
+ * function) where the new point should lie. The four old vertices and
+ * the four new points are shown here:
+ *
+ * @image html distorted_2d_refinement_02.png "Cell after edge refinement"
+ *
+ * The last step is to compute the location of the new point in the interior
+ * of the cell. By default, it is chosen as the average location (arithmetic
+ * mean of the coordinates) of these 8 points (in 3d, the 26 surrounding
+ * points have different weights, but the idea is the same):
+ *
+ * @image html distorted_2d_refinement_03.png "Cell after edge refinement"
+ *
+ * The problem with that is, of course, that the bottom two child cells are
+ * twisted, whereas the top two children are well-shaped. While such
+ * meshes can happen with sufficiently irregular boundary descriptions
+ * (and if the coarse mesh is entirely inadequate to resolve the
+ * complexity of the boundary), the Triangulation class does not know
+ * what to do in such situations unless one attaches an appropriate manifold
+ * object to the cells in question (see the
+ * @ref manifold "documentation module on manifolds"). Consequently, absent
+ * such a manifold description or if the manifold description does not
+ * provide a sufficient description of the geometry, the
+ * Triangulation::execute_coarsening_and_refinement function does
+ * create such meshes, but it keeps a list of cells whose children are
+ * distorted. If this list is non-empty at the end of a refinement
+ * step, it will throw an exception of type
+ * Triangulation::DistortedCellList that contains those cells that
+ * have distorted children. The caller of
+ * Triangulation::execute_coarsening_and_refinement can then decide
+ * what to do with this situation.
+ *
+ * One way to deal with this problem is to use the
+ * GridTools::fix_up_distorted_child_cells function that attempts to
+ * fix up exactly these cells if possible by moving around the node at
+ * the center of the cell.
+ *
+ * Note that the Triangulation class does not test for the presence of
+ * distorted cells by default, since the determination whether a cell
+ * is distorted or not is not a cheap operation. If you want a
+ * Triangulation object to test for distortion of cells, you need to
+ * specify this upon creation of the object by passing the appropriate
+ * flag.
+ *
+ *
+ * <dt class="glossary">@anchor distributed_paper
+ *                           <b>Distributed computing paper</b></dt>
+ *
+ * <dd>The "distributed computing paper" is a paper by W. Bangerth,
+ * C. Burstedde, T. Heister and M. Kronbichler titled "Algorithms and Data
+ * Structures for Massively Parallel Generic Finite Element Codes" that
+ * describes the implementation of %parallel distributed computing in deal.II,
+ * i.e. computations where not only the linear system is split onto different
+ * machines as in, for example, step-17, but also the Triangulation and
+ * DoFHandler objects. In essence, it is a guide to the parallel::distributed
+ * namespace and the techniques used in step-40.
+ *
+ * The full reference for the paper is as follows:
+ * @code{.bib}
+@Article{BBHK11,
+  author =       {Wolfgang Bangerth and Carsten Burstedde and Timo Heister
+                  and Martin Kronbichler},
+  title =        {Algorithms and data structures for massively parallel generic
+  adaptive finite element codes},
+  journal =      {ACM Trans. Math. Softw.},
+  year =         2011,
+  volume =       38,
+  pages =        {14/1--28}}
+ * @endcode
+ *
+ * For massively %parallel
+ * computations, deal.II builds on the
+ * <a href="http://www.p4est.org/" target="_top">p4est</a>
+ * library. If you use this functionality, please also cite the
+ * p4est paper listed at their website.
+ * </dd>
+ *
+ *
+ * <dt class="glossary">@anchor GlossFaceOrientation <b>Face orientation</b></dt>
+ * <dd>In a triangulation, the normal vector to a face
+ * can be deduced from the face orientation by
+ * applying the right hand rule (x,y -> normal).  We note that
+ * in the standard orientation of faces in 2d, faces 0 and 2 have
+ * normals that point into the cell, and faces 1 and 3 have normals
+ * pointing outward. In 3d, faces 0, 2, and 4
+ * have normals that point into the cell, while the normals of faces
+ * 1, 3, and 5 point outward. This information, again, can be queried from
+ * GeometryInfo<dim>::unit_normal_orientation.
+ *
+ * However, it turns out that a significant number of 3d meshes cannot
+ * satisfy this convention. This is due to the fact that the face
+ * convention for one cell already implies something for the
+ * neighbor, since they share a common face and fixing it for the
+ * first cell also fixes the normal vectors of the opposite faces of
+ * both cells. It is easy to construct cases of loops of cells for
+ * which this leads to cases where we cannot find orientations for
+ * all faces that are consistent with this convention.
+ *
+ * For this reason, the above convention is only what we call the
+ * <em>standard orientation</em>. deal.II actually allows faces in 3d
+ * to have either the standard direction, or its opposite, in which
+ * case the lines that make up a cell would have reversed order, and
+ * the normal vector would have the opposite direction. You can ask a
+ * cell whether a given face has standard orientation by calling
+ * <tt>cell->face_orientation(face_no)</tt>: if the result is @p true,
+ * then the face has standard orientation, otherwise its normal vector
+ * is pointing the other direction. There are not very many places in
+ * application programs where you need this information actually, but
+ * a few places in the library make use of this. Note that in 2d, the
+ * result is always @p true. However, while every face in 2d is always
+ * in standard orientation, you can sometimes specify something to
+ * assume that this is not so; an example is the function
+ * DoFTools::make_periodicity_constraints().
+ *
+ * There are two other flags that describe the orientation of a face:
+ * face_flip and face_rotation. Some documentation for these
+ * exists in the GeometryInfo class. An example of their use in user
+ * code is given in the DoFTools::make_periodicity_constraints function.
+ * </dd>
+ *
+ *
+ * <dt class="glossary">@anchor GlossGeneralizedSupport <b>Generalized support points</b></dt>
+ * <dd>While @ref GlossSupport "support points" allow very simple interpolation
+ * into the finite element space, their concept is restricted to
+ * @ref GlossLagrange "Lagrange elements". For other elements, more general
+ * interpolation operators can be defined, often relying on integral values
+ * or moments. Since these integral values are again computed using a
+ * quadrature rule, we consider them a generalization of support
+ * points.
+ *
+ * Note that there is no simple relation between
+ * @ref GlossShape "shape functions" and generalized support points, unlike for
+ * regular @ref GlossSupport "support points". Instead, FiniteElement defines
+ * a couple of interpolation functions doing the actual interpolation.
+ *
+ * If a finite element is Lagrangian, generalized support points
+ * and support points coincide.
+ * </dd>
+ *
+ *
+ * <dt class="glossary">@anchor GlossGhostCell <b>Ghost cells</b></dt>
+ * <dd>
+ * If a mesh is distributed across multiple MPI processes using the
+ * parallel::distributed::Triangulation class, each processor stores
+ * only the cells it owns, one layer of adjacent cells that are owned
+ * by other processors, all coarse level cells, and all cells that are
+ * necessary to maintain the invariant that adjacent cells must differ
+ * by at most one refinement level. The cells stored on each process
+ * that are not owned by this process but that are adjacent to the
+ * ones owned by this process are called "ghost cells", and for these
+ * cells the predicate <code>cell-@>is_ghost()</code> returns
+ * true. Ghost cells are guaranteed to exist in the globally
+ * distributed mesh, i.e. these cells are actually owned by another
+ * process and are not further refined there. See the
+ * @ref distributed_paper "Distributed Computing paper" for more
+ * information.
+ *
+ * The layer of ghost cells consists of all cells that are face, edge, or
+ * vertex neighbors of any locally owned cell and that are not locally
+ * owned themselves. In other words, the ghost cells completely enclose the
+ * subdomain of locally owned cells (with the exception of the boundary of
+ * the domain, of course).
+ *
+ * The concept of ghost cells has no meaning for triangulations that
+ * store the entire mesh on each processor, i.e. the
+ * dealii::Triangulation class.  </dd>
+ *
+ *
+ * <dt class="glossary">@anchor GlossGhostedVector <b>Ghosted vectors</b></dt>
+ * <dd>
+ * In parallel computations, vectors come in two general kinds:
+ * without and with ghost elements. Vectors without ghost
+ * elements uniquely partition the vector elements between
+ * processors: each vector entry has exactly one processor that
+ * owns it, and this is the only processor that stores the value
+ * of this entry. In other words, if processor zero stores elements
+ * 0...49 of a vector and processor one stores elements 50...99,
+ * then processor one is out of luck accessing element 42 of this
+ * vector: it is not stored here and the value cannot be accessed.
+ * This will result in an assertion.
+ *
+ * On the other hand, there are many situations where one needs to
+ * know vector elements that aren't locally owned, for example to
+ * evaluate the solution on a locally owned cell (see
+ * @ref GlossLocallyOwnedCell) for which one of the degrees of freedom
+ * is at an interface to a cell that we do not own locally (which,
+ * in this case must then be a @ref GlossGhostCell "ghost cell")
+ * and for which the neighboring cell may be the owner -- in other
+ * words, the degree of freedom is not a
+ * @ref GlossLocallyOwnedDof "locally owned DoF" but instead only a
+ * @ref GlossLocallyActiveDof "locally active DoF". The values of such
+ * degrees of freedom are typically stored on the machine that owns the
+ * degree of freedom and, consequently, would not be accessible on the
+ * current machine.
+ *
+ * Because one often needs these values anyway, there is a second kind of
+ * vector, often called "ghosted vector". Ghosted vectors store some elements
+ * on each processor for which that processor is not the owner.
+ * For such vectors, you can read those elements that the
+ * processor you are currently on stores but you cannot write into them
+ * because to make this work would require propagating the new value to
+ * all other processors that have a copy of this value (the list of
+ * such processors may be something which the current processor does not
+ * know and has no way of finding out efficiently). Since you cannot
+ * write into ghosted vectors, the only way to initialize such a vector
+ * is by assignment from a non-ghosted vector. This implies having to
+ * import those elements we locally want to store from other processors.
+ *
+ * The way ghosted vectors are actually stored is different between the
+ * various implementations of parallel vectors. For PETSc (and the corresponding
+ * PETScWrappers::MPI::Vector class), ghosted vectors store the same
+ * elements as non-ghosted ones would, plus some additional elements
+ * that are owned by other processors. In other words, for each element
+ * there is a clear owner among all of the processors and those elements
+ * that the current processor stores but does not own (i.e., the
+ * "ghost elements") are simply mirror images of a master value somewhere
+ * else -- thus, the name "ghost". This is also the case for the
+ * parallel::distributed::Vector class.
+ *
+ * On the other hand, in Trilinos (and consequently in
+ * TrilinosWrappers::MPI::Vector), a ghosted vector is simply a view
+ * of the parallel vector where the element distributions overlap. The
+ * 'ghosted' Trilinos vector in itself has no idea of which entries
+ * are ghosted and which are locally owned. In fact, a ghosted vector
+ * may not even store all of the elements a non-ghosted vector would
+ * store on the current processor. Consequently, for Trilinos vectors,
+ * there is no notion of an 'owner' of vector elements in the way we
+ * have it in the non-ghost case view (or in the PETSc case) and
+ * the name "ghost element" may be misleading since in this view,
+ * every element we have available locally may or may not be stored
+ * somewhere else as well, but even if it is, the local element is not
+ * a mirror value of a master location as there is no owner of each
+ * element.
+ *
+ * @note The @ref distributed documentation module provides a brief
+ * overview of where the different kinds of vectors are typically
+ * used.
+ * </dd>
+ *
+ * <dt class="glossary">@anchor hp_paper <b>%hp paper</b></dt>
+ * <dd>The "hp paper" is a paper by W. Bangerth and O. Kayser-Herold, titled
+ * "Data Structures and Requirements for hp Finite Element Software", that
+ * describes many of the algorithms and data structures used in the implementation
+ * of the hp framework of deal.II. In particular, it summarizes many of the
+ * tricky points that have to be considered for %hp finite elements using continuous
+ * elements.
+ *
+ * The full reference for this paper is as follows:
+ * @code{.bib}
+@Article{BK07,
+  author =       {Wolfgang Bangerth and Oliver Kayser-Herold},
+  title =        {Data Structures and Requirements for hp Finite Element
+                  Software},
+  journal =      {ACM Trans. Math. Softw.},
+  year =         2009,
+  volume =       36,
+  number =       1,
+  pages =        {4/1--4/31}
+}
+ * @endcode
+ * It is available from <a href="http://www.math.tamu.edu/~bangerth/publications.html">http://www.math.tamu.edu/~bangerth/publications.html</a>, also see <a href="https://www.dealii.org/publications.html#details">deal.II publications</a> for details.
+ *
+ * The numerical examples shown in that paper are generated with a slightly
+ * modified version of step-27. The main difference to that
+ * tutorial program is that various operations in the program were timed for
+ * the paper to compare different options and show that $hp$ methods are
+ * really not all that expensive.
+ * </dd>
+ *
+ *
+ * <dt class="glossary">@anchor GlossInterpolation <b>Interpolation with finite elements</b></dt>
+ * <dd>The purpose of interpolation with finite elements is computing
+ * a vector of coefficients representing a finite element function,
+ * such that the @ref GlossNodes "node values" of the original
+ * function and the finite element function coincide. Therefore, the
+ * interpolation process consists of evaluating all @ref GlossNodes
+ * "node functionals" <i>N<sub>i</sub></i> for the given function
+ * <i>f</i> and storing the result as entry <i>i</i> in the coefficient
+ * vector.
+ * </dd>
+ *
+ *
+ * <dt class="glossary">@anchor GlossLagrange <b>Lagrange elements</b></dt>
+ * <dd>Finite elements based on Lagrangian interpolation at
+ * @ref GlossSupport "support points".</dd>
+ *
+ *
+ * <dt class="glossary">@anchor GlossLocallyOwnedCell <b>Locally owned cell</b></dt>
+ * <dd>This concept identifies a subset of all cells when using
+ * distributed meshes, see the @ref distributed module. In such meshes, each
+ * cell is owned by exactly one processor. The locally owned ones are those
+ * owned by the current processor.
+ *
+ * Each processor in a parallel computation has a triangulation covering
+ * the entire domain that consists of cells that are locally owned, of
+ * @ref GlossGhostCell "ghost cells" and of
+ * @ref GlossArtificialCell "artificial cells".
+ * </dd>
+ *
+ *
+ * <dt class="glossary">@anchor GlossLocallyOwnedDof <b>Locally owned degrees of freedom</b></dt>
+ * <dd>This concept identifies a subset of all degrees of freedom when using
+ * distributed meshes, see the @ref distributed module.  Locally owned degrees
+ * of freedom live on locally owned cells. Since degrees of freedom are owned
+ * by only one processor, degrees of freedom on interfaces between cells owned
+ * by different processors may be owned by one or the other, so not all
+ * degrees of freedom on a locally owned cell are also locally owned degrees
+ * of freedom.
+ *
+ * Locally owned DoFs are a subset of the
+ * @ref GlossLocallyActiveDof "locally active DoFs".</dd>
+ *
+ *
+ * <dt class="glossary">@anchor GlossLocallyActiveDof <b>Locally active degrees of freedom</b></dt>
+ * <dd>This concept identifies a subset of all degrees of freedom when using
+ * distributed meshes, see the @ref distributed module.  Locally active degrees
+ * of freedom are those that live on locally owned cells. Degrees of freedom
+ * on interfaces between cells owned by different processors therefore belong
+ * to the set of locally active degrees of freedom for more than one processor.
+ *
+ * Locally active DoFs are a superset of the
+ * @ref GlossLocallyOwnedDof "locally owned DoFs" and a subset of the
+ * @ref GlossLocallyRelevantDof "locally relevant DoFs".
+ * </dd>
+ *
+ *
+ * <dt class="glossary">@anchor GlossLocallyRelevantDof <b>Locally relevant degrees of freedom</b></dt>
+ * <dd>This concept identifies a subset of all degrees of freedom when using
+ * distributed meshes, see the @ref distributed module.  Locally relevant
+ * degrees of freedom are those that live on locally owned or ghost cells.
+ * Consequently, they may be owned by different processors.
+ *
+ * Locally relevant DoFs are a superset of the
+ * @ref GlossLocallyActiveDof "locally active DoFs".
+ * </dd>
+ *
+ *
+ * <dt class="glossary">@anchor GlossManifoldIndicator <b>%Manifold indicator</b></dt>
+ *
+ * <dd> Every object that makes up a Triangulation (cells, faces,
+ * edges, etc.), is associated with a unique number (of type
+ * types::manifold_id) that is used to identify which manifold object
+ * is responsible to generate new points when the mesh is refined.
+ *
+ * By default, all manifold indicators of a mesh are set to
+ * numbers::invalid_manifold_id. A typical piece of code that sets the
+ * manifold indicator on an object to something else would look like
+ * this, here setting the manifold indicator to 42 for all cells whose
+ * center has an $x$ component less than zero:
+ *
+ * @code
+ * for (typename Triangulation<dim>::active_cell_iterator cell =
+ *  triangulation.begin_active();
+ *  cell != triangulation.end(); ++cell)
+ *   if (cell->center()[0] < 0)
+ *     cell->set_manifold_id (42);
+ * @endcode
+ *
+ * Here we call the function TriaAccessor::set_manifold_id(). It may
+ * also be appropriate to call TriaAccessor::set_all_manifold_ids
+ * instead, to set recursively the manifold id on each face (and edge,
+ * if in 3d). To query the manifold indicator of a particular object,
+ * use TriaAccessor::manifold_id().
+ *
+ * The code above only sets the manifold indicators of a particular
+ * part of the Triangulation, but it does not by itself change the way
+ * the Triangulation class treats this object for the purposes of mesh
+ * refinement. For this, you need to call Triangulation::set_manifold()
+ * to associate a manifold object with a particular manifold
+ * indicator. This allows the Triangulation objects to use a different
+ * method of finding new points on cells, faces or edges to be
+ * refined; the default is to use a FlatManifold object for all faces
+ * and edges.
+ *
+ * @note Manifold indicators are inherited from parents to their
+ * children upon mesh refinement. Some more information about manifold
+ * indicators is also presented in a section of the documentation of
+ * the Triangulation class as well as in the
+ * @ref manifold "Manifold documentation module". Manifold indicators
+ * are used in step-53 and step-54.
+ * </dd>
+ *
+ * @see @ref manifold "The module on Manifolds"
+ *
+ *
+ * <dt class="glossary">@anchor GlossMaterialId <b>Material id</b></dt>
+ * <dd>Each cell of a triangulation has associated with it a property called
+ * "material id". It is commonly used in problems with heterogeneous
+ * coefficients to identify which part of the domain a cell is in and,
+ * consequently, which value the coefficient should have on this particular
+ * cell. The material id is inherited from mother to child cell upon mesh
+ * refinement.
+ *
+ * The material id is set and queried using the CellAccessor::material_id,
+ * CellAccessor::set_material_id and CellAccessor::recursively_set_material_id
+ * functions.
+ * </dd>
+ *
+ *
+ * <dt class="glossary">@anchor GlossMPICommunicator <b>MPI Communicator</b></dt>
+ * <dd>
+ * In the language of the Message Passing Interface (MPI), a communicator
+ * can be thought of as a mail system that allows sending messages to
+ * other members of the mail system. Within each communicator, each
+ * @ref GlossMPIProcess "process" has a
+ * @ref GlossMPIRank "rank" (the equivalent of a house number) that
+ * allows identifying senders and receivers of messages. It is not
+ * possible to send messages via a communicator to receivers that are
+ * not part of this communicator/mail service.
+ *
+ * When starting a parallel program via a command line call such as
+ * @code
+ *  mpirun -np 32 ./step-17
+ * @endcode
+ * (or the equivalent used in the batch submission system used on your
+ * cluster) the MPI system starts 32 copies of the step-17 executable.
+ * Each of these has access to the <code>MPI_COMM_WORLD</code> communicator
+ * that then consists of all 32 processors, each with its own rank. A subset
+ * of processes within this MPI universe can later agree to create other
+ * communicators that allow communication between only a subset of
+ * processes.
+ * </dd>
+ *
+ *
+ * <dt class="glossary">@anchor GlossMPIProcess <b>MPI Process</b></dt>
+ * <dd>
+ * When running parallel jobs on distributed memory machines, one
+ * almost always uses MPI. There, a command line call such as
+ * @code
+ *  mpirun -np 32 ./step-17
+ * @endcode
+ * (or the equivalent used in the batch submission system used on your
+ * cluster) starts 32 copies of the step-17 executable. Some of these may actually
+ * run on the same machine, but in general they will be running on different
+ * machines that do not have direct access to each other's memory space.
+ *
+ * In the language of the Message Passing Interface (MPI), each of these
+ * copies of the same executable running on (possibly different) machines
+ * are called <i>processes</i>. The collection of all processes running in
+ * parallel is called the "MPI Universe" and is identified by the
+ * @ref GlossMPICommunicator "MPI communicator" <code>MPI_COMM_WORLD</code>.
+ *
+ * Each process has immediate access only to the objects in its own
+ * memory space. A process can not read from or write into the memory
+ * of other processes. As a consequence, the only way by which
+ * processes can communicate is by sending each other messages. That
+ * said (and as explained in the introduction to step-17), one
+ * typically calls higher level MPI functions in which all processes
+ * that are part of a communicator participate. An example would
+ * be computing the sum over a set of integers where each process
+ * provides one term of the sum.
+ * </dd>
+ *
+ *
+ * <dt class="glossary">@anchor GlossMPIRank <b>MPI Rank</b></dt>
+ * <dd>
+ * In the language of the Message Passing Interface (MPI), the <i>rank</i>
+ * of an @ref GlossMPIProcess "MPI process" is the number this process
+ * carries within the set <code>MPI_COMM_WORLD</code> of all processes
+ * currently running as one parallel job. More correctly, it is the
+ * number within an @ref GlossMPICommunicator "MPI communicator" that
+ * groups together a subset of all processes within one parallel job
+ * (where <code>MPI_COMM_WORLD</code> simply denotes the <i>complete</i>
+ * set of processes).
+ *
+ * Within each communicator, each process has a unique rank, distinct from
+ * all other processes' ranks, that allows
+ * identifying one recipient or sender in MPI communication calls. Each
+ * process, running on one processor, can inquire about its own rank
+ * within a communicator by calling Utilities::MPI::this_mpi_process().
+ * The total number of processes participating in a communicator (i.e.,
+ * the <i>size</i> of the communicator) can be obtained by calling
+ * Utilities::MPI::n_mpi_processes().
+ * </dd>
+ *
+ *
+ * <dt class="glossary">@anchor mg_paper <b>%Multigrid paper</b></dt>
+ * <dd>The "multigrid paper" is a paper by B. Janssen and G. Kanschat, titled
+ * "Adaptive Multilevel Methods with Local Smoothing for H1- and Hcurl-Conforming High Order Finite Element Methods", that
+ * describes many of the algorithms and data structures used in the implementation
+ * of the multigrid framework of deal.II. It underlies the implementation of
+ * the classes that are used in step-16 for multigrid
+ * methods.
+ *
+ * The full reference for this paper is as follows:
+ * @code{.bib}
+@article{janssen2011adaptive,
+  title=    {Adaptive Multilevel Methods with Local Smoothing for H^1- and H^{curl}-Conforming High Order Finite Element Methods},
+  author=   {Janssen, B{\"a}rbel and Kanschat, Guido},
+  journal=  {SIAM Journal on Scientific Computing},
+  volume=   {33},
+  number=   {4},
+  pages=    {2095--2114},
+  year=     {2011},
+  publisher={SIAM}}
+ * @endcode
+ * See 
+ * <a href="http://dx.doi.org/10.1137/090778523">DOI:10.1137/090778523</a>
+ * for the paper and <a href="https://www.dealii.org/publications.html#details">deal.II publications</a> for more details.
+ * </dd>
+ *
+ *
+ * <dt class="glossary">@anchor GlossNodes <b>Node values or node functionals</b></dt>
+ *
+ * <dd>It is customary to define a FiniteElement as a pair consisting
+ * of a local function space and a set of node values $N_i$ on the
+ * mesh cells (usually defined on the @ref GlossReferenceCell
+ * "reference cell"). Then, the basis of the local function space is
+ * chosen such that $N_i(v_j) = \delta_{ij}$, the Kronecker delta.
+ *
+ * This splitting has several advantages, concerning analysis as well
+ * as implementation. For the analysis, it means that conformity with
+ * certain spaces (FiniteElementData::Conformity), e.g. continuity, is
+ * up to the node values. In deal.II, it helps simplify the
+ * implementation of more complex elements like FE_RaviartThomas
+ * considerably.
+ *
+ * Examples for node functionals are values in @ref GlossSupport
+ * "support points" and moments with respect to Legendre
+ * polynomials. Let us give some examples:
+ *
+ * <table><tr>
+ *   <th>Element</th>
+ *   <th>Function space</th>
+ *   <th>Node values</th></tr>
+ *   <tr><th>FE_Q, FE_DGQ</th>
+ *     <td><i>Q<sub>k</sub></i></td>
+ *     <td>values in support points</td></tr>
+ *   <tr><th>FE_DGP</th>
+ *     <td><i>P<sub>k</sub></i></td>
+ *     <td>moments with respect to Legendre polynomials</td></tr>
+ *   <tr><th>FE_RaviartThomas (2d)</th>
+ *     <td><i>Q<sub>k+1,k</sub> x Q<sub>k,k+1</sub></i></td>
+ *     <td>moments on edges and in the interior</td></tr>
+ *   <tr><th>FE_RaviartThomasNodal</th>
+ *     <td><i>Q<sub>k+1,k</sub> x Q<sub>k,k+1</sub></i></td>
+ *     <td>Gauss points on edges(faces) and anisotropic Gauss points in the interior</td></tr>
+ * </table>
+ * </dd>
+ *
+ * <dt class="glossary">@anchor GlossParallelScaling <b>Parallel scaling</b></dt>
+ * <dd>When we say that a parallel program "scales", what we mean is that the
+ * program does not become unduly slow (or take unduly much memory) if we
+ * make the problem it solves larger, and that run time and memory consumption
+ * decrease proportionally if we keep the problem size the same but increase
+ * the number of processors (or cores) that work on it.
+ *
+ * More specifically, think of a problem whose size is given by a number $N$
+ * (which could be the number of cells, the number of unknowns, or some other
+ * indicative quantity such as the number of CPU cycles necessary to solve
+ * it) and for which you have $P$ processors available for solution. In an
+ * ideal world, the program would then require a run time of ${\cal O}(N/P)$,
+ * and this would imply that we could reduce the run time to any desired
+ * value by just providing more processors. Likewise, for a program to be
+ * scalable, its overall memory consumption needs to be ${\cal O}(N)$ and on
+ * each involved process needs to be ${\cal O}(N/P)$, again
+ * implying that we can fit any problem into the fixed amount of memory
+ * computers attach to each processor, by just providing
+ * sufficiently many processors.
+ *
+ * For practical assessments of scalability, we often distinguish between
+ * "strong" and "weak" scalability. These assess asymptotic statements
+ * such as ${\cal O}(N/P)$ run time in the limits $N\rightarrow \infty$
+ * and/or $P\rightarrow \infty$. Specifically, when we say that a program
+ * is "strongly scalable", we mean that if we have a problem of fixed
+ * size $N$, then we can reduce the run time and memory consumption (on
+ * every processor) inversely proportional to $P$ by just throwing more
+ * processors at the problem. In particular, strong scalability implies
+ * that if we provide twice as many processors, then run time and memory
+ * consumption on every process will be reduced by a factor of two. In
+ * other words, we can solve the <i>same problem</i> faster and faster
+ * by providing more and more processors.
+ *
+ * Conversely, "weak scalability" means that if we increase the problem
+ * size $N$ by a fixed factor, and increase the number of processors
+ * $P$ available to solve the problem by the same factor, then the
+ * overall run time (and the memory consumption on every processor)
+ * remains the same. In other words, we can solve <i>larger and larger
+ * problems</i> within the same amount of wallclock time by providing
+ * more and more processors.
+ *
+ * No program is truly scalable in this theoretical sense. Rather, all programs
+ * cease to scale once either $N$ or $P$ grows larger than certain limits.
+ * We therefore often say things such as "the program scales up to
+ * 4,000 cores", or "the program scales up to $10^{8}$ unknowns". There are
+ * a number of reasons why programs cannot scale without limit; these can
+ * all be illustrated by just looking at the (relatively simple) step-17
+ * tutorial program:
+ * - Sequential sections: Many programs have sections of code that
+ *   either cannot be or are not parallelized, i.e., where one processor has to do
+ *   a certain, fixed amount of work that does not decrease just because
+ *   there are a total of $P$ processors around. In step-17, this is
+ *   the case when generating graphical output: one processor creates
+ *   the graphical output for the entire problem, i.e., it needs to do
+ *   ${\cal O}(N)$ work. That means that this function has a run time
+ *   of ${\cal O}(N)$, regardless of $P$, and consequently the overall
+ *   program will not be able to achieve ${\cal O}(N/P)$ run time but
+ *   have a run time that can be described as $c_1N/P + c_2N$ where
+ *   the first term comes from scalable operations such as assembling
+ *   the linear system, and the latter from generating graphical
+ *   output on process 0. If $c_2$ is sufficiently small, then the
+ *   program might look like it scales strongly for small numbers of
+ *   processors, but eventually strong scalability will cease. In
+ *   addition, the program can not scale weakly either because
+ *   increasing the size $N$ of the problem while increasing the
+ *   number of processors $P$ at the same rate does not keep the
+ *   run time of this one function constant.
+ * - Duplicated data structures: In step-17, each processor stores the entire
+ *   mesh. That is, each processor has to store a data structure of size
+ *   ${\cal O}(N)$, regardless of $P$. Eventually, if we make the problem
+ *   size large enough, this will overflow each processor's memory space
+ *   even if we increase the number of processors. It is thus clear that such
+ *   a replicated data structure prevents a program from scaling weakly.
+ *   But it also prevents it from scaling strongly because in order to
+ *   create an object of size ${\cal O}(N)$, one has to at the very
+ *   least write into ${\cal O}(N)$ memory locations, costing
+ *   ${\cal O}(N)$ in CPU time. Consequently, there is a component of the
+ *   overall algorithm that does not behave as ${\cal O}(N/P)$ if we
+ *   provide more and more processors.
+ * - Communication: If, to pick just one example, you want to compute
+ *   the $l_2$ norm of a vector of which all MPI processes store a few
+ *   entries, then every process needs to compute the sum of squares of
+ *   its own entries (which will require ${\cal O}(N/P)$ time, and
+ *   consequently scale perfectly), but then every process needs to
+ *   send its partial sum to one process that adds them all up and takes
+ *   the square root. In the very best case, sending a message that
+ *   contains a single number takes a constant amount of time,
+ *   regardless of the overall number of processes. Thus, again, every
+ *   program that does communication cannot scale strongly because
+ *   there are parts of the program whose CPU time requirements do
+ *   not decrease with the number of processors $P$ you allocate for
+ *   a fixed size $N$. In reality, the situation is actually even
+ *   worse: the more processes are participating in a communication
+ *   step, the longer it will generally take, for example because
+ *   the one process that has to add everyone's contributions has
+ *   to add everything up, requiring ${\cal O}(P)$ time. In other words,
+ *   CPU time <i>increases</i> with the number of processes, therefore
+ *   not only preventing a program from scaling strongly, but also from
+ *   scaling weakly. (In reality, MPI libraries do not implement $l_2$
+ *   norms by sending every message to one process that then adds everything
+ *   up; rather, they do pairwise reductions on a tree that doesn't
+ *   grow the run time as ${\cal O}(P)$ but as ${\cal O}(\log_2 P)$,
+ *   at the expense of more messages sent around. Be that as it may,
+ *   the fundamental point is that as you add more processors, the
+ *   run time will grow with $P$ regardless of the way the operation
+ *   is actually implemented, and it can therefore not scale.)
+ *
+ * These, and other reasons that prevent programs from scaling perfectly can
+ * be summarized in <a href="https://en.wikipedia.org/wiki/Amdahl%27s_law">
+ * <i>Amdahl's law</i></a> that states that if a fraction $\alpha$
+ * of a program's overall work $W$ can be parallelized, i.e., it can be
+ * run in ${\cal O}(\alpha W/P)$ time, and a fraction $1-\alpha$ of the
+ * program's work can not be parallelized (i.e., it consists either of
+ * work that only one process can do, such as generating graphical output
+ * in step-17; or that every process has to execute in a replicated way,
+ * such as sending a message with a local contribution to a dedicated
+ * process for accumulation), then the overall run time of the program
+ * will be
+ * @f{align*}
+ *   T = {\cal O}\left(\alpha \frac WP + (1-\alpha)W \right).
+ * @f}
+ * Consequently, the "speedup" you get, i.e., the factor by which your
+ * programs run faster on $P$ processors compared to running the program
+ * on a single process (assuming this is possible) would be
+ * @f{align*}
+ *   S = \frac{W}{\alpha \frac WP + (1-\alpha)W}
+ *     = \frac{P}{\alpha + (1-\alpha)P}.
+ * @f}
+ * If $\alpha<1$, which it is for all practically existing programs,
+ * then $S\rightarrow \frac{1}{1-\alpha}$ as $P\rightarrow \infty$, implying
+ * that there is a point where it does not pay off in any significant way
+ * any more to throw more processors at the problem.
+ *
+ * In practice, what matters is <i>up to which problem size</i> or
+ * <i>up to which number of processes</i> or <i>down to which size
+ * of local problems ${\cal O}(N/P)$</i> a program scales. For deal.II,
+ * experience shows that on most clusters with a reasonably fast
+ * network, one can solve problems up to a few billion unknowns,
+ * up to a few thousand processors, and down to somewhere between
+ * 40,000 and 100,000 unknowns per process. The last number is the
+ * most relevant: if you have a problem with, say, $10^8$ unknowns,
+ * then it makes sense to solve it on 1000-2500 processors since the
+ * number of degrees of freedom each process handles remains at more
+ * than 40,000. Consequently, there is enough work every process
+ * has to do so that the ${\cal O}(1)$ time for communication does
+ * not dominate. But it doesn't make sense to solve such a problem with
+ * 10,000 or 100,000 processors, since each of these processor's local
+ * problem becomes so small that they spend most of their time waiting
+ * for communication, rather than doing work on their part of the work.
+ * </dd>
+ *
+ * <dt class="glossary">@anchor GlossPeriodicConstraints <b>Periodic boundary
+ * conditions</b></dt>
+ * <dd>Periodic boundary conditions are often used when only part of the physically
+ * relevant domain is modeled. One assumes that the solution simply continues
+ * periodically with respect to the boundaries that are considered periodic.
+ * In deal.II, support for this is through DoFTools::make_periodicity_constraints()
+ * and GridTools::collect_periodic_faces(). As soon as a
+ * parallel::distributed::Triangulation is used also
+ * parallel::distributed::Triangulation::add_periodicity() has to be called to make
+ * sure that all the processes know about relevant parts of the triangulation on both
+ * sides of the periodic boundary. A typical process for distributed triangulations would be:
+ * -# Create a mesh
+ * -# Gather the periodic faces using GridTools::collect_periodic_faces() (Triangulation)
+ * -# Add the periodicity information to the mesh
+ * using parallel::distributed::Triangulation::add_periodicity()
+ * -# Gather the periodic faces using GridTools::collect_periodic_faces() (DoFHandler)
+ * -# Add periodicity constraints using DoFTools::make_periodicity_constraints()
+ * 
+ * An example for this can be found in step-45.
+ * </dd>
+ *
+ * <dt class="glossary">@anchor GlossPrimitive <b>Primitive finite
+ * elements</b></dt>
+ * <dd>A finite element (described by its shape functions) is primitive if
+ * there is a unique relation from shape function number to vector @ref
+ * GlossComponent "component". What this means is that each shape function of
+ * a vector-valued element has exactly one nonzero component if an element is
+ * primitive. This includes, in particular, all scalar elements as well as
+ * vector-valued elements assembled via the FESystem class from other
+ * primitive (for example scalar) elements as shown in step-8,
+ * step-29, step-22 and several others. On the other hand,
+ * the FE_RaviartThomas class used in step-20 and step-21, or the FE_Nedelec
+ * class provide non-primitive finite elements because there, each
+ * vector-valued shape function may have several non-zero components.</dd>
+ *
+ *
+ * <dt class="glossary">@anchor GlossReferenceCell <b>Reference cell</b></dt>
+ * <dd>The hypercube [0,1]<sup>dim</sup>, on which all parametric finite
+ * element shape functions are defined. Many properties of the reference
+ * cell are described by the GeometryInfo class.</dd>
+ *
+ *
+ * <dt class="glossary">@anchor GlossSerialization <b>Serialization</b></dt>
+
+ * <dd>The term "serialization" refers to the process of writing the state of
+ * an object to a stream and later retrieving it again. A typical use case is to
+ * save the state of a program to disk for possible later resurrection, often
+ * in the context of checkpoint/restart strategies for long running
+ * computations or on computers that aren't very reliable (e.g. on very large
+ * clusters where individual nodes occasionally fail and then bring down an
+ * entire MPI job). In either case, one wants to occasionally save the state
+ * of the program so that, upon failure, one can restart it at that point
+ * rather than having to run it again from the beginning.
+ *
+ * deal.II implements serialization facilities by implementing the necessary
+ * interfaces for the <a
+ * href="http://www.boost.org/doc/libs/1_46_1/libs/serialization/doc/index.html"
+ * target="_top">BOOST serialization</a> library. See there for examples on
+ * how to save and restore objects. </dd>
+ *
+ *
+ * <dt class="glossary">@anchor GlossShape <b>Shape functions</b></dt>
+ * <dd>The restriction of the finite element basis functions to a single
+ * grid cell.</dd>
+ *
+ *
+ * <dt class="glossary">@anchor GlossSubdomainId <b>Subdomain id</b></dt>
+ * <dd>Each cell of a triangulation has associated with it a property called
+ * the "subdomain id" that can be queried using a call like
+ * <code>cell-@>subdomain_id()</code> and that can be set for example by using
+ * <code>cell-@>set_subdomain_id(13)</code>. (These calls resolve to
+ * CellAccessor::subdomain_id() and CellAccessor::set_subdomain_id(), respectively.)
+ * While in principle this property
+ * can be used in any way application programs deem useful (it is simply an
+ * integer associated with each cell that can indicate whatever you want), at
+ * least for programs that run in %parallel it usually denotes the
+ * @ref GlossMPIRank "MPI rank" of the processor that "owns" this cell.
+ *
+ * For programs that are parallelized based on MPI but where each processor
+ * stores the entire triangulation (as in, for example, step-17 and step-18,
+ * but not in step-40), subdomain ids are assigned to cells by
+ * partitioning a mesh, and each MPI process then only works on those cells it
+ * "owns", i.e., that belong to a subdomain the processor owns
+ * (traditionally, this is the case for the subdomain id whose numerical value
+ * coincides with the rank of the MPI process within the MPI
+ * communicator). Partitioning is typically done using the
+ * GridTools::partition() function, but any other method can also be used to
+ * do this. (Alternatively, the parallel::shared::Triangulation class can
+ * partition the mesh automatically using a similar approach.)
+ *
+ * On the other hand, for programs that are parallelized using MPI but
+ * where meshes are held distributed across several processors using
+ * the parallel::distributed::Triangulation class, the subdomain id of
+ * cells is tied to the processor that owns the cell. In other words,
+ * querying the subdomain id of a cell tells you if the cell is owned
+ * by the current processor (i.e. if <code>cell-@>subdomain_id() ==
+ * triangulation.parallel::distributed::Triangulation::locally_owned_subdomain()</code>)
+ * or by another processor. In the %parallel distributed case,
+ * subdomain ids are only assigned to cells that the current processor
+ * owns as well as the immediately adjacent @ref GlossGhostCell "ghost cells".
+ * Cells further away are held on each processor to ensure
+ * that every MPI process has access to the full coarse grid as well
+ * as to ensure the invariant that neighboring cells differ by at most
+ * one refinement level. These cells are called "artificial" (see
+ * @ref GlossArtificialCell "here") and have the special subdomain id value
+ * types::artificial_subdomain_id.
+ *
+ * In addition to regular subdomain ids, there is a second, closely related set
+ * of flags that are associated with each cell: "level subdomain ids."
+ * These exist not only for active cells but, in fact, for every cell in
+ * a mesh hierarchy. Their meaning is entirely analogous to the regular
+ * subdomain ids, but they are read and written by the
+ * CellAccessor::level_subdomain_id() and CellAccessor::set_level_subdomain_id()
+ * functions.
+ * </dd>
+ *
+ *
+ * <dt class="glossary">@anchor GlossSupport <b>Support points</b></dt> <dd>Support points are
+ * by definition those points $p_i$, such that for the shape functions
+ * $v_j$ holds $v_j(p_i) = \delta_{ij}$. Therefore, a finite element
+ * interpolation can be defined uniquely by the values in the support
+ * points.
+ *
+ * Lagrangian elements fill the vector accessed by
+ * FiniteElementBase::get_unit_support_points(), such that the
+ * function FiniteElementBase::has_support_points() returns
+ * <tt>true</tt>. Naturally, these support points are on the
+ * @ref GlossReferenceCell "reference cell".  Then, FEValues can be used
+ * (in conjunction with a Mapping) to access support points on the
+ * actual grid cells.
+ *
+ * @note The concept of @ref GlossSupport "support points" is
+ * restricted to the finite element families based on Lagrange
+ * interpolation. For a more general concept, see
+ * @ref GlossGeneralizedSupport "generalized support points".
+ * </dd>
+ *
+ *
+ * <dt class="glossary">@anchor GlossTargetComponent <b>Target component</b></dt> <dd>When
+ * vectors and matrices are grouped into blocks by component, it is
+ * often desirable to collect several of the original components into
+ * a single one. This could be for instance, grouping the velocities
+ * of a Stokes system into a single block.</dd>
+ *
+ *
+ * <dt class="glossary">@anchor GlossUnitCell <b>Unit cell</b></dt>
+ * <dd>See @ref GlossReferenceCell "Reference cell".</dd>
+ *
+ *
+ * <dt class="glossary">@anchor GlossUnitSupport <b>Unit support points</b></dt>
+ * <dd>These are the @ref GlossSupport "support points" on the reference cell, defined in
+ * FiniteElementBase. For example, the usual Q1 element in 1d has support
+ * points  at <tt>x=0</tt> and <tt>x=1</tt> (and similarly, in higher
+ * dimensions at the vertices of the unit square or cube). On the other
+ * hand, higher order Lagrangian elements have unit support points also
+ * in the interior of the unit line, square, or cube.
+ * </dd>
+ *
+ *
+ * <dt class="glossary">@anchor GlossUserFlags <b>User flags</b></dt>
+ * <dd>
+ *   A triangulation offers one bit per line, quad, etc for user flags.
+ *   This field can be
+ *   accessed as all other data using iterators, using the syntax
+ *   @code
+ *      cell->set_user_flag();                // set the user flag of a cell
+ *      if (cell->user_flag_set() == false)   // if cell hasn't been flagged yet
+ *        {
+ *           cell->face(0)->set_user_flag();  // flag its first face
+ *        }
+ *   @endcode
+ *   Typically, this user flag is
+ *   used if an algorithm walks over all cells and needs information whether
+ *   another cell, e.g. a neighbor, has already been processed. Similarly,
+ *   it can be used to flag faces, quads or lines at the boundary for which
+ *   some operation has already been performed. The latter is often useful
+ *   since a loop such as
+ *   @code
+ *      // in 3d
+ *      for (cell=dof_handler.begin_active();
+ *           cell!=dof_handler.end(); ++cell)
+ *        for (unsigned int l=0; l<GeometryInfo<dim>::lines_per_cell; ++l)
+ *          if (cell->line(l)->at_boundary())
+ *            {
+ *               do something with this line
+ *            }
+ *   @endcode
+ *   encounters some boundary lines more than once. Consequently, one would
+ *   set the user flag of the line in the body of the loop, and only enter the
+ *   body if the user flag had not previously been set. There are a number of
+ *   additional functions that can be accessed through the iterator interface;
+ *   see the TriaAccessor class for more information. Note that there are no
+ *   user flags that can be associated with vertices; however, since vertices
+ *   are numbered consecutively, this can easily be emulated in user code
+ *   using a vector of bools.
+ *
+ *   There are two functions, Triangulation::save_user_flags and
+ *   Triangulation::load_user_flags which
+ *   write and read these flags to and from a stream or a vector of bools. Unlike
+ *   Triangulation::save_refine_flags and Triangulation::load_refine_flags,
+ *   these two functions store
+ *   and read the flags of all used lines, quads, etc, i.e., not only of the
+ *   active ones.
+ *
+ *   If you want to store more specific user flags, you can use the functions
+ *   Triangulation::save_user_flags_line and Triangulation::load_user_flags_line
+ *   and similarly for quads, etc.
+ *
+ *   As for the refinement and coarsening flags, there exist two versions of these
+ *   functions, one which reads/writes from a stream and one which does so from
+ *   a <tt>vector@<bool@></tt>. The latter is used to store flags temporarily, while the
+ *   first is used to store them in a file.
+ *
+ *   It is good practice to clear the user flags using the
+ *   Triangulation::clear_user_flags() function before usage, since it is
+ *   often necessary to use the flags in more than one function. If the flags may
+ *   be in use at the time a function that needs them is called, then this function
+ *   should save and restore the flags as described above.
+ *
+ *   @note If more information than just a single boolean flag needs to be stored
+ *   with a cell, line, or face, then see about @ref GlossUserData "user data".
+ * </dd>
+ *
+ *
+ * <dt class="glossary">@anchor GlossUserData <b>User pointers and user indices</b></dt>
+ * <dd>
+ *   Just like the @ref GlossUserFlags "user flags", the Triangulation class offers a
+ *   field for each line, quad and hex in which to store more descriptive data than just
+ *   a single boolean flag. This is called "user data" and the data that can be stored
+ *   in it is either a single unsigned integer or a void pointer. Both are typically
+ *   used to index into a bigger array that contains more detailed data an application
+ *   wants to attach to a mesh entity.
+ *
+ *   User data is stored and retrieved in the following manner:
+ *   @code
+ *      for (cell=dof_handler.begin_active();
+ *           cell!=dof_handler.end(); ++cell)
+ *        for (unsigned int l=0; l<GeometryInfo<dim>::lines_per_cell; ++l)
+ *          if (cell->line(l)->at_boundary())
+ *            {
+ *              cell->line(l)->set_user_index(42);
+ *            }
+ *   @endcode
+ *   Similarly, there are functions TriaAccessor::set_user_pointer to set a pointer, and
+ *   TriaAccessor::user_index and TriaAccessor::user_pointer to retrieve the index
+ *   and pointer. To clear all user indices or pointers, use Triangulation::clear_user_data().
+ *   As with flags, there are functions that allow to save and restore user data,
+ *   either for all entities of the mesh hierarchy or for lines, quads or hexes
+ *   separately. There are a number of additional functions that can be accessed
+ *   through the iterator interface; see the TriaAccessor class for more information.
+ *
+ *   @note User pointers and user indices are stored in the same
+ *   place. In order to avoid unwanted conversions, Triangulation
+ *   checks which one of them is in use and does not allow access to
+ *   the other one, until Triangulation::clear_user_data() has been called.
+ *
+ *   @note The usual warnings about the missing type safety of @p void pointers are
+ *   obviously in place here; responsibility for correctness of types etc
+ *   lies entirely with the user of the pointer.
+ * </dd>
+ *
+ * <dt class="glossary">@anchor workstream_paper <b>%WorkStream paper</b></dt>
+ * <dd>The "WorkStream paper" is a paper by B. Turcksin, M. Kronbichler and W. Bangerth
+ *   that discusses the design and implementation of WorkStream. WorkStream is, at its
+ *   core, a design pattern, i.e., something that is used over and over in finite element
+ *   codes and that can, consequently, be implemented generically. In particular, the
+ *   paper lays out the motivation for this pattern and then proposes different ways
+ *   of implementing it. It also compares the performance of different implementations.
+ *
+ * The full reference for this paper is as follows:
+ * @code{.bib}
+@Article{TKB16,
+  author =       {Bruno Turcksin and Martin Kronbichler and Wolfgang Bangerth},
+  title =        {\textit{WorkStream} -- a design pattern for multicore-enabled finite element computations},
+  journal =      {accepted for publication in the ACM Trans. Math. Softw.},
+  year =         2015
+}
+ * @endcode
+ * It is available from <a href="http://www.math.tamu.edu/~bangerth/publications.html">http://www.math.tamu.edu/~bangerth/publications.html</a>, also see <a href="https://www.dealii.org/publications.html#details">deal.II publications</a> for details.
+ * </dd>
+ *
+ * </dl>
+ */
diff --git a/doc/doxygen/headers/grid.h b/doc/doxygen/headers/grid.h
new file mode 100644
index 0000000..f84e413
--- /dev/null
+++ b/doc/doxygen/headers/grid.h
@@ -0,0 +1,117 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+ * @defgroup grid Grid classes
+ *
+ * This module groups classes that have to do with the topology and
+ * geometry of meshes. A mesh can be thought of as a collection of cells;
+ * if the mesh has been refined (possibly in an adaptive way), then
+ * this collection is grouped into a hierarchy of refinement
+ * levels. In addition to cells, the geometric objects that make up a
+ * triangulation are the faces of cells (and in 3d the edges of cells)
+ * as well as the vertices of the cells. Note that we abuse the word
+ * <i>triangulation</i> somewhat, since deal.II only implements
+ * triangulations made up of linear, quadrilateral, and hexahedral
+ * cells; triangles and tetrahedra are not supported.
+ *
+ * This collection of cells is managed by the Triangulation class. It holds
+ * the relevant data in memory and offers interfaces to query it. Most things
+ * you want to do on cells are performed in loops over all cells. For this
+ * purpose, the Triangulation class offers the concept of iterators (see @ref
+ * Iterators): although implemented differently, they behave like pointers to
+ * cells or faces and can be queried for the geometric properties of cells as
+ * well as information like neighboring cells or faces of a cell.
+ *
+ * It is worth noting that the Triangulation class only stores geometry
+ * (i.e. the location of vertices and cells) and topology of a mesh
+ * (i.e. which cells are neighbors of which other cells, etc). It has nothing
+ * to do with finite elements or degrees of freedom that might be defined on a
+ * mesh. These functions are performed by the DoFHandler class (see the @ref
+ * dofs module) that gets a description of the finite element space and then
+ * allocates and manages degrees of freedom on vertices, faces, or cells, as
+ * described by the finite element class. This separation makes it possible to
+ * have multiple DoFHandler classes work on the same mesh at the same time.
+ * 
+ * 
+ * <h3>Grid generation</h3>
+ *
+ * There are three ways to create a mesh:
+ * <ul>
+ * <li> Creation by the GridGenerator class;
+ * <li> Reading from a file;
+ * <li> Creation by hand.
+ * </ul>
+ *
+ * For the first case, the GridGenerator class provides functions that can
+ * generate the simplest and most common geometries automatically. For
+ * example, a rectangular (or brick) geometry as well as circles, spheres, or
+ * cylinders can be generated with the functions in this class. Most of the
+ * tutorial programs use this mechanism.
+ *
+ * Secondly, it is possible to read in meshes from an input file in a number
+ * of different formats using the GridIn class. Using this class, it is
+ * possible to read meshes with several 10 or 100 thousand cells, although
+ * this is not really recommended: the power of adaptive finite element
+ * methods only comes to bear if the initial mesh is as coarse as possible and
+ * there is room for a number of adaptive refinement steps. If the initial
+ * mesh is too fine already, then one runs out of memory or compute time
+ * before adaptive mesh refinement is able to do much good. Nevertheless, the
+ * GridIn class can be used in cases of complicated geometries or for
+ * comparison or interaction with other programs that compute on meshes that
+ * are then exchanged through this class. The step-5 tutorial program shows how
+ * to use the GridIn class.
+ *
+ * The third way is to create a mesh by hand, by building a data structure
+ * that describes the vertices and cells of a triangulation. This is useful in
+ * cases of moderate complexity where a mesh can still be built by hand
+ * without resorting to a mesh generator, but where the domain is not one of
+ * those already supported by the GridGenerator class. In this method, the data
+ * structure so built is handed to the create_triangulation() function of the
+ * Triangulation class. The step-14 tutorial program shows how this can be
+ * done.
+ *
+ *
+ * <h3>Grid output</h3>
+ *
+ * Meshes can be written to output files in a number of different formats. If
+ * this involves simulation results obtained on this mesh, then this is done
+ * using the DataOut class (described in more detail in the @ref output
+ * module). On the other hand, if only the geometry and topology of the mesh
+ * are to be written to a file, the GridOut class can do this for you.
+ *
+ *
+ * <h3>Tool classes</h3>
+ *
+ * The GridTools namespace offers an assortment of functions that act on grids. For
+ * example, this includes moving around nodes, stretching or rotating entire
+ * triangulations, computing the diameter of a domain, or subdividing it into
+ * chunks of roughly equal size for parallel computations.
+ *
+ * The GridRefinement namespace implements a number of mesh refinement algorithms,
+ * based on refinement indicators given to its functions.
+ *
+ * 
+ * <h3>Internal classes</h3>
+ *
+ * In addition to the above, there are a significant number of classes in this
+ * module that are only used in the internal data structures of mesh
+ * handling. They are generally in the internal namespace, and not meant for
+ * use in application code.
+ *
+ * 
+ * @author Wolfgang Bangerth, 1998-2006
+ */
diff --git a/doc/doxygen/headers/hp.h b/doc/doxygen/headers/hp.h
new file mode 100644
index 0000000..f62e36b
--- /dev/null
+++ b/doc/doxygen/headers/hp.h
@@ -0,0 +1,104 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+ * @defgroup hp hp finite element support
+ *
+ * Classes and functions that have to do with hp finite elements. The step-27
+ * tutorial program gives an overview of how to use the classes in this
+ * namespace. A slightly more exotic application is given in step-46.
+ *
+ * The hp namespace implements the algorithms and data structures used for
+ * the hp framework in deal.II. An overview of the details of how these
+ * algorithms work and what data structures are used is given in the
+ * @ref hp_paper "hp paper".
+ */
+
+/**
+ * @defgroup hpcollection hp Collections
+ *
+ * In the implementation of the hp finite element method, each cell might have
+ * a different finite element associated with it. To handle this, the
+ * hp::DoFHandler must have a whole set of finite element classes associated
+ * with it. This concept is represented by the hp::FECollection class: Objects
+ * of this type act as containers that hold a whole set of finite element
+ * objects. Instead of storing pointers to finite element objects on each
+ * cell, we then only store an index for each cell that identifies the finite
+ * element object within the collection that should be used by this cell. The
+ * DoFHandler object associated with the given cell can then assign degrees of
+ * freedom to each cell in accordance with the finite element used for it.
+ *
+ * A similar situation arises when integrating terms on a cell: one may want
+ * to use different quadrature formulas for different finite elements. For
+ * example, on cells where we use a Q1 element, a QGauss(2) object (i.e. a
+ * quadrature formula with two points in each space direction) may be
+ * sufficient, but on another cell where a Q3 element is used, this would lead
+ * to underintegration and we should use a QGauss(4) formula instead. Just as
+ * above, there exists a class hp::QCollection that acts as a collection of
+ * quadrature formulas.
+ *
+ * Finally, one may want to use different orders for the boundary
+ * approximation for cells with different orders for the finite element. The
+ * hp::MappingCollection class allows one to do this.
+ *
+ * All of these three classes, the hp::FECollection, hp::QCollection,
+ * and hp::MappingCollection classes, implement an interface very
+ * similar to that of <code>std::vector</code>. They have functions
+ * <code>push_back()</code> to add a finite element, quadrature
+ * formula, or mapping to the collection. They have an
+ * <code>operator[] (unsigned int)</code> function that allows one to
+ * retrieve a reference to a given element of the collection. And they
+ * have a <code>size()</code> function that returns the number of
+ * elements in the collection. Some of the classes, in particular that
+ * holding finite element objects, also implement other functions
+ * specific to their purpose.
+ *
+ * The similarity goes beyond the interface: When adding an element to the
+ * collection, all of the classes create a copy of the argument. This allows
+ * to pass a temporary object to the function adding the element. For example,
+ * the following works:
+ * @verbatim
+ *   FECollection<dim> fe_collection;
+ *   for (unsigned int degree=1; degree<5; ++degree)
+ *     fe_collection.push_back (FE_Q<dim>(degree));
+ * @endverbatim
+ * 
+ * This way, one can add elements of polynomial degree 1 through 4 to the
+ * collection. It is not necessary to retain the added object: the collection
+ * makes a copy of it, it does not only store a pointer to the given finite
+ * element object. This same observation also holds for the other collection
+ * classes.
+ *
+ * It is customary that within an hp finite element program, one keeps
+ * collections of finite elements and quadrature formulas with the same number
+ * of elements, each element of the one collection matching the element in the
+ * other. This is not necessary, but it often makes coding a lot simpler. If a
+ * collection of mappings is used, the same holds for hp::MappingCollection
+ * objects as well.
+ *
+ * @ingroup hp
+ */
+
+
+/**
+ * A namespace for the implementation of hp finite element specific algorithms
+ * and data structures.
+ *
+ * @ingroup hp
+ */
+namespace hp
+{
+}
diff --git a/doc/doxygen/headers/instantiations.h b/doc/doxygen/headers/instantiations.h
new file mode 100644
index 0000000..f595e61
--- /dev/null
+++ b/doc/doxygen/headers/instantiations.h
@@ -0,0 +1,127 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+ * @page Instantiations Template instantiations
+ *
+ * Instantiation of complex class and function templates is expensive both in
+ * terms of compile time and disk space. Therefore, we try to separate
+ * declaration and implementation of templates as far as possible, and make
+ * sure that implementations are read by the compiler only when 
+ * necessary.
+ *
+ * Template classes in <tt>deal.II</tt> can be grouped into three categories,
+ * depending on the number of probable different instantiations. These three
+ * groups are discussed in the following.
+ * 
+ *
+ * @section Inst1 Known and fixed number of instantiations
+ *
+ * These are the classes having template parameters with very
+ * predictable values. The typical prototype is
+ * @code
+ * template <int dim> class Function;
+ * @endcode
+ *
+ * Here, we have a small number of instantiations (<code>dim = 1,2,3</code>)
+ * known at the time of design of the library. Therefore, member functions of
+ * this class are defined in a <tt>.cc</tt> file in the source directory and
+ * we instantiate the template for these known values explicitly in the source
+ * file.
+ *
+ * From an application viewpoint, all you actually get to see then is the
+ * declaration of the template. Actual instantiation of member functions
+ * happens inside the library and is done when you compile the library, not
+ * when you compile your application code.
+ *
+ * For these classes, adding instantiations for new parameters involves
+ * changing the library. However, this is rarely needed, of course, unless you
+ * are not content with computing only in 1d, 2d, or 3d.
+ * 
+ *
+ * @subsection Inst1a Available instances
+ *
+ * If the template parameter is <tt>dim</tt>, the available instances
+ * are for <tt>dim=1,2,3</tt>, if there is no other information.
+ *
+ * There are other cases of classes (not depending on the spatial
+ * dimension) for which only a certain, small number of template
+ * arguments is supported and explicit instantiations are provided in
+ * the library. In particular, this includes all the linear algebra
+ * classes that are templatized on the type of the scalar underlying
+ * stored values: we only support <code>double</code>,
+ * <code>float</code>, and in some cases <code>long double</code>,
+ * <code>std::complex@<double@></code>,
+ * <code>std::complex@<float@></code>, and <code>std::complex@<long
+ * double@></code>.
+ * 
+ *
+ * @section Inst2 A few instantiations, most of which are known
+ *
+ * These are class templates usually having a small number of instantiations,
+ * but additional instantiations may be necessary. Therefore, a set of
+ * instantiations for the most likely parameters is provided precompiled in
+ * the libraries, but the implementation of the templates is provided in a
+ * special header file so that it is accessible in case someone wants to
+ * instantiate it for an unforeseen argument.
+ *
+ * Typical examples for this would be some of the linear algebra classes that
+ * take a vector type as template argument. They would be instantiated within
+ * the library for <code>Vector@<double@></code>,
+ * <code>Vector@<float@></code>, <code>BlockVector@<double@></code>,
+ * and <code>BlockVector@<float@></code>, for example. However, they may
+ * also be used with other vector types as long as they satisfy certain
+ * interfaces, including vector types that are not part of the library but
+ * possibly defined in an application program. In such a case, applications
+ * can instantiate these templates by hand as described in the next section.
+ * 
+ *
+ * @subsection Inst2c Creating new instances
+ *
+ * Choose one of your source files to provide the required
+ * instantiations. Say that you want the class template <tt>XXXX</tt>,
+ * defined in the header file <tt>xxxx.h</tt>, instantiated with the
+ * template parameter <tt>Lager</tt>. Then, your file should contain
+ * the lines
+ * @code
+ *                   // Include class template declaration
+ * #include <xxxx.h>
+ *                   // Include member definitions
+ * #include <xxxx.templates.h>
+ *
+ * ...
+ *
+ * template class XXXX<Lager>;
+ * @endcode
+ *
+ * 
+ * @subsection Inst2p Provided instances
+ *
+ * Like with the classes in section @ref Inst1, the instances provided in the
+ * library are often listed in the documentation of that class in a form
+ * similar to this:
+ @verbatim
+ Template Instantiations: some  (<p1>a,b,c<p2>)
+ @endverbatim
+ *
+ *
+ * @section Inst3 Many unknown instantiations
+ *
+ * These are the classes, where no reasonable predetermined set of instances
+ * exists. Therefore, all member definitions are included in the header file
+ * and are instantiated wherever needed.  An example would be the SmartPointer
+ * class template that can be used with virtually any template argument.
+ */
diff --git a/doc/doxygen/headers/integrators.h b/doc/doxygen/headers/integrators.h
new file mode 100644
index 0000000..6836bf6
--- /dev/null
+++ b/doc/doxygen/headers/integrators.h
@@ -0,0 +1,60 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+ * @defgroup Integrators Integrators
+ *
+ * A collection of namespaces and functions which simplify the coding
+ * of forms and bilinear forms on finite element spaces. Functions for
+ * two different purposes are gathered here: the abstract integration
+ * on finite element meshes in MeshWorker and actual implementations
+ * of the integration of cell and face terms for concrete problems in
+ * LocalIntegrators.
+ *
+ * @note Documentation on coding conventions, relations between
+ * classes, and details of the implementation is found in the
+ * documentation of namespaces in this module.
+ *
+ * <h3>Integration on finite element meshes</h3>
+ *
+ * When we integrate a function or a functional on a finite element
+ * space, the structure of the integration loop is always the same. We
+ * have between 3 and 5 nested loops, from outside to inside:
+ * <ol>
+ * <li> Loop over all cells
+ * <li> Optionally, loop over all faces to compute fluxes
+ * <li> Loop over all quadrature points of the cell/face
+ * <li> Optionally, loop over all test functions to compute forms
+ * <li> Optionally, loop over all trial functions to compute bilinear
+ * forms
+ * </ol>
+ *
+ * These loops naturally fall into two classes, namely the computation
+ * of cell and face contributions (loops 3 to 5), and the outer loops
+ * over the mesh objects, often referred to as <em>assembling</em>.
+ *
+ * Support for the outer loop in deal.II can be found in the namespace
+ * MeshWorker (see the documentation there). In order to support the
+ * cell and face contributions (referred to as local contributions
+ * from now on), deal.II offers FEValuesBase and its derived
+ * classes. While the outer loop is generic (with exception of the
+ * data types), the computation of local contributions is problem
+ * dependent. Therefore, no generic algorithm is possible
+ * here. Nevertheless, we can define a generic interface for functions
+ * for this purpose and provide a library of local integrators for use
+ * in applications. These are collected in the namespace
+ * LocalIntegrators.
+ */
diff --git a/doc/doxygen/headers/io.h b/doc/doxygen/headers/io.h
new file mode 100644
index 0000000..349bcf2
--- /dev/null
+++ b/doc/doxygen/headers/io.h
@@ -0,0 +1,134 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+ * @defgroup IO Input/Output
+ *
+ * This module collects the classes used for reading and writing meshes and
+ * data. There are two sub-modules for each of these operations.
+ */
+
+/**
+ * @defgroup input Input
+ *
+ * deal.II can read meshes in a number of different formats. However, all of
+ * them are constrained to so-called "coarse meshes", i.e. meshes that have no
+ * refinement hierarchy and in particular no hanging nodes. The GridIn class
+ * describes in detail what formats are supported.
+ *
+ * In addition, deal.II can read an intermediate graphics format using the
+ * DataOutReader. This format is an intermediate step between the data of a
+ * simulation and its visualization, and is written by the DataOutBase class (or
+ * through the more derived classes described in the \ref output module). The
+ * DataOutReader class reads this data back in, and it can then be converted
+ * to any of a number of data formats supported by visualization programs.
+ *
+ * Finally, the ParameterHandler and MultipleParameterLoop classes (and the
+ * associated Patterns namespace) are used to deal with parameter files
+ * describing run-time parameters to a program that one doesn't want to
+ * hard-code within the program source.
+ *
+ *
+ * <h3>The PathSearch class</h3>
+ *
+ * The PathSearch class is a helper class in input handling. It is
+ * used to find a file in a list of directories, in much the same way
+ * as unix systems find executables among the directories listed in
+ * the <code>PATH</code> environment variable.
+ *
+ * @ingroup IO
+ */
+
+/**
+ * @defgroup output Graphical output
+ *
+ * deal.II generates three types of output: it can write triangulations/meshes
+ * in formats understood by several mesh readers (including those of deal.II
+ * itself), and it can create output used for visualization of data. Finally,
+ * it can output matrices in a graphical format.
+ *
+ * 
+ * <h3>Visualization of data</h3>
+ * 
+ * deal.II supports, through the DataOutBase class, a large number of popular
+ * visualization formats, such as those used by the OpenDX, gmv, or gnuplot
+ * programs. A complete list of supported formats is given in the
+ * documentation of the DataOutBase class.
+ *
+ * The DataOutBase class is only responsible for actually writing some
+ * intermediate format in a number of different visualization formats. This
+ * intermediate format is generated by classes derived, directly or
+ * indirectly, from DataOutBase. For example, the DataOut class is most often
+ * used to generate this intermediate format from a triangulation, a
+ * DoFHandler object (that associates a particular finite element class with
+ * the triangulation), and one or more data vectors. The DataOut class creates
+ * intermediate data from each cell, which is subsequently written by the
+ * DataOutBase class in some final format. Almost all example programs,
+ * starting with step-3, make use of this method of generating output.
+ *
+ * The DataOutFaces class is another way to create intermediate format from
+ * simulation data. However, instead of creating visualization data from each
+ * cell of the triangulation, it only creates information for all faces of
+ * cells that are located on the surface (though the class has a way to
+ * override the choice for which faces output should be generated). While this
+ * may not be particularly interesting in 2d (the faces would only be line
+ * segments) it is often helpful in 3d if what one really wants to know is the
+ * shape of the domain or the value of one variable on the surface. Using the
+ * DataOutFaces class then saves the effort of generating and storing data for
+ * all interior cells, which can be very expensive for large 3d simulations.
+ *
+ * A third class, the DataOutRotation class, allows to take a two-dimensional
+ * simulation and generate three-dimensional data from it by rotating the
+ * two-dimensional domain around a given axis. This is mostly useful for the
+ * visualization of simulations that use the rotational symmetry of, for
+ * example, a cylinder.
+ *
+ * Finally, the DataOutStack class allows to visualize data from time
+ * dependent simulations in the space-time domain: it collects the results
+ * from each time step and at the end outputs all of this information at once
+ * as a space-time file.
+ * 
+ * 
+ * <h3>Grid output</h3>
+ *
+ * Meshes, without any data vectors associated with them, can be written in a
+ * number of formats as well. This is done through the GridOut class, and the
+ * documentation of that class lists the supported formats.
+ *
+ * Several of the tutorial programs, notably step-1, step-6, step-9, step-10,
+ * step-12, and step-14 demonstrate the use of the GridOut class.
+ *
+ *
+ * <h3>Matrix output</h3>
+ *
+ * Through the MatrixOut class, deal.II can also give a graphical
+ * visualization of matrices, in the form of color or skyline plots. The
+ * MatrixOut class uses the DataOutBase for output. Therefore, matrices can be
+ * visualized in all formats supported by the latter class.
+ *
+ * @ingroup IO
+ */
+
+/**
+ * @defgroup textoutput Textual output
+ *
+ * In addition to classes that provide graphical output formats (see the @ref
+ * output module), deal.II has a number of classes that facilitate textual
+ * output in a number of ways. They are collected in this module. See the
+ * documentation of these classes for more details.
+ *
+ * @ingroup IO
+ */
diff --git a/doc/doxygen/headers/iterators.h b/doc/doxygen/headers/iterators.h
new file mode 100644
index 0000000..16d405f
--- /dev/null
+++ b/doc/doxygen/headers/iterators.h
@@ -0,0 +1,446 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013, 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/**
+ @defgroup Iterators Iterators on mesh-like containers
+ @{
+
+deal.II has several classes which are understood conceptually as
+meshes. Apart from the obvious Triangulation, these are, for example,
+DoFHandler and hp::DoFHandler. All of those define a set
+of iterators, allowing the user to traverse the whole mesh, i.e. the
+set of cells, faces, edges, etc that comprise the mesh, or portions of
+it. These iterators are all in a sense derived from the TriaIterator
+class.
+
+Basically, the template signature of TriaIterator is
+@code
+  TriaIterator<Accessor>
+@endcode
+
+Conceptually, this type represents something like a pointer to an object
+represented by the <code>Accessor</code> class.  Usually, you will not use the
+actual class names spelled out directly, but employ one of the typedefs
+provided by the mesh classes, such as <code>typename
+Triangulation::cell_iterator</code>. Before going into this, let us
+first discuss the concept of iterators, before delving into what the accessors
+do.
+
+As usual in C++, iterators, just as pointers, are incremented to the next
+element using <tt>operator ++</tt>, and decremented to the previous element
+using <tt>operator --</tt>. One can also jump <tt>n</tt> elements ahead using
+the addition operator, <tt>it=it+n</tt>, and correspondingly to move a number
+of elements back. In addition, and keeping with the tradition of the standard
+template library, meshes provide member functions <tt>begin()</tt> and
+<tt>end()</tt> that provide the first element of a collection and a
+one-past-the-end iterator, respectively. Since there are a number of different
+iterators available, there is actually a whole family of such functions, such
+as <tt>begin_active()</tt>, <tt>begin_face()</tt>, etc.
+
+In terms of the concepts for iterators defined in the C++ standard, the
+deal.II mesh iterators are bi-directional iterators: they can be incremented
+and decremented, but an operation like <tt>it=it+n</tt> takes a compute time
+proportional to <tt>n</tt>, since it is implemented as a sequence of
+<tt>n</tt> individual unit increments. Note that this is in contrast to the
+next more specialized iterator concept, random access iterators, for which
+access to an arbitrary object requires only constant time, rather than linear.
+
+
+@section IteratorsAndSets Iterators as pointers into sets of objects
+
+As mentioned above, iterators in deal.II can be considered as iterating over
+all the objects that constitute a mesh. (These objects are lines, quads, and
+hexes, and are represented by the type of Accessor class given as template argument to the iterator.) This suggests viewing a triangulation as a
+collection of cells and other objects that are held together by a certain data
+structure that links all these objects, in the same way as a linked list is
+the data structure that connects objects in a linear fashion.
+
+Triangulations in deal.II can indeed be considered in this way. In particular,
+they use the computational notion of a forest of regular trees to store their
+data. This can be understood as follows: Consider the cells of the coarse mesh
+as roots; then, if one of these coarse mesh cells is refined, it will have
+2<sup>dim</sup> children, which in turn can, but do not have to, have
+2<sup>dim</sup> children of their own, and so on. This means that each cell
+of the coarse mesh can be considered the root of a binary tree (in 1d), a
+quadtree (in 2d), or an octree (in 3d). The collection of these trees
+emanating from the cells of the coarse mesh then constitutes the forest that
+completely describes the triangulation, including all of its active and
+inactive cells. In particular, the active cells are those terminal nodes in
+the tree that have no descendants, i.e. cells which are not further
+refined. Correspondingly, inactive cells correspond to nodes in the tree with
+descendants, i.e. cells that are further refined.
+
+A triangulation contains forests for lines (each of which may have 2
+children), quads (each with possibly four children), and hexes (each with no
+or 8 children). Depending on the dimension, these objects are also termed
+cells or faces.
+
+Iterators loop over the elements of such forests. While the usual iterators
+loop over all nodes of a forest, active iterators iterate over the
+elements in the same order, but skip all non-active entries and therefore only
+visit terminal nodes (i.e. active cells, faces, etc). There are many ways to
+traverse the elements of a forest, for example breadth first or depth
+first. Depending on the type of data structure used to store the forest, some
+ways are more efficient than others. At present, the way iterators traverse
+forests in deal.II is breadth first. I.e., iterators first visit all the
+elements (cells, faces, etc) of the coarse mesh before moving on to all the
+elements of the next level, i.e. the immediate children of the coarse
+mesh objects; after this come the grandchildren of the coarse mesh, and so on.
+However, it must be noted that programs should not rely on this particular
+order of traversing a tree: this is considered an implementation detail that
+can change between versions, even if we consider this an unlikely option at
+the present time.
+
+
+
+@section IteratorsDifferences Different kinds of iterators
+
+Iterators have two properties: what they point to (i.e. the type of the
+Accessor template argument), and the exact definition of the set they iterate
+over. In general, iterators are always declared as
+@code
+  KindIterator<Accessor>
+@endcode
+
+Here, <tt>Kind</tt> determines what property an accessor needs to have to be
+reached by this iterator (or omitted, for that matter). For example,
+@code
+  Iterator<Accessor>
+@endcode
+iterates over all objects of kind Accessor that make up the mesh (for example
+all cells, whether they are further refined and have children, or not), whereas
+@code
+  ActiveIterator<Accessor>
+@endcode
+skips all objects that have children, i.e. objects that are not active.
+Active iterators therefore operate on a subset of the objects
+that normal iterators act on, namely those that possess the property that
+they are active. Note that this is independent of the kind of object we
+are operating on: all valid accessor classes have to provide the iterator
+classes a method to find out whether they are active or not.
+
+(For completeness, let us mention that there is a third kind of iterators: "raw
+iterators" also traverse objects that are unused in the triangulation, but
+allocated anyway for efficiency reasons. User code should never use raw
+iterators, they are only for %internal purposes of the library.)
+
+Whether an object is active can be considered a "predicate": a property that
+is either true or false. Filtered iterators can be used to restrict the scope
+of existing iterators even more. For instance, you could imagine iterating
+over the subset of those @ref GlossActive "active cells" having their user
+flag set or belonging to a certain subdomain (both properties are either true
+or false for a given object).
+
+This is achieved by using an object of type FilteredIterator
+<BaseIterator>, where BaseIterator usually is one of the
+standard iterators discussed above.
+
+The FilteredIterator gets an additional Predicate in its constructor and will
+skip all objects where this Predicate evaluates to <tt>false</tt>. A
+collection of predicates already implemented can be found in the namespace
+IteratorFilters.
+
+
+@subsection IteratorsLoops Iterating over objects
+
+All iterators of the same kind and iterating over the
+same kind of geometrical objects traverse the mesh in the same
+order. Take this code example:
+@code
+  Triangulation<dim> tria;
+  DoFHandler<dim>    dof1(tria);
+  DoFHandler<dim>    dof2(tria);
+  ...
+  typename Triangulation<dim>::cell_iterator ti  = tria.begin();
+  typename DoFHandler<dim>::cell_iterator   di1 = dof1.begin();
+  typename DoFHandler<dim>::cell_iterator   di2 = dof2.begin();
+  ...
+  while (ti != tria.end())
+  {
+    // do something
+    ++ti;
+    ++di1;
+    ++di2;
+  }
+@endcode
+
+Here, all iterators will always point to the same mesh cell, even though
+<tt>DoFHandler</tt> and <tt>Triangulation</tt> are very different classes,
+and even if the DoFHandlers are handling different finite elements: they
+all access cells in the same order, the difference is only in the Accessor.
+As mentioned above, the order in which iterators traverse the forest of
+objects is actually well-defined, but application programs should not
+assume any such order, but rather consider this an implementation detail
+of the library.
+
+Corresponding to the above example, the order in which iterators traverse active
+objects is the same for all iterators in the following snippet, the difference to the previous example being that here we only consider active cells:
+@code
+  typename Triangulation<dim>::active_cell_iterator ti  = tria.begin_active();
+  typename DoFHandler<dim>::active_cell_iterator   di1 = dof1.begin_active();
+  typename DoFHandler<dim>::active_cell_iterator   di2 = dof2.begin_active();
+  ...
+  while (ti != tria.end())
+  {
+    // do something
+    ++ti;
+    ++di1;
+    ++di2;
+  }
+@endcode
+
+
+
+@section IteratorsAccessors Accessors
+
+Iterators are like pointers: they can be incremented and decremented, but they
+are really rather dumb. Their magic only lies in the fact that they point to
+some useful object, in this case the Accessor. For pointers, they point to an
+actual object that stores some data. On the other hand, the deal.II iterators,
+when dereferenced, do not return a reference to an actual object, but return
+an object that knows how to get at the data that represents cells. In general, this
+object doesn't store itself where the vertices of a cell are or what its neighbors
+are. However, it knows how to tease this sort of information out of the
+arrays and tables and lists that the Triangulation class sets up to describe a
+mesh.
+
+Accessing data that characterizes a cell is always done through the Accessor,
+i.e. the expression <code>i->xxx()</code> grants access to <b>all</b>
+attributes of this Accessor. Examples of properties you can query from an
+iterator are
+@code
+  cell->vertex(1);
+  line->child(0);
+  hex->face(3);
+  cell->at_boundary();
+  face->boundary_id();
+@endcode
+
+Since dereferencing iterators yields accessor objects, these calls are to
+member functions <code>Accessor::vertex()</code>,
+<code>Accessor::child()</code> etc. These in turn figure out the relevant data
+from the various data structures that store this data. How this is actually
+done and what data structures are used is not really of concern to authors of
+applications in deal.II. In particular, by hiding the actual data structures
+we are able to store data in an efficient way, not necessarily in a way that
+makes it easily accessible or understandable to application writers.
+
+
+
+@section IteratorsTypedefs Kinds of accessors
+
+Depending on what sort of data you want to access, there are different kinds
+of accessor classes:
+
+- The TriaAccessor class provides you with data that identifies the geometric
+  properties of cells, faces, lines, quads, and hexes that make up a
+  triangulation, as well as mother-child relationships.
+
+- The CellAccessor class is derived from the TriaAccessor class for cases
+  where an object has full dimension, i.e. is a cell rather than for example a
+  line bounding a cell. In that case, additional information about the
+  topological connection of a mesh is available from an accessor such as to
+  request iterators pointing to neighbors of a cell.
+
+- The DoFAccessor class lets you access information related to degrees
+  of freedom associated with cells, faces, etc; it does so for both
+  DoFHandler and hp::DoFHandler objects. Note that the DoFAccessor
+  class is derived from either TriaAccessor or CellAccessor (depending
+  on whether the DoFAccessor points to an object of full dimension or
+  not) and so is able to provide a superset of information over its
+  base classes. Additionally, the DoFAccessor class comes in two
+  flavors, one accessing degrees of freedom on the level of a cell and
+  the other accessing the active dofs of an active cell.
+
+- The DoFCellAccessor class has the same purpose and relation to
+  DoFAccessor as the CellAccessor has to TriaAccessor.
+
+Except to look up member documentation, you will not usually have to deal with
+the actual class names listed above. Rather, one uses the typedefs provided by
+the mesh classes Triangulation, DoFHandler and hp::DoFHandler, as well
+as the function that generate such objects:
+
+<table border=1>
+  <tr>
+    <th>Class</th>
+    <th>cell_iterator type</th>
+    <th>function call</th>
+  </tr>
+
+  <tr>
+    <th>Triangulation</th>
+    <td>typename Triangulation::cell_iterator</td>
+    <td>triangulation.begin()</td>
+  </tr>
+
+  <tr>
+    <th>DoFHandler</th>
+    <td>typename DoFHandler::cell_iterator</td>
+    <td>dof_handler.begin()</td>
+  </tr>
+
+  <tr>
+    <th>hp::DoFHandler</th>
+    <td>typename hp::DoFHandler::cell_iterator</td>
+    <td>hp_dof_handler.begin()</td>
+  </tr>
+</table>
+
+
+<table border=1>
+  <tr>
+    <th>Class</th>
+    <th>face_iterator type</th>
+    <th>function call</th>
+  </tr>
+
+  <tr>
+    <th>Triangulation</th>
+    <td>typename Triangulation::face_iterator</td>
+    <td>triangulation.begin_face()</td>
+  </tr>
+
+  <tr>
+    <th>DoFHandler</th>
+    <td>typename DoFHandler::face_iterator</td>
+    <td>dof_handler.begin_face()</td>
+  </tr>
+
+  <tr>
+    <th>hp::DoFHandler</th>
+    <td>typename hp::DoFHandler::face_iterator</td>
+    <td>hp_dof_handler.begin_face()</td>
+  </tr>
+</table>
+
+
+Likewise, active iterators have the following properties:
+
+<table border=1>
+  <tr>
+    <th>Class</th>
+    <th>cell_iterator type</th>
+    <th>function call</th>
+  </tr>
+
+  <tr>
+    <th>Triangulation</th>
+    <td>typename Triangulation::active_cell_iterator</td>
+    <td>triangulation.begin_active()</td>
+  </tr>
+
+  <tr>
+    <th>DoFHandler</th>
+    <td>typename DoFHandler::active_cell_iterator</td>
+    <td>dof_handler.begin_active()</td>
+  </tr>
+
+  <tr>
+    <th>hp::DoFHandler</th>
+    <td>typename hp::DoFHandler::active_cell_iterator</td>
+    <td>hp_dof_handler.begin_active()</td>
+  </tr>
+</table>
+
+
+<table border=1>
+  <tr>
+    <th>Class</th>
+    <th>face_iterator type</th>
+    <th>function call</th>
+  </tr>
+
+  <tr>
+    <th>Triangulation</th>
+    <td>typename Triangulation::active_face_iterator</td>
+    <td>triangulation.begin_active_face()</td>
+  </tr>
+
+  <tr>
+    <th>DoFHandler</th>
+    <td>typename DoFHandler::active_face_iterator</td>
+    <td>dof_handler.begin_active_face()</td>
+  </tr>
+
+  <tr>
+    <th>hp::DoFHandler</th>
+    <td>typename hp::DoFHandler::active_face_iterator</td>
+    <td>hp_dof_handler.begin_active_face()</td>
+  </tr>
+</table>
+
+
+In addition to these types and calls that act on cells and faces (logical
+concepts that depend on the dimension: a cell is a quadrilateral in 2d, but
+a hexahedron in 3d), there are corresponding types and calls like
+<code>begin_active_quad()</code> or <code>end_quad()</code> that act on the
+dimension independent geometric objects line, quad, and hex. These calls,
+just as the ones above, exist in active and non-active forms.
+
+The actual definitions of all the typedefs local to the mesh classes are
+stated in the
+
+- internal::Triangulation::Iterators<1,spacedim>,
+  internal::Triangulation::Iterators<2,spacedim>, and
+  internal::Triangulation::Iterators<3,spacedim> classes for Triangulation
+  iterators,
+
+- internal::DoFHandler::Iterators<DoFHandlerType<1,spacedim> >,
+  internal::DoFHandler::Iterators<DoFHandlerType<2,spacedim> >, and
+  internal::DoFHandler::Iterators<DoFHandlerType<3,spacedim> > classes for DoFHandler
+  and hp::DoFHandler iterators.
+
+
+@section IteratorAccessorInternals Iterator and accessor internals
+
+Iterators, being like pointers, act as if they pointed to an actual
+object, but in reality all they do is to return an accessor when
+dereferenced. The accessor object contains the state, i.e. it knows
+which object it represents, by storing for example which Triangulation
+it belongs to, and the level and index within this level of a cell. It
+is therefore able to access the data that corresponds to the cell (or
+face, or edge) it represents.
+
+There is a representation of past-the-end-pointers, denoted by special
+values of the member variables <code>present_level</code> and <code>present_index</code> in
+the TriaAccessor class: If <code>present_level</code> @>=0 and <code>present_index</code> @>=0,
+then the object is valid; if
+<code>present_level</code>==-1 and <code>present_index</code>==-1, then the iterator points
+past the end; in all other cases, the iterator is considered invalid.
+You can check this by calling the TriaAccessorBase::state() function.
+
+Past-the-end iterators may also be used to compare an iterator with
+the before-the-start value, when running backwards. There is no
+distinction between the iterators pointing past the two ends of a
+vector.
+
+Cells are stored based on a hierarchical structure of levels, therefore the
+above mentioned structure is useful. Faces however are not organized in
+levels, and accessors for objects of lower dimensionality do not have a
+<code>present_level</code> member variable.
+
+
+@ingroup grid
+*/
+
+//@}
+
+
+/**
+ * @defgroup Accessors Accessor classes of the mesh iterators
+ * @ingroup Iterators
+ */
+
+
diff --git a/doc/doxygen/headers/lac.h b/doc/doxygen/headers/lac.h
new file mode 100644
index 0000000..8110e83
--- /dev/null
+++ b/doc/doxygen/headers/lac.h
@@ -0,0 +1,30 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+/**
+ * @defgroup LAC Linear algebra classes
+ *
+ * This module contains classes that involve linear algebra, i.e., those
+ * associated with matrices, vectors, and the solution of linear systems.
+ *
+ * The description of individual groups of classes can be found in
+ * sub-modules.
+ *
+ * The files implementing linear algebra functionality are in the
+ * <code>lac</code> subdirectory, an abbreviation for <em>L</em>inear
+ * <em>A</em>lgebra <em>C</em>lasses.
+ */
diff --git a/doc/doxygen/headers/laoperators.h b/doc/doxygen/headers/laoperators.h
new file mode 100644
index 0000000..1af479d
--- /dev/null
+++ b/doc/doxygen/headers/laoperators.h
@@ -0,0 +1,185 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+/**
+ * @defgroup LAOperators Linear Operators
+ *
+ * <h3>Linear Operator</h3>
+ *
+ * If deal.II is configured with C++11 support (i.e.,
+ * <code>DEAL_II_WITH_CXX11=ON</code> or <code>DEAL_II_WITH_CXX14=ON</code>
+ * during configuration) a versatile mechanism for storing the concept of a
+ * linear operator is available. (For questions about C++11, see
+ * @ref CPP11 .)
+ *
+ * This is done with a LinearOperator class that, like
+ * @ref ConceptMatrixType "the MatrixType concept",
+ * defines a minimal interface for <i>applying</i> a linear operation on a
+ * vector.
+ *
+ * @code
+ *   std::function<void(Range &, const Domain &)> vmult;
+ *   std::function<void(Range &, const Domain &)> vmult_add;
+ *   std::function<void(Domain &, const Range &)> Tvmult;
+ *   std::function<void(Domain &, const Range &)> Tvmult_add;
+ * @endcode
+ *
+ * Thus, such an object can be used as a matrix object in all
+ * @ref Solvers "iterative solver" classes, either as a matrix object, or as
+ * a @ref Preconditioners "preconditioner".
+ *
+ * The big advantage of the LinearOperator class is that it provides
+ * syntactic sugar for complex matrix-vector operations. As an example
+ * consider the operation $(A+k\,B)\,C$, where $A$, $B$ and $C$ denote
+ * (possibly different) SparseMatrix objects. In order to construct a
+ * LinearOperator <code>op</code> that performs the above computation when
+ * applied to a vector, one can write:
+ * @code
+ * dealii::SparseMatrix<double> A, B, C;
+ * double k;
+ * // Setup and assembly...
+ *
+ * const auto op_a = linear_operator(A);
+ * const auto op_b = linear_operator(B);
+ * const auto op_c = linear_operator(C);
+ *
+ * const auto op = (op_a + k * op_b) * op_c;
+ * @endcode
+ * Now, <code>op</code> can be used as a matrix object for further
+ * computation.
+ *
+ * The linear_operator() function can be used to wrap an ordinary matrix or
+ * preconditioner object into a LinearOperator. A linear operator can be
+ * transposed with transpose_operator(), or inverted by using the
+ * inverse_operator() together with an iterative solver.
+ *
+ * For objects of type LinearOperator, all vector space operations, i.e.,
+ * addition and subtraction, scalar multiplication and composition (of
+ * compatible linear operators) are implemented:
+ * @code
+ * dealii::LinearOperator<> op_a, op_b;
+ * double k;
+ *
+ * // vector space addition, subtraction and scalar multiplication
+ * op_a + op_b;
+ * op_a - op_b;
+ * k * op_a;
+ * op_a * k;
+ *
+ * // in-place variants
+ * op_a += op_b;
+ * op_a -= op_b;
+ * op_a *= k;
+ *
+ * // operator composition
+ * op_a * op_b;
+ * op_a *= op_b; // If op_b is an endomorphism of the domain space of op_a
+ * @endcode
+ *
+ * block_operator() and block_diagonal_operator() provide further
+ * encapsulation of individual linear operators into blocked linear
+ * operator variants.
+ *
+ * @note The LinearOperator facility obsoletes some of the @ref Matrix2
+ * "derived matrix" classes, such as BlockDiagonalMatrix, IterativeInverse,
+ * ProductMatrix, ScaledMatrix, ProductSparseMatrix,
+ * InverseMatrixRichardson, SchurMatrix, ShiftedMatrix,
+ * ShiftedMatrixGeneralized, TransposeMatrix.
+ *
+ *
+ * <h3>Packaged Operation</h3>
+ *
+ * An application of a LinearOperator object to a vector via
+ * <code>operator*</code> yields a PackagedOperation object that stores
+ * this computation.
+ *
+ * The PackagedOperation class allows lazy evaluation of expressions
+ * involving vectors and linear operators. This is done by storing the
+ * computational expression and only performing the computation when either
+ * the object is implicitly converted to a vector object, or
+ * PackagedOperation::apply() (or PackagedOperation::apply_add()) is
+ * invoked by hand. This avoids unnecessary temporary storage of
+ * intermediate results.
+ *
+ * As an example consider the addition of multiple vectors:
+ * @code
+ *   dealii::Vector<double> a, b, c, d;
+ *   // ..
+ *   dealii::Vector<double> result = a + b - c + d;
+ * @endcode
+ * Converting the PackagedOperation <code>a + b - c + d</code> to a vector
+ * results in code equivalent to the following code
+ * @code
+ *   dealii::Vector<double> a, b, c, d;
+ *   // ..
+ *   dealii::Vector<double> result = a;
+ *   result += b;
+ *   result -= c;
+ *   result += d;
+ * @endcode
+ * that avoids any intermediate storage. As a second example (involving a
+ * LinearOperator object) consider the computation of a residual $b-Ax$:
+ *
+ * @code
+ *   dealii::SparseMatrix<double> A;
+ *   dealii::Vector<double> b, x;
+ *   // ..
+ *   const auto op_a = linear_operator(A);
+ *
+ *   dealii::Vector<double> residual =  b - op_a * x;
+ * @endcode
+ * Here, the expression <code>b - op_a * x</code> results again in an
+ * object of type PackagedOperation that stores the <i>sequence of
+ * operations</i> that should be performed using the two vectors and the
+ * linear operator. Converting the expression to a vector (as happens here
+ * with the assignment to the vector <code>residual</code>) executes the
+ * computation (see the following note).
+ *
+ * @note
+ * Lazy evaluation of a computational expression necessarily involves
+ * references to the underlying vector and matrix objects. For example, the
+ * creation of a <code>residual_expr</code> object
+ * @code
+ *   auto residual_expr =  b - op_a * x;
+ * @endcode
+ * stores the computational expression of the residual with references to
+ * the vector <code>b</code> and matrix <code>A</code>. It does not perform
+ * any computation at this point. In particular, if <code>b</code> or
+ * <code>A</code> are changed <b>after</b> the creation of
+ * <code>residual_expr</code> every subsequent evaluation of the expression
+ * is performed with the new values:
+ * @code
+ *   auto residual_expr =  b - op_a * x;
+ *   residual_expr.apply(tmp);  // tmp is a Vector<double>
+ *
+ *   // modify b, or A
+ *
+ *   residual_expr.apply(tmp2); // tmp2 is a Vector<double>
+ *
+ *   // tmp and tmp2 are different
+ * @endcode
+ * Thus, as a safeguard, if you want to compute the result of an expression
+ * right away, always explicitly use a vector type on the left side (and
+ * not <code>auto</code>):
+ * @code
+ *   Vector<double> residual =  b - op_a * x; // computes the residual at this point
+ * @endcode
+ *
+ *
+ * @ingroup LAC
+ * @ingroup MATRICES
+ */
diff --git a/doc/doxygen/headers/main.h b/doc/doxygen/headers/main.h
new file mode 100644
index 0000000..39996bb
--- /dev/null
+++ b/doc/doxygen/headers/main.h
@@ -0,0 +1,259 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+ * @mainpage
+ *
+ * This is the main starting page for the deal.II class and function
+ * documentation. Documentation on other aspects, such as the build
+ * system, can be found elsewhere. In addition, there are
+ * <a href="Tutorial.html">Tutorial programs on the use of the library</a>.
+ *
+ * Many of the classes in the deal.II library can be grouped into modules (see
+ * the <a href="modules.html">Modules page</a> or the corresponding entry in
+ * the menu at the top of this page). These modules form around the building
+ * blocks of any finite element program. An outline of how the primary groups
+ * of classes in deal.II interact is given by the following clickable graph,
+ * with a more detailed description below:
+ *
+ * @dot
+ digraph G
+{
+  graph[rankdir="TB",bgcolor="transparent"];
+
+  edge [fontname="FreeSans",fontsize=15,labelfontname="FreeSans",labelfontsize=10];
+  node [fontname="FreeSans",fontsize=15,
+        shape=record,height=0.2,width=0.4,
+        color="black", fillcolor="white", style="filled"];
+
+  FE [label="FiniteElement",URL="\ref feall"];
+  Tria [label="Triangulation",URL="\ref grid"];
+  DoFHandler [label="DoFHandler",URL="\ref dofs"];
+  Quadrature [label="Quadrature",URL="\ref Quadrature"];
+  Mapping [label="Mapping",URL="\ref mapping"];
+  FEValues [label="FEValues",URL="\ref feaccess"];
+  Linear [label="Linear Systems",URL="\ref LAC"];
+  LinearSolver [label="Linear Solvers",URL="\ref Solvers"];
+  Output [label="Output",URL="\ref output"];
+
+  Tria -> DoFHandler [color="black",fontsize=10,style="solid",fontname="FreeSans"];
+  FE -> DoFHandler [color="black",fontsize=10,style="solid",fontname="FreeSans"];
+  FE -> FEValues [color="black",fontsize=10,style="solid",fontname="FreeSans"];
+  Mapping -> FEValues [color="black",fontsize=10,style="solid",fontname="FreeSans"];
+  Quadrature -> FEValues [color="black",fontsize=10,style="solid",fontname="FreeSans"];
+  FEValues -> Linear [color="black",fontsize=10,style="solid",fontname="FreeSans"];
+  DoFHandler -> Linear [color="black",fontsize=10,style="solid",fontname="FreeSans"];
+  Linear -> LinearSolver [color="black",fontsize=10,style="solid",fontname="FreeSans"];
+  LinearSolver -> Output [color="black",fontsize=10,style="solid",fontname="FreeSans"];
+}
+ * @enddot
+ *
+ * These groups are all covered in the tutorial programs, with a first
+ * overview of how they fit together given in step-3. The following
+ * is a guide to this classification of groups, as well as links
+ * to the documentation pertaining to each of them:
+ *
+ * <ol>
+ *   <li> <b>%Triangulation</b>: Triangulations are collections of
+ *   cells and their lower-dimensional boundary objects. Cells are
+ *   images of the reference hypercube [0,1]<sup>dim</sup> under a
+ *   suitable mapping in the module on @ref mapping.
+ *
+ *   The triangulation stores geometric and topological
+ *   properties of a mesh: how are the cells connected and where are
+ *   their vertices. A triangulation doesn't know
+ *   anything about the finite elements that you may want to use on
+ *   this mesh, and a triangulation does not even know anything about
+ *   the shape of its cells: in 2d it only knows that a cell has 4
+ *   faces (lines) and 4 vertices (and in 3d that it has 6 faces
+ *   (quadrilaterals), 12 lines, and 8 vertices), but everything else
+ *   is defined by a mapping class.
+ *
+ *   The properties and data of triangulations are almost always
+ *   queried through loops over all cells, possibly querying all faces
+ *   of each cell as well. Most
+ *   of the knowledge about a mesh is therefore hidden behind
+ *   @em iterators, i.e. pointer-like structures that one can
+ *   iterate from one cell to the next, and that one can ask for
+ *   information about the cell it presently points to.
+ *
+ *   The classes that describe triangulations and cells are located
+ *   and documented in the @ref grid module. Iterators are described
+ *   in the @ref Iterators module.
+ *
+ *   <li> <b>Finite Element</b>: Finite element classes describe the
+ *   properties of a finite element space as defined on the unit
+ *   cell. This includes, for example, how many degrees of freedom are
+ *   located at vertices, on lines, or in the interior of cells. In
+ *   addition to this, finite element classes of course have to
+ *   provide values and gradients of individual shape functions at
+ *   points on the unit cell.
+ *
+ *   The finite element classes are described in the @ref feall module.
+ *
+ *   <li> <b>%Quadrature</b>: As with finite elements, quadrature
+ *   objects are defined on the unit cell. They only describe the
+ *   location of quadrature points on the unit cell, and the weights
+ *   of quadrature points thereon.
+ *
+ *   The documentation of the classes describing particular quadrature
+ *   formulas is found in the @ref Quadrature module.
+ *
+ *   <li> <b>%DoFHandler</b>: %DoFHandler objects are the confluence
+ *   of triangulations and finite elements: the finite element class
+ *   describes how many degrees of freedom it needs per vertex, line,
+ *   or cell, and the DoFHandler class allocates this space so that
+ *   each vertex, line, or cell of the triangulation has the correct
+ *   number of them. It also gives them a global numbering.
+ *
+ *   A different viewpoint is this: While the mesh and finite element describe
+ *   abstract properties of the finite dimensional space $V_h$ in which we
+ *   seek the discrete solution, the %DoFHandler classes enumerate a concrete
+ *   basis of this space so that we can represent the discrete solution as
+ *   $u_h(\mathbf x)= \sum_j U_j \varphi_j(\mathbf x)$ by an ordered set of
+ *   coefficients $U_j$.
+ *
+ *   Just as with triangulation objects, most operations on
+ *   DoFHandlers are done by looping over all cells and doing something
+ *   on each or a subset of them. The interfaces of the two classes are
+ *   therefore rather similar: they allow one to get iterators to the
+ *   first and last cell (or face, or line, etc) and offer information
+ *   through these iterators. The information that can be gotten from
+ *   these iterators is the geometric and topological information that
+ *   can already be gotten from the triangulation iterators (they are
+ *   in fact derived classes) as well as things like the global
+ *   numbers of the degrees of freedom on the present cell. One can
+ *   also ask an iterator to extract the values corresponding to the
+ *   degrees of freedom on the present cell from a data vector that
+ *   stores values for all degrees of freedom associated with a
+ *   triangulation.
+ *
+ *   It is worth noting that, just as triangulations, DoFHandler
+ *   classes do not know anything about the mapping from the unit cell
+ *   to its individual cells. It is also ignorant of the shape
+ *   functions that correspond to the degrees of freedom it manages:
+ *   all it knows is that there are, for example, 2 degrees of freedom
+ *   for each vertex and 4 per cell interior. Nothing about their
+ *   specifics is relevant to the DoFHandler class with the exception of
+ *   the fact that they exist.
+ *
+ *   The DoFHandler class and its associates are described in the @ref
+ *   dofs module. In addition, there are specialized versions that can
+ *   handle multilevel and hp discretizations. These are described in
+ *   the @ref mg and @ref hp modules. Finite element methods frequently
+ *   imply constraints on degrees of freedom, such as for hanging nodes
+ *   or nodes at which boundary conditions apply; dealing with such
+ *   constraints is described in the @ref constraints module.
+ *
+ *   <li> <b>%Mapping</b>: The next step in a finite element program
+ *   is that one would want to compute matrix and right hand side
+ *   entries or other quantities on each cell of a triangulation,
+ *   using the shape functions of a finite element and quadrature
+ *   points defined by a quadrature rule. To this end, it is necessary
+ *   to map the shape functions, quadrature points, and quadrature
+ *   weights from the unit cell to each cell of a triangulation. This
+ *   is not directly done by, but facilitated by the Mapping and
+ *   derived classes: they describe how to map points from unit to
+ *   real space and back, as well as provide gradients of this
+ *   mapping and Jacobian determinants.
+ *
+ *   These classes are all described in the @ref mapping module.
+ *
+ *   <li> <b>%FEValues</b>: The next step is to actually take a finite
+ *   element and evaluate its shape functions and their gradients at
+ *   the points defined by a quadrature formula when mapped to the
+ *   real cell. This is the realm of the FEValues class and siblings:
+ *   in a sense, they offer a point-wise view of the finite element
+ *   function space.
+ *
+ *   This seems restrictive: in mathematical analysis, we always write
+ *   our formulas in terms of integrals over cells, or faces of cells,
+ *   involving the finite element shape functions. One would therefore
+ *   think that it is necessary to describe finite element spaces as
+ *   continuous spaces. However, in practice, this is not necessary:
+ *   all integrals are in actual computations replaced by
+ *   approximations using quadrature formulas, and what is therefore
+ *   really only necessary is the ability to evaluate shape functions
+ *   at a finite number of given locations inside a domain. The
+ *   FEValues classes offer exactly this information: Given finite
+ *   element, quadrature, and mapping objects, they compute the
+ *   restriction of a continuous function space (as opposed to
+ *   discrete, not as opposed to discontinuous) to a discrete number
+ *   of points.
+ *
+ *   There are a number of objects that can do this: FEValues for
+ *   evaluation on cells, FEFaceValues for evaluation on faces of
+ *   cells, and FESubfaceValues for evaluation on parts of faces of
+ *   cells. All these classes are described in the @ref feaccess
+ *   module.
+ *
+ *   <li> <b>Linear Systems</b>: If one knows how to evaluate the
+ *   values and gradients of shape functions on individual cells using
+ *   FEValues and friends, and knows how to get the global numbers of
+ *   the degrees of freedom on a cell using the DoFHandler iterators,
+ *   then the next step is to use the bilinear form of the problem to
+ *   assemble the system matrix (and right hand side) of the linear
+ *   system. We will then determine the solution of our problem from
+ *   this linear system.
+ *
+ *   To do this, we need to have classes that store and manage the
+ *   entries of matrices and vectors. deal.II comes with a whole set
+ *   of classes for this purpose, as well as with interfaces to other
+ *   software packages that offer similar functionality. Documentation
+ *   to this end can be found in the @ref LAC module.
+ *
+ *   <li> <b>Linear Solvers</b>: In order to determine the solution of
+ *   a finite-dimensional, linear system of equations, one needs
+ *   linear solvers. In finite element applications, they are
+ *   frequently iterative, but sometimes one may also want to use
+ *   direct or sparse direct solvers. deal.II has quite a number of
+ *   these. They are documented in the @ref Solvers module.
+ *
+ *   <li> <b>Output</b>: Finally, once one has obtained a solution of
+ *   a finite element problem on a given triangulation, one will often
+ *   want to postprocess it using a visualization program. This
+ *   library doesn't do the visualization by itself, but rather generates output
+ *   files in a variety of graphics formats understood by widely
+ *   available visualization tools.
+ *
+ *   A description of the classes that do so is given in the @ref
+ *   output module.
+ * </ol>
+ *
+ * In addition, deal.II has a number of groups of classes that go
+ * beyond the ones listed here. They pertain to more refined concepts
+ * of the hierarchy presented above, or to tangential aspects like
+ * handling of input and output that are not necessarily specific to
+ * finite element programs, but appear there as well. These classes
+ * are all listed in the Classes and Namespaces views reachable from
+ * the menu bar at the top of this page, and are also grouped into
+ * modules of their own (see the <a href="modules.html">Modules link</a>
+ * at the top of this page).
+ *
+ * We provide the Doxygen tag file for those of you who would like to directly link the
+ * documentation of application programs to the deal.II online documentation. The tag file
+ * is at <a href="../deal.tag"><code>deal.tag</code></a>. For each release of deal.II,
+ * it resides in the directory right above the Doxygen reference documentation. In order
+ * to use the tag file, you have to download it into a place where Doxygen can find it.
+ * After that, find the key <code>TAGFILES</code> in your Doxygen options file and write something like
+ * <pre>
+ * TAGFILES = deal.tag=http://www.dealii.org/X.Y.Z/doxygen/deal.II
+ *</pre>
+ * where <code>X.Y.Z</code> refers to the release you want to link to. Be sure you use
+ * the matching tag file. In theory, you can also link against the developing revisions
+ * of deal.II, but then you have to fear that your links may become invalid if
+ * the deal.II structure changes.
+ */
diff --git a/doc/doxygen/headers/manifold.h b/doc/doxygen/headers/manifold.h
new file mode 100644
index 0000000..ee3e58a
--- /dev/null
+++ b/doc/doxygen/headers/manifold.h
@@ -0,0 +1,267 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+ * @defgroup manifold Manifold description for triangulations
+ *
+ * <h3>Overview</h3>
+ *
+ * The classes in this module are concerned with the description of the
+ * manifold in which the domain that a Triangulation describes lives. This
+ * manifold description is necessary in two contexts:
+ *
+ * <ul>
+ *
+ *   <li> Mesh refinement: Whenever a cell is refined, it is necessary
+ *   to introduce new vertices in the Triangulation. In the
+ *   simplest case, one assumes that the objects that make up the
+ *   Triangulation are straight line segments, a bi-linear surface or
+ *   a tri-linear volume. The next vertex is then simply put into the
+ *   middle of the old ones (where "middle" means a suitable average of the
+ *   locations of the pre-existing vertices). This is the default behavior of
+ *   the Triangulation class, and is described by the FlatManifold class.
+ *
+ *   On the other hand, if one deals with curved geometries, or geometries
+ *   which require a denser refinement in some direction, this is not the
+ *   appropriate thing to do. The classes derived from the Manifold base class
+ *   therefore describe the geometry of a domain. One can then attach an
+ *   object of a class derived from this base class to the Triangulation
+ *   object using the Triangulation::set_manifold() function associating it
+ *   with a manifold id (see types::manifold_id), use this manifold id on the
+ *   cells, faces or edges of the triangulation that should be described by
+ *   this manifold using the TriaAccessor::set_manifold_id() function, and
+ *   then the Triangulation will ask the manifold object where a new vertex to
+ *   be located on a cell, face or edge so attributed should be located upon
+ *   mesh refinement. Several classes already exist to support the most common
+ *   geometries, e.g., CylinderManifold, or PolarManifold, which represent
+ *   respectively the geometry obtained when describing your space in
+ *   cylindrical coordinates or in polar coordinates.
+ *
+ *   <li> Integration: When using higher order finite element methods, it is
+ *   often necessary to compute cell terms (like cell contributions to the
+ *   matrix and right hand side of the linear system) using curved
+ *   approximations of the boundary, rather than the straight line
+ *   approximation. The actual implementation of such curved elements happens
+ *   in the Mapping class (see the @ref mapping module), which however obtains
+ *   its information about the boundary of the domain from the classes
+ *   described here. The same is, of course, true when integrating boundary
+ *   terms (e.g., inhomogeneous Neumann boundary conditions).
+ *
+ * </ul>
+ *
+ * In deal.II, a Manifold is seen as a collection of points, together
+ * with a notion of distance between points (on the manifold). New
+ * points are typically obtained by providing a local coordinate
+ * system on the manifold, identifying existing points in the local
+ * coordinate system (pulling them back using the local map to obtain
+ * their local coordinates), finding the new point in the local
+ * coordinate system by weighted sums of the existing points, and
+ * transforming back the point in the real space (pushing it forward
+ * using the local map). The main class that implements this mechanism
+ * is the ChartManifold class, and this is the class that users will
+ * likely overload for complex geometries.
+ *
+ * While this process is non trivial in most cases of interest, for most of
+ * the trivial geometries, like cylinders, spheres or shells, deal.II provides
+ * reasonable implementations. More complicated examples can be described
+ * using the techniques shown in step-53 and step-54.
+ *
+ * The boundary of a Triangulation is a special case of Manifold, for
+ * which additional information can be useful in user codes, such as
+ * normal vectors to surfaces or to curves. If your coarse mesh is reasonably
+ * shaped, you might be interested in only attaching a manifold
+ * description to the boundary portion of your domain. This can be done
+ * using the Triangulation::set_boundary() function, which takes as arguments a Boundary
+ * object (derived from Manifold). Notice that Triangulation uses only
+ * the Manifold interface, not the Boundary interface. Other tools,
+ * however, might need to compute exact normals at quadrature points,
+ * and therefore a wrapper to query Boundary objects is provided. 
+ *
+ *
+ * <h3>An example</h3>
+ *
+ * A simple example of why dealing with curved geometries matters is already provided
+ * by step-1, though it is not elaborated there. For example, consider this
+ * small variation of the <code>second_grid()</code> function shown there,
+ * where we simply refine <i>every</i> cell several times and do not deal
+ * with boundaries at all:
+ * @code
+ *  Triangulation<2> triangulation;
+ *
+ *  const Point<2> center (1,0);
+ *  const double inner_radius = 0.5,
+ *               outer_radius = 1.0;
+ *  GridGenerator::hyper_shell (triangulation,
+ *                              center, inner_radius, outer_radius,
+ *                              10);
+ *
+ *  triangulation.refine_global (3);
+ * @endcode
+ * This code leads to a mesh that looks like this:
+ *
+ * @image html hypershell-nothing.png ""
+ *
+ * Our intention was to get a mesh that resembles a ring. However, since we did
+ * not describe this to the triangulation, what happens is that we start with
+ * the 10 coarse cells in circumferential direction we told
+ * GridGenerator::hyper_shell() to create, and each of these is then 3 times
+ * globally refined. Each time refinement requires a new vertex, it is placed
+ * in the middle of the existing ones, regardless of what we may have intended
+ * (but omitted to describe in code).
+ *
+ * This is easily remedied. step-1 already shows how to do this. Consider this
+ * code:
+ * @code
+ *  Triangulation<2> triangulation;
+ *
+ *  const Point<2> center (1,0);
+ *  const double inner_radius = 0.5,
+ *               outer_radius = 1.0;
+ *  GridGenerator::hyper_shell (triangulation,
+ *                              center, inner_radius, outer_radius,
+ *                              10);
+ *  const HyperShellBoundary<2> boundary_description(center);
+ *  triangulation.set_boundary (0, boundary_description);
+ *
+ *  triangulation.refine_global (3); 
+ * @endcode
+ * This code is better, producing the following mesh:
+ *
+ * @image html hypershell-boundary-only.png ""
+ *
+ * The mesh looks better in that it faithfully reproduces the circular inner
+ * and outer boundaries of the domain. However, it is still possible to
+ * identify the original 10 cells by the kinks in the tangential lines. They
+ * result from the fact that every time a cell is refined, new vertices on
+ * interior lines are just placed into the middle of the existing line (the
+ * boundary lines are handled differently because we have attached boundary
+ * objects). In other words, they end up in places that may be in the geometric
+ * middle of a straight line, but not on a circle around the center.
+ *
+ * This can be remedied by assigning a manifold description not only to
+ * the lines along the boundary, but also to the radial lines and cells (which,
+ * in turn, will pass it on to the new lines that are created upon mesh
+ * refinement). This code achieves this:
+ * @code
+ *  Triangulation<2> triangulation;
+ *
+ *  const Point<2> center (1,0);
+ *  const double inner_radius = 0.5,
+ *               outer_radius = 1.0;
+ *  GridGenerator::hyper_shell (triangulation,
+ *                              center, inner_radius, outer_radius,
+ *                              10);
+ *  const SphericalManifold<2> boundary_description(center);
+ *  triangulation.set_manifold (0, boundary_description);
+ *
+ *  Triangulation<2>::active_cell_iterator
+ *    cell = triangulation.begin_active(),
+ *    endc = triangulation.end();
+ *  for (; cell!=endc; ++cell)
+ *    cell->set_all_manifold_ids (0);  
+ *  
+ *  triangulation.refine_global (3);
+ * @endcode
+ * This leads to the following mesh:
+ *
+ * @image html hypershell-all.png ""
+ *
+ * So why does this matter? After all, the last two meshes describe the
+ * exact same domain and we know that upon mesh refinement we obtain the
+ * correct solution regardless of the choice of cells, as long as the
+ * diameter of the largest cell goes to zero.
+ *
+ * There are two answers to this question. First, the numerical effort
+ * of solving a partial differential equation to a certain accuracy typically
+ * depends on the <i>quality</i> of cells since the constant $C$ in error
+ * estimates of the form $\|u-u_h\|_{H^1} \le Ch^p \|u\|_{H^{p+1}}$ depends
+ * on factors such as the maximal ratio of radii of the smallest circumscribed
+ * to largest inscribed circle over all cells (for triangles; or a suitable
+ * generalization for other types of cells). Thus, it is worthwhile creating
+ * meshes with cells that are as well-formed as possible. This is arguably
+ * not so much of an issue for the meshes shown above, but is sometimes an
+ * issue. Consider, for example, the following code and mesh:
+ * @code
+ *  Triangulation<2> triangulation;
+ *
+ *  const Point<2> center (1,0);
+ *  const double inner_radius = 0.5,
+ *               outer_radius = 1.0;
+ *  GridGenerator::hyper_shell (triangulation,
+ *                              center, inner_radius, outer_radius,
+ *                              4);    // four circumferential cells
+ *  const HyperShellBoundary<2> boundary_description(center);
+ *  triangulation.set_boundary (0, boundary_description);
+ *
+ *  triangulation.refine_global (3); 
+ * @endcode
+ *
+ * @image html hypershell-boundary-only-4.png ""
+ *
+ * Here, we create only four circumferential cells in the beginning,
+ * and refining them leads to the mesh shown. Clearly, here we have
+ * cells with bad aspect ratios.
+ *
+ * If we drive this further and start with a coarse mesh of only
+ * three cells (which may be inappropriate here, since we know that it
+ * is not sufficient, but may also be impossible to avoid for complex
+ * geometries generated in mesh generators), then we obtain the following
+ * mesh:
+ *
+ * @image html hypershell-boundary-only-3.png ""
+ *
+ * This mesh neither has the correct geometry after refinement, nor do
+ * all cells have positive area as is necessary for the finite element
+ * method to work. However, even when starting with such an inopportune
+ * mesh, we can make things work by attaching a suitable geometry description
+ * not only to the boundary but also to interior cells and edges, using
+ * the same code as above:
+ * @code
+ *  Triangulation<2> triangulation;
+ *
+ *  const Point<2> center (1,0);
+ *  const double inner_radius = 0.5,
+ *               outer_radius = 1.0;
+ *  GridGenerator::hyper_shell (triangulation,
+ *                              center, inner_radius, outer_radius,
+ *                              3);    // three circumferential cells
+ *  const SphericalManifold<2> boundary_description(center);
+ *  triangulation.set_manifold (0, boundary_description);
+ *
+ *  Triangulation<2>::active_cell_iterator
+ *    cell = triangulation.begin_active(),
+ *    endc = triangulation.end();
+ *  for (; cell!=endc; ++cell)
+ *    cell->set_all_manifold_ids (0);  
+ *  
+ *  triangulation.refine_global (3);
+ * @endcode
+ *
+ * @image html hypershell-all-3.png ""
+ *
+ * Here, even starting with an initial, inappropriately chosen mesh retains
+ * our ability to adequately refine the mesh into one that will serve us
+ * well. This example may be manufactured here, but it is relevant, for example,
+ * in the context of what GridGenerator::hyper_shell() produces in 3d
+ * (see the documentation of this function). It is also germane to the
+ * cases discussed in the @ref GlossDistorted "glossary entry on distorted cells".
+ * 
+ *
+ * @see @ref GlossManifoldIndicator "Glossary entry on manifold indicators"
+ *
+ * @ingroup grid
+ * @author Luca Heltai, 2013
+ */
diff --git a/doc/doxygen/headers/matrices.h b/doc/doxygen/headers/matrices.h
new file mode 100644
index 0000000..ee54556
--- /dev/null
+++ b/doc/doxygen/headers/matrices.h
@@ -0,0 +1,72 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2013, 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+/**
+ * @defgroup Matrices Matrix classes
+ *
+ * deal.II comes with a number of different matrix classes, tailored to the
+ * various purposes for which matrices are used. For example, there are full
+ * matrices, sparse matrices using different storage schemes, matrices
+ * composed of individual blocks, and matrices implemented as interfaces to
+ * other linear algebra classes. As far as possible, all these implementations
+ * share a common interface that contains at least the operations necessary to
+ * write iterative linear solvers (see @ref Solvers), but also element-wise
+ * access to read from and write to a matrix.
+ *
+ * This module is split into different parts. @ref Matrix1 "Basic matrices"
+ * contains all the matrix classes actually storing entries. @ref Matrix2
+ * "Derived matrices", on the other hand, only use basic matrices, but
+ * implement certain operations on them. For example, TransposeMatrix provides
+ * a matrix-vector multiplication that acts as if the underlying matrix had
+ * been transposed, without actually ever storing a transposed matrix.
+ *
+ * @ref Preconditioners are matrix classes as well, since they perform linear
+ * operations on vectors.
+ *
+ * @ingroup LAC
+ */
+
+
+/**
+ * @defgroup Matrix1 Basic matrices
+ *
+ * These are the actual matrix classes provided by deal.II. It is possible to
+ * store values in them and retrieve them. Furthermore, they provide the full
+ * interface required by linear solvers (see @ref Solvers).
+ *
+ * Among the matrices in this group are full matrices, different sparse
+ * matrices, and block matrices. In addition, some of the classes in the
+ * interfaces to other linear algebra libraries (for example the
+ * PETScWrappers) are matrices.
+ *
+ * Most of the deal.II sparse matrix classes are separated from their sparsity
+ * patterns, to make storing several matrices with the same sparsity pattern
+ * more efficient. See @ref Sparsity for more information.
+ *
+ * @ingroup Matrices
+ */
+
+
+/**
+ * @defgroup Matrix2 Derived matrices
+ *
+ * These matrices are built on top of the basic matrices. They perform special
+ * operations using the interface defined by
+ * @ref ConceptMatrixType "the MatrixType concept".
+ *
+ * @ingroup Matrices
+ */
diff --git a/doc/doxygen/headers/memory.h b/doc/doxygen/headers/memory.h
new file mode 100644
index 0000000..dbdc974
--- /dev/null
+++ b/doc/doxygen/headers/memory.h
@@ -0,0 +1,40 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+ * @defgroup memory Memory handling
+ *
+ * This group has some basic classes and namespaces for memory
+ * handling. The Subscriptor and SmartPointer classes are used for
+ * counted memory handling, i.e. whenever a SmartPointer is set to
+ * point to an object, it increases a counter in that object; when the
+ * pointer is set to point elsewhere, it decreases it again. This way,
+ * one always knows how many users of an object there still are. While
+ * this is rarely useful in itself, it is used to generate an
+ * exception if an object is destroyed while a pointer somewhere is
+ * still pointing to it, as any access through that pointer at a later
+ * time would otherwise lead to access of invalid memory regions.
+ *
+ * In contrast to this, the MemoryConsumption namespace provides
+ * functions that can be used to determine the memory consumption of
+ * objects. For some simple classes, like the standard library
+ * containers, it directly determines how much memory they need (or at
+ * least gives an estimate). For deal.II classes, it uses the
+ * <code>memory_consumption</code> member function that most classes
+ * have.
+ *
+ * @ingroup utilities
+ */
diff --git a/doc/doxygen/headers/mesh_worker.h b/doc/doxygen/headers/mesh_worker.h
new file mode 100644
index 0000000..358e883
--- /dev/null
+++ b/doc/doxygen/headers/mesh_worker.h
@@ -0,0 +1,27 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+/**
+ * @defgroup MeshWorker The MeshWorker interface
+ *
+ * A collection of classes and functions simplifying the coding of
+ * loops over all cells and faces.
+ * All classes and functions of this module are in the MeshWorker
+ * namespace, which also contains documentation on the usage.
+ *
+ * @ingroup Integrators
+ */
diff --git a/doc/doxygen/headers/mg.h b/doc/doxygen/headers/mg.h
new file mode 100644
index 0000000..c29314a
--- /dev/null
+++ b/doc/doxygen/headers/mg.h
@@ -0,0 +1,83 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+ * @defgroup mg Multilevel support
+ *
+ * Classes that have to do with multigrid algorithms.
+ *
+ * The main class with implementation of the multigrid scheme is
+ * Multigrid with its function Multigrid::cycle(). It uses the
+ * following abstract classes in order to perform the multigrid cycle:
+ *
+ * <ol>
+ * <li> MGMatrixBase contains the level matrices with a fairly general
+ * implementation in MGMatrix
+ * <li> MGCoarseGridBase is the solver on the coarsest level.
+ * <li> MGSmootherBase performs smoothing on each level.
+ * <li> MGTransferBase organizes the transfer between levels.
+ * </ol>
+ *
+ * Additionally, there is a class PreconditionMG, which is a wrapper
+ * around Multigrid with the standard interface of deal.II @ref
+ * Preconditioners. PreconditionMG also uses the classes inheriting
+ * from MGTransferBase, for instance MGTransferPrebuilt, where it uses
+ * MGTransferPrebuilt::copy_to_mg() and
+ * MGTransferPrebuilt::copy_from_mg_add(), which transfer between the
+ * global vector and the level vectors.
+ *
+ * Finally, we have several auxiliary classes, namely MGLevelObject,
+ * which stores an object on each level.
+ *
+ * See the step-16 and step-39 example programs on how to use this
+ * functionality.
+ *
+ * <h3>Multigrid and hanging nodes</h3>
+ *
+ * Using multigrid methods on adaptively refined meshes involves
+ * more infrastructure than with regular refinement. First, in order
+ * to keep the complexity optimal, we need to decide how to do the
+ * smoothing on each level. And to this end, we have to define what a
+ * level is in the sense of multilevel decomposition.
+ *
+ * First, we define that a level in the multigrid sense is constituted
+ * by all cells of a certain level in the mesh hierarchy. Thus,
+ * smoothing on a certain level is restricted to the subdomain which
+ * consists of cells of this level or finer. This is usually referred
+ * to as local smoothing. The advantage of this definition is that
+ * level matrices for the multigrid scheme can be assembled easily by
+ * traversing all cells of a certain level, and that these level
+ * matrices do not contain hanging nodes.
+ *
+ * The disadvantage of this decomposition is that we need additional
+ * matrices to handle the issues that arise at refinement
+ * edges. Furthermore, the treatment is different, depending on
+ * whether the method is continuous (thus having degrees of freedom on
+ * the refinement edge) or discontinuous (employs flux matrices at the
+ * refinement edge). While these matrices are small, we have to
+ * assemble them and notify the multigrid method of them.
+ */
+
+/**
+ * This namespace contains the reimplementation of multilevel support
+ * after we know what is needed in the context of local refinement and
+ * block systems.
+ *
+ * @ingroup mg
+ */
+namespace mg
+{
+}
diff --git a/doc/doxygen/headers/multithreading.h b/doc/doxygen/headers/multithreading.h
new file mode 100644
index 0000000..146bd10
--- /dev/null
+++ b/doc/doxygen/headers/multithreading.h
@@ -0,0 +1,1233 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+ * @defgroup threads Parallel computing with multiple processors accessing shared memory
+ * @ingroup Parallel
+ *
+ * @brief A module discussing the use of parallelism on shared memory
+ * machines. See the detailed documentation and
+ * @ref MTToC "Table of Contents" below the lengthy list of members
+ * of this module.
+ *
+ * @dealiiVideoLecture{39,40}
+ *
+ * On machines with more than one processor (or multicore processors),
+ * it is often profitable to run several parts of the computations in
+ * %parallel. For example, one could have several threads running in
+ * %parallel, each of which assembles the cell matrices of a subset of
+ * the triangulation and then writes them into the global
+ * matrix. Since assembling matrices is often an expensive operation,
+ * this frequently leads to significant savings in compute time on
+ * multiprocessor machines.
+ *
+ * deal.II supports operations running in %parallel on shared-memory
+ * (SMP) machines through the functions and classes in the Threads
+ * namespace. The MultithreadInfo class allows querying certain
+ * properties of the system, such as the number of CPUs. These
+ * facilities for %parallel computing are described in the
+ * following. The step-9, step-13, step-14, step-32, step-35 and
+ * step-37 tutorial programs also show their use in practice, with the
+ * ones starting with step-32 using a more modern style of doing
+ * things in which essentially we describe <i>what</i> can be done in
+ * %parallel, while the older tutorial programs describe <i>how</i>
+ * things have to be done in %parallel.
+ *
+ * On the other hand, programs running on distributed memory machines
+ * (i.e. clusters) need a different programming model built on top of MPI and
+ * PETSc or Trilinos. This is described in the step-17, step-18 and step-32
+ * example programs.
+ *
+ * @anchor MTToC
+ * <table class="tutorial" width="50%">
+ * <tr><th><b>%Table of contents</b></th></tr>
+ * <tr><td width="100%" valign="top">
+ * <ol>
+ *  <li> @ref MTTasks "Task-based parallelism"
+ *  <li> @ref MTUsing "Using tasks from within deal.II"
+ *  <li> @ref MTHow "How scheduling tasks works and when task-based programming is not efficient"
+ *  <li> @ref MTSimpleLoops "Abstractions for tasks: Simple loops"
+ *  <li> @ref MTComplexLoops "Abstractions for tasks: More complex loops"
+ *  <li> @ref MTWorkStream "Abstractions for tasks: Work streams"
+ *  <li> @ref MTTaskSynchronization "Tasks and synchronization"
+ *  <li> @ref MTThreads "Thread-based parallelism"
+ *  <li> @ref MTTaskThreads "Controlling the number of threads used for tasks"
+ * </ol> </td> </tr> </table>
+ *
+ *
+ * @anchor MTTasks
+ * <h3>Task-based parallelism</h3>
+ *
+ * The traditional view of parallelism on shared memory machines has been to
+ * decompose a program into <i>threads</i>, i.e. running different parts of
+ * the program in %parallel <i>at the same time</i> (if there are more threads
+ * than processor cores on your machine, the operating system will run each
+ * thread round-robin for a brief amount of time before switching execution to
+ * another thread, thereby simulating that threads run
+ * concurrently). deal.II's facilities for threads are described below (see
+ * @ref MTThreads "Thread-based parallelism"), but we would first like to
+ * discuss an abstraction that is often more suitable than threads:
+ * <i>tasks</i>.
+ *
+ * Tasks are essentially the individual parts of a program. Some of them are
+ * independent, whereas others depend on previous tasks to be completed
+ * first. By way of example, consider the typical layout of a part of the
+ * <code>setup_dofs</code> function that most of the tutorial programs have:
+ * @code
+1  dof_handler.distribute_dofs (fe);
+2  DoFTools::make_hanging_node_constraints (dof_handler, hanging_node_constraints);
+3  DoFTools::make_sparsity_pattern (dof_handler, sparsity_pattern);
+4  hanging_node_constraints.condense (sparsity_pattern);
+ * @endcode
+ *
+ * Here, each of the operations requires a significant amount of
+ * computation. But note that not all of them depend on each other: clearly
+ * we can not run statements 2-4 before 1, and 4 needs to wait for the
+ * completion of statements 2 and 3. But statements 2 and 3 are independent:
+ * they could be run in any order, or in %parallel. In essence, we have
+ * identified four <i>tasks</i>, some of which are dependent on each other,
+ * whereas others are independent. In the current example, tasks are
+ * identified with individual C++ statements, but often they more
+ * generally coincide with entire code blocks.
+ *
+ * The point here is this: If we wanted to use threads to exploit the
+ * independence of tasks 2 and 3, we would start two threads and run each of
+ * tasks 2 and 3 on its own thread; we would then wait for the two threads to
+ * finish (an operation called "joining a thread") and go on with statement
+ * 4. Code to achieve this would look like this (the actual syntax is
+ * explained in more detail below):
+ * @code
+   dof_handler.distribute_dofs (fe);
+
+   Threads::Thread<void>
+     thread_1 = Threads::new_thread (&DoFTools::make_hanging_node_constraints,
+                                     dof_handler, hanging_node_constraints);
+   Threads::Thread<void>
+     thread_2 = Threads::new_thread (&DoFTools::make_sparsity_pattern,
+                                     dof_handler, sparsity_pattern);
+   thread_1.join();
+   thread_2.join();
+   hanging_node_constraints.condense (sparsity_pattern);
+ * @endcode
+ *
+ * But what if
+ * your computer has only one processor core, or if we have two but there is
+ * already a different part of the program running in %parallel to the code
+ * above? In that case, the code above would still start new threads, but the
+ * program is not going to run faster since no additional compute resources
+ * are available; rather, the program will run slower since threads have to be
+ * created and destroyed, and the operating system has to schedule threads to
+ * oversubscribed compute resources.
+ *
+ * A better scheme would identify independent tasks and then hand them off to
+ * a scheduler that maps tasks to available compute resources. This way, the
+ * program could, for example, start one thread per processor core and then
+ * let threads work on tasks. Tasks would run to completion, rather than
+ * concurrently, avoiding the overhead of interrupting threads to run a
+ * different thread. In this model, if two processor cores are available,
+ * tasks 2 and 3 above would run in %parallel; if only one is available, the
+ * scheduler would first completely execute task 2 before doing task 3, or the
+ * other way around. This model is able to execute much more efficiently in
+ * particular if a large number of tasks is available for execution, see for
+ * example the discussion below in section
+ * @ref MTWorkStream "Abstractions for tasks: Work streams". In
+ * essence, tasks are a high-level description of what needs to be done,
+ * whereas threads are a low-level way of implementing how these tasks can be
+ * completed. As in many other instances, being able to use a high-level
+ * description allows one to find efficient low-level implementations; in this
+ * vein, it often pays off to use tasks, rather than threads, in a program.
+ *
+ * deal.II does not implement scheduling tasks to threads itself. For this, we
+ * use the <a href="http://www.threadingbuildingblocks.org" target="_top">Threading Building
+ * Blocks (TBB) library</a> for which we provide simple wrappers. TBB
+ * abstracts the details of how to start or stop threads, start tasks on
+ * individual threads, etc, and provides interfaces that are portable across
+ * many different systems.
+ *
+ *
+ *
+ * @anchor MTUsing
+ * <h3>Using tasks from within deal.II</h3>
+ *
+ * Ideally, the syntax to start tasks (and similarly for threads, for that
+ * matter), would be something like this for the example above:
+ * @code
+   Threads::Task<void>
+     thread
+     = new_task DoFTools::make_hanging_node_constraints (dof_handler,
+                                                         hanging_node_constraints);
+ * @endcode
+ * In other words, we would like to indicate the fact that the function call
+ * should be run on a separate task by simply prefixing the call with a
+ * keyword (such as <code>new_task</code> here, with a similar keyword
+ * <code>new_thread</code> for threads). Prefixing a call would return a
+ * handle for the task that we can use to wait for the task's completion and
+ * that we may use to query the return value of the function called (unless it
+ * is void, as it is here).
+ *
+ * Since C++ does not support the creation of new keywords, we have to be a
+ * bit more creative. The way chosen is to introduce a function
+ * <code>new_task</code> that takes as arguments the function to call as well
+ * as the arguments to the call. The <code>new_task</code> function is
+ * overloaded to accommodate starting tasks with functions that take no, one,
+ * two, and up to 9 arguments. In deal.II, these functions live in the Threads
+ * namespace. Consequently, the actual code for what we try to do above looks
+ * like this:
+ * @code
+   Threads::Task<void>
+     thread
+     = Threads::new_task (&DoFTools::make_hanging_node_constraints,
+                          dof_handler,
+                          hanging_node_constraints);
+ * @endcode
+ *
+ * Similarly, if we want to call a member function on a different task, we can
+ * do so by specifying the object on which to call the function as first
+ * argument after the function pointer:
+ * @code
+   class C {
+     public:
+       double f(int);
+   };
+
+   int main () {
+     C c;
+
+     // call f(13) as usual, i.e. using the current processor:
+     c.f(13);
+
+     // call f(42) as a separate task, to be scheduled
+     // whenever processor resources become available:
+     Threads::Task<double>
+       task = Threads::new_task (&C::f, c, 42);
+
+     // do something else in between:
+     ...;
+
+     // having finished our other business, wait until the task
+     // above has terminated and get the value returned by c.f(42):
+     double result = task.return_value();
+ * @endcode
+ * Here, note first how we pass the object <code>c</code> (i.e. the
+ * <code>this</code> pointer the function <code>C::f</code> will see) as if it
+ * was the first argument to the function. Secondly, note how we can acquire
+ * the value returned by the function on the separate task by calling
+ * Threads::Task::return_value(). This function implies waiting for the
+ * completion of the task, i.e. the last line is completely equivalent to
+ * @code
+     task.join ();
+     double result = task.return_value();
+ * @endcode
+ *
+ * Note also that it is entirely valid if <code>C::f</code> wants to start
+ * tasks of its own:
+ * @code
+   class C {
+     public:
+       double f(int);
+     private:
+       double f1(int);
+       double f2(int);
+   };
+
+   double C::f (int i) {
+     Threads::Task<double> t1 = Threads::new_task (&C::f1, *this, i);
+     Threads::Task<double> t2 = Threads::new_task (&C::f2, *this, i);
+     return t1.return_value() + t2.return_value();
+   }
+
+   int main () {
+     C c;
+
+     Threads::Task<double>
+       task = Threads::new_task (&C::f, c, 42);
+
+     // do something else in between:
+     ...;
+
+     double result = task.return_value();
+ * @endcode
+ * Here, we let <code>C::f</code> compute its return value as
+ * <code>c.f1(i)+c.f2(i)</code>. If sufficient CPU resources are available,
+ * then the two parts of the addition as well as the other things in
+ * <code>main()</code> will all run in %parallel. If not, then we will
+ * eventually block at one of the places where the return value is needed,
+ * thereby freeing up the CPU resources necessary to run all those spawned
+ * tasks to completion.
+ *
+ *
+ * In many cases, such as the introductory example of the
+ * <code>setup_dofs</code> function outlined above, one can identify several
+ * independent jobs that can be run as tasks, but will have to wait for all of
+ * them to finish at one point. One can do so by storing the returned object
+ * from all the Threads::new_task() calls, and calling Threads::Task::join()
+ * on each one of them. A simpler way to do this is to put all of these task
+ * objects into a Threads::TaskGroup object and waiting for all of them at
+ * once. The code would then look like this:
+ * @code
+   dof_handler.distribute_dofs (fe);
+
+   Threads::TaskGroup<void> task_group;
+   task_group += Threads::new_task (&DoFTools::make_hanging_node_constraints,
+                                    dof_handler, hanging_node_constraints);
+   task_group += Threads::new_task (&DoFTools::make_sparsity_pattern,
+                                    dof_handler, sparsity_pattern);
+   task_group.join_all ();
+   hanging_node_constraints.condense (sparsity_pattern);
+ * @endcode
+ *
+ *
+ * @anchor MTHow
+ * <h3>How scheduling tasks works and when task-based programming is not efficient</h3>
+ *
+ * The exact details of how tasks are scheduled to run are %internal to the
+ * Threading Building Blocks (TBB) library that deal.II uses for tasks. The
+ * documentation of TBB gives a detailed description of how tasks are
+ * scheduled to threads but is rather quiet on how many threads are actually
+ * used. However, a reasonable guess is probably to assume that TBB creates as
+ * many threads as there are processor cores on your system. This way, it is
+ * able to fully utilize the entire system, without having too many threads
+ * that the operating system will then have to interrupt regularly so that
+ * other threads can run on the available processor cores.
+ *
+ * The point then is that the TBB scheduler takes tasks and lets threads
+ * execute them. %Threads execute tasks completely, i.e. the TBB scheduler does
+ * not interrupt a task half way through to make some halfway progress with
+ * another task. This makes sure that caches are always hot, for example, and
+ * avoids the overhead of preemptive interrupts.
+ *
+ * The downside is that the CPU cores are only fully utilized if the threads
+ * are actually doing something, and that means that (i) there must be enough
+ * tasks available, and (ii) these tasks are actually doing something. Note
+ * that both conditions must be met; in particular, this means that CPU cores
+ * are underutilized if we have identified a sufficient number of tasks but if
+ * some of them twiddle thumbs, for example because a task is writing data to
+ * disk (a process where the CPU frequently has to wait for the disk to
+ * complete a transaction) or is waiting for input. Other cases are where
+ * tasks block on other external events, for example synchronising with other
+ * tasks or threads through a mutex. In such cases, the scheduler would let a
+ * task run on a thread, but doesn't notice that that thread doesn't fully
+ * utilize the CPU core.
+ *
+ * In cases like these, it <i>does</i> make sense to create a new thread (see
+ * @ref MTThreads "Thread-based parallelism" below) that the operating system
+ * can put on hold while it is waiting for something external, and let a
+ * different thread (for example one running a task scheduled by TBB) use the
+ * CPU at the same time.
+ *
+ *
+ * @anchor MTSimpleLoops
+ * <h3>Abstractions for tasks: Simple loops</h3>
+ *
+ * Some loops execute bodies on data that is completely independent
+ * and that can therefore be executed in %parallel. Rather than a
+ * priori splitting the loop into a fixed number of chunks and executing
+ * them on tasks or threads, the TBB library uses the following
+ * concept: the range over which the loop iterates is split into a
+ * certain number of sub-ranges (for example two or three times as
+ * many as there are CPU cores) that are equally distributed among
+ * threads; threads then execute sub-ranges and, if they are done with
+ * their work, steal entire or parts of sub-ranges from other threads
+ * to keep busy. This way, work is load-balanced even if not every
+ * loop iteration takes equally much work, or if some of the CPU cores fall
+ * behind because the operating system interrupted them for some other
+ * work.
+ *
+ * The TBB library primitives for this are a bit clumsy so deal.II has
+ * wrapper routines for the most frequently used operations. The
+ * simplest one is akin to what std::transform does: it takes
+ * one or more ranges of input iterators, one output iterator, and a
+ * function object. A typical implementation of std::transform would
+ * look like this:
+ * @code
+     template <typename InputIterator1, typename InputIterator2,
+               typename OutputIterator, typename FunctionObject>
+     void transform (const InputIterator1 &begin_in_1,
+                     const InputIterator1 &end_in_1,
+                     const InputIterator2 &begin_in_2,
+                     const OutputIterator &begin_out,
+                     FunctionObject       &function)
+     {
+       InputIterator1 in_1 = begin_in_1;
+       InputIterator2 in_2 = begin_in_2;
+       OutputIterator out  = begin_out;
+
+       for (; in_1 != end_in_1; ++in_1, ++in_2, ++out)
+         *out = function(*in_1, *in_2);
+     }
+ * @endcode
+ *
+ * In many cases, <code>function</code> has no state, and so we can
+ * split this loop into several sub-ranges as explained
+ * above. Consequently, deal.II has a set of functions
+ * parallel::transform that look like the one above but that do their
+ * work in %parallel (there are several versions with one, two, and
+ * more input iterators for function objects that take one, two, or
+ * more arguments). The only difference in calling these functions is
+ * that they take an additional last argument that denotes the minimum
+ * size of sub-ranges of <code>[begin_in_1,end_in_1)</code>; it should
+ * be big enough so that we don't spend more time on scheduling
+ * sub-ranges to processors but small enough that processors can be
+ * efficiently load balanced. A rule of thumb appears to be that a
+ * sub-range is too small if it takes less than 2000 instructions to
+ * execute it.
+ *
+ * An example of how to use these functions are vector operations like
+ * the addition in $z = x+y$ where all three objects are of type Vector:
+ * @code
+     parallel::transform (x.begin(), x.end(),
+                          y.begin(),
+                          z.begin(),
+                          (boost::lambda::_1 + boost::lambda::_2),
+                          1000);
+ * @endcode
+ *
+ * In this example, we used the <a
+ * href="http://www.boost.org/doc/libs/1_37_0/doc/html/lambda.html">Boost
+ * Lambda</a> library to construct, on the fly, a function object that
+ * takes two arguments and returns the sum of the two. This is exactly
+ * what we needed when we want to add the individual elements of
+ * vectors $x$ and $y$ and write the sum of the two into the elements
+ * of $z$. Because of the way Boost Lambda is written, the function
+ * object that we get here is completely known to the compiler, and
+ * when it expands the loop that results from parallel::transform, the
+ * result will be as if we had written the loop in its obvious form:
+ * @code
+       InputIterator1 in_1 = x.begin();
+       InputIterator2 in_2 = y.begin();
+       OutputIterator out  = z.begin();
+
+       for (; in_1 != x.end(); ++in_1, ++in_2, ++out)
+         *out = *in_1 + *in_2;
+ * @endcode
+ * The next C++ standard will contain a more elegant way to achieve the
+ * same effect shown above using the Boost library, through a
+ * mechanism known as <i>lambda expressions</i> and <i>closures</i>.
+ *
+ * Note also that we have made sure that no CPU ever gets a chunk of
+ * the whole loop that is smaller than 1000 iterations (unless the
+ * whole range is smaller).
+ *
+ *
+ * @anchor MTComplexLoops
+ * <h3>Abstractions for tasks: More complex loops</h3>
+ *
+ * The scheme shown in the previous section is effective if the
+ * operation done in each iteration is such that it does not require
+ * significant setup costs and can be inlined by the compiler. Lambda
+ * expressions are exactly of this kind because the compiler knows
+ * everything about the lambda expression and can inline it, thereby
+ * eliminating the overhead of calling an external function. However,
+ * there are cases where it is inefficient to call some object or
+ * function within each iteration.
+ *
+ * An example for this case is sparse matrix-vector multiplication. If you
+ * know how data is stored in compressed row format like in the SparseMatrix
+ * class, then a matrix-vector product function looks like this:
+ * @code
+    void SparseMatrix::vmult (const Vector &src,
+                              Vector       &dst) const
+    {
+      const double       *val_ptr    = &values[0];
+      const unsigned int *colnum_ptr = &colnums[0];
+      Vector::iterator dst_ptr = dst.begin();
+
+      for (unsigned int row=0; row<n_rows; ++row, ++dst_ptr)
+        {
+          double s = 0.;
+          const double *const val_end_of_row = &values[rowstart[row+1]];
+          while (val_ptr != val_end_of_row)
+            s += *val_ptr++ * src(*colnum_ptr++);
+          *dst_ptr = s;
+        }
+    }
+ * @endcode
+ * Inside the for loop, we compute the dot product of a single row of the
+ * matrix with the right hand side vector <code>src</code> and write it into
+ * the corresponding element of the <code>dst</code> vector. The code is made
+ * more efficient by utilizing that the elements of the <i>next</i> row follow
+ * the ones of the current row <i>immediately</i>, i.e. at the beginning of
+ * the loop body we do not have to re-set the pointers that point to the
+ * values and column %numbers of each row.
+ *
+ * Using the parallel::transform function above, we could in principle write
+ * this code as follows:
+ * @code
+    void SparseMatrix::vmult_one_row (const Vector     &src,
+                                      Vector           &dst,
+                                      Vector::iterator &dst_row) const
+    {
+      const unsigned int  row = (dst_row - dst.begin());
+
+      const double       *val_ptr    = &values[rowstart[row]];
+      const unsigned int *colnum_ptr = &colnums[rowstart[row]];
+
+      double s = 0.;
+      const double *const val_end_of_row = &values[rowstart[row+1]];
+      while (val_ptr != val_end_of_row)
+        s += *val_ptr++ * src(*colnum_ptr++);
+      *dst_row = s;
+    }
+
+    void SparseMatrix::vmult (const Vector &src,
+                              Vector       &dst) const
+    {
+      parallel::transform (dst.begin(), dst.end(),
+                           std_cxx11::bind (&SparseMatrix::vmult_one_row,
+                                        this,
+                                        std_cxx11::cref(src),
+                                        std_cxx11::ref(dst),
+                                        std_cxx11::_1),
+                           200);
+    }
+ * @endcode
+ * Note how we use <a
+ * href="http://www.boost.org/doc/libs/1_37_0/libs/bind/bind.html">std_cxx11::bind</a>
+ * to <i>bind</i> certain arguments to the <code>vmult_one_row</code>
+ * function, leaving one argument open and thus allowing the
+ * parallel::transform function to consider the passed function argument as
+ * unary. Also note that we need to pass the source and destination vectors as
+ * (const) references to prevent std_cxx11::bind from passing them by value
+ * (implying a copy for <code>src</code> and writing the result into a
+ * temporary copy of <code>dst</code>, neither of which is what we desired).
+ * Finally, notice the grainsize of a minimum of 200 rows of a matrix that
+ * should be processed by an individual CPU core.
+ *
+ * The point is that while this is correct, it is not efficient: we have to
+ * set up the <code>row, val_ptr, colnum_ptr</code> variables in each
+ * iteration of the loop. Furthermore, since now the function object to be
+ * called on each row is not a simple Boost Lambda expression any more, there
+ * is an implied function call including argument passing in each iteration of
+ * the loop.
+ *
+ * A more efficient way is to let TBB split the original range into
+ * sub-ranges, and then call a target function not on each individual element
+ * of the loop, but on the entire range. This is facilitated by the
+ * parallel::apply_to_subranges function:
+ * @code
+    void
+    SparseMatrix::vmult_on_subrange (const unsigned int  begin_row,
+                                     const unsigned int  end_row,
+                                     const Vector     &src,
+                                     Vector           &dst)
+    {
+      const double       *val_ptr    = &values[rowstart[begin_row]];
+      const unsigned int *colnum_ptr = &colnums[rowstart[begin_row]];
+      Vector::iterator dst_ptr = dst.begin() + begin_row;
+
+      for (unsigned int row=begin_row; row<end_row; ++row, ++dst_ptr)
+        {
+          double s = 0.;
+          const double *const val_end_of_row = &values[rowstart[row+1]];
+          while (val_ptr != val_end_of_row)
+            s += *val_ptr++ * src(*colnum_ptr++);
+          *dst_ptr = s;
+        }
+    }
+
+    void SparseMatrix::vmult (const Vector &src,
+                              Vector       &dst) const
+    {
+       parallel::apply_to_subranges (0, n_rows(),
+                                     std_cxx11::bind (&SparseMatrix::vmult_on_subrange,
+                                                  this,
+                                                  std_cxx11::_1, std_cxx11::_2,
+                                                  std_cxx11::cref(src),
+                                                  std_cxx11::ref(dst)),
+                                     200);
+    }
+ * @endcode
+ * Here, we call the <code>vmult_on_subrange</code> function on sub-ranges
+ * of at least 200 elements each, so that the initial setup cost can amortize.
+ *
+ * A related operation is when the loops over elements each produce a
+ * result that must then be accumulated (other reduction operations
+ * than addition of numbers would work as well). An example is to form
+ * the matrix norm $x^T M x$ (it really is only a norm if $M$ is
+ * positive definite, but let's assume for a moment that it is). A
+ * sequential implementation would look like this for sparse matrices:
+ * @code
+    double SparseMatrix::mat_norm (const Vector &x) const
+    {
+      const double       *val_ptr    = &values[0];
+      const unsigned int *colnum_ptr = &colnums[0];
+
+      double norm_sqr = 0;
+
+      for (unsigned int row=0; row<n_rows(); ++row)
+        {
+          double s = 0.;
+          const double *const val_end_of_row = &values[rowstart[row+1]];
+          while (val_ptr != val_end_of_row)
+            s += *val_ptr++ * x(*colnum_ptr++);
+          norm_sqr += x(row) * s;
+        }
+
+      return std::sqrt (norm_sqr);
+    }
+ * @endcode
+ *
+ * It would be nice if we could split this operation over several
+ * sub-ranges of rows, each of which compute their part of the square
+ * of the norm, add results together from the various sub-ranges, and
+ * then take the square root of the result. This is what the
+ * parallel::accumulate_from_subranges function does (note that you
+ * have to specify the result type as a template argument and that, as
+ * usual, the minimum number of elements of the outer loop that can
+ * be scheduled on a single CPU core is given as the last argument):
+ * @code
+    double
+    SparseMatrix::mat_norm_sqr_on_subrange (const unsigned int  begin_row,
+                                            const unsigned int  end_row,
+                                            const Vector     &x)
+    {
+      const double       *val_ptr    = &values[rowstart[begin_row]];
+      const unsigned int *colnum_ptr = &colnums[rowstart[begin_row]];
+      Vector::iterator dst_ptr = dst.begin() + begin_row;
+
+      double norm_sqr = 0;
+
+      for (unsigned int row=begin_row; row<end_row; ++row, ++dst_ptr)
+        {
+          double s = 0.;
+          const double *const val_end_of_row = &values[rowstart[row+1]];
+          while (val_ptr != val_end_of_row)
+            s += *val_ptr++ * x(*colnum_ptr++);
+          norm_sqr += x(row) * s;
+        }
+
+      return norm_sqr;
+    }
+
+    double SparseMatrix::mat_norm (const Vector &x) const
+    {
+      return
+        std::sqrt
+        (parallel::accumulate_from_subranges (0, n_rows(),
+                                              std_cxx11::bind (mat_norm_sqr_on_subrange,
+                                                           this,
+                                                           std_cxx11::_1, std_cxx11::_2,
+                                                           std_cxx11::cref(x)),
+                                              200));
+    }
+ * @endcode
+ *
+ *
+ * @anchor MTWorkStream
+ * <h3>Abstractions for tasks: Work streams</h3>
+ *
+ * In the examples shown in the introduction we had identified a
+ * number of functions that can be run as independent tasks. Ideally,
+ * this number of tasks is larger than the number of CPU cores (to
+ * keep them busy) but is also not exceedingly huge (so as not to
+ * inundate the scheduler with millions of tasks that will then have
+ * to be distributed to 2 or 4 cores, for example). There are,
+ * however, cases where we have many thousands or even millions of
+ * relatively independent jobs: for example, assembling local
+ * contributions to the global linear system on each cell of a mesh;
+ * evaluating an error estimator on each cell; or postprocessing on
+ * each cell computed data for output fall into this class. These
+ * cases can be treated using a software design pattern we call
+ * "%WorkStream". In the following, we will walk through the rationale
+ * for this pattern and its implementation; more details as well as
+ * examples for the speedup that can be achieved with it are given in
+ * the @ref workstream_paper .
+ *
+ * A naive implementation of such a loop using tasks could be written like this:
+ * @code
+   template <int dim>
+   void MyClass<dim>::assemble_on_one_cell (const typename DoFHandler<dim>::active_cell_iterator &cell)
+   { ... }
+
+   template <int dim>
+   void MyClass<dim>::assemble_system ()
+   {
+     Threads::TaskGroup<void> task_group;
+     for (typename DoFHandler<dim>::active_cell_iterator
+            cell = dof_handler.begin_active();
+          cell != dof_handler.end(); ++cell)
+       task_group += Threads::new_task (&MyClass<dim>::assemble_on_one_cell,
+                                        *this,
+                                        cell);
+     task_group.join_all ();
+   }
+ * @endcode
+ * On a big mesh, with maybe a million cells, this would create a massive
+ * number of tasks; while it would keep all CPU cores busy for a while, the
+ * overhead of first creating so many tasks, scheduling them, and then waiting
+ * for them would probably not lead to efficient code. A better strategy would
+ * be if the scheduler could somehow indicate that it has available resources,
+ * at which point we would feed it another newly created task, and we would do
+ * so until we run out of tasks and the ones that were created have been
+ * worked on.
+ *
+ * This is essentially what the WorkStream::run function does: You give it an iterator
+ * range from which it can draw objects to work on (in the above case it is
+ * the interval given by <code>dof_handler.begin_active()</code> to
+ * <code>dof_handler.end()</code>), and a function that would do the work on
+ * each item (the function <code>MyClass::assemble_on_one_cell</code>)
+ * together with an object if it is a member function.
+ *
+ * In the following, let us lay out a rationale for why the functions in the
+ * WorkStream namespace are implemented the way they are. More information on
+ * their implementation can be found in the @ref workstream_paper .
+ * To see the WorkStream class used in practice on tasks like the ones
+ * outlined above, take a look at the step-9, step-13, step-14, step-32, step-35 or step-37
+ * tutorial programs.
+ *
+ * To begin with, given the brief description above,
+ * the way the <code>MyClass::assemble_system</code>
+ * function could then be written is like this (note that this is not quite
+ * the correct syntax, as will be described below):
+ * @code
+   template <int dim>
+   void MyClass<dim>::assemble_on_one_cell (const typename DoFHandler<dim>::active_cell_iterator &cell)
+   { ... }
+
+   template <int dim>
+   void MyClass<dim>::assemble_system ()
+   {
+     WorkStream::run (dof_handler.begin_active(),
+                      dof_handler.end(),
+                      *this,
+                      &MyClass<dim>::assemble_on_one_cell);
+   }
+ * @endcode
+ *
+ * There are at least three problems with this, however:
+ *<ul>
+ *<li>First, let us take a look at how the <code>MyClass::assemble_on_one_cell</code>
+ *   function likely looks:
+ * @code
+   template <int dim>
+   void MyClass<dim>::assemble_on_one_cell (const typename DoFHandler<dim>::active_cell_iterator &cell)
+   {
+     FEValues<dim> fe_values (...);
+     FullMatrix<double> cell_matrix (...);
+     Vector<double>     cell_rhs (...);
+
+     // assemble local contributions
+     fe_values.reinit (cell);
+     for (unsigned int i=0; i<fe.dofs_per_cell; ++i)
+       for (unsigned int j=0; j<fe.dofs_per_cell; ++j)
+         for (unsigned int q=0; q<fe_values.n_quadrature_points; ++q)
+           cell_matrix(i,j) += ...;
+     ...same for cell_rhs...
+
+     // now copy results into global system
+     std::vector<unsigned int> dof_indices (...);
+     cell->get_dof_indices (dof_indices);
+     for (unsigned int i=0; i<fe.dofs_per_cell; ++i)
+       for (unsigned int j=0; j<fe.dofs_per_cell; ++j)
+         system_matrix.add (dof_indices[i], dof_indices[j],
+                            cell_matrix(i,j));
+     ...same for rhs...
+   }
+ * @endcode
+
+ *   The problem here is that several tasks, each running
+ *   <code>MyClass::assemble_on_one_cell</code>, could potentially try
+ *   to write into the object <code>MyClass::system_matrix</code> <i>at
+ *   the same time</i>. This could be avoided by explicit synchronisation
+ *   using a Threads::Mutex, for example, and would look like this:
+ * @code
+     // now copy results into global system
+     std::vector<unsigned int> dof_indices (...);
+     cell->get_dof_indices (dof_indices);
+
+     static Threads::Mutex mutex;
+     mutex.acquire ();
+     for (unsigned int i=0; i<fe.dofs_per_cell; ++i)
+       for (unsigned int j=0; j<fe.dofs_per_cell; ++j)
+         system_matrix.add (dof_indices[i], dof_indices[j],
+                            cell_matrix(i,j));
+     ...same for rhs...
+     mutex.release ();
+   }
+ * @endcode
+
+ *   By making the mutex a static variable, it exists only once globally
+ *   (i.e. once for all tasks that may be running in %parallel) and only one of
+ *   the tasks can enter the region protected by the acquire/release calls on
+ *   the mutex. As an aside, a better way to write this code would be like
+ *   this, ensuring that the mutex is released even in case an exception is
+ *   thrown, and without the need to remember to write the call to
+ *   Threads::Mutex::release():
+ * @code
+     // now copy results into global system
+     static Threads::Mutex mutex;
+     Threads::Mutex::ScopedLock lock (mutex);
+     for (unsigned int i=0; i<fe.dofs_per_cell; ++i)
+       for (unsigned int j=0; j<fe.dofs_per_cell; ++j)
+         system_matrix.add (dof_indices[i], dof_indices[j],
+                            cell_matrix(i,j));
+     ...same for rhs...
+   }
+ * @endcode
+ *   Here, the mutex remains locked from the time the ScopedLock is created to
+ *   where it is destroyed, at the end of the code block.
+ *
+ *   Note that although we now avoid the race condition that multiple threads
+ *   could be writing to the same object, this code is not very efficient:
+ *   mutexes are expensive on multicore machines, and we also block threads
+ *   some of the time which is inefficient with tasks as explained above in
+ *   the section on
+ *   @ref MTHow "How scheduling tasks works and when task-based programming is not efficient".
+ *
+ *<li>A second correctness problem is that even if we do lock the global matrix
+ *   and right hand side objects using a mutex, we do so in a more or less
+ *   random order: while tasks are created in the order in which we traverse
+ *   cells normally, there is no guarantee that by the time we get to the
+ *   point where we want to copy the local into the global contributions the
+ *   order is still as if we computed things sequentially. In other words, it
+ *   may happen that we add the contributions of cell 1 before those of cell
+ *   0. That may seem harmless because addition is commutative and
+ *   associative, but in fact it
+ *   is not if done in floating point arithmetic: $a+b+c \neq a+c+b$ -- take
+ *   for example $a=1, b=-1, c=10^{-20}$ (because $1+10^{-20}=1$ in floating
+ *   point arithmetic, using double precision).
+ *
+ *   As a consequence, the exact values that end up in the global matrix and
+ *   right hand side will be close but may differ by amounts close to
+ *   round-off depending on the order in which tasks happened to finish their
+ *   job. That's not a desirable outcome, since results will not be
+ *   reproducible this way.
+ *
+ *   As a consequence, the way the WorkStream class is designed is to use two
+ *   functions: the <code>MyClass::assemble_on_one_cell</code> computes the
+ *   local contributions and stores them somewhere (we'll get to that next), and
+ *   a second function, say <code>MyClass::copy_local_to_global</code>, that
+ *   copies the results computed on each cell into the global objects. The
+ *   trick implemented in the WorkStream class is that (i) the
+ *   <code>MyClass::copy_local_to_global</code> never runs more than once in
+ *   %parallel, so we do not need to synchronise execution through a mutex, and
+ *   (ii) it runs in exactly the same order on cells as they appear in the
+ *   iterator range, i.e. we add elements into the global matrix the same way
+ *   <i>every time, independently of when the computation of these elements
+ *   finishes</i>.
+ *
+ *   We now only have to discuss how the
+ *   <code>MyClass::assemble_on_one_cell</code> communicates to
+ *   <code>MyClass::copy_local_to_global</code> what it has computed. The way
+ *   this is done is to use an object that holds all temporary data:
+ * @code
+   struct PerTaskData {
+     FullMatrix<double>        cell_matrix;
+     Vector<double>            cell_rhs;
+     std::vector<unsigned int> dof_indices;
+   }
+
+   template <int dim>
+   void MyClass<dim>::assemble_on_one_cell (const typename DoFHandler<dim>::active_cell_iterator &cell,
+                                            PerTaskData &data)
+   {
+     FEValues<dim> fe_values (...);
+
+     data.cell_matrix = 0;
+     data.cell_rhs    = 0;
+
+     // assemble local contributions
+     fe_values.reinit (cell);
+     for (unsigned int i=0; i<fe.dofs_per_cell; ++i)
+       for (unsigned int j=0; j<fe.dofs_per_cell; ++j)
+         for (unsigned int q=0; q<fe_values.n_quadrature_points; ++q)
+           data.cell_matrix(i,j) += ...;
+     ...same for cell_rhs...
+
+     cell->get_dof_indices (data.dof_indices);
+   }
+
+   template <int dim>
+   void MyClass<dim>::copy_local_to_global (const PerTaskData &data)
+   {
+     for (unsigned int i=0; i<fe.dofs_per_cell; ++i)
+       for (unsigned int j=0; j<fe.dofs_per_cell; ++j)
+         system_matrix.add (data.dof_indices[i], data.dof_indices[j],
+                            data.cell_matrix(i,j));
+     ...same for rhs...
+   }
+
+   template <int dim>
+   void MyClass<dim>::assemble_system ()
+   {
+     PerTaskData per_task_data;
+     ...initialize members of per_task_data to the correct sizes...
+
+     WorkStream::run (dof_handler.begin_active(),
+                      dof_handler.end(),
+                      *this,
+                      &MyClass<dim>::assemble_on_one_cell,
+                      &MyClass<dim>::copy_local_to_global,
+                      per_task_data);
+   }
+ * @endcode
+ *
+ *   The way this works is that we create a sample <code>per_task_data</code>
+ *   object that the work stream object will replicate once per task that runs
+ *   in %parallel. For each task, this object will be passed first to one of
+ *   possibly several instances of <code>MyClass::assemble_on_one_cell</code>
+ *   running in %parallel which fills it with the data obtained on a single
+ *   cell, and then to a sequentially running
+ *   <code>MyClass::copy_local_to_global</code> that copies data into the
+ *   global object. In practice, of course, we will not generate millions of
+ *   <code>per_task_data</code> objects if we have millions of cells; rather,
+ *   we recycle these objects after they have been used by
+ *   <code>MyClass::copy_local_to_global</code> and feed them back into
+ *   another instance of <code>MyClass::assemble_on_one_cell</code>; this
+ *   means that the number of such objects we actually do create is a small
+ *   multiple of the number of threads the scheduler uses, which is typically
+ *   about as many as there are CPU cores on a system.
+ *
+ * <li>The last issue that is worth addressing is that the way we wrote the
+ *   <code>MyClass::assemble_on_one_cell</code> function above, we create and
+ *   destroy an FEValues object every time the function is called, i.e. once
+ *   for each cell in the triangulation. That's an immensely expensive
+ *   operation because the FEValues class tries to do a lot of work in its
+ *   constructor in an attempt to reduce the number of operations we have to
+ *   do on each cell (i.e. it increases the constant in the ${\cal O}(1)$
+ *   effort to initialize such an object in order to reduce the constant in
+ *   the ${\cal O}(N)$ operations to call FEValues::reinit on the $N$ cells of
+ *   a triangulation). Creating and destroying an FEValues object on each cell
+ *   invalidates this effort.
+ *
+ *   The way to avoid this is to put the FEValues object into a second
+ *   structure that will hold scratch data, and initialize it in the
+ *   constructor:
+ * @code
+   struct PerTaskData {
+     FullMatrix<double>        cell_matrix;
+     Vector<double>            cell_rhs;
+     std::vector<unsigned int> dof_indices;
+
+     PerTaskData (const FiniteElement<dim> &fe)
+                :
+                cell_matrix (fe.dofs_per_cell, fe.dofs_per_cell),
+                cell_rhs (fe.dofs_per_cell),
+                dof_indices (fe.dofs_per_cell)
+       {}
+   }
+
+   struct ScratchData {
+     FEValues<dim>             fe_values;
+
+     ScratchData (const FiniteElement<dim> &fe,
+                  const Quadrature<dim>    &quadrature,
+                  const UpdateFlags         update_flags)
+                :
+                fe_values (fe, quadrature, update_flags)
+       {}
+
+     ScratchData (const ScratchData &scratch)
+                :
+                fe_values (scratch.fe_values.get_fe(),
+                           scratch.fe_values.get_quadrature(),
+                           scratch.fe_values.get_update_flags())
+       {}
+   }
+ * @endcode
+ * and then use this FEValues object in the assemble function:
+ * @code
+   template <int dim>
+   void MyClass<dim>::assemble_on_one_cell (const typename DoFHandler<dim>::active_cell_iterator &cell,
+                                            ScratchData &scratch,
+                                            PerTaskData &data)
+   {
+     scratch.fe_values.reinit (cell);
+     ...
+   }
+ * @endcode
+ *   Just as for the <code>PerTaskData</code> structure, we will create a
+ *   sample <code>ScratchData</code> object and pass it to the work stream
+ *   object, which will replicate it as many times as necessary. For this
+ *   to work <code>ScratchData</code> structures need to be copyable. Since FEValues
+ *   objects are rather complex and cannot be copied implicitly, we provided
+ *   our own copy constructor for the <code>ScratchData</code> structure.
+ *
+ *   The same approach, putting things into the <code>ScratchData</code>
+ *   data structure, should be used for everything that is expensive to
+ *   construct. This holds, in particular, for everything that needs to
+ *   allocate memory upon construction; for example, if the values of a
+ *   function need to be evaluated at quadrature points, then this is
+ *   expensive:
+ * @code
+   template <int dim>
+   void MyClass<dim>::assemble_on_one_cell (const typename DoFHandler<dim>::active_cell_iterator &cell,
+                                            ScratchData &scratch,
+                                            PerTaskData &data)
+   {
+     std::vector<double> rhs_values (scratch.fe_values.n_quadrature_points);
+     rhs_function.value_list (scratch.fe_values.get_quadrature_points(),
+                              rhs_values);
+     ...
+   }
+ * @endcode
+ * whereas this is a much cheaper way:
+ * @code
+   struct ScratchData {
+     std::vector<double>       rhs_values;
+     FEValues<dim>             fe_values;
+
+     ScratchData (const FiniteElement<dim> &fe,
+                  const Quadrature<dim>    &quadrature,
+                  const UpdateFlags         update_flags)
+                :
+                rhs_values (quadrature.size()),
+                fe_values (fe, quadrature, update_flags)
+       {}
+
+      ScratchData (const ScratchData &scratch)
+                :
+                rhs_values (scratch.rhs_values),
+                fe_values (scratch.fe_values.get_fe(),
+                           scratch.fe_values.get_quadrature(),
+                           scratch.fe_values.get_update_flags())
+       {}
+   }
+
+   template <int dim>
+   void MyClass<dim>::assemble_on_one_cell (const typename DoFHandler<dim>::active_cell_iterator &cell,
+                                            ScratchData &scratch,
+                                            PerTaskData &data)
+   {
+     rhs_function.value_list (scratch.fe_values.get_quadrature_points(),
+                              scratch.rhs_values);
+     ...
+   }
+ * @endcode
+ *
+ * </ul>
+ *
+ * As a final point: What if, for some reason, my assembler and copier
+ * functions do not match the above signatures with three and one argument,
+ * respectively? That's not a problem either. The WorkStream namespace offers two
+ * versions of the WorkStream::run() function: one that takes an object and
+ * the addresses of two member functions, and one that simply takes two
+ * function objects that can be called with three and one argument,
+ * respectively. So, in other words, the following two calls are exactly
+ * identical:
+ * @code
+     WorkStream::run (dof_handler.begin_active(),
+                      dof_handler.end(),
+                      *this,
+                      &MyClass<dim>::assemble_on_one_cell,
+                      &MyClass<dim>::copy_local_to_global,
+                      per_task_data);
+     // ...is the same as:
+     WorkStream::run (dof_handler.begin_active(),
+                      dof_handler.end(),
+                      std_cxx11::bind(&MyClass<dim>::assemble_on_one_cell,
+                                      *this,
+                                      std_cxx11::_1,
+                                      std_cxx11::_2,
+                                      std_cxx11::_3),
+                      std_cxx11::bind(&MyClass<dim>::copy_local_to_global,
+                                      *this,
+                                      std_cxx11::_1),
+                      per_task_data);
+ * @endcode
+ * Note how <code>std_cxx11::bind</code> produces a function object that takes three
+ * arguments by binding the member function to the <code>*this</code>
+ * object. <code>std_cxx11::_1, std_cxx11::_2</code> and <code>std_cxx11::_3</code> are placeholders for the first,
+ * second and third argument that can be specified later on. In other words, for
+ * example if <code>p</code> is the result of the first call to
+ * <code>std_cxx11::bind</code>, then the call <code>p(cell, scratch_data,
+ * per_task_data)</code> will result in executing
+ * <code>this-@>assemble_on_one_cell (cell, scratch_data, per_task_data)</code>,
+ * i.e. <code>std_cxx11::bind</code> has bound the object to the function pointer
+ * but left the three arguments open for later.
+ *
+ * Similarly, let us assume that <code>MyClass::assemble_on_one_cell</code>
+ * has the following signature in the solver of a nonlinear, time-dependent problem:
+ * @code
+   template <int dim>
+   void
+   MyClass<dim>::assemble_on_one_cell (const Vector<double> &linearization_point,
+                                       const typename DoFHandler<dim>::active_cell_iterator &cell,
+                                       ScratchData &scratch,
+                                       PerTaskData &data,
+                                       const double current_time)
+   { ... }
+ * @endcode
+ * Because WorkStream expects to be able to call the worker function with
+ * just three arguments, the first of which is the iterator and the second
+ * and third the ScratchData and PerTaskData objects, we need to pass the following
+ * to it:
+ * @code
+     WorkStream::run (dof_handler.begin_active(),
+                      dof_handler.end(),
+                      std_cxx11::bind(&MyClass<dim>::assemble_on_one_cell,
+                                      *this,
+                                      current_solution,
+                                      std_cxx11::_1,
+                                      std_cxx11::_2,
+                                      std_cxx11::_3,
+                                      previous_time+time_step),
+                      std_cxx11::bind(&MyClass<dim>::copy_local_to_global,
+                                      *this,
+                                      std_cxx11::_1),
+                      per_task_data);
+ * @endcode
+ * Here, we bind the object, the linearization point argument, and the
+ * current time argument to the function before we hand it off to
+ * WorkStream::run(). WorkStream::run() will then simply call the
+ * function with the cell and scratch and per task objects which will be filled
+ * in at the positions indicated by <code>std_cxx11::_1, std_cxx11::_2</code>
+ * and <code>std_cxx11::_3</code>.
+ *
+ * There are refinements to the WorkStream::run function shown above.
+ * For example, one may realize that the basic idea above can only scale
+ * if the copy-local-to-global function is much quicker than the
+ * local assembly function because the former has to run sequentially.
+ * This limitation can only be improved upon by scheduling more work
+ * in parallel. This leads to the notion of coloring the graph of
+ * cells (or, more generally, iterators) we work on by recording
+ * which write operations conflict with each other. Consequently, there
+ * is a third version of WorkStream::run that doesn't just take a
+ * range of iterators, but instead a vector of vectors consisting of
+ * elements that can be worked on at the same time. This concept
+ * is explained in great detail in the @ref workstream_paper , along
+ * with performance evaluations for common examples.
+ *
+ *
+ * @anchor MTTaskSynchronization
+ * <h3>Tasks and synchronization</h3>
+ *
+ * Tasks are powerful but they do have their limitations: to make
+ * things efficient, the task scheduler never interrupts tasks by
+ * itself. With the exception of the situation where one calls the
+ * Threads::Task::join function to wait for another task to finish,
+ * the task scheduler always runs a task to completion. The downside
+ * is that the scheduler does not see if a task is actually idling,
+ * for example if it waits for something else to happen (file IO to
+ * finish, input from the keyboard, etc). In cases like this, the task
+ * scheduler could in principle run a different task, but since it
+ * doesn't know what tasks are doing it doesn't. Functions that do
+ * wait for external events to happen are therefore not good
+ * candidates for tasks and should use threads (see below).
+ *
+ * However, there are cases where tasks are not only a bad abstraction
+ * for a job but can actually not be used: As a matter of principle,
+ * tasks can not synchronize with other tasks through the use of a
+ * mutex or a condition variable (see the Threads::Mutex and
+ * Threads::ConditionVariable classes). The reason is that if task A
+ * needs to wait for task B to finish something, then this is only
+ * going to work if there is a guarantee that task B will eventually
+ * be able to run and finish the task. Now imagine that you have 2
+ * processors, and tasks A1 and A2 are currently running; let's assume
+ * that they have queued tasks B1 and B2, and are now waiting with a
+ * mutex for these queued tasks to finish (part of) their work. Since
+ * the machine has only two processors, the task scheduler will only
+ * start B1 or B2 once either A1 or A2 are done -- but this isn't
+ * happening since they are waiting using operating system resources
+ * (a mutex) rather than task scheduler resources. The result is a
+ * deadlock.
+ *
+ * The bottom line is that tasks can not use mutexes or condition variables to
+ * synchronize with other tasks. If communication between tasks is necessary,
+ * you need to use threads because the operating system makes sure that all
+ * threads eventually get to run, independent of the total number of threads.
+ * Note however that the same is not true if you only use a Threads::Mutex on
+ * each task separately to protect access to a variable that the tasks may
+ * write to: this use of mutexes is ok; tasks simply must not wait for
+ * another task to do something.
+ *
+ *
+ * @anchor MTThreads
+ * <h3>Thread-based parallelism</h3>
+ *
+ * Even though tasks are a higher-level way to describe things, there are
+ * cases that are poorly suited to a task (for a discussion of some of
+ * these cases see
+ * @ref MTHow "How scheduling tasks works and when task-based programming is not efficient"
+ * above). Generally, jobs that are not able to fully utilize the CPU are bad
+ * fits for tasks and good fits for threads.
+ *
+ * In a case like this, you can resort to explicitly starting threads, rather
+ * than tasks, using pretty much the same syntax as above. For example, if you
+ * had a function in your application that generates graphical output and then
+ * estimates the error to refine the mesh for the next iteration of an
+ * adaptive mesh scheme, it could look like this:
+ * @code
+   template <int dim>
+   void MyClass<dim>::output_and_estimate_error () const
+   {
+     DataOut<dim> data_out;
+     data_out.attach_dof_handler (dof_handler);
+     data_out.add_data_vector (solution, "solution");
+     data_out.build_patches ();
+
+     std::ofstream output ("solution.vtk");
+
+     Threads::Thread<void>
+       thread = Threads::new_thread (&DataOut<dim>::write_vtk, data_out, output);
+
+     Vector<float> estimated_error_per_cell (triangulation.n_active_cells());
+     KellyErrorEstimator<dim>::estimate (dof_handler,
+                                        QGauss<dim-1>(3),
+                                        typename FunctionMap<dim>::type(),
+                                        solution,
+                                        estimated_error_per_cell);
+     thread.join ();
+ * @endcode
+ *
+ * Here, Threads::new_thread starts the given function that writes to the
+ * output file on a new thread that can run in %parallel to everything else:
+ * In %parallel to the KellyErrorEstimator::estimate() function, the
+ * DataOut::write_vtk() function will run on a separate thread. This execution
+ * is independent of the scheduler that takes care of tasks, but that is
+ * not a problem because writing lots of data to a file is not something that
+ * will keep a CPU very busy.
+ *
+ * Creating threads works pretty much the same way as tasks, i.e. you can wait
+ * for the termination of a thread using Threads::Thread::join(), query the
+ * return value of a finished thread using Threads::Thread::return_value(),
+ * and you can group threads into a Threads::ThreadGroup object and wait for
+ * all of them to finish.
+ *
+ *
+ * @anchor MTTaskThreads
+ * <h3>Controlling the number of threads used for tasks</h3>
+ * As mentioned earlier, deal.II does not implement scheduling tasks to
+ * threads or even starting threads itself. The TBB library does a good job at
+ * deciding how many threads to use and they do not recommend setting the
+ * number of threads explicitly. However, on large symmetric multiprocessing
+ * (SMP) machines, especially ones with a resource/job manager or on systems
+ * on which access to some parts of the memory is possible but very expensive
+ * for processors far away (e.g. very large NUMA SMP machines), it may be necessary
+ * to explicitly set the number of threads to prevent the TBB from using too
+ * many CPUs. Another use case is if you run multiple MPI jobs on a single
+ * machine and each job should only use a subset of the available processor
+ * cores.
+ *
+ * Setting the number of threads explicitly is done by calling
+ * MultithreadInfo::set_thread_limit() before any other calls to functions
+ * that may create threads. In practice, it should be one of the first
+ * functions you call in <code>main()</code>.
+ *
+ * If you run your program with MPI, then you can use the optional third
+ * argument to the constructor of the MPI_InitFinalize class to achieve the
+ * same goal.
+ *
+ * @note A small number of places inside deal.II also use thread-based
+ * parallelism explicitly, for example for running background tasks that have
+ * to wait for input or output to happen and consequently do not consume
+ * much CPU time. Such threads do not run under the control of the TBB
+ * task scheduler and, therefore, are not affected by the procedure above.
+ * Under some circumstances, deal.II also calls the BLAS library
+ * which may sometimes also start threads of its own. You will have to consult
+ * the documentation of your BLAS installation to determine how to set the
+ * number of threads for these operations.
+ */
diff --git a/doc/doxygen/headers/namespace_dealii.h b/doc/doxygen/headers/namespace_dealii.h
new file mode 100644
index 0000000..9fee9e8
--- /dev/null
+++ b/doc/doxygen/headers/namespace_dealii.h
@@ -0,0 +1,27 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/**
+ * This is the namespace in which everything in deal.II is. To avoid prefixing
+ * every class and function call with the namespace name, the @ref Tutorial
+ * therefore have a <code>using namespace dealii;</code> at the top of the
+ * code.
+ *
+ * Throughout the documentation, the namespace prefix is suppressed for
+ * brevity.
+ */
+namespace dealii
+{
+}
diff --git a/doc/doxygen/headers/numerical_algorithms.h b/doc/doxygen/headers/numerical_algorithms.h
new file mode 100644
index 0000000..5dd5809
--- /dev/null
+++ b/doc/doxygen/headers/numerical_algorithms.h
@@ -0,0 +1,39 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+ * @defgroup numerics Numerical algorithms
+ *
+ * This module groups a diverse set of classes that generally implement some
+ * sort of numerical algorithm on top of all the basic triangulation, DoFHandler,
+ * and finite element classes in the library. They are generally unconnected
+ * to each other.
+ *
+ * Some of the classes, like DerivativeApproximation, KellyErrorEstimator and
+ * SolutionTransfer, act on solutions already obtained, and compute derived
+ * quantities in the first two cases, or help transferring a set of vectors
+ * from one mesh to another.
+ *
+ * The namespaces MatrixCreator, MatrixTools, and VectorTools provide an
+ * assortment of services, such as creating a Laplace matrix, projecting or
+ * interpolating a function onto the present finite element space, etc.  The
+ * difference to the functions in the DoFTools and FETools namespaces is that
+ * they work on vectors (i.e. members of a finite element function space on a
+ * given triangulation) or help in the creation of it. On the other hand, the
+ * DoFTools functions only act on a given DoFHandler object without reference
+ * to a data vector, and the FETools functions generally work with finite
+ * element classes but again without any associated data vectors.
+ */
diff --git a/doc/doxygen/headers/parallel.h b/doc/doxygen/headers/parallel.h
new file mode 100644
index 0000000..37551f5
--- /dev/null
+++ b/doc/doxygen/headers/parallel.h
@@ -0,0 +1,39 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+ * @defgroup Parallel Parallel computing
+ *
+ * @brief A module discussing the use of multiple processors.
+ *
+ * This module contains information on %parallel computing. It is
+ * subdivided into parts on @ref threads and on @ref distributed.
+ */
+
+
+/**
+ * A namespace in which we define classes and algorithms that deal
+ * with running in %parallel on shared memory machines when deal.II is
+ * configured to use multiple threads (see @ref threads), as well as
+ * running things in %parallel on %distributed memory machines (see
+ * @ref distributed).
+ *
+ * @ingroup threads
+ * @author Wolfgang Bangerth, 2008, 2009
+ */
+namespace parallel
+{
+}
diff --git a/doc/doxygen/headers/petsc.h b/doc/doxygen/headers/petsc.h
new file mode 100644
index 0000000..5b34d48
--- /dev/null
+++ b/doc/doxygen/headers/petsc.h
@@ -0,0 +1,34 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+/**
+ * @defgroup PETScWrappers PETScWrappers
+ * 
+ * The classes in this module are
+ * wrappers around functionality provided by the PETSc library. They provide a
+ * modern object-oriented interface that is compatible with the interfaces of
+ * the other linear algebra classes in deal.II. All classes and functions in
+ * this group reside in a namespace @p PETScWrappers.
+ *
+ * These classes are only available if a PETSc installation was detected
+ * during configuration of deal.II. Refer to the README file for more details
+ * about this.
+ *
+ * @author Wolfgang Bangerth, 2004
+ *
+ * @ingroup LAC
+ */
diff --git a/doc/doxygen/headers/polynomials.h b/doc/doxygen/headers/polynomials.h
new file mode 100644
index 0000000..6b0d0fe
--- /dev/null
+++ b/doc/doxygen/headers/polynomials.h
@@ -0,0 +1,25 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+ * @defgroup Polynomials Polynomials and polynomial spaces
+ *
+ * This module groups classes that define certain families of polynomial
+ * functions. In particular, this includes Lagrangian interpolation
+ * polynomials for equidistant support points and their tensor products in
+ * higher dimensions, but also more exotic ones like Brezzi-Douglas-Marini or
+ * Raviart-Thomas spaces.
+ */
diff --git a/doc/doxygen/headers/preconditioners.h b/doc/doxygen/headers/preconditioners.h
new file mode 100644
index 0000000..7b03fa5
--- /dev/null
+++ b/doc/doxygen/headers/preconditioners.h
@@ -0,0 +1,106 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+/**
+ * @defgroup Preconditioners Preconditioners and Relaxation Operators
+ *
+ * <h3>Preconditioners</h3>
+ *
+ * Preconditioners are used to accelerate the iterative solution of linear
+ * systems. Typical preconditioners are Jacobi, Gauss-Seidel, or SSOR, but the
+ * library also supports more complex ones such as Vanka or incomplete LU
+ * decompositions (ILU). In addition, sparse direct solvers can be used as
+ * preconditioners when available.
+ *
+ * Broadly speaking, preconditioners are operators, which are multiplied with
+ * a matrix to improve conditioning. The idea is that the preconditioned
+ * system <i>P<sup>-1</sup>Ax = P<sup>-1</sup>b</i> is much easier to solve
+ * than the original system <i>Ax = b</i>. What this means exactly depends on
+ * the structure of the matrix and cannot be discussed here in generality. For
+ * symmetric, positive definite matrices <i>A</i> and <i>P</i>, it means that
+ * the spectral condition number (the quotient of greatest and smallest
+ * eigenvalue) of <i>P<sup>-1</sup>A</i> is much smaller than the one of
+ * <i>A</i>.
+ *
+ * For the simplest example, Richardson iteration as implemented in
+ * SolverRichardson, the preconditioned iteration looks like
+ * @f[
+ *  x^{k+1} = x^k - P^{-1} \bigl(A x^k - b\bigr).
+ * @f]
+ * Accordingly, preconditioning amounts to applying a linear operator to the
+ * residual, and consequently, the action of the preconditioner
+ * <i>P<sup>-1</sup></i> is implemented as <tt>vmult()</tt>.
+ * Templates in deal.II that require a preconditioner indicate the
+ * requirement with
+ * @ref ConceptPreconditionerType "the PreconditionerType concept". In
+ * practice, one can usually treat any matrix-like object which defines
+ * <code>vmult()</code> and <code>Tvmult()</code> as a preconditioner. All
+ * preconditioner classes in this module implement this interface.
+ *
+ * When used
+ * in Krylov space methods, it is up to the method whether it simply
+ * replaces multiplications with <i>A</i> by those with
+ * <i>P<sup>-1</sup>A</i> (for instance SolverBicgstab), or does more
+ * sophisticated things. SolverCG for instance uses
+ * <i>P<sup>-1</sup></i> to define an inner product, which is the
+ * reason why it requires a symmetric, positive definite operator <i>P</i>.
+ *
+ * <h3>Relaxation methods</h3>
+ *
+ * Many preconditioners rely on an additive splitting <i>A = P - N</i>
+ * into two matrices. In this case, the iteration step of the
+ * Richardson method above can be simplified to
+ * @f[
+ *  x^{k+1} = P^{-1} \bigl(N x^k + b\bigr),
+ * @f]
+ * thus avoiding multiplication with <i>A</i> completely. We call
+ * operators mapping the previous iterate <i>x<sup>k</sup></i> to the
+ * next iterate in this way relaxation operators. Their generic
+ * interface is given by @ref ConceptRelaxationType "the RelaxationType concept".
+ * The classes with names starting with <tt>Relaxation</tt> in this module
+ * implement this interface, as well as the preconditioners
+ * PreconditionJacobi, PreconditionSOR, PreconditionBlockJacobi,
+ * PreconditionBlockSOR, and PreconditionBlockSSOR.
+ *
+ * <h3>The interface</h3>
+ *
+ * In this section, we discuss the interface preconditioners usually
+ * have to provide to work inside the deal.II library.
+ *
+ * <h4>Initialization</h4>
+ *
+ * In order to be able to be stored in containers, all preconditioners
+ * have a constructor with no arguments. Since this will typically
+ * produce a useless object, all preconditioners have a function
+ * @code
+ *   void initialize (...)
+ * @endcode
+ *
+ * This function receives the matrix to be preconditioned as well as
+ * additional required parameters and sets up the internal structures
+ * of the preconditioner.
+ *
+ * <h4>Relaxation methods</h4>
+ *
+ * Some preconditioners, like SOR and Jacobi, were used as iterative solvers
+ * long before they were used as preconditioners. Thus, they satisfy both
+ * @ref ConceptMatrixType "MatrixType" and
+ * @ref ConceptRelaxationType "RelaxationType" concepts.
+ *
+ * @ingroup LAC
+ * @ingroup Matrices
+ */
diff --git a/doc/doxygen/headers/quadrature.h b/doc/doxygen/headers/quadrature.h
new file mode 100644
index 0000000..858212c
--- /dev/null
+++ b/doc/doxygen/headers/quadrature.h
@@ -0,0 +1,101 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+ * @defgroup Quadrature Quadrature formulas
+ *
+ * This module contains the base class Quadrature as well as the quadrature
+ * formulas provided by deal.II. Quadrature formulas provide two essential
+ * pieces of data: the locations of quadrature points on the unit cell
+ * [0,1]^d, and the weight of each quadrature point.
+ *
+ * Since deal.II uses quadrilaterals and hexahedra, almost all quadrature
+ * formulas are generated as tensor products of 1-dimensional quadrature
+ * formulas defined on the unit interval [0,1], which makes their definition
+ * for the higher-dimensional case almost trivial. However, the library also
+ * allows anisotropic tensor products (more quadrature points in one
+ * coordinate direction than in another) through the QAnisotropic class, as
+ * well as the definition of quadrature formulas that are not tensor products.
+ *
+ * 
+ * <h3>Use</h3>
+ * 
+ * Quadrature formulas are used, among other uses, when integrating matrix
+ * entries and the components of the right hand side vector. To this end, the
+ * quadrature points defined on the unit cell have to be mapped to the
+ * respective locations on a real cell, and the weights have to be multiplied
+ * by the determinant of the Jacobian. This step is done by classes derived
+ * from the Mapping base class, although this is often hidden since many parts
+ * of the library fall back to using an object of type MappingQ1 if no
+ * particular mapping is provided.
+ *
+ * The next step is to evaluate shape functions and their gradients at these
+ * locations. While the classes derived from the FiniteElement base class
+ * provide a description of the shape functions on the unit cell, the actual
+ * evaluation at quadrature points and joining this with the information
+ * gotten from the mapping is done by the FEValues class and its
+ * associates. In essence, the FEValues class is therefore a view to the
+ * finite element space (defined by the FiniteElement classes) evaluated at
+ * quadrature points (provided by the Quadrature classes) mapped to locations
+ * inside cells in real, as opposed to unit, space (with the mapping provided
+ * by the Mapping classes).
+ *
+ * The FEValues class provides, as a side product, the location of the
+ * quadrature points as mapped to a real cell, for other uses as well. This
+ * can then be used, for example, to evaluate a right hand side function at
+ * these points.
+ * 
+ *
+ * <h3>QIterated</h3>
+ * 
+ * The class QIterated is used to construct an iterated quadrature formula out
+ * of an existing one, thereby increasing the accuracy of the formula without
+ * increasing the order. For example, by iterating the trapezoidal rule with
+ * points at 0 and 1 and weights 1/2 and 1/2 twice, we get a quadrature
+ * formula with points at 0, 1/2, and 1 with weights 1/4, 1/2, and 1/4,
+ * respectively. This formula is obtained by projecting the quadrature formula
+ * onto the subintervals [0,1/2] and [1/2,1], respectively, and then merging
+ * the right endpoint of the left interval with the left endpoint of the right
+ * interval. In the same way, all one-dimensional quadrature formulas can be
+ * iterated. Higher dimensional iterated formulas are generated as tensor
+ * products of one-dimensional iterated formulas.
+ *
+ *
+ * <h3>QAnisotropic</h3>
+ *
+ * While the usual quadrature formulas of higher dimensions
+ * generate tensor products which are equal in each direction, the
+ * class QAnisotropic generates tensor products of possibly different
+ * formulas in each direction.
+ *
+ *
+ * <h3>QProjector</h3>
+ *
+ * The class QProjector is not actually a quadrature rule by itself,
+ * but it provides functions for computing quadrature formulas on the
+ * surfaces of higher dimensional cells.
+ *
+ * All other classes in this module actually implement quadrature
+ * rules of different order and other characteristics.
+ *
+ *
+ * <h3>QuadratureSelector</h3>
+ *
+ * This class is used to generate a quadrature object based on a
+ * string that identifies the quadrature formula. This is useful in
+ * cases where one wants to specify a certain quadrature formula in an
+ * input file, rather than hardcode it in the program.
+ */
diff --git a/doc/doxygen/headers/slepc.h b/doc/doxygen/headers/slepc.h
new file mode 100644
index 0000000..28f4fdf
--- /dev/null
+++ b/doc/doxygen/headers/slepc.h
@@ -0,0 +1,32 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2011 - 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+/**
+ * @defgroup SLEPcWrappers SLEPcWrappers
+ * 
+ * The classes in this module are wrappers around functionality
+ * provided by the SLEPc library. All classes and functions in this
+ * group reside in a namespace @p SLEPcWrappers.
+ *
+ * These classes are only available if a SLEPc installation and a
+ * PETSc installation were detected during configuration of
+ * deal.II. Refer to the README file for more details about this.
+ *
+ * @author Toby D. Young, 2011
+ *
+ * @ingroup LAC PETScWrappers
+ */
diff --git a/doc/doxygen/headers/solvers.h b/doc/doxygen/headers/solvers.h
new file mode 100644
index 0000000..0cd4b47
--- /dev/null
+++ b/doc/doxygen/headers/solvers.h
@@ -0,0 +1,35 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/**
+ * @defgroup Solvers Linear solver classes
+ *
+ * This module groups iterative and direct solvers, eigenvalue solvers, and
+ * some control classes. All these classes operate on objects of the
+ * @ref Matrices "matrix" and @ref Vectors "vector classes" defined in deal.II.
+ *
+ * In order to work properly, solvers that take matrix and vector classes as
+ * template arguments require that these classes satisfy a certain minimal
+ * interface that can be used from inside the solver. For iterative solvers,
+ * this interface is defined in the Solver class. In addition, solvers are
+ * controlled using objects of classes that are derived from the SolverControl
+ * class (for example its derived class ReductionControl), in order to
+ * determine the maximal number of iterations or a desired tolerance.
+ *
+ * If detected during configuration (see the README file), some sparse direct
+ * solvers are also supported.
+ *
+ * @ingroup LAC
+ */
diff --git a/doc/doxygen/headers/sparsity.h b/doc/doxygen/headers/sparsity.h
new file mode 100644
index 0000000..9f70f93
--- /dev/null
+++ b/doc/doxygen/headers/sparsity.h
@@ -0,0 +1,131 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+ * @defgroup Sparsity Sparsity patterns
+ *
+ * Almost all finite element formulations lead to matrices that are
+ * "sparse", i.e., for which the number of nonzero elements per row is
+ * (i) relatively small compared to the overall size of the matrix,
+ * and (ii) bounded by a fixed number that does not grow if the mesh
+ * is refined. For such cases, it is more efficient to not store
+ * <i>all</i> elements of the matrix, but only those that are actually
+ * (or may be) nonzero. This requires storing, for each row, the
+ * column indices of the nonzero entries (we call this the "sparsity
+ * pattern") as well as the actual values of these nonzero
+ * entries. (In practice, it sometimes happens that some of the
+ * nonzero values are, in fact, zero. Sparsity patterns and sparse
+ * matrices only intend to provision space for entries that <i>may</i>
+ * be nonzero, and do so at a time when we don't know yet what values
+ * these entries will ultimately have; they may have a zero value if a
+ * coefficient or cell happens to have particular values.)
+ *
+ * In deal.II, sparsity patterns are typically separated from the actual
+ * sparse matrices (with the exception of the SparseMatrixEZ class and some
+ * classes from interfaces to external libraries such as PETSc). The reason is
+ * that one often has several matrices that share the same sparsity pattern;
+ * examples include the stiffness and mass matrices necessary for time
+ * stepping schemes, or the left and right hand side matrix of generalized
+ * eigenvalue problems. It would therefore be wasteful if each of them had to
+ * store their sparsity pattern separately.
+ *
+ * Consequently, deal.II has sparsity pattern classes that matrix classes
+ * build on. There are two main groups of sparsity pattern classes, as
+ * discussed below:
+ *
+ *
+ * <h4>"Static" sparsity patterns</h4>
+ *
+ * The main sparse matrix class in deal.II, SparseMatrix, only stores a value
+ * for each matrix entry, but not where these entries are located. For this,
+ * it relies on the information it gets from a sparsity pattern object
+ * associated with this matrix. This sparsity pattern object must be of type
+ * SparsityPattern.
+ *
+ * Because matrices are large objects and because it is comparatively
+ * expensive to change them, SparsityPattern objects are built in two phases:
+ * first, in a "dynamic" phase, one allocates positions where one expects
+ * matrices built on it to have nonzero entries; in a second "static" phase,
+ * the representation of these nonzero locations is "compressed" into the
+ * usual Compressed Sparse Row (CSR) format. After this, no new nonzero
+ * locations may be added. Only after compression can a sparsity pattern be
+ * associated to a matrix, since the latter requires the efficient compressed
+ * data format of the former. Building a sparsity pattern during the dynamic
+ * phase often happens with the DoFTools::make_sparsity_pattern()
+ * function. Although this may appear a restriction, it is typically not a
+ * significant problem to first build a sparsity pattern and then to write
+ * into the matrix only in the previously allocated locations, since in finite
+ * element codes it is normally quite clear which elements of a matrix can
+ * possibly be nonzero and which are definitely zero.
+ *
+ * The advantage of this two-phase generation of a sparsity pattern is that
+ * when it is actually used with a matrix, a very efficient format is
+ * available. In particular, the locations of entries are stored in a linear
+ * array that allows for rapid access friendly to modern CPU types with deep
+ * hierarchies of caches. Consequently, the static SparsityPattern class is
+ * the only one on which deal.II's main SparseMatrix class can work.
+ *
+ * The main drawback of static sparsity patterns is that their efficient
+ * construction requires a reasonably good guess how many entries each of the
+ * rows may maximally have. During the actual construction, for example in the
+ * DoFTools::make_sparsity_pattern() function, only at most as many entries can
+ * be allocated as previously stated. This is a problem because it is often
+ * difficult to estimate the maximal number of entries per row. Consequently,
+ * a common strategy is to first build an intermediate sparsity pattern that
+ * uses a less efficient storage scheme during construction of the sparsity
+ * pattern and later copy it directly into the static, compressed form. Most
+ * tutorial programs do this, starting at step-2 (see also, for example the
+ * step-11, step-18, and step-27 tutorial programs).
+ *
+ *
+ * <h4>"Dynamic" or "compressed" sparsity patterns</h4>
+ *
+ * As explained above, it is often complicated to obtain good estimates for
+ * the maximal number of entries in each row of a sparsity
+ * pattern. Consequently, any attempt to allocate a regular SparsityPattern
+ * with bad estimates requires huge amounts of memory, almost all of which
+ * will not be used and be de-allocated upon compression.
+ *
+ * To avoid this, deal.II contains a "dynamic" or "compressed" sparsity
+ * pattern called DynamicSparsityPattern that only allocates as much memory as
+ * necessary to hold the currently added entries. While this saves much memory
+ * compared to the worst-case behavior mentioned above, it requires the use of
+ * less efficient storage schemes for insertion of elements, and the frequent
+ * allocation of memory often also takes significant compute time. The
+ * tradeoff to avoid excessive memory allocation cannot be avoided, however.
+ *
+ * The class is typically used in the following way
+ * @verbatim
+ * DynamicSparsityPattern dsp (dof_handler.n_dofs());
+ * DoFTools::make_sparsity_pattern (dof_handler,
+ *                                  dsp);
+ * constraints.condense (dsp);
+ *
+ * SparsityPattern final_sparsity_pattern;
+ * final_sparsity_pattern.copy_from (dsp);
+ * @endverbatim
+ *
+ * The intermediate, compressed sparsity pattern is directly copied into the
+ * "compressed" form of the final static pattern.
+ *
+ * <h4>Dynamic block sparsity patterns</h4>
+ *
+ * The class BlockDynamicSparsityPattern implements an array of dynamic
+ * sparsity patterns for constructing block matrices. See the documentation and
+ * step-22 for more information.
+ *
+ * @ingroup LAC
+ */
diff --git a/doc/doxygen/headers/std_cxx11.h b/doc/doxygen/headers/std_cxx11.h
new file mode 100644
index 0000000..30c4cde
--- /dev/null
+++ b/doc/doxygen/headers/std_cxx11.h
@@ -0,0 +1,32 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+ * A namespace that contains a selection of classes and functions that are part
+ * of the C++11 standard and that are also provided by the
+ * <a href="http://www.boost.org/">BOOST</a> library. The
+ * elements that are available through the current namespace are either
+ * imported from namespace <code>std</code> (if a compiler's library provides
+ * them) or from namespace boost.
+ *
+ * For more information on the topic,
+ * see also @ref CPP11 "C++11 standard"
+ *
+ * @ingroup CPP11
+ */
+namespace std_cxx11
+{
+}
diff --git a/doc/doxygen/headers/trilinos.h b/doc/doxygen/headers/trilinos.h
new file mode 100644
index 0000000..a38363c
--- /dev/null
+++ b/doc/doxygen/headers/trilinos.h
@@ -0,0 +1,34 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+/**
+ * @defgroup TrilinosWrappers TrilinosWrappers
+ * 
+ * The classes in this module are wrappers around functionality
+ * provided by the Trilinos library. They provide a modern
+ * object-oriented interface that is compatible with the interfaces of
+ * the other linear algebra classes in deal.II. All classes and
+ * functions in this group reside in a namespace @p TrilinosWrappers.
+ *
+ * These classes are only available if a Trilinos installation was
+ * detected during configuration of deal.II. Refer to the README file
+ * for more details about this.
+ *
+ * @author Martin Kronbichler, Wolfgang Bangerth, 2008
+ *
+ * @ingroup LAC
+ */
diff --git a/doc/doxygen/headers/update_flags.h b/doc/doxygen/headers/update_flags.h
new file mode 100644
index 0000000..1c13573
--- /dev/null
+++ b/doc/doxygen/headers/update_flags.h
@@ -0,0 +1,256 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+ * @defgroup UpdateFlags The interplay of UpdateFlags, Mapping, and FiniteElement in FEValues
+ * 
+ * <h2>Introduction</h2>
+ * 
+ * In order to compute local contributions of an individual cell to the global
+ * matrix and right hand side, we usually employ two techniques:
+ * - First, the integral is transformed from the actual cell $K$ to the
+ *   unit/reference cell $\hat K$. For example, for the Laplace equation, we
+ *   transform
+ *   @f[
+ *     A^K_{ij} = \int_K \nabla \varphi_i(\bf x) \cdot \nabla \varphi_j(\bf x) \; dx
+ *   @f]
+ *   into
+ *   @f[
+ *     A^K_{ij} =
+ *     \int_{\hat K}
+ *     \left[ J^{-1}(\hat{\bf x}) \hat \nabla \varphi_i(\hat{\bf x}) \right]
+ *     \cdot
+ *     \left[ J^{-1}(\hat{\bf x}) \hat \nabla \varphi_j(\hat{\bf x}) \right]
+ *     \;
+ *     |\textrm{det}\; J(\hat{\bf x})|
+ *     \;\;
+ *     d\hat x,
+ *   @f]
+ *   where a hat indicates reference coordinates, and $J(\hat{\bf
+ *   x})$ is the Jacobian
+ *   $\frac{\partial \bf F_K(\hat{\bf x})}{\partial\bf \hat x}$ of the mapping
+ *   $\bf x = \bf F_K(\hat{\bf x})$.
+ * - Second, this integral is then approximated through quadrature. This yields
+ *   the formula
+ *   @f[
+ *     A^K_{ij} = \sum_{q}\left[J^{-1}(\hat{\bf x}_q) \hat \nabla \varphi_i(\hat{\bf x}_q)\right] \cdot
+ *     \left[J^{-1}(\hat{\bf x}_q) \hat \nabla \varphi_j(\hat{\bf x}_q)\right]\ |\textrm{det}\ J(\hat{\bf x}_q)|
+ *     w_q,
+ *   @f]
+ *   where $q$ indicates the index of the quadrature point, $\hat{\bf x}_q$ its
+ *   location on the reference cell, and $w_q$ its weight.
+ * 
+ * In order to evaluate such an expression in an application code, we
+ * have to access three different kinds of objects: a quadrature
+ * object that describes locations $\hat{\bf x}_q$ and weights $w_q$ of
+ * quadrature points on the reference cell; a finite element object that
+ * describes the gradients $\hat\nabla \varphi_i(\hat{\bf x}_q)$ of shape
+ * functions on the unit cell; and a mapping object that provides the
+ * Jacobian as well as its determinant. Dealing with all these
+ * objects would be cumbersome and error prone.
+ * 
+ * On the other hand, these three kinds of objects almost always appear together,
+ * and it is in fact very rare for deal.II application codes to do anything with
+ * quadrature, finite element, or mapping objects besides using them together.
+ * For this reason, deal.II uses the FEValues abstraction
+ * combining information on the shape functions, the geometry of the actual mesh
+ * cell and a quadrature rule on a reference cell. Upon construction it takes one
+ * object of each of the three mentioned categories. Later, it can be
+ * "re-initialized" for a concrete grid cell and then provides mapped quadrature
+ * points and weights, mapped shape function values and derivatives as well as
+ * some properties of the transformation from the reference cell to the actual
+ * mesh cell.
+ * 
+ * Since computation of any of these values is potentially expensive (for
+ * example when using high order mappings with high order elements), the
+ * FEValues class only computes what it is explicitly asked for. To this
+ * end, it takes a list of flags of type UpdateFlags at construction time
+ * specifying which quantities should be updated each time a cell is
+ * visited. In the case above, you want the gradients of the shape functions
+ * on the real cell, which is encoded by the flag <code>update_gradients</code>,
+ * as well as the product of the determinant of the Jacobian times the
+ * quadrature weight, which is mnemonically encoded using the
+ * term <code>JxW</code> and encoded in the flag <code>update_JxW_values</code>.
+ * Because these flags are represented by single bits in integer numbers,
+ * producing a <i>set of flags</i> amounts to setting multiple bits
+ * in an integer, which is facilitated using the operation
+ * <code>update_gradients | update_JxW_values</code> (in other words, and
+ * maybe slightly confusingly so, the operation @"this operation <i>and</i> that
+ * operation@" is represented by the expression @"single-bit-in-an-integer-for-this-operation
+ * <i>binary-or</i> single-bit-in-an-integer-for-that-operation@"). To
+ * make operations cheaper, FEValues and the mapping and finite element objects
+ * it depends on really only compute those pieces of information that you
+ * have specified in the update flags (plus some information necessary to
+ * compute what has been specified, see below), and not everything that
+ * could possibly be computed on a cell. This optimization makes it much
+ * cheaper to iterate over cells for assembly, but it also means that one
+ * should take care to provide the minimal set of flags possible.
+ *
+ * In addition, once you pass a set of flags for what you want, the functions
+ * filling the data fields of FEValues are able to distinguish between
+ * values that have to be recomputed on each cell (for example mapped
+ * gradients) and quantities that do not change from cell to cell (for
+ * example the values of shape functions of the usual $Q_p$
+ * finite elements at the same quadrature points on different cells; this
+ * property does not hold for the shape functions of Raviart-Thomas
+ * elements, however, which must be rotated with the local cell).
+ * This allows further optimization of the computations underlying assembly.
+ * 
+ *
+ * <h2> Tracking dependencies </h2>
+ * 
+ * Let's say you want to compute the Laplace matrix as shown above. In that
+ * case, you need to specify the <code>update_gradients</code> flag
+ * (for $\nabla\varphi_i(\bf x_q)$) and the <code>update_JxW_values</code>
+ * flag (for computing $|\textrm{det}\; J(\bf x_q)|w_q$). Internally, however,
+ * the finite element requires the computation of the inverse of the full
+ * Jacobian matrix, $J^{-1}(\bf x_q)$ (and not just the determinant of the matrix),
+ * and to compute the inverse of the Jacobian, it is also necessary to compute
+ * the Jacobian matrix first.
+ * 
+ * Since these are requirements that are not important to the user, it
+ * is not necessary to specify this in user code. Rather, given a set
+ * of update flags, the FEValues object first asks the finite element
+ * object what information it needs to compute in order to satisfy the
+ * user's request provided in the update flags. The finite element
+ * object may therefore add other flags to the update flags (e.g., in
+ * the example above, an FE_Q object would add
+ * <code>update_covariant_transformation</code> to the list, since
+ * that is the necessary transformation from
+ * $\hat\nabla\hat\varphi_i(\hat{\bf x}_q)$ to $\nabla\varphi_i(\bf
+ * x_q)$). With these updated flags, FEValues then asks the mapping
+ * whether it also wants to add more flags to the list to satisfy the
+ * needs of both the user and the finite element object, by calling
+ * Mapping::requires_update_flags(). (This procedure of first asking
+ * the finite element and then the mapping does not have to be
+ * iterated because mappings never require information computed by the
+ * finite element classes, while finite element classes typically need
+ * information computed by mappings.) Using this final list, the
+ * FEValues object then asks both the finite element object and
+ * mapping object to create temporary structures into which they can
+ * store some temporary information that can be computed once and for
+ * all, and these flags will be used when re-computing data on each
+ * cell we will visit later on.
+ * 
+ * 
+ * <h2>Update once or each</h2>
+ * 
+ * As outlined above, we have now determined the final set of things that are
+ * necessary to satisfy a user's desired pieces of information as conveyed by
+ * the update flags she provided. This information will then typically be queried
+ * on every cell the user code visits in a subsequent integration loop.
+ * 
+ * Given that many of the things mappings or finite element classes compute are
+ * potentially expensive, FEValues employs a system whereby mappings and finite
+ * element objects are encouraged to pre-compute information that can be computed
+ * once without reference to a concrete cell, and make use of this when asked
+ * to visit a particular cell of the mesh. An example is that the values of
+ * the shape functions of the common FE_Q element are defined on the reference
+ * cell, and the values on the actual cell are simply exactly the values on
+ * the reference cell -- there is consequently no need to evaluate shape functions
+ * on every cell, but it is sufficient to do this once at the beginning, store
+ * the values somewhere, and when visiting a concrete cell simply copy these
+ * values from their temporary location to the output structure. (Note, however,
+ * that this is specific to the FE_Q element: this is not the case if we used
+ * a FE_RaviartThomas element, since there,
+ * computing the values of the shape functions on a cell involves knowing the
+ * Jacobian of the mapping which depends on the geometry of the cell we visit;
+ * thus, for this element, simply copying pre-computed information is not
+ * sufficient to evaluate the values of shape functions on a particular cell.)
+ * 
+ * To accommodate this structure, both mappings and finite element classes
+ * may internally split the update flags into two sets commonly referenced as
+ * <code>update_once</code> and <code>update_each</code>. The former contains
+ * all those pieces of information that can be pre-computed once at the
+ * time the FEValues object starts to interact with a mapping or
+ * finite element, whereas the latter contains those flags corresponding to
+ * things that need to be computed on every cell. For example, if
+ * <code>update_flags=update_values</code>, then the FE_Q class will
+ * set <code>update_once=update_values</code> and
+ * <code>update_each=0</code>, whereas the Raviart-Thomas element will
+ * do it the other way around.
+ * 
+ * These sets of flags are intended to be mutually exclusive. There is,
+ * on the other hand, nothing that ever provides this decomposition to
+ * anything outside the mapping or finite element classes -- it is a purely
+ * internal decomposition.
+ * 
+ * 
+ * <h2>Generation of the actual data</h2>
+ * 
+ * As outlined above, data is computed at two different times: once at
+ * the beginning on the reference cell, and once whenever we move to an
+ * actual cell. The functions involved in each of these steps are
+ * discussed next:
+ * 
+ * 
+ * <h3>Initialization</h3>
+ * 
+ * Computing data on the reference cell before we even visit the first
+ * real cell is a two-step process. First, the constructors of FEValues,
+ * FEFaceValues and FESubfaceValues, respectively, need to allow the
+ * Mapping and FiniteElement objects to set up internal data
+ * structures. These structures are internal in the following sense: the
+ * FEValues object asks the finite element and mapping objects to create
+ * an object of type FiniteElement::InternalDataBase and
+ * Mapping::InternalDataBase each; the actual finite element and mapping
+ * class may in fact create objects of a derived type if they wish to
+ * store some data beyond what these base classes already provide. The
+ * functions involved in this are
+ * <ul>
+ * <li>Mapping::get_data()
+ * <li>Mapping::get_face_data()
+ * <li>Mapping::get_subface_data()
+ * <li>FiniteElement::get_data()
+ * <li>FiniteElement::get_face_data()
+ * <li>FiniteElement::get_subface_data()
+ * </ul>
+ * 
+ * The FEValues object then takes over ownership of these objects and will
+ * destroy them at the end of the FEValues object's lifetime. After this,
+ * the FEValues object asks the FiniteElement and Mapping objects to fill
+ * these InternalDataBase objects with the data that pertains to what
+ * can and needs to be computed on the reference cell. This is done in these
+ * functions:
+ * <ul>
+ * <li>FEValues::initialize()
+ * <li>FEFaceValues::initialize()
+ * <li>FESubfaceValues::initialize()
+ * </ul>
+ * 
+ * 
+ * <h3>Reinitialization for a mesh cell</h3>
+ * 
+ * Once initialization is over and we call FEValues::reinit, FEFaceValues::reinit
+ * or FESubfaceValues::reinit to move to a concrete cell or face, we need
+ * to calculate the update_each kinds of data. This is done in the following
+ * functions:
+ * <ul>
+ * <li>FEValues::reinit() calls Mapping::fill_fe_values(), then FiniteElement::fill_fe_values()
+ * <li>FEFaceValues::reinit() calls Mapping::fill_fe_face_values(), then FiniteElement::fill_fe_face_values()
+ * <li>FESubfaceValues::reinit() calls Mapping::fill_fe_subface_values(),
+ * then FiniteElement::fill_fe_subface_values()
+ * </ul>
+ * 
+ * This is where the actual data fields for FEValues, stored in
+ * internal::FEValues::MappingRelatedData and
+ * internal::FEValues::FiniteElementRelatedData objects, are
+ * computed. These functions call the function in Mapping first, such
+ * that all the mapping data required by the finite element is
+ * available. Then, the FiniteElement function is called.
+ * 
+ * @ingroup feall
+ */
diff --git a/doc/doxygen/headers/utilities.h b/doc/doxygen/headers/utilities.h
new file mode 100644
index 0000000..179f865
--- /dev/null
+++ b/doc/doxygen/headers/utilities.h
@@ -0,0 +1,42 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+ * @defgroup utilities Utility functions and classes
+ *
+ * This module simply collects a number of functions and classes that provide
+ * general tools for tasks that do not usually have much to do with finite
+ * element programs in particular, but happen to be required there just as
+ * well.
+ */
+
+
+/**
+ * @defgroup data Data storage primitives
+ *
+ * Here are a few simple classes that help in storage and viewing data. For
+ * example, the Table templates allow to use not only arrays of objects (for
+ * which one might want to use the std::vector class), but also
+ * two-dimensional (rectangular) tables of arbitrary objects, as well as
+ * higher-order analogs up to tables with (presently) seven indices.
+ *
+ * Similarly, the VectorSlice function is a primitive that takes anything that
+ * has an interface that resembles a vector (for example the deal.II Vector or
+ * the std::vector classes) and presents a view on it as if it were a vector
+ * in itself.
+ *
+ * @ingroup utilities
+ */
diff --git a/doc/doxygen/headers/vector_memory.h b/doc/doxygen/headers/vector_memory.h
new file mode 100644
index 0000000..c1026d5
--- /dev/null
+++ b/doc/doxygen/headers/vector_memory.h
@@ -0,0 +1,30 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+/**
+ * @defgroup VMemory Vector memory management
+ *
+ * This module groups a few classes that are used to avoid allocating and
+ * deallocating vectors over and over in iterative procedures. These methods
+ * all use an object of the base class VectorMemory to get their auxiliary
+ * vectors.
+ *
+ * Some discussion on this topic can be found in the discussion of the
+ * InverseMatrix class in step-20.
+ *
+ * @ingroup LAC
+ */
diff --git a/doc/doxygen/headers/vector_valued.h b/doc/doxygen/headers/vector_valued.h
new file mode 100644
index 0000000..b3e9f6b
--- /dev/null
+++ b/doc/doxygen/headers/vector_valued.h
@@ -0,0 +1,1109 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+/**
+ * @defgroup vector_valued Handling vector valued problems
+ *
+ *
+ * Vector-valued problems are systems of partial differential
+ * equations. These are problems where the
+ * solution variable is not a scalar function, but a vector-valued function or
+ * a set of functions. This includes, for example:
+ * <ul>
+ *   <li>The elasticity equation discussed in step-8,
+ *       step-17, and step-18 in which the
+ *       solution is the vector-valued displacement at each point.
+ *   <li>The mixed Laplace equation and its extensions discussed in
+ *       step-20, and step-21 in which the
+ *       solution is the scalar pressure and the vector-valued velocity
+ *       at each point.
+ *   <li>The Stokes equation and its extensions discussed in
+ *       step-22, and step-31 in which again
+ *       the solution is the scalar pressure and the vector-valued velocity
+ *       at each point.
+ *   <li>Complex-valued solutions consisting of real and imaginary parts, as
+ *       discussed for example in step-29.
+ * </ul>
+ *
+ * This page gives an overview of how to implement such vector-valued problems
+ * easily in deal.II. In particular, it explains the usage of the class
+ * FESystem, which allows us to write code for systems of partial
+ * differential equations very much like we write code for single equations.
+ *
+ * @dealiiVideoLecture{19,20,21}
+ *
+ * <table class="tutorial" width="50%">
+ * <tr><th><b>%Table of contents</b></th></tr>
+ * <tr><td width="100%" valign="top">
+ * <ol>
+ *  <li> @ref VVExamples "Examples of vector-valued problems"
+ *  <li> @ref VVFEs "Describing finite element spaces"
+ *  <li> @ref VVAssembling "Assembling linear systems"
+ *  <li> @ref VVAlternative "An alternative approach"
+ *  <li> @ref VVBlockSolvers "Block solvers"
+ *  <li> @ref VVExtracting "Extracting data from solutions"
+ *  <li> @ref VVOutput "Generating graphical output"
+ * </ol> </td> </tr> </table>
+ *
+ *
+ *
+ * @anchor VVExamples
+ * <h3>Examples of vector-valued problems</h3>
+ *
+ * The way one deals systematically with vector-valued problems is not
+ * fundamentally different from scalar problems: first, we need a weak
+ * (variational) formulation of the problem that takes into account
+ * all the solution variables. After we did so, generating the system
+ * matrix and solving the linear system follows the same outlines that
+ * we are used to already.
+ *
+ * <h4>Linear elasticity</h4>
+ *
+ * Let us take for example the elasticity problem from step-8 and even
+ * simplify it by choosing $\lambda = 0$ and $\mu = 1$ to highlight the important concepts. Therefore,
+ * let us consider the following weak formulation: find $\mathbf u \in
+ * \mathbf V = H^1_0(\Omega; \mathbb R^3)$ such that for all $\mathbf
+ * v\in V$ holds
+ * @f[
+ * a(u,v) \equiv 2\int_{\Omega} \mathbf D\mathbf u : \mathbf D\mathbf
+ * v\,dx = \int_\Omega \mathbf f\cdot \mathbf v \,dx.
+ * @f]
+ * Here, <b>D</b> denotes the symmetric gradient defined by
+ * $\mathbf D\mathbf u = \tfrac12 (\nabla \mathbf u + (\nabla \mathbf u)^T)$
+ * and the colon indicates double contraction of two tensors of rank 2
+ * (the Frobenius inner product). This bilinear form looks indeed very
+ * much like the bilinear form of the Poisson problem in step-3. The
+ * only differences are
+ * <ol>
+ * <li>We replaced the gradient operator by the symmetric gradient;
+ * this is actually not a significant difference, and everything said
+ * here is true if you replace $\mathbf D$ by $\nabla$. Indeed, let
+ * us do this to simplify the discussion:
+ * @f[
+ * a(u,v) \equiv \int_{\Omega} \nabla\mathbf u : \nabla\mathbf
+ * v\,dx = \int_\Omega \mathbf f\cdot \mathbf v \,dx.
+ * @f]
+ * Note though, that this system is not very exciting, since we could
+ * solve for the three components of <b>u</b> separately.
+ *
+ * <li> The trial and test functions are now from the space
+ * $H^1_0(\Omega; \mathbb R^3)$, which can be viewed as three copies
+ * of the scalar space $H^1_0(\Omega)$. And this is exactly, how we
+ * are going to implement this space below, using FESystem.
+ * </ol>
+ *
+ * But for now, let us look at the system a little more
+ * closely. First, let us exploit that
+ * <b>u</b>=(<i>u</i><sub>1</sub>,<i>u</i><sub>2</sub>,<i>u</i><sub>3</sub>)<sup>T</sup>
+ * and <b>v</b> accordingly. Then, we can write the simplified equation in
+ * coordinates as
+ * @f[
+ * a(u,v) = \int_\Omega \bigl(\nabla u_1\cdot \nabla v_1
+ +\nabla u_2\cdot \nabla v_2+\nabla u_3\cdot \nabla v_3\bigr)\,dx
+ = \int_\Omega \bigl(f_1v_1 + f_2 v_2 + f_3 v_3\bigr)\,dx.
+ * @f]
+ * We see, that this is just three copies of the bilinear form of the
+ * Laplacian, one applied to each component (this is where the
+ * formulation with the $\mathbf D$ is more exciting, and we want to
+ * derive a framework that applies to that one as well). We can make
+ * this weak form a system of differential equations again by choosing
+ * special test functions: first, choose
+ * <b>v</b>=(<i>v</i><sub>1</sub>,0,0)<sup>T</sup>, then
+ * <b>v</b>=(0,<i>v</i><sub>2</sub>,0)<sup>T</sup>, and finally
+ * <b>v</b>=(0,0,<i>v</i><sub>3</sub>)<sup>T</sup>. Writing the outcomes below
+ * each other, we obtain the system
+ * @f[
+ * \begin{matrix}
+ * (\nabla u_1,\nabla v_1) &&& = (f_1, v_1)
+ * \\
+ * & (\nabla u_2,\nabla v_2) && = (f_2, v_2)
+ * \\
+ * && (\nabla u_3,\nabla v_3) & = (f_3, v_3)
+ * \end{matrix}
+ * @f]
+ * where we used the standard inner product notation $(\mathbf
+ * f,\mathbf g) =
+ * \int_\Omega \mathbf f \cdot \mathbf g \,dx$. It is important for our understanding, that
+ * we keep in mind that the latter form as a system of PDE is
+ * completely equivalent to the original definition of the bilinear
+ * form <i>a</i>(<i>u</i>,<i>v</i>), which does not immediately
+ * exhibit this system structure. Let us close by writing the full
+ * system of the elastic equation with symmetric gradient <b>D</b>:
+ * @f[
+ * \begin{matrix}
+ * (\nabla u_1,\nabla v_1) + (\partial_1 u_1,\partial_1 v_1)
+ * & (\partial_1 u_2,\partial_2 v_1)
+ * & (\partial_1 u_3,\partial_3 v_1)
+ * & = (f_1, v_1)
+ * \\
+ * (\partial_2 u_1,\partial_1 v_2)
+ * & (\nabla u_2,\nabla v_2) + (\partial_2 u_2,\partial_2 v_2)
+ * & (\partial_2 u_3,\partial_3 v_2)
+ * & = (f_2, v_2)
+ * \\
+ * (\partial_3 u_1,\partial_1 v_3)
+ * & (\partial_3 u_2,\partial_2 v_3)
+ * & (\nabla u_3,\nabla v_3) + (\partial_3 u_3,\partial_3 v_3)
+ * & = (f_3, v_3)
+ * \end{matrix}.
+ * @f]
+ * Very formally, if we believe in operator valued matrices, we can
+ * rewrite this in the form <b>v</b><sup>T</sup><b>Au</b> = <b>v</b><sup>T</sup><b>f</b> or
+ * @f[
+ * \begin{pmatrix}
+ * v_1 \\ v_2 \\ v_3
+ * \end{pmatrix}^T
+ * \begin{pmatrix}
+ * (\nabla \cdot,\nabla \cdot) + (\partial_1 \cdot,\partial_1 \cdot)
+ * & (\partial_1 \cdot,\partial_2 \cdot)
+ * & (\partial_1 \cdot,\partial_3 \cdot)
+ * \\
+ * (\partial_2 \cdot,\partial_1 \cdot)
+ * & (\nabla \cdot,\nabla \cdot) + (\partial_2 \cdot,\partial_2 \cdot)
+ * & (\partial_2 \cdot,\partial_3 \cdot)
+ * \\
+ * (\partial_3 \cdot,\partial_1 \cdot)
+ * & (\partial_3 \cdot,\partial_2 \cdot)
+ * & (\nabla \cdot,\nabla \cdot) + (\partial_3 \cdot,\partial_3 \cdot)
+ * \end{pmatrix}
+ * \begin{pmatrix}
+ * u_1 \\ u_2 \\ u_3
+ * \end{pmatrix}
+ * =
+ * \begin{pmatrix}
+ * v_1 \\ v_2 \\ v_3
+ * \end{pmatrix}^T
+ * \begin{pmatrix} f_1 \\ f_2 \\ f_3\end{pmatrix}
+ * @f]
+ *
+ * <h4>Mixed elliptic problems</h4>
+ * Now, let us
+ * consider a more complex example, the mixed Laplace equations discussed in
+ * step-20 in three dimensions:
+@f{eqnarray*}
+  \textbf{u} + \nabla p &=& 0,
+  \\
+  -\textrm{div}\; \textbf{u} &=& f,
+@f}
+ *
+ * Here, we have four solution components: the scalar pressure
+ * $p \in L^2(\Omega)$ and the vector-valued velocity $\mathbf u \in
+ * \mathbf V
+ * = H^{\text{div}}_0(\Omega)$ with three vector
+ * components. Note, as an important difference to the previous example,
+ * that the vector space <b>V</b> is not just simply a copy of three
+ * identical spaces.
+ *
+ * A systematic way to get a weak or variational form for this and other
+ * vector problems is to first consider it as a problem where the operators
+ * and solution variables are written in vector and matrix form. For the
+ * example, this would read as follows:
+@f{eqnarray*}
+  \left(
+  \begin{array}{cc} \mathbf 1 & \nabla \\ -\nabla^T & 0 \end{array}
+  \right)
+  \left(
+  \begin{array}{c} \mathbf u \\ p \end{array}
+  \right)
+  =
+  \left(
+  \begin{array}{c} \mathbf 0 \\ f \end{array}
+  \right)
+@f}
+ *
+ * This makes it clear that the solution
+@f{eqnarray*}
+  U =
+  \left(
+  \begin{array}{c} \mathbf u \\ p \end{array}
+  \right)
+@f}
+ * indeed has four components. We note that we could change the
+ * ordering of the solution components $\textbf u$ and $p$ inside $U$ if we
+ * also change columns of the matrix operator.
+ *
+ * Next, we need to think about test functions $V$. We want to multiply both
+ * sides of the equation with them, then integrate over $\Omega$. The result
+ * should be a scalar equality. We can achieve this by choosing $V$ also
+ * vector valued as
+@f{eqnarray*}
+  V =
+  \left(
+  \begin{array}{c} \mathbf v \\ q \end{array}
+  \right).
+@f}
+ *
+ * It is convenient to multiply the matrix-vector equation by the test
+ * function from the left, since this way we automatically get the correct
+ * matrix later on (in the linear system, the matrix is also multiplied from
+ * the right with the solution variable, not from the left), whereas if we
+ * multiplied from the right then the matrix so assembled is the transpose of
+ * the one we really want.
+ *
+ * With this in mind, let us multiply by $V$ and integrate to get the
+ * following equation which has to hold for all test functions $V$:
+@f{eqnarray*}
+  \int_\Omega
+  \left(
+  \begin{array}{c} \mathbf v \\ q \end{array}
+  \right)^T
+  \left(
+  \begin{array}{cc} \mathbf 1 & \nabla \\ -\nabla^T & 0 \end{array}
+  \right)
+  \left(
+  \begin{array}{c} \mathbf u \\ p \end{array}
+  \right)
+  \ dx
+  =
+  \int_\Omega
+  \left(
+  \begin{array}{c} \mathbf v \\ q \end{array}
+  \right)^T
+  \left(
+  \begin{array}{c} \mathbf 0 \\ f \end{array}
+  \right)
+  \ dx,
+@f}
+ * or equivalently:
+@f{eqnarray*}
+  (\mathbf v, \mathbf u)
+  +
+  (\mathbf v, \nabla p)
+  -
+  (q, \mathrm{div}\ \mathbf u)
+  =
+  (q,f),
+@f}
+*
+ * We get the final form by integrating the second term by parts:
+@f{eqnarray*}
+  (\mathbf v, \mathbf u)
+  -
+  (\mathrm{div}\ \mathbf v, p)
+  -
+  (q, \mathrm{div}\ \mathbf u)
+  =
+  (q,f) - (\mathbf n\cdot\mathbf v, p)_{\partial\Omega}.
+@f}
+ *
+ * It is this form that we will later use in assembling the discrete weak form
+ * into a matrix and a right hand side vector: the form in which we have
+ * solution and test functions $U,V$ that each consist of a number of vector
+ * components that we can extract.
+ *
+ *
+ * @anchor VVFEs
+ * <h3>Describing finite element spaces</h3>
+ *
+ * Once we have settled on a bilinear form and a functional setting, we need to find a way to describe
+ * the vector-valued finite element spaces from which we draw solution and test
+ * functions. This is where the FESystem class comes in: it composes
+ * vector-valued finite element spaces from simpler ones.
+ * In the example of the elasticity problem, we need <code>dim</code>
+ * copies of the same element, for instance
+ * @code
+ *   FESystem<dim> elasticity_element (FE_Q<dim>(1), dim);
+ * @endcode
+ * This will generate a vector valued space of dimension
+ * <code>dim</code>, where each component is a
+ * continuous bilinear element of type FE_Q. It will have <code>dim</code> times
+ * as many basis functions as the corresponding FE_Q, and each of
+ * these basis functions is a basis function of FE_Q, lifted into one
+ * of the components of the vector.
+ *
+ * For the mixed Laplacian, the situation is more complex. First, we
+ * have to settle on a pair of discrete spaces $\mathbf V_h \times Q_h
+ * \subset H^{\text{div}}_0(\Omega) \times L^2_0(\Omega)$. One option
+ * would be the stable Raviart-Thomas pair
+ * @code
+ *   FESystem<dim> rt_element (FE_RaviartThomas<dim>(1), 1,
+ *                             FE_DGQ<dim>(1),          1);
+ * @endcode
+ * The first element in this system is already a vector valued
+ * element of dimension <code>dim</code>, while the second is a
+ * regular scalar element.
+ *
+ * Alternatively to using the stable Raviart-Thomas pair, we could
+ * consider a stabilized formulation for the mixed Laplacian, for
+ * instance the LDG method. There, we have the option of using the
+ * same spaces for velocity components and pressure, namely
+ * @code
+ *   FESystem<dim> ldg_convoluted_element_1 (FE_DGQ<dim>(1), dim+1);
+ * @endcode
+ * This system just has <code>dim+1</code> equal copies of the same
+ * discontinuous element, which does not really reflect the structure of
+ * the system. Therefore, we prefer
+ * @code
+ *   FESystem<dim> ldg_equal_element (FESystem<dim>(FE_DGQ<dim>(1), dim), 1,
+ *                                    FE_DGQ<dim>(1),                     1);
+ * @endcode
+ * Here, we have a system of two elements, one vector-valued and one
+ * scalar, very much like with the <code>rt_element</code>. Indeed, in
+ * many codes, the two can be interchanged. This element also allows
+ * us easily to switch to an LDG method with lower order approximation
+ * in the velocity, namely
+ * @code
+ *   FESystem<dim> ldg_unequal_element (FESystem<dim>(FE_DGQ<dim>(1), dim), 1,
+ *                                      FE_DGQ<dim>(2),                     1);
+ * @endcode
+ * It must be pointed out,
+ * that this element is different from
+ * @code
+ *   FESystem<dim> ldg_convoluted_element_2 (FE_DGQ<dim>(1), dim,
+ *                                           FE_DGQ<dim>(2), 1);
+ * @endcode
+ * While the constructor call is very similar to
+ * <code>rt_element</code>, the result actually resembles more
+ * <code>ldg_convoluted_element_1</code> in that this element produces
+ * <code>dim+1</code> independent components.
+ * A more detailed comparison of the resulting FESystem objects is below.
+ *
+ * <h4>Internal structure of FESystem</h4>
+ *
+ * FESystem has a few internal variables which reflect the internal
+ * structure set up by the constructor. These can then also be used by
+ * application programs to give structure to matrix assembling and
+ * linear algebra. We give the names and values of these variables for
+ * the examples above in the following table:
+ * <table border="1">
+ * <tr><th>System Element</th>
+ * <th>FiniteElementData::n_blocks()</th>
+ * <th>FiniteElementData::n_components()</th>
+ * <th>FiniteElement::n_base_elements()</th>
+ * </tr>
+ * <tr><td><code>elasticity_element</code></td><td><code>dim</code></td><td><code>dim</code></td><td>1</td>
+ * </tr>
+ * <tr><td><code>rt_element</code></td><td>2</td><td><code>dim+1</code></td><td>2</td>
+ * </tr>
+ * <tr><td><code>ldg_equal_element</code></td><td>2</td><td><code>dim+1</code></td><td>2</td>
+ * </tr>
+ * <tr><td><code>ldg_convoluted_element_1</code></td><td><code>dim+1</code></td><td><code>dim+1</code></td><td>1</td>
+ * </tr>
+ * <tr><td><code>ldg_convoluted_element_2</code></td><td><code>dim+1</code></td><td><code>dim+1</code></td><td>2</td>
+ * </tr>
+ * </table>
+ *
+ * From this table, it is clear that the FESystem reflects a lot of
+ * the structure of the system of differential equations in the cases
+ * of the <code>rt_element</code> and the
+ * <code>ldg_equal_element</code>, in that we have a vector valued and
+ * a scalar variable. On the other hand, the convoluted elements do
+ * not have this structure and we have to reconstruct it somehow when
+ * assembling systems, as described below.
+ *
+ * At this point, it is important to note that nesting of two FESystem
+ * objects can give the whole FESystem a richer structure than just
+ * concatenating them. This structure can be exploited by application
+ * programs, but is not automatically so.
+ *
+ * @anchor VVAssembling
+ * <h3>Assembling linear systems</h3>
+ * The next step is to assemble the linear system. How to do this for the
+ * simple case of a scalar problem has been shown in many tutorial programs,
+ * starting with step-3. Here we will show how to do it for
+ * vector problems. Corresponding to the different characterizations
+ * of weak formulations above and the different system elements
+ * created, we have a few options which are outlined below.
+ *
+ * The whole concept is probably best explained by showing an example
+ * illustrating how the local contribution of a cell to the weak form of the above
+ * mixed Laplace equations could be assembled.
+ *
+ * <h4>A single FEValues and FEValuesExtractors</h4>
+ * This is essentially how
+ * step-20 does it:
+ * @code
+  const FEValuesExtractors::Vector velocities (0);
+  const FEValuesExtractors::Scalar pressure (dim);
+
+  ...
+
+  typename DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+  for (; cell!=endc; ++cell)
+    {
+      fe_values.reinit (cell);
+      local_matrix = 0;
+      local_rhs = 0;
+
+      right_hand_side.value_list (fe_values.get_quadrature_points(),
+                                  rhs_values);
+
+      for (unsigned int q=0; q<n_q_points; ++q)
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          {
+            for (unsigned int j=0; j<dofs_per_cell; ++j)
+              local_matrix(i,j) += (fe_values[velocities].value (i, q) *
+                                    fe_values[velocities].value (j, q)
+                                    -
+                                    fe_values[velocities].divergence (i, q) *
+                                    fe_values[pressure].value (j, q)
+                                    -
+                                    fe_values[pressure].value (i, q) *
+                                    fe_values[velocities].divergence (j, q)) *
+                                    fe_values.JxW(q);
+
+            local_rhs(i) += fe_values[pressure].value (i, q) *
+                            rhs_values[q] *
+                            fe_values.JxW(q);
+          }
+ * @endcode
+ *
+ * So here's what is happening:
+ * <ul>
+ *   <li> The first thing we do is to declare "extractors" (see the
+ *        FEValuesExtractors namespace). These are objects that don't
+ *        do much except store which components of a vector-valued finite
+ *        element constitute a single scalar component, or a tensor of
+ *        rank 1 (i.e. what we call a "physical vector", always consisting
+ *        of <code>dim</code> components). Here, we declare
+ *        an object that represents the velocities consisting of
+ *        <code>dim</code> components starting at component zero, and the
+ *        extractor for the pressure, which is a scalar component at
+ *        position <code>dim</code>.
+ *
+ *   <li> We then do our usual loop over all cells, shape functions, and
+ *        quadrature points. In the innermost loop, we compute the local
+ *        contribution of a pair of shape functions to the global matrix
+ *        and right hand side vector. Recall that the cell contributions
+ *        to the bilinear form (i.e. neglecting boundary terms) looked as
+ *        follows, based on shape functions
+ *        $V_i=\left(\begin{array}{c}\mathbf v_i \\ q_i\end{array}\right),
+ *         V_j=\left(\begin{array}{c}\mathbf v_j \\ q_j\end{array}\right)$:
+          @f{eqnarray*}
+            (\mathbf v_i, \mathbf v_j)
+            -
+            (\mathrm{div}\ \mathbf v_i, q_j)
+            -
+            (q_i, \mathrm{div}\ \mathbf v_j)
+          @f}
+ *        whereas the implementation looked like this:
+ *        @code
+              local_matrix(i,j) += (fe_values[velocities].value (i, q) *
+                                    fe_values[velocities].value (j, q)
+                                    -
+                                    fe_values[velocities].divergence (i, q) *
+                                    fe_values[pressure].value (j, q)
+                                    -
+                                    fe_values[pressure].value (i, q) *
+                                    fe_values[velocities].divergence (j, q)
+                                   ) *
+                                   fe_values.JxW(q);
+ *        @endcode
+ *        The similarities are pretty obvious.
+ *
+ *   <li> Essentially, what happens in above code is this: when you do
+ *        <code>fe_values[pressure]</code>, a so-called "view" is created, i.e.
+ *        an object that unlike the full FEValues object represents not all
+ *        components of a finite element, but only the one(s) represented by
+ *        the extractor object <code>pressure</code> or
+ *        <code>velocities</code>.
+ *
+ *   <li> These views can then be asked for information about these individual
+ *        components. For example, when you write
+ *        <code>fe_values[pressure].value(i,q)</code> you get the
+ *        value of the pressure component of the $i$th shape function $V_i$ at
+ *        the $q$th quadrature point. Because the extractor
+ *        <code>pressure</code> represents a scalar component, the result of
+ *        the operation <code>fe_values[pressure].value(i,q)</code> is a scalar
+ *        number. On the other hand, the call
+ *        <code>fe_values[velocities].value(i,q)</code> would produce the
+ *        value of a whole set of <code>dim</code> components, which would
+ *        be of type <code>Tensor@<1,dim@></code>.
+ *
+ *   <li> Another thing that can be done with views is to ask for the gradient
+ *        of a particular shape function's components described by an
+ *        extractor. For example, <code>fe_values[pressure].gradient(i,q)</code>
+ *        would represent the gradient of the scalar pressure component, which
+ *        is of type <code>Tensor@<1,dim@></code>, whereas the gradient of the
+ *        velocities components, <code>fe_values[velocities].gradient(i,q)</code>
+ *        is a <code>Tensor@<2,dim@></code>, i.e. a matrix $G_{ij}$ that consists
+ *        of entries $G_{ij}=\frac{\partial\phi_i}{\partial x_j}$. Finally,
+ *        both scalar and vector views can be asked for the second derivatives
+ *        ("Hessians") and vector views can be asked for the symmetric gradient,
+ *        defined as $S_{ij}=\frac 12 \left[\frac{\partial\phi_i}{\partial x_j}
+ *        + \frac{\partial\phi_j}{\partial x_i}\right]$ as well as the
+ *        divergence $\sum_{d=0}^{dim-1} \frac{\partial\phi_d}{\partial x_d}$.
+ * </ul>
+ * Other examples of using extractors and views are shown in tutorial programs
+ * step-21,
+ * step-22,
+ * step-31 and several other programs.
+ *
+ * @note In the current context, when we talk about a vector (for example in
+ * extracting the velocity components above), we mean the word in the sense
+ * physics uses it: it has <code>spacedim</code> components that behave in
+ * specific ways under coordinate system transformations. Examples include
+ * velocity or displacement fields. This is opposed to how mathematics uses
+ * the word "vector" (and how we use this word in other contexts in the
+ * library, for example in the Vector class), where it really stands for a
+ * collection of numbers. An example of this latter use of the word could be
+ * the set of concentrations of chemical species in a flame; however, these
+ * are really just a collection of scalar variables, since they do not change
+ * if the coordinate system is rotated, unlike the components of a velocity
+ * vector, and consequently, this FEValuesExtractors::Vector class should not
+ * be used for this case.
+ *
+ *
+ * @anchor VVAlternative
+ * <h3>An alternative approach</h3>
+ *
+ * There are situations in which one can optimize the assembly of a matrix or
+ * right hand side vector a bit, using knowledge of the finite element in
+ * use. Consider, for example, the bilinear form of the elasticity equations
+ * which we are concerned with first in step-8:
+ *
+@f[
+  a({\mathbf u}, {\mathbf v}) =
+  \left(
+    \lambda \nabla\cdot {\mathbf u}, \nabla\cdot {\mathbf v}
+  \right)_\Omega
+  +
+  \sum_{i,j}
+  \left(
+    \mu \partial_i u_j, \partial_i v_j
+  \right)_\Omega
+  +
+  \sum_{i,j}
+  \left(
+    \mu \partial_i u_j, \partial_j v_i
+  \right)_\Omega.
+@f]
+ *
+ * Here, $\mathbf u$ is a vector function with <code>dim</code> components,
+ * $\mathbf v$ the corresponding test function, and $\lambda,\mu$ are material
+ * parameters. Given our discussions above, the obvious way to implement this
+ * bilinear form would be as follows, using an extractor object that interprets
+ * all <code>dim</code> components of the finite element as a single vector,
+ * rather than disjoint scalar components:
+ *
+ * @code
+      const FEValuesExtractors::Vector displacements (0);
+
+      ...
+
+      for (unsigned int q_point=0; q_point<n_q_points; ++q_point)
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          {
+            const Tensor<2,dim> phi_i_grad
+              = fe_values[displacements].gradient (i,q_point);
+            const double phi_i_div
+              = fe_values[displacements].divergence (i,q_point);
+
+            for (unsigned int j=0; j<dofs_per_cell; ++j)
+              {
+                const Tensor<2,dim> phi_j_grad
+                  = fe_values[displacements].gradient (j,q_point);
+                const double phi_j_div
+                  = fe_values[displacements].divergence (j,q_point);
+
+                cell_matrix(i,j)
+                  +=  (lambda_values[q_point] *
+                       phi_i_div * phi_j_div
+                       +
+                       mu_values[q_point] *
+                       double_contract(phi_i_grad, phi_j_grad)
+                       +
+                       mu_values[q_point] *
+                       double_contract(phi_i_grad, transpose(phi_j_grad))
+                      ) *
+                      fe_values.JxW(q_point);
+              }
+          }
+ * @endcode
+ *
+ * Now, this is not the code used in step-8. In fact,
+ * if one used the above code over the one implemented in that program,
+ * it would run about 8 per cent slower. It can be improved (bringing
+ * down the penalty to about 4 per cent) by taking a close look at the
+ * bilinear form. In fact, we can transform it as follows:
+@f{eqnarray*}
+  a({\mathbf u}, {\mathbf v})
+  &=&
+  \left(
+    \lambda \nabla\cdot {\mathbf u}, \nabla\cdot {\mathbf v}
+  \right)_\Omega
+  +
+  \sum_{i,j}
+  \left(
+    \mu \partial_i u_j, \partial_i v_j
+  \right)_\Omega
+  +
+  \sum_{i,j}
+  \left(
+    \mu \partial_i u_j, \partial_j v_i
+  \right)_\Omega
+  \\
+  &=&
+  \left(
+    \lambda \nabla\cdot {\mathbf u}, \nabla\cdot {\mathbf v}
+  \right)_\Omega
+  +
+  2
+  \sum_{i,j}
+  \left(
+    \mu \partial_i u_j, \frac 12[\partial_i v_j + \partial_j v_i]
+  \right)_\Omega
+  \\
+  &=&
+  \left(
+    \lambda \nabla\cdot {\mathbf u}, \nabla\cdot {\mathbf v}
+  \right)_\Omega
+  +
+  2
+  \sum_{i,j}
+  \left(
+    \mu \frac 12[\partial_i u_j + \partial_j u_i], \frac 12[\partial_i v_j + \partial_j v_i]
+  \right)_\Omega
+  \\
+  &=&
+  \left(
+    \lambda \nabla\cdot {\mathbf u}, \nabla\cdot {\mathbf v}
+  \right)_\Omega
+  +
+  2
+  \left(
+  \mu \varepsilon(\mathbf u), \varepsilon(\mathbf v)
+  \right)_\Omega,
+@f}
+ * where $\varepsilon(\mathbf u) = \frac 12 \left([\nabla\mathbf u] +
+ * [\nabla\mathbf u]^T\right)$ is the symmetrized gradient.
+ * In the second to last step, we used that the scalar product between
+ * an arbitrary tensor $\nabla\mathbf u$ and a symmetric tensor
+ * $\frac 12[\partial_i v_j + \partial_j v_i]$ equals the scalar product
+ * of the symmetric part of the former with the second tensor. Using the
+ * techniques discussed above, the obvious way to implement this goes
+ * like this:
+ *
+ * @code
+      for (unsigned int q_point=0; q_point<n_q_points; ++q_point)
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          {
+            const SymmetricTensor<2,dim> phi_i_symmgrad
+              = fe_values[displacements].symmetric_gradient (i,q_point);
+            const double phi_i_div
+              = fe_values[displacements].divergence (i,q_point);
+
+            for (unsigned int j=0; j<dofs_per_cell; ++j)
+              {
+                const SymmetricTensor<2,dim> phi_j_symmgrad
+                  = fe_values[displacements].symmetric_gradient (j,q_point);
+                const double phi_j_div
+                  = fe_values[displacements].divergence (j,q_point);
+
+                cell_matrix(i,j)
+                  +=  (phi_i_div * phi_j_div *
+                       lambda_values[q_point]
+                       +
+                       2 *
+                       (phi_i_symmgrad * phi_j_symmgrad) *
+                       mu_values[q_point]) *
+                      fe_values.JxW(q_point);
+              }
+          }
+ * @endcode
+ *
+ * So if, again, this is not the code we use in step-8, what do
+ * we do there? The answer rests on the finite element we use. In step-8, we use the
+ * following element:
+ * @code
+ *   FESystem<dim> finite_element (FE_Q<dim>(1), dim);
+ * @endcode
+ * In other words, the finite element we use consists of <code>dim</code> copies
+ * of the same scalar element. This is what we call a @ref GlossPrimitive
+ * "primitive" element: an element that may be vector-valued but where each
+ * shape function has exactly one non-zero component. In other words: if the
+ * $x$-component of a displacement shape function is nonzero, then the $y$-
+ * and $z$-components must be zero and similarly for the other components.
+ * What this means is that also
+ * derived quantities based on shape functions inherit this sparsity property.
+ * For example: the divergence
+ * $\mathrm{div}\ \Phi(x,y,z)=\partial_x\varphi_x(x,y,z) +
+ * \partial_y\varphi_y(x,y,z) + \partial_z\varphi_z(x,y,z)$
+ * of a vector-valued shape function
+ * $\Phi(x,y,z)=(\varphi_x(x,y,z), \varphi_y(x,y,z), \varphi_z(x,y,z))^T$ is,
+ * in the present case, either
+ * $\mathrm{div}\ \Phi(x,y,z)=\partial_x\varphi_x(x,y,z)$,
+ * $\mathrm{div}\ \Phi(x,y,z)=\partial_y\varphi_y(x,y,z)$, or
+ * $\mathrm{div}\ \Phi(x,y,z)=\partial_z\varphi_z(x,y,z)$, because exactly one
+ * of the $\varphi_\ast$ is nonzero. Knowing this means that we can save a
+ * number of computations that, if we were to do them, would only yield
+ * zeros to add up.
+ *
+ * In a similar vein, if only one component of a shape function is nonzero,
+ * then only one row of its gradient $\nabla\Phi$ is nonzero. What this means
+ * for terms like $(\mu \nabla\Phi_i,\nabla\Phi_j)$, where the scalar product
+ * between two tensors is defined as
+ * $(\tau, \gamma)_\Omega=\int_\Omega \sum_{i,j=1}^d \tau_{ij} \gamma_{ij}$,
+ * is that the term is only nonzero if both tensors have their nonzero
+ * entries in the same row, which means that the two shape functions have
+ * to have their single nonzero component in the same location.
+ *
+ * If we use this sort of knowledge, then we can in a first step avoid
+ * computing gradient tensors if we can determine up front that their
+ * scalar product will be zero, in a second step avoid
+ * building the entire tensors and only get their nonzero components,
+ * and in a final step simplify the scalar product by only considering
+ * that index $i$ for the one nonzero row, rather than multiplying and
+ * adding up zeros.
+ *
+ * The vehicle for all this is the ability to determine which vector
+ * component is going to be nonzero. This information is provided by the
+ * FiniteElement::system_to_component_index function. What can be done with
+ * it, using the example above, is explained in detail in
+ * step-8.
+ *
+ *
+ * @anchor VVBlockSolvers
+ * <h3>Block solvers</h3>
+ *
+ * Using techniques as shown above, it isn't particularly complicated to
+ * assemble the linear system, i.e. matrix and right hand side, for a
+ * vector-valued problem. However, then it also has to be solved. This is more
+ * complicated. Naively, one could just consider the matrix as a whole. For
+ * most problems, this matrix is not going to be definite (except for special
+ * cases like the elasticity equations covered in step-8 and
+ * step-17). It will, often, also not be symmetric. This rather
+ * general class of matrices presents problems for iterative solvers: the lack
+ * of structural properties prevents the use of most efficient methods and
+ * preconditioners. While it can be done, the solution process will therefore
+ * most often be slower than necessary.
+ *
+ * The answer to this problem is to make use of the structure of the
+ * problem. For example, for the mixed Laplace equations discussed above, the
+ * operator has the form
+@f{eqnarray*}
+  \left(
+  \begin{array}{cc} \mathbf 1 & \nabla \\ -\nabla^T & 0 \end{array}
+  \right)
+@f}
+ *
+ * It would be nice if this structure could be recovered in the linear system
+ * as well. For example, after discretization, we would like to have a matrix
+ * with the following block structure:
+@f{eqnarray*}
+  \left(
+  \begin{array}{cc} M & B \\ B^T & 0 \end{array}
+  \right),
+@f}
+ * where $M$ represents the mass matrix that results from discretizing the
+ * identity operator $\mathbf 1$ and $B$ the equivalent of the gradient
+ * operator.
+ *
+ * By default, this is not what happens, however. Rather, deal.II assigns
+ * %numbers to degrees of freedom in a rather random manner. Consequently, if
+ * you form a vector out of the values of degrees of freedom, its elements
+ * will not be neatly ordered in a vector like
+@f{eqnarray*}
+  \left(
+  \begin{array}{c} U \\ P \end{array}
+  \right).
+@f}
+ * Rather, it will be a permutation of this, with %numbers of degrees of
+ * freedom corresponding to velocities and pressures intermixed. Consequently,
+ * the system matrix will also not have the nice structure mentioned above,
+ * but rather the same permutation of rows and columns applied to it.
+ *
+ * What is needed is to re-enumerate degrees of freedom so that velocities
+ * come first and pressures last. This can be done using the
+ * DoFRenumbering::component_wise function, as explained in @ref step_20
+ * "step-20", step-21, step-22, and @ref step_31
+ * "step-31". After this, at least the degrees of freedom are partitioned
+ * properly.
+ *
+ * But then we still have to make use of it, i.e. we have to come up with a
+ * solver that uses the structure. For example, in step-20, we
+ * do a block elimination of the linear system
+@f{eqnarray*}
+  \left(
+  \begin{array}{cc} M & B \\ B^T & 0 \end{array}
+  \right)
+  \left(
+  \begin{array}{c} U \\ P \end{array}
+  \right)
+  =
+  \left(
+  \begin{array}{c} F \\ G \end{array}
+  \right).
+@f}
+ * What this system means, of course, is
+@f{eqnarray*}
+  MU + BP &=& F,\\
+  B^TU  &=& G.
+@f}
+ *
+ * So, if we multiply the first equation by $B^TM^{-1}$ and subtract the
+ * second from the result, we get
+@f{eqnarray*}
+  B^TM^{-1}BP &=& B^TM^{-1}F-G.
+@f}
+ *
+ * This is an equation that now only contains the pressure variables. If we
+ * can solve it, we can in a second step solve for the velocities using
+@f{eqnarray*}
+  MU = F-BP.
+@f}
+ *
+ * This has the advantage that the matrices $B^TM^{-1}B$ and $M$ that we have
+ * to solve with are both symmetric and positive definite, as opposed to the
+ * large whole matrix we had before.
+ *
+ * How a solver like this is implemented is explained in more detail in @ref
+ * step_20 "step-20", step-31, and a few other tutorial
+ * programs. What we would like to point out here is that we now need a way to
+ * extract certain parts of a matrix or vector: if we are to multiply, say,
+ * the $U$ part of the solution vector by the $M$ part of the global matrix,
+ * then we need to have a way to access these parts of the whole.
+ *
+ * This is where the BlockVector, BlockSparseMatrix, and similar classes come
+ * in. For all practical purposes, they can be used as regular vectors or
+ * sparse matrices, i.e. they offer element access, provide the usual vector
+ * operations and implement, for example, matrix-vector multiplications. In
+ * other words, assembling matrices and right hand sides works in exactly the
+ * same way as for the non-block versions. That said, internally they store
+ * the elements of vectors and matrices in "blocks"; for example, instead of
+ * using one large array, the BlockVector class stores it as a set of arrays
+ * each of which we call a block. The advantage is that, while the whole thing
+ * can be used as a vector, one can also access an individual block which
+ * then, again, is a vector with all the vector operations.
+ *
+ * To show how to do this, let us consider the second equation $MU=F-BP$ to be
+ * solved above. This can be achieved using the following sequence similar to
+ * what we have in step-20:
+ * @code
+    Vector<double> tmp (solution.block(0).size());
+    system_matrix.block(0,1).vmult (tmp, solution.block(1));
+    tmp *= -1;
+    tmp += system_rhs.block(0);
+
+
+    SolverControl solver_control (solution.block(0).size(),
+                                  1e-8*tmp.l2_norm());
+    SolverCG<> cg (solver_control, vector_memory);
+
+    cg.solve (system_matrix.block(0,0),
+              solution.block(0),
+              tmp,
+              PreconditionIdentity());
+ * @endcode
+ *
+ * What's happening here is that we allocate a temporary vector with as many
+ * elements as the first block of the solution vector, i.e. the velocity
+ * component $U$, has. We then set this temporary vector equal to the $(0,1)$
+ * block of the matrix, i.e. $B$, times component 1 of the solution which is
+ * the previously computed pressure $P$. The result is multiplied by $-1$, and
+ * component 0 of the right hand side, $F$ is added to it. The temporary
+ * vector now contains $F-BP$. The rest of the code snippet simply solves a
+ * linear system with $F-BP$ as right hand side and the $(0,0)$ block of the
+ * global matrix, i.e. $M$. Using block vectors and matrices in this way
+ * therefore allows us to quite easily write rather complicated solvers making
+ * use of the block structure of a linear system.
+ *
+ *
+ *
+ * @anchor VVExtracting
+ * <h3>Extracting data from solutions</h3>
+ *
+ * Once one has computed a solution, it is often necessary to evaluate it at
+ * quadrature points, for example to evaluate nonlinear residuals for the next
+ * Newton iteration, to evaluate the finite element residual for error
+ * estimators, or to compute the right hand side for the next time step in
+ * a time dependent problem.
+ *
+ * The way this is done is to again use an FEValues object to evaluate
+ * the shape functions at quadrature points, and with those also the
+ * values of a finite element function. For the example of the mixed
+ * Laplace problem above, consider the following code after solving:
+ * @code
+  std::vector<Vector<double> > local_solution_values (n_q_points,
+                                                      Vector<double> (dim+1));
+
+  typename DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+  for (; cell!=endc; ++cell)
+    {
+      fe_values.reinit (cell);
+
+      fe_values.get_function_values (solution,
+                                     local_solution_values);
+ * @endcode
+ *
+ * After this, the variable <code>local_solution_values</code> is a list
+ * of vectors of a length equal to the number of quadrature points we
+ * have initialized the FEValues object with; each of these vectors
+ * has <code>dim+1</code> elements containing the values of the
+ * <code>dim</code> velocities and the one pressure at a quadrature point.
+ *
+ * We can use these values to then construct other things like residuals.
+ * However, the construct is a bit awkward. First, we have a
+ * <code>std::vector</code> of <code>dealii::Vector</code>s, which always
+ * looks strange. It is also inefficient because it implies dynamic memory
+ * allocation for the outer vector as well as for all the inner vectors.
+ * Secondly, maybe we are only interested in the velocities,
+ * for example to solve an advection problem in a second stage (as, for
+ * example, in step-21 or step-31). In that
+ * case, one would have to hand-extract these values like so:
+ * @code
+   for (unsigned int q=0; q<n_q_points; ++q)
+     {
+       Tensor<1,dim> velocity;
+       for (unsigned int d=0; d<dim; ++d)
+         velocity[d] = local_solution_values[q](d);
+
+       ... do something with this velocity ...
+ * @endcode
+ * Note how we convert from a dealii::Vector (which is simply a collection
+ * of vector elements) into a <code>Tensor@<1,dim@></code> because the
+ * velocity is a quantity characterized by <code>dim</code> elements that
+ * have certain transformation properties under rotations of the coordinate
+ * system.
+ *
+ * This code can be written more elegantly and efficiently using code like
+ * the following:
+ * @code
+  std::vector<Tensor<1,dim> > local_velocity_values (n_q_points);
+
+  const FEValuesExtractors::Vector velocities (0);
+
+  typename DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+  for (; cell!=endc; ++cell)
+    {
+      fe_values.reinit (cell);
+
+      fe_values[velocities].get_function_values (solution,
+                                                 local_velocity_values);
+ * @endcode
+ *
+ * As a result, we here get the velocities right away, and in the
+ * right data type (because we have described, using the extractor,
+ * that the first <code>dim</code> components of the finite element
+ * belong together, forming a tensor). The code is also more efficient:
+ * it requires less dynamic memory allocation because the Tensor
+ * class allocates its components as member variables rather than on
+ * the heap, and we save cycles because we don't even bother computing
+ * the values of the pressure variable at the quadrature points. On
+ * the other hand, if we had been interested in only the pressure and
+ * not the velocities, then the following code extracting scalar
+ * values would have done:
+ * @code
+  std::vector<double> local_pressure_values (n_q_points);
+
+  const FEValuesExtractors::Scalar pressure (dim);
+
+  typename DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+  for (; cell!=endc; ++cell)
+    {
+      fe_values.reinit (cell);
+
+      fe_values[pressure].get_function_values (solution,
+                                               local_pressure_values);
+ * @endcode
+ *
+ * In similar cases, one sometimes needs the gradients or second
+ * derivatives of the solution, or of individual scalar or vector
+ * components. To get at those of all components of the solution,
+ * the functions FEValuesBase::get_function_gradients and
+ * FEValuesBase::get_function_hessians are the equivalent of the
+ * function FEValuesBase::get_function_values used above.
+ *
+ * Likewise, to extract the gradients of scalar components,
+ * FEValuesViews::Scalar::get_function_gradients and
+ * FEValuesViews::Scalar::get_function_hessians do the job.
+ * For vector- (tensor-)valued quantities, there are functions
+ * FEValuesViews::Vector::get_function_gradients and
+ * FEValuesViews::Vector::get_function_hessians, and in
+ * addition
+ * FEValuesViews::Vector::get_function_symmetric_gradients and
+ * FEValuesViews::Vector::get_function_divergences.
+ *
+ * Moreover, there is a shortcut available for the case when only the
+ * Laplacians of the solution (which are the traces of the Hessians) are
+ * needed, usable for both scalar and vector-valued problems as
+ * FEValuesViews::Scalar::get_function_laplacians and
+ * FEValuesViews::Vector::get_function_laplacians.
+ *
+ *
+ * @anchor VVOutput
+ * <h3>Generating graphical output</h3>
+ *
+ * As mentioned above, an FESystem object may hold multiple vector components,
+ * but it doesn't have a notion of what they actually mean. As an example, take
+ * the object
+ * @code
+ *   FESystem<dim> finite_element (FE_Q<dim>(1), dim+1);
+ * @endcode
+ * It has <code>dim+1</code> vector components, but what do they mean? Are they
+ * the <code>dim</code> components of a velocity vector plus one pressure? Are
+ * they the pressure plus the <code>dim</code> velocity components? Or are
+ * they a collection of scalars?
+ *
+ * The point is that the FESystem class doesn't care. The <i>interpretation</i>
+ * of what the components mean is up to the person who uses the element later,
+ * for example in assembling a linear form, or in extracting solution data
+ * components for a linearized system in the next Newton step. In almost
+ * all cases, this interpretation happens at the place where it is needed.
+ *
+ * There is one case where one has to be explicit, however, and that is in
+ * generating graphical output. The reason is that many file formats for
+ * visualization want data that represents vectors (e.g. velocities,
+ * displacements, etc) to be stored separately from scalars (pressures,
+ * densities, etc), and there often is no way to group a bunch of scalars into
+ * a vector field from within a visualization program.
+ *
+ * To achieve this, we need to let the DataOut class and friends know which
+ * components of the FESystem form vectors (with <code>dim</code> components)
+ * and which are scalars. This is shown, for example, in step-22 where we
+ * generate output as follows:
+ * @code
+  std::vector<std::string> solution_names (dim, "velocity");
+  solution_names.push_back ("pressure");
+
+  std::vector<DataComponentInterpretation::DataComponentInterpretation>
+    data_component_interpretation
+    (dim, DataComponentInterpretation::component_is_part_of_vector);
+  data_component_interpretation
+    .push_back (DataComponentInterpretation::component_is_scalar);
+
+  DataOut<dim> data_out;
+  data_out.attach_dof_handler (dof_handler);
+  data_out.add_data_vector (solution, solution_names,
+                            DataOut<dim>::type_dof_data,
+                            data_component_interpretation);
+  data_out.build_patches ();
+ * @endcode
+ * In other words, we here create an array of <code>dim+1</code> elements in
+ * which we store which elements of the finite element are vectors and which
+ * are scalars; the array is filled with <code>dim</code> copies of
+ * DataComponentInterpretation::component_is_part_of_vector and a single
+ * trailing element of DataComponentInterpretation::component_is_scalar . The
+ * array is then given as an extra argument to DataOut::add_data_vector to
+ * explain how the data in the given solution vector is to be interpreted.
+ * Visualization programs like Visit and Paraview will then offer to show
+ * these <code>dim</code> components as vector fields, rather than as
+ * individual scalar fields.
+ *
+ *
+ * @ingroup feall feaccess
+ */
+
diff --git a/doc/doxygen/headers/vectors.h b/doc/doxygen/headers/vectors.h
new file mode 100644
index 0000000..a0767f6
--- /dev/null
+++ b/doc/doxygen/headers/vectors.h
@@ -0,0 +1,25 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+ * @defgroup Vectors Vector classes
+ *
+ * Here, we list all the classes that satisfy the <code>VectorType</code>
+ * concept and may be used in linear solvers (see @ref Solvers) and for
+ * matrix-vector operations.
+ *
+ * @ingroup LAC
+ */
diff --git a/doc/doxygen/images/cg-monitor-smoothing-0.png b/doc/doxygen/images/cg-monitor-smoothing-0.png
new file mode 100644
index 0000000..88476ef
Binary files /dev/null and b/doc/doxygen/images/cg-monitor-smoothing-0.png differ
diff --git a/doc/doxygen/images/cg-monitor-smoothing-1.png b/doc/doxygen/images/cg-monitor-smoothing-1.png
new file mode 100644
index 0000000..93985d8
Binary files /dev/null and b/doc/doxygen/images/cg-monitor-smoothing-1.png differ
diff --git a/doc/doxygen/images/cg-monitor-smoothing-2.png b/doc/doxygen/images/cg-monitor-smoothing-2.png
new file mode 100644
index 0000000..da9bb01
Binary files /dev/null and b/doc/doxygen/images/cg-monitor-smoothing-2.png differ
diff --git a/doc/doxygen/images/cg-monitor-smoothing-3.png b/doc/doxygen/images/cg-monitor-smoothing-3.png
new file mode 100644
index 0000000..92e4c81
Binary files /dev/null and b/doc/doxygen/images/cg-monitor-smoothing-3.png differ
diff --git a/doc/doxygen/images/cg-monitor-smoothing-4.png b/doc/doxygen/images/cg-monitor-smoothing-4.png
new file mode 100644
index 0000000..d1bd7ae
Binary files /dev/null and b/doc/doxygen/images/cg-monitor-smoothing-4.png differ
diff --git a/doc/doxygen/images/cg-monitor-smoothing-5.png b/doc/doxygen/images/cg-monitor-smoothing-5.png
new file mode 100644
index 0000000..43e5707
Binary files /dev/null and b/doc/doxygen/images/cg-monitor-smoothing-5.png differ
diff --git a/doc/doxygen/images/cheese_2d.png b/doc/doxygen/images/cheese_2d.png
new file mode 100644
index 0000000..927624e
Binary files /dev/null and b/doc/doxygen/images/cheese_2d.png differ
diff --git a/doc/doxygen/images/collaboration.eps b/doc/doxygen/images/collaboration.eps
new file mode 100644
index 0000000..c0b3e25
--- /dev/null
+++ b/doc/doxygen/images/collaboration.eps
@@ -0,0 +1,258 @@
+%!PS-Adobe-2.0 EPSF-2.0
+%%Title: collaboration.fig
+%%Creator: fig2dev Version 3.2 Patchlevel 4
+%%CreationDate: Sun Jan 22 22:00:32 2006
+%%For: bangerth at bangerth (Wolfgang Bangerth)
+%%BoundingBox: 0 0 549 513
+%%Magnification: 0.8000
+%%EndComments
+/$F2psDict 200 dict def
+$F2psDict begin
+$F2psDict /mtrx matrix put
+/col-1 {0 setgray} bind def
+/col0 {0.000 0.000 0.000 srgb} bind def
+/col1 {0.000 0.000 1.000 srgb} bind def
+/col2 {0.000 1.000 0.000 srgb} bind def
+/col3 {0.000 1.000 1.000 srgb} bind def
+/col4 {1.000 0.000 0.000 srgb} bind def
+/col5 {1.000 0.000 1.000 srgb} bind def
+/col6 {1.000 1.000 0.000 srgb} bind def
+/col7 {1.000 1.000 1.000 srgb} bind def
+/col8 {0.000 0.000 0.560 srgb} bind def
+/col9 {0.000 0.000 0.690 srgb} bind def
+/col10 {0.000 0.000 0.820 srgb} bind def
+/col11 {0.530 0.810 1.000 srgb} bind def
+/col12 {0.000 0.560 0.000 srgb} bind def
+/col13 {0.000 0.690 0.000 srgb} bind def
+/col14 {0.000 0.820 0.000 srgb} bind def
+/col15 {0.000 0.560 0.560 srgb} bind def
+/col16 {0.000 0.690 0.690 srgb} bind def
+/col17 {0.000 0.820 0.820 srgb} bind def
+/col18 {0.560 0.000 0.000 srgb} bind def
+/col19 {0.690 0.000 0.000 srgb} bind def
+/col20 {0.820 0.000 0.000 srgb} bind def
+/col21 {0.560 0.000 0.560 srgb} bind def
+/col22 {0.690 0.000 0.690 srgb} bind def
+/col23 {0.820 0.000 0.820 srgb} bind def
+/col24 {0.500 0.190 0.000 srgb} bind def
+/col25 {0.630 0.250 0.000 srgb} bind def
+/col26 {0.750 0.380 0.000 srgb} bind def
+/col27 {1.000 0.500 0.500 srgb} bind def
+/col28 {1.000 0.630 0.630 srgb} bind def
+/col29 {1.000 0.750 0.750 srgb} bind def
+/col30 {1.000 0.880 0.880 srgb} bind def
+/col31 {1.000 0.840 0.000 srgb} bind def
+
+end
+save
+newpath 0 513 moveto 0 0 lineto 549 0 lineto 549 513 lineto closepath clip newpath
+-21.6 545.4 translate
+1 -1 scale
+
+/cp {closepath} bind def
+/ef {eofill} bind def
+/gr {grestore} bind def
+/gs {gsave} bind def
+/sa {save} bind def
+/rs {restore} bind def
+/l {lineto} bind def
+/m {moveto} bind def
+/rm {rmoveto} bind def
+/n {newpath} bind def
+/s {stroke} bind def
+/sh {show} bind def
+/slc {setlinecap} bind def
+/slj {setlinejoin} bind def
+/slw {setlinewidth} bind def
+/srgb {setrgbcolor} bind def
+/rot {rotate} bind def
+/sc {scale} bind def
+/sd {setdash} bind def
+/ff {findfont} bind def
+/sf {setfont} bind def
+/scf {scalefont} bind def
+/sw {stringwidth} bind def
+/tr {translate} bind def
+/tnt {dup dup currentrgbcolor
+  4 -2 roll dup 1 exch sub 3 -1 roll mul add
+  4 -2 roll dup 1 exch sub 3 -1 roll mul add
+  4 -2 roll dup 1 exch sub 3 -1 roll mul add srgb}
+  bind def
+/shd {dup dup currentrgbcolor 4 -2 roll mul 4 -2 roll mul
+  4 -2 roll mul srgb} bind def
+/$F2psBegin {$F2psDict begin /$F2psEnteredState save def} def
+/$F2psEnd {$F2psEnteredState restore end} def
+
+$F2psBegin
+10 setmiterlimit
+0 slj 0 slc
+ 0.05039 0.05039 sc
+%
+% Fig objects follow
+%
+% 
+% here starts figure with depth 51
+% Polyline
+15.000 slw
+gs  clippath
+9118 5399 m 9234 5429 l 9306 5151 l 9188 5369 l 9190 5121 l cp
+eoclip
+n 9765 3150 m
+ 9180 5400 l gs col0 s gr gr
+
+% arrowhead
+n 9190 5121 m 9188 5369 l 9306 5151 l 9190 5121 l  cp gs 0.00 setgray ef gr  col0 s
+% Polyline
+gs  clippath
+8270 5428 m 8386 5400 l 8319 5121 l 8317 5369 l 8202 5149 l cp
+eoclip
+n 7785 3150 m
+ 8325 5400 l gs col0 s gr gr
+
+% arrowhead
+n 8202 5149 m 8317 5369 l 8319 5121 l 8202 5149 l  cp gs 0.00 setgray ef gr  col0 s
+% Polyline
+n 3600 675 m 5400 675 l 5400 1485 l 3600 1485 l
+ cp gs col6 1.00 shd ef gr gs col0 s gr 
+% Polyline
+n 9495 2340 m 11295 2340 l 11295 3150 l 9495 3150 l
+ cp gs col6 1.00 shd ef gr gs col0 s gr 
+% Polyline
+n 5895 2340 m 7875 2340 l 7875 3150 l 5895 3150 l
+ cp gs col6 1.00 shd ef gr gs col0 s gr 
+% Polyline
+n 450 2340 m 2340 2340 l 2340 3150 l 450 3150 l
+ cp gs col6 1.00 shd ef gr gs col0 s gr 
+% Polyline
+n 3150 4455 m 5040 4455 l 5040 5265 l 3150 5265 l
+ cp gs col6 1.00 shd ef gr gs col0 s gr 
+% Polyline
+n 7830 5445 m 9720 5445 l 9720 6255 l 7830 6255 l
+ cp gs col6 1.00 shd ef gr gs col0 s gr 
+% Polyline
+n 7830 3780 m 9720 3780 l 9720 4590 l 7830 4590 l
+ cp gs col6 1.00 shd ef gr gs col0 s gr 
+% Polyline
+n 5220 7065 m 7200 7065 l 7200 7875 l 5220 7875 l
+ cp gs col6 1.00 shd ef gr gs col0 s gr 
+% Polyline
+n 5220 8325 m 7200 8325 l 7200 9135 l 5220 9135 l
+ cp gs col6 1.00 shd ef gr gs col0 s gr 
+% Polyline
+n 5175 9990 m 7155 9990 l 7155 10800 l 5175 10800 l
+ cp gs col6 1.00 shd ef gr gs col0 s gr 
+% Polyline
+gs  clippath
+1943 2245 m 1988 2356 l 2254 2247 l 2010 2283 l 2209 2136 l cp
+eoclip
+n 3960 1485 m
+ 1980 2295 l gs col0 s gr gr
+
+% arrowhead
+n 2209 2136 m 2010 2283 l 2254 2247 l 2209 2136 l  cp gs 0.00 setgray ef gr  col0 s
+% Polyline
+gs  clippath
+6420 2355 m 6475 2248 l 6220 2116 l 6406 2280 l 6165 2223 l cp
+eoclip
+n 4860 1485 m
+ 6435 2295 l gs col0 s gr gr
+
+% arrowhead
+n 6165 2223 m 6406 2280 l 6220 2116 l 6165 2223 l  cp gs 0.00 setgray ef gr  col0 s
+% Polyline
+gs  clippath
+9769 2356 m 9790 2238 l 9507 2187 l 9733 2289 l 9486 2305 l cp
+eoclip
+n 5310 1485 m
+ 9765 2295 l gs col0 s gr gr
+
+% arrowhead
+n 9486 2305 m 9733 2289 l 9507 2187 l 9486 2305 l  cp gs 0.00 setgray ef gr  col0 s
+% Polyline
+gs  clippath
+3484 4466 m 3559 4372 l 3334 4193 l 3485 4390 l 3259 4287 l cp
+eoclip
+n 1980 3195 m
+ 3510 4410 l gs col0 s gr gr
+
+% arrowhead
+n 3259 4287 m 3485 4390 l 3334 4193 l 3259 4287 l  cp gs 0.00 setgray ef gr  col0 s
+% Polyline
+gs  clippath
+4588 4369 m 4657 4467 l 4893 4302 l 4662 4391 l 4824 4204 l cp
+eoclip
+n 6435 3150 m
+ 4635 4410 l gs col0 s gr gr
+
+% arrowhead
+n 4824 4204 m 4662 4391 l 4893 4302 l 4824 4204 l  cp gs 0.00 setgray ef gr  col0 s
+% Polyline
+gs  clippath
+6843 6974 m 6899 7080 l 7154 6945 l 6914 7005 l 7097 6839 l cp
+eoclip
+n 8325 6255 m
+ 6885 7020 l gs col0 s gr gr
+
+% arrowhead
+n 7097 6839 m 6914 7005 l 7154 6945 l 7097 6839 l  cp gs 0.00 setgray ef gr  col0 s
+% Polyline
+gs  clippath
+5535 7063 m 5639 7002 l 5495 6754 l 5564 6992 l 5391 6814 l cp
+eoclip
+n 4590 5310 m
+ 5580 7020 l gs col0 s gr gr
+
+% arrowhead
+n 5391 6814 m 5564 6992 l 5495 6754 l 5391 6814 l  cp gs 0.00 setgray ef gr  col0 s
+% Polyline
+gs  clippath
+6150 8340 m 6270 8340 l 6270 8053 l 6210 8293 l 6150 8053 l cp
+eoclip
+n 6210 7920 m
+ 6210 8325 l gs col0 s gr gr
+
+% arrowhead
+n 6150 8053 m 6210 8293 l 6270 8053 l 6150 8053 l  cp gs 0.00 setgray ef gr  col0 s
+% Polyline
+gs  clippath
+6150 10005 m 6270 10005 l 6270 9718 l 6210 9958 l 6150 9718 l cp
+eoclip
+n 6210 9135 m
+ 6210 9990 l gs col0 s gr gr
+
+% arrowhead
+n 6150 9718 m 6210 9958 l 6270 9718 l 6150 9718 l  cp gs 0.00 setgray ef gr  col0 s
+/Helvetica-Bold ff 240.00 scf sf
+3915 1170 m
+gs 1 -1 sc (1. Unit cell) col0 sh gr
+/Helvetica-Bold ff 240.00 scf sf
+495 2835 m
+gs 1 -1 sc (2. Triangulation) col0 sh gr
+/Helvetica-Bold ff 240.00 scf sf
+5940 2835 m
+gs 1 -1 sc (3. Finite Element) col0 sh gr
+/Helvetica-Bold ff 240.00 scf sf
+9630 2835 m
+gs 1 -1 sc (4. Quadrature) col0 sh gr
+/Helvetica-Bold ff 240.00 scf sf
+3240 4950 m
+gs 1 -1 sc (5. DoFHandler) col0 sh gr
+/Helvetica-Bold ff 240.00 scf sf
+8145 4275 m
+gs 1 -1 sc (6. Mapping) col0 sh gr
+/Helvetica-Bold ff 240.00 scf sf
+8055 5940 m
+gs 1 -1 sc (7. FEValues) col0 sh gr
+/Helvetica-Bold ff 240.00 scf sf
+5355 8865 m
+gs 1 -1 sc (9. Linear Solver) col0 sh gr
+/Helvetica-Bold ff 240.00 scf sf
+5265 7560 m
+gs 1 -1 sc (8. Linear System) col0 sh gr
+/Helvetica-Bold ff 240.00 scf sf
+5535 10485 m
+gs 1 -1 sc (10. Output) col0 sh gr
+% here ends figure;
+$F2psEnd
+rs
+showpage
diff --git a/doc/doxygen/images/collaboration.fig b/doc/doxygen/images/collaboration.fig
new file mode 100644
index 0000000..38df6bd
--- /dev/null
+++ b/doc/doxygen/images/collaboration.fig
@@ -0,0 +1,72 @@
+#FIG 3.2
+Landscape
+Center
+Metric
+A4      
+100.00
+Single
+-2
+1200 2
+2 2 0 2 0 6 50 -1 20 0.000 0 0 -1 0 0 5
+	 3600 675 5400 675 5400 1485 3600 1485 3600 675
+2 2 0 2 0 6 50 -1 20 0.000 0 0 -1 0 0 5
+	 9495 2340 11295 2340 11295 3150 9495 3150 9495 2340
+2 2 0 2 0 6 50 -1 20 0.000 0 0 -1 0 0 5
+	 5895 2340 7875 2340 7875 3150 5895 3150 5895 2340
+2 2 0 2 0 6 50 -1 20 0.000 0 0 -1 0 0 5
+	 450 2340 2340 2340 2340 3150 450 3150 450 2340
+2 2 0 2 0 6 50 -1 20 0.000 0 0 -1 0 0 5
+	 3150 4455 5040 4455 5040 5265 3150 5265 3150 4455
+2 2 0 2 0 6 50 -1 20 0.000 0 0 -1 0 0 5
+	 7830 5445 9720 5445 9720 6255 7830 6255 7830 5445
+2 2 0 2 0 6 50 -1 20 0.000 0 0 -1 0 0 5
+	 7830 3780 9720 3780 9720 4590 7830 4590 7830 3780
+2 2 0 2 0 6 50 -1 20 0.000 0 0 -1 0 0 5
+	 5220 7065 7200 7065 7200 7875 5220 7875 5220 7065
+2 2 0 2 0 6 50 -1 20 0.000 0 0 -1 0 0 5
+	 5220 8325 7200 8325 7200 9135 5220 9135 5220 8325
+2 2 0 2 0 6 50 -1 20 0.000 0 0 -1 0 0 5
+	 5175 9990 7155 9990 7155 10800 5175 10800 5175 9990
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	1 1 2.00 120.00 240.00
+	 3960 1485 1980 2295
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	1 1 2.00 120.00 240.00
+	 4860 1485 6435 2295
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	1 1 2.00 120.00 240.00
+	 5310 1485 9765 2295
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	1 1 2.00 120.00 240.00
+	 1980 3195 3510 4410
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	1 1 2.00 120.00 240.00
+	 6435 3150 4635 4410
+2 1 0 2 0 7 51 -1 -1 0.000 0 0 -1 1 0 2
+	1 1 2.00 120.00 240.00
+	 9765 3150 9180 5400
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	1 1 2.00 120.00 240.00
+	 8325 6255 6885 7020
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	1 1 2.00 120.00 240.00
+	 4590 5310 5580 7020
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	1 1 2.00 120.00 240.00
+	 6210 7920 6210 8325
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	1 1 2.00 120.00 240.00
+	 6210 9135 6210 9990
+2 1 0 2 0 7 51 -1 -1 0.000 0 0 -1 1 0 2
+	1 1 2.00 120.00 240.00
+	 7785 3150 8325 5400
+4 0 0 50 -1 18 16 0.0000 4 180 1170 3915 1170 1. Unit cell\001
+4 0 0 50 -1 18 16 0.0000 4 240 1770 495 2835 2. Triangulation\001
+4 0 0 50 -1 18 16 0.0000 4 180 1875 5940 2835 3. Finite Element\001
+4 0 0 50 -1 18 16 0.0000 4 195 1545 9630 2835 4. Quadrature\001
+4 0 0 50 -1 18 16 0.0000 4 180 1635 3240 4950 5. DoFHandler\001
+4 0 0 50 -1 18 16 0.0000 4 240 1245 8145 4275 6. Mapping\001
+4 0 0 50 -1 18 16 0.0000 4 180 1350 8055 5940 7. FEValues\001
+4 0 0 50 -1 18 16 0.0000 4 180 1770 5355 8865 9. Linear Solver\001
+4 0 0 50 -1 18 16 0.0000 4 240 1890 5265 7560 8. Linear System\001
+4 0 0 50 -1 18 16 0.0000 4 225 1170 5535 10485 10. Output\001
diff --git a/doc/doxygen/images/collaboration.png b/doc/doxygen/images/collaboration.png
new file mode 100644
index 0000000..dc9e33b
Binary files /dev/null and b/doc/doxygen/images/collaboration.png differ
diff --git a/doc/doxygen/images/cone_2d.png b/doc/doxygen/images/cone_2d.png
new file mode 100644
index 0000000..802db1d
Binary files /dev/null and b/doc/doxygen/images/cone_2d.png differ
diff --git a/doc/doxygen/images/cone_3d.png b/doc/doxygen/images/cone_3d.png
new file mode 100644
index 0000000..cf187fb
Binary files /dev/null and b/doc/doxygen/images/cone_3d.png differ
diff --git a/doc/doxygen/images/conflicting_constraints.fig b/doc/doxygen/images/conflicting_constraints.fig
new file mode 100644
index 0000000..099de39
--- /dev/null
+++ b/doc/doxygen/images/conflicting_constraints.fig
@@ -0,0 +1,29 @@
+#FIG 3.2  Produced by xfig version 3.2.5b
+Landscape
+Center
+Metric
+A4      
+100.00
+Single
+-2
+1200 2
+1 3 0 1 0 1 50 -1 20 0.000 1 0.0000 3600 2700 101 101 3600 2700 3701 2700
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 3
+	 5400 3600 6300 2700 6300 900
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+	 1800 2700 3600 2700
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 3
+	 2700 3600 2700 1800 3600 900
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 3
+	 1800 1800 1800 3600 3555 3600
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 4
+	 3600 1800 3600 3600 5400 3600 5400 1800
+2 3 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+	 3600 1800 4500 900 6300 900 5400 1800 3600 1800
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+	 2250 1350 4050 1350
+2 3 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+	 1800 1800 2700 900 4500 900 3600 1800 1800 1800
+4 0 0 50 -1 19 14 0.0000 4 165 135 3780 2790 0\001
+4 0 0 50 -1 19 14 0.0000 4 165 135 3645 3555 2\001
+4 0 0 50 -1 19 14 0.0000 4 165 135 3645 2025 1\001
diff --git a/doc/doxygen/images/conflicting_constraints.png b/doc/doxygen/images/conflicting_constraints.png
new file mode 100644
index 0000000..99efb09
Binary files /dev/null and b/doc/doxygen/images/conflicting_constraints.png differ
diff --git a/doc/doxygen/images/cubes_hole.png b/doc/doxygen/images/cubes_hole.png
new file mode 100644
index 0000000..e0d8613
Binary files /dev/null and b/doc/doxygen/images/cubes_hole.png differ
diff --git a/doc/doxygen/images/dgp_doesnt_contain_p.fig b/doc/doxygen/images/dgp_doesnt_contain_p.fig
new file mode 100644
index 0000000..26a3ff5
--- /dev/null
+++ b/doc/doxygen/images/dgp_doesnt_contain_p.fig
@@ -0,0 +1,20 @@
+#FIG 3.2
+Landscape
+Center
+Metric
+A4      
+100.00
+Single
+-2
+1200 2
+5 1 0 2 0 7 50 -1 -1 0.000 0 0 1 0 3555.000 4428.000 2790 3240 3555 3015 4320 3240
+	1 1 3.00 60.00 120.00
+2 2 0 2 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+	 900 2700 2700 2700 2700 4500 900 4500 900 2700
+2 3 0 2 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+	 4500 4500 6300 4500 8100 900 4500 2700 4500 4500
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 0 0 3
+	 1620 3420 1755 3330 1890 3420
+4 0 0 50 -1 0 24 0.0000 4 240 255 1620 3690 K\001
+4 0 0 50 -1 0 24 0.0000 4 240 255 5535 3420 K\001
+4 0 0 50 -1 0 24 0.0000 4 240 195 3420 2835 F\001
diff --git a/doc/doxygen/images/dgp_doesnt_contain_p.png b/doc/doxygen/images/dgp_doesnt_contain_p.png
new file mode 100644
index 0000000..59275e0
Binary files /dev/null and b/doc/doxygen/images/dgp_doesnt_contain_p.png differ
diff --git a/doc/doxygen/images/direction_flag.fig b/doc/doxygen/images/direction_flag.fig
new file mode 100644
index 0000000..ee0f36a
--- /dev/null
+++ b/doc/doxygen/images/direction_flag.fig
@@ -0,0 +1,16 @@
+#FIG 3.2  Produced by xfig version 3.2.5
+Landscape
+Center
+Metric
+A4      
+100.00
+Single
+-2
+1200 2
+2 3 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 6
+	 1665 3375 2520 4545 4050 4005 4590 2520 2610 2025 1665 3375
+4 0 0 50 -1 19 20 0.0000 4 225 180 2475 4860 0\001
+4 0 0 50 -1 19 20 0.0000 4 225 180 4185 4185 1\001
+4 0 0 50 -1 19 20 0.0000 4 225 180 4680 2520 2\001
+4 0 0 50 -1 19 20 0.0000 4 225 180 2520 1845 3\001
+4 0 0 50 -1 19 20 0.0000 4 225 180 1395 3375 4\001
diff --git a/doc/doxygen/images/direction_flag.png b/doc/doxygen/images/direction_flag.png
new file mode 100644
index 0000000..42e6852
Binary files /dev/null and b/doc/doxygen/images/direction_flag.png differ
diff --git a/doc/doxygen/images/direction_flag_normals.fig b/doc/doxygen/images/direction_flag_normals.fig
new file mode 100644
index 0000000..7b433bc
--- /dev/null
+++ b/doc/doxygen/images/direction_flag_normals.fig
@@ -0,0 +1,31 @@
+#FIG 3.2  Produced by xfig version 3.2.5
+Landscape
+Center
+Metric
+A4      
+100.00
+Single
+-2
+1200 2
+2 3 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 6
+	 1665 3375 2520 4545 4050 4005 4590 2520 2610 2025 1665 3375
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	3 1 2.00 60.00 120.00
+	 3330 4275 3600 5040
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	3 1 2.00 60.00 120.00
+	 4320 3240 5040 3465
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	3 1 2.00 60.00 120.00
+	 3510 2250 3330 2925
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	3 1 2.00 60.00 120.00
+	 2160 2700 2745 3105
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	3 1 2.00 60.00 120.00
+	 2070 3915 1530 4365
+4 0 0 50 -1 19 20 0.0000 4 225 180 2475 4860 0\001
+4 0 0 50 -1 19 20 0.0000 4 225 180 4185 4185 1\001
+4 0 0 50 -1 19 20 0.0000 4 225 180 4680 2520 2\001
+4 0 0 50 -1 19 20 0.0000 4 225 180 2520 1845 3\001
+4 0 0 50 -1 19 20 0.0000 4 225 180 1395 3375 4\001
diff --git a/doc/doxygen/images/direction_flag_normals.png b/doc/doxygen/images/direction_flag_normals.png
new file mode 100644
index 0000000..7fc6a2e
Binary files /dev/null and b/doc/doxygen/images/direction_flag_normals.png differ
diff --git a/doc/doxygen/images/distorted_2d.fig b/doc/doxygen/images/distorted_2d.fig
new file mode 100644
index 0000000..c681447
--- /dev/null
+++ b/doc/doxygen/images/distorted_2d.fig
@@ -0,0 +1,15 @@
+#FIG 3.2  Produced by xfig version 3.2.5
+Landscape
+Center
+Metric
+A4      
+100.00
+Single
+-2
+1200 2
+2 3 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+	 4050 450 4050 1800 5400 450 5400 1800 4050 450
+2 3 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 4
+	 2250 450 2250 1800 3600 1800 2250 450
+2 3 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+	 450 450 450 1800 1800 1800 1800 450 450 450
diff --git a/doc/doxygen/images/distorted_2d.png b/doc/doxygen/images/distorted_2d.png
new file mode 100644
index 0000000..896460e
Binary files /dev/null and b/doc/doxygen/images/distorted_2d.png differ
diff --git a/doc/doxygen/images/distorted_2d_refinement_01.fig b/doc/doxygen/images/distorted_2d_refinement_01.fig
new file mode 100644
index 0000000..45e0702
--- /dev/null
+++ b/doc/doxygen/images/distorted_2d_refinement_01.fig
@@ -0,0 +1,18 @@
+#FIG 3.2  Produced by xfig version 3.2.5
+Landscape
+Center
+Metric
+A4      
+100.00
+Single
+-2
+1200 2
+1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 450 1800 45 45 450 1800 495 1800
+1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 450 450 45 45 450 450 495 450
+1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 1800 450 45 45 1800 450 1845 450
+1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 1800 1800 45 45 1800 1800 1845 1800
+2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+	 450 450 1800 450 1800 1800 450 1800 450 450
+3 2 1 1 0 7 50 -1 -1 3.000 0 0 0 5
+	 225 2520 450 1800 1125 585 1800 1800 2025 2520
+	 0.000 -1.000 -1.000 -1.000 0.000
diff --git a/doc/doxygen/images/distorted_2d_refinement_01.png b/doc/doxygen/images/distorted_2d_refinement_01.png
new file mode 100644
index 0000000..4acc8bc
Binary files /dev/null and b/doc/doxygen/images/distorted_2d_refinement_01.png differ
diff --git a/doc/doxygen/images/distorted_2d_refinement_02.fig b/doc/doxygen/images/distorted_2d_refinement_02.fig
new file mode 100644
index 0000000..759d01e
--- /dev/null
+++ b/doc/doxygen/images/distorted_2d_refinement_02.fig
@@ -0,0 +1,22 @@
+#FIG 3.2  Produced by xfig version 3.2.5
+Landscape
+Center
+Metric
+A4      
+100.00
+Single
+-2
+1200 2
+1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 450 1800 45 45 450 1800 495 1800
+1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 450 450 45 45 450 450 495 450
+1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 1800 450 45 45 1800 450 1845 450
+1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 1800 1800 45 45 1800 1800 1845 1800
+1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 450 1125 45 45 450 1125 495 1125
+1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 1125 450 45 45 1125 450 1170 450
+1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 1800 1125 45 45 1800 1125 1845 1125
+1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 1125 585 45 45 1125 585 1170 585
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 6
+	 450 450 450 1800 1125 585 1800 1800 1800 450 450 450
+3 2 1 1 0 7 50 -1 -1 3.000 0 0 0 5
+	 225 2520 450 1800 1125 585 1800 1800 2025 2520
+	 0.000 -1.000 -1.000 -1.000 0.000
diff --git a/doc/doxygen/images/distorted_2d_refinement_02.png b/doc/doxygen/images/distorted_2d_refinement_02.png
new file mode 100644
index 0000000..d9d2168
Binary files /dev/null and b/doc/doxygen/images/distorted_2d_refinement_02.png differ
diff --git a/doc/doxygen/images/distorted_2d_refinement_03.fig b/doc/doxygen/images/distorted_2d_refinement_03.fig
new file mode 100644
index 0000000..bc770a7
--- /dev/null
+++ b/doc/doxygen/images/distorted_2d_refinement_03.fig
@@ -0,0 +1,27 @@
+#FIG 3.2  Produced by xfig version 3.2.5
+Landscape
+Center
+Metric
+A4      
+100.00
+Single
+-2
+1200 2
+1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 450 1800 45 45 450 1800 495 1800
+1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 450 450 45 45 450 450 495 450
+1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 1800 450 45 45 1800 450 1845 450
+1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 1800 1800 45 45 1800 1800 1845 1800
+1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 450 1125 45 45 450 1125 495 1125
+1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 1125 450 45 45 1125 450 1170 450
+1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 1800 1125 45 45 1800 1125 1845 1125
+1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 1125 585 45 45 1125 585 1170 585
+1 3 0 1 0 0 50 -1 20 0.000 1 0.0000 1125 945 45 45 1125 945 1170 945
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 6
+	 450 450 450 1800 1125 585 1800 1800 1800 450 450 450
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 3
+	 450 1125 1125 945 1755 1125
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+	 1125 450 1125 945
+3 2 1 1 0 7 50 -1 -1 3.000 0 0 0 5
+	 225 2520 450 1800 1125 585 1800 1800 2025 2520
+	 0.000 -1.000 -1.000 -1.000 0.000
diff --git a/doc/doxygen/images/distorted_2d_refinement_03.png b/doc/doxygen/images/distorted_2d_refinement_03.png
new file mode 100644
index 0000000..d03c0a7
Binary files /dev/null and b/doc/doxygen/images/distorted_2d_refinement_03.png differ
diff --git a/doc/doxygen/images/distorted_3d.fig b/doc/doxygen/images/distorted_3d.fig
new file mode 100644
index 0000000..80f6ad7
--- /dev/null
+++ b/doc/doxygen/images/distorted_3d.fig
@@ -0,0 +1,30 @@
+#FIG 3.2  Produced by xfig version 3.2.5
+Landscape
+Center
+Metric
+A4      
+100.00
+Single
+-2
+1200 2
+2 3 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+	 495 2205 495 3555 1845 3555 1845 2205 495 2205
+2 3 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 4
+	 3600 2250 3600 3600 4950 3600 3600 2250
+2 3 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+	 6750 2250 6750 3600 8100 2250 8100 3600 6750 2250
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 7
+	 1845 3555 2430 2880 2430 1530 1845 2205 2430 1530 1080 1530
+	 495 2205
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 7
+	 4950 3600 5535 2925 5535 1575 4950 3600 5535 1575 4185 1575
+	 3600 2250
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 7
+	 8100 2250 8685 2925 8685 1575 8100 3600 8685 1575 7335 1575
+	 6750 2250
+2 1 2 1 0 7 50 -1 -1 3.000 0 0 -1 0 0 5
+	 495 3555 1080 2880 2430 2880 1080 2880 1080 1530
+2 1 2 1 0 7 50 -1 -1 3.000 0 0 -1 0 0 5
+	 3600 3600 4185 2925 5535 2925 4185 2925 4185 1575
+2 1 2 1 0 7 50 -1 -1 3.000 0 0 -1 0 0 5
+	 6750 3600 7335 2925 8685 2925 7335 2925 7335 1575
diff --git a/doc/doxygen/images/distorted_3d.png b/doc/doxygen/images/distorted_3d.png
new file mode 100644
index 0000000..7c95e25
Binary files /dev/null and b/doc/doxygen/images/distorted_3d.png differ
diff --git a/doc/doxygen/images/distributed_mesh_0.png b/doc/doxygen/images/distributed_mesh_0.png
new file mode 100644
index 0000000..0975c9a
Binary files /dev/null and b/doc/doxygen/images/distributed_mesh_0.png differ
diff --git a/doc/doxygen/images/distributed_mesh_1.png b/doc/doxygen/images/distributed_mesh_1.png
new file mode 100644
index 0000000..12864ae
Binary files /dev/null and b/doc/doxygen/images/distributed_mesh_1.png differ
diff --git a/doc/doxygen/images/distributed_mesh_2.png b/doc/doxygen/images/distributed_mesh_2.png
new file mode 100644
index 0000000..16db6f6
Binary files /dev/null and b/doc/doxygen/images/distributed_mesh_2.png differ
diff --git a/doc/doxygen/images/distributed_mesh_3.png b/doc/doxygen/images/distributed_mesh_3.png
new file mode 100644
index 0000000..9b97ad2
Binary files /dev/null and b/doc/doxygen/images/distributed_mesh_3.png differ
diff --git a/doc/doxygen/images/fe_q_bubbles_conditioning.png b/doc/doxygen/images/fe_q_bubbles_conditioning.png
new file mode 100644
index 0000000..a061872
Binary files /dev/null and b/doc/doxygen/images/fe_q_bubbles_conditioning.png differ
diff --git a/doc/doxygen/images/hanging_nodes.png b/doc/doxygen/images/hanging_nodes.png
new file mode 100644
index 0000000..dbd412b
Binary files /dev/null and b/doc/doxygen/images/hanging_nodes.png differ
diff --git a/doc/doxygen/images/hp-refinement-simple.png b/doc/doxygen/images/hp-refinement-simple.png
new file mode 100644
index 0000000..1343162
Binary files /dev/null and b/doc/doxygen/images/hp-refinement-simple.png differ
diff --git a/doc/doxygen/images/hyper_cross_2d.png b/doc/doxygen/images/hyper_cross_2d.png
new file mode 100644
index 0000000..23d902c
Binary files /dev/null and b/doc/doxygen/images/hyper_cross_2d.png differ
diff --git a/doc/doxygen/images/hyper_cross_3d.png b/doc/doxygen/images/hyper_cross_3d.png
new file mode 100644
index 0000000..4fba21f
Binary files /dev/null and b/doc/doxygen/images/hyper_cross_3d.png differ
diff --git a/doc/doxygen/images/hyper_cubes.png b/doc/doxygen/images/hyper_cubes.png
new file mode 100644
index 0000000..f6b44c9
Binary files /dev/null and b/doc/doxygen/images/hyper_cubes.png differ
diff --git a/doc/doxygen/images/hyper_l.png b/doc/doxygen/images/hyper_l.png
new file mode 100644
index 0000000..3726ac2
Binary files /dev/null and b/doc/doxygen/images/hyper_l.png differ
diff --git a/doc/doxygen/images/hyper_shell_12_cut.png b/doc/doxygen/images/hyper_shell_12_cut.png
new file mode 100644
index 0000000..56f1f61
Binary files /dev/null and b/doc/doxygen/images/hyper_shell_12_cut.png differ
diff --git a/doc/doxygen/images/hyper_shell_6_cross_plane.png b/doc/doxygen/images/hyper_shell_6_cross_plane.png
new file mode 100644
index 0000000..98a3516
Binary files /dev/null and b/doc/doxygen/images/hyper_shell_6_cross_plane.png differ
diff --git a/doc/doxygen/images/hyper_shell_96_cut.png b/doc/doxygen/images/hyper_shell_96_cut.png
new file mode 100644
index 0000000..ac963ad
Binary files /dev/null and b/doc/doxygen/images/hyper_shell_96_cut.png differ
diff --git a/doc/doxygen/images/hypershell-all-3.png b/doc/doxygen/images/hypershell-all-3.png
new file mode 100644
index 0000000..ee17b72
Binary files /dev/null and b/doc/doxygen/images/hypershell-all-3.png differ
diff --git a/doc/doxygen/images/hypershell-all.png b/doc/doxygen/images/hypershell-all.png
new file mode 100644
index 0000000..0b28c9e
Binary files /dev/null and b/doc/doxygen/images/hypershell-all.png differ
diff --git a/doc/doxygen/images/hypershell-boundary-only-3.png b/doc/doxygen/images/hypershell-boundary-only-3.png
new file mode 100644
index 0000000..3f3ad94
Binary files /dev/null and b/doc/doxygen/images/hypershell-boundary-only-3.png differ
diff --git a/doc/doxygen/images/hypershell-boundary-only-4.png b/doc/doxygen/images/hypershell-boundary-only-4.png
new file mode 100644
index 0000000..032f0e7
Binary files /dev/null and b/doc/doxygen/images/hypershell-boundary-only-4.png differ
diff --git a/doc/doxygen/images/hypershell-boundary-only.png b/doc/doxygen/images/hypershell-boundary-only.png
new file mode 100644
index 0000000..c282e35
Binary files /dev/null and b/doc/doxygen/images/hypershell-boundary-only.png differ
diff --git a/doc/doxygen/images/hypershell-nothing.png b/doc/doxygen/images/hypershell-nothing.png
new file mode 100644
index 0000000..3bacb25
Binary files /dev/null and b/doc/doxygen/images/hypershell-nothing.png differ
diff --git a/doc/doxygen/images/hypershell3d-12.png b/doc/doxygen/images/hypershell3d-12.png
new file mode 100644
index 0000000..eb07a94
Binary files /dev/null and b/doc/doxygen/images/hypershell3d-12.png differ
diff --git a/doc/doxygen/images/hypershell3d-6.png b/doc/doxygen/images/hypershell3d-6.png
new file mode 100644
index 0000000..9913c0a
Binary files /dev/null and b/doc/doxygen/images/hypershell3d-6.png differ
diff --git a/doc/doxygen/images/limit_level_difference_at_vertices.fig b/doc/doxygen/images/limit_level_difference_at_vertices.fig
new file mode 100644
index 0000000..dee6e64
--- /dev/null
+++ b/doc/doxygen/images/limit_level_difference_at_vertices.fig
@@ -0,0 +1,27 @@
+#FIG 3.2  Produced by xfig version 3.2.5
+Landscape
+Center
+Metric
+A4      
+100.00
+Single
+-2
+1200 2
+2 2 0 2 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+	 900 900 4500 900 4500 4500 900 4500 900 900
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+	 2700 900 2700 4500
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+	 900 2700 4455 2700
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+	 1800 900 1800 4500
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+	 900 3600 4455 3600
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+	 3600 2700 3600 4500
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+	 2250 2700 2250 3600
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+	 1800 3150 2700 3150
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+	 900 1800 2700 1800
diff --git a/doc/doxygen/images/limit_level_difference_at_vertices.png b/doc/doxygen/images/limit_level_difference_at_vertices.png
new file mode 100644
index 0000000..5eba043
Binary files /dev/null and b/doc/doxygen/images/limit_level_difference_at_vertices.png differ
diff --git a/doc/doxygen/images/logo200.png b/doc/doxygen/images/logo200.png
new file mode 100644
index 0000000..f1c9914
Binary files /dev/null and b/doc/doxygen/images/logo200.png differ
diff --git a/doc/doxygen/images/multigrid.png b/doc/doxygen/images/multigrid.png
new file mode 100644
index 0000000..5820426
Binary files /dev/null and b/doc/doxygen/images/multigrid.png differ
diff --git a/doc/doxygen/images/no_normal_flux_1.fig b/doc/doxygen/images/no_normal_flux_1.fig
new file mode 100644
index 0000000..66c8e10
--- /dev/null
+++ b/doc/doxygen/images/no_normal_flux_1.fig
@@ -0,0 +1,25 @@
+#FIG 3.2  Produced by xfig version 3.2.5
+Landscape
+Center
+Metric
+A4      
+100.00
+Single
+-2
+1200 2
+5 1 1 1 0 7 50 -1 -1 3.000 0 0 0 0 4245.855 4744.480 4275 1620 6345 2430 7335 4275
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 7
+	 4275 1620 6345 2430 7335 4275 5490 5130 4905 3915 3735 3510
+	 4275 1620
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+	 4905 3915 6345 2430
+2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	1 1 4.00 60.00 120.00
+	 6345 2430 7155 1935
+2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	1 1 4.00 60.00 120.00
+	 6345 2430 6660 1620
+4 0 0 50 -1 19 12 0.0000 4 135 240 4590 2835 K1\001
+4 0 0 50 -1 19 12 0.0000 4 135 240 5805 3915 K2\001
+4 0 0 50 -1 19 12 0.0000 4 135 285 6885 3375 F 2\001
+4 0 0 50 -1 19 12 0.0000 4 135 285 5175 1890 F 1\001
diff --git a/doc/doxygen/images/no_normal_flux_1.png b/doc/doxygen/images/no_normal_flux_1.png
new file mode 100644
index 0000000..56e0693
Binary files /dev/null and b/doc/doxygen/images/no_normal_flux_1.png differ
diff --git a/doc/doxygen/images/no_normal_flux_2.fig b/doc/doxygen/images/no_normal_flux_2.fig
new file mode 100644
index 0000000..7822a97
--- /dev/null
+++ b/doc/doxygen/images/no_normal_flux_2.fig
@@ -0,0 +1,24 @@
+#FIG 3.2  Produced by xfig version 3.2.5
+Landscape
+Center
+Metric
+A4      
+100.00
+Single
+-2
+1200 2
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 7
+	 4275 1620 6345 2430 7335 4275 5490 5130 4905 3915 3735 3510
+	 4275 1620
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+	 4905 3915 6345 2430
+2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	1 1 4.00 60.00 120.00
+	 4300 1612 4615 802
+2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	1 1 4.00 60.00 120.00
+	 4285 1636 3465 1395
+4 0 0 50 -1 19 12 0.0000 4 135 240 4590 2835 K1\001
+4 0 0 50 -1 19 12 0.0000 4 135 240 5805 3915 K2\001
+4 0 0 50 -1 19 12 0.0000 4 135 285 5175 1890 F 1\001
+4 0 0 50 -1 19 12 0.0000 4 135 285 3600 2565 F 2\001
diff --git a/doc/doxygen/images/no_normal_flux_2.png b/doc/doxygen/images/no_normal_flux_2.png
new file mode 100644
index 0000000..2698253
Binary files /dev/null and b/doc/doxygen/images/no_normal_flux_2.png differ
diff --git a/doc/doxygen/images/no_normal_flux_3.fig b/doc/doxygen/images/no_normal_flux_3.fig
new file mode 100644
index 0000000..dcc3c77
--- /dev/null
+++ b/doc/doxygen/images/no_normal_flux_3.fig
@@ -0,0 +1,28 @@
+#FIG 3.2  Produced by xfig version 3.2.5
+Landscape
+Center
+Metric
+A4      
+100.00
+Single
+-2
+1200 2
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 7
+	 4275 1620 6345 2430 7335 4275 5490 5130 4905 3915 3735 3510
+	 4275 1620
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+	 4905 3915 6345 2430
+2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	1 1 4.00 60.00 120.00
+	 4300 1612 4615 802
+2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	1 1 4.00 60.00 120.00
+	 4285 1636 3465 1395
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 4
+	 4275 1620 4725 2835 5580 3240 6345 4725
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+	 4725 2835 4365 3735
+4 0 0 50 -1 19 12 0.0000 4 135 285 5175 1890 F 1\001
+4 0 0 50 -1 19 12 0.0000 4 135 285 3600 2565 F 2\001
+4 0 0 50 -1 19 12 0.0000 4 135 240 4995 2475 K1\001
+4 0 0 50 -1 19 12 0.0000 4 135 240 4140 2970 K2\001
diff --git a/doc/doxygen/images/no_normal_flux_3.png b/doc/doxygen/images/no_normal_flux_3.png
new file mode 100644
index 0000000..79c3125
Binary files /dev/null and b/doc/doxygen/images/no_normal_flux_3.png differ
diff --git a/doc/doxygen/images/no_normal_flux_4.fig b/doc/doxygen/images/no_normal_flux_4.fig
new file mode 100644
index 0000000..20f05de
--- /dev/null
+++ b/doc/doxygen/images/no_normal_flux_4.fig
@@ -0,0 +1,28 @@
+#FIG 3.2
+Landscape
+Center
+Metric
+A4      
+100.00
+Single
+-2
+1200 2
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 3
+	 4140 5040 4590 3690 5850 3150
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 3
+	 5310 6435 6705 5895 7155 4545
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 11
+	 2745 3510 4590 3690 5805 5130 7155 4545 5850 3150 4095 2970
+	 2745 3510 2790 4905 4140 5040 5310 6435 5805 5130
+2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	1 1 3.00 60.00 120.00
+	 4590 3690 4590 3150
+2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	1 1 3.00 60.00 120.00
+	 4590 3690 4905 3285
+2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	1 1 3.00 60.00 120.00
+	 4590 3690 4320 4095
+2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+	1 1 3.00 60.00 120.00
+	 4590 3690 4185 4050
diff --git a/doc/doxygen/images/no_normal_flux_4.png b/doc/doxygen/images/no_normal_flux_4.png
new file mode 100644
index 0000000..d11fdb4
Binary files /dev/null and b/doc/doxygen/images/no_normal_flux_4.png differ
diff --git a/doc/doxygen/images/no_normal_flux_5.png b/doc/doxygen/images/no_normal_flux_5.png
new file mode 100644
index 0000000..70e94d8
Binary files /dev/null and b/doc/doxygen/images/no_normal_flux_5.png differ
diff --git a/doc/doxygen/images/no_normal_flux_6.png b/doc/doxygen/images/no_normal_flux_6.png
new file mode 100644
index 0000000..497aefe
Binary files /dev/null and b/doc/doxygen/images/no_normal_flux_6.png differ
diff --git a/doc/doxygen/images/parameter_gui.png b/doc/doxygen/images/parameter_gui.png
new file mode 100644
index 0000000..204eab6
Binary files /dev/null and b/doc/doxygen/images/parameter_gui.png differ
diff --git a/doc/doxygen/images/parameter_handler.fig b/doc/doxygen/images/parameter_handler.fig
new file mode 100644
index 0000000..56d16d6
--- /dev/null
+++ b/doc/doxygen/images/parameter_handler.fig
@@ -0,0 +1,27 @@
+#FIG 3.2  Produced by xfig version 3.2.5b
+Landscape
+Center
+Metric
+A4      
+100.00
+Single
+-2
+1200 2
+2 5 0 1 0 -1 57 -1 -1 0.000 0 0 -1 0 0 5
+	0 parameter_handler_background.png
+	 450 315 10935 315 10935 5837 450 5837 450 315
+4 0 0 50 -1 19 12 0.0000 4 135 105 4500 1665 0\001
+4 0 0 50 -1 19 12 0.0000 4 195 3480 4500 5040 [Floating point range 0...1 (inclusive)]\001
+4 0 0 50 -1 19 12 0.0000 4 135 210 4500 1935 10\001
+4 0 0 50 -1 19 12 0.0000 4 195 3135 4500 1350 [Integer range 1...1000 (inclusive)]\001
+4 0 0 50 -1 19 12 0.0000 4 195 4650 4500 1080 A parameter that describes the maximal number...\001
+4 0 0 50 -1 19 12 0.0000 4 135 210 4500 810 10\001
+4 0 0 50 -1 19 12 0.0000 4 135 105 4500 5355 2\001
+4 0 0 50 -1 19 12 0.0000 4 135 105 4500 3645 1\001
+4 0 0 50 -1 19 12 0.0000 4 150 555 4500 3960 SSOR\001
+4 0 0 50 -1 19 12 0.0000 4 180 1230 4500 3375 SSOR|Jacobi\001
+4 0 0 50 -1 19 12 0.0000 4 195 2280 4500 3105 A string that describes...\001
+4 0 0 50 -1 19 12 0.0000 4 150 555 4500 2790 SSOR\001
+4 0 0 50 -1 19 12 0.0000 4 135 270 4500 4500 1.0\001
+4 0 0 50 -1 19 12 0.0000 4 135 270 4500 5625 1.0\001
+4 0 0 50 -1 19 12 0.0000 4 180 3825 4500 4770 The numerical value (between zero and...\001
diff --git a/doc/doxygen/images/parameter_handler.png b/doc/doxygen/images/parameter_handler.png
new file mode 100644
index 0000000..ca1ab96
Binary files /dev/null and b/doc/doxygen/images/parameter_handler.png differ
diff --git a/doc/doxygen/images/parameter_handler_background.png b/doc/doxygen/images/parameter_handler_background.png
new file mode 100644
index 0000000..46a08b4
Binary files /dev/null and b/doc/doxygen/images/parameter_handler_background.png differ
diff --git a/doc/doxygen/images/reorder_sparsity_step_31_boost_cmk.png b/doc/doxygen/images/reorder_sparsity_step_31_boost_cmk.png
new file mode 100644
index 0000000..2963cf6
Binary files /dev/null and b/doc/doxygen/images/reorder_sparsity_step_31_boost_cmk.png differ
diff --git a/doc/doxygen/images/reorder_sparsity_step_31_boost_king.png b/doc/doxygen/images/reorder_sparsity_step_31_boost_king.png
new file mode 100644
index 0000000..697651d
Binary files /dev/null and b/doc/doxygen/images/reorder_sparsity_step_31_boost_king.png differ
diff --git a/doc/doxygen/images/reorder_sparsity_step_31_boost_md.png b/doc/doxygen/images/reorder_sparsity_step_31_boost_md.png
new file mode 100644
index 0000000..0332362
Binary files /dev/null and b/doc/doxygen/images/reorder_sparsity_step_31_boost_md.png differ
diff --git a/doc/doxygen/images/reorder_sparsity_step_31_deal_cmk.png b/doc/doxygen/images/reorder_sparsity_step_31_deal_cmk.png
new file mode 100644
index 0000000..ee20fdb
Binary files /dev/null and b/doc/doxygen/images/reorder_sparsity_step_31_deal_cmk.png differ
diff --git a/doc/doxygen/images/reorder_sparsity_step_31_downstream.png b/doc/doxygen/images/reorder_sparsity_step_31_downstream.png
new file mode 100644
index 0000000..2bcc5aa
Binary files /dev/null and b/doc/doxygen/images/reorder_sparsity_step_31_downstream.png differ
diff --git a/doc/doxygen/images/reorder_sparsity_step_31_original.png b/doc/doxygen/images/reorder_sparsity_step_31_original.png
new file mode 100644
index 0000000..fd83778
Binary files /dev/null and b/doc/doxygen/images/reorder_sparsity_step_31_original.png differ
diff --git a/doc/doxygen/images/reorder_sparsity_step_31_random.png b/doc/doxygen/images/reorder_sparsity_step_31_random.png
new file mode 100644
index 0000000..9c640c0
Binary files /dev/null and b/doc/doxygen/images/reorder_sparsity_step_31_random.png differ
diff --git a/doc/doxygen/images/simplex_2d.png b/doc/doxygen/images/simplex_2d.png
new file mode 100644
index 0000000..f9403ac
Binary files /dev/null and b/doc/doxygen/images/simplex_2d.png differ
diff --git a/doc/doxygen/images/simplex_3d.png b/doc/doxygen/images/simplex_3d.png
new file mode 100644
index 0000000..5ab117b
Binary files /dev/null and b/doc/doxygen/images/simplex_3d.png differ
diff --git a/doc/doxygen/images/sphere.png b/doc/doxygen/images/sphere.png
new file mode 100644
index 0000000..04c24bd
Binary files /dev/null and b/doc/doxygen/images/sphere.png differ
diff --git a/doc/doxygen/images/sphere_section.png b/doc/doxygen/images/sphere_section.png
new file mode 100644
index 0000000..b064c2e
Binary files /dev/null and b/doc/doxygen/images/sphere_section.png differ
diff --git a/doc/doxygen/options.dox.in b/doc/doxygen/options.dox.in
new file mode 100644
index 0000000..4cce0f8
--- /dev/null
+++ b/doc/doxygen/options.dox.in
@@ -0,0 +1,235 @@
+# This file contains project-specific configurations for the 
+# deal.II documentation.
+
+PROJECT_NAME           = "The deal.II Library"
+PROJECT_NUMBER         = "Reference documentation for deal.II version @DEAL_II_PACKAGE_VERSION@"
+PROJECT_LOGO           = @CMAKE_CURRENT_SOURCE_DIR@/images/logo200.png
+OUTPUT_DIRECTORY       = .
+
+# The doxygen documentation says this about the following flag:
+#   If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
+#   4096 sub-directories (in 2 levels) under the output directory of each output
+#   format and will distribute the generated files over these directories.
+#   Enabling this option can be useful when feeding doxygen a huge amount of
+#   source files, where putting all generated files in the same directory would
+#   otherwise cause performance problems for the file system.
+# We set the flag to NO since we occasionally need to reference individual
+# doxygen-generated files from other parts of the documentation and need
+# to have predictable file paths.
+#
+# The same is true for SHORT_NAMES, which compresses file names, and
+# CASE_SENSE_NAMES which allows file names to contain upper case letters.
+CREATE_SUBDIRS         = NO
+SHORT_NAMES            = NO
+CASE_SENSE_NAMES       = YES
+
+
+INLINE_INHERITED_MEMB  = NO
+STRIP_FROM_PATH        = @CMAKE_SOURCE_DIR@/include @CMAKE_BINARY_DIR@/include
+STRIP_FROM_INC_PATH    = @CMAKE_SOURCE_DIR@/include @CMAKE_BINARY_DIR@/include
+FULL_PATH_NAMES        = YES
+JAVADOC_AUTOBRIEF      = NO
+INHERIT_DOCS           = YES
+TAB_SIZE               = 8
+
+BUILTIN_STL_SUPPORT    = YES
+
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+EXTRACT_ALL            = NO
+EXTRACT_PRIVATE        = YES
+EXTRACT_STATIC         = YES
+EXTRACT_LOCAL_CLASSES  = YES
+HIDE_UNDOC_MEMBERS     = YES
+HIDE_UNDOC_CLASSES     = YES
+SORT_MEMBER_DOCS       = NO
+SORT_BRIEF_DOCS        = NO
+SORT_BY_SCOPE_NAME     = NO
+GENERATE_TODOLIST      = YES
+GENERATE_DEPRECATEDLIST= YES
+SHOW_USED_FILES        = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+QUIET                  = YES
+WARNINGS               = YES
+WARN_IF_UNDOCUMENTED   = NO
+WARN_IF_DOC_ERROR      = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to the input files
+#---------------------------------------------------------------------------
+
+INPUT                  =
+RECURSIVE              = YES
+EXCLUDE_PATTERNS       = *.templates.h
+EXAMPLE_PATH           = @CMAKE_BINARY_DIR@/doc/doxygen/tutorial \
+                         @CMAKE_SOURCE_DIR@/examples/doxygen
+EXAMPLE_RECURSIVE      = NO
+IMAGE_PATH             =
+INPUT_FILTER           = ${CMAKE_SOURCE_DIR}/doc/doxygen/scripts/filter
+FILTER_SOURCE_FILES    = YES
+
+# Define some user-defined commands that we can use in the documentation
+# and that expand to specific text. For some more transformations, see
+# the scripts/filter perl script that is run over the header files before
+# doxygen sees them and that can expand more complex expressions.
+ALIASES += dealiiRequiresUpdateFlags{1}="@note For this function to work properly, the underlying FEValues, FEFaceValues, or FESubfaceValues object on which you call it must have computed the information you are requesting. To do so, the <code>\1</code> flag must be an element of the list of UpdateFlags that you passed to the constructor of this object. See @ref UpdateFlags for more information."
+
+ALIASES += dealiiOperationIsMultithreaded="@note If deal.II is configured with threads, this operation will run multi-threaded by splitting the work into smaller chunks (assuming there is enough work to make this worthwhile)."
+
+
+#---------------------------------------------------------------------------
+# configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+SOURCE_BROWSER         = YES
+INLINE_SOURCES         = NO
+STRIP_CODE_COMMENTS    = YES
+USE_HTAGS              = NO
+VERBATIM_HEADERS       = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+ALPHABETICAL_INDEX     = YES
+COLS_IN_ALPHA_INDEX    = 3
+
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+GENERATE_HTML          = YES
+HTML_OUTPUT            = deal.II
+HTML_FILE_EXTENSION    = .html
+HTML_HEADER            = header.html
+HTML_FOOTER            = footer.html
+USE_MATHJAX            = @_use_mathjax@
+MATHJAX_RELPATH        = https://cdn.mathjax.org/mathjax/latest
+MATHJAX_EXTENSIONS     = TeX/AMSmath TeX/AMSsymbols
+HTML_COLORSTYLE_HUE    = 220
+HTML_COLORSTYLE_SAT    = 200
+HTML_EXTRA_STYLESHEET  = @CMAKE_CURRENT_SOURCE_DIR@/stylesheet.css
+LAYOUT_FILE            = @CMAKE_CURRENT_SOURCE_DIR@/DoxygenLayout.xml
+
+
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# don't generate latex output -- it's simply too big!
+GENERATE_LATEX         = NO
+
+# but set a few flags for when processing formulas embedded
+# in the documentation.
+PAPER_TYPE             = a4wide
+EXTRA_PACKAGES         = amsmath amsfonts
+LATEX_BATCHMODE        = YES
+
+#---------------------------------------------------------------------------
+# Other output formats
+#---------------------------------------------------------------------------
+
+GENERATE_RTF           = NO
+GENERATE_MAN           = NO
+GENERATE_XML           = YES
+GENERATE_AUTOGEN_DEF   = NO
+GENERATE_PERLMOD       = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+ENABLE_PREPROCESSING   = YES
+MACRO_EXPANSION        = YES
+EXPAND_ONLY_PREDEF     = YES
+SEARCH_INCLUDES        = YES
+INCLUDE_PATH           = @CMAKE_SOURCE_DIR@/include/deal.II
+INCLUDE_FILE_PATTERNS  =
+
+# set a few variables that help us generate documentation for
+# things like the PETSc and Trilinos wrappers, even if they were
+# not configured
+#
+# in the case of DEAL_II_NAMESPACE_OPEN/CLOSE, just expand them
+# to nothing so that they don't show up in the documentation
+# at all.
+PREDEFINED             = DOXYGEN=1 \
+                         DEBUG=1 \
+                         DEAL_II_USE_MT_POSIX=1 \
+                         DEAL_II_WITH_ARPACK=1 \
+                         DEAL_II_WITH_BOOST=1 \
+                         DEAL_II_WITH_CXX11=1 \
+                         DEAL_II_WITH_CXX14=1 \
+                         DEAL_II_WITH_HDF5=1 \
+                         DEAL_II_WITH_LAPACK=1 \
+                         DEAL_II_WITH_METIS=1 \
+                         DEAL_II_WITH_MPI=1 \
+                         DEAL_II_WITH_MUPARSER=1 \
+                         DEAL_II_WITH_NETCDF=1 \
+                         DEAL_II_WITH_OPENCASCADE=1 \
+                         DEAL_II_WITH_P4EST=1 \
+                         DEAL_II_WITH_PETSC=1 \
+                         DEAL_II_WITH_SLEPC=1 \
+                         DEAL_II_WITH_THREADS=1 \
+                         DEAL_II_WITH_TRILINOS=1 \
+                         DEAL_II_WITH_UMFPACK=1 \
+                         DEAL_II_WITH_ZLIB=1 \
+                         DEAL_II_NAMESPACE_OPEN= \
+                         DEAL_II_NAMESPACE_CLOSE=
+
+# do not expand exception declarations
+EXPAND_AS_DEFINED      = DeclException0 \
+                         DeclException1 \
+                         DeclException2 \
+                         DeclException3 \
+                         DeclException4 \
+                         DeclException5
+SKIP_FUNCTION_MACROS   = YES
+
+#---------------------------------------------------------------------------
+# Configuration::additions related to external references
+#---------------------------------------------------------------------------
+
+GENERATE_TAGFILE       = deal.tag
+ALLEXTERNALS           = NO
+EXTERNAL_GROUPS        = YES
+PERL_PATH              = /usr/bin/perl
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+CLASS_DIAGRAMS         = YES
+HIDE_UNDOC_RELATIONS   = NO
+HAVE_DOT               = YES
+CLASS_GRAPH            = YES
+COLLABORATION_GRAPH    = NO
+GROUP_GRAPHS           = YES
+UML_LOOK               = NO
+TEMPLATE_RELATIONS     = YES
+INCLUDE_GRAPH          = YES
+INCLUDED_BY_GRAPH      = YES
+CALL_GRAPH             = NO
+CALLER_GRAPH           = NO
+GRAPHICAL_HIERARCHY    = NO
+DIRECTORY_GRAPH        = YES
+DOT_IMAGE_FORMAT       = svg
+MAX_DOT_GRAPH_DEPTH    = 5
+DOT_TRANSPARENT        = NO
+GENERATE_LEGEND        = YES
+
+DOT_CLEANUP            = YES
+
+#---------------------------------------------------------------------------
+# Configuration::additions related to the search engine
+#---------------------------------------------------------------------------
+
+SEARCHENGINE           = YES
+SERVER_BASED_SEARCH    = NO
diff --git a/doc/doxygen/scripts/code-gallery.pl b/doc/doxygen/scripts/code-gallery.pl
new file mode 100644
index 0000000..f83aa6f
--- /dev/null
+++ b/doc/doxygen/scripts/code-gallery.pl
@@ -0,0 +1,68 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2016 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+use strict;
+
+my $gallery_file = shift;
+open GALLERY, "<$gallery_file";
+
+my $gallery_dir = shift;
+
+
+# Print the first part of code-gallery.h.in up until the point where we
+# find the line with '@@GALLERY_LIST@@'
+while (my $line = <GALLERY>)
+{
+  last if($line =~ m/\@\@GALLERY_LIST\@\@/);
+  print $line;
+}
+
+# print the list of code gallery programs as a descriptor/description
+# list
+print "<dl>\n";
+foreach my $gallery (sort @ARGV)
+{
+    my $gallery_underscore = $gallery;
+
+    open AUTHOR, "<$gallery_dir/$gallery/doc/author";
+    my $authors;
+    while (my $line = <AUTHOR>) {
+        chop $line;
+        $authors .= $line . ", ";
+    }
+
+    # remove trailing whitespaces, as well as the trailing comma
+    chop $authors;
+    $authors =~ s/,$//;
+
+    $gallery_underscore    =~ s/-/_/;
+    print "  <dt><b>\@ref code_gallery_${gallery_underscore} \"$gallery\"</b> (by $authors)</dt>\n";
+    print "    <dd>\n";
+    open TOOLTIP, "<$gallery_dir/$gallery/doc/tooltip";
+    while (my $line = <TOOLTIP>) {
+        print "      $line";
+    }
+    print "    </dd>\n";
+    print "\n";
+}
+print "</dl>\n";
+
+
+# Then print the rest of code-gallery.h.in
+while (my $line = <GALLERY>)
+{
+  print $line;
+}
+close GALLERY;
diff --git a/doc/doxygen/scripts/create_anchors b/doc/doxygen/scripts/create_anchors
new file mode 100644
index 0000000..c7ef23a
--- /dev/null
+++ b/doc/doxygen/scripts/create_anchors
@@ -0,0 +1,31 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2007 - 2013 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+
+# If we find a heading in a .dox file, create an HTML anchor for it.
+
+while (<>) {
+    if ( /<h.>(.*)<\/h.>\s*/ ) {
+	$reftext = $1;
+
+	# for the anchor, use the name of the section but discard
+	# everything except for letters, numbers, and underscores
+	$reftext =~ s/[^a-zA-Z0-9_]//g;
+
+	print "<a name=\"$reftext\"></a>$_\n";
+    } else {
+        print;
+    }
+}
diff --git a/doc/doxygen/scripts/filter b/doc/doxygen/scripts/filter
new file mode 100755
index 0000000..afddfc4
--- /dev/null
+++ b/doc/doxygen/scripts/filter
@@ -0,0 +1,125 @@
+#!/usr/bin/perl -p
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2007 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+# make sure we can just write $...$ for formulas.
+s/\$/\@f\$/g;
+
+# however, undo that change if the dollar sign was escaped with a backslash
+s/\\\@f\$/\$/g;
+
+# We don't let doxygen put everything into a namespace
+# dealii. consequently, doxygen can't link references that contain an
+# explicit dealii:: qualification. remove it and replace it by the
+# global scope
+#
+# Now, as of doxygen 1.5, this still produces the wrong result, but
+# that's not our fault. This is reported here:
+#    http://bugzilla.gnome.org/show_bug.cgi?id=365053
+s/(::)?dealii::/::/g;
+
+# Replace all occurrences of something like step-xx by
+#    @ref step_xx "step-xx"
+# so that doxygen properly cross references them. Before we had
+# this rule, we actually had to write this sequence out in our
+# documentation. Unfortunately, as a consequence, there are vestiges
+# of this style, so we can't substitute things that look like
+# "step-xx". We therefore do not substitute if step-xx is preceded or
+# followed by quotation marks, or if the text is explicitly
+# preceded by a backslash for escaping.
+#
+# There are other exceptions:
+# - the scripts in doc/doxygen/tutorial produce files that have
+#   table of contents entries. We don't want these cross-linked
+#   to themselves.
+# - things like step-12.solution.png that typically appear in
+#   @image commands.
+# - things in headings
+s/(?<![\"\\\/])step-(\d+)(?!\")/\@ref step_\1 \"step-\1\"/gi
+  if !m/(\@page|\<img|\@image|<h\d>)/i;
+
+# If step-xx was explicitly escaped with a backslash, remove the
+# latter
+s/\\(step-\d+)/\1/g;
+
+
+s#(static dealii::ExceptionBase\&)#\n//\! \@ingroup Exceptions\n \1#g;
+
+# doxygen version 1.7.1 and later have the habit of thinking that
+# everything that starts with "file:" is the beginning of a link,
+# but we occasionally use this in our tutorials in the form
+# "...this functionality is declared in the following header file:",
+# where it leads to a non-functional link. We can avoid the problem
+# by replacing a "file:" at the end of a line with the text
+# "file :", which doxygen doesn't recognize:
+s#file:[ \t]*$#file :#g;
+
+
+
+# Handle commands such as @dealiiVideoLecture{20.5,33} by expanding it
+# into a note with some text
+if (m/(\@dealiiVideoLecture\{([0-9\.]+)((, *[0-9\.]+ *)*)\})/)
+{
+    $substext = $1;
+
+    $text = "\@note The material presented here is also discussed in ";
+
+    # add links to the individual lectures
+    $text = $text . "<a href=\"http://www.math.tamu.edu/~bangerth/videos.676.$2.html\">video lecture $2</a>";
+    
+    if (length($3) > 0)
+    {
+        # if it is a list of lectures, also list the others.
+	$x = $3;
+	$x =~ s/^, *//g;
+	@otherlectures = split (',', "$x");
+
+	foreach $lecture (@otherlectures)
+	{
+	    $text = $text . ", <a href=\"http://www.math.tamu.edu/~bangerth/videos.676.$lecture.html\">video lecture $lecture</a>";
+	}
+    }
+
+    $text = $text . ". (All video lectures are also available <a href=\"http://www.math.tamu.edu/~bangerth/videos.html\">here</a>.)";
+    s/(\@dealiiVideoLecture\{([0-9\.]+)((, *[0-9\.]+ *)*)\})/$text/;
+}
+
+
+# @dealiiVideoLectureSeeAlso works as above, but just expands into
+# regular text, no @note
+if (m/(\@dealiiVideoLectureSeeAlso\{([0-9\.]+)((, *[0-9\.]+ *)*)\})/)
+{
+    $substext = $1;
+
+    $text = "See also ";
+
+    # add links to the individual lectures
+    $text = $text . "<a href=\"http://www.math.tamu.edu/~bangerth/videos.676.$2.html\">video lecture $2</a>";
+    
+    if (length($3) > 0)
+    {
+	$x = $3;
+	$x =~ s/^, *//g;
+	@otherlectures = split (',', "$x");
+
+	foreach $lecture (@otherlectures)
+	{
+	    $text = $text . ", <a href=\"http://www.math.tamu.edu/~bangerth/videos.676.$lecture.html\">video lecture $lecture</a>";
+	}
+    }
+
+    $text = $text . ".";
+    s/(\@dealiiVideoLectureSeeAlso\{([0-9\.]+)((, *[0-9\.]+ *)*)\})/$text/;
+}
diff --git a/doc/doxygen/scripts/intro2toc b/doc/doxygen/scripts/intro2toc
new file mode 100644
index 0000000..29164ed
--- /dev/null
+++ b/doc/doxygen/scripts/intro2toc
@@ -0,0 +1,67 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2006 - 2013 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+print "    <ul>\n";
+
+$level = 3;
+while (<>) {
+    if ( /<h(.)>(.*)<\/h.>\s*/ ) {
+	$newlevel = $1;
+	$text = $2;
+	# only allow header levels 3 through 6, since higher ones are
+	# reserved for top-level document headers
+        #
+	if (($newlevel =~ /[3456]/)) {
+
+          if ($newlevel > $level) {
+              for ($i=$level; $i<$newlevel; ++$i) {
+                  print "      <ul>\n";
+              }
+          } elsif ($newlevel < $level) {
+              for ($i=$newlevel; $i<$level; ++$i) {
+                  print "      </ul>\n";
+              }
+          }
+
+          $reftext = $text;
+
+          # for the anchor, use the name of the section but discard
+          # everything except for letters, numbers, and underscores
+          $reftext =~ s/[^a-zA-Z0-9_]//g;
+
+          # replace quotation marks by the appropriate HTML quotation marks
+          $text =~ s!``!&ldquo;!g;
+          $text =~ s!''!&rdquo;!g;
+
+          # replace double dashes in comments by &mdash;
+          $text =~ s!--!&mdash;!g;
+
+          # we sometimes escape words with % (as in "%Boundary
+          # conditions") because doxygen would otherwise link the word
+          # to a class name. This isn't necessary in the index because
+          # the text already appears in a hyperref, so get rid of the
+          # percent sign
+          $text =~ s!\%!!g;
+
+          print "        <li><a href=\"#$reftext\">$text</a>\n";
+
+          $level = $newlevel;
+       }
+    }
+}
+
+for (; $level>=3; --$level) {
+    print "    </ul>\n";
+}
diff --git a/doc/doxygen/scripts/make_gallery.pl b/doc/doxygen/scripts/make_gallery.pl
new file mode 100644
index 0000000..06a2ebf
--- /dev/null
+++ b/doc/doxygen/scripts/make_gallery.pl
@@ -0,0 +1,164 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2013, 2015, 2016 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+if ($#ARGV < 2) {
+  print "\nUsage: make_gallery.pl cmake_source_dir gallery_name gallery_dir gallery_src_files...\n";
+  exit;
+}
+
+my $cmake_source_dir = shift(@ARGV);
+
+my $gallery = shift(@ARGV);
+my $gallery_underscore = $gallery;
+$gallery_underscore    =~ s/-/_/g;
+
+my $gallery_dir = shift(@ARGV);
+my $author_file = "$gallery_dir/doc/author";
+
+# next get the source files. put all markdown files (in the order
+# given) first so that we get to show them first; this makes sense
+# because markdown files typically provide overview information.
+# all other files follow in sorted order
+my @src_files = grep { $_ =~ m/\.(md|markdown)$/ } @ARGV;
+push @src_files, sort(grep { !($_ =~ m/\.(md|markdown)$/) } @ARGV);
+
+
+# read the names of authors (first line of the author file); escape
+# '<' and '>' as they appear in the email address. also trim trailing
+# space and newlines
+open(AUTHORS, "<", $author_file) or die "Can't open author file $author_file";
+my $authors = <AUTHORS>;
+$authors    =~ s/</&lt;/g;
+$authors    =~ s/>/&gt;/g;
+$authors    =~ s/\s*$//g;
+close AUTHORS;
+print
+"/**
+  * \@page code_gallery_$gallery_underscore The $gallery code gallery program
+\@htmlonly
+<p align=\"center\"> 
+  This program was contributed by $authors.
+  <br>
+  It comes without any warranty or support by its authors or the authors of deal.II.
+</p>
+
+\@endhtmlonly
+
+This program is part of the \@ref CodeGallery \"deal.II code gallery\" and
+consists of the following files (click to inspect):
+";
+
+foreach my $file (@src_files)
+{
+  print "- <a href=\"../code-gallery/$gallery/$file\">$file</a>\n";
+  if ($file =~ /\.(md|markdown|cc|cpp|cxx|c\+\+|h|hh|hxx)$/)
+  {
+      print "  (<a href=\"#ann-$file\">annotated version</a>)\n";
+  }
+}
+print "\n";
+
+
+# Next go through the list of files and see whether any of these are
+# pictures we could show here (decided by the file name extension):
+my @picture_files;
+foreach my $file (@src_files)
+{
+    if ($file =~ /\.(png|jpg|gif|svg)$/)
+    {
+        push @picture_files, $file;
+    }
+}
+
+if (@picture_files)
+{
+    print "<h1>Pictures from this code gallery program</h1>\n";
+    print "<p align=\"center\">\n";
+    print "<table>\n";
+
+    # print four pictures per row
+    while (@picture_files)
+    {
+        print "     <tr>\n";
+        for my $i (0 .. 3)
+        {
+            if (@picture_files)
+            {
+                print "       <td>\n";
+                my $pic = pop(@picture_files);
+                print "         <img width=\"250\" src=\"../code-gallery/$gallery/$pic\">\n";
+                print "       </td>\n";
+            }
+        }
+        print "     </tr>\n";
+    }
+
+    print "</table>\n";
+    print "</p>\n";
+}
+
+
+# Then go through the list of files again and see which ones we can
+# annotate and copy into the current document
+foreach my $file (@src_files)
+{
+    # just copy markdown files as-is, but make sure we update links
+    # that may be inlined. doxygen doesn't seem to understand the
+    # ```...``` form of offset commands, so keep track of that as
+    # well
+    if ($file =~ /\.(md|markdown)$/)
+    {
+        print "<a name=\"ann-$file\"></a>\n";
+        print "<h1>Annotated version of $file</h1>\n";
+
+        open(MD, "<", "$gallery_dir/$file") or die "Can't open markdown file $gallery_dir/$file";
+        my $incode = 0;
+        while (my $line = <MD>)
+        {
+            # replace ``` markdown commands by doxygen equivalents
+            while ($line =~ m/```/)
+            {
+                if ($incode == 0) {
+                    $line =~ s/```/\@code{.sh}/;
+                    $incode = 1;
+                } else {
+                    $line =~ s/```/\@endcode/;
+                    $incode = 0;
+                }
+            }
+
+            # update every markdown link of the form "[text](./filename)"
+            $line =~ s{(\[[^\]]*\])\(\./}{$1(../code-gallery/$gallery/}g;
+            print "$line";
+        }
+        close MD;
+        print "\n\n";
+    }
+
+    # annotate source files
+    if ($file =~ /\.(cc|cpp|cxx|c\+\+|h|hh|hxx)$/)
+    {
+        print "<a name=\"ann-$file\"></a>\n";
+        print "<h1>Annotated version of $file</h1>\n";
+
+        system $^X, "$cmake_source_dir/doc/doxygen/scripts/program2doxygen", "$gallery_dir/$file";
+
+        print "\n\n";
+    }
+}
+
+
+# end the doxygen input file
+print "*/\n";
diff --git a/doc/doxygen/scripts/make_step.pl b/doc/doxygen/scripts/make_step.pl
new file mode 100644
index 0000000..36907f9
--- /dev/null
+++ b/doc/doxygen/scripts/make_step.pl
@@ -0,0 +1,70 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2013, 2016 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+if (scalar(@ARGV) != 2) {
+  print "\nUsage: make_step.pl step cmake_source_dir\n";
+  exit;
+}
+
+($step, $cmake_source_dir) = @ARGV;
+($step_underscore = $step) =~ s/-/_/;
+
+# locations of the helper scripts and of this tutorial program's files
+$scripts = "$cmake_source_dir/doc/doxygen/scripts";
+$example = "$cmake_source_dir/examples/$step";
+print
+"/**
+  * \@page $step_underscore The $step tutorial program
+\@htmlonly
+<table class=\"tutorial\" width=\"50%\">
+<tr><th colspan=\"2\"><b><small>Table of contents</small></b></th></tr>
+<tr><td width=\"50%\" valign=\"top\">
+<ol>
+  <li> <a href=\"#Intro\" class=bold>Introduction</a>
+";
+
+system $^X, "$scripts/intro2toc", "$example/doc/intro.dox";
+
+print "  <li> <a href=\"#CommProg\" class=bold>The commented program</a>\n";
+
+system $^X, "$scripts/program2toc", "$example/$step.cc";
+
+print
+"</ol></td><td width=\"50%\" valign=\"top\"><ol>
+  <li value=\"3\"> <a href=\"#Results\" class=bold>Results</a>
+";
+
+system $^X, "$scripts/intro2toc", "$example/doc/results.dox";
+
+print
+"  <li> <a href=\"#PlainProg\" class=bold>The plain program</a>
+</ol> </td> </tr> </table>
+\@endhtmlonly
+";
+
+system $^X, "$scripts/create_anchors", "$example/doc/intro.dox";
+
+print " * <a name=\"CommProg\"></a>\n",
+      " * <h1> The commented program</h1>\n";
+system $^X, "$scripts/program2doxygen", "$example/$step.cc";
+
+system $^X, "$scripts/create_anchors", "$example/doc/results.dox";
+
+print
+"<a name=\"PlainProg\"></a>
+<h1> The plain program</h1>
+\@include \"$step.cc\"
+ */
+";
diff --git a/doc/doxygen/scripts/mod_footer.pl.in b/doc/doxygen/scripts/mod_footer.pl.in
new file mode 100644
index 0000000..ab562cb
--- /dev/null
+++ b/doc/doxygen/scripts/mod_footer.pl.in
@@ -0,0 +1,15 @@
+# Footer filter: appends a hosting notice after the $doxygenversion placeholder.
+use Sys::Hostname;
+my $host = hostname;
+# HTML snippet with the hosting logos; single-quoted heredoc, so nothing is interpolated
+my $hosting = << 'EOT'
+  Hosting provided by 
+<a href="http://www.iwr.uni-heidelberg.de/"><img src="http://www.dealii.org/pictures/IWRlogo4.png" alt="IWR"></a>
+<a href="http://www.uni-heidelberg.de/"><img src="http://www.dealii.org/pictures/UniLogo4.png" alt="Universität Heidelberg"></a>
+EOT
+    ;
+# only on host "simweb": edits the text in $_ (presumably one footer line per run, e.g. via perl -p -- TODO confirm)
+if ($host eq "simweb")
+{
+    s/\$doxygenversion/\$doxygenversion $hosting/;
+}
diff --git a/doc/doxygen/scripts/mod_header.pl.in b/doc/doxygen/scripts/mod_header.pl.in
new file mode 100644
index 0000000..3630f4b
--- /dev/null
+++ b/doc/doxygen/scripts/mod_header.pl.in
@@ -0,0 +1,15 @@
+# Header filter: works on the text in $_ (presumably one header line per run,
+# e.g. via perl -p -- TODO confirm) and prints extra tags when it sees </head>.
+# Only $year of the localtime fields is used below (in the copyright tag)
+my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime;
+$year += 1900;
+
+if (m'</head>')
+{
+    print '<link rel="SHORTCUT ICON" href="deal.ico"></link>', "\n";
+    print '<meta name="author" content="The deal.II Authors <authors at dealii.org>"></meta>', "\n";
+    print '<meta name="copyright" content="Copyright (C) 1998 - ', $year, ' by the deal.II authors"></meta>', "\n";
+    print '<meta name="deal.II-version" content="@DEAL_II_PACKAGE_VERSION@"></meta>', "\n";
+}
+# drop the $projectname placeholder everywhere except on the line containing <title>
+s/\$projectname// unless (m/<title>/);
diff --git a/doc/doxygen/scripts/program2doxygen b/doc/doxygen/scripts/program2doxygen
new file mode 100644
index 0000000..4e4be27
--- /dev/null
+++ b/doc/doxygen/scripts/program2doxygen
@@ -0,0 +1,121 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2006 - 2016 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+
+# skip header lines at the top of the file, such as copyright notices
+# and license information, if the file is a step-xx.cc tutorial. don't
+# skip for other files such as code-gallery files. stop at end of input
+if ($ARGV[0] =~ /step-\d+\.cc/)
+{
+  $_ = <>;
+  while ( defined($_) && ( m!^/\*!  ||  m!\s*\*! || m/^$/ ) ) {
+      $_ = <>;
+  }
+}
+
+
+# the converter is a small state machine with three states:
+# comment-mode, program-mode and skip-mode; it starts in comment-mode
+$comment_mode = 0;
+$program_mode = 1;
+$skip_mode    = 2;
+$state =  $comment_mode;
+
+print " * \n";
+
+do {
+    # break up /* and */ so that copied text cannot open or close a
+    # doxygen comment; afterwards expand each tab to eight spaces
+    s!/\*!/ \*!g;
+    s!\*/!\* /!g;
+
+    s/\t/        /g;
+
+    # a '// @cond SKIP' line switches to skip mode
+    if (($state != $skip_mode) && m!^\s*//\s*\@cond SKIP!)
+    {
+        # Cleanly close @code segments in program mode:
+        if ($state == $program_mode)
+        {
+            print " * \@endcode\n";
+            print " * \n";
+        }
+        $state = $skip_mode;
+    }
+    elsif (($state == $program_mode) && m!^\s*//!)
+    {
+        print " * \@endcode\n";
+        print " * \n";
+        $state = $comment_mode;
+    }
+    # if in comment mode and no comment line: toggle state.
+    # don't do so, if only a blank line
+    elsif (($state == $comment_mode) && !m!^\s*//! && !m!^\s*$!)
+    {
+        print " * \n";
+        print " * \@code\n";
+        $state = $program_mode;
+    }
+
+    if ($state == $comment_mode)
+    {
+        # in comment mode: first skip leading whitespace and
+        # comment // signs (this also removes the trailing newline)
+        s!\s*//\s*(.*)\n!$1!;
+
+        # second, replace section headers, and generate addressable
+        # anchor
+        if ( /\@sect/ ) {
+           s!\@sect(\d)\{(.*)\}\s*$!<h$1>$2</h$1>!g;
+           $sect_name = $2;
+
+           # for the anchor, use the name of the section but discard
+           # everything except for letters, numbers, and underscores
+           $sect_name =~ s/[^a-zA-Z0-9_]//g;
+
+           $_ = "\n * <a name=\"$sect_name\"></a> \n * $_";
+        }
+
+        # finally print this line
+        print " * $_\n";
+
+        # if empty line, introduce paragraph break
+        print " * \n" if  $_ =~ m!^\s*$!;
+    }
+    elsif ($state == $program_mode)
+    {
+        # in program mode, output the program line. the only thing we need
+        # to do is to avoid $ signs because that confuses doxygen. since
+        # we don't want formulas rendered in the program text anyway,
+        # simply remove them (it would be nice to suppress their
+        # meaning somehow, but I don't know how...)
+        s/\$//g;
+
+        print " * $_";
+    }
+    elsif ($state == $skip_mode)
+    {
+        # This is the end of a @cond - @endcond block, so back to
+        # comment_mode; all other lines are dropped while skipping
+        if (m!^\s*//\s*\@endcond!)
+        {
+            $state = $comment_mode;
+        }
+    }
+} while (<>);
+
+if ($state == $program_mode) {
+   print " * \@endcode\n";
+}
diff --git a/doc/doxygen/scripts/program2doxyplain b/doc/doxygen/scripts/program2doxyplain
new file mode 100644
index 0000000..21f99f8
--- /dev/null
+++ b/doc/doxygen/scripts/program2doxyplain
@@ -0,0 +1,51 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2006 - 2013, 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+
+print "\n";
+
+
+while (<>) {
+    # simply print non-comment lines and let doxygen do all the work.
+    # the only thing we have to make sure is that if we copy /*...*/
+    # comments that doxygen doesn't get confused; do so by simply
+    # adding a space
+    #
+    # also remove all $ signs to avoid confusing doxygen with what
+    # may or may not be a formula
+    if ( ! m!^\s*//! ) {
+        s!/\*!/ \*!g;
+        s!\*/!\* /!g;
+
+        s!\$!!g;
+
+        print " * $_";
+    } else {
+        # //-comment lines are not copied. all we do is look for
+        # section headers of the form @sectN{...} and generate
+        # addressable anchors for them; a malformed @sect line is
+        # skipped instead of producing an anchor with an empty name
+        if ( m!\@sect(\d)\{(.*)\}\s*$! ) {
+           $sect_name = $2;
+
+           # for the anchor, use the name of the section but discard
+           # everything except for letters, numbers, and underscores
+           $sect_name =~ s/[^a-zA-Z0-9_]//g;
+
+           print "\@endcode\n";
+           print " <a name=\"plain-$sect_name\"></a>\n";
+           print "\@code\n";
+        }
+    }
+}
diff --git a/doc/doxygen/scripts/program2html b/doc/doxygen/scripts/program2html
new file mode 100644
index 0000000..7219638
--- /dev/null
+++ b/doc/doxygen/scripts/program2html
@@ -0,0 +1,99 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2006 - 2013 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+print "<a name=\"CommProg\"></a>\n";
+print "<h1> The commented program</h1>\n";
+
+# ignore comments at the start of the program. this includes subversion
+# tags and copyright notices. stop at the end of input.
+$_ = <>;
+while ( defined($_) && ( m!^/[/\*]!  ||  m!\s*\*! || m/^$/ ) ) {
+    $_ = <>;
+}
+
+# have two states, in which the program can be:
+# comment-mode and program-mode
+$comment_mode = 0;
+$program_mode = 1;
+$state =  $comment_mode;
+
+print "<p>\n";
+
+do {
+    # escape HTML special characters ('&' first, so that the entities
+    # produced for '<' and '>' are not escaped again); expand tabs
+    s/&/&amp;/g;
+    s/</&lt;/g;
+    s/>/&gt;/g;
+    s/\t/        /g;
+    if (($state == $program_mode) && m!^\s*//!)
+    {
+        $state = $comment_mode;
+        print "</code></pre>\n";
+        print "\n";
+        print "<p>\n";
+    }
+    # if in comment mode and no comment line: toggle state.
+    # don't do so, if only a blank line
+    elsif (($state == $comment_mode) && !m!^\s*//! && !m!^\s*$!)
+    {
+        $state = $program_mode;
+        print "</p>\n";
+        print "\n";
+        print "<pre><code>\n";
+    }
+
+    if ($state == $comment_mode)
+    {
+        # in comment mode: first skip leading whitespace and
+        # comment // signs
+        s!\s*//\s*(.*)\n!$1!;
+
+        # second, replace section headers, and generate addressable
+        # anchor
+        if ( /\@sect/ ) {
+           s!\@sect(\d)\{(.*)\}\s*$!<h$1>$2</h$1>!g;
+           $sect_name = $2;
+
+           # for the anchor, use the name of the section but discard
+           # everything except for letters, numbers, and underscores
+           $sect_name =~ s/[^a-zA-Z0-9_]//g;
+
+           $_ = "\n<a name=\"$sect_name\"></a>" . $_;
+        }
+
+        # replace quotation marks by the appropriate HTML entities
+        s!``!&ldquo;!g;
+        s!''!&rdquo;!g;
+
+        # replace double dashes in comments by the &mdash; entity
+        s!--!&mdash;!g;
+
+        # finally print this line
+        print $_, "\n";
+
+        # if empty line, introduce paragraph break
+        print "</p>\n\n<p>" if  $_ =~ m!^\s*$!;
+    }
+    else
+    {
+        print "        $_";
+    }
+} while (<>);
+
+if ($state == $program_mode) {
+   print "</code></pre>\n";
+}
+
diff --git a/doc/doxygen/scripts/program2plain b/doc/doxygen/scripts/program2plain
new file mode 100644
index 0000000..251a118
--- /dev/null
+++ b/doc/doxygen/scripts/program2plain
@@ -0,0 +1,28 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2010 - 2013 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+# Remove all //-comment lines from the source file: every line whose
+# first non-whitespace characters are '//' is dropped, everything else
+# (including /* ... */ block comments and trailing // comments) is
+# copied to the output unchanged.
+
+while (<>) {
+    # Eliminate //-comment lines
+    next if (m!^\s*//!);
+
+    # Otherwise print the line
+    print;
+}
+
diff --git a/doc/doxygen/scripts/program2toc b/doc/doxygen/scripts/program2toc
new file mode 100644
index 0000000..49dc4e5
--- /dev/null
+++ b/doc/doxygen/scripts/program2toc
@@ -0,0 +1,63 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2006 - 2013 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+print "    <ul>\n";
+# build a nested <ul> table of contents from the @sectN{...} markers read from input
+$level = 3;
+while (<>) {
+    if ( /\@sect(.)\{(.*)\}/ ) {
+        $newlevel = $1;
+        $text = $2;
+
+        # only allow header levels 3 through 6, since higher ones are
+        # reserved for top-level document headers
+        if (! ($newlevel =~ /[3456]/)) {
+            print STDERR "Only header levels 3 through 6 are allowed.\n";
+            print STDERR "You had $newlevel.\n";
+            die;
+        }
+
+        if ($newlevel > $level) {
+            for ($i=$level; $i<$newlevel; ++$i) {
+                print "      <ul>\n";
+            }
+        } elsif ($newlevel < $level) {
+            for ($i=$newlevel; $i<$level; ++$i) {
+                print "      </ul>\n";
+            }
+        }
+
+        $reftext = $text;
+
+        # for the anchor, use the name of the section but discard
+        # everything except for letters, numbers, and underscores
+        $reftext =~ s/[^a-zA-Z0-9_]//g;
+
+        # replace quotation marks by the appropriate HTML entities
+        $text =~ s!``!&ldquo;!g;
+        $text =~ s!''!&rdquo;!g;
+
+        # replace double dashes in comments by the &mdash; entity
+        $text =~ s!--!&mdash;!g;
+
+        print "        <li><a href=\"#$reftext\">$text</a>\n";
+
+        $level = $newlevel;
+    }
+}
+
+for (; $level>=3; --$level) {
+    print "      </ul>\n";
+}
diff --git a/doc/doxygen/scripts/steps.pl b/doc/doxygen/scripts/steps.pl
new file mode 100644
index 0000000..41a9f59
--- /dev/null
+++ b/doc/doxygen/scripts/steps.pl
@@ -0,0 +1,230 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2006 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+use strict;
+
+my $tutorial_file = shift;
+open(TUTORIAL, "<", $tutorial_file) or die "Can't open tutorial file $tutorial_file";
+
+# Print the first part of tutorial.h.in up until the point where we
+# find the line with '@@TUTORIAL_MAP@@' (that line itself is dropped)
+while (my $line = <TUTORIAL>)
+{
+  last if($line =~ m/\@\@TUTORIAL_MAP\@\@/);
+  print $line;
+}
+
+# List of additional node attributes to highlight purpose and state of the example
+my %style = (
+ "basic"          => ',height=.8,width=.8,shape="octagon",fillcolor="green"',
+ "techniques"     => ',height=.35,width=.35,fillcolor="orange"',
+ "fluids"         => ',height=.25,width=.25,fillcolor="yellow"',
+ "solids"         => ',height=.25,width=.25,fillcolor="lightblue"',
+ "time dependent" => ',height=.25,width=.25,fillcolor="blue"',
+ "unfinished"     => ',height=.25,width=.25,style="dashed"',
+ "code-gallery"   => ',height=.08,width=.125,shape="circle"',
+    );
+
+# Print a preamble setting common attributes
+print << 'EOT'
+digraph StepsMap
+{
+  overlap=false;
+  edge [fontname="FreeSans",
+        fontsize="10",
+        labelfontname="FreeSans",
+        labelfontsize="10",
+        color="black",
+        style="solid"];
+  node [fontname="FreeSans",
+        fontsize="10",
+        shape="rectangle",
+        height=0.2,
+        width=0.4,
+        color="black",
+        fillcolor="white",
+        style="filled"];
+EOT
+    ;
+
+# Print all nodes of the graph by looping over the remaining
+# command line arguments denoting the tutorial programs
+
+# (plain 'print' is used throughout: the text is not a printf format)
+foreach my $step (@ARGV)
+{
+    # read first line of tooltip file
+    open TF, "$step/doc/tooltip"
+        or die "Can't open tooltip file $step/doc/tooltip";
+    my $tooltip = <TF>;
+    close TF;
+    chomp $tooltip;
+
+    # read first line of 'kind' file if it is a step;
+    # otherwise assume it is a code gallery program. for
+    # each of them, output something for 'dot' to generate
+    # the dependencies graph from
+    if ($step =~ /step-/
+        &&
+        !($step =~ /code-gallery/))
+    {
+      open KF, "$step/doc/kind"
+          or die "Can't open kind file $step/doc/kind";
+      my $kind = <KF>;
+      chomp $kind;
+      close KF;
+
+      die "Unknown kind '$kind' in file $step/doc/kind" if (! defined $style{$kind});
+
+      my $number = $step;
+      $number =~ s/^.*-//;
+
+      print "  Step$number [label=\"$number\", URL=\"\\ref step_$number\", tooltip=\"$tooltip\"";
+      print "$style{$kind}";
+    }
+    else
+    {
+      # get at the name of the program; also create something
+      # that can serve as a tag without using special characters
+      my $name = $step;
+      $name =~ s/^.*code-gallery\///;
+      my $tag = $name;
+      $tag =~ s/[^a-zA-Z]/_/g;
+
+      print "  code_gallery_$tag [label=\"\", URL=\"\\ref code_gallery_$tag\", tooltip=\"$tooltip\"";
+      my $kind = "code-gallery";
+      print "$style{$kind}";
+    }
+
+    print "];\n";
+}
+
+# Print all edges by going over the same list of tutorials again.
+# Keep sorted by second node on edge!
+
+# (the loop variable is declared by the loop itself)
+foreach my $step (@ARGV)
+{
+    # read first line of dependency file
+    open BF, "$step/doc/builds-on"
+        or die "Can't open builds-on file $step/doc/builds-on";
+    my $buildson = <BF>;
+    close BF;
+    chomp $buildson;
+
+    my $destination;
+    if ($step =~ /step-/
+        &&
+        !($step =~ /code-gallery/))
+    {
+      my $number = $step;
+      $number =~ s/^.*-//;
+      $destination = "Step$number";
+    }
+    else
+    {
+      my $name = $step;
+      $name =~ s/^.*code-gallery\///;
+      my $tag = $name;
+      $tag =~ s/[^a-zA-Z]/_/g;
+      $destination = "code_gallery_$tag";
+    }
+
+    # one edge per whitespace-separated entry of the builds-on line
+    foreach my $source (split ' ', $buildson) {
+        $source =~ s/step-/Step/g;
+        print "  $source -> $destination";
+        if ($destination =~ /code_gallery/)
+        {
+            print " [style=\"dashed\", arrowhead=\"empty\"]";
+        }
+        print "\n";
+    }
+}
+
+print "}\n";
+
+# Copy that part of tutorial.h.in up until the point where we
+# find the line with '@@TUTORIAL_LEGEND@@'
+while (my $line = <TUTORIAL>)
+{
+  last if($line =~ m/\@\@TUTORIAL_LEGEND\@\@/);
+  print $line;
+}
+
+# Print a preamble setting common attributes
+print << 'EOT'
+graph StepsDescription
+{
+  overlap=false;
+  edge [fontname="FreeSans",
+        fontsize="10",
+        labelfontname="FreeSans",
+        labelfontsize="10",
+        color="black",
+        style="solid"];
+  node [fontname="FreeSans",
+        fontsize="10",
+        shape="rectangle",
+        height=0.2,
+        width=0.4,
+        color="black",
+        fillcolor="white",
+        style="filled"];
+EOT
+    ;
+
+my %kind_descriptions = (
+ "basic"          => 'Basic techniques',
+ "techniques"     => 'Advanced techniques',
+ "fluids"         => 'Fluid dynamics',
+ "solids"         => 'Solid mechanics',
+ "time dependent" => 'Time dependent problems',
+ "unfinished"     => 'Unfinished codes',
+ "code-gallery"   => 'Code gallery',
+    );
+
+# for each kind, print a box in the same style as used in
+# the connections graph; also print a fake box with a
+# description of what each kind is. then connect these.
+# the kinds are visited in sorted order so that the output is reproducible
+foreach my $kind (sort keys %style)
+{
+    my $escaped_kind = $kind;
+    $escaped_kind =~ s/[^a-zA-Z]/_/g;
+    print "  $escaped_kind [label=\"\" $style{$kind}];\n";
+    print "  fake_$escaped_kind [label=\"$kind_descriptions{$kind}\", shape=plaintext];\n";
+    print "  $escaped_kind -- fake_$escaped_kind [style=dotted, arrowhead=odot, arrowsize=1];\n";
+}
+# now add connections to make sure they appear nicely next to each other
+# in the legend
+print "  basic -- techniques -- fluids -- solids -- time_dependent -- unfinished -- code_gallery;\n";
+
+# we need to tell 'dot' that all of these are at the same
+# rank to ensure they appear next to (as opposed to atop)
+# each other
+print "  {rank=same; basic, techniques, fluids, solids, time_dependent, unfinished, code_gallery}\n";
+
+# end the graph
+print "}\n";
+
+
+
+# Then print the rest of tutorial.h.in
+while (my $line = <TUTORIAL>)
+{
+  print $line;
+}
+close TUTORIAL;
diff --git a/doc/doxygen/scripts/validate-xrefs.pl b/doc/doxygen/scripts/validate-xrefs.pl
new file mode 100644
index 0000000..e157fe8
--- /dev/null
+++ b/doc/doxygen/scripts/validate-xrefs.pl
@@ -0,0 +1,157 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2006 - 2013 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Author: Wolfgang Bangerth, Guido Kanschat 2000, 2004
+#
+# Check whether references in HTML files are valid or
+# point to non-existing files/links/etc
+#
+
+
+# set this to 1 if you want verbose output
+$debug = 0;
+
+$startdir = `pwd`;
+chomp $startdir;
+
+# Return 1 if the HTML file $file contains an anchor named $ref, i.e. an
+# <a ... name="$ref"> or doxygen-style <a ... class="anchor" id="$ref">
+# element, and 0 otherwise. A file that cannot be opened contains no
+# anchors. The file is read through a lexical variable so that the
+# caller's $_ (the line currently being checked) is left untouched.
+sub anchor_exists
+{
+    my ($file, $ref) = @_;
+
+    open(my $fh, "<", $file) or return 0;
+    my $found = 0;
+  LINE:
+    while ( my $line = <$fh> ) {
+        while ( $line =~ /<a[^>]* (name=|class=\"anchor\" id=)\"?(.*?)[\s\"]/gi ) {
+            if ( $2 eq $ref ) {
+                print "                    found.\n" if $debug;
+                $found = 1;
+                last LINE;
+            }
+        }
+    }
+    close $fh;
+    return $found;
+}
+
+foreach $filename (@ARGV)
+{
+    chdir $startdir or die "Could not change dir to $startdir\n";
+    open IN, $filename
+        or die "---Can't open file `$filename'\n";
+
+    print "File: $filename\n" if $debug;
+    if ($filename =~ m!(.+)/([^/]+)!)
+    {
+        chdir $1 or die "Could not change dir to $1\n";
+        $filename = $2;
+    }
+
+    while (<IN>) {
+        # save the entire line for simpler grepping when an error occurs
+        $this_line = $_;
+
+        # if line ends with an = character, then concatenate it with the
+        # next one (dropping the line break in between); stop at end of file
+        while ( /=\s*$/ ) {
+            $newline = <IN>;
+            last unless defined $newline;
+            $newline =~ s/^\s*//;
+            s/\s*$//;
+            $_ .= $newline;
+            $this_line = $_;
+        }
+
+        # first find all hrefs
+        while ( /<\s*a\s+href=\"?(.*?)[\s\"]/gi ) {
+            # then decide whether they are relevant for our purpose
+            $link = $1;
+
+            if ( $link =~ /^mailto|http(s)?:\/\//i ) {
+                # link is external. don't check it
+                print "external link: $link\n" if $debug;
+                next;
+            }
+            elsif ( $link =~ m/^#(.*)/ )
+            {
+                # this is a reference within this file. try to
+                # find its anchor
+                $internal_ref = $1;
+                print "internal reference: $link\n" if $debug;
+
+                die "---Internal reference `$internal_ref' not found in file $filename\n This line is: $this_line.\n"
+                    unless anchor_exists($filename, $internal_ref)
+                            # work around a bug in doxygen 1.6.3:
+                            #   https://bugzilla.gnome.org/show_bug.cgi?id=620372
+                            || ($internal_ref =~ /^index_[:_~]/) ;
+                next;
+            }
+            elsif ( $link =~ /^(.*?)#(.*)/ )
+            {
+                # this is a reference within another file. try to
+                # find its anchor
+                $external_file = $1;
+                $external_ref = $2;
+
+                # if the file name was prepended with http: (but is a local file,
+                # so no double-slash), then split off http:
+                $external_file =~ s/^http(s)?://g;
+
+                print "external reference: $link\n" if $debug;
+
+                die "---External reference `$external_file#$external_ref' not found in file $filename\n This line is: $this_line.\n"
+                    unless anchor_exists($external_file, $external_ref)
+                            # work around a bug in doxygen 1.6.3:
+                            #   https://bugzilla.gnome.org/show_bug.cgi?id=620372
+                            || ($external_ref =~ /^index_[:_~]/) ;
+                next;
+            }
+            else {
+                # this must now be a regular file which is
+                # referenced. the file must be local
+
+                # if the file name was prepended with http: (but is a local file,
+                # so no double-slash), then split off http:
+                $link =~ s/^http(s)?://g;
+
+                die "---Local file `$link' not found in file `$filename'\n This line is: $this_line.\n"
+                    unless ((-r $link) && (-f $link));
+            }
+        }
+
+        # check whether references to images are valid
+        while ( /img\s+src=\"?(.*?)[\s\"]/gi ) {
+            # check whether the file for the image is present
+            $link = $1;
+
+            # ignore online links
+            if ($link =~ /^http/)
+            {
+                next;
+            }
+
+            die "---Local image `$link' not found in file `$filename'\n This line is: $this_line.\n"
+                unless ((-r $link) && (-f $link));
+        }
+    }
+    close IN;
+}
+
diff --git a/doc/doxygen/stylesheet.css b/doc/doxygen/stylesheet.css
new file mode 100644
index 0000000..7002ba8
--- /dev/null
+++ b/doc/doxygen/stylesheet.css
@@ -0,0 +1,21 @@
+/* Colors for #titlearea and box styling for table.tutorial / div.tutorial; presumably applied to the doxygen-generated pages -- confirm. */
+#titlearea
+{
+    background-color: #0000ff;
+    color: #80ff00;
+}
+
+table.tutorial {
+    color: black;
+    border: 1px solid #aaa;
+    background-color: #f9f9f9;
+    padding: 5px;
+    font-size: 95%;
+}
+div.tutorial {
+    color: black;
+    border: 1px solid #aaa;
+    background-color: #f9f9f9;
+    padding: 50px; /* NOTE(review): ten times the table.tutorial padding -- confirm this is intended */
+    font-size: 95%;
+}
diff --git a/doc/doxygen/tutorial/CMakeLists.txt b/doc/doxygen/tutorial/CMakeLists.txt
new file mode 100644
index 0000000..e482f76
--- /dev/null
+++ b/doc/doxygen/tutorial/CMakeLists.txt
@@ -0,0 +1,131 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2016 by the deal.II Authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+
+
+# Collect all of the directory names for the tutorial programs
+FILE(GLOB _deal_ii_steps
+  ${CMAKE_SOURCE_DIR}/examples/step-*
+  )
+
+# Also collect the names of all code gallery projects. To
+# do so, find all 'author' files, then strip the last two
+# levels of these paths.
+#
+# For unclear reasons, the glob returns these files as
+# "/a/b/c/name//doc//author", so make sure we eat the
+# double slashes in the REGEX REPLACE step (hence the "/+" patterns).
+SET_IF_EMPTY(DEAL_II_CODE_GALLERY_DIRECTORY ${CMAKE_SOURCE_DIR}/code-gallery)
+FILE(GLOB _code_gallery_names 
+     "${DEAL_II_CODE_GALLERY_DIRECTORY}/*/doc/author")
+STRING(REGEX REPLACE "/+doc/+author" "" _code_gallery_names "${_code_gallery_names}")
+
+#
+# Define target for the tutorial. It depends on the
+# file tutorial.h built via the next target below, as well
+# as the various files we create from the tutorial
+# directories below that. These dependencies are added
+# below the respective targets.
+#
+# NOTE(review): the list of steps used in this file is the
+# _deal_ii_steps glob above; DEAL_II_STEPS is not referenced here.
+#
+
+ADD_CUSTOM_TARGET(tutorial)
+
+#
+# Describe how to build tutorial.h: steps.pl is run on tutorial.h.in plus
+# the step and code gallery directories; its stdout becomes tutorial.h.
+
+file(GLOB _deal_ii_steps_buildson
+  ${CMAKE_SOURCE_DIR}/examples/step-*/doc/builds-on
+  )
+file(GLOB _deal_ii_steps_kind
+  ${CMAKE_SOURCE_DIR}/examples/step-*/doc/kind
+  )
+file(GLOB _deal_ii_steps_tooltip
+  ${CMAKE_SOURCE_DIR}/examples/step-*/doc/tooltip
+  )
+
+ADD_CUSTOM_COMMAND(
+  OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/tutorial.h
+  COMMAND ${PERL_EXECUTABLE}
+  ARGS
+    ${CMAKE_SOURCE_DIR}/doc/doxygen/scripts/steps.pl
+    ${CMAKE_CURRENT_SOURCE_DIR}/tutorial.h.in
+    ${_deal_ii_steps}
+    ${_code_gallery_names}
+    > ${CMAKE_CURRENT_BINARY_DIR}/tutorial.h
+  DEPENDS
+    ${CMAKE_SOURCE_DIR}/doc/doxygen/scripts/steps.pl
+    ${CMAKE_CURRENT_SOURCE_DIR}/tutorial.h.in
+    ${_deal_ii_steps}
+    ${_deal_ii_steps_kind}
+    ${_deal_ii_steps_tooltip}
+    ${_deal_ii_steps_buildson}
+    ${_code_gallery_names}
+  )
+ADD_CUSTOM_TARGET(build_tutorial_h
+  DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/tutorial.h)
+ADD_DEPENDENCIES(tutorial build_tutorial_h)
+
+
+#
+# Prepare the steps for documentation generation: for every step, run
+# program2plain to get <step>.cc and make_step.pl to get <step>.h.
+
+FOREACH(_step ${_deal_ii_steps})
+  GET_FILENAME_COMPONENT(_step "${_step}" NAME)
+
+  ADD_CUSTOM_COMMAND(
+    OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${_step}.cc
+    COMMAND ${PERL_EXECUTABLE}
+    ARGS
+      ${CMAKE_SOURCE_DIR}/doc/doxygen/scripts/program2plain
+      < ${CMAKE_SOURCE_DIR}/examples/${_step}/${_step}.cc
+      > ${CMAKE_CURRENT_BINARY_DIR}/${_step}.cc
+    DEPENDS
+      ${CMAKE_SOURCE_DIR}/doc/doxygen/scripts/program2plain
+      ${CMAKE_SOURCE_DIR}/examples/${_step}/${_step}.cc
+    VERBATIM
+    )
+
+  ADD_CUSTOM_COMMAND(
+    OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${_step}.h
+    COMMAND ${PERL_EXECUTABLE}
+    ARGS
+      ${CMAKE_SOURCE_DIR}/doc/doxygen/scripts/make_step.pl
+      ${_step} ${CMAKE_SOURCE_DIR}
+      > ${CMAKE_CURRENT_BINARY_DIR}/${_step}.h
+    WORKING_DIRECTORY
+      ${CMAKE_CURRENT_BINARY_DIR}
+    DEPENDS
+      ${CMAKE_SOURCE_DIR}/doc/doxygen/scripts/make_step.pl
+      ${CMAKE_SOURCE_DIR}/doc/doxygen/scripts/intro2toc
+      ${CMAKE_SOURCE_DIR}/doc/doxygen/scripts/create_anchors
+      ${CMAKE_SOURCE_DIR}/doc/doxygen/scripts/program2doxygen
+      ${CMAKE_SOURCE_DIR}/doc/doxygen/scripts/program2doxyplain
+      ${CMAKE_SOURCE_DIR}/examples/${_step}/${_step}.cc
+      ${CMAKE_SOURCE_DIR}/examples/${_step}/doc/intro.dox
+      ${CMAKE_SOURCE_DIR}/examples/${_step}/doc/results.dox
+    )
+
+  ADD_CUSTOM_TARGET(tutorial_${_step}
+    DEPENDS
+      ${CMAKE_CURRENT_BINARY_DIR}/${_step}.h
+      ${CMAKE_CURRENT_BINARY_DIR}/${_step}.cc
+    )
+  ADD_DEPENDENCIES(tutorial tutorial_${_step})
+ENDFOREACH()
diff --git a/doc/doxygen/tutorial/tutorial.h.in b/doc/doxygen/tutorial/tutorial.h.in
new file mode 100644
index 0000000..e072441
--- /dev/null
+++ b/doc/doxygen/tutorial/tutorial.h.in
@@ -0,0 +1,994 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+/**
+ * @page Tutorial Tutorial programs
+ *
+ * New to deal.II? You might want to start with tutorial Step-1 and work
+ * your way up to Step-5. At that point you can explore what features you
+ * are interested in and look at the large collection of programs listed
+ * below.
+ *
+ * The deal.II tutorial contains a collection of programs, each more or
+ * less built atop previous ones, which demonstrate various aspects of
+ * the library. Each such example has the following structure:
+ * <ol>
+ *   <li> <b>Introduction:</b> What the program does, including
+ *        the mathematical model, and
+ *        what programming techniques are new.
+ *   <li> <b>The commented program:</b> An extensively documented listing of the
+ *        source code.
+ *   <li> <b>Results:</b> The output of the program, with comments and
+ *        interpretation.
+ *   <li> <b>The plain program:</b> The source code stripped of
+ *        all comments.
+ * </ol>
+ * You can browse the available tutorial programs
+ * <ol>
+ *   <li> as <b><a href="#graph">a graph</a></b> that shows how tutorial programs build upon each other.
+ *   <li> as <b><a href="#list">a list</a></b> that provides a short
+ *     synopsis of each program.
+ *   <li> or <b><a href="#topic">grouped by topic</a></b>.
+ * </ol>
+ *
+ * The programs are in the <code>examples/</code> directory of your local
+ * deal.II installation. After compiling the library itself, if you go into
+ * one of the tutorial directories, you can configure the program by typing
+ * <code>cmake .</code>, build it via <code>make</code> and run it using
+ * <code>make run</code>. The latter command also compiles the program if
+ * that has not already been done. The CMakeLists.txt files in the
+ * different directories are based on the 
+ * <a href="../../users/cmakelists.html" target="_top">small program Makefile template</a>.
+ *
+ * @note Some of the tutorial programs also jointly form
+ *   the <a href="../../doxygen/deal.II/group__geodynamics.html">geodynamics
+ *   demonstration suite</a>. Further deal.II-based programs, often more
+ *   complex but less well documented than the tutorial programs, can also be
+ *   found in the @ref CodeGallery .
+ *
+ *
+ * <a name="graph"></a>
+ * @anchor TutorialConnectionGraph
+ * <h3>Connections between tutorial programs</h3>
+ *
+ * The following graph shows the connections between tutorial programs and
+ * how they build on each other.
+ * Click on any of the boxes to go to one of the programs. If you hover
+ * your mouse pointer over a box, a brief description of the program
+ * should appear.
+ * @dot
+@@TUTORIAL_MAP@@
+ * @enddot
+ *
+ * <b>Legend:</b><br />
+ * @dot
+@@TUTORIAL_LEGEND@@
+ * @enddot
+ *
+ * <a name="list"></a>
+ * <h3>Tutorial programs listed by number</h3>
+ *
+ * <table align="center" width="90%">
+ *   <tr valign="top">
+ *       <td width="100px">step-1</td>
+ *       <td> Creating a grid. A simple way to write it to a file.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-2</td>
+ *       <td> Associate degrees of freedom to
+ *       each vertex and compute the resulting sparsity pattern of
+ *       matrices. Show that renumbering reduces the bandwidth of
+ *       matrices significantly, i.e. clusters nonzero entries around the
+ *       diagonal.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-3</td>
+ *       <td> Actually solve Laplace's
+ *       problem. Object-orientation. Assembling matrices and
+ *       vectors. %Boundary values.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-4</td>
+ *       <td> This example is programmed in a
+ *       way that it is independent of the dimension for which we want to
+ *       solve Laplace's equation; we will solve the equation in 2D and
+ *       3D, although the program is exactly the same. Non-constant right
+ *       hand side function. Non-homogeneous boundary values.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-5</td>
+ *       <td> Computations on successively
+ *       refined grids. Reading a grid from disk. Some optimizations.
+ *       Using assertions. Non-constant coefficient in
+ *       the elliptic operator (yielding the extended Poisson
+ *       equation). Preconditioning the CG solver for the
+ *       linear system of equations.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-6</td>
+ *       <td> Adaptive local
+ *       refinement. Handling of hanging nodes. Higher order elements.
+ *       Catching exceptions in the <code>main</code> function.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-7</td>
+ *       <td> Helmholtz
+ *       equation. Non-homogeneous Neumann boundary conditions and
+ *       boundary integrals. Verification of correctness of computed
+ *       solutions. Computing the error between exact and numerical
+ *       solution and output of the data in tables. Using counted pointers.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-8</td>
+ *       <td> The elasticity equations will be
+ *       solved instead of Laplace's equation. The solution is
+ *       vector-valued and the equations form a system with as many
+ *       equations as the dimension of the space in which it is posed.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-9</td>
+ *       <td> Linear advection equation, assembling
+ *       the system of equations in parallel using multi-threading,
+ *       implementing a refinement criterion based on a finite difference
+ *       approximation of the gradient.  </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-10</td>
+ *       <td> Higher order mappings. Do not
+ *       solve equations, but rather compute the value of pi to high
+ *       accuracy.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-11</td>
+ *       <td> Solving a Laplace problem with
+ *       higher order mappings. Using mean value constraints and
+ *       intermediate representations of sparsity patterns.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-12</td>
+ *       <td> Discontinuous Galerkin methods for linear advection problems.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-13</td>
+ *       <td> Software design questions and
+ *       how to write a modular, extensible finite element program.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-14</td>
+ *       <td> Duality based error estimators,
+ *       more strategies to write a modular, extensible finite element
+ *       program.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-15</td>
+ *       <td> A nonlinear elliptic problem: The minimal surface equation.
+ *       Newton's method. Transferring a solution across mesh refinement.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-16</td>
+ *       <td> Multigrid preconditioning of the Laplace equation on adaptive
+ *       meshes.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-17</td>
+ *       <td> Using PETSc for linear algebra; running
+ *       in parallel on clusters of computers linked together by MPI.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-18</td>
+ *       <td> A time dependent problem; using a much
+ *       simplified version of implementing elasticity; moving meshes; handling
+ *       large scale output of parallel programs.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-19</td>
+ *       <td> Input parameter file handling. Merging
+ *       output of a parallel program.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-20</td>
+ *       <td> Mixed finite elements. Using block
+ *       matrices and block vectors to define more complicated solvers and
+ *       preconditioners working on the Schur complement.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-21</td>
+ *       <td> The time dependent two-phase flow in
+ *       porous media. Extensions of mixed Laplace discretizations. More
+ *       complicated block solvers. Simple time stepping.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-22</td>
+ *       <td> Solving the Stokes equations of slow fluid flow on adaptive
+ *       meshes. More on Schur complement solvers. Advanced use of the
+ *       ConstraintMatrix class.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-23</td>
+ *       <td> Finally a "real" time dependent problem, the wave equation.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-24</td>
+ *       <td> A variant of step-23 with absorbing
+ *       boundary conditions, and extracting practically useful data.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-25</td>
+ *       <td> The sine-Gordon
+ *       soliton equation, which is a nonlinear variant of the time
+ *       dependent wave equation covered in step-23 and step-24.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-26</td>
+ *       <td> The heat equation, solved on a mesh that is adapted
+ *       every few time steps.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-27</td>
+ *       <td> hp finite element methods  </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-28</td>
+ *       <td> Multiple grids for solving a multigroup diffusion equation
+ *       in nuclear physics simulating a nuclear reactor core  </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-29</td>
+ *       <td> Solving a complex-valued Helmholtz equation. Sparse direct
+ *       solvers. Dealing with parameter files.  </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-30</td>
+ *       <td> Anisotropic refinement for DG finite element methods.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-31</td>
+ *       <td> Time-dependent Stokes flow driven by temperature
+ *       differences in a fluid. Adaptive meshes that change between time
+ *       steps.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-32</td>
+ *       <td> A massively parallel solver for time-dependent Stokes flow driven
+ *       by temperature differences in a fluid. Adapting methods for real-world
+ *       equations.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-33</td>
+ *       <td> A nonlinear hyperbolic conservation law: The Euler equations of
+ *       compressible gas dynamics.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-34</td>
+ *       <td> %Boundary element methods (BEM) of low order: Exterior irrotational
+ *       flow. The ParsedFunction class.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-35</td>
+ *       <td> A projection solver for the Navier–Stokes equations.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-36</td>
+ *       <td> Using SLEPc for linear algebra; solving an eigenspectrum
+ *       problem. The Schrödinger wave equation.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-37</td>
+ *       <td> Solving a Poisson problem with a multilevel preconditioner without
+ *       explicitly storing the matrix (a matrix-free method).
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-38</td>
+ *       <td>Solving the Laplace-Beltrami equation on curved manifolds embedded
+ *       in higher dimensional spaces.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-39</td>
+ *       <td> Solving Poisson's equation once more, this time with the
+ *       interior penalty method, one of the discontinuous Galerkin
+ *       methods developed for this problem. Error estimator, adaptive
+ *       meshes, and multigrid preconditioner, all using the MeshWorker
+ *       framework.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-40</td>
+ *       <td> Techniques for the massively parallel solution of the Laplace
+ *       equation (up to 10,000s of processors).
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-41</td>
+ *       <td> Solving the obstacle problem, a variational inequality.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-42</td>
+ *       <td> A solver for an elasto-plastic contact problem, running on
+ *       parallel machines.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-43</td>
+ *       <td> Advanced techniques for the simulation of porous media flow.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-44</td>
+ *       <td> Finite strain hyperelasticity based on a three-field formulation.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-45</td>
+ *       <td> Periodic boundary conditions.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-46</td>
+ *       <td> Coupling different kinds of equations in different parts of the domain.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-48</td>
+ *       <td> Explicit time stepping for the Sine–Gordon equation based on
+ *       a diagonal mass matrix. Efficient implementation of (nonlinear) finite
+ *       element operators.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-49</td>
+ *       <td> Advanced mesh creation and manipulation techniques.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-51</td>
+ *       <td> Solving the convection-diffusion equation with a hybridizable
+ *       discontinuous Galerkin method using face elements.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-52</td>
+ *       <td> Solving the time dependent neutron diffusion equation using 
+ *       Runge-Kutta methods.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-53</td>
+ *       <td> Describing the geometry of complex domains and curved boundaries.
+ *       </td></tr>
+ *
+ *   <tr valign="top">
+ *       <td>step-54</td>
+ *       <td> Using CAD files to describe the boundary of your domain.
+ *       </td></tr>
+ * </table>
+ *
+ * <a name="topic"></a>
+ * <h3>Tutorial programs grouped by topics</h3>
+ *
+ * <h4><b>Basic techniques</b></h4>
+ * <table align="center" width="90%">
+ *
+ *   <tr valign="top">
+ *     <td width="400px"> Creating a grid. A simple way to write it to a file
+ *     </td>
+ *     <td>step-1</td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Degrees of freedom
+ *     </td>
+ *     <td>step-2</td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Solve the Laplace equation
+ *     </td>
+ *     <td>step-3</td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Dimension independent programming, non-zero data
+ *     </td>
+ *     <td>step-4</td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Computing on uniformly refined meshes
+ *     </td>
+ *     <td>step-5</td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Adaptivity
+ *     </td>
+ *     <td>step-6, step-26</td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Evaluating errors
+ *     </td>
+ *     <td>step-7</td>
+ *   </tr>
+ *   <tr valign="top">
+ *     <td> Nonlinear problems, Newton's method
+ *     </td>
+ *     <td>step-15</td>
+ *   </tr>
+ *
+ * </table>
+ * <h4><b>Advanced techniques</b></h4>
+ * <table align="center" width="90%">
+ *
+ *   <tr valign="top">
+ *     <td width="400px"> Multithreading
+ *     </td>
+ *     <td>
+ *       step-9,
+ *       step-28,
+ *       step-32,
+ *       step-44,
+ *       step-48,
+ *       step-51
+ *     </td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Block solvers and preconditioners
+ *     </td>
+ *     <td>
+ *       step-20,
+ *       step-21,
+ *       step-22,
+ *       step-31,
+ *       step-32,
+ *       step-43,
+ *       step-44
+ *     </td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Using Trilinos
+ *     </td>
+ *     <td>
+ *       step-31,
+ *       step-32,
+ *       step-33,
+ *       step-41,
+ *       step-42,
+ *       step-43
+ *     </td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Parallelization via PETSc and MPI
+ *     </td>
+ *     <td>
+ *       step-17,
+ *       step-18,
+ *       step-19,
+ *       step-40
+ *     </td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Parallelization via Trilinos and MPI
+ *     </td>
+ *     <td>
+ *       step-32,
+ *       step-42
+ *     </td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Parallelization on very large numbers of processors
+ *     </td>
+ *     <td>
+ *       step-32,
+ *       step-40,
+ *       step-42
+ *     </td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Input parameter handling
+ *     </td>
+ *     <td>
+ *       step-19,
+ *       step-28,
+ *       step-29,
+ *       step-32,
+ *       step-33,
+ *       step-34,
+ *       step-35,
+ *       step-36,
+ *       step-42,
+ *       step-44
+ *     </td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Higher order mappings
+ *     </td>
+ *     <td>
+ *       step-10,
+ *       step-11,
+ *       step-32
+ *     </td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Error indicators and estimators
+ *     </td>
+ *     <td>
+ *       step-6,
+ *       step-9,
+ *       step-14,
+ *       step-39
+ *     </td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Transferring solutions across mesh refinement
+ *     </td>
+ *     <td>
+ *       step-15,
+ *       step-28,
+ *       step-31,
+ *       step-32,
+ *       step-33,
+ *       step-42,
+ *       step-43
+ *     </td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Discontinuous Galerkin methods
+ *     </td>
+ *     <td>
+ *       step-12,
+ *       step-21,
+ *       step-39,
+ *       step-46,
+ *       step-51
+ *     </td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> hp finite elements
+ *     </td>
+ *     <td>
+ *       step-27,
+ *       step-46
+ *     </td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Anisotropic refinement for DG finite element methods
+ *     </td>
+ *     <td>step-30</td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Computing Jacobians from residuals, automatic differentiation
+ *     </td>
+ *     <td>step-33</td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> %Boundary element methods, curved manifolds
+ *     </td>
+ *     <td>
+ *       step-32,
+ *       step-34,
+ *       step-38,
+ *       step-53,
+ *       step-54
+ *     </td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Periodic boundary conditions
+ *     </td>
+ *     <td>
+ *       step-45
+ *     </td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Matrix-free methods
+ *     </td>
+ *     <td>
+ *       step-37,
+ *       step-48
+ *     </td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Advanced meshes and geometries
+ *     </td>
+ *     <td>
+ *       step-49,
+ *       step-53,
+ *       step-54
+ *     </td>
+ *   </tr>
+ *
+ * </table>
+ * <h4><b>Linear solvers</b></h4>
+ * <table align="center" width="90%">
+ *
+ *   <tr valign="top">
+ *     <td width="400px"> Conjugate Gradient solver
+ *     </td>
+ *     <td>step-3</td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Preconditioned CG solver
+ *     </td>
+ *     <td>step-5</td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> BiCGStab
+ *     </td>
+ *     <td>step-9</td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Multilevel preconditioners
+ *     </td>
+ *     <td>
+ *       step-16,
+ *       step-31,
+ *       step-32,
+ *       step-37,
+ *       step-39,
+ *       step-41,
+ *       step-42,
+ *       step-43
+ *     </td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Parallel solvers
+ *     </td>
+ *     <td>
+ *       step-17,
+ *       step-18,
+ *       step-32,
+ *       step-40,
+ *       step-42
+ *     </td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Block and Schur complement solvers
+ *     </td>
+ *     <td>
+ *       step-20,
+ *       step-21,
+ *       step-22,
+ *       step-31,
+ *       step-32,
+ *       step-43
+ *     </td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Decoupled projection solvers
+ *     </td>
+ *     <td>step-35</td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Linear Newton systems from nonlinear equations
+ *     </td>
+ *     <td>
+ *       step-33,
+ *       step-41,
+ *       step-42,
+ *       step-44
+ *     </td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Eigenvalue solvers
+ *     </td>
+ *     <td>step-36</td>
+ *   </tr>
+ *
+ * </table>
+ * <h4><b>Other equations</b></h4>
+ * <table align="center" width="90%">
+ *
+ *   <tr valign="top">
+ *     <td width="400px"> Helmholtz equation
+ *     </td>
+ *     <td>
+ *       step-7,
+ *       step-29
+ *     </td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Elasticity and elasto-plasticity equations
+ *     </td>
+ *     <td>
+ *       step-8,
+ *       step-42,
+ *       step-46
+ *     </td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> The heat equation
+ *     </td>
+ *     <td>
+ *       step-26
+ *     </td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Minimal surface equation
+ *     </td>
+ *     <td>
+ *       step-15
+ *     </td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Quasi-static elasticity equations
+ *     </td>
+ *     <td>
+ *       step-18,
+ *       step-44
+ *     </td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Transport (advection) equations
+ *     </td>
+ *     <td>step-9,
+ *         step-21,
+ *         step-31,
+ *         step-32,
+ *         step-43,
+ *         step-51
+ *     </td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> The nonlinear hyperbolic Euler system of compressible gas dynamics
+ *     </td>
+ *     <td>step-33</td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Mixed Laplace, Darcy, Porous media
+ *     </td>
+ *     <td>
+ *       step-20,
+ *       step-21,
+ *       step-43
+ *     </td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Stokes and incompressible Navier-Stokes flow
+ *     </td>
+ *     <td>
+ *       step-22,
+ *       step-31,
+ *       step-32,
+ *       step-35,
+ *       step-46
+ *     </td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> The wave equation, in linear and nonlinear variants
+ *     </td>
+ *     <td>
+ *       step-23,
+ *       step-24,
+ *       step-25,
+ *       step-48
+ *     </td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> A multigroup diffusion problem in neutron transport
+ *     </td>
+ *     <td>step-28</td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Irrotational flow
+ *     </td>
+ *     <td>step-34</td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> An eigenspectrum problem
+ *     </td>
+ *     <td>step-36</td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> The obstacle problem, a variational inequality
+ *     </td>
+ *     <td>
+ *       step-41,
+ *       step-42
+ *     </td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Coupling different equations in different parts of the domain
+ *     </td>
+ *     <td>step-46</td>
+ *   </tr>
+ *
+ * </table>
+ * <h4><b>%Vector problems</b></h4>
+ * <table align="center" width="90%">
+ *
+ *   <tr valign="top">
+ *     <td width="400px"> Elasticity and elasto-plasticity equations
+ *     </td>
+ *     <td>
+ *       step-8,
+ *       step-42
+ *     </td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Mixed Laplace
+ *     </td>
+ *     <td>step-20</td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Mixed Laplace plus an advection equation
+ *     </td>
+ *     <td>step-21,
+ *         step-43
+ *     </td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Incompressible Stokes and Navier-Stokes flow
+ *     </td>
+ *     <td>step-22,
+ *         step-31,
+ *         step-32,
+ *         step-35</td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> A complex-valued Helmholtz problem
+ *     </td>
+ *     <td>step-29</td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> The Euler equations of compressible gas dynamics
+ *     </td>
+ *     <td>step-33</td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Coupling different equations in different parts of the domain</td>
+ *     <td>step-46</td>
+ *   </tr>
+ *
+ * </table>
+ *
+ *
+ * <h4><b>Time dependent problems</b></h4>
+ * <table align="center" width="90%">
+ *
+ *   <tr valign="top">
+ *     <td> The heat equation
+ *     </td>
+ *     <td>step-26
+ *     </td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td width="400px"> Quasi-static elasticity
+ *     </td>
+ *     <td>
+ *      step-18,
+ *      step-44
+ *     </td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Porous media flow
+ *     </td>
+ *     <td>step-21,
+ *         step-43
+ *     </td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> The wave equation, in linear and nonlinear variants
+ *     </td>
+ *     <td>step-23,
+ *         step-24,
+ *         step-25,
+ *         step-48
+ *     </td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Time dependent Stokes flow driven by buoyancy
+ *     </td>
+ *     <td>step-31,
+ *         step-32
+ *     </td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> The Euler equations of compressible gas dynamics
+ *     </td>
+ *     <td>step-33</td>
+ *   </tr>
+ *
+ *   <tr valign="top">
+ *     <td> Time dependent neutron diffusion equation
+ *     </td>
+ *     <td>step-52</td>
+ *   </tr>
+ * </table>
+ */
diff --git a/doc/external-libs/arpack.html b/doc/external-libs/arpack.html
new file mode 100644
index 0000000..dea9116
--- /dev/null
+++ b/doc/external-libs/arpack.html
@@ -0,0 +1,110 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+	  "http://www.w3.org/TR/html4/loose.dtd">
+
+<html>
+  <head>
+    <title>The deal.II Readme on interfacing to ARPACK</title>
+    <link href="../screen.css" rel="StyleSheet">
+    <meta name="copyright" content="Copyright (C) 2010, 2013, 2015 by the deal.II authors">
+    <meta name="keywords" content="deal.II">
+  </head>
+  <body>
+
+
+    <h2>Installation of <acronym>ARPACK</acronym></h2>
+
+    <p>
+      <a href="http://www.caam.rice.edu/software/ARPACK/">ARPACK</a>
+      is a collection of Fortran77 subroutines designed to solve large
+      scale eigenvalue problems.
+      <a href="http://www.caam.rice.edu/software/ARPACK/" target="_top">ARPACK</a>
+      should be readily packaged by most Linux distributions.
+      Don't forget to install a development version of the library.
+    </p>
+
+    <p>
+      Below is a short summary of instructions on how to compile and install
+      <acronym>ARPACK</acronym> by hand (for the case you wish to do so).
+    </p>
+
+    <h3>How to compile and install <acronym>ARPACK</acronym> by hand</h3>
+
+    <p>
+      After you downloaded the Fortran version of <acronym>ARPACK</acronym>
+      and the patch, unzip the files you got. That will create
+      a directory named <acronym>ARPACK</acronym>. If you need
+      further instructions please read the README file or the
+      <a href="http://www.caam.rice.edu/software/ARPACK/SRC/instruction.arpack">instructions</a>.
+      We will explain here in a few steps what has to be done to be able
+      to compile
+      <acronym>ARPACK</acronym>.
+    </p>
+
+    <ul>
+      <li>edit <code>ARmake.inc</code>
+      <ul>
+        <li>change home to the correct directory</li>
+        <li>choose which <acronym>BLAS</acronym> and
+        <acronym>LAPACK</acronym> you would like to use</li>
+      </ul></li>
+      <li>change the file second.f in the UTIL directory</li>
+      <li>do <code>make lib</code> in the current directory
+      to build the standard library <code>libarpack_$(PLAT).a</code>
+      </li>
+    </ul>
+
+    <p>
+    Note: For compilation of <acronym>ARPACK</acronym> we emphasise
+    adding the compiler flag <code>-fPIC</code>. This is a definite
+    requirement if we are compiling <acronym>deal.II</acronym> with
+    shared libraries (which is the default). If we had preferred to be
+    compiling <acronym>deal.II</acronym> without shared libraries,
+    that's ok too; in that case we would do exactly the same thing
+    as described above, but this time omitting
+    the <code>-fPIC</code> flag from the scheme.
+    </p>
+
+    <p>
+    Try to run one of the examples and compare the output.
+    What the output should look like is stated in the README
+    that can be found in the <code>EXAMPLES</code> directory.
+    </p>
+
+    <p>
+    If the output you produced looks as it should, you can
+    proceed to compile <acronym>deal.II</acronym> with
+    <acronym>ARPACK</acronym>.
+    </p>
+
+    <h2>Interfacing <acronym>deal.II</acronym>
+      to <acronym>ARPACK</acronym></h2>
+
+    <p>
+      Support for <acronym>ARPACK</acronym> will be
+      enabled automatically if a system wide installation of
+      <acronym>ARPACK</acronym> can be found.
+      To use a self compiled version, specify
+      <pre>
+
+    -DARPACK_DIR=/path/to/arpack
+      </pre>
+      when invoking <code>cmake</code>.
+    </p>
+    <p>
+      You can override the autodetection by manually setting
+      <pre>
+
+    -DDEAL_II_WITH_ARPACK=OFF|ON
+      </pre>.
+    </p>
+
+    <hr />
+    <div class="right">
+      <a href="http://validator.w3.org/check?uri=referer" target="_top">
+        <img style="border:0" src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+      <a href="http://jigsaw.w3.org/css-validator/check/referer" target="_top">
+        <img style="border:0;width:88px;height:31px" src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+    </div>
+
+  </body>
+</html>
diff --git a/doc/external-libs/opencascade.html b/doc/external-libs/opencascade.html
new file mode 100644
index 0000000..5bb2313
--- /dev/null
+++ b/doc/external-libs/opencascade.html
@@ -0,0 +1,84 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"                  
+	  "http://www.w3.org/TR/html4/loose.dtd">                        
+
+<html>                                                                          
+  <head>                            
+    <title>The deal.II Readme on interfacing to OpenCASCADE</title>
+    <link href="../screen.css" rel="StyleSheet">
+    <meta name="copyright" content="Copyright (C) 2010, 2013, 2014, 2015 by the deal.II authors">
+    <meta name="keywords" content="deal.II"> 
+  </head>                                                                      
+  <body> 
+
+    
+    <h2>Installation of <acronym>OpenCASCADE</acronym></h2>
+
+    <p> <a href="http://www.opencascade.org/">OpenCASCADE</a> is a
+      software development kit (SDK) intended for development of
+      applications dealing with 3D CAD data, freely available in open
+      source. It includes a set of C++ class libraries providing
+      services for 3D surface and solid modeling, visualization, data
+      exchange and rapid application development.</p>
+
+    <p> Below is a short summary of instructions on how to compile and
+      install <acronym>OpenCASCADE</acronym> community edition by hand
+      (for the case you wish to do so).  </p>
+
+    <h3>How to compile and install <acronym>OpenCASCADE</acronym>
+     </h3>
+
+    <p>
+Get a clone of the OCE repository:
+<pre>
+git clone git://github.com/tpaviot/oce.git
+</pre>
+
+It is a good practice to build in a separate directory:
+<pre>
+mkdir build
+cd build
+cmake -D OCE_INSTALL_PREFIX=/path/to/where/you/want/oce \
+      -D OCE_TESTING=OFF \
+      -D OCE_VISUALISATION=OFF \
+      -D OCE_DISABLE_X11=ON \
+      ..
+make install
+</pre>
+
+This will turn off some packages we don't need. The default package options also work, though.
+
+    <h2>Interfacing <acronym>deal.II</acronym>
+      to <acronym>OpenCASCADE</acronym></h2>
+
+    <p>
+      Support for <acronym>OpenCASCADE</acronym> will be
+      enabled automatically if a system wide installation of
+      <acronym>OpenCASCADE</acronym> can be found.
+      To use a self compiled version, specify
+      <pre>
+
+    -DOPENCASCADE_DIR=/path/to/opencascade/or/oce
+      </pre>
+      when invoking <code>cmake</code>. Alternatively, you can also set an environment
+      variable <code>OPENCASCADE_DIR</code> (valid alternatives are
+      also <code>OCC_DIR</code> and <code>OCE_DIR</code>) and cmake
+      will pick up this path.
+    </p>
+    <p>
+      You can override the autodetection by manually setting
+      <pre>
+
+    -DDEAL_II_WITH_OPENCASCADE=OFF|ON
+      </pre>.
+    </p>
+
+    <hr />
+    <div class="right">
+      <a href="http://validator.w3.org/check?uri=referer" target="_top">
+        <img style="border:0" src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+      <a href="http://jigsaw.w3.org/css-validator/check/referer" target="_top">
+        <img style="border:0;width:88px;height:31px" src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+    </div>
+
+  </body>
+</html>
diff --git a/doc/external-libs/p4est-setup.sh b/doc/external-libs/p4est-setup.sh
new file mode 100755
index 0000000..6876059
--- /dev/null
+++ b/doc/external-libs/p4est-setup.sh
@@ -0,0 +1,133 @@
+#! /bin/bash
+#
+# This file is part of p4est [1].
+# p4est is a C library to manage a collection (a forest) of multiple
+# connected adaptive quadtrees or octrees in parallel.
+#
+# Copyright (C) 2010 The University of Texas System
+# Written by Carsten Burstedde, Lucas C. Wilcox, and Tobin Isaac
+# Modified 2010 by Wolfgang Bangerth
+# Modified 2010 by Timo Heister
+# Modified 2013 by Matthias Maier
+#
+# p4est is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# [1] http://www.p4est.org
+#
+
+# This program comes with ABSOLUTELY NO WARRANTY.
+
+# unpack under current directory
+UNPACK=`pwd`
+
+# choose names for fast and debug compilation directories
+BUILD_DIR="$UNPACK/p4est-build"
+BUILD_FAST="$BUILD_DIR/FAST"
+BUILD_DEBUG="$BUILD_DIR/DEBUG"
+
+function busage() {
+        echo "Usage: `basename $0` <p4est_tar.gz_file> [<install location>]"
+        echo "   or: `basename $0` /path/to/p4est-src/ [<install location>]"
+}
+function bdie () {
+        echo "Error: $@"
+        exit 1
+}
+
+if [ -z "$CFLAGS" ] && [ -z "$P4EST_CFLAGS_FAST" ] ; then    # neither variable set: use the default
+        export CFLAGS_FAST="-O2"
+else
+        export CFLAGS_FAST="$CFLAGS $P4EST_CFLAGS_FAST"
+fi
+echo "CFLAGS_FAST: $CFLAGS_FAST"
+if [ -z "$CFLAGS" ] && [ -z "$P4EST_CFLAGS_DEBUG" ] ; then   # neither variable set: use the default
+        export CFLAGS_DEBUG="-O0 -g"
+else
+        export CFLAGS_DEBUG="$CFLAGS $P4EST_CFLAGS_DEBUG"
+fi
+echo "CFLAGS_DEBUG: $CFLAGS_DEBUG"
+
+TGZ="$1"; test $# -gt 0 && shift              # first argument: tarball or source directory
+if test -d "$TGZ" ; then
+  SRCDIR=`CDPATH= cd -- "$TGZ" && pwd`        # absolute path: configure is run later from the build directories
+  echo "using existing source dir '$SRCDIR'"
+else
+    if test ! -f "$TGZ" ; then                # neither a directory nor a regular file
+        busage
+        bdie "File not found"
+    fi
+    if ! (echo "$TGZ" | grep -q 'p4est.*.tar.gz') ; then   # name must look like a p4est tarball
+        busage
+        bdie "File name mismatch"
+    fi
+fi
+
+# choose names for fast and debug installation directories
+INSTALL_DIR="$1"; shift
+if test -z "$INSTALL_DIR" ; then
+        INSTALL_DIR="$UNPACK/p4est-install"
+fi
+INSTALL_FAST="$INSTALL_DIR/FAST"
+INSTALL_DEBUG="$INSTALL_DIR/DEBUG"
+
+echo
+echo "This script tries to unpack, configure and build the p4est library."
+echo "Build FAST: $BUILD_FAST"
+echo "Build DEBUG: $BUILD_DEBUG"
+echo "Install FAST: $INSTALL_FAST"
+echo "Install DEBUG: $INSTALL_DEBUG"
+echo "Checking environment: CFLAGS P4EST_CFLAGS_FAST P4EST_CFLAGS_DEBUG"
+
+# remove a build tree left over from an earlier run
+if test -d "$BUILD_DIR" ; then
+        rm -rf "$BUILD_DIR"
+fi
+
+if test -f "$TGZ" ; then
+    DIR=`echo "$TGZ" | sed 's/\(p4est.*\).tar.gz/\1/'`   # drop the .tar.gz suffix
+    DIR=`basename "$DIR"`                                # quoted so paths with spaces survive
+    echo "Unpack directory: $UNPACK/$DIR"
+    if test -d "$UNPACK/$DIR" ; then
+        echo "Source directory found (remove it to unpack anew)"
+    else
+        echo -n "Unpacking... "
+        tar -xvz -f "$TGZ" -C "$UNPACK" >/dev/null || bdie "Unpacking failed"
+        echo "done"
+    fi
+    SRCDIR=$UNPACK/$DIR
+fi
+test -f "$SRCDIR/src/p4est.h" || bdie "Main header file missing"   # sanity checks on the source tree
+test -f "$SRCDIR/configure" || bdie "Configure script missing"
+
+echo "See output in files .../config.output and .../make.output"
+echo
+echo "Build FAST version in $BUILD_FAST"       # optimized variant: configure, make, make install
+mkdir -p "$BUILD_FAST" || bdie "Cannot create $BUILD_FAST"
+cd "$BUILD_FAST" || bdie "Cannot change into $BUILD_FAST"   # never configure in the wrong directory
+"$SRCDIR/configure" --enable-mpi --enable-shared \
+        --disable-vtk-binary --without-blas \
+        --prefix="$INSTALL_FAST" CFLAGS="$CFLAGS_FAST" \
+        CPPFLAGS="-DSC_LOG_PRIORITY=SC_LP_ESSENTIAL" \
+        "$@" > config.output || bdie "Error in configure"   # "$@": remaining script arguments go to configure
+make -C sc -j 8 > make.output || bdie "Error in make sc"    # the sc subdirectory is built first
+make -j 8 >> make.output || bdie "Error in make p4est"
+make install >> make.output || bdie "Error in make install"
+echo "FAST version installed in $INSTALL_FAST"
+
+echo
+echo "Build DEBUG version in $BUILD_DEBUG"     # debug variant: same steps plus --enable-debug
+mkdir -p "$BUILD_DEBUG" || bdie "Cannot create $BUILD_DEBUG"
+cd "$BUILD_DEBUG" || bdie "Cannot change into $BUILD_DEBUG"   # never configure in the wrong directory
+"$SRCDIR/configure" --enable-debug --enable-mpi --enable-shared \
+        --disable-vtk-binary --without-blas \
+        --prefix="$INSTALL_DEBUG" CFLAGS="$CFLAGS_DEBUG" \
+        CPPFLAGS="-DSC_LOG_PRIORITY=SC_LP_ESSENTIAL" \
+        "$@" > config.output || bdie "Error in configure"   # "$@": remaining script arguments go to configure
+make -C sc -j 8 > make.output || bdie "Error in make sc"    # the sc subdirectory is built first
+make -j 8 >> make.output || bdie "Error in make p4est"
+make install >> make.output || bdie "Error in make install"
+echo "DEBUG version installed in $INSTALL_DEBUG"
+echo
diff --git a/doc/external-libs/p4est.html b/doc/external-libs/p4est.html
new file mode 100644
index 0000000..b7d3941
--- /dev/null
+++ b/doc/external-libs/p4est.html
@@ -0,0 +1,66 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+   "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+  <head>
+    <title>The deal.II Readme on interfacing to p4est</title>
+    <link href="../screen.css" rel="StyleSheet">
+    <meta name="copyright" content="Copyright (C) 2010 - 2013, 2015 by the deal.II authors">
+    <meta name="keywords" content="deal.II">
+  </head>
+
+  <body>
+
+
+    <h1>Instructions for installing and using the p4est library</h1>
+
+    <p>
+      <a href="http://www.p4est.org/" target="_top">p4est</a> is a
+      library that manages meshes that are distributed across multiple
+      processors. It forms the basis of deal.II's implementation of
+      finite element solvers that can use meshes that are too large to
+      be held on each processor individually.
+    </p>
+
+    <p>
+      You need to install p4est before deal.II. To do so, you can
+      download it from <a href="http://www.p4est.org/" target="_top">here</a>.
+      You can either choose to manually compile and install p4est (as
+      explained in the documentation of p4est), or alternatively use
+      <a href="p4est-setup.sh">a script</a> that will automatically compile
+      and install a debug and optimized version of p4est.
+
+    </p>
+
+    <p>
+      If you want to use the script, copy the tarball to a fresh directory
+      together with the <a href="p4est-setup.sh">p4est-setup.sh script</a>
+      (the one from this link, not one you may have gotten from the p4est
+      webpage). Then call the script as follows:
+      <pre>
+
+sh ./p4est-setup.sh p4est-x-y-z.tar.gz /path/to/installation
+      </pre>
+      where <code>p4est-x-y-z.tar.gz</code> is the name of the p4est
+      distribution file, and <code>/path/to/installation</code> is a
+      directory into which you want to install p4est.
+    </p>
+
+    <p>
+      In both cases, you can configure <acronym>deal.II</acronym> using a
+      line like
+      <pre>
+
+cmake -DP4EST_DIR=/path/to/installation -DDEAL_II_WITH_P4EST=ON -DDEAL_II_WITH_MPI=ON <...>
+      </pre>
+      if the p4est library isn't picked up automatically.
+    </p>
+
+    <hr />
+    <div class="right">
+      <a href="http://validator.w3.org/check?uri=referer" target="_top">
+        <img style="border:0" src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+      <a href="http://jigsaw.w3.org/css-validator/check/referer" target="_top">
+        <img style="border:0;width:88px;height:31px" src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+    </div>
+  </body>
+</html>
diff --git a/doc/external-libs/petsc.html b/doc/external-libs/petsc.html
new file mode 100644
index 0000000..10a92ab
--- /dev/null
+++ b/doc/external-libs/petsc.html
@@ -0,0 +1,156 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+	  "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+  <head>
+    <title>The deal.II Readme on interfacing to PETSc</title>
+    <link href="../screen.css" rel="StyleSheet">
+    <meta name="copyright" content="Copyright (C) 2008 - 2015 by the deal.II authors">
+    <meta name="keywords" content="deal.II">
+  </head>
+
+  <body>
+
+    <h1>Interfacing <acronym>deal.II</acronym> to PETSc</h1>
+
+    <p>
+      <a href="http://www.mcs.anl.gov/petsc/"
+      target="_top">PETSc</a> is a
+      software package that provides lots of functionality for linear
+      algebra, among other things. For example, it includes implementations of a variety of
+      linear solvers, as well as various different sparse and dense matrix and
+      vector formats. Of particular interest to deal.II is their ability to
+      provide this functionality both on sequential and parallel (using MPI)
+      computers.
+    </p>
+
+    <p>
+      <acronym>deal.II</acronym> has wrapper classes to the linear algebra
+      parts of PETSc that provide almost the
+      same interfaces as the built-in <acronym>deal.II</acronym> linear
+      algebra classes. We use these interfaces for parallel computations based
+      on MPI since the native deal.II linear algebra classes lack this
+      ability. They are used, among other programs, in step-17, step-18 and
+      step-40.
+    </p>
+
+    <h4>Installing <acronym>deal.II</acronym> with PETSc</h4>
+
+    <p style="color: red"><b>Note:</b> The most recent version of PETSc
+      that has been reported to be compatible with
+      <acronym>deal.II</acronym> is version 3.6.0. If you use a later
+      version than this and encounter problems, let us know.
+    </p>
+
+    <p>
+      When you compile and install PETSc, you need to set
+      environment variables <code>PETSC_DIR</code> and <code>PETSC_ARCH</code>
+      to a path to PETSc and denoting the architecture for which PETSc is
+      compiled. <code>PETSC_ARCH</code> is in reality just a name you give to
+      your installation, it is a string you can choose however you like. The
+      point of it is that it allows you to have multiple possibly different
+      PETSc installations. A consequence of this is that you need to
+      let <acronym>deal.II</acronym>'s <code>cmake</code> scripts know which
+      one of these installations you want it to use, i.e., you need to set the
+      <code>PETSC_ARCH</code> variable to the same value you used when you
+      installed PETSc. The same is true for <code>PETSC_DIR</code>. You can
+      do this via environment variables. <code>cmake</code> will then also
+      recognize that PETSc shall be used, and enable the wrapper classes,
+      without you having to explicitly say that you want to use PETSc.
+    </p>
+
+    <p>
+      Alternatively, the <code>-DPETSC_DIR=DIR</code> and
+      <code>-DPETSC_ARCH=ARCH</code> options for <code>cmake</code>
+      can be used to override the values of <code>PETSC_DIR</code>
+      and <code>PETSC_ARCH</code> or if these environment
+      variables are not set at all. If you do have a PETSc
+      installation and have set the <code>PETSC_DIR</code> and
+      <code>PETSC_ARCH</code> environment variables but do not wish
+      <acronym>deal.II</acronym> to be configured for PETSc use, you
+      should specify <code>-DDEAL_II_WITH_PETSC=OFF</code> as a flag
+      during configuration.
+    </p>
+
+    <p><b>Note:</b> <acronym>deal.II</acronym> can be installed with both
+      PETSc and Trilinos and they do not usually get in their
+      respective ways. There are, however, occasions where this is not true
+      and this fundamentally comes from the fact that both of these packages
+      are built from subpackages that are developed by independent
+      groups. Unfortunately, some of these sub-packages can be configured to
+      be part of both PETSc and Trilinos, and if you try to
+      use <acronym>deal.II</acronym> with versions of PETSc and Trilinos
+      that <i>both</i> contain a particular sub-package, little good will come
+      of it in general. In particular, we have experienced this with the ML
+      package that can serve as an algebraic multigrid method to both PETSc
+      and Trilinos. If both of these packages are configured to use ML, then
+      difficult to understand error messages at compile or link time are
+      almost inevitable, and there is little the <acronym>deal.II</acronym>
+      build system can do to prevent this. Thus, <i>don't try to do that!</i>
+    </p>
+
+
+    <h4>Installing PETSc</h4>
+
+
+    <p>
+      Installing PETSc correctly can be a bit of a
+      challenge. To start, take a look at
+      the <a href="http://www.mcs.anl.gov/petsc/documentation/installation.html"
+      target="_top">PETSc installation instructions</a>. We have found that
+      the following steps generally appear to work where we simply unpack and
+      build PETSc in its final location (i.e., we do not first build and then
+      install it into a separate directory):
+      <pre>
+
+	tar xvzf petsc-x-y-z.tar.gz
+        cd petsc-x-y-z
+	export PETSC_DIR=`pwd`
+	export PETSC_ARCH=x86_64       # or any other identifying text for your machine
+	./config/configure.py --with-shared=1 --with-x=0 --with-mpi=1 --download-hypre=1
+	make
+      </pre>
+    </p>
+
+    <p>
+      This automatically builds PETSc with both MPI and the algebraic
+      multigrid preconditioner package Hypre (which we use in step-40).
+      <br>
+      Now let PETSc check its own sanity:
+      <pre>
+
+	make test
+      </pre>
+      will self-check the serial (and MPI) implementation of PETSc.
+      <br>
+      You may wish to put the <code>export</code> commands into
+      your <code>~/.bashrc</code> or <code>~/.cshrc</code> files, with
+      the first one replaced by something of the kind
+      <pre>
+
+	export PETSC_DIR=/path/to/petsc-x-y-z
+      </pre>
+    </p>
+
+    <p>
+      By default, PETSc is compiled in "debug mode". You can switch this to
+      "optimized mode" by adding the command line parameter
+      <pre>
+
+	--with-debugging=0
+      </pre>
+      to the call of <code>./config/configure.py</code> above. In some cases,
+      this has made linear solvers run up to 30% faster. As with choosing
+      between <acronym>deal.II</acronym>'s debug and optimized modes, you
+      should only use optimized PETSc builds once you have tested that your
+      program runs well in debug mode.
+    </p>
+
+    <hr />
+    <div class="right">
+      <a href="http://validator.w3.org/check?uri=referer" target="_top">
+        <img style="border:0" src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+      <a href="http://jigsaw.w3.org/css-validator/check/referer" target="_top">
+        <img style="border:0;width:88px;height:31px" src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+    </div>
+  </body>
+</html>
diff --git a/doc/external-libs/slepc.html b/doc/external-libs/slepc.html
new file mode 100644
index 0000000..81f3869
--- /dev/null
+++ b/doc/external-libs/slepc.html
@@ -0,0 +1,116 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+	  "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+  <head>
+    <title>The deal.II Readme on interfacing to SLEPc</title>
+    <link href="../screen.css" rel="StyleSheet">
+    <meta name="copyright" content="Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013, 2015 by the deal.II authors">
+    <meta name="keywords" content="deal.II">
+  </head>
+
+  <body>
+
+    <h1>Interfacing <acronym>deal.II</acronym> to SLEPc</h1>
+
+    <p>
+      <a href="http://www.grycap.upv.es/slepc/"
+      target="_top">SLEPc</a> is a software package that provides
+      functionality for solving a variety of eigenspectrum problems,
+      both standard and generalised. It relies on various different
+      sparse and dense matrix and vector formats that are defined in
+      the PETSc package.  Naturally, this means PETSc must be
+      correctly installed before a working copy of SLEPc can be
+      installed as described <a href="petsc.html">here</a>.
+    </p>
+
+    <p>
+      <acronym>deal.II</acronym> has wrapper classes to the solver and
+      spectral transformation parts of SLEPc that loosely provide
+      almost the same interfaces as the wrapper classes to the PETSc
+      functionality. They are used in a simple case in step-36.
+    </p>
+
+    <h4>Installing <acronym>deal.II</acronym> with SLEPc</h4>
+
+    <p>
+      SLEPc requires the environment variables <code>PETSC_DIR</code>
+      and <code>PETSC_ARCH</code> to be set already and usually
+      requires you to set the environment
+      variable <code>SLEPC_DIR</code> as well. If the environment
+      variable is set, then
+      <acronym>deal.II</acronym> will pick up on this during
+      configuration; just as before with PETSc.
+    </p>
+
+    <p>
+      Alternatively, the <code>-DSLEPC_DIR=DIR</code> option
+      for <code>cmake</code> can be used to override the value
+      of <code>SLEPC_DIR</code> or if, for example, this environment
+      variable is not set at all. You can also
+      specify <code>-DDEAL_II_WITH_SLEPC=OFF</code> as a flag during
+      configuration to have <acronym>deal.II</acronym> completely
+      ignore a SLEPc installation.
+    </p>
+
+    <h4>Installing SLEPc</h4>
+
+    <p>
+      <b>Note:</b> The version numbers (x,y,z) of your SLEPc
+      installation must be identical to the version numbers of
+      your PETSc installation.
+    </p>
+
+    <p>
+      Installing SLEPc is not much of a challenge once PETSc has been
+      correctly installed (check this first!). What is happening here,
+      is that SLEPc scans your PETSc installation and figures out how
+      that was configured; its location, the
+      given <code>PETSC_ARCH</code>, MPI settings, debug
+      mode, and so on.
+    </p>
+
+    <p>
+      This builds a simple SLEPc installation:
+      <pre>
+
+	tar xvzf slepc-x-y-z.tar.gz
+        cd slepc-x-y-z
+	export SLEPC_DIR=`pwd`
+	export LD_LIBRARY_PATH=$SLEPC_DIR/$PETSC_ARCH/lib:$LD_LIBRARY_PATH
+	./configure
+	make
+      </pre>
+      Now let SLEPc check its own sanity:
+      <pre>
+
+	make test
+      </pre>
+      will self-check the serial (and MPI) implementation of SLEPc.
+    </p>
+
+    <p>
+      Finally, you may want to put the two <code>export</code>
+      commands above into your <code>~/.bashrc</code>
+      or <code>~/.cshrc</code> files, with the first one replaced by
+      <pre>
+
+	export SLEPC_DIR=/path/to/slepc-x-y-z
+      </pre>
+      The <code>cmake</code> installation should then be able to pick
+      up where you installed SLEPc and include it automatically in the
+      deal.II build without any further need for instructions. You
+      will see at the bottom of the <code>cmake</code> output if that
+      is what actually happened.
+    </p>
+
+
+
+    <hr />
+    <div class="right">
+      <a href="http://validator.w3.org/check?uri=referer" target="_top">
+        <img style="border:0" src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+      <a href="http://jigsaw.w3.org/css-validator/check/referer" target="_top">
+        <img style="border:0;width:88px;height:31px" src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+    </div>
+  </body>
+</html>
diff --git a/doc/external-libs/trilinos.html b/doc/external-libs/trilinos.html
new file mode 100644
index 0000000..99b8c88
--- /dev/null
+++ b/doc/external-libs/trilinos.html
@@ -0,0 +1,165 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+	  "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+  <head>
+    <title>The deal.II Readme on interfacing to Trilinos</title>
+    <link href="../screen.css" rel="StyleSheet">
+    <meta name="copyright" content="Copyright (C) 2008 - 2015 by the deal.II authors">
+    <meta name="keywords" content="deal.II">
+  </head>
+
+  <body>
+
+    <h1>Interfacing <acronym>deal.II</acronym> to Trilinos</h1>
+
+    <p>
+      <a href="https://trilinos.org/" target="_top">Trilinos</a> is a
+      software package that provides lots of functionality for linear
+      algebra, among other things. For example, it includes implementations of a variety of
+      linear solvers, as well as various different sparse and dense matrix and
+      vector formats. Trilinos also has many subpackages that deal with
+      problems that go far beyond linear algebra, for example nonlinear
+      solvers, automatic differentiation packages, uncertainty propagation
+      engines, etc. Of particular interest to deal.II is their ability to
+      provide this functionality both on sequential and parallel (using MPI)
+      computers. Compared to <a href="http://www.mcs.anl.gov/petsc/"
+				target="_top">PETSc</a>, which is written in C, Trilinos is written in
+      C++ and can be
+      considered to be a more modern version of PETSc though both packages are
+      under continuing development at their respective national laboratories.
+    </p>
+
+    <p>
+      <acronym>deal.II</acronym> has wrapper classes to the linear algebra
+      parts of Trilinos that provide almost the
+      same interfaces as the built-in <acronym>deal.II</acronym> linear
+      algebra classes. We use these interfaces for parallel computations based
+      on MPI since the native deal.II linear algebra classes lack this
+      ability. They are used, among other programs, in step-31 and step-32.
+    </p>
+
+    <p>
+      While building deal.II with Trilinos is covered in
+      the <a href="../readme.html">ReadMe file</a>, we here give an
+      introduction to building Trilinos in such a way that it contains
+      everything that we need from the <acronym>deal.II</acronym> side.
+    </p>
+
+
+    <h5>Installing Trilinos</h5>
+
+    <p style="color: red">
+      Note: The current version of deal.II requires at least Trilinos 11.2.
+      Deal.II is known to work with Trilinos up to 11.14 and 12.4. Other versions of
+      Trilinos should work too but we do not do regression tests with them. 
+    </p>
+
+    <p>
+      Trilinos uses <a href="http://cmake.org/">cmake</a> to configure and
+      build. The following slightly longish set of commands will set up a
+      reasonable configuration (we require MueLu starting from 12.0):
+      <pre>
+
+	cd trilinos-12.0.1
+	mkdir build
+	cd build
+
+	cmake \
+	-D Trilinos_ENABLE_Sacado=ON \
+	-D Trilinos_ENABLE_MueLu:BOOL=ON \
+	-D Trilinos_ENABLE_Stratimikos=ON \
+	-D CMAKE_BUILD_TYPE=RELEASE \
+	-D CMAKE_CXX_FLAGS="-g -O3" \
+	-D CMAKE_C_FLAGS="-g -O3" \
+	-D CMAKE_Fortran_FLAGS="-g -O5" \
+	-D Trilinos_EXTRA_LINK_FLAGS="-lgfortran" \
+	-D CMAKE_VERBOSE_MAKEFILE=FALSE \
+	-D Trilinos_VERBOSE_CONFIGURE=FALSE \
+	-D TPL_ENABLE_MPI=ON \
+	-D BUILD_SHARED_LIBS=ON \
+	-D CMAKE_INSTALL_PREFIX:PATH=$HOME/share/trilinos \
+	..
+
+	make install
+      </pre>
+      You will need to adjust the path into which you want to install Trilinos
+      in the CMAKE_INSTALL_PREFIX line.
+    </p>
+
+
+    <h5>Parallel builds</h5>
+
+    <p>
+      If your computer has more than one processor core, use
+      <code>make -jN</code> instead of <code>make</code> in the last line
+      above, where <code>N</code> is the number of processors you have.
+    </p>
+
+
+    <h5>BLAS and LAPACK</h5>
+
+    <p>
+      Trilinos sometimes searches for other libraries but can't find
+      them if they are not in the usual directories or have other
+      names. Common examples are BLAS and LAPACK. In a case like
+      this, you may have to specifically pass the directories and/or
+      library names under which they can be found
+      to <code>cmake</code>. For example, this may mean adding the
+      following flags to the call above:
+      <pre>
+
+	-D BLAS_LIBRARY_NAMES:STRING=goto \
+	-D BLAS_LIBRARY_DIRS:STRING=/apps/GotoBLAS/lib64 \
+	-D LAPACK_LIBRARY_NAMES:STRING=lapack \
+	-D LAPACK_LIBRARY_DIRS:STRING=/apps/lapack-3.2.1/lib64
+      </pre>
+    </p>
+
+
+    <h5>Using external direct solvers</h5>
+
+    <p>
+      Trilinos (via its Amesos package) can interface with a number of direct
+      solvers (see, for example,
+      <a href="http://trilinos.org/docs/r11.8/packages/amesos/doc/html/index.html"
+	 target="_top">this page for Trilinos 11.8</a>). Most of them are external
+      packages to Trilinos and you will need to tell Trilinos configuration
+      scripts that you want to use them, for example via the 
+      <code>TrilinosWrappers::SolverDirect</code> class.  This can be tricky,
+      but adding defines similar to the following to the cmake command line
+      will achieve the goal to enable the UMFPACK and SuperLU/SuperLUDist
+      solvers:
+      <pre>
+
+        -D TPL_ENABLE_UMFPACK:BOOL=ON \
+        -D TPL_ENABLE_SuperLU:BOOL=ON \
+        -D TPL_ENABLE_SuperLUDist:BOOL=ON \
+        -D TPL_UMFPACK_INCLUDE_DIRS="/usr/include" \
+        -D SuperLUDist_INCLUDE_DIRS:FILEPATH="/path/to/SuperLU_DIST_3.2/SRC" \
+	-D TPL_SuperLUDist_LIBRARIES:FILEPATH="/path/to/SuperLU_DIST_3.2/lib/libsuperlu_dist.a" \
+        -D SuperLU_INCLUDE_DIRS:FILEPATH="/path/to/SuperLU_4.3/SRC" \
+        -D TPL_SuperLU_LIBRARIES:FILEPATH="/path/to/SuperLU_4.3/lib/libsuperlu_4.3.a"
+      </pre>
+      Similarly, to enable MUMPS, commands should include
+      <pre>
+
+        -D TPL_ENABLE_MUMPS:BOOL=ON \
+        -D TPL_ENABLE_SCALAPACK:BOOL=ON
+      </pre>
+      and possibly followed by
+      <pre>
+
+        -D TPL_MUMPS_INCLUDE_DIRS:PATH=/usr/include/openmpi-x86_64 \
+        -D SCALAPACK_LIBRARY_DIRS:PATH=/lib64/openmpi/lib \
+      </pre>
+      where you need to adjust the exact paths, of course.
+    </p>
+    <hr />
+    <div class="right">
+      <a href="http://validator.w3.org/check?uri=referer" target="_top">
+        <img style="border:0" src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+      <a href="http://jigsaw.w3.org/css-validator/check/referer" target="_top">
+        <img style="border:0;width:88px;height:31px" src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+    </div>
+  </body>
+</html>
diff --git a/doc/index.html b/doc/index.html
new file mode 100644
index 0000000..28336b0
--- /dev/null
+++ b/doc/index.html
@@ -0,0 +1,39 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Frameset//EN"
+          "http://www.w3.org/TR/REC-html40/frameset.dtd">
+<html>
+  <head>
+    <title>deal.II Documentation</title>
+    <link href="screen.css" rel="StyleSheet">
+    <link rel="SHORTCUT ICON" href="deal.ico">
+    <meta name="copyright" content="Copyright (C) 1998, 1999, 2000, 2002, 2003, 2004, 2005, 2012 by the deal.II Authors">
+    <meta name="keywords" content="deal dealii finite elements fem triangulation">
+    <meta http-equiv="content-language" content="en">
+
+    <!-- Avoid being displayed in a foreign frame //-->
+    <script type="text/javascript">
+      <!--
+	  if(top!=self)
+	  top.location=self.location;
+	  //-->
+    </script>
+  </head>
+
+  <frameset rows="70,*">
+    <frame name="title" src="title.html" frameborder="1">
+    <frameset cols="150,*">
+      <frame name="navbar" src="navbar.html" frameborder="0">
+      <frame name="body" src="documentation.html" frameborder="0">
+    </frameset>
+    <noframes>
+      <h1>The deal.II Documentation Page</h1>
+      Your browser does not seem to understand frames. A version of this
+      page that does not use frames can be found at
+      <a href="documentation.html">documentation.html</a>.
+    </noframes>
+  </frameset>
+</html>
+
+
+
+
+
diff --git a/doc/navbar.html b/doc/navbar.html
new file mode 100644
index 0000000..f0f156a
--- /dev/null
+++ b/doc/navbar.html
@@ -0,0 +1,51 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+	  "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+  <head>
+    <title>deal.II navigation bar</title>
+    <link href="screen.css" rel="StyleSheet">
+    <meta name="copyright" content="Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2009, 2010, 2011, 2012, 2013, 2014 by the deal.II Authors">
+    <meta name="date" content="2006/02/07 22:49:06">
+  </head>
+  <body class="navbar">
+
+    <p>
+      <a href="http://www.dealii.org" target="_top">http://www.dealii.org</a><br />
+    </p>
+
+    <hr>
+
+    <p>
+      <a href="index.html" target="_top">Home</a><br />
+      <a href="readme.html" target="body">README</a><br />
+    </p>
+
+    <hr>
+
+    <b><small>Doxygen and Lectures</small></b>
+    <p>
+      <a href="doxygen/deal.II/Tutorial.html" target="_top">Tutorial</a><br />
+      <a href="doxygen/deal.II/index.html" target="_top">Manual</a><br />
+      <a href="http://www.math.tamu.edu/~bangerth/videos.html"
+      target="_top">Wolfgang's lectures</a><br />
+    </p>
+
+    <hr>
+
+    <b><small>Online Resources</small></b>
+    <p>
+      <a href="https://github.com/dealii/dealii/wiki/FrequentlyAskedQuestions" target="_top">FAQ</a><br />
+      <a href="http://www.dealii.org/news.html" target="_top">News</a><br />
+      <a href="http://www.dealii.org/download/" target="_top">Download</a><br />
+      <a href="http://www.dealii.org/mail.html" target="_top">Mailing list</a><br />
+      <a href="https://github.com/dealii/dealii/wiki" target="_top">Wiki</a><br />
+      <a href="https://github.com/dealii/dealii/issues" target="_top">Bug tracker</a><br />
+      <a href="http://www.dealii.org/reports.html" target="_top">Reports</a><br />
+      <a href="http://www.dealii.org/publications.html" target="_top">Publications</a><br />
+      <a href="http://www.dealii.org/authors.html" target="_top">Authors</a> <br />
+      <a href="http://www.dealii.org/license.html" target="_top">License</a> <br />
+      <a href="http://www.dealii.org/mail.html" target="_top">Mailing lists</a>
+    </p>
+
+  </body>
+</html>
diff --git a/doc/news/1.0.0-vs-2.0.0.h b/doc/news/1.0.0-vs-2.0.0.h
new file mode 100644
index 0000000..166e503
--- /dev/null
+++ b/doc/news/1.0.0-vs-2.0.0.h
@@ -0,0 +1,36 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013, 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/**
+ * @page changes_between_1_0_and_2_0 Changes between Version 1.0 and 2.0
+
+<p>
+This is the list of changes made between the deal.II releases listed above.
+</p>
+
+
+<h2>New multithreading scheme</h2>
+
+<p>
+After the old scheme to implement multithreading in
+deal.II has proven to be too difficult, we
+implemented something that is significantly more complex, but at the
+same time significantly simpler to use. There is also a report on this
+subject, explaining why's and how's, which can be accessed from the
+<a href="../../documentation.html" target="body">documentation</a>
+page.
+</p>
+
+*/
diff --git a/doc/news/2.0.0-vs-3.0.0.h b/doc/news/2.0.0-vs-3.0.0.h
new file mode 100644
index 0000000..693f0dd
--- /dev/null
+++ b/doc/news/2.0.0-vs-3.0.0.h
@@ -0,0 +1,82 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013, 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/**
+ * @page changes_between_2_0_and_3_0 Changes between Version 2.0 and 3.0
+
+<p>
+Version 3.0  of the deal.II object-oriented finite  element library is
+available on the deal.II home-page at
+</p>
+<p align="center">
+<a href="http://www.dealii.org" target="_top">
+      http://www.dealii.org
+</a>
+</p>
+
+<p>
+deal.II is a C++ program  library targeted at adaptive finite elements
+and error estimation.  It uses state-of-the-art programming techniques
+of the C++ programming language to offer you a modern interface to the
+complex  data structures  and algorithms  required for  adaptivity and
+enables you to use a variety of finite elements in one, two, and three
+space dimensions, as well as support for time-dependent problems.
+</p>
+
+<p>
+The library is written for research purposes and offers many features:
+<ul>
+<li><p> Support  for one, two, and  three space dimensions,  using a unified
+  interface that enables writing programs almost dimension independent.</p>
+
+<li><p> Handling of locally  refined  grids,  including  different  adaptive
+  refinement strategies based  on  local error  indicators  and  error
+  estimators.</p>
+
+<li><p> Support  for  a  variety  of  finite  elements,  including  Lagrange 
+  elements of order one through four, and discontinuous elements.</p>
+
+<li><p> Extensive documentation: all documentation is available online in  a
+  logical tree structure to allow fast access to the  information  you 
+  need. If printed it comprises about 200 pages of tutorials,  several
+  reports, and far more than  1,000  pages  of  programming  interface
+  documentation with  explanations  of  all  classes,  functions,  and 
+  variables.</p>
+
+<li><p> Modern software techniques that make  access  to  the  complex  data
+  structures and algorithms as transparent as  possible.  The  use  of 
+  object oriented programming allows for program structures similar to
+  the structures in mathematical analysis.</p>
+
+<li><p> Fast algorithms that enable you to solve problems with up to several
+  millions of degrees of freedom quickly. As  opposed  to  programming 
+  symbolic algebra packages the penalty for readability is low.</p>
+
+<li><p> Support for several output formats, including  some  common  formats 
+  for visualization of scientific data.</p>
+
+<li><p> Support for  a  variety  of  computer  platforms,  including  multi-
+  processor machines.</p>
+
+<li><p> Free source code under an Open Source license, and the invitation to
+  contribute to further development of the library.</p>
+</ul>
+</p>
+
+<p>
+Wolfgang Bangerth, Guido Kanschat, the deal.II team
+</p>
+
+*/
diff --git a/doc/news/3.0.0-vs-3.0.1.h b/doc/news/3.0.0-vs-3.0.1.h
new file mode 100644
index 0000000..b131336
--- /dev/null
+++ b/doc/news/3.0.0-vs-3.0.1.h
@@ -0,0 +1,76 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013, 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/**
+ * @page changes_between_3_0_0_and_3_0_1 Changes between Version 3.0.0 and 3.0.1
+
+<p>
+This is the list of changes made between the deal.II releases listed above.
+All entries are signed with the name of the author.
+</p>
+
+<ol>
+  <li> <p>
+       Fix: in three space dimensions, the triangulation class
+       over-estimated the necessary amount of memory needed upon
+       refinement and allocated too much.
+       <br>
+       (WB 2000/04/19)
+       </p>
+
+  <li> <p>
+       Fix: getting the support points from system finite elements
+       (class <code>FESystem</code>) resulted in an
+       exception. 
+       <br>
+       (WB 2000/05/10)
+       </p>
+
+  <li> <p>
+       Fix: <code>FullMatrix::Tmmult</code> and
+       <code>FullMatrix::Tvmult</code> were broken.
+       <br>
+       (WB 2000/05/08 and 2000/05/26)
+       </p>
+
+  <li> <p>
+       Fix: slight bug in 
+       <code>DataOut::build_patches</code>
+       in multithreaded mode fixed.
+       <br>
+       (Ralf Hartmann, 2000/05/29)
+       </p>
+
+  <li> <p>
+       Fix: 
+       <code>SparsityPattern::print_gnuplot</code>
+       wrote rows and columns exchanged. Since most matrices have
+       symmetric sparsity patterns, this has gone unnoticed until now.
+       <br>
+       (WB 2000/05/30)
+       </p>
+
+  <li> <p>
+       Fixed: the 
+       <code class="program">common/scripts/make_dependencies.pl</code> 
+       script that sets up the dependencies for the make files had a
+       problem when the path to the library included special characters
+       such as `+'. This is now fixed.
+       <br>
+       (WB 2000/06/15)
+       </p>
+</ol>
+
+*/
diff --git a/doc/news/3.0.0-vs-3.1.0.h b/doc/news/3.0.0-vs-3.1.0.h
new file mode 100644
index 0000000..94d97c5
--- /dev/null
+++ b/doc/news/3.0.0-vs-3.1.0.h
@@ -0,0 +1,1113 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013, 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/**
+ * @page changes_between_3_0_0_and_3_1_0 Changes between Version 3.0.0 and 3.1.0
+
+<p>
+This is the list of changes made between the deal.II releases listed above.
+All entries are signed with the name of the author.
+</p>
+
+
+
+<a name="general"></a>
+<h3>General</h3>
+
+<ol>
+  <li> <p>
+       New: All identifiers that the C++ standard (see
+       <a href="http://www.cygnus.com/misc/wp/dec96pub/"
+       target="_top"> this page</a> for the public draft -- the
+       standard itself is not available online for copyright reasons)
+       mandates to be in namespace <code>std::</code> are correctly
+       prefixed by the namespace name. While previous gcc compilers
+       accepted these names also without the prefix, gcc 3.0 will
+       require them, as do some commercial compilers.
+       <br>
+       (WB 2001/01/16)
+       </p>
+
+  <li> <p>
+       New: In multithreading mode, the compile flags now also have 
+       <code class="program">_REENTRANT</code> defined. If this flag
+       is defined, then the standard Unix headers also declare
+       reentrant versions (with suffix <code class="program">_r</code>)
+       of many functions.
+       <br>
+       (WB 2000/09/20)
+       </p>
+
+  <li> <p>
+       New: Major and minor version numbers of the
+       deal.II library are now passed to the
+       compiler as preprocessor variables
+       <code class="program">DEAL_2_MAJOR</code> and
+       <code class="program">DEAL_2_MINOR</code>. Their present values
+       are 3 and 1, respectively. Variables of the same name are also
+       available in Makefiles.
+       <br>
+       (WB 2000/09/18)
+       </p>
+
+  <li> <p>
+       New: the <code class="program">step-9</code> example program is
+       now ready, showing several advanced programming techniques in
+       deal.II. 
+       <br>
+       (WB 2000/07/18)
+       </p>
+
+  <li> <p>
+       Changed: deal.II now uses the 
+       <code class="program">kdoc2</code> program to generate the API
+       documentation. This makes for much nicer documentation and
+       also works better than the previous <code class="program">kdoc1</code>.
+       A copy of <code class="program">kdoc2</code> is provided within
+       the deal.II source tree.
+       <br>
+       (WB 2000/06/28)
+       </p>
+
+  <li> <p>
+       Changed: when compiling files, the output generated by the
+       <code class="program">make</code> program now indicates also
+       whether we are compiling for multithread mode, besides the
+       information whether the file is compiled for debug or optimized
+       mode.
+       <br>
+       (Ralf Hartmann 2000/06/26)
+       </p>
+
+  <li> <p>
+       Fixed: the 
+       <code class="program">common/scripts/make_dependencies.pl</code> 
+       script that sets up the dependencies for the make files had a
+       problem when the path to the library included special characters
+       such as `+'. This is now fixed.
+       <br>
+       (WB 2000/06/15)
+       </p>
+
+  <li> <p>
+       New: the <code class="program">configure</code> script now
+       checks whether the <code>getrusage</code> function is properly
+       declared (this function is used in the timer class). The
+       problem is that on SunOS 4.x, this function exists, but is not
+       declared in the right file (although it is listed in the man
+       pages). We then have to declare it ourselves.
+       <br>
+       (WB 2000/06/14)
+       </p>
+
+  <li> <p>
+       New: the <code class="program">configure</code> script now
+       enforces the compiler flags <code>-ansi -pedantic</code> when
+       compiling in debug mode. This should force us to write more
+       standard compliant code. (Since the ACE library is not
+       standards conforming, the <code>-ansi</code> flag is not used
+       when multithreading is requested.)
+       <br>
+       (WB 2000/06/14)
+       </p>
+
+  <li> <p>
+       Improved: the <code class="program">configure</code> script is
+       now used to select the compiler options. Previously, selection
+       of compiler options was done both in 
+       <code class="program">configure</code> as well as in the global
+       options Makefile 
+       <code class="program">common/Make.global_options</code>.
+       <br>
+       (WB 2000/06/02)
+       </p>
+
+  <li> <p>
+       Improved: Dependence on the files 
+       <code class="file">forward_declarations.h</code> has been widely
+       removed to improve compilation time
+       <br>
+       (GK 2000/05/24)
+       </p>
+
+
+  <li> <p>
+       Fix: <code class="program">configure</code> now uses 
+       <code class="program">config.guess</code> to determine the
+       operating system and hardware platform.
+       <br>
+       (GK 2000/04/05)
+       </p>
+
+  <li> <p>
+       Changed: License is now deal.II Public
+       License, restricting commercial use more explicitly.
+       <br>
+       (WB/RH/GK)
+       </p>
+</ol>
+
+
+
+<a name="base"></a>
+<h3>base</h3>
+
+<ol>
+
+  <li> <p>
+       Improved: exceptions <code>ExcIO</code> and <code
+       class="class">ExcOutOfMemory</code> are defined globally now.
+       <br>
+       (GK 2000/12/07)
+       </p>
+
+  <li> <p>
+       Improved: <code>ParameterHandler</code> now has a
+       function <code>log_parameters</code> which
+       allows output of all parameters to a log file.
+       <br>
+       (GK 2000/12/07)
+       </p>
+
+  <li> <p>
+       New: almost all classes that store data now have a function
+       <code>memory_consumption</code> that returns an
+       estimate of the amount of memory (in bytes) used up by this
+       class. Supporting functions to compute the size of STL vectors
+       and other objects can be found in the
+       <code>MemoryConsumption</code> namespace.
+       <br>
+       (WB 2000/11/27)
+       </p>
+
+  <li> <p>
+       New: Class <code>FunctionDerivative</code>
+       computes finite difference approximations of a directional
+       derivative of a <code>Function</code>.
+       <br>
+       (GK 2000/11/13)
+       </p>
+
+  <li> <p>
+       New: The <code>DataOutBase</code> and
+       <code>DataOutBase::Patch</code> classes have been
+       changed so as to allow output of objects that have a different
+       dimension than the surrounding space, for example writing faces
+       instead of cells (this might be useful to write only external
+       faces in 3d computations).
+       <br>
+       (WB 2000/09/07)
+       </p>       
+
+  <li> <p>
+       New: There is a new <code>distance</code> function
+       in the <code>Point</code> class that calculates the
+       distance between two points.
+       <br>
+       (Ralf Hartmann 2000/09/06)
+       </p>   
+
+  <li> <p> 
+       New: <code>Timer</code> now uses the system
+       function <code>getrusage (RUSAGE_CHILDREN,
+       .)</code> that is needed in multithreading. But still the <code
+       class="class">Timer</code> class does not yet work in
+       multithreading, as getrusage with flag RUSAGE_CHILDREN gives always
+       0 (at least on Solaris7).
+       <br>
+       (Ralf Hartmann 2000/08/25)
+       </p>   
+
+  <li> <p>
+       New: There is now a set of functions
+       <code>outer_product</code> that form the outer
+       product of tensors.
+       <br>
+       (WB 2000/07/23)
+       </p>       
+
+  <li> <p>
+       New: The classes 
+       <code>Patterns::Integer</code>
+       and <code>Patterns::Double</code> now allow that
+       a range may be specified in which the parameter shall
+       be. Furthermore, instead of a class,
+       <code>Patterns</code> is now a namespace, so it
+       can be reopened in case you want to write another pattern class
+       and want to put it into the same namespace.
+       <br>
+       (WB 2000/07/21)
+       </p>
+
+  <li> <p>
+       New: classes <code>QMilne</code> and <code
+       class="class">QWeddle</code> for closed Newton-Cotes-formulæ of
+       orders 7 and 9, respectively.
+       <br>
+       (GK 2000/07/07)
+       </p>
+
+  <li> <p>
+       New: There is now a function <code>invert
+       (Tensor<2,dim>)</code> that returns the inverse of a tensor of
+       rank 2.
+       <br>
+       (WB 2000/04/14)
+       </p>
+
+  <li> <p>
+       New multithreading scheme is implemented.
+       <br>
+       (WB 2000/04/13)
+       </p>
+
+  <li> <p>
+       Improved: <code>Subscriptor</code> prints the
+       real class name if a subscribed object is deleted.
+       <br>
+       (GK 2000/04/12)
+       </p>
+</ol>
+
+
+
+<a name="lac"></a>
+<h3>lac</h3>
+
+<ol>
+  <li> <p>
+       Fixed: <code>SparseILU</code> had two bugs which
+       are now fixed.
+       <br>
+       (<a href="mailto:or at winfos.com">Oliver Rheinbach</a>, 2001/02/02)
+       </p>
+
+  <li> <p>
+       Improved: block classes have a variable number of blocks now,
+       not a template parameter.
+       <br>
+       (GK 2000/12/07)
+       </p>
+
+  <li> <p>
+       New: <code>BlockDiagonalMatrix</code> is a
+       template that generates a matrix with multiple copies of the
+       same block on the diagonal.
+       <br>
+       (GK 2000/12/07)
+       </p>
+
+  <li> <p>
+
+       Improved: Krylov-space solvers do not use the function residual
+       anymore. This allows easier implementation of new matrix
+       classes. <code>SolverGMRES</code> now also counts
+       the first iteration step. Finally, all solvers inherit a <code
+       class="class">Subscriptor</code>.
+       <br>
+       (GK 2000/12/07)
+       </p>
+
+  <li> <p>
+       New: There are now functions 
+       <code>SparsityPattern::symmetrize</code> and
+       <code>SparseMatrix::symmetrize</code> that
+       generate a symmetric matrix from a non-symmetric one.
+       <br>
+       (WB 2000/12/02)
+       </p>
+
+  <li> <p>
+       New: almost all classes that store data now have a function
+       <code>memory_consumption</code> that returns an
+       estimate of the amount of memory (in bytes) used up by this
+       class.
+       <br>
+       (WB 2000/11/27)
+       </p>
+
+  <li> <p> 
+       New: <code>
+       SparseMatrix<number>::n_actually_nonzero_elements</code>
+       returns the number of entries that are actually nonzero.
+       <br>
+       (Ralf Hartmann 2000/11/22)
+       </p>       
+
+  <li> <p>
+       Fixed: unlike announced in the docs, the
+       <code>FullMatrix::norm2</code> function did not
+       return the Frobenius norm of a matrix, but its square. This is
+       fixed now.
+       <br>
+       (GK 2000/08/28)
+       </p>       
+
+  <li> <p>
+       Improved: <code>PreconditionBlockSOR</code><code
+       class="member">::Tvmult(...)</code> is implemented.
+       <br>
+       (GK 2000/07/07)
+       </p>
+
+  <li> <p>
+       Improved: The breakdown criterion of <code
+       class="class">SolverBicgstab</code> can be changed by the use
+       of <code>SolverBicgstab::AdditionalData</code>.
+       <br>
+       (GK 2000/07/07)
+       </p>       
+
+  <li> <p>
+       New: <code>SolverRichardson</code> has a
+       transposed solver <code>Tsolve(...)</code>. It
+       uses the functions <code>Tvmult(...)</code> of
+       the provided matrix and preconditioner.
+       <br>
+       (GK 2000/07/07)
+       </p>
+
+  <li> <p>
+       Improved: <code>FullMatrix</code><code
+       class="member">::invert(...)</code> now inverts matrices of all
+       sizes. If there is no hardcoded inversion, <code
+       class="member">gauss_jordan()</code> is used implicitly.
+       <br>
+       (GK 2000/06/30)
+       </p>
+
+  <li> <p> 
+       New: For recognizing a diverging solver before the maximum
+       number of steps is reached, 
+       <code>SolverControl</code> returns 
+       <code>failure</code> also if the residual
+       increases over the start residual by a specific factor. This
+       factor is given to the <code>SolverControl</code> 
+       object by the <code>set_failure_criterion</code>
+       function. After calling the latter function, checking of this
+       additional failure criterion may again be disabled by calling
+       <code>clear_failure_criterion</code>.
+       <br>
+       (Ralf Hartmann 2000/06/26)
+       </p>
+
+  <li> <p>
+       Improved: The interface of preconditioner classes has changed.
+       Preconditioners are now considered linear operators like
+       matrices: they have members <code>vmult</code>
+       and <code>Tvmult</code> instead of the old <code
+       class="member">operator()</code>. This will allow the
+       implementation of further non-symmetric solvers.
+       <br>
+       (GK 2000/06/20)
+       </p>
+
+  <li> <p>
+       New: there is now a function
+       <code>SparseMatrix::el</code> that does mostly
+       the same as <code>SparseMatrix::operator()</code>,
+       but returns a zero if elements of the matrix are accessed that
+       are not in the sparsity pattern. Thus, the new function allows
+       to actually traverse rows or columns of the matrix without
+       taking care of the sparsity pattern, while
+       <code>SparseMatrix::operator()</code> should be
+       used to write algorithms more efficiently.
+       <br>
+       (WB 2000/06/02)
+       </p>
+
+  <li> <p>
+       Fix: 
+       <code>SparsityPattern::print_gnuplot</code>
+       wrote rows and columns exchanged. Since most matrices have
+       symmetric sparsity patterns, this has gone unnoticed until now.
+       <br>
+       (WB 2000/05/30)
+       </p>
+
+  <li> <p>
+       Fix: the 
+       <code>FullMatrix::Tvmult</code> function
+       suffered from the same problems as the
+       <code>FullMatrix::Tmmult</code> function. This
+       is now fixed as well.
+       <br>
+       (WB 2000/05/26)
+       </p>
+
+  <li> <p>
+       New: Class <code>PreconditionBlockJacobi</code>.
+       <br>
+       (GK 2000/05/24)
+       </p>
+
+  <li> <p>
+       New: <code>SolverControl</code> has an interface
+       to <code>ParameterHandler</code>, defining and
+       reading parameters from a file automatically.
+       <br>
+       (GK 2000/05/24)
+       </p>
+
+  <li> <p>
+       New: <code>BlockIndices</code>: Class that
+       manages the conversion of global indices into a block
+       vector/matrix/... to the indices local to each of the blocks. 
+       <br>
+       (WB 2000/05/08)
+       </p>  
+
+  <li> <p>
+       New: <code>BlockSparsityPattern</code> and
+       <code>BlockSparseMatrix</code>: Classes that
+       represent matrices that are composed of sparse matrices.
+       <br>
+       (WB 2000/05/08)
+       </p>  
+
+  <li> <p>
+       Fix: the 
+       <code>FullMatrix::mmult</code> and
+       <code>FullMatrix::Tmmult</code> code don't
+       resize their output argument any more, as this is not common
+       style in the library. Furthermore, 
+       <code>FullMatrix::Tmmult</code> was utterly
+       broken. 
+       <br>
+       (WB 2000/05/08)
+       </p>
+
+  <li> <p>
+       Change: the <code>matrix_norm</code> functions
+       of sparse and full matrices are renamed to 
+       <code>matrix_norm_square</code>, since they in
+       fact return the square of the norm. This should avoid confusion
+       in some cases.
+       <br>
+       (WB 2000/05/05)
+       </p>
+
+  <li> <p>
+       Fix: the ``copy-like'' constructor of 
+       <code>SparsityPattern</code> that copies another
+       object and adds some off-diagonals had a bug that caused an
+       exception in some cases. This is now fixed.
+       <br>
+       (WB 2000/05/04)
+       </p>
+
+  <li> <p>
+       New: <code>SwappableVector</code>: Class that
+       allows to swap out the data of a vector to disk and reload it
+       later on. It also has a function to preload the data before its
+       use in a separate thread if the library is configured for
+       multi-threading.
+       <br>
+       (WB 2000/05/03)
+       </p>
+
+  <li> <p>
+       New: there are now functions <code>Vector::swap</code> 
+       and <code>BlockVector::swap</code>, as well as
+       global functions <code>swap(u,v)</code> that
+       exchange the data of two vectors without needing a temporary
+       vector and without copying around data. Their run-time is
+       therefore independent of the length of the vectors.
+       <br>
+       (WB 2000/05/02)
+       </p>
+
+  <li> <p>
+       Fix: <code>SolverCG</code> counts steps properly.
+       <br>
+       (GK 2000/04/25)
+       </p>
+
+  <li> <p>
+       Change: the solver classes in LAC lost their first template
+       argument. Their names are now 
+       <code>SolverXX<VECTOR></code>, where
+       <code>XX</code> denotes the name of the solver
+       (e.g. CG, GMRES, etc). Furthermore, the
+       inheritance from <code>Solver</code> was made private
+       to reflect the logical structure.
+       <br>
+       (GK 2000/04/25)
+       </p>
+
+  <li> <p>
+       New: <code>EigenInverse</code> implements inverse
+       iteration by Wielandt.
+       <br>
+       (GK 2000/04/20)
+       </p>
+
+  <li> <p>
+       New: <code>EigenPower</code> implements power
+       method by von Mises
+       <br>
+       (GK 2000/04/19)
+       </p>
+
+  <li> <p>
+       New: <code>PreconditionBlockSOR::set_omega</code>
+       allows to change the relaxation parameter.
+       <br>
+       (GK 2000/04/12)
+       </p>
+
+  <li> <p>
+       New: <code>SolverXX</code>: There is a virtual function
+       <code>print_vectors</code> called in every step. It is void in the
+       solver itself but can be used to print intermediate iteration
+       vectors.
+       <br>
+       (GK 2000/04/05)
+       </p>
+</ol>
+
+
+
+<a name="deal.II"></a>
+<h3>deal.II</h3>
+
+<ol>
+  <li> <p>
+       Extend: <code>DoFTools::extract_boundary_dofs</code>
+       now allows to also specify which boundary conditions shall be
+       considered. 
+       <br>
+       (WB 2000/12/04)
+       </p>
+
+  <li> <p>
+       New: some arguments of the functions 
+       <code>DoFHandler::n_boundary_dofs</code>,
+       <code>DoFTools::extract_boundary_dofs</code>,
+       and
+       <code>DoFTools::map_dof_to_boundary_indices</code> 
+       are changed from <code>list</code> to
+       <code>set</code>, since that resembles more
+       closely the purpose of the parameter, and makes computations
+       slightly faster.
+       <br>
+       (WB 2000/12/04)
+       </p>
+
+  <li> <p>
+       New: almost all classes that store data now have a function
+       <code>memory_consumption</code> that returns an
+       estimate of the amount of memory (in bytes) used up by this
+       class.
+       <br>
+       (WB 2000/11/27)
+       </p>
+
+  <li> <p>
+       New: The <code>ConstraintMatrix::add_entries</code>
+       function adds several constraints at once.
+       <br>
+       (WB 2000/10/26)
+       </p>
+
+  <li> <p>
+       New: The <code>DoFRenumbering::random</code>
+       function renumbers degrees of freedom in a random way.
+       <br>
+       (WB 2000/10/22)
+       </p>
+
+  <li> <p>
+       New: The <code>TriaAccessor</code> now has a
+       function <code>point_inside</code> that checks
+       whether a certain point is inside a given cell.
+       <br>
+       (Thomas Richter 2000/10/12)
+       </p>
+
+  <li> <p>
+       New: The <code>FiniteElement</code> has got two
+       new functions <code>transform_unit_to_real_cell
+       </code> and <code>transform_real_to_unit_cell
+       </code>. They allow to transform points from reference (unit)
+       cell to real cell and vice versa. <code
+       class="member">transform_real_to_unit_cell</code> involves a
+       Newton iteration and works for all dimensions and all mappings
+       used to transform from unit to real cell.
+       <br>
+       (Ralf Hartmann 2000/10/12)
+       </p>
+
+  <li> <p>
+       Extended: The <code>Triangulation</code>
+       class can handle boundary information in 3d as well
+       (i.e. lines and quads in 3d with special material IDs).
+       <br>
+       (<a href="mailto:ms@biomech.tu-graz.ac.at">Michael
+        Stadler</a> 2000/10/11)
+       </p>       
+
+  <li> <p>
+       Extended: The <code>GridIn</code>
+       class can now read 3D UCD data, including boundary information
+       (i.e. lines and quads in 3d with special material IDs).
+       <br>
+       (<a href="mailto://ms@biomech.tu-graz.ac.at">Michael
+        Stadler</a> 2000/10/11)
+       </p>       
+
+  <li> <p>
+       New: The <code>GridRefinement::refine_and_coarsen_optimize</code>
+       function implements an alternative way to flag cells for
+       refinement and coarsening.
+       <br>
+       (Thomas Richter 2000/10/10)
+       </p>       
+
+  <li> <p>
+       Extended: The <code>GridIn::delete_unused_vertices</code>
+       function now eliminates vertices from the input that are not
+       referenced by any of the cells in the input file. This makes it
+       simpler to delete some cells from the input file by hand,
+       without the need to update the vertex lists, which can be
+       tiring as several cells usually use each vertex. All functions
+       in the <code>GridIn</code> reading grids in
+       several input files call this function before passing the data
+       to the triangulation object.
+       <br>
+       (WB 2000/09/26)
+       </p>       
+
+  <li> <p>
+       New: The <code>GridIn</code>
+       class can now read the basics of grids in DB Mesh format.
+       <br>
+       (WB 2000/09/26)
+       </p>       
+
+  <li> <p>
+       Extended: The <code>KellyErrorEstimator</code>
+       class is now able to estimate errors for several solution
+       vectors at the same time, provided they live on the same
+       <code>DoFHandler</code> object.
+       <br>
+       (WB 2000/09/11)
+       </p>       
+
+  <li> <p>
+       New: The <code>DataOut_Faces</code> class allows to
+       output faces instead of cells. This might be handy for 3d
+       computations if one is only interested in surface plots, or
+       cuts through the domain.
+       <br>
+       (WB 2000/09/07)
+       </p>       
+
+  <li> <p>
+       Removed: The 
+       <code>DataOut_Old</code> class has finally gone for
+       good. It was already deprecated in version 3.0, and has been
+       superseded for a long time by the framework of classes around
+       <code>DataOutBase</code> and
+       <code>DataOut</code>.
+       <br>
+       (WB 2000/09/07)
+       </p>
+
+  <li> <p>
+       New: There is now a function
+       <code>DoFHandler::n_boundary_dofs</code>
+       that takes the list of selected boundary indicators as a
+       <code>list</code> of values, rather than the
+       usual <code>map</code> of pairs of boundary
+       indicators and function object pointers.
+       <br>
+       (WB 2000/08/25)
+       </p>
+
+  <li> <p>
+       Changed: The 
+       <code>map_dof_to_boundary_index</code>
+       functions have been moved from the <code>DoFHandler</code>
+       to the <code>DoFTools</code> class, in order to
+       further remove code from the big classes which is not necessarily
+       needed there.
+       <br>
+       (WB 2000/08/25)
+       </p>
+
+  <li> <p>
+       New: there is now a class <code>DataOutRotation</code>
+       that can be used to output data which has been computed
+       exploiting rotational symmetry, on the original domain. Thus,
+       the output is of one dimension higher than the computation was,
+       where the computed solution is rotated around the axis of
+       symmetry.
+       <br>
+       (WB 2000/08/14)
+       </p>
+
+  <li> <p>
+       New: class <code>HalfHyperShellBoundary</code>
+       and <code>GridGenerator::half_hyper_shell</code>
+       generate a half shell, useful for computations with a shell
+       domain and rotational symmetry.
+       <br>
+       (WB 2000/08/08)
+       </p>
+
+  <li> <p>
+       Changed: The functions
+       <code>Triangulation::refine</code>,
+       <code>Triangulation::coarsen</code>,
+       <code
+       class="member">Triangulation::refine_and_coarsen_fixed_fraction</code>,
+       and
+       <code
+       class="member">Triangulation::refine_and_coarsen_fixed_number</code>
+       have been moved from the triangulation class to a separate
+       class <code>GridRefinement</code> as they are not
+       intricately bound to the triangulation but rather form a
+       distinct class of functions that flag cells for refinement or
+       coarsening based on error indicators.
+       <br>
+       (WB 2000/07/28)
+       </p>
+
+  <li> <p>
+       New: <code>DataOut::clear_input_data_references</code>
+       clears all references to input data vectors and to the <code
+       class="class">DoFHandler</code>. This function may be useful
+       after the patches have been built, to release memory as early
+       as possible, in this case before the output is actually written.
+       <br>
+       (WB 2000/07/26)
+       </p>
+
+  <li> <p>
+       New: <code>class DerivativeApproximation</code>
+       approximates the norm of the gradient or second derivative of a
+       finite element field on each cell from finite difference
+       approximations.
+       <br>
+       (WB 2000/04/14 and 2000/07/23)
+       </p>
+
+  <li> <p>
+       Fix: Add missing assertions in <code
+       class="member">FEValuesBase::get_function_*</code>. If an <code
+       class="class">ExcAccessToUninitializedField</code> is now thrown
+       then probably an <code>update_values</code> or
+       <code>update_gradients</code> is missing in the
+       <code>UpdateFlags</code> of a used <code
+       class="class">FEValues</code>. Adding this assertion uncovered
+       several other errors which are now also fixed; these errors
+       were dormant, since the values of finite elements are always
+       computed for the presently available Lagrange elements, but
+       would have been activated once there are other classes of
+       elements. 
+       <br>
+       (Ralf Hartmann 2000/07/20)
+       </p>
+
+  <li> <p>
+       New: Class <code>DoFRenumbering</code> now has
+       functions for cell-wise downstream renumbering for
+       discontinuous elements. The parameters of the function <code
+       class="member">sort_selected_dofs_back</code> have been
+       switched to match the interfaces of all other functions in this
+       class.
+       <br>
+       (GK 2000 Jour de Bastille)
+       </p>
+
+  <li> <p>
+       New: Function <code>VectorTools</code><code
+       class="member">::compute_mean_value</code> integrates the mean
+       value of one component of a finite element function.
+       <br>
+       (GK 2000/07/12)
+       </p>
+       
+  <li> <p>
+       New: The new function <code>get_face</code>
+       of <code>FEFaceValues</code> and <code
+       class="class">FESubfaceValues</code> returns an iterator of the
+       present face. This is the face for which the <code
+       class="class">FE(Sub)FaceValues</code> object was last
+       reinitialized.
+       <br>
+       (Ralf Hartmann 2000/06/26)
+       </p>
+
+  <li> <p>
+       New: classes <code>FEDG_Px</code> implement
+       complete polynomial spaces of degree <em>x</em> on
+       quadrilaterals. Since they have fewer degrees of freedom than
+       <code>FEDG_Qx</code>, there is no continuous
+       version for these elements. Implementation for P4 and 3D is
+       still incomplete.
+       <br>
+       (GK 2000/06/20)
+       </p>
+
+  <li> <p>
+       Fix: slight bug in 
+       <code>DataOut::build_patches</code>
+       in multithreaded mode fixed.
+       <br>
+       (Ralf Hartmann, 2000/05/29)
+       </p>
+
+  <li> <p>
+       New: class <code>FETools</code> performs
+       interpolations and extrapolations of discrete functions from one
+       <code>FiniteElement</code> to another <code
+       class="class">FiniteElement</code>. It also provides the local
+       interpolation matrices that interpolate on each
+       cell. Furthermore it provides the interpolation difference
+       matrix id-I<sub>h</sub> that is needed for evaluating
+       (id-I<sub>h</sub>)z for e.g. the dual solution z.
+       </p>
+
+       <p>
+       Removed: The obsolete 
+       <code>MatrixCreator::create_interpolation_matrix</code>
+       function is now removed.
+       </p>
+
+       <p>
+       (Ralf Hartmann 2000/05/26)
+       </p>
+
+  <li> <p>
+       New: <code>DoFTools::make_flux_sparsity_pattern</code>
+       can be optimized by providing two coefficient matrices.
+       <br>
+       (GK 2000/05/25)
+       </p>
+
+  <li> <p>
+       Improved: <code>VectorTools::integrate_difference</code>
+       allows for vector-valued weight functions. L<sup>1</sup> and
+       L<sup>infinity</sup> norms are calculated correctly.
+       <br>
+       (GK 2000/05/25)
+       </p>
+
+  <li> <p>
+       Changed: <code>FE_DGx::restriction</code> is
+       not an interpolation any more but a local projection which is
+       more reasonable for DG elements.
+       <br>
+       (Ralf Hartmann 2000/05/22)
+       </p>
+
+  <li> <p>
+       Changed: enum 
+       <code>MeshSmoothing</code> is moved into the 
+       <code>Triangulation</code> class.
+       <br>
+       (Ralf Hartmann 2000/05/18)
+       </p>
+
+  <li> <p> 
+       New: <code>Triangulation<dim>::patch_level_1</code>
+       is a new mesh smoothing. A mesh of patch level 1 consists of
+       patches, i.e. it consists of cells that are all refined at
+       least once.
+       </p>
+
+       <p>
+       Changed: As follows from the existence of this new smoothing,
+       <code>maximum_smoothing</code> will include this
+       smoothing.
+       </p>
+
+       <p>
+       (Ralf Hartmann 2000/05/18)
+       </p>
+
+  <li> <p>
+       Changed: the 
+       <code>MatrixTools::apply_boundary_values</code>
+       now uses a much faster algorithm when working on matrices with
+       symmetric sparsity patterns. On the other hand, it no longer
+       eliminates whole rows when a matrix has a non-symmetric sparsity
+       pattern, or if the user (through a new flag) tells the function
+       that this is not necessary, for example if the matrix itself is
+       non-symmetric. 
+       </p>
+
+       <p>
+       For symmetric sparsity patterns, the algorithm now eliminates
+       each boundary value in O(m*log(m)) steps instead of
+       O(N*log(m)), where N=number of rows of the matrix, and m=number
+       of entries per row. Note that m is roughly constant,
+       irrespective of N, so the old algorithm became slower with
+       finer grids, while the new one is O(1) for each boundary degree
+       of freedom.
+       </p>
+
+       <p>
+       (John Burnell, WB 2000/05/17)
+       </p>
+
+  <li> <p>
+       New: many functions are now templatized on the data type of the
+       vector they take or return. They thus now support
+       <code>Vector<float></code>, but also
+       <code>BlockVector<...></code>. An
+       incomplete and growing list of functions that were treated in
+       this way is:
+       <ul>
+       <li>
+       <code>DoFAccessor::get_dof_values</code>,
+       <li>
+       <code>DoFAccessor::set_dof_values</code>,
+       <li>
+       <code>DoFAccessor::get_interpolated_dof_values</code>,
+       <li>
+       <code>DoFAccessor::set_dof_values_by_interpolation</code>,
+       <li>
+       <code>FEValues::get_function_values</code>,
+       <li>
+       <code>FEValues::get_function_grads</code>,
+       <li>
+       <code>FEValues::get_function_2nd_derivatives</code>,
+       <li>
+       <code>MatrixTools::apply_boundary_values</code>,
+       <li>
+       Several functions in <code>ConstraintMatrix</code>.
+       </ul>
+       <br>
+       (WB 2000/05/16 and later)
+       </p>
+
+  <li> <p>
+       Changed: The computation of the Jacobian matrices in the 
+       <code>FEValues</code> class is now done more
+       efficiently. The speedup is in the range of a factor of 40 for
+       3D. 
+       <br>
+       (John Burnell, WB 2000/05/16)
+       </p>
+
+  <li> <p> 
+       Change: <code>DoFTools::make_hanging_node_constraints ()</code>
+       does not use the user flags any more, and can thus run in
+       parallel more than once. 
+       <br>
+       (WB 2000/05/15)
+       </p>
+
+  <li> <p> 
+       Extended: <code>DoFTools::make_sparsity_pattern ()</code>
+       now accepts a template parameter as sparsity pattern. This
+       allows to use this function for the usual 
+       <code>SparsityPattern</code>, or for 
+       <code>BlockSparsityPattern</code> arguments.
+       <br>
+       (WB 2000/05/15)
+       </p>
+
+  <li> <p> 
+       New: <code>DoFTools::extract_hanging_node_dofs ()</code>
+       identifies nodes that will be constrained by hanging node constraints.
+       <br>
+       (WB 2000/05/12)
+       </p>
+
+  <li> <p> 
+       New: <code>DoFRenumbering::sort_selected_dofs_back ()</code>
+       sorts selected degrees of freedom to the end of the index
+       range.
+       <br>
+       (WB 2000/05/12)
+       </p>
+
+  <li> <p> 
+       Change: the return value of
+       <code>DoFHandler::max_couplings_between_dofs ()</code>
+       is bounded by <code>DoFHandler::n_dofs()</code>.
+       <br>
+       (Ralf Hartmann 2000/05/11)
+       </p>
+
+  <li> <p> 
+       New: <code>FEValuesBase::get_cell ()</code>
+       returns present cell.
+       <br>
+       (Ralf Hartmann 2000/05/11)
+       </p>
+
+  <li> <p> 
+       Fix: <code>FESystem::reinit()</code> generated
+       an exception if <code>update_support_points</code> 
+       was set.
+       <br>
+       (WB 2000/05/10)
+       </p>
+
+  <li> <p> 
+       New: <code>IntergridMap::get_{source,destination}_grid</code> 
+       functions return the grids for which the map was created.
+       <br>
+       (WB 2000/04/19)
+       </p>
+
+  <li> <p>
+       Fix: in three space dimensions, the triangulation class
+       over-estimated the necessary amount of memory needed upon
+       refinement and allocated too much.
+       <br>
+       (WB 2000/04/19)
+       </p>
+
+  <li> <p>
+       New: <code>DoFTools::extract_boundary_dofs</code> 
+       finds all degrees of freedom which are at the boundary and belong to
+       specified components.
+       <br>
+       (WB 2000/04/17)
+       </p>
+
+  <li> <p>
+       New: <code>DoFTools::compute_intergrid_constraints</code> 
+       allows to use different discretization grids for different
+       variables.
+       <br>
+       (WB 2000/04/15)
+       </p>
+
+  <li> <p>
+       New: <code>DataOut::clear_data_vectors</code>
+       allows to re-use an object without deleting the <code
+       class="class">DoFHandler</code>. 
+       <br>
+       (GK 2000/04/05)
+       </p>
+
+  <li> <p>
+       New: <code>class DoFPrintSolverStep</code> prints
+       intermediate vectors of a solver as finite element functions using
+       <code>DataOut</code>.
+       <br>
+       (GK 2000/04/05)
+       </p>
+
+</ol>
+
+*/
diff --git a/doc/news/3.1.0-vs-3.1.1.h b/doc/news/3.1.0-vs-3.1.1.h
new file mode 100644
index 0000000..3d8371f
--- /dev/null
+++ b/doc/news/3.1.0-vs-3.1.1.h
@@ -0,0 +1,181 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013, 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/**
+ * @page changes_between_3_1_0_and_3_1_1 Changes between Version 3.1.0 and 3.1.1
+
+<p>
+This is the list of changes made between the deal.II releases listed above.
+All entries are signed with the names of the authors.
+</p>
+
+<ol>
+  <li> <p>
+       Fixed: Templatized classes which had a default template
+       argument that contains colons (such as nested types) did
+       not show up in the forward declaration files, and were also
+       missing from the class index. This is fixed now.
+       <br>
+       (WB 2001/05/02)
+       </p>
+
+  <li> <p>
+       Fixed: The vertex numbers written by the <code
+       class="class">GridOut</code>::<code
+       class="member">write_ucd_faces</code> function are now also
+       1-based. This is to be consistent with the vertex numbers given
+       by the <code>GridOut</code>::<code
+       class="member">write_ucd</code> function.
+       <br>
+       (RH 2001/04/20)
+       </p>
+
+  <li> <p>
+       Fixed: the <code
+       class="member">DoFRenumbering::Cuthill_McKee</code> function
+       did not work correctly when giving the <code
+       class="member">reversed_numbering</code> flag (off-by-one
+       indexing). This is now fixed.
+       <br>
+       (<a href="mailto:or at winfos.com">Oliver Rheinbach</a> 2001/04/12)
+       </p>
+       
+  <li> <p>
+       Fixed: When using Neumann boundary functions in the
+       <code>KellyErrorEstimator</code> class, it was
+       assumed that the function object had <code
+       class="member">Function::vector_value</code> overloaded, even
+       in the scalar case. We now use <code
+       class="member">Function::value</code> instead.
+       <br>
+       (WB 2001/04/09)
+       </p>
+       
+  <li> <p>
+       New/Fixed: Now there exists a new <code
+       class="class">Triangulation</code>::<code
+       class="member">ExcMultiplySetLineInfoOfLine</code> exception,
+       that is thrown if the <code>SubCellData</code>
+       that is given to <code
+       class="class">Triangulation</code>::<code
+       class="member">create_triangulation</code>, multiply includes
+       the line info of a specific line. Before the fix the wrong
+       <code>ExcInteriorLineCantBeBoundary</code>
+       exception was thrown.
+       <br>
+       (RH 2001/04/03)
+       </p>
+       
+  <li> <p>
+       Fixed: Missing <code>ucd</code> is now added to the list of
+       supported output formats returned by <code
+       class="class">GridOut</code>::<code
+       class="member">get_output_format_names</code>.
+       <br>
+       (RH 2001/04/03)
+       </p>
+
+  <li> <p>
+       Fixed: the program that generated HTML files from CVS back logs
+       was run unintentionally. It failed because the distribution
+       does not have the necessary CVS files. The program is no longer
+       run.
+       <br>
+       (WB 2001/03/23)
+       </p>
+
+  <li> <p>
+       Fixed: the program that generated HTML from the example
+       programs was broken on some platforms for reasons beyond our
+       knowledge. This is now fixed.
+       <br>
+       (Roger Young, WB 2001/03/22)
+       </p>
+
+  <li> <p> 
+       Removed: The explicit instantiations of <code
+       class="class">SparseMatrix<long double></code> are removed as a
+       prerelease of gcc3.0 fails to compile it. Now the user of <code
+       class="class">SparseMatrix<long double></code> needs to include
+       <code>lac/include/lac/sparse_matrix.templates.h</code> into his
+       source file and to use a different compiler, e.g. gcc 2.95.2 or
+       a future version of gcc3.0 (that will then hopefully be fixed).
+       <br>
+       (RH 2001/03/14)
+       </p>
+
+  <li> <p>
+       Changed: We now unconditionally include
+       <code>deal.II/include/grid/tria_accessor.templates.h</code>
+       (which contains some inline functions for triangulation
+       accessor classes) into 
+       <code>deal.II/include/grid/tria_accessor.h</code> to work
+       around a problem with gcc3.0 which does not place a copy of
+       these functions into the library. Previously we only included
+       the file in optimized mode.
+       <br>
+       (RH 2001/03/14)
+       </p>
+
+  <li> <p>
+       New: The class <code>GridReordering::Cell</code> now has
+       a copy constructor to work around a bug in a gcc3.0
+       snapshot.
+       <br>
+       (RH, WB 2001/03/14)
+       </p>
+
+  <li> <p>
+       Fixed: a missing <code>const</code> prevented the <code
+       class="class">PreconditionSelector</code> class from being
+       compiled.
+       <br>
+       (Roger Young, John Burnell 2001/03/08)
+       </p>
+
+  <li> <p>
+       Updated: the scripts <code>config.sub</code> and
+       <code>config.guess</code> were updated to their newest
+       versions, since <code>./configure</code> did not run properly
+       on all systems. They are needed to determine the host system
+       type correctly.
+       <br>
+       (WB 2001/03/06)
+       </p>
+
+  <li> <p>
+       Fix: in the triangulation, the <code
+       class="member">straight_boundary</code> variable, which is a
+       reference, was assigned the address of a temporary object. It
+       is unclear how this could have worked for three years, but it
+       apparently did...
+       <br>
+       (WB 2001/02/26)
+       </p>
+
+  <li> <p>
+       Fix: the <code
+       class="member">DoFTools::compute_intergrid_constraints</code>
+       function took memory quadratic in the number of degrees of
+       freedom. This is now reduced to linear behaviour, with a
+       constant that depends on the number of levels by which the two
+       grids differ.
+       <br>
+       (WB 2001/02/26)
+       </p>
+</ol>
+
+
+*/
diff --git a/doc/news/3.1.0-vs-3.2.0.h b/doc/news/3.1.0-vs-3.2.0.h
new file mode 100644
index 0000000..8e15194
--- /dev/null
+++ b/doc/news/3.1.0-vs-3.2.0.h
@@ -0,0 +1,1242 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013, 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/**
+ * @page changes_between_3_1_0_and_3_2_0 Changes between Version 3.1.0 and 3.2.0
+
+<p>
+This is the list of changes made between the deal.II releases listed above.
+All entries are signed with the names of the authors.
+</p>
+
+
+<a name="general"></a>
+<h3>General</h3>
+
+<ol>
+  <li> <p>
+       New: example program step-11.
+       <br>
+       (WB 2001/09/17)
+       </p>
+
+  <li> <p>
+       New: example program step-10.
+       <br>
+       (WB, RH 2001/09/12)
+       </p>
+
+  <li> <p>
+       New: the <code>./configure</code> script now also recognizes
+       Compaq's cxx compiler. The library now also compiles cleanly
+       with this compiler.
+       <br>
+       (WB 2001/07/06)
+       </p>
+
+  <li> <p>
+       Changed: The libraries are no longer linked using the C++
+       compilation flags, but rather using LDFLAGS. Some compilers
+       object to compilation flags on the linker line.
+       <br>
+       (WB 2001/05/23)
+       </p>
+
+  <li> <p>
+       Changed: If in multithreaded mode, the ACE library is now
+       automatically added to the <code>\$(LIBS)</code> Makefile
+       variable. There is no need anymore for a special clause in your
+       Makefile.
+       <br>
+       (WB 2001/05/23)
+       </p>
+
+  <li> <p>
+       New: the <code>./configure</code> script now also recognizes
+       the Intel ICC compiler that was released for Linux lately. The
+       library now also compiles cleanly with this compiler.
+       <br>
+       (WB 2001/05/22)
+       </p>
+
+  <li> <p>
+       Fixed: the program that generated HTML from the example
+       programs was broken on some platforms for reasons beyond our
+       knowledge. This is now fixed.
+       <br>
+       (Roger Young, WB 2001/03/22)
+       </p>
+
+  <li> <p>
+       Improved: libraries are now all in a subdirectory lib in the
+       deal.II home directory. It is now easy to
+       make this directory a link to put the libraries on a faster
+       disk.
+       <br>
+       (GK 2001/03/14)
+       </p>
+
+  <li> <a name="new_fe_mapping_design"></a>
+       <i> New Design of <code>FiniteElements</code>
+       and <code>Mappings</code></i>
+              
+       <p>
+       New: The finite element
+       classes have been redesigned from scratch to allow also
+       elements that depend on the actual cell shape, like
+       Raviart-Thomas, BDM or edge elements. We have implemented
+       continuous Lagrange elements up to degree four and discontinuous
+       Lagrange elements up to degree 10. They can be easily extended
+       to higher degree.
+       </p>
+
+       <p> 
+       Furthermore we have totally reimplemented the mapping
+       between unit and real cell.  In future we won't be restricted
+       to Q<sub>1</sub> mapping any more, but we will have
+       Q<sub>p</sub> mappings of arbitrary degree p. Presently
+       implemented are mappings up to degree 10 in 2d, and degree
+       three in 3d. This allows to approximate curved boundaries not
+       only by straight lines (linear approximation) but also by
+       quadratic, cubic, etc approximation. The finite elements will
+       be able to be combined with arbitrary mappings. So in future we
+       can use subparametric, iso- as well as superparametric
+       elements.
+       </p>
+
+       <p>
+       The new implementation uses a totally new structure of
+       communication between <code>FEValues</code>, the
+       <code>FiniteElements</code> and the <code
+       class="class">Mappings</code>. Despite this, the new
+       structure will almost not be 'visible' to the user of
+       deal.II as we tried to retain the interface
+       to the user (mainly that of <code
+       class="class">FEValues</code>) as far as possible.
+       </p>
+
+       <p>
+       Together with this new design comes a reduction of 25000(!)
+       lines of deal.II code. This elimination
+       mainly affected code that previously was machine generated
+       (e.g. by maple). In view of almost unchanged execution times of
+       applications, the faster compilation and additional
+       functionality constitutes a significant improvement of
+       deal.II. Results produced by the new code
+       match those of the old design up to machine precision.
+       <br>
+       (RH & GK 2001/03/13)
+       </p>
+
+  <li> <p>
+       New: There is now some support to include and use routines from the 
+       <a href="http://www.cse.clrc.ac.uk/Activity/HSL" 
+       target="_top">Harwell Subroutine Library</a>.
+       <br>
+       (WB 2001/01/30)
+       </p>
+
+  <li> <p>
+       New: The <code>./configure</code> script now checks for the
+       existence of a Fortran 77 compiler, and sets its path, some
+       compiler flags and the libraries to be linked in when mixing
+       C++ and F77 in some variables in the file
+       <code>common/Make.global_options</code>.
+       <br>
+       (WB 2000/12/30)
+       </p>
+</ol>
+
+
+
+<a name="base"></a>
+<h3>base</h3>
+
+<ol>
+  <li> <p> 
+       New: Color function <code
+       class="member">DataOutBase::EpsFlags::reverse_grey_scale_color_function</code>.
+       <br>
+       (WB 2001/08/24)
+       </p>
+
+  <li> <p> 
+       New: Function <code>QProjector::project_to_child</code>
+       generates quadrature formulae which act on the area which a
+       child would occupy.
+       <br>
+       (WB 2001/08/23)
+       </p>
+
+  <li> <p>
+       Changed: The examples classes in the base directory are now
+       moved into a namespace <code>Functions</code> of
+       their own. This improves encapsulation, but also keeps the
+       documentation of these functions together, as they were
+       previously scrambled all over the screen in the documentation
+       page of the base library.
+       <br>
+       (WB 2001/07/18)
+       </p>
+
+  <li> <p> 
+       New: classes <code>FourierSineFunction</code>
+       and <code>FourierCosineFunction</code>,
+       resembling one mode of a Fourier decomposition. Classes <code
+       class="class">FourierSineSum</code> and <code
+       class="class">FourierCosineSum</code>, resembling sums of such
+       modes of a Fourier decomposition.
+       <br>
+       (WB 2001/07/18)
+       </p>
+
+  <li> <p>
+       New: class <code>vector2d</code> was introduced
+       in analogy to STL class <code>vector</code>. The
+       new class provides a two-dimensional array and replaces the use
+       of <code>FullMatrix</code> in the base library.
+       <br>
+       (GK 2001/05/21)
+       </p>
+
+  <li> <p>
+       Improved: <code>JobIdentifier</code>::<code
+       class="member">operator()</code> includes host name if
+       supported by the operating system
+       <br>
+       (GK 2001/05/17)
+       </p>
+
+  <li> <p>
+       New: There is now a new <code
+       class="class">AutoDerivativeFunction</code> class that
+       automatically computes the gradient of a function by employing
+       numerical difference quotients. This is done only if the user function
+       does not provide the gradient function itself. This class can
+       replace the <code>Function</code> class as base
+       class for user defined <code>Function</code>
+       classes.
+       <br>
+       This new class can be used if the user only implements the
+       <code>value</code> function but wants to call
+       also the <code>gradient</code> functions.
+       <br>
+       (RH 2001/05/15)
+       </p>
+
+  <li> <p>
+       New: The <code>Quadrature</code> class now has a
+       constructor that only accepts quadrature points and sets the
+       weights to invalid values.
+       <br>
+       (GK 2001/04/26)
+       </p>
+
+  <li> <p>
+       New: The function <code>Logstream</code>::<code
+       class="member">get_prefix</code> allows access to the prefix
+       string used for log-files.
+       <br>
+       (GK 2001/04/26)
+       </p>
+
+  <li> <p>
+       New: There is now a global function <code
+       class="function">trace</code> that computes the trace of a tensor
+       of rank 2.
+       <br>
+       (WB 2001/04/12)
+       </p>
+
+  <li> <p>
+       New: The <code>Threads</code> now has a barrier
+       class in order to synchronise multiple threads. In
+       multithreaded mode, this is an alias to the <code
+       class="class">ACE_Barrier</code> class of the ACE library; if
+       deal.II is not configured for multithreading, then the class is
+       a dummy class that does nothing.
+       <br>
+       (WB 2001/03/19)
+       </p>
+
+  <li> <p>
+       New: We now support the output format of the <a
+       href="http://www.kitware.com/vtk.html"
+       target="_top">Visualization Toolkit (Vtk)</a> from the <code
+       class="class">DataOutBase</code> class and all derived classes.
+       <br>
+       (WB 2001/03/19)
+       </p>
+
+  <li> <p>
+       New: The class <code
+       class="class">TensorProductPolynomials<dim></code>
+       performs the tensor product of 1d polynomials, computes its
+       values, its first and second derivatives. If <code>n</code>
+       polynomials are given to the constructor of this class, it
+       constructs <code>n<sup>dim</sup></code> tensor product
+       polynomials.
+       <br>
+       (RH 2001/03/14)
+       </p>
+
+  <li> <p>
+       New: <code>LagrangeEquidistant</code> is a new
+       class derived from <code>Polynomial</code>
+       providing the 1d Lagrange interpolation of degree
+       <code>n</code> of <code>n+1</code> equidistant support points
+       in <code>[0,1]</code>. It is implemented up to degree 10.
+       This class is used for the implementation of the continuous and
+       discontinuous Lagrange elements.
+       <br>
+       (RH 2001/03/14)
+       </p>
+
+  <li> <p>
+       New: The new <code>Polynomial</code> class can
+       be used as base class for all 1d polynomials. It stores the
+       coefficients of the polynomial and uses the Horner scheme to
+       evaluate values and all derivatives.
+       <br>
+       (RH 2001/03/14)
+       </p>
+
+  <li> <p>
+       New: function <code>contract</code> for two arguments of
+       <code>Tensor<1,dim></code>
+       <br>
+       (GK 2001/03/05)
+       </p>
+
+  <li> <p> New: <code>Logstream</code>::<code
+       class="member">log_time_differences (bool)</code> makes <code
+       class="class">Logstream</code> print the time since the last
+       log line instead of accumulated time since program start.
+       <br>
+       (GK 2001/03/05)
+       </p>
+
+  <li> <p> Fix: <code>Logstream</code>::<code
+       class="member">pop()</code> does not perform anything on empty
+       stacks.
+       <br>
+       (GK 2001/03/05)
+       </p>
+
+  <li> <p>
+       Changed: Sort the quadrature points of each <code
+       class="class">Quadrature<1></code> in ascending order. This
+       actually changed the order of the points of only <code
+       class="class">QGauss4</code> and <code
+       class="class">QGauss5</code>.
+       <br>
+       (Ralf Hartmann 2001/01/22)
+       </p>
+
+  <li> <p>
+       New: function <code>contract</code> for two arguments of
+       <code>Tensor<1,dim></code>
+       <br>
+       (GK 2001/01/15)
+       </p>
+</ol>
+
+
+
+<a name="lac"></a>
+<h3>lac</h3>
+
+<ol>
+  <li> <p>
+       New: Function <code>FullMatrix::symmetrize()</code>.
+       <br>
+       (WB 2001/09/20)
+       </p>
+
+  <li> <p>
+       Improved: the stopping criterion of <code
+       class="class">SolverBicgstab</code> without computing the exact
+       residual is implemented
+       <br>
+       (GK 2001/09/11)
+       </p>
+
+  <li> <p>
+       New: The <code>FullMatrix</code> class now has a
+       function <code>operator*=</code>, which simply
+       scales the matrix.
+       <br>
+       (WB 2001/09/02)
+       </p>
+
+  <li> <p>
+       New: Function <code>BlockSparseMatrix::el()</code>,
+       just like <code>SparseMatrix::el()</code>.
+       <br>
+       (WB 2001/08/21)
+       </p>
+
+  <li> <p>
+       New: There is now a class <code>MatrixOut</code>
+       which can be used to convert a matrix into a graphical output,
+       for example in order to see visually that a matrix is
+       diagonally dominant or not.
+       <br>
+       (WB 2001/08/21)
+       </p>
+
+  <li> <p>
+       Changed: Base class <code>Solver</code> and all
+       <code>Preconditioner</code> classes are now
+       derived from <code>Subscriptor</code>.
+       Class <code>PreconditionLACSolver</code> now uses
+       smartpointers to the given solver and preconditioner
+       objects. You will, therefore, have to derive your own
+       preconditioners and solvers from <code
+       class="class">Subscriptor</code> if you want to use it with
+       <code>PreconditionLACSolver</code>.
+       <br>
+       (WB 2001/08/16)
+       </p>
+
+  <li> <p>
+       New: Classes <code>Vector</code> and
+       <code>BlockVector</code> now have member functions
+       <code>operator*=</code> which scale the vectors
+       by a constant factor. These functions are aliases for the 
+       <code>scale</code> member functions except that
+       they return a reference to themselves.
+       <br>
+       (WB 2001/08/14)
+       </p>
+
+  <li> <p>
+       New: There is now a function 
+       <code>FullMatrix::precondition_Jacobi</code>. The 
+       <code>PreconditionJacobi</code> class is
+       therefore now also applicable with the full matrix class.
+       <br>
+       (WB 2001/08/11)
+       </p>
+
+  <li> <p>
+       New: The <code>Vector</code> and
+       <code>BlockVector</code> classes can now be
+       initialized using a new constructor that takes two iterators
+       that denote a range of elements which are to be copied.
+       <br>
+       (WB 2001/08/08)
+       </p>
+
+  <li> <p>
+       Changed: The <code>SolverCG</code> class now
+       saves the initial matrix vector product if the initial value of
+       the solution vector is zero, as it is common practice to use
+       this as starting vector.
+       <br>
+       (WB 2001/06/25)
+       </p>
+
+  <li> <p>
+       New: <code>SparsityPattern::reinit</code> no
+       longer throws an error if the given maximal row lengths are all
+       zero.
+       <br>
+       (WB 2001/06/25)
+       </p>
+
+  <li> <p>
+       New: Class <code>CompressedBlockSparsityPattern</code>
+       may be used as an intermediate form of the <code
+       class="class">BlockSparsityPattern</code>.
+       <br>
+       (WB 2001/06/23)
+       </p>
+
+  <li> <p>
+       New: Class <code>CompressedSparsityPattern</code>
+       may be used as an intermediate form of the <code
+       class="class">SparsityPattern</code> class if memory
+       requirements are tight during construction of the sparsity
+       pattern.
+       <br>
+       (WB 2001/06/22)
+       </p>
+
+  <li> <p>
+       New: There are now functions 
+       <code>SparsityPattern::copy_from</code> and
+       <code>SparseMatrix::copy_from</code>
+       that can be used to construct sparsity patterns and matrices at
+       once, i.e. without setting up a sparsity pattern element by
+       element, possibly after using a way too large value for the
+       maximal row length, then calling 
+       <code>SparsityPattern::compress</code> etc.
+       <br>
+       (WB 2001/05/07)
+       </p>
+
+  <li> <p>
+       New: <code>BlockIndices::block_size</code>
+       returns the size of a specified block.
+       <br>
+       (WB 2001/05/07)
+       </p>
+
+  <li> <p>
+       New: There is now a (private) function <code
+       class="member">SparsityPattern::optimized_lower_bound</code>
+       that is used as an optimized replacement for <code
+       class="member">std::lower_bound</code> for searching in the
+       column number arrays. It unrolls small loops and it also seems
+       that the compiler is able to optimize it better due to
+       eliminated template parameters, making it about twice as fast
+       as the standard implementation. In effect, it also speeds up
+       the SSOR preconditioner that spent about one third of its time
+       in that function by approximately 15 per cent.
+       <br>
+       (WB 2001/04/25)
+       </p>
+
+  <li> <p>
+       New: The <code>FilteredMatrix</code> class is a
+       replacement for the <code
+       class="class">MatrixTools::apply_boundary_values</code>
+       function for cases where you would like to solve several times
+       with the same matrix, either for different right hand sides, or
+       for different boundary values.
+       <br>
+       (WB 2001/04/27)
+       </p>
+
+  <li> <p>
+       New: There is now a function <code
+       class="class">Vector</code>::<code
+       class="member">scale(Vector)</code>
+       that scales each element of the vector by the corresponding
+       element of the argument.
+       <br>
+       (WB 2001/04/23)
+       </p>
+
+  <li> <p> Changed: Solver functions <code>solve</code>
+       return void now. If the solver has not converged within the
+       maximum number of iterations or encounters a breakdown, it
+       throws an exception of type <code
+       class="class">SolverControl</code>::<code
+       class="member">NoConvergence</code> instead of
+       returning a special value.
+       <br>
+       (GK 2001/03/29)
+       </p>
+
+  <li> <p> 
+       New: The functions <code>FullMatrix</code>::<code
+       class="member">mmult</code> and <code
+       class="member">Tmmult</code> now have an additional
+       <code>adding</code> argument. If this flag is
+       <code>true</code>, the matrix-matrix product is added to the
+       resulting matrix; if <code>false</code>, the resulting matrix
+       is set to (overwritten by) the matrix-matrix product. The
+       latter is the behaviour these functions had before. Hence the
+       default value is set to be <code>false</code> to ensure
+       backward compatibility.
+       <br>
+       (RH 2001/03/29)
+       </p>
+
+  <li> <p>
+       New: class <code>SchurMatrix</code> implements
+       a Schur complement for block matrices. It provides matrix-vector
+       multiplication suitable for iterative methods as well as pre- and
+       post-processing of right hand side and solution, respectively. 
+       <br>
+       (GK 2001/03/22)
+       </p>
+
+  <li> <p>
+       Removed: The explicit instantiations of <code
+       class="class">SparseMatrix<long double></code> are
+       removed as a prerelease of gcc3.0 fails to compile it. A user
+       of <code>SparseMatrix<long double></code>
+       needs now to include
+       <code>lac/include/lac/sparse_matrix.templates.h</code> into his
+       source file and to use an appropriate compiler, e.g. gcc2.95.2 or
+       a future version of gcc3.0 (that will then hopefully be fixed).
+       <br>
+       (RH 2001/03/14)
+       </p>
+  
+  <li> <p> 
+       New: class <code
+       class="class">BlockMatrixArray<MATRIX></code> implements
+       a block matrix based on an array of matrix pointers. Since this
+       array may be incomplete, no direct access to entries is
+       allowed. Matrix blocks may be scaled and transposed.
+       <br>
+       (GK 2001/02/13)
+       </p>
+
+  <li> <p>
+       New: There is now some support to include and use routines from the 
+       <a href="http://www.cse.clrc.ac.uk/Activity/HSL" 
+       target="_top">Harwell Subroutine Library</a>, and support
+       classes 
+       <code>SparseDirectMA27</code> and
+       <code>SparseDirectMA47</code>
+       for the sparse direct solvers MA27 and MA47.
+       <br>
+       (WB 2001/01/30)
+       </p>
+</ol>
+
+
+
+<a name="deal.II"></a>
+<h3>deal.II</h3>
+
+<ol>
+  <li> <p> 
+       New: Class <code>MappingQ1Eulerian</code>
+       implementing an Eulerian mapping.
+       <br>
+       (<a href="mailto:ms at biomech.tu-graz.ac.at">Michael Stadler</a> 2001/09/24)
+       </p>
+
+  <li> <p> 
+       New: <code>VectorTools::create_boundary_right_hand_side</code>
+       integrates boundary forces for inhomogeneous Neumann boundary values.
+       <br>
+       (WB 2001/09/13)
+       </p>
+
+  <li> <p> 
+       New: <code>DoFTools::make_flux_sparsity_pattern</code>
+       now exists also for 1d.
+       <br>
+       (WB 2001/09/03)
+       </p>
+
+  <li> <p> 
+       New: There are now two functions
+       <code
+       class="member">FETools::hierarchic_to_lexicographic_numbering</code>
+       and <code
+       class="member">FETools::lexicographic_to_hierarchic_numbering</code>
+       which map the hierarchical numbering used in continuous finite
+       element classes to a lexicographical numbering and back.
+       <br>
+       (WB 2001/08/31)
+       </p>
+
+  <li> <p> 
+       New: <code>ConstraintMatrix::close</code>
+       now simply returns instead of throwing an exception, if the
+       matrix was already closed.
+       <br>
+       (WB 2001/08/30)
+       </p>
+
+  <li> <p>
+       New: Member function
+       <code>ConstraintMatrix::is_identity_constrained</code>.
+       <br>
+       (WB 2001/08/29)
+       </p>
+
+  <li> <p>
+       Fixed: in a rather rare case, some work was done twice in the
+       <code>KellyErrorEstimator</code> class when in
+       multithread mode. This is now fixed.
+       <br>
+       (WB 2001/08/24)
+       </p>
+
+  <li> <p>
+       New: There is now a class <code>GridTools</code>
+       which provides algorithms working on triangulations. At
+       present, it offers a function computing the diameter of a
+       triangulation.
+       <br>
+       (WB 2001/08/16)
+       </p>
+
+  <li> <p>
+       Changed: The <code>MatrixCreator</code> and <code
+       class="class">MatrixTools</code> class have lost their template
+       arguments. Rather, the individual functions now are templated
+       on their own, so the compiler can pick out the correct space
+       dimension on its own.
+       <br>
+       (WB 2001/08/15)
+       </p>
+
+  <li> <p>
+       Extended: <code>ConstraintMatrix::merge</code>
+       can now handle arguments which further constrain the present object.
+       <br>
+       (WB 2001/07/30)
+       </p>
+
+  <li> <p>
+       New: Implement
+       <code>DoFTools::make_sparsity_pattern</code>,
+       <code>DoFTools::make_boundary_sparsity_pattern</code>,
+       and 
+       <code>ConstraintMatrix::condense</code> to work on
+       the <code>CompressedSparsityPattern</code> and
+       <code>CompressedBlockSparsityPattern</code>
+       classes.
+       <br>
+       (WB 2001/06/22)
+       </p>
+
+  <li> <p>
+       New: <code>FE*Values::get_quadrature</code>
+       returns a reference to the quadrature formula used by a
+       FEValues object.
+       <br>
+       (WB 2001/06/21)
+       </p>
+
+  <li> <p>
+       New:  <code
+       class="member">DoFTools::compute_intergrid_transfer_representation</code>
+       is a function that generates a matrix representation that can
+       be used to transfer data from one grid to one component of the
+       fields on another, finer, grid.
+       <br>
+       (WB 2001/05/24)
+       </p>
+
+  <li> <p>
+       Changed: the  <code>GeometryInfo</code> class
+       has been reverted from a general template that calculates the
+       values of its member variables by recursion/induction to a set
+       of explicitly specialized classes. This seemed necessary since
+       most compilers objected to the old implementation as the
+       declaration of the values of the class required knowledge of
+       the elements of the class with one lower dimension which was,
+       however, only being declared at present except for 1d. Also,
+       allocating space for variables was difficult as that would mean
+       declaring specializations after their first use. The new (and
+       very old) implementation is entirely compatible to the previous
+       one.
+       <br>
+       (WB 2001/05/23)
+       </p>
+
+  <li> <p>
+       Changed: the classes that denote flags to the <code
+       class="class">TimeStepBase_Tria</code> class have been moved
+       from local classes to a namespace of their own, named
+       <code>TimeStepBase_Tria_Flags</code>.
+       <br>
+       (WB 2001/05/23)
+       </p>
+
+  <li> <p>
+       Fixed: due to a bug in gcc, the compiler did not check that we
+       did not use the set of given boundary indicators to the
+       <code>DoFTools::extract_boundary_dofs</code>
+       function in 1d. That set was therefore ignored. This is now
+       fixed.
+       <br>
+       (WB 2001/05/18)
+       </p>
+
+  <li> <p>
+       Changed: the flags which are given to the <code
+       class="class">GridOut</code> class to modify the appearance of
+       the output have been moved from local classes of the <code
+       class="class">GridOut</code> class to a namespace named
+       <code>GridOutFlags</code> and have lost the
+       trailing <code>Flags</code> part in their name.
+       This change was necessary as C++ does not allow explicit
+       specialization of member classes; the previous use in the
+       library was only accepted by GCC as an extension.
+       <br>
+       (WB 2001/05/17)
+       </p>
+
+  <li> <p>
+       New: The functions <code
+       class="member">DoFTools::map_dof_to_boundary_indices</code>,
+       <code>DoFTools::make_boundary_sparsity_pattern</code>,
+       <code>DoFHandler::n_boundary_dofs</code>,
+       <code
+       class="member">DoFHandler::max_couplings_between_boundary_dofs</code>,
+       and <code>VectorTools::project_boundary_values</code>,
+       are now also implemented in 1d.
+       <br>
+       (WB 2001/04/29)
+       </p>
+
+  <li> <p>
+       New: There are now functions <code
+       class="class">DoFTools</code>::<code
+       class="member">map_dofs_to_support_points</code> and 
+       <code
+       class="class">DoFTools</code>::<code
+       class="member">map_support_points_to_dofs</code> that
+       generate a map between DoF indices and the support points of
+       these DoFs, and the other way round.
+       <br>
+       (WB 2001/04/24)
+       </p>
+
+  <li> <p>
+       New: Implement the <code
+       class="member">VectorTools::interpolate_boundary_values</code>
+       a second time, this time taking a <code
+       class="class">FunctionMap</code> object as do all the other
+       functions of this type.
+       <br>
+       (WB 2001/04/23)
+       </p>
+
+  <li> <p>
+       Fixed: The vertex numbers written by the <code
+       class="class">GridOut</code>::<code
+       class="member">write_ucd_faces</code> function are now also
+       1-based. This is to be consistent with the vertex numbers given
+       by the <code>GridOut</code>::<code
+       class="member">write_ucd</code> function.
+       <br>
+       (RH 2001/04/20)
+       </p>
+
+  <li> <p>
+       Extended: the <code
+       class="class">DerivativeApproximation</code> class now also
+       works in 3d, as the spectral norm of a symmetric 3x3 matrix can
+       now be computed.
+       <br>
+       (<a href="mailto:roger at kea.grace.cri.nz">Roger Young</a> 2001/04/12)
+       </p>
+       
+  <li> <p>
+       Fixed: the <code
+       class="class">DoFRenumbering</code>::<code
+       class="member">Cuthill_McKee</code> function
+       did not work correctly when giving the <code>reversed_numbering</code>
+       flag (off-by-one indexing). This is now fixed.
+       <br>
+       (<a href="mailto:or at winfos.com">Oliver Rheinbach</a> 2001/04/12)
+       </p>
+       
+  <li> <p>
+       Changed: A <code>typedef FunctionMap</code> was
+       declared in at least four places previously (in classes <code
+       class="class">DoFHandler</code>, <code
+       class="class">MatrixTools</code>, <code
+       class="class">VectorTools</code>, and <code
+       class="class">KellyErrorEstimator</code>). It is now unified in
+       one place and is called <code
+       class="class">FunctionMap<dim></code>::<code
+       class="member">type</code> (it is a local
+       typedef in a class called <code
+       class="class">FunctionMap</code>). This type is defined in the
+       file <tt>dofs/function_map.h</tt>.
+       <br>
+       (WB 2001/04/09)
+       </p>
+       
+  <li> <p>
+       Fixed: When using Neumann boundary functions in the 
+       <code>KellyErrorEstimator</code> class, it was
+       assumed that the function object had <code
+       class="class">Function</code>::<code
+       class="member">vector_value</code> overloaded, even
+       in the scalar case. We now use <code
+       class="class">Function</code>::<code
+       class="member">value</code> instead.
+       <br>
+       (WB 2001/04/09)
+       </p>
+       
+  <li> <p>
+       New: The various <code
+       class="class">MatrixCreator</code>::<code
+       class="member">create_*_matrix</code> functions
+       are now using multiple threads to do their work, if the library
+       is configured to use multithreading.
+       <br>
+       (WB 2001/04/08)
+       </p>
+       
+  <li> <p>
+       New: The following functions are extended to work
+       on arbitrary mappings:
+       <br>
+       The static <code>create_mass_matrix</code>,
+       <code>create_boundary_mass_matrix</code> and
+       <code>create_laplace_matrix</code> member
+       functions of the class <code>MatrixCreator</code>,
+       <br>
+       the static <code>project</code> and <code
+       class="member">project_boundary_values</code> member functions
+       of the class <code>VectorTools</code>,
+       <br>
+       the two versions of the static <code
+       class="member">estimate</code> member functions of the class
+       <code>KellyErrorEstimator</code>,
+       <br>
+       and the static <code>approximate_gradient</code>
+       and <code>approximate_second_derivative</code>
+       member functions of the class <code
+       class="class">DerivativeApproximation</code>.
+       </p>
+
+       <p>
+       All these functions now take a <code
+       class="class">Mapping</code> object as additional argument.
+       <br>      
+       For backward compatibility there exist second versions of
+       each of these functions that still don't have a <code
+       class="class">Mapping</code> argument. These functions
+       implicitly use a mapping of the class <code
+       class="class">MappingQ1</code>.
+       <br>
+       (RH 2001/04/05)
+       </p>
+
+  <li> <p>
+       Removed: Along with the redesign of the <code
+       class="class">FiniteElement</code> and the <code
+       class="class">Mapping</code> classes the <code
+       class="class">FiniteElement</code>::<code
+       class="member">get_local_mass_matrix</code> function is now
+       removed. This was necessary as the local mass matrix depends not only on the
+       considered <code>FiniteElement</code> but also on
+       the mapping between the unit to the real cell. Hence this function
+       cannot be a member function of a <code
+       class="class">FiniteElement</code> any more.
+       </p>
+
+       <p>
+       As a consequence also the <code
+       class="class">MatrixCreator</code>::<code
+       class="member">create_mass_matrix</code> function with two
+       arguments is removed, as it relied on the <code
+       class="member">get_local_mass_matrix</code> function. If in
+       future the user wants to create a mass matrix he needs to use
+       one of the remaining <code
+       class="class">MatrixCreator</code>::<code
+       class="member">create_mass_matrix</code> functions that require
+       an appropriate <code>Quadrature</code> as
+       argument.
+       <br>
+       (RH 2001/04/04)
+       </p>
+       
+  <li> <p>
+       New/Fixed: Now there exists a new <code
+       class="class">Triangulation</code>::<code
+       class="member">ExcMultiplySetLineInfoOfLine</code> exception,
+       that is thrown if the <code>SubCellData</code>
+       that is given to <code
+       class="class">Triangulation</code>::<code
+       class="member">create_triangulation</code>, multiply includes
+       the line info of a specific line. Before the fix the wrong
+       <code>ExcInteriorLineCantBeBoundary</code>
+       exception was thrown.
+       <br>
+       (RH 2001/04/03)
+       </p>
+       
+  <li> <p>
+       Fixed: Missing <code>ucd</code> is now added to the list of
+       supported output formats returned by <code
+       class="class">GridOut</code>::<code
+       class="member">get_output_format_names</code>.
+       <br>
+       (RH 2001/04/03)
+       </p>
+
+  <li> <p>
+       New/fixed: In some obscure corner cases, the detection logic in <code
+       class="member">DataOut_DoFData::add_data_vector</code> would
+       not have been able to detect whether something is a DoF data
+       vector or a vector of cell data, and in some equally rare cases
+       this would also have made a difference. This is now fixed by
+       adding another argument to the function telling it either to
+       automatically detect the vector type (default) or to assume
+       that it has a certain type (for these corner cases).
+       <br>
+       (WB 2001/03/30)
+       </p>
+
+  <li> <p>
+       Removed: the <code>ProblemBase</code> class,
+       which has been deprecated since before the release of 
+       deal.II 3.0, has finally been removed. The
+       same applies for the classes
+       <code>Assembler</code>,
+       <code>Equation</code>,
+       <code>MassMatrix</code>, and
+       <code>LaplaceMatrix</code>.
+       <br>
+       (WB 2001/03/27)
+       </p>
+
+  <li> <p>
+       New: There is now a class <code>MappingC1</code>
+       that implements a continuously differentiable C<sup>1</sup>
+       mapping of the boundary by using a cubic mapping with
+       continuous derivatives at cell boundaries. The class presently
+       only implements something for 2d and 1d (where it does nothing).
+       <br>
+       (WB 2001/03/27)
+       </p>
+
+  <li> <p>
+       New: The static <code>interpolate</code>, <code
+       class="member">create_right_hand_side</code>, <code
+       class="member">interpolate_boundary_values</code>, <code
+       class="member">integrate_difference</code> and <code
+       class="member">compute_mean_value</code> member functions of
+       the class <code>VectorTools</code> are extended
+       to work on arbitrary mappings. All these functions now take a
+       <code>Mapping</code> object as additional
+       argument. 
+       <br>
+       For backward compatibility there exist second versions of
+       each of these functions that still don't have a <code
+       class="class">Mapping</code> argument. These functions
+       implicitly use a mapping of the class <code
+       class="class">MappingQ1</code>.
+       <br>
+       (RH 2001/03/27)
+       </p>
+
+  <li> <p>
+       New: <code>Boundary</code> and derived classes
+       now have a function <code
+       class="member">get_normals_at_vertices</code> that returns a
+       multiple of the normal vector to the boundary at the
+       vertices of a given face. This is used in the construction of
+       C<sup>1</sup> mappings of the boundary.
+       <br>
+       (WB 2001/03/23)
+       </p>
+
+  <li> <p>
+       New: The new function <code
+       class="class">PersistentTriangulation<dim></code>::<code
+       class="member">restore(unsigned int)</code> allows to restore
+       the triangulation step by step.
+       <br>
+       New: Now there exists a function <code
+       class="class">PersistentTriangulation<dim></code>::<code
+       class="member">clear_flags</code> to allow to re-<code
+       class="member">read_flags</code> without the need of clearing
+       the triangulation in advance.
+       <br>
+       New: The new function <code
+       class="class">PersistentTriangulation<dim></code>::<code
+       class="member">n_refinement_steps</code> returns the number of
+       refinement steps stored in <code
+       class="member">refine_flags</code>.
+       <br>
+       (RH 2001/03/20)
+       </p>
+
+  <li> <p>
+       New: <code>GridGenerator</code>::<code
+       class="member">hyper_rectangle</code> creates
+       coordinate-parallel rectangles in arbitrary dimension.
+       <br>
+       (GK 2001/03/16)
+       </p>
+
+  <li> <p>
+       Changed: The syntax of the <code
+       class="class">FiniteElement<dim></code>::<code
+       class="member">get_unit_support_points</code> function is
+       changed; it returns a reference to the vector of points in lieu
+       of taking this vector as argument. The unit support points are
+       now computed by the constructor of the <code
+       class="class">FiniteElement</code> and not on each <code
+       class="class">FiniteElement<dim></code>::<code
+       class="member">get_unit_support_points</code> function call as
+       before.
+       <br>
+       (WB 2001/03/14)
+       </p>
+
+  <li> <p> 
+       Removed: The function <code
+       class="class">FiniteElement<dim></code>::<code
+       class="member">get_support_points</code>
+       is removed as the <code>FiniteElement</code>
+       cannot know the position of the support points by itself. This is
+       because the support points depend on the unit support points
+       (known to the <code>FiniteElement</code> class)
+       but they also depend on the mapping.
+
+       In future the support points can be computed using a <code
+       class="class">FEValues</code> object initialized with the <code
+       class="class">Mapping</code> and a <code
+       class="class">Quadrature</code> that uses the unit support
+       points as quadrature points.
+       <br>
+       (RH 2001/03/14)
+       </p>
+
+  <li> <p> 
+       New: The class <code>Boundary</code> has two new
+       functions <code
+       class="member">get_intermediate_points_on_line</code> and <code
+       class="member">get_intermediate_points_on_quad</code> that
+       need to be implemented in derived classes only if <code
+       class="class">GridOut</code> is used with <code
+       class="class">MappingQ</code> of degree <code>p>2</code>.
+       <br>
+       (RH 2001/03/14)
+       </p>
+
+  <li> <p>
+
+       New: The functions <code
+       class="member">GridOut::write_gnuplot</code> and <code
+       class="member">GridOut::write_eps</code> now take a pointer to
+       a <code>Mapping</code> object as additional
+       argument. This allows to write grids in 2d whereby cells with a
+       curved boundary are transformed by the given <code
+       class="class">Mapping</code> class. The default mapping is
+       <code>MappingQ1</code>. Note, that the grids do
+       not show the `original' domain with boundaries described by the
+       <code>Boundary</code> classes but the
+       discretized domain whose boundary cells are transformed using the
+       given mapping.
+       </p>
+       <p>There are also new <code
+       class="member">GnuplotFlags::n_boundary_face_points</code> and
+       <code>EpsFlags::n_boundary_face_points</code>
+       variables to set the number of additional points written
+       to represent the curved boundary faces.
+       <br>
+       (RH 2001/03/14)
+       </p>
+
+  <li> <p>
+       New: The constructors of the <code>FEValues</code>
+       and <code>FE(Sub)FaceValues</code> classes now
+       take a reference to a <code>Mapping</code> object
+       as additional argument. This is the new possibility to combine
+       a <code>FiniteElement</code> with an arbitrary
+       <code>Mapping</code>, see also <a
+       href="#new_fe_mapping_design">New FE and Mapping Design</a>.
+       </p>
+
+       <p>
+       For backward compatibility there still exists a constructor of
+       <code>FEValues</code> without a <code
+       class="class">Mapping</code> argument that uses a <code
+       class="class">MappingQ1</code> by default.
+       <br>
+       (RH 2001/03/14)
+       </p>
+       
+  <li> <p>
+       Changed: We now unconditionally include
+       <code>deal.II/include/grid/tria_accessor.templates.h</code>
+       (which contains some inline functions for triangulation
+       accessor classes) into 
+       <code>deal.II/include/grid/tria_accessor.h</code> to work
+       around a problem with gcc3.0 which does not place a copy of
+       these functions into the library. Previously we only included
+       the file in optimized mode.
+       <br>
+       (RH 2001/03/14)
+       </p>
+
+  <li> <p>
+       New: The class <code>GridReordering::Cell</code> has
+       now a copy constructor to work around a bug in a gcc3.0
+       snapshot.
+       <br>
+       (RH, WB 2001/03/14)
+       </p>
+
+  <li> <p>
+       Changed: when refining a hexahedron in 3d, the rules by which
+       the new vertices at the centers of the faces and at the center
+       of the cell are placed, are changed. They are changed in a way
+       as to minimize the distortion of the resulting cells from the
+       optimal shape and to make them look as much alike as possible
+       to generate a smoother grid.
+       <br>
+       (WB 2001/03/02)
+       </p>
+
+  <li> <p>
+       Fix: the <code
+       class="class">DoFTools</code>::<code
+       class="member">compute_intergrid_constraints</code>
+       function took memory quadratic in the number of degrees of
+       freedom. This is now reduced to linear behaviour, with a
+       constant that depends on the number of levels by which the two
+       grids differ.
+       <br>
+       (WB 2001/02/26)
+       </p>
+
+  <li> <p>
+       Fix: in the triangulation, the <code
+       class="member">straight_boundary</code> variable, which is a
+       reference, was assigned the address of a temporary object. It
+       is unclear how this could have worked for three years, but it
+       apparently did...
+       <br>
+       (WB 2001/02/26)
+       </p>
+
+  <li> <p> 
+       New: The <code>DoFTools</code> class now has
+       a function <code>count_dofs_per_component</code>
+       that counts the number of degrees of freedom in each of the
+       components of the finite element, i.e. how many degrees of
+       freedom there are on the global mesh for each variable (field).
+       <br>
+       (WB 2001/02/22)
+       </p>
+
+  <li> <p>
+       New: The <code>CellAccessor</code> class now has a function
+       <code>has_boundary_lines</code> that mostly has
+       the same semantics that <code>at_boundary</code>
+       has, but also covers the case that a hexahedron in 3d may be at
+       the boundary only by one line, rather than by a whole face. In
+       that case, <code>at_boundary</code> reports
+       false, while <code>has_boundary_lines</code>
+       reports true.
+       <br>
+       (WB 2001/02/21)
+       </p>
+
+  <li> <p>
+       New: There is now a function
+       <code>ConstraintMatrix</code>::<code
+       class="member">merge</code> that merges
+       the constraints represented by two constraint matrices into one
+       object.
+       <br>
+       (WB 2001/01/31)
+       </p>
+</ol>
+
+*/
diff --git a/doc/news/3.1.1-vs-3.1.2.h b/doc/news/3.1.1-vs-3.1.2.h
new file mode 100644
index 0000000..c5329da
--- /dev/null
+++ b/doc/news/3.1.1-vs-3.1.2.h
@@ -0,0 +1,40 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013, 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/**
+ * @page changes_between_3_1_1_and_3_1_2 Changes between Version 3.1.1 and 3.1.2
+
+<p>
+This is the list of changes made between the deal.II releases listed above.
+All entries are signed with the names of the author.
+</p>
+
+<ol>
+  <li> <p>
+       The only change is that the configuration detects the
+       presence of gcc3.0 and works around a bug in it by compiling
+       the file <code>tria_accessor.cc(3d)</code> without optimization
+       flags even for optimized libraries. The gcc bug is documented
+       at the <a
+       href="http://gcc.gnu.org/cgi-bin/gnatsweb.pl?database=gcc&user=guest&password=guest&cmd=login"
+       target="_top">gcc bug tracking system page</a>, as bug reports c++/615 and
+       optimization/2938.
+       <br>
+       (WB 2001/06/27)
+       </p>
+</ol>
+
+
+*/
diff --git a/doc/news/3.2.0-vs-3.2.1.h b/doc/news/3.2.0-vs-3.2.1.h
new file mode 100644
index 0000000..45db8c8
--- /dev/null
+++ b/doc/news/3.2.0-vs-3.2.1.h
@@ -0,0 +1,137 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013, 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/**
+ * @page changes_between_3_2_0_and_3_2_1 Changes between Version 3.2.0 and 3.2.1
+
+<p>
+This is the list of changes made between the deal.II releases listed above.
+All entries are signed with the names of the author.
+</p>
+
+
+<h2>Changes between versions 3.2.0 and 3.2.1</h2>
+
+<ol>
+  <li> <p>
+       Fixed: In the <code>ParameterHandler</code>
+       class, we leaked 8 or 12 bytes of memory per declared
+       parameter. This is now fixed.
+       <br>
+       (WB 2001/11/28)
+       </p>
+
+  <li> <p>
+       Fixed: The <code>DoFHandler</code> class had a
+       memory leak. This is now fixed.
+       <br>
+       (WB 2001/11/28)
+       </p>
+
+  <li> <p>
+       Fixed: The <code
+       class="member">FilteredMatrix::allocate_tmp_vector</code>
+       function had a bug with block vectors.
+       <br>
+       (WB 2001/11/22)
+       </p>
+
+  <li> <p> New: example program step-12. Discontinuous Galerkin
+       discretization.
+       <br>
+       (RH 2001/11/21)
+       </p>
+
+  <li> <p> New: The new <code
+       class="member">CellAccessor<dim>::neighbor_of_coarser_neighbor</code>
+       function returns where to find the present cell from a coarser
+       neighbor.
+       <br>
+       (RH 2001/11/21)
+       </p>
+
+  <li> <p>
+       Fixed: when checking for convergence in linear solvers in
+       <code>SolverControl::check</code>, we first
+       checked whether the maximal iteration count was reached, and
+       only then whether the target residual was achieved. In cases,
+       where the target residual was only reached in the very last
+       allowed iteration, this led to a failure notice of the linear
+       solver, rather than to a success message. This is now fixed.
+       <br>
+       (WB 2001/11/19)
+       </p>
+
+  <li> <p>
+       Fixed: an error in the definition of the <code
+       class="member">SolverMinRes::solve</code> function prevented
+       its compilation under some circumstances.
+       <br>
+       (WB 2001/11/14)
+       </p>
+
+  <li> <p>
+       Fixed: upon breakdown, the <code
+       class="class">SolverBicgstab</code> forgot to increment the
+       iteration counter for the breakdown cycle. This is now fixed.
+       <br>
+       (WB 2001/11/14)
+       </p>
+
+  <li> <p>
+       Fixed: the <code
+       class="member">DoFTools::make_flux_sparsity_pattern</code> was
+       implemented in 1d, but not instantiated. This is now fixed.
+       <br>
+       (WB 2001/11/14)
+       </p>
+
+  <li> <p>
+       Fixed: the <code
+       class="member">GridGenerator::hyper_rectangle</code> function
+       was broken in 3d.
+       <br>
+       (WB 2001/10/26)
+       </p>
+
+  <li> <p>
+       Fixed: class <code>SparsityPattern</code> can
+       handle rows of length zero now. For quadratic matrices, these
+       rows are changed to length one, since a diagonal element must
+       be stored.
+       <br>
+       (GK 2001/10/11)
+       </p>
+
+  <li> <p>
+       Fixed: The <code>DataOutBase::EpsFlags</code>
+       class forgot to declare the reverse grey scale function as one
+       possible input for the color function for the
+       <code>ParameterHandler</code> class. This is now
+       possible.
+       <br>
+       (WB 2001/10/10)
+       </p>
+
+  <li> <p>
+       Fixed: the iterator category template base class of grid
+       iterators was incorrectly set.
+       <br>
+       (WB 2001/09/28)
+       </p>
+</ol>
+
+
+*/
diff --git a/doc/news/3.2.0-vs-3.3.0.h b/doc/news/3.2.0-vs-3.3.0.h
new file mode 100644
index 0000000..e6e9786
--- /dev/null
+++ b/doc/news/3.2.0-vs-3.3.0.h
@@ -0,0 +1,543 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013, 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/**
+ * @page changes_between_3_2_and_3_3 Changes between Version 3.2 and 3.3
+
+<p>
+This is the list of changes made between the deal.II releases listed above.
+All entries are signed with the names of the author.
+</p>
+
+
+<a name="general"></a>
+<h3>General</h3>
+
+<ol>
+  <li> <p>
+       New: Output for 
+       <a href="http://www.amtec.org" target="_top">Tecplot</a> has
+       been added. It can be used by choosing output format "tecplot".
+       <br>
+       (<a href="mailto:benkirk at cfdlab.ae.utexas.edu">Benjamin Shelton Kirk</a> 2002/01/29)
+       </p>
+
+  <li> <p> New: configuration detects whether the compiler has the
+       include file <code><ostream></code>. Most files in the
+       library then include this file over
+       <code><iostream></code> to save compile time.
+       <br>
+       (WB 2002/01/25)
+       </p>
+
+  <li> <p> Fixed: All example and test programs as well as a number of
+       large applications have been checked against the memory checker
+       "purify". Only three memory leaks were found and fixed. We
+       believe that no major leaks exist in the library any more.
+       <br>
+       (WB 2001/12/07)
+       </p>
+
+  <li> <p>
+       New: Output for 
+       <a href="http://www.opendx.org" target="_top">OpenDX</a> has
+       been added. It can be used by choosing output format "dx" (not
+       yet for grid output). The data format is very basic now, but it
+       is planned to improve this to make use of the excellent
+       capabilities of OpenDX.
+       </p>
+       <p>
+       Additionally, a directory <tt>contrib/dx</tt> has been added,
+       containing visual programs for OpenDX. Programs for the
+       existing output of data as a single vector are found in the
+       subdirectory <tt>single</tt>.
+       <br>
+       (GK 2001/12/07)
+       </p>
+
+  <li> <p> Fixed: Previously, the \$(INCLUDE) variable in Makefiles
+       included the values of the \$INCLUDE environment variable. This
+       is not desirable, since the compiler evaluates that variable
+       anyway and the Makefile variable has <code>-I</code> prefixed
+       to all paths while the environment variable has not.
+       <br>
+       (WB 2001/11/29)
+       </p>
+
+  <li> <p> Removed: the option to generate printable documentation was
+       removed. Since this comprised approximately 2000 pages and
+       since we believe that the online documentation is rather good,
+       this is probably no big loss.
+       <br>
+       (WB 2001/11/29)
+       </p>
+
+  <li> <p> New: example program step-12. Discontinuous Galerkin
+       discretization.
+       <br>
+       (RH 2001/11/21)
+       </p>
+
+  <li> <p>
+       New: deal.II now uses a file
+       <code>config.h</code> for most global preprocessor defines,
+       instead of an overly long list of compiler flags given on the
+       command line.
+       <br>
+       (WB 2001/10/27)
+       </p>
+
+  <li> <p>
+       Changed: If available, the library now uses the C++ standard
+       classes <code>istringstream</code> and <code
+       class="class">ostringstream</code> over the old classes
+       <code>i/ostrstream</code>. The ./configure script
+       finds out whether the new classes exist, or whether the
+       backward compatibility classes are to be used.
+       <br>
+       (WB 2001/10/25)
+       </p>
+
+  <li> <p>
+       New: the ./configure script now recognizes gcc3.1
+       (i.e. presently prereleases of it) and sets compilation flags
+       accordingly.
+       <br>
+       (WB 2001/10/25)
+       </p>
+</ol>
+
+
+
+<a name="base"></a>
+<h3>base</h3>
+
+<ol>
+  <li> <p>
+       Fixed: For Mac OS X, the file <code>base/source/log.cc</code>
+       needed to include <code>sys/time.h</code> in addition to
+       <code>sys/resource.h</code>.
+       <br>
+       (Alexis Herault, WB 2002/01/28)
+       </p>
+
+  <li> <p> Fixed: Private definitions of the copy constructor and
+       assignment operator of the <code
+       class="class">ParameterHandler</code> class are now included in
+       order to inhibit the (unintentional) use of default copy
+       constructors.
+       <br>
+       (RH 2002/01/22)
+       </p>
+
+  <li> <p>
+       Improved: The cut-off functions <code
+       class="class">Functions::CutOffFunctionLinfty</code>, <code
+       class="class">Functions::CutOffFunctionW1</code>, and <code
+       class="class">Functions::CutOffFunctionCinfty</code> can be
+       vector-valued now and optionally only a single component can
+       be selected.
+       <br>
+       (GK 2002/01/10)
+       </p>
+
+  <li> <p>
+       New: the <code
+       class="member">deal_II_exceptions::set_additional_assert_output</code>
+       function allows to set additional output to be printed upon
+       triggering an <code>Assert()</code> call. This
+       is helpful for parallel applications where you only see the
+       text of the message but do not know from which cluster node it
+       stems.
+       <br>
+       (WB 2002/01/10)
+       </p>
+
+  <li> <p>
+       Changed: when an assertion fails in the <code
+       class="member">Assert()</code> function, the program is usually
+       aborted. Don't abort it any more if there is an active C++
+       exception somewhere since we would lose its message if we
+       aborted the program. In that case only report the error and
+       write out an indication why we do not abort the program any
+       more. On the other hand, also suppress output of further failed
+       <code>Assert()</code> calls, since they often
+       are follow-ups of the first one.
+       <br>
+       (WB 2002/01/09)
+       </p>
+
+  <li> <p>
+       New: <code>ExcFileNotOpen</code> can be used
+       after initializing an <code>fstream</code>
+       object. This allows to avoid some cryptic <code
+       class="class">ExcIO</code>s.
+       <br>
+       (GK 2001/12/18)
+       </p>
+
+  <li> <p>
+       Changed: The <code>OutputStyle</code> enum used
+       to indicate the output format has been moved into the
+       <code>ParameterHandler</code> class.
+       <br>
+       (WB 2001/11/30)
+       </p>
+
+  <li> <p>
+       Fixed: In the <code>ParameterHandler</code>
+       class, we leaked 8 or 12 bytes of memory per declared
+       parameter. This is now fixed.
+       <br>
+       (WB 2001/11/28)
+       </p>
+
+  <li> <p>
+       New: <code>Functions::CutOffFunctionCinfty</code>,
+       <code>Functions::CutOffFunctionW1</code>, and
+       <code>Functions::CutOffFunctionLinfty</code>
+       implement functions with support in an arbitrary ball and
+       differentiability as indicated by their name.
+       <br>
+       (GK 2001/10/24)
+       </p>
+
+  <li> <p>
+       Fixed: The <code>DataOutBase::EpsFlags</code>
+       class forgot to declare the reverse grey scale function as one
+       possible input for the color function for the
+       <code>ParameterHandler</code> class. This is now
+       possible.
+       <br>
+       (WB 2001/10/10)
+       </p>
+</ol>
+
+
+
+<a name="lac"></a>
+<h3>lac</h3>
+
+<ol>
+  <li> <p>
+
+       Improved: all sparsity pattern classes have a function <code
+       class="member">exists</code>, allowing you to check whether a
+       certain index pair has been allocated in the pattern.
+       <br>
+       (GK 2002/02/01)
+       </p>
+
+  <li> <p>
+       Fixed: Allocation of temporary vectors in <code
+       class="member">FilteredMatrix::allocate_tmp_vector</code>
+       is now faster since it no longer copies the value of the
+       template vector.
+       <br>
+       (WB 2001/11/22)
+       </p>
+
+  <li> <p>
+       Fixed: The <code
+       class="member">FilteredMatrix::allocate_tmp_vector</code>
+       function had a bug with block vectors.
+       <br>
+       (WB 2001/11/22)
+       </p>
+
+  <li> <p>
+       Improved: reinit function of <code>Vector</code>
+       and <code>BlockVector</code> allows use of a
+       vector with different number type.
+       <br>
+       (GK 2001/11/21)
+       </p>
+
+  <li> <p>
+       Fixed: when checking for convergence in linear solvers in
+       <code>SolverControl::check</code>, we first
+       checked whether the maximal iteration count was reached, and
+       only then whether the target residual was achieved. In cases,
+       where the target residual was only reached in the very last
+       allowed iteration, this led to a failure notice of the linear
+       solver, rather than to a success message. This is now fixed.
+       <br>
+       (WB 2001/11/19)
+       </p>
+
+  <li> <p>
+       New: the <code>SparseDirectMA27/47</code> classes
+       now provide access to Mutex locks for external
+       synchronisation. 
+       <br>
+       (WB 2001/11/14)
+       </p>
+
+  <li> <p>
+       Fixed: an error in the definition of the <code
+       class="member">SolverMinRes::solve</code> function prevented
+       its compilation under some circumstances.
+       <br>
+       (WB 2001/11/14)
+       </p>
+
+  <li> <p>
+       Fixed: upon breakdown, the <code
+       class="class">SolverBicgstab</code> forgot to increment the
+       iteration counter for the breakdown cycle. This is now fixed.
+       <br>
+       (WB 2001/11/14)
+       </p>
+
+  <li> <p>
+       Improved: class <code>SolverGMRES</code> accepts
+       a parameter in <code>AdditionalData</code>,
+       allowing for right preconditioning.
+       <br>
+       (GK 2001/11/09)
+       </p>
+
+  <li> <p>
+       Fixed: class <code>SparsityPattern</code> can
+       handle rows of length zero now. For quadratic matrices, these
+       rows are changed to length one, since a diagonal element must
+       be stored.
+       <br>
+       (GK 2001/10/11)
+       </p>
+
+  <li> <p>
+       New: The <code>BlockVector</code> now has a
+       full-fledged random access iterator type, working in exactly
+       the same way as the iterators of the C++ standard containers.
+       <br>
+       (WB 2001/09/28)
+       </p>
+
+  <li> <p> New: <code>Vector</code>::<code
+       class="member">operator *</code> is now templatized, allowing
+       for scalar products of vectors with different underlying types.
+       <br>
+       (WB 2001/09/27)
+       </p>
+</ol>
+
+
+
+<a name="deal.II"></a>
+<h3>deal.II</h3>
+
+<ol>
+  <li> <p>
+       Changed: The classes <code>FEQ1</code>-<code
+       class="class">FEQ4</code>, <code
+       class="class">FEDG_Q1</code>-<code>FEDG_Q4</code>
+       as well as the files with their definitions,
+       <tt>fe/fe_lib.lagrange.h</tt> and <tt>fe/fe_lib.dg.h</tt>
+       ceased to exist. They had been left in for backward
+       compatibility in an earlier version, but their existence is
+       more confusing than helpful. Please change your code to use the
+       classes <code>FE_Q</code> and <code
+       class="class">FE_DGQ</code>, respectively.
+       <br>
+       (GK 2002/02/01)
+       </p>
+
+  <li> <p>
+       New: The <code>FilteredIterator</code> class
+       provides a view on ranges of iterators by iterating over only
+       those objects that satisfy a certain predicate.
+       <br>
+       (WB 2002/01/07)
+       </p>
+
+  <li> <p>
+       Improved: It is now possible to read in unconnected domains
+       through the <code>GridIn</code> class, since
+       renumbering the cells now also works for these domains.
+       <br>
+       (<a href="mailto:ms at biomech.tu-graz.ac.at">Michael Stadler</a> 2001/12/14)
+       </p>
+
+  <li> <p>
+       Improved: Both functions <code
+       class="class">VectorTools</code>::<code
+       class="member">compute_mean_value</code> take ingoing and
+       outgoing vector types as template arguments. This allows
+       applying them to <code>BlockVector</code>.
+       <br>
+       (GK 2001/12/07)
+       </p>
+
+  <li> <p>
+       New: <code>GridGenerator</code> has a function
+       <code>cylinder</code> for cylinders in three
+       space dimensions. Accordingly, a class <code
+       class="class">CylinderBoundary</code> has been created.
+       <br>
+       (GK 2001/12/07)
+       </p>
+
+  <li> <p>
+       New: <code>FiniteElement</code>::<code
+       class="member">has_support_on_face</code> allows to check
+       whether a shape function has non-zero values on a certain face
+       of a cell.
+       <br>
+       (GK 2001/12/04)
+       </p>
+
+  <li> <p>
+       Changed: The <code>IteratorState</code> enum used
+       to indicate the state in which an iterator can be is now
+       enclosed in a namespace of the same name, to take its members
+       out of the global namespace. When using one of these members,
+       you now have to prefix it by <code
+       class="class">IteratorState::</code>.
+       <br>
+       (WB 2001/11/30)
+       </p>
+
+  <li> <p>
+       Changed: The <code>NormType</code> enum used to
+       indicate the norm the <code
+       class="member">VectorTools::integrate_difference</code>
+       function shall integrate is moved from the global namespace
+       into the <code>VectorTools</code> class. You
+       therefore have to prefix the members of this enum by the
+       respective class name.
+       <br>
+       (WB 2001/11/29)
+       </p>
+
+  <li> <p>
+       Fixed: The functions <code
+       class="member">Mapping::transform_unit_to_real_cell</code>
+       leaked some memory. This is now fixed.
+       <br>
+       (RH, WB 2001/11/28)
+       </p>
+
+  <li> <p>
+       Fixed: The <code>DoFHandler</code> class had a
+       memory leak. This is now fixed. Likewise for the <code
+       class="class">MGDoFHandler</code> class.
+       <br>
+       (WB 2001/11/28)
+       </p>
+
+  <li> <p>
+       Fixed: The <code
+       class="member">GridRefinement::refine</code>
+       function failed when the threshold was zero. This is now fixed.
+       <br>
+       (RH 2001/11/26)
+       </p>
+
+  <li> <p>
+       Fixed: The <code
+       class="member">MappingQ<dim>::transform_real_to_unit_cell</code>
+       function failed on a very unusual cell. This is now fixed.
+       <br>
+       (RH 2001/11/26)
+       </p>
+
+  <li> <p>
+       New: The new <code
+       class="member">CellAccessor<dim>::neighbor_of_coarser_neighbor</code>
+       function returns where to find the present cell from a coarser
+       neighbor.
+       <br>
+       (RH 2001/11/21)
+       </p>
+
+  <li> <p>
+       Fixed: The <code
+       class="member">GridRefinement::refine_fixed_fraction</code>
+       function sometimes had problems when indicators vary over
+       several orders of magnitude, due to roundoff. This is now
+       fixed. 
+       <br>
+       (WB 2001/11/05)
+       </p>
+
+  <li> <p> 
+       New: <code
+       class="member">DoFTools::extract_subdomain_dofs</code> selects
+       those degrees of freedom which are located on cells with a
+       specified subdomain id.
+       <br>
+       (WB 2001/10/27)
+       </p>
+
+  <li> <p>
+       New: Cells now have an additional property
+       <em>subdomain_id</em> which can be used in parallel
+       computations to identify which cells are handled on which
+       processor. These flags are read and set using the functions
+       <code>cell->subdomain_id()</code> and <code
+       class="member">cell->set_subdomain_id(new_id)</code>. The
+       subdomain ids are unsigned integers, so should be sufficiently
+       large also for larger numbers of subdomains.
+       <br>
+       (WB 2001/10/27)
+       </p>
+
+  <li> <p>
+       Fixed: the <code
+       class="member">GridGenerator::hyper_rectangle</code> function
+       was broken in 3d.
+       <br>
+       (WB 2001/10/26)
+       </p>
+
+  <li> <p>
+       Improved: Both functions <code
+       class="class">DataOut_DoFData</code>::<code
+       class="member">add_data_vector</code> accept <code
+       class="class">BlockVector</code> as argument.
+       <br>
+       (GK 2001/10/12)
+       </p>
+
+  <li> <p>
+       Improved: Both functions <code
+       class="class">VectorTools</code>::<code
+       class="member">integrate_difference</code> take ingoing and
+       outgoing vector types as template arguments. This allows
+       applying them to <code>BlockVector</code> and of
+       outputting a vector of doubles suitable for <code
+       class="class">DataOut</code>.
+       <br>
+       (GK 2001/10/12)
+       </p>
+
+  <li> <p>
+       Fixed: Functions creating sparsity patterns for DG elements in
+       <code>DoFTools</code> get the pattern type as
+       template argument, too.
+       <br>
+       (GK 2001/10/01)
+       </p>
+
+  <li> <p>
+       Fixed: the iterator category template base class of grid
+       iterators was incorrectly set.
+       <br>
+       (WB 2001/09/28)
+       </p>
+</ol>
+
+*/
diff --git a/doc/news/3.3.0-vs-3.3.1.h b/doc/news/3.3.0-vs-3.3.1.h
new file mode 100644
index 0000000..b4457c1
--- /dev/null
+++ b/doc/news/3.3.0-vs-3.3.1.h
@@ -0,0 +1,44 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013, 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/**
+ * @page changes_between_3_3_0_and_3_3_1 Changes between Version 3.3.0 and 3.3.1
+
+<p>
+This is the list of changes made between the deal.II releases listed above.
+All entries are signed with the names of the author.
+</p>
+
+
+<ol>
+  <li> <p> 
+       Fixed: In 3d, the function <code
+       class="member">DoFTools::make_hanging_node_constraints</code> 
+       contained an assertion that failed erroneously for finite
+       elements that do not have degrees of freedom on vertices. This
+       is now fixed.
+       <br> 
+       (WB 2002/02/21)
+       </p>
+
+  <li> <p>
+       Fixed: <code>TriaAccessor<3,3>::measure</code>
+       sometimes computed a negative value. This is now fixed.
+       <br> 
+       (WB 2002/02/21)
+       </p>
+</ol>
+
+*/
diff --git a/doc/news/3.3.0-vs-3.4.0.h b/doc/news/3.3.0-vs-3.4.0.h
new file mode 100644
index 0000000..216d39d
--- /dev/null
+++ b/doc/news/3.3.0-vs-3.4.0.h
@@ -0,0 +1,380 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013, 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/**
+ * @page changes_between_3_3_and_3_4 Changes between Version 3.3 and 3.4
+
+<p>
+This is the list of changes made between the deal.II releases listed above.
+All entries are signed with the names of the author.
+</p>
+
+<a name="general"></a>
+<h3>General</h3>
+
+<ol>
+  <li> <p>
+       <strong>
+       Deprecated: The functions <code
+       class="member">FEValuesBase::get_shape_values</code>, <code
+       class="member">FEValuesBase::get_shape_grads</code>, and <code
+       class="member">FEValuesBase::get_shape_2nd_derivatives</code> are
+       now deprecated as they expose too much of the internal data
+       structure of their class, and interfere with plans for the
+       extension of this and related classes. The functions still
+       exist in this release of the library, but will be removed in
+       the next version. Use the <code
+       class="member">FEValuesBase::shape_value</code> and alike
+       functions as a replacement.
+       <br>
+       For more information, read
+       <a href="http://www.dealii.org/mail/msg00638.html" target="body">this mail</a>.
+       </strong>
+       <br>
+       (WB 2002/06/03)
+       </p>
+
+  <li> <p>
+       Added: The <code>configure</code> script now recognizes Intel's ECC
+       compiler when run on Itanium systems with this compiler present.
+       The ECC compiler is similar to the ICC compiler but it accepts some
+       different options.
+       <br>
+       (BK 2002/05/22)
+       </p>
+
+  <li> <p> 
+       New: The step-14 example program demonstrates duality based
+       error estimators and some more software design concepts.
+       <br>
+       (WB 2002/05/05)
+       </p>
+
+  <li> <p> 
+       New: In all previous versions, deal.II used
+       the <a href="http://www.cs.wustl.edu/~schmidt/ACE.html"
+       target="_top">ACE (Adaptive Communications Environment)</a>
+       library to support cross-platform threading
+       facilities. While this is still supported, the default way
+       is now to use the POSIX threading functions that are
+       available on many systems. This relieves you from the need of
+       installing a huge library of which the most part is not used
+       anyway. However, if you use ACE for other reasons, then it is
+       still supported. For installation instructions, see the 
+       <a href="../../readme.html" target="body">ReadMe</a> file.
+       <br>
+       (WB 2002/04/30)
+       </p>
+
+  <li> <p> 
+       Changed: The Makefiles for the library are now truly
+       parallel. To this end, the automatic generation of the files
+       <tt>forward_declarations.h</tt> in the various directories had
+       to be changed. They will now be generated automatically at the
+       end of the targets <tt>all</tt>, <tt>debug</tt> and
+       <tt>optimized</tt>. They will not be generated while building a
+       single library. In this case, <tt>make forward</tt> can be used
+       to build them manually.
+       <br>
+       I introduced separate targets for the generation of the
+       optimized versions only.
+       <br>
+       (GK 2002/04/17)
+       </p>
+
+  <li> <p> 
+       New: The step-13 example program tells you something about
+       software design things for finite element programs.
+       <br>
+       (WB 2002/04/16)
+       </p>
+
+  <li> <p> 
+       Changed: Due to problems with undeclared functions and general
+       compatibility concerns, <code>-ansi</code> is no longer part
+       of the compile flags for gcc.
+       <br>
+       (WB 2002/04/16)
+       </p>
+
+  <li> <p> 
+       Fixed: Explicit specializations of member templates are now
+       conforming to the C++ standard. While most compilers accepted
+       the previous form, Sun's Forte compiler wants a strictly
+       conforming one.
+       <br>
+       (WB 2002/03/25)
+       </p>
+
+  <li> <p> 
+       Fixed: For gcc versions that used <code
+       class="class">ostrstream</code> instead of <code
+       class="class">ostringstream</code>, it was necessary to append
+       a final <code>std::ends</code> when piping text
+       into the string stream. This was not previously
+       conditionalized, but done for old and new classes.
+       <br>
+       (WB 2002/03/13)
+       </p>
+
+  <li> <p> 
+       Changed: The configure machinery has been revamped
+       significantly.
+       <br>
+       (WB 2002/03/08)
+       </p>
+
+  <li> <p> 
+       Added: The top-level Makefile now supports "optimized" as a
+       target that builds only optimized versions of the <code>base</code>,
+       <code>lac</code>, <code>1d</code>, <code>2d</code>, and <code>3d</code>
+       libraries. 
+       <br>
+       (BK 2002/02/19)
+       </p>
+
+  <li> <p> 
+       Changed: The build system was entirely revised. Object
+       files in debug mode now have the suffix <code>.g.o</code>
+       instead of <code>.go</code>. All object files from the
+       subdirectories are now placed into the <code>/lib</code>
+       top-level directory, rather than in library directories in the
+       individual subdirs.
+       <br>
+       (WB 2002/02/11)
+       </p>
+</ol>
+
+
+
+<a name="base"></a>
+<h3>base</h3>
+
+<ol>
+  <li> <p> 
+       New: The <code>vector2d</code> row accessor
+       classes now have member functions <code
+       class="member">begin</code> and <code>end</code>
+       which allow iterating over the elements of a row of such an
+       object. 
+       <br>
+       (WB 2002/05/30)
+       </p>
+
+  <li> <p> 
+       New: The <code>Legendre</code> and
+       <code>LagrangeEquidistant</code> classes now have
+       static member functions <code
+       class="member">generate_complete_basis</code> which returns an
+       array of polynomial objects spanning the complete space up to a
+       specified order in 1d. This may be used to generate the
+       respective polynomial spaces in higher space dimensions.
+       <br>
+       (WB 2002/05/27)
+       </p>
+
+  <li> <p> 
+       Changed: The <code>Polynomial</code> and
+       <code>LagrangeEquidistant</code> classes have lost
+       their default constructor, as that did not make much sense
+       anyway.
+       <br>
+       (WB 2002/05/27)
+       </p>
+
+  <li> <p> 
+       Fixed: When forward declaring the <code
+       class="class">Tensor</code> class, we now also forward declare
+       its partial specialization for a rank one tensor. Not doing so
+       confused Sun's Forte compiler.
+       <br>
+       (WB 2002/03/22)
+       </p>
+
+  <li> <p> 
+       Fixed: The class <code>TensorFunction</code>
+       now uses local types <code>value_type</code> and
+       <code>gradient_type</code> as return values of
+       its member functions. This works around a bug in Sun's Forte
+       C++ compilers.
+       <br>
+       (WB 2002/03/20)
+       </p>
+
+  <li> <p> 
+       Improved: The <code>AssertThrow</code> macro now
+       uses <code>__builtin_expect</code> if the
+       compiler supports this. This indicates to the compiler that we
+       expect the condition to be true and that throwing an exception
+       is a rare case. By this information, the compiler can help the
+       branch prediction unit of modern processors to better predict
+       which direction a branch will take.
+       <br>
+       (WB 2002/03/13)
+       </p>
+
+  <li> <p>
+       New: The <code>vector2d</code> class now not only
+       allows access to elements through the <code
+       class="member">operator()(unsigned int,unsigned int)</code>
+       (i.e. matrix or Fortran style access), but also through nested
+       brackets via an <code>operator[]</code>
+       (i.e. like to a two-dimensional C-style array).
+       <br>
+       (WB 2002/03/08)
+       </p> 
+
+  <li> <p>
+       Changed: The function <code>MultithreadInfo</code>::
+       <code>get_n_cpus</code> now reports the proper number
+       of CPUs when running on Silicon Graphics.
+       <br>
+       (BK 2002/02/19)
+       </p> 
+
+  <li> <p> 
+       Changed: The quite logorrhoeic function name <code
+       class="class">TensorProductPolynomials</code>::<code
+       class="member">n_tensor_product_polynomials</code> was changed to
+       <code>n</code> to be compliant with the new class <code
+       class="class">PolynomialSpace</code>.
+       <br>
+       (GK 2002/02/11)
+       </p>
+
+  <li> <p> 
+       New: The class <code>PolynomialSpace</code>
+       implements the space of polynomials of at most a certain degree in
+       arbitrary space dimensions.
+       <br>
+       (GK 2002/02/11)
+       </p>
+
+  <li> <p> 
+       New: The function <code>DataOutBase</code>::
+       <code>write_tecplot_binary</code> has been
+       added.  This function will write Tecplot binary files if the
+       Tecplot API is detected by ./configure.  To use this feature be
+       sure that the environment variable TECHOME points to a valid
+       Tecplot installation and that the files
+       $TECHOME/include/TECIO.h and $TECHOME/lib/tecio.a exist.  The
+       name of the file to be written is specified through the <code
+       class="class">DataOutBase</code> ::<code
+       class="member">TecplotFlags</code>.  <code
+       class="member">tecplot_binary_file_name</code> variable. If the
+       API is not available this code simply calls the existing ASCII
+       output function.
+       <br>
+       (BK 2002/02/11)
+       </p>
+</ol>
+
+
+
+<a name="lac"></a>
+<h3>lac</h3>
+
+<ol>
+  <li> <p> Improved: <code>SolverGMRES</code> allocates
+       basis vectors only when they are needed. Therefore, it is safe
+       now to ask for a basis larger than the expected number of
+       iteration steps. On the other hand, memory allocation failures
+       may occur during the iteration now.
+       <br>
+       (GK 2002/05/24)
+       </p>
+
+  <li> <p> 
+       New: Function <code
+       class="member">SparsityPattern::matrix_position</code> is the
+       inverse function for <code
+       class="member">SparsityPattern::operator()</code>.
+       <br>
+       (WB 2002/02/13)
+       </p>
+
+  <li> <p> 
+       New: Functions <code
+       class="member">SparsityPattern::copy_from</code> and <code
+       class="member">SparseMatrix::copy_from</code> allow to copy a full
+       matrix into a sparse matrix.
+       <br>
+       (WB 2002/02/06)
+       </p>
+</ol>
+
+
+
+<a name="deal.II"></a>
+<h3>deal.II</h3>
+
+<ol>
+  <li> <p> 
+       New: The <code>GeometryInfo</code> class now
+       provides two methods,
+       <code>unit_cell_vertex</code> and <code
+       class="member">vertices_adjacent_to_line</code>, that reveal
+       something about the placement and numbering of vertices on the
+       unit cell.
+       <br>
+       (GK 2002/05/29)
+       </p>
+
+  <li> <p> 
+       New: The <code>GridOut::</code>
+       <code>write_dx</code> function is now implemented.
+       It allows to write the mesh (cells and faces) with some additional
+       information that may be useful once in a while.
+       <br>
+       (GK 2002/05/02)
+       </p>
+
+  <li> <p> 
+       Fixed: The <code>IteratorState::IteratorState</code>
+       enum is now called <code
+       class="class">IteratorState::IteratorStates</code>. This works
+       around a bug in Sun's Forte C++ compilers which can't handle
+       members of namespaces with the same name as the enclosing
+       namespace.
+       <br>
+       (WB 2002/03/20)
+       </p>
+
+  <li> <p> 
+       Fixed: In 3d, the function <code
+       class="member">DoFTools::make_hanging_node_constraints</code> 
+       contained an assertion that failed erroneously for finite
+       elements that do not have degrees of freedom on vertices. This
+       is now fixed.
+       <br> 
+       (WB 2002/02/21)
+       </p>
+
+  <li> <p> 
+       Fixed: <code>TriaAccessor<3,3>::measure</code>
+       sometimes computed a negative value. This is now fixed.
+       <br> 
+       (WB 2002/02/21)
+       </p>
+
+  <li> <p> 
+       New: Finite element family with complete polynomial spaces
+       for discontinuous Galerkin: <code>FE_DGP</code>
+       <br> 
+       (GK 2002/02/11)
+       </p>
+</ol>
+
+*/
diff --git a/doc/news/3.4.0-vs-4.0.0.h b/doc/news/3.4.0-vs-4.0.0.h
new file mode 100644
index 0000000..61b8461
--- /dev/null
+++ b/doc/news/3.4.0-vs-4.0.0.h
@@ -0,0 +1,1378 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013, 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/**
+ * @page changes_between_3_4_and_4_0 Changes between Version 3.4 and 4.0
+
+<p>
+This is the list of changes made between the deal.II releases listed above.
+All entries are signed with the names of the authors. Regular
+contributors' names are abbreviated by WB (Wolfgang Bangerth), GK
+(Guido Kanschat), RH (Ralf Hartmann).
+</p>
+
+
+<a name="general"></a>
+<h3>General</h3>
+
+<ol>
+  <li> <p> 
+       New: deal.II now uses a new threading
+       scheme. The new scheme is simpler to use, and in particular
+       more flexible in some cases where only one thread is started,
+       or where a thread is completely detached, since we got rid of
+       the <code>ThreadManager</code> class and now only store handles
+       to individual threads (which can be discarded, or added to a
+       <code>ThreadGroup</code> variable that is able to wait for a
+       whole set of threads at once).
+       <br>
+       The new scheme also implements a much needed feature: calling
+       functions on a new thread that return values. Previously, such
+       functions needed to be written in a way that they return their
+       return value through an additional reference parameter. This
+       was inflexible if one wanted to call functions that already
+       exist. This restriction is now lifted: such functions can be
+       called, and the return value can be accessed once the thread
+       has finished.
+       <br>
+       (WB 2003/02/06)
+       </p>
+
+  <li> <p> 
+       New: deal.II now makes use of some parts of
+       the <a href="http://www.boost.org/">boost</a> library, which is
+       supposed to be a testground for the next generation C++ standard
+       library. The parts which we use are now in
+       <code>contrib/boost/include/boost_local/</code> and can be
+       referenced from within your programs. The directory contains
+       the string <code>_local</code> since you may still want to use
+       another version or installation of boost in your own programs.
+       </p>
+       
+       <p>
+       Also note that boost is large -- much larger than the subset we
+       have imported --, so we only took what we needed.
+       <br>
+       (WB 2003/02/06)
+       </p>
+
+  <li> <p> 
+       Fixed: A longstanding bug in the documentation system has been fixed: if
+       a namespace was ended with <code>}</code> instead of <code>};</code>
+       (note the semicolon), then the documentation tool assumed that the
+       following entities were still part of the namespace just left. This was
+       since the closing brace alone was not accepted as ending a namespace
+       (after all, structures, classes, and enums -- the other entities that
+       can enclose other declarations -- need the semicolon). This led to some
+       classes not showing up in the class index of the sublibraries. This is
+       now fixed.
+       <br>
+       (WB 2003/02/02)
+       </p>
+
+  <li> <p> 
+       Changed: Classes and structures that are declared inside namespaces named
+       <code>internal</code> are now no longer shown in the class
+       index view of the documentation of each of the sublibraries. Since they
+       are supposed to be used internally only, this is no drawback for the
+       general public. However, they are documented as members of these
+       namespaces.
+       <br>
+       (WB 2003/02/02)
+       </p>
+
+  <li> <p> 
+       Fixed: Some of the formulas in the step-14 tutorial were obviously 
+       scrambled a little. This is now fixed.
+       <br>
+       (Roy Stogner, WB 2003/01/30)
+       </p>
+
+  <li> <p> 
+       Changed: The main Makefile has been changed to sequentialize building 
+       the base, lac, and deal.II sublibraries. We changed this, since on some
+       systems (notably AIX), the latter libraries need to be linked against
+       the former ones, when creating shared libraries.
+       <br>
+       (WB 2003/01/24)
+       </p>
+
+  <li> <p> 
+       New: Changes have been made to support compiling and using 
+       deal.II on AIX 5 systems.
+       <br>
+       (WB 2003/01/24)
+       </p>
+
+  <li> <p> 
+       Removed: Thread support now relies solely on the use of POSIX
+       functions. The use of the
+       <a href="http://www.cs.wustl.edu/~schmidt/ACE.html" target="_top">ACE
+       (Adaptive Communications Environment)</a> library for this is now no
+       longer supported. However, application programs can of course still use
+       ACE, but they will need to generate paths to this library in their
+       makefiles themselves.
+       <br>
+       (WB 2003/01/11)
+       </p>
+
+  <li> <p> 
+       New: Some changes have been made to support Mac OS X 10.2. Shared
+       libraries are not supported on this architecture, but everything else
+       should work.
+       <br>
+       (WB 2002/12/18)
+       </p>
+
+  <li> <p> 
+       New: deal.II can be compiled with version 7.0 of
+       Intel's icc compiler, which was recently released. Since this compiler
+       finally supports the very restrictive flags <code>-Xc -ansi</code> that
+       check for close conformance with the C++ standard, we use them (previous
+       versions of icc would crash when these two flags are given). This
+       requires that we distinguish between these compiler versions, and the
+       corresponding Makefile variable <code>GXX-VERSION</code> now no longer
+       holds the non-versioned string <code>intel_icc</code> when icc is
+       detected, but rather either <code>intel_icc5</code>,
+       <code>intel_icc6</code>, or <code>intel_icc7</code>, depending on what
+       version of the compiler was detected.
+       <br>
+       (WB 2002/12/05)
+       </p>
+
+  <li> <p> 
+       Changed: Previously, we just set the preprocessor variable
+       <code>DEAL_II_USE_MT</code>, when <code>--with-multithreading</code> was
+       given as argument to <code>./configure</code>. Tests in the code
+       therefore looked like <code>#ifdef DEAL_II_USE_MT</code>. This has been
+       changed so that the variable is always defined, but its value is now
+       equal to <code>1</code> 
+       when multithreading was requested, and zero otherwise. The reason for
+       this is that you can now write <code>if (DEAL_II_USE_MT && ...)</code>
+       conditions, and need not interleave if-else clauses from regular code
+       and the preprocessor, if conditions involve both the state of this
+       preprocessor variable and the run-time state of your program. However,
+       this change requires that all appearances of <code>#ifdef
+       DEAL_II_USE_MT</code> be changed to <code>#if DEAL_II_USE_MT ==
+       1</code>, since the variable is now defined unconditionally.
+       <br>
+       (WB 2002/11/14)
+       </p>
+
+  <li> <p> 
+       New: Object files are now named according to the local defaults
+       on the system we are running on. On Unix systems, this is
+       usually a <code>.o</code> suffix, while on Windows it is
+       <code>.obj</code>. Likewise for executables, which have no
+       suffix on Unix, but <code>.exe</code> on Windows.
+       <br>
+       (WB 2002/11/11)
+       </p>
+
+  <li> <p> 
+       New: deal.II can now also be compiled with Red Hat's
+       version of the gcc compiler, gcc 2.96. However, some problems
+       remain.
+       <br>
+       (WB 2002/10/14)
+       </p>
+
+  <li> <p> 
+       Fixed: On CygWin, one header file has a <code>#define quad
+       quad_t</code>. This is annoying, since we have local variables and
+       member functions with the name <code>quad</code>, and in fact it breaks
+       compilation on those versions of CygWin that have this. Fortunately, the
+       define is only active if a preprocessor variable
+       <code>_POSIX_SOURCE</code> is not set. Thus, we now check for the define
+       when configuring the library, and if necessary set the preprocessor
+       variable. However, while this allows to compile the library on these
+       systems, it may otherwise affect your code, if you use functions or
+       other features of the system that are not available when the flag is
+       set.
+       <br>
+       (Stephen Kolaroff, WB
+       2002/09/28) 
+       </p>
+
+  <li> <p> 
+       New: Since <code>math.h</code> only defines the values of PI or E (as
+       <code>M_PI</code> and <code>M_E</code>) when certain defines are set (on
+       Linux, these are <code>__USE_BSD</code> or <code>__USE_XOPEN</code>),
+       portable programs usually defined these constants themselves. In
+       deal.II, this happened at 6 different places. To
+       avoid this in the future, <code>base/config.h</code> now exports a
+       namespace <code>deal_II_numbers</code> that defines these two, and a
+       number of other numerical constants.
+       <br>
+       (WB 2002/09/12)
+       </p>
+
+  <li> <p> 
+       New: <code>base/config.h</code> now exports the
+       deal.II base directory through the
+       <code>DEAL_II_PATH</code> preprocessor variable.
+       <br>
+       (WB 2002/09/09)
+       </p>
+
+  <li> <p> 
+       Removed: The forward declarations files have gone. We have never
+       propagated their use in the example programs, but these files have been
+       there in the base, lac, and grid include directories, and forward
+       declared all classes that were present in the respective parts of the
+       library. This, the idea was, enables you to use just this include file
+       in your own header files, rather than including the full declarations of
+       these classes. However, maintaining these forward declaration files has
+       been a constant thorn in our side, be it that the timing of their
+       generation was difficult when using parallel builds, or that they were
+       difficult to generate at first. The latter is now the reason for their
+       abolition: we had a script for their generation, but it did not take
+       into account namespaces, so we got clashes when we found that we had
+       used the same class name in two different namespaces, since the script
+       put the forward declaration incorrectly into the global namespace where
+       they conflicted. Since we do not plan to extend the script by a
+       parser that can properly handle opening and closing braces of
+       namespaces, we simply drop these files.
+       <br>
+       What you should do if you have used these forward declaration files: you
+       have two possibilities - either include the respective header file in
+       which the class is fully declared, or write the forward declaration into
+       your headers yourself.
+       <br>
+       (WB 2002/09/05)
+       </p>
+
+  <li> <p> 
+       New: There is now
+       a new report
+       on assembling matrices available from the 
+       <a href="../documentation.html" target="body">documentation
+       page</a>. The main focus is assembling of matrices for
+       vector-valued problems, where shape functions are
+       vector-valued, and may have only one or more non-zero vector
+       components. 
+       <br>
+       (WB 2002/06/17)
+       </p>
+
+  <li> <p>
+       Removed: The functions <code
+       class="member">FEValuesBase::get_shape_values</code>, <code
+       class="member">FEValuesBase::get_shape_grads</code>, and <code
+       class="member">FEValuesBase::get_shape_2nd_derivatives</code> are
+       now removed as they expose too much of the internal data
+       structure of their class, and interfere with plans for the
+       extension of this and related classes. These functions, which
+       had been deprecated in the previous version, are thus not
+       part of the release after version 3.4 any more. Use the <code
+       class="member">FEValuesBase::shape_value</code> and alike
+       functions as a replacement.
+       <br>
+       For more information, read
+       <a href="http://www.dealii.org/mail/msg00638.html" target="body">this mail</a>.
+       <br>
+       (WB 2002/06/10)
+       </p>
+
+  <li> <p> 
+       New: deal.II now also supports vector-valued
+       finite elements with shape functions for which more than just
+       one vector component is non-zero. Such elements are, for
+       example, the Nedelec and Raviart-Thomas families. Previously,
+       vector-valued elements were only supported insofar as they
+       could be composed of scalar ones; in that case, each
+       (vector-valued) shape function had only one non-zero vector
+       component.
+       <br>
+       (WB 2002/06/10)
+       </p>
+
+  <li> <p> 
+       New: The top-level makefile now has a target <tt>distclean</tt>.
+       <tt>clean</tt> leaves the libraries now, removing
+       everything that is not needed to use
+       deal.II. <tt>distclean</tt> removes even the
+       libraries, leaving the directory more or less in the state like
+       after <tt>configure</tt>.
+       <br>
+       (GK 2002/06/07)
+</ol>
+
+
+
+<a name="base"></a>
+<h3>base</h3>
+
+<ol>
+  <li> <p>
+       Fixed: A bug in the <code
+       class="class">Patterns::MultipleSelection</code> class if more
+       than two elements in a comma-separated list were given.
+       <br>
+       (Brian Carnes 2003/05/14)
+       </p>
+
+  <li> <p>
+       Changed: The <code>Polynomials::Legendre</code>
+       class lost its template argument and is now just a regular
+       class. There was no real good reason for the template argument,
+       it had just crept in.
+       <br>
+       (WB 2003/05/12)
+       </p>
+
+  <li> <p>
+       New: There is now a class <code
+       class="class">AnisotropicPolynomials</code> that constructs a higher
+       dimensional polynomial space from a set of 1-d polynomials in each of
+       the coordinate directions.
+       <br>
+       (WB 2003/04/20)
+       </p>
+
+  <li> <p>
+       Changed: The <code>Table</code> accessor classes
+       have been moved to a namespace <code
+       class="class">internal</code>. Since these classes are not (or should
+       not be) used directly in applications, this should not change
+       compatibility. However, they will now no longer show up in the class
+       overview of the documentation, which they were cluttering up.
+       <br>
+       (WB 2003/02/13)
+       </p>
+
+  <li> <p> 
+       New: The <code>Function</code> class now has an assignment
+       operator. This way, you can put function objects into
+       containers. However, the assignment operator makes sure that only
+       objects can be assigned that have the same number of vector components.
+       <br>
+       (WB 2003/02/06)
+       </p>
+
+  <li> <p> 
+       New: The <code>ThreadMutex</code> classes now have a
+       member class <code>ScopedLock</code> that implements the
+       scoped thread-safe locking pattern of Doug Schmidt. It is also used in
+       various places of the code now.
+       <br>
+       (WB 2003/01/28)
+       </p>
+
+  <li> <p> 
+       Fixed: The <code>PosixThreadManager</code> called
+       its <code>wait</code> function in the
+       destructor. If this had been called before already, then the
+       same threads would have been waited for twice, which invokes
+       undefined behavior. This is fixed by making sure that <code
+       class="member">wait</code> removes the id's of the threads it
+       has already waited for, and so calling it more than once will
+       not wait for threads which have already been waited for.
+       <br>
+       (Michael Anderson, WB 2003/01/16)
+       </p>
+
+  <li> <p> 
+       Fixed: The <code>Subscriptor</code> uses a counter to
+       count how many <code>SmartPointer</code> objects subscribe
+       to the pointed-to object. This counter needs to be a volatile variable
+       in multithreaded mode, to avoid false compiler optimizations based on
+       the assumption that the variable cannot change between two subsequent
+       reads.
+       <br>
+       (WB 2003/01/11)
+       </p>
+
+  <li> <p> 
+       Fixed: In multithreaded mode, when a new thread is started, the
+       arguments to the function being called need to be copied from the stack
+       of the starting thread to that of the new thread. In order to
+       synchronise this, mutexes were used that were acquired from one thread
+       and released from another thread. On Linux this does not lead to
+       problems, but POSIX functions do not guarantee that this actually works,
+       and it also leads to problems when running programs under valgrind. This
+       is now fixed with the help of condition variables.
+       <br>
+       (Michael Anderson, WB 2003/01/11)
+       </p>
+
+  <li> <p> 
+       New: There are now classes <code>ThreadCondition</code>
+       that implement thread condition variable operations through POSIX
+       functions (in multithreaded mode) or doing nothing (in singlethreaded
+       mode).
+       <br>
+       (WB 2003/01/11)
+       </p>
+
+  <li> <p> 
+       New: Newer versions of gcc have a very nice feature: you can set
+       a verbose terminate handler, that not only aborts a program
+       when an exception is thrown and not caught somewhere, but
+       before aborting it prints that an exception has been thrown,
+       and possibly what the std::exception::what() function has to
+       say. Since many people run into the trap of not having a
+       catch clause in main(), they wonder where that abort may be
+       coming from. The terminate handler then at least says what is
+       missing in their program.
+       <br>
+       (WB 2002/12/19)
+       </p>
+
+  <li> <p> 
+       New: There is now a <code>Patterns::List</code> pattern
+       that matches a list of elements each of which has to satisfy a pattern
+       that was given to the constructor.
+       <br>
+       (WB 2002/11/29)
+       </p>
+
+  <li> <p> 
+       Changed: In POSIX mode, when the <code
+       class="member">ThreadManager</code> class created a new thread through
+       <code>pthread_create</code>, it only checked for the
+       error code and aborted if it was non-zero. Now, it checks whether the
+       error code is <code>EAGAIN</code> and simply retries the
+       call if this is the case. This may, in rare cases, lead to a deadlock or
+       an infinite loop, but will usually just wait until the respective
+       resources for thread creation are available from the operating system
+       and will then succeed.
+       <br>
+       (WB 2002/11/13)
+       </p>
+
+  <li> <p> 
+       Fixed: The <code>write_text</code> and <code
+       class="member">write_tex</code> functions of the <code
+       class="class">TableHandler</code> class now check whether their
+       <code>ofstream</code> arguments are in a proper state before
+       using them for output.
+       <br>
+       (RH 2002/11/11)
+       </p>
+
+  <li> <p> 
+       New: Added Hierarchical Polynomial (similar to Legendre class). Will
+       eventually be used in a hierarchical FiniteElement class similar to
+       FE_Q class. Included in Polynomials namespace.
+       <br>
+       (Brian Carnes 2002/10/15)
+       </p>
+
+  <li> <p> Changed: Because they became too many, the classes describing 1d
+       polynomials are now in a <code>namespace
+       Polynomials</code>.
+       <br>
+       (WB 2002/10/14)
+       </p>
+
+  <li> <p> Changed: When an exception is thrown but not caught in a sub-thread,
+       this exception is not passed to the main thread by the operating
+       system. Rather, if the exception is not caught from the function that
+       was invoked by the spawning system function, the entire program is
+       terminated without an additional message. The wrapper functions which
+       are used to spawn new threads in the <code>Threads</code>
+       namespace therefore now catch these exceptions and at least print the
+       message they carry, before aborting the program. This way, at least the
+       message gets displayed.
+       <br>
+       (WB 2002/10/13)
+       </p>
+
+  <li> <p> Changed: The class <code
+       class="member">Table<2>::fill</code> function, which is also
+       inherited from the <code>FullMatrix</code> class, used to
+       work also when the size of the matrix was zero, by simply not copying
+       something. This led to difficult-to-detect errors. It is therefore no
+       more allowed to call this function when the matrix is empty. For all
+       other cases, the status of copying without checking the size of the
+       array copied from remains unchanged.
+       <br>
+       (WB 2002/09/28)
+       </p>
+
+  <li> <p> New: The classes <code
+       class="class">TableIndices<N></code> and <code
+       class="class">Table<N,T></code> are now implemented also
+       for <code>N=4,5</code> and <code>6</code>. The <code
+       class="class">Table<N,T></code> class represents an
+       <code>N</code>-dimensional array and might replace the
+       N-times-nested-use of the <code
+       class="class">std::vector</code> class.
+       <br>
+       (RH 2002/09/24)
+       </p>
+
+  <li> <p> 
+       New: The <code>Threads::n_existing_threads</code>
+       function returns the present number of existing threads, allowing an
+       assessment whether it is useful to spawn new threads or rather perform
+       the operation in question sequentially.
+       <br>
+       (WB 2002/09/08)
+       </p>
+
+  <li> <p> New: Global exception class <code
+       class="class">ExcIteratorPastEnd</code>, which should be used if an
+       iterator is incremented or decremented beyond its end value.
+       <br>
+       (GK 2002/09/08)
+       </p>
+
+  <li> <p> 
+       Extended: Previously, the <code
+       class="class">Threads::PosixThreadBarrier</code>
+       class could not be used at all (they threw exceptions), if your system
+       did not have the respective POSIX functions. This restriction is lifted
+       for the special case that you give one as the number of parties that
+       will be waiting for the barrier: in this case, a barrier is a
+       no-operation, so we do not need assistance from the operating
+       system. This change makes it slightly simpler to write programs in a way
+       such that they run in both single- and multithreaded environments.
+       <br>
+       (WB 2002/09/05)
+       </p>
+
+  <li> <p> 
+       New: The old class <code>vector2d</code>, implementing a
+       two-dimensional array of objects is now gone. Instead, there is the new
+       <code>Table</code> class that implements tables of
+       arbitrary dimension. Transition is painless: where there was
+       <code>vector2d<type></code> before, use
+       <code>Table<2,type></code> now (and don't forget to update the
+       name of the header file, of course). If you have a three-dimensional
+       array, you can use <code>Table<3,type></code> now.
+       <br>
+       (WB 2002/09/03)
+       </p>
+
+  <li> <p> 
+       New: There are now functions returning the transpose of <code
+       class="class">Tensor</code> objects of rank 2.
+       <br>
+       (WB 2002/08/31)
+       </p>
+
+  <li> <p> 
+       New: Row accessors for the <code>vector2d</code>
+       class now have a member function <code
+       class="member">size</code> that returns the size of the row,
+       i.e. the number of columns of the table.
+       <br>
+       (WB 2002/07/24)
+       </p>
+
+  <li> <p> 
+       Fixed: In EPS output, colors were set to invalid values if the
+       values of the field that is used for coloring are all
+       equal. This happens, for example, in the very first time step
+       of time dependent problems, if initial values are zero. The
+       color value now used is arbitrary, but valid.
+       <br>
+       (WB 2002/07/24)
+       </p>
+
+  <li> <p> 
+       Changed: To save disk space, color values in EPS output are
+       written as grey scale with only one value instead of three RGB
+       values if the color so represented is actually a grey scale.
+       <br>
+       (WB 2002/07/24)
+       </p>
+
+  <li> <p> 
+       New: There are now operators to allow multiplication and
+       division of <code>Tensor</code> objects by scalar
+       factors.
+       <br>
+       (WB 2002/06/07)
+       </p>
+</ol>
+
+
+
+<a name="lac"></a>
+<h3>lac</h3>
+
+<ol>
+  <li> <p> New: Function 
+       <code>BlockSparseMatrix::print_formatted</code>
+       added, similar to the 
+       <code>BlockVector::print_formatted</code> function.
+       <br>
+       (Brian Carnes 2003/06/02)
+       </p> 
+
+  <li> <p> New: Functions <code>SparseMatrix::operator *=</code>
+       and <code>SparseMatrix::operator /=</code> as well as 
+       <code>BlockSparseMatrix::operator *=</code>
+       and <code>BlockSparseMatrix::operator /=</code> 
+       are added.
+       <br>
+       (Brian Carnes 2003/06/02)
+       </p> 
+
+  <li> <p> Deprecated: The functions <code>Vector::scale</code>
+       and <code>BlockVector::scale</code> are now deprecated
+       and will be removed in a future version. Use <code>operator*=</code> and
+       <code>operator/=</code> instead.
+       <br>
+       (WB 2003/05/31)
+       </p> 
+
+  <li> <p> New: <code>Vector</code>, <code
+       class="class">BlockVector</code> and
+       <code>FullMatrix</code> now have <code>operator/=</code>
+       for scaling by a scalar.
+       <br>
+       (WB 2003/05/31)
+       </p> 
+
+  <li> <p> New: <code>PointerMatrix</code> now
+       has an <code>empty()</code> function, which returns true if the pointer is
+       null; otherwise we call the pointer's <code>empty()</code> function.
+       This requires the class MATRIX to have an <code>empty()</code>
+       function.
+       <br>
+       (Brian Carnes 2003/05/22)
+       </p>
+
+  <li> <p> New: <code>SparseLUDecomposition</code> now
+       has an <code>empty()</code> function, which calls the inherited <code
+       class="class">SparseMatrix</code> <code>empty()</code> function.
+       <br>
+       (Brian Carnes 2003/05/22)
+       </p>
+
+  <li> <p> New: <code>PreconditionPSOR</code> implements
+       interface to permuted SOR preconditioner function in <code
+       class="class">SparseMatrix</code>.
+       <br>
+       (GK 2003/05/12)
+       </p>
+
+  <li> <p> Improved: <code>FullMatrix</code>::<code
+       class="member">fill</code> now copies the largest possible block,
+       whether the destination or source matrix is bigger. Additionally, an
+       offset inside the source matrix may be specified.
+       <br>
+       (GK 2003/03/31)
+       </p>
+
+  <li> <p>
+       New/Changed: The <code>SparseILU</code>, <code
+       class="class">SparseMIC</code> and <code
+       class="class">SparseLUDecomposition</code> now use the same
+       interface (<code>initialize</code>, <code
+       class="class">vmult</code>, <code>clear</code>)
+       as all <code>PreconditionBlock</code> classes. In
+       virtue of a unified preconditioner interface it is now
+       recommended to use the new methods. The old methods (<code
+       class="class">reinit</code>, <code
+       class="class">decompose</code>, <code
+       class="class">apply_decomposition</code>) are now deprecated,
+       and will be removed in a later version.
+       <br>
+       (RH 2003/02/19)
+       </p>
+
+  <li> <p>
+       Changed: The <code>BlockVector</code> accessor classes
+       have been moved to a namespace <code
+       class="class">internal</code>. Since these classes are not (or should
+       not be) used directly in applications, this should not change
+       compatibility. However, they will now no longer show up in the class
+       overview of the documentation, which they were cluttering up.
+       <br>
+       (WB 2003/02/13)
+       </p>
+
+  <li> <p> 
+       New: The <code>SolverGMRES</code> now accepts an
+       <code>AdditionalData</code> parameter
+       <code>use_default_residual</code> whose default is
+       <code>true</code>. By setting this flag to <code>false</code>,
+       the stopping criterion of the left-preconditioned GMRes solver
+       is not the default preconditioned residual but the normal
+       (unpreconditioned) residual and vice versa for the
+       right-preconditioned GMRes solver. Due to a performance loss of
+       the solver this flag should be set to <code>false</code> only
+       for debugging/testing purposes.
+       <br>
+       (RH 2003/01/31)
+       </p>
+
+  <li> <p> 
+       New: <code>FullMatrix</code> has a function <code
+       class="member">copy_from</code>, copying from sparse matrices.
+       It uses iterators of the sparse matrix classes.
+       <br>
+       (GK 2003/01/08)
+       </p>
+
+
+  <li><p>
+       Changed: In an attempt to unify the use of preconditioners a
+       little, the function <code>initialize</code> of
+       classes <code>PreconditionRelaxation</code> and
+       <code>PreconditionBlock</code> take an argument
+       of type <code>AdditionalData</code>, defined in
+       the same class.  Standard behavior of <code
+       class="class">PreconditionBlock</code> has been changed on this
+       occasion to invert diagonal blocks during initialization.
+       <br>
+       (GK 2003/01/06)
+       </p>
+
+  <li> <p>
+       New: The interface for sparse decompositions has been abstracted, and
+       there is now a Modified Incomplete Cholesky (MIC) decomposition in
+       addition to the Incomplete LU (ILU) decomposition.
+       <br>
+       (Stephen Kolaroff 2002/11/27)
+       </p>
+ 
+  <li> <p>
+       Changed: In multithread mode, the <code
+       class="class">SparseMatrix</code> would spawn
+       <code>multithread_info.n_default_threads</code> threads to
+       perform matrix-vector multiplications and similar
+       operations. It would even do so if
+       <code>multithread_info.n_default_threads</code> was equal to
+       one. In that case, we now do the operation on the thread we are
+       presently on, eliminating the overhead of spawning a single
+       thread, and later waiting and terminating it.
+       <br>
+       (WB 2002/11/13)
+       </p>
+ 
+  <li> <p>
+       Fixed: In the <code>SparseDirectMA27</code> class, wrapping 
+       the MA27 solver written in Fortran77 into some structure amenable to C++,
+       we wrote beyond the end of an array on rare inputs. This is now fixed. The
+       same probably holds for the respective code for MA47.
+       <br>
+       (WB 2002/09/30)
+       </p>
+
+  <li> <p>
+       New: Since the MA27 sparse direct solver uses Fortran common blocks, it
+       was previously impossible to run several instances of this solver in
+       parallel, in a multithreaded environment. To solve this problem, the
+       <code>SparseDirectMA27</code> class now has a detached
+       mode, in which it forks off a separate program that will do the
+       computations using this solver. The actual operations are therefore
+       distributed to distinct programs that have separate address spaces. This
+       allows to have as many concurrent instances of this solver in parallel
+       as you want. For more information, read the documentation of the 
+       <code>SparseDirectMA27</code> class.
+       <br>
+       (WB 2002/09/25)
+       </p>
+
+  <li> <p> Changed: The classes <code
+       class="class">PreconditionBlock</code>, <code
+       class="class">PreconditionBlockJacobi</code>, <code
+       class="class">PreconditionBlockSOR</code>, and <code
+       class="class">PreconditionBlockSSOR</code> have changed their
+       template signature. The first template argument is now the matrix
+       type, not just a number type.
+       <br>
+       (GK 2002/09/18)
+       </p>
+
+  <li> <p> New: Class <code>BlockVector</code> has a
+       function <code>collect_sizes()</code>, very much as
+       <code>BlockSparsityPattern</code>. This allows
+       updating internal structures after blocks have been resized.
+       <br>
+       (GK 2002/09/17)
+       </p>
+
+  <li> <p> New: Class <code>SparseMatrix</code> has an
+       STL-conforming <code>const_iterator</code> and
+       functions <code>begin()</code> and <code
+       class="member">end()</code> for looping through all existing
+       entries. Furthermore, <code>begin(row)</code> and
+       <code>end(row)</code> allow looping through all
+       entries of a single line.
+       <br>
+       (GK 2002/09/11)
+       </p>
+
+  <li> <p>
+       New: Classes <code>SparsityPattern</code> and <code
+       class="class">SparseMatrix</code> now have functions <code 
+       class="member">block_write/block_read</code>, allowing to dump the data
+       of these objects into a file in binary format, and later to re-read it
+       without much need for parsing.
+       <br>
+       (WB 2002/09/09)
+       </p>
+
+  <li> <p>
+       New: <code>Vector</code> has a function <code
+       class="member">lp_norm</code>, computing the <i>l<sub>p</sub></i>-norm
+       of a vector for arbitrary <i>p</i>.
+       <br>
+       (GK 2002/08/13)
+       </p>
+
+  <li> <p> 
+       New: a way of using abstract base classes for matrices has
+       been implemented with <code>PointerMatrixBase</code>
+       and <code>PointerMatrix</code>. Storing a matrix in
+       <code>PointerMatrix</code> allows to use the base
+       class in functions only templated for the vector class.
+       <br>
+       (GK 2002/07/18)
+  </p>
+</ol>
+
+
+
+<a name="deal.II"></a>
+<h3>deal.II</h3>
+
+<ol>
+  <li> <p>
+       Fixed: The restriction matrices for the Q1 element in 1d had a
+       trivial bug in that one element was not set. Due to the fact
+       that contributions from all child cells are taken into account,
+       this did no harm, though, since all computations were done
+       correctly anyway.
+       <br>
+       (WB 2003/05/06)
+       </p>
+
+  <li> <p>
+       New: The <code>GeometryInfo</code> classes now
+       have new static member functions <code
+       class="member">child_cell_from_point</code> that, given a point
+       in the unit cell, returns which child cell it is on; <code
+       class="member">cell_to_child_coordinates</code> that transforms
+       coordinates between the unit coordinate systems from the mother
+       to the child cell; <code
+       class="member">child_to_cell_coordinates</code> that does
+       exactly the opposite; and <code
+       class="member">is_inside_unit_cell</code> that tells whether a 
+       given point is inside the unit cell.
+       <br>
+       (WB 2003/05/01)
+       </p>
+
+  <li> <p>
+       New: There are now functions <code
+       class="member">recursively_set/clear_user_pointer</code> that
+       do much the same as  <code
+       class="member">recursively_set/clear_user_flag</code> on a
+       line/quad/hex and all of its descendants, but on user pointers
+       rather than flags.
+       <br>
+       (WB 2003/04/11)
+       </p>
+
+  <li> <p>
+       New: The functions in the <code
+       class="class">DerivativeApproximation</code> class can now also
+       work on <code>BlockVector</code>.
+       <br>
+       (WB 2003/04/11)
+       </p>
+
+  <li> <p>
+       Changed: The cell argument to <code
+       class="member">Mapping::transform_unit_to_real_cell</code> (and its
+       reverse function) is now passed by reference, rather than by value, for
+       efficiency reasons.
+       <br>
+       (WB 2003/04/06)
+       </p>
+
+  <li> <p>
+       New: The <code
+       class="member">GridGenerator::subdivided_hyper_cube</code> function generates a
+       hypercube as coarse grid that is subdivided a given number of times.
+       <br>
+       (WB 2003/04/05)
+       </p>
+
+  <li> <p>
+       New: The <code>DataOut_DoFData::merge_patches</code>
+       function allows to merge the patches of two objects, so as to create one
+       output file from several DoF handlers. This is useful if one uses a
+       domain decomposition algorithm where each block of the domain is
+       represented by one DoF handler.
+       <br>
+       (WB 2003/04/05)
+       </p>
+
+  <li> <p>
+       Fixed: The <code>DataOutStack</code> class had a problem
+       when there were as many degrees of freedom as there were cells (i.e. if
+       we were using DG0 elements). This should now be fixed.
+       <br>
+       (WB 2003/03/02)
+       </p>
+
+  <li> <p>
+       Fixed: The <code>DataOutFaces</code> class was
+       broken for cell data. It should now be correct, although the
+       algorithm used is not optimal, being approximately quadratic in
+       runtime.
+       <br>
+       (WB 2003/02/25)
+       </p>
+
+  <li> <p>
+       New: The <code>ConstraintMatrix::shift</code>
+       function shifts and translates the elements of the constraint
+       matrix by a certain number of indices.
+       <br>
+       (Roy Stogner 2003/02/24)
+       </p>
+
+  <li> <p>
+       New: The <code>GridReordering</code> class now uses a
+       vastly better algorithm in 2d than previously. The new algorithm is
+       linear in time, where it could be exponential before.
+       <br>
+       (Michael Anderson 2003/02/21)
+       </p>
+
+  <li> <p>
+       New: There is now a function <code
+       class="member">GridIn::read_xda</code> that allows reading
+       grids from a file in XDA format.
+       <br>
+       (WB 2003/02/21)
+       </p>
+
+  <li> <p> 
+       Changed: Some implementation details of the <code
+       class="class">GridReordering</code> class have been moved to a
+       namespace <code>internal</code>.
+       <br>
+       (WB 2003/02/19)
+       </p>
+
+  <li> <p>
+       New: There are now functions <code
+       class="member">FiniteElement::prolongation_is_implemented</code> and
+       <code>FiniteElement::constraints_are_implemented</code>
+       that inform the caller about whether the finite element in question in
+       fact implements its prolongation matrices and hanging node constraints.
+       <br>
+       (WB 2003/02/15)
+       </p>
+
+  <li> <p>
+       Fixed: The <code
+       class="member">ConstraintMatrix::is_identity_constrained</code> function
+       would previously generate a segmentation fault if called on a constraint
+       matrix object that did not contain any constraints at all. This is now
+       fixed.
+       <br>
+       (WB 2003/02/15)
+       </p>
+
+  <li> <p>
+       Fixed: Objects of type <code>FESystem</code> 
+       could not be constructed if one of the elements it is to be composed of
+       did not implement interface constraints for hanging nodes. This is now
+       fixed: you can construct such a composed element, but it does not
+       implement hanging node constraints either.
+       <br>
+       (WB 2003/02/15)
+       </p>
+
+  <li> <p>
+       New: For each of the renumbering functions in the <code
+       class="class">DoFRenumbering</code> class there is now an
+       additional <code>compute_*</code>
+       function. These new functions compute and return the
+       corresponding renumbering vectors but do not perform the actual
+       renumbering on the <code>DoFHandler</code>
+       object. The behaviour of the old functions is not changed.
+       <br>
+       (RH 2003/02/03)
+       </p>
+
+  <li> <p> 
+       Fixed: The <code>GridReordering</code> tried to be
+       thread-safe in the initialization of some data, but was not due to a
+       typo. This is now fixed.
+       <br>
+       (WB 2003/01/28)
+       </p>
+
+  <li> <p> 
+       Changed: The <code>FEValues::get_cell</code> and
+       <code>FEValues::get_face</code> functions have
+       been removed, since they limited our ability to use this
+       class for other types of DoFHandlers, for example future
+       extensions for hp elements.
+       <br>
+       (WB 2003/01/17)
+       </p>
+
+  <li> <p> 
+       New: The DoF accessor classes now have a function <code
+       class="member">get_fe()</code> that returns a reference to the finite
+       element being used on this cell. The result is of course identical to
+       what a call to <code
+       class="member">iterator->get_dof_handler().get_fe()</code> would have
+       yielded.
+       <br>
+       (WB 2003/01/10)
+       </p>
+
+  <li> <p> 
+       New: Checked in new <code>GridGenerator</code> 
+       member function <code>half_hyper_ball</code>,
+       derived from member <code>hyper_ball</code>.     
+       The initial mesh contains four elements. This mesh will work with
+       the boundary class <code>HalfHyperBallBoundary</code>.
+       <br>
+       (Brian Carnes 2002/12/16)
+       </p>
+
+  <li> <p> 
+       New: Checked in new class <code>FE_Q_Hierarchical</code>
+       derived from class <code>FiniteElement</code>.  
+       This element is analogous to <code>FE_Q</code>, but 
+       makes use of hierarchical shape functions, based on the 
+       <code>Polynomials::Hierarchical</code> class. 
+       For <code>degree>1</code>, the non-nodal basis functions are "bubble"
+       functions, which are not Lagrange polynomials. Therefore, the usual
+       interpolation based on using unit support points will not work for
+       <code>degree>1</code>. It is planned to implement a different
+       interpolation-projection operator, based on an hp-type interpolant.
+       <br>
+       The files for this element are
+       <code>deal.II/include/fe/fe_q_hierarchical.h</code> and 
+       <code>deal.II/source/fe/fe_q_hierarchical.cc</code>.
+       <br>
+       (Brian Carnes 2002/12/13)
+       </p>
+
+  <li> <p> 
+       New: For finite element classes, the functions
+       <code>unit_support_point</code> and
+       <code>unit_face_support_point</code> return the position
+       of an individual support point. This is necessary when you want to get
+       information about the support points of certain components in a composed
+       finite element, where not all components provide support points, and the
+       composed element thus does not fill the array the 
+       <code>get_unit_support_points</code> function returns.
+       <br>
+       (WB 2002/11/23)
+       </p>
+
+  <li> <p> 
+       Fixed: Vectors could not be given as input and output vectors to the
+       <code>SolutionTransfer</code> class at the same time, but
+       this was not checked. An assertion has now been added to ensure this
+       requirement. 
+       <br>
+       (WB 2002/10/28)
+       </p>
+
+  <li> <p> 
+       Fixed: The <code
+       class="member">DoFRenumbering::component_wise</code> function accepts a
+       parameter indicating the order in which the degrees of freedom
+       corresponding to one vector component are to be sorted. However, it did
+       not honor this order, but always sorted them in the order in which the
+       components appear. This is now fixed.
+       <br>
+       (WB 2002/10/17)
+       </p>
+
+  <li> <p> 
+       New: The <code
+       class="member">FiniteElement::system_to_base_index</code> function now
+       exports the values of the <code
+       class="member">FiniteElement::system_to_base_table</code> member
+       variable. Likewise for the indices on faces.
+       <br>
+       (WB 2002/10/17)
+       </p>
+
+  <li> <p> 
+       New: The <code
+       class="member">FiniteElement::element_multiplicity</code> function was
+       previously only available in the <code>FESystem</code>
+       class, where it actually returned a non-trivial value. However, in some
+       cases one would need to access this field also for general finite
+       elements, even if the returned value will be equal to one in all cases
+       other than a composed element.
+       <br>
+       (WB 2002/10/17)
+       </p>
+
+  <li> <p> 
+       Fixed: The algorithm to generate neighbor information for patches from
+       cells in <code>DataOut::build_patches</code> was rather
+       inefficient, tripling the time for patch generation when support for
+       neighbor information was added. Furthermore, the algorithm was at least
+       O(N log N), where the rest was all O(N), making this particularly
+       problematic when the data set was already large. This should now be back
+       to the previous level, by using a more efficient algorithm.
+       <br>
+       (WB 2002/10/10)
+       </p>
+
+  <li> <p> 
+       Changed (internals): Previously, the finite element base class
+       initialized the restriction, prolongation, and face constraints matrices
+       to their correct size. Derived classes had to fill these classes, and
+       should have set their size back to zero in case they chose not to
+       implement them. However, we found a class that forgot to resize it to
+       zero, so it is now the other way round: they remain at size zero, and a
+       class that chooses to implement these matrices has to set them to the
+       correct size, to avoid programs that run on data that someone has just
+       forgotten to add. (This information only concerns programs that
+       implement some finite element class on their own.)
+       <br>
+       (WB 2002/09/28)
+       </p>
+
+  <li> <p>Improved: The different transfer functions in <code
+       class="class">FETools</code> operate on template vector arguments.
+       <br>
+       (GK 2002/09/24)
+       </p>
+
+  <li> <p> New: the class <code
+       class="class">FE_DGPNonparametric</code> implements finite elements
+       where shape functions are polynomials of order <i>k</i> on the
+       actual grid cell. This is achieved by evaluating the polynomials at
+       the mapped quadrature points. No grid transfer matrices are
+       available for this class.
+       <br>
+       (GK 2002/09/19)
+       </p>
+
+  <li> <p> 
+       Fixed: Some of the various instances of the <code
+       class="member">VectorTools::interpolate_boundary_values</code> functions
+       were not explicitly instantiated, leading to linker errors. This is now
+       fixed. 
+       <br>
+       (WB 2002/09/19)
+       </p>
+
+  <li> <p> 
+       Removed: The <code
+       class="member">FiniteElement::component_to_system_index</code> function
+       and its counterpart for faces is gone. This data did not make much sense
+       in the case of elements that are non-zero in more than one vector
+       component, such as the Nedelec element. The respective information can
+       also be obtained from other sources provided by the finite element
+       classes, if so necessary.
+       <br>
+       (WB 2002/09/16)
+       </p>
+
+  <li> <p> 
+       Changed: The <code
+       class="member">FiniteElement::restriction_is_additive</code> function
+       used to take an argument that denoted the vector component of a finite
+       element. This has become difficult with elements that are non-zero in
+       more than one vector component, such as the Nedelec element. Thus, the
+       semantics of the function have been changed so that the argument now
+       denotes the index of a shape function, rather than a vector
+       component. Since the function is probably not used in application code,
+       this will most probably not lead to more serious problems.
+       <br>
+       (WB 2002/09/16)
+       </p>
+
+  <li> <p> 
+       New: The mapping classes now also know how to transform tensors of rank
+       2, not only vectors (tensors of rank 1) in a co- and contravariant way.
+       <br>
+       (WB 2002/09/03)
+       </p>
+
+  <li> <p> 
+       Fixed: the <code>GridIn</code> class had problems
+       when reading in UCD grids with comment lines that contained
+       only the comment sign, but nothing else. This is now fixed.
+       <br>
+       (WB 2002/08/30)
+       </p>
+
+  <li> <p> 
+       Improved: <code>VectorTools</code>::<code
+       class="member">integrate_difference</code> can compute <i>L<sup>p</sup></i>
+       and <i>W<sup>1,p</sup></i> norms for arbitrary <i>p</i>. The function
+       receives an additional optional argument <i>p</i> for this. All previous
+       functionality remains unchanged, although the code has been cleaned up a bit.
+       <br>
+       (GK 2002/08/01)
+       </p>
+
+  <li> <p> New: The <code>GridTools</code> class now
+       offers functions to apply general transformations to all
+       vertices of a triangulation, and also more specialized
+       functions that shift, scale, and rotate triangulations.
+       <br>
+       (RH 2002/07/25)
+       </p>
+
+  <li> <p> New: The existing <code
+       class="member">FETools::extrapolate</code> function does not
+       take into account hanging node constraints. Therefore, it works
+       for continuous elements on globally refined grids and on
+       discontinuous elements, only. Now, there is a new <code
+       class="member">FETools::extrapolate</code> function with an
+       additional <code>ConstraintMatrix</code> argument
+       for the hanging node constraints. This new function works for
+       continuous as well as for discontinuous elements. But, the old
+       function is still supported.
+       <br>
+       (RH 2002/06/17)
+       </p>
+
+  <li> <p> New: There are now <code
+       class="member">Triangulation::save/load_user_pointers(vector<void
+       *> &)</code> functions similar to the respective <code
+       class="member">Triangulation::save/load_user_flags(vector<bool>
+       &)</code> functions.
+       <br>
+       (RH 2002/06/14)
+       </p>
+
+  <li> <p> Fixed: Bug in <code
+       class="member">Triangulation<3>::load_user_flags(vector<bool>
+       &)</code> is now fixed.
+       <br>
+       (RH 2002/06/14)
+       </p>
+
+  <li> <p> Fixed: <code
+       class="member">Triangulation::load_user_flags(vector<bool>
+       &)</code> erroneously threw an assertion for
+       <code>dim==1</code>. This is now fixed.
+       <br>
+       (RH 2002/06/14)
+       </p>
+
+  <li> <p> Changed: <code
+       class="member">Triangulation<1>::n_quads</code> now returns 0,
+       instead of throwing an assertion, as before. The same holds for
+       similar functions like <code
+       class="member">Triangulation<dim>::n_hexs</code> that now
+       returns 0 for <code>dim<3</code>.
+       <br>
+       (RH 2002/06/14)
+       </p>
+
+  <li> <p> Improved: Several functions like the <code
+       class="member">DoFHandler::distribute_dofs</code> and <code
+       class="member">DoFTools::make_flux_sparsity_pattern</code>
+       functions altered the <code
+       class="member">user_flags</code>. This was stated in the
+       documentation of these functions. Nevertheless, it might have
+       led to unexpected behaviour of the <code
+       class="member">user_flags</code> for users who weren't aware of
+       this <em>side-effect</em>. Now, these functions do not alter
+       the <code>user_flags</code>, any
+       more. Consequently, the users do not need to worry any more
+       about the reliability of the <code
+       class="member">user_flags</code> when calling any function of
+       the library.
+       <br>
+       (RH 2002/06/14)
+       </p>  
+
+  <li> <p> Fixed: <code
+       class="member">FE_Q::has_support_on_face</code> always returned
+       true in 1d, partly because faces are not really an issue in
+       1d. It now does so only when the support point of the
+       respective shape function is actually the requested vertex. The
+       same applied to <code
+       class="member">FE_DGQ::has_support_on_face</code>.
+       <br>
+       (WB 2002/06/13)
+       </p>
+
+  <li> <p> New: The existing <code
+       class="member">FETools::interpolate</code>, <code
+       class="member">FETools::back_interpolate</code> and <code
+       class="member">FETools::interpolate_difference</code> functions
+       do not take into account hanging node constraints. Therefore,
+       they work for continuous elements on globally refined grids and
+       on discontinuous elements, only. Now, there are new functions
+       with the same names but additional <code
+       class="class">ConstraintMatrix</code> arguments for the hanging
+       node constraints. These new functions work for continuous as
+       well as for discontinuous elements. But, the old functions are
+       still supported.
+       <br>
+       (RH 2002/06/13)
+       </p>
+
+  <li> <p> Changed: The constructor of <code
+       class="class">DoFHandler</code> now takes a reference to a
+       <code>const</code> <code>Triangulation</code>.
+       <br>
+       (RH 2002/06/12)
+       </p>
+
+  <li> <p> Changed: The constructors of all <code
+       class="class">DoFAccessor</code>, <code
+       class="class">TriaAccessor</code> and <code
+       class="class">TriaIterator</code> classes now take pointers to
+       <tt>const</tt> <code>Triangulation</code>s.
+       <br>
+       (RH 2002/06/12)
+       </p>
+
+  <li> <p> Fixed: In debug mode the <code
+       class="member">MappingQ1::transform_real_to_unit_cell</code>
+       function erroneously threw an assertion when used in 1 or 3
+       dimensions. This is now fixed.
+       <br>
+       (RH 2002/06/10)
+       </p>
+
+  <li> <p> 
+       Fixed: The <code>get_dof_indices</code>
+       functions of DoF accessor classes used to work also for
+       non-active cells. However, the results were bogus except for
+       the special case that we had a finite element that has all its
+       degrees of freedom located in vertices. This is now fixed: the
+       function throws an exception in all other cases, since there is
+       no useful meaning for it then. It continues to work in the
+       special case.
+       <br>
+       (WB 2002/06/08)
+       </p>
+
+  <li> <p> 
+       New: For encapsulated postscript output of 2d grids, it is now
+       possible to tell the <code>GridOut</code> class to
+       write the cell numbers into each cell, as well as the numbers
+       of the vertices.
+       <br>
+       (WB 2002/06/04)
+       </p>
+</ol>
+
+
+*/
diff --git a/doc/news/4.0.0-vs-5.0.0.h b/doc/news/4.0.0-vs-5.0.0.h
new file mode 100644
index 0000000..b8252f8
--- /dev/null
+++ b/doc/news/4.0.0-vs-5.0.0.h
@@ -0,0 +1,872 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013, 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/**
+ * @page changes_between_4_0_and_5_0 Changes between Version 4.0 and 5.0
+ 
+<p>
+This is the list of changes made between the deal.II releases listed above.
+All entries are signed with the names of the authors. Regular
+contributors' names are abbreviated by WB (Wolfgang Bangerth), GK
+(Guido Kanschat), RH (Ralf Hartmann).
+</p>
+
+
+<a name="incompatible"></a>
+<h3 style="color:red">Incompatibilities</h3>
+
+<p style="color:red">
+Following are a few modifications to the library that unfortunately
+are incompatible with previous versions of the library, but which we
+deem necessary for the future maintainability of the
+library. Unfortunately, some of these changes will require
+modifications to application programs. We apologize for the
+inconvenience this causes.
+</p>
+
+
+<ol>
+  <li> <p> Removed: All the matrix classes have functions <code>reinit</code> that are used to resize the
+       matrix. However, the sparse matrix classes had an equally named
+       function without arguments that basically left the size of the matrix
+       unchanged but set all elements of the matrix to zero. It could also be
+       abused to notify the matrix that the sparsity pattern on which it is
+       built has changed, an inherently risky procedure. The no-argument code
+       <code>reinit</code> function has therefore been removed to
+       avoid confusion with the <code>reinit</code> functions
+       that take arguments. Instead, you should now use <code>matrix=0</code> to simply set all elements of the
+       matrix to zero. If you want to notify a sparse matrix that its sparsity
+       pattern has changed, use the <code>reinit(SparsityPattern)</code> function.
+       <br> 
+       (WB 2004/05/10)
+       </p>
+
+  <li> <p> Removed: All the vector and block vector classes as well as
+       the <code>FullMatrix</code> class (the latter
+       through its <code>Table</code> base class) had a
+       member function <code>clear</code> which simply
+       resets all values of the vector or matrix to zero. It did not
+       change the size of the object, though. This was confusing,
+       since the standard C++ container classes implement the
+       semantics that the <code>clear</code> functions
+       delete all entries of the containers, i.e. resize it to zero,
+       and we implemented similar semantics also for the <code>SparseMatrix</code>, <code>DoFHandler</code>, <code>ConstraintMatrix</code> and various other
+       classes.
+       <br>
+       To avoid this confusion in the future, the <code>clear</code> functions have been dropped from
+       the interface of the vector and full matrix classes, and the
+       remaining instances where deal.II classes have a function of
+       that name really mean that the object is reset to its virginal
+       state. To set all
+       elements of a matrix or vector to zero without changing its size, the
+       recommended way is to use the more obvious notation <code>vector=0</code> and <code>matrix=0</code>. To
+       reset the elements of a table over arbitrary objects, use
+       <code>Table<T>::reset_values()</code>.
+       <br> 
+       (WB 2004/05/10)
+       </p>
+
+  <li> <p> Removed: The <code>SparseLUDecomposition::reinit</code> and <code>SparseMIC::reinit</code> functions without
+       argument had been deprecated before, and have now been removed.
+       <br> 
+       (WB 2004/05/10)
+       </p>
+
+  <li> <p> Changed: the template parameter of <code>MGTransferPrebuilt</code> is now the complete vector
+       type, not just the value type of the vector. This allows to operate
+       on <code>Vector</code> as well as on <code
+       class="class">BlockVector</code>. Unfortunately, the untested class
+       <code>MGTransferBlock</code> underwent some more
+       changes without testing, such that it should be used with high
+       caution.
+       <br>
+       (GK 2004/04/01)
+       </p>
+
+  <li> <p> Changed: The <code>FiniteElement</code> classes had a
+       function <code>restrict</code> that returns the
+       restriction matrix from children to the mother cell. Unfortunately,
+       <code>restrict</code> has become a keyword in recent standards of the C
+       language, and some C++ compilers have picked this up. The function has
+       therefore been renamed <code
+       class="member">get_restriction_matrix</code>, which also better
+       reflects what it is actually doing. To keep consistent, we have also
+       renamed the corresponding function <code>prolongate</code> to <code>get_prolongation_matrix</code>. 
+       <br>
+       (WB 2004/02/29)
+       </p>
+
+  <li> <p>
+       Fixed and changed: The <code>SolutionTransfer</code><code>::(refine_)interpolate(const Vector &in, Vector
+       &out)</code> functions now require the <code>in</code> and <code>out</code>
+       vectors being already of right sizes,
+       i.e. <code>in.size()=n_dofs_old</code> and
+       <code>out.size()=n_dofs_refined</code>. Furthermore, the <code>SolutionTransfer</code><code>::(refine_)interpolate(const
+       vector<Vector> &all_in, vector<Vector>
+       &all_out)</code> now check that the number of in and output
+       vectors are the same, i.e.
+       <code>all_in.size()=all_out.size()</code>.
+       <br>
+       (RH 2003/10/24)
+       </p>
+
+  <li> <p>
+       Changed: The <code>QProjector</code> has functions that
+       project a lower-dimensional quadrature formula onto all faces or
+       subfaces of a cell. In 3d, it now does this but also adds projections of
+       these quadrature formulas onto the faces from the other side. We need
+       this due to the fact that we now support faces in 3d that have a normal
+       vector opposite to the standard direction.
+       <br> 
+       (WB 2003/10/17)
+       </p>
+
+  <li> <p> Moved and changed: The header file
+       <tt>include/numerics/dof_renumbering.h</tt> has been moved to the 
+       directory <tt>include/dofs</tt>, where it logically
+       belongs. Furthermore, the sorting parameter of the function <code>DoFRenumbering</code><code>::component_wise</code> has changed its meaning. See
+       the reference documentation for details.
+       <br>
+       (GK 2003/07/03)
+       </p>
+</ol>
+
+
+<a name="general"></a>
+<h3>General</h3>
+
+<ol>
+
+  <li> <p> New: After the documentation tool for deal.II has been
+  changed to <a href="http://www.doxygen.org">Doxygen</a>, it is delivered in two
+  tar-files. In addition to the traditional source tarball, the preprocessed
+  documentation is available ready for reading with a web browser.
+  <br>
+  (GK 2004/05/04)
+  </p>
+
+  <li> <p> New:
+       The step-15 example is about solving a nonlinear 1d problem, and
+       dealing with transporting a solution across mesh refinement. Step-16 is
+       still not finished.
+       <br>
+       (WB 2004/04/17)
+       </p>
+
+  <li> <p> New:
+       The step-17 example program shows how to use the new PETSc wrapper
+       classes, and how to do large-scale computations with up to many
+       millions of unknowns on a cluster of computers. This program shows
+       that deal.II is well-suited for truly massive parallel
+       computations. The step-15 and step-16 programs have not yet been
+       finished (despite having been started long before step-17), which
+       explains the holes in the numbering of example programs.
+       <br>
+       (WB 2004/04/12)
+       </p>
+
+  <li> <p> New: deal.II is now able to interface to the 
+       <a href="http://www-users.cs.umn.edu/~karypis/metis/index.html"
+       target="_top">METIS</a> library to generate domain partitionings. This
+       is enabled if a METIS installation is detected, which either happens
+       automatically in <code>./configure</code>, or
+       by passing a value to the switch <code>--with-metis</code> to configure
+       the path of a METIS installation. For more information see the README
+       file. 
+       <br>
+       (WB 2004/03/08)
+       </p>
+
+  <li> <p>
+       New: We now support MIPSpro compiler version 7.41 from SGI. deal.II
+       now runs on IRIX64 machines in 64-bit mode.
+       <br>
+       Please note that we cannot support earlier MIPSpro compilers
+       because the compiler version 7.3 was not C++ standard
+       conforming. Version 7.4 is standard conforming but still too
+       buggy.
+       <br>
+       (RH 2004/03/08)
+       </p>
+
+  <li> <p> New: deal.II now comes with a complete set of
+       wrappers classes for <a href="http://www.mcs.anl.gov/petsc/"
+       target="_top">PETSc</a> vectors, matrices, linear solvers and 
+       preconditioners. Many of the algorithms in deal.II have also been
+       updated to make use of these wrappers. All of this is only enabled if a
+       PETSc installation is detected. This either happens automatically in
+       <code>./configure</code>, or
+       by passing values to the switches <code>--with-petsc</code> and
+       <code>--with-petsc-arch</code> to configure path and architecture of an
+       existing PETSc installation. If these switches are not used, then
+       environment variables are searched for this information. For more
+       information see the README file.
+       <br>
+       (WB 2004/03/01)
+       </p>
+
+  <li> <p>
+       Changed: The part of the boost library in the <tt>contrib</tt>
+       directory is now updated to boost version 1-30.2. We include
+       only a minimal part (about 3% in size) of boost which is needed
+       to compile deal.II. For the case the compilation of deal.II on
+       some compiler/platforms requires boost files in addition to
+       those included in the <tt>contrib</tt> directory please report
+       the missing boost files to the deal.II mailing list.
+       <br> 
+       (RH 2004/02/16)
+       </p>
+
+  <li> <p>
+       Changed: We don't support compilation by Intel's icc compiler version 5
+       anymore. Since the present version of this compiler is 8, this is
+       probably not a real problem.
+       <br> 
+       (WB 2003/12/20)
+       </p>
+
+  <li> <p>
+       Fixed: <code>step-9</code> had the computation of the value of the
+       solution in the mid-point of the neighbor cell wrong. This is now
+       fixed. Consequently, the resulting mesh also looks much nicer now (much
+       smoother). 
+       <br> 
+       (<a href="mailto:werner.scholz at tuwien.ac.at">Werner Scholz</a>
+        2003/12/11)
+       </p>
+
+  <li> <p>
+       New: The <code>config.h</code> file now declares a variable <code
+       class="member">deal_II_numbers::invalid_unsigned_int</code>.
+       It is a representation of the largest number that can be put into an
+       unsigned integer. This value is widely used throughout the library as a
+       marker for an invalid unsigned integer value, such as an invalid array
+       index, an invalid array size, and the like.
+       <br> 
+       (WB 2003/10/24)
+       </p>
+
+  <li> <p>
+       Augmented: The <code
+       class="member">GeometryInfo::child_cell_on_face</code> 
+       results in a result that might not be what you expect in 3d in some
+       cases. A warning has been added to the documentation, as well as a
+       reference to the function to call instead.
+       <br> 
+       (WB 2003/10/17)
+       </p>
+
+  <li> <p> Fixed: The step-14 program had a bug in the rare case that
+       there are more CPUs in a machine than there are cells. This is
+       now fixed.
+       <br>
+       (WB 2003/09/23)
+       </p>
+
+  <li> <p> Fixed: In the step-14 example program, overly conservative
+       quadrature formulas were chosen (with 2N+1 quadrature points per space
+       dimension, where N is the degree of polynomials). This is unnecessary,
+       and now fixed.
+       <br>
+       (WB 2003/09/23)
+       </p>
+
+  <li> <p> Fixed: On AIX systems, the xlf Fortran77 compiler wasn't recognized 
+       due to the fact that when called with -v it generates many pages
+       of output, later leading to a shell error. This is now fixed.
+       <br>
+       (WB 2003/09/19)
+       </p>
+
+  <li> <p> Fixed: The elastic example program, step-8, had a bug in the way
+       the system matrix is assembled. In effect, we were solving the
+       wrong equation. This should now be fixed.
+       <br>
+       (WB 2003/09/11)
+       </p>
+
+  <li> <p> Fixed: When building with both sparse direct solver MA27 and the
+       TECPLOT libraries, the <code>detached_ma27</code> would not
+       link properly. This is now fixed.
+       <br>
+       (WB 2003/09/05)
+       </p>
+
+  <li> <p> Improved: The script that builds the dependency lists for Makefiles
+       has been rewritten in C++, since the previous perl script
+       became just too slow after the addition of parts of
+       BOOST. Using the old perl script should still work, although it
+       simply forwards to the new program. In order to use the new
+       one, simply replace the line
+       <code><pre>
+         \$(PERL) \$D/common/scripts/make_dependencies.pl ...
+       </pre></code>
+       by
+       <code><pre>
+         \$D/common/scripts/make_dependencies ...
+       </pre></code>
+       i.e. call the program directly without the perl interpreter and
+       without the file extension for a perl program.
+       <br>
+       (WB 2003/08/19)
+       </p>
+
+  <li> <p> New: First steps to a migration of the documentation from
+       <tt>kdoc</tt> to <a href="http://www.doxygen.org">Doxygen</a> have
+       been done. It can be generated after installing <a
+       href="http://www.doxygen.org">Doxygen</a> by calling <tt>make</tt>
+       in <tt>doc/doxygen</tt> and using the preliminary link page <a
+       href="../doxygen/index.html">index.html</a> in that directory.
+       <br>
+       (GK 2003/08/02)
+       </p>
+</ol>
+
+
+
+<a name="base"></a>
+<h3>base</h3>
+
+<ol>
+  <li> <p> 
+       New: There is now a new <code>PolynomialsP</code>
+       class which is based on <code>Polynomials::Monomial</code> and <code>PolynomialSpace</code>. In contrast to the
+       default ordering of the polynomials in <code>PolynomialSpace</code>, (e.g. for degree=2) <i>1,
+       x, x<sup>2</sup>, y, xy, y<sup>2</sup></i>, the <code>PolynomialsP</code> class now gives the
+       (natural?!)  ordering <i>1, x, y, xy, x<sup>2</sup>,
+       y<sup>2</sup></i>.
+       <br>
+       (RH 2004/03/11)
+       </p>
+
+  <li> <p> 
+       New: The classes <code>PolynomialSpace</code> and
+       <code>TensorProductPolynomials</code> now have
+       new <code>set_numbering</code> functions which
+       allow to change the ordering of the polynomials. The ordering
+       and the indices of the polynomials can be checked by using the
+       new <code>output_indices</code> functions.
+       <br>
+       (RH 2004/03/11)
+       </p>
+
+  <li> <p> New: The class <code>PolynomialsBDM</code> implements BDM polynomials in
+       two dimensions on the unit square. It was implemented as is
+       according to some urgent need, but should be suitable to be fit
+       into a <code>FiniteElement</code> similar to
+       Raviart/Thomas.
+       <br>
+       (GK 2004/01/05)
+       </p>
+
+  <li> <p> New: Objects of type <code>Polynomial</code>
+       can now be added to and subtracted from each other through
+       operators <code>+=</code> and <code>-=</code>.
+       <br>
+       (GK 2003/12/16)
+       </p>
+
+  <li> <p> New: There is now a class <code>QuadratureSelector</code> that allows to select a
+       quadrature formula based on a string argument and possibly a
+       number indicating the order of the formula.
+       <br>
+       (Ralf B. Schulz 2003/10/29)
+       </p>
+
+  <li> <p> Fixed: The constructor of the <code>QGauss</code> class
+       computed positions and weights of quadrature points in long double accuracy.
+       However, on machines where long double is the same as double, it 
+       never reached the requested accuracy, in effect leading to an infinite loop.
+       This is now fixed.
+       <br>
+       (WB 2003/09/19)
+       </p>
+
+  <li> <p> New: The <code>Function</code> class now
+       exports the value of its template argument through the static
+       member variable <code>dimension</code>.
+       <br>
+       (WB 2003/09/15)
+       </p>
+
+  <li> <p> Changed: The <code>ParameterHandler::declare_entry</code> function
+       now allows to redeclare an entry that has already been
+       declared. This can be used to override a default value
+       previously set.
+       <br>
+       (WB 2003/09/03)
+       </p>
+
+  <li> <p> Improved: The <code>ParameterHandler::declare_entry</code> function now takes an
+       additional parameter (defaulting to the empty string) that can be used
+       to document the intent of a parameter. This string, together with the
+       default value of an entry, is written to the output by the <code>ParameterHandler::print_parameters</code> function that
+       can be used to generate a virginal parameter file, or one that contains
+       the settings of all parameters used in a computation.
+       <br>
+       (WB 2003/08/13)
+       </p>
+
+  <li> <p> Changed: The <code>ParameterHandler::declare_entry</code> previously
+       returned a value indicating whether the just-declared entry didn't
+       already exist and that the default value matches the given
+       pattern. However, this value could only always be true since these two
+       conditions were already guarded by assertions in the implementation at
+       least in debug mode, so the return value was meaningless. The function
+       has now no return type any more.
+       <br>
+       (WB 2003/08/13)
+       </p>
+
+  <li> <p> Improved: <code>Logstream</code>::<code>depth_console</code>, <code>Logstream</code>::<code>depth_file</code>, <code>Logstream</code>::<code>log_execution_time</code> and <code>Logstream</code>::<code>log_time_differences</code> return the previous value.
+       <br>
+       (GK 2003/06/22)
+       </p>
+</ol>
+
+
+
+<a name="lac"></a>
+<h3>lac</h3>
+
+<ol>
+  <li> <p> Improved: The matrix-vector operations of <code>SparseMatrix</code> accept arguments of type <code>BlockVector</code>.
+  <br>
+  (GK 2004/03/31)
+  </p>
+
+  <li> <p> Fixed: The <code>SparseMatrix</code> iterator classes
+       had various interesting bugs when rows of the matrix were completely
+       empty. These should now be fixed.
+       <br>
+       (WB 2004/03/30)
+       </p>
+
+  <li> <p> New: The <code>SparsityPattern</code> class now also
+       has an iterator class that allows to walk over all nonzero entries of a
+       sparsity pattern.
+       <br>
+       (WB 2004/03/29)
+       </p>
+
+  <li> <p> New: The iterator classes into <code>SparseMatrix</code> have been rearranged and extended, so
+       that it is now also possible to write to individual matrix entries
+       through these iterators.
+       <br>
+       (WB 2004/03/29)
+       </p>
+
+  <li> <p> New: The <code>Vector</code> and <code>BlockVector</code> classes now have member functions
+       <code>is_non_negative</code> that check whether a vector
+       has no negative entries.
+       <br>
+       (WB 2004/02/29)
+       </p>
+
+  <li> <p> Fixed: The <code>SolverMinRes</code> class had a nasty bug where we were
+       inadvertently copying vectors; this could also have led to a memory
+       corruption bug. This is now fixed.
+       <br>
+       (WB 2004/02/26)
+       </p>
+
+  <li> <p> New: There is now a function <code>FullMatrix::add_scaled</code>. It replaces the old
+       function <code>FullMatrix::add</code> which did the same,
+       but had a name that was incompatible with respective functions in the
+       other matrix classes.
+       <br>
+       (WB 2004/02/23)
+       </p>
+
+  <li> <p> New: <code>FullMatrix</code> has new functions <code>add</code> and <code>Tadd</code>
+       allowing to add to a selected block of the matrix.
+       <br>
+       (GK 2004/02/12)
+       </p>
+
+  <li> <p> New: The <code>Vector</code> class now has operators to compare for
+       equality and inequality.
+       <br>
+       (WB 2004/02/09)
+       </p>
+
+  <li> <p> New: The <code>SparseMatrix::operator()</code> generated an assertion
+       failure if the requested entry in the matrix isn't there. This has been
+       changed so that it actually throws an exception instead, also in
+       optimized mode.
+       <br>
+       (WB 2004/02/06)
+       </p>
+
+  <li> <p> New: There is now a function <code>SparseMatrix::frobenius_norm</code> that computes the
+       Frobenius norm of a sparse matrix.
+       <br>
+       (WB 2004/02/06)
+       </p>
+
+  <li> <p> Changed: In matrix-vector operations of the <code>Full/SparseMatrix</code> classes, source and destination
+       cannot be the same. We now also check that this is indeed the case.
+       <br>
+       (WB 2004/01/26)
+       </p>
+  
+  <li> <p> Improved: Initialization routines of class <code>SparseMatrix</code> have an additional parameter
+       controlling the storage of diagonal entries.
+       <br>
+       (GK 2003/11/18)
+       </p>
+
+  <li> <p> New: 
+       <code>SolverFGMRES</code> implements the flexible
+       GMRES method with varying preconditioner from the right. It is
+       also accessible in <code>SolverSelector</code> by choosing <tt>fgmres</tt>.
+       <br>
+       (GK 2003/10/07)
+       </p>
+
+  <li> <p> Changed: The <code>SparseDirectMA27</code>
+       class used to store a pointer to the sparsity pattern of the
+       matrix. It now releases this as soon as it doesn't need it any
+       more.
+       <br>
+       (WB 2003/09/09)
+       </p>
+
+  <li> <p> New: Some of the member matrix-vector functions of the
+       <code>BlockSparseMatrix</code> class that
+       previously could only be used with arguments of type <code>BlockVector</code> can now also be used with the
+       usual <code>Vector</code> class provided the
+       block matrix has only one block row or column.
+       <br>
+       (WB 2003/09/09)
+       </p>
+
+  <li> <p> Fixed: <code>FullMatrix</code>::<code>copy_from</code> didn't compile when copying
+       from a sparse matrix. This is now fixed.
+       <br>
+       (Ralf B. Schulz 2003/09/04)
+       </p>
+
+  <li> <p> New: The classes <code>FullMatrix</code> and
+       <code>PreconditionBlockJacobi</code> have a <code>const_iterator</code>.
+       <br>
+       (GK 2003/07/18)
+       </p>
+</ol>
+
+
+
+<a name="deal.II"></a>
+<h3>deal.II</h3>
+
+<ol>
+  <li> <p> Improved: The <code>DoFTools::compute_Cuthill_McKee</code> function
+       needs to build a sparsity pattern for its operations, and uses
+       the <code>DoFHandler::max_couplings_per_dof</code>
+       function for this. However, the estimates returned by the
+       latter function are rather bad in 3d, leading to excessive
+       memory allocation in the Cuthill McKee algorithm in 3d. This is
+       fixed by using an intermediate compressed sparsity pattern
+       instead if we are in 3d.
+       <br>
+       (WB 2004/05/18)
+  </p>
+
+  <li> <p> Improved: <code>Triangulation</code> has
+       functions <code>n_faces</code> and <code>n_active_faces</code>, globally as well as by level,
+       similar to <code>n_cells</code>.
+       <br>
+       (GK 2004/05/18)
+  </p>
+
+  <li> <p>
+       New: Added support for <a href="http://www.geuz.org/gmsh/"
+       target="_top">gmsh</a> mesh format in <code>GridIn::read_msh</code>.
+       <br>
+       (Luca Heltai 2004/04/21)
+       </p>
+              
+  <li> <p>
+       New: The function <code>GridGenerator::cylinder_shell</code> generates a domain
+       of the type which the name suggests.
+       <br>
+       (WB 2004/04/19)
+       </p>
+              
+  <li> <p>
+       Changed: The <code>KellyErrorEstimator::estimate</code> function takes an
+       additional parameter that lets it only compute error indicators for a
+       certain subdomain. This is meant to allow for a better parallelization
+       of efforts in parallel programs.
+       <br>
+       (GK 2004/04/01)
+       </p>
+
+  <li> <p>
+       Changed: <code>MGTransferSelect</code> uses target components
+       correctly. Unfortunately, the untested class <code>MGTransferBlock</code> does not work anymore. Since its
+       usefulness was not clear anyway, this state may continue for a while.
+       <br>
+       (GK 2004/04/01)
+       </p>
+
+  <li> <p>
+       New: There is now a new <code>FE_Poly</code>
+       class which is templatized for polynomial spaces like <code>TensorProductPolynomials</code>, <code>PolynomialSpace</code> or <code>PolynomialsP</code>. Many finite element classes
+       are now derived from this class and the implementation of all
+       common functionality is now moved from these finite element
+       classes to <code>FE_Poly</code>.
+       <br>
+       (RH 2004/03/18)
+       </p>
+
+  <li> <p> New: The new function <code>MatrixTools::local_apply_boundary_values</code> folds
+       boundary value information into local matrices and vectors before they
+       are written into the global matrix and vector. This way, the final call
+       to  <code>MatrixTools::apply_boundary_values</code> can
+       be avoided.
+       <br>
+       (WB 2004/03/16)
+       </p>
+
+  <li> <p> New: There are now functions <code>ConstraintMatrix::distribute_local_to_global</code> that
+       take a local matrix or vector and distribute it to a global one, but
+       taking care of constrained degrees of freedom; in that case, the
+       respective entries are already distributed to the final place in the
+       global matrix or vector. That way, the final call to the <code>ConstraintMatrix::condense</code> function can be
+       avoided.
+       <br>
+       (WB 2004/03/15)
+       </p>
+
+  <li> <p> New: The new functions <code>SparsityPattern::partition</code>, <code>GridTools::partition_triangulation</code>, <code>DoFTools::get_subdomain_association</code>, <code>DoFTools::count_dofs_with_subdomain_association</code>,
+       <code>GridTools::get_subdomain_association</code>, <code>GridTools::count_cells_with_subdomain_association</code>, 
+       and <code>DoFRenumbering::subdomain_wise</code> can now
+       be used to generate partitions of a triangulation and its associated
+       degrees of freedom suitable for parallel computations with PETSc.
+       <br>
+       (WB 2004/03/08)
+       </p>
+
+  <li> <p> Improved: When eliminating nodes from a matrix using the <code>ConstraintMatrix::condense</code> functions, the
+       diagonal entry was set to one. It is now set to an entry that more
+       resembles the size of the other diagonal entries, so that we don't run
+       into scaling problems for applications that have very large or small
+       domains.
+       <br>
+       (WB 2004/03/02)
+       </p>
+
+  <li> <p> Changed: The classes <code>DataOut*</code> and <code>KellyErrorEstimator</code> have been generalized to take
+       more and different vector types as input parameters. In particular,
+       they now take normal and block vectors over doubles and floats, as well
+       as PETSc vectors if PETSc support is detected during configuration of
+       the library.
+       <br>
+       (WB 2004/03/01)
+       </p>
+
+  <li> <p> Changed: The template parameter of the functions in the <code>GridRefinement</code> class have been changed. Where they
+       previously denoted the type over which the <code>Vector</code> class is to be templated, they now mean the
+       actual vector class. Thus, they can be any other template class as long
+       as there are suitable operations defined over them. In addition,
+       the documentation stated that they must be vectors of floats; this
+       hasn't been true any more for quite a while already, and is duly
+       removed from the documentation.
+       <br>
+       (WB 2004/02/28)
+       </p>
+
+  <li> <p>
+       New: The function
+       <code>FETools::project_dg</code>
+       performs <i>L<sup>2</sup></i>-projections between finite element spaces
+       of different degrees on the same mesh.
+       <br>
+       (GK 2003/11/28)
+       </p>
+
+  <li> <p>
+       Improved: <code>FiniteElementData</code> has a function
+       <code>tensor_degree()</code>, returning the degree of the
+       polynomial space suitable for choosing a tensor product quadrature
+       formula. 
+       <br>
+       (GK 2003/11/28)
+       </p>
+
+  <li> <p>
+       New: Long requested but never implemented before in the
+       library: there is now a function <code>GridTools::find_active_cell_around_point</code>
+       that, given a point, finds the active cell in which this point
+       lies.
+       <br>
+       (WB 2003/10/30)
+       </p>
+
+  <li> <p>
+       New: <code>MGCoarseGridHouseholder</code>
+       implements a coarse grid solver using QR-factorization.
+       <br>
+       (GK 2003/10/17)
+       </p>
+
+  <li> <p>
+       Fixed: The <code>FEFaceValuesBase::boundary_form</code>
+       function was declared but not implemented. This is now fixed.
+       <br>
+       (Jörg R. Weimar 2003/10/22)
+       </p>
+
+  <li> <p>
+       Improved: The <code>MatrixCreator::create_mass_matrix</code>
+       functions are now templatized also on the template argument of
+       the <code>SparseMatrix</code> class. This allows
+       invoking this function for <code>SparseMatrix<double></code> and <code>SparseMatrix<float></code> objects.
+       <br>
+       (RH 2003/10/22)
+       </p>
+
+  <li> <p>
+       New: There is now also a function <code>MGDoFCellAccessor::neighbor_child_on_subface</code>
+       that returns the result of the <code>CellAccessor::neighbor_child_on_subface</code>
+       function but converts it so that one can also access MGDoF
+       data.
+       <br>
+       (RH 2003/10/22)
+       </p>
+
+  <li> <p>
+       New: There are now functions <code>CellAccessor::neighbor_child_on_subface</code> and <code>DoFCellAccessor::neighbor_child_on_subface</code>
+       that should be called instead of using <code>GeometryInfo::child_cell_on_face</code> in most cases.
+       <br> 
+       (WB 2003/10/17)
+       </p>
+
+  <li> <p>
+       New: <code>GridGenerator</code> has a new
+       function <code>subdivided_hyper_rectangle</code> 
+       which generates a rectangle with given corner points and possibly 
+       different numbers of subdivisions in different directions.
+       Use it, e.g., to generate a domain of 1*4 length units
+       with square cells.
+       <br> 
+       (Joerg Weimar 2003/09/09)
+       </p>
+
+  <li> <p>
+       Improved: The 3d grid reordering code in the <code>GridReordering</code> class now uses an algorithm
+       that is linear in the number of elements. The old code was
+       exponential, so this is a vast improvement.
+       <br> 
+       (Michael Anderson 2003/09/23)
+       </p>
+
+  <li> <p>
+       Improved: <code>GridOut</code> has an improved
+       functionality for <code>write_eps</code> 
+       to color the grid according to the refinement level.
+       A corresponding option is included in 
+       <code>GridOutFlags::Eps<2></code>.
+       <br> 
+       (Joerg Weimar 2003/09/09)
+       </p>
+
+  <li> <p> New: The <code>TriaAccessor</code>::<code>point_inside</code> function is now also
+       implemented in 3d.
+       <br>
+       (Joerg Weimar, WB 2003/09/04)
+       </p>
+
+  <li> <p> New: The <code>TriaAccessor</code>::<code>recursively_set_material_id</code> function sets
+       the material id of the present cell and of all its children,
+       grandchildren, etc to the given value.
+       <br>
+       (WB 2003/09/04)
+       </p>
+
+  <li> <p> New: The new <code>FETools</code>::<code>get_fe_from_name</code> function can do the
+       reverse of the <code>FiniteElement</code>::<code>get_name</code> function: it takes a string and
+       parses it to regenerate a finite element from it. Useful for
+       parsing finite element names from input files.
+       <br>
+       (WB 2003/07/08)
+       </p>
+
+  <li> <p> New: The <code>DataOut_DoFData</code>::<code>merge_patches</code> now takes a second
+       parameter that indicates a shift for each vertex of the given
+       patches to be merged. This is sometimes nice if one wants to
+       generate "exploded" views of a collection of subdomains. It is
+       also templatized on the first argument, so can merge in some
+       other <code>DataOut_DoFData</code> that create
+       the same type of patches but are otherwise different.
+       <br>
+       (WB 2003/06/30)
+       </p>
+
+  <li> <p> Fixed: The <code>FETools</code>::<code>extrapolate</code> function operates on patches
+       of cells, but didn't check whether the grid is at least refined
+       once everywhere. If this was not the case, it would generate
+       wrong results. It now checks for this, and if the grid has
+       unrefined coarse grid cells, an exception is generated.
+       <br>
+       (WB 2003/06/25)
+       </p>
+
+  <li> <p>
+       Improved: <code>FEFaceValuesBase</code> has a new
+       function <code>orientation</code> accessing a unique
+       and consistent orientation for each face.
+       <br> 
+       (GK 2003/06/23)
+       </p>
+
+  <li> <p> 
+       Changed: Embedding and restriction matrices for intergrid transfer are
+       now computed in the constructor of most elements, rather than taken from
+       precomputed and tabulated values. This removes restrictions on which
+       elements are available since the old tables were only precomputed for
+       certain polynomial degrees and are now available for all.
+       <br>
+       (WB 2003/06/09)
+       </p>
+
+  <li> <p> 
+       New: Finite elements got a function <code>get_interpolation_matrix</code>, with which they can
+       compute interpolations between different finite elements. Most will use
+       this to compute interpolations between finite elements of the same kind
+       but different polynomial degrees. The <code>FETools::get_interpolation_matrix</code> makes use of
+       this function if it implements the desired interpolation, and falls back
+       to the old algorithm if this is not the case.
+       <br>
+       (WB 2003/06/09)
+       </p>
+
+  <li> <p> 
+       New: Finite elements got a function <code>get_name</code>, which can be used to identify a finite
+       element by its name.
+       <br>
+       (WB 2003/06/09)
+       </p>
+
+  <li> <p> 
+       New: Raviart-Thomas elements are now implemented in the <code>FE_RaviartThomas</code> class.
+       <br>
+       (WB 2003/06/09)
+       </p>
+</ol>
+
+
+*/
diff --git a/doc/news/5.0.0-vs-5.1.0.h b/doc/news/5.0.0-vs-5.1.0.h
new file mode 100644
index 0000000..9a75ada
--- /dev/null
+++ b/doc/news/5.0.0-vs-5.1.0.h
@@ -0,0 +1,568 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013, 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/**
+ * @page changes_between_5_0_and_5_1 Changes between Version 5.0 and 5.1
+
+<p>
+This is the list of changes made between the deal.II releases listed above.
+All entries are signed with the names of the authors. Regular
+contributors' names are abbreviated by WB (Wolfgang Bangerth), GK
+(Guido Kanschat), RH (Ralf Hartmann).
+</p>
+
+
+<a name="incompatible"></a>
+<h3 style="color:red">Incompatibilities</h3>
+
+<p style="color:red">
+Following are a few modifications to the library that unfortunately
+are incompatible with previous versions of the library, but which we
+deem necessary for the future maintainability of the
+library. Unfortunately, some of these changes will require
+modifications to application programs. We apologize for the
+inconvenience this causes.
+</p>
+
+
+<ol>
+
+  <li> <p> Changed: The call to <code
+  class="class">MGTransferBlock</code>::<code
+  class="member">build_matrices</code> and to the same function in
+  derived classes receives a <code>DoFHandler</code> as
+  an additional argument. It is needed to prebuild the transfer
+  between vectors and multigrid vectors, since the loop over all cells
+  is too slow.
+  <br>
+  (GK 2004/08/27)
+  </p>
+
+  <li> <p>
+       Changed: Previously, parallel PETSc matrices only took the sizes of the
+       matrix and the number of rows to be stored locally as arguments. They
+       chose the partitioning of columns to be the same as that for the
+       rows. However, this does not work for non-square parallel matrices,
+       and leads to very hard to find errors. Therefore, all constructors and
+       reinit functions of the parallel sparse matrix classes now take an
+       additional argument indicating the partitioning of columns of the matrix.
+       <br> 
+       (WB 2004/06/02)
+       </p>
+</ol>
+
+
+
+<a name="general"></a>
+<h3>General</h3>
+
+<ol>
+  <li> <p>
+       New: Support of shared libraries under Cygwin / Windows systems. Shared 
+       libraries are used as default. To use, make sure the library path is included
+       in your <code>PATH</code> environment variable.
+       <br>
+       (Ralf B. Schulz, 2004/12/20)
+       </p>
+
+  <li> <p>
+       Fixed: Examples do not use obsolete classes
+       <code>QGaussN</code> anymore.
+       <br>
+       (GK 2004/12/18)
+       </p>
+
+  <li> <p>
+       Fixed: Configuration scripts were changed to allow any file suffix for
+       shared and static libraries to allow in the future the creation of dlls
+       under cygwin.
+       <br>
+       (Ralf B. Schulz, 2004/12/17)
+       </p>
+
+  <li> <p>
+       New: Configuration now detects the Intel Fortran compiler and can set 
+       compilation flags accordingly.
+       <br>
+       (WB 2004/11/04)
+       </p>
+
+  <li> <p>
+       Extended: deal.II 5.0.0 didn't work with the PETSc release 2.2.1 (which
+       came out after we released version 5.0.0). This should now be fixed:
+       deal.II can be linked against both PETSc 2.2.0 and 2.2.1.
+       <br>
+       (WB 2004/10/07)
+       </p>
+
+  <li> <p>
+       Improved: The documentation generated by Doxygen now states the header
+       file defining a class. Furthermore, all deal.II
+       exceptions are listed as classes and will soon be found in the module
+       <tt>Exceptions</tt>.
+       <br>
+       (GK 2004/09/16)
+       </p>
+
+  <li> <p>
+       New: configuration option <tt>--with-umfpack</tt> for using the 
+       UMFPack version and enabling the class <code
+       class="class">SparseDirectUMFPACK</code>, both included by
+       Wolfgang.
+       <br>
+       Deal.II comes with its own copy of the <a
+       href="http://www.cise.ufl.edu/research/sparse/umfpack/">UMFPACK</a>
+       library, courtesy of its author. In order to use it, follow the steps
+       listed <a href="../readme.html#umfpack">here</a>. Note that UMFPACK
+       comes with its own license; to use UMFPACK, please read the <a
+       href="../readme.html#license">ReadMe</a> file and make sure that you
+       agree with its license.
+       <br>
+       (GK 2004/08/25)
+       </p>
+
+  <li> <p>
+       Fixed: None of the formulas in the step-8 tutorial program web
+       page were visible. This is now fixed.
+       <br> 
+       (WB 2004/06/29)
+       </p>
+
+</ol>
+
+
+<a name="base"></a>
+<h3>base</h3>
+
+<ol>
+  <li> <p> 
+       New: The <code>MultithreadInfo</code> class now also detects multiple
+       processors on Mac OS X.
+       <br> 
+       (Helmut Müller 2004/11/29)
+  </p>
+
+  <li> <p> 
+       New: The <code>
+       TableHandler::write_tex</code> now accepts the additional boolean
+       argument <code>with_header</code> which is set to
+       true by default and tells the function whether to add the latex 
+       header and footer (i.e. the \\documentclass{...},
+       \\begin{document} and \\end{document} stuff) to the table.<br> 
+       In addition to this, there are two new members in the above class: 
+       <code> TableHandler::tex_set_table_caption</code> and
+       <code> TableHandler::tex_set_table_label</code> to
+       add a caption and a label to the tex generated table.  
+       <br> 
+       (Luca Heltai 2004/10/29)
+  </p>
+
+  <li> <p>
+       Fixed: <code
+       class="member">DataOutBase<2,3>::write_tecplot</code>
+       sometimes did not write the variable name <code>z</code> when
+       only outputting faces of cells. This is now fixed.
+       <br>
+       (RH 2004/10/29)
+       </p>
+
+  <li> <p>
+       New: a class template <code>VectorSlice</code>
+       allows access to consecutive portions of a vector.
+       <br>
+       (GK 2004/09/16)
+       </p>
+
+  <li> <p> New: The classes <code
+       class="class">TableIndices<N></code> and <code
+       class="class">Table<N,T></code> are now implemented also
+       for <code>N=7</code>. The <code
+       class="class">Table<N,T></code> class represents an
+       <code>N</code>-dimensional array and might replace the
+       N-times-nested-use of the <code
+       class="class">std::vector</code> class. 
+       <br>
+       (RH 2004/08/13)
+       </p>
+
+  <li> <p>
+       New: Class <code>TableIndices</code> now has
+       operators that check for equality and inequality of objects.
+       <br> 
+       (WB 2004/07/28)
+       </p>
+
+  <li> <p>
+       New: A class <code>PointerComparison</code> for comparing
+       pointers that may or may not be of the same type.
+       <br> 
+       (WB 2004/06/22)
+       </p>
+
+  <li> <p>
+       Removed: The <code
+       class="class">ParameterHandler</code> class contained a remnant from
+       back in 1997 when it was modeled after a similar class in DiffPack: it
+       had a <code>status</code> flag that one could obtain via the <code
+       class="member">ok</code> function. It was never really used for
+       anything, and has thus finally been removed. The <code
+       class="member">ok</code> is consequently gone as well.
+       <br> 
+       (WB 2004/06/06)
+       </p>
+
+  <li> <p>
+       New: An object of the new <code
+       class="class">ConditionalOStream</code> class allows printing
+       to an output stream depending on a condition, which is active
+       or not. This is particularly useful for parallel computations
+       when only one process should print to standard output, while in all
+       other processes we simply want output suppressed.
+       <br> 
+       (RH 2004/05/26)
+       </p>
+</ol>
+
+
+
+<a name="lac"></a>
+<h3>lac</h3>
+
+<ol>
+  <li> <p>
+       Fixed: PETSc had changed some of its interfaces in version 2.2.1, which
+       prevented deal.II from working with it (it couldn't be compiled at
+       all). We have added checks that make sure that it can now again be
+       compiled with PETSc versions 2.1.6, 2.2.0, and 2.2.1.
+       <br> 
+       (WB 2004/10/07)
+       </p>
+
+  <li> <p>
+       New: A class <code>PreconditionLU</code> which
+       provides a wrapper to the complete LU decomposition
+       preconditioner of PETSc. Furthermore a class 
+       <code>SolverPreOnly</code> was implemented. It
+       is a wrapper for the PETSc solver type KSPPREONLY, which
+       only applies the preconditioner. In conjunction with
+       <code>PreconditionLU</code> this provides a
+       simple direct solver, which could be used for small to medium
+       sized problems on a single processor machine.
+       <br> 
+       (Oliver Kayser-Herold 2004/07/27)
+       </p>
+ 
+ <li> <p>
+       Improved: <code>VectorTools::point_difference</code>
+       used to use an algorithm to find the cell the given point is in that
+       was linear in the total number of active cells. It has been rewritten
+       to use
+       <code>GridTools::find_active_cell_around_point</code>
+       which is only logarithmic in its complexity.
+       <br> 
+       (WB 2004/07/07)
+       </p>
+
+  <li> <p>
+       Fixed: Block matrix iterators could get into all kinds of interesting
+       (and invalid) states when some of the blocks had empty rows. In this
+       common case, we would frequently skip elements when looping over the
+       elements of a block matrix. These cases should now be fixed.
+       <br> 
+       (WB 2004/07/07)
+       </p>
+
+  <li> <p>
+       Removed: Block matrix iterators used to have a function
+       <code>it->index()</code> that returned something like
+       the position within a row. However, this was fragile, and has been
+       removed. If you want an ordering of elements within a row, use the
+       <code>operator <</code> to compare iterators.
+       <br> 
+       (WB 2004/07/07)
+       </p>
+
+  <li> <p>
+       Improved: The <code
+       class="class">PETScWrappers::SolverGMRES::AdditionalData</code>
+       class now takes an additional flag indicating the use of left
+       or right preconditioning.
+       <br> 
+       (RH 2004/06/24)
+       </p>
+
+  <li> <p>
+       Improved: The <code>SparseDirectMA27</code> class can now
+       handle float as well as double input matrices and vectors.
+       <br> 
+       (WB 2004/06/23)
+       </p>
+
+  <li> <p>
+       Fixed: The <code
+       class="member">SparseMatrix::vmult</code>-type functions
+       <code>vmult</code>, <code>Tvmult</code>, <code>vmult_add</code>, and
+       <code>Tvmult_add</code> take two different template arguments for input
+       and output vectors, but were only instantiated in case the arguments
+       were the same, and could also not be compiled if they weren't. Both
+       problems are now fixed.
+       <br> 
+       (WB 2004/06/22)
+       </p>
+
+  <li> <p>
+       Improved: The <code>CompressedSparsityPattern</code>
+       class used one of the C++ standard containers to store the column
+       indices of nonzero entries in a sparse matrix. This proved to be
+       inefficient since it requires the allocation of 20 bytes each time an
+       element was added, which for large matrices can be millions of
+       times. The new storage format uses a more compact data structure, and a
+       cache that requires memory allocation only every 8 additions, on
+       average. This should significantly reduce the total amount of memory
+       required as well as memory fragmentation. It also cuts run-time for
+       element addition by more than half.
+       <br> 
+       (WB 2004/06/21)
+       </p>
+
+  <li> <p>
+       Fixed: <code
+       class="member">CompressedSparsityPattern::max_entries_per_row()</code>
+       ignored the first row and thus sometimes returned a value that was too
+       low. This is now fixed.
+       <br> 
+       (WB 2004/06/21)
+       </p>
+
+  <li> <p>
+       Fixed: <code>BlockSparseMatrix::clear()</code> did not
+       do what it was supposed to do: it emptied the individual blocks and
+       removed them, but it still kept the number of blocks
+       unchanged. Subsequent accesses to these blocks, or to their information
+       yielded segmentation faults.
+       <br> 
+       (WB 2004/06/02)
+       </p>
+
+  <li> <p>
+       New: Block matrices and vectors have been factored into abstract base
+       classes and concrete implementation classes. While the previous classes
+       still exist in all their functionality, this allowed us to now also
+       have block matrices and vectors for PETSc sequential and parallel
+       objects.
+       <br> 
+       (WB 2004/06/02)
+       </p>
+
+  <li> <p>
+       New: Vector and block vector classes had a commented out template
+       constructor constructing such a vector from a vector object with a
+       different template argument, for example constructing a <code
+       class="class">Vector<double></code> from a <code
+       class="class">Vector<float></code>. This constructor was
+       commented out a long time ago due to a compiler bug in which the
+       <code>explicit</code> keyword on template constructors was ignored, a
+       fact that is dangerous since it may lead the compiler to generate
+       temporaries without our ado. This bug is now detected during
+       configuration time of the library, and these constructors are available
+       whenever the compiler does not contain this bug.
+       <br> 
+       (WB 2004/06/02)
+       </p>
+
+  <li> <p>
+       New: In analogy to the PETSc vector classes, the PETSc matrix classes
+       now also have member functions <code>local_range</code>,
+       <code>in_local_range</code>, and <code
+       class="member">local_size</code>.
+       <br> 
+       (WB 2004/06/02)
+       </p>
+
+  <li> <p>
+       New: Parallel PETSc matrix and vector classes now have member functions
+       <code>get_mpi_communicator</code> that returns the MPI
+       communicator object these objects operate on.
+       <br> 
+       (WB 2004/06/02)
+       </p>
+
+  <li> <p>
+       Changed: The PETSc linear solver classes now take a constant, rather
+       than a nonconstant reference to the MPI communicator to be used. This
+       prevents some unnecessary compiler problems in conjunction with the new
+       <code>get_mpi_communicator</code> function.
+       <br> 
+       (WB 2004/06/02)
+       </p>
+
+  <li> <p>
+       New: Parallel and sequential PETSc sparse matrix classes can now be
+       initialized (via either constructor or reinit functions) with a
+       compressed sparsity pattern object, allowing for more efficient
+       preallocation of nonzero entries.
+       <br> 
+       (WB 2004/06/02)
+       </p>
+</ol>
+
+
+
+<a name="deal.II"></a>
+<h3>deal.II</h3>
+
+<ol>
+  <li> <p>
+       Improved: <code>GeometryInfo</code> has two new
+       fields <code>unit_normal_direction</code> and
+       <code>unit_normal_orientation</code>,
+       indicating the direction of the unit normal vector for each
+       face of the reference cell.
+       <br>
+       (GK 2004/12/18)
+       </p>
+
+  <li> <p>
+       Fixed: In rare cases of 3d meshes with a certain topology,
+       we triggered an assertion in <code
+       class="class">TriaAccessor</code>::<code
+       class="member">neighbor_child_on_subface</code>. It turns out
+       that the code actually computes the correct answer, but the
+       assertion had a condition that doesn't always have to be
+       satisfied. This bogus assertion is now fixed.
+       <br>
+       (WB 2004/11/12)
+       </p>
+
+  <li> <p> Fixed: The <code>GridGenerator</code>::<code
+       class="member">cylinder</code> function in 3d assigned the
+       wrong boundary value to the top and bottom part of the cylinder
+       if the half length of the cylinder was not equal to 1. This is
+       now fixed.
+       <br>
+       (Ralf Schulz 2004/10/27)
+       </p>
+
+  <li> <p>
+       Improved: Now the <code>FE_Q</code> class supports hanging node
+       constraints for elements of arbitrary polynomial degree also in
+       3D.
+       <br>
+       (Oliver Kayser-Herold 2004/10/21)
+       </p>
+
+  <li> <p>
+       Fixed: The <code
+       class="class">StraightBoundary<3></code>::<code
+       class="member">get_new_point_on_quad</code>
+       did not work on general grids. This is now fixed.
+       <br>
+       (RH 2004/10/15)
+       </p>
+
+  <li> <p>
+       Improved: The <code>CylinderBoundary</code>
+       represented the hull of circular tubes along the x-axis. It
+       is now extended to allow for circular tubes also along the y-
+       and z-axis.
+       <br>
+       (RH 2004/10/15)
+       </p>
+
+  <li> <p>
+       Fixed: The <code>ConstraintMatrix</code> class had some
+       algorithms that were linear in the number of constraints. Since these
+       functions had to be called for each constraint, this resulted in a
+       quadratic behavior. To make things worse, these algorithms traversed
+       large memory blocks leading to a vast number of cache misses which made
+       them really slow. This is now fixed: the algorithm is O(1) and should
+       only access single elements in memory, and one 3d testcase is now a
+       full 5 per cent faster on about 10 minutes runtime.
+       <br>
+       (WB 2004/10/04)
+       </p>
+
+  <li> <p>
+       New: The <code>GridOut</code>::<code
+       class="member">write_gnuplot</code> function now supports
+       curved boundaries also for <code>dim==3</code>.
+       <br>
+       (RH 2004/09/20)
+       </p>
+
+  <li> <p>
+       Fixed: The documentation of the <code
+       class="member">Triangulation</code> class mentioned that no
+       places in the library use or touch the user pointers. That is
+       wrong, the <code>SolutionTransfer</code> class
+       actually does. This is now properly documented.
+       <br> 
+       (WB 2004/09/15)
+       </p>
+
+  <li> <p>
+       Fixed: The <code
+       class="member">DerivativeApproximation::approximate_second_derivative</code>
+       function produced wrong results ("not a number", or an exception instead of
+       "zero") if the field it was given was constant. This is now fixed.
+       <br> 
+       (WB 2004/08/05)
+       </p>
+
+  <li> <p>
+       New: The <code>MatrixTools::apply_boundary_values</code>
+       that works on block matrices and vectors is now templatized over the
+       number type, i.e. it also works for float matrices and vectors.
+       <br> 
+       (WB 2004/06/22)
+       </p>
+
+  <li> <p>
+       New: The new <code>FEValuesBase::get_cell</code>
+       function returns the current cell, i.e. the latest cell the
+       <code>FEValues</code> object was reinited with.
+       <br> 
+       (RH 2004/06/22)
+       </p>
+
+  <li> <p>
+       Changed: The <code
+       class="member">MatrixTools::local_apply_boundary_values</code> function
+       used to set the diagonal entries of boundary nodes to one. This is a
+       really bad choice, and the algorithm we use now is much better. There
+       are still a few problems when using this function for matrices that
+       will be solved as a Schur complement; there is nothing we can do in
+       that function in these cases, since we are lacking fundamental pieces
+       of information, but the cases where this applies and the strategies to
+       work with this problem anyway are now well documented in the
+       documentation of the <code>MatrixTools</code> class.
+       <br> 
+       (WB 2004/06/07)
+       </p>
+
+  <li> <p>
+       Fixed: The <code>GridGenerator::cylinder_shell</code>
+       function generated cells that were all inside-out, i.e. had negative
+       Jacobians. In usual finite element computations this simply leads to
+       all components of the linear system being negated, so it is not
+       harmful. It is fixed now anyway.
+       <br> 
+       (WB 2004/06/07)
+       </p>
+</ol>
+
+
+
+*/
diff --git a/doc/news/5.1.0-vs-5.2.0.h b/doc/news/5.1.0-vs-5.2.0.h
new file mode 100644
index 0000000..8198001
--- /dev/null
+++ b/doc/news/5.1.0-vs-5.2.0.h
@@ -0,0 +1,1013 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013, 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/**
+ * @page changes_between_5_1_and_5_2 Changes between Version 5.1 and 5.2
+
+<p>
+This is the list of changes made between the deal.II releases listed above.
+All entries are signed with the names of the authors. Regular
+contributors' names are abbreviated by WB (Wolfgang Bangerth), GK
+(Guido Kanschat), RH (Ralf Hartmann).
+</p>
+
+
+<a name="incompatible"></a>
+<h3 style="color:red">Incompatibilities</h3>
+
+<p style="color:red">
+Following are a few modifications to the library that unfortunately
+are incompatible with previous versions of the library, but which we
+deem necessary for the future maintainability of the
+library. Unfortunately, some of these changes will require
+modifications to application programs. We apologize for the
+inconvenience this causes.
+</p>
+
+
+<ol>
+  <li> <p>
+       Changed: The way boundary constraints were handled in the
+       step-17 tutorial program was conceptually flawed. We tried to
+       eliminate boundary nodes locally on the cell level, and hanging
+       node constraints subsequently when transferring into the global
+       matrix. However, this doesn't work in general: the elimination
+       of hanging node constraints could re-populate rows and columns
+       that had already been vacated during boundary node
+       elimination. At the end of a long thought process, we came to
+       the conclusion that it is impossible to reverse the traditional
+       order of operations: first eliminate all hanging node
+       constraints, then eliminate all boundary nodes. This leads to a
+       situation where the <code
+       class="member">MatrixTools::local_apply_boundary_values</code>
+       function is not very useful any more, except for the special
+       case where there are no hanging nodes. The step-17 example
+       program has therefore been changed to use the <code
+       class="member">MatrixTools::apply_boundary_values</code>
+       function again, though hanging node elimination still happens
+       during transfer into the global matrix.
+       <br>
+       (WB 2005/05/05)
+       </p>
+
+  <li> <p>
+       Changed: The class <code
+       class="class">MGCoarseGridLACIteration</code> lost two template
+       arguments. Since the matrix and preconditioner are now stored
+       in the form of <code>PointerMatrix</code> objects,
+       handling of the class is much simpler, in particular when
+       exchanging preconditioners.
+       <br>
+       (GK 2005/05/03)
+       </p>
+
+  <li> <p>
+       Changed: The template argument of <code
+       class="class">BlockMatrixArray</code> was changed to
+       <tt>number</tt>, equal to the same argument of the used <code
+       class="class">BlockVector</code>. Furthermore, its constructor
+       requires an additional argument of type <code
+       class="class">VectorMemory<Vector<number> ></code>
+       providing space for auxiliary vectors. Since the entries are
+       now of type <code>PointerMatrixBase</code>, even
+       matrices with blocks of different types can be constructed.
+       <br>
+       (GK 2005/03/21)
+       </p>
+
+  <li> <p>
+       Changed: The <code>GeometryInfo</code>::<code
+       class="member">vertices_adjacent_to_line</code> function has
+       been renamed to <code>GeometryInfo</code>::<code
+       class="member">line_to_cell_vertices</code> to be named
+       analogous to the <code
+       class="member">face_to_cell_vertices</code>, <code
+       class="member">face_to_cell_lines</code> and <code
+       class="member">child_to_cell_coordinates</code> functions.
+       <br>
+       (RH 2005/03/14)
+       </p>
+
+  <li> <p>
+       Changed: the file <tt>multigrid/mg_dof_tools.h</tt> was renamed
+       to <tt>multigrid/mg_tools.h</tt> in order to match the name of
+       the class contained.
+       <br>
+       (GK 2005/03/09)
+       </p>
+
+  <li> <p>
+       Changed: <code>DoFTools</code>::<code
+       class="member">make_flux_sparsity_pattern</code>, <code
+       class="class">MGTools</code>::<code
+       class="member">make_flux_sparsity_pattern</code> and similar
+       functions in these classes do not receive arguments of type
+       <code>FullMatrix<double></code>
+       anymore. Instead, they get a <code
+       class="class">Table<2,DoFTools::Coupling></code>, which
+       contains more meaningful enums.
+       <br>
+       (GK 2005/03/09)
+       </p>
+
+  <li> <p>
+       Changed: <code>Multigrid</code>::<code
+       class="member">Multigrid</code> receives an argument
+       determining the type of multigrid cycle instead of the minimum
+       and maximum levels. The latter were rarely used anyway and can
+       be modified by <code>set_minlevel()</code> and
+       <code>set_maxlevel()</code>.
+       <br>
+       (GK 2005/03/09)
+       </p>
+</ol>
+
+<a name="general"></a>
+<h3>General</h3>
+
+<ol>
+  <li> <p> New:
+       The step-19 tutorial demonstrates handling of parameters from a
+       parameter file, as well as some simple techniques for merging output
+       from parallel computations. 
+       <br>
+       (WB 2005/09/09)
+       </p>
+
+  <li> <p>
+       Fixed: The linking of PETSc libraries into the static
+       <tt>libpetsc.g.a</tt> and <tt>libpetsc.a</tt> libraries
+       (located in <tt>deal.II/lib</tt>) has now been fixed.
+       <br>
+       (RH 2005/08/26)
+       </p>
+
+  <li> <p> New:
+       The step-18 example program shows how to solve a quasi-static elasticity
+       problem with moving meshes, and this all in parallel.
+       <br>
+       (WB 2005/08/11)
+       </p>
+
+  <li> <p>
+       Changed: The files
+       <tt>deal.II/include/grid/geometry_info.h</tt> and
+       <tt>deal.II/source/grid/geometry_info.cc</tt> have been moved
+       to <tt>base/include/base</tt> and <tt>base/source</tt>,
+       respectively. A redirection header file has been left in the
+       old location for compatibility.
+       <br>
+       (GK, 2005/07/19)
+       </p>
+
+  <li> <p>
+       Changed: Cygwin has problems linking against deal.II DLLs if you
+       link against more than one dimension dependent library. The linker
+       might issue error messages about multiple defined symbols. This is now
+       detected, and the linker is forced to ignore these errors. However, if
+       you accidentally defined symbols twice you might have a hard time
+       debugging now... In this case remove the <code>-Xlinker 
+       --allow-multiple-definition</code> flag from Make.global_options.
+       <br>
+       (Ralf B. Schulz, 2005/07/13)
+       </p>
+
+  <li> <p>
+       Changed: Under Cygwin, linking against LAPACK and UMFPACK did
+       not work. This is now fixed in the Makefiles. Changes only
+       affect Cygwin.
+       <br>
+       (Ralf B. Schulz, 2005/07/02)
+       </p>
+
+  <li> <p>
+       New: The latest version of UMFPACK, version 4.4, has been imported.
+       <br>
+       (WB, 2005/06/30)
+       </p>
+
+  <li> <p>
+       Fixed: The documentation pages did not contain information
+       about the <code>Threads::spawn</code> functions. This is now fixed.
+       <br>
+       (WB, 2005/05/12)
+       </p>
+
+  <li> <p>
+       Fixed: The step-17 example program had a stupid mistake in that it
+       initializes the <code>pcout</code> variable with an uninitialized value
+       of <code>mpi_communicator</code>. While this seems to have worked for
+       some older MPI implementations, this is no longer the case on some
+       other systems. It is now fixed.
+       <br>
+       (WB, 2005/04/18)
+       </p>
+
+  <li> <p>
+       New: If there are references to other example programs in any of the
+       step-XX programs, then they will now show up as crosslinks in the HTML
+       version for simpler navigation of the tutorial.
+       <br>
+       (WB, 2005/04/04)
+       </p>
+
+  <li> <p>
+       New: The configuration script can now enable special optimizations for
+       x86-like processors if called with the <code>--with-cpu=...</code> option.
+       This can decrease computation time by up to 30% on certain systems.
+       The configuration flag has already existed for PowerPC64 processors, but
+       was not documented. Documentation has been added.
+       <br>
+       (Ralf B. Schulz, 2005/03/10)
+       </p>
+
+  <li> <p>
+       Fixed: When compiling shared libraries on CygWin systems, warnings
+       concerning the <code>-fPIC</code> option were issued by the compiler.
+       This is now fixed.       
+       Also, configure now issues a message at the end that you should include
+       the DLL-path in your <code>.bash_profile</code> file on these systems.
+       <br>
+       (Ralf B. Schulz, 2005/03/10)
+       </p>
+
+  <li> <p>
+       Fixed: On certain systems running CygWin, a call to socket functions
+       like <code>gethostname()</code> could cause deal.II to hang. The reason
+       seems to be that the call disables long double computations which
+       revert to just double precision. This then leads to an endless loop in
+       computing weights for quadrature points because the convergence
+       criterion cannot be reached with simple double precision. This is now
+       fixed by disabling the call to <code>gethostname()</code> on these
+       systems. A new preprocessor variable, <code>DEAL_II_BROKEN_SOCKETS</code>
+       has been added to <code>base/config.h</code> which is defined on
+       affected systems.
+       <br> 
+       (Ralf B. Schulz, WB, 2005/03/02)
+       </p>
+
+  <li> <p>
+       Fixed: The step-16 example program wasn't listed in the
+       navigation bar of the tutorial section, although it was in the
+       table of contents. This is fixed now.
+       <br> 
+       (WB, 2005/02/09)
+       </p>
+</ol>
+
+
+
+<a name="base"></a>
+<h3>base</h3>
+
+<ol>
+  <li> <p>
+       New: Class <code>PathSearch</code> allows finding
+       a file in a sequence of directories and by appending suffixes. The class
+       generates the complete file name including directory prefix and suffix
+       such that it can be used for subsequently opening the file.
+       <br> 
+       (GK, 2005/09/14)
+       </p>
+
+  <li> <p>
+       Extended: The <code>ParameterHandler</code> class now does a much better
+       job generating output in the <code>print_parameters()</code> function if
+       parameters have documentation strings attached to them. See the step-19
+       example program output for this.
+       <br>
+       (WB, 2005/09/12)
+       </p>
+
+  <li> <p>
+       New: There is now a namespace <code>Utilities</code> that contains
+       functions that are not particularly specific to finite element codes,
+       but are needed in various contexts of application programs anyway.
+       <br>
+       (WB, 2005/08/11)
+       </p>
+
+  <li> <p>
+       Changed: <code>ParameterHandler::get_bool()</code> only accepted
+       "true" or "false" as boolean values, now it also considers "yes"
+       and "no" as "true" and "false", respectively.
+       <br>
+       (Ralf B. Schulz, 2005/07/19)
+       </p>
+       
+  <li> <p>
+       Removed: The <code>write_multigrid</code> flag in <code
+       class="member">DataOutBase::DXFlags</code> 
+       has been removed, since it wasn't used anywhere.
+       <br>
+       (WB, 2005/07/14)
+       </p>
+
+  <li> <p>
+       New: A function <code>
+       deal_II_exceptions::disable_abort_on_exception</code> now allows
+       to disable program abortion when an assertion fails. This is used
+       in regression tests.
+       <br>
+       (Ralf B. Schulz, 2005/07/13)
+       </p>
+       
+  <li> <p>
+       Improved: The <code>QProjector</code> now has
+       functions <code>project_to_face</code> and <code
+       class="class">project_to_subface</code> returning a <code
+       class="class">Quadrature</code> object.
+       <br>
+       (GK, 2005/07/12)
+       </p>
+
+  <li> <p>
+       Changed: The <code
+       class="member">ParameterHandler::print_parameters</code>
+       function now puts two newlines between regular entries and
+       subsection listings for better readability.
+       <br>
+       (WB, 2005/07/05)
+       </p>
+
+  <li> <p>
+       Changed: The <code
+       class="member">ParameterHandler::print_parameters</code>
+       function now omits printing subsections if they and the contained
+       sub-subsections contain no entries.
+       <br>
+       (WB, 2005/07/05)
+       </p>
+
+  <li> <p>
+       Changed: Some of the dimension independent functions in the <code
+       class="class">DataOutInterface</code> class have been moved into the
+       <code>DataOutBase</code> class that does not depend on any
+       dimension template parameters. Since the latter is a base class of the
+       former, there should be no problems for application programs.
+       <br>
+       (WB, 2005/07/01)
+       </p>
+
+  <li> <p>
+       Fixed: Several of the functions in class <code
+       class="class">ConstantFunction</code> did not check whether the
+       component handed to them was valid. In essence, even a scalar
+       function could be evaluated for component 42, and the function
+       ignored how many components it was declared to have. This is
+       now fixed.
+       <br>
+       (WB, 2005/06/30)
+       </p>
+
+  <li> <p>
+       Changed: The functions in class <code
+       class="class">ExceptionBase</code> have been renamed to conform
+       to our coding conventions that function names be all lower-case
+       with words separated by underscores. This is technically an
+       incompatible change, since these functions were publicly
+       visible, but were usually hidden behind macros and not used
+       anywhere explicitly in the library; we hope that this also
+       holds for external code.
+       <br>
+       (WB, 2005/06/30)
+       </p>
+
+  <li> <p>
+       New: The GNU C library supports access to the call stack. For
+       systems using this library, the <code
+       class="class">ExceptionBase</code> class now uses these
+       functions to output the call stack when the exception is
+       created.
+       <br>
+       (Ralf B. Schulz, 2005/06/30)
+       </p>
+
+  <li> <p>
+       Improved: The class <code>Quadrature</code> has a new
+       constructor generating the <tt>dim</tt>-dimensional tensor product of a
+       one-dimensional formula directly.
+       <br>
+       (GK, 2005/06/10)
+       </p>
+
+  <li> <p>
+       New: The class <code
+       class="class">Polynomials::RaviartThomas</code> implements
+       Raviart-Thomas polynomials of arbitrary order in 2d and 3d.
+       <br>
+       (GK, 2005/05/30)
+       </p>
+
+  <li> <p>
+       New: The <code>Polynomials::Lagrange</code> class
+       allows generation of Lagrange interpolation polynomials for
+       arbitrary point sets. In order to get better interpolation for
+       higher order polynomials, <code
+       class="class">QGaussLobatto</code> has been added to produce
+       interpolation point sets clustering at the boundary of a cell
+       (weights are not computed yet).
+       <br>
+       (GK, 2005/05/30)
+       </p>
+
+  <li> <p>
+       New: The <code>QAnisotropic</code> class allows
+       generation of quadrature rules of different order in each
+       space dimension.
+       <br>
+       (GK, 2005/05/24)
+       </p>
+
+  <li> <p>
+       New: The <code>Tensor</code> classes now
+       have member functions that compute the Frobenius norm and its
+       square. There are also global <code>operator*</code> functions
+       that compute the contraction over a pair of indices of tensors.
+       <br>
+       (WB, 2005/04/06)
+       </p>
+
+  <li> <p>
+       New: The <code>DataOutBase</code> class now
+       allows to write data in a new intermediate format that
+       basically dumps the raw information available in patches.
+       <br>
+       (WB, 2005/04/05)
+       </p>
+
+  <li> <p>
+       New: The <code>DataOutReader</code> class allows
+       to read data back in from a file into which it has been written
+       using intermediate format. From there, it can then be converted
+       into any of the supported graphics formats.
+       <br>
+       (WB, 2005/04/05)
+       </p>
+
+  <li> <p>
+       New: There is now a <code
+       class="member">operator >> (std::istream &in,
+       Point<dim> &p)</code>, a function that had apparently been
+       missing for a long time already.
+       <br>
+       (WB, 2005/04/05)
+       </p>
+
+  <li> <p>
+       New: The new function <code
+       class="member">double_contract</code> contracts two tensors of
+       rank 4 and 2 into a tensor of rank 2, by contracting over two
+       indices at the same time.
+       <br> 
+       (WB, 2005/03/29)
+       </p>
+
+  <li> <p>
+       New: The new function <code
+       class="member">TableIndicesBase::sort</code> allows to sort the indices
+       in ascending order. This is useful for cases where the order of indices
+       is irrelevant (for example in symmetric tables).
+       <br> 
+       (WB, 2005/03/29)
+       </p>
+
+  <li> <p>
+       New: There is a new class <code>SymmetricTensor</code>
+       that provides storage and operations for symmetric tensors.
+       <br> 
+       (WB, 2005/03/28)
+       </p>
+
+  <li> <p>
+       New: Class <code>Subscriptor</code> receives a
+       text argument allowing to identify the violating pointer more
+       easily. The additional feature is being incorporated into <code
+       class="class">SmartPointer</code> constructors throughout the
+       library.
+       <br> 
+       (GK, 2005/03/16)
+       </p>
+
+  <li> <p>
+       New: Class <code>FunctionParser</code>. Wrapper
+       class for the fparser library (see 
+       <a href="http://warp.povusers.org/FunctionParser/">
+       http://warp.povusers.org/FunctionParser/</a>).
+       <br> 
+       (Luca Heltai, 2005/03/07).
+       </p>
+
+  <li> <p>
+       Fixed: The class <code
+       class="class">MultipleParameterLoop::UserClass</code> had only 
+       virtual abstract functions but no virtual destructor. This caused
+       warnings with some compilers, and is generally bad practice
+       anyway. This is now fixed. The same holds with respect to the class
+       <code>DataOutInterface</code>.
+       <br> 
+       (WB, 2005/02/20)
+       </p>
+</ol>
+
+
+
+<a name="lac"></a>
+<h3>lac</h3>
+
+<ol>
+  <li> <p>
+       New: The new function <code
+       class="member">PetscWrappers::VectorBase::set</code> allows to set
+       several vector elements at once.
+       <br> 
+       (WB, 2005/08/10)
+       </p>
+
+  <li> <p>
+       New: There are now functions <code
+       class="member">PetscWrappers::MatrixBase::clear_row</code> and <code
+       class="member">clear_rows</code> that allow to set the elements of a row
+       of a matrix to zero, without having to traverse the individual
+       elements.
+       <br> 
+       (WB, 2005/08/10)
+       </p>
+
+  <li> <p>
+       Fixed: <code>SparsityPattern</code>::<code
+       class="member">block_write</code> neither wrote the number of
+       columns of the pattern, nor the flag for optimizing the
+       diagonal, and <code>SparsityPattern</code>::<code
+       class="member">block_read</code> did not attempt to read this
+       data. Both are fixed now.
+       <br>
+       (GK, 2005/06/24)
+       </p>
+
+  <li> <p>
+       Improved: <code>SparseDirectUMFPACK</code> now
+       has the complete interface of a preconditioner
+       class. Therefore, it can be used in iterative solvers and in
+       multigrid smoothers.
+       <br> 
+       (GK, 2005/05/10)
+       </p>
+
+  <li> <p>
+       Fixed: The PETSc matrix iterators had trouble when some rows of a
+       matrix were empty. This has now been mostly fixed.
+       <br> 
+       (WB, 2005/05/02)
+       </p>
+
+  <li> <p>
+       Fixed: The <code>PETScWrappers::MPI::SparseMatrix</code>
+       class had functions that allow to take a pre-existing sparsity pattern
+       as the basis for entry allocation. These functions had an option to
+       allow pre-setting these entries in the underlying data structures, but
+       this option was disabled by default because of unresolved questions
+       about its effectiveness. This has now been fixed: The code now properly
+       initializes these elements, and makes the resulting matrix much faster
+       to use.
+       <br> 
+       (WB, 2005/04/04)
+       </p>
+
+  <li> <p>
+       New: The <code>ProductSparseMatrix</code>
+       implements the product of two rectangular sparse matrices with
+       the same <code>value_type</code>.
+       <br> 
+       (GK, 2005/03/11)
+       </p>
+
+  <li> <p>
+       New: The <code>PreconditionRichardson</code>
+       implements a Richardson preconditioner.
+       <br> 
+       (GK, 2005/03/10)
+       </p>
+
+  <li> <p> Fixed: The <code>BlockSparseMatrix</code>
+       class had no local typedef <code
+       class="member">value_type</code> like all other classes, which
+       made it a little awkward to use in some places. This has now
+       been fixed.
+
+       <br> 
+       (WB, 2005/03/03)
+       </p>
+
+  <li> <p> Fixed: The <code>PETScWrappers</code>::<code
+       class="member">MatrixBase</code> class documented that adding
+       or setting a value that hasn't been in the sparsity pattern
+       before will lead to an exception being thrown. This is of
+       course wrong: PETSc allocates matrix entries dynamically, as
+       needed. The documentation is now fixed.
+       <br> (WB, 2005/03/03)
+       </p>
+
+  <li> <p> New: The <code>SparseMatrix</code> iterators
+       had no <code>operator ></code>, only an <code
+       class="member">operator <</code>. The missing operator is
+       now implemented. The same holds for the <code
+       class="class">FullMatrix</code> class.
+       <br> (WB, 2005/03/01)
+       </p>
+
+  <li> <p> Fixed: The <code>SparseMatrix</code>
+       iterators could not be compared using <code
+       class="member">operator <</code>: the compiler complained
+       that a private member was accessed. This is now fixed.
+       <br> (WB, 2005/03/01)
+       </p>
+</ol>
+
+
+
+<a name="deal.II"></a>
+<h3>deal.II</h3>
+
+<ol>
+
+  <li> <p> Removed: The class <code
+  class="class">FiniteElementBase</code> has been removed and all its
+  functions are now in <code>FiniteElement</code>.
+  <br>
+  (GK, 2005/08/25)
+  </p>
+
+  <li> <p> New: class <code>DoFTools</code> now has two
+  functions <code>compute_row_length_vector</code>, one
+  for equations and one for systems. These give a much finer estimate
+  for the size of the sparsity pattern required than <code
+  class="class">DoFHandler</code>::<code
+  class="member">max_couplings_between_dofs</code>. This is
+  particularly true for systems with few couplings.
+  <br>
+  (GK, 2005/08/24)
+  </p>
+
+  <li> <p>
+       Removed: Due to the reimplementation of the 3d reordering
+       algorithm in <code>GridReordering</code> two
+       years ago, the <code>CellData::rotate</code>
+       function became obsolete. <code
+       class="member">CellData::rotate</code> has now been removed.
+       <br>
+       (RH, 2005/08/09)
+       </p>
+
+  <li> <p>
+       Fixed: It was possible to compare iterators into different
+       <code>DoFHandler</code> objects for equality and
+       inequality as long as the underlying triangulation object was
+       the same. This doesn't make much sense, however, and sometimes
+       even declared two iterators equal if only they had the same
+       triangulation, refinement level, and index. Comparing iterators
+       into different containers is now completely disallowed.
+       <br> 
+       (WB, 2005/08/08)
+       </p>
+
+  <li> <p>
+       New: The <code>DataOutStack::add_data_vector</code>
+       function now also allows to give only a single name even if the data is
+       vector-valued. It then makes up names for the individual components by
+       adding numbers to the name, just like the <code
+       class="class">DataOut</code> class has already done for a long time.
+       <br> 
+       (WB, 2005/07/29)
+       </p>
+
+  <li> <p>
+       New: The <code>DataOutStack</code> class can now also be
+       used to stack two-dimensional time or parameter dependent data into a 3d
+       output.
+       <br> 
+       (WB, 2005/07/25)
+       </p>
+
+  <li> <p>
+       New: Function <code>FETools</code>::<code
+       class="member">compute_face_embedding_matrices</code> allows for
+       automatic computation of embedding matrices on faces under the sole
+       assumption that the coarse grid space is embedded into the fine
+       grid space. In particular, no interpolation points are
+       required.</p>
+       
+       <p>Using this function, constraint matrices can be computed in
+       a general way.
+       <br> 
+       (GK, 2005/07/14)
+       </p>
+
+  <li> <p>
+       Improved: All <code>GridIn</code>::<code
+       class="member">read_*</code> functions now call <code
+       class="class">GridReordering</code>::<code
+       class="member">invert_all_cells_of_negative_grid</code>. This
+       way, also misoriented grids are represented in the right
+       orientation within deal.II.
+       <br>
+       (RH 2005/07/06)
+       </p>
+
+  <li> <p>
+       Improved: <code
+       class="class">Triangulation<3></code>::<code
+       class="member">create_triangulation</code> now checks that all
+       cells have positive volume. If not, an exception is thrown. In
+       that case use the <code
+       class="class">GridReordering</code>::<code
+       class="member">invert_all_cells_of_negative_grid</code>
+       function, see below, to fix this.
+       <br>
+       (RH 2005/07/06)
+       </p>
+
+  <li> <p>
+       New: There is now a <code
+       class="class">GridReordering</code>::<code
+       class="member">invert_all_cells_of_negative_grid</code>
+       function implemented in 3d which checks if some/all cells have
+       negative volumes. If all cells have negative volume, then the
+       whole grid is reoriented. An assertion is thrown if only a
+       subset of cells have negative volumes, as then the grid might
+       be broken.
+       <br>
+       (RH 2005/07/06)
+       </p>
+
+  <li> <p>
+       New: There is now a new <code
+       class="class">GridTools</code>::<code
+       class="member">cell_measure</code> function. However, it is
+       mostly for internal use. Use <code>cell->measure()</code>
+       instead.
+       <br>
+       (RH 2005/07/06)
+       </p>
+
+  <li> <p>
+       Improved: <code>cell->measure()</code> used to give the
+       absolute value of the cell measure. It now gives the measure
+       with the correct sign. This is useful to find wrongly oriented
+       cells in 3d with negative volumes.
+       <br>
+       (RH 2005/07/05)
+       </p>
+
+  <li> <p>
+       Improved: The class <code
+       class="class">FiniteElementData</code> now stores information
+       on the Sobolev space a finite element space conforms with.
+       <br>
+       (GK 2005/06/29)
+       </p>
+  
+  <li> <p>
+       New: Added function <code
+       class="class">FiniteElementBase::component_to_system_index</code>
+       which was referred to in the documentation, but was missing.
+       <br>
+       (Ralf B. Schulz, 2005/06/30)
+       </p>
+
+  <li> <p>
+       Improved: The function <code
+       class="class">DoFTools</code>::<code
+       class="member">count_dofs_per_component</code> and its
+       counterpart in <code>MGTools</code> got an
+       additional argument. This argument allows to count the degrees
+       of freedom of nonprimitive vector valued elements only once,
+       instead of in every component.  Although this argument defaults
+       to the previous behavior, it had to be put into the argument
+       list ahead of <tt>target_component</tt> in order to make use of
+       default arguments more efficiently. The old order of arguments
+       can still be used through a wrapper function.
+       <br>
+       (GK 2005/06/22)
+       </p>
+
+  <li> <p>
+       Improved: The <code>GeometryInfo</code>::<code
+       class="member">child_cell_on_face</code>, <code
+       class="member">face_to_cell_vertices</code> and <code
+       class="member">face_to_cell_lines</code> now have an additional
+       <code>face_orientation</code> argument, which has no effect in
+       2d and which defaults to <code>true</code> (standard
+       orientation) in 3d. Now these functions return the right values
+       also for the case of non-standard oriented faces in 3d. This
+       avoids several awful pieces of code including questioning
+       face_orientation and using child_switch_tables.
+       <br> 
+       (RH, 2005/06/16)
+       </p>
+
+  <li> <p>
+       Changed: The method <code>FETools</code>::<code
+       class="member">lexicographic_to_hierarchic_numbering</code> now
+       takes a <code>FiniteElementData</code> instead of
+       a <code>FE_Q</code> object. Now this function can
+       also be called by the <code>FE_Q</code>
+       constructor which avoids code duplication.
+       <br> 
+       (RH, 2005/06/13)
+       </p>
+
+  <li> <p>
+       New: The method <code>create_mass_matrix</code>
+       in class <code>MatrixCreator</code> can now handle 
+       vector valued finite elements. A similar change was applied
+       to <code>create_right_hand_side</code> and
+       <code>create_boundary_right_hand_side</code>
+       in <code>VectorTools</code>. These two changes
+       now make the <code>project</code> function work also
+       for Raviart-Thomas elements and other vector valued FEs. This
+       is very useful, if some initial conditions have to be specified.
+       <br> 
+       (Oliver Kayser-Herold, 2005/06/03)
+       </p>
+
+  <li> <p>
+       New: The <code>DataOut</code> class
+       now supports Eulerian Mappings. If a solution is computed
+       on a deformed mesh, the output file generated by the DataOut
+       now shows the solution also on the deformed mesh. This
+       is an option and requires the mapping to be specified as
+       additional parameter to <code>build_patches</code>.
+       <br> 
+       (Oliver Kayser-Herold, 2005/05/31)
+       </p>
+
+  <li> <p>
+       New: The <code>MappingQ1Eulerian</code> class can
+       now cope with different vector types for the Euler vector.
+       This is useful if it should be used with the PETSc wrapper
+       classes.  The desired vector type can be specified as template
+       parameter.
+       <br>
+       (Oliver Kayser-Herold, 2005/05/31)
+       </p>
+
+  <li> <p>
+       New: The <code>FE_RaviartThomasNodal</code>
+       implements Raviart-Thomas elements using function values in
+       Gauss quadrature points on edges and in the interior for its
+       node values. The implementation is restricted to Cartesian mesh
+       cells right now, but works in 2D and 3D.
+       <br> 
+       (GK, 2005/05/30)
+       </p>
+
+  <li> <p>
+       Improved: The <code>Mapping::transform_*</code>
+       functions accept <code>VectorSlice</code> instead
+       of <code>Vector</code>, thus allowing more flexibility.
+       <br> 
+       (GK, 2005/05/24)
+       </p>
+
+  <li> <p>
+       New: The <code>MatrixTools::apply_boundary_values</code>
+       function now also works for PETSc sequential and parallel matrices.
+       <br> 
+       (WB, 2005/05/05)
+       </p>
+
+  <li> <p>
+       Improved: The function <code>GridIn</code>::<code
+       class="member">read</code> now searches for files using the
+       mechanism provided by the class <code
+       class="class">PathSearch</code>. Furthermore, a library of input
+       meshes has been started in <code>lib/meshes</code>.
+       <br> 
+       (GK, 2005/05/03)
+       </p>
+
+  <li> <p>
+       Fixed: The <code>DataOut</code> class did not work
+       properly if the <code>DataOut::first_cell</code> and
+       <code>DataOut::next_cell</code> functions were
+       overloaded and cell data was to be output; in that case, data from the
+       wrong cells was written out. This is now fixed. In contrast to this,
+       nodal data was never affected.
+       <br> 
+       (WB, 2005/04/20)
+       </p>
+
+  <li> <p>
+       Improved: By employing the new <code
+       class="class">GeometryInfo</code>::<code
+       class="member">line_to_cell_vertices</code>, <code
+       class="class">GeometryInfo</code>::<code
+       class="member">face_to_cell_vertices</code> and <code
+       class="class">GeometryInfo</code>::<code
+       class="member">face_to_cell_lines</code> functions the <code
+       class="class">Triangulation</code>::<code
+       class="member">create_triangulation</code> functions are now
+       implemented independent of given conventions for the numbering
+       of vertices, lines and faces.
+       <br>
+       (RH, 2005/03/11)
+       </p>
+
+  <li> <p>
+       New: The new <code>GeometryInfo</code>::<code
+       class="member">line_to_cell_vertices</code> function maps line
+       vertex numbers to cell vertex numbers.
+       <br> 
+       (RH, 2005/03/11)
+       </p>
+
+  <li> <p>
+       New: The new <code>GeometryInfo</code>::<code
+       class="member">face_to_cell_lines</code> function maps face
+       line numbers to cell line numbers.
+       <br> 
+       (RH, 2005/03/11)
+       </p>
+
+  <li> <p>
+       New: The new <code>GeometryInfo</code>::<code
+       class="member">face_to_cell_vertices</code> function maps face
+       vertex numbers to cell vertex numbers.
+       <br> 
+       (RH, 2005/03/11)
+       </p>
+
+  <li> <p>
+       Fixed: There was a bug in the <code
+       class="class">KellyErrorEstimator</code> class that resulted in
+       assertions being thrown when run with multithreading
+       enabled. This is now fixed.
+       <br> 
+       (WB, 2005/03/10)
+       </p>
+
+  <li> <p>
+       Changed: The <code>Triangulation<2></code>::<code
+       class="member">execute_refinement</code> function has been
+       re-implemented to accommodate future developments. This change
+       results in different ordering and indexing of lines and
+       vertices. This leads to a change in the ordering of vertices in
+       output files generated by <code>GridOut</code>
+       for refined grids.
+       <br> 
+       (RH, 2005/03/09)
+       </p>
+
+  <li> <p>
+       Fixed: The class <code>MGDoFHandler</code> had trouble
+       when it encountered a triangulation that had unused vertices, something
+       that happens when one coarsens an existing triangulation. In
+       that case, it would throw unjustified exceptions. This is now
+       fixed.
+       <br> 
+       (WB, 2005/03/01)
+       </p>
+
+  <li> <p>
+       Fixed: The class <code>Triangulation::RefinementListener</code> had only
+       virtual abstract functions but no virtual destructor. This caused
+       warnings with some compilers, and is generally bad practice
+       anyway. This is now fixed.
+       <br> 
+       (WB, 2005/02/22)
+       </p>
+
+  <li> <p>
+       New: Function <code>FETools</code>::<code
+       class="member">compute_embedding_matrices</code> allows for
+       automatic computation of embedding matrices under the sole
+       assumption that the coarse grid space is embedded into the fine
+       grid space. In particular, no interpolation points are required.
+       <br> 
+       (GK, 2005/02/08)
+       </p>
+
+  <li> <p>
+       Fixed: Several wrong assertions in the Raviart-Thomas finite element
+       class have been fixed, that were triggered when integrating face terms.
+       <br> 
+       (Oliver Kayser-Herold, 2005/01/24; 
+        WB, 2005/01/31)
+       </p>
+</ol>
+
+
+*/
diff --git a/doc/news/5.2.0-vs-6.0.0.h b/doc/news/5.2.0-vs-6.0.0.h
new file mode 100644
index 0000000..f758a09
--- /dev/null
+++ b/doc/news/5.2.0-vs-6.0.0.h
@@ -0,0 +1,1728 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013, 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/**
+ * @page changes_between_5_2_and_6_0 Changes between Version 5.2 and 6.0
+
+<p>
+This is the list of changes made between the deal.II releases listed above.
+All entries are signed with the names of the authors. Regular
+contributors' names are abbreviated by WB (Wolfgang Bangerth), GK
+(Guido Kanschat), RH (Ralf Hartmann).
+</p>
+
+
+<a name="incompatible"></a>
+<h3 style="color:red">Incompatibilities</h3>
+
+<p style="color:red">
+Following are a few modifications to the library that unfortunately
+are incompatible with previous versions of the library, but which we
+deem necessary for the future maintainability of the
+library. Unfortunately, some of these changes will require
+modifications to application programs. We apologize for the
+inconvenience this causes.
+</p>
+
+
+<ol>
+
+  <li> <p>Removed: Several functions in the linear algebra classes that have
+   been deprecated for more than a year have been removed.
+   <br>
+   (GK 2007/08/22)
+   </p>
+
+  <li> <p>Changed: Implementing gradients for the class <code>FunctionDerivative</code>, it became evident that its enums for
+  difference formulas clashed with those of <code>AutoDerivativeFunction</code>. Therefore, only the latter
+  survived.
+  <br>
+  (GK 2007/08/02)
+  </p>
+
+  <li> <p>Changed: When new multigrid transfer classes were introduced,
+  the existing class <code>MGTransferSelect</code> was
+  moved to the new header file
+  <code>multigrid/mg_transfer_component</code>. It also received a new
+  base class <code>MGTransferComponent</code>.
+  <br>
+  (GK 2007/04/26)
+  </p>
+
+  <li> <p>
+       Changed: Everything that is part of the deal.II library has
+       been moved into a namespace <code>dealii</code>. To allow your
+       code to compile, you will have to either put a <code>using
+       namespace dealii;</code> declaration at the top of each file,
+       or qualify each deal.II class and function explicitly with
+       <code>dealii::</code> namespace access.
+       <br> 
+       (WB 2006/10/22)
+       </p>
+
+  <li> <p>
+       Changed: Indices, such as vertex indices, are usually represented by
+       unsigned integers in deal.II. One place where we didn't do this was in
+       the <code>CellData</code> structure that can be used to describe cells
+       when building a triangulation. This has now been rectified. 
+       <br> 
+       (WB 2006/09/06)
+       </p>
+
+  <li> <p>
+       Changed: Lower dimensional objects have been removed from the
+       hierarchical structure of levels in <code>TriaLevel</code>. Faces, i.e. lines in 2D and
+       lines and quads in 3D, have no associated level
+       anymore. Therefore, the level argument of some iterator
+       functions has been removed. The affected functions are <code>Triangulation::begin_raw_face</code>, <code>begin_face</code>, <code>begin_active_face</code> and all <code>last_*_face</code> or <code>end_*_face</code> functions, no matter whether
+       raw, used or active. Also affected are the direct iterator
+       functions which are related to faces like <code>begin_line</code> in 2D and 3D or <code>begin_quad</code> in 3D. Again, the same applies
+       to <code>last_*</code> and <code>end_*</code>.
+       <br>
+       The respective functions in <code>DoFHandler</code>, <code>hp::DoFHandler</code> and <code>MGDoFHandler</code> have been changed
+       accordingly.
+       <br>
+       Nested loops with an outer loop over all levels and an inner
+       loop over all faces on that level have to be changed to a
+       single loop over all faces. In most cases, the necessary
+       changes should be internal to the library.
+       <br>
+       (Tobias Leicht, 2006/06/13)
+       </p>
+
+  <li> <p>
+       Changed: In order to facilitate the implementation of hp finite
+       elements, the ordering of boundary DoFs returned by <code>DoFTools::map_dof_to_boundary_indices</code> has been
+       changed. Fortunately, this is a rarely used function so that the effect
+       should be limited to only a few programs.
+       <br> 
+       (WB 2006/04/26)
+       </p>
+
+  <li> <p>
+       Changed: The <code>distribute_local_to_global</code> functions have
+       been moved from the classes implementing accessors to arbitrary
+       objects in the triangulation to the cell accessors, to
+       facilitate the implementation of hp methods. That means
+       that you can't call these functions for face or edge iterators
+       any more, but only cells. Since this is what is usually
+       desired, this should not be too severe a restriction.
+       <br> 
+       (WB 2006/04/26)
+       </p>
+
+  <li> <p>
+       Changed: The template argument of the <code>InterGridMap</code> class has been changed. Code
+       like e.g. <code>InterGridMap<DoFHandler,2></code> must be
+       replaced by <code>InterGridMap<DoFHandler<2> ></code>.
+       <br> 
+       (RH 2006/02/27)
+       </p>
+
+  <li> <p>
+       Changed: The <code>DoFObjectAccessor</code> and derived classes
+       used to take only the space dimension and the dimension of the
+       object they represented as template arguments. Now the latter
+       argument is replaced by the type of DoFHandler they correspond
+       to. This may be <code>::DoFHandler</code> or
+       <code>hp::DoFHandler</code>.
+       <br> 
+       (RH 2006/02/10, WB 2005/12/20)
+       </p>
+
+  <li> <p>
+       Removed: Support for gcc2.95 in particular, and all compilers that do
+       not support the <code>std::ostringstream</code> class in general,
+       has been removed.
+       <br> 
+       (WB 2006/02/03)
+       </p>
+
+  <li> <p>
+       Changed: Several classes related to the storage of data in the
+       Triangulation and DoFHandler classes have been moved into internal
+       namespaces, and some have also been removed. Since these were not for
+       use anywhere outside the library, it is unlikely that this poses
+       problems to application programs. On the other hand, it moves common
+       names like <code>Line</code> and <code>Quad</code> out of the global
+       namespace.
+       <br> 
+       (WB 2006/01/13)
+       </p>
+
+  <li> <p>
+       Changed: Previously, if <code>mg_dof_handler</code> was a
+       multilevel DoF handler object, calling
+       <code>DoFRenumbering::component_wise(mg_dof_handler)</code>
+       didn't quite do what was probably intended: it did an implicit
+       cast of <code>mg_dof_handler</code> to its base class,
+       <code>DoFHandler</code> and then renumbered the <em>global</em>
+       degrees of freedom, but didn't touch the multilevel part. This
+       has now been fixed: there is another function that takes a
+       <code>MGDoFHandler</code> object and that renumbers the global
+       part as well as all levels of the multigrid part. The change is
+       incompatible since the previous call now leads to a different
+       action; if you really want the old behavior back, cast the
+       argument to the base class before the call, like so:
+       <code>DoFRenumbering::component_wise(static_cast<DoFHandler<dim>&>(mg_dof_handler))</code>
+       <br> 
+       (WB 2005/12/15)
+       </p>
+
+  <li> <p>
+       Changed: The internal numbering of faces, lines and vertices
+       has been reimplemented. Now the numbering scheme uses a
+       lexicographic ordering (with x running fastest) wherever
+       possible. For more information on the new numbering scheme, see
+       the <a
+       href="http://ganymed.iwr.uni-heidelberg.de/pipermail/dealii/2005/000827.html">announcement</a>
+       on the mailing list.  <br> The ordering of vertices in <code>CellData</code> given to the <code>Triangulation::create_triangulation</code>
+       function has been changed accordingly. For backward
+       compatibility there is now a new <code>Triangulation::create_triangulation_compatibility</code>
+       function which takes <code>CellData</code> in the
+       old vertex ordering. However, as this function might not be
+       supported forever users are advised to change their code to the
+       new numbering scheme and to use the <code>Triangulation::create_triangulation</code>
+       function.  
+       <br> 
+       (RH 2005/11/02)
+       </p>
+</ol>
+
+
+<a name="general"></a>
+<h3>General</h3>
+
+<ol>
+
+  <li> <p>Improved: Tests for libraries in configure can handle --without now.
+  <br>
+  (GK 2007/08/22)
+  </p>
+
+  <li> <p>Updated: The version of UMFPACK bundled with deal.II has been updated
+       to release 5.1.0.
+       <br>
+       (WB 2007/08/15)
+       </p>
+
+  <li> <p>New: The step-27 tutorial program has been added. It solves a Laplace
+          equation with hp finite elements 
+	  and shows how to set finite element degrees, assemble matrices on
+	  cells with varying polynomial degrees, and how to compute a simple
+	  criterion for estimating the local Sobolev smoothness of a function.
+       <br>
+       (WB 2007/08/09)
+       </p>
+
+  <li> <p>
+       Improved: The table of contents of tutorial programs now also shows
+       subsections of introduction and results.
+       <br>
+       (WB 2007/08/05)
+       </p>
+
+  <li> <p>
+       Extended: Up to now, in 3D only 'orientable' meshes could be used in
+       deal.II, i.e. all lines are in standard orientation and the faces can be
+       either in standard orientation or with a reversed normal. In order to
+       enable the treatment of all purely hexahedral meshes, this interface
+       (only the boolean flag <tt>face_orientation</tt> so far) has
+       been extended by new flags <tt>face_flip</tt> and <tt>face_rotation</tt>,
+       denoting a face which is rotated against the 'standard face' by 180 and 90
+       degrees, respectively. Furthermore, there is a new flag
+       <tt>line_orientation</tt> with obvious meaning. 
+		 <br> These flags have to be respected during creation and refinement of a
+		 triangulation, when projecting quadrature points to faces or when the dof
+		 indices on a cell are extracted. Furthermore, asking for vertices and
+		 lines of cells is in some cases a bit more complicated. The same applies,
+       for example, 
+		 to the extraction of the information, which child of a neighbor is behind
+		 a given subface. However, this information is supplied by various
+		 functions in <code>GeometryInfo</code>. As a rule-of-thumb:
+		 if you want to use non-standard meshes, all occurrences of
+		 <tt>face_orientation</tt> have to be supplemented by <tt>face_flip</tt>
+		 and <tt>face_rotation</tt>.
+       <br> In order to reduce the impact of possible bugs, the grid is still given to
+       <code>GridReordering</code>, which now returns the original
+       connectivity if the reordering was not possible.
+       <br> A more detailed document concerning these topics will be available
+       in the future.
+		 <br>
+       (Tobias Leicht 2007/02/09)
+       </p>
+
+  <li> <p>
+       New: a program <tt>reconfigure</tt> has been added to the
+       deal.II main directory, which reruns <tt>configure</tt> with
+       the same command line arguments as last time.
+       <br>
+       (GK 2006/10/24)
+       </p>
+
+  <li> <p>New: The step-25 tutorial program, demonstrating the
+       solution of the nonlinear sine-Gordon equation (a variant of
+       the time dependent wave equation) has been added.
+       <br>
+       (WB 2006/11/05)
+       </p>
+
+  <li> <p>New: The step-21 tutorial program is ready. It considers time
+       dependent two-phase flow through a porous medium.
+       <br>
+       (WB 2006/10/30)
+       </p>
+
+  <li> <p>New: The step-23/step-24 tutorial programs have been added,
+       discussing how to solve the wave equation.
+       <br>
+       (WB 2006/10/03)
+       </p>
+
+  <li> <p>Fixed: when building the library in parallel, using <code>make
+        -jN</code> (most often with rather large values of <code>N</code>, for
+	example 8), it could sometimes happen that the same file is compiled
+	twice at exactly the same time. This leads to invalid object files and
+	libraries that would contain illegal instructions. This is now fixed.
+	<br>
+	(WB 2006/08/09)
+	</p> 
+
+  <li> <p> Changed: The default number of subdivisions in the <code>build_patches</code> functions of the DataOut classes is
+       now part of the parameters read by <code>parse_parameters</code>. This default value is used
+       whenever patches with zero subdivisions are being built.
+       <br>
+       (GK 2006/06/18)
+       </p>
+
+  <li> <p>New: a configuration parameter <tt>--with-boost</tt> has been
+	introduced, allowing the selection of a previously installed boost
+	library instead of the contributed one.
+	<br>
+	(GK 2006/03/23)
+	</p> 
+
+  <li> <p>New: The step-20 tutorial program was added. It shows how to use
+       Raviart-Thomas elements to solve a mixed formulation of the Laplace
+       equation. It also demonstrates the use of block matrices and vectors,
+       and how they can be used to define more complex solvers and
+       preconditioners working on the Schur complement.
+       <br>
+       (WB 2006/02/12)
+       </p>
+
+  <li> <p>
+       Updated: The <code>deal.II/contrib/boost</code> library has
+       been updated to boost version 1.33.1.
+       <br>
+       (RH 2006/02/08)
+       </p>
+
+  <li> <p>
+       Improved: Documentation has been greatly enhanced, both in the
+       API documentation as well as in the tutorial programs. In
+       particular, the step-1 through step-8 tutorial programs have
+       been overhauled and in many cases the documentation has been
+       rewritten in large parts. The presentation of tutorial programs
+       and reference manual should also be nicer and better readable
+       in many cases.
+       <br>
+       (WB 2006/02/07)
+       </p>
+
+  <li> <p>
+       Changed: Previously, the UMFPACK code was linked into its own
+       library, <code>liblac_umfpack.*</code>. Instead, it is now
+       directly linked into the <code>liblac.*</code> libraries. This
+       makes further linking simpler than before and also simplified
+       the link process.
+       <br>
+       (WB 2006/01/19)
+       </p>
+
+  <li> <p>
+       Improved: The order of quadrature points is now x fastest, z slowest, as
+       well as the vertices of cells. Also, the nodes and subcells of patches
+       in DataOutBase follow the same ordering. Output code could be simplified
+       a lot, saving 900 lines of code.
+       <br>
+       (GK 2006/01/18)
+       </p>
+
+  <li> <p>
+       Improved: Documentation of tutorial program is now run through
+       Doxygen. This allows cross-referencing the tutorial programs to
+       the API manual (and the other way round) and leads to a
+       generally much nicer output. In particular, formulas embedded
+       in the documentation of the programs are now properly displayed.
+       <br> 
+       (WB 2006/01/16)
+       </p>
+
+  <li> <p>
+       Improved: The link to the manual now points directly to
+       Doxygen's module list, and almost all classes are now grouped
+       into modules that capture related functionality.
+       <br> 
+       (WB 2006/01/16)
+       </p>
+
+  <li> <p>
+       Improved: If you do have a PETSc installation
+       and have set the <code>PETSC_DIR</code> and
+       <code>PETSC_ARCH</code> environment variables but do not wish
+       deal.II to be configured for PETSc use, you
+       should specify <code>--with-petsc=no</code> as a flag during
+       configuration. 
+       <br> 
+       (WB 2006/01/16)
+       </p>
+
+  <li> <p>
+       Improved: In all directories, including those for tutorial
+       programs, we generate a file called <code>Makefile.dep</code>
+       that lists the dependencies of object files on source files,
+       and that the <code>make</code> uses to determine which files to
+       recompile. If the generation of this file failed (for example
+       if <code>LD_LIBRARY_PATH</code> hadn't been set correctly), a
+       number of very strange things would happen, among others the
+       attempt to link an empty object file. This was almost
+       impossible to figure out if you didn't know what was going
+       on. This mechanism has now been robustified and should yield
+       better error messages.
+       <br> 
+       (WB 2005/11/23)
+       </p>
+
+  <li> <p>
+       New: The dynamic libraries, which is the name of shared libs
+       under Apple's OS X, are now supported on these platforms and
+       enabled by default. They should save a lot of hard disk space.
+       <br> 
+       (Oliver Kayser-Herold 2005/10/20)
+       </p>
+
+  <li> <p>
+       Fixed: The <tt>Tecplot</tt> library (<tt>tecio.a</tt>) was
+       detected but not added to <tt>LIBS</tt>. This is now fixed.
+       <br> 
+       (RH 2005/09/23)
+       </p>
+
+  <li> <p>
+       New: <tt>configure</tt> will automatically detect a
+       <tt>NetCDF</tt> installation, when its path is given by the
+       <tt>NETCDF_DIR</tt> environment variable. The path of the
+       <tt>NetCDF</tt> installation directory can also be specified
+       through the <tt>--with-netcdf=/path/to/netcdf</tt> configure
+       option.
+       <br> 
+       (RH 2005/09/23)
+       </p>
+
+  <li> <p>
+       Fixed: The PETSc libraries have been relinked each time
+       <tt>make</tt> was called. Now, the PETSc libraries will only be
+       relinked if necessary.
+       <br> 
+       (RH 2005/09/15)
+       </p>
+
+</ol>
+
+
+
+<a name="base"></a>
+<h3>base</h3>
+
+<ol>
+
+  <li> <p> New: There is a new class:
+       <code>Functions::FEFieldFunction</code> which is a Function
+       interface to a finite element solution. 
+       <br> 
+       (Luca Heltai 2007/08/29)
+       </p>
+  
+  <li> <p> Improved: <code>FunctionDerivative</code> is now
+  derived from <code>AutoDerivativeFunction</code> and implements
+  gradients as well, giving you automatic second derivatives of a function.
+  <br>
+  (GK 2007/08/02)
+  </p>
+
+  <li> <p> New: The function <code>Utilities::fixed_power<n>(q)</code>
+       calculates <code>q</code> to the power of <code>n</code> where
+       <code>n</code> is a constant known at compile time. It allows to
+       calculate powers efficiently at compile time, most often things like a
+       number to the power <code>dim</code>.
+       <br> 
+       (WB 2007/06/23)
+       </p>
+
+  <li> <p> New: The deal.II intermediate format has been
+       changed. Since files in this format are only meant to be
+       processed by exactly the same version of deal.II, this should
+       be of no concern to users. However, this restriction was
+       previously undocumented, even if implied. The documentation for
+       this has now been updated.
+       <br> 
+       (WB 2007/02/08)
+       </p>
+
+  <li> <p> New: The new <code>Functions::Monomial</code> class implements
+       monomials as a function object.
+       <br> 
+       (WB 2006/12/15)
+       </p>
+
+  <li> <p> Fixed: If no substring is found with a width smaller than the given
+       threshold, the <code>Utilities::break_text_into_lines</code> function now
+       returns the smallest possible substring (larger than the threshold).
+       <br> 
+       (Tobias Leicht 2006/12/13)
+       </p>
+
+  <li> <p> Changed: We used to set preprocessor variables
+       <code>PACKAGE_NAME</code>, <code>PACKAGE_TARNAME</code>,
+       <code>PACKAGE_VERSION</code>, <code>PACKAGE_STRING</code>,
+       <code>PACKAGE_BUGREPORT</code> in the file
+       <code><base/config.h></code>. These variables were automatically
+       set by the autoconf package, with which we generate the
+       <code>./configure</code> script. However, these variables would conflict
+       if someone were to use a different package that also uses autoconf,
+       since that package would have the same variables, just set to different
+       values. The preprocessor warns about these cases.
+       <br>
+       We now avoid this conflict by not using these names any more. Instead,
+       we use the preprocessor names as above, prefixed with
+       <code>DEAL_II_</code>.
+       <br>
+       (WB 2006/11/10)
+       </p>
+
+  <li> <p> Extended: The <code>QGaussLobatto</code> quadrature
+       rule computes Legendre-Gauss-Lobatto nodes and quadrature weights.
+       <br>
+       (Florian Prill 2006/11/02)
+       </p>
+
+  <li> <p> Extended: The <code>contract</code> function family contracts two
+       tensors. There is now a new version of this function, that contracts a
+       tensor of rank three with a second one of rank two over given indices
+       <tt>index1</tt> and <tt>index2</tt> of the first and second tensor,
+       respectively, resulting in a tensor of rank three. 
+       <br>
+       (Tobias Leicht 2006/09/27)
+       </p>
+
+  <li> <p> Fixed: The
+       <code>DataOutBase::write_deal_II_intermediate</code> function
+       did not check whether it can actually write to the stream
+       given. It would therefore silently do nothing if the file could
+       not be written to. This is now fixed: an exception is generated
+       in that case, as for all the other output functions in that class.
+       <br>
+       (WB 2006/08/31)
+       </p>
+
+  <li> <p> Improved: Stack backtraces in multithreaded mode were often
+       very long and almost unreadable because the functions in
+       namespace <code>Threads</code> that are used to set up new
+       threads have long and awkward signatures. The output is now
+       filtered to make these backtraces easier to read.
+       <br>
+       (WB 2006/08/15)
+       </p>
+
+  <li> <p> New: The second argument to <code>Utilities::int_to_string</code>
+       can now be omitted, leading to a string that isn't zero padded at all.
+       <br>
+       (WB 2006/08/02)
+       </p>
+
+  <li> <p> Changed: When there is still a <code>SmartPointer</code> object
+       pointing to another object at the time it is destroyed, this would cause
+       the program to be aborted. However, there are cases where this is not
+       desirable, for example here:
+       <pre>
+       <code>
+          void f() 
+          {
+	    Triangulation tria;
+	    DoFHandler *dh = new DoFHandler(tria);
+	    ...some function that throws an exception
+          }
+       </code>
+       </pre>
+       When the exception is thrown but not caught, the two local objects are
+       destroyed in reverse order of their construction, i.e. first the pointer
+       then the triangulation. However, only the pointer, not the
+       <code>DoFHandler</code> pointed to is destroyed, triggering the abort in
+       the triangulation since there is still the <code>DoFHandler</code>
+       object pointing to it at the time of destruction of the
+       triangulation. In such cases, one would not want to see the program
+       aborted, since then one would never learn about the actual exception
+       being thrown.
+       <br>
+       The behavior of the <code>Subscriptor</code> class has therefore been
+       changed to not abort the program any more if an exception is being
+       handled at the moment. Rather, only an error message is shown on
+       <code>std::cerr</code>.
+       <br>
+       (WB 2006/08/01)
+       </p>
+
+  <li> <p> Fixed: The <code>TableHandler::write_tex</code>
+       accidentally took a parameter of type
+       <code>std::ofstream</code> instead of <code>std::ostream</code>
+       and could therefore only write to files, not to other kinds of
+       streams. This is now fixed.
+       <br>
+       (WB 2006/07/28)
+       </p>
+
+  <li> <p> New: <code>GeometryInfo</code> offers several new
+       functions, <code>is_inside_unit_cell</code> with an epsilon parameter to
+       specify allowable offsets from the actual unit cell,
+       <code>distance_to_unit_cell</code> returning the infinity norm of the
+       distance of a given point to the unit cell, and
+       <code>project_to_unit_cell</code> returning the projection of a point
+       onto the unit cell. Also, a new member <code>vertex_to_face</code> allows
+       to determine to which faces of a cell a vertex belongs. 
+       <br>
+       (Ralf B. Schulz 2006/05/10)
+       </p>
+
+  <li> <p> Improved: <code>DataOutBase</code>::<code>OutputFormat</code> has a new value <tt>none</tt>,
+       writing no output at all. This way, the writing of output files can be
+       controlled more easily from parameter files.
+       <br>
+       (GK 2006/04/14)
+       </p>
+
+  <li> <p> Improved: <code>VectorSlice</code> has new functions
+       <code>begin()</code> and <code>end()</code>,
+       returning the corresponding vector iterators.
+       <br>
+       (GK 2006/03/31)
+       </p>
+
+  <li> <p> New: The various tensor classes can now effectively be reset to zero
+       by simply writing <code>t=0;</code> as has long been allowed for
+       matrices and vectors.
+       <br>
+       (WB 2006/03/24)
+       </p>
+
+  <li> <p> New: The new <code>deal_II_numbers::is_finite</code> function can
+       be used to check whether a floating point number is finite,
+       i.e. neither plus or minus infinite nor NaN (not a number);
+       it is in the new include file <tt>base/numbers.h</tt>, which will
+       contain non-configured inline functions of this type.
+       <br>
+       (WB 2006/03/15) (GK 2006/03/22)
+       </p>
+
+  <li> <p> Improved: If the C++ runtime environment allows it, we now
+       demangle stacktraces generated whenever an exception is thrown.
+       <br>
+       (WB 2006/03/14)
+       </p>
+
+  <li> <p> Improved: The function <code>Subscriptor</code>::<code>list_subscribers</code> logs all current subscribers of an
+       object to <tt>deallog</tt>.
+       <br>
+       (GK 2006/03/08)
+       </p>
+
+  <li> <p>Fixed: Writing a denormal "NaN" through <code>LogStream</code> objects such as 
+       <code>deallog</code> erroneously printed zero, rather
+       than "nan". This is now fixed.
+       <br>
+       (Luca Heltai, WB 2006/03/07)
+       </p>
+
+  <li> <p>Improved: The <code>TableBase</code> base class of all
+       the <code>Table</code> classes had an
+       <code>operator()</code> that takes a <code>TableIndices</code> object
+       that represents the entire set of indices at once. However, due to C++
+       name lookup rules, this operator wasn't accessible through the
+       <code>Table</code> class without explicitly specifying the base class
+       name. This is now fixed by also providing a similar
+       <code>operator()</code> in the derived classes.
+       <br>
+       (WB 2006/02/21)
+       </p>
+
+  <li> <p>Fixed: The <code>QGauss</code> constructor
+       hangs on x86 linux systems when deal.II is run from inside
+       MATLAB. The reason is that while the processor offers long
+       double accuracy, MATLAB switches off support for that inside
+       the CPU. It therefore leaves codes that expect the additional
+       accuracy high and dry. Annoyingly, these codes of course run
+       just fine outside of MATLAB's environment. This behavior leads
+       to an infinite loop inside the QGauss constructor, where we
+       want to use the additional accuracy to compute quadrature
+       points to high precision. To avoid the infinite loop, we now
+       check during runtime whether the extra precision is available,
+       not only at compile time.
+       <br>
+       (Christian Kamm, WB 2006/02/21)
+       </p>
+
+  <li> <p>Fixed: The <code>ParameterHandler::get_integer</code>
+       had an erroneous check that the value given for a parameter really
+       represented an integer. This check always succeeded, even in the face of an
+       error, and a zero value was returned every time the value in the
+       parameter file happened to be not an integer.
+       <br>
+       (WB 2006/02/17)
+       </p>
+
+  <li> <p>Improved: The <code>ComponentSelect</code> class can
+       now also handle the case that one wants to select multiple vector
+       components at once, not only a single one.
+       <br>
+       (WB 2006/02/12)
+       </p>
+
+  <li> <p>Improved: A new function <code>TableBase</code>::<code>fill</code>, filling the whole table with the same
+       element has been introduced.
+       <br>
+       (GK 2006/01/18)
+       </p>
+
+  <li> <p>Improved: <code>DataOutBase</code> now writes binary
+       files for OpenDX.
+       <br>
+       (GK 2006/01/18)
+       </p>
+
+  <li> <p>
+       New: There are now functions
+       <code>ParameterHandler::set</code> that allow to set the value of a
+       parameter to something different later on.
+       <br>
+       (Marc Secanell, WB 2006/1/2)
+       </p>
+
+  <li> <p>
+       New: There are new functions
+       <code>Utilities::match_at_string_start</code> and 
+       <code>Utilities::get_integer_at_position</code>.
+       <br>
+       (WB 2005/12/19)
+       </p>
+
+  <li> <p>
+       Fixed: The computation of <code>HMIN</code> and
+       <code>HMAX</code> in <code>DataOutBase::write_povray</code> has been
+       wrong. This is now fixed.
+       <br>
+       (RH 2005/11/02)
+       </p>
+
+  <li> <p>
+       Fixed: <code>DataOutBase<2,3>::write_tecplot_binary</code>
+       did not write the <code>z</code> coordinates. This is now
+       fixed.
+       <br>
+       (RH 2005/09/28)
+       </p>
+
+  <li> <p>
+       Fixed: The <tt>tecplot_binary</tt> <code>OutputFormat</code> has been added to <code>DataOutBase::get_output_format_names</code>. Now
+       an exception will be raised if <code>write_tecplot_binary</code> is invoked without
+       specifying the filename through the <code>DataOutBase::TecplotFlags</code> interface.
+       <br> 
+       (RH 2005/09/23)
+       </p>
+
+  <li> <p>
+       New: There are now new <code>get_n_mpi_processes</code> and <code>get_this_mpi_process</code> functions in the
+       <code>Utilities::System</code> namespace. In case
+       of a code not running in parallel, they simply return 1 and 0,
+       respectively.
+       <br> 
+       (RH 2005/09/20)
+       </p>
+</ol>
+
+
+
+<a name="lac"></a>
+<h3>lac</h3>
+
+<ol>
+  <li> <p>Changed: The <code>CompressedBlockSparsityPattern</code> has been renamed to
+       <code>BlockCompressedSparsityPattern</code> to be
+       consistent, since the "block" part builds on the "compressed sparsity
+       pattern", not the other way around. The old name remains as a typedef,
+       but its use is deprecated.
+       <br>
+       (WB 2007/06/17)
+       </p>
+
+  <li> <p>New: The <code>CompressedSetSparsityPattern</code>
+       class is an alternative to the <code>CompressedSparsityPattern</code> class that appears to be
+       better suited for problems that have many entries per row. There is also
+       a block version, <code>BlockCompressedSetSparsityPattern</code>. The two new
+       classes can be used in all places where a regular compressed sparsity
+       pattern can also be used.
+       <br>
+       (Oliver Kayser-Herold, WB 2007/06/03)
+       </p>
+
+  <li> <p>Fixed: The <code>SolverBicgstab</code> class
+       did not handle hitting on the solution early very
+       gracefully (it threw an exception). This is now fixed.
+       <br>
+       (Roger Young 2007/03/07)
+       </p>
+
+  <li> <p>Fixed: The <code>SparseDirectMA27</code> class allows
+       to run the sparse direct solver as a separate program (the
+       <code>detached_ma27</code> program) rather than as part of the current
+       program in order to be able to run several instances of it in parallel
+       during multithreaded computations. However, the destructor of the <code>SparseDirectMA27</code> class had a bug that when using
+       this detached mode led to a segmentation fault. This is now fixed.
+       <br>
+       (WB 2007/02/09)
+       </p>
+
+  <li> <p>Improved: A simple <code>print_formatted</code> 
+       function has been added to <code>SparseMatrixEZ</code>.
+       <br>
+       (Moritz Allmaras 2007/02/08)
+       </p>
+
+  <li> <p>Fixed: The <code>SparseDirectMA27</code> 
+       class works for symmetric matrices and had a check to make sure the
+       matrix is indeed symmetric. However, the check compared entries for
+       equality, something one should not do for floating point numbers. It now
+       checks that symmetric entries are equal up to a small relative number.
+       <br>
+       (WB 2006/12/21)
+       </p>
+
+  <li> <p>Fixed: The <code>FullMatrix::invert</code> 
+       function would return wrong results if one tried to invert a
+       matrix of dimension smaller than 5 in situ. This is now fixed.
+       <br>
+       (Florian Prill 2006/12/18)
+       </p>
+
+  <li> <p>Improved: The <code>SparsityPattern</code> class would produce
+       segmentation faults if one tried to allocate a matrix with more
+       than about 4.2 billion elements (i.e. the number that one can
+       store in a 32-bit unsigned integer). This is now fixed: if you
+       have enough memory, this is now possible on 64-bit
+       systems. (The number of rows is still limited by the 32-bit
+       limit, but this is probably going to be enough for some time to
+       come.) This fix also required changing the return type of the
+       <code>SparsityPattern::get_rowstart_indices</code> function
+       from <code>const unsigned int *</code> to <code>const
+       std::size_t *</code>; since this function should not be used
+       outside of the library anyway, this is probably not a change
+       that affects user code.
+       <br>
+       (WB 2006/12/14)
+       </p>
+
+  <li> <p>New: The class <code>PointerMatrixVector</code> implements the functions
+       <code>vmult</code> and <code>Tvmult</code> for a <i>1xn</i>-matrix represented by
+       a single vector.
+       <br>
+       (GK 2006/09/17)
+       </p>
+
+  <li> <p>Improved: The class <code>SolverCG</code> can
+       now estimate the condition number of the linear system using <code>TridiagonalMatrix</code> and LAPACK.
+       <br>
+       (GK 2006/09/06)
+       </p>  
+
+  <li> <p>New: the class <code>TridiagonalMatrix</code>
+       has been introduced together with its basic methods, including an
+       eigenvalue solver for the symmetric case (LAPACK).
+       <br>
+       (GK 2006/09/06)
+       </p>
+
+  <li> <p> Improved: PETSc 2.3.1 (<i>build 16</i>) is now supported by the
+       linear preconditioner classes. The new PETSc functions 
+       <code>PCFactorSetXXX</code> are used.<br>       
+       (Florian Prill, 2006/08/04)
+       </p>
+
+  <li> <p>New: The class <code>TransposeMatrix</code>
+       modeled after <code>PointerMatrix</code> swaps the
+       <code>vmult</code> functions such that its effect is
+       the transpose of the matrix it points to.
+       <br>
+       (GK 2006/07/07)
+       </p>  
+
+  <li> <p>New: There is now a function <code>FullMatrix::trace</code> that does what its name suggests
+       it does.
+       <br>
+       (WB 2006/06/16)
+       </p>
+  
+  <li> <p>Improved: <code>PointerMatrixAux</code> now has a
+       default constructor and a function for setting the <code>VectorMemory</code> object.
+       <br>
+       (GK 2006/06/14)
+       </p>
+  
+  <li> <p>Fixed: <code>FullMatrix::print</code> would yield a link error
+       when used with <code>std::ofstream</code> since an explicit
+       instantiation was missing. The function has now been moved to
+       the header file and made inline so that it is always visible,
+       whatever the type of the output stream is.
+       <br> 
+       (WB 2006/06/07)
+       </p>
+  
+  <li> <p> Improved: The <code>SparseDirectUMFPACK</code> solver can now also be
+       used with sparse matrices with elements of type float, as well
+       as with block matrices with types float and double.
+       <br> 
+       (WB 2006/04/25)
+       </p>
+
+  <li> <p> New: The function <code>BlockSparsityPattern::row_length</code> adds up
+       the row lengths of the individual blocks of a block matrix for
+       a given row.
+       <br> 
+       (WB 2006/04/25)
+       </p>
+  
+  <li> <p> New: There is a new class <code>IdentityMatrix</code> that represents an
+       efficient version of the matrix with ones on the diagonal and
+       zeros everywhere else. The main usefulness of this matrix lies
+       in the fact that most other important matrix classes have
+       assignment operators and copy constructors that allow to
+       conveniently fill a (sparse, full) matrix with the identity
+       matrix.
+       <br> 
+       (WB 2006/04/24)
+       </p>
+  
+  <li> <p> New: There are now assignment operators from <code>BlockVector</code> to <code>Vector</code> and back.
+       <br>
+       (WB 2006/03/30)
+       </p>
+
+  <li> <p> Improved: <code>BlockSparsityPattern</code> can be
+       initialized directly using the vector generated by
+       <code>DoFTools</code>::<code>compute_row_length_vector</code>.
+       <br>
+       (GK 2006/03/30)
+       </p>
+
+  <li> <p>
+       Improved: All matrix (and some vector) classes now check whether
+       entries written into them represent finite floating point
+       values. This should catch some bugs earlier where one writes
+       infinite or NaN values into a matrix only to realize later that
+       the linear solver fails.
+       <br>
+       (Stephan Kramer, WB 2006/03/15)
+       </p>  
+
+  <li> <p>
+       Changed: There are new <code>FullMatrix</code>::<code>equ</code> functions which assign <tt>this</tt>
+       matrix to the linear combination of one, two or three
+       matrices. Also there is now a new <code>Vector</code>::<code>equ</code>
+       function for three vectors.
+       <br>
+       (RH 2006/02/21)
+       </p>  
+
+  <li> <p>
+       Fixed: The <code>SolverMinRes</code> class did not work
+       with the <code>BlockVector</code> class.
+       This is now fixed.
+       <br>
+       (Luca Heltai 2006/02/19)
+       </p>  
+
+  <li> <p>
+       Changed: There are now new <code>FullMatrix</code>::<code>add</code> functions which add two and three
+       scaled matrices.
+       <br>
+       (RH 2006/02/16)
+       </p>  
+
+  <li> <p>
+       Changed: The matrix classes <code>FullMatrix</code>, <code>SparseMatrix</code>, <code>SparseMatrixEZ</code> and <code>BlockMatrixBase</code> now have an <code>add</code> function analogous to the <code>add</code> of vector classes. The old <code>add_scaled</code> functions are now deprecated.
+       <br>
+       (RH 2006/02/16)
+       </p>  
+
+  <li> <p>       
+       Improved: <code>BlockMatrixArray</code>::<code>enter_aux</code> allows using matrices without
+       adding vector multiplication by using <code>PointerMatrixAux</code>.
+       <br>
+       (GK 2006/02/02)
+       </p>  
+
+  <li> <p>
+       New: The class <code>PointerMatrixAux</code> was
+       introduced for use with matrix classes lacking the adding
+       vector multiplication functions. It implements these by using
+       its own auxiliary vector.
+       <br>
+       (GK 2006/02/02)
+       </p>
+
+  <li> <p>
+       Improved: The <code>FilteredMatrix</code> class
+       was able to filter only the <code>SparseMatrix</code>
+       matrix class.
+       A new (faster) version is now in its place with the same interface 
+       that is able to perform such a filter on anything that provides 
+       the usual matrix vector operations.
+       <br> 
+       (Luca Heltai 2006/01/09)
+       </p>
+
+  <li> <p>
+       New: All solver classes took an argument of type <code>VectorMemory</code> which they use to allocate
+       memory for temporary vectors. A clever implementation of this
+       class might allow reusing temporary vectors, and thus reduce
+       the overhead of repeatedly allocating and freeing
+       memory. However, almost all instances of using these classes
+       used the <code>PrimitiveVectorMemory</code>
+       class. The solver classes have now been changed so that if one
+       omits the respective argument, they fall back to using such a
+       memory object. Also, all example programs that did not
+       specifically use a different memory allocation class, have been
+       changed to not specify anything at all, and thus fall back to
+       the default.
+       <br> 
+       (WB 2005/11/23)
+       </p>
+
+  <li> <p>
+       Fixed: The <code>SparseMatrix</code> iterators had a
+       problem when one wrote code like <code>iterator->value()=0</code>
+       (i.e. with a zero integer, rather than a floating point number on the
+       right), in that that opened up a second conversion sequence and the
+       compiler complained about an ambiguity. This is now fixed.
+       <br> 
+       (WB 2005/10/17)
+       </p>
+
+  <li> <p>
+       New: Now the class <code>SparseILU</code> supports
+       also the <code>Tvmult</code> method. This feature
+       allows the straightforward use of <code>SparseILU</code>
+       as a multigrid smoother within the framework.
+       <br> 
+       (Oliver Kayser-Herold 2005/09/15)
+       </p>
+</ol>
+
+
+
+<a name="deal.II"></a>
+<h3>deal.II</h3>
+
+<ol>
+  <li> <p> New: There is a new class:
+       <code>Functions::ParsedFunction</code> which is a friendly
+       wrapper to the <code>FunctionParser</code> class.
+       <br> 
+       (Luca Heltai 2007/08/29)
+  
+   <li> <p>Fixed: the function
+       <code>DataOut::build_patches</code> 
+       had a quadratic algorithm when generating cell-data (as opposed
+       to DoF data). This algorithm became a bottleneck when
+       generating output on meshes with large number of cells. This is
+       now fixed.
+       <br>
+       (WB 2007/08/28)
+       </p>
+
+   <li> <p>New: the function
+       <code>DoFTools::get_active_fe_indices</code> 
+       extracts for each cell the active finite element index used on it.
+       <br>
+       (WB 2007/08/07)
+       </p>
+
+   <li> <p>Extended: the function
+       <code>Triangulation::copy_triangulation</code> 
+       copies all members of a triangulation except for the list of
+       <code>RefinementListener</code>s. In most cases this is exactly the
+       intended behavior. However, if a RefinementListener should be copied to
+       the new triangulation, e.g. if a Persistent Triangulation is created from
+       an ordinary one, it can do so now through implementing the new
+       function <code>RefinementListener::copy_notification</code>.
+       <br>
+       (Tobias Leicht 2007/06/05)
+       </p>
+
+   <li> <p>New: the function
+       <code>DoFTools::make_sparsity_pattern</code> 
+       now takes an optional constraint matrix argument that can be used to
+       avoid the subsequent call to condense the sparsity pattern. In effect,
+       the sparsity pattern is generated condensed right away. For problems in
+       3d and with many constraints, this is many times faster than separate
+       creation and condensation.
+       <br>
+       (WB 2007/06/03)
+       </p>
+
+   <li> <p>New: the new function
+       <code>ConstraintMatrix::add_entries_local_to_global</code> 
+       can be used to add entries to a matrix where entries that will appear if
+       the current entry corresponds to a constrained degree of freedom are
+       also added.
+       <br>
+       (WB 2007/06/03)
+       </p>
+
+   <li> <p>Fixed: the function
+       <code>GridTools::find_cells_adjacent_to_vertex</code> 
+       was not detecting properly the coarse cells adjacent to
+       refined cells.
+       <br>
+       (Luca Heltai 2007/05/15)
+       </p>
+
+   <li> <p>Fixed: the two tools
+       <code>DoFTools::count_dofs_per_component</code> and
+       <code>DoFTools::count_dofs_per_block</code> were changing the
+       size of the destination vector. Consistently with (most of) the 
+       rest of the library, now the vectors are expected to be the 
+       right size before calling these functions. 
+       <br>
+       (Luca Heltai 2007/05/15)
+       </p>
+
+  <li> <p>New: The classes <code>MGTransferBlockSelect</code> and <code>MGTransferBlock</code> allow for transfer of multigrid
+  vectors for single blocks of a system and for several blocks.
+  <br>
+  (GK 2007/04/26)
+  </p>
+
+   <li> <p>New: There is a new variant of
+       <code>DoFTools::make_sparsity_pattern</code> that can be used to
+       construct sparsity patterns corresponding to problems where one would
+       like to test shape functions defined on one mesh with shape functions
+       defined on a different mesh (or on the same mesh but using a different
+       DoFHandler that may use a different finite element, for example).
+       <br>
+       (Yaqi Wang 2007/03/09)
+       </p>
+
+   <li> <p>New: The
+       <code>GridTools::get_active_child_cells</code> function determines all
+       active children of a cell.
+       <br>
+       (Yaqi Wang 2007/03/09)
+       </p>
+
+   <li> <p>New: The
+       <code>ConstraintMatrix::write_dot</code> function can be used
+       to produce a graphical representation of the graph of
+       constraints stored in the constraint matrix. The output can be
+       sent through the "dot" program to produce a number of graphical
+       formats, such as postscript, png, or xfig.
+       <br>
+       (WB 2007/03/08)
+       </p>
+
+   <li> <p>Fixed: The
+       <code>GridGenerator::cylinder</code> function in 3d properly
+       set the boundary indicators of the end faces of the generated
+       cylinder to a different value than the outer faces, but it
+       forgot to also adjust the boundary indicators of the edges
+       purely in the interior to the ends of the cylinder. This is now
+       fixed.
+       <br>
+       (WB 2007/03/08)
+       </p>
+
+   <li> <p>Improved: The
+       <code>CylinderBoundary</code> class can now describe the
+       boundaries of arbitrarily oriented cylinders, not only those
+       oriented parallel to the axes and going through the origin.
+       <br>
+       (WB 2007/03/08)
+       </p>
+
+   <li> <p>Improved: The
+       <code>GridRefinement::refine_and_coarsen_fixed_number</code> and
+       <code>GridRefinement::refine_and_coarsen_fixed_fraction</code> functions
+       have gained an additional last argument that can be used to specify a
+       maximal number of cells that we would like to use in a
+       triangulation. Its default value is set to indicate that no limit is
+       desired, as is the previous behavior.
+       <br>
+       (WB 2007/02/20)
+       </p>
+
+   <li> <p>New: Added function <code>GridGenerator</code>::<code >hyper_cube_with_cylindrical_hole</code> that produces 
+	a square with a circular hole in the middle in 2d, and extrudes it
+	along the z-direction between 0 and L. 
+        <br>
+        (Luca Heltai 2007/02/15)
+        </p>
+
+   <li> <p>Workaround: The function <code>GridOut</code>::<code >write_msh</code> produces a mesh which can be visualized
+	in the Gmsh reader. A bug in Gmsh prevented the boundary indicator to
+	be properly visualized. The boundary indicator was added to the
+	material flag of the faces (which is ignored when reading back the
+	mesh) in order for the Gmsh reader to be able to display the boundary
+	indicator in a proper way. 
+        <br>
+        (Luca Heltai 2007/02/15)
+        </p>
+
+
+   <li> <p>Fixed: The function <code>DoFTools</code>::<code >distribute_cell_to_dof_vector</code> produced
+       wrong results if the vector into which the result is to be
+       stored contained nonzero values. This is now fixed.
+       <br>
+       (Rohallah Tavakoli, WB 2007/02/13)
+       </p>
+
+   <li> <p>Fixed: A local variable in
+       <code>TriaAccessor<3,3>::measure</code> was
+       erroneously marked as static, which could lead to
+       wrong results and crashes in multithreaded mode. This is now fixed.
+       <br>
+       (WB 2007/02/09)
+       </p>
+
+   <li> <p>Fixed: <code>MatrixCreator</code>::<code >create_mass_matrix</code> and <code>
+       create_laplace_matrix</code> computed wrong values for the right 
+       hand sides. This has been fixed. 
+       <br>
+       (Moritz Allmaras 2007/02/08)
+       </p>
+
+   <li> <p>Extended: <code>DataOutBase::Patch</code> has been extended by
+       a new boolean flag <tt>points_are_available</tt>, which
+       defaults to <tt>false</tt>. It is set to <tt>true</tt> if the
+       coordinates of the points defining the subdivision of a patch
+       are appended to the <tt>data</tt> table contained in a
+       <code>Patch</code>. This way, <code>DataOut</code>::<code>build_patches()</code> can use a <code>Mapping</code> to represent curved boundaries,
+       especially for higher order elements. This change corresponds
+       to an extension of the intermediate format for graphics.
+       <br>
+       Fixed: Using the given <code>Mapping</code> to
+       obtain function values in <code>DataOut</code>::<code>build_patches()</code> also fixes a bug for
+       <code>FE_RaviartThomas</code> and <code>FE_ABF</code> elements, which need to evaluate the
+       function values on the real (mapped) cell.
+       <br>
+       (Tobias Leicht 2007/01/30)
+       </p>
+
+  <li> <p> Fixed: On faces with wrong <code>face_orientation</code> the dofs
+       have to be reordered in order to be combined with the correct shape
+       functions. This is only relevant for continuous elements in 3D. At least for
+       <code>FE_Q</code> and systems of <code>FE_Q</code> this works now, for other finite elements the
+       reordering vector still has to be implemented.
+       <br>
+       (Tobias Leicht, 2007/01/17)
+       </p>
+
+  <li> <p>
+       Fixed: The <code>Triangulation</code>::<code>execute_coarsening_and_refinement</code> function has to
+       discard coarsening flags in 2d and 3d if a neighbor of a flagged cell is
+       refined or will be refined, in order to avoid that we end up with
+       neighboring cells that differ in refinement by two levels. The function
+       was overly conservative, however, in that it didn't allow a cell to be
+       coarsened if its neighbor is once refined but is also marked for
+       coarsening. This is now fixed and will lead to a few more cells being
+       coarsened.
+       <br>
+       (Yaqi Wang, WB 2006/12/27)
+       </p>
+
+  <li> <p>
+       Fixed: The <code>Triangulation</code>::<code>
+       MeshSmoothing</code> flag <code>patch_level_1</code> wrongly produced cells
+       on level 0 during <code>coarsening_and_refinement</code>, which is now fixed.
+       <br>
+       (RH 2006/12/20)
+       </p>
+
+  <li> <p>
+       Extended: There is now a new <code>Triangulation</code>
+       ::<code>MeshSmoothing</code> flag <code>coarsest_level_1</code>
+       which ensures that after <code>coarsening_and_refinement</code> there are no
+       cells on level 0, i.e. the coarsest level is 1, if the triangulation had
+       <code>coarsest_level_1</code> already before.
+       <br>
+       (RH 2006/12/20)
+       </p>
+
+  <li> <p>
+       Extended: The <code>GridIn</code> class can now
+       read in tecplot files in ASCII format (block and point format,
+       ordered and unstructured grids, format specifiers according to
+       Tecplot 10 and younger versions). At the moment the
+       implementation is restricted to 2d grids but can easily be
+       extended to 3d as well.
+       <br>
+       (Tobias Leicht 2006/12/14)
+       </p>
+
+  <li> <p>
+       Extended: So far, the <code>GridReordering</code>::<code>invert_all_cells_of_negative_grid</code>
+       function did nothing in 2d. Now, it inverts cells from
+       clockwise to counterclockwise sense (in the old numbering
+       scheme).
+       <br>
+       (Tobias Leicht 2006/12/13)
+       </p>
+
+  <li> <p>
+       New: There is now a function <code>GridTools</code>::<code>delete_duplicated_vertices</code> that deletes
+       duplicate vertices which can occur if structured grids are read
+       into deal.II, leading to unwanted interior
+       boundaries. In order to reduce the effort of the quadratic
+       algorithm, a list of vertices to be considered can be supplied
+       if available.
+       <br>
+       (Tobias Leicht 2006/12/13)
+       </p>
+
+  <li> <p>
+       New: There are now two new functions <code>GridGenerator</code>::<code>subdivided_hyper_rectangle</code> that produce
+       a non-uniformly subdivided rectangle, ideally suited for graded
+       meshes. One of these functions is able to create meshes with holes. 
+       <br>
+       (Yaqi Wang 2006/11/15, 2006/12/29)
+       </p>
+
+  <li> <p> Fixed: Corrected <code>clone</code> method 
+       of <code>FE_DGQ</code> class for non-equidistant 
+       support points.
+       <br>
+       (Florian Prill 2006/10/31)
+       </p>
+
+  <li> <p> Improved: The lookup mechanism in <code>FETools</code>::<code>get_fe_from_name</code> has been changed, so
+       additional custom finite elements can be added using <code>FETools</code>::<code>add_fe_name</code>. In the course of this
+       change, the implementation of the lookup mechanism has been
+       simplified.
+       <br>
+       (GK 2006/10/24)
+       </p>
+
+  <li> <p>
+       New: There is a new function <code>GridTools</code>::<code>create_union_triangulation</code>
+       that generates a triangulation that contains the respectively
+       finest cells of two input triangulations.
+       <br> 
+       (WB 2006/10/23)
+       </p>
+
+  <li> <p>
+       New: The <code>ConstraintMatrix</code> class did not
+       allow that one degree of freedom was constrained against another DoF
+       that was itself constrained. However, this is necessary for the
+       implementation of hp methods and is now allowed. Arbitrarily long chains
+       of constraints are resolved at the time the
+       <code>ConstraintMatrix::close()</code> function is called. The only
+       thing that is not allowed are cycles in such constraints.
+       <br> 
+       (WB 2006/09/19)
+       </p>
+
+  <li> <p>
+       New: There are new functions <code>GridTools::minimal_cell_diameter</code> and <code>GridTools::maximal_cell_diameter</code>, with obvious
+       functionality. 
+       <br> 
+       (WB 2006/09/06)
+       </p>
+
+  <li> <p>
+       Changed: The functions <code>FETools::compute_embedding_matrices</code>,
+       <code>FETools::compute_face_embedding_matrices</code>, and
+       <code>FETools::compute_projection_matrices</code>
+       (mostly used in internal computations in setting up finite
+       element objects) previously took pointers to the first element
+       of an array of matrices as arguments. This isn't type-safe, and
+       in particular did not allow to check for the number of matrices
+       in the array. The functions now take a reference to an array of
+       the correct length.
+       <br> 
+       (WB 2006/08/14)
+       </p>
+
+  <li> <p>
+       Extended: The <code>VectorTools::project</code> functions
+       are now also implemented for 1d.
+       <br> 
+       (WB 2006/08/08)
+       </p>
+
+  <li> <p>
+       Extended: <code >DerivativeApproximation</code> now offers access to the
+       full tensor of derivatives of orders one, two and three. This
+       information can be requested for a single cell by means of the <code>DerivativeApproximation</code><code>::approximate_derivative_tensor</code> function. If the
+       norm of this tensor is required later on, the new <code >DerivativeApproximation</code><code>::derivative_norm</code> function can be used. Note, that
+       for second order derivatives, this returns the largest eigenvalue
+       instead of the Frobenius norm, which is returned by the <code >Tensor<rank_,dim></code><code>::norm</code> function.
+       <br> 
+       (Tobias Leicht 2006/08/03)
+       </p>
+
+  <li> <p>
+       Fixed: <code >DerivativeApproximation</code> offers approximated
+       derivatives of a discrete function. The symmetrization of the derivative
+       tensor is now done at the right place, i.e. the derivative itself is
+       symmetrized instead of an intermediate tensor. This should improve the
+       results slightly, but cause no problems otherwise, as this is completely
+       internal to the class.
+       <br> 
+       (Tobias Leicht 2006/08/03)
+       </p>
+
+  <li> <p>
+       Fixed: The <code >DataOut::build_patches</code> and similar
+       functions in the related <code>DataOut*</code> classes allowed to pass
+       zero as the second argument (denoting the number of threads to use if
+       multithreading is enabled). This led to no output being created at
+       all. This is now fixed by throwing an exception in this case.
+       <br> 
+       (WB 2006/07/31)
+       </p>
+
+  <li> <p>
+       New: The new function <code>FiniteElementBase</code>::<code>n_dofs_per_object</code> returns either
+       <code>dofs_per_vertex</code>, <code>dofs_per_line</code>,
+       <code>dofs_per_quad</code>, ..., depending on the explicitly
+       specified function template argument. This is often useful for
+       template trickery.
+       <br> 
+       (WB, 2006/07/28)
+       </p>
+
+  <li> <p>
+       Fixed: <code >Triangulation<dim>::fix_coarsen_flags</code>
+       has been modified to allow coarsening in all possible cases. Up
+       to now, coarsening was forbidden, if the neighbor cell was not refined
+       but had the <code>refine_flag</code> set, whereas it was allowed, if
+       the neighbor cell already was refined. Now, in both cases coarsening is
+       allowed (if all children are flagged for coarsening). This leads to
+       triangulations with a slightly reduced number of cells. In some cases
+       older references will have to be updated.
+       <br> 
+       (Tobias Leicht 2006/06/22)
+       </p>
+
+  <li> <p>
+       New: There are now new internal <code>TriaObjectAccessor<1,dim></code>::<code>lines()</code> and <code>TriaObjectAccessor<2,dim></code>::<code>quads()</code> functions. By using these
+       functions, 30 function specializations could be removed,
+       significantly reducing code duplication.
+       <br>
+       (RH 2006/06/13)
+       </p>
+
+  <li> <p>
+       New: Function <code>VectorTools</code>::<code>create_point_source_vector</code> to calculate the projection
+       of a Dirac pulse on the base functions. This models a point source as
+       used in the calculations of Green's functions and can also be used to
+       extract the value of a finite element solution in a single point.
+       <br> 
+       (Ralf B. Schulz, 2006/05/15)
+       </p>
+
+  <li> <p>
+       Changed: Functions <code>VectorTools</code>::<code>point_value</code> and <code>VectorTools</code>::<code>point_difference</code> using the old interface
+       without boundary mapping were replaced by wrapper functions
+       calling the new versions.
+       <br> 
+       (Ralf B. Schulz, 2006/05/15)
+       </p>
+
+  <li> <p>
+       Changed: The old version of <code>GridTools</code>::<code>find_active_cell_around_point</code> has been replaced
+       by a wrapper function for backward compatibility. The wrapper calls the
+       new version of this function, and it is highly recommended to use the new
+       version as it automatically delivers also the local coordinate of the
+       point (so it can save some computation time in most cases as you don't have
+       to calculate that again).
+       <br> 
+       (Ralf B. Schulz, 2006/05/12)
+       </p>
+
+  <li> <p>
+       Improved: The functions <code>VectorTools</code>::<code>point_value</code> and <code>VectorTools</code>::<code>point_difference</code> now can also use arbitrary
+       mappings, using the new <code>GridTools</code>::<code>find_active_cell_around_point</code> algorithm.
+       <br> 
+       (Ralf B. Schulz, 2006/05/11)
+       </p>
+
+  <li> <p>
+       New: In <code>GridTools</code>, several functions have been added.
+       <code>GridTools::find_closest_vertex</code> searches for the vertex
+       located at closest distance to a given point. <code>
+       GridTools::find_cells_adjacent_to_vertex</code> allows to determine
+       all cells adjacent to a given vertex. And finally, a new version of
+       <code>find_active_cell_around_point</code>, which takes additionally
+       a mapping as parameter, implements a new and faster algorithm to
+       determine the active cell in which a given point is located. For
+       points located on boundaries and edges, it is in most cases also able
+       to give the finest cell.
+       <br> 
+       (Ralf B. Schulz 2006/05/10)
+       </p>
+
+  <li> <p>
+       Changed: The <code>DoFObjectAccessor::get_dof_values</code> and
+       <code>DoFObjectAccessor::set_dof_values</code> were part of the
+       accessors for lines, quads, and hexes. However, they could not
+       be called for these objects unless the object was actually a
+       cell, i.e. one could never call this function for a line or
+       face in 3d, for example. The functions have therefore been
+       moved to the <code>DoFCellAccessor</code> class that provides
+       access to cells (i.e. lines in 1d, quads in 2d, and hexes in
+       3d) for which this operation is actually useful.
+       <br> 
+       (WB 2006/05/01)
+       </p>
+  
+  <li> <p> Fixed: second derivatives were not computed correctly in <code>FEFaceValuesBase</code>, i.e. when evaluating
+       second derivatives on faces of cells. This is now fixed. Using
+       second derivatives evaluated at quadrature points within a cell
+       was not affected by this problem.
+       <br>
+       (GK 2006/04/28)
+       </p>
+
+  <li> <p>
+       New: The functions <code>Triangulation::clear_user_flags_line</code>,
+       <code>Triangulation::clear_user_flags_quad</code>, and
+       <code>Triangulation::clear_user_flags_hex</code> can be used to
+       selectively clear only some of the user flags as needed.
+       <br> 
+       (WB 2006/04/25)
+       </p>
+
+  <li> <p>
+       New: The <code>VectorTools::project</code> functions can now
+       also be used for vector arguments of type other than
+       <code>Vector<double></code>.
+       <br> 
+       (WB 2006/04/17)
+       </p>
+
+  <li> <p>
+       New: The function <code>GridTools::get_finest_common_cells</code> can be
+       used to find those cells that are shared by two triangulations that are
+       based on the same coarse mesh. This is useful, for example, when having
+       to integrate the solution on one mesh against the shape functions on
+       another mesh, a situation that frequently happens in time-dependent but
+       also in nonlinear problems.
+       <br> 
+       (WB 2006/04/14)
+       </p>
+
+  <li> <p>
+       New: The function <code>GridTools::have_same_coarse_mesh</code> figures
+       out whether two triangulations, two DoFHandlers, etc, are built on the
+       same coarse mesh.
+       <br> 
+       (WB 2006/04/14)
+       </p>
+
+  <li> <p>
+       New: Since calling <code>cell->get_dof_indices</code> is a
+       fairly frequent operation (called about 7 times per cell in
+       step-14, but much more often in more complicated programs), the
+       results of this function are now cached for faster access.
+       <br> 
+       (WB 2006/03/29)
+       </p>
+
+  <li> <p>
+       Fixed: An exception was generated when trying to interpolate
+       boundary values for a primitive component of a composite finite
+       element that also has non-primitive base elements (for example,
+       trying to interpolate boundary values for the last component
+       of a RT x Q1 element). This is now fixed.
+       <br> 
+       (WB 2006/03/27)
+       </p>
+
+  <li> <p>
+       New: There is a new function <code>FiniteElement</code>::<code>face_to_equivalent_cell_index</code> that can
+       convert a face index of a degree of freedom into a
+       corresponding cell index.
+       <br> 
+       (WB 2006/03/27)
+       </p>
+
+  <li> <p>
+       New: There are now functions <code>VectorTools</code>::<code>point_value</code> that evaluate the value of a
+       finite element function at a given point inside the domain.
+       <br> 
+       (WB 2006/03/06)
+       </p>
+
+  <li> <p> Improved: <code>GridOut</code> now has functions for
+       declaring and parsing parameters in a <code>ParameterHandler</code>, like used in <code>DataOut_Interface</code> for a long time.
+       <br>
+       (GK 2006/03/06)
+       </p>
+
+  <li> <p>
+       Improved: The <code>Triangulation</code>, <code>PersistentTriangulation</code>, <code>DoFHandler</code>, <code>hp::DoFHandler</code> and <code>MGDoFHandler</code> classes now all have a <code>dimension</code> variable which allows to ask
+       the template argument <tt>dim</tt> by
+       <code>SomeClass::dimension</code>.
+       <br> 
+       (RH 2006/02/27)
+       </p>
+
+  <li> <p>
+       Fixed: When used, the copy constructor of <code>MappingQ</code> would lead to memory
+       corruption. This is now fixed.
+       <br> 
+       (RH 2006/02/23)
+       </p>
+
+  <li> <p>
+       New: There is now a <code>StaticMappingQ1</code>::<code>mapping</code> object of type <code>MappingQ1</code> which is used in several parts
+       of the library. This way multiple creation and storage of <code>MappingQ1</code> objects can be avoided.
+       <br>
+       Similar to <code>StaticMappingQ1</code>::<code>mapping</code> there is now also an object <code>hp::StaticMappingQ1</code>::<code>mapping_collection</code> of type <code>MappingCollection</code>.
+       <br>
+       (RH 2006/02/21)
+       </p>
+
+  <li> <p>
+       New: There is now a <code>GridOut</code>::<code>write_gmsh</code> function to write in the gmsh 
+       format. It has the same features as the write_ucd one.
+       <br> 
+       (Luca Heltai 2006/02/17)
+       </p>
+
+  <li> <p>
+       Changed: The <code>hp::FECollection</code>::<code>get_fe</code>, <code>hp::QCollection</code>::<code>get_quadrature</code> and <code>hp::MappingCollection</code>::<code>get_mapping</code> functions are now renamed to
+       the standard <code>operator[]</code>
+       function. Similarly, the <code>FECollection</code>::<code>n_finite_elements</code>, <code>QCollection</code>::<code>n_quadratures</code> and <code>MappingCollection</code>::<code>n_mappings</code> functions are now renamed to
+       the standard <code>size()</code> function.
+       <br> 
+       (RH 2006/02/10)
+       </p>
+
+  <li> <p>
+       Improved: <code>GridGenerator</code>::<code>half_hyper_ball</code> now is implemented also 
+       in three dimensions, and the boundary now is colored with 0 and 1
+       on the curved and planar parts, respectively.
+       <br> 
+       (Luca Heltai 2006/02/07)
+       </p>
+
+  <li> <p>
+       Improved: <code>FiniteElementData</code> and
+       <code>FiniteElement</code> now support the
+       concept of blocks along with components. See the glossary for
+       the difference.
+       <br>
+       (GK 2006/01/25)
+       </p>
+
+  <li> <p>
+       Fixed: The <code>MGDoFHandler</code>::<code>distribute_dofs</code> and <code>renumber</code> functions could not handle
+       coarsened grids (unused vertices, faces and cells). This is now
+       fixed.
+       <br> 
+       (RH 2006/01/23)
+       </p>
+
+  <li> <p>
+       New: The <code>Mapping</code> class and derived classes now have
+       functions <code>clone()</code> that return a new object of the same
+       (derived) type.
+       <br> 
+       (WB 2006/1/8)
+       </p>
+
+  <li> <p>
+       Improved: The <code>KellyErrorEstimator</code> class now also allows to
+       select the cells on which it is supposed to work by giving a material
+       ID, instead of only a subdomain ID.
+       <br> 
+       (WB 2006/1/4)
+       </p>
+
+  <li> <p>
+       Improved: A new <code>TriaAccessor::ExcCellHasNoChildren</code>
+       exception will be raised if the <code>TriaObjectAccessor::child_index</code> function
+       is invoked for cells without children.
+       <br> 
+       (RH 2005/12/09)
+       </p>
+
+  <li> <p>
+       Fixed: We had a bug in <code>DataOut::build_patches</code>
+       that, when used in multithreaded mode, caused an exception to be
+       thrown. In particular, this happened when the step-14 program was run on
+       dual processor machines and the library was compiled for
+       multithreading. This is now fixed.
+       <br> 
+       (WB 2005/10/20)
+       </p>
+
+  <li> <p>
+       New: There is now a new <code>Triangulation::get_boundary_indicators</code>
+       function.
+       <br> 
+       (RH 2005/09/30)
+       </p>
+
+  <li> <p>
+       New: There is now a new <code>GridTools::delete_unused_vertices</code>
+       function. Previously a <tt>private</tt> function in <code>GridIn</code> it has now been moved to and made
+       <tt>public</tt> in <code>GridTools</code>.
+       <br> 
+       (RH 2005/09/28)
+       </p>
+
+  <li> <p>
+       New: The <code>GridIn<dim>::read_netcdf(string
+       &filename)</code> function reads grids from NetCDF files. The
+       only data format currently supported is the <tt>TAU grid
+       format</tt>.
+       <br> 
+       (RH 2005/09/23)
+       </p>
+
+  <li> <p>
+       Fixed: The <code>GridIn<dim>::read(filename, format)</code>
+       function ran into an exception when called with
+       <tt>Default</tt> format and a filename which does not include
+       <tt>/</tt>. This is now fixed.
+       <br> 
+       (RH 2005/09/21)
+       </p>
+</ol>
+
+
+
+*/
diff --git a/doc/news/6.0.0-vs-6.1.0.h b/doc/news/6.0.0-vs-6.1.0.h
new file mode 100644
index 0000000..e730884
--- /dev/null
+++ b/doc/news/6.0.0-vs-6.1.0.h
@@ -0,0 +1,563 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2007 - 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/**
+ * @page changes_between_6_0_and_6_1 Changes between Version 6.0 and 6.1
+
+<p>
+This is the list of changes made between the deal.II releases listed above.
+It is subdivided into changes made to the three sub-libraries <a href="#base">base</a>,
+<a href="#lac">lac</a>, and <a href="#deal.II">deal.II</a>, as well as
+changes to the <a href="#general">general infrastructure,
+documentation, etc</a>.
+</p>
+
+<p>
+All entries are signed with the names of the authors. Regular
+contributors' names are abbreviated by WB (Wolfgang Bangerth), GK
+(Guido Kanschat), RH (Ralf Hartmann).
+</p>
+
+
+<a name="incompatible"></a>
+<h3 style="color:red">Incompatibilities</h3>
+
+<p style="color:red">
+Following are a few modifications to the library that unfortunately
+are incompatible with previous versions of the library, but which we
+deem necessary for the future maintainability of the
+library. Unfortunately, some of these changes will require
+modifications to application programs. We apologize for the
+inconvenience this causes.
+</p>
+
+<ol>
+  <li> <p>Removed: The <code>FullMatrix::add_diag</code> function was removed. It
+  offered functionality of questionable use in the first place, and its
+  implementation was correct only for matrices of size $3 \times 3$, $4 \times 4$
+  and $8 \times 8$ without anyone noticing for 10 years. Consequently, it can't
+  have been used very frequently.
+  <br>
+  (WB 2007/12/09)
+  </p>
+
+  <li> <p>Changed: The namespace deal_II_numbers has been renamed dealii::numbers.
+  The old name stemmed from a time when not everything was already in
+  namespace <code>dealii</code>. The old name is retained via a namespace
+  alias but is deprecated and will eventually be removed.
+  <br>
+  (WB 2007/11/02)
+  </p>
+
+  <li> <p>Changed: When writing output files in UCD format using either the
+  DataOutBase or the GridOut class, we used to write a preamble at the
+  beginning of the file that included the date and time the file was created.
+  However, several visualization programs get confused when confronted with
+  comments at the beginning of UCD files. Rather than printing a sensible
+  error message, they usually simply refuse to show any output, making it
+  very hard to track down the actual cause.
+  <br>
+  The classes mentioned above previously allowed to suppress writing a preamble
+  by setting a flag in the DataOutBase::UcdFlags or GridOutFlags::Ucd
+  structures. Given how complicated it is to find the actual
+  source of trouble, the default for these flags has been changed to not
+  produce this preamble any more. If desired, the flag can be used to still
+  produce a preamble.
+  <br>
+  (WB 2007/10/30)
+  </p>
+
+  <li> <p>Changed: The version number of the deal.II intermediate format written by
+  DataOutBase::write_deal_II_intermediate has been increased to 3 to accommodate the fact that
+  we now support writing vector-valued data to output files in at least some output formats.
+  (Previously, vector-valued data was written as a collection of scalar fields.) Since
+  we can only read files written in intermediate format that has the same number as the
+  files we generate, this means that files written with previous format numbers can now
+  no longer be read.
+  <br>
+  (WB 2007/10/11)
+  </p>
+
+  <li> <p>Changed: FilteredMatrix now can be applied to any matrix having the standard
+  set of <code>vmult</code> functions. In order to achieve this, its interface
+  had to be overhauled.
+  Only the <code>VECTOR</code> template argument remains. Furthermore, instead of
+  PreconditionJacobi being applied to FilteredMatrix, FilteredMatrix
+  can now be applied to any preconditioner.
+  <br>
+  (GK 2007/09/25)
+  </p>
+
+  <li> <p>Changed: The deprecated typedefs
+  <code>internal::Triangulation::Line</code>,
+  <code>internal::Triangulation::Quad</code>, and
+  <code>internal::Triangulation::Hexahedron</code> have been removed.
+  <br>
+  (WB 2007/09/07)
+  </p>
+</ol>
+
+
+
+<a name="general"></a>
+<h3>General</h3>
+
+<ol>
+<li> <p>New: step-33, written mainly by David Neckels,
+  illustrates how the Euler equations of compressible inviscid gas
+  dynamics can be solved. More generally, the program also provides
+  a framework for the solution of arbitrary hyperbolic, nonlinear
+  conservation laws.
+  <br>
+  (WB, 2008/05/23)
+  </p>
+
+<li> <p>Improved: step-16 has been updated to show the
+  use of Multigrid with Dirichlet boundary conditions.
+  <br>
+  (GK, 2007/05/13)
+  </p>
+
+<li> <p>New: step-22, written by Martin Kronbichler and Wolfgang
+     Bangerth, shows how to solve, and in particular how to solve
+     efficiently, the Stokes equations that describe viscous
+     incompressible fluid flow.
+  <br>
+  (WB, 2008/03/28)
+  </p>
+
+<li> <p>New: step-29 demonstrates how problems involving
+  complex %numbers can be implemented by viewing real and imaginary parts
+  of a complex-valued solution as the two components of a vector-valued
+  function.
+  <br>
+  (Moritz Allmaras, 2007/10/31)
+  </p>
+
+<li> <p>New: A significantly simpler way to code the assembly of linear
+  systems for problems with more than one solution variable has been
+  implemented. This is explained in detail in the report on @ref vector_valued
+  and tutorial programs step-20 and step-21
+  have been converted as well.
+  <br>
+  (WB, 2008/02/23)
+  </p>
+
+  <li> <p>Improved: On Mac OS X, the operating system provides for
+  "frameworks", which are essentially collections of shared libraries.
+  We now link with the "Accelerate" framework (from Mac OS X 10.4
+  onwards) or the "vecLib" framework (for previous versions) instead
+  of the individual BLAS and LAPACK libraries if they are needed. This
+  insulates us from having to use the actual name of these libraries,
+  which may be subject to change, and it may also link with optimized
+  or vectorized libraries if they are available.
+  <br>
+  (Eh Tan, WB 2007/10/22)
+  </p>
+
+</ol>
+
+
+
+<a name="base"></a>
+<h3>base</h3>
+
+<ol>
+<li> <p> New: The FunctionParser::default_variable_names() function returns
+default names for variables in the given space dimension. For example, in
+2d, this would be "x,y".
+<br>
+(WB 2008/05/12)
+</p> </li>
+
+<li> <p>New: ConvergenceTable::evaluate_convergence_rates has been incomplete
+for a long time. An implementation for reduction_rate and
+reduction_rate_log2 has been added in the case where one specifies
+a reference column that allows to compute error rates also for local
+refinement.
+<br>
+(Luca Heltai 2008/04/16)
+</p></li>
+
+<li> <p> Fixed: The Utilities::int_to_string function did not work for 5 or 6
+digits, while its counterpart Utilities::needed_digits does. This is now fixed,
+and the two are coherent.
+<br>
+(Luca Heltai 2008/04/16)
+</p> </li>
+
+<li> <p> New: The SymmetricTensor class now has a constructor that creates
+an object from an array consisting of its independent components.
+<br>
+(WB 2008/02/21)
+</p> </li>
+
+<li> <p> New: There are now output operators (i.e. <code>operator@<@<</code>)
+for the SymmetricTensor class.
+<br>
+(WB 2008/02/12)
+</p> </li>
+
+<li> <p> Improved: LogStream is now thread safe and output lines from different
+threads are separated now. Additionally, a function LogStream::log_thread_id()
+has been added to log the id of the thread printing the message. The semantics
+has slightly changed in that the header is now printed at the time when the line
+is finished.
+<br>
+(GK 2008/01/22)
+</p> </li>
+
+<li> <p> New: There is a function Threads::this_thread_id() now returning
+an integer id of the current thread.
+<br>
+(GK 2008/01/22)
+</p> </li>
+
+<li> <p> Improved: Quadrature now has an assignment operator.
+<br>
+(GK 2007/12/27)
+</p> </li>
+
+<li> <p>Fixed: MultithreadInfo::n_cpus now gives the correct result
+  also on my Intel MacBook Pro.
+  <br>
+  (Luca Heltai 2007/11/12)
+  </p> </li>
+
+  <li> <p>New: There is now a template numbers::NumberTraits that provides
+  the means to implement linear algebra and other algorithms for both real
+  and complex data types.
+  <br>
+  (WB 2007/11/03)
+  </p> </li>
+
+  <li> <p>New: The DataOutBase class and derived classes can now deal with data that is
+  logically vector-valued, i.e. derived classes can pass down information that some of
+  the components of the output should be grouped together as vectors, instead of being
+  treated as a number of separate scalar fields. This allows visualization programs to
+  display these components as vector-fields right away, without the need to manually group
+  them together into a vector.
+  <p>
+  While all output format writers receive the information necessary to do this, currently
+  only the VTK writer in DataOutBase::write_vtk as well as the deal.II
+  intermediate format writer make use of it.
+  <p>
+  The use of this ability is explained in the step-22 and
+  step-33 tutorial programs.
+  <br>
+  (WB 2007/10/11)
+  </p> </li>
+
+  <li> <p>New: Macro #AssertDimension introduced for easier handling of
+  ExcDimensionMismatch.
+  <br>
+  (GK 2007/09/13)
+  </p> </li>
+
+</ol>
+
+
+
+<a name="lac"></a>
+<h3>lac</h3>
+
+<ol>
+  <li> <p>Changed: A few simple PETSc wrappers for helper functions
+  PETScWrappers::MatrixBase::transpose,
+  PETScWrappers::MatrixBase::is_symmetric,
+  PETScWrappers::MatrixBase::is_hermitian, and
+  PETScWrappers::MatrixBase::write_ascii have been added.
+  <br>
+  (Toby D. Young 2008/04/30)
+  </p>
+
+<li> Fixed: The SolverQMRS class could not deal with block vectors
+and block matrices. This is now fixed.
+<br>
+(WB 2008/3/20)
+</li>
+
+<li> Fixed: The implementation of SparseILU::decompose was rather
+inefficient in that it accessed random elements of the matrix in its
+inner loop. It has been replaced by the algorithm given in the book
+by Yves Saad: "Iterative methods for sparse linear systems", second
+edition, in section 10.3.2. The code is now about 8 times faster than
+before.
+<br>
+(WB 2008/2/29)
+</li>
+
+<li> Fixed: The implementation of SparseILU::vmult very needlessly called
+SparseMatrix::vmult just to throw away the (nonsensical) result
+immediately. This is now fixed.
+<br>
+(WB 2008/2/27)
+</li>
+
+<li> New: The functionality print_gnuplot is now also available for sparsity
+patterns derived from the class BlockSparsityPatternBase, e.g. for
+BlockSparsityPattern in the same way as for general sparsity patterns.
+<br>
+(Martin Kronbichler 2008/2/26)
+</li>
+
+<li> Fixed: The PETScWrappers::MatrixBase::clear_row and
+PETScWrappers::MatrixBase::clear_rows functions had a memory leak that made
+PETSc become progressively slower over time. This is now fixed.
+<br>
+(Daniel Goldberg 2008/2/20)
+</li>
+
+<li> Improved: All GrowingVectorMemory objects that have the same template
+argument will access the same memory pool. Therefore, it is now not a
+crime any more to just create a memory pool and discard it later. Furthermore,
+logging of statistics has been switched off by default, such that linear
+solvers remain silent.
+<br>
+(GK 2007/12/16)
+</li>
+
+<li> Fixed: Vector::operator/= can't work when the scaling factor is zero,
+but it happened to check whether the factor was positive. That's of course
+bogus, the check should have been whether it is non-zero. This has now been
+fixed.
+<br>
+(WB 2007/11/03)
+</li>
+
+<li> New: A class ScaledMatrix was introduced which combines the vector operations of
+an underlying matrix with a scaling.
+<br>
+(GK 2007/10/30)
+</li>
+
+<li> Improved: FilteredMatrix has an iterator now that allows users to access the
+constraints individually.
+<br>
+(GK 2007/10/30)
+</li>
+
+
+</ol>
+
+
+
+<a name="deal.II"></a>
+<h3>deal.II</h3>
+
+<ol>
+
+  <li> <p> Improved: GridOutFlags::Gnuplot now takes an additional curved_inner_cells
+  parameter. Similar to curved_inner_cells in DataOut this allows the output of interior
+  cells with curved boundaries. This is useful for MappingQEulerian or similar mappings.
+  <br>
+  (RH 2008/05/12)
+  </p></li>
+
+  <li> <p>Fixed: Functions::FEFieldFunction<dim>.set_active_cell was declared
+  inline in the declaration but not in the implementation. This is now fixed.
+  <br>
+  (Luca Heltai 2008/04/16)
+  </p></li>
+
+  <li> <p>New: The FE_RaviartThomasNodal now supports hp functionality.
+  <br>
+  (Zhu Liang, 2008/04/04)
+  </p></li>
+
+  <li> <p>Improved: The StraightBoundary object Triangulation::straight_boundary
+  is now made public. This allows to use Triangulation::straight_boundary instead
+  of recreating a StraightBoundary object when needed.
+  <br>
+  (RH, 2008/04/04)
+  </p></li>
+
+  <li> <p>Extended: SolutionTransfer has an additional template parameter DH
+  for the dof handler class to use, which defaults to DoFHandler. However, an
+  instantiation is also provided for hp::DoFHandler.
+  <br>
+  (Tobias Leicht, 2008/04/03)
+  </p></li>
+
+  <li> <p>Extended: DataOut has an extended interface now which enables a user
+  decision, which cells shall be written as curved cells using the provided
+  mapping: none, only cells at the boundary, or all cells. The last choice can
+  be useful when a mapping like MappingQEulerian is employed.
+  <br>
+  (Tobias Leicht, 2008/03/20)
+  </p></li>
+
+  <li> <p>Changed: From now on the SolutionTransfer class does not use any user
+  pointers of the underlying Triangulation object. Thus several SolutionTransfer
+  instances with different DoFHandler objects can be used simultaneously. It is
+  no longer necessary to assemble all solution vectors into a new one with a
+  combined DoFHandler.
+  <br>
+  (Tobias Leicht, 2008/03/18)
+  </p></li>
+
+  <li> <p>New: There is now a namespace DoFRenumbering::boost that contains the
+  implementation of interfaces to three reordering strategies provided by the
+  Boost Graph Library, namely DoFRenumbering::boost::Cuthill_McKee,
+  DoFRenumbering::boost::king_ordering, and DoFRenumbering::boost::minimum_degree.
+  <br>
+  (WB 2008/03/07)
+  </p></li>
+
+  <li> <p>Changed: The DoFRenumbering class has been converted into a namespace.
+  Since all its members were static, this hasn't changed anything fundamental
+  and use of the functions therein should work in exactly the same way as before.
+  <br>
+  (WB 2008/03/07)
+  </p></li>
+
+  <li> <p>New: The new TriaObjectAccessor::set_all_boundary_indicators function
+  does not only set the boundary indicator of the current object but of all that
+  bound it as well. For example, in 3d, if TriaObjectAccessor::set_boundary_indicator
+  is called on a face, then the boundary indicators of the 4 edges that bound the face
+  remain unchanged. On the other hand, the boundary indicators of face and edges are
+  all set at the same time using the new function.
+  <br>
+  (WB 2008/02/26)
+  </p></li>
+
+  <li> <p>Improved: The tria pointer and the GridIn::debug_output_grid function
+  are changed from private to protected to be accessible in derived classes.
+  <br>
+  (RH 2008/02/24)
+  </p></li>
+
+  <li> <p>New: The new Boundary::get_new_point_on_face and
+  get_intermediate_points_on_face functions offer a dimension-independent
+  interface to the Boundary class.
+  <br>
+  (RH 2008/02/24)
+  </p></li>
+
+  <li> <p>New: There is now a FEFaceValuesBase::get_face_index function which
+  returns the index of the face selected the last time the reinit() function
+  was called.
+  <br>
+  (RH 2008/02/15)
+  </p></li>
+
+  <li> <p>New: The MappingQEulerian class provides an arbitrary order Eulerian
+  mapping where the nodes of a mesh are displaced by a previously computed
+  vector field.
+  <br>
+  (Joshua White 2008/02/05)
+  </p></li>
+
+  <li> <p>New: The MappingQ class constructor now takes a parameter that determines
+  whether a higher order mapping should also be used for interior cells. By default,
+  the higher order mapping is only used for cells on the boundary; cells in the
+  interior are bounded by straight edges described by a (bi-, tri-)linear mapping.
+  <br>
+  (WB 2008/02/05)
+  </p></li>
+
+  <li> <p>New: The function VectorTools::compute_no_normal_flux_constraints computes
+  the constraints that correspond to boundary conditions of the
+  form $\vec u \cdot \vec n = 0$.
+  <br>
+  (WB 2008/01/23)
+  </p></li>
+
+  <li> <p>Fixed: Neither ConstraintMatrix::print nor ConstraintMatrix::write_dot
+  produced any output for constraints of the form $x_i=0$, i.e. where the right
+  hand side is a trivial linear combination of other degrees of freedom. This
+  is now fixed.
+  <br>
+  (WB 2008/01/23)
+  </p></li>
+
+  <li> <p>Improved: GridGenerator::subdivided_hyper_rectangle now also colorizes
+  cells according to the octant they are in.
+  <br>
+  (GK 2007/12/20)
+  </p></li>
+
+  <li> <p>New: The DataOut, DataOutRotation, DataOutStack, and DataOutFaces
+  can now deal with vector-valued data if the functions in DataOutBase
+  that write in a particular graphical output format can deal with it.
+  Previously, if a finite element field had more than one component,
+  they were all output as logically independent scalar components;
+  most visualization programs then allowed to display vector fields
+  by composing them of individual scalar fields for each vector component.
+  </p><p>
+  With the new scheme, the DataOut_DoFData::add_data_vector() functions
+  inherited by the classes listed above take an additional parameter
+  that may be used to indicate that certain components of the data
+  logically form a vector field. The output formats for which this
+  is presently implemented then indicate this fact in the output file. The
+  mechanism is shown in action in step-22 and
+  step-33.
+  <br>
+  (WB 2007/10/14)
+  </p></li>
+
+  <li> <p>Improved: UpdateFlags::update_q_points has been renamed to
+  UpdateFlags::update_quadrature_points. Additional update flags for support
+  points have been added without functionality, yet.
+  <br>
+  (GK 2007/10/12)
+  </p></li>
+
+  <li> <p>Improved: The number of blocks of an FESystem was properly defined and the
+  constructors changed accordingly. At least none of the test programs noticed the change.
+  <br>
+  (GK 2007/10/03)
+  </p></li>
+
+  <li> <p>Improved: In an effort to make names more consistent, second
+  derivatives in FEValuesBase and UpdateFlags have been renamed to
+  Hessians. Thus, the clash between the forms <tt>2nd</tt> and
+  <tt>second</tt> has been removed. Old function and enum names are
+  available for compatibility but have been marked deprecated.
+  <br>
+  (GK 2007/09/12)
+  </p></li>
+
+  <li> <p>Fixed+New: The GridOutFlags::Ucd and
+  GridOutFlags::Msh structures now take a new parameter
+  <code>write_lines</code> to output lines with boundary id different
+  from 0 in three-dimensional meshes. This fixes an annoying bug for
+  which meshes with ids different from zero were not written in a
+  compatible way, and if re-read with the corresponding
+  GridIn functions, would not yield the same mesh upon
+  refinement.
+  <br>
+  (Luca Heltai 2007/09/10)
+  </p>
+  </li>
+
+  <li> <p>Extended: The possibilities of graphical output via the DataOut,
+  DataOutFaces and DataOutRotation classes have been extended by the
+  ability to perform a postprocessing step before a given data
+  vector is written to the output. This way, derived variables can be
+  written instead of the original data. To this end, there is a new
+  version of DataOut_DoFData::add_data_vector taking a
+  data vector and a DataPostprocessor, which performs the actual
+  calculation of new data based on the values and possibly derivatives
+  of the original data at each point of the patch.
+  <br>
+  (Tobias Leicht 2007/09/10)
+  </p>
+  </li>
+
+</ol>
+
+
+*/
diff --git a/doc/news/6.1.0-vs-6.2.0.h b/doc/news/6.1.0-vs-6.2.0.h
new file mode 100644
index 0000000..c82df3e
--- /dev/null
+++ b/doc/news/6.1.0-vs-6.2.0.h
@@ -0,0 +1,1384 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/**
+ * @page changes_between_6_1_and_6_2 Changes between Version 6.1 and 6.2
+
+<p>
+This is the list of changes made between the deal.II releases listed above,
+subdivided into changes made to the three sub-libraries <a href="#base">base</a>,
+<a href="#lac">lac</a>, and <a href="#deal.II">deal.II</a>, as well as
+changes to the <a href="#general">general infrastructure,
+documentation, etc</a>.
+</p>
+
+<p>
+All entries are signed with the names of the author. Regular
+contributors' names are abbreviated by WB (Wolfgang Bangerth), GK
+(Guido Kanschat), RH (Ralf Hartmann).
+</p>
+
+
+<a name="incompatible"></a>
+<h3 style="color:red">Incompatibilities</h3>
+
+<p style="color:red">
+Following are a few modifications to the library that unfortunately
+are incompatible with previous versions of the library, but which we
+deem necessary for the future maintainability of the
+library. Unfortunately, some of these changes will require
+modifications to application programs. We apologize for the
+inconvenience this causes.
+</p>
+
+<ol>
+  <li>
+  <p>
+  Changed: The implementation of the reordering algorithms in the boost
+  namespace created multiple edges between two nodes. The new implementation
+  submits not more than one edge, but that affects the ordering of the
+  degrees of freedom and deviates from previous behavior.
+  <br>
+  (Martin Kronbichler 2009/04/22)
+  </p>
+
+  <li>
+  <p>
+  Changed: The files implementing the class ConstraintMatrix are now
+  saved in the lac/ subdirectory, named <code>constraint_matrix.h</code>
+  and included by the line <code>\#include
+  @<lac/constraint_matrix.h@></code>. After all, constraints are just
+  algebraic objects and do not depend on anything in the deal.II
+  directory (in particular, they are dimension-independent).  The old
+  <code>\#include @<dofs/dof_constraints.h@></code> include line is still
+  enabled by a reference to the new file, but users should try to adapt
+  their codes to the new file names as the reference might be removed in
+  the future.
+  <br>
+  (Martin Kronbichler 2009/03/04)
+  </p>
+
+  <li>
+  <p>
+  Changed: The DoFTools::get_subdomain_association function used
+  to assign degrees of freedom to the subdomain of the last cell on which
+  the degree of freedom is a part. This introduced a bias for degrees of
+  freedom's subdomains located on boundaries of subdomains, and
+  consequently to unequal numbers of DoFs per subdomain even if the
+  number of cells is roughly equal across subdomains. This behavior has
+  been changed by assigning degrees of freedom pseudo-randomly to any of
+  the subdomains on which they are located. This is a deviation from
+  previous behavior, however.
+  <br>
+  (Timo Heister, WB 2008/11/02)
+  </p>
+
+  <li>
+  <p>
+  Changed: The way we set up threads in the Threads::spawn functions
+  and friends has been completely rewritten, using the boost::bind and
+  boost::function libraries. This has made things significantly simpler
+  and allowed us to remove some 4,100 lines of code. The only
+  user-visible side effect is that you can now no longer spawn
+  functions with 10 arguments (this was the maximum before) whereas 9
+  arguments continues to work; the reason for this is a limitation in
+  the boost::bind library. This limit will be lifted with the next
+  C++ standard, however, using variadic templates.
+  <br>
+  (WB 2008/10/10)
+  </p>
+
+  <li>
+  <p>
+  Changed: The SolutionTransfer class used to take a type as second
+  template argument that denoted the scalar upon which Vector objects
+  were built, in order to allow interpolating Vector@<float@> objects,
+  for example. This argument has now been changed to a vector type,
+  and been given a default of Vector@<double@>; however, one can
+  now also pass in BlockVector objects, or objects of type
+  PETScWrappers::Vector, etc. On the downside, the old
+  SolutionTransfer::refine_interpolate function with only a single
+  argument has been deleted since there is no reliable way to resize
+  a vector unless it is a plain Vector@<double@>.
+  <br>
+  (WB 2008/08/28)
+  </p>
+
+  <li>
+  <p>
+  Changed: The FiniteElement::get_prolongation_matrix and
+  FiniteElement::get_restriction_matrix functions now have an
+  additional argument of type RefinementCase to enable a
+  meaningful use of these matrices in case of anisotropic
+  refinement. These matrices are used for solution transfer and
+  multigrid operations, but there should be no need to use them
+  in user codes directly. Note, that the matrices are not fully
+  implemented for all finite elements up to now.
+  <br>
+  (Tobias Leicht 2008/07/08 as of branch_anisotropic at 2006/07/04)
+  </p>
+
+  <li>
+  <p>
+  Changed: <code>GeometryInfo::children_per_cell</code> has been
+  replaced by GeometryInfo::max_children_per_cell, which
+  represents the maximum number of children a cell might have,
+  i.e. the number of children in the case of isotropic
+  refinement. But note, that this number will rarely be needed in
+  user codes. In general, the number of children of a cell varies
+  from cell to cell and can be obtained by
+  <code>cell->n_children()</code>, which gives the number of
+  children of a specific <code>cell</code> which is refined iso-
+  or anisotropically.
+  <br>
+  (RH 2008/07/08 as of branch_anisotropic at 2005/02/26)
+  </p>
+
+  <li>
+  <p>
+  Changed: The analogous change applies to the number of children
+  of a face. <code>GeometryInfo::subfaces_per_face</code> has
+  been replaced by GeometryInfo::max_children_per_face, which
+  represents the maximum number of children a face might have,
+  i.e. the number of children in the case of an isotropically refined
+  face.  But note, that this number will rarely be needed in user
+  codes. In general, the number of children of a face varies from
+  face to face and can be obtained by
+  <code>face->n_children()</code>, which gives the number of
+  children of a specific <code>face</code> which is refined iso-
+  or anisotropically.
+  <br>
+  (RH 2008/07/08 as of branch_anisotropic at 2005/02/26)
+  </p>
+
+  <li>
+  <p>
+  Changed: The GeometryInfo::child_cell_on_face function is
+  generalized to anisotropic refinement and has now an additional
+  RefinementCase argument. This function will rarely be used in
+  user codes, as e.g. the neighbor's child at a specific face and
+  subface should be accessed through
+  <code>cell->neighbor_child_on_subface</code> rather than
+  through the GeometryInfo::child_cell_on_face function.
+  <br>
+  (RH 2008/07/08 as of branch_anisotropic at 2005/02/27)
+  </p>
+</ol>
+
+
+<a name="general"></a>
+<h3>General</h3>
+
+<ol>
+  <li>
+  <p>
+  Updated: The step-22 tutorial program now uses some
+  advanced features of the ConstraintMatrix such as inhomogeneities
+  for implementing Dirichlet boundary condition and condensation of
+  constraints on the fly.
+  <br>
+  (Martin Kronbichler 2009/04/20)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  New: There is now a program, contributed by Jean-Paul Pelteret, that can
+  convert Cubit generated meshes into UCD format that the GridIn class can
+  read. The program along with a description is in the contrib/mesh_conversion
+  directory.
+  <br>
+  (Jean-Paul Pelteret 2009/04/16)
+  </p>
+
+  <li>
+  <p>
+  Updated: Configure mechanism and a few PETSc functions in the lac directory
+  have been upgraded to support migration to PETSc version >=3.0.0 in analogy
+  to the existing interface.
+  <br>
+  (Toby D. Young 2009/03/31)
+  </p>
+
+  <li>
+  <p>
+  New: A new tutorial program  step-34 was added to the
+  library that shows the usage of the new codimension one functionality
+  recently added to the library. In this tutorial we show the use of
+  boundary element methods on piecewise constant functions defined over
+  a surface, and we solve the irrotational flow problem, or exterior
+  Neumann Laplace problem.
+  <br>
+  (Luca Heltai 2009/03/10)
+  </p>
+
+  <li>
+  <p>
+  Fixed: When linking with both Trilinos and BLAS, the BLAS libraries appeared
+  first on the command line; however, if the Trilinos libraries required BLAS
+  then this was the wrong order. This is now fixed.
+  <br>
+  (WB 2009/02/11)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  Changed: The subversion repository for deal.II development is now
+  located on commercially hosted space at <a
+  href="http://www.dealii.org/svn/dealii/">http://www.dealii.org/svn/dealii/</a>.
+  <br>
+  (GK 2009/01/20)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  Changed: Some parts of the library used to use classes and functions
+  from the <a href="http://www.boost.org/">BOOST</a> library. Since
+  many of the components of BOOST have been voted into what will be the
+  next C++ standard, we now use a namespace std_cxx1x (coined on the
+  provisional name C++0x used for the next C++ standard) into which we
+  import BOOST components as necessary. If a compiler supports C++0x,
+  we can then later replace BOOST components by elements from namespace
+  <code>std</code> in one central place, rather than all throughout the
+  code.
+  <br>
+  (WB 2009/01/12)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  Updated: The step-33 tutorial program now uses the
+  Trilinos wrapper
+  classes for accessing Trilinos linear algebra routines instead of
+  implementing that by hand.
+  <br>
+  (Martin Kronbichler 2009/01/07)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  Updated: The version of the <a href="http://www.boost.org/">BOOST</a>
+  library that is in the <code>contrib/</code> directory and is used in
+  various places of the library has been upgraded to 1.37.
+  <br>
+  (WB 2008/12/07)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  New: Based on work by Cataldo Manigrasso and Luca Heltai
+  developed over the last few months on a branch, deal.II now
+  supports meshes that are embedded in higher space
+  dimensions. This allows, for example, to solve equations on
+  the surface of a sphere (e.g. to use boundary element
+  methods for exterior problems).
+  <br>
+  In practice, this is implemented by giving a large number
+  of classes a second template argument. Whereas the first one,
+  typically named <code>dim</code> still denotes the dimensionality
+  of the triangulation (e.g. <code>dim=2</code> for triangulations
+  of quadrilaterals, or <code>dim=3</code> for those made
+  up of hexahedra), the second template argument <code>spacedim</code>
+  denotes the dimensionality of the space this all is embedded in.
+  As a consequence, if one wants to have a triangulation of a
+  two-dimensional manifold in three-dimensional space, one would
+  use <code>dim=2, spacedim=3</code> as template arguments.
+  <br>
+  In order to remain backward compatible, the new second template
+  argument always has a default value that equals <code>dim</code>
+  meaning it is still sufficient to write <code>Triangulation@<2@></code>
+  if you intend two-dimensional triangulations of two-dimensional
+  domains.
+  <br>
+  The new functionality is described in the step-34
+  tutorial program.
+  <br>
+  (Cataldo Manigrasso, Luca Heltai 2008/12/05)
+  </p>
+
+  <li>
+  <p>
+  Changed: The various accessor classes have been rewritten entirely.
+  Where previously we had these accessor classes as separate
+  specializations for 1d, 2d, and 3d, each implementing the same
+  set of functions, these classes have now all been merged into
+  common templates TriaAccessor, DoFAccessor, and MGDoFAccessor,
+  along with the existing CellAccessor, DoFCellAccessor and
+  MGDoFCellAccessor. As a result, the documentation of individual
+  functions should now be much easier to find.
+  <br>
+  (WB 2008/12/05)
+  </p>
+
+  <li>
+  <p>
+  New: In analogy to the existing interfaces to the PETSc library, there
+  are now also interfaces to much of the linear algebra part of the
+  Trilinos library.
+  <br>
+  (Martin Kronbichler 2008/12/01)
+  </p>
+
+  <li>
+  <p>
+  New: The deal.II configure script now autodetects SLEPc, an eigenvalue
+  solver. Currently only SLEPc versions >=3.0.0 are supported.
+  <br>
+  (Toby D. Young 2008/11/27)
+  </p>
+
+  <li>
+  <p>
+  New: A new  tutorial program, step-31, is contributed by
+  Martin Kronbichler and Wolfgang Bangerth and shows the
+  solution of the Boussinesq approximation to thermally driven
+  convection in an incompressible fluid.
+  <br>
+  (Martin Kronbichler 2008/11/14)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  Updated: In the step-22 tutorial program the generation of
+  the sparsity pattern using the class BlockCompressedSetSparsityPattern has
+  been replaced by a pattern of the class BlockCompressedSimpleSparsityPattern,
+  which uses far less memory and is slightly faster.
+  <br>
+  (Martin Kronbichler, WB 2008/11/12)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  New: The shared libraries we create are now versioned, i.e. they have
+  the form <code>libdeal_II_2d.g.so.6.2.0</code>. The library without
+  the version number is simply a link to the versioned library name.
+  <br>
+  (WB 2008/10/07)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  Updated: The version of the <a href="http://www.boost.org/">BOOST</a>
+  library that is in the <code>contrib/</code> directory and is used in
+  various places of the library has been upgraded to 1.36.
+  <br>
+  (WB 2008/09/19)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  Fixed: In the step-23 tutorial program the terms corresponding
+  to the external force were computed incorrectly. The error wasn't visible
+  right away since in the program the right hand side is assumed to be zero.
+  This has now been fixed.
+  <br>
+  (Johan Lorentzon, WB 2008/09/12)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  New: A new  tutorial program, step-28, is contributed by
+  Yaqi Wang. It illustrates the solution
+  of a coupled system of diffusion equations relevant to
+  nuclear reactor physics where we use different meshes for
+  different components of a vector-valued solution.
+  <br>
+  (Yaqi Wang 2008/09/10)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  New: Contributed by Tobias Leicht as well as Ralf Hartmann, deal.II
+  now supports anisotropic refinement of meshes. This functionality
+  is explained in the step-30 tutorial program.
+  <br>
+  (Tobias Leicht, RH 2008/07/08)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  Fixed: In the new step-33 tutorial program there was
+  a place where we incorrectly passed the diameter of a cell, rather
+  than a face, to a function. This is now fixed.
+  <br>
+  (Chih-Che Chueh, WB 2008/06/28)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  Fixed: The new step-33 tutorial program had a place where
+  we didn't release some memory. This is now fixed.
+  <br>
+  (Chih-Che Chueh, WB 2008/06/02)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  Fixed: A missing include file prevented the <code>./configure</code> script
+  from detecting the presence of the demangler with recent versions of the
+  gcc compiler. The result is that backtraces after failed assertions only
+  show the mangled function names, not their plain text equivalent. This is
+  now fixed.
+  <br>
+  (WB 2008/05/27)
+  </p>
+  </li>
+</ol>
+
+
+
+<a name="base"></a>
+<h3>base</h3>
+
+<ol>
+  <li>
+  <p>
+  New: There is now a new QGaussOneOverR class, that allows for integration
+  on the two dimensional reference element of arbitrary polynomial functions
+  with weight 1/R. This class is only instantiated for dim=2, and it is intended
+  for use with collocation type boundary element methods of order 1, where the
+  singularities are collocated on the vertices of the quadrilaterals.
+  <br>
+  (Luca Heltai 2009/03/11)
+  </p>
+
+  <li>
+  <p>
+  New: There is now a new QGaussLogR class, that generalizes the QGaussLog class to
+  allow for arbitrary location of singularities, and singularity factors.
+  <br>
+  (Luca Heltai 2009/03/11)
+  </p>
+
+  <li>
+  <p>
+  New: The FunctionParser class now supports the fparser library's interface to use
+  units (like cm, or km) in expressions. An example is given in the documentation of
+  that class.
+  <br>
+  (Victor Prosolin 2009/03/01)
+  </p>
+
+  <li>
+  <p>
+  Changed: The classes Threads::ThreadMutex and Threads::ThreadCondition have
+  been renamed Threads::Mutex and Threads::ConditionVariable. The old names
+  were somewhat redundant but have been retained as typedefs for compatibility.
+  <br>
+  (WB 2009/01/14)
+  </p>
+
+  <li>
+  <p>
+  New: There is now a class TimerOutput that allows to neatly measure computing
+  times in different sections of a program and write that information to screen.
+  <br>
+  (Martin Kronbichler 2009/01/12)
+  </p>
+
+  <li>
+  <p>
+  New: The Timer class can now also return the wall time using the method
+  Timer::wall_time(), as opposed to the CPU time returned by
+  Timer::operator ().
+  <br>
+  (Martin Kronbichler 2009/01/07)
+  </p>
+
+  <li>
+  <p>
+  New: The new class types_are_equal allows to write some templates more
+  efficiently by allowing to figure out whether certain template types are,
+  for example, equal to double or float (in which case we can use
+  BLAS functions, or could do something else special).
+  <br>
+  (WB 2008/10/31)
+  </p>
+
+  <li>
+  <p>
+  New: The Utilities::reverse_permutation and Utilities::invert_permutation
+  compute the reverse and inverse of a given permutation of indices.
+  <br>
+  (WB 2008/10/31)
+  </p>
+
+  <li>
+  <p>
+  Fixed: The PolynomialsRaviartThomas class had a bug that led to random
+  results when used from multiple threads. As a consequence the FE_RaviartThomas
+  class was unusable in a multithreaded context. This has now been fixed.
+  <br>
+  (WB 2008/10/13)
+  </p>
+
+  <li>
+  <p>
+  New: There is a new function scalar_product(const Tensor<2,dim> &,
+  const Tensor<2,dim> &) that computes the scalar product
+  $a:b=\sum_{i,j} a_{ij}b_{ij}$ between two tensors of rank 2.
+  <br>
+  (WB 2008/08/15)
+  </p>
+
+  <li>
+  <p>
+  New: If the compiler allows to do <code>\#include @<mpi.h@></code>, then
+  the preprocessor flag <code>DEAL_II_COMPILER_SUPPORTS_MPI</code> is now set in
+  <code>base/include/base/config.h</code>. This also fixes a problem in
+  <code>base/include/base/utilities.h</code> if a compiler capable of
+  including <code>mpi.h</code> was used but not PETSc.
+  <br>
+  (WB 2008/08/15)
+  </p>
+
+  <li>
+  <p>
+  Fixed: A misplaced <code>\#include</code> directive prevented the file
+  <code>base/source/data_out_base.cc</code> from being compilable by
+  the PGI C++ compiler. This is now fixed.
+  <br>
+  (WB 2008/08/05)
+  </p>
+
+  <li>
+  <p>
+  New: There is now a new
+  GeometryInfo::min_cell_refinement_case_for_face_refinement
+  (resp. GeometryInfo::min_cell_refinement_case_for_line_refinement)
+  function which returns the RefinementCase representing the
+  smallest refinement case of a cell for a given refinement of
+  one of its faces (resp. lines). In 2D for example a cell has to
+  be refined at least with RefinementCase::cut_y if the left line
+  (line 0) shall be refined. Another refinement possibility for
+  the cell would be <code>RefinementCase::cut_xy</code>, but that
+  is not the minimal case.
+  <br>
+  (Tobias Leicht 2008/07/08 as of branch_anisotropic at 2006/06/28)
+  </p>
+
+  <li>
+  <p>
+  New: There is now a new GeometryInfo::line_refinement_case
+  function which returns the RefinementCase representing the
+  refinement case of a line for a given refinement case of the
+  corresponding cell.
+  <br>
+  (Tobias Leicht 2008/07/08 as of branch_anisotropic at 2006/06/28)
+  </p>
+
+  <li>
+  <p>
+  New: The new
+  <tt>GeometryInfo::n_children(refinement_case)</tt>
+  function returns the number of children a cell/face has when
+  refined with the RefinementCase <tt>refinement_case</tt>.
+  <br>
+  (RH 2008/07/08 as of branch_anisotropic at 2005/11/07)
+  </p>
+
+  <li>
+  <p>
+  New: Given a RefinementCase of a cell the new
+  GeometryInfo::face_refinement_case function returns the
+  RefinementCase of a face.
+  <br>
+  (RH 2008/07/08 as of branch_anisotropic at 2005/11/07)
+  </p>
+
+  <li>
+  <p>
+  New: There is now a new RefinementCase
+  GeometryInfo::isotropic_refinement representing the isotropic
+  refinement case in <code>dim</code> dimensions,
+  i.e. GeometryInfo<1>::isotropic_refinement = RefinementCase::cut_x,
+  GeometryInfo<2>::isotropic_refinement = RefinementCase::cut_xy
+  and
+  GeometryInfo<3>::isotropic_refinement = RefinementCase::cut_xyz.
+  <br>
+  (RH 2008/07/08 as of branch_anisotropic at 2005/03/03)
+  </p>
+
+  <li>
+  <p>
+  New: The class QGaussLog allows the quadrature of integrals with logarithmic
+  kernels.
+  <br>
+  (Cataldo Manigrasso 2008/05/28)
+  </p>
+
+</ol>
+
+
+
+<a name="lac"></a>
+<h3>lac</h3>
+
+<ol>
+  <li>
+  <p>
+  Updated: The SparseMatrix::precondition_SSOR function was implemented
+  inefficiently. The new implementation uses about half the instruction
+  count and reduces execution times on typical applications by ten to
+  thirty percent.
+  <br>
+  (Martin Kronbichler 2009/04/21)
+  </p>
+
+  <li>
+  <p>
+  Updated: The SparseILU::vmult kernel has been re-written to use similar
+  data structures as SparseMatrix::vmult, which reduces the count of
+  operations by one third and the execution times on typical applications
+  by ten to twenty percent.
+  <br>
+  (Martin Kronbichler 2009/04/16)
+  </p>
+
+  <li>
+  <p>
+  New: There is now a new class VectorView@<Number@> that allows views of
+  arbitrary areas of memory to be seen as a Vector@<Number@>, simplifying
+  a lot of work related to portability issues between different libraries, and
+  allowing for subviews of Vector@<Number@> classes without the need to
+  copy large chunks of memory back and forth.
+  <br>
+  To be used with EXTREME caution, and only when you know exactly what you
+  are doing, and speed is a necessity.
+  <br>
+  (Luca Heltai 2009/04/05)
+  </p>
+
+  <li>
+  <p>
+  Updated: The local_to_global functions in ConstraintMatrix got smarter,
+  which accelerates sparsity pattern generation and makes writing into
+  sparse matrices using distribute_local_to_global faster.
+  <br>
+  (Martin Kronbichler 2009/03/16)
+  </p>
+
+  <li>
+  <p>
+  Fixed: The FullMatrix::swap_row and FullMatrix::swap_col functions
+  had bugs that made them only work in case the matrix was square.
+  This is now fixed.
+  <br>
+  (WB 2009/03/05)
+  </p>
+
+  <li>
+  <p>
+  New: Added a few simple helper functions (to VectorBase) that allow
+  some manipulation of PETSc vectors. These functions do what they say
+  in the documentation.
+  <br>
+  (Toby D. Young 2009/01/08)
+  </p>
+
+  <li>
+  <p>
+  New: There is now a class TrilinosWrappers::SparsityPattern that allows to
+  construct distributed sparsity patterns that can be used for initializing
+  Trilinos sparse matrices. This means that for large problems on several
+  MPI processors the memory requirements are reduced and the initialization
+  speed is increased.
+  <br>
+  (Martin Kronbichler 2008/12/29)
+  </p>
+
+  <li>
+  <p>
+  New: All SparseMatrix classes (SparseMatrix@<number@>, PETSc sparse
+  matrices, Trilinos sparse matrices, block sparse matrices) can now
+  directly add and set a FullMatrix and some other arrays into their value
+  list. This is faster and more convenient than an element-by-element
+  addition/set.
+  <br>
+  (Martin Kronbichler 2008/11/26)
+  </p>
+
+  <li>
+  <p>
+  New: The class LAPACKFullMatrix can now invert full matrices using
+  the (optimized) LAPACK functions getrf and getri. The speedup over
+  the FullMatrix::gauss_jordan() function is a factor of two for matrices
+  with 100 rows and columns, and grows with matrix size.
+  <br>
+  (Martin Kronbichler 2008/11/11)
+  </p>
+
+  <li>
+  <p>
+  Fixed: The BlockMatrixBase::clear() function that is used by all other
+  block matrix type classes had a memory leak in that the memory
+  allocated by all sub-objects was not freed. This is now fixed.
+  <br>
+  (WB 2008/11/05)
+  </p>
+
+  <li>
+  <p>
+  New: The function SparsityTools::reorder_Cuthill_McKee reorders
+  the nodes of a graph based on their connectivity to other nodes.
+  <br>
+  (WB 2008/10/31)
+  </p>
+
+  <li>
+  <p>
+  New: The function GridTools::get_face_connectivity_of_cells produces a
+  sparsity pattern that describes the connectivity of cells of a
+  triangulation based on whether they share common faces.
+  <br>
+  (WB 2008/10/31)
+  </p>
+
+  <li>
+  <p>
+  Changed: The function SparsityPattern::partition has been deprecated. It
+  is now available in a new namespace SparsityTools that collects algorithms
+  that work on sparsity patterns or connectivity graphs.
+  <br>
+  (WB 2008/10/31)
+  </p>
+
+  <li>
+  <p>
+  Fixed: Whereas the Vector class copy operator resized the left hand side
+  operand whenever necessary, the corresponding operator of the BlockVector
+  class did not. This is now fixed.
+  <br>
+  (Christian Cornelssen, WB 2008/10/28)
+  </p>
+
+  <li>
+  <p>
+  Changed: The SparseDirectUMFPACK class now calls the umfpack_dl_* routines
+  instead of umfpack_di_*. On machines with 64-bit longs this allows the
+  UMFPACK solver to allocate more than 2GB of memory for large problems.
+  <br>
+  (Moritz Allmaras 2008/10/16)
+  </p>
+
+  <li>
+  <p>
+  Improved: The SparseILU::initialize function, for some reason, required
+  the second argument, SparseILU::AdditionalParameters, to be present even
+  if it is a default constructed object. This argument now has a default value
+  that equates to a default constructed object that can therefore be omitted
+  when so desired.
+  <br>
+  (WB 2008/09/23)
+  </p>
+
+  <li>
+  <p>
+  New: Added the CompressedSimpleSparsityPattern as an alternative to
+  CompressedSparsityPattern and CompressedSetSparsityPattern, which
+  should be faster in most cases but its memory usage is somewhere
+  in between.
+  <br>
+  (Timo Heister 2008/09/03)
+  </p>
+
+  <li>
+  <p>
+  Improved: The CompressedSparsityPattern can now elide some operations
+  upon entering entries that may have been added before already.
+  <br>
+  (Timo Heister, WB 2008/08/28)
+  </p>
+
+  <li>
+  <p>
+  Fixed: There are now functions CompressedSparsityPattern::print and
+  CompressedSetSparsityPattern::print, in analogy
+  to SparsityPattern::print .
+  <br>
+  (WB 2008/07/31)
+  </p>
+
+  <li>
+  <p>
+  Fixed: The FullMatrix::copy_from(FullMatrix,bool) function had a bug
+  that resulted in an exception being triggered whenever the given matrix
+  had rows where the diagonal element is zero and the second parameter
+  to the function was set to <code>true</code>. This is now fixed.
+  <br>
+  (WB 2008/07/30)
+  </p>
+
+
+  <li> New: Added two new functions to calculate the left or the right inverse of a given
+  rectangular matrix (FullMatrix::left_invert, FullMatrix::right_invert).
+  They throw an exception if the matrices are not invertible.
+  <br>
+  (Cataldo Manigrasso 2008/6/12)
+  </li>
+
+  <li> Fixed: FullMatrix::Tadd can now correctly transpose rectangular
+  matrices; there was a mismatch of the indices in the code.
+  <br>
+  (Cataldo Manigrasso 2008/6/12)
+</li>
+
+</ol>
+
+
+
+<a name="deal.II"></a>
+<h3>deal.II</h3>
+
+<ol>
+  <li>
+  <p>
+  Fixed: VectorTools::integrate_difference now works also on codimension one grids.
+  <br>
+  (Luca Heltai 2009/04/10)
+  </p>
+
+  <li>
+  <p>
+  Changed: The ConstraintMatrix class can now also handle inhomogeneous
+  constraints. This means that e.g. Dirichlet boundary conditions are now a
+  special case of algebraic constraints. There are new condense functions
+  that simultaneously act on matrices and vectors for use of this feature.
+  <br>
+  (Martin Kronbichler 2009/02/25)
+  </p>
+
+  <li>
+  <p>
+  Changed: The DoFTools::count_dofs_per_component and
+  DoFTools::count_dofs_per_block erroneously resized the output argument
+  to the number of components or blocks in the finite element, respectively,
+  even if the target component/block list given as an additional argument
+  needed a different number of output elements. This is now fixed.
+  <br>
+  (WB 2009/02/05)
+  </p>
+
+  <li>
+  <p>
+  New: The GridGenerator::half_hyper_shell function now also exists in 3d.
+  However, to use it with the HalfHyperShellBoundary class, one has to
+  pass inner and outer radii to the constructor.
+  <br>
+  (Martin Kronbichler, WB, 2009/01/13)
+  </p>
+
+  <li>
+  <p>
+  New: The restriction and embedding matrices for FE_Q are now implemented.
+  <br>
+  (Tobias Leicht, RH, 2008/12/23)
+  </p>
+
+  <li>
+  <p>
+  Improved: The GeometryInfo::cell_to_child_coordinates and
+  GeometryInfo::child_to_cell_coordinates functions are extended to
+  anisotropic refinement.
+  <br>
+  (Tobias Leicht, RH, 2008/12/23)
+  </p>
+
+  <li>
+  <p>
+  Fixed: There were cases with both anisotropic refinement and non-standard
+  line orientation where we created wrong meshes in 3d. This is now fixed.
+  <br>
+  (Tobias Leicht, RH, 2008/12/23)
+  </p>
+
+  <li>
+  <p>
+  Fixed: The FEValuesExtractors classes were not copyable though there
+  are legitimate reasons to do so. This is now fixed.
+  <br>
+  (WB 2008/12/17)
+  </p>
+
+  <li>
+  <p>
+  Fixed: The MappingQEulerian function was not thread-safe. This is now fixed.
+  <br>
+  (WB 2008/12/16)
+  </p>
+
+  <li>
+  <p>
+  New: The functions FETools::lexicographic_to_hierarchic_numbering and
+  FETools::hierarchic_to_lexicographic_numbering now also exist in variants
+  returning their result by value, rather than through their last argument.
+  <br>
+  (WB 2008/12/16)
+  </p>
+
+  <li>
+  <p>
+  New: FEValues objects can now directly calculate the Laplacians of
+  solution functions at quadrature points. This is equivalent to the
+  trace of the Hessians, but faster and more convenient in user code.
+  The step-31 program has also been updated to use this.
+  <br>
+  (Martin Kronbichler 2008/12/15)
+  </p>
+
+  <li>
+  <p>
+  Improved: The FEValuesViews objects that one gets when writing things
+  like <code>fe_values[velocities]</code> have acquired the ability to
+  extract individual components of solutions at quadrature points by
+  writing code like
+  <code>fe_values[velocities].get_function_values (global_solution,
+  local_velocity_values)</code>. See the new section at the end of the
+  @ref vector_valued module for an overview of this facility. The
+  step-31 program has also been updated to use this.
+  <br>
+  (WB 2008/12/13)
+  </p>
+
+  <li>
+  <p>
+  Improved: The FEValuesViews objects that one gets when writing things
+  like <code>fe_values[velocities]</code> (see @ref vector_valued) have
+  become a lot smarter. They now compute a significant amount of data
+  at creation time, rather than on the fly. This means that creating such
+  objects becomes more expensive but using them is cheaper. To offset this
+  cost, FEValuesBase objects now create all possible FEValuesViews objects
+  at creation time, rather than whenever you do things like
+  <code>fe_values[velocities]</code>, and simply return a reference to a
+  pre-generated object. This turns an $O(N)$ effort into an $O(1)$
+  effort, where $N$ is the number of cells.
+  <br>
+  (WB 2008/12/10)
+  </p>
+
+  <li>
+  <p>
+  Upgraded: The FunctionParser classes now use version 2.83 of the fparser
+  library.
+  <br>
+  (Luca Heltai 2008/12/08)
+  </p>
+
+  <li>
+  <p>
+  Fixed: The GridGenerator::laplace_transform would only do at most 1000
+  iterations in its solver, irrespective of the actual number of nodes to
+  be moved around. This is now fixed: the maximum number now equals the
+  number of nodes.
+  <br>
+  (Luca D'Auria 2008/12/05)
+  </p>
+
+  <li>
+  <p>
+  New: The function DoFTools::make_zero_boundary_constraints() computes the
+  constraints that result from requiring the degrees of freedom at the
+  boundary to be zero. Use cases are when the sparsity pattern is not known
+  / not wanted / not accessible.
+  <br>
+  (Toby D. Young 2008/12/04)
+  </p>
+
+  <li>
+  <p>
+  Updated: The function ConstraintMatrix::distribute_local_to_global() for
+  matrices does now use row-wise addition into sparse matrices, which
+  accelerates the transfer from local to global data.
+  <br>
+  (Martin Kronbichler, WB 2008/11/27)
+  </p>
+
+  <li>
+  <p>
+  Fixed: The VectorTools::interpolate_boundary_values function was implemented a bit
+  clumsily and was using much more time than necessary. This should be fixed now.
+  <br>
+  (WB 2008/11/25)
+  </p>
+
+  <li>
+  <p>
+  Fixed: The GridIn::read_msh function had a bug that made it reject
+  MSH input files if they contained type-15 cells with more than one
+  associated vertex.
+  <br>
+  (WB 2008/11/05)
+  </p>
+
+  <li>
+  <p>
+  Fixed: The DoFRenumbering::Cuthill_McKee algorithm had a bug when applied
+  to MGDoFHandler objects and if the <code>reverse</code> flag was set. This
+  should now be fixed.
+  <br>
+  (WB 2008/10/31)
+  </p>
+
+  <li>
+  <p>
+  New: MatrixTools::apply_boundary_values() also for PETScWrappers::MPI::BlockSparseMatrix.
+  <br>
+  (Timo Heister 2008/10/27)
+  </p>
+
+  <li>
+  <p>
+  New: When calling the function DoFTools::make_sparsity_pattern with a ConstraintMatrix, it is now possible to set a bool argument keep_constrained_dofs. When this flag is set to false, constrained rows and columns will not be part of the sparsity pattern, which increases the performance of matrix operations and decreases memory consumption in case there are many constraints.
+  <br>
+  (Martin Kronbichler 2008/10/21)
+  </p>
+
+  <li>
+  <p>
+  New: There is now a second DoFTools::count_dofs_with_subdomain_association function that
+  calculates the number of degrees of freedom associated with a certain subdomain and
+  splits the result up according to the vector component of each degree of freedom. This
+  function is needed when splitting block matrices in parallel computations.
+  <br>
+  (WB 2008/10/07)
+  </p>
+
+  <li>
+  <p>
+  Fixed: The GridOut::write_gnuplot function had a bug that made it output only the
+  very first cell. This is now fixed.
+  <br>
+  (WB 2008/10/09)
+  </p>
+
+  <li>
+  <p>
+  New: The GridIn::read_msh function can now read version 2 of the MSH format described
+  <a target="_top" href="http://www.geuz.org/gmsh/doc/texinfo/gmsh_10.html">here</a>.
+  <br>
+  (WB 2008/10/07)
+  </p>
+
+  <li>
+  <p>
+  Fixed: In rare cases, when Triangulation::limit_level_difference_at_vertices
+  is passed to the constructor of the Triangulation class, meshes could be
+  generated that do not honor this flag. This is now fixed.
+  <br>
+  (WB 2008/10/06)
+  </p>
+
+  <li>
+  <p>
+  New: The class FE_Q can now alternatively be constructed based on
+  support points from a given one-dimensional quadrature rule.
+  <br>
+  (Katharina Kormann, Martin Kronbichler, 2008/09/07)
+  </p>
+
+  <li>
+  <p>
+  Fixed: Using the ConstraintMatrix class, when a degree of freedom was
+  constrained against another DoF, and that other DoF was constrained to
+  be equal to zero (i.e. the trivial linear combination of a third set of
+  degrees of freedom), an exception resulted. This is now fixed.
+  <br>
+  (WB 2008/08/15)
+  </p>
+
+  <li>
+  <p>
+  New: It is now possible to get the inverse of the Jacobian
+  matrix from the transformation from the real to the unit cell by
+  using FEValues::inverse_jacobian.
+  <br>
+  (Martin Kronbichler 2008/08/13)
+  </p>
+
+  <li>
+  <p>
+  New: There is a second GridTools::partition_triangulation
+  function that takes a cell connectivity pattern as argument, rather
+  than computing it itself as the existing function does. Use cases are
+  discussed in the documentation of the new function.
+  <br>
+  (WB 2008/08/06)
+  </p>
+
+  <li>
+  <p>
+  Fixed: GridTools::find_cells_adjacent_to_vertex had a bug that
+  prevented its correct functioning in three dimensions. Some
+  cases were left out due to incorrect assumptions on the various
+  refinement possibilities.
+  <br>
+  (Luca Heltai 2008/07/17)
+  </p>
+
+  <li><p>
+  New: There is now a new
+  Triangulation::prevent_distorted_boundary_cells function which is
+  only useful in case of anisotropic refinement. At the boundary
+  of the domain, the new point on the face may be far inside the
+  current cell, if the boundary has a strong curvature. If we
+  allow anisotropic refinement here, the resulting cell may be
+  strongly distorted, especially if it is refined again later
+  on. To prevent this problem, this function flags such cells for
+  isotropic refinement. It is called automatically from
+  Triangulation::prepare_coarsening_and_refinement. Therefore
+  this should have no effect on user codes.
+  <br>
+  (Tobias Leicht 2008/07/08 as of branch_anisotropic at 2006/08/02)
+  </p>
+
+  <li>
+  <p>
+  New: There is now a new Triangulation::create_children function
+  which actually sets up the children of a cell and updates the
+  neighbor information. This work has been done in
+  Triangulation::execute_refinement so far. Memory allocation has
+  to be done prior to the function call. However, this is really
+  something internal to the library and there should never be the
+  need to use this function in user codes. Calling
+  Triangulation::execute_coarsening_and_refinement will be all
+  you need.
+  <br>
+  (Tobias Leicht 2008/07/08 as of branch_anisotropic at 2006/06/29)
+  </p>
+
+  <li>
+  <p>
+  New: A part of the functionality of Triangulation::execute_coarsening has been implemented in a new way
+  and shifted to the new function Triangulation::coarsening_allowed. This function decides,
+  depending on the refinement situation of all of a cell's neighbors, if the
+  cell may be coarsened, in which case the bool value <code>true</code> is
+  returned. This is a functionality which is partly dimension dependent.
+  <br>
+  However, there should never be any reason to use this function in user codes.
+  <br>
+  (Tobias Leicht 2008/07/08 as of branch_anisotropic at 2006/06/28)
+  </p>
+
+  <li>
+  <p>
+  New: There is now a new CellAccessor::neighbor_is_coarser function,
+  which returns a bool value <code>true</code>, if the requested
+  neighbor is a coarser one and <code>false</code> if the
+  neighbor is as refined as the current cell. This new
+  functionality is needed in the internals of the library to
+  decide, if <code>neighbor_of_neighbor</code> or
+  <code>neighbor_of_coarser_neighbor</code> has to be
+  called. This decision is trivial in case of isotropic
+  refinement, where the level of the cell and its neighbor is all
+  the information needed. In case of anisotropic refinement, this
+  new function can be used.
+  <br>
+  (Tobias Leicht 2008/07/08 as of branch_anisotropic at 2006/06/28)
+  </p>
+
+  <li>
+  <p>
+  New: There is now a new
+  <code>Triangulation::MeshSmoothing::allow_anisotropic_smoothing</code>
+  smoothing flag for triangulations. An important part of the
+  smoothing process for refinement and coarsen flags is to
+  ensure that no double refinement takes place at any face. If
+  a cell is refined twice, its neighbor has to be refined at
+  least once. However, technically it is not always necessary to
+  refine the neighbor isotropically, it may be sufficient to
+  choose only one direction for the refinement. While this allows
+  to reduce the number of newly created cells to a minimum, it is
+  incompatible with older versions of the library, as anisotropic
+  refinement takes place, even if all the explicitly set
+  refinement flags ask for isotropic refinement.
+  <br>
+  Therefore this functionality is off by default. However, in order to use
+  the new feature of anisotropic refinement to its full extent, this flag should
+  be set explicitly.
+  <br>
+  Note, that for reasons of backwards compatibility this flag is
+  NOT included in the general <code>MeshSmoothing::maximum_smoothing</code>
+  flag.
+  <br>
+  (Tobias Leicht 2008/07/08 as of branch_anisotropic at 2006/06/28)
+  </p>
+
+  <li>
+  <p>
+  New: There is now a new
+  TriaObjectAccessor::clear_refinement_case function.
+  <br>
+  (RH 2008/07/08 as of branch_anisotropic at 2005/12/19)
+  </p>
+
+  <li>
+  <p>
+  Extended: The CellAccessor::neighbor_of_neighbor function is
+  now extended to anisotropic refinement.
+  <br>
+  (RH 2008/07/08 as of branch_anisotropic at 2005/12/15)
+  </p>
+
+  <li>
+  <p>
+  Extended (internal): Lines on <code>level>0</code> have always
+  been stored pairwise. In order to allow the creation of single
+  interior lines we now also allow storage of single lines. The
+  <tt>TriangulationLevel<1>::reserve_space</tt> function
+  now takes an additional <code>n_consecutive_lines</code>
+  parameter which allows the creation of new single lines as well as pairs
+  of lines with <code>n_consecutive_lines=1</code> or
+  <code>n_consecutive_lines=2</code>, respectively.
+  <br>
+  (RH 2008/07/08 as of branch_anisotropic at 2005/12/15)
+  </p>
+
+  <li>
+  <p>
+  Changed: When allowing anisotropic refinement it cannot be
+  guaranteed that all children of a cell are stored in
+  consecutive components of the TriaLevel::cells vector,
+  respectively. It is only known that children are stored at
+  least in pairs. Therefore, it is not sufficient any more to
+  store the index of the first child of a cell, only. Now the
+  indices of every second child, i.e. of the even numbered
+  children, must be stored.  For this, the
+  TriaObjectAccessor::set_children function for cells now has a
+  new argument representing the number of the child for which the
+  index shall be set. This function can only be called for even
+  numbered children. The same applies to the respective function
+  for faces.
+  <br>
+  Finally, we note that these functions are for internal use,
+  only.
+  <br>
+  (RH 2008/07/08 as of branch_anisotropic at 2005/03/03)
+  </p>
+
+  <li>
+  <p>
+  Changed: The CellAccessor::refine_flag_set function now returns
+  a RefinementCase argument instead of a boolean. Be aware, that
+  you now still can ask <code>if(!cell->refine_flag_set())</code>
+  , <code>if(cell->refine_flag_set()==false)</code> and
+  <code>if(cell->refine_flag_set())</code> , but you cannot ask
+  <code>if(cell->refine_flag_set()==true)</code> any more.
+  <br>
+  (RH 2008/07/08 as of branch_anisotropic at 2005/03/03)
+  </p>
+
+  <li>
+  <p>
+  Extended: The CellAccessor::set_refine_flag function now has a
+  new RefinementCase argument which defaults to
+  <tt>GeometryInfo<dim>::isotropic_refinement</tt>.
+  <br>
+  (RH 2008/07/08 as of branch_anisotropic at 2005/03/03)
+  </p>
+
+  <li>
+  <p>
+  New: There is now a new class RefinementCase defined in
+  <code>geometry_info.h</code>, which allows to describe all
+  possible (anisotropic and isotropic) refinement cases in (1,) 2
+  and 3 dimensions.
+  <br>
+  (RH 2008/07/08 as of branch_anisotropic at 2005/02/26)
+  </p>
+
+  <li>
+  <p>
+  New: There is now a new TriaObjectAccessor::n_children function
+  which returns the number of children of the cell or face it was
+  invoked for.
+  <br>
+  (RH 2008/07/08 as of branch_anisotropic at 2005/02/26)
+  </p>
+
+  <li>
+  <p>
+  New: There is now a new TriaObjectAccessor::refinement_case
+  function which returns the RefinementCase a cell or face is
+  refined with.
+  <br>
+  (RH 2008/07/08 as of branch_anisotropic at 2005/02/26)
+  </p>
+
+  <li>
+  <p>
+  New (for internal use only): There is now a new
+  TriaObjectAccessor::set_refinement_case function. This function
+  is mainly for internal use (required by
+  Triangulation::execute_coarsening_and_refinement). It is not
+  provided for <code>dim=1</code> as there the refinement case
+  defaults to isotropic refinement.
+  <br>
+  Note, that users should still use the
+  CellAccessor::set_refine_flag function for setting the
+  RefinementCase of cells to be refined within the next
+  Triangulation::execute_coarsening_and_refinement function.
+  <br>
+  (RH 2008/07/08 as of branch_anisotropic at 2005/02/26)
+  </p>
+
+  <li>
+  <p>
+  New: New <code>vector@<RefinementCase@> refinement_cases</code>
+  vectors have been introduced in TriaObjects. For memory
+  efficiency (but with a penalty on run-time) they might be
+  replaced by <code>vector@<vector@<bool@> @> (dim, vector@<bool@>
+  (n_quads/n_hexes))</code>, later.
+  <br>
+  (RH 2008/07/08 as of branch_anisotropic at 2005/02/26)
+  </p>
+
+  <li>
+  <p>
+  Fixed: When using a higher order mapping of degree at least 3 (i.e.
+  the MappingQ class) on meshes that have cells with non-standard
+  face orientation and that touch the boundary of the domain, then
+  some interpolation points were wrongly computed.
+  <br>
+  (Tobias Leicht, Timo Heister, WB 2008/06/10)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  Fixed: The ParameterHandler::get_integer function now throws an
+  exception when called for non-integer parameters. This exception has
+  been commented out some time ago but is now reincluded.
+  <br>
+  (RH 2008/06/11)
+  </p>
+  </li>
+</ol>
+
+
+*/
diff --git a/doc/news/6.2.0-vs-6.2.1.h b/doc/news/6.2.0-vs-6.2.1.h
new file mode 100644
index 0000000..5b939b2
--- /dev/null
+++ b/doc/news/6.2.0-vs-6.2.1.h
@@ -0,0 +1,59 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/**
+ * @page changes_between_6_2_0_and_6_2_1 Changes between Version 6.2.0 and 6.2.1
+
+<p>
+This is the list of changes made between the deal.II releases listed above.
+</p>
+
+
+<ol>
+  <li>
+  <p>
+  A trivial mistake made deal.II unable to compile against any PETSc
+  version prior to 3.0.0. This is now fixed.
+  </p>
+  </li>
+
+  <li>
+  <p>
+  When running in parallel, the step-18 tutorial program
+  produced an error indicating that resetting user pointers was not
+  possible. This is now fixed.
+  </p>
+  </li>
+
+  <li>
+  <p>
+  The documentation tar-ball we provide for those who do not want to re-build
+  their own documentation locally using doxygen, did not include any typeset
+  formulas (an oversight: we used a machine without a latex installation to
+  build this package). The 6.2.1 package gets this right.
+  </p>
+  </li>
+
+  <li>
+  <p>
+  Some versions of gcc 3.3.x had a bug that showed with code recently introduced
+  into our sources in that it erroneously warned about perfectly legitimate
+  constructs. This is now fixed.
+  </p>
+  </li>
+</ol>
+
+
+*/
diff --git a/doc/news/6.2.0-vs-6.3.0.h b/doc/news/6.2.0-vs-6.3.0.h
new file mode 100644
index 0000000..e252156
--- /dev/null
+++ b/doc/news/6.2.0-vs-6.3.0.h
@@ -0,0 +1,1101 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/**
+ * @page changes_between_6_2_and_6_3 Changes between Version 6.2 and 6.3
+
+<p>
+This is the list of changes made between the deal.II releases listed above. It is subdivided into changes
+made to the three sub-libraries <a href="#base">base</a>,
+<a href="#lac">lac</a>, and <a href="#deal.II">deal.II</a>, as well as
+changes to the <a href="#general">general infrastructure,
+documentation, etc</a>.
+</p>
+
+<p>
+All entries are signed with the names of the authors. Regular
+contributors' names are abbreviated by WB (Wolfgang Bangerth), GK
+(Guido Kanschat), RH (Ralf Hartmann).
+</p>
+
+
+<a name="incompatible"></a>
+<h3 style="color:red">Incompatibilities</h3>
+
+<p style="color:red">
+Following are a few modifications to the library that unfortunately
+are incompatible with previous versions of the library, but which we
+deem necessary for the future maintainability of the
+library. Unfortunately, some of these changes will require
+modifications to application programs. We apologize for the
+inconvenience this causes.
+</p>
+
+<ol>
+  <li>
+  <p>
+  Removed: The classes TrilinosWrappers::SolverBlock and
+  TrilinosWrappers::PreconditionBlock have been deleted for compatibility
+  reasons with Trilinos 10.2. These solvers can be exchanged by deal.II's
+  own iterative solvers with block matrices and vectors without loss in
+  performance.
+  <br>
+  (Martin Kronbichler 2010/06/24)
+  </p>
+
+  <li>
+  <p>
+  Changed: The SparseMatrix::operator() used to always check that an entry
+  exists when accessing it. If it didn't, it would throw an exception that
+  could be caught in a user program. Since these accesses are very frequent,
+  this check now only happens in debug mode when the program aborts if a
+  nonexistent element is accessed. If you access a nonexistent element in
+  optimized mode, anything might happen (as in many other functions if
+  input arguments do not satisfy their constraints).
+  <br>
+  (WB 2010/06/04)
+  </p>
+
+  <li>
+  <p>
+  Removed: The interface to PETSc has been simplified to better handle
+  incremental changes in PETSc versions and accommodate changes in
+  functionality between versions. As a part of this process, the
+  deal.II configure script no longer handles PETSc versions
+  <2.3.0. Attempting to configure deal.II with PETSc versions that are
+  not supported will result in the error message, "Unknown PETSc
+  version". The usage of the PETScWrappers is otherwise not affected
+  by this change.
+  <br>
+  (Toby D. Young 2010/03/06)
+  </p>
+
+  <li>
+  <p>
+  Removed: The class TrilinosWrappers::PreconditionStokes has been deleted
+  because it did not work properly, and it is too specific to be part of
+  the general library. A preconditioner that has the same properties is
+  explained in the @ref step_31 step-31 tutorial program.
+  <br>
+  (Martin Kronbichler 2009/11/23)
+  </p>
+
+  <li>
+  <p>
+  Changed: The class MGSmootherRelaxation now instead of a
+  preconditioner takes a relaxation method with the functions
+  <code>step</code> and <code>Tstep</code>. These perform a complete
+  relaxation step, thus saving an auxiliary vector and computational
+  effort for Gauss-Seidel type methods.
+
+  While all relaxation preconditioners have been provided with the new
+  two functions, you may have used MGSmootherRelaxation with your own
+  preconditioner. In that case, you have two options:
+  <ol>
+    <li> Configure deal.II with <tt>--enable-mgcompatibility</tt>,
+    which restores the old behavior of MGSmootherRelaxation.
+    <li> Use MGSmootherPrecondition, which does what
+    MGSmootherRelaxation did before.
+  </ol>
+  <br>
+  (GK 2009/08/04)
+  </p>
+</ol>
+
+
+<a name="general"></a>
+<h3>General</h3>
+
+<ol>
+   <li>
+   <p>
+   New: The new tutorial program step-45,
+   contributed by Markus Bürg, shows how to implement periodic boundary
+   conditions.
+   <br>
+   (Markus Bürg, 2010/06/23)
+   </p>
+   </li>
+
+   <li>
+   <p>
+   Improved: Exception classes declared locally to another class using
+   DeclException0 through DeclException5 were previously shown as local
+   classes in the class overview of the library. This was annoying since
+   they were not really of interest but made up most of the list.
+   The documentation now shows them in one central place in the
+   @ref Exceptions module.
+   <br>
+   (WB 2010/04/26)
+   </p>
+   </li>
+
+   <li>
+   <p>
+   Improved: We now compile all files with their full path name on the command
+   line. This makes it simpler for tools like debuggers or profilers (e.g.
+   valgrind) to find the source files that correspond to an executable.
+   <br>
+   (WB 2010/04/15)
+   </p>
+   </li>
+
+   <li>
+   <p>
+   Improved:
+          Over the last few months, the multigrid implementation has seen
+          significant rewrites, with much of the work done by Bärbel
+          Janssen. The goal — now achieved — was to finally fully
+          support multigrid also for continuous finite elements on adaptively
+          refined meshes (uniformly refined meshes and discontinuous elements
+          have worked for a long time). As part of this process,
+	  step-16 has
+          been rewritten and now solves the same problem
+	  step-6 solves, just
+          with a multigrid solver.
+   <br>
+   (Bärbel Janssen, WB 2010/02/13)
+   </p>
+   </li>
+
+
+   <li>
+   <p>
+   New: The version of <a href="http://www.boost.org/">boost</a>
+   included in the <code>contrib/</code> directory has been updated
+   to 1.41.0.
+   <br>
+   (WB 2009/12/10)
+   </p>
+   </li>
+
+   <li>
+   <p>
+   New: There is now a new tutorial program, step-35,
+   contributed by Abner Salgado-Gonzalez, that implements a solver
+   for the Navier-Stokes equations using a decoupled projection
+   scheme.
+   <br>
+   (Abner Salgado-Gonzalez 2009/10/07)
+   </p>
+   </li>
+
+   <li>
+   <p>
+   New: A report has been added on the
+   <a href="http://www.dealii.org/reports/codimension-one/desimone-heltai-manigrasso.pdf"
+   target="body">codimension one</a> capabilities
+   of the library (by Antonio DeSimone, Luca Heltai
+   and Cataldo Manigrasso, SISSA, Trieste, Italy). It
+   explains in detail how to use the
+   library for the solution of problems defined on codimension
+   one manifolds, such as, for example, %Boundary Element Methods.
+   <br>
+   (Luca Heltai, 2009/09/23)
+   </p>
+   </li>
+
+   <li>
+   <p>
+   New: The configure switch <code>--with-cpu=...</code> now allows the value
+   <code>native</code>, indicating that we would like the compiler to figure
+   out which CPU we are running on and optimize for it. The resulting
+   libraries may not work on other (previous generation) processors, however.
+   <br>
+   (WB 2009/08/20)
+   </p>
+   </li>
+
+   <li>
+   <p>
+   Fixed: step-31 had a bug in the computation of the
+   global scaling parameter in the function that evaluates the artificial
+   viscosity: we computed
+   $c(\mathbf{u},T) =
+    c_R\ \|\mathbf{u}\|_{L^\infty(\Omega)} \ \mathrm{var}(T)
+    \frac{1}{|\mathrm{diam}(\Omega)|^{\alpha-2}}$
+   when it should have been
+   $c(\mathbf{u},T) =
+    c_R\ \|\mathbf{u}\|_{L^\infty(\Omega)} \ \mathrm{var}(T)
+    \ |\mathrm{diam}(\Omega)|^{\alpha-2}$. This didn't matter much in this
+   program because $\mathrm{diam}(\Omega)=2^{1/\textrm{dim}}$ and so is close
+   to one. It would matter, however, if the domain had been different, as
+   it is, for example, in the future step 32.
+   <br>
+   (WB 2009/08/19)
+   </p>
+   </li>
+
+   <li>
+   <p>
+   Changed: When using Trilinos wrapper objects in %parallel through MPI, each
+   object now uses a separate and distinct MPI communicator object. This
+   ensures that different objects (such as different matrices, or different
+   vectors) communicate on separate channels, thereby simplifying debugging
+   and possibly the parallelization of programs.
+   <br>
+   (WB 2009/08/10)
+   </p>
+   </li>
+
+   <li>
+   <p>
+   New: There is now a new tutorial program, step-36,
+   contributed by Toby D. Young and Wolfgang Bangerth, that demonstrates
+   solving eigenvalue problems.
+   <br>
+   (Toby D. Young, WB 2009/07/29)
+   </p>
+   </li>
+
+   <li>
+   <p>
+   Changed: When configuring to use METIS for partitioning meshes in %parallel,
+   the METIS header files had to be modified by hand. In addition, with some
+   MPI implementations one would get into trouble if <code>mpi.h</code>
+   included <code>mpicxx.h</code>. These two problems have now been
+   worked around.
+   <br>
+   (WB 2009/07/06)
+   </p>
+   </li>
+
+   <li>
+   <p>
+   New: As a primary means of parallelizing programs, deal.II now uses
+   a task-based, rather than thread-based approach, in which one
+   uses a high-level description of <i>what</i> needs to be done,
+   rather than how these jobs have to be mapped onto threads. We then
+   use the <a href="http://www.threadingbuildingblocks.org">Threading
+   Building Blocks (TBB) library</a> to schedule tasks onto available
+   hardware resources. This new scheme of describing parallelism and
+   various abstractions to make programming in this framework easier
+   are described in great detail in the
+   @ref threads "Parallel computing with multiple processors" module.
+   In addition, most of the parallelism already used within deal.II
+   has been converted to use tasks, rather than threads, and so have
+   some of the tutorial programs.
+   <br>
+   (WB 2009/01/09)
+   </p>
+   </li>
+
+   <li>
+   <p>
+   Changed: The support for threading has been completely re-written. In
+   particular, the Threads::spawn functions have been deprecated, and
+   new functions Threads::new_thread have been introduced.
+   Threading is now discussed in a lot of detail in the
+   @ref threads "Parallel computing with multiple processors" module.
+   <br>
+   (WB 2009/01/09)
+   </p>
+   </li>
+
+   <li>
+   <p>
+   Changed: Previously, one had to give the two flags
+   <code>--enable-multithreading --with-multithreading</code> to
+   <code>./configure</code> to enable thread usage throughout the library.
+   This has now been simplified: only the flag <code>--enable-threads</code>
+   is now necessary. Furthermore, since most current machines have multiple
+   cores these days, the default is now to use threads. This can be switched
+   off using <code>--disable-threads</code>, however.
+   <br>
+   (WB 2008/09/29)
+   </p>
+   </li>
+</ol>
+
+
+
+<a name="base"></a>
+<h3>base</h3>
+
+<ol>
+  <li><p>New: The Timer class can now accumulate and average run times of
+  pieces of code across multiple MPI processes.
+  <br>
+  (Timo Heister 2010/06/07)
+  </p></li>
+
+  <li><p>New: The Utilities::System::compute_point_to_point_communication_pattern
+  function can be used to compute who wants to send messages to the
+  current processor in unstructured point-to-point MPI communications.
+  <br>
+  (WB 2010/06/07)
+  </p></li>
+
+  <li><p>New: The DataOutBase class (and all derived classes such as DataOut,
+  MatrixOut, etc) can now produce the XML-based version of the VTK file format
+  (the so-called VTU format). Furthermore, the
+  DataOutInterfaces::write_pvtu_record function can be used to describe a set
+  of %parallel VTU files as part of a single visualization set.
+  <br>
+  (Scott Miller 2010/06/01)
+  </p></li>
+
+  <li><p>Changed: The Function::vector_gradient_list function was previously
+  implemented by calling Function::gradient on each point and each component.
+  It has been changed to now call Function::vector_gradient on each point
+  only, and derived classes should implement this function accordingly.
+  <br>
+  (WB 2010/02/10)
+  </p></li>
+
+  <li><p>Fixed: The file <code>data_out_base.cc</code> could not be compiled
+  when Tecplot was available. This should now be fixed.
+  <br>
+  (WB 2010/02/10)
+  </p></li>
+
+  <li><p>Fixed: The PolynomialsBDM, PolynomialsABF and Functions::FlowFunction
+  classes had a race condition in multithreaded programs. This is now fixed.
+  <br>
+  (WB 2010/01/27)
+  </p></li>
+
+  <li><p>New: The new class IndexSet can represent sets and ranges of indices.
+  <br>
+  (WB 2009/10/09)
+  </p></li>
+
+  <li><p>New: There is now an <code>operator @<@<</code> for the
+  TableIndices class.
+  <br>
+  (WB 2009/09/24)
+  </p></li>
+
+  <li>
+  <p>
+  Fixed: If anything had been put into a LogStream object without flushing
+  it with std::endl before the destruction of the log stream, it was lost.
+  This is now fixed.
+  <br>
+  (WB 2009/09/23)
+  </p>
+  </li>
+
+  <li><p>New: SymmetricTensor::component_to_unrolled_index() and
+  SymmetricTensor::unrolled_to_component_indices() allow to convert between the
+  indices of an element of a symmetric tensor and the index within an unrolled vector.
+  <br>
+  (WB 2009/09/23)
+  </p></li>
+
+  <li><p>New: classes NamedData and NamedSelection provide an interface to store and
+  retrieve data objects with name identifiers.
+  <br>
+  (GK 2009/09/13)
+  </p></li>
+
+  <li>
+  <p>
+  New: The Utilities::System::job_supports_mpi() can be used to query whether
+  the current job runs under MPI or not.
+  <br>
+  (WB 2009/08/14)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  New: The Utilities::Trilinos::comm_self function returns an MPI
+  communicator that consists only of the current processor.
+  <br>
+  (WB 2009/08/07)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  New: The Utilities::Trilinos::duplicate_communicator function allows to duplicate
+  an Epetra_Comm object to get a unique %parallel MPI communicator out of an
+  existing one. Utilities::Trilinos::duplicate_map creates a map that has
+  the same members as the given template but uses a separate communicator.
+  <br>
+  (WB 2009/08/06)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  New: There is now a specialization Tensor<0,dim> of tensors of rank 0. Since rank-0
+  tensors are scalars, this class essentially acts like a scalar, but it allows for
+  some neat template tricks that involve tensors of arbitrary rank.
+  <br>
+  (WB 2009/07/15)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  New: The GeometryInfo::alternating_form_at_vertices can be used
+  to investigate the degree of distortion of cells.
+  <br>
+  (WB 2009/06/28)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  New: The GeometryInfo::d_linear_shape_function and
+  GeometryInfo::d_linear_shape_function_gradient functions can be used
+  to represent the $d$-linear shape functions that are frequently
+  used to map the reference cell to real cells (though the
+  Mapping class hierarchy also allows to use higher order mappings).
+  <br>
+  (WB 2009/06/28)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  New: The determinant() function is now implemented for rank-2 Tensor
+  arguments of all sizes. The implementation is not efficient for very large
+  matrix sizes, however.
+  <br>
+  (WB 2009/06/28)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  Improved: The QGaussLobatto::gamma function now returns a long double
+  instead of an unsigned int, otherwise we will get an overflow and thus
+  meaningless weights for higher QGaussLobatto quadrature rules.
+  <br>
+  (Tobias Leicht, RH 2009/06/05)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  New: The new function Utilities::duplicate_communicator can be used
+  to duplicate an MPI communicator to produce a unique duplicate.
+  <br>
+  (WB 2009/05/13)
+  </p>
+  </li>
+</ol>
+
+
+
+<a name="lac"></a>
+<h3>lac</h3>
+
+<ol>
+  <li><p>New: The ConstraintMatrix class can now handle storing only
+  a subset of all constraints, for example only for degrees of
+  freedom that are relevant for the subdomain that is owned by one
+  process in an MPI universe.
+  <br>
+  (Timo Heister, Martin Kronbichler 2010/06/07)
+  </p></li>
+
+  <li><p>New: The PETScWrappers::MPI::Vector and TrilinosWrappers::MPI::Vector
+  classes can now handle ghost elements, i.e. elements that are not
+  owned by the current processor but are available for reading
+  anyway. The simplest form of ghosting would be to simply import
+  an entire vector to local memory, but the new functions allow to
+  select the elements we need to support the case of computations
+  where importing all elements of even a single vector would
+  exceed available memory.
+  <br>
+  (Timo Heister 2010/06/07)
+  </p></li>
+
+  <li>
+    <p>
+    New: A class SparseDirectMumps that provides an interface to
+    the MUltifrontal Massively Parallel sparse direct %Solver (MUMPS).
+    </p>
+  <br>
+  (Markus Buerg 2010/05/10)
+  </li>
+
+  <li>
+    <p>
+    Fixed: BlockSparsityPattern::copy_from accidentally only copied
+    n_block_rows times n_block_rows blocks, instead of n_block_rows
+    times n_block_cols. This is now fixed.
+    </p>
+  <br>
+  (WB 2010/01/06)
+  </li>
+
+  <li>
+    <p>
+    Fixed: SparsityPattern::copy_from crashed whenever a compressed sparsity
+    pattern was copied that had either zero rows or zero columns. This is now
+    fixed.
+    </p>
+  <br>
+  (WB 2010/01/06)
+  </li>
+
+  <li>
+    <p>
+    New: The function Householder::least_squares can handle BlockVectors as
+    well now. Note that in one place we still have to copy to a Vector to use
+    the function backward from FullMatrix.
+    </p>
+  <br>
+  (Bärbel Janssen 2010/01/05)
+  </li>
+
+  <li>
+    <p>
+    New: There are now two ConstraintMatrix::add_lines functions that can
+    add several constraints at once.
+    </p>
+  <br>
+  (WB 2010/01/05)
+  </li>
+
+  <li>
+    <p>
+    Improved: The Vector class has been equipped with an improved way to
+    calculate sums in inner products and norms. This reduces the accumulation
+    of round-off errors. Especially the solution with float vectors should
+    profit from the new implementation.
+    </p>
+  <br>
+  (Martin Kronbichler 2009/11/05)
+  </li>
+
+  <li>
+    <p>
+    Improved: The ConstraintMatrix class now uses a cache for random access to
+    the constraint lines. This considerably increases performance of the
+    *_local_to_global functions, where such an access pattern is usual. Moreover,
+    the ConstraintMatrix class has now a function get_dof_values that can import
+    data from a global vector to a cell vector while respecting the constraints.
+    </p>
+  <br>
+  (Martin Kronbichler 2009/09/30)
+  </li>
+
+  <li>
+    <p>
+    Fixed: SparsityTools::reorder_Cuthill_McKee would produce an error if the
+    input graph had disconnected components. This is now fixed.
+    </p>
+  <br>
+  (WB 2009/09/25)
+  </li>
+
+  <li>
+    <p>
+    Fixed: When using the TrilinosWrappers::MPI::Vector::reinit() function with a %parallel
+    vector, and if the vector initialized and the vector given had a local range on one of
+    the processors that exactly matched, the program would freeze if the local ranges on
+    the other processors did not also match exactly. This is now fixed.
+    </p>
+  <br>
+  (WB 2009/09/02)
+  </li>
+
+  <li><p> Improved: BlockVector and several of the block sparsity
+  patterns can now be initialized with BlockIndices
+  objects. Therefore, if an application needs such an object, it does
+  not have to store a vector of block sizes separately.
+  <br>
+  (GK 2009/08/26)
+  </p>
+  </li>
+
+  <li><p> New: The class PreconditionChebyshev implements a
+  preconditioner based on Chebyshev polynomials. It is based on matrix-vector
+  products together with some vector updates.
+  <br>
+  (Martin Kronbichler 2009/08/25)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  Fixed: Crash or strange behaviour (wrong matrix entries written) in
+  PETScWrappers::MPI::BlockSparseMatrix when adding or setting elements
+  through any of the set() and add() routines. This happened when different
+  CPUs access different blocks at the start of assembly or when switching
+  between adding and setting.
+  <br>
+  (Timo Heister 2009/08/05)
+  </p>
+  </li>
+
+  <li> <p>New: The relaxation preconditioners PreconditionJacobi, PreconditionSOR and
+  PreconditionSSOR, as well as their blocked versions PreconditionBlockJacobi,
+  PreconditionBlockSOR and PreconditionBlockSSOR now have functions <code>step</code>
+  and <tt>Tstep</tt> performing one complete step of these methods.
+  <br>
+  (GK 2009/08/04)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  New: There are new functions FullMatrix::cholesky and
+  FullMatrix::outer_product.  FullMatrix::cholesky finds the Cholesky
+  decomposition of a matrix in lower triangular form.
+  FullMatrix::outer_product calculates <tt>*this</tt> $= VW^T$ where $V$
+  and $W$ are vectors.
+  <br>
+  (Jean Marie Linhart 2009/07/27)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  Fixed: The TrilinosWrappers::MPI::BlockVector class declares an assignment
+  operator from the non-Trilinos BlockVector class but it could not be
+  compiled due to an oversight. This is now fixed.
+  <br>
+  (WB 2009/06/29)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  New: Based on work by Francisco Alvaro, the existing SLEPcWrappers now
+  have a handle on the generalized eigenvalue problem where B=I.
+  <br>
+  (Toby D. Young 2009/06/25)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  New: Based on work with Francisco Alvaro and Jose
+  E. Roman, the new SLEPcWrappers give a handle on some of the
+  features of SLEPc (Scalable Library for Eigenvalue Problem
+  Computations): (1) The SLEPcWrappers::SolverBase class can be used
+  for specifying an eigenvalue problem, either in standard or
+  generalized form, on serial or %parallel architectures with support
+  for a few solver types; and (2) The
+  SLEPcWrappers::TransformationBase class encapsulates a variety of
+  spectral transformations providing some functionality required for
+  acceleration techniques based on the transformation of the spectrum.
+  <br>
+  (Toby D. Young 2009/06/25)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  Fixed: The TrilinosWrappers::BlockVector class declares an assignment
+  operator from the non-Trilinos BlockVector class but it wasn't implemented.
+  This is now fixed.
+  <br>
+  (WB 2009/06/24)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  New: The SparseMatrix class has now a function SparseMatrix::mmult that
+  can multiply two sparse matrices with each other.
+  <br>
+  (Martin Kronbichler 2009/05/04)
+  </p>
+  </li>
+</ol>
+
+
+<a name="deal.II"></a>
+<h3>deal.II</h3>
+
+<ol>
+
+<li> <p> New: The namespace MeshWorker contains a generic MeshWorker::loop() over all cells and faces as well as auxiliary classes, which allow to program integrals over mesh cells and faces in a very generic and local way. In particular, an application programmer will not have to distinguish between regular faces and faces with hanging nodes anymore. Two tutorial programs (step-12 and step-39) highlight the functionality of this framework.
+<br>
+(GK 2010/06/24)
+</p></li>
+
+  <li>
+  <p>New: The FE_Q_Hierarchical class now has functions
+  FE_Q_Hierarchical::hp_constraints_are_implemented and
+  FE_Q_Hierarchical::hp_vertex_dof_identities.
+  <br>
+  (Markus Bürg 2010/06/08)
+  </p></li>
+
+  <li>
+  <p>New: The FEValuesViews::Vector class now has functions
+  FEValuesViews::Vector::curl and FEValuesViews::Vector::get_function_curls.
+  <br>
+  (Markus Bürg 2010/05/13)
+  </p></li>
+
+  <li>
+  <p>New: TriaAccessor::extent_in_direction() returns the length
+  of an object in a given direction.
+  <br>
+  (James Avery 2010/05/10)
+  </p></li>
+
+  <li>
+  <p>New: There is a new function DoFTools::extract_dofs_with_support_on_boundary().
+  <br>
+  (WB 2010/05/07)
+  </p></li>
+
+  <li>
+  <p>Fixed: FE_DGQ::has_support_on_face() returned the wrong value in 1d if the
+  polynomial degree of the finite element equals zero (i.e. for piecewise
+  constants) where the lone shape function is nonzero on all faces. This is now
+  fixed.
+  <br>
+  (WB 2010/05/07)
+  </p></li>
+
+  <li>
+  <p>Fixed: VectorTools::interpolate_boundary_values inadvertently produces
+  an exception when used with hp::DoFHandler objects in 1d. This is now fixed.
+  <br>
+  (WB 2010/05/04)
+  </p></li>
+
+  <li>
+  <p>Fixed: The GridIn::read_msh function got into trouble if the mesh file
+  had a section that listed physical names for variables. This is now fixed.
+  <br>
+  (WB 2010/05/03)
+  </p></li>
+
+  <li>
+  <p> Improved: DoFHandler iterators now can be assigned from a Triangulation iterator
+  after the dof handler was set once.
+  <br>
+  (GK 2010/03/25)
+  </p></li>
+
+  <li>
+  <p>
+  New: The function DoFRenumbering::downstream has now an additional bool
+  argument. If enabled, the downstream comparison is performed on a DoF
+  basis, as opposed to the cell-based comparison that is used with a false
+  argument.
+  <br>
+  (Martin Kronbichler 2010/03/19)
+  </p>
+  </li>
+
+  <li><p> Improved: DoFHandler now has a BlockInfo object, automatically
+  updated after DoFHandler::distribute_dofs() and accessible by
+  DoFHandler::block_info(). This object can be used to initialize block
+  vectors and obliterates the necessity to count dofs per block (or dofs
+  per component in legacy code) in application programs.
+  <br>
+  (GK 2010/03/18)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  New: The class MGTransferSelect is prepared for use on adaptively refined meshes.
+  <br>
+  (Bärbel Janssen 2010/02/05)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  New: The function Cuthill_McKee in namespace DoFRenumbering is now also compiled for
+  MGDoFHandler as well as the make_sparsity_pattern functions in DoFTools.
+  <br>
+  (Bärbel Janssen 2010/01/08)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  New: Constructor of FESystem now also exists for four base elements.
+  <br>
+  (Thomas Wick 2010/01/08)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  New: The functions in namespace DoFRenumbering::boost are now also compiled for
+  hp::DoFHandler arguments.
+  <br>
+  (WB 2009/12/14)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  Fixed: The functions DoFTools::count_dofs_per_component and DoFTools::extract_dofs
+  produced wrong results for elements that consist of two or more nested FESystems.
+  Moreover, a bug in DoFTools::extract_constant_modes has been corrected and
+  DoFTools::distribute_cell_to_dof_vector now works according to the documentation,
+  namely leaving unselected components in the result vector unchanged, instead of
+  setting these to zero as was done before.
+  <br>
+  (Martin Kronbichler 2009/12/14)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  Fixed: The function Triangulation::n_levels() accidentally turned out to be
+  quite expensive, in particular if the mesh has been coarsened significantly
+  in the past. Since Triangulation::n_active_cells() calls Triangulation::n_levels()
+  repeatedly, by consequence it also is surprisingly expensive, which is
+  particularly annoying since this is a frequently called function. This has
+  now been fixed by caching the result of both functions.
+  <br>
+  (Wolfgang Bangerth 2009/11/22)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  Fixed: The DataOut class got confused when its DataOut::first_cell() and
+  DataOut::next_cell() were overloaded and cell data was given. This is now fixed.
+  <br>
+  (Wolfgang Bangerth 2009/11/05)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  New: The function DoFRenumbering::component_wise is now also implemented
+  for arguments of type hp::DoFHandler.
+  <br>
+  (Markus Bürg 2009/10/01)
+  </p>
+  </li>
+
+  <li><p>New: The class BlockInfo stores and computes all BlockIndices objects related to
+  DoFHandler and MGDoFHandler based on FESystem.
+  <br>
+  (GK 2009/09/13)
+  </p>
+  </li>
+
+  <li><p>Improved: MGDoFHandler now also has a typedef for MGDoFHandler::Container
+  <br>
+  (GK 2009/09/13)
+  </p>
+  </li>
+
+  <li><p> FETools::compute_block_renumbering() can now return block sizes instead of
+  start indices.
+  <br>
+  (GK 2009/08/26)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  New: The function GridGenerator::truncated_cone() and the class ConeBoundary
+  can now be used to describe conical objects.
+  <br>
+  (Markus Bürg 2009/08/17)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  New: Instead of asking face or edge iterators for their boundary indicator
+  using TriaAccessor::boundary_indicator() and then the triangulation for
+  the boundary object using Triangulation::get_boundary(), you can now directly
+  ask the iterator for the boundary object using TriaAccessor::get_boundary().
+  <br>
+  (WB 2009/07/31)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  Fixed: The CellAccessor::recursively_set_material_id function did not
+  set the material id for all children, but only for the first two, which
+  is obviously a bug. This should now be fixed.
+  <br>
+  (WB 2009/07/14)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  Fixed: The GridIn class sometimes had problems with input files that had
+  whitespace at the end of lines. This should now be fixed.
+  <br>
+  (WB 2009/07/10)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  New: Previously, the Triangulation::create_triangulation
+  function silently accepted input meshes with inverted cells
+  (i.e. cells with a zero or negative determinant of the Jacobian of
+  the mapping from the reference cell to the real cell). This can be
+  changed now: By passing the appropriate flag to the constructor of
+  the Triangulation class, the Triangulation::create_triangulation
+  function checks whether cells are distorted or
+  inverted, and may throw an exception containing a list of cells
+  for which this is the case. If you know that this is harmless, for
+  example if you have cells with collapsed vertices in your mesh but
+  you do not intend to integrate on them, then you can catch and
+  ignore this message. In all other cases, the output of your
+  computations is likely to be wrong anyway.
+  <br>
+  The same is true for the Triangulation::execute_coarsening_and_refinement
+  function: if it creates cells that are distorted, it throws a list of cells
+  whose children are distorted.
+  <br>
+  The whole issue is described in some detail in the entry on
+  @ref GlossDistorted "distorted cells" in the glossary.
+  <br>
+  (WB 2009/06/29)
+  </p>
+  </li>
+
+
+  <li>
+  <p>
+  New: The new hp::DoFHandler::set_active_fe_indices function allows
+  to distribute all active FE indices at once based on a given
+  vector. This might be useful if this information is stored
+  somewhere and has to be reconstructed or else if two DoFHandler
+  objects with the same FE index distribution should be created.
+  There is now also a corresponding
+  hp::DoFHandler::get_active_fe_indices function.
+  <br>
+  (Tobias Leicht, RH 2009/06/12)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  Fixed: The projection of quadrature points to subfaces in
+  MappingQ in case of 3d anisotropic refinement did not respect
+  non-standard face orientation/flip/rotation cases. This
+  has now been fixed.
+  <br>
+  (Tobias Leicht, RH 2009/06/12)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  New: The new Triangulation::n_raw_faces() function forwards
+  to Triangulation::n_raw_lines() in 2d and
+  Triangulation::n_raw_quads() in 3d.
+  <br>
+  (Tobias Leicht, RH 2009/06/12)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  New: There is now a new DataOutFaces::build_patches function which
+  takes a Mapping argument. For higher order mappings this allows to
+  represent curved boundaries by using more subdivisions. This function
+  is also useful in the context of MappingQ1Eulerian.
+  <br>
+  (Tobias Leicht, RH 2009/06/05)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  New: For empty triangulations the new Triangulation::set_mesh_smoothing
+  function allows to override the MeshSmoothing given to the constructor.
+  <br>
+  (RH 2009/06/05)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  New: The new function TriaAccessor::is_translation_of computes
+  whether a cell, face, or edge is a translation of another.
+  <br>
+  (Martin Kronbichler, WB 2009/05/19)
+  </p>
+  </li>
+
+  <li>
+  <p>
+  New: The DoFTools::make_sparsity_pattern functions have acquired a
+  new parameter <code>subdomain_id</code>. If a value other than the
+  default value is passed for it, the function only assembles the
+  sparsity pattern on those cells that have the given subdomain id.
+  This is useful, for example, in conjunction with the
+  TrilinosWrappers::SparsityPattern class that can hold a sparsity
+  pattern distributed across several MPI processes; in that case, it is
+  not necessary that each process builds the entire sparsity pattern.
+  <br>
+  (WB 2009/04/29)
+  </p>
+  </li>
+
+   <li>
+   <p>
+   Fixed: The DoFRenumbering::component_wise function for MGDoFHandler objects
+   did a few things in %parallel that weren't thread-safe. This is now fixed.
+   <br>
+   (WB, 2009/01/20)
+   </p>
+
+   <li>
+   <p>
+   Changed: The two DataOut::build_patches, DataOutFaces::build_patches, and
+   DataOutRotation::build_patches functions have lost the argument
+   that indicated the number of threads with which they should build the
+   intermediate representation. This is something that now happens
+   transparently in the background and doesn't need caller input any more.
+   <br>
+   (WB 2008/12/16)
+   </p>
+
+   <li>
+   <p>
+   Changed: The KellyErrorEstimator::estimate functions had a parameter
+   that indicates the number of threads to be used in the computation.
+   This parameter continues to exist for compatibility, but is now ignored.
+   Rather, the number of threads is determined automatically by scheduling
+   the requested computations on available compute resources.
+   <br>
+   (WB, 2008/12/29)
+   </p>
+
+   <li>
+   <p>
+   New: The new internal::hp::FEValuesBase::get_fe_collection function
+   allows to query the finite element collection currently in use in an hp::FEValues,
+   hp::FEFaceValues, or hp::FESubfaceValues object.
+   <br>
+   (WB 2008/09/30)
+   </p>
+
+   <li>
+   <p>
+   New: The new function FEValuesBase::get_update_flags allows to query
+   the update flags that are currently set on an FEValues, FEFaceValues, or
+   FESubfaceValues object.
+   <br>
+   (WB 2008/09/29)
+   </p>
+</ol>
+
+
+*/
diff --git a/doc/news/6.3.0-vs-6.3.1.h b/doc/news/6.3.0-vs-6.3.1.h
new file mode 100644
index 0000000..c4c71f2
--- /dev/null
+++ b/doc/news/6.3.0-vs-6.3.1.h
@@ -0,0 +1,180 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/**
+ * @page changes_between_6_3_0_and_6_3_1 Changes between Version 6.3.0 and 6.3.1
+
+<p>
+This is the list of changes made between the deal.II releases listed above.
+</p>
+
+<p>
+All entries are signed with the names of the author. Regular
+contributor's names are abbreviated by WB (Wolfgang Bangerth), GK
+(Guido Kanschat), RH (Ralf Hartmann).
+</p>
+
+
+<a name="incompatible"></a>
+<h3 style="color:red">Incompatibilities</h3>
+
+<p style="color:red">
+Following are a few modifications to the library that unfortunately
+are incompatible with previous versions of the library, but which we
+deem necessary for the future maintainability of the
+library. Unfortunately, some of these changes will require
+modifications to application programs. We apologize for the
+inconvenience this causes.
+</p>
+
+<ol>
+  <li>None.
+</ol>
+
+
+<a name="general"></a>
+<h3>General</h3>
+
+<ol>
+  <li>
+  <p>Fixed: The step-33 tutorial program can not be built with GCC versions
+  4.5.x. There are in fact two problems, one that pertains to uses of
+  <code>std::make_pair</code> that don't work any more with the upcoming
+  C++ 1x standard that GCC 4.5.x already follows, and some in which the
+  Trilinos package Sacado is incompatible with GCC 4.5.x, at least up to
+  Trilinos version 10.4.0. While the latter problem can only be fixed in
+  future Trilinos versions, at least the former problem is solved in step-33.
+  <br>
+  (WB 2010/07/18)
+  </p>
+
+  <li>
+  <p>Fixed: GCC version 3.4.0 failed to compile the file
+  <code>deal.II/source/numerics/matrices.cc</code> with
+  an internal compiler error. This has
+  now been worked around.
+  <br>
+  (WB 2010/07/15)
+  </p>
+
+  <li>
+  <p>Fixed: A problem in the Makefiles caused error messages when
+  building under CygWin.
+  <br>
+  (GK 2010/07/12)
+  </p>
+
+  <li>
+  <p>Fixed: GCC version 3.3.x failed to compile the files
+  <code>lac/include/lac/precondition_block.h</code>,
+  <code>deal.II/source/multigrid/mg_dof_handler.cc</code> and
+  <code>examples/step-34/step-34.cc</code>. These problems have
+  now been worked around.
+  <br>
+  (WB 2010/07/12)
+  </p>
+
+  <li>
+  <p>Fixed: Some older 3.x versions of GCC crashed compiling the functions in
+  namespace DoFRenumbering::boost. There is now a configuration time test
+  that checks that the compiler accepts the constructs in question. If the
+  compiler does not, then these functions are disabled.
+  <br>
+  (WB 2010/07/01)
+  </p>
+
+  <li>
+  <p>Fixed: Linking with more than one of the deal.II 1d, 2d, or 3d libraries
+  when using static libraries did not work. This is now fixed. However, due to
+  GCC bug <a href="http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10591"
+  target="_top">10591</a>, GCC versions prior to and including 4.1.x will
+  still not work. Working with shared libraries was not and is not affected
+  by this problem.
+  <br>
+  (WB 2010/07/01)
+  </p>
+
+  <li>
+  <p>Fixed: GCC version 4.0.1 had a bug that prevented it from compiling
+  release 6.3.0 because it apparently had an infinite loop allocating
+  memory when compiling <code>fe_values.cc</code> in optimized mode. This
+  problem had been fixed in GCC 4.0.2, but some versions of Mac OS X still use
+  this GCC version in their Xcode environment. In any case, the code in
+  deal.II has been changed to avoid this problem.
+  <br>
+  (WB 2010/06/30)
+  </p>
+
+  <li>
+  <p>Fixed: Configuring with an external BOOST version did not work when
+  using shared libraries since the test ran in the wrong order with respect
+  to another configure test. This is now fixed.
+  <br>
+  (Bradley Froehle 2010/06/29)
+  </p>
+
+  <li>
+  <p>
+  Fixed: deal.II release 6.3.0 did not compile with Trilinos versions 9.x and
+  10.0. This is now fixed.
+  <br>
+  (Martin Kronbichler, WB 2010/06/28)
+  </p>
+</ol>
+
+
+
+<a name="base"></a>
+<h3>base</h3>
+
+<ol>
+  <li>None so far.
+</ol>
+
+
+<a name="lac"></a>
+<h3>lac</h3>
+
+<ol>
+  <li>None so far.
+</ol>
+
+
+<a name="deal.II"></a>
+<h3>deal.II</h3>
+
+<ol>
+  <li>
+  <p>
+  Fixed: The FEValues::get_cell() function was unusable from user code
+  since its implementation used a class that was only forward declared
+  and not visible at the point of instantiations in user code. This is now
+  fixed.
+  <br>
+  (WB 2010/07/16)
+  </p>
+
+  <li>
+  <p>
+  Fixed: On some systems and compilers, the library could not be compiled
+  because of a duplicate symbol in <code>MeshWorker::LocalResults</code>.
+  This is now fixed.
+  <br>
+  (WB 2010/06/28)
+  </p>
+</ol>
+
+
+*/
diff --git a/doc/news/6.3.0-vs-7.0.0.h b/doc/news/6.3.0-vs-7.0.0.h
new file mode 100644
index 0000000..7576017
--- /dev/null
+++ b/doc/news/6.3.0-vs-7.0.0.h
@@ -0,0 +1,689 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2011 - 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/**
+ * @page changes_between_6_3_and_7_0 Changes between Version 6.3 and 7.0
+
+<p>
+This is the list of changes made between the deal.II releases listed above. It is subdivided into changes
+made to the three sub-libraries <a href="#base">base</a>,
+<a href="#lac">lac</a>, and <a href="#deal.II">deal.II</a>, as well as
+changes to the <a href="#general">general infrastructure,
+documentation, etc</a>.
+</p>
+
+<p>
+All entries are signed with the names of the author. Regular
+contributor's names are abbreviated by WB (Wolfgang Bangerth), GK
+(Guido Kanschat), RH (Ralf Hartmann).
+</p>
+
+
+<a name="incompatible"></a>
+<h3 style="color:red">Incompatibilities</h3>
+
+<p style="color:red">
+Following are a few modifications to the library that unfortunately
+are incompatible with previous versions of the library, but which we
+deem necessary for the future maintainability of the
+library. Unfortunately, some of these changes will require
+modifications to application programs. We apologize for the
+inconvenience this causes.
+</p>
+
+<ol>
+
+<li> The public member variable Quadrature::n_quadrature_points has
+been removed after being deprecated for nearly three
+years. Quadrature.size() replaces its function, and the new version
+more naturally preserves consistency.
+<br>
+(GK 2010/11/11)
+</li>
+
+<li>
+        deal.II has had a somewhat quirky directory and library structure for
+        historical reasons, but this has now changed: All include and source
+        files are now under the top-level <code>include/</code>
+        and <code>source</code> directories. Furthermore, we no longer build a
+        plethora of libraries but only <code>libdeal_II.g.so</code> (debug
+        version) and <code>libdeal_II.so</code> (optimized version). In
+	particular, we no longer build different versions of the library for
+	different space dimensions.
+	<br>
+	As a consequence, if your makefile makes any assumption on the
+	location of deal.II include files or the name of the deal.II library
+	it will need to be changed. The sample Makefiles have been
+	updated for this.
+	<br>
+	(WB 2010/10/25)
+	</li>
+
+<li> The <code>Polynomials::Lagrange::generate_complete_basis</code>
+function has been renamed to Polynomials::generate_complete_Lagrange_basis .
+The function was previously the only member of a class, and a static
+one on top of that, which did not make much sense.
+<br>
+(WB 2010/10/22)</li>
+
+<li> The <code>QGauss2, QGauss3, ..., QGauss7</code> classes — deprecated
+for more than 6 years already — have finally been removed.
+You should use code like <code>QGauss@<dim@>(4)</code> instead.
+<br>
+(WB 2010/09/19)</li>
+
+
+<li> The FE_Nedelec class had previously implemented the lowest order
+when the value 1 was passed to the constructor. This has now been
+adjusted: the lowest order now results from passing 0, making this
+consistent with the FE_RaviartThomas class and following the convention
+used by Brezzi and Raviart (though not in the original paper of
+Nedelec).
+<br>
+(Markus Bürg 2010/09/14)</li>
+
+<li> The fields DoFHandler::tria and DoFHandler::selected_fe are now
+private instead of protected. Inheriting classes can only access them
+through DoFHandler::get_tria() and DoFHandler::get_fe(), respectively.
+<br>
+(GK 2010/08/16)</li>
+
+</ol>
+
+
+<a name="general"></a>
+<h3>General</h3>
+
+<ol>
+  <li><p>New: The new step-38 program shows how to discretize and solve
+  partial differential equations posed on curved manifolds embedded in
+  higher dimensional spaces.
+  <br>
+  (Andrea Bonito, M. Sebastian Pauletti, 2011/01/02)
+  </p></li>
+
+  <li><p>New: deal.II now has a Qt based graphical user interface to edit
+  input parameter files for the ParameterHandler class. The executable is
+  located in <code>lib/bin/dealii_parameter_gui</code>.
+  <br>
+  (Martin Steigemann, 2010/12/20)
+  </p></li>
+
+  <li><p>New: A significant number of classes, primarily in the
+  <code>base/</code> subdirectory, now provide the necessary member functions
+  for serialization through BOOST's serialization library.
+  <br>
+  (Pradeep Rao, WB, 2010/12/06)
+  </p></li>
+
+  <li><p>Fixed: A good number of problems associated with meshes that are
+  embedded in a higher space dimension (i.e. where <code>dim @< spacedim</code>)
+  are now fixed.
+  <br>
+  (Sebastian Pauletti, Andrea Bonito, Luca Heltai, WB, 2010/12/06)
+  </p></li>
+
+  <li><p>Updated: The version of the <a href="http://www.threadingbuildingblocks.org">Threading
+  Building Blocks (TBB)</a> shipped with deal.II has been updated
+  to release 3.0 Update 3 (Commercially aligned version).
+  <br>
+  (WB, 2010/11/18)
+  </p></li>
+
+  <li><p>New: While 2d and 3d could mostly be handled with the
+  same code, dimension-independent programming was always a bit
+  different because the faces of 1d cells (i.e. vertices) did not
+  have appropriate iterators defined. This has now been changed:
+  there is now a class TriaAccessor<0,1,spacedim> (and corresponding
+  DoFAccessor<0,DH>) that allows to write things like
+  @code
+    cell->face(0)->boundary_indicator();
+    cell->face(1)->at_boundary();
+    cell->face(1)->get_dof_indices(...);
+  @endcode
+  even if the cell corresponds to a one-dimensional triangulation where
+  this was not previously possible.
+  <br>
+  (WB, 2010/11/11)
+  </p></li>
+
+  <li>
+  <p>New: After more than 2 years of work, we have merged a branch on which
+  we have implemented the functionality necessary to distribute meshes on
+  hundreds, thousands, or more processors. An overview of the framework
+  in which this is implemented is presented in the @ref distributed module,
+  the @ref distributed_paper, as well as in step-40.
+  <br>
+  (Timo Heister, Martin Kronbichler, Wolfgang Bangerth 2010/10/23)
+  </p>
+
+  <li>
+  <p>New: Documentation of how to handle constraints on degrees of freedom
+  has been centralized in a new documentation module on @ref constraints.
+  <br>
+  (WB 2010/09/16)
+  </p>
+
+  <li>
+  <p>Fixed: When using Trilinos and deal.II both with static libraries,
+  a linker error would occur whenever a program linked both the 2d and
+  3d libraries of deal.II. This is now fixed.
+  <br>
+  (WB 2010/07/23)
+  </p>
+
+  <li>
+  <p>Fixed: On all non-linux platforms, if static libraries were selected
+  and <code>./configure</code> was instructed to use Trilinos (which also
+  was compiled with static libraries), a failure would occur. This should
+  now be fixed.
+  <br>
+  (WB 2010/07/23)
+  </p>
+
+  <li>
+  <p>Fixed: The step-33 tutorial program can not be built with GCC versions
+  4.5.x. There are in fact two problems, one that pertains to uses of
+  <code>std::make_pair</code> that don't work any more with the upcoming
+  C++ 1x standard that GCC 4.5.x already follows, and some in which the
+  Trilinos package Sacado is incompatible with GCC 4.5.x, at least up to
+  Trilinos version 10.4.0. While the latter problem can only be fixed in
+  future Trilinos versions, at least the former problem is solved in step-33.
+  <br>
+  (WB 2010/07/18)
+  </p>
+
+  <li>
+  <p>Fixed: GCC version 3.4.0 failed to compile the file
+  <code>deal.II/source/numerics/matrices.cc</code> with
+  an internal compiler error. This has
+  now been worked around.
+  <br>
+  (WB 2010/07/15)
+  </p>
+
+  <li>
+  <p>Fixed: A problem in the Makefiles caused error messages when
+  building under CygWin.
+  <br>
+  (GK 2010/07/12)
+  </p>
+
+  <li>
+  <p>Fixed: GCC version 3.3.x failed to compile the files
+  <code>lac/include/lac/precondition_block.h</code>,
+  <code>deal.II/source/multigrid/mg_dof_handler.cc</code> and
+  <code>examples/step-34/step-34.cc</code>. These problems have
+  now been worked around.
+  <br>
+  (WB 2010/07/12)
+  </p>
+
+  <li>
+  <p>Fixed: Some older 3.x versions of GCC crashed compiling the functions in
+  namespace DoFRenumbering::boost. There is now a configuration time test
+  that checks that the compiler accepts the constructs in question. If the
+  compiler does not, then these functions are disabled.
+  <br>
+  (WB 2010/07/01)
+  </p>
+
+  <li>
+  <p>Fixed: Linking with more than one of the deal.II 1d, 2d, or 3d libraries
+  when using static libraries did not work. This is now fixed. However, due to
+  GCC bug <a href="http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10591"
+  target="_top">10591</a>, GCC versions prior to and including 4.1.x will
+  still not work. Working with shared libraries was not and is not affected
+  by this problem.
+  <br>
+  (WB 2010/07/01)
+  </p>
+
+  <li>
+  <p>Updated: The version of <a href="http://www.boost.org/">boost</a>
+   included in the <code>contrib/</code> directory has been updated
+   to 1.43.0.
+  <br>
+  (WB 2010/06/30)
+  </p>
+
+  <li>
+  <p>Fixed: GCC version 4.0.1 had a bug that prevented it from compiling
+  release 6.3.0 because it apparently had an infinite loop allocating
+  memory when compiling <code>fe_values.cc</code> in optimized mode. This
+  problem had been fixed in GCC 4.0.2, but some versions of Mac OS X still use
+  GCC 4.0.1 in their Xcode environment. In any case, the code in
+  deal.II has been changed to avoid this problem.
+  <br>
+  (WB 2010/06/30)
+  </p>
+
+  <li>
+  <p>Fixed: Configuring with an external BOOST version did not work when
+  using shared libraries since the test ran in the wrong order with respect
+  to another configure test. This is now fixed.
+  <br>
+  (Bradley Froehle 2010/06/29)
+  </p>
+
+  <li>
+  <p>Updated: The conversion tool in <code>contrib/mesh_conversion</code> that
+  can read CUBIT output and convert it into something that is readable by
+  deal.II has been updated.
+  <br>
+  (Jean-Paul Pelteret 2010/06/28)
+  </p>
+
+  <li>
+  <p>
+  Fixed: deal.II release 6.3.0 did not compile with Trilinos versions 9.x and
+  10.0. This is now fixed.
+  <br>
+  (Martin Kronbichler, WB 2010/06/28)
+  </p>
+</ol>
+
+
+
+<a name="base"></a>
+<h3>base</h3>
+
+
+<ol>
+
+<li> A static member function Point::unit_vector() has been introduced
+to simplify the task of creating a vector in one coordinate direction
+independent of the dimension.
+<br>
+(GK 2010/11/17)
+</li>
+
+<li><p> There are now Patterns::FileName and Patterns::DirectoryName classes
+  that can be used to indicate that a given parameter is supposed to be
+  a file or directory name.
+  <br>
+  (Martin Steigemann 2010/10/25)
+  </p>
+
+  <li><p> New: The ParameterHandler class is now built on the
+  <a href="http://www.boost.org" target="_top">boost</a>
+  <code>property_tree</code> library which provides a much better
+  foundation for extensions. In particular, the description of parameters
+  can now be exported in XML and JSON formats for processing with external
+  programs, for example graphical user interfaces. As a consequence of
+  the re-write, a bug in ParameterHandler::print_parameters_section
+  was fixed when using the LaTeX output format.
+  <br>
+  (WB 2010/09/09)
+  </p>
+
+  <li><p> Fixed: The ParameterHandler::set() functions allowed setting values that
+  did not satisfy the pattern given during declaration of the parameter. This
+  is now fixed: the functions now throw an exception.
+  <br>
+  (WB 2010/09/09)
+  </p>
+
+  <li><p> Fixed: The Patterns::Integer and Patterns::Double classes did
+  not properly check that a value given matched the pattern because they
+  ignored text after an initial match. This led to <code>"3.141"</code> and
+  <code>"3.141..,-RXYZ"</code> being recognized as valid integers and
+  double values, respectively. This is now fixed.
+  <br>
+  (WB 2010/09/08)
+  </p>
+
+  <li><p> Fixed: The computation of quadrature points in the QGaussLobatto
+  class uses a Newton method that was wrongly implemented. While the
+  results were correct (at least for moderate orders), it required more
+  iterations than necessary. This is now fixed.
+  <br>
+  (Andrea Bonito 2010/08/12)
+  </p>
+
+  <li><p> Changed: The DataOutBase::write_vtu function now writes data
+  as 64-bit values, rather than 32-bit values.
+  <br>
+  (Scott Miller 2010/08/5)
+  </p>
+
+  <li><p> New: MappingQ and MappingQEulerian now support order > 1 also in
+  codimension one. Step-34 has been modified to show how this works.
+  <br>
+  (Luca Heltai 2010/07/23-27)
+  </p>
+
+  <li><p> New: QGaussOneOverR now has a new constructor for arbitrary quadrature
+  points and not only the vertices of the reference cell.
+  <br>
+  (Luca Heltai 2010/07/21)
+  </p>
+</ol>
+
+
+<a name="lac"></a>
+<h3>lac</h3>
+
+<ol>
+  <li><p>New: Added a new constructor for PETScWrappers::Vector that takes
+  an existing PETSc Vec that was created by the user and is only wrapped for
+  usage.
+  <br>
+  (TH, 2010/12/03)
+  </p></li>
+
+  <li><p>New: When Hypre is available as a sub-package of PETSc, the new class
+  PETScWrappers::PreconditionBoomerAMG provides the BoomerAMG implementation
+  of the algebraic multigrid method of the Hypre package as a preconditioner.
+  This functionality is used, for example, in step-40.
+  <br>
+  (Timo Heister 2010/11/13)
+  </p></li>
+
+  <li><p>New: The class LAPACKFullMatrix now has functions to compute the
+  singular value decomposition of a matrix and its inverse.
+  <br>
+  (GK 2010/11/7)
+  </p></li>
+
+  <li><p>New: The classes RelaxationBlockSOR and RelaxationBlockSSOR
+  implement overlapping Schwarz relaxation methods. Additionally,
+  their base class RelaxationBlock and the helper class BlockList have
+  been added to the library.
+  <br>
+  (GK 2010/10/19)
+  </p></li>
+
+  <li><p>Improved: The ConstraintMatrix::merge function now takes a second
+  argument that indicates what should happen if the two objects to be
+  merged have constraints on the very same degree of freedom.
+  <br>
+  (WB 2010/09/18)
+  </p></li>
+
+  <li><p>Updated: Changes to the <code>SLEPcWrappers</code> to preserve
+  compatibility with SLEPc version 3.1. Main new updated features are
+  new solver classes: (i) Power; and (ii) Generalized Davidson.
+  <br>
+  (Toby D. Young 2010/08/04)
+  </p></li>
+
+  <li><p>Fixed: SparseMatrix::precondition_SSOR and
+  SparseMatrixEZ::precondition_SSOR had a bug, which made the SSOR
+  method inaccurate for relaxation parameters different from unity.
+  <br>
+  (GK 2010/07/20)
+  </p></li>
+</ol>
+
+
+
+<a name="deal.II"></a>
+<h3>deal.II</h3>
+
+<ol>
+  <li><p>New: The GridTools::transform function used to work only on
+  meshes that are not refined at all. It now also works on meshes
+  that have been refined uniformly, though not on adaptively refined
+  ones.
+  <br>
+  (WB, 2010/12/20)
+  </p></li>
+  
+  <li><p>New: When computing errors using
+  VectorTools::integrate_difference in codimension one problems, if
+  you specified a norm that requires the computation of the gradients,
+  you would get in trouble, because on codimension one manifolds we only
+  have information on the <em>tangential</em> gradient. This is now
+  fixed, by discarding the normal component of the provided function,
+  before computing the difference with the finite element function.
+  <br>
+  (Luca Heltai, 2010/12/05)
+  </p></li>
+  
+  <li><p>New: The VectorTools::interpolate_boundary_values function can now
+  also be used to interpolate boundary values on meshes embedded in a higher
+  dimensional space.
+  <br>
+  (WB, 2010/11/20)
+  </p></li>
+
+  <li><p>Fixed: Triangulation::execute_coarsening_and_refinement misplaced
+  the new central vertex of a quad in a two-dimensional mesh embedded into
+  higher dimensional space if said quad was at the boundary of the surface
+  described by this triangulation. This is now fixed.
+  <br>
+  (WB, 2010/11/18)
+  </p></li>
+
+  <li><p>New: The GridTools::extract_boundary_mesh function can be used to
+  generate a surface mesh from the boundary of a volume mesh.
+  <br>
+  (Sebastian Pauletti, 2010/11/15)
+  </p></li>
+
+  <li><p>Fixed: For DoF iterators, it was previously possible to write
+  code like <code>*it1 = *it2</code>, presumably with the intent to
+  copy the entire cell pointed to on the right hand side onto the cell
+  pointed to at the left. However, this is not what happens since
+  iterators are not pointers but only point to accessor classes. The
+  assignment operator has therefore been removed.
+  <br>
+  (WB, 2010/11/12)
+  </p></li>
+
+  <li><p>New: The GridOut::write_gnuplot function now also works for meshes
+  that are embedded in a higher
+  dimensional space (i.e. if the codimension was greater than 1).
+  <br>
+  (WB, 2010/11/03)
+  </p></li>
+
+  <li><p>New: The classes hp::FEFaceValues and hp::FESubfaceValues were not
+  previously available for meshes that were embedded in a higher
+  dimensional space (i.e. if the codimension was greater than 1). This is
+  now fixed. As a consequence, the VectorTools::interpolate_boundary_values
+  function is now also available for such meshes.
+  <br>
+  (WB, 2010/11/02)
+  </p></li>
+
+  <li><p>Fixed: The FEValuesExtractors::Vector class did not work when the dimension
+  of the domain was not equal to the dimension of the space in which it is
+  embedded. This is now fixed.
+  <br>
+  (Sebastian Pauletti, WB, 2010/11/01)
+  </p></li>
+
+  <li><p>Fixed: The methods VectorTools::project_boundary_values_curl_conforming
+  now also set the boundary values in an H(curl)-conforming way when hanging
+  node constraints are present.
+  <br>
+  (Markus Buerg, 2010/10/11)
+  </p></li>
+
+  <li><p>Changed: The DataPostprocessor functions now take an additional
+  argument that indicates the location of an evaluation point. For backward
+  compatibility, the old function signature still exists so that applications
+  that overload one of the existing functions continue to work.
+  The old signature has been deprecated, however, and will be removed in a
+  future version.
+  <br>
+  (Scott Miller 2010/10/08)
+  </p></li>
+
+  <li><p>Changed: FETools is now a namespace rather than a class with only
+  static member functions.
+  <br>
+  (WB 2010/09/25)
+  </p></li>
+
+  <li><p>New: FE_DGVector implements discontinuous elements based on
+  vector valued polynomial spaces.
+  <br>
+  (GK 2010/09/17)
+  </p></li>
+
+  <li>
+  <p>
+  Fixed: The methods VectorTools::interpolate_boundary_values and
+  VectorTools::project_boundary_values with ConstraintMatrix argument added
+  inhomogeneities also to DoFs that were already constrained when entering the
+  call without changing the list of DoFs that those were constrained to. This
+  leads to wrong results for inhomogeneous constraints in case e.g.
+  DoFTools::make_hanging_node_constraints was called before. Now, the correct
+  way for combining hanging node constraints and Dirichlet conditions in one
+  ConstraintMatrix is to first make the hanging node constraints, and then
+  interpolate/project the boundary values. The latter operation will then
+  simply not add any new constraints if a DoF is already constrained. The
+  same is also true for VectorTools::compute_no_normal_flux_constraints.
+  <br>
+  (Luca Heltai, Martin Kronbichler, WB 2010/09/16)
+  </p>
+
+  <li>
+  <p>
+  Fixed: The method FEValuesViews::Vector::curl aborted the program in 2d under certain
+  circumstances. This is now fixed.
+  <br>
+  (Andrea Bonito, WB 2010/09/14)
+  </p>
+
+  <li><p>Improved: MeshWorker::loop() now uses WorkStream for multithreading.
+  <br>
+  (GK 2010/09/10)
+  </p>
+
+  <li>
+  <p>
+  Fixed: The method FEValuesViews::SymmetricTensor< 2, dim, spacedim >::get_function_divergences
+  incorrectly calculated the divergence.
+  <br>
+  (Andrew McBride 2010/09/09)
+  </p>
+
+  <li>
+  <p>
+  New: Reimplementation of the parent function in the various DoFCellAccessors so
+  that one can call the parent function from a DoFIterator and also get a
+  DoFIterator returned.
+  <br>
+  (Markus Buerg 2010/08/30)
+  </p>
+
+  <li>
+  <p>
+  New: FETools::compute_embedding_matrices now computes the embedding matrix
+  for all refinement cases in parallel.
+  <br>
+  (Markus Buerg 2010/08/26)
+  </p>
+
+  <li>
+  <p>
+  New: There is now a function CellAccessor::parent so that one can do
+  <code>cell-@>parent()</code>
+  <br>
+  (Markus Buerg 2010/08/26)
+  </p>
+
+  <li>
+  <p>
+  Improved: DoFHandler has a default constructor, so that it can be used in containers.
+  <br>
+  (GK 2010/08/16)
+  </p>
+
+  <li>
+  <p>
+  New: The functions VectorTools::project_boundary_values_curl_conforming
+  are added. They can compute Dirichlet boundary conditions for Nedelec
+  elements.
+  <br>
+  (Markus Buerg 2010/08/13)
+  </p>
+
+  <li>
+  <p>
+  Fixed: The function FEValuesViews::Vector::get_function_curls produced
+  wrong results in some cases, because it erased the given vector first.
+  This is now fixed.
+  <br>
+  (Markus Buerg 2010/08/13)
+  </p>
+
+  <li>
+  <p>
+  New: Ability to project second-order SymmetricTensor and first-order Tensor objects from the quadrature points to the support points of the cell using FETools::compute_projection_from_quadrature_points.
+  <br>
+  (Andrew McBride 2010/07/29)
+  </p>
+
+  <li>
+  <p>
+  Fixed: In some rather pathological cases, the function
+  Triangulation::prepare_coarsening_and_refinement (which is called from
+  Triangulation::execute_coarsening_and_refinement) could take very long
+  if the flag Triangulation::eliminate_unrefined_islands was given in
+  the mesh smoothing flags upon construction of the triangulation. This is
+  now fixed.
+  <br>
+  (WB 2010/07/27)
+  </p>
+
+  <li><p> New: Brezzi-Douglas-Marini elements of arbitrary order in FE_BDM.
+  <br>
+  (GK 2010/07/19)
+  </p>
+
+  <li>
+  <p>
+  Fixed: The FEValues::get_cell() function was unusable from user code
+  since its implementation used a class that was only forward declared
+  and not visible at the point of instantiations in user code. This is now
+  fixed.
+  <br>
+  (WB 2010/07/16)
+  </p>
+
+  <li>
+  <p>New: The multilevel matrices for continuous elements
+  can be built with the MeshWorker now.
+  <br>
+  (Bärbel Janssen 2010/07/01)
+  </p></li>
+
+  <li>
+  <p>
+  Fixed: On some systems and compilers, the library could not be compiled
+  because of a duplicate symbol in <code>MeshWorker::LocalResults</code>.
+  This is now fixed.
+  <br>
+  (WB 2010/06/28)
+  </p>
+
+  <li>
+  <p>
+  Fixed: The output of the function
+  FE_Q::adjust_quad_dof_index_for_face_orientation
+  was wrong in 3d for polynomial orders of three or greater. This is now
+  fixed.
+  <br>
+  (WB 2010/06/28)
+  </p>
+</ol>
+
+
+*/
diff --git a/doc/news/7.0.0-vs-7.1.0.h b/doc/news/7.0.0-vs-7.1.0.h
new file mode 100644
index 0000000..659c859
--- /dev/null
+++ b/doc/news/7.0.0-vs-7.1.0.h
@@ -0,0 +1,769 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2011 - 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/**
+ * @page changes_between_7_0_and_7_1 Changes between Version 7.0 and 7.1
+
+<p>
+This is the list of changes made between the deal.II releases listed above.
+All entries are signed with the names of the authors.
+</p>
+
+
+<a name="incompatible"></a>
+<h3 style="color:red">Incompatibilities</h3>
+
+<p style="color:red">
+Following are a few modifications to the library that unfortunately
+are incompatible with previous versions of the library, but which we
+deem necessary for the future maintainability of the
+library. Unfortunately, some of these changes will require
+modifications to application programs. We apologize for the
+inconvenience this causes.
+</p>
+
+<ol>
+<li> Changed: GridTools, DoFTools, MGTools, VectorTools, MatrixCreator
+and MatrixTools are now namespaces. They have long
+been classes that had only public, static member functions, making
+the end result semantically exactly equivalent to a namespace, which is
+also how it was used. This is now also reflected in the actual code.
+<br>
+(Wolfgang Bangerth, 2011/04/27, 2011/09/14)
+
+<li> Changed: The PETScWrappers::VectorBase and PETScWrappers::MatrixBase
+classes tried to keep track of
+whether the last operation done on a vector was to add to an element or to
+write into one. If the previous such operation was of a different kind
+than the current one, we would flush buffers (see the description in
+@ref GlossCompress). However, trying to do this automatically turned
+out to be an endless source of hard-to-find bugs in %parallel programs.
+The scheme has therefore now been changed to the following: the classes
+keep track of the previous operation and if it differs from the
+current one, report an error stating that the user needs to call
+PETScWrappers::VectorBase::compress() or
+PETScWrappers::MatrixBase::compress() instead.
+<br>
+(Wolfgang Bangerth, 2011/08/03, 2011/08/30)
+
+<li> Changed: The classes Tensor, SymmetricTensor and Point now have an
+additional template argument for the number type. While a default template
+value of <code>double</code> ensures that all old code is still valid, this
+change invalidates forward declarations of the form <code>template
+@<int dim@> class Point</code> that might be present in user-defined header
+files. Now forward declarations need to specify the type as well, i.e.,
+<code>template @<int dim, typename Number@> class Point</code>. However,
+nothing changes if the full declarations in <code>deal.II/base/tensor.h,
+deal.II/base/symmetric_tensor.h</code> and <code>deal.II/base/point.h</code>
+are included.
+<br>
+(Martin Kronbichler, 2011/08/02)
+
+<li> Removed: deal.II no longer supports Trilinos versions prior to 10.0.
+<br>
+(Wolfgang Bangerth, 2011/06/29)
+
+<li> Changed: deal.II has a namespace std_cxx1x that was used to
+import classes from BOOST that are part of the upcoming C++ 1x standard. On
+the other hand, if your compiler supported a sufficiently large subset
+of C++ 1x, we had code that simply did
+@code
+  namespace std_cxx1x = std;
+@endcode
+allowing you to refer to everything that was part of the compiler's namespace
+<code>std</code> under the alternative name. This turned out to be untenable
+in connection with the changes outlined below for _1, _2, etc. Consequently,
+if the compiler used supports C++ 1x, we now selectively import elements of the
+compiler's namespace std into namespace std_cxx1x as well. This may lead to
+incompatibilities if you are already using elements of the C++ 1x
+standard by referring to them through the std_cxx1x namespace and these elements
+are not on the list of selectively imported ones.
+<br>
+(Wolfgang Bangerth, 2011/05/29)
+
+<li> Changed: Previously, placeholder arguments like _1, _2, etc that are used
+in conjunction with the std_cxx1x::bind function could be referenced as if
+they are part of the global namespace. This was achieved by importing the
+corresponding elements of namespace std::placeholders into the global namespace
+if your compiler supported this part of the C++ 1x standard, or otherwise using
+the BOOST counterparts which are already in the global namespace. However,
+this leads to a conflict if one has a C++ 1x enabled compiler (e.g. GCC 4.6)
+<i>and</i> includes certain BOOST headers, since the importation of symbols
+into the global namespace now leads to ambiguous names. The only solution to
+the problem is to not import names into the global namespace, but rather
+import the names from either BOOST or namespace std into the deal.II namespace
+std_cxx1x. The downside is that all code that uses _1, _2, etc needs to be
+changed to use std_cxx1x::_1, std_cxx1x::_2, etc from now on.
+<br>
+(Wolfgang Bangerth, 2011/05/29)
+</ol>
+
+
+<!-- ----------- GENERAL IMPROVEMENTS ----------------- -->
+
+<a name="general"></a>
+<h3>General</h3>
+
+<ol>
+<li> New: Long in the making, the new tutorial program step-32 is a massively
+%parallel simulator for the Boussinesq equations that describe thermal convection.
+<br>
+(Martin Kronbichler, Timo Heister, Wolfgang Bangerth, 2011/10/06)
+
+<li> New: There is now a namespace Utilities::MPI that holds some of the MPI-related
+functions that were previously part of Utilities::System. Specifically, the following
+functions were moved and in part renamed: Utilities::System::get_n_mpi_processes
+is now Utilities::MPI::n_mpi_processes; Utilities::System::get_this_mpi_process
+is now Utilities::MPI::this_mpi_process;
+Utilities::System::compute_point_to_point_communication_pattern
+is now Utilities::MPI::compute_point_to_point_communication_pattern;
+Utilities::System::duplicate_communicator
+is now Utilities::MPI::duplicate_communicator;
+Utilities::System::calculate_collective_mpi_min_max_avg
+is now Utilities::MPI::min_max_avg;
+Utilities::System::MPI_InitFinalize
+is now Utilities::MPI::MPI_InitFinalize.
+In addition, some of the arguments of these functions or classes
+have changed.
+<br>
+The previous functions should still be available, though their use
+is now deprecated.
+<br>
+(Wolfgang Bangerth, 2011/09/26)
+
+<li> Deprecated: Utilities::System::program_uses_mpi does exactly the same thing
+as Utilities::System::job_supports_mpi. The former has therefore been
+deprecated.
+<br>
+(Wolfgang Bangerth, 2011/09/26)
+
+<li> New: When using a new enough version of GCC, debug sections in
+object files are now compressed using the <code>-Wa,--compress-debug-sections</code>
+flag, resulting in savings in disk space on the order of 230 MB.
+<br>
+(Wolfgang Bangerth, 2011/09/22)
+
+<li> New: deal.II can now be configured and built with the
+<a href="http://clang.llvm.org">Clang C++ frontend of the LLVM compiler</a>.
+<br>
+(Timo Heister, Wolfgang Bangerth, 2011/08/20)
+
+<li> Changed: Several of the tutorial programs used the same class
+names, often <code>LaplaceProblem</code> in the earlier programs.
+There is nothing inherently wrong with this, since these are
+entirely separate programs, but this sometimes confused
+integrated development environments because searching for
+individual symbols would turn up several occurrences in different
+files. To make this a bit simpler, the main classes for step-3
+through step-6 were renamed to <code>StepX</code> (steps 1 and 2
+do not have a main class). Starting with step-7, everything specific
+to a tutorial program has been moved into a namespace called
+<code>StepX</code> to make the fully qualified names unique across
+different tutorial programs.
+<br>
+(Wolfgang Bangerth, Guido Kanschat 2011/08/16)
+
+<li> Extended: Many operations on objects of
+type Point@<0@>, Quadrature@<0@>, etc
+(including creation) were previously forbidden since such objects do not make
+much sense. However, this prevented a lot of code that could otherwise work
+in a dimension independent way, from working in 1d, e.g. integration on
+faces. Many of these places have now been cleaned up and work.
+<br>
+(Wolfgang Bangerth, 2011/08/12)
+
+<li> Extended: The classes Tensor, SymmetricTensor and Point now have an
+additional template argument for the number type. It is now possible to base
+these classes on any abstract data type that implements basic arithmetic
+operations, like <code>Tensor<1,dim,std::complex<double> ></code>. deal.II
+uses a default template argument <code>double</code> that ensures that all
+code using e.g. <code>Tensor<1,dim></code> remains valid.
+<br>
+(Martin Kronbichler, 2011/08/02)
+
+<li> Fixed: deal.II can link with Trilinos but previously it required a
+very specific set of Trilinos sub-libraries; if Trilinos had been compiled
+with a larger set of sub-libraries, linking would sometimes fail. This
+has now been made more generic and deal.II obtains the proper set of
+libraries from Trilinos.
+<br>
+(Wolfgang Bangerth, 2011/06/29)
+
+<li> Fixed: On Mac OS X, linking with some external libraries such as Trilinos
+sometimes failed due to a misconfiguration of linker flags. This should now be
+fixed.
+<br>
+(Praveen C, Martin Kronbichler, Wolfgang Bangerth, 2011/06/23)
+
+<li> Changed: Doing <code>make clean</code> was supposed to only remove object
+files but not libraries; however, it also removed the TBB libraries and a
+few executables. This has now been changed: <code>make clean</code> now only
+removes stuff that isn't needed to run executables, i.e. it leaves the TBB
+and other libraries alone. As before, the target <code>make distclean</code>
+is responsible for removing everything.
+<br>
+(Max Jensen, Wolfgang Bangerth, 2011/06/14)
+
+<li> New: The Triangulation and DoFHandler classes, together with many
+smaller classes can now be serialized, i.e. their data can be written
+to an output stream and later retrieved to restore the state of the program.
+<br>
+(Wolfgang Bangerth, 2011/06/13)
+
+<li> New/deprecated: The Triangulation class offers ways to get informed
+whenever the triangulation changes. Previously, the mechanism doing this
+was through the Triangulation::RefinementListener class. This has been
+deprecated and has been superseded by a BOOST signals based mechanism
+that is generally more powerful and does not rely on overloading
+particular virtual functions inherited from a base class.
+
+While the old mechanism should continue to work, you should consider
+upgrading. For more information on the signals mechanism, see the
+documentation of the Triangulation class.
+
+In addition to the change above, the new implementation now offers two
+more signals one can subscribe to: Triangulation::Signals::clear for
+when the triangulation is cleared, and Triangulation::Signals::any_change
+that can be used for any operation that changes the mesh. Furthermore,
+in a change from previous behavior, the Triangulation::Signals::create
+signal is now also triggered when another triangulation is copied to
+the one that owns the signal.
+<br>
+(Wolfgang Bangerth, 2011/06/01)
+
+<li> Removed: The <code>./configure</code> script allowed configuring
+for the GNU Scientific Library (GSL) in version 7.0 but didn't actually
+use any of the GSL functions. The corresponding code has therefore been
+removed again.
+<br>
+(Wolfgang Bangerth, 2011/05/22)
+
+<li> Changed: Traditionally, include directories were set through the
+<code>-I</code> flag in make files in such a way that one would do
+@code
+  #include <base/quadrature.h>
+@endcode
+In preparation for future changes that will make possible installing
+header files in a directory under <code>/usr/include</code> it seemed
+useful to install everything under <code>/usr/include/deal.II</code>
+and include them as
+@code
+  #include <deal.II/base/quadrature.h>
+@endcode
+This change has been made throughout the library and tutorial programs.
+However, the old way of using include directories will continue to work
+for at least one release for backward compatibility.
+<br>
+(Wolfgang Bangerth, 2011/05/16)
+
+<li> Changed: The version of BOOST we ship with deal.II has been upgraded
+to 1.46.1. BOOST now also resides in the directory <code>contrib/boost-1.46.1</code>
+instead of an unversioned directory.
+<br>
+(Wolfgang Bangerth, 2011/05/16)
+
+<li> New: The SparseDirectUMFPACK class can now also deal with matrices
+provided in SparseMatrixEZ format.
+<br>
+(Martin Genet, 2011/05/04)
+
+<li> New: The new tutorial program step-46 shows how to couple different
+models defined on subsets of
+the domain, in this case Stokes flow around an elastic solid. The
+trick here is that variables (here the flow velocity and pressure,
+and the solid displacement) do not live on the entire domain, but
+only on a part. The point of the program is how to represent this in
+source code.
+<br>
+(Wolfgang Bangerth, 2011/04/30)
+
+<li> Fixed: On Debian, the Trilinos packages use a different layout
+of include files and library names. The <code>./configure</code>
+script can now deal with this.
+<br>
+(Walter Landry, 2011/02/22)
+
+<li> Improved: Linking the deal.II libraries on file systems that
+are mounted remotely from a file server took painfully long. This
+is now fixed by linking everything on the local file system
+and only subsequently moving the file into its final location.
+<br>
+(Wolfgang Bangerth, 2011/01/28)
+
+<li> Changed: Most classes in deal.II have a member function
+<code>memory_consumption</code> that used to return an unsigned int.
+However, on most 64-bit systems, unsigned int is still only 32-bit
+wide, and consequently the return type does not provide enough
+precision to return the size of very large objects. The return types
+of all of these functions have been changed to std::size_t, which is
+defined to be a type that can hold the sizes of all objects possible
+on any system.
+<br>
+(Wolfgang Bangerth, 2011/01/22)
+
+<li> Fixed: When passing the <code>--enable-mpi</code> flag to
+<code>./configure</code>, the script only tried <code>mpiCC</code>
+as the MPI C++ compiler. However, on some systems, it is called
+<code>mpicxx</code>. The script now tries that as well.
+<br>
+(Wolfgang Bangerth, 2011/01/22)
+
+<li> Fixed: When using Trilinos and using the Intel C++ compiler,
+we accidentally used invalid compiler flags that led to a warning
+every time we compiled a file.
+<br>
+(Wolfgang Bangerth, 2011/01/22)
+
+<li> Fixed: At the bottom of the page of tutorial programs we show a "plain"
+version of the tutorial program. However, the script that generates this plain
+version was broken and sometimes truncated the file. This
+should be fixed now.
+<br>
+(Wolfgang Bangerth, 2011/01/18)
+
+<li> Extended: Several missing instantiations of functions for triangulations
+and DoF handlers embedded in higher dimensional space have been added.
+<br>
+(Wolfgang Bangerth, 2011/01/15)
+</ol>
+
+
+<!-- ----------- SPECIFIC IMPROVEMENTS ----------------- -->
+
+<a name="specific"></a>
+<h3>Specific improvements</h3>
+
+<ol>
+<li> New: There is a new predicate IteratorFilters::LocallyOwnedCell
+for filtered iterators.
+<br>
+(Wolfgang Bangerth, 2011/10/03)
+
+<li> Improved: The class BlockList used in RelaxationBlock has been replaced
+by SparsityPattern, since it only reproduced its functionality.
+<br>
+(Guido Kanschat, 2011/09/26)
+
+<li> New: SparsityPattern::row_position() finds a column index in a row and returns
+its "local" index or numbers::invalid_unsigned_int.
+<br>
+(Guido Kanschat, 2011/09/26)
+
+<li> New: The functions Utilities::MPI::sum and Utilities::MPI::max
+can be used to compute the sum or maximum of values over a number of
+MPI processes without having to deal with the underlying MPI functions.
+<br>
+(Wolfgang Bangerth, 2011/09/25)
+
+<li> New: The CellAccessor::is_locally_owned function is a shortcut
+for the <code>!cell-@>is_ghost() && !cell-@>is_artificial()</code> pattern
+found in many places when dealing with distributed meshes.
+<br>
+(Wolfgang Bangerth, 2011/09/10)
+
+<li> New: The GridGenerator::torus function and TorusBoundary class can
+create and describe the surface of a torus.
+<br>
+(Daniel Castanon Quiroz, 2011/09/08)
+
+<li> Fixed: FEFieldFunction class was not thread-safe because it keeps a
+cache on the side that was invalidated when different
+threads kept pouncing on it. This is now fixed.
+<br>
+(Patrick Sodré, 2011/09/07)
+
+<li> New: There is now a function GridTools::volume() computing the volume
+of a triangulation.
+<br>
+(Wolfgang Bangerth, 2011/09/02)
+
+<li> New: Code like <code>cell-@>face(1)-@>set_boundary_indicator(42);</code>
+now also works in 1d.
+<br>
+(Wolfgang Bangerth, 2011/08/30)
+
+<li> Fixed: The TimerOutput::print_summary() function changed the
+precision of output on the stream it prints to, but didn't restore
+the previous value. This is now fixed.
+<br>
+(Wolfgang Bangerth, 2011/08/30)
+
+<li> New: There are now two new functions Utilities::string_to_double.
+<br>
+(Wolfgang Bangerth, 2011/08/25)
+
+<li> Changed: The function VectorTools::compute_no_normal_flux_constraints
+used to compute its constraints by evaluating the normal vector to the
+surface as described by the mapping, rather than using the normal to
+the surface described by the Boundary object associated with this face.
+(Note that the Mapping computes its approximation by polynomial interpolation
+of the surface described by the Boundary object.) This has now been changed:
+the normal vector is now obtained from the Boundary object directly, at
+points computed by the Mapping.
+<br>
+(Wolfgang Bangerth, 2011/08/25)
+
+<li> New: The Boundary base class now has a function Boundary::normal_vector
+that returns the normal vector to the surface at a given location. Derived
+classes need to implement it, of course, if they want it to be used.
+<br>
+(Wolfgang Bangerth, 2011/08/25)
+
+<li> Fixed: The function VectorTools::compute_no_normal_flux_constraints had
+a problem that led to extremely difficult to pin down bugs when running
+with sufficiently many processors. Basically, the constraints computed
+by different processors did not agree which should be the independent
+degrees of freedom and which should be the constrained ones. The result
+was a set of constraints that did not lead to a consistent linear system.
+<br>
+(Martin Kronbichler, 2011/08/24)
+
+<li> New: Added DoFRenumbering::hierarchical() to reorder the degrees of freedom
+by going through the cells in hierarchical order. This ensures consistent
+DoF numbering in parallel computations.
+<br>
+(Timo Heister, 2011/08/24)
+
+<li> Fixed: Triangulation<dim>::get_boundary_indicators() returned
+wrong data for dim=1. This is now fixed.
+<br>
+(Sebastian Pauletti 2011/08/17)
+
+<li> Improved: The function LogStream::timestamp() outputs all results of the Posix
+function times, namely wall time, user time and system time.
+<br>
+(Guido Kanschat, 2011/08/18)
+
+<li> Extended: GridGenerator::half_hyper_shell() got the option
+<code>colorize</code>, which assigns different boundary indicators to the
+different parts of the boundary. Added GridGenerator::quarter_hyper_shell()
+with the same options.
+<br>
+(Timo Heister, 2011/08/15)
+
+<li> Fixed: The functions VectorTools::create_boundary_right_hand_side()
+called with an empty set of boundary_indicators (the default), did not apply
+any boundary conditions. The empty set now applies it to all boundaries.
+<br>
+(Timo Heister, Sebastian Pauletti, 2011/08/15)
+
+<li> Fixed: The function VectorTools::compute_no_normal_flux_constraints had
+a bug that led to an exception whenever we were computing constraints for
+vector fields located on edges shared between two faces of a 3d cell if those
+faces were not perpendicular. This is now fixed.
+<br>
+(Wolfgang Bangerth, Thomas Geenen, Timo Heister, 2011/08/10)
+
+<li> New: The function FullMatrix::triple_product() adds triple products
+like Schur complements to existing matrices.
+<br>
+(Guido Kanschat, 2011/08/05)
+
+<li> Improved: The PETScWrappers::VectorBase class was rather generous in
+calling the PETSc <code>VecAssembleBegin/End</code> functions that incur
+communication in the %parallel case and are therefore causes of potential
+slowdowns. This has been improved.
+<br>
+(Wolfgang Bangerth, 2011/08/03)
+
+<li> Fixed: The function VectorTools::create_right_hand_side now also works
+for objects of type hp::DoFHandler with different finite elements.
+<br>
+(Daniel Gerecht, 2011/07/20)
+
+<li> Improved: Evaluation of Lagrangian basis functions has been made stable
+by exchanging polynomial evaluation from the standard form
+$a_n x^n+\ldots+a_1 x + a_0$ to a product of linear factors,
+$c (x - x_0) (x-x_1)\ldots (x-x_n)$. This ensures accurate evaluation up to
+very high order and avoids inaccuracies when using high order finite elements.
+<br>
+(Martin Kronbichler 2011/07/26)
+
+<li> Improved: The internal functions in the constructor of the FE_Q element
+have been improved for high order elements. Especially when the element is
+constructed for a 1D quadrature formula, the initialization is now much faster.
+E.g. the initialization up to order 12 in three dimensions completes in less
+than a second, whereas it took hundreds of seconds before.
+<br>
+(Martin Kronbichler 2011/07/26)
+
+<li> New: There is now a class Threads::ThreadLocalStorage that allows threads
+to have their own copy of an object without having to fear interference from
+other threads in accessing this object.
+<br>
+(Wolfgang Bangerth 2011/07/07)
+
+<li> Fixed: The 2d grid reordering algorithm that is used by all grid readers had
+a component that was quadratic in its complexity, sometimes leading to cases
+where reading in a mesh in debug mode could take minutes for just a few tens
+of thousands of cells. This has now been fixed.
+<br>
+(Wolfgang Bangerth 2011/07/07)
+
+<li> New: The function DoFTools::count_dofs_per_component now also works
+for objects of type hp::DoFHandler.
+<br>
+(Christian Goll, Wolfgang Bangerth 2011/07/06)
+
+<li> Fixed: Under some circumstances, Threads::Thread::join() could only be
+called once and would generate a system exception when called a second time.
+Since it is often useful to not track whether this function had already been
+called, this is now worked around in such a way that one can always call
+the function multiple times.
+<br>
+(Wolfgang Bangerth 2011/07/03)
+
+<li> New: The Threads::Thread::join() function can now also be called even
+if no thread has been assigned to this thread object. The function then simply
+does nothing.
+<br>
+(Wolfgang Bangerth 2011/07/03)
+
+<li> New: There is now a new function Threads::Thread::valid that can be used
+to query whether the thread object has been assigned a thread.
+<br>
+(Wolfgang Bangerth 2011/07/01)
+
+<li> New: The new function GridGenerator::merge_triangulations can be used to compose
+coarse meshes from simpler ones by merging their cells into a single
+triangulation object.
+<br>
+(Wolfgang Bangerth 2011/06/17)
+
+<li> Fixed: If an FEValues object was kept around until after the triangulation
+on which it works has been refined or coarsened, and is then reinitialized
+with a cell from the refined triangulation, it could compute wrong results or
+crash outright. This has now been fixed.
+<br>
+(Wolfgang Bangerth 2011/06/02)
+
+<li> Changed: The TrilinosWrappers::SparsityPattern::trilinos_sparsity_pattern()
+function returned a reference to an object of kind Epetra_CrsGraph. However, the
+actual object pointed to is of derived class Epetra_FECrsGraph. The function
+has now been changed to return a reference to the latter type. Since derived
+references can be assigned to references to base, this change should not
+result in any incompatibilities.
+<br>
+(Wolfgang Bangerth 2011/05/27)
+
+<li> New: The class RelaxationBlockJacobi has been added to the relaxation classes.
+<br> (Guido Kanschat, 2011/05/19)
+
+<li> New: discontinuous Galerkin versions of vector-valued elements have been
+implemented: FE_DGBDM, FE_DGNedelec, and FE_DGRaviartThomas.
+<br> (Guido Kanschat, 2011/05/19)
+
+<li> New: Mapping<dim,spacedim>::transform_real_to_unit_cell  now
+works also in the codimension one case, where it performs the normal
+projection of the point on the codimension one surface.
+<br> (Luca Heltai, 2011/05/17)
+
+<li> New: The PersistentTriangulation class now works also in
+the codimension one case.
+<br>
+(Luca Heltai, 2011/05/16)
+
+<li> Fixed: The TrilinosWrappers::SparseMatrix::print() function
+didn't get column indices right. This is now fixed.
+<br>
+(Habib Talavatifard, Wolfgang Bangerth 2011/05/10)
+
+<li> Fixed: The TrilinosWrappers::SparseMatrix::operator() and
+TrilinosWrappers::SparseMatrix::el() functions sometimes produced
+wrong results for rectangular matrices. The same is true for
+TrilinosWrappers::SparsityPattern::exists(). This is now fixed.
+<br>
+(Habib Talavatifard, Wolfgang Bangerth 2011/05/09, 2011/05/27)
+
+<li> New: The version of DoFTools::make_flux_sparsity_pattern that takes
+the coupling masks is now also available for hp::DoFHandler objects.
+<br>
+(Wolfgang Bangerth, 2011/04/27)
+
+<li> Fixed: If Triangulation::create_triangulation is called after an
+hp::DoFHandler object is attached to the triangulation object, setting active
+FE indices leads to a crash. The problem did not happen if the mesh was
+refined before setting the FE indices. This is now fixed. In the process, the
+Triangulation::RefinementListener::create_notification function was
+introduced.
+<br>
+(Wolfgang Bangerth, 2011/04/22)
+
+<li> Fixed: The function FEValuesViews::SymmetricTensor::divergence had a bug.
+This is now fixed.
+<br>
+(Wolfgang Bangerth, Feifei Cheng, Venkat Vallala 2011/04/21)
+
+<li> Fixed: Under some conditions, FEFaceValues applied to an FESystem element
+that contained elements of type FE_Nothing would receive an erroneous
+exception. This is now fixed.
+<br>
+(Wolfgang Bangerth, 2011/04/17)
+
+<li> New: There is now an operator* for the multiplication of a <code>SymmetricTensor@<2,dim@></code>
+and a <code>Tensor@<1,dim@></code>.
+<br>
+(Wolfgang Bangerth, 2011/04/12)
+
+<li> Fixed: Added some instantiations to make KellyErrorEstimator and SolutionTransfer
+work in  codimension one. Fixed some dim in spacedim.
+<br>
+(Luca Heltai, 2011/04/11)
+
+<li> Fixed: Added some instantiations to make anisotropic refinement work
+in codimension one.
+<br>
+(Luca Heltai, 2011/03/31)
+
+<li> Fixed: Corrections in the creation of the face and subface
+interpolation matrices in the class FE_Nedelec.
+<br>
+(Markus Bürg, 2011/03/17)
+
+<li> Fixed: In step-21, the inner iteration would sometimes not converge for
+very coarse meshes because of numerical roundoff. This is now fixed by allowing
+more than <code>rhs.size()</code> CG iterations if the number of degrees of freedom
+is very small.
+<br>
+(Jichao Yin, Wolfgang Bangerth, 2011/04/06)
+
+<li> New: There is now a new function ConditionalOStream::get_stream().
+<br>
+(Wolfgang Bangerth, 2011/03/09)
+
+<li> Fixed: FESystem::get_unit_face_support_points would refuse to return
+anything if one of the base elements did not have support points. This
+condition has been relaxed: it now only doesn't return anything if this
+base element has no support points and also has degrees of freedom on
+the face.
+<br>
+(Wolfgang Bangerth, 2011/03/07)
+
+<li> Fixed: Objects of type FE_Nothing could be generated with multiple vector components
+by passing an argument to the constructor. Yet, the FE_Nothing::get_name() function
+always just returned the string <code>FE_Nothing@<dim@>()</code> independently of the
+number of components. This is now fixed.
+<br>
+(Wolfgang Bangerth, 2011/03/07)
+
+<li> Fixed: PETScWrappers::MPI::SparseMatrix and apply_boundary_values() produced an error in debug mode about non-existent SparsityPattern entries. Reason: clear_rows() also eliminated the whole row in the PETSc-internal SparsityPattern, which resulted in an error in the next assembly process.
+<br>
+(Timo Heister, 2011/02/23)
+
+<li> Fixed: It wasn't possible to use the FE_Nothing element inside an FESystem
+object and hand the result over to an FEValues object. This is now fixed.
+<br>
+(Wolfgang Bangerth, 2011/02/18)
+
+<li> New: There is now a function DataOutBase::write_visit_record that does
+the equivalent for VisIt that DataOutBase::write_pvtu_record does for ParaView:
+generate a file that contains a list of all other VTK or VTU files of which the
+current parallel simulation consists.
+<br>
+(Wolfgang Bangerth, 2011/02/16)
+
+<li> New: There is now a function TrilinosWrappers::VectorBase::minimal_value.
+<br>
+(Wolfgang Bangerth, 2011/02/16)
+
+<li> Fixed: TableBase::operator= could not be compiled if the type of the
+elements of the table was <code>bool</code>. This is now fixed.
+<br>
+(Wolfgang Bangerth, 2011/02/16)
+
+<li> Improved: The GridGenerator::hyper_shell function generated meshes in 3d
+that are valid but of poor quality upon refinement. There is now an additional
+option to generate a coarse mesh of 96 cells that has a much better quality.
+<br>
+(Wolfgang Bangerth, 2011/02/12)
+
+<li> Fixed: There are systems where the <code>libz</code> library is installed
+but the <code>zlib.h</code> header file is not available. Since the latter
+condition was not tested, this would result in compiler errors. This is now
+fixed.
+<br>
+(Wolfgang Bangerth, 2011/02/09)
+
+<li> Fixed: Prolongation and restriction matrices were not computed at all
+for elements of type FE_DGQ if <code>dim@<spacedim</code>. Consequently,
+consumers of this information, such as the SolutionTransfer class or
+the DoFCellAccessor::set_dof_values_by_interpolation function did not
+work either and simply returned zero results. This is now fixed.
+<br>
+(M. Sebastian Pauletti, Wolfgang Bangerth, 2011/02/09)
+
+<li> Fixed: When refining a mesh with codimension one, edges were refined using
+the same manifold description as adjacent cells, but this ignored that a
+boundary indicator might have been purposefully set for edges that are truly at
+the boundary of the mesh. For such edges, the boundary indicator is now honored.
+<br>
+(M. Sebastian Pauletti, Wolfgang Bangerth, 2011/02/09)
+
+<li> Fixed: The functions VectorTools::compute_mean_value and
+VectorTools::integrate_difference now also work for distributed
+triangulations of type parallel::distributed::Triangulation.
+<br>
+(Wolfgang Bangerth, 2011/02/07)
+
+<li> Changed: If the <code>libz</code> library was detected during library
+configuration, the function DataOutBase::write_vtu now writes data in compressed
+format, saving a good fraction of disk space (80-90% for big output files).
+<br>
+(Wolfgang Bangerth, 2011/01/28)
+
+<li> New: Trilinos and PETSc vectors now have a function has_ghost_elements().
+<br>
+(Timo Heister, 2011/01/26)
+
+<li> Changed: The TrilinosWrappers::MPI::BlockVector::compress function now takes an
+argument (with a default value) in exactly the same way as the
+TrilinosWrappers::MPI::Vector::compress function already did.
+<br>
+(Wolfgang Bangerth, 2011/01/21)
+
+<li> Fixed: When calling DataOut::build_patches with a mapping, requesting more
+than one subdivision, and when <code>dim@<spacedim</code>, then some cells
+were not properly mapped. This is now fixed.
+<br>
+(Wolfgang Bangerth, 2011/01/18)
+
+<li> New: Restructured the internals of <code>PETScWrappers::Precondition*</code>
+to allow a PETSc PC object to exist without a solver. New: Use
+<code>Precondition*::%vmult()</code> to apply the preconditioner once.
+Preconditioners now have a default constructor and an <code>initialize()</code>
+function and are no longer initialized in the solver call,
+but in the constructor or <code>initialize()</code>.
+<br>
+(Timo Heister, 2011/01/17)
+
+<li> Fixed: Boundary conditions in the step-23 tutorial program are now
+applied correctly. Matrix columns get eliminated with the used method
+and introduce some contribution to the right hand side coming from
+inhomogeneous boundary values. The old implementation did not reset the
+matrix columns before applying new boundary values.
+<br>
+(Martin Stoll, Martin Kronbichler, 2011/01/14)
+
+<li> Extended: <code>base/tensor.h</code> has an additional collection of
+contractions between three tensors (<i>i.e.</i>, <code>contract3</code>).
+This can be useful for writing matrix/vector assembly in a more compact
+form than before.
+<br>
+(Toby D. Young, 2011/01/12)
+
+</ol>
+
+
+*/
diff --git a/doc/news/7.1.0-vs-7.2.0.h b/doc/news/7.1.0-vs-7.2.0.h
new file mode 100644
index 0000000..6495f02
--- /dev/null
+++ b/doc/news/7.1.0-vs-7.2.0.h
@@ -0,0 +1,764 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2012 - 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/**
+ * @page changes_between_7_1_and_7_2 Changes between Version 7.1 and 7.2
+
+<p>
+This is the list of changes made between the deal.II releases listed above.
+All entries are signed with the names of the authors.
+</p>
+
+
+
+<!-- ----------- INCOMPATIBILITIES ----------------- -->
+
+<a name="incompatible"></a>
+<h3 style="color:red">Incompatibilities</h3>
+
+<p style="color:red">
+Following are a few modifications to the library that unfortunately
+are incompatible with previous versions of the library, but which we
+deem necessary for the future maintainability of the
+library. Unfortunately, some of these changes will require
+modifications to application programs. We apologize for the
+inconvenience this causes.
+</p>
+
+<ol>
+<li> Changed/fixed: Several operations on Trilinos vectors that change the
+elements of these vectors were allowed accidentally for vectors that have
+ghost elements. This is a source of errors because a change on one
+MPI process will not show up on a different processor. Consequently, we
+intended to disallow all functions that modify vectors with ghost elements
+but this was not enforced for all of these functions. This is now fixed
+but it may lead to errors if your code relied on the existing behavior. The
+way to work around this is to only ever modify fully distributed vectors,
+and then copy them into vectors with ghost elements.
+<br>
+(Wolfgang Bangerth, 2012/08/06)
+
+<li> Changed: The argument material_id of the estimate-functions of
+the KellyErrorEstimator class is now of type types::material_id
+with default value numbers::invalid_material_id, instead of type
+unsigned int with default value numbers::invalid_unsigned_int. This
+should not make a difference to most users unless you specified the
+argument's default value by hand.
+<br>
+(Wolfgang Bangerth, Christian Goll 2012/02/27)
+
+<li>
+The member functions Triangulation::set_boundary and
+Triangulation::get_boundary now take a types::boundary_id instead of
+an unsigned int as argument. This now matches the actual data type
+used to store boundary indicators internally.
+<br>
+(Wolfgang Bangerth, Christian Goll 2012/02/27)
+</ol>
+
+
+<!-- ----------- GENERAL IMPROVEMENTS ----------------- -->
+
+<a name="general"></a>
+<h3>General</h3>
+
+
+<ol>
+<li>
+New: We now support parallel output using HDF5/xdmf.
+<br>
+(Eric Heien, Timo Heister, 2012/08/28)
+
+<li>
+New: We are now compatible with Trilinos 10.4.2, 10.8.5, and 10.12.2. See the
+readme for more information.
+<br>
+(Timo Heister, 2012/08/24)
+
+<li>
+Changed: To make the naming of types defined in namespace types
+consistent, we have renamed types::subdomain_id_t to
+types::subdomain_id. The old name has been retained as a typedef
+but is now deprecated.
+<br>
+(Wolfgang Bangerth, 2012/08/22)
+
+<li>
+Changed: Unify the concept of compress() for all linear algebra
+objects. Introduce type VectorOperation to decide between
+add and insert. Implement also for serial vectors. Note:
+this breaks distributed::vector::compress(bool). See
+@ref GlossCompress for more information.
+<br>
+(Timo Heister, 2012/08/13)
+
+<li>
+Changed: Support for METIS 4.x has been replaced with support for
+METIS 5.x. Use <code>--with-metis=path/to/metis</code> to configure
+with METIS 5.x.
+<br>
+(Stefano Zampini, Toby D. Young 2012/08/13)
+
+<li>
+Changed: numerics/vectors.h is now called numerics/vector_tools.h and
+numerics/matrices.h is now called numerics/matrix_tools.h. The old files are
+deprecated.
+<br>
+(Timo Heister 2012/08/09)
+
+<li>
+New: officially added support for clang 3.1 or newer.
+<br>
+(Timo Heister and Wolfgang Bangerth, 2012/08/07)
+
+<li>
+Changed: PETSc linking now prefers to use the libpetsc.so generated
+by PETSc starting from version 3.1+. This fixes the problem
+of linker errors on recent gcc/ubuntu versions.
+<br>
+(Timo Heister, 2012/08/07)
+
+<li>
+Fixed: On some 64-bit systems, we build deal.II with the <code>-m64</code>
+flag but forgot to build UMFPACK with this flag as well, leading to
+linker errors. This is now fixed.
+<br>
+(Wolfgang Bangerth, 2012/07/31)
+
+<li>
+Fixed: The Intel compiler, when using MPI, requires that <code>mpi.h</code>
+be included before header files like <code>stdio.h</code>. This can't
+be ensured in general because the inclusion might be indirect, but
+we now work around the problem in other ways.
+<br>
+(Timo Heister, Wolfgang Bangerth, Michael Thomadakis, 2012/07/26)
+
+<li>
+Fixed: On some systems, the p4est library we use for distributed
+parallel computations installs its libraries into a <code>lib64/</code>
+directory instead of the usual <code>lib/</code>. deal.II can now deal
+with this.
+<br>
+(Wolfgang Bangerth, 2012/07/25)
+
+<li>
+New: step-43 is an extension of step-21 that shows efficient methods
+to solve multi-phase flow.
+<br>
+(Chih-Che Chueh, Wolfgang Bangerth, 2012/06/06)
+
+<li>
+New: step-15 has been replaced by a program that demonstrates the
+solution of a nonlinear problem (the minimal surface equation) using
+Newton's method.
+<br>
+(Sven Wetterauer, 2012/06/03)
+
+<li>
+New: step-48 demonstrates the solution of a nonlinear wave equation
+with an explicit time stepping method. The usage of Gauss-Lobatto
+elements gives diagonal mass matrices, which obviates the solution of
+linear systems of equations. The nonlinear right hand side is
+evaluated with the matrix-free framework.
+<br>
+(Katharina Kormann and Martin Kronbichler, 2012/05/05)
+
+<li>
+New: step-37 shows how the matrix-free framework can be utilized to
+efficiently solve the Poisson equation without building global
+matrices. It combines fast operator evaluation with a multigrid solver
+based on a polynomial Chebyshev smoother.
+<br>
+(Katharina Kormann and Martin Kronbichler, 2012/05/05)
+
+<li>
+New: A new matrix-free interface has been implemented. The framework
+is parallelized with MPI, TBB, and explicit vectorization instructions
+(new data type VectorizedArray). The class MatrixFree caches
+cell-based data in an efficient way. Common operations can be
+implemented using the FEEvaluation class.
+<br>
+(Katharina Kormann and Martin Kronbichler, 2012/05/05)
+
+<li>
+New: step-44 demonstrates one approach to modeling large
+deformations of nearly-incompressible elastic solids. The
+elastic response is governed by a non-linear, hyperelastic
+free-energy function. The geometrical response is also
+nonlinear, i.e., the program considers finite deformations.
+<br>
+(Andrew McBride and Jean-Paul Pelteret, 2012/04/25)
+
+<li>
+New: step-41 demonstrates solving the obstacle problem,
+a variational inequality.
+<br>
+(Joerg Frohne, 2012/04/22)
+
+<li>
+Changed: The version of BOOST we ship with deal.II has been upgraded
+to 1.49.0.
+<br>
+(Martin Kronbichler, 2012/04/07)
+
+<li>
+New: We have added a brief section to the step-12 tutorial program that
+compares the DG solution computed there with one that one would obtain by
+using a continuous finite element.
+<br>
+(Wolfgang Bangerth, 2012/03/25)
+
+<li>
+New: Added support for codimension 2, i.e. for dim-dimensional objects
+embedded into spacedim=dim+2 dimensional space.
+<br>
+(Sebastian Pauletti, 2012/03/02)
+
+<li> Changed: Material and Boundary indicators have been both of the
+type unsigned char. Throughout the library, we changed their datatype
+to <code>types::material_id</code>
+resp. <code>types::boundary_id</code>, both typedefs of unsigned
+char. Internal faces are now characterized by
+numbers::internal_face_boundary_id(<code>=static_cast@<types::boundary_id@>(-1)</code>)
+instead of 255, so we get rid of that mysterious number in the source
+code.  Material_ids are also assumed to lie in the range from 0 to
+numbers::invalid_material_id-1 (where <code>numbers::invalid_material_id =
+static_cast@<types::material_id@>(-1)</code>). With this change, it is now
+much easier to extend the range of boundary or material ids, if
+needed.
+<br>
+(Christian Goll 2012/02/27)
+
+<li> New: Functions like FEValues::get_function_values have long been
+able to extract values from pretty much any vector kind given to it (e.g.
+of kind Vector, BlockVector, PETScWrappers::Vector, etc). The list of
+allowed "vector" types now also includes IndexSet, which is interpreted
+as a vector of elements that are either zero or one, depending on whether
+an index is in the IndexSet or not.
+<br>
+As a consequence of this, the DataOut::add_data_vector functions now also
+work for such types of vectors, a use demonstrated in step-41.
+<br>
+(Wolfgang Bangerth, 2012/02/14)
+
+<li> New: It has long been a nuisance that the deal.II vector classes
+could only be accessed using <code>operator()</code> whereas the C++
+<code>std::vector</code> class required <code>operator[]</code>. This
+diminished the usefulness of template code. Historically, the reason
+was that the deal.II vector classes should use the same operator as
+the matrix classes, and C++ does not allow to use <code>operator[]</code>
+for matrices since this operator can only take a single argument.
+<br>
+In any case, all deal.II vector classes now support both kinds of
+access operators interchangeably.
+<br>
+(Wolfgang Bangerth, 2012/02/12)
+
+<li> Fixed: Linking shared libraries on PowerPC systems (e.g. on
+BlueGene systems) failed due to a miscommunication between compiler
+and linker. This is now worked around.
+<br>
+(Aron Ahmedia, Wolfgang Bangerth, 2012/02/06)
+
+<li> New: There is now a distributed deal.II vector class
+parallel::distributed::Vector that can be used with MPI. The
+vector is based on a contiguous locally owned range and allows easy
+access of ghost entries from other processors. The vector interface is
+very similar to the non-distributed class Vector<Number>.
+<br>
+(Katharina Kormann, Martin Kronbichler, 2012/01/25)
+
+<li> Fixed: The <code>common/scripts/make_dependencies</code> program
+now behaves like the C++ compiler when
+searching include paths for <code># include "..."</code> directives.
+<br>
+(Timo Heister, 2012/01/10)
+
+<li> Fixed: The Intel compiler complains that it can't copy Trilinos vector
+reference objects, preventing the compiling of step-32. This is now fixed.
+<br>
+(Wolfgang Bangerth, 2011/11/09)
+
+<li> Fixed: Intel ICC 12.1 gets into trouble with BOOST because BOOST
+believes that the compiler supports C++0x but one then still has to
+specify the corresponding flag on the command line to avoid compiler
+errors. This is now fixed.
+<br>
+(Wolfgang Bangerth, 2011/11/06)
+
+<li> Fixed: On some systems, <code>mpiCC</code> turns out to alias the
+C compiler, not the C++ compiler as expected. Consequently, try to use
+<code>mpic++</code> or <code>mpicxx</code> before <code>mpiCC</code> as
+these should really be unambiguous.
+<br>
+(Wolfgang Bangerth, 2011/11/05)
+
+<li> Fixed: Intel's ICC compiler identifies itself as <code>icpc version
+12.1.0 (gcc version 4.2.1 compatibility)</code> which we mistook as being
+GCC version 4.2. The same is true for the Intel C compiler. This is now fixed.
+<br>
+(Wolfgang Bangerth, 2011/11/05)
+
+<li> Fixed: deal.II could not be compiled with gcc 4.6.1 when MPI is
+enabled due to a missing include file in file
+<code>source/base/utilities.cc</code>. This is now fixed.
+<br>
+(Wolfgang Bangerth, 2011/10/25)
+</ol>
+
+
+<!-- ----------- SPECIFIC IMPROVEMENTS ----------------- -->
+
+<a name="specific"></a>
+<h3>Specific improvements</h3>
+
+<ol>
+<li> Changed: To support Trilinos version 10.12.x we need to cache the
+result of has_ghost_elements() in parallel vectors at construction time
+of the vector. Starting from 10.12 Trilinos will communicate in this
+call, which therefore only works if called from all CPUs.
+<br>
+(Timo Heister, 2012/08/22)
+
+<li> New: The copy constructor of FullMatrix from IdentityMatrix
+used to be explicit, but that didn't appear to be necessary in hindsight.
+Consequently, it is now a regular copy constructor.
+<br>
+(Wolfgang Bangerth, 2012/08/14)
+
+<li> New: The Patterns::Map pattern allows to describe maps from keys
+to values in input files.
+<br>
+(Wolfgang Bangerth, 2012/08/01)
+
+<li> Fixed: DoFTools::make_zero_boundary_constraints now also works for parallel distributed triangulations.
+<br>
+(Wolfgang Bangerth, 2012/07/24)
+
+<li> Fixed: GridTools::find_active_cell_around_point() works now also if the cell in which the point we look for lies is not adjacent to the closest vertex of p. The tests bits/find_cell_8 and _9 illustrate this.
+<br>
+(Wolfgang Bangerth, Christian Goll 2012/07/20)
+
+<li> Fixed: Using the SolutionTransfer class with hp::DoFHandler
+and on meshes where some cells are associated with a FE_Nothing element
+could result in an error. This is now fixed.
+<br>
+(Wolfgang Bangerth, 2012/06/29)
+
+<li> Fixed: The MappingQ1::transform_real_to_unit_cell function as
+well as the equivalent ones in derived classes sometimes get into
+trouble if they are asked to compute the preimage, in reference cell
+coordinates, of a point that lies outside the cell. This is because for points outside
+the reference cell, the mapping from unit to real cell is not
+necessarily invertible, and consequently the Newton iteration to
+find the preimage did not always converge, leading to an exception.
+While this is not entirely wrong (we could, after all, not compute
+the desired quantity), not all callers of this function were prepared
+to accept this result -- in particular, the function
+CellAccessor<3>::point_inside should have really just returned false
+in such cases but instead let the exception so generated propagate
+through. This should now be fixed.
+<br>
+(Wolfgang Bangerth, Eric Heien, Sebastian Pauletti, 2012/06/27)
+
+<li> Fixed: The function VectorTools::compute_no_normal_flux_constraints had
+a bug that led to an exception whenever we were computing constraints for
+vector fields located on edges shared between two faces of a 3d cell if those
+faces were not parallel to the axes of the coordinate system. This is now fixed.
+<br>
+(Wolfgang Bangerth, Jennifer Worthen, 2012/06/27)
+
+<li>
+Fixed: Due to an apparent bug in autoconf, it was not possible to
+override the <code>F77</code> environment variable to select anything
+else than gfortran. This is now fixed.
+<br>
+(Wolfgang Bangerth, 2012/06/08)
+
+<li> Fixed: TrilinosWrappers::VectorBase::swap() is now working as expected. (thanks Uwe Köcher)
+<br>
+(Timo Heister 2012/07/03)
+
+<li> Fixed: Some instantiations for
+DerivativeApproximation::approximate_derivative_tensor() were missing.
+<br>
+(Timo Heister 2012/06/07)
+
+<li> New: The finite element type FE_DGQArbitraryNodes is now
+working also in codimension one spaces.
+<br>
+(Luca Heltai, Andrea Mola 2012/06/06)
+
+<li> Fixed: Computing the $W^{1,\infty}$ norm and seminorm in
+VectorTools::integrate_difference was not implemented. This is now
+fixed.
+<br>
+(Wolfgang Bangerth 2012/06/02)
+
+<li> Fixed: A problem in MappingQ::transform_real_to_unit_cell
+that sometimes led the algorithm in this function to abort.
+<br>
+(Wolfgang Bangerth 2012/05/30)
+
+<li> New: The function DataOutInterface::write_pvd_record can be used
+to provide Paraview with metadata that describes which time in a
+simulation a particular output file corresponds to.
+<br>
+(Marco Engelhard 2012/05/30)
+
+<li> Fixed: A bug in 3d with hanging nodes in GridTools::find_cells_adjacent_to_vertex()
+that caused find_active_cell_around_point() to fail in those cases.
+<br>
+(Timo Heister, Wolfgang Bangerth 2012/05/30)
+
+<li> New: DoFTools::make_periodicity_constraints implemented which
+inserts algebraic constraints due to periodic boundary conditions
+into a ConstraintMatrix.
+<br>
+(Matthias Maier, 2012/05/22)
+
+<li> New: The GridIn::read_unv function can now read meshes generated
+by the Salome framework, see http://www.salome-platform.org/ .
+<br>
+(Valentin Zingan, 2012/04/27)
+
+<li> New: There is now a second DoFTools::map_dofs_to_support_points
+function that also works for parallel::distributed::Triangulation
+triangulations.
+<br>
+(Wolfgang Bangerth, 2012/04/26)
+
+<li> New: There is now a second DoFTools::extract_boundary_dofs
+function that also works for parallel::distributed::Triangulation
+triangulations.
+<br>
+(Wolfgang Bangerth, 2012/04/26)
+
+<li> New: The FullMatrix::extract_submatrix_from, FullMatrix::scatter_matrix_to,
+FullMatrix::set functions are new.
+<br>
+(Andrew McBride, Jean Paul Pelteret, Wolfgang Bangerth, 2012/04/24)
+
+<li> Fixed:
+The method FEValues<dim>::inverse_jacobian() previously returned the transpose of the
+inverse Jacobians instead of just the inverse Jacobian as documented. This is now fixed.
+<br>
+(Sebastian Pauletti, Katharina Kormann, Martin Kronbichler, 2012/03/11)
+
+<li> Extended:
+SolutionTransfer is now also able to transfer solutions between hp::DoFHandler where
+the finite element index changes during refinement.
+<br>
+(Katharina Kormann, Martin Kronbichler, 2012/03/10)
+
+<li> Changed:
+A new method to determine an initial guess for the Newton method was coded
+in MappingQ::transform_real_to_unit_cell.
+The code in transform_real_to_unit_cell was cleaned a little bit and a new code
+for the @<2,3@> case was added.
+<br>
+(Sebastian Pauletti, 2012/03/02)
+
+<li> Changed:
+In the context of codim@>0, Mapping::transform would require different inputs
+depending on the mapping type.
+For mapping_covariant, mapping_contravariant the input is DerivativeForm<1, dim, spacedim>
+but for mapping_covariant_gradient,  mapping_contravariant_gradient the input is Tensor<2,dim>.
+<br>
+(Sebastian Pauletti,  2012/03/02)
+
+<li> New:
+A new class DerivativeForm was added.
+This class is supposed to represent the derivatives of a mapping.
+<br>
+(Sebastian Pauletti, 2012/03/02)
+
+<li> Fixed: TrilinosWrappers::Vector::all_zero() in parallel.
+<br>
+(Timo Heister, Jörg Frohne, 2012/03/06)
+
+<li> New: GridGenerator::quarter_hyper_shell() in 3d.
+<br>
+(Thomas Geenen, 2012/03/05)
+
+<li> New: DataOut::write_vtu_in_parallel(). This routine uses MPI I/O to write
+a big vtu file in parallel.
+<br>
+(Timo Heister, 2012/02/29)
+
+<li> Fixed: parallel::distributed::Triangulation::clear() forgot
+to update the number cache of this class, leading to wrong results
+when calling functions like
+parallel::distributed::Triangulation::n_global_active_cells();
+<br>
+(Wolfgang Bangerth, 2012/02/20)
+
+<li> Improved: FEFieldFunction allows now for the computation of Laplacians.
+<br>
+(Christian Goll, 2012/02/16)
+
+<li> New: The function IndexSet::fill_binary_vector creates a numerical
+representation of an IndexSet containing zeros and ones.
+<br>
+(Wolfgang Bangerth, 2012/02/12)
+
+<li> New: The function IndexSet::clear resets an index set to be empty.
+<br>
+(Wolfgang Bangerth, 2012/02/12)
+
+<li> New: There are now global functions l1_norm() and linfty_norm() that compute
+the respective norms of a rank-2 tensor.
+<br>
+(Wolfgang Bangerth, 2012/02/08)
+
+<li> New: VectorTools::interpolate_to_different_mesh implemented which interpolates between
+     DoFHandlers with different triangulations based on a common coarse grid.
+<br>
+(Matthias Maier, 2012/02/08)
+
+<li> Improved: DoFTools::map_dofs_to_support_points() now also works within the hp framework.
+<br>
+(Christian Goll, 2012/02/02)
+
+<li> Improved: There is now a constructor for FESystem that allows to
+create collections of finite elements of arbitrary length.
+<br>
+(Jason Sheldon, 2012/01/27)
+
+<li> Improved: VectorTools::point_value() now also works within the hp framework.
+<br>
+(Christian Goll, 2012/01/26)
+
+<li> Fixed: GridTools::find_active_cell_around_point() for the hp-case works now also with MappingCollections containing only one mapping, as is the standard case in other functions using hp.
+<br>
+(Christian Goll, 2012/01/26)
+
+<li> Fixed: parallel::distributed::refine_and_coarsen_fixed_fraction()
+contained a rounding bug that often produced wrong results.
+<br>
+(Timo Heister, 2012/01/24)
+
+<li> Improved: Utilities::break_text_into_lines now also splits the string at '\\n'.
+<br>
+(Timo Heister, 2012/01/17)
+
+<li> Fixed: When <code>./configure</code> does not detect the presence
+of <code>zlib</code>, writing output in VTU format failed to produce
+a valid output file.
+<br>
+(Timo Heister, 2012/01/03)
+
+<li> Improved: <code>PETScWrappers::SolverXXX</code> class was
+restricted to using default solver options for the KSP only. It is now
+possible to override those by using PETSc command-line options
+<code>-ksp_*</code>; giving greater flexibility in controlling PETSc
+solvers. (See the class's documentation).
+<br>
+(Vijay S. Mahadevan, 2011/12/22)
+
+<li> New: The GridIn class now also reads the GMSH format 2.2 as written by
+GMSH 2.5.
+<br>
+(Vijay S. Mahadevan, Wolfgang Bangerth, 2011/12/19)
+
+<li> Improved: The GridRefinement::refine_and_coarsen_optimize function
+assumed that the expected convergence order was 2. It has now gotten an
+argument by which the user can prescribe a different value. A bug has also
+been fixed in which the function incorrectly assumed in its algorithm that
+the mesh was two-dimensional.
+<br>
+(Christian Goll, 2011/12/16)
+
+<li> Fixed: Restriction and prolongation didn't work for elements of
+kind FE_Nothing. Consequently, many other parts of the library also
+didn't work, such as the SolutionTransfer class. This is now fixed.
+<br>
+(Wolfgang Bangerth, 2011/12/15)
+
+<li> Fixed: The DerivativeApproximation class now works for distributed computations.
+<br>
+(Timo Heister, 2011/12/15)
+
+<li> Changed: The ExcMessage exception class took an argument of
+type <code>char*</code> that was displayed when the exception
+was raised. However, character pointers are awkward to work with
+because (i) they can not easily be composed to contain things like
+file names that are only known at run-time, and (ii) the string
+pointed to by the pointer had to live longer than the local expression
+in which the exception is generated when using the AssertThrow macro
+(because, when we create an exception, the exception object is passed
+up the call stack until we find a catch-clause; at that time, however,
+the scope in which the exception object was created has long been left).
+This restriction made it impossible to construct the message using std::string
+and then just do something like <code>(std::string("file: ")+filename).c_str()</code>.
+<br>
+To remedy this flaw, the type of the argument to ExcMessage has been
+changed to std::string since objects of this type are readily copyable
+and therefore live long enough.
+<br>
+(Wolfgang Bangerth, 2011/12/14)
+
+<li> New: Setting up a class derived from DataPostprocessor required some
+pretty mechanical steps in which one has to overload four member functions.
+For common cases where a postprocessor only computes a single scalar or
+a single vector, this is tedious and unnecessary. For these cases, the
+new classes DataPostprocessorScalar and DataPostprocessorVector provide
+short cuts that make life simpler.
+<br>
+(Wolfgang Bangerth, 2011/12/14)
+
+<li> Changed: The DataPostprocessor class previously required users of this
+class to overload DataPostprocessor::get_names(),
+DataPostprocessor::get_data_component_interpretation()
+and DataPostprocessor::n_output_variables(). The latter function is redundant
+since its output must equal the length of the arrays returned by the
+first two of these functions. It has therefore been removed.
+<br>
+(Wolfgang Bangerth, 2011/12/14)
+
+<li> Improved: Objects of the type LogStream::Prefix can now be used
+as a safe implementation of the push and pop mechanism for log
+prefixes.
+<br>
+(Guido Kanschat, 2011/12/09)
+
+<li> New: IndexSet::add_indices(IndexSet).
+<br>
+(Timo Heister, 2011/12/09)
+
+<li> Fixed: Finite element Hessians get computed for codimension one,
+at least for FE_Poly derived classes.
+<br>
+(Guido Kanschat, 2011/12/07)
+
+<li> Changed: Output of ParameterHandler::print_parameters with argument
+ParameterHandler::LaTeX was not particularly readable. The output has
+therefore been rewritten to be more structured and readable.
+<br>
+(Wolfgang Bangerth, 2011/11/28)
+
+<li> Fixed: The TimerOutput class set the alignment of output to right-aligned
+under some circumstances, but didn't reset this to the previous value at the
+end of output. This is now fixed.
+<br>
+(Wolfgang Bangerth, 2011/11/28)
+
+<li> New: The copy constructor of the SparseMatrixEZ class now works
+(rather than throwing an exception) if the copied matrix has size zero.
+This is in accordance with the other matrix classes.
+<br>
+(Wolfgang Bangerth, 2011/11/15)
+
+<li> New: The class ScalarFunctionFromFunctionObject provides a quick way to
+convert an arbitrary function into a function object that can be passed
+to parts of the library that require the Function@<dim@> interface.
+The VectorFunctionFromScalarFunctionObject class does a similar thing.
+<br>
+(Wolfgang Bangerth, 2011/11/15)
+
+<li> New: The DoFTools::count_dofs_per_block function now also works
+for objects of type hp::DoFHandler.
+<br>
+(Jason Sheldon, 2011/11/13)
+
+<li> New: FETools::get_fe_from_name() can now return objects of type FE_Nothing.
+<br>
+(Scott Miller, Jonathan Pitt, 2011/11/10)
+
+<li> New: Implementation of an alternative handling of
+inhomogeneous constraints in ConstraintMatrix. This is controlled with
+a new parameter use_inhomogeneities_for_rhs in
+distribute_local_to_global() and determines whether the correct or
+zero values (this was the case before and still is the default) are
+kept in the linear system during the solution process.
+<br>
+(Jörg Frohne, 2011/11/01)
+
+<li> Fixed: SparseMatrix::mmult and SparseMatrix::Tmmult had a number of
+issues that are now fixed: (i) rebuilding the sparsity pattern was allowed
+even if several of the matrices involved in these operations shared a
+sparsity pattern; (ii) the functions had a vector argument that had a default
+value but the default value could not be used because it wasn't used in a
+template context deducible by the compiler.
+<br>
+(Wolfgang Bangerth, 2011/10/30)
+
+<li> New:
+parallel::distributed::Triangulation::mesh_reconstruction_after_repartitioning
+setting which is necessary for save()/load() to be deterministic. Otherwise
+the matrix assembly is done in a different order depending on the order of old
+refinements.
+<br>
+(Timo Heister, 2011/10/26)
+
+<li> New: TriaAccessor<>::minimum_vertex_distance().
+<br>
+(Timo Heister, 2011/10/25)
+
+<li> New: TableHandler::print_text now supports not only printing column
+keys above their own column, but also in a separate header, to make it simpler
+for external plotting programs to skip this line.
+<br>
+(Wolfgang Bangerth, 2011/10/22)
+
+<li> Fixed: Trying to write a TableHandler object that is empty resulted
+in a segmentation fault. This is now fixed.
+<br>
+(Wolfgang Bangerth, 2011/10/21)
+
+<li> New: The TableHandler class can now pad columns that have only been
+partially filled. See the documentation of the class for a description.
+<br>
+(Wolfgang Bangerth, 2011/10/18)
+
+<li> Fixed: In TableHandler::print_text, it can happen that the function
+wants to print an empty string as the element of the table to be printed.
+This can confuse machine readers of this table, for example for visualization,
+since they then do not see this column in that row. To prevent this, we now
+print <code>""</code> in such places.
+<br>
+(Wolfgang Bangerth, 2011/10/18)
+
+<li> Fixed: Using Trilinos versions 10.4 and later on Debian failed to
+configure due to a different naming scheme of Trilinos libraries on
+Debian. This is now fixed.
+<br>
+(Wolfgang Bangerth, 2011/10/17)
+
+<li> Changed: The TableHandler class has been changed significantly
+internally.  It could previously store arbitrary values (though in practice,
+only int, unsigned int, double and std::string were implemented). The class is
+now restricted to this particular set of types. On the other hand, the
+TableHandler class can now be serialized.
+<br>
+(Wolfgang Bangerth, 2011/10/17)
+
+<li> Fixed: searching in the doxygen documentation.
+<br>
+(Timo Heister, 2011/10/13)
+
+<li> New: parallel::distributed::Triangulation::save()/load() to store
+the refinement information to disk. Also supports saving solution vectors
+using the SolutionTransfer class.
+<br>
+(Timo Heister, 2011/10/12)
+
+<li> Fixed: The DataOut_DoFData::merge_patches did not compile with newer compilers.
+This is now fixed.
+<br>
+(Wolfgang Bangerth, 2011/10/11)
+</ol>
+
+
+*/
diff --git a/doc/news/7.2.0-vs-7.3.0.h b/doc/news/7.2.0-vs-7.3.0.h
new file mode 100644
index 0000000..eb43292
--- /dev/null
+++ b/doc/news/7.2.0-vs-7.3.0.h
@@ -0,0 +1,533 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/**
+ * @page changes_between_7_2_and_7_3 Changes between Version 7.2 and 7.3
+
+<p>
+This is the list of changes made between the deal.II releases listed above.
+All entries are signed with the names of the authors.
+</p>
+
+
+
+<!-- ----------- INCOMPATIBILITIES ----------------- -->
+
+<a name="incompatible"></a>
+<h3 style="color:red">Incompatibilities</h3>
+
+<p style="color:red">
+Following are a few modifications to the library that unfortunately
+are incompatible with previous versions of the library, but which we
+deem necessary for the future maintainability of the
+library. Unfortunately, some of these changes will require
+modifications to application programs. We apologize for the
+inconvenience this causes.
+</p>
+
+<ol>
+
+<li> Changed: There are various changes to PETScWrappers::MPI::Vector
+to minimize usage errors and to make the behavior similar to Trilinos:
+objects with ghost elements are now read-only, the function
+update_ghost_values() is no longer required (but called automatically
+if needed). This requires some changes in user code.
+<br>
+(Timo Heister, 2013/02/16)
+
+<li>Changed: Over the past few years, deal.II has accumulated a
+number of things that we would like to change but that would introduce
+incompatibility. Examples are inconsistent naming of functions or types,
+or things put into the wrong namespace. In reality, while there are
+about 100 examples of things we'd like to get rid of, most of them are
+rather obscure to begin with and will not affect most user code.
+Nevertheless, as a developer community, we are
+very careful in making such incompatible changes.
+
+All this said, it is sometimes necessary. We plan to create an incompatible
+release 8.0 at a later time. To give users a chance to already see which
+functions need to be changed, we have introduced markers into the deal.II
+header files that identify which functions, classes or symbols are now
+deprecated. If your compiler supports this, it will then warn if you are
+using any of these. The documentation for each of these symbols discusses
+the recommended alternatives, which are in all cases already in place. In
+other words, you can already change your code in such a way that it compiles
+both with the current 7.3 release as well as with the future 8.0 release
+in which these symbols will have been removed.
+<br>
+(Matthias Maier, Timo Heister, Wolfgang Bangerth, 2013/1/5)
+
+<li>Changed: deal.II previously had two separate classes, DoFHandler
+and MGDoFHandler, for standard and multilevel discretizations, respectively.
+There were also corresponding accessor hierarchies for the cell and
+face iterators. This turned out to be an obstacle for future developments.
+Consequently, the functionality of the MGDoFHandler class has been merged
+into the standard DoFHandler (which already was a base class of MGDoFHandler;
+moving the functions down in the class hierarchy therefore does not
+significantly affect how these classes are used). The MGDoFHandler class
+still exists for now, but is deprecated and will be removed in deal.II 8.0.
+Use the DoFHandler class instead which has obtained all the functionality of
+the old MGDoFHandler class.
+
+From a user perspective, there is only one significant change: The old
+MGDoFHandler::distribute_dofs() function called DoFHandler::distribute_dofs()
+and then distributed degrees of freedom on all levels of the multilevel
+hierarchy. In the new scheme, this is now achieved using the function
+DoFHandler::distribute_mg_dofs(). In other words, where you previously
+used the class MGDoFHandler and called MGDoFHandler::distribute_dofs(), you
+should now use the class DoFHandler and call first DoFHandler::distribute_dofs()
+and then DoFHandler::distribute_mg_dofs().
+<br>
+(Markus Bürg, Timo Heister, Guido Kanschat, 2013/01/03)
+
+<li>Removed: The Triangulation and DoFHandler classes had a great number
+of functions that allowed to get the first and last iterators to cells,
+faces, lines, quads and hexes overall and on each level of the mesh
+hierarchy individually. While conceptually a nice idea to offer such a
+rich set of functions, there are two factors that led us to drastically
+reduce this set of functions: (i) A large interface makes it more
+difficult to fully document what every function is doing and more
+difficult to find what one is looking for. (ii) A large interface
+makes it incredibly difficult to evolve the data structures on which
+these functions operate. We felt that the many functions we have impeded
+our ability to make changes to the internal storage of data. A survey
+of the deal.II code base as well as some larger applications also shows
+that most of these functions are rarely used, if at all.
+<br>
+Consequently, most of these functions have been removed. The only functions
+that remain are the following (for each of the Triangulation and the
+various DoFHandler classes):
+<pre>
+<code>
+    cell_iterator        begin       (const unsigned int level = 0) const;
+    active_cell_iterator begin_active(const unsigned int level = 0) const;
+    cell_iterator        end         () const;
+    cell_iterator        end         (const unsigned int level) const;
+    active_cell_iterator end_active  (const unsigned int level) const;
+</code>
+</pre>
+<br>
+Codes that have previously used functions like <code>begin_active_line</code>
+etc. to loop over the lines or quads of a triangulation need to be changed
+to loop over cells, and on each cell loop over the lines or quads of this
+cell. In most cases we have encountered (in deal.II or its testsuite) this
+was a rather trivial modification. A case to watch out for is that the
+old loop over all lines encountered all lines only once whereas one may
+encounter it multiple times when looping over all cells and then the lines
+of each cell. This case is easily avoided by flagging each treated line
+using the @ref GlossUserFlags "user flags" associated with lines, quads,
+and cells.
+<br>
+(Wolfgang Bangerth, 2012/09/22)
+
+<li>New: In the past, deal.II used a std::vector of bools in many places
+to denote component masks (see @ref GlossComponentMask) as well as for
+block masks (see @ref GlossBlockMask). This was not
+descriptive (the data type does not indicate what it is supposed to
+represent, nor whether the proper size for such an argument would be equal
+to the number of degrees of freedom per cell, the number of vector components
+of the finite element, or the number of degrees of freedom in total).
+<br>
+There are now new class types ComponentMask and BlockMask that are used in these places.
+They are used both descriptively (as a return type of the function
+FiniteElement::get_nonzero_components indicating the vector components within
+which a given shape function is nonzero) as well as prescriptively (as
+input arguments to functions such as those listed in the glossary entry
+linked to above).
+<br>
+While the descriptive places are not backward compatible (they return a
+ComponentMask which is not convertible to the std::vector of bools returned
+before), most of the prescriptive places are in fact backward compatible
+(because the std::vector of bool that was passed previously can
+implicitly be converted to an object of type ComponentMask). The sole
+exception is the function DoFTools::extract_dofs (and its multigrid
+equivalent DoFTools::extract_level_dofs) that previously
+could interpret its argument as either a component or
+a block mask, depending on a boolean flag. This function now exists
+in two different versions, one that takes a ComponentMask and
+one that takes a BlockMask. Call sites need to be adjusted.
+<br>
+(Wolfgang Bangerth, 2012/09/22)
+
+<li> Removed: the optional argument offset got removed from
+DoFHandler and MGDoFHandler::distribute_dofs() because it was
+never working correctly and it is not used.
+<br>
+(Timo Heister, 2012/09/03)
+</ol>
+
+
+<!-- ----------- GENERAL IMPROVEMENTS ----------------- -->
+
+<a name="general"></a>
+<h3>General</h3>
+
+
+<ol>
+
+<li> Changed: parallel sparse matrices now also require the use of
+compress(VectorOperation) like vectors do. See the glossary for details.
+Old functions are now deprecated.
+<br>
+(Timo Heister, 2013/02/25)
+
+<li> New: step-49 demonstrates advanced techniques for mesh creation and
+manipulation.
+<br>
+(Timo Heister, 2013/02/19)
+
+<li> Fixed: Many places in the documentation have been made to consistently
+use doxygen markup that indicates that this is code, so that doxygen will
+cross-link the piece of code to class and function names. Many typos have also
+been fixed.
+<br>
+(Felix Gruber, 2013/02/15)
+
+<li> Fixed: Starting in release 7.1, we first built the deal.II shared libraries
+in the local <code>/tmp</code> or similar directory rather than the final location
+because linking becomes very slow over remotely mounted file systems. Unfortunately,
+this scheme turns out not to work under Cygwin on Windows systems: executables
+cannot link with libraries that have been moved/renamed after linking. If we are
+running on Cygwin, we therefore revert to the old scheme.
+<br>
+(Wolfgang Bangerth, 2013/02/11)
+
+<li> New: finite element FE_Q_DG0 that implements polynomials
+of order k with an additional discontinuous constant function.
+<br>
+(Daniel Arndt, Timo Heister, 2013/01/07)
+
+<li> step-6 now uses ConstraintMatrix::distribute_local_to_global()
+instead of condense(), which is the preferred way to use a ConstraintMatrix
+ (and the only sensible way in parallel).
+<br>
+(Timo Heister, 2012/11/02)
+
+<li> Simplifications of the internal structures of Triangulation and
+DoFHandler, in particular removal of specializations.
+<br>
+(Guido Kanschat, 2012/09/13)
+</ol>
+
+
+<!-- ----------- SPECIFIC IMPROVEMENTS ----------------- -->
+
+<a name="specific"></a>
+<h3>Specific improvements</h3>
+
+<ol>
+<li> Fixed: The ArpackSolver interface to the ARPACK eigenvalue solver could
+not be compiled with newer C++ compilers. This is now fixed.
+<br>
+(Juan Carlos Araujo Cabarcas, Wolfgang Bangerth, 2013/02/20)
+
+<li> New: PETScWrappers::MPI::BlockVector now has a constructor and reinit
+that takes a std::vector<IndexSet> (same interface as in Trilinos).
+<br>
+(Timo Heister, 2013/02/19)
+
+<li> New: PETScWrappers::*Matrix::%add(other, factor) to
+add a scaled other matrix to the current matrix.
+<br>
+(Jose Javier Munoz Criollo, 2013/02/19)
+
+<li> New: GridGenerator::extrude_triangulation() allows
+you to extrude a 2d mesh to turn it into a 3d mesh.
+<br>
+(Timo Heister, 2013/02/16)
+
+<li> PETScWrappers::Vector::operator= and PETScWrappers::MPI::Vector::operator=
+now call update_ghost_values()
+automatically if necessary. This means that update_ghost_values()
+does not need to be called from user code at all any more.
+<br>
+(Timo Heister, 2013/02/14)
+
+<li> Fixed: VectorTools::interpolate did not work properly in 1d if
+boundary indicators had been set to anything but the default (i.e.,
+zero at the left and one at the right end of the domain). This was
+a hold-over from the past when these were the only possible values.
+This is now fixed.
+<br>
+(Kevin Dugan, Wolfgang Bangerth, 2013/02/14)
+
+<li> Improved: The iterator class of the deal.II SparseMatrix class
+and SparsityPattern have been revised for performance. Iterating over
+a row of the matrix and querying the column index and the value is now
+similarly fast as iterating over a vector.
+<br>
+(Martin Kronbichler, 2013/02/12)
+
+<li> New: A new overload of BlockMatrixBase::add allows to add one block
+matrix to another, with a scaling factor.
+<br>
+(Jean-Paul Pelteret, 2013/02/05)
+
+<li> Fixed: The FEValues machinery silently accepted the case when the
+mapped cell (or the cell geometry) were distorted. An assertion has been
+added to the computation of the Jacobian determinants for the volume
+element that aborts whenever the Jacobian determinant in a quadrature
+point becomes too small or negative.
+<br>
+(Martin Kronbichler, 2013/01/18)
+
+<li> Improved: SLEPcWrappers:: The interface to SLEPc has an improved
+handle on SolverControl and solver data can now be extracted at run
+time. An example usage has been added to step-36.
+<br>
+(Toby D. Young, 2013/01/18)
+
+<li> Fixed: Various variants of the TrilinosWrappers::SparseMatrix::reinit
+functions take a parameter <code>drop_tolerance</code> that allows to remove
+small values from the matrix and replace them by zero instead. This was not
+enforced for values on the diagonal of the matrix but only for off-diagonal
+ones. This is now fixed.
+<br>
+(Wolfgang Bangerth, 2013/01/17)
+
+<li> New: All vector classes should now have an in_local_range()
+function indicating whether a given index is locally stored or not.
+<br>
+(Daniel Arndt, 2013/01/14)
+
+<li> Fixed: The one-argument version of ConstraintMatrix::condense was
+not prepared to deal with parallel vectors. This is now fixed.
+<br>
+(Daniel Arndt, Wolfgang Bangerth, 2013/1/9)
+
+<li> New: Utilities::int_to_string can now deal with any 32-bit
+integer, not just those with up to 6 digits.
+<br>
+(Wolfgang Bangerth, 2013/1/5)
+
+<li> New: The PETScWrappers::MatrixBase::write_ascii() now takes a
+(defaulted) argument allowing to select the PETSc Viewer style.
+<br>
+(Fahad Alrashed, 2013/1/2)
+
+<li> New: The PETScWrappers::MPI::Vector::print() function overloads the
+function of same name in the base class to ensure that the output
+generated by a parallel vector makes sense.
+<br>
+(Fahad Alrashed, 2013/1/2)
+
+<li> New: The PETScWrappers::VectorBase class now has a function
+PETScWrappers::VectorBase::write_ascii() that allows writing the
+vector's data to the default output stream.
+<br>
+(Fahad Alrashed, 2013/1/2)
+
+<li> Fixed: PETScWrappers::SparseDirectMUMPS forgot to release
+its memory upon destruction. This is now fixed.
+<br>
+(Alexander Grayver, 2012/12/12)
+
+<li> Fixed: Using the copy constructor of FESystem led to trouble
+down the road because some pointers were freed by the copy while
+still in use by the original object. This is now fixed.
+<br>
+(Timo Heister, Wolfgang Bangerth, 2012/12/03)
+
+<li> Improved: GridTools::make_periodicity_constraints substantially:
+The low level interface now allows to specify a face orientation that
+will be used when matching and constraining DoFs for periodic boundary
+conditions. With that, the high level interface will work correctly on
+(mainly 3d) grids that have cells in non standard orientation.
+<br>
+(Matthias Maier, 2012/12/01)
+
+<li> Fixed: Fix GeometryInfo<2>::child_cell_on_face to respect face_flip
+<br>
+(Matthias Maier, 2012/12/01)
+
+<li> New: There is now a version of DoFTools::make_zero_boundary_constraints()
+that accepts a boundary indicator as argument.
+<br>
+(Wolfgang Bangerth, 2012/11/25)
+
+<li> Fixed: The DoFTools::make_flux_sparsity_pattern() function
+had a bug that triggered in 1d whenever there were neighboring
+cells that differed in refinement level by more than one. This
+is now fixed.
+<br>
+(Wolfgang Bangerth, 2012/11/20)
+
+<li> Improved: The inner product, norm, and mean value computation
+of deal.II's own Vector class are now parallelized by threads.
+The parallelization does not change the order in which the additions
+take place, ensuring exact reproducibility from one run to the next.
+<br>
+(Martin Kronbichler, 2012/11/18)
+
+<li> New: The TrilinosWrappers::PreconditionBase class now has
+a function TrilinosWrappers::PreconditionBase::Tvmult that
+allows applying the transpose preconditioner.
+<br>
+(Guido Kanschat, 2012/11/04)
+
+<li> New: The parallel::distributed::Triangulation::n_global_levels()
+function returns the maximal refinement level over all involved
+processors.
+<br>
+(Timo Heister, 2012/11/04)
+
+<li> New: In addition to the regular subdomain ids (see
+@ref GlossSubdomainId) there is now a second set of flags called
+"level subdomain ids" that also assigns a subdomain to every cell
+in a multigrid hierarchy.
+<br>
+(Timo Heister, 2012/11/04)
+
+<li> Changed: GridOut::write_xfig has been improved in a number
+of ways. In particular, one can now color cells based on a number
+of different criteria that can be set in GridOutFlags::XFig.
+<br>
+(Guido Kanschat, 2012/11/04)
+
+<li> The class Utilities::MPI::MPI_InitFinalize now also initializes
+PETSc, when PETSc is installed.
+<br>
+(Timo Heister, 2012/11/02)
+
+<li> Fixed: DoFTools::make_flux_sparsity_pattern wasn't prepared to
+deal with adaptively refined meshes in 1d.
+<br>
+(Wolfgang Bangerth, 2012/10/30)
+
+<li> New: Added PETScWrappers::PreconditionParaSails and
+PETScWrappers::PreconditionNone. PETScWrappers::PreconditionParaSails
+implements the interface to use the ParaSails sparse approximate
+inverse preconditioner from the HYPRE suite. ParaSails supports
+parallel distributed computations and can handle nonsymmetric
+and also indefinite problems. PETScWrappers::PreconditionNone
+implements non-preconditioning in PETSc which can be of use
+together with the PETScWrappers::MatrixFree class.
+<br>
+(Martin Steigemann, 2012/10/26)
+
+<li> New: The PETScWrappers::SparseDirectMUMPS class now allows to
+exploit symmetry of the matrix, using the
+PETScWrappers::SparseDirectMUMPS::set_symmetric_mode() function.
+<br>
+(Alexander Grayver, 2012/10/23)
+
+<li> Fixed: Several static const member variables of the Accessor
+classes were not properly instantiated. This only rarely created
+trouble because they are typically only used as template arguments
+and the compiler substituted them. However, one would get linker
+errors when passing around a reference to them. This is now fixed.
+<br>
+(Wolfgang Bangerth, Guido Kanschat, 2012/10/11)
+
+<li> Fixed: Handle lucky breakdowns in GMRES/FGMRES.
+<br>
+(Bärbel Janssen, Timo Heister, 2012/10/09)
+
+<li> Fixed: GridTools::find_cells_adjacent_to_vertex got into
+trouble with anisotropically refined meshes. This is now fixed.
+<br>
+(Abner Salgado, Tobias Leicht, Wolfgang Bangerth, 2012/10/08)
+
+<li> Fixed: FESystem can now deal with n_elements==0 for a block.
+<br>
+(Timo Heister, 2012/09/28)
+
+<li> Fixed: ParameterHandler::print_parameters, when using
+ParameterHandler::OutputStyle::LaTeX would always print a list
+of parameters in each section as a latex itemized environment.
+However, if there are none, we end up with an empty list which
+latex does not like. This is now fixed.
+<br>
+(Wolfgang Bangerth, 2012/09/27)
+
+<li> New: Added SparsityTools::distribute_sparsity_pattern() for
+BlockCompressedSimpleSparsityPattern. This allows parallel computations
+with distributed::Triangulation and PETScWrappers::MPI::BlockSparseMatrix.
+<br>
+(Timo Heister, 2012/09/25)
+
+<li> New: Added BlockCompressedSimpleSparsityPattern::column_number().
+<br>
+(Timo Heister, 2012/09/25)
+
+<li> New: There is now a function hp::FECollection::n_blocks() in analogy to
+the existing function hp::FECollection::n_components().
+<br>
+(Wolfgang Bangerth, 2012/09/20)
+
+<li> Changed: step-8 now outputs data in VTK format, rather than GMV.
+GMV has long been dead.
+<br>
+(Wolfgang Bangerth, 2012/09/19)
+
+<li> Fixed: One can compile deal.II with MPI support but run programs
+that aren't intended to use parallel communications and that, in fact,
+do not call <code>MPI_Init</code> at all. They are nevertheless supposed
+to work but previously the TimerOutput would crash under these conditions.
+This is now fixed.
+<br>
+(Timo Heister, Wolfgang Bangerth, 2012/09/18)
+
+<li> Fixed: If you pipe content into the deallog object and there
+is no end-line or flush after this content, and if a file stream
+is associated to this object, and if that happens at the end of
+the lifetime of the program, then the program would crash.
+This is now fixed.
+<br>
+(Timo Heister, Wolfgang Bangerth, 2012/09/17)
+
+<li> Fixed: The use of TableHandler::set_precision affected not only the
+precision with which elements of a table were printed, but also the
+precision carried by the output stream after writing the table was
+finished. It thus affected the precision
+with which later output was produced. This is now fixed.
+<br>
+(Timo Heister, 2012/09/16)
+
+<li> Fixed: Output of super-columns in TableHandler::write_text()
+was inconsistent. This is now fixed.
+<br>
+(Timo Heister, 2012/09/16)
+
+<li> Changed: Due to incompatibilities with some hdf5 packages installed from
+repositories we disable auto-detection of hdf5. Use --with-hdf if you need it.
+<br>
+(Timo Heister, 2012/09/14)
+
+<li> New MeshWorker::LocalIntegrator and integration_loop() provide a
+less confusing interface to MeshWorker loops.
+<br>
+(Guido Kanschat, 2012/09/13)
+
+<li> New: TableHandler TextOutputFormat::simple_table_with_separate_column_description
+that skips aligning the columns for increased performance.
+<br>
+(Timo Heister, 2012/09/10)
+
+<li> Fixed: The Clang C++ compiler had some trouble dealing with obtaining
+the return value of a Threads::Task object, due to a compiler bug in
+dealing with friend declarations. This is now fixed.
+<br>
+(Wolfgang Bangerth, 2012/09/04)
+
+<li> Fixed: When applying a ConstraintMatrix to a block matrix
+where the last few rows are empty, we ran into an unrelated assertion.
+This is now fixed.
+<br>
+(Jason Sheldon, Wolfgang Bangerth, 2012/09/04)
+</ol>
+
+
+*/
diff --git a/doc/news/7.3.0-vs-8.0.0.h b/doc/news/7.3.0-vs-8.0.0.h
new file mode 100644
index 0000000..c953d3c
--- /dev/null
+++ b/doc/news/7.3.0-vs-8.0.0.h
@@ -0,0 +1,641 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/**
+ * @page changes_between_7_3_and_8_0 Changes between Version 7.3 and 8.0
+
+<p>
+This is the list of changes made between the deal.II releases listed above.
+All entries are signed with the names of the authors.
+</p>
+
+
+
+<!-- ----------- INCOMPATIBILITIES ----------------- -->
+
+<a name="incompatible"></a>
+<h3 style="color:red">Incompatibilities</h3>
+
+<p style="color:red">
+Following are a few modifications to the library that unfortunately
+are incompatible with previous versions of the library, but which we
+deem necessary for the future maintainability of the
+library. Unfortunately, some of these changes will require
+modifications to application programs. We apologize for the
+inconvenience this causes.
+</p>
+
+<ol>
+
+<li> Removed: it was possible to call get_dof_indices(), get_dof_values(),
+set_dof_values(), and distribute_local_to_global() for cells that were not
+active, if the finite element only had DoFs on vertices (i.e. Q1). This is
+no longer allowed.
+<br>
+(Timo Heister, 2013/06/02)
+
+<li> Changed: Internal structures of ExceptionBase are now thread safe. The
+Assert macro does not print an exception to deallog any more prior to
+throwing if deal_II_exceptions::abort_on_exception==false. Removed: A
+number of obsolete Exceptions that are not used in the library any more.
+<br>
+(Matthias Maier, 2013/04/16)
+
+<li> Removed: A number of header files that have been deprecated a long time
+ago have been removed. All of them had previously only included the header file
+that had superseded them. To upgrade, simply include the currently used
+header file. This is also backward compatible with deal.II 7.3.
+<br>
+(Guido Kanschat, 2013/04/12)
+
+<li> Removed: The interfaces to the obsolete direct solvers MA27 and MA47 from
+the Harwell Subroutine Library. Support for the HSL routines was not ported to
+the new build system. However, the sparse direct solver UMFPACK remains to be
+supported and is provided as part of the standard deal.II distribution, unlike
+the HSL functions.
+<br>
+(Matthias Maier, 2013/04/01)
+
+<li> Changed: The TimeDependent::end_sweep function with an argument indicating
+the number of threads has been removed. Use the corresponding function without
+an argument. Since the argument had a default value, few users will have used
+this function.
+<br>
+(Wolfgang Bangerth, 2013/03/17)
+
+</ol>
+
+
+<!-- ----------- GENERAL IMPROVEMENTS ----------------- -->
+
+<a name="general"></a>
+<h3>General</h3>
+
+
+<ol>
+  <li> Improvements on the CMake build system: A working C (or Fortran) compiler
+  is now fully optional; Improved support for static linkage with a toggle
+  "DEAL_II_PREFER_STATIC_LIBS" that will prefer static archives for non system
+  libraries and "DEAL_II_STATIC_EXECUTABLE" that will switch the complete link
+  interface to static linkage.
+  <br>
+  (Matthias Maier, 2013/07/16)
+  </li>
+
+  <li> New: various functions for parallel computations got introduced to
+  make Trilinos and PETSc interfaces similar. Now step-40 can be used with
+  PETSc or Trilinos with just a few changes. This patch also introduces
+  better support for block systems in the PETSc interfaces.
+  <br>
+  (Timo Heister, 2013/07/15)
+  </li>
+
+  <li> New: deal.II can now be compiled to 64-bit global dof indices. To turn
+  this feature on, use the cmake option -DDEAL_II_WITH_64BIT_INDICES=ON. If
+  PETSc and/or Trilinos are used, they must be compiled to support 64-bit
+  indices. To write a code that can use 32-bit and 64-bit indices depending on
+  deal.II compilation option, use types::global_dof_index for all the global
+  dof indices.
+  <br>
+  (Kainan Wang and Bruno Turcksin, 2013/06/05)
+  </li>
+
+  <li> New: All vector classes now have a member function
+  <code>locally_owned_elements</code> that returns an index
+  set indicating which elements of this vector the current
+  processor owns.
+  <br>
+  (Wolfgang Bangerth, 2013/05/24)
+  </li>
+
+
+  <li> New: A new element FE_Q_iso_Q1 has been implemented that is defined by
+  a subdivision of the element into smaller Q1 elements. An element of order
+  @p p is similar to FE_Q of degree @p p with the same numbering of degrees of
+  freedom. The element is useful e.g. for defining a sparser preconditioner
+  matrix for AMG at higher order FE_Q elements or for representing a component
+  of a system of PDEs where higher resolution is preferred over high order.
+  <br>
+  (Martin Kronbichler, 2013/05/14)
+  </li>
+
+  <li> New: The step-49 tutorial program now also has a discussion on
+  what to do once you have a coarse mesh and want to refine it.
+  <br>
+  (Wolfgang Bangerth, 2013/04/03)
+  </li>
+
+  <li> New: The number of threads used by deal.II/TBB can now be limited at
+  run time. Using MPI based code using PETSc/Trilinos no longer requires you
+  to compile the library without threads. See MPI_InitFinalize and
+  MultithreadInfo::set_thread_limit for details.
+  <br>
+  (Timo Heister, 2013/03/26)
+  </li>
+
+  <li> New: The results section of step-36 now explains how to use ARPACK
+  as an alternative to SLEPc as eigenvalue solver.
+  <br>
+  (Juan Carlos Araujo Cabarcas, 2013/03/25)
+  </li>
+
+  <li> New: deal.II now uses <a href="http://www.cmake.org/">CMake</a>
+  as its configuration and build tool. Please read through the
+  readme and other installation files for information about how the
+  installation process has changed.
+  <br>
+  Because this touches the configuration of every external package we
+  interact with, there are a number of other changes as a result:
+  <ul>
+    <li>The minimum supported version for Trilinos is now 10.8.x.
+    <li>We no longer link with different versions of the p4est library
+        in debug and optimized mode. Rather, we now link with the same
+	library in both modes. The p4est installation instructions have
+	been updated.
+  </ul>
+  <br>
+  (Matthias Maier, 2013/03/07)
+  </li>
+</ol>
+
+
+<!-- ----------- SPECIFIC IMPROVEMENTS ----------------- -->
+
+<a name="specific"></a>
+<h3>Specific improvements</h3>
+
+<ol>
+
+<li>New: VectorTools::project and a whole host of similar functions
+are now also available for objects of type hp::DoFHandler.
+<br>
+(Wolfgang Bangerth, 2013/07/21)
+</li>
+
+<li>Fixed: hp::DoFHandler::n_boundary_dofs() had a bug that always led
+to a failed assertion. This is now fixed.
+<br>
+(Wolfgang Bangerth, 2013/07/21)
+</li>
+
+<li>Fixed: VectorTools::project has an option to first project onto the
+boundary. However, the implementation of this option ignored the mapping
+that is provided to the function. This is now fixed.
+<br>
+(Wolfgang Bangerth, 2013/07/16)
+</li>
+
+<li>Improved: The WorkStream class used throughout deal.II is now using
+thread local variables and initializes temporary variables on the thread
+that uses them, leading to better cache locality.
+<br>
+(Wolfgang Bangerth, 2013/07/16)
+</li>
+
+<li>Improved: The "pure" functions in MeshWorker::LocalIntegrator are now implemented and throw
+an exception if not overloaded.
+<br>
+(Guido Kanschat, 2013/07/16)
+</li>
+
+<li> New: The function SparseDirectUMFPACK::Tvmult is now implemented.
+<br>
+(Matthias Maier, 2013/07/03)
+</li>
+
+<li> New: In addition to the FEValuesExtractors::Scalar,
+FEValuesExtractors::Vector, and FEValuesExtractors::SymmetricTensor classes,
+there are now also fully featured FEValuesExtractors::Tensor extractors
+for non-symmetric tensors of rank 2.
+<br>
+(Denis Davydov, 2013/07/02)
+</li>
+
+<li> New: There are now functions Tensor::component_to_unrolled_index()
+and Tensor::unrolled_to_component_indices() in the same way as they
+already exist for the SymmetricTensor class.
+<br>
+(Denis Davydov, 2013/07/02)
+</li>
+
+<li> New: There is now a read-write version of TableIndices::operator[].
+<br>
+(Denis Davydov, 2013/07/02)
+</li>
+
+<li> New: The function parallel::distributed::Triangulation::copy_triangulation() is
+now implemented.
+<br>
+(Martin Steigemann, 2013/07/02)
+</li>
+
+<li> New: TriaRawIterator::operator < (TriaRawIterator&) now implements a total ordering
+relation for cells even on parallel::distributed::Triangulation across processors.
+Additionally, TriaRawAccessor and CellAccessor now have an ordering relation.
+<br>
+(Guido Kanschat, 2013/06/24)
+</li>
+
+<li> New: CellAccessor::id() that returns a unique CellId that
+also works in parallel computations (where level and index are not
+useful).
+<br>
+(Timo Heister, 2013/06/24)
+</li>
+
+<li> New: added ConstantTensorFunction<rank, dim> and ZeroTensorFunction<rank, dim> to provide
+a tensor valued analogue to ConstantFunction and ZeroFunction.
+<br>
+(Matthias Maier, 2013/06/20)
+</li>
+
+<li> Fixed: BlockSparsityPattern::column_number was returning
+wrong values.
+<br>
+(Timo Heister, 2013/06/16)
+</li>
+
+<li> Fixed: The stabilization parameter for the artificial diffusion
+in the step-31 tutorial program has been increased slightly to avoid
+instabilities at later times (<i>t</i> > 60).
+<br>
+(Martin Kronbichler, 2013/06/04)
+</li>
+
+<li> Fixed: If an exception was generated on a task created by
+Threads::new_task, the program would terminate with a segmentation
+fault, leaving little trace of what had happened. This is now handled
+more gracefully.
+<br>
+(Wolfgang Bangerth, 2013/06/02)
+</li>
+
+<li> Changed: subdomain ids can now only be queried/set on active cells.
+Consequently, is_artificial(), is_ghost(), and is_locally_owned() are
+now restricted to active cells.
+<br>
+(Timo Heister, 2013/05/31)
+</li>
+
+<li> Improved: Triangulation::begin(level) and Triangulation::end(level) now return an
+empty iterator range if the level is larger than the maximal locally owned level,
+but still in the global level range of a distributed Triangulation.
+<br>
+(Timo Heister and Guido Kanschat, 2013/05/26)
+</li>
+
+<li> New: The IndexSet::add_indices function that takes another IndexSet
+object now has an additional argument <code>offset</code> that can be used
+to offset the indices of the first argument.
+<br>
+(Wolfgang Bangerth, 2013/05/25)
+</li>
+
+<li> New: ConstraintMatrix::distribute is now also implemented for
+arguments of type PETScWrappers::MPI::BlockVector.
+<br>
+(Wolfgang Bangerth, 2013/05/25)
+</li>
+
+<li> Fixed: IndexSet::operator== returned the wrong results
+in some cases.
+<br>
+(Wolfgang Bangerth, 2013/05/25)
+</li>
+
+<li> New: The global function <code>complete_index_set()</code>
+creates and returns an index set of given size that contains
+every single index within this range.
+<br>
+(Wolfgang Bangerth, 2013/05/24)
+</li>
+
+<li> New: All vector classes now have a static member variable
+<code>supports_distributed_data</code> that indicates whether the
+vector class supports data that is distributed across multiple
+processors. This variable is provided as a <i>traits variable</i>
+to allow generic algorithms working on general vector types to
+query the capabilities of the vector class at compile time.
+<br>
+(Wolfgang Bangerth, 2013/05/23)
+</li>
+
+<li> Fixed: FETools::back_interpolate has been revised to work correctly
+also with parallel::distributed::Vector.
+<br>
+(Martin Steigemann, 2013/05/23)
+</li>
+
+<li> Removed: The file <code>mesh_worker/vector_info.h</code> was unused and
+untested. It has thus been removed.
+<br>
+(Wolfgang Bangerth, Guido Kanschat, 2013/05/21)
+</li>
+
+<li> Fixed: The method parallel::distributed::Vector::compress
+(VectorOperation::insert) previously set the elements of ghost elements
+unconditionally on the owning processor, even if they had not been touched.
+This led to a problem in certain library functions where vector entries became
+zero in a spurious way. This is now fixed by discarding the elements in ghost
+entries for the VectorOperation::insert operation. This is legitimate since we
+assume consistency of set elements across processors, so the owning processor
+sets the element already.
+<br>
+(Martin Kronbichler, 2013/05/21)
+</li>
+
+<li> Improved: DoFTools::make_periodicity_constraints now also works
+for meshes where the refinement level of the two sides of the domain
+is not the same, i.e., one side is more refined than the other.
+<br>
+(Wolfgang Bangerth, 2013/05/20)
+</li>
+
+<li> Improved: Through the fields DataOutBase::VtkFlags::time and
+DataOutBase::VtkFlags::cycle, it is now possible to encode the time and/or
+cycle within a nonlinear or other iteration in VTK and VTU files written
+via DataOutBase::write_vtk and DataOutBase::write_vtu.
+<br>
+(Wolfgang Bangerth, 2013/05/12)
+</li>
+
+<li> Fixed: The method ConvergenceTable::evaluate_convergence_rates with
+ reference column did not take the dimension of the reference column into
+ account, leading to wrong logarithmic rates for dim!=2. This can now be fixed
+ by specifying the dimension as a last argument.
+<br>
+(Martin Kronbichler, 2013/05/10)
+</li>
+
+<li> Improved: The functions MatrixTools::create_mass_matrix and
+MatrixTools::create_laplace_matrix now take an optional ConstraintMatrix
+argument that allows the constraints to be applied directly. This also helps
+VectorTools::project. Note that not providing constraints remains the default
+and recommended way to ensure consistency when several matrices are combined.
+<br>
+(Martin Kronbichler, 2013/05/08)
+</li>
+
+<li> New: The classes TrilinosWrappers::SparseMatrix and
+TrilinosWrappers::BlockSparseMatrix now fully implement vmult and Tvmult with
+deal.II's own vector classes Vector<double> and
+parallel::distributed::Vector<double>.
+<br>
+(Martin Kronbichler, 2013/05/08)
+</li>
+
+<li> Improved: The matrix-vector product ChunkSparseMatrix::vmult now runs in
+parallel in shared memory.
+<br>
+(Martin Kronbichler, 2013/05/07)
+</li>
+
+<li> New: The class ChunkSparseMatrix and the associated
+ChunkSparsityPattern now offer iterator classes to iterate over rows of the
+matrix in an STL-like way.
+<br>
+(Martin Kronbichler, 2013/05/07)
+</li>
+
+<li> Fixed: The stopping criterion for early exit in SolverBicgstab did not
+work properly for systems with large values, leading to premature exit. This
+is now fixed.
+<br>
+(Martin Kronbichler, 2013/05/07)
+</li>
+
+<li> Changed: The SolverGMRES implementation previously applied two
+iterations of the modified Gram–Schmidt algorithm for
+orthogonalization. In many situations one iteration is enough. The algorithm
+can now detect loss of orthogonality and enables re-orthogonalization only if
+necessary. The second iteration (and, hence, old behavior) can be forced by
+the flag SolverGMRES::AdditionalData::force_re_orthogonalization.
+<br>
+(Martin Kronbichler, 2013/05/06)
+</li>
+
+<li> Changed: FETools::interpolate is now instantiated for all
+vector types, not just dealii::Vector and dealii::BlockVector.
+<br>
+(Wolfgang Bangerth, 2013/05/06)
+</li>
+
+<li> Fixed: setting values in TrilinosWrappers::SparseMatrix
+in parallel was adding the values instead.
+<br>
+(Bruno Turcksin, Timo Heister, 2013/05/03)
+</li>
+
+<li> Fixed: Generate an error if the user tries to refine a cell
+that is already on the maximum level in a distributed triangulation.
+<br>
+(Timo Heister, 2013/05/01)
+</li>
+
+<li> Fixed: The version of ParameterHandler::set that takes a boolean
+as second argument was broken and did not work. This is now fixed.
+<br>
+(Ashkan Dorostkar, Wolfgang Bangerth, 2013/04/30)
+</li>
+
+<li> Fixed: PETScWrappers::VectorBase::print now saves and restores
+the precision
+and width associated with the stream it prints to around setting
+the values passed as arguments.
+<br>
+(Fahad Alrashed, 2013/04/22)
+</li>
+
+<li> Fixed: FullMatrix::print now saves and restores the precision
+and width associated with the stream it prints to around setting
+the values passed as arguments.
+<br>
+(Fahad Alrashed, 2013/04/22)
+</li>
+
+<li> New: LogStream now has member functions LogStream::width,
+LogStream::precision and LogStream::flags that make it look more
+like normal objects of type <code>std::ostream</code>.
+<br>
+(Fahad Alrashed, 2013/04/22)
+</li>
+
+<li> New: SparseDirectUMFPACK has long had the ability to work with
+BlockSparseMatrix objects, but couldn't deal with BlockVector objects.
+This is now fixed.
+<br>
+(Wolfgang Bangerth, 2013/04/21)
+</li>
+
+<li> New: Class TimerOutput::Scope does automatic scope based enter/
+exit_section of a TimerOutput object.
+<br>
+(Timo Heister, 2013/04/18)
+</li>
+
+<li> Fixed: TimerOutput constructed with an MPI_COMM in wall_time
+mode now constructs synchronized Timer objects. This gives reliable
+parallel benchmark timings.
+<br>
+(Timo Heister, 2013/04/18)
+</li>
+
+<li> Improved and Fixed: LogStream (and deallog) now respect std::flush in
+addition to std::endl to write out content to the console/file.
+Furthermore, LogStream::push(...) and LogStream::pop() now work in a thread
+safe manner. It is also now possible to pop() the prefix "DEAL".
+<br>
+(Matthias Maier, 2013/04/18)
+</li>
+
+<li> Fixed: The HalfHyperShellBoundary class got refining
+the edges that sit at the perimeter of the circular face of the domain
+wrong. This is now fixed.
+<br>
+(Wolfgang Bangerth, Jörg Frohne, 2013/04/17)
+</li>
+
+<li> New: Functions::FEFieldFunction can now deal with
+parallel::distributed::Triangulation objects.
+<br>
+(Wolfgang Bangerth, 2013/04/15)
+</li>
+
+<li> New: There is now a version of SparseMatrix::copy_from that can copy
+from TrilinosWrappers::SparseMatrix.
+<br>
+(Wolfgang Bangerth, Jörg Frohne, 2013/04/15)
+</li>
+
+<li> Improved: The SolverCG implementation now uses only three auxiliary
+vectors, down from previously four. Also, there are some shortcuts in case
+PreconditionIdentity is used that improve the solver's performance.
+<br>
+(Martin Kronbichler, 2013/04/11)
+</li>
+
+<li> Fixed: The results section of step-23 did not show the movie in release 7.3
+due to poor HTML markup. This is now fixed.
+<br>
+(Wolfgang Bangerth, 2013/04/10)
+</li>
+
+<li> Fixed: It is now possible to use the MeshWorker framework in 1d as well.
+<br>
+(Wolfgang Bangerth, Scott Miller, 2013/04/09)
+</li>
+
+<li> Fixed: It was not possible to create a default-constructed object of
+type Triangulation<1>::face_iterator. This is now fixed.
+<br>
+(Wolfgang Bangerth, Scott Miller, 2013/04/09)
+</li>
+
+<li> New: VectorTools::subtract_mean_value can now be called without a
+boolean mask. The vector type is templatified and instantiated for all
+non distributed vectors.
+<br>
+(Matthias Maier, 2013/04/08)
+</li>
+
+<li> Fixed: It is now possible to call ConvergenceTable::evaluate_convergence_rates
+multiple times.
+<br>
+(Matthias Maier, 2013/04/08)
+</li>
+
+<li> Fixed: GridTools::distort_random (previously called Triangulation::distort_random)
+had a bug where points were only ever moved in <i>positive</i> coordinate
+directions rather than with uniform probability in either direction. The 1d
+implementation also had the problem that it did not move vertices if the
+<i>cell</i> they were on was at the boundary, even if the <i>vertex</i>
+itself was not. All of these problems are now fixed.
+<br>
+(Wolfgang Bangerth, 2013/04/05)
+</li>
+
+<li> New: There is a class VectorFunctionFromTensorFunction that converts
+between objects of type TensorFunction and Function.
+<br>
+(Spencer Patty, 2013/4/2)
+</li>
+
+<li> Fixed: The ParameterHandler class could not deal with parameters named
+<code>"value"</code> (and a few other names). This is now fixed.
+<br>
+(Denis Davydov, Matthias Maier, Wolfgang Bangerth, 2013/3/31)
+</li>
+
+<li> Changed: TimerOutput no longer assumes that sections are not nested
+when outputting percentage and total run time.
+<br>
+(Timo Heister, 2013/3/28)
+</li>
+
+<li> New: MPI_InitFinalize can also initialize PETSc/Slepc when
+not compiling with MPI. This is now the preferred way to initialize
+MPI/PETSc/Slepc in all cases.
+<br>
+(Timo Heister, 2013/3/26)
+</li>
+
+<li> Added/fixed: IterativeInverse::vmult() can now handle vectors
+using a different number type than the matrix type. As usual, the
+number types must be compatible. Additionally, the initial guess is
+always set to zero, since starting with the incoming vector makes no
+sense.
+<br>
+(Guido Kanschat, 2013/03/21)
+</li>
+
+<li> Added GridOut::write_svg() to allow for the output of
+two-dimensional triangulations in two space dimensions in the SVG
+format (Scalable Vector Graphics, a generic XML-based vector image
+format developed and maintained by the World Wide Web Consortium W3C).
+This function also provides cell coloring and cell labeling for the
+visualization of basic cell properties. Perspective view is further
+possible and the cell level number may be converted into altitude,
+revealing the inactive cells lying below.
+<br>
+(Christian Wülker, 2013/03/21)
+</li>
+
+<li> Added TimerOutput::reset to remove the collected information so far and
+added a new frequency TimerOutput::never to only output information if
+triggered by print_summary().
+<br>
+(Timo Heister, 2013/03/20)
+</li>
+
+<li> Changed: FEValuesExtractors::Scalar, FEValuesExtractors::Vector and
+FEValuesExtractors::SymmetricTensor could not be default constructed, and
+consequently one could not easily put them into arrays (where they would
+be default constructed when changing the size, and later assigned useful
+values). These classes can now be default constructed to invalid
+values, but can of course not be used in any useful way.
+<br>
+(Wolfgang Bangerth, 2013/03/15)
+</li>
+
+<li> Fixed: FETools::interpolation_difference did not work with PETSc.
+This is now fixed.
+<br>
+(Timo Heister, 2013/03/01)
+</li>
+
+</ol>
+
+
+*/
diff --git a/doc/news/8.0.0-vs-8.1.0.h b/doc/news/8.0.0-vs-8.1.0.h
new file mode 100644
index 0000000..b44ce49
--- /dev/null
+++ b/doc/news/8.0.0-vs-8.1.0.h
@@ -0,0 +1,750 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/**
+ * @page changes_between_8_0_and_8_1 Changes between Version 8.0 and 8.1
+
+<p>
+This is the list of changes made between the deal.II releases listed above.
+All entries are signed with the names of the authors.
+</p>
+
+
+
+<!-- ----------- INCOMPATIBILITIES ----------------- -->
+
+<a name="incompatible"></a>
+<h3 style="color:red">Incompatibilities</h3>
+
+<p style="color:red">
+Following are a few modifications to the library that unfortunately
+are incompatible with previous versions of the library, but which we
+deem necessary for the future maintainability of the
+library. Unfortunately, some of these changes will require
+modifications to application programs. We apologize for the
+inconvenience this causes.
+</p>
+
+<ol>
+  <li>Changed: During the implementation of the 64-bit features for deal.II
+  8.0, many linear algebra classes obtained a local typedef
+  <code>size_type</code> indicating the integer type that is used to index
+  into them. In some instances, this was accidentally set to
+  <code>types::global_dof_index</code> (which may be a 64-bit data type)
+  even in cases where this is clearly not going to work, for example for
+  FullMatrix::size_type, since we will not be able to store full matrix
+  objects of sizes for which a 32-bit index type is not sufficient. In
+  these cases, the typedef was reverted to just <code>unsigned int</code>.
+  <br>
+  (Wolfgang Bangerth, 2013/12/04)
+  </li>
+
+  <li> Removed: With the switch of the testsuite to CMake, the old report_features
+  and build test facilities are removed.
+  <br>
+  (Matthias Maier, 2013/12/03)
+  </li>
+
+  <li>
+  Changed: The kinds of template arguments for the VectorTools::interpolate
+  function taking a Mapping as first argument have changed. This was done to
+  work around a bug in the Intel ICC compiler which led to linker errors. Since
+  the actual function argument list remains unchanged, the only way you will
+  notice this change is if you <i>explicitly</i> specify template arguments.
+  The only place one would typically do that is if you take the address of
+  a template function. Since this is not a common operation, the impact of this
+  change is probably limited.
+  <br>
+  (Wolfgang Bangerth, 2013/11/27)
+  </li>
+
+  <li>
+  Changed: The ghost handling of the parallel::distributed::Vector class has
+  been reworked: The vector now carries a global state that stores whether
+  ghost elements have been updated or not. If a vector has ghost elements, it
+  does not allow calls to compress() any more. Instead, a compress operation
+  can now only be done when the ghost entries have been cleared before by
+  calling zero_out_ghosts() or operator=0. The state can be queried by the new
+  method has_ghost_elements(). This change avoids spurious entries to be
+  inserted with compress(), but requires some change in user codes. The
+  behavior of a ghosted vector is now very similar to ghosted PETSc and
+  Trilinos vectors. The only difference is that the <i>same</i> vector can
+  also be used as a non-ghosted vector which is designed for use in assembly
+  routines.
+  <br>
+  (Martin Kronbichler, 2013/10/18)
+  </li>
+
+  <li>
+  Removed: GridTools::collect_periodic_face_pairs. This function is superseded
+  by GridTools::collect_periodic_faces which exports an
+  std::vector<PeriodicFacepair<...>> instead.
+  <br>
+  (Matthias Maier, 2013/09/30)
+  </li>
+
+  <li>
+  Removed: The member function face_to_equivalent_cell_index() in
+  FiniteElementData has been removed. It had been deprecated a while
+  back already. Please use FiniteElement::face_to_cell_index() instead.
+  <br>
+  (Wolfgang Bangerth, 2013/08/09)
+  </li>
+
+  <li>
+  Changed: The typedefs DataOut::cell_iterator and
+  DataOut::active_cell_iterator were previously defined as
+  DoFHandler::(active)_cell_iterator, while they are now
+  Triangulation::(active)_cell_iterator. This is necessary to support DataOut
+  on multiple DoFHandler objects. This affects possible overloading of
+  DataOut::next_cell(cell_iterator). Use the typedef
+  DataOut::(active)_cell_iterator as argument type instead.
+  <br>
+  (Martin Kronbichler, 2013/07/24)
+  </li>
+</ol>
+
+
+<!-- ----------- GENERAL IMPROVEMENTS ----------------- -->
+
+<a name="general"></a>
+<h3>General</h3>
+
+
+<ol>
+  <li> New: step-26 fills a long-standing gap: There was no tutorial
+          program solving the heat equation. There was also no tutorial
+          showing in relatively easy terms how to do adaptive meshes
+          in time dependent problems. This program fills both of these
+          needs.
+  <br>
+  (Wolfgang Bangerth, 2013/12/18)
+  </li>
+
+  <li> Improved: The build system now supports usage of the library
+  out of the build directory without prior installation. This is done by
+  exporting an additional project configuration just for the build directory.
+  Furthermore, a bunch of convenience targets are now defined that just build
+  individual components (such as just the documentation or the libraries), and
+  if <tt>CMAKE_INSTALL_PREFIX</tt> is set, also install that specific component.
+  <br>
+  (Matthias Maier, Luca Heltai, 2013/12/03)
+  </li>
+
+  <li> Fixed: Missing instantiations of SparseDirectMUMPS have been added.
+  <br>
+  (Timo Heister, 2013/11/25)
+  </li>
+
+  <li> New: introduced "make test" that runs a minimal set of tests. We
+  encourage every user to run this, especially if they run into problems.
+  The tests are automatically picked depending on the configuration and
+  will be shipped with every release.
+  <br>
+  (Timo Heister, Matthias Maier, 2013/11/08)
+  </li>
+
+  <li> Changed: It is now possible to restore a parallel Triangulation
+  (and solutions) with a different number of processors than it was saved with
+  using Triangulation::save() and Triangulation::load().
+  <br>
+  (Timo Heister, 2013/11/02)
+  </li>
+
+  <li> Added support for Windows: It is now possible again to use gcc on Windows
+  in order to compile the library. We support gcc-4.8.1 on Cygwin64 and MinGW-w64.
+  <br>
+  (Matthias Maier, 2013/11/01)
+  </li>
+
+  <li> Changed: step-9, step-13 and step-14 have been converted to use the
+  more modern WorkStream concept for assembling linear systems and computing
+  error indicators in parallel.
+  <br>
+  (Bruno Turcksin, Wolfgang Bangerth, 2013/10/26)
+  </li>
+
+  <li> New: The testsuite is now ported to <a href="http://www.cmake.org/">
+  CMake</a> and uses CTest as test driver.
+  <br>
+  (Wolfgang Bangerth, Timo Heister, Matthias Maier, Bruno Turcksin, 2013/10/20)
+  </li>
+
+  <li>
+  Changed: multithreadinfo::n_default_threads is now deprecated. Use the
+  new n_threads() function instead, which works correctly with TBB.
+  <br>
+  (Timo Heister, 2013/10/02)
+  </li>
+
+  <li>
+  Changed: if configured with TBB but the number of threads is set to 1,
+  do not bother to use TBB in workstream.
+  <br>
+  (Timo Heister, 2013/10/02)
+  </li>
+
+  <li>
+  New: step-51 demonstrates the use of hybridized discontinuous Galerkin
+  methods in deal.II, using the face elements FE_FaceQ. The program solves a
+  scalar convection-diffusion equation.
+  <br>
+  (Martin Kronbichler and Scott Miller, 2013/10/01)
+  </li>
+
+  <li>
+  New: There is now an element FE_FaceP that can be combined with FE_DGP in
+  hybridized DG methods.
+  <br>
+  (Martin Kronbichler, 2013/09/17)
+  </li>
+
+  <li>
+  Fixed: The DataOutBase::XDMFEntry class now has a proper serialization
+  function to allow for checkpointing.
+  <br>
+  (Eric Heien, 2013/09/27)
+  </li>
+
+  <li>
+  New: DataOutBase::DataOutFilter provides a way to remove duplicate vertices
+  and values from a solution vector when generating output. Currently it only
+  supports HDF5/XDMF output.
+  <br>
+  (Eric Heien, 2013/09/27)
+  </li>
+
+  <li>
+  Removed: DataOutBase::HDF5MemStream was removed and the functionality replaced
+  by DataOutBase::DataOutFilter. The user only manipulates these through
+  DataOutBase::write_hdf5_parallel so this change should be transparent.
+  <br>
+  (Eric Heien, 2013/09/27)
+  </li>
+
+  <li>
+  New: Like the usual DoFHandler class, the hp::DoFHandler class now also
+  has a cache that makes operations such as <code>cell-@>get_dof_indices(...)</code>
+  faster. This should accelerate many parts of the library that deal with
+  hp finite elements.
+  <br>
+  (Wolfgang Bangerth, 2013/09/10)
+  </li>
+
+  <li>
+  New: parallel::distributed::Triangulation now supports periodic boundaries.
+  DoFTools::make_periodicity_constraints and similar functions are now working
+  on parallel::distributed::Triangulation objects.
+  <br>
+  (Tobin Isaac, Craig Michoski, Daniel Arndt, 2013/09/06)
+  </li>
+
+  <li>
+  New: It is now possible to compile and link deal.II against LLVM's libcxx. For
+  this, a few issues with C++ standard violations are resolved.
+  <br>
+  (Matthias Maier, 2013/08/09)
+  </li>
+</ol>
+
+
+<!-- ----------- SPECIFIC IMPROVEMENTS ----------------- -->
+
+<a name="specific"></a>
+<h3>Specific improvements</h3>
+
+<ol>
+  <li> Fixed: The DerivativeApproximation class did not work for
+  parallel programs. This is now fixed.
+  <br>
+  (Wolfgang Bangerth, 2013/12/18)
+  </li>
+
+  <li> Fixed: Move the implementation of Subscriptor::(un)subscribe() to
+  the .cc file so that it is possible to link against the debug library
+  without specifying <code>-DDEBUG</code>.
+  <br>
+  (Wolfgang Bangerth, 2013/12/13)
+  </li>
+
+  <li> Fixed: Since the introduction of ThreadLocalStorage in version 8.0, the
+  way in which FEValues objects visit cells in a parallel assembly loop is no
+  longer deterministic. Therefore, the detection of CellSimilarity that can
+  speed up computations of certain geometric quantities (shape gradients) on
+  cells that are translations is disabled when the number of threads is
+  greater than one. This produces somewhat slower code (usually not more than
+  a few percent) but ensures exact reproducibility of results.
+  <br>
+  (Martin Kronbichler, Wolfgang Bangerth, 2013/12/09)
+  </li>
+
+  <li> Fixed: Several functions in namespace GridTools were not instantiated
+  for parallel::distributed::Triangulation objects. This is now fixed.
+  <br>
+  (Denis Davydov, Wolfgang Bangerth, 2013/12/01)
+  </li>
+
+  <li> Improved: The methods ConstraintMatrix::distribute_local_to_global
+  now use scratch data that is private to each thread instead of allocating
+  it for every cell anew. This gives better performance, in particular in
+  parallel, of these operations, while maintaining thread-safety (when
+  accessing non-overlapping rows, no race condition can exist).
+  <br>
+  (Martin Kronbichler, 2013/12/03)
+  </li>
+
+  <li> Improved: When attempting operations such as FEValues::get_function_values()
+  or FEValues::shape_value(), the FEValues object needs to know that what these
+  functions return has been computed previously. What is computed is specified
+  by the update flags that are passed to the constructor of all FEValues, FEFaceValues
+  and FESubfaceValues objects. If a user attempts an operation for which the
+  corresponding flag was not specified, an exception is generated. This exception
+  did say previously what the cause was, but it was not overly explicit.
+  The exception now generates a message that says exactly what is going wrong.
+  <br>
+  (Wolfgang Bangerth, 2013/12/01)
+  </li>
+
+  <li> Fixed: GridGenerator::truncated_cone() failed if half_length < 0.5*radius in 3d.
+  <br>
+  (Timo Heister, 2013/11/25)
+  </li>
+
+  <li> Fixed: make_hanging_node_constraints failed with an exception in a
+  parallel::distributed computation if the element is
+  RaviartThomas (and probably others).
+  <br>
+  (Timo Heister, 2013/11/23)
+  </li>
+
+  <li> Improved: CMake: Added a configuration check for incompatible ninja
+  + icc setup, fixed several setup and performance issues with the
+  testsuite.
+  <br>
+  (Matthias Maier, 2013/11/20)
+  </li>
+
+  <li> Changed: when a dealii::Exception is thrown, defer the symbol lookup of the
+  stack trace to when it is needed. This improves performance if what() is never
+  called.
+  <br>
+  (Timo Heister, 2013/11/17)
+  </li>
+
+  <li> Fixed: GridGenerator::parallelogram was not instantiated properly
+  when using intel compilers.
+  <br>
+  (Timo Heister, 2013/11/17)
+  </li>
+
+  <li>
+  Fixed: MappingQ1::transform_real_to_unit_cell() could fail in some
+  cases with very elongated and twisted cells. This should now be fixed
+  with an algorithm that uses a better method of computing the Newton
+  convergence.
+  <br>
+  (Wolfgang Bangerth, 2013/11/17)
+  </li>
+
+  <li>
+  Fixed: VectorTools::compute_no_normal_flux_constraints had a bug that
+  only appeared in rare cases at vertices of the domain if one adjacent
+  cell had two boundary indicators selected for no normal flux and another
+  had only one. This is now fixed.
+  <br>
+  (Wolfgang Bangerth, 2013/11/17)
+  </li>
+
+  <li> Fixed: dealii::FETools::interpolation_difference was
+  not working for TrilinosWrappers::MPI::Vectors with ghost
+  entries. The TrilinosWrappers::VectorBase class now has a
+  get_mpi_communicator method similar to the PETSc vector
+  classes.
+  <br>
+  (Martin Steigemann, Martin Kronbichler, 2013/11/17)
+  </li>
+
+  <li> Fixed: Bundled fparser is now compiled with FP_USE_THREAD_SAFE_EVAL in
+  case of enabled threading support so that it is thread safe.
+  <br>
+  (Matthias Maier, reported by Francesco Cattoglio, 2013/11/16)
+  </li>
+
+  <li> Fixed: The CellData class now has a default constructor that
+  sets the material and boundary indicators to zero. This fixes certain
+  internal errors with the Intel ICC compiler.
+  <br>
+  (Wolfgang Bangerth, 2013/11/13)
+  </li>
+
+  <li> Cleanup: Removed obsolete files and files with unknown licensing
+  status from the source tree. Along the way, parameter_gui now uses
+  default icons from the desktop environment instead of bundled ones.
+  <br>
+  (Matthias Maier, 2013/11/11)
+  </li>
+
+  <li> New: There is now a framework for coloring graphs, with functions
+  in namespace GraphColoring.
+  <br>
+  (Bruno Turcksin, Martin Kronbichler, 2013/11/06)
+  </li>
+
+  <li>
+  Fixed: the DerivativeApproximation class was not working correctly when
+  used with parallel vectors.
+  (Timo Heister, 2013/10/28)
+  </li>
+
+  <li>
+  ~Subscriptor and ~GrowingVectorMemory no longer throw an exception (the
+  former if disable_abort_on_exception was called) to be compatible with the
+  C++11 standard which otherwise requires the program to immediately call
+  std::terminate. This was done with a new macro "AssertNothrow".
+  <br>
+  (Wolfgang Bangerth, Matthias Maier, Bruno Turcksin, 2013/10/22)
+  </li>
+
+  <li>
+  dealii::SolverControl::NoConvergence now inherits dealii::ExceptionBase and
+  is thrown via AssertThrow(false, ... ).
+  <br>
+  (Matthias Maier, 2013/10/20)
+  </li>
+
+  <li>
+  New: parallel::distributed::BlockVector now has methods update_ghost_values,
+  compress, zero_out_ghosts, and has_ghost_elements that do the respective
+  operation on each block of parallel::distributed::Vector.
+  <br>
+  (Martin Kronbichler, 2013/10/18)
+  </li>
+
+  <li>
+  Fixed: When deriving from DataOut to filter the cells where output is generated, there were two different bugs that result in segmentation faults or wrong cells written (example, step-18).
+  <br>
+  (Timo Heister, 2013/10/16)
+  </li>
+
+  <li>
+  New: GridIn::read_vtk() reads 2d and 3d meshes in VTK format.
+  <br>
+  (Mayank Sabharwal, Andreas Putz, 2013/10/07)
+  </li>
+
+  <li>
+  Fixed: ConstraintMatrix would not compress() the IndexSet in the constructor
+  leading to crashes that only happen in release mode. This is now fixed.
+  <br>
+  (Timo Heister, 2013/09/27)
+  </li>
+
+  <li>
+  Fixed: PetscWrappers::MatrixBase::row_length() no longer worked after recent changes
+  to PETSc (around PETSc release 3.4). This is now fixed.
+  <br>
+  (Wolfgang Bangerth, 2013/09/24)
+  </li>
+
+  <li>
+  New: Added write_visit_record that allows writing .visit files with multiple blocks
+  and multiple time steps.
+  <br>
+  (Fahad Alrashed, 2013/09/21)
+  </li>
+
+  <li>
+  Changed: GridTools::have_same_coarse_mesh was only instantiated for
+  MGDoFHandler arguments in debug mode. This is now fixed.
+  <br>
+  (Timo Heister, 2013/09/20)
+  </li>
+
+  <li>
+  Changed: GridTools::find_active_cell_around_point now throws the exception
+  GridTools::ExcPointNotFound
+  if the point is outside the Triangulation. This exception can be caught.
+  <br>
+  (Timo Heister, 2013/09/18)
+  </li>
+
+  <li>
+  Changed: we now call MPI_Init_thread instead of MPI_Init.
+  <br>
+  (Timo Heister, 2013/09/17)
+  </li>
+
+  <li>
+  Enhancement: It is now possible to use the build directory directly without
+  the need to install first. For this, a second copy of all necessary project
+  configuration files (deal.IIConfig.cmake, ..., Make.global_options) is
+  generated and deployed in the build directory. (This is fully compatible with
+  the old possibility to install into the build dir.)
+  <br>
+  (Matthias Maier, 2013/09/15)
+  </li>
+
+  <li>
+  Fixed: DoFTools::extract_locally_*_dofs are now instantiated for hp::DoFHandler.
+  <br>
+  (Jean-Paul Pelteret, 2013/09/11)
+  </li>
+
+  <li>
+  Changed: parallel::distributed::BlockVector::operator= now allows importing
+  of ghost values like all other vector types. Also added some new constructors
+  for BlockVector and Vector using IndexSets to mirror the other linear algebra
+  classes.
+  <br>
+  (Timo Heister, 2013/09/04)
+  </li>
+
+  <li>
+  Fixed: VectorTools::compute_no_normal_flux_constraints had a bug that
+  only manifested on complex meshes. This is now fixed.
+  <br>
+  (Chih-Che Chueh, Wolfgang Bangerth, 2013/09/04)
+  </li>
+
+  <li>
+  New: All vector classes now have functions <code>extract_subvector_to()</code>
+  that allow extracting not just a single value but a whole set.
+  <br>
+  (Fahad Alrashed, 2013/09/02)
+  </li>
+
+  <li>
+  Fixed: <code>common/Make.global_options</code> now exports enable-threads
+  correctly, furthermore, <code>lib-suffix</code>, <code>shared-lib-suffix</code>
+  and <code>static-lib-suffix</code> are now exported as well for better legacy
+  support.
+  <br>
+  (Matthias Maier, 2013/08/30)
+  </li>
+
+  <li>
+  New: The ParameterHandler class can now deal with including one parameter
+  file from another.
+  <br>
+  (Wolfgang Bangerth, 2013/08/25)
+  </li>
+
+  <li>
+  New: The method VectorTools::compute_normal_flux_constraints can be used to
+  force a vector finite element function to be normal to the boundary.
+  <br>
+  (Martin Kronbichler, 2013/08/23)
+  </li>
+
+  <li>
+  Improved: MappingQ now uses the points of the Gauss-Lobatto quadrature
+  formula as support points instead of equispaced ones. This allows its use
+  for high polynomial orders and also gives better interpolation of circular
+  boundaries. Beware that mappings of order three and higher will behave
+  slightly differently now (usually better).
+  <br>
+  (Martin Kronbichler, 2013/08/23)
+  </li>
+
+  <li>
+  Improved: Several .cc files in the deal.II directory have been split in
+  order to better utilize multiple processors when compiling in parallel and
+  reduce memory requirements of the compilation stage.
+  <br>
+  (Martin Kronbichler, 2013/08/22)
+  </li>
+
+  <li>
+  Fixed: The ParameterHandler::declare_entry() did not check that the
+  default value of a parameter indeed satisfies the pattern given for this
+  parameter (despite a statement in the documentation that this checking
+  would happen). This is now fixed.
+  <br>
+  (Wolfgang Bangerth, 2013/08/21)
+  </li>
+
+  <li>
+  New: Patterns::List and Patterns::Map now accept a string
+  different than the default comma that denotes the separator
+  between entries of the list or map.
+  <br>
+  (Wolfgang Bangerth, 2013/08/21)
+  </li>
+
+  <li>
+  Fixed: Some operations in the MappingQ class are now done in higher
+  precision arithmetic to mitigate the ill-conditioning that appears
+  when using mappings of high order (say, order 6 or 8 or 10).
+  <br>
+  (Juan Carlos Araujo Cabarcas, 2013/08/20)
+  </li>
+
+  <li>
+  Fixed: The SLEPcWrappers classes could not be compiled for 64-bit
+  indices. This is now fixed.
+  <br>
+  (Denis Davydov, Wolfgang Bangerth, 2013/08/20)
+  </li>
+
+  <li>
+  Fixed: SolutionTransfer used to crash whenever one transferred in the hp
+  context between cells that use FE_Nothing and FE_Q. This is now fixed.
+  <br>
+  (Krzysztof Bzowski, Wolfgang Bangerth, 2013/08/18)
+  </li>
+
+  <li>
+  Fixed: Under some circumstances (see http://code.google.com/p/dealii/issues/detail?id=82)
+  the DoFTools::make_periodicity_constraints() function could create cycles in
+  the ConstraintMatrix object. This is now fixed.
+  <br>
+  (David Emerson, Wolfgang Bangerth, 2013/08/16)
+  </li>
+
+  <li>
+  New: There is now a function ConstraintMatrix::are_identity_constrained().
+  <br>
+  (Wolfgang Bangerth, 2013/08/16)
+  </li>
+
+  <li>
+  New: TableHandler::write_text() now also supports output in
+  org-mode (http://orgmode.org/) format via a new entry in the
+  TableHandler::TextOutputFormat enumeration.
+  <br>
+  (Oleh Krehel, 2013/08/15)
+  </li>
+
+  <li>
+  New: There are now global functions <code>scalar_product</code>
+  that compute the scalar product (double contraction) between
+  tensors of rank 2.
+  <br>
+  (Scott Miller, 2013/08/14)
+  </li>
+
+  <li>
+  Fixed: Creating objects of type MappingQ was previously only possible
+  for low order polynomials. For orders higher than around 6, one ran
+  into assertions that tested for internal consistency. These assertions
+  have now been appropriately relaxed for the growth of round-off errors
+  with growing polynomial degrees.
+  <br>
+  (Juan Carlos Araujo Cabarcas, Wolfgang Bangerth, 2013/08/14)
+  </li>
+
+  <li>
+  New: MappingQEulerian is now also instantiated for vector elements
+  of type TrilinosWrappers::Vector as well as the MPI and block
+  variants.
+  <br>
+  (Armin Ghajar Jazi, 2013/08/14)
+  </li>
+
+  <li>
+  Fixed: The FiniteElement::face_to_cell_index() function had a bug
+  that made it work incorrectly for elements that have more than one
+  degree of freedom per line (in 2d) or per quad (in 3d). This is now
+  fixed for the most common cases, namely the FE_Q elements as well
+  as elements composed of FESystem elements. For all other cases, an
+  exception is generated reporting that this case is not implemented.
+  If you run into this, let us know.
+  <br>
+  (Wolfgang Bangerth, 2013/08/10)
+  </li>
+
+  <li>
+  New: DataOutBase::VtkFlags now has a flag
+  DataOutBase::VtkFlags::print_date_and_time that can be used to suppress output
+  of date and time in output files. This is useful in test suites where a newer
+  run at a different time produces differences against previously stored files,
+  even though the actual data is exactly the same.
+  <br>
+  (Oleh Krehel, 2013/08/06)
+  </li>
+
+  <li>
+  Fixed: The various block matrix classes are all derived from BlockMatrixBase
+  which had race conditions when the set() or add() functions were called from
+  different threads. This is now fixed.
+  <br>
+  (Wolfgang Bangerth, 2013/08/05)
+  </li>
+
+  <li>
+  Fixed: various fixes with assignment and reinit of PETScWrappers::MPI::Vector.
+  <br>
+  (Timo Heister, 2013/08/05)
+  </li>
+
+  <li>Fixed: An assertion wrongly triggered in
+  DoFTools::make_hanging_node_constraints when used with a particular
+  combination of FESystem elements containing FE_Nothing. This is now fixed.
+  <br>
+  (Denis Davydov, Wolfgang Bangerth, 2013/08/01)
+  </li>
+
+  <li>
+  New: Add has_ghost_elements() for PETScWrappers::MPI::BlockVector and
+  TrilinosWrappers::MPI::BlockVector.
+  <br>
+  (Timo Heister, 2013/08/01)
+  </li>
+
+  <li>
+  SparsityTools::distribute_sparsity_pattern did not work correctly for
+  block systems, this has been fixed (function has a different signature).
+  <br>
+  (Timo Heister, 2013/07/31)
+  </li>
+
+  <li>Fixed: When typing <code>make run</code> in the step-32 directory,
+  the program was executed with <code>mpirun -np 2 ./step-32</code>. This
+  assumes that a program <code>mpirun</code> exists, but also that
+  deal.II was in fact compiled with MPI support on. Neither was intended.
+  This is now fixed.
+  <br>
+  (Wolfgang Bangerth, 2013/07/24)
+  </li>
+
+  <li>New: The DataOut, DataOutFaces, and DataOutRotation classes now allow
+  the output of data vectors using different DoFHandler objects (based on the
+  same triangulation), by new functions add_data_vector. This is used in the
+  step-31 tutorial program which avoids creating a joint DoFHandler just for
+  output.
+  <br>
+  (Martin Kronbichler, 2013/07/24)
+  </li>
+
+  <li>Changed: GridGenerator used to be a class with only static members
+  but is now a namespace, like all other similar constructs in deal.II.
+  <br>
+  (Wolfgang Bangerth, 2013/07/24)
+  </li>
+
+  <li>Changed: In GridGenerator, several functions had erroneously been changed
+  to take an argument of type <code>size_type</code> rather than <code>unsigned
+  int</code>. <code>GridGenerator::size_type</code> was a typedef to
+  types::global_dof_index, which for most users was <code>unsigned int</code>
+  anyway, but could also be set to be a 64-bit integer type. In any case, the
+  change has been reverted and these functions take just a regular
+  <code>unsigned int</code> again.
+  <br>
+  (Wolfgang Bangerth, 2013/07/24)
+  </li>
+</ol>
+
+
+*/
diff --git a/doc/news/8.1.0-vs-8.2.0.h b/doc/news/8.1.0-vs-8.2.0.h
new file mode 100644
index 0000000..8cc6872
--- /dev/null
+++ b/doc/news/8.1.0-vs-8.2.0.h
@@ -0,0 +1,1254 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013 - 2014, 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/**
+@page changes_between_8_1_and_8_2 Changes between Version 8.1 and 8.2
+
+<p>
+This is the list of changes made between the release of deal.II version
+8.1.0 and that of 8.2.0. All entries are signed with the names of the
+authors.
+</p>
+
+
+
+<!-- ----------- INCOMPATIBILITIES ----------------- -->
+
+<a name="incompatible"></a>
+<h3 style="color:red">Incompatibilities</h3>
+
+<p style="color:red">
+Following are a few modifications to the library that unfortunately
+are incompatible with previous versions of the library, but which we
+deem necessary for the future maintainability of the
+library. Unfortunately, some of these changes will require
+modifications to application programs. We apologize for the
+inconvenience this causes.
+</p>
+
+<ol>
+  <li> Removed: The base class to the iterative linear solvers, Solver,
+  received a SolverControl object upon construction and had a member
+  function <code>control()</code> that returned a reference to the
+  object previously passed in. The class now no longer stores such
+  a reference, and consequently, the function has been removed.
+  <br>
+  (Wolfgang Bangerth, 2014/10/24)
+  </li>
+
+  <li> Removed: The constructor of the Utilities::MPI::MPI_InitFinalize
+  class used to interpret a last argument equal to numbers::invalid_unsigned_int
+  as @"<i>create as many threads as there are processor cores on the current
+  system</i>@" for each MPI process. If there were multiple MPI processes on a
+  given node, this would lead to (sometimes massive) overallocation of resources
+  because <i>every</i> MPI process would create as many threads as there are cores.
+  This has now been changed: an argument equal to numbers::invalid_unsigned_int
+  is now interpreted as @"<i>subdivide the available cores between all MPI
+  processes running on the current system and let each process create as many
+  threads as cores were allocated to it</i>@".
+  <br>
+  (Wolfgang Bangerth, 2014/09/16)
+  </li>
+
+  <li> Removed: CMake's configure phase no longer calls back into testsuite
+  subprojects. If a build directory is reconfigured the testsuite has to be
+  regenerated by hand using <code>make regen_tests</code>.
+  <br>
+  (Matthias Maier, 2014/07/16)
+  </li>
+
+  <li> Removed: Class PointerMatrixBase (and, consequently, the various
+  classes derived from it) had comparison operators that were intended to
+  work generically for any kind of derived class. However, the implementation
+  used a scheme that was not robust enough to handle the various situations
+  that derived classes implemented and, consequently, was not always correct.
+  These operators were not previously used inside the library and, likely,
+  were not widely used in applications either. They have now been removed.
+  <br>
+  (Wolfgang Bangerth, 2014/02/15)
+  </li>
+
+  <li> The change from functionparser to muparser introduced a small number of
+  incompatibilities: units, use_degrees, and recursion with 'eval' are no
+  longer supported. Comparing for equality is done using '==' instead of '='.
+  <br>
+  (Timo Heister, 2014/02/10)
+  </li>
+
+  <li> Changed: The various classes generating graphical output, such
+  as DataOut or DataOutStack, are all derived from a common interface
+  class DataOutInterface which, in turn, was derived from DataOutBase
+  through <i>private</i> inheritance. Because we frequently also
+  access the (public) members of this private base class this has tripped
+  up most every compiler we know of at one point or another. Furthermore,
+  because DataOutBase was a class that only defined static member functions
+  and had no member variables, there was really no reason for this
+  construct.
+  <br>
+  For these reasons, DataOutBase is now just a regular namespace and the
+  inheritance is gone. For the most part, this should not lead to any
+  incompatibilities except in cases where you accessed members of
+  DataOutBase through their derived classes. For example, it was possible
+  to write DataOut::Patch even though the
+  Patch class is actually declared in DataOutBase. Since
+  the inheritance is now gone, this is no longer possible and one
+  actually has to write DataOutBase::Patch instead. Using this form
+  turns out to be compatible also with older versions of deal.II.
+  <br>
+  (Wolfgang Bangerth, 2014/02/01)
+  </li>
+</ol>
+
+
+<!-- ----------- GENERAL IMPROVEMENTS ----------------- -->
+
+<a name="general"></a>
+<h3>General</h3>
+
+
+<ol>
+  <li> New: Several parts of the library have been adjusted to allow
+  compilation with Microsoft Visual Studio. Unfortunately, not all
+  of the library can be compiled yet.
+  <br>
+  (Lukas Korous, 2014/12/03)
+  </li>
+
+  <li> New: Added support for OpenCASCADE (www.opencascade.org)
+  library, which allows the use of arbitrary IGES files to describe
+  the boundary of a mesh. The use of the new classes is described in
+  the new Step-54.
+  <br>
+  (Luca Heltai, Andrea Mola, 2014/10/31)
+  </li>
+
+  <li> New: The classes implementing iterative solvers have gained
+  a mechanism by which it is possible to observe the progress of
+  the iterations, or to influence when to stop the iteration. The
+  documentation of the Solver class now contains an extended
+  discussion and example of this functionality.
+  <br>
+  (Wolfgang Bangerth, 2014/10/24)
+  </li>
+
+  <li> New: There is now a section in the introduction of step-36 that
+  discusses the interaction of Dirichlet boundary values and the solution
+  of eigenvalue problems.
+  <br>
+  (Denis Davydov, Wolfgang Bangerth, 2014/09/28)
+  </li>
+
+  <li> New: The glossary now contains an extensive entry on ghosted and
+  non-ghosted vectors (see @ref GlossGhostedVector ).
+  <br>
+  (Wolfgang Bangerth, 2014/09/27)
+  </li>
+
+  <li> New: MappingQ is now aware of
+  Manifold, i.e., we can use high order mappings that
+  actually follow the geometry also on the interior of codimension
+  zero meshes.
+  <br>
+  (Luca Heltai, 2014/09/13)
+  </li>
+
+  <li> New: The new tutorial program step-52 explains how to use the
+  new time stepping methods.
+  <br>
+  (Bruno Turcksin, Damien Lebrun-Grandie, 2014/09/12)
+  </li>
+
+  <li> New: The new tutorial program step-53 explains how to deal with
+  complicated geometries.
+  <br>
+  (Wolfgang Bangerth, Luca Heltai, 2014/09/02)
+  </li>
+
+  <li> Changed: Namespace std_cxx1x has been renamed to namespace
+  std_cxx11 to match the fact that the corresponding C++ standard
+  was approved back in 2011. The old namespace name was retained for
+  backward compatibility but is now deprecated.
+  <br>
+  (Wolfgang Bangerth, 2014/09/01)
+  </li>
+
+  <li> New: Most of the operations done on Vector (like add, sadd, etc.)
+  are now vectorized (SIMD) using OpenMP 4.0.
+  <br>
+  (Bruno Turcksin, 2014/08/25)
+  </li>
+
+  <li> Updated: The version of BOOST that comes bundled with deal.II has
+  been updated to 1.56.0.
+  <br>
+  (Wolfgang Bangerth, 2014/08/19)
+  </li>
+
+  <li> Fixed: Newer versions of GCC (e.g. 4.9.x) are no longer compatible
+  with BOOST 1.46. Consequently, the CMake scripts now require at least
+  BOOST 1.48 in order to use a BOOST version found on the system. If no
+  installed BOOST library is found, or if the version is older than 1.48,
+  CMake will simply take the one that comes bundled with deal.II.
+  <br>
+  (Wolfgang Bangerth, 2014/08/19)
+  </li>
+
+  <li> New: There is now a documentation module that describes
+  deal.II's support for and interaction with the
+  @ref CPP11 "C++11 standard".
+  <br>
+  (Wolfgang Bangerth, 2014/08/14)
+  </li>
+
+  <li> New: Added support for curved interior cells for all Triangulation
+  dimensions.
+  <br>
+  A new Manifold<dim,spacedim> class was introduced which only contains the
+  interface needed by Triangulation to refine objects, leaving all boundary
+  related functions in the class Boundary<dim,spacedim>, which was made
+  derived from Manifold<dim,spacedim>.
+  <br>
+  This new construction allows for curved interior cells, and custom refinement
+  strategies (see, for example, the new tutorial program step-53).
+  At the moment the following Manifolds are supported:
+  <ul>
+  <li> FlatManifold<dim,spacedim>: This class replaces the old
+  StraightBoundary<dim,spacedim>, and it adds support for periodic
+  manifolds. This is the simplest class one can use to create new Manifold classes;
+  </li>
+  <li> ManifoldChart<dim,spacedim,chartdim>: This is one of the most general Manifolds
+  one can think of. The user can overload the functions ManifoldChart::pull_back() and
+  ManifoldChart::push_forward(), to obtain a very general curved geometry, following
+  concepts typical of elasticity;
+  </li>
+  <li> SphericalManifold<dim,spacedim>: A simple implementation of spherical coordinates
+  transformations. This manifold allows hyper shells with curved interior cells which
+  follow the natural shape of the shell;
+  </li>
+  </ul>
+  <br>
+  The functions
+  Triangulation::set_boundary() and Triangulation::get_boundary() can still be used to
+  set and get Boundary objects instead of Manifold ones. For the get function, an exception
+  is thrown if a conversion to a valid Boundary class cannot be made on the fly.
+  <br>
+  (Luca Heltai, 2014/08/06)
+  </li>
+
+  <li> Ported: The build system now supports CMake 3.0.
+  <br>
+  (Matthias Maier, 2014/07/15)
+  </li>
+
+  <li> New: Added support for curved interior boundaries, and general
+  manifold id description.
+  <br>
+  A new attribute was added to each triangulation object
+  (types::manifold_id) which allows one to specify a manifold
+  description, much in the same way we do for boundaries, also for
+  interior edges and faces. At the moment this information is used
+  only for objects of topological dimension less than spacedim, but a
+  generalization of the class Boundary<dim,spacedim> is on its way to
+  allow also curved cells.
+  <br>
+  For the moment, full backward compatibility is granted, and one can
+  keep using the old Boundary<dim,spacedim>, together with
+  Triangulation::set_boundary(), and Triangulation::get_boundary()
+  methods. However, this change suggests to use the types::boundary_id associated
+  with each boundary face to indicate only boundary condition types, and
+  it allows to separate the geometric meaning from the physical meaning. The
+  suggested usage is
+  - types::boundary_id is used for the physical meaning
+  - types::manifold_id is used for the geometric meaning.
+  <br>
+  (Luca Heltai, 2014/07/14)
+  </li>
+
+  <li> New: Tutorial programs (and a few other places) are now cross-linked
+  to their corresponding video lectures.
+  <br>
+  (Wolfgang Bangerth, 2014/07/10)
+  </li>
+
+  <li> Changed: The functionparser library bundled with deal.II got replaced
+  by the <a href="http://muparser.beltoforion.de/">muparser</a> library.
+  <br>
+  (Timo Heister, 2014/02/10)
+  </li>
+
+  <li> Changed: It was possible to call DoFCellAccessor::set_active_fe_index()
+  on non-active cells. However, this made no sense: Since degrees of
+  freedom only exist on active cells
+  for hp::DoFHandler (i.e., there is currently no implementation
+  of multilevel hp::DoFHandler objects), it does not make sense
+  to assign active FE indices to non-active cells since they
+  do not have finite element spaces associated with them without
+  having any degrees of freedom.
+  <br>
+  The same of course is true for asking for the finite element active
+  on a non-active cell, i.e. using the functions
+  DoFAccessor::active_fe_index() and
+  DoFAccessor::get_fe(). All of these functions now produce exceptions on
+  non-active cells.
+  <br>
+  (Wolfgang Bangerth, 2014/01/24)
+  </li>
+
+  <li> New: deal.II now links with the
+  <a href="http://www.boost.org/doc/libs/1_55_0/libs/iostreams/doc/index.html">BOOST
+  Iostreams</a> library (at least if the libz and libbz2 libraries
+  can be found that are necessary for BOOST Iostreams).
+  Among many other things, this allows to easily
+  read files that have been compressed, as in the following code snippet:
+  @code
+    #include <boost/iostreams/filtering_stream.hpp>
+    #include <boost/iostreams/filter/gzip.hpp>
+    #include <boost/iostreams/device/file.hpp>
+
+    ...
+
+    boost::iostreams::filtering_istream in;
+    in.push(boost::iostreams::basic_gzip_decompressor<>());
+    in.push(boost::iostreams::file_source("myfile.gz"));
+
+    int i;
+    in >> i;
+  @endcode
+  More documentation on how to use BOOST Iostreams can be found
+  in the documentation link referenced above.
+  <br>
+  (Wolfgang Bangerth, 2013/12/21)
+  </li>
+</ol>
+
+
+<!-- ----------- SPECIFIC IMPROVEMENTS ----------------- -->
+
+<a name="specific"></a>
+<h3>Specific improvements</h3>
+
+<ol>
+  <li>Fixed: CMake now correctly detects -Wno-... support for gcc.
+  <br>
+  (Matthias Maier, 2014/12/11)
+  </li>
+
+  <li> New: The FE_Q and FE_Q_Hierarchical classes now include pictures
+  of their 2d shape functions in the class documentation.
+  <br>
+  (Manuel Quezada de Luna, Wolfgang Bangerth, 2014/12/09)
+  </li>
+
+  <li> Fixed: Calling DoFRenumbering::Cuthill_McKee threw an exception
+  whenever one of the processors of a parallel triangulation did not own
+  any degrees of freedom.
+  <br>
+  (Michal Wichrowski, Wolfgang Bangerth, 2014/12/04)
+  </li>
+
+  <li> Fixed: Serializing an object of type DoFHandler did not work without
+  including additional header files.
+  <br>
+  (David Wells, Wolfgang Bangerth, 2014/11/26)
+  </li>
+
+  <li> New: The class FEEvaluation with its fast tensor evaluation routines
+  can now be initialized from a mapping, a finite element, a quadrature, and
+  update flags on the fly similar to FEValues. This provides an alternative
+  entry point to these routines without the need to create a MatrixFree data
+  object.
+  <br>
+  (Martin Kronbichler, 2014/11/20)
+  </li>
+
+  <li> New: Complete the interface for periodic boundary conditions.
+  Now, it is possible to specify an additional (geometric) rotation matrix along
+  with vector components that should be rotated when applying periodic boundary
+  conditions (or alternatively, directly a transformation matrix working
+  directly on dofs). With this it is now possible to e.g. compute Stokes flow
+  on a quarter pie with periodic boundary conditions and the velocity correctly
+  transformed.
+  <br>
+  (Daniel Arndt, Matthias Maier, 2014/11/19)
+  </li>
+
+  <li> Improved: Vector updates in PreconditionChebyshev based on deal.II's own
+  vectors are now vectorized with OpenMP 4.0 SIMD pragmas.
+  <br>
+  (Martin Kronbichler, Bruno Turcksin, 2014/11/17)
+  </li>
+
+  <li> New: Added support for STEP files in the OpenCASCADE
+  wrappers. STEP files can now be imported and exported respectively
+  with the OpenCASCADE::read_STEP() and OpenCASCADE::write_STEP() functions.
+  <br>
+  (Andrea Mola, 2014/11/12)
+  </li>
+
+  <li> New: Timer now has get_lap_time() which returns the time
+  elapsed during the last start()/stop() cycle.
+  <br>
+  (Fahad Alrashed, 2014/11/09)
+  </li>
+
+  <li> New: GridTools::get_locally_owned_vertices() allows querying
+  which vertices of a triangulation are owned by the current
+  processor.
+  <br>
+  (Wolfgang Bangerth, 2014/11/09)
+  </li>
+
+  <li> New: parallel::distributed::Triangulation::communicate_locally_moved_vertices()
+  allows to
+  update vertex positions that have been moved just locally on distributed
+  meshes. GridTools::distort_random now works for distributed meshes and
+  hanging nodes in 3D as well.
+  <br>
+  (Daniel Arndt, 2014/11/06)
+  </li>
+
+  <li> New: TableHandler objects can be cleared - i.e. reset to a
+  zero-sized state.
+  <br>
+  (Fahad Alrashed, 2014/11/09)
+  </li>
+
+  <li> New: The FE_Nothing class now has a second template argument
+  corresponding to the space dimension in which the mesh is embedded,
+  like many other classes. This allows using this element in codimension
+  one cases as well now.
+  <br>
+  (Wolfgang Bangerth, 2014/11/05)
+  </li>
+
+  <li> Fixed: Using the FEEvaluation framework did not work for
+  scalar elements in 1d because there were conflicting partial
+  specializations. This is now fixed.
+  <br>
+  (Shiva Rudraraju, 2014/11/04)
+  </li>
+
+  <li> New: There is now a macro <code>DEAL_II_VERSION_GTE</code>
+  that can be used to test whether the deal.II version is greater
+  than or equal to a particular version number. This is useful if you
+  need to make application programs compatible with different
+  deal.II releases.
+  <br>
+  (Wolfgang Bangerth, 2014/10/31)
+  </li>
+
+  <li> New: The vector classes in deal.II (including Trilinos and PETSc
+  wrappers) now have a new method x.add_and_dot(factor,v,w) which performs
+  x.add(factor,v) and subsequent inner product of x with another vector
+  w. This operation occurs in some iterative solvers; by a combined operation,
+  reduced memory transfer and thus higher performance are enabled.
+  <br>
+  (Martin Kronbichler, 2014/10/27)
+  </li>
+
+  <li> Improved: Inner products and norms on deal.II's own vector classes now
+  use vectorization through VectorizedArray if available.
+  <br>
+  (Martin Kronbichler, 2014/10/27)
+  </li>
+
+  <li> Changed: PETSc and Trilinos vectors with ghost entries can now be reset to zero
+  using = 0.0;
+  <br>
+  (Timo Heister, 2014/10/14)
+  </li>
+
+  <li> New: The new function FiniteElement::get_associated_geometry_primitive() allows one to
+  query whether a given degree of freedom is associated with a vertex, line,
+  quad, or hex.
+  <br>
+  (Wolfgang Bangerth, 2014/09/26)
+  </li>
+
+  <li> Fixed: The vector and array versions of Utilities::MPI::sum() and
+  Utilities::MPI::max() produced segmentation faults with some MPI implementations
+  if the input and output arguments were the same. This is now fixed.
+  <br>
+  (Wolfgang Bangerth, 2014/09/29)
+  </li>
+
+  <li> Fixed: Trying to have FE_Q(p) and FE_DGQ(r) elements next to each
+  other in an hp::DoFHandler object led to assertions saying that these two
+  elements don't know how to compute interface constraints where such
+  elements touch. This has now been fixed: FE_DGQ is a discontinuous element,
+  so there cannot be any interface constraints at all.
+  <br>
+  (Wolfgang Bangerth, 2014/09/26)
+  </li>
+
+  <li> Fixed: The function TrilinosWrappers::VectorBase::sadd(double factor,
+  VectorBase &v) erroneously added factor*v instead of scaling the calling
+  vector by factor. This is now fixed.
+  <br>
+  (Martin Kronbichler, 2014/09/26)
+  </li>
+
+  <li> Fixed: The function TrilinosWrappers::SparseMatrix::add(double factor,
+  SparseMatrix &rhs) produced wrong results and ran into an exception if the
+  rhs matrix included off-processor column entries. This is now fixed.
+  <br>
+  (Martin Kronbichler, 2014/09/21)
+  </li>
+
+  <li> New: The function Threads::Task::joinable() can be used to verify whether
+  a task object can be joined or not.
+  <br>
+  (Wolfgang Bangerth, 2014/09/17)
+  </li>
+
+  <li> New: Added two optional parameters to TriaAccessor::center()
+  and a new method TriaAccessor::intermediate_point().
+  They allow to query for a
+  geometrically coherent center, or ask for arbitrary points on the
+  underlying Manifold, given the dim coordinates in the reference
+  element.
+  Triangulation was refactored internally to use the new
+  TriaAccessor::center() interface when querying for new points.
+  <br>
+  (Luca Heltai, 2014/09/13)
+  </li>
+
+  <li> Improved: Optimize construction of high-order FE_Nedelec by moving out some
+  non-essential computations. Namely, construct restriction and prolongation
+  matrices on first request. This reduces time spent in FE_Nedelec constructor
+  substantially.
+  <br>
+  (Alexander Grayver, 2014/08/22)
+  </li>
+
+  <li> New: There is now a GridGenerator::flatten_triangulation()
+  taking a Triangulation<dim, spacedim_1> as input and returning
+  a Triangulation<dim, spacedim_2> as output. The output
+  triangulation will contain a single level with all active
+  cells of the input triangulation, and will be topologically
+  equivalent to the input triangulation. If the two space dimensions
+  are equal, then this function will copy the triangulation
+  removing all levels, i.e., flattening it. If the two space dimensions
+  are different, then this function will copy the vertices only
+  up to the smallest space dimension parameter. <br>
+  Using this function, you can create a Triangulation<2,3> from
+  a Triangulation<2,2> or project to the plane z=0 your
+  Triangulation<2,3>. No checks are performed on the validity of
+  the resulting Triangulation.
+  <br>
+  (Luca Heltai, 2014/08/19)
+  </li>
+
+  <li> Changed: The functions GridTools::extract_boundary_mesh() and
+  GridTools::create_union_triangulation() have been moved to
+  GridGenerator::extract_boundary_mesh() and
+  GridGenerator::create_union_triangulation() since, conceptually, they
+  generate meshes. The old functions have been retained but are now
+  deprecated.
+  <br>
+  (Wolfgang Bangerth, 2014/08/19)
+  </li>
+
+  <li> New: TriaAccessor::measure() is now also implemented for faces of
+  3d cells as long as the face is planar.
+  <br>
+  (Kevin Drzycimski, 2014/08/19)
+  </li>
+
+  <li> Fixed: Support SLEPc 3.5 by disabling SDFOLD spectrum transformation type
+  that has been removed from SLEPc. Therefore, TransformationSpectrumFolding
+  cannot be used with newer SLEPc versions.
+  <br>
+  (Alexander Grayver, 2014/08/15)
+  </li>
+
+  <li> New: To better support applications that want to use C++11's
+  <a href="http://en.wikipedia.org/wiki/C%2B%2B11#Range-based_for_loop">range-based
+  for loops</a>, there are now functions Triangulation::cell_iterators(),
+  Triangulation::all_cell_iterators() and similarly in classes DoFHandler
+  and hp::DoFHandler
+  that return a range object that can then be used in range-based for loops.
+  The underlying implementation uses the new IteratorRange class.
+  <br>
+  See the new @ref CPP11 "C++11" page for more information.
+  <br>
+  (Wolfgang Bangerth, 2014/08/07)
+  </li>
+
+  <li> New: The new class FunctionManifold allows arbitrary manifold descriptions, in which you have
+  an explicit Function<chartdim> for the ManifoldChart::push_forward()
+  method, and an explicit Function<spacedim> for ManifoldChart::pull_back()
+  method (or an expression for both). In these cases, you can construct a
+  Manifold description on the fly.
+  <br>
+  (Luca Heltai, 2014/08/07)
+  </li>
+
+  <li> New: Added CylindricalManifold description.
+  <br>
+  This class allows refinement of cylindrical manifolds. It is a good
+  companion for GridGenerator::cylinder() and the perfect companion
+  for GridGenerator::cylinder_shell().
+  <br>
+  (Luca Heltai, 2014/08/06)
+  </li>
+
+  <li> New: TrilinosWrappers::PreconditionAMG can now be initialized from an
+  object of type Epetra_RowMatrix, which allows using it with more arbitrary
+  matrix objects, including matrix-free methods.
+  <br>
+  (Martin Kronbichler, 2014/08/06)
+  </li>
+
+  <li> Fixed: The FE_Nedelec element computed face interpolation matrices
+  wrongly for elements of order p>1. This also led to trouble computing
+  hanging node constraints in the context of hp adaptivity. This is now fixed.
+  <br>
+  (Alexander Grayver, 2014/08/05)
+  </li>
+
+  <li> New: The function GridTools::get_patch_around_cell() extracts
+  the set of cells that surround a single cell. The new functions
+  DoFTools::count_dofs_on_patch() and DoFTools::get_dofs_on_patch()
+  then help set up local problems on patches.
+  <br>
+  (Arezou Ghesmati, Wolfgang Bangerth, 2014/07/29)
+  </li>
+
+  <li> Fixed: Utilities::string_to_int() and
+  Utilities::string_to_double() did not catch if the
+  string given started with an integer or double but contained additional
+  text. They now throw an exception if this happens.
+  <br>
+  (Wolfgang Bangerth, 2014/07/20)
+  </li>
+
+  <li> New: The function GridOut::write can now be used also in
+  the codimension one case.
+  <br>
+  (Luca Heltai, 2014/07/18)
+  </li>
+
+  <li> New: The Function classes now take an additional optional
+  template argument, specifying the type of number to use, which defaults
+  to double.
+  <br>
+  (Luca Heltai, 2014/07/18)
+  </li>
+
+  <li> New: The GridReordering::reorder_cells() function used a
+  numbering format for the vertices in a cell that was last used in
+  deal.II version 5.2. This format is still used internally, but
+  the function now also understands the numbering that has been
+  used in deal.II ever since. The choice is made by an additional
+  argument to the function that defaults to the old-style
+  format for backward compatibility.
+  <br>
+  (Wolfgang Bangerth, 2014/07/14)
+  </li>
+
+  <li> New: There are now functions GridOut::write_vtk() and
+   GridOut::write_vtu() that can
+  write a mesh in VTK/VTU format.
+  <br>
+  (Wolfgang Bangerth, 2014/07/14)
+  </li>
+
+  <li> Fixed: PETSc up to at least version 3.5 has a bug where it does
+  not zero-initialize the ghost elements of a newly created ghosted
+  parallel vector. This is now worked around inside deal.II.
+  <br>
+  (Wolfgang Bangerth, Michal Wichrowski, 2014/07/12)
+  </li>
+
+  <li> Improved: The Trilinos direct solver, TrilinosWrappers::SolverDirect,
+  now takes a string to select among the available Amesos solvers. Moreover,
+  the solver now also supports deal.II's distributed vectors.
+  <br>
+  (Uwe Köcher, Martin Kronbichler, 2014/07/09)
+  </li>
+
+  <li> New: There are now three new preconditioner classes
+  TrilinosWrappers::PreconditionBlockJacobi,
+  TrilinosWrappers::PreconditionBlockSSOR, and
+  TrilinosWrappers::PreconditionBlockSOR that work on small dense blocks of
+  the global matrix instead of the point-wise relaxation methods in
+  TrilinosWrappers::Precondition{Jacobi,SSOR,SOR} that work on each row
+  separately.
+  <br>
+  (Martin Kronbichler, 2014/07/04)
+  </li>
+
+  <li> Fixed: Some versions of DoFTools::extract_boundary_dofs() were
+  not instantiated for some combinations of arguments. This could lead
+  to missing symbol errors during linking of applications on some
+  platforms and with some compiler versions. This is now
+  fixed.
+  <br>
+  (Wolfgang Bangerth, 2014/07/04)
+  </li>
+
+  <li> Fixed: GridGenerator::merge_triangulations sometimes produced invalid
+  orientations of faces. This is now fixed.
+  <br>
+  (Daniel Arndt, 2014/06/30)
+  </li>
+
+  <li> Fixed: TrilinosWrappers::PreconditionAMG did not read user-provided
+  constant modes (aka null space) when the null space dimension is one but not
+  just the trivial one vector. This is now fixed.
+  <br>
+  (Martin Kronbichler, 2014/06/30)
+  </li>
+
+  <li> Simplified interfaces for FEEvaluation: Previously, the user had to
+  select the appropriate kernel (FEEvaluation, FEEvaluationGeneral,
+  FEEvaluationDGP, FEEvaluationGL) for the matrix-free evaluation
+  routines. This made it difficult to write compact code that needs to select
+  between different elements. Therefore, all the functionality has been merged
+  into FEEvaluation and one should only use FEEvaluation, while the other
+  interfaces have been marked deprecated and will be removed in a future
+  version. The internal data structures for the various special cases have
+  been kept in order to provide for the most efficient routines, and are
+  selected at construction of FEEvaluation.
+  <br>
+  (Martin Kronbichler, 2014/06/27)
+  </li>
+
+  <li> Bugfix: TBB specific include directories have to be added to the
+  list of user include directories because of direct inclusion of header
+  files in base/thread_local_storage.h.
+  <br>
+  (Matthias Maier, Kainan Wang, 2014/06/25)
+  </li>
+
+  <li> Improved: GridGenerator::hyper_cube() accepts a
+  <code>colorize</code> argument.
+  <br>
+  (Guido Kanschat, 2014/06/23)
+  </li>
+
+  <li> New: %Functions DoFTools::extract_locally_relevant_dofs(),
+  parallel::distributed::GridRefinement::refine_and_coarsen_fixed_number() and
+  parallel::distributed::GridRefinement::refine_and_coarsen_fixed_fraction() are
+  now also instantiated for objects of codimension 1.
+  <br>
+  (Michal Wichrowski, 2014/06/15)
+  </li>
+
+  <li> New: There are now functions Triangulation::has_hanging_nodes()
+  and parallel::distributed::Triangulation::has_hanging_nodes().
+  <br>
+  (Denis Davydov, 2014/06/15)
+  </li>
+
+  <li> New: There is now a function Triangulation::n_global_active_cells().
+  <br>
+  (Denis Davydov, 2014/06/19)
+  </li>
+
+  <li> New: The class FEEvaluation now provides optimized matrix-free
+  evaluation routines for FE_Q_DG0 elements.
+  <br>
+  (Martin Kronbichler, 2014/06/13)
+  </li>
+
+  <li> Bugfix: Filter libclang_rt* from the PETSc link line.
+  <br>
+  (Matthias Maier, 2014/06/04)
+  </li>
+
+  <li> Improved: ParameterHandler parsing added to DoFOutputOperator.
+  <br>
+  (Guido Kanschat, 2014/06/03)
+  </li>
+
+  <li> Improved: Algorithms::ThetaTimestepping and other operators now use AnyData
+  to communicate with operators they rely on. This way, time step data
+  can be forwarded using the same mechanism, and no complicated back
+  access to member data is required
+  anymore. MeshWorker::IntegrationInfo uses AnyData as well.
+  <br>
+  (Guido Kanschat, 2014/06/03)
+  </li>
+
+  <li> Bugfix: CMake: Also clean CMAKE_MODULE_PATH prior to call to
+  FIND_PACKAGE(Boost) inside FindBOOST.cmake because apparently
+  "Boost" == "BOOST" for the Mac file system...
+  <br>
+  (Matthias Maier, 2014/05/28)
+  </li>
+
+  <li> Improved: CMake: Search results and error conditions for external
+  libraries are now much more verbose. Added an MPI sanity check.
+  <br>
+  (Matthias Maier, 2014/05/28)
+  </li>
+
+  <li> Fixed: The MatrixTools::apply_boundary_values() variant that works
+  on PETSc matrices could produce a deadlock in parallel if one processor
+  had no boundary values to apply. This is now fixed.
+  <br>
+  (Michal Wichrowski, 2014/05/19)
+  </li>
+
+  <li> New: AnyData::try_read() is a function that allows users to check
+  whether an entry exists and get a pointer to it without throwing an
+  exception in case of failure.
+  <br>
+  (Guido Kanschat, 2014/05/16)
+  </li>
+
+  <li> New: The GMRES solver of deal.II can now write an estimate of
+  eigenvalues to the log file, in analogy to the CG solver. This is enabled
+  by the flag SolverGMRES::AdditionalData::compute_eigenvalues.
+  <br>
+  (Martin Kronbichler, 2014/05/11)
+  </li>
+
+  <li> New: The GridIn::read_vtk() function places fewer restrictions
+  on the VTK files it wants to read and should, consequently, be able
+  to read more correctly formatted VTK files than before.
+  <br>
+  (Giorgos Kourakos, 2014/05/08)
+  </li>
+
+  <li> New: There is now a QSorted quadrature which takes an
+  arbitrary quadrature at construction time and reorders the quadrature
+  points according to the weights, from smallest to largest. This should
+  improve stability of higher order polynomial integration.
+  <br>
+  (Luca Heltai, 2014/05/07)
+  </li>
+
+  <li> New: The class VectorizedArray<Number> now provides methods
+  VectorizedArray::load(ptr) to read from arbitrary pointer addresses and
+  VectorizedArray::store(ptr) to write to arbitrary pointer addresses,
+  as opposed to the data layout of VectorizedArray that requires pointers
+  to be aligned by the size of the array in bytes. This also circumvents
+  a (rare) compiler optimization bug with gcc-4.6 on SSE code in combination
+  with function calls, e.g. to std::sin.
+  <br>
+  (Martin Kronbichler, 2014/05/05)
+  </li>
+
+  <li> Changed: Namespace SparsityTools had a local typedef <code>size_type</code>
+  that was set equal to types::global_dof_index. This typedef has been removed
+  and we now use SparsityPattern::size_type wherever applicable as this is the
+  type we really want to use. (The code worked previously because
+  types::global_dof_index and SparsityPattern::size_type happen to be the same
+  as far as the underlying type is concerned; however, they are different
+  semantically.)
+  <br>
+  (Wolfgang Bangerth, 2014/05/04)
+  </li>
+
+  <li> Updated: The step-16 tutorial program was updated to the new layout
+  of multigrid objects and thus avoids using deprecated interfaces.
+  <br>
+  (Martin Kronbichler, 2014/05/03)
+  </li>
+
+  <li> Fixed: FE_DGQArbitraryNodes::has_support_on_face was broken when
+  polynomials with support points not on the element boundary were used.
+  This is now fixed.
+  <br>
+  (Martin Kronbichler, 2014/04/30)
+  </li>
+
+  <li> Fixed: parallel::distributed::Triangulation::load now has an
+  additional parameter <code>autopartition</code> to control p4est's behavior of
+  rebalancing triangulations between MPI nodes upon reading. It is
+  particularly useful to disable this behavior when data is stored
+  separately (examples for this are in the regression tests mpi/p4est_save_0?).
+  <br>
+  (Alexander Grayver, Matthias Maier, 2014/04/26)
+  </li>
+
+  <li> Fixed: GridTools::find_active_cell_around_point() could get into an infinite
+  loop if the point we are looking for is in fact not within the domain. This is now
+  fixed.
+  <br>
+  (Giorgos Kourakos, Timo Heister, Wolfgang Bangerth, 2014/04/14)
+  </li>
+
+  <li> Changed: TableBase now uses AlignedVector for storing data
+  instead of std::vector, which allows its use for VectorizedArray<Number>
+  data fields which require more alignment.
+  <br>
+  (Martin Kronbichler, 2014/04/09)
+  </li>
+
+  <li> Improved: Piola transformation for FE_BDM is now active.
+  <br>
+  (Guido Kanschat, 2014/04/09)
+  </li>
+
+  <li> Changed: operator< for cell iterators no longer looks at
+  (level-)subdomain ids but only compares level() and index(). This makes the
+  ordering inconsistent between processes on a
+  parallel::distributed::Triangulation, but fixes the problem that the
+  ordering of cells changes under mesh refinement or other causes for changing
+  the subdomain id.
+  <br>
+  (Timo Heister, 2014/04/08)
+  </li>
+
+  <li> New: GridTools::laplace_transform() now takes an additional, optional
+  parameter that indicates the "stiffness" of the mapping.
+  <br>
+  (Denis Davydov, Jean-Paul Pelteret, 2014/04/07)
+  </li>
+
+  <li> Fixed: DoFTools::extract_constant_modes now correctly identifies both
+  constant modes in the scalar element FE_Q_DG0, which has been realized by a
+  few modifications in how the constant modes propagate from the element to
+  the extract_constant_modes() function.
+  <br>
+  (Martin Kronbichler, 2014/04/04)
+  </li>
+
+  <li> Fixed: GridTools::laplace_transform had previously announced in
+  the documentation that one can also set the location of interior points,
+  but this was not in fact what was implemented. This has now been fixed:
+  the code can now do that.
+  <br>
+  (Denis Davydov, Wolfgang Bangerth, 2014/03/23)
+  </li>
+
+  <li> Improved: Inhomogeneous tangential and normal flow constraints can
+       now be treated via VectorTools::compute_nonzero_normal_flux_constraints
+       and VectorTools::compute_nonzero_tangential_flux_constraints.
+  <br>
+  (Daniel Arndt, 2014/03/16)
+  </li>
+
+  <li> Changed: Class TriaAccessor had a function parent_index(), but this function
+  could only work for cell accessors. The function has consequently been moved
+  to class CellAccessor.
+  <br>
+  (Wolfgang Bangerth, 2014/03/15)
+  </li>
+
+  <li> Fixed: step-32 had a piece of code where we accessed an internal
+  representation of how Trilinos vectors are actually stored. This is poor
+  style and has been rewritten.
+  <br>
+  (Wolfgang Bangerth, 2014/03/14)
+  </li>
+
+  <li> Fixed: VectorTools::project_boundary_values_curl_conforming contained
+  a bug for some cases. This is now fixed.
+  <br>
+  (Markus Bürg, 2014/03/10)
+  </li>
+
+  <li> Fixed: ParameterHandler will no longer output an error if the file
+  to be read ends with "end" without a newline.
+  <br>
+  (Timo Heister, 2014/02/28)
+  </li>
+
+  <li>Improved: DoFRenumbering::Cuthill_McKee can now run with distributed
+  triangulations with the renumbering only done within each processor's
+  subdomain.
+  <br>
+  (Martin Kronbichler, 2014/02/20)
+
+  <li>Fixed: There was an indexing error in GridIn::read_vtk() that triggered
+  for some input files. This is now fixed.
+  <br>
+  (Mayank Sabharwal, 2014/02/19)
+
+  <li>New: There is a new namespace TimeStepping for the algorithms that do time
+  integrations. In this new namespace, several Runge-Kutta methods have been
+  implemented: explicit methods, implicit methods, and embedded explicit methods.
+  <br>
+  (Damien Lebrun-Grandie, Bruno Turcksin, 2014/02/17)
+
+  <li>New: There is now a class FEEvaluationDGP that implements matrix-free
+  evaluation routines by truncated tensor products for FE_DGP elements.
+  <br>
+  (Martin Kronbichler, 2014/02/17)
+
+  <li>Changed: The InverseMatrixRichardson used to eat all exceptions
+  that may have been produced by the underlying Richardson solver, leaving
+  no trace that the underlying solver may have failed when you call functions
+  such as InverseMatrixRichardson::vmult(). These exceptions are now propagated
+  out to the caller.
+  <br>
+  (Wolfgang Bangerth, 2014/02/16)
+
+
+  <li>New: FE_TraceQ implements finite elements on faces, which
+  correspond to the traces of H<sup>1</sup>-conforming elements.
+  <br>
+  (Angela Klewinghaus, 2014/02/14)
+
+  <li>New: FE_FaceQ and FE_FaceP now also work in 1D (with a single dof
+  on each vertex).
+  <br>
+  (Martin Kronbichler, 2014/02/11)
+
+  <li>Fixed: FE_DGQ::has_support_on_face returned a wrong number for element
+  degree larger than 1 in 1D. This is now fixed.
+  <br>
+  (Martin Kronbichler, 2014/02/10)
+
+  <li>Changed: DerivativeApproximation used to be a class that only had
+  static members. It is now a namespace.
+  <br>
+  (Wolfgang Bangerth, 2014/02/08)
+
+  <li>New: Threads::ThreadLocalStorage::clear() clears out all objects allocated on the
+  current and all other threads.
+  <br>
+  (Wolfgang Bangerth, 2014/02/06)
+
+  <li>Fixed: A configuration error on Debian Testing where accidentally a
+  non-pic libSuiteSparse_config.a was picked up when building a shared
+  library, resulting in a link error.
+  <br>
+  (Matthias Maier, 2014/02/04)
+
+  <li> Changed: GridTools::transform() can now deal with meshes with hanging nodes.
+  <br>
+  (Timo Heister, 2014/02/04)
+  </li>
+
+  <li>Fixed: Calling FEValuesViews::Vector::get_function_curls() computed
+  wrong results in some cases (see https://code.google.com/p/dealii/issues/detail?id=182).
+  This is now fixed.
+  <br>
+  (Christoph Heiniger, Wolfgang Bangerth, 2014/02/03)
+
+  <li>Added: The class LAPACKFullMatrix now implements interfaces to
+  matrix-matrix multiplication. Also, LAPACKFullMatrix::apply_lu_factorization
+  now also operates on multiple right hand sides in form of another
+  LAPACKFullMatrix.
+  <br>
+  (Martin Kronbichler, 2014/02/03)
+
+  <li>Fixed: The build system no longer records full paths to system
+  libraries but uses the appropriate short names instead.
+  <br>
+  (Matthias Maier, 2014/02/01)
+
+  <li>Reworked: External feature setup. Disabling a feature now cleans up
+  associated internal, cached variables. A per-feature linkage test now spots
+  common linking inconsistencies early in the configuration stage (and not
+  just after a complete compilation).
+  <br>
+  (Matthias Maier, 2014/02/01)
+
+  <li>New/fixed: The ParameterHandler::print_parameters_section
+  method did not work for XML output. There is now a flag
+  include_top_level_elements which prints all higher
+  subsection elements, default is false.
+  For XML output setting this flag to true is required
+  to ensure that the output is a valid XML document,
+  starting with one root element ParameterHandler and
+  compatible with read_input_from_xml and the parameterGUI.
+  <br>
+  (Martin Steigemann, 2014/02/01)
+
+  <li>New: There is now a method to copy the content from a
+  PETScWrappers::MPI::Vector and TrilinosWrappers::MPI::Vector to
+  deal.II's parallel distributed vector.
+  <br>
+  (Ben Thompson, Martin Kronbichler, 2014/01/31)
+
+  <li>Fixed: The SolutionTransfer class had all sorts of problems when
+  used with hp::DoFHandler that made its results at least questionable.
+  Several parts of this class have been rewritten to make the results
+  more predictable and, likely, more correct.
+  <br>
+  (Wolfgang Bangerth, 2014/01/26)
+
+  <li>Fixed: A regression where a single whitespace accidentally added to
+  DEAL_II_LINKER_FLAGS internally prevented cmake-2.8.8 from configuring
+  successfully.
+  <br>
+  (Matthias Maier, Krzysztof Bzowski, 2014/01/26)
+
+  <li> Fixed: SparsityPattern::max_entries_per_row() forgot to consider
+  the last row of the matrix and consequently sometimes returned
+  wrong values. This is now fixed.
+  <br>
+  (Martin Kronbichler, 2014/01/22)
+  </li>
+
+  <li> Improved: In rare cases when the vector of error indicators
+  has entries equal to zero the adjust_interesting_range method
+  produces a negative lower bound. As a result the
+  parallel::distributed::GridRefinement::refine_and_coarsen_fixed_*
+  methods flagged the wrong number of cells for coarsening and refinement.
+  This is now changed by adjusting the lower bound in adjust_interesting_range
+  only if it is not equal to zero.
+  <br>
+  (Martin Steigemann, 2014/01/22)
+  </li>
+
+  <li> Changed: It was previously possible to set the
+  <code>active_fe_index</code> on non-active cells of an hp::DoFHandler.
+  However, this was prone to mistakes because it may lead to the assumption
+  that a finite element space out of the ones described by the hp::FECollection
+  associated with this hp::DoFHandler was actually associated with such
+  a cell. Since we do not actually distribute degrees of freedom for such
+  hp::DoFHandler objects on non-active cells, this is not the case. Consequently,
+  it no longer has any effect to assign active FE indices to non-active cells:
+  these values are simply reset later on.
+  <br>
+  (Wolfgang Bangerth, 2014/01/20)
+  </li>
+
+  <li> Fixed: The method DoFTools::extract_constant_modes only worked for
+  elements where the constant function 1 is represented by all ones. This
+  is now fixed by querying the element for its constant modes on each cell.
+  <br>
+  (Martin Kronbichler, 2014/01/19)
+  </li>
+
+  <li> Fixed: PETScWrappers::MPI::Vector::all_zero() was broken with more than
+  one processor (illegal memory access) and did not communicate between all
+  processors. The documentation of all_zero() has been extended for many vector types.
+  <br>
+  (Timo Heister, 2014/01/17)
+  </li>
+
+  <li> Fixed/new: DoFCellAccessor::set_dof_values_by_interpolation and
+  DoFCellAccessor::get_interpolated_dof_values could previously be
+  called for hp::DoFHandler objects on cells that are non-active. This
+  makes no sense since these cells have no associated finite element
+  space. Doing so now raises an exception.
+  <br>
+  However, there are legitimate cases where one may want to interpolate
+  from children to a parent's finite element space or the other way around.
+  Since in the hp
+  case no finite element space is naturally associated with an inactive
+  cell, it is now possible to pass an explicit finite element index
+  argument to these functions specifying which element of an hp::FECollection
+  object describes the space onto which you want to interpolate.
+  <br>
+  (Mihai Alexe, Wolfgang Bangerth, 2014/01/18)
+  </li>
+
+  <li> Fixed: The methods IndexSet::do_compress() and
+  IndexSet::add_indices(IndexSet&) had quadratic complexity in the number of
+  ranges. The algorithms have been changed into linear complexity ones.
+  <br>
+  (Martin Kronbichler, 2014/01/15)
+  </li>
+
+  <li> Fixed: There were several bugs in functions like
+  FEValues::get_function_values() where the code did not properly handle the
+  case of FE_Nothing. This is now fixed.
+  <br>
+  (Wolfgang Bangerth, 2014/01/08)
+  </li>
+
+  <li> Fixed: DataOut got confused in some situations where one uses FE_Nothing.
+  This is now fixed.
+  <br>
+  (Minh Do-Quang, Wolfgang Bangerth, 2014/01/08)
+  </li>
+
+  <li> Fixed: FESystem::get_interpolation_matrix, a function that is among
+  other places used by SolutionTransfer, had a bug that prevented it from
+  running correctly in some situations where one uses FE_Nothing.
+  This is now fixed.
+  <br>
+  (Minh Do-Quang, Wolfgang Bangerth, 2014/01/08)
+  </li>
+
+  <li> Improved: When you call WorkStream::run with an empty function object
+  for the copier, operations on individual cells are essentially all independent.
+  In other words, you have a massively parallel collection of jobs. In this
+  case, a parallel for loop over all elements is better suited than the
+  pipeline approach currently used. This has now been implemented.
+  <br>
+  (Wolfgang Bangerth, 2013/12/26)
+  </li>
+
+  <li> New: The new function VectorTools::interpolate_based_on_material_id()
+  can be used to interpolate several functions onto a mesh, based on the
+  material id of each cell individually.
+  <br>
+  (Valentin Zingan, 2013/12/26)
+  </li>
+
+  <li> New: A new reinit() method has been introduced to
+  TrilinosWrappers::SparsityPattern that takes all rows that are possibly
+  written into as an optional argument. This allows for pre-allocating all
+  possible entries right away, which makes writing into the matrix from
+  several threads possible (otherwise, only one processor at a time can write
+  off-processor data). Similarly, TrilinosWrappers::MPI::Vector objects can
+  be initialized with hints to ghost elements for a writable vector that can
+  be added into from multiple threads.
+  <br>
+  (Martin Kronbichler, 2013/12/23)
+  </li>
+
+  <li> New: The TableBase::fill function has become more powerful in that
+  it now doesn't just take pointers to initializing elements but can deal
+  with arbitrary input iterators. It now also takes a flag that denotes the
+  order in which table elements are initialized, allowing to switch between
+  C- and Fortran-style table layouts.
+  <br>
+  Along with the TableBase::fill function, the Table classes of various
+  ranks have also gotten constructors that allow the in-place construction
+  not only of a table of correct size, but already initialized from
+  somewhere. This finally allows to mark Table objects as const by creating
+  them already with the correct content.
+  <br>
+  (Wolfgang Bangerth, 2013/12/21)
+  </li>
+
+  <li> New: There is now a new class Functions::InterpolatedTensorProductGridData that can
+  be used to (bi-/tri-)linearly interpolate data given on a tensor product
+  mesh of $x$ (and $y$ and $z$) values, for example to evaluate experimentally
+  determined coefficients, or to assess the accuracy of a solution by
+  comparing with a solution generated by a different code and written in
+  gridded data. There is also a new class Functions::InterpolatedUniformGridData that
+  can perform the same task more efficiently if the data is stored on meshes
+  that are uniform in each coordinate direction.
+  <br>
+  (Wolfgang Bangerth, 2013/12/20)
+  </li>
+
+  <li> Fixed: ParameterHandler::get_double() and ParameterHandler::get_integer()
+  had bugs in that they didn't detect if they were asked to return a number
+  for a parameter whose value was in fact not a number but some general
+  text. This is now fixed.
+  <br>
+  (Wolfgang Bangerth, 2013/12/19)
+  </li>
+
+  <li> Fixed: VectorTools::project_boundary_values could not deal with
+  function values close to (but not exactly equal to) zero. This is now fixed.
+  <br>
+  (Martin Kronbichler, 2013/12/16)
+  </li>
+
+  <li> New: It is now possible to select between different smoothers and coarse
+  solvers in the Trilinos AMG preconditioners by a string to the smoother's name.
+  <br>
+  (Andrew Baker, 2013/12/14)
+  </li>
+
+</ol>
+
+
+*/
diff --git a/doc/news/8.2.0-vs-8.2.1.h b/doc/news/8.2.0-vs-8.2.1.h
new file mode 100644
index 0000000..1b8bd84
--- /dev/null
+++ b/doc/news/8.2.0-vs-8.2.1.h
@@ -0,0 +1,43 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2014 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/**
+@page changes_between_8_2_0_and_8_2_1 Changes between Version 8.2.0 and 8.2.1
+
+<p>
+This is the list of changes made between the release of deal.II version
+8.2.0 and that of 8.2.1. All entries are signed with the names of the
+authors.
+</p>
+
+<!-- ----------- SPECIFIC IMPROVEMENTS ----------------- -->
+
+<a name="specific"></a>
+<h3>Specific improvements</h3>
+
+<ol>
+  <li> Ported: The build system now supports CMake up to version 3.1.
+  <br>
+  (Matthias Maier, 2015/01/06)
+  </li>
+
+  <li> Fixed: CMake now also handles and exports the subminor version
+  number correctly ("pre" and "rc?" are replaced by "0").
+  <br>
+  (Matthias Maier, 2015/01/02)
+  </li>
+</ol>
+
+*/
diff --git a/doc/news/8.2.1-vs-8.3.0.h b/doc/news/8.2.1-vs-8.3.0.h
new file mode 100644
index 0000000..0343044
--- /dev/null
+++ b/doc/news/8.2.1-vs-8.3.0.h
@@ -0,0 +1,1150 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2014 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/**
+ * @page changes_between_8_2_1_and_8_3 Changes between Version 8.2.1 and 8.3
+
+<p>
+This is the list of changes made between the release of deal.II version
+8.2.1 and that of 8.3.0. All entries are signed with the names of the
+authors.
+</p>
+
+
+
+<!-- ----------- INCOMPATIBILITIES ----------------- -->
+
+<a name="incompatible"></a>
+<h3 style="color:red">Incompatibilities</h3>
+
+<p style="color:red">
+Following are a few modifications to the library that unfortunately
+are incompatible with previous versions of the library, but which we
+deem necessary for the future maintainability of the
+library. Unfortunately, some of these changes will require
+modifications to application programs. We apologize for the
+inconvenience this causes.
+</p>
+
+<ol>
+
+  <li>Changed: GridTools::distort_random is now deterministic (gives the same
+  distorted mesh when called with the same input Triangulation).
+  <br>
+  (Timo Heister, 2015/07/09)
+  </li>
+
+  <li>Changed: MatrixCreator::create_mass_matrix() took matrix and vector
+  objects where the scalar type of the matrix was a template argument
+  but the scalar type of the vector was always <code>double</code>. This
+  has been changed so that the two always need to match.
+  <br>
+  (Denis Davydov, Wolfgang Bangerth, 2015/06/17)
+  </li>
+
+  <li>Changed: Functions such as FEValuesBase::get_function_values() that
+  extracted the values of functions at the quadrature points of a cell
+  implicitly always assumed that <code>double</code> is a reasonable
+  type to store the result in. This, however, is not true if the solution
+  vector's underlying scalar type was, for example,
+  <code>std::complex@<double@></code>. All of the functions of
+  FEValuesBase as well as of the various FEValuesViews classes that extract
+  values from solution vectors have been changed so that they now return
+  their results in vectors that use the same underlying scalar type
+  (float, double, or std::complex) as was used in the solution vector.
+  <br>
+  Most user codes will be entirely unaffected by this because they simply
+  use the default vector types which all store their data as doubles.
+  You may have to adjust your code, though, if you use non-standard
+  types such as Vector@<float@> for solution vectors, or use
+  complex-valued data types. This includes compiling PETSc with
+  complex scalars.
+  <br>
+  (Denis Davydov, 2015/05/08)
+  </li>
+
+  <li>Removed: The generic, templated vmult, Tvmult, etc. -interfaces of
+  LAPACKFullMatrix - they were never implemented.
+  <br>
+  (Matthias Maier, 2015/04/10)
+  </li>
+
+  <li>Removed: SparseDirectUMFPACK::vmult_add and
+  SparseDirectUMFPACK::Tvmult_add - they were never implemented.
+  <br>
+  (Matthias Maier, 2015/04/10)
+  </li>
+
+  <li>Removed: The class NamedData has been removed after it had been
+  superseded by AnyData a while ago. This affects the use of classes
+  in Algorithms and MeshWorker.
+  <br>
+  (Guido Kanschat, 2015/04/02)
+  </li>
+
+  <li> Removed: The CMake configuration does not use the variable
+  <code>DEAL_II_CMAKE_MACROS_RELDIR</code> any more. Instead, the fixed
+  location <code>\${DEAL_II_SHARE_RELDIR}/macros</code> is used
+  unconditionally.
+  <br>
+  (Matthias Maier, 2015/03/26)
+  </li>
+
+  <li> Changed: The TrilinosWrappers::SparseMatrix::clear_row() function used
+  to call TrilinosWrappers::SparseMatrix::compress() before doing its work,
+  but this is neither efficient nor safe. You will now have to do this
+  yourself after assembling a matrix and before clearing rows.
+  <br>
+  The changes to the function above also affect the
+  MatrixTools::apply_boundary_values() variants that operate on Trilinos
+  matrices.
+  <br>
+  (Wolfgang Bangerth, 2015/03/09)
+  </li>
+
+  <li> Changed: Implicit conversion from Tensor@<1,dim@> to Point@<dim@> was
+  previously possible. This has now been prohibited (but you can still
+  do the conversion with an explicit cast) as such conversions are
+  likely incorrect uses of class Point (which should represent only
+  points in space, i.e., vectors anchored at the origin) whereas Tensor
+  should be used for vectors anchored elsewhere (such as normal vectors,
+  directions, differences between points, etc). The difference in
+  usage between Point and Tensor has now been clarified in the documentation
+  of class Point.
+  <br>
+  (Wolfgang Bangerth, 2015/01/12)
+  </li>
+
+  <li> Changed: The project configuration no longer exports
+  <code>[...]/include/deal.II</code>. Thus it is now mandatory to prefix
+  all includes of deal.II headers with <code>deal.II/</code>, i.e.
+  <code>#include @<deal.II/[...]@></code>.
+  <br>
+  (Matthias Maier, 2015/01/19)
+  </li>
+
+  <li> Changed: ParameterHandler::leave_subsection() no longer returns a bool
+  indicating if there was a subsection to leave. This never worked in the
+  first place, because an exception was thrown.
+  <br>
+  (Timo Heister, 2015/01/19)
+  </li>
+
+  <li> Changed: Make.global_options was completely redesigned. It still
+  contains Makefile-sourceable information, but it now closely mimics the
+  declarative style of deal.IIConfig.cmake. Thus, projects that still use
+  Makefiles that source Make.global_options have to be ported to the new
+  layout.
+  <br>
+  (Matthias Maier, 2015/01/13)
+  </li>
+
+  <li> Removed: The Component <code>compat_files</code> was removed entirely. deal.II
+  now always configures and installs with a somewhat FHS compliant
+  directory structure. Further, the ancient make_dependencies binary was
+  removed. Either migrate your project to CMake, or port your build system
+  to the new (incompatible) Make.global_options found at
+  <code>\${DEAL_II_SHARE_RELDIR}</code>.
+  <br>
+  (Matthias Maier, 2015/01/13)
+  </li>
+
+  <li> Changed: The two-argument call to the MPI_InitFinalize constructor
+  used to imply that the user wanted only one thread per MPI process. This
+  has been changed and now means that every processor core on the system is
+  used. If you run as many MPI processes as there are processor cores, then
+  this means one thread per MPI process, as before. On the other hand, if you
+  start fewer MPI processes than there are cores, then your program will now
+  be allowed to use more than one thread. You can get the old behavior by
+  setting the third (optional) argument to one.
+  <br>
+  (Wolfgang Bangerth, 2015/01/14)
+  </li>
+
+  <li> Removed: TrilinosWrappers::SparseMatrix copy constructor got removed
+  to be in line with PETSc and dealii::SparseMatrix. You can use reinit()
+  and copy_from().
+  <br>
+  (Timo Heister, 2015/01/12)
+  </li>
+
+  <li> Removed: The following compatibility definitions were removed from
+  <code>include/deal.II/base/config.h.in</code> (replacement in brackets):
+  - DEAL_II_CAN_USE_CXX11 (new: DEAL_II_WITH_CXX11)
+  - DEAL_II_CAN_USE_CXX1X (new: DEAL_II_WITH_CXX11)
+  - DEAL_II_COMPILER_SUPPORTS_MPI (new: DEAL_II_WITH_MPI)
+  - DEAL_II_MAJOR (new: DEAL_II_VERSION_MAJOR)
+  - DEAL_II_MINOR (new: DEAL_II_VERSION_MINOR)
+  - DEAL_II_USE_ARPACK (new: DEAL_II_WITH_ARPACK)
+  - DEAL_II_USE_CXX11 (new: DEAL_II_WITH_CXX11)
+  - DEAL_II_USE_METIS (new: DEAL_II_WITH_METIS)
+  - DEAL_II_USE_MT (new: DEAL_II_WITH_THREADS)
+  - DEAL_II_USE_P4EST (new: DEAL_II_WITH_P4EST)
+  - DEAL_II_USE_PETSC (new: DEAL_II_WITH_PETSC)
+  - DEAL_II_USE_SLEPC (new: DEAL_II_WITH_SLEPC)
+  - DEAL_II_USE_TRILINOS (new: DEAL_II_WITH_TRILINOS)
+  <br>
+  (Matthias Maier, 2015/01/12)
+  </li>
+
+  <li> Removed: The direct Mumps interface through
+  <code>SparseDirectMUMPS</code> has been removed. The MUMPS solver is
+  still available through the Trilinos or PETSc interfaces. Alternatively,
+  there is <code>SparseDirectUMFPACK</code>, which has a similar interface.
+  <br>
+  (Matthias Maier, 2015/01/11)
+  </li>
+
+  <li> Removed: This release removes a number of functions that have long
+  been deprecated and that were previously already marked as
+  deprecated (i.e., they would have yielded warnings by the compiler whenever
+  you tried to use them). In almost all cases, there is a function with the same
+  name but different argument list that should be used instead.
+  Specifically, the removed functions and classes are:
+  <br>
+  <em>With headers in <code>deal.II/base/</code>:</em>
+  - ThreadManagement::spawn.
+  - Threads::ThreadCondition and Threads::ThreadMutex.
+  - DataOutBase::create_xdmf_entry with 3 arguments.
+  - DataOutBase::write_hdf5_parallel with 2 arguments.
+  - The versions of FunctionParser::initialize that took a
+    <code>use_degrees</code> or <code>constants</code> argument.
+    The implementation as it is now no longer supports either of
+    these two concepts (since we switched from the FunctionParser
+    library to the muparser library after the deal.II 8.1 release).
+  - GridOutFlags::XFig::level_color.
+  - class BlockList.
+  - The MPI support functions in namespace Utilities and Utilities::System.
+  - Deprecated members of namespace types.
+  - Namespace deal_II_numbers.
+  - MultithreadInfo::n_default_threads.
+  - Table::data.
+
+  <br>
+  <em>With headers in <code>deal.II/lac/</code>:</em>
+  - The deprecated constructors of SparseMIC,
+    SparseILU, and SparseLUDecomposition.
+  - SparseMIC::decompose and SparseILU::decompose.
+  - SparseMIC::reinit and SparseLUDecomposition::reinit.
+  - SparseILU::apply_decomposition.
+  - SparseLUDecomposition::decompose and SparseLUDecomposition::is_decomposed.
+  - The compress() functions without argument in the various vector
+    classes. You should use the versions with a VectorOperation
+    argument instead.
+  - Vector::scale.
+  - TrilinosWrappers::*Vector*::compress with an Epetra_CombineMode
+    argument.
+  - SparsityPattern and ChunkSparsityPattern functions that take an
+    <code>optimize_diagonal</code> argument.
+  - SparsityPattern::partition.
+  - SparsityPattern::get_rowstart_indices and
+    SparsityPattern::get_column_numbers.
+  - SparsityPattern::row_iterator and corresponding row_begin() and row_end()
+    functions.
+  - CompressedSparsityPattern::row_iterator and corresponding row_begin()
+    and row_end() functions.
+  - The typedef CompressedBlockSparsityPattern.
+  - The deprecated constructors of SparsityPattern iterator classes.
+  - The deprecated variants of DoFTools::make_periodicity_constraints.
+  - BlockMatrixArray and BlockTrianglePreconditioner functions that
+    take an explicit VectorMemory object.
+  - The SolverSelector constructor that takes a VectorMemory argument.
+  - The version of parallel::distributed::Vector::compress_finish
+    function that takes a boolean as argument.
+  - The version of BlockVector::scale and
+    parallel::distributed::Vector::scale,
+    parallel::distributed::BlockVector::scale
+    function that takes a scalar as argument.
+  - PreconditionBlock::size.
+  - Classes PreconditionedMatrix and PreconditionLACSolver.
+  - PETScWrappers::VectorBase::update_ghost_values.
+  - PETScWrappers::MPI::Vector constructors and reinit variants.
+  - SparseMatrixIterators::Accessor and SparseMatrixIterators::Iterator
+    constructors.
+  - SparseMatrix::raw_entry and SparseMatrix::global_entry.
+  - The ConstraintMatrix functions that transform a matrix, vector, or
+    linear system into a smaller by not just setting the corresponding
+    rows and columns to zero, but actually shrinking the size of the
+    linear system.
+
+  <br>
+  <em>With headers in <code>deal.II/grid/</code>:</em>
+  - GridGenerator::laplace_transformation.
+  - The version of GridGenerator::parallelogram where the corners are given
+    as a rank-2 tensor rather than as an array of points.
+  - GridTools::create_union_triangulation.
+  - GridTools::extract_boundary_mesh.
+  - Triangulation::distort_random.
+  - Triangulation::clear_user_pointers.
+  - The refinement listener concept of the Triangulation class. This
+    approach to getting notified about what happens to triangulations
+    has been superseded by the signals defined by the triangulation
+    class.
+
+  <br>
+  <em>With headers in <code>deal.II/fe/</code>:</em>
+  - In FEValues and related classes, the functions that contain the
+    term <code>2nd_derivatives</code> were removed in favor of those
+    with names containing <code>hessian</code>. Similarly, functions
+    with names including <code>function_grads</code> were removed in
+    favor of those called <code>function_gradients</code>. Finally,
+    the <code>cell_normal_vector</code> functions were replaced by
+    <code>normal_vector</code> ones. In all cases, the new functions
+    have been around for a while.
+  - Mapping::transform_covariant and Mapping::transform_contravariant.
+
+  <br>
+  <em>With headers in <code>deal.II/dofs/</code>:</em>
+  - DoFRenumbering::downstream_dg.
+  - DoFTools::count_dofs_per_component.
+  - DoFTools::make_sparsity_pattern with a vector-of-vector mask.
+
+  <br>
+  <em>With headers in <code>deal.II/multigrid/</code>:</em>
+  - The constructors of classes MGSmoother, MGSmootherRelaxation and
+    MGSmootherPrecondition that take a VectorMemory object.
+  - MGLevelObject::get_minlevel and MGLevelObject::get_maxlevel.
+  - MGConstrainedDoFs::non_refinement_edge_index.
+  - MGConstrainedDoFs::at_refinement_edge_boundary.
+  - MGTools::count_dofs_per_component.
+  - MGTools::apply_boundary_values.
+  - MGTools::extract_inner_interface_dofs.
+  - Class MGMatrix.
+  - Multigrid::vmult and friends.
+
+  <br>
+  <em>With headers in <code>deal.II/matrix_free/</code>:</em>
+  - Classes FEEvaluationDGP, FEEvaluationGeneral and FEEvaluationGL.
+
+  <br>
+  <em>With headers in <code>deal.II/mesh_worker/</code>:</em>
+  - Deprecated variants of MeshWorker::loop and MeshWorker::integration_loop.
+
+  <br>
+  <em>With headers in <code>deal.II/algorithms/</code>:</em>
+  - Algorithms::ThetaTimestepping::operator().
+  - Algorithms::ThetaTimestepping::initialize.
+  - Algorithms::Newton::initialize.
+
+  <br>
+  <em>With headers in <code>deal.II/numerics/</code>:</em>
+  - TimeDependent::end_sweep (with an argument).
+  - PointValueHistory::mark_locations.
+  - The DataPostprocessor::compute_derived_quantities_scalar and
+    DataPostprocessor::compute_derived_quantities_vector functions without
+    evaluation points. If you have
+    data postprocessor classes implemented in your program that overload these
+    functions, you will have to change it in a way that they overload the
+    functions of same name but with the evaluation point argument instead.
+  <br>
+  This release also removes the deprecated class MGDoFHandler. The
+  functionality of this class had previously been incorporated into
+  the DoFHandler class. Unlike the changes above, if you were still
+  using this class, you will need to do the following changes to
+  your code:
+  - Where you called <code>mg_dof_handler.distribute_dofs()</code>
+    you now also need to explicitly call
+    <code>mg_dof_handler.distribute_mg_dofs()</code>.
+  - If you called <code>mg_dof_handler.begin(level)</code>, you
+    will now have to write this as
+    <code>mg_dof_handler.begin_mg(level)</code> to make clear that
+    you are not just interested in an iterator to a cell on a given
+    level, but in fact to a cell that can access the degrees of
+    freedom on a particular level of a multigrid hierarchy.
+  - The type previously referred to as
+    <code>MGDoFHandler::cell_iterator</code> now corresponds to
+    <code>DoFHandler::level_cell_iterator</code>.
+  - Where you previously called DoFRenumbering::component_wise
+    for the entire MGDoFHandler object, you now need to call
+    this function for the DoFHandler object, and then call the
+    same function with the <code>level</code> argument for each
+    of the levels of the triangulation individually.
+  <br>
+  (Wolfgang Bangerth, 2014/12/29-2015/01/22)
+  </li>
+
+  <li> Removed: The config.h file no longer exports HAVE_* definitions.
+  Those are either entirely removed (for the blas/lapack symbols) or
+  renamed to DEAL_II_HAVE_*. This change is done in order to avoid clashes
+  with external projects also exporting HAVE_* definitions in their header
+  files.
+  <br>
+  (Matthias Maier, 2014/12/29)
+  </li>
+</ol>
+
+
+<!-- ----------- GENERAL IMPROVEMENTS ----------------- -->
+
+<a name="general"></a>
+<h3>General</h3>
+
+
+<ol>
+  <li> New: IndexSet now can be constructed using Epetra_Map.
+  <br>
+  (Luca Heltai, 2015/07/25)
+  </li>
+
+  <li> New: Added the class Functions::Polynomial for representation of polynomials.
+  The new class is derived from the Function class.
+  <br>
+  (Angel Rodriguez, 2015/07/01)
+  </li>
+
+  <li> New: deal.II now supports compilation in C++14 mode, which may be
+  enabled with the CMake option <code>DEAL_II_WITH_CXX14</code>.
+  <br>
+  (David Wells, 2015/06/21)
+  </li>
+
+  <li> New: Implement a modified version of the Kelly error estimator, which
+  effectively provides the boundary residual term for the hp-FEM error estimators.
+  <br>
+  (Denis Davydov, 2015/06/17)
+  </li>
+
+  <li> New: DerivativeForm() now takes an additional optional
+  template argument specifying the type, similarly to Tensor() classes.
+  <br>
+  (Luca Heltai, 2015/05/16)
+  </li>
+
+  <li> New: Utilities::MPI::min() functions.
+  <br>
+  (Timo Heister, 2015/05/12)
+  </li>
+
+  <li> New: A PackagedOperation class that stores computation expressions.
+  The primary purpose of this class is to provide syntactic sugar for vector
+  operations (vector space addition, scalar multiplication, application of a
+  linear operator) while avoiding intermediate storage.
+  <br>
+  (Matthias Maier, 2015/05/10)
+  </li>
+
+  <li> Fixed: Utilities::generate_normal_random_number() will now
+  produce a deterministic sequence of numbers on every thread.
+  Furthermore, it will also work on systems that do not have the
+  <code>rand_r</code> library function, such as Cygwin.
+  <br>
+  (Wolfgang Bangerth, 2015/04/19)
+  </li>
+
+  <li> New: A LinearOperator class that stores the abstract concept of a
+  linear operator. The class is fully compatible with the solver and
+  preconditioner interfaces. The primary purpose of this class is to
+  provide syntactic sugar for complex matrix-vector operations and free the
+  user from having to create, set up and handle intermediate storage
+  locations by hand.
+  <br>
+  (Matthias Maier, 2015/04/08)
+  </li>
+
+  <li> Fixed: There were a number of places in the library where we unconditionally
+  called functions <code>_mm_malloc()/_mm_free()</code> to allocate and free
+  memory with a known alignment. This function, however, is only available on
+  systems with x86 or x86_64 compatible processors. These places have now been
+  replaced by calling <code>posix_memalign()</code> instead, a function that
+  should be more widely available.
+  <br>
+  (Wolfgang Bangerth, 2015/04/15)
+  </li>
+
+  <li> Deprecated: The library uses functions such as CellAccessor::subdomain_id(),
+  TriaAccessor::manifold_id(), etc, but used the deviant spelling
+  TriaAccessor::boundary_indicator(), TriaAccessor::set_boundary_indicator(),
+  TriaAccessor::set_all_boundary_indicators(). These last three functions are now
+  deprecated and have been replaced by TriaAccessor::boundary_id(),
+  TriaAccessor::set_boundary_id(), and TriaAccessor::set_all_boundary_ids() for
+  consistency. Similarly, Triangulation::get_boundary_indicators() has been
+  deprecated in favor of Triangulation::get_boundary_ids().
+  <br>
+  (Wolfgang Bangerth, 2015/04/11)
+  </li>
+
+  <li> Changed: All example programs used to have calls to set_boundary()
+  methods to deal with curved boundaries. These have been replaced with
+  the corresponding set_manifold() equivalent.
+  <br>
+  (Luca Heltai, 2015/04/06)
+  </li>
+
+  <li> New: A new flag no_automatic_repartitioning in
+  parallel::distributed::Triangulation will disable the automatic
+  repartitioning when calling execute_coarsening_and_refinement() (or things
+  like refine_global(), ...), resulting in all cells staying on the processor
+  they were before. The new function repartition() will execute the
+  repartitioning as done automatically before.
+  <br>
+  (Timo Heister, 2015/03/22)
+  </li>
+
+  <li> Changed: All (Block)Compressed*SparsityPattern classes got
+  replaced by DynamicSparsityPattern and
+  BlockDynamicSparsityPattern, respectively and all examples now
+  teach the dynamic way of creating dynamic sparsity patterns.
+  <br>
+  (Timo Heister, 2015/03/22)
+  </li>
+
+  <li> Improved: We have traditionally had a large number of exceptions
+  that did not output any useful error message other than the name
+  of the exception class. This name was suggestive of the error that
+  had happened, but did not convey a sufficient amount of information
+  about what happened in many of the places where these kinds of exceptions
+  were used, nor what may have caused the exception, or how it could
+  be avoided. We have gone through many of these places and changed
+  the exceptions to be much more verbose in what they state about the
+  problem, its origin, and how it may be solved.
+  <br>
+  (Wolfgang Bangerth, 2015/02/28-2015/03/31)
+  </li>
+
+  <li> Changed: We have traditionally used Point@<dim@> to represent points
+  in physical space, i.e., vectors that are anchored at the origin, whereas
+  for vectors anchored elsewhere (e.g., differences between points, normal
+  vectors, gradients, etc) we have used Tensor@<1,dim@>. This has now been
+  made more formal in the documentation but also in the return types of
+  <code>operator-()</code> for Point objects: The difference between two
+  points, <code>p1-p2</code> now returns a Tensor@<1,dim@>. On the other
+  hand, subtracting a Tensor@<1,dim@> object from a Point, <code>p-t</code>,
+  results in a Point@<dim@>.
+  <br>
+  (Wolfgang Bangerth, 2015/02/05)
+  </li>
+
+  <li> New: Examples from 1 to 16 now use the Manifold interface
+  instead of the old Boundary interface to describe curved boundaries
+  and domains.
+  <br>
+  (Luca Heltai, 2015/01/15)
+  </li>
+
+  <li> New: The build system now queries for git branch name and
+  revision sha1 (and automatically reconfigures if necessary). This
+  information is used to annotate summary.log and detailed.log with the
+  current revision sha1. Further, a header file @<deal.II/base/revision.h@>
+  is now available that exports the macros: DEAL_II_GIT_BRANCH,
+  DEAL_II_GIT_REVISION, DEAL_II_GIT_REVISION_SHORT.
+  <br>
+  (Matthias Maier, 2015/01/02)
+  </li>
+</ol>
+
+
+<!-- ----------- SPECIFIC IMPROVEMENTS ----------------- -->
+
+<a name="specific"></a>
+<h3>Specific improvements</h3>
+
+
+
+<ol>
+  <li> New: VectorTools::get_position_vector now works with arbitrary
+  FESystems, provided that the geometrical components are primitive, 
+  and that you provide a component mask to select what components of 
+  the finite element to use for the geometrical interpolation.
+  <br>
+  (Luca Heltai, 2015/07/25)
+  </li>
+
+  <li> Fixed: parallel::distributed::refine_and_coarsen_fixed_fraction()
+  in rare circumstances decided to not refine any cells at all, even
+  if the refinement threshold was nonzero. This is now fixed.
+  <br>
+  (Wolfgang Bangerth, Andrea Bonito, 2015/07/24)
+  </li>
+
+  <li> Fixed: Bug in DynamicSparsityPattern::iterator would cause invalid
+  std::vector::iterator comparison.
+  <br>
+  (Timo Heister, 2015/07/22)
+  </li>
+
+  <li>New: parallel::distributed::Triangulation::add_periodicity
+  now allows for arbitrary orientations between matching faces.
+  <br>
+  (Daniel Arndt, 2015/07/12)
+  </li>
+
+  <li> New: Utilities::trim() function removes trailing and leading spaces.
+  <br>
+  (Timo Heister, 2015/07/11)
+  </li>
+
+  <li>Changed: The IsBlockMatrix class is now declared in
+  <code>constraint_matrix.h</code> instead of its former home in
+  <code>block_indices.h</code>.
+  <br>
+  (Wolfgang Bangerth, 2015/07/10)
+  </li>
+
+  <li>New: CellId::to_string() returns a string representation of a CellId object.
+  <br>
+  (Timo Heister, 2015/07/05)
+  </li>
+
+  <li>New: Utilities::replace_in_string().
+  <br>
+  (Timo Heister, 2015/07/05)
+  </li>
+
+  <li>Improved: GridOut::write_vtk() and GridOut::write_vtu() now
+  output material id, level and subdomain ids of the cells.
+  <br>
+  (Guido Kanschat, 2015/07/05)
+  </li>
+
+  <li>Improved: The font scaling in GridOut::write_svg() was broken,
+  since the units were missing. It has been fixed and an additional
+  parameter GridOutFlags::Svg::cell_font_scaling has been introduced
+  for tuning.
+  <br>
+  (Guido Kanschat, 2015/07/04)
+  </li>
+
+  <li> New: VectorizedArray now provides two methods
+  vectorized_load_and_transpose() and vectorized_transpose_and_store() that
+  perform vectorized reads or writes and convert from array-of-struct into
+  struct-of-array or the other way around.
+  <br>
+  (Martin Kronbichler, 2015/07/02)
+  </li>
+
+  <li>New: GridGenerator::cheese() for a mesh with many holes;
+  GridGenerator::simplex() for simplices in 2 and 3 dimensions;
+  GridGenerator::hyper_cross() for crosses in 2 and 3 dimensions.
+  <br>
+  (Guido Kanschat, 2015/07/02)
+  </li>
+
+  <li> Fixed: The specialization of DoFAccessor for zero-dimensional objects,
+  i.e., for vertices as created by accessing the faces of one-dimensional
+  cells, had a member function DoFAccessor::child() that was declared but not
+  implemented. This is now fixed.
+  <br>
+  (Wolfgang Bangerth,  2015/07/01)
+  </li>
+
+  <li> Improved: Functions::Monomial::gradient function now works when both base and exponent
+  are equal to zero for one or more components of the monomial.
+  Also, an assertion is added to avoid exponentiation of negative base numbers with real exponents.
+  <br>
+  (Angel Rodriguez,  2015/06/29)
+  </li>
+
+  <li> Fixed: The function numbers::is_finite() produced incorrect results when
+  called with a NaN number (specifically, it produced an uncatchable floating
+  point exception when called with a signaling NaN). This was clearly not
+  intended since such values are definitely not finite.
+  <br>
+  (Wolfgang Bangerth, 2015/06/29)
+  </li>
+
+  <li> Improved: The SparseMatrix class can now also use <code>std::complex</code>
+  scalars for its elements.
+  <br>
+  (Wolfgang Bangerth, 2015/06/26)
+  </li>
+
+  <li> Improved: FE_DGQArbitraryNodes::get_name() now also detects if
+  the quadrature rule was Gauss points.
+  <br>
+  (Guido Kanschat, 2015/06/22)
+  </li>
+
+  <li> Improved: DoFRenumbering::Cuthill_McKee() can now also
+  use starting indices for parallel triangulations.
+  <br>
+  (Wolfgang Bangerth, 2015/06/11)
+  </li>
+
+  <li> Improved: VectorTools::interpolate now works with FE_Nothing.
+  <br>
+  (Angel Rodriguez, 2015/06/03)
+  </li>
+
+  <li> Improved: deal.II now uses a variety of strategies to silence compiler
+  warnings about unused variables and unused parameters.
+  <br>
+  (David Wells, 2015/04/13)
+  </li>
+
+  <li> New: Add a clear function to the PETSc::Vector
+  and PETSc::MPI::Vector classes similar to the Trilinos vector classes.
+  <br>
+  (Martin Steigemann, 2015/05/22)
+  </li>
+
+  <li> New: Three new quadrature formulas in quadrature_lib, based on
+  Chebyshev quadrature rules. See functions QGaussChebyshev,
+  QGaussRadauChebyshev and QGaussLobattoChebyshev.
+  <br>
+  (Giuseppe Pitton, Luca Heltai, 2015/05/11)
+  </li>
+
+  <li> Fixed: MatrixOut now also works with Trilinos and PETSc matrices.
+  <br>
+  (Wolfgang Bangerth, 2015/05/11)
+  </li>
+
+  <li> Changed: TrilinosWrappers::Vector, TrilinosWrappers::BlockVector,
+  PETScWrappers::Vector, and PETScWrappers::BlockVector are deprecated. Either
+  use the MPI or the deal.II version of the Vector/BlockVector.
+  <br>
+  (Bruno Turcksin, 2015/05/04)
+  </li>
+
+  <li> Fixed: GridGenerator::half_hyper_shell can now be colorized.
+  <br>
+  (Daniel Arndt, 2015/05/05)
+  </li>
+
+  <li> New: The function VectorTools::point_gradient has been added to compute
+  the gradient of a given FE function.
+  <br>
+  (Daniel Arndt, 2015/05/03)
+  </li>
+
+  <li> New: dealii::Vector, dealii::BlockVector,
+  TrilinosWrappers::MPI::Vector, TrilinosWrappers::MPI::BlockVector,
+  PETScWrappers::MPI::Vector and PETScWrappers::MPI::BlockVector now have
+  move constructors and move assignment operators in C++11 mode.
+  <br>
+  (Matthias Maier, 2015/05/01)
+  </li>
+
+  <li> New: Introduce DoFRenumbering::block_wise for multigrid computation.
+  <br>
+  (Timo Heister, Florian Sonner, 2015/04/30)
+  </li>
+
+  <li> New: There are now MPI sum functions for Tensors and SymmetricTensors
+  in the Utilities::MPI namespace.
+  <br>
+  (Ian Rose, 2015/04/24)
+  </li>
+
+  <li> Fixed: project_boundary_values_curl_conforming_l2() produced incorrect
+  results for non-uniform grids in 2D. Adjustment to the way 2D tangents to edges are
+  computed fixes this.
+  <br>
+  (Ross Kynch, 2015/04/23)
+  </li>
+
+  <li> Fixed: The TimerOutput class reported abnormally large cpu time when run
+  with more than one process with MPI. Now this is fixed.
+  <br>
+  (Lei Qiao, 2015/04/19)
+  </li>
+
+  <li> New: The VectorTools::integrate_difference() function can now
+  also compute the $H_\text{div}$ seminorm, using the
+  VectorTools::NormType::Hdiv_seminorm argument.
+  <br>
+  (Zhen Tao, Arezou Ghesmati, Wolfgang Bangerth, 2015/04/17)
+  </li>
+
+  <li> Fixed: The class SymmetricTensor@<2,dim@> is now usable also for dim@>3.
+  <br>
+  (Martin Kronbichler, 2015/04/14)
+  </li>
+
+  <li> New: The DynamicSparsityPattern class (formerly called
+  CompressedSparsityPattern) now has an iterator class that allows one to
+  walk over the nonzero elements of a matrix represented by this class.
+  <br>
+  (Wolfgang Bangerth, 2015/04/13)
+  </li>
+
+  <li> New: The GridGenerator::subdivided_hyper_cube() and
+  GridGenerator::subdivided_hyper_rectangle() now work also for codimension
+  one and two Triangulations.
+  <br>
+  (Luca Heltai, 2015/04/12)
+  </li>
+
+  <li> New: A new VectorTools::get_position_vector() function has been
+  added to the library that allows one to interpolate the Geometry of
+  a (possibly curved) triangulation to vector finite element fields
+  of at least spacedim components.
+  <br>
+  (Luca Heltai, 2015/04/11)
+  </li>
+
+  <li> New: TrilinosWrappers::BlockSparseMatrix now has member functions
+  TrilinosWrappers::BlockSparseMatrix::domain_partitioner() and
+  TrilinosWrappers::BlockSparseMatrix::range_partitioner() that return a
+  vector of the underlying block Epetra_Map.
+  <br>
+  (Matthias Maier, 2015/04/08)
+  </li>
+
+  <li> New: A new MappingFEField() class has been added to the library
+  that generalizes MappingQEulerian to allow arbitrary FiniteElements.
+  <br>
+  (Marco Tezzele, Luca Heltai, 2015/04/06)
+  </li>
+
+  <li> Changed: The cells of coarse meshes in
+  parallel::distributed::Triangulation used to be ordered in a Cuthill-McKee
+  numbering, which yields very high surface-to-volume ratios of the individual
+  processors' partition in case the coarse mesh consists of many cells, in
+  particular in 3D. The algorithm now uses SparsityTools::reorder_hierarchical
+  in order to get more compact partitions, similarly to the z-ordering applied
+  by p4est.
+  <br>
+  (Martin Kronbichler, 2015/04/10)
+  </li>
+
+  <li> New: There is now a new method
+  GridTools::get_vertex_connectivity_of_cells.
+  <br>
+  (Martin Kronbichler, 2015/04/10)
+  </li>
+
+  <li> New: There is now a new method SparsityTools::reorder_hierarchical to
+  sort nodes of a graph (sparsity pattern) in a z-like way by hierarchical
+  grouping of neighboring nodes.
+  <br>
+  (Martin Kronbichler, 2015/04/10)
+  </li>
+
+  <li> Changed: The methods SparsityTools::reorder_Cuthill_McKee and
+  GridTools::get_face_connectivity_of_cells used to take a SparsityPattern as
+  argument. The data type has been changed to DynamicSparsityPattern in order
+  to avoid copying things around. The old interface is still available but
+  marked as deprecated.
+  <br>
+  (Martin Kronbichler, 2015/04/10)
+  </li>
+
+  <li> New: One can now ask a cell the how many-th active cell it is,
+  using CellAccessor::active_cell_index() (which is called as
+  <code>cell-@>active_cell_index()</code>).
+  <br>
+  (Wolfgang Bangerth, 2015/04/10)
+  </li>
+
+  <li> Fixed: Added missing hp-related functions to FE_Q_Hierarchical together with
+  a couple of unit tests. Improved code comments.
+  <br>
+  (Denis Davydov, 2015/04/10)
+  </li>
+
+  <li> Fixed: deal.II did not compile on 32-bit systems when using newer
+  p4est versions (1.0 and later) due to a type mismatch. This is now fixed.
+  <br>
+  (Wolfgang Bangerth, 2015/04/08)
+  </li>
+
+  <li> Changed: In the spirit of the changes made to the distinction
+  between Point and Tensor objects discussed above, the first argument
+  to GridTools::shift() has been changed from a Point to a Tensor@<1,dim@>.
+  <br>
+  (Wolfgang Bangerth, 2015/04/02)
+  </li>
+
+  <li> New: There is now a new quadrature formula in quadrature_lib. It is
+  now possible to use Telles' quadrature rules through the function QTelles
+  to integrate singular integrals.
+  <br>
+  (Nicola Giuliani, 2015/04/01)
+  </li>
+
+  <li> New: Added FE_Bernstein: a scalar finite element based on Bernstein basis polynomials.
+  <br>
+  (Marco Tezzele, Luca Heltai, 2015/03/31)
+  </li>
+
+  <li> New: A function to get a map with all vertices at boundaries has
+  been added at GridTools::get_all_vertices_at_boundary(). This function
+  will return a map which can be used in functions like
+  GridTools::laplace_transform().
+  <br>
+  (Fernando Posada, 2015/03/31)
+  </li>
+
+  <li> Fixed: TrilinosWrappers::SparseMatrix::local_range() erroneously
+  threw an exception in 64-bit mode. This is now fixed.
+  <br>
+  (Wolfgang Bangerth, 2015/03/24)
+  </li>
+
+  <li> New: The GridOut::write_gnuplot() function produced output
+  for 1d meshes embedded in higher dimensional spaces that was
+  invalid in that the lines showing individual cells were connected.
+  While this is not wrong for singly connected 1d meshes, it leads to wrong
+  results if the domain is not singly connected and not every cell is the
+  right neighbor of the previous cell.
+  <br>
+  (Wolfgang Bangerth, 2015/03/23)
+  </li>
+
+  <li> Fixed: The various file format writers of class GridOut were not
+  instantiated for 1d meshes in 3d space. This is now fixed.
+  <br>
+  (Wolfgang Bangerth, 2015/03/23)
+  </li>
+
+  <li> New: ParameterHandler::declare_alias() allows to define
+  alternate names for parameters. This is primarily intended to allow
+  for backward compatible ways of changing the names of parameters
+  to applications.
+  <br>
+  (Wolfgang Bangerth, 2015/03/22)
+  </li>
+
+  <li> New: GridGenerator::create_triangulation_with_removed_cells() creates
+  a new mesh out of an existing one by dropping individual cells.
+  <br>
+  (Wolfgang Bangerth, 2015/03/13)
+  </li>
+
+  <li> New: Add MueLu preconditioner from Trilinos through the class
+  TrilinosWrappers::PreconditionAMGMueLu. This is a new algebraic
+  multigrid package. The input parameters are almost the same as the ones
+  from ML so that the two preconditioners can be easily swapped.
+  <br>
+  (Bruno Turcksin, 2015/03/11)
+  </li>
+
+  <li> Fixed: Iterating over the elements of a TrilinosWrappers::SparseMatrix
+  object previously led to errors if the matrix was in fact stored in
+  parallel across multiple MPI processes. This is now fixed: rows not
+  stored locally on the processor where you run the iteration simply look
+  like they're empty.
+  <br>
+  (Wolfgang Bangerth, 2015/03/08)
+  </li>
+
+  <li> New: There is now a new macro DeclExceptionMsg that allows to
+  declare an exception that does not take any run-time arguments
+  yet still allows to specify an error message.
+  <br>
+  (Wolfgang Bangerth, 2015/02/27)
+  </li>
+
+  <li> New: There is now a class std_cxx11::unique_ptr that provides
+  the functionality of std::unique_ptr in C++11 mode, and that
+  provides an emulation for older compilers.
+  <br>
+  (Wolfgang Bangerth, 2015/02/22)
+  </li>
+
+  <li> New: IndexSet now has a local typedef IndexSet::value_type.
+  <br>
+  (Wolfgang Bangerth, 2015/02/22)
+  </li>
+
+  <li> New: FE_TraceQ can now also be used in 1D.
+  <br>
+  (Martin Kronbichler, 2015/02/21)
+  </li>
+
+  <li> New: FE_TraceQ now implements get_face_interpolation_matrix and
+  get_subface_interpolation_matrix, enabling
+  DoFTools::make_hanging_node_constraints on this element.
+  <br>
+  (Anton Evgrafov, 2015/02/21)
+  </li>
+
+  <li> Fixed: MappingQEulerian would previously not move interior points
+  in 1D for higher order mappings. This has been fixed by removing a few
+  specializations of MappingQ for 1D that are no longer necessary.
+  <br>
+  (Martin Kronbichler, 2015/02/19)
+  </li>
+
+  <li> Fixed: The implementation of the class GrowingVectorMemory has been
+  moved from source/lac/vector_memory.cc to the new file
+  include/deal.II/lac/vector_memory.templates.h. This allows users to
+  create instantiations of GrowingVectorMemory for their own vector classes
+  in case they intend to use them for the deal.II solvers.
+  <br>
+  (Martin Kronbichler, 2015/02/18)
+  </li>
+
+  <li> Changed: All members of MultithreadInfo are now static so it is no
+  longer necessary to use the global instance multithread_info (now
+  deprecated) or create your own instance (which does not work correctly
+  anyway).
+  <br>
+  (Timo Heister, 2015/02/13)
+  </li>
+
+  <li> Fixed: There was a bug in the energy source term of step-33 whereby
+  the term was erroneously multiplied by the density. This is now fixed.
+  <br>
+  (Praveen C, Lei Qiao, 2015/02/13)
+  </li>
+
+  <li> Changed: If you take the product of a Tensor and a scalar number,
+  you previously got a Tensor back that stored its elements in the same
+  data type as the original tensor. This leads to problems if you
+  multiply a <code>Tensor@<1,dim,double@></code> by a
+  <code>std::complex@<double@></code> because the result clearly needs
+  to store its elements as complex numbers, rather than as double
+  variables. This is now changed: The result of the product of a Tensor
+  and a scalar number is now a Tensor that stores its elements in a data
+  type appropriate for this product. The same approach is taken for the
+  SymmetricTensor class.
+  <br>
+  (Wolfgang Bangerth, 2015/02/11)
+  </li>
+
+  <li> New: There is now a new class ProductType that can be used
+  to infer the type of the product of two objects. There is now also
+  a class EnableIfScalar that helps restrict some templates to only
+  cases where a type is a scalar.
+  <br>
+  (Wolfgang Bangerth, 2015/02/04)
+  </li>
+
+  <li> New: The Tensor classes now have copy constructors and copy
+  operators that allow assignment from other tensors with different
+  underlying scalar types.
+  <br>
+  (Denis Davydov, 2015/02/03)
+  </li>
+
+  <li> New: Class hp::DoFHandler can now also be serialized.
+  <br>
+  (Lukas Korous, 2015/01/31)
+  </li>
+
+  <li> Bugfix: deal.II now correctly links against librt in case of bundled
+  boost being used.
+  <br>
+  (Matthias Maier, 2015/01/27)
+  </li>
+
+  <li> New: A new macro <code>DEAL_II_QUERY_GIT_INFORMATION</code> is
+  provided to query user projects for git repository information similarly
+  to those exported by deal.II.
+  <br>
+  (Matthias Maier, 2015/01/21)
+  </li>
+
+  <li> Fixed: FEFaceValues and FESubfaceValues did not fill the
+  jacobians and inverse jacobians if requested via the update flags.
+  This is now fixed.
+  <br>
+  (Martin Kronbichler, 2015/01/23)
+  </li>
+
+  <li> Fixed: ParameterHandler::read_input() now checks that
+  'subsection'/'end' are balanced in the input.
+  <br>
+  (Timo Heister, 2015/01/19)
+  </li>
+
+  <li> Fixed: In 3d, when you set the <code>colorize</code> flag of
+  GridGenerator::hyper_shell(), the faces of the domain were colored but
+  the edges were not. This was an oversight because to refine correctly,
+  the edges also have to have the appropriate boundary indicator set.
+  <br>
+  (Wolfgang Bangerth, 2015/01/16)
+  </li>
+
+  <li> New: dealii::multithread_info.n_cpus returns the correct number of CPUs
+  on FreeBSD.
+  <br>
+  (Bruno Turcksin, 2015/01/14)
+  </li>
+
+  <li> Improved: MPI collective operations such as MPI::sum, MPI::max now
+  check for job_supports_mpi() internally, which allows running them also
+  without a call to MPI_Init.
+  <br>
+  (Martin Kronbichler, 2015/01/13)
+  </li>
+
+  <li> Changed: The method job_supports_mpi() now resides in the namespace
+  Utilities::MPI instead of Utilities::System for consistency with other MPI
+  methods. The old method has been marked deprecated and will be removed in
+  a future version.
+  <br>
+  (Martin Kronbichler, 2015/01/13)
+  </li>
+
+  <li> Fixed: The update of ghost values in parallel::distributed::Vector when
+  calling the assignment operator is now active when one of the two vectors had
+  its ghost values updated before or when the layout of the right hand side
+  vector is one-to-one, more consistent with parallel PETSc and Trilinos
+  vectors.
+  <br>
+  (Martin Kronbichler, 2015/01/13)
+  </li>
+
+  <li> New: PETScWrappers::MPI::SparseMatrix::reinit(other) copies
+  the layout of another matrix. TrilinosWrappers::SparseMatrix
+  operator= and copy constructor are now disabled. This brings
+  functionality between PETSc and Trilinos in line.
+  <br>
+  (Timo Heister, 2015/01/12)
+  </li>
+
+  <li> New: Triangulation::set_all_manifold_ids() and
+  Triangulation::set_all_manifold_ids_on_boundaries()
+  set all manifold ids on every object or on every
+  boundary object respectively.
+  <br>
+  (Luca Heltai, 2015/01/12)
+  </li>
+
+  <li> New: GridTools::copy_boundary_to_manifold_id() and
+  GridTools::copy_material_to_manifold_id() copy
+  boundary_ids and material_ids to manifold_ids for
+  faces on the boundary and for cells respectively.
+  <br>
+  (Luca Heltai, 2015/01/09)
+  </li>
+
+  <li> Fixed: Utilities::int_to_string() produced wrong results if
+  the number of digits specified was ten or greater.
+  <br>
+  (David Wells, 2015/01/08)
+  </li>
+
+  <li> Fixed: VectorTools::interpolate_to_different_mesh() was accidentally
+  only instantiated for dealii::Vector arguments, rather than all vector
+  classes. This is now fixed.
+  <br>
+  (Benjamin Brands, Wolfgang Bangerth, 2014/12/29)
+  </li>
+
+  <li> Fixed: Use CASROOT environment variable as additional hint for
+  opencascade.
+  <br>
+  (Matthias Maier, 2014/12/29)
+  </li>
+
+  <li> Fixed: Update the run_testsuite.cmake script to also pick up
+  muparser and opencascade configuration.
+  <br>
+  (Matthias Maier, 2014/12/29)
+  </li>
+
+  <li> Fixed: Update several places in the documentation that were not
+  updated from functionparser to muparser. Add several forgotten
+  DEAL_II_WITH_* variables to certain places in the documentation.
+  <br>
+  (Matthias Maier, 2014/12/29)
+  </li>
+</ol>
+
+*/
diff --git a/doc/news/8.3.0-vs-8.4.0.h b/doc/news/8.3.0-vs-8.4.0.h
new file mode 100644
index 0000000..70ffe89
--- /dev/null
+++ b/doc/news/8.3.0-vs-8.4.0.h
@@ -0,0 +1,1168 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2014 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/**
+ at page changes_between_8_3_and_8_4 Changes between Version 8.3.0 and 8.4.0
+
+<p>
+This is the list of changes made between the release of deal.II version
+8.3.0 and that of 8.4.0. All entries are signed with the names of the
+authors.
+</p>
+
+
+
+<!-- ----------- INCOMPATIBILITIES ----------------- -->
+
+<a name="incompatible"></a>
+<h3 style="color:red">Incompatibilities</h3>
+
+<p style="color:red">
+Following are a few modifications to the library that unfortunately
+are incompatible with previous versions of the library, but which we
+deem necessary for the future maintainability of the
+library. Unfortunately, some of these changes will require
+modifications to application programs. We apologize for the
+inconvenience this causes.
+</p>
+
+<ol>
+  <li> Changed: GridGenerator::hyper_rectangle and
+  GridGenerator::subdivided_hyper_rectangle now take points with @p dim
+  components to correctly handle meshes embedded in higher dimensional spaces.
+  <br>
+  (Timo Heister, 2016/02/04)
+  </li>
+
+  <li> Changed: The constructor of FiniteElementData had a last argument
+  <code>n_blocks</code> that was not actually used by the class to
+  initialize anything. It has been removed. In addition, the default
+  constructor of FiniteElementData has also been removed given that it
+  only creates a dysfunctional element.
+  <br>
+  (Wolfgang Bangerth, 2016/01/23).
+  </li>
+
+  <li> Rework: SLEPcWrappers were reworked to allow usage of PETSc solvers
+  and preconditioners inside SLEPc's eigensolvers. To that end extra methods
+  were introduced to PETSc wrappers. Moreover, initialization of the
+  underlying SLEPc objects is now done inside constructors of the wrapper
+  classes. As a result, one has to provide an MPI communicator to the constructors of
+  spectral transformation classes.
+  <br>
+  (Denis Davydov, 2015/12/29).
+  </li>
+
+  <li> Removed: The deprecated Operator class in the Algorithms namespace has been
+  removed.
+  <br>
+  (Timo Heister, 2015/12/21)
+  </li>
+
+  <li> Changed: deallog console depth is now 0 by default, causing no
+  output to the screen from solvers and other places in the library.
+  <br>
+  (Timo Heister, 2015/12/06)
+  </li>
+
+  <li> Changed: The function Utilities::trim() now removes general
+  white space characters, such as '<tt>\\r</tt>' and '<tt>\\n</tt>', as well as
+  space characters.
+  <br>
+  (David Wells, 2015/12/05)
+  </li>
+
+  <li> Removed: The previously deprecated global instance
+  <code>multithread_info</code> of
+  MultithreadInfo has been removed (all members of this class are static
+  so there is no reason to use/create an instance). The deprecated
+  MultithreadInfo::n_cpus member also got removed in favor of
+  MultithreadInfo::n_cores().
+  <br>
+  (Timo Heister, 2015/11/19)
+  </li>
+
+  <li> Removed: The <code>UpdateFlags</code> flags
+  <code>update_support_points</code>, <code>update_support_jacobians</code>,
+  and <code>update_support_inverse_jacobians</code> have been removed.
+  <code>update_support_points</code> was deprecated in 2013 and has not done
+  anything in a long time. The
+  other two appeared in 2007 and were never implemented.
+  <br>
+  (David Wells, 2015/09/16)
+  </li>
+
+  <li> Cleanup: The two argument variant of cross_product() that returned
+  the result by reference as first argument has been removed. Use the
+  function cross_product_2d(), or cross_product_3d(), that directly returns
+  the result instead. Further, the exception
+  Tensor<rank,dim,Number>::ExcInvalidTensorContractionIndex has been
+  removed.
+  <br>
+  (Matthias Maier, 2015/09/14 - 2015/09/17)
+  </li>
+
+  <li> Cleanup: The following functions in tensor.h have been deprecated:
+  <br>
+  - double_contract(). Use the generic contraction function
+    contract() instead.
+  - The four and five argument variants of contract() that return the
+    result by reference as first argument and take the contraction indices as
+    arguments. Use the generic contraction function contract() instead.
+  - The three argument variants of contract() that return the result by
+    reference as first argument. Use <code>operator*</code> instead.
+  - The three argument variant of cross_product() that returns the result
+    by reference as first argument. Use the cross_product() function that
+    directly returns the result instead.
+  - The three argument variants of <code>outer_product</code> that return
+    the result by reference as first argument. Use the function that
+    directly returns the result instead.
+  - determinant(dealii::Tensor<rank,1,Number>)
+  <br>
+  (Matthias Maier, 2015/09/14 - 2015/09/17)
+  </li>
+
+  <li> Removed: The Tensor and Point classes no
+  longer have a constructor taking a boolean argument. Those were replaced
+  by a default constructor that will always initialize underlying values with
+  zero.
+  <br>
+  (Matthias Maier, 2015/09/07)
+  </li>
+
+  <li> Removed: The testsuite no longer supports compiler constraints of
+  the form "<code>.compiler=[NAME]...</code>".
+  <br>
+  (Matthias Maier, 2015/08/21)
+  </li>
+
+  <li> Changed: The parameter @p first_vector_components has been removed
+  from GridTools::collect_periodic_faces(). Instead,
+  DoFTools::make_periodicity_constraints() now accepts a parameter
+  @p first_vector_components in all (supported) variants.
+  <br>
+  (Matthias Maier, 2015/08/21)
+  </li>
+
+  <li> Changed: FEValues::normal_vector() for historical reasons returned a
+  value of type Point, though a normal vector is more adequately described
+  as a Tensor@<1,dim@>. Many similar cases were already clarified in deal.II
+  8.3. The current case has now also been changed: FEValues::normal_vector()
+  now returns a Tensor, rather than a Point.
+  <br>
+  In a similar spirit, the FEValues::get_normal_vectors() function that
+  still returns a vector of Points has been deprecated, and a new function,
+  FEValues::get_all_normal_vectors() that returns a vector of tensors,
+  has been added. This was necessary since there is no way to change the
+  return type of the existing function in a backward compatible way. The
+  old function will be removed in the next version, and the new function
+  will then be renamed to the old name.
+  <br>
+  (Wolfgang Bangerth, 2015/08/20)
+  </li>
+
+  <li> Changed: The mesh_converter program has been removed from the
+  contrib folder. The equivalent functionality can now be found in
+  the GridIn class.
+  <br>
+  (Jean-Paul Pelteret, 2015/08/12)
+  </li>
+
+  <li> Changed: The signature of the FiniteElement::fill_fe_values(),
+  FiniteElement::fill_fe_face_values(), and FiniteElement::fill_fe_subface_values()
+  functions has been changed, in an effort to clarify which of these contain
+  input information and which contain output information for these functions.
+  The same has been done for the corresponding functions in the Mapping
+  class hierarchy.
+  <br>
+  Likewise the signature of FiniteElement::get_data() has been changed.
+  <br>
+  As part of a general overhaul, the FEValuesData class
+  has also been removed.
+  <br>
+  (Wolfgang Bangerth, 2015/07/20-2015/08/13)
+  </li>
+
+  <li> Changed: The functions update_once() and update_each() in the
+  Mapping classes computed information that was, in essence, only of use
+  internally. No external code actually needed to know which
+  pieces of information a mapping could compute once and which they needed
+  to compute on every cell. Consequently, these two functions have been
+  removed and have been replaced by Mapping::requires_update_flags().
+  <br>
+  A similar change has been applied to the FiniteElement class.
+  <br>
+  (Wolfgang Bangerth, 2015/07/20-2015/12/01)
+  </li>
+
+  <li> Changed: The function DoFRenumbering::random() now produces different
+  numberings than it did before, but in return has now acquired the property
+  that its results are predictable and repeatable.
+  <br>
+  (Wolfgang Bangerth, 2015/07/21)
+  </li>
+</ol>
+
+
+<!-- ----------- GENERAL IMPROVEMENTS ----------------- -->
+
+<a name="general"></a>
+<h3>General</h3>
+<ol>
+  <li> New: A variant for GridGenerator::subdivided_parallelepiped() was added
+  that supports meshes embedded in higher dimensional spaces.
+  <br>
+  (Timo Heister, 2016/02/04)
+  </li>
+
+  <li> Fixed: Partitioning using METIS now works correctly with more
+  domains than cells.
+  <br>
+  (Timo Heister, 2016/01/26)
+  </li>
+
+  <li> New: The documentation of step-17 has been completely rewritten,
+  and many aspects of how one has to think when writing parallel programs
+  have been much better documented now.
+  <br>
+  (Wolfgang Bangerth, 2016/01/07)
+  </li>
+
+  <li> New: deal.II now provides a string <code>DEAL_II_ALWAYS_INLINE</code>
+  that, when supported by the compiler, can be used to annotate functions
+  to ensure that the compiler always inlines them.
+  <br>
+  (Matthias Maier, Wolfgang Bangerth, 2016/01/07)
+  </li>
+
+  <li> New: There is a new documentation module, @ref Concepts, which describes the meaning
+  behind template parameter type names.
+  <br>
+  (David Wells, 2015/12/09)
+  </li>
+
+  <li> Changed: The template type name arguments of some classes no longer
+  shadow class names. Additionally, template type names are now much more
+  consistent across deal.II.
+  <br>
+  (David Wells, 2015/10/18 - 2016/01/23)
+  </li>
+
+  <li> New: The WorkStream class's design and implementation are now much
+  better documented in the form of a @ref workstream_paper "preprint".
+  <br>
+  (Wolfgang Bangerth, 2015/11/29)
+  </li>
+
+  <li> New: There is now much more documentation for the FiniteElement class,
+  in particular detailing what one needs to implement when writing finite
+  element descriptions in derived classes.
+  <br>
+  (Wolfgang Bangerth, 2015/11/29)
+  </li>
+
+  <li> New: We now experimentally support Microsoft Visual C++ compiler under
+  Windows.
+  <br>
+  (Timo Heister, 2015/11/26)
+  </li>
+
+  <li> New: There is now a function template numbers::signaling_nan() that
+  is used to create invalid floating point objects. These objects can either
+  be scalars, or of type Tensor, SymmetricTensor, or DerivativeForm. The
+  content of these objects is a "signaling NaN" ("NaN" stands for "not a
+  number", and "signaling" implies that at least on platforms where this
+  is supported, any arithmetic operation using them terminates the program).
+  The purpose of this is to use them as markers for uninitialized objects
+  and arrays that are required to be filled in other places, and to trigger
+  an error when this later initialization does not happen before the first
+  use.
+  <br>
+  (Wolfgang Bangerth, Timo Heister, 2015/11/24)
+  </li>
+
+  <li> Changed: The function FE_DGPNonparametric::shape_value() and similar
+  functions in the same class returned values and derivatives of shape
+  functions on the reference cell. However, this element is not defined
+  through mapping of shape functions from the reference cell, and consequently
+  it makes no sense to ask for this information. These functions have therefore
+  been changed to throw an exception instead, as documented in
+  FiniteElement::shape_value().
+  <br>
+  (Wolfgang Bangerth, 2015/11/20)
+  </li>
+
+  <li> Changed: The functionality to distribute cells across processes
+  according to a vector of cell weights that was passed in a call to
+  parallel::distributed::Triangulation::repartition()
+  was replaced by a cell-wise signal. This signal is called during
+  parallel::distributed::Triangulation::execute_coarsening_and_refinement() and
+  parallel::distributed::Triangulation::repartition()
+  if any function is connected to it. It allows to connect a function that
+  takes the current cell iterator and a status argument that indicates whether
+  this cell will be refined, coarsened or remains unchanged and returns a
+  cell weight, which will be used to distribute cells across processes in a
+  way that keeps the sum of weights across each individual process
+  approximately equal.
+  <br>
+  (Rene Gassmoeller, 2015/11/02)
+  </li>
+
+  <li> New: Preliminary support for parallel, adaptive, geometric multigrid is
+  now in place with changes to MGConstrainedDoFs (many new functions), MGTransfer,
+  MGTools::extract_inner_interface_dofs, MGTransferPrebuilt,
+  DoFTools::extract_locally_relevant_level_dofs.
+  <br>
+  (Timo Heister, Guido Kanschat, 2015/10/26)
+  </li>
+
+  <li> New: Two cell level signals are added to class Triangulation, namely
+  pre_coarsening_on_cell and post_refinement_on_cell.
+  <br>
+  (Lei Qiao, 2015/10/22)
+  </li>
+
+  <li> New: parallel::distributed::Triangulation::ghost_owners()
+  returns the set of MPI ranks of the ghost cells. Similarly
+  parallel::distributed::Triangulation::level_ghost_owners() for level
+  ghosts.
+  <br>
+  (Timo Heister, 2015/09/30)
+  </li>
+
+  <li> Improved: The interfaces to all deal.II type solvers and
+  preconditioners have been updated such that they function as expected
+  with the LinearOperator class and its associated functions (i.e.,
+  linear_operator(), transpose_operator() and inverse_operator()).
+  These preconditioners can now be wrapped as a LinearOperator,
+  facilitating the construction of approximate matrix inverses such as in
+  the development of a block matrix preconditioner. An example of this
+  functionality can be found in
+  <code>tests/lac/linear_operator_08.cc</code>.
+  <br>
+  (Jean-Paul Pelteret, 2015/09/24 - 2015/10/19)
+  </li>
+
+  <li> New: MGTransferPrebuilt can now be used with parallel::distributed::Vector
+  and TrilinosWrappers::SparseMatrix as a transfer matrix.
+  <br>
+  (Martin Kronbichler, 2015/09/22)
+  </li>
+
+  <li> Fixed: parallel::distributed::Vector is now fully functional for
+  indices larger than 4 billion.
+  <br>
+  (Martin Kronbichler, 2015/09/22)
+  </li>
+
+  <li> New: PArpackSolver eigensolver interface class.
+  <br>
+  (Denis Davydov, 2015/09/17)
+  </li>
+
+  <li> Changed: All doxygen-generated pages now contain a link to the
+  tutorial in their top-level menus.
+  <br>
+  (Wolfgang Bangerth, 2015/09/13)
+  </li>
+
+  <li>New: A new namespace TensorAccessors is introduced that contains
+  generic algorithms for tensorial objects, i.e., objects that allow
+  repeated access via the index operator <code>operator[](unsigned int)</code>.
+  The methods in TensorAccessors are primarily meant to replace old internal
+  code in <code>tensor.h</code>, but they might also prove useful otherwise.
+  <br>
+  (Matthias Maier, 2015/09/11)
+  </li>
+
+  <li> New: A python script (including instructions) for enabling pretty
+  printing with GDB is now available in
+  <tt>/contrib/utilities/dotgdbinit.py</tt>.
+  <br>
+  (Wolfgang Bangerth, David Wells, 2015/09/11)
+  </li>
+
+  <li> Improved: When available, deal.II now uses the "gold" linker, a
+  reimplementation of the traditional Unix "ld" linker that is substantially
+  faster. This reduces build and, in particular, test turnaround times.
+  <br>
+  (Wolfgang Bangerth, Matthias Maier, 2015/09/06)
+  </li>
+
+  <li> Cleanup: The interface of Tensor<rank,dim,Number> has been cleaned
+  up (a lot of unnecessary partial template specializations have been
+  removed). The specialization Tensor<1,dim,Number> has been removed.
+  <br>
+  (Matthias Maier, 2015/09/02)
+  </li>
+
+  <li> Improved: The testsuite now supports multiple comparison files.
+  Apart from the main comparison file that ends in
+  <code>[...].output</code> all files of the form
+  <code>[...].output.[string]</code> are considered for comparison.
+  <br>
+  (Matthias Maier, 2015/08/29)
+  </li>
+
+  <li> New: A class BlockLinearOperator has been introduced that extends
+  the LinearOperator concept to block structures. A BlockLinearOperator can
+  be sliced back to a LinearOperator.
+  <br>
+  (Matthias Maier, 2015/08/27)
+  </li>
+
+  <li> Improved: Support for complex number types throughout the library.
+  Several parts of the library have been reorganized to support complex
+  number types.
+  <br>
+  <em>Classes that are now instantiated for complex number types:</em>
+  - FunctionTime
+  - Function
+  - TensorFunction
+  <br>
+  <em>Classes with fixed interface that now fully support complex number
+  types (pure template classes without explicit instantiations in the
+  library):</em>
+  - LinearOperator
+  - PackagedOperation
+  - Tensor
+  <br>
+  (Matthias Maier, 2015/08/25)
+  </li>
+
+  <li> Fixed: The testsuite now properly supports version constraints for
+  features. Those are annotated by
+  <code>.with_FEATURE(<=|>=|=|<|>)VERSION.</code>.
+  <br>
+  (Matthias Maier, 2015/08/25)
+  </li>
+
+  <li> Improved: The interface and documentation for periodic boundary
+  conditions have been restructured. A
+  @ref GlossPeriodicConstraints "glossary entry" has been written.
+  <br>
+  (Daniel Arndt, Matthias Maier, 2015/08/01-2015/08/21)
+  </li>
+
+  <li> New: There is a new documentation module on
+  @ref FE_vs_Mapping_vs_FEValues "How Mapping, FiniteElement, and FEValues work together".
+  <br>
+  (Wolfgang Bangerth, 2015/08/20)
+  </li>
+
+  <li> New: parallel::shared::Triangulation class which extends
+  Triangulation class to automatically partition triangulation when run
+  with MPI. Common functionality between parallel::shared::Triangulation and
+  parallel::distributed::Triangulation is implemented in the parent class
+  parallel::Triangulation.
+  <br>
+  (Denis Davydov, 2015/08/14)
+  </li>
+
+  <li> New: The online documentation of all functions now includes
+  links to the file and line where that function is implemented. Both
+  are clickable to provide immediate access to the source code of a
+  function.
+  <br>
+  (Jason Sheldon, Wolfgang Bangerth, 2015/08/13)
+  </li>
+
+  <li> New: FE_RannacherTurek describes a discontinuous FiniteElement
+  with vanishing mean values of jumps across faces.
+  <br>
+  (Patrick Esser, 2015/08/17)
+  </li>
+
+  <li> New: FE_Q_Bubbles describes a FiniteElement based on FE_Q
+  enriched by bubble functions.
+  <br>
+  (Daniel Arndt, 2015/08/12)
+  </li>
+
+  <li> New: The testsuite now runs in a mode in which we abort programs for
+  floating point exceptions due to divisions by zero or other invalid arithmetic.
+  <br>
+  (Wolfgang Bangerth, 2015/07/29)
+  </li>
+
+  <li> New: MultithreadInfo::set_thread_limit() can now be called more than
+  once and the environment variable DEAL_II_NUM_THREADS will be respected
+  even if user code never calls it.
+  <br>
+  (Timo Heister, 2015/07/26)
+  </li>
+
+  <li> New: IndexSet now implements iterators.
+  <br>
+  (Timo Heister, 2015/07/12)
+  </li>
+
+</ol>
+
+
+<!-- ----------- SPECIFIC IMPROVEMENTS ----------------- -->
+
+<a name="specific"></a>
+<h3>Specific improvements</h3>
+
+
+
+<ol>
+  <li> New: FunctionParser now supports <code>rand()</code> and
+  <code>rand_seed(number)</code>, which return a random value in the
+  range [0,1].
+  <br>
+  (Wolfgang Bangerth, Luca Heltai, Alberto Sartori, 2016/02/09)
+  </li>
+
+  <li> Fixed: FullMatrix::TmTmult for matrix multiplication used to compute
+  wrong results for larger matrix sizes where external BLAS is called. This
+  has been fixed.
+  <br>
+  (Martin Kronbichler, 2016/02/02)
+  </li>
+
+  <li> Fixed: parallel::distributed::Triangulation with periodic boundary
+  conditions did not respect 2:1 balance over vertices on periodic
+  boundaries. This led to incomplete ghost layers on multigrid levels. This
+  has been fixed.
+  <br>
+  (Martin Kronbichler, Timo Heister, 2016/01/27)
+  </li>
+
+  <li> Fixed: SparseVanka now really uses second-order couplings for the
+  right-hand side of the local problems.
+  <br>
+  (Florian Sonner, 2016/01/27)
+  </li>
+
+  <li> Fixed: A bug in the Neumann boundary handling of KellyErrorEstimator
+  in 1d has been fixed and KellyErrorEstimator now correctly handles
+  codimension one problems by using the correct normals from the manifold
+  inside the gradient jump computation.
+  <br>
+  (Andrea Bonito, Timo Heister, 2016/01/21)
+  </li>
+
+  <li> New: The new class MGTransferMatrixFree implements multigrid level
+  transfer using local polynomial embedding and restriction with tensor
+  product evaluation techniques. This is a faster and less memory-demanding
+  alternative to MGTransferPrebuilt.
+  <br>
+  (Martin Kronbichler, 2016/01/20)
+  </li>
+
+  <li> New: hp::FECollection now has constructors which take
+  multiple finite elements as arguments.
+  <br>
+  (Angel Rodriguez, 2016/01/18)
+  </li>
+
+  <li> New: The glossary now contains a long entry describing what
+  the term "scalability" means in the context of finite element codes.
+  See @ref GlossParallelScaling.
+  <br>
+  (Wolfgang Bangerth, 2016/01/11)
+  </li>
+
+  <li> Fixed: Tensor::operator[] that takes TableIndices as a parameter no
+  longer returns by value, but rather by reference. Tensor::operator<< for
+  dim==0 now accesses values by reference instead of making a copy. This is
+  useful when non-trivial number types are stored.
+  <br>
+  (Jean-Paul Pelteret, 2016/01/08)
+  </li>
+
+  <li> New: constrained_linear_operator() and constrained_right_hand_side()
+  provide a generic mechanism of applying constraints to a LinearOperator.
+  A detailed explanation with example code is given in the @ref constraints
+  module.
+  <br>
+  (Mauro Bardelloni, Matthias Maier, 2015/10/25 - 2015/12/27)
+  </li>
+
+  <li> New: OpenCASCADE::read_IGES() and OpenCASCADE::read_STEP() have
+  been unified in behaviour, and now they allow extracting *all* elements of
+  the IGES and STEP files instead of only the faces. This allows
+  IGES files describing edges only to be used as input for some of
+  the OpenCASCADE Manifold wrappers.
+  <br>
+  (Luca Heltai, 2015/12/13)
+  </li>
+
+  <li> New: A new linear operator representing the Schur complement,
+  namely schur_complement(), has been implemented. Some auxiliary functions
+  that are often used in conjunction with the Schur complement
+  (condense_schur_rhs() and postprocess_schur_solution()) are also provided
+  as a PackagedOperation.
+  An example of this functionality can be found in
+  <code>tests/lac/schur_complement_01.cc</code>.
+  The solution of a multi-component problem (namely step-22) using the
+  schur_complement can be found in
+  <code>tests/lac/schur_complement_03.cc</code> .
+  <br>
+  (Jean-Paul Pelteret, Matthias Maier, Martin Kronbichler, 2015/12/07)
+  </li>
+
+  <li> New: There is now a function Utilities::to_string that works like
+  int_to_string, but is safer for long integers, negative integers, and
+  also handles floating point numbers. The implementation of int_to_string
+  was changed to simply call to_string. int_to_string is kept for
+  compatibility, but should only be used for unsigned integers.
+  <br>
+  (Rene Gassmoeller, 2015/12/09)
+  </li>
+
+  <li> Fixed: GridOut::write_msh() and GridOut::write_ucd() used the same
+  geometric element numbers for lines and faces. This caused visualization
+  programs to ignore parts with repeated geometric element numbers. This is now
+  fixed.
+  <br>
+  (David Wells, 2016/01/16)
+  </li>
+
+  <li> New: DoFTools::extract_dofs() are now instantiated also for
+  codimension different from zero.
+  <br>
+  (Alberto Sartori, 2016/01/13)
+  </li>
+
+  <li> Fixed: The DataOutFaces class should now also work with triangulations
+  of type parallel::distributed::Triangulation.
+  <br>
+  (Heikki Virtanen, Wolfgang Bangerth, 2016/01/11)
+  </li>
+
+  <li> Fixed: AlignedVector<T>::fill() (and thus, Table<N,T>::reinit) did not
+  correctly call the destructor of T() and could leak memory for complicated
+  class types that depend on their destructor to free memory.
+  <br>
+  (Martin Kronbichler, 2016/01/08)
+  </li>
+
+  <li> Fixed: inverse_operator() now populates <code>Tvmult</code> and
+  <code>Tvmult_add</code> correctly.
+  <br>
+  (Jean-Paul Pelteret, David Wells, Matthias Maier, 2015/12/30)
+  </li>
+
+  <li> New: MGTransferPrebuilt with parallel adaptive refinement has been
+  finalized for parallel::distributed::Vector.
+  <br>
+  (Martin Kronbichler, 2015/12/23)
+  </li>
+
+  <li> Fixed: Now all members in the class SparseMatrixEZ are initialized
+  correctly in the constructor. This was causing random crashes before.
+  <br>
+  (Timo Heister, 2015/12/21)
+  </li>
+
+  <li> New: There is now a new class ArrayView that presents a chunk of
+  memory as if it was an array of fixed size. This is eventually going
+  to replace the VectorSlice class which suffers from the defect that
+  its template argument does not encode the type of objects it points
+  to, but instead the type of the underlying container; consequently,
+  where the VectorSlice class is used as a function argument, it
+  automatically ties the type of object the function can be called
+  with (i.e., the underlying container) even if the called function
+  has no actual use for this kind of information.
+  <br>
+  (Wolfgang Bangerth, 2015/12/20)
+  </li>
+
+  <li> Fixed: Handling of constraints in step-26 was incorrect (hanging nodes
+  were condensed twice) leading to garbage solutions. This is now fixed.
+  <br>
+  (Timo Heister, 2015/12/20)
+  </li>
+
+  <li> Fixed: The implementation of ShiftedMatrixGeneralized contained several
+  errors that prevented it from being compiled. These have now been fixed.
+  <br>
+  (David Wells, 2015/12/18)
+  </li>
+
+  <li> New: There is now a function Triangulation::get_triangulation() that
+  allows writing code to get at the underlying triangulation for
+  everything that looks like a container, i.e., both Triangulation
+  or DoFHandler objects.
+  <br>
+  (Wolfgang Bangerth, 2015/12/10)
+  </li>
+
+  <li> Deprecated: The functions DoFHandler::get_tria() and
+  hp::DoFHandler::get_tria() were deprecated. Use DoFHandler::get_triangulation() and
+  hp::DoFHandler::get_triangulation() instead.
+  <br>
+  (Wolfgang Bangerth, 2015/12/10)
+  </li>
+
+  <li> New: parallel::distributed::Vector has now a method to return a shared
+  pointer to the underlying partitioner object.
+  <br>
+  (Martin Kronbichler, 2015/12/07)
+  </li>
+
+  <li> Improved: Many more functions in namespace GridTools and class
+  InterGridMap are now consistently instantiated also for types
+  parallel::distributed::Triangulation and parallel::shared::Triangulation.
+  <br>
+  (Gennadiy Rishin, Wolfgang Bangerth, 2015/12/07)
+  </li>
+
+  <li> Improved: Both versions of SparsityTools::distribute_sparsity_pattern()
+  are now plain, not
+  template, functions. This is not a breaking change because each function was
+  instantiated for exactly one template argument.
+  <br>
+  (David Wells, 2015/12/06)
+  </li>
+
+  <li> Improved: The method
+  parallel::distributed::Triangulation::fill_vertices_with_ghost_neighbors()
+  that is used for distributing DoFs on parallel triangulations previously
+  exhibited quadratic complexity in the number of coarse grid cells. This has
+  been changed into linear complexity calls (apart from a few issues
+  inside p4est).
+  <br>
+  (Martin Kronbichler, 2015/12/05)
+  </li>
+
+  <li> New: There are now new functions
+  GridTools::build_triangulation_from_patch() and
+  GridTools::get_cells_at_coarsest_common_level() that help build patches
+  around individual cells.
+  <br>
+  (Arezou Ghesmati, 2015/12/02)
+  </li>
+
+  <li> Fixed: The GridTools::copy_boundary_to_manifold_id() function
+  only copied boundary indicators from faces, but in 3d forgot about
+  edges. This is now fixed.
+  <br>
+  (Wolfgang Bangerth, 2015/11/30)
+  </li>
+
+  <li> Fixed: The constructor of SymmetricTensor that takes an array
+  of initializing elements led to a compiler error. This is now
+  fixed.
+  <br>
+  (Wolfgang Bangerth, 2015/11/28)
+  </li>
+
+  <li> Fixed: parallel::distributed::Vector now detects if the size of MPI
+  messages exceeds 2GB or if the local range exceeds the size of 32-bit
+  integers and throws an exception informing about the unsupported sizes.
+  <br>
+  (Martin Kronbichler, 2015/11/26)
+  </li>
+
+  <li> New: In 3d, GridGenerator::extract_boundary_mesh() now copies the
+  manifold ids of edges of the volume mesh to the manifold ids of the edges
+  of the extracted surface mesh.
+  <br>
+  (Wolfgang Bangerth, 2015/11/25)
+  </li>
+
+  <li> New: Triangulation::create_triangulation() now accepts subcell-data
+  objects that may include information about interior edges and faces, to
+  facilitate setting manifold indicators on interior edges and faces.
+  <br>
+  (Wolfgang Bangerth, 2015/11/25)
+  </li>
+
+  <li> Fixed: GridGenerator::extract_boundary_mesh() in 3d could generate
+  surface cells that did not uniformly have a right- or left-handed coordinate
+  system associated with them when viewed from one side of the surface. This
+  has been fixed: they now all have a right-handed coordinate system when seen
+  from one side of the surface, and a left-handed one when viewed from the
+  other side.
+  <br>
+  (Daniel Weygand, Wolfgang Bangerth, 2015/11/22)
+  </li>
+
+  <li> Fixed: Trilinos ML preconditioner is now deterministic when using
+  version 12.4 or newer.
+  <br>
+  (Timo Heister, 2015/11/16)
+  </li>
+
+  <li> New: Extra parameters to GD and Lanczos SLEPc solvers. Also added unit tests.
+  <br>
+  (Denis Davydov, 2015/11/09)
+  </li>
+
+  <li> Fixed: FETools::project_dg was adding the vector projection to
+  the output vector. Now the output vector is initialized to zero.
+  <br>
+  (Adam Kosik, 2015/11/09)
+  </li>
+
+  <li> Fixed: A compilation issue with DEAL_II_INCLUDE_DIRS not used for
+  compiling bundled boost.
+  <br>
+  (Lukas Korous, 2015/11/01)
+  </li>
+
+  <li> New: 2nd derivatives are implemented for PolynomialsBDM in 3D.
+  <br>
+  (Alistair Bentley, 2015/10/27)
+  </li>
+
+  <li> Fixed: PolynomialsBDM::degree() now returns the correct value.
+  <br>
+  (Alistair Bentley, 2015/10/24)
+  </li>
+
+  <li> New: Triangulation::set_all_manifold_ids_on_boundary(boundary_id, manifold_id)
+  which sets the manifold_id for all parts of the boundary with a given boundary_id.
+  <br>
+  (Alberto Sartori, 2015/10/22)
+  </li>
+
+  <li> Fixed: The range vectors in the construction of an
+  inverse_operator() are now reinitialised before solve calls. This ensures
+  a consistent starting point for the solver.
+  <br>
+  (Jean-Paul Pelteret, 2015/10/19)
+  </li>
+
+  <li> New: Ghost cells for the multigrid levels in
+  parallel::distributed::Triangulation are now correctly created also for
+  periodic boundary conditions.
+  <br>
+  (Martin Kronbichler, 2015/10/18)
+  </li>
+
+  <li> Fixed: GridGenerator::subdivided_parallelepiped() produced
+  invalid, unconnected meshes and wrong boundary indicators.
+  <br>
+  (Timo Heister, 2015/10/13)
+  </li>
+
+  <li> Improved: DoFTools::compute_intergrid_transfer_representation
+  can now be used with a fine grid given by a parallel::Triangulation.
+  <br>
+  (Alexander Grayver, 2015/10/09)
+  </li>
+
+  <li> New: GridIn::read_unv() can now read more element codes that
+  are used in typical meshes.
+  <br>
+  (Aslan Kosakian, 2015/10/06)
+  </li>
+
+  <li> New: FunctionParser now supports <code>pow(a,b)</code>.
+  <br>
+  (Timo Heister, 2015/09/30)
+  </li>
+
+  <li> New: DoFTools::locally_relevant_dofs_per_subdomain() can be used
+  to extract an IndexSet of locally relevant DoFs for a Triangulation
+  partitioned using METIS or with a parallel::shared::Triangulation .
+  <br>
+  (Jean-Paul Pelteret, 2015/09/24)
+  </li>
+
+  <li> Fixed: hp::SolutionTransfer could get confused when dealing with
+  FE_Nothing elements. This is now fixed.
+  <br>
+  (Claire Bruna-Rosso, Wolfgang Bangerth, 2015/09/23)
+  </li>
+
+  <li> Improved: The construction of the non-local graph for quick data
+  exchange of TrilinosWrappers::SparseMatrix became very slow for a few
+  thousand processors. This has been fixed.
+  <br>
+  (Martin Kronbichler, 2015/09/22)
+  </li>
+
+  <li> Improved: Initializing a TrilinosWrappers::SparseMatrix from a
+  DynamicSparsityPattern included some O(global_size) operations. These have
+  been replaced by operations only on the local range.
+  <br>
+  (Martin Kronbichler, 2015/09/22)
+  </li>
+
+  <li> Changed: All doxygen-generated pages now contain a link to the
+  tutorial in their top-level menus.
+  <br>
+  (Wolfgang Bangerth, 2015/09/13)
+  </li>
+
+  <li>Cleanup: Constructors of AdditionalData in various linear solvers are now marked
+  explicit. This avoids bugs with implicit conversions like the one fixed in step-40.
+  <br>
+  (Timo Heister, Lei Qiao, 2015/09/09)
+  </li>
+
+  <li>New: Introduced third-order derivatives of the shape functions, which
+  can now be accessed through FEValues and FEValuesViews using similar interfaces
+  as shape_values, shape_derivatives and shape_hessians.
+  (Maien Hamed, 2015/09/08)
+  </li>
+
+  <li>Cleanup: TableIndices<N> can now be used (constructed and accessed)
+  with N > 7.
+  <br>
+  (Matthias Maier, 2015/09/08)
+  </li>
+
+  <li>New: std::begin and std::end are now available within the std_cxx11
+  namespace through <base/std_cxx11/iterator.h>
+  <br>
+  (Matthias Maier, 2015/09/08)
+  </li>
+
+  <li>New: MappingQ1Eulerian was not instantiated for the various
+  Trilinos vector types. It is now instantiated for the same
+  vector types as MappingQEulerian is.
+  <br>
+  (Wolfgang Bangerth, 2015/09/08)
+  </li>
+
+  <li>New: Introduced Hessian-related functions to the Function class.
+  <br>
+  (Denis Davydov, 2015/09/08)
+  </li>
+
+  <li>New: Memory consumption during compilation has been reduced by splitting
+  instantiation files. For this make_instantiations now supports additional
+  logic to split the instantiations in .inst files into groups. This is
+  used in fe_values.cc, error_estimator.cc, and others.
+  <br>
+  (Timo Heister, 2015/09/05)
+  </li>
+
+  <li> Improved: Allow continuation lines in ParameterHandler. Any line in a
+  parameter file ending with a <tt>\\</tt> will now be combined with the next
+  line; see ParameterHandler's documentation for more information.
+  <br>
+  (Alberto Sartori, 2015/09/04, David Wells, 2016/01/18-2016/01/28)
+  </li>
+
+  <li> New: There is now a function SparsityPattern::print_svg() which prints
+  the sparsity of the matrix in a .svg file which can be opened in a web
+  browser.
+  <br>
+  (Conrad Clevenger, 2015/09/03)
+  </li>
+
+  <li> OpenMP SIMD support is now enabled for Clang version 3.6, or newer
+  (or the equivalent XCode version). Further, OpenMP support is no
+  longer falsely activated for very old clang versions.
+  <br>
+  (Matthias Maier, 2015/09/03)
+  </li>
+
+  <li> Improved: DoFTools::make_hanging_node_constraints() now supports
+  hp-refinement cases when neither_element_dominates. To that end we look for
+  a least face dominating FE inside FECollection.
+  <br>
+  (Denis Davydov, 2015/09/02)
+  </li>
+
+  <li> Changed: FEValues::transform() has been deprecated. The functionality
+  of this function is a (small) subset of what the Mapping classes
+  already provide.
+  <br>
+  (Wolfgang Bangerth, 2015/09/02)
+  </li>
+
+  <li> New: introduced hp::FECollection::find_least_face_dominating_fe(const
+  std::set<unsigned int> &fes) which aims to find the least dominating finite
+  element w.r.t. those provided as fe_indices in @p fes.
+  <br>
+  (Denis Davydov, Wolfgang Bangerth, 2015/08/31)
+  </li>
+
+  <li> New: step-6 now has an additional subsection in the
+  "Possibilities for extensions" section that discusses how
+  to create a better mesh.
+  <br>
+  (Konstantin Ladutenko, Wolfgang Bangerth, 2015/08/31)
+  </li>
+
+  <li> New: Introduce an option for FE_Nothing to dominate any other FE.
+  Therefore at interfaces where, for example, a Q1 meets an FE_Nothing,
+  we will force the traces of the two functions to be the same. Because the
+  FE_Nothing encodes a space that is zero everywhere, this means that the Q1
+  field will be forced to become zero at this interface.
+  <br>
+  (Denis Davydov, 2015/08/31)
+  </li>
+
+  <li> Fixed: VectorTools::integrate_difference() for VectorTools::Hdiv_seminorm
+  was computed incorrectly.
+  <br>
+  (Timo Heister, 2015/08/31)
+  </li>
+
+  <li> New: Jacobian second and third derivatives are now computed by the mapping classes and can be
+  accessed through FEValues in much the same way as the Jacobian and Jacobian gradient.
+  <br>
+  (Maien Hamed, 2015/08/28-2015/08/31)
+  </li>
+
+  <li> Fixed: The GridIn class was not instantiated for the
+  <code>dim==1,spacedim==3</code> case. This is now fixed.
+  <br>
+  (Wolfgang Bangerth, 2015/08/25)
+  </li>
+
+  <li> Fixed: In 1d, GridIn::read_msh() ignored boundary indicators
+  associated with vertices. This is now fixed.
+  <br>
+  (Jan Stebel, Wolfgang Bangerth, 2015/08/25)
+  </li>
+
+  <li> New: There is now a collection of functions named GridTools::compute_active_cell_halo_layer()
+  that determine which cells form a layer around a specified subdomain. There is also a function
+  GridTools::compute_ghost_cell_halo_layer() that returns the smallest layer of ghost cells around
+  all locally relevant cells.
+  <br>
+  (Jean-Paul Pelteret, Denis Davydov, Wolfgang Bangerth, 2015/08/21)
+  </li>
+
+  <li> Documentation: How to set up a testsuite in a user project is now
+  properly documented.
+  <br>
+  (Matthias Maier, 2015/08/01 - 2015/08/20)
+  </li>
+
+  <li> Fixed: The computation of gradients in FE_PolyTensor and its derived classes (in particular in
+  FE_RaviartThomas and FE_Nedelec) forgot to account for terms that appear on non-affine
+  cells. Consequently, the computed gradients did not match the actual derivatives of the values
+  these elements report. This is now fixed.
+  <br>
+  (Maien Hamed, 2015/08/18-2015/08/20)
+  </li>
+
+  <li> Improved: Generalized conversion between Tensor<order+1,dim> and
+  DerivativeForm<order,dim,dim> to general order using converting constructor
+  and assignment operator.
+  <br>
+  (Maien Hamed, 2015/08/01-2015/08/09)
+  </li>
+
+  <li> Changed: The function Vector::ratio() and the corresponding
+  functions in other vector classes have been deprecated.
+  <br>
+  (Wolfgang Bangerth, Bruno Turcksin, 2015/08/13)
+  </li>
+
+  <li> New: Direct support for Abaqus mesh files has been added to the GridIn
+  class through the function GridIn::read_abaqus().
+  <br>
+  (Jean-Paul Pelteret, Timo Heister,  Krzysztof Bzowski, 2015/08/12)
+  </li>
+
+  <li> Improved: Finite elements now compute hessians analytically rather than
+  by finite differencing.
+  <br>
+  (Maien Hamed, 2015/08/01-2015/08/09)
+  </li>
+
+  <li> New: The InterpolatedTensorProductGridData::gradient() function
+  is now implemented.
+  <br>
+  (Daniel Shapero, 2015/08/12)
+  </li>
+
+  <li> New: There is now a function Mapping::project_real_point_to_unit_point_on_face()
+  that calls Mapping::transform_real_to_unit_cell() and then projects the
+  result to a provided face.
+  <br>
+  (Jason Sheldon, 2015/08/11)
+  </li>
+
+  <li> New: FEFaceValues and FESubfaceValues can now also compute
+  gradients of the Jacobian of the transformation from unit to real cell,
+  controlled by update_jacobian_grads.
+  <br>
+  (Martin Kronbichler, 2015/08/08)
+  </li>
+
+  <li> New: There is now a function MemoryConsumption::memory_consumption()
+  for std_cxx11::unique_ptr arguments.
+  <br>
+  (Wolfgang Bangerth, 2015/08/07)
+  </li>
+
+  <li> Improved: CMake configuration: The DEAL_II_ADD_TEST now also
+  supports unit tests writing to stdout and stderr. Further, a second test
+  type consisting of an internal executable target, a configuration and a
+  comparison file is now supported.
+  <br>
+  (Matthias Maier, 2015/08/03)
+  </li>
+
+  <li> New: VtkFlags now stores a parameter describing the compression level
+  zlib uses when writing compressed output. For small problems, the flag
+  ZlibCompressionLevel::best_speed can make the call to write_vtu many times
+  faster.
+  <br>
+  (David Wells, 2015/08/03)
+  </li>
+
+  <li> Improved: The conversion Epetra_Map -> IndexSet is now an O(1)
+  operation for contiguous index ranges, improving over the old O(N) behavior.
+  <br>
+  (Martin Kronbichler, 2015/07/30)
+  </li>
+
+  <li> Changed: The initialization methods of TrilinosWrappers::SparseMatrix,
+  TrilinosWrappers::BlockSparseMatrix, TrilinosWrappers::SparsityPattern, and
+  TrilinosWrappers::BlockSparsityPattern with Epetra_Map arguments have been
+  marked as deprecated. Use the functions with IndexSet argument instead.
+  <br>
+  (Martin Kronbichler, Luca Heltai, 2015/07/30)
+  </li>
+
+  <li> New: FESystem now does some work in parallel if your system
+  has multiple processors.
+  <br>
+  (Wolfgang Bangerth, 2015/07/19)
+  </li>
+
+  <li> Fixed: When using FESystem with base elements that require
+  information other than the determinant of the Jacobian (e.g.,
+  elements that require the Jacobian itself), then this information
+  was not passed down to FiniteElement::fill_fe_values of the
+  base element. This is now fixed.
+  <br>
+  (Wolfgang Bangerth, Zhen Tao, 2015/07/17)
+  </li>
+
+  <li> New: The parallel::distributed::Triangulation can now be told to
+  partition the cells so that the sum of certain weights associated with each
+  cell, rather than the number of cells, is roughly constant between processors.
+  This is done by passing a vector of weights to the function that repartitions
+  the triangulation, parallel::distributed::Triangulation::repartition().
+  <br>
+  (Wolfgang Bangerth, 2015/07/14)
+  </li>
+
+  <li> New: DataOutBase::TecplotFlags now takes a third argument for solution
+  time which is useful to visualize transient data. If a user sets a non-negative
+  time, it will be saved into the tecplot file.
+  <br>
+  (Praveen Chandrashekar, 2015/08/30)
+  </li>
+
+</ol>
+
+*/
diff --git a/doc/news/8.4.0-vs-8.4.1.h b/doc/news/8.4.0-vs-8.4.1.h
new file mode 100644
index 0000000..e6ae37c
--- /dev/null
+++ b/doc/news/8.4.0-vs-8.4.1.h
@@ -0,0 +1,39 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2014 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/**
+ at page changes_between_8_4_0_and_8_4_1 Changes between Version 8.4.0 and 8.4.1
+
+<p>
+This is the list of changes made between the release of deal.II version
+8.4.0 and that of 8.4.1. All entries are signed with the names of the
+authors.
+</p>
+
+
+<!-- ----------- SPECIFIC IMPROVEMENTS ----------------- -->
+
+<a name="specific"></a>
+<h3>Specific improvements</h3>
+
+<ol>
+  <li> Fixed: Compilation with older versions of gcc (4.4)
+  is now possible again.
+  <br>
+  (Bruno Turcksin, 2016/03/17)
+  </li>
+</ol>
+
+*/
diff --git a/doc/news/changes.h b/doc/news/changes.h
new file mode 100644
index 0000000..4614021
--- /dev/null
+++ b/doc/news/changes.h
@@ -0,0 +1,62 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2014 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/**
+ at page changes_after_8_4_1 Changes after Version 8.4.1
+
+<p>
+This is the list of changes made after the release of deal.II version
+8.4.1. All entries are signed with the names of the authors.
+</p>
+
+
+
+<!-- ----------- INCOMPATIBILITIES ----------------- -->
+
+<a name="incompatible"></a>
+<h3 style="color:red">Incompatibilities</h3>
+
+<p style="color:red">
+Following are a few modifications to the library that unfortunately
+are incompatible with previous versions of the library, but which we
+deem necessary for the future maintainability of the
+library. Unfortunately, some of these changes will require
+modifications to application programs. We apologize for the
+inconvenience this causes.
+</p>
+
+<ol>
+</ol>
+
+
+<!-- ----------- GENERAL IMPROVEMENTS ----------------- -->
+
+<a name="general"></a>
+<h3>General</h3>
+
+<ol>
+</ol>
+
+
+<!-- ----------- SPECIFIC IMPROVEMENTS ----------------- -->
+
+<a name="specific"></a>
+<h3>Specific improvements</h3>
+
+
+<ol> 
+</ol>
+
+*/
diff --git a/doc/pictures/background-grid.jpg b/doc/pictures/background-grid.jpg
new file mode 100644
index 0000000..973f7a9
Binary files /dev/null and b/doc/pictures/background-grid.jpg differ
diff --git a/doc/pictures/deal.II-text.jpg b/doc/pictures/deal.II-text.jpg
new file mode 100644
index 0000000..47260d8
Binary files /dev/null and b/doc/pictures/deal.II-text.jpg differ
diff --git a/doc/pictures/fail.gif b/doc/pictures/fail.gif
new file mode 100644
index 0000000..d52c94a
Binary files /dev/null and b/doc/pictures/fail.gif differ
diff --git a/doc/pictures/grid.1.gif b/doc/pictures/grid.1.gif
new file mode 100755
index 0000000..35cc473
Binary files /dev/null and b/doc/pictures/grid.1.gif differ
diff --git a/doc/pictures/grid.4.gif b/doc/pictures/grid.4.gif
new file mode 100755
index 0000000..aa5ff1b
Binary files /dev/null and b/doc/pictures/grid.4.gif differ
diff --git a/doc/pictures/hex.fig b/doc/pictures/hex.fig
new file mode 100644
index 0000000..ca5e0be
--- /dev/null
+++ b/doc/pictures/hex.fig
@@ -0,0 +1,123 @@
+#FIG 3.2
+Landscape
+Center
+Metric
+A4      
+100.00
+Single
+-2
+1200 2
+1 1 0 1 0 11 50 -1 20 0.000 1 0.0000 1350 3780 270 450 1350 3780 1620 3780
+1 1 0 1 0 11 50 -1 20 0.000 1 0.0000 4050 2880 450 450 4050 2880 4500 2430
+1 1 0 1 0 11 50 -1 20 0.000 1 5.4978 3330 5580 270 450 3330 5580 3600 5580
+1 1 0 1 0 11 50 -1 20 0.000 1 5.4978 11655 1800 270 450 11655 1800 11925 1800
+1 1 0 1 0 11 50 -1 20 0.000 1 0.0000 13455 3690 270 450 13455 3690 13725 3690
+1 1 0 1 0 11 50 -1 20 0.000 1 0.0000 10710 4365 450 450 10710 4365 11160 3915
+1 1 0 1 0 29 50 -1 20 0.000 1 4.7124 3987 1111 225 315 3987 1111 4212 1426
+1 1 0 1 0 29 50 -1 20 0.000 1 4.7124 3960 4725 225 315 3960 4725 4185 5040
+1 1 0 1 0 29 50 -1 20 0.000 1 4.7124 2610 6300 225 315 2610 6300 2835 6615
+1 1 0 1 0 29 50 -1 20 0.000 1 4.7124 12330 1035 225 315 12330 1035 12555 1350
+1 1 0 1 0 29 50 -1 20 0.000 1 4.7124 10890 2475 225 315 10890 2475 11115 2790
+1 1 0 1 0 29 50 -1 20 0.000 1 4.7124 10800 6300 225 315 10800 6300 11025 6615
+1 1 0 1 0 29 50 -1 20 0.000 1 3.1416 8775 4365 225 315 8775 4365 8460 4590
+1 1 0 1 0 29 50 -1 20 0.000 1 3.1416 12825 4410 225 315 12825 4410 12510 4635
+1 1 0 1 0 29 50 -1 20 0.000 1 3.1416 14175 3060 225 315 14175 3060 13860 3285
+1 1 0 1 0 29 50 -1 20 0.000 1 3.1416 2250 3060 225 315 2250 3060 1935 3285
+1 1 0 1 0 29 50 -1 20 0.000 1 3.1416 5850 3060 225 315 5850 3060 5535 3285
+1 1 0 1 0 29 50 -1 20 0.000 1 3.1416 450 4410 225 315 450 4410 135 4635
+1 1 0 1 0 29 50 -1 20 0.000 1 5.4978 9585 1800 225 315 9585 1800 9810 1800
+1 1 0 1 0 29 50 -1 20 0.000 1 5.4978 13455 1800 225 315 13455 1800 13680 1800
+1 1 0 1 0 29 50 -1 20 0.000 1 5.4978 13500 5535 225 315 13500 5535 13725 5535
+1 1 0 1 0 29 50 -1 20 0.000 1 5.4978 5085 5580 225 315 5085 5580 5310 5580
+1 1 0 1 0 29 50 -1 20 0.000 1 5.4978 1260 5580 225 315 1260 5580 1485 5580
+1 1 0 1 0 29 50 -1 20 0.000 1 5.4978 1350 1800 225 315 1350 1800 1575 1800
+2 1 0 1 0 11 49 -1 20 0.000 0 0 -1 1 0 2
+	1 1 1.00 120.00 240.00
+	 7425 3150 8100 3150
+2 1 0 1 0 11 49 -1 20 0.000 0 0 -1 1 0 2
+	1 1 1.00 120.00 240.00
+	 7425 3150 7425 2475
+2 1 0 1 0 11 49 -1 20 0.000 0 0 -1 1 0 2
+	1 1 1.00 120.00 240.00
+	 7425 3150 6975 3600
+2 4 0 1 0 6 50 -1 20 0.000 0 0 7 0 0 5
+	 10935 1440 10260 1440 10260 765 10935 765 10935 1440
+2 4 0 1 0 6 50 -1 20 0.000 0 0 7 0 0 5
+	 14535 1440 13815 1440 13815 720 14535 720 14535 1440
+2 4 0 1 0 6 50 -1 20 0.000 0 0 7 0 0 5
+	 14490 5130 13815 5130 13815 4410 14490 4410 14490 5130
+2 4 0 1 0 6 50 -1 20 0.000 0 0 7 0 0 5
+	 9180 6660 8505 6660 8505 5985 9180 5985 9180 6660
+2 4 0 1 0 6 50 -1 20 0.000 0 0 7 0 0 5
+	 13140 6660 12465 6660 12465 5985 13140 5985 13140 6660
+2 4 0 1 0 6 50 -1 20 0.000 0 0 7 0 0 5
+	 9180 2880 8505 2880 8505 2205 9180 2205 9180 2880
+2 4 0 1 0 6 50 -1 20 0.000 0 0 7 0 0 5
+	 2610 1440 1935 1440 1935 765 2610 765 2610 1440
+2 4 0 1 0 6 50 -1 20 0.000 0 0 7 0 0 5
+	 6210 1440 5490 1440 5490 720 6210 720 6210 1440
+2 4 0 1 0 6 50 -1 20 0.000 0 0 7 0 0 5
+	 6165 5130 5490 5130 5490 4410 6165 4410 6165 5130
+2 3 0 3 0 7 51 -1 -1 0.000 0 0 -1 0 0 5
+	 2250 1125 2250 4725 5850 4725 5850 1125 2250 1125
+2 3 0 3 0 7 51 -1 -1 0.000 0 0 -1 0 0 5
+	 2250 4725 450 6300 4500 6300 5850 4725 2250 4725
+2 4 0 1 0 6 50 -1 20 0.000 0 0 7 0 0 5
+	 855 6660 180 6660 180 5985 855 5985 855 6660
+2 4 0 1 0 6 50 -1 20 0.000 0 0 7 0 0 5
+	 4815 6660 4140 6660 4140 5985 4815 5985 4815 6660
+2 4 0 1 0 6 50 -1 20 0.000 0 0 7 0 0 5
+	 2565 5040 1890 5040 1890 4365 2565 4365 2565 5040
+2 4 0 1 0 6 50 -1 20 0.000 0 0 7 0 0 5
+	 855 2880 180 2880 180 2205 855 2205 855 2880
+2 3 0 3 0 7 51 -1 -1 0.000 0 0 -1 0 0 5
+	 450 2475 450 6300 2250 4725 2250 1125 450 2475
+2 3 0 3 0 7 51 -1 -1 0.000 0 0 -1 0 0 5
+	 8775 2475 8775 6300 12825 6300 12825 2475 8775 2475
+2 3 0 3 0 7 51 -1 -1 0.000 0 0 -1 0 0 5
+	 10530 1035 8775 2475 12825 2475 14175 1035 10530 1035
+2 3 0 3 0 7 51 -1 -1 0.000 0 0 -1 0 0 5
+	 12825 2475 12825 6300 14175 4725 14175 1035 12825 2475
+2 4 0 1 0 6 50 -1 20 0.000 0 0 7 0 0 5
+	 13140 2835 12465 2835 12465 2160 13140 2160 13140 2835
+4 1 0 49 -1 0 20 0.0000 4 195 150 7650 2475 y\001
+4 1 0 49 -1 0 20 0.0000 4 135 150 8100 3375 x\001
+4 1 0 49 -1 0 20 0.0000 4 135 135 7200 3825 z\001
+4 1 0 49 -1 0 30 0.0000 4 315 225 14175 4860 1\001
+4 1 0 49 -1 0 24 0.0000 4 255 180 14175 3150 1\001
+4 1 0 49 -1 0 30 0.0000 4 315 225 8865 6480 4\001
+4 1 0 49 -1 0 30 0.0000 4 330 225 12825 6480 5\001
+4 1 0 49 -1 0 30 0.0000 4 315 225 8865 2700 6\001
+4 1 0 49 -1 0 30 0.0000 4 315 225 10575 1260 2\001
+4 1 0 49 -1 0 30 0.0000 4 315 225 14175 1260 3\001
+4 1 0 49 -1 0 30 0.0000 4 315 225 5850 4860 1\001
+4 1 0 49 -1 0 24 4.7124 4 255 180 3870 1125 3\001
+4 1 0 49 -1 0 24 0.0000 4 255 180 5850 3150 1\001
+4 1 0 49 -1 0 24 4.7124 4 240 180 3870 4725 2\001
+4 1 0 49 -1 0 30 0.0000 4 315 225 2250 4860 0\001
+4 1 0 49 -1 0 30 0.0000 4 315 225 540 6480 4\001
+4 1 0 49 -1 0 30 0.0000 4 330 225 4500 6480 5\001
+4 1 0 49 -1 0 30 0.0000 4 315 225 540 2700 6\001
+4 1 0 49 -1 0 30 0.0000 4 315 225 2250 1260 2\001
+4 1 0 49 -1 0 30 0.0000 4 315 225 5850 1260 3\001
+4 1 0 49 -1 0 30 0.0000 4 315 225 1350 3960 0\001
+4 1 0 49 -1 0 24 0.0000 4 240 180 2250 3150 0\001
+4 1 0 49 -1 0 30 5.4978 4 315 225 3240 5670 2\001
+4 1 0 49 -1 0 30 0.0000 4 315 225 12825 2655 0\001
+4 1 0 49 -1 0 30 0.0000 4 330 225 10710 4545 5\001
+4 1 0 49 -1 0 30 0.0000 4 315 225 4050 3060 4\001
+4 1 0 49 -1 0 30 5.4978 4 315 225 11565 1890 3\001
+4 1 0 49 -1 0 30 0.0000 4 315 225 13455 3870 1\001
+4 1 0 49 -1 0 24 4.7124 4 255 180 12195 1035 3\001
+4 1 0 49 -1 0 24 0.0000 4 240 180 450 4500 4\001
+4 1 0 49 -1 0 24 4.7124 4 240 180 2520 6300 6\001
+4 1 0 49 -1 0 24 0.0000 4 240 180 8775 4500 4\001
+4 1 0 49 -1 0 24 0.0000 4 270 180 12825 4500 5\001
+4 1 0 49 -1 0 24 4.7124 4 240 180 10755 2475 7\001
+4 1 0 49 -1 0 24 4.7124 4 240 180 10710 6300 6\001
+4 1 0 49 -1 0 24 5.4978 4 240 180 1170 5670 8\001
+4 1 0 49 -1 0 24 5.4978 4 240 180 5040 5670 9\001
+4 1 0 49 -1 0 24 5.4978 4 255 360 1305 1890 10\001
+4 1 0 49 -1 0 24 5.4978 4 255 360 13365 1890 11\001
+4 1 0 49 -1 0 24 5.4978 4 255 360 9540 1890 10\001
+4 1 0 49 -1 0 24 5.4978 4 240 180 13410 5625 9\001
diff --git a/doc/pictures/ok.gif b/doc/pictures/ok.gif
new file mode 100644
index 0000000..5532a9b
Binary files /dev/null and b/doc/pictures/ok.gif differ
diff --git a/doc/pictures/quad.fig b/doc/pictures/quad.fig
new file mode 100644
index 0000000..de8d86e
--- /dev/null
+++ b/doc/pictures/quad.fig
@@ -0,0 +1,39 @@
+#FIG 3.2
+Landscape
+Center
+Metric
+A4      
+100.00
+Single
+-2
+1200 2
+1 1 0 1 0 11 50 -1 20 0.000 1 0.0000 2250 2970 270 450 2250 2970 2520 2970
+1 1 0 1 0 11 50 -1 20 0.000 1 4.7124 4050 1125 270 450 4050 1125 4050 1395
+1 1 0 1 0 11 50 -1 20 0.000 1 4.7124 4050 4725 270 450 4050 4725 4050 4995
+1 1 0 1 0 11 50 -1 20 0.000 1 0.0000 5850 2970 270 450 5850 2970 6120 2970
+2 4 0 1 0 6 50 -1 20 0.000 0 0 7 0 0 5
+	 2565 5040 1890 5040 1890 4365 2565 4365 2565 5040
+2 4 0 1 0 6 50 -1 20 0.000 0 0 7 0 0 5
+	 2610 1440 1935 1440 1935 765 2610 765 2610 1440
+2 4 0 1 0 6 50 -1 20 0.000 0 0 7 0 0 5
+	 6210 1440 5490 1440 5490 720 6210 720 6210 1440
+2 4 0 1 0 6 50 -1 20 0.000 0 0 7 0 0 5
+	 6165 5130 5490 5130 5490 4410 6165 4410 6165 5130
+2 3 0 3 0 7 51 -1 -1 0.000 0 0 -1 0 0 5
+	 2250 1125 2250 4725 5850 4725 5850 1125 2250 1125
+2 1 0 1 0 11 49 -1 20 0.000 0 0 -1 1 0 2
+	1 1 1.00 120.00 240.00
+	 3600 3150 4275 3150
+2 1 0 1 0 11 49 -1 20 0.000 0 0 -1 1 0 2
+	1 1 1.00 120.00 240.00
+	 3600 3150 3600 2475
+4 1 0 49 -1 0 30 0.0000 4 315 225 2250 4860 0\001
+4 1 0 49 -1 0 30 0.0000 4 315 225 5850 4860 1\001
+4 1 0 49 -1 0 30 0.0000 4 315 225 2250 1260 2\001
+4 1 0 49 -1 0 30 0.0000 4 315 225 5850 1260 3\001
+4 1 0 49 -1 0 30 0.0000 4 315 225 2250 3150 0\001
+4 1 0 49 -1 0 30 4.7124 4 315 225 3870 1125 3\001
+4 1 0 49 -1 0 30 0.0000 4 315 225 5850 3150 1\001
+4 1 0 49 -1 0 30 4.7124 4 315 225 3870 4725 2\001
+4 1 0 49 -1 0 20 0.0000 4 195 150 3375 2925 y\001
+4 1 0 49 -1 0 20 0.0000 4 135 150 3825 3375 x\001
diff --git a/doc/pictures/title-background.jpg b/doc/pictures/title-background.jpg
new file mode 100644
index 0000000..6492cce
Binary files /dev/null and b/doc/pictures/title-background.jpg differ
diff --git a/doc/readme.html b/doc/readme.html
new file mode 100644
index 0000000..27f46de
--- /dev/null
+++ b/doc/readme.html
@@ -0,0 +1,831 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <title>The deal.II Readme</title>
+    <meta http-equiv="Content-Type" content="text/html;charset=utf-8"/>
+    <link href="screen.css" rel="StyleSheet"/>
+    <meta name="copyright" content="Copyright (C) 1998 - 2015 by the deal.II Authors"/>
+    <meta name="keywords" content="deal.II"/>
+  </head>
+
+  <body>
+
+
+
+<h1>Installation instructions and further information
+  on <acronym>deal.II</acronym> </h1>
+
+<div class="toc">
+  <ol>
+    <li> <a href="#prerequisites">System requirements</a>
+      <ol>
+	<li> <a href="#supported">Supported platforms</a></li>
+	<li> <a href="#additional-software">Additional software requirements</a></li>
+      </ol>
+    </li>
+    <li> <a href="#installation">Installation</a>
+      <ol>
+	<li>  <a href="#unpacking">Unpacking</a></li>
+	<li>  <a href="#configuration">Configuring and building the library</a></li>
+	<li>  <a href="#documentation">Configuring and building the documentation</a></li>
+	<li>  <a href="#configuration-options">Configuration options</a>
+	  <ol>
+	    <li> <a href="#optional">Selecting optional behavior</a></li>
+	    <li> <a href="#optional-software">Optional interfaces to
+		other software packages</a></li>
+	    <li> <a href="#conf-details">More information on configuring
+		and building the library</a></li>
+	  </ol>
+	</li>
+      </ol>
+    </li>
+    <li> <a href="#license">License</a></li>
+  </ol>
+</div>
+
+<a name="prerequisites"/>
+<h2>System requirements</h2>
+
+<a name="supported"/>
+<h3>Supported platforms</h3>
+
+<p>
+  <acronym>deal.II</acronym> is mostly developed on Linux using the
+  <a href="http://gcc.gnu.org">GCC</a> compiler. However, it is not
+  platform specific and we strive to keep the source code C++ Standard
+  compliant.
+</p>
+
+<p>
+  <acronym>deal.II</acronym> supports at least the following
+  platforms:
+</p>
+<ul>
+  <li>GNU/Linux: GCC version 4.6 or later; Clang version 3.3 or later;
+  ICC versions 15 or later</li>
+  <li>Mac OS X: GCC version 4.6 or later; Clang version 3.3 or later.
+    Please see the <a href="https://github.com/dealii/dealii/wiki/MacOSX"
+    target="_top">deal.II Wiki</a> for installation instructions.</li>
+  <li>Windows: experimental support for Visual Studio 2013 and 2015.
+    Please have a look at the
+    <a href="https://github.com/dealii/dealii/wiki/Frequently-Asked-Questions#can-i-use-dealii-on-a-windows-platform">
+      FAQ</a> and at the <a href="https://github.com/dealii/dealii/wiki/Windows"
+    target="_top">deal.II Wiki</a> for more information and alternative solutions.
+  </li>
+</ul>
+
+<p>
+  Most other combinations of POSIX-style operating systems and C++
+  Standard compliant compilers should also work. <i>If they don't,
+  please report it as a bug.</i>
+</p>
+
+
+<a name="additional-software"></a>
+<h3>Additional software requirements</h3>
+
+    <p>
+      In order to compile and use <acronym>deal.II</acronym>
+      you need to have the following programs installed:
+    </p>
+    <ul>
+      <li>
+        <a href="http://www.cmake.org/" target="_top">CMake</a> version 2.8.8 or later
+      </li>
+
+      <li>
+        <a href="http://www.gnu.org/software/make/" target="_top">GNU make</a>, version
+        3.78 or later (or any other generator supported by CMake)
+      </li>
+
+      <li>
+        For generating the documentation:
+        <a href="http://www.perl.org/" target="_top">Perl 5.x</a>,
+	<a href="http://www.doxygen.org/" target="_top">doxygen</a>
+        and <code>dot</code>, which is part of the
+	<a href="http://www.graphviz.org" target="_top">GraphViz</a>
+        package
+      </li>
+
+      <li>
+          For debugging programs, we have found that the
+	  <a href="http://www.gnu.org/software/gdb/" target="_top">GNU
+	    debugger GDB</a> is an invaluable tool. GDB is a text-based tool
+	    that is not always easy to use; <a href="http://www.kdbg.org/"
+	    target="_top">kdbg</a> is one of many graphical user interfaces
+	    for it. Most integrated development environments
+	    like <a href="http://www.kdevelop.org/" target="_top">kdevelop</a>
+	    or <a href="http://eclipse.org/" target="_top">Eclipse</a> have built
+	    in debuggers as well. <acronym>deal.II</acronym> has some support for
+	    pretty printing its own classes through GDB;
+	    see <a href="users/gdb.html">the GDB configuration guide</a> for
+	    setup information.
+	</li>
+
+      <li> <p>
+        The library generates output in formats readable by
+        <a href="http://www.gnuplot.info/" target="_top">GNUPLOT</a>,
+	<a href="http://www.generalmeshviewer.com/" target="_top">GMV
+ 	  (general mesh viewer)</a>,
+	<a href="http://www.tecplot.com/" target="_top">Tecplot</a> (ASCII and binary),
+        <a href="http://www.vtk.org/"
+           target="_top">Visualization Toolkit (Vtk)</a>,
+	<a href="http://www.avs.com" target="_top">AVS Explorer</a>,
+	<a href="http://www.opendx.org" target="_top">Open DX</a>,
+	<a href="http://www.povray.org" target="_top">Povray</a>,
+        and directly to Encapsulated Postscript.
+	</p>
+
+        <p>
+          <code>gnuplot</code>
+          and a postscript viewer (for <code>eps</code>) should be
+          available almost everywhere. In the last few years, most
+          new visualization programs have moved to support
+          <code>vtk</code>/<code>vtu</code> format. There are a number
+          of excellent programs that can read <code>vtk</code> and
+          <code>vtu</code>, such as
+          <a href="http://www.llnl.gov/visit/" target="_top">Visit</a>,
+          <a href="http://www.paraview.org/" target="_top">ParaView</a>,
+	  as well as others. Povray is freely available for almost all
+	  platforms. AVS is a commercial program available for most Unix
+	  flavors. Tecplot is a commercial program available for Windows
+	  and most Unix platforms.
+	</p>
+	<p>
+	  In case you didn't find your favorite graphics format above,
+	  adding a new writer to <acronym>deal.II</acronym> is not too
+	  difficult, as only a simple intermediate format needs to be
+	  converted into output (without references to cells, nodes,
+	  types of finite elements, and the like).
+	</p>
+      </li>
+    </ul>
+
+    <a name="installation"></a>
+    <h2>Installation</h2>
+
+    <a name="unpacking"></a>
+    <h3>Unpacking</h3>
+
+    <p>
+      The whole library usually comes as a <code>tar.gz</code> file,
+      which is a file archive compressed with gzip. After downloading it,
+      unpack it using either
+    </p>
+<pre>
+  gunzip deal.II-X.Y.Z.tar.gz
+  tar xf deal.II-X.Y.Z.tar
+</pre>
+<p>or, if you have GNU tar, with</p>
+<pre>
+  tar -xvf deal.II-X.Y.Z.tar.gz
+</pre>
+
+<p><b>Note:</b> You will want to hang on to the source files
+  of <acronym>deal.II</acronym> after installation as it makes
+  developing much simpler. Consequently, you should do the steps above
+  in a permanent directory, not on <code>/tmp</code> as one often does
+  when installing software.
+</p>
+
+    <a name="configuration"></a>
+    <h3>Configuring and building the library</h3>
+
+    <p>
+      <acronym>deal.II</acronym> uses the
+      <a href="http://www.cmake.org/" target="_top">CMake</a>
+      integrated configuration and build system. Unpacking will create a
+      subdirectory <tt>deal.II/</tt> in the current directory. Then do the
+      following steps:</p>
+
+<pre>
+  mkdir build
+  cd build
+  cmake -DCMAKE_INSTALL_PREFIX=/path/to/install/dir ../deal.II
+  make install
+  make test
+</pre>
+
+<p>
+  These steps compile, link, install the deal.II library, and run a few
+  consistency checks. The whole process should take between a few minutes
+  and an hour, depending on your machine.
+</p>
+
+    <p>
+      <b>Note:</b></p>
+      <ul>
+	<li> <code>/path/to/install/dir</code> is the directory which deal.II
+	  should be installed into. This can be a directory in your home
+	  directory (e.g., <code>~/bin/deal.II</code>) or a directory
+	  such as <code>/usr/local</code> if you have root privileges.
+	  Another option is to use something like <code>`pwd`/../installed/</code> (note the
+	  backticks). Make sure the installation directory is not the same
+	  as the location where you unpacked <tt>deal.II/</tt>.
+	</li>
+
+	<li> If your machine has multiple processors, use <code>make
+	  -jN</code> in the last step, where <code>N</code> is the
+	  number of simultaneous build processes you want <code>make</code>
+	  to use at any given time. Allowing <code>make</code> to use
+	  more simultaneous build processes (assuming you have that many
+	  processor cores) will greatly lower the build time.
+	</li>
+
+	<li> If you do not intend to modify the <acronym>deal.II</acronym>
+	  sources and recompile things, then you can remove
+	  the <code>build/</code> directory after the last step.
+	</li>
+
+	<li> In principle, after installing <acronym>deal.II</acronym>, you
+	  can remove the source directory as well (i.e., the directory into
+	  which <code>tar</code> unpacked the file you downloaded) since
+	  projects using deal.II should only need files that have been
+	  installed. However, you will find it convenient to keep the source
+	  files around anyway, for one reason: When
+	  debugging you often end up with assertions for which you'd like to
+          see the place in the library's source files that triggered it.
+	</li>
+
+	<li> The <acronym>deal.II</acronym> <code>CMake</code> system can accept a
+	  significant number of configuration parameters. See the
+	  discussion <a href="#configuration-options">below</a>.
+	</li>
+
+	<li> If you are changing part of the <acronym>deal.II</acronym>
+	code itself, you can re-compile the library using only the
+	last two commands above in the previously created build
+	directory. It is also possible to change the configuration used
+	in this directory by calling <code>cmake</code> a second time,
+	possibly with different arguments.
+	However, this sometimes leads to surprising results and you may
+	not get exactly what you were hoping for. For more information,
+	see <a href="users/cmake.html">here</a>.
+	</li>
+      </ul>
+
+    <p>
+      The commands above build and install the <acronym>deal.II</acronym>
+      libraries in two variants:
+    </p>
+    <ul>
+      <li>
+	<p><i>Debug mode</i>: This version of the
+	  library is compiled with compiler flags so
+	  that the library contains information that can be used by debuggers.
+	</p>
+
+	<p>
+	  In addition, this library contains a great number of safety
+	  checks on most arguments of all functions you could possibly call.
+	  These assertions have proven to be an
+	  invaluable means to finding programming bugs since they will
+	  almost always abort your program if something goes wrong. In our
+	  experience, more than ninety per cent of all errors are invalid
+	  parameters (such as vectors having the wrong size, etc.) and they
+	  are usually found almost instantaneously, displaying the file
+	  name and line number of where the problem occurred.
+	</p>
+	</li>
+
+	<li> <i>Optimized mode</i>: You will want to link with this version of
+	    the library once you know that your program is working as
+	    expected. It does not contain the safety checks any more
+	    and is compiled with aggressive compiler optimizations. The
+	    resulting executables are smaller and will run between 2 and 10
+	    times faster than the debug executables.
+	</li>
+      </ul>
+
+    <p>
+      At this point, you have generated everything necessary to write
+      programs based on <acronym>deal.II</acronym>. If you are new to
+      <acronym>deal.II</acronym>, you may want to continue with the
+      <a href="doxygen/deal.II/Tutorial.html" target="_top">tutorial</a>.
+    </p>
+
+    <a name="documentation"></a>
+    <h3>Configuring and building the documentation</h3>
+
+    <p>
+      All the documentation about the version that you downloaded and that can
+      be found at the <a href="http://www.dealii.org/" target="_top">
+      http://www.dealii.org/</a> domain  can also be generated locally. To do
+      so, invoke <code>cmake</code> in the build instructions above as follows:
+    </p>
+
+<pre>
+  cmake -DDEAL_II_COMPONENT_DOCUMENTATION=ON -DCMAKE_INSTALL_PREFIX=/path/to/install/dir ../deal.II
+</pre>
+
+<p>
+  For this to succeed, you will need <a href="http://www.perl.org/"
+					target="_top">Perl 5.x</a>,
+  <a href="http://www.doxygen.org/" target="_top">doxygen</a>
+  and <code>dot</code> (which is part of
+  the <a href="http://www.graphviz.org" target="_top">GraphViz</a>
+  package) to be installed.
+</p>
+
+    <p>
+      Upon calling <code>make</code> and <code>make install</code>, this will
+      install this readme and other installation instructions, as well as the
+      <a href="doxygen/deal.II/index.html" target="_top">manual that documents
+      all functions and classes</a> and
+      the <a href="doxygen/deal.II/Tutorial.html" target="_top"> tutorial
+      of well-documented example programs</a> (the "steps").
+    </p>
+
+    <p>
+      <b>Note:</b> Generating this documentation can take a <i>really long
+      time</i> — running doxygen through our hundreds of thousands of
+      lines of code can take 15-20 minutes even on a fast machine during which
+      you will not get any output from <code>make</code>.
+    </p>
+
+
+    <a name="configuration-options"></a>
+    <h3>Configuration options</h3>
+
+    <p>
+      <acronym>deal.II</acronym> has a large number of optional interfaces to
+      other libraries. <b>By default, <code>cmake</code> will automatically
+      enable support  for all external libraries it can find in default
+      paths.</b>
+      However, this behavior can be changed using command line switches to
+      the initial call to <code>cmake</code>. A detailed description
+      can be found here: <a href="users/cmake.html">Detailed build system description</a>.
+    </p>
+
+    <p>
+      In the following, let us summarize the most common configuration
+      options.
+    </p>
+
+    <a name="optional"/>
+      <h4>Selecting optional behavior</h4>
+
+    <ul>
+      <li>
+	<p>
+	  <i>Threading</i>: By default, deal.II supports parallelism between
+	  multiple cores on the same machine using threads and a
+	  task-based model built on the
+          <a href="http://threadingbuildingblocks.org/"
+	  target="_top">Threading Building Blocks</a>. You can switch
+	  threading off by passing the <code>-DDEAL_II_WITH_THREADS=OFF</code>
+	  argument to <code>cmake</code>.
+	</p>
+      </li>
+
+      <li>
+	<p>
+	  <i>MPI</i>: To enable parallel computations beyond a single node
+	  using the <a href="http://mpi-forum.org/" target="_top">Message
+	  Passing Interface (MPI)</a>, pass the
+	  <code>-DDEAL_II_WITH_MPI=ON</code> argument
+	  to <code>cmake</code>. If <code>cmake</code> found MPI but you
+	  specifically do not want to use it, then
+	  pass <code>-DDEAL_II_WITH_MPI=OFF</code>.
+	</p>
+      </li>
+
+      <li>
+        <p>
+          <i>parameter_gui</i>: If you wish to compile and install the
+          <code>parameter_gui</code> tool discussed in the documentation
+	  of the <code>ParameterHandler</code> class, specify
+          <code>-DDEAL_II_COMPONENT_PARAMETER_GUI=ON</code>. Beware of the
+          fact that <code>parameter_gui</code> needs development packages for Qt.
+	</p>
+      </li>
+
+      <li>
+  <p>
+    <i>64bit indices</i>: By default, deal.II uses unsigned int (32bit)
+    indices for degrees of freedom, using
+    the <code>types::global_dof_index</code> type. This limits the number of
+    unknowns to approximately four
+    billion. If larger problems must be solved, pass the
+    <code>-DDEAL_II_WITH_64BIT_INDICES=ON</code> argument to
+    <code>cmake</code>. You will not be able to solve problems of this size on
+    a single machine, but only when using clusters of computers and the linear
+    algebra packages PETSc or Trilinos.
+    To use this option with PETSc, PETSc must be compiled
+    with the option <code>--with-64-bit-indices</code>.
+  </p>
+      </li>
+    </ul>
+
+
+
+    <a name="optional-software"/>
+    <h4>Optional interfaces to other software packages</h4>
+
+    <p>
+      When configuring interfacing to external libraries, the
+      <code>cmake</code> script by default tries to find all of these
+      libraries in a number of standard locations on your file system.
+      For <i>optional</i> interfaces, it gives up if the library is not
+      found and <acronym>deal.II</acronym> will be built without support
+      for them.
+      However, there is one interface that we <i>need</i> to have: <a
+      href="http://www.boost.org/" target="_top">BOOST</a>. If it is not
+      found externally <code>cmake</code> will resort to the bundled boost
+      version that is part of the <acronym>deal.II</acronym> tar file.
+    </p>
+
+    <p>
+      The following paragraphs describe how the interfaces to the
+      various packages that <acronym>deal.II</acronym> interacts with
+      can be configured.
+    </p>
+
+    <p><b>Notes:</b></p>
+      <ul>
+        <li>
+          <b>The majority of libraries mentioned below should be readily
+          packaged by most Linux distributions. Usually you need to
+          install a <i>development</i> version of a library package,
+          e.g. ending in <code>-dev</code> or <code>-devel</code>.
+          After that <code>cmake</code> will automatically find the
+          library and use it.</b>
+	</li>
+        <li>
+          Configuring the interface to a self compiled package,
+          say <code>foo</code> can usually be done by specifying
+          <code>-DFOO_DIR=/path/to/foo</code>.  Alternatively, you can
+          set <code>FOO_DIR</code> as an environment variable in your
+          <code>.bashrc</code> or <code>.cshrc</code> file so that
+          you do not have to enter this argument again the next time
+          you invoke <code>cmake</code> in a fresh build directory.
+          Any value passed on the command line wins over a value that
+          may be found in an environment variable.
+      </li>
+        <li>
+          To explicitly enable or disable support for
+          a library <code>foo</code> use the argument
+          <code>-DDEAL_II_WITH_FOO=ON</code> resp.
+          <code>-DDEAL_II_WITH_FOO=OFF</code> for <code>cmake</code>.
+      </li>
+    </ul>
+
+    <dl>
+      <dt><a name="ARPACK"/>
+	<a href="http://www.caam.rice.edu/software/ARPACK/" target="_top">ARPACK</a></dt>
+      <dd>
+	<p>
+	  <a href="http://www.caam.rice.edu/software/ARPACK/"
+	     target="_top">ARPACK</a> is a library for computing large
+          scale eigenvalue problems.
+          <a href="http://www.caam.rice.edu/software/ARPACK/" target="_top">ARPACK</a>
+          should be readily packaged by most Linux distributions.
+          (Don't forget to install a development version of the library).
+          To use a self compiled version, specify
+          <code>-DARPACK_DIR=/path/to/arpack</code> on the command line.
+
+	  For a detailed description of how to compile ARPACK and link it with deal.II, see
+	  <a href="external-libs/arpack.html" target="body">this page</a>.
+	</p>
+      </dd>
+
+      <dt>
+	<a name="blas"></a>
+	<a href="http://www.netlib.org/blas/" target="_top">BLAS</a>,
+	<a href="http://www.netlib.org/lapack/" target="_top">LAPACK</a>
+      </dt>
+      <dd>
+	<p>
+	  <a href="http://www.netlib.org/blas/" target="_top">BLAS</a>
+	  (the <i>Basic Linear Algebra Subroutines</i>) and
+	  <a href="http://www.netlib.org/lapack/" target="_top">LAPACK</a>
+	  (<i>Linear Algebra Package</i>) are two packages that support
+	  low-level linear algebra operations on vectors and dense
+          matrices.
+          Both libraries should be packaged by almost all Linux
+          distributions and found automatically whenever available.
+          (You might have to install development versions of the libraries
+          for <acronym>deal.II</acronym> to be able to use them).
+          For details on how to set up <acronym>deal.II</acronym> with a
+          non standard BLAS/LAPACK implementation, see the
+	  <a href="users/cmake.html#advanced">advanced
+            setup</a> section in the CMake README.
+	</p>
+      </dd>
+
+      <dt><a name="HDF5"></a><a href="http://www.hdfgroup.org/HDF5/">HDF5</a></dt>
+      <dd>
+	<p>
+	  The <a href="http://www.hdfgroup.org/HDF5/">HDF5 library</a>
+	  provides graphical output capabilities in <code>HDF5/XDMF</code>
+	  format.
+	  <a href="http://www.hdfgroup.org/HDF5/">HDF5</a> should be
+	  readily packaged by most Linux distributions.  (Don't forget
+          to install a development version of the library).
+	  To use a self compiled version, specify
+	  <code>-DHDF5_DIR=/path/to/hdf5</code> on the command line.
+	</p>
+      </dd>
+
+
+      <dt><a name="metis"></a><a href="http://glaros.dtc.umn.edu/gkhome/metis/metis/overview"
+	     target="_top">METIS</a></dt>
+      <dd>
+	<p>
+	  <a href="http://glaros.dtc.umn.edu/gkhome/metis/metis/overview"
+		  target="_top">METIS</a> is a library that
+	  provides various methods to partition
+	  graphs. <acronym>deal.II</acronym> uses it in programs like the
+	  step-17 tutorial to partition a mesh for parallel computing.
+	  To use a self compiled version, specify
+	  <code>-DMETIS_DIR=/path/to/metis</code> on the command line.
+	  <acronym>deal.II</acronym> supports METIS 5 and later.
+	</p>
+
+	<p>
+	  <b>Note:</b> A more modern way to support parallel computations is
+	  shown in the step-40 tutorial program and is based on
+	  the <code>p4est</code> library instead of METIS. See below on
+	  installing <code>p4est</code>.
+	</p>
+      </dd>
+
+
+      <dt>
+	<a name="muparser"></a>
+	<a href="http://muparser.beltoforion.de/">muparser</a>
+      </dt>
+      <dd>
+	<p>
+	  <a href="http://muparser.beltoforion.de/">muparser</a>
+	  is a library that allows one to enter functions in text form and have them
+	  interpreted at run time. This is particularly useful in input
+	  parameter files. <code>cmake</code> will usually find the version of
+	  this library that comes bundled with <acronym>deal.II</acronym>, but you
+	  can specify <code>-DMUPARSER_DIR=/path/to/muparser</code> if desired.
+	</p>
+      </dd>
+
+
+      <dt>
+	<a name="opencascade"></a>
+	<a href="http://www.opencascade.org/">OpenCASCADE</a>
+      </dt>
+      <dd>
+	Open CASCADE Technology is a software development kit for
+	applications dealing with 3D CAD data, freely available in
+	open source. Our internal interface works with the legacy
+	version of OpenCASCADE, which you can download and install
+	from the official website, as well as with the OpenCASCADE
+	Community Edition (OCE, available at
+	<a href="https://github.com/tpaviot/oce">https://github.com/tpaviot/oce</a>),
+	which offers a cmake 
+	interface for its compilation. Alternatively, you can
+	install one of the many external applications that ship with
+	OpenCASCADE internally (for example 
+	<a href="http://www.salome-platform.org/">SALOME</a>, or 
+	<a href="http://www.freecadweb.org/">FreeCAD</a>). Further
+	installation instructions can be
+	found <a href="external-libs/opencascade.html"
+	target="body">here</a>.
+      </dd>
+
+      <dt><a name="netcdf"></a><a href="http://www.unidata.ucar.edu/software/netcdf/" target="_top">NetCDF</a></dt>
+      <dd>
+	<a href="http://www.unidata.ucar.edu/software/netcdf/"
+	   target="_top">NetCDF</a> is a library that provides services for
+	reading and writing mesh data (and many other
+	things). <acronym>deal.II</acronym> can use it to read meshes via
+        one of the functions of the <code>GridIn</code> class.
+	<a href="http://www.unidata.ucar.edu/software/netcdf/"
+           target="_top">NetCDF</a> should be readily packaged by most
+        Linux distributions. (Don't forget to install a development
+	version of the library). To use a self compiled version, pass
+	<code>-DNETCDF_DIR=/path/to/netcdf</code> to <code>cmake</code>.
+      </dd>
+
+
+      <dt><a name="p4est"></a><a href="http://www.p4est.org/" target="_top">p4est</a></dt>
+      <dd>
+	<p>
+	  <a href="http://www.p4est.org/" target="_top">p4est</a>
+	  is a library that <acronym>deal.II</acronym> uses to
+	  distribute very large meshes across multiple processors (think
+	  meshes with a billion cells on 10,000 processors). Using and
+	  installing p4est is discussed <a href="external-libs/p4est.html"
+					   target="body">here</a>.
+	  To use a self compiled version, pass the argument
+	  <code>-DP4EST_DIR=/path/to/p4est</code> to the
+	  <code>cmake</code> command.
+	</p>
+      </dd>
+
+
+      <dt><a name="petsc"></a><a href="http://www.mcs.anl.gov/petsc/"
+		 target="_top">PETSc</a></dt>
+      <dd>
+	<p>
+	  <a href="http://www.mcs.anl.gov/petsc/" target="_top">PETSc</a> is a
+	  library that supports parallel linear algebra and many other things.
+
+	  <a href="http://www.mcs.anl.gov/petsc/" target="_top">PETSc</a>
+	  is already packaged by some Linux distributions and should be
+	  found automatically if present. (Don't forget to install a
+	  development version of the library).  To use a self compiled
+	  version of PETSc, add <code>-DPETSC_DIR=/path/to/petsc
+	  -DPETSC_ARCH=architecture</code> to the argument list for
+	  <code>cmake</code>. The values for these arguments must be
+	  the same as those specified when building PETSc.
+	</p>
+
+	<p>
+	  To disable the PETSc interfaces in cases where <code>cmake</code>
+	  automatically finds it, use <code>-DDEAL_II_WITH_PETSC=OFF</code>.
+	  More information on configuring and building PETSc can be
+          found <a href="external-libs/petsc.html" target="body">here</a>.
+	</p>
+      </dd>
+
+
+      <dt><a name="slepc"></a><a href="http://www.grycap.upv.es/slepc/" target="_top">SLEPc</a></dt>
+      <dd>
+	<p>
+	  <a href="http://www.grycap.upv.es/slepc/"
+	  target="_top">SLEPc</a> is a library for eigenvalue
+	  computations that builds on PETSc. Its configuration works
+	  just like that for PETSc, except that the variable to set
+	  is <code>SLEPC_DIR</code>.  For the interface with SLEPc to
+	  work, <acronym>deal.II</acronym>'s PETSc interface must also
+	  be configured correctly (see above).
+	</p>
+
+	<p>
+	  To disable the SLEPc interfaces in cases where <code>cmake</code>
+	  automatically finds it, use <code>-DDEAL_II_WITH_SLEPC=OFF</code>.
+	  More information on configuring and building SLEPc can be
+          found <a href="external-libs/slepc.html" target="body">here</a>.
+	</p>
+      </dd>
+
+
+      <dt><a name="tbb"></a><a href="http://www.threadingbuildingblocks.org/"
+	     target="_top">Threading Building Blocks (TBB)</a></dt>
+      <dd>
+	<p>
+	  The <a href="http://www.threadingbuildingblocks.org/"
+	     target="_top">Threading Building Blocks (TBB)</a>
+	  is a library that provides advanced services for using multiple
+	  processor cores on a single machine and is used
+	  in <acronym>deal.II</acronym> to parallelize many operations. If not
+	  found in a system-wide location, <code>cmake</code> will
+	  resort to the version bundled as part of
+	  the <acronym>deal.II</acronym> download. It is always enabled unless
+	  threads are explicitly disabled, see <a href="#optional">above</a>.
+	</p>
+      </dd>
+
+
+      <dt><a name="trilinos"></a><a href="http://trilinos.sandia.gov" target="_top">Trilinos</a></dt>
+      <dd>
+	<p>
+	  <a href="http://trilinos.sandia.gov" target="_top">Trilinos</a> is a
+	  library for parallel linear algebra and all sorts of other things as
+	  well.
+
+	  To interface with a self compiled version of <a
+	  href="http://trilinos.sandia.gov" target="_top">Trilinos</a>
+	  add <code>-DTRILINOS_DIR=/path/to/trilinos</code> to the
+	  argument list for <code>cmake</code>.  Alternatively, you can
+	  also set an environment variable <code>TRILINOS_DIR</code>
+	  and <code>cmake</code> will pick up this path.
+	</p>
+
+	<p>
+	  To disable the Trilinos interfaces in cases where
+	  <code>cmake</code> automatically finds it, use
+	  <code>-DDEAL_II_WITH_TRILINOS=OFF</code>. More details about
+	  compatibility and configuration can be found
+          <a href="external-libs/trilinos.html" target="body">here</a>.
+	</p>
+      </dd>
+
+
+      <dt><a name="umfpack"></a><a href="http://www.cise.ufl.edu/research/sparse/umfpack/"
+	     target="_top">UMFPACK</a></dt>
+      <dd>
+	<p>
+	  <a href="http://www.cise.ufl.edu/research/sparse/umfpack/"
+	     target="_top">UMFPACK</a> is a sparse direct solver that we often
+	  use in prototype codes where the goal is to simply get a linear
+          system solved robustly.
+	  The interface will be enabled by default, either using a version
+	  installed on your system or using a version that comes bundled
+	  with <acronym>deal.II</acronym>.
+	  It can be disabled explicitly by using the
+	  <code>-DDEAL_II_WITH_UMFPACK=OFF</code> argument.
+	  To use a self compiled version, pass the argument
+	  <code>-DUMFPACK_DIR=/path/to/umfpack</code> to the
+	  <code>cmake</code> command.
+	</p>
+
+	<p>
+	  UMFPACK has its own license. To use it with deal.II, please
+	  read it and make sure that you agree with it. You can find
+	  the license of UMFPACK
+          <a href="http://www.cise.ufl.edu/research/sparse/umfpack/">here</a>.
+	  We include UMFPACK in the deal.II distributions courtesy of
+	  its author, <a href="http://www.cise.ufl.edu/~davis/">Timothy
+	  A. Davis</a>.
+	</p>
+      </dd>
+    </dl>
+
+
+    <a name="conf-details"/>
+    <h4>More information on configuring and building the library</h4>
+
+    <p>
+      The <acronym>deal.II</acronym> <code>cmake</code> system allows far
+      greater control over what exactly is configured or built than just the
+      outline above. If you want to learn more about this, take a look
+      <a href="users/cmake.html">here</a>. You might also be interested in
+      <a href="users/cmakelists.html">CMake for user projects</a> or 
+      <a href="developers/cmake-internals.html">build system internals</a>.
+    </p>
+
+
+    <a name="license"></a>
+    <h2>License</h2>
+
+    <p>
+      The deal.II library is free software; you can use it, redistribute
+      it, and/or modify it under the terms of the
+      <a href="http://www.gnu.org/licenses/lgpl-2.1.html">GNU Lesser General
+      Public License</a> (LGPL) as published by the Free Software
+      Foundation; either version 2.1 of the License, or (at your option)
+      any later version.
+    </p>
+    <p>
+      This allows you to use the library free of charge for private,
+      academic, or commercial use (under the terms of the LGPL v2.1 or
+      later). You are guaranteed full access to the source code and are
+      encouraged to help in the further development of the library. Follow
+      <a href="http://www.dealii.org/license.html" target="body">this
+        link</a> for detailed information.
+    </p>
+    <p>
+      Please note:</p>
+      <ul>
+	<li>
+          Detailed license information can be found following
+          <a href="http://www.dealii.org/license.html" target="body">this link</a>.
+	</li>
+	<li>
+          <b>As a contributor to this project, you agree that any of your
+          contributions be licensed under the same terms and conditions as
+          the license of the deal.II project granted to you.</b>
+	</li>
+        <li>
+          We, <a href="http://www.dealii.org/authors.html"
+            target="_top">the deal.II Authors</a>, do not require copyright
+          assignments for contributions. This means that the copyright for
+          code contributions in the deal.II project is held by its
+          respective contributors who have each agreed to release their
+          contributed code under the terms of the LGPL v2.1 or later.
+	</li>
+	<li>
+          In addition to the terms imposed by the LGPL (version 2.1 or
+          later), we ask for the courtesy that scientific publications
+          presenting results obtained with this library acknowledge its
+          use. Please cite one of the papers referenced
+          <a href="http://www.dealii.org/publications.html">here</a>.
+	</li>
+	<li>
+          <acronym>deal.II</acronym> can interface with a number of <a
+            href="#optional-software">other packages</a> that you
+          have to install yourself. They are, of course, covered by their
+          own licenses. In addition, deal.II comes bundled with copies of
+          <a href="http://www.cise.ufl.edu/research/sparse/umfpack/"
+            target="_top">UMFPACK</a>,
+          <a href="http://threadingbuildingblocks.org/"
+            target="_top">Threading Building Blocks</a>,
+          <a href="http://www.boost.org/" target="_top">BOOST</a> and
+          <a href="http://muparser.beltoforion.de/"
+            target="_top">muparser</a>, courtesy of their authors.
+          These are also covered by their own licenses; please refer to
+          their webpages for more information.
+	</li>
+      </ul>
+
+  <hr />
+  <div class="right">
+    <a href="http://validator.w3.org/check?uri=referer" target="_top">
+      <img style="border:0" src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!" /></a>
+    <a href="http://jigsaw.w3.org/css-validator/check/referer" target="_top">
+      <img style="border:0;width:88px;height:31px" src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!" /></a>
+  </div>
+
+  </body>
+</html>
diff --git a/doc/screen.css b/doc/screen.css
new file mode 100644
index 0000000..5369e03
--- /dev/null
+++ b/doc/screen.css
@@ -0,0 +1,336 @@
+/*
+ Copyright (C) 1998 - 2015 by the deal.II authors
+*/
+
+body {
+    background-image: none;
+    background-color: #ECF0F4; 
+    color: Black;
+    margin: 10px;
+    padding: 0;
+    font-family: sans-serif;
+    counter-reset: section;
+}
+
+body.title {
+    background-color: #CCD8E8; 
+    margin: 0;
+    padding: 2px;
+}
+
+body.gallery {
+    background-color: #CCD8E8; 
+    margin: 0;
+    padding: 2px;
+}
+
+body.navbar {
+    background-color: #CCD8E8; 
+    margin: 2px;
+    padding: 2px;
+    font-family: sans-serif;
+    font-size: 12px;
+}
+
+div.toc {
+    width: 50%;
+    color: black;
+    border: 1px solid #aaa;
+    background-color: #f9f9f9;
+    padding: 5px;
+    font-size: 88%;
+}
+
+div.toc:before {
+    display: block;
+    content: "Table of contents";
+    text-align: center;
+    font-size: 88%;
+    font-weight: bold;
+}
+
+div.quick {
+    width: 30%;
+    color: black;
+    border: 1px solid #aaa;
+    background-color: #f9f9f9;
+    padding: 5px;
+    font-size: 88%;
+}
+
+div.quick:before {
+    display: block;
+    content: "Quick links";
+    text-align: center;
+    font-size: 88%;
+    font-weight: bold;
+}
+
+frameset {
+    background-color: white;
+    border-color: black;
+    border: 2px;
+}
+
+a:hover { text-decoration: underline; }
+A.bold { font-weight: bold;}
+
+
+img {
+    border: none;
+    vertical-align: middle;
+}
+
+p {
+    margin: 0.4em 0em 0.5em 0em;
+    line-height: 1.5em;
+}
+
+hr {
+    height: 1px;
+    color: #aaaaaa;
+    background-color: #aaaaaa;
+    border: 0;
+    margin: 0.2em 0 0.2em 0;
+}
+
+div.fixedhead {
+        text-align:center;
+        position: fixed;
+        bottom:10px;
+        width: 98%;
+}
+
+h1.head {
+    position:relative;
+        text-align:center;
+    font-weight: bold;
+    color:black;
+    padding-top: 0;
+    padding-bottom: 0;
+    border-bottom: 0;
+}
+
+h1, h2, h3, h4, h5, h6 {
+    color: Black;
+    background: none;
+    font-weight: normal;
+    margin: 0.1em;
+    padding-top: 0.5em;
+    padding-bottom: 0.17em;
+    border-bottom: 1px solid #aaaaaa;
+}
+
+h1 { font-size: 175%;
+     counter-reset: section; }
+
+h2 { font-size: 150%;
+     padding-top: 1.5em;
+     counter-reset: subsection; }
+
+h3 { font-size: 140%;
+     padding-top: 0.5em;
+     padding-bottom: 0.17em;
+     border-bottom: 1px dashed #aaaaaa; }
+
+h4 { font-size: 110%;
+     border-bottom: none;
+     font-weight: bold;
+}
+h5 { font-size: 100%;
+     border-bottom: none;
+     font-weight: bold;
+}
+h6 { font-size: 80%;
+     border-bottom: none;
+     font-weight: bold;
+}
+
+h2:before {
+    counter-increment: section;
+    content: counter(section) ". ";
+}
+
+h3:before {
+    counter-increment: subsection;
+    content: counter(section) "." counter(subsection) ". ";
+}
+
+div.right {
+  text-align: right;
+}
+
+td { vertical-align: top; }
+
+
+ul {
+    line-height: 1.5em;
+    list-style-type: square;
+    margin: 0.3em 0 0 1.5em;
+    padding:0;
+}
+
+ol {
+    line-height: 1.5em;
+    margin: 0.3em 0 0 3.2em;
+    padding:0;
+    list-style-image: none;
+}
+
+li {
+    margin-bottom: 0.1em;
+}
+
+dt {
+    font-weight: bold;
+    margin-bottom: 0.1em;
+}
+dl{
+    margin-top: 0.2em;
+    margin-bottom: 0.5em;
+}
+dd {
+    line-height: 1.5em;
+    margin-left: 2em;
+    margin-bottom: 0.1em;
+}
+
+abbr, acronym, .explain {
+    color: Black;
+    background: none;
+    cursor: help;
+}
+
+q {
+    font-family: Times, "Times New Roman", serif;
+    font-style: italic;
+}
+
+.figure     { font-weight: bold;
+          font-size: larger;
+        }
+
+.pagetoc    {}
+
+.chapter_title {}
+
+span.parhead {
+    font-weight: bold;
+}     
+
+span.example {
+    font-weight: bold;
+    font-style: italic;
+}
+
+pre {
+    padding: 1em;
+    text-align: left;
+    text-indent: 0;
+    border: 1px dashed #2f6fab;
+    color: Black;
+    background-color: #f9f9f9;
+    line-height: 1.1em;
+}
+    
+pre.cmake {
+    padding: 1em;
+    text-align: left;
+    text-indent: 0;
+    border: 1px solid #d06fab;
+    color: Black;
+    background-color: #f9f9f9;
+    line-height: 1.1em;
+}
+    
+pre.sample {
+    padding: 1em;
+    text-align: left;
+    text-indent: 0;
+    border: 1px dashed #2f6fab;
+    color: Black;
+    background-color: #f9f9f9;
+    line-height: 1.1em;
+}
+    
+table.navbar { }
+
+strong.red { color: #ff0000; }
+code.program { color: #700000; }
+code.class { color: #009000; }
+code.member { color: #006000; }
+code.function { color: #006000; }
+code.global { color: #005030; }
+
+code {
+    white-space: nowrap;
+}
+
+td.build {
+    text-align: center;
+    vertical-align: middle;
+    font-size: small;
+}
+
+
+.test4 {
+    background-color: #90ff80;
+}
+
+.test3 {
+    background-color: #FFFF00;
+}
+
+.test2 {
+    background-color: #FFA000;
+}
+
+.test1 {
+    background-color: #FF2020;
+}
+
+.test0 {
+    background-color: #C030D0;
+}
+
+.weak {
+    color: #808080;
+    text-decoration: line-through;
+}
+
+.todo:before {
+    content: "TODO: ";
+}
+
+.todo {
+    color: Red;
+}
+
+.deprecated {
+    text-decoration: line-through;
+}
+
+.attention {
+    color: #f00080;
+    font-weight: bold;
+}
+
+.shout {
+    color: #e000ff;
+    text-decoration: blink;
+    font-weight: bold;
+}
+
+div.border {
+    clear: both;
+    margin: auto;
+}
+
+div.infobox {
+    float: left;
+    width: 20em;
+    text-align: center;
+    margin-top: 1em;
+    margin-left: 20px;
+    margin-right: 20px;
+}
diff --git a/doc/title.html.in b/doc/title.html.in
new file mode 100644
index 0000000..7321cae
--- /dev/null
+++ b/doc/title.html.in
@@ -0,0 +1,17 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Frameset//EN"
+   "http://www.w3.org/TR/REC-html40/frameset.dtd">
+<html>
+<head>
+  <title>deal.II Title Bar</title>
+  <link href="screen.css" rel="StyleSheet">
+  <meta name="copyright" content="Copyright (C) 1999, 2000, 2001, 2002, 2003, 2005, 2006, 2010, 2015 by the deal.II Authors">
+</head>
+
+<body class="title">
+  <div class="fixedhead">
+    <h1 class="head">deal.II @DEAL_II_PACKAGE_VERSION@ Documentation</h1>
+  </div>
+</body>
+</html>
+
+
diff --git a/doc/users/CMakeLists.txt.sample b/doc/users/CMakeLists.txt.sample
new file mode 100644
index 0000000..8f35a43
--- /dev/null
+++ b/doc/users/CMakeLists.txt.sample
@@ -0,0 +1,11 @@
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4.0 REQUIRED
+  HINTS ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+
+PROJECT(myproject)
+
+ADD_EXECUTABLE(mycode mycode.cc)
+DEAL_II_SETUP_TARGET(mycode)
diff --git a/doc/users/CMakeLists.txt.sample2 b/doc/users/CMakeLists.txt.sample2
new file mode 100644
index 0000000..adca53b
--- /dev/null
+++ b/doc/users/CMakeLists.txt.sample2
@@ -0,0 +1,28 @@
+FIND_PACKAGE(deal.II 8.4.0 REQUIRED
+  HINTS
+    ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+    # You can specify additional hints for search paths here, e.g.
+    # $ENV{HOME}/workspace/deal.II
+  )
+
+# Set the name of the project and target:
+SET(TARGET "step-1")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#    FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#    FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#    SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC}) 
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET} CXX)
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/doc/users/CMakeLists.txt.sample3 b/doc/users/CMakeLists.txt.sample3
new file mode 100644
index 0000000..b637fe7
--- /dev/null
+++ b/doc/users/CMakeLists.txt.sample3
@@ -0,0 +1,41 @@
+FIND_PACKAGE(deal.II 8.4.0 REQUIRED
+  HINTS
+    ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+    # You can specify additional hints for search paths here, e.g.
+    # $ENV{HOME}/workspace/deal.II
+  )
+
+# Set the name of the project and target:
+SET(TARGET "step-1")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#    FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#    FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#    SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC}) 
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Specify a list of files (file globs) that will be removed
+# with the "make runclean" and "make distclean" targets.
+# (If empty, sensible default values will be used.)
+SET(CLEAN_UP_FILES
+  # a custom list of globs, e.g. *.log *.vtk
+  )
+
+# A custom command line that should be invoked by "make run".
+# (If empty, ./${TARGET} will be invoked.)
+SET(TARGET_RUN
+  # a custom command line, e.g. mpirun -np 2 ${TARGET}
+  )
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET} CXX)
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/doc/users/cmake.html b/doc/users/cmake.html
new file mode 100644
index 0000000..d5431c1
--- /dev/null
+++ b/doc/users/cmake.html
@@ -0,0 +1,1022 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+	  "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+  <head>
+    <title>deal.II CMake documentation</title>
+    <link href="../screen.css" rel="StyleSheet">
+    <meta name="author" content="the deal.II authors <authors @ dealii.org>">
+    <meta name="copyright" content="Copyright (C) 2012, 2013, 2014, 2015 by the deal.II authors">
+    <meta name="keywords" content="deal.II">
+  </head>
+
+  <body>
+
+
+    <h1>Details on the <acronym>deal.II</acronym> configuration and build system</h1>
+
+    <p>
+      The <acronym>deal.II</acronym> <a href="../readme.html"
+      target="body">README</a> file gives an overview of the basics
+      of configuring and building the <acronym>deal.II</acronym>
+      library. This page provides more details about using the
+      <acronym>deal.II</acronym> CMake build system.
+    </p>
+
+    <div class="toc">
+      <ol>
+	<li><a href="#operating">Operating cmake</a>
+          <ol>
+            <li><a href="#operatingmanip">Manipulating the cache</a></li>
+            <li><a href="#operatingccmake"><code>ccmake</code> and special build targets </a></li>
+            <li><a href="#operatingshortcuts">Shortcuts</a></li>
+	  </ol>
+	</li>
+	<li><a href="#build">Configuring and Building deal.II</a>
+          <ol>
+            <li><a href="#buildtargets">Primary build targets</a></li>
+            <li><a href="#buildinformation">Information about current configuration</a></li>
+            <li><a href="#buildinsource">Out-of-source versus in-source builds</a></li>
+            <li><a href="#buildgenerator">CMake Generators</a></li>
+            <li><a href="#buildinstall">Installation</a></li>
+	  </ol>
+        </li>
+	<li><a href="#configure">Configuration options</a>
+	  <ol>
+	    <li><a href="#configurefeature">Feature configuration</a></li>
+	    <li><a href="#configureautoconf">Autoconfiguration</a></li>
+	    <li><a href="#configureext">External library locations</a></li>
+	    <li><a href="#configureoverride">Manual override</a></li>
+	    <li><a href="#configurecomp">Component selection</a></li>
+	    <li><a href="#configurebuild">Build configuration</a></li>
+	    <li><a href="#compiler">Selecting a compiler</a></li>
+	    <li><a href="#configureinstall">Installation</a></li>
+	  </ol>
+	</li>
+	<li><a href="#advanced">Initial cache file and advanced options</a></li>
+	<li><a href="#compiling">Compiling only certain parts</a></li>
+      </ol>
+    </div>
+
+    <a name="operating"></a>
+    <h2>Operating cmake</h2>
+
+    <p>
+      When configuring <acronym>deal.II</acronym> by
+      running <code>cmake</code>, the <code>cmake</code> program
+      creates a cache in the current (build) directory that contains
+      the values of all (cached) variables that had previously been passed
+      as command line arguments, been found through running tests,
+      or had otherwise been set.
+    </p>
+
+    <p>
+      On a subsequent call to <code>cmake</code>, e.g. to change the
+      configuration or due to a callback from the build system because some
+      configuration file (the <code>CMakeLists.txt</code> and
+      <code>cmake/&lt;..&gt;.cmake</code> files) has changed,
+      <code>cmake</code> will only run whatever tests are
+      necessary; <i>values for variables that are already in the cache
+      are not re-evaluated</i>. This means that calling <code>cmake</code>
+      a second time without any arguments at all in a situation like this
+<pre class="cmake">
+mkdir build
+cd build
+cmake -DCMAKE_INSTALL_PREFIX=/path/install/dir ../deal.II
+cmake ../deal.II
+</pre>
+      has no effect: In particular, the <code>CMAKE_INSTALL_PREFIX</code>
+      specified on the first invocation of <code>cmake</code> is
+      cached and therefore still valid after the second invocation
+      of <code>cmake</code>. This is different from the way the
+      <code>autoconf/configure</code> mechanism worked.
+    </p>
+
+    <p>
+      The cache has an important reason: one can modify all sorts of
+      configuration parameters and thereby interact with the configuration
+      system in rather powerful (and, possibly, destructive) ways. For
+      example, the following commands
+<pre class="cmake">
+mkdir build
+cd build
+cmake  ../deal.II
+ccmake
+</pre>
+      first configure a bare-bone setup and then call
+      the <code>ccmake</code> program -- an interactive editor for the
+      cached variables. Similarly,
+<pre class="cmake">
+mkdir build
+cd build
+cmake  ../deal.II
+cmake -D&lt;OPTION&gt;=&lt;VALUE&gt; [...] ../deal.II
+</pre>
+      sets a variable the second time around without destroying all
+      the configuration that has happened the first time around. Likewise,
+<pre class="cmake">
+mkdir build
+cd build
+cmake  ../deal.II
+cmake -DDEAL_II_WITH_METIS=OFF .
+cmake -DDEAL_II_WITH_TRILINOS=ON -DTRILINOS_DIR=/path/to/trilinos .
+</pre>
+      switches off support for the METIS library that may have been
+      automatically detected during the first invocation of
+      <code>cmake</code> and enables support for Trilinos by enabling
+      <code>DEAL_II_WITH_TRILINOS</code> and setting
+      <code>TRILINOS_DIR</code>.
+    </p>
+
+    <a name="operatingmanip"></a>
+    <h3> Manipulating the cache </h3>
+
+    <p>
+      A cached variable can be set on the command line via
+<pre class="cmake">
+cmake -D&lt;VARIABLE&gt;=&lt;VALUE&gt; .
+</pre>
+
+      Cached variables can be removed from the cache via
+<pre class="cmake">
+cmake -U&lt;VARIABLE&gt; .
+</pre>
+      It is possible to use <code>-U</code> together with a globbing
+      expression. E.g. to remove the current feature configuration and
+      rerun the autodetection one can invoke
+<pre class="cmake">
+cmake -U"DEAL_II_WITH_*" .
+</pre>
+    </p>
+
+    <a name="operatingccmake"></a>
+    <h3> <code>ccmake</code> and special build targets </h3>
+
+    <p>
+      A very convenient way to alter the configuration is to use the graphical
+      user interface <code>ccmake</code> to the
+      variables <code>cmake</code> stores upon running. It can be invoked via
+<pre class="cmake">
+ccmake .
+</pre>
+      or by
+<pre class="cmake">
+make edit_cache
+</pre>
+
+      A reconfiguration (without editing the cache) can be run via
+<pre class="cmake">
+make rebuild_cache
+</pre>
+    </p>
+
+    <a name="operatingshortcuts"></a>
+    <h3> Shortcuts </h3>
+
+    <p>
+      All variables starting with <code>WITH_</code> will be automatically
+      renamed to <code>DEAL_II_WITH_*</code>. So, it suffices to specify
+<pre class="cmake">
+cmake -DWITH_MPI=ON &lt;...&gt;
+</pre>
+      instead of the longer
+<pre class="cmake">
+cmake -DDEAL_II_WITH_MPI=ON &lt;...&gt;
+</pre>
+      The same holds for all variables starting with <code>COMPONENT_</code>
+      and all individual component names:
+      <code>DOCUMENTATION</code>, <code>EXAMPLES</code>
+      and <code>PARAMETER_GUI</code> (which will be expanded to the 
+      full <code>DEAL_II_COMPONENT_*</code> variable name).
+    </p>
+
+
+
+    <a name="build"></a>
+    <h2>Configuring and Building deal.II</h2>
+
+    <p>
+      This section provides some further details and advanced topics with
+      respect to configuration and building that are not covered in the <a
+      href="../readme.html" target="body">README</a>.
+    </p>
+
+    <a name="buildtargets"></a>
+    <h3>Primary build targets</h3>
+
+    <p> The current list of primary build targets can be queried via
+    <code>make info</code>:
+<pre class="cmake">
+###
+#
+#  The following targets are available (invoke by $ make &lt;target&gt;):
+#
+#    all            - compile the library and all enabled components
+#    clean          - remove all generated files
+#    install        - install into CMAKE_INSTALL_PREFIX
+#
+#    info           - print this help message
+#    help           - print a list of valid top level targets
+#
+#    edit_cache     - run ccmake for changing (cached) configuration variables
+#                     and reruns the configure and generate phases of CMake
+#    rebuild_cache  - rerun the configure and generate phases of CMake
+#
+#    compat_files   - build and install component 'compat_files'
+#    documentation  - build and install component 'documentation'
+#    examples       - build and install component 'examples'
+#    library        - build and install component 'library'
+#    parameter_gui  - build and install component 'parameter_gui'
+#    package        - build binary package
+#
+#    test           - run a minimal set of tests
+#
+#    setup_tests    - set up testsuite subprojects
+#    prune_tests    - remove all testsuite subprojects
+#
+###
+</pre>
+
+
+    <a name="buildinformation"></a>
+    <h3>Information about current configuration</h3>
+
+    <p>
+      A configuration run of <code>cmake</code> (or <code>ccmake</code>)
+      writes a short summary of the current configuration into
+      <code>CMAKE_BUILD_DIR/summary.log</code>:
+<pre class="cmake">
+###
+#
+#  deal.II configuration:
+#        CMAKE_BUILD_TYPE:       DebugRelease
+#        BUILD_SHARED_LIBS:      ON
+#        CMAKE_INSTALL_PREFIX:   /tmp/deal.II/install
+#        CMAKE_SOURCE_DIR:       /tmp/deal.II/deal.II (Version 8.1.pre)
+#        CMAKE_BINARY_DIR:       /tmp/deal.II/build
+#        CMAKE_CXX_COMPILER:     GNU 4.7.3 on platform Linux x86_64
+#                                /usr/bin/c++
+#
+#  Configured Features (DEAL_II_ALLOW_BUNDLED = ON, DEAL_II_ALLOW_AUTODETECTION = ON):
+#      ( DEAL_II_WITH_64BIT_INDICES = OFF )
+#        DEAL_II_WITH_ARPACK set up with external dependencies
+#        DEAL_II_WITH_BOOST set up with external dependencies
+#        [...]
+#
+#  Component configuration:
+#      ( DEAL_II_COMPONENT_DOCUMENTATION = OFF )
+#        DEAL_II_COMPONENT_EXAMPLES
+#      ( DEAL_II_COMPONENT_PARAMETER_GUI = OFF )
+#
+#  Detailed information (compiler flags, feature configuration) can be found in detailed.log
+#
+#  Run  $ make info  to print a help message with a list of top level targets
+#
+###
+</pre>
+      This summary is also printed at the end of the configuration phase.
+      It tells you about build and install directory locations, feature
+      configuration (whether a feature is enabled with external/internal
+      dependencies, disabled or forced) and component configuration.
+      A more detailed version can be found in
+      <code>CMAKE_BUILD_DIR/detailed.log</code> that also includes detailed
+      information about feature configuration, e.g.
+<pre class="cmake">
+#        DEAL_II_WITH_BOOST set up with external dependencies
+#            BOOST_VERSION = 1.52.0
+#            BOOST_DIR =
+#            Boost_INCLUDE_DIRS = /usr/include
+#            Boost_LIBRARIES = /usr/lib64/libboost_serialization-mt.so;/usr/lib64/libboost_system-mt.so;
+# [...]
+</pre>
+      If this information is not sufficient, you might want to have a look
+      at the following files in <code>CMAKE_BUILD_DIR</code>
+      <ul>
+        <li><code>CMakeCache.txt</code>: The file CMake stores its cached
+        variables in. Do not change directly.
+        <li><code>include/deal.II/base/config.h</code>: The generated
+        config.h file
+        <li><code>lib/cmake/deal.II/deal.IIConfig.cmake</code>: The project
+        configuration file for external projects.
+      </ul>
+    </p>
+
+
+
+    <a name="buildinsource"></a>
+    <h3>Out-of-source versus in-source builds</h3>
+
+    <p>
+      A so called out-of-source build is a setup where the build directory
+      (the directory containing intermediate and generated files) is
+      different from the source directory (the directory containing the
+      source code). With CMake an out-of-source build is set up by invoking
+      <code>cmake</code> (or <code>ccmake</code>) from the designated build
+      directory, so for example (a build directory under the source
+      directory):
+<pre class="cmake">
+$ mkdir build
+$ cd build
+$ cmake ..
+</pre>
+      The big advantage is that source files and intermediate files are
+      strictly separated (highly desired for version control) and that you
+      can have multiple build directories (with different configuration) at
+      the same time.
+    </p>
+
+    <p>
+      <b>Note:</b> However, on rare occasions an in-source build might be
+      useful or needed, so it is supported:
+<pre class="cmake">
+$ cmake .
+</pre>
+      But we highly discourage it!
+    </p>
+
+    <a name="buildgenerator"></a>
+    <h3>CMake Generators</h3>
+
+    <p>
+      CMake is a <i>Makefile Generator</i>. This allows switching the
+      generator that is used to something different. If you for example want
+      to automatically generate an Eclipse project of deal.II, you can run
+<pre class="cmake">
+$ cmake -G"Eclipse CDT4 - Unix Makefiles" [...]
+</pre>
+      and load up the build directory as a project directly into Eclipse.
+      Have a look at the <a href="https://code.google.com/p/dealii/w/list"
+      target="_top">Wiki</a> for more information.
+    </p>
+
+    <p>
+      An interesting alternative to (GNU) Make might also be <a
+      href="http://martine.github.io/ninja/">Ninja</a>. Configure via
+<pre class="cmake">
+$ cmake -GNinja [...]
+</pre>
+      and run <code>ninja</code> instead of <code>make</code>.
+    </p>
+
+
+    <a name="buildinstall"></a>
+    <h3>Installation</h3>
+
+    <p>
+      It is not necessary to install the library in order to use deal.II.
+      Invoking the <code>all</code> or <code>library</code> target will
+      compile the library and set up all necessary configuration in the
+      build directory so that external projects can directly use it.
+      However, we strongly recommend proceeding in the way explained in the
+      <a href="../readme.html" target="body">README</a> and install the
+      library to a designated install directory (different from source and
+      build directory).
+    </p>
+
+    <p>
+      For installing the library it is necessary to set the CMake variable
+      <code>CMAKE_INSTALL_PREFIX</code> to the designated install
+      directory. You can do this by invoking <code>cmake</code> together
+      with <code>-DCMAKE_INSTALL_PREFIX=&lt;...&gt;</code> or by invoking
+      <code>ccmake</code>.
+    </p>
+
+    <p>
+      <b>Note:</b> When you don't install deal.II to an install directory
+      and use it directly from a build directory,
+      both the build <i>and</i> source directories have to be kept.
+    </p>
+
+    <p>
+      <b>Note:</b> It is not necessary for the source, build and/or install
+      directory to be different. All combinations are supported.
+    </p>
+
+
+    <h4>Install a single component</h4>
+
+    <p>
+      If you want to only generate, compile and install a specific
+      component (most notably the documentation) you can use one of the
+      following top level targets:
+      <pre class="cmake">
+documentation  - builds and installs the 'documentation' component
+examples       - builds and installs the 'examples' component
+library        - builds and installs the 'library' component
+parameter_gui  - builds and installs the 'parameter_gui' component
+</pre>
+
+
+    <a name="configure"></a>
+    <h2>Configuration options</h2>
+
+    <p>
+      The various configuration options of the
+      <acronym>deal.II</acronym> library are organized in three
+      categories: <a href="#configurefeature">feature</a>,
+      <a href="#configurecomp">component</a>, and <a
+      href="#configurebuild">build</a>/<a href="#configureinstall">install</a>
+      configuration.
+    </p>
+
+
+
+    <a name="configurefeature"></a>
+    <h3>Feature configuration</h3>
+
+    <p>
+      <acronym>deal.II</acronym> provides (optional) interfaces to quite a
+      number of external libraries as well as multiple versions
+      of <code>C++</code>. All of these options are represented
+      by <code>cmake</code> variables that may be set to either <code>ON</code>
+      or <code>OFF</code>.
+
+      By default, <acronym>deal.II</acronym>'s <code>CMake</code> build scripts
+      will attempt to find the newest <code>C++</code> version available by
+      checking which language version flags work with the detected compiler. For
+      example: if <code>CMake</code> detects <code>GCC 5.1</code> or <code>Clang
+      3.6</code>, <acronym>deal.II</acronym> will be compiled
+      in <code>C++14</code> mode unless either <code>C++14</code>
+      or <code>C++11</code> is explicitly disabled. Another example:
+      if <code>DEAL_II_WITH_CXX11=ON</code> is passed to <code>CMake</code>,
+      then <acronym>deal.II</acronym> will check if the compiler supports enough
+      parts of the <code>C++11</code> <em>and</em>
+      <code>C++14</code> language standards. <code>CMake</code> will not fail in
+      this example just because the compiler does not support
+      <code>C++14</code>, but <code>CMake</code> <em>will</em> fail if the
+      compiler does not support enough of <code>C++11</code>.
+
+      The remaining flags are set to <code>ON</code> if an external package is
+      found or to <code>OFF</code> otherwise. By explicitly setting it to off
+      either on the command line or using <code>ccmake</code>, you can
+      prevent <acronym>deal.II</acronym> from using an external package, even if
+      it is found.
+    </p>
+
+    <p>
+      Specifically, the following variables exist (the list may grow
+      over time, but names are standardized):
+<pre class="cmake">
+DEAL_II_WITH_64BIT_INDICES
+DEAL_II_WITH_ARPACK
+DEAL_II_WITH_BOOST
+DEAL_II_WITH_BZIP2
+DEAL_II_WITH_CXX11
+DEAL_II_WITH_CXX14
+DEAL_II_WITH_HDF5
+DEAL_II_WITH_LAPACK
+DEAL_II_WITH_METIS
+DEAL_II_WITH_MPI
+DEAL_II_WITH_MUPARSER
+DEAL_II_WITH_NETCDF
+DEAL_II_WITH_OPENCASCADE
+DEAL_II_WITH_P4EST
+DEAL_II_WITH_PETSC
+DEAL_II_WITH_SLEPC
+DEAL_II_WITH_THREADS
+DEAL_II_WITH_TRILINOS
+DEAL_II_WITH_UMFPACK
+DEAL_II_WITH_ZLIB
+</pre>
+      They all have standard meaning with the exception of
+      two:
+      <ul>
+        <li> <code>DEAL_II_WITH_BOOST</code> is always <code>ON</code>
+          since BOOST is a mandatory build time dependency.
+
+        <li> <code>DEAL_II_WITH_THREADS</code> enables threading support
+          with the help of the Threading Building Blocks (TBB) library.
+      </ul>
+    </p>
+
+    <p>
+      If enabled, each of the features above will usually add one or
+      more dependencies to external or 'bundled' (i.e. bundled with
+      <acronym>deal.II</acronym> and residing under <code>bundled/</code>)
+      libraries.
+    </p>
+
+    <p>
+      Additionally, the variable
+<pre class="cmake">
+DEAL_II_CXX_VERSION_FLAG
+</pre>
+      may be set to specify the <code>C++</code> language version flag passed to
+      the compiler.
+    </p>
+
+    <p>
+      There are some options to determine the behavior of the dependency
+      resolution.
+      <ul>
+        <li>
+          <p>
+            <code>DEAL_II_ALLOW_BUNDLED</code>:
+          </p>
+          <p>
+            If set to <code>ON</code> external libraries still have
+            precedence. But if there is no external library the bundled
+            library will be used.
+            If set to <code>OFF</code> bundled libraries will not be
+            used and the dependency resolution will fail if there is no
+            external library.
+          </p>
+        <li>
+          <p>
+            <code>DEAL_II_FORCE_BUNDLED_(BOOST|MUPARSER|THREADS|UMFPACK)</code>:
+          </p>
+          <p>
+            Forces the use of the bundled library regardless of whether
+            <code>DEAL_II_ALLOW_BUNDLED</code> is set to <code>OFF</code>
+            or an external library is found.
+          </p>
+        <li>
+          Thus, to ensure that no bundled library is used at all
+          <code>DEAL_II_ALLOW_BUNDLED</code>, as well as every
+          <code>DEAL_II_FORCE_BUNDLED_&lt;library&gt;</code>
+          have to be set to <code>OFF</code>.  Conversely,
+          <code>DEAL_II_FORCE_BUNDLED_&lt;library&gt;=ON</code>
+          will not automatically enable the corresponding
+          <code>DEAL_II_WITH_&lt;feature&gt;</code> toggle. This has to be
+          set separately.
+      </ul>
+    </p>
+
+
+
+    <a name="configureautoconf"></a>
+    <h3> Autoconfiguration </h3>
+
+    <p>
+      As long as <code>DEAL_II_WITH_&lt;FEATURE&gt;</code> is
+      not explicitly set to <code>ON</code> or <code>OFF</code>
+      in the cache it will be automatically configured. If a toggle
+      <code>DEAL_II_WITH_&lt;FEATURE&gt;</code> is defined it won't
+      be altered.
+      This means that the very first configuration run will set
+      all available features to <code>ON</code> and the rest
+      to <code>OFF</code>. In all subsequent configuration steps
+      <code>DEAL_II_WITH_&lt;FEATURE&gt;</code> has to be changed by hand,
+      see the previous section.
+    </p>
+
+    <p>
+      This behavior can be controlled via several variables:
+      <ul>
+	<li>
+          <code>DEAL_II_ALLOW_AUTODETECTION=OFF</code>: This will
+          disable any autoconfiguration by setting undefined
+          <code>DEAL_II_WITH_&lt;FEATURE&gt;</code> toggles to
+          <code>OFF</code>.
+
+	<li>
+          <code>DEAL_II_FORCE_AUTODETECTION=ON</code>: This will
+          force the reconfiguration of every feature by undefining
+          <code>DEAL_II_WITH_&lt;FEATURE&gt;</code> prior to
+          configuration, effectively overwriting <i>any</i> supplied or
+          cached value.
+      </ul>
+    </p>
+
+
+    <a name="configureext"></a>
+    <h3> External library locations </h3>
+
+    <p>
+      External libraries will be searched depending on hints in the following
+      order:
+      <ol>
+	<li>
+	  <p>
+            Paths specified via <code>CMAKE_PREFIX_PATH</code> take
+            precedence, e.g. with
+<pre class="cmake">
+cmake -DCMAKE_PREFIX_PATH=~/workspace/local ../deal.II
+</pre>
+	    libraries from <code>~/workspace/local</code> will be
+	    preferred for dependency resolution.
+	  </p>
+
+	<li>
+	  <p>
+	    Hints given by <code>&lt;library&gt;_DIR</code> via command
+            line or environment for <i>some</i> libraries:
+<pre class="cmake">
+cmake -DP4EST_DIR=~/workspace/p4est-install/ ../deal.II
+</pre>
+	    or
+<pre class="cmake">
+export P4EST_DIR=~/workspace/p4est-install/
+cmake ../deal.II
+</pre>
+            where <code>-D&lt;library&gt;_DIR</code> takes precedence
+            over environment.
+	  </p>
+
+	  <p>
+	    Currently, the following variables will be considered:
+<pre class="cmake">
+ARPACK_DIR,
+BOOST_DIR,
+HDF5_DIR,
+LAPACK_DIR (and BLAS_DIR),
+METIS_DIR,
+MUPARSER_DIR,
+P4EST_DIR (and SC_DIR),
+PETSC_DIR and PETSC_ARCH (forming ${PETSC_DIR}/${PETSC_ARCH}),
+SLEPC_DIR (forming ${SLEPC_DIR}/${PETSC_ARCH}),
+TBB_DIR,
+TRILINOS_DIR,
+UMFPACK_DIR and SUITESPARSE_DIR (AMD_DIR, CHOLMOD_DIR, COLAMD_DIR, SUITESPARSECONFIG_DIR)
+</pre>
+	  </p>
+
+	<li>
+	  <p>
+	    The default system locations for libraries and includes.
+	  </p>
+      </ol>
+    </p>
+
+    <p>
+      Alternatively, cached variables set by the
+      <code>Find&lt;Module&gt;</code> mechanism may be set,
+      hinted or overwritten directly (variable names are highly
+      dependent on the actual library). You can get a list via
+<pre class="cmake">
+make edit_cache
+</pre>
+      and entering advanced configuration mode by pressing [t].
+      Variables that could not be determined are suffixed with
+      <code>-NOTFOUND</code> and may be set by hand.
+    </p>
+
+    <h4>Library conflicts</h4>
+    <p>Caveat: if you have a set of standard libraries in the default
+    location, say <code>/usr/lib</code> and a set of
+    private versions of the same libraries, for instance because you
+    need different revisions sometimes, in your own library directory,
+    you may receive an error message of the form:</p>
+
+<pre class="cmake">
+CMake Warning at source/CMakeLists.txt:65 (ADD_LIBRARY):
+  Cannot generate a safe runtime search path for target deal_II.g because
+  files in some directories may conflict with libraries in implicit
+  directories:
+
+    runtime library [libtbb.so.2] in /usr/lib may be hidden by files in:
+      /my/private/lib
+
+  Some of these libraries may not be found correctly.
+</pre>
+
+<p>This is not a problem of CMake or deal.II, but rather a general
+  Linux problem. In order to fix this, you have two options:</p>
+<ol>
+<li>Choose all libraries either from your private directory or from
+  the standard one.</li>
+<li>Install all your private library versions in different directories.</li>
+</ol>
+
+    <a name="configureoverride"></a>
+    <h3> Manual override </h3>
+    <p><b>Warning:</b> Do not do this unless absolutely necessary!</p>
+    <p>
+      It is possible to override the CMake find mechanism for external
+      libraries manually. This is useful if a non standard library (e.g.
+      BLAS or LAPACK) should be used but cannot be found by the
+      <code>FIND_PACKAGE(...)</code> mechanism.
+
+      In this case you can set by hand:
+<pre class="cmake">
+cmake -D&lt;feature&gt;_FOUND=true \
+      -D&lt;feature&gt;_LIBRARIES="library;and;complete;link;interface" \
+    ( -D&lt;feature&gt;_INCLUDE_DIRS="semicolon;separated;list;of;include;dirs" \
+      -D&lt;feature&gt;_LINKER_FLAGS="..." \
+      -D&lt;feature&gt;_&lt;...depending on library...&gt; )
+</pre>
+
+      The first define ensures that <code>cmake</code> does not
+      call the corresponding <code>Find&lt;lib&gt;.cmake</code> module.
+      Therefore, all information that would be otherwise exported by the
+      module must be specified by hand. (See the
+      <a href="config.sample" target="_top">config.sample file</a> for a
+      detailed list of valid variables per feature.)
+    </p>
+    <p>
+      An example of use is to select BLAS and LAPACK manually from a PETSc
+      configuration:
+      (Here, these libraries have been compiled with the gfortran compiler
+      and need its support library):
+<pre class="cmake">
+cmake -DLAPACK_FOUND=true \
+      -DLAPACK_LIBRARIES="/tmp/petsc-3.3-p6/arch-linux2-c-debug/lib/libflapack.a;/tmp/petsc-3.3-p6/arch-linux2-c-debug/lib/libfblas.a" \
+      -DLAPACK_LINKER_FLAGS="-lgfortran -lm"
+</pre>
+
+      You can set these values on the command line, with <code>ccmake</code>
+      or by providing an initial cache file, see
+      <a href="#advanced">advanced setup section</a>.
+      Possible manual overrides are explained in detail in the
+      <a href="config.sample" target="_top">config.sample file</a>.
+    </p>
+
+
+    <a name="configurecomp"></a>
+    <h3> Component selection </h3>
+
+    <p>
+      The following options control which components of
+      <acronym>deal.II</acronym> will be configured, built and installed:
+
+      <ul>
+	<li>
+          <p>
+            <code>DEAL_II_COMPONENT_DOCUMENTATION</code>
+            (default <code>OFF</code>):
+          </p>
+          <p>
+            Enable configuration, build and installation of the
+            documentation including all of the tutorial programs and the
+            doxygen-generated manual.  This adds a
+            component <code>documentation</code> to the build system.
+          </p>
+
+	<li>
+          <p>
+            <code>DEAL_II_COMPONENT_EXAMPLES</code>
+            (default <code>ON</code>):
+          </p>
+          <p>
+            Enable configuration and installation of the example steps (but
+            do not generate the documentation for the tutorial steps).
+            This adds a component <code>examples</code> to the build system.
+          </p>
+
+	<li>
+          <p>
+            <code>DEAL_II_COMPONENT_PARAMETER_GUI</code> (default
+            <code>OFF</code>):
+          </p>
+          <p>
+            Enable configuration and installation of the
+            <code>parameter_gui</code> binary. This adds a component
+            <code>parameter_gui</code> to the build system. Beware of the
+            fact that this component needs development packages for Qt.
+	  </p>
+
+      </ul>
+    </p>
+
+
+    <a name="configurebuild"></a>
+    <h3> Build configuration </h3>
+
+    <p>
+      The <code>cmake</code> variable <code>CMAKE_BUILD_TYPE</code>
+      controls the type of build.  We support <code>Debug</code>,
+      <code>Release</code> and <code>DebugRelease</code> mode. Default
+      is <code>DebugRelease</code>.
+      <ul>
+        <li>
+          Passing <code>cmake</code> the
+          flag <code>-DCMAKE_BUILD_TYPE=Debug</code> will produce makefiles
+          that compile and install only the debug library
+          <code>libdeal_II.g.so</code>.
+        <li>
+          Passing <code>cmake</code> the
+          flag <code>-DCMAKE_BUILD_TYPE=Release</code> will result in only
+          compiling and installing the optimized library <code>libdeal_II.so</code>.
+        <li>
+          Passing <code>cmake</code> the
+          flag <code>-DCMAKE_BUILD_TYPE=DebugRelease</code> will build and
+          install both libraries.
+      </ul>
+
+      For more information, see the general discussion <a
+        href="../readme.html#configuration">here</a>.
+    </p>
+
+
+    <p>
+      The build can be further controlled by the following variables:
+      <ul>
+        <li>
+          <code>DEAL_II_ALLOW_PLATFORM_INTROSPECTION</code>: If set
+          (default), <acronym>deal.II</acronym> will perform platform
+          introspection for the given CPU.
+
+	<li>
+          <code>BUILD_SHARED_LIBS</code>: If set (default),
+          <acronym>deal.II</acronym> will be linked as a shared library
+
+        <li>
+        <code>DEAL_II_PREFER_STATIC_LIBS</code> (defaults to off): If set
+        to true, static archives will be preferred over dynamic libraries when
+        searching for features and corresponding link interface.
+
+	<li>
+          <code>DEAL_II_STATIC_EXECUTABLE</code> (defaults to off):
+          If set to true, <acronym>deal.II</acronym> will be configured in
+          a way to provide a link interface that is suitable for static
+          linkage of executables. Enabling this option forces
+          <code>BUILD_SHARED_LIBS=OFF</code> and
+          <code>DEAL_II_PREFER_STATIC_LIBS=ON</code>.
+
+	<li>
+	  <code>CMAKE_INSTALL_RPATH_USE_LINK_PATH</code>: If set
+	  (default), the <acronym>deal.II</acronym> library will be
+	  installed with rpaths  set for all libraries outside of the
+	  system search paths
+      </ul>
+    </p>
+
+
+    <a name="compiler"></a>
+    <h3> Selecting a compiler </h3>
+
+    <p>
+      Compilers can be switched either by command line or by setting
+      <code>CMAKE_(C|CXX|Fortran)_COMPILER</code>:
+<pre class="cmake">
+CC=mpicc CXX=mpicxx FC=mpif90 cmake <...>
+
+cmake -DCMAKE_C_COMPILER="mpicc" -DCMAKE_CXX_COMPILER="mpicxx" -DCMAKE_Fortran_COMPILER="mpif90" <...>
+</pre>
+      Please note that
+      <ul>
+        <li>
+          The compiler <i>must</i> be specified at the very first
+          invocation of <code>cmake</code>.
+        <li>
+          A working CXX compiler is needed. It is optional to provide a C
+          or Fortran compiler.
+      </ul>
+    </p>
+
+    <p>
+      deal.II will configure sensible default <code>CXXFLAGS</code> and
+      <code>LDFLAGS</code> depending on platform, compiler and build
+      target. There are several options to override this behaviour:
+
+      <ol>
+	<li>
+	  Override the default configuration by setting the following
+	  cached variables:
+<pre class="cmake">
+DEAL_II_CXX_FLAGS         - used during all builds
+DEAL_II_CXX_FLAGS_DEBUG   - additional flags for the debug library
+DEAL_II_CXX_FLAGS_RELEASE - additional flags for the release library
+
+DEAL_II_LINKER_FLAGS         - used during all builds
+DEAL_II_LINKER_FLAGS_DEBUG   - additional flags for the debug library
+DEAL_II_LINKER_FLAGS_RELEASE - additional flags for the release library
+</pre>
+
+	  The content of the cached variables will be preserved
+	  and added <i>to the end</i> of the default compiler flags,
+	  hence providing the possibility for overriding a flag. E.g.:
+	  <code>-Wsign-compare</code>, set by the build system, can be
+	  overwritten by specifying:
+<pre class="cmake">
+cmake -DDEAL_II_CXX_FLAGS="-Wno-sign-compare" <...>
+</pre>
+
+        <li>
+          Set the corresponding environment variables: <code>CFLAGS</code>,
+          <code>CXXFLAGS</code>, or <code>LDFLAGS</code>. These
+          variables will also be appended after the default compiler flags
+          (but before the corresponding cached variables).
+
+	<li>
+	  Disable the configuration completely by adding the flag
+	  <code>-D DEAL_II_SETUP_DEFAULT_COMPILER_FLAGS=OFF</code>.
+	  Beware of the fact that certain features
+	  may still pull in necessary compiler flags.
+
+        <li>
+          You can setup additional debug compiler flags to provide test
+          coverage information by adding the flag
+          <code>-D DEAL_II_SETUP_COVERAGE=ON</code>.
+      </ol>
+    </p>
+
+
+    <a name="configureinstall"></a>
+    <h3> Installation </h3>
+
+    <p>
+      The location where the <acronym>deal.II</acronym> library will be
+      installed when invoking <code>make install</code> is set with the
+      help of
+<pre class="cmake">
+CMAKE_INSTALL_PREFIX
+</pre>
+
+      The default directory structure is:
+<pre class="cmake">
+${CMAKE_INSTALL_PREFIX}/
+    bin
+    include
+    lib${LIB_SUFFIX}
+    lib${LIB_SUFFIX}/cmake/deal.II
+    share/deal.II/
+    share/deal.II/cmake/macros
+    ./
+    doc
+    examples
+</pre>
+
+    </p>
+
+    <p>
+      The default directory structure can be changed by setting the
+      following variables:
+<pre class="cmake">
+DEAL_II_EXECUTABLE_RELDIR
+DEAL_II_INCLUDE_RELDIR
+DEAL_II_LIBRARY_RELDIR
+DEAL_II_PROJECT_CONFIG_RELDIR
+DEAL_II_SHARE_RELDIR
+DEAL_II_DOCREADME_RELDIR
+DEAL_II_DOCHTML_RELDIR
+DEAL_II_EXAMPLES_RELDIR
+</pre>
+    </p>
+
+    <a name="advanced"></a>
+    <h2>Initial cache file and advanced options</h2>
+
+    <p>
+      A sample configuration file for preloading the CMake cache with
+<pre class="cmake">
+$ cmake -C config.sample <...>
+</pre>
+      can be found <a href="config.sample" target="_top">here</a>.
+      This sample configuration covers all options mentioned in this
+      documentation as well as some advanced aspects in feature
+      configuration.
+    </p>
+
+    <a name="compiling"></a>
+    <h2> Compiling only certain parts </h2>
+
+
+    <p>
+      While developing the library itself, it is often desirable
+      to only compile certain parts. The build system generated by
+      <code>cmake</code> allows you to build specific, selected targets.
+      A common scenario is that you only want to build debug or optimized
+      libraries. This can be achieved using the following commands in
+      the build directory:
+<pre class="cmake">
+make  deal_II.g        # only debug library
+make  deal_II          # only release (optimized) library
+make  all              # both
+
+make  obj_grid.release # all objects in ./source/grid in release configuration
+</pre>
+    </p>
+
+    <p>
+      For a complete list of possible targets that allow even
+      finer-grained control, do
+<pre class="cmake">
+make  help
+</pre>
+    </p>
+
+    <p>
+      It is frequently useful to be able to see what a particular
+      command does. In that case, use the following:
+<pre class="cmake">
+make  deal_II.g VERBOSE=ON
+</pre>
+      This will show, for every command executed, the exact command
+      line with which it was invoked, including compiler arguments,
+      etc. Every command <code>cmake</code> executes starts with
+      a <code>cd</code> command to change the current directory
+      appropriately so that the command line can be copied and executed
+      from anywhere within the build directory.
+    </p>
+
+    <p>
+      <b>Note:</b> Just because you can call <code>make deal_II.g</code> to
+      only compile the debug version does not mean that a
+      subsequent <code>make install</code> will only install the debug
+      library. Rather, <code>make install</code> will still want to have both
+      libraries up to date and will therefore invoke <code>make all</code>
+      automatically. To restrict builds in such a way that only one library
+      will be installed, see <a href="#configurebuild">configuration</a>
+      and <a href="#buildinstall">installation</a> sections.
+    </p>
+
+    <hr />
+    <div class="right">
+      <a href="http://validator.w3.org/check?uri=referer" target="_top">
+        <img style="border:0" src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+      <a href="http://jigsaw.w3.org/css-validator/check/referer" target="_top">
+        <img style="border:0;width:88px;height:31px" src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+    </div>
+
+  </body>
+</html>
diff --git a/doc/users/cmakelists.html b/doc/users/cmakelists.html
new file mode 100644
index 0000000..046bf68
--- /dev/null
+++ b/doc/users/cmakelists.html
@@ -0,0 +1,887 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+	  "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+  <head>
+    <title>How to use CMake to configure your projects with deal.II</title>
+    <link href="../screen.css" rel="StyleSheet">
+    <meta name="author" content="the deal.II authors <authors @ dealii.org>">
+    <meta name="copyright" content="Copyright (C) 2012, 2013, 2015 by the deal.II authors">
+    <meta name="keywords" content="deal.II">
+  </head>
+
+<body>
+<h1>How to use CMake to configure your projects with <acronym>deal.II</acronym></h1>
+
+<p>
+  <code>cmake</code> is controlled by input files that by convention are
+  called <code>CMakeLists.txt</code>, listing both configuration commands
+  as well as dependencies between source files and targets.
+  This page presents some <code>CMakeLists.txt</code> examples for
+  potential use in your projects. (A detailed description of the
+  <acronym>deal.II</acronym> project configuration is given in the
+  <a href="cmake.html" target="body">deal.II CMake ReadMe</a>.)
+</p>
+
+<div class="toc">
+  <ol>
+    <li><a href="#cmakesimple">Simple <code>CMakeLists.txt</code></a>
+      <ol>
+	<li><a href="#cmakesimple.multiple">Adding multiple executable targets</a></li>
+	<li><a href="#cmakesimple.libs">Adding libraries and common source files</a></li>
+	<li><a href="#cmakesimple.build_type">Switching build types</a></li>
+	<li><a href="#cmakesimple.run">Adding a "run" target</a></li>
+      </ol>
+    </li>
+    <li><a href="#cmakeadvanced">Advanced <code>CMakeLists.txt</code></a>
+      <ol>
+        <li><a href="#cmakeadvanced.layout">Source directory layout</a></li>
+        <li><a href="#cmakeadvanced.control">Control statements</a></li>
+        <li><a href="#cmakeadvanced.globs">File globs</a></li>
+        <li><a href="#cmakeadvanced.setup_target"><code>DEAL_II_SETUP_TARGET</code> revisited</a></li>
+        <li><a href="#cmakeadvanced.cached_variables"><code>DEAL_II_INITIALIZE_CACHED_VARIABLES</code> revisited</a></li>
+        <li><a href="#cmakeadvanced.properties">Customizing include directories and compile definitions</a></li>
+        <li><a href="#cmakeadvanced.external_libraries">External libraries</a></li>
+        <li><a href="#cmakeadvanced.run">The "run" target revisited</a></li>
+        <li><a href="#cmakeadvanced.install">Install a project</a></li>
+      </ol>
+    </li>
+    <li><a href="#cmakeauto">Autopilot style <code>CMakeLists.txt</code></a></li>
+    <li><a href="#dealiiconfig"><code>deal.IIConfig.cmake</code></a></li>
+    <li><a href="#makeconfig"><code>Make.global_options</code></a></li>
+  </ol>
+</div>
+
+<a name="cmakesimple"></a>
+<h2>Simple CMakeLists.txt</h2>
+
+<p>
+  In this section, we start out with a minimal <code>CMakeLists.txt</code>
+  based on the <code>DEAL_II_SETUP_TARGET</code> macro. This method gives
+  full control of what's happening and is easily extensible to more complex
+  projects, as exemplified in the subsections here and later in the section
+  on <a href="#cmakeadvanced">advanced topics</a>. Here is a full example
+  (<a href="CMakeLists.txt.sample" target="_top">plain text</a>
+  version):
+
+<pre class="cmake">
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.3 REQUIRED
+  HINTS ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+
+PROJECT(myproject)
+
+ADD_EXECUTABLE(mycode mycode.cc)
+DEAL_II_SETUP_TARGET(mycode)
+</pre>
+
+<p>
+  The first line of this code makes sure that a sufficiently high
+  version of CMake is installed. 2.8.8 is the minimal version required
+  to set up <acronym>deal.II</acronym>, therefore it is safe to use this
+  number here.
+</p>
+
+<p>
+  Next, we find our deal.II installation with the help of the
+  <code>FIND_PACKAGE</code> command, in this case requiring at least
+  version 8.3. The <code>HINTS</code> are a list of directories where the
+  install directory of deal.II is likely to be found. First, the location
+  possibly defined in the CMake variable <code>DEAL_II_DIR</code> is
+  considered. After that, we check whether we are in a subdirectory (first
+  and second level) of the deal.II installation and otherwise use the
+  environment variable <code>DEAL_II_DIR</code>. If all of these hints fail
+  the default system locations <code>/usr/</code> and
+  <code>/usr/local/</code> are considered. The list after
+  <code>HINTS</code> can be changed according to your preferences.
+</p>
+
+<p>
+  After finding the deal.II project, we fetch a set of cached variables
+  with the <a href="#cmakeadvanced.cached_variables">
+  <code>DEAL_II_INITIALIZE_CACHED_VARIABLES</code></a> macro. You
+  can inspect these for instance with <code>ccmake</code>.
+</p>
+
+<p>
+  Every <code>CMakeLists.txt</code> must contain a project definition,
+  which we do next.
+</p>
+
+<p>
+  Finally, the last two lines define the executable that is to be produced
+  and its source code. The <a href="#cmakeadvanced.setup_target">
+  <code>DEAL_II_SETUP_TARGET</code></a> macro will set up necessary include
+  directories, compile flags, compile definitions, link flags and the link
+  interface.
+</p>
+
+<a name="cmakesimple.multiple"></a>
+<h3>Adding multiple executable targets</h3>
+
+<p>
+  In order to specify multiple executable targets, simply repeat
+  the last two lines of the simple <code>CMakeLists.txt</code>:
+</p>
+
+<pre class="cmake">
+ADD_EXECUTABLE(mycode2 mycode2.cc)
+DEAL_II_SETUP_TARGET(mycode2)
+
+ADD_EXECUTABLE(mycode3 mycode3.cc)
+DEAL_II_SETUP_TARGET(mycode3)
+</pre>
+
+If the list gets longer, consider using
+a <a href="#cmakeadvanced.foreach">loop</a>, possibly
+with <a href="#cmakeadvanced.globs">GLOB</a>.
+
+    <a name="cmakesimple.libs"></a>
+    <h3>Adding libraries and common source files</h3>
+
+    <p>
+      Adding a library is as simple as adding an executable target. We
+      specify the library name and then have to tell cmake that the
+      executables depend on it. The code in the simple file below the
+      project definition accordingly changes for instance to:
+    </p>
+
+<pre class="cmake">
+ADD_LIBRARY(mylib libsrc1.cc libsrc2.cc libsrc3.cc)
+DEAL_II_SETUP_TARGET(mylib)
+
+ADD_EXECUTABLE(mycode mycode.cc)
+DEAL_II_SETUP_TARGET(mycode)
+TARGET_LINK_LIBRARIES(mycode mylib)
+</pre>
+
+<p>When you have <a href="#cmakesimple.multiple">multiple targets</a>,
+repeat the last line of code for each of them. Accordingly,
+a <a href="#cmakeadvanced.foreach">loop</a> becomes even more
+attractive.</p>
+
+<p>If you only have a single file or few files with common source
+code, an alternative to creating a library might be the option:</p>
+
+<pre class="cmake">
+ADD_EXECUTABLE(mycode mycode.cc common.cc)
+DEAL_II_SETUP_TARGET(mycode)
+
+ADD_EXECUTABLE(mycode2 mycode2.cc common.cc)
+DEAL_II_SETUP_TARGET(mycode2)
+</pre>
+
+<p>You should be aware though that in this case <code>common.cc</code> will
+be compiled for each target, not only once. If you want to avoid this and
+still don't want to use a shared library or static archive, another option
+is to create an <code>OBJECT</code> "library":</p>
+
+<pre class="cmake">
+ADD_LIBRARY(common OBJECT common.cc)
+DEAL_II_SETUP_TARGET(common)
+
+ADD_EXECUTABLE(mycode mycode.cc $&lt;TARGET_OBJECTS:common&gt;)
+DEAL_II_SETUP_TARGET(mycode)
+
+ADD_EXECUTABLE(mycode2 mycode2.cc $&lt;TARGET_OBJECTS:common&gt;)
+DEAL_II_SETUP_TARGET(mycode2)
+</pre>
+This will compile <code>common.cc</code> once for the object target
+<code>common</code> and link the resulting object file into the two
+executables.
+
+<a name="cmakesimple.build_type"></a>
+<h3>Switching build types</h3>
+
+<p> The build type is controlled via the variable
+<code>CMAKE_BUILD_TYPE</code>. If it is set to <code>Debug</code>
+executables and libraries specified in your <code>CMakeLists.txt</code>
+file will be compiled in debug mode and linked against the debug version of
+the deal.II library. Conversely, <code>Release</code> will build in optimized
+mode and link against the optimized release version of deal.II. You can set
+<code>CMAKE_BUILD_TYPE</code> with the help of <code>ccmake</code> or via
+<code>cmake</code> on the command line: </p>
+
+<pre class="cmake">
+$ cmake -DCMAKE_BUILD_TYPE="Debug" .
+
+$ cmake -DCMAKE_BUILD_TYPE="Release" .
+</pre>
+
+Alternatively, you can specify custom targets to switch the build type and
+compile automatically:
+
+<pre class="cmake">
+ADD_CUSTOM_TARGET(debug
+  COMMAND ${CMAKE_COMMAND} -DCMAKE_BUILD_TYPE=Debug ${CMAKE_SOURCE_DIR}
+  COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR} --target all
+  COMMENT "Switch CMAKE_BUILD_TYPE to Debug"
+  )
+
+ADD_CUSTOM_TARGET(release
+  COMMAND ${CMAKE_COMMAND} -DCMAKE_BUILD_TYPE=Release ${CMAKE_SOURCE_DIR}
+  COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR} --target all
+  COMMENT "Switch CMAKE_BUILD_TYPE to Release"
+  )
+</pre>
+With that, switching the build type and compiling the project can be done
+very conveniently via:
+<pre class="cmake">
+$ make debug
+
+$ make release
+</pre>
+
+<a name="cmakesimple.run"></a>
+<h3>Adding a "run" target</h3>
+
+<p> If you wish to have a "run" target for make, like in the deal.II
+tutorial, specify one this way (obviously, a single "run" target can only
+run a single executable): </p>
+
+<pre class="cmake">
+ADD_CUSTOM_TARGET(run COMMAND mycode
+  COMMENT "Run with ${CMAKE_BUILD_TYPE} configuration"
+  )
+</pre>
+
+
+<a name="cmakeadvanced"></a>
+<h2> Advanced <code>CMakeLists.txt</code></h2>
+
+<p>This section covers some advanced topics for a user
+<code>CMakeLists.txt</code> file.</p>
+
+<a name="cmakeadvanced.layout"></a>
+<h3>Source directory layout</h3>
+
+<p>For complex projects it is desirable to organize source code and header
+files in subdirectories. Assume the following project structure with a
+library "mylib" and an executable "mycode":
+
+<pre class="cmake">
+mylib/source/*.cc
+mylib/include/*.h
+
+mycode/source/*.cc
+mycode/include/*.h
+</pre>
+
+In this case the top level <code>CMakeLists.txt</code> file may be:
+<pre class="cmake">
+# top level CMakeLists.txt
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+FIND_PACKAGE(deal.II 8.0 REQUIRED)
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(myproject)
+
+ADD_SUBDIRECTORY(mylib)
+ADD_SUBDIRECTORY(mycode)
+</pre>
+The <code>ADD_SUBDIRECTORY</code> statement will include the CMakeLists.txt
+file in the specified subdirectory. In our case:
+
+<pre class="cmake">
+# mylib/CMakeLists.txt
+
+INCLUDE_DIRECTORIES(include)
+
+ADD_LIBRARY(mylib
+  source/mylib1.cc
+  source/mylib2.cc
+  )
+
+DEAL_II_SETUP_TARGET(mylib)
+</pre>
+
+We have to add the directory <code>include</code> for the header files to
+the current include directories with the <code>INCLUDE_DIRECTORIES</code>
+statement (see <a href="#cmakeadvanced.properties">this section</a> for
+details). The corresponding configuration file for the executable looks
+like:
+
+<pre class="cmake">
+# mycode/CMakeLists.txt
+
+INCLUDE_DIRECTORIES(
+  include
+  ${CMAKE_SOURCE_DIR}/mylib/include
+  )
+
+ADD_EXECUTABLE(mycode source/mycode.cc)
+DEAL_II_SETUP_TARGET(mycode)
+
+TARGET_LINK_LIBRARIES(mycode mylib)
+</pre>
+
+<p>
+Please note that <code>CMakeLists.txt</code> files have directory scope.
+Any manipulation of properties and variables has an effect only in the
+current directory (and subdirectories included with
+<code>ADD_SUBDIRECTORY</code>). The level above will not be affected.
+Therefore, we have to specify the include directories for "mylib" again in
+the subdirectory <code>mycode</code> - this time with full path
+<code>${CMAKE_SOURCE_DIR}/mylib/include</code>.
+</p>
+
+<p>
+CMake defines the following variables for access to important directories:
+
+<pre class="cmake">
+CMAKE_SOURCE_DIR
+  - the source directory (i.e. the directory of the top level
+    CMakeLists.txt file)
+
+CMAKE_BINARY_DIR
+  - the (top level) build directory
+
+CMAKE_CURRENT_SOURCE_DIR
+  - the current source directory, i.e. location of the currently processed
+    CMakeLists.txt file (top level or included via ADD_SUBDIRECTORY)
+
+CMAKE_CURRENT_BINARY_DIR
+  - the build (sub)directory corresponding to CMAKE_CURRENT_SOURCE_DIR
+</pre>
+
+</p>
+
+<a name="cmakeadvanced.control"></a>
+<h3>Control statements</h3>
+
+<p>Control statements in CMake take the following form:
+<pre class="cmake">
+IF(&lt;expression&gt;)
+  ...
+ENDIF()
+</pre>
+or in long form:
+<pre class="cmake">
+IF(&lt;expression1&gt;)
+  ...
+ELSEIF(&lt;expression2&gt;)
+  ...
+ELSE()
+  ...
+ENDIF()
+</pre>
+Please note the (somewhat uncommon) empty opening and closing brackets
+behind <code>ELSE()</code> and <code>ENDIF()</code>.
+<code>&lt;expression&gt;</code> can take a multitude of different forms,
+have a look at the
+<a href="http://cmake.org/cmake/help/v2.8.8/cmake.html">CMake
+documentation</a> for a complete list. Important examples are:
+<pre class="cmake">
+IF(${variable})
+  - the body will be evaluated if the variable "variable" is defined and
+    synonymous to true, e.g. 1, TRUE, ON, YES (modulo case insensitivity)
+
+IF(variable MATCHES &lt;regular expression&gt;)
+  - the body will be evaluated if the variable "variable" is defined and
+    matches the specified regular expression
+
+IF("${variable}" STREQUAL "foobar")
+  - the body will be evaluated if both strings are equal. Note that
+    "${variable}" will be replaced by the content of the (string)
+    variable "variable"
+</pre>
+<p>An expression can be negated by prefixing <code>NOT</code>:</p>
+<pre class="cmake">
+IF(NOT &lt;expression&gt;)
+  ...
+ENDIF()
+</pre>
+
+<a name="cmakeadvanced.foreach"></a>
+<p>Loops are implemented with the help of <code>WHILE</code> and
+<code>FOREACH</code> statements. The former takes the same
+<code>&lt;expression&gt;</code> as the <code>IF</code> statement:</p>
+<pre class="cmake">
+WHILE(&lt;expression&gt;)
+  ...
+ENDWHILE()
+</pre>
+Given a variable <code>list</code> containing a list, the individual
+elements <code>element</code> can be accessed with a <code>FOREACH</code>
+statement:
+<pre class="cmake">
+FOREACH(element ${list})
+  ...
+ENDFOREACH()
+</pre>
+Note: It is also possible to specify the list directly:
+<pre class="cmake">
+FOREACH(element foo bar baz)
+  # The variable element will iterate through foo, bar and baz.
+ENDFOREACH()
+</pre>
+
+
+<a name="cmakeadvanced.globs"></a>
+<h3>File globs</h3>
+
+<p>A very common task is to pick up a list of source files from a
+directory. You can either manage a list of source files in
+<code>CMakeLists.txt</code> by hand, e.g. by manually updating all source
+files for a given target, or you can use a glob to automate this process.
+The following example will pick up every source file under
+<code>SOURCE_DIR/source/</code> and add it to an executable:</p>
+
+<pre class="cmake">
+FILE(GLOB sources ${CMAKE_SOURCE_DIR}/source/*.cc)
+ADD_EXECUTABLE(mycode ${sources})
+</pre>
+
+<p>Please be aware of one caveat of this approach: Due to the fact that
+CMake is a <i>build system generator</i> the resulting build configuration
+(for make) has no way to detect whether a new source file was added (or
+removed) and that it has to call back to cmake. So, after adding a new
+source file you have to touch a <code>CMakeLists.txt</code> file or to run
+<code>cmake .</code> again by hand.</p>
+
+
+<a name="cmakeadvanced.setup_target"></a>
+<h3><code>DEAL_II_SETUP_TARGET</code> revisited</h3>
+<p>
+  The <code>DEAL_II_SETUP_TARGET</code> macro is responsible for setting up
+  a target to compile and link against deal.II. It will <i>append</i> the
+  <code>INCLUDE_DIRECTORIES</code> property with the location of the
+  deal.II include directories, and <i>append</i> the properties
+  <code>COMPILE_FLAGS</code>, <code>COMPILE_DEFINITIONS</code> and
+  <code>LINK_FLAGS</code> by their respective values from the deal.II
+  configuration (depending on build type and available debug and/or
+  optimized flavor of the library).
+</p>
+<p>
+  Except in the case of an object library, the specified target will also
+  be set up to link against deal.II (and its transitive link interface) as
+  well.
+</p>
+
+<p>
+  Optionally, the <code>DEAL_II_SETUP_TARGET</code> macro takes an
+  additional argument <code>DEBUG</code>, or <code>RELEASE</code>, after
+  the target name to explicitly state the library flavor the target should
+  be set up for. If the parameter is omitted, the correct choice is deduced
+  from the current build type (with <code>DEBUG</code> taking precedence in
+  case of <code>DebugRelease</code> as build type).
+</p>
+
+<p>
+  Note: The flags that are added with <code>DEAL_II_SETUP_TARGET</code> to
+  the target come last in the final compiler invocation, or linker
+  invocation. This means they take precedence over all flags defined
+  globally via <code>CMAKE_CXX_FLAGS</code>, etc., or as a directory
+  property. If you wish to modify flags or preprocessor definitions set up
+  with <code>DEAL_II_SETUP_TARGET</code> modify one of the following
+  variables (see the section about <a
+    href="#dealiiconfig"><code>deal.IIConfig.cmake</code></a> for
+  details):
+<pre class="cmake">
+DEAL_II_CXX_FLAGS
+DEAL_II_CXX_FLAGS_DEBUG
+DEAL_II_CXX_FLAGS_RELEASE
+DEAL_II_LINKER_FLAGS
+DEAL_II_LINKER_FLAGS_DEBUG
+DEAL_II_LINKER_FLAGS_RELEASE
+DEAL_II_USER_DEFINITIONS
+DEAL_II_USER_DEFINITIONS_DEBUG
+DEAL_II_USER_DEFINITIONS_RELEASE
+</pre>
+
+
+<a name="cmakeadvanced.cached_variables"></a>
+<h3><code>DEAL_II_INITIALIZE_CACHED_VARIABLES</code> revisited</h3>
+
+<p>
+The <code>DEAL_II_INITIALIZE_CACHED_VARIABLES</code> macro is responsible
+for setting up cached variables and has to be invoked before the
+<code>PROJECT</code> call:
+<pre class="cmake">
+FIND_PACKAGE(deal.II 8.0 REQUIRED)
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+
+PROJECT(myproject)
+</pre>
+The macro will set an uninitialized <code>CMAKE_BUILD_TYPE</code> variable
+to <code>Debug</code> (or <code>Release</code> if the debug library is not
+available). If <code>CMAKE_BUILD_TYPE</code> is specified it will
+automatically be reset if the given value is unsupported by the deal.II
+installation (i.e., if it is not equal to <code>Debug</code>,
+<code>Release</code>, or <code>DebugRelease</code>).
+</p>
+<p>
+Furthermore, this macro sets the C++ compiler to the one used for compiling
+the deal.II library. The variables <code>CMAKE_CXX_FLAGS</code>,
+<code>CMAKE_CXX_FLAGS_DEBUG</code>, and
+<code>CMAKE_CXX_FLAGS_RELEASE</code> will be initialized with the empty
+string.
+</p>
+
+<p>
+Note: If you wish to override the flags and definitions set by the
+<code>DEAL_II_SETUP_TARGET</code> macro you have to override the
+corresponding <code>DEAL_II_*</code> variable instead. See the
+documentation of <a
+href="#cmakeadvanced.setup_target"><code>DEAL_II_SETUP_TARGET</code></a>
+for further details.
+</p>
+
+<a name="cmakeadvanced.properties"></a>
+<h3>Customizing include directories and compile definitions</h3>
+
+<p>
+You can specify custom include directories and compile definitions prior to
+a target definition on a per directory basis (have a look at the <a
+href="http://cmake.org/cmake/help/v2.8.8/cmake.html">CMake
+documentation</a> for further details):
+
+<pre class="cmake">
+INCLUDE_DIRECTORIES(include1 include2)
+
+ADD_DEFINITIONS(-DFOO -DBAR="BAZ")
+
+ADD_EXECUTABLE(...) # or ADD_LIBRARY(...)
+</pre>
+</p>
+
+<a name="cmakeadvanced.external_libraries"></a>
+<h3>External libraries</h3>
+
+<p>
+For external libraries that provide a CMake project configuration or where
+a CMake find module is available, including this external library in your
+project is more or less straightforward. E.g. to require an external
+project "foo" at least of version 8.0 write:
+<pre class="cmake">
+FIND_PACKAGE(foo 8.0 REQUIRED)
+</pre>
+Alternatively, the version number and <code>REQUIRED</code> keyword can be
+omitted. (Depending on the external library) the project configuration or
+find macro will usually define variables like <code>FOO_INCLUDE_DIRS</code>
+and <code>FOO_LIBRARIES</code> that can be directly used in your
+<code>CMakeLists.txt</code> file:
+
+<pre class="cmake">
+INCLUDE_DIRECTORIES(${FOO_INCLUDE_DIRS})
+
+ADD_EXECUTABLE(mycode mycode.cc)
+DEAL_II_SETUP_TARGET(mycode)
+
+TARGET_LINK_LIBRARIES(mycode ${FOO_LIBRARIES})
+</pre>
+The first statement will set up the include directories for the following
+targets as explained above. The last statement with
+<code>TARGET_LINK_LIBRARIES</code> will <i>add</i> the libraries in the
+<code>FOO_LIBRARIES</code> variable to the link interface of the target
+<code>mycode</code>.
+</p>
+
+<a name="cmakeadvanced.run"></a>
+<h3>The "run" target revisited</h3>
+
+<p>The simple run statement as explained <a
+href="#cmakesimple.run">above</a> will run the generated executable in the
+build directory. Sometimes it is more desirable to run the executable in a
+dedicated <code>run</code> directory within the build directory which is
+a copy of a skeleton <code>run</code> folder from the source directory:
+
+<pre class="cmake">
+# Copy folder run from the source to the build directory:
+FILE(COPY ${CMAKE_SOURCE_DIR}/run DESTINATION ${CMAKE_BINARY_DIR})
+
+ADD_EXECUTABLE(mycode mycode.cc)
+SET_PROPERTY(TARGET mycode
+  PROPERTY RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/run
+  )
+ADD_CUSTOM_TARGET(run
+  COMMAND mycode
+  WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/run
+  )
+</pre>
+Here, we modify the <code>RUNTIME_OUTPUT_DIRECTORY</code> property of our
+target so that the executable is linked inside our designated
+<code>run</code> folder, so that it is conveniently available as an
+executable inside the run folder. Furthermore, we specify a
+<code>WORKING_DIRECTORY</code> for the <code>run</code> target, so that
+<code>make run</code> invokes the executable inside the intended run
+directory.
+</p>
+
+<a name="cmakeadvanced.install"></a>
+<h3>Install a project</h3>
+
+<p>If you want <code>make install</code> to install your project to
+<code>CMAKE_INSTALL_PREFIX</code> (that may be set on command line or in
+the cache during the configuration stage), add appropriate
+<code>INSTALL</code> statements. To install e.g. a project consisting of a
+library and an executable as well as a run folder:
+
+<pre class="cmake">
+# [...] all the target definitions
+
+INSTALL(TARGETS mylib DESTINATION lib)
+INSTALL(TARGETS mycode DESTINATION bin)
+
+INSTALL(DIRECTORY run DESTINATION share/mycode/run)
+</pre>
+
+<a name="cmakeauto"></a>
+<h2>Autopilot style CMakeLists.txt</h2>
+
+<p>
+  If you want a make interface similar to the deal.II library and
+  its tutorial, namely make targets for debug and release
+  versions, running the code and cleaning, the easiest way to
+  write a <code>CMakeLists.txt</code> file may be to use
+  an "autopilot" style macro. Here is a minimalistic example for the
+  step-1 tutorial program (<a href="CMakeLists.txt.sample2"
+			      target="_top">plain text</a> version) that can be used for simple
+  projects:
+</p>
+
+<pre class="cmake">
+FIND_PACKAGE(deal.II 8.0 REQUIRED
+  HINTS
+    ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+    # You can specify additional hints for search paths here, e.g.
+    # $ENV{HOME}/workspace/deal.II
+)
+
+# Set the name of the project and target:
+SET(TARGET "step-1")
+
+# Declare all source files the target consists of:
+SET(TARGET_SRC
+  step-1.cc
+  # You can specify additional files here!
+)
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET} CXX)
+DEAL_II_INVOKE_AUTOPILOT()
+</pre>
+
+
+      This <code>CMakeLists.txt</code> is intended for use with a small
+      project and <i>in-source</i> build (i.e., one does not create a separate
+      build directory as we recommend for the <acronym>deal.II</acronym> build
+      in the <a href="../readme.html#configuration" target="_body">readme
+      file</a>). Using this input file, you can run <code>cmake</code> in the
+      source directory as follows:
+
+<pre class="sample">
+$ cd step-1
+$ cmake .
+
+[...]
+###
+#
+#  Project  step-1  set up with  deal.II-8.3  found at
+#      /usr
+#
+#  CMAKE_BUILD_TYPE:          Debug
+#
+#  You can now run
+#       $ make                - to compile and link the program
+#       $ make run            - to (compile, link and) run the program
+#
+#       $ make debug          - to switch the build type to 'Debug'
+#       $ make release        - to switch the build type to 'Release'
+#
+#       $ make edit_cache     - to change (cached) configuration variables
+#                               and rerun the configure and generate phases of CMake
+#
+#       $ make strip_comments - to strip the source files in this
+#                               directory off the documentation comments
+#       $ make clean          - to remove the generated executable as well as
+#                               all intermediate compilation files
+#       $ make runclean       - to remove all output generated by the program
+#       $ make distclean      - to clean the directory from _all_ generated
+#                               files (includes clean, runclean and the removal
+#                               of the generated build system)
+#       $ make info           - to view this message again
+#
+#  Have a nice day!
+#
+###
+</pre>
+
+      There are two additional configuration options (in addition to
+      <code>TARGET</code> and <code>TARGET_SRC</code>) that can be set via
+      variables before <code>DEAL_II_INVOKE_AUTOPILOT()</code> is called
+      (<a href="CMakeLists.txt.sample3" target="_top">plain text</a> version):
+
+<pre class="cmake">
+# (Optional)
+# Specify a list of files (file globs) that will be removed
+# with the "make runclean" and "make distclean" targets.
+# (If empty, sensible default values will be used.)
+SET(CLEAN_UP_FILES
+  # a custom list of globs, e.g. *.log *.vtk
+)
+
+# (Optional)
+# A custom command line that should be invoked by "make run".
+# (If empty, ./${TARGET} will be invoked.)
+SET(TARGET_RUN
+  # a custom command line, e.g. mpirun -np 2 ${TARGET}
+)
+</pre>
+
+
+<a name="dealiiconfig"></a>
+<h2>  <code>deal.IIConfig.cmake</code> </h2>
+
+<p>
+  Importing the deal.IIConfig.cmake file via <code>FIND_PACKAGE</code>
+  will set the following variables and macros; all of the form
+  <code>DEAL_II_*</code>:
+</p>
+<pre class="cmake">
+#
+# General package information:
+#
+
+DEAL_II_PACKAGE_NAME
+DEAL_II_PACKAGE_VERSION     - the full package version string, e.g. "8.1.pre"
+DEAL_II_PACKAGE_VENDOR
+DEAL_II_PACKAGE_DESCRIPTION
+
+DEAL_II_VERSION             - numerical version number (with "pre" and "rc?"
+                              replaced by "0"), e.g. "8.2.0"
+DEAL_II_VERSION_MAJOR       - the major number, e.g. "8"
+DEAL_II_VERSION_MINOR       - the minor version number, e.g. "2"
+DEAL_II_VERSION_SUBMINOR    - the subminor version number, e.g. "0"
+
+DEAL_II_GIT_BRANCH          - name of the local git branch of the source directory
+DEAL_II_GIT_REVISION        - full sha1 revision of the current git HEAD
+DEAL_II_GIT_SHORTREV        - short sha1 revision of the current git HEAD
+
+DEAL_II_BUILD_TYPE          - the configured build type, e.g. "DebugRelease"
+DEAL_II_BUILD_TYPES         - an all caps list of available configurations,
+                              e.g. "DEBUG;RELEASE"
+
+#
+# Information about component locations:
+#
+
+DEAL_II_PATH
+DEAL_II_SHARE_RELDIR
+DEAL_II_DOCREADME_RELDIR
+DEAL_II_DOCHTML_RELDIR
+DEAL_II_EXAMPLES_RELDIR
+DEAL_II_EXECUTABLE_RELDIR
+DEAL_II_INCLUDE_RELDIR
+DEAL_II_LIBRARY_RELDIR
+DEAL_II_PROJECT_CONFIG_RELDIR
+
+DEAL_II_BUILD_DIR             - true if deal.II was picked up from a build
+                                dir, false if the configuration is from an installation
+
+#
+# Compiler and linker configuration
+#
+
+DEAL_II_CXX_COMPILER             - the compiler used to compile deal.II
+
+DEAL_II_CXX_FLAGS                - compile flags for all configurations
+DEAL_II_CXX_FLAGS_DEBUG          - _additional_ compile flags for the debug configuration
+DEAL_II_CXX_FLAGS_RELEASE        - _additional_ compile flags for the release configuration
+
+DEAL_II_LINKER_FLAGS             - link flags for all configurations
+DEAL_II_LINKER_FLAGS_DEBUG       - _additional_ link flags for debug configuration
+DEAL_II_LINKER_FLAGS_RELEASE     - _additional_ link flags for release configuration
+
+DEAL_II_USER_DEFINITIONS         - compile definitions for all configurations
+DEAL_II_USER_DEFINITIONS_DEBUG   - _additional_ compile definitions for debug configuration
+DEAL_II_USER_DEFINITIONS_RELEASE - _additional_ compile definitions for release configuration
+
+#
+# Information about MPI runtime for the mpi implementation used in the
+# deal.II library
+#
+
+DEAL_II_MPIEXEC
+DEAL_II_MPIEXEC_NUMPROC_FLAG
+DEAL_II_MPIEXEC_PREFLAGS
+DEAL_II_MPIEXEC_POSTFLAGS
+
+
+DEAL_II_STATIC_EXECUTABLE        - true if the link interface is set up to
+                                   compile resulting executables statically
+
+#
+# Information about include directories and libraries
+#
+
+DEAL_II_INCLUDE_DIRS
+
+DEAL_II_LIBRARIES_DEBUG   - a list of the full link interface for the debug configuration
+DEAL_II_LIBRARIES_RELEASE - a list of the full link interface for the release configuration
+DEAL_II_LIBRARIES         - full list of libraries with "debug" and "optimized" keywords
+
+#
+# Information about library targets
+#
+
+DEAL_II_TARGET_CONFIG  - the target config file
+
+DEAL_II_TARGET_DEBUG   - the name of the debug target that is available after inclusion
+                         of the target config file
+DEAL_II_TARGET_RELEASE - the name of the release target
+DEAL_II_TARGET         - full list of targets with "debug" and "optimized" keywords
+
+#
+# Feature configuration: The following booleans are set to "ON" or "OFF" depending
+# on the current feature configuration:
+#
+
+DEAL_II_WITH_64BIT_INDICES
+DEAL_II_WITH_ARPACK
+DEAL_II_WITH_BOOST
+DEAL_II_WITH_BZIP2
+DEAL_II_WITH_CXX11
+DEAL_II_WITH_CXX14
+DEAL_II_WITH_HDF5
+DEAL_II_WITH_LAPACK
+DEAL_II_WITH_METIS
+DEAL_II_WITH_MPI
+DEAL_II_WITH_MUPARSER
+DEAL_II_WITH_NETCDF
+DEAL_II_WITH_OPENCASCADE
+DEAL_II_WITH_P4EST
+DEAL_II_WITH_PETSC
+DEAL_II_WITH_SLEPC
+DEAL_II_WITH_THREADS
+DEAL_II_WITH_TRILINOS
+DEAL_II_WITH_UMFPACK
+DEAL_II_WITH_ZLIB
+</pre>
+
+<a name="makeconfig"></a>
+<h2>  <code>Make.global_options</code> </h2>
+
+<p>
+  The above CMake configuration file <code>deal.IIConfig.cmake</code> is also
+  available as a Makefile includable file called
+  <code>Make.global_options</code>. It resides under
+  <code>share/deal.II/Make.global_options</code> in the build and install
+  directory (or alternatively whichever relative path
+  <code>DEAL_II_SHARE_RELDIR</code> is set to).
+</p>
+
+    <hr />
+    <div class="right">
+      <a href="http://validator.w3.org/check?uri=referer" target="_top">
+        <img style="border:0" src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+      <a href="http://jigsaw.w3.org/css-validator/check/referer" target="_top">
+        <img style="border:0;width:88px;height:31px" src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+    </div>
+
+  </body>
+</html>
diff --git a/doc/users/config.sample b/doc/users/config.sample
new file mode 100644
index 0000000..cbb65f3
--- /dev/null
+++ b/doc/users/config.sample
@@ -0,0 +1,664 @@
+##                                                                       ##
+#                      Example configuration file                         #
+#                                                                         #
+#        See doc/readme.html and doc/users/cmake.html for further         #
+#        details on how to use the cmake build system of deal.II.         #
+##                                                                       ##
+
+
+###########################################################################
+#                                                                         #
+#                         General Configuration:                          #
+#                                                                         #
+###########################################################################
+
+#
+# Component configuration:
+#
+#
+# SET(DEAL_II_COMPONENT_DOCUMENTATION OFF CACHE BOOL
+#   "Enable configuration, build and installation of the html documentation. This adds a COMPONENT \"documentation\" to the build system."
+#   )
+#
+# SET(DEAL_II_COMPONENT_EXAMPLES ON CACHE BOOL
+#   "Enable configuration and installation of the example steps. This adds a COMPONENT \"examples\" to the build system."
+#   )
+#
+# SET(DEAL_II_COMPONENT_PARAMETER_GUI OFF CACHE BOOL
+#   "Build and install the parameter_gui. This adds a COMPONENT \"parameter_gui\" to the build system."
+#   )
+#
+
+
+#
+# General Feature configuration:
+#
+#
+# SET(DEAL_II_ALLOW_BUNDLED ON CACHE BOOL
+#   "Allow the use of libraries bundled with the source tarball. (DEAL_II_FORCE_BUNDLED* will overwrite this option.)"
+#   )
+#
+# SET(DEAL_II_ALLOW_AUTODETECTION ON CACHE BOOL
+#   "Allow to automatically setup features by setting all undefined DEAL_II_WITH_* variables to ON or OFF"
+#   )
+#
+# SET(DEAL_II_FORCE_AUTODETECTION OFF CACHE BOOL
+#   "Force feature autodetection by undefining all DEAL_II_WITH_* variables prior to configure"
+#   )
+#
+# For further feature configuration, see section
+# "Advanced feature configuration" below.
+#
+
+
+#
+# Build configuration:
+#
+#
+# SET(CMAKE_BUILD_TYPE "DebugRelease" CACHE STRING
+#   "Choose the type of build, options are: Debug, Release and DebugRelease."
+#   )
+#
+# SET(DEAL_II_ALLOW_PLATFORM_INTROSPECTION ON CACHE BOOL
+#   "Allow platform introspection for CPU command set, SSE and AVX"
+#   )
+#
+# SET(DEAL_II_SETUP_DEFAULT_COMPILER_FLAGS ON CACHE BOOL
+#   "Configure sensible default CFLAGS and CXXFLAGS depending on platform, compiler and build target."
+#   )
+#
+# SET(DEAL_II_SETUP_COVERAGE OFF CACHE BOOL
+#   "Setup debug compiler flags to provide additional test coverage information. Currently only gprof is supported."
+#   )
+#
+# SET(CMAKE_CXX_COMPILER "" CACHE STRING
+#   "CXX Compiler."
+#   )
+#
+# SET(CMAKE_C_COMPILER "" CACHE STRING
+#   "C Compiler."
+#   )
+#
+# SET(CMAKE_Fortran_COMPILER "" CACHE STRING
+#   "Fortran Compiler."
+#   )
+#
+# SET(CMAKE_CXX_FLAGS "" CACHE STRING
+#   "The user supplied cache variable will be appended _at the end_ of the auto generated CMAKE_CXX_FLAGS variable"
+#   )
+#
+# SET(DEAL_II_CXX_FLAGS_DEBUG "" CACHE STRING
+#   "The user supplied cache variable will be appended _at the end_ of the auto generated DEAL_II_CXX_FLAGS_DEBUG variable"
+#   )
+#
+# SET(DEAL_II_CXX_FLAGS_RELEASE "" CACHE STRING
+#   "The user supplied cache variable will be appended _at the end_ of the auto generated DEAL_II_CXX_FLAGS_RELEASE variable"
+#   )
+#
+# SET(DEAL_II_LINKER_FLAGS "" CACHE STRING
+#   "The user supplied cache variable will be appended _at the end_ of the auto generated DEAL_II_LINKER_FLAGS variable"
+#   )
+#
+# SET(DEAL_II_LINKER_FLAGS_DEBUG "" CACHE STRING
+#   "The user supplied cache variable will be appended _at the end_ of the auto generated DEAL_II_LINKER_FLAGS_DEBUG variable"
+#   )
+#
+# SET(DEAL_II_LINKER_FLAGS_RELEASE "" CACHE STRING
+#   "The user supplied cache variable will be appended _at the end_ of the auto generated DEAL_II_LINKER_FLAGS_RELEASE variable"
+#   )
+#
+# SET(BUILD_SHARED_LIBS "ON" CACHE BOOL
+#   "Build a shared library"
+#   )
+#
+# SET(DEAL_II_PREFER_STATIC_LIBS "OFF" CACHE BOOL
+#   "Prefer static libraries over dynamic libraries when searching for features and corresponding link interface"
+#   )
+#
+# SET(DEAL_II_STATIC_EXECUTABLE "OFF" CACHE BOOL
+#   "Provide a link interface that is suitable for static linkage of executables. Enabling this option forces BUILD_SHARED_LIBS=OFF and DEAL_II_PREFER_STATIC_LIBS=ON"
+#   )
+#
+# SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH "ON" CACHE BOOL
+#   "Set the rpath of the library to the external link paths on installation"
+#   )
+#
+# SET(DEAL_II_WITH_64BIT_INDICES "OFF" CACHE BOOL
+#   "If set to ON, then use 64-bit data types to represent global degree of freedom indices. The default is OFF. You only want to set this to ON if you will solve problems with more than 2^31 (approximately 2 billion) unknowns. If set to ON, you also need to ensure that Trilinos and/or PETSc support 64-bit indices."
+#   )
+#
+#
+
+
+###########################################################################
+#                                                                         #
+#                     Advanced Feature Configuration:                     #
+#                                                                         #
+###########################################################################
+
+#
+# Arpack:
+#
+# SET(DEAL_II_WITH_ARPACK ON CACHE BOOL
+#   "Build deal.II with support for arpack")
+#
+# Automatic detection:
+#
+# Specify a hint with CMAKE_PREFIX_PATH or by setting
+# SET(ARPACK_DIR "/.../..." CACHE PATH "")
+#
+# Manual setup:
+#
+# SET(ARPACK_FOUND TRUE CACHE BOOL "")
+# SET(ARPACK_LIBRARIES "library;and;semicolon;separated;list;of;link;interface" CACHE STRING "")
+# SET(ARPACK_LINKER_FLAGS "..." CACHE STRING "")
+#
+
+
+#
+# Boost:
+#
+# SET(DEAL_II_WITH_BOOST ON CACHE BOOL
+#   "Build deal.II with support for boost"
+#   FORCE)
+#
+# SET(DEAL_II_FORCE_BUNDLED_BOOST TRUE CACHE BOOL
+#   "Always use the bundled boost library instead of an external one."
+#   )
+#
+# Boost is a mandatory build time dependency.
+#
+# Automatic detection:
+#
+# SET(Boost_DIR "..." CACHE PATH
+#   "The preferred installation prefix for searching for boost"
+#   )
+#
+# Have a look at the very long configuration list in FindBoost.cmake.
+#
+# Manual setup:
+#
+# SET(Boost_FOUND TRUE CACHE BOOL "")
+# SET(Boost_INCLUDE_DIRS "semicolon;separated;list;of;include;dirs" CACHE STRING "")
+# SET(Boost_LIBRARIES "libraries;and;semicolon;separated;list;of;link;interface" CACHE STRING "")
+#
+
+
+#
+# Muparser:
+#
+# SET(DEAL_II_WITH_MUPARSER TRUE CACHE BOOL
+#   "Build deal.II with support for muparser"
+#   )
+#
+# SET(DEAL_II_FORCE_BUNDLED_MUPARSER TRUE CACHE BOOL
+#   "Always use the bundled muparser library instead of an external one."
+#   )
+#
+
+
+#
+# HDF5:
+#
+# SET(DEAL_II_WITH_HDF5 ON CACHE BOOL
+#   "Build deal.II with support for hdf5"
+#   )
+#
+# Automatic detection:
+#
+# Specify a hint with CMAKE_PREFIX_PATH or by setting
+# SET(HDF5_DIR "/.../..." CACHE PATH "")
+#
+# Manual setup:
+#
+# SET(HDF5_FOUND TRUE CACHE BOOL "")
+# SET(HDF5_LIBRARIES "library;and;semicolon;separated;list;of;link;interface" CACHE STRING "")
+# SET(HDF5_INCLUDE_DIRS "semicolon;separated;list;of;include;dirs" CACHE STRING "")
+#
+
+
+#
+# BLAS and LAPACK:
+#
+# SET(DEAL_II_WITH_LAPACK ON CACHE BOOL
+#   "Build deal.II with support for lapack"
+#   )
+#
+#
+# Automatic detection:
+#
+# You can set the BLAS/LAPACK vendor that should be found with
+#
+# SET(BLA_VENDOR "Goto" CACHE STRING "")
+#   Valid vendors: Goto, ATLAS, PhiPACK, CXML, DXML, SunPerf, SCSL, SGIMATH,
+#   IBMESSL, Intel10_32, Intel10_64lp, Intel10_64lp_seq, ACML, ACML_MP,
+#   ACML_GPU, Apple, NAS, Generic
+#
+# BLA_VENDOR will also be recognized by FindLAPACK.cmake.
+#
+# If your BLAS/LAPACK installation resides at a non system location, specify
+# a hint with LAPACK_DIR and BLAS_DIR, or CMAKE_PREFIX_PATH:
+#
+# SET(LAPACK_DIR "..." CACHE PATH
+#   "The preferred installation prefix for searching for LAPACK"
+#   )
+#
+# SET(BLAS_DIR "..." CACHE PATH
+#   "The preferred installation prefix for searching for BLAS"
+#   )
+#
+#
+# Manual setup:
+#
+# SET(BLAS_FOUND TRUE CACHE BOOL "")
+# SET(BLAS_LIBRARIES "library;and;semicolon;separated;list;of;link;interface" CACHE STRING "")
+# SET(BLAS_LINKER_FLAGS "..." CACHE STRING "")
+#
+# SET(LAPACK_FOUND TRUE CACHE BOOL "")
+# SET(LAPACK_LIBRARIES "library;and;semicolon;separated;list;of;link;interface" CACHE STRING "")
+# SET(LAPACK_LINKER_FLAGS "..." CACHE STRING "")
+#
+
+
+#
+# Metis:
+#
+# SET(DEAL_II_WITH_METIS ON CACHE BOOL
+#   "Build deal.II with support for metis"
+#   )
+#
+# Automatic detection:
+#
+# Specify a hint with CMAKE_PREFIX_PATH or by setting
+# SET(METIS_DIR "/.../..." CACHE PATH "")
+#
+# Manual setup:
+#
+# SET(METIS_FOUND TRUE CACHE BOOL "")
+# SET(METIS_LIBRARIES "library;and;semicolon;separated;list;of;link;interface" CACHE STRING "")
+# SET(METIS_INCLUDE_DIRS "semicolon;separated;list;of;include;dirs" CACHE STRING "")
+# SET(METIS_VERSION "x.y.z" CACHE STRING "")
+# SET(METIS_VERSION_MAJOR "x" CACHE STRING "")
+# SET(METIS_VERSION_MINOR "y" CACHE STRING "")
+# SET(METIS_VERSION_SUBMINOR "z" CACHE STRING "")
+#
+
+
+#
+# MPI:
+#
+# SET(DEAL_II_WITH_MPI ON CACHE BOOL
+#   "Build deal.II with support for mpi"
+#   )
+#
+#
+# Automatic detection:
+#
+# If DEAL_II_WITH_MPI is undefined, autodetection of MPI is very
+# conservative. Only the current compiler and the variable MPI_CXX_COMPILER
+# are used to find MPI. (MPI_CXX_COMPILER takes precedence.)
+#
+# (MPI_C_COMPILER and MPI_Fortran_COMPILER are considered, too)
+#
+#
+# DEAL_II_WITH_MPI=ON will trigger a more sophisticated search for MPI.
+#
+# The best way to force a specific MPI implementation to be used is to set
+# CMAKE_CXX_COMPILER, CMAKE_C_COMPILER and CMAKE_Fortran_COMPILER to the
+# appropriate MPI wrapper (See above).
+#
+# Alternatively, specify the mpi wrapper that will be queried for the mpi
+# link interface:
+#
+# SET(MPI_CXX_COMPILER "mpicxx" CACHE STRING "")
+#
+# SET(MPI_C_COMPILER "mpicc" CACHE STRING "")
+# SET(MPI_Fortran_COMPILER "mpif90" CACHE STRING "")
+#
+#
+# Manual setup:
+#
+# SET(MPI_CXX_FOUND TRUE CACHE BOOL "")
+#
+# and set the current compiler to an MPI wrapper. Alternatively, you can
+# specify the complete link interface as well as compile and link flags
+# via:
+#
+# SET(MPI_CXX_LIBRARIES "library;and;semicolon;separated;list;of;link;interface" CACHE STRING "")
+# SET(MPI_CXX_INCLUDE_PATH "include directory for mpi" CACHE STRING "")
+# SET(MPI_CXX_COMPILE_FLAGS "..." CACHE STRING "")
+# SET(MPI_CXX_LINK_FLAGS "..." CACHE STRING "")
+# SET(MPI_VERSION "x.y" CACHE STRING "")
+# SET(MPI_VERSION_MAJOR "x" CACHE STRING "")
+# SET(MPI_VERSION_MINOR "y" CACHE STRING "")
+#
+# Additionally:
+#
+# SET(MPI_C_LIBRARIES "library;and;semicolon;separated;list;of;link;interface" CACHE STRING "")
+# SET(MPI_Fortran_LIBRARIES "library;and;semicolon;separated;list;of;link;interface" CACHE STRING "")
+#
+#
+
+
+#
+# muParser:
+#
+# SET(DEAL_II_WITH_MUPARSER ON CACHE BOOL
+#   "Build deal.II with support for muparser")
+#
+# Automatic detection:
+#
+# Specify a hint with CMAKE_PREFIX_PATH or by setting
+# SET(MUPARSER_DIR "/.../..." CACHE PATH "")
+#
+# Manual setup:
+#
+# SET(MUPARSER_FOUND TRUE CACHE BOOL "")
+# SET(MUPARSER_LIBRARIES "library;and;semicolon;separated;list;of;link;interface" CACHE STRING "")
+# SET(MUPARSER_INCLUDE_DIRS "semicolon;separated;list;of;include;dirs" CACHE STRING "")
+#
+
+
+#
+# Netcdf:
+#
+# SET(DEAL_II_WITH_NETCDF ON CACHE BOOL
+#   "Build deal.II with support for netcdf"
+#   )
+#
+# Automatic detection:
+#
+# Specify a hint with CMAKE_PREFIX_PATH or by setting
+# SET(NETCDF_DIR "/.../..." CACHE PATH "")
+#
+# Manual setup:
+#
+# SET(NETCDF_FOUND TRUE CACHE BOOL "")
+# SET(NETCDF_LIBRARIES "library;and;semicolon;separated;list;of;link;interface" CACHE STRING "")
+# SET(NETCDF_INCLUDE_DIRS "semicolon;separated;list;of;include;dirs" CACHE STRING "")
+#
+
+
+#
+# P4EST:
+#
+# SET(DEAL_II_WITH_P4EST ON CACHE BOOL
+#   "Build deal.II with support for p4est"
+#   )
+#
+# Automatic detection:
+#
+# Specify a hint with CMAKE_PREFIX_PATH or by setting
+# SET(P4EST_DIR "/.../..." CACHE PATH "")
+# also, if necessary, SC_DIR
+#
+# Manual setup:
+#
+# SET(P4EST_FOUND TRUE CACHE BOOL "")
+# SET(P4EST_LIBRARIES "library;and;semicolon;separated;list;of;link;interface" CACHE STRING "")
+# SET(P4EST_INCLUDE_DIRS "semicolon;separated;list;of;include;dirs" CACHE STRING "")
+# SET(P4EST_WITH_MPI TRUE CACHE BOOL "")
+# SET(P4EST_VERSION "x.y.z.p" CACHE STRING "")
+# SET(P4EST_VERSION_MAJOR "x" CACHE STRING "")
+# SET(P4EST_VERSION_MINOR "y" CACHE STRING "")
+# SET(P4EST_VERSION_SUBMINOR "z" CACHE STRING "")
+# SET(P4EST_VERSION_PATCH "p" CACHE STRING "")
+#
+
+
+#
+# PETSc:
+#
+# SET(DEAL_II_WITH_PETSC ON CACHE BOOL
+#   "Build deal.II with support for petsc"
+#   )
+#
+# Automatic detection:
+#
+# Specify a hint with CMAKE_PREFIX_PATH or by setting
+# SET(PETSC_DIR "/.../..." CACHE PATH "")
+# SET(PETSC_ARCH "..." CACHE STRING "")
+#
+# Manual setup:
+#
+# SET(PETSC_FOUND TRUE CACHE BOOL "")
+# SET(PETSC_LIBRARIES "library;and;semicolon;separated;list;of;link;interface" CACHE STRING "")
+# SET(PETSC_INCLUDE_DIRS "semicolon;separated;list;of;include;dirs" CACHE STRING "")
+# SET(PETSC_VERSION "x.y.z.p" CACHE STRING "")
+# SET(PETSC_VERSION_MAJOR "x" CACHE STRING "")
+# SET(PETSC_VERSION_MINOR "y" CACHE STRING "")
+# SET(PETSC_VERSION_SUBMINOR "z" CACHE STRING "")
+# SET(PETSC_VERSION_PATCH "p" CACHE STRING "")
+# SET(PETSC_WITH_MPI_UNI FALSE CACHE BOOL "")
+# SET(PETSC_WITH_COMPLEX FALSE CACHE BOOL "")
+#
+
+
+#
+# SLEPc:
+#
+# SET(DEAL_II_WITH_SLEPC ON CACHE BOOL
+#   "Build deal.II with support for slepc"
+#   )
+#
+# Automatic detection:
+#
+# Specify a hint with CMAKE_PREFIX_PATH or by setting
+# SET(SLEPC_DIR "/.../..." CACHE PATH "")
+#
+# Manual setup:
+#
+# SET(SLEPC_FOUND TRUE CACHE BOOL "")
+# SET(SLEPC_LIBRARIES "library;and;semicolon;separated;list;of;link;interface" CACHE STRING "")
+# SET(SLEPC_INCLUDE_DIRS "semicolon;separated;list;of;include;dirs" CACHE STRING "")
+# SET(SLEPC_VERSION "x.y.z.p" CACHE STRING "")
+# SET(SLEPC_VERSION_MAJOR "x" CACHE STRING "")
+# SET(SLEPC_VERSION_MINOR "y" CACHE STRING "")
+# SET(SLEPC_VERSION_SUBMINOR "z" CACHE STRING "")
+# SET(SLEPC_VERSION_PATCH "p" CACHE STRING "")
+# SET(SLEPC_WITH_MPI_UNI FALSE CACHE BOOL "")
+#
+
+
+#
+# Threading support:
+#
+# SET(DEAL_II_WITH_THREADS ON CACHE BOOL
+#   "Build deal.II with support for threads"
+#   )
+#
+# SET(DEAL_II_FORCE_BUNDLED_THREADS TRUE CACHE BOOL
+#   "Always use the bundled tbb library instead of an external one."
+#   )
+#
+# Automatic setup:
+#
+# Specify a hint with CMAKE_PREFIX_PATH or by setting
+# SET(TBB_DIR "/.../..." CACHE PATH "")
+#
+# Manual setup:
+#
+# SET(TBB_FOUND TRUE CACHE BOOL "")
+# SET(TBB_LIBRARIES "library;and;semicolon;separated;list;of;link;interface" CACHE STRING "")
+# SET(TBB_INCLUDE_DIRS "semicolon;separated;list;of;include;dirs" CACHE STRING "")
+# SET(TBB_WITH_DEBUGLIB TRUE CACHE BOOL "")
+# SET(TBB_VERSION "x.y" CACHE STRING "")
+# SET(TBB_VERSION_MAJOR "x" CACHE STRING "")
+# SET(TBB_VERSION_MINOR "y" CACHE STRING "")
+#
+
+
+#
+# Trilinos:
+#
+# SET(DEAL_II_WITH_TRILINOS ON CACHE BOOL
+#   "Build deal.II with support for trilinos"
+#   )
+#
+# Automatic detection:
+#
+# Specify a hint with CMAKE_PREFIX_PATH or by setting
+# SET(TRILINOS_DIR "/.../..." CACHE PATH "")
+#
+# Manual setup:
+#
+# SET(TRILINOS_FOUND TRUE CACHE BOOL "")
+# SET(TRILINOS_LIBRARIES "library;and;semicolon;separated;list;of;link;interface" CACHE STRING "")
+# SET(TRILINOS_INCLUDE_DIRS "semicolon;separated;list;of;include;dirs" CACHE STRING "")
+# SET(TRILINOS_VERSION "x.y.z" CACHE STRING "")
+# SET(TRILINOS_VERSION_MAJOR "x" CACHE STRING "")
+# SET(TRILINOS_VERSION_MINOR "y" CACHE STRING "")
+# SET(TRILINOS_VERSION_SUBMINOR "z" CACHE STRING "")
+# SET(TRILINOS_WITH_MPI TRUE CACHE BOOL "")
+#
+
+
+#
+# UMFPACK:
+#
+# SET(DEAL_II_WITH_UMFPACK ON CACHE BOOL
+#   "Build deal.II with support for umfpack"
+#   )
+#
+# SET(DEAL_II_FORCE_BUNDLED_UMFPACK TRUE CACHE BOOL
+#   "Always use the bundled umfpack library instead of an external one."
+#   )
+#
+# Automatic detection:
+#
+# Specify a hint with CMAKE_PREFIX_PATH or by setting
+# SET(UMFPACK_DIR "/.../..." CACHE PATH "")
+# also, if necessary, SUITESPARSE_DIR (AMD_DIR, CHOLMOD_DIR, COLAMD_DIR, SUITESPARSECONFIG_DIR)
+#
+# Manual setup:
+#
+# SET(UMFPACK_FOUND TRUE CACHE BOOL "")
+# SET(UMFPACK_LIBRARIES "library;and;semicolon;separated;list;of;link;interface" CACHE STRING "")
+# SET(UMFPACK_INCLUDE_DIRS "semicolon;separated;list;of;include;dirs" CACHE STRING "")
+# SET(UMFPACK_LINKER_FLAGS "..." CACHE STRING "")
+# SET(UMFPACK_VERSION "x.y.z" CACHE STRING "")
+# SET(UMFPACK_VERSION_MAJOR "x" CACHE STRING "")
+# SET(UMFPACK_VERSION_MINOR "y" CACHE STRING "")
+# SET(UMFPACK_VERSION_SUBMINOR "z" CACHE STRING "")
+#
+
+
+#
+# ZLIB:
+#
+# SET(DEAL_II_WITH_ZLIB ON CACHE BOOL
+#   "Build deal.II with support for zlib"
+#   )
+#
+# Automatic detection:
+#
+# Specify a hint with CMAKE_PREFIX_PATH or by setting
+# SET(ZLIB_ROOT "/.../..." CACHE PATH "")
+#
+# Manual setup:
+#
+# SET(ZLIB_FOUND TRUE CACHE BOOL "")
+# SET(ZLIB_LIBRARIES "library;and;semicolon;separated;list;of;link;interface" CACHE STRING "")
+# SET(ZLIB_INCLUDE_DIRS "semicolon;separated;list;of;include;dirs" CACHE STRING "")
+#
+
+
+
+###########################################################################
+#                                                                         #
+#                         Advanced Configuration:                         #
+#                                                                         #
+###########################################################################
+
+#
+# C++11 support:
+#
+# SET(DEAL_II_WITH_CXX11 ON CACHE BOOL
+#   "deal.II using C++11 language standard"
+#   )
+#
+# A custom C++11 flag can be set by setting
+#
+#   SET(DEAL_II_CXX_VERSION_FLAG "-std=c++0x" CACHE STRING "")
+#
+
+#
+# C++14 support:
+#
+# SET(DEAL_II_WITH_CXX14 ON CACHE BOOL
+#   "deal.II using C++14 language standard"
+#   )
+#
+# A custom C++14 flag can be set by setting
+#
+#   SET(DEAL_II_CXX_VERSION_FLAG "-std=c++1y" CACHE STRING "")
+#
+
+
+#
+# Platform introspection:
+#
+# deal.II has some platform dependent tests and configuration options.
+# They can be enabled/disabled with DEAL_II_ALLOW_PLATFORM_INTROSPECTION
+# (see above)
+#
+# To enable support for AVX and SSE manually, set:
+#
+#   SET(DEAL_II_HAVE_SSE TRUE CACHE BOOL "")
+#   SET(DEAL_II_HAVE_AVX TRUE CACHE BOOL "")
+#   SET(DEAL_II_HAVE_AVX512 TRUE CACHE BOOL "")
+#
+
+
+#
+# Obnoxiously advanced configuration about paths, install locations and
+# names:
+#
+# SET(DEAL_II_PACKAGE_NAME "deal.II" CACHE STRING
+#   "The glorious package name"
+#   )
+#
+# SET(DEAL_II_PROJECT_CONFIG_NAME "${DEAL_II_PACKAGE_NAME}" CACHE STRING
+#   "Basename for the CMake project config file"
+#   )
+#
+# SET(DEAL_II_BASE_NAME "deal_II" CACHE STRING
+#   "Basename for the library"
+#   )
+#
+# SET(DEAL_II_DEBUG_SUFFIX ".g" CACHE STRING
+#   "Library suffix for the debug library"
+#   )
+#
+# SET(DEAL_II_RELEASE_SUFFIX "" CACHE STRING
+#   "Library suffix for the release library"
+#   )
+#
+# SET(DEAL_II_DOCHTML_RELDIR "doc" CACHE STRING
+#   "Relative installation directory for html documentation"
+#   )
+#
+# SET(DEAL_II_DOCREADME_RELDIR "" CACHE STRING
+#   "Relative installation directory for readme"
+#   )
+#
+# SET(DEAL_II_EXAMPLES_RELDIR "examples" CACHE STRING
+#   "Relative installation directory for examples"
+#   )
+#
+# SET(DEAL_II_EXECUTABLE_RELDIR "bin" CACHE STRING
+#   "Relative installation directory for binary targets"
+#   )
+#
+# SET(DEAL_II_INCLUDE_RELDIR "include" CACHE STRING
+#   "Relative installation directory for include files"
+#   )
+#
+# SET(DEAL_II_LIBRARY_RELDIR "lib" CACHE STRING
+#   "Relative installation directory for library targets"
+#   )
+#
+# SET(DEAL_II_PROJECT_CONFIG_RELDIR "${DEAL_II_LIBRARY_RELDIR}/cmake/${DEAL_II_PROJECT_CONFIG_NAME}" CACHE STRING
+#   "Relative installation directory for the CMake project configuration"
+#   )
+#
diff --git a/doc/users/doxygen.html b/doc/users/doxygen.html
new file mode 100644
index 0000000..767eeb5
--- /dev/null
+++ b/doc/users/doxygen.html
@@ -0,0 +1,63 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+                 "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+  <meta http-equiv="Content-type" content="text/html;charset=UTF-8">
+  <title>deal.II Doxygen documentation</title>
+  <link href="../screen.css" rel="StyleSheet">
+  <meta name="author" content="the deal.II authors">
+  <meta name="copyright" content="Copyright (C) 2012 - 2015 by the deal.II authors">
+  <meta name="keywords" content="deal dealii finite elements fem triangulation">
+  <meta http-equiv="content-language" content="en">
+</head>
+<body>
+
+  <h1>Referencing the deal.II online documentation from your project</h1>
+
+  <p>deal.II has an extensive reference documentation generated
+  by <a href="http://www.doxygen.org">Doxygen</a>. One of the many
+  features of Doxygen is that all class names, function names and so
+  on are cross-linked in the documentation, and a single click
+  suffices to jump to the documentation of an object.</p>
+
+  <p>When you write online documentation for your own code, it might
+  come in pretty handy if you could provide links directly into the
+  deal.II documentation. And indeed, you can. Here is how:
+
+  <ol>
+    <li>Download the tag file for Doxygen. As of now, the link to the
+      tag file can be found on the main documentation page generated by
+      Doxygen, if the online documentation is available,
+      <a href="../doxygen/deal.II/index.html">here</a>. For older revisions,
+	links are provided here by revision number:
+	<a href="http://www.dealii.org/8.4.1/doxygen/deal.tag">8.4.1</a>,
+	<a href="http://www.dealii.org/8.3.0/doxygen/deal.tag">8.3.0</a>,
+	<a href="http://www.dealii.org/8.2.0/doxygen/deal.tag">8.2.0</a>,
+	<a href="http://www.dealii.org/8.1.0/doxygen/deal.tag">8.1.0</a>,
+	<a href="http://www.dealii.org/8.0.0/doxygen/deal.tag">8.0.0</a>,
+	<a href="http://www.dealii.org/7.3.0/doxygen/deal.tag">7.3.0</a>,
+	<a href="http://www.dealii.org/7.2.0/doxygen/deal.tag">7.2.0</a>,
+	<a href="http://www.dealii.org/7.1.0/doxygen/deal.tag">7.1.0</a>,
+	<a href="http://www.dealii.org/7.0.0/doxygen/deal.tag">7.0.0</a>.
+    </li>
+    <li>Once your <code>deal.tag</code> file is in a location where
+    Doxygen can find it, add the following line to your Doxygen
+    configuration file:
+<pre>
+TAGFILES = deal.tag=http://www.dealii.org/X.Y.Z/doxygen/deal.II
+</pre>
+      where <code>X.Y.Z</code> refers to the release number for which
+      you downloaded the tag file.
+      </li>
+  </ol>
+
+    <hr />
+    <div class="right">
+      <a href="http://validator.w3.org/check?uri=referer" target="_top">
+        <img style="border:0" src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+      <a href="http://jigsaw.w3.org/css-validator/check/referer" target="_top">
+        <img style="border:0;width:88px;height:31px" src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+    </div>
+
+</body>
+</html>
diff --git a/doc/users/gdb.html b/doc/users/gdb.html
new file mode 100644
index 0000000..6315b13
--- /dev/null
+++ b/doc/users/gdb.html
@@ -0,0 +1,96 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+          "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+  <head>
+    <meta http-equiv="Content-type" content="text/html;charset=UTF-8">
+    <title>deal.II GDB documentation</title>
+    <link href="../screen.css" rel="StyleSheet">
+    <meta name="author" content="the deal.II authors">
+    <meta name="copyright" content="Copyright (C) 2012 - 2015 by the deal.II authors">
+    <meta name="keywords" content="deal dealii finite elements fem triangulation">
+    <meta http-equiv="content-language" content="en">
+  </head>
+  <body>
+
+    <h1>Configuration for debugging deal.II projects via GDB</h1>
+
+    <p><acronym>deal.II</acronym> comes with a simple set of pretty-printers
+      that provide descriptions to GDB (the GNU debugger) on how to usefully
+      print <acronym>deal.II</acronym> objects. For example, without
+      pretty-printing, GDB prints points as</p>
+    <pre class="sample">
+(gdb) print point
+$1 = {<dealii::Tensor<1, 2, double>> = {static dimension = <optimized out>,
+static rank = <optimized out>, static n_independent_components = <optimized out>, values = {{
+static dimension = <optimized out>, static rank = <optimized out>,
+static n_independent_components = <optimized out>, value = 0}, {
+static dimension = <optimized out>, static rank = <optimized out>,
+static n_independent_components = <optimized out>, value = 0}}}, <No data fields>}</pre>
+    <p>but with pretty-printing enabled GDB prints</p>
+    <pre class="sample">
+(gdb) print point
+$1 = {0, 0}</pre>
+    <p>which is much easier to read.</p>
+
+<h2>Setting up GDB for just deal.II</h2>
+<p>If you only plan on using GDB with <acronym>deal.II</acronym> then it
+  suffices to rename the provided GDB pretty-printing file and place it in your
+  home directory. This can be done with the following shell commands
+  (where <code>dealii_source_directory</code> is the root directory containing
+  all <acronym>deal.II</acronym> source files):
+  <pre class="sample">
+cd dealii_source_directory
+cp contrib/utilities/dotgdbinit.py ~/.gdbinit</pre>
+</p>
+
+<h2>Setting up GDB for deal.II and other sets of printers</h2>
+<p>
+  Configuring GDB to use multiple sets of pretty-printers (i.e., sets of
+  pretty-printers from <acronym>deal.II</acronym> as well as other projects)
+  takes a little more work. The recommended procedure is
+  <ol>
+    <li> Decide on a directory where you will place all of your pretty-printing
+      files. A good choice would be creating a new
+      folder <code>.gdbscripts</code> in your home directory, and this is the
+      choice assumed for the rest of this demonstration.
+    <li> Move the Python source code parts (so everything after, and not
+      including, the line containing just the word <code>python</code> near the
+      beginning up to (and also not including) the line containing just the
+      word <code>end</code>) of the
+      file <code>contrib/utilities/dotgdbinit.py</code> into the directory from
+      the first step. Rename this file to <code>deal.py</code> so that we can
+      import it into the master GDB file in the next step.
+    <li> Add the following to your <code>.gdbinit</code> file, which also
+      resides in your home directory:
+      <pre class="sample">
+set print pretty 1
+
+python
+
+import os
+import sys
+sys.path.append(os.path.expanduser("~/.gdbscripts/"))
+import deal
+
+end</pre>
+      The statements between <code>python</code> and <code>end</code> are
+      executed as python code by GDB. The line
+      <pre class="sample">
+import deal</pre>
+      executes the code necessary to enable the <acronym>deal.II</acronym>
+      pretty-printers (it loads the file created in the last step).
+  </ol>
+  Use a similar procedure on other pretty-printing files (which should also be
+  located in <code>.gdbscripts/</code>) to set up any other pretty-printing
+  code.
+</p>
+<hr />
+<div class="right">
+  <a href="http://validator.w3.org/check?uri=referer" target="_top">
+    <img style="border:0" src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+  <a href="http://jigsaw.w3.org/css-validator/check/referer" target="_top">
+    <img style="border:0;width:88px;height:31px" src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+</div>
+
+</body>
+</html>
diff --git a/doc/users/testsuite.html b/doc/users/testsuite.html
new file mode 100644
index 0000000..8c12c09
--- /dev/null
+++ b/doc/users/testsuite.html
@@ -0,0 +1,291 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+                 "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+  <meta http-equiv="Content-type" content="text/html;charset=UTF-8">
+  <title>The deal.II Testsuite</title>
+  <link href="../screen.css" rel="StyleSheet">
+  <meta name="copyright" content="Copyright (C) 1998 - 2015 by the deal.II Authors">
+  <meta name="keywords" content="deal dealii finite elements fem triangulation">
+  <meta http-equiv="content-language" content="en">
+</head>
+<body>
+
+
+    <h1>Setting up testsuite in user projects</h1>
+
+    <p>
+      This page provides details about how to set up a testsuite in a user
+      project similar to the one that is used to test deal.II itself.
+    </p>
+
+    <div class="toc">
+      <ol>
+        <li><a href="#overview">Overview</a></li>
+        <li><a href="#examples">Examples</a></li>
+        <ol>
+          <li><a href="#simple">Simple configuration</a></li>
+          <li><a href="#advanced">Advanced configuration</a></li>
+        </ol>
+      </ol>
+    </div>
+
+    <a name="overview"></a>
+    <h2>Overview</h2>
+
+    <p>
+      deal.II features an extensive <a
+        href="../developers/testsuite.html">testsuite</a> to ensure
+      consistent, well-defined behavior of its building blocks during
+      development and for releases. But, the larger a user program/project
+      becomes, the more important it is to also check user code for
+      continued correctness during development. This is mainly done via
+      <a href="https://en.wikipedia.org/wiki/Unit_testing">unit</a> and
+      <a href="https://en.wikipedia.org/wiki/Regression_testing">regression testing</a>.
+    </p>
+
+    <p>
+      deal.II provides a mechanism to conveniently set up unit and
+      regression tests in a user project (very much like they are handled
+      in the library itself). At its heart, a test consists of a small executable
+      that is invoked and an output file for comparison. The executable
+      that should be run can be defined in two different ways: Either as a
+      source file in conjunction with a comparison file:
+      <pre>
+my_test_1.cc
+my_test_1.output</pre>
+      In this case <code>my_test_1.cc</code> contains a full executable (with
+      a main function) that produces some output. The screen output is then
+      compared against <code>my_test_1.output</code>. Alternatively, a
+      parameter file together with a comparison file can be provided:
+      <pre>
+my_test_2.prm
+my_test_2.output</pre>
+      In this case an already built executable (that is defined by a CMake
+      variable) is invoked with the path of <code>my_test_2.prm</code> as
+      first argument. Again, its screen output is compared against
+      <code>my_test_2.output</code>.
+    </p>
+
+    <a name="examples"></a>
+    <h2>Examples</h2>
+
+    <p>
+      This section presents two different examples of how to use the
+      testsuite facilities. Possible directory layouts together with the
+      necessary CMake configuration are discussed.
+    </p>
+
+    <a name="simple"></a>
+    <h3>Simple configuration</h3>
+
+    <p>
+      For the purpose of an example, let us pretend that step-1 could read
+      input files (defined on the command line) and do some computation
+      based on their contents. Then, we can set up tests for expected
+      output for a given configuration file.
+
+      This can be done by creating a subdirectory <code>tests</code> and
+      adding a test of the second type (i.e., parameter file and comparison
+      file). In detail the directory and file layout is as follows:
+      <pre>
+CMakeLists.txt
+step-1.cc
+tests/CMakeLists.txt
+tests/my_test.output
+tests/my_test.prm</pre>
+      In order to enable testing the top-level <code>CMakeLists.txt</code>
+      file has to be augmented by a call to <code>ENABLE_TESTING()</code>
+      and a subsequent descent into the <code>tests/</code> subdirectory via
+      <code>ADD_SUBDIRECTORY(tests)</code>. For convenience, here is the
+      full top-level <code>CMakeLists.txt</code> file:
+      <pre class="cmake">
+SET(TARGET step-1)
+SET(TARGET_SRC step-1.cc)
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+FIND_PACKAGE(deal.II 8.4.0 REQUIRED
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
+
+# Enable testing and descend into tests/ subdirectory:
+ENABLE_TESTING()
+ADD_SUBDIRECTORY(tests)</pre>
+      The corresponding file <code>tests/CMakeLists.txt</code> contains
+      only two statements:
+      <pre class="cmake">
+SET(TEST_TARGET ${TARGET})
+DEAL_II_PICKUP_TESTS()</pre>
+      The first statement sets the variable <code>TEST_TARGET</code> to the
+      executable that should be invoked (in our case the contents of the
+      variable <code>TARGET</code>). The second statement is a call to a
+      deal.II macro that will go through the directory contents and define
+      all test targets.
+    </p>
+
+    <p>Due to the fact that step-1 produces only two lines of output and
+    parses no parameters, we can set up a somewhat silly test by just
+    providing the comparison file and an empty parameter file:
+    <pre>
+$ touch tests/my_test.prm
+$ echo "Grid written to grid-1.eps" >  tests/my_test.output
+$ echo "Grid written to grid-2.eps" >> tests/my_test.output</pre>
+    After that, reconfigure and call the test driver <code>ctest</code>:
+    <pre>
+$ cmake .
+[...]
+$ ctest
+Test project .../examples/step-1
+    Start 1: tests/my_test.debug
+1/1 Test #1: tests/my_test.debug ..............   Passed    1.72 sec
+
+100% tests passed, 0 tests failed out of 1
+
+Total Test time (real) =   1.72 sec</pre>
+    </p>
+
+    <p>
+      <i>Remark:</i> The test driver will compare the combined output
+      stream of stdout and stderr against the comparison file. If the test
+      creates a file <code>output</code> and writes to it, the comparison
+      file is compared against this output file instead. In this case
+      stdout and stderr are discarded.
+    </p>
+
+    <a name="advanced"></a>
+    <h3>Advanced configuration</h3>
+
+    <p>
+      The above setup is too inflexible for larger projects that might consist
+      of individual libraries and an independent main program. Therefore,
+      as a second example a project is presented that consists of a support
+      library "support" and an executable "step". The task shall be to
+      provide unit tests for the library "support" and simple configuration
+      type tests for "step". In detail:
+      <pre>
+CMakeLists.txt
+
+src/CMakeLists.txt
+src/step.cc
+src/support.cc
+
+tests/step/CMakeLists.txt
+tests/step/my_test_1.prm
+tests/step/my_test_1.output
+
+tests/support/CMakeLists.txt
+tests/support/my_test_2.cc
+tests/support/my_test_2.output</pre>
+    </p>
+
+    <p>
+      Again, we want to use the "autopilot" configuration for user projects
+      (see the <a href="cmakelists.html">cmake documentation</a> for
+      details). The top-level <code>CMakeLists.txt</code> is now solely
+      responsible for finding deal.II, enabling testing, and descending into
+      subdirectories:
+      <pre class="cmake">
+# top-level CMakeLists.txt
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+FIND_PACKAGE(deal.II 8.4.0 REQUIRED
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(step)
+ENABLE_TESTING()
+
+ADD_SUBDIRECTORY(src)
+ADD_SUBDIRECTORY(tests/step)
+ADD_SUBDIRECTORY(tests/support)</pre>
+      The library and executable are defined in
+      <code>src/CMakeLists.txt</code>:
+      <pre class="cmake">
+# src/CMakeLists.txt
+
+# set up shared library by hand:
+ADD_LIBRARY(support SHARED support.cc)
+DEAL_II_SETUP_TARGET(support)
+
+# set up executable with autopilot macro:
+SET(TARGET "step")
+SET(TARGET_SRC step.cc)
+DEAL_II_INVOKE_AUTOPILOT()
+TARGET_LINK_LIBRARIES(${TARGET} support)</pre>
+
+      Similarly to the first example, setting up tests for the executable
+      "step" is just a matter of defining a variable and a call to a macro:
+      <pre class="cmake">
+# tests/step/CMakeLists.txt
+
+SET(TEST_TARGET step)
+DEAL_II_PICKUP_TESTS()</pre>
+      In contrast, the tests for the support library consist of a source file
+      that has a main function. The object file generated from this source
+      file will be linked against deal.II and every library listed in
+      <code>TEST_LIBRARIES</code>:
+      <pre class="cmake">
+# tests/support/CMakeLists.txt
+
+SET(TEST_LIBRARIES support)
+DEAL_II_PICKUP_TESTS()</pre>
+    Again, reconfigure and run ctest:
+      <pre>
+$ cmake .
+$ ctest
+Test project .../examples/step
+    Start 1: step/my_test_1.debug
+1/2 Test #1: step/my_test_1.debug .............   Passed    0.21 sec
+    Start 2: support/my_test_2.debug
+2/2 Test #2: support/my_test_2.debug ..........   Passed    0.22 sec
+
+100% tests passed, 0 tests failed out of 2
+
+Total Test time (real) =   0.43 sec</pre>
+    </p>
+
+    <p>
+      <i>Remark:</i> For further information consult the <a
+        href="../developers/testsuite.html">testsuite documentation</a> for
+      the library. With the sole exception of the testsuite setup (that
+      happens unconditionally in user testsuites), this documentation also
+      applies to user testsuites.
+    </p>
+
+    <p>
+    <i>Remark:</i> The full configuration options for
+      <code>DEAL_II_PICKUP_TESTS()</code> are:
+    </p>
+    <pre>
+TEST_LIBRARIES
+TEST_LIBRARIES_DEBUG
+  - additionally used for tests with debug configuration
+TEST_LIBRARIES_RELEASE
+  - additionally used for tests with release configuration
+
+TEST_TARGET or
+TEST_TARGET_DEBUG and TEST_TARGET_RELEASE
+  - used instead of TEST_TARGET for debug/release configuration
+
+NUMDIFF_EXECUTABLE, DIFF_EXECUTABLE
+  - pointing to valid diff executables. If NUMDIFF_EXECUTABLE is not
+    "numdiff" it will be ignored and DIFF_EXECUTABLE is used instead
+
+TEST_TIME_LIMIT
+  - specifying the maximal wall clock time in seconds a test is allowed
+    to run</pre>
+    <hr />
+    <div class="right">
+      <a href="http://validator.w3.org/check?uri=referer" target="_top">
+        <img style="border:0" src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+      <a href="http://jigsaw.w3.org/css-validator/check/referer" target="_top">
+        <img style="border:0;width:88px;height:31px" src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+    </div>
+
+  </body>
+</html>
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
new file mode 100644
index 0000000..fd6d05b
--- /dev/null
+++ b/examples/CMakeLists.txt
@@ -0,0 +1,50 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+IF(DEAL_II_COMPONENT_EXAMPLES) # the whole block is skipped unless this variable is true
+  MESSAGE(STATUS "Setting up examples")
+
+  INSTALL(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/
+    DESTINATION ${DEAL_II_EXAMPLES_RELDIR}
+    COMPONENT examples
+    FILES_MATCHING
+    #
+    # Exclude folder structures whose names start with "CMakeFiles" or "doc":
+    #
+    PATTERN "CMakeFiles*" EXCLUDE
+    PATTERN "doc*" EXCLUDE
+    #
+    # Glob includes (source, parameter and input files, plus the CMake
+    # scripts of the step-* and doxygen example directories):
+    #
+    PATTERN "*.cc"
+    PATTERN "*.prm"
+    PATTERN "*.inp"
+    PATTERN "step*/CMakeLists.txt"
+    PATTERN "doxygen/CMakeLists.txt"
+    #
+    # Special files needed by individual tutorial programs:
+    #
+    PATTERN "output.reference.dat"        # step-39
+    PATTERN "postprocess.pl"              # step-39
+    PATTERN "obstacle_file.pbm"           # step-42
+    PATTERN "untitled.geo"                # step-49
+    PATTERN "untitled.msh"                # step-49
+    PATTERN "topography.txt.gz"           # step-53
+    PATTERN "input/initial_mesh_3d.vtk"   # step-54
+    PATTERN "DTMB-5415_bulbous_bow.iges"  # step-54
+    )
+
+  MESSAGE(STATUS "Setting up examples - Done")
+ENDIF()
diff --git a/examples/README.md b/examples/README.md
new file mode 100644
index 0000000..df0842f
--- /dev/null
+++ b/examples/README.md
@@ -0,0 +1,5 @@
+This folder contains example programs for deal.II
+-------------------------------------------------
+
+They are distributed under the same license as the deal.II library itself,
+namely LGPL-2.1+.
diff --git a/examples/doxygen/CMakeLists.txt b/examples/doxygen/CMakeLists.txt
new file mode 100644
index 0000000..3df267c
--- /dev/null
+++ b/examples/doxygen/CMakeLists.txt
@@ -0,0 +1,29 @@
+##
+#  CMake script for compiling the doxygen examples
+##
+
+# Usually, you will not need to modify anything beyond this point...
+
+FILE(GLOB sources ${CMAKE_CURRENT_SOURCE_DIR}/*.cc) # every .cc file in this directory
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND}) # QUIET above suppresses CMake's own message, so report the failure here
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(doxygen_examples)
+
+FOREACH(ccfile ${sources}) # one executable per source file, named after the file without extension
+  GET_FILENAME_COMPONENT(file ${ccfile} NAME_WE)
+  ADD_EXECUTABLE(${file} ${ccfile})
+  DEAL_II_SETUP_TARGET(${file})
+ENDFOREACH()
diff --git a/examples/doxygen/block_dynamic_sparsity_pattern.cc b/examples/doxygen/block_dynamic_sparsity_pattern.cc
new file mode 100644
index 0000000..cedb545
--- /dev/null
+++ b/examples/doxygen/block_dynamic_sparsity_pattern.cc
@@ -0,0 +1,83 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+// See documentation of BlockDynamicSparsityPattern for documentation of this example
+
+#include <deal.II/lac/block_sparsity_pattern.h>
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_system.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_renumbering.h>
+#include <deal.II/dofs/dof_tools.h>
+
+#include <iostream>
+
+using namespace dealii;
+
+int main() // builds a block sparsity pattern on a small 2d mesh and prints its entries
+{
+  Triangulation<2> tr;
+  GridGenerator::subdivided_hyper_cube(tr, 3);
+  tr.begin_active()->set_refine_flag(); // flag only the first active cell
+  tr.execute_coarsening_and_refinement(); // refining one cell is what makes hanging-node constraints relevant below
+
+  FE_Q<2> fe1(1);
+  FE_Q<2> fe2(2);
+  FESystem<2> fe(fe1, 2, fe2, 1); // two copies of fe1 followed by one copy of fe2
+
+  DoFHandler<2> dof(tr);
+  dof.distribute_dofs(fe);
+  DoFRenumbering::Cuthill_McKee(dof);
+  DoFRenumbering::component_wise(dof); // NOTE(review): presumably makes the DoFs of each block contiguous, which the index loops below rely on -- confirm
+
+  ConstraintMatrix constraints;
+  DoFTools::make_hanging_node_constraints(dof, constraints);
+  constraints.close();
+
+  std::vector<unsigned int> dofs_per_block(fe.n_blocks());
+  DoFTools::count_dofs_per_block(dof, dofs_per_block);
+
+  BlockDynamicSparsityPattern dsp(fe.n_blocks(), fe.n_blocks());
+  for (unsigned int i=0; i<fe.n_blocks(); ++i) // size block (i,j) as dofs_per_block[i] x dofs_per_block[j]
+    for (unsigned int j=0; j<fe.n_blocks(); ++j)
+      dsp.block(i,j).reinit(dofs_per_block[i],dofs_per_block[j]);
+  dsp.collect_sizes(); // must follow the per-block reinit calls above
+
+  DoFTools::make_sparsity_pattern(dof, dsp);
+  constraints.condense(dsp);
+
+  BlockSparsityPattern sparsity;
+  sparsity.copy_from(dsp); // convert the dynamic pattern into the final block pattern
+
+  unsigned int ig = 0; // global row index, counted across all row blocks
+  for (unsigned int ib=0; ib<fe.n_blocks(); ++ib)
+    for (unsigned int i=0; i<dofs_per_block[ib]; ++i,++ig)
+      {
+        unsigned int jg = 0; // global column index, restarted for every row
+        for (unsigned int jb=0; jb<fe.n_blocks(); ++jb)
+          for (unsigned int j=0; j<dofs_per_block[jb]; ++j,++jg)
+            {
+              if (sparsity.exists(ig,jg)) // print "row col<TAB>rowblock colblock" for each stored entry
+                std::cout << ig << ' ' << jg
+                          << '\t' << ib << jb << std::endl;
+            }
+      }
+}
diff --git a/examples/doxygen/block_matrix_array.cc b/examples/doxygen/block_matrix_array.cc
new file mode 100644
index 0000000..e561c10
--- /dev/null
+++ b/examples/doxygen/block_matrix_array.cc
@@ -0,0 +1,130 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+// See documentation of BlockMatrixArray for documentation of this example
+
+#include <deal.II/base/logstream.h>
+#include <deal.II/lac/block_matrix_array.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/precondition.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/solver_gmres.h>
+
+#include <iostream>
+#include <fstream>
+
+using namespace dealii;
+
+double Adata[] = // 16 values copied into the 4x4 matrix A in main()
+{
+  4., .5, .1, 0.,
+  .5, 4., .5, .1,
+  .1, .5, 4., .5,
+  0., .1, .5, 4.
+};
+
+double B1data[] = // 8 values copied into the 4x2 matrix B1 in main()
+{
+  .5, .1,
+  .4, .2,
+  .3, .3,
+  .2, .4
+};
+
+double B2data[] = // 8 values copied into the 2x4 matrix B2 in main()
+{
+  .3, 0., -.3, 0.,
+  -.3, 0., .3, 0.
+};
+
+double Cdata[] = // 4 values copied into the 2x2 matrix C in main()
+{
+  8., 1.,
+  1., 8.
+};
+
+int main () // assembles a 2x2 block matrix from small dense blocks and solves with it three times
+{
+  FullMatrix<float> A(4,4);
+  FullMatrix<float> B1(4,2);
+  FullMatrix<float> B2(2,4);
+  FullMatrix<float> C(2,2);
+
+  A.fill(Adata);
+  B1.fill(B1data);
+  B2.fill(B2data);
+  C.fill(Cdata);
+
+  BlockMatrixArray<double> matrix(2, 2);
+
+  matrix.enter(A,0,0,2.); // arguments: matrix, block row, block column, scaling factor
+  matrix.enter(B1,0,1,-1.);
+  matrix.enter(B2,0,1,1., true); // trailing "true": the transpose of B2 is entered
+  matrix.enter(B2,1,0,1.);
+  matrix.enter(B1,1,0,-1., true);
+  matrix.enter(C,1,1);
+  matrix.print_latex(deallog);
+
+  std::vector<unsigned int> block_sizes(2);
+  block_sizes[0] = 4;
+  block_sizes[1] = 2;
+
+  BlockVector<double> result(block_sizes);
+  BlockVector<double> x(block_sizes);
+  BlockVector<double> y(block_sizes);
+  for (unsigned int i=0; i<result.size(); ++i) // reference solution: entry i has the value i
+    result(i) = i;
+
+  matrix.vmult(y, result); // right-hand side that belongs to the reference solution
+
+  SolverControl control(100,1.e-10);
+  PreconditionIdentity id;
+
+  SolverCG<BlockVector<double> > cg(control);
+  cg.solve(matrix, x, y, id); // first solve: CG without preconditioning
+  x.add(-1., result); // x now holds the error x - result
+  deallog << "Error " << x.l2_norm() << std::endl;
+
+  deallog << "Error A-norm "
+          << std::sqrt(matrix.matrix_norm_square(x))
+          << std::endl;
+
+  FullMatrix<float> Ainv(4,4);
+  Ainv.invert(A);
+  FullMatrix<float> Cinv(2,2);
+  Cinv.invert(C);
+
+  BlockTrianglePrecondition<double>
+  precondition(2);
+  precondition.enter(Ainv,0,0,.5); // only diagonal blocks so far
+  precondition.enter(Cinv,1,1);
+
+  cg.solve(matrix, x, y, precondition); // second solve: CG with the diagonal-block preconditioner
+  x.add(-1., result);
+  deallog << "Error " << x.l2_norm() << std::endl;
+
+  precondition.enter(B1,1,0,-1., true); // add the off-diagonal block (1,0) to the preconditioner
+  precondition.enter(B2,1,0,1.);
+
+  SolverGMRES<BlockVector<double> > gmres(control);
+  gmres.solve(matrix, x, y, precondition); // third solve: GMRES with the extended preconditioner
+  x.add(-1., result);
+  deallog << "Error " << x.l2_norm() << std::endl;
+
+  return 0;
+}
diff --git a/examples/doxygen/product_matrix.cc b/examples/doxygen/product_matrix.cc
new file mode 100644
index 0000000..e957bb8
--- /dev/null
+++ b/examples/doxygen/product_matrix.cc
@@ -0,0 +1,64 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+// See documentation of ProductMatrix for documentation of this example
+
+#include <deal.II/base/logstream.h>
+#include <deal.II/lac/matrix_lib.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/vector.h>
+
+using namespace dealii;
+
+double Adata[] = // 4 values copied into the 2x2 matrix A in main()
+{
+  .5, .1,
+  .4, .2
+};
+
+double Bdata[] = // 4 values copied into the 2x2 matrix B in main()
+{
+  .866, .5,
+  -.5, .866
+};
+
+
+int main() // applies a ProductMatrix built from A and B, and its transpose, to a fixed vector
+{
+  FullMatrix<float> A(2,2);
+  FullMatrix<double> B(2,2);
+
+  A.fill(Adata);
+  B.fill(Bdata);
+
+  GrowingVectorMemory<Vector<double> > mem; // handed to the ProductMatrix constructor below
+
+  ProductMatrix<Vector<double> > AB(A,B,mem);
+
+  Vector<double> u(2);
+  Vector<double> v(2);
+
+  u(0) = 1.;
+  u(1) = 2.;
+
+  AB.vmult(v,u); // result is written to v, u is the input
+
+  deallog << v(0) << '\t' << v(1) << std::endl;
+
+  AB.Tvmult(v,u); // same with the transposed operator; v is overwritten
+
+  deallog << v(0) << '\t' << v(1) << std::endl;
+}
diff --git a/examples/doxygen/theta_timestepping.cc b/examples/doxygen/theta_timestepping.cc
new file mode 100644
index 0000000..8fb8633
--- /dev/null
+++ b/examples/doxygen/theta_timestepping.cc
@@ -0,0 +1,132 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+// See documentation of ThetaTimestepping for documentation of this example
+
+#include <deal.II/base/logstream.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/full_matrix.h>
+
+#include <deal.II/algorithms/operator.h>
+#include <deal.II/algorithms/theta_timestepping.h>
+
+#include <iostream>
+
+using namespace dealii;
+using namespace Algorithms;
+
+
+class Explicit : public OperatorBase // operator passed as first argument to ThetaTimestepping in main()
+{
+public:
+  Explicit(const FullMatrix<double> &matrix);
+  void operator() (AnyData &out, const AnyData &in);
+
+private:
+  SmartPointer<const FullMatrix<double>, Explicit> matrix; // points to the matrix given to the constructor
+  FullMatrix<double> m; // working matrix, rebuilt in operator() when the time step changes
+};
+
+
+class Implicit : public OperatorBase // operator passed as second argument to ThetaTimestepping in main()
+{
+public:
+  Implicit(const FullMatrix<double> &matrix);
+  void operator() (AnyData &out, const AnyData &in);
+
+private:
+  SmartPointer<const FullMatrix<double>, Implicit> matrix; // points to the matrix given to the constructor
+  FullMatrix<double> m; // working matrix, rebuilt in operator() when the time step changes
+};
+
+// End of declarations
+
+int main() // sets up a 2x2 system and runs the theta time stepping scheme on it
+{
+  FullMatrix<double> matrix(2);
+  matrix(0,0) = 0.;
+  matrix(1,1) = 0.;
+  matrix(0,1) = 3.14;
+  matrix(1,0) = -3.14;
+
+  OutputOperator<Vector<double> > out;
+  out.initialize_stream(std::cout); // solver output goes to standard output
+
+  Explicit op_explicit(matrix);
+  Implicit op_implicit(matrix);
+  ThetaTimestepping<Vector<double> > solver(op_explicit, op_implicit);
+  solver.set_output(out);
+
+  Vector<double> value(2);
+  value(0) = 1.; // only the first component is set explicitly
+  AnyData indata; // stays empty
+  AnyData outdata;
+  outdata.add(&value, "value"); // stored by pointer under the name "value"
+
+  solver.notify(Events::initial);
+  solver(outdata, indata);
+}
+
+
+Explicit::Explicit(const FullMatrix<double> &M) // keeps a pointer to M; M itself is not copied
+  :
+  matrix(&M)
+{
+  m.reinit(M.m(), M.n()); // working matrix gets the same dimensions as M
+}
+
+
+void
+Explicit::operator() (AnyData &out, const AnyData &in) // writes m * "Previous iterate" into entry 0 of out
+{
+  const double timestep = *in.read_ptr<double>("Timestep");
+  if (this->notifications.test(Events::initial) || this->notifications.test(Events::new_timestep_size))
+    { // rebuild m only on the initial call or after a change of time step size
+      m.equ(-timestep, *matrix);
+      for (unsigned int i=0; i<m.m(); ++i) // add 1 to each diagonal entry: m = I - timestep * matrix
+        m(i,i) += 1.;
+    }
+  this->notifications.clear();
+  m.vmult(*out.entry<Vector<double>*>(0),
+          *in.read_ptr<Vector<double> >("Previous iterate"));
+}
+
+
+Implicit::Implicit(const FullMatrix<double> &M) // keeps a pointer to M; M itself is not copied
+  :
+  matrix(&M)
+{
+  m.reinit(M.m(), M.n()); // working matrix gets the same dimensions as M
+}
+
+
+void
+Implicit::operator() (AnyData &out, const AnyData &in) // writes m * "Previous time" into entry 0 of out
+{
+  const double timestep = *in.read_ptr<double>("Timestep");
+  if (this->notifications.test(Events::initial) || this->notifications.test(Events::new_timestep_size))
+    { // rebuild m only on the initial call or after a change of time step size
+      m.equ(timestep, *matrix);
+      for (unsigned int i=0; i<m.m(); ++i) // add 1 to each diagonal entry: m = I + timestep * matrix
+        m(i,i) += 1.;
+      m.gauss_jordan(); // in-place inversion, so the vmult below applies (I + timestep * matrix)^-1
+    }
+  this->notifications.clear();
+  m.vmult(*out.entry<Vector<double>*>(0),
+          *in.read_ptr<Vector<double> >("Previous time"));
+}
+
+
diff --git a/examples/step-1/CMakeLists.txt b/examples/step-1/CMakeLists.txt
new file mode 100644
index 0000000..0dfbf45
--- /dev/null
+++ b/examples/step-1/CMakeLists.txt
@@ -0,0 +1,39 @@
+##
+#  CMake script for the step-1 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-1")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#    FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#    FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#    SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC}) 
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-1/doc/builds-on b/examples/step-1/doc/builds-on
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/examples/step-1/doc/builds-on
@@ -0,0 +1 @@
+
diff --git a/examples/step-1/doc/intro.dox b/examples/step-1/doc/intro.dox
new file mode 100644
index 0000000..14dbb80
--- /dev/null
+++ b/examples/step-1/doc/intro.dox
@@ -0,0 +1,193 @@
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+<h3> About the tutorial </h3>
+
+Since this is the first tutorial program, let us comment first on how
+this tutorial and the rest of the deal.II documentation is supposed to
+work. The documentation for deal.II comes essentially at three
+different levels:
+- The tutorial: This is a collection of programs that shows how
+  deal.II is used in practice. It doesn't typically discuss individual
+  functions at the level of individual arguments, but rather wants to
+  give the big picture of how things work together. In other words, it
+  discusses "concepts": what are the building blocks of deal.II and
+  how are they used together in finite element programs.
+- The manual: This is the documentation of every single class and
+  every single (member) function in deal.II. You get there if, for
+  example, you click on the "Main page" or "Classes" tab at the top of
+  this page. This is the place where you would look up what the second
+  argument of Triangulation::create_triangulation_compatibility means,
+  to give just one slightly obscure example. You need this level of
+  documentation for when you know what you want to do, but forgot how
+  exactly the function was named, what its arguments are, or what it
+  returns. Note that you also get into the manual whenever you read
+  through the tutorial and click on any of the class or function
+  names, i.e. the tutorial contains a great many links into the manual
+  for whenever you need a more detailed description of a function or
+  class. On the other hand, the manual is not a good place to learn
+  deal.II since it gives you a microscopic view of things without
+  telling you how a function might fit into the bigger picture.
+- Modules: These are groups of classes and functions that work
+  together or have related functionality. If you click on the
+  "Modules" tab at the top of this page, you end up on a page that
+  lists a number of such groups. Each module discusses the underlying
+  principles of these classes; for example, the @ref Sparsity module
+  talks about all sorts of different issues related to storing
+  sparsity patterns of matrices. This is documentation at an
+  intermediate level: they give you an overview of what's there in a
+  particular area. For example when you wonder what finite element
+  classes exist, you would take a look at the @ref fe module. The
+  modules are, of course, also cross-linked to the manual (and, at
+  times, to the tutorial); if you click on a class name, say on
+  Triangulation, you will also at the very top right under the class
+  name get a link to the modules this class is a member of if you want
+  to learn more about its context.
+
+Let's come back to the tutorial, since you are looking at the first program
+(or "step") of it. Each tutorial program is subdivided into the following
+sections:
+<ol>
+  <li> <b>Introduction:</b> This is a discussion of what the program
+       does, including the mathematical model, and
+       what programming techniques are new compared to previous
+       tutorial programs.
+  <li> <b>The commented program:</b> An extensively documented listing of the
+       source code. Here, we often document individual lines, or
+       blocks of code, and discuss what they do, how they do it, and
+       why. The comments frequently reference the introduction,
+       i.e. you have to understand <i>what</i> the program wants to achieve
+       (a goal discussed in the introduction) before you can
+       understand <i>how</i> it intends to get there.
+  <li> <b>Results:</b> The output of the program, with comments and
+       interpretation. This section also frequently has a subsection
+       that gives suggestions on how to extend the program in various
+       directions; in the earlier programs, this is intended to give
+       you directions for little experiments designed to make you
+       familiar with deal.II, while in later programs it is more about
+       how to use more advanced numerical techniques.
+  <li> <b>The plain program:</b> The source code stripped of
+       all comments. This is useful if you want to see the "big
+       picture" of the code, since the commented version of the
+       program has so much text in between that it is often difficult
+       to see the entire code of a single function on the screen at
+       once.
+</ol>
+
+The tutorials are not only meant to be static documentation, but you
+should play with them. To this end, go to the
+<code>examples/step-1</code> directory (or whatever the number of the
+tutorial is that you're interested in) and type
+@code
+  cmake .
+  make
+  make run
+@endcode
+The first command sets up the files that describe which include files this
+tutorial program depends on, how to compile it and how to run it. This command
+should find the installed deal.II libraries as well that were generated when
+you compiled and installed everything as described in the <a
+href="http://www.dealii.org/readme.html">deal.II ReadMe file</a>.
+If this command should fail to find the deal.II library, then you need to
+provide the path to the installation using the command
+@code
+  cmake -DDEAL_II_DIR=/path/to/installed/deal.II .
+@endcode
+instead.
+
+The second of the commands above compiles the sources into an executable, while the
+last one executes it (strictly speaking, <code>make run</code> will also
+compile the code if the executable doesn't exist yet, so you could
+have skipped the second command if you wanted). This is all that's
+needed to run the code and produce the output that is discussed in the
+"Results" section of the tutorial programs. This sequence needs to be repeated
+in all of the tutorial directories you want to play with.
+
+When learning the library, you need to play with it and see what
+happens. To this end, open the <code>examples/step-1/step-1.cc</code>
+source file with your favorite editor and modify it in some way, save it and
+run it as above. A few suggestions for possible modifications are given at the
+end of the results section of this program, where we also provide a few links
+to other useful pieces of information.
+
+
+<h3> Video lectures on tutorial programs </h3>
+
+This and several of the other tutorial programs are also discussed and
+demonstrated in <a
+href="http://www.math.tamu.edu/~bangerth/videos.html">Wolfgang
+Bangerth's video lectures</a> on deal.II and computational science. In
+particular, you can see the steps he executes to run this and other
+programs, and you will get a much better idea of the tools that can be
+used to work with deal.II. In particular, lectures 2 and 4 give an overview of
+deal.II and of the building blocks of any finite element code.
+
+
+<h3> What this program does </h3>
+
+Let's come back to step-1, the current program.
+In this first example, we don't actually do very much, but show two
+techniques: what is the syntax to generate triangulation objects, and
+some elements of simple loops over all cells. We create two grids, one
+which is a regularly refined square (not very exciting, but a common
+starting grid for some problems), and one more geometric attempt: a
+ring-shaped domain, which is refined towards the inner edge. Through
+this, you will get to know three things every finite element program
+will have to have somewhere: An object of type Triangulation for the
+mesh; a call to the GridGenerator functions to generate a mesh; and
+loops over all cells that involve iterators (iterators are a
+generalization of pointers and are frequently used in the C++ standard
+library; in the context of deal.II, the @ref Iterators module talks
+about them).
+
+The program is otherwise small enough that it doesn't need a whole lot
+of introduction.
+
+@dealiiVideoLecture{5,6}
+
+
+<h3> About scientific computing in general </h3>
+
+If you are reading through this tutorial program, chances are that you are
+interested in continuing to use deal.II for your own projects. Thus, you are
+about to embark on an exercise in programming using a large-scale scientific
+computing library. Unless you are already an experienced user of large-scale
+programming methods, this may be new territory for you — with all the
+new rules that go along with it such as the fact that you will have to deal
+with code written by others, that you may have to think about documenting your
+own code because you may not remember what exactly it is doing a year down the
+road (or because others will be using it as well), or coming up with ways to
+test that your program is doing the right thing. None of this is something
+that we typically train mathematicians, engineers, or scientists in but that
+is important when you start writing software of more than a few hundred
+lines. Remember: Producing software is not the same as just writing code.
+
+To make your life easier on this journey let us point to three resources that
+are worthwhile browsing through before you start any large-scale programming:
+
+- The <a
+  href="http://code.google.com/p/dealii/wiki/FrequentlyAskedQuestions">deal.II
+  Frequently Asked Questions</a>: This page has a good number of questions
+  that pertain to particular aspects of deal.II, but also to more general
+  questions such as "How do I debug scientific computing codes?" or "Can I
+  train myself to write code that has fewer bugs?".
+
+- The <a href="http://software-carpentry.org/">Software Carpentry project</a>
+  that provides introductions to many topics that are important to dealing
+  with software, such as version control, make files, testing, etc. It is
+  specifically written for scientists and engineers, not for computer
+  scientists, and has a focus on short, practical lessons.
+
+- An article on <a href="http://arxiv.org/abs/1210.0530">Best
+  Practices for Scientific Computing</a> that gives an introduction to
+  many of the ways by which you can make sure you are an efficient
+  programmer writing programs that work.
+
+As a general recommendation: If you expect to spend more than a few days
+writing software in the future, do yourself the favor of learning tools that
+can make your life more productive, in particular debuggers and integrated
+development environments. (@dealiiVideoLectureSeeAlso{7,8,8.01,25})
+You will find that you will get the time spent
+learning these tools back severalfold soon by being more productive!
+Several of the video lectures referenced above show how to use tools
+such as integrated development environments or debuggers.
diff --git a/examples/step-1/doc/kind b/examples/step-1/doc/kind
new file mode 100644
index 0000000..15a13db
--- /dev/null
+++ b/examples/step-1/doc/kind
@@ -0,0 +1 @@
+basic
diff --git a/examples/step-1/doc/results.dox b/examples/step-1/doc/results.dox
new file mode 100644
index 0000000..68eabe3
--- /dev/null
+++ b/examples/step-1/doc/results.dox
@@ -0,0 +1,82 @@
+<h1>Results</h1>
+
+Running the program produces graphics of two grids (grid-1.eps and grid-2.eps). They look like this:
+
+<TABLE WIDTH="60%" ALIGN="center">
+  <tr>
+    <td ALIGN="center">
+      <img src="http://www.dealii.org/images/steps/developer/step-1.grid-1.png" alt="">
+    </td>
+
+    <td ALIGN="center">
+      <img src="http://www.dealii.org/images/steps/developer/step-1.grid-2r2.png" alt="">
+    </td>
+  </tr>
+</table>
+
+The left one, well, is not very exciting. The right one is — at least
+— unconventional.
+
+While the second mesh is entirely artificial and made-up, and
+certainly not very practical in applications, to everyone's surprise it
+has found its way into the literature: see the paper by M. Mu
+titled "PDE.MART: A network-based problem-solving environment", ACM
+Trans. Math. Software, vol. 31, pp. 508-531, 2005. Apparently it is
+good for some things at least.
+
+
+<h3> Possible extensions </h3>
+
+<h4> Different adaptive refinement strategies </h4>
+
+This program obviously does not have a whole lot of functionality, but
+in particular the <code>second_grid</code> function has a bunch of
+places where you can play with it. For example, you could modify the
+criterion by which we decide which cells to refine. An example would
+be to change the condition to this:
+@code
+      for (; cell!=endc; ++cell)
+        if (cell->center()[1] > 0)
+          cell->set_refine_flag ();
+@endcode
+This would refine all cells for which the $y$-coordinate of the cell's
+center is greater than zero (the <code>TriaAccessor::center</code>
+function that we call by dereferencing the <code>cell</code> iterator
+returns a Point<2> object; subscripting <code>[0]</code> would give
+the $x$-coordinate, subscripting <code>[1]</code> the
+$y$-coordinate). By looking at the functions that TriaAccessor
+provides, you can also use more complicated criteria for refinement.
+
+<h4> Different geometries </h4>
+
+Another possibility would be to generate meshes of entirely different
+geometries altogether. While for complex geometries there is no way around
+using meshes obtained from mesh generators, there is a good number of
+geometries for which deal.II can create meshes using the functions in the
+GridGenerator namespace. Take a look at what it provides and see how it could
+be used in a program like this.
+
+We also discuss a variety of other ways to create and manipulate meshes in
+step-49.
+
+
+<h4> Comments about programming and debugging </h4>
+
+We close with a comment about modifying or writing programs with deal.II in
+general. When you start working with tutorial programs or your own
+applications, you will find that mistakes happen: your program will contain
+bugs that either abort the program right away or simply lead to
+wrong results. In either case, you will find it extremely helpful to know how
+to work with a debugger: you may get by for a while by just putting debug
+output into your program, compiling it, and running it, but ultimately finding
+bugs with a debugger is much faster, much more convenient, and more reliable
+because you don't have to recompile the program all the time and because you
+can inspect the values of variables and how they change.
+
+Rather than postponing learning how to use a debugger till you really can't
+see any other way to find a bug, here's the one piece of
+advice we will provide in this program: learn how to use a debugger as soon as
+possible. It will be time well invested. The deal.II Frequently Asked
+Questions (FAQ) page linked to from the top-level <a
+href="http://www.dealii.org/">deal.II webpage</a> also provides a good number
+of hints on debugging deal.II programs.
diff --git a/examples/step-1/doc/tooltip b/examples/step-1/doc/tooltip
new file mode 100644
index 0000000..ae43afa
--- /dev/null
+++ b/examples/step-1/doc/tooltip
@@ -0,0 +1 @@
+Creating a mesh. Refining it. Writing it to a file.
diff --git a/examples/step-1/step-1.cc b/examples/step-1/step-1.cc
new file mode 100644
index 0000000..42c437b
--- /dev/null
+++ b/examples/step-1/step-1.cc
@@ -0,0 +1,264 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 1999 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ */
+
+// @sect3{Include files}
+
+// The most fundamental class in the library is the Triangulation class, which
+// is declared here:
+#include <deal.II/grid/tria.h>
+// We need the following two includes for loops over cells and/or faces:
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+// Here are some functions to generate standard grids:
+#include <deal.II/grid/grid_generator.h>
+// We would like to use faces and cells which are not straight lines,
+// or bi-linear quads, so we import some classes which predefine some
+// manifold descriptions:
+#include <deal.II/grid/manifold_lib.h>
+// Output of grids in various graphics formats:
+#include <deal.II/grid/grid_out.h>
+
+// This is needed for C++ output:
+#include <iostream>
+#include <fstream>
+// And this for the declarations of the `sqrt' and `fabs' functions:
+#include <cmath>
+
+// The final step in importing deal.II is this: All deal.II functions and
+// classes are in a namespace <code>dealii</code>, to make sure they don't
+// clash with symbols from other libraries you may want to use in conjunction
+// with deal.II. One could use these functions and classes by prefixing every
+// use of these names by <code>dealii::</code>, but that would quickly become
+// cumbersome and annoying. Rather, we simply import the entire deal.II
+// namespace for general use:
+using namespace dealii;
+
+// @sect3{Creating the first mesh}
+
+// In the following, first function, we simply use the unit square as domain
+// and produce a globally refined grid from it.
+void first_grid ()
+{
+  // The first thing to do is to define an object for a triangulation of a
+  // two-dimensional domain:
+  Triangulation<2> triangulation;
+  // Here and in many following cases, the string "<2>" after a class name
+  // indicates that this is an object that shall work in two space
+  // dimensions. Likewise, there are versions of the triangulation class that
+  // are working in one ("<1>") and three ("<3>") space dimensions. The way
+  // this works is through some template magic that we will investigate in
+  // some more detail in later example programs; there, we will also see how
+  // to write programs in an essentially dimension independent way.
+
+  // Next, we want to fill the triangulation with a single cell for a square
+  // domain. The triangulation is then refined four times, to yield $4^4=256$
+  // cells in total:
+  GridGenerator::hyper_cube (triangulation);
+  triangulation.refine_global (4);
+
+  // Now we want to write a graphical representation of the mesh to an output
+  // file. The GridOut class of deal.II can do that in a number of different
+  // output formats; here, we choose encapsulated postscript (eps) format:
+  std::ofstream out ("grid-1.eps");
+  GridOut grid_out;
+  grid_out.write_eps (triangulation, out);
+  std::cout << "Grid written to grid-1.eps" << std::endl;
+}
+
+
+
+// @sect3{Creating the second mesh}
+
+// The grid in the following, second function is slightly more complicated in
+// that we use a ring domain and refine it locally towards its inner edge.
+void second_grid ()
+{
+  // We start again by defining an object for a triangulation of a
+  // two-dimensional domain:
+  Triangulation<2> triangulation;
+
+  // We then fill it with a ring domain. The center of the ring shall be the
+  // point (1,0), and inner and outer radius shall be 0.5 and 1. The number of
+  // circumferential cells could be adjusted automatically by this function,
+  // but we choose to set it explicitly to 10 as the last argument:
+  const Point<2> center (1,0);
+  const double inner_radius = 0.5,
+               outer_radius = 1.0;
+  GridGenerator::hyper_shell (triangulation,
+                              center, inner_radius, outer_radius,
+                              10);
+  // By default, the triangulation assumes that all boundaries are
+  // straight lines, and all cells are bi-linear quads or tri-linear
+  // hexes, and that they are defined by the cells of the coarse grid
+  // (which we just created). Unless we do something special, when new
+  // points need to be introduced, the domain is assumed to be
+  // delineated by the straight lines of the coarse mesh, and new
+  // points will simply be in the middle of the surrounding ones.
+  // Here, however, we know that the domain is curved, and we would
+  // like to have the Triangulation place new points according to the
+  // underlying geometry. Fortunately, some good soul implemented an
+  // object which describes a spherical domain, of which the ring is a
+  // section; it only needs the center of the ring and automatically
+  // figures out how to instruct the Triangulation where to place the
+  // new points. The way this works in deal.II is that you tag parts
+  // of the triangulation you want to be curved with a number that is
+  // usually referred to as "manifold indicator" and then tell the
+  // triangulation to use a particular "manifold object" for all
+  // places with this manifold indicator. How exactly this works is
+  // not important at this point (you can read up on it in step-53 and
+  // @ref manifold). Here, for simplicity, we will choose the manifold
+  // id to be zero.  By default, all cells and faces of the
+  // Triangulation have their manifold_id set to
+  // numbers::invalid_manifold_id, which is the default if you want a
+  // manifold that produces straight edges, but you can change this
+  // number for individual cells and faces. In that case, the curved
+  // manifold thus associated with number zero will not apply to those
+  // parts with a non-zero manifold indicator, but other manifold
+  // description objects can be associated with those non-zero
+  // indicators. If no manifold description is associated with a
+  // particular manifold indicator, a manifold that produces straight
+  // edges is implied. (Manifold indicators are a slightly complicated
+  // topic; if you're confused about what exactly is happening here,
+  // you may want to look at the @ref GlossManifoldIndicator "glossary
+  // entry on this topic".)
+  triangulation.set_all_manifold_ids(0);
+  const SphericalManifold<2> manifold_description(center);
+  triangulation.set_manifold (0, manifold_description);
+
+  // In order to demonstrate how to write a loop over all cells, we will
+  // refine the grid in five steps towards the inner circle of the domain:
+  for (unsigned int step=0; step<5; ++step)
+    {
+      // Next, we need an iterator that points to a cell and which we will
+      // move over all active cells one by one. In a sense, you can think of a
+      // triangulation as a collection of cells. If it was an array, you would
+      // just get a pointer that you move from one to the next. In
+      // triangulations, cells aren't stored as an array, so simple pointers
+      // do not work, but one can generalize pointers to iterators (see <a
+      // href="http://en.wikipedia.org/wiki/Iterator#C.2B.2B">this wikipedia
+      // link</a> for more information). We will then get an iterator to the
+      // first cell and iterate over all of the cells until we hit the last
+      // one.
+      //
+      // The second important piece is that we only need the active cells.
+      // Active cells are those that are not further refined, and the only
+      // ones that can be marked for further refinement, obviously. deal.II
+      // provides iterator categories that allow us to iterate over <i>all</i>
+      // cells (including the parent cells of active ones) or only over the
+      // active cells. Because we want the latter, we need to choose
+      // Triangulation::active_cell_iterator as data type.
+      //
+      // Finally, by convention, we almost always use the names
+      // <code>cell</code> and <code>endc</code> for the iterator pointing to
+      // the present cell and to the "one-past-the-end" iterator. This is, in
+      // a sense, a misnomer, because the object is not really a "cell": it is
+      // an iterator/pointer to a cell. We should really have started to call
+      // these objects <code>cell_iterator</code> when deal.II started in
+      // 1998, but it is what it is.
+      //
+      // After declaring the iterator variable, the loop over all cells is
+      // then rather trivial, and looks like any loop involving pointers
+      // instead of iterators:
+      Triangulation<2>::active_cell_iterator
+      cell = triangulation.begin_active(),
+      endc = triangulation.end();
+      for (; cell!=endc; ++cell)
+        {
+          // @note Writing a loop like this requires a lot of typing, but it
+          // is the only way of doing it in C++98 and C++03. However, if you
+          // have a C++11-compliant compiler, you can also use the C++11
+          // range-based for loop style that requires significantly less
+          // typing. Take a look at @ref CPP11 "the deal.II C++11 page" to see
+          // how this works.
+          //
+          // Next, we want to loop over all vertices of the cells. Since we are
+          // in 2d, we know that each cell has exactly four vertices. However,
+          // instead of penning down a 4 in the loop bound, we make a first
+          // attempt at writing it in a dimension-independent way by which we
+          // find out about the number of vertices of a cell. Using the
+          // GeometryInfo class, we will later have an easier time getting the
+          // program to also run in 3d: we only have to change all occurrences
+          // of <code><2></code> to <code><3></code>, and do not
+          // have to audit our code for the hidden appearance of magic numbers
+          // like a 4 that needs to be replaced by an 8:
+          for (unsigned int v=0;
+               v < GeometryInfo<2>::vertices_per_cell;
+               ++v)
+            {
+              // If this cell is at the inner boundary, then at least one of its
+              // vertices must sit on the inner ring and therefore have a radial
+              // distance from the center of exactly 0.5, up to floating point
+              // accuracy. Compute this distance, and if we have found a vertex
+              // with this property flag this cell for later refinement. We can
+              // then also break the loop over all vertices and move on to the
+              // next cell.
+              const double distance_from_center
+                = center.distance (cell->vertex(v));
+
+              if (std::fabs(distance_from_center - inner_radius) < 1e-10)
+                {
+                  cell->set_refine_flag ();
+                  break;
+                }
+            }
+        }
+
+      // Now that we have marked all the cells that we want refined, we let
+      // the triangulation actually do this refinement. The function that does
+      // so owes its long name to the fact that one can also mark cells for
+      // coarsening, and the function does coarsening and refinement all at
+      // once:
+      triangulation.execute_coarsening_and_refinement ();
+    }
+
+
+  // Finally, after these five iterations of refinement, we want to again
+  // write the resulting mesh to a file, again in eps format. This works just
+  // as above:
+  std::ofstream out ("grid-2.eps");
+  GridOut grid_out;
+  grid_out.write_eps (triangulation, out);
+
+  std::cout << "Grid written to grid-2.eps" << std::endl;
+
+  // At this point, all objects created in this function will be destroyed in
+  // reverse order. Unfortunately, we defined the manifold object after the
+  // triangulation, which still has a pointer to it and the library will
+  // produce an error if the manifold object is destroyed before the
+  // triangulation. We therefore have to release it, which can be done as
+  // follows. Note that this sets the manifold object used for part "0" of the
+  // domain back to a default object, over which the triangulation has full
+  // control.
+  triangulation.set_manifold (0);
+  // An alternative to doing so, and one that is frequently more convenient,
+  // would have been to declare the manifold object before the triangulation
+  // object. In that case, the triangulation would have let go of the
+  // manifold object upon its destruction, and everything would have been
+  // fine.
+}
+
+
+
+// @sect3{The main function}
+
+// Finally, the main function. There isn't much to do here, only to call the
+// two subfunctions, which produce the two grids.
+int main ()
+{
+  first_grid ();   // globally refined unit square, written to grid-1.eps
+  second_grid ();  // ring refined towards its inner edge, written to grid-2.eps
+}
diff --git a/examples/step-10/CMakeLists.txt b/examples/step-10/CMakeLists.txt
new file mode 100644
index 0000000..f3287a6
--- /dev/null
+++ b/examples/step-10/CMakeLists.txt
@@ -0,0 +1,39 @@
+##
+#  CMake script for the step-10 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-10")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-10/doc/builds-on b/examples/step-10/doc/builds-on
new file mode 100644
index 0000000..48a0f73
--- /dev/null
+++ b/examples/step-10/doc/builds-on
@@ -0,0 +1 @@
+step-4
diff --git a/examples/step-10/doc/intro.dox b/examples/step-10/doc/intro.dox
new file mode 100644
index 0000000..5236b0f
--- /dev/null
+++ b/examples/step-10/doc/intro.dox
@@ -0,0 +1,71 @@
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+
+This is a rather short example which only shows some aspects of using
+higher order mappings. By <em>mapping</em> we mean the transformation
+from the unit cell (i.e. the unit line, square, or cube) to the
+cells in real space. In all the previous examples, we have implicitly
+used linear or d-linear mappings; you will not have noticed this at
+all, since this is what happens if you do not do anything
+special. However, if your domain has curved boundaries, there are
+cases where the piecewise linear approximation of the boundary
+(i.e. by straight line segments) is not sufficient, and you want
+your computational domain to be an approximation of the real domain using
+curved boundaries as well. If the boundary approximation uses
+piecewise quadratic parabolas to approximate the true boundary, then
+we say that this is a quadratic or $Q_2$ approximation. If we
+use piecewise graphs of cubic polynomials, then this is a $Q_3$
+approximation, and so on.
+
+
+
+For some differential equations, it is known that piecewise linear
+approximations of the boundary, i.e. $Q_1$ mappings, are not
+sufficient if the boundary of the exact domain is curved. Examples are the
+biharmonic equation using $C^1$ elements, or the Euler
+equations of gas dynamics on domains with curved reflective boundaries. In these cases,
+it is necessary to compute the integrals using a higher order
+mapping. If we do not use such a higher
+order mapping, the order of approximation of the boundary dominates
+the order of convergence of the entire numerical scheme, irrespective
+of the order of convergence of the discretization in the interior of
+the domain.
+
+
+
+Rather than demonstrating the use of higher order mappings with one of
+these more complicated examples, we do only a brief computation:
+calculating the value of $\pi=3.141592653589793238462643\ldots$ by two
+different methods.
+
+
+
+The first method uses a triangulated approximation of the circle with
+unit radius and integrates the constant function with value one over it. Of course, if
+the domain were the exact unit circle, then the area would be $\pi$, but
+since we only use an approximation by piecewise polynomial segments,
+the value of the area we integrate over is not exactly $\pi$. However, it is known that as
+we refine the triangulation, a $Q_p$ mapping approximates the boundary
+with an order $h^{p+1}$, where $h$ is the mesh
+size. We will check the values of the computed area of the circle and
+their convergence towards $\pi$ under mesh refinement for different
+mappings. We will also find a convergence behavior that is surprising
+at first, but has a good explanation.
+
+
+
+The second method works similarly, but this time does not use the area
+of the triangulated unit circle, but rather its perimeter. $\pi$ is then
+approximated by half of the perimeter, as we choose the radius equal to one.
+
+
+@note This tutorial shows in essence how to choose a particular
+mapping for integrals, by attaching a particular geometry to the
+triangulation (as had already been done in step-1, for example) and
+then passing a mapping argument to the FEValues class that is used for
+all integrals in deal.II. The geometry we choose is a circle, for
+which deal.II already has a class (SphericalManifold) that can be
+used. If you want to define your own geometry, for example because it
+is complicated and cannot be described by the classes already
+available in deal.II, you will want to read through step-53.
diff --git a/examples/step-10/doc/kind b/examples/step-10/doc/kind
new file mode 100644
index 0000000..c1d9154
--- /dev/null
+++ b/examples/step-10/doc/kind
@@ -0,0 +1 @@
+techniques
diff --git a/examples/step-10/doc/results.dox b/examples/step-10/doc/results.dox
new file mode 100644
index 0000000..cf8631b
--- /dev/null
+++ b/examples/step-10/doc/results.dox
@@ -0,0 +1,176 @@
+<h1>Results</h1>
+
+
+The program performs two tasks, the first being to generate a
+visualization of the mapped domain, the second to compute pi by the
+two methods described. Let us first take a look at the generated
+graphics. They are generated in Gnuplot format, and can be viewed with
+the commands
+@code
+set style data lines
+set size 0.721, 1
+unset key
+plot [-1:1][-1:1] "ball0_mapping_q1.dat"
+@endcode
+or using one of the other filenames. The second line makes sure that
+the aspect ratio of the generated output is actually 1:1, i.e. a
+circle is drawn as a circle on your screen, rather than as an
+ellipse. The third line switches off the key in the graphic, as that
+will only print information (the filename) which is not that important
+right now.
+
+
+
+The following table shows the triangulated computational domain for
+Q1, Q2, and Q3 mappings, for the original coarse grid (left), and a
+once uniformly refined grid (right). If your browser does not display
+these pictures in acceptable quality, view them one by one.
+
+<table style="width:80%" align="center">
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-10.ball_mapping_q1_ref0.png" alt=""></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-10.ball_mapping_q1_ref1.png" alt=""></td>
+  </tr>
+
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-10.ball_mapping_q2_ref0.png" alt=""></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-10.ball_mapping_q2_ref1.png" alt=""></td>
+  </tr>
+
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-10.ball_mapping_q3_ref0.png" alt=""></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-10.ball_mapping_q3_ref1.png" alt=""></td>
+  </tr>
+</table>
+
+These pictures show the obvious advantage of higher order mappings:
+they approximate the true boundary quite well also on rather coarse
+meshes. To demonstrate this a little further, the following table
+shows the upper right quarter of the circle of the coarse mesh, and
+with dashed lines the exact circle:
+
+<table style="width:80%" align="center">
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-10.quarter-q1.png" alt=""></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-10.quarter-q2.png" alt=""></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-10.quarter-q3.png" alt=""></td>
+  </tr>
+</table>
+
+Obviously the quadratic mapping approximates the boundary quite well,
+while for the cubic mapping the difference between approximated domain
+and true one is hardly visible already for the coarse grid. You can
+also see that the mapping only changes something at the outer
+boundaries of the triangulation. In the interior, all lines are still
+represented by linear functions, resulting in additional computations
+only on cells at the boundary. Higher order mappings are therefore
+usually not noticeably slower than lower order ones, because the
+additional computations are only performed on a small subset of all
+cells.
+
+
+
+The second purpose of the program was to compute the value of pi to
+good accuracy. This is the output of this part of the program:
+@code
+Computation of Pi by the area:
+==============================
+Degree = 1
+cells      eval.pi            error      
+    5 1.9999999999999993 1.1416e+00    - 
+   20 2.8284271247461894 3.1317e-01 1.87 
+   80 3.0614674589207178 8.0125e-02 1.97 
+  320 3.1214451522580520 2.0148e-02 1.99 
+ 1280 3.1365484905459393 5.0442e-03 2.00 
+ 5120 3.1403311569547534 1.2615e-03 2.00 
+
+Degree = 2
+cells      eval.pi            error      
+    5 3.1045694996615865 3.7023e-02    - 
+   20 3.1391475703122271 2.4451e-03 3.92 
+   80 3.1414377167038303 1.5494e-04 3.98 
+  320 3.1415829366419015 9.7169e-06 4.00 
+ 1280 3.1415920457576911 6.0783e-07 4.00 
+ 5120 3.1415926155921139 3.7998e-08 4.00 
+
+Degree = 3
+cells      eval.pi            error      
+    5 3.1410033851499310 5.8927e-04    - 
+   20 3.1415830393583861 9.6142e-06 5.94 
+   80 3.1415925017363837 1.5185e-07 5.98 
+  320 3.1415926512106722 2.3791e-09 6.00 
+ 1280 3.1415926535525962 3.7197e-11 6.00 
+ 5120 3.1415926535892140 5.7923e-13 6.00 
+
+Degree = 4
+cells      eval.pi            error       
+    5 3.1415871927401127 5.4608e-06     - 
+   20 3.1415926314742437 2.2116e-08  7.95 
+   80 3.1415926535026228 8.7170e-11  7.99 
+  320 3.1415926535894529 3.4036e-13  8.00 
+ 1280 3.1415926535897927 2.9187e-16 10.19 
+ 5120 3.1415926535897944 1.3509e-15 -2.21 
+
+Computation of Pi by the perimeter:
+===================================
+Degree = 1
+cells      eval.pi            error      
+    5 2.8284271247461898 3.1317e-01    - 
+   20 3.0614674589207178 8.0125e-02 1.97 
+   80 3.1214451522580520 2.0148e-02 1.99 
+  320 3.1365484905459393 5.0442e-03 2.00 
+ 1280 3.1403311569547525 1.2615e-03 2.00 
+ 5120 3.1412772509327729 3.1540e-04 2.00 
+
+Degree = 2
+cells      eval.pi            error      
+    5 3.1248930668550594 1.6700e-02    - 
+   20 3.1404050605605449 1.1876e-03 3.81 
+   80 3.1415157631807014 7.6890e-05 3.95 
+  320 3.1415878042798617 4.8493e-06 3.99 
+ 1280 3.1415923498174534 3.0377e-07 4.00 
+ 5120 3.1415926345932004 1.8997e-08 4.00 
+
+Degree = 3
+cells      eval.pi            error      
+    5 3.1414940401456057 9.8613e-05    - 
+   20 3.1415913432549156 1.3103e-06 6.23 
+   80 3.1415926341726914 1.9417e-08 6.08 
+  320 3.1415926532906893 2.9910e-10 6.02 
+ 1280 3.1415926535851360 4.6571e-12 6.01 
+ 5120 3.1415926535897203 7.2845e-14 6.00 
+
+Degree = 4
+cells      eval.pi            error      
+    5 3.1415921029432576 5.5065e-07    - 
+   20 3.1415926513737600 2.2160e-09 7.96 
+   80 3.1415926535810712 8.7218e-12 7.99 
+  320 3.1415926535897594 3.3668e-14 8.02 
+ 1280 3.1415926535897922 1.0617e-15 4.99 
+ 5120 3.1415926535897931 1.0061e-16 3.40 
+@endcode
+
+
+
+One of the immediate observations from the output is that in all cases the
+values converge quickly to the true value of
+$\pi=3.141592653589793238462643$. Note that for the $Q_4$ mapping, we are
+already in the regime of roundoff errors on the finer grids, so the computed
+convergence rate levels off there. However, also note that for the $Q_1$ mapping,
+even on the finest grid the accuracy is significantly worse than on the coarse
+grid for a $Q_3$ mapping!
+
+
+
+The last column of the output shows the convergence order, in powers of the
+mesh width $h$. In the introduction, we had stated that the convergence order
+for a $Q_p$ mapping should be $h^{p+1}$. However, in the example shown, the
+order is rather $h^{2p}$! This at first surprising fact is explained by the
+properties of the $Q_p$ mapping. At order <i>p</i>, it uses support points
+that are based on the <i>p</i>+1 point Gauss-Lobatto quadrature rule that
+selects the support points in such a way that the quadrature rule converges at
+order 2<i>p</i>. Even though these points are here only used for interpolation
+of a <i>p</i>th order polynomial, we get a superconvergence effect when
+numerically evaluating the integral that actually gives this high order of
+convergence.
+
diff --git a/examples/step-10/doc/tooltip b/examples/step-10/doc/tooltip
new file mode 100644
index 0000000..3e8ca2e
--- /dev/null
+++ b/examples/step-10/doc/tooltip
@@ -0,0 +1 @@
+Higher order mappings.
diff --git a/examples/step-10/step-10.cc b/examples/step-10/step-10.cc
new file mode 100644
index 0000000..06e31a8
--- /dev/null
+++ b/examples/step-10/step-10.cc
@@ -0,0 +1,457 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2001 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Authors: Wolfgang Bangerth, Ralf Hartmann, University of Heidelberg, 2001
+ */
+
+
+// The first few of the following include files are probably well-known by now and
+// need no further explanation.
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/convergence_table.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/manifold_lib.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_out.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_values.h>
+
+// This is the only new one: in it, we declare the <code>MappingQ</code> class
+// which we will use for polynomial mappings of arbitrary order:
+#include <deal.II/fe/mapping_q.h>
+
+// And this again is C++:
+#include <iostream>
+#include <fstream>
+#include <cmath>
+
+// The last step is as in previous programs:
+namespace Step10
+{
+  using namespace dealii;
+
+  // Now, as we want to compute the value of $\pi$, we have to compare to
+  // something. These are the first few digits of $\pi$, which we define
+  // beforehand for later use. Since we would like to compute the difference
+  // between two numbers which are quite accurate, with the accuracy of the
+  // computed approximation to $\pi$ being in the range of the number of
+  // digits which a double can hold, we declare the reference as a <code>long
+  // double</code> with extra digits (the <code>L</code> suffix keeps them):
+  const long double pi = 3.141592653589793238462643L;
+
+
+
+  // Then, the first task will be to generate some output. Since this program
+  // is so small, we do not employ object oriented techniques in it and do not
+  // declare classes (although, of course, we use the object oriented features
+  // of the library). Rather, we just pack the functionality into separate
+  // functions. We make these functions templates on the number of space
+  // dimensions to conform to usual practice when using deal.II, although we
+  // will only use them for two space dimensions.
+  //
+  // The first of these functions just generates a triangulation of a circle
+  // (hyperball) and outputs the $Q_p$ mapping of its cells for different values
+  // of <code>p</code>. Then, we refine the grid once and do so again.
+  template <int dim>
+  void gnuplot_output()
+  {
+    std::cout << "Output of grids into gnuplot files:" << std::endl
+              << "===================================" << std::endl;
+
+    // So first generate a coarse triangulation of the circle and
+    // associate a suitable boundary description to it. Note that the
+    // default value of the argument to the SphericalManifold
+    // constructor is a center at the origin.
+    Triangulation<dim> triangulation;
+    GridGenerator::hyper_ball (triangulation);
+    static const SphericalManifold<dim> boundary;
+    triangulation.set_all_manifold_ids_on_boundary(0);
+    triangulation.set_manifold (0, boundary);
+
+    // Next generate output for this grid and for a once refined grid. Note
+    // that we have hidden the mesh refinement in the loop header, which might
+    // be uncommon but nevertheless works. Also it is strangely consistent
+    // with incrementing the loop index denoting the refinement level.
+    for (unsigned int refinement=0; refinement<2;
+         ++refinement, triangulation.refine_global(1))
+      {
+        std::cout << "Refinement level: " << refinement << std::endl;
+
+        // Then have a string which denotes the base part of the names of the
+        // files into which we write the output. Note that in the parentheses
+        // in the initializer we do arithmetic on characters, which assumes
+        // that first the characters denoting numbers are placed consecutively
+        // (which is probably true for all reasonable character sets
+        // nowadays), but also assumes that the increment
+        // <code>refinement</code> is less than ten. This is therefore more a
+        // quick hack if we know exactly the values which the increment can
+        // assume. A better implementation would use the
+        // <code>std::ostringstream</code> class to generate a name.
+        std::string filename_base = "ball";
+        filename_base += '0'+refinement;
+
+        // Then output the present grid for $Q_1$, $Q_2$, and $Q_3$ mappings:
+        for (unsigned int degree=1; degree<4; ++degree)
+          {
+            std::cout << "Degree = " << degree << std::endl;
+
+            // For this, first set up an object describing the mapping. This
+            // is done using the <code>MappingQ</code> class, which takes as
+            // argument to the constructor the polynomial degree which it
+            // shall use.
+            const MappingQ<dim> mapping (degree);
+            // We note one interesting fact: if you want a piecewise linear
+            // mapping, then you could give a value of <code>1</code> to the
+            // constructor. However, for linear mappings, so many things can
+            // be generated simpler that there is another class, called
+            // <code>MappingQ1</code> which does exactly the same as if you
+            // gave a degree of <code>1</code> to the <code>MappingQ</code>
+            // class, but does so significantly faster. <code>MappingQ1</code>
+            // is also the class that is implicitly used throughout the
+            // library in many functions and classes if you do not specify
+            // another mapping explicitly.
+
+
+            // In order to actually write out the present grid with this
+            // mapping, we set up an object which we will use for output. We
+            // will generate Gnuplot output, which consists of a set of lines
+            // describing the mapped triangulation. By default, only one line
+            // is drawn for each face of the triangulation, but since we want
+            // to explicitly see the effect of the mapping, we want to have
+            // the faces in more detail. This can be done by passing the
+            // output object a structure which contains some flags. In the
+            // present case, since Gnuplot can only draw straight lines, we
+            // output a number of additional points on the faces so that each
+            // face is drawn by 30 small lines instead of only one. This is
+            // sufficient to give us the impression of seeing a curved line,
+            // rather than a set of straight lines.
+            GridOut grid_out;
+            GridOutFlags::Gnuplot gnuplot_flags(false, 30);
+            grid_out.set_flags(gnuplot_flags);
+
+            // Finally, generate a filename and a file for output using the
+            // same evil hack as above:
+            std::string filename = filename_base+"_mapping_q";
+            filename += ('0'+degree);
+            filename += ".dat";
+            std::ofstream gnuplot_file (filename.c_str());
+
+            // Then write out the triangulation to this file. The last
+            // argument of the function is a pointer to a mapping object. This
+            // argument has a default value, and if no value is given a simple
+            // <code>MappingQ1</code> object is taken, which we briefly
+            // described above. This would then result in a piecewise linear
+            // approximation of the true boundary in the output.
+            grid_out.write_gnuplot (triangulation, gnuplot_file, &mapping);
+          }
+        std::cout << std::endl;
+      }
+  }
+
+  // Now we proceed with the main part of the code, the approximation of
+  // $\pi$. The area of a circle is of course given by $\pi r^2$, so having a
+  // circle of radius 1, the area represents just the number that is searched
+  // for. The numerical computation of the area is performed by integrating
+  // the constant function of value 1 over the whole computational domain,
+  // i.e. by computing the areas $\int_K 1 dx=\int_{\hat K} 1
+  // \ \textrm{det}\ J(\hat x) d\hat x \approx \sum_i \textrm{det}
+  // \ J(\hat x_i)w(\hat x_i)$,
+  // where the sum extends over all quadrature points on all active cells in
+  // the triangulation, with $w(x_i)$ being the weight of quadrature point
+  // $x_i$. The integrals on each cell are approximated by numerical
+  // quadrature, hence the only additional ingredient we need is to set up a
+  // FEValues object that provides the corresponding `JxW' values of each
+  // cell. (Note that `JxW' is meant to abbreviate <code>Jacobian determinant
+  // times weight</code>; since in numerical quadrature the two factors always
+  // occur at the same places, we only offer the combined quantity, rather
+  // than two separate ones.) We note that here we won't use the FEValues
+  // object in its original purpose, i.e. for the computation of values of
+  // basis functions of a specific finite element at certain quadrature
+  // points. Rather, we use it only to gain the `JxW' at the quadrature
+  // points, irrespective of the (dummy) finite element we will give to the
+  // constructor of the FEValues object. The actual finite element given to
+  // the FEValues object is not used at all, so we could give any.
+  template <int dim>
+  void compute_pi_by_area ()
+  {
+    std::cout << "Computation of Pi by the area:" << std::endl
+              << "==============================" << std::endl;
+
+    // For the numerical quadrature on all cells we employ a quadrature rule
+    // of sufficiently high degree. We choose QGauss that is of order 8 (4
+    // points), to be sure that the errors due to numerical quadrature are of
+    // higher order than the order (maximal 6) that will occur due to the
+    // order of the approximation of the boundary, i.e. the order of the
+    // mappings employed. Note that the integrand, the Jacobian determinant,
+    // is not a polynomial function (rather, it is a rational one), so we do
+    // not use Gauss quadrature in order to get the exact value of the
+    // integral as done often in finite element computations, but could as
+    // well have used any quadrature formula of like order instead.
+    const QGauss<dim> quadrature(4);
+
+    // Now start by looping over polynomial mapping degrees=1..4:
+    for (unsigned int degree=1; degree<5; ++degree)
+      {
+        std::cout << "Degree = " << degree << std::endl;
+
+        // First generate the triangulation, the boundary and the mapping
+        // object as already seen.
+        Triangulation<dim> triangulation;
+        GridGenerator::hyper_ball (triangulation);
+
+        static const SphericalManifold<dim> boundary;
+        triangulation.set_all_manifold_ids_on_boundary (0);
+        triangulation.set_manifold(0, boundary);
+
+        const MappingQ<dim> mapping (degree);
+
+        // We now create a dummy finite element. Here we could choose any
+        // finite element, as we are only interested in the `JxW' values
+        // provided by the FEValues object below. Nevertheless, we have to
+        // provide a finite element since in this example we abuse the
+        // FEValues class a little in that we only ask it to provide us with
+        // the weights of certain quadrature points, in contrast to the usual
+        // purpose (and name) of the FEValues class which is to provide the
+        // values of finite elements at these points.
+        const FE_Q<dim>     dummy_fe (1);
+
+        // Likewise, we need to create a DoFHandler object. We do not actually
+        // use it, but it will provide us with `active_cell_iterators' that
+        // are needed to reinitialize the FEValues object on each cell of the
+        // triangulation.
+        DoFHandler<dim> dof_handler (triangulation);
+
+        // Now we set up the FEValues object, giving the Mapping, the dummy
+        // finite element and the quadrature object to the constructor,
+        // together with the update flags asking for the `JxW' values at the
+        // quadrature points only. This tells the FEValues object that it
+        // need not compute other quantities upon calling the
+        // <code>reinit</code> function, thus saving computation time.
+        //
+        // The most important difference in the construction of the FEValues
+        // object compared to previous example programs is that we pass a
+        // mapping object as first argument, which is to be used in the
+        // computation of the mapping from unit to real cell. In previous
+        // examples, this argument was omitted, resulting in the implicit use
+        // of an object of type MappingQ1.
+        FEValues<dim> fe_values (mapping, dummy_fe, quadrature,
+                                 update_JxW_values);
+
+        // We employ an object of the ConvergenceTable class to store all
+        // important data like the approximated values for $\pi$ and the error
+        // with respect to the true value of $\pi$. We will also use functions
+        // provided by the ConvergenceTable class to compute convergence rates
+        // of the approximations to $\pi$.
+        ConvergenceTable table;
+
+        // Now we loop over several refinement steps of the triangulation.
+        for (unsigned int refinement=0; refinement<6;
+             ++refinement, triangulation.refine_global (1))
+          {
+            // In this loop we first add the number of active cells of the
+            // current triangulation to the table. This function automatically
+            // creates a table column with superscription `cells', in case
+            // this column was not created before.
+            table.add_value("cells", triangulation.n_active_cells());
+
+            // Then we distribute the degrees of freedom for the dummy finite
+            // element. Strictly speaking we do not need this function call in
+            // our special case but we call it to make the DoFHandler happy --
+            // otherwise it would throw an assertion in the FEValues::reinit
+            // function below.
+            dof_handler.distribute_dofs (dummy_fe);
+
+            // We define the variable area as `long double' like we did for
+            // the pi variable before.
+            long double area = 0;
+
+            // Now we loop over all cells, reinitialize the FEValues object
+            // for each cell, and add up all the `JxW' values for this cell to
+            // `area'...
+            typename DoFHandler<dim>::active_cell_iterator
+            cell = dof_handler.begin_active(),
+            endc = dof_handler.end();
+            for (; cell!=endc; ++cell)
+              {
+                fe_values.reinit (cell);
+                for (unsigned int i=0; i<fe_values.n_quadrature_points; ++i)
+                  area += fe_values.JxW (i);
+              }
+
+            // ...and store the resulting area values and the errors in the
+            // table. We need a static cast to double as there is no
+            // add_value(string, long double) function implemented. Note that
+            // this also concerns the second call as the <code>fabs</code>
+            // function in the <code>std</code> namespace is overloaded on its
+            // argument types, so there exists a version taking and returning
+            // a <code>long double</code>, in contrast to the global namespace
+            // where only one such function is declared (which takes and
+            // returns a double).
+            table.add_value("eval.pi", static_cast<double> (area));
+            table.add_value("error",   static_cast<double> (std::fabs(area-pi)));
+          }
+
+        // We want to compute the convergence rates of the `error'
+        // column. Therefore we need to omit the other columns from the
+        // convergence rate evaluation before calling
+        // `evaluate_all_convergence_rates'
+        table.omit_column_from_convergence_rate_evaluation("cells");
+        table.omit_column_from_convergence_rate_evaluation("eval.pi");
+        table.evaluate_all_convergence_rates(ConvergenceTable::reduction_rate_log2);
+
+        // Finally we set the precision and scientific mode for output of some
+        // of the quantities...
+        table.set_precision("eval.pi", 16);
+        table.set_scientific("error", true);
+
+        // ...and write the whole table to std::cout.
+        table.write_text(std::cout);
+
+        std::cout << std::endl;
+      }
+  }
+
+
+  // The following, second function also computes an approximation of $\pi$
+  // but this time via the perimeter $2\pi r$ of the domain instead of the
+  // area. This function is only a variation of the previous function. So we
+  // will mainly give documentation for the differences.
+  template <int dim>
+  void compute_pi_by_perimeter ()
+  {
+    std::cout << "Computation of Pi by the perimeter:" << std::endl
+              << "===================================" << std::endl;
+
+    // We take the same order of quadrature but this time a `dim-1'
+    // dimensional quadrature as we will integrate over (boundary) lines
+    // rather than over cells.
+    const QGauss<dim-1> quadrature(4);
+
+    // We loop over all degrees, create the triangulation, the boundary, the
+    // mapping, the dummy finite element and the DoFHandler object as seen
+    // before.
+    for (unsigned int degree=1; degree<5; ++degree)
+      {
+        std::cout << "Degree = " << degree << std::endl;
+        Triangulation<dim> triangulation;
+        GridGenerator::hyper_ball (triangulation);
+
+        static const SphericalManifold<dim> boundary;
+        triangulation.set_all_manifold_ids_on_boundary (0);
+        triangulation.set_manifold (0, boundary);
+
+        const MappingQ<dim> mapping (degree);
+        const FE_Q<dim>     fe (1);
+
+        DoFHandler<dim> dof_handler (triangulation);
+
+        // Then we create a FEFaceValues object instead of a FEValues object
+        // as in the previous function. Again, we pass a mapping as first
+        // argument.
+        FEFaceValues<dim> fe_face_values (mapping, fe, quadrature,
+                                          update_JxW_values);
+        ConvergenceTable table;
+
+        for (unsigned int refinement=0; refinement<6;
+             ++refinement, triangulation.refine_global (1))
+          {
+            table.add_value("cells", triangulation.n_active_cells());
+
+            dof_handler.distribute_dofs (fe);
+
+            // Now we run over all cells and over all faces of each cell. Only
+            // the contributions of the `JxW' values on boundary faces are
+            // added to the long double variable `perimeter'.
+            typename DoFHandler<dim>::active_cell_iterator
+            cell = dof_handler.begin_active(),
+            endc = dof_handler.end();
+            long double perimeter = 0;
+            for (; cell!=endc; ++cell)
+              for (unsigned int face_no=0; face_no<GeometryInfo<dim>::faces_per_cell; ++face_no)
+                if (cell->face(face_no)->at_boundary())
+                  {
+                    // We reinit the FEFaceValues object with the cell
+                    // iterator and the number of the face.
+                    fe_face_values.reinit (cell, face_no);
+                    for (unsigned int i=0; i<fe_face_values.n_quadrature_points; ++i)
+                      perimeter += fe_face_values.JxW (i);
+                  }
+            // Then store the evaluated values in the table...
+            table.add_value("eval.pi", static_cast<double> (perimeter/2.));
+            table.add_value("error",   static_cast<double> (std::fabs(perimeter/2.-pi)));
+          }
+
+        // ...and end this function as we did in the previous one:
+        table.omit_column_from_convergence_rate_evaluation("cells");
+        table.omit_column_from_convergence_rate_evaluation("eval.pi");
+        table.evaluate_all_convergence_rates(ConvergenceTable::reduction_rate_log2);
+
+        table.set_precision("eval.pi", 16);
+        table.set_scientific("error", true);
+
+        table.write_text(std::cout);
+
+        std::cout << std::endl;
+      }
+  }
+}
+
+
+// The following main function just calls the above functions in the order of
+// their appearance. Apart from this, it looks just like the main functions of
+// previous tutorial programs.
+int main ()
+{
+  try
+    {
+      std::cout.precision (16);
+
+      Step10::gnuplot_output<2>();
+
+      Step10::compute_pi_by_area<2> ();
+      Step10::compute_pi_by_perimeter<2> ();
+    }
+  catch (std::exception &exc)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+
+      return 1;
+    }
+  catch (...)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    }
+
+  return 0;
+}
diff --git a/examples/step-11/CMakeLists.txt b/examples/step-11/CMakeLists.txt
new file mode 100644
index 0000000..8cdcd4a
--- /dev/null
+++ b/examples/step-11/CMakeLists.txt
@@ -0,0 +1,39 @@
+##
+#  CMake script for the step-11 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-11")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-11/doc/builds-on b/examples/step-11/doc/builds-on
new file mode 100644
index 0000000..9cf5899
--- /dev/null
+++ b/examples/step-11/doc/builds-on
@@ -0,0 +1 @@
+step-10
diff --git a/examples/step-11/doc/intro.dox b/examples/step-11/doc/intro.dox
new file mode 100644
index 0000000..e80f72b
--- /dev/null
+++ b/examples/step-11/doc/intro.dox
@@ -0,0 +1,110 @@
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+The problem we will be considering is the solution of Laplace's problem with
+Neumann boundary conditions only:
+@f{eqnarray*}
+  -\Delta u &=& f \qquad \mathrm{in}\ \Omega,
+  \\
+  \partial_n u &=& g \qquad \mathrm{on}\ \partial\Omega.
+@f}
+It is well known that if this problem is to have a solution, then the forces
+need to satisfy the compatibility condition
+@f[
+  \int_\Omega f\; dx + \int_{\partial\Omega} g\; ds = 0.
+@f]
+We will consider the special case that $\Omega$ is the circle of radius 1
+around the origin, and $f=-2$, $g=1$. This choice satisfies the compatibility
+condition.
+
+The compatibility condition allows a solution of the above equation, but it
+nevertheless retains an ambiguity: since only derivatives of the solution
+appear in the equations, the solution is only determined up to a constant. For
+this reason, we have to pose another condition for the numerical solution,
+which fixes this constant. 
+
+For this, there are various possibilities:
+<ol>
+<li> Fix one node of the discretization to zero or any other fixed value.
+  This amounts to an additional condition $u_h(x_0)=0$. Although this is
+  common practice, it is not necessarily a good idea, since we know that the
+  solutions of Laplace's equation are only in $H^1$, which does not allow for
+  the definition of point values because it is not a subset of the continuous
+  functions. Therefore, even though fixing one node is allowed for
+  discretized functions, it is not for continuous functions, and one can
+  often see this in a resulting error spike at this point in the numerical
+  solution. 
+  
+<li> Fixing the mean value over the domain to zero or any other value. This
+  is allowed on the continuous level, since $H^1(\Omega)\subset L^1(\Omega)$
+  by Sobolev's inequality, and thus also on the discrete level since we
+  there only consider subsets of $H^1$.
+  
+<li> Fixing the mean value over the boundary of the domain to zero or any
+  other value. This is also allowed on the continuous level, since
+  $H^{1/2}(\partial\Omega)\subset L^1(\partial\Omega)$, again by Sobolev's
+  inequality.
+</ol>
+We will choose the last possibility, since we want to demonstrate another
+technique with it.
+
+While this describes the problem to be solved, we still have to figure out how
+to implement it. Basically, except for the additional mean value constraint,
+we have solved this problem several times, using Dirichlet boundary values,
+and we only need to drop the treatment of Dirichlet boundary nodes. The use of
+higher order mappings is also rather trivial and will be explained at the
+various places where we use it; in almost all conceivable cases, you will only
+consider the objects describing mappings as a black box which you need not
+worry about, because their only apparent use is to be passed to places deep
+inside the library where functions know how to handle them (i.e. in the
+<code>FEValues</code> classes and their descendants).
+
+The tricky point in this program is the use of the mean value
+constraint. Fortunately, there is a class in the library which knows how to
+handle such constraints, and we have used it quite often already, without
+mentioning its generality. Note that if we assume that the boundary nodes are
+spaced equally along the boundary, then the mean value constraint
+@f[
+  \int_{\partial \Omega} u(x) \; ds = 0
+@f]
+can be written as
+@f[
+  \sum_{i\in\partial\Omega_h} u_i = 0,
+@f]
+where the sum shall run over all degree of freedom indices which are located
+on the boundary of the computational domain. Let us denote by $i_0$ that index
+on the boundary with the lowest number (or any other conveniently chosen
+index), then the constraint can also be represented by
+@f[
+  u_{i_0} = \sum_{i\in\partial\Omega_h\backslash i_0} -u_i.
+@f]
+This, luckily, is exactly the form of constraints for which the
+ConstraintMatrix class was designed. Note that we have used this
+class in several previous examples for the representation of hanging nodes
+constraints, which also have this form: there, the middle vertex shall have
+the mean of the values of the adjacent vertices. In general, the
+ConstraintMatrix class is designed to handle homogeneous constraints
+of the form
+@f[
+  CU = 0
+@f]
+where $C$ denotes a matrix, and $U$ the vector of nodal values.
+
+In this example, the mean value along the boundary allows just such a
+representation, with $C$ being a matrix with just one row (i.e. there is only
+one constraint). In the implementation, we will create a
+ConstraintMatrix object, add one constraint (i.e. add another row to
+the matrix) referring to the first boundary node $i_0$, and insert the weights
+with which all the other nodes contribute, which in this example happens to be
+just $-1$.
+
+Later, we will use this object to eliminate the first boundary node from the
+linear system of equations, reducing it to one which has a solution without
+the ambiguity of the constant shift value. One of the problems of the
+implementation will be that the explicit elimination of this node results in a
+number of additional elements in the matrix, of which we do not know in
+advance where they are located and how many additional entries will be in each
+of the rows of the matrix. We will show how we can use an intermediate object
+to work around this problem.
+
+But now on to the implementation of the program solving this problem...
diff --git a/examples/step-11/doc/kind b/examples/step-11/doc/kind
new file mode 100644
index 0000000..c1d9154
--- /dev/null
+++ b/examples/step-11/doc/kind
@@ -0,0 +1 @@
+techniques
diff --git a/examples/step-11/doc/results.dox b/examples/step-11/doc/results.dox
new file mode 100644
index 0000000..d76412f
--- /dev/null
+++ b/examples/step-11/doc/results.dox
@@ -0,0 +1,44 @@
+<h1>Results</h1>
+
+This is what the program outputs:
+@code
+Using mapping with degree 1:
+============================
+cells  |u|_1    error   
+    5 0.680402 0.572912 
+   20 1.085518 0.167796 
+   80 1.208981 0.044334 
+  320 1.242041 0.011273 
+ 1280 1.250482 0.002832 
+ 5120 1.252605 0.000709 
+
+Using mapping with degree 2:
+============================
+cells  |u|_1    error   
+    5 1.050963 0.202351 
+   20 1.199642 0.053672 
+   80 1.239913 0.013401 
+  320 1.249987 0.003327 
+ 1280 1.252486 0.000828 
+ 5120 1.253108 0.000206 
+
+Using mapping with degree 3:
+============================
+cells  |u|_1    error   
+    5 1.086161 0.167153 
+   20 1.204349 0.048965 
+   80 1.240502 0.012812 
+  320 1.250059 0.003255 
+ 1280 1.252495 0.000819 
+ 5120 1.253109 0.000205 
+@endcode
+As we expected, the convergence order for each of the different
+mappings is clearly quadratic in the mesh size. What <em>is</em>
+interesting, though, is that the error for a bilinear mapping
+(i.e. degree 1) is more than three times larger than that for the
+higher order mappings; it is therefore clearly advantageous in this
+case to use a higher order mapping, not because it improves the order
+of convergence but just to reduce the constant before the convergence
+order. On the other hand, using a cubic mapping improves the result
+only marginally further, except for the case of very coarse
+grids.
diff --git a/examples/step-11/doc/tooltip b/examples/step-11/doc/tooltip
new file mode 100644
index 0000000..17a5334
--- /dev/null
+++ b/examples/step-11/doc/tooltip
@@ -0,0 +1,2 @@
+Higher order mappings. Dealing with constraints.
+
diff --git a/examples/step-11/step-11.cc b/examples/step-11/step-11.cc
new file mode 100644
index 0000000..8b39241
--- /dev/null
+++ b/examples/step-11/step-11.cc
@@ -0,0 +1,479 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2001 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Author: Wolfgang Bangerth, University of Heidelberg, 2001
+ */
+
+
+// As usual, the program starts with a rather long list of include files which
+// you are probably already used to by now:
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/table_handler.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/precondition.h>
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/manifold_lib.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/mapping_q.h>
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/matrix_tools.h>
+
+// Just this one is new: it declares a class
+// DynamicSparsityPattern, which we will use and explain
+// further down below.
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+
+// We will make use of the std::find algorithm of the C++ standard library, so
+// we have to include the following file for its declaration:
+#include <algorithm>
+#include <iostream>
+#include <iomanip>
+#include <cmath>
+
+// The last step is as in all previous programs:
+namespace Step11
+{
+  using namespace dealii;
+
+  // Then we declare a class which represents the solution of a Laplace
+  // problem. As this example program is based on step-5, the class looks
+  // rather the same, with the sole structural difference that the function
+  // <code>assemble_system</code> now calls <code>solve</code> itself, and is
+  // thus called <code>assemble_and_solve</code>, and that the output function
+  // was dropped since the solution function is so boring that it is not worth
+  // being viewed.
+  //
+  // The only other noteworthy change is that the constructor takes a value
+  // representing the polynomial degree of the mapping to be used later on,
+  // and that it has another member variable representing exactly this
+  // mapping. In general, this variable will occur in real applications at the
+  // same places where the finite element is declared or used.
+  template <int dim>
+  class LaplaceProblem
+  {
+  public:
+    LaplaceProblem (const unsigned int mapping_degree);
+    void run ();
+
+  private:
+    void setup_system ();
+    void assemble_and_solve ();
+    void solve ();
+
+    Triangulation<dim>   triangulation;
+    FE_Q<dim>            fe;
+    DoFHandler<dim>      dof_handler;
+    MappingQ<dim>        mapping;
+
+    SparsityPattern      sparsity_pattern;
+    SparseMatrix<double> system_matrix;
+    ConstraintMatrix     mean_value_constraints;
+
+    Vector<double>       solution;
+    Vector<double>       system_rhs;
+
+    TableHandler         output_table;
+  };
+
+
+
+  // Construct such an object, by initializing the variables. Here, we use
+  // linear finite elements (the argument to the <code>fe</code> variable
+  // denotes the polynomial degree), and mappings of given order. Print to
+  // screen what we are about to do.
+  template <int dim>
+  LaplaceProblem<dim>::LaplaceProblem (const unsigned int mapping_degree) :
+    fe (1),
+    dof_handler (triangulation),
+    mapping (mapping_degree)
+  {
+    std::cout << "Using mapping with degree " << mapping_degree << ":"
+              << std::endl
+              << "============================"
+              << std::endl;
+  }
+
+
+
+  // The first task is to set up the variables for this problem. This includes
+  // generating a valid <code>DoFHandler</code> object, as well as the
+  // sparsity patterns for the matrix, and the object representing the
+  // constraints that the mean value of the degrees of freedom on the boundary
+  // be zero.
+  template <int dim>
+  void LaplaceProblem<dim>::setup_system ()
+  {
+    // The first task is trivial: generate an enumeration of the degrees of
+    // freedom, and initialize solution and right hand side vector to their
+    // correct sizes:
+    dof_handler.distribute_dofs (fe);
+    solution.reinit (dof_handler.n_dofs());
+    system_rhs.reinit (dof_handler.n_dofs());
+
+    // Next task is to construct the object representing the constraint that
+    // the mean value of the degrees of freedom on the boundary shall be
+    // zero. For this, we first want a list of those nodes which are actually
+    // at the boundary. The <code>DoFTools</code> namespace has a function
+    // that returns an array of Boolean values where <code>true</code>
+    // indicates that the node is at the boundary. The second argument denotes
+    // a mask selecting which components of vector valued finite elements we
+    // want to be considered. This sort of information is encoded using the
+    // ComponentMask class (see also @ref GlossComponentMask). Since we have a
+    // scalar finite element anyway, this mask in reality should have only one
+    // entry with a <code>true</code> value. However, the ComponentMask class
+    // has semantics that allow it to represent a mask of indefinite size
+    // whose every element equals <code>true</code> when one just default
+    // constructs such an object, so this is what we'll do here.
+    std::vector<bool> boundary_dofs (dof_handler.n_dofs(), false);
+    DoFTools::extract_boundary_dofs (dof_handler,
+                                     ComponentMask(),
+                                     boundary_dofs);
+
+    // Now first for the generation of the constraints: as mentioned in the
+    // introduction, we constrain one of the nodes on the boundary by the
+    // values of all other DoFs on the boundary. So, let us first pick out the
+    // first boundary node from this list. We do that by searching for the
+    // first <code>true</code> value in the array (note that
+    // <code>std::find</code> returns an iterator to this element), and
+    // computing its distance to the overall first element in the array to get
+    // its index:
+    const unsigned int first_boundary_dof
+      = std::distance (boundary_dofs.begin(),
+                       std::find (boundary_dofs.begin(),
+                                  boundary_dofs.end(),
+                                  true));
+
+    // Then generate a constraints object with just this one constraint. First
+    // clear all previous content (which might reside there from the previous
+    // computation on a once coarser grid), then add this one line
+    // constraining the <code>first_boundary_dof</code> to the sum of other
+    // boundary DoFs each with weight -1. Finally, close the constraints
+    // object, i.e. do some internal bookkeeping on it for faster processing
+    // of what is to come later:
+    mean_value_constraints.clear ();
+    mean_value_constraints.add_line (first_boundary_dof);
+    for (unsigned int i=first_boundary_dof+1; i<dof_handler.n_dofs(); ++i)
+      if (boundary_dofs[i] == true)
+        mean_value_constraints.add_entry (first_boundary_dof,
+                                          i, -1);
+    mean_value_constraints.close ();
+
+    // Next task is to generate a sparsity pattern. This is indeed a tricky
+    // task here. Usually, we just call
+    // <code>DoFTools::make_sparsity_pattern</code> and condense the result
+    // using the hanging node constraints. We have no hanging node constraints
+    // here (since we only refine globally in this example), but we have this
+    // global constraint on the boundary. This poses one severe problem in
+    // this context: the <code>SparsityPattern</code> class wants us to state
+    // beforehand the maximal number of entries per row, either for all rows
+    // or for each row separately. There are functions in the library which
+    // can tell you this number in case you just have hanging node constraints
+    // (namely <code>DoFHandler::max_coupling_between_dofs</code>), but how is
+    // this for the present case? The difficulty arises because the
+    // elimination of the constrained degree of freedom requires a number of
+    // additional entries in the matrix at places that are not so simple to
+    // determine. We would therefore have a problem had we to give a maximal
+    // number of entries per row here.
+    //
+    // Since this can be so difficult that no reasonable answer can be given
+    // that allows allocation of only a reasonable amount of memory, there is
+    // a class DynamicSparsityPattern, that can help us out
+    // here. It does not require that we know in advance how many entries rows
+    // could have, but allows just about any length. It is thus significantly
+    // more flexible in case you do not have good estimates of row lengths,
+    // however at the price that building up such a pattern is also
+    // significantly more expensive than building up a pattern for which you
+    // had information in advance. Nevertheless, as we have no other choice
+    // here, we'll just build such an object by initializing it with the
+    // dimensions of the matrix and calling another function
+    // <code>DoFTools::make_sparsity_pattern</code> to get the sparsity
+    // pattern due to the differential operator, then condense it with the
+    // constraints object which adds those positions in the sparsity pattern
+    // that are required for the elimination of the constraint.
+    DynamicSparsityPattern dsp (dof_handler.n_dofs(),
+                                dof_handler.n_dofs());
+    DoFTools::make_sparsity_pattern (dof_handler, dsp);
+    mean_value_constraints.condense (dsp);
+
+    // Finally, once we have the full pattern, we can initialize an object of
+    // type <code>SparsityPattern</code> from it and in turn initialize the
+    // matrix with it. Note that this is actually necessary, since the
+    // DynamicSparsityPattern is so inefficient compared to
+    // the <code>SparsityPattern</code> class due to the more flexible data
+    // structures it has to use, that we cannot possibly base the sparse matrix
+    // class on it, but rather need an object of type
+    // <code>SparsityPattern</code>, which we generate by copying from the
+    // intermediate object.
+    //
+    // As a further sidenote, you will notice that we do not explicitly have
+    // to <code>compress</code> the sparsity pattern here. This, of course, is
+    // due to the fact that the <code>copy_from</code> function generates a
+    // compressed object right from the start, to which you cannot add new
+    // entries anymore. The <code>compress</code> call is therefore implicit
+    // in the <code>copy_from</code> call.
+    sparsity_pattern.copy_from (dsp);
+    system_matrix.reinit (sparsity_pattern);
+  }
+
+
+
+  // The next function then assembles the linear system of equations, solves
+  // it, and evaluates the solution. This then makes three actions, and we
+  // will put them into eight true statements (excluding declaration of
+  // variables, and handling of temporary vectors). Thus, this function is
+  // something for the very lazy. Nevertheless, the functions called are
+  // rather powerful, and through them this function uses a good deal of the
+  // whole library. But let's look at each of the steps.
+  template <int dim>
+  void LaplaceProblem<dim>::assemble_and_solve ()
+  {
+
+    // First, we have to assemble the matrix and the right hand side. In all
+    // previous examples, we have investigated various ways how to do this
+    // manually. However, since the Laplace matrix and simple right hand sides
+    // appear so frequently in applications, the library provides functions
+    // for actually doing this for you, i.e. they perform the loop over all
+    // cells, setting up the local matrices and vectors, and putting them
+    // together for the end result.
+    //
+    // The following are the two most commonly used ones: creation of the
+    // Laplace matrix and creation of a right hand side vector from body or
+    // boundary forces. They take the mapping object, the
+    // <code>DoFHandler</code> object representing the degrees of freedom and
+    // the finite element in use, a quadrature formula to be used, and the
+    // output object. The function that creates a right hand side vector also
+    // has to take a function object describing the (continuous) right hand
+    // side function.
+    //
+    // Let us look at the way the matrix and body forces are integrated:
+    const unsigned int gauss_degree
+      = std::max (static_cast<unsigned int>(std::ceil(1.*(mapping.get_degree()+1)/2)),
+                  2U);
+    MatrixTools::create_laplace_matrix (mapping, dof_handler,
+                                        QGauss<dim>(gauss_degree),
+                                        system_matrix);
+    VectorTools::create_right_hand_side (mapping, dof_handler,
+                                         QGauss<dim>(gauss_degree),
+                                         ConstantFunction<dim>(-2),
+                                         system_rhs);
+    // That's quite simple, right?
+    //
+    // Two remarks are in order, though: First, these functions are used in a
+    // lot of contexts. Maybe you want to create a Laplace or mass matrix for
+    // a vector-valued finite element; or you want to use the default Q1
+    // mapping; or you want to assemble the matrix with a coefficient in the
+    // Laplace operator. For this reason, there are quite a large number of
+    // variants of these functions in the <code>MatrixCreator</code> and
+    // <code>MatrixTools</code> namespaces. Whenever you need a slightly
+    // different version of these functions than the ones called above, it is
+    // certainly worthwhile to take a look at the documentation and to check
+    // whether something fits your needs.
+    //
+    // The second remark concerns the quadrature formula we use: we want to
+    // integrate over bilinear shape functions, so we know that we have to use
+    // at least a Gauss2 quadrature formula. On the other hand, we want to
+    // have the quadrature rule to have at least the order of the boundary
+    // approximation. Since the order of Gauss-r is 2r, and the order of the
+    // boundary approximation using polynomials of degree p is p+1, we know
+    // that 2r@>=p+1. Since r has to be an integer and (as mentioned above)
+    // has to be at least 2, this makes up for the formula above computing
+    // <code>gauss_degree</code>.
+    //
+    // Since the generation of the body force contributions to the right hand
+    // side vector was so simple, we do that all over again for the boundary
+    // forces as well: allocate a vector of the right size and call the right
+    // function. The boundary function has constant values, so we can generate
+    // an object from the library on the fly, and we use the same quadrature
+    // formula as above, but this time of lower dimension since we integrate
+    // over faces now instead of cells:
+    Vector<double> tmp (system_rhs.size());
+    VectorTools::create_boundary_right_hand_side (mapping, dof_handler,
+                                                  QGauss<dim-1>(gauss_degree),
+                                                  ConstantFunction<dim>(1),
+                                                  tmp);
+    // Then add the contributions from the boundary to those from the interior
+    // of the domain:
+    system_rhs += tmp;
+    // For assembling the right hand side, we had to use two different vector
+    // objects, and later add them together. The reason we had to do so is
+    // that the <code>VectorTools::create_right_hand_side</code> and
+    // <code>VectorTools::create_boundary_right_hand_side</code> functions
+    // first clear the output vector, rather than adding up their results to
+    // previous contents. This can reasonably be called a design flaw in the
+    // library made in its infancy, but unfortunately things are as they are
+    // for some time now and it is difficult to change such things that
+    // silently break existing code, so we have to live with that.
+
+    // Now, the linear system is set up, so we can eliminate the one degree of
+    // freedom which we constrained to the other DoFs on the boundary for the
+    // mean value constraint from matrix and right hand side vector, and solve
+    // the system. After that, distribute the constraints again, which in this
+    // case means setting the constrained degree of freedom to its proper
+    // value
+    mean_value_constraints.condense (system_matrix);
+    mean_value_constraints.condense (system_rhs);
+
+    solve ();
+    mean_value_constraints.distribute (solution);
+
+    // Finally, evaluate what we got as solution. As stated in the
+    // introduction, we are interested in the H1 semi-norm of the
+    // solution. Here, as well, we have a function in the library that does
+    // this, although in a slightly non-obvious way: the
+    // <code>VectorTools::integrate_difference</code> function integrates the
+    // norm of the difference between a finite element function and a
+    // continuous function. If we therefore want the norm of a finite element
+    // field, we just put the continuous function to zero. Note that this
+    // function, just as so many other ones in the library as well, has at
+    // least two versions, one which takes a mapping as argument (which we
+    // make use of here), and the one which we have used in previous examples
+    // which implicitly uses <code>MappingQ1</code>.  Also note that we take a
+    // quadrature formula of one degree higher, in order to avoid
+    // superconvergence effects where the solution happens to be especially
+    // close to the exact solution at certain points (we don't know whether
+    // this might be the case here, but there are cases known of this, and we
+    // just want to make sure):
+    Vector<float> norm_per_cell (triangulation.n_active_cells());
+    VectorTools::integrate_difference (mapping, dof_handler,
+                                       solution,
+                                       ZeroFunction<dim>(),
+                                       norm_per_cell,
+                                       QGauss<dim>(gauss_degree+1),
+                                       VectorTools::H1_seminorm);
+    // Then, the function just called returns its results as a vector of
+    // values each of which denotes the norm on one cell. To get the global
+    // norm, a simple computation shows that we have to take the l2 norm of
+    // the vector:
+    const double norm = norm_per_cell.l2_norm();
+
+    // Last task -- generate output:
+    output_table.add_value ("cells", triangulation.n_active_cells());
+    output_table.add_value ("|u|_1", norm);
+    output_table.add_value ("error", std::fabs(norm-std::sqrt(3.14159265358/2)));
+  }
+
+
+
+  // The following function solving the linear system of equations is copied
+  // from step-5 and is explained there in some detail:
+  template <int dim>
+  void LaplaceProblem<dim>::solve ()
+  {
+    SolverControl           solver_control (1000, 1e-12);
+    SolverCG<>              cg (solver_control);
+
+    PreconditionSSOR<> preconditioner;
+    preconditioner.initialize(system_matrix, 1.2);
+
+    cg.solve (system_matrix, solution, system_rhs,
+              preconditioner);
+  }
+
+
+
+  // Finally the main function controlling the different steps to be
+  // performed. Its content is rather straightforward, generating a
+  // triangulation of a circle, associating a boundary to it, and then doing
+  // several cycles on subsequently finer grids. Note again that we have put
+  // mesh refinement into the loop header; this may be something for a test
+  // program, but for real applications you should consider that this implies
+  // that the mesh is refined after the loop is executed the last time since
+  // the increment clause (the last part of the three-parted loop header) is
+  // executed before the comparison part (the second one), which may be rather
+  // costly if the mesh is already quite refined. In that case, you should
+  // arrange code such that the mesh is not further refined after the last
+  // loop run (or you should do it at the beginning of each run except for the
+  // first one).
+  template <int dim>
+  void LaplaceProblem<dim>::run ()
+  {
+    GridGenerator::hyper_ball (triangulation);
+    static const SphericalManifold<dim> boundary;
+    triangulation.set_all_manifold_ids_on_boundary(0);
+    triangulation.set_manifold (0, boundary);
+
+    for (unsigned int cycle=0; cycle<6; ++cycle, triangulation.refine_global(1))
+      {
+        setup_system ();
+        assemble_and_solve ();
+      };
+
+    // After all the data is generated, write a table of results to the
+    // screen:
+    output_table.set_precision("|u|_1", 6);
+    output_table.set_precision("error", 6);
+    output_table.write_text (std::cout);
+    std::cout << std::endl;
+  }
+}
+
+
+
+// Finally the main function. Its structure is the same as that used in
+// several of the previous examples, so probably needs no more explanation.
+int main ()
+{
+  try
+    {
+      std::cout.precision(5);
+
+      // This is the main loop, doing the computations with linear through
+      // cubic mappings. Note that since we need the object of type
+      // <code>LaplaceProblem@<2@></code> only once, we do not even name it,
+      // but create an unnamed such object and call the <code>run</code>
+      // function of it, subsequent to which it is immediately destroyed
+      // again.
+      for (unsigned int mapping_degree=1; mapping_degree<=3; ++mapping_degree)
+        Step11::LaplaceProblem<2>(mapping_degree).run ();
+    }
+  catch (std::exception &exc)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    }
+  catch (...)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    };
+
+  return 0;
+}
diff --git a/examples/step-12/CMakeLists.txt b/examples/step-12/CMakeLists.txt
new file mode 100644
index 0000000..93399db
--- /dev/null
+++ b/examples/step-12/CMakeLists.txt
@@ -0,0 +1,39 @@
+##
+#  CMake script for the step-12 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-12")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-12/doc/builds-on b/examples/step-12/doc/builds-on
new file mode 100644
index 0000000..ab106ec
--- /dev/null
+++ b/examples/step-12/doc/builds-on
@@ -0,0 +1 @@
+step-7 step-39
diff --git a/examples/step-12/doc/intro.dox b/examples/step-12/doc/intro.dox
new file mode 100644
index 0000000..48d56d5
--- /dev/null
+++ b/examples/step-12/doc/intro.dox
@@ -0,0 +1,76 @@
+<a name="Intro"></a>
+<h1>An example of the MeshWorker framework with an advection problem</h1>
+
+<h3>Overview</h3>
+
+This example is devoted to the MeshWorker framework and the <em>discontinuous
+Galerkin method</em>, or in short: DG method. It includes the following topics.
+<ol>
+  <li> Discretization of the linear advection equation with the DG method.
+  <li> Assembling of the system matrix using the MeshWorker::loop().
+</ol>
+
+The particular concern of this program are the loops of DG methods. These turn
+out to be especially complex, primarily because for the face terms, we have to
+distinguish the cases of boundary, regular interior faces and interior faces
+with hanging nodes, respectively. The MeshWorker framework implements the
+standard loop over all cells and faces in MeshWorker::loop() and takes care of
+distinguishing between all the different faces.
+
+There are two things left to do if you use MeshWorker: first, you need
+to write the local integrators for your problem. Second, you select
+classes from the MeshWorker namespace and combine them to achieve your
+goal.
+
+
+<h3>Problem</h3>
+
+The model problem solved in this example is the linear advection equation
+@f[
+  \nabla\cdot \left({\mathbf \beta} u\right)=0 \qquad\mbox{in }\Omega,
+@f]
+subject to the boundary conditions
+@f[
+u=g\quad\mbox{on }\Gamma_-,
+@f]
+on the inflow part $\Gamma_-$ of the boundary $\Gamma=\partial\Omega$
+of the domain.  Here, ${\mathbf \beta}={\mathbf \beta}({\bf x})$ denotes a
+vector field, $u$ the (scalar) solution
+function, $g$ a boundary value function,
+@f[
+\Gamma_-:=\{{\bf x}\in\Gamma, {\mathbf \beta}({\bf x})\cdot{\bf n}({\bf x})<0\}
+@f]
+the inflow part of the boundary of the domain and ${\bf n}$ denotes
+the unit outward normal to the boundary $\Gamma$. This equation is the
+conservative version of the advection equation already considered in
+step-9 of this tutorial.
+In particular, we solve the advection equation on
+$\Omega=[0,1]^2$ with ${\mathbf \beta}=\frac{1}{|x|}(-x_2, x_1)$
+representing a circular counterclockwise flow field, and $g=1$
+on ${\bf x}\in\Gamma_-^1:=[0,0.5]\times\{0\}$ and $g=0$ on ${\bf x}\in
+\Gamma_-\setminus \Gamma_-^1$.
+
+We apply the well-known upwind discontinuous Galerkin method. To this
+end, we introduce the mesh dependent bilinear form
+
+@f[
+  -\sum_{T\in \mathbb T_h}\bigl(u_h,{\mathbf \beta}\cdot\nabla v_h\bigr)_T
+  +\sum_{F\in\mathbb F_h^i} \bigl<u_h^-, \beta\cdot[v_h\mathbf n]\bigr>_{F}
+  + \bigl<u_h, v_h \beta\cdot \mathbf n\bigr>_{\Gamma_+}
+  =-\bigl<g, v_h \beta\cdot\mathbf n\bigr>_{\Gamma_-}.
+@f]
+
+Here, $\mathbb T_h$ is the set of all active cells of the triangulation
+and $\mathbb F_h^i$ is the set of all active interior faces.
+$(\cdot, \cdot)_T$ and $\left<\cdot, \cdot\right>_{F}$ denote the
+<i>L<sup>2</sup></i>-inner products on the cell $T$ and a face $F$,
+respectively.  The jump is defined as $[v\mathbf n] = v^+\mathbf n^+ +
+v^-\mathbf n^-$, where the superscripts refer to the upwind ('+') and
+downwind ('-') values at the face.
+
+In order to implement this bilinear form, we need to compute the cell
+terms $\bigl(u_h,{\mathbf \beta}\cdot\nabla v_h\bigr)_T$, the internal fluxes
+$\bigl<u_h^-, \beta\cdot[v_h\mathbf n]\bigr>_{F}$, and the boundary terms $\bigl<u_h,
+v_h \beta\cdot \mathbf n\bigr>_{\Gamma_+}$ and $\bigl<g, \beta\cdot\mathbf n
+v_h\bigr>_{\Gamma_-}$. The summation of all those is done by MeshWorker::loop().
+
diff --git a/examples/step-12/doc/kind b/examples/step-12/doc/kind
new file mode 100644
index 0000000..c1d9154
--- /dev/null
+++ b/examples/step-12/doc/kind
@@ -0,0 +1 @@
+techniques
diff --git a/examples/step-12/doc/results.dox b/examples/step-12/doc/results.dox
new file mode 100644
index 0000000..ed1a3f9
--- /dev/null
+++ b/examples/step-12/doc/results.dox
@@ -0,0 +1,188 @@
+<h1>Results</h1>
+
+
+The output of this program consists of the console output, the eps
+files including the grids, and the solutions given in gnuplot format.
+@code
+DEAL::Cycle 0
+DEAL::Number of active cells:       64
+DEAL::Number of degrees of freedom: 256
+DEAL:Richardson::Starting value 0.176777
+DEAL:Richardson::Convergence step 4 value 3.33123e-17
+DEAL::Writing grid to <grid-0.eps>
+DEAL::Writing solution to <sol-0.gnuplot>
+DEAL::Cycle 1
+DEAL::Number of active cells:       112
+DEAL::Number of degrees of freedom: 448
+DEAL:Richardson::Starting value 0.153093
+DEAL:Richardson::Convergence step 9 value 3.74479e-17
+DEAL::Writing grid to <grid-1.eps>
+DEAL::Writing solution to <sol-1.gnuplot>
+DEAL::Cycle 2
+DEAL::Number of active cells:       214
+DEAL::Number of degrees of freedom: 856
+DEAL:Richardson::Starting value 0.149870
+DEAL:Richardson::Convergence step 16 value 1.41017e-14
+DEAL::Writing grid to <grid-2.eps>
+DEAL::Writing solution to <sol-2.gnuplot>
+DEAL::Cycle 3
+DEAL::Number of active cells:       415
+DEAL::Number of degrees of freedom: 1660
+DEAL:Richardson::Starting value 0.149053
+DEAL:Richardson::Convergence step 26 value 4.92424e-15
+DEAL::Writing grid to <grid-3.eps>
+DEAL::Writing solution to <sol-3.gnuplot>
+DEAL::Cycle 4
+DEAL::Number of active cells:       796
+DEAL::Number of degrees of freedom: 3184
+DEAL:Richardson::Starting value 0.148848
+DEAL:Richardson::Convergence step 44 value 5.80787e-14
+DEAL::Writing grid to <grid-4.eps>
+DEAL::Writing solution to <sol-4.gnuplot>
+DEAL::Cycle 5
+DEAL::Number of active cells:       1561
+DEAL::Number of degrees of freedom: 6244
+DEAL:Richardson::Starting value 0.131369
+DEAL:Richardson::Convergence step 81 value 2.39812e-13
+DEAL::Writing grid to <grid-5.eps>
+DEAL::Writing solution to <sol-5.gnuplot>
+@endcode
+
+We show the solutions on the initial mesh, the mesh after two
+and after five adaptive refinement steps.
+
+<img src="http://www.dealii.org/images/steps/developer/step-12.sol-0.png" alt="">
+<img src="http://www.dealii.org/images/steps/developer/step-12.sol-2.png" alt="">
+<img src="http://www.dealii.org/images/steps/developer/step-12.sol-5.png" alt="">
+
+
+Then we show the final grid (after 5 refinement steps) and the solution again,
+this time with a nicer 3d rendering (obtained using the DataOutBase::write_vtk
+function and the VTK-based VisIt visualization program) that better shows the
+sharpness of the jump on the refined mesh and the over- and undershoots of the
+solution along the interface:
+
+<img src="http://www.dealii.org/images/steps/developer/step-12.grid-5.png" alt="">
+<img src="http://www.dealii.org/images/steps/developer/step-12.3d-solution.png" alt="">
+
+
+And finally we show a plot of a 3d computation.
+
+<img src="http://www.dealii.org/images/steps/developer/step-12.sol-5-3d.png" alt="">
+
+
+<a name="dg-vs-cg"></a>
+<h3>Why use discontinuous elements</h3>
+
+In this program we have used discontinuous elements. It is a legitimate
+question to ask why not simply use the normal, continuous ones. Of course, to
+everyone with a background in numerical methods, the answer is obvious: the
+continuous Galerkin (cG) method is not stable for the transport equation,
+unless one specifically adds stabilization terms. The DG method, however,
+<i>is</i> stable. Illustrating this with the current program is not very
+difficult; in fact, only the following minor modifications are necessary:
+- Change the element to FE_Q instead of FE_DGQ.
+- Add handling of hanging node constraints in exactly the same way as step-6.
+- We need a different solver; the direct solver in step-29 is a convenient
+  choice.
+An experienced deal.II user will be able to do this in less than 10 minutes.
+
+While the 2d solution has been shown above, containing a number of small
+spikes at the interface that are, however, stable in height under mesh
+refinement, results look much different when using a continuous element:
+
+<table align="center">
+  <tr>
+    <td valign="top">
+      0  
+    </td>
+    <td valign="top">
+      <img src="http://www.dealii.org/images/steps/developer/step-12.cg.sol-0.png" alt="">
+    </td>
+
+    <td valign="top">
+      1  
+    </td>
+    <td valign="top">
+      <img src="http://www.dealii.org/images/steps/developer/step-12.cg.sol-1.png" alt="">
+    </td>
+  </tr>
+
+  <tr>
+    <td valign="top">
+      2  
+    </td>
+    <td valign="top">
+      <img src="http://www.dealii.org/images/steps/developer/step-12.cg.sol-2.png" alt="">
+    </td>
+
+    <td valign="top">
+      3  
+    </td>
+    <td valign="top">
+      <img src="http://www.dealii.org/images/steps/developer/step-12.cg.sol-3.png" alt="">
+    </td>
+  </tr>
+
+  <tr>
+    <td valign="top">
+      4  
+    </td>
+    <td valign="top">
+      <img src="http://www.dealii.org/images/steps/developer/step-12.cg.sol-4.png" alt="">
+    </td>
+
+    <td valign="top">
+      5  
+    </td>
+    <td valign="top">
+      <img src="http://www.dealii.org/images/steps/developer/step-12.cg.sol-5.png" alt="">
+    </td>
+  </tr>
+</table>
+
+In refinement iteration 5, the image can't be plotted in a reasonable way any
+more as a 3d plot. We thus show a color plot with a range of $[-1,2]$ (the
+solution values of the exact solution lie in $[0,1]$, of course). In any case,
+it is clear that the continuous Galerkin solution exhibits oscillatory
+behavior that gets worse and worse as the mesh is refined more and more.
+
+There are a number of strategies to stabilize the cG method, if one wants to
+use continuous elements for some reason. Discussing these methods is beyond
+the scope of this tutorial program; an interested reader could, for example,
+take a look at step-31.
+
+
+
+<a name="extensions"></a>
+<h3>Possibilities for extensions</h3>
+
+Given that the exact solution is known in this case, one interesting
+avenue for further extensions would be to confirm the order of
+convergence for this program. In the current case, the solution is
+non-smooth, and so we can not expect to get a particularly high order
+of convergence, even if we used higher order elements. But even if the
+solution <i>is</i> smooth, the equation is not elliptic and so it is not
+immediately clear that we should obtain a convergence order that
+equals that of the optimal interpolation estimates (i.e. for example
+that we would get $h^3$ convergence in the $L^2$ norm by using
+quadratic elements).
+
+In fact, for hyperbolic equations, theoretical predictions often
+indicate that the best one can hope for is an order one half below the
+interpolation estimate. For example, for the streamline diffusion
+method (an alternative method to the DG method used here to stabilize
+the solution of the transport equation), one can prove that for
+elements of degree $p$, the order of convergence is $p+\frac 12$ on
+arbitrary meshes. While the observed order is frequently $p+1$ on
+uniformly refined meshes, one can construct so-called Peterson meshes
+on which the worse theoretical bound is actually attained. This should
+be relatively simple to verify, for example using the
+VectorTools::integrate_difference function.
+
+A different direction is to observe that the solution of transport problems
+often has discontinuities and that therefore a mesh in which we <i>bisect</i>
+every cell in every coordinate direction may not be optimal. Rather, a better
+strategy would be to only cut cells in the direction parallel to the
+discontinuity. This is called <i>anisotropic mesh refinement</i> and is the
+subject of step-30.
diff --git a/examples/step-12/doc/tooltip b/examples/step-12/doc/tooltip
new file mode 100644
index 0000000..273cc08
--- /dev/null
+++ b/examples/step-12/doc/tooltip
@@ -0,0 +1 @@
+Discontinuous Galerkin for linear advection.
diff --git a/examples/step-12/step-12.cc b/examples/step-12/step-12.cc
new file mode 100644
index 0000000..bc39ef2
--- /dev/null
+++ b/examples/step-12/step-12.cc
@@ -0,0 +1,663 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2009 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Author: Guido Kanschat, Texas A&M University, 2009
+ */
+
+
+// The first few files have already been covered in previous examples and will
+// thus not be further commented on:
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/function.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/grid_out.h>
+#include <deal.II/grid/grid_refinement.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/fe/mapping_q1.h>
+// Here the discontinuous finite elements are defined. They are used in the
+// same way as all other finite elements, though -- as you have seen in
+// previous tutorial programs -- there isn't much user interaction with finite
+// element classes at all: they are passed to <code>DoFHandler</code> and
+// <code>FEValues</code> objects, and that is about it.
+#include <deal.II/fe/fe_dgq.h>
+// We are going to use the simplest possible solver, called Richardson
+// iteration, that represents a simple defect correction. We use it in
+// combination with a block SSOR preconditioner (defined in
+// precondition_block.h) that uses the special block matrix structure of
+// system matrices arising from DG discretizations.
+#include <deal.II/lac/solver_richardson.h>
+#include <deal.II/lac/precondition_block.h>
+// We are going to use gradients as refinement indicator.
+#include <deal.II/numerics/derivative_approximation.h>
+
+// Here come the new include files for using the MeshWorker framework. The
+// first contains the class MeshWorker::DoFInfo, which provides local
+// integrators with a mapping between local and global degrees of freedom. It
+// stores the results of local integrals as well in its base class
+// MeshWorker::LocalResults.  In the second of these files, we find an object
+// of type MeshWorker::IntegrationInfo, which is mostly a wrapper around a
+// group of FEValues objects. The file <tt>meshworker/simple.h</tt> contains
+// classes assembling locally integrated data into a global system containing
+// only a single matrix. Finally, we will need the file that runs the loop
+// over all mesh cells and faces.
+#include <deal.II/meshworker/dof_info.h>
+#include <deal.II/meshworker/integration_info.h>
+#include <deal.II/meshworker/simple.h>
+#include <deal.II/meshworker/loop.h>
+
+// Like in all programs, we finish this section by including the needed C++
+// headers and declaring we want to use objects in the dealii namespace
+// without prefix.
+#include <iostream>
+#include <fstream>
+
+
+namespace Step12
+{
+  using namespace dealii;
+
+  // @sect3{Equation data}
+  //
+  // First, we define a class describing the inhomogeneous boundary
+  // data. Since only its values are used, we implement value_list(), but
+  // leave all other functions of Function undefined.
+  template <int dim>
+  class BoundaryValues:  public Function<dim>
+  {
+  public:
+    BoundaryValues () {};
+    virtual void value_list (const std::vector<Point<dim> > &points,
+                             std::vector<double> &values,
+                             const unsigned int component=0) const;
+  };
+
+  // Given the flow direction, the inflow boundary of the unit square
+  // $[0,1]^2$ consists of the right and the lower boundaries. We prescribe
+  // the discontinuous boundary values 1 (for $x<0.5$) and 0 (otherwise) on
+  // the x-axis and the value 0 on the right boundary. The values of this
+  // function on the outflow boundaries will not be used within the DG scheme.
+  template <int dim>
+  void BoundaryValues<dim>::value_list(const std::vector<Point<dim> > &points,
+                                       std::vector<double> &values,
+                                       const unsigned int) const
+  {
+    Assert(values.size()==points.size(),
+           ExcDimensionMismatch(values.size(),points.size()));
+
+    for (unsigned int i=0; i<values.size(); ++i)
+      {
+        if (points[i](0)<0.5)
+          values[i]=1.;
+        else
+          values[i]=0.;
+      }
+  }
+  // @sect3{The AdvectionProblem class}
+  //
+  // After these preparations, we proceed with the main class of this program,
+  // called AdvectionProblem. It is basically the main class of step-6. We do
+  // not have a ConstraintMatrix, because there are no hanging node
+  // constraints in DG discretizations.
+
+  // Major differences will only come up in the implementation of the assemble
+  // functions, since here, we not only need to cover the flux integrals over
+  // faces, we also use the MeshWorker interface to simplify the loops
+  // involved.
+  template <int dim>
+  class AdvectionProblem
+  {
+  public:
+    AdvectionProblem ();
+    void run ();
+
+  private:
+    void setup_system ();
+    void assemble_system ();
+    void solve (Vector<double> &solution);
+    void refine_grid ();
+    void output_results (const unsigned int cycle) const;
+
+    Triangulation<dim>   triangulation;
+    const MappingQ1<dim> mapping;
+
+    // Furthermore we want to use DG elements of degree 1 (but this is only
+    // specified in the constructor). If you want to use a DG method of a
+    // different degree, the whole program stays the same; only replace 1 in
+    // the constructor by the desired polynomial degree.
+    FE_DGQ<dim>          fe;
+    DoFHandler<dim>      dof_handler;
+
+    // The next four members represent the linear system to be
+    // solved. <code>system_matrix</code> and <code>right_hand_side</code> are
+    // generated by <code>assemble_system()</code>, the <code>solution</code>
+    // is computed in <code>solve()</code>. The <code>sparsity_pattern</code>
+    // is used to determine the location of nonzero elements in
+    // <code>system_matrix</code>.
+    SparsityPattern      sparsity_pattern;
+    SparseMatrix<double> system_matrix;
+
+    Vector<double>       solution;
+    Vector<double>       right_hand_side;
+
+    // Finally, we have to provide functions that assemble the cell, boundary,
+    // and inner face terms. Within the MeshWorker framework, the loop over
+    // all cells and much of the setup of operations will be done outside this
+    // class, so all we have to provide are these three operations. They work
+    // on intermediate info objects that are handed to the local integration
+    // functions; to make our life easier below, we first define typedefs for
+    // the types of these objects.
+    typedef MeshWorker::DoFInfo<dim> DoFInfo;
+    typedef MeshWorker::IntegrationInfo<dim> CellInfo;
+
+    // The following three functions are then the ones that get called inside
+    // the generic loop over all cells and faces. They are the ones doing the
+    // actual integration.
+    //
+    // In our code below, these functions do not access member variables of
+    // the current class, so we can mark them as <code>static</code> and
+    // simply pass pointers to these functions to the MeshWorker
+    // framework. If, however, these functions wanted to access member
+    // variables (or needed additional arguments beyond the ones specified
+    // below), we could use the facilities of boost::bind (or std::bind,
+    // respectively) to provide the MeshWorker framework with objects that act
+    // as if they had the required number and types of arguments, but have in
+    // fact other arguments already bound.
+    static void integrate_cell_term (DoFInfo &dinfo,
+                                     CellInfo &info);
+    static void integrate_boundary_term (DoFInfo &dinfo,
+                                         CellInfo &info);
+    static void integrate_face_term (DoFInfo &dinfo1,
+                                     DoFInfo &dinfo2,
+                                     CellInfo &info1,
+                                     CellInfo &info2);
+  };
+
+
+  // We start with the constructor. The 1 passed to the constructor of
+  // <code>fe</code> is the polynomial degree of the DG element.
+  template <int dim>
+  AdvectionProblem<dim>::AdvectionProblem ()
+    :
+    mapping (),
+    fe (1),
+    dof_handler (triangulation)
+  {}
+
+
+  template <int dim>
+  void AdvectionProblem<dim>::setup_system ()
+  {
+    // In the function that sets up the usual finite element data structures,
+    // we first need to distribute the DoFs.
+    dof_handler.distribute_dofs (fe);
+
+    // We start by generating the sparsity pattern. To this end, we first fill
+    // an intermediate object of type DynamicSparsityPattern with the
+    // couplings appearing in the system. After building the pattern, this
+    // object is copied to <code>sparsity_pattern</code> and can be discarded.
+
+    // To build the sparsity pattern for DG discretizations, we can call the
+    // function analogous to DoFTools::make_sparsity_pattern, which is called
+    // DoFTools::make_flux_sparsity_pattern:
+    DynamicSparsityPattern dsp(dof_handler.n_dofs());
+    DoFTools::make_flux_sparsity_pattern (dof_handler, dsp);
+    sparsity_pattern.copy_from(dsp);
+
+    // Finally, we set up the structure of all components of the linear
+    // system.
+    system_matrix.reinit (sparsity_pattern);
+    solution.reinit (dof_handler.n_dofs());
+    right_hand_side.reinit (dof_handler.n_dofs());
+  }
+
+  // @sect4{The assemble_system function}
+
+  // Here we see the major difference to assembling by hand. Instead of
+  // writing loops over cells and faces, we leave all this to the MeshWorker
+  // framework. In order to do so, we just have to define local integration
+  // functions and use one of the classes in namespace MeshWorker::Assembler
+  // to build the global system.
+  template <int dim>
+  void AdvectionProblem<dim>::assemble_system ()
+  {
+    // This is the magic object, which knows everything about the data
+    // structures and local integration.  This is the object doing the work in
+    // the function MeshWorker::loop(), which we call directly at the end of
+    // this function. After the functions to which we provide pointers did
+    // the local integration, the MeshWorker::Assembler::SystemSimple object
+    // distributes their results into the global sparse matrix and the right
+    // hand side vector.
+    MeshWorker::IntegrationInfoBox<dim> info_box;
+
+    // First, we initialize the quadrature formulae and the update flags in
+    // the worker base class. For quadrature, we play safe and use a QGauss
+    // formula with number of points one higher than the polynomial degree
+    // used. Since the quadratures for cells, boundary and interior faces can
+    // be selected independently, we have to hand over this value three times.
+    const unsigned int n_gauss_points = dof_handler.get_fe().degree+1;
+    info_box.initialize_gauss_quadrature(n_gauss_points,
+                                         n_gauss_points,
+                                         n_gauss_points);
+
+    // These are the types of values we need for integrating our system. They
+    // are added to the flags used on cells, boundary and interior faces, as
+    // well as interior neighbor faces, which is forced by the four @p true
+    // values.
+    info_box.initialize_update_flags();
+    UpdateFlags update_flags = update_quadrature_points |
+                               update_values            |
+                               update_gradients;
+    info_box.add_update_flags(update_flags, true, true, true, true);
+
+    // After preparing all data in <tt>info_box</tt>, we initialize the
+    // FEValues objects in there.
+    info_box.initialize(fe, mapping);
+
+    // The object created so far helps us do the local integration on each
+    // cell and face. Now, we need an object which receives the integrated
+    // (local) data and forwards them to the assembler.
+    MeshWorker::DoFInfo<dim> dof_info(dof_handler);
+
+    // Now, we have to create the assembler object and tell it where to put
+    // the local data. These will be our system matrix and the right hand
+    // side.
+    MeshWorker::Assembler::SystemSimple<SparseMatrix<double>, Vector<double> >
+    assembler;
+    assembler.initialize(system_matrix, right_hand_side);
+
+    // Finally, the integration loop over all active cells (determined by the
+    // first argument, which is an active iterator).
+    //
+    // As noted in the discussion when declaring the local integration
+    // functions in the class declaration, the arguments expected by the
+    // assembling integrator class are not actually function pointers. Rather,
+    // they are objects that can be called like functions with a certain
+    // number of arguments. Consequently, we could also pass objects with
+    // appropriate operator() implementations here, or the result of std::bind
+    // if the local integrators were, for example, non-static member
+    // functions.
+    MeshWorker::loop<dim, dim, MeshWorker::DoFInfo<dim>, MeshWorker::IntegrationInfoBox<dim> >
+    (dof_handler.begin_active(), dof_handler.end(),
+     dof_info, info_box,
+     &AdvectionProblem<dim>::integrate_cell_term,
+     &AdvectionProblem<dim>::integrate_boundary_term,
+     &AdvectionProblem<dim>::integrate_face_term,
+     assembler);
+  }
+
+
+  // @sect4{The local integrators}
+
+  // These are the functions given to the MeshWorker::loop() call just
+  // above. They compute the local contributions to the system matrix and
+  // right hand side on cells and faces.
+  template <int dim>
+  void AdvectionProblem<dim>::integrate_cell_term (DoFInfo &dinfo,
+                                                   CellInfo &info)
+  {
+    // First, let us retrieve some of the objects used here from @p info. Note
+    // that these objects can handle much more complex structures, thus the
+    // access here looks more complicated than might seem necessary.
+    const FEValuesBase<dim> &fe_v = info.fe_values();
+    FullMatrix<double> &local_matrix = dinfo.matrix(0).matrix;
+    const std::vector<double> &JxW = fe_v.get_JxW_values ();
+
+    // With these objects, we continue local integration as always. First, we
+    // loop over the quadrature points and compute the advection vector at
+    // the current point.
+    for (unsigned int point=0; point<fe_v.n_quadrature_points; ++point)
+      {
+        Point<dim> beta;
+        beta(0) = -fe_v.quadrature_point(point)(1);
+        beta(1) = fe_v.quadrature_point(point)(0);
+        beta /= beta.norm();
+
+        // We solve a homogeneous equation, thus no right hand side shows up
+        // in the cell term.  What's left is integrating the matrix entries.
+        for (unsigned int i=0; i<fe_v.dofs_per_cell; ++i)
+          for (unsigned int j=0; j<fe_v.dofs_per_cell; ++j)
+            local_matrix(i,j) -= beta*fe_v.shape_grad(i,point)*
+                                 fe_v.shape_value(j,point) *
+                                 JxW[point];
+      }
+  }
+
+  // Now the same for the boundary terms. We use FEValuesBase, the base class
+  // of FEFaceValues and FESubfaceValues, to get normal vectors. Points with
+  // $\beta\cdot n>0$ add to the matrix, all others to the right hand side.
+  template <int dim>
+  void AdvectionProblem<dim>::integrate_boundary_term (DoFInfo &dinfo,
+                                                       CellInfo &info)
+  {
+    const FEValuesBase<dim> &fe_v = info.fe_values();
+    FullMatrix<double> &local_matrix = dinfo.matrix(0).matrix;
+    Vector<double> &local_vector = dinfo.vector(0).block(0);
+
+    const std::vector<double> &JxW = fe_v.get_JxW_values ();
+    const std::vector<Tensor<1,dim> > &normals = fe_v.get_all_normal_vectors ();
+
+    std::vector<double> g(fe_v.n_quadrature_points);
+
+    static BoundaryValues<dim> boundary_function;
+    boundary_function.value_list (fe_v.get_quadrature_points(), g);
+
+    for (unsigned int point=0; point<fe_v.n_quadrature_points; ++point)
+      {
+        Point<dim> beta;
+        beta(0) = -fe_v.quadrature_point(point)(1);
+        beta(1) = fe_v.quadrature_point(point)(0);
+        beta /= beta.norm();
+
+        const double beta_n=beta * normals[point];
+        if (beta_n>0)
+          for (unsigned int i=0; i<fe_v.dofs_per_cell; ++i)
+            for (unsigned int j=0; j<fe_v.dofs_per_cell; ++j)
+              local_matrix(i,j) += beta_n *
+                                   fe_v.shape_value(j,point) *
+                                   fe_v.shape_value(i,point) *
+                                   JxW[point];
+        else
+          for (unsigned int i=0; i<fe_v.dofs_per_cell; ++i)
+            local_vector(i) -= beta_n *
+                               g[point] *
+                               fe_v.shape_value(i,point) *
+                               JxW[point];
+      }
+  }
+
+  // Finally, the interior face terms. The difference here is that we receive
+  // two info objects, one for each cell adjacent to the face and we assemble
+  // four matrices, one for each cell and two for coupling back and forth.
+  template <int dim>
+  void AdvectionProblem<dim>::integrate_face_term (DoFInfo &dinfo1,
+                                                   DoFInfo &dinfo2,
+                                                   CellInfo &info1,
+                                                   CellInfo &info2)
+  {
+    // Quadrature points, weights and normal vectors are all taken from the
+    // FEValuesBase object of the first cell. The neighbor's FEValuesBase
+    // object is consulted only for the values of its shape functions.
+    const FEValuesBase<dim> &fe_here     = info1.fe_values();
+    const FEValuesBase<dim> &fe_neighbor = info2.fe_values();
+
+    const unsigned int n_q_points      = fe_here.n_quadrature_points;
+    const unsigned int n_dofs_here     = fe_here.dofs_per_cell;
+    const unsigned int n_dofs_neighbor = fe_neighbor.dofs_per_cell;
+
+    // The four local matrices. In the names, "trial" and "test" refer to
+    // the trial function u and the test function v, and the digit to the
+    // cell (info1 or info2) the function lives on. Each info object holds
+    // the matrices whose test functions belong to its own cell: the one
+    // requested with <code>false</code> couples the cell with itself, the
+    // one requested with <code>true</code> couples it with the other cell.
+    FullMatrix<double> &trial1_test1 = dinfo1.matrix(0,false).matrix;
+    FullMatrix<double> &trial2_test1 = dinfo1.matrix(0,true).matrix;
+    FullMatrix<double> &trial1_test2 = dinfo2.matrix(0,true).matrix;
+    FullMatrix<double> &trial2_test2 = dinfo2.matrix(0,false).matrix;
+
+    // No right hand side vectors are needed here: the interface terms only
+    // involve the solution.
+
+    const std::vector<double>         &weights = fe_here.get_JxW_values ();
+    const std::vector<Tensor<1,dim> > &normals = fe_here.get_all_normal_vectors ();
+
+    for (unsigned int q=0; q<n_q_points; ++q)
+      {
+        // The transport direction is the rotating field (-y,x), scaled to
+        // unit length.
+        Point<dim> beta;
+        beta(0) = -fe_here.quadrature_point(q)(1);
+        beta(1) = fe_here.quadrature_point(q)(0);
+        beta /= beta.norm();
+
+        const double beta_n = beta * normals[q];
+
+        // Everything that is not strictly outflow (as seen from the first
+        // cell) is handled here. The test is deliberately written as the
+        // negation of "beta_n>0" so that exactly the same points end up in
+        // this branch as in an if/else on "beta_n>0".
+        if (!(beta_n>0))
+          {
+            // The upwind value is the neighbor's, so the trial functions
+            // are those of the second cell.
+            for (unsigned int l=0; l<n_dofs_neighbor; ++l)
+              {
+                // Tested with the functions of the first cell...
+                for (unsigned int i=0; i<n_dofs_here; ++i)
+                  trial2_test1(i,l) += beta_n *
+                                       fe_neighbor.shape_value(l,q) *
+                                       fe_here.shape_value(i,q) *
+                                       weights[q];
+
+                // ...and with those of the neighbor: $(\beta\cdot n \hat
+                // u,\hat v)_{\partial \kappa_-}$.
+                for (unsigned int k=0; k<n_dofs_neighbor; ++k)
+                  trial2_test2(k,l) -= beta_n *
+                                       fe_neighbor.shape_value(l,q) *
+                                       fe_neighbor.shape_value(k,q) *
+                                       weights[q];
+              }
+            continue;
+          }
+
+        // Outflow: the upwind value is our own, so the trial functions are
+        // those of the first cell.
+        for (unsigned int j=0; j<n_dofs_here; ++j)
+          {
+            // Tested with the functions of the first cell...
+            for (unsigned int i=0; i<n_dofs_here; ++i)
+              trial1_test1(i,j) += beta_n *
+                                   fe_here.shape_value(j,q) *
+                                   fe_here.shape_value(i,q) *
+                                   weights[q];
+
+            // ...and with those of the neighbor: $(\beta\cdot n u,\hat
+            // v)_{\partial \kappa_+}$.
+            for (unsigned int k=0; k<n_dofs_neighbor; ++k)
+              trial1_test2(k,j) -= beta_n *
+                                   fe_here.shape_value(j,q) *
+                                   fe_neighbor.shape_value(k,q) *
+                                   weights[q];
+          }
+      }
+  }
+
+
+  // @sect3{All the rest}
+  //
+  // For this simple problem we use the simplest possible solver, called
+  // Richardson iteration, which represents a simple defect correction. We
+  // combine it with a block SSOR preconditioner that exploits the special
+  // block matrix structure of system matrices arising from DG
+  // discretizations. The size of these blocks is the number of DoFs per
+  // cell. Here, we use SSOR preconditioning as we have not renumbered the
+  // DoFs according to the flow field. If the DoFs are renumbered in the
+  // downstream direction of the flow, then a block Gauss-Seidel
+  // preconditioner (see the PreconditionBlockSOR class with relaxation=1)
+  // does a much better job.
+  template <int dim>
+  void AdvectionProblem<dim>::solve (Vector<double> &solution)
+  {
+    // Set up the block SSOR preconditioner first: it is attached to the
+    // system matrix and told that each diagonal block has as many rows as
+    // there are degrees of freedom on a cell.
+    PreconditionBlockSSOR<SparseMatrix<double> > block_ssor;
+    block_ssor.initialize(system_matrix, fe.dofs_per_cell);
+
+    // The Richardson iteration is controlled by a SolverControl object
+    // constructed with the arguments (1000, 1e-12).
+    SolverControl           control (1000, 1e-12);
+    SolverRichardson<>      richardson (control);
+
+    // Solve system_matrix * solution = right_hand_side, writing the result
+    // into the vector passed in by the caller.
+    richardson.solve (system_matrix, solution, right_hand_side,
+                      block_ssor);
+  }
+
+
+  // We refine the grid according to a very simple refinement criterion,
+  // namely an approximation to the gradient of the solution. As here we
+  // consider the DG(1) method (i.e. we use piecewise bilinear shape
+  // functions) we could simply compute the gradients on each cell. But we do
+  // not want to base our refinement indicator on the gradients on each cell
+  // only, but want to base them also on jumps of the discontinuous solution
+  // function over faces between neighboring cells. The simplest way of doing
+  // that is to compute approximate gradients by difference quotients
+  // including the cell under consideration and its neighbors. This is done by
+  // the <code>DerivativeApproximation</code> class that computes the
+  // approximate gradients in a way similar to the
+  // <code>GradientEstimation</code> described in step-9 of this tutorial. In
+  // fact, the <code>DerivativeApproximation</code> class was developed
+  // following the <code>GradientEstimation</code> class of step-9. Relating
+  // to the discussion in step-9, here we consider $h^{1+d/2}|\nabla_h
+  // u_h|$. Furthermore we note that we do not consider approximate second
+  // derivatives because solutions to the linear advection equation are in
+  // general not in $H^2$ but in $H^1$ (to be more precise, in $H^1_\beta$)
+  // only.
+  template <int dim>
+  void AdvectionProblem<dim>::refine_grid ()
+  {
+    // One indicator per active cell. They are stored in float precision,
+    // which is all the <code>DerivativeApproximation</code> class delivers
+    // and all that is needed for values that merely steer refinement.
+    Vector<float> indicators (triangulation.n_active_cells());
+
+    // Fill them with the approximate gradients of the current solution,
+    DerivativeApproximation::approximate_gradient (mapping,
+                                                   dof_handler,
+                                                   solution,
+                                                   indicators);
+
+    // then weight each one with $h^{1+d/2}$, where $h$ is the diameter of
+    // the cell it belongs to. The running index follows the active cells
+    // in iteration order.
+    const double exponent = 1+1.0*dim/2;
+    unsigned int index = 0;
+    for (typename DoFHandler<dim>::active_cell_iterator
+         cell = dof_handler.begin_active();
+         cell != dof_handler.end();
+         ++cell, ++index)
+      indicators(index) *= std::pow(cell->diameter(), exponent);
+
+    // Flag cells based on the weighted indicators (judging by the function's
+    // name, 0.3 is the fraction to refine and 0.1 the fraction to coarsen;
+    // see its documentation) and carry out the change of the mesh.
+    GridRefinement::refine_and_coarsen_fixed_number (triangulation,
+                                                     indicators,
+                                                     0.3, 0.1);
+
+    triangulation.execute_coarsening_and_refinement ();
+  }
+
+
+  // The output of this program consists of eps-files of the adaptively
+  // refined grids and the numerical solutions given in gnuplot format. This
+  // was covered in previous examples and will not be further commented on.
+  template <int dim>
+  void AdvectionProblem<dim>::output_results (const unsigned int cycle) const
+  {
+    // Both file names embed the cycle number as a single decimal digit.
+    // Make sure it really is one before it is used to build any name (the
+    // check used to come only after the name had already been assembled,
+    // and was repeated further down where it could never trigger).
+    Assert (cycle < 10, ExcInternalError());
+
+    // '0' + cycle has type unsigned int; convert it to the character that
+    // is appended to the names explicitly rather than by silent narrowing.
+    const char cycle_digit = static_cast<char>('0' + cycle);
+
+    // Write the grid in eps format.
+    std::string filename = "grid-";
+    filename += cycle_digit;
+    filename += ".eps";
+    deallog << "Writing grid to <" << filename << ">" << std::endl;
+    std::ofstream eps_output (filename.c_str());
+
+    GridOut grid_out;
+    grid_out.write_eps (triangulation, eps_output);
+
+    // Output of the solution in gnuplot format.
+    filename = "sol-";
+    filename += cycle_digit;
+    filename += ".gnuplot";
+    deallog << "Writing solution to <" << filename << ">" << std::endl;
+    std::ofstream gnuplot_output (filename.c_str());
+
+    // Attach the DoF handler and the solution (under the name "u"), build
+    // the patches and write them out.
+    DataOut<dim> data_out;
+    data_out.attach_dof_handler (dof_handler);
+    data_out.add_data_vector (solution, "u");
+
+    data_out.build_patches ();
+
+    data_out.write_gnuplot(gnuplot_output);
+  }
+
+
+  // The following <code>run</code> function is similar to previous examples.
+  template <int dim>
+  void AdvectionProblem<dim>::run ()
+  {
+    // Number of solve/refine cycles performed by this program.
+    const unsigned int n_cycles = 6;
+
+    for (unsigned int cycle=0; cycle<n_cycles; ++cycle)
+      {
+        deallog << "Cycle " << cycle << std::endl;
+
+        // Every cycle but the first starts from the adaptively refined
+        // mesh of its predecessor; the first one has to create the mesh,
+        // a hypercube that is globally refined three times.
+        if (cycle != 0)
+          refine_grid ();
+        else
+          {
+            GridGenerator::hyper_cube (triangulation);
+
+            triangulation.refine_global (3);
+          }
+
+
+        deallog << "Number of active cells:       "
+                << triangulation.n_active_cells()
+                << std::endl;
+
+        // Distribute degrees of freedom on the new mesh before reporting
+        // how many there are.
+        setup_system ();
+
+        deallog << "Number of degrees of freedom: "
+                << dof_handler.n_dofs()
+                << std::endl;
+
+        // Assemble, solve, and write the results of this cycle.
+        assemble_system ();
+        solve (solution);
+
+        output_results (cycle);
+      }
+  }
+}
+
+
+// The following <code>main</code> function is similar to previous examples as
+// well, and need not be commented on.
+int main ()
+{
+  // Horizontal rule that frames the error reports written to std::cerr.
+  const char *const rule
+    = "----------------------------------------------------";
+
+  try
+    {
+      // Run the two-dimensional version of the program.
+      Step12::AdvectionProblem<2> dgmethod;
+      dgmethod.run ();
+    }
+  catch (const std::exception &exc)
+    {
+      // A standard exception: report what it has to say and fail.
+      std::cerr << std::endl << std::endl
+                << rule
+                << std::endl
+                << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << rule
+                << std::endl;
+      return 1;
+    }
+  catch (...)
+    {
+      // Anything else: all we can report is that something was thrown.
+      std::cerr << std::endl << std::endl
+                << rule
+                << std::endl
+                << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << rule
+                << std::endl;
+      return 1;
+    }
+
+  return 0;
+}
diff --git a/examples/step-13/CMakeLists.txt b/examples/step-13/CMakeLists.txt
new file mode 100644
index 0000000..b66782b
--- /dev/null
+++ b/examples/step-13/CMakeLists.txt
@@ -0,0 +1,39 @@
+##
+#  CMake script for the step-13 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-13")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+# Request deal.II 8.4 via the hint paths below; QUIET leaves the error reporting to the explicit check that follows.
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+# NOTE(review): the DEAL_II_* macros below are presumably provided by the deal.II package found above -- confirm.
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-13/doc/builds-on b/examples/step-13/doc/builds-on
new file mode 100644
index 0000000..1740273
--- /dev/null
+++ b/examples/step-13/doc/builds-on
@@ -0,0 +1 @@
+step-6
diff --git a/examples/step-13/doc/intro.dox b/examples/step-13/doc/intro.dox
new file mode 100644
index 0000000..0d59978
--- /dev/null
+++ b/examples/step-13/doc/intro.dox
@@ -0,0 +1,194 @@
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+<h3>Background and purpose</h3>
+
+
+In this example program, we will not so much be concerned with
+describing new ways how to use deal.II and its facilities, but rather
+with presenting methods of writing modular and extensible finite
+element programs. The main reason for this is the size and complexity
+of modern research software: applications implementing modern error
+estimation concepts and adaptive solution methods tend to become
+rather large. For example, when this program was written in 2002, the
+three largest applications written by the main authors of deal.II
+were the following (line counts as of the time of writing of this
+example program):
+<ol>
+<li> a program for solving hyperbolic conservation equations by the
+     Discontinuous Galerkin Finite Element method: 33,775 lines of
+     code;
+<li> a parameter estimation program: 28,980 lines of code;
+<li> a wave equation solver: 21,020 lines of code.
+</ol>
+
+(The library proper - without example programs and test suite - has slightly
+more than 150,000 lines of code as of spring 2002. It is of course several
+times larger now.) The sizes of these applications are at the edge of what
+one person, even an experienced programmer, can manage.
+
+
+
+The numbers above make one thing rather clear: monolithic programs that
+are not broken up into smaller, mostly independent pieces have no way
+of surviving, since even the author will quickly lose the overview of
+the various dependencies between different parts of a program. Only
+data encapsulation, for example using object oriented programming
+methods, and modularization by defining small but fixed interfaces can
+help structure data flow and mutual interdependencies. It is also an
+absolute prerequisite if more than one person is developing a program,
+since otherwise confusion will quickly prevail as one developer
+would need to know if another changed something about the internals of
+a different module if they were not cleanly separated.
+
+
+
+In previous examples, you have seen how the library itself is broken
+up into several complexes each building atop the underlying ones, but
+relatively independent of the other ones:
+<ol>
+<li>the triangulation class complex, with associated iterator classes;
+<li>the finite element classes;
+<li>the DoFHandler class complex, with associated iterators, built on
+    the triangulation and finite element classes;
+<li>the classes implementing mappings between unit and real cells;
+<li>the FEValues class complex, built atop the finite elements and
+    mappings.
+</ol>
+Besides these, and a large number of smaller classes, there are of
+course the following "tool" modules:
+<ol>
+<li>output in various graphical formats;
+<li>linear algebra classes.
+</ol>
+These complexes can also be found as a flow chart on the front page of
+the deal.II manual website.
+
+
+
+The goal of this program is now to give an example of how a relatively
+simple finite element program could be structured such that we end up
+with a set of modules that are as independent of each other as
+possible. This allows us to change the program at one end, without having to
+worry that it might break at the other, as long as we do not touch the
+interface through which the two ends communicate. The interface in
+C++, of course, is the declaration of abstract base classes.
+
+
+
+Here, we will implement (again) a Laplace solver, although with a
+number of differences compared to previous example programs:
+<ol>
+<li>The classes that implement the process of numerically solving the
+    equation are no longer responsible for driving the process of
+    "solving-estimating error-refining-solving again"; instead, we delegate
+    this to external functions. This first allows us to use it as a
+    building block in a larger context, where the solution of a
+    Laplace equation might only be one part (for example, in a
+    nonlinear problem, where Laplace equations might have to be solved
+    in each nonlinear step). It would also allow us to build a framework
+    around this class that would allow using solvers for other
+    equations (but with the same external interface) instead, in case
+    some techniques are to be evaluated for different types of partial
+    differential equations.
+<li>It splits the process of evaluating the computed solution into a
+    separate set of classes. The reason is that one is usually not
+    interested in the solution of a PDE per se, but rather in certain
+    aspects of it. For example, one might wish to compute the traction
+    at a certain boundary in elastic computations, or the signal of
+    a seismic wave at a receiver positioned at a given
+    location. Sometimes, one might have an interest in several of
+    these aspects. Since the evaluation of a solution is something
+    that does not usually affect the process of solution, we split it
+    off into a separate module, to allow for the development of such
+    evaluation filters independently of the development of the solver
+    classes.
+<li>Separate the classes that implement mesh refinement from the
+    classes that compute the solution.
+<li>Separate the description of the test case with which we will
+    present the program, from the rest of the program.
+<li>Parallelize the assembly of linear systems using the WorkStream
+    facilities. This follows the extensive description that can be
+    found in the @ref threads "Parallel computing with multiple processors accessing shared memory"
+    documentation module. The implementation essentially follows what
+    has already been described in step-9.
+</ol>
+
+
+
+The things the program does are not new. In fact, this is more like a
+melange of previous programs, cannibalizing various parts and
+functions from earlier examples. It is the way they are arranged in
+this program that should be the focus of the reader, i.e. the software
+design techniques used in the program to achieve the goal of
+implementing the desired mathematical method. However, we must
+stress that software design is in part also a subjective matter:
+different persons have different programming backgrounds and have
+different opinions about the "right" style of programming; this
+program therefore expresses only what the author considers useful
+practice, and is not necessarily a style that you have to adopt in
+order to write successful numerical software if you feel uncomfortable
+with the chosen ways. It should serve as a case study, however,
+inspiring the reader with ideas to the desired end.
+
+
+
+Once you have worked through the program, you will notice that it is
+already somewhat complex in its structure. Nevertheless, it
+only has about 850 lines of code, without comments. In real
+applications, there would of course be comments and class
+documentation, which would bring that to maybe 1200 lines. Yet, compared to
+the applications listed above, this is still small, as they are 20 to
+25 times as large. For programs as large, a proper design right from
+the start is thus indispensable. Otherwise, it will have to be
+redesigned at one point in its life, once it becomes too large to be
+manageable.
+
+
+
+Despite this, all three programs listed above have undergone major
+revisions, or even rewrites. The wave program, for example, was once
+entirely torn apart when it was still significantly smaller, just
+to assemble it again in a more modular form. By that time, it had
+become impossible to add functionality without affecting older parts
+of the code (the main problem with the code was the data flow: in time
+dependent applications, the major concern is when to store data to disk
+and when to reload it again; if this is not done in an organized
+fashion, then you end up with data released too early, loaded too
+late, or not released at all). Although the present example program
+thus draws from several years of experience, it is certainly not
+without flaws in its design, and in particular might not be suited for
+an application where the objective is different. It should serve as an
+inspiration for writing your own application in a modular way, to
+avoid the pitfalls of too closely coupled codes.
+
+
+
+<h3>What the program does</h3>
+
+
+What the program actually does is not even the main point of this
+program; the structure of the program is more important. However, in a
+few words, a description would be: solve the Laplace equation for a
+given right hand side such that the solution is the function
+$u(x,y)=\exp(x+\sin(10y+5x^2))$. The goal of the
+computation is to get the value of the solution at the point
+$x_0=(0.5,0.5)$, and to compare the accuracy with
+which we resolve this value for two refinement criteria, namely global
+refinement and refinement by the error indicator by Kelly et al. which
+we have already used in previous examples.
+
+
+
+The results will, as usual, be discussed in the respective section of
+this document. In doing so, we will find a slightly irritating
+observation about the relative performance of the two refinement
+criteria. In a later example program, building atop this one, we will
+devise a different method that should hopefully perform better than
+the techniques discussed here.
+
+
+
+So much now for all the theoretical and anecdotal background. The best
+way of learning about a program is to look at it, so here it is:
+
diff --git a/examples/step-13/doc/kind b/examples/step-13/doc/kind
new file mode 100644
index 0000000..c1d9154
--- /dev/null
+++ b/examples/step-13/doc/kind
@@ -0,0 +1 @@
+techniques
diff --git a/examples/step-13/doc/results.dox b/examples/step-13/doc/results.dox
new file mode 100644
index 0000000..228457f
--- /dev/null
+++ b/examples/step-13/doc/results.dox
@@ -0,0 +1,189 @@
+<h1>Results</h1>
+
+
+
+The results of this program are not that interesting - after all
+its purpose was not to demonstrate some new mathematical idea, and
+also not how to program with deal.II, but rather to use the material
+which we have developed in the previous examples to form something
+which demonstrates a way to build modern finite element software in a
+modular and extensible way.
+
+
+
+Nevertheless, we of course show the results of the program. Of
+foremost interest is the point value computation, for which we had
+implemented the corresponding evaluation class. The results (i.e. the
+output) of the program look as follows:
+@code
+Running tests with "global" refinement criterion:
+-------------------------------------------------
+Refinement cycle: 0 1 2 3 4 5 6
+DoFs  u(x_0)
+   25 1.2868
+   81 1.6945
+  289 1.4658
+ 1089 1.5679
+ 4225 1.5882
+16641 1.5932
+66049 1.5945
+
+Running tests with "kelly" refinement criterion:
+------------------------------------------------
+Refinement cycle: 0 1 2 3 4 5 6 7 8 9 10 11
+DoFs  u(x_0)
+   25 1.2868
+   47 0.8775
+   89 1.5365
+  165 1.2974
+  316 1.6442
+  589 1.5221
+ 1093 1.5724
+ 2042 1.5627
+ 3766 1.5916
+ 7124 1.5876
+13111 1.5942
+24838 1.5932
+@endcode
+
+
+What is surprising here is that the exact value is 1.59491554..., and that
+it is apparently quite difficult to compute the solution even to
+only one percent accuracy, although the solution is smooth (in fact
+infinitely often differentiable). This smoothness is shown in the
+graphical output generated by the program, here for the coarse grid and the
+first 9 refinement steps of the Kelly refinement indicator:
+
+
+<table width="80%" align="center">
+  <tr>
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-13.solution-kelly-0.png" alt="">
+    </td>
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-13.solution-kelly-1.png" alt="">
+    </td>
+  </tr>
+
+  <tr>
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-13.solution-kelly-2.png" alt="">
+    </td>
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-13.solution-kelly-3.png" alt="">
+    </td>
+  </tr>
+
+  <tr>
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-13.solution-kelly-4.png" alt="">
+    </td>
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-13.solution-kelly-5.png" alt="">
+    </td>
+  </tr>
+
+  <tr>
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-13.solution-kelly-6.png" alt="">
+    </td>
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-13.solution-kelly-7.png" alt="">
+    </td>
+  </tr>
+
+  <tr>
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-13.solution-kelly-8.png" alt="">
+    </td>
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-13.solution-kelly-9.png" alt="">
+    </td>
+  </tr>
+</table>
+
+
+While we're looking at pictures anyway, this is the eighth grid, as
+viewed from the top:
+
+
+<img src="http://www.dealii.org/images/steps/developer/step-13.grid-kelly-8.png" alt="">
+
+
+However, we are not yet finished with evaluating the point value
+computation. In fact, plotting the error
+$e=|u(x_0)-u_h(x_0)|$ for the two
+refinement criteria yields the following picture:
+
+
+<img src="http://www.dealii.org/images/steps/developer/step-13.error.png" alt="">
+
+
+
+
+What <em>is</em> disturbing about this picture is that not only is the
+adaptive mesh refinement not better than global refinement as one
+would usually expect, it is even significantly worse since its
+convergence is irregular, preventing all extrapolation techniques when
+using the values of subsequent meshes! On the other hand, global
+refinement provides a perfect $1/N$ or $h^2$
+convergence history and provides every opportunity to even improve on
+the point values by extrapolation. Global mesh refinement must
+therefore be considered superior in this example! This is even more
+surprising as the evaluation point is not somewhere in the left part
+where the mesh is coarse, but rather to the right and the adaptive
+refinement should refine the mesh around the evaluation point as well.
+
+
+
+We thus close the discussion of this example program with a question:
+
+<p align="center">
+  <strong><em>What is wrong with adaptivity if it is not better than
+  global refinement?</em></strong></p>
+
+
+
+
+<em>Exercise at the end of this example:</em> There is a simple reason
+for the bad and irregular behavior of the adapted mesh solutions. It
+is simple to find out by looking at the mesh around the evaluation
+point in each of the steps - the data for this is in the output files
+of the program. An exercise would therefore be to modify the mesh
+refinement routine such that the problem (once you notice it) is
+avoided. The second exercise is to check whether the results are then
+better than with global refinement, and if so, whether even a better
+order of convergence (in terms of the number of degrees of freedom) is
+achieved, or only a better constant.
+
+
+
+(<em>Very brief answers for the impatient:</em> at steps with larger
+errors, the mesh is not regular at the point of evaluation, i.e. some
+of the adjacent cells have hanging nodes; this destroys some
+superapproximation effects of which the globally refined mesh can
+profit. Answer 2: this quick hack
+@code
+    bool refinement_indicated = false;
+    typename Triangulation<dim>::active_cell_iterator cell;
+    for (cell=triangulation->begin_active();
+	 cell!=triangulation->end(); ++cell)
+      for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+	if (cell->vertex(v) == Point<dim>(.5,.5))
+	  {
+	    cell->clear_coarsen_flag();
+	    refinement_indicated |= cell->refine_flag_set();
+	  }
+    if (refinement_indicated)
+      for (cell=triangulation->begin_active();
+	   cell!=triangulation->end(); ++cell)
+	for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+	  if (cell->vertex(v) == Point<dim>(.5,.5))
+	    cell->set_refine_flag ();
+@endcode
+in the refinement function of the Kelly refinement class right before
+executing refinement would improve the results (exercise: what does
+the code do?), making them consistently better than global
+refinement. Behavior is still irregular, though, so no results about
+an order of convergence are possible.)
+
diff --git a/examples/step-13/doc/tooltip b/examples/step-13/doc/tooltip
new file mode 100644
index 0000000..41364e2
--- /dev/null
+++ b/examples/step-13/doc/tooltip
@@ -0,0 +1 @@
+Modularity. Software design.
diff --git a/examples/step-13/step-13.cc b/examples/step-13/step-13.cc
new file mode 100644
index 0000000..ce256ea
--- /dev/null
+++ b/examples/step-13/step-13.cc
@@ -0,0 +1,1552 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2001 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Author: Wolfgang Bangerth, University of Heidelberg, 2001, 2002
+ */
+
+
+// As in all programs, we start with a list of include files from the library,
+// and as usual they are in the standard order which is <code>base</code> --
+// <code>lac</code> -- <code>grid</code> -- <code>dofs</code> --
+// <code>fe</code> -- <code>numerics</code> (as each of these categories
+// roughly builds upon previous ones), then C++ standard headers:
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/table_handler.h>
+#include <deal.II/base/thread_management.h>
+#include <deal.II/base/work_stream.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/precondition.h>
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/grid_refinement.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/matrix_tools.h>
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/numerics/error_estimator.h>
+
+// Now for the C++ standard headers:
+#include <iostream>
+#include <fstream>
+#include <list>
+#include <sstream>
+
+// The last step is as in all previous programs:
+namespace Step13
+{
+  using namespace dealii;
+
+  // @sect3{Evaluation of the solution}
+
+  // As for the program itself, we first define classes that evaluate the
+  // solutions of a Laplace equation. In fact, they can evaluate every kind of
+  // solution, as long as it is described by a <code>DoFHandler</code> object,
+  // and a solution vector. We define them here first, even before the classes
+  // that actually generate the solution to be evaluated, since we need to
+  // declare an abstract base class that the solver classes can refer to.
+  //
+  // From an abstract point of view, we declare a pure base class that
+  // provides an evaluation operator() which will do the evaluation of the
+  // solution (whatever derived classes might consider an
+  // <code>evaluation</code>). Since this is the only real function of this
+  // base class (except for some bookkeeping machinery), one usually terms
+  // such a class that only has an <code>operator()</code> a
+  // <code>functor</code> in C++ terminology, since it is used just like a
+  // function object.
+  //
+  // Objects of this functor type will then later be passed to the solver
+  // object, which applies it to the solution just computed. The evaluation
+  // objects may then extract any quantity they like from the solution. The
+  // advantage of putting these evaluation functions into a separate hierarchy
+  // of classes is that by design they cannot use the internals of the solver
+  // object and are therefore independent of changes to the way the solver
+  // works. Furthermore, it is trivial to write another evaluation class
+  // without modifying the solver class, which speeds up programming (not
+  // being able to use internals of another class also means that you do not
+  // have to worry about them -- programming evaluators is usually a rather
+  // quickly done task), as well as compilation (if solver and evaluation
+  // classes are put into different files: the solver only needs to see the
+  // declaration of the abstract base class, and therefore does not need to be
+  // recompiled upon addition of a new evaluation class, or modification of an
+  // old one).  On a related note, you can reuse the evaluation classes for
+  // other projects, solving different equations.
+  //
+  // In order to improve separation of code into different modules, we put the
+  // evaluation classes into a namespace of their own. This makes it easier to
+  // actually solve different equations in the same program, by assembling it
+  // from existing building blocks. The reason for this is that classes for
+  // similar purposes tend to have the same name, although they were developed
+  // in different contexts. In order to be able to use them together in one
+  // program, it is necessary that they are placed in different
+  // namespaces. This we do here:
+  namespace Evaluation
+  {
+
+    // Now for the abstract base class of evaluation classes: its main purpose
+    // is to declare a pure virtual function <code>operator()</code> taking a
+    // <code>DoFHandler</code> object, and the solution vector. In order to be
+    // able to use pointers to this base class only, it also has to declare a
+    // virtual destructor, which however does nothing. Besides this, it only
+    // provides for a little bit of bookkeeping: since we usually want to
+    // evaluate solutions on subsequent refinement levels, we store the number
+    // of the present refinement cycle, and provide a function to change this
+    // number.
+    template <int dim>
+    class EvaluationBase
+    {
+    public:
+      virtual ~EvaluationBase ();
+
+      void set_refinement_cycle (const unsigned int refinement_cycle);
+
+      virtual void operator () (const DoFHandler<dim> &dof_handler,
+                                const Vector<double>  &solution) const = 0;
+    protected:
+      unsigned int refinement_cycle;
+    };
+
+
+    // After the declaration has been discussed above, the implementation is
+    // rather straightforward:
+    template <int dim>
+    EvaluationBase<dim>::~EvaluationBase ()
+    {}
+
+
+
+    template <int dim>
+    void
+    EvaluationBase<dim>::set_refinement_cycle (const unsigned int step)
+    {
+      refinement_cycle = step;
+    }
+
+
+    // @sect4{%Point evaluation}
+
+    // The next thing is to implement actual evaluation classes. As noted in
+    // the introduction, we'd like to extract a point value from the solution,
+    // so the first class does this in its <code>operator()</code>. The actual
+    // point is given to this class through the constructor, as well as a
+    // table object into which it will put its findings.
+    //
+    // Finding out the value of a finite element field at an arbitrary point
+    // is rather difficult, if we cannot rely on knowing the actual finite
+    // element used, since then we cannot, for example, interpolate between
+    // nodes. For simplicity, we therefore assume here that the point at which
+    // we want to evaluate the field is actually a node. If, in the process of
+    // evaluating the solution, we find that we did not encounter this point
+    // upon looping over all vertices, we then have to throw an exception in
+    // order to signal to the calling functions that something has gone wrong,
+    // rather than silently ignore this error.
+    //
+    // In the step-9 example program, we have already seen how such an
+    // exception class can be declared, using the <code>DeclExceptionN</code>
+    // macros. We use this mechanism here again.
+    //
+    // From this, the actual declaration of this class should be evident. Note
+    // that of course even if we do not list a destructor explicitly, an
+    // implicit destructor is generated by the compiler, and it is virtual
+    // just as the one of the base class.
+    template <int dim>
+    class PointValueEvaluation : public EvaluationBase<dim>
+    {
+    public:
+      PointValueEvaluation (const Point<dim>   &evaluation_point,
+                            TableHandler       &results_table);
+
+      virtual void operator () (const DoFHandler<dim> &dof_handler,
+                                const Vector<double>  &solution) const;
+
+      DeclException1 (ExcEvaluationPointNotFound,
+                      Point<dim>,
+                      << "The evaluation point " << arg1
+                      << " was not found among the vertices of the present grid.");
+    private:
+      const Point<dim>  evaluation_point;
+      TableHandler     &results_table;
+    };
+
+
+    // As for the definition, the constructor is trivial, just taking data and
+    // storing it in object-local ones:
+    template <int dim>
+    PointValueEvaluation<dim>::
+    PointValueEvaluation (const Point<dim>   &evaluation_point,
+                          TableHandler       &results_table)
+      :
+      evaluation_point (evaluation_point),
+      results_table (results_table)
+    {}
+
+
+
+    // Now for the function that is mainly of interest in this class, the
+    // computation of the point value:
+    template <int dim>
+    void
+    PointValueEvaluation<dim>::
+    operator () (const DoFHandler<dim> &dof_handler,
+                 const Vector<double>  &solution) const
+    {
+      // First allocate a variable that will hold the point value. Initialize
+      // it with a value that is clearly bogus, so that if we fail to set it
+      // to a reasonable value, we will note at once. This may not be
+      // necessary in a function as small as this one, since we can easily see
+      // all possible paths of execution here, but it proved to be helpful for
+      // more complex cases, and so we employ this strategy here as well.
+      double point_value = 1e20;
+
+      // Then loop over all cells and all their vertices, and check whether a
+      // vertex matches the evaluation point. If this is the case, then
+      // extract the point value, set a flag that we have found the point of
+      // interest, and exit the loop.
+      typename DoFHandler<dim>::active_cell_iterator
+      cell = dof_handler.begin_active(),
+      endc = dof_handler.end();
+      bool evaluation_point_found = false;
+      for (; (cell!=endc) && !evaluation_point_found; ++cell)
+        for (unsigned int vertex=0;
+             vertex<GeometryInfo<dim>::vertices_per_cell;
+             ++vertex)
+          if (cell->vertex(vertex) == evaluation_point)
+            {
+              // In order to extract the point value from the global solution
+              // vector, pick that component that belongs to the vertex of
+              // interest, and, in case the solution is vector-valued, take
+              // the first component of it:
+              point_value = solution(cell->vertex_dof_index(vertex,0));
+              // Note that by this we have made an assumption that is not
+              // valid always and should be documented in the class
+              // declaration if this were code for a real application rather
+              // than a tutorial program: we assume that the finite element
+              // used for the solution we try to evaluate actually has degrees
+              // of freedom associated with vertices. This, for example, does
+              // not hold for discontinuous elements, where the support points
+              // for the shape functions happen to be located at the vertices,
+              // but are not associated with the vertices but rather with the
+              // cell interior, since association with vertices would imply
+              // continuity there. It would also not hold for edge oriented
+              // elements, and the like.
+              //
+              // Ideally, we would check this at the beginning of the
+              // function, for example by a statement like <code>Assert
+              // (dof_handler.get_fe().dofs_per_vertex @> 0,
+              // ExcNotImplemented())</code>, which should make it quite clear
+              // what is going wrong when the exception is triggered. In this
+              // case, we omit it (which is indeed bad style), but knowing
+              // that that does not hurt here, since the statement
+              // <code>cell-@>vertex_dof_index(vertex,0)</code> would fail if
+              // we asked it to give us the DoF index of a vertex if there
+              // were none.
+              //
+              // We stress again that this restriction on the allowed finite
+              // elements should be stated in the class documentation.
+
+              // Since we found the right point, we now set the respective
+              // flag and exit the innermost loop. The outer loop will then
+              // also be terminated due to the set flag.
+              evaluation_point_found = true;
+              break;
+            };
+
+      // Finally, we'd like to make sure that we have indeed found the
+      // evaluation point, since if that were not so we could not give a
+      // reasonable value of the solution there and the rest of the
+      // computations were useless anyway. So make sure through the
+      // <code>AssertThrow</code> macro already used in the step-9 program
+      // that we have indeed found this point. If this is not so, the macro
+      // throws an exception of the type that is given to it as second
+      // argument, but compared to a straightforward <code>throw</code>
+      // statement, it fills the exception object with a set of additional
+      // information, for example the source file and line number where the
+      // exception was generated, and the condition that failed. If you have a
+      // <code>catch</code> clause in your main function (as this program
+      // has), you will catch all exceptions that are not caught somewhere in
+      // between and thus already handled, and this additional information
+      // will help you find out what happened and where it went wrong.
+      AssertThrow (evaluation_point_found,
+                   ExcEvaluationPointNotFound(evaluation_point));
+      // Note that we have used the <code>Assert</code> macro in other example
+      // programs as well. It differed from the <code>AssertThrow</code> macro
+      // used here in that it simply aborts the program, rather than throwing
+      // an exception, and that it did so only in debug mode. It was the right
+      // macro to use to check about the size of vectors passed as arguments
+      // to functions, and the like.
+      //
+      // However, here the situation is different: whether we find the
+      // evaluation point or not may change from refinement to refinement (for
+      // example, if the four cells around the point are coarsened away, then the
+      // point may vanish after refinement and coarsening). This is something
+      // that cannot be predicted from a small number of runs of the program in
+      // debug mode, but should be checked always, also in production
+      // runs. Thus the use of the <code>AssertThrow</code> macro here.
+
+      // Now, if we are sure that we have found the evaluation point, we can
+      // add the results into the table of results:
+      results_table.add_value ("DoFs", dof_handler.n_dofs());
+      results_table.add_value ("u(x_0)", point_value);
+    }
+
+
+
+
+    // @sect4{Generating output}
+
+    // A different, maybe slightly odd kind of <code>evaluation</code> of a
+    // solution is to output it to a file in a graphical format. Since in the
+    // evaluation functions we are given a <code>DoFHandler</code> object and
+    // the solution vector, we have all we need to do this, so we can do it in
+    // an evaluation class. The reason for actually doing so instead of
+    // putting it into the class that computed the solution is that this way
+    // we have more flexibility: if we choose to only output certain aspects
+    // of it, or not output it at all. In any case, we do not need to modify
+    // the solver class, we just have to modify one of the modules out of
+    // which we build this program. This form of encapsulation, as above,
+    // helps us to keep each part of the program rather simple as the
+    // interfaces are kept simple, and no access to hidden data is possible.
+    //
+    // Since this class which generates the output is derived from the common
+    // <code>EvaluationBase</code> base class, its main interface is the
+    // <code>operator()</code> function. Furthermore, it has a constructor
+    // taking a string that will be used as the base part of the file name to
+    // which output will be sent (we will augment it by a number indicating
+    // the number of the refinement cycle -- the base class has this
+    // information at hand --, and a suffix), and the constructor also takes a
+    // value that indicates which format is requested, i.e. for which graphics
+    // program we shall generate output (from this we will then also generate
+    // the suffix of the filename to which we write).
+    //
+    // Regarding the output format, the DataOutBase namespace
+    // provides an enumeration field
+    // DataOutBase::OutputFormat which lists names for all supported output
+    // formats. At the time of writing of this program, the supported graphics
+    // formats are represented by the enum values <code>ucd</code>,
+    // <code>gnuplot</code>, <code>povray</code>, <code>eps</code>,
+    // <code>gmv</code>, <code>tecplot</code>, <code>tecplot_binary</code>,
+    // <code>dx</code>, <code>vtk</code>, etc, but this list will certainly
+    // grow over time. Now, within various functions of that base class, you
+    // can use values of this type to get information about these graphics
+    // formats (for example the default suffix used for files of each format),
+    // and you can call a generic <code>write</code> function, which then
+    // branches to the <code>write_gnuplot</code>, <code>write_ucd</code>, etc
+    // functions which we have used in previous examples already, based on the
+    // value of a second argument given to it denoting the required output
+    // format. This mechanism makes it simple to write an extensible program
+    // that can decide which output format to use at runtime, and it also
+    // makes it rather simple to write the program in a way such that it takes
+    // advantage of newly implemented output formats, without the need to
+    // change the application program.
+    //
+    // Of these two fields, the base name and the output format descriptor,
+    // the constructor takes values and stores them for later use by the
+    // actual evaluation function.
+    template <int dim>
+    class SolutionOutput : public EvaluationBase<dim>
+    {
+    public:
+      SolutionOutput (const std::string               &output_name_base,
+                      const DataOutBase::OutputFormat  output_format);
+
+      virtual void operator () (const DoFHandler<dim> &dof_handler,
+                                const Vector<double>  &solution) const;
+    private:
+      const std::string               output_name_base;
+      const DataOutBase::OutputFormat output_format;
+    };
+
+
+    template <int dim>
+    SolutionOutput<dim>::
+    SolutionOutput (const std::string               &output_name_base,
+                    const DataOutBase::OutputFormat  output_format)
+      :
+      output_name_base (output_name_base),
+      output_format (output_format)
+    {}
+
+
+    // After the description above, the function generating the actual output
+    // is now relatively straightforward. The only particularly interesting
+    // feature over previous example programs is the use of the
+    // DataOutBase::default_suffix function, returning the usual
+    // suffix for files of a given format (e.g. ".eps" for encapsulated
+    // postscript files, ".gnuplot" for Gnuplot files), and of the generic
+    // <code>DataOut::write</code> function with a second argument, which
+    // branches to the actual output functions for the different graphics
+    // formats, based on the value of the format descriptor passed as second
+    // argument.
+    //
+    // Also note that we have to prefix <code>this-@></code> to access a
+    // member variable of the template dependent base class. The reason here,
+    // and further down in the program is the same as the one described in the
+    // step-7 example program (look for <code>two-stage name lookup</code>
+    // there).
+    template <int dim>
+    void
+    SolutionOutput<dim>::operator () (const DoFHandler<dim> &dof_handler,
+                                      const Vector<double>  &solution) const
+    {
+      DataOut<dim> data_out;
+      data_out.attach_dof_handler (dof_handler);
+      data_out.add_data_vector (solution, "solution");
+      data_out.build_patches ();
+
+      std::ostringstream filename;
+      filename << output_name_base << "-"
+               << this->refinement_cycle
+               << data_out.default_suffix (output_format)
+               << std::ends;
+      std::ofstream out (filename.str().c_str());
+
+      data_out.write (out, output_format);
+    }
+
+
+
+    // @sect4{Other evaluations}
+
+    // In practical applications, one would add here a list of other possible
+    // evaluation classes, representing quantities that one may be interested
+    // in. For this example, that much shall be sufficient, so we close the
+    // namespace.
+  }
+
+
+  // @sect3{The Laplace solver classes}
+
+  // After defining what we want to know of the solution, we should now care
+  // how to get at it. We will pack everything we need into a namespace of its
+  // own, for much the same reasons as for the evaluations above.
+  //
+  // Since we have discussed Laplace solvers already in considerable detail in
+  // previous examples, there is not much new stuff following. Rather, we have
+  // to a great extent cannibalized previous examples and put them, in
+  // slightly different form, into this example program. We will therefore
+  // mostly be concerned with discussing the differences to previous examples.
+  //
+  // Basically, as already said in the introduction, the lack of new stuff in
+  // this example is deliberate, as it is more to demonstrate software design
+  // practices, rather than mathematics. The emphasis in explanations below
+  // will therefore be more on the actual implementation.
+  namespace LaplaceSolver
+  {
+    // @sect4{An abstract base class}
+
+    // In defining a Laplace solver, we start out by declaring an abstract
+    // base class, that has no functionality itself except for taking and
+    // storing a pointer to the triangulation to be used later.
+    //
+    // This base class is very general, and could as well be used for any
+    // other stationary problem. It provides declarations of functions that
+    // shall, in derived classes, solve a problem, postprocess the solution
+    // with a list of evaluation objects, and refine the grid,
+    // respectively. None of these functions actually does something itself in
+    // the base class.
+    //
+    // Due to the lack of actual functionality, the programming style of
+    // declaring very abstract base classes is similar to the style used in
+    // Smalltalk or Java programs, where all classes are derived from entirely
+    // abstract classes <code>Object</code>, even number representations. The
+    // author admits that he does not particularly like the use of such a
+    // style in C++, as it puts style over reason. Furthermore, it promotes
+    // the use of virtual functions for everything (for example, in Java, all
+    // functions are virtual per se), which, however, has proven to be rather
+    // inefficient in many applications where functions are often only
+    // accessing data, not doing computations, and therefore quickly return;
+    // the overhead of virtual functions can then be significant. The opinion
+    // of the author is to have abstract base classes wherever at least some
+    // part of the code of actual implementations can be shared and thus
+    // separated into the base class.
+    //
+    // Besides all these theoretical questions, we here have a good reason,
+    // which will become clearer to the reader below. Basically, we want to be
+    // able to have a family of different Laplace solvers that differ so much
+    // that no larger common subset of functionality could be found. We
+    // therefore just declare such an abstract base class, taking a pointer to
+    // a triangulation in the constructor and storing it henceforth. Since
+    // this triangulation will be used throughout all computations, we have to
+    // make sure that the triangulation exists until the destructor exits. We
+    // do this by keeping a <code>SmartPointer</code> to this triangulation,
+    // which uses a counter in the triangulation class to denote the fact that
+    // there is still an object out there using this triangulation, thus
+    // leading to an abort in case the triangulation is attempted to be
+    // destructed while this object still uses it.
+    //
+    // Note that while the pointer itself is declared constant
+    // (i.e. throughout the lifetime of this object, the pointer points to the
+    // same object), it is not declared as a pointer to a constant
+    // triangulation. In fact, by this we allow that derived classes refine or
+    // coarsen the triangulation within the <code>refine_grid</code> function.
+    //
+    // Finally, the function <code>n_dofs</code> is only a tool for the
+    // driver functions to decide whether we want to go on with mesh
+    // refinement or not. It returns the number of degrees of freedom the
+    // present simulation has.
+    template <int dim>
+    class Base
+    {
+    public:
+      Base (Triangulation<dim> &coarse_grid);
+      virtual ~Base ();
+
+      virtual void solve_problem () = 0;
+      virtual void postprocess (const Evaluation::EvaluationBase<dim> &postprocessor) const = 0;
+      virtual void refine_grid () = 0;
+      virtual unsigned int n_dofs () const = 0;
+
+    protected:
+      const SmartPointer<Triangulation<dim> > triangulation;
+    };
+
+
+    // The implementation of the only two non-abstract functions is then
+    // rather boring:
+    template <int dim>
+    Base<dim>::Base (Triangulation<dim> &coarse_grid)
+      :
+      triangulation (&coarse_grid)
+    {}
+
+
+    template <int dim>
+    Base<dim>::~Base ()
+    {}
+
+
+    // @sect4{A general solver class}
+
+    // Following now the main class that implements assembling the matrix of
+    // the linear system, solving it, and calling the postprocessor objects on
+    // the solution. It implements the <code>solve_problem</code> and
+    // <code>postprocess</code> functions declared in the base class. It does
+    // not, however, implement the <code>refine_grid</code> method, as mesh
+    // refinement will be implemented in a number of derived classes.
+    //
+    // It also declares a new abstract virtual function,
+    // <code>assemble_rhs</code>, that needs to be overridden in
+    // subclasses. The reason is that we will implement two different classes
+    // that will implement different methods to assemble the right hand side
+    // vector. This function might also be interesting in cases where the
+    // right hand side depends not simply on a continuous function, but on
+    // something else as well, for example the solution of another discretized
+    // problem, etc. The latter happens frequently in non-linear problems.
+    //
+    // As we mentioned previously, the actual content of this class is not
+    // new, but a mixture of various techniques already used in previous
+    // examples. We will therefore not discuss them in detail, but refer the
+    // reader to these programs.
+    //
+    // Basically, in a few words, the constructor of this class takes pointers
+    // to a triangulation, a finite element, and a function object
+    // representing the boundary values. These are either passed down to the
+    // base class's constructor, or are stored and used to generate a
+    // <code>DoFHandler</code> object later. Since finite elements and
+    // quadrature formula should match, it is also passed a quadrature object.
+    //
+    // The <code>solve_problem</code> sets up the data structures for the
+    // actual solution, calls the functions to assemble the linear system, and
+    // solves it.
+    //
+    // The <code>postprocess</code> function finally takes an evaluation
+    // object and applies it to the computed solution.
+    //
+    // The <code>n_dofs</code> function finally implements the pure virtual
+    // function of the base class.
+    template <int dim>
+    class Solver : public virtual Base<dim>
+    {
+    public:
+      Solver (Triangulation<dim>       &triangulation,
+              const FiniteElement<dim> &fe,
+              const Quadrature<dim>    &quadrature,
+              const Function<dim>      &boundary_values);
+      virtual
+      ~Solver ();
+
+      virtual
+      void
+      solve_problem ();
+
+      virtual
+      void
+      postprocess (const Evaluation::EvaluationBase<dim> &postprocessor) const;
+
+      virtual
+      unsigned int
+      n_dofs () const;
+
+      // In the protected section of this class, we first have a number of
+      // member variables, of which the use should be clear from the previous
+      // examples:
+    protected:
+      const SmartPointer<const FiniteElement<dim> >  fe;
+      const SmartPointer<const Quadrature<dim> >     quadrature;
+      DoFHandler<dim>                                dof_handler;
+      Vector<double>                                 solution;
+      const SmartPointer<const Function<dim> >       boundary_values;
+
+      // Then we declare an abstract function that will be used to assemble
+      // the right hand side. As explained above, there are various cases for
+      // which this action differs strongly in what is necessary, so we defer
+      // this to derived classes:
+      virtual void assemble_rhs (Vector<double> &rhs) const = 0;
+
+      // Next, in the private section, we have a small class which represents
+      // an entire linear system, i.e. a matrix, a right hand side, and a
+      // solution vector, as well as the constraints that are applied to it,
+      // such as those due to hanging nodes. Its constructor initializes the
+      // various subobjects, and there is a function that implements a
+      // conjugate gradient method as solver.
+    private:
+      struct LinearSystem
+      {
+        LinearSystem (const DoFHandler<dim> &dof_handler);
+
+        void solve (Vector<double> &solution) const;
+
+        ConstraintMatrix     hanging_node_constraints;
+        SparsityPattern      sparsity_pattern;
+        SparseMatrix<double> matrix;
+        Vector<double>       rhs;
+      };
+
+
+      // Finally, there is a set of functions which will be used to
+      // assemble the actual system matrix. The main function of this
+      // group, <code>assemble_linear_system()</code> computes the
+      // matrix in parallel on multicore systems, using the following
+      // two helper functions. The mechanism for doing so is the same
+      // as in the step-9 example program and follows the WorkStream
+      // concept outlined in @ref threads . The main function also
+      // calls the virtual function assembling the right hand side.
+      struct AssemblyScratchData
+      {
+        AssemblyScratchData (const FiniteElement<dim> &fe,
+                             const Quadrature<dim>    &quadrature);
+        AssemblyScratchData (const AssemblyScratchData &scratch_data);
+
+        FEValues<dim>     fe_values;
+      };
+
+      struct AssemblyCopyData
+      {
+        FullMatrix<double> cell_matrix;
+        std::vector<types::global_dof_index> local_dof_indices;
+      };
+
+      void
+      assemble_linear_system (LinearSystem &linear_system);
+
+      void
+      local_assemble_matrix (const typename DoFHandler<dim>::active_cell_iterator &cell,
+                             AssemblyScratchData                                  &scratch_data,
+                             AssemblyCopyData                                     &copy_data) const;
+
+      void
+      copy_local_to_global(const AssemblyCopyData &copy_data,
+                           LinearSystem           &linear_system) const;
+    };
+
+
+
+    // Now here comes the constructor of the class. It does not do much except
+    // store pointers to the objects given, and generate
+    // a <code>DoFHandler</code> object initialized with the given pointer to a
+    // triangulation. This causes the DoF handler to store that pointer, but
+    // does not already generate a finite element numbering (we only ask for
+    // that in the <code>solve_problem</code> function).
+    template <int dim>
+    Solver<dim>::Solver (Triangulation<dim>       &triangulation,
+                         const FiniteElement<dim> &fe,
+                         const Quadrature<dim>    &quadrature,
+                         const Function<dim>      &boundary_values)
+      :
+      Base<dim> (triangulation),
+      fe (&fe),                            // store the address, not a copy
+      quadrature (&quadrature),            // likewise held by pointer
+      dof_handler (triangulation),         // attached to the mesh; no DoFs are distributed here
+      boundary_values (&boundary_values)   // likewise held by pointer
+    {}
+
+
+    // The destructor is simple, it only clears the information stored in the
+    // DoF handler object to release the memory.
+    template <int dim>
+    Solver<dim>::~Solver ()
+    {
+      dof_handler.clear ();  // the only explicit cleanup step performed by this destructor
+    }
+
+
+    // The next function is the one which delegates the main work in solving
+    // the problem: it sets up the DoF handler object with the finite element
+    // given to the constructor of this object, then creates an object that
+    // denotes the linear system (i.e. the matrix, the right hand side vector,
+    // and the solution vector), calls the function to assemble it, and
+    // finally solves it:
+    template <int dim>
+    void
+    Solver<dim>::solve_problem ()
+    {
+      // Number the unknowns for the stored element and size the solution.
+      dof_handler.distribute_dofs (*fe);
+      solution.reinit (dof_handler.n_dofs());
+      LinearSystem discrete_problem (dof_handler);  // lives only for this call
+      assemble_linear_system (discrete_problem);
+      discrete_problem.solve (solution);
+    }
+
+
+    // As stated above, the <code>postprocess</code> function takes an
+    // evaluation object, and applies it to the computed solution. This
+    // function may be called multiple times, once for each evaluation of the
+    // solution which the user required.
+    template <int dim>
+    void
+    Solver<dim>::
+    postprocess (const Evaluation::EvaluationBase<dim> &postprocessor) const
+    {
+      postprocessor (dof_handler, solution);  // invoke the evaluator's call operator on the current state
+    }
+
+
+    // The <code>n_dofs</code> function should be self-explanatory:
+    template <int dim>
+    unsigned int
+    Solver<dim>::n_dofs () const
+    {
+      return dof_handler.n_dofs();  // forwarded from the DoF handler, converted to unsigned int
+    }
+
+
+    // The following function assembles matrix and right hand side of
+    // the linear system to be solved in each step. We will do things
+    // in parallel at a couple of levels. First, note that we need to
+    // assemble both the matrix and the right hand side. These are
+    // independent operations, and we should do this in parallel. To
+    // this end, we use the concept of "tasks" that is discussed in
+    // the @ref threads documentation module. In essence, what we want
+    // to say is "here is something that needs to be worked on, go do it
+    // whenever a CPU core is available", then do something else, and
+    // when we need the result of the first operation wait for its
+    // completion. At the second level, we want to assemble the matrix
+    // using the exact same strategy we have already used in step-9,
+    // namely the WorkStream concept.
+    //
+    // While we could consider either assembling the right hand side
+    // or assembling the matrix as the thing to do in the background
+    // while doing the other, we will opt for the former approach
+    // simply because the call to <code>Solver::assemble_rhs</code> is
+    // so much simpler to write than the call to WorkStream::run with
+    // its many arguments. In any case, the code then looks like this
+    // to assemble the entire linear system:
+    template <int dim>
+    void
+    Solver<dim>::assemble_linear_system (LinearSystem &linear_system)
+    {
+      Threads::Task<> rhs_task = Threads::new_task (&Solver<dim>::assemble_rhs,
+                                                    *this,
+                                                    linear_system.rhs);
+
+      WorkStream::run(dof_handler.begin_active(),
+                      dof_handler.end(),
+                      std_cxx11::bind(&Solver<dim>::local_assemble_matrix,
+                                      this,
+                                      std_cxx11::_1,
+                                      std_cxx11::_2,
+                                      std_cxx11::_3),
+                      std_cxx11::bind(&Solver<dim>::copy_local_to_global,
+                                      this,
+                                      std_cxx11::_1,
+                                      std_cxx11::ref(linear_system)),
+                      AssemblyScratchData(*fe, *quadrature),
+                      AssemblyCopyData());
+      linear_system.hanging_node_constraints.condense (linear_system.matrix);
+
+      // The syntax above using <code>std_cxx11::bind</code> requires
+      // some explanation. There are multiple versions of
+      // WorkStream::run that expect different arguments. In step-9,
+      // we used one version that took a pair of iterators, a pair of
+      // pointers to member functions with very specific argument
+      // lists, a pointer or reference to the object on which these
+      // member functions have to work, and a scratch and copy data
+      // object. This is a bit restrictive since the member functions
+      // called this way have to have an argument list that exactly
+      // matches what WorkStream::run expects: the local assembly
+      // function needs to take an iterator, a scratch object and a
+      // copy object; and the copy-local-to-global function needs to
+      // take exactly a copy object. But, what if we want something
+      // that's slightly more general? For example, in the current
+      // program, the copy-local-to-global function needs to know
+      // which linear system object to write the local contributions
+      // into, i.e., it also has to take a <code>LinearSystem</code>
+      // argument. That won't work with the approach using member
+      // function pointers.
+      //
+      // Fortunately, C++ offers a way out. These are called function
+      // objects. In essence, what WorkStream::run wants to do is not
+      // call a member function. It wants to call some function that
+      // takes an iterator, a scratch object and a copy object in the
+      // first case, and a copy object in the second case. Whether
+      // these are member functions, global functions, or something
+      // else, is really not of much concern to
+      // WorkStream. Consequently, there is a second version of the
+      // function that just takes function objects -- objects that
+      // have an <code>operator()</code> and that consequently can be
+      // called like functions, whatever they really represent. The
+      // typical way to generate such function objects is using
+      // <code>std::bind</code> (or, if the compiler is too old, a
+      // replacement for it, which we generically call
+      // <code>std_cxx11::bind</code>) which takes a pointer to a
+      // (member) function and then <i>binds</i> individual arguments
+      // to fixed values. For example, you can create a function that
+      // takes an iterator, a scratch object and a copy object by
+      // taking the address of a member function and binding the
+      // (implicit) argument to the object on which it is to work to
+      // <code>*this</code>. This is what we do in the first call
+      // above. In the second call, we need to create a function
+      // object that takes a copy object, and we do so by taking the
+      // address of a member function that takes an implicit pointer
+      // to <code>*this</code>, a reference to a copy object, and a
+      // reference to a linear system, and binding the first and third
+      // of these, leaving something that has only one open argument
+      // that can then be filled by WorkStream::run().
+      //
+      // There remains the question of what the
+      // <code>std_cxx11::_1</code>, <code>std_cxx11::_2</code>, etc.,
+      // mean. (These arguments are called <i>placeholders</i>.) The
+      // idea of using <code>std_cxx11::bind</code> in the first of
+      // the two cases above is that it produces an object that can be
+      // called with three arguments. But how are the three arguments
+      // the function object is being called with going to be
+      // distributed to the four arguments
+      // <code>local_assemble_matrix()</code> (including the implicit
+      // <code>this</code> pointer)? As specified, the first argument
+      // given to the function object will become the first argument
+      // given to <code>local_assemble_matrix()</code>, the second the
+      // second, etc. This is trivial here, but allows for interesting
+      // games in other circumstances. Consider, for example, having a
+      // function <code>void f(double x, double y)</code>. Then,
+      // creating a variable <code>p</code> of type
+      // <code>std_cxx11::function@<void (double,double)@></code> and
+      // initializing <code>p=std_cxx11::bind(&f, std_cxx11::_2,
+      // std_cxx11::_1)</code> then calling <code>p(1,2)</code> will
+      // result in calling <code>f(2,1)</code>.
+      //
+      // @note Once deal.II can rely on every compiler being able to
+      // fully understand the syntax of the C++11 standard, one can
+      // use C++'s version of <a
+      // href="http://en.wikipedia.org/wiki/Anonymous_function">lambda
+      // functions</a> to achieve the same goal. In essence, a lambda
+      // function is a function without a name that is defined right
+      // at the one place where it is going to be used -- i.e., where
+      // we pass the third and fourth argument to WorkStream::run. The
+      // functions one would define in these locations would take 3
+      // and 1 arguments, respectively, and all they do is call
+      // <code>Solver::local_assemble_matrix</code> and
+      // <code>Solver::copy_local_to_global</code> with the required
+      // number of arguments, utilizing what the lambda function has
+      // gotten as arguments itself. We won't show the syntax this
+      // would require since it is no less confusing than the one used
+      // above.
+
+      // At this point, we have assembled the matrix and condensed
+      // it. The right hand side may or may not have been completely
+      // assembled, but we would like to condense the right hand side
+      // vector next. We can only do this if the assembly of this
+      // vector has finished, so we have to wait for the task to
+      // finish; in computer science, waiting for a task is typically
+      // called "joining" the task, explaining the name of the
+      // function we call below.
+      //
+      // Since that task may or may not have finished, and since we
+      // may have to wait for it to finish, we may as well try to pack
+      // other things that need to be done anyway into this
+      // gap. Consequently, we first interpolate boundary values
+      // before we wait for the right hand side. Of course, another
+      // possibility would have been to also interpolate the boundary
+      // values on a separate task since doing so is independent of
+      // the other things we have done in this function so far. Feel
+      // free to find the correct syntax to also create a task for
+      // this interpolation and start it at the top of this function,
+      // along with the assembly of the right hand side. (You will
+      // find that this is slightly more complicated since there are
+      // multiple versions of
+      // VectorTools::interpolate_boundary_values(), and so simply
+      // taking the address
+      // <code>&VectorTools::interpolate_boundary_values</code>
+      // produces a set of overloaded functions that can't be passed
+      // to Threads::new_task() right away -- you have to select which
+      // element of this overload set you want by casting the address
+      // expression to a function pointer type that is specific to the
+      // version of the function that you want to call on the task.)
+      std::map<types::global_dof_index,double> boundary_value_map;
+      VectorTools::interpolate_boundary_values (dof_handler,
+                                                0,
+                                                *boundary_values,
+                                                boundary_value_map);
+
+      rhs_task.join ();  // the right hand side must be complete before it is condensed
+      linear_system.hanging_node_constraints.condense (linear_system.rhs);
+
+      // Now that we have the complete linear system, we can also
+      // treat boundary values, which need to be eliminated from both
+      // the matrix and the right hand side:
+      MatrixTools::apply_boundary_values (boundary_value_map,
+                                          linear_system.matrix,
+                                          solution,
+                                          linear_system.rhs);
+
+    }
+
+
+    // The second half of this set of functions deals with the local
+    // assembly on each cell and copying local contributions into the
+    // global matrix object. This works in exactly the same way as
+    // described in step-9:
+    template <int dim>
+    Solver<dim>::AssemblyScratchData::
+    AssemblyScratchData (const FiniteElement<dim> &fe,
+                         const Quadrature<dim>    &quadrature)
+      :
+      fe_values (fe,
+                 quadrature,
+                 update_gradients | update_JxW_values)  // the quantities local_assemble_matrix() reads
+    {}
+
+
+    template <int dim>
+    Solver<dim>::AssemblyScratchData::
+    AssemblyScratchData (const AssemblyScratchData &scratch_data)
+      :
+      fe_values (scratch_data.fe_values.get_fe(),          // rebuilt from the source's element ...
+                 scratch_data.fe_values.get_quadrature(),  // ... and quadrature rather than copied
+                 update_gradients | update_JxW_values)     // same flags as the other constructor
+    {}
+
+
+    template <int dim>
+    void
+    Solver<dim>::local_assemble_matrix (const typename DoFHandler<dim>::active_cell_iterator &cell,
+                                        AssemblyScratchData                                  &scratch_data,
+                                        AssemblyCopyData                                     &copy_data) const
+    {
+      // Short aliases for the scratch FEValues object and the output matrix.
+      FEValues<dim>      &fe_values   = scratch_data.fe_values;
+      FullMatrix<double> &cell_matrix = copy_data.cell_matrix;
+      const unsigned int  n_local     = fe->dofs_per_cell;
+      const unsigned int  n_quad      = quadrature->size();
+      // Size the outputs, then evaluate the shape data on the current cell.
+      cell_matrix.reinit (n_local, n_local);
+      copy_data.local_dof_indices.resize (n_local);
+      fe_values.reinit (cell);
+
+      // Sum grad(phi_i) * grad(phi_j) * JxW over all quadrature points.
+      for (unsigned int q=0; q<n_quad; ++q)
+        for (unsigned int i=0; i<n_local; ++i)
+          for (unsigned int j=0; j<n_local; ++j)
+            cell_matrix(i,j) += (fe_values.shape_grad(i,q) *
+                                 fe_values.shape_grad(j,q) * fe_values.JxW(q));
+      cell->get_dof_indices (copy_data.local_dof_indices);
+    }
+
+
+
+    template <int dim>
+    void
+    Solver<dim>::copy_local_to_global(const AssemblyCopyData &copy_data,
+                                      LinearSystem           &linear_system) const
+    {
+      // Add every local entry (row,col) at its global position.
+      const std::vector<types::global_dof_index> &dofs = copy_data.local_dof_indices;
+      for (unsigned int row=0; row<dofs.size(); ++row)
+        for (unsigned int col=0; col<dofs.size(); ++col)
+          linear_system.matrix.add (dofs[row], dofs[col], copy_data.cell_matrix(row,col));
+    }
+
+
+    // Now for the functions that implement actions in the linear system
+    // class. First, the constructor initializes all data elements to their
+    // correct sizes, and sets up a number of additional data structures, such
+    // as constraints due to hanging nodes. Since setting up the hanging nodes
+    // and finding out about the nonzero elements of the matrix is
+    // independent, we do that in parallel (if the library was configured to
+    // use concurrency, at least; otherwise, the actions are performed
+    // sequentially). Note that we start only one task, and do the second
+    // action in the main thread. Since only one task is generated, we don't
+    // use the <code>Threads::TaskGroup</code> class here, but rather use
+    // the one created task object directly to wait for this particular
+    // task's completion.
+    //
+    // Note that taking up the address of the
+    // <code>DoFTools::make_hanging_node_constraints</code> function is a
+    // little tricky, since there are actually three of them, one for each
+    // supported space dimension. Taking addresses of overloaded functions is
+    // somewhat complicated in C++, since the address-of operator
+    // <code>&</code> in that case returns more like a set of values (the
+    // addresses of all functions with that name), and selecting the right one
+    // is then the next step. If the context dictates which one to take (for
+    // example by assigning to a function pointer of known type), then the
+    // compiler can do that by itself, but if this set of pointers shall be
+    // given as the argument to a function that takes a template, the compiler
+    // could choose all without having a preference for one. We therefore have
+    // to make it clear to the compiler which one we would like to have; for
+    // this, we could use a cast, but for more clarity, we assign it to a
+    // temporary <code>mhnc_p</code> (short for <code>pointer to
+    // make_hanging_node_constraints</code>) with the right type, and using
+    // this pointer instead.
+    template <int dim>
+    Solver<dim>::LinearSystem::
+    LinearSystem (const DoFHandler<dim> &dof_handler)
+    {
+      hanging_node_constraints.clear ();
+
+      // Pick the overload we need by giving the pointer an explicit type.
+      void (*mhnc_p) (const DoFHandler<dim> &,
+                      ConstraintMatrix &)
+        = &DoFTools::make_hanging_node_constraints;
+
+      // Compute the hanging node constraints on a separate task ...
+      Threads::Task<> constraints_task = Threads::new_task (mhnc_p,
+                                                            dof_handler,
+                                                            hanging_node_constraints);
+
+      // ... while this thread builds the sparsity pattern.
+      DynamicSparsityPattern pattern_builder (dof_handler.n_dofs(),
+                                              dof_handler.n_dofs());
+      DoFTools::make_sparsity_pattern (dof_handler, pattern_builder);
+
+      // The constraints are needed from here on, so wait for the task.
+      constraints_task.join ();
+
+      // Close the constraints, condense the pattern, copy it to the member.
+      hanging_node_constraints.close ();
+      hanging_node_constraints.condense (pattern_builder);
+      sparsity_pattern.copy_from (pattern_builder);
+
+      // Matrix and right hand side can now be sized.
+      matrix.reinit (sparsity_pattern);
+      rhs.reinit (dof_handler.n_dofs());
+    }
+
+
+
+    // The second function of this class simply solves the linear system by a
+    // preconditioned conjugate gradient method. This has been extensively
+    // discussed before, so we don't dwell on it any more.
+    template <int dim>
+    void
+    Solver<dim>::LinearSystem::solve (Vector<double> &solution) const
+    {
+      // Conjugate gradient solver driven by a SolverControl object.
+      SolverControl control (1000, 1e-12);
+      SolverCG<>    cg_solver (control);
+
+      // SSOR preconditioner set up on the system matrix.
+      PreconditionSSOR<> ssor;
+      ssor.initialize (matrix, 1.2);
+      cg_solver.solve (matrix, solution, rhs, ssor);
+      hanging_node_constraints.distribute (solution);  // apply the hanging node constraints to the result
+    }
+
+
+
+
+    // @sect4{A primal solver}
+
+    // In the previous section, a base class for Laplace solvers was
+    // implemented, that lacked the functionality to assemble the right hand
+    // side vector, however, for reasons that were explained there. Now we
+    // implement a corresponding class that can do this for the case that the
+    // right hand side of a problem is given as a function object.
+    //
+    // The actions of the class are rather what you have seen already in
+    // previous examples, so a brief explanation should suffice: the
+    // constructor takes the same data as does that of the underlying class
+    // (to which it passes all information) except for one function object
+    // that denotes the right hand side of the problem. A pointer to this
+    // object is stored (again as a <code>SmartPointer</code>, in order to
+    // make sure that the function object is not deleted as long as it is
+    // still used by this class).
+    //
+    // The only functional part of this class is the <code>assemble_rhs</code>
+    // method that does what its name suggests.
+    template <int dim>
+    class PrimalSolver : public Solver<dim>
+    {
+    public:
+      PrimalSolver (Triangulation<dim>       &triangulation,
+                    const FiniteElement<dim> &fe,
+                    const Quadrature<dim>    &quadrature,
+                    const Function<dim>      &rhs_function,
+                    const Function<dim>      &boundary_values);
+    protected:
+      const SmartPointer<const Function<dim> > rhs_function;   // right hand side function, held by pointer
+      virtual void assemble_rhs (Vector<double> &rhs) const;   // implements the pure virtual declared in Solver
+    };
+
+
+    // The constructor of this class basically does what it is announced to do
+    // above...
+    template <int dim>
+    PrimalSolver<dim>::
+    PrimalSolver (Triangulation<dim>       &triangulation,
+                  const FiniteElement<dim> &fe,
+                  const Quadrature<dim>    &quadrature,
+                  const Function<dim>      &rhs_function,
+                  const Function<dim>      &boundary_values)
+      :
+      Base<dim> (triangulation),                  // Base is initialized here in addition to Solver
+      Solver<dim> (triangulation, fe,
+                   quadrature, boundary_values),  // everything but the right hand side is forwarded
+      rhs_function (&rhs_function)                // store the address, not a copy
+    {}
+
+
+
+    // ... as does the <code>assemble_rhs</code> function. Since this is
+    // explained in several of the previous example programs, we leave it at
+    // that.
+    template <int dim>
+    void
+    PrimalSolver<dim>::
+    assemble_rhs (Vector<double> &rhs) const
+    {
+      const unsigned int n_local = this->fe->dofs_per_cell;
+      const unsigned int n_quad  = this->quadrature->size();
+
+      // Shape values, quadrature points and JxW weights are all the loop reads.
+      FEValues<dim> fe_values (*this->fe, *this->quadrature,
+                               update_values | update_quadrature_points  |
+                               update_JxW_values);
+
+      // Per-cell work arrays, allocated once and reused for every cell.
+      Vector<double>                       local_rhs (n_local);
+      std::vector<double>                  f_at_q (n_quad);
+      std::vector<types::global_dof_index> dof_indices (n_local);
+
+      typename DoFHandler<dim>::active_cell_iterator
+      cell = this->dof_handler.begin_active(),
+      last = this->dof_handler.end();
+      for (; cell!=last; ++cell)
+        {
+          local_rhs = 0;
+          fe_values.reinit (cell);
+          rhs_function->value_list (fe_values.get_quadrature_points(), f_at_q);
+
+          // Integrate the right hand side against each shape function.
+          for (unsigned int q=0; q<n_quad; ++q)
+            for (unsigned int i=0; i<n_local; ++i)
+              local_rhs(i) += (fe_values.shape_value(i,q) * f_at_q[q] * fe_values.JxW(q));
+
+          // Add the local vector into the global one.
+          cell->get_dof_indices (dof_indices);
+          for (unsigned int i=0; i<n_local; ++i)
+            rhs(dof_indices[i]) += local_rhs(i);
+        }
+    }
+
+
+    // @sect4{Global refinement}
+
+    // By now, all functions of the abstract base class except for the
+    // <code>refine_grid</code> function have been implemented. We will now
+    // have two classes that implement this function for the
+    // <code>PrimalSolver</code> class, one doing global refinement, one a
+    // form of local refinement.
+    //
+    // The first, doing global refinement, is rather simple: its main function
+    // just calls <code>triangulation-@>refine_global (1);</code>, which does
+    // all the work.
+    //
+    // Note that since the <code>Base</code> base class of the
+    // <code>Solver</code> class is virtual, we have to declare a constructor
+    // that initializes the immediate base class as well as the abstract
+    // virtual one.
+    //
+    // Apart from this technical complication, the class is probably simple
+    // enough to be left without further comments.
+    template <int dim>
+    class RefinementGlobal : public PrimalSolver<dim>
+    {
+    public:
+      RefinementGlobal (Triangulation<dim>       &coarse_grid,
+                        const FiniteElement<dim> &fe,
+                        const Quadrature<dim>    &quadrature,
+                        const Function<dim>      &rhs_function,
+                        const Function<dim>      &boundary_values);
+
+      virtual void refine_grid ();  // calls refine_global(1) on the triangulation
+    };
+
+
+
+    template <int dim>
+    RefinementGlobal<dim>::
+    RefinementGlobal (Triangulation<dim>       &coarse_grid,
+                      const FiniteElement<dim> &fe,
+                      const Quadrature<dim>    &quadrature,
+                      const Function<dim>      &rhs_function,
+                      const Function<dim>      &boundary_values)
+      :
+      Base<dim> (coarse_grid),                            // Base is initialized here as well
+      PrimalSolver<dim> (coarse_grid, fe, quadrature,
+                         rhs_function, boundary_values)   // all arguments are forwarded unchanged
+    {}
+
+
+
+    template <int dim>
+    void
+    RefinementGlobal<dim>::refine_grid ()
+    {
+      this->triangulation->refine_global (1);  // the whole step is delegated to the triangulation
+    }
+
+
+    // @sect4{Local refinement by the Kelly error indicator}
+
+    // The second class implementing refinement strategies uses the Kelly
+    // refinement indicator used in various example programs before. Since this
+    // indicator is already implemented in a class of its own inside the
+    // deal.II library, there is not much to do here except call the function
+    // computing the indicator, then using it to select a number of cells for
+    // refinement and coarsening, and refining the mesh accordingly.
+    //
+    // Again, this should now be sufficiently standard to allow the omission
+    // of further comments.
+    template <int dim>
+    class RefinementKelly : public PrimalSolver<dim>
+    {
+    public:
+      RefinementKelly (Triangulation<dim>       &coarse_grid,
+                       const FiniteElement<dim> &fe,
+                       const Quadrature<dim>    &quadrature,
+                       const Function<dim>      &rhs_function,
+                       const Function<dim>      &boundary_values);
+
+      virtual void refine_grid ();  // refines and coarsens based on KellyErrorEstimator output
+    };
+
+
+
+    template <int dim>
+    RefinementKelly<dim>::
+    RefinementKelly (Triangulation<dim>       &coarse_grid,
+                     const FiniteElement<dim> &fe,
+                     const Quadrature<dim>    &quadrature,
+                     const Function<dim>      &rhs_function,
+                     const Function<dim>      &boundary_values)
+      :
+      Base<dim> (coarse_grid),                            // Base is initialized here as well
+      PrimalSolver<dim> (coarse_grid, fe, quadrature,
+                         rhs_function, boundary_values)   // all arguments are forwarded unchanged
+    {}
+
+
+
+    template <int dim>
+    void
+    RefinementKelly<dim>::refine_grid ()
+    {
+      // One error indicator per active cell, filled by the Kelly estimator.
+      Vector<float> indicators (this->triangulation->n_active_cells());
+      const QGauss<dim-1>                   face_quadrature (3);
+      const typename FunctionMap<dim>::type empty_function_map;
+      KellyErrorEstimator<dim>::estimate (this->dof_handler, face_quadrature, empty_function_map,
+                                          this->solution, indicators);
+      // Mark cells using the fractions 0.3 / 0.03, then carry out the changes.
+      GridRefinement::refine_and_coarsen_fixed_number (*this->triangulation, indicators,
+                                                       0.3, 0.03);
+      this->triangulation->execute_coarsening_and_refinement ();
+    }
+
+  }
+
+
+
+
+  // @sect3{Equation data}
+
+  // As this is one more academic example, we'd like to compare exact and
+  // computed solution against each other. For this, we need to declare
+  // function classes representing the exact solution (for comparison and for
+  // the Dirichlet boundary values), as well as a class that denotes the right
+  // hand side of the equation (this is simply the Laplace operator applied to
+  // the exact solution we'd like to recover).
+  //
+  // For this example, let us choose as exact solution the function
+  // $u(x,y)=\exp(x+\sin(10y+5x^2))$. In more than two dimensions, simply repeat
+  // the sine-factor with <code>y</code> replaced by <code>z</code> and so
+  // on. Given this, the following two classes are probably straightforward
+  // from the previous examples.
+  //
+  // As in previous examples, the C++ language forces us to declare and define
+  // a constructor to the following classes even though they are empty. This
+  // is due to the fact that the base class has no default constructor
+  // (i.e. one without arguments), even though it has a constructor which has
+  // default values for all arguments.
+  template <int dim>
+  class Solution : public Function<dim>
+  {
+  public:
+    Solution () : Function<dim> () {}  // explicitly invokes the base class constructor without arguments
+
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component) const;  // the implementation ignores <code>component</code>
+  };
+
+
+  template <int dim>
+  double
+  Solution<dim>::value (const Point<dim>   &p,
+                        const unsigned int  /*component*/) const
+  {
+    // exp(x + sum_{d>=1} sin(10 p_d + 5 x^2)); the component is ignored.
+    double exponent = p(0);
+    for (unsigned int d=1; d<dim; ++d)
+      exponent += std::sin(10*p(d)+5*p(0)*p(0));
+    return std::exp(exponent);
+  }
+
+
+
+  template <int dim>
+  class RightHandSide : public Function<dim>
+  {
+  public:
+    RightHandSide () : Function<dim> () {}  // explicitly invokes the base class constructor without arguments
+
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component) const;  // the implementation ignores <code>component</code>
+  };
+
+
+  template <int dim>
+  double
+  RightHandSide<dim>::value (const Point<dim>   &p,
+                             const unsigned int  /*component*/) const
+  {
+    // u = exp(q) with q = x + sum_{i>=1} sin(10 p_i + 5 x^2).
+    const double x = p(0);
+    double q = x;
+    for (unsigned int i=1; i<dim; ++i)
+      q += std::sin(10*p(i)+5*x*x);
+    const double u = std::exp(q);
+    // dq_dx accumulates q_x, q_xx the second x-derivative of q, and
+    // other_dirs the contributions of the remaining coordinate directions.
+    double dq_dx = 1, q_xx = 0, other_dirs = 0;
+    for (unsigned int i=1; i<dim; ++i)
+      {
+        const double s = std::sin(10*p(i)+5*x*x);
+        const double c = std::cos(10*p(i)+5*x*x);
+        dq_dx      += c * 10 * x;
+        q_xx       += 10*c - 100*s * x*x;
+        other_dirs += 100*c*c - 100*s;
+      }
+    return -u*(dq_dx*dq_dx+q_xx+other_dirs);
+  }
+
+
+
+  // @sect3{The driver routines}
+
+  // What is now missing are only the functions that actually select the
+  // various options, and run the simulation on successively finer grids to
+  // monitor the progress as the mesh is refined.
+  //
+  // This we do in the following function: it takes a solver object, and a
+  // list of postprocessing (evaluation) objects, and runs them with
+  // intermittent mesh refinement:
+  template <int dim>
+  void
+  run_simulation (LaplaceSolver::Base<dim>                     &solver,
+                  const std::list<Evaluation::EvaluationBase<dim> *> &postprocessor_list)
+  {
+    // We will give an indicator of the step we are presently computing, in
+    // order to keep the user informed that something is still happening, and
+    // that the program is not in an endless loop. This is the head of this
+    // status line:
+    std::cout << "Refinement cycle: ";
+
+    // Then start a loop which only terminates once the number of degrees of
+    // freedom has reached 20,000 (you may of course change this limit, if
+    // you need more -- or less -- accuracy from your program).
+    for (unsigned int step=0; true; ++step)  // no loop condition: the only exit is the break below
+      {
+        // Then give the <code>alive</code> indication for this
+        // iteration. Note that the <code>std::flush</code> is needed to have
+        // the text actually appear on the screen, rather than only in some
+        // buffer that is only flushed the next time we issue an end-line.
+        std::cout << step << " " << std::flush;
+
+        // Now solve the problem on the present grid, and run the evaluators
+        // on it. The long type name of iterators into the list is a little
+        // annoying, but could be shortened by a typedef, if so desired.
+        solver.solve_problem ();
+
+        for (typename std::list<Evaluation::EvaluationBase<dim> *>::const_iterator
+             i = postprocessor_list.begin();
+             i != postprocessor_list.end(); ++i)
+          {
+            (*i)->set_refinement_cycle (step);  // pass the current step number to the evaluator
+            solver.postprocess (**i);  // hand the evaluator object itself (not the pointer) to the solver
+          };
+
+
+        // Now check whether more iterations are required, or whether the loop
+        // shall be ended:
+        if (solver.n_dofs() < 20000)  // refine only while below the limit
+          solver.refine_grid ();
+        else
+          break;  // 20,000 or more degrees of freedom: stop without refining again
+      };
+
+    // Finally end the line in which we displayed status reports:
+    std::cout << std::endl;
+  }
+
+
+
+  // The final function is one which takes the name of a solver (presently
+  // "kelly" and "global" are allowed), creates a solver object out of it
+  // using a coarse grid (in this case the ubiquitous unit square) and a
+  // finite element object (here the likewise ubiquitous bilinear one), and
+  // uses that solver to ask for the solution of the problem on a sequence of
+  // successively refined grids.
+  //
+  // The function also sets up two evaluation functions, one evaluating the
+  // solution at the point (0.5,0.5), the other writing out the solution to a
+  // file.
+  template <int dim>
+  void solve_problem (const std::string &solver_name)
+  {
+    // First minor task: tell the user what is going to happen. Thus write a
+    // header line, and a line with all '-' characters of the same length as
+    // the first one right below.
+    const std::string header = "Running tests with \"" + solver_name +
+                               "\" refinement criterion:";
+    std::cout << header << std::endl
+              << std::string (header.size(), '-') << std::endl;
+
+    // Then set up triangulation, finite element, etc.
+    Triangulation<dim> triangulation;
+    GridGenerator::hyper_cube (triangulation, -1, 1);  // NOTE(review): bounds -1 and 1 are passed, while the comment above this function speaks of the unit square -- confirm
+    triangulation.refine_global (2);
+    const FE_Q<dim>          fe(1);
+    const QGauss<dim>       quadrature(4);
+    const RightHandSide<dim> rhs_function;
+    const Solution<dim>      boundary_values;
+
+    // Create a solver object of the kind indicated by the argument to this
+    // function. If the name is not recognized, throw an exception!
+    LaplaceSolver::Base<dim> *solver = 0;  // owning raw pointer; released by the delete at the end of this function
+    if (solver_name == "global")
+      solver = new LaplaceSolver::RefinementGlobal<dim> (triangulation, fe,
+                                                         quadrature,
+                                                         rhs_function,
+                                                         boundary_values);
+    else if (solver_name == "kelly")
+      solver = new LaplaceSolver::RefinementKelly<dim> (triangulation, fe,
+                                                        quadrature,
+                                                        rhs_function,
+                                                        boundary_values);
+    else
+      AssertThrow (false, ExcNotImplemented());  // any name other than "global" or "kelly" ends up here
+
+    // Next create a table object in which the values of the numerical
+    // solution at the point (0.5,0.5) will be stored, and create a respective
+    // evaluation object:
+    TableHandler results_table;
+    Evaluation::PointValueEvaluation<dim>
+    postprocessor1 (Point<dim>(0.5,0.5), results_table);  // NOTE(review): two coordinates are given, presumably only valid for dim==2 -- confirm
+
+    // Also generate an evaluator which writes out the solution:
+    Evaluation::SolutionOutput<dim>
+    postprocessor2 (std::string("solution-")+solver_name,
+                    DataOutBase::gnuplot);
+
+    // Take these two evaluation objects and put them in a list...
+    std::list<Evaluation::EvaluationBase<dim> *> postprocessor_list;  // holds addresses of the two local objects; it does not own them
+    postprocessor_list.push_back (&postprocessor1);
+    postprocessor_list.push_back (&postprocessor2);
+
+    // ... which we can then pass on to the function that actually runs the
+    // simulation on successively refined grids:
+    run_simulation (*solver, postprocessor_list);
+
+    // When this all is done, write out the results of the point evaluations,
+    // and finally delete the solver object:
+    results_table.write_text (std::cout);
+    delete solver;  // NOTE(review): not reached if run_simulation() or write_text() throws, so the solver leaks on that path
+
+    // And one blank line after all results:
+    std::cout << std::endl;
+  }
+}
+
+
+
+// There is not much to say about the main function. It follows the same
+// pattern as in all previous examples, with attempts to catch thrown
+// exceptions, and displaying as much information as possible if we should get
+// some. The rest is self-explanatory.
+int main ()
+{
+  try
+    {
+      Step13::solve_problem<2> ("global");  // dim=2 run with the "global" solver name
+      Step13::solve_problem<2> ("kelly");  // dim=2 run with the "kelly" solver name
+    }
+  catch (std::exception &exc)  // anything derived from std::exception: print its message
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;  // non-zero exit status signals the failure
+    }
+  catch (...)  // everything else: no message is available to print
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    };
+
+  return 0;  // both runs completed without an exception
+}
diff --git a/examples/step-14/CMakeLists.txt b/examples/step-14/CMakeLists.txt
new file mode 100644
index 0000000..35b0364
--- /dev/null
+++ b/examples/step-14/CMakeLists.txt
@@ -0,0 +1,39 @@
+##
+#  CMake script for the step-14 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-14")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)  # oldest CMake version this script accepts
+
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})  # FIND_PACKAGE above was QUIET, so the failure is reported explicitly here
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})  # the project is named after the target, i.e. "step-14"
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-14/doc/builds-on b/examples/step-14/doc/builds-on
new file mode 100644
index 0000000..342762d
--- /dev/null
+++ b/examples/step-14/doc/builds-on
@@ -0,0 +1 @@
+step-13
diff --git a/examples/step-14/doc/intro.dox b/examples/step-14/doc/intro.dox
new file mode 100644
index 0000000..15a115b
--- /dev/null
+++ b/examples/step-14/doc/intro.dox
@@ -0,0 +1,408 @@
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+<h3>The maths</h3>
+
+The Heidelberg group of Professor Rolf Rannacher, to which the three initial
+authors of the deal.II library belonged during their PhD time and partly also
+afterwards, has been involved with adaptivity and error estimation for finite
+element discretizations since the mid-1990s. The main achievement is the
+development of error estimates for arbitrary functionals of the solution, and
+of optimal mesh refinement for its computation.
+
+We will not discuss the derivation of these concepts in too great detail, but
+will implement the main ideas in the present example program. For a thorough
+introduction into the general idea, we refer to the seminal work of Becker and
+Rannacher @ref step_14_BR95 "[BR95]", @ref step_14_BR96r "[BR96r]", and the overview article of the same authors in
+Acta Numerica @ref step_14_BR01 "[BR01]"; the first introduces the concept of error
+estimation and adaptivity for general functional output for the Laplace
+equation, while the second gives many examples of applications of these
+concepts to a large number of other, more complicated equations. For
+applications to individual types of equations, see also the publications by
+Becker @ref step_14_Bec95 "[Bec95]", @ref step_14_Bec98 "[Bec98]",
+Kanschat @ref step_14_Kan96 "[Kan96]", @ref step_14_FK97 "[FK97]",
+Suttmeier @ref step_14_Sut96 "[Sut96]", @ref step_14_RS97 "[RS97]", @ref step_14_RS98c "[RS98c]",
+@ref step_14_RS99 "[RS99]",
+Bangerth @ref step_14_BR99b "[BR99b]", @ref step_14_Ban00w "[Ban00w]",
+@ref step_14_BR01a "[BR01a]", @ref step_14_Ban02 "[Ban02]", and
+Hartmann @ref step_14_Har02 "[Har02]", @ref step_14_HH01 "[HH01]",
+@ref step_14_HH01b "[HH01b]".
+All of these works, from the original introduction by Becker and Rannacher to
+individual contributions to particular equations, have later been summarized
+in a book by Bangerth and Rannacher that covers all of these topics, see
+@ref step_14_BR03 "[BR03]".
+
+
+The basic idea is the following: in applications, one is not usually
+interested in the solution per se, but rather in certain aspects of it. For
+example, in simulations of flow problems, one may want to know the lift or
+drag of a body immersed in the fluid; it is this quantity that we want to know
+to best accuracy, and whether the rest of the solution of the describing
+equations is well resolved is not of primary interest. Likewise, in elasticity
+one might want to know about values of the stress at certain points to guess
+whether maximal load values of joints are safe, for example. Or, in radiative
+transfer problems, mean flux intensities are of interest.
+
+In all the cases just listed, it is the evaluation of a functional $J(u)$ of
+the solution which we are interested in, rather than the values of $u$
+everywhere. Since the exact solution $u$ is not available, but only its
+numerical approximation $u_h$, it is sensible to ask whether the computed
+value $J(u_h)$ is within certain limits of the exact value $J(u)$, i.e. we
+want to bound the error with respect to this functional, $J(u)-J(u_h)$.
+
+For simplicity of exposition, we henceforth assume that both the quantity of
+interest $J$, as well as the equation are linear, and we will in particular
+show the derivation for the Laplace equation with homogeneous Dirichlet
+boundary conditions, although the concept is much more general. For this
+general case, we refer to the references listed above.  The goal is to obtain
+bounds on the error, $J(e)=J(u)-J(u_h)$. For this, let us denote by $z$ the
+solution of a dual problem, defined as follows:
+@f[
+  a(\varphi,z) = J(\varphi) \qquad \forall \varphi,
+@f]
+where $a(\cdot,\cdot)$ is the bilinear form associated with the differential
+equation, and the test functions are chosen from the corresponding solution
+space. Then, taking as special test function $\varphi=e$ the error, we have
+that
+@f[
+  J(e) = a(e,z)
+@f]
+and we can, by Galerkin orthogonality, rewrite this as
+@f[
+  J(e) = a(e,z-\varphi_h)
+@f]
+for all possible functions $\varphi_h$ from the discrete test space.
+
+Concretely, for Laplace's equation, the error identity reads
+@f[
+  J(e) = (\nabla e, \nabla(z-\varphi_h)).
+@f]
+For reasons that we will not explain, we do not want to use this formula as
+is, but rather split the scalar products into terms on all cells, and
+integrate by parts on each of them:
+@f{eqnarray*}
+  J(e)
+  &=&
+  \sum_K (\nabla (u-u_h), \nabla (z-\varphi_h))_K
+  \\
+  &=&
+  \sum_K (-\Delta (u-u_h), z-\varphi_h)_K
+  + (\partial_n (u-u_h), z-\varphi_h)_{\partial K}.
+@f}
+Next we use that $-\Delta u=f$, and that $\partial_n u$ is a quantity that is
+continuous almost everywhere, so the term involving $\partial_n u$ on one
+cell cancels with that on its neighbor, where the normal vector has the
+opposite sign. At the boundary of the domain, where there is no neighbor cell
+with which this term could cancel, the weight $z-\varphi_h$ can be chosen as
+zero, since $z$ has zero boundary values, and $\varphi_h$ can be chosen to
+have the same.
+
+Thus, we have
+@f{eqnarray*}
+  J(e)
+  &=&
+  \sum_K (f+\Delta u_h, z-\varphi_h)_K
+  - (\partial_n u_h, z-\varphi_h)_{\partial K\backslash \partial\Omega}.
+@f}
+In a final step, note that when taking the normal derivative of $u_h$, we mean
+the value of this quantity as taken from this side of the cell (for the usual
+Lagrange elements, derivatives are not continuous across edges). We then
+rewrite the above formula by exchanging half of the edge integral of cell $K$
+with the neighbor cell $K'$, to obtain
+@f{eqnarray*}
+  J(e)
+  &=&
+  \sum_K (f+\Delta u_h, z-\varphi_h)_K
+  - \frac 12 (\partial_n u_h|_K + \partial_{n'} u_h|_{K'},
+              z-\varphi_h)_{\partial K\backslash \partial\Omega}.
+@f}
+Using that for the normal vectors $n'=-n$ holds, we define the jump of the
+normal derivative by
+@f[
+  [\partial_n u_h] := \partial_n u_h|_K + \partial_{n'} u_h|_{K'}
+  =
+  \partial_n u_h|_K - \partial_n u_h|_{K'},
+@f]
+and get the final form after setting the discrete function $\varphi_h$, which
+is by now still arbitrary, to the point interpolation of the dual solution,
+$\varphi_h=I_h z$:
+@f{eqnarray*}
+  J(e)
+  &=&
+  \sum_K (f+\Delta u_h, z-I_h z)_K
+  - \frac 12 ([\partial_n u_h],
+              z-I_h z)_{\partial K\backslash \partial\Omega}.
+@f}
+
+With this, we have obtained an exact representation of the error of the finite
+element discretization with respect to arbitrary (linear) functionals
+$J(\cdot)$. Its structure is a weighted form of a residual estimator, as both
+$f+\Delta u_h$ and $[\partial_n u_h]$ are cell and edge residuals that vanish
+on the exact solution, and $z-I_h z$ are weights indicating how important the
+residuals on a certain cell are for the evaluation of the given functional.
+Furthermore, it is a cell-wise quantity, so we can use it as a mesh refinement
+criterion. The question is: how to evaluate it? After all, the evaluation
+requires knowledge of the dual solution $z$, which carries the information
+about the quantity we want to know to best accuracy.
+
+In some, very special cases, this dual solution is known. For example, if the
+functional $J(\cdot)$ is the point evaluation, $J(\varphi)=\varphi(x_0)$, then
+the dual solution has to satisfy
+@f[
+  -\Delta z = \delta(x-x_0),
+@f]
+with the Dirac delta function on the right hand side, and the dual solution is
+the Green's function with respect to the point $x_0$. For simple geometries,
+this function is analytically known, and we could insert it into the error
+representation formula.
+
+However, we do not want to restrict ourselves to such special cases. Rather,
+we will compute the dual solution numerically, and approximate $z$ by some
+numerically obtained $\tilde z$. We note that it is not sufficient to compute
+this approximation $\tilde z$ using the same method as used for the primal
+solution $u_h$, since then $\tilde z-I_h \tilde z=0$, and the overall error
+estimate would be zero. Rather, the approximation $\tilde z$ has to be from a
+larger space than the primal finite element space. There are various ways to
+obtain such an approximation (see the cited literature), and we will choose to
+compute it with a higher order finite element space. While this is certainly
+not the most efficient way, it is simple since we already have all we need to
+do that in place, and it also allows for simple experimenting. For more
+efficient methods, again refer to the given literature, in particular
+@ref step_14_BR95 "[BR95]", @ref step_14_BR03 "[BR03]".
+
+With this, we end the discussion of the mathematical side of this program and
+turn to the actual implementation.
+
+
+<h3>The software</h3>
+
+The step-14 example program builds heavily on the techniques already used in
+the step-13 program. Its implementation of the dual weighted residual error
+estimator explained above is done by deriving a second class, properly called
+<code>DualSolver</code>, from the <code>Solver</code> base class, and having a class
+(<code>WeightedResidual</code>) that joins the two again and controls the solution
+of the primal and dual problem, and then uses both to compute the error
+indicator for mesh refinement.
+
+The program continues the modular concept of the previous example, by
+implementing the dual functional, describing the quantity of interest, by an
+abstract base class, and providing two different functionals which implement
+this interface. Adding a different quantity of interest is thus simple.
+
+One of the more fundamental differences is the handling of data. A common case
+is that you develop a program that solves a certain equation, and test it with
+different right hand sides, different domains, different coefficients and
+boundary values, etc. Usually, these have to match, so that exact solutions
+are known, or that their combination makes sense at all.
+
+We demonstrate a way how this can be achieved in a simple, yet very flexible
+way. We will put everything that belongs to a certain setup into one class,
+and provide a little C++ mortar around it, so that entire setups (domains,
+coefficients, right hand sides, etc.) can be exchanged by only changing
+something in <em>one</em> place.
+
+Going this way a little further, we have also centralized all the other
+parameters that describe how the program is to work in one place, such as the
+order of the finite element, the maximal number of degrees of freedom, the
+evaluation objects that shall be executed on the computed solutions, and so
+on. This allows for simpler configuration of the program, and we will show in
+a later program how to use a library class that can handle setting these
+parameters by reading an input file. The general aim is to reduce the places
+within a program where one may have to look when wanting to change some
+parameter, as it has turned out in practice that one forgets where they are as
+programs grow. Furthermore, putting all options describing what the program
+does in a certain run into a file (that can be stored with the results) helps
+repeatability of results more than if the various flags were set somewhere in
+the program, where their exact values are forgotten after the next change to
+this place.
+
+Unfortunately, the program has become rather long. While this admittedly
+reduces its usefulness as an example program, we think that it is a very good
+starting point for development of a program for other kinds of problems,
+involving different equations than the Laplace equation treated here.
+Furthermore, it shows everything that we can show you about our way of a
+posteriori error estimation, and its structure should make it simple for you
+to adjust this method to other problems, other functionals, other geometries,
+coefficients, etc.
+
+The author believes that the present program is his masterpiece among the
+example programs, regarding the mathematical complexity, as well as the
+simplicity to add extensions. If you use this program as a basis for your own
+programs, we would kindly like to ask you to state this fact and the name of
+the author of the example program, Wolfgang Bangerth, in publications that
+arise from that, if your program consists in a considerable part of the
+example program.
+
+
+<h3>Bibliography</h3>
+
+<dl>
+
+<dt> @anchor step_14_Ban00w [Ban00w]</dt>
+<dd>Wolfgang Bangerth.
+<br> Mesh adaptivity and error control for a finite element approximation
+  of the elastic wave equation.
+<br> In Alfredo Bermudez, Dolores Gomez, Christophe Hazard, Patrick
+  Joly, and Jean E. Roberts, editors, <em>Proceedings of the Fifth
+  International Conference on Mathematical and Numerical Aspects of Wave
+  Propagation (Waves2000), Santiago de Compostela, Spain, 2000</em>, pages
+  725–729. SIAM, 2000.
+
+
+
+<dt> @anchor step_14_Ban02 [Ban02]</dt>
+<dd>Wolfgang Bangerth.
+<br> <em>Adaptive Finite Element Methods for the Identification of
+  Distributed Coefficient in Partial Differential Equations</em>.
+<br> PhD thesis, University of Heidelberg, 2002.
+
+
+
+<dt> @anchor step_14_BR99b [BR99b]</dt>
+<dd>Wolfgang Bangerth and Rolf Rannacher.
+<br> Finite element approximation of the acoustic wave equation: Error
+  control and mesh adaptation.
+<br> <em>East–West J. Numer. Math.</em>, 7(4):263–282, 1999.
+
+
+
+<dt> @anchor step_14_BR03 [BR03]</dt>
+<dd>Wolfgang Bangerth and Rolf Rannacher.
+<br> <em>Adaptive Finite Element Methods for Differential Equations</em>.
+<br> Birkhäuser Verlag, Basel, 2003.
+
+
+
+<dt> @anchor step_14_BR01a [BR01a]</dt>
+<dd>Wolfgang Bangerth and Rolf Rannacher.
+<br> Adaptive finite element techniques for the acoustic wave equation.
+<br> <em>J. Comput. Acoustics</em>, 9(2):575–591, 2001.
+
+
+
+<dt> @anchor step_14_BR01 [BR01]</dt>
+<dd>Roland Becker and Rolf Rannacher.
+<br> An optimal control approach to error estimation and mesh adaptation
+  in finite element methods.
+<br> <em>Acta Numerica</em>, 10:1–102, 2001.
+
+
+
+<dt> @anchor step_14_Bec95 [Bec95]</dt>
+<dd>Roland Becker.
+<br> <em>An Adaptive Finite Element Method for the Incompressible
+  Navier-Stokes Equations on Time-dependent Domains</em>.
+<br> Dissertation, Universität Heidelberg, 1995.
+
+
+
+<dt> @anchor step_14_Bec98 [Bec98]</dt>
+<dd>Roland Becker.
+<br> Weighted error estimators for finite element approximations of the
+  incompressible Navier-Stokes equations.
+<br> Preprint 98-20, SFB 359, Universität Heidelberg, 1998.
+
+
+
+<dt> @anchor step_14_BR96r [BR96r]</dt>
+<dd>Roland Becker and Rolf Rannacher.
+<br> A feed-back approach to error control in finite element methods:
+  Basic analysis and examples.
+<br> <em>East–West J. Numer. Math.</em>, 4:237–264, 1996.
+
+
+
+<dt> @anchor step_14_BR95 [BR95]</dt>
+<dd>Roland Becker and Rolf Rannacher.
+<br> Weighted a posteriori error control in FE methods.
+<br> In H. G. Bock et al., eds., <em>ENUMATH 95</em>, pages 621–637,
+  Paris, September 1998. World Scientific Publ., Singapore.
+<br> in @ref step_14_enumath97 "[enumath97]".
+
+
+
+<dt> @anchor step_14_enumath97 [enumath97]</dt>
+<dd>Hans Georg Bock, Franco Brezzi, Roland Glowinsky, Guido Kanschat, Yuri A.
+  Kuznetsov, Jacques Periaux, and Rolf Rannacher, editors.
+<br> <em>ENUMATH 97, Proceedings of the 2nd European Conference on
+  Numerical Mathematics and Advanced Applications</em>, Singapore, 1998. World
+  Scientific.
+
+
+
+<dt> @anchor step_14_FK97 [FK97]</dt>
+<dd>Christian Führer and Guido Kanschat.
+<br> A posteriori error control in radiative transfer.
+<br> <em>Computing</em>, 58(4):317–334, 1997.
+
+
+
+<dt> @anchor step_14_Har02 [Har02]</dt>
+<dd>Ralf Hartmann.
+<br> <em>Adaptive Finite Element Methods for the Compressible Euler Equations</em>.
+<br> PhD thesis, University of Heidelberg, 2002.
+
+
+
+<dt> @anchor step_14_HH01 [HH01]</dt>
+<dd>Ralf Hartmann and Paul Houston.
+<br> Adaptive discontinuous Galerkin finite element methods for
+  nonlinear hyperbolic conservation laws.
+<br> <em>SIAM J. Sci. Comput.</em>, 24 (2002), pp. 979-1004.
+
+
+
+<dt> @anchor step_14_HH01b [HH01b]</dt>
+<dd>Ralf Hartmann and Paul Houston.
+<br> Adaptive discontinuous Galerkin finite element methods for the
+  compressible Euler equations.
+<br> J. Comput. Phys. 183 (2002), pp. 508-532.
+
+
+
+<dt> @anchor step_14_Kan96 [Kan96]</dt>
+<dd>Guido Kanschat.
+<br> <em>Parallel and Adaptive Galerkin Methods for Radiative Transfer
+  Problems</em>.
+<br> Dissertation, Universität Heidelberg, 1996.
+
+
+
+<dt> @anchor step_14_RS97 [RS97]</dt>
+<dd>Rolf Rannacher and Franz-Theo Suttmeier.
+<br> A feed-back approach to error control in finite element methods:
+  Application to linear elasticity.
+<br> <em>Comp. Mech.</em>, 19(5):434–446, 1997.
+
+
+
+<dt> @anchor step_14_RS98c [RS98c]</dt>
+<dd>Rolf Rannacher and Franz-Theo Suttmeier.
+<br> A posteriori error control in finite element methods via duality
+  techniques: Application to perfect plasticity.
+<br> <em>Comp. Mech.</em>, 21(2):123–133, 1998.
+
+
+
+<dt> @anchor step_14_RS99 [RS99]</dt>
+<dd>Rolf Rannacher and Franz-Theo Suttmeier.
+<br> A posteriori error control and mesh adaptation for finite element
+  models in elasticity and elasto-plasticity.
+<br> <em>Comput. Methods Appl. Mech. Engrg.</em>, pages 333–361, 1999.
+
+
+
+<dt> @anchor step_14_Sut96 [Sut96]</dt>
+<dd>Franz-Theo Suttmeier.
+<br> <em>Adaptive Finite Element Approximation of Problems in
+  Elasto-Plasticity Theory</em>.
+<br> Dissertation, Universität Heidelberg, 1996.
+
+
+
+</dl>
+
+
+
+
+
diff --git a/examples/step-14/doc/kind b/examples/step-14/doc/kind
new file mode 100644
index 0000000..c1d9154
--- /dev/null
+++ b/examples/step-14/doc/kind
@@ -0,0 +1 @@
+techniques
diff --git a/examples/step-14/doc/results.dox b/examples/step-14/doc/results.dox
new file mode 100644
index 0000000..1cb435b
--- /dev/null
+++ b/examples/step-14/doc/results.dox
@@ -0,0 +1,389 @@
+<h1>Results</h1>
+
+<h2>Point values</h2>
+
+
+This program offers a lot of possibilities to play around. We can thus
+only show a small part of all possible results that can be obtained
+with the help of this program. However, you are encouraged to just try
+it out, by changing the settings in the main program. Here, we start
+by simply letting it run, unmodified:
+@code
+Refinement cycle: 0
+   Number of degrees of freedom=72
+   Point value=0.03243
+   Estimated error=0.000702385
+Refinement cycle: 1
+   Number of degrees of freedom=67
+   Point value=0.0324827
+   Estimated error=0.000888953
+Refinement cycle: 2
+   Number of degrees of freedom=130
+   Point value=0.0329619
+   Estimated error=0.000454606
+Refinement cycle: 3
+   Number of degrees of freedom=307
+   Point value=0.0331934
+   Estimated error=0.000241254
+Refinement cycle: 4
+   Number of degrees of freedom=718
+   Point value=0.0333675
+   Estimated error=7.4912e-05
+Refinement cycle: 5
+   Number of degrees of freedom=1665
+   Point value=0.0334083
+   Estimated error=3.69111e-05
+Refinement cycle: 6
+   Number of degrees of freedom=3975
+   Point value=0.033431
+   Estimated error=1.54218e-05
+Refinement cycle: 7
+   Number of degrees of freedom=8934
+   Point value=0.0334406
+   Estimated error=6.28359e-06
+Refinement cycle: 8
+   Number of degrees of freedom=21799
+   Point value=0.0334444
+@endcode
+
+
+First let's look at what the program actually computed. On the fifth
+grid, primal and dual numerical solutions look like this:
+<table align="center">
+  <tr>
+    <td width="50%">
+      <img src="http://www.dealii.org/images/steps/developer/step-14.point-value.solution-5.png" alt="">
+    </td>
+
+    <td width="50%">
+      <img src="http://www.dealii.org/images/steps/developer/step-14.point-value.solution-5-dual.png" alt="">
+    </td>
+  </tr>
+</table>
+Obviously, the region at the bottom left is so unimportant for the
+point value evaluation at the top right that the grid is left entirely
+unrefined there, even though the solution has singularities there! Due
+to the symmetry in right hand side and domain, the solution should
+actually look the same in all four corners as it does at the top right,
+but the mesh refinement criterion involving the dual solution chose to
+refine them differently.
+
+
+
+Looking at the grids that are produced in the course of subsequent
+refinement, here are some of them:
+
+<table width="80%" align="center">
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-14.point-value.grid-0.png" alt="" width="100%"></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-14.point-value.grid-2.png" alt="" width="100%"></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-14.point-value.grid-4.png" alt="" width="100%"></td>
+  </tr>
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-14.point-value.grid-5.png" alt="" width="100%"></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-14.point-value.grid-7.png" alt="" width="100%"></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-14.point-value.grid-8.png" alt="" width="100%"></td>
+  </tr>
+</table>
+
+Note the subtle interplay between resolving the corner singularities,
+and resolving around the point of evaluation. It will be rather
+difficult to generate such a mesh by hand, as this would involve
+judging quantitatively how much each of the four corner singularities
+should be resolved, and setting the weight compared to the vicinity of
+the evaluation point.
+
+
+
+The program prints the point value and the estimated error in this
+quantity. From extrapolating it, we can guess that the exact value is
+somewhat like 0.0334473, plus or minus 0.0000001 (note that we get
+almost 6 valid digits from only 22,000 (primal) degrees of
+freedom). This number cannot be obtained from the value of the
+functional alone, but I have used the assumption that the error
+estimator is mostly exact, and extrapolated the computed value plus
+the estimated error, to get an approximation of the true
+value. Computing with more degrees of freedom shows that this
+assumption is indeed valid.
+
+
+
+From the computed results, we can generate two graphs: one that shows
+the convergence of the error $J(u)-J(u_h)$ (taking the
+extrapolated value as correct) in the point value, and the value that
+we get by adding up computed value $J(u_h)$ and estimated
+error $\eta$ (if the error estimator $\eta$ were exact, then the value
+$J(u_h)+\eta$ would equal the exact point value, and the error
+in this quantity would always be zero; however, since the error
+estimator is only a - good - approximation to the true error, we can
+by this only reduce the size of the error). In this graph, we also
+indicate the complexity ${\cal O}(1/N)$ to show that mesh refinement
+acts optimally in this case. The second chart compares
+true and estimated error, and shows that the two are actually very
+close to each other, even for such a complicated quantity as the point
+value:
+
+
+<table width="80%" align="center">
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-14.point-value.error.png" alt="" width="100%"></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-14.point-value.error-estimation.png" alt="" width="100%"></td>
+  </tr>
+</table>
+
+
+<h2>Comparing refinement criteria</h2>
+
+
+Since we have invested quite some effort in using the mesh
+refinement driven by the dual weighted error estimator (for solving
+the dual problem, and for evaluating the error representation), it is
+worthwhile asking whether that effort was successful. To this end, we
+first compare the achieved error levels for different mesh refinement
+criteria. To generate this data, simply change the value of the mesh
+refinement criterion variable in the main program. The results are
+thus (for the weight in the Kelly indicator, we have chosen the
+function $1/(r^2+0.1^2)$, where $r$
+is the distance to the evaluation point; it can be shown that this is
+the optimal weight if we neglect the effects of boundaries):
+
+<img src="http://www.dealii.org/images/steps/developer/step-14.point-value.error-comparison.png" alt="">
+
+
+
+Checking these numbers, we see that for global refinement, the error
+is proportional to $O(1/(\sqrt{N} \log(N)))$, and for the dual
+estimator $O(1/N)$. Generally speaking, we see that the dual
+weighted error estimator is better than the other refinement
+indicators, at least when compared with those that have a similarly
+regular behavior. The Kelly indicator produces smaller errors, but
+jumps about the picture rather irregularly, with the error also
+changing signs sometimes. Therefore, its behavior does not allow us to
+extrapolate the results to larger values of $N$. Furthermore, if we
+trust the error estimates of the dual weighted error estimator, the
+results can be improved by adding the estimated error to the computed
+values. In terms of reliability, the weighted estimator is thus better
+than the Kelly indicator, although the latter sometimes produces
+smaller errors.
+
+
+
+<h2>Evaluation of point stresses</h2>
+
+
+Besides evaluating the values of the solution at a certain point, the
+program also offers the possibility to evaluate the x-derivatives at a
+certain point, and also to tailor mesh refinement for this. To let the
+program compute these quantities, simply replace the two occurrences of
+<code>PointValueEvaluation</code> in the main function by
+<code>PointXDerivativeEvaluation</code>, and let the program run:
+@code
+Refinement cycle: 0
+   Number of degrees of freedom=72
+   Point x-derivative=-0.0719397
+   Estimated error=-0.0126173
+Refinement cycle: 1
+   Number of degrees of freedom=61
+   Point x-derivative=-0.0707956
+   Estimated error=-0.00774316
+Refinement cycle: 2
+   Number of degrees of freedom=131
+   Point x-derivative=-0.0568671
+   Estimated error=-0.00313426
+Refinement cycle: 3
+   Number of degrees of freedom=247
+   Point x-derivative=-0.053033
+   Estimated error=-0.00136114
+Refinement cycle: 4
+   Number of degrees of freedom=532
+   Point x-derivative=-0.0526429
+   Estimated error=-0.000558868
+Refinement cycle: 5
+   Number of degrees of freedom=1267
+   Point x-derivative=-0.0526955
+   Estimated error=-0.000220116
+Refinement cycle: 6
+   Number of degrees of freedom=2864
+   Point x-derivative=-0.0527495
+   Estimated error=-9.46731e-05
+Refinement cycle: 7
+   Number of degrees of freedom=6409
+   Point x-derivative=-0.052785
+   Estimated error=-4.21543e-05
+Refinement cycle: 8
+   Number of degrees of freedom=14183
+   Point x-derivative=-0.0528028
+   Estimated error=-2.04241e-05
+Refinement cycle: 9
+   Number of degrees of freedom=29902
+   Point x-derivative=-0.052814
+@endcode
+
+
+
+The solution looks roughly the same as before (the exact solution of
+course <em>is</em> the same, only the grid changed a little), but the
+dual solution is now different. A close-up around the point of
+evaluation shows this:
+<table align="center">
+  <tr>
+    <td width="50%">
+      <img src="http://www.dealii.org/images/steps/developer/step-14.point-derivative.solution-5-dual.png" alt="">
+    </td>
+  </tr>
+</table>
+This time, the grids in refinement cycles 0, 5, 6, 7, 8, and 9 look
+like this:
+
+<table align="center" width="80%">
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-14.point-derivative.grid-0.png" alt="" width="100%"></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-14.point-derivative.grid-5.png" alt="" width="100%"></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-14.point-derivative.grid-6.png" alt="" width="100%"></td>
+  </tr>
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-14.point-derivative.grid-7.png" alt="" width="100%"></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-14.point-derivative.grid-8.png" alt="" width="100%"></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-14.point-derivative.grid-9.png" alt="" width="100%"></td>
+  </tr>
+</table>
+
+Note the asymmetry of the grids compared with those we obtained for
+the point evaluation, which is due to the directionality of the
+x-derivative for which we tailored the refinement criterion.
+
+
+
+Then, it is interesting to compare actually computed values of the
+quantity of interest (i.e. the x-derivative of the solution at one
+point) with a reference value of -0.0528223... plus or minus
+0.0000005. We get this reference value by computing on a finer grid after
+some more mesh refinements, with approximately 130,000 cells.
+Recall that if the error is $O(1/N)$ in the optimal case, then
+taking a mesh with ten times more cells gives us one additional digit
+in the result.
+
+
+
+In the left part of the following chart, you again see the convergence
+of the error towards this extrapolated value, while on the right you
+see a comparison of true and estimated error:
+
+<table width="80%" align="center">
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-14.point-derivative.error.png" alt="" width="100%"></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-14.point-derivative.error-estimation.png" alt="" width="100%"></td>
+  </tr>
+</table>
+
+After an initial phase where the true error changes its sign, the
+estimated error matches it quite well, again. Also note the dramatic
+improvement in the error when using the estimated error to correct the
+computed value of $J(u_h)$.
+
+
+
+<h2>Step-13 revisited</h2>
+
+
+If instead of the <code>Exercise_2_3</code> data set, we choose
+<code>CurvedRidges</code> in the main function, we can redo the
+computations of the previous example program, to compare whether the
+results obtained with the help of the dual weighted error estimator
+are better than those we had previously.
+
+
+
+First, the meshes after 9 and 10 adaptive refinement cycles,
+respectively, look like this:
+
+<table width="80%" align="center">
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-14.step-13.grid-9.png" alt="" width="100%"></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-14.step-13.grid-10.png" alt="" width="100%"></td>
+  </tr>
+</table>
+
+The features of the solution can still be seen slightly, but since the
+solution is smooth, the roughness of the dual solution entirely
+dominates the mesh refinement criterion, and leads to strongly
+concentrated meshes. The solution after the seventh refinement step is
+like so:
+
+<table width="80%" align="center">
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-14.step-13.solution-7.png" alt="" width="100%"></td>
+  </tr>
+</table>
+
+Obviously, the solution is worse at some places, but the mesh
+refinement process should have taken care that these places are not
+important for computing the point value.
+
+
+
+
+The next point is to compare the new (duality based) mesh refinement
+criterion with the old ones. These are the results:
+
+<img src="http://www.dealii.org/images/steps/developer/step-14.step-13.error-comparison.png" alt="">
+
+
+
+The results are, well, somewhat mixed. First, the Kelly indicator
+disqualifies itself by its unsteady behavior, changing the sign of the
+error several times, and with increasing errors under mesh
+refinement. The dual weighted error estimator has a monotone decrease
+in the error, and is better than the weighted Kelly and global
+refinement, but the margin is not as large as expected. This is, here,
+due to the fact that global refinement can exploit the regular
+structure of the meshes around the point of evaluation, which leads to
+a better order of convergence for the point error. However, if we had
+a mesh that is not locally rectangular, for example because we had to
+approximate curved boundaries, or if the coefficients were not
+constant, then this advantage of globally refined meshes would
+vanish, while the good performance of the duality based estimator
+would remain.
+
+
+
+
+<h2>Conclusions and outlook</h2>
+
+
+The results here do not indicate very clearly the superiority of the
+dual weighted error estimation approach for mesh refinement over other
+mesh refinement criteria, such as the Kelly indicator. This is due to
+the relative simplicity of the shown applications. If you are not
+convinced yet that this approach is indeed superior, you are invited
+to browse through the literature indicated in the introduction, where
+plenty of examples are provided where the dual weighted approach can
+reduce the necessary numerical work by orders of magnitude, making
+this the only way to compute certain quantities to reasonable
+accuracies at all.
+
+
+
+Besides the objections you may raise against its use as a mesh
+refinement criterion, consider that accurate knowledge of the error in
+the quantity one might want to compute is of great use, since we can
+stop computations when we are satisfied with the accuracy. Using more
+traditional approaches, it is very difficult to get accurate estimates
+for arbitrary quantities, except for, maybe, the error in the energy
+norm, and we will then have no guarantee that the result we computed
+satisfies any requirements on its accuracy. Also, as was shown for the
+evaluation of point values and derivatives, the error estimate can be
+used to extrapolate the results, yielding much higher accuracy in the
+quantity we want to know.
+
+
+
+Leaving these mathematical considerations, we tried to write the
+program in a modular way, such that implementing another test case, or
+another evaluation and dual functional is simple. You are encouraged
+to take the program as a basis for your own experiments, and to play a
+little.
+
+
+
+
+
diff --git a/examples/step-14/doc/tooltip b/examples/step-14/doc/tooltip
new file mode 100644
index 0000000..9e6cabd
--- /dev/null
+++ b/examples/step-14/doc/tooltip
@@ -0,0 +1 @@
+Duality based error estimates. Adaptivity.
diff --git a/examples/step-14/step-14.cc b/examples/step-14/step-14.cc
new file mode 100644
index 0000000..44c52c4
--- /dev/null
+++ b/examples/step-14/step-14.cc
@@ -0,0 +1,3088 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2002 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Author: Wolfgang Bangerth, ETH Zurich, 2002
+ */
+
+
+// Start out with well known things...
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/thread_management.h>
+#include <deal.II/base/std_cxx11/unique_ptr.h>
+#include <deal.II/base/work_stream.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/precondition.h>
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/grid_out.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/grid_refinement.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/fe_tools.h>
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/matrix_tools.h>
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/numerics/error_estimator.h>
+
+#include <iostream>
+#include <fstream>
+#include <list>
+#include <algorithm>
+#include <numeric>
+#include <sstream>
+
+// The last step is as in all previous programs:
+namespace Step14
+{
+  using namespace dealii;
+
+  // @sect3{Evaluating the solution}
+
+  // As mentioned in the introduction, significant parts of the program have
+  // simply been taken over from the step-13 example program. We therefore
+  // only comment on those things that are new.
+  //
+  // First, the framework for evaluation of solutions is unchanged, i.e. the
+  // base class is the same, and the class to evaluate the solution at a grid
+  // point is unchanged:
+  namespace Evaluation
+  {
+    // @sect4{The EvaluationBase class}
+    // Abstract interface of all evaluation (postprocessing) objects. Derived
+    // classes implement <code>operator()</code>, which receives a DoF handler
+    // and a solution vector. The number of the current refinement cycle can
+    // be stored through <code>set_refinement_cycle</code>.
+    template <int dim>
+    class EvaluationBase
+    {
+    public:
+      virtual ~EvaluationBase ();
+
+      // Store the number of the current refinement cycle in this object.
+      void set_refinement_cycle (const unsigned int refinement_cycle);
+
+      // Evaluation hook that every derived class has to implement.
+      virtual void operator () (const DoFHandler<dim> &dof_handler,
+                                const Vector<double>  &solution) const = 0;
+    protected:
+      // Value last passed to set_refinement_cycle(). The class declares no
+      // constructor, so this member is not initialized before the first such
+      // call.
+      unsigned int refinement_cycle;
+    };
+
+
+    // The destructor has nothing to do; it is defined out of line with an
+    // empty body.
+    template <int dim>
+    EvaluationBase<dim>::~EvaluationBase ()
+    {}
+
+
+
+    // Remember the number of the refinement cycle that is handed in; derived
+    // classes can read it through the protected member variable.
+    template <int dim>
+    void
+    EvaluationBase<dim>::set_refinement_cycle (const unsigned int step)
+    {
+      this->refinement_cycle = step;
+    }
+
+
+    // @sect4{The PointValueEvaluation class}
+    // Evaluation object that prints the value of the solution at a point
+    // which has to coincide with a vertex of the grid.
+    template <int dim>
+    class PointValueEvaluation : public EvaluationBase<dim>
+    {
+    public:
+      // Store a copy of the point at which the solution shall be evaluated.
+      PointValueEvaluation (const Point<dim>   &evaluation_point);
+
+      // Search the grid for the evaluation point and print the solution
+      // value found there.
+      virtual void operator () (const DoFHandler<dim> &dof_handler,
+                                const Vector<double>  &solution) const;
+
+      // Exception thrown if no vertex of the grid matches the evaluation
+      // point.
+      DeclException1 (ExcEvaluationPointNotFound,
+                      Point<dim>,
+                      << "The evaluation point " << arg1
+                      << " was not found among the vertices of the present grid.");
+    private:
+      // The point at which the solution is evaluated.
+      const Point<dim>  evaluation_point;
+    };
+
+
+    // The constructor only copies the evaluation point into the member
+    // variable of the same name.
+    template <int dim>
+    PointValueEvaluation<dim>::
+    PointValueEvaluation (const Point<dim>   &evaluation_point)
+      :
+      evaluation_point (evaluation_point)
+    {}
+
+
+
+    template <int dim>
+    void
+    PointValueEvaluation<dim>::
+    operator () (const DoFHandler<dim> &dof_handler,
+                 const Vector<double>  &solution) const
+    {
+      // Start with a recognizably bogus value; it is overwritten as soon as
+      // the evaluation point has been located.
+      double value_at_point = 1e20;
+      bool   found          = false;
+
+      // Walk over the active cells until one of them has a vertex that
+      // coincides with the evaluation point, up to a tolerance relative to
+      // the diameter of the cell.
+      const typename DoFHandler<dim>::active_cell_iterator
+      end_cell = dof_handler.end();
+      for (typename DoFHandler<dim>::active_cell_iterator
+           current = dof_handler.begin_active();
+           !found && (current != end_cell);
+           ++current)
+        for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+          {
+            const double gap = current->vertex(v).distance (evaluation_point);
+            if (gap < current->diameter() * 1e-8)
+              {
+                // Pick the value of the (first) degree of freedom located at
+                // this vertex and stop searching.
+                value_at_point = solution(current->vertex_dof_index(v,0));
+                found = true;
+                break;
+              }
+          }
+
+      // Complain if the point is not a vertex of the present grid...
+      AssertThrow (found,
+                   ExcEvaluationPointNotFound(evaluation_point));
+
+      // ...and otherwise report what we have found:
+      std::cout << "   Point value=" << value_at_point
+                << std::endl;
+    }
+
+
+    // @sect4{The PointXDerivativeEvaluation class}
+
+    // Besides the class implementing the evaluation of the solution at one
+    // point, we here provide one which evaluates the gradient at a grid
+    // point. Since in general the gradient of a finite element function is
+    // not continuous at a vertex, we have to be a little bit more careful
+    // here. What we do is to loop over all cells, even if we have found the
+    // point already on one cell, and use the mean value of the gradient at
+    // the vertex taken from all adjacent cells.
+    //
+    // Given the interface of the <code>PointValueEvaluation</code> class, the
+    // declaration of this class provides little surprise, and neither does
+    // the constructor:
+    // Evaluation object that prints the x-derivative of the solution at a
+    // point which has to coincide with a vertex of the grid.
+    template <int dim>
+    class PointXDerivativeEvaluation : public EvaluationBase<dim>
+    {
+    public:
+      // Store a copy of the point at which the derivative shall be evaluated.
+      PointXDerivativeEvaluation (const Point<dim>   &evaluation_point);
+
+      // Average the x-derivative over all cells adjacent to the evaluation
+      // point and print the result.
+      virtual void operator () (const DoFHandler<dim> &dof_handler,
+                                const Vector<double>  &solution) const;
+
+      // Exception thrown if no vertex of the grid matches the evaluation
+      // point.
+      DeclException1 (ExcEvaluationPointNotFound,
+                      Point<dim>,
+                      << "The evaluation point " << arg1
+                      << " was not found among the vertices of the present grid.");
+    private:
+      // The point at which the derivative is evaluated.
+      const Point<dim>  evaluation_point;
+    };
+
+
+    // The constructor only copies the evaluation point into the member
+    // variable of the same name.
+    template <int dim>
+    PointXDerivativeEvaluation<dim>::
+    PointXDerivativeEvaluation (const Point<dim>   &evaluation_point)
+      :
+      evaluation_point (evaluation_point)
+    {}
+
+
+    // The more interesting things happen inside the function doing the actual
+    // evaluation:
+    template <int dim>
+    void
+    PointXDerivativeEvaluation<dim>::
+    operator () (const DoFHandler<dim> &dof_handler,
+                 const Vector<double>  &solution) const
+    {
+      // This time initialize the return value with something useful, since we
+      // will have to add up a number of contributions and take the mean value
+      // afterwards...
+      double point_derivative = 0;
+
+      // ...then have some objects of which the meaning will become clear
+      // below...
+      QTrapez<dim>  vertex_quadrature;
+      FEValues<dim> fe_values (dof_handler.get_fe(),
+                               vertex_quadrature,
+                               update_gradients | update_quadrature_points);
+      std::vector<Tensor<1,dim> >
+      solution_gradients (vertex_quadrature.size());
+
+      // ...and next loop over all cells and their vertices, and count how
+      // often the vertex has been found. Points are compared up to a
+      // tolerance relative to the cell diameter, i.e. with the same criterion
+      // that the <code>PointValueEvaluation</code> class uses, rather than
+      // with an exact floating point comparison, so that round-off in the
+      // coordinates cannot make us miss the evaluation point:
+      typename DoFHandler<dim>::active_cell_iterator
+      cell = dof_handler.begin_active(),
+      endc = dof_handler.end();
+      unsigned int evaluation_point_hits = 0;
+      for (; cell!=endc; ++cell)
+        {
+          const double tolerance = cell->diameter() * 1e-8;
+
+          for (unsigned int vertex=0;
+               vertex<GeometryInfo<dim>::vertices_per_cell;
+               ++vertex)
+            if (cell->vertex(vertex).distance (evaluation_point) < tolerance)
+              {
+                // Things are now no longer as simple, since we can't get the
+                // gradient of the finite element field as before, where we
+                // simply had to pick one degree of freedom at a vertex.
+                //
+                // Rather, we have to evaluate the finite element field on
+                // this cell, and at a certain point. As you know, evaluating
+                // finite element fields at certain points is done through the
+                // <code>FEValues</code> class, so we use that. The question
+                // is: the <code>FEValues</code> object needs to be given a
+                // quadrature formula and can then compute the values of
+                // finite element quantities at the quadrature points. Here,
+                // we don't want to do quadrature, we simply want to specify
+                // some points!
+                //
+                // Nevertheless, the same way is chosen: use a special
+                // quadrature rule with points at the vertices, since these
+                // are what we are interested in. The appropriate rule is the
+                // trapezoidal rule, so that is the reason why we used that
+                // one above.
+                //
+                // Thus: initialize the <code>FEValues</code> object on this
+                // cell,
+                fe_values.reinit (cell);
+                // and extract the gradients of the solution vector at the
+                // vertices:
+                fe_values.get_function_gradients (solution,
+                                                  solution_gradients);
+
+                // Now we have the gradients at all vertices, so pick out that
+                // one which belongs to the evaluation point (note that the
+                // order of vertices is not necessarily the same as that of
+                // the quadrature points):
+                unsigned int q_point = 0;
+                for (; q_point<solution_gradients.size(); ++q_point)
+                  if (fe_values.quadrature_point(q_point).distance (evaluation_point)
+                      <
+                      tolerance)
+                    break;
+
+                // Check that the evaluation point was indeed found,
+                Assert (q_point < solution_gradients.size(),
+                        ExcInternalError());
+                // and if so take the x-derivative of the gradient there as
+                // the value which we are interested in, and increase the
+                // counter indicating how often we have added to that
+                // variable:
+                point_derivative += solution_gradients[q_point][0];
+                ++evaluation_point_hits;
+
+                // Finally break out of the innermost loop iterating over the
+                // vertices of the present cell, since if we have found the
+                // evaluation point at one vertex it cannot be at a following
+                // vertex as well:
+                break;
+              }
+        }
+
+      // Now we have looped over all cells and vertices, so check whether the
+      // point was found:
+      AssertThrow (evaluation_point_hits > 0,
+                   ExcEvaluationPointNotFound(evaluation_point));
+
+      // We have simply summed up the contributions of all adjacent cells, so
+      // we still have to compute the mean value. Once this is done, report
+      // the status:
+      point_derivative /= evaluation_point_hits;
+      std::cout << "   Point x-derivative=" << point_derivative
+                << std::endl;
+    }
+
+
+
+    // @sect4{The GridOutput class}
+
+    // Since this program has a more difficult structure (it computes a dual
+    // solution in addition to a primal one), writing out the solution is no
+    // longer done by an evaluation object, since we want to write both
+    // solutions at once into one file, and that requires some more
+    // information than is available to the evaluation classes.
+    //
+    // However, we also want to look at the grids generated. This again can be
+    // done with one such class. Its structure is analogous to the
+    // <code>SolutionOutput</code> class of the previous example program, so
+    // we do not discuss it here in more detail. Furthermore, everything that
+    // is used here has already been used in previous example programs.
+    // Evaluation object that writes the grid of the present refinement cycle
+    // into a file in EPS format.
+    template <int dim>
+    class GridOutput : public EvaluationBase<dim>
+    {
+    public:
+      // Store the leading part of the names of the files to be written.
+      GridOutput (const std::string &output_name_base);
+
+      // Write the triangulation underlying the DoF handler to a file; the
+      // solution vector is not used.
+      virtual void operator () (const DoFHandler<dim> &dof_handler,
+                                const Vector<double>  &solution) const;
+    private:
+      // Leading part of the output file names.
+      const std::string output_name_base;
+    };
+
+
+    // The constructor only copies the base name of the output files into the
+    // member variable of the same name.
+    template <int dim>
+    GridOutput<dim>::
+    GridOutput (const std::string &output_name_base)
+      :
+      output_name_base (output_name_base)
+    {}
+
+
+    // Write the present grid to the file
+    // <code>[output_name_base]-[refinement_cycle].eps</code>. The solution
+    // vector is ignored.
+    template <int dim>
+    void
+    GridOutput<dim>::operator () (const DoFHandler<dim> &dof_handler,
+                                  const Vector<double>  &/*solution*/) const
+    {
+      // Compose the file name. Note that, unlike with the old strstream
+      // classes, no terminating <code>std::ends</code> must be appended to a
+      // string stream: it would only put a stray NUL character into the
+      // resulting string.
+      std::ostringstream filename;
+      filename << output_name_base << "-"
+               << this->refinement_cycle
+               << ".eps";
+
+      std::ofstream out (filename.str().c_str());
+      GridOut().write_eps (dof_handler.get_triangulation(), out);
+    }
+  }
+
+
+  // @sect3{The Laplace solver classes}
+
+  // Next are the actual solver classes. Again, we discuss only the
+  // differences to the previous program.
+  namespace LaplaceSolver
+  {
+    // @sect4{The Laplace solver base class}
+
+    // This class is almost unchanged, with the exception that it declares two
+    // more functions: <code>output_solution</code> will be used to generate
+    // output files from the actual solutions computed by derived classes, and
+    // the <code>set_refinement_cycle</code> function by which the testing
+    // framework sets the number of the refinement cycle to a local variable
+    // in this class; this number is later used to generate filenames for the
+    // solution output.
+    // Abstract base class of all solver classes. It stores a pointer to the
+    // triangulation and the number of the current refinement cycle, and
+    // declares the interface that derived classes have to implement.
+    template <int dim>
+    class Base
+    {
+    public:
+      // Store a pointer to the given triangulation.
+      Base (Triangulation<dim> &coarse_grid);
+      virtual ~Base ();
+
+      // Pure virtual interface, to be implemented by derived classes.
+      virtual void solve_problem () = 0;
+      virtual void postprocess (const Evaluation::EvaluationBase<dim> &postprocessor) const = 0;
+      virtual void refine_grid () = 0;
+      virtual unsigned int n_dofs () const = 0;
+
+      // Store the number of the current refinement cycle.
+      virtual void set_refinement_cycle (const unsigned int cycle);
+
+      // Generate output from the computed solution(s); to be implemented by
+      // derived classes.
+      virtual void output_solution () const = 0;
+
+    protected:
+      // The triangulation this object works on.
+      const SmartPointer<Triangulation<dim> > triangulation;
+
+      // Value last passed to set_refinement_cycle().
+      unsigned int refinement_cycle;
+    };
+
+
+    // Store a pointer to the triangulation to work on. The refinement cycle
+    // counter is set to an invalid marker value, so that it has a
+    // well-defined state until <code>set_refinement_cycle</code> is called
+    // for the first time.
+    template <int dim>
+    Base<dim>::Base (Triangulation<dim> &coarse_grid)
+      :
+      triangulation (&coarse_grid),
+      refinement_cycle (numbers::invalid_unsigned_int)
+    {}
+
+
+    // The destructor has nothing to do; it is defined out of line with an
+    // empty body.
+    template <int dim>
+    Base<dim>::~Base ()
+    {}
+
+
+
+    // Remember the number of the refinement cycle that is handed in in the
+    // member variable of this class.
+    template <int dim>
+    void
+    Base<dim>::set_refinement_cycle (const unsigned int cycle)
+    {
+      this->refinement_cycle = cycle;
+    }
+
+
+    // @sect4{The Laplace Solver class}
+
+    // Likewise, the <code>Solver</code> class is entirely unchanged and will
+    // thus not be discussed.
+    template <int dim>
+    class Solver : public virtual Base<dim>
+    {
+    public:
+      // The finite element, the quadrature formulas and the boundary value
+      // function are not copied; only pointers to them are stored.
+      Solver (Triangulation<dim>       &triangulation,
+              const FiniteElement<dim> &fe,
+              const Quadrature<dim>    &quadrature,
+              const Quadrature<dim-1>  &face_quadrature,
+              const Function<dim>      &boundary_values);
+      virtual ~Solver ();
+
+      // Implementations of part of the interface declared in the base class.
+      virtual void         solve_problem ();
+      virtual void         postprocess (const Evaluation::EvaluationBase<dim> &postprocessor) const;
+      virtual unsigned int n_dofs () const;
+
+    protected:
+      const SmartPointer<const FiniteElement<dim> >  fe;
+      const SmartPointer<const Quadrature<dim> >     quadrature;
+      const SmartPointer<const Quadrature<dim-1> >   face_quadrature;
+      DoFHandler<dim>                                dof_handler;
+      Vector<double>                                 solution;
+      const SmartPointer<const Function<dim> >       boundary_values;
+
+      // Assembling the right hand side is left to derived classes.
+      virtual void assemble_rhs (Vector<double> &rhs) const = 0;
+
+    private:
+      // Constraints, sparsity pattern, matrix and right hand side vector of
+      // one linear system, together with a function to solve it.
+      struct LinearSystem
+      {
+        LinearSystem (const DoFHandler<dim> &dof_handler);
+
+        void solve (Vector<double> &solution) const;
+
+        ConstraintMatrix     hanging_node_constraints;
+        SparsityPattern      sparsity_pattern;
+        SparseMatrix<double> matrix;
+        Vector<double>       rhs;
+      };
+
+
+      // What follows is essentially copied from step-13 as well: the data
+      // structures and functions used to assemble the linear system in
+      // parallel with the WorkStream framework.
+
+      // Per-worker scratch object; it carries the FEValues object used
+      // while assembling on a cell.
+      struct AssemblyScratchData
+      {
+        AssemblyScratchData (const FiniteElement<dim> &fe,
+                             const Quadrature<dim>    &quadrature);
+        AssemblyScratchData (const AssemblyScratchData &scratch_data);
+
+        FEValues<dim>     fe_values;
+      };
+
+      // Result of the assembly on one cell: the local matrix and the global
+      // indices of the cell's degrees of freedom.
+      struct AssemblyCopyData
+      {
+        FullMatrix<double> cell_matrix;
+        std::vector<types::global_dof_index> local_dof_indices;
+      };
+
+
+      void assemble_linear_system (LinearSystem &linear_system);
+
+      void local_assemble_matrix (const typename DoFHandler<dim>::active_cell_iterator &cell,
+                                  AssemblyScratchData                                  &scratch_data,
+                                  AssemblyCopyData                                     &copy_data) const;
+
+      void copy_local_to_global (const AssemblyCopyData &copy_data,
+                                 LinearSystem           &linear_system) const;
+    };
+
+
+
+    // Hand the triangulation to the base class and to the DoF handler, and
+    // store pointers to the remaining arguments.
+    template <int dim>
+    Solver<dim>::Solver (Triangulation<dim>       &triangulation,
+                         const FiniteElement<dim> &fe,
+                         const Quadrature<dim>    &quadrature,
+                         const Quadrature<dim-1>  &face_quadrature,
+                         const Function<dim>      &boundary_values)
+      : Base<dim> (triangulation)
+      , fe (&fe)
+      , quadrature (&quadrature)
+      , face_quadrature (&face_quadrature)
+      , dof_handler (triangulation)
+      , boundary_values (&boundary_values)
+    {}
+
+
+    // Clear the DoF handler when the solver object is destroyed.
+    template <int dim>
+    Solver<dim>::~Solver ()
+    {
+      this->dof_handler.clear ();
+    }
+
+
+    // Distribute degrees of freedom, then set up, assemble and solve the
+    // linear system; the result ends up in <code>solution</code>.
+    template <int dim>
+    void Solver<dim>::solve_problem ()
+    {
+      // Enumerate the unknowns and size the solution vector accordingly
+      this->dof_handler.distribute_dofs (*this->fe);
+      this->solution.reinit (this->dof_handler.n_dofs());
+
+      // The linear system only lives for the duration of this function
+      LinearSystem system (this->dof_handler);
+      this->assemble_linear_system (system);
+      system.solve (this->solution);
+    }
+
+
+    // Hand the DoF handler and the computed solution to the given
+    // evaluation object.
+    template <int dim>
+    void
+    Solver<dim>::postprocess (const Evaluation::EvaluationBase<dim> &postprocessor) const
+    {
+      postprocessor (this->dof_handler, this->solution);
+    }
+
+
+    // Report the number of degrees of freedom known to the DoF handler.
+    template <int dim>
+    unsigned int Solver<dim>::n_dofs () const
+    {
+      return this->dof_handler.n_dofs();
+    }
+
+
+    // The following few functions and constructors are verbatim
+    // copies taken from step-13:
+    // Assemble the matrix and the right hand side of the given linear
+    // system, then apply hanging node constraints and boundary values.
+    template <int dim>
+    void
+    Solver<dim>::assemble_linear_system (LinearSystem &linear_system)
+    {
+      // Assemble the right hand side on a separate task while the matrix is
+      // being built below. The task is joined further down, before the
+      // right hand side vector is used.
+      Threads::Task<> rhs_task = Threads::new_task (&Solver<dim>::assemble_rhs,
+                                                    *this,
+                                                    linear_system.rhs);
+
+      // Build the matrix over all active cells: local_assemble_matrix()
+      // computes one cell's contribution, copy_local_to_global() adds it to
+      // the global matrix.
+      WorkStream::run(dof_handler.begin_active(),
+                      dof_handler.end(),
+                      std_cxx11::bind(&Solver<dim>::local_assemble_matrix,
+                                      this,
+                                      std_cxx11::_1,
+                                      std_cxx11::_2,
+                                      std_cxx11::_3),
+                      std_cxx11::bind(&Solver<dim>::copy_local_to_global,
+                                      this,
+                                      std_cxx11::_1,
+                                      std_cxx11::ref(linear_system)),
+                      AssemblyScratchData(*fe, *quadrature),
+                      AssemblyCopyData());
+      linear_system.hanging_node_constraints.condense (linear_system.matrix);
+
+      // Collect the values of the boundary value function for boundary
+      // indicator 0.
+      std::map<types::global_dof_index,double> boundary_value_map;
+      VectorTools::interpolate_boundary_values (dof_handler,
+                                                0,
+                                                *boundary_values,
+                                                boundary_value_map);
+
+      // The right hand side must be complete before it can be condensed.
+      rhs_task.join ();
+      linear_system.hanging_node_constraints.condense (linear_system.rhs);
+
+      MatrixTools::apply_boundary_values (boundary_value_map,
+                                          linear_system.matrix,
+                                          solution,
+                                          linear_system.rhs);
+    }
+
+
+    // Set up the FEValues object so that it provides shape function
+    // gradients and JxW values.
+    template <int dim>
+    Solver<dim>::AssemblyScratchData::AssemblyScratchData (const FiniteElement<dim> &fe,
+                                                           const Quadrature<dim>    &quadrature)
+      : fe_values (fe, quadrature, update_gradients | update_JxW_values)
+    {}
+
+
+    // Copying builds a fresh FEValues object from the finite element and
+    // quadrature formula of the object being copied, with the same update
+    // flags as in the other constructor.
+    template <int dim>
+    Solver<dim>::AssemblyScratchData::AssemblyScratchData (const AssemblyScratchData &scratch_data)
+      : fe_values (scratch_data.fe_values.get_fe(),
+                   scratch_data.fe_values.get_quadrature(),
+                   update_gradients | update_JxW_values)
+    {}
+
+
+    // Compute the contribution of one cell to the matrix and store it,
+    // together with the cell's global DoF indices, in
+    // <code>copy_data</code>.
+    template <int dim>
+    void
+    Solver<dim>::local_assemble_matrix (const typename DoFHandler<dim>::active_cell_iterator &cell,
+                                        AssemblyScratchData                                  &scratch_data,
+                                        AssemblyCopyData                                     &copy_data) const
+    {
+      const unsigned int n_local_dofs = fe->dofs_per_cell;
+      const unsigned int n_quad_pts   = quadrature->size();
+
+      // Short names for the objects worked on below
+      FEValues<dim>      &fe_values    = scratch_data.fe_values;
+      FullMatrix<double> &local_matrix = copy_data.cell_matrix;
+
+      fe_values.reinit (cell);
+
+      // Size the output objects for this cell and record its DoF indices
+      local_matrix.reinit (n_local_dofs, n_local_dofs);
+      copy_data.local_dof_indices.resize (n_local_dofs);
+      cell->get_dof_indices (copy_data.local_dof_indices);
+
+      // Sum grad(phi_i) * grad(phi_j) * JxW over all quadrature points
+      for (unsigned int q=0; q<n_quad_pts; ++q)
+        for (unsigned int row=0; row<n_local_dofs; ++row)
+          for (unsigned int col=0; col<n_local_dofs; ++col)
+            local_matrix(row,col) += (fe_values.shape_grad(row,q) *
+                                      fe_values.shape_grad(col,q) *
+                                      fe_values.JxW(q));
+    }
+
+
+
+    // Add the entries of one cell matrix to the global matrix, using the
+    // stored global DoF indices as row and column numbers.
+    template <int dim>
+    void
+    Solver<dim>::copy_local_to_global(const AssemblyCopyData &copy_data,
+                                      LinearSystem           &linear_system) const
+    {
+      const std::vector<types::global_dof_index> &indices = copy_data.local_dof_indices;
+
+      for (unsigned int row=0; row<indices.size(); ++row)
+        for (unsigned int col=0; col<indices.size(); ++col)
+          linear_system.matrix.add (indices[row],
+                                    indices[col],
+                                    copy_data.cell_matrix(row,col));
+    }
+
+
+    // Now for the functions that implement actions in the linear
+    // system class. First, the constructor initializes all data
+    // elements to their correct sizes, and sets up a number of
+    // additional data structures, such as constraints due to hanging
+    // nodes. Since setting up the hanging nodes and finding out about
+    // the nonzero elements of the matrix is independent, we do that
+    // in parallel (if the library was configured to use concurrency,
+    // at least; otherwise, the actions are performed
+    // sequentially). Note that we start only one task, and do the
+    // second action on the main thread. Since only one task is
+    // created, we don't use the <code>Threads::TaskGroup</code>
+    // class here, but rather use the one created task object
+    // directly to wait for this particular task's completion. The
+    // approach is generally the same as the one we have used in
+    // <code>Solver::assemble_linear_system()</code> above.
+    //
+    // Note that taking the address of the
+    // <code>DoFTools::make_hanging_node_constraints</code> function
+    // is a little tricky, since there are actually three functions of
+    // this name, one for each supported space dimension. Taking
+    // addresses of overloaded functions is somewhat complicated in
+    // C++, since the address-of operator <code>&</code> in that case
+    // returns a set of values (the addresses of all
+    // functions with that name), and selecting the right one is then
+    // the next step. If the context dictates which one to take (for
+    // example by assigning to a function pointer of known type), then
+    // the compiler can do that by itself, but if this set of pointers
+    // shall be given as the argument to a function that takes a
+    // template, the compiler could choose all without having a
+    // preference for one. We therefore have to make it clear to the
+    // compiler which one we would like to have; for this, we could
+    // use a cast, but for more clarity, we assign it to a temporary
+    // <code>mhnc_p</code> (short for <code>pointer to
+    // make_hanging_node_constraints</code>) with the right type, and
+    // using this pointer instead.
+    // Set up the hanging node constraints, the sparsity pattern, the matrix
+    // and the right hand side vector for the given DoF handler.
+    template <int dim>
+    Solver<dim>::LinearSystem::
+    LinearSystem (const DoFHandler<dim> &dof_handler)
+    {
+      hanging_node_constraints.clear ();
+
+      // Select the make_hanging_node_constraints() overload that matches
+      // this signature by assigning it to a function pointer of that type.
+      void (*mhnc_p) (const DoFHandler<dim> &,
+                      ConstraintMatrix &)
+        = &DoFTools::make_hanging_node_constraints;
+
+      // Start a side task then continue on the main thread
+      Threads::Task<> side_task
+        = Threads::new_task (mhnc_p,
+                             dof_handler,
+                             hanging_node_constraints);
+
+      // Meanwhile, build the sparsity pattern in a dynamic data structure.
+      DynamicSparsityPattern dsp(dof_handler.n_dofs(), dof_handler.n_dofs());
+      DoFTools::make_sparsity_pattern (dof_handler, dsp);
+
+
+
+      // Wait for the side task to be done before going further
+      side_task.join();
+
+      // The constraints are complete only now, so they can be closed and
+      // applied to the sparsity pattern.
+      hanging_node_constraints.close ();
+      hanging_node_constraints.condense (dsp);
+      sparsity_pattern.copy_from(dsp);
+
+      matrix.reinit (sparsity_pattern);
+      rhs.reinit (dof_handler.n_dofs());
+    }
+
+
+
+    // Solve the linear system with the conjugate gradient method and an
+    // SSOR preconditioner, then distribute the hanging node constraints
+    // to the solution vector.
+    template <int dim>
+    void Solver<dim>::LinearSystem::solve (Vector<double> &solution) const
+    {
+      // SSOR preconditioner, initialized with the parameter 1.2
+      PreconditionSSOR<> ssor;
+      ssor.initialize (matrix, 1.2);
+
+      // CG solver controlled by the values 5000 and 1e-12
+      SolverControl control (5000, 1e-12);
+      SolverCG<>    cg_solver (control);
+      cg_solver.solve (matrix, solution, rhs, ssor);
+
+      hanging_node_constraints.distribute (solution);
+    }
+
+
+
+
+    // @sect4{The PrimalSolver class}
+
+    // The <code>PrimalSolver</code> class is also mostly unchanged except for
+    // implementing the <code>output_solution</code> function. We keep the
+    // <code>RefinementGlobal</code> and <code>RefinementKelly</code> classes
+    // in this program, and they can then rely on the default implementation
+    // of this function which simply outputs the primal solution. The class
+    // implementing dual weighted error estimators will override this function
+    // itself, to also output the dual solution.
+    template <int dim>
+    class PrimalSolver : public Solver<dim>
+    {
+    public:
+      // In addition to what the base class needs, this class takes the
+      // right hand side function; only a pointer to it is stored.
+      PrimalSolver (Triangulation<dim>       &triangulation,
+                    const FiniteElement<dim> &fe,
+                    const Quadrature<dim>    &quadrature,
+                    const Quadrature<dim-1>  &face_quadrature,
+                    const Function<dim>      &rhs_function,
+                    const Function<dim>      &boundary_values);
+
+      // Write the primal solution to a file.
+      virtual void output_solution () const;
+
+    protected:
+      const SmartPointer<const Function<dim> > rhs_function;
+
+      // Assemble the right hand side vector from <code>rhs_function</code>.
+      virtual void assemble_rhs (Vector<double> &rhs) const;
+    };
+
+
+    // Forward everything but the right hand side function to the base
+    // classes; the virtual base class is initialized explicitly.
+    template <int dim>
+    PrimalSolver<dim>::PrimalSolver (Triangulation<dim>       &triangulation,
+                                     const FiniteElement<dim> &fe,
+                                     const Quadrature<dim>    &quadrature,
+                                     const Quadrature<dim-1>  &face_quadrature,
+                                     const Function<dim>      &rhs_function,
+                                     const Function<dim>      &boundary_values)
+      : Base<dim> (triangulation)
+      , Solver<dim> (triangulation,
+                     fe,
+                     quadrature,
+                     face_quadrature,
+                     boundary_values)
+      , rhs_function (&rhs_function)
+    {}
+
+
+
+    // Write the primal solution in gnuplot format to the file
+    // <code>solution-N.gnuplot</code>, where N is the current value of
+    // <code>refinement_cycle</code>.
+    template <int dim>
+    void
+    PrimalSolver<dim>::output_solution () const
+    {
+      DataOut<dim> data_out;
+      data_out.attach_dof_handler (this->dof_handler);
+      data_out.add_data_vector (this->solution, "solution");
+      data_out.build_patches ();
+
+      // No <code>std::ends</code> here: it is only needed for the old
+      // strstream classes. With std::ostringstream it would store a NUL
+      // character as part of the file name string.
+      std::ostringstream filename;
+      filename << "solution-"
+               << this->refinement_cycle
+               << ".gnuplot";
+
+      std::ofstream out (filename.str().c_str());
+      data_out.write (out, DataOutBase::gnuplot);
+    }
+
+
+
+    // Add to <code>rhs</code> the integrals of the right hand side function
+    // against the shape functions, computed cell by cell with the quadrature
+    // formula stored in this object.
+    template <int dim>
+    void
+    PrimalSolver<dim>::assemble_rhs (Vector<double> &rhs) const
+    {
+      const unsigned int n_local_dofs = this->fe->dofs_per_cell;
+      const unsigned int n_quad_pts   = this->quadrature->size();
+
+      FEValues<dim> fe_values (*this->fe, *this->quadrature,
+                               update_values  | update_quadrature_points  |
+                               update_JxW_values);
+
+      // Work arrays that are reused on every cell
+      std::vector<types::global_dof_index> dof_indices (n_local_dofs);
+      std::vector<double>                  f_at_q (n_quad_pts);
+      Vector<double>                       local_rhs (n_local_dofs);
+
+      for (typename DoFHandler<dim>::active_cell_iterator
+           cell = this->dof_handler.begin_active(),
+           endc = this->dof_handler.end();
+           cell!=endc; ++cell)
+        {
+          fe_values.reinit (cell);
+
+          // Evaluate the right hand side function at this cell's
+          // quadrature points
+          rhs_function->value_list (fe_values.get_quadrature_points(),
+                                    f_at_q);
+
+          local_rhs = 0;
+          for (unsigned int q=0; q<n_quad_pts; ++q)
+            for (unsigned int k=0; k<n_local_dofs; ++k)
+              local_rhs(k) += (fe_values.shape_value(k,q) *
+                               f_at_q[q] *
+                               fe_values.JxW(q));
+
+          // Transfer the local contributions to the global vector
+          cell->get_dof_indices (dof_indices);
+          for (unsigned int k=0; k<n_local_dofs; ++k)
+            rhs(dof_indices[k]) += local_rhs(k);
+        }
+    }
+
+
+    // @sect4{The RefinementGlobal and RefinementKelly classes}
+
+    // For the following two classes, the same applies as for most of the
+    // above: the class is taken from the previous example as-is:
+    template <int dim>
+    class RefinementGlobal : public PrimalSolver<dim>
+    {
+    public:
+      // All arguments are passed on to the base classes.
+      RefinementGlobal (Triangulation<dim>       &coarse_grid,
+                        const FiniteElement<dim> &fe,
+                        const Quadrature<dim>    &quadrature,
+                        const Quadrature<dim-1>  &face_quadrature,
+                        const Function<dim>      &rhs_function,
+                        const Function<dim>      &boundary_values);
+
+      // Refine the whole mesh.
+      virtual void refine_grid ();
+    };
+
+
+
+    // Pass all arguments on to the base classes; the virtual base class is
+    // initialized explicitly.
+    template <int dim>
+    RefinementGlobal<dim>::RefinementGlobal (Triangulation<dim>       &coarse_grid,
+                                             const FiniteElement<dim> &fe,
+                                             const Quadrature<dim>    &quadrature,
+                                             const Quadrature<dim-1>  &face_quadrature,
+                                             const Function<dim>      &rhs_function,
+                                             const Function<dim>      &boundary_values)
+      : Base<dim> (coarse_grid)
+      , PrimalSolver<dim> (coarse_grid,
+                           fe,
+                           quadrature,
+                           face_quadrature,
+                           rhs_function,
+                           boundary_values)
+    {}
+
+
+
+    // Perform one step of global refinement on the triangulation.
+    template <int dim>
+    void RefinementGlobal<dim>::refine_grid ()
+    {
+      const unsigned int n_refinement_steps = 1;
+      this->triangulation->refine_global (n_refinement_steps);
+    }
+
+
+
+    template <int dim>
+    class RefinementKelly : public PrimalSolver<dim>
+    {
+    public:
+      // All arguments are passed on to the base classes.
+      RefinementKelly (Triangulation<dim>       &coarse_grid,
+                       const FiniteElement<dim> &fe,
+                       const Quadrature<dim>    &quadrature,
+                       const Quadrature<dim-1>  &face_quadrature,
+                       const Function<dim>      &rhs_function,
+                       const Function<dim>      &boundary_values);
+
+      // Refine the mesh based on the Kelly error estimator.
+      virtual void refine_grid ();
+    };
+
+
+
+    // Pass all arguments on to the base classes; the virtual base class is
+    // initialized explicitly.
+    template <int dim>
+    RefinementKelly<dim>::RefinementKelly (Triangulation<dim>       &coarse_grid,
+                                           const FiniteElement<dim> &fe,
+                                           const Quadrature<dim>    &quadrature,
+                                           const Quadrature<dim-1>  &face_quadrature,
+                                           const Function<dim>      &rhs_function,
+                                           const Function<dim>      &boundary_values)
+      : Base<dim> (coarse_grid)
+      , PrimalSolver<dim> (coarse_grid,
+                           fe,
+                           quadrature,
+                           face_quadrature,
+                           rhs_function,
+                           boundary_values)
+    {}
+
+
+
+    template <int dim>
+    void
+    RefinementKelly<dim>::refine_grid ()
+    {
+      Vector<float> estimated_error_per_cell (this->triangulation->n_active_cells());
+      KellyErrorEstimator<dim>::estimate (this->dof_handler,
+                                          QGauss<dim-1>(3),
+                                          typename FunctionMap<dim>::type(),
+                                          this->solution,
+                                          estimated_error_per_cell);
+      GridRefinement::refine_and_coarsen_fixed_number (*this->triangulation,
+                                                       estimated_error_per_cell,
+                                                       0.3, 0.03);
+      this->triangulation->execute_coarsening_and_refinement ();
+    }
+
+
+
+    // @sect4{The RefinementWeightedKelly class}
+
+    // This class is a variant of the previous one, in that it allows
+    // weighting the refinement indicators we get from the library's Kelly
+    // indicator by some function. We include this class since the goal of
+    // this example program is to demonstrate automatic refinement criteria
+    // even for complex output quantities such as point values or stresses. If
+    // we did not solve a dual problem and compute the weights thereof, we
+    // would probably be tempted to give a hand-crafted weighting to the
+    // indicators to account for the fact that we are going to evaluate these
+    // quantities. This class accepts such a weighting function as argument to
+    // its constructor:
+    template <int dim>
+    class RefinementWeightedKelly : public PrimalSolver<dim>
+    {
+    public:
+      // Takes the same arguments as the other refinement classes plus the
+      // weighting function; only a pointer to the latter is stored.
+      RefinementWeightedKelly (Triangulation<dim>       &coarse_grid,
+                               const FiniteElement<dim> &fe,
+                               const Quadrature<dim>    &quadrature,
+                               const Quadrature<dim-1>  &face_quadrature,
+                               const Function<dim>      &rhs_function,
+                               const Function<dim>      &boundary_values,
+                               const Function<dim>      &weighting_function);
+
+      // Refine the mesh based on weighted Kelly indicators.
+      virtual void refine_grid ();
+
+    private:
+      // Function by which the Kelly indicators are multiplied.
+      const SmartPointer<const Function<dim> > weighting_function;
+    };
+
+
+
+    // Pass everything but the weighting function on to the base classes,
+    // and store a pointer to the weighting function.
+    template <int dim>
+    RefinementWeightedKelly<dim>::RefinementWeightedKelly (Triangulation<dim>       &coarse_grid,
+                                                           const FiniteElement<dim> &fe,
+                                                           const Quadrature<dim>    &quadrature,
+                                                           const Quadrature<dim-1>  &face_quadrature,
+                                                           const Function<dim>      &rhs_function,
+                                                           const Function<dim>      &boundary_values,
+                                                           const Function<dim>      &weighting_function)
+      : Base<dim> (coarse_grid)
+      , PrimalSolver<dim> (coarse_grid,
+                           fe,
+                           quadrature,
+                           face_quadrature,
+                           rhs_function,
+                           boundary_values)
+      , weighting_function (&weighting_function)
+    {}
+
+
+
+    // Now, here comes the main function, including the weighting:
+    template <int dim>
+    void
+    RefinementWeightedKelly<dim>::refine_grid ()
+    {
+      // First compute some residual based error indicators for all cells by a
+      // method already implemented in the library. What exactly we compute
+      // here is described in more detail in the documentation of that class.
+      Vector<float> estimated_error_per_cell (this->triangulation->n_active_cells());
+      KellyErrorEstimator<dim>::estimate (this->dof_handler,
+                                          *this->face_quadrature,
+                                          typename FunctionMap<dim>::type(),
+                                          this->solution,
+                                          estimated_error_per_cell);
+
+      // Next weigh each entry in the vector of indicators by the value of the
+      // function given to the constructor, evaluated at the cell center. We need
+      // to write the result into the vector entry that corresponds to the current
+      // cell, which we can obtain by asking the cell what its index among all
+      // active cells is using CellAccessor::active_cell_index(). (In reality,
+      // this index is zero for the first cell we handle in the loop, one for the
+      // second cell, etc., and we could as well just keep track of this index
+      // using an integer counter; but using CellAccessor::active_cell_index()
+      // makes this more explicit.)
+      typename DoFHandler<dim>::active_cell_iterator
+      cell = this->dof_handler.begin_active(),
+      endc = this->dof_handler.end();
+      for (; cell!=endc; ++cell)
+        estimated_error_per_cell(cell->active_cell_index())
+        *= weighting_function->value (cell->center());
+
+      GridRefinement::refine_and_coarsen_fixed_number (*this->triangulation,
+                                                       estimated_error_per_cell,
+                                                       0.3, 0.03);
+      this->triangulation->execute_coarsening_and_refinement ();
+    }
+
+  }
+
+
+  // @sect3{Equation data}
+  //
+  // In this example program, we work with the same data sets as in the
+  // previous one, but as it may so happen that someone wants to run the
+  // program with different boundary values and right hand side functions, or
+  // on a different grid, we show a simple technique to do exactly that. For
+  // more clarity, we furthermore pack everything that has to do with equation
+  // data into a namespace of its own.
+  //
+  // The underlying assumption is that this is a research program, and that
+  // there we often have a number of test cases that consist of a domain, a
+  // right hand side, boundary values, possibly a specified coefficient, and a
+  // number of other parameters. They often vary all at the same time when
+  // shifting from one example to another. To make handling such sets of
+  // problem description parameters simple is the goal of the following.
+  //
+  // Basically, the idea is this: let us have a structure for each set of
+  // data, in which we pack everything that describes a test case: here, these
+  // are two subclasses, one called <code>BoundaryValues</code> for the
+  // boundary values of the exact solution, and one called
+  // <code>RightHandSide</code>, and then a way to generate the coarse
+  // grid. Since the solution of the previous example program looked like
+  // curved ridges, we use this name here for the enclosing class. Note that
+  // the names of the two inner classes have to be the same for all enclosing
+  // test case classes, and also that we have attached the dimension template
+  // argument to the enclosing class rather than to the inner ones, to make
+  // further processing simpler.  (From a language viewpoint, a namespace
+  // would be better to encapsulate these inner classes, rather than a
+  // structure. However, namespaces cannot be given as template arguments, so
+  // we use a structure to allow a second object to select from within its
+  // given argument. The enclosing structure, of course, has no member
+  // variables apart from the classes it declares, and a static function to
+  // generate the coarse mesh; it will in general never be instantiated.)
+  //
+  // The idea is then the following (this is the right time to also take a
+  // brief look at the code below): we can generate objects for boundary
+  // values and right hand side by simply giving the name of the outer class
+  // as a template argument to a class which we call here
+  // <code>Data::SetUp</code>, and it then creates objects for the inner
+  // classes. In this case, to get all that characterizes the curved ridge
+  // solution, we would simply generate an instance of
+  // <code>Data::SetUp@<Data::CurvedRidges@></code>, and everything we need to
+  // know about the solution would be static member variables and functions of
+  // that object.
+  //
+  // This approach might seem like overkill in this case, but will become very
+  // handy once a certain set up is not only characterized by Dirichlet
+  // boundary values and a right hand side function, but in addition by
+  // material properties, Neumann values, different boundary descriptors,
+  // etc. In that case, the <code>SetUp</code> class might consist of a dozen
+  // or more objects, and each descriptor class (like the
+  // <code>CurvedRidges</code> class below) would have to provide them. Then,
+  // you will be happy to be able to change from one set of data to another by
+  // only changing the template argument to the <code>SetUp</code> class at
+  // one place, rather than at many.
+  //
+  // With this framework for different test cases, we are almost finished, but
+  // one thing remains: by now we can select statically, by changing one
+  // template argument, which data set to choose. In order to be able to do
+  // that dynamically, i.e. at run time, we need a base class. This we provide
+  // in the obvious way, see below, with virtual abstract functions. It forces
+  // us to introduce a second template parameter <code>dim</code> which we
+  // need for the base class (which could be avoided using some template
+  // magic, but we omit that), but that's all.
+  //
+  // Adding new testcases is now simple, you don't have to touch the framework
+  // classes, only a structure like the <code>CurvedRidges</code> one is
+  // needed.
+  namespace Data
+  {
+    // @sect4{The SetUpBase and SetUp classes}
+
+    // Based on the above description, the <code>SetUpBase</code> class then
+    // looks as follows. To allow using the <code>SmartPointer</code> class
+    // with this class, we derived from the <code>Subscriptor</code> class.
+    template <int dim>
+    struct SetUpBase : public Subscriptor
+    {
+      // Access to the boundary values of a test case.
+      virtual const Function<dim> &get_boundary_values () const = 0;
+
+      // Access to the right hand side function of a test case.
+      virtual const Function<dim> &get_right_hand_side () const = 0;
+
+      // Fill the given triangulation with the coarse mesh of a test case.
+      virtual void create_coarse_grid (Triangulation<dim> &coarse_grid) const = 0;
+    };
+
+
+    // And now for the derived class that takes the template argument as
+    // explained above.
+    //
+    // Here we pack the data elements into private variables, and allow access
+    // to them through the methods of the base class.
+    template <class Traits, int dim>
+    struct SetUp : public SetUpBase<dim>
+    {
+      // Implementations of the base class interface, in terms of the
+      // classes and the function provided by <code>Traits</code>.
+      virtual const Function<dim> &get_boundary_values () const;
+      virtual const Function<dim> &get_right_hand_side () const;
+      virtual void create_coarse_grid (Triangulation<dim> &coarse_grid) const;
+
+    private:
+      // One object of each of the two function classes of the test case.
+      static const typename Traits::BoundaryValues boundary_values;
+      static const typename Traits::RightHandSide  right_hand_side;
+    };
+
+    // We have to provide definitions for the static member variables of the
+    // above class:
+    template <class Traits, int dim>
+    const typename Traits::BoundaryValues
+    SetUp<Traits,dim>::boundary_values;
+
+    template <class Traits, int dim>
+    const typename Traits::RightHandSide
+    SetUp<Traits,dim>::right_hand_side;
+
+    // And definitions of the member functions:
+    // Return a reference to the static boundary values object.
+    template <class Traits, int dim>
+    const Function<dim> &SetUp<Traits,dim>::get_boundary_values () const
+    {
+      const Function<dim> &values = boundary_values;
+      return values;
+    }
+
+
+    // Return a reference to the static right hand side object.
+    template <class Traits, int dim>
+    const Function<dim> &SetUp<Traits,dim>::get_right_hand_side () const
+    {
+      const Function<dim> &rhs = right_hand_side;
+      return rhs;
+    }
+
+
+    // Delegate mesh generation to the static function of the same name in
+    // the <code>Traits</code> class.
+    template <class Traits, int dim>
+    void
+    SetUp<Traits,dim>::create_coarse_grid (Triangulation<dim> &coarse_grid) const
+    {
+      Traits::create_coarse_grid (coarse_grid);
+    }
+
+
+    // @sect4{The CurvedRidges class}
+
+    // The class that is used to describe the boundary values and right hand
+    // side of the <code>curved ridge</code> problem already used in the
+    // step-13 example program is then like so:
+    template <int dim>
+    struct CurvedRidges
+    {
+      // Boundary values of the test case.
+      class BoundaryValues : public Function<dim>
+      {
+      public:
+        BoundaryValues ()
+          : Function<dim> ()
+        {}
+
+        virtual double value (const Point<dim>   &p,
+                              const unsigned int  component) const;
+      };
+
+
+      // Right hand side function of the test case.
+      class RightHandSide : public Function<dim>
+      {
+      public:
+        RightHandSide ()
+          : Function<dim> ()
+        {}
+
+        virtual double value (const Point<dim>   &p,
+                              const unsigned int  component) const;
+      };
+
+      // Generate the coarse mesh of the test case.
+      static void create_coarse_grid (Triangulation<dim> &coarse_grid);
+    };
+
+
+    // Evaluate exp(q) with q = p(0) + sum_{i>=1} sin(10*p(i) + 5*p(0)^2).
+    // The component argument is ignored.
+    template <int dim>
+    double
+    CurvedRidges<dim>::BoundaryValues::value (const Point<dim>   &p,
+                                              const unsigned int  /*component*/) const
+    {
+      double exponent = p(0);
+      for (unsigned int d=1; d<dim; ++d)
+        exponent += std::sin(10*p(d)+5*p(0)*p(0));
+
+      return std::exp(exponent);
+    }
+
+
+
+    // Evaluate -u*(t1^2 + t2 + t3), where u = exp(q) with
+    // q = p(0) + sum_{i>=1} sin(10*p(i) + 5*p(0)^2), and t1, t2, t3 are
+    // the sums accumulated in the loop below. The component argument is
+    // ignored.
+    template <int dim>
+    double
+    CurvedRidges<dim>::RightHandSide::value (const Point<dim>   &p,
+                                             const unsigned int  /*component*/) const
+    {
+      double q  = p(0);
+      double t1 = 1;
+      double t2 = 0;
+      double t3 = 0;
+
+      for (unsigned int d=1; d<dim; ++d)
+        {
+          // Sine and cosine share the same argument in every term
+          const double arg     = 10*p(d)+5*p(0)*p(0);
+          const double sin_arg = std::sin(arg);
+          const double cos_arg = std::cos(arg);
+
+          q  += sin_arg;
+          t1 += cos_arg * 10 * p(0);
+          t2 += 10*cos_arg -
+                100*sin_arg * p(0)*p(0);
+          t3 += 100*cos_arg*cos_arg -
+                100*sin_arg;
+        }
+
+      const double u          = std::exp(q);
+      const double t1_squared = t1*t1;
+
+      return -u*(t1_squared+t2+t3);
+    }
+
+
+    // Coarse mesh of the curved ridges test case.
+    template <int dim>
+    void CurvedRidges<dim>::create_coarse_grid (Triangulation<dim> &coarse_grid)
+    {
+      // Hyper cube with bounds -1 and 1, followed by two global refinements:
+      GridGenerator::hyper_cube (coarse_grid, -1, 1);
+      coarse_grid.refine_global (2);
+    }
+
+
+    // @sect4{The Exercise_2_3 class}
+
+    // This example program was written while giving practical courses for a
+    // lecture on adaptive finite element methods and duality based error
+    // estimates. For these courses, we had one exercise, which required us to
+    // solve the Laplace equation with constant right hand side on a square
+    // domain with a square hole in the center, and zero boundary
+    // values. Since the implementation of the properties of this problem is
+    // so particularly simple here, let's do it. As the number of the exercise
+    // was 2.3, we take the liberty to retain this name for the class as well.
+    template <int dim>
+    struct Exercise_2_3
+    {
+      // The boundary values of this problem are identically zero, so no
+      // class of our own is needed; a typedef to the library's zero function
+      // suffices:
+      typedef ZeroFunction<dim> BoundaryValues;
+
+      // The right hand side is constant. We derive from the library's
+      // constant function class and fix the value to one in the
+      // constructor:
+      class RightHandSide : public ConstantFunction<dim>
+      {
+      public:
+        RightHandSide ()
+          : ConstantFunction<dim> (1.)
+        {}
+      };
+
+      // Function generating the coarse grid. Its implementation is more
+      // involved than the rest of this class; see immediately below.
+      static void create_coarse_grid (Triangulation<dim> &coarse_grid);
+    };
+
+
+    // As stated above, the grid for this example is the square [-1,1]^2 with
+    // the square [-1/2,1/2]^2 as hole in it. We create the coarse grid as 4
+    // times 4 cells with the middle four ones missing. To understand how
+    // exactly the mesh is going to look, it may be simplest to just look
+    // at the "Results" section of this tutorial program first. In general,
+    // if you'd like to understand more about creating meshes either from
+    // scratch by hand, as we do here, or using other techniques, you
+    // should take a look at step-49.
+    //
+    // Of course, the example has an extension to 3d, but since this function
+    // cannot be written in a dimension independent way we choose not to
+    // implement this here, but rather only specialize the template for
+    // dim=2. If you compile the program for 3d, you'll get a message from the
+    // linker that this function is not implemented for 3d, and needs to be
+    // provided.
+    //
+    // For the creation of this geometry, the library has no predefined
+    // method. In this case, the geometry is still simple enough to do the
+    // creation by hand, rather than using a mesh generator.
+    template <>
+    void
+    Exercise_2_3<2>::
+    create_coarse_grid (Triangulation<2> &coarse_grid)
+    {
+      // We first define the space dimension, to allow those parts of the
+      // function that are actually dimension independent to use this
+      // variable. That makes it simpler if you later take this as a starting
+      // point to implement a 3d version of this mesh. The next step is then
+      // to have a list of vertices. Here, they are 24 (5 times 5, with the
+      // middle one omitted). It is probably best to draw a sketch here. Note
+      // that we leave the number of vertices open at first, but then let the
+      // compiler compute this number afterwards. This reduces the possibility
+      // of having the dimension too large and leaving the last ones
+      // uninitialized.
+      const unsigned int dim = 2;
+
+      static const Point<2> vertices_1[]
+        = {  Point<2> (-1.,   -1.),
+             Point<2> (-1./2, -1.),
+             Point<2> (0.,    -1.),
+             Point<2> (+1./2, -1.),
+             Point<2> (+1,    -1.),
+
+             Point<2> (-1.,   -1./2.),
+             Point<2> (-1./2, -1./2.),
+             Point<2> (0.,    -1./2.),
+             Point<2> (+1./2, -1./2.),
+             Point<2> (+1,    -1./2.),
+
+             Point<2> (-1.,   0.),
+             Point<2> (-1./2, 0.),
+             Point<2> (+1./2, 0.),
+             Point<2> (+1,    0.),
+
+             Point<2> (-1.,   1./2.),
+             Point<2> (-1./2, 1./2.),
+             Point<2> (0.,    1./2.),
+             Point<2> (+1./2, 1./2.),
+             Point<2> (+1,    1./2.),
+
+             Point<2> (-1.,   1.),
+             Point<2> (-1./2, 1.),
+             Point<2> (0.,    1.),
+             Point<2> (+1./2, 1.),
+             Point<2> (+1,    1.)
+          };
+      const unsigned int
+      n_vertices = sizeof(vertices_1) / sizeof(vertices_1[0]);
+
+      // From this static list of vertices, we generate a <tt>std::vector</tt>
+      // of the vertices, as this is the data type the library wants to see.
+      const std::vector<Point<dim> > vertices (&vertices_1[0],
+                                               &vertices_1[n_vertices]);
+
+      // Next, we have to define the cells and the vertices they
+      // contain. Here, we have 12 cells, but leave the number open and let
+      // it be computed afterwards:
+      static const int cell_vertices[][GeometryInfo<dim>::vertices_per_cell]
+      = {{0, 1, 5, 6},
+        {1, 2, 6, 7},
+        {2, 3, 7, 8},
+        {3, 4, 8, 9},
+        {5, 6, 10, 11},
+        {8, 9, 12, 13},
+        {10, 11, 14, 15},
+        {12, 13, 17, 18},
+        {14, 15, 19, 20},
+        {15, 16, 20, 21},
+        {16, 17, 21, 22},
+        {17, 18, 22, 23}
+      };
+      const unsigned int
+      n_cells = sizeof(cell_vertices) / sizeof(cell_vertices[0]);
+
+      // Again, we generate a C++ vector type from this, but this time by
+      // looping over the cells (yes, this is boring). Additionally, we set
+      // the material indicator to zero for all the cells:
+      std::vector<CellData<dim> > cells (n_cells, CellData<dim>());
+      for (unsigned int i=0; i<n_cells; ++i)
+        {
+          for (unsigned int j=0;
+               j<GeometryInfo<dim>::vertices_per_cell;
+               ++j)
+            cells[i].vertices[j] = cell_vertices[i][j];
+          cells[i].material_id = 0;
+        }
+
+      // Finally pass all this information to the library to generate a
+      // triangulation. The last parameter may be used to pass information
+      // about non-zero boundary indicators at certain faces of the
+      // triangulation to the library, but we don't want that here, so we give
+      // an empty object:
+      coarse_grid.create_triangulation (vertices,
+                                        cells,
+                                        SubCellData());
+
+      // And since we want the evaluation point (3/4,3/4) in this example to
+      // be a grid point, we refine once globally:
+      coarse_grid.refine_global (1);
+    }
+  }
+
+  // @sect4{Discussion}
+  //
+  // As you have now read through this framework, you may be wondering why we
+  // have not chosen to implement the classes implementing a certain setup
+  // (like the <code>CurvedRidges</code> class) directly as classes derived
+  // from <code>Data::SetUpBase</code>. Indeed, we could have done very well
+  // so. The only reason is that then we would have to have member variables
+  // for the solution and right hand side classes in the
+  // <code>CurvedRidges</code> class, as well as member functions overloading
+  // the abstract functions of the base class giving access to these member
+  // variables. The <code>SetUp</code> class has the sole reason to relieve us
+  // from the need to reiterate these member variables and functions that
+  // would be necessary in all such classes. In some way, the template
+  // mechanism here only provides a way to have default implementations for a
+  // number of functions that depend on external quantities and can thus not
+  // be provided using normal virtual functions, at least not without the help
+  // of templates.
+  //
+  // However, there might be good reasons to actually implement classes
+  // derived from <code>Data::SetUpBase</code>, for example if the solution or
+  // right hand side classes require constructors that take arguments, which
+  // the <code>Data::SetUpBase</code> class cannot provide. In that case,
+  // subclassing is a worthwhile strategy. Other possibilities for special
+  // cases are to derive from <code>Data::SetUp@<SomeSetUp@></code> where
+  // <code>SomeSetUp</code> denotes a class, or even to explicitly specialize
+  // <code>Data::SetUp@<SomeSetUp@></code>. The latter lets one transparently
+  // use the way the <code>SetUp</code> class is used for other set-ups, but
+  // with special actions taken for special arguments.
+  //
+  // A final observation favoring the approach taken here is the following: we
+  // have found numerous times that when starting a project, the number of
+  // parameters (usually boundary values, right hand side, coarse grid, just
+  // as here) was small, and the number of test cases was small as well. One
+  // then starts out by handcoding them into a number of <code>switch</code>
+  // statements. Over time, projects grow, and so does the number of test
+  // cases. The number of <code>switch</code> statements grows with that, and
+  // their length as well, and one starts to find ways to consider impossible
+  // examples where domains, boundary values, and right hand sides do not fit
+  // together any more, and starts losing the overview over the whole
+  // structure. Encapsulating everything belonging to a certain test case into
+  // a structure of its own has proven worthwhile for this, as it keeps
+  // everything that belongs to one test case in one place. Furthermore, it
+  // allows one to put all of these things in one or more files devoted only
+  // to test cases and their data, without having to bring their actual
+  // implementation into contact with the rest of the program.
+
+
+  // @sect3{Dual functionals}
+
+  // As with the other components of the program, we put everything we need to
+  // describe dual functionals into a namespace of its own, and define an
+  // abstract base class that provides the interface the class solving the
+  // dual problem needs for its work.
+  //
+  // We will then implement two such classes, for the evaluation of a point
+  // value and of the derivative of the solution at that point. For these
+  // functionals we already have the corresponding evaluation objects, so they
+  // are complementary.
+  namespace DualFunctional
+  {
+    // @sect4{The DualFunctionalBase class}
+
+    // First start with the base class for dual functionals. Since for linear
+    // problems the characteristics of the dual problem play a role only in
+    // the right hand side, we only need to provide for a function that
+    // assembles the right hand side for a given discretization:
+    template <int dim>
+    class DualFunctionalBase : public Subscriptor
+    {
+    public:
+      // Assemble into rhs the right hand side vector of the dual problem for
+      // the discretization described by dof_handler.
+      virtual void assemble_rhs (const DoFHandler<dim> &dof_handler,
+                                 Vector<double>        &rhs) const = 0;
+    };
+
+
+    // @sect4{The PointValueEvaluation class}
+
+    // As a first application, we consider the functional corresponding to the
+    // evaluation of the solution's value at a given point which again we
+    // assume to be a vertex. Apart from the constructor that takes and stores
+    // the evaluation point, this class consists only of the function that
+    // implements assembling the right hand side.
+    template <int dim>
+    class PointValueEvaluation : public DualFunctionalBase<dim>
+    {
+    public:
+      // Stores the point at which the solution value is to be evaluated.
+      PointValueEvaluation (const Point<dim> &evaluation_point);
+
+      // Assembles the right hand side of the dual problem for this functional.
+      virtual void assemble_rhs (const DoFHandler<dim> &dof_handler,
+                                 Vector<double>        &rhs) const;
+
+      DeclException1 (ExcEvaluationPointNotFound,
+                      Point<dim>,
+                      << "The evaluation point " << arg1
+                      << " was not found among the vertices of the present grid.");
+
+    protected:
+      const Point<dim> evaluation_point;
+    };
+
+
+    // The constructor only records the evaluation point.
+    template <int dim>
+    PointValueEvaluation<dim>::PointValueEvaluation (const Point<dim> &evaluation_point)
+      : evaluation_point (evaluation_point)
+    {
+    }
+
+
+    // As for doing the main purpose of the class, assembling the right hand
+    // side, let us first consider what is necessary: The right hand side of
+    // the dual problem is a vector of values J(phi_i), where J is the error
+    // functional, and phi_i is the i-th shape function. Here, J is the
+    // evaluation at the point x0, i.e. J(phi_i)=phi_i(x0).
+    //
+    // Now, we have assumed that the evaluation point is a vertex. Thus, for
+    // the usual finite elements we might be using in this program, we can
+    // take for granted that at such a point exactly one shape function is
+    // nonzero, and in particular has the value one. Thus, we set the right
+    // hand side vector to all-zeros, then look for the shape function
+    // associated with that point and set the corresponding value of the right
+    // hand side vector to one:
+    template <int dim>
+    void
+    PointValueEvaluation<dim>::assemble_rhs (const DoFHandler<dim> &dof_handler,
+                                             Vector<double>        &rhs) const
+    {
+      // Start from an all-zero vector with one entry per degree of freedom:
+      rhs.reinit (dof_handler.n_dofs());
+
+      // Walk over the active cells and compare each of their vertices with
+      // the evaluation point. Round-off is accounted for by accepting any
+      // vertex closer than 1e-8 times the diameter of the current cell.
+      typename DoFHandler<dim>::active_cell_iterator
+      cell     = dof_handler.begin_active(),
+      end_cell = dof_handler.end();
+      while (cell!=end_cell)
+        {
+          for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+            {
+              const double offset = cell->vertex(v).distance(evaluation_point);
+              if (offset < cell->diameter()*1e-8)
+                {
+                  // This is the vertex: the degree of freedom with index 0
+                  // on it gets the value one, and we are done.
+                  rhs(cell->vertex_dof_index(v,0)) = 1;
+                  return;
+                }
+            }
+          ++cell;
+        }
+      // No vertex matched, so report the failure unconditionally:
+      AssertThrow (false, ExcEvaluationPointNotFound(evaluation_point));
+    }
+
+
+    // @sect4{The PointXDerivativeEvaluation class}
+
+    // As second application, we again consider the evaluation of the
+    // x-derivative of the solution at one point. Again, the declaration of
+    // the class, and the implementation of its constructor is not too
+    // interesting:
+    template <int dim>
+    class PointXDerivativeEvaluation : public DualFunctionalBase<dim>
+    {
+    public:
+      // Stores the point at which the x-derivative is to be evaluated.
+      PointXDerivativeEvaluation (const Point<dim> &evaluation_point);
+
+      // Assembles the right hand side of the dual problem for this functional.
+      virtual void assemble_rhs (const DoFHandler<dim> &dof_handler,
+                                 Vector<double>        &rhs) const;
+
+      DeclException1 (ExcEvaluationPointNotFound,
+                      Point<dim>,
+                      << "The evaluation point " << arg1
+                      << " was not found among the vertices of the present grid.");
+
+    protected:
+      const Point<dim> evaluation_point;
+    };
+
+
+    // The constructor only records the evaluation point.
+    template <int dim>
+    PointXDerivativeEvaluation<dim>::PointXDerivativeEvaluation (const Point<dim> &evaluation_point)
+      : evaluation_point (evaluation_point)
+    {
+    }
+
+
+    // What is interesting is the implementation of this functional: here,
+    // J(phi_i)=d/dx phi_i(x0).
+    //
+    // We could, as in the implementation of the respective evaluation object
+    // take the average of the gradients of each shape function phi_i at this
+    // evaluation point. However, we take a slightly different approach: we
+    // simply take the average over all cells that surround this point. The
+    // question of which cells <code>surround</code> the evaluation point is
+    // made dependent on the mesh width by including those cells for which the
+    // distance of the cell's midpoint to the evaluation point is less than
+    // the cell's diameter.
+    //
+    // Taking the average of the gradient over the area/volume of these cells
+    // leads to a dual solution which is very close to the one which would
+    // result from the point evaluation of the gradient. It is simple to
+    // justify theoretically that this does not change the method
+    // significantly.
+    template <int dim>
+    void
+    PointXDerivativeEvaluation<dim>::
+    assemble_rhs (const DoFHandler<dim> &dof_handler,
+                  Vector<double>        &rhs) const
+    {
+      // Again, first set all entries to zero:
+      rhs.reinit (dof_handler.n_dofs());
+
+      // Initialize a <code>FEValues</code> object with a quadrature formula,
+      // have abbreviations for the number of quadrature points and shape
+      // functions...
+      QGauss<dim> quadrature(4);
+      FEValues<dim>  fe_values (dof_handler.get_fe(), quadrature,
+                                update_gradients |
+                                update_quadrature_points  |
+                                update_JxW_values);
+      const unsigned int n_q_points = fe_values.n_quadrature_points;
+      const unsigned int dofs_per_cell = dof_handler.get_fe().dofs_per_cell;
+
+      // ...and have two objects that store the cell's local right hand side
+      // and the global indices of its degrees of freedom. The latter use the
+      // library's index type, which need not be <code>unsigned int</code>:
+      Vector<double> cell_rhs (dofs_per_cell);
+      std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+
+      // Finally have a variable in which we will sum up the area/volume of
+      // the cells over which we integrate, by integrating the unit functions
+      // on these cells:
+      double total_volume = 0;
+
+      // Then start the loop over all cells, and select those cells which are
+      // close enough to the evaluation point:
+      typename DoFHandler<dim>::active_cell_iterator
+      cell = dof_handler.begin_active(),
+      endc = dof_handler.end();
+      for (; cell!=endc; ++cell)
+        if (cell->center().distance(evaluation_point) <=
+            cell->diameter())
+          {
+            // If we have found such a cell, then initialize the
+            // <code>FEValues</code> object and integrate the x-component of
+            // the gradient of each shape function, as well as the unit
+            // function for the total area/volume.
+            fe_values.reinit (cell);
+            cell_rhs = 0;
+
+            for (unsigned int q=0; q<n_q_points; ++q)
+              {
+                for (unsigned int i=0; i<dofs_per_cell; ++i)
+                  cell_rhs(i) += fe_values.shape_grad(i,q)[0] *
+                                 fe_values.JxW (q);
+                total_volume += fe_values.JxW (q);
+              }
+
+            // If we have the local contributions, distribute them to the
+            // global vector:
+            cell->get_dof_indices (local_dof_indices);
+            for (unsigned int i=0; i<dofs_per_cell; ++i)
+              rhs(local_dof_indices[i]) += cell_rhs(i);
+          }
+
+      // After we have looped over all cells, check whether we have found any
+      // at all, by making sure that their volume is non-zero. If not, then
+      // the results will be botched, as the right hand side should then still
+      // be zero, so throw an exception:
+      AssertThrow (total_volume > 0,
+                   ExcEvaluationPointNotFound(evaluation_point));
+
+      // Finally, we have by now only integrated the gradients of the shape
+      // functions, not taking their mean value. We fix this by dividing by
+      // the measure of the volume over which we have integrated:
+      rhs /= total_volume;
+    }
+
+
+  }
+
+
+  // @sect3{Extending the LaplaceSolver namespace}
+  namespace LaplaceSolver
+  {
+
+    // @sect4{The DualSolver class}
+
+    // In the same way as the <code>PrimalSolver</code> class above, we now
+    // implement a <code>DualSolver</code>. It has all the same features, the
+    // only difference is that it does not take a function object denoting a
+    // right hand side object, but now takes a <code>DualFunctionalBase</code>
+    // object that will assemble the right hand side vector of the dual
+    // problem. The rest of the class is rather trivial.
+    //
+    // Since both primal and dual solver will use the same triangulation, but
+    // different discretizations, it now becomes clear why we have made the
+    // <code>Base</code> class a virtual one: since the final class will be
+    // derived from both <code>PrimalSolver</code> as well as
+    // <code>DualSolver</code>, it would have two <code>Base</code> instances,
+    // had we not marked the inheritance as virtual. Since in many
+    // applications the base class would store much more information than just
+    // the triangulation which needs to be shared between primal and dual
+    // solvers, we do not usually want to use two such base classes.
+    template <int dim>
+    class DualSolver : public Solver<dim>
+    {
+    public:
+      DualSolver (Triangulation<dim>                            &triangulation,
+                  const FiniteElement<dim>                      &fe,
+                  const Quadrature<dim>                         &quadrature,
+                  const Quadrature<dim-1>                       &face_quadrature,
+                  const DualFunctional::DualFunctionalBase<dim> &dual_functional);
+
+    protected:
+      // Functional that assembles the right hand side of the dual problem.
+      const SmartPointer<const DualFunctional::DualFunctionalBase<dim> > dual_functional;
+      static const ZeroFunction<dim> boundary_values;
+      virtual void assemble_rhs (Vector<double> &rhs) const;
+    };
+
+    // Out-of-class definition of the static zero function object.
+    template <int dim> const ZeroFunction<dim> DualSolver<dim>::boundary_values;
+
+    // Initializes the Base class with the triangulation, hands the
+    // discretization data together with the static zero function as boundary
+    // values to the Solver class, and stores a pointer to the dual functional.
+    template <int dim>
+    DualSolver<dim>::DualSolver (Triangulation<dim>       &triangulation,
+                                 const FiniteElement<dim> &fe,
+                                 const Quadrature<dim>    &quadrature,
+                                 const Quadrature<dim-1>  &face_quadrature,
+                                 const DualFunctional::DualFunctionalBase<dim> &dual_functional)
+      : Base<dim> (triangulation),
+        Solver<dim> (triangulation, fe, quadrature, face_quadrature, boundary_values),
+        dual_functional (&dual_functional)
+    {
+    }
+
+
+
+    // The right hand side is assembled by the stored dual functional, using
+    // the DoFHandler of this solver.
+    template <int dim>
+    void DualSolver<dim>::assemble_rhs (Vector<double> &rhs) const
+    {
+      dual_functional->assemble_rhs (this->dof_handler, rhs);
+    }
+
+
+    // @sect4{The WeightedResidual class}
+
+    // Here finally comes the main class of this program, the one that
+    // implements the dual weighted residual error estimator. It joins the
+    // primal and dual solver classes to use them for the computation of
+    // primal and dual solutions, and implements the error representation
+    // formula for use as error estimate and mesh refinement.
+    //
+    // The first few of the functions of this class are mostly overriders of
+    // the respective functions of the base class:
+    template <int dim>
+    class WeightedResidual : public PrimalSolver<dim>,
+      public DualSolver<dim>
+    {
+    public:
+      WeightedResidual (Triangulation<dim>       &coarse_grid,
+                        const FiniteElement<dim> &primal_fe,
+                        const FiniteElement<dim> &dual_fe,
+                        const Quadrature<dim>    &quadrature,
+                        const Quadrature<dim-1>  &face_quadrature,
+                        const Function<dim>      &rhs_function,
+                        const Function<dim>      &boundary_values,
+                        const DualFunctional::DualFunctionalBase<dim> &dual_functional);
+
+      virtual
+      void
+      solve_problem ();
+
+      virtual
+      void
+      postprocess (const Evaluation::EvaluationBase<dim> &postprocessor) const;
+
+      virtual
+      unsigned int
+      n_dofs () const;
+
+      virtual void refine_grid ();
+
+      virtual
+      void
+      output_solution () const;
+
+    private:
+      // In the private section, we have two functions that are used to call
+      // the <code>solve_problem</code> functions of the primal and dual base
+      // classes. These two functions will be called in parallel by the
+      // <code>solve_problem</code> function of this class.
+      void solve_primal_problem ();
+      void solve_dual_problem ();
+
+      // Then declare abbreviations for active cell iterators, to avoid that
+      // we have to write this lengthy name over and over again:
+      typedef
+      typename DoFHandler<dim>::active_cell_iterator
+      active_cell_iterator;
+
+      // Next, declare a data type that we will use to store the contribution
+      // of faces to the error estimator. The idea is that we can compute the
+      // face terms from each of the two cells to this face, as they are the
+      // same when viewed from both sides. What we will do is to compute them
+      // only once, based on some rules explained below which of the two
+      // adjacent cells will be in charge to do so. We then store the
+      // contribution of each face in a map mapping faces to their values, and
+      // only collect the contributions for each cell by looping over the
+      // cells a second time and grabbing the values from the map.
+      //
+      // The data type of this map is declared here:
+      typedef
+      typename std::map<typename DoFHandler<dim>::face_iterator,double>
+      FaceIntegrals;
+
+      // In the computation of the error estimates on cells and faces, we need
+      // a number of helper objects, such as <code>FEValues</code> and
+      // <code>FEFaceValues</code> functions, but also temporary objects
+      // storing the values and gradients of primal and dual solutions, for
+      // example. These fields are needed in the three functions that do the
+      // integration on cells, and regular and irregular faces, respectively.
+      //
+      // There are three reasonable ways to provide these fields: first, as
+      // local variables in the function that needs them; second, as member
+      // variables of this class; third, as arguments passed to that function.
+      //
+      // These three alternatives all have drawbacks: the third that their
+      // number is not negligible and would make calling these functions a
+      // lengthy enterprise. The second has the drawback that it disallows
+      // parallelization, since the threads that will compute the error
+      // estimate have to have their own copies of these variables each, so
+      // member variables of the enclosing class will not work. The first
+      // approach, although straightforward, has a subtle but important
+      // drawback: we will call these functions over and over again, many
+      // thousands of times maybe; it now turns out that allocating
+      // vectors and other objects that need memory from the heap is an
+      // expensive business in terms of run-time, since memory allocation is
+      // expensive when several threads are involved. It is thus
+      // significantly better to allocate the memory only once, and recycle
+      // the objects as often as possible.
+      //
+      // What to do? Our answer is to use a variant of the third strategy.
+      // In fact, this is exactly what the WorkStream concept is supposed to
+      // do (we have already introduced it above, but see also @ref threads).
+      // To avoid that we have to give these functions a dozen or so
+      // arguments, we pack all these variables into two structures, one which
+      // is used for the computations on cells, the other doing them on the
+      // faces. Both are then joined into the WeightedResidualScratchData class
+      // that will serve as the "scratch data" class of the WorkStream concept:
+      struct CellData
+      {
+        FEValues<dim>    fe_values;
+        const SmartPointer<const Function<dim> > right_hand_side;
+
+        std::vector<double> cell_residual;
+        std::vector<double> rhs_values;
+        std::vector<double> dual_weights;
+        std::vector<double> cell_laplacians;
+        CellData (const FiniteElement<dim> &fe,
+                  const Quadrature<dim>    &quadrature,
+                  const Function<dim>      &right_hand_side);
+        CellData (const CellData &cell_data);
+      };
+
+      struct FaceData
+      {
+        FEFaceValues<dim>    fe_face_values_cell;
+        FEFaceValues<dim>    fe_face_values_neighbor;
+        FESubfaceValues<dim> fe_subface_values_cell;
+
+        std::vector<double> jump_residual;
+        std::vector<double> dual_weights;
+        typename std::vector<Tensor<1,dim> > cell_grads;
+        typename std::vector<Tensor<1,dim> > neighbor_grads;
+        FaceData (const FiniteElement<dim> &fe,
+                  const Quadrature<dim-1>  &face_quadrature);
+        FaceData (const FaceData &face_data);
+      };
+
+      struct WeightedResidualScratchData
+      {
+        WeightedResidualScratchData (const FiniteElement<dim>  &primal_fe,
+                                     const Quadrature<dim>     &primal_quadrature,
+                                     const Quadrature<dim - 1> &primal_face_quadrature,
+                                     const Function<dim>       &rhs_function,
+                                     const Vector<double>      &primal_solution,
+                                     const Vector<double>      &dual_weights);
+
+        WeightedResidualScratchData(const WeightedResidualScratchData &scratch_data);
+        // Cell and face helper data, plus the two vectors stored by value:
+        CellData       cell_data;
+        FaceData       face_data;
+        Vector<double> primal_solution;
+        Vector<double> dual_weights;
+      };
+
+
+      // WorkStream::run generally wants both a scratch object and a copy object.
+      // Here, for reasons similar to what we had in step-9 when discussing the
+      // computation of an approximation of the gradient, we don't actually
+      // need a "copy data" structure. Since WorkStream insists on having one of
+      // these, we just declare an empty structure that does nothing other than
+      // being there.
+      struct WeightedResidualCopyData
+      {};
+
+
+
+      // Regarding the evaluation of the error estimator, we have one driver
+      // function that uses WorkStream::run to call the second function on every
+      // cell. The concept of using SynchronousIterators was already explained
+      // in step-9:
+      void estimate_error (Vector<float> &error_indicators) const;
+
+      void estimate_on_one_cell (const SynchronousIterators<std_cxx11::tuple<
+                                 active_cell_iterator,Vector<float>::iterator> > &cell_and_error,
+                                 WeightedResidualScratchData                     &scratch_data,
+                                 WeightedResidualCopyData                        &copy_data,
+                                 FaceIntegrals                                   &face_integrals) const;
+
+      // Then we have functions that do the actual integration of the error
+      // representation formula. They will treat the terms on the cell
+      // interiors, on those faces that have no hanging nodes, and on those
+      // faces with hanging nodes, respectively:
+      void
+      integrate_over_cell (const SynchronousIterators<std_cxx11::tuple<
+                           active_cell_iterator,Vector<float>::iterator> > &cell_and_error,
+                           const Vector<double>                            &primal_solution,
+                           const Vector<double>                            &dual_weights,
+                           CellData                                        &cell_data) const;
+
+      void
+      integrate_over_regular_face (const active_cell_iterator &cell,
+                                   const unsigned int          face_no,
+                                   const Vector<double>       &primal_solution,
+                                   const Vector<double>       &dual_weights,
+                                   FaceData                   &face_data,
+                                   FaceIntegrals              &face_integrals) const;
+      void
+      integrate_over_irregular_face (const active_cell_iterator &cell,
+                                     const unsigned int          face_no,
+                                     const Vector<double>       &primal_solution,
+                                     const Vector<double>       &dual_weights,
+                                     FaceData                   &face_data,
+                                     FaceIntegrals              &face_integrals) const;
+    };
+
+
+
+    // In the implementation of this class, we first have the constructors of
+    // the <code>CellData</code> and <code>FaceData</code> member classes, and
+    // the <code>WeightedResidual</code> constructor. They only initialize
+    // fields to their correct lengths, so we do not have to discuss them in
+    // too much detail:
+    // Set up the per-cell scratch data: the <code>fe_values</code> object for
+    // the given element and quadrature formula, a pointer to the right hand
+    // side function, and work arrays sized to the number of quadrature
+    // points.
+    template <int dim>
+    WeightedResidual<dim>::CellData::CellData (const FiniteElement<dim> &fe,
+                                               const Quadrature<dim>    &quadrature,
+                                               const Function<dim>      &right_hand_side)
+      :
+      fe_values (fe,
+                 quadrature,
+                 update_values | update_hessians |
+                 update_quadrature_points | update_JxW_values),
+      right_hand_side (&right_hand_side),
+      cell_residual (quadrature.size()),
+      rhs_values (quadrature.size()),
+      dual_weights (quadrature.size()),
+      cell_laplacians (quadrature.size())
+    {}
+
+
+
+    // Copy constructor: the <code>fe_values</code> member is rebuilt from the
+    // finite element and quadrature formula of the source object with the
+    // update flags listed here; all other members are copied one by one.
+    template <int dim>
+    WeightedResidual<dim>::CellData::CellData (const CellData &cell_data)
+      :
+      fe_values (cell_data.fe_values.get_fe(),
+                 cell_data.fe_values.get_quadrature(),
+                 update_values | update_hessians |
+                 update_quadrature_points | update_JxW_values),
+      right_hand_side (cell_data.right_hand_side),
+      cell_residual (cell_data.cell_residual),
+      rhs_values (cell_data.rhs_values),
+      dual_weights (cell_data.dual_weights),
+      cell_laplacians (cell_data.cell_laplacians)
+    {}
+
+
+
+    // Set up the per-face scratch data: two face objects (for this cell and
+    // for its neighbor), one subface object, and work arrays with one entry
+    // per face quadrature point.
+    template <int dim>
+    WeightedResidual<dim>::FaceData::FaceData (const FiniteElement<dim> &fe,
+                                               const Quadrature<dim-1>  &face_quadrature)
+      :
+      fe_face_values_cell (fe, face_quadrature,
+                           update_values | update_gradients |
+                           update_JxW_values | update_normal_vectors),
+      fe_face_values_neighbor (fe, face_quadrature,
+                               update_values | update_gradients |
+                               update_JxW_values | update_normal_vectors),
+      fe_subface_values_cell (fe, face_quadrature, update_gradients)
+    {
+      // All work arrays get the same length:
+      const unsigned int n_points = face_quadrature.size();
+
+      jump_residual.resize (n_points);
+      dual_weights.resize (n_points);
+      cell_grads.resize (n_points);
+      neighbor_grads.resize (n_points);
+    }
+
+
+
+    // Copy constructor: the three face/subface objects are rebuilt from the
+    // finite element and quadrature formula of their counterparts in the
+    // source object, while the work arrays are copied.
+    template <int dim>
+    WeightedResidual<dim>::FaceData::FaceData (const FaceData &face_data)
+      :
+      fe_face_values_cell (face_data.fe_face_values_cell.get_fe(),
+                           face_data.fe_face_values_cell.get_quadrature(),
+                           update_values | update_gradients |
+                           update_JxW_values | update_normal_vectors),
+      fe_face_values_neighbor (face_data.fe_face_values_neighbor.get_fe(),
+                               face_data.fe_face_values_neighbor.get_quadrature(),
+                               update_values | update_gradients |
+                               update_JxW_values | update_normal_vectors),
+      fe_subface_values_cell (face_data.fe_subface_values_cell.get_fe(),
+                              face_data.fe_subface_values_cell.get_quadrature(),
+                              update_gradients),
+      jump_residual (face_data.jump_residual),
+      dual_weights (face_data.dual_weights),
+      cell_grads (face_data.cell_grads),
+      neighbor_grads (face_data.neighbor_grads)
+    {}
+
+
+
+    // Build the scratch object handed to WorkStream: it bundles the cell and
+    // face scratch data with its own copies of the primal solution and of
+    // the dual weights.
+    template <int dim>
+    WeightedResidual<dim>::WeightedResidualScratchData::
+    WeightedResidualScratchData (const FiniteElement<dim>  &primal_fe,
+                                 const Quadrature<dim>     &primal_quadrature,
+                                 const Quadrature<dim - 1> &primal_face_quadrature,
+                                 const Function<dim>       &rhs_function,
+                                 const Vector<double>      &primal_solution,
+                                 const Vector<double>      &dual_weights)
+      :
+      cell_data (primal_fe,
+                 primal_quadrature,
+                 rhs_function),
+      face_data (primal_fe,
+                 primal_face_quadrature),
+      primal_solution (primal_solution),
+      dual_weights (dual_weights)
+    {}
+
+    // Copy constructor: every member is copy-constructed from the
+    // corresponding member of the source object.
+    template <int dim>
+    WeightedResidual<dim>::WeightedResidualScratchData::
+    WeightedResidualScratchData (const WeightedResidualScratchData &scratch_data)
+      :
+      cell_data (scratch_data.cell_data),
+      face_data (scratch_data.face_data),
+      primal_solution (scratch_data.primal_solution),
+      dual_weights (scratch_data.dual_weights)
+    {}
+
+
+
+    // The constructor only forwards its arguments: the coarse grid goes to
+    // all three base classes, the primal element and right hand side data to
+    // the primal solver, and the dual element and target functional to the
+    // dual solver. Both solvers receive the same quadrature formulas.
+    template <int dim>
+    WeightedResidual<dim>::
+    WeightedResidual (Triangulation<dim>       &coarse_grid,
+                      const FiniteElement<dim> &primal_fe,
+                      const FiniteElement<dim> &dual_fe,
+                      const Quadrature<dim>    &quadrature,
+                      const Quadrature<dim-1>  &face_quadrature,
+                      const Function<dim>      &rhs_function,
+                      const Function<dim>      &bv,
+                      const DualFunctional::DualFunctionalBase<dim> &dual_functional)
+      :
+      Base<dim> (coarse_grid),
+      PrimalSolver<dim> (coarse_grid,
+                         primal_fe,
+                         quadrature,
+                         face_quadrature,
+                         rhs_function,
+                         bv),
+      DualSolver<dim> (coarse_grid,
+                       dual_fe,
+                       quadrature,
+                       face_quadrature,
+                       dual_functional)
+    {}
+
+
+    // The next five functions are boring, as they simply relay their work to
+    // the base classes. The first calls the primal and dual solvers in
+    // parallel, while postprocessing the solution and retrieving the number
+    // of degrees of freedom is done by the primal class.
+    template <int dim>
+    void
+    WeightedResidual<dim>::solve_problem ()
+    {
+      // Queue the primal and the dual solve as two separate tasks on this
+      // object...
+      Threads::TaskGroup<> solver_tasks;
+      solver_tasks += Threads::new_task (&WeightedResidual<dim>::solve_primal_problem,
+                                         *this);
+      solver_tasks += Threads::new_task (&WeightedResidual<dim>::solve_dual_problem,
+                                         *this);
+
+      // ...and only return once both of them have finished:
+      solver_tasks.join_all();
+    }
+
+
+    template <int dim>
+    void
+    WeightedResidual<dim>::solve_primal_problem ()
+    {
+      // Relay to the primal solver base class.
+      this->PrimalSolver<dim>::solve_problem ();
+    }
+
+    template <int dim>
+    void
+    WeightedResidual<dim>::solve_dual_problem ()
+    {
+      // Relay to the dual solver base class.
+      this->DualSolver<dim>::solve_problem ();
+    }
+
+
+    template <int dim>
+    void
+    WeightedResidual<dim>::
+    postprocess (const Evaluation::EvaluationBase<dim> &postprocessor) const
+    {
+      // Postprocessing is done by the primal solver base class.
+      this->PrimalSolver<dim>::postprocess (postprocessor);
+    }
+
+
+    template <int dim>
+    unsigned int
+    WeightedResidual<dim>::n_dofs () const
+    {
+      // Report the number of degrees of freedom of the primal solver.
+      return this->PrimalSolver<dim>::n_dofs();
+    }
+
+
+
+    // Now, it is becoming more interesting: the <code>refine_grid()</code>
+    // function asks the error estimator to compute the cell-wise error
+    // indicators, then uses their absolute values for mesh refinement.
+    template <int dim>
+    void
+    WeightedResidual<dim>::refine_grid ()
+    {
+      // Compute the signed cell-wise error indicators (the called function
+      // also prints the estimated global error):
+      Vector<float> error_indicators (this->triangulation->n_active_cells());
+      estimate_error (error_indicators);
+
+      // Marking cells for refinement or coarsening compares indicators with
+      // each other, which only makes sense for non-negative values. So
+      // replace every indicator by its absolute value:
+      const Vector<float>::iterator indicators_end = error_indicators.end();
+      for (Vector<float>::iterator indicator = error_indicators.begin();
+           indicator != indicators_end;
+           ++indicator)
+        *indicator = std::fabs (*indicator);
+
+      // Select the cells to change with the fixed fraction strategy:
+      // refine the cells with the largest indicators that together make up
+      // 80 per cent of the error, coarsen those with the smallest
+      // indicators that make up the bottom 2 per cent, and then carry out
+      // the changes on the mesh.
+      GridRefinement::refine_and_coarsen_fixed_fraction (*this->triangulation,
+                                                         error_indicators,
+                                                         0.8, 0.02);
+      this->triangulation->execute_coarsening_and_refinement ();
+    }
+
+
+    // Since we want to output both the primal and the dual solution, we
+    // overload the <code>output_solution</code> function. The only
+    // interesting feature of this function is that the primal and dual
+    // solutions are defined on different finite element spaces, which is not
+    // the format the <code>DataOut</code> class expects. Thus, we have to
+    // transfer them to a common finite element space. Since we want the
+    // solutions only to see them qualitatively, we content ourselves with
+    // interpolating the dual solution to the (smaller) primal space. For the
+    // interpolation, there is a library function, that takes a
+    // ConstraintMatrix object including the hanging node
+    // constraints. The rest is standard.
+    template <int dim>
+    void
+    WeightedResidual<dim>::output_solution () const
+    {
+      // Interpolate the dual solution to the primal finite element space so
+      // that both solutions live on the same DoFHandler. The interpolation
+      // function wants the hanging node constraints of the target (primal)
+      // space:
+      ConstraintMatrix primal_hanging_node_constraints;
+      DoFTools::make_hanging_node_constraints (PrimalSolver<dim>::dof_handler,
+                                               primal_hanging_node_constraints);
+      primal_hanging_node_constraints.close();
+      Vector<double> dual_solution (PrimalSolver<dim>::dof_handler.n_dofs());
+      FETools::interpolate (DualSolver<dim>::dof_handler,
+                            DualSolver<dim>::solution,
+                            PrimalSolver<dim>::dof_handler,
+                            primal_hanging_node_constraints,
+                            dual_solution);
+
+      DataOut<dim> data_out;
+      data_out.attach_dof_handler (PrimalSolver<dim>::dof_handler);
+
+      // Add the data vectors for which we want output. Add them both, the
+      // <code>DataOut</code> functions can handle as many data vectors as you
+      // wish to write to output:
+      data_out.add_data_vector (PrimalSolver<dim>::solution,
+                                "primal_solution");
+      data_out.add_data_vector (dual_solution,
+                                "dual_solution");
+
+      data_out.build_patches ();
+
+      // Compose the file name "solution-<cycle>.gnuplot". Do not append
+      // <code>std::ends</code> here: on a <code>std::ostringstream</code>
+      // it would store a literal '\0' character inside the resulting
+      // string, which is never wanted for a file name.
+      std::ostringstream filename;
+      filename << "solution-"
+               << this->refinement_cycle
+               << ".gnuplot";
+
+      std::ofstream out (filename.str().c_str());
+      data_out.write (out, DataOutBase::gnuplot);
+    }
+
+
+    // @sect3{Estimating errors}
+
+    // @sect4{Error estimation driver functions}
+    //
+    // As for the actual computation of error estimates, let's start with the
+    // function that drives all this, i.e. calls those functions that actually
+    // do the work, and finally collects the results.
+    template <int dim>
+    void
+    WeightedResidual<dim>::
+    estimate_error (Vector<float> &error_indicators) const
+    {
+      // The first task in computing the error is to set up vectors that
+      // denote the primal solution, and the weights (z-z_h)=(z-I_hz), both in
+      // the finite element space for which we have computed the dual
+      // solution. For this, we have to interpolate the primal solution to the
+      // dual finite element space, and to subtract the interpolation of the
+      // computed dual solution to the primal finite element
+      // space. Fortunately, the library provides functions for the
+      // interpolation into larger or smaller finite element spaces, so this
+      // is mostly obvious.
+      //
+      // First, let's do that for the primal solution: it is cell-wise
+      // interpolated into the finite element space in which we have solved
+      // the dual problem. But, again as in the
+      // <code>WeightedResidual::output_solution</code> function we first need
+      // to create a ConstraintMatrix including the hanging node constraints,
+      // but this time of the dual finite element space.
+      ConstraintMatrix dual_hanging_node_constraints;
+      DoFTools::make_hanging_node_constraints (DualSolver<dim>::dof_handler,
+                                               dual_hanging_node_constraints);
+      dual_hanging_node_constraints.close();
+      Vector<double> primal_solution (DualSolver<dim>::dof_handler.n_dofs());
+      FETools::interpolate (PrimalSolver<dim>::dof_handler,
+                            PrimalSolver<dim>::solution,
+                            DualSolver<dim>::dof_handler,
+                            dual_hanging_node_constraints,
+                            primal_solution);
+
+      // Then for computing the interpolation of the numerically approximated
+      // dual solution z into the finite element space of the primal solution
+      // and subtracting it from z: use the
+      // <code>interpolation_difference</code> function, that gives (z-I_hz)
+      // in the element space of the dual solution.
+      ConstraintMatrix primal_hanging_node_constraints;
+      DoFTools::make_hanging_node_constraints (PrimalSolver<dim>::dof_handler,
+                                               primal_hanging_node_constraints);
+      primal_hanging_node_constraints.close();
+      Vector<double> dual_weights (DualSolver<dim>::dof_handler.n_dofs());
+      FETools::interpolation_difference (DualSolver<dim>::dof_handler,
+                                         dual_hanging_node_constraints,
+                                         DualSolver<dim>::solution,
+                                         PrimalSolver<dim>::dof_handler,
+                                         primal_hanging_node_constraints,
+                                         dual_weights);
+
+      // Note that this could probably have been more efficient since those
+      // constraints have been used previously when assembling matrix and
+      // right hand side for the primal problem and writing out the dual
+      // solution. We leave the optimization of the program in this respect as
+      // an exercise.
+
+      // Having computed the dual weights we now proceed with computing the
+      // cell and face residuals of the primal solution. First we set up a map
+      // between face iterators and their jump term contributions of faces to
+      // the error estimator. The reason is that we compute the jump terms
+      // only once, from one side of the face, and want to collect them only
+      // afterwards when looping over all cells a second time.
+      //
+      // We initialize this map already with a value of -1e20 for all faces,
+      // since this value will stand out in the results if something should go
+      // wrong and we fail to compute the value for a face for some
+      // reason. Secondly, this initialization already makes the std::map
+      // object allocate all objects it may possibly need. This is important
+      // since we will write into this structure from parallel threads,
+      // and doing so would not be thread-safe if the map needed to allocate
+      // memory and thereby reshape its data structures. In other words, the
+      // up-front initialization relieves us from the necessity to synchronize
+      // the threads through a mutex each time they write to (and modify the
+      // structure of) this map.
+      FaceIntegrals face_integrals;
+      for (active_cell_iterator cell=DualSolver<dim>::dof_handler.begin_active();
+           cell!=DualSolver<dim>::dof_handler.end();
+           ++cell)
+        for (unsigned int face_no=0;
+             face_no<GeometryInfo<dim>::faces_per_cell;
+             ++face_no)
+          face_integrals[cell->face(face_no)] = -1e20;
+
+      // Then set up a vector with error indicators, reserve one slot for each
+      // cell and set it to zero. With this, set up the parallel iterator range
+      // just as in step-9, and hand it all off to WorkStream::run to compute
+      // the estimators for all cells in parallel. NOTE(review): the end range
+      // pairs end() with error_indicators.begin(); presumably only cells count:
+      error_indicators.reinit (DualSolver<dim>::dof_handler
+                               .get_triangulation().n_active_cells());
+
+      typedef
+      std_cxx11::tuple<active_cell_iterator,Vector<float>::iterator>
+      IteratorTuple;
+
+      SynchronousIterators<IteratorTuple>
+      cell_and_error_begin(IteratorTuple (DualSolver<dim>::dof_handler.begin_active(),
+                                          error_indicators.begin()));
+      SynchronousIterators<IteratorTuple>
+      cell_and_error_end  (IteratorTuple (DualSolver<dim>::dof_handler.end(),
+                                          error_indicators.begin()));
+
+      WorkStream::run(cell_and_error_begin,
+                      cell_and_error_end,
+                      std_cxx11::bind(&WeightedResidual<dim>::estimate_on_one_cell,
+                                      this,
+                                      std_cxx11::_1,
+                                      std_cxx11::_2,
+                                      std_cxx11::_3,
+                                      std_cxx11::ref(face_integrals)),
+                      std_cxx11::function<void (const WeightedResidualCopyData &)>(),
+                      WeightedResidualScratchData (*DualSolver<dim>::fe,
+                                                   *DualSolver<dim>::quadrature,
+                                                   *DualSolver<dim>::face_quadrature,
+                                                   *this->rhs_function,
+                                                   primal_solution,
+                                                   dual_weights),
+                      WeightedResidualCopyData());
+
+      // Once the error contributions are computed, sum them up. For this,
+      // note that the cell terms are already set, and that only the edge
+      // terms need to be collected. Thus, loop over all cells and their
+      // faces, make sure that the contributions of each of the faces are
+      // there, and add them up. Only take minus one half of the jump term,
+      // since the other half will be taken by the neighboring cell.
+      unsigned int present_cell=0;
+      for (active_cell_iterator cell=DualSolver<dim>::dof_handler.begin_active();
+           cell!=DualSolver<dim>::dof_handler.end();
+           ++cell, ++present_cell)
+        for (unsigned int face_no=0; face_no<GeometryInfo<dim>::faces_per_cell;
+             ++face_no)
+          {
+            Assert(face_integrals.find(cell->face(face_no)) !=
+                   face_integrals.end(),
+                   ExcInternalError());
+            error_indicators(present_cell)
+            -= 0.5*face_integrals[cell->face(face_no)];
+          }
+      std::cout << "   Estimated error="
+                << std::accumulate (error_indicators.begin(),
+                                    error_indicators.end(), 0.)
+                << std::endl;
+    }
+
+
+    // @sect4{Estimating on a single cell}
+
+    // Next we have the function that is called to estimate the error on a
+    // single cell. It may be called concurrently from several threads if the
+    // library was configured to use multithreading. Here it goes:
+    template <int dim>
+    void
+    WeightedResidual<dim>::
+    estimate_on_one_cell (const SynchronousIterators<std_cxx11::tuple<
+                          active_cell_iterator,Vector<float>::iterator> > &cell_and_error,
+                          WeightedResidualScratchData                       &scratch_data,
+                          WeightedResidualCopyData                          &copy_data,
+                          FaceIntegrals                                     &face_integrals) const
+    {
+      // Because of WorkStream, estimate_on_one_cell requires a CopyData object
+      // even if it is not used. The next line silences a warning about this
+      // unused variable.
+      (void) copy_data;
+
+      // First task on each cell is to compute the cell residual
+      // contributions of this cell, and put them into the
+      // <code>error_indicators</code> variable:
+      active_cell_iterator cell = std_cxx11::get<0>(cell_and_error.iterators);
+
+      integrate_over_cell (cell_and_error,
+                           scratch_data.primal_solution,
+                           scratch_data.dual_weights,
+                           scratch_data.cell_data);
+
+      // After computing the cell terms, turn to the face terms. For this,
+      // loop over all faces of the present cell, and see whether
+      // something needs to be computed on it:
+      for (unsigned int face_no=0;
+           face_no<GeometryInfo<dim>::faces_per_cell;
+           ++face_no)
+        {
+          // First, if this face is part of the boundary, then there is
+          // nothing to do. However, to make things easier when summing up
+          // the contributions of the faces of cells, we enter this face
+          // into the list of faces with a zero contribution to the error.
+          if (cell->face(face_no)->at_boundary())
+            {
+              face_integrals[cell->face(face_no)] = 0;
+              continue;
+            }
+
+          // Next, note that since we want to compute the jump terms on
+          // each face only once although we access it twice (if it is not
+          // at the boundary), we have to define some rules who is
+          // responsible for computing on a face:
+          //
+          // First, if the neighboring cell is on the same level as this
+          // one, i.e. neither further refined nor coarser, then the one
+          // with the lower index within this level does the work. In
+          // other words: if the other one has a lower index, then skip
+          // work on this face:
+          if ((cell->neighbor(face_no)->has_children() == false) &&
+              (cell->neighbor(face_no)->level() == cell->level()) &&
+              (cell->neighbor(face_no)->index() < cell->index()))
+            continue;
+
+          // Likewise, we always work from the coarser cell if this and
+          // its neighbor differ in refinement. Thus, if the neighboring
+          // cell is less refined than the present one, then do nothing
+          // since we integrate over the subfaces when we visit the coarse
+          // cell. (Boundary faces were already skipped above.)
+          if (cell->at_boundary(face_no) == false)
+            if (cell->neighbor(face_no)->level() < cell->level())
+              continue;
+
+
+          // Now we know that we are in charge here, so actually compute
+          // the face jump terms. If the face is a regular one, i.e.  the
+          // other side's cell is neither coarser nor finer than this
+          // cell, then call one function, and if the cell on the other
+          // side is further refined, then use another function. Note that
+          // the case that the cell on the other side is coarser cannot
+          // happen since we have decided above that we handle this case
+          // when we pass over that other cell.
+          if (cell->face(face_no)->has_children() == false)
+            integrate_over_regular_face (cell, face_no,
+                                         scratch_data.primal_solution,
+                                         scratch_data.dual_weights,
+                                         scratch_data.face_data,
+                                         face_integrals);
+          else
+            integrate_over_irregular_face (cell, face_no,
+                                           scratch_data.primal_solution,
+                                           scratch_data.dual_weights,
+                                           scratch_data.face_data,
+                                           face_integrals);
+        }
+    }
+
+
+    // @sect4{Computing cell term error contributions}
+
+    // As for the actual computation of the error contributions, first turn to
+    // the cell terms:
+    template <int dim>
+    void WeightedResidual<dim>::
+    integrate_over_cell (const SynchronousIterators<std_cxx11::tuple<
+                         active_cell_iterator,Vector<float>::iterator> >   &cell_and_error,
+                         const Vector<double>                              &primal_solution,
+                         const Vector<double>                              &dual_weights,
+                         CellData                                          &cell_data) const
+    {
+      // Move the <code>fe_values</code> object to the cell stored in the
+      // first slot of the iterator tuple:
+      cell_data.fe_values.reinit (std_cxx11::get<0>(cell_and_error.iterators));
+
+      // The cell residual needs the right hand side and the Laplacian of the
+      // numerical solution at the quadrature points...
+      cell_data.right_hand_side->value_list (cell_data.fe_values.get_quadrature_points(),
+                                             cell_data.rhs_values);
+      cell_data.fe_values.get_function_laplacians (primal_solution,
+                                                   cell_data.cell_laplacians);
+
+      // ...and it is weighted with the dual weights at the same points:
+      cell_data.fe_values.get_function_values (dual_weights,
+                                               cell_data.dual_weights);
+
+      // Sum up residual times weight times quadrature weight over all
+      // quadrature points...
+      const unsigned int n_q_points = cell_data.fe_values.n_quadrature_points;
+      double cell_integral = 0;
+      for (unsigned int q=0; q<n_q_points; ++q)
+        cell_integral += ((cell_data.rhs_values[q]+cell_data.cell_laplacians[q]) *
+                          cell_data.dual_weights[q] *
+                          cell_data.fe_values.JxW (q));
+
+      // ...and add the result to the error indicator slot that belongs to
+      // this cell, i.e. the second entry of the iterator tuple:
+      *std_cxx11::get<1>(cell_and_error.iterators) += cell_integral;
+    }
+
+
+    // @sect4{Computing edge term error contributions -- 1}
+
+    // On the other hand, computation of the edge terms for the error estimate
+    // is not so simple. First, we have to distinguish between faces with and
+    // without hanging nodes. Because it is the simple case, we first consider
+    // the case without hanging nodes on a face (let's call this the `regular'
+    // case):
+    template <int dim>
+    void WeightedResidual<dim>::
+    integrate_over_regular_face (const active_cell_iterator &cell,
+                                 const unsigned int          face_no,
+                                 const Vector<double>       &primal_solution,
+                                 const Vector<double>       &dual_weights,
+                                 FaceData                   &face_data,
+                                 FaceIntegrals              &face_integrals) const
+    {
+      const unsigned int n_face_q_points
+        = face_data.fe_face_values_cell.n_quadrature_points;
+
+      // Step one: gradients of the finite element field at the face
+      // quadrature points, as seen from the present cell.
+      face_data.fe_face_values_cell.reinit (cell, face_no);
+      face_data.fe_face_values_cell.get_function_gradients (primal_solution,
+                                                            face_data.cell_grads);
+
+      // Step two: the same gradients as seen from the cell on the other side
+      // of the face. We should never get here without a neighbor, but check
+      // this anyway before using it:
+      Assert (cell->neighbor(face_no).state() == IteratorState::valid,
+              ExcInternalError());
+
+      // Seen from the neighbor, the common face has its own face number,
+      // which the following function provides:
+      const unsigned int face_no_on_neighbor
+        = cell->neighbor_of_neighbor (face_no);
+
+      // With this, initialize the second face object on the neighboring cell
+      // and extract the gradients there:
+      const active_cell_iterator neighbor = cell->neighbor(face_no);
+      face_data.fe_face_values_neighbor.reinit (neighbor, face_no_on_neighbor);
+      face_data.fe_face_values_neighbor.get_function_gradients (primal_solution,
+                                                                face_data.neighbor_grads);
+
+      // The jump residual is the jump of the gradient across the face,
+      // multiplied with the normal vector of the present cell:
+      for (unsigned int q=0; q<n_face_q_points; ++q)
+        face_data.jump_residual[q]
+          = ((face_data.cell_grads[q] - face_data.neighbor_grads[q]) *
+             face_data.fe_face_values_cell.normal_vector(q));
+
+      // Evaluate the dual weights on this face...
+      face_data.fe_face_values_cell.get_function_values (dual_weights,
+                                                         face_data.dual_weights);
+
+      // ...and sum up jump residual times dual weight times quadrature
+      // weight over all quadrature points of the face:
+      double weighted_jump_integral = 0;
+      for (unsigned int q=0; q<n_face_q_points; ++q)
+        weighted_jump_integral += (face_data.jump_residual[q] *
+                                   face_data.dual_weights[q]  *
+                                   face_data.fe_face_values_cell.JxW(q));
+
+      // The entry for this face has to exist already, and it still has to
+      // carry its initial marker value, i.e. nobody has written to it yet:
+      Assert (face_integrals.find (cell->face(face_no)) != face_integrals.end(),
+              ExcInternalError());
+      Assert (face_integrals[cell->face(face_no)] == -1e20,
+              ExcInternalError());
+
+      // Store the result. The value is stored without the factor 1/2 of the
+      // error representation formula: that factor only shows up when the
+      // sum runs over all cells and all faces of each cell, so that every
+      // face is visited twice. It is applied as -1/2 later, when the
+      // contributions are summed up for each cell individually.
+      face_integrals[cell->face(face_no)] = weighted_jump_integral;
+    }
+
+
+    // @sect4{Computing edge term error contributions -- 2}
+
+    // We are still missing the case of faces with hanging nodes. This function
+    // integrates over each subface and stores the sum with the mother face:
+    template <int dim>
+    void WeightedResidual<dim>::
+    integrate_over_irregular_face (const active_cell_iterator &cell,
+                                   const unsigned int          face_no,
+                                   const Vector<double>       &primal_solution,
+                                   const Vector<double>       &dual_weights,
+                                   FaceData                   &face_data,
+                                   FaceIntegrals              &face_integrals) const
+    {
+      // First again some abbreviations, and consistency checks that the
+      // function is called only on faces behind which there is a valid
+      // neighbor that has children, i.e. that is refined:
+      const unsigned int
+      n_q_points = face_data.fe_face_values_cell.n_quadrature_points;
+
+      const typename DoFHandler<dim>::face_iterator
+      face = cell->face(face_no);
+      const typename DoFHandler<dim>::cell_iterator
+      neighbor = cell->neighbor(face_no);
+      Assert (neighbor.state() == IteratorState::valid,
+              ExcInternalError());
+      Assert (neighbor->has_children(),
+              ExcInternalError());
+
+      // Then find out which neighbor the present cell is of the adjacent
+      // cell. Note that we will operate on the children of this adjacent
+      // cell, but that their orientation is the same as that of their mother,
+      // i.e. the neighbor direction is the same.
+      const unsigned int
+      neighbor_neighbor = cell->neighbor_of_neighbor (face_no);
+
+      // Then simply do everything we did in the previous function for one
+      // face for all the sub-faces now:
+      for (unsigned int subface_no=0;
+           subface_no<face->n_children(); ++subface_no)
+        {
+          // Start with some checks again: get an iterator pointing to the
+          // cell behind the present subface and check whether its face is a
+          // subface of the one we are considering. If that were not the case,
+          // then there would be either a bug in the
+          // <code>neighbor_neighbor</code> function called above, or -- worse
+          // -- some function in the library did not keep to some underlying
+          // assumptions about cells, their children, and their faces. In any
+          // case, even though this assertion should not be triggered, it does
+          // not harm to be cautious, and in optimized mode computations the
+          // assertion will be removed anyway.
+          const active_cell_iterator neighbor_child
+            = cell->neighbor_child_on_subface (face_no, subface_no);
+          Assert (neighbor_child->face(neighbor_neighbor) ==
+                  cell->face(face_no)->child(subface_no),
+                  ExcInternalError());
+
+          // Now start the work by again getting the gradient of the solution
+          // first at this side of the interface,
+          face_data.fe_subface_values_cell.reinit (cell, face_no, subface_no);
+          face_data.fe_subface_values_cell.get_function_gradients (primal_solution,
+                                                                   face_data.cell_grads);
+          // then at the other side,
+          face_data.fe_face_values_neighbor.reinit (neighbor_child,
+                                                    neighbor_neighbor);
+          face_data.fe_face_values_neighbor.get_function_gradients (primal_solution,
+                                                                    face_data.neighbor_grads);
+
+          // and finally building the jump residuals. Since we take the normal
+          // vector from the other cell this time, reverse the sign of the
+          // first term compared to the other function:
+          for (unsigned int p=0; p<n_q_points; ++p)
+            face_data.jump_residual[p]
+              = ((face_data.neighbor_grads[p] - face_data.cell_grads[p]) *
+                 face_data.fe_face_values_neighbor.normal_vector(p));
+
+          // Then get dual weights, evaluated on the neighbor child's face:
+          face_data.fe_face_values_neighbor.get_function_values (dual_weights,
+                                                                 face_data.dual_weights);
+
+          // At last, sum up the contribution of this sub-face, and set it in
+          // the global map:
+          double face_integral = 0;
+          for (unsigned int p=0; p<n_q_points; ++p)
+            face_integral += (face_data.jump_residual[p] *
+                              face_data.dual_weights[p] *
+                              face_data.fe_face_values_neighbor.JxW(p));
+          face_integrals[neighbor_child->face(neighbor_neighbor)]
+            = face_integral;
+        }
+
+      // Once the contributions of all sub-faces are computed, loop over all
+      // sub-faces to collect and store them with the mother face for simple
+      // use when later collecting the error terms of cells. Again make safety
+      // checks that the entries for the sub-faces have been computed and do
+      // not carry an invalid value.
+      double sum = 0;
+      for (unsigned int subface_no=0;
+           subface_no<face->n_children(); ++subface_no)
+        {
+          Assert (face_integrals.find(face->child(subface_no)) !=
+                  face_integrals.end(),
+                  ExcInternalError());
+          Assert (face_integrals[face->child(subface_no)] != -1e20,
+                  ExcInternalError());
+
+          sum += face_integrals[face->child(subface_no)];
+        }
+      // Finally store the value with the parent face.
+      face_integrals[face] = sum;
+    }
+
+  }
+
+
+  // @sect3{A simulation framework}
+
+  // In the previous example program, we have had two functions that were used
+  // to drive the process of solving on successively finer grids. We extend
+  // this here to allow for a number of parameters to be passed to these
+  // functions, and put all of that into a framework class.
+  //
+  // You will have noted that this program is built up of a number of small
+  // parts (evaluation functions, solver classes implementing various
+  // refinement methods, different dual functionals, different problem and
+  // data descriptions), which makes the program relatively simple to extend,
+  // but also allows us to solve a large number of different problems by
+  // replacing one part by another. We reflect this flexibility by declaring a
+  // structure in the following framework class that holds a number of
+  // parameters that may be set to test various combinations of the parts of
+  // this program, and which can be used to test it on various problems and
+  // discretizations in a simple way.
+  template <int dim>
+  struct Framework
+  {
+  public:
+    // First, we declare two abbreviations for simple use of the respective
+    // data types:
+    typedef Evaluation::EvaluationBase<dim> Evaluator;
+    typedef std::list<Evaluator *>           EvaluatorList;
+
+
+    // Then we have the structure which declares all the parameters that may
+    // be set. In the default constructor of the structure, these values are
+    // all set to default values, for simple use.
+    struct ProblemDescription
+    {
+      // First allow for the degrees of the piecewise polynomials by which the
+      // primal and dual problems will be discretized. They default to (bi-,
+      // tri-)linear ansatz functions for the primal, and (bi-, tri-)quadratic
+      // ones for the dual problem. If a refinement criterion is chosen that
+      // does not need the solution of a dual problem, the value of the dual
+      // finite element degree is of course ignored.
+      unsigned int primal_fe_degree;
+      unsigned int dual_fe_degree;
+
+      // Then have an object that describes the problem type, i.e. right hand
+      // side, domain, boundary values, etc. The pointer needed here defaults
+      // to the null pointer, i.e. you will have to set it in actual instances
+      // of this object to make it useful.
+      std_cxx11::unique_ptr<const Data::SetUpBase<dim> > data;
+
+      // Since we allow to use different refinement criteria (global
+      // refinement, refinement by the Kelly error indicator, possibly with a
+      // weight, and using the dual estimator), define a number of enumeration
+      // values, and subsequently a variable of that type. It will default to
+      // <code>dual_weighted_error_estimator</code>.
+      enum RefinementCriterion
+      {
+        dual_weighted_error_estimator,
+        global_refinement,
+        kelly_indicator,
+        weighted_kelly_indicator
+      };
+
+      RefinementCriterion refinement_criterion;
+
+      // Next, an object that describes the dual functional. It is only needed
+      // if the dual weighted residual refinement is chosen, and also defaults
+      // to a null pointer.
+      std_cxx11::unique_ptr<const DualFunctional::DualFunctionalBase<dim> >
+      dual_functional;
+
+      // Then a list of (plain, non-owning) pointers to evaluation objects.
+      // Its default value is empty, i.e. no evaluation objects.
+      EvaluatorList evaluator_list;
+
+      // Next to last, a function that is used as a weight to the
+      // <code>RefinementWeightedKelly</code> class. The default value of this
+      // pointer is null, but you have to set it to some other value if you
+      // want to use the <code>weighted_kelly_indicator</code> refinement
+      // criterion.
+      std_cxx11::unique_ptr<const Function<dim> > kelly_weight;
+
+      // Then, we have a variable that denotes the maximum number of
+      // degrees of freedom we allow for the (primal) discretization. If it is
+      // exceeded, we stop the process of solving and intermittent mesh
+      // refinement. Its default value is 20,000.
+      unsigned int max_degrees_of_freedom;
+
+      // Finally the default constructor, which sets the defaults named above:
+      ProblemDescription ();
+    };
+
+    // The driver framework class only has one method which calls solver and
+    // mesh refinement intermittently, and does some other small tasks in
+    // between. Since it does not need data besides the parameters given to
+    // it, we make it static:
+    static void run (const ProblemDescription &descriptor);
+  };
+
+
+  // As for the implementation, first the constructor of the parameter object: it
+  // sets degrees, criterion and DoF limit; other members are default-constructed.
+  template <int dim>
+  Framework<dim>::ProblemDescription::ProblemDescription ()
+    :
+    primal_fe_degree (1),
+    dual_fe_degree (2),
+    refinement_criterion (dual_weighted_error_estimator),
+    max_degrees_of_freedom (20000)
+  {}
+
+
+
+  // Then the function which drives the whole process (descriptor.data must be set):
+  template <int dim>
+  void Framework<dim>::run (const ProblemDescription &descriptor)
+  {
+    // First create a triangulation from the given data object,
+    Triangulation<dim>
+    triangulation (Triangulation<dim>::smoothing_on_refinement);
+    descriptor.data->create_coarse_grid (triangulation);
+
+    // then a set of finite elements and appropriate quadrature formula:
+    const FE_Q<dim>     primal_fe(descriptor.primal_fe_degree);
+    const FE_Q<dim>     dual_fe(descriptor.dual_fe_degree);
+    const QGauss<dim>   quadrature(descriptor.dual_fe_degree+1);
+    const QGauss<dim-1> face_quadrature(descriptor.dual_fe_degree+1);
+
+    // Next, select one of the classes implementing different refinement criteria;
+    // the dual functional resp. Kelly weight is dereferenced in the case using it.
+    std_cxx11::unique_ptr<LaplaceSolver::Base<dim> > solver;
+    switch (descriptor.refinement_criterion)
+      {
+      case ProblemDescription::dual_weighted_error_estimator:
+      {
+        solver.reset
+        (new LaplaceSolver::WeightedResidual<dim> (triangulation,
+                                                   primal_fe,
+                                                   dual_fe,
+                                                   quadrature,
+                                                   face_quadrature,
+                                                   descriptor.data->get_right_hand_side(),
+                                                   descriptor.data->get_boundary_values(),
+                                                   *descriptor.dual_functional));
+        break;
+      }
+
+      case ProblemDescription::global_refinement:
+      {
+        solver.reset
+        (new LaplaceSolver::RefinementGlobal<dim> (triangulation,
+                                                   primal_fe,
+                                                   quadrature,
+                                                   face_quadrature,
+                                                   descriptor.data->get_right_hand_side(),
+                                                   descriptor.data->get_boundary_values()));
+        break;
+      }
+
+      case ProblemDescription::kelly_indicator:
+      {
+        solver.reset
+        (new LaplaceSolver::RefinementKelly<dim> (triangulation,
+                                                  primal_fe,
+                                                  quadrature,
+                                                  face_quadrature,
+                                                  descriptor.data->get_right_hand_side(),
+                                                  descriptor.data->get_boundary_values()));
+        break;
+      }
+
+      case ProblemDescription::weighted_kelly_indicator:
+      {
+        solver.reset
+        (new LaplaceSolver::RefinementWeightedKelly<dim> (triangulation,
+                                                          primal_fe,
+                                                          quadrature,
+                                                          face_quadrature,
+                                                          descriptor.data->get_right_hand_side(),
+                                                          descriptor.data->get_boundary_values(),
+                                                          *descriptor.kelly_weight));
+        break;
+      }
+
+      default:
+        AssertThrow (false, ExcInternalError());
+      }
+
+    // Now that all objects are in place, run the main loop. The stopping
+    // criterion is implemented at the bottom of the loop.
+    //
+    // In the loop, first set the new cycle number, then solve the problem,
+    // output its solution(s), apply the evaluation objects to it, then decide
+    // whether we want to refine the mesh further and solve again on this
+    // mesh, or jump out of the loop.
+    for (unsigned int step=0; true; ++step)
+      {
+        std::cout << "Refinement cycle: "       << step
+                  << std::endl;
+
+        solver->set_refinement_cycle (step);
+        solver->solve_problem ();
+        solver->output_solution ();
+
+        std::cout << "   Number of degrees of freedom="
+                  << solver->n_dofs() << std::endl;
+
+        for (typename EvaluatorList::const_iterator
+             e = descriptor.evaluator_list.begin();
+             e != descriptor.evaluator_list.end(); ++e)
+          {
+            (*e)->set_refinement_cycle (step);
+            solver->postprocess (**e);
+          }
+
+
+        if (solver->n_dofs() < descriptor.max_degrees_of_freedom)
+          solver->refine_grid ();
+        else
+          break;
+      }
+
+    // Clean up the screen after the loop has run:
+    std::cout << std::endl;
+  }
+
+}
+
+
+
+// @sect3{The main function}
+
+// Here finally comes the main function. It drives the whole process by
+// specifying a set of parameters to be used for the simulation (polynomial
+// degrees, evaluation and dual functionals, etc), and passes them packed into
+// a structure to the framework class above.
+int main ()
+{
+  try
+    {
+      using namespace dealii;
+      using namespace Step14;
+
+      // Describe the problem we want to solve here by passing a descriptor
+      // object to the function doing the rest of the work:
+      const unsigned int dim = 2;
+      Framework<dim>::ProblemDescription descriptor;
+
+      // First set the refinement criterion we wish to use:
+      descriptor.refinement_criterion
+        = Framework<dim>::ProblemDescription::dual_weighted_error_estimator;
+      // Here, we could as well have used <code>global_refinement</code> or
+      // <code>weighted_kelly_indicator</code>. Note that the information
+      // given about dual finite elements, dual functional, etc is only
+      // important for the given choice of refinement criterion, and is
+      // ignored otherwise.
+
+      // Then set the polynomial degrees of primal and dual problem. We choose
+      // here bi-linear and bi-quadratic ones:
+      descriptor.primal_fe_degree = 1;
+      descriptor.dual_fe_degree   = 2;
+
+      // Then set the description of the test case, i.e. domain, boundary
+      // values, and right hand side. These are prepackaged in classes. We
+      // take here the description of <code>Exercise_2_3</code>, but you can
+      // also use <code>CurvedRidges@<dim@></code>:
+      descriptor.data.reset(new Data::SetUp<Data::Exercise_2_3<dim>,dim> ());
+
+      // Next set first a dual functional, then a list of evaluation
+      // objects. We choose as default the evaluation of the value at an
+      // evaluation point, represented by the classes
+      // <code>PointValueEvaluation</code> in the namespaces of evaluation and
+      // dual functional classes. You can also set the
+      // <code>PointXDerivativeEvaluation</code> classes for the x-derivative
+      // instead of the value at the evaluation point.
+      //
+      // Note that dual functional and evaluation objects should
+      // match. However, you can give as many evaluation functionals as you
+      // want, so you can have both point value and derivative evaluated after
+      // each step.  One such additional evaluation is to output the grid in
+      // each step.
+      const Point<dim> evaluation_point (0.75, 0.75);
+      descriptor.dual_functional.reset
+      (new DualFunctional::PointValueEvaluation<dim> (evaluation_point));
+
+      Evaluation::PointValueEvaluation<dim>
+      postprocessor1 (evaluation_point);
+      Evaluation::GridOutput<dim>
+      postprocessor2 ("grid");
+
+      descriptor.evaluator_list.push_back (&postprocessor1);
+      descriptor.evaluator_list.push_back (&postprocessor2);
+
+      // Set the maximal number of degrees of freedom after which we want the
+      // program to stop refining the mesh further:
+      descriptor.max_degrees_of_freedom = 20000;
+
+      // Finally pass the descriptor object to a function that runs the entire
+      // solution with it:
+      Framework<dim>::run (descriptor);
+    }
+
+  // Catch exceptions, report what failed, and exit with a nonzero status:
+  catch (std::exception &exc)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    }
+  catch (...)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    }
+
+  return 0;
+}
diff --git a/examples/step-15/CMakeLists.txt b/examples/step-15/CMakeLists.txt
new file mode 100644
index 0000000..cfcac77
--- /dev/null
+++ b/examples/step-15/CMakeLists.txt
@@ -0,0 +1,39 @@
+##
+#  CMake script for the step-15 tutorial program:
+##
+
+# Set the name of the project and target (also the source file's base name):
+SET(TARGET "step-15")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-15/doc/builds-on b/examples/step-15/doc/builds-on
new file mode 100644
index 0000000..1740273
--- /dev/null
+++ b/examples/step-15/doc/builds-on
@@ -0,0 +1 @@
+step-6
diff --git a/examples/step-15/doc/intro.dox b/examples/step-15/doc/intro.dox
new file mode 100644
index 0000000..b2e777b
--- /dev/null
+++ b/examples/step-15/doc/intro.dox
@@ -0,0 +1,304 @@
+<br>
+
+<i>
+This program grew out of a student project by Sven Wetterauer at the
+University of Heidelberg, Germany. Most of the work for this program
+is by him.
+</i>
+<br>
+
+
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+<h3>Foreword</h3>
+
+This program deals with an example of a non-linear elliptic partial
+differential equation, the minimal
+surface equation. You can imagine the solution of this equation to describe
+the surface spanned by a soap film that is enclosed by a
+closed wire loop. We imagine the wire to not just be a planar loop, but in
+fact curved. The surface tension of the soap film will then reduce the surface
+to have minimal surface. The solution of the minimal surface equation
+describes this shape with the wire's vertical displacement as a boundary
+condition. For simplicity, we will here assume that the surface can be written
+as a graph $u=u(x,y)$ although it is clear that it is not very hard to
+construct cases where the wire is bent in such a way that the surface can only
+locally be constructed as a graph but not globally.
+
+Because the equation is non-linear, we can't solve it directly. Rather, we
+have to use Newton's method to compute the solution iteratively.
+
+@dealiiVideoLecture{31.5,31.55,31.6}
+(@dealiiVideoLectureSeeAlso{31.65,31.7})
+
+
+
+<h3>Classical formulation</h3>
+
+In a classical sense, the problem is given in the following form:
+
+
+  @f{align*}
+    -\nabla \cdot \left( \frac{1}{\sqrt{1+|\nabla u|^{2}}}\nabla u \right) &= 0 \qquad
+    \qquad &&\textrm{in} ~ \Omega
+    \\
+    u&=g \qquad\qquad &&\textrm{on} ~ \partial \Omega.
+  @f}
+
+$\Omega$ is the domain we get by projecting the wire's positions into $x-y$
+space. In this example, we choose $\Omega$ as the unit disk.
+
+As described above, we solve this equation using Newton's method in which we
+compute the $n$th approximate solution from the $(n-1)$th one, and use
+a damping parameter $\alpha^n$ to get better global convergence behavior:
+  @f{align*}
+    F'(u^{n},\delta u^{n})&=- F(u^{n})
+    \\
+    u^{n+1}&=u^{n}+\alpha^n \delta u^{n}
+  @f}
+with
+  @f[
+    F(u):= -\nabla \cdot \left( \frac{1}{\sqrt{1+|\nabla u|^{2}}}\nabla u \right)
+  @f]
+and $F'(u,\delta u)$ the derivative of F in direction of $\delta u$:
+@f[
+  F'(u,\delta u)=\lim \limits_{\epsilon \rightarrow 0}{\frac{F(u+\epsilon \delta u)-
+  F(u)}{\epsilon}}.
+@f]
+
+Going through the motions to find out what $F'(u,\delta u)$ is, we find that
+we have to solve a linear elliptic PDE in every Newton step, with $\delta u^n$
+as the solution of:
+
+  @f[
+  - \nabla \cdot \left( \frac{1}{(1+|\nabla u^{n}|^{2})^{\frac{1}{2}}}\nabla
+  \delta u^{n} \right) +
+  \nabla \cdot \left( \frac{\nabla u^{n} \cdot
+  \nabla \delta u^{n}}{(1+|\nabla u^{n}|^{2})^{\frac{3}{2}}} \nabla u^{n}
+  \right)  =
+  -\left( - \nabla \cdot \left( \frac{1}{(1+|\nabla u^{n}|^{2})^{\frac{1}{2}}}
+  \nabla u^{n} \right) \right)
+  @f]
+
+In order to solve the minimal surface equation, we have to solve this equation
+repeatedly, once per Newton step. To solve this, we have to take a look at the
+boundary condition of this problem. Assuming that $u^{n}$ already has the
+right boundary values, the Newton update $\delta u^{n}$ should have zero
+boundary conditions, in order to have the right boundary condition after
+adding both.  In the first Newton step, we are starting with the solution
+$u^{0}\equiv 0$, the Newton update still has to deliver the right boundary
+condition to the solution $u^{1}$.
+
+
+Summing up, we have to solve the PDE above with the boundary condition $\delta
+u^{0}=g$ in the first step and with $\delta u^{n}=0$ in all the following steps.
+
+
+<h3>Weak formulation of the problem</h3>
+
+Starting with the strong formulation above, we get the weak formulation by multiplying
+both sides of the PDE with a test function $\varphi$ and integrating by parts on both sides:
+  @f[
+  \left( \nabla \varphi , \frac{1}{(1+|\nabla u^{n}|^{2})^{\frac{1}{2}}}\nabla
+  \delta u^{n} \right)-\left(\nabla \varphi ,\frac{\nabla u^{n} \cdot \nabla
+  \delta u^{n}}{(1+|\nabla u^{n}|^{2})^{\frac{3}{2}}}\nabla u^{n}  \right)
+  = -\left(\nabla \varphi , \frac{1}{(1+|\nabla u^{n}|^{2})^{\frac{1}{2}}} \nabla u^{n}
+   \right).
+  @f]
+Here the solution $\delta u^{n}$ is a function in $H^{1}(\Omega)$, subject to
+the boundary conditions discussed above.
+Reducing this space to a finite dimensional space with basis $\left\{
+\varphi_{0},\dots , \varphi_{N-1}\right\}$, we can write the solution:
+
+@f[
+  \delta u^{n}=\sum_{j=0}^{N-1} \delta U_{j} \varphi_{j}.
+@f]
+
+Using the basis functions as test functions and defining $a_{n}:=\frac{1}
+{\sqrt{1+|\nabla u^{n}|^{2}}}$, we can rewrite the weak formulation:
+
+@f[
+  \sum_{j=0}^{N-1}\left[ \left( \nabla \varphi_{i} , a_{n} \nabla \varphi_{j} \right) -
+  \left(\nabla u^{n}\cdot \nabla \varphi_{i} , a_{n}^{3} \nabla u^{n} \cdot \nabla
+  \varphi_{j} \right) \right] \cdot \delta U_{j}=-\left( \nabla \varphi_{i} , a_{n}
+  \nabla u^{n}\right) \qquad \forall i=0,\dots ,N-1,
+@f]
+
+where the solution $\delta u^{n}$ is given by the coefficients $\delta U^{n}_{j}$.
+This linear system of equations can be rewritten as:
+
+@f[
+  A^{n}\; \delta U^{n}=b^{n},
+@f]
+
+where the entries of the matrix $A^{n}$ are given by:
+
+@f[
+  A^{n}_{ij}:= \left( \nabla \varphi_{i} , a_{n} \nabla \varphi_{j} \right) -
+  \left(\nabla u^{n}\cdot \nabla \varphi_{i} , a_{n}^{3} \nabla u^{n} \cdot \nabla
+  \varphi_{j} \right),
+@f]
+
+and the right hand side $b^{n}$ is given by:
+
+@f[
+  b^{n}_{i}:=-\left( \nabla \varphi_{i} , a_{n} \nabla u^{n}\right).
+@f]
+
+
+<h3> Questions about the appropriate solver </h3>
+
+The matrix that corresponds to the Newton step above can be reformulated to
+show its structure a bit better. Rewriting it slightly, we get that it has the
+form
+@f[
+  A_{ij}
+  =
+  \left(
+    \nabla \varphi_i,
+    B
+    \nabla \varphi_j
+  \right),
+@f]
+where the matrix $B$ (of size $d \times d$ in $d$ space dimensions) is given
+by the following expression:
+@f[
+  B
+  =
+  a_n \left\{
+   \mathbf I
+   -
+   a_n^2 [\nabla u_n] \otimes [\nabla u_n]
+  \right\}
+  =
+  a_n \left\{
+   \mathbf I
+   -
+  \frac{\nabla u_n}{\sqrt{1+|\nabla u^{n}|^{2}}} \otimes
+  \frac{\nabla u_n}{\sqrt{1+|\nabla u^{n}|^{2}}}
+  \right\}.
+@f]
+From this expression, it is obvious that
+$B$ is symmetric, and so $A$ is symmetric as well.
+On the other hand, $B$ is also positive definite, which confers the same
+property onto $A$. This can be seen by noting that the vector $v_1 =
+\frac{\nabla u^n}{|\nabla u^n|}$ is an eigenvector of $B$ with eigenvalue
+$\lambda_1=a_n \left(1-\frac{|\nabla u^n|^2}{1+|\nabla u^n|^2}\right) > 0$ while all vectors $v_2\ldots v_d$
+that are perpendicular to $v_1$ and each other are eigenvectors with
+eigenvalue $a_n$. Since all eigenvalues are positive, $B$ is positive definite
+and so is $A$. We can thus use the CG method for solving the Newton steps.
+(The fact that the matrix $A$ is symmetric and positive definite should not come
+as a surprise. It results from taking the derivative of an operator that
+results from taking the derivative of an energy functional: the minimal 
+surface equation simply minimizes some non-quadratic energy. Consequently,
+the Newton matrix, as the matrix of second derivatives of a scalar energy,
+must be symmetric since the derivative with regard to the $i$th and $j$th
+degree of freedom should clearly commute. Likewise, if the energy functional
+is convex, then the matrix of second derivatives must be positive definite,
+and the direct calculation above simply reaffirms this.) 
+
+It is worth noting, however, that the positive definiteness degenerates for
+problems where $\nabla u$ becomes large. In other words, if we simply multiply
+all boundary values by 2, then to first order $u$ and $\nabla u$ will also be
+multiplied by two, but as a consequence the smallest eigenvalue of $B$ will
+become smaller and the matrix will become more ill-conditioned. (More
+specifically, for $|\nabla u^n|\rightarrow\infty$ we have that
+$\lambda_1 \propto a_n \frac{1}{|\nabla u^n|^2}$ whereas
+$\lambda_2\ldots \lambda_d=a_n$; thus, the condition number of $B$,
+which is a multiplicative factor in the condition number of $A$ grows
+like ${\cal O}(|\nabla u^n|^2)$.) It is simple
+to verify with the current program that indeed multiplying the boundary values
+used in the current program by larger and larger values results in a problem
+that will ultimately no longer be solvable using the simple preconditioned CG
+method we use here.
+
+
+<h3> Choice of step length and globalization </h3>
+
+As stated above, Newton's method works by computing a direction
+$\delta u^n$ and then performing the update $u^{n+1} = u^{n}+\alpha^n
+\delta u^{n}$ with a step length $0 < \alpha^n \le 1$. It is a common
+observation that for strongly nonlinear models, Newton's method does
+not converge if we always choose $\alpha^n=1$ unless one starts with
+an initial guess $u^0$ that is sufficiently close to the solution $u$
+of the nonlinear problem. In practice, we don't always have such an
+initial guess, and consequently taking full Newton steps (i.e., using
+$\alpha=1$) frequently does not work.
+
+A common strategy therefore is to use a smaller step length for the
+first few steps while the iterate $u^n$ is still far away from the
+solution $u$ and as we get closer use larger values for $\alpha^n$
+until we can finally start to use full steps $\alpha^n=1$ as we are
+close enough to the solution. The question is of course how to choose
+$\alpha^n$. There are basically two widely used approaches: line
+search and trust region methods.
+
+In this program, we simply always choose the step length equal to
+0.1. This makes sure that for the testcase at hand we do get
+convergence although it is clear that by not eventually reverting to
+full step lengths we forego the rapid, quadratic convergence that
+makes Newton's method so appealing. Obviously, this is a point one
+eventually has to address if the program was made into one that is
+meant to solve more realistic problems. We will comment on this issue
+some more in the <a href="#Results">results section</a>.
+
+
+<h3> Summary of the algorithm and testcase </h3>
+
+Overall, the program we have here is not unlike step-6 in many regards. The
+layout of the main class is essentially the same. On the other hand, the
+driving algorithm in the <code>run()</code> function is different and works as
+follows:
+<ol>
+<li>
+  Start with the function $u^{0}\equiv 0$ and modify it in such a way
+  that the values of $u^0$ along the boundary equal the correct
+  boundary values $g$ (this happens in
+  <code>MinimalSurfaceProblem::set_boundary_values</code>). Set
+  $n=0$.
+</li>
+
+<li>
+  Compute the Newton update by solving the system $A^{n}\;\delta
+  U^{n}=b^{n}$
+  with boundary condition $\delta u^{n}=0$ on $\partial \Omega$.
+</li>
+
+<li>
+  Compute a step length $\alpha^n$. In this program, we always set
+  $\alpha^n=0.1$. To make things easier to extend later on, this
+  happens in a function of its own, namely in
+  <code>MinimalSurfaceProblem::determine_step_length</code>.
+</li>
+
+<li>
+  The new approximation of the solution is given by
+  $u^{n+1}=u^{n}+\alpha^n \delta u^{n}$.
+</li>
+
+<li>
+  If $n$ is a multiple of 5 then refine the mesh, transfer the
+  solution $u^{n+1}$ to the new mesh and set the values of $u^{n+1}$
+  in such a way that along the boundary we have
+  $u^{n+1}|_{\partial\Omega}=g$ (again in
+  <code>MinimalSurfaceProblem::set_boundary_values</code>). Note that
+  this isn't automatically
+  guaranteed even though by construction we had that before mesh
+  refinement $u^{n+1}|_{\partial\Omega}=g$ because mesh refinement
+  adds new nodes to the mesh where we have to interpolate the old
+  solution to the new nodes upon bringing the solution from the old to
+  the new mesh. The values we choose by interpolation may be close to
+  the exact boundary conditions but are, in general, nonetheless not
+  the correct values.
+</li>
+
+<li>
+  Set $n\leftarrow n+1$ and go to step 2.
+</li>
+</ol>
+
+The testcase we solve is chosen as follows: We seek the minimal
+surface over the unit disk $\Omega=\{\mathbf x: \|\mathbf
+x\|<1\}\subset {\mathbb R}^2$ where the surface attains the values
+$u(x,y)|_{\partial\Omega} = g(x,y):=\sin(2 \pi (x+y))$ along the boundary.
diff --git a/examples/step-15/doc/kind b/examples/step-15/doc/kind
new file mode 100644
index 0000000..c1d9154
--- /dev/null
+++ b/examples/step-15/doc/kind
@@ -0,0 +1 @@
+techniques
diff --git a/examples/step-15/doc/results.dox b/examples/step-15/doc/results.dox
new file mode 100644
index 0000000..75e78a1
--- /dev/null
+++ b/examples/step-15/doc/results.dox
@@ -0,0 +1,184 @@
+<h1>Results</h1>
+
+
+The output of the program looks as follows:
+@code
+* ******** Initial mesh  ********
+  Initial residual: 1.53143
+  Residual: 1.08746
+  Residual: 0.966748
+  Residual: 0.859602
+  Residual: 0.766462
+  Residual: 0.685475
+* ******** Refined mesh 1 ********
+  Initial residual: 0.865774
+  Residual: 0.759295
+  Residual: 0.675281
+  Residual: 0.603523
+  Residual: 0.540744
+  Residual: 0.485238
+* ******** Refined mesh 2 ********
+  Initial residual: 0.425581
+  Residual: 0.382042
+  Residual: 0.343307
+  Residual: 0.308718
+....
+@endcode
+
+Obviously, the scheme converges, if not very fast. We will come back to
+strategies for accelerating the method below.
+
+One can visualize the solution after each set of five Newton
+iterations, i.e., on each of the meshes on which we approximate the
+solution. This yields the following set of images:
+
+<TABLE WIDTH="100%">
+<tr>
+<td>
+  <img src="http://www.dealii.org/images/steps/developer/step-15.solution-0.png" alt="">
+</td>
+<td>
+  <img src="http://www.dealii.org/images/steps/developer/step-15.solution-1.png" alt="">
+</td>
+</tr>
+
+<tr>
+<td>
+  <img src="http://www.dealii.org/images/steps/developer/step-15.solution-2.png" alt="">
+</td>
+<td>
+  <img src="http://www.dealii.org/images/steps/developer/step-15.solution-3.png" alt="">
+</td>
+</tr>
+
+<tr>
+<td>
+  <img src="http://www.dealii.org/images/steps/developer/step-15.solution-4.png" alt="">
+</td>
+<td>
+  <img src="http://www.dealii.org/images/steps/developer/step-15.solution-5.png" alt="">
+</td>
+</tr>
+
+<tr>
+<td>
+  <img src="http://www.dealii.org/images/steps/developer/step-15.solution-6.png" alt="">
+</td>
+<td>
+  <img src="http://www.dealii.org/images/steps/developer/step-15.solution-7.png" alt="">
+</td>
+</tr>
+</table>
+
+It is clearly visible that the solution minimizes the surface
+after each refinement. The solution converges to the picture one
+would imagine of a soap film spanned by a wire loop
+that is bent like
+the boundary. It is also visible how the boundary
+is smoothed out after each refinement. On the coarse mesh,
+the boundary doesn't look like a sine, whereas it does the
+finer the mesh gets.
+
+The mesh is mostly refined near the boundary, where the solution
+increases or decreases strongly, whereas it is coarsened on
+the inside of the domain, where nothing interesting happens,
+because there isn't much change in the solution. The final solution
+and mesh are shown here:
+
+<img src="http://www.dealii.org/images/steps/developer/step-15.grid.png" alt="">
+
+
+
+<a name="extensions"></a>
+<h3>Possibilities for extensions</h3>
+
+The program shows the basic structure of a solver for a nonlinear, stationary
+problem. However, it does not converge particularly fast, for good reasons:
+
+- The program always takes a step size of 0.1. This precludes the rapid,
+  quadratic convergence for which Newton's method is typically chosen.
+- It does not connect the nonlinear iteration with the mesh refinement
+  iteration.
+
+Obviously, a better program would have to address these two points.
+We will discuss them in the following.
+
+
+<h4> Step length control </h4>
+
+Newton's method has two well known properties:
+- It does not converge from arbitrarily chosen starting points. Rather, a
+  starting point has to be close enough to the solution to guarantee
+  convergence. However, we can enlarge the area from which Newton's method
+  converges by damping the iteration using a <i>step length</i>
+  $0<\alpha^n\le 1$.
+- It exhibits rapid convergence of quadratic order if (i) the step length is
+  chosen as $\alpha^n=1$, and (ii) it does in fact converge with this choice
+  of step length.
+
+A consequence of these two observations is that a successful strategy is to
+choose $\alpha^n<1$ for the initial iterations until the iterate has come
+close enough to allow for convergence with full step length, at which point we
+want to switch to $\alpha^n=1$. The question is how to choose $\alpha^n$ in an
+automatic fashion that satisfies these criteria.
+
+We do not want to review the literature on this topic here, but only briefly
+mention that there are two fundamental approaches to the problem: backtracking
+line search and trust region methods. The former is more widely used for
+partial differential equations and essentially does the following:
+- Compute a search direction
+- See if the resulting residual of $u^n + \alpha^n\;\delta u^n$ with
+  $\alpha^n=1$ is "substantially smaller" than that of $u^n$ alone.
+- If so, then take $\alpha^n=1$.
+- If not, try whether the residual is "substantially smaller" with
+  $\alpha^n=2/3$.
+- If so, then take $\alpha^n=2/3$.
+- If not, try whether the residual is "substantially smaller" with
+  $\alpha^n=(2/3)^2$.
+- Etc.
+One can of course choose other factors $r, r^2, \ldots$ than the $2/3,
+(2/3)^2, \ldots$ chosen above, for $0<r<1$. It is obvious where the term
+"backtracking" comes from: we try a long step, but if that doesn't work we try
+a shorter step, and then ever shorter steps, etc. The function
+<code>determine_step_length()</code> is written the way it is to support
+exactly this kind of use case.
+
+Whether we accept a particular step length $\alpha^n$ depends on how we define
+"substantially smaller". There are a number of ways to do so, but without
+going into detail let us just mention that the most common ones are to use the
+Wolfe and Armijo-Goldstein conditions. For these, one can show the following:
+- There is always a step length $\alpha^n$ for which the conditions are
+  satisfied, i.e., the iteration never gets stuck as long as the problem is
+  convex.
+- If we are close enough to the solution, then the conditions allow for
+  $\alpha^n=1$, thereby enabling quadratic convergence.
+
+We will not dwell on this here any further but leave the implementation of
+such algorithms as an exercise. We note, however, that when implemented
+correctly then it is a common observation that most reasonably nonlinear
+problems can be solved in anywhere between 5 and 15 Newton iterations to
+engineering accuracy — substantially fewer than we need with the current
+version of the program.
+
+
+<h4> Integrating mesh refinement and nonlinear and linear solvers </h4>
+
+We currently do exactly 5 iterations on each mesh. But is this optimal? One
+could ask the following questions:
+- Maybe it is worthwhile doing more iterations on the initial meshes since
+  there, computations are cheap.
+- On the other hand, we do not want to do too many iterations on every mesh:
+  yes, we could drive the residual to zero on every mesh, but that would only
+  mean that the nonlinear iteration error is far smaller than the
+  discretization error.
+- Should we solve the linear systems in each Newton step with higher or
+  lower accuracy?
+
+Ultimately, what this boils down to is that we somehow need to couple the
+discretization error on the current mesh with the nonlinear residual we want
+to achieve with the Newton iterations on a given mesh, and with the accuracy
+of the linear iteration we want to achieve with the CG method within each
+Newton iteration.
+
+How to do this is, again, not entirely trivial, and we again leave it as a
+future exercise.
diff --git a/examples/step-15/doc/tooltip b/examples/step-15/doc/tooltip
new file mode 100644
index 0000000..70b5c4b
--- /dev/null
+++ b/examples/step-15/doc/tooltip
@@ -0,0 +1 @@
+A nonlinear elliptic problem. Newton's method.
diff --git a/examples/step-15/step-15.cc b/examples/step-15/step-15.cc
new file mode 100644
index 0000000..408e3a3
--- /dev/null
+++ b/examples/step-15/step-15.cc
@@ -0,0 +1,752 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2012 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Author: Sven Wetterauer, University of Heidelberg, 2012
+ */
+
+
+// @sect3{Include files}
+
+// The first few files have already been covered in previous examples and will
+// thus not be further commented on.
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/utilities.h>
+
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/precondition.h>
+#include <deal.II/lac/constraint_matrix.h>
+
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/manifold_lib.h>
+#include <deal.II/grid/grid_refinement.h>
+
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/fe_q.h>
+
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/matrix_tools.h>
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/numerics/error_estimator.h>
+
+
+#include <fstream>
+#include <iostream>
+
+// We will use adaptive mesh refinement between Newton iterations. To do so,
+// we need to be able to work with a solution on the new mesh, although it was
+// computed on the old one. The SolutionTransfer class transfers the solution
+// from the old to the new mesh:
+
+#include <deal.II/numerics/solution_transfer.h>
+
+// We then open a namespace for this program and import everything from the
+// dealii namespace into it, as in previous programs:
+namespace Step15
+{
+  using namespace dealii;
+
+
+  // @sect3{The <code>MinimalSurfaceProblem</code> class template}
+
+  // The class template is basically the same as in step-6.  Three additions
+  // are made:
+  // - There are two solution vectors, one for the Newton update
+  //   $\delta u^n$, and one for the current iterate $u^n$.
+  // - The <code>setup_system</code> function takes an argument that denotes whether
+  //   this is the first time it is called or not. The difference is that the
+  //   first time around we need to distribute the degrees of freedom and set the
+  //   solution vector for $u^n$ to the correct size. The following times, the
+  //   function is called after we have already done these steps as part of
+  //   refining the mesh in <code>refine_mesh</code>.
+  // - We then also need new functions: <code>set_boundary_values()</code>
+  //   takes care of setting the boundary values on the solution vector
+  //   correctly, as discussed at the end of the
+  //   introduction. <code>compute_residual()</code> is a function that computes
+  //   the norm of the nonlinear (discrete) residual. We use this function to
+  //   monitor convergence of the Newton iteration. The function takes a step
+  //   length $\alpha^n$ as argument to compute the residual of $u^n + \alpha^n
+  //   \; \delta u^n$. This is something one typically needs for step length
+  //   control, although we will not use this feature here. Finally,
+  //   <code>determine_step_length()</code> computes the step length $\alpha^n$
+  //   in each Newton iteration. As discussed in the introduction, we here use a
+  //   fixed step length and leave implementing a better strategy as an
+  //   exercise.
+
+  template <int dim>
+  class MinimalSurfaceProblem
+  {
+  public:
+    MinimalSurfaceProblem ();
+    ~MinimalSurfaceProblem ();
+    // Public entry point; everything else is an implementation detail.
+    void run ();
+
+  private:
+    void setup_system (const bool initial_step);
+    void assemble_system ();
+    void solve ();
+    void refine_mesh ();
+    void set_boundary_values ();
+    double compute_residual (const double alpha) const;
+    double determine_step_length () const;
+    // The mesh. It is declared before dof_handler, which is constructed from it.
+    Triangulation<dim>   triangulation;
+    // Degrees of freedom on the mesh and the finite element they belong to.
+    DoFHandler<dim>      dof_handler;
+    FE_Q<dim>            fe;
+    // Hanging node constraints for the current mesh.
+    ConstraintMatrix     hanging_node_constraints;
+    // Sparsity pattern and matrix of the linear system for the Newton update.
+    SparsityPattern      sparsity_pattern;
+    SparseMatrix<double> system_matrix;
+    // Current iterate $u^n$, Newton update $\delta u^n$, and right hand side.
+    Vector<double>       present_solution;
+    Vector<double>       newton_update;
+    Vector<double>       system_rhs;
+  };
+
+  // @sect3{Boundary condition}
+
+  // The boundary condition is implemented just like in step-4.  It is chosen
+  // as $g(x,y)=\sin(2 \pi (x+y))$:
+
+  template <int dim>
+  class BoundaryValues : public Function<dim>
+  {
+  public:
+    BoundaryValues () {}
+    // Evaluates $g$ at <code>p</code>; the definition follows the class.
+    virtual double
+    value (const Point<dim> &p, const unsigned int component = 0) const;
+  };
+
+
+  template <int dim>
+  double BoundaryValues<dim>::value (const Point<dim> &p, const unsigned int) const
+  {
+    const double two_pi = 2 * numbers::PI;
+    return std::sin(two_pi * (p[0]+p[1]));   // g(x,y) = sin(2 pi (x+y))
+  }
+
+  // @sect3{The <code>MinimalSurfaceProblem</code> class implementation}
+
+  // @sect4{MinimalSurfaceProblem::MinimalSurfaceProblem}
+
+  // The constructor and destructor of the class are the same as in the first
+  // few tutorials.
+
+  template <int dim>
+  MinimalSurfaceProblem<dim>::MinimalSurfaceProblem ()
+    : dof_handler (triangulation),   // the DoFHandler works on our own mesh
+      fe (2)                         // FE_Q element, constructed with 2
+  {
+  }
+
+
+
+  template <int dim>
+  MinimalSurfaceProblem<dim>::~MinimalSurfaceProblem ()
+  {
+    this->dof_handler.clear ();   // empty the DoFHandler explicitly
+  }
+
+  // @sect4{MinimalSurfaceProblem::setup_system}
+
+  // As always in the setup-system function, we set up the variables of the
+  // finite element method. There are some differences to step-6, because
+  // there we start solving the PDE from scratch in every refinement cycle
+  // whereas here we need to take the solution from the previous mesh onto the
+  // current mesh. Consequently, we can't just reset solution vectors. The
+  // argument passed to this function thus indicates whether we should
+  // distribute degrees of freedom (plus compute constraints) and set the
+  // solution vector to zero or whether this has happened elsewhere already
+  // (specifically, in <code>refine_mesh()</code>).
+
+  template <int dim>
+  void MinimalSurfaceProblem<dim>::setup_system (const bool initial_step)
+  {
+    // Only the very first call enumerates the degrees of freedom, sizes the
+    // solution vector and builds the hanging node constraints; on later
+    // calls <code>refine_mesh()</code> has already taken care of all this.
+    if (initial_step)
+      {
+        dof_handler.distribute_dofs (fe);
+        present_solution.reinit (dof_handler.n_dofs());
+
+        hanging_node_constraints.clear ();
+        DoFTools::make_hanging_node_constraints (dof_handler,
+                                                 hanging_node_constraints);
+        hanging_node_constraints.close ();
+      }
+
+    // Everything below is resized for the current number of unknowns.
+    const types::global_dof_index n_unknowns = dof_handler.n_dofs();
+    system_rhs.reinit (n_unknowns);
+    newton_update.reinit (n_unknowns);
+
+    DynamicSparsityPattern dynamic_pattern (n_unknowns);
+    DoFTools::make_sparsity_pattern (dof_handler, dynamic_pattern);
+    hanging_node_constraints.condense (dynamic_pattern);
+    sparsity_pattern.copy_from (dynamic_pattern);
+    system_matrix.reinit (sparsity_pattern);
+  }
+
+  // @sect4{MinimalSurfaceProblem::assemble_system}
+
+  // This function does the same as in the previous tutorials except that now,
+  // of course, the matrix and right hand side functions depend on the
+  // previous iteration's solution. As discussed in the introduction, we need
+  // to use zero boundary values for the Newton updates; we compute them at
+  // the end of this function.
+  //
+  // The top of the function contains the usual boilerplate code, setting up
+  // the objects that allow us to evaluate shape functions at quadrature
+  // points and temporary storage locations for the local matrices and
+  // vectors, as well as for the gradients of the previous solution at the
+  // quadrature points. We then start the loop over all cells:
+  template <int dim>
+  void MinimalSurfaceProblem<dim>::assemble_system ()
+  {
+    const QGauss<dim>  quadrature_formula(3);
+    // Start from an empty matrix and right hand side before accumulating:
+    system_matrix = 0;
+    system_rhs = 0;
+    // NOTE(review): update_quadrature_points appears to be unused below.
+    FEValues<dim> fe_values (fe, quadrature_formula,
+                             update_gradients         |
+                             update_quadrature_points |
+                             update_JxW_values);
+
+    const unsigned int           dofs_per_cell = fe.dofs_per_cell;
+    const unsigned int           n_q_points    = quadrature_formula.size();
+
+    FullMatrix<double>           cell_matrix (dofs_per_cell, dofs_per_cell);
+    Vector<double>               cell_rhs (dofs_per_cell);
+    // Gradients of the current iterate $u^n$ at the quadrature points:
+    std::vector<Tensor<1, dim> > old_solution_gradients(n_q_points);
+
+    std::vector<types::global_dof_index>    local_dof_indices (dofs_per_cell);
+
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+    for (; cell!=endc; ++cell)
+      {
+        cell_matrix = 0;
+        cell_rhs = 0;
+
+        fe_values.reinit (cell);
+
+        // For the assembly of the linear system, we have to obtain the values
+        // of the previous solution's gradients at the quadrature
+        // points. There is a standard way of doing this: the
+        // FEValues::get_function_gradients function takes a vector that
+        // represents a finite element field defined on a DoFHandler, and
+        // evaluates the gradients of this field at the quadrature points of the
+        // cell with which the FEValues object has last been reinitialized.
+        // The values of the gradients at all quadrature points are then written
+        // into the second argument:
+        fe_values.get_function_gradients(present_solution,
+                                         old_solution_gradients);
+
+        // With this, we can then do the integration loop over all quadrature
+        // points and shape functions.  Having just computed the gradients of
+        // the old solution at the quadrature points, we are able to compute
+        // the coefficient $a_{n}$ at these points.  The assembly of the
+        // system itself then looks similar to what we always do with the
+        // exception of the nonlinear terms, as does copying the results from
+        // the local objects into the global ones:
+        for (unsigned int q_point = 0; q_point < n_q_points; ++q_point)
+          {
+            const double coeff
+              = 1.0 / std::sqrt(1 +
+                                old_solution_gradients[q_point] *
+                                old_solution_gradients[q_point]);
+            // (coeff is $a_n=(1+|\nabla u^n|^2)^{-1/2}$ at this point.)
+            for (unsigned int i=0; i<dofs_per_cell; ++i)
+              {
+                for (unsigned int j=0; j<dofs_per_cell; ++j)
+                  {
+                    cell_matrix(i, j) += ( ((fe_values.shape_grad(i, q_point)
+                                             * coeff
+                                             * fe_values.shape_grad(j, q_point))
+                                            -
+                                            (fe_values.shape_grad(i, q_point)
+                                             * coeff * coeff * coeff
+                                             * (fe_values.shape_grad(j, q_point)
+                                                *
+                                                old_solution_gradients[q_point])
+                                             * old_solution_gradients[q_point]))
+                                           * fe_values.JxW(q_point));
+                  }
+                // Right hand side: $-(\nabla\varphi_i, a_n \nabla u^n)$.
+                cell_rhs(i) -= (fe_values.shape_grad(i, q_point)
+                                * coeff
+                                * old_solution_gradients[q_point]
+                                * fe_values.JxW(q_point));
+              }
+          }
+        // Transfer the local contributions to the global objects:
+        cell->get_dof_indices (local_dof_indices);
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          {
+            for (unsigned int j=0; j<dofs_per_cell; ++j)
+              system_matrix.add (local_dof_indices[i],
+                                 local_dof_indices[j],
+                                 cell_matrix(i,j));
+
+            system_rhs(local_dof_indices[i]) += cell_rhs(i);
+          }
+      }
+
+    // Finally, we remove hanging nodes from the system and apply zero
+    // boundary values to the linear system that defines the Newton updates
+    // $\delta u^n$:
+    hanging_node_constraints.condense (system_matrix);
+    hanging_node_constraints.condense (system_rhs);
+
+    std::map<types::global_dof_index,double> boundary_values;
+    VectorTools::interpolate_boundary_values (dof_handler,
+                                              0,
+                                              ZeroFunction<dim>(),
+                                              boundary_values);
+    MatrixTools::apply_boundary_values (boundary_values,
+                                        system_matrix,
+                                        newton_update,
+                                        system_rhs);
+  }
+
+
+
+  // @sect4{MinimalSurfaceProblem::solve}
+
+  // The solve function is the same as always. At the end of the solution
+  // process we update the current solution by setting
+  // $u^{n+1}=u^n+\alpha^n\;\delta u^n$.
+  template <int dim>
+  void MinimalSurfaceProblem<dim>::solve ()
+  {
+    // Iteration cap: one step per unknown; tolerance: 1e-6 times the rhs norm.
+    const double       tolerance = system_rhs.l2_norm()*1e-6;
+    SolverControl      control (system_rhs.size(), tolerance);
+    SolverCG<>         cg (control);
+    PreconditionSSOR<> ssor;
+    ssor.initialize (system_matrix, 1.2);
+    cg.solve (system_matrix, newton_update, system_rhs, ssor);
+
+    // Fill in the constrained (hanging node) entries of the update ...
+    hanging_node_constraints.distribute (newton_update);
+
+    // ... and take the damped step $u^{n+1}=u^n+\alpha^n\;\delta u^n$.
+    const double step_length = determine_step_length();
+    present_solution.add (step_length, newton_update);
+  }
+
+
+  // @sect4{MinimalSurfaceProblem::refine_mesh}
+
+  // The first part of this function is the same as in step-6... However,
+  // after refining the mesh we have to transfer the old solution to the new
+  // one which we do with the help of the SolutionTransfer class. The process
+  // is slightly convoluted, so let us describe it in detail:
+  template <int dim>
+  void MinimalSurfaceProblem<dim>::refine_mesh ()
+  {
+    Vector<float> estimated_error_per_cell (triangulation.n_active_cells());
+    // Estimate the error on each cell from the current iterate:
+    KellyErrorEstimator<dim>::estimate (dof_handler,
+                                        QGauss<dim-1>(3),
+                                        typename FunctionMap<dim>::type(),
+                                        present_solution,
+                                        estimated_error_per_cell);
+    // Flag cells based on these estimates (fractions 0.3 and 0.03):
+    GridRefinement::refine_and_coarsen_fixed_number (triangulation,
+                                                     estimated_error_per_cell,
+                                                     0.3, 0.03);
+
+    // Then we need an additional step: if, for example, you flag a cell that
+    // is once more refined than its neighbor, and that neighbor is not
+    // flagged for refinement, we would end up with a jump of two refinement
+    // levels across a cell interface.  To avoid these situations, the library
+    // will silently also have to refine the neighbor cell once. It does so by
+    // calling the Triangulation::prepare_coarsening_and_refinement function
+    // before actually doing the refinement and coarsening.  This function
+    // flags a set of additional cells for refinement or coarsening, to
+    // enforce rules like the one-hanging-node rule.  The cells that are
+    // flagged for refinement and coarsening after calling this function are
+    // exactly the ones that will actually be refined or coarsened. Usually,
+    // you don't have to do this by hand
+    // (Triangulation::execute_coarsening_and_refinement does this for
+    // you). However, we need to initialize the SolutionTransfer class and it
+    // needs to know the final set of cells that will be coarsened or refined
+    // in order to store the data from the old mesh and transfer to the new
+    // one. Thus, we call the function by hand:
+    triangulation.prepare_coarsening_and_refinement ();
+
+    // With this out of the way, we initialize a SolutionTransfer object with
+    // the present DoFHandler and attach the solution vector to it, followed
+    // by doing the actual refinement and distribution of degrees of freedom
+    // on the new mesh:
+    SolutionTransfer<dim> solution_transfer(dof_handler);
+    solution_transfer.prepare_for_coarsening_and_refinement(present_solution);
+
+    triangulation.execute_coarsening_and_refinement();
+
+    dof_handler.distribute_dofs(fe);
+
+    // Finally, we retrieve the old solution interpolated to the new
+    // mesh. Since the SolutionTransfer function does not actually store the
+    // values of the old solution, but rather indices, we need to preserve the
+    // old solution vector until we have gotten the new interpolated
+    // values. Thus, we have the new values written into a temporary vector,
+    // and only afterwards write them into the solution vector object. Once we
+    // have this solution we have to make sure that the $u^n$ we now have
+    // actually has the correct boundary values. As explained at the end of
+    // the introduction, this is not automatically the case even if the
+    // solution before refinement had the correct boundary values, and so we
+    // have to explicitly make sure that it now has:
+    Vector<double> tmp(dof_handler.n_dofs());
+    solution_transfer.interpolate(present_solution, tmp);
+    present_solution = tmp;
+
+    set_boundary_values ();
+
+    // On the new mesh, there are different hanging nodes, which we have to
+    // compute again. To ensure there are no hanging nodes of the old mesh in
+    // the object, it's first cleared.  To be on the safe side, we then also
+    // make sure that the current solution's vector entries satisfy the
+    // hanging node constraints (see the discussion in the documentation of
+    // the SolutionTransfer class for why this is necessary):
+    hanging_node_constraints.clear();
+
+    DoFTools::make_hanging_node_constraints(dof_handler,
+                                            hanging_node_constraints);
+    hanging_node_constraints.close();
+
+    hanging_node_constraints.distribute (present_solution);
+
+    // We end the function by updating all the remaining data structures,
+    // indicating to <code>setup_system()</code> that this is not the first
+    // go-around and that it needs to preserve the content of the solution
+    // vector:
+    setup_system (false);
+  }
+
+
+
+  // @sect4{MinimalSurfaceProblem::set_boundary_values}
+
+  // The next function ensures that the solution vector's entries respect the
+  // boundary values for our problem.  Having refined the mesh (or just
+  // started computations), there might be new nodal points on the
+  // boundary. These have values that are simply interpolated from the
+  // previous mesh (or are just zero), instead of the correct boundary
+  // values. This is fixed up by setting all boundary nodes explicitly to the
+  // right value:
+  template <int dim>
+  void MinimalSurfaceProblem<dim>::set_boundary_values ()
+  {
+    typedef std::map<types::global_dof_index, double> BoundaryMap;
+    BoundaryMap values_on_boundary;   // boundary DoF index -> value of $g$
+    VectorTools::interpolate_boundary_values (dof_handler, 0,
+                                              BoundaryValues<dim>(),
+                                              values_on_boundary);
+    // Overwrite the corresponding entries of the current iterate:
+    for (BoundaryMap::const_iterator it = values_on_boundary.begin();
+         it != values_on_boundary.end(); ++it)
+      present_solution(it->first) = it->second;
+  }
+
+
+  // @sect4{MinimalSurfaceProblem::compute_residual}
+
+  // In order to monitor convergence, we need a way to compute the norm of the
+  // (discrete) residual, i.e., the norm of the vector
+  // $\left<F(u^n),\varphi_i\right>$ with $F(u)=-\nabla \cdot \left(
+  // \frac{1}{\sqrt{1+|\nabla u|^{2}}}\nabla u \right)$ as discussed in the
+  // introduction. It turns out that (although we don't use this feature in
+  // the current version of the program) one needs to compute the residual
+  // $\left<F(u^n+\alpha^n\;\delta u^n),\varphi_i\right>$ when determining
+  // optimal step lengths, and so this is what we implement here: the function
+  // takes the step length $\alpha^n$ as an argument. The original
+  // functionality is of course obtained by passing a zero as argument.
+  //
+  // In the function below, we first set up a vector for the residual, and
+  // then a vector for the evaluation point $u^n+\alpha^n\;\delta u^n$. This
+  // is followed by the same boilerplate code we use for all integration
+  // operations:
+  template <int dim>
+  double MinimalSurfaceProblem<dim>::compute_residual (const double alpha) const
+  {
+    Vector<double> residual (dof_handler.n_dofs());
+
+    Vector<double> evaluation_point (dof_handler.n_dofs());
+    evaluation_point = present_solution;
+    evaluation_point.add (alpha, newton_update);
+
+    const QGauss<dim>  quadrature_formula(3);
+    FEValues<dim> fe_values (fe, quadrature_formula,
+                             update_gradients         |
+                             update_quadrature_points |
+                             update_JxW_values);
+
+    const unsigned int           dofs_per_cell = fe.dofs_per_cell;
+    const unsigned int           n_q_points    = quadrature_formula.size();
+
+    Vector<double>               cell_residual (dofs_per_cell);
+    std::vector<Tensor<1, dim> > gradients(n_q_points);
+
+    std::vector<types::global_dof_index>    local_dof_indices (dofs_per_cell);
+
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+    for (; cell!=endc; ++cell)
+      {
+        cell_residual = 0;
+        fe_values.reinit (cell);
+
+        // The actual computation is much as in
+        // <code>assemble_system()</code>. We first evaluate the gradients of
+        // $u^n+\alpha^n\,\delta u^n$ at the quadrature points, then compute
+        // the coefficient $a_n$, and then plug it all into the formula for
+        // the residual:
+        fe_values.get_function_gradients (evaluation_point,
+                                          gradients);
+
+
+        for (unsigned int q_point=0; q_point<n_q_points; ++q_point)
+          {
+            const double coeff = 1/std::sqrt(1 +
+                                             gradients[q_point] *
+                                             gradients[q_point]);
+
+            for (unsigned int i = 0; i < dofs_per_cell; ++i)
+              cell_residual(i) -= (fe_values.shape_grad(i, q_point)
+                                   * coeff
+                                   * gradients[q_point]
+                                   * fe_values.JxW(q_point));
+          }
+
+        cell->get_dof_indices (local_dof_indices);
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          residual(local_dof_indices[i]) += cell_residual(i);
+      }
+
+    // At the end of this function we also have to deal with the hanging node
+    // constraints and with the issue of boundary values. With regard to the
+    // latter, we have to set to zero the elements of the residual vector for
+    // all entries that correspond to degrees of freedom that sit at the
+    // boundary. The reason is that because the value of the solution there is
+    // fixed, they are of course no "real" degrees of freedom and so, strictly
+    // speaking, we shouldn't have assembled entries in the residual vector
+    // for them. However, as we always do, we want to do exactly the same
+    // thing on every cell and so we did not want to deal with the question
+    // of whether a particular degree of freedom sits at the boundary in the
+    // integration above. Rather, we will simply set to zero these entries
+    // after the fact. To this end, we first need to determine which degrees
+    // of freedom do in fact belong to the boundary and then loop over all of
+    // those and set the residual entry to zero. This happens in the following
+    // lines which we have already seen used in step-11:
+    hanging_node_constraints.condense (residual);
+
+    std::vector<bool> boundary_dofs (dof_handler.n_dofs());
+    DoFTools::extract_boundary_dofs (dof_handler,
+                                     ComponentMask(),
+                                     boundary_dofs);
+    for (unsigned int i=0; i<dof_handler.n_dofs(); ++i)
+      if (boundary_dofs[i] == true)
+        residual(i) = 0;
+
+    // At the end of the function, we return the norm of the residual:
+    return residual.l2_norm();
+  }
+
+
+
+  // @sect4{MinimalSurfaceProblem::determine_step_length}
+
+  // As discussed in the introduction, Newton's method frequently does not
+  // converge if we always take full steps, i.e., compute $u^{n+1}=u^n+\delta
+  // u^n$. Rather, one needs a damping parameter (step length) $\alpha^n$ and
+  // sets $u^{n+1}=u^n+\alpha^n\delta u^n$. This function is the one called
+  // to compute $\alpha^n$; it takes no arguments and returns the step length.
+  //
+  // Here, we simply always return 0.1. This is of course a sub-optimal
+  // choice: ideally, what one wants is that the step size goes to one as we
+  // get closer to the solution, so that we get to enjoy the rapid quadratic
+  // convergence of Newton's method. We will discuss better strategies below
+  // in the results section.
+  template <int dim>
+  double MinimalSurfaceProblem<dim>::determine_step_length() const
+  {
+    return 0.1;
+  }
+
+
+
+  // @sect4{MinimalSurfaceProblem::run}
+
+  // In the run function, we build the first grid and then have the top-level
+  // logic for the Newton iteration. The function has two variables, one that
+  // indicates whether this is the first time we solve for a Newton update and
+  // one that indicates the refinement level of the mesh:
+  template <int dim>
+  void MinimalSurfaceProblem<dim>::run ()
+  {
+    // Bookkeeping: whether we are still on the initial mesh, and how many
+    // adaptive refinement cycles have been carried out so far.
+    bool         on_initial_mesh  = true;
+    unsigned int refinement_cycle = 0;
+
+    // The domain is the unit disk around the origin, set up as in step-6:
+    // create the ball, attach a spherical manifold to its boundary, and
+    // refine globally twice. Adaptive cycles follow in the loop below.
+    GridGenerator::hyper_ball (triangulation);
+    static const SphericalManifold<dim> disk_manifold;
+    triangulation.set_all_manifold_ids_on_boundary(0);
+    triangulation.set_manifold (0, disk_manifold);
+    triangulation.refine_global(2);
+
+    // Outer loop over meshes. It always runs at least once (on the initial
+    // mesh) and is repeated as long as the norm of the right hand side
+    // recorded in the last Newton step on the previous mesh exceeds
+    // $10^{-3}$. The first pass sets up the data structures and makes sure
+    // the starting iterate has the correct boundary values; every later
+    // pass refines the mesh adaptively instead.
+    double last_rhs_norm = 0;
+    while (on_initial_mesh || last_rhs_norm > 1e-3)
+      {
+        if (!on_initial_mesh)
+          {
+            // Later passes: count the cycle and refine the mesh adaptively.
+            ++refinement_cycle;
+            std::cout << "******** Refined mesh " << refinement_cycle
+                      << " ********"
+                      << std::endl;
+
+            refine_mesh();
+          }
+        else
+          {
+            // First pass: set up the data structures and give the starting
+            // iterate the correct boundary values.
+            std::cout << "******** Initial mesh "
+                      << " ********"
+                      << std::endl;
+
+            setup_system (true);
+            set_boundary_values ();
+
+            on_initial_mesh = false;
+          }
+
+        // Report the residual we start from on this mesh, then perform
+        // exactly five Newton steps. Each step assembles the matrix and the
+        // right hand side, stores the norm of the right hand side (this is
+        // the quantity the outer loop tests), solves for the update -- the
+        // solve function also sets
+        // $u^{n+1}=u^n+\alpha^n\;\delta u^n$ -- and prints the residual at
+        // the end of the step:
+        std::cout << "  Initial residual: " << compute_residual(0)
+                  << std::endl;
+
+        for (unsigned int newton_step=0; newton_step<5; ++newton_step)
+          {
+            assemble_system ();
+            // Keep the right hand side norm for the outer loop test.
+            last_rhs_norm = system_rhs.l2_norm();
+
+            solve ();
+
+            std::cout << "  Residual: " << compute_residual(0)
+                      << std::endl;
+          }
+
+        // After the five Newton steps, i.e., just before the mesh is refined
+        // again, write the current solution and the last Newton update to a
+        // VTK file. This happens as in all programs before:
+        DataOut<dim> data_out;
+
+        data_out.attach_dof_handler (dof_handler);
+        data_out.add_data_vector (present_solution, "solution");
+        data_out.add_data_vector (newton_update, "update");
+        data_out.build_patches ();
+
+        // The file name carries the number of the current refinement cycle.
+        std::string filename = "solution-";
+        filename += Utilities::int_to_string (refinement_cycle, 2);
+        filename += ".vtk";
+        std::ofstream vtk_file (filename.c_str());
+        data_out.write_vtk (vtk_file);
+
+      }
+  }
+}
+
+// @sect4{The main function}
+
+// Finally the main function. This follows the scheme of all other main
+// functions:
+int main ()
+{
+  try
+    {
+      using namespace dealii;
+      using namespace Step15;
+
+      // Solve the minimal surface problem in two space dimensions.
+      MinimalSurfaceProblem<2> minimal_surface_problem;
+      minimal_surface_problem.run ();
+    }
+  catch (std::exception &error)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl
+                << "Exception on processing: " << std::endl
+                << error.what() << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    }
+  catch (...)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl
+                << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    }
+  return 0;
+}
diff --git a/examples/step-16/CMakeLists.txt b/examples/step-16/CMakeLists.txt
new file mode 100644
index 0000000..83ce338
--- /dev/null
+++ b/examples/step-16/CMakeLists.txt
@@ -0,0 +1,39 @@
+##
+#  CMake script for the step-16 tutorial program:
+##
+
+# Name shared by the project and the executable target:
+SET(TARGET "step-16")
+
+# Source files that make up the target. For this tutorial that is just the
+# single step-X.cc file; add further files here as your project grows. For
+# considerably larger projects you may prefer to collect sources with
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# instead of the following statement, or to switch altogether to the large
+# project CMakeLists.txt file discussed in the "CMake in user projects" page
+# accessible from the "User info" page of the documentation.
+SET(TARGET_SRC ${TARGET}.cc)
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+# Look for deal.II (requesting version 8.4) in the hinted locations and stop
+# with an explanatory message if it cannot be found.
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS
+    ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-16/doc/builds-on b/examples/step-16/doc/builds-on
new file mode 100644
index 0000000..1740273
--- /dev/null
+++ b/examples/step-16/doc/builds-on
@@ -0,0 +1 @@
+step-6
diff --git a/examples/step-16/doc/intro.dox b/examples/step-16/doc/intro.dox
new file mode 100644
index 0000000..2622eef
--- /dev/null
+++ b/examples/step-16/doc/intro.dox
@@ -0,0 +1,65 @@
+<br>
+
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+
+This example shows the basic usage of the multilevel functions in
+deal.II. It solves almost the same problem as used in step-6,
+but demonstrating the things one has to provide when using multigrid
+as a preconditioner. In particular, this requires that we define a
+hierarchy of levels, provide transfer operators from one level to the
+next and back, and provide representations of the Laplace operator on
+each level.
+
+In order to allow sufficient flexibility in conjunction with systems of
+differential equations and block preconditioners, quite a few different objects
+have to be created before starting the multilevel method, although
+most of what needs to be done is provided by deal.II itself. These are
+<ul>
+<li>The object handling transfer between grids; we use the
+    MGTransferPrebuilt class for this that does almost all of the work
+    inside the library.
+<li>The solver on the coarsest level; here, we use MGCoarseGridHouseholder.
+<li>The smoother on all other levels, which in our case will be the
+    mg::SmootherRelaxation class using SOR as the underlying method
+<li>And mg::Matrix, a class having a special level multiplication, i.e. we
+    basically store one matrix per grid level and allow multiplication
+    with it.
+</ul>
+Most of these objects will only be needed inside the function that
+actually solves the linear system. There, these objects are combined
+in an object of type Multigrid, containing the implementation of the
+V-cycle, which is in turn used by the preconditioner PreconditionMG,
+ready for plug-in into a linear solver of the LAC library.
+
+The multigrid method implemented here for adaptively refined meshes
+follows the outline in the @ref mg_paper "Multigrid paper by Janssen and Kanschat",
+which describes the underlying implementation in
+deal.II and also introduces a lot of the nomenclature. First, we have
+to distinguish between level meshes, namely cells that have the same
+refinement distance from the coarse mesh, and the leaf mesh consisting
+of active cells of the hierarchy (in older work we refer to this as
+the global mesh, but this term is overused). Most importantly, the
+leaf mesh is not identical with the level mesh on the finest level.
+The following image shows what we consider to be a "level mesh":
+
+<p align="center">
+  @image html "multigrid.png" ""
+</p>
+
+The fine level in this mesh consists only of the
+degrees of freedom that are defined on the refined cells, but does not
+extend to that part of the domain that is not refined. While this
+guarantees that the overall effort grows as ${\cal O}(N)$ as necessary
+for optimal multigrid complexity, it leads to problems when defining
+where to smooth and what boundary conditions to pose for the operators
+defined on individual levels if the level boundary is not an external
+boundary. These questions are discussed in detail in the article cited above.
+
+<h3>The testcase</h3>
+
+The problem we solve here is similar to step-6, with two main
+differences: first, the multigrid preconditioner, obviously. We also
+change the discontinuity of the coefficients such that the local
+assembler does not look more complicated than necessary.
diff --git a/examples/step-16/doc/kind b/examples/step-16/doc/kind
new file mode 100644
index 0000000..c1d9154
--- /dev/null
+++ b/examples/step-16/doc/kind
@@ -0,0 +1 @@
+techniques
diff --git a/examples/step-16/doc/results.dox b/examples/step-16/doc/results.dox
new file mode 100644
index 0000000..3de588e
--- /dev/null
+++ b/examples/step-16/doc/results.dox
@@ -0,0 +1,83 @@
+<h1>Results</h1>
+
+On the finest mesh, the solution looks like this:
+
+<p align="center">
+  <img src="http://www.dealii.org/images/steps/developer/step-16.solution.png" alt="">
+</p>
+
+More importantly, we would like to see if the multigrid method really
+improved the solver performance. Therefore, here is the textual
+output:
+
+<pre>
+DEAL::Cycle 0
+DEAL::   Number of active cells:       20
+DEAL::   Number of degrees of freedom: 25 (by level: 8, 25)
+DEAL:cg::Starting value 0.510691
+DEAL:cg::Convergence step 6 value 4.59193e-14
+DEAL::Cycle 1
+DEAL::   Number of active cells:       41
+DEAL::   Number of degrees of freedom: 52 (by level: 8, 25, 41)
+DEAL:cg::Starting value 0.455356
+DEAL:cg::Convergence step 8 value 3.09682e-13
+DEAL::Cycle 2
+DEAL::   Number of active cells:       80
+DEAL::   Number of degrees of freedom: 100 (by level: 8, 25, 61, 52)
+DEAL:cg::Starting value 0.394469
+DEAL:cg::Convergence step 9 value 1.96993e-13
+DEAL::Cycle 3
+DEAL::   Number of active cells:       161
+DEAL::   Number of degrees of freedom: 190 (by level: 8, 25, 77, 160)
+DEAL:cg::Starting value 0.322156
+DEAL:cg::Convergence step 9 value 2.94418e-13
+DEAL::Cycle 4
+DEAL::   Number of active cells:       311
+DEAL::   Number of degrees of freedom: 364 (by level: 8, 25, 86, 227, 174)
+DEAL:cg::Starting value 0.279667
+DEAL:cg::Convergence step 10 value 3.45746e-13
+DEAL::Cycle 5
+DEAL::   Number of active cells:       593
+DEAL::   Number of degrees of freedom: 667 (by level: 8, 25, 89, 231, 490, 96)
+DEAL:cg::Starting value 0.215917
+DEAL:cg::Convergence step 10 value 1.03758e-13
+DEAL::Cycle 6
+DEAL::   Number of active cells:       1127
+DEAL::   Number of degrees of freedom: 1251 (by level: 8, 25, 89, 274, 760, 417, 178)
+DEAL:cg::Starting value 0.185906
+DEAL:cg::Convergence step 10 value 3.40351e-13
+DEAL::Cycle 7
+DEAL::   Number of active cells:       2144
+DEAL::   Number of degrees of freedom: 2359 (by level: 8, 25, 89, 308, 779, 1262, 817)
+DEAL:cg::Starting value 0.141519
+DEAL:cg::Convergence step 10 value 5.74965e-13
+</pre>
+
+That's almost perfect multigrid performance: 12 orders of magnitude in
+10 iteration steps, and almost independent of the mesh size. That's
+obviously in part due to the simple nature of the problem solved, but
+it shows the power of multigrid methods.
+
+
+<h3> Possible extensions </h3>
+
+We encourage you to switch on timing output by calling the function
+LogStream::log_execution_time() of the deallog object and compare to
+step-6. You will see that the multigrid method has quite an overhead
+on coarse meshes, but that it always beats other methods on fine
+meshes because of its optimal complexity.
+
+A close inspection of this program's performance shows that it is mostly
+dominated by matrix-vector operations. step-37 shows one way
+how this can be avoided by working with matrix-free methods.
+
+Another avenue would be to use algebraic multigrid methods. The
+geometric multigrid method used here can at times be a bit awkward to
+implement because it needs all those additional data structures, and
+it becomes even more difficult if the program is to run in %parallel on
+machines coupled through MPI, for example. In that case, it would be
+simpler if one could use a black-box preconditioner that uses some
+sort of multigrid hierarchy for good performance but can figure out
+level matrices and similar things by itself. Algebraic multigrid
+methods do exactly this, and we will use them in
+step-31 for the solution of a Stokes problem.
diff --git a/examples/step-16/doc/tooltip b/examples/step-16/doc/tooltip
new file mode 100644
index 0000000..2fd6559
--- /dev/null
+++ b/examples/step-16/doc/tooltip
@@ -0,0 +1 @@
+Multigrid on adaptive meshes.
diff --git a/examples/step-16/step-16.cc b/examples/step-16/step-16.cc
new file mode 100644
index 0000000..592698e
--- /dev/null
+++ b/examples/step-16/step-16.cc
@@ -0,0 +1,693 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2003 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+ *
+ * Authors: Guido Kanschat, University of Heidelberg, 2003
+ *          Baerbel Janssen, University of Heidelberg, 2010
+ *          Wolfgang Bangerth, Texas A&M University, 2010
+ */
+
+
+// @sect3{Include files}
+
+// Again, the first few include files are already known, so we won't comment
+// on them:
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/utilities.h>
+
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/precondition.h>
+
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/grid_refinement.h>
+#include <deal.II/grid/manifold_lib.h>
+
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_values.h>
+
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/numerics/error_estimator.h>
+
+// These, now, are the includes necessary for the multilevel methods. The first
+// one declares how to handle Dirichlet boundary conditions on each of the
+// levels of the multigrid method. For the actual description of the degrees
+// of freedom, we do not need any new include file because DoFHandler already
+// has all necessary methods implemented. We will only need to distribute the
+// DoFs for the levels further down.
+//
+// The rest of the include files deals with the mechanics of multigrid as a
+// linear operator (solver or preconditioner).
+#include <deal.II/multigrid/mg_constrained_dofs.h>
+#include <deal.II/multigrid/multigrid.h>
+#include <deal.II/multigrid/mg_transfer.h>
+#include <deal.II/multigrid/mg_tools.h>
+#include <deal.II/multigrid/mg_coarse.h>
+#include <deal.II/multigrid/mg_smoother.h>
+#include <deal.II/multigrid/mg_matrix.h>
+
+// Finally we include the MeshWorker framework. This framework through
+// its function loop() and integration_loop(), automates loops over
+// cells and assembling of data into vectors, matrices, etc. It obeys
+// constraints automatically. Since we have to build
+// several matrices and have to be aware of several sets of
+// constraints, this will save us a lot of headache.
+#include <deal.II/meshworker/dof_info.h>
+#include <deal.II/meshworker/integration_info.h>
+#include <deal.II/meshworker/simple.h>
+#include <deal.II/meshworker/output.h>
+#include <deal.II/meshworker/loop.h>
+
+// In order to save effort, we use the pre-implemented Laplacian found in
+#include <deal.II/integrators/laplace.h>
+#include <deal.II/integrators/l2.h>
+
+// This is C++:
+#include <iostream>
+#include <fstream>
+#include <sstream>
+
+using namespace dealii;
+
+namespace Step16
+{
+  // @sect3{The integrator on each cell}
+
+  // The MeshWorker::integration_loop() expects a class that provides
+  // functions for integration on cells and boundary and interior
+  // faces. This is done by the following class. In the constructor,
+  // we tell the loop that cell integrals should be computed (the
+  // 'true'), but integrals should not be computed on boundary and
+  // interior faces (the two 'false'). Accordingly, we only need a
+  // cell function, but none for the faces.
+
+  template <int dim>
+  class LaplaceIntegrator : public MeshWorker::LocalIntegrator<dim>
+  {
+  public:
+    LaplaceIntegrator();
+    virtual void cell(MeshWorker::DoFInfo<dim> &dinfo,
+                      MeshWorker::IntegrationInfo<dim> &info) const;
+  };
+
+  // Integrate on cells only; skip boundary and interior faces.
+  template <int dim>
+  LaplaceIntegrator<dim>::LaplaceIntegrator()
+    : MeshWorker::LocalIntegrator<dim>(true, false, false)
+  {}
+
+
+  // Next the actual integrator on each cell. We solve a Poisson problem with a
+  // coefficient of one tenth in the right half plane and one in the left
+  // half plane.
+
+  // The MeshWorker::LocalResults base class of MeshWorker::DoFInfo
+  // contains objects that can be filled in this local integrator. How
+  // many objects is determined inside the MeshWorker framework by the
+  // assembler class. Here, we test for instance that one matrix is
+  // required (MeshWorker::LocalResults::n_matrices()). The matrices are accessed
+  // through MeshWorker::LocalResults::matrix(), which takes the number of the
+  // matrix as its first argument. The second argument is only used
+  // for integrals over faces, where there are two matrices for each
+  // test function set. In such a case, a second matrix with indicator
+  // 'true' would exist with the same index.
+
+  // MeshWorker::IntegrationInfo provides one or several FEValues
+  // objects, which below are used by
+  // LocalIntegrators::Laplace::cell_matrix() or
+  // LocalIntegrators::L2::L2(). Since we are assembling only a single
+  // PDE, there is also only one of these objects with index zero.
+
+  // In addition, we note that this integrator serves to compute the
+  // matrices for the multilevel preconditioner as well as the matrix
+  // and the right hand side for the global system. Since the
+  // assembler for a system requires an additional vector, this is
+  // indicated by MeshWorker::LocalResults::n_vectors() returning a nonzero
+  // value. Accordingly we fill a right hand side vector at the end of
+  // this function. Since LocalResults can deal with several
+  // BlockVector objects, but we are again in the simplest case here,
+  // we enter the information into block zero of vector zero.
+  template <int dim>
+  void LaplaceIntegrator<dim>::cell(MeshWorker::DoFInfo<dim> &dinfo,
+                                    MeshWorker::IntegrationInfo<dim> &info) const
+  {
+    AssertDimension (dinfo.n_matrices(), 1);
+    // Coefficient: 0.1 on cells whose center has x>0, and 1 otherwise.
+    const double coefficient = (dinfo.cell->center()(0) > 0.) ? .1 : 1.;
+    LocalIntegrators::Laplace::cell_matrix(dinfo.matrix(0,false).matrix,
+                                           info.fe_values(0), coefficient);
+    // Fill the right hand side (constant one) only if a vector was requested.
+    if (dinfo.n_vectors() == 0)
+      return;
+    std::vector<double> rhs(info.fe_values(0).n_quadrature_points, 1.);
+    LocalIntegrators::L2::L2(dinfo.vector(0).block(0), info.fe_values(0), rhs);
+  }
+
+
+  // @sect3{The <code>LaplaceProblem</code> class template}
+
+  // This main class is basically the same class as in step-6. As far as
+  // member functions are concerned, the only addition is the
+  // <code>assemble_multigrid</code> function that assembles the matrices that
+  // correspond to the discrete operators on intermediate levels:
+  template <int dim>
+  class LaplaceProblem
+  {
+  public:
+    LaplaceProblem (const unsigned int degree);
+    void run ();
+
+  private:
+    void setup_system ();
+    void assemble_system ();
+    void assemble_multigrid ();
+    void solve ();
+    void refine_grid ();
+    void output_results (const unsigned int cycle) const;
+
+    Triangulation<dim>   triangulation;
+    FE_Q<dim>            fe;
+    DoFHandler<dim>      dof_handler;
+
+    SparsityPattern      sparsity_pattern;
+    SparseMatrix<double> system_matrix;
+
+    // We need an additional object for the hanging nodes constraints. They
+    // are handed to the transfer object in the multigrid. Since we call a
+    // compress inside the multigrid these constraints are not allowed to be
+    // inhomogeneous so we store them in different ConstraintMatrix objects.
+    ConstraintMatrix     hanging_node_constraints;
+    ConstraintMatrix     constraints;
+
+    Vector<double>       solution;
+    Vector<double>       system_rhs;
+
+    const unsigned int degree;
+
+    // The following members are the essential data structures for the
+    // multigrid method. mg_sparsity_patterns and mg_matrices represent the
+    // sparsity patterns and the matrices on individual levels of the multilevel
+    // hierarchy, very much like the objects for the global mesh above.
+
+    // mg_interface_in and mg_interface_out are two new matrices only needed
+    // for multigrid methods with local smoothing on adaptive meshes. They
+    // convey data between the interior part of the refined region and the
+    // refinement edge, as outlined in detail in @ref mg_paper.
+
+    // mg_constrained_dofs, finally, stores information about the boundary
+    // indices on each level and information about indices lying on a
+    // refinement edge between two different refinement levels. It thus
+    // serves a similar purpose as ConstraintMatrix, but on each
+    // level.
+    MGLevelObject<SparsityPattern>       mg_sparsity_patterns;
+    MGLevelObject<SparseMatrix<double> > mg_matrices;
+    MGLevelObject<SparseMatrix<double> > mg_interface_in;
+    MGLevelObject<SparseMatrix<double> > mg_interface_out;
+    MGConstrainedDoFs                    mg_constrained_dofs;
+  };
+
+
+  // @sect3{The <code>LaplaceProblem</code> class implementation}
+
+  // Just one short remark about the constructor of the Triangulation:
+  // by convention, all adaptively refined triangulations in deal.II never
+  // change by more than one level across a face between cells. For our
+  // multigrid algorithms, however, we need a slightly stricter guarantee,
+  // namely that the mesh also does not change by more than one refinement
+  // level across vertices that might connect two cells. In other words, we
+  // must prevent the following situation:
+  //
+  // @image html limit_level_difference_at_vertices.png ""
+  //
+  // This is achieved by passing the
+  // Triangulation::limit_level_difference_at_vertices flag to the constructor
+  // of the triangulation class.
+  template <int dim>
+  LaplaceProblem<dim>::LaplaceProblem (const unsigned int degree)
+    // Request the stricter mesh smoothing (limited level differences across
+    // vertices) that the multigrid algorithms of this program rely on.
+    : triangulation (Triangulation<dim>::limit_level_difference_at_vertices),
+      fe (degree),
+      dof_handler (triangulation),
+      degree (degree)
+  {}
+
+
+
+  // @sect4{LaplaceProblem::setup_system}
+
+  // In addition to just distributing the degrees of freedom in
+  // the DoFHandler, we do the same on each level. Then, we follow the
+  // same procedure as before to set up the system on the leaf mesh.
+  template <int dim>
+  void LaplaceProblem<dim>::setup_system ()
+  {
+    dof_handler.distribute_dofs (fe);
+    dof_handler.distribute_mg_dofs (fe);
+
+    deallog << "   Number of degrees of freedom: "
+            << dof_handler.n_dofs()
+            << " (by level: ";
+    for (unsigned int level=0; level<triangulation.n_levels(); ++level)
+      deallog << dof_handler.n_dofs(level)
+              << (level == triangulation.n_levels()-1
+                  ? ")" : ", ");
+    deallog << std::endl;
+
+    DynamicSparsityPattern dsp(dof_handler.n_dofs(), dof_handler.n_dofs());
+    DoFTools::make_sparsity_pattern (dof_handler, dsp);
+
+    solution.reinit (dof_handler.n_dofs());
+    system_rhs.reinit (dof_handler.n_dofs());
+
+    constraints.clear ();
+    hanging_node_constraints.clear ();
+    DoFTools::make_hanging_node_constraints (dof_handler, hanging_node_constraints);
+    DoFTools::make_hanging_node_constraints (dof_handler, constraints);
+
+    typename FunctionMap<dim>::type      dirichlet_boundary_functions;
+    ZeroFunction<dim>                    homogeneous_dirichlet_bc (1);
+    dirichlet_boundary_functions[0] = &homogeneous_dirichlet_bc;
+    VectorTools::interpolate_boundary_values (static_cast<const DoFHandler<dim>&>(dof_handler),
+                                              dirichlet_boundary_functions,
+                                              constraints);
+    constraints.close ();
+    hanging_node_constraints.close ();
+    constraints.condense (dsp);
+    sparsity_pattern.copy_from (dsp);
+    system_matrix.reinit (sparsity_pattern);
+
+    // The multigrid constraints have to be initialized. They need to know
+    // about the boundary values as well, so we pass the
+    // <code>dirichlet_boundary_functions</code> map here as well.
+    mg_constrained_dofs.clear();
+    mg_constrained_dofs.initialize(dof_handler, dirichlet_boundary_functions);
+
+
+    // Now for the things that concern the multigrid data structures. First,
+    // we resize the multilevel objects to hold matrices and sparsity
+    // patterns for every level. The coarse level is zero (this is mandatory
+    // right now but may change in a future revision). Note that these
+    // functions take a complete, inclusive range here (not a starting index
+    // and size), so the finest level is <code>n_levels-1</code>.  We first
+    // have to resize the container holding the SparseMatrix classes, since
+    // they have to release their SparsityPattern before they can be destroyed
+    // upon resizing.
+    const unsigned int n_levels = triangulation.n_levels();
+
+    mg_interface_in.resize(0, n_levels-1);
+    mg_interface_in.clear ();
+    mg_interface_out.resize(0, n_levels-1);
+    mg_interface_out.clear ();
+    mg_matrices.resize(0, n_levels-1);
+    mg_matrices.clear ();
+    mg_sparsity_patterns.resize(0, n_levels-1);
+
+    // Now, we have to provide a matrix on each level. To this end, we first
+    // use the MGTools::make_sparsity_pattern function to generate a
+    // preliminary compressed sparsity pattern on each level (see the @ref
+    // Sparsity module for more information on this topic) and then copy it
+    // over to the one we really want. The next step is to initialize both
+    // kinds of level matrices with these sparsity patterns.
+    //
+    // It may be worth pointing out that the interface matrices only have
+    // entries for degrees of freedom that sit at or next to the interface
+    // between coarser and finer levels of the mesh. They are therefore even
+    // sparser than the matrices on the individual levels of our multigrid
+    // hierarchy. If we were more concerned about memory usage (and possibly
+    // the speed with which we can multiply with these matrices), we should
+    // use separate and different sparsity patterns for these two kinds of
+    // matrices.
+    for (unsigned int level=0; level<n_levels; ++level)
+      {
+        DynamicSparsityPattern dsp (dof_handler.n_dofs(level),
+                                    dof_handler.n_dofs(level));
+        MGTools::make_sparsity_pattern(dof_handler, dsp, level);
+
+        mg_sparsity_patterns[level].copy_from (dsp);
+
+        mg_matrices[level].reinit(mg_sparsity_patterns[level]);
+        mg_interface_in[level].reinit(mg_sparsity_patterns[level]);
+        mg_interface_out[level].reinit(mg_sparsity_patterns[level]);
+      }
+  }
+
+
+  // @sect4{LaplaceProblem::assemble_system}
+
+  // The following function assembles the linear system on the finest level of
+  // the mesh. Since we want to reuse the code here for the level
+  // assembling below, we use the local integrator class
+  // LaplaceIntegrator and leave the loops to the MeshWorker
+  // framework. Thus, this function first sets up the objects
+  // necessary for this framework, namely
+  // <ol>
+  // <li>a MeshWorker::IntegrationInfoBox, which will provide all the required
+  // data in quadrature points on the cell. This object can be seen as
+  // an extension of FEValues, providing a lot more useful
+  // information,</li>
+  // <li>a MeshWorker::DoFInfo object, which on the one hand extends the
+  // functionality of cell iterators, but also provides space for
+  // return values in its base class LocalResults,</li>
+  // <li>an assembler, in this case for the whole system. The term
+  // 'simple' here refers to the fact that the global system does not
+  // have a block structure,</li>
+  // <li>and the local integrator, which implements the actual forms.</li>
+  // </ol>
+  //
+  // After the loop has combined all of these into a matrix and a
+  // right hand side, there is one thing left to do: the assemblers
+  // leave matrix rows and columns of constrained degrees of freedom
+  // untouched. Therefore, we put a one on the diagonal to make the
+  // whole system well posed. The value one, or any fixed value, has
+  // the advantage that its effect on the spectrum of the matrix is
+  // easily understood. Since the corresponding eigenvectors form an
+  // invariant subspace, the value chosen does not affect the
+  // convergence of Krylov space solvers.
+  template <int dim>
+  void LaplaceProblem<dim>::assemble_system ()
+  {
+    MappingQ1<dim> mapping;
+    MeshWorker::IntegrationInfoBox<dim> info_box;
+    UpdateFlags update_flags = update_values | update_gradients | update_hessians;
+    info_box.add_update_flags_all(update_flags);
+    info_box.initialize(fe, mapping);
+
+    MeshWorker::DoFInfo<dim> dof_info(dof_handler);
+
+    MeshWorker::Assembler::SystemSimple<SparseMatrix<double>, Vector<double> > assembler;
+    assembler.initialize(constraints);
+    assembler.initialize(system_matrix, system_rhs);
+
+    LaplaceIntegrator<dim> matrix_integrator;
+    MeshWorker::integration_loop<dim, dim> (
+      dof_handler.begin_active(), dof_handler.end(),  // active cells only
+      dof_info, info_box, matrix_integrator, assembler);
+
+    for (unsigned int i=0; i<dof_handler.n_dofs(); ++i)
+      if (constraints.is_constrained(i))
+        system_matrix.set(i, i, 1.);  // unit diagonal for constrained rows
+  }
+
+
+  // @sect4{LaplaceProblem::assemble_multigrid}
+
+  // The next function is the one that builds the linear operators (matrices)
+  // that define the multigrid method on each level of the mesh. The
+  // integration core is the same as above, but the loop below will go over
+  // all existing cells instead of just the active ones, and the results must
+  // be entered into the correct level matrices. Fortunately,
+  // MeshWorker hides most of that from us, and thus the difference
+  // between this function and the previous lies only in the setup of
+  // the assembler and the different iterators in the loop.
+  // Also, fixing up the matrices in the end is a little more complicated.
+  template <int dim>
+  void LaplaceProblem<dim>::assemble_multigrid ()
+  {
+    MappingQ1<dim> mapping;
+    MeshWorker::IntegrationInfoBox<dim> info_box;
+    UpdateFlags update_flags = update_values | update_gradients | update_hessians;
+    info_box.add_update_flags_all(update_flags);
+    info_box.initialize(fe, mapping);
+
+    MeshWorker::DoFInfo<dim> dof_info(dof_handler);
+
+    MeshWorker::Assembler::MGMatrixSimple<SparseMatrix<double> > assembler;
+    assembler.initialize(mg_constrained_dofs);
+    assembler.initialize(mg_matrices);  // the per-level matrices
+    assembler.initialize_interfaces(mg_interface_in, mg_interface_out);  // refinement-edge couplings
+
+    LaplaceIntegrator<dim> matrix_integrator;
+    MeshWorker::integration_loop<dim, dim> (
+      dof_handler.begin_mg(), dof_handler.end_mg(),  // all level cells, not only the active ones
+      dof_info, info_box, matrix_integrator, assembler);
+
+    const unsigned int nlevels = triangulation.n_levels();
+    for (unsigned int level=0; level<nlevels; ++level)
+      {
+        for (unsigned int i=0; i<dof_handler.n_dofs(level); ++i)
+          if (mg_constrained_dofs.is_boundary_index(level,i) ||
+              mg_constrained_dofs.at_refinement_edge(level,i))
+            mg_matrices[level].set(i, i, 1.);  // unit diagonal for boundary and refinement-edge DoFs
+      }
+  }
+
+
+
+  // @sect4{LaplaceProblem::solve}
+
+  // This is the other function that is significantly different in support of
+  // the multigrid solver (or, in fact, the preconditioner for which we use
+  // the multigrid method).
+  //
+  // Let us start out by setting up two of the components of multilevel
+  // methods: transfer operators between levels, and a solver on the coarsest
+  // level. In finite element methods, the transfer operators are derived from
+  // the finite element function spaces involved and can often be computed in
+  // a generic way independent of the problem under consideration. In that
+  // case, we can use the MGTransferPrebuilt class that, given the constraints
+  // of the final linear system and the MGConstrainedDoFs object that knows
+  // about the boundary conditions on each level and the degrees of
+  // freedom on interfaces between different refinement levels, can build the
+  // matrices for those transfer operations from a DoFHandler object with
+  // level degrees of freedom.
+  //
+  // The second part of the following lines deals with the coarse grid
+  // solver. Since our coarse grid is very coarse indeed, we decide for a
+  // direct solver (a Householder decomposition of the coarsest level matrix),
+  // even if its implementation is not particularly sophisticated. If our
+  // coarse mesh had many more cells than the five we have here, something
+  // better suited would obviously be necessary here.
+  template <int dim>
+  void LaplaceProblem<dim>::solve ()
+  {
+    MGTransferPrebuilt<Vector<double> > mg_transfer(hanging_node_constraints, mg_constrained_dofs);
+    mg_transfer.build_matrices(dof_handler);
+
+    FullMatrix<double> coarse_matrix;
+    coarse_matrix.copy_from (mg_matrices[0]);
+    MGCoarseGridHouseholder<> coarse_grid_solver;
+    coarse_grid_solver.initialize (coarse_matrix);
+
+    // The next component of a multilevel solver or preconditioner is that we
+    // need a smoother on each level. A common choice for this is to use the
+    // application of a relaxation method (such as the SOR, Jacobi or
+    // Richardson method) or a small number of iterations of a solver method
+    // (such as CG or GMRES). The mg::SmootherRelaxation and
+    // MGSmootherPrecondition classes provide support for these two kinds of
+    // smoothers. Here, we opt for the application of a single SOR
+    // iteration. To this end, we define an appropriate <code>typedef</code>
+    // and then set up a smoother object.
+    //
+    // The next step is to initialize the smoother object with our level
+    // matrices and to set some smoothing parameters.  The
+    // <code>initialize()</code> function can optionally take additional
+    // arguments that will be passed to the smoother object on each level. In
+    // the current case for the SOR smoother, this could, for example, include
+    // a relaxation parameter. However, we here leave these at their default
+    // values. The call to <code>set_steps()</code> indicates that we will use
+    // two pre- and two post-smoothing steps on each level; to use a variable
+    // number of smoother steps on different levels, more options can be set
+    // in the constructor call to the <code>mg_smoother</code> object.
+    //
+    // The last step results from the fact that we use the SOR method as a
+    // smoother - which is not symmetric - while the conjugate gradient
+    // iteration used below requires a symmetric preconditioner. We therefore
+    // need to let the multilevel preconditioner make sure that we get a
+    // symmetric operator even for nonsymmetric smoothers:
+    typedef PreconditionSOR<SparseMatrix<double> > Smoother;
+    mg::SmootherRelaxation<Smoother, Vector<double> > mg_smoother;
+    mg_smoother.initialize(mg_matrices);
+    mg_smoother.set_steps(2);
+    mg_smoother.set_symmetric(true);
+
+    // The next preparatory step is that we must wrap our level and interface
+    // matrices in an object having the required multiplication functions. We
+    // will create two objects for the interface objects going from coarse to
+    // fine and the other way around; the multigrid algorithm will later use
+    // the transpose operator for the latter operation, allowing us to
+    // initialize both up and down versions of the operator with the matrices
+    // we already built:
+    mg::Matrix<Vector<double> > mg_matrix(mg_matrices);
+    mg::Matrix<Vector<double> > mg_interface_up(mg_interface_in);
+    mg::Matrix<Vector<double> > mg_interface_down(mg_interface_out);
+
+    // Now, we are ready to set up the V-cycle operator and the multilevel
+    // preconditioner. The same smoother object is passed twice, once as
+    // pre-smoother and once as post-smoother.
+    Multigrid<Vector<double> > mg(dof_handler,
+                                  mg_matrix,
+                                  coarse_grid_solver,
+                                  mg_transfer,
+                                  mg_smoother,
+                                  mg_smoother);
+    mg.set_edge_matrices(mg_interface_down, mg_interface_up);
+
+    PreconditionMG<dim, Vector<double>, MGTransferPrebuilt<Vector<double> > >
+    preconditioner(dof_handler, mg, mg_transfer);
+
+    // With all this together, we can finally get about solving the linear
+    // system in the usual way:
+    SolverControl solver_control (1000, 1e-12);
+    SolverCG<>    solver (solver_control);
+
+    solution = 0;
+
+    solver.solve (system_matrix, solution, system_rhs,
+                  preconditioner);
+    constraints.distribute (solution);
+  }
+
+
+
+  // @sect4{Postprocessing}
+
+  // The following two functions postprocess a solution once it is
+  // computed. In particular, the first one refines the mesh at the beginning
+  // of each cycle while the second one outputs results at the end of each
+  // such cycle. The functions are almost unchanged from those in step-6, with
+  // the exception of one minor difference: we generate output in VTK
+  // format, to use the more modern visualization programs available today
+  // compared to those that were available when step-6 was written.
+  template <int dim>
+  void LaplaceProblem<dim>::refine_grid ()
+  {
+    Vector<float> estimated_error_per_cell (triangulation.n_active_cells());  // one indicator per active cell
+
+    KellyErrorEstimator<dim>::estimate (dof_handler,
+                                        QGauss<dim-1>(3),  // (dim-1)-dimensional quadrature formula
+                                        typename FunctionMap<dim>::type(),  // default-constructed, i.e. empty, map
+                                        solution,
+                                        estimated_error_per_cell);
+    GridRefinement::refine_and_coarsen_fixed_number (triangulation,
+                                                     estimated_error_per_cell,
+                                                     0.3, 0.03);  // fractions of cells to refine / to coarsen
+    triangulation.execute_coarsening_and_refinement ();
+  }
+
+
+
+  template <int dim>
+  void LaplaceProblem<dim>::output_results (const unsigned int cycle) const
+  {
+    DataOut<dim> data_out;
+
+    data_out.attach_dof_handler (dof_handler);
+    data_out.add_data_vector (solution, "solution");
+    data_out.build_patches ();
+
+    std::ostringstream filename;  // assembles the name "solution-<cycle>.vtk"
+    filename << "solution-"
+             << cycle
+             << ".vtk";
+
+    std::ofstream output (filename.str().c_str());
+    data_out.write_vtk (output);  // one VTK file per refinement cycle
+  }
+
+
+  // @sect4{LaplaceProblem::run}
+
+  // Like several of the functions above, this is almost exactly a copy of
+  // the corresponding function in step-6. The only difference is the call to
+  // <code>assemble_multigrid</code> that takes care of forming the matrices
+  // on every level that we need in the multigrid method.
+  template <int dim>
+  void LaplaceProblem<dim>::run ()
+  {
+    for (unsigned int cycle=0; cycle<8; ++cycle)
+      {
+        deallog << "Cycle " << cycle << std::endl;
+
+        if (cycle == 0)  // first cycle: create the initial mesh
+          {
+            GridGenerator::hyper_ball (triangulation);
+
+            static const SphericalManifold<dim> boundary;
+            triangulation.set_all_manifold_ids_on_boundary(0);
+            triangulation.set_manifold (0, boundary);
+
+            triangulation.refine_global (1);
+          }
+        else  // later cycles: adaptively refine the previous mesh
+          refine_grid ();
+
+        deallog << "   Number of active cells:       "
+                << triangulation.n_active_cells()
+                << std::endl;
+
+        setup_system ();
+
+        assemble_system ();
+        assemble_multigrid ();
+
+        solve ();
+        output_results (cycle);
+      }
+  }
+}
+
+
+// @sect3{The main() function}
+//
+// This is again the same function as in step-6:
+int main ()
+{
+  try
+    {
+      using namespace Step16;
+
+      deallog.depth_console(2);  // limit deallog console output to two prefix levels
+
+      LaplaceProblem<2> laplace_problem(1);  // dim = 2; argument presumably the FE degree (constructor not shown here)
+      laplace_problem.run ();
+    }
+  catch (std::exception &exc)  // report what() and exit with a nonzero status
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+
+      return 1;
+    }
+  catch (...)  // anything not derived from std::exception
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    }
+
+  return 0;
+}
diff --git a/examples/step-17/CMakeLists.txt b/examples/step-17/CMakeLists.txt
new file mode 100644
index 0000000..d130d19
--- /dev/null
+++ b/examples/step-17/CMakeLists.txt
@@ -0,0 +1,51 @@
+##
+#  CMake script for the step-17 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-17")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS
+    ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../  $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF ()
+
+#
+# Are all dependencies fulfilled? This tutorial needs deal.II with PETSc.
+#
+IF(NOT DEAL_II_WITH_PETSC)
+  MESSAGE(FATAL_ERROR "
+Error! The deal.II library found at ${DEAL_II_PATH} was not configured with
+    DEAL_II_WITH_PETSC = ON
+One or all of these are OFF in your installation but are required for this tutorial step."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-17/doc/builds-on b/examples/step-17/doc/builds-on
new file mode 100644
index 0000000..850b582
--- /dev/null
+++ b/examples/step-17/doc/builds-on
@@ -0,0 +1 @@
+step-8
diff --git a/examples/step-17/doc/intro.dox b/examples/step-17/doc/intro.dox
new file mode 100644
index 0000000..32c0b04
--- /dev/null
+++ b/examples/step-17/doc/intro.dox
@@ -0,0 +1,196 @@
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+<h2>Overview</h2>
+
+This program does not introduce any new mathematical ideas; in fact, all it
+does is to do the exact same computations that step-8
+already does, but it does so in a different manner: instead of using deal.II's
+own linear algebra classes, we build everything on top of classes deal.II
+provides that wrap around the linear algebra implementation of the <a
+href="http://www.mcs.anl.gov/petsc/" target="_top">PETSc</a> library. And
+since PETSc allows distributing matrices and vectors across several computers
+within an MPI network, the resulting code will even be able to solve the
+problem in %parallel. If you don't know what PETSc is, then this would be a
+good time to take a quick glimpse at their homepage.
+
+As a prerequisite of this program, you need to have PETSc installed, and if
+you want to run in %parallel on a cluster, you also need <a
+href="http://www-users.cs.umn.edu/~karypis/metis/index.html"
+target="_top">METIS</a> to partition meshes. The installation of deal.II
+together with these two additional libraries is described in the <a
+href="https://www.dealii.org/developer/readme.html" target="body">README</a> file.
+
+Now, for the details: as mentioned, the program does not compute anything new,
+so the use of finite element classes, etc., is exactly the same as before. The
+difference to previous programs is that we have replaced almost all uses of
+classes <code>Vector</code> and <code>SparseMatrix</code> by their
+near-equivalents <code>PETScWrappers::MPI::Vector</code> and
+<code>PETScWrappers::MPI::SparseMatrix</code> that store data in a way so that
+every processor in the MPI network only stores
+a part of the matrix or vector. More specifically, each processor will
+only store those rows of the matrix that correspond to a degree of
+freedom it "owns". For vectors, they either store only elements that 
+correspond to degrees of freedom the processor owns (this is what is
+necessary for the right hand side), or also some additional elements
+that make sure that every processor has access to the solution components
+that live on the cells the processor owns (so-called
+@ref GlossLocallyActiveDof "locally active DoFs") or also on neighboring cells
+(so-called @ref GlossLocallyRelevantDof "locally relevant DoFs").
+
+The interface the classes from the PETScWrappers namespace provide is very similar to that
+of the deal.II linear algebra classes, but instead of implementing this
+functionality themselves, they simply pass on to their corresponding PETSc
+functions. The wrappers are therefore only used to give PETSc a more modern,
+object oriented interface, and to make the use of PETSc and deal.II objects as
+interchangeable as possible. The main point of using PETSc is that it can run
+in %parallel. We will make use of this by partitioning the domain into as many
+blocks ("subdomains") as there are processes in the MPI network. At the same
+time, PETSc also provides dummy MPI stubs, so you can run this program on a
+single machine if PETSc was configured without MPI.
+
+
+<h2>Parallelizing software with MPI</h2>
+
+Developing software to run in %parallel via MPI requires a bit of a change in
+mindset because one typically has to split up all data structures so that
+every processor only stores a piece of the entire problem. As a consequence,
+you can't typically access all components of a solution vector on each 
+processor -- each processor may simply not have enough memory to hold the
+entire solution vector. Because data is split up or "distributed" across
+processors, we call the programming model used by MPI "distributed memory
+computing" (as opposed to "shared memory computing", which would mean
+that multiple processors can all access all data within one memory
+space, for example whenever multiple cores in a single machine work
+on a common task). Some of the fundamentals of distributed memory
+computing are discussed in the 
+@ref distributed "Parallel computing with multiple processors using distributed memory"
+documentation module, which is itself a sub-module of the
+@ref Parallel "Parallel computing" module.
+
+In general, to be truly able to scale to large numbers of processors, one
+needs to split between the available processors <i>every</i> data structure
+whose size scales with the size of the overall problem. (For a definition
+of what it means for a program to "scale", see 
+@ref GlossParallelScaling "this glossary entry".) This includes, for
+example, the triangulation, the matrix, and all global vectors (solution, right
+hand side). If one doesn't split all of these objects, one of those will be
+replicated on all processors and will eventually simply become too large
+if the problem size (and the number of available processors) becomes large.
+(On the other hand, it is completely fine to keep objects with a size that
+is independent of the overall problem size on every processor. For example,
+each copy of the executable will create its own finite element object, or the
+local matrix we use in the assembly.)
+
+In the current program (as well as in the related step-18), we will not go
+quite this far but present a gentler introduction to using MPI. More
+specifically, the only data structures we will parallelize are matrices and
+vectors. We do, however, not split up the Triangulation and
+DoFHandler classes: each process still has a complete copy of
+these objects, and all processes have exact copies of what the other processes
+have. We will then simply have to mark, in each copy of the triangulation
+on each of the processors, which processor owns which cells. This
+process is called "partitioning" a mesh into @ref GlossSubdomainId "subdomains".
+
+For larger problems, having to store the <i>entire</i> mesh on every processor
+will clearly yield a bottleneck. Splitting up the mesh is slightly, though not
+much, more complicated (from a user perspective, though it is <i>much</i> more
+complicated under the hood) to achieve and
+we will show how to do this in step-40 and some other programs. There are
+numerous occasions where, in the course of discussing how a function of this
+program works, we will comment on the fact that it will not scale to large
+problems and why not. All of these issues will be addressed in step-18 and
+in particular step-40, which scales to very large numbers of processes.
+
+Philosophically, the way MPI operates is as follows. You typically run a
+program via
+@code
+  mpirun -np 32 ./step-17
+@endcode
+which means to run it on (say) 32 processors. (If you are on a cluster system,
+you typically need to <i>schedule</i> the program to run whenever 32 processors
+become available; this will be described in the documentation of your
+cluster. But under the hood, whenever those processors become available,
+the same call as above will generally be executed.) What this does is that
+the MPI system will start 32 <i>copies</i> of the <code>step-17</code>
+executable. (The MPI term for each of these running executables is that you
+have 32 @ref GlossMPIProcess "MPI processes".)
+This may happen on different machines that can't even read
+from each other's memory spaces, or it may happen on the same machine, but
+the end result is the same: each of these 32 copies will run with some
+memory allocated to it by the operating system, and it will not directly
+be able to read the memory of the other 31 copies. In order to collaborate
+in a common task, these 32 copies then have to <i>communicate</i> with
+each other. MPI, short for <i>Message Passing Interface</i>, makes this
+possible by allowing programs to <i>send messages</i>. You can think
+of this as the mail service: you can put a letter to a specific address
+into the mail and it will be delivered. But that's the extent to which
+you can control things. If you want the receiver to do something
+with the content of the letter, for example return to you data you want
+from over there, then two things need to happen: (i) the receiver needs
+to actually go check whether there is anything in her mailbox, and (ii) if
+there is, react appropriately, for example by sending data back. If you
+wait for this return message but the original receiver was distracted
+and not paying attention, then you're out of luck: you'll simply have to
+wait until your request over there is worked on. In some cases,
+bugs will lead the original receiver to never check your mail, and in that
+case you will wait forever -- this is called a <i>deadlock</i>. 
+(@dealiiVideoLectureSeeAlso{39,41,41.25,41.5})
+
+In practice, one does not usually program at the level of sending and
+receiving individual messages, but uses higher level operations. For
+example, in the program we will use function calls that take a number
+from each processor, add them all up, and return the sum to all
+processors. Internally, this is implemented using individual messages,
+but to the user this is transparent. We call such operations <i>collectives</i>
+because <i>all</i> processors participate in them. Collectives allow us
+to write programs where not every copy of the executable is doing something
+completely different (this would be incredibly difficult to program) but
+where in essence all copies are doing the same thing (though on different
+data) for themselves, running through the same blocks of code; then they
+communicate data through collectives; and then go back to doing something
+for themselves again, running through the same blocks of code. This is the
+key piece to being able to write programs, and it is the key component
+to making sure that programs can run on any number of processors,
+since we do not have to write different code for each of the participating
+processors.
+
+(This is not to say that programs are never written in ways where
+different processors run through different blocks of code in their
+copy of the executable. Programs internally also often communicate
+in other ways than through collectives. But in practice, %parallel
+finite element codes almost always follow the scheme where every copy
+of the program runs through the same blocks of code at the same time,
+interspersed by phases where all processors communicate with each other.)
+
+In reality, even the level of calling MPI collective functions is too
+low. Rather, the program below will not contain any direct
+calls to MPI at all, but only deal.II functions that hide this
+communication from users of deal.II. This has the advantage that
+you don't have to learn the details of MPI and its rather intricate
+function calls. That said, you do have to understand the general
+philosophy behind MPI as outlined above.
+
+
+<h2>What this program does</h2>
+
+The techniques this program then demonstrates are:
+- How to use the PETSc wrapper classes; this will already be visible in the
+  declaration of the principal class of this program, <code>ElasticProblem</code>.
+- How to partition the mesh into subdomains; this happens in the 
+  <code>ElasticProblem::setup_system()</code> function.
+- How to parallelize operations for jobs running on an MPI network; here, this
+  is something one has to pay attention to in a number of places, most
+  notably in the  <code>ElasticProblem::assemble_system()</code> function.
+- How to deal with vectors that store only a subset of vector entries
+  and for which we have to ensure that they store what we need on the
+  current processor. See for example the
+  <code>ElasticProblem::solve()</code> and <code>ElasticProblem::refine_grid()</code>
+  functions.
+- How to deal with status output from programs that run on multiple
+  processors at the same time. This is done via the <code>pcout</code>
+  variable in the program, initialized in the constructor.
+
+Since all this can only be demonstrated using actual code, let us go straight to the
+code without much further ado.
+
diff --git a/examples/step-17/doc/kind b/examples/step-17/doc/kind
new file mode 100644
index 0000000..56e049c
--- /dev/null
+++ b/examples/step-17/doc/kind
@@ -0,0 +1 @@
+solids
diff --git a/examples/step-17/doc/results.dox b/examples/step-17/doc/results.dox
new file mode 100644
index 0000000..19c56e1
--- /dev/null
+++ b/examples/step-17/doc/results.dox
@@ -0,0 +1,248 @@
+<h1>Results</h1>
+
+
+If the program above is compiled and run on a single processor
+machine, it should generate results that are very similar to those
+that we already got with step-8. However, it becomes more interesting
+if we run it on a multicore machine or a cluster of computers. The
+most basic way to run MPI programs is using a command line like
+@code
+  mpirun -np 32 ./step-17
+@endcode
+to run the step-17 executable with 32 processors.
+
+(If you work on a cluster, then there is typically a step in between where you
+need to set up a job script and submit the script to a scheduler. The scheduler
+will execute the script whenever it can allocate 32 unused processors for your
+job. How to write such job
+scripts differs from cluster to cluster, and you should find the documentation
+of your cluster to see how to do this. On my system, I have to use the command
+<code>qsub</code> with a whole host of options to run a job in parallel.)
+
+Whether directly or through a scheduler, if you run this program on 8
+processors, you should get output like the following:
+@code
+Cycle 0:
+   Number of active cells:       64
+   Number of degrees of freedom: 162 (by partition: 22+22+20+20+18+16+20+24)
+   Solver converged in 23 iterations.
+Cycle 1:
+   Number of active cells:       124
+   Number of degrees of freedom: 302 (by partition: 38+42+36+34+44+44+36+28)
+   Solver converged in 35 iterations.
+Cycle 2:
+   Number of active cells:       238
+   Number of degrees of freedom: 570 (by partition: 68+80+66+74+58+68+78+78)
+   Solver converged in 46 iterations.
+Cycle 3:
+   Number of active cells:       454
+   Number of degrees of freedom: 1046 (by partition: 120+134+124+130+154+138+122+124)
+   Solver converged in 55 iterations.
+Cycle 4:
+   Number of active cells:       868
+   Number of degrees of freedom: 1926 (by partition: 232+276+214+248+230+224+234+268)
+   Solver converged in 77 iterations.
+Cycle 5:
+   Number of active cells:       1654
+   Number of degrees of freedom: 3550 (by partition: 418+466+432+470+442+474+424+424)
+   Solver converged in 93 iterations.
+Cycle 6:
+   Number of active cells:       3136
+   Number of degrees of freedom: 6702 (by partition: 838+796+828+892+866+798+878+806)
+   Solver converged in 127 iterations.
+Cycle 7:
+   Number of active cells:       5962
+   Number of degrees of freedom: 12446 (by partition: 1586+1484+1652+1552+1556+1576+1560+1480)
+   Solver converged in 158 iterations.
+Cycle 8:
+   Number of active cells:       11320
+   Number of degrees of freedom: 23586 (by partition: 2988+2924+2890+2868+2864+3042+2932+3078)
+   Solver converged in 225 iterations.
+Cycle 9:
+   Number of active cells:       21424
+   Number of degrees of freedom: 43986 (by partition: 5470+5376+5642+5450+5630+5470+5416+5532)
+   Solver converged in 282 iterations.
+Cycle 10:
+   Number of active cells:       40696
+   Number of degrees of freedom: 83754 (by partition: 10660+10606+10364+10258+10354+10322+10586+10604)
+   Solver converged in 392 iterations.
+Cycle 11:
+   Number of active cells:       76978
+   Number of degrees of freedom: 156490 (by partition: 19516+20148+19390+19390+19336+19450+19730+19530)
+   Solver converged in 509 iterations.
+Cycle 12:
+   Number of active cells:       146206
+   Number of degrees of freedom: 297994 (by partition: 37462+37780+37000+37060+37232+37328+36860+37272)
+   Solver converged in 705 iterations.
+Cycle 13:
+   Number of active cells:       276184
+   Number of degrees of freedom: 558766 (by partition: 69206+69404+69882+71266+70348+69616+69796+69248)
+   Solver converged in 945 iterations.
+Cycle 14:
+   Number of active cells:       523000
+   Number of degrees of freedom: 1060258 (by partition: 132928+132296+131626+132172+132170+133588+132252+133226)
+   Solver converged in 1282 iterations.
+Cycle 15:
+   Number of active cells:       987394
+   Number of degrees of freedom: 1994226 (by partition: 253276+249068+247430+248402+248496+251380+248272+247902)
+   Solver converged in 1760 iterations.
+Cycle 16:
+   Number of active cells:       1867477
+   Number of degrees of freedom: 3771884 (by partition: 468452+474204+470818+470884+469960+
+471186+470686+475694)
+   Solver converged in 2251 iterations.
+@endcode
+(This run uses a few more refinement cycles than the code available in
+the examples/ directory. The run also used a version of METIS from
+2004 that generated different partitionings; consequently,
+the numbers you get today are slightly different.)
+
+As can be seen, we can easily get to almost four million unknowns. In fact, the
+code's runtime with 8 processes was less than 7 minutes up to (and including)
+cycle 14, and 14 minutes including the second to last step. (These are numbers
+relevant to when the code was initially written, in 2004.) I lost the timing
+information for the last step, though, but you get the idea. All this is after
+release mode has been enabled by running <code>make release</code>, and
+with the generation of graphical output switched off for the reasons stated in
+the program comments above. 
+(@dealiiVideoLectureSeeAlso{18})
+The biggest 2d computations I did had roughly 7.1
+million unknowns, and were done on 32 processes. It took about 40 minutes.
+Not surprisingly, the limiting factor for how far one can go is how much memory
+one has, since every process has to hold the entire mesh and DoFHandler objects,
+although matrices and vectors are split up. For the 7.1M computation, the memory
+consumption was about 600 bytes per unknown, which is not bad, but one has to
+consider that this is for every unknown, whether we store the matrix and vector
+entries locally or not.
+
+
+
+Here is some output generated in the 12th cycle of the program, i.e. with roughly
+300,000 unknowns:
+
+<table align="center" style="width:80%">
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-17.12-ux.png" alt="" width="100%"></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-17.12-uy.png" alt="" width="100%"></td>
+  </tr>
+</table>
+
+As one would hope for, the x- (left) and y-displacements (right) shown here
+closely match what we already saw in step-8. As shown
+there and in step-22, we could as well have produced a
+vector plot of the displacement field, rather than plotting it as two
+separate scalar fields. What may be more interesting,
+though, is to look at the mesh and partition at this step:
+
+<table align="center" width="80%">
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-17.12-grid.png" alt="" width="100%"></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-17.12-partition.png" alt="" width="100%"></td>
+  </tr>
+</table>
+
+Again, the mesh (left) shows the same refinement pattern as seen
+previously. The right panel shows the partitioning of the domain across the 8
+processes, each indicated by a different color. The picture shows that the
+subdomains are smaller where mesh cells are small, a fact that needs to be
+expected given that the partitioning algorithm tries to equilibrate the number
+of cells in each subdomain; this equilibration is also easily identified in
+the output shown above, where the number of degrees of freedom per subdomain is roughly
+the same.
+
+
+
+It is worth noting that if we ran the same program with a different number of
+processes, we would likely get slightly different output: a different
+mesh, different number of unknowns and iterations to convergence. The reason
+for this is that while the matrix and right hand side are the same independent
+of the number of processes used, the preconditioner is not: it performs an
+ILU(0) on the chunk of the matrix of <em>each processor separately</em>. Thus,
+its effectiveness as a preconditioner diminishes as the number of processes
+increases, which makes the number of iterations increase. Since a different
+preconditioner leads to slight changes in the computed solution, this will
+then lead to slightly different mesh cells tagged for refinement, and larger
+differences in subsequent steps. The solution will always look very similar,
+though.
+
+
+
+Finally, here are some results for a 3d simulation. You can repeat these by
+changing
+@code
+        ElasticProblem<2> elastic_problem;
+@endcode
+to
+@code
+        ElasticProblem<3> elastic_problem;
+@endcode
+in the main function. If you then run the program in parallel,
+you get something similar to this (this is for a job with 16 processes):
+@code
+Cycle 0:
+   Number of active cells:       512
+   Number of degrees of freedom: 2187 (by partition: 114+156+150+114+114+210+105+102+120+120+96+123+141+183+156+183)
+   Solver converged in 27 iterations.
+Cycle 1:
+   Number of active cells:       1604
+   Number of degrees of freedom: 6549 (by partition: 393+291+342+354+414+417+570+366+444+288+543+525+345+387+489+381)
+   Solver converged in 42 iterations.
+Cycle 2:
+   Number of active cells:       4992
+   Number of degrees of freedom: 19167 (by partition: 1428+1266+1095+1005+1455+1257+1410+1041+1320+1380+1080+1050+963+1005+1188+1224)
+   Solver converged in 65 iterations.
+Cycle 3:
+   Number of active cells:       15485
+   Number of degrees of freedom: 56760 (by partition: 3099+3714+3384+3147+4332+3858+3615+3117+3027+3888+3942+3276+4149+3519+3030+3663)
+   Solver converged in 96 iterations.
+Cycle 4:
+   Number of active cells:       48014
+   Number of degrees of freedom: 168762 (by partition: 11043+10752+9846+10752+9918+10584+10545+11433+12393+11289+10488+9885+10056+9771+11031+8976)
+   Solver converged in 132 iterations.
+Cycle 5:
+   Number of active cells:       148828
+   Number of degrees of freedom: 492303 (by partition: 31359+30588+34638+32244+30984+28902+33297+31569+29778+29694+28482+28032+32283+30702+31491+28260)
+   Solver converged in 179 iterations.
+Cycle 6:
+   Number of active cells:       461392
+   Number of degrees of freedom: 1497951 (by partition: 103587+100827+97611+93726+93429+88074+95892+88296+96882+93000+87864+90915+92232+86931+98091+90594)
+   Solver converged in 261 iterations.
+@endcode
+
+
+
+The last step, going up to 1.5 million unknowns, takes about 55 minutes with
+16 processes on 8 dual-processor machines (of the kind available in 2003). The
+graphical output generated by
+this job is rather large (cycle 5 already prints around 82 MB of data), so
+we content ourselves with showing output from cycle 4:
+
+<table width="80%" align="center">
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-17.4-3d-partition.png" width="100%" alt=""></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-17.4-3d-ux.png" alt="" width="100%"></td>
+  </tr>
+</table>
+
+
+
+The left picture shows the partitioning of the cube into 16 processes, whereas
+the right one shows the x-displacement along two cutplanes through the cube.
+
+
+
+<a name="extensions"></a>
+<h3>Possibilities for extensions</h3>
+
+The program keeps a complete copy of the Triangulation and DoFHandler objects
+on every processor. It also creates complete copies of the solution vector,
+and it creates output on only one processor. All of this is obviously
+the bottleneck as far as parallelization is concerned. 
+
+Internally, within deal.II, parallelizing the data
+structures used in hierarchic and unstructured triangulations is a hard
+problem, and it took us a few more years to make this happen. The step-40
+tutorial program and the @ref distributed documentation module talk about how
+to do these steps and what it takes from an application perspective. An
+obvious extension of the current program would be to use this functionality to
+completely distribute computations to many more processors than used here.
diff --git a/examples/step-17/doc/tooltip b/examples/step-17/doc/tooltip
new file mode 100644
index 0000000..4f421f3
--- /dev/null
+++ b/examples/step-17/doc/tooltip
@@ -0,0 +1 @@
+Parallel computing using MPI. Using PETSc.
diff --git a/examples/step-17/step-17.cc b/examples/step-17/step-17.cc
new file mode 100644
index 0000000..4bc646b
--- /dev/null
+++ b/examples/step-17/step-17.cc
@@ -0,0 +1,1103 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2000 - 2016 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Author: Wolfgang Bangerth, University of Texas at Austin, 2000, 2004
+ *         Wolfgang Bangerth, Texas A&M University, 2016
+ */
+
+// @sect3{Include files}
+
+// First the usual assortment of header files we have already used in previous
+// example programs:
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/multithread_info.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/grid_refinement.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/tria_boundary_lib.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/fe_system.h>
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/matrix_tools.h>
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/numerics/error_estimator.h>
+
+// And here come the things that we need particularly for this example program
+// and that weren't in step-8. First, we replace the standard output
+// <code>std::cout</code> by a new stream <code>pcout</code> which is used in
+// parallel computations for generating output only on one of the MPI
+// processes.
+#include <deal.II/base/conditional_ostream.h>
+// We are going to query the number of processes and the number of the present
+// process by calling the respective functions in the Utilities::MPI
+// namespace.
+#include <deal.II/base/mpi.h>
+// Then, we are going to replace all linear algebra components that involve
+// the (global) linear system by classes that wrap interfaces similar to our
+// own linear algebra classes around what PETSc offers (PETSc is a library
+// written in C, and deal.II comes with wrapper classes that provide the PETSc
+// functionality with an interface that is similar to the interface we already
+// had for our own linear algebra classes). In particular, we need vectors and
+// matrices that are distributed across several
+// @ref GlossMPIProcess "processes" in MPI programs (and
+// simply map to sequential, local vectors and matrices if there is only a
+// single process, i.e. if you are running on only one machine, and without
+// MPI support):
+#include <deal.II/lac/petsc_parallel_vector.h>
+#include <deal.II/lac/petsc_parallel_sparse_matrix.h>
+// Then we also need interfaces for solvers and preconditioners that PETSc
+// provides:
+#include <deal.II/lac/petsc_solver.h>
+#include <deal.II/lac/petsc_precondition.h>
+// And in addition, we need some algorithms for partitioning our meshes so
+// that they can be efficiently distributed across an MPI network. The
+// partitioning algorithm is implemented in the <code>GridTools</code>
+// namespace, and we need an additional include file for a function in
+// <code>DoFRenumbering</code> that allows us to sort the indices associated with
+// degrees of freedom so that they are numbered according to the subdomain
+// they are associated with:
+#include <deal.II/grid/grid_tools.h>
+#include <deal.II/dofs/dof_renumbering.h>
+
+// And this is simply C++ again:
+#include <fstream>
+#include <iostream>
+#include <sstream>
+
+// The last step is as in all previous programs:
+namespace Step17
+{
+  using namespace dealii;
+
+  // @sect3{The <code>ElasticProblem</code> class template}
+
+  // The first real part of the program is the declaration of the main
+  // class.  As mentioned in the introduction, almost all of this has
+  // been copied verbatim from step-8, so we only comment on the few
+  // differences between the two tutorials.  There is one (cosmetic)
+  // change in that we let <code>solve</code> return a value, namely
+  // the number of iterations it took to converge, so that we can
+  // output this to the screen at the appropriate place.
+  template <int dim>
+  class ElasticProblem
+  {
+  public:
+    ElasticProblem ();
+    ~ElasticProblem ();
+    void run ();
+
+  private:
+    void setup_system ();
+    void assemble_system ();
+    unsigned int solve ();
+    void refine_grid ();
+    void output_results (const unsigned int cycle) const;
+
+    // The first change is that we have to declare a variable that
+    // indicates the @ref GlossMPICommunicator "MPI communicator" over
+    // which we are supposed to distribute our computations.
+    MPI_Comm mpi_communicator;
+
+    // Then we have two variables that tell us where in the parallel
+    // world we are. The first of the following variables,
+    // <code>n_mpi_processes</code>, tells us how many MPI processes
+    // there exist in total, while the second one,
+    // <code>this_mpi_process</code>, indicates which is the number of
+    // the present process within this space of processes (in MPI
+    // language, this corresponds to the @ref GlossMPIRank "rank" of
+    // the process). The latter will have a unique value for each
+    // process between zero and (less than)
+    // <code>n_mpi_processes</code>. If this program is run on a
+    // single machine without MPI support, then their values are
+    // <code>1</code> and <code>0</code>, respectively.
+    const unsigned int n_mpi_processes;
+    const unsigned int this_mpi_process;
+
+    // Next up is a stream-like variable <code>pcout</code>. It is, in essence,
+    // just something we use for convenience: in a parallel program,
+    // if each process outputs status information, then there quickly
+    // is a lot of clutter. Rather, we would want to only have one
+    // @ref GlossMPIProcess "process" output everything once, for
+    // example the one with @ref GlossMPIRank "rank" zero. At the same
+    // time, it seems silly to prefix <i>every</i> place where we
+    // create output with an <code>if (my_rank==0)</code> condition.
+    //
+    // To make this simpler, the ConditionalOStream class does exactly
+    // this under the hood: it acts as if it were a stream, but only
+    // forwards to a real, underlying stream if a flag is set. By
+    // setting this condition to <code>this_mpi_process==0</code>
+    // (where <code>this_mpi_process</code> corresponds to the rank of
+    // an MPI process), we make sure that output is only generated
+    // from the first process and that we don't get the same lines of
+    // output over and over again, once per process. Thus, we can use
+    // <code>pcout</code> everywhere and in every process, but on all
+    // but one process nothing will ever happen to the information
+    // that is piped into the object via
+    // <code>operator<<</code>.
+    ConditionalOStream pcout;
+
+    // The remainder of the list of member variables is fundamentally the
+    // same as in step-8. However, we change the declarations of matrix
+    // and vector types to use parallel PETSc objects instead. Note that
+    // we do not use a separate sparsity pattern, since PETSc manages this
+    // internally as part of its matrix data structures.
+    Triangulation<dim>   triangulation;
+    DoFHandler<dim>      dof_handler;
+
+    FESystem<dim>        fe;
+
+    ConstraintMatrix     hanging_node_constraints;
+
+    PETScWrappers::MPI::SparseMatrix system_matrix;
+
+    PETScWrappers::MPI::Vector       solution;
+    PETScWrappers::MPI::Vector       system_rhs;
+  };
+
+
+  // @sect3{Right hand side values}
+
+  // The following is taken from step-8 without change:
+  template <int dim>
+  class RightHandSide : public Function<dim>
+  {
+  public:
+    RightHandSide ();
+    // Evaluate all components at the single point p and store them in values.
+    virtual void vector_value (const Point<dim> &p,
+                               Vector<double> &values) const;
+    // Evaluate at every entry of points, filling the matching entry of value_list.
+    virtual void vector_value_list (const std::vector<Point<dim> > &points,
+                                    std::vector<Vector<double> > &value_list) const;
+  };
+
+
+  template <int dim>
+  RightHandSide<dim>::RightHandSide ()
+    : Function<dim> (dim) // vector_value() expects exactly dim components
+  {}
+
+
+  template <int dim>
+  inline
+  void RightHandSide<dim>::vector_value (const Point<dim> &p,
+                                         Vector<double>   &values) const
+  {
+    Assert (values.size() == dim,
+            ExcDimensionMismatch (values.size(), dim));
+    Assert (dim >= 2, ExcInternalError());
+
+    // Squared radius shared by all three loaded regions.
+    const double radius_squared = 0.2*0.2;
+
+    // The first component is loaded around centers at x = +0.5 and x = -0.5.
+    Point<dim> center_right, center_left;
+    center_right(0) = 0.5;
+    center_left(0) = -0.5;
+
+    const bool near_a_center = ((p-center_right).norm_square() < radius_squared) ||
+                               ((p-center_left).norm_square() < radius_squared);
+    values(0) = (near_a_center ? 1 : 0);
+
+    // The second component is loaded around the origin.
+    values(1) = ((p.square() < radius_squared) ? 1 : 0);
+  }
+
+
+
+  template <int dim>
+  void RightHandSide<dim>::vector_value_list (const std::vector<Point<dim> > &points,
+                                              std::vector<Vector<double> >   &value_list) const
+  {
+    // One output vector is expected for every evaluation point.
+    const unsigned int n_evaluations = points.size();
+    Assert (value_list.size() == n_evaluations,
+            ExcDimensionMismatch (value_list.size(), n_evaluations));
+
+    // Qualified call: binds statically instead of dispatching virtually.
+    for (unsigned int i=0; i!=n_evaluations; ++i)
+      RightHandSide<dim>::vector_value (points[i], value_list[i]);
+  }
+
+
+
+  // @sect3{The <code>ElasticProblem</code> class implementation}
+
+  // @sect4{ElasticProblem::ElasticProblem}
+
+  // The first step in the actual implementation is the constructor of
+  // the main class. Apart from initializing the same member variables
+  // that we already had in step-8, we here initialize the MPI
+  // communicator variable we shall use with the global MPI
+  // communicator linking all processes together (in more complex
+  // applications, one could here use a communicator object that only
+  // links a subset of all processes), and call the Utilities::MPI
+  // helper functions to determine the number of processes and where
+  // the present one fits into this picture. In addition, we make sure
+  // that output is only generated by the (globally) first process.
+  // We do so by passing the stream we want to output to
+  // (<code>std::cout</code>) and a true/false flag as arguments where
+  // the latter is determined by testing whether the process currently
+  // executing the constructor call is the first in the MPI universe.
+  template <int dim>
+  ElasticProblem<dim>::ElasticProblem ()
+    // The initializers below rely on the member declaration order: the
+    // communicator is needed for the rank, and the rank for pcout.
+    : mpi_communicator (MPI_COMM_WORLD),
+      n_mpi_processes (Utilities::MPI::n_mpi_processes (mpi_communicator)),
+      this_mpi_process (Utilities::MPI::this_mpi_process (mpi_communicator)),
+      pcout (std::cout, this_mpi_process == 0), // only rank 0 prints
+      dof_handler (triangulation),
+      fe (FE_Q<dim> (1), dim)                   // dim copies of FE_Q<dim>(1)
+  {}
+
+
+
+  // @sect4{ElasticProblem::~ElasticProblem}
+
+  // The destructor is exactly as in step-8.
+  template <int dim>
+  ElasticProblem<dim>::~ElasticProblem ()
+  {
+    this->dof_handler.clear (); // fe is destroyed before dof_handler; presumably this lets go of it first
+  }
+
+
+  // @sect4{ElasticProblem::setup_system}
+
+  // Next, the function in which we set up the various variables
+  // for the global linear system to be solved needs to be implemented.
+  //
+  // However, before we do this, there is one thing to do for a
+  // parallel program: we need to determine which MPI process is
+  // responsible for each of the cells. Splitting cells among
+  // processes, commonly called "partitioning the mesh", is done by
+  // assigning a @ref GlossSubdomainId "subdomain id" to each cell. We
+  // do so by calling into the METIS library that does this in a very
+  // efficient way, trying to minimize the number of nodes on the
+  // interfaces between subdomains. Rather than trying to call METIS
+  // directly, we do this by calling the
+  // GridTools::partition_triangulation() function that does this at a
+  // much higher level of programming.
+  //
+  // @note As mentioned in the introduction, we could avoid this manual
+  //   partitioning step if we used the parallel::shared::Triangulation
+  //   class for the triangulation object instead (as we do in step-18).
+  //   That class does, in essence, everything a regular triangulation
+  //   does, but it then also automatically partitions the mesh after
+  //   every mesh creation or refinement operation.
+  //
+  // Following partitioning, we need to enumerate all degrees of
+  // freedom as usual.  However, we would like to enumerate the
+  // degrees of freedom in a way so that all degrees of freedom
+  // associated with cells in subdomain zero (which resides on process
+  // zero) come before all DoFs associated with cells on subdomain
+  // one, before those on cells on process two, and so on. We need
+  // this since we have to split the global vectors for right hand
+  // side and solution, as well as the matrix into contiguous chunks
+  // of rows that live on each of the processors, and we will want to
+  // do this in a way that requires minimal communication. This
+  // particular enumeration can be obtained by re-ordering degrees of
+  // freedom indices using DoFRenumbering::subdomain_wise().
+  //
+  // The final step of this initial setup is that we get ourselves a
+  // variable that indicates how many degrees of freedom the current
+  // process is responsible for. (Note that a degree of freedom is not
+  // necessarily owned by the process that owns a cell just because
+  // the degree of freedom lives on this cell: some degrees of freedom
+  // live on interfaces between subdomains, and are consequently only owned by
+  // one of the processes adjacent to this interface.)
+  //
+  // Before we move on, let us recall a fact already discussed in the
+  // introduction: The triangulation we use here is replicated across
+  // all processes, and each process has a complete copy of the entire
+  // triangulation, with all cells. Partitioning only provides a way
+  // to identify which cells out of all each process "owns", but it
+  // knows everything about all of them. Likewise, the DoFHandler
+  // object knows everything about every cell, in particular the
+  // degrees of freedom that live on each cell, whether it is one that
+  // the current process owns or not. This can not scale to large
+  // problems because eventually just storing on every process the
+  // entire mesh, and everything that is associated with it, will
+  // become infeasible if the problem is large enough. On the other
+  // hand, if we split the triangulation into parts so that every
+  // process stores only those cells it "owns" but nothing else (or,
+  // at least a sufficiently small fraction of everything else), then
+  // we can solve large problems if only we throw a large enough
+  // number of MPI processes at them. This is what we are going to do in
+  // step-40, for example, using the
+  // parallel::distributed::Triangulation class.  On the other hand,
+  // most of the rest of what we demonstrate in the current program
+  // will actually continue to work whether we have the entire
+  // triangulation available, or only a piece of it.
+  template <int dim>
+  void ElasticProblem<dim>::setup_system ()
+  {
+    GridTools::partition_triangulation (n_mpi_processes, triangulation);
+
+    dof_handler.distribute_dofs (fe);
+    DoFRenumbering::subdomain_wise (dof_handler);
+
+    const types::global_dof_index n_global_dofs = dof_handler.n_dofs();
+    const types::global_dof_index n_local_dofs
+      = DoFTools::count_dofs_with_subdomain_association (dof_handler,
+                                                         this_mpi_process);
+
+    // With the sizes known, we can initialize the system matrix as
+    // well as the solution and right hand side vectors. Because all
+    // of them are distributed objects, each needs the MPI
+    // communicator, its global size (for the matrix, both dimensions
+    // equal the <i>total</i> number of degrees of freedom), and the
+    // number of rows that are to be stored on the present process
+    // (<code>n_local_dofs</code>). PETSc additionally needs to know
+    // how the columns of the locally stored chunk of the matrix are
+    // partitioned. For square matrices, columns should be
+    // partitioned in the same way as rows, which is why
+    // <code>n_local_dofs</code> appears a second time in the call.
+    // For rectangular matrices, in contrast, columns have to be
+    // partitioned like the vectors the matrix is multiplied with,
+    // while rows have to be partitioned like the destination vectors
+    // of matrix-vector multiplications:
+    system_matrix.reinit (mpi_communicator,
+                          n_global_dofs, n_global_dofs,
+                          n_local_dofs, n_local_dofs,
+                          dof_handler.max_couplings_between_dofs());
+
+    solution.reinit (mpi_communicator, n_global_dofs, n_local_dofs);
+    system_rhs.reinit (mpi_communicator, n_global_dofs, n_local_dofs);
+
+    // What remains is to set up the object describing the hanging
+    // node constraints of the present grid. As with the
+    // triangulation and DoFHandler objects, we simply store
+    // <i>all</i> constraints on each process; again, this will not
+    // scale, but we show in step-40 how one can work around this by
+    // only storing on each MPI process the constraints for degrees
+    // of freedom that actually matter on this particular process.
+    //
+    // Since PETSc handles the sparsity pattern internally to the
+    // matrix, there is no need to set up an independent sparsity
+    // pattern here, and to condense it for constraints, as we
+    // have done in all other example programs. The constraints
+    // themselves are applied while assembling the linear system.
+    hanging_node_constraints.clear ();
+    DoFTools::make_hanging_node_constraints (dof_handler,
+                                             hanging_node_constraints);
+    hanging_node_constraints.close ();
+  }
+
+
+
+  // @sect4{ElasticProblem::assemble_system}
+
+  // We now assemble the matrix and right hand side of the
+  // problem. There are some things worth mentioning before we go into
+  // detail. First, we will be assembling the system in parallel,
+  // i.e., each process will be responsible for assembling on cells
+  // that belong to this particular process. Note that the degrees of
+  // freedom are split in a way such that all DoFs in the interior of
+  // cells and between cells belonging to the same subdomain belong to
+  // the process that <code>owns</code> the cell. However, even then
+  // we sometimes need to assemble on a cell with a neighbor that
+  // belongs to a different process, and in these cases when we add up
+  // the local contributions into the global matrix or right hand side
+  // vector, we have to transfer these entries to the process that
+  // owns these elements. Fortunately, we don't have to do this by
+  // hand: PETSc does all this for us by caching these elements
+  // locally, and sending them to the other processes as necessary
+  // when we call the <code>compress()</code> functions on the matrix
+  // and vector at the end of this function.
+  //
+  // The second point is that once we have handed over matrix and
+  // vector contributions to PETSc, it is a) hard, and b) very
+  // inefficient to get them back for modifications. This is not only
+  // the fault of PETSc, it is also a consequence of the distributed
+  // nature of this program: if an entry resides on another processor,
+  // then it is necessarily expensive to get it. The consequence of
+  // this is that we should not try to first assemble the matrix and
+  // right hand side as if there were no hanging node constraints and
+  // boundary values, and then eliminate these in a second step
+  // (using, for example, ConstraintMatrix::condense()). Rather, we
+  // should try to eliminate hanging node constraints before handing
+  // these entries over to PETSc. This is easy: instead of copying
+  // elements by hand into the global matrix (as we do in step-4), we
+  // use the ConstraintMatrix::distribute_local_to_global() functions
+  // to take care of hanging nodes at the same time. We also already
+  // did this in step-6. The second step, elimination of boundary
+  // nodes, could also be done this way by putting the boundary values
+  // into the same ConstraintMatrix object as hanging nodes (see the
+  // way it is done in step-6, for example); however, it is not
+  // strictly necessary to do this here because eliminating boundary
+  // values can be done with only the data stored on each process
+  // itself, and consequently we use the approach used before in
+  // step-4, i.e., via MatrixTools::apply_boundary_values().
+  //
+  // All of this said, here is the actual implementation starting with
+  // the general setup of helper variables.  (Note that we still use
+  // the deal.II full matrix and vector types for the local systems as
+  // these are small and need not be shared across processes.)
+  template <int dim>
+  void ElasticProblem<dim>::assemble_system ()
+  {
+    // Set up the quadrature rule and the FEValues object. The update
+    // flags request shape values and gradients, the locations of the
+    // quadrature points, and the JxW weights, all of which are used
+    // further down.
+    QGauss<dim>   quadrature (2);
+    FEValues<dim> fe_values (fe, quadrature,
+                             update_values   | update_gradients |
+                             update_quadrature_points | update_JxW_values);
+
+    const unsigned int n_cell_dofs         = fe.dofs_per_cell;
+    const unsigned int n_quadrature_points = quadrature.size();
+
+    // Scratch objects that are reused on every cell: the local
+    // matrix and right hand side, and the global indices of the
+    // degrees of freedom of the current cell. The local objects use
+    // the plain deal.II types, not the PETSc wrappers, since they are
+    // small and need not be shared across processes.
+    FullMatrix<double> local_matrix (n_cell_dofs, n_cell_dofs);
+    Vector<double>     local_rhs (n_cell_dofs);
+    std::vector<types::global_dof_index> cell_dof_indices (n_cell_dofs);
+
+    // The two coefficients are constant functions with value one. They
+    // and the right hand side are evaluated at the quadrature points
+    // of each cell into the arrays declared here.
+    ConstantFunction<dim> lambda(1.), mu(1.);
+    RightHandSide<dim>    right_hand_side;
+
+    std::vector<double>          lambda_at_q (n_quadrature_points);
+    std::vector<double>          mu_at_q (n_quadrature_points);
+    std::vector<Vector<double> > force_at_q (n_quadrature_points,
+                                             Vector<double>(dim));
+
+    // Loop over all active cells. Every process walks over the whole
+    // mesh, but it only assembles on cells whose subdomain id equals
+    // the number of this MPI process; all other cells are skipped
+    // right at the top of the loop body and are left to the processes
+    // they are associated with. In this program each process handles
+    // exactly one subdomain, so the terms <code>subdomain</code> and
+    // <code>MPI process</code> can be used interchangeably.
+    //
+    // The local assembly itself follows the same pattern as in
+    // step-8. For each local index i we first fill row i of the local
+    // matrix and then entry i of the local right hand side. The
+    // transfer into the global objects goes through
+    // ConstraintMatrix::distribute_local_to_global(), which takes
+    // care of hanging node constraints at the same time, in the same
+    // way as is done in step-6.
+    for (typename DoFHandler<dim>::active_cell_iterator
+         cell = dof_handler.begin_active(),
+         endc = dof_handler.end();
+         cell!=endc; ++cell)
+      {
+        if (cell->subdomain_id() != this_mpi_process)
+          continue;
+
+        local_matrix = 0;
+        local_rhs = 0;
+
+        fe_values.reinit (cell);
+
+        lambda.value_list (fe_values.get_quadrature_points(), lambda_at_q);
+        mu.value_list     (fe_values.get_quadrature_points(), mu_at_q);
+        right_hand_side.vector_value_list (fe_values.get_quadrature_points(),
+                                           force_at_q);
+
+        for (unsigned int i=0; i<n_cell_dofs; ++i)
+          {
+            const unsigned int
+            comp_i = fe.system_to_component_index(i).first;
+
+            // Row i of the local matrix: a lambda term, a mu term
+            // with swapped components, and a mu term that is only
+            // present when both shape functions belong to the same
+            // vector component.
+            for (unsigned int j=0; j<n_cell_dofs; ++j)
+              {
+                const unsigned int
+                comp_j = fe.system_to_component_index(j).first;
+
+                for (unsigned int q=0; q<n_quadrature_points; ++q)
+                  local_matrix(i,j)
+                  +=
+                    (
+                      (fe_values.shape_grad(i,q)[comp_i] *
+                       fe_values.shape_grad(j,q)[comp_j] *
+                       lambda_at_q[q])
+                      +
+                      (fe_values.shape_grad(i,q)[comp_j] *
+                       fe_values.shape_grad(j,q)[comp_i] *
+                       mu_at_q[q])
+                      +
+                      ((comp_i == comp_j) ?
+                       (fe_values.shape_grad(i,q) *
+                        fe_values.shape_grad(j,q) *
+                        mu_at_q[q])  :
+                       0)
+                    )
+                    *
+                    fe_values.JxW(q);
+              }
+
+            // Entry i of the local right hand side, using the
+            // component of the right hand side that shape function i
+            // belongs to.
+            for (unsigned int q=0; q<n_quadrature_points; ++q)
+              local_rhs(i) += fe_values.shape_value(i,q) *
+                              force_at_q[q](comp_i) *
+                              fe_values.JxW(q);
+          }
+
+        cell->get_dof_indices (cell_dof_indices);
+        hanging_node_constraints
+        .distribute_local_to_global(local_matrix, local_rhs,
+                                    cell_dof_indices,
+                                    system_matrix, system_rhs);
+      }
+
+    // Now "compress" the matrix and the right hand side vector. Up to
+    // this point, additions to entries that this process does not
+    // own have only been recorded locally. The compress() calls send
+    // them to the owning processes, where they are added to the
+    // values already stored there.
+    system_matrix.compress(VectorOperation::add);
+    system_rhs.compress(VectorOperation::add);
+
+    // The global matrix and right hand side have now been formed.
+    // What remains is to apply boundary values, as in step-3, step-4,
+    // and a number of other programs: we interpolate a zero function
+    // with dim components on the part of the boundary with indicator
+    // zero, and then modify the linear system accordingly.
+    //
+    // The final argument of MatrixTools::apply_boundary_values()
+    // selects whether the matrix columns that belong to boundary
+    // nodes are also set to zero (<code>true</code>) or are left
+    // untouched (<code>false</code>). Eliminating the columns keeps a
+    // symmetric matrix symmetric, which is what the CG method
+    // formally requires; not eliminating them destroys the symmetry,
+    // although the solution of the linear system should be the same
+    // either way. The price for eliminating columns is that we would
+    // have to write into column entries that are stored on other
+    // processes, which means communication, and communication is
+    // always expensive.
+    //
+    // Experience tells us that CG also works (and works almost as
+    // well) if the columns associated with boundary nodes are kept,
+    // which can be explained by the special structure of this
+    // particular non-symmetry. To avoid the expense of communication,
+    // we therefore pass <code>false</code> and leave the entries in
+    // the affected columns alone.
+    std::map<types::global_dof_index,double> boundary_dof_values;
+    VectorTools::interpolate_boundary_values (dof_handler,
+                                              0,
+                                              ZeroFunction<dim>(dim),
+                                              boundary_dof_values);
+    MatrixTools::apply_boundary_values (boundary_dof_values,
+                                        system_matrix,
+                                        solution,
+                                        system_rhs,
+                                        false);
+  }
+
+
+
+  // @sect4{ElasticProblem::solve}
+
+  // Having assembled the linear system, we next need to solve
+  // it. PETSc offers a variety of sequential and parallel solvers,
+  // for which we have written wrappers that have almost the same
+  // interface as is used for the deal.II solvers used in all previous
+  // example programs. The following code should therefore look rather
+  // familiar.
+  //
+  // At the top of the function, we set up a convergence monitor, and
+  // assign it the accuracy to which we would like to solve the linear
+  // system. Next, we create an actual solver object using PETSc's CG
+  // solver which also works with parallel (distributed) vectors and
+  // matrices. And finally a preconditioner; we choose to use a block
+  // Jacobi preconditioner which works by computing an incomplete LU
+  // decomposition on each diagonal block of the matrix.  (In other
+  // words, each MPI process computes an ILU from the rows it stores
+  // by throwing away columns that correspond to row indices not
+  // stored locally; this yields a square matrix block from which we
+  // can compute an ILU. That means that if you run the program with
+  // only one process, then you will use an ILU(0) as a
+  // preconditioner, while if it is run on many processes, then we
+  // will have a number of blocks on the diagonal and the
+  // preconditioner is the ILU(0) of each of these blocks. In the
+  // extreme case of one degree of freedom per processor, this
+  // preconditioner is then simply a Jacobi preconditioner since the
+  // diagonal matrix blocks consist of only a single entry. Such a
+  // preconditioner is relatively easy to compute because it does not
+  // require any kind of communication between processors, but it is
+  // in general not very efficient for large numbers of processors.)
+  //
+  // Following this kind of setup, we then solve the linear system:
+  template <int dim>
+  unsigned int ElasticProblem<dim>::solve ()
+  {
+    // Convergence monitor: allow at most as many iterations as the
+    // solution vector has entries, and use 1e-8 times the l2 norm of
+    // the right hand side as the tolerance.
+    const double  tolerance = 1e-8*system_rhs.l2_norm();
+    SolverControl control (solution.size(), tolerance);
+
+    // The solver is PETSc's CG method, which works with parallel
+    // (distributed) vectors and matrices; it is preconditioned with
+    // the block Jacobi preconditioner built from the system matrix.
+    PETScWrappers::SolverCG solver (control,
+                                    mpi_communicator);
+
+    PETScWrappers::PreconditionBlockJacobi block_jacobi (system_matrix);
+
+    // Solve the linear system; the iteration count is recorded in
+    // the control object.
+    solver.solve (system_matrix, solution, system_rhs,
+                  block_jacobi);
+
+    // Next, hanging node constraints have to be distributed. This is
+    // a little tricky: to set the value of a constrained degree of
+    // freedom one needs the values of the degrees of freedom it is
+    // constrained to (for a Q1 element in 2d, for example, the two
+    // nodes on the big side of a hanging node face determine the
+    // value of the constrained node in the middle).
+    //
+    // The difficulty is that the solution vector was set up (in
+    // <code>setup_system()</code>) so that each process stores only
+    // the entries belonging to degrees of freedom it "owns", whereas
+    // the entries needed for a constrained degree of freedom may be
+    // stored on a different process. PETSc (and, for that matter, the
+    // MPI model on which it is built) does not allow us to simply
+    // query entries stored elsewhere. We therefore create a copy of
+    // the distributed vector in which all elements are stored
+    // locally, using the conversion constructor of the deal.II
+    // Vector class. (The conversion requires communication, but
+    // every process sends its data to the others once in bulk
+    // instead of answering queries for individual elements.)
+    Vector<double> full_solution (solution);
+
+    // Clearly, storing <i>all elements</i> of the solution vector on
+    // every process cannot scale to large problems on large numbers
+    // of processes. (step-40 shows how to do this better.) On the
+    // other hand, on this local copy the usual function
+    // ConstraintMatrix::distribute() can be used directly, and it
+    // computes the values of <i>all</i> constrained degrees of
+    // freedom, whether the current process owns them or not.
+    hanging_node_constraints.distribute (full_solution);
+
+    // Finally, copy the result back into the distributed vector. The
+    // assignment transfers only those elements of the local copy
+    // that this process stores in the distributed vector and leaves
+    // the others alone. Because all processes perform the same
+    // operation, the outcome is a distributed vector (each process
+    // storing only the entries of the degrees of freedom it owns) in
+    // which all constrained nodes have their correct values.
+    solution = full_solution;
+
+    // We end the function by returning the number of iterations the
+    // solver took to converge, as recorded by the control object, so
+    // that the caller can print it.
+    return control.last_step();
+  }
+
+
+  // @sect4{ElasticProblem::refine_grid}
+
+  // Using some kind of refinement indicator, the mesh can be
+  // refined. The problem is basically the same as with distributing
+  // hanging node constraints: in order to compute the error indicator
+  // (even if we were just interested in the indicator on the cells
+  // the current process owns), we need access to more elements of the
+  // solution vector than just those the current processor stores. To
+  // make this happen, we do essentially what we did in
+  // <code>solve()</code> already, namely get a <i>complete</i> copy
+  // of the solution vector onto every process, and use that to
+  // compute. This is in itself expensive, as explained above, and it
+  // is particularly unnecessary since we had just created and then
+  // destroyed such a vector in <code>solve()</code>, but efficiency
+  // is not the point of this program and so let us opt for a design
+  // in which every function is as self-contained as possible.
+  //
+  // Once we have such a "localized" vector that contains <i>all</i>
+  // elements of the solution vector, we can compute the indicators
+  // for the cells that belong to the present process. In fact, we
+  // could of course compute <i>all</i> refinement indicators since
+  // our Triangulation and DoFHandler objects store information about
+  // all cells, and since we have a complete copy of the solution
+  // vector. But in the interest of showing how to operate in
+  // %parallel, let us demonstrate how one would operate if one were
+  // to only compute <i>some</i> error indicators and then exchange
+  // the remaining ones with the other processes. (Ultimately, each
+  // process needs a complete set of refinement indicators because
+  // every process needs to refine their mesh, and needs to refine it
+  // in exactly the same way as all of the other processes.)
+  //
+  // So, to do all of this, we need to:
+  // - First, get a local copy of the distributed solution vector.
+  // - Second, create a vector to store the refinement indicators.
+  // - Third, let the KellyErrorEstimator compute refinement
+  //   indicators for all cells belonging to the present
+  //   subdomain/process. The last argument of the call indicates
+  //   which subdomain we are interested in. The three arguments
+  //   before it are various other default arguments that one usually
+  //   doesn't need (and doesn't state values for, but rather uses the
+  //   defaults), but which we have to state here explicitly since we
+  //   want to modify the value of a following argument (i.e. the one
+  //   indicating the subdomain).
+  template <int dim>
+  void ElasticProblem<dim>::refine_grid ()
+  {
+    // First, get a complete, process-local copy of the distributed
+    // solution vector.
+    const Vector<double> full_solution (solution);
+
+    // Let the Kelly estimator fill in indicators for the cells of our
+    // own subdomain only (selected by the last argument); the three
+    // arguments before it have to be spelled out just so that the
+    // subdomain can be given.
+    Vector<float> own_indicators (triangulation.n_active_cells());
+    KellyErrorEstimator<dim>::estimate (dof_handler,
+                                        QGauss<dim-1>(2),
+                                        typename FunctionMap<dim>::type(),
+                                        full_solution,
+                                        own_indicators,
+                                        ComponentMask(),
+                                        0,
+                                        MultithreadInfo::n_threads(),
+                                        this_mpi_process);
+
+    // At this point every process has indicators for its own cells
+    // in the respective elements of <code>own_indicators</code>; the
+    // elements for cells it does not own are zero. But all processes
+    // hold a copy of the entire triangulation and have to keep these
+    // copies in sync, so each of them needs the indicators of
+    // <i>all</i> cells. We exchange them through a distributed vector
+    // in which every process sets the elements it has computed.
+    // Viewed across all processes, every element of that vector is
+    // then set once, and assigning it to a local, non-parallel
+    // vector makes all indicators available everywhere.
+    //
+    // For simplicity, each process owns a chunk of the distributed
+    // vector with as many elements as it owns cells: the first chunk
+    // is stored with process zero, the next with process one, and so
+    // on. Note, however, that these are not necessarily the elements
+    // a process writes to, because the order in which vector elements
+    // correspond to cells is not sorted by subdomain. The values we
+    // write may therefore land in more or less random elements of
+    // the distributed vector, outside the chunk we own, and have to
+    // be copied into another process's memory space. PETSc does this
+    // for us when we call <code>compress()</code>. The inefficiency
+    // could be avoided with some more code, but it is not a major
+    // factor in the program's total runtime.
+    //
+    // So: count the cells that belong to this process, set up a
+    // distributed vector with that many locally stored elements,
+    // copy over what we computed, and compress. Only nonzero elements
+    // are copied; indicators that were computed to be exactly zero
+    // are skipped, which is harmless since the original values of
+    // the vector are zero anyway.
+    const unsigned int n_own_cells
+      = GridTools::count_cells_with_subdomain_association (triangulation,
+                                                           this_mpi_process);
+    PETScWrappers::MPI::Vector
+    all_indicators_distributed (mpi_communicator,
+                                triangulation.n_active_cells(),
+                                n_own_cells);
+
+    for (unsigned int cell_no=0; cell_no<own_indicators.size(); ++cell_no)
+      {
+        if (own_indicators(cell_no) == 0)
+          continue;
+        all_indicators_distributed(cell_no) = own_indicators(cell_no);
+      }
+    all_indicators_distributed.compress (VectorOperation::insert);
+
+    // The distributed vector now contains the refinement indicators
+    // for all cells. Every process obtains a local copy of it and
+    // uses that copy to mark cells for refinement or coarsening, and
+    // then actually refines and coarsens. It is important to
+    // recognize that <i>every</i> process does this to its own copy
+    // of the triangulation, and does it in exactly the same way.
+    const Vector<float> all_indicators (all_indicators_distributed);
+
+    // The two fractions are passed on to
+    // GridRefinement::refine_and_coarsen_fixed_number() as the
+    // fractions of cells to be refined and coarsened, respectively.
+    const double refine_fraction  = 0.3;
+    const double coarsen_fraction = 0.03;
+    GridRefinement::refine_and_coarsen_fixed_number (triangulation,
+                                                     all_indicators,
+                                                     refine_fraction,
+                                                     coarsen_fraction);
+    triangulation.execute_coarsening_and_refinement ();
+  }
+
+
+  // @sect4{ElasticProblem::output_results}
+
+  // The final function of significant interest is the one that
+  // creates graphical output. This works the same way as in step-8,
+  // with two small differences. Before discussing these, let us state
+  // the general philosophy by which this function works: we intend for all
+  // of the data to be generated on a single process, and subsequently
+  // written to a file. This is, as many other parts of this program
+  // already discussed, not something that will scale. Previously, we
+  // had argued that we will get into trouble with triangulations,
+  // DoFHandlers, and copies of the solution vector where every
+  // process has to store all of the data, and that there will come to
+  // be a point where each process simply doesn't have enough memory
+  // to store that much data. Here, the situation is different: it's
+  // not only the memory, but also the run time that's a problem. If
+  // one process is responsible for processing <i>all</i> of the data
+  // while all of the other processes do nothing, then this one
+  // function will eventually come to dominate the overall run time of
+  // the program.  In particular, the time this function takes is
+  // going to be proportional to the overall size of the problem
+  // (counted in the number of cells, or the number of degrees of
+  // freedom), independent of the number of processes we throw at it.
+  //
+  // Such situations need to be avoided, and we will show in step-18
+  // and step-40 how to address this issue. For the current problem,
+  // the solution is to have each process generate output data only
+  // for its own local cells, and write them to separate files, one
+  // file per process. This is how step-18 operates. Alternatively,
+  // one could simply leave everything in a set of independent files
+  // and let the visualization software read all of them (possibly
+  // also using multiple processors) and create a single visualization
+  // out of all of them; this is the path step-40, step-32, and all
+  // other parallel programs developed later on take.
+  //
+  // More specifically for the current function, all processes call
+  // this function, but not all of them need to do the work associated
+  // with generating output. In fact, they shouldn't, since we would
+  // try to write to the same file multiple times at once. So we let
+  // only the first process do this, and all the other ones idle
+  // around during this time (or start their work for the next
+  // iteration, or simply yield their CPUs to other jobs that happen
+  // to run at the same time). The second thing is that we not only
+  // output the solution vector, but also a vector that indicates
+  // which subdomain each cell belongs to. This will make for some
+  // nice pictures of partitioned domains.
+  //
+  // To implement this, process zero needs a complete set of solution
+  // components in a local vector. Just as with the previous function,
+  // the efficient way to do this would be to re-use the vector
+  // already created in the <code>solve()</code> function, but to keep
+  // things more self-contained, we simply re-create one here from the
+  // distributed solution vector.
+  //
+  // An important thing to realize is that we do this localization
+  // operation on all processes, not only the one that actually needs
+  // the data. This can't be avoided, however, with the communication
+  // model of MPI: MPI does not have a way to query data on another
+  // process, both sides have to initiate a communication at the same
+  // time. So even though most of the processes do not need the
+  // localized solution, we have to place the statement converting the
+  // distributed into a localized vector so that all processes execute
+  // it.
+  //
+  // (Part of this work could in fact be avoided. What we do is
+  // send the local parts of all processes to all other processes. What we
+  // would really need to do is to initiate an operation on all processes
+  // where each process simply sends its local chunk of data to process
+  // zero, since this is the only one that actually needs it, i.e., we need
+  // something like a gather operation. PETSc can do this, but for
+  // simplicity's sake we don't attempt to make use of this here. We don't,
+  // since what we do is not very expensive in the grand scheme of things:
+  // it is one vector communication among all processes, which has to be
+  // compared to the number of communications we have to do when solving the
+  // linear system, setting up the block-ILU for the preconditioner, and
+  // other operations.)
+  template <int dim>
+  void ElasticProblem<dim>::output_results (const unsigned int cycle) const
+  {
+    // Get a complete copy of the distributed solution. This statement
+    // comes before the check for the process number on purpose: the
+    // conversion involves communication, so all processes have to
+    // execute it even though only one of them uses the result.
+    const Vector<double> full_solution (solution);
+
+    // From here on, only process zero has anything to do. All other
+    // processes leave the function, since otherwise several of them
+    // would try to write to the same file at once.
+    if (this_mpi_process != 0)
+      return;
+
+    // Compose the file name from the cycle number using a string
+    // stream, so that any number of cycles is handled, and open the
+    // output file.
+    std::ostringstream filename;
+    filename << "solution-" << cycle << ".vtk";
+
+    std::ofstream vtk_file (filename.str().c_str());
+
+    // Set up the output object as in step-8 and attach the
+    // DoFHandler to it.
+    DataOut<dim> data_out;
+    data_out.attach_dof_handler (dof_handler);
+
+    // Each component of the solution gets a name that depends on
+    // the space dimension.
+    std::vector<std::string> component_names;
+    if (dim == 1)
+      component_names.push_back ("displacement");
+    else if (dim == 2 || dim == 3)
+      {
+        component_names.push_back ("x_displacement");
+        component_names.push_back ("y_displacement");
+        if (dim == 3)
+          component_names.push_back ("z_displacement");
+      }
+    else
+      {
+        // Other space dimensions are not expected here.
+        Assert (false, ExcInternalError());
+      }
+
+    data_out.add_data_vector (full_solution, component_names);
+
+    // In addition, we output one value per cell indicating which
+    // subdomain (i.e., MPI process) the cell belongs to. The library
+    // provides this information as integers whereas the output class
+    // expects floating point data, so some conversion is needed:
+    // first fill a vector of integers, one per cell, with the
+    // subdomain ids; then convert it into a floating point vector
+    // and add that to the DataOut object.
+    std::vector<unsigned int> subdomain_ids (triangulation.n_active_cells());
+    GridTools::get_subdomain_association (triangulation, subdomain_ids);
+
+    const Vector<double> subdomain_values (subdomain_ids.begin(),
+                                           subdomain_ids.end());
+
+    data_out.add_data_vector (subdomain_values, "partitioning");
+
+    // Finally, build the patches and write everything in VTK format.
+    data_out.build_patches ();
+    data_out.write_vtk (vtk_file);
+  }
+
+
+  // @sect4{ElasticProblem::run}
+
+  // Lastly, here is the driver function. It is almost completely
+  // unchanged from step-8, with the exception that we replace
+  // <code>std::cout</code> by the <code>pcout</code> stream. Apart
+  // from this, the only other cosmetic change is that we output how
+  // many degrees of freedom there are per process, and how many
+  // iterations it took for the linear solver to converge:
+  template <int dim>
+  void ElasticProblem<dim>::run ()
+  {
+    const unsigned int n_cycles = 10;
+    for (unsigned int cycle=0; cycle<n_cycles; ++cycle)
+      {
+        pcout << "Cycle " << cycle << ':' << std::endl;
+        // Refine the previous mesh, or create the initial one.
+        if (cycle != 0)
+          refine_grid ();
+        else
+          {
+            GridGenerator::hyper_cube (triangulation, -1, 1);
+            triangulation.refine_global (3);
+          }
+
+        pcout << "   Number of active cells:       "
+              << triangulation.n_active_cells() << std::endl;
+
+        setup_system ();
+
+        // Total number of unknowns and its split over the processes.
+        pcout << "   Number of degrees of freedom: "
+              << dof_handler.n_dofs()
+              << " (by partition:";
+        for (unsigned int rank=0; rank<n_mpi_processes; ++rank)
+          pcout << (rank==0 ? ' ' : '+')
+                << (DoFTools::
+                    count_dofs_with_subdomain_association (dof_handler,
+                                                           rank));
+        pcout << ")" << std::endl;
+
+        assemble_system ();
+        const unsigned int n_iterations = solve ();
+
+        pcout << "   Solver converged in " << n_iterations
+              << " iterations." << std::endl;
+        output_results (cycle);
+      }
+  }
+}
+
+
+// @sect3{The <code>main</code> function}
+
+// The <code>main()</code> function works the same way as most of the main
+// functions in the other example programs, i.e., it delegates work to
+// the <code>run</code> function of a master object, and only wraps
+// everything into some code to catch exceptions:
+int main (int argc, char **argv)
+{
+  // Separator line used to frame the error messages below.
+  const char *const separator
+    = "----------------------------------------------------";
+
+  try
+    {
+      using namespace dealii;
+      using namespace Step17;
+
+      // MPI and PETSc have to be initialized at the start of the
+      // program and un-initialized at its end; the MPI_InitFinalize
+      // object does both.
+      Utilities::MPI::MPI_InitFinalize mpi_initialization(argc, argv, 1);
+
+      ElasticProblem<2> elastic_problem;
+      elastic_problem.run ();
+    }
+  catch (std::exception &exc)
+    {
+      // A standard exception: report what() and fail.
+      std::cerr << std::endl << std::endl
+                << separator << std::endl
+                << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << separator << std::endl;
+      return 1;
+    }
+  catch (...)
+    {
+      // Anything else: report and fail as well.
+      std::cerr << std::endl << std::endl
+                << separator << std::endl
+                << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << separator << std::endl;
+      return 1;
+    }
+
+  return 0;
+}
diff --git a/examples/step-18/CMakeLists.txt b/examples/step-18/CMakeLists.txt
new file mode 100644
index 0000000..5082bd4
--- /dev/null
+++ b/examples/step-18/CMakeLists.txt
@@ -0,0 +1,51 @@
+##
+#  CMake script for the step-18 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-18")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS
+    ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../  $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF ()
+
+#
+# Are all dependencies fulfilled?
+#
+IF(NOT DEAL_II_WITH_PETSC)
+  MESSAGE(FATAL_ERROR "
+Error! The deal.II library found at ${DEAL_II_PATH} was not configured with
+    DEAL_II_WITH_PETSC = ON
+One or all of these are OFF in your installation but are required for this tutorial step."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-18/doc/builds-on b/examples/step-18/doc/builds-on
new file mode 100644
index 0000000..1b9190d
--- /dev/null
+++ b/examples/step-18/doc/builds-on
@@ -0,0 +1 @@
+step-17 step-19
diff --git a/examples/step-18/doc/intro.dox b/examples/step-18/doc/intro.dox
new file mode 100644
index 0000000..e02edfb
--- /dev/null
+++ b/examples/step-18/doc/intro.dox
@@ -0,0 +1,601 @@
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+
+This tutorial program is another one in the series on the elasticity problem
+that we have already started with step-8 and step-17. It extends it into two
+different directions: first, it solves the quasistatic but time dependent
+elasticity problem for large deformations with a Lagrangian mesh movement
+approach. Secondly, it shows some more techniques for solving such problems
+using %parallel processing with PETSc's linear algebra. In addition to this,
+we show how to work around one of the two major bottlenecks of step-17, namely
+that we generated graphical output from only one process, and that this scaled
+very badly with larger numbers of processes and on large problems. (The other
+bottleneck, namely that every processor has to hold the entire mesh and
+DoFHandler, is addressed in step-40.) Finally, a
+good number of assorted improvements and techniques are demonstrated that have
+not been shown yet in previous programs.
+
+As before in step-17, the program runs just as fine on a single sequential
+machine as long as you have PETSc installed. Information on how to tell
+deal.II about a PETSc installation on your system can be found in the deal.II
+README file, which is linked to from the <a href="../../index.html">main
+documentation page</a>
+in your installation of deal.II, or on <a href="http://www.dealii.org/">the
+deal.II webpage</a>.
+
+
+<h3>Quasistatic elastic deformation</h3>
+
+<h4>Motivation of the model</h4>
+
+In general, time-dependent small elastic deformations are described by the
+elastic wave equation
+@f[
+  \rho \frac{\partial^2 \mathbf{u}}{\partial t^2}
+  + c \frac{\partial \mathbf{u}}{\partial t}
+  - \textrm{div}\  ( C \varepsilon(\mathbf{u})) = \mathbf{f}
+  \qquad
+  \textrm{in}\ \Omega,
+@f]
+where $\mathbf{u}=\mathbf{u} (\mathbf{x},t)$ is the deformation of the body, $\rho$
+and $c$ the density and attenuation coefficient, and $\mathbf{f}$ external forces.
+In addition, initial conditions
+@f[
+  \mathbf{u}(\cdot, 0) = \mathbf{u}_0(\cdot)
+  \qquad
+  \textrm{on}\ \Omega,
+@f]
+and Dirichlet (displacement) or Neumann (traction) boundary conditions need
+to be specified for a unique solution:
+@f{eqnarray*}
+  \mathbf{u}(\mathbf{x},t) &=& \mathbf{d}(\mathbf{x},t)
+  \qquad
+  \textrm{on}\ \Gamma_D\subset\partial\Omega,
+  \\
+  \mathbf{n} \ C \varepsilon(\mathbf{u}(\mathbf{x},t)) &=& \mathbf{b}(\mathbf{x},t)
+  \qquad
+  \textrm{on}\ \Gamma_N=\partial\Omega\backslash\Gamma_D.
+@f}
+In the above formulation, $\varepsilon(\mathbf{u})= \frac 12 (\nabla \mathbf{u} + \nabla
+\mathbf{u}^T)$ is the symmetric gradient of the displacement, also called the
+<em>strain</em>. $C$ is a tensor of rank 4, called the <em>stress-strain
+  tensor</em> that contains knowledge of the elastic strength of the material; its
+symmetry properties make sure that it maps symmetric tensors of rank 2
+(“matrices” of dimension $d$, where $d$ is the spatial dimensionality) onto
+symmetric tensors of the same rank. We will comment on the roles of the strain
+and stress tensors more below. For the moment it suffices to say that we
+interpret the term $\textrm{div}\  ( C \varepsilon(\mathbf{u}))$ as the vector with
+components $\frac \partial{\partial x_j} C_{ijkl} \varepsilon(\mathbf{u})_{kl}$,
+where summation over indices $j,k,l$ is implied.
+
+The quasistatic limit of this equation is motivated as follows: each small
+perturbation of the body, for example by changes in boundary condition or the
+forcing function, will result in a corresponding change in the configuration
+of the body. In general, this will be in the form of waves radiating away from
+the location of the disturbance. Due to the presence of the damping term,
+these waves will be attenuated on a time scale of, say, $\tau$. Now, assume
+that all changes in external forcing happen on time scales that are
+much larger than $\tau$. In that case, the dynamic nature of the change is
+unimportant: we can consider the body to always be in static equilibrium,
+i.e. we can assume that at all times the body satisfies
+@f{eqnarray*}
+  - \textrm{div}\  ( C \varepsilon(\mathbf{u})) &=& \mathbf{f}(\mathbf{x},t)
+  \qquad
+  \textrm{in}\ \Omega,
+  \\
+  \mathbf{u}(\mathbf{x},t) &=& \mathbf{d}(\mathbf{x},t)
+  \qquad
+  \textrm{on}\ \Gamma_D,
+  \\
+  \mathbf{n} \ C \varepsilon(\mathbf{u}(\mathbf{x},t)) &=& \mathbf{b}(\mathbf{x},t)
+  \qquad
+  \textrm{on}\ \Gamma_N.
+@f}
+Note that the differential equation does not contain any time derivatives any
+more -- all time dependence is introduced through boundary conditions and a
+possibly time-varying force function $\mathbf{f}(\mathbf{x},t)$. The changes in
+configuration can therefore be considered as being stationary
+instantaneously. An alternative view of this is that $t$ is not really a time
+variable, but only a time-like parameter that governs the evolution of the
+problem.
+
+While these equations are sufficient to describe small deformations, computing
+large deformations is a little more complicated and, in general, leads
+to nonlinear equations such as those treated in step-44. In the
+following, let us consider some of the tools one would employ when
+simulating problems in which the deformation becomes <i>large</i>.
+
+@note The model we will consider below is not founded on anything that
+would be mathematically sound: we will consider a model in which we
+produce a small deformation, deform the physical coordinates of the
+body by this deformation, and then consider the next loading step
+again as a linear problem. This isn't consistent, since the assumption
+of linearity implies that deformations are infinitesimal and so moving
+around the vertices of our mesh by a finite amount before solving the
+next linear problem is an inconsistent approach. We should therefore
+note that it is not surprising that the equations discussed below
+can't be found in the literature: <b>The model considered here has
+little to do with reality!</b> On the other hand, the implementational
+techniques we consider are very much what one would need to use when
+implementing a <i>real</i> model, as we will see in step-44.
+
+
+To come back to defining our "artificial" model, let us first
+introduce a tensorial stress variable $\sigma$, and write the differential
+equations in terms of the stress:
+@f{eqnarray*}
+  - \textrm{div}\  \sigma &=& \mathbf{f}(\mathbf{x},t)
+  \qquad
+  \textrm{in}\ \Omega(t),
+  \\
+  \mathbf{u}(\mathbf{x},t) &=& \mathbf{d}(\mathbf{x},t)
+  \qquad
+  \textrm{on}\ \Gamma_D\subset\partial\Omega(t),
+  \\
+  \mathbf{n} \ C \varepsilon(\mathbf{u}(\mathbf{x},t)) &=& \mathbf{b}(\mathbf{x},t)
+  \qquad
+  \textrm{on}\ \Gamma_N=\partial\Omega(t)\backslash\Gamma_D.
+@f}
+Note that these equations are posed on a domain $\Omega(t)$ that
+changes with time, with the boundary moving according to the
+displacements $\mathbf{u}(\mathbf{x},t)$ of the points on the boundary. To
+complete this system, we have to specify the incremental relationship between
+the stress and the strain, as follows:
+<a name="step_18.stress-strain"></a>
+@f[
+  \dot\sigma = C \varepsilon (\dot{\mathbf{u}}),
+  \qquad
+  \qquad
+  \textrm{[stress-strain]}
+@f]
+where a dot indicates a time derivative. Both the stress $\sigma$ and the
+strain $\varepsilon(\mathbf{u})$ are symmetric tensors of rank 2.
+
+
+<h4>Time discretization</h4>
+
+Numerically, this system is solved as follows: first, we discretize
+the time component using a backward Euler scheme. This leads to a
+discrete equilibrium of force at time step $n$:
+@f[
+  -\textrm{div}\  \sigma^n = \mathbf{f}^n,
+@f]
+where
+@f[
+  \sigma^n = \sigma^{n-1} + C \varepsilon (\Delta \mathbf{u}^n),
+@f]
+and $\Delta \mathbf{u}^n$ the incremental displacement for time step
+$n$. In addition, we have to specify initial data $\mathbf{u}(\cdot,0)=\mathbf{u}_0$.
+This way, if we want to solve for the displacement increment, we
+have to solve the following system:
+@f{align*}
+  - \textrm{div}\   C \varepsilon(\Delta\mathbf{u}^n) &= \mathbf{f} + \textrm{div}\  \sigma^{n-1}
+  \qquad
+  &&\textrm{in}\ \Omega(t_{n-1}),
+  \\
+  \Delta \mathbf{u}^n(\mathbf{x},t) &= \mathbf{d}(\mathbf{x},t_n) - \mathbf{d}(\mathbf{x},t_{n-1})
+  \qquad
+  &&\textrm{on}\ \Gamma_D\subset\partial\Omega(t_{n-1}),
+  \\
+  \mathbf{n} \ C \varepsilon(\Delta \mathbf{u}^n(\mathbf{x},t)) &= \mathbf{b}(\mathbf{x},t_n)-\mathbf{b}(\mathbf{x},t_{n-1})
+  \qquad
+  &&\textrm{on}\ \Gamma_N=\partial\Omega(t_{n-1})\backslash\Gamma_D.
+@f}
+The weak form of this set of equations, which as usual is the basis for the
+finite element formulation, reads as follows: find $\Delta \mathbf{u}^n \in
+\{v\in H^1(\Omega(t_{n-1}))^d: v|_{\Gamma_D}=\mathbf{d}(\cdot,t_n) - \mathbf{d}(\cdot,t_{n-1})\}$
+such that
+<a name="step_18.linear-system"></a>
+@f{align*}
+  (C \varepsilon(\Delta\mathbf{u}^n), \varepsilon(\varphi) )_{\Omega(t_{n-1})}
+  &=
+  (\mathbf{f}, \varphi)_{\Omega(t_{n-1})}
+  -(\sigma^{n-1},\varepsilon(\varphi))_{\Omega(t_{n-1})}
+  \\
+  &\qquad +(\mathbf{b}(\mathbf{x},t_n)-\mathbf{b}(\mathbf{x},t_{n-1}), \varphi)_{\Gamma_N}
+  \\
+  &\qquad\qquad
+  \forall \varphi \in \{\mathbf{v}\in H^1(\Omega(t_{n-1}))^d: \mathbf{v}|_{\Gamma_D}=0\}.
+  \qquad
+  \qquad
+  \textrm{[linear-system]}
+@f}
+We note that, for simplicity, in the program we will always assume that there
+are no boundary forces, i.e. $\mathbf{b} = 0$, and that the deformation of the
+body is driven by body forces $\mathbf{f}$ and prescribed boundary displacements
+$\mathbf{d}$ alone. It is also worth noting that when integrating by parts, we
+would get terms of the form $(C \varepsilon(\Delta\mathbf{u}^n), \nabla \varphi
+)_{\Omega(t_{n-1})}$, but that we replace it with the term involving the
+symmetric gradient $\varepsilon(\varphi)$ instead of $\nabla\varphi$. Due to
+the symmetry of $C$, the two terms are equivalent, but the symmetric version
+avoids a potential for round-off to render the resulting matrix slightly
+non-symmetric.
+
+The system at time step $n$, to be solved on the old domain
+$\Omega(t_{n-1})$, has exactly the form of a stationary elastic
+problem, and is therefore similar to what we have already implemented
+in previous example programs. We will therefore not comment on the
+space discretization beyond saying that we again use lowest order
+continuous finite elements.
+
+There are differences, however:
+<ol>
+  <li> We have to move (update) the mesh after each time step, in order to be
+  able to solve the next time step on a new domain;
+
+  <li> We need to know $\sigma^{n-1}$ to compute the next incremental
+  displacement, i.e. we need to compute it at the end of the time step
+  to make sure it is available for the next time step. Essentially,
+  the stress variable is our window to the history of deformation of
+  the body.
+</ol>
+These two operations are done in the functions <code>move_mesh</code> and
+<code>update_quadrature_point_history</code> in the program. While moving
+the mesh is only a technicality, updating the stress is a little more
+complicated and will be discussed in the next section.
+
+
+<h4>Updating the stress variable</h4>
+
+As indicated above, we need to have the stress variable $\sigma^n$ available
+when computing time step $n+1$, and we can compute it using
+<a name="step_18.stress-update"></a>
+@f[
+  \sigma^n = \sigma^{n-1} + C \varepsilon (\Delta \mathbf{u}^n).
+  \qquad
+  \qquad
+  \textrm{[stress-update]}
+@f]
+There are, despite the apparent simplicity of this equation, two questions
+that we need to discuss. The first concerns the way we store $\sigma^n$: even
+if we compute the incremental updates $\Delta\mathbf{u}^n$ using lowest-order
+finite elements, then its symmetric gradient $\varepsilon(\Delta\mathbf{u}^n)$ is
+in general still a function that is not easy to describe. In particular, it is
+not a piecewise constant function, and on general meshes (with cells that are
+not rectangles %parallel to the coordinate axes) or with non-constant
+stress-strain tensors $C$ it is not even a bi- or trilinear function. Thus, it
+is a priori not clear how to store $\sigma^n$ in a computer program.
+
+To decide this, we have to see where it is used. The only place where we
+require the stress is in the term
+$(\sigma^{n-1},\varepsilon(\varphi))_{\Omega(t_{n-1})}$. In practice, we of
+course replace this term by numerical quadrature:
+@f[
+  (\sigma^{n-1},\varepsilon(\varphi))_{\Omega(t_{n-1})}
+  =
+  \sum_{K\subset {T}}
+  (\sigma^{n-1},\varepsilon(\varphi))_K
+  \approx
+  \sum_{K\subset {T}}
+  \sum_q
+  w_q \ \sigma^{n-1}(\mathbf{x}_q) : \varepsilon(\varphi(\mathbf{x}_q)),
+@f]
+where $w_q$ are the quadrature weights and $\mathbf{x}_q$ the quadrature points on
+cell $K$. This should make clear that what we really need is not the stress
+$\sigma^{n-1}$ in itself, but only the values of the stress in the quadrature
+points on all cells. This, however, is a simpler task: we only have to provide
+a data structure that is able to hold one symmetric tensor of rank 2 for each
+quadrature point on all cells (or, since we compute in parallel, all
+quadrature points of all cells that the present MPI process “owns”). At the
+end of each time step we then only have to evaluate $\varepsilon(\Delta \mathbf{u}^n(\mathbf{x}_q))$, multiply it by the stress-strain tensor $C$, and use the
+result to update the stress $\sigma^n(\mathbf{x}_q)$ at quadrature point $q$.
+
+The second complication is not visible in our notation as chosen above. It is
+due to the fact that we compute $\Delta u^n$ on the domain $\Omega(t_{n-1})$,
+and then use this displacement increment to both update the stress as well as
+move the mesh nodes around to get to $\Omega(t_n)$ on which the next increment
+is computed. What we have to make sure, in this context, is that moving the
+mesh does not only involve moving around the nodes, but also making
+corresponding changes to the stress variable: the updated stress is a variable
+that is defined with respect to the coordinate system of the material in the
+old domain, and has to be transferred to the new domain. The reason for this
+can be understood as follows: locally, the incremental deformation $\Delta\mathbf{u}$ can be decomposed into three parts, a linear translation (the constant part
+of the displacement increment field in the neighborhood of a point), a
+dilational
+component (that part of the gradient of the displacement field that has a
+nonzero divergence), and a rotation. A linear translation of the material does
+not affect the stresses that are frozen into it -- the stress values are
+simply translated along. The dilational or compressional change produces a
+corresponding stress update. However, the rotational component does not
+necessarily induce a nonzero stress update (think, in 2d, for example of the
+situation where $\Delta\mathbf{u}=(y, -x)^T$, with which $\varepsilon(\Delta
+\mathbf{u})=0$). Nevertheless, if the material was prestressed in a certain
+direction, then this direction will be rotated along with the material.  To
+this end, we have to define a rotation matrix $R(\Delta \mathbf{u}^n)$ that
+describes, in each point, the rotation due to the displacement increments. It
+is not hard to see that the actual dependence of $R$ on $\Delta \mathbf{u}^n$ can
+only be through the curl of the displacement, rather than the displacement
+itself or its full gradient (as mentioned above, the constant components of
+the increment describe translations, its divergence the dilational modes, and
+the curl the rotational modes). Since the exact form of $R$ is cumbersome, we
+only state it in the program code, and note that the correct updating formula
+for the stress variable is then
+<a name="step_18.stress-update+rot"></a>
+@f[
+  \sigma^n
+  =
+  R(\Delta \mathbf{u}^n)^T
+  [\sigma^{n-1} + C \varepsilon (\Delta \mathbf{u}^n)]
+  R(\Delta \mathbf{u}^n).
+  \qquad
+  \qquad
+  \textrm{[stress-update+rot]}
+@f]
+
+Both stress update and rotation are implemented in the function
+<code>update_quadrature_point_history</code> of the example program.
+
+
+<h3>Parallel graphical output</h3>
+
+In step-17, the main bottleneck for %parallel computations as far as run time
+is concerned
+was that only the first processor generated output for the entire domain.
+Since generating graphical output is expensive, this did not scale well when
+larger numbers of processors were involved. We will address this here. (For a
+definition of what it means for a program to "scale", see 
+@ref GlossParallelScaling "this glossary entry".)
+
+Basically, what we need to do is let every process
+generate graphical output for that subset of cells that it owns, write them
+into separate files and have a way to display all files for a certain timestep
+at the same time. This way the code produces one <code>.vtu</code> file per process per
+time step. The two common VTK file viewers ParaView and VisIt both support
+opening more than one <code>.vtu</code> file at once. To simplify the process of picking
+the correct files and allow moving around in time, both support record files
+that reference all files for a given timestep. Sadly, the record files have a
+different format between VisIt and ParaView, so we write out both formats.
+
+The code will generate the files <code>solution-TTTT.NNN.vtu</code>, 
+where <code>TTTT</code> is the timestep number (starting from 1) and 
+<code>NNN</code> is the process rank (starting from
+0). These files contain the locally owned cells for the timestep and
+processor. The file <code>solution-TTTT.visit</code> is the VisIt record
+for timestep <code>TTTT</code>, while <code>solution-TTTT.pvtu</code> is
+the same for ParaView. (More recent versions of VisIt can actually read
+<code>.pvtu</code> files as well, but it doesn't hurt to output both
+kinds of record files.) Finally, the file
+<code>solution.pvd</code> is a special record only supported by ParaView that references
+all time steps. So in ParaView, only solution.pvd needs to be opened, while
+one needs to select the group of all .visit files in VisIt for the same
+effect.
+
+
+<h3>A triangulation with automatic partitioning</h3>
+
+In step-17, we used a regular triangulation that was simply replicated on
+every processor, and a corresponding DoFHandler. Both had no idea that they
+were used in a %parallel context -- they just existed in their entirety
+on every processor, and we argued that this was eventually going to be a
+major memory bottleneck.
+
+We do not address this issue here (we will do so in step-40) but make
+the situation slightly more automated. In step-17, we created the triangulation
+and then manually "partitioned" it, i.e., we assigned
+@ref GlossSubdomainId "subdomain ids" to every cell that indicated which
+@ref GlossMPIProcess "MPI process" "owned" the cell. Here, we use a class
+parallel::shared::Triangulation that at least does this part automatically:
+whenever you create or refine such a triangulation, it automatically
+partitions itself among all involved processes (which it knows about because
+you have to tell it about the @ref GlossMPICommunicator "MPI communicator"
+that connects these processes upon construction of the triangulation).
+Otherwise, the parallel::shared::Triangulation looks, for all practical
+purposes, like a regular Triangulation object. 
+
+The convenience of using this class does not only result from being able
+to avoid the manual call to GridTools::partition(). Rather, the DoFHandler
+class now also knows that you want to use it in a parallel context, and
+by default automatically enumerates degrees of freedom in such a way
+that all DoFs owned by process zero come before all DoFs owned by process 1,
+etc. In other words, you can also avoid the call to 
+DoFRenumbering::subdomain_wise().
+
+There are other benefits. For example, because the triangulation knows that
+it lives in a %parallel universe, it also knows that it "owns" certain
+cells (namely, those whose subdomain id equals its MPI rank; previously,
+the triangulation only stored these subdomain ids, but had no way to
+make sense of them). Consequently, in the assembly function, you can
+test whether a cell is "locally owned" (i.e., owned by the current
+process, see @ref GlossLocallyOwnedCell) when you loop over all cells
+using the syntax
+@code
+  if (cell->is_locally_owned())
+@endcode
+This knowledge extends to the DoFHandler object built on such triangulations,
+which can then identify which degrees of freedom are locally owned
+(see @ref GlossLocallyOwnedDofs) via calls such as 
+DoFHandler::n_locally_owned_dofs_per_processor() and
+DoFTools::extract_locally_relevant_dofs(). Finally, the DataOut class
+also knows how to deal with such triangulations and will simply skip
+generating graphical output on cells not locally owned.
+
+Of course, as has been noted numerous times in the discussion in step-17,
+keeping the entire triangulation on every process will not scale: large
+problems may simply not fit into each process's memory any more, even if
+we have sufficiently many processes around to solve them in a reasonable
+time. In such cases, the parallel::shared::Triangulation is no longer
+a reasonable basis for computations and we will show in step-40 how the
+parallel::distributed::Triangulation class can be used to work around
+this, namely by letting each process store only a <i>part</i> of the
+triangulation.
+
+
+<h3>Overall structure of the program</h3>
+
+The overall structure of the program can be inferred from the <code>run()</code>
+function that first calls <code>do_initial_timestep()</code> for the first time
+step, and then <code>do_timestep()</code> on all subsequent time steps. The
+difference between these functions is only that in the first time step we
+start on a coarse mesh, solve on it, refine the mesh adaptively, and then
+start again with a clean state on that new mesh. This procedure gives us a
+better starting mesh, although we should of course keep adapting the mesh as
+iterations proceed -- this isn't done in this program, but commented on below.
+
+The common part of the two functions treating time steps is the following
+sequence of operations on the present mesh:
+<ul>
+<li> <code>assemble_system ()</code> [via <code>solve_timestep ()</code>]:
+  This first function is also the most interesting one. It assembles the
+  linear system corresponding to the discretized version of equation
+  <a href="#step_18.linear-system">[linear-system]</a>. This leads to a system matrix $A_{ij} = \sum_K
+  A^K_{ij}$ built up of local contributions on each cell $K$ with entries
+  @f[
+    A^K_{ij} = (C \varepsilon(\varphi_j), \varepsilon(\varphi_i))_K;
+  @f]
+  In practice, $A^K$ is computed using numerical quadrature according to the
+  formula
+  @f[
+    A^K_{ij} = \sum_q w_q [\varepsilon(\varphi_i(\mathbf{x}_q)) : C :
+                           \varepsilon(\varphi_j(\mathbf{x}_q))],
+  @f]
+  with quadrature points $\mathbf{x}_q$ and weights $w_q$. We have built these
+  contributions before, in step-8 and step-17, but in both of these cases we
+  have done so rather clumsily by using knowledge of how the rank-4 tensor $C$
+  is composed, and considering individual elements of the strain tensors
+  $\varepsilon(\varphi_i),\varepsilon(\varphi_j)$. This is not really
+  convenient, in particular if we want to consider more complicated elasticity
+  models than the isotropic case for which $C$ had the convenient form
+  $C_{ijkl}  = \lambda \delta_{ij} \delta_{kl} + \mu (\delta_{ik} \delta_{jl}
+  + \delta_{il} \delta_{jk})$. While we in fact do not use a more complicated
+  form than this in the present program, we nevertheless want to write it in a
+  way that would easily allow for this. It is then natural to introduce
+  classes that represent symmetric tensors of rank 2 (for the strains and
+  stresses) and 4 (for the stress-strain tensor $C$). Fortunately, deal.II
+  provides these: the <code>SymmetricTensor<rank,dim></code> class template
+  provides a full-fledged implementation of such tensors of rank <code>rank</code>
+  (which needs to be an even number) and dimension <code>dim</code>.
+
+  What we then need is two things: a way to create the stress-strain rank-4
+  tensor $C$ as well as to create a symmetric tensor of rank 2 (the strain
+  tensor) from the gradients of a shape function $\varphi_i$ at a quadrature
+  point $\mathbf{x}_q$ on a given cell. At the top of the implementation of this
+  example program, you will find such functions. The first one,
+  <code>get_stress_strain_tensor</code>, takes two arguments corresponding to
+  the Lamé constants $\lambda$ and $\mu$ and returns the stress-strain tensor
+  for the isotropic case corresponding to these constants (in the program, we
+  will choose constants corresponding to steel); it would be simple to replace
+  this function by one that computes this tensor for the anisotropic case, or
+  taking into account crystal symmetries, for example. The second one,
+  <code>get_strain</code> takes an object of type <code>FEValues</code> and indices
+  $i$ and $q$ and returns the symmetric gradient, i.e. the strain,
+  corresponding to shape function $\varphi_i(\mathbf{x}_q)$, evaluated on the cell
+  on which the <code>FEValues</code> object was last reinitialized.
+
+  Given this, the innermost loop of <code>assemble_system</code> computes the
+  local contributions to the matrix in the following elegant way (the variable
+  <code>stress_strain_tensor</code>, corresponding to the tensor $C$, has
+  previously been initialized with the result of the first function above):
+  @verbatim
+for (unsigned int i=0; i<dofs_per_cell; ++i)
+  for (unsigned int j=0; j<dofs_per_cell; ++j)
+    for (unsigned int q_point=0; q_point<n_q_points;
+         ++q_point)
+      {
+        const SymmetricTensor<2,dim>
+          eps_phi_i = get_strain (fe_values, i, q_point),
+          eps_phi_j = get_strain (fe_values, j, q_point);
+
+        cell_matrix(i,j)
+          += (eps_phi_i * stress_strain_tensor * eps_phi_j
+              *
+              fe_values.JxW (q_point));
+      }
+  @endverbatim
+  It is worth noting the expressive power of this piece of code, and to
+  compare it with the complications we had to go through in previous examples
+  for the elasticity problem. (To be fair, the SymmetricTensor class
+  template did not exist when these previous examples were written.) For
+  simplicity, <code>operator*</code> provides for the (double summation) product
+  between symmetric tensors of even rank here.
+
+  Assembling the local contributions
+  @f{eqnarray*}
+      f^K_i &=&
+      (\mathbf{f}, \varphi_i)_K -(\sigma^{n-1},\varepsilon(\varphi_i))_K
+      \\
+      &\approx&
+      \sum_q
+      w_q \left\{
+        \mathbf{f}(\mathbf{x}_q) \cdot \varphi_i(\mathbf{x}_q) -
+        \sigma^{n-1}_q : \varepsilon(\varphi_i(\mathbf{x}_q))
+      \right\}
+  @f}
+  to the right hand side of <a href="#step_18.linear-system">[linear-system]</a> is equally
+  straightforward (note that we do not consider any boundary tractions $\mathbf{b}$ here). Remember that we only had to store the old stress in the
+  quadrature points of cells. In the program, we will provide a variable
+  <code>local_quadrature_points_data</code> that allows us to access the stress
+  $\sigma^{n-1}_q$ in each quadrature point. With this, the code for the right
+  hand side looks like this, again rather elegant:
+  @verbatim
+for (unsigned int i=0; i<dofs_per_cell; ++i)
+  {
+    const unsigned int
+      component_i = fe.system_to_component_index(i).first;
+
+    for (unsigned int q_point=0; q_point<n_q_points; ++q_point)
+      {
+        const SymmetricTensor<2,dim> &old_stress
+          = local_quadrature_points_data[q_point].old_stress;
+
+        cell_rhs(i) += (body_force_values[q_point](component_i) *
+                        fe_values.shape_value (i,q_point)
+                        -
+                        old_stress *
+                        get_strain (fe_values,i,q_point))
+                       *
+                       fe_values.JxW (q_point);
+      }
+  }
+  @endverbatim
+  Note that in the multiplication $\mathbf{f}(\mathbf{x}_q) \cdot \varphi_i(\mathbf{x}_q)$, we have made use of the fact that for the chosen finite element, only
+  one vector component (namely <code>component_i</code>) of $\varphi_i$ is
+  nonzero, and that we therefore also have to consider only one component of
+  $\mathbf{f}(\mathbf{x}_q)$.
+
+  This essentially concludes the new material we present in this function. It
+  later has to deal with boundary conditions as well as hanging node
+  constraints, but this parallels what we had to do previously in other
+  programs already.
+
+<li> <code>solve_linear_problem ()</code> [via <code>solve_timestep ()</code>]:
+  Unlike the previous one, this function is not really interesting, since it
+  does what similar functions have done in all previous tutorial programs --
+  solving the linear system using the CG method, using an incomplete LU
+  decomposition as a preconditioner (in the %parallel case, it uses an ILU of
+  each processor's block separately). It is virtually unchanged
+  from step-17.
+
+<li> <code>update_quadrature_point_history ()</code> [via
+  <code>solve_timestep ()</code>]: Based on the displacement field $\Delta \mathbf{u}^n$ computed before, we update the stress values in all quadrature points
+  according to <a href="#step_18.stress-update">[stress-update]</a> and <a href="#step_18.stress-update+rot">[stress-update+rot]</a>,
+  including the rotation of the coordinate system.
+
+<li> <code>move_mesh ()</code>: Given the solution computed before, in this
+  function we deform the mesh by moving each vertex by the displacement vector
+  field evaluated at this particular vertex.
+
+<li> <code>output_results ()</code>: This function simply outputs the solution
+  based on what we have said above, i.e. every processor computes output only
+  for its own portion of the domain. In addition to the solution, we also compute the norm of
+  the stress averaged over all the quadrature points on each cell.
+</ul>
+
+With this general structure of the code, we only have to define what case we
+want to solve. For the present program, we have chosen to simulate the
+quasistatic deformation of a vertical cylinder for which the bottom boundary
+is fixed and the top boundary is pushed down at a prescribed vertical
+velocity. However, the horizontal velocity of the top boundary is left
+unspecified -- one can imagine this situation as a well-greased plate pushing
+from the top onto the cylinder, the points on the top boundary of the cylinder
+being allowed to slide horizontally along the surface of the plate, but forced
+to move downward by the plate. The inner and outer boundaries of the cylinder
+are free and not subject to any prescribed deflection or traction. In
+addition, gravity acts on the body.
+
+The program text will reveal more about how to implement this situation, and
+the results section will show what displacement pattern comes out of this
+simulation.
+
+
+
diff --git a/examples/step-18/doc/kind b/examples/step-18/doc/kind
new file mode 100644
index 0000000..56e049c
--- /dev/null
+++ b/examples/step-18/doc/kind
@@ -0,0 +1 @@
+solids
diff --git a/examples/step-18/doc/results.dox b/examples/step-18/doc/results.dox
new file mode 100644
index 0000000..95e369d
--- /dev/null
+++ b/examples/step-18/doc/results.dox
@@ -0,0 +1,548 @@
+<h1>Results</h1>
+
+
+Running the program takes a good while if one doesn't change the flags
+in the Makefile: in debug mode (the default) and on only a single
+machine, it takes about 3h45min on my Athlon XP 2GHz. Fortunately, by
+setting <code>debug-mode = off</code> in the Makefile, this can be
+reduced significantly, to about 23 minutes, a much more reasonable time.
+
+
+
+
+If run, the program prints the following output, explaining what it is
+doing during all that time:
+@code
+examples/\step-18> time make run
+============================ Running \step-18
+Timestep 1 at time 1
+  Cycle 0:
+    Number of active cells:       3712 (by partition: 3712)
+    Number of degrees of freedom: 17226 (by partition: 17226)
+    Assembling system... norm of rhs is 2.34224e+10
+    Solver converged in 117 iterations.
+    Updating quadrature point data...
+  Cycle 1:
+    Number of active cells:       12812 (by partition: 12812)
+    Number of degrees of freedom: 51726 (by partition: 51726)
+    Assembling system... norm of rhs is 2.34227e+10
+    Solver converged in 130 iterations.
+    Updating quadrature point data...
+    Moving mesh...
+
+Timestep 2 at time 2
+    Assembling system... norm of rhs is 2.30852e+10
+    Solver converged in 131 iterations.
+    Updating quadrature point data...
+    Moving mesh...
+
+Timestep 3 at time 3
+    Assembling system... norm of rhs is 2.27792e+10
+    Solver converged in 126 iterations.
+    Updating quadrature point data...
+    Moving mesh...
+
+Timestep 4 at time 4
+    Assembling system... norm of rhs is 2.25107e+10
+    Solver converged in 124 iterations.
+    Updating quadrature point data...
+    Moving mesh...
+
+Timestep 5 at time 5
+    Assembling system... norm of rhs is 2.22883e+10
+    Solver converged in 122 iterations.
+    Updating quadrature point data...
+    Moving mesh...
+
+Timestep 6 at time 6
+    Assembling system... norm of rhs is 2.21272e+10
+    Solver converged in 118 iterations.
+    Updating quadrature point data...
+    Moving mesh...
+
+Timestep 7 at time 7
+    Assembling system... norm of rhs is 2.20652e+10
+    Solver converged in 117 iterations.
+    Updating quadrature point data...
+    Moving mesh...
+
+Timestep 8 at time 8
+    Assembling system... norm of rhs is 2.22501e+10
+    Solver converged in 127 iterations.
+    Updating quadrature point data...
+    Moving mesh...
+
+Timestep 9 at time 9
+    Assembling system... norm of rhs is 2.32742e+10
+    Solver converged in 144 iterations.
+    Updating quadrature point data...
+    Moving mesh...
+
+Timestep 10 at time 10
+    Assembling system... norm of rhs is 2.55929e+10
+    Solver converged in 149 iterations.
+    Updating quadrature point data...
+    Moving mesh...
+@endcode
+In other words, it is computing on 12,000 cells and with some 52,000
+unknowns. Not a whole lot, but enough for a coupled three-dimensional
+problem to keep a computer busy for a while. At the end of the day,
+this is what we have for output:
+@code
+examples/\step-18> ls -l *.vtu
+-rw-r--r--  1 bangerth wheeler 8797414 May 25 09:10 solution-0001.0000.vtu
+-rw-r--r--  1 bangerth wheeler 8788500 May 25 09:32 solution-0002.0000.vtu
+-rw-r--r--  1 bangerth wheeler 8763718 May 25 09:55 solution-0003.0000.vtu
+-rw-r--r--  1 bangerth wheeler 8738940 May 25 10:17 solution-0004.0000.vtu
+-rw-r--r--  1 bangerth wheeler 8710104 May 25 10:39 solution-0005.0000.vtu
+-rw-r--r--  1 bangerth wheeler 8685388 May 25 11:01 solution-0006.0000.vtu
+-rw-r--r--  1 bangerth wheeler 8649088 May 25 11:23 solution-0007.0000.vtu
+-rw-r--r--  1 bangerth wheeler 8585146 May 25 11:45 solution-0008.0000.vtu
+-rw-r--r--  1 bangerth wheeler 8489764 May 25 12:07 solution-0009.0000.vtu
+-rw-r--r--  1 bangerth wheeler 8405388 May 25 12:29 solution-0010.0000.vtu
+@endcode
+
+
+If we visualize these files with VisIt or Paraview, we get to see the full picture
+of the disaster our forced compression wreaks on the cylinder (colors in the
+images encode the norm of the stress in the material):
+
+
+<table width="100%">
+  <tr width="100%">
+    <td width="33%">
+      <img src="http://www.dealii.org/images/steps/developer/step-18.sequential-0002.0000.png" alt="">
+       Time = 2
+    </td>
+
+    <td width="33%">
+      <img src="http://www.dealii.org/images/steps/developer/step-18.sequential-0005.0000.png" alt="">
+       Time = 5
+    </td>
+
+    <td width="33%">
+      <img src="http://www.dealii.org/images/steps/developer/step-18.sequential-0007.0000.png" alt="">
+       Time = 7
+    </td>
+  </tr>
+
+  <tr width="100%">
+    <td width="33%">
+      <img src="http://www.dealii.org/images/steps/developer/step-18.sequential-0008.0000.png" alt="">
+      Time = 8
+    </td>
+
+    <td width="33%">
+      <img src="http://www.dealii.org/images/steps/developer/step-18.sequential-0009.0000.png" alt="">
+      Time = 9
+    </td>
+
+    <td width="33%">
+      <img src="http://www.dealii.org/images/steps/developer/step-18.sequential-0010.0000.png" alt="">
+      Time = 10
+    </td>
+  </tr>
+</table>
+
+
+As is clearly visible, as we keep compressing the cylinder, it starts
+to buckle and ultimately collapses. Towards the end of the simulation,
+the deflection pattern becomes nonsymmetric (the cylinder top slides
+to the right). The model clearly does not provide for this (all our
+forces and boundary deflections are symmetric) but the effect is
+probably physically correct anyway: in reality, small inhomogeneities
+in the body's material properties would lead it to buckle to one side
+to evade the forcing; in numerical simulations, small perturbations
+such as numerical round-off or an inexact solution of a linear system
+by an iterative solver could have the same effect. Another typical source for
+asymmetries in adaptive computations is that only a certain fraction of cells
+is refined in each step, which may lead to asymmetric meshes even if the
+original coarse mesh was symmetric.
+
+
+
+
+Whether the computation is fully converged is a different matter. In order to
+see whether it is, we ran the program again with one more global refinement at
+the beginning and with the time step halved. This would have taken a very long
+time on a single machine, so we used our cluster again and ran it on 16
+processors (8 dual-processor machines) in parallel. The beginning of the output
+now looks like this:
+@code
+Timestep 1 at time 0.5
+  Cycle 0:
+    Number of active cells:       29696 (by partition: 1862+1890+1866+1850+1864+1850+1858+1842+1911+1851+1911+1804+1854+1816+1839+1828)
+    Number of degrees of freedom: 113100 (by partition: 7089+7218+6978+6972+7110+6840+7119+7023+7542+7203+7068+6741+6921+6759+7464+7053)
+    Assembling system... norm of rhs is 1.05874e+10
+    Solver converged in 289 iterations.
+    Updating quadrature point data...
+  Cycle 1:
+    Number of active cells:       102097 (by partition: 6346+6478+6442+6570+6370+6483+6413+6376+6403+6195+6195+6195+6494+6571+6371+6195)
+    Number of degrees of freedom: 358875 (by partition: 22257+22161+22554+22482+21759+23361+23040+21609+22347+20937+21801+21678+24126+25149+21321+22293)
+    Assembling system... norm of rhs is 3.46364e+10
+    Solver converged in 249 iterations.
+    Updating quadrature point data...
+    Moving mesh...
+
+Timestep 2 at time 1
+    Assembling system... norm of rhs is 3.42269e+10
+    Solver converged in 248 iterations.
+    Updating quadrature point data...
+    Moving mesh...
+
+Timestep 3 at time 1.5
+    Assembling system... norm of rhs is 3.38229e+10
+    Solver converged in 247 iterations.
+    Updating quadrature point data...
+    Moving mesh...
+
+Timestep 4 at time 2
+    Assembling system... norm of rhs is 3.34247e+10
+    Solver converged in 247 iterations.
+    Updating quadrature point data...
+    Moving mesh...
+
+[...]
+
+Timestep 20 at time 10
+    Assembling system... norm of rhs is 3.2449e+10
+    Solver converged in 493 iterations.
+    Updating quadrature point data...
+    Moving mesh...
+@endcode
+That's quite a good number of unknowns, given that we are in 3d. The output of
+this program consists of 16 files for each time step:
+@code
+examples/\step-18> ls -l solution-0001*
+-rw-r--r--    1 bangerth mfw       4325219 Aug 11 09:44 solution-0001.000.d2
+-rw-r--r--    1 bangerth mfw       4454460 Aug 11 09:44 solution-0001.001.d2
+-rw-r--r--    1 bangerth mfw       4485242 Aug 11 09:43 solution-0001.002.d2
+-rw-r--r--    1 bangerth mfw       4517364 Aug 11 09:43 solution-0001.003.d2
+-rw-r--r--    1 bangerth mfw       4462829 Aug 11 09:43 solution-0001.004.d2
+-rw-r--r--    1 bangerth mfw       4482487 Aug 11 09:43 solution-0001.005.d2
+-rw-r--r--    1 bangerth mfw       4548619 Aug 11 09:43 solution-0001.006.d2
+-rw-r--r--    1 bangerth mfw       4522421 Aug 11 09:43 solution-0001.007.d2
+-rw-r--r--    1 bangerth mfw       4337529 Aug 11 09:43 solution-0001.008.d2
+-rw-r--r--    1 bangerth mfw       4163047 Aug 11 09:43 solution-0001.009.d2
+-rw-r--r--    1 bangerth mfw       4288247 Aug 11 09:43 solution-0001.010.d2
+-rw-r--r--    1 bangerth mfw       4350410 Aug 11 09:43 solution-0001.011.d2
+-rw-r--r--    1 bangerth mfw       4458427 Aug 11 09:43 solution-0001.012.d2
+-rw-r--r--    1 bangerth mfw       4466037 Aug 11 09:43 solution-0001.013.d2
+-rw-r--r--    1 bangerth mfw       4505679 Aug 11 09:44 solution-0001.014.d2
+-rw-r--r--    1 bangerth mfw       4340488 Aug 11 09:44 solution-0001.015.d2
+@endcode
+
+First, here are the mesh on which we compute and its partitioning among
+the 16 processors:
+
+
+<table width="100%">
+  <tr width="100%">
+    <td width="49%">
+      <img src="http://www.dealii.org/images/steps/developer/step-18.parallel-000mesh.png" alt="">
+    </td>
+
+    <td width="49%">
+      <img src="http://www.dealii.org/images/steps/developer/step-18.parallel-0002.p.png" alt="">
+    </td>
+  </tr>
+</table>
+
+
+Finally, here is the same output as we have shown before for the much smaller
+sequential case:
+
+
+<table width="100%">
+  <tr width="100%">
+    <td width="33%">
+      <img src="http://www.dealii.org/images/steps/developer/step-18.parallel-0002.s.png" alt="">
+       Time = 2
+    </td>
+
+    <td width="33%">
+      <img src="http://www.dealii.org/images/steps/developer/step-18.parallel-0005.s.png" alt="">
+       Time = 5
+    </td>
+
+    <td width="33%">
+      <img src="http://www.dealii.org/images/steps/developer/step-18.parallel-0007.s.png" alt="">
+       Time = 7
+    </td>
+  </tr>
+
+  <tr width="100%">
+    <td width="33%">
+      <img src="http://www.dealii.org/images/steps/developer/step-18.parallel-0008.s.png" alt="">
+       Time = 8
+    </td>
+
+    <td width="33%">
+      <img src="http://www.dealii.org/images/steps/developer/step-18.parallel-0009.s.png" alt="">
+       Time = 9
+    </td>
+
+    <td width="33%">
+      <img src="http://www.dealii.org/images/steps/developer/step-18.parallel-0010.s.png" alt="">
+       Time = 10
+    </td>
+  </tr>
+</table>
+
+
+If one compares this with the previous run, the results are qualitatively
+similar, but quantitatively definitely different. The previous computation was
+therefore certainly not converged, though we can't say anything for sure about
+the present one. One would need an even finer computation to find out. However,
+the point may be moot: looking at the last picture in detail, it is pretty
+obvious that not only is the linear small
+deformation model we chose completely inadequate, but for a realistic
+simulation we would also need to make sure that the body does not intersect
+itself during deformation. Without such a formulation we cannot expect anything
+to make physical sense, even if it produces nice pictures!
+
+
+<h3>Possible directions for extensions</h3>
+
+The program as is does not really solve an equation that has many applications
+in practice: quasi-static material deformation based on a purely elastic law
+is almost boring. However, the program may serve as the starting point for
+more interesting experiments, and that indeed was the initial motivation for
+writing it. Here are some suggestions of what the program is missing and in
+what direction it may be extended:
+
+<h5>Plasticity models</h5>
+
+ The most obvious extension is to use a more
+realistic material model for large-scale quasistatic deformation. The natural
+choice for this would be plasticity, in which a nonlinear relationship between
+stress and strain replaces equation <a href="#step_18.stress-strain">[stress-strain]</a>. Plasticity
+models are usually rather complicated to program since the stress-strain
+dependence is generally non-smooth. The material can be thought of as being able
+to withstand only a maximal stress (the yield stress) after which it starts to
+“flow”. A mathematical description of this can be given in the form of a
+variational inequality, which alternatively can be treated as minimizing the
+elastic energy
+@f[
+  E(\mathbf{u}) =
+  (\varepsilon(\mathbf{u}), C\varepsilon(\mathbf{u}))_{\Omega}
+  - (\mathbf{f}, \mathbf{u})_{\Omega} - (\mathbf{b}, \mathbf{u})_{\Gamma_N},
+@f]
+subject to the constraint
+@f[
+  f(\sigma(\mathbf{u})) \le 0
+@f]
+on the stress. This extension makes the problem to be solved in each time step
+nonlinear, so we need another loop within each time step.
+
+Without going into further details of this model, we refer to the excellent
+book by Simo and Hughes on “Computational Inelasticity” for a
+comprehensive overview of computational strategies for solving plastic
+models. Alternatively, a brief but concise description of an algorithm for
+plasticity is given in an article by S. Commend, A. Truty, and Th. Zimmermann,
+titled “Stabilized finite elements applied to
+elastoplasticity: I. Mixed displacement-pressure formulation”
+(Computer Methods in Applied Mechanics and Engineering, vol. 193,
+pp. 3559-3586, 2004).
+
+
+<h5>Stabilization issues</h5>
+
+The formulation we have chosen, i.e. using
+piecewise (bi-, tri-)linear elements for all components of the displacement
+vector, and treating the stress as a variable dependent on the displacement is
+appropriate for most materials. However, this so-called displacement-based
+formulation becomes unstable and exhibits spurious modes for incompressible or
+nearly-incompressible materials. While fluids are usually not elastic (in most
+cases, the stress depends on velocity gradients, not displacement gradients,
+although there are exceptions such as electro-rheologic fluids), there are a
+few solids that are nearly incompressible, for example rubber. Another case is
+that many plasticity models ultimately let the material become incompressible,
+although this is outside the scope of the present program.
+
+Incompressibility is characterized by Poisson's ratio
+@f[
+  \nu = \frac{\lambda}{2(\lambda+\mu)},
+@f]
+where $\lambda,\mu$ are the Lam&eacute; constants of the material.
+Physical constraints indicate that $-1\le \nu\le \frac 12$ (the condition
+also follows from mathematical stability considerations). If $\nu$
+approaches $\frac 12$, then the material becomes incompressible. In that
+case, pure displacement-based formulations are no longer appropriate for the
+solution of such problems, and stabilization techniques have to be employed
+for a stable and accurate solution. The book and paper cited above give
+indications as to how to do this, but there is also a large volume of
+literature on this subject; a good start to get an overview of the topic can
+be found in the references of the paper by
+H.-Y. Duan and Q. Lin on “Mixed finite elements of least-squares type for
+elasticity” (Computer Methods in Applied Mechanics and Engineering, vol. 194,
+pp. 1093-1112, 2005).
+
+
+<h5>Refinement during timesteps</h5>
+
+In the present form, the program
+only refines the initial mesh a number of times, but then never again. For any
+kind of realistic simulation, one would want to extend this so that the mesh
+is refined and coarsened every few time steps instead. This is not hard to do,
+in fact, but has been left for future tutorial programs or as an exercise, if
+you wish.
+
+The main complication one has to overcome is that one has to
+transfer the data that is stored in the quadrature points of the cells of the
+old mesh to the new mesh, preferably by some sort of projection scheme. The
+general approach to this would go like this:
+
+- At the beginning, the data is only available in the quadrature points of
+  individual cells, not as a finite element field that is defined everywhere.
+
+- So let us find a finite element field that <i>is</i> defined everywhere so
+  that we can later interpolate it to the quadrature points of the new
+  mesh. In general, it will be difficult to find a continuous finite element
+  field that matches the values in the quadrature points exactly because the
+  number of degrees of freedom of these fields does not match the number of
+  quadrature points there are, and the nodal values of this global field will
+  either be over- or underdetermined. But it is usually not very difficult to
+  find a discontinuous field that matches the values in the quadrature points;
+  for example, if you have a QGauss(2) quadrature formula (i.e. 4 points per
+  cell in 2d, 8 points in 3d), then one would use a finite element of kind
+  FE_DGQ(1), i.e. bi-/tri-linear functions as these have 4 degrees of freedom
+  per cell in 2d and 8 in 3d.
+
+- There are functions that can make this conversion from individual points to
+  a global field simpler. The following piece of pseudo-code should help if
+  you use a QGauss(2) quadrature formula. Note that the multiplication by the
+  projection matrix below takes a vector of scalar components, i.e., we can only
+  convert one set of scalars at a time from the quadrature points to the degrees
+  of freedom and vice versa. So we need to store each component of stress separately,
+  which requires <code>dim*dim</code> vectors. We'll store this set of vectors in a 2D array to
+  make it easier to read off components in the same way you would the stress tensor.
+  Thus, we'll loop over the components of stress on each cell and store
+  these values in the global history field. (The prefix <code>history_</code>
+  indicates that we work with quantities related to the history variables defined
+  in the quadrature points.)
+  @code
+    FE_DGQ<dim>     history_fe (1);
+    DoFHandler<dim> history_dof_handler (triangulation);
+    history_dof_handler.distribute_dofs (history_fe);
+
+    std::vector< std::vector< Vector<double> > >
+                 history_field (dim, std::vector< Vector<double> >(dim)),
+                 local_history_values_at_qpoints (dim, std::vector< Vector<double> >(dim)),
+                 local_history_fe_values (dim, std::vector< Vector<double> >(dim));
+
+    for (unsigned int i=0; i<dim; i++)
+      for (unsigned int j=0; j<dim; j++)
+      {
+        history_field[i][j].reinit(history_dof_handler.n_dofs());
+	local_history_values_at_qpoints[i][j].reinit(quadrature.size());
+	local_history_fe_values[i][j].reinit(history_fe.dofs_per_cell);
+      }
+
+    FullMatrix<double> qpoint_to_dof_matrix (history_fe.dofs_per_cell,
+                                             quadrature.size());
+    FETools::compute_projection_from_quadrature_points_matrix
+              (history_fe,
+	       quadrature, quadrature,
+	       qpoint_to_dof_matrix);
+
+    typename DoFHandler<dim>::active_cell_iterator cell = dof_handler.begin_active(),
+                                                   endc = dof_handler.end(),
+                                                   dg_cell = history_dof_handler.begin_active();
+
+    for (; cell!=endc; ++cell, ++dg_cell)
+      {
+
+        PointHistory<dim> *local_quadrature_points_history
+	       = reinterpret_cast<PointHistory<dim> *>(cell->user_pointer());
+
+        Assert (local_quadrature_points_history >=
+                    &quadrature_point_history.front(),
+		    ExcInternalError());
+        Assert (local_quadrature_points_history <
+                    &quadrature_point_history.back(),
+		    ExcInternalError());
+
+        for (unsigned int i=0; i<dim; i++)
+          for (unsigned int j=0; j<dim; j++)
+          {
+            for (unsigned int q=0; q<quadrature.size(); ++q)
+              local_history_values_at_qpoints[i][j](q)
+                       = local_quadrature_points_history[q].old_stress[i][j];
+
+            qpoint_to_dof_matrix.vmult (local_history_fe_values[i][j],
+                                        local_history_values_at_qpoints[i][j]);
+
+            dg_cell->set_dof_values (local_history_fe_values[i][j],
+                                     history_field[i][j]);
+          }
+      }
+  @endcode
+
+- Now that we have a global field, we can refine the mesh and transfer the
+  history_field vector as usual using the SolutionTransfer class. This will
+  interpolate everything from the old to the new mesh.
+
+- In a final step, we have to get the data back from the now interpolated
+  global field to the quadrature points on the new mesh. The following code
+  will do that:
+  @code
+    FullMatrix<double> dof_to_qpoint_matrix (quadrature.size(),
+                                             history_fe.dofs_per_cell);
+    FETools::compute_interpolation_to_quadrature_points_matrix
+              (history_fe,
+	       quadrature,
+	       dof_to_qpoint_matrix);
+
+    typename DoFHandler<dim>::active_cell_iterator cell = dof_handler.begin_active(),
+                                                   endc = dof_handler.end(),
+                                                   dg_cell = history_dof_handler.begin_active();
+
+    for (; cell != endc; ++cell, ++dg_cell)
+    {
+      PointHistory<dim> *local_quadrature_points_history
+	     = reinterpret_cast<PointHistory<dim> *>(cell->user_pointer());
+
+      Assert (local_quadrature_points_history >=
+                  &quadrature_point_history.front(),
+		  ExcInternalError());
+      Assert (local_quadrature_points_history <
+                  &quadrature_point_history.back(),
+		  ExcInternalError());
+
+      for (unsigned int i=0; i<dim; i++)
+        for (unsigned int j=0; j<dim; j++)
+        {
+          dg_cell->get_dof_values (history_field[i][j],
+	                           local_history_fe_values[i][j]);
+
+          dof_to_qpoint_matrix.vmult (local_history_values_at_qpoints[i][j],
+                                      local_history_fe_values[i][j]);
+
+          for (unsigned int q=0; q<quadrature.size(); ++q)
+            local_quadrature_points_history[q].old_stress[i][j]
+                       = local_history_values_at_qpoints[i][j](q);
+        }
+    }
+  @endcode
+
+It becomes a bit more complicated once we run the program in parallel, since
+then each process only stores this data for the cells it owned on the old
+mesh. That said, using a parallel vector for <code>history_field</code> will
+do the trick if you put a call to <code>compress</code> after the transfer
+from quadrature points into the global vector.
+
+
+<h5>Ensuring mesh regularity</h5>
+
+At present, the program makes no attempt
+to make sure that a cell, after moving its vertices at the end of the time
+step, still has a valid geometry (i.e. that its Jacobian determinant is
+positive and bounded away from zero everywhere). It is, in fact, not very hard
+to set boundary values and forcing terms in such a way that one gets distorted
+and inverted cells rather quickly. Certainly, in some cases of large
+deformation, this is unavoidable with a mesh of finite mesh size, but in some
+other cases this should be preventable by appropriate mesh refinement and/or a
+reduction of the time step size. The program does not do that, but a more
+sophisticated version definitely should employ some sort of heuristic defining
+what amount of deformation of cells is acceptable, and what isn't.
+
diff --git a/examples/step-18/doc/tooltip b/examples/step-18/doc/tooltip
new file mode 100644
index 0000000..4e24d6b
--- /dev/null
+++ b/examples/step-18/doc/tooltip
@@ -0,0 +1 @@
+Quasistatic elasticity. More parallel computing.
diff --git a/examples/step-18/step-18.cc b/examples/step-18/step-18.cc
new file mode 100644
index 0000000..50ea691
--- /dev/null
+++ b/examples/step-18/step-18.cc
@@ -0,0 +1,1939 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2000 - 2016 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Author: Wolfgang Bangerth, University of Texas at Austin, 2000, 2004, 2005,
+ * Timo Heister, 2013
+ */
+
+
+// First the usual list of header files that have already been used in
+// previous example programs:
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/multithread_info.h>
+#include <deal.II/base/conditional_ostream.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/lac/petsc_parallel_vector.h>
+#include <deal.II/lac/petsc_parallel_sparse_matrix.h>
+#include <deal.II/lac/petsc_solver.h>
+#include <deal.II/lac/petsc_precondition.h>
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/lac/sparsity_tools.h>
+#include <deal.II/distributed/shared_tria.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/grid_refinement.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/manifold_lib.h>
+#include <deal.II/grid/grid_tools.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/dofs/dof_renumbering.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/fe_system.h>
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/matrix_tools.h>
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/numerics/error_estimator.h>
+
+// And here the only two new things among the header files: an include file in
+// which symmetric tensors of rank 2 and 4 are implemented, as introduced in
+// the introduction:
+#include <deal.II/base/symmetric_tensor.h>
+
+// And a header that implements filters for iterators looping over all
+// cells. We will use this when selecting only those cells for output that are
+// owned by the present process in a %parallel program:
+#include <deal.II/grid/filtered_iterator.h>
+
+// This is then simply C++ again:
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <iomanip>
+
+// The last step is as in all previous programs:
+namespace Step18
+{
+  using namespace dealii;
+
+  // @sect3{The <code>PointHistory</code> class}
+
+  // As was mentioned in the introduction, we have to store the old stress in
+  // quadrature point so that we can compute the residual forces at this point
+  // during the next time step. This alone would not warrant a structure with
+  // only one member, but in more complicated applications, we would have to
+  // store more information in quadrature points as well, such as the history
+  // variables of plasticity, etc. In essence, we have to store everything
+  // that affects the present state of the material here, which in plasticity
+  // is determined by the deformation history variables.
+  //
+  // We will not give this class any meaningful functionality beyond being
+  // able to store data, i.e. there are no constructors, destructors, or other
+  // member functions. In such cases of `dumb' classes, we usually opt to
+  // declare them as <code>struct</code> rather than <code>class</code>, to
+  // indicate that they are closer to C-style structures than C++-style
+  // classes.
+  template <int dim>
+  struct PointHistory
+  {
+    SymmetricTensor<2,dim> old_stress;  // stress kept at this quadrature point for use in the next time step
+  };
+
+
+  // @sect3{The stress-strain tensor}
+
+  // Next, we define the linear relationship between the stress and the strain
+  // in elasticity. It is given by a tensor of rank 4 that is usually written
+  // in the form $C_{ijkl} = \mu (\delta_{ik} \delta_{jl} + \delta_{il}
+  // \delta_{jk}) + \lambda \delta_{ij} \delta_{kl}$. This tensor maps
+  // symmetric tensors of rank 2 to symmetric tensors of rank 2. A function
+  // implementing its creation for given values of the Lame constants $\lambda$
+  // and $\mu$ is straightforward:
+  template <int dim>
+  SymmetricTensor<4,dim>
+  get_stress_strain_tensor (const double lambda, const double mu)
+  {
+    SymmetricTensor<4,dim> elasticity;  // C_{ijkl}, filled entry by entry
+    for (unsigned int a=0; a<dim; ++a)
+      for (unsigned int b=0; b<dim; ++b)
+        for (unsigned int c=0; c<dim; ++c)
+          for (unsigned int d=0; d<dim; ++d)
+            elasticity[a][b][c][d] = (((a==c) && (b==d) ? mu : 0.0) +
+                                      ((a==d) && (b==c) ? mu : 0.0) +
+                                      ((a==b) && (c==d) ? lambda : 0.0));
+    return elasticity;
+  }
+
+  // With this function, we will define a static member variable of the main
+  // class below that will be used throughout the program as the stress-strain
+  // tensor. Note that in more elaborate programs, this will probably be a
+  // member variable of some class instead, or a function that returns the
+  // stress-strain relationship depending on other input. For example in
+  // damage theory models, the Lame constants are considered a function of the
+  // prior stress/strain history of a point. Conversely, in plasticity the
+  // form of the stress-strain tensor is modified if the material has reached
+  // the yield stress in a certain point, and possibly also depending on its
+  // prior history.
+  //
+  // In the present program, however, we assume that the material is
+  // completely elastic and linear, and a constant stress-strain tensor is
+  // sufficient for our present purposes.
+
+
+
+  // @sect3{Auxiliary functions}
+
+  // Before the rest of the program, here are a few functions that we need as
+  // tools. These are small functions that are called in inner loops, so we
+  // mark them as <code>inline</code>.
+  //
+  // The first one computes the symmetric strain tensor for shape function
+  // <code>shape_func</code> at quadrature point <code>q_point</code> by
+  // forming the symmetric gradient of this shape function. We need that when
+  // we want to form the matrix, for example.
+  //
+  // We should note that in previous examples where we have treated
+  // vector-valued problems, we have always asked the finite element object in
+  // which of the vector component the shape function is actually non-zero,
+  // and thereby avoided to compute any terms that we could prove were zero
+  // anyway. For this, we used the <code>fe.system_to_component_index</code>
+  // function that returns in which component a shape function was non-zero, and
+  // also that the <code>fe_values.shape_value</code> and
+  // <code>fe_values.shape_grad</code> functions only returned the value and
+  // gradient of the single non-zero component of a shape function if this is
+  // a vector-valued element.
+  //
+  // This was an optimization, and if it isn't terribly time critical, we can
+  // get away with a simpler technique: just ask the <code>fe_values</code>
+  // for the value or gradient of a given component of a given shape function
+  // at a given quadrature point. This is what the
+  // <code>fe_values.shape_grad_component(shape_func,q_point,i)</code> call
+  // does: return the full gradient of the <code>i</code>th component of shape
+  // function <code>shape_func</code> at quadrature point
+  // <code>q_point</code>. If a certain component of a certain shape function
+  // is always zero, then this will simply always return zero.
+  //
+  // As mentioned, using <code>fe_values.shape_grad_component</code> instead
+  // of the combination of <code>fe.system_to_component_index</code> and
+  // <code>fe_values.shape_grad</code> may be less efficient, but its
+  // implementation is optimized for such cases and shouldn't be a big
+  // slowdown. We demonstrate the technique here since it is so much simpler
+  // and straightforward.
+  template <int dim>
+  inline
+  SymmetricTensor<2,dim>
+  get_strain (const FEValues<dim> &fe_values,
+              const unsigned int   shape_func,
+              const unsigned int   q_point)
+  {
+    // The symmetric gradient of shape function <code>shape_func</code> at
+    // quadrature point <code>q_point</code>, assembled entry by entry and
+    // returned by value:
+    SymmetricTensor<2,dim> strain;
+
+    for (unsigned int row=0; row<dim; ++row)
+      {
+        // Diagonal entry: the derivative in direction <code>row</code> of
+        // component <code>row</code> of the vector-valued shape function.
+        strain[row][row]
+          = fe_values.shape_grad_component (shape_func,q_point,row)[row];
+
+        // Off-diagonal entries: the average of the two mixed derivatives.
+        // Only the upper right half (<code>col > row</code>) is written
+        // explicitly; the <code>SymmetricTensor</code> class stores a
+        // single copy of each symmetric pair, so the mirrored entry in the
+        // lower left half is filled at the same time.
+        for (unsigned int col=row+1; col<dim; ++col)
+          strain[row][col]
+            = (fe_values.shape_grad_component (shape_func,q_point,row)[col] +
+               fe_values.shape_grad_component (shape_func,q_point,col)[row]) / 2;
+      }
+
+    return strain;
+  }
+
+
+  // The second function does something very similar (and therefore is given
+  // the same name): compute the symmetric strain tensor from the gradient of
+  // a vector-valued field. If you already have a solution field, the
+  // <code>fe_values.get_function_gradients</code> function allows you to extract
+  // the gradients of each component of your solution field at a quadrature
+  // point. It returns this as a vector of rank-1 tensors: one rank-1 tensor
+  // (gradient) per vector component of the solution. From this we have to
+  // reconstruct the (symmetric) strain tensor by transforming the data
+  // storage format and symmetrization. We do this in the same way as above,
+  // i.e. we avoid a few computations by filling first the diagonal and then
+  // only one half of the symmetric tensor (the <code>SymmetricTensor</code>
+  // class makes sure that it is sufficient to write only one of the two
+  // symmetric components).
+  //
+  // Before we do this, though, we make sure that the input has the kind of
+  // structure we expect: that is that there are <code>dim</code> vector
+  // components, i.e. one displacement component for each coordinate
+  // direction. We test this with the <code>Assert</code> macro that will
+  // simply abort our program if the condition is not met.
+  template <int dim>
+  inline
+  SymmetricTensor<2,dim>
+  get_strain (const std::vector<Tensor<1,dim> > &grad)
+  {
+    // One gradient per displacement component is required:
+    Assert (grad.size() == dim, ExcInternalError());
+
+    SymmetricTensor<2,dim> eps;
+    for (unsigned int row=0; row<dim; ++row)
+      {
+        eps[row][row] = grad[row][row];
+        for (unsigned int col=row+1; col<dim; ++col)
+          eps[row][col] = (grad[row][col] + grad[col][row]) / 2;
+      }
+    return eps;
+  }
+
+
+  // Finally, below we will need a function that computes the rotation matrix
+  // induced by a displacement at a given point. In fact, of course, the
+  // displacement at a single point only has a direction and a magnitude, it
+  // is the change in direction and magnitude that induces rotations. In
+  // effect, the rotation matrix can be computed from the gradients of a
+  // displacement, or, more specifically, from the curl.
+  //
+  // The formulas by which the rotation matrices are determined are a little
+  // awkward, especially in 3d. For 2d, there is a simpler way, so we
+  // implement this function twice, once for 2d and once for 3d, so that we
+  // can compile and use the program in both space dimensions if so desired --
+  // after all, deal.II is all about dimension independent programming and
+  // reuse of algorithms thoroughly tested with cheap computations in 2d, for
+  // the more expensive computations in 3d. Here is one case, where we have to
+  // implement different algorithms for 2d and 3d, but then can write the rest
+  // of the program in a way that is independent of the space dimension.
+  //
+  // So, without further ado to the 2d implementation:
+  Tensor<2,2>
+  get_rotation_matrix (const std::vector<Tensor<1,2> > &grad_u)
+  {
+    // First, compute the curl of the displacement field from the gradients
+    // of its two components. We are in 2d, so the curl is a scalar:
+    const double curl = (grad_u[1][0] - grad_u[0][1]);
+
+    // From this, compute the angle of rotation:
+    const double angle = std::atan (curl);
+
+    // And from the cosine and sine of that angle, build the 2x2 matrix:
+    const double t[2][2] = {{ std::cos(angle), std::sin(angle) },
+      {-std::sin(angle), std::cos(angle) }
+    };
+    return Tensor<2,2>(t);
+  }
+
+
+  // The 3d case is a little more contrived:
+  Tensor<2,3>
+  get_rotation_matrix (const std::vector<Tensor<1,3> > &grad_u)
+  {
+    // Again first compute the curl of the displacement field. This time, it
+    // is a real vector:
+    const Point<3> curl (grad_u[2][1] - grad_u[1][2],
+                         grad_u[0][2] - grad_u[2][0],
+                         grad_u[1][0] - grad_u[0][1]);
+
+    // From this vector, using its magnitude, compute the tangent of the angle
+    // of rotation, and from it the actual angle:
+    const double tan_angle = std::sqrt(curl*curl);
+    const double angle = std::atan (tan_angle);
+
+    // Now, here's one problem: if the angle of rotation is too small, that
+    // means that there is no rotation going on (for example a translational
+    // motion). In that case, the rotation matrix is the identity matrix.
+    //
+    // The reason why we stress that is that in this case we have that
+    // <code>tan_angle==0</code>. Further down, we need to divide by that
+    // number in the computation of the axis of rotation, and we would get
+    // into trouble when doing so. Therefore, let's shortcut this and
+    // simply return the identity matrix if the angle of rotation is really
+    // small:
+    if (angle < 1e-9)
+      {
+        static const double rotation[3][3]
+        = {{ 1, 0, 0}, { 0, 1, 0 }, { 0, 0, 1 } };
+        static const Tensor<2,3> rot(rotation);
+        return rot;
+      }
+
+    // Otherwise compute the real rotation matrix. The algorithm for this is
+    // not exactly obvious, but can be found in a number of books,
+    // particularly on computer games where rotation is a very frequent
+    // operation. Online, you can find a description at
+    // http://www.makegames.com/3drotation/ and (this particular form, with
+    // the signs as here) at
+    // http://www.gamedev.net/reference/articles/article1199.asp:
+    const double c = std::cos(angle);
+    const double s = std::sin(angle);
+    const double t = 1-c;
+
+    const Point<3> axis = curl/tan_angle;
+    const double rotation[3][3]
+    = {{
+        t *axis[0] *axis[0]+c,
+        t *axis[0] *axis[1]+s *axis[2],
+        t *axis[0] *axis[2]-s *axis[1]
+      },
+      {
+        t *axis[0] *axis[1]-s *axis[2],
+        t *axis[1] *axis[1]+c,
+        t *axis[1] *axis[2]+s *axis[0]
+      },
+      {
+        t *axis[0] *axis[2]+s *axis[1],
+        t *axis[1] *axis[2]-s *axis[0],  // mirrors entry [1][2] with the sign of the s term flipped
+        t *axis[2] *axis[2]+c
+      }
+    };
+    return Tensor<2,3>(rotation);
+  }
+
+
+
+  // @sect3{The <code>TopLevel</code> class}
+
+  // This is the main class of the program. Since the namespace already
+  // indicates what problem we are solving, let's call it by what it does: it
+  // directs the flow of the program, i.e. it is the toplevel driver.
+  //
+  // The member variables of this class are essentially as before, i.e. it has
+  // to have a triangulation, a DoF handler and associated objects such as
+  // constraints, variables that describe the linear system, etc. There are a
+  // good number of more member functions now, which we will explain below.
+  //
+  // The external interface of the class, however, is unchanged: it has a
+  // public constructor and destructor, and it has a <code>run</code>
+  // function that initiates all the work.
+  template <int dim>
+  class TopLevel
+  {
+  public:
+    TopLevel ();
+    ~TopLevel ();
+    void run ();
+
+  private:
+    // The private interface is more extensive than in step-17. First, we
+    // obviously need functions that create the initial mesh, set up the
+    // variables that describe the linear system on the present mesh
+    // (i.e. matrices and vectors), and then functions that actually assemble
+    // the system, direct what has to be solved in each time step, a function
+    // that solves the linear system that arises in each timestep (and returns
+    // the number of iterations it took), and finally output the solution
+    // vector on the correct mesh:
+    void create_coarse_grid ();
+
+    void setup_system ();
+
+    void assemble_system ();
+
+    void solve_timestep ();
+
+    unsigned int solve_linear_problem ();
+
+    void output_results () const;
+
+    // All, except for the first two, of these functions are called in each
+    // timestep. Since the first time step is a little special, we have
+    // separate functions that describe what has to happen in a timestep: one
+    // for the first, and one for all following timesteps:
+    void do_initial_timestep ();
+
+    void do_timestep ();
+
+    // Then we need a whole bunch of functions that do various things. The
+    // first one refines the initial grid: we start on the coarse grid with a
+    // pristine state, solve the problem, then look at it and refine the mesh
+    // accordingly, and start the same process over again, again with a
+    // pristine state. Thus, refining the initial mesh is somewhat simpler
+    // than refining a grid between two successive time steps, since it does
+    // not involve transferring data from the old to the new triangulation, in
+    // particular the history data that is stored in each quadrature point.
+    void refine_initial_grid ();
+
+    // At the end of each time step, we want to move the mesh vertices around
+    // according to the incremental displacement computed in this time
+    // step. This is the function in which this is done:
+    void move_mesh ();
+
+    // Next are two functions that handle the history variables stored in each
+    // quadrature point. The first one is called before the first timestep to
+    // set up a pristine state for the history variables. It only works on
+    // those quadrature points on cells that belong to the present processor:
+    void setup_quadrature_point_history ();
+
+    // The second one updates the history variables at the end of each
+    // timestep:
+    void update_quadrature_point_history ();
+
+    // This is the new shared Triangulation:
+    parallel::shared::Triangulation<dim>   triangulation;
+
+    FESystem<dim>        fe;
+
+    DoFHandler<dim>      dof_handler;
+
+    ConstraintMatrix     hanging_node_constraints;
+
+    // One difference of this program is that we declare the quadrature
+    // formula in the class declaration. The reason is that in all the other
+    // programs, it didn't do much harm if we had used different quadrature
+    // formulas when computing the matrix and the right hand side, for
+    // example. However, in the present case it does: we store information in
+    // the quadrature points, so we have to make sure all parts of the program
+    // agree on where they are and how many there are on each cell. Thus, let
+    // us first declare the quadrature formula that will be used throughout...
+    const QGauss<dim>          quadrature_formula;
+
+    // ... and then also have a vector of history objects, one per quadrature
+    // point on those cells for which we are responsible (i.e. we don't store
+    // history data for quadrature points on cells that are owned by other
+    // processors).
+    std::vector<PointHistory<dim> > quadrature_point_history;
+
+    // The way this object is accessed is through a <code>user pointer</code>
+    // that each cell, face, or edge holds: it is a <code>void*</code> pointer
+    // that can be used by application programs to associate arbitrary data to
+    // cells, faces, or edges. What the program actually does with this data
+    // is within its own responsibility, the library just allocates some space
+    // for these pointers, and application programs can set and read the
+    // pointers for each of these objects.
+
+
+    // Further: we need the objects of linear systems to be solved,
+    // i.e. matrix, right hand side vector, and the solution vector. Since we
+    // anticipate solving big problems, we use the same types as in step-17,
+    // i.e. distributed %parallel matrices and vectors built on top of the
+    // PETSc library. Conveniently, they can also be used when running on only
+    // a single machine, in which case this machine happens to be the only one
+    // in our %parallel universe.
+    //
+    // However, as a difference to step-17, we do not store the solution
+    // vector -- which here is the incremental displacements computed in each
+    // time step -- in a distributed fashion. I.e., of course it must be a
+    // distributed vector when computing it, but immediately after that we
+    // make sure each processor has a complete copy. The reason is that we had
+    // already seen in step-17 that many functions needed a complete
+    // copy. While it is not hard to get it, this requires communication on
+    // the network, and is thus slow. In addition, these were repeatedly the
+    // same operations, which is certainly undesirable unless the gains of not
+    // always having to store the entire vector outweighs it. When writing
+    // this program, it turned out that we need a complete copy of the
+    // solution in so many places that it did not seem worthwhile to only get
+    // it when necessary. Instead, we opted to obtain the complete copy once
+    // and for all, and instead get rid of the distributed copy
+    // immediately. Thus, note that the declaration of
+    // <code>incremental_displacement</code> does not denote a distributed
+    // vector as would be indicated by the middle namespace <code>MPI</code>:
+    PETScWrappers::MPI::SparseMatrix system_matrix;
+
+    PETScWrappers::MPI::Vector       system_rhs;
+
+    Vector<double>                   incremental_displacement;
+
+    // The next block of variables is then related to the time dependent
+    // nature of the problem: they denote the length of the time interval
+    // which we want to simulate, the present time and number of time step,
+    // and length of present timestep:
+    double       present_time;
+    double       present_timestep;
+    double       end_time;
+    unsigned int timestep_no;
+
+    // Then a few variables that have to do with %parallel processing: first,
+    // a variable denoting the MPI communicator we use, and then two numbers
+    // telling us how many participating processors there are, and where in
+    // this world we are. Finally, a stream object that makes sure only one
+    // processor is actually generating output to the console. This is all the
+    // same as in step-17:
+    MPI_Comm mpi_communicator;
+
+    const unsigned int n_mpi_processes;
+
+    const unsigned int this_mpi_process;
+
+    ConditionalOStream pcout;
+
+    // Here is a vector where each entry denotes the numbers of degrees of
+    // freedom that are stored on the processor with that particular number:
+    std::vector<types::global_dof_index> local_dofs_per_process;
+
+    // We are storing the locally owned and the locally relevant indices:
+    IndexSet locally_owned_dofs;
+    IndexSet locally_relevant_dofs;
+
+    // In the same direction, also cache how many cells the present processor
+    // owns. Note that the cells that belong to a processor are not
+    // necessarily contiguously numbered (when iterating over them using
+    // <code>active_cell_iterator</code>).
+    unsigned int         n_local_cells;
+
+    // Finally, we have a static variable that denotes the linear relationship
+    // between the stress and strain. Since it is a constant object that does
+    // not depend on any input (at least not in this program), we make it a
+    // static variable and will initialize it in the same place where we
+    // define the constructor of this class:
+    static const SymmetricTensor<4,dim> stress_strain_tensor;
+  };
+
+
+  // @sect3{The <code>BodyForce</code> class}
+
+  // Before we go on to the main functionality of this program, we have to
+  // define what forces will act on the body whose deformation we want to
+  // study. These may either be body forces or boundary forces. Body forces
+  // are generally mediated by one of the four basic physical types of forces:
+  // gravity, strong and weak interaction, and electromagnetism. Unless one
+  // wants to consider subatomic objects (for which quasistatic deformation is
+  // irrelevant and an inappropriate description anyway), only gravity and
+  // electromagnetic forces need to be considered. Let us, for simplicity
+  // assume that our body has a certain mass density, but is either
+  // non-magnetic and not electrically conducting or that there are no
+  // significant electromagnetic fields around. In that case, the body forces
+  // are simply <code>rho g</code>, where <code>rho</code> is the material
+  // density and <code>g</code> is a vector in negative z-direction with
+  // magnitude 9.81 m/s^2.  Both the density and <code>g</code> are defined in
+  // the function, and we take as the density 7700 kg/m^3, a value commonly
+  // assumed for steel.
+  //
+  // To be a little more general and to be able to do computations in 2d as
+  // well, we realize that the body force is always a function returning a
+  // <code>dim</code> dimensional vector. We assume that gravity acts along
+  // the negative direction of the last, i.e. <code>dim-1</code>th
+  // coordinate. The rest of the implementation of this function should be
+  // mostly self-explanatory given similar definitions in previous example
+  // programs. Note that the body force is independent of the location; to
+  // avoid compiler warnings about unused function arguments, we therefore
+  // comment out the name of the first argument of the
+  // <code>vector_value</code> function:
+  template <int dim>
+  class BodyForce :  public Function<dim>
+  {
+  public:
+    BodyForce ();
+    // Write the force at a single point into <code>values</code>:
+    virtual
+    void
+    vector_value (const Point<dim> &p,
+                  Vector<double>   &values) const;
+    // Write the force at each of <code>points</code> into <code>value_list</code>:
+    virtual
+    void
+    vector_value_list (const std::vector<Point<dim> > &points,
+                       std::vector<Vector<double> >   &value_list) const;
+  };
+
+
+  template <int dim>
+  BodyForce<dim>::BodyForce ()
+    : Function<dim> (dim)  // a function with dim vector components
+  {
+  }
+
+
+  template <int dim>
+  inline
+  void
+  BodyForce<dim>::vector_value (const Point<dim> &/*p*/,
+                                Vector<double>   &values) const
+  {
+    Assert (values.size() == dim,
+            ExcDimensionMismatch (values.size(), dim));
+    // Material density (kg/m^3) and gravitational acceleration (m/s^2):
+    const double density = 7700;
+    const double gravity = 9.81;
+
+    values = 0;
+    values(dim-1) = -density * gravity;
+  }
+
+
+
+  template <int dim>
+  void
+  BodyForce<dim>::vector_value_list (const std::vector<Point<dim> > &points,
+                                     std::vector<Vector<double> >   &value_list) const
+  {
+    // One output vector is expected per evaluation point:
+    const unsigned int count = points.size();
+    Assert (value_list.size() == count,
+            ExcDimensionMismatch (value_list.size(), count));
+
+    // Fill each entry through the single-point function of this class:
+    for (unsigned int idx=0; idx<count; ++idx)
+      BodyForce<dim>::vector_value (points[idx], value_list[idx]);
+  }
+
+
+
+  // @sect3{The <code>IncrementalBoundaryValues</code> class}
+
+  // In addition to body forces, movement can be induced by boundary forces
+  // and forced boundary displacement. The latter case is equivalent to forces
+  // being chosen in such a way that they induce certain displacement.
+  //
+  // For quasistatic displacement, typical boundary forces would be pressure
+  // on a body, or tangential friction against another body. We chose a
+  // somewhat simpler case here: we prescribe a certain movement of (parts of)
+  // the boundary, or at least of certain components of the displacement
+  // vector. We describe this by another vector-valued function that, for a
+  // given point on the boundary, returns the prescribed displacement.
+  //
+  // Since we have a time-dependent problem, the displacement increment of the
+  // boundary equals the displacement accumulated during the length of the
+  // timestep. The class therefore has to know both the present time and the
+  // length of the present time step, and can then approximate the incremental
+  // displacement as the present velocity times the present timestep.
+  //
+  // For the purposes of this program, we choose a simple form of boundary
+  // displacement: we displace the top boundary with constant velocity
+  // downwards. The rest of the boundary is either going to be fixed (and is
+  // then described using an object of type <code>ZeroFunction</code>) or free
+  // (Neumann-type, in which case nothing special has to be done).  The
+  // implementation of the class describing the constant downward motion
+  // should then be obvious using the knowledge we gained through all the
+  // previous example programs:
+  template <int dim>
+  class IncrementalBoundaryValues :  public Function<dim>
+  {
+  public:
+    IncrementalBoundaryValues (const double present_time,
+                               const double present_timestep);
+    // Write the displacement increment at a single point into <code>values</code>:
+    virtual
+    void
+    vector_value (const Point<dim> &p,
+                  Vector<double>   &values) const;
+    // Write the increment at each of <code>points</code> into <code>value_list</code>:
+    virtual
+    void
+    vector_value_list (const std::vector<Point<dim> > &points,
+                       std::vector<Vector<double> >   &value_list) const;
+
+  private:
+    const double velocity;          // speed of the prescribed boundary motion
+    const double present_time;      // time passed to the constructor
+    const double present_timestep;  // time step length passed to the constructor
+  };
+
+
+  template <int dim>
+  IncrementalBoundaryValues<dim>::
+  IncrementalBoundaryValues (const double present_time,
+                             const double present_timestep)
+    : Function<dim> (dim),
+      velocity (.1),
+      present_time (present_time),
+      present_timestep (present_timestep)
+  {
+  }
+
+
+  template <int dim>
+  void
+  IncrementalBoundaryValues<dim>::
+  vector_value (const Point<dim> &/*p*/,
+                Vector<double>   &values) const
+  {
+    Assert (values.size() == dim,
+            ExcDimensionMismatch (values.size(), dim));
+    // Downward motion along the last coordinate; dim-1 is valid in 2d and 3d:
+    values = 0;
+    values(dim-1) = -present_timestep * velocity;
+  }
+
+
+
+  template <int dim>
+  void
+  IncrementalBoundaryValues<dim>::
+  vector_value_list (const std::vector<Point<dim> > &points,
+                     std::vector<Vector<double> >   &value_list) const
+  {
+    // One output vector is expected per evaluation point:
+    const unsigned int count = points.size();
+    Assert (value_list.size() == count,
+            ExcDimensionMismatch (value_list.size(), count));
+    // Fill each entry through the single-point function of this class:
+    for (unsigned int idx=0; idx<count; ++idx)
+      IncrementalBoundaryValues<dim>::vector_value (points[idx],
+                                                    value_list[idx]);
+  }
+
+
+
+  // @sect3{Implementation of the <code>TopLevel</code> class}
+
+  // Now for the implementation of the main class. First, we initialize the
+  // stress-strain tensor, which we have declared as a static const
+  // variable. We chose Lame constants that are appropriate for steel:
+  // Definition of the static stress-strain tensor. The two arguments are the
+  // Lame constants lambda and mu, in this order:
+  template <int dim>
+  const SymmetricTensor<4,dim> TopLevel<dim>::stress_strain_tensor
+    = get_stress_strain_tensor<dim> (9.695e10 /* lambda */,
+                                     7.617e10 /* mu */);
+
+
+
+  // @sect4{The public interface}
+
+  // The next step is the definition of constructors and destructors. There
+  // are no surprises here: we choose linear and continuous finite elements
+  // for each of the <code>dim</code> vector components of the solution, and a
+  // Gaussian quadrature formula with 2 points in each coordinate
+  // direction. The destructor should be obvious:
+  template <int dim>
+  TopLevel<dim>::TopLevel ()
+    : triangulation (MPI_COMM_WORLD),
+      // dim copies of a Q1 element, one per displacement component:
+      fe (FE_Q<dim>(1), dim),
+      dof_handler (triangulation),
+      // two quadrature points per coordinate direction:
+      quadrature_formula (2),
+      mpi_communicator (MPI_COMM_WORLD),
+      n_mpi_processes (Utilities::MPI::n_mpi_processes(mpi_communicator)),
+      this_mpi_process (Utilities::MPI::this_mpi_process(mpi_communicator)),
+      // screen output is only active on process 0:
+      pcout (std::cout, this_mpi_process == 0)
+  {}
+
+
+
+  // The destructor only has to release the data held by the DoF handler:
+  template <int dim>
+  TopLevel<dim>::~TopLevel ()
+  {
+    dof_handler.clear();
+  }
+
+
+
+  // The last of the public functions is the one that directs all the work,
+  // <code>run()</code>. It initializes the variables that describe where in
+  // time we presently are, then runs the first time step, then loops over all
+  // the other time steps. Note that for simplicity we use a fixed time step,
+  // whereas a more sophisticated program would of course have to choose it in
+  // some more reasonable way adaptively:
+  template <int dim>
+  void TopLevel<dim>::run ()
+  {
+    // Start at time zero and step number zero, with a fixed time step of
+    // one, and run until time ten:
+    timestep_no      = 0;
+    present_time     = 0;
+    present_timestep = 1;
+    end_time         = 10;
+
+    // The first step is treated separately...
+    do_initial_timestep ();
+
+    // ...and all later steps are done in a loop until the end time is
+    // reached:
+    while (present_time < end_time)
+      {
+        do_timestep ();
+      }
+  }
+
+
+  // @sect4{TopLevel::create_coarse_grid}
+
+  // The next function in the order in which they were declared above is the
+  // one that creates the coarse grid from which we start. For this example
+  // program, we want to compute the deformation of a cylinder under axial
+  // compression. The first step therefore is to generate a mesh for a
+  // cylinder of length 3 and with inner and outer radii of 0.8 and 1,
+  // respectively. Fortunately, there is a library function for such a mesh.
+  //
+  // In a second step, we have to associate boundary indicators with the
+  // faces of the cylinder. We choose a boundary indicator of 0 for the
+  // boundary faces whose midpoints have a z-coordinate of 0 (bottom face)
+  // and an indicator of 1 for those at z=3 (top face); finally, we use
+  // boundary indicator 2 for all faces on the inside of the cylinder shell,
+  // and 3 for the outside.
+  template <int dim>
+  void TopLevel<dim>::create_coarse_grid ()
+  {
+    // Geometry of the cylinder shell. The length is named once here so that
+    // the mesh generator and the classification of the top face below cannot
+    // get out of sync:
+    const double length       = 3,
+                 inner_radius = 0.8,
+                 outer_radius = 1;
+    GridGenerator::cylinder_shell (triangulation,
+                                   length, inner_radius, outer_radius);
+
+    // Face centers are the result of floating point arithmetic, so we
+    // identify the bottom and top faces with a small tolerance relative to
+    // the length of the cylinder rather than by exact comparison:
+    const double tolerance = 1e-10 * length;
+
+    for (typename Triangulation<dim>::active_cell_iterator
+         cell=triangulation.begin_active();
+         cell!=triangulation.end(); ++cell)
+      for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+        if (cell->face(f)->at_boundary())
+          {
+            const Point<dim> face_center = cell->face(f)->center();
+
+            // Bottom face (z=0) gets indicator 0, top face (z=length) gets
+            // indicator 1. The remaining boundary faces are told apart by
+            // the distance of their center from the z-axis: closer than the
+            // mean radius means inner surface (2), otherwise outer surface
+            // (3).
+            if (std::fabs(face_center[2]) < tolerance)
+              cell->face(f)->set_boundary_id (0);
+            else if (std::fabs(face_center[2] - length) < tolerance)
+              cell->face(f)->set_boundary_id (1);
+            else if (std::sqrt(face_center[0]*face_center[0] +
+                               face_center[1]*face_center[1])
+                     <
+                     (inner_radius + outer_radius) / 2)
+              cell->face(f)->set_boundary_id (2);
+            else
+              cell->face(f)->set_boundary_id (3);
+          }
+
+    // In order to make sure that new vertices are placed correctly on
+    // mesh refinement, we have to associate objects describing those
+    // parts of the boundary that do not consist of straight
+    // parts. Corresponding to the cylinder shell generator function
+    // used above, there are classes that can be used to describe the
+    // geometry of cylinders. The library implements both boundary
+    // classes as well as manifold classes, where also the interior
+    // part of mesh is refined according to the geometrical
+    // description. For this example, we use a single cylindrical
+    // manifold both for the interior part and for the boundary
+    // parts. Note that the manifold object needs to live as long as
+    // the triangulation does; we can achieve this by making the
+    // object static, which means that it lives as long as the
+    // program runs:
+    static const CylindricalManifold<dim> cylindrical_manifold (2);
+
+    // We tell the triangulation to reset all its manifold indicators
+    // to 0, and then attach the cylindrical manifold to it:
+    triangulation.set_all_manifold_ids(0);
+    triangulation.set_manifold (0, cylindrical_manifold);
+
+    // Once all this is done, we can refine the mesh once globally:
+    triangulation.refine_global (1);
+
+
+    // As the final step, we need to set up a clean state of the data that we
+    // store in the quadrature points on all cells that are treated on the
+    // present processor.
+    setup_quadrature_point_history ();
+  }
+
+
+
+
+  // @sect4{TopLevel::setup_system}
+
+  // The next function is the one that sets up the data structures for a given
+  // mesh. This is done in mostly the same way as in step-17: distribute the
+  // degrees of freedom, then sort these degrees of freedom in such a way that
+  // each processor gets a contiguous chunk of them. Note that subdivisions into
+  // chunks for each processor is handled in the functions that create or
+  // refine grids, unlike in the previous example program (the point where
+  // this happens is mostly a matter of taste; here, we chose to do it when
+  // grids are created since in the <code>do_initial_timestep</code> and
+  // <code>do_timestep</code> functions we want to output the number of cells
+  // on each processor at a point where we haven't called the present function
+  // yet).
+  template <int dim>
+  void TopLevel<dim>::setup_system ()
+  {
+    // Enumerate the degrees of freedom, then record the index sets of
+    // locally owned and of locally relevant degrees of freedom:
+    dof_handler.distribute_dofs (fe);
+    locally_owned_dofs = dof_handler.locally_owned_dofs();
+    DoFTools::extract_locally_relevant_dofs (dof_handler,
+                                             locally_relevant_dofs);
+
+    // The next thing is to store some information for later use on how many
+    // cells or degrees of freedom the present processor, or any of the
+    // processors has to work on. First the cells local to this processor...
+    const types::subdomain_id this_subdomain
+      = triangulation.locally_owned_subdomain ();
+    n_local_cells
+      = GridTools::count_cells_with_subdomain_association (triangulation,
+                                                           this_subdomain);
+
+    // ...then the number of degrees of freedom owned by each processor:
+    local_dofs_per_process = dof_handler.n_locally_owned_dofs_per_processor();
+
+    // The next step is to set up constraints due to hanging nodes. This has
+    // been handled many times before:
+    hanging_node_constraints.clear ();
+    DoFTools::make_hanging_node_constraints (dof_handler,
+                                             hanging_node_constraints);
+    hanging_node_constraints.close ();
+
+    // And then we have to set up the matrix. Here we deviate from step-17, in
+    // which we simply used PETSc's ability to just know about the size of the
+    // matrix and later allocate those nonzero elements that are being written
+    // to. While this works just fine from a correctness viewpoint, it is not
+    // at all efficient: if we don't give PETSc a clue as to which elements
+    // are written to, it is (at least at the time of this writing) unbearably
+    // slow when we set the elements in the matrix for the first time (i.e. in
+    // the first timestep). Later on, when the elements have been allocated,
+    // everything is much faster. In experiments we made, the first timestep
+    // can be accelerated by almost two orders of magnitude if we instruct
+    // PETSc which elements will be used and which are not.
+    //
+    // To do so, we first generate the sparsity pattern of the matrix we are
+    // going to work with, and make sure that the condensation of hanging node
+    // constraints adds the necessary additional entries in the sparsity
+    // pattern:
+    DynamicSparsityPattern dsp (locally_relevant_dofs);
+    DoFTools::make_sparsity_pattern (dof_handler,
+                                     dsp,
+                                     hanging_node_constraints,
+                                     /*keep constrained dofs*/ false);
+    SparsityTools::distribute_sparsity_pattern (dsp,
+                                                local_dofs_per_process,
+                                                mpi_communicator,
+                                                locally_relevant_dofs);
+    // Note that we have used the <code>DynamicSparsityPattern</code> class
+    // here that was already introduced in step-11, rather than the
+    // <code>SparsityPattern</code> class that we have used in all other
+    // cases. The reason for this is that for the latter class to work we have
+    // to give an initial upper bound for the number of entries in each row, a
+    // task that is traditionally done by
+    // <code>DoFHandler::max_couplings_between_dofs()</code>. However, this
+    // function suffers from a serious problem: it has to compute an upper
+    // bound to the number of nonzero entries in each row, and this is a
+    // rather complicated task, in particular in 3d. In effect, while it is
+    // quite accurate in 2d, it often comes up with much too large a number in
+    // 3d, and in that case the <code>SparsityPattern</code> allocates much
+    // too much memory at first, often several 100 MBs. This is later
+    // corrected when <code>DoFTools::make_sparsity_pattern</code> is called
+    // and we realize that we don't need all that much memory, but at that
+    // time it is already too late: for large problems, the temporary
+    // allocation of too much memory can lead to out-of-memory situations.
+    //
+    // In order to avoid this, we resort to the
+    // <code>DynamicSparsityPattern</code> class that is slower but does
+    // not require any up-front estimate on the number of nonzero entries per
+    // row. It therefore only ever allocates as much memory as it needs at any
+    // given time, and we can build it even for large 3d problems.
+    //
+    // It is also worth noting that due to the specifics of
+    // parallel::shared::Triangulation, the sparsity pattern we construct is
+    // global, i.e. comprises all degrees of freedom whether they will be
+    // owned by the processor we are on or another one (in case this program
+    // is run in %parallel via MPI). This of course is not optimal -- it
+    // limits the size of the problems we can solve, since storing the entire
+    // sparsity pattern (even if only for a short time) on each processor does
+    // not scale well. However, there are several more places in the program
+    // in which we do this, for example we always keep the global
+    // triangulation and DoF handler objects around, even if we only work on
+    // part of them. At present, deal.II does not have the necessary
+    // facilities to completely distribute these objects (a task that, indeed,
+    // is very hard to achieve with adaptive meshes, since well-balanced
+    // subdivisions of a domain tend to become unbalanced as the mesh is
+    // adaptively refined).
+    //
+    // With this data structure, we can then go to the PETSc sparse matrix and
+    // tell it to preallocate all the entries we will later want to write to:
+    system_matrix.reinit (locally_owned_dofs,
+                          locally_owned_dofs,
+                          dsp,
+                          mpi_communicator);
+    // After this point, no further explicit knowledge of the sparsity pattern
+    // is required any more and we can let the <code>dsp</code> variable go
+    // out of scope without any problem.
+
+    // The last task in this function is then only to reset the right hand
+    // side vector as well as the solution vector to its correct size;
+    // remember that the solution vector is a local one, unlike the right hand
+    // side that is a distributed %parallel one and therefore needs to know
+    // the MPI communicator over which it is supposed to transmit messages:
+    system_rhs.reinit (locally_owned_dofs, mpi_communicator);
+    incremental_displacement.reinit (dof_handler.n_dofs());
+  }
+
+
+
+  // @sect4{TopLevel::assemble_system}
+
+  // Again, assembling the system matrix and right hand side follows the same
+  // structure as in many example programs before. In particular, it is mostly
+  // equivalent to step-17, except for the different right hand side that now
+  // only has to take into account internal stresses. In addition, assembling
+  // the matrix is made significantly more transparent by using the
+  // <code>SymmetricTensor</code> class: note the elegance of forming the
+  // scalar products of symmetric tensors of rank 2 and 4. The implementation
+  // is also more general since it is independent of the fact that we may or
+  // may not be using an isotropic elasticity tensor.
+  //
+  // The first part of the assembly routine is as always:
+  template <int dim>
+  void TopLevel<dim>::assemble_system ()
+  {
+    // Start from a zero right hand side and a zero matrix; the cell
+    // contributions computed below are added to them:
+    system_rhs = 0;
+    system_matrix = 0;
+
+    FEValues<dim> fe_values (fe, quadrature_formula,
+                             update_values   | update_gradients |
+                             update_quadrature_points | update_JxW_values);
+
+    const unsigned int   dofs_per_cell = fe.dofs_per_cell;
+    const unsigned int   n_q_points    = quadrature_formula.size();
+
+    FullMatrix<double>   cell_matrix (dofs_per_cell, dofs_per_cell);
+    Vector<double>       cell_rhs (dofs_per_cell);
+
+    std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+
+    BodyForce<dim>      body_force;
+    std::vector<Vector<double> > body_force_values (n_q_points,
+                                                    Vector<double>(dim));
+
+    // As in step-17, we only need to loop over all cells that belong to the
+    // present processor:
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+    for (; cell!=endc; ++cell)
+      if (cell->is_locally_owned())
+        {
+          cell_matrix = 0;
+          cell_rhs = 0;
+
+          fe_values.reinit (cell);
+
+          // Then loop over all indices i,j and quadrature points and assemble
+          // the system matrix contributions from this cell.  Note how we
+          // extract the symmetric gradients (strains) of the shape functions
+          // at a given quadrature point from the <code>FEValues</code>
+          // object, and the elegance with which we form the triple
+          // contraction <code>eps_phi_i : C : eps_phi_j</code>; the latter
+          // needs to be compared to the clumsy computations needed in
+          // step-17, both in the introduction as well as in the respective
+          // place in the program:
+          for (unsigned int i=0; i<dofs_per_cell; ++i)
+            for (unsigned int j=0; j<dofs_per_cell; ++j)
+              for (unsigned int q_point=0; q_point<n_q_points;
+                   ++q_point)
+                {
+                  const SymmetricTensor<2,dim>
+                  eps_phi_i = get_strain (fe_values, i, q_point),
+                  eps_phi_j = get_strain (fe_values, j, q_point);
+
+                  cell_matrix(i,j)
+                  += (eps_phi_i * stress_strain_tensor * eps_phi_j
+                      *
+                      fe_values.JxW (q_point));
+                }
+
+
+          // Then also assemble the local right hand side contributions. For
+          // this, we need to access the prior stress value in this quadrature
+          // point. To get it, we use the user pointer of this cell that
+          // points into the global array to the quadrature point data
+          // corresponding to the first quadrature point of the present cell,
+          // and then add an offset corresponding to the index of the
+          // quadrature point we presently consider:
+          const PointHistory<dim> *local_quadrature_points_data
+            = reinterpret_cast<PointHistory<dim>*>(cell->user_pointer());
+          // In addition, we need the values of the external body forces at
+          // the quadrature points on this cell:
+          body_force.vector_value_list (fe_values.get_quadrature_points(),
+                                        body_force_values);
+          // Then we can loop over all degrees of freedom on this cell and
+          // compute local contributions to the right hand side:
+          for (unsigned int i=0; i<dofs_per_cell; ++i)
+            {
+              // The vector component in which shape function i is nonzero;
+              // it selects the matching component of the body force:
+              const unsigned int
+              component_i = fe.system_to_component_index(i).first;
+
+              for (unsigned int q_point=0; q_point<n_q_points; ++q_point)
+                {
+                  const SymmetricTensor<2,dim> &old_stress
+                    = local_quadrature_points_data[q_point].old_stress;
+
+                  // Body force term minus the contribution of the stress
+                  // stored from the previous step, weighted by JxW:
+                  cell_rhs(i) += (body_force_values[q_point](component_i) *
+                                  fe_values.shape_value (i,q_point)
+                                  -
+                                  old_stress *
+                                  get_strain (fe_values,i,q_point))
+                                 *
+                                 fe_values.JxW (q_point);
+                }
+            }
+
+          // Now that we have the local contributions to the linear system, we
+          // need to transfer it into the global objects. This is done exactly
+          // as in step-17:
+          cell->get_dof_indices (local_dof_indices);
+
+          hanging_node_constraints
+          .distribute_local_to_global (cell_matrix, cell_rhs,
+                                       local_dof_indices,
+                                       system_matrix, system_rhs);
+        }
+
+    // Now compress the vector and the system matrix:
+    system_matrix.compress(VectorOperation::add);
+    system_rhs.compress(VectorOperation::add);
+
+
+    // The last step is to again fix up boundary values, just as we already
+    // did in previous programs. A slight complication is that the
+    // <code>apply_boundary_values</code> function wants to have a solution
+    // vector compatible with the matrix and right hand side (i.e. here a
+    // distributed %parallel vector, rather than the sequential vector we use
+    // in this program) in order to preset the entries of the solution vector
+    // with the correct boundary values. We provide such a compatible vector
+    // in the form of a temporary vector which we then copy into the
+    // sequential one.
+
+    // We make up for this complication by showing how boundary values can be
+    // used flexibly: following the way we create the triangulation, there are
+    // four distinct boundary indicators used to describe the domain,
+    // corresponding to the bottom and top faces, as well as the inner and
+    // outer surfaces. We would like to impose boundary conditions of the
+    // following type: The inner and outer cylinder surfaces are free of
+    // external forces, a fact that corresponds to natural (Neumann-type)
+    // boundary conditions for which we don't have to do anything. At the
+    // bottom, we want no movement at all, corresponding to the cylinder being
+    // clamped or cemented in at this part of the boundary. At the top,
+    // however, we want a prescribed vertical downward motion compressing the
+    // cylinder; in addition, we only want to restrict the vertical movement,
+    // but not the horizontal ones -- one can think of this situation as a
+    // well-greased plate sitting on top of the cylinder pushing it downwards:
+    // the atoms of the cylinder are forced to move downward, but they are
+    // free to slide horizontally along the plate.
+
+    // The way to describe this is as follows: for boundary indicator zero
+    // (bottom face) we use a dim-dimensional zero function representing no
+    // motion in any coordinate direction. For the boundary with indicator 1
+    // (top surface), we use the <code>IncrementalBoundaryValues</code> class,
+    // but we specify an additional argument to the
+    // <code>VectorTools::interpolate_boundary_values</code> function denoting
+    // which vector components it should apply to; this is a component mask
+    // obtained from the finite element for the last vector component, because
+    // we only want to restrict vertical motion:
+    FEValuesExtractors::Scalar z_component (dim-1);
+    std::map<types::global_dof_index,double> boundary_values;
+    VectorTools::
+    interpolate_boundary_values (dof_handler,
+                                 0,
+                                 ZeroFunction<dim> (dim),
+                                 boundary_values);
+    VectorTools::
+    interpolate_boundary_values (dof_handler,
+                                 1,
+                                 IncrementalBoundaryValues<dim>(present_time,
+                                                                present_timestep),
+                                 boundary_values,
+                                 fe.component_mask(z_component));
+
+    // Apply the boundary values using a temporary distributed vector, then
+    // copy the result into the sequential solution vector:
+    PETScWrappers::MPI::Vector tmp (locally_owned_dofs,mpi_communicator);
+    MatrixTools::apply_boundary_values (boundary_values,
+                                        system_matrix, tmp,
+                                        system_rhs, false);
+    incremental_displacement = tmp;
+  }
+
+
+
+  // @sect4{TopLevel::solve_timestep}
+
+  // The next function is the one that controls what all has to happen within
+  // a timestep. The order of things should be relatively self-explanatory
+  // from the function names:
+  template <int dim>
+  void TopLevel<dim>::solve_timestep ()
+  {
+    // Step 1: build the linear system and report the size of its right hand
+    // side.
+    pcout << "    Assembling system..." << std::flush;
+    assemble_system ();
+    const double rhs_norm = system_rhs.l2_norm();
+    pcout << " norm of rhs is " << rhs_norm
+          << std::endl;
+
+    // Step 2: solve it and report the iteration count.
+    const unsigned int iteration_count = solve_linear_problem ();
+    pcout << "    Solver converged in " << iteration_count
+          << " iterations." << std::endl;
+
+    // Step 3: bring the data stored in the quadrature points up to date.
+    pcout << "    Updating quadrature point data..." << std::flush;
+    update_quadrature_point_history ();
+    pcout << std::endl;
+  }
+
+
+
+  // @sect4{TopLevel::solve_linear_problem}
+
+  // Solving the linear system again works mostly as before. The only
+  // difference is that we want to only keep a complete local copy of the
+  // solution vector instead of the distributed one that we get as output from
+  // PETSc's solver routines. To this end, we declare a local temporary
+  // variable for the distributed vector and initialize it with the contents
+  // of the local variable (remember that the
+  // <code>apply_boundary_values</code> function called in
+  // <code>assemble_system</code> preset the values of boundary nodes in this
+  // vector), solve with it, and at the end of the function copy it again into
+  // the complete local vector that we declared as a member variable. Hanging
+  // node constraints are then distributed only on the local copy,
+  // i.e. independently of each other on each of the processors:
+  template <int dim>
+  unsigned int TopLevel<dim>::solve_linear_problem ()
+  {
+    // Distributed counterpart of the solution vector, initialized with the
+    // current content of the complete local vector:
+    PETScWrappers::MPI::Vector distributed_solution (locally_owned_dofs,
+                                                     mpi_communicator);
+    distributed_solution = incremental_displacement;
+
+    // Allow at most as many iterations as there are degrees of freedom, and
+    // stop once the residual drops below 1e-16 times the norm of the right
+    // hand side:
+    SolverControl solver_control (dof_handler.n_dofs(),
+                                  1e-16*system_rhs.l2_norm());
+    PETScWrappers::SolverCG solver (solver_control,
+                                    mpi_communicator);
+
+    PETScWrappers::PreconditionBlockJacobi block_jacobi (system_matrix);
+
+    solver.solve (system_matrix,
+                  distributed_solution,
+                  system_rhs,
+                  block_jacobi);
+
+    // Copy the result back into the complete local vector and resolve the
+    // hanging node constraints there:
+    incremental_displacement = distributed_solution;
+    hanging_node_constraints.distribute (incremental_displacement);
+
+    return solver_control.last_step();
+  }
+
+
+
+  // @sect4{TopLevel::output_results}
+
+  // This function generates the graphical output in .vtu format as explained
+  // in the introduction. Each process will only work on the cells it owns,
+  // and then write the result into a file of its own. Additionally, processor
+  // 0 will write the record files that reference all the .vtu files.
+  //
+  // The crucial part of this function is to give the <code>DataOut</code>
+  // class a way to only work on the cells that the present process owns.
+
+  template <int dim>
+  void TopLevel<dim>::output_results () const
+  {
+    DataOut<dim> data_out;
+    data_out.attach_dof_handler (dof_handler);
+
+    // Then, just as in step-17, define the names of solution variables (which
+    // here are the displacement increments) and queue the solution vector for
+    // output. Note in the following switch how we make sure that if the space
+    // dimension should be unhandled that we throw an exception saying that we
+    // haven't implemented this case yet (another case of defensive
+    // programming):
+    std::vector<std::string> solution_names;
+    switch (dim)
+      {
+      case 1:
+        solution_names.push_back ("delta_x");
+        break;
+      case 2:
+        solution_names.push_back ("delta_x");
+        solution_names.push_back ("delta_y");
+        break;
+      case 3:
+        solution_names.push_back ("delta_x");
+        solution_names.push_back ("delta_y");
+        solution_names.push_back ("delta_z");
+        break;
+      default:
+        Assert (false, ExcNotImplemented());
+      }
+
+    data_out.add_data_vector (incremental_displacement,
+                              solution_names);
+
+
+    // The next thing is that we wanted to output something like the average
+    // norm of the stresses that we have stored in each cell. This may seem
+    // complicated, since on the present processor we only store the stresses
+    // in quadrature points on those cells that actually belong to the present
+    // process. In other words, it seems as if we can't compute the average
+    // stresses for all cells. However, remember that our class derived from
+    // <code>DataOut</code> only iterates over those cells that actually do
+    // belong to the present processor, i.e. we don't have to compute anything
+    // for all the other cells as this information would not be touched. The
+    // following little loop does this. We enclose the entire block into a
+    // pair of braces to make sure that the iterator variables do not remain
+    // accidentally visible beyond the end of the block in which they are
+    // used:
+    Vector<double> norm_of_stress (triangulation.n_active_cells());
+    {
+      // Loop over all the cells...
+      typename Triangulation<dim>::active_cell_iterator
+      cell = triangulation.begin_active(),
+      endc = triangulation.end();
+      for (; cell!=endc; ++cell)
+        if (cell->is_locally_owned())
+          {
+            // On these cells, add up the stresses over all quadrature
+            // points...
+            SymmetricTensor<2,dim> accumulated_stress;
+            for (unsigned int q=0;
+                 q<quadrature_formula.size();
+                 ++q)
+              accumulated_stress +=
+                reinterpret_cast<PointHistory<dim>*>(cell->user_pointer())[q]
+                .old_stress;
+
+            // ...then write the norm of the average to their destination:
+            norm_of_stress(cell->active_cell_index())
+              = (accumulated_stress /
+                 quadrature_formula.size()).norm();
+          }
+      // And on the cells that we are not interested in, set the respective
+      // value in the vector to a bogus value (norms must be positive, and a
+      // large negative value should catch your eye) in order to make sure
+      // that if we were somehow wrong about our assumption that these
+      // elements would not appear in the output file, that we would find out
+      // by looking at the graphical output:
+        else
+          norm_of_stress(cell->active_cell_index()) = -1e+20;
+    }
+    // Finally attach this vector as well to be treated for output:
+    data_out.add_data_vector (norm_of_stress, "norm_of_stress");
+
+    // As a last piece of data, let us also add the partitioning of the domain
+    // into subdomains associated with the processors if this is a parallel
+    // job. This works in the exact same way as in the step-17 program:
+    std::vector<types::subdomain_id> partition_int (triangulation.n_active_cells());
+    GridTools::get_subdomain_association (triangulation, partition_int);
+    const Vector<double> partitioning(partition_int.begin(),
+                                      partition_int.end());
+    data_out.add_data_vector (partitioning, "partitioning");
+
+    // Finally, with all this data, we can instruct deal.II to munge the
+    // information and produce some intermediate data structures that contain
+    // all these solution and other data vectors:
+    data_out.build_patches ();
+
+
+    // Let us determine the name of the file we will want to write it to. We
+    // compose it of the prefix <code>solution-</code>, followed by the time
+    // step number, and finally the processor id (encoded as a three digit
+    // number):
+    std::string filename = "solution-" + Utilities::int_to_string(timestep_no,4)
+                           + "." + Utilities::int_to_string(this_mpi_process,3)
+                           + ".vtu";
+
+    // The following assertion makes sure that there are less than 1000
+    // processes (a very conservative check, but worth having anyway) as our
+    // scheme of generating process numbers would overflow if there were 1000
+    // processes or more. Note that we choose to use <code>AssertThrow</code>
+    // rather than <code>Assert</code> since the number of processes is a
+    // variable that depends on input files or the way the process is started,
+    // rather than static assumptions in the program code. Therefore, it is
+    // inappropriate to use <code>Assert</code> that is optimized away in
+    // optimized mode, whereas here we actually can assume that users will run
+    // the largest computations with the most processors in optimized mode,
+    // and we should check our assumptions in this particular case, and not
+    // only when running in debug mode:
+    AssertThrow (n_mpi_processes < 1000, ExcNotImplemented());
+
+    // With the so-completed filename, let us open a file and write the data
+    // we have generated into it:
+    std::ofstream output (filename.c_str());
+    data_out.write_vtu (output);
+
+    // The record files must be written only once and not by each processor,
+    // so we do this on processor 0:
+    if (this_mpi_process==0)
+      {
+        // Here we collect all filenames of the current timestep (same format as above)
+        std::vector<std::string> filenames;
+        for (unsigned int i=0; i<n_mpi_processes; ++i)
+          filenames.push_back ("solution-" + Utilities::int_to_string(timestep_no,4)
+                               + "." + Utilities::int_to_string(i,3)
+                               + ".vtu");
+
+        // Now we write the .visit file. The naming is similar to the .vtu files, only
+        // that the file obviously doesn't contain a processor id.
+        const std::string
+        visit_master_filename = ("solution-" +
+                                 Utilities::int_to_string(timestep_no,4) +
+                                 ".visit");
+        std::ofstream visit_master (visit_master_filename.c_str());
+        data_out.write_visit_record (visit_master, filenames);
+
+        // Similarly, we write the paraview .pvtu:
+        const std::string
+        pvtu_master_filename = ("solution-" +
+                                Utilities::int_to_string(timestep_no,4) +
+                                ".pvtu");
+        std::ofstream pvtu_master (pvtu_master_filename.c_str());
+        data_out.write_pvtu_record (pvtu_master, filenames);
+
+        // Finally, we write the paraview record, that references all .pvtu files and
+        // their respective time. Note that the variable times_and_names is declared
+        // static, so it will retain the entries from the previous timesteps.
+        static std::vector<std::pair<double,std::string> > times_and_names;
+        times_and_names.push_back (std::pair<double,std::string> (present_time, pvtu_master_filename));
+        std::ofstream pvd_output ("solution.pvd");
+        data_out.write_pvd_record (pvd_output, times_and_names);
+      }
+
+  }
+
+
+
+  // @sect4{TopLevel::do_initial_timestep}
+
+  // This and the next function handle the overall structure of the first and
+  // following timesteps, respectively. The first timestep is slightly more
+  // involved because we want to compute it multiple times on successively
+  // refined meshes, each time starting from a clean state. At the end of
+  // these computations, in which we compute the incremental displacements
+  // each time, we use the last results obtained for the incremental
+  // displacements to compute the resulting stress updates and move the mesh
+  // accordingly. On this new mesh, we then output the solution and any
+  // additional data we consider important.
+  //
+  // All this is interspersed with output written to the console to update
+  // the person watching the screen on what is going on. As in step-17, the
+  // use of <code>pcout</code> instead of <code>std::cout</code> makes sure
+  // that only one of the parallel processes is actually writing to the
+  // console, without having to explicitly code an if-statement in each place
+  // where we generate output:
+  template <int dim>
+  void TopLevel<dim>::do_initial_timestep ()
+  {
+    // Advance the clock and the step counter, then announce the new step:
+    present_time += present_timestep;
+    ++timestep_no;
+    pcout << "Timestep " << timestep_no << " at time " << present_time
+          << std::endl;
+    // The first pass works on the coarse grid, the second on a refined one:
+    const unsigned int n_cycles = 2;
+    for (unsigned int cycle_no=0; cycle_no<n_cycles; ++cycle_no)
+      {
+        pcout << "  Cycle " << cycle_no << ':' << std::endl;
+        if (cycle_no != 0)
+          refine_initial_grid ();
+        else
+          create_coarse_grid ();
+
+        // Global number of cells, followed by the count for each subdomain:
+        pcout << "    Number of active cells:       "
+              << triangulation.n_active_cells() << " (by partition:";
+        for (unsigned int subdomain=0; subdomain<n_mpi_processes; ++subdomain)
+          pcout << (subdomain!=0 ? '+' : ' ')
+                << GridTools::count_cells_with_subdomain_association (triangulation,
+                                                                       subdomain);
+        pcout << ")" << std::endl;
+
+        setup_system ();
+
+        // The same breakdown for the degrees of freedom:
+        pcout << "    Number of degrees of freedom: "
+              << dof_handler.n_dofs() << " (by partition:";
+        for (unsigned int subdomain=0; subdomain<n_mpi_processes; ++subdomain)
+          pcout << (subdomain!=0 ? '+' : ' ')
+                << DoFTools::count_dofs_with_subdomain_association (dof_handler,
+                                                                     subdomain);
+        pcout << ")" << std::endl;
+
+        solve_timestep ();
+      }
+    // Deform the mesh by the displacement increment, then write the output:
+    move_mesh ();
+    output_results ();
+    pcout << std::endl;
+  }
+
+
+
+  // @sect4{TopLevel::do_timestep}
+
+  // Subsequent timesteps are simpler, and probably do not require any more
+  // documentation given the explanations for the previous function above:
+  template <int dim>
+  void TopLevel<dim>::do_timestep ()
+  {
+    present_time += present_timestep;
+    ++timestep_no;
+    // Shorten the last step so that it ends exactly at end_time. Do this
+    // before printing, so that the reported time is the one we solve for:
+    if (present_time > end_time)
+      {
+        present_timestep -= (present_time - end_time);
+        present_time = end_time;
+      }
+    pcout << "Timestep " << timestep_no << " at time " << present_time
+          << std::endl;
+
+    solve_timestep ();
+    move_mesh ();
+    output_results ();
+
+    pcout << std::endl;
+  }
+
+
+  // @sect4{TopLevel::refine_initial_grid}
+
+  // The following function is called when solving the first time step on
+  // successively refined meshes. After each iteration, it computes a
+  // refinement criterion, refines the mesh, and sets up the history variables
+  // in each quadrature point again to a clean state.
+  template <int dim>
+  void TopLevel<dim>::refine_initial_grid ()
+  {
+    // Each process first estimates the error on the cells of its subdomain:
+    Vector<float> local_indicators (triangulation.n_active_cells());
+    KellyErrorEstimator<dim>::estimate (dof_handler,
+                                        QGauss<dim-1>(2),
+                                        typename FunctionMap<dim>::type(),
+                                        incremental_displacement,
+                                        local_indicators,
+                                        ComponentMask(),
+                                        0,
+                                        MultithreadInfo::n_threads(),
+                                        this_mpi_process);
+
+    // Merge the nonzero local indicators into one %parallel vector:
+    const unsigned int n_owned_cells = triangulation.n_locally_owned_active_cells ();
+    PETScWrappers::MPI::Vector global_indicators (mpi_communicator,
+                                                  triangulation.n_active_cells(),
+                                                  n_owned_cells);
+    for (unsigned int cell_no=0; cell_no<local_indicators.size(); ++cell_no)
+      {
+        const float indicator = local_indicators(cell_no);
+        if (indicator != 0)
+          global_indicators(cell_no) = indicator;
+      }
+    global_indicators.compress (VectorOperation::insert);
+
+    // Copy the merged indicators back into the local vector of every process,
+    // then flag and refine/coarsen cells using the two fractions below:
+    local_indicators = global_indicators;
+    const double refine_fraction  = 0.35;
+    const double coarsen_fraction = 0.03;
+    GridRefinement::refine_and_coarsen_fixed_number (triangulation,
+                                                     local_indicators,
+                                                     refine_fraction,
+                                                     coarsen_fraction);
+    triangulation.execute_coarsening_and_refinement ();
+    // The history variables must be set up afresh on the new mesh:
+    setup_quadrature_point_history ();
+  }
+
+
+
+  // @sect4{TopLevel::move_mesh}
+
+  // At the end of each time step, we move the nodes of the mesh according to
+  // the incremental displacements computed in this time step. To do this, we
+  // keep a vector of flags that indicate for each vertex whether we have
+  // already moved it around, and then loop over all cells and move those
+  // vertices of the cell that have not been moved yet. It is worth noting
+  // that it does not matter from which of the cells adjacent to a vertex we
+  // move this vertex: since we compute the displacement using a continuous
+  // finite element, the displacement field is continuous as well and we can
+  // compute the displacement of a given vertex from each of the adjacent
+  // cells. We only have to make sure that we move each node exactly once,
+  // which is why we keep the vector of flags.
+  //
+  // There are two noteworthy things in this function. First, how we get the
+  // displacement field at a given vertex using the
+  // <code>cell-@>vertex_dof_index(v,d)</code> function that returns the index
+  // of the <code>d</code>th degree of freedom at vertex <code>v</code> of the
+  // given cell. In the present case, displacement in the k-th coordinate
+  // direction corresponds to the k-th component of the finite element. Using a
+  // function like this bears a certain risk, because it uses knowledge of the
+  // order of elements that we have taken together for this program in the
+  // <code>FESystem</code> element. If we decided to add an additional
+  // variable, for example a pressure variable for stabilization, and happened
+  // to insert it as the first variable of the element, then the computation
+  // below will start to produce nonsensical results. In addition, this
+  // computation rests on other assumptions: first, that the element we use
+  // has, indeed, degrees of freedom that are associated with vertices. This
+  // is indeed the case for the present Q1 element, as would be for all Qp
+  // elements of polynomial order <code>p</code>. However, it would not hold
+  // for discontinuous elements, or elements for mixed formulations. Secondly,
+  // it also rests on the assumption that the displacement at a vertex is
+  // determined solely by the value of the degree of freedom associated with
+  // this vertex; in other words, all shape functions corresponding to other
+  // degrees of freedom are zero at this particular vertex. Again, this is the
+  // case for the present element, but is not so for all elements that are
+  // presently available in deal.II. Despite its risks, we choose to use this
+  // way in order to present a way to query individual degrees of freedom
+  // associated with vertices.
+  //
+  // In this context, it is instructive to point out what a more general way
+  // would be. For general finite elements, the way to go would be to take a
+  // quadrature formula with the quadrature points in the vertices of a
+  // cell. The <code>QTrapez</code> formula for the trapezoidal rule does
+  // exactly this. With this quadrature formula, we would then initialize an
+  // <code>FEValues</code> object in each cell, and use the
+  // <code>FEValues::get_function_values</code> function to obtain the values
+  // of the solution function in the quadrature points, i.e. the vertices of
+  // the cell. These are the only values that we really need, i.e. we are not
+  // at all interested in the weights (or the <code>JxW</code> values)
+  // associated with this particular quadrature formula, and this can be
+  // specified as the last argument in the constructor to
+  // <code>FEValues</code>. The only point of minor inconvenience in this
+  // scheme is that we have to figure out which quadrature point corresponds
+  // to the vertex we consider at present, as they may or may not be ordered
+  // in the same order.
+  //
+  // This inconvenience could be avoided if finite elements have support
+  // points on vertices (which the one here has; for the concept of support
+  // points, see @ref GlossSupport "support points"). For such a case, one
+  // could construct a custom quadrature rule using
+  // FiniteElement::get_unit_support_points(). The first
+  // <code>GeometryInfo@<dim@>::vertices_per_cell*fe.dofs_per_vertex</code>
+  // quadrature points will then correspond to the vertices of the cell and
+  // are ordered consistent with <code>cell-@>vertex(i)</code>, taking into
+  // account that support points for vector elements will be duplicated
+  // <code>fe.dofs_per_vertex</code> times.
+  //
+  // Another point worth explaining about this short function is the way in
+  // which the triangulation class exports information about its vertices:
+  // through the <code>Triangulation::n_vertices</code> function, it
+  // advertises how many vertices there are in the triangulation. Not all of
+  // them are actually in use all the time -- some are left-overs from cells
+  // that have been coarsened previously and remain in existence since deal.II
+  // never changes the number of a vertex once it has come into existence,
+  // even if vertices with lower number go away. Secondly, the location
+  // returned by <code>cell-@>vertex(v)</code> is not only a read-only object
+  // of type <code>Point@<dim@></code>, but in fact a reference that can be
+  // written to. This allows one to move around the nodes of a mesh with relative
+  // ease, but it is worth pointing out that it is the responsibility of an
+  // application program using this feature to make sure that the resulting
+  // cells are still useful, i.e. are not distorted so much that the cell is
+  // degenerated (indicated, for example, by negative Jacobians). Note that we
+  // do not have any provisions in this function to actually ensure this, we
+  // just have faith.
+  //
+  // After this lengthy introduction, here are the full 20 or so lines of
+  // code:
+  template <int dim>
+  void TopLevel<dim>::move_mesh ()
+  {
+    pcout << "    Moving mesh..." << std::endl;
+
+    // One flag per vertex, so that every vertex is shifted exactly once:
+    std::vector<bool> already_moved (triangulation.n_vertices(), false);
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active ();
+    for (; cell != dof_handler.end(); ++cell)
+      for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+        {
+          const unsigned int global_vertex = cell->vertex_index(v);
+          if (already_moved[global_vertex])
+            continue;
+          already_moved[global_vertex] = true;
+          // The displacement is read from the dim DoFs at this vertex:
+          Point<dim> shift;
+          for (unsigned int d=0; d<dim; ++d)
+            shift[d] = incremental_displacement(cell->vertex_dof_index(v,d));
+          cell->vertex(v) += shift;
+        }
+  }
+
+
+  // @sect4{TopLevel::setup_quadrature_point_history}
+
+  // At the beginning of our computations, we needed to set up initial values
+  // of the history variables, such as the existing stresses in the material,
+  // that we store in each quadrature point. As mentioned above, we use the
+  // <code>user_pointer</code> for this that is available in each cell.
+  //
+  // To put this into larger perspective, we note that if we had previously
+  // available stresses in our model (which we assume do not exist for the
+  // purpose of this program), then we would need to interpolate the field of
+  // preexisting stresses to the quadrature points. Likewise, if we were to
+  // simulate elasto-plastic materials with hardening/softening, then we would
+  // have to store additional history variables like the present yield stress
+  // or the accumulated plastic strains at each quadrature
+  // point. Pre-existing hardening or weakening would then be implemented by
+  // interpolating these variables in the present function as well.
+  template <int dim>
+  void TopLevel<dim>::setup_quadrature_point_history ()
+  {
+    // What we need to do here is to first count how many quadrature points
+    // are within the responsibility of this processor. This, of course,
+    // equals the number of cells that belong to this processor times the
+    // number of quadrature points our quadrature formula has on each cell.
+    //
+    // For good measure, we also set all user pointers of all cells, whether
+    // ours or not, to the null pointer. This way, if we ever access the user
+    // pointer of a cell which we should not have accessed, a segmentation
+    // fault will let us know that this should not have happened:
+    const unsigned int n_q_points = quadrature_formula.size();
+    unsigned int n_owned_cells = 0;
+    typename Triangulation<dim>::active_cell_iterator
+    cell = triangulation.begin_active();
+    for (; cell != triangulation.end(); ++cell)
+      if (cell->is_locally_owned())
+        ++n_owned_cells;
+
+    triangulation.clear_user_data();
+
+    // Next, allocate as many quadrature objects as we need. Since the
+    // <code>resize</code> function does not actually shrink the amount of
+    // allocated memory if the requested new size is smaller than the old
+    // size, we resort to a trick to first free all memory, and then
+    // reallocate it: we declare an empty vector as a temporary variable and
+    // then swap the contents of the old vector and this temporary
+    // variable. This makes sure that the
+    // <code>quadrature_point_history</code> is now really empty, and we can
+    // let the temporary variable that now holds the previous contents of the
+    // vector go out of scope and be destroyed. In the next step, we can then
+    // re-allocate as many elements as we need, with the vector
+    // default-initializing the <code>PointHistory</code> objects, which
+    // includes setting the stress variables to zero.
+    {
+      std::vector<PointHistory<dim> > discarded;
+      quadrature_point_history.swap (discarded);
+    }
+    quadrature_point_history.resize (n_owned_cells * n_q_points);
+
+    // Finally loop over all cells again and set the user pointers from the
+    // cells that belong to the present processor to point to the first
+    // quadrature point objects corresponding to this cell in the vector of
+    // such objects:
+    unsigned int next_slot = 0;
+    for (cell = triangulation.begin_active();
+         cell != triangulation.end(); ++cell)
+      {
+        if (cell->is_locally_owned() == false)
+          continue;
+        cell->set_user_pointer (&quadrature_point_history[next_slot]);
+        next_slot += n_q_points;
+      }
+
+    // At the end, for good measure make sure that our count of elements was
+    // correct and that we have both used up all objects we allocated
+    // previously, and not point to any objects beyond the end of the
+    // vector. Such defensive programming strategies are always good checks to
+    // avoid accidental errors and to guard against future changes to this
+    // function that forget to update all uses of a variable at the same
+    // time. Recall that constructs using the <code>Assert</code> macro are
+    // optimized away in optimized mode, so do not affect the run time of
+    // optimized runs:
+    Assert (next_slot == quadrature_point_history.size(),
+            ExcInternalError());
+  }
+
+
+
+
+  // @sect4{TopLevel::update_quadrature_point_history}
+
+  // At the end of each time step, we should have computed an incremental
+  // displacement update so that the material in its new configuration
+  // accommodates for the difference between the external body and boundary
+  // forces applied during this time step minus the forces exerted through
+  // preexisting internal stresses. In order to have the preexisting
+  // stresses available at the next time step, we therefore have to update the
+  // preexisting stresses with the stresses due to the incremental
+  // displacement computed during the present time step. Ideally, the
+  // resulting sum of internal stresses would exactly counter all external
+  // forces. Indeed, a simple experiment can make sure that this is so: if we
+  // choose boundary conditions and body forces to be time independent, then
+  // the forcing terms (the sum of external forces and internal stresses)
+  // should be exactly zero. If you make this experiment, you will realize
+  // from the output of the norm of the right hand side in each time step that
+  // this is almost the case: it is not exactly zero, since in the first time
+  // step the incremental displacement and stress updates were computed
+  // relative to the undeformed mesh, which was then deformed. In the second
+  // time step, we again compute displacement and stress updates, but this
+  // time in the deformed mesh -- there, the resulting updates are very small
+  // but not quite zero. This can be iterated, and in each such iteration the
+  // residual, i.e. the norm of the right hand side vector, is reduced; if one
+  // makes this little experiment, one realizes that the norm of this residual
+  // decays exponentially with the number of iterations, and after an initial
+  // very rapid decline is reduced by roughly a factor of about 3.5 in each
+  // iteration (for one testcase I looked at, other testcases, and other
+  // numbers of unknowns change the factor, but not the exponential decay).
+
+  // In a sense, this can then be considered as a quasi-timestepping scheme to
+  // resolve the nonlinear problem of solving large-deformation elasticity on
+  // a mesh that is moved along in a Lagrangian manner.
+  //
+  // Another complication is that the existing (old) stresses are defined on
+  // the old mesh, which we will move around after updating the stresses. If
+  // this mesh update involves rotations of the cell, then we need to also
+  // rotate the updated stress, since it was computed relative to the
+  // coordinate system of the old cell.
+  //
+  // Thus, what we need is the following: on each cell which the present
+  // processor owns, we need to extract the old stress from the data stored
+  // with each quadrature point, compute the stress update, add the two
+  // together, and then rotate the result together with the incremental
+  // rotation computed from the incremental displacement at the present
+  // quadrature point. We will detail these steps below:
+  template <int dim>
+  void TopLevel<dim>::update_quadrature_point_history ()
+  {
+    // First, set up an <code>FEValues</code> object by which we will evaluate
+    // the incremental displacements and the gradients thereof at the
+    // quadrature points, together with a vector that will hold this
+    // information:
+    FEValues<dim> fe_values (fe, quadrature_formula,
+                             update_values | update_gradients);
+    std::vector<std::vector<Tensor<1,dim> > >
+    displacement_increment_grads (quadrature_formula.size(),
+                                  std::vector<Tensor<1,dim> >(dim));
+
+    // Then loop over all cells and do the job in the cells that belong to our
+    // subdomain:
+    for (typename DoFHandler<dim>::active_cell_iterator
+         cell = dof_handler.begin_active();
+         cell != dof_handler.end(); ++cell)
+      if (cell->is_locally_owned())
+        {
+          // Next, get a pointer to the quadrature point history data local to
+          // the present cell, and, as a defensive measure, make sure that it
+          // points to an element of the global array (possibly the last one):
+          PointHistory<dim> *local_quadrature_points_history
+            = reinterpret_cast<PointHistory<dim> *>(cell->user_pointer());
+          Assert (local_quadrature_points_history >=
+                  &quadrature_point_history.front(),
+                  ExcInternalError());
+          Assert (local_quadrature_points_history <=
+                  &quadrature_point_history.back(),
+                  ExcInternalError());
+
+          // Then initialize the <code>FEValues</code> object on the present
+          // cell, and extract the gradients of the displacement at the
+          // quadrature points for later computation of the strains
+          fe_values.reinit (cell);
+          fe_values.get_function_gradients (incremental_displacement,
+                                            displacement_increment_grads);
+
+          // Then loop over the quadrature points of this cell:
+          for (unsigned int q=0; q<quadrature_formula.size(); ++q)
+            {
+              // On each quadrature point, compute the strain increment from
+              // the gradients, and multiply it by the stress-strain tensor to
+              // get the stress update. Then add this update to the already
+              // existing stress at this point:
+              const SymmetricTensor<2,dim> new_stress
+                = (local_quadrature_points_history[q].old_stress
+                   +
+                   (stress_strain_tensor *
+                    get_strain (displacement_increment_grads[q])));
+
+              // Finally, we have to rotate the result. For this, we first
+              // have to compute a rotation matrix at the present quadrature
+              // point from the incremental displacements. In fact, it can be
+              // computed from the gradients, and we already have a function
+              // for that purpose:
+              const Tensor<2,dim> rotation
+                = get_rotation_matrix (displacement_increment_grads[q]);
+              // Note that the result, a rotation matrix, is in general not a
+              // symmetric tensor of rank 2, so we must store it as a full
+              // tensor.
+
+              // With this rotation matrix, we can compute the rotated tensor
+              // by contraction from the left and right, after we expand the
+              // symmetric tensor <code>new_stress</code> into a full tensor:
+              const SymmetricTensor<2,dim> rotated_new_stress
+                = symmetrize(transpose(rotation) *
+                             static_cast<Tensor<2,dim> >(new_stress) *
+                             rotation);
+              // Note that while the result of the multiplication of these
+              // three matrices should be symmetric, it is not due to floating
+              // point round off: we get an asymmetry on the order of 1e-16 of
+              // the off-diagonal elements of the result. When assigning the
+              // result to a <code>SymmetricTensor</code>, the constructor of
+              // that class checks the symmetry and realizes that it isn't
+              // exactly symmetric; it will then raise an exception. To avoid
+              // that, we explicitly symmetrize the result to make it exactly
+              // symmetric.
+
+              // The result of all these operations is then written back into
+              // the original place:
+              local_quadrature_points_history[q].old_stress
+                = rotated_new_stress;
+            }
+        }
+  }
+
+  // This ends the project specific namespace <code>Step18</code>. The rest is
+  // as usual and as already shown in step-17: A <code>main()</code> function
+  // that initializes and terminates PETSc, calls the classes that do the
+  // actual work, and makes sure that we catch all exceptions that propagate
+  // up to this point:
+}
+
+
+int main (int argc, char **argv)
+{
+  // Horizontal rule that frames the error reports printed below:
+  const char *const rule
+    = "----------------------------------------------------";
+  try
+    {
+      using namespace dealii;
+      using namespace Step18;
+
+      // PETSc is initialized and terminated through this object:
+      Utilities::MPI::MPI_InitFinalize mpi_initialization(argc, argv, 1);
+      TopLevel<3> elastic_problem;
+      elastic_problem.run ();
+    }
+  catch (const std::exception &exc)
+    {
+      // A standard exception carries a message that we can show:
+      std::cerr << std::endl << std::endl
+                << rule << std::endl;
+      std::cerr << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << rule << std::endl;
+      return 1;
+    }
+  catch (...)
+    {
+      // Anything else can only be reported generically:
+      std::cerr << std::endl << std::endl
+                << rule << std::endl;
+      std::cerr << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << rule << std::endl;
+      return 1;
+    }
+
+  return 0;
+}
diff --git a/examples/step-19/CMakeLists.txt b/examples/step-19/CMakeLists.txt
new file mode 100644
index 0000000..b4e525a
--- /dev/null
+++ b/examples/step-19/CMakeLists.txt
@@ -0,0 +1,42 @@
+##
+#  CMake script for the step-19 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-19")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# No sensible run target can be defined for this example step, so leave it empty:
+SET(TARGET_RUN "")
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+# Locate deal.II (version 8.4 requested), searching the hinted directories:
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+# NOTE(review): the DEAL_II_* macros are presumably provided by the package found above.
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-19/doc/builds-on b/examples/step-19/doc/builds-on
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/examples/step-19/doc/builds-on
@@ -0,0 +1 @@
+
diff --git a/examples/step-19/doc/intro.dox b/examples/step-19/doc/intro.dox
new file mode 100644
index 0000000..e4a598c
--- /dev/null
+++ b/examples/step-19/doc/intro.dox
@@ -0,0 +1,121 @@
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+
+In step-18, we saw a need to write
+output files in an intermediate format: in a %parallel program, it doesn't scale
+well if all processors participate in computing a result, and then only a
+single processor generates the graphical output. Rather, each of them should
+generate output for its share of the domain, and later on merge all these
+output files into a single one.
+
+
+
+Thus was the beginning of step-19: it is the program that reads a number of
+files written in intermediate format, and merges and converts them into the
+final format that one would like to use for visualization. It can also be used
+for the following purpose: if you are unsure at the time of a computation what
+graphics program you would like to use, write your results in intermediate
+format; it can later be converted, using the present program, to any other
+format you may want.
+
+
+
+While this in itself was not interesting enough to make a tutorial program, we
+have used the opportunity to introduce one class that has proven to be
+extremely help- and useful in real application programs, but had not been
+covered by any of the previous tutorial programs: the
+<code>ParameterHandler</code> class. This class is used in applications that
+want to have some of their behavior determined at run time, using input
+files. For example, one may want to specify the geometry, or specifics of the
+equation to be solved, at run time. Other typical parameters are the number of
+nonlinear iterations, the name of output files, or the names of input files
+specifying material properties or boundary conditions.
+
+
+
+Working with such parameter files is not rocket science. However, it is rather
+tedious to write the parsers for such files, in particular if they should be
+extensible, be able to group parameters into subsections, perform some error
+checks such as that parameters can have only certain kinds of values (for
+example, it should only be allowed to have integer values in an input file for
+parameters that denote a number of iterations), and similar requirements. The
+<code>ParameterHandler</code> class allows for all this: an application program
+will declare the parameters it expects (or call a function in the library that
+declares a number of parameters for you), the <code>ParameterHandler</code>
+class then reads an input file with all these parameters, and the application
+program can then get their values back from this class.
+
+
+
+In order to perform these three steps, the <code>ParameterHandler</code> offers
+three sets of functions: first, the
+<code>ParameterHandler::declare_entry</code> function is used to declare the
+existence of a named parameter in the present section of the input file (one
+can enter and leave subsections in a parameter file just like you would
+navigate through a directory tree in a file system, with the functions
+<code>ParameterHandler::enter_subsection</code> and
+<code>ParameterHandler::leave_subsection</code> taking on the roles of the
+commands <code>cd dir</code> and <code>cd ..</code>; the only difference being
+that if you enter a subsection that has never been visited before, it is
+created: it isn't necessary to "create" subsections explicitly). When declaring
+a parameter, one has to specify its name and default value, in case the
+parameter isn't later listed explicitly in the parameter file. In addition to
+that, there are optional arguments indicating a pattern that a parameter has to
+satisfy, such as being an integer (see the discussion above), and a help text
+that might later give an explanation of what the parameter stands for.
+
+
+
+Once all parameters have been declared, parameters can be read, using the
+<code>ParameterHandler::read_input</code> family of functions. There are
+versions of this function that can read from a file stream, that take a file
+name, or that simply take a string and parse it. When reading parameters, the
+class makes sure that only parameters are listed in the input that have been
+declared before, and that the values of parameters satisfy the pattern that has
+been given to describe the kind of values a parameter can have. Input that uses
+undeclared parameters or values for parameters that do not conform to the
+pattern is rejected by raising an exception.
+
+
+
+A typical input file will look like this:
+@code
+set Output format  = dx
+set Output file    = my_output_file.dx
+
+set Maximal number of iterations = 13
+
+subsection Application
+  set Color of output = blue
+  set Generate output = false
+end
+@endcode
+Note that subsections can be nested.
+
+
+
+Finally, the application program can get the values of declared parameters back
+by traversing the subsections of the parameter tree and using the
+<code>ParameterHandler::get</code> and related functions. The
+<code>ParameterHandler::get</code> simply returns the value of a parameter as a
+string, whereas <code>ParameterHandler::get_integer</code>,
+<code>ParameterHandler::get_double</code>, and
+<code>ParameterHandler::get_bool</code> already convert them to the indicated
+type.
+
+
+
+Using the <code>ParameterHandler</code> class therefore provides for a pretty
+flexible mechanism to handle all sorts of moderately complex input files without
+much effort on the side of the application programmer. We will use this to
+provide all sorts of options to the step-19 program in order to convert from
+intermediate file format to any other graphical file format.
+
+
+
+The rest of the story is probably best told by looking at the source of step-19
+itself. Let us, however, end this introduction by pointing the reader at the
+extensive class documentation of the <code>ParameterHandler</code> class for
+more information on specific details of that class.
+
diff --git a/examples/step-19/doc/kind b/examples/step-19/doc/kind
new file mode 100644
index 0000000..c1d9154
--- /dev/null
+++ b/examples/step-19/doc/kind
@@ -0,0 +1 @@
+techniques
diff --git a/examples/step-19/doc/results.dox b/examples/step-19/doc/results.dox
new file mode 100644
index 0000000..39fe363
--- /dev/null
+++ b/examples/step-19/doc/results.dox
@@ -0,0 +1,259 @@
+<h1>Results</h1>
+
+
+With all that above, here is first what we get if we just run the program
+without any parameters at all:
+@code
+examples/\step-19> ./\step-19
+
+Converter from deal.II intermediate format to other graphics formats.
+
+Usage:
+    ./\step-19 [-p parameter_file] list_of_input_files
+              [-x output_format] [-o output_file]
+
+Parameter sequences in brackets can be omitted if a parameter file is
+specified on the command line and if it provides values for these
+missing parameters.
+
+The parameter file has the following format and allows the following
+values (you can cut and paste this and use it for your own parameter
+file):
+
+# Listing of Parameters
+# ---------------------
+# A dummy parameter asking for an integer
+set Dummy iterations = 42
+
+# The name of the output file to be generated
+set Output file      =
+
+# A name for the output format to be used
+set Output format    = gnuplot
+
+
+subsection DX output parameters
+  # A Boolean field indicating whether neighborship information between cells
+  # is to be written to the OpenDX output file
+  set Write neighbors = true
+end
+
+
+subsection Dummy subsection
+  # A dummy parameter that shows how one can define a parameter that can be
+  # assigned values from a finite set of values
+  set Dummy color of output = red
+
+  # A dummy parameter that can be fed with either 'true' or 'false'
+  set Dummy generate output = true
+end
+
+
+subsection Eps output parameters
+  # Angle of the viewing position against the vertical axis
+  set Azimut angle                        = 60
+
+  # Name of a color function used to colorize mesh lines and/or cell
+  # interiors
+  set Color function                      = default
+
+  # Whether the interior of cells shall be shaded
+  set Color shading of interior of cells  = true
+
+  # Whether the mesh lines, or only the surface should be drawn
+  set Draw mesh lines                     = true
+
+  # Whether only the mesh lines, or also the interior of cells should be
+  # plotted. If this flag is false, then one can see through the mesh
+  set Fill interior of cells              = true
+
+  # Number of the input vector that is to be used to generate color
+  # information
+  set Index of vector for color           = 0
+
+  # Number of the input vector that is to be used to generate height
+  # information
+  set Index of vector for height          = 0
+
+  # The width in which the postscript renderer is to plot lines
+  set Line widths in eps units            = 0.5
+
+  # Whether width or height should be scaled to match the given size
+  set Scale to width or height            = width
+
+  # Scaling for the z-direction relative to the scaling used in x- and
+  # y-directions
+  set Scaling for z-axis                  = 1
+
+  # The size (width or height) to which the eps output file is to be scaled
+  set Size (width or height) in eps units = 300
+
+  # Angle of the viewing direction against the y-axis
+  set Turn angle                          = 30
+end
+
+subsection Povray output parameters
+  # Whether camera and lighting information should be put into an external
+  # file "data.inc" or into the POVRAY input file
+  set Include external file = true
+
+  # Whether POVRAY should use bicubic patches
+  set Use bicubic patches   = false
+
+  # A flag indicating whether POVRAY should use smoothed triangles instead of
+  # the usual ones
+  set Use smooth triangles  = false
+end
+
+
+subsection UCD output parameters
+  # A flag indicating whether a comment should be written to the beginning of
+  # the output file indicating date and time of creation as well as the
+  # creating program
+  set Write preamble = true
+end
+@endcode
+
+That's a lot of output for such a little program, but then that's also a lot of
+output formats that deal.II supports. You will realize that the output consists
+of first entries in the top-level section (sorted alphabetically), then a
+sorted list of subsections. Most of the parameters have been declared by the
+<code>DataOutBase</code> class, but there are also the dummy entries and
+sections we have added in the <code>declare_parameters()</code> function, along
+with their default values and documentations.
+
+
+
+Let us try to run this program on a set of input files generated by a modified
+step-18 run on 32 nodes of a
+cluster. The computation was rather big, with more
+than 350,000 cells and some 1.2M unknowns. That makes for 32 rather big
+intermediate files that we will try to merge using the present program. Here is
+the list of files, totaling some 245MB of data:
+@code
+examples/\step-19> ls -l *d2
+-rw-r--r--  1 bangerth wheeler 7982085 Aug 12 10:11 solution-0005.0000-000.d2
+-rw-r--r--  1 bangerth wheeler 7888316 Aug 12 10:13 solution-0005.0000-001.d2
+-rw-r--r--  1 bangerth wheeler 7715984 Aug 12 10:09 solution-0005.0000-002.d2
+-rw-r--r--  1 bangerth wheeler 7887648 Aug 12 10:06 solution-0005.0000-003.d2
+-rw-r--r--  1 bangerth wheeler 7833291 Aug 12 10:09 solution-0005.0000-004.d2
+-rw-r--r--  1 bangerth wheeler 7536394 Aug 12 10:07 solution-0005.0000-005.d2
+-rw-r--r--  1 bangerth wheeler 7817551 Aug 12 10:06 solution-0005.0000-006.d2
+-rw-r--r--  1 bangerth wheeler 7996660 Aug 12 10:07 solution-0005.0000-007.d2
+-rw-r--r--  1 bangerth wheeler 7761545 Aug 12 10:06 solution-0005.0000-008.d2
+-rw-r--r--  1 bangerth wheeler 7754027 Aug 12 10:07 solution-0005.0000-009.d2
+-rw-r--r--  1 bangerth wheeler 7607545 Aug 12 10:11 solution-0005.0000-010.d2
+-rw-r--r--  1 bangerth wheeler 7728039 Aug 12 10:07 solution-0005.0000-011.d2
+-rw-r--r--  1 bangerth wheeler 7577293 Aug 12 10:14 solution-0005.0000-012.d2
+-rw-r--r--  1 bangerth wheeler 7735626 Aug 12 10:10 solution-0005.0000-013.d2
+-rw-r--r--  1 bangerth wheeler 7629075 Aug 12 10:10 solution-0005.0000-014.d2
+-rw-r--r--  1 bangerth wheeler 7314459 Aug 12 10:09 solution-0005.0000-015.d2
+-rw-r--r--  1 bangerth wheeler 7414738 Aug 12 10:10 solution-0005.0000-016.d2
+-rw-r--r--  1 bangerth wheeler 7330518 Aug 12 10:05 solution-0005.0000-017.d2
+-rw-r--r--  1 bangerth wheeler 7418213 Aug 12 10:11 solution-0005.0000-018.d2
+-rw-r--r--  1 bangerth wheeler 7508715 Aug 12 10:08 solution-0005.0000-019.d2
+-rw-r--r--  1 bangerth wheeler 7747143 Aug 12 10:06 solution-0005.0000-020.d2
+-rw-r--r--  1 bangerth wheeler 7563548 Aug 12 10:05 solution-0005.0000-021.d2
+-rw-r--r--  1 bangerth wheeler 7846767 Aug 12 10:12 solution-0005.0000-022.d2
+-rw-r--r--  1 bangerth wheeler 7479576 Aug 12 10:12 solution-0005.0000-023.d2
+-rw-r--r--  1 bangerth wheeler 7925060 Aug 12 10:12 solution-0005.0000-024.d2
+-rw-r--r--  1 bangerth wheeler 7842034 Aug 12 10:13 solution-0005.0000-025.d2
+-rw-r--r--  1 bangerth wheeler 7585448 Aug 12 10:13 solution-0005.0000-026.d2
+-rw-r--r--  1 bangerth wheeler 7609698 Aug 12 10:10 solution-0005.0000-027.d2
+-rw-r--r--  1 bangerth wheeler 7576053 Aug 12 10:08 solution-0005.0000-028.d2
+-rw-r--r--  1 bangerth wheeler 7682418 Aug 12 10:08 solution-0005.0000-029.d2
+-rw-r--r--  1 bangerth wheeler 7544141 Aug 12 10:05 solution-0005.0000-030.d2
+-rw-r--r--  1 bangerth wheeler 7348899 Aug 12 10:04 solution-0005.0000-031.d2
+@endcode
+
+So let's see what happens if we attempt to merge all these files into a single
+one:
+@code
+examples/\step-19> time ./\step-19 solution-0005.0000-*.d2 -x gmv -o solution-0005.gmv
+real    2m08.35s
+user    1m26.61s
+system  0m05.74s
+
+examples/\step-19> ls -l solution-0005.gmv
+-rw-r--r--  1 bangerth wheeler 240680494 Sep  9 11:53 solution-0005.gmv
+@endcode
+So in roughly two minutes we have merged 240MB of data. Counting reading and
+writing, that averages a throughput of 3.8MB per second, not so bad.
+
+
+
+If visualized, the output looks very much like that shown for
+step-18. But that's not quite as
+important for the moment, rather we are interested in showing how to use the
+parameter file. To this end, remember that if no parameter file is given, or if
+it is empty, all the default values listed above are used. However, whatever we
+specify in the parameter file is used, unless overridden again by
+parameters found later on the command line.
+
+
+
+For example, let us use a simple parameter file named
+<code>solution-0005.prm</code> that contains only one line:
+@code
+set Output format  = gnuplot
+@endcode
+If we run step-19 with it again, we obtain this (for simplicity, and because we
+don't want to visualize 240MB of data anyway, we only convert the one, the
+twelfth, intermediate file to gnuplot format):
+@code
+examples/\step-19> ./\step-19 solution-0005.0000-012.d2 -p solution-0005.prm -o solution-0005.gnuplot
+
+examples/\step-19> ls -l solution-0005.gnuplot
+-rw-r--r--  1 bangerth wheeler 20281669 Sep  9 12:15 solution-0005.gnuplot
+@endcode
+
+We can then visualize this one file with gnuplot, obtaining something like
+this:
+<img src="http://www.dealii.org/images/steps/developer/step-19.solution-0005.png" alt="">
+
+That's not particularly exciting, but the file we're looking at has only one
+32nd of the entire domain anyway, so we can't expect much.
+
+In more complicated situations, we would use parameter files that set more of
+the values to non-default values. A file for which this is the case could look
+like this, generating output for the OpenDX visualization program:
+@code
+set Output format  = dx
+set Output file    = my_output_file.dx
+
+set Dummy iterations = -13
+
+subsection Dummy subsection
+  set Dummy color of output = blue
+  set Dummy generate output = false
+end
+@endcode
+If one wanted to, one could write comments into the file using the
+same format as used above in the help text, i.e. everything on a line
+following a hashmark (<code>#</code>) is considered a comment.
+
+
+
+If one runs step-19 with this input file, this is what is going to happen:
+@code
+examples/\step-19> ./\step-19 solution-0005.0000-012.d2 -p solution-0005.prm
+Line 4:
+    The entry value
+        -13
+    for the entry named
+        Dummy iterations
+    does not match the given pattern
+        [Integer range 1...1000 (inclusive)]
+@endcode
+Ah, right: valid values for the iteration parameter needed to be within the
+range [1...1000]. We would fix that, then go back to run the program with
+correct parameters.
+
+
+
+This program should have given some insight into the input parameter file
+handling that deal.II provides. The <code>ParameterHandler</code> class has a
+few more goodies beyond what has been shown in this program; for those who want
+to use this class, it would be useful to read the documentation of that class
+to get the full picture.
+
diff --git a/examples/step-19/doc/tooltip b/examples/step-19/doc/tooltip
new file mode 100644
index 0000000..dbb26e4
--- /dev/null
+++ b/examples/step-19/doc/tooltip
@@ -0,0 +1 @@
+Handling input parameter files. Converting output formats.
diff --git a/examples/step-19/step-19.cc b/examples/step-19/step-19.cc
new file mode 100644
index 0000000..4490222
--- /dev/null
+++ b/examples/step-19/step-19.cc
@@ -0,0 +1,534 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2005 - 2014 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Author: Luca Heltai, Wolfgang Bangerth, 2005
+ */
+
+
+// @sect4{Preliminaries}
+
+// As usual, we start with include files. This program is content with really
+// few of these -- we only need two files from the library (one for input and
+// output of graphical data, one for parameter handling), and a few C++
+// standard headers:
+#include <deal.II/base/data_out_base.h>
+#include <deal.II/base/parameter_handler.h>
+
+#include <list>
+#include <iostream>
+#include <fstream>
+
+// As mentioned in the first few tutorial programs, all names in deal.II are
+// declared in a namespace <code>dealii</code>. To make using these function
+// and class names simpler, we import the entire content of that namespace
+// into the global scope. As done for all previous programs already, we'll
+// also place everything we do here into a namespace of its own:
+namespace Step19
+{
+  using namespace dealii;
+
+  // Before we start with the actual program, let us declare a few global
+  // variables that will be used to hold the parameters this program is going
+  // to use. Usually, global variables are frowned upon for a good reason, but
+  // since we have such a short program here that does only a single thing, we
+  // may stray from our usual line and make these variables global, rather
+  // than passing them around to all functions or encapsulating them into a
+  // class.
+  //
+  // The variables we have are: first, an object that will hold parameters of
+  // operation, such as output format (unless given on the command line);
+  // second, the names of input and output files; and third, the format in
+  // which the output is to be written:
+  ParameterHandler         prm;               // parameters of operation, e.g. the output format
+  std::vector<std::string> input_file_names;  // names of the input files
+  std::string              output_file;       // output file name; empty until set by -o or taken from the parameter file
+  std::string              output_format;     // output format name; empty until set by -x or taken from the parameter file
+
+
+  // All the stuff this program does can be done from here on. As described in
+  // the introduction, what we have to do is declare what values the parameter
+  // file can have, parse the command line, read the input files, then write
+  // the output. We will do this in this order of operation, but before that
+  // let us declare a function that prints a message about how this program is
+  // to be used; the function first prints a general message, and then goes on
+  // to list the parameters that are allowed in the parameter file (the
+  // <code>ParameterHandler</code> class has a function to do exactly this;
+  // see the results section for what it prints):
+  void
+  print_usage_message ()   // prints the general usage text to std::cout, then the listing of declared parameters
+  {
+    static const char *message   // fixed help text describing the command line syntax
+      =
+        "\n"
+        "Converter from deal.II intermediate format to other graphics formats.\n"
+        "\n"
+        "Usage:\n"
+        "    ./step-19 [-p parameter_file] list_of_input_files \n"
+        "              [-x output_format] [-o output_file]\n"
+        "\n"
+        "Parameter sequences in brackets can be omitted if a parameter file is\n"
+        "specified on the command line and if it provides values for these\n"
+        "missing parameters.\n"
+        "\n"
+        "The parameter file has the following format and allows the following\n"
+        "values (you can cut and paste this and use it for your own parameter\n"
+        "file):\n"
+        "\n";
+    std::cout << message;
+
+    prm.print_parameters (std::cout, ParameterHandler::Text);   // text-format listing of the parameters held by the global prm
+  }
+
+
+  // @sect4{Declaring parameters for the input file}
+
+  // The second function is used to declare the parameters this program
+  // accepts from the input file. While we don't actually take many parameters
+  // from the input file except for, possibly, the output file name and
+  // format, we nevertheless want to show how to work with parameter files.
+  //
+  // In short, the <code>ParameterHandler</code> class works as follows: one
+  // declares the entries of parameters that can be given in input files
+  // together, and later on one can read an input file in which these
+  // parameters are set to their values. If a parameter is not listed in the
+  // input file, the default value specified in the declaration of that
+  // parameter is used. After that, the program can query the values assigned
+  // to certain parameters from the <code>ParameterHandler</code> object.
+  //
+  // Declaring parameters can be done using the
+  // <code>ParameterHandler::declare_entry</code> function. Its arguments are
+  // the name of a parameter, a default value (given as a string, even if the
+  // parameter is numeric in nature), and thirdly an object that describes
+  // constraints on values that may be passed to this parameter. In the
+  // example below, we use an object of type <code>Patterns::Anything</code>
+  // to denote that there are no constraints on file names (this is, of
+  // course, not true -- the operating system does have constraints, but from
+  // an application standpoint, almost all names are valid). In other cases,
+  // one may, for example, use <code>Patterns::Integer</code> to make sure
+  // that only parameters are accepted that can be interpreted as integer
+  // values (it is also possible to specify bounds for integer values, and all
+  // values outside this range are rejected), <code>Patterns::Double</code>
+  // for floating point values, classes that make sure that the given
+  // parameter value is a comma separated list of things, etc. Take a look at
+  // the <code>Patterns</code> namespace to see what is possible.
+  //
+  // The fourth argument to <code>declare_entry</code> is a help string that
+  // can be printed to document what this parameter is meant to be used for
+  // and other information you may consider important when declaring this
+  // parameter. The default value of this fourth argument is the empty string.
+  //
+  // I always wanted to have an example program describing the
+  // <code>ParameterHandler</code> class, because it is so particularly
+  // useful. It would have been useful in a number of previous example
+  // programs (for example, in order to let the tolerance for linear solvers,
+  // or the number of refinement steps be determined by a run-time parameter,
+  // rather than hard-coding them into the program), but it turned out that
+  // trying to explain this class there would have overloaded them with things
+  // that would have distracted from the main purpose. However, while writing
+  // this program, I realized that there aren't all that many parameters this
+  // program can usefully ask for, or better, it turned out: declaring and
+  // querying these parameters was already done centrally in one place of
+  // the library, namely the <code>DataOutInterface</code> class that handles
+  // exactly this -- managing parameters for input and output.
+  //
+  // So the second function call in this function is to let the
+  // <code>DataOutInterface</code> declare a good number of parameters that
+  // control everything from the output format to what kind of output should
+  // be generated if output is written in a specific graphical format. For
+  // example, when writing data in encapsulated postscript (EPS) format, the
+  // result is just a 2d projection, not data that can be viewed and rotated
+  // with a viewer. Therefore, one has to choose the viewing angle and a
+  // number of other options up front, when output is generated, rather than
+  // playing around with them later on. The call to
+  // <code>DataOutInterface::declare_parameters</code> declares entries that
+  // allow one to specify them in the parameter input file during run-time. If the
+  // parameter file does not contain entries for them, defaults are taken.
+  //
+  // As a final note: <code>DataOutInterface</code> is a template, because it
+  // is usually used to write output for a specific space dimension. However,
+  // this program is supposed to be used for all dimensions at the same time,
+  // so we don't know at compile time what the right dimension is when
+  // specifying the template parameter. Fortunately, declaring parameters is
+  // something that is space dimension independent, so we can just pick one
+  // arbitrarily. We pick <code>1</code>, but it could have been any other
+  // number as well.
+  void declare_parameters ()   // registers all entries on the global prm: output file, DataOutInterface options, dummy examples
+  {
+    prm.declare_entry ("Output file", "",
+                       Patterns::Anything(),
+                       "The name of the output file to be generated");
+
+    DataOutInterface<1>::declare_parameters (prm);
+
+    // Although everything that this program can usefully request in terms of
+    // input parameters is already handled by now, let us nevertheless show
+    // how to use input parameters in other circumstances. First, parameters
+    // are like files in a directory tree: they can be in the top-level
+    // directory, but you can also group them into subdirectories to make it
+    // easier to find them or to be able to use the same parameter name in
+    // different contexts.
+    //
+    // Let us first declare a dummy parameter in the top-level section; we
+    // assume that it will denote the number of iterations, and that useful
+    // numbers of iterations that a user should be able to specify are in the
+    // range 1...1000, with a default value of 42:
+    prm.declare_entry ("Dummy iterations", "42",
+                       Patterns::Integer (1,1000),
+                       "A dummy parameter asking for an integer");
+
+    // Next, let us declare a sub-section (the equivalent to a
+    // subdirectory). When entered, all following parameter declarations will
+    // be within this subsection. To also visually group these declarations
+    // with the subsection name, I like to use curly braces to force my editor
+    // to indent everything that goes into this sub-section by one level of
+    // indentation. In this sub-section, we shall have two entries, one that
+    // takes a Boolean parameter and one that takes a selection list of
+    // values, separated by the '|' character:
+    prm.enter_subsection ("Dummy subsection");
+    {
+      prm.declare_entry ("Dummy generate output", "true",
+                         Patterns::Bool(),
+                         "A dummy parameter that can be fed with either "
+                         "'true' or 'false'");
+      prm.declare_entry ("Dummy color of output", "red",
+                         Patterns::Selection("red|black|blue"),
+                         "A dummy parameter that shows how one can define a "
+                         "parameter that can be assigned values from a finite "
+                         "set of values");
+    }
+    prm.leave_subsection ();
+    // After this, we have left the subsection again. You should have gotten
+    // the idea by now how one can nest subsections to separate
+    // parameters. There are a number of other possible patterns describing
+    // possible values of parameters; in all cases, if you try to pass a
+    // parameter to the program that does not match the expectations of the
+    // pattern, it will reject the parameter file and ask you to fix it. After
+    // all, it does not make much sense if the parameter file assigned the
+    // value "red" to the Boolean parameter "Dummy generate output".
+  }
+
+
+  // @sect4{Parsing the command line}
+
+  // Our next task is to see what information has been provided on the command
+  // line. First, we need to be sure that there is at least one parameter: an
+  // input file. The format and the output file can be specified in the
+  // parameter file, but the list of input files can't, so at least one
+  // parameter needs to be there. Together with the name of the program (the
+  // zeroth parameter), <code>argc</code> must therefore be at least 2. If
+  // this is not the case, we print an error message and exit:
+  void
+  parse_command_line (const int     argc,
+                      char *const *argv)
+  {
+    if (argc < 2)
+      {
+        print_usage_message ();
+        exit (1);
+      }
+
+    // Next, collect all parameters in a list that will be somewhat simpler to
+    // handle than the <code>argc</code>/<code>argv</code> mechanism. We omit
+    // the name of the executable at the zeroth index:
+    std::list<std::string> args;
+    for (int i=1; i<argc; ++i)
+      args.push_back (argv[i]);
+
+    // Then process all these parameters. If the parameter is <code>-p</code>,
+    // then there must be a parameter file following (which we should then
+    // read), in case of <code>-x</code> it is the name of an output
+    // format. Finally, for <code>-o</code> it is the name of the output
+    // file. In all cases, once we've treated a parameter, we remove it from
+    // the list of parameters:
+    while (args.size())
+      {
+        if (args.front() == std::string("-p"))
+          {
+            if (args.size() == 1)
+              {
+                std::cerr << "Error: flag '-p' must be followed by the "
+                          << "name of a parameter file."
+                          << std::endl;
+                print_usage_message ();
+                exit (1);
+              }
+            args.pop_front ();
+            const std::string parameter_file = args.front ();
+            args.pop_front ();
+
+            // Now read the input file:
+            prm.read_input (parameter_file);
+
+            // Both the output file name as well as the format can be
+            // specified on the command line. We have therefore given them
+            // global variables that hold their values, but they can also be
+            // set in the parameter file. We therefore need to extract them
+            // from the parameter file here, because they may be overridden by
+            // later command line parameters:
+            if (output_file == "")
+              output_file = prm.get ("Output file");
+
+            if (output_format == "")
+              output_format = prm.get ("Output format");
+
+            // Finally, let us note that if we were interested in the values
+            // of the parameters declared above in the dummy subsection, we
+            // would write something like this to extract the value of the
+            // Boolean flag (the <code>prm.get</code> function returns the
+            // value of a parameter as a string, whereas the
+            // <code>prm.get_X</code> functions return a value already
+            // converted to a different type):
+            prm.enter_subsection ("Dummy subsection");
+            {
+              prm.get_bool ("Dummy generate output");
+            }
+            prm.leave_subsection ();
+            // We would assign the result to a variable, of course, but don't
+            // here in order not to generate an unused variable that the
+            // compiler might warn about.
+            //
+            // Alas, let's move on to handling of output formats:
+          }
+        else if (args.front() == std::string("-x"))
+          {
+            if (args.size() == 1)
+              {
+                std::cerr << "Error: flag '-x' must be followed by the "
+                          << "name of an output format."
+                          << std::endl;
+                print_usage_message ();
+                exit (1);
+              }
+            args.pop_front ();
+            output_format = args.front();
+            args.pop_front ();
+          }
+        else if (args.front() == std::string("-o"))
+          {
+            if (args.size() == 1)
+              {
+                std::cerr << "Error: flag '-o' must be followed by the "
+                          << "name of an output file."
+                          << std::endl;
+                print_usage_message ();
+                exit (1);
+              }
+            args.pop_front ();
+            output_file = args.front();
+            args.pop_front ();
+          }
+
+        // Otherwise, this is not a parameter that starts with a known minus
+        // sequence, and we should consider it to be the name of an input
+        // file. Let us therefore add this file to the list of input files:
+        else
+          {
+            input_file_names.push_back (args.front());
+            args.pop_front ();
+          }
+      }
+
+    // Next check a few things and create errors if the checks fail. Firstly,
+    // there must be at least one input file
+    if (input_file_names.size() == 0)
+      {
+        std::cerr << "Error: No input file specified." << std::endl;
+        print_usage_message ();
+        exit (1);
+      }
+  }
+
+
+  // @sect4{Generating output}
+
+  // Now that we have all the information, we need to read all the input
+  // files, merge them, and generate a single output file. This, after all,
+  // was the motivation, born of the necessity encountered in the step-18
+  // tutorial program, to write this program in the first place.
+  //
+  // So what we do first is to declare an object into which we will merge the
+  // data from all the input files, and read in the first file through a
+  // stream. Note that every time we open a file, we use the
+  // <code>AssertThrow</code> macro to check whether the file is really
+  // readable -- if it isn't then this will trigger an exception and
+  // corresponding output will be generated from the exception handler in
+  // <code>main()</code>:
+  template <int dim, int spacedim>
+  void do_convert ()
+  {
+    DataOutReader<dim,spacedim> merged_data;
+
+    {
+      std::ifstream input (input_file_names[0].c_str());
+      AssertThrow (input, ExcIO());
+
+      merged_data.read (input);
+    }
+
+    // For all the other input files, we read their data into an intermediate
+    // object, and then merge that into the first object declared above:
+    for (unsigned int i=1; i<input_file_names.size(); ++i)
+      {
+        std::ifstream input (input_file_names[i].c_str());
+        AssertThrow (input, ExcIO());
+
+        DataOutReader<dim,spacedim> additional_data;
+        additional_data.read (input);
+        merged_data.merge (additional_data);
+      }
+
+    // Once we have this, let us parse what we got as the name of the output
+    // format into an identifier. Fortunately, the <code>DataOutBase</code>
+    // class has a function that does this parsing for us, i.e. it knows about
+    // all the presently supported output formats, so this program picks up
+    // new formats the library acquires without having to be changed! We do
+    // this before opening the output stream: opening truncates an existing
+    // file, and we should not do that if the format name is unusable.
+    const DataOutBase::OutputFormat format
+      = DataOutBase::parse_output_format (output_format);
+
+    // Only now open the output stream, checking that it is writable:
+    std::ofstream output_stream (output_file.c_str());
+    AssertThrow (output_stream, ExcIO());
+
+    // Finally, write the merged data to the output:
+    merged_data.write(output_stream, format);
+  }
+
+
+  // @sect4{Dispatching output generation}
+
+  // The function above takes template parameters relating to the space
+  // dimension of the output, and the dimension of the objects to be
+  // output. (For example, when outputting whole cells, these two dimensions
+  // are the same, but the intermediate files may contain only data pertaining
+  // to the faces of cells, in which case the first parameter will be one less
+  // than the space dimension.)
+  //
+  // The problem is: at compile time, we of course don't know the dimensions
+  // used in the input files. We have to plan for all cases, therefore. This
+  // is a little clumsy, since we need to specify the dimensions statically at
+  // compile time, even though we will only know about them at run time.
+  //
+  // So here is what we do: from the first input file, we determine (using a
+  // function in <code>DataOutBase</code> that exists for this purpose) these
+  // dimensions. We then have a series of switches that dispatch, statically,
+  // to the <code>do_convert</code> functions with different template
+  // arguments. Not pretty, but works. Apart from this, the function does
+  // nothing -- except making sure that it covered the dimensions for which it
+  // was called, using the <code>AssertThrow</code> macro at places in the
+  // code that shouldn't be reached:
+  void convert ()
+  {
+    // We will ask the first input file about its dimensions, so there had
+    // better be one:
+    AssertThrow (input_file_names.size() > 0,
+                 ExcMessage ("No input files specified."));
+
+    std::ifstream first_input (input_file_names[0].c_str());
+    AssertThrow (first_input, ExcIO());
+
+    // The pair we get back holds the dimension of the objects stored in the
+    // file first, and the dimension of the space they live in second:
+    const std::pair<unsigned int, unsigned int> dimensions
+      = DataOutBase::determine_intermediate_format_dimensions (first_input);
+    const unsigned int object_dim = dimensions.first;
+    const unsigned int space_dim  = dimensions.second;
+
+    // Translate the run-time pair into compile-time template arguments. Each
+    // supported combination gets its own branch, and we are done as soon as
+    // one of them has run:
+    if (object_dim == 1 && space_dim == 1)
+      {
+        do_convert <1,1> ();
+        return;
+      }
+    if (object_dim == 1 && space_dim == 2)
+      {
+        do_convert <1,2> ();
+        return;
+      }
+    if (object_dim == 2 && space_dim == 2)
+      {
+        do_convert <2,2> ();
+        return;
+      }
+    if (object_dim == 2 && space_dim == 3)
+      {
+        do_convert <2,3> ();
+        return;
+      }
+    if (object_dim == 3 && space_dim == 3)
+      {
+        do_convert <3,3> ();
+        return;
+      }
+
+    // Arriving here means that none of the combinations above matched, i.e.
+    // the first input file reports dimensions this program has no
+    // <code>do_convert</code> instantiation for:
+    AssertThrow (false, ExcNotImplemented());
+  }
+}
+
+
+
+// @sect4{main()}
+
+// Finally, the main program. It declares the parameters, parses the command
+// line (which includes reading parameter files), and then reads the input
+// files and generates the output. Everything else just has to do with
+// handling exceptions: they are caught by reference to const, a message is
+// printed to <code>std::cerr</code>, and the program returns 1 instead of 0
+// to signal the failure to the caller.
+int main (int argc, char **argv)
+{
+  try
+    {
+      using namespace Step19;
+
+      declare_parameters ();
+      parse_command_line (argc, argv);
+
+      convert ();
+    }
+  catch (const std::exception &exc)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+
+      return 1;
+    }
+  catch (...)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    }
+
+  return 0;
+}
diff --git a/examples/step-2/CMakeLists.txt b/examples/step-2/CMakeLists.txt
new file mode 100644
index 0000000..3eb212a
--- /dev/null
+++ b/examples/step-2/CMakeLists.txt
@@ -0,0 +1,39 @@
+##
+#  CMake script for the step-2 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-2")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#    FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#    FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#    SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC}) 
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-2/doc/builds-on b/examples/step-2/doc/builds-on
new file mode 100644
index 0000000..4512741
--- /dev/null
+++ b/examples/step-2/doc/builds-on
@@ -0,0 +1 @@
+step-1
diff --git a/examples/step-2/doc/intro.dox b/examples/step-2/doc/intro.dox
new file mode 100644
index 0000000..2d89d08
--- /dev/null
+++ b/examples/step-2/doc/intro.dox
@@ -0,0 +1,99 @@
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+@dealiiVideoLecture{9}
+
+After we have created a grid in the previous example, we now show how
+to define degrees of freedom on this mesh. For this example, we
+will use the lowest order ($Q_1$) finite elements, for which the degrees
+of freedom are associated with the vertices of the mesh. Later
+examples will demonstrate higher order elements where degrees of freedom are
+not necessarily associated with vertices any more, but can be associated
+with edges, faces, or cells.
+
+The term "degree of freedom" is commonly used in the finite element community
+to indicate two slightly different, but related things. The first is that we'd
+like to represent the finite element solution as a linear combination of shape
+functions, in the form $u_h(\mathbf x) = \sum_{j=0}^{N-1} U_j \varphi_j(\mathbf
+x)$. Here, the $U_j$ are the expansion coefficients. Because we don't know
+their values yet (we will compute them as the solution of a linear or
+nonlinear system), they are called "unknowns" or "degrees of freedom". The
+second meaning of the term can be explained as follows: A mathematical
+description of a finite element problem is often to say that we are looking for
+a finite dimensional function $u_h \in V_h$ that satisfies some set of equations
+(e.g. $a(u_h,\varphi_h)=(f,\varphi_h)$ for all test functions $\varphi_h\in
+V_h$). In other words, all we say here is that the solution needs to lie in some
+space $V_h$. However, to actually solve this problem on a computer we need to
+choose a basis of this space; this is the set of shape functions
+$\varphi_j(\mathbf x)$ we have used above in the expansion of $u_h(\mathbf x)$
+with coefficients $U_j$. There are of course many bases of the space $V_h$,
+but we will specifically choose the one that is described by the finite
+element functions that are traditionally defined locally on the cells of the
+mesh. Describing "degrees of freedom" in this context requires us to simply
+<i>enumerate</i> the basis functions of the space $V_h$. For $Q_1$ elements
+this means simply enumerating the vertices of the mesh in some way, but for
+higher elements one also has to enumerate the shape functions that are
+associated with edges, faces, or cell interiors of the mesh. The class that
+provides this enumeration of the basis functions of $V_h$ is called DoFHandler.
+
+Defining degrees of freedom ("DoF"s in short) on a mesh is a rather
+simple task, since the library does all the work for you. Essentially,
+all you have to do is create a finite element object (from one of the
+many finite element classes deal.II already has, see for example the
+@ref fe documentation) and give it to a DoFHandler object through the
+DoFHandler::distribute_dofs function ("distributing DoFs" is the term we use
+to describe the process of enumerating the basis functions as discussed
+above). The DoFHandler is a class that
+manages which degrees of freedom live where, i.e., it can answer
+questions like "how many degrees of freedom are there globally" and
+"on this cell, give me the global indices of the shape functions that
+live here". This is the sort of information you need when determining
+how big your system matrix should be, and when copying the
+contributions of a single cell into the global matrix.
+
+The next step would then be to compute a matrix and right hand side
+corresponding to a particular differential equation using this finite element
+and mesh. We will keep this step for the step-3 program and rather talk about
+one practical aspect of a finite element program, namely that finite element
+matrices are almost always very sparse, i.e. almost all entries in these
+matrices are zero. (To be more precise, we say a discretization leads to a
+sparse matrix if the number of nonzero entries <i>per row</i> in the matrix is
+bounded by a number that is independent of the overall number of degrees of
+freedom. For example, the simple 5-point stencil of a finite difference
+approximation of the Laplace equation leads to a sparse matrix since the
+number of nonzero entries per row is five, and therefore independent of the
+total size of the matrix.)  Sparsity is one of the distinguishing features of
+the finite element method compared to, say, approximating the solution of a
+partial differential equation using a Taylor expansion and matching
+coefficients, or using a Fourier basis.
+
+In practical terms, it is the sparsity of matrices that enables us to solve
+problems with millions or billions of unknowns. To understand this, note that
+a matrix with $N$ rows, each with a fixed upper bound for the number of
+nonzero entries, requires ${\cal O}(N)$ memory locations for storage, and a
+matrix-vector multiplication also requires only ${\cal O}(N)$
+operations. Consequently, if we had a linear solver that requires only a fixed
+number of matrix-vector multiplications to come up with the solution of a
+linear system with this matrix, then we would have a solver that can find the
+values of all $N$ unknowns with optimal complexity, i.e., with a total of
+${\cal O}(N)$ operations. It is clear that this wouldn't be possible if the
+matrix were not sparse, but it also requires very specialized solvers such as
+multigrid methods to satisfy the requirement that the solution requires only a
+fixed number of matrix-vector multiplications. We will frequently look at the
+question of what solver to use in the remaining programs of this tutorial.
+
+The sparsity is generated by the fact that finite element shape
+functions are defined locally on individual cells, rather than
+globally, and that the local differential operators in the bilinear
+form only couple shape functions that have some overlap. By default,
+the DoFHandler class enumerates degrees of freedom on a mesh in a
+rather random way; consequently, the sparsity pattern is also not
+optimized for any particular purpose. However, for
+some algorithms, especially for some linear solvers and preconditioners, it is
+advantageous to have the degrees of freedom numbered in a certain
+order, and we will use the algorithm of Cuthill and McKee to do
+so. This can be thought of as choosing a different, permuted basis of the
+finite element space.
+The results are written to a file and visualized using a simple
+visualization program; you get to
+see the outcome in the <a href="#Results">results section below</a>.
diff --git a/examples/step-2/doc/kind b/examples/step-2/doc/kind
new file mode 100644
index 0000000..15a13db
--- /dev/null
+++ b/examples/step-2/doc/kind
@@ -0,0 +1 @@
+basic
diff --git a/examples/step-2/doc/results.dox b/examples/step-2/doc/results.dox
new file mode 100644
index 0000000..6fde7c7
--- /dev/null
+++ b/examples/step-2/doc/results.dox
@@ -0,0 +1,75 @@
+<h1>Results</h1>
+
+The program has, after having been run, produced two sparsity
+patterns. We can visualize them by opening the <code>.svg</code> files in a web browser.
+
+The results then look like this (every point denotes an entry which
+might be nonzero; of course, whether the entry actually is
+zero or not depends on the equation under consideration, but the
+indicated positions in the matrix tell us which shape functions can
+and which can't couple when discretizing a local, i.e. differential,
+equation):
+<table style="width:60%" align="center">
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-2.sparsity-1.svg" alt=""></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-2.sparsity-2.svg" alt=""></td>
+  </tr>
+</table>
+
+The different regions in the left picture, indicated by kinks in the lines and
+single dots on the left and top, represent the degrees of
+freedom on the different refinement levels of the triangulation.  As
+can be seen in the right picture, the sparsity pattern is much better
+clustered around the main diagonal of the matrix after
+renumbering. Although this might not be apparent, the number of
+nonzero entries is the same in both pictures, of course.
+
+
+
+<h3> Possible extensions </h3>
+
+Just as with step-1, you may want to play with the program a bit to
+familiarize yourself with deal.II. For example, in the
+<code>distribute_dofs</code> function, we use linear finite elements
+(that's what the argument "1" to the FE_Q object is). Explore how the
+sparsity pattern changes if you use higher order elements, for example
+cubic or quintic ones (by using 3 and 5 as the respective arguments).
+
+Or, you could see how the sparsity pattern changes with more
+refinements. You will see that not only the size of the matrix
+changes, but also its bandwidth (the distance from the diagonal of
+those nonzero elements of the matrix that are farthest away from the
+diagonal), though the ratio of bandwidth to size typically shrinks,
+i.e. the matrix clusters more around the diagonal.
+
+Another idea for experiments would be to try other renumbering
+strategies than Cuthill-McKee from the DoFRenumbering namespace and see how
+they affect the sparsity pattern.
+
+You can also visualize the output using <a
+href="http://www.gnuplot.info/">GNUPLOT</a> (one of the simpler visualization
+programs; maybe not the easiest to use since it is command line driven, but
+also universally available on all Linux and other Unix-like systems) by changing from <code>print_svg()</code> to <code>print_gnuplot()</code> in <code>distribute_dofs()</code> and <code>renumber_dofs()</code>:
+@code
+examples/\step-2> gnuplot
+
+        G N U P L O T
+        Version 3.7 patchlevel 3
+        last modified Thu Dec 12 13:00:00 GMT 2002
+        System: Linux 2.6.11.4-21.10-default
+
+        Copyright(C) 1986 - 1993, 1998 - 2002
+        Thomas Williams, Colin Kelley and many others
+
+        Type `help` to access the on-line reference manual
+        The gnuplot FAQ is available from
+        http://www.gnuplot.info/gnuplot-faq.html
+
+        Send comments and requests for help to <info-gnuplot@dartmouth.edu>
+        Send bugs, suggestions and mods to <bug-gnuplot@dartmouth.edu>
+
+
+Terminal type set to 'x11'
+gnuplot> set style data points
+gnuplot> plot "sparsity_pattern.1"
+@endcode
diff --git a/examples/step-2/doc/tooltip b/examples/step-2/doc/tooltip
new file mode 100644
index 0000000..ac9d18b
--- /dev/null
+++ b/examples/step-2/doc/tooltip
@@ -0,0 +1 @@
+Assigning degrees of freedom to a grid.
diff --git a/examples/step-2/step-2.cc b/examples/step-2/step-2.cc
new file mode 100644
index 0000000..9bd4d36
--- /dev/null
+++ b/examples/step-2/step-2.cc
@@ -0,0 +1,311 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 1999 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Author: Wolfgang Bangerth, University of Heidelberg, 1999
+ */
+
+
+// The first few includes are just like in the previous program, so do not
+// require additional comments:
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/manifold_lib.h>
+
+// However, the next file is new. We need this include file for the
+// association of degrees of freedom ("DoF"s) to vertices, lines, and cells:
+#include <deal.II/dofs/dof_handler.h>
+
+// The following include contains the description of the bilinear finite
+// element, including the facts that it has one degree of freedom on each
+// vertex of the triangulation, but none on faces and none in the interior of
+// the cells.
+//
+// (In fact, the file contains the description of Lagrange elements in
+// general, i.e. also the quadratic, cubic, etc versions, and not only for 2d
+// but also 1d and 3d.)
+#include <deal.II/fe/fe_q.h>
+// In the following file, several tools for manipulating degrees of freedom
+// can be found:
+#include <deal.II/dofs/dof_tools.h>
+// We will use a sparse matrix to visualize the pattern of nonzero entries
+// resulting from the distribution of degrees of freedom on the grid. That
+// class can be found here:
+#include <deal.II/lac/sparse_matrix.h>
+// We will also need to use an intermediate sparsity pattern structure, which
+// is found in this file:
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+
+// We will want to use a special algorithm to renumber degrees of freedom. It
+// is declared here:
+#include <deal.II/dofs/dof_renumbering.h>
+
+// And this is again needed for C++ output:
+#include <fstream>
+
+// Finally, as in step-1, we import the deal.II namespace into the global
+// scope:
+using namespace dealii;
+
+// @sect3{Mesh generation}
+
+// This is the function that produced the circular grid in the previous step-1
+// example program with fewer refinement steps. The sole difference is that it returns the grid it
+// produces via its argument.
+//
+// The details of what the function does are explained in step-1. The only
+// thing we would like to comment on is this:
+//
+// Since we want to export the triangulation through this function's
+// parameter, we need to make sure that the manifold object lives at least as
+// long as the triangulation does. However, in step-1, the manifold object is
+// a local variable, and it would be deleted at the end of the function, which
+// is too early. We avoid the problem by declaring it 'static' which makes
+// sure that the object is initialized the first time control of the program
+// passes this point, but at the same time assures that it lives until the end
+// of the program.
+void make_grid (Triangulation<2> &triangulation)
+{
+  const Point<2> center (1,0);
+  const double inner_radius = 0.5;
+  const double outer_radius = 1.0;
+  GridGenerator::hyper_shell (triangulation,
+                              center, inner_radius, outer_radius,
+                              5 );
+
+  // Static, so that the manifold object is still alive after we return:
+  static const SphericalManifold<2> manifold_description(center);
+  triangulation.set_all_manifold_ids(0);
+  triangulation.set_manifold (0, manifold_description);
+
+  for (unsigned int cycle=0; cycle<3; ++cycle)
+    {
+      const Triangulation<2>::active_cell_iterator
+      endc = triangulation.end();
+      for (Triangulation<2>::active_cell_iterator
+           cell = triangulation.begin_active(); cell!=endc; ++cell)
+        {
+          // Does any vertex of this cell lie on the inner circle?
+          bool on_inner_circle = false;
+          for (unsigned int v=0;
+               !on_inner_circle && v<GeometryInfo<2>::vertices_per_cell;
+               ++v)
+            on_inner_circle
+              = (std::fabs(center.distance (cell->vertex(v)) - inner_radius)
+                 < 1e-10);
+
+          if (on_inner_circle)
+            cell->set_refine_flag ();
+        }
+
+      triangulation.execute_coarsening_and_refinement ();
+    }
+}
+
+// @sect3{Creation of a DoFHandler}
+
+// Up to now, we only have a grid, i.e. some geometrical (the position of the
+// vertices) and some topological information (how vertices are connected to
+// lines, and lines to cells, as well as which cells neighbor which other
+// cells). To use numerical algorithms, one needs some logical information in
+// addition to that: we would like to associate degree of freedom numbers to
+// each vertex (or line, or cell, in case we were using higher order elements)
+// to later generate matrices and vectors which describe a finite element
+// field on the triangulation.
+//
+// This function shows how to do this. The object to consider is the
+// <code>DoFHandler</code> class template.  Before we do so, however, we first
+// need something that describes how many degrees of freedom are to be
+// associated to each of these objects. Since this is one aspect of the
+// definition of a finite element space, the finite element base class stores
+// this information. In the present context, we therefore create an object of
+// the derived class <code>FE_Q</code> that describes Lagrange elements. Its
+// constructor takes one argument that states the polynomial degree of the
+// element, which here is one (indicating a bi-linear element); this then
+// corresponds to one degree of freedom for each vertex, while there are none
+// on lines and inside the quadrilateral. A value of, say, three given to the
+// constructor would instead give us a bi-cubic element with one degree of
+// freedom per vertex, two per line, and four inside the cell. In general,
+// <code>FE_Q</code> denotes the family of continuous elements with complete
+// polynomials (i.e. tensor-product polynomials) up to the specified order.
+//
+// We first need to create an object of this class and then pass it on to the
+// <code>DoFHandler</code> object to allocate storage for the degrees of
+// freedom (in deal.II lingo: we <code>distribute degrees of
+// freedom</code>). Note that the DoFHandler object will store a reference to
+// this finite element object, so we have to make sure its lifetime is at
+// least as long as that of the <code>DoFHandler</code>; one way to make sure
+// this is so is to make it static as well, in order to prevent its premature
+// destruction. (However, the library would warn us if we forgot about this
+// and abort the program if that occurred. You can check this, if you want, by
+// removing the 'static' declaration.)
+void distribute_dofs (DoFHandler<2> &dof_handler)
+{
+  // As described above, let us first create a finite element object, and then
+  // use it to allocate degrees of freedom on the triangulation with which the
+  // dof_handler object is associated:
+  static const FE_Q<2> finite_element(1);
+  dof_handler.distribute_dofs (finite_element);
+
+  // Now that we have associated a degree of freedom with a global number to
+  // each vertex, we wonder how to visualize this?  There is no simple way to
+  // directly visualize the DoF number associated with each vertex. However,
+  // such information would hardly ever be truly important, since the
+  // numbering itself is more or less arbitrary. There are more important
+  // factors, of which we will demonstrate one in the following.
+  //
+  // Associated with each vertex of the triangulation is a shape
+  // function. Assume we want to solve something like Laplace's equation, then
+  // the different matrix entries will be the integrals over the gradient of
+  // each pair of such shape functions. Obviously, since the shape functions
+  // are nonzero only on the cells adjacent to the vertex they are associated
+  // with, matrix entries will be nonzero only if the supports of the shape
+  // functions associated to that column and row %numbers intersect. This is
+  // only the case for adjacent shape functions, and therefore only for
+  // adjacent vertices. Now, since the vertices are numbered more or less
+  // randomly by the above function (DoFHandler::distribute_dofs), the pattern
+  // of nonzero entries in the matrix will be somewhat ragged, and we will
+  // take a look at it now.
+  //
+  // First we have to create a structure which we use to store the places of
+  // nonzero elements. This can then later be used by one or more sparse
+  // matrix objects that store the values of the entries in the locations
+  // stored by this sparsity pattern. The class that stores the locations is
+  // the SparsityPattern class. As it turns out, however, this class has some
+  // drawbacks when we try to fill it right away: its data structures are set
+  // up in such a way that we need to have an estimate for the maximal number
+  // of entries we may wish to have in each row. In two space dimensions,
+  // reasonable values for this estimate are available through the
+  // DoFHandler::max_couplings_between_dofs() function, but in three
+  // dimensions the function almost always severely overestimates the true
+  // number, leading to a lot of wasted memory, sometimes too much for the
+  // machine used, even if the unused memory can be released immediately after
+  // computing the sparsity pattern. In order to avoid this, we use an
+  // intermediate object of type DynamicSparsityPattern that uses a
+  // different %internal data structure and that we can later copy into the
+  // SparsityPattern object without much overhead. (Some more information on
+  // these data structures can be found in the @ref Sparsity module.) In order
+  // to initialize this intermediate data structure, we have to give it the
+  // size of the matrix, which in our case will be square with as many rows
+  // and columns as there are degrees of freedom on the grid:
+  DynamicSparsityPattern dynamic_sparsity_pattern(dof_handler.n_dofs(),
+                                                  dof_handler.n_dofs());
+
+  // We then fill this object with the places where nonzero elements will be
+  // located given the present numbering of degrees of freedom:
+  DoFTools::make_sparsity_pattern (dof_handler, dynamic_sparsity_pattern);
+
+  // Now we are ready to create the actual sparsity pattern that we could
+  // later use for our matrix. It will just contain the data already assembled
+  // in the DynamicSparsityPattern.
+  SparsityPattern sparsity_pattern;
+  sparsity_pattern.copy_from (dynamic_sparsity_pattern);
+
+  // With this, we can now write the results to a file:
+  std::ofstream out ("sparsity_pattern1.svg");
+  sparsity_pattern.print_svg (out);
+  // The result is stored in an <code>.svg</code> file, where each nonzero entry in the
+  // matrix corresponds with a red square in the image. The output will be
+  // shown below.
+  //
+  // If you look at it, you will note that the sparsity pattern is
+  // symmetric. This should not come as a surprise, since we have not given
+  // the <code>DoFTools::make_sparsity_pattern</code> any information that
+  // would indicate that our bilinear form may couple shape functions in a
+  // non-symmetric way. You will also note that it has several distinct
+  // regions, which stem from the fact that the numbering starts from the
+  // coarsest cells and moves on to the finer ones; since they are all
+  // distributed symmetrically around the origin, this shows up again in the
+  // sparsity pattern.
+}
+
+
+// @sect3{Renumbering of DoFs}
+
+// In the sparsity pattern produced above, the nonzero entries extended quite
+// far off from the diagonal. For some algorithms, for example for incomplete
+// LU decompositions or Gauss-Seidel preconditioners, this is unfavorable, and
+// we will show a simple way how to improve this situation.
+//
+// Remember that for an entry $(i,j)$ in the matrix to be nonzero, the
+// supports of the shape functions i and j needed to intersect (otherwise in
+// the integral, the integrand would be zero everywhere since either the one
+// or the other shape function is zero at some point). However, the supports
+// of shape functions intersected only if they were adjacent to each other, so
+// in order to have the nonzero entries clustered around the diagonal (where
+// $i$ equals $j$), we would like to have adjacent shape functions to be
+// numbered with indices (DoF numbers) that differ not too much.
+//
+// This can be accomplished by a simple front marching algorithm, where one
+// starts at a given vertex and gives it the index zero. Then, its neighbors
+// are numbered successively, making their indices close to the original
+// one. Then, their neighbors, if not yet numbered, are numbered, and so on.
+//
+// One algorithm that adds a little bit of sophistication along these lines is
+// the one by Cuthill and McKee. We will use it in the following function to
+// renumber the degrees of freedom such that the resulting sparsity pattern is
+// more localized around the diagonal. The only interesting part of the
+// function is the first call to <code>DoFRenumbering::Cuthill_McKee</code>,
+// the rest is essentially as before:
+void renumber_dofs (DoFHandler<2> &dof_handler)
+{
+  // Renumber first, so that the pattern built below reflects the new
+  // ordering of the degrees of freedom.
+  DoFRenumbering::Cuthill_McKee (dof_handler);
+
+  // Gather the couplings in the intermediate (dynamic) format; the matrix
+  // is square with one row and one column per degree of freedom.
+  DynamicSparsityPattern dsp (dof_handler.n_dofs(), dof_handler.n_dofs());
+  DoFTools::make_sparsity_pattern (dof_handler, dsp);
+
+  // Copy the result into the final sparsity pattern format ...
+  SparsityPattern renumbered_pattern;
+  renumbered_pattern.copy_from (dsp);
+
+  // ... and write it out as an SVG image.
+  std::ofstream svg_file ("sparsity_pattern2.svg");
+  renumbered_pattern.print_svg (svg_file);
+}
+
+// Again, the output is shown below. Note that the nonzero entries are
+// clustered far better around the diagonal than before. This effect is even
+// more distinguished for larger matrices (the present one has 1260 rows and
+// columns, but large matrices often have several 100,000s).
+
+// It is worth noting that the <code>DoFRenumbering</code> class offers a
+// number of other algorithms as well to renumber degrees of freedom. For
+// example, it would of course be ideal if all couplings were in the lower or
+// upper triangular part of a matrix, since then solving the linear system
+// would amount to only forward or backward substitution. This is of course
+// unachievable for symmetric sparsity patterns, but in some special
+// situations involving transport equations, this is possible by enumerating
+// degrees of freedom from the inflow boundary along streamlines to the
+// outflow boundary. Not surprisingly, <code>DoFRenumbering</code> also has
+// algorithms for this.
+
+
+// @sect3{The main function}
+
+// Finally, this is the main program. The only thing it does is to allocate
+// and create the triangulation, then create a <code>DoFHandler</code> object
+// and associate it to the triangulation, and finally call the above two functions
+// on it:
+int main ()
+{
+  // Build the mesh and attach a DoFHandler to it.
+  Triangulation<2> mesh;
+  make_grid (mesh);
+  DoFHandler<2> dof_handler (mesh);
+
+  // Write the sparsity pattern for the original numbering first, then
+  // for the renumbered degrees of freedom.
+  distribute_dofs (dof_handler);
+  renumber_dofs (dof_handler);
+
+  return 0;
+}
diff --git a/examples/step-20/CMakeLists.txt b/examples/step-20/CMakeLists.txt
new file mode 100644
index 0000000..ac80b77
--- /dev/null
+++ b/examples/step-20/CMakeLists.txt
@@ -0,0 +1,39 @@
+##
+#  CMake script for the step-20 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-20")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+# Look for a deal.II installation (requesting version 8.4). The search is
+# hinted with the deal.II_DIR and DEAL_II_DIR CMake variables, the two
+# parent directories, and the DEAL_II_DIR environment variable. QUIET
+# suppresses CMake's own diagnostics; the check below reports the failure
+# with a more helpful message instead.
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+# Abort the configuration with instructions if no suitable deal.II was found.
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+# The two DEAL_II_* calls are macros that come with the deal.II package
+# found above. NOTE(review): presumably the first one has to run before
+# PROJECT(), and the second one sets up the target from TARGET and
+# TARGET_SRC -- confirm against the deal.II CMake documentation.
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-20/doc/builds-on b/examples/step-20/doc/builds-on
new file mode 100644
index 0000000..48a0f73
--- /dev/null
+++ b/examples/step-20/doc/builds-on
@@ -0,0 +1 @@
+step-4
diff --git a/examples/step-20/doc/intro.dox b/examples/step-20/doc/intro.dox
new file mode 100644
index 0000000..5033c9b
--- /dev/null
+++ b/examples/step-20/doc/intro.dox
@@ -0,0 +1,712 @@
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+@dealiiVideoLecture{19,20,21}
+
+This program is devoted to two aspects: the use of mixed finite elements -- in
+particular Raviart-Thomas elements -- and using block matrices to define
+solvers, preconditioners, and nested versions of those that use the
+substructure of the system matrix. The equation we are going to solve is again
+the Laplace equation, though with a matrix-valued coefficient:
+@f{eqnarray*}
+  -\nabla \cdot K({\mathbf x}) \nabla p &=& f \qquad {\textrm{in}\ } \Omega, \\
+  p &=& g \qquad {\textrm{on}\ }\partial\Omega.
+@f}
+$K({\mathbf x})$ is assumed to be uniformly positive definite, i.e., there is
+$\alpha>0$ such that the eigenvalues $\lambda_i({\mathbf x})$ of $K(x)$ satisfy
+$\lambda_i({\mathbf x})\ge \alpha$. The use of the symbol $p$ instead of the usual
+$u$ for the solution variable will become clear in the next section.
+
+After discussing the equation and the formulation we are going to use to solve
+it, this introduction will cover the use of block matrices and vectors, the
+definition of solvers and preconditioners, and finally the actual test case we
+are going to solve.
+
+We are going to extend this tutorial program in step-21 to
+solve not only the mixed Laplace equation, but add another equation that
+describes the transport of a mixture of two fluids.
+
+The equations covered here fall into the class of vector-valued problems. A
+toplevel overview of this topic can be found in the @ref vector_valued module.
+
+
+<h3>Formulation, weak form, and discrete problem</h3>
+
+In the form above, the Laplace equation is generally considered a good model equation
+for fluid flow in porous media. In particular, if flow is so slow that all
+dynamic effects such as the acceleration terms in the Navier-Stokes equation
+become irrelevant, and if the flow pattern is stationary, then the
+Laplace
+equation models the pressure that drives the flow reasonably well. (Because the
+solution variable is a pressure, we here use the name $p$ instead of the
+name $u$ more commonly used for the solution of partial differential equations.)
+
+Typical applications of this view of the Laplace equation are then modeling
+groundwater flow, or the flow of hydrocarbons in oil reservoirs. In these
+applications, $K$ is then the permeability tensor, i.e. a measure for how much
+resistance the soil or rock matrix exerts on the fluid flow. In the
+applications just named, a desirable feature is that the numerical scheme is
+locally conservative, i.e. that whatever flows into a cell also flows out of
+it (or the difference is equal to the integral over the source terms over each
+cell, if the sources are nonzero). However, as it turns out, the usual
+discretizations of the Laplace equation do not satisfy this property. On the
+other hand, one can achieve this by choosing a different formulation.
+
+To this end, one first introduces a second variable, called the flux,
+${\mathbf u}=-K\nabla p$. By its definition, the flux is a vector in the
+negative
+direction of the pressure gradient, multiplied by the permeability tensor. If
+the permeability tensor is proportional to the unit matrix, this equation is
+easy to understand and intuitive: the higher the permeability, the higher the
+flux; and the flux is proportional to the gradient of the pressure, going from
+areas of high pressure to areas of low pressure.
+
+With this second variable, one then finds an alternative version of the
+Laplace equation, called the mixed formulation:
+@f{eqnarray*}
+  K^{-1} {\mathbf u} + \nabla p &=& 0 \qquad {\textrm{in}\ } \Omega, \\
+  -{\textrm{div}}\ {\mathbf u} &=& -f \qquad {\textrm{in}\ }\Omega, \\
+  p &=& g \qquad {\textrm{on}\ } \partial\Omega.
+@f}
+Here, we have multiplied the equation defining the velocity ${\mathbf
+u}$ by $K^{-1}$ because this makes the set of equations symmetric: one
+of the equations has the gradient, the second the negative divergence,
+and these two are of course adjoints of each other, resulting in a
+symmetric bilinear form and a consequently symmetric system matrix
+under the common assumption that $K$ is a symmetric tensor.
+
+The weak formulation of this problem is found by multiplying the two
+equations with test functions and integrating some terms by parts:
+@f{eqnarray*}
+  A(\{{\mathbf u},p\},\{{\mathbf v},q\}) = F(\{{\mathbf v},q\}),
+@f}
+where
+@f{eqnarray*}
+  A(\{{\mathbf u},p\},\{{\mathbf v},q\})
+  &=&
+  ({\mathbf v}, K^{-1}{\mathbf u})_\Omega - ({\textrm{div}}\ {\mathbf v}, p)_\Omega
+  - (q,{\textrm{div}}\ {\mathbf u})_\Omega
+  \\
+  F(\{{\mathbf v},q\}) &=& -(g,{\mathbf v}\cdot {\mathbf n})_{\partial\Omega} - (f,q)_\Omega.
+@f}
+Here, ${\mathbf n}$ is the outward normal vector at the boundary. Note how in this
+formulation, Dirichlet boundary values of the original problem are
+incorporated in the weak form.
+
+To be well-posed, we have to look for solutions and test functions in the
+space $H({\textrm{div}})=\{{\mathbf w}\in L^2(\Omega)^d:\ {\textrm{div}}\ {\mathbf w}\in L^2\}$
+for $\mathbf u$,$\mathbf v$, and $L^2$ for $p,q$. It is a well-known fact stated in
+almost every book on finite element theory that if one chooses discrete finite
+element spaces for the approximation of ${\mathbf u},p$ inappropriately, then the
+resulting discrete saddle-point problem is unstable and the discrete solution
+will not converge to the exact solution.
+
+To overcome this, a number of different finite element pairs for ${\mathbf u},p$
+have been developed that lead to a stable discrete problem. One such pair is
+to use the Raviart-Thomas spaces $RT(k)$ for the velocity ${\mathbf u}$ and
+discontinuous elements of class $DQ(k)$ for the pressure $p$. For details
+about these spaces, we refer in particular to the book on mixed finite element
+methods by Brezzi and Fortin, but many other books on the theory of finite
+elements, for example the classic book by Brenner and Scott, also state the
+relevant results.
+
+
+<h3>Assembling the linear system</h3>
+
+The deal.II library (of course) implements Raviart-Thomas elements $RT(k)$ of
+arbitrary order $k$, as well as discontinuous elements $DQ(k)$. If we forget
+about their particular properties for a second, we then have to solve a
+discrete problem
+@f{eqnarray*}
+  A(x_h,w_h) = F(w_h),
+@f}
+with the bilinear form and right hand side as stated above, and $x_h=\{{\mathbf u}_h,p_h\}$, $w_h=\{{\mathbf v}_h,q_h\}$. Both $x_h$ and $w_h$ are from the space
+$X_h=RT(k)\times DQ(k)$, where $RT(k)$ is itself a space of $dim$-dimensional
+functions to accommodate for the fact that the flow velocity is vector-valued.
+The necessary question then is: how do we do this in a program?
+
+Vector-valued elements have already been discussed in previous tutorial
+programs, the first time and in detail in step-8. The main difference there
+was that the vector-valued space $V_h$ is uniform in all its components: the
+$dim$ components of the displacement vector are all equal and from the same
+function space. What we could therefore do was to build $V_h$ as the outer
+product of the $dim$ times the usual $Q(1)$ finite element space, and by this
+make sure that all our shape functions have only a single non-zero vector
+component. Instead of dealing with vector-valued shape functions, all we did
+in step-8 was therefore to look at the (scalar) only non-zero component and
+use the <code>fe.system_to_component_index(i).first</code> call to figure out
+which component this actually is.
+
+This doesn't work with Raviart-Thomas elements: following from their
+construction to satisfy certain regularity properties of the space
+$H({\textrm{div}})$, the shape functions of $RT(k)$ are usually nonzero in all
+their vector components at once. For this reason, were
+<code>fe.system_to_component_index(i).first</code> applied to determine the only
+nonzero component of shape function $i$, an exception would be generated. What
+we really need to do is to get at <em>all</em> vector components of a shape
+function. In deal.II diction, we call such finite elements
+<em>non-primitive</em>, whereas finite elements that are either scalar or for
+which every vector-valued shape function is nonzero only in a single vector
+component are called <em>primitive</em>.
+
+So what do we have to do for non-primitive elements? To figure this out, let
+us go back in the tutorial programs, almost to the very beginnings. There, we
+learned that we use the <code>FEValues</code> class to determine the values and
+gradients of shape functions at quadrature points. For example, we would call
+<code>fe_values.shape_value(i,q_point)</code> to obtain the value of the
+<code>i</code>th shape function on the quadrature point with number
+<code>q_point</code>. Later, in step-8 and other tutorial programs, we learned
+that this function call also works for vector-valued shape functions (of
+primitive finite elements), and that it returned the value of the only
+non-zero component of shape function <code>i</code> at quadrature point
+<code>q_point</code>.
+
+For non-primitive shape functions, this is clearly not going to work: there is
+no single non-zero vector component of shape function <code>i</code>, and the call
+to <code>fe_values.shape_value(i,q_point)</code> would consequently not make
+much sense. However, deal.II offers a second function call,
+<code>fe_values.shape_value_component(i,q_point,comp)</code> that returns the
+value of the <code>comp</code>th vector component of shape function  <code>i</code> at
+quadrature point <code>q_point</code>, where <code>comp</code> is an index between
+zero and the number of vector components of the present finite element; for
+example, the element we will use to describe velocities and pressures is going
+to have $dim+1$ components. It is worth noting that this function call can
+also be used for primitive shape functions: it will simply return zero for all
+components except one; for non-primitive shape functions, it will in general
+return a non-zero value for more than just one component.
+
+We could now attempt to rewrite the bilinear form above in terms of vector
+components. For example, in 2d, the first term could be rewritten like this
+(note that $u_0=x_0, u_1=x_1, p=x_2$):
+@f{eqnarray*}
+  ({\mathbf u}_h^i, K^{-1}{\mathbf u}_h^j)
+  =
+  &\left((x_h^i)_0, K^{-1}_{00} (x_h^j)_0\right) +
+   \left((x_h^i)_0, K^{-1}_{01} (x_h^j)_1\right) + \\
+  &\left((x_h^i)_1, K^{-1}_{10} (x_h^j)_0\right) +
+   \left((x_h^i)_1, K^{-1}_{11} (x_h^j)_1\right).
+@f}
+If we implemented this, we would get code like this:
+
+@code
+  for (unsigned int q=0; q<n_q_points; ++q)
+    for (unsigned int i=0; i<dofs_per_cell; ++i)
+      for (unsigned int j=0; j<dofs_per_cell; ++j)
+        local_matrix(i,j) += (k_inverse_values[q][0][0] *
+                              fe_values.shape_value_component(i,q,0) *
+                              fe_values.shape_value_component(j,q,0)
+                              +
+                              k_inverse_values[q][0][1] *
+                              fe_values.shape_value_component(i,q,0) *
+                              fe_values.shape_value_component(j,q,1)
+                              +
+                              k_inverse_values[q][1][0] *
+                              fe_values.shape_value_component(i,q,1) *
+                              fe_values.shape_value_component(j,q,0)
+                              +
+                              k_inverse_values[q][1][1] *
+                              fe_values.shape_value_component(i,q,1) *
+                              fe_values.shape_value_component(j,q,1)
+                             )
+                             *
+                             fe_values.JxW(q);
+@endcode
+
+This is, at best, tedious, error prone, and not dimension independent. There
+are obvious ways to make things dimension independent, but in the end, the
+code is simply not pretty. What would be much nicer is if we could simply
+extract the ${\mathbf u}$ and $p$ components of a shape function $x_h^i$. In the
+program we do that in the following way:
+
+@code
+  const FEValuesExtractors::Vector velocities (0);
+  const FEValuesExtractors::Scalar pressure (dim);
+
+  ...
+
+  for (unsigned int q=0; q<n_q_points; ++q)
+    for (unsigned int i=0; i<dofs_per_cell; ++i)
+      for (unsigned int j=0; j<dofs_per_cell; ++j)
+        local_matrix(i,j) += (fe_values[velocities].value (i, q) *
+                              k_inverse_values[q] *
+                              fe_values[velocities].value (j, q)
+                              -
+                              fe_values[velocities].divergence (i, q) *
+                              fe_values[pressure].value (j, q)
+                              -
+                              fe_values[pressure].value (i, q) *
+                              fe_values[velocities].divergence (j, q)) *
+                              fe_values.JxW(q);
+@endcode
+
+This is, in fact, not only the first term of the bilinear form, but the
+whole thing (sans boundary contributions).
+
+What this piece of code does is, given an <code>fe_values</code> object, to extract
+the values of the first $dim$ components of shape function <code>i</code> at
+quadrature points <code>q</code>, that is the velocity components of that shape
+function. Put differently, if we write shape functions $x_h^i$ as the tuple
+$\{{\mathbf u}_h^i,p_h^i\}$, then the function returns the velocity part of this
+tuple. Note that the velocity is of course a <code>dim</code>-dimensional tensor, and
+that the function returns a corresponding object. Similarly, where we
+subscript with the pressure extractor, we extract the scalar pressure
+component. The whole mechanism is described in more detail in the
+@ref vector_valued module.
+
+In practice, it turns out that we can do a bit better if we evaluate the shape
+functions, their gradients and divergences only once per outermost loop, and
+store the result, as this saves us a few otherwise repeated computations (it is
+possible to save even more repeated operations by calculating all relevant
+quantities in advance and then only inserting the results in the actual loop,
+see step-22 for a realization of that approach).
+The final result then looks like this, working in every space dimension:
+
+@code
+  typename DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+  for (; cell!=endc; ++cell)
+    {
+      fe_values.reinit (cell);
+      local_matrix = 0;
+      local_rhs = 0;
+
+      right_hand_side.value_list (fe_values.get_quadrature_points(),
+                                  rhs_values);
+      k_inverse.value_list (fe_values.get_quadrature_points(),
+                            k_inverse_values);
+
+      for (unsigned int q=0; q<n_q_points; ++q)
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          {
+            const Tensor<1,dim> phi_i_u     = fe_values[velocities].value (i, q);
+            const double        div_phi_i_u = fe_values[velocities].divergence (i, q);
+            const double        phi_i_p     = fe_values[pressure].value (i, q);
+
+            for (unsigned int j=0; j<dofs_per_cell; ++j)
+              {
+                const Tensor<1,dim> phi_j_u     = fe_values[velocities].value (j, q);
+                const double        div_phi_j_u = fe_values[velocities].divergence (j, q);
+                const double        phi_j_p     = fe_values[pressure].value (j, q);
+
+                local_matrix(i,j) += (phi_i_u * k_inverse_values[q] * phi_j_u
+                                      - div_phi_i_u * phi_j_p
+                                      - phi_i_p * div_phi_j_u) *
+                                     fe_values.JxW(q);
+              }
+
+            local_rhs(i) += -phi_i_p *
+                            rhs_values[q] *
+                            fe_values.JxW(q);
+          }
+@endcode
+
+This very closely resembles the form in which we have originally written down
+the bilinear form and right hand side.
+
+There is one final term that we have to take care of: the right hand side
+contained the term $(g,{\mathbf v}\cdot {\mathbf n})_{\partial\Omega}$, constituting the
+weak enforcement of pressure boundary conditions. We have already seen in
+step-7 how to deal with face integrals: essentially exactly the same as with
+domain integrals, except that we have to use the FEFaceValues class
+instead of <code>FEValues</code>. To compute the boundary term we then simply have
+to loop over all boundary faces and integrate there. The mechanism works in
+the same way as above, i.e. the extractor classes also work on FEFaceValues objects:
+
+@code
+      for (unsigned int face_no=0;
+           face_no<GeometryInfo<dim>::faces_per_cell;
+           ++face_no)
+        if (cell->at_boundary(face_no))
+          {
+            fe_face_values.reinit (cell, face_no);
+
+            pressure_boundary_values
+              .value_list (fe_face_values.get_quadrature_points(),
+                           boundary_values);
+
+            for (unsigned int q=0; q<n_face_q_points; ++q)
+              for (unsigned int i=0; i<dofs_per_cell; ++i)
+                local_rhs(i) += -(fe_face_values[velocities].value (i, q) *
+                                  fe_face_values.normal_vector(q) *
+                                  boundary_values[q] *
+                                  fe_face_values.JxW(q));
+          }
+@endcode
+
+You will find the exact same code as above in the sources for the present
+program. We will therefore not comment much on it below.
+
+
+<h3>Linear solvers and preconditioners</h3>
+
+After assembling the linear system we are faced with the task of solving
+it. The problem here is: the matrix has a zero block at the bottom right
+(there is no term in the bilinear form that couples the pressure $p$ with the
+pressure test function $q$), and it is indefinite. At least it is
+symmetric. In other words: the Conjugate Gradient method is not going to
+work. We would have to resort to other iterative solvers instead, such as
+MinRes, SymmLQ, or GMRES, that can deal with indefinite systems. However, then
+the next problem immediately surfaces: due to the zero block, there are zeros
+on the diagonal and none of the usual preconditioners (Jacobi, SSOR) will work
+as they require division by diagonal elements.
+
+For the matrix sizes we expect to run with this program, the by far simplest
+approach would be to just use a direct solver (in particular, the 
+SparseDirectUMFPACK class that is bundled with deal.II). step-29 goes this
+route and shows that solving <i>any</i> linear system can be done in just
+3 or 4 lines of code. 
+
+But then, this is a tutorial: we teach how to do things. Consequently,
+in the following, we will introduce some techniques that can be used in cases
+like these. Namely, we will consider the linear system as not consisting of one
+large matrix and vectors, but we will want to decompose matrices 
+into <i>blocks</i> that correspond to the individual operators that appear in
+the system. We note that the resulting solver is not optimal -- there are much
+better ways, for example those explained in the results section of step-22 or
+the one we use in step-43 for a problem rather similar to the current one --
+but that the goal is to introduce techniques rather than optimal solvers.
+
+
+<h4>Solving using the Schur complement</h4>
+
+In view of the difficulties using standard solvers and preconditioners
+mentioned above, let us take another look at the matrix. If we sort our
+degrees of freedom so that all velocity variables come before all pressure variables,
+then we can subdivide the linear system $Ax=h$ into the following blocks:
+@f{eqnarray*}
+  \left(\begin{array}{cc}
+    M & B \\ B^T & 0
+  \end{array}\right)
+  \left(\begin{array}{cc}
+    U \\ P
+  \end{array}\right)
+  =
+  \left(\begin{array}{cc}
+    F \\ G
+  \end{array}\right),
+@f}
+where $U,P$ are the values of velocity and pressure degrees of freedom,
+respectively, $M$ is the mass matrix on the velocity space, $B^T$ corresponds to
+the negative divergence operator, and $B$ is its transpose and corresponds
+to the gradient.
+
+By block elimination, we can then re-order this system in the following way
+(multiply the first row of the system by $B^TM^{-1}$ and then subtract the
+second row from it):
+@f{eqnarray*}
+  B^TM^{-1}B P &=& B^TM^{-1} F - G, \\
+  MU &=& F - BP.
+@f}
+Here, the matrix $S=B^TM^{-1}B$ (called the <em>Schur complement</em> of $A$)
+is obviously symmetric and, owing to the positive definiteness of $M$ and the
+fact that $B$ has full column rank, $S$ is also positive
+definite.
+
+Consequently, if we could compute $S$, we could apply the Conjugate Gradient
+method to it. However, computing $S$ is expensive, and $S$ is in fact
+also a full matrix. On the other hand, the CG algorithm doesn't require
+us to actually have a representation of $S$, it is sufficient to form
+matrix-vector products with it. We can do so in steps: to compute $Sv=B^TM^{-1}Bv=B^T(M^{-1}(Bv))$, we
+<ol>
+ <li> form $w = B v$;
+ <li> solve $My = w$ for $y=M^{-1}w$, using the CG method applied to the
+  positive definite and symmetric mass matrix $M$;
+ <li> form $z=B^Ty$ to obtain $z=Sv$.
+</ol>
+Note how we evaluate the expression $B^TM^{-1}Bv$ right to left to
+avoid matrix-matrix products; this way, all we have to do is evaluate
+matrix-vector products.
+
+@note The key point in this consideration is to recognize that to implement
+an iterative solver such as CG or GMRES, we never actually need the actual
+<i>elements</i> of a matrix! All that is required is that we can form
+matrix-vector products. The same is true for preconditioners. In deal.II
+we encode this requirement by only requiring that matrices and preconditioners
+given to solver classes have a <code>vmult()</code> member function that does
+the matrix-vector product. How a class chooses to implement this function is
+not important to the solver. Consequently, classes can implement it by,
+for example, doing a sequence of products and linear solves as discussed
+above.
+
+Using this strategy, we can then implement a class that provides the
+function <code>vmult()</code> that is all that the SolverCG class
+requires from an object representing a matrix. We can make our life a
+bit easier by also introducing an object that represents $M^{-1}$ and
+that has its own <code>vmult()</code> function that, if called, solves
+the linear system with $M$; in fact, such a class already exists in
+deal.II: this is accomplished by using the class
+IterativeInverse. Using it, the class that implements the Schur
+complement only needs to offer the <code>vmult()</code>
+function to perform a matrix-vector multiplication, using the algorithm
+above. Here are again the relevant parts of the code:
+
+@code
+class SchurComplement
+{
+  public:
+    SchurComplement (const BlockSparseMatrix<double> &A,
+                     const IterativeInverse<Vector<double> > &Minv);
+
+    void vmult (Vector<double>       &dst,
+                const Vector<double> &src) const;
+
+  private:
+    const SmartPointer<const BlockSparseMatrix<double> > system_matrix;
+    const SmartPointer<const IterativeInverse<Vector<double> > > m_inverse;
+
+    mutable Vector<double> tmp1, tmp2;
+};
+
+
+void SchurComplement::vmult (Vector<double>       &dst,
+                             const Vector<double> &src) const
+{
+  system_matrix->block(0,1).vmult (tmp1, src); // multiply with the top right block: B
+  m_inverse->vmult (tmp2, tmp1);               // multiply with M^-1
+  system_matrix->block(1,0).vmult (dst, tmp2); // multiply with the bottom left block: B^T
+}
+@endcode
+
+In this code, the constructor takes a reference to a block sparse matrix for
+the entire system, and a reference to the object representing the inverse of
+the mass matrix. It stores these using <code>SmartPointer</code> objects (see
+step-7), and additionally allocates two temporary vectors <code>tmp1</code> and
+<code>tmp2</code> for the vectors labeled $w,y$ in the list above.
+
+In the matrix-vector multiplication function, the product $Sv$ is performed in
+exactly the order outlined above. Note how we access the blocks $B$ and $B^T$
+by calling <code>system_matrix->block(0,1)</code> and
+<code>system_matrix->block(1,0)</code> respectively, thereby picking out
+individual blocks of the block system. Multiplication by $M^{-1}$ happens
+using the object introduced above.
+
+With all this, we can go ahead and write down the solver we are going to
+use. Essentially, all we need to do is form the right hand sides of the two
+equations defining $P$ and $U$, and then solve them with the Schur complement
+matrix and the mass matrix, respectively:
+
+ at code
+template <int dim>
+void MixedLaplaceProblem<dim>::solve ()
+{
+  PreconditionIdentity identity;
+  IterativeInverse<Vector<double> > m_inverse;
+  m_inverse.initialize(system_matrix.block(0,0), identity);
+  m_inverse.solver.select("cg");
+  static ReductionControl inner_control(1000, 0., 1.e-13);
+  m_inverse.solver.set_control(inner_control);
+
+  Vector<double> tmp (solution.block(0).size());
+
+  {
+    Vector<double> schur_rhs (solution.block(1).size());
+
+    m_inverse.vmult (tmp, system_rhs.block(0));
+    system_matrix.block(1,0).vmult (schur_rhs, tmp);
+    schur_rhs -= system_rhs.block(1);
+
+    SolverControl solver_control (system_matrix.block(0,0).m(),
+                                  1e-6*schur_rhs.l2_norm());
+    SolverCG<>    cg (solver_control);
+
+    cg.solve (SchurComplement(system_matrix, m_inverse),
+              solution.block(1),
+              schur_rhs,
+              PreconditionIdentity());
+  }
+  {
+    system_matrix.block(0,1).vmult (tmp, solution.block(1));
+    tmp *= -1;
+    tmp += system_rhs.block(0);
+
+    m_inverse.vmult (solution.block(0), tmp);
+  }
+}
+ at endcode
+
+This code looks more impressive than it actually is. At the beginning, we
+declare an object representing $M^{-1}$ and a temporary vector (of the size of
+the first block of the solution, i.e. with as many entries as there are
+velocity unknowns), and the two blocks surrounded by braces then solve the two
+equations for $P$ and $U$, in this order. Most of the code in each of the two
+blocks is actually devoted to constructing the proper right hand sides. For
+the first equation, this would be $B^TM^{-1}F-G$, and $-BP+F$ for the second
+one. The first right hand side is then solved with the Schur complement matrix, and
+the second simply multiplied with $M^{-1}$. The code as shown uses no
+preconditioner (i.e. the identity matrix as preconditioner) for the Schur
+complement.
+
+
+
+<h4>A preconditioner for the Schur complement</h4>
+
+One may ask whether it would help if we had a preconditioner for the Schur
+complement $S=B^TM^{-1}B$. The general answer, as usual, is: of course. The
+problem is only, we don't know anything about this Schur complement matrix. We
+do not know its entries, all we know is its action. On the other hand, we have
+to realize that our solver is expensive since in each iteration we have to do
+one matrix-vector product with the Schur complement, which means that we have
+to invert the mass matrix once in each iteration.
+
+There are different approaches to preconditioning such a matrix. One
+extreme is to use something that is cheap to apply and therefore has no real
+impact on the work done in each iteration. The other extreme is a
+preconditioner that is itself very expensive, but in return really brings down
+the number of iterations required to solve with $S$.
+
+We will try something along the second approach, as much to improve the
+performance of the program as to demonstrate some techniques. To this end, let
+us recall that the ideal preconditioner is, of course, $S^{-1}$, but that is
+unattainable. However, how about
+ at f{eqnarray*}
+  \tilde S^{-1} = [B^T ({\textrm{diag}\ }M)^{-1}B]^{-1}
+ at f}
+as a preconditioner? That would mean that every time we have to do one
+preconditioning step, we actually have to solve with $\tilde S$. At first,
+this looks almost as expensive as solving with $S$ right away. However, note
+that in the inner iteration, we do not have to calculate $M^{-1}$, but only
+the inverse of its diagonal, which is cheap.
+
+The next step is to define a class that represents the approximate Schur
+complement. This should look very much like the Schur complement class itself,
+except that it doesn't need the object representing $M^{-1}$ any more
+since we can compute the inverse of the diagonal of $M$ on the fly:
+
+ at code
+class ApproximateSchurComplement : public Subscriptor
+{
+  public:
+    ApproximateSchurComplement (const BlockSparseMatrix<double> &A);
+
+    void vmult (Vector<double>       &dst,
+                const Vector<double> &src) const;
+
+  private:
+    const SmartPointer<const BlockSparseMatrix<double> > system_matrix;
+
+    mutable Vector<double> tmp1, tmp2;
+};
+
+
+void ApproximateSchurComplement::vmult (Vector<double>       &dst,
+                                        const Vector<double> &src) const
+{
+  system_matrix->block(0,1).vmult (tmp1, src);
+  system_matrix->block(0,0).precondition_Jacobi (tmp2, tmp1);
+  system_matrix->block(1,0).vmult (dst, tmp2);
+}
+ at endcode
+
+Note how the <code>vmult</code> function differs in simply doing one Jacobi sweep
+(i.e. multiplying with the inverses of the diagonal) instead of multiplying
+with the full $M^{-1}$. (This is how a single Jacobi preconditioner
+step with $M$ is defined: it is the multiplication with the inverse of
+the diagonal of $M$; in other words, the operation $({\textrm{diag}\
+}M)^{-1}x$ on a vector $x$ is exactly what the function
+SparseMatrix::precondition_Jacobi above does.)
+
+With all this, we already have the preconditioner: it should be the inverse of
+the approximate Schur complement, i.e. we need code like this:
+
+ at code
+    ApproximateSchurComplement
+      approximate_schur_complement (system_matrix);
+
+    IterativeInverse<Vector<double> >
+      preconditioner;
+    preconditioner.initialize(approximate_schur_complement, identity);
+    preconditioner.solver.select("cg");
+    preconditioner.solver.set_control(inner_control);
+ at endcode
+
+That's all!
+
+Taken together, the first block of our <code>solve()</code> function will then
+look like this:
+
+ at code
+    Vector<double> schur_rhs (solution.block(1).size());
+
+    m_inverse.vmult (tmp, system_rhs.block(0));
+    system_matrix.block(1,0).vmult (schur_rhs, tmp);
+    schur_rhs -= system_rhs.block(1);
+
+    SchurComplement
+      schur_complement (system_matrix, m_inverse);
+
+    ApproximateSchurComplement
+      approximate_schur_complement (system_matrix);
+
+    IterativeInverse<Vector<double> >
+      preconditioner;
+    preconditioner.initialize(approximate_schur_complement, identity);
+    preconditioner.solver.select("cg");
+    preconditioner.solver.set_control(inner_control);
+
+    SolverControl solver_control (solution.block(1).size(),
+                                  1e-12*schur_rhs.l2_norm());
+    SolverCG<> cg (solver_control);
+
+    cg.solve (schur_complement, solution.block(1), schur_rhs,
+              preconditioner);
+ at endcode
+
+Note how we pass the so-defined preconditioner to the solver working on the
+Schur complement matrix.
+
+Obviously, applying this inverse of the approximate Schur complement is a very
+expensive preconditioner, almost as expensive as inverting the Schur
+complement itself. We can expect it to significantly reduce the number of
+outer iterations required for the Schur complement. In fact it does: in a
+typical run on 5 times refined meshes using elements of order 0, the number of
+outer iterations drops from 164 to 12. On the other hand, we now have to apply
+a very expensive preconditioner 12 times. A better measure is therefore simply
+the run-time of the program: on my laptop, it drops from 28 to 23 seconds for
+this test case. That doesn't seem too impressive, but the savings become more
+pronounced on finer meshes and with elements of higher order. For example, a
+six times refined mesh with elements of order 2 yields an improvement from
+318 to 12 outer iterations, and a runtime drop from 338 to 229 seconds. Not
+earth shattering, but significant.
+
+
+<h4>A remark on similar functionality in deal.II</h4>
+
+As a final remark about solvers and preconditioners, let us note that a
+significant amount of functionality introduced above is actually also present
+in the library itself. It probably even is more powerful and general, but we
+chose to introduce this material here anyway to demonstrate how to work with
+block matrices and to develop solvers and preconditioners, rather than using
+black box components from the library.
+
+For those interested in looking up the corresponding library classes: the Schur
+complement class corresponds to the <code>SchurMatrix</code> class.
+
+
+<h3>Definition of the test case</h3>
+
+In this tutorial program, we will solve the Laplace equation in mixed
+formulation as stated above. Since we want to monitor convergence of the
+solution inside the program, we choose right hand side, boundary conditions,
+and the coefficient so that we recover a solution function known to us. In
+particular, we choose the pressure solution
+ at f{eqnarray*}
+  p = -\left(\frac \alpha 2 xy^2 + \beta x - \frac \alpha 6 x^3\right),
+ at f}
+and for the coefficient we choose the unit matrix $K_{ij}=\delta_{ij}$ for
+simplicity. Consequently, the exact velocity satisfies
+ at f{eqnarray*}
+  {\mathbf u} =
+  \left(\begin{array}{cc}
+    \frac \alpha 2 y^2 + \beta - \frac \alpha 2 x^2 \\
+    \alpha xy
+  \end{array}\right).
+ at f}
+This solution was chosen since it is exactly divergence free, making it a
+realistic test case for incompressible fluid flow. By consequence, the right
+hand side equals $f=0$, and as boundary values we have to choose
+$g=p|_{\partial\Omega}$.
+
+For the computations in this program, we choose $\alpha=0.3,\beta=1$. You can
+find the resulting solution in the <a href="#Results">results section
+below</a>, after the commented program.
diff --git a/examples/step-20/doc/kind b/examples/step-20/doc/kind
new file mode 100644
index 0000000..e62f4e7
--- /dev/null
+++ b/examples/step-20/doc/kind
@@ -0,0 +1 @@
+fluids
diff --git a/examples/step-20/doc/results.dox b/examples/step-20/doc/results.dox
new file mode 100644
index 0000000..1dcd7b1
--- /dev/null
+++ b/examples/step-20/doc/results.dox
@@ -0,0 +1,323 @@
+<h1>Results</h1>
+
+<h3>Output of the program and graphical visualization</h3>
+
+
+If we run the program as is, we get this output for the $8\times 8$
+mesh we use (for a total of 64 cells with 64 pressure degrees of
+freedom since we use piecewise constants, and 144 velocities because
+the Raviart-Thomas element defines one degree of freedom per face and
+there are 72 faces parallel to the $x$-axis and the same number
+parallel to the $y$-axis):
+ at code
+examples/\step-20> make run
+============================ Remaking Makefile.dep
+==============debug========= \step-20.cc
+============================ Linking \step-20
+============================ Running \step-20
+Number of active cells: 64
+Total number of cells: 85
+Number of degrees of freedom: 208 (144+64)
+15 CG Schur complement iterations to obtain convergence.
+Errors: ||e_p||_L2 = 0.178055,   ||e_u||_L2 = 0.0433435
+ at endcode
+
+The fact that the number of iterations is so small, of course, is due to the good
+(but expensive!) preconditioner we have developed. To get confidence in the
+solution, let us take a look at it. The following three images show (from left
+to right) the x-velocity, the y-velocity, and the pressure:
+
+<img src="http://www.dealii.org/images/steps/developer/step-20.u.png" alt="">
+<img src="http://www.dealii.org/images/steps/developer/step-20.v.png" alt="">
+<img src="http://www.dealii.org/images/steps/developer/step-20.p.png" alt="">
+
+
+
+Let us start with the pressure: it is highest at the left and lowest at the
+right, so flow will be from left to right. In addition, though hardly visible
+in the graph, we have chosen the pressure field such that the left-right
+flow first channels towards the center and then outward again. Consequently,
+the x-velocity has to increase to get the flow through the narrow part,
+something that can easily be seen in the left image. The middle image
+represents inward flow in y-direction at the left end of the domain, and
+outward flow in y-direction at the right end of the domain.
+
+
+
+As an additional remark, note how the x-velocity in the left image is only
+continuous in x-direction, whereas the y-velocity is continuous in
+y-direction. The flow fields are discontinuous in the other directions. This
+very obviously reflects the continuity properties of the Raviart-Thomas
+elements, which are, in fact, only in the space H(div) and not in the space
+$H^1$. Finally, the pressure field is completely discontinuous, but
+that should not surprise given that we have chosen <code>FE_DGQ(0)</code> as
+the finite element for that solution component.
+
+
+
+<h3>Convergence</h3>
+
+
+The program offers two obvious places where playing and observing convergence
+is in order: the degree of the finite elements used (passed to the constructor
+of the <code>MixedLaplaceProblem</code> class from <code>main()</code>), and
+the refinement level (determined in
+<code>MixedLaplaceProblem::make_grid_and_dofs</code>). What one can do is to
+change these values and observe the errors computed later on in the course of
+the program run.
+
+
+
+If one does this, one finds the following pattern for the $L_2$ error
+in the pressure variable:
+<table align="center" border="1" cellspacing="3" cellpadding="3">
+  <tr>
+    <td></td>
+    <td colspan="3" align="center">Finite element order</td>
+  </tr>
+
+  <tr>
+    <td>Refinement level</td>
+    <td>0</td>
+    <td>1</td>
+    <td>2</td>
+  </tr>
+
+  <tr>
+    <td>0</td>  <td>1.45344</td>  <td>0.0831743</td>  <td>0.0235186</td>
+  </tr>
+
+  <tr>
+    <td>1</td>  <td>0.715099</td>  <td>0.0245341</td>  <td>0.00293983</td>
+  </tr>
+
+  <tr>
+    <td>2</td>  <td>0.356383</td>  <td>0.0063458</td>  <td>0.000367478</td>
+  </tr>
+
+  <tr>
+    <td>3</td>  <td>0.178055</td>  <td>0.00159944</td>  <td>4.59349e-05</td>
+  </tr>
+
+  <tr>
+    <td>4</td>  <td>0.0890105</td>  <td>0.000400669</td>  <td>5.74184e-06</td>
+  </tr>
+
+  <tr>
+    <td>5</td>  <td>0.0445032</td>  <td>0.000100218</td>  <td>7.17799e-07</td>
+  </tr>
+
+  <tr>
+    <td>6</td>  <td>0.0222513</td>  <td>2.50576e-05</td>  <td>9.0164e-08</td>
+  </tr>
+
+  <tr>
+    <td></td>  <td>$O(h)$</td>  <td>$O(h^2)$</td>  <td>$O(h^3)$</td>
+  </tr>
+</table>
+
+The theoretically expected convergence orders are very nicely reflected by the
+experimentally observed ones indicated in the last row of the table.
+
+
+
+One can make the same experiment with the $L_2$ error
+in the velocity variables:
+<table align="center" border="1" cellspacing="3" cellpadding="3">
+  <tr>
+    <td></td>
+    <td colspan="3" align="center">Finite element order</td>
+  </tr>
+
+  <tr>
+    <td>Refinement level</td>
+    <td>0</td>
+    <td>1</td>
+    <td>2</td>
+  </tr>
+
+  <tr>
+    <td>0</td> <td>0.367423</td> <td>0.127657</td> <td>5.10388e-14</td>
+  </tr>
+
+  <tr>
+    <td>1</td> <td>0.175891</td> <td>0.0319142</td> <td>9.04414e-15</td>
+  </tr>
+
+  <tr>
+    <td>2</td> <td>0.0869402</td> <td>0.00797856</td> <td>1.23723e-14</td>
+  </tr>
+
+  <tr>
+    <td>3</td> <td>0.0433435</td> <td>0.00199464</td> <td>1.86345e-07</td>
+  </tr>
+
+  <tr>
+    <td>4</td> <td>0.0216559</td> <td>0.00049866</td> <td>2.72566e-07</td>
+  </tr>
+
+  <tr>
+    <td>5</td> <td>0.010826</td> <td>0.000124664</td> <td>3.57141e-07</td>
+  </tr>
+
+  <tr>
+    <td>6</td> <td>0.00541274</td> <td>3.1166e-05</td> <td>4.46124e-07</td>
+  </tr>
+
+  <tr>
+    <td></td>  <td>$O(h)$</td>  <td>$O(h^2)$</td>  <td>$O(h^3)$</td>
+  </tr>
+</table>
+The result concerning the convergence order is the same here.
+
+
+
+<a name="extensions"></a>
+<h3>Possibilities for extensions</h3>
+
+<h4>More realistic permeability fields</h4>
+
+Realistic flow computations for ground water or oil reservoir simulations will
+not use a constant permeability. Here's a first, rather simple way to change
+this situation: we use a permeability that decays very rapidly away from a
+central flowline until it hits a background value of 0.001. This is to mimic
+the behavior of fluids in sandstone: in most of the domain, the sandstone is
+homogeneous and, while permeable to fluids, not overly so; on the other hand,
+the stone has cracked, or faulted, along one line, and the fluids flow much
+easier along this large crack. Here is how we could implement something like
+this:
+ at code
+template <int dim>
+void
+KInverse<dim>::value_list (const std::vector<Point<dim> > &points,
+                           std::vector<Tensor<2,dim> >    &values) const
+{
+  Assert (points.size() == values.size(),
+	  ExcDimensionMismatch (points.size(), values.size()));
+
+  for (unsigned int p=0; p<points.size(); ++p)
+    {
+      values[p].clear ();
+
+      const double distance_to_flowline
+        = std::fabs(points[p][1]-0.2*std::sin(10*points[p][0]));
+
+      const double permeability = std::max(std::exp(-(distance_to_flowline*
+                                                      distance_to_flowline)
+                                                    / (0.1 * 0.1)),
+                                           0.001);
+
+      for (unsigned int d=0; d<dim; ++d)
+	values[p][d][d] = 1./permeability;
+    }
+}
+ at endcode
+Remember that the function returns the inverse of the permeability tensor.
+
+
+
+With a significantly higher mesh resolution, we can visualize this, here with
+x- and y-velocity:
+
+<table style="width:60%" align="center">
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-20.u-wiggle.png" alt=""></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-20.v-wiggle.png" alt=""></td>
+  </tr>
+</table>
+
+It is obvious how fluids flow essentially only along the middle line, and not
+anywhere else.
+
+
+
+Another possibility would be to use a random permeability field. A simple way
+to achieve this would be to scatter a number of centers around the domain and
+then use a permeability field that is the sum of (negative) exponentials for
+each of these centers. Flow would then try to hop from one center of high
+permeability to the next one. This is an entirely unscientific attempt at
+describing a random medium, but one possibility to implement this behavior
+would look like this:
+ at code
+template <int dim>
+class KInverse : public TensorFunction<2,dim>
+{
+  public:
+    KInverse ();
+
+    virtual void value_list (const std::vector<Point<dim> > &points,
+			     std::vector<Tensor<2,dim> >    &values) const;
+
+  private:
+    std::vector<Point<dim> > centers;
+};
+
+
+template <int dim>
+KInverse<dim>::KInverse ()
+{
+  const unsigned int N = 40;
+  centers.resize (N);
+  for (unsigned int i=0; i<N; ++i)
+    for (unsigned int d=0; d<dim; ++d)
+      centers[i][d] = 2.*rand()/RAND_MAX-1;
+}
+
+
+template <int dim>
+void
+KInverse<dim>::value_list (const std::vector<Point<dim> > &points,
+                           std::vector<Tensor<2,dim> >    &values) const
+{
+  Assert (points.size() == values.size(),
+	  ExcDimensionMismatch (points.size(), values.size()));
+
+  for (unsigned int p=0; p<points.size(); ++p)
+    {
+      values[p].clear ();
+
+      double permeability = 0;
+      for (unsigned int i=0; i<centers.size(); ++i)
+        permeability += std::exp(-(points[p]-centers[i]).square()
+                                 / (0.1 * 0.1));
+
+      const double normalized_permeability
+        = std::max(permeability, 0.005);
+
+      for (unsigned int d=0; d<dim; ++d)
+	values[p][d][d] = 1./normalized_permeability;
+    }
+}
+ at endcode
+
+A piecewise constant interpolation of the diagonal elements of the
+inverse of this tensor (i.e., of <code>normalized_permeability</code>)
+looks as follows:
+
+<img src="http://www.dealii.org/images/steps/developer/step-20.k-random.png" alt="">
+
+
+With a permeability field like this, we would get x-velocities and pressures as
+follows:
+
+<table style="width:60%" align="center">
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-20.u-random.png" alt=""></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-20.p-random.png" alt=""></td>
+  </tr>
+</table>
+
+We will use these permeability fields again in step-21 and step-43.
+
+
+<h4>Better linear solvers</h4>
+
+As mentioned in the introduction, the Schur complement solver used here is not
+the best one conceivable (nor is it intended to be a particularly good
+one). Better ones can be found in the literature and can be built using the
+same block matrix techniques that were introduced here. We pick up on this
+theme again in step-22, where we first build a Schur complement solver for the
+Stokes equation as we did here, and then in the <a
+href="step_22.html#improved-solver">Improved Solvers</a> section discuss better
+ways based on solving the system as a whole but preconditioning based on
+individual blocks. We will also come back to this in step-43.
diff --git a/examples/step-20/doc/tooltip b/examples/step-20/doc/tooltip
new file mode 100644
index 0000000..ff05e0b
--- /dev/null
+++ b/examples/step-20/doc/tooltip
@@ -0,0 +1 @@
+Mixed finite elements for the mixed Laplacian. Block solvers.
diff --git a/examples/step-20/step-20.cc b/examples/step-20/step-20.cc
new file mode 100644
index 0000000..7c01c50
--- /dev/null
+++ b/examples/step-20/step-20.cc
@@ -0,0 +1,968 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2005 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Author: Wolfgang Bangerth, Texas A&M University, 2005, 2006
+ */
+
+
+// @sect3{Include files}
+
+// Since this program is only an adaptation of step-4, there is not much new
+// stuff in terms of header files. In deal.II, we usually list include files
+// in the order base-lac-grid-dofs-fe-numerics, followed by C++ standard
+// include files:
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/function.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/block_sparse_matrix.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/precondition.h>
+// For our Schur complement solver, we need one new object, namely a matrix
+// object which acts as the inverse of a matrix by calling an iterative
+// solver.
+#include <deal.II/lac/iterative_inverse.h>
+
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_renumbering.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/fe/fe_dgq.h>
+#include <deal.II/fe/fe_system.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/matrix_tools.h>
+#include <deal.II/numerics/data_out.h>
+
+#include <fstream>
+#include <iostream>
+
+// This is the only significant new header, namely the one in which the
+// Raviart-Thomas finite element is declared:
+#include <deal.II/fe/fe_raviart_thomas.h>
+
+// Finally, as a bonus in this program, we will use a tensorial
+// coefficient. Since it may have a spatial dependence, we consider it a
+// tensor-valued function. The following include file provides the
+// <code>TensorFunction</code> class that offers such functionality:
+#include <deal.II/base/tensor_function.h>
+
+// The last step is as in all previous programs:
+namespace Step20
+{
+  using namespace dealii;
+
+  // @sect3{The <code>MixedLaplaceProblem</code> class template}
+
+  // Again, since this is an adaptation of step-4, the main class is almost
+  // the same as the one in that tutorial program. In terms of member
+  // functions, the main differences are that the constructor takes the degree
+  // of the Raviart-Thomas element as an argument (and that there is a
+  // corresponding member variable to store this value) and the addition of
+  // the <code>compute_error</code> function in which, no surprise, we will
+  // compute the difference between the exact and the numerical solution to
+  // determine convergence of our computations:
+  template <int dim>
+  class MixedLaplaceProblem
+  {
+  public:
+    MixedLaplaceProblem (const unsigned int degree);
+    void run ();
+
+  private:
+    void make_grid_and_dofs ();
+    void assemble_system ();
+    void solve ();
+    void compute_errors () const;
+    void output_results () const;
+
+    const unsigned int   degree;
+
+    Triangulation<dim>   triangulation;
+    FESystem<dim>        fe;
+    DoFHandler<dim>      dof_handler;
+
+    // The second difference is that the sparsity pattern, the system matrix,
+    // and solution and right hand side vectors are now blocked. What this
+    // means and what one can do with such objects is explained in the
+    // introduction to this program as well as further down below when we
+    // explain the linear solvers and preconditioners for this problem:
+    BlockSparsityPattern      sparsity_pattern;
+    BlockSparseMatrix<double> system_matrix;
+
+    BlockVector<double>       solution;
+    BlockVector<double>       system_rhs;
+  };
+
+
+  // @sect3{Right hand side, boundary values, and exact solution}
+
+  // Our next task is to define the right hand side of our problem (i.e., the
+  // scalar right hand side for the pressure in the original Laplace
+  // equation), boundary values for the pressure, as well as a function that
+  // describes both the pressure and the velocity of the exact solution for
+  // later computations of the error. Note that these functions have one, one,
+  // and <code>dim+1</code> components, respectively, and that we pass the
+  // number of components down to the <code>Function@<dim@></code> base
+  // class. For the exact solution, we only declare the function that actually
+  // returns the entire solution vector (i.e. all components of it) at
+  // once. Here are the respective declarations:
+  template <int dim>
+  class RightHandSide : public Function<dim>
+  {
+  public:
+    RightHandSide () : Function<dim>(1) {}
+
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+  };
+
+
+
+  template <int dim>
+  class PressureBoundaryValues : public Function<dim>
+  {
+  public:
+    PressureBoundaryValues () : Function<dim>(1) {}
+
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+  };
+
+
+  template <int dim>
+  class ExactSolution : public Function<dim>
+  {
+  public:
+    ExactSolution () : Function<dim>(dim+1) {}
+
+    virtual void vector_value (const Point<dim> &p,
+                               Vector<double>   &value) const;
+  };
+
+
+  // And then we also have to define these respective functions, of
+  // course. Given our discussion in the introduction of what the solution
+  // should look like, the following computations should be straightforward:
+  template <int dim>
+  double RightHandSide<dim>::value (const Point<dim>  &/*p*/,
+                                    const unsigned int /*component*/) const
+  {
+    return 0;
+  }
+
+
+
+  template <int dim>
+  double PressureBoundaryValues<dim>::value (const Point<dim>  &p,
+                                             const unsigned int /*component*/) const
+  {
+    const double alpha = 0.3;
+    const double beta = 1;
+    return -(alpha*p[0]*p[1]*p[1]/2 + beta*p[0] - alpha*p[0]*p[0]*p[0]/6);
+  }
+
+
+
+  template <int dim>
+  void
+  ExactSolution<dim>::vector_value (const Point<dim> &p,
+                                    Vector<double>   &values) const
+  {
+    Assert (values.size() == dim+1,
+            ExcDimensionMismatch (values.size(), dim+1));
+
+    const double alpha = 0.3;
+    const double beta = 1;
+
+    values(0) = alpha*p[1]*p[1]/2 + beta - alpha*p[0]*p[0]/2;
+    values(1) = alpha*p[0]*p[1];
+    values(2) = -(alpha*p[0]*p[1]*p[1]/2 + beta*p[0] - alpha*p[0]*p[0]*p[0]/6);
+  }
+
+
+
+  // @sect3{The inverse permeability tensor}
+
+  // In addition to the other equation data, we also want to use a
+  // permeability tensor, or better -- because this is all that appears in the
+  // weak form -- the inverse of the permeability tensor,
+  // <code>KInverse</code>. For the purpose of verifying the exactness of the
+  // solution and determining convergence orders, this tensor is more in the
+  // way than helpful. We will therefore simply set it to the identity matrix.
+  //
+  // However, a spatially varying permeability tensor is indispensable in
+  // real-life porous media flow simulations, and we would like to use the
+  // opportunity to demonstrate the technique to use tensor valued functions.
+  //
+  // Possibly unsurprisingly, deal.II also has a base class not only for scalar
+  // and generally vector-valued functions (the <code>Function</code> base
+  // class) but also for functions that return tensors of fixed dimension and
+  // rank, the <code>TensorFunction</code> template. Here, the function under
+  // consideration returns a dim-by-dim matrix, i.e. a tensor of rank 2 and
+  // dimension <code>dim</code>. We then choose the template arguments of the
+  // base class appropriately.
+  //
+  // The interface that the <code>TensorFunction</code> class provides is
+  // essentially equivalent to the <code>Function</code> class. In particular,
+  // there exists a <code>value_list</code> function that takes a list of
+  // points at which to evaluate the function, and returns the values of the
+  // function in the second argument, a list of tensors:
+  template <int dim>
+  class KInverse : public TensorFunction<2,dim>
+  {
+  public:
+    KInverse () : TensorFunction<2,dim>() {}
+
+    virtual void value_list (const std::vector<Point<dim> > &points,
+                             std::vector<Tensor<2,dim> >    &values) const;
+  };
+
+
+  // The implementation is less interesting. As in previous examples, we add a
+  // check to the beginning of the function to make sure that the sizes of input
+  // and output parameters are the same (see step-5 for a discussion of this
+  // technique). Then we loop over all evaluation points, and for each one
+  // first clear the output tensor and then set all its diagonal elements to
+  // one (i.e. fill the tensor with the identity matrix):
+  template <int dim>
+  void
+  KInverse<dim>::value_list (const std::vector<Point<dim> > &points,
+                             std::vector<Tensor<2,dim> >    &values) const
+  {
+    Assert (points.size() == values.size(),
+            ExcDimensionMismatch (points.size(), values.size()));
+
+    for (unsigned int p=0; p<points.size(); ++p)
+      {
+        values[p].clear ();
+
+        for (unsigned int d=0; d<dim; ++d)
+          values[p][d][d] = 1.;
+      }
+  }
+
+
+
+  // @sect3{MixedLaplaceProblem class implementation}
+
+  // @sect4{MixedLaplaceProblem::MixedLaplaceProblem}
+
+  // In the constructor of this class, we first store the value that was
+  // passed in concerning the degree of the finite elements we shall use (a
+  // degree of zero, for example, means to use RT(0) and DG(0)), and then
+  // construct the vector valued element belonging to the space $X_h$ described
+  // in the introduction. The rest of the constructor is as in the early
+  // tutorial programs.
+  //
+  // The only thing worth describing here is the constructor call of the
+  // <code>fe</code> variable. The <code>FESystem</code> class to which this
+  // variable belongs has a number of different constructors that all refer to
+  // binding simpler elements together into one larger element. In the present
+  // case, we want to couple a single RT(degree) element with a single
+  // DQ(degree) element. The constructor to <code>FESystem</code> that does
+  // this requires us to specify first the first base element (the
+  // <code>FE_RaviartThomas</code> object of given degree) and then the number
+  // of copies for this base element, and then similarly the kind and number
+  // of <code>FE_DGQ</code> elements. Note that the Raviart-Thomas element
+  // already has <code>dim</code> vector components, so that the coupled
+  // element will have <code>dim+1</code> vector components, the first
+  // <code>dim</code> of which correspond to the velocity variable whereas the
+  // last one corresponds to the pressure.
+  //
+  // It is also worth comparing the way we constructed this element from its
+  // base elements, with the way we have done so in step-8: there, we have
+  // built it as <code>fe (FE_Q@<dim@>(1), dim)</code>, i.e. we have simply
+  // used <code>dim</code> copies of the <code>FE_Q(1)</code> element, one
+  // copy for the displacement in each coordinate direction.
+  template <int dim>
+  MixedLaplaceProblem<dim>::MixedLaplaceProblem (const unsigned int degree)
+    :
+    degree (degree),
+    fe (FE_RaviartThomas<dim>(degree), 1,
+        FE_DGQ<dim>(degree), 1),
+    dof_handler (triangulation)
+  {}
+
+
+
+  // @sect4{MixedLaplaceProblem::make_grid_and_dofs}
+
+  // This next function starts out with well-known function calls that create
+  // and refine a mesh, and then associate degrees of freedom with it:
+  template <int dim>
+  void MixedLaplaceProblem<dim>::make_grid_and_dofs ()
+  {
+    GridGenerator::hyper_cube (triangulation, -1, 1);
+    triangulation.refine_global (3);
+
+    dof_handler.distribute_dofs (fe);
+
+    // However, then things become different. As mentioned in the
+    // introduction, we want to subdivide the matrix into blocks corresponding
+    // to the two different kinds of variables, velocity and pressure. To this
+    // end, we first have to make sure that the indices corresponding to
+    // velocities and pressures are not intermingled: First all velocity
+    // degrees of freedom, then all pressure DoFs. This way, the global matrix
+    // separates nicely into a $2 \times 2$ system. To achieve this, we have to
+    // renumber degrees of freedom based on their vector component, an
+    // operation that conveniently is already implemented:
+    DoFRenumbering::component_wise (dof_handler);
+
+    // The next thing is that we want to figure out the sizes of these blocks
+    // so that we can allocate an appropriate amount of space. To this end, we
+    // call the DoFTools::count_dofs_per_component() function that
+    // counts how many shape functions are non-zero for a particular vector
+    // component. We have <code>dim+1</code> vector components, and
+    // DoFTools::count_dofs_per_component() will count how many shape functions
+    // belong to each of these components.
+    //
+    // There is one problem here. As described in the documentation of that
+    // function, it <i>wants</i> to put the number of $x$-velocity shape
+    // functions into <code>dofs_per_component[0]</code>, the number of
+    // $y$-velocity shape functions into <code>dofs_per_component[1]</code>
+    // (and similar in 3d), and the number of pressure shape functions into
+    // <code>dofs_per_component[dim]</code>. But, the Raviart-Thomas element
+    // is special in that it is non-@ref GlossPrimitive "primitive", i.e.,
+    // for Raviart-Thomas elements all velocity shape functions
+    // are nonzero in all components. In other words, the function cannot
+    // distinguish between $x$ and $y$ velocity functions because there
+    // <i>is</i> no such distinction. It therefore puts the overall number of
+    // velocity shape functions into each of <code>dofs_per_component[c]</code>,
+    // $0\le c<\text{dim}$. On the other hand, the number
+    // of pressure variables equals the number of shape functions that are
+    // nonzero in the dim-th component.
+    //
+    // Using this knowledge, we can get the number of velocity shape
+    // functions from any of the first <code>dim</code> elements of
+    // <code>dofs_per_component</code>, and then use this below to initialize
+    // the vector and matrix block sizes, as well as create output.
+    //
+    // @note If you find this concept difficult to understand, you may
+    // want to consider using the function DoFTools::count_dofs_per_block()
+    // instead, as we do in the corresponding piece of code in step-22.
+    // You might also want to read up on the difference between
+    // @ref GlossBlock "blocks" and @ref GlossComponent "components"
+    // in the glossary.
+    std::vector<types::global_dof_index> dofs_per_component (dim+1);
+    DoFTools::count_dofs_per_component (dof_handler, dofs_per_component);
+    const unsigned int n_u = dofs_per_component[0],
+                       n_p = dofs_per_component[dim];
+
+    std::cout << "Number of active cells: "
+              << triangulation.n_active_cells()
+              << std::endl
+              << "Total number of cells: "
+              << triangulation.n_cells()
+              << std::endl
+              << "Number of degrees of freedom: "
+              << dof_handler.n_dofs()
+              << " (" << n_u << '+' << n_p << ')'
+              << std::endl;
+
+    // The next task is to allocate a sparsity pattern for the matrix that we
+    // will create. We use a compressed sparsity pattern like in the previous
+    // steps, but as <code>system_matrix</code> is a block matrix we use the
+    // class <code>BlockDynamicSparsityPattern</code> instead of just
+    // <code>DynamicSparsityPattern</code>. This block sparsity pattern has
+    // four blocks in a $2 \times 2$ pattern. The blocks' sizes depend on
+    // <code>n_u</code> and <code>n_p</code>, which hold the number of velocity
+    // and pressure variables. In the second step we have to instruct the block
+    // system to update its knowledge about the sizes of the blocks it manages;
+    // this happens with the <code>dsp.collect_sizes ()</code> call.
+    BlockDynamicSparsityPattern dsp(2, 2);
+    dsp.block(0, 0).reinit (n_u, n_u);
+    dsp.block(1, 0).reinit (n_p, n_u);
+    dsp.block(0, 1).reinit (n_u, n_p);
+    dsp.block(1, 1).reinit (n_p, n_p);
+    dsp.collect_sizes ();
+    DoFTools::make_sparsity_pattern (dof_handler, dsp);
+
+    // We use the compressed block sparsity pattern in the same way as the
+    // non-block version to create the sparsity pattern and then the system
+    // matrix:
+    sparsity_pattern.copy_from(dsp);
+    system_matrix.reinit (sparsity_pattern);
+
+    // Then we have to resize the solution and right hand side vectors in
+    // exactly the same way as the block compressed sparsity pattern:
+    solution.reinit (2);
+    solution.block(0).reinit (n_u);
+    solution.block(1).reinit (n_p);
+    solution.collect_sizes ();
+
+    system_rhs.reinit (2);
+    system_rhs.block(0).reinit (n_u);
+    system_rhs.block(1).reinit (n_p);
+    system_rhs.collect_sizes ();
+  }
+
+
+  // @sect4{MixedLaplaceProblem::assemble_system}
+
+  // Similarly, the function that assembles the linear system has mostly been
+  // discussed already in the introduction to this example. At its top, what
+  // happens are all the usual steps, with the addition that we do not only
+  // allocate quadrature and <code>FEValues</code> objects for the cell terms,
+  // but also for face terms. After that, we define the usual abbreviations
+  // for variables, and then allocate space for the local matrix and right hand
+  // side contributions, and the array that holds the global numbers of the
+  // degrees of freedom local to the present cell.
+  template <int dim>
+  void MixedLaplaceProblem<dim>::assemble_system ()
+  {
+    QGauss<dim>   quadrature_formula(degree+2);
+    QGauss<dim-1> face_quadrature_formula(degree+2);
+
+    FEValues<dim> fe_values (fe, quadrature_formula,
+                             update_values    | update_gradients |
+                             update_quadrature_points  | update_JxW_values);
+    FEFaceValues<dim> fe_face_values (fe, face_quadrature_formula,
+                                      update_values    | update_normal_vectors |
+                                      update_quadrature_points  | update_JxW_values);
+
+    const unsigned int   dofs_per_cell   = fe.dofs_per_cell;
+    const unsigned int   n_q_points      = quadrature_formula.size();
+    const unsigned int   n_face_q_points = face_quadrature_formula.size();
+
+    FullMatrix<double>   local_matrix (dofs_per_cell, dofs_per_cell);
+    Vector<double>       local_rhs (dofs_per_cell);
+
+    std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+
+    // The next step is to declare objects that represent the source term,
+    // pressure boundary value, and coefficient in the equation. In addition
+    // to these objects that represent continuous functions, we also need
+    // arrays to hold their values at the quadrature points of individual
+    // cells (or faces, for the boundary values). Note that in the case of the
+    // coefficient, the array has to be one of matrices.
+    const RightHandSide<dim>          right_hand_side;
+    const PressureBoundaryValues<dim> pressure_boundary_values;
+    const KInverse<dim>               k_inverse;
+
+    std::vector<double> rhs_values (n_q_points);
+    std::vector<double> boundary_values (n_face_q_points);
+    std::vector<Tensor<2,dim> > k_inverse_values (n_q_points);
+
+    // Finally, we need a couple of extractors that we will use to get at the
+    // velocity and pressure components of vector-valued shape
+    // functions. Their function and use is described in detail in the @ref
+    // vector_valued module. Essentially, we will use them as subscripts on
+    // the FEValues objects below: the FEValues object describes all vector
+    // components of shape functions, while after subscripting, it will only
+    // refer to the velocities (a set of <code>dim</code> components starting
+    // at component zero) or the pressure (a scalar component located at
+    // position <code>dim</code>):
+    const FEValuesExtractors::Vector velocities (0);
+    const FEValuesExtractors::Scalar pressure (dim);
+
+    // With all this in place, we can go on with the loop over all cells. The
+    // body of this loop has been discussed in the introduction, and will not
+    // be commented any further here:
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+    for (; cell!=endc; ++cell)
+      {
+        fe_values.reinit (cell);
+        local_matrix = 0;
+        local_rhs = 0;
+
+        right_hand_side.value_list (fe_values.get_quadrature_points(),
+                                    rhs_values);
+        k_inverse.value_list (fe_values.get_quadrature_points(),
+                              k_inverse_values);
+
+        for (unsigned int q=0; q<n_q_points; ++q)
+          for (unsigned int i=0; i<dofs_per_cell; ++i)
+            {
+              const Tensor<1,dim> phi_i_u     = fe_values[velocities].value (i, q);
+              const double        div_phi_i_u = fe_values[velocities].divergence (i, q);
+              const double        phi_i_p     = fe_values[pressure].value (i, q);
+
+              for (unsigned int j=0; j<dofs_per_cell; ++j)
+                {
+                  const Tensor<1,dim> phi_j_u     = fe_values[velocities].value (j, q);
+                  const double        div_phi_j_u = fe_values[velocities].divergence (j, q);
+                  const double        phi_j_p     = fe_values[pressure].value (j, q);
+
+                  local_matrix(i,j) += (phi_i_u * k_inverse_values[q] * phi_j_u
+                                        - div_phi_i_u * phi_j_p
+                                        - phi_i_p * div_phi_j_u)
+                                       * fe_values.JxW(q);
+                }
+
+              local_rhs(i) += -phi_i_p *
+                              rhs_values[q] *
+                              fe_values.JxW(q);
+            }
+
+        for (unsigned int face_no=0;
+             face_no<GeometryInfo<dim>::faces_per_cell;
+             ++face_no)
+          if (cell->at_boundary(face_no))
+            {
+              fe_face_values.reinit (cell, face_no);
+
+              pressure_boundary_values
+              .value_list (fe_face_values.get_quadrature_points(),
+                           boundary_values);
+
+              for (unsigned int q=0; q<n_face_q_points; ++q)
+                for (unsigned int i=0; i<dofs_per_cell; ++i)
+                  local_rhs(i) += -(fe_face_values[velocities].value (i, q) *
+                                    fe_face_values.normal_vector(q) *
+                                    boundary_values[q] *
+                                    fe_face_values.JxW(q));
+            }
+
+        // The final step in the loop over all cells is to transfer local
+        // contributions into the global matrix and right hand side
+        // vector. Note that we use exactly the same interface as in previous
+        // examples, although we now use block matrices and vectors instead of
+        // the regular ones. In other words, to the outside world, block
+        // objects have the same interface as matrices and vectors, but they
+        // additionally allow access to individual blocks.
+        cell->get_dof_indices (local_dof_indices);
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          for (unsigned int j=0; j<dofs_per_cell; ++j)
+            system_matrix.add (local_dof_indices[i],
+                               local_dof_indices[j],
+                               local_matrix(i,j));
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          system_rhs(local_dof_indices[i]) += local_rhs(i);
+      }
+  }
+
+
+  // @sect3{Linear solvers and preconditioners}
+
+  // The linear solvers and preconditioners we use in this example have been
+  // discussed in significant detail already in the introduction. We will
+  // therefore not discuss the rationale for these classes here any more, but
+  // rather only comment on implementational aspects.
+
+
+  // @sect4{The <code>SchurComplement</code> class template}
+
+  // The next class is the Schur complement class. Its rationale has also been
+  // discussed at length in the introduction. The only thing we would like to
+  // note is that the class, too, is derived from the <code>Subscriptor</code>
+  // class and that as mentioned above it stores pointers to the entire block
+  // matrix and the inverse of the mass matrix block using
+  // <code>SmartPointer</code> objects.
+  //
+  // The <code>vmult</code> function requires two temporary vectors that we do
+  // not want to re-allocate and free every time we call this function. Since
+  // here, we have full control over the use of these vectors (unlike above,
+  // where a class called by the <code>vmult</code> function required these
+  // vectors, not the <code>vmult</code> function itself), we allocate them
+  // directly, rather than going through the <code>VectorMemory</code>
+  // mechanism. However, since these member variables, again, do not carry any
+  // state between successive calls to the member functions of this class
+  // (i.e., we never care what values they were set to the last time a member
+  // function was called), we mark these vectors as <code>mutable</code>.
+  //
+  // The rest of the (short) implementation of this class is straightforward
+  // if you know the order of matrix-vector multiplications performed by the
+  // <code>vmult</code> function:
+  class SchurComplement : public Subscriptor
+  {
+  public:
+    SchurComplement (const BlockSparseMatrix<double> &A,
+                     const IterativeInverse<Vector<double> > &Minv);
+
+    void vmult (Vector<double>       &dst,
+                const Vector<double> &src) const;
+
+  private:
+    const SmartPointer<const BlockSparseMatrix<double> > system_matrix;
+    const SmartPointer<const IterativeInverse<Vector<double> > > m_inverse;
+
+    mutable Vector<double> tmp1, tmp2;
+  };
+
+
+  SchurComplement::SchurComplement (const BlockSparseMatrix<double> &A,
+                                    const IterativeInverse<Vector<double> > &Minv)
+    :
+    system_matrix (&A),
+    m_inverse (&Minv),
+    tmp1 (A.block(0,0).m()),
+    tmp2 (A.block(0,0).m())
+  {}
+
+
+  void SchurComplement::vmult (Vector<double>       &dst,
+                               const Vector<double> &src) const
+  {
+    system_matrix->block(0,1).vmult (tmp1, src);
+    m_inverse->vmult (tmp2, tmp1);
+    system_matrix->block(1,0).vmult (dst, tmp2);
+  }
+
+
+  // @sect4{The <code>ApproximateSchurComplement</code> class template}
+
+  // The third component of our solver and preconditioner system is the class
+  // that approximates the Schur complement so we can form an IterativeInverse
+  // object that approximates the inverse of the Schur complement. It follows
+  // the same pattern as the Schur complement class, with the only exception
+  // that we do not multiply with the inverse mass matrix in
+  // <code>vmult</code>, but rather just do a single Jacobi
+  // step. Consequently, the class also does not have to store a pointer to an
+  // inverse mass matrix object.
+  //
+  // We will later use this class as a template argument to the
+  // IterativeInverse class which will in turn want to use it as a
+  // template argument for the PointerMatrix class. The latter class
+  // has a function that requires us to also write a function that
+  // provides the product with the transpose of the matrix this object
+  // represents. As a consequence, in the code below, we also
+  // implement a <tt>Tvmult</tt> function here that represents the
+  // product of the transpose matrix with a vector. It is easy to see
+  // how this needs to be implemented here: since the matrix is
+  // symmetric, we can as well call <code>vmult</code> wherever the
+  // product with the transpose matrix is required. (Note, however,
+  // that even though we implement this function here, there will in
+  // fact not be any need for it as long as we use SolverCG as the
+  // solver since that solver does not ever call the function that
+  // provides this operation.)
+  class ApproximateSchurComplement : public Subscriptor
+  {
+  public:
+    ApproximateSchurComplement (const BlockSparseMatrix<double> &A);
+
+    void vmult (Vector<double>       &dst,
+                const Vector<double> &src) const;
+    void Tvmult (Vector<double>       &dst,
+                 const Vector<double> &src) const;
+
+  private:
+    const SmartPointer<const BlockSparseMatrix<double> > system_matrix;
+
+    mutable Vector<double> tmp1, tmp2;
+  };
+
+
+  ApproximateSchurComplement::ApproximateSchurComplement (const BlockSparseMatrix<double> &A)
+    :
+    system_matrix (&A),
+    tmp1 (A.block(0,0).m()),
+    tmp2 (A.block(0,0).m())
+  {}
+
+
+  void ApproximateSchurComplement::vmult (Vector<double>       &dst,
+                                          const Vector<double> &src) const
+  {
+    system_matrix->block(0,1).vmult (tmp1, src);
+    system_matrix->block(0,0).precondition_Jacobi (tmp2, tmp1);
+    system_matrix->block(1,0).vmult (dst, tmp2);
+  }
+
+
+  void ApproximateSchurComplement::Tvmult (Vector<double>       &dst,
+                                           const Vector<double> &src) const
+  {
+    vmult (dst, src);
+  }
+
+
+
+  // @sect4{MixedLaplace::solve}
+
+  // After all these preparations, we can finally write the function that
+  // actually solves the linear problem. We will go through the two parts it
+  // has that each solve one of the two equations, the first one for the
+  // pressure (block 1 of the solution), then the velocities (block 0
+  // of the solution). Both parts need an object representing the inverse mass
+  // matrix and an auxiliary vector, and we therefore declare these objects at
+  // the beginning of this function.
+  template <int dim>
+  void MixedLaplaceProblem<dim>::solve ()
+  {
+    PreconditionIdentity identity;
+    IterativeInverse<Vector<double> > m_inverse;
+    m_inverse.initialize(system_matrix.block(0,0), identity);
+    m_inverse.solver.select("cg");
+    static ReductionControl inner_control(1000, 0., 1.e-13);
+    m_inverse.solver.set_control(inner_control);
+
+    Vector<double> tmp (solution.block(0).size());
+
+    // Now on to the first equation. The right hand side of it is $B^TM^{-1}F-G$,
+    // which is what we compute in the first few lines. We then declare the
+    // objects representing the Schur complement, its approximation, and the
+    // inverse of the approximation. Finally, we declare a solver object and
+    // hand off all these matrices and vectors to it to compute block 1 (the
+    // pressure) of the solution:
+    {
+      Vector<double> schur_rhs (solution.block(1).size());
+
+      m_inverse.vmult (tmp, system_rhs.block(0));
+      system_matrix.block(1,0).vmult (schur_rhs, tmp);
+      schur_rhs -= system_rhs.block(1);
+
+
+      SchurComplement
+      schur_complement (system_matrix, m_inverse);
+
+      ApproximateSchurComplement
+      approximate_schur_complement (system_matrix);
+
+      IterativeInverse<Vector<double> >
+      preconditioner;
+      preconditioner.initialize(approximate_schur_complement, identity);
+      preconditioner.solver.select("cg");
+      preconditioner.solver.set_control(inner_control);
+
+
+      SolverControl solver_control (solution.block(1).size(),
+                                    1e-12*schur_rhs.l2_norm());
+      SolverCG<>    cg (solver_control);
+
+      cg.solve (schur_complement, solution.block(1), schur_rhs,
+                preconditioner);
+
+      std::cout << solver_control.last_step()
+                << " CG Schur complement iterations to obtain convergence."
+                << std::endl;
+    }
+
+    // After we have the pressure, we can compute the velocity. The equation
+    // reads $MU=-BP+F$, and we solve it by first computing the right hand
+    // side, and then multiplying it with the object that represents the
+    // inverse of the mass matrix:
+    {
+      system_matrix.block(0,1).vmult (tmp, solution.block(1));
+      tmp *= -1;
+      tmp += system_rhs.block(0);
+
+      m_inverse.vmult (solution.block(0), tmp);
+    }
+  }
+
+
+  // @sect3{MixedLaplaceProblem class implementation (continued)}
+
+  // @sect4{MixedLaplace::compute_errors}
+
+  // After we have dealt with the linear solver and preconditioners, we
+  // continue with the implementation of our main class. In particular, the
+  // next task is to compute the errors in our numerical solution, in both the
+  // pressures as well as velocities.
+  //
+  // To compute errors in the solution, we have already introduced the
+  // <code>VectorTools::integrate_difference</code> function in step-7 and
+  // step-11. However, there we only dealt with scalar solutions, whereas here
+  // we have a vector-valued solution with components that even denote
+  // different quantities and may have different orders of convergence (this
+  // isn't the case here, by choice of the used finite elements, but is
+  // frequently the case in mixed finite element applications). What we
+  // therefore have to do is to `mask' the components that we are interested
+  // in. This is easily done: the
+  // <code>VectorTools::integrate_difference</code> function takes as its last
+  // argument a pointer to a weight function (the parameter defaults to the
+  // null pointer, meaning unit weights). What we simply have to do is to pass
+  // a function object that equals one in the components we are interested in,
+  // and zero in the other ones. For example, to compute the pressure error,
+  // we should pass a function that represents the constant vector with a unit
+  // value in component <code>dim</code>, whereas for the velocity the
+  // constant vector should be one in the first <code>dim</code> components,
+  // and zero in the location of the pressure.
+  //
+  // In deal.II, the <code>ComponentSelectFunction</code> does exactly this:
+  // it wants to know how many vector components the function it is to
+  // represent should have (in our case this would be <code>dim+1</code>, for
+  // the joint velocity-pressure space) and which individual or range of
+  // components should be equal to one. We therefore define two such masks at
+  // the beginning of the function, followed by an object representing the
+  // exact solution and a vector in which we will store the cellwise errors as
+  // computed by <code>integrate_difference</code>:
+  template <int dim>
+  void MixedLaplaceProblem<dim>::compute_errors () const
+  {
+    const ComponentSelectFunction<dim>
+    pressure_mask (dim, dim+1);
+    const ComponentSelectFunction<dim>
+    velocity_mask(std::make_pair(0, dim), dim+1);
+
+    ExactSolution<dim> exact_solution;
+    Vector<double> cellwise_errors (triangulation.n_active_cells());
+
+    // As already discussed in step-7, we have to realize that it is
+    // impossible to integrate the errors exactly. All we can do is
+    // approximate this integral using quadrature. This actually presents a
+    // slight twist here: if we naively chose an object of type
+    // <code>QGauss@<dim@>(degree+1)</code> as one may be inclined to do (we
+    // used Gauss formulas for integrating the linear system), one realizes that
+    // the error is very small and does not follow the expected convergence
+    // curves at all. What is happening is that for the mixed finite elements
+    // used here, the Gauss points happen to be superconvergence points in
+    // which the pointwise error is much smaller (and converges with higher
+    // order) than anywhere else. These are therefore not particularly good
+    // points for integration. To avoid this problem, we simply use a
+    // trapezoidal rule and iterate it <code>degree+2</code> times in each
+    // coordinate direction (again as explained in step-7):
+    QTrapez<1>     q_trapez;
+    QIterated<dim> quadrature (q_trapez, degree+2);
+
+    // With this, we can then let the library compute the errors and output
+    // them to the screen:
+    VectorTools::integrate_difference (dof_handler, solution, exact_solution,
+                                       cellwise_errors, quadrature,
+                                       VectorTools::L2_norm,
+                                       &pressure_mask);
+    const double p_l2_error = cellwise_errors.l2_norm();
+
+    VectorTools::integrate_difference (dof_handler, solution, exact_solution,
+                                       cellwise_errors, quadrature,
+                                       VectorTools::L2_norm,
+                                       &velocity_mask);
+    const double u_l2_error = cellwise_errors.l2_norm();
+
+    std::cout << "Errors: ||e_p||_L2 = " << p_l2_error
+              << ",   ||e_u||_L2 = " << u_l2_error
+              << std::endl;
+  }
+
+
+  // @sect4{MixedLaplace::output_results}
+
+  // The last interesting function is the one in which we generate graphical
+  // output. Everything here looks obvious and familiar. Note how we construct
+  // unique names for all the solution variables at the beginning, like we did
+  // in step-8 and other programs later on. The only thing worth mentioning is
+  // that for higher order elements, it seems inappropriate to only show a
+  // single bilinear quadrilateral per cell in the graphical output. We
+  // therefore generate patches of size (degree+1)x(degree+1) to capture the
+  // full information content of the solution. See the step-7 tutorial program
+  // for more information on this.
+  //
+  // Note that we output the <code>dim+1</code> components of the solution
+  // vector as a collection of individual scalars here. Most visualization
+  // programs will then only offer to visualize them individually, rather than
+  // allowing us to plot the flow field as a vector field. However, as
+  // explained in the corresponding function of step-22 or the @ref VVOutput
+  // "Generating graphical output" section of the @ref vector_valued module,
+  // instructing the DataOut class to identify components of the FESystem
+  // object as elements of a <code>dim</code>-dimensional vector is not
+  // actually very difficult and will then allow us to show results as vector
+  // plots. We skip this here for simplicity and refer to the links above for
+  // more information.
+  template <int dim>
+  void MixedLaplaceProblem<dim>::output_results () const
+  {
+    std::vector<std::string> solution_names;
+    switch (dim)
+      {
+      case 2:
+        solution_names.push_back ("u");
+        solution_names.push_back ("v");
+        solution_names.push_back ("p");
+        break;
+
+      case 3:
+        solution_names.push_back ("u");
+        solution_names.push_back ("v");
+        solution_names.push_back ("w");
+        solution_names.push_back ("p");
+        break;
+
+      default:
+        Assert (false, ExcNotImplemented());
+      }
+
+
+    DataOut<dim> data_out;
+
+    data_out.attach_dof_handler (dof_handler);
+    data_out.add_data_vector (solution, solution_names);
+
+    data_out.build_patches (degree+1);
+
+    std::ofstream output ("solution.gmv");
+    data_out.write_gmv (output);
+  }
+
+
+
+  // @sect4{MixedLaplace::run}
+
+  // This is the final function of our main class. Its only job is to call
+  // the other functions in their natural order:
+  template <int dim>
+  void MixedLaplaceProblem<dim>::run ()
+  {
+    make_grid_and_dofs();
+    assemble_system ();
+    solve ();
+    compute_errors ();
+    output_results ();
+  }
+}
+
+
+// @sect3{The <code>main</code> function}
+
+// The main function we stole from step-6 instead of step-4. It is almost
+// equal to the one in step-6 (apart from the changed class names, of course),
+// the only exception is that we pass the degree of the finite element space
+// to the constructor of the mixed Laplace problem (here, we use zero-th order
+// elements).
+int main ()
+{
+  try
+    {
+      using namespace dealii;
+      using namespace Step20;
+
+      MixedLaplaceProblem<2> mixed_laplace_problem(0);
+      mixed_laplace_problem.run ();
+    }
+  catch (std::exception &exc)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+
+      return 1;
+    }
+  catch (...)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    }
+
+  return 0;
+}
diff --git a/examples/step-21/CMakeLists.txt b/examples/step-21/CMakeLists.txt
new file mode 100644
index 0000000..523d116
--- /dev/null
+++ b/examples/step-21/CMakeLists.txt
@@ -0,0 +1,39 @@
+##
+#  CMake script for the step-21 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-21")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+# Locate deal.II via the hints below; abort with an explanation if not found:
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-21/doc/builds-on b/examples/step-21/doc/builds-on
new file mode 100644
index 0000000..9391e30
--- /dev/null
+++ b/examples/step-21/doc/builds-on
@@ -0,0 +1 @@
+step-20
diff --git a/examples/step-21/doc/intro.dox b/examples/step-21/doc/intro.dox
new file mode 100644
index 0000000..2ee80de
--- /dev/null
+++ b/examples/step-21/doc/intro.dox
@@ -0,0 +1,532 @@
+<a name="Intro"></a> <h1>Introduction</h1>
+
+This program grew out of a student project by Yan Li at Texas A&M
+University. Most of the work for this program is by her.
+
+In this project, we propose a numerical simulation for two phase
+flow problems in porous media. This problem includes one
+elliptic equation and one nonlinear, time dependent transport
+equation. This is therefore also the first time-dependent tutorial
+program (besides the somewhat strange time-dependence of @ref step_18
+"step-18").
+
+The equations covered here are an extension of the material already covered in
+step-20. In particular, they fall into the class of
+vector-valued problems. A toplevel overview of this topic can be found in the
+@ref vector_valued module.
+
+
+<h3>The two phase flow problem</h3>
+
+Modeling of two phase flow in porous media is important for both
+environmental remediation and the management of petroleum and groundwater
+reservoirs. Practical situations involving two phase flow include the
+dispersal of a nonaqueous phase liquid in an aquifer, or the joint
+movement of a mixture of fluids such as oil and water in a
+reservoir. Simulation models, if they are to provide realistic
+predictions, must accurately account for these effects.
+
+To derive the governing equations, consider two phase flow in a
+reservoir $\Omega$ under the assumption that the movement of fluids is
+dominated by viscous effects; i.e. we neglect the effects of gravity,
+compressibility, and capillary pressure. Porosity will be considered
+to be constant. We will denote variables referring to either of the two
+phases using subscripts $w$ and $o$, short for water and oil. The
+derivation of the equations holds for other pairs of fluids as well,
+however.
+
+The velocity with which molecules of each of the two phases move is
+determined by Darcy's law that states that the velocity is
+proportional to the pressure gradient:
+@f{eqnarray*}
+  \mathbf{u}_{j}
+  =
+  -\frac{k_{rj}(S)}{\mu_{j}} \mathbf{K} \cdot \nabla p
+@f}
+where $\mathbf{u}_{j}$ is the velocity of phase $j=o,w$, $K$ is the
+permeability tensor, $k_{rj}$ is the relative permeability of phase
+$j$, $p$ is the
+pressure and $\mu_{j}$ is the viscosity of phase $j$. Finally, $S$ is
+the saturation (volume fraction), i.e. a function with values between
+0 and 1 indicating the composition of the mixture of fluids. In
+general, the coefficients $K, k_{rj}, \mu$ may be spatially dependent
+variables, and we will always treat them as non-constant functions in
+the following.
+
+We combine Darcy's law with the statement of conservation of mass for
+each phase,
+@f[
+  \textrm{div}\ \mathbf{u}_{j} = q_j,
+@f]
+with a source term for each phase. By summing over the two phases,
+we can express the governing equations in terms of the
+so-called pressure equation:
+@f{eqnarray*}
+- \nabla \cdot (\mathbf{K}\lambda(S) \nabla p)= q.
+@f}
+Here, $q$ is the sum source term, and
+@f[
+  \lambda(S) = \frac{k_{rw}(S)}{\mu_{w}}+\frac{k_{ro}(S)}{\mu_{o}}
+@f]
+is the total mobility.
+
+So far, this looks like an ordinary stationary, Poisson-like equation that we
+can solve right away with the techniques of the first few tutorial programs
+(take a look at step-6, for example, for something very
+similar). However, we have not said anything yet about the saturation, which
+of course is going to change as the fluids move around.
+
+The second part of the equations is therefore a description of the
+dynamics of the saturation. We model this as an advected quantity:
+@f{eqnarray*}
+  S_{t} + \mathbf{u} \cdot \nabla F(S) = 0,
+@f}
+where $\mathbf u$ is the total velocity
+@f[
+  \mathbf{u} =
+  \mathbf{u}_{o} + \mathbf{u}_{w} = -\lambda(S) \mathbf{K}\cdot\nabla p.
+@f]
+In addition,
+@f[
+  F(S)
+  =
+  \frac{k_{rw}(S)/\mu_{w}}{k_{rw}(S)/\mu_{w} + k_{ro}(S)/\mu_{o}}
+@f]
+Note that the advection equation contains the term $\mathbf{u} \cdot \nabla
+F(S)$ rather than $\mathbf{u} \cdot \nabla S$ to indicate that the saturation
+is not simply transported along; rather, since the two phases move with
+different velocities, the saturation can actually change even in the advected
+coordinate system. To see this, rewrite $\mathbf{u} \cdot \nabla F(S)
+= \mathbf{u} F'(S) \cdot \nabla S$ to observe that the <i>actual</i>
+velocity with which the phase with saturation $S$ is transported is
+$\mathbf u F'(S)$ whereas the other phase is transported at velocity
+$\mathbf u (1-F'(S))$. $F(S)$ is consequently often referred to as the
+<i>fractional flow</i>.
+
+In summary, what we get are the following two equations:
+@f{eqnarray*}
+  - \nabla \cdot (\mathbf{K}\lambda(S) \nabla p) &=& q
+  \qquad \textrm{in}\ \Omega\times[0,T],
+  \\
+  S_{t} + \mathbf{u} \cdot \nabla F(S) &=& 0
+  \qquad \textrm{in}\ \Omega\times[0,T].
+@f}
+Here, $p=p(\mathbf x, t), S=S(\mathbf x, t)$ are now time dependent
+functions: while at every time instant the flow field is in
+equilibrium with the pressure (i.e. we neglect dynamic
+accelerations), the saturation is transported along with the flow and
+therefore changes over time, in turn affecting the flow field again
+through the dependence of the first equation on $S$.
+
+This set of equations has a peculiar character: one of the two
+equations has a time derivative, the other one doesn't. This
+corresponds to the character that the pressure and velocities are
+coupled through an instantaneous constraint, whereas the saturation
+evolves over finite time scales.
+
+Such systems of equations are called Differential Algebraic Equations
+(DAEs), since one of the equations is a differential equation, the
+other is not (at least not with respect to the time variable) and is
+therefore an "algebraic" equation. (The notation comes from the field
+of ordinary differential equations, where everything that does not
+have derivatives with respect to the time variable is necessarily an
+algebraic equation.) This class of equations contains pretty
+well-known cases: for example, the time dependent Stokes and
+Navier-Stokes equations (where the algebraic constraint is that the
+divergence of the flow field, $\textrm{div}\ \mathbf u$, must be zero)
+as well as the time dependent Maxwell equations (here, the algebraic
+constraint is that the divergence of the electric displacement field
+equals the charge density, $\textrm{div}\ \mathbf D = \rho$ and that the
+divergence of the magnetic flux density is zero: $\textrm{div}\ \mathbf
+B = 0$); even the quasistatic model of step-18 falls into this
+category. We will see that the different character of the two equations
+will inform our discretization strategy for the two equations.
+
+
+<h3>Time discretization</h3>
+
+In the reservoir simulation community, it is common to solve the equations
+derived above by going back to the first order, mixed formulation. To this
+end, we re-introduce the total velocity $\mathbf u$ and write the equations in
+the following form:
+@f{eqnarray*}
+  \mathbf{u}+\mathbf{K}\lambda(S) \nabla p&=&0 \\
+  \nabla \cdot\mathbf{u} &=& q \\
+  S_{t} + \mathbf{u} \cdot \nabla F(S) &=& 0.
+@f}
+This formulation has the additional benefit that we do not have to express the
+total velocity $\mathbf u$ appearing in the transport equation as a function
+of the pressure, but can rather take the primary variable for it. Given the
+saddle point structure of the first two equations and their similarity to the
+mixed Laplace formulation we have introduced in step-20, it
+will come as no surprise that we will use a mixed discretization again.
+
+But let's postpone this for a moment. The first business we have with these
+equations is to think about the time discretization. In reservoir simulation,
+there is a rather standard algorithm that we will use here. It first solves
+the pressure using an implicit equation, then the saturation using an explicit
+time stepping scheme. The algorithm is called IMPES for IMplicit Pressure
+Explicit Saturation and was first proposed a long time ago: by Sheldon et
+al. in 1959 and Stone and Gardner in 1961 (J. W. Sheldon, B. Zondek and
+W. T. Cardwell: <i>One-dimensional, incompressible, non-capillary, two-phase
+fluid flow in a porous medium</i>, Trans. SPE AIME, 216 (1959), pp. 290-296; H.
+L. Stone and A. O. Gardner Jr: <i>Analysis of gas-cap or dissolved-gas
+reservoirs</i>, Trans. SPE AIME, 222 (1961), pp. 92-104).
+In a slightly modified form, this algorithm can be
+written as follows: for each time step, solve
+@f{eqnarray*}
+  \mathbf{u}^{n+1}+\mathbf{K}\lambda(S^n) \nabla p^{n+1}&=&0 \\
+  \nabla \cdot\mathbf{u}^{n+1} &=& q^{n+1} \\
+  \frac {S^{n+1}-S^n}{\triangle t} + \mathbf{u}^{n+1} \cdot \nabla F(S^n) &=& 0,
+@f}
+where $\triangle t$ is the length of a time step. Note how we solve the
+implicit pressure-velocity system that only depends on the previously computed
+saturation $S^n$, and then do an explicit time step for $S^{n+1}$ that only
+depends on the previously known $S^n$ and the just computed
+$\mathbf{u}^{n+1}$. This way, we never have to iterate for the nonlinearities
+of the system as we would have if we used a fully implicit method.
+
+We can then state the problem in weak form as follows, by multiplying each
+equation with test functions $\mathbf v$, $\phi$, and $\sigma$ and integrating
+terms by parts:
+@f{eqnarray*}
+  \left((\mathbf{K}\lambda(S^n))^{-1} \mathbf{u}^{n+1},\mathbf v\right)_\Omega -
+  (p^{n+1}, \nabla\cdot\mathbf v)_\Omega &=&
+  - (p^{n+1}, \mathbf n \cdot \mathbf v)_{\partial\Omega}
+  \\
+  (\nabla \cdot\mathbf{u}^{n+1}, \phi)_\Omega &=& (q^{n+1},\phi)_\Omega
+@f}
+Note that in the first term, we have to prescribe the pressure $p^{n+1}$ on
+the boundary $\partial\Omega$ as boundary values for our problem. $\mathbf n$
+denotes the unit outward normal vector to $\partial K$, as usual.
+
+For the saturation equation, we obtain after integrating by parts
+@f{eqnarray*}
+  (S^{n+1}, \sigma)_\Omega
+  -
+  \triangle t
+  \sum_K
+  \left\{
+  \left(F(S^n), \nabla \cdot (\mathbf{u}^{n+1} \sigma)\right)_K
+  -
+  \left(F(S^n) (\mathbf n \cdot \mathbf{u}^{n+1}), \sigma\right)_{\partial K}
+  \right\}
+  &=&
+  (S^n,\sigma)_\Omega.
+@f}
+Using the fact that $\nabla \cdot \mathbf{u}^{n+1}=q^{n+1}$, we can rewrite the
+cell term to get an equation as follows:
+@f{eqnarray*}
+  (S^{n+1}, \sigma)_\Omega
+  -
+  \triangle t
+  \sum_K
+  \left\{
+  \left(F(S^n) \mathbf{u}^{n+1}, \nabla \sigma\right)_K
+  -
+  \left(F(S^n) (\mathbf n \cdot \mathbf{u}^{n+1}), \sigma\right)_{\partial K}
+  \right\}
+  &=&
+  (S^n,\sigma)_\Omega +
+  \triangle t \sum_K  \left(F(S^n) q^{n+1}, \sigma\right)_K.
+@f}
+
+
+
+<h3>Space discretization</h3>
+
+In each time step, we then apply the mixed finite element method of @ref step_20
+"step-20" to the velocity and pressure. To be well-posed, we choose
+Raviart-Thomas spaces $RT_{k}$ for $\mathbf{u}$ and discontinuous elements of
+class $DQ_{k}$ for $p$. For the saturation, we will also choose $DQ_{k}$
+spaces.
+
+Since we have discontinuous spaces, we have to think about how to evaluate
+terms on the interfaces between cells, since discontinuous functions are not
+really defined there. In particular, we have to give a meaning to the last
+term on the left hand side of the saturation equation. To this end, let us
+define that we want to evaluate it in the following sense:
+@f{eqnarray*}
+  &&\left(F(S^n) (\mathbf n \cdot \mathbf{u}^{n+1}), \sigma\right)_{\partial K}
+  \\
+  &&\qquad =
+  \left(F(S^n_+) (\mathbf n \cdot \mathbf{u}^{n+1}_+), \sigma\right)_{\partial K_+}
+  +
+  \left(F(S^n_-) (\mathbf n \cdot \mathbf{u}^{n+1}_-), \sigma\right)_{\partial K_-},
+@f}
+where $\partial K_{-}:= \{x\in \partial K, \mathbf{u}(x) \cdot \mathbf{n}<0\}$
+denotes the inflow boundary and $\partial K_{+}:= \{\partial K \setminus
+\partial K_{-}\}$ is the outflow part of the boundary.
+The quantities $S_+,\mathbf{u}_+$ then correspond to the values of these
+variables on the present cell, whereas $S_-,\mathbf{u}_-$ (needed on the
+inflow part of the boundary of $K$) are quantities taken from the neighboring
+cell. Some more context on discontinuous element techniques and evaluation of
+fluxes can also be found in step-12.
+
+
+<h3>Linear solvers</h3>
+
+The linear solvers used in this program are a straightforward extension of the
+ones used in step-20. Essentially, we simply have to extend
+everything from
+two to three solution components. If we use the discrete spaces
+mentioned above and put shape functions into the bilinear forms, we
+arrive at the following linear system to be solved for time step $n+1$:
+@f[
+\left(
+\begin{array}{ccc}
+M^u(S^{n}) & B^{T}& 0\\
+B &    0 & 0\\
+\triangle t\; H &    0& M^S
+\end{array}
+\right)
+\left(
+\begin{array}{c}
+\mathbf{U}^{n+1} \\ P^{n+1} \\ S^{n+1}
+\end{array}
+\right)
+=
+\left(
+\begin{array}{c}
+0 \\ F_2 \\ F_3
+\end{array}
+\right)
+@f]
+where the individual matrices and vectors are defined as follows using
+shape functions $\mathbf v_i$ (of type Raviart Thomas $RT_k$) for
+velocities and $\phi_i$ (of type $DQ_k$) for both pressures and saturations:
+@f{eqnarray*}
+M^u(S^n)_{ij} &=&
+\left((\mathbf{K}\lambda(S^n))^{-1} \mathbf{v}_i,\mathbf
+v_j\right)_\Omega,
+\\
+B_{ij} &=&
+-(\nabla \cdot \mathbf v_j, \phi_i)_\Omega,
+\\
+H_{ij} &=&
+  -
+  \sum_K
+  \left\{
+  \left(F(S^n) \mathbf v_i, \nabla \phi_j\right)_K
+  -
+  \left(F(S^n_+) (\mathbf n \cdot (\mathbf v_i)_+), \phi_j\right)_{\partial K_+}
+  -
+  \left(F(S^n_-) (\mathbf n \cdot (\mathbf v_i)_-), \phi_j\right)_{\partial K_-}
+  \right\},
+\\
+M^S_{ij} &=&
+(\phi_i, \phi_j)_\Omega,
+\\
+(F_2)_i &=&
+-(q^{n+1},\phi_i)_\Omega,
+\\
+(F_3)_i &=&
+(S^n,\phi_i)_\Omega +\triangle t \sum_K  \left(F(S^n) q^{n+1}, \phi_i\right)_K.
+@f}
+
+@note Due to historical accidents, the role of matrices $B$ and $B^T$
+has been reversed in this program compared to step-20. In other words,
+here $B$ refers to the divergence and $B^T$ to the gradient operators
+when it was the other way around in step-20.
+
+The system above presents a complication: Since the matrix $H_{ij}$
+depends on $\mathbf u^{n+1}$ implicitly (the velocities are needed to
+determine which parts of the boundaries $\partial K$ of cells are
+influx or outflux parts), we can only assemble this matrix after we
+have solved for the velocities.
+
+The solution scheme then involves the following steps:
+<ol>
+  <li>Solve for the pressure $p^{n+1}$ using the Schur complement
+  technique introduced in step-20.
+
+  <li>Solve for the velocity $\mathbf u^{n+1}$ as also discussed in
+  step-20.
+
+  <li>Compute the term $F_3-\triangle t\; H \mathbf u^{n+1}$, using
+  the just computed velocities.
+
+  <li>Solve for the saturation $S^{n+1}$.
+</ol>
+
+In this scheme, we never actually build the matrix $H$, but rather
+generate the right hand side of the third equation once we are ready
+to do so.
+
+In the program, we use a variable <code>solution</code> to store the
+solution of the present time step. At the end of each step, we copy
+its content, i.e. all three of its block components, into the variable
+<code>old_solution</code> for use in the next time step.
+
+
+<h3>Choosing a time step</h3>
+
+A general rule of thumb in hyperbolic transport equations like the equation we
+have to solve for the saturation equation is that if we use an explicit time
+stepping scheme, then we should use a time step such that the distance that a
+particle can travel within one time step is no larger than the diameter of a
+single cell. In other words, here, we should choose
+@f[
+  \triangle t_{n+1} \le \frac h{|\mathbf{u}^{n+1}(\mathbf{x})|}.
+@f]
+Fortunately, we are in a position where we can do that: we only need the
+time step when we want to assemble the right hand side of the saturation
+equation, which is after we have already solved for $\mathbf{u}^{n+1}$. All we
+therefore have to do after solving for the velocity is to loop over all
+quadrature points in the domain and determine the maximal magnitude of the
+velocity. We can then set the time step for the saturation equation to
+@f[
+  \triangle t_{n+1} = \frac {\min_K h_K}{\max_{\mathbf{x}}|\mathbf{u}^{n+1}(\mathbf{x})|}.
+@f]
+
+Why is it important to do this? If we don't, then we will end up with lots of
+places where our saturation is larger than one or less than zero, as can
+easily be verified. (Remember that the saturation corresponds to something
+like the water fraction in the fluid mixture, and therefore must physically be
+between 0 and 1.) On the other hand, if we choose our time step according to
+the criterion listed above, this only happens very very infrequently —
+in fact only once for the entire run of the program. However, to be on the
+safe side, we run a function <code>project_back_saturation</code> at
+the end of each time step, that simply projects the saturation back onto the
+interval $[0,1]$, should it have gotten out of the physical range. This is
+useful since the functions $\lambda(S)$ and $F(S)$ do not represent anything
+physical outside this range, and we should not expect the program to do
+anything useful once we have negative saturations or ones larger than one.
+
+Note that we will have similar restrictions on the time step also in @ref
+step_23 "step-23" and step-24 where we solve the time dependent
+wave equation, another hyperbolic problem. We will also come back to the issue
+of time step choice below in the section on <a href="#extensions">possible
+extensions to this program</a>.
+
+
+<h3>The test case</h3>
+
+For simplicity, this program assumes that there is no source, $q=0$, and that
+the heterogeneous porous medium is isotropic $\mathbf{K}(\mathbf{x}) =
+k(\mathbf{x}) \mathbf{I}$. The first one of these is a realistic assumption in
+oil reservoirs: apart from injection and production wells, there are usually
+no mechanisms for fluids to appear or disappear out of the blue. The second
+one is harder to justify: on a microscopic level, most rocks are isotropic,
+because they consist of a network of interconnected pores. However, this
+microscopic scale is out of the range of today's computer simulations, and we
+have to be content with simulating things on the scale of meters. On that
+scale, however, fluid transport typically happens through a network of cracks
+in the rock, rather than through pores. However, cracks often result from
+external stress fields in the rock layer (for example from tectonic faulting)
+and the cracks are therefore roughly aligned. This leads to a situation where
+the permeability is often orders of magnitude larger in the direction parallel
+to the cracks than perpendicular to the cracks. A problem one typically faces in
+reservoir simulation, however, is that the modeler doesn't know the direction
+of cracks because oil reservoirs are not accessible to easy inspection. The
+only solution in that case is to assume an effective, isotropic permeability.
+
+Whatever the matter, both of these restrictions, no sources and isotropy,
+would be easy to lift with a few lines of code in the program.
+
+Next, for simplicity, our numerical simulation will be done on the
+unit cell $\Omega = [0,1]\times [0,1]$ for $t\in [0,T]$. Our initial
+conditions are $S(\mathbf{x},0)=0$; in the oil reservoir picture, where $S$
+would indicate the water saturation, this means that the reservoir contains
+pure oil at the beginning. Note that we do not need any initial
+conditions for pressure or velocity, since the equations do not contain time
+derivatives of these variables. Finally, we impose the following pressure
+boundary conditions:
+@f[
+  p(\mathbf{x},t)=1-x_1 \qquad \textrm{on}\ \partial\Omega.
+@f]
+Since the pressure and velocity solve a mixed form Poisson equation, the
+imposed pressure leads to a resulting flow field for the velocity. On the
+other hand, this flow field determines whether a piece of the boundary is of
+inflow or outflow type, which is of relevance because we have to impose
+boundary conditions for the saturation on the inflow part of the boundary,
+@f[
+  \Gamma_{in}(t) = \{\mathbf{x}\in\partial\Omega:
+                     \mathbf{n} \cdot \mathbf{u}(\mathbf{x},t) < 0\}.
+@f]
+On this inflow boundary, we impose the following saturation values:
+@f{eqnarray}
+  S(\mathbf{x},t) = 1 & \textrm{on}\ \Gamma_{in}\cap\{x_1=0\},
+  \\
+  S(\mathbf{x},t) = 0 & \textrm{on}\ \Gamma_{in}\backslash \{x_1=0\}.
+@f}
+In other words, we have pure water entering the reservoir at the left, whereas
+the other parts of the boundary are in contact with undisturbed parts of the
+reservoir and whenever influx occurs on these boundaries, pure oil will enter.
+
+In our simulations, we choose the total mobility as
+@f[
+  \lambda (S) = \frac{1.0}{\mu} S^2 +(1-S)^2
+@f]
+where we use $\mu=0.2$ for the viscosity. In addition, the fractional flow of
+water is given by
+@f[
+  F(S)=\frac{S^2}{S^2+\mu (1-S)^2}
+@f]
+
+@note Coming back to this testcase in step-43 several years later revealed an
+oddity in the setup of this testcase. To this end, consider that we can
+rewrite the advection equation for the saturation as $S_{t} + (\mathbf{u}
+F'(S)) \cdot \nabla S = 0$. Now, at the initial time, we have $S=0$, and with
+the given choice of function $F(S)$, we happen to have $F'(0)=0$. In other
+words, at $t=0$, the equation reduces to $S_t=0$ for all $\mathbf x$, so the
+saturation is zero everywhere and it is going to stay zero everywhere! This is
+despite the fact that $\mathbf u$ is not necessarily zero: the combined fluid
+is moving, but we've chosen our partial flux $F(S)$ in such a way that
+infinitesimal amounts of wetting fluid also only move at infinitesimal speeds
+(i.e., they stick to the medium more than the non-wetting phase in which they
+are embedded). That said, how can we square this with the knowledge that
+wetting fluid is invading from the left, leading to the flow patterns seen in
+the <a href="#Results">results section</a>? That's where we get into
+mathematics: Equations like the transport equation we are considering here
+have infinitely many solutions, but only one of them is physical: the one that
+results from the so-called viscosity limit, called the <a
+href="http://en.wikipedia.org/wiki/Viscosity_solution">viscosity
+solution</a>. The thing is that with discontinuous elements we arrive at this
+viscosity limit because using a numerical flux introduces a finite amount of
+artificial viscosity into the numerical scheme. On the other hand, in step-43,
+we use an artificial viscosity that is proportional to $\|\mathbf u F'(S)\|$
+on every cell, which at the initial time is zero. Thus, the saturation there is
+zero and remains zero; the solution we then get is <i>one</i> solution of the
+advection equation, but the method does not converge to the viscosity solution
+without further changes. We will therefore use a different initial condition in
+that program.
+
+
+Finally, to come back to the description of the testcase, we will show results
+for computations with the two permeability
+functions introduced at the end of the results section of @ref step_20
+"step-20":
+<ul>
+  <li>A function that models a single, winding crack that snakes through the
+  domain. In analogy to step-20, but taking care of the slightly
+  different geometry we have here, we describe this by the following function:
+  @f[
+    k(\mathbf x)
+    =
+    \max \left\{ e^{-\left(\frac{x_2-\frac 12 - 0.1\sin(10x_1)}{0.1}\right)^2}, 0.01 \right\}.
+  @f]
+  Taking the maximum is necessary to ensure that the ratio between maximal and
+  minimal permeability remains bounded. If we don't do that, permeabilities
+  will span many orders of magnitude. On the other hand, the ratio between
+  maximal and minimal permeability is a factor in the condition number of the
+  Schur complement matrix, and if too large leads to problems for which our
+  linear solvers will no longer converge properly.
+
+  <li>A function that models a somewhat random medium. Here, we choose
+  @f{eqnarray*}
+    k(\mathbf x)
+    &=&
+    \min \left\{ \max \left\{ \sum_{i=1}^N \sigma_i(\mathbf{x}), 0.01 \right\}, 4\right\},
+    \\
+    \sigma_i(\mathbf x)
+    &=&
+    e^{-\left(\frac{|\mathbf{x}-\mathbf{x}_i|}{0.05}\right)^2},
+  @f}
+  where the centers $\mathbf{x}_i$ are $N$ randomly chosen locations inside
+  the domain. This function models a domain in which there are $N$ centers of
+  higher permeability (for example where rock has cracked) embedded in a
+  matrix of more pristine, unperturbed background rock. Note that here we have
+  cut off the permeability function both above and below to ensure a bounded
+  condition number.
+</ul>
diff --git a/examples/step-21/doc/kind b/examples/step-21/doc/kind
new file mode 100644
index 0000000..e62f4e7
--- /dev/null
+++ b/examples/step-21/doc/kind
@@ -0,0 +1 @@
+fluids
diff --git a/examples/step-21/doc/results.dox b/examples/step-21/doc/results.dox
new file mode 100644
index 0000000..b8c7d78
--- /dev/null
+++ b/examples/step-21/doc/results.dox
@@ -0,0 +1,212 @@
+<h1>Results</h1>
+
+If we run the program, we get the following kind of output:
+@code
+Number of active cells: 1024
+Number of degrees of freedom: 4160 (2112+1024+1024)
+
+Timestep 1
+   22 CG Schur complement iterations for pressure.
+   1 CG iterations for saturation.
+   Now at t=0.0326742, dt=0.0326742.
+
+Timestep 2
+   17 CG Schur complement iterations for pressure.
+   1 CG iterations for saturation.
+   Now at t=0.0653816, dt=0.0327074.
+
+Timestep 3
+   17 CG Schur complement iterations for pressure.
+   1 CG iterations for saturation.
+   Now at t=0.0980651, dt=0.0326836.
+
+...
+@endcode
+As we can see, the time step is pretty much constant right from the start,
+which indicates that the velocities in the domain are not strongly dependent
+on changes in saturation, although they certainly do depend on it through the
+factor $\lambda(S)$ in the pressure equation.
+
+Our second observation is that the number of CG iterations needed to solve the
+pressure Schur complement equation drops from 22 to 17 between the first and
+the second time step (in fact, it remains around 17 for the rest of the
+computations). The reason is actually simple: Before we solve for the pressure
+during a time step, we don't reset the <code>solution</code> variable to
+zero. The pressure (and the other variables) therefore have the previous time
+step's values at the time we get into the CG solver. Since the velocities and
+pressures don't change very much as computations progress, the previous time
+step's pressure is actually a good initial guess for this time step's
+pressure. Consequently, the number of iterations we need once we have computed
+the pressure once is significantly reduced.
+
+The final observation concerns the number of iterations needed to solve for
+the saturation, i.e. one. This shouldn't surprise us too much: the matrix we
+have to solve with is the mass matrix. However, this is the mass matrix for
+the $DQ_0$ element of piecewise constants where no element couples with the
+degrees of freedom on neighboring cells. The matrix is therefore a diagonal
+one, and it is clear that we should be able to invert this matrix in a single
+CG iteration.
+
+
+With all this, here are a few movies that show how the saturation progresses
+over time. First, this is for the single crack model, as implemented in the
+<code>SingleCurvingCrack::KInverse</code> class:
+
+<img src="http://www.dealii.org/images/steps/developer/step-21.centerline.gif" alt="">
+
+As can be seen, the water-rich fluid snakes its way mostly along the
+high-permeability zone in the middle of the domain, whereas the rest of the
+domain is mostly impermeable. This and the next movie are generated using
+<code>n_refinement_steps=7</code>, leading to a $128\times 128$ mesh with some
+16,000 cells and about 66,000 unknowns in total.
+
+
+The second movie shows the saturation for the random medium model of class
+<code>RandomMedium::KInverse</code>, where we have randomly distributed
+centers of high permeability and fluid hops from one of these zones to
+the next:
+
+<img src="http://www.dealii.org/images/steps/developer/step-21.random2d.gif" alt="">
+
+
+Finally, here is the same situation in three space dimensions, on a mesh with
+<code>n_refinement_steps=5</code>, which produces a mesh of some 32,000 cells
+and 167,000 degrees of freedom:
+
+<img src="http://www.dealii.org/images/steps/developer/step-21.random3d.gif" alt="">
+
+To repeat these computations, all you have to do is to change the line
+@code
+      TwoPhaseFlowProblem<2> two_phase_flow_problem(0);
+@endcode
+in the main function to
+@code
+      TwoPhaseFlowProblem<3> two_phase_flow_problem(0);
+@endcode
+The visualization uses a cloud technique, where the saturation is indicated by
+colored but transparent clouds for each cell. This way, one can also get some
+idea of what happens deep inside the domain. A different way of visualizing
+would have been to show isosurfaces of the saturation evolving over
+time. There are techniques to plot isosurfaces transparently, so that one can
+see several of them at the same time like the layers of an onion.
+
+So why don't we show such isosurfaces? The problem lies in the way isosurfaces
+are computed: they require that the field to be visualized is continuous, so
+that the isosurfaces can be generated by following contours at least across a
+single cell. However, our saturation field is piecewise constant and
+discontinuous. If we wanted to plot an isosurface for a saturation $S=0.5$,
+chances would be that there is no single point in the domain where that
+saturation is actually attained. If we had to define isosurfaces in that
+context at all, we would have to take the interfaces between cells, where one
+of the two adjacent cells has a saturation greater than and the other cell a
+saturation less than 0.5. However, it appears that most visualization programs
+are not equipped to do this kind of transformation.
+
+
+<a name="extensions"></a>
+<h3>Possibilities for extensions</h3>
+
+There are a number of areas where this program can be improved. Three of them
+are listed below. All of them are, in fact, addressed in a tutorial program
+that forms the continuation of the current one: step-43.
+
+
+<h4>Solvers</h4>
+
+At present, the program is not particularly fast: the 2d random medium
+computation took about a day for the 1,000 or so time steps. The corresponding
+3d computation took almost two days for 800 time steps. The reason why it
+isn't faster than this is twofold. First, we rebuild the entire matrix in
+every time step, although some parts such as the $B$, $B^T$, and $M^S$ blocks
+never change.
+
+Second, we could do a lot better with the solver and
+preconditioners. Presently, we solve the Schur complement $B^TM^u(S)^{-1}B$
+with a CG method, using $[B^T (\textrm{diag}(M^u(S)))^{-1} B]^{-1}$ as a
+preconditioner. Applying this preconditioner is expensive, since it involves
+solving a linear system each time. This may have been appropriate for @ref
+step_20 "step-20", where we have to solve the entire problem only
+once. However, here we have to solve it hundreds of times, and in such cases
+it is worth considering a preconditioner that is more expensive to set up the
+first time, but cheaper to apply later on.
+
+One possibility would be to realize that the matrix we use as preconditioner,
+$B^T (\textrm{diag}(M^u(S)))^{-1} B$ is still sparse, and symmetric on top of
+that. If one watches the flow field evolve over time, one also sees that while
+$S$ changes significantly over time, the pressure hardly does and consequently
+$B^T (\textrm{diag}(M^u(S)))^{-1} B \approx B^T (\textrm{diag}(M^u(S^0)))^{-1}
+B$. In other words, the matrix for the first time step should be a good
+preconditioner also for all later time steps.  With a bit of
+back-and-forthing, it isn't hard to actually get a representation of it as a
+SparseMatrix object. We could then hand it off to the SparseMIC class to form
+a sparse incomplete Cholesky decomposition. To form this decomposition is
+expensive, but we have to do it only once in the first time step, and can then
+use it as a cheap preconditioner in the future. We could do even better by
+using the SparseDirectUMFPACK class that produces not only an incomplete, but
+a complete decomposition of the matrix, which should yield an even better
+preconditioner.
+
+Finally, why use the approximation $B^T (\textrm{diag}(M^u(S)))^{-1} B$ to
+precondition $B^T M^u(S)^{-1} B$? The latter matrix, after all, is the mixed
+form of the Laplace operator on the pressure space, for which we use linear
+elements. We could therefore build a separate matrix $A^p$ on the side that
+directly corresponds to the non-mixed formulation of the Laplacian, for
+example using the bilinear form $(\mathbf{K}\lambda(S^n) \nabla
+\varphi_i,\nabla\varphi_j)$. We could then form an incomplete or complete
+decomposition of this non-mixed matrix and use it as a preconditioner of the
+mixed form.
+
+Using such techniques, it can reasonably be expected that the solution process
+will be faster by at least an order of magnitude.
+
+
+<h4>Time stepping</h4>
+
+In the introduction we have identified the time step restriction
+@f[
+  \triangle t_{n+1} \le \frac h{|\mathbf{u}^{n+1}(\mathbf{x})|}
+@f]
+that has to hold globally, i.e. for all $\mathbf x$. After discretization, we
+satisfy it by choosing
+@f[
+  \triangle t_{n+1} = \frac {\min_K h_K}{\max_{\mathbf{x}}|\mathbf{u}^{n+1}(\mathbf{x})|}.
+@f]
+
+This restriction on the time step is somewhat annoying: the finer we make the
+mesh the smaller the time step; in other words, we get punished twice: each
+time step is more expensive to solve and we have to do more time steps.
+
+This is particularly annoying since the majority of the additional work is
+spent solving the implicit part of the equations, i.e. the pressure-velocity
+system, whereas it is the hyperbolic transport equation for the saturation
+that imposes the time step restriction.
+
+To avoid this bottleneck, people have invented a number of approaches. For
+example, they may only re-compute the pressure-velocity field every few time
+steps (or, if you want, use different time step sizes for the
+pressure/velocity and saturation equations). This keeps the time step
+restriction on the cheap explicit part while it makes the solution of the
+implicit part less frequent. Experiments in this direction are
+certainly worthwhile; one starting point for such an approach is the paper by
+Zhangxin Chen, Guanren Huan and Baoyan Li: <i>An improved IMPES method for
+two-phase flow in porous media</i>, Transport in Porous Media, 54 (2004),
+pp. 361—376. There are certainly many other papers on this topic as well, but
+this one happened to land on our desk a while back.
+
+
+
+<h4>Adaptivity</h4>
+
+Adaptivity would also clearly help. Looking at the movies, one clearly sees
+that most of the action is confined to a relatively small part of the domain
+(this is particularly obvious for the saturation, but also holds for the
+velocities and pressures). Adaptivity can therefore be expected to keep the
+necessary number of degrees of freedom low, or alternatively increase the
+accuracy.
+
+On the other hand, adaptivity for time dependent problems is not a trivial
+thing: we would have to change the mesh every few time steps, and we would
+have to transport our present solution to the next mesh every time we change
+it (something that the SolutionTransfer class can help with). These are not
+insurmountable obstacles, but they do require some additional coding and more
+than we felt comfortable was worth packing into this tutorial program.
diff --git a/examples/step-21/doc/tooltip b/examples/step-21/doc/tooltip
new file mode 100644
index 0000000..33a21e2
--- /dev/null
+++ b/examples/step-21/doc/tooltip
@@ -0,0 +1 @@
+Two-phase flow in porous media.
diff --git a/examples/step-21/step-21.cc b/examples/step-21/step-21.cc
new file mode 100644
index 0000000..208c1c8
--- /dev/null
+++ b/examples/step-21/step-21.cc
@@ -0,0 +1,1313 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2006 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Authors: Yan Li, Wolfgang Bangerth, Texas A&M University, 2006
+ */
+
+
+// This program is an adaptation of step-20 and includes some techniques of DG
+// methods from step-12. A good part of the program is therefore very similar
+// to step-20 and we will not comment again on these parts. Only the new stuff
+// will be discussed in more detail.
+
+// @sect3{Include files}
+
+// All of these include files have been used before:
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/function.h>
+
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/block_sparse_matrix.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/precondition.h>
+#include <deal.II/lac/constraint_matrix.h>
+
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/grid_tools.h>
+
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_renumbering.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+
+#include <deal.II/fe/fe_raviart_thomas.h>
+#include <deal.II/fe/fe_dgq.h>
+#include <deal.II/fe/fe_system.h>
+#include <deal.II/fe/fe_values.h>
+
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/matrix_tools.h>
+#include <deal.II/numerics/data_out.h>
+
+#include <iostream>
+#include <fstream>
+#include <sstream>
+
+// In this program, we use a tensor-valued coefficient. Since it may have a
+// spatial dependence, we consider it a tensor-valued function. The following
+// include file provides the <code>TensorFunction</code> class that offers
+// such functionality:
+#include <deal.II/base/tensor_function.h>
+
+// The last step is as in all previous programs:
+namespace Step21
+{
+  using namespace dealii;
+
+
+  // @sect3{The <code>TwoPhaseFlowProblem</code> class}
+
+  // This is the main class of the program. It is close to the one of step-20,
+  // but with a few additional functions:
+  //
+  // <ul> <li><code>assemble_rhs_S</code> assembles the right hand side of the
+  //   saturation equation. As explained in the introduction, this can't be
+  //   integrated into <code>assemble_rhs</code> since it depends on the
+  //   velocity that is computed in the first part of the time step.
+  //
+  //   <li><code>get_maximal_velocity</code> does as its name suggests. This
+  //   function is used in the computation of the time step size.
+  //
+  //   <li><code>project_back_saturation</code> resets all saturation degrees
+  //   of freedom with values less than zero to zero, and all those with
+  //   saturations greater than one to one.  </ul>
+  //
+  // The rest of the class should be pretty much obvious. The
+  // <code>viscosity</code> variable stores the viscosity $\mu$ that enters
+  // several of the formulas in the nonlinear equations.
+  template <int dim>
+  class TwoPhaseFlowProblem
+  {
+  public:
+    TwoPhaseFlowProblem (const unsigned int degree);
+    void run ();
+
+  private:
+    void make_grid_and_dofs ();
+    void assemble_system ();
+    void assemble_rhs_S ();
+    double get_maximal_velocity () const;
+    void solve ();
+    void project_back_saturation ();
+    void output_results () const;
+
+    const unsigned int   degree;
+
+    Triangulation<dim>   triangulation;
+    FESystem<dim>        fe;
+    DoFHandler<dim>      dof_handler;
+
+    BlockSparsityPattern      sparsity_pattern;
+    BlockSparseMatrix<double> system_matrix;
+
+    const unsigned int n_refinement_steps;
+
+    double time_step;
+    unsigned int timestep_number;
+    double viscosity;
+
+    BlockVector<double> solution;
+    BlockVector<double> old_solution;
+    BlockVector<double> system_rhs;
+  };
+
+
+  // @sect3{Equation data}
+
+  // @sect4{Pressure right hand side}
+
+  // At present, the right hand side of the pressure equation is simply the
+  // zero function. However, the rest of the program is fully equipped to deal
+  // with anything else, if this is desired:
+  template <int dim>
+  class PressureRightHandSide : public Function<dim>
+  {
+  public:
+    PressureRightHandSide () : Function<dim>(1) {}
+
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+  };
+
+
+
+  template <int dim>
+  double
+  PressureRightHandSide<dim>::value (const Point<dim>  &/*p*/,
+                                     const unsigned int /*component*/) const
+  {
+    return 0;
+  }
+
+
+  // @sect4{Pressure boundary values}
+
+  // The next are pressure boundary values. As mentioned in the introduction,
+  // we choose a linear pressure field:
+  template <int dim>
+  class PressureBoundaryValues : public Function<dim>
+  {
+  public:
+    PressureBoundaryValues () : Function<dim>(1) {}
+
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+  };
+
+
+  template <int dim>
+  double
+  PressureBoundaryValues<dim>::value (const Point<dim>  &p,
+                                      const unsigned int /*component*/) const
+  {
+    return 1-p[0];
+  }
+
+
+  // @sect4{Saturation boundary values}
+
+  // Then we also need boundary values on the inflow portions of the
+  // boundary. The question whether something is an inflow part is decided
+  // when assembling the right hand side; we only have to provide a functional
+  // description of the boundary values. This is as explained in the
+  // introduction:
+  template <int dim>
+  class SaturationBoundaryValues : public Function<dim>
+  {
+  public:
+    SaturationBoundaryValues () : Function<dim>(1) {}
+
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+  };
+
+
+
+  template <int dim>
+  double
+  SaturationBoundaryValues<dim>::value (const Point<dim> &p,
+                                        const unsigned int /*component*/) const
+  {
+    if (p[0] == 0)
+      return 1;
+    else
+      return 0;
+  }
+
+
+
+  // @sect4{Initial data}
+
+  // Finally, we need initial data. In reality, we only need initial data for
+  // the saturation, but we are lazy, so we will later, before the first time
+  // step, simply interpolate the entire solution for the previous time step
+  // from a function that contains all vector components.
+  //
+  // We therefore simply create a function that returns zero in all
+  // components. We do that by simply forwarding every function call to the
+  // ZeroFunction class. Why not use that right away in the places of this
+  // program where we presently use the <code>InitialValues</code> class?
+  // Because this way it is simpler to later go back and choose a different
+  // function for initial values.
+  template <int dim>
+  class InitialValues : public Function<dim>
+  {
+  public:
+    InitialValues () : Function<dim>(dim+2) {}
+
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+
+    virtual void vector_value (const Point<dim> &p,
+                               Vector<double>   &value) const;
+
+  };
+
+
+  template <int dim>
+  double
+  InitialValues<dim>::value (const Point<dim>  &p,
+                             const unsigned int component) const
+  {
+    return ZeroFunction<dim>(dim+2).value (p, component);
+  }
+
+
+  template <int dim>
+  void
+  InitialValues<dim>::vector_value (const Point<dim> &p,
+                                    Vector<double>   &values) const
+  {
+    ZeroFunction<dim>(dim+2).vector_value (p, values);
+  }
+
+
+
+
+  // @sect3{The inverse permeability tensor}
+
+  // As announced in the introduction, we implement two different permeability
+  // tensor fields. Each of them we put into a namespace of its own, so that
+  // it will be easy later to replace use of one by the other in the code.
+
+  // @sect4{Single curving crack permeability}
+
+  // The first function for the permeability was the one that models a single
+  // curving crack. It was already used at the end of step-20, and its
+  // functional form is given in the introduction of the present tutorial
+  // program. As in some previous programs, we have to declare a (seemingly
+  // unnecessary) default constructor of the KInverse class to avoid warnings
+  // from some compilers:
+  namespace SingleCurvingCrack
+  {
+    template <int dim>
+    class KInverse : public TensorFunction<2,dim>
+    {
+    public:
+      KInverse ()
+        :
+        TensorFunction<2,dim> ()
+      {}
+
+      virtual void value_list (const std::vector<Point<dim> > &points,
+                               std::vector<Tensor<2,dim> >    &values) const;
+    };
+
+
+    template <int dim>
+    void
+    KInverse<dim>::value_list (const std::vector<Point<dim> > &points,
+                               std::vector<Tensor<2,dim> >    &values) const
+    {
+      Assert (points.size() == values.size(),
+              ExcDimensionMismatch (points.size(), values.size()));
+
+      for (unsigned int p=0; p<points.size(); ++p)
+        {
+          values[p].clear ();
+
+          const double distance_to_flowline
+            = std::fabs(points[p][1]-0.5-0.1*std::sin(10*points[p][0]));
+
+          const double permeability = std::max(std::exp(-(distance_to_flowline*
+                                                          distance_to_flowline)
+                                                        / (0.1 * 0.1)),
+                                               0.01);
+
+          for (unsigned int d=0; d<dim; ++d)
+            values[p][d][d] = 1./permeability;
+        }
+    }
+  }
+
+
+  // @sect4{Random medium permeability}
+
+  // This function does as announced in the introduction, i.e. it creates an
+  // overlay of exponentials at random places. There is one thing worth
+  // considering for this class. The issue centers around the problem that the
+  // class creates the centers of the exponentials using a random function. If
+  // we therefore created the centers each time we create an object of the
+  // present type, we would get a different list of centers each time. That's
+  // not what we expect from classes of this type: they should reliably
+  // represent the same function.
+  //
+  // The solution to this problem is to make the list of centers a static
+  // member variable of this class, i.e. there exists exactly one such
+  // variable for the entire program, rather than for each object of this
+  // type. That's exactly what we are going to do.
+  //
+  // The next problem, however, is that we need a way to initialize this
+  // variable. Since this variable is initialized at the beginning of the
+  // program, we can't use a regular member function for that since there may
+  // not be an object of this type around at the time. The C++ standard
+  // therefore says that only non-member and static member functions can be
+  // used to initialize a static variable. We use the latter possibility by
+  // defining a function <code>get_centers</code> that computes the list of
+  // center points when called.
+  //
+  // Note that this class works just fine in both 2d and 3d, with the only
+  // difference being that we use more points in 3d: by experimenting we find
+  // that we need more exponentials in 3d than in 2d (we have more ground to
+  // cover, after all, if we want to keep the distance between centers roughly
+  // equal), so we choose 40 in 2d and 100 in 3d. For any other dimension, the
+  // function does presently not know what to do so simply throws an exception
+  // indicating exactly this.
+  namespace RandomMedium
+  {
+    template <int dim>
+    class KInverse : public TensorFunction<2,dim>
+    {
+    public:
+      KInverse ()
+        :
+        TensorFunction<2,dim> ()
+      {}
+
+      virtual void value_list (const std::vector<Point<dim> > &points,
+                               std::vector<Tensor<2,dim> >    &values) const;
+
+    private:
+      static std::vector<Point<dim> > centers;
+
+      static std::vector<Point<dim> > get_centers ();
+    };
+
+
+
+    template <int dim>
+    std::vector<Point<dim> >
+    KInverse<dim>::centers = KInverse<dim>::get_centers();
+
+
+    template <int dim>
+    std::vector<Point<dim> >
+    KInverse<dim>::get_centers ()
+    {
+      const unsigned int N = (dim == 2 ?
+                              40 :
+                              (dim == 3 ?
+                               100 :
+                               throw ExcNotImplemented()));
+
+      std::vector<Point<dim> > centers_list (N);
+      for (unsigned int i=0; i<N; ++i)
+        for (unsigned int d=0; d<dim; ++d)
+          centers_list[i][d] = static_cast<double>(rand())/RAND_MAX;
+
+      return centers_list;
+    }
+
+
+
+    template <int dim>
+    void
+    KInverse<dim>::value_list (const std::vector<Point<dim> > &points,
+                               std::vector<Tensor<2,dim> >    &values) const
+    {
+      Assert (points.size() == values.size(),
+              ExcDimensionMismatch (points.size(), values.size()));
+
+      for (unsigned int p=0; p<points.size(); ++p)
+        {
+          values[p].clear ();
+
+          double permeability = 0;
+          for (unsigned int i=0; i<centers.size(); ++i)
+            permeability += std::exp(-(points[p]-centers[i]).norm_square()
+                                     / (0.05 * 0.05));
+
+          const double normalized_permeability
+            = std::min (std::max(permeability, 0.01), 4.);
+
+          for (unsigned int d=0; d<dim; ++d)
+            values[p][d][d] = 1./normalized_permeability;
+        }
+    }
+  }
+
+
+
+  // @sect3{The inverse mobility and saturation functions}
+
+  // There are two more pieces of data that we need to describe, namely the
+  // inverse mobility function and the saturation curve. Their form is also
+  // given in the introduction:
+  double mobility_inverse (const double S,
+                           const double viscosity)
+  {
+    return 1.0 / (1.0/viscosity * S * S + (1-S) * (1-S));
+  }
+
+  double fractional_flow (const double S,
+                          const double viscosity)
+  {
+    return S*S / (S * S + viscosity * (1-S) * (1-S));
+  }
+
+
+
+
+
+  // @sect3{Linear solvers and preconditioners}
+
+  // The linear solvers we use are also completely analogous to the ones used
+  // in step-20. The following classes are therefore copied verbatim from
+  // there. Note that they not only are copied from step-20 but also duplicate
+  // classes in deal.II; a future version of this example should replace them by
+  // a more efficient method. One thing changed: if a linear system is small,
+  // i.e. when the mesh is very coarse, then it is sometimes not sufficient to
+  // set a maximum of <code>src.size()</code> CG iterations before the solver
+  // in the <code>vmult()</code> function converges. (This is, of course, a
+  // result of numerical round-off, since we know that on paper, the CG method
+  // converges in at most <code>src.size()</code> steps.) As a consequence, we
+  // set the maximum number of iterations equal to the maximum of the size of
+  // the linear system and 200.
+  template <class Matrix>
+  class InverseMatrix : public Subscriptor
+  {
+  public:
+    InverseMatrix (const Matrix &m);
+
+    void vmult (Vector<double>       &dst,
+                const Vector<double> &src) const;
+
+  private:
+    const SmartPointer<const Matrix> matrix;
+  };
+
+
+  template <class Matrix>
+  InverseMatrix<Matrix>::InverseMatrix (const Matrix &m)
+    :
+    matrix (&m)
+  {}
+
+
+
+  template <class Matrix>
+  void InverseMatrix<Matrix>::vmult (Vector<double>       &dst,
+                                     const Vector<double> &src) const
+  {
+    SolverControl solver_control (std::max(src.size(), static_cast<std::size_t> (200)),
+                                  1e-8*src.l2_norm());
+    SolverCG<>    cg (solver_control);
+
+    dst = 0;
+
+    cg.solve (*matrix, dst, src, PreconditionIdentity());
+  }
+
+
+
+  class SchurComplement : public Subscriptor
+  {
+  public:
+    SchurComplement (const BlockSparseMatrix<double> &A,
+                     const InverseMatrix<SparseMatrix<double> > &Minv);
+
+    void vmult (Vector<double>       &dst,
+                const Vector<double> &src) const;
+
+  private:
+    const SmartPointer<const BlockSparseMatrix<double> > system_matrix;
+    const SmartPointer<const InverseMatrix<SparseMatrix<double> > > m_inverse;
+
+    mutable Vector<double> tmp1, tmp2;
+  };
+
+
+
+  SchurComplement::
+  SchurComplement (const BlockSparseMatrix<double> &A,
+                   const InverseMatrix<SparseMatrix<double> > &Minv)
+    :
+    system_matrix (&A),
+    m_inverse (&Minv),
+    tmp1 (A.block(0,0).m()),
+    tmp2 (A.block(0,0).m())
+  {}
+
+
+  void SchurComplement::vmult (Vector<double>       &dst,
+                               const Vector<double> &src) const
+  {
+    system_matrix->block(0,1).vmult (tmp1, src);
+    m_inverse->vmult (tmp2, tmp1);
+    system_matrix->block(1,0).vmult (dst, tmp2);
+  }
+
+
+
+  class ApproximateSchurComplement : public Subscriptor
+  {
+  public:
+    ApproximateSchurComplement (const BlockSparseMatrix<double> &A);
+
+    void vmult (Vector<double>       &dst,
+                const Vector<double> &src) const;
+
+  private:
+    const SmartPointer<const BlockSparseMatrix<double> > system_matrix;
+
+    mutable Vector<double> tmp1, tmp2;
+  };
+
+
+  ApproximateSchurComplement::
+  ApproximateSchurComplement (const BlockSparseMatrix<double> &A)
+    :
+    system_matrix (&A),
+    tmp1 (A.block(0,0).m()),
+    tmp2 (A.block(0,0).m())
+  {}
+
+
+  void ApproximateSchurComplement::vmult (Vector<double>       &dst,
+                                          const Vector<double> &src) const
+  {
+    system_matrix->block(0,1).vmult (tmp1, src);
+    system_matrix->block(0,0).precondition_Jacobi (tmp2, tmp1);
+    system_matrix->block(1,0).vmult (dst, tmp2);
+  }
+
+
+
+
+
+  // @sect3{<code>TwoPhaseFlowProblem</code> class implementation}
+
+  // Here now the implementation of the main class. Much of it is actually
+  // copied from step-20, so we won't comment on it in much detail. You should
+  // try to get familiar with that program first; then most of what is
+  // happening here should be clear.
+
+  // @sect4{TwoPhaseFlowProblem::TwoPhaseFlowProblem}
+
+  // First for the constructor. We use $RT_k \times DQ_k \times DQ_k$
+  // spaces. The time step is set to zero initially, but will be computed
+  // before it is needed first, as described in a subsection of the
+  // introduction.
+  template <int dim>
+  TwoPhaseFlowProblem<dim>::TwoPhaseFlowProblem (const unsigned int degree)
+    :
+    degree (degree),
+    fe (FE_RaviartThomas<dim>(degree), 1,
+        FE_DGQ<dim>(degree), 1,
+        FE_DGQ<dim>(degree), 1),
+    dof_handler (triangulation),
+    n_refinement_steps (5),
+    time_step (0),
+    viscosity (0.2)
+  {}
+
+
+
+  // @sect4{TwoPhaseFlowProblem::make_grid_and_dofs}
+
+  // This next function starts out with well-known function calls that create
+  // and refine a mesh, and then associate degrees of freedom with it. It does
+  // all the same things as in step-20, just now for three components instead
+  // of two.
+  template <int dim>
+  void TwoPhaseFlowProblem<dim>::make_grid_and_dofs ()
+  {
+    // Mesh: the unit hypercube, refined globally n_refinement_steps times.
+    GridGenerator::hyper_cube (triangulation, 0, 1);
+    triangulation.refine_global (n_refinement_steps);
+
+    // Enumerate the unknowns and renumber them component by component.
+    dof_handler.distribute_dofs (fe);
+    DoFRenumbering::component_wise (dof_handler);
+
+    // Count the unknowns of each of the dim+2 vector components. Entry 0
+    // serves as the size of the velocity block, entry dim as that of the
+    // pressure block and entry dim+1 as that of the saturation block.
+    std::vector<types::global_dof_index> dofs_per_component (dim+2);
+    DoFTools::count_dofs_per_component (dof_handler, dofs_per_component);
+    const unsigned int n_u = dofs_per_component[0],
+                       n_p = dofs_per_component[dim],
+                       n_s = dofs_per_component[dim+1];
+
+    const unsigned int n_blocks = 3;
+    const unsigned int block_size[n_blocks] = { n_u, n_p, n_s };
+
+    std::cout << "Number of active cells: "
+              << triangulation.n_active_cells()
+              << std::endl
+              << "Number of degrees of freedom: "
+              << dof_handler.n_dofs()
+              << " (" << n_u << '+' << n_p << '+'<< n_s <<')'
+              << std::endl
+              << std::endl;
+
+    // Size each of the 3x3 sparsity blocks, passing the DoFHandler's
+    // max_couplings_between_dofs() value as the third reinit argument.
+    const unsigned int
+    n_couplings = dof_handler.max_couplings_between_dofs();
+
+    // The loops visit (0,0), (1,0), (2,0), (0,1), ... column by column.
+    sparsity_pattern.reinit (n_blocks, n_blocks);
+    for (unsigned int col=0; col<n_blocks; ++col)
+      for (unsigned int row=0; row<n_blocks; ++row)
+        sparsity_pattern.block(row,col).reinit (block_size[row],
+                                                block_size[col],
+                                                n_couplings);
+    sparsity_pattern.collect_sizes();
+
+    // Record the actual couplings, then finalize the pattern.
+    DoFTools::make_sparsity_pattern (dof_handler, sparsity_pattern);
+    sparsity_pattern.compress();
+
+    system_matrix.reinit (sparsity_pattern);
+
+    // solution, old_solution and system_rhs all get the same three-block
+    // layout, so they are set up in one loop.
+    BlockVector<double> *const block_vectors[3]
+      = { &solution, &old_solution, &system_rhs };
+
+    for (unsigned int v=0; v<3; ++v)
+      {
+        block_vectors[v]->reinit (n_blocks);
+        for (unsigned int b=0; b<n_blocks; ++b)
+          block_vectors[v]->block(b).reinit (block_size[b]);
+        block_vectors[v]->collect_sizes ();
+      }
+  }
+
+
+  // @sect4{TwoPhaseFlowProblem::assemble_system}
+
+  // This is the function that assembles the linear system, or at least
+  // everything except the (1,3) block that depends on the still-unknown
+  // velocity computed during this time step (we deal with this in
+  // <code>assemble_rhs_S</code>). Much of it is again as in step-20, but we
+  // have to deal with some nonlinearity this time.  However, the top of the
+  // function is pretty much as usual (note that we set matrix and right hand
+  // side to zero at the beginning — something we didn't have to do for
+  // stationary problems since there we use each matrix object only once and
+  // it is empty at the beginning anyway).
+  //
+  // Note that in its present form, the function uses the permeability
+  // implemented in the RandomMedium::KInverse class. Switching to the single
+  // curved crack permeability function is as simple as just changing the
+  // namespace name.
+  template <int dim>
+  void TwoPhaseFlowProblem<dim>::assemble_system ()
+  {
+    // Assembles the velocity/pressure blocks and the saturation mass matrix
+    // using the old saturation; assemble_rhs_S later adds to system_rhs.
+    system_matrix=0;
+    system_rhs=0;
+
+    QGauss<dim>   quadrature_formula(degree+2);
+    QGauss<dim-1> face_quadrature_formula(degree+2);
+
+    FEValues<dim> fe_values (fe, quadrature_formula,
+                             update_values    | update_gradients |
+                             update_quadrature_points  | update_JxW_values);
+    FEFaceValues<dim> fe_face_values (fe, face_quadrature_formula,
+                                      update_values    | update_normal_vectors |
+                                      update_quadrature_points  | update_JxW_values);
+
+    const unsigned int   dofs_per_cell   = fe.dofs_per_cell;
+    const unsigned int   n_q_points      = quadrature_formula.size();
+    const unsigned int   n_face_q_points = face_quadrature_formula.size();
+
+    FullMatrix<double>   local_matrix (dofs_per_cell, dofs_per_cell);
+    Vector<double>       local_rhs (dofs_per_cell);
+    std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+
+    const PressureRightHandSide<dim>  pressure_right_hand_side;
+    const PressureBoundaryValues<dim> pressure_boundary_values;
+    const RandomMedium::KInverse<dim> k_inverse;
+
+    std::vector<double>               pressure_rhs_values (n_q_points);
+    std::vector<double>               boundary_values (n_face_q_points);
+    std::vector<Tensor<2,dim> >       k_inverse_values (n_q_points);
+
+    // Only values (no gradients) of the old solution are needed below.
+    std::vector<Vector<double> >      old_solution_values(n_q_points, Vector<double>(dim+2));
+
+    const FEValuesExtractors::Vector velocities (0);
+    const FEValuesExtractors::Scalar pressure (dim);
+    const FEValuesExtractors::Scalar saturation (dim+1);
+
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+    for (; cell!=endc; ++cell)
+      {
+        fe_values.reinit (cell);
+        local_matrix = 0;
+        local_rhs = 0;
+
+        // Here's the first significant difference: We have to get the values
+        // of the saturation function of the previous time step at the
+        // quadrature points. To this end, we can use the
+        // FEValues::get_function_values (previously already used in step-9,
+        // step-14 and step-15), a function that takes a solution vector and
+        // returns a list of function values at the quadrature points of the
+        // present cell. In fact, it returns the complete vector-valued
+        // solution at each quadrature point, i.e. not only the saturation but
+        // also the velocities and pressure:
+        fe_values.get_function_values (old_solution, old_solution_values);
+
+        // Then we also have to get the values of the pressure right hand side
+        // and of the inverse permeability tensor at the quadrature points:
+        pressure_right_hand_side.value_list (fe_values.get_quadrature_points(),
+                                             pressure_rhs_values);
+        k_inverse.value_list (fe_values.get_quadrature_points(),
+                              k_inverse_values);
+
+        // With all this, we can now loop over all the quadrature points and
+        // shape functions on this cell and assemble those parts of the matrix
+        // and right hand side that we deal with in this function. The old
+        // saturation and the inverse mobility depend only on the quadrature
+        // point, so they are evaluated once per point. The individual terms
+        // follow the bilinear form stated in the introduction:
+        for (unsigned int q=0; q<n_q_points; ++q)
+          {
+            const double old_s        = old_solution_values[q](dim+1);
+            const double mobility_inv = mobility_inverse(old_s,viscosity);
+
+            for (unsigned int i=0; i<dofs_per_cell; ++i)
+              {
+                const Tensor<1,dim> phi_i_u      = fe_values[velocities].value (i, q);
+                const double        div_phi_i_u  = fe_values[velocities].divergence (i, q);
+                const double        phi_i_p      = fe_values[pressure].value (i, q);
+                const double        phi_i_s      = fe_values[saturation].value (i, q);
+
+                for (unsigned int j=0; j<dofs_per_cell; ++j)
+                  {
+                    const Tensor<1,dim> phi_j_u     = fe_values[velocities].value (j, q);
+                    const double        div_phi_j_u = fe_values[velocities].divergence (j, q);
+                    const double        phi_j_p     = fe_values[pressure].value (j, q);
+                    const double        phi_j_s     = fe_values[saturation].value (j, q);
+                    local_matrix(i,j) += (phi_i_u * k_inverse_values[q] *
+                                          mobility_inv * phi_j_u
+                                          - div_phi_i_u * phi_j_p
+                                          - phi_i_p * div_phi_j_u
+                                          + phi_i_s * phi_j_s)
+                                         * fe_values.JxW(q);
+                  }
+
+                local_rhs(i) += (-phi_i_p * pressure_rhs_values[q])*
+                                fe_values.JxW(q);
+              }
+          }
+
+        // Next, we also have to deal with the pressure boundary values. This,
+        // again is as in step-20:
+        for (unsigned int face_no=0;
+             face_no<GeometryInfo<dim>::faces_per_cell;
+             ++face_no)
+          if (cell->at_boundary(face_no))
+            {
+              fe_face_values.reinit (cell, face_no);
+
+              pressure_boundary_values
+              .value_list (fe_face_values.get_quadrature_points(),
+                           boundary_values);
+
+              for (unsigned int q=0; q<n_face_q_points; ++q)
+                for (unsigned int i=0; i<dofs_per_cell; ++i)
+                  {
+                    const Tensor<1,dim>
+                    phi_i_u = fe_face_values[velocities].value (i, q);
+
+                    local_rhs(i) += -(phi_i_u *
+                                      fe_face_values.normal_vector(q) *
+                                      boundary_values[q] *
+                                      fe_face_values.JxW(q));
+                  }
+            }
+
+        // The final step in the loop over all cells is to transfer local
+        // contributions into the global matrix and right hand side vector:
+        cell->get_dof_indices (local_dof_indices);
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          for (unsigned int j=0; j<dofs_per_cell; ++j)
+            system_matrix.add (local_dof_indices[i],
+                               local_dof_indices[j],
+                               local_matrix(i,j));
+
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          system_rhs(local_dof_indices[i]) += local_rhs(i);
+      }
+  }
+
+
+  // So much for assembly of matrix and right hand side. Note that we do not
+  // have to interpolate and apply boundary values since they have all been
+  // taken care of in the weak form already.
+
+
+  // @sect4{TwoPhaseFlowProblem::assemble_rhs_S}
+
+  // As explained in the introduction, we can only evaluate the right hand
+  // side of the saturation equation once the velocity has been computed. We
+  // therefore have this separate function to this end.
+  template <int dim>
+  void TwoPhaseFlowProblem<dim>::assemble_rhs_S ()
+  {
+    // Adds the cell and face terms of the saturation equation to system_rhs,
+    // using the velocity stored in solution and the current time_step.
+    QGauss<dim>   quadrature_formula(degree+2);
+    QGauss<dim-1> face_quadrature_formula(degree+2);
+    FEValues<dim> fe_values (fe, quadrature_formula,
+                             update_values    | update_gradients |
+                             update_quadrature_points  | update_JxW_values);
+    FEFaceValues<dim> fe_face_values (fe, face_quadrature_formula,
+                                      update_values    | update_normal_vectors |
+                                      update_quadrature_points  | update_JxW_values);
+    FEFaceValues<dim> fe_face_values_neighbor (fe, face_quadrature_formula,
+                                               update_values);
+
+    const unsigned int   dofs_per_cell   = fe.dofs_per_cell;
+    const unsigned int   n_q_points      = quadrature_formula.size();
+    const unsigned int   n_face_q_points = face_quadrature_formula.size();
+
+    Vector<double>       local_rhs (dofs_per_cell);
+    std::vector<Vector<double> > old_solution_values(n_q_points, Vector<double>(dim+2));
+    std::vector<Vector<double> > old_solution_values_face(n_face_q_points, Vector<double>(dim+2));
+    std::vector<Vector<double> > old_solution_values_face_neighbor(n_face_q_points, Vector<double>(dim+2));
+    std::vector<Vector<double> > present_solution_values(n_q_points, Vector<double>(dim+2));
+    std::vector<Vector<double> > present_solution_values_face(n_face_q_points, Vector<double>(dim+2));
+
+    std::vector<double> neighbor_saturation (n_face_q_points);
+    std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+
+    SaturationBoundaryValues<dim> saturation_boundary_values;
+
+    const FEValuesExtractors::Scalar saturation (dim+1);
+
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+    for (; cell!=endc; ++cell)
+      {
+        local_rhs = 0;
+        fe_values.reinit (cell);
+
+        fe_values.get_function_values (old_solution, old_solution_values);
+        fe_values.get_function_values (solution, present_solution_values);
+
+        // First for the cell terms. These are, following the formulas in the
+        // introduction, $(S^n,\sigma)-(F(S^n) \mathbf{v}^{n+1},\nabla
+        // \sigma)$, where $\sigma$ is the saturation component of the test
+        // function:
+        for (unsigned int q=0; q<n_q_points; ++q)
+          {
+            const double old_s = old_solution_values[q](dim+1);
+            Tensor<1,dim> present_u;
+            for (unsigned int d=0; d<dim; ++d)
+              present_u[d] = present_solution_values[q](d);
+
+            // Independent of the test function, hence computed once per point:
+            const Tensor<1,dim> scaled_flux
+              = time_step * fractional_flow(old_s,viscosity) * present_u;
+
+            for (unsigned int i=0; i<dofs_per_cell; ++i)
+              {
+                const double        phi_i_s      = fe_values[saturation].value (i, q);
+                const Tensor<1,dim> grad_phi_i_s = fe_values[saturation].gradient (i, q);
+                local_rhs(i) += (scaled_flux * grad_phi_i_s
+                                 + old_s * phi_i_s)
+                                * fe_values.JxW(q);
+              }
+          }
+
+        // Secondly, we have to deal with the flux parts on the face
+        // boundaries. This is a bit more involved because we first have to
+        // determine which are the influx and outflux parts of the cell
+        // boundary. If we have an influx boundary, we need to evaluate the
+        // saturation on the other side of the face (or the boundary values,
+        // if we are at the boundary of the domain).
+        //
+        // All this is a bit tricky, but has been explained in some detail
+        // already in step-9. Take a look there how this is supposed to work!
+        for (unsigned int face_no=0; face_no<GeometryInfo<dim>::faces_per_cell;
+             ++face_no)
+          {
+            fe_face_values.reinit (cell, face_no);
+
+            fe_face_values.get_function_values (old_solution, old_solution_values_face);
+            fe_face_values.get_function_values (solution, present_solution_values_face);
+
+            if (cell->at_boundary(face_no))
+              saturation_boundary_values
+              .value_list (fe_face_values.get_quadrature_points(),
+                           neighbor_saturation);
+            else
+              {
+                const typename DoFHandler<dim>::active_cell_iterator
+                neighbor = cell->neighbor(face_no);
+                const unsigned int
+                neighbor_face = cell->neighbor_of_neighbor(face_no);
+
+                fe_face_values_neighbor.reinit (neighbor, neighbor_face);
+
+                fe_face_values_neighbor
+                .get_function_values (old_solution,
+                                      old_solution_values_face_neighbor);
+
+                for (unsigned int q=0; q<n_face_q_points; ++q)
+                  neighbor_saturation[q] = old_solution_values_face_neighbor[q](dim+1);
+              }
+
+            for (unsigned int q=0; q<n_face_q_points; ++q)
+              {
+                Tensor<1,dim> present_u_face;
+                for (unsigned int d=0; d<dim; ++d)
+                  present_u_face[d] = present_solution_values_face[q](d);
+
+                const double normal_flux = present_u_face *
+                                           fe_face_values.normal_vector(q);
+
+                // Upwinding: take this cell's saturation where the flux points
+                // outward (normal_flux >= 0), the other side's value otherwise.
+                const double upwind_saturation = (normal_flux >= 0
+                                                  ? old_solution_values_face[q](dim+1)
+                                                  : neighbor_saturation[q]);
+                const double flux_factor = time_step *
+                                           normal_flux *
+                                           fractional_flow(upwind_saturation, viscosity);
+
+                for (unsigned int i=0; i<dofs_per_cell; ++i)
+                  local_rhs(i) -= flux_factor *
+                                  fe_face_values[saturation].value (i,q) *
+                                  fe_face_values.JxW(q);
+              }
+          }
+
+        cell->get_dof_indices (local_dof_indices);
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          system_rhs(local_dof_indices[i]) += local_rhs(i);
+      }
+  }
+
+
+
+  // @sect4{TwoPhaseFlowProblem::solve}
+
+  // After all these preparations, we finally solve the linear system for
+  // velocity and pressure in the same way as in step-20. After that, we have
+  // to deal with the saturation equation (see below):
+  template <int dim>
+  void TwoPhaseFlowProblem<dim>::solve ()
+  {
+    // Solves in sequence for pressure, velocity and saturation, then copies
+    // the result into old_solution for the next time step.
+    const InverseMatrix<SparseMatrix<double> >
+    m_inverse (system_matrix.block(0,0));
+    Vector<double> tmp (solution.block(0).size());
+    Vector<double> schur_rhs (solution.block(1).size());
+
+    // First the pressure, using the pressure Schur complement of the first
+    // two equations:
+    {
+      m_inverse.vmult (tmp, system_rhs.block(0));
+      system_matrix.block(1,0).vmult (schur_rhs, tmp);
+      schur_rhs -= system_rhs.block(1);
+
+
+      SchurComplement
+      schur_complement (system_matrix, m_inverse);
+
+      ApproximateSchurComplement
+      approximate_schur_complement (system_matrix);
+
+      InverseMatrix<ApproximateSchurComplement>
+      preconditioner (approximate_schur_complement);
+
+
+      SolverControl solver_control (solution.block(1).size(),
+                                    1e-12*schur_rhs.l2_norm());
+      SolverCG<>    cg (solver_control);
+
+      cg.solve (schur_complement, solution.block(1), schur_rhs,
+                preconditioner);
+
+      std::cout << "   "
+                << solver_control.last_step()
+                << " CG Schur complement iterations for pressure."
+                << std::endl;
+    }
+
+    // Now the velocity:
+    {
+      system_matrix.block(0,1).vmult (tmp, solution.block(1));
+      tmp *= -1;
+      tmp += system_rhs.block(0);
+
+      m_inverse.vmult (solution.block(0), tmp);
+    }
+
+    // Finally, we have to take care of the saturation equation. The first
+    // business we have here is to determine the time step using the formula
+    // in the introduction. Knowing the shape of our domain and that we
+    // created the mesh by regular subdivision of cells, we can compute the
+    // diameter of each of our cells quite easily (in fact we use the linear
+    // extensions in coordinate directions of the cells, not the
+    // diameter). Note that we will learn a more general way to do this in
+    // step-24, where we use the GridTools::minimal_cell_diameter function.
+    //
+    // The maximal velocity we compute using a helper function to compute the
+    // maximal velocity defined below, and with all this we can evaluate our
+    // new time step length:
+    time_step = std::pow(0.5, double(n_refinement_steps)) /
+                get_maximal_velocity();
+
+    // The next step is to assemble the right hand side, and then to pass
+    // everything on for solution. At the end, we project back saturations
+    // onto the physically reasonable range:
+    assemble_rhs_S ();
+    {
+
+      SolverControl solver_control (system_matrix.block(2,2).m(),
+                                    1e-8*system_rhs.block(2).l2_norm());
+      SolverCG<>   cg (solver_control);
+      cg.solve (system_matrix.block(2,2), solution.block(2), system_rhs.block(2),
+                PreconditionIdentity());
+
+      project_back_saturation ();
+
+      std::cout << "   "
+                << solver_control.last_step()
+                << " CG iterations for saturation."
+                << std::endl;
+    }
+
+
+    old_solution = solution;
+  }
+
+
+  // @sect4{TwoPhaseFlowProblem::output_results}
+
+  // There is nothing surprising here. Since the program will do a lot of time
+  // steps, we create an output file only every fifth time step.
+  template <int dim>
+  void TwoPhaseFlowProblem<dim>::output_results ()  const
+  {
+    // Only every fifth time step produces a file.
+    const bool write_this_step = (timestep_number % 5 == 0);
+    if (!write_this_step)
+      return;
+
+    // Component labels: velocity components first, then pressure and
+    // saturation. Only dim==2 and dim==3 are implemented; any other
+    // dimension runs into the ExcNotImplemented assertion.
+    std::vector<std::string> component_names;
+    if (dim == 2 || dim == 3)
+      {
+        component_names.push_back ("u");
+        component_names.push_back ("v");
+        if (dim == 3)
+          component_names.push_back ("w");
+        component_names.push_back ("p");
+        component_names.push_back ("S");
+      }
+    else
+      {
+        Assert (false, ExcNotImplemented());
+      }
+
+    DataOut<dim> data_out;
+
+    data_out.attach_dof_handler (dof_handler);
+    data_out.add_data_vector (solution, component_names);
+
+    data_out.build_patches (degree+1);
+
+    // The file name is "solution-<timestep_number>.vtk".
+    std::ostringstream name_stream;
+    name_stream << "solution-" << timestep_number << ".vtk";
+    const std::string filename = name_stream.str();
+
+    std::ofstream vtk_file (filename.c_str());
+    data_out.write_vtk (vtk_file);
+  }
+
+
+
+  // @sect4{TwoPhaseFlowProblem::project_back_saturation}
+
+  // In this function, we simply run over all saturation degrees of freedom
+  // and make sure that if they should have left the physically reasonable
+  // range, that they be reset to the interval $[0,1]$. To do this, we only
+  // have to loop over all saturation components of the solution vector; these
+  // are stored in the block 2 (block 0 are the velocities, block 1 are the
+  // pressures).
+  //
+  // It may be instructive to note that this function almost never triggers
+  // when the time step is chosen as mentioned in the introduction. However,
+  // if we choose the timestep only slightly larger, we get plenty of values
+  // outside the proper range. Strictly speaking, the function is therefore
+  // unnecessary if we choose the time step small enough. In a sense, the
+  // function is therefore only a safety device to avoid situations where our
+  // entire solution becomes unphysical because individual degrees of freedom
+  // have become unphysical a few time steps earlier.
+  template <int dim>
+  void TwoPhaseFlowProblem<dim>::project_back_saturation ()
+  {
+    Vector<double> &saturation = solution.block(2);   // saturation unknowns
+    for (unsigned int k=0; k<saturation.size(); ++k)
+      if (saturation(k) < 0)
+        saturation(k) = 0;
+      else if (saturation(k) > 1)
+        saturation(k) = 1;
+  }
+
+
+  // @sect4{TwoPhaseFlowProblem::get_maximal_velocity}
+
+  // The following function is used in determining the maximal allowable time
+  // step. What it does is to loop over all quadrature points in the domain
+  // and find what the maximal magnitude of the velocity is.
+  template <int dim>
+  double TwoPhaseFlowProblem<dim>::get_maximal_velocity () const
+  {
+    // Sample the current solution at the same Gauss points that the
+    // assembly routines use.
+    const QGauss<dim>  quadrature (degree+2);
+    const unsigned int n_points = quadrature.size();
+
+    FEValues<dim> fe_values (fe, quadrature, update_values);
+
+    // One entry per quadrature point, each holding all dim+2 solution
+    // components; only the first dim (the velocity) are read below.
+    std::vector<Vector<double> > values_at_points (n_points,
+                                                   Vector<double>(dim+2));
+    double largest_speed = 0;
+
+    for (typename DoFHandler<dim>::active_cell_iterator
+         cell = dof_handler.begin_active();
+         cell != dof_handler.end(); ++cell)
+      {
+        fe_values.reinit (cell);
+        fe_values.get_function_values (solution, values_at_points);
+
+        for (unsigned int point=0; point<n_points; ++point)
+          {
+            Tensor<1,dim> u;
+            for (unsigned int d=0; d<dim; ++d)
+              u[d] = values_at_points[point](d);
+
+            largest_speed = std::max (largest_speed, u.norm());
+          }
+      }
+
+    return largest_speed;
+  }
+
+
+  // @sect4{TwoPhaseFlowProblem::run}
+
+  // This is the final function of our main class. Its brevity speaks for
+  // itself. There are only two points worth noting: First, the function
+  // projects the initial values onto the finite element space at the
+  // beginning; the VectorTools::project function doing this requires an
+  // argument indicating the hanging node constraints. We have none in this
+  // program (we compute on a uniformly refined mesh), but the function
+  // requires the argument anyway, of course. So we have to create a
+  // constraint object. In its original state, constraint objects are
+  // unsorted, and have to be sorted (using the ConstraintMatrix::close
+  // function) before they can be used. This is what we do here, and which is
+  // why we can't simply call the VectorTools::project function with an
+  // anonymous temporary object <code>ConstraintMatrix()</code> as the second
+  // argument.
+  //
+  // The second point worth mentioning is that we only compute the length of
+  // the present time step in the middle of solving the linear system
+  // corresponding to each time step. We can therefore output the present end
+  // time of a time step only at the end of the time step.
+  //
+  // The function as it is here does not actually compute the results
+  // found on the web page. The reason is that even on a decent
+  // computer it runs for more than a day. If you want to reproduce these
+  // results, set the final time at the end of the do loop to 250.
+  template <int dim>
+  void TwoPhaseFlowProblem<dim>::run ()
+  {
+    make_grid_and_dofs();
+
+    // Project the initial values onto the finite element space. There are
+    // no hanging nodes here, so the constraint object stays empty; it is
+    // merely closed so that VectorTools::project accepts it.
+    {
+      ConstraintMatrix no_constraints;
+      no_constraints.close();
+
+      VectorTools::project (dof_handler, no_constraints,
+                            QGauss<dim>(degree+2), InitialValues<dim>(),
+                            old_solution);
+    }
+
+    timestep_number = 1;
+
+    // time_step is assigned inside solve(), so the simulated time can only
+    // be advanced and reported at the end of each pass through the loop.
+    double current_time = 0;
+    do
+      {
+        std::cout << "Timestep " << timestep_number << std::endl;
+
+        assemble_system ();
+        solve ();
+        output_results ();
+
+        current_time += time_step;
+        ++timestep_number;
+
+        std::cout << "   Now at t=" << current_time
+                  << ", dt=" << time_step << '.'
+                  << std::endl << std::endl;
+      }
+    while (current_time <= 1.);
+  }
+}
+
+
+// @sect3{The <code>main</code> function}
+
+// That's it. In the main function, we pass the degree of the finite element
+// space to the constructor of the TwoPhaseFlowProblem object.  Here, we use
+// zero-th degree elements, i.e. $RT_0\times DQ_0 \times DQ_0$. The rest is as
+// in all the other programs.
+int main ()
+{
+  // Banner line printed above and below every error report.
+  const char *const rule
+    = "----------------------------------------------------";
+
+  try
+    {
+      using namespace dealii;
+      using namespace Step21;
+
+      // Lowest-order elements (degree 0) in two space dimensions.
+      TwoPhaseFlowProblem<2> two_phase_flow_problem(0);
+      two_phase_flow_problem.run ();
+    }
+  catch (std::exception &exc)
+    {
+      std::cerr << std::endl << std::endl
+                << rule << std::endl
+                << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << rule << std::endl;
+      return 1;
+    }
+  catch (...)
+    {
+      std::cerr << std::endl << std::endl
+                << rule << std::endl
+                << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << rule << std::endl;
+      return 1;
+    }
+
+  return 0;
+}
diff --git a/examples/step-22/CMakeLists.txt b/examples/step-22/CMakeLists.txt
new file mode 100644
index 0000000..3fe2225
--- /dev/null
+++ b/examples/step-22/CMakeLists.txt
@@ -0,0 +1,50 @@
+##
+#  CMake script for the step-22 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-22")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT deal.II_FOUND)  # pass the variable name; IF() looks up its value itself
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+#
+# Are all dependencies fulfilled?
+#
+IF(NOT DEAL_II_WITH_UMFPACK)
+  MESSAGE(FATAL_ERROR "
+Error! The deal.II library found at ${DEAL_II_PATH} was not configured with
+    DEAL_II_WITH_UMFPACK = ON
+One or all of these are OFF in your installation but are required for this tutorial step."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-22/doc/builds-on b/examples/step-22/doc/builds-on
new file mode 100644
index 0000000..bab7a7c
--- /dev/null
+++ b/examples/step-22/doc/builds-on
@@ -0,0 +1 @@
+step-6 step-21
diff --git a/examples/step-22/doc/intro.dox b/examples/step-22/doc/intro.dox
new file mode 100644
index 0000000..4b00e31
--- /dev/null
+++ b/examples/step-22/doc/intro.dox
@@ -0,0 +1,786 @@
+<br>
+
+<i>This program was contributed by Martin Kronbichler and Wolfgang
+Bangerth.
+<br>
+This material is based upon work partly supported by the National
+Science Foundation under Award No. EAR-0426271 and The California Institute of
+Technology. Any opinions, findings, and conclusions or recommendations
+expressed in this publication are those of the author and do not
+necessarily reflect the views of the National Science Foundation or of The
+California Institute of Technology.
+</i>
+
+
+
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+This program deals with the Stokes system of equations which reads as
+follows in non-dimensionalized form:
+@f{eqnarray*}
+  -2\; \textrm{div}\; \varepsilon(\textbf{u}) + \nabla p &=& \textbf{f},
+  \\
+  -\textrm{div}\; \textbf{u} &=& 0,
+@f}
+where $\textbf u$ denotes the velocity of a fluid, $p$ is its
+pressure, $\textbf f$ are external forces, and
+$\varepsilon(\textbf{u})= \nabla^s{\textbf{u}}= \frac 12 \left[
+(\nabla \textbf{u}) + (\nabla \textbf{u})^T\right]$  is the
+rank-2 tensor of symmetrized gradients; a component-wise definition
+of it is $\varepsilon(\textbf{u})_{ij}=\frac
+12\left(\frac{\partial u_i}{\partial x_j} + \frac{\partial u_j}{\partial x_i}\right)$.
+
+The Stokes equations describe the steady-state motion of a
+slow-moving, viscous fluid such as honey, rocks in the earth mantle,
+or other cases where inertia does not play a significant role. If a
+fluid is moving fast enough that inertia forces are significant
+compared to viscous friction, the Stokes equations are no longer
+valid; taking into account inertia effects then leads to the
+nonlinear Navier-Stokes equations. However, in this tutorial program,
+we will focus on the simpler Stokes system.
+
+To be well-posed, we will have to add boundary conditions to the
+equations. What boundary conditions are readily possible here will
+become clear once we discuss the weak form of the equations.
+
+The equations covered here fall into the class of vector-valued problems. A
+toplevel overview of this topic can be found in the @ref vector_valued module.
+
+
+<h3>Weak form</h3>
+
+The weak form of the equations is obtained by writing it in vector
+form as
+@f{eqnarray*}
+  \begin{pmatrix}
+    {-2\; \textrm{div}\; \varepsilon(\textbf{u}) + \nabla p}
+    \\
+    {-\textrm{div}\; \textbf{u}}
+  \end{pmatrix}
+  =
+  \begin{pmatrix}
+  {\textbf{f}}
+  \\
+  0
+  \end{pmatrix},
+@f}
+forming the dot product from the left with a vector-valued test
+function $\phi = \begin{pmatrix}\textbf v \\ q\end{pmatrix}$ and integrating
+over the domain $\Omega$, yielding the following set of equations:
+@f{eqnarray*}
+  (\mathrm v,
+   -2\; \textrm{div}\; \varepsilon(\textbf{u}) + \nabla p)_{\Omega}
+  -
+  (q,\textrm{div}\; \textbf{u})_{\Omega}
+  =
+  (\textbf{v}, \textbf{f})_\Omega,
+@f}
+which has to hold for all test functions $\phi = \begin{pmatrix}\textbf v
+\\ q\end{pmatrix}$.
+
+In practice, one wants to impose as little regularity on the pressure
+variable as possible; consequently, we integrate by parts the second term:
+@f{eqnarray*}
+  (\mathrm v, -2\; \textrm{div}\; \varepsilon(\textbf{u}))_{\Omega}
+  - (\textrm{div}\; \textbf{v}, p)_{\Omega}
+  + (\textbf{n}\cdot\textbf{v}, p)_{\partial\Omega}
+  -
+  (q,\textrm{div}\; \textbf{u})_{\Omega}
+  =
+  (\textbf{v}, \textbf{f})_\Omega.
+@f}
+Likewise, we integrate by parts the first term to obtain
+@f{eqnarray*}
+  (\nabla \mathrm v, 2\; \varepsilon(\textbf{u}))_{\Omega}
+  -
+  (\textbf{n} \otimes \mathrm v, 2\; \varepsilon(\textbf{u}))_{\partial\Omega}
+  - (\textrm{div}\; \textbf{v}, p)_{\Omega}
+  + (\textbf{n}\cdot\textbf{v}, p)_{\partial\Omega}
+  -
+  (q,\textrm{div}\; \textbf{u})_{\Omega}
+  =
+  (\textbf{v}, \textbf{f})_\Omega,
+@f}
+where the scalar product between two tensor-valued quantities is here
+defined as
+@f{eqnarray*}
+  (\nabla \mathrm v, 2\; \varepsilon(\textbf{u}))_{\Omega}
+  =
+  2 \int_\Omega \sum_{i,j=1}^d \frac{\partial v_j}{\partial x_i}
+  \varepsilon(\textbf{u})_{ij} \ dx.
+@f}
+Because the scalar product between a general tensor like
+$\nabla\mathrm v$ and a symmetric tensor like
+$\varepsilon(\textbf{u})$ equals the scalar product between the
+symmetrized forms of the two, we can also write the bilinear form
+above as follows:
+@f{eqnarray*}
+  (\varepsilon(\mathrm v), 2\; \varepsilon(\textbf{u}))_{\Omega}
+  -
+  (\textbf{n} \otimes \mathrm v, 2\; \varepsilon(\textbf{u}))_{\partial\Omega}
+  - (\textrm{div}\; \textbf{v}, p)_{\Omega}
+  + (\textbf{n}\cdot\textbf{v}, p)_{\partial\Omega}
+  -
+  (q,\textrm{div}\; \textbf{u})_{\Omega}
+  =
+  (\textbf{v}, \textbf{f})_\Omega.
+@f}
+We will deal with the boundary terms in the next section, but it is already
+clear from the domain terms
+@f{eqnarray*}
+  (\varepsilon(\mathrm v), 2\; \varepsilon(\textbf{u}))_{\Omega}
+  - (\textrm{div}\; \textbf{v}, p)_{\Omega}
+  -
+  (q,\textrm{div}\; \textbf{u})_{\Omega}
+@f}
+of the bilinear form that the Stokes equations yield a symmetric bilinear
+form, and consequently a symmetric (if indefinite) system matrix.
+
+
+<h3>%Boundary conditions</h3>
+
+@dealiiVideoLecture{21.5}
+(@dealiiVideoLectureSeeAlso{21.55,21.6,21.65})
+
+The weak form just derived immediately presents us with different
+possibilities for imposing boundary conditions:
+<ol>
+<li>Dirichlet velocity boundary conditions: On a part
+    $\Gamma_D\subset\partial\Omega$ we may impose Dirichlet conditions
+    on the velocity $\textbf u$:
+
+    @f{eqnarray*}
+        \textbf u = \textbf g_D \qquad\qquad \textrm{on}\ \Gamma_D.
+    @f}
+    Because test functions $\textbf v$ come from the tangent space of
+    the solution variable, we have that $\textbf v=0$ on $\Gamma_D$
+    and consequently that
+    @f{eqnarray*}
+      -(\textbf{n} \otimes \mathrm
+        v, 2\; \varepsilon(\textbf{u}))_{\Gamma_D}
+      +
+      (\textbf{n}\cdot\textbf{v}, p)_{\Gamma_D}
+      = 0.
+    @f}
+    In other words, as usual, strongly imposed boundary values do not
+    appear in the weak form.
+
+    It is noteworthy that if we impose Dirichlet boundary values on the entire
+    boundary, then the pressure is only determined up to a constant. An
+    algorithmic realization of that would use similar tools as have been seen in
+    step-11.
+
+<li>Neumann-type or natural boundary conditions: On the rest of the boundary
+    $\Gamma_N=\partial\Omega\backslash\Gamma_D$, let us re-write the
+    boundary terms as follows:
+    @f{eqnarray*}
+      -(\textbf{n} \otimes \mathrm
+        v, 2\; \varepsilon(\textbf{u}))_{\Gamma_N}
+      +
+      (\textbf{n}\cdot\textbf{v}, p)_{\Gamma_N}
+      &=&
+      \sum_{i,j=1}^d
+      -(n_i v_j, 2\; \varepsilon(\textbf{u})_{ij})_{\Gamma_N}
+      +
+      \sum_{i=1}^d
+      (n_i v_i, p)_{\Gamma_N}
+      \\
+      &=&
+      \sum_{i,j=1}^d
+      -(n_i v_j, 2\; \varepsilon(\textbf{u})_{ij})_{\Gamma_N}
+      +
+      \sum_{i,j=1}^d
+      (n_i v_j, p \delta_{ij})_{\Gamma_N}
+      \\
+      &=&
+      \sum_{i,j=1}^d
+      (n_i v_j,p \delta_{ij} - 2\; \varepsilon(\textbf{u})_{ij})_{\Gamma_N}
+      \\
+      &=&
+      (\textbf{n} \otimes \mathrm v,
+      p \textbf{1} - 2\; \varepsilon(\textbf{u}))_{\Gamma_N}
+      \\
+      &=&
+      (\mathrm v,
+       \textbf{n}\cdot [p \textbf{1} - 2\; \varepsilon(\textbf{u})])_{\Gamma_N}.
+    @f}
+    In other words, on the Neumann part of the boundary we can
+    prescribe values for the total stress:
+    @f{eqnarray*}
+      \textbf{n}\cdot [p \textbf{1} - 2\; \varepsilon(\textbf{u})]
+      =
+      \textbf g_N \qquad\qquad \textrm{on}\ \Gamma_N.
+    @f}
+    If the boundary is subdivided into Dirichlet and Neumann parts
+    $\Gamma_D,\Gamma_N$, this then leads to the following weak form:
+    @f{eqnarray*}
+      (\varepsilon(\mathrm v), 2\; \varepsilon(\textbf{u}))_{\Omega}
+      - (\textrm{div}\; \textbf{v}, p)_{\Omega}
+      -
+      (q,\textrm{div}\; \textbf{u})_{\Omega}
+      =
+      (\textbf{v}, \textbf{f})_\Omega
+      -
+      (\textbf{v}, \textbf g_N)_{\Gamma_N}.
+    @f}
+
+
+<li>Robin-type boundary conditions: Robin boundary conditions are a mixture of
+    Dirichlet and Neumann boundary conditions. They would read
+    @f{eqnarray*}
+      \textbf{n}\cdot [p \textbf{1} - 2\; \varepsilon(\textbf{u})]
+      =
+      \textbf S \textbf u \qquad\qquad \textrm{on}\ \Gamma_R,
+    @f}
+    with a rank-2 tensor (matrix) $\textbf S$. The associated weak form is
+    @f{eqnarray*}
+      (\varepsilon(\mathrm v), 2\; \varepsilon(\textbf{u}))_{\Omega}
+      - (\textrm{div}\; \textbf{v}, p)_{\Omega}
+      -
+      (q,\textrm{div}\; \textbf{u})_{\Omega}
+      +
+      (\textbf S \textbf u, \textbf{v})_{\Gamma_R}
+      =
+      (\textbf{v}, \textbf{f})_\Omega.
+    @f}
+
+<li>Partial boundary conditions: It is possible to combine Dirichlet and
+    Neumann boundary conditions by only enforcing each of them for certain
+    components of the velocity. For example, one way to impose artificial
+    boundary conditions is to require that the flow is perpendicular to the
+    boundary, i.e. the tangential component $\textbf u_{\textbf t}=(\textbf
+    1-\textbf n\otimes\textbf n)\textbf u$ be zero, thereby constraining
+    <code>dim</code>-1 components of the velocity. The remaining component can
+    be constrained by requiring that the normal component of the normal
+    stress be zero, yielding the following set of boundary conditions:
+    @f{eqnarray*}
+      \textbf u_{\textbf t} &=& 0,
+      \\
+      \textbf n \cdot \left(\textbf{n}\cdot [p \textbf{1} - 2\;
+      \varepsilon(\textbf{u})] \right)
+      &=&
+      0.
+    @f}
+
+    An alternative to this is when one wants the flow to be <i>parallel</i>
+    rather than perpendicular to the boundary (in deal.II, the
+    VectorTools::compute_no_normal_flux_constraints function can do this for
+    you). This is frequently the case for problems with a free boundary
+    (e.g. at the surface of a river or lake if vertical forces of the flow are
+    not large enough to actually deform the surface), or if no significant
+    friction is exerted by the boundary on the fluid (e.g. at the interface
+    between earth mantle and earth core where two fluids meet that are
+    stratified by different densities but that both have small enough
+    viscosities to not introduce much tangential stress on each other).
+    In formulas, this means that
+    @f{eqnarray*}
+      \textbf{n}\cdot\textbf u &=& 0,
+      \\
+      (\textbf 1-\textbf n\otimes\textbf n)
+      \left(\textbf{n}\cdot [p \textbf{1} - 2\;
+      \varepsilon(\textbf{u})] \right)
+      &=&
+      0,
+    @f}
+    the first condition (which needs to be imposed strongly) fixing a single
+    component of the velocity, with the second (which would be enforced in the
+    weak form) fixing the remaining two components.
+</ol>
+
+Despite this wealth of possibilities, we will only use Dirichlet and
+(homogeneous) Neumann boundary conditions in this tutorial program.
+
+
+<h3>Discretization</h3>
+
+As developed above, the weak form of the equations with Dirichlet and Neumann
+boundary conditions on $\Gamma_D$ and $\Gamma_N$ reads like this: find
+$\textbf u\in \textbf V_g = \{\varphi \in H^1(\Omega)^d: \varphi|_{\Gamma_D}=\textbf
+g_D\}, p\in Q=L^2(\Omega)$ so that
+@f{eqnarray*}
+  (\varepsilon(\mathrm v), 2\; \varepsilon(\textbf{u}))_{\Omega}
+  - (\textrm{div}\; \textbf{v}, p)_{\Omega}
+  -
+  (q,\textrm{div}\; \textbf{u})_{\Omega}
+  =
+  (\textbf{v}, \textbf{f})_\Omega
+  -
+  (\textbf{v}, \textbf g_N)_{\Gamma_N}
+@f}
+for all test functions
+$\textbf v\in \textbf V_0 = \{\varphi \in H^1(\Omega)^d: \varphi|_{\Gamma_D}=0\},q\in
+Q$.
+
+These equations represent a symmetric saddle point problem. It is well known
+that a solution then only exists if the function spaces in which we search for
+a solution satisfy certain conditions, typically referred to as the
+Babuska-Brezzi or Ladyzhenskaya-Babuska-Brezzi (LBB) conditions. The continuous
+function spaces above satisfy them. However, when we discretize the equations by
+replacing the continuous variables and test functions by finite element
+functions in finite dimensional spaces $\textbf V_{g,h}\subset \textbf V_g,
+Q_h\subset Q$, we have to make sure that $\textbf V_h,Q_h$ also satisfy the LBB
+conditions. This is similar to what we had to do in step-20.
+
+For the Stokes equations, there are a number of possible choices to ensure
+that the finite element spaces are compatible with the LBB condition. A simple
+and accurate choice that we will use here is $\textbf u_h\in Q_{p+1}^d,
+p_h\in Q_p$, i.e. use elements one order higher for the velocities than for the
+pressures.
+
+This then leads to the following discrete problem: find $\textbf u_h,p_h$ so
+that
+@f{eqnarray*}
+  (\varepsilon(\mathrm v_h), 2\; \varepsilon(\textbf u_h))_{\Omega}
+  - (\textrm{div}\; \textbf{v}_h, p_h)_{\Omega}
+  -
+  (q_h,\textrm{div}\; \textbf{u}_h)_{\Omega}
+  =
+  (\textbf{v}_h, \textbf{f})_\Omega
+  -
+  (\textbf{v}_h, \textbf g_N)_{\Gamma_N}
+@f}
+for all test functions $\textbf v_h, q_h$. Assembling the linear system
+associated with this problem follows the same lines used in @ref step_20
+"step-20", step-21, and explained in detail in the @ref
+vector_valued module.
+
+
+
+<h3>Linear solver and preconditioning issues</h3>
+
+The weak form of the discrete equations naturally leads to the following
+linear system for the nodal values of the velocity and pressure fields:
+@f{eqnarray*}
+  \left(\begin{array}{cc}
+    A & B^T \\ B & 0
+  \end{array}\right)
+  \left(\begin{array}{c}
+    U \\ P
+  \end{array}\right)
+  =
+  \left(\begin{array}{c}
+    F \\ G
+  \end{array}\right).
+@f}
+Like in step-20 and step-21, we will solve this
+system of equations by forming the Schur complement, i.e. we will first find
+the solution $P$ of
+@f{eqnarray*}
+  BA^{-1}B^T P &=& BA^{-1} F - G,
+@f}
+and then
+@f{eqnarray*}
+  AU &=& F - B^TP.
+@f}
+The way we do this is pretty much exactly like we did in these previous
+tutorial programs, i.e. we use the same classes <code>SchurComplement</code>
+and <code>InverseMatrix</code> again. There are two significant differences,
+however:
+
+<ol>
+<li>
+First, in the mixed Laplace equation we had to deal with the question of how
+to precondition the Schur complement $B^TM^{-1}B$, which was spectrally
+equivalent to the Laplace operator on the pressure space (because $B$
+represents the gradient operator, $B^T$ its adjoint $-\textrm{div}$, and $M$
+the identity (up to the material parameter $K^{-1}$), so $B^TM^{-1}B$ is
+something like $-\textrm{div} \mathbf 1 \nabla = -\Delta$). Consequently, the
+matrix is badly conditioned for small mesh sizes and we had to come up with an
+elaborate preconditioning scheme for the Schur complement.
+
+<li>
+Second, every time we multiplied with $B^TM^{-1}B$ we had to solve with the
+mass matrix $M$. This wasn't particularly difficult, however, since the mass
+matrix is always well conditioned and so simple to invert using CG and a
+little bit of preconditioning.
+</ol>
+In other words, preconditioning the inner solver for $M$ was simple whereas
+preconditioning the outer solver for $B^TM^{-1}B$ was complicated.
+
+Here, the situation is pretty much exactly the opposite. The difference stems
+from the fact that the matrix at the heart of the Schur complement does not
+stem from the identity operator but from a variant of the Laplace operator,
+$-\textrm{div} \nabla^s$ (where $\nabla^s$ is the symmetric gradient)
+acting on a vector field. In the investigation of this issue
+we largely follow the paper D. Silvester and A. Wathen:
+"Fast iterative solution of stabilised Stokes systems part II. Using
+general block preconditioners." (SIAM J. Numer. Anal., 31 (1994),
+pp. 1352-1367), which is available online <a
+href="http://siamdl.aip.org/getabs/servlet/GetabsServlet?prog=normal&id=SJNAAM000031000005001352000001&idtype=cvips&gifs=Yes" target="_top">here</a>.
+Principally, the difference in the matrix at the heart of the Schur
+complement has two consequences:
+
+<ol>
+<li>
+First, it makes the outer preconditioner simple: the Schur complement
+corresponds to the operator $-\textrm{div} (-\textrm{div} \nabla^s)^{-1}
+\nabla$ on the pressure space; forgetting about the fact that we deal with
+symmetric gradients instead of the regular one, the Schur complement is
+something like $-\textrm{div} (-\textrm{div} \nabla)^{-1} \nabla =
+-\textrm{div} (-\Delta)^{-1} \nabla$, which, even if not mathematically
+entirely rigorous, is spectrally equivalent to the identity operator (a
+heuristic argument would be to commute the operators into
+$-\textrm{div}(-\Delta)^{-1} \nabla = -\textrm{div}\nabla(-\Delta)^{-1} =
+-\Delta(-\Delta)^{-1} = \mathbf 1$). It turns out that it isn't easy to solve
+this Schur complement in a straightforward way with the CG method:
+using no preconditioner, the condition number of the Schur complement matrix
+depends on the size ratios of the largest to the smallest cells, and one still
+needs on the order of 50-100 CG iterations. However, there is a simple cure:
+precondition with the mass matrix on the pressure space and we get down to
+between 5 and 15 CG iterations, pretty much independently of the structure
+of the mesh (take a look at the <a href="#Results">results section</a> of this
+program to see that indeed the number of CG iterations does not change as we
+refine the mesh).
+
+So all we need in addition to what we already have is the mass matrix on the
+pressure variables. We could do that by building this matrix on the
+side in a separate data structure. However, it is worth remembering
+that although we build the system matrix
+@f{eqnarray*}
+  \left(\begin{array}{cc}
+    A & B^T \\ B & 0
+  \end{array}\right)
+@f}
+as one object (of type BlockSparseMatrix), we never actually do
+matrix-vector products with this matrix, or any other operations that
+consider the entire matrix. Rather, we only build it in this form for
+convenience (because it reflects the structure of the FESystem finite
+element and associated DoFHandler object) but later only operate on
+the $(0,0),(0,1)$, and $(1,0)$ blocks of this matrix. In other words,
+our algorithm so far entirely ignores the $(1,1)$ (pressure-pressure)
+block as it is empty anyway.
+
+Now, as mentioned, we need a pressure mass matrix to precondition the
+Schur complement, and conveniently the pressure-pressure block of
+the matrix we build anyway is currently empty and ignored. So what we
+will do is to assemble the needed mass matrix in this space; this does
+change the global system matrix but since our algorithm never operates
+on the global matrix and instead only considers individual blocks,
+this fact does not affect what we actually compute. Later, when
+solving, we then precondition the Schur complement with $M_p^{-1}$ by
+doing a few CG iterations on the well-conditioned pressure mass matrix
+$M_p$ stored in the $(1,1)$ block.
+
+
+
+<li>
+While the outer preconditioner has become simpler compared to the
+mixed Laplace case discussed in step-20, the issue of
+the inner solver has become more complicated. In the mixed Laplace
+discretization, the Schur complement has the form $B^TM^{-1}B$. Thus,
+every time we multiplied with the Schur complement, we had to solve a
+linear system $M_uz=y$; this isn't too complicated there, however,
+since the mass matrix $M_u$ on the velocity space is well-conditioned.
+
+
+On the other hand, for the Stokes equation we consider here, the Schur
+complement is $BA^{-1}B^T$ where the matrix $A$ is related to the
+Laplace operator (it is, in fact, the matrix corresponding to the
+bilinear form $(\nabla^s \varphi_i, \nabla^s\varphi_j)$). Thus,
+solving with $A$ is a lot more complicated: the matrix is badly
+conditioned and we know that we need many iterations unless we have a
+very good preconditioner. What is worse, we have to solve with $A$
+every time we multiply with the Schur complement, which is 5-15 times
+using the preconditioner described above.
+
+Because we have to solve with $A$ several times, it pays off to spend
+a bit more time once to create a good preconditioner for this
+matrix. So here's what we're going to do: if in 2d, we use the
+ultimate preconditioner, namely a direct sparse LU decomposition of
+the matrix. This is implemented using the SparseDirectUMFPACK class
+that uses the UMFPACK direct solver to compute the decomposition. To
+use it, you will have to build deal.II with UMFPACK support (which is the
+default); see the <a href="../../readme.html#optional-software">ReadMe file</a>
+for instructions. With this, the inner solver converges in one iteration.
+
+In 2d, we can do this sort of thing because even reasonably large problems
+rarely have more than a few 100,000 unknowns with relatively few nonzero
+entries per row. Furthermore, the bandwidth of matrices in 2d is ${\cal
+O}(\sqrt{N})$ and therefore moderate. For such matrices, sparse factors can be
+computed in a matter of a few seconds. (As a point of reference, computing the
+sparse factors of a matrix of size $N$ and bandwidth $B$ takes ${\cal
+O}(NB^2)$ operations. In 2d, this is ${\cal O}(N^2)$; though this is a higher
+complexity than, for example, assembling the linear system which takes ${\cal
+O}(N)$, the constant for computing the decomposition is so small that it
+doesn't become the dominating factor in the entire program until we get to
+very large %numbers of unknowns in the high 100,000s or more.)
+
+The situation changes in 3d, because there we quickly have many more
+unknowns and the bandwidth of matrices (which determines the number of
+nonzero entries in sparse LU factors) is ${\cal O}(N^{2/3})$, and there
+are many more entries per row as well. This makes using a sparse
+direct solver such as UMFPACK inefficient: only for problem sizes of a
+few 10,000 to maybe 100,000 unknowns can a sparse decomposition be
+computed using reasonable time and memory resources.
+
+What we do in that case is to use an incomplete LU decomposition (ILU) as a
+preconditioner, rather than actually computing complete LU factors. As it so
+happens, deal.II has a class that does this: SparseILU. Computing the ILU
+takes a time that only depends on the number of nonzero entries in the sparse
+matrix (or that we are willing to fill in the LU factors, if these should be
+more than the ones in the matrix), but is independent of the bandwidth of the
+matrix. It is therefore an operation that can efficiently also be computed in
+3d. On the other hand, an incomplete LU decomposition, by definition, does not
+represent an exact inverse of the matrix $A$. Consequently, preconditioning
+with the ILU will still require more than one iteration, unlike
+preconditioning with the sparse direct solver. The inner solver will therefore
+take more time when multiplying with the Schur complement, an unavoidable
+trade-off.
+</ol>
+
+In the program below, we will make use of the fact that the SparseILU and
+SparseDirectUMFPACK classes have a very similar interface and can be used
+interchangeably. All that we need is a switch class that, depending on the
+dimension, provides a type that is either of the two classes mentioned
+above. This is how we do that:
+@code
+template <int dim>
+struct InnerPreconditioner;
+
+template <>
+struct InnerPreconditioner<2>
+{
+    typedef SparseDirectUMFPACK type;
+};
+
+template <>
+struct InnerPreconditioner<3>
+{
+    typedef SparseILU<double> type;
+};
+@endcode
+
+From here on, we can refer to the type <code>typename
+InnerPreconditioner@<dim@>::%type</code> and automatically get the correct
+preconditioner class. Because of the similarity of the interfaces of the two
+classes, we will be able to use them interchangeably using the same syntax in
+all places.
+
+
+<h3>The testcase</h3>
+
+The domain, right hand side and boundary conditions we implement below relate
+to a problem in geophysics: there, one wants to compute the flow field of
+magma in the earth's interior under a mid-ocean rift. Rifts are places where
+two continental plates are very slowly drifting apart (a few centimeters per
+year at most), leaving a crack in the earth crust that is filled with magma
+from below. Without trying to be entirely realistic, we model this situation
+by solving the following set of equations and boundary conditions on the
+domain $\Omega=[-2,2]\times[0,1]\times[-1,0]$:
+@f{eqnarray*}
+  -2\; \textrm{div}\; \varepsilon(\textbf{u}) + \nabla p &=& 0,
+  \\
+  -\textrm{div}\; \textbf{u} &=& 0,
+  \\
+  \mathbf u &=&   \left(\begin{array}{c}
+    -1 \\ 0 \\0
+  \end{array}\right)
+  \qquad\qquad \textrm{at}\ z=0, x<0,
+  \\
+  \mathbf u &=&   \left(\begin{array}{c}
+    +1 \\ 0 \\0
+  \end{array}\right)
+  \qquad\qquad \textrm{at}\ z=0, x>0,
+  \\
+  \mathbf u &=&   \left(\begin{array}{c}
+    0 \\ 0 \\0
+  \end{array}\right)
+  \qquad\qquad \textrm{at}\ z=0, x=0,
+@f}
+and using natural boundary conditions $\textbf{n}\cdot [p \textbf{1} - 2
+\varepsilon(\textbf{u})] = 0$ everywhere else. In other words, at the
+left part of the top surface we prescribe that the fluid moves with the
+continental plate to the left at speed $-1$, that it moves to the right on the
+right part of the top surface, and impose natural flow conditions everywhere
+else. If we are in 2d, the description is essentially the same, with the
+exception that we omit the second component of all vectors stated above.
+
+As will become apparent in the <a href="#Results">results section</a>, the
+flow field will pull material from below and move it to the left and right
+ends of the domain, as expected. The discontinuity of velocity boundary
+conditions will produce a singularity in the pressure at the center of the top
+surface that sucks material all the way to the top surface to fill the gap
+left by the outward motion of material at this location.
+
+
+<h3>Implementation</h3>
+
+<h4>Using inhomogeneous constraints for implementing Dirichlet boundary conditions</h4>
+
+In all the previous tutorial programs, we used the ConstraintMatrix merely
+for handling hanging node constraints (with the exception of step-11). However,
+the class can also be used to implement Dirichlet boundary conditions, as we
+will show in this program, by fixing some node values $x_i = b_i$. Note that
+these are inhomogeneous constraints, and we have to pay some special
+attention to that. The way we are going to implement this is to first read
+in the boundary values into the ConstraintMatrix object by using the call
+
+@code
+  VectorTools::interpolate_boundary_values (dof_handler,
+                                            1,
+                                            BoundaryValues<dim>(),
+                                            constraints);
+@endcode
+
+very similar to how we were making the list of boundary nodes
+before (note that we set Dirichlet conditions only on boundaries with
+boundary flag 1). The actual application of the boundary values is then
+handled by the ConstraintMatrix object directly, without any additional
+interference.
+
+We could then proceed as before, namely by filling the matrix, and then
+calling a condense function on the constraints object of the form
+@code
+  constraints.condense (system_matrix, system_rhs);
+@endcode
+
+Note that we call this on the system matrix and system right hand side
+simultaneously, since resolving inhomogeneous constraints requires knowledge
+about both the matrix entries and the right hand side. For efficiency
+reasons, though, we choose another strategy: all the constraints collected
+in the ConstraintMatrix can be resolved on the fly while writing local data
+into the global matrix, by using the call
+@code
+  constraints.distribute_local_to_global (local_matrix, local_rhs,
+                                          local_dof_indices,
+                                          system_matrix, system_rhs);
+@endcode
+
+This technique is further discussed in the step-27 tutorial
+program. All we need to know here is that this function does three things
+at once: it writes the local data into the global matrix and right hand
+side, it distributes the hanging node constraints and additionally
+implements (inhomogeneous) Dirichlet boundary conditions. That's nice, isn't
+it?
+
+We can conclude that the ConstraintMatrix provides an alternative to using
+MatrixTools::apply_boundary_values for implementing Dirichlet boundary
+conditions.
+
+
+<a name="constraint-matrix">
+<h4>Using ConstraintMatrix for increasing performance</h4>
+</a>
+
+Frequently, a sparse matrix contains a substantial number of elements that
+actually are zero when we are about to start a linear solve. Such elements are
+introduced when we eliminate constraints or implement Dirichlet conditions,
+where we usually delete all entries in constrained rows and columns, i.e., we
+set them to zero. The fraction of elements that are present in the sparsity
+pattern, but do not really contain any information, can be up to one fourth
+of the total number of elements in the matrix for the 3D application
+considered in this tutorial program. Remember that matrix-vector products or
+preconditioners operate on all the elements of a sparse matrix (even those
+that are zero), which is an inefficiency we will avoid here.
+
+An advantage of directly resolving constrained degrees of freedom is that we
+can avoid having most of the entries that are going to be zero in our sparse
+matrix — we do not need constrained entries during matrix construction
+(as opposed to the traditional algorithms, which first fill the matrix, and
+only resolve constraints afterwards). This will save both memory and time
+when forming matrix-vector products. The way we are going to do that is to
+pass the information about constraints to the function that generates the
+sparsity pattern, and then set a <tt>false</tt> argument specifying that we
+do not intend to use constrained entries:
+@code
+  DoFTools::make_sparsity_pattern (dof_handler, sparsity_pattern,
+                                   constraints, false);
+@endcode
+This function also obviates, by the way, the call to the
+<tt>condense()</tt> function on the sparsity pattern.
+
+
+<h4>Performance optimizations</h4>
+
+The program developed below has seen a lot of TLC. We have run it over and
+over under profiling tools (mainly <a
+href="http://www.valgrind.org/">valgrind</a>'s cachegrind and callgrind
+tools, as well as the KDE <a
+href="http://kcachegrind.sourceforge.net/">KCachegrind</a> program for
+visualization) to see where the bottlenecks are. This has paid off: through
+this effort, the program has become about four times as fast when
+considering the runtime of the refinement cycles zero through three,
+reducing the overall number of CPU instructions executed from
+869,574,060,348 to 199,853,005,625. For higher refinement levels, the gain
+is probably even larger since some algorithms that are not ${\cal O}(N)$
+have been eliminated.
+
+Essentially, there are currently two algorithms in the program that do not
+scale linearly with the number of degrees of freedom: renumbering of degrees
+of freedom (which is ${\cal O}(N \log N)$), and the linear solver (which is
+${\cal O}(N^{4/3})$). As for the first, while reordering degrees of freedom
+may not scale linearly, it is an indispensable part of the overall algorithm
+as it greatly improves the quality of the sparse ILU, easily making up for
+the time spent on computing the renumbering; graphs and timings to
+demonstrate this are shown in the documentation of the DoFRenumbering
+namespace, also underlining the choice of the Cuthill-McKee reordering
+algorithm chosen below.
+
+As for the linear solver: as mentioned above, our implementation here uses a
+Schur complement formulation. This is not necessarily the very best choice
+but demonstrates various important techniques available in deal.II. The
+question of which solver is best is again discussed in the <a
+href="#improved-solver">section on improved solvers in the results part</a>
+of this program, along with code showing alternative solvers and a
+comparison of their results.
+
+Apart from this, many other algorithms have been tested and improved during
+the creation of this program. For example, in building the sparsity pattern,
+we originally used a BlockCompressedSparsityPattern object that added one
+element at a time; however, its data structures are poorly adapted for the
+large numbers of nonzero entries per row created by our discretization in
+3d, leading to a quadratic behavior. Replacing the internal algorithms in
+deal.II to set many elements at a time, and using a
+BlockCompressedSimpleSparsityPattern (which has, as of early 2015, been in turn
+replaced by BlockDynamicSparsityPattern) as a better adapted data structure,
+removed this bottleneck at the price of a slightly higher memory
+consumption. Likewise, the implementation of the decomposition step in the
+SparseILU class was very inefficient and has been replaced by one that is
+about 10 times faster. Even the vmult function of the SparseILU has been
+improved to save about twenty percent of time. Small improvements were
+applied here and there. Moreover, the ConstraintMatrix object has been used
+to eliminate a lot of entries in the sparse matrix that are eventually going
+to be zero, see <a href="#constraint-matrix">the section on using advanced
+features of the ConstraintMatrix class</a>.
+
+A profile of how many CPU instructions are spent at the various
+different places in the program during refinement cycles
+zero through three in 3d is shown here:
+
+<img src="http://www.dealii.org/images/steps/developer/step-22.profile-3.png" alt="">
+
+As can be seen, at this refinement level approximately three quarters of the
+instruction count is spent on the actual solver (the SparseILU::vmult calls
+on the left, the SparseMatrix::vmult call in the middle for the Schur
+complement solve, and another box representing the multiplications with
+SparseILU and SparseMatrix in the solve for <i>U</i>). About one fifth of
+the instruction count is spent on matrix assembly and sparse ILU computation
+(box in the lower right corner) and the rest on other things. Since floating
+point operations such as in the SparseILU::vmult calls typically take much
+longer than many of the logical operations and table lookups in matrix
+assembly, the fraction of the run time taken up by matrix assembly is
+actually significantly less than the fraction of instructions, as will
+become apparent in the comparison we make in the results section.
+
+For higher refinement levels, the boxes representing the solver as well as
+the blue box at the top right stemming from the reordering algorithm are going
+to grow at the expense of the other parts of the program, since they don't
+scale linearly. The fact that at this moderate refinement level (3168 cells
+and 93176 degrees of freedom) the linear solver already makes up about three
+quarters of the instructions is a good sign that most of the algorithms used
+in this program are well-tuned and that major improvements in speeding up
+the program are most likely not to come from hand-optimizing individual
+aspects but by changing solver algorithms. We will address this point in the
+discussion of results below as well.
+
+As a final point, and as a point of reference, the following picture also
+shows how the profile looked at an early stage of optimizing this program:
+
+<img src="http://www.dealii.org/images/steps/developer/step-22.profile-3.original.png" alt="">
+
+As mentioned above, the runtime of this version was about four times as long
+as for the first profile, with the SparseILU decomposition taking up about
+30% of the instruction count, and operations on the ill-suited
+CompressedSparsityPattern about 10%. Both these bottlenecks have since been
+completely removed.
diff --git a/examples/step-22/doc/kind b/examples/step-22/doc/kind
new file mode 100644
index 0000000..e62f4e7
--- /dev/null
+++ b/examples/step-22/doc/kind
@@ -0,0 +1 @@
+fluids
diff --git a/examples/step-22/doc/results.dox b/examples/step-22/doc/results.dox
new file mode 100644
index 0000000..f115fd9
--- /dev/null
+++ b/examples/step-22/doc/results.dox
@@ -0,0 +1,804 @@
+<a name="Results"></a>
+<h1>Results</h1>
+
+<h3>Output of the program and graphical visualization</h3>
+
+<h4>2D calculations</h4>
+
+Running the program with the space dimension set to 2 in <code>main()</code>
+yields the following output (in "release mode", @dealiiVideoLectureSeeAlso{18}):
+@code
+examples/\step-22> make run
+Refinement cycle 0
+   Number of active cells: 64
+   Number of degrees of freedom: 679 (594+85)
+   Assembling...
+   Computing preconditioner...
+   Solving...  11 outer CG Schur complement iterations for pressure
+
+Refinement cycle 1
+   Number of active cells: 160
+   Number of degrees of freedom: 1683 (1482+201)
+   Assembling...
+   Computing preconditioner...
+   Solving...  11 outer CG Schur complement iterations for pressure
+
+Refinement cycle 2
+   Number of active cells: 376
+   Number of degrees of freedom: 3813 (3370+443)
+   Assembling...
+   Computing preconditioner...
+   Solving...  11 outer CG Schur complement iterations for pressure
+
+Refinement cycle 3
+   Number of active cells: 880
+   Number of degrees of freedom: 8723 (7722+1001)
+   Assembling...
+   Computing preconditioner...
+   Solving...  11 outer CG Schur complement iterations for pressure
+
+Refinement cycle 4
+   Number of active cells: 2008
+   Number of degrees of freedom: 19383 (17186+2197)
+   Assembling...
+   Computing preconditioner...
+   Solving...  11 outer CG Schur complement iterations for pressure
+
+Refinement cycle 5
+   Number of active cells: 4288
+   Number of degrees of freedom: 40855 (36250+4605)
+   Assembling...
+   Computing preconditioner...
+   Solving...  11 outer CG Schur complement iterations for pressure
+@endcode
+
+The entire computation above takes about 2 seconds on a reasonably
+quick (by 2015 standards) machine.
+
+What we see immediately from this is that the number of (outer)
+iterations does not increase as we refine the mesh. This confirms the
+statement in the introduction that preconditioning the Schur
+complement with the mass matrix indeed yields a matrix spectrally
+equivalent to the identity matrix (i.e. with eigenvalues bounded above
+and below independently of the mesh size or the relative sizes of
+cells). In other words, the mass matrix and the Schur complement are
+spectrally equivalent.
+
+In the images below, we show the grids for the first six refinement
+steps in the program.  Observe how the grid is refined in regions
+where the solution rapidly changes: On the upper boundary, we have
+Dirichlet boundary conditions that are -1 in the left half of the line
+and 1 in the right one, so there is an abrupt change at $x=0$. Likewise,
+there are changes from Dirichlet to Neumann data in the two upper
+corners, so there is need for refinement there as well:
+
+<TABLE WIDTH="60%" ALIGN="center">
+  <tr>
+    <td ALIGN="center">
+      <img src="http://www.dealii.org/images/steps/developer/step-22.2d.mesh-0.png" alt="">
+    </td>
+
+    <td ALIGN="center">
+      <img src="http://www.dealii.org/images/steps/developer/step-22.2d.mesh-1.png" alt="">
+    </td>
+  </tr>
+
+  <tr>
+    <td ALIGN="center">
+      <img src="http://www.dealii.org/images/steps/developer/step-22.2d.mesh-2.png" alt="">
+    </td>
+
+    <td ALIGN="center">
+      <img src="http://www.dealii.org/images/steps/developer/step-22.2d.mesh-3.png" alt="">
+    </td>
+  </tr>
+
+  <tr>
+    <td ALIGN="center">
+      <img src="http://www.dealii.org/images/steps/developer/step-22.2d.mesh-4.png" alt="">
+    </td>
+
+    <td ALIGN="center">
+      <img src="http://www.dealii.org/images/steps/developer/step-22.2d.mesh-5.png" alt="">
+    </td>
+  </tr>
+</table>
+
+Finally, following is a plot of the flow field. It shows fluid
+transported along with the moving upper boundary and being replaced by
+material coming from below:
+
+<img src="http://www.dealii.org/images/steps/developer/step-22.2d.solution.png" alt="">
+
+This plot uses the capability of VTK-based visualization programs (in
+this case of VisIt) to show vector data; this is the result of us
+declaring the velocity components of the finite element in use to be a
+set of vector components, rather than independent scalar components in
+the <code>StokesProblem@<dim@>::%output_results</code> function of this
+tutorial program.
+
+
+
+<h4>3D calculations</h4>
+
+In 3d, the screen output of the program looks like this:
+
+@code
+Refinement cycle 0
+   Number of active cells: 32
+   Number of degrees of freedom: 1356 (1275+81)
+   Assembling...
+   Computing preconditioner...
+   Solving...  13 outer CG Schur complement iterations for pressure.
+
+Refinement cycle 1
+   Number of active cells: 144
+   Number of degrees of freedom: 5088 (4827+261)
+   Assembling...
+   Computing preconditioner...
+   Solving...  14 outer CG Schur complement iterations for pressure.
+
+Refinement cycle 2
+   Number of active cells: 704
+   Number of degrees of freedom: 22406 (21351+1055)
+   Assembling...
+   Computing preconditioner...
+   Solving...  14 outer CG Schur complement iterations for pressure.
+
+Refinement cycle 3
+   Number of active cells: 3168
+   Number of degrees of freedom: 93176 (89043+4133)
+   Assembling...
+   Computing preconditioner...
+   Solving...  15 outer CG Schur complement iterations for pressure.
+
+Refinement cycle 4
+   Number of active cells: 11456
+   Number of degrees of freedom: 327808 (313659+14149)
+   Assembling...
+   Computing preconditioner...
+   Solving...  15 outer CG Schur complement iterations for pressure.
+
+Refinement cycle 5
+   Number of active cells: 45056
+   Number of degrees of freedom: 1254464 (1201371+53093)
+   Assembling...
+   Computing preconditioner...
+   Solving...  14 outer CG Schur complement iterations for pressure.
+@endcode
+
+Again, we see that the number of outer iterations does not increase as
+we refine the mesh. Nevertheless, the compute time increases
+significantly: for each of the iterations above separately, it takes about
+0.14 seconds, 0.63 seconds, 4.8 seconds, 35 seconds, 2 minutes and 33 seconds,
+and 13 minutes and 12 seconds. This overall superlinear (in the number of
+unknowns) increase in runtime is due to the fact that our inner solver is not
+${\cal O}(N)$: a simple experiment shows that as we keep refining the mesh, the
+average number of ILU-preconditioned CG iterations to invert the
+velocity-velocity block $A$ increases.
+
+We will address the question of how possibly to improve our solver <a
+href="#improved-solver">below</a>.
+
+As for the graphical output, the grids generated during the solution
+look as follows:
+
+<TABLE WIDTH="60%" ALIGN="center">
+  <tr>
+    <td ALIGN="center">
+      <img src="http://www.dealii.org/images/steps/developer/step-22.3d.mesh-0.png" alt="">
+    </td>
+
+    <td ALIGN="center">
+      <img src="http://www.dealii.org/images/steps/developer/step-22.3d.mesh-1.png" alt="">
+    </td>
+  </tr>
+
+  <tr>
+    <td ALIGN="center">
+      <img src="http://www.dealii.org/images/steps/developer/step-22.3d.mesh-2.png" alt="">
+    </td>
+
+    <td ALIGN="center">
+      <img src="http://www.dealii.org/images/steps/developer/step-22.3d.mesh-3.png" alt="">
+    </td>
+  </tr>
+
+  <tr>
+    <td ALIGN="center">
+      <img src="http://www.dealii.org/images/steps/developer/step-22.3d.mesh-4.png" alt="">
+    </td>
+
+    <td ALIGN="center">
+      <img src="http://www.dealii.org/images/steps/developer/step-22.3d.mesh-5.png" alt="">
+    </td>
+  </tr>
+</table>
+
+Again, they show essentially the location of singularities introduced
+by boundary conditions. The vector field computed makes for an
+interesting graph:
+
+<img src="http://www.dealii.org/images/steps/developer/step-22.3d.solution.png" alt="">
+
+The isocontours shown here as well are those of the pressure
+variable, showing the singularity at the point of discontinuous
+velocity boundary conditions.
+
+
+
+<h3>Sparsity pattern</h3>
+
+As explained during the generation of the sparsity pattern, it is
+important to have the numbering of degrees of freedom in mind when
+using preconditioners like incomplete LU decompositions. This is most
+conveniently visualized using the distribution of nonzero elements in
+the stiffness matrix.
+
+If we don't do anything special to renumber degrees of freedom (i.e.,
+without using DoFRenumbering::Cuthill_McKee, but with using
+DoFRenumbering::component_wise to ensure that degrees of freedom are
+appropriately sorted into their corresponding blocks of the matrix and
+vector), then we get the following image after the first adaptive
+refinement in two dimensions:
+
+<img src="http://www.dealii.org/images/steps/developer/step-22.2d.sparsity-nor.png" alt="">
+
+In order to generate such a graph, you have to insert a piece of
+code like the following to the end of the setup step.
+@code
+  {
+    std::ofstream out ("sparsity_pattern.gpl");
+    sparsity_pattern.print_gnuplot(out);
+  }
+@endcode
+
+It is clearly visible that the nonzero entries are spread over almost the
+whole matrix.  This makes preconditioning by ILU inefficient: ILU generates a
+Gaussian elimination (LU decomposition) without fill-in elements, which means
+that more tentative fill-ins left out will result in a worse approximation of
+the complete decomposition.
+
+In this program, we have thus chosen a more advanced renumbering of
+components.  The renumbering with DoFRenumbering::Cuthill_McKee and grouping
+the components into velocity and pressure yields the following output:
+
+<img src="http://www.dealii.org/images/steps/developer/step-22.2d.sparsity-ren.png" alt="">
+
+It is apparent that the situation has improved a lot. Most of the elements are
+now concentrated around the diagonal in the (0,0) block in the matrix. Similar
+effects are also visible for the other blocks. In this case, the ILU
+decomposition will be much closer to the full LU decomposition, which improves
+the quality of the preconditioner. (It may be interesting to note that the
+sparse direct solver UMFPACK does some %internal renumbering of the equations
+before actually generating a sparse LU decomposition; that procedure leads to
+a very similar pattern to the one we got from the Cuthill-McKee algorithm.)
+
+Finally, we want to have a closer
+look at a sparsity pattern in 3D. We show only the (0,0) block of the
+matrix, again after one adaptive refinement. Apart from the fact that the matrix
+size has increased, it is also visible that there are many more entries
+in the matrix. Moreover, even for the optimized renumbering, there will be a
+considerable amount of tentative fill-in elements. This illustrates why UMFPACK
+is not a good choice in 3D - a full decomposition needs many new entries that
+ eventually won't fit into the physical memory (RAM):
+
+<img src="http://www.dealii.org/images/steps/developer/step-22.3d.sparsity_uu-ren.png" alt="">
+
+
+
+<h3>Possible Extensions</h3>
+
+<a name="improved-solver">
+<h4>Improved linear solver in 3D</h4>
+</a>
+
+We have seen in the section of computational results that the number of outer
+iterations does not depend on the mesh size, which is optimal in a sense of
+scalability. This does, however, not apply to the solver as a whole, as
+mentioned above:
+We did not look at the number of inner iterations when generating the inverse of
+the matrix $A$ and the mass matrix $M_p$. Of course, this is unproblematic in
+the 2D case where we precondition $A$ with a direct solver and the
+<code>vmult</code> operation of the inverse matrix structure will converge in
+one single CG step, but this changes in 3D where we only use an ILU
+preconditioner.  There, the number of required preconditioned CG steps to
+invert $A$ increases as the mesh is refined, and each <code>vmult</code>
+operation involves on average approximately 14, 23, 36, 59, 75 and 101 inner
+CG iterations in the refinement steps shown above. (On the other hand,
+the number of iterations for applying the inverse pressure mass matrix is
+always around five, both in two and three dimensions.)  To summarize, most work
+is spent on solving linear systems with the same matrix $A$ over and over again.
+What makes this look even worse is the fact that we
+actually invert a matrix that is about 95 percent the size of the total system
+matrix and stands for 85 percent of the non-zero entries in the sparsity
+pattern. Hence, the natural question is whether it is reasonable to solve a
+linear system with matrix $A$ for about 15 times when calculating the solution
+to the block system.
+
+The answer is, of course, that we can do that in a few other (most of the time
+better) ways.
+Nevertheless, it has to be remarked that an indefinite system such as the one
+at hand indeed puts much higher
+demands on the linear algebra than the standard elliptic problems we have seen
+in the early tutorial programs. The improvements are still rather
+unsatisfactory if one compares with an elliptic problem of similar
+size. Either way, we will introduce below a number of improvements to the
+linear solver, a discussion that we will revisit with additional
+options in the step-31 program.
+
+<a name="improved-ilu">
+<h5>Better ILU decomposition by smart reordering</h5>
+</a>
+A first attempt to improve the speed of the linear solution process is to choose
+a dof reordering that makes the ILU closer to a full LU decomposition, as
+already mentioned in the in-code comments. The DoFRenumbering namespace compares
+several choices for the renumbering of dofs for the Stokes equations. The best
+result regarding the computing time was found for the King ordering, which is
+accessed through the call DoFRenumbering::boost::king_ordering. With that
+program, the inner solver needs considerably fewer operations, e.g. about 62
+inner CG iterations for the inversion of $A$ at cycle 4 compared to about 75
+iterations with the standard Cuthill-McKee-algorithm. Also, the computing time
+at cycle 4 decreased from about 17 to 11 minutes for the <code>solve()</code>
+call. However, the King ordering (and the orderings provided by the
+DoFRenumbering::boost namespace in general) has a serious drawback - it uses
+much more memory than the built-in deal.II versions, since it acts on abstract
+graphs rather than the geometry provided by the triangulation. In the present
+case, the renumbering takes about 5 times as much memory, which yields an
+infeasible algorithm for the last cycle in 3D with 1.2 million
+unknowns.
+
+<h5>Better preconditioner for the inner CG solver</h5>
+Another idea to improve the situation even more would be to choose a
+preconditioner that makes CG for the (0,0) matrix $A$ converge in a
+mesh-independent number of iterations, say 10 to 30. We have seen such a
+candidate in step-16: multigrid.
+
+<h5>Block Schur complement preconditioner</h5>
+Even with a good preconditioner for $A$, we still
+need to solve the same linear system repeatedly (with different
+right hand sides, though) in order to make the Schur complement solve
+converge. The approach we are going to discuss here is how inner iteration
+and outer iteration can be combined. If we persist in calculating the Schur
+complement, there is no other possibility.
+
+The alternative is to attack the block system at once and use an approximate
+Schur complement as an efficient preconditioner. The idea is as
+follows: If we find a block preconditioner $P$ such that the matrix
+@f{eqnarray*}
+  P^{-1}\left(\begin{array}{cc}
+    A & B^T \\ B & 0
+  \end{array}\right)
+@f}
+is simple, then an iterative solver with that preconditioner will converge in a
+few iterations. Using the Schur complement $S = B A^{-1} B^T$, one finds that
+@f{eqnarray*}
+  P^{-1}
+  =
+  \left(\begin{array}{cc}
+    A^{-1} & 0 \\ S^{-1} B A^{-1} & -S^{-1}
+  \end{array}\right)
+@f}
+would appear to be a good choice since
+@f{eqnarray*}
+  P^{-1}\left(\begin{array}{cc}
+    A & B^T \\ B & 0
+  \end{array}\right)
+  =
+  \left(\begin{array}{cc}
+    A^{-1} & 0 \\ S^{-1} B A^{-1} & -S^{-1}
+  \end{array}\right)\cdot \left(\begin{array}{cc}
+    A & B^T \\ B & 0
+  \end{array}\right)
+  =
+  \left(\begin{array}{cc}
+    I & A^{-1} B^T \\ 0 & I
+  \end{array}\right).
+@f}
+This is the approach taken by the paper by Silvester and Wathen referred
+to in the introduction (with the exception that Silvester and Wathen use
+right preconditioning). In this case, a Krylov-based iterative method would
+converge in one step only if exact inverses of $A$ and $S$ were applied,
+since all the eigenvalues are one (and the number of iterations in such a
+method is bounded by the number of distinct eigenvalues). Below, we will
+discuss the choice of an adequate solver for this problem. First, we are
+going to have a closer look at the implementation of the preconditioner.
+
+Since $P$ is aimed to be a preconditioner only, we shall use approximations to
+the inverse of the Schur complement $S$ and the matrix $A$. Hence, the Schur
+complement will be approximated by the pressure mass matrix $M_p$, and we use
+a preconditioner to $A$ (without an InverseMatrix class around it) for
+approximating $A^{-1}$.
+
+Here comes the class that implements the block Schur
+complement preconditioner. The <code>vmult</code> operation for block vectors
+according to the derivation above can be specified by three successive
+operations:
+@code
+template <class PreconditionerA, class PreconditionerMp>
+class BlockSchurPreconditioner : public Subscriptor
+{
+  public:
+    BlockSchurPreconditioner (const BlockSparseMatrix<double>         &S,
+          const InverseMatrix<SparseMatrix<double>,PreconditionerMp>  &Mpinv,
+          const PreconditionerA &Apreconditioner);
+
+  void vmult (BlockVector<double>       &dst,
+              const BlockVector<double> &src) const;
+
+  private:
+    const SmartPointer<const BlockSparseMatrix<double> > system_matrix;
+    const SmartPointer<const InverseMatrix<SparseMatrix<double>,
+                       PreconditionerMp > > m_inverse;
+    const PreconditionerA &a_preconditioner;
+
+    mutable Vector<double> tmp;
+
+};
+
+template <class PreconditionerA, class PreconditionerMp>
+BlockSchurPreconditioner<PreconditionerA, PreconditionerMp>::BlockSchurPreconditioner(
+          const BlockSparseMatrix<double>                            &S,
+          const InverseMatrix<SparseMatrix<double>,PreconditionerMp> &Mpinv,
+          const PreconditionerA &Apreconditioner
+          )
+                :
+                system_matrix           (&S),
+                m_inverse               (&Mpinv),
+                a_preconditioner        (Apreconditioner),
+                tmp                     (S.block(1,1).m())
+{}
+
+        // Now the interesting function, the multiplication of
+        // the preconditioner with a BlockVector.
+template <class PreconditionerA, class PreconditionerMp>
+void BlockSchurPreconditioner<PreconditionerA, PreconditionerMp>::vmult (
+                                     BlockVector<double>       &dst,
+                                     const BlockVector<double> &src) const
+{
+        // Form u_new = A^{-1} u
+  a_preconditioner.vmult (dst.block(0), src.block(0));
+        // Form tmp = - B u_new + p
+        // (<code>SparseMatrix::residual</code>
+        // does precisely this)
+  system_matrix->block(1,0).residual(tmp, dst.block(0), src.block(1));
+        // Change sign in tmp
+  tmp *= -1;
+        // Multiply by the inverse of the approximate Schur complement
+        // (i.e. the inverse of the pressure mass matrix)
+  m_inverse->vmult (dst.block(1), tmp);
+}
+@endcode
+
+Since we act on the whole block system now, we have to live with one
+disadvantage: we need to perform the solver iterations on
+the full block system instead of the smaller pressure space.
+
+Now we turn to the question which solver we should use for the block
+system. The first observation is that the resulting preconditioned matrix cannot
+be solved with CG since it is neither positive definite nor symmetric.
+
+The deal.II libraries implement several solvers that are appropriate for the
+problem at hand. One choice is the solver @ref SolverBicgstab "BiCGStab", which
+was used for the solution of the unsymmetric advection problem in step-9. The
+second option, the one we are going to choose, is @ref SolverGMRES "GMRES"
+(generalized minimum residual). Both methods have their pros and cons - there
+are problems where one of the two candidates clearly outperforms the other, and
+vice versa.
+<a href="http://en.wikipedia.org/wiki/GMRES#Comparison_with_other_solvers">Wikipedia</a>'s
+article on the GMRES method gives a comparative presentation.
+A more comprehensive and well-founded comparison can be read e.g. in the book by
+J.W. Demmel (Applied Numerical Linear Algebra, SIAM, 1997, section 6.6.6).
+
+For our specific problem with the ILU preconditioner for $A$, we certainly need
+to perform hundreds of iterations on the block system for large problem sizes
+(we won't beat CG!). Actually, this disfavors GMRES: During the GMRES
+iterations, a basis of Krylov vectors is successively built up and some
+operations are performed on these vectors. The more vectors are in this basis,
+the more operations and memory will be needed. The number of operations scales
+as ${\cal O}(n + k^2)$ and memory as ${\cal O}(kn)$, where $k$ is the number of
+vectors in the Krylov basis and $n$ the size of the (block) matrix.
+To not let these demands grow excessively, deal.II limits the size $k$ of the
+basis to 30 vectors by default.
+Then, the basis is rebuilt. This implementation of the GMRES method is called
+GMRES(k), with default $k=30$. What we have gained by this restriction,
+namely a bound on operations and memory requirements, will be compensated by
+the fact that we use an incomplete basis - this will increase the number of
+required iterations.
+
+BiCGStab, on the other hand, won't get slower when many iterations are needed
+(one iteration uses only results from one preceding step and
+not all the steps as GMRES). Besides the fact that BiCGStab is more expensive per
+step since two matrix-vector products are needed (compared to one for
+CG or GMRES), there is one main reason which makes BiCGStab not appropriate for
+this problem: The preconditioner applies the inverse of the pressure
+mass matrix by using the InverseMatrix class. Since the application of the
+inverse matrix to a vector is done only in an approximate way (an exact inverse
+is too expensive), this will also affect the solver. In the case of BiCGStab,
+the Krylov vectors will not be orthogonal due to that perturbation. While
+this is uncritical for a small number of steps (up to about 50), it ruins the
+performance of the solver when these perturbations have grown to a significant
+magnitude in the course of the iterations.
+
+We did some experiments with BiCGStab and found it to
+be faster than GMRES up to refinement cycle 3 (in 3D), but it became very slow
+for cycles 4 and 5 (even slower than the original Schur complement), so the
+solver is useless in this situation. Choosing a sharper tolerance for the
+inverse matrix class (<code>1e-10*src.l2_norm()</code> instead of
+<code>1e-6*src.l2_norm()</code>) made BiCGStab perform well also for cycle 4,
+but did not change the failure on the very large problems.
+
+GMRES is of course also affected by the approximate inverses, but it is not as
+sensitive to orthogonality and retains a relatively good performance also for
+large sizes, see the results below.
+
+With this said, we turn to the realization of the solver call with GMRES with
+$k=100$ temporary vectors:
+
+@code
+      SparseMatrix<double> pressure_mass_matrix;
+      pressure_mass_matrix.reinit(sparsity_pattern.block(1,1));
+      pressure_mass_matrix.copy_from(system_matrix.block(1,1));
+      system_matrix.block(1,1) = 0;
+
+      SparseILU<double> pmass_preconditioner;
+      pmass_preconditioner.initialize (pressure_mass_matrix,
+        SparseILU<double>::AdditionalData());
+
+      InverseMatrix<SparseMatrix<double>,SparseILU<double> >
+        m_inverse (pressure_mass_matrix, pmass_preconditioner);
+
+      BlockSchurPreconditioner<typename InnerPreconditioner<dim>::type,
+                               SparseILU<double> >
+        preconditioner (system_matrix, m_inverse, *A_preconditioner);
+
+      SolverControl solver_control (system_matrix.m(),
+                                    1e-6*system_rhs.l2_norm());
+      GrowingVectorMemory<BlockVector<double> > vector_memory;
+      SolverGMRES<BlockVector<double> >::AdditionalData gmres_data;
+      gmres_data.max_n_tmp_vectors = 100;
+
+      SolverGMRES<BlockVector<double> > gmres(solver_control, vector_memory,
+                                              gmres_data);
+
+      gmres.solve(system_matrix, solution, system_rhs,
+                  preconditioner);
+
+      constraints.distribute (solution);
+
+      std::cout << " "
+                << solver_control.last_step()
+                << " block GMRES iterations";
+@endcode
+
+Obviously, one needs to add the include file @ref SolverGMRES
+"<lac/solver_gmres.h>" in order to make this run.
+We call the solver with a BlockVector template in order to enable
+GMRES to operate on block vectors and matrices.
+Note also that we need to set the (1,1) block in the system
+matrix to zero (we saved the pressure mass matrix there which is not part of the
+problem) after we copied the information to another matrix.
+
+Using the Timer class, we collect some statistics that compare the runtime
+of the block solver with the one from the problem implementation above.
+Besides the solution with the two options we also check if the solutions
+of the two variants are close to each other (i.e. this solver gives indeed the
+same solution as we had before) and calculate the infinity
+norm of the vector difference.
+
+Let's first see the results in 2D:
+@code
+Refinement cycle 0
+   Number of active cells: 64
+   Number of degrees of freedom: 679 (594+85) [0.00162792 s]
+   Assembling...  [0.00108981 s]
+   Computing preconditioner... [0.0025959 s]
+   Solving...
+      Schur complement: 11 outer CG iterations for p  [0.00479603s ]
+      Block Schur preconditioner: 12 GMRES iterations [0.00441718 s]
+   l_infinity difference between solution vectors: 5.38258e-07
+
+Refinement cycle 1
+   Number of active cells: 160
+   Number of degrees of freedom: 1683 (1482+201) [0.00345707 s]
+   Assembling...  [0.00237417 s]
+   Computing preconditioner... [0.00605702 s]
+   Solving...
+      Schur complement: 11 outer CG iterations for p  [0.0123992s ]
+      Block Schur preconditioner: 12 GMRES iterations [0.011909 s]
+   l_infinity difference between solution vectors: 1.74658e-05
+
+Refinement cycle 2
+   Number of active cells: 376
+   Number of degrees of freedom: 3813 (3370+443) [0.00729299 s]
+   Assembling...  [0.00529909 s]
+   Computing preconditioner... [0.0167508 s]
+   Solving...
+      Schur complement: 11 outer CG iterations for p  [0.031672s ]
+      Block Schur preconditioner: 12 GMRES iterations [0.029232 s]
+   l_infinity difference between solution vectors: 7.81569e-06
+
+Refinement cycle 3
+   Number of active cells: 880
+   Number of degrees of freedom: 8723 (7722+1001) [0.017709 s]
+   Assembling...  [0.0126002 s]
+   Computing preconditioner... [0.0435679 s]
+   Solving...
+      Schur complement: 11 outer CG iterations for p  [0.0971651s ]
+      Block Schur preconditioner: 12 GMRES iterations [0.0992041 s]
+   l_infinity difference between solution vectors: 1.87249e-05
+
+Refinement cycle 4
+   Number of active cells: 2008
+   Number of degrees of freedom: 19383 (17186+2197) [0.039988 s]
+   Assembling...  [0.028281 s]
+   Computing preconditioner... [0.118314 s]
+   Solving...
+      Schur complement: 11 outer CG iterations for p  [0.252133s ]
+      Block Schur preconditioner: 13 GMRES iterations [0.269125 s]
+   l_infinity difference between solution vectors: 6.38657e-05
+
+Refinement cycle 5
+   Number of active cells: 4288
+   Number of degrees of freedom: 40855 (36250+4605) [0.0880702 s]
+   Assembling...  [0.0603511 s]
+   Computing preconditioner... [0.278339 s]
+   Solving...
+      Schur complement: 11 outer CG iterations for p  [0.53846s ]
+      Block Schur preconditioner: 13 GMRES iterations [0.578667 s]
+   l_infinity difference between solution vectors: 0.000173363
+@endcode
+
+We see that there is no huge difference in the solution time between the
+block Schur complement preconditioner solver and the Schur complement
+itself. The reason is simple: we used a direct solve as preconditioner for
+$A$ - so we cannot expect any gain by avoiding the inner iterations. We see
+that the number of iterations has slightly increased for GMRES, but all in
+all the two choices are fairly similar.
+
+The picture of course changes in 3D:
+
+@code
+Refinement cycle 0
+   Number of active cells: 32
+   Number of degrees of freedom: 1356 (1275+81) [0.00845218 s]
+   Assembling...  [0.019372 s]
+   Computing preconditioner... [0.00712395 s]
+   Solving...
+      Schur complement: 13 outer CG iterations for p  [0.0320101s ]
+      Block Schur preconditioner: 22 GMRES iterations [0.0048759 s]
+   l_infinity difference between solution vectors: 2.15942e-05
+
+Refinement cycle 1
+   Number of active cells: 144
+   Number of degrees of freedom: 5088 (4827+261) [0.0346942 s]
+   Assembling...  [0.0857739 s]
+   Computing preconditioner... [0.0465031 s]
+   Solving...
+      Schur complement: 14 outer CG iterations for p  [0.349258s ]
+      Block Schur preconditioner: 35 GMRES iterations [0.048759 s]
+   l_infinity difference between solution vectors: 1.77657e-05
+
+Refinement cycle 2
+   Number of active cells: 704
+   Number of degrees of freedom: 22406 (21351+1055) [0.175669 s]
+   Assembling...  [0.437447 s]
+   Computing preconditioner... [0.286435 s]
+   Solving...
+      Schur complement: 14 outer CG iterations for p  [3.65519s ]
+      Block Schur preconditioner: 63 GMRES iterations [0.497787 s]
+   l_infinity difference between solution vectors: 5.08078e-05
+
+Refinement cycle 3
+   Number of active cells: 3168
+   Number of degrees of freedom: 93176 (89043+4133) [0.790985 s]
+   Assembling...  [1.97598 s]
+   Computing preconditioner... [1.4325 s]
+   Solving...
+      Schur complement: 15 outer CG iterations for p  [29.9666s ]
+      Block Schur preconditioner: 128 GMRES iterations [5.02645 s]
+   l_infinity difference between solution vectors: 0.000119671
+
+Refinement cycle 4
+   Number of active cells: 11456
+   Number of degrees of freedom: 327808 (313659+14149) [3.44995 s]
+   Assembling...  [7.54772 s]
+   Computing preconditioner... [5.46306 s]
+   Solving...
+      Schur complement: 15 outer CG iterations for p  [139.987s ]
+      Block Schur preconditioner: 255 GMRES iterations [38.0946 s]
+   l_infinity difference between solution vectors: 0.00020793
+
+Refinement cycle 5
+   Number of active cells: 45056
+   Number of degrees of freedom: 1254464 (1201371+53093) [19.6795 s]
+   Assembling...  [28.6586 s]
+   Computing preconditioner... [22.401 s]
+   Solving...
+      Schur complement: 14 outer CG iterations for p  [796.767s ]
+      Block Schur preconditioner: 524 GMRES iterations [355.597 s]
+   l_infinity difference between solution vectors: 0.000501219
+@endcode
+
+Here, the block preconditioned solver is clearly superior to the Schur
+complement, but the advantage shrinks as the number of mesh points grows. This is
+because GMRES(k) scales worse with the problem size than CG, as we discussed
+above.  Nonetheless, the improvement by a factor of 3-6 for moderate problem
+sizes is quite impressive.
+
+<h5>Combining block preconditioner and multigrid</h5>
+An ultimate linear solver for this problem could be imagined as a
+combination of an optimal
+preconditioner for $A$ (e.g. multigrid) and the block preconditioner
+described above, which is the approach taken in the step-31
+tutorial program.
+
+<h5>No block matrices and vectors</h5>
+Another possibility that can be taken into account is to not set up a block
+system, but rather solve the system of velocity and pressure all at once. The
+options are direct solve with UMFPACK (2D) or GMRES with ILU
+preconditioning (3D). It should be straightforward to try that.
+
+
+
+<h4>More interesting testcases</h4>
+
+The program can of course also serve as a basis to compute the flow in more
+interesting cases. The original motivation to write this program was for it to
+be a starting point for some geophysical flow problems, such as the
+movement of magma under places where continental plates drift apart (for
+example mid-ocean ridges). Of course, in such places, the geometry is more
+complicated than the examples shown above, but it is not hard to accommodate
+for that.
+
+For example, by using the following modification of the boundary values
+function
+@code
+template <int dim>
+double
+BoundaryValues<dim>::value (const Point<dim>  &p,
+                            const unsigned int component) const
+{
+  Assert (component < this->n_components,
+          ExcIndexRange (component, 0, this->n_components));
+
+  const double x_offset = std::atan(p[1]*4)/3;
+
+  if (component == 0)
+    return (p[0] < x_offset ? -1 : (p[0] > x_offset ? 1 : 0));
+  return 0;
+}
+@endcode
+and the following way to generate the mesh as the domain
+$[-2,2]\times[-2,2]\times[-1,0]$
+@code
+    std::vector<unsigned int> subdivisions (dim, 1);
+    subdivisions[0] = 4;
+    if (dim>2)
+      subdivisions[1] = 4;
+
+    const Point<dim> bottom_left = (dim == 2 ?
+                                    Point<dim>(-2,-1) :
+                                    Point<dim>(-2,-2,-1));
+    const Point<dim> top_right   = (dim == 2 ?
+                                    Point<dim>(2,0) :
+                                    Point<dim>(2,2,0));
+
+    GridGenerator::subdivided_hyper_rectangle (triangulation,
+                                               subdivisions,
+                                               bottom_left,
+                                               top_right);
+@endcode
+then we get images where the fault line is curved:
+<TABLE WIDTH="60%" ALIGN="center">
+  <tr>
+    <td ALIGN="center">
+      <img src="http://www.dealii.org/images/steps/developer/step-22.3d-extension.png" alt="">
+    </td>
+
+    <td ALIGN="center">
+      <img src="http://www.dealii.org/images/steps/developer/step-22.3d-grid-extension.png" alt="">
+    </td>
+  </tr>
+</table>
+
diff --git a/examples/step-22/doc/tooltip b/examples/step-22/doc/tooltip
new file mode 100644
index 0000000..89e5500
--- /dev/null
+++ b/examples/step-22/doc/tooltip
@@ -0,0 +1 @@
+The Stokes equation on adaptive meshes.
diff --git a/examples/step-22/step-22.cc b/examples/step-22/step-22.cc
new file mode 100644
index 0000000..1986daa
--- /dev/null
+++ b/examples/step-22/step-22.cc
@@ -0,0 +1,1032 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2008 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Author: Wolfgang Bangerth, Texas A&M University, 2008
+ */
+
+
+// @sect3{Include files}
+
+// As usual, we start by including some well-known files:
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/utilities.h>
+
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/block_sparse_matrix.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/precondition.h>
+#include <deal.II/lac/constraint_matrix.h>
+
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/tria_boundary_lib.h>
+#include <deal.II/grid/grid_tools.h>
+#include <deal.II/grid/grid_refinement.h>
+
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_renumbering.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_system.h>
+#include <deal.II/fe/fe_values.h>
+
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/matrix_tools.h>
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/numerics/error_estimator.h>
+
+// Then we need to include the header file for the sparse direct solver
+// UMFPACK:
+#include <deal.II/lac/sparse_direct.h>
+
+// This includes the library for the incomplete LU factorization that will be
+// used as a preconditioner in 3D:
+#include <deal.II/lac/sparse_ilu.h>
+
+// This is C++:
+#include <iostream>
+#include <fstream>
+#include <sstream>
+
+// As in all programs, the namespace dealii is included:
+namespace Step22
+{
+  using namespace dealii;
+
+  // @sect3{Defining the inner preconditioner type}
+
+  // As explained in the introduction, we are going to use different
+  // preconditioners for two and three space dimensions, respectively. We
+  // distinguish between them by the use of the spatial dimension as a
+  // template parameter. See step-4 for details on templates. We are not going
+  // to create any preconditioner object here; all we do is create a class
+  // that holds a local typedef determining the preconditioner class so we can
+  // write our program in a dimension-independent way.
+  template <int dim>
+  struct InnerPreconditioner;
+
+  // In 2D, we are going to use a sparse direct solver as preconditioner:
+  template <>
+  struct InnerPreconditioner<2>
+  {
+    typedef SparseDirectUMFPACK type;
+  };
+
+  // And the ILU preconditioning in 3D, called by SparseILU:
+  template <>
+  struct InnerPreconditioner<3>
+  {
+    typedef SparseILU<double> type;
+  };
+
+
+  // @sect3{The <code>StokesProblem</code> class template}
+
+  // This is an adaptation of step-20, so the main class and the data types
+  // are the same as used there. In this example we also use adaptive grid
+  // refinement, which is handled in analogy to step-6. According to the
+  // discussion in the introduction, we are also going to use the
+  // ConstraintMatrix for implementing Dirichlet boundary conditions. Hence,
+  // we change the name <code>hanging_node_constraints</code> into
+  // <code>constraints</code>.
+  template <int dim>
+  class StokesProblem
+  {
+  public:
+    StokesProblem (const unsigned int degree);
+    void run ();
+
+  private:
+    void setup_dofs ();
+    void assemble_system ();
+    void solve ();
+    void output_results (const unsigned int refinement_cycle) const;
+    void refine_mesh ();
+
+    const unsigned int   degree;
+
+    Triangulation<dim>   triangulation;
+    FESystem<dim>        fe;
+    DoFHandler<dim>      dof_handler;
+
+    ConstraintMatrix     constraints;
+
+    BlockSparsityPattern      sparsity_pattern;
+    BlockSparseMatrix<double> system_matrix;
+
+    BlockVector<double> solution;
+    BlockVector<double> system_rhs;
+
+    // This one is new: We shall use a so-called shared pointer structure to
+    // access the preconditioner. Shared pointers are essentially just a
+    // convenient form of pointers. Several shared pointers can point to the
+    // same object (just like regular pointers), but when the last shared
+    // pointer object to point to a preconditioner object is deleted (for
+    // example if a shared pointer object goes out of scope, if the class of
+    // which it is a member is destroyed, or if the pointer is assigned a
+    // different preconditioner object) then the preconditioner object pointed
+    // to is also destroyed. This ensures that we don't have to manually track
+    // in how many places a preconditioner object is still referenced, it can
+    // never create a memory leak, and can never produce a dangling pointer to
+    // an already destroyed object:
+    std_cxx11::shared_ptr<typename InnerPreconditioner<dim>::type> A_preconditioner;
+  };
+
+  // @sect3{Boundary values and right hand side}
+
+  // As in step-20 and most other example programs, the next task is to define
+  // the data for the PDE: For the Stokes problem, we are going to use natural
+  // boundary values on parts of the boundary (i.e. homogeneous Neumann-type)
+  // for which we won't have to do anything special (the homogeneity implies
+  // that the corresponding terms in the weak form are simply zero), and
+  // boundary conditions on the velocity (Dirichlet-type) on the rest of the
+  // boundary, as described in the introduction.
+  //
+  // In order to enforce the Dirichlet boundary values on the velocity, we
+  // will use the VectorTools::interpolate_boundary_values function as usual
+  // which requires us to write a function object with as many components as
+  // the finite element has. In other words, we have to define the function on
+  // the $(u,p)$-space, but we are going to filter out the pressure component
+  // when interpolating the boundary values.
+
+  // The following function object is a representation of the boundary values
+  // described in the introduction:
+  template <int dim>
+  class BoundaryValues : public Function<dim>
+  {
+  public:
+    BoundaryValues () : Function<dim>(dim+1) {}
+
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+
+    virtual void vector_value (const Point<dim> &p,
+                               Vector<double>   &value) const;
+  };
+
+
+  template <int dim>
+  double
+  BoundaryValues<dim>::value (const Point<dim>  &p,
+                              const unsigned int component) const
+  {
+    Assert (component < this->n_components,
+            ExcIndexRange (component, 0, this->n_components));
+
+    if (component == 0)
+      return (p[0] < 0 ? -1 : (p[0] > 0 ? 1 : 0));
+    return 0;
+  }
+
+
+  template <int dim>
+  void
+  BoundaryValues<dim>::vector_value (const Point<dim> &p,
+                                     Vector<double>   &values) const
+  {
+    for (unsigned int c=0; c<this->n_components; ++c)
+      values(c) = BoundaryValues<dim>::value (p, c);
+  }
+
+
+
+  // We implement similar functions for the right hand side which for the
+  // current example is simply zero:
+  template <int dim>
+  class RightHandSide : public Function<dim>
+  {
+  public:
+    RightHandSide () : Function<dim>(dim+1) {}
+
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+
+    virtual void vector_value (const Point<dim> &p,
+                               Vector<double>   &value) const;
+
+  };
+
+
+  template <int dim>
+  double
+  RightHandSide<dim>::value (const Point<dim>  &/*p*/,
+                             const unsigned int /*component*/) const
+  {
+    return 0;
+  }
+
+
+  template <int dim>
+  void
+  RightHandSide<dim>::vector_value (const Point<dim> &p,
+                                    Vector<double>   &values) const
+  {
+    for (unsigned int c=0; c<this->n_components; ++c)
+      values(c) = RightHandSide<dim>::value (p, c);
+  }
+
+
+  // @sect3{Linear solvers and preconditioners}
+
+  // The linear solvers and preconditioners are discussed extensively in the
+  // introduction. Here, we create the respective objects that will be used.
+
+  // @sect4{The <code>InverseMatrix</code> class template}
+
+  // The <code>InverseMatrix</code> class represents the data structure for an
+  // inverse matrix. It is derived from the one in step-20. The only
+  // difference is that we now do include a preconditioner to the matrix since
+  // we will apply this class to different kinds of matrices that will require
+  // different preconditioners (in step-20 we did not use a preconditioner in
+  // this class at all). The types of matrix and preconditioner are passed to
+  // this class via template parameters, and matrix and preconditioner objects
+  // of these types will then be passed to the constructor when an
+  // <code>InverseMatrix</code> object is created. The member function
+  // <code>vmult</code> is, as in step-20, a multiplication with a vector,
+  // obtained by solving a linear system:
+  template <class Matrix, class Preconditioner>
+  class InverseMatrix : public Subscriptor
+  {
+  public:
+    InverseMatrix (const Matrix         &m,
+                   const Preconditioner &preconditioner);
+
+    void vmult (Vector<double>       &dst,
+                const Vector<double> &src) const;
+
+  private:
+    const SmartPointer<const Matrix> matrix;
+    const SmartPointer<const Preconditioner> preconditioner;
+  };
+
+
+  template <class Matrix, class Preconditioner>
+  InverseMatrix<Matrix,Preconditioner>::InverseMatrix (const Matrix &m,
+                                                       const Preconditioner &preconditioner)
+    :
+    matrix (&m),
+    preconditioner (&preconditioner)
+  {}
+
+
+  // This is the implementation of the <code>vmult</code> function.
+
+  // In this class we use a rather large tolerance for the solver control. The
+  // reason for this is that the function is used very frequently, and hence,
+  // any additional effort to make the residual in the CG solve smaller makes
+  // the solution more expensive. Note that we do not only use this class as a
+  // preconditioner for the Schur complement, but also when forming the
+  // inverse of the Laplace matrix – which is hence directly responsible
+  // for the accuracy of the solution itself, so we can't choose a too large
+  // tolerance, either.
+  template <class Matrix, class Preconditioner>
+  void InverseMatrix<Matrix,Preconditioner>::vmult (Vector<double>       &dst,
+                                                    const Vector<double> &src) const
+  {
+    SolverControl solver_control (src.size(), 1e-6*src.l2_norm());
+    SolverCG<>    cg (solver_control);
+
+    dst = 0;
+
+    cg.solve (*matrix, dst, src, *preconditioner);
+  }
+
+
+  // @sect4{The <code>SchurComplement</code> class template}
+
+  // This class implements the Schur complement discussed in the introduction.
+  // It is in analogy to step-20.  Though, we now call it with a template
+  // parameter <code>Preconditioner</code> in order to access that when
+  // specifying the respective type of the inverse matrix class. As a
+  // consequence of the definition above, the declaration
+  // <code>InverseMatrix</code> now contains the second template parameter for
+  // a preconditioner class as above, which affects the
+  // <code>SmartPointer</code> object <code>A_inverse</code> as well.
+  template <class Preconditioner>
+  class SchurComplement : public Subscriptor
+  {
+  public:
+    SchurComplement (const BlockSparseMatrix<double> &system_matrix,
+                     const InverseMatrix<SparseMatrix<double>, Preconditioner> &A_inverse);
+
+    void vmult (Vector<double>       &dst,
+                const Vector<double> &src) const;
+
+  private:
+    const SmartPointer<const BlockSparseMatrix<double> > system_matrix;
+    const SmartPointer<const InverseMatrix<SparseMatrix<double>, Preconditioner> > A_inverse;
+
+    mutable Vector<double> tmp1, tmp2;
+  };
+
+
+
+  template <class Preconditioner>
+  SchurComplement<Preconditioner>::
+  SchurComplement (const BlockSparseMatrix<double> &system_matrix,
+                   const InverseMatrix<SparseMatrix<double>,Preconditioner> &A_inverse)
+    :
+    system_matrix (&system_matrix),
+    A_inverse (&A_inverse),
+    tmp1 (system_matrix.block(0,0).m()),
+    tmp2 (system_matrix.block(0,0).m())
+  {}
+
+
+  template <class Preconditioner>
+  void SchurComplement<Preconditioner>::vmult (Vector<double>       &dst,
+                                               const Vector<double> &src) const
+  {
+    system_matrix->block(0,1).vmult (tmp1, src);
+    A_inverse->vmult (tmp2, tmp1);
+    system_matrix->block(1,0).vmult (dst, tmp2);
+  }
+
+
+  // @sect3{StokesProblem class implementation}
+
+  // @sect4{StokesProblem::StokesProblem}
+
+  // The constructor of this class looks very similar to the one of
+  // step-20. The constructor initializes the variables for the polynomial
+  // degree, triangulation, finite element system and the dof handler. The
+  // underlying polynomial functions are of order <code>degree+1</code> for
+  // the vector-valued velocity components and of order <code>degree</code>
+  // for the pressure.  This gives the LBB-stable element pair
+  // $Q_{degree+1}^d\times Q_{degree}$, often referred to as the Taylor-Hood
+  // element.
+  //
+  // Note that we initialize the triangulation with a MeshSmoothing argument,
+  // which ensures that the refinement of cells is done in a way that the
+  // approximation of the PDE solution remains well-behaved (problems arise if
+  // grids are too unstructured), see the documentation of
+  // <code>Triangulation::MeshSmoothing</code> for details.
+  template <int dim>
+  StokesProblem<dim>::StokesProblem (const unsigned int degree)
+    :
+    degree (degree),
+    triangulation (Triangulation<dim>::maximum_smoothing),
+    fe (FE_Q<dim>(degree+1), dim,
+        FE_Q<dim>(degree), 1),
+    dof_handler (triangulation)
+  {}
+
+
+  // @sect4{StokesProblem::setup_dofs}
+
+  // Given a mesh, this function associates the degrees of freedom with it and
+  // creates the corresponding matrices and vectors. At the beginning it also
+  // releases the pointer to the preconditioner object (if the shared pointer
+  // pointed at anything at all at this point) since it will definitely not be
+  // needed any more after this point and will have to be re-computed after
+  // assembling the matrix, and unties the sparse matrix from its sparsity
+  // pattern object.
+  //
+  // We then proceed with distributing degrees of freedom and renumbering
+  // them: In order to make the ILU preconditioner (in 3D) work efficiently,
+  // it is important to enumerate the degrees of freedom in such a way that it
+  // reduces the bandwidth of the matrix, or maybe more importantly: in such a
+  // way that the ILU is as close as possible to a real LU decomposition. On
+  // the other hand, we need to preserve the block structure of velocity and
+  // pressure already seen in step-20 and step-21. This is done in two
+  // steps: First, all dofs are renumbered to improve the ILU and then we
+  // renumber once again by components. Since
+  // <code>DoFRenumbering::component_wise</code> does not touch the
+  // renumbering within the individual blocks, the basic renumbering from the
+  // first step remains. As for how to renumber degrees of freedom to improve
+  // the ILU: deal.II has a number of algorithms that attempt to find
+  // orderings to improve ILUs, or reduce the bandwidth of matrices, or
+  // optimize some other aspect. The DoFRenumbering namespace shows a
+  // comparison of the results we obtain with several of these algorithms
+  // based on the testcase discussed here in this tutorial program. Here, we
+  // will use the traditional Cuthill-McKee algorithm already used in some of
+  // the previous tutorial programs.  In the <a href="#improved-ilu">section
+  // on improved ILU</a> we're going to discuss this issue in more detail.
+
+  // There is one more change compared to previous tutorial programs: There is
+  // no reason to sort the <code>dim</code> velocity components
+  // individually. In fact, rather than first enumerating all $x$-velocities,
+  // then all $y$-velocities, etc, we would like to keep all velocities at the
+  // same location together and only separate between velocities (all
+  // components) and pressures. By default, this is not what the
+  // DoFRenumbering::component_wise function does: it treats each vector
+  // component separately; what we have to do is group several components into
+  // "blocks" and pass this block structure to that function. Consequently, we
+  // allocate a vector <code>block_component</code> with as many elements as
+  // there are components and describe all velocity components to correspond
+  // to block 0, while the pressure component will form block 1:
+  template <int dim>
+  void StokesProblem<dim>::setup_dofs ()
+  {
+    A_preconditioner.reset ();
+    system_matrix.clear ();
+
+    dof_handler.distribute_dofs (fe);
+    DoFRenumbering::Cuthill_McKee (dof_handler);
+
+    std::vector<unsigned int> block_component (dim+1,0);
+    block_component[dim] = 1;
+    DoFRenumbering::component_wise (dof_handler, block_component);
+
+    // Now comes the implementation of Dirichlet boundary conditions, which
+    // should be evident after the discussion in the introduction. All that
+    // changed is that the function already appears in the setup functions,
+    // whereas we were used to seeing it in some assembly routine. Further down
+    // below where we set up the mesh, we will associate the top boundary
+    // where we impose Dirichlet boundary conditions with boundary indicator
+    // 1.  We will have to pass this boundary indicator as second argument to
+    // the function below interpolating boundary values.  There is one more
+    // thing, though.  The function describing the Dirichlet conditions was
+    // defined for all components, both velocity and pressure. However, the
+    // Dirichlet conditions are to be set for the velocity only.  To this end,
+    // we use a ComponentMask that only selects the velocity components. The
+    // component mask is obtained from the finite element by specifying the
+    // particular components we want. Since we use adaptively refined grids
+    // the constraint matrix needs to be first filled with hanging node
+    // constraints generated from the DoF handler. Note the order of the two
+    // functions — we first compute the hanging node constraints, and
+    // then insert the boundary values into the constraint matrix. This makes
+    // sure that we respect H<sup>1</sup> conformity on boundaries with
+    // hanging nodes (in three space dimensions), where the hanging node needs
+    // to dominate the Dirichlet boundary values.
+    {
+      constraints.clear ();
+
+      FEValuesExtractors::Vector velocities(0);
+      DoFTools::make_hanging_node_constraints (dof_handler,
+                                               constraints);
+      VectorTools::interpolate_boundary_values (dof_handler,
+                                                1,
+                                                BoundaryValues<dim>(),
+                                                constraints,
+                                                fe.component_mask(velocities));
+    }
+
+    constraints.close ();
+
+    // In analogy to step-20, we count the dofs in the individual components.
+    // We could do this in the same way as there, but we want to operate on
+    // the block structure we used already for the renumbering: The function
+    // <code>DoFTools::count_dofs_per_block</code> does the same as
+    // <code>DoFTools::count_dofs_per_component</code>, but now grouped as
+    // velocity and pressure block via <code>block_component</code>.
+    std::vector<types::global_dof_index> dofs_per_block (2);
+    DoFTools::count_dofs_per_block (dof_handler, dofs_per_block, block_component);
+    const unsigned int n_u = dofs_per_block[0],
+                       n_p = dofs_per_block[1];
+
+    std::cout << "   Number of active cells: "
+              << triangulation.n_active_cells()
+              << std::endl
+              << "   Number of degrees of freedom: "
+              << dof_handler.n_dofs()
+              << " (" << n_u << '+' << n_p << ')'
+              << std::endl;
+
+    // The next task is to allocate a sparsity pattern for the system matrix
+    // we will create. We could do this in the same way as in step-20,
+    // i.e. directly build an object of type SparsityPattern through
+    // DoFTools::make_sparsity_pattern. However, there is a major reason not
+    // to do so: In 3D, the function DoFTools::max_couplings_between_dofs
+    // yields a conservative but rather large number for the coupling between
+    // the individual dofs, so that the memory initially provided for the
+    // creation of the sparsity pattern of the matrix is far too much -- so
+    // much actually that the initial sparsity pattern won't even fit into the
+    // physical memory of most systems already for moderately-sized 3D
+    // problems, see also the discussion in step-18.  Instead, we first build
+    // a temporary object that uses a different data structure that doesn't
+    // require allocating more memory than necessary but isn't suitable for
+    // use as a basis of SparseMatrix or BlockSparseMatrix objects; in a
+    // second step we then copy this object into an object of
+    // BlockSparsityPattern. This is entirely analogous to what we already did
+    // in step-11 and step-18.
+    //
+    // All this is done inside a new scope, which
+    // means that the memory of <code>dsp</code> will be released once the
+    // information has been copied to <code>sparsity_pattern</code>.
+    {
+      BlockDynamicSparsityPattern dsp (2,2);
+
+      dsp.block(0,0).reinit (n_u, n_u);
+      dsp.block(1,0).reinit (n_p, n_u);
+      dsp.block(0,1).reinit (n_u, n_p);
+      dsp.block(1,1).reinit (n_p, n_p);
+
+      dsp.collect_sizes();
+
+      DoFTools::make_sparsity_pattern (dof_handler, dsp, constraints, false);
+      sparsity_pattern.copy_from (dsp);
+    }
+
+    // Finally, the system matrix, solution and right hand side are created
+    // from the block structure as in step-20:
+    system_matrix.reinit (sparsity_pattern);
+
+    solution.reinit (2);
+    solution.block(0).reinit (n_u);
+    solution.block(1).reinit (n_p);
+    solution.collect_sizes ();
+
+    system_rhs.reinit (2);
+    system_rhs.block(0).reinit (n_u);
+    system_rhs.block(1).reinit (n_p);
+    system_rhs.collect_sizes ();
+  }
+
+
+  // @sect4{StokesProblem::assemble_system}
+
+  // The assembly process follows the discussion in step-20 and in the
+  // introduction. We use the well-known abbreviations for the data structures
+  // that hold the local matrix, right hand side, and global numbering of the
+  // degrees of freedom for the present cell.
+  template <int dim>
+  void StokesProblem<dim>::assemble_system ()
+  {
+    system_matrix=0;
+    system_rhs=0;
+
+    QGauss<dim>   quadrature_formula(degree+2);
+
+    FEValues<dim> fe_values (fe, quadrature_formula,
+                             update_values    |
+                             update_quadrature_points  |
+                             update_JxW_values |
+                             update_gradients);
+
+    const unsigned int   dofs_per_cell   = fe.dofs_per_cell;
+
+    const unsigned int   n_q_points      = quadrature_formula.size();
+
+    FullMatrix<double>   local_matrix (dofs_per_cell, dofs_per_cell);
+    Vector<double>       local_rhs (dofs_per_cell);
+
+    std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+
+    const RightHandSide<dim>          right_hand_side;
+    std::vector<Vector<double> >      rhs_values (n_q_points,
+                                                  Vector<double>(dim+1));
+
+    // Next, we need two objects that work as extractors for the FEValues
+    // object. Their use is explained in detail in the report on @ref
+    // vector_valued :
+    const FEValuesExtractors::Vector velocities (0);
+    const FEValuesExtractors::Scalar pressure (dim);
+
+    // As an extension over step-20 and step-21, we include a few
+    // optimizations that make assembly much faster for this particular
+    // problem.  The improvements are based on the observation that we do a
+    // few calculations too many times when we do as in step-20: The symmetric
+    // gradient actually has <code>dofs_per_cell</code> different values per
+    // quadrature point, but we extract it
+    // <code>dofs_per_cell*dofs_per_cell</code> times from the FEValues object
+    // - for both the loop over <code>i</code> and the inner loop over
+    // <code>j</code>. In 3d, that means evaluating it $89^2=7921$ instead of
+    // $89$ times, a not insignificant difference.
+    //
+    // So what we're going to do here is to avoid such repeated calculations
+    // by getting a vector of rank-2 tensors (and similarly for the divergence
+    // and the basis function value on pressure) at the quadrature point prior
+    // to starting the loop over the dofs on the cell. First, we create the
+    // respective objects that will hold these values. Then, we start the loop
+    // over all cells and the loop over the quadrature points, where we first
+    // extract these values. There is one more optimization we implement here:
+    // the local matrix (as well as the global one) is going to be symmetric,
+    // since all the operations involved are symmetric with respect to $i$ and
+    // $j$. This is implemented by simply running the inner loop not to
+    // <code>dofs_per_cell</code>, but only up to <code>i</code>, the index of
+    // the outer loop.
+    std::vector<SymmetricTensor<2,dim> > symgrad_phi_u (dofs_per_cell);
+    std::vector<double>                  div_phi_u   (dofs_per_cell);
+    std::vector<double>                  phi_p       (dofs_per_cell);
+
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+    for (; cell!=endc; ++cell)
+      {
+        fe_values.reinit (cell);
+        local_matrix = 0;
+        local_rhs = 0;
+
+        right_hand_side.vector_value_list(fe_values.get_quadrature_points(),
+                                          rhs_values);
+
+        for (unsigned int q=0; q<n_q_points; ++q)
+          {
+            for (unsigned int k=0; k<dofs_per_cell; ++k)
+              {
+                symgrad_phi_u[k] = fe_values[velocities].symmetric_gradient (k, q);
+                div_phi_u[k]     = fe_values[velocities].divergence (k, q);
+                phi_p[k]         = fe_values[pressure].value (k, q);
+              }
+
+            for (unsigned int i=0; i<dofs_per_cell; ++i)
+              {
+                for (unsigned int j=0; j<=i; ++j)
+                  {
+                    local_matrix(i,j) += (2 * (symgrad_phi_u[i] * symgrad_phi_u[j])
+                                          - div_phi_u[i] * phi_p[j]
+                                          - phi_p[i] * div_phi_u[j]
+                                          + phi_p[i] * phi_p[j])
+                                         * fe_values.JxW(q);
+
+                  }
+
+                // For the right-hand side we use the fact that the shape
+                // functions are only non-zero in one component (because our
+                // elements are primitive).  Instead of multiplying the tensor
+                // representing the dim+1 values of shape function i with the
+                // whole right-hand side vector, we only look at the only
+                // non-zero component. The function
+                // FiniteElement::system_to_component_index(i) will return
+                // which component this shape function lives in (0=x velocity,
+                // 1=y velocity, 2=pressure in 2d), which we use to pick out
+                // the correct component of the right-hand side vector to
+                // multiply with.
+
+                const unsigned int component_i =
+                  fe.system_to_component_index(i).first;
+                local_rhs(i) += fe_values.shape_value(i,q) *
+                                rhs_values[q](component_i) *
+                                fe_values.JxW(q);
+              }
+          }
+
+        // Note that in the above computation of the local matrix contribution
+        // we added the term <code> phi_p[i] * phi_p[j] </code>, yielding a
+        // pressure mass matrix in the $(1,1)$ block of the matrix as
+        // discussed in the introduction. That this term only ends up in the
+        // $(1,1)$ block stems from the fact that both of the factors in
+        // <code>phi_p[i] * phi_p[j]</code> are only non-zero when all the
+        // other terms vanish (and the other way around).
+        //
+        // Note also that operator* is overloaded for symmetric tensors,
+        // yielding the scalar product between the two tensors in the first
+        // line of the local matrix contribution.
+
+        // Before we can write the local data into the global matrix (and
+        // simultaneously use the ConstraintMatrix object to apply Dirichlet
+        // boundary conditions and eliminate hanging node constraints, as we
+        // discussed in the introduction), we have to be careful about one
+        // thing, though. We have only built half of the local matrix
+        // because of symmetry, but we're going to save the full system matrix
+        // in order to use the standard functions for solution. This is done
+        // by copying each entry of the computed lower triangle into its mirror
+        // position in the (so far empty) upper triangle of the local matrix.
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          for (unsigned int j=i+1; j<dofs_per_cell; ++j)
+            local_matrix(i,j) = local_matrix(j,i);
+
+        cell->get_dof_indices (local_dof_indices);
+        constraints.distribute_local_to_global (local_matrix, local_rhs,
+                                                local_dof_indices,
+                                                system_matrix, system_rhs);
+      }
+
+    // Before we're going to solve this linear system, we generate a
+    // preconditioner for the velocity-velocity matrix, i.e.,
+    // <code>block(0,0)</code> in the system matrix. As mentioned above, this
+    // depends on the spatial dimension. Since the two classes described by
+    // the <code>InnerPreconditioner::type</code> typedef have the same
+    // interface, we do not have to do anything different whether we want to
+    // use a sparse direct solver or an ILU:
+    std::cout << "   Computing preconditioner..." << std::endl << std::flush;
+
+    A_preconditioner
+      = std_cxx11::shared_ptr<typename InnerPreconditioner<dim>::type>(new typename InnerPreconditioner<dim>::type());
+    A_preconditioner->initialize (system_matrix.block(0,0),
+                                  typename InnerPreconditioner<dim>::type::AdditionalData());
+
+  }
+
+
+
+  // @sect4{StokesProblem::solve}
+
+  // After the discussion in the introduction and the definition of the
+  // respective classes above, the implementation of the <code>solve</code>
+  // function is rather straight-forward and done in a similar way as in
+  // step-20. To start with, we need an object of the
+  // <code>InverseMatrix</code> class that represents the inverse of the
+  // matrix A. As described in the introduction, the inverse is generated with
+  // the help of an inner preconditioner of type
+  // <code>InnerPreconditioner::type</code>.
+  template <int dim>
+  void StokesProblem<dim>::solve ()
+  {
+    const InverseMatrix<SparseMatrix<double>,
+          typename InnerPreconditioner<dim>::type>
+          A_inverse (system_matrix.block(0,0), *A_preconditioner);
+    Vector<double> tmp (solution.block(0).size());
+
+    // This is as in step-20. We generate the right hand side $B A^{-1} F - G$
+    // for the Schur complement and an object that represents the respective
+    // linear operation $B A^{-1} B^T$, now with a template parameter
+    // indicating the preconditioner - in accordance with the definition of
+    // the class.
+    {
+      Vector<double> schur_rhs (solution.block(1).size());
+      A_inverse.vmult (tmp, system_rhs.block(0));
+      system_matrix.block(1,0).vmult (schur_rhs, tmp);
+      schur_rhs -= system_rhs.block(1);
+
+      SchurComplement<typename InnerPreconditioner<dim>::type>
+      schur_complement (system_matrix, A_inverse);
+
+      // The usual control structures for the solver call are created...
+      SolverControl solver_control (solution.block(1).size(),
+                                    1e-6*schur_rhs.l2_norm());
+      SolverCG<>    cg (solver_control);
+
+      // Now to the preconditioner to the Schur complement. As explained in
+      // the introduction, the preconditioning is done by a mass matrix in the
+      // pressure variable.  It is stored in the $(1,1)$ block of the system
+      // matrix (that is not used anywhere else but in preconditioning).
+      //
+      // Actually, the solver needs to have the preconditioner in the form
+      // $P^{-1}$, so we need to create an inverse operation. Once again, we
+      // use an object of the class <code>InverseMatrix</code>, which
+      // implements the <code>vmult</code> operation that is needed by the
+      // solver.  In this case, we have to invert the pressure mass matrix. As
+      // it already turned out in earlier tutorial programs, the inversion of
+      // a mass matrix is a rather cheap and straight-forward operation
+      // (compared to, e.g., a Laplace matrix). The CG method with ILU
+      // preconditioning converges in 5-10 steps, independently of the mesh
+      // size.  This is precisely what we do here: We choose another ILU
+      // preconditioner and take it along to the InverseMatrix object via the
+      // corresponding template parameter.  A CG solver is then called within
+      // the vmult operation of the inverse matrix.
+      //
+      // An alternative that is cheaper to build, but needs more iterations
+      // afterwards, would be to choose an SSOR preconditioner with factor
+      // 1.2. It needs about twice the number of iterations, but the costs for
+      // its generation are almost negligible.
+      SparseILU<double> preconditioner;
+      preconditioner.initialize (system_matrix.block(1,1),
+                                 SparseILU<double>::AdditionalData());
+
+      InverseMatrix<SparseMatrix<double>,SparseILU<double> >
+      m_inverse (system_matrix.block(1,1), preconditioner);
+
+      // With the Schur complement and an efficient preconditioner at hand, we
+      // can solve the respective equation for the pressure (i.e. block 1 in
+      // the solution vector) in the usual way:
+      cg.solve (schur_complement, solution.block(1), schur_rhs,
+                m_inverse);
+
+      // After this first solution step, the hanging node constraints have to
+      // be distributed to the solution in order to achieve a consistent
+      // pressure field.
+      constraints.distribute (solution);
+
+      std::cout << "  "
+                << solver_control.last_step()
+                << " outer CG Schur complement iterations for pressure"
+                << std::endl;
+    }
+
+    // As in step-20, we finally need to solve for the velocity equation where
+    // we plug in the solution to the pressure equation. This involves only
+    // objects we already know - so we simply multiply $p$ by $B^T$, subtract
+    // the result from the right hand side and multiply by the inverse of $A$.
+    // At the end, we need to distribute the constraints from hanging nodes in
+    // order to obtain a consistent flow field:
+    {
+      system_matrix.block(0,1).vmult (tmp, solution.block(1));
+      tmp *= -1;
+      tmp += system_rhs.block(0);
+
+      A_inverse.vmult (solution.block(0), tmp);
+
+      constraints.distribute (solution);
+    }
+  }
+
+
+  // @sect4{StokesProblem::output_results}
+
+  // The next function generates graphical output. In this example, we are
+  // going to use the VTK file format.  We attach names to the individual
+  // variables in the problem: <code>velocity</code> to the <code>dim</code>
+  // components of velocity and <code>pressure</code> to the pressure.
+  //
+  // Not all visualization programs have the ability to group individual
+  // vector components into a vector to provide vector plots; in particular,
+  // this holds for some VTK-based visualization programs. In this case, the
+  // logical grouping of components into vectors should already be described
+  // in the file containing the data. In other words, what we need to do is
+  // provide our output writers with a way to know which of the components of
+  // the finite element logically form a vector (with $d$ components in $d$
+  // space dimensions) rather than letting them assume that we simply have a
+  // bunch of scalar fields.  This is achieved using the members of the
+  // <code>DataComponentInterpretation</code> namespace: as with the names,
+  // we create a vector in which the first <code>dim</code> components refer
+  // to the velocities and are given the tag
+  // <code>DataComponentInterpretation::component_is_part_of_vector</code>; we
+  // finally push one tag
+  // <code>DataComponentInterpretation::component_is_scalar</code> to describe
+  // the grouping of the pressure variable.
+
+  // The rest of the function is then the same as in step-20.
+  template <int dim>
+  void
+  StokesProblem<dim>::output_results (const unsigned int refinement_cycle)  const
+  {
+    std::vector<std::string> solution_names (dim, "velocity");
+    solution_names.push_back ("pressure");
+
+    std::vector<DataComponentInterpretation::DataComponentInterpretation>
+    data_component_interpretation
+    (dim, DataComponentInterpretation::component_is_part_of_vector);
+    data_component_interpretation
+    .push_back (DataComponentInterpretation::component_is_scalar);
+
+    DataOut<dim> data_out;
+    data_out.attach_dof_handler (dof_handler);
+    data_out.add_data_vector (solution, solution_names,
+                              DataOut<dim>::type_dof_data,
+                              data_component_interpretation);
+    data_out.build_patches ();
+
+    std::ostringstream filename;
+    filename << "solution-"
+             << Utilities::int_to_string (refinement_cycle, 2)
+             << ".vtk";
+
+    std::ofstream output (filename.str().c_str());
+    data_out.write_vtk (output);
+  }
+
+
+  // @sect4{StokesProblem::refine_mesh}
+
+  // This is the last interesting function of the <code>StokesProblem</code>
+  // class.  As indicated by its name, it takes the solution to the problem
+  // and refines the mesh where this is needed. The procedure is the same as
+  // in step-6, except that we base the refinement only on the pressure: we
+  // call the Kelly error estimator with a ComponentMask that selects the
+  // single scalar pressure component (we get such a mask from the finite
+  // element class by specifying the component we want). Additionally, we do
+  // not coarsen the grid again, which is why the last argument passed to
+  // GridRefinement::refine_and_coarsen_fixed_number below is 0.0:
+  template <int dim>
+  void
+  StokesProblem<dim>::refine_mesh ()
+  {
+    Vector<float> estimated_error_per_cell (triangulation.n_active_cells());
+
+    FEValuesExtractors::Scalar pressure(dim);
+    KellyErrorEstimator<dim>::estimate (dof_handler,
+                                        QGauss<dim-1>(degree+1),
+                                        typename FunctionMap<dim>::type(),
+                                        solution,
+                                        estimated_error_per_cell,
+                                        fe.component_mask(pressure));
+
+    GridRefinement::refine_and_coarsen_fixed_number (triangulation,
+                                                     estimated_error_per_cell,
+                                                     0.3, 0.0);
+    triangulation.execute_coarsening_and_refinement ();
+  }
+
+
+  // @sect4{StokesProblem::run}
+
+  // The last step in the Stokes class is, as usual, the function that
+  // generates the initial grid and calls the other functions in the
+  // respective order.
+  //
+  // We start off with a rectangle of size $4 \times 1$ (in 2d) or $4 \times 1
+  // \times 1$ (in 3d), placed in $R^2/R^3$ as $(-2,2)\times(-1,0)$ or
+  // $(-2,2)\times(0,1)\times(-1,0)$, respectively. It is natural to start
+  // with equal mesh size in each direction, so we subdivide the initial
+  // rectangle into four cells in the first coordinate direction. To limit
+  // the scope of the variables involved in the creation of the mesh to the
+  // range where we actually need them, we put the entire block between a
+  // pair of braces:
+  template <int dim>
+  void StokesProblem<dim>::run ()
+  {
+    {
+      std::vector<unsigned int> subdivisions (dim, 1);
+      subdivisions[0] = 4;
+
+      const Point<dim> bottom_left = (dim == 2 ?
+                                      Point<dim>(-2,-1) :
+                                      Point<dim>(-2,0,-1));
+      const Point<dim> top_right   = (dim == 2 ?
+                                      Point<dim>(2,0) :
+                                      Point<dim>(2,1,0));
+
+      GridGenerator::subdivided_hyper_rectangle (triangulation,
+                                                 subdivisions,
+                                                 bottom_left,
+                                                 top_right);
+    }
+
+    // A boundary indicator of 1 is set to all boundaries that are subject to
+    // Dirichlet boundary conditions, i.e.  to faces that are located at 0 in
+    // the last coordinate direction. See the example description above for
+    // details.
+    for (typename Triangulation<dim>::active_cell_iterator
+         cell = triangulation.begin_active();
+         cell != triangulation.end(); ++cell)
+      for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+        if (cell->face(f)->center()[dim-1] == 0)
+          cell->face(f)->set_all_boundary_ids(1);
+
+
+    // We then apply an initial refinement before solving for the first
+    // time. In 3D, there are going to be more degrees of freedom, so we
+    // refine less there:
+    triangulation.refine_global (4-dim);
+
+    // As first seen in step-6, we cycle over the different refinement levels
+    // and refine (except for the first cycle), set up the degrees of freedom
+    // and matrices, assemble, solve and create output:
+    for (unsigned int refinement_cycle = 0; refinement_cycle<6;
+         ++refinement_cycle)
+      {
+        std::cout << "Refinement cycle " << refinement_cycle << std::endl;
+
+        if (refinement_cycle > 0)
+          refine_mesh ();
+
+        setup_dofs ();
+
+        std::cout << "   Assembling..." << std::endl << std::flush;
+        assemble_system ();
+
+        std::cout << "   Solving..." << std::flush;
+        solve ();
+
+        output_results (refinement_cycle);
+
+        std::cout << std::endl;
+      }
+  }
+}
+
+
+// @sect3{The <code>main</code> function}
+
+// The main function is the same as in step-20. We pass the element degree
+// (here 1) and choose the space dimension (here 2) via the template slot.
+int main ()
+{
+  try
+    {
+      using namespace dealii;
+      using namespace Step22;
+
+      StokesProblem<2> flow_problem(1);
+      flow_problem.run ();
+    }
+  catch (std::exception &exc)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+
+      return 1;
+    }
+  catch (...)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    }
+
+  return 0;
+}
diff --git a/examples/step-23/CMakeLists.txt b/examples/step-23/CMakeLists.txt
new file mode 100644
index 0000000..a2a8ee3
--- /dev/null
+++ b/examples/step-23/CMakeLists.txt
@@ -0,0 +1,39 @@
+##
+#  CMake script for the step-23 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-23")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-23/doc/builds-on b/examples/step-23/doc/builds-on
new file mode 100644
index 0000000..48a0f73
--- /dev/null
+++ b/examples/step-23/doc/builds-on
@@ -0,0 +1 @@
+step-4
diff --git a/examples/step-23/doc/intro.dox b/examples/step-23/doc/intro.dox
new file mode 100644
index 0000000..c177d5e
--- /dev/null
+++ b/examples/step-23/doc/intro.dox
@@ -0,0 +1,418 @@
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+@dealiiVideoLecture{28}
+
+This is the first of a number of tutorial programs that will finally
+cover "real" time-dependent problems, not the slightly odd form of time
+dependence found in step-18 or the DAE model of step-21. In particular, this program introduces
+the wave equation in a bounded domain. Later, step-24
+will consider an example of absorbing boundary conditions, and @ref
+step_25 "step-25" a kind of nonlinear wave equation producing
+solutions called solitons.
+
+The wave equation in its prototypical form reads as follows: find
+$u(x,t), x\in\Omega, t\in[0,T]$ that satisfies
+@f{eqnarray*}
+	\frac{\partial^2 u}{\partial t^2}
+	-
+	\Delta u &=& f
+	\qquad
+	\textrm{in}\ \Omega\times [0,T],
+\\
+	u(x,t) &=& g
+	\qquad
+	\textrm{on}\ \partial\Omega\times [0,T],
+\\
+	u(x,0) &=& u_0(x)
+	\qquad
+	\textrm{in}\ \Omega,
+\\
+	\frac{\partial u(x,0)}{\partial t} &=& u_1(x)
+	\qquad
+	\textrm{in}\ \Omega.
+@f}
+Note that since this is an equation with second-order time
+derivatives, we need to pose two initial conditions, one for the value
+and one for the time derivative of the solution.
+
+Physically, the equation describes the motion of an elastic medium. In
+2-d, one can think of how a membrane moves if subjected to a
+force. The Dirichlet boundary conditions above indicate that the
+membrane is clamped at the boundary at a height $g(x,t)$ (this height
+might be moving as well — think of people holding a blanket and
+shaking it up and down). The first initial condition equals the
+initial deflection of the membrane, whereas the second one gives its
+velocity. For example, one could think of pushing the membrane down
+with a finger and then letting it go at $t=0$ (nonzero deflection but
+zero initial velocity), or hitting it with a hammer at $t=0$ (zero
+deflection but nonzero velocity). Both cases would induce motion in
+the membrane.
+
+
+<h3>Time discretization</h3>
+
+<h4>Method of lines or Rothe's method?</h4>
+There is a long-standing debate in the numerical analysis community
+over whether a discretization of time dependent equations should
+involve first discretizing the time variable leading to a stationary
+PDE at each time step that is then solved using standard finite
+element techniques (this is called the Rothe method), or whether
+one should first discretize the spatial variables, leading to a large
+system of ordinary differential equations that can then be handled by
+one of the usual ODE solvers (this is called the method of lines).
+
+Both of these methods have advantages and disadvantages.
+Traditionally, people have preferred the method of lines, since it
+allows one to use the very well developed machinery of high-order ODE
+solvers available for the rather stiff ODEs resulting from this
+approach, including step length control and estimation of the temporal
+error.
+
+On the other hand, Rothe's method becomes awkward when using
+higher-order time stepping methods, since one then has to write down a
+PDE that couples the solution of the present time step not only with
+that at the previous time step, but possibly also even earlier
+solutions, leading to a significant number of terms.
+
+For these reasons, the method of lines was the method of choice for a
+long time. However, it has one big drawback: if we discretize the
+spatial variable first, leading to a large ODE system, we have to
+choose a mesh once and for all. If we are willing to do this, then
+this is a legitimate and probably superior approach.
+
+If, on the other hand, we are looking at the wave equation and many
+other time dependent problems, we find that the character of a
+solution changes as time progresses. For example, for the wave
+equation, we may have a single wave travelling through the domain,
+where the solution is smooth or even constant in front of and behind
+the wave — adaptivity would be really useful for such cases, but the
+key is that the area where we need to refine the mesh changes from
+time step to time step!
+
+If we intend to go that way, i.e. choose a different mesh for each
+time step (or set of time steps), then the method of lines is not
+appropriate any more: instead of getting one ODE system with a number
+of variables equal to the number of unknowns in the finite element
+mesh, our number of unknowns now changes all the time, a fact that
+standard ODE solvers are certainly not prepared to deal with at
+all. On the other hand, for the Rothe method, we just get a PDE for
+each time step that we may choose to discretize independently of the
+mesh used for the previous time step; this approach is not without
+perils and difficulties, but at least is a sensible and well-defined
+procedure.
+
+For all these reasons, for the present program, we choose to use the
+Rothe method for discretization, i.e. we first discretize in time and
+then in space. We will not actually use adaptive meshes at all, since
+this involves a large amount of additional code, but we will comment
+on this some more in the <a href="#Results">results section below</a>.
+
+
+<h4>Rothe's method!</h4>
+
+Given these considerations, here is how we will proceed: let us first
+define a simple time stepping method for this second order problem,
+and then in a second step do the spatial discretization, i.e. we will
+follow Rothe's approach.
+
+For the first step, let us take a little detour first: in order to
+discretize a second time derivative, we can either discretize it
+directly, or we can introduce an additional variable and transform the
+system into a first order system. In many cases, this turns out to be
+equivalent, but dealing with first order systems is often simpler. To
+this end, let us introduce
+@f[
+	v = \frac{\partial u}{\partial t},
+@f]
+and call this variable the <i>velocity</i> for obvious reasons. We can
+then reformulate the original wave equation as follows:
+@f{eqnarray*}
+	\frac{\partial u}{\partial t}
+	-
+	v
+	&=& 0
+	\qquad
+	\textrm{in}\ \Omega\times [0,T],
+\\
+	\frac{\partial v}{\partial t}
+	-
+	\Delta u &=& f
+	\qquad
+	\textrm{in}\ \Omega\times [0,T],
+\\
+	u(x,t) &=& g
+	\qquad
+	\textrm{on}\ \partial\Omega\times [0,T],
+\\
+	u(x,0) &=& u_0(x)
+	\qquad
+	\textrm{in}\ \Omega,
+\\
+	v(x,0) &=& u_1(x)
+	\qquad
+	\textrm{in}\ \Omega.
+@f}
+The advantage of this formulation is that it now only contains first
+time derivatives for both variables, for which it is simple to write
+down time stepping schemes. Note that we do not have boundary
+conditions for $v$ at first. However, we could enforce $v=\frac{\partial
+g}{\partial t}$ on the boundary. It turns out in numerical examples that this
+is actually necessary: without doing so the solution doesn't look particularly
+wrong, but the Crank-Nicolson scheme does not conserve energy if one doesn't
+enforce these boundary conditions.
+
+With this formulation, let us introduce the following time
+discretization where a superscript $n$ indicates the number of a time
+step and $k=t_n-t_{n-1}$ is the length of the present time step:
+\f{eqnarray*}
+  \frac{u^n - u^{n-1}}{k}
+  - \left[\theta v^n + (1-\theta) v^{n-1}\right] &=& 0,
+  \\
+  \frac{v^n - v^{n-1}}{k}
+  - \Delta\left[\theta u^n + (1-\theta) u^{n-1}\right]
+  &=& \theta f^n + (1-\theta) f^{n-1}.
+\f}
+Note how we introduced a parameter $\theta$ here. If we chose
+$\theta=0$, for example, the first equation would reduce to
+$\frac{u^n - u^{n-1}}{k}  - v^{n-1} = 0$, which is well-known as the
+forward or explicit Euler method. On the other hand, if we set
+$\theta=1$, then we would get
+$\frac{u^n - u^{n-1}}{k}  - v^n = 0$, which corresponds to the
+backward or implicit Euler method. Both these methods are first order
+accurate methods. They are simple to implement, but they are not
+really very accurate.
+
+The third case would be to choose $\theta=\frac 12$. The first of the
+equations above would then read $\frac{u^n - u^{n-1}}{k}
+- \frac 12 \left[v^n + v^{n-1}\right] = 0$. This method is known as
+the Crank-Nicolson method and has the advantage that it is second
+order accurate. In addition, it has the nice property that it
+preserves the energy in the solution (physically, the energy is the
+sum of the kinetic energy of the particles in the membrane plus the
+potential energy present due to the fact that it is locally stretched;
+this quantity is a conserved one in the continuous equation, but most
+time stepping schemes do not conserve it after time
+discretization). Since $v^n$ also appears in the equation for $u^n$,
+the Crank-Nicolson scheme is also implicit.
+
+In the program, we will leave $\theta$ as a parameter, so that it will
+be easy to play with it. The results section will show some numerical
+evidence comparing the different schemes.
+
+The equations above (called the <i>semidiscretized</i> equations
+because we have only discretized the time, but not space), can be
+simplified a bit by eliminating $v^n$ from the first equation and
+rearranging terms. We then get
+\f{eqnarray*}
+  \left[ 1-k^2\theta^2\Delta \right] u^n &=&
+  	 \left[ 1+k^2\theta(1-\theta)\Delta\right] u^{n-1} + k v^{n-1}
+   	 + k^2\theta\left[\theta f^n + (1-\theta) f^{n-1}\right],\\
+   v^n &=& v^{n-1} + k\Delta\left[ \theta u^n + (1-\theta) u^{n-1}\right]
+   + k\left[\theta f^n + (1-\theta) f^{n-1}\right].
+\f}
+In this form, we see that if we are given the solution
+$u^{n-1},v^{n-1}$ of the previous timestep, that we can then solve for
+the variables $u^n,v^n$ separately, i.e. one at a time. This is
+convenient. In addition, we recognize that the operator in the first
+equation is positive definite, and the second equation looks
+particularly simple.
+
+
+<h3>Space discretization</h3>
+
+We have now derived equations that relate the approximate
+(semi-discrete) solution $u^n(x)$ and its time derivative $v^n(x)$ at
+time $t_n$ with the solutions $u^{n-1}(x),v^{n-1}(x)$ of the previous
+time step at $t_{n-1}$. The next step is to also discretize the
+spatial variable using the usual finite element methodology. To this
+end, we multiply each equation with a test function, integrate over
+the entire domain, and integrate by parts where necessary. This leads
+to
+\f{eqnarray*}
+  (u^n,\varphi) + k^2\theta^2(\nabla u^n,\nabla \varphi) &=&
+  (u^{n-1},\varphi) - k^2\theta(1-\theta)(\nabla u^{n-1},\nabla \varphi)
+  +
+  k(v^{n-1},\varphi)
+  + k^2\theta
+  \left[
+  \theta (f^n,\varphi) + (1-\theta) (f^{n-1},\varphi)
+  \right],
+  \\
+  (v^n,\varphi)
+   &=&
+   (v^{n-1},\varphi)
+    -
+    k\left[ \theta (\nabla u^n,\nabla\varphi) +
+    (1-\theta) (\nabla u^{n-1},\nabla \varphi)\right]
+  + k
+  \left[
+  \theta (f^n,\varphi) + (1-\theta) (f^{n-1},\varphi)
+  \right].
+\f}
+
+It is then customary to approximate $u^n(x) \approx u^n_h(x) = \sum_i
+U_i^n\phi_i^n(x)$, where $\phi_i^n(x)$ are the shape functions used
+for the discretization of the $n$-th time step and $U_i^n$ are the
+unknown nodal values of the solution. Similarly, $v^n(x) \approx
+v^n_h(x) = \sum_i V_i^n\phi_i^n(x)$. Finally, we have the solutions of
+the previous time step, $u^{n-1}(x) \approx u^{n-1}_h(x) = \sum_i
+U_i^{n-1}\phi_i^{n-1}(x)$ and $v^{n-1}(x) \approx v^{n-1}_h(x) = \sum_i
+V_i^{n-1}\phi_i^{n-1}(x)$. Note that since the solution of the previous
+time step has already been computed by the time we get to time step
+$n$, $U^{n-1},V^{n-1}$ are known. Furthermore, note that the solutions
+of the previous step may have been computed on a different mesh, so
+we have to use shape functions $\phi^{n-1}_i(x)$.
+
+If we plug these expansions into above equations and test with the
+test functions from the present mesh, we get the following linear
+system:
+\f{eqnarray*}
+  (M^n + k^2\theta^2 A^n)U^n &=&
+  M^{n,n-1}U^{n-1} - k^2\theta(1-\theta) A^{n,n-1}U^{n-1}
+  +
+  kM^{n,n-1}V^{n-1}
+  + k^2\theta
+  \left[
+  \theta F^n + (1-\theta) F^{n-1}
+  \right],
+  \\
+  M^nV^n
+   &=&
+   M^{n,n-1}V^{n-1}
+    -
+    k\left[ \theta A^n U^n +
+    (1-\theta) A^{n,n-1} U^{n-1}\right]
+   + k
+  \left[
+  \theta F^n + (1-\theta) F^{n-1}
+  \right],
+\f}
+where
+@f{eqnarray*}
+	M^n_{ij} &=& (\phi_i^n, \phi_j^n),
+	\\
+	A^n_{ij} &=& (\nabla\phi_i^n, \nabla\phi_j^n),
+	\\
+	M^{n,n-1}_{ij} &=& (\phi_i^n, \phi_j^{n-1}),
+	\\
+	A^{n,n-1}_{ij} &=& (\nabla\phi_i^n, \nabla\phi_j^{n-1}),
+	\\
+	F^n_{i} &=& (f^n,\phi_i^n),
+	\\
+	F^{n-1}_{i} &=& (f^{n-1},\phi_i^n).
+@f}
+
+If we solve these two equations, we can move the solution one step
+forward and go on to the next time step.
+
+It is worth noting that if we choose the same mesh on each time step
+(as we will in fact do in the program below), then we have the same
+shape functions on time step $n$ and $n-1$,
+i.e. $\phi^n_i=\phi_i^{n-1}=\phi_i$. Consequently, we get
+$M^n=M^{n,n-1}=M$ and $A^n=A^{n,n-1}=A$. On the other hand, if we had
+used different shape functions, then we would have to compute
+integrals that contain shape functions defined on two meshes. This is a
+somewhat messy process that we omit here, but that is treated in some
+detail in step-28.
+
+Under these conditions (i.e. a mesh that doesn't change), one can optimize the
+solution procedure a bit by basically eliminating the solution of the second
+linear system. We will discuss this in the introduction of the @ref step_25
+"step-25" program.
+
+<h3>Energy conservation</h3>
+
+One way to compare the quality of a time stepping scheme is to see whether the
+numerical approximation preserves conservation properties of the continuous
+equation. For the wave equation, the natural quantity to look at is the
+energy. By multiplying the wave equation by $u_t$, integrating over $\Omega$,
+and integrating by parts where necessary, we find that
+@f[
+	\frac{d}{d t}
+	\left[\frac 12 \int_\Omega \left(\frac{\partial u}{\partial
+	t}\right)^2 + (\nabla u)^2 \; dx\right]
+	=
+	\int_\Omega f \frac{\partial u}{\partial t} \; dx
+	+
+	\int_{\partial\Omega} n\cdot\nabla u
+	\frac{\partial g}{\partial t} \; dx.
+@f]
+Consequently, in the absence of body forces and with constant boundary values, we get
+that
+@f[
+	E(t) = \frac 12 \int_\Omega \left(\frac{\partial u}{\partial
+	t}\right)^2 + (\nabla u)^2 \; dx
+@f]
+is a conserved quantity, i.e. one that doesn't change with time. We
+will compute this quantity after each time
+step. It is straightforward to see that if we replace $u$ by its finite
+element approximation, and $\frac{\partial u}{\partial t}$ by the finite
+element approximation of the velocity $v$, then
+@f[
+	E(t_n) = \frac 12 \left<V^n, M^n V^n\right>
+	+
+	\frac 12 \left<U^n, A^n U^n\right>.
+@f]
+As we will see in the results section, the Crank-Nicolson scheme does indeed
+conserve the energy, whereas neither the forward nor the backward Euler scheme
+does.
+
+
+<h3>Who are Courant, Friedrichs, and Lewy?</h3>
+
+One of the reasons why the wave equation is nasty to solve numerically is that
+explicit time discretizations are only stable if the time step is small
+enough. In particular, it is coupled to the spatial mesh width $h$. For the
+lowest order discretization we use here, the relationship reads
+@f[
+	k\le \frac hc
+@f]
+where $c$ is the wave speed, which in our formulation of the wave equation has
+been normalized to one. Consequently, unless we use the implicit schemes with
+$\theta>0$, our solutions will not be numerically stable if we violate this
+restriction. Implicit schemes do not have this restriction for stability, but
+they become inaccurate if the time step is too large.
+
+This condition was first recognized by Courant, Friedrichs, and Lewy —
+in 1928, long before computers became available for numerical
+computations! (This result appeared in the German language article
+R. Courant, K. Friedrichs and H. Lewy: <i>Über die partiellen
+Differenzengleichungen der mathematischen Physik</i>, Mathematische
+Annalen, vol. 100, no. 1, pages 32-74, 1928.)
+This condition on the time step is most frequently just referred
+to as the <i>CFL</i> condition. Intuitively, the CFL condition says
+that the time step must not be larger than the time it takes a wave to
+cross a single cell.
+
+In the program, we will refine the square
+$[-1,1]^2$ seven times uniformly, giving a mesh size of $h=\frac 1{64}$, which
+is what we set the time step to. The fact that we set the time step and mesh
+size individually in two different places is error prone: it is too easy to
+refine the mesh once more but forget to also adjust the time step. @ref
+step_24 "step-24" shows a better way to keep these things in sync.
+
+
+<h3>The test case</h3>
+
+Although the program has all the hooks to deal with nonzero initial and
+boundary conditions and body forces, we take a simple case where the domain is
+a square $[-1,1]^2$ and
+@f{eqnarray*}
+	f &=& 0,
+	\\
+	u_0 &=& 0,
+	\\
+	u_1 &=& 0,
+	\\
+	g &=& \left\{\begin{matrix}\sin (4\pi t)
+	&\qquad& \text{for }\ t\le \frac 12, x=-1, -\frac 13<y<\frac 13
+	\\
+	 0
+	&&\text{otherwise}
+	\end{matrix}
+	\right.
+@f}
+This corresponds to a membrane initially at rest and clamped all around, where
+someone is waving a part of the clamped boundary once up and down, thereby
+shooting a wave into the domain.
diff --git a/examples/step-23/doc/kind b/examples/step-23/doc/kind
new file mode 100644
index 0000000..86a44aa
--- /dev/null
+++ b/examples/step-23/doc/kind
@@ -0,0 +1 @@
+time dependent
diff --git a/examples/step-23/doc/results.dox b/examples/step-23/doc/results.dox
new file mode 100644
index 0000000..f630d8b
--- /dev/null
+++ b/examples/step-23/doc/results.dox
@@ -0,0 +1,194 @@
+<h1>Results</h1>
+
+When the program is run, it produces the following output:
+@code
+Number of active cells: 16384
+Number of degrees of freedom: 16641
+
+Time step 1 at t=0.015625
+   u-equation: 8 CG iterations.
+   v-equation: 22 CG iterations.
+   Total energy: 1.17887
+Time step 2 at t=0.03125
+   u-equation: 8 CG iterations.
+   v-equation: 20 CG iterations.
+   Total energy: 2.9655
+Time step 3 at t=0.046875
+   u-equation: 8 CG iterations.
+   v-equation: 21 CG iterations.
+   Total energy: 4.33761
+Time step 4 at t=0.0625
+   u-equation: 7 CG iterations.
+   v-equation: 21 CG iterations.
+   Total energy: 5.35499
+Time step 5 at t=0.078125
+   u-equation: 7 CG iterations.
+   v-equation: 21 CG iterations.
+   Total energy: 6.18652
+Time step 6 at t=0.09375
+   u-equation: 7 CG iterations.
+   v-equation: 20 CG iterations.
+   Total energy: 6.6799
+
+...
+
+Time step 31 at t=0.484375
+   u-equation: 7 CG iterations.
+   v-equation: 20 CG iterations.
+   Total energy: 21.9068
+Time step 32 at t=0.5
+   u-equation: 7 CG iterations.
+   v-equation: 20 CG iterations.
+   Total energy: 23.3394
+Time step 33 at t=0.515625
+   u-equation: 7 CG iterations.
+   v-equation: 20 CG iterations.
+   Total energy: 23.1019
+
+...
+
+Time step 319 at t=4.98438
+   u-equation: 7 CG iterations.
+   v-equation: 20 CG iterations.
+   Total energy: 23.1019
+Time step 320 at t=5
+   u-equation: 7 CG iterations.
+   v-equation: 20 CG iterations.
+   Total energy: 23.1019
+@endcode
+
+What we see immediately is that the energy is a constant at least after
+$t=\frac 12$ (until which the boundary source term $g$ is nonzero, injecting
+energy into the system).
+
+In addition to the screen output, the program writes the solution of each time
+step to an output file. If we process them adequately and paste them into a
+movie, we get the following:
+
+<img src="http://www.dealii.org/images/steps/developer/step-23.movie.gif" alt="Animation of the solution of step 23.">
+
+The movie shows the generated wave nicely traveling through the domain and back,
+being reflected at the clamped boundary. Some numerical noise is trailing the
+wave, an artifact of a too-large mesh size that can be reduced by reducing the
+mesh width and the time step.
+
+
+<a name="extensions"></a>
+<h3>Possibilities for extensions</h3>
+
+If you want to explore a bit, try out some of the following things:
+<ul>
+  <li>Varying $\theta$. This gives different time stepping schemes, some of
+  which are stable while others are not. Take a look at how the energy
+  evolves.
+
+  <li>Different initial and boundary conditions, right hand sides.
+
+  <li>More complicated domains or more refined meshes. Remember that the time
+  step needs to be bounded by the mesh width, so changing the mesh should
+  always involve also changing the time step. We will come back to this issue
+  in step-24.
+
+  <li>Variable coefficients: In real media, the wave speed is often
+  variable. In particular, the "real" wave equation in realistic media would
+  read
+  @f[
+     \rho(x) \frac{\partial^2 u}{\partial t^2}
+     -
+     \nabla \cdot
+     a(x) \nabla u = f,
+  @f]
+  where $\rho(x)$ is the density of the material, and $a(x)$ is related to the
+  stiffness coefficient. The wave speed is then $c=\sqrt{a/\rho}$.
+
+  To make such a change, we would have to compute the mass and Laplace
+  matrices with a variable coefficient. Fortunately, this isn't too hard: the
+  functions MatrixCreator::create_laplace_matrix and
+  MatrixCreator::create_mass_matrix have additional default parameters that can
+  be used to pass non-constant coefficient functions to them. The required
+  changes are therefore relatively small. On the other hand, care must be
+  taken again to make sure the time step is within the allowed range.
+
+  <li>In the in-code comments, we discussed the fact that the matrices for
+  solving for $U^n$ and $V^n$ need to be reset in every time step because of
+  boundary conditions, even though the actual content does not change. It is
+  possible to avoid copying by not eliminating columns in the linear systems,
+  which is implemented by appending a @p false argument to the call:
+  @code
+    MatrixTools::apply_boundary_values (boundary_values,
+    					matrix_u,
+					solution_u,
+					system_rhs,
+					false);
+  @endcode
+
+  <li>deal.II being a library that supports adaptive meshes, it would of course be
+  nice if this program supported changing the mesh every few time steps. Given the
+  structure of the solution — a wave that travels through the domain —
+  it would seem appropriate if we only refined the mesh where the wave currently is,
+  and not simply everywhere. It is intuitively clear that we should be able to
+  save a significant amount of cells this way. (Though upon further thought one
+  realizes that this is really only the case in the initial stages of the simulation.
+  After some time, for wave phenomena, the domain is filled with reflections of
+  the initial wave going in every direction and filling every corner of the domain.
+  At this point, there is in general little one can gain using local mesh
+  refinement.)
+
+  To make adaptively changing meshes possible, there are basically two routes.
+  The "correct" way would be to go back to the weak form we get using Rothe's
+  method. For example, the first of the two equations to be solved in each time
+  step looked like this:
+  \f{eqnarray*}
+  (u^n,\varphi) + k^2\theta^2(\nabla u^n,\nabla \varphi) &=&
+  (u^{n-1},\varphi) - k^2\theta(1-\theta)(\nabla u^{n-1},\nabla \varphi)
+  +
+  k(v^{n-1},\varphi)
+  + k^2\theta
+  \left[
+  \theta (f^n,\varphi) + (1-\theta) (f^{n-1},\varphi)
+  \right].
+  \f}
+  Now, note that we solve for $u^n$ on mesh ${\mathbb T}^n$, and
+  consequently the test functions $\varphi$ have to be from the space
+  $V_h^n$ as well. As discussed in the introduction, terms like
+  $(u^{n-1},\varphi)$ then require us to integrate the solution of the
+  previous step (which may have been computed on a different mesh
+  ${\mathbb T}^{n-1}$) against the test functions of the current mesh,
+  leading to a matrix $M^{n,n-1}$. This process of integrating shape
+  functions from different meshes is, at best, awkward. It can be done
+  but because it is difficult to ensure that ${\mathbb T}^{n-1}$ and
+  ${\mathbb T}^{n}$ differ by at most one level of refinement, one
+  has to recursively match cells from both meshes. It is feasible to
+  do this, but it leads to lengthy and not entirely obvious code.
+
+  The second approach is the following: whenever we change the mesh,
+  we simply interpolate the solution from the last time step on the old
+  mesh to the new mesh, using the SolutionTransfer class. In other words,
+  instead of the equation above, we would solve
+  \f{eqnarray*}
+  (u^n,\varphi) + k^2\theta^2(\nabla u^n,\nabla \varphi) &=&
+  (I^n u^{n-1},\varphi) - k^2\theta(1-\theta)(\nabla I^n u^{n-1},\nabla \varphi)
+  +
+  k(I^n v^{n-1},\varphi)
+  + k^2\theta
+  \left[
+  \theta (f^n,\varphi) + (1-\theta) (f^{n-1},\varphi)
+  \right],
+  \f}
+  where $I^n$ interpolates a given function onto mesh ${\mathbb T}^n$.
+  This is a much simpler approach because, in each time step, we no
+  longer have to worry whether $u^{n-1},v^{n-1}$ were computed on the
+  same mesh as we are using now or on a different mesh. Consequently,
+  the only changes to the code necessary are the addition of a function
+  that computes the error, marks cells for refinement, sets up a
+  SolutionTransfer object, transfers the solution to the new mesh, and
+  rebuilds matrices and right hand side vectors on the new mesh. Neither
+  the functions building the matrices and right hand sides, nor the
+  solvers need to be changed.
+
+  While this second approach is, strictly speaking,
+  not quite correct in the Rothe framework (it introduces an additional source
+  of error, namely the interpolation), it is nevertheless what
+  almost everyone solving time dependent equations does. We will use this
+  method in step-31, for example.
+</ul>
diff --git a/examples/step-23/doc/tooltip b/examples/step-23/doc/tooltip
new file mode 100644
index 0000000..1611968
--- /dev/null
+++ b/examples/step-23/doc/tooltip
@@ -0,0 +1 @@
+Time dependent problems. The wave equation.
diff --git a/examples/step-23/step-23.cc b/examples/step-23/step-23.cc
new file mode 100644
index 0000000..ee15a45
--- /dev/null
+++ b/examples/step-23/step-23.cc
@@ -0,0 +1,672 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2006 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Author: Wolfgang Bangerth, Texas A&M University, 2006
+ */
+
+
+// @sect3{Include files}
+
+// We start with the usual assortment of include files that we've seen in so
+// many of the previous tests:
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/logstream.h>
+
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/precondition.h>
+#include <deal.II/lac/constraint_matrix.h>
+
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_values.h>
+
+#include <deal.II/numerics/data_out.h>
+
+#include <fstream>
+#include <iostream>
+
+// Here are the only three include files of some new interest: The first one
+// is already used, for example, for the
+// VectorTools::interpolate_boundary_values and
+// MatrixTools::apply_boundary_values functions. However, we here use another
+// function in that class, VectorTools::project to compute our initial values
+// as the $L^2$ projection of the continuous initial values. Furthermore, we
+// use VectorTools::create_right_hand_side to generate the integrals
+// $(f^n,\phi^n_i)$. These were previously always generated by hand in
+// <code>assemble_system</code> or similar functions in application
+// code. However, we're too lazy to do that here, so simply use a library
+// function:
+#include <deal.II/numerics/vector_tools.h>
+
+// In a very similar vein, we are also too lazy to write the code to assemble
+// mass and Laplace matrices, although it would have only taken copying the
+// relevant code from any number of previous tutorial programs. Rather, we
+// want to focus on the things that are truly new to this program and
+// therefore use the MatrixCreator::create_mass_matrix and
+// MatrixCreator::create_laplace_matrix functions. They are declared here:
+#include <deal.II/numerics/matrix_tools.h>
+
+// Finally, here is an include file that contains all sorts of tool functions
+// that one sometimes needs. In particular, we need the
+// Utilities::int_to_string function that, given an integer argument, returns a
+// string representation of it. It is particularly useful since it allows for
+// a second parameter indicating the number of digits to which we want the
+// result padded with leading zeros. We will use this to write output files
+// that have the form <code>solution-XXX.gnuplot</code> where <code>XXX</code>
+// denotes the number of the time step and always consists of three digits
+// even if we are still in the single or double digit time steps.
+#include <deal.II/base/utilities.h>
+
+// The last step is as in all previous programs:
+namespace Step23
+{
+  using namespace dealii;
+
+
+  // @sect3{The <code>WaveEquation</code> class}
+
+  // Next comes the declaration of the main class. Its public interface of
+  // functions is like in most of the other tutorial programs. Worth
+  // mentioning is that we now have to store four matrices instead of one: the
+  // mass matrix $M$, the Laplace matrix $A$, the matrix $M+k^2\theta^2A$ used
+  // for solving for $U^n$, and a copy of the mass matrix with boundary
+  // conditions applied used for solving for $V^n$. Note that it is a bit
+  // wasteful to have an additional copy of the mass matrix around. We will
+  // discuss strategies for how to avoid this in the section on possible
+  // improvements.
+  //
+  // Likewise, we need solution vectors for $U^n,V^n$ as well as for the
+  // corresponding vectors at the previous time step, $U^{n-1},V^{n-1}$. The
+  // <code>system_rhs</code> will be used for whatever right hand side vector
+  // we have when solving one of the two linear systems in each time
+  // step. These will be solved in the two functions <code>solve_u</code> and
+  // <code>solve_v</code>.
+  //
+  // Finally, the variable <code>theta</code> is used to indicate the
+  // parameter $\theta$ that is used to define which time stepping scheme to
+  // use, as explained in the introduction. The rest is self-explanatory.
+  template <int dim>
+  class WaveEquation
+  {
+  public:
+    WaveEquation ();
+    void run ();
+
+  private:
+    void setup_system ();
+    void solve_u ();
+    void solve_v ();
+    void output_results () const;
+
+    Triangulation<dim>   triangulation;
+    FE_Q<dim>            fe;
+    DoFHandler<dim>      dof_handler;
+
+    ConstraintMatrix constraints;
+
+    SparsityPattern      sparsity_pattern;
+    SparseMatrix<double> mass_matrix;
+    SparseMatrix<double> laplace_matrix;
+    SparseMatrix<double> matrix_u;
+    SparseMatrix<double> matrix_v;
+
+    Vector<double>       solution_u, solution_v;
+    Vector<double>       old_solution_u, old_solution_v;
+    Vector<double>       system_rhs;
+
+    double time, time_step;
+    unsigned int timestep_number;
+    const double theta;
+  };
+
+
+
+  // @sect3{Equation data}
+
+  // Before we go on filling in the details of the main class, let us define
+  // the equation data corresponding to the problem, i.e. initial and boundary
+  // values for both the solution $u$ and its time derivative $v$, as well as
+  // a right hand side class. We do so using classes derived from the Function
+  // class template that has been used many times before, so the following
+  // should not be a surprise.
+  //
+  // Let's start with initial values and choose zero for both the value $u$ as
+  // well as its time derivative, the velocity $v$:
+  template <int dim>
+  class InitialValuesU : public Function<dim>
+  {
+  public:
+    InitialValuesU () : Function<dim>() {}
+
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+  };
+
+
+  template <int dim>
+  class InitialValuesV : public Function<dim>
+  {
+  public:
+    InitialValuesV () : Function<dim>() {}
+
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+  };
+
+
+
+  template <int dim>
+  double InitialValuesU<dim>::value (const Point<dim>  &/*p*/,
+                                     const unsigned int component) const
+  {
+    Assert (component == 0, ExcInternalError());
+    return 0;
+  }
+
+
+
+  template <int dim>
+  double InitialValuesV<dim>::value (const Point<dim>  &/*p*/,
+                                     const unsigned int component) const
+  {
+    Assert (component == 0, ExcInternalError());
+    return 0;
+  }
+
+
+
+  // Secondly, we have the right hand side forcing term. Boring as we are, we
+  // choose zero here as well:
+  template <int dim>
+  class RightHandSide : public Function<dim>
+  {
+  public:
+    RightHandSide () : Function<dim>() {}
+
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+  };
+
+
+
+  template <int dim>
+  double RightHandSide<dim>::value (const Point<dim>  &/*p*/,
+                                    const unsigned int component) const
+  {
+    Assert (component == 0, ExcInternalError());
+    return 0;
+  }
+
+
+
+  // Finally, we have boundary values for $u$ and $v$. They are as described
+  // in the introduction, one being the time derivative of the other:
+  template <int dim>
+  class BoundaryValuesU : public Function<dim>
+  {
+  public:
+    BoundaryValuesU () : Function<dim>() {}
+
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+  };
+
+
+
+
+  template <int dim>
+  class BoundaryValuesV : public Function<dim>
+  {
+  public:
+    BoundaryValuesV () : Function<dim>() {}
+
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+  };
+
+
+
+
+  template <int dim>
+  double BoundaryValuesU<dim>::value (const Point<dim> &p,
+                                      const unsigned int component) const
+  {
+    Assert (component == 0, ExcInternalError());
+
+    if ((this->get_time() <= 0.5) &&
+        (p[0] < 0) &&
+        (p[1] < 1./3) &&
+        (p[1] > -1./3))
+      return std::sin (this->get_time() * 4 * numbers::PI);
+    else
+      return 0;
+  }
+
+
+
+  template <int dim>
+  double BoundaryValuesV<dim>::value (const Point<dim> &p,
+                                      const unsigned int component) const
+  {
+    Assert (component == 0, ExcInternalError());
+
+    if ((this->get_time() <= 0.5) &&
+        (p[0] < 0) &&
+        (p[1] < 1./3) &&
+        (p[1] > -1./3))
+      return (std::cos (this->get_time() * 4 * numbers::PI) *
+              4 * numbers::PI);
+    else
+      return 0;
+  }
+
+
+
+
+  // @sect3{Implementation of the <code>WaveEquation</code> class}
+
+  // The implementation of the actual logic is actually fairly short, since we
+  // relegate things like assembling the matrices and right hand side vectors
+  // to the library. The rest boils down to not much more than 130 lines of
+  // actual code, a significant fraction of which is boilerplate code that can
+  // be taken from previous example programs (e.g. the functions that solve
+  // linear systems, or that generate output).
+  //
+  // Let's start with the constructor (for an explanation of the choice of
+  // time step, see the section on Courant, Friedrichs, and Lewy in the
+  // introduction):
+  template <int dim>
+  WaveEquation<dim>::WaveEquation () :
+    fe (1),
+    dof_handler (triangulation),
+    time_step (1./64),
+    theta (0.5)
+  {}
+
+
+  // @sect4{WaveEquation::setup_system}
+
+  // The next function is the one that sets up the mesh, DoFHandler, and
+  // matrices and vectors at the beginning of the program, i.e. before the
+  // first time step. The first few lines are pretty much standard if you've
+  // read through the tutorial programs at least up to step-6:
+  template <int dim>
+  void WaveEquation<dim>::setup_system ()
+  {
+    GridGenerator::hyper_cube (triangulation, -1, 1);
+    triangulation.refine_global (7);
+
+    std::cout << "Number of active cells: "
+              << triangulation.n_active_cells()
+              << std::endl;
+
+    dof_handler.distribute_dofs (fe);
+
+    std::cout << "Number of degrees of freedom: "
+              << dof_handler.n_dofs()
+              << std::endl
+              << std::endl;
+
+    DynamicSparsityPattern dsp(dof_handler.n_dofs(), dof_handler.n_dofs());
+    DoFTools::make_sparsity_pattern (dof_handler, dsp);
+    sparsity_pattern.copy_from (dsp);
+
+    // Then comes a block where we have to initialize the 3 matrices we need
+    // in the course of the program: the mass matrix, the Laplace matrix, and
+    // the matrix $M+k^2\theta^2A$ used when solving for $U^n$ in each time
+    // step.
+    //
+    // When setting up these matrices, note that they all make use of the same
+    // sparsity pattern object. Finally, the reason why matrices and sparsity
+    // patterns are separate objects in deal.II (unlike in many other finite
+    // element or linear algebra classes) becomes clear: in a significant
+    // fraction of applications, one has to hold several matrices that happen
+    // to have the same sparsity pattern, and there is no reason for them not
+    // to share this information, rather than re-building and wasting memory
+    // on it several times.
+    //
+    // After initializing all of these matrices, we call library functions
+    // that build the Laplace and mass matrices. All they need is a DoFHandler
+    // object and a quadrature formula object that is to be used for numerical
+    // integration. Note that in many respects these functions are better than
+    // what we would usually do in application programs, for example because
+    // they automatically parallelize building the matrices if multiple
+    // processors are available in a machine. The matrices for solving linear
+    // systems will be filled in the run() method because we need to re-apply
+    // boundary conditions every time step.
+    mass_matrix.reinit (sparsity_pattern);
+    laplace_matrix.reinit (sparsity_pattern);
+    matrix_u.reinit (sparsity_pattern);
+    matrix_v.reinit (sparsity_pattern);
+
+    MatrixCreator::create_mass_matrix (dof_handler, QGauss<dim>(3),
+                                       mass_matrix);
+    MatrixCreator::create_laplace_matrix (dof_handler, QGauss<dim>(3),
+                                          laplace_matrix);
+
+    // The rest of the function is spent on setting vector sizes to the
+    // correct value. The final line closes the hanging node constraints
+    // object. Since we work on a uniformly refined mesh, no constraints exist
+    // or have been computed (i.e. there was no need to call
+    // DoFTools::make_hanging_node_constraints as in other programs), but we
+    // need a constraints object in one place further down below anyway.
+    solution_u.reinit (dof_handler.n_dofs());
+    solution_v.reinit (dof_handler.n_dofs());
+    old_solution_u.reinit (dof_handler.n_dofs());
+    old_solution_v.reinit (dof_handler.n_dofs());
+    system_rhs.reinit (dof_handler.n_dofs());
+
+    constraints.close ();
+  }
+
+
+  // @sect4{WaveEquation::solve_u and WaveEquation::solve_v}
+
+  // The next two functions deal with solving the linear systems associated
+  // with the equations for $U^n$ and $V^n$. Both are not particularly
+  // interesting as they pretty much follow the scheme used in all the
+  // previous tutorial programs.
+  //
+  // One can make little experiments with preconditioners for the two matrices
+  // we have to invert. As it turns out, however, for the matrices at hand
+  // here, using Jacobi or SSOR preconditioners reduces the number of
+  // iterations necessary to solve the linear system slightly, but due to the
+  // cost of applying the preconditioner it is no win in terms of run-time. It
+  // is not much of a loss either, but let's keep it simple and just do
+  // without:
+  template <int dim>
+  void WaveEquation<dim>::solve_u ()
+  {
+    // Unpreconditioned CG for the U-system: the iteration cap is 1000 and the
+    // tolerance is 1e-8 times the l2 norm of the current right hand side.
+    const double  tolerance = 1e-8 * system_rhs.l2_norm();
+    SolverControl solver_control (1000, tolerance);
+    SolverCG<>    cg (solver_control);
+    cg.solve (matrix_u, solution_u, system_rhs, PreconditionIdentity());
+
+    std::cout << "   u-equation: " << solver_control.last_step()
+              << " CG iterations." << std::endl;
+  }
+
+
+  template <int dim>
+  void WaveEquation<dim>::solve_v ()
+  {
+    // Unpreconditioned CG for the V-system: the iteration cap is 1000 and the
+    // tolerance is 1e-8 times the l2 norm of the current right hand side.
+    const double  tolerance = 1e-8 * system_rhs.l2_norm();
+    SolverControl solver_control (1000, tolerance);
+    SolverCG<>    cg (solver_control);
+    cg.solve (matrix_v, solution_v, system_rhs, PreconditionIdentity());
+
+    std::cout << "   v-equation: " << solver_control.last_step()
+              << " CG iterations." << std::endl;
+  }
+
+
+
+  // @sect4{WaveEquation::output_results}
+
+  // Likewise, the following function is pretty much what we've done
+  // before. The only thing worth mentioning is how here we generate a string
+  // representation of the time step number padded with leading zeros to 3
+  // character length using the Utilities::int_to_string function's second
+  // argument.
+  template <int dim>
+  void WaveEquation<dim>::output_results () const
+  {
+    // Register both solution components, named "U" and "V", with DataOut.
+    DataOut<dim> data_out;
+    data_out.attach_dof_handler (dof_handler);
+    data_out.add_data_vector (solution_u, "U");
+    data_out.add_data_vector (solution_v, "V");
+    data_out.build_patches ();
+
+    // The file is "solution-NNN.gnuplot"; NNN comes from int_to_string(.., 3).
+    const std::string step_label = Utilities::int_to_string (timestep_number, 3);
+    const std::string filename   = "solution-" + step_label + ".gnuplot";
+
+    std::ofstream output (filename.c_str());
+    data_out.write_gnuplot (output);
+  }
+
+
+
+
+  // @sect4{WaveEquation::run}
+
+  // The following is really the only interesting function of the program. It
+  // contains the loop over all time steps, but before we get to that we have
+  // to set up the grid, DoFHandler, and matrices. In addition, we have to
+  // somehow get started with initial values. To this end, we use the
+  // VectorTools::project function that takes an object that describes a
+  // continuous function and computes the $L^2$ projection of this function
+  // onto the finite element space described by the DoFHandler object. Can't
+  // be any simpler than that:
+  template <int dim>
+  void WaveEquation<dim>::run ()
+  {
+    setup_system();
+
+    VectorTools::project (dof_handler, constraints, QGauss<dim>(3),
+                          InitialValuesU<dim>(),
+                          old_solution_u);
+    VectorTools::project (dof_handler, constraints, QGauss<dim>(3),
+                          InitialValuesV<dim>(),
+                          old_solution_v);
+
+    // The next thing is to loop over all the time steps until we reach the
+    // end time ($T=5$ in this case). In each time step, we first have to
+    // solve for $U^n$, using the equation $(M^n + k^2\theta^2 A^n)U^n =$
+    // $(M^{n,n-1} - k^2\theta(1-\theta) A^{n,n-1})U^{n-1} + kM^{n,n-1}V^{n-1}
+    // +$ $k\theta \left[k \theta F^n + k(1-\theta) F^{n-1} \right]$. Note
+    // that we use the same mesh for all time steps, so that $M^n=M^{n,n-1}=M$
+    // and $A^n=A^{n,n-1}=A$. What we therefore have to do first is to add up
+    // $MU^{n-1} - k^2\theta(1-\theta) AU^{n-1} + kMV^{n-1}$ and the forcing
+    // terms, and put the result into the <code>system_rhs</code> vector. (For
+    // these additions, we need a temporary vector that we declare before the
+    // loop to avoid repeated memory allocations in each time step.)
+    //
+    // The one thing to realize here is how we communicate the time variable
+    // to the object describing the right hand side: each object derived from
+    // the Function class has a time field that can be set using the
+    // Function::set_time and read by Function::get_time. In essence, using
+    // this mechanism, all functions of space and time are therefore
+    // considered functions of space evaluated at a particular time. This
+    // matches well what we typically need in finite element programs, where
+    // we almost always work on a single time step at a time, and where it
+    // never happens that, for example, one would like to evaluate a
+    // space-time function for all times at any given spatial location.
+    Vector<double> tmp (solution_u.size());  // scratch vector, overwritten repeatedly below
+    Vector<double> forcing_terms (solution_u.size());  // k*(theta*F^n + (1-theta)*F^{n-1})
+
+    for (timestep_number=1, time=time_step;
+         time<=5;
+         time+=time_step, ++timestep_number)
+      {
+        std::cout << "Time step " << timestep_number
+                  << " at t=" << time
+                  << std::endl;
+
+        mass_matrix.vmult (system_rhs, old_solution_u);
+
+        mass_matrix.vmult (tmp, old_solution_v);
+        system_rhs.add (time_step, tmp);
+
+        laplace_matrix.vmult (tmp, old_solution_u);
+        system_rhs.add (-theta * (1-theta) * time_step * time_step, tmp);
+
+        RightHandSide<dim> rhs_function;
+        rhs_function.set_time (time);
+        VectorTools::create_right_hand_side (dof_handler, QGauss<dim>(2),
+                                             rhs_function, tmp);
+        forcing_terms = tmp;
+        forcing_terms *= theta * time_step;
+
+        rhs_function.set_time (time-time_step);  // same forcing, now at the previous time level
+        VectorTools::create_right_hand_side (dof_handler, QGauss<dim>(2),
+                                             rhs_function, tmp);
+
+        forcing_terms.add ((1-theta) * time_step, tmp);
+
+        system_rhs.add (theta * time_step, forcing_terms);
+
+        // Having constructed the right hand side vector of the first
+        // equation in this way, all we have to do is apply the correct
+        // boundary values. As for the right hand side, this is a space-time
+        // function evaluated at a particular time, which we interpolate at
+        // boundary nodes and then use the result to apply boundary values as
+        // we usually do. The result is then handed off to the solve_u()
+        // function:
+        {
+          BoundaryValuesU<dim> boundary_values_u_function;
+          boundary_values_u_function.set_time (time);
+
+          std::map<types::global_dof_index,double> boundary_values;
+          VectorTools::interpolate_boundary_values (dof_handler,
+                                                    0,
+                                                    boundary_values_u_function,
+                                                    boundary_values);
+
+          // The matrix for solve_u() is the same in every time step, so one
+          // could think that it is enough to do this only once at the
+          // beginning of the simulation. However, since we need to apply
+          // boundary values to the linear system (which eliminate some matrix
+          // rows and columns and give contributions to the right hand side),
+          // we have to refill the matrix in every time step before we
+          // actually apply boundary data. The actual content is very simple:
+          // it is the sum of the mass matrix and a weighted Laplace matrix:
+          matrix_u.copy_from (mass_matrix);
+          matrix_u.add (theta * theta * time_step * time_step, laplace_matrix);
+          MatrixTools::apply_boundary_values (boundary_values,
+                                              matrix_u,
+                                              solution_u,
+                                              system_rhs);
+        }
+        solve_u ();
+
+
+        // The second step, i.e. solving for $V^n$, works similarly, except
+        // that this time the matrix on the left is the mass matrix (which we
+        // copy again in order to be able to apply boundary conditions), and
+        // the right hand side is $MV^{n-1} - k\left[ \theta A U^n +
+        // (1-\theta) AU^{n-1}\right]$ plus forcing terms. %Boundary values
+        // are applied in the same way as before, except that now we have to
+        // use the BoundaryValuesV class:
+        laplace_matrix.vmult (system_rhs, solution_u);
+        system_rhs *= -theta * time_step;
+
+        mass_matrix.vmult (tmp, old_solution_v);
+        system_rhs += tmp;
+
+        laplace_matrix.vmult (tmp, old_solution_u);
+        system_rhs.add (-time_step * (1-theta), tmp);
+
+        system_rhs += forcing_terms;  // forcing_terms is unchanged since the U-equation above
+
+        {
+          BoundaryValuesV<dim> boundary_values_v_function;
+          boundary_values_v_function.set_time (time);
+
+          std::map<types::global_dof_index,double> boundary_values;
+          VectorTools::interpolate_boundary_values (dof_handler,
+                                                    0,
+                                                    boundary_values_v_function,
+                                                    boundary_values);
+          matrix_v.copy_from (mass_matrix);
+          MatrixTools::apply_boundary_values (boundary_values,
+                                              matrix_v,
+                                              solution_v,
+                                              system_rhs);
+        }
+        solve_v ();
+
+        // Finally, after both solution components have been computed, we
+        // output the result, compute the energy in the solution, and go on to
+        // the next time step after shifting the present solution into the
+        // vectors that hold the solution at the previous time step. Note the
+        // function SparseMatrix::matrix_norm_square that can compute
+        // $\left<V^n,MV^n\right>$ and $\left<U^n,AU^n\right>$ in one step,
+        // saving us the expense of a temporary vector and several lines of
+        // code:
+        output_results ();
+
+        std::cout << "   Total energy: "
+                  << (mass_matrix.matrix_norm_square (solution_v) +
+                      laplace_matrix.matrix_norm_square (solution_u)) / 2
+                  << std::endl;
+
+        old_solution_u = solution_u;
+        old_solution_v = solution_v;
+      }
+  }
+}
+
+
+// @sect3{The <code>main</code> function}
+
+// What remains is the main function of the program. There is nothing here
+// that hasn't been shown in several of the previous programs:
+int main ()
+{
+  // Separator line that frames each error report printed to std::cerr.
+  const char *const rule =
+    "----------------------------------------------------";
+
+  try
+    {
+      using namespace dealii;
+      using namespace Step23;
+
+      WaveEquation<2> solver;
+      solver.run ();
+      return 0;
+    }
+  catch (std::exception &exc)
+    {
+      // A standard exception escaped from the solver: print its message.
+      std::cerr << std::endl << std::endl
+                << rule << std::endl;
+      std::cerr << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << rule << std::endl;
+    }
+  catch (...)
+    {
+      // Anything else carries no message that could be printed.
+      std::cerr << std::endl << std::endl
+                << rule << std::endl;
+      std::cerr << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << rule << std::endl;
+    }
+
+  return 1;  // only reached after one of the two handlers above has run
+}
diff --git a/examples/step-24/CMakeLists.txt b/examples/step-24/CMakeLists.txt
new file mode 100644
index 0000000..c1c2f6f
--- /dev/null
+++ b/examples/step-24/CMakeLists.txt
@@ -0,0 +1,39 @@
+##
+#  CMake script for the step-24 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-24")
+
+# Declare all source files the target consists of. Here, this is only
+# the one ${TARGET}.cc file (step-24.cc), but as you expand your project you may
+# wish to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET  # QUIET: a failed search is reported by the check below instead
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-24/doc/builds-on b/examples/step-24/doc/builds-on
new file mode 100644
index 0000000..f7adc18
--- /dev/null
+++ b/examples/step-24/doc/builds-on
@@ -0,0 +1 @@
+step-23
diff --git a/examples/step-24/doc/intro.dox b/examples/step-24/doc/intro.dox
new file mode 100644
index 0000000..cbaf189
--- /dev/null
+++ b/examples/step-24/doc/intro.dox
@@ -0,0 +1,322 @@
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+This program grew out of a student project by Xing Jin at Texas A&M
+University. Most of the work for this program is by her. Some of the work on
+this tutorial program has been funded by NSF under grant DMS-0604778.
+
+The program is part of a project that aims to simulate thermoacoustic
+tomography imaging. In thermoacoustic tomography, pulsed electromagnetic
+energy is delivered into biological tissues. Tissues absorb some of this energy
+and those parts of the tissue that absorb the most energy generate
+thermoacoustic waves through thermoelastic expansion. For imaging, one uses
+that different kinds of tissue, most importantly healthy and diseased tissue,
+absorb different amounts of energy and therefore expand at different
+rates. The experimental setup is to measure the amplitude of the pressure
+waves generated by these sources on the surface of the tissue and try to
+reconstruct the source distributions, which are indicative of the distribution
+of absorbers and therefore of different kinds of tissue. Part of this project
+is to compare simulated data with actual measurements, so one has to solve the
+"forward problem", i.e. the wave equation that describes the propagation of
+pressure waves in tissue. This program is therefore a continuation of @ref
+step_23 "step-23", where the wave equation was first introduced.
+
+
+<h3>The problem</h3>
+
+The temperature at a given location, neglecting thermal diffusion, can be
+stated as 
+
+@f[
+\rho C_p \frac{\partial}{\partial t}T(t,\mathbf r) = H(t,\mathbf r)
+@f]
+
+Here $\rho (\mathbf r) $ is the density; $C_p (\mathbf r) $ is the specific
+heat; $\frac{\partial T}{\partial t}(t,\mathbf r)$ is the temperature rise due
+to the delivered microwave energy; and $H(t,\mathbf r)$ is the heating
+function defined as the thermal energy per time and volume transformed from
+deposited microwave energy.
+
+Let us assume that tissues have heterogeneous dielectric properties but
+homogeneous acoustic properties. The basic acoustic generation equation in an
+acoustically homogeneous medium can be described as follows: if $u$ is the
+vector-valued displacement, then tissue certainly reacts to changes in
+pressure by acceleration:
+@f[
+\rho \frac{\partial^2}{\partial t^2}u(t,\mathbf r) =
+-\nabla p(t,\mathbf r).
+@f]
+Furthermore, it contracts due to excess pressure and expands based on changes in temperature:
+@f[
+\nabla \cdot u(t,\mathbf r) = -\frac{p(t,\mathbf r)}{\rho c_0^2}+\beta T(t,\mathbf r) .
+@f]
+Here, $\beta$ is a thermoexpansion coefficient.
+
+Let us now make the assumption that heating only happens on a time
+scale much shorter than wave propagation through tissue (i.e. the temporal
+length of the microwave pulse that heats the tissue is much shorter than the
+time it takes a wave to cross the domain). In that case, the heating
+rate $H(t,\mathbf r)$ can be written as $H(t,\mathbf r) = a(\mathbf
+r)\delta(t)$ (where $a(\mathbf r)$ is a map of absorption strengths for
+microwave energy and $\delta(t)$ is the Dirac delta function), which together
+with the first equation above will yield 
+an instantaneous jump in the temperature $T(\mathbf r)$ at time $t=0$.
+Using this assumption, and taking all equations together, we can
+rewrite and combine the above as follows:
+@f[
+\Delta p-\frac{1}{c_0^2} \frac{\partial^2 p}{\partial t^2} = \lambda
+a(\mathbf r)\frac{d\delta(t)}{dt}
+@f]
+where $\lambda = - \frac{\beta}{C_p}$. 
+
+This somewhat strange equation with the derivative of a Dirac delta function
+on the right hand side can be rewritten as an initial value problem as follows:
+@f{eqnarray*}
+\Delta \bar{p}- \frac{1}{c_0^2} \frac{\partial^2 \bar{p}}{\partial t^2} & = &
+0 \\
+\bar{p}(0,\mathbf r) &=& c_0^2 \lambda a(\mathbf r) = b(\mathbf r)  \\
+\frac{\partial\bar{p}(0,\mathbf r)}{\partial t} &=& 0.
+@f}
+(A derivation of this transformation into an initial value problem is given at
+the end of this introduction as an appendix.)
+
+In the inverse problem, it is the initial condition $b(\mathbf r) = c_0^2 \lambda a(\mathbf r)$ that
+one would like to recover, since it is a map of absorption strengths for
+microwave energy, and therefore presumably an indicator to discern healthy
+from diseased tissue.
+
+In real applications, the thermoacoustic source is very small as compared to
+the medium.  The propagation path of the thermoacoustic waves can then be
+approximated as extending from the source to infinity. Furthermore, detectors are
+only a limited distance from the source. One only needs to evaluate the values
+when the thermoacoustic waves pass through the detectors, although they do
+continue beyond. This is therefore a problem where we are only interested in a
+small part of an infinite medium, and we do not want waves generated somewhere
+to be reflected at the boundary of the domain which we consider
+interesting. Rather, we would like to simulate only that part of the wave
+field that is contained inside the domain of interest, and waves that hit the
+boundary of that domain to simply pass undisturbed through the boundary. In
+other words, we would like the boundary to absorb any waves that hit it.
+
+In general, this is a hard problem: Good absorbing boundary conditions are
+nonlinear and/or numerically very expensive. We therefore opt for a simple
+first order approximation to absorbing boundary conditions that reads
+@f[
+\frac{\partial\bar{p}}{\partial\mathbf n} =
+-\frac{1}{c_0} \frac{\partial\bar{p}}{\partial t}
+@f]
+Here, $\frac{\partial\bar{p}}{\partial\mathbf n}$ is the normal derivative at
+the boundary. It should be noted that this is not a particularly good boundary
+condition, but it is one of the very few that are reasonably simple to implement.
+
+
+<h3>Weak form and discretization</h3>
+
+As in step-23, one first introduces a second variable, which is
+defined as the derivative of the pressure potential:
+@f[
+v = \frac{\partial\bar{p}}{\partial t}
+@f]
+
+With the second variable, one then transforms the forward problem into
+two separate equations:
+@f{eqnarray*}
+\bar{p}_{t} - v & = & 0 \\
+\Delta\bar{p} - \frac{1}{c_0^2}\,v_{t} & = & f
+@f}
+with initial conditions:
+@f{eqnarray*}
+\bar{p}(0,\mathbf r) & = & b(\mathbf r) \\
+v(0,\mathbf r)=\bar{p}_t(0,\mathbf r) & = & 0.
+@f}
+Note that we have introduced a right hand side $f(t,\mathbf r)$ here to show
+how to derive these formulas in the general case, although in the application
+to the thermoacoustic problem $f=0$.
+
+The semi-discretized, weak version of this model, using the general $\theta$ scheme
+introduced in step-23 is then:
+@f{eqnarray*}
+\left(\frac{\bar{p}^n-\bar{p}^{n-1}}{k},\phi\right)_\Omega-
+\left(\theta v^{n}+(1-\theta)v^{n-1},\phi\right)_\Omega & = & 0   \\
+-\left(\nabla((\theta\bar{p}^n+(1-\theta)\bar{p}^{n-1})),\nabla\phi\right)_\Omega-
+\frac{1}{c_0}\left(\frac{\bar{p}^n-\bar{p}^{n-1}}{k},\phi\right)_{\partial\Omega} -
+\frac{1}{c_0^2}\left(\frac{v^n-v^{n-1}}{k},\phi\right)_\Omega & =
+& \left(\theta f^{n}+(1-\theta)f^{n-1}, \phi\right)_\Omega,
+@f}
+where $\phi$ is an arbitrary test function. Here, we have integrated by
+parts, and the absorbing boundary condition is incorporated into the weak
+form by inserting it into the boundary term of
+@f[
+\int_\Omega\varphi \, \Delta p\; dx =
+-\int_\Omega\nabla \varphi \cdot \nabla p dx +
+\int_{\partial\Omega}\varphi \frac{\partial p}{\partial {\mathbf n}}ds.
+@f]
+
+From this we obtain the discrete model by introducing a finite number of shape
+functions, and get
+@f{eqnarray*}
+M\bar{p}^{n}-k \theta M v^n & = & M\bar{p}^{n-1}+k (1-\theta)Mv^{n-1},\\
+
+(-c_0^2k \theta A-c_0 B)\bar{p}^n-Mv^{n} & = &
+(c_0^2k(1-\theta)A-c_0B)\bar{p}^{n-1}-Mv^{n-1}+c_0^2k(\theta F^{n}+(1-\theta)F^{n-1}).
+@f}
+The matrices $M$ and $A$ are here as in step-23, and the
+boundary mass matrix
+@f[
+  B_{ij} = \left(\varphi_i,\varphi_j\right)_{\partial\Omega}
+@f]
+results from the use of absorbing boundary conditions.
+
+The above two equations can be rewritten in matrix form with the pressure and its derivative as
+an unknown vector:
+@f[
+\left(\begin{array}{cc}
+ M         &       -k\theta M \\
+c_0^2\,k\,\theta\,A+c_0\,B  &  M   \\
+               \end{array} \right)\\
+\left(\begin{array}{c}
+ \bar{p}^{n}    \\
+ v^{n}
+              \end{array}\right)=\\
+\left(\begin{array}{l}
+ G_1  \\
+ G_2 -(\theta F^{n}+(1-\theta)F ^{n-1})c_{0}^{2}k \\
+                \end{array}\right)
+@f]
+
+where
+@f[
+\left(\begin{array}{c}
+G_1 \\
+G_2 \\
+   \end{array} \right)=\\
+\left(\begin{array}{l}
+ M\bar{p}^{n-1}+k(1-\theta)Mv^{n-1}\\
+ (-c_{0}^{2}k (1-\theta)A+c_0 B)\bar{p}^{n-1} +Mv^{n-1}
+                \end{array}\right)
+@f]
+
+By simple transformations, one then obtains two equations for
+the pressure potential and its derivative, just as in the previous tutorial program:
+@f{eqnarray*}
+(M+(k\,\theta\,c_{0})^{2}A+c_0k\theta B)\bar{p}^{n} & = &
+G_{1}+(k\, \theta)G_{2}-(c_0k)^2\theta (\theta F^{n}+(1-\theta)F^{n-1}) \\
+Mv^n & = & -(c_0^2\,k\, \theta\, A+c_0B)\bar{p}^{n}+ G_2 -
+c_0^2k(\theta F^{n}+(1-\theta)F^{n-1})
+@f}
+
+
+<h3>What the program does</h3>
+
+Compared to step-23, this program adds the treatment of
+simple absorbing boundary conditions. In addition, it deals with data obtained
+from actual experimental measurements. To this end, we need to evaluate the
+solution at points at which the experiment also evaluates a real pressure
+field. We will see how to do that using the VectorTools::point_value function
+further down below.
+
+
+
+<h3>Appendix: PDEs with Dirac delta functions as right hand side and their transformation to an initial value problem</h3>
+
+In the derivation of the initial value problem for the wave equation, we
+initially found that the equation had the derivative of a Dirac delta function
+as a right hand side:
+@f[
+\Delta p-\frac{1}{c_0^2} \frac{\partial^2 p}{\partial t^2} = \lambda
+a(\mathbf r)\frac{d\delta(t)}{dt}.
+@f]
+In order to see how to transform this single equation into the usual statement
+of a PDE with initial conditions, let us make the physically quite reasonable
+assumption that the medium is at rest initially, i.e. $p(t,\mathbf
+r)=\frac{\partial p(t,\mathbf r)}{\partial t}=0$ for $t<0$. Next, let us form
+the indefinite integral with respect to time of both sides:
+@f[
+\int^t \Delta p\; dt -\int^t \frac{1}{c_0^2} \frac{\partial^2 p}{\partial t^2}
+\; dt
+=
+\int^t \lambda a(\mathbf r)\frac{d\delta(t)}{dt} \;dt.
+@f]
+This immediately leads to the statement
+@f[
+P(t,\mathbf r) - \frac{1}{c_0^2} \frac{\partial p}{\partial t}
+=
+\lambda a(\mathbf r) \delta(t),
+@f]
+where $P(t,\mathbf r)$ is such that $\frac{dP(t,\mathbf r)}{dt}=\Delta
+p$. Next, we form the (definite) integral over time from $t=-\epsilon$ to
+$t=+\epsilon$ to find 
+@f[
+\int_{-\epsilon}^{\epsilon} P(t,\mathbf r)\; dt
+- \frac{1}{c_0^2} \left[ p(\epsilon,\mathbf r) - p(-\epsilon,\mathbf r) \right]
+=
+\int_{-\epsilon}^{\epsilon} \lambda a(\mathbf r) \delta(t) \; dt.
+@f]
+If we use the property of the delta function that $\int_{-\epsilon}^{\epsilon}
+\delta(t)\; dt = 1$, and assume that $P$ is a continuous function in time, we find
+as we let $\epsilon$ go to zero that
+@f[
+- \lim_{\epsilon\rightarrow 0}\frac{1}{c_0^2} \left[ p(\epsilon,\mathbf r) - p(-\epsilon,\mathbf r) \right]
+=
+\lambda a(\mathbf r).
+@f]
+In other words, using that $p(-\epsilon,\mathbf r)=0$, we retrieve the initial
+condition 
+@f[
+  \frac{1}{c_0^2} p(0,\mathbf r)
+  =
+  \lambda a(\mathbf r).
+@f]
+At the same time, we know that for every $t>0$ the delta function is zero, so
+for $0<t<T$ we get the equation
+@f[
+\Delta p-\frac{1}{c_0^2} \frac{\partial^2 p}{\partial t^2} = 0.
+@f]
+Consequently, we have obtained a representation of the wave equation and one
+initial condition from the original somewhat strange equation.
+
+Finally, because we here have an equation with two time derivatives, we still
+need a second initial condition. To this end, let us go back to the equation
+@f[
+\Delta p-\frac{1}{c_0^2} \frac{\partial^2 p}{\partial t^2} = \lambda
+a(\mathbf r)\frac{d\delta(t)}{dt}.
+@f]
+and integrate it in time from $t=-\epsilon$ to $t=+\epsilon$. This leads to
+@f[
+P(\epsilon)-P(-\epsilon)
+-\frac{1}{c_0^2} \left[\frac{\partial p(\epsilon)}{\partial t} -
+                       \frac{\partial p(-\epsilon)}{\partial t}\right]
+ = \lambda a(\mathbf r) \int_{-\epsilon}^{\epsilon}\frac{d\delta(t)}{dt} \; dt.
+@f]
+Using integration by parts of the form
+@f[
+  \int_{-\epsilon}^{\epsilon}\varphi(t)\frac{d\delta(t)}{dt} \; dt
+  =
+  -\int_{-\epsilon}^{\epsilon}\frac{d\varphi(t)}{dt} \delta(t)\; dt
+@f]
+where we use that $\delta(\pm \epsilon)=0$ and inserting $\varphi(t)=1$, we
+see that in fact
+@f[
+  \int_{-\epsilon}^{\epsilon}\frac{d\delta(t)}{dt} \; dt
+  =
+  0.
+@f]
+
+Now, let $\epsilon\rightarrow 0$. Assuming that $P$ is a continuous function in
+time, we see that 
+@f[
+  P(\epsilon)-P(-\epsilon) \rightarrow 0,
+@f]
+and consequently
+@f[
+  \frac{\partial p(\epsilon)}{\partial t} -
+                       \frac{\partial p(-\epsilon)}{\partial t}
+                       \rightarrow 0.
+@f]
+However, we have assumed that $\frac{\partial p(-\epsilon)}{\partial t}=0$.
+Consequently, we obtain as the second initial condition that
+@f[
+  \frac{\partial p(0)}{\partial t} = 0,
+@f]
+completing the system of equations.
+
diff --git a/examples/step-24/doc/kind b/examples/step-24/doc/kind
new file mode 100644
index 0000000..86a44aa
--- /dev/null
+++ b/examples/step-24/doc/kind
@@ -0,0 +1 @@
+time dependent
diff --git a/examples/step-24/doc/project-1.tex b/examples/step-24/doc/project-1.tex
new file mode 100644
index 0000000..23b3609
--- /dev/null
+++ b/examples/step-24/doc/project-1.tex
@@ -0,0 +1,91 @@
+\documentclass[english]{article}
+\usepackage[T1]{fontenc}
+\usepackage[latin1]{inputenc}
+\usepackage{geometry}
+\geometry{verbose,letterpaper,tmargin=1in,bmargin=1in,lmargin=1in,rmargin=1in}
+\usepackage{amsmath}
+
+\makeatletter
+\usepackage{babel}
+\makeatother
+\begin{document}
+
+\section{FEM model}
+
+\begin{quote}
+The problem is
+
+\begin{align*}
+\Delta\bar{p}-\frac{1}{c_{0}^{2}}\frac{\partial^{2}\bar{p}}{\partial t^{2}} & =f\end{align*}
+
+
+with initial condition 
+
+\begin{eqnarray*}
+\bar{p}(0,\mathbf{r}) & = & b(r)\end{eqnarray*}
+
+
+Let 
+
+\begin{eqnarray*}
+v & = & \frac{\partial\bar{p}}{\partial t}\end{eqnarray*}
+
+
+then we have
+
+\begin{alignat*}{1}
+\bar{p}_{t}-v & =0\\
+\Delta\bar{p}-\frac{1}{c_{0}^{2}}\, v_{t} & =f\end{alignat*}
+
+
+and absorbing boundary condition 
+
+\begin{eqnarray*}
+\frac{\partial\bar{p}}{\partial\mathbf{n}} & = & -\frac{1}{c_{0}}\frac{\partial\bar{p}}{\partial t}\end{eqnarray*}
+
+
+$\frac{\partial\bar{p}}{\partial\mathbf{n}}$ is the normal derivative
+at the boundary. This is a time-dependent FEM model. By discretizing
+in time $t$, we have
+
+\begin{eqnarray*}
+(\frac{\bar{p}^{n}-\bar{p}^{n-1}}{\delta t},\phi)_{\Omega}-\,(\theta v^{n}+(1-\theta)v^{n-1},\phi)_{\Omega} & = & 0\\
+-(\nabla(\theta\bar{p}^{n}+(1-\theta)\bar{p}^{n-1}),\nabla\phi)_{\Omega}-\frac{1}{c_{0}}(\frac{\bar{p}^{n}-\bar{p}^{n-1}}{\delta t},\phi)_{\partial\Omega}-\frac{1}{c_{0}^{2}}(\frac{v^{n}-v^{n-1}}{\delta t},\phi)_{\Omega} & = & (\theta f^{n}+(1-\theta)f^{n-1},\phi)_{\Omega}\end{eqnarray*}
+
+
+we obtain
+
+\begin{eqnarray*}
+M\bar{p}^{n}-(\delta t\,\theta)Mv^{n} & = & M\bar{p}^{n-1}+\delta t\,(1-\theta)\, M\, v^{n-1}\\
+(-c_{0}^{2}\,\delta t\,\theta A-c_{0}\, B)\bar{p}^{n}-Mv^{n} & = & (c_{0}^{2}\,\delta t\,(1-\theta)A-c_{0}B)\bar{p}^{n-1}-M\, v^{n-1}+c_{0}^{2}\delta t(\theta F^{n}+(1-\theta)F^{n-1})\end{eqnarray*}
+
+
+Write the above two equations as a matrix form
+
+\begin{eqnarray*}
+\left(\begin{array}{cc}
+M & -(\delta t\,\theta)M\\
+c_{0}^{2}\,\delta t\,\theta A+c_{0}\, B & M\end{array}\right)\left(\begin{array}{c}
+\bar{p}^{n}\\
+v^{n}\end{array}\right) & = & \left(\begin{array}{c}
+G_{1}\\
+G_{2}\end{array}\right)\end{eqnarray*}
+
+
+where 
+
+\begin{center}$\left(\begin{array}{c}
+G_{1}\\
+G_{2}\end{array}\right)=\left(\begin{array}{c}
+M\bar{p}^{n-1}+\delta t\,(1-\theta)Mv^{n-1}\\
+(-c_{0}^{2}\,\delta t\,(1-\theta)A+c_{0}B)\bar{p}^{n-1}+M\, v^{n-1}-c_{0}^{2}\delta t(\theta F^{n}+(1-\theta)F^{n-1})\end{array}\right)$\end{center}
+
+From the above matrix, we can obtain
+
+\begin{eqnarray*}
+(M+(\delta t\,\theta\, c_{0})^{2}A+c_{0}\,\delta t\,\theta\, B)\bar{p}^{n} & = & G_{1}+(\delta t\,\theta)G_{2}\\
+Mv^{n} & = & -(c_{0}^{2}\,\delta t\,\theta\, A+c_{0}B)\bar{p}^{n}+G_{2}\end{eqnarray*}
+
+\end{quote}
+
+\end{document}
diff --git a/examples/step-24/doc/results.dox b/examples/step-24/doc/results.dox
new file mode 100644
index 0000000..6710199
--- /dev/null
+++ b/examples/step-24/doc/results.dox
@@ -0,0 +1,144 @@
+<h1>Results</h1>
+
+The program writes both graphical data for each time step as well as the
+values evaluated at each detector location to disk. We then 
+draw them in plots. Experimental data were also collected for comparison. 
+Currently our experiments have only been done in two dimensions by 
+circularly scanning a single detector. The tissue sample here is a thin slice 
+in the X-Y plane (Z=0), and we assume that signals from other Z directions 
+won't contribute to the data. Consequently, we only have to compare
+our experimental data with two dimensional simulated data.  
+
+
+<h3> One absorber </h3>
+
+This movie shows the thermoacoustic waves generated by a single small absorber
+propagating in the medium (in our simulation, we assume the medium is mineral
+oil, which has an acoustic speed of 1.437 $\frac{mm}{\mu s}$):
+
+<img src="http://www.dealii.org/images/steps/developer/step-24.one_movie.gif" alt="">
+
+For a single absorber, we of course have to change the
+<code>InitialValuesP</code> class accordingly.
+
+Next, let us compare experimental and computational results. The visualization
+uses a technique long used in seismology, where the data of each detector is
+plotted all in one graph. The way this is done is by offsetting each
+detector's signal a bit compared to the previous one. For example, here is a
+plot of the first four detectors (from bottom to top, with time in
+microseconds running from left to right) using the source setup used in the
+program, to make things a bit more interesting compared to the present case of
+only a single source:
+
+<img src="http://www.dealii.org/images/steps/developer/step-24.traces.png" alt="">
+
+One thing that can be seen, for example, is that the arrival of the second and
+fourth signals shifts to earlier times for greater detector numbers (i.e. the
+topmost ones), but not the first and the third; this can be interpreted to
+mean that the origin of these signals must be closer to the latter detectors
+than to the former ones.
+
+If we stack not only 4, but all 160 detectors in one graph, the individual
+lines blur, but where they run together they create a pattern of darker or
+lighter grayscales.  The following two figures show the results obtained at
+the detector locations stacked in that way. The left figure is obtained from
+experiments, and the right is the simulated data.
+In the experiment, a single small strong absorber was embedded in
+weaker absorbing tissue:
+
+<TABLE WIDTH="100%">
+<tr>
+<td>
+<img src="http://www.dealii.org/images/steps/developer/step-24.one.png" alt="">
+</td>
+<td>
+<img src="http://www.dealii.org/images/steps/developer/step-24.one_s.png" alt="">
+</td>
+</tr>
+</table>
+
+It is obvious that the source location is closer to the detectors at angle
+$180^\circ$. All the other signals that can be seen in the experimental data
+result from the fact that there are weak absorbers also in the rest of the
+tissue, which surrounds the signals generated by the small strong absorber in
+the center. On the other hand, in the simulated data, we only simulate the
+small strong absorber.
+
+In reality, detectors have limited bandwidth. The thermoacoustic waves passing
+through the detector will therefore be filtered. By using a high-pass filter
+(implemented in MATLAB and run against the data file produced by this program),
+the simulated results can be made to look closer to the experimental
+data:
+
+<img src="http://www.dealii.org/images/steps/developer/step-24.one_sf.png" alt="">
+
+In our simulations, we see spurious signals behind the main wave that
+result from numerical artifacts. This problem can be alleviated by using a
+finer mesh, resulting in the following plot:
+ 
+<img src="http://www.dealii.org/images/steps/developer/step-24.one_s2.png" alt="">
+
+
+
+<h3>Multiple absorbers</h3>
+
+To further verify the program, we will also show simulation results for
+multiple absorbers. This corresponds to the case that is actually implemented
+in the program. The following movie shows the propagation of the generated
+thermoacoustic waves in the medium by multiple absorbers:
+
+<img src="http://www.dealii.org/images/steps/developer/step-24.multi_movie.gif" alt="">
+
+Experimental data and our simulated data are compared in the following two
+figures:
+<TABLE WIDTH="100%">
+<tr>
+<td>
+<img src="http://www.dealii.org/images/steps/developer/step-24.multi.png" alt="">
+</td>
+<td>
+<img src="http://www.dealii.org/images/steps/developer/step-24.multi_s.png" alt="">
+</td>
+</tr>
+</table>
+
+Note that in the experimental data, the first signal (i.e. the left-most dark
+line) results from absorption at the tissue boundary, and therefore reaches
+the detectors first and before any of the signals from the interior. This
+signal is also faintly visible at the end of the traces, around 30 $\mu s$,
+which indicates that the signal travelled through the entire tissue to reach
+detectors at the other side, after all the signals originating from the
+interior have reached them.
+
+As before, the numerical result better matches experimental ones by applying a
+bandwidth filter that matches the actual behavior of detectors (left) and by
+choosing a finer mesh (right):
+
+<TABLE WIDTH="100%">
+<tr>
+<td>
+<img src="http://www.dealii.org/images/steps/developer/step-24.multi_sf.png" alt="">
+</td>
+<td>
+<img src="http://www.dealii.org/images/steps/developer/step-24.multi_s2.png" alt="">
+</td>
+</tr>
+</table>
+
+One of the important differences between the left and the right figure is that
+the curves look much less "angular" at the right. The angularity comes from
+the fact that while waves in the continuous equation travel equally fast in
+all directions, this isn't the case after discretization: there, waves that
+travel diagonal to cells move at slightly different speeds to those that move
+parallel to mesh lines. This anisotropy leads to wave fronts that aren't
+perfectly circular (and would produce sinusoidal signals in the stacked
+plots), but are bulged out in certain directions. To make things worse, the
+circular mesh we use (see for example step-6 for a view of the
+coarse mesh) is not isotropic either. The net result is that the signal fronts
+are not sinusoidal unless the mesh is sufficiently fine. The right image is a
+lot better in this respect, though artifacts in the form of trailing spurious
+waves can still be seen.
+
+
+
+
diff --git a/examples/step-24/doc/tooltip b/examples/step-24/doc/tooltip
new file mode 100644
index 0000000..8c54d3f
--- /dev/null
+++ b/examples/step-24/doc/tooltip
@@ -0,0 +1 @@
+The wave equation with absorbing boundary conditions. Extracting point values.
diff --git a/examples/step-24/step-24.cc b/examples/step-24/step-24.cc
new file mode 100644
index 0000000..0e4294a
--- /dev/null
+++ b/examples/step-24/step-24.cc
@@ -0,0 +1,595 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2006 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Author: Xing Jin, Wolfgang Bangerth, Texas A&M University, 2006
+ */
+
+
+// @sect3{Include files}
+
+// The following have all been covered previously:
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/utilities.h>
+
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/precondition.h>
+#include <deal.II/lac/constraint_matrix.h>
+
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/manifold_lib.h>
+
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_values.h>
+
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/numerics/matrix_tools.h>
+#include <deal.II/numerics/vector_tools.h>
+
+#include <fstream>
+#include <iostream>
+
+// This is the only new one: We will need a library function defined in the
+// namespace GridTools that computes the minimal cell diameter.
+#include <deal.II/grid/grid_tools.h>
+
+// The last step is as in all previous programs:
+namespace Step24
+{
+  using namespace dealii;
+
+  // @sect3{The "forward problem" class template}
+
+  // The first part of the main class is exactly as in step-23 (except for the
+  // name):
+  template <int dim>
+  class TATForwardProblem
+  {
+  public:
+    TATForwardProblem ();
+    void run ();
+
+  private:
+    void setup_system ();
+    void solve_p ();
+    void solve_v ();
+    void output_results () const;
+
+    Triangulation<dim>   triangulation;  // declared before dof_handler, which is built from it
+    FE_Q<dim>            fe;
+    DoFHandler<dim>      dof_handler;
+
+    ConstraintMatrix constraints;        // closed without entries; used when projecting initial values
+
+    SparsityPattern      sparsity_pattern;  // shared by all four matrices
+    SparseMatrix<double> system_matrix;     // M + (c_0 k theta)^2 A + c_0 k theta B
+    SparseMatrix<double> mass_matrix;       // M
+    SparseMatrix<double> laplace_matrix;    // A
+
+    Vector<double>       solution_p, solution_v;          // current time step
+    Vector<double>       old_solution_p, old_solution_v;  // previous time step
+    Vector<double>       system_rhs_p, system_rhs_v;      // right hand sides of the two solves
+
+    double time, time_step;
+    unsigned int timestep_number;
+    const double theta;                  // time stepping parameter, set in the constructor
+
+    //  Here's what's new: first, we need the boundary mass matrix $B$ that
+    //  came out of the absorbing boundary condition. Likewise, since this
+    //  time we consider a realistic medium, we must have a measure of the
+    //  wave speed $c_0$ that will enter all the formulas with the Laplace
+    //  matrix (which we still define as $(\nabla \phi_i,\nabla \phi_j)$):
+    SparseMatrix<double> boundary_matrix;
+    const double wave_speed;
+
+    // The last thing we have to take care of is that we want to evaluate
+    // the solution at a certain number of detector locations. We need an
+    // array to hold these locations, declared here and filled in the
+    // constructor:
+    std::vector<Point<dim> > detector_locations;
+  };
+
+
+  // @sect3{Equation data}
+
+  // As usual, we have to define our initial values, boundary conditions, and
+  // right hand side functions. Except things are a bit simpler this time: we
+  // are to consider a problem that is driven by initial conditions, so there
+  // is no right hand side function (though you could look up in step-23 to
+  // see how this can be done). Secondly, there are no boundary conditions: the
+  // entire boundary of the domain consists of absorbing boundary
+  // conditions. That only leaves initial conditions, and there things are
+  // simple too since for this particular application only nonzero initial
+  // conditions for the pressure are prescribed, not for the velocity (which
+  // is zero at the initial time).
+  //
+  // So this is all we need: a class that specifies initial conditions for the
+  // pressure. In the physical setting considered in this program, these are
+  // small absorbers, which we model as a series of little circles where we
+  // assume that the pressure surplus is one, whereas there is no absorption,
+  // and therefore no pressure surplus, everywhere else. This is how we do it
+  // (note that if we wanted to expand this program to not only compile but
+  // also to run, we would have to initialize the sources with
+  // three-dimensional source locations):
+  template <int dim>
+  class InitialValuesP : public Function<dim>
+  {
+  public:
+    // Nothing to set up beyond default-constructing the base class.
+    InitialValuesP () : Function<dim>() {}
+
+    // Initial pressure at the point p. The second argument is part of the
+    // signature, but the implementation in this file leaves it unnamed and
+    // never reads it.
+    virtual double value (const Point<dim> &p,
+                          const unsigned int  component = 0) const;
+
+  private:
+    // One absorber: a center together with a radius. Both members are
+    // const, so a Source cannot be modified after construction.
+    struct Source
+    {
+      Source (const Point<dim> &center, const double extent)
+        : location (center), radius (extent)
+      {}
+
+      const Point<dim> location;
+      const double     radius;
+    };
+  };
+
+
+  template <int dim>
+  double InitialValuesP<dim>::value (const Point<dim> &p,
+                                     const unsigned int /*component*/) const
+  {
+    static const Source absorbers[] =   // constructed once, on the first call
+    {
+      Source (Point<dim> (0, 0),         0.025),
+      Source (Point<dim> (-0.135, 0),    0.05),
+      Source (Point<dim> (0.17, 0),      0.03),
+      Source (Point<dim> (-0.25, 0),     0.02),
+      Source (Point<dim> (-0.05, -0.15), 0.015)
+    };
+    const Source *const last = absorbers + sizeof(absorbers)/sizeof(*absorbers);
+    for (const Source *s = absorbers; s != last; ++s)
+      if (s->radius > p.distance (s->location))
+        return 1;   // p lies strictly inside this absorber
+    return 0;       // p lies outside every absorber
+  }
+
+
+  // @sect3{Implementation of the <code>TATForwardProblem</code> class}
+
+  // Let's start again with the constructor. Setting the member variables is
+  // straightforward. We use the acoustic wave speed of mineral oil (in
+  // millimeters per microsecond, a common unit in experimental biomedical
+  // imaging) since this is the medium in which many of the experiments we
+  // want to compare against are performed. Crank-Nicolson is used again,
+  // i.e. theta is set to 0.5. The time step is later selected to satisfy $k =
+  // \frac hc$
+  template <int dim>
+  TATForwardProblem<dim>::TATForwardProblem ()
+    :
+    fe (1),
+    dof_handler (triangulation),
+    theta (0.5),
+    wave_speed (1.437)
+  {
+    // The second task in the constructor is to initialize the array that
+    // holds the detector locations. The results of this program were compared
+    // with experiments in which the step size of the detector spacing is 2.25
+    // degrees, corresponding to 160 detector locations. The radius of the
+    // scanning circle is selected to be half way between the center and the
+    // boundary to avoid that the remaining reflections from the imperfect
+    // boundary condition spoil our numerical results.
+    //
+    // The detector locations are computed in clockwise order, starting at
+    // angle $2\pi$, from an integer index: decrementing a floating point
+    // angle instead would let round-off decide whether we get 160 or 161
+    // detectors. This only works in 2d, which we guard with an assertion; in
+    // 3d, code to initialize the detector locations would have to be added.
+    Assert (dim == 2, ExcNotImplemented());
+
+    const double       detector_radius = 0.5;
+    const unsigned int n_detectors     = 160;   // one every 2.25 degrees
+
+    for (unsigned int i=0; i<n_detectors; ++i)
+      {
+        const double detector_angle = 2*numbers::PI*(n_detectors-i)/n_detectors;
+        detector_locations.push_back (Point<dim> (std::cos(detector_angle),
+                                                  std::sin(detector_angle)) *
+                                      detector_radius);
+      }
+  }
+
+
+
+  // @sect4{TATForwardProblem::setup_system}
+
+  // The following system is pretty much what we've already done in step-23,
+  // but with two important differences. First, we have to create a circular
+  // (or spherical) mesh around the origin, with a radius of 1. This is nothing
+  // new: we've done so before in step-6, step-10, and step-11, where we also
+  // explain how to attach a boundary object to a triangulation to be used
+  // whenever the triangulation needs to know where new boundary points lie
+  // when a cell is refined. Following this, the mesh is refined a number of
+  // times.
+  //
+  // One thing we had to make sure is that the time step satisfies the CFL
+  // condition discussed in the introduction of step-23. Back in that program,
+  // we ensured this by hand by setting a timestep that matches the mesh
+  // width, but that was error prone because if we refined the mesh once more
+  // we would also have to make sure the time step is changed. Here, we do
+  // that automatically: we ask a library function for the minimal diameter of
+  // any cell. Then we set $k=\frac h{c_0}$. The only problem is: what exactly
+  // is $h$? The point is that there is really no good theory on this question
+  // for the wave equation. It is known that for uniformly refined meshes
+  // consisting of rectangles, $h$ is the minimal edge length. But for meshes
+  // on general quadrilaterals, the exact relationship appears to be unknown,
+  // i.e. it is unknown what properties of cells are relevant for the CFL
+  // condition. The problem is that the CFL condition follows from knowledge
+  // of the smallest eigenvalue of the Laplace matrix, and that can only be
+  // computed analytically for simply structured meshes.
+  //
+  // The upshot of all this is that we're not quite sure what exactly we
+  // should take for $h$. The function GridTools::minimal_cell_diameter
+  // computes the minimal diameter of all cells. If the cells were all squares
+  // or cubes, then the minimal edge length would be the minimal diameter
+  // divided by <code>std::sqrt(dim)</code>. We simply generalize this,
+  // without theoretical justification, to the case of non-uniform meshes.
+  //
+  // The only other significant change is that we need to build the boundary
+  // mass matrix. We will comment on this further down below.
+  template <int dim>
+  void TATForwardProblem<dim>::setup_system ()
+  {
+    const Point<dim> center;
+    GridGenerator::hyper_ball (triangulation, center, 1.);
+    static const SphericalManifold<dim> boundary_description (center);
+    triangulation.set_all_manifold_ids_on_boundary(0);
+    triangulation.set_manifold (0,boundary_description);
+    triangulation.refine_global (7);
+    // Time step: k = h_min / (c_0 sqrt(dim)), h_min = minimal cell diameter.
+    time_step = GridTools::minimal_cell_diameter(triangulation) /
+                wave_speed /
+                std::sqrt (1.*dim);
+
+    std::cout << "Number of active cells: "
+              << triangulation.n_active_cells()
+              << std::endl;
+
+    dof_handler.distribute_dofs (fe);
+
+    std::cout << "Number of degrees of freedom: "
+              << dof_handler.n_dofs()
+              << std::endl
+              << std::endl;
+
+    DynamicSparsityPattern dsp(dof_handler.n_dofs(), dof_handler.n_dofs());
+    DoFTools::make_sparsity_pattern (dof_handler, dsp);
+    sparsity_pattern.copy_from (dsp);
+
+    system_matrix.reinit (sparsity_pattern);
+    mass_matrix.reinit (sparsity_pattern);
+    laplace_matrix.reinit (sparsity_pattern);
+
+    MatrixCreator::create_mass_matrix (dof_handler, QGauss<dim>(3),
+                                       mass_matrix);
+    MatrixCreator::create_laplace_matrix (dof_handler, QGauss<dim>(3),
+                                          laplace_matrix);
+
+    // The second difference, as mentioned, to step-23 is that we need to
+    // build the boundary mass matrix that grew out of the absorbing boundary
+    // conditions.
+    //
+    // A first observation would be that this matrix is much sparser than the
+    // regular mass matrix, since none of the shape functions with purely
+    // interior support contribute to this matrix. We could therefore
+    // optimize the storage pattern to this situation and build up a second
+    // sparsity pattern that only contains the nonzero entries that we
+    // need. There is a trade-off to make here: first, we would have to have a
+    // second sparsity pattern object, so that costs memory. Secondly, the
+    // matrix attached to this sparsity pattern is going to be smaller and
+    // therefore requires less memory; it would also be faster to perform
+    // matrix-vector multiplications with it. The final argument, however, is
+    // the one that tips the scale: we are not primarily interested in
+    // performing matrix-vector products with the boundary matrix alone (we need
+    // to do that for the right hand side vector once per time step), but
+    // mostly wish to add it up to the other matrices used in the first of the
+    // two equations since this is the one that is going to be multiplied with
+    // once per iteration of the CG method, i.e. significantly more often. It
+    // is now the case that the SparseMatrix::add function allows adding one
+    // matrix to another, but only if they use the same sparsity pattern (the
+    // reason being that we can't add nonzero entries to a matrix after the
+    // sparsity pattern has been created, so we simply require that the two
+    // matrices have the same sparsity pattern).
+    //
+    // So let's go with that:
+    boundary_matrix.reinit (sparsity_pattern);
+
+    // The second thing to do is to actually build the matrix. Here, we need
+    // to integrate over faces of cells, so first we need a quadrature object
+    // that works on <code>dim-1</code> dimensional objects. Secondly, the
+    // FEFaceValues variant of FEValues that works on faces, as its name
+    // suggests. And finally, the other variables that are part of the assembly
+    // machinery. All of this we put between curly braces to limit the scope
+    // of these variables to where we actually need them.
+    //
+    // The actual act of assembling the matrix is then fairly straightforward:
+    // we loop over all cells, over all faces of each of these cells, and then
+    // do something only if that particular face is at the boundary of the
+    // domain. Like this:
+    {
+      const QGauss<dim-1>  quadrature_formula(3);
+      FEFaceValues<dim> fe_values (fe, quadrature_formula,
+                                   update_values  |  update_JxW_values);
+
+      const unsigned int   dofs_per_cell = fe.dofs_per_cell;
+      const unsigned int   n_q_points    = quadrature_formula.size();
+
+      FullMatrix<double>   cell_matrix (dofs_per_cell, dofs_per_cell);
+
+      std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+
+      // Only faces at the boundary contribute to the boundary mass matrix.
+
+      typename DoFHandler<dim>::active_cell_iterator
+      cell = dof_handler.begin_active(),
+      endc = dof_handler.end();
+      for (; cell!=endc; ++cell)
+        for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+          if (cell->at_boundary(f))
+            {
+              cell_matrix = 0;
+
+              fe_values.reinit (cell, f);
+
+              for (unsigned int q_point=0; q_point<n_q_points; ++q_point)
+                for (unsigned int i=0; i<dofs_per_cell; ++i)
+                  for (unsigned int j=0; j<dofs_per_cell; ++j)
+                    cell_matrix(i,j) += (fe_values.shape_value(i,q_point) *
+                                         fe_values.shape_value(j,q_point) *
+                                         fe_values.JxW(q_point));
+
+              cell->get_dof_indices (local_dof_indices);
+              for (unsigned int i=0; i<dofs_per_cell; ++i)
+                for (unsigned int j=0; j<dofs_per_cell; ++j)
+                  boundary_matrix.add (local_dof_indices[i],
+                                       local_dof_indices[j],
+                                       cell_matrix(i,j));
+            }
+
+    }
+    // system_matrix = M + (c_0 k theta)^2 A + c_0 k theta B:
+    system_matrix.copy_from (mass_matrix);
+    system_matrix.add (time_step * time_step * theta * theta *
+                       wave_speed * wave_speed,
+                       laplace_matrix);
+    system_matrix.add (wave_speed * theta * time_step, boundary_matrix);
+
+    // Size all solution and right hand side vectors to the number of DoFs:
+    solution_p.reinit (dof_handler.n_dofs());
+    old_solution_p.reinit (dof_handler.n_dofs());
+    system_rhs_p.reinit (dof_handler.n_dofs());
+
+    solution_v.reinit (dof_handler.n_dofs());
+    old_solution_v.reinit (dof_handler.n_dofs());
+    system_rhs_v.reinit (dof_handler.n_dofs());
+    // No constraints are added in this function; the object is closed as is:
+    constraints.close ();
+  }
+
+
+  // @sect4{TATForwardProblem::solve_p and TATForwardProblem::solve_v}
+
+  // The following two functions, solving the linear systems for the pressure
+  // and the velocity variable, are taken pretty much verbatim (with the
+  // exception of the change of name from $u$ to $p$ of the primary variable)
+  // from step-23:
+  template <int dim>
+  void TATForwardProblem<dim>::solve_p ()
+  {
+    // Solve system_matrix * solution_p = system_rhs_p with unpreconditioned
+    // CG; the tolerance is 1e-8 times the l2 norm of the right hand side.
+    const double  tolerance = 1e-8*system_rhs_p.l2_norm();
+    SolverControl control (1000, tolerance);
+    SolverCG<>    solver (control);
+    solver.solve (system_matrix, solution_p, system_rhs_p, PreconditionIdentity());
+
+    std::cout << "   p-equation: " << control.last_step() << " CG iterations."
+              << std::endl;
+  }
+
+
+  template <int dim>
+  void TATForwardProblem<dim>::solve_v ()
+  {
+    // Solve mass_matrix * solution_v = system_rhs_v with unpreconditioned
+    // CG; the tolerance is 1e-8 times the l2 norm of the right hand side.
+    const double  tolerance = 1e-8*system_rhs_v.l2_norm();
+    SolverControl control (1000, tolerance);
+    SolverCG<>    solver (control);
+    solver.solve (mass_matrix, solution_v, system_rhs_v, PreconditionIdentity());
+
+    std::cout << "   v-equation: " << control.last_step() << " CG iterations."
+              << std::endl;
+  }
+
+
+
+  // @sect4{TATForwardProblem::output_results}
+
+  // The same holds here: the function is from step-23.
+  template <int dim>
+  void TATForwardProblem<dim>::output_results () const
+  {
+    // Name of the output file for the current time step.
+    std::string filename ("solution-");
+    filename += Utilities::int_to_string (timestep_number, 3);
+    filename += ".gnuplot";
+
+    // Both solution vectors go into the same file, labeled "P" and "V".
+    DataOut<dim> writer;
+    writer.attach_dof_handler (dof_handler);
+    writer.add_data_vector (solution_p, "P");
+    writer.add_data_vector (solution_v, "V");
+    writer.build_patches ();
+    std::ofstream output (filename.c_str());
+    writer.write_gnuplot (output);
+  }
+
+
+
+  // @sect4{TATForwardProblem::run}
+
+  // This function that does most of the work is pretty much again like in
+  // step-23, though we make things a bit clearer by using the vectors G1 and
+  // G2 mentioned in the introduction. Compared to the overall memory
+  // consumption of the program, the introduction of a few temporary vectors
+  // isn't doing much harm.
+  //
+  // The only changes to this function are: first, that we do not have to
+  // project initial values for the velocity $v$, since we know that it is
+  // zero. And second that we evaluate the solution at the detector locations
+  // computed in the constructor. This is done using the
+  // VectorTools::point_value function. These values are then written to a
+  // file that we open at the beginning of the function.
+  template <int dim>
+  void TATForwardProblem<dim>::run ()
+  {
+    setup_system();
+
+    // Initial conditions: project the pressure; the velocity starts at zero.
+    VectorTools::project (dof_handler, constraints, QGauss<dim>(3),
+                          InitialValuesP<dim>(), old_solution_p);
+    old_solution_v = 0;
+
+    // One line per time step: the time, then the pressure at each detector.
+    std::ofstream detector_data("detectors.dat");
+    Vector<double> scratch (solution_p.size());
+    Vector<double> G1 (solution_p.size()), G2 (solution_v.size());
+
+    // Weights of the terms assembled below; they are the same in every step.
+    const double g1_mass_weight       = time_step * (1-theta);
+    const double g2_laplace_weight    = -wave_speed * wave_speed * time_step * (1-theta);
+    const double rhs_p_g2_weight      = time_step * theta;
+    const double rhs_v_laplace_weight = -time_step * theta * wave_speed * wave_speed;
+    const double end_time = 0.7;
+    timestep_number = 1;
+    time = time_step;
+    while (time <= end_time)
+      {
+        std::cout << std::endl << "time_step " << timestep_number
+                  << " @ t=" << time << std::endl;
+
+        // G1 = M p^{n-1} + k (1-theta) M v^{n-1}
+        mass_matrix.vmult (G1, old_solution_p);
+        mass_matrix.vmult (scratch, old_solution_v);
+        G1.add (g1_mass_weight, scratch);
+
+        // G2 = M v^{n-1} - c_0^2 k (1-theta) A p^{n-1} + c_0 B p^{n-1}
+        mass_matrix.vmult (G2, old_solution_v);
+        laplace_matrix.vmult (scratch, old_solution_p);
+        G2.add (g2_laplace_weight, scratch);
+        boundary_matrix.vmult (scratch, old_solution_p);
+        G2.add (wave_speed, scratch);
+
+        // Pressure update with right hand side G1 + k theta G2.
+        system_rhs_p = G1;
+        system_rhs_p.add (rhs_p_g2_weight, G2);
+        solve_p ();
+
+        // Velocity update with right hand side G2 - (c_0^2 k theta A + c_0 B) p^n.
+        system_rhs_v = G2;
+        laplace_matrix.vmult (scratch, solution_p);
+        system_rhs_v.add (rhs_v_laplace_weight, scratch);
+        boundary_matrix.vmult (scratch, solution_p);
+        system_rhs_v.add (-wave_speed, scratch);
+        solve_v ();
+        output_results ();
+
+        detector_data << time;
+        for (unsigned int d=0; d<detector_locations.size(); ++d)
+          detector_data << " "
+                        << VectorTools::point_value (dof_handler, solution_p,
+                                                     detector_locations[d]) << " ";
+        detector_data << std::endl;
+
+        // The new solution becomes the old one for the next time step.
+        old_solution_p = solution_p;
+        old_solution_v = solution_v;
+        time += time_step;
+        ++timestep_number;
+      }
+  }
+}
+
+
+
+// @sect3{The <code>main</code> function}
+
+// What remains is the main function of the program. There is nothing here
+// that hasn't been shown in several of the previous programs:
+int main ()
+{
+  // Horizontal rule that frames the error reports printed below.
+  const char *const rule
+    = "----------------------------------------------------";
+
+  try
+    {
+      // Set up and run the two-dimensional forward problem. Any exception
+      // that escapes is turned by the handlers below into a message on
+      // std::cerr and an exit code of 1.
+      Step24::TATForwardProblem<2> forward_problem_solver;
+      forward_problem_solver.run ();
+    }
+  catch (std::exception &exc)
+    {
+      std::cerr << std::endl << std::endl
+                << rule << std::endl;
+      std::cerr << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << rule << std::endl;
+      return 1;
+    }
+  catch (...)
+    {
+      // Not derived from std::exception, so there is no message to print.
+      std::cerr << std::endl << std::endl
+                << rule << std::endl;
+      std::cerr << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << rule << std::endl;
+      return 1;
+    }
+
+  return 0;
+}
diff --git a/examples/step-25/CMakeLists.txt b/examples/step-25/CMakeLists.txt
new file mode 100644
index 0000000..cc628a8
--- /dev/null
+++ b/examples/step-25/CMakeLists.txt
@@ -0,0 +1,39 @@
+##
+#  CMake script for the step-25 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-25")
+
+# Declare all source files the target consists of. Here, this is only
+# the one ${TARGET}.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+# Locate deal.II 8.4; if that fails, stop with instructions for the user:
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+# Declare the project; the DEAL_II_* calls are presumably deal.II CMake macros:
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-25/doc/animate.sh b/examples/step-25/doc/animate.sh
new file mode 100755
index 0000000..15a516d
--- /dev/null
+++ b/examples/step-25/doc/animate.sh
@@ -0,0 +1,57 @@
+#!/bin/sh
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2006 - 2013 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Author: Ivan Christov, Wolfgang Bangerth, Texas A&M University, 2006
+#
+
+dim=1
+tstep=0
+tstepinc=20
+numtsteps=6300
+batchfile='animation.plt'
+
+# optional clean up...all these files will get overwritten
+rm "solution-"$dim"d-"*".png"
+rm "solution-"$dim"d.gif"
+
+# always gotta delete this one, though
+rm $batchfile
+
+# generate the gnuplot batch script to plot all the desired time steps
+while [ $tstep -lt $numtsteps ]
+do
+    if [ $tstep -lt 10 ]; then
+	ztstep="0000"$tstep
+    elif [ $tstep -lt 100 ]; then
+	ztstep="000"$tstep
+    elif [ $tstep -lt 1000 ]; then
+	ztstep="00"$tstep
+    elif [ $tstep -lt 10000 ]; then
+	ztstep="0"$tstep
+    else
+	ztstep=$tstep
+    fi
+
+    echo "call \"plot.plt\" $dim $ztstep" >> $batchfile
+    
+    let tstep=tstep+$tstepinc
+done
+
+gnuplot -persist $batchfile
+
+# use ImageMagick to create an animated gif from the PNG files
+convert -delay 0 -loop 0 "solution-"$dim"d-*.png" "solution-"$dim"d.gif"
diff --git a/examples/step-25/doc/builds-on b/examples/step-25/doc/builds-on
new file mode 100644
index 0000000..d00969b
--- /dev/null
+++ b/examples/step-25/doc/builds-on
@@ -0,0 +1 @@
+step-24
diff --git a/examples/step-25/doc/intro.dox b/examples/step-25/doc/intro.dox
new file mode 100644
index 0000000..2921259
--- /dev/null
+++ b/examples/step-25/doc/intro.dox
@@ -0,0 +1,275 @@
+<a name="Intro"></a> <h1>Introduction</h1>
+
+This program grew out of a student project by Ivan Christov at Texas A&M
+University. Most of the work for this program is by him.
+
+The goal of this program is to solve the sine-Gordon soliton equation
+in 1, 2 or 3 spatial dimensions. The motivation for solving this
+equation is that very little is known about the nature of the
+solutions in 2D and 3D, even though the 1D case has been studied
+extensively. 
+
+Rather facetiously, the sine-Gordon equation's moniker is a pun on the
+so-called Klein-Gordon equation, which is a relativistic version of
+the Schrödinger equation for particles with non-zero mass. The resemblance is not just
+superficial, the sine-Gordon equation has been shown to model some
+unified-field phenomena such as interaction of subatomic particles
+(see, e.g., Perring & Skyrme in Nuclear Physics <b>31</b>) and the
+Josephson (quantum) effect in superconductor junctions (see, e.g., <a
+href="http://en.wikipedia.org/wiki/Long_Josephson_junction">http://en.wikipedia.org/wiki/Long_Josephson_junction</a>).
+Furthermore, from the mathematical standpoint, since the sine-Gordon
+equation is "completely integrable," it is a candidate for study using
+the usual methods such as the inverse scattering
+transform. Consequently, over the years, many interesting
+solitary-wave, and even stationary, solutions to the sine-Gordon
+equation have been found. In these solutions, particles correspond to
+localized features. For more on the sine-Gordon equation, the
+inverse scattering transform and other methods for finding analytical
+soliton equations, the reader should consult the following "classical"
+references on the subject: G. L. Lamb's <i>Elements of Soliton
+Theory</i> (Chapter 5, Section 2) and G. B. Whitham's <i>Linear and
+Nonlinear Waves</i> (Chapter 17, Sections 10-13). 
+
+<h3>Statement of the problem</h3>
+The sine-Gordon initial-boundary-value problem (IBVP) we wish to solve
+consists of the following equations:
+\f{eqnarray*}
+  u_{tt}-\Delta u &=& -\sin(u) \quad\mbox{for}\quad (x,t) \in \Omega \times (t_0,t_f],\\
+  {\mathbf n} \cdot \nabla u &=& 0 \quad\mbox{for}\quad (x,t) \in \partial\Omega 
+           \times (t_0,t_f],\\
+  u(x,t_0) &=& u_0(x).
+\f}
+It is a nonlinear equation similar to the wave equation we
+discussed in step-23 and step-24.
+We have chosen to enforce zero Neumann boundary conditions in order for waves
+to reflect off the boundaries of our domain. It should be noted, however, that
+Dirichlet boundary conditions are not appropriate for this problem. Even
+though the solutions to the sine-Gordon equation are localized, it only makes
+sense to specify (Dirichlet) boundary conditions at $x=\pm\infty$, otherwise
+either a solution does not exist or only the trivial solution $u=0$ exists. 
+
+However, the form of the equation above is not ideal for numerical
+discretization. If we were to discretize the second-order time
+derivative directly and accurately, then  we would need a large
+stencil (i.e., several time steps would need to be kept in the
+memory), which could become expensive. Therefore, in complete analogy
+to what we did in step-23 and step-24,
+we split the 
+second-order (in time) sine-Gordon equation into a system of two
+first-order (in time) equations, which we call the split, or velocity,
+formulation. To this end, by setting $v = u_t$, it is easy to see that the sine-Gordon equation is equivalent to
+\f{eqnarray*}
+  u_t - v &=& 0,\\
+  v_t - \Delta u &=& -\sin(u). 
+\f}
+
+<h3>Discretization of the equations in time</h3>
+Now, we can discretize the split formulation in time using the
+$\theta$-method, which has a stencil of only two time steps. By
+choosing a $\theta\in [0,1]$, the latter discretization allows us to
+choose from a continuum of schemes. In particular, if we pick
+$\theta=0$ or $\theta=1$, we obtain the first-order accurate explicit
+or implicit Euler method, respectively. Another important choice is
+$\theta=\frac{1}{2}$, which gives the second-order accurate
+Crank-Nicolson scheme. Henceforth, a superscript $n$ denotes the
+values of the variables at the $n^{\mathrm{th}}$ time step, i.e. at
+$t=t_n:= n k$, where $k$ is the (fixed) time step size. Thus,
+the split formulation of the time-discretized sine-Gordon equation becomes
+\f{eqnarray*}
+  \frac{u^n - u^{n-1}}{k} - \left[\theta v^n + (1-\theta) v^{n-1}\right] &=& 0,\\
+  \frac{v^n - v^{n-1}}{k} - \Delta\left[\theta u^n + (1-\theta) u^{n-1}\right]
+  &=& -\sin\left[\theta u^n + (1-\theta) u^{n-1}\right]. 
+\f}
+
+We can simplify the latter via a bit of algebra. Eliminating $v^n$ from the first equation and rearranging, we obtain
+\f{eqnarray*}
+  \left[ 1-k^2\theta^2\Delta \right] u^n &=&
+         \left[ 1+k^2\theta(1-\theta)\Delta\right] u^{n-1} + k v^{n-1} 
+         - k^2\theta\sin\left[\theta u^n + (1-\theta) u^{n-1}\right],\\
+   v^n &=& v^{n-1} + k\Delta\left[ \theta u^n + (1-\theta) u^{n-1}\right]
+         - k\sin\left[ \theta u^n + (1-\theta) u^{n-1} \right].
+\f}
+
+It may seem as though we can just proceed to discretize the equations
+in space at this point. While this is true for the second equation
+(which is linear in $v^n$), this would not work for all $\theta$ since the
+first equation above is nonlinear. Therefore, a nonlinear solver must be
+implemented, then the equations can be discretized in space and solved. 
+
+To this end, we can use Newton's method. Given the nonlinear equation $F(u^n) = 0$, we produce successive approximations to $u^n$ as follows:
+\f{eqnarray*}
+  \mbox{ Find } \delta u^n_l \mbox{ s.t. } F'(u^n_l)\delta u^n_l = -F(u^n_l) 
+  \mbox{, set }  u^n_{l+1} = u^n_l + \delta u^n_l.
+\f}
+The iteration can be initialized with the old time step, i.e. $u^n_0 = u^{n-1}$, 
+and eventually it will produce a solution to the first equation of
+the split formulation (see above). For the time discretization of the
+sine-Gordon equation under consideration here, we have that 
+\f{eqnarray*}
+  F(u^n_l) &=&  \left[ 1-k^2\theta^2\Delta \right] u^n_l -  
+                 \left[ 1+k^2\theta(1-\theta)\Delta\right] u^{n-1} - k v^{n-1} 
+                 + k^2\theta\sin\left[\theta u^n_l + (1-\theta) u^{n-1}\right],\\
+  F'(u^n_l) &=& 1-k^2\theta^2\Delta - k^2\theta^2\cos\left[\theta u^n_l 
+                        + (1-\theta) u^{n-1}\right].
+\f}
+Notice that while $F(u^n_l)$ is a function, $F'(u^n_l)$ is an operator.
+
+<h3>Weak formulation of the time-discretized equations</h3>
+With hindsight, we choose both the solution and the test space to be $H^1(\Omega)$. Hence, multiplying by a test function $\varphi$ and integrating, we obtain the following variational (or weak) formulation of the split formulation (including the nonlinear solver for the first equation) at each time step:
+\f{eqnarray*}
+  &\mbox{ Find}& \delta u^n_l \in H^1(\Omega) \mbox{ s.t. } 
+  \left( F'(u^n_l)\delta u^n_l, \varphi \right)_{\Omega} 
+  = -\left(F(u^n_l), \varphi \right)_{\Omega} \;\forall\varphi\in H^1(\Omega),
+  \mbox{ set } u^n_{l+1} = u^n_l + \delta u^n_l,\; u^n_0 = u^{n-1}.\\
+  &\mbox{ Find}& v^n \in H^1(\Omega) \mbox{ s.t. }
+  \left( v^n, \varphi \right)_{\Omega} = \left( v^{n-1}, \varphi \right)_{\Omega} 
+         - k\theta\left( \nabla u^n, \nabla\varphi \right)_{\Omega} 
+         - k (1-\theta)\left( \nabla u^{n-1}, \nabla\varphi \right)_{\Omega} 
+         - k\left(\sin\left[ \theta u^n + (1-\theta) u^{n-1} \right],
+         \varphi \right)_{\Omega} \;\forall\varphi\in H^1(\Omega).
+\f}
+Note that we have used integration by parts and the zero Neumann
+boundary conditions on all terms involving the Laplacian
+operator. Moreover, $F(\cdot)$ and $F'(\cdot)$ are as defined above,
+and $(\cdot,\cdot)_{\Omega}$ denotes the usual $L^2$ inner product
+over the domain $\Omega$, i.e. $(f,g)_{\Omega} = \int_\Omega fg
+\,\mathrm{d}x$. Finally, notice that the first equation is, in fact,
+the definition of an iterative procedure, so it is solved multiple
+times during each time step until a stopping criterion is met. 
+
+<h3>Discretization of the weak formulation in space</h3>
+Using the Finite Element Method, we discretize the variational
+formulation in space. To this end, let $V_h$ be a finite-dimensional
+$H^1(\Omega)$-conforming finite element space ($\mathrm{dim}\, V_h = N
+< \infty$) with nodal basis $\{\varphi_1,\ldots,\varphi_N\}$. Now,
+we can expand all functions in the weak formulation (see above) in
+terms of the nodal basis. Henceforth, we shall denote by a capital
+letter the vector of coefficients (in the nodal basis) of a function
+denoted by the same letter in lower case; e.g., $u^n = \sum_{i=1}^N
+U^n_i \varphi_i$ where $U^n \in {R}^N$ and $u^n \in
+H^1(\Omega)$. Thus, the finite-dimensional version of the variational formulation requires that we solve the following matrix equations at each time step:
+@f{eqnarray*}
+  F_h'(U^{n,l})\delta U^{n,l} &=& -F_h(U^{n,l}), \qquad
+        U^{n,l+1} = U^{n,l} + \delta U^{n,l}, \qquad U^{n,0} = U^{n-1}; \\
+  MV^n &=& MV^{n-1} - k \theta AU^n -k (1-\theta) AU^{n-1} - k S(u^n,u^{n-1}).
+@f}
+Above, the matrix $F_h'(\cdot)$ and the vector $F_h(\cdot)$ denote the discrete versions of the gadgets discussed above, i.e.
+\f{eqnarray*}
+  F_h(U^{n,l}) &=&  \left[ M+k^2\theta^2A \right] U^{n,l} -  
+                \left[ M-k^2\theta(1-\theta)A \right] U^{n-1} - k MV^{n-1} 
+                + k^2\theta S(u^n_l, u^{n-1}),\\
+  F_h'(U^{n,l}) &=& M+k^2\theta^2A 
+                                - k^2\theta^2N(u^n_l,u^{n-1})
+\f}
+Again, note that the first matrix equation above is, in fact, the
+definition of an iterative procedure, so it is solved multiple times
+until a stopping criterion is met. Moreover, $M$ is the mass matrix,
+i.e. $M_{ij} = \left( \varphi_i,\varphi_j \right)_{\Omega}$, $A$ is
+the Laplace matrix, i.e. $A_{ij} = \left( \nabla \varphi_i, \nabla
+\varphi_j \right)_{\Omega}$, $S$ is the nonlinear term in the
+equation that defines our auxiliary velocity variable, i.e. $S_j(f,g) = \left(
+  \sin\left[ \theta f + (1-\theta) g\right], \varphi_j \right)_{\Omega}$, and
+$N$ is the nonlinear term in the Jacobian matrix of $F(\cdot)$,
+i.e. $N_{ij}(f,g) = \left( \cos\left[ \theta f + (1-\theta) g\right]\varphi_i,
+  \varphi_j \right)_{\Omega}$.
+
+What solvers can we use for the first equation? Let's look at the matrix we
+have to invert:
+@f[
+  (M+k^2\theta^2(A-N))_{ij} =
+  \int_\Omega (1-k^2\theta^2 \cos \alpha)
+  \varphi_i\varphi_j \; dx+k^2\theta^2\int_\Omega \nabla\varphi_i\nabla\varphi_j \; dx,
+@f]
+for some $\alpha$ that depends on the present and previous solution. First,
+note that the matrix is symmetric. In addition, if the time step $k$ is small
+enough, i.e. if $k\theta<1$, then the matrix is also going to be positive
+definite. In the program below, this will always be the case, so we will use
+the Conjugate Gradient method together with the SSOR method as
+preconditioner. We should keep in mind, however, that this will fail
+if we happen to use a bigger time step. Fortunately, in that case
+the solver will just throw an exception indicating a failure to converge,
+rather than silently producing a wrong result. If that happens, then we can
+simply replace the CG method by something that can handle indefinite symmetric
+systems. The GMRES solver is typically the standard method for all "bad"
+linear systems, but it is also a slow one. Possibly better would be a solver
+that utilizes the symmetry, such as, for example, SymmLQ, which is also
+implemented in deal.II.
+
+This program uses a clever optimization over step-23 and @ref
+step_24 "step-24": If you read the above formulas closely, it becomes clear
+that the velocity $V$ only ever appears in products with the mass matrix. In
+step-23 and step-24, we were, therefore, a bit
+wasteful: in each time step, we would solve a linear system with the mass
+matrix, only to multiply the solution of that system by $M$ again in the next
+time step. This can, of course, be avoided, and we do so in this program.
+
+
+<h3>The test case</h3>
+
+There are a few analytical solutions for the sine-Gordon equation, both in 1D
+and 2D. In particular, the program as is computes the solution to a problem
+with a single kink-like solitary wave initial condition.  This solution is
+given by Leibbrandt in \e Phys. \e Rev. \e Lett. \b 41(7), and is implemented
+in the <code>ExactSolution</code> class.
+
+It should be noted that this closed-form solution, strictly speaking, only holds
+for the infinite-space initial-value problem (not the Neumann
+initial-boundary-value problem under consideration here). However, given that
+we impose \e zero Neumann boundary conditions, we expect that the solution to
+our initial-boundary-value problem would be close to the solution of the
+infinite-space initial-value problem, if reflections of waves off the
+boundaries of our domain do \e not occur. In practice, this is of course not
+the case, but we can at least assume that this were so.
+
+The constants $\vartheta$ and $\lambda$ in the 2D solution and $\vartheta$,
+$\phi$ and $\tau$ in the 3D solution are called the Bäcklund
+transformation parameters. They control such things as the orientation and
+steepness of the kink. For the purposes of testing the code against the exact
+solution, one should choose the parameters so that the kink is aligned with
+the grid.
+
+The solutions that we implement in the <code>ExactSolution</code> class are
+these:
+<ul>
+  <li>In 1D:
+  @f[
+  u(x,t) = 
+  -4 \arctan\left[
+     \frac{m}{\sqrt{1-m^2}}
+     \frac{\sin\left(\sqrt{1-m^2}t+c_2\right)}
+     {\cosh\left(mx+c_1\right)}
+     \right],
+  @f]
+  where we choose $m=\frac 12, c_1=c_2=0$.
+
+  In 1D, more interesting analytical solutions are known. Many of them are
+  listed on http://mathworld.wolfram.com/Sine-GordonEquation.html .
+
+  <li>In 2D:
+  @f[
+    u(x,y,t) = 4 \arctan \left[a_0 e^{s\xi}\right],
+  @f]
+  where $\xi$ is defined as
+  @f[
+    \xi = x \cos\vartheta + \sin(\vartheta) (y\cosh\lambda + t\sinh \lambda),
+  @f]
+  and where we choose $\vartheta=\frac \pi 4, \lambda=a_0=s=1$.
+
+  <li>In 3D:
+  @f[
+    u(x,y,z,t) = 4 \arctan \left[c_0 e^{s\xi}\right],
+  @f]
+  where $\xi$ is defined as
+  @f[
+    \xi = x \cos\vartheta + y \sin \vartheta \cos\phi +
+          \sin \vartheta \sin\phi (z\cosh\tau + t\sinh \tau),
+  @f]
+  and where we choose $\vartheta=\phi=\frac{\pi}{4}, \tau=c_0=s=1$.
+</ul>
+
+
+Since it makes it easier to play around, the <code>InitialValues</code> class
+that is used to set — surprise! — the initial values of our
+simulation simply queries the class that describes the exact solution for the
+value at the initial time, rather than duplicating the effort to implement a
+solution function. 
diff --git a/examples/step-25/doc/kind b/examples/step-25/doc/kind
new file mode 100644
index 0000000..86a44aa
--- /dev/null
+++ b/examples/step-25/doc/kind
@@ -0,0 +1 @@
+time dependent
diff --git a/examples/step-25/doc/plot.plt b/examples/step-25/doc/plot.plt
new file mode 100644
index 0000000..2c63689
--- /dev/null
+++ b/examples/step-25/doc/plot.plt
@@ -0,0 +1,20 @@
+set data style lines
+set hidden3d
+
+set xrange [-10:10]
+set yrange [-2.2:2.2]
+set zrange [-1:8]
+set xlabel "x"
+if ($0==1) set ylabel "u"
+if ($0==2) set ylabel "y"
+if ($0==2) set zlabel "u"
+
+set terminal png
+set output "solution-$0d-$1.png" 
+
+if ($0==1) pl "solution-$0d-$1.gpl"
+if ($0==2) spl "solution-$0d-$1.gpl"
+
+#set terminal x11
+#replot
+#pause 5
diff --git a/examples/step-25/doc/results.dox b/examples/step-25/doc/results.dox
new file mode 100644
index 0000000..f8e8bdf
--- /dev/null
+++ b/examples/step-25/doc/results.dox
@@ -0,0 +1,143 @@
+<h1>Results</h1>
+The explicit Euler time stepping scheme  ($\theta=0$) performs adequately for the problems we wish to solve. Unfortunately, a rather small time step has to be chosen due to stability issues --- $k\sim h/10$ appears to work for most the simulations we performed. On the other hand, the Crank-Nicolson scheme ($\theta=\frac{1}{2}$) is unconditionally stable, and (at least for the case of the 1D breather) we can pick the time step to be as large as $25h$ without any ill effects on the solutio [...]
+
+In the simulations below, we solve the sine-Gordon equation on the interval $\Omega =
+[-10,10]$ in 1D and on the square $\Omega = [-10,10]\times [-10,10]$ in 2D. In
+each case, the respective grid is refined uniformly 6 times, i.e. $h\sim
+2^{-6}$.
+
+<h3>An (1+1)-d Solution</h3>
+The first example we discuss is the so-called 1D (stationary) breather
+solution of the sine-Gordon equation. The breather has the following
+closed-form expression, as mentioned in the Introduction:
+\f[
+u_{\mathrm{breather}}(x,t) = -4\arctan \left(\frac{m}{\sqrt{1-m^2}} \frac{\sin\left(\sqrt{1-m^2}t +c_2\right)}{\cosh(mx+c_1)} \right),
+\f]
+where $c_1$, $c_2$ and $m<1$ are constants. In the simulation below, we have chosen $c_1=0$, $c_2=0$, $m=0.5$. Moreover, it is known that the period of oscillation of the breather is $2\pi\sqrt{1-m^2}$, hence we have chosen $t_0=-5.4414$ and $t_f=2.7207$ so that we can observe three oscillations of the solution. Then, taking $u_0(x) = u_{\mathrm{breather}}(x,t_0)$, $\theta=0$ and $k=h/10$, the program computed the following solution.
+
+<img src="http://www.dealii.org/images/steps/developer/step-25.1d-breather.gif" alt="Animation of the 1D stationary breather.">
+
+Though the program does not show how to do this, another way to visualize the
+(1+1)-d solution is to use output generated by the DataOutStack class; it
+allows one to "stack" the solutions of individual time steps, so that we get
+2D space-time graphs from 1D time-dependent
+solutions. This produces the space-time plot below instead of the animation
+above.
+
+<img src="http://www.dealii.org/images/steps/developer/step-25.1d-breather_stp.png" alt="A space-time plot of the 1D stationary breather.">
+
+Furthermore, since the breather is an analytical solution of the sine-Gordon
+equation, we can use it to validate our code, although we have to assume that
+the error introduced by our choice of Neumann boundary conditions is small
+compared to the numerical error. Under this assumption, one could use the
+VectorTools::integrate_difference function to compute the difference between
+the numerical solution and the function described by the
+<code>ExactSolution</code> class of this program. For the
+simulation shown in the two images above, the $L^2$ norm of the error in the
+finite element solution at each time step remained on the order of
+$10^{-2}$. Hence, we can conclude that the numerical method has been
+implemented correctly in the program.
+
+
+<h3>A few (2+1)D Solutions</h3>
+
+The only analytical solution to the sine-Gordon equation in (2+1)D that can be found in the literature is the so-called kink solitary wave. It has the following closed-form expression:
+  @f[
+    u(x,y,t) = 4 \arctan \left[a_0 e^{s\xi}\right]
+  @f]
+with
+  @f[
+    \xi = x \cos\vartheta + \sin(\vartheta) (y\cosh\lambda + t\sinh \lambda)
+  @f]
+where $a_0$, $\vartheta$ and $\lambda$ are constants. In the simulation below
+we have chosen $a_0=\lambda=1$. Notice that if $\vartheta=\pi$ the kink is
+stationary, hence it would make a good solution against which we can
+validate the program in 2D because no reflections off the boundary of the
+domain occur.
+
+The simulation shown below was performed with $u_0(x) = u_{\mathrm{kink}}(x,t_0)$, $\theta=\frac{1}{2}$, $k=20h$, $t_0=1$ and $t_f=500$. The $L^2$ norm of the error of the finite element solution at each time step remained on the order of $10^{-2}$, showing that the program is working correctly in 2D, as well as 1D. Unfortunately, the solution is not very interesting, nonetheless we have included a snapshot of it below for completeness.
+
+<img src="http://www.dealii.org/images/steps/developer/step-25.2d-kink.png" alt="Stationary 2D kink.">
+
+Now that we have validated the code in 1D and 2D, we move to a problem where the analytical solution is unknown.
+
+To this end, we rotate the kink solution discussed above about the $z$
+axis: we let  $\vartheta=\frac{\pi}{4}$. The latter results in a
+solitary wave that is not aligned with the grid, so reflections occur
+at the boundaries of the domain immediately. For the simulation shown
+below, we have taken $u_0(x)=u_{\mathrm{kink}}(x,t_0)$,
+$\theta=\frac{2}{3}$, $k=20h$, $t_0=0$ and $t_f=20$. Moreover, we had
+to pick $\theta=\frac{2}{3}$ because for any $\theta\le\frac{1}{2}$
+oscillations arose at the boundary, which are likely due to the scheme
+and not the equation, thus picking a value of $\theta$ a good bit into
+the "exponentially damped" spectrum of the time stepping schemes
+assures these oscillations are not created.
+
+<img src="http://www.dealii.org/images/steps/developer/step-25.2d-angled_kink.gif" alt="Animation of a moving 2D kink, at 45 degrees to the axes of the grid, showing boundary effects.">
+
+Another interesting solution to the sine-Gordon equation (which cannot be
+obtained analytically) can be produced by using two 1D breathers to construct
+the following separable 2D initial condition:
+\f[
+  u_0(x) =
+  u_{\mathrm{pseudobreather}}(x,t_0) =
+  16\arctan \left(
+    \frac{m}{\sqrt{1-m^2}}
+    \frac{\sin\left(\sqrt{1-m^2}t_0\right)}{\cosh(mx_1)} \right)
+  \arctan \left(
+    \frac{m}{\sqrt{1-m^2}}
+    \frac{\sin\left(\sqrt{1-m^2}t_0\right)}{\cosh(mx_2)} \right),
+\f]
+where $x=(x_1,x_2)\in{R}^2$, $m=0.5<1$ as in the 1D case we discussed
+above. For the simulation shown below, we have chosen $\theta=\frac{1}{2}$,
+$k=10h$, $t_0=-5.4414$ and $t_f=2.7207$. The solution is pretty interesting
+--- it acts like a breather (as far as the pictures are concerned); however,
+it appears to break up and reassemble, rather than just oscillate.
+
+<img src="http://www.dealii.org/images/steps/developer/step-25.2d-pseudobreather.gif" alt="Animation of a 2D pseudobreather.">
+
+
+<a name="extensions"></a>
+<h3>Possibilities for extensions</h3>
+
+It is instructive to change the initial conditions. Most choices will not lead
+to solutions that stay localized (in the soliton community, such
+solutions are called "stationary", though the solution does change
+with time), but lead to solutions where the wave-like
+character of the equation dominates and a wave travels away from the location
+of a localized initial condition. For example, it is worth playing around with
+the <code>InitialValues</code> class, by replacing the call to the
+<code>ExactSolution</code> class by something like this function:
+@f[
+  u_0(x,y) = \cos\left(\frac x2\right)\cos\left(\frac y2\right)
+@f]
+if $|x|,|y|\le \frac\pi 2$, and $u_0(x,y)=0$ outside this region.
+
+A second area would be to investigate whether the scheme is
+energy-preserving. For the pure wave equation, discussed in @ref
+step_23 "step-23", this is the case if we choose the time stepping
+parameter such that we get the Crank-Nicolson scheme. One could do a
+similar thing here, noting that the energy in the sine-Gordon solution
+is defined as
+@f[
+  E(t) = \frac 12 \int_\Omega \left(\frac{\partial u}{\partial
+  t}\right)^2
+  + \left(\nabla u\right)^2 + 2 (1-\cos u) \; dx.
+@f]
+(We use $1-\cos u$ instead of $-\cos u$ in the formula to ensure that all
+contributions to the energy are positive, and so that decaying solutions have
+finite energy on unbounded domains.)
+
+Beyond this, there are two obvious areas:
+
+- Clearly, adaptivity (i.e. time-adaptive grids) would be of interest
+  to problems like these. Their complexity leads us to leave this out
+  of this program again, though the general comments in the
+  introduction of @ref step_23 "step-23" remain true.
+
+- Faster schemes to solve this problem. While computers today are
+  plenty fast enough to solve 2d and, frequently, even 3d stationary
+  problems within not too much time, time dependent problems present
+  an entirely different class of problems. We address this topic in
+  step-48 where we show how to solve this problem in parallel and
+  without assembling or inverting any matrix at all.
diff --git a/examples/step-25/doc/tooltip b/examples/step-25/doc/tooltip
new file mode 100644
index 0000000..2ba776a
--- /dev/null
+++ b/examples/step-25/doc/tooltip
@@ -0,0 +1 @@
+The nonlinear sine-Gordon soliton equation
diff --git a/examples/step-25/step-25.cc b/examples/step-25/step-25.cc
new file mode 100644
index 0000000..cf76f0e
--- /dev/null
+++ b/examples/step-25/step-25.cc
@@ -0,0 +1,757 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2006 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Author: Ivan Christov, Wolfgang Bangerth, Texas A&M University, 2006
+ */
+
+
+// @sect3{Include files and global variables}
+
+// For an explanation of the include files, the reader should refer to the
+// example programs step-1 through step-4. They are in the standard order,
+// which is <code>base</code> -- <code>lac</code> -- <code>grid</code> --
+// <code>dofs</code> -- <code>fe</code> -- <code>numerics</code> (since each
+// of these categories roughly builds upon previous ones), then a few C++
+// headers for file input/output and string streams.
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/precondition.h>
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/matrix_tools.h>
+#include <deal.II/numerics/data_out.h>
+
+#include <fstream>
+#include <iostream>
+
+
+// The last step is as in all previous programs:
+namespace Step25
+{
+  using namespace dealii;
+
+
+  // @sect3{The <code>SineGordonProblem</code> class template}
+
+  // The entire algorithm for solving the problem is encapsulated in this
+  // class. As in previous example programs, the class is declared with a
+  // template parameter, which is the spatial dimension, so that we can solve
+  // the sine-Gordon equation in one, two or three spatial dimensions. For
+  // more on the dimension-independent class-encapsulation of the problem, the
+  // reader should consult step-3 and step-4.
+  //
+  // Compared to step-23 and step-24, there isn't anything newsworthy in the
+  // general structure of the program (though there is of course in the inner
+  // workings of the various functions!). The most notable difference is the
+  // presence of the two new functions <code>compute_nl_term</code> and
+  // <code>compute_nl_matrix</code> that compute the nonlinear contributions
+  // to the system matrix and right-hand side of the first equation, as
+  // discussed in the Introduction. In addition, we have to have a vector
+  // <code>solution_update</code> that contains the nonlinear update to the
+  // solution vector in each Newton step.
+  //
+  // As also mentioned in the introduction, we do not store the velocity
+  // variable in this program, but the mass matrix times the velocity. This is
+  // done in the <code>M_x_velocity</code> variable (the "x" is intended to
+  // stand for "times").
+  //
+  // Finally, the <code>output_timestep_skip</code> variable stores the number
+  // of time steps to be taken each time before graphical output is to be
+  // generated. This is of importance when using fine meshes (and consequently
+  // small time steps) where we would run lots of time steps and create lots
+  // of output files of solutions that look almost the same in subsequent
+  // files. This only clogs up our visualization procedures and we should
+  // avoid creating more output than we are really interested in. Therefore,
+  // if this variable is set to a value $n$ bigger than one, output is
+  // generated only every $n$th time step.
+  template <int dim>
+  class SineGordonProblem
+  {
+  public:
+    SineGordonProblem ();
+    void run ();  // top-level driver: time stepping loop with an inner Newton loop
+
+  private:
+    void make_grid_and_dofs ();
+    void assemble_system ();  // Newton matrix and right-hand side for the current iterate
+    void compute_nl_term (const Vector<double> &old_data,
+                          const Vector<double> &new_data,
+                          Vector<double>       &nl_term) const;
+    void compute_nl_matrix (const Vector<double> &old_data,
+                            const Vector<double> &new_data,
+                            SparseMatrix<double> &nl_matrix) const;
+    unsigned int solve ();  // returns the number of CG iterations used
+    void output_results (const unsigned int timestep_number) const;
+
+    Triangulation<dim>   triangulation;
+    FE_Q<dim>            fe;
+    DoFHandler<dim>      dof_handler;
+
+    SparsityPattern      sparsity_pattern;  // shared by all three matrices below
+    SparseMatrix<double> system_matrix;     // reassembled in every Newton iteration
+    SparseMatrix<double> mass_matrix;       // built once in make_grid_and_dofs()
+    SparseMatrix<double> laplace_matrix;    // built once in make_grid_and_dofs()
+
+    const unsigned int n_global_refinements;  // declared before time_step, whose initializer reads it
+
+    double time;  // current time; advanced by time_step in run()
+    const double final_time, time_step;
+    const double theta;  // parameter of the time stepping scheme
+
+    Vector<double>       solution, solution_update, old_solution;
+    Vector<double>       M_x_velocity;  // mass matrix times velocity ("x" stands for "times")
+    Vector<double>       system_rhs;
+
+    const unsigned int output_timestep_skip;  // graphical output only every n-th time step
+  };
+
+
+  // @sect3{Initial conditions}
+
+  // In the following two classes, we first implement the exact solution for
+  // 1D, 2D, and 3D mentioned in the introduction to this program. This
+  // space-time solution may be of independent interest if one wanted to test
+  // the accuracy of the program by comparing the numerical against the
+  // analytic solution (note however that the program uses a finite domain,
+  // whereas these are analytic solutions for an unbounded domain). This may,
+  // for example, be done using the VectorTools::integrate_difference
+  // function. Note, again (as was already discussed in step-23), how we
+  // describe space-time functions as spatial functions that depend on a time
+  // variable that can be set and queried using the FunctionTime::set_time()
+  // and FunctionTime::get_time() member functions of the FunctionTime base
+  // class of the Function class.
+  template <int dim>
+  class ExactSolution : public Function<dim>
+  {
+  public:
+    ExactSolution (const unsigned int n_components = 1,
+                   const double time = 0.)
+      :
+      Function<dim>(n_components, time)
+    {}
+
+    // Exact solution at point p for the time currently stored in this object.
+    virtual double value (const Point<dim> &p,
+                          const unsigned int component = 0) const;
+  };
+
+  template <int dim>
+  double ExactSolution<dim>::value (const Point<dim> &p,
+                                    const unsigned int /*component*/) const
+  {
+    const double t = this->get_time ();
+
+    if (dim == 1)
+      {
+        // One space dimension; c1 and c2 are additive offsets (both zero).
+        const double m  = 0.5;
+        const double c1 = 0.;
+        const double c2 = 0.;
+        const double root = std::sqrt(1.-m*m);
+        const double ratio = m / root * std::sin(root*t+c2) / std::cosh(m*p[0]+c1);
+        return -4.*std::atan (ratio);
+      }
+    else if (dim == 2)
+      {
+        // Two space dimensions.
+        const double theta  = numbers::PI/4.;
+        const double lambda = 1.;
+        const double a0     = 1.;
+        const double s      = 1.;
+        const double moving_y = p[1] * std::cosh(lambda) + t * std::sinh(lambda);
+        const double arg = p[0] * std::cos(theta) + std::sin(theta) * moving_y;
+        return 4.*std::atan(a0*std::exp(s*arg));
+      }
+    else if (dim == 3)
+      {
+        // Three space dimensions.
+        const double theta = numbers::PI/4;
+        const double phi   = numbers::PI/4;
+        const double tau   = 1.;
+        const double c0    = 1.;
+        const double s     = 1.;
+        const double moving_z = p[2]*std::cosh(tau)+t*std::sinh(tau);
+        const double arg = p[0]*std::cos(theta) +
+                           p[1]*std::sin(theta) * std::cos(phi) +
+                           std::sin(theta) * std::sin(phi) * moving_z;
+        return 4.*std::atan(c0*std::exp(s*arg));
+      }
+
+    // No formula is implemented for any other dimension.
+    Assert (false, ExcNotImplemented());
+    return -1e8;
+  }
+
+  // In the second part of this section, we provide the initial conditions. We
+  // are lazy (and cautious) and don't want to implement the same functions as
+  // above a second time. Rather, if we are queried for initial conditions, we
+  // create an object <code>ExactSolution</code>, set it to the correct time,
+  // and let it compute whatever values the exact solution has at that time:
+  template <int dim>
+  class InitialValues : public Function<dim>
+  {
+  public:
+    // Forward the number of components and the evaluation time to the base.
+    InitialValues (const unsigned int n_components = 1,
+                   const double time = 0.) : Function<dim>(n_components, time) {}
+
+    // Value of the exact solution at point p and at this object's time.
+    virtual double value (const Point<dim> &p,
+                          const unsigned int component = 0) const;
+  };
+
+  template <int dim>
+  double InitialValues<dim>::value (const Point<dim> &p,
+                                    const unsigned int component) const
+  {
+    const ExactSolution<dim> exact_solution (1, this->get_time());
+    return exact_solution.value (p, component);
+  }
+
+
+
+  // @sect3{Implementation of the <code>SineGordonProblem</code> class}
+
+  // Let's move on to the implementation of the main class, as it implements
+  // the algorithm outlined in the introduction.
+
+  // @sect4{SineGordonProblem::SineGordonProblem}
+
+  // This is the constructor of the <code>SineGordonProblem</code> class. It
+  // specifies the desired polynomial degree of the finite elements,
+  // associates a <code>DoFHandler</code> to the <code>triangulation</code>
+  // object (just as in the example programs step-3 and step-4), initializes
+  // the current or initial time, the final time, the time step size, and the
+  // value of $\theta$ for the time stepping scheme. Since the solutions we
+  // compute here are time-periodic, the actual value of the start-time
+  // doesn't matter, and we choose it so that we start at an interesting time.
+  //
+  // Note that if we were to choose the explicit Euler time stepping scheme
+  // ($\theta = 0$), then we must pick a time step $k \le h$, otherwise the
+  // scheme is not stable and oscillations might arise in the solution. The
+  // Crank-Nicolson scheme ($\theta = \frac{1}{2}$) and the implicit Euler
+  // scheme ($\theta=1$) do not suffer from this deficiency, since they are
+  // unconditionally stable. However, even then the time step should be chosen
+  // to be on the order of $h$ in order to obtain a good solution. Since we
+  // know that our mesh results from the uniform subdivision of a rectangle,
+  // we can compute that time step easily; if we had a different domain, the
+  // technique in step-24 using GridTools::minimal_cell_diameter would work as
+  // well.
+  template <int dim>
+  SineGordonProblem<dim>::SineGordonProblem ()
+    :
+    fe (1),                      // polynomial degree of the finite element
+    dof_handler (triangulation),
+    n_global_refinements (6),
+    time (-5.4414),              // start time
+    final_time (2.7207),
+    time_step (10*1./std::pow(2.,1.*n_global_refinements)),  // 10/2^n, half the cell width 20/2^n of the [-10,10] mesh
+    theta (0.5),                 // Crank-Nicolson
+    output_timestep_skip (1)     // produce output after every time step
+  {}
+
+  // @sect4{SineGordonProblem::make_grid_and_dofs}
+
+  // This function creates a rectangular grid in <code>dim</code> dimensions
+  // and refines it several times. Also, all matrix and vector members of the
+  // <code>SineGordonProblem</code> class are initialized to their appropriate
+  // sizes once the degrees of freedom have been assembled. Like step-24, we
+  // use <code>MatrixCreator</code> functions to generate a mass matrix $M$
+  // and a Laplace matrix $A$ and store them in the appropriate variables for
+  // the remainder of the program's life.
+  template <int dim>
+  void SineGordonProblem<dim>::make_grid_and_dofs ()
+  {
+    // Mesh: the hypercube with corners at -10 and 10, uniformly refined.
+    GridGenerator::hyper_cube (triangulation, -10, 10);
+    triangulation.refine_global (n_global_refinements);
+
+    std::cout << "   Number of active cells: " << triangulation.n_active_cells()
+              << std::endl
+              << "   Total number of cells: " << triangulation.n_cells()
+              << std::endl;
+
+    // Enumerate the unknowns and remember how many there are.
+    dof_handler.distribute_dofs (fe);
+    const types::global_dof_index n_dofs = dof_handler.n_dofs();
+
+    std::cout << "   Number of degrees of freedom: " << n_dofs << std::endl;
+
+    // All three matrices share one sparsity pattern.
+    DynamicSparsityPattern dynamic_pattern (n_dofs, n_dofs);
+    DoFTools::make_sparsity_pattern (dof_handler, dynamic_pattern);
+    sparsity_pattern.copy_from (dynamic_pattern);
+
+    system_matrix.reinit  (sparsity_pattern);
+    mass_matrix.reinit    (sparsity_pattern);
+    laplace_matrix.reinit (sparsity_pattern);
+
+    // Mass and Laplace matrix are built once and kept for the whole run.
+    const QGauss<dim> quadrature_formula (3);
+    MatrixCreator::create_mass_matrix (dof_handler, quadrature_formula,
+                                       mass_matrix);
+    MatrixCreator::create_laplace_matrix (dof_handler, quadrature_formula,
+                                          laplace_matrix);
+
+    solution.reinit        (n_dofs);
+    solution_update.reinit (n_dofs);
+    old_solution.reinit    (n_dofs);
+    M_x_velocity.reinit    (n_dofs);
+    system_rhs.reinit      (n_dofs);
+  }
+
+  // @sect4{SineGordonProblem::assemble_system}
+
+  // This function assembles the system matrix and right-hand side vector for
+  // each iteration of Newton's method. The reader should refer to the
+  // Introduction for the explicit formulas for the system matrix and
+  // right-hand side.
+  //
+  // Note that during each time step, we have to add up the various
+  // contributions to the matrix and right hand sides. In contrast to step-23
+  // and step-24, this requires assembling a few more terms, since they depend
+  // on the solution of the previous time step or previous nonlinear step. We
+  // use the functions <code>compute_nl_matrix</code> and
+  // <code>compute_nl_term</code> to do this, while the present function
+  // provides the top-level logic.
+  template <int dim>
+  void SineGordonProblem<dim>::assemble_system ()
+  {
+    // First we assemble the Jacobian matrix $F'_h(U^{n,l})$, where $U^{n,l}$
+    // is stored in the vector <code>solution</code> for convenience. The
+    // residual assembled below contains the term $+k^2\theta S(U^{n,l},U^{n-1})$
+    // and $\partial S/\partial U^{n,l} = \theta N$, so the nonlinear matrix
+    // enters the Jacobian with a positive sign:
+    // $F'_h = M + k^2\theta^2 A + k^2\theta^2 N$.
+    system_matrix.copy_from (mass_matrix);
+    system_matrix.add (std::pow(time_step*theta,2), laplace_matrix);
+
+    SparseMatrix<double> tmp_matrix (sparsity_pattern);
+    compute_nl_matrix (old_solution, solution, tmp_matrix);
+    system_matrix.add (std::pow(time_step*theta,2), tmp_matrix);
+
+    // Then, we compute the right-hand side vector $-F_h(U^{n,l})$.
+    //
+    // We first build the matrix $M+k^2\theta^2 A$ in
+    // <code>tmp_matrix</code>, use it to compute a contribution to the right
+    // hand side vector, and then build the matrix $M-k^2\theta(1-\theta) A$.
+    // Rather than starting over with another <code>copy_from</code>, we
+    // obtain the second matrix from what is already in
+    // <code>tmp_matrix</code> by subtracting
+    // $k^2\theta^2 A+k^2\theta(1-\theta) A=k^2\theta A$ from it:
+    system_rhs = 0;
+
+    // Contribution $(M+k^2\theta^2 A)U^{n,l}$:
+    tmp_matrix.copy_from (mass_matrix);
+    tmp_matrix.add (std::pow(time_step*theta,2), laplace_matrix);
+
+    Vector<double> tmp_vector (solution.size());
+    tmp_matrix.vmult (tmp_vector, solution);
+    system_rhs += tmp_vector;
+
+    // Contribution $-(M-k^2\theta(1-\theta) A)U^{n-1}$:
+    tmp_matrix.add(-std::pow(time_step, 2) * theta, laplace_matrix);
+
+    tmp_matrix.vmult (tmp_vector, old_solution);
+    system_rhs -= tmp_vector;
+
+    // Contributions $-kMV^{n-1}$ and $+k^2\theta S(U^{n,l},U^{n-1})$:
+    system_rhs.add (-time_step, M_x_velocity);
+
+    compute_nl_term (old_solution, solution, tmp_vector);
+    system_rhs.add (std::pow(time_step,2)*theta, tmp_vector);
+
+    // Up to here we have accumulated $F_h$; flip the sign to get $-F_h$.
+    system_rhs *= -1;
+  }
+
+  // @sect4{SineGordonProblem::compute_nl_term}
+
+  // This function computes the vector $S(\cdot,\cdot)$, which appears in the
+  // nonlinear term in both equations of the split formulation. This
+  // function not only simplifies the repeated computation of this term, but
+  // it is also a fundamental part of the nonlinear iterative solver that we
+  // use when the time stepping is implicit (i.e. $\theta\ne 0$). Moreover, we
+  // must allow the function to receive as input an "old" and a "new"
+  // solution. These may not be the actual solutions of the problem stored in
+  // <code>old_solution</code> and <code>solution</code>, but are simply the
+  // two functions we linearize about. For the purposes of this function, let
+  // us call the first two arguments $w_{\mathrm{old}}$ and $w_{\mathrm{new}}$
+  // in the documentation of this class below, respectively.
+  //
+  // As a side-note, it is perhaps worth investigating what order quadrature
+  // formula is best suited for this type of integration. Since $\sin(\cdot)$
+  // is not a polynomial, there are probably no quadrature formulas that can
+  // integrate these terms exactly. It is usually sufficient to just make sure
+  // that the right hand side is integrated up to the same order of accuracy
+  // as the discretization scheme is, but it may be possible to improve on the
+  // constant in the asymptotic statement of convergence by choosing a more
+  // accurate quadrature formula.
+  template <int dim>
+  void SineGordonProblem<dim>::compute_nl_term (const Vector<double> &old_data,
+                                                const Vector<double> &new_data,
+                                                Vector<double>       &nl_term) const
+  {
+    nl_term = 0;
+    const QGauss<dim> quadrature_formula (3);
+    // Only shape values and JxW weights are used below; request nothing else.
+    FEValues<dim>     fe_values (fe, quadrature_formula,
+                                 update_values | update_JxW_values);
+
+    const unsigned int dofs_per_cell = fe.dofs_per_cell;
+    const unsigned int n_q_points    = quadrature_formula.size();
+
+    Vector<double> local_nl_term (dofs_per_cell);
+    std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+    std::vector<double> old_data_values (n_q_points);
+    std::vector<double> new_data_values (n_q_points);
+
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+
+    for (; cell!=endc; ++cell)
+      {
+        local_nl_term = 0;
+        // Once we re-initialize our <code>FEValues</code> instantiation to
+        // the current cell, we use <code>get_function_values</code> to get
+        // the values of the "old" data (presumably at $t=t_{n-1}$) and the
+        // "new" data (presumably at $t=t_n$) at the quadrature points.
+        fe_values.reinit (cell);
+        fe_values.get_function_values (old_data, old_data_values);
+        fe_values.get_function_values (new_data, new_data_values);
+
+        // Now, we can evaluate $\int_K \sin\left[\theta w_{\mathrm{new}} +
+        // (1-\theta) w_{\mathrm{old}}\right] \,\varphi_j\,\mathrm{d}x$ by
+        // quadrature. The sine does not depend on the shape function index,
+        // so it is evaluated once per quadrature point.
+        for (unsigned int q_point=0; q_point<n_q_points; ++q_point)
+          {
+            const double sin_w = std::sin(theta * new_data_values[q_point] +
+                                          (1-theta) * old_data_values[q_point]);
+            for (unsigned int i=0; i<dofs_per_cell; ++i)
+              local_nl_term(i) += (sin_w * fe_values.shape_value (i, q_point) *
+                                   fe_values.JxW (q_point));
+          }
+
+        // We conclude by adding up the contributions of the integrals over
+        // the cells to the global integral.
+        cell->get_dof_indices (local_dof_indices);
+
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          nl_term(local_dof_indices[i]) += local_nl_term(i);
+      }
+  }
+
+  // @sect4{SineGordonProblem::compute_nl_matrix}
+
+  // This is the second function dealing with the nonlinear scheme. It
+  // computes the matrix $N(\cdot,\cdot)$, which appears in the nonlinear
+  // term in the Jacobian of $F(\cdot)$. Just as <code>compute_nl_term</code>,
+  // we must allow this function to receive as input an "old" and a "new"
+  // solution, which we again call $w_{\mathrm{old}}$ and $w_{\mathrm{new}}$
+  // below, respectively.
+  template <int dim>
+  void SineGordonProblem<dim>::compute_nl_matrix (const Vector<double> &old_data,
+                                                  const Vector<double> &new_data,
+                                                  SparseMatrix<double> &nl_matrix) const
+  {
+    // Contributions are added to <code>nl_matrix</code>; it is not cleared here.
+    const QGauss<dim> quadrature_formula (3);
+    FEValues<dim> fe_values (fe, quadrature_formula,
+                             update_values | update_JxW_values);
+
+    const unsigned int dofs_per_cell = fe.dofs_per_cell;
+    const unsigned int n_q_points    = quadrature_formula.size();
+
+    FullMatrix<double> local_nl_matrix (dofs_per_cell, dofs_per_cell);
+    std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+    std::vector<double> old_data_values (n_q_points);
+    std::vector<double> new_data_values (n_q_points);
+
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+
+    for (; cell!=endc; ++cell)
+      {
+        local_nl_matrix = 0;
+        // Again, first we re-initialize <code>FEValues</code> on this cell.
+        fe_values.reinit (cell);
+        fe_values.get_function_values (old_data, old_data_values);
+        fe_values.get_function_values (new_data, new_data_values);
+
+        // Then, we evaluate $\int_K \cos\left[\theta w_{\mathrm{new}} +
+        // (1-\theta) w_{\mathrm{old}}\right]\, \varphi_i\,
+        // \varphi_j\,\mathrm{d}x$; the cosine is computed once per point.
+        for (unsigned int q_point=0; q_point<n_q_points; ++q_point)
+          {
+            const double cos_w = std::cos(theta * new_data_values[q_point] +
+                                          (1-theta) * old_data_values[q_point]);
+            for (unsigned int i=0; i<dofs_per_cell; ++i)
+              for (unsigned int j=0; j<dofs_per_cell; ++j)
+                local_nl_matrix(i,j) += (cos_w * fe_values.shape_value (i, q_point) *
+                                         fe_values.shape_value (j, q_point) *
+                                         fe_values.JxW (q_point));
+          }
+
+        // Finally, we add the cell contributions to the global matrix.
+        cell->get_dof_indices (local_dof_indices);
+
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          for (unsigned int j=0; j<dofs_per_cell; ++j)
+            nl_matrix.add(local_dof_indices[i], local_dof_indices[j],
+                          local_nl_matrix(i,j));
+      }
+  }
+
+
+
+  // @sect4{SineGordonProblem::solve}
+
+  // As discussed in the Introduction, this function uses the CG iterative
+  // solver on the linear system of equations resulting from the finite
+  // element spatial discretization of each iteration of Newton's method for
+  // the (nonlinear) first equation of the split formulation. The solution to
+  // the system is, in fact, $\delta U^{n,l}$ so it is stored in
+  // <code>solution_update</code> and used to update <code>solution</code> in
+  // the <code>run</code> function.
+  //
+  // Note that we re-set the solution update to zero before solving for
+  // it. This is not necessary: iterative solvers can start from any point and
+  // converge to the correct solution. If one has a good estimate about the
+  // solution of a linear system, it may be worthwhile to start from that
+  // vector, but as a general observation it is a fact that the starting point
+  // doesn't matter very much: it has to be a very, very good guess to reduce
+  // the number of iterations by more than a few. It turns out that for this
+  // problem, using the previous nonlinear update as a starting point actually
+  // hurts convergence and increases the number of iterations needed, so we
+  // simply set it to zero.
+  //
+  // The function returns the number of iterations it took to converge to a
+  // solution. This number will later be used to generate output on the screen
+  // showing how many iterations were needed in each nonlinear iteration.
+  template <int dim>
+  unsigned int
+  SineGordonProblem<dim>::solve ()
+  {
+    SolverControl solver_control (1000, 1e-12*system_rhs.l2_norm());
+    SolverCG<> cg (solver_control);
+
+    PreconditionSSOR<> preconditioner;
+    preconditioner.initialize(system_matrix, 1.2);
+
+    // Start CG from zero rather than from the previous Newton update.
+    solution_update = 0;
+    cg.solve (system_matrix, solution_update, system_rhs, preconditioner);
+
+    return solver_control.last_step();
+  }
+
+  // @sect4{SineGordonProblem::output_results}
+
+  // This function outputs the results to a file. It is pretty much identical
+  // to the respective functions in step-23 and step-24:
+  template <int dim>
+  void
+  SineGordonProblem<dim>::output_results (const unsigned int timestep_number) const
+  {
+    // Hand the solution to DataOut under the name "u".
+    DataOut<dim> data_out;
+    data_out.attach_dof_handler (dof_handler);
+    data_out.add_data_vector (solution, "u");
+    data_out.build_patches ();
+
+    // File name: "solution-" + time step number (width argument 3) + ".vtk".
+    std::string filename ("solution-");
+    filename += Utilities::int_to_string (timestep_number, 3);
+    filename += ".vtk";
+    std::ofstream output (filename.c_str());
+    data_out.write_vtk (output);
+  }
+
+  // @sect4{SineGordonProblem::run}
+
+  // This function has the top-level control over everything: it runs the
+  // (outer) time-stepping loop, the (inner) nonlinear-solver loop, and
+  // outputs the solution after each time step.
+  template <int dim>
+  void SineGordonProblem<dim>::run ()
+  {
+    make_grid_and_dofs ();
+
+    // To acknowledge the initial condition, we must use the function $u_0(x)$
+    // to compute $U^0$. To this end, below we will create an object of type
+    // <code>InitialValues</code>; note that when we create this object (which
+    // is derived from the <code>Function</code> class), we set its internal
+    // time variable to $t_0$, to indicate that the initial condition is a
+    // function of space and time evaluated at $t=t_0$.
+    //
+    // Then we produce $U^0$ by projecting $u_0(x)$ onto the grid using
+    // <code>VectorTools::project</code>. We have to use the same construct
+    // using hanging node constraints as in step-21: the VectorTools::project
+    // function requires a hanging node constraints object, but to be used we
+    // first need to close it:
+    {
+      ConstraintMatrix constraints;
+      constraints.close();
+      VectorTools::project (dof_handler,
+                            constraints,
+                            QGauss<dim>(3),
+                            InitialValues<dim> (1, time),
+                            solution);
+    }
+
+    // For completeness, we output the zeroth time step to a file just like
+    // any other time step.
+    output_results (0);
+
+    // Now we perform the time stepping: at every time step we solve the
+    // matrix equation(s) corresponding to the finite element discretization
+    // of the problem, and then advance our solution according to the time
+    // stepping formulas we discussed in the Introduction.
+    unsigned int timestep_number = 1;
+    for (time+=time_step; time<=final_time; time+=time_step, ++timestep_number)
+      {
+        old_solution = solution;  // also serves as the initial guess for Newton's method
+
+        std::cout << std::endl
+                  << "Time step #" << timestep_number << "; "
+                  << "advancing to t = " << time << "."
+                  << std::endl;
+
+        // At the beginning of each time step we must solve the nonlinear
+        // equation in the split formulation via Newton's method ---
+        // i.e. solve for $\delta U^{n,l}$ then compute $U^{n,l+1}$ and so
+        // on. The stopping criterion for this nonlinear iteration is that
+        // $\|F_h(U^{n,l})\|_2 \le 10^{-6} \|F_h(U^{n,0})\|_2$. Consequently,
+        // we need to record the norm of the residual in the first iteration.
+        //
+        // At the end of each iteration, we output to the console how many
+        // linear solver iterations it took us. When the loop below is done,
+        // we have (an approximation of) $U^n$.
+        double initial_rhs_norm = 0.;
+        bool first_iteration = true;
+        do
+          {
+            assemble_system ();
+
+            if (first_iteration == true)
+              initial_rhs_norm = system_rhs.l2_norm();
+
+            const unsigned int n_iterations
+              = solve ();
+
+            solution += solution_update;
+
+            if (first_iteration == true)
+              std::cout << "    " << n_iterations;
+            else
+              std::cout << '+' << n_iterations;
+            first_iteration = false;
+          }
+        while (system_rhs.l2_norm() > 1e-6 * initial_rhs_norm);  // system_rhs is the residual assembled before the latest update
+
+        std::cout << " CG iterations per nonlinear step."
+                  << std::endl;
+
+        // Upon obtaining the solution to the first equation of the problem at
+        // $t=t_n$, we must update the auxiliary velocity variable
+        // $V^n$. However, we do not compute and store $V^n$ since it is not a
+        // quantity we use directly in the problem. Hence, for simplicity, we
+        // update $MV^n$ directly:
+        Vector<double> tmp_vector (solution.size());
+        laplace_matrix.vmult (tmp_vector, solution);
+        M_x_velocity.add (-time_step*theta, tmp_vector);
+
+        laplace_matrix.vmult (tmp_vector, old_solution);
+        M_x_velocity.add (-time_step*(1-theta), tmp_vector);
+
+        compute_nl_term (old_solution, solution, tmp_vector);
+        M_x_velocity.add (-time_step, tmp_vector);
+
+        // Oftentimes, in particular for fine meshes, we must pick the time
+        // step to be quite small in order for the scheme to be
+        // stable. Therefore, there are a lot of time steps during which
+        // "nothing interesting happens" in the solution. To improve overall
+        // efficiency -- in particular, speed up the program and save disk
+        // space -- we only output the solution every
+        // <code>output_timestep_skip</code> time steps:
+        if (timestep_number % output_timestep_skip == 0)
+          output_results (timestep_number);
+      }
+  }
+}
+
+// @sect3{The <code>main</code> function}
+
+// This is the main function of the program. It creates an object of top-level
+// class and calls its principal function. If exceptions are thrown during the
+// execution of the run method of the <code>SineGordonProblem</code> class, we
+// catch and report them here. For more information about exceptions the
+// reader should consult step-6.
+int main ()
+{
+  // Separator line printed above and below every error report.
+  const char *const separator
+    = "----------------------------------------------------";
+
+  try
+    {
+      // Solve the sine-Gordon problem in one space dimension.
+      Step25::SineGordonProblem<1> problem;
+      problem.run ();
+    }
+  catch (const std::exception &exc)
+    {
+      // Report what went wrong and signal failure to the caller.
+      std::cerr << std::endl << std::endl
+                << separator << std::endl
+                << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << separator << std::endl;
+      return 1;
+    }
+  catch (...)
+    {
+      // Something that is not derived from std::exception was thrown.
+      std::cerr << std::endl << std::endl
+                << separator << std::endl
+                << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << separator << std::endl;
+
+      return 1;
+    }
+
+  return 0;
+}
diff --git a/examples/step-26/CMakeLists.txt b/examples/step-26/CMakeLists.txt
new file mode 100644
index 0000000..8f7a650
--- /dev/null
+++ b/examples/step-26/CMakeLists.txt
@@ -0,0 +1,39 @@
+##
+#  CMake script for the step-26 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-26")  # reused below for the source file name and PROJECT()
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc  # expands to step-26.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET  # QUIET: the IF() below reports a failed search itself
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}  # candidate locations
+  )
+IF(NOT ${deal.II_FOUND})  # abort configuration with instructions if nothing was found
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()  # macro from the deal.II package, called before PROJECT()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()  # presumably sets up the target from TARGET/TARGET_SRC -- confirm in deal.II docs
diff --git a/examples/step-26/doc/builds-on b/examples/step-26/doc/builds-on
new file mode 100644
index 0000000..1740273
--- /dev/null
+++ b/examples/step-26/doc/builds-on
@@ -0,0 +1 @@
+step-6
diff --git a/examples/step-26/doc/intro.dox b/examples/step-26/doc/intro.dox
new file mode 100644
index 0000000..6168442
--- /dev/null
+++ b/examples/step-26/doc/intro.dox
@@ -0,0 +1,439 @@
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+@dealiiVideoLecture{29,30}
+(@dealiiVideoLectureSeeAlso{31.7})
+
+
+This program implements the heat equation
+@f{align*}
+  \frac{\partial u(\mathbf x, t)}{\partial t}
+  -
+  \Delta u(\mathbf x, t)
+  &=
+  f(\mathbf x, t),
+  \qquad\qquad &&
+  \forall \mathbf x \in \Omega, t\in (0,T), 
+  \\
+  u(\mathbf x, 0) &= u_0(\mathbf x) &&
+  \forall \mathbf x \in \Omega, \\
+  \\
+  u(\mathbf x, t) &= g(\mathbf x,t) &&
+  \forall \mathbf x \in \partial\Omega, t \in (0,T).
+@f}
+In some sense, this equation is simpler than the ones we have discussed in the
+preceding programs step-23, step-24, step-25, namely the wave equation. This
+is due to the fact that the heat equation smoothes out the solution over time,
+and is consequently more forgiving in many regards. For example, when using
+implicit time stepping methods, we can actually take large time steps, we have
+less trouble with the small disturbances we introduce through adapting the
+mesh every few time steps, etc.
+
+Our goal here will be to solve the equations above using the theta-scheme that
+discretizes the equation in time using the following approach, where we would
+like $u^n(\mathbf x)$ to approximate $u(\mathbf x, t_n)$ at some time $t_n$:
+@f{align*}
+  \frac{u^n(\mathbf x)-u^{n-1}(\mathbf x)}{k_n}
+  -
+  \left[
+  (1-\theta)\Delta u^{n-1}(\mathbf x)
+  +
+  \theta\Delta u^n(\mathbf x)
+  \right]
+  &=
+  \left[
+  (1-\theta)f(\mathbf x, t_{n-1})
+  +
+  \theta f(\mathbf x, t_n)
+  \right].  
+@f}
+Here, $k_n=t_n-t_{n-1}$ is the time step size. The theta-scheme generalizes
+the explicit Euler ($\theta=0$), implicit Euler ($\theta=1$) and
+Crank-Nicolson ($\theta=\frac 12$) time discretizations. Since the latter has
+the highest convergence order, we will choose $\theta=\frac 12$ in the program
+below, but make it so that playing with this parameter remains simple. (If you
+are interested in playing with higher order methods, take a look at step-52.)
+
+Given this time discretization, space discretization happens as it always
+does, by multiplying with test functions, integrating by parts, and then
+restricting everything to a finite dimensional subspace. This yields the
+following set of fully discrete equations after multiplying through with
+$k_n$:
+@f{align*}
+  M U^n-MU^{n-1}
+  +
+  k_n \left[
+  (1-\theta)A U^{n-1}
+  +
+  \theta A U^n
+  \right]
+  &=
+  k_n
+  \left[
+  (1-\theta)F^{n-1}
+  +
+  \theta F^n
+  \right],
+@f}
+where $M$ is the mass matrix and $A$ is the stiffness matrix that results from
+discretizing the Laplacian. Bringing all known quantities to the right hand
+side yields the linear system we have to solve in every step:
+@f{align*}
+  (M
+  +
+  k_n \theta A) U^n
+  &=
+  MU^{n-1}
+  -
+  k_n
+  (1-\theta)A U^{n-1}
+  +
+  k_n
+  \left[
+  (1-\theta)F^{n-1}
+  +
+  \theta F^n
+  \right].
+@f}
+The linear system on the left hand side is symmetric and positive definite, so
+we should have no trouble solving it with the Conjugate Gradient method.
+
+We can start the iteration above if we have the set of nodal coefficients
+$U^0$ at the initial time. Here, we take the ones we get by interpolating the
+initial values $u_0(\mathbf x)$ onto the mesh used for the first time step. We
+will also need to choose a time step; we will here just choose it as fixed,
+but clearly advanced simulators will want to choose it adaptively. We will
+briefly come back to this in the <a href="#Results">results section
+below</a>.
+
+
+<h3> Adapting meshes for time dependent problems </h3>
+
+When solving the wave equation and its variants in the previous few programs,
+we kept the mesh fixed. Just as for stationary equations, one can make a good
+case that this is not the smartest approach and that significant savings can
+be had by adapting the mesh. There are, however, significant difficulties
+compared to the stationary case. Let us go through them in turn:
+
+<ul>
+  <li><i>Time step size and minimal mesh size</i>: For stationary problems, the
+  general approach is "make the mesh as fine as it is necessary". For problems
+  with singularities, this often leads to situations where we get many levels
+  of refinement into corners or along interfaces. The very first tutorial to
+  use adaptive meshes, step-6, is a case in point already.
+
+  However, for time dependent problems, we typically need to choose the time
+  step related to the mesh size. For explicit time discretizations, this is
+  obvious, since we need to respect a CFL condition that ties the time step
+  size to the smallest mesh size. For implicit time discretizations, no such
+  hard restriction exists, but in practice we still want to make the time step
+  smaller if we make the mesh size smaller since we typically have error
+  estimates of the form $\|e\| \le {\cal O}(k^p + h^q)$ where $p,q$ are the
+  convergence orders of the time and space discretization, respectively. We
+  can only make the error small if we decrease both terms. Ideally, an
+  estimate like this would suggest to choose $k \propto h^{q/p}$. Because, at
+  least for problems with non-smooth solutions, the error is typically
+  localized in the cells with the smallest mesh size, we have to indeed choose
+  $k \propto h_{\text{min}}^{q/p}$, using the <i>smallest</i> mesh size.
+
+  The consequence is that refining the mesh further in one place implies not
+  only the moderate additional effort of increasing the number of degrees of
+  freedom slightly, but also the much larger effort of having to solve the
+  <i>global</i> linear system more often because of the smaller time step.
+
+  In practice, one typically deals with this by acknowledging that we can not
+  make the time step arbitrarily small, and consequently can not make the
+  local mesh size arbitrarily small. Rather, we set a maximal level of
+  refinement and when we flag cells for refinement, we simply do not refine
+  those cells whose children would exceed this maximal level of refinement.
+
+  There is a similar problem in that we will choose a right hand side that
+  will switch on in different parts of the domain at different times. To avoid
+  being caught flat footed with too coarse a mesh in areas where we suddenly
+  need a finer mesh, we will also enforce in our program a <i>minimal</i> mesh
+  refinement level.
+
+  <li><i>Test functions from different meshes</i>: Let us consider again the
+  semi-discrete equations we have written down above:
+  @f{align*}
+    \frac{u^n(\mathbf x)-u^{n-1}(\mathbf x)}{k_n}
+    -
+    \left[
+    (1-\theta)\Delta u^{n-1}(\mathbf x)
+    +
+    \theta\Delta u^n(\mathbf x)
+    \right]
+    &=
+    \left[
+    (1-\theta)f(\mathbf x, t_{n-1})
+    +
+    \theta f(\mathbf x, t_n)
+    \right].  
+  @f}
+  We can here consider $u^{n-1}$ as data since it has presumably been computed
+  before. Now, let us replace 
+  @f{align*}
+    u^n(\mathbf x)\approx u_h^n(\mathbf x) 
+    =
+    \sum_j U^n_j \varphi_j(\mathbf x),
+  @f}
+  multiply with test functions $\varphi_i(\mathbf x)$ and integrate by parts
+  where necessary. In a process as outlined above, this would yield
+  @f{align*}
+    \sum_j
+    (M
+    +
+    k_n \theta A)_{ij} U^n_j
+    &=
+    (\varphi_i, u_h^{n-1})
+    -
+    k_n
+    (1-\theta)(\nabla \varphi_i, \nabla u_h^{n-1})
+    +
+    k_n
+    \left[
+    (1-\theta)F^{n-1}
+    +
+    \theta F^n
+    \right].
+  @f}
+  Now imagine that we have changed the mesh between time steps $n-1$ and
+  $n$. Then the problem is that the basis functions we use for $u_h^n$ and
+  $u^{n-1}$ are different! This pertains to the terms on the right hand side,
+  the first of which we could more clearly write as (the second follows the
+  same pattern)
+  @f{align*}
+    (\varphi_i, u_h^{n-1})
+    =
+    (\varphi_i^n, u_h^{n-1})
+    =
+    \sum_{j=1}^{N_{n-1}}
+    (\varphi_i^n, \varphi_j^{n-1}) U^{n-1}_j,
+    \qquad\qquad
+    i=1\ldots N_n.
+  @f}
+  If the meshes used in these two time steps are the same, then
+  $(\varphi_i^n, \varphi_j^{n-1})$ forms a square mass matrix
+  $M_{ij}$. However, if the meshes are not the same, then in general the matrix
+  is rectangular. Worse, it is difficult to even compute these integrals
+  because if we loop over the cells of the mesh at time step $n$, then we need
+  to evaluate $\varphi_j^{n-1}$ at the quadrature points of these cells, but
+  they do not necessarily correspond to the cells of the mesh at time step
+  $n-1$ and $\varphi_j^{n-1}$ is not defined via these cells; the same of
+  course applies if we wanted to compute the integrals via integration on the
+  cells of mesh $n-1$.
+
+  In any case, what we have to face is a situation where we need to integrate
+  shape functions defined on two different meshes. This can be done, and is in
+  fact demonstrated in step-28, but the process is at best described by the
+  word "awkward".
+
+  In practice, one does not typically want to do this. Rather, we avoid the
+  whole situation by interpolating the solution from the old to the new mesh
+  every time we adapt the mesh. In other words, rather than solving the
+  equations above, we instead solve the problem
+  @f{align*}
+    \sum_j
+    (M
+    +
+    k_n \theta A)_{ij} U^n_j
+    &=
+    (\varphi_i, I_h^n u_h^{n-1})
+    -
+    k_n
+    (1-\theta)(\nabla \varphi_i, \nabla I_h^n u_h^{n-1})
+    +
+    k_n
+    \left[
+    (1-\theta)F^{n-1}
+    +
+    \theta F^n
+    \right],
+  @f}
+  where $I_h^n$ is the interpolation operator onto the finite element space
+  used in time step $n$. This is not the optimal approach since it introduces
+  an additional error besides time and space discretization, but it is a
+  pragmatic one that makes it feasible to adapt meshes over time.
+</ul>
+
+
+
+<h3> What could possibly go wrong? Verifying whether the code is correct </h3>
+
+There are a number of things one can typically get wrong when implementing a
+finite element code. In particular, for time dependent problems, the following
+are common sources of bugs:
+- The time integration, for example by getting the coefficients in front of
+  the terms involving the current and previous time steps wrong (e.g., mixing
+  up a factor $\theta$ for $1-\theta$).
+- Handling the right hand side, for example forgetting a factor of $k_n$ or
+  $\theta$.
+- Mishandling the boundary values, again for example forgetting a factor of
+  $k_n$ or $\theta$, or forgetting to apply nonzero boundary values not only
+  to the right hand side but also to the system matrix.
+
+A less common problem is getting the initial conditions wrong because one can
+typically see that it is wrong by just outputting the first time step. In any
+case, in order to verify the correctness of the code, it is helpful to have a
+testing protocol that allows us to verify each of these components
+separately. This means:
+- Testing the code with nonzero initial conditions but zero right hand side
+  and boundary values and verifying that the time evolution is correct.
+- Then testing with zero initial conditions and boundary values but nonzero
+  right hand side and again ensuring correctness.
+- Finally, testing with zero initial conditions and right hand side but
+  nonzero boundary values.
+
+This sounds complicated, but fortunately, for linear partial differential
+equations without coefficients (or constant coefficients) like the one here,
+there is a fairly standard protocol that rests on the following observation:
+if you choose as your domain a square $[0,1]^2$ (or, with slight
+modifications, a rectangle), then the exact solution can be written as
+@f{align*}
+  u(x,y,t) = a(t) \sin(n_x \pi x) \sin(n_y \pi y)
+@f}
+(with integer constants $n_x,n_y$)
+if only the initial condition, right hand side and boundary values are all
+of the form $\sin(n_x \pi x) \sin(n_y \pi y)$ as well. This is due to the fact
+that the function $\sin(n_x \pi x) \sin(n_y \pi y)$ is an eigenfunction of the
+Laplace operator and allows us to compute things like the time factor $a(t)$
+analytically and, consequently, compare with what we get numerically.
+
+As an example, let us consider the situation where we have
+$u_0(x,y)=\sin(n_x \pi x) \sin(n_y \pi y)$ and
+$f(x,y,t)=0$. With the claim (ansatz) of the form for
+$u(x,y,t)$ above, we get that
+@f{align*}
+  \left(\frac{\partial}{\partial t} -\Delta\right)
+  u(x,y,t)
+  &=
+  \left(\frac{\partial}{\partial t} -\Delta\right)
+  a(t) \sin(n_x \pi x) \sin(n_y \pi y)
+  \\
+  &=
+  \left(a'(t) + (n_x^2+n_y^2)\pi^2 a(t) \right) \sin(n_x \pi x) \sin(n_y \pi y).
+@f}
+For this to be equal to $f(x,y,t)=0$, we need that
+@f{align*}
+  a'(t) + (n_x^2+n_y^2)\pi^2 a(t) = 0
+@f}
+and due to the initial conditions, $a(0)=1$. This differential equation can be
+integrated to yield
+@f{align*}
+  a(t) = e^{-(n_x^2+n_y^2)\pi^2 t}.
+@f}
+In other words, if the initial condition is a product of sines, then the
+solution has exactly the same shape of a product of sines that decays to zero
+with a known time dependence. This is something that is easy to test if you
+have a sufficiently fine mesh and sufficiently small time step.
+
+What is typically going to happen if you get the time integration scheme wrong
+(e.g., by having the wrong factors of $\theta$ or $k$ in front of the various
+terms) is that you don't get the right temporal behavior of the
+solution. Double check the various factors until you get the right
+behavior. You may also want to verify that the temporal decay rate (as
+determined, for example, by plotting the value of the solution at a fixed
+point) does not double or halve each time you double or halve the time step or
+mesh size. You know that it's not the handling of the
+boundary conditions or right hand side because these were both zero.
+
+If you have so verified that the time integrator is correct, take the
+situation where the right hand side is nonzero but the initial conditions are
+zero: $u_0(x,y)=0$ and
+$f(x,y,t)=\sin(n_x \pi x) \sin(n_y \pi y)$. Again,
+@f{align*}
+  \left(\frac{\partial}{\partial t} -\Delta\right)
+  u(x,y,t)
+  &=
+  \left(\frac{\partial}{\partial t} -\Delta\right)
+  a(t) \sin(n_x \pi x) \sin(n_y \pi y)
+  \\
+  &=
+  \left(a'(t) + (n_x^2+n_y^2)\pi^2 a(t) \right) \sin(n_x \pi x) \sin(n_y \pi y),
+@f}
+and for this to be equal to $f(x,y,t)$, we need that
+@f{align*}
+  a'(t) + (n_x^2+n_y^2)\pi^2 a(t) = 1
+@f}
+and due to the initial conditions, $a(0)=0$. Integrating this equation in time
+yields
+@f{align*}
+  a(t) = \frac{1}{(n_x^2+n_y^2)\pi^2} \left[ 1 - e^{-(n_x^2+n_y^2)\pi^2 t} \right].
+@f}
+
+Again, if you have the wrong factors of $\theta$ or $k$ in front of the right
+hand side terms you will either not get the right temporal behavior of the
+solution, or it will converge to a maximum value other than
+$\frac{1}{(n_x^2+n_y^2)\pi^2}$.
+
+Once we have verified that the time integration and right hand side handling
+are correct using this scheme, we can go on to verifying that we have the
+boundary values correct, using a very similar approach.
+
+
+
+<h3> The testcase </h3>
+
+Solving the heat equation on a simple domain with a simple right hand side
+almost always leads to solutions that are exceedingly boring, since they
+become very smooth very quickly and then do not move very much any
+more. Rather, we here solve the equation on the L-shaped domain with zero
+Dirichlet boundary values and zero initial conditions, but as right hand side
+we choose
+@f{align*}
+  f(\mathbf x, t)
+  =
+  \left\{
+  \begin{array}{ll}
+    \chi_1(\mathbf x)
+    & \text{if \(0\le t \le 0.2\tau\) or \(\tau\le t \le 1.2\tau\) or \(2\tau\le t
+    \le 2.2\tau\), etc}
+    \\
+    \chi_2(\mathbf x)
+    & \text{if \(0.5\tau\le t \le 0.7\tau\) or \(1.5\tau\le t \le 1.7\tau\) or \(2.5\tau\le t
+    \le 2.7\tau\), etc}
+    \\
+    0
+    & \text{otherwise}
+  \end{array}
+  \right.
+@f}
+Here,
+@f{align*}
+  \chi_1(\mathbf x) &= 
+  \left\{
+  \begin{array}{ll}
+    1
+    & \text{if \(x>0.5\) and \(y>-0.5\)}
+    \\
+    0
+    & \text{otherwise}
+  \end{array}
+  \right.
+  \\
+  \chi_2(\mathbf x) &= 
+  \left\{
+  \begin{array}{ll}
+    1
+    & \text{if \(x>-0.5\) and \(y>0.5\)}
+    \\
+    0
+    & \text{otherwise}
+  \end{array}
+  \right.
+@f}
+In other words, in every period of length $\tau$, the right hand side first
+flashes on in domain 1, then off completely, then on in domain 2, then off
+completely again. This pattern is probably best observed via the little
+animation of the solution shown in the <a href="#Results">results
+section</a>.
+
+If you interpret the heat equation as finding the spatially and temporally
+variable temperature distribution of a conducting solid, then the test case
+above corresponds to an L-shaped body where we keep the boundary at zero
+temperature, and heat alternatingly in two parts of the domain. While heating
+is in effect, the temperature rises in these places, after which it diffuses
+and diminishes again. The point of these initial conditions is that they
+provide us with a solution that has singularities both in time (when sources
+switch on and off) as well as space (at the reentrant corner as well as at the
+edges and corners of the regions where the source acts).
diff --git a/examples/step-26/doc/kind b/examples/step-26/doc/kind
new file mode 100644
index 0000000..86a44aa
--- /dev/null
+++ b/examples/step-26/doc/kind
@@ -0,0 +1 @@
+time dependent
diff --git a/examples/step-26/doc/results.dox b/examples/step-26/doc/results.dox
new file mode 100644
index 0000000..92c5396
--- /dev/null
+++ b/examples/step-26/doc/results.dox
@@ -0,0 +1,122 @@
+<h1>Results</h1>
+
+As in many of the tutorials, the actual output of the program matters less
+than how we arrived there. Nonetheless, here it is:
+@code
+===========================================
+Number of active cells: 48
+Number of degrees of freedom: 65
+
+Time step 1 at t=0.002
+     7 CG iterations.
+
+===========================================
+Number of active cells: 60
+Number of degrees of freedom: 81
+
+
+Time step 1 at t=0.002
+     7 CG iterations.
+
+===========================================
+Number of active cells: 105
+Number of degrees of freedom: 136
+
+
+Time step 1 at t=0.002
+     7 CG iterations.
+
+[...]
+
+Time step 249 at t=0.498
+     13 CG iterations.
+Time step 250 at t=0.5
+     14 CG iterations.
+
+===========================================
+Number of active cells: 1803
+Number of degrees of freedom: 2109
+@endcode
+
+Maybe of more interest is a visualization of the solution and the mesh on which
+it was computed:
+
+<img src="http://www.dealii.org/images/steps/developer/step-26.movie.gif" alt="Animation of the solution of step 26.">
+
+The movie shows how the two sources switch on and off and how the mesh reacts
+to this. It is quite obvious that the mesh as is is probably not the best we
+could come up with. We'll get back to this in the next section.
+
+
+<a name="extensions"></a>
+<h3>Possibilities for extensions</h3>
+
+There are at least two areas where one can improve this program significantly:
+adaptive time stepping and a better choice of the mesh.
+
+<h4>Adaptive time stepping</h4>
+
+Having chosen an implicit time stepping scheme, we are not bound by any
+CFL-like condition on the time step. Furthermore, because the time scales on
+which change happens on a given cell in the heat equation are not bound to the
+cell's diameter (unlike the case with the wave equation, where we had a fixed
+speed of information transport that couples the temporal and spatial scales),
+we can choose the time step as we please. Or, better, choose it as we deem
+necessary for accuracy.
+
+Looking at the solution, it is clear that the action does not happen uniformly
+over time: a lot is changing around the time we switch on a source, things
+become less dramatic once a source is on for a little while, and we enter a
+long phase of decline when both sources are off. During these times, we could
+surely get away with a larger time step than before without sacrificing too
+much accuracy.
+
+The literature has many suggestions on how to choose the time step size
+adaptively. Much can be learned, for example, from the way ODE solvers choose
+their time steps. One can also be inspired by a posteriori error estimators
+that can, ideally, be written in a way that they consist of a temporal and a
+spatial contribution to the overall error. If the temporal one is too large,
+we should choose a smaller time step. Ideas in this direction can be found,
+for example, in the PhD thesis of a former principal developer of deal.II,
+Ralf Hartmann, published by the University of Heidelberg, Germany, in 2002.
+
+
+<h4>Better time stepping methods</h4>
+
+We here use one of the simpler time stepping methods, namely the second order
+in time Crank-Nicolson method. However, more accurate methods such as
+Runge-Kutta methods are available and should be used as they do not represent
+much additional effort. It is not difficult to implement this for the current
+program, but a more systematic treatment is also given in step-52.
+
+
+<h4>Better refinement criteria</h4>
+
+If you look at the meshes in the movie above, it is clear that they are not
+particularly well suited to the task at hand. In fact, they look rather
+random. 
+
+There are two factors at play. First, there are some islands where cells
+have been refined but that are surrounded by non-refined cells (and there
+are probably also a few occasional coarsened islands). These are not terrible,
+as they most of the time do not affect the approximation quality of the mesh,
+but they also don't help because so many of their additional degrees of
+freedom are in fact constrained by hanging node constraints. That said,
+this is easy to fix: the Triangulation class takes an argument to its
+constructor indicating a level of "mesh smoothing". Passing one of many 
+possible flags, this instructs the triangulation to refine some additional 
+cells, or not to refine some cells, so that the resulting mesh does not have 
+these artifacts.
+
+The second problem is more severe: the mesh appears to lag the solution. 
+The underlying reason is that we only adapt the mesh once every fifth
+time step, and only allow for a single refinement in these cases. Whenever a
+source switches on, the solution had been very smooth in this area before and
+the mesh was consequently rather coarse. This implies that the next time step
+when we refine the mesh, we will get one refinement level more in this area,
+and five time steps later another level, etc. But this is not enough: first,
+we should refine immediately when a source switches on (after all, in the
+current context we at least know what the right hand side is), and we should
+allow for more than one refinement level. Of course, all of this can be done
+using deal.II, it just requires a bit of algorithmic thinking in how to make
+this work!
diff --git a/examples/step-26/doc/tooltip b/examples/step-26/doc/tooltip
new file mode 100644
index 0000000..9bd689e
--- /dev/null
+++ b/examples/step-26/doc/tooltip
@@ -0,0 +1 @@
+The heat equation. Time dependent meshes.
diff --git a/examples/step-26/step-26.cc b/examples/step-26/step-26.cc
new file mode 100644
index 0000000..747248b
--- /dev/null
+++ b/examples/step-26/step-26.cc
@@ -0,0 +1,692 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2013 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Author: Wolfgang Bangerth, Texas A&M University, 2013
+ */
+
+
+// The program starts with the usual include files, all of which you should
+// have seen before by now:
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/precondition.h>
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/grid_refinement.h>
+#include <deal.II/grid/grid_out.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/error_estimator.h>
+#include <deal.II/numerics/solution_transfer.h>
+#include <deal.II/numerics/matrix_tools.h>
+
+#include <fstream>
+#include <iostream>
+
+
+// Then the usual placing of all content of this program into a namespace and
+// the importation of the deal.II namespace into the one we will work in:
+namespace Step26
+{
+  using namespace dealii;
+
+
+  // @sect3{The <code>HeatEquation</code> class}
+  //
+  // The next piece is the declaration of the main class of this program. It
+  // follows the well trodden path of previous examples. If you have looked at
+  // step-6, for example, the only thing worth noting here is that we need to
+  // build two matrices (the mass and Laplace matrix) and keep the current and
+  // previous time step's solution. We then also need to store the current
+  // time, the size of the time step, and the number of the current time
+  // step. The last of the member variables denotes the theta parameter
+  // discussed in the introduction that allows us to treat the explicit and
+  // implicit Euler methods as well as the Crank-Nicolson method and other
+  // generalizations all in one program.
+  //
+  // As far as member functions are concerned, the only possible surprise is
+  // that the <code>refine_mesh</code> function takes arguments for the
+  // minimal and maximal mesh refinement level. The purpose of this is
+  // discussed in the introduction.
+  template<int dim>
+  class HeatEquation               // top-level class of this program
+  {
+  public:
+    HeatEquation();
+    void run();
+
+  private:
+    void setup_system();           // DoFs, constraints, sparsity, mass/Laplace matrices
+    void solve_time_step();
+    void output_results() const;
+    void refine_mesh (const unsigned int min_grid_level,   // minimal refinement level
+                      const unsigned int max_grid_level);  // maximal refinement level
+
+    Triangulation<dim>   triangulation;
+    FE_Q<dim>            fe;
+    DoFHandler<dim>      dof_handler;    // attached to triangulation in the constructor
+
+    ConstraintMatrix     constraints;    // hanging node constraints (see setup_system)
+
+    SparsityPattern      sparsity_pattern;   // shared by the three matrices below
+    SparseMatrix<double> mass_matrix;
+    SparseMatrix<double> laplace_matrix;
+    SparseMatrix<double> system_matrix;
+
+    Vector<double>       solution;       // current time step
+    Vector<double>       old_solution;   // previous time step
+    Vector<double>       system_rhs;
+
+    double               time;           // current time
+    double               time_step;      // size of the time step
+    unsigned int         timestep_number;  // number of the current time step
+
+    const double         theta;          // theta-scheme parameter
+  };
+
+
+
+  // @sect3{Equation data}
+
+  // In the following classes and functions, we implement the various pieces
+  // of data that define this problem (right hand side and boundary values)
+  // that are used in this program and for which we need function objects. The
+  // right hand side is chosen as discussed at the end of the
+  // introduction. For boundary values, we choose zero values, but this is
+  // easily changed below.
+  template<int dim>
+  class RightHandSide : public Function<dim>   // source term f(x,t) of the equation
+  {
+  public:
+    RightHandSide ()
+      :
+      Function<dim>(),
+      period (0.2)          // length of one on/off cycle of the source
+    {}
+
+    virtual double value (const Point<dim> &p,     // evaluation point
+                          const unsigned int component = 0) const;  // must be 0
+
+  private:
+    const double period;    // used by value() to locate the time within a cycle
+  };
+
+
+
+  template<int dim>
+  double RightHandSide<dim>::value (const Point<dim> &p,
+                                    const unsigned int component) const
+  {
+    Assert (component == 0, ExcInternalError());
+    Assert (dim == 2, ExcNotImplemented());
+
+    // Position of the current time inside its period, expressed as a
+    // fraction of the period length (completed periods are subtracted off).
+    const double t = this->get_time();
+    const double phase = (t/period - std::floor(t/period));
+
+    // The source is active only during two windows of each period; which
+    // window we are in selects the region of the domain that is heated.
+    const bool in_first_window  = (phase >= 0.0) && (phase <= 0.2);
+    const bool in_second_window = (phase >= 0.5) && (phase <= 0.7);
+
+    // First window: region x > 0.5, y > -0.5.
+    if (in_first_window)
+      return ((p[0] > 0.5) && (p[1] > -0.5)) ? 1 : 0;
+
+    // Second window: region x > -0.5, y > 0.5.
+    if (in_second_window)
+      return ((p[0] > -0.5) && (p[1] > 0.5)) ? 1 : 0;
+
+    return 0;   // source switched off
+  }
+
+
+
+  template<int dim>
+  class BoundaryValues : public Function<dim>   // boundary data; zero everywhere
+  {
+  public:
+    virtual double value (const Point<dim>  &p,    // ignored by the implementation
+                          const unsigned int component = 0) const;  // must be 0
+  };
+
+
+
+  template<int dim>
+  double BoundaryValues<dim>::value (const Point<dim> &/*p*/,   // position not needed
+                                     const unsigned int component) const
+  {
+    Assert(component == 0, ExcInternalError());   // scalar function: one component
+    return 0;   // zero boundary values at every point and time
+  }
+
+
+
+  // @sect3{The <code>HeatEquation</code> implementation}
+  //
+  // It is time now for the implementation of the main class. Let's start
+  // with the constructor which selects a linear element, starts the clock
+  // and the step counter at zero, fixes the time step at 1/500 (one period
+  // of the source on the right hand side was set to 0.2 above, so we
+  // resolve each period with 100 time steps) and chooses the
+  // Crank-Nicolson method by setting $\theta=1/2$.
+  template<int dim>
+  HeatEquation<dim>::HeatEquation ()
+    :
+    fe(1),
+    dof_handler(triangulation),
+    time(0.0), time_step(1. / 500), timestep_number(0),
+    theta(0.5)
+  {}
+
+
+
+  // @sect4{<code>HeatEquation::setup_system</code>}
+  //
+  // The next function is the one that sets up the DoFHandler object,
+  // computes the constraints, and sets the linear algebra objects
+  // to their correct sizes. We also compute the mass and Laplace
+  // matrix here by simply calling two functions in the library.
+  //
+  // Note that we do not take the hanging node constraints into account when
+  // assembling the matrices (both functions have a ConstraintMatrix argument
+  // that defaults to an empty object). This is because we are going to
+  // condense the constraints in run() after combining the matrices for the
+  // current time-step.
+  template<int dim>
+  void HeatEquation<dim>::setup_system()
+  {
+    // Enumerate the unknowns on the current mesh; every object below is
+    // sized by the resulting number of degrees of freedom.
+    dof_handler.distribute_dofs(fe);
+    const types::global_dof_index n_dofs = dof_handler.n_dofs();
+
+    std::cout << std::endl
+              << "===========================================" << std::endl
+              << "Number of active cells: " << triangulation.n_active_cells()
+              << std::endl
+              << "Number of degrees of freedom: " << n_dofs << std::endl
+              << std::endl;
+
+    // Recompute the hanging node constraints for this mesh.
+    constraints.clear ();
+    DoFTools::make_hanging_node_constraints (dof_handler, constraints);
+    constraints.close();
+
+    // Build the sparsity pattern through a dynamic intermediate object and
+    // share the final pattern between all three matrices.
+    DynamicSparsityPattern dynamic_pattern(n_dofs);
+    DoFTools::make_sparsity_pattern(dof_handler, dynamic_pattern, constraints,
+                                    /*keep_constrained_dofs = */ true);
+    sparsity_pattern.copy_from(dynamic_pattern);
+
+    system_matrix.reinit(sparsity_pattern);
+    mass_matrix.reinit(sparsity_pattern);
+    laplace_matrix.reinit(sparsity_pattern);
+
+    // Assemble the mass and Laplace matrices on the current mesh. No
+    // constraints are passed here; run() condenses the combined system.
+    const QGauss<dim> quadrature(fe.degree+1);
+    MatrixCreator::create_mass_matrix(dof_handler, quadrature, mass_matrix);
+    MatrixCreator::create_laplace_matrix(dof_handler, quadrature, laplace_matrix);
+
+    solution.reinit(n_dofs);
+    old_solution.reinit(n_dofs);
+    system_rhs.reinit(n_dofs);
+  }
+
+
+  // @sect4{<code>HeatEquation::solve_time_step</code>}
+  //
+  // The next function is the one that solves the actual linear system
+  // for a single time step. There is nothing surprising here:
+  template<int dim>
+  void HeatEquation<dim>::solve_time_step()
+  {
+    // Allow at most 1000 iterations and use a tolerance of 1e-8 relative to
+    // the l2 norm of the right hand side.
+    const double tolerance = 1e-8 * system_rhs.l2_norm();
+    SolverControl control(1000, tolerance);
+    SolverCG<>    solver(control);
+
+    PreconditionSSOR<> ssor;
+    ssor.initialize(system_matrix, 1.0);
+    solver.solve(system_matrix, solution, system_rhs, ssor);
+
+    constraints.distribute(solution);
+    std::cout << "     " << control.last_step()
+              << " CG iterations." << std::endl;
+  }
+
+
+
+  // @sect4{<code>HeatEquation::output_results</code>}
+  //
+  // Neither is there anything new in generating graphical output:
+  template<int dim>
+  void HeatEquation<dim>::output_results() const
+  {
+    // One VTK file per call, named after the current time step number.
+    const std::string filename = std::string("solution-")
+                                 + Utilities::int_to_string(timestep_number, 3)
+                                 + ".vtk";
+
+    DataOut<dim> data_out;
+    data_out.attach_dof_handler(dof_handler);
+    data_out.add_data_vector(solution, "U");
+    data_out.build_patches();
+
+    std::ofstream output(filename.c_str());
+    data_out.write_vtk(output);
+  }
+
+
+  // @sect4{<code>HeatEquation::refine_mesh</code>}
+  //
+  // This function is the interesting part of the program. It takes care of
+  // the adaptive mesh refinement. The three tasks
+  // this function performs are to first find out which cells to
+  // refine/coarsen, then to actually do the refinement and eventually
+  // transfer the solution vectors between the two different grids. The first
+  // task is simply achieved by using the well-established Kelly error
+  // estimator on the solution. The second task is to actually do the
+  // remeshing. That involves only basic functions as well, such as the
+  // <code>refine_and_coarsen_fixed_fraction</code> that refines those cells
+  // with the largest estimated error that together make up 60 per cent of the
+  // error, and coarsens those cells with the smallest error that make up for
+  // a combined 40 per cent of the error. Note that for problems such as the
+  // current one where the areas where something is going on are shifting
+  // around, we want to aggressively coarsen so that we can move cells
+  // around to where it is necessary.
+  //
+  // As already discussed in the introduction, too small a mesh leads to
+  // too small a time step, whereas too large a mesh leads to too little
+  // resolution. Consequently, after the first two steps, we have two
+  // loops that limit refinement and coarsening to an allowable range of
+  // cells:
+  template <int dim>
+  void HeatEquation<dim>::refine_mesh (const unsigned int min_grid_level,
+                                       const unsigned int max_grid_level)
+  {
+    Vector<float> estimated_error_per_cell (triangulation.n_active_cells());
+
+    KellyErrorEstimator<dim>::estimate (dof_handler,
+                                        QGauss<dim-1>(fe.degree+1),
+                                        typename FunctionMap<dim>::type(),
+                                        solution,
+                                        estimated_error_per_cell);
+
+    GridRefinement::refine_and_coarsen_fixed_fraction (triangulation,
+                                                       estimated_error_per_cell,
+                                                       0.6, 0.4);
+
+    if (triangulation.n_levels() > max_grid_level)
+      for (typename Triangulation<dim>::active_cell_iterator
+           cell = triangulation.begin_active(max_grid_level);
+           cell != triangulation.end(); ++cell)
+        cell->clear_refine_flag ();  // do not refine beyond max_grid_level
+    for (typename Triangulation<dim>::active_cell_iterator
+         cell = triangulation.begin_active(min_grid_level);
+         cell != triangulation.end_active(min_grid_level); ++cell)
+      cell->clear_coarsen_flag ();  // do not coarsen below min_grid_level
+    // The two loops above are slightly different, but this is easily
+    // explained. In the first loop, instead of calling
+    // <code>triangulation.end()</code> we may as well have called
+    // <code>triangulation.end_active(max_grid_level)</code>. The two
+    // calls should yield the same iterator since iterators are sorted
+    // by level and there should not be any cells on levels higher than
+    // level <code>max_grid_level</code>. In fact, this very piece
+    // of code makes sure that this is the case.
+
+    // As part of mesh refinement we need to transfer the solution vectors
+    // from the old mesh to the new one. To this end we use the
+    // SolutionTransfer class and we have to prepare the solution vectors that
+    // should be transferred to the new grid (we will lose the old grid once
+    // we have done the refinement so the transfer has to happen concurrently
+    // with refinement). At the point where we call this function, we will
+    // have just computed the solution, so we no longer need the old_solution
+    // variable (it will be overwritten by the solution just after the mesh
+    // may have been refined, i.e., at the end of the time step; see below).
+    // In other words, we only need the one solution vector, and we copy it
+    // to a temporary object where it is safe from being reset when we further
+    // down below call <code>setup_system()</code>.
+    //
+    // Consequently, we initialize a SolutionTransfer object by attaching
+    // it to the old DoF handler. We then prepare the triangulation and the
+    // data vector for refinement (in this order).
+    SolutionTransfer<dim> solution_trans(dof_handler);
+
+    Vector<double> previous_solution;
+    previous_solution = solution;
+    triangulation.prepare_coarsening_and_refinement();
+    solution_trans.prepare_for_coarsening_and_refinement(previous_solution);
+
+    // Now everything is ready, so do the refinement and recreate the DoF
+    // structure on the new grid, and finally initialize the matrix structures
+    // and the new vectors in the <code>setup_system</code> function. Next, we
+    // actually perform the interpolation of the solution from old to new
+    // grid. The final step is to apply the hanging node constraints to the
+    // solution vector, i.e., to make sure that the values of degrees of
+    // freedom located on hanging nodes are such that the solution is
+    // continuous. This is necessary since SolutionTransfer only operates on
+    // cells locally, without regard to the neighborhood.
+    triangulation.execute_coarsening_and_refinement ();
+    setup_system ();
+
+    solution_trans.interpolate(previous_solution, solution);
+    constraints.distribute (solution);
+  }
+
+
+
+  // @sect4{<code>HeatEquation::run</code>}
+  //
+  // This is the main driver of the program, where we loop over all
+  // time steps. At the top of the function, we set the number of
+  // initial global mesh refinements and the number of initial cycles of
+  // adaptive mesh refinement by repeating the first time step a few
+  // times. Then we create a mesh, initialize the various objects we will
+  // work with, set a label for where we should start when re-running
+  // the first time step, and interpolate the initial solution onto
+  // our mesh (we choose the zero function here, which of course we could
+  // do in a simpler way by just setting the solution vector to zero). We
+  // also output the initial time step once.
+  //
+  // @note If you're an experienced programmer, you may be surprised
+  // that we use a <code>goto</code> statement in this piece of code!
+  // <code>goto</code> statements are not particularly well liked any
+  // more since Edsger Dijkstra, one of the greats of computer science,
+  // wrote a letter in 1968 called "Go To Statement considered harmful"
+  // (see <a href="http://en.wikipedia.org/wiki/Considered_harmful">here</a>).
+  // The author of this code subscribes to this notion whole-heartedly:
+  // <code>goto</code> is hard to understand. In fact, deal.II contains
+  // virtually no occurrences: excluding code that was essentially
+  // transcribed from books and not counting duplicated code pieces,
+  // there are 3 locations in about 600,000 lines of code; we also
+  // use it in 4 tutorial programs, in exactly the same context
+  // as here. Instead of trying to justify the occurrence here,
+  // let's first look at the code and we'll come back to the issue
+  // at the end of the function.
+  template<int dim>
+  void HeatEquation<dim>::run()
+  {
+    const unsigned int initial_global_refinement = 2;
+    const unsigned int n_adaptive_pre_refinement_steps = 4;
+
+    GridGenerator::hyper_L (triangulation);
+    triangulation.refine_global (initial_global_refinement);
+
+    setup_system();
+
+    unsigned int pre_refinement_step = 0;
+
+    Vector<double> tmp;
+    Vector<double> forcing_terms;
+
+start_time_iteration:
+
+    tmp.reinit (solution.size());
+    forcing_terms.reinit (solution.size());
+
+
+    VectorTools::interpolate(dof_handler,
+                             ZeroFunction<dim>(),
+                             old_solution);
+    solution = old_solution;
+
+    timestep_number = 0;
+    time            = 0;
+
+    output_results();
+
+    // Then we start the main loop, which runs until the computed time
+    // exceeds our end time of 0.5. The first task is to build the right hand
+    // side of the linear system we need to solve in each time step.
+    // Recall that it contains the term $MU^{n-1}-(1-\theta)k_n AU^{n-1}$.
+    // We put these terms into the variable system_rhs, with the
+    // help of a temporary vector:
+    while (time <= 0.5)
+      {
+        time += time_step;
+        ++timestep_number;
+
+        std::cout << "Time step " << timestep_number << " at t=" << time
+                  << std::endl;
+
+        mass_matrix.vmult(system_rhs, old_solution);
+
+        laplace_matrix.vmult(tmp, old_solution);
+        system_rhs.add(-(1 - theta) * time_step, tmp);
+
+        // The second piece is to compute the contributions of the source
+        // terms. This corresponds to the term $k_n
+        // \left[ (1-\theta)F^{n-1} + \theta F^n \right]$. The following
+        // code calls VectorTools::create_right_hand_side to compute the
+        // vectors $F$, where we set the time of the right hand side
+        // (source) function before we evaluate it. The result of this
+        // all ends up in the forcing_terms variable:
+        RightHandSide<dim> rhs_function;
+        rhs_function.set_time(time);
+        VectorTools::create_right_hand_side(dof_handler,
+                                            QGauss<dim>(fe.degree+1),
+                                            rhs_function,
+                                            tmp);
+        forcing_terms = tmp;
+        forcing_terms *= time_step * theta;
+
+        rhs_function.set_time(time - time_step);
+        VectorTools::create_right_hand_side(dof_handler,
+                                            QGauss<dim>(fe.degree+1),
+                                            rhs_function,
+                                            tmp);
+
+        forcing_terms.add(time_step * (1 - theta), tmp);
+
+        // Next, we add the forcing terms to the ones that
+        // come from the time stepping, and also build the matrix
+        // $M+k_n\theta A$ that we have to invert in each time step.
+        // The final piece of these operations is to eliminate
+        // hanging node constrained degrees of freedom from the
+        // linear system:
+        system_rhs += forcing_terms;
+
+        system_matrix.copy_from(mass_matrix);
+        system_matrix.add(theta * time_step, laplace_matrix);
+
+        constraints.condense (system_matrix, system_rhs);
+
+        // There is one more operation we need to do before we
+        // can solve it: boundary values. To this end, we create
+        // a boundary value object, set the proper time to the one
+        // of the current time step, and evaluate it as we have
+        // done many times before. The result is used to also
+        // set the correct boundary values in the linear system:
+        {
+          BoundaryValues<dim> boundary_values_function;
+          boundary_values_function.set_time(time);
+
+          std::map<types::global_dof_index, double> boundary_values;
+          VectorTools::interpolate_boundary_values(dof_handler,
+                                                   0,  // boundary indicator
+                                                   boundary_values_function,
+                                                   boundary_values);
+
+          MatrixTools::apply_boundary_values(boundary_values,
+                                             system_matrix,
+                                             solution,
+                                             system_rhs);
+        }
+
+        // With this out of the way, all we have to do is solve the
+        // system, generate graphical data, and...
+        solve_time_step();
+
+        output_results();
+
+        // ...take care of mesh refinement. Here, what we want to do is
+        // (i) refine the requested number of times at the very beginning
+        // of the solution procedure, after which we jump to the top to
+        // restart the time iteration, (ii) refine every fifth time
+        // step after that.
+        //
+        // The time loop and, indeed, the main part of the program ends
+        // with starting into the next time step by setting old_solution
+        // to the solution we have just computed.
+        if ((timestep_number == 1) &&
+            (pre_refinement_step < n_adaptive_pre_refinement_steps))
+          {
+            refine_mesh (initial_global_refinement,
+                         initial_global_refinement + n_adaptive_pre_refinement_steps);
+            ++pre_refinement_step;
+
+            tmp.reinit (solution.size());  // NOTE(review): repeated right after the label
+            forcing_terms.reinit (solution.size());
+
+            std::cout << std::endl;
+
+            goto start_time_iteration;  // restart at time 0 on the refined mesh
+          }
+        else if ((timestep_number > 0) && (timestep_number % 5 == 0))
+          {
+            refine_mesh (initial_global_refinement,
+                         initial_global_refinement + n_adaptive_pre_refinement_steps);
+            tmp.reinit (solution.size());  // the mesh, and hence the vector size, may have changed
+            forcing_terms.reinit (solution.size());
+          }
+
+        old_solution = solution;
+      }
+  }
+}
+// Now that you have seen what the function does, let us come back to the issue
+// of the <code>goto</code>. In essence, what the code does is
+// something like this:
+// @code
+//   void run ()
+//   {
+//     initialize;
+//   start_time_iteration:
+//     for (timestep=1...)
+//     {
+//        solve timestep;
+//        if (timestep==1 && not happy with the result)
+//        {
+//          adjust some data structures;
+//          goto start_time_iteration; // simply try again
+//        }
+//        postprocess;
+//     }
+//   }
+// @endcode
+// Here, the condition "happy with the result" is whether we'd like to keep
+// the current mesh or would rather refine the mesh and start over on the
+// new mesh. We could of course replace the use of the <code>goto</code>
+// by the following:
+// @code
+//   void run ()
+//   {
+//     initialize;
+//     while (true)
+//     {
+//        solve timestep;
+//        if (not happy with the result)
+//           adjust some data structures;
+//        else
+//           break;
+//     }
+//     postprocess;
+//
+//     for (timestep=2...)
+//     {
+//        solve timestep;
+//        postprocess;
+//     }
+//   }
+// @endcode
+// This has the advantage of getting rid of the <code>goto</code>
+// but the disadvantage of having to duplicate the code that implements
+// the "solve timestep" and "postprocess" operations in two different
+// places. This could be countered by putting these parts of the code
+// (sizable chunks in the actual implementation above) into their
+// own functions, but a <code>while(true)</code> loop with a
+// <code>break</code> statement is not really all that much easier
+// to read or understand than a <code>goto</code>.
+//
+// In the end, one might simply agree that <i>in general</i>
+// <code>goto</code> statements are a bad idea but be pragmatic
+// and state that there may be occasions where they can help avoid
+// code duplication and awkward control flow. This may be one of these
+// places.
+
+
+// @sect3{The <code>main</code> function}
+//
+// Having made it this far,  there is, again, nothing
+// much to discuss for the main function of this
+// program: it looks like all such functions since step-6.
+int main()
+{
+  // Separator line printed around both kinds of error report.
+  const char *const separator =
+    "----------------------------------------------------";
+
+  try
+    {
+      using namespace dealii;
+      using namespace Step26;
+
+      HeatEquation<2> heat_equation_solver;
+      heat_equation_solver.run();
+    }
+  catch (std::exception &error)
+    {
+      std::cerr << std::endl << std::endl
+                << separator << std::endl
+                << "Exception on processing: " << std::endl
+                << error.what() << std::endl
+                << "Aborting!" << std::endl
+                << separator << std::endl;
+      return 1;
+    }
+  catch (...)
+    {
+      std::cerr << std::endl << std::endl
+                << separator << std::endl
+                << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << separator << std::endl;
+      return 1;
+    }
+
+  // Reached only if run() finished without throwing.
+  return 0;
+}
diff --git a/examples/step-27/CMakeLists.txt b/examples/step-27/CMakeLists.txt
new file mode 100644
index 0000000..ff811cf
--- /dev/null
+++ b/examples/step-27/CMakeLists.txt
@@ -0,0 +1,39 @@
+##
+#  CMake script for the step-27 tutorial program (builds step-27.cc):
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-27")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point: the rest locates deal.II and sets up the target.
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-27/doc/builds-on b/examples/step-27/doc/builds-on
new file mode 100644
index 0000000..1740273
--- /dev/null
+++ b/examples/step-27/doc/builds-on
@@ -0,0 +1 @@
+step-6
diff --git a/examples/step-27/doc/intro.dox b/examples/step-27/doc/intro.dox
new file mode 100644
index 0000000..3945e32
--- /dev/null
+++ b/examples/step-27/doc/intro.dox
@@ -0,0 +1,724 @@
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+This tutorial program attempts to show how to use $hp$ finite element methods
+with deal.II. It solves the Laplace equation and so builds only on the first
+few tutorial programs, in particular on step-4 for dimension
+independent programming and step-6 for adaptive mesh
+refinement.
+
+The $hp$ finite element method was proposed in the early 1980s by
+Babuska and Guo as an alternative to either
+(i) mesh refinement (i.e. decreasing the mesh parameter $h$ in a finite
+element computation) or (ii) increasing the polynomial degree $p$ used for
+shape functions. It is based on the observation that increasing the polynomial
+degree of the shape functions reduces the approximation error if the solution
+is sufficiently smooth.  On the other hand, it is well known
+that even for the generally well-behaved class of elliptic problems, higher
+degrees of regularity can not be guaranteed in the vicinity of boundaries,
+corners, or where coefficients are discontinuous; consequently, the
+approximation can not be improved in these areas by increasing the polynomial
+degree $p$ but only by refining the mesh, i.e. by reducing the mesh size
+$h$. These differing means to reduce the
+error have led to the notion of $hp$ finite elements, where the approximating
+finite element spaces are adapted to have a high polynomial degree $p$
+wherever the solution is sufficiently smooth, while the mesh width $h$ is
+reduced at places wherever the solution lacks regularity. It was
+already realized in the first papers on this method that $hp$ finite elements
+can be a powerful tool that can guarantee that the error is reduced not only
+with some negative power of the number of degrees of freedom, but in fact
+exponentially.
+
+In order to implement this method, we need several things above and beyond
+what a usual finite element program needs, and in particular above what we
+have introduced in the tutorial programs leading up to step-6. In particular,
+we will have to discuss the following aspects:
+<ul>
+  <li>Instead of using the same finite element on all cells, we now will want
+  a collection of finite element objects, and associate each cell with one
+  of these objects in this collection.
+
+  <li>Degrees of freedom will then have to be allocated on each cell depending
+  on what finite element is associated with this particular cell. Constraints
+  will have to be generated in the same way as for hanging nodes, but now also
+  including the case where two neighboring cells use different finite elements.
+
+  <li>We will need to be able to assemble cell and face contributions
+  to global matrices and right hand side vectors.
+
+  <li>After solving the resulting linear system, we will want to
+  analyze the solution. In particular, we will want to compute error
+  indicators that tell us whether a given cell should be refined
+  and/or whether the polynomial degree of the shape functions used on
+  it should be increased.
+</ul>
+
+We will discuss all these aspects in the following subsections of this
+introduction. It will not come as a big surprise that most of these
+tasks are already well supported by functionality provided by the
+deal.II libraries, and that we will only have to provide the logic of
+what the program should do, not exactly how all this is going to
+happen.
+
+In deal.II, the $hp$ functionality is largely packaged into
+the hp namespace. This namespace provides classes that handle
+$hp$ discretizations, assembling matrices and vectors, and other
+tasks. We will get to know many of them further down below. In
+addition, many of the functions in the DoFTools and VectorTools
+namespaces accept $hp$ objects in addition to the non-$hp$ ones. Much of
+the $hp$ implementation is also discussed in the @ref hp documentation
+module and the links found there.
+
+It may be worth giving a slightly larger perspective at the end of
+this first part of the introduction. $hp$ functionality has been
+implemented in a number of different finite element packages (see, for
+example, the list of references cited in the @ref hp_paper "hp paper").
+However, by and large, most of these packages have implemented it only
+for (i) the 2d case, and/or (ii) the discontinuous Galerkin
+method. The latter is a significant simplification because
+discontinuous finite elements by definition do not require continuity
+across faces between cells and therefore do not require the special
+treatment otherwise necessary whenever finite elements of different
+polynomial degree meet at a common face. In contrast, deal.II
+implements the most general case, i.e. it allows for continuous and
+discontinuous elements in 1d, 2d, and 3d, and automatically handles
+the resulting complexity. In particular, it handles computing the
+constraints (similar to hanging node constraints) of elements of
+different degree meeting at a face or edge. The many algorithmic and
+data structure techniques necessary for this are described in the
+@ref hp_paper "hp paper" for those interested in such detail.
+
+We hope that providing such a general implementation will help explore
+the potential of $hp$ methods further.
+
+
+<h3>Finite element collections</h3>
+
+Now on again to the details of how to use the $hp$ functionality in
+deal.II. The first aspect we have to deal with is that now we do not
+have only a single finite element any more that is used on all cells,
+but a number of different elements that cells can choose to use. For
+this, deal.II introduces the concept of a <i>finite element
+collection</i>, implemented in the class hp::FECollection. In essence,
+such a collection acts like an object of type
+<code>std::vector@<FiniteElement@></code>, but with a few more bells
+and whistles and a memory management better suited to the task at
+hand. As we will later see, we will also use similar quadrature
+collections, and — although we don't use them here — there
+is also the concept of mapping collections. All of these classes are
+described in the @ref hpcollection overview.
+
+In this tutorial program, we will use continuous Lagrange elements of
+orders 2 through 7 (in 2d) or 2 through 5 (in 3d). The collection of
+used elements can then be created as follows:
+@code
+  hp::FECollection<dim> fe_collection;
+  for (unsigned int degree=2; degree<=max_degree; ++degree)
+    fe_collection.push_back (FE_Q<dim>(degree));
+@endcode
+
+
+
+<h3>The hp::DoFHandler class, associating cells with finite elements, and constraints</h3>
+
+The next task we have to consider is what to do with the list of
+finite element objects we want to use. In previous tutorial programs,
+starting with step-2, we have seen that the DoFHandler
+class is responsible for making the connection between a mesh
+(described by a Triangulation object) and a finite element, by
+allocating the correct number of degrees of freedom for each vertex,
+face, edge, and cell of the mesh.
+
+The situation here is a bit more complicated since we do not just have
+a single finite element object, but rather may want to use different
+elements on different cells. We therefore need two things: (i) a
+version of the DoFHandler class that can deal with this situation, and
+(ii) a way to tell the DoF handler which element to use on which cell.
+
+The first of these two things is implemented in the hp::DoFHandler
+class: rather than associating it with a triangulation and a single
+finite element object, it is associated with a triangulation and a
+finite element collection. The second part is achieved by a loop over
+all cells of this hp::DoFHandler and for each cell setting the index
+of the finite element within the collection that shall be used on this
+cell. We call the index of the finite element object within the
+collection that shall be used on a cell the cell's <i>active FE
+index</i> to indicate that this is the finite element that is active
+on this cell, whereas all the other elements of the collection are
+inactive on it. The general outline of this reads like this:
+
+@code
+  hp::DoFHandler<dim> dof_handler (triangulation);
+  for (typename hp::DoFHandler<dim>::active_cell_iterator
+         cell = dof_handler.begin_active();
+       cell != dof_handler.end(); ++cell)
+    cell->set_active_fe_index (...);
+  dof_handler.distribute_dofs (fe_collection);
+@endcode
+
+Dots in the call to <code>set_active_fe_index()</code> indicate that
+we will have to have some sort of strategy later on to decide which
+element to use on which cell; we will come back to this later. The
+main point here is that the first and last line of this code snippet
+is pretty much exactly the same as for the non-$hp$ case.
+
+Another complication arises from the fact that this time we do not
+simply have hanging nodes from local mesh refinement, but we also have
+to deal with the case that if there are two cells with different
+active finite element indices meeting at a face (for example a Q2 and
+a Q3 element) then we have to compute additional constraints on the
+finite element field to ensure that it is continuous. This is
+conceptually very similar to how we compute hanging node constraints,
+and in fact the code looks exactly the same:
+@code
+  ConstraintMatrix constraints;
+  DoFTools::make_hanging_node_constraints (dof_handler,
+					   constraints);
+@endcode
+In other words, the DoFTools::make_hanging_node_constraints deals not
+only with hanging node constraints, but also with $hp$ constraints at
+the same time.
+
+
+
+<h3>Assembling matrices and vectors with hp objects</h3>
+
+Following this, we have to set up matrices and vectors for the linear system
+of the correct size and assemble them. Setting them up works in exactly the
+same way as for the non-$hp$ case. Assembling requires a bit more thought.
+
+The main idea is of course unchanged: we have to loop over all cells, assemble
+local contributions, and then copy them into the global objects. As discussed
+in some detail first in step-3, deal.II has the FEValues class
+that pulls finite element description, mapping, and quadrature formula
+together and aids in evaluating values and gradients of shape functions as
+well as other information on each of the quadrature points mapped to the real
+location of a cell. Every time we move on to a new cell we re-initialize this
+FEValues object, thereby asking it to re-compute that part of the information
+that changes from cell to cell. It can then be used to sum up local
+contributions to bilinear form and right hand side.
+
+In the context of $hp$ finite element methods, we have to deal with the fact
+that we do not use the same finite element object on each cell. In fact, we
+should not even use the same quadrature object for all cells, but rather
+higher order quadrature formulas for cells where we use higher order finite
+elements. Similarly, we may want to use higher order mappings on such cells as
+well.
+
+To facilitate these considerations, deal.II has a class hp::FEValues that does
+what we need in the current context. The difference is that instead of a
+single finite element, quadrature formula, and mapping, it takes collections
+of these objects. Its use is very much like the regular FEValues class,
+i.e. the interesting part of the loop over all cells would look like this:
+
+@code
+  hp::FEValues<dim> hp_fe_values (mapping_collection,
+                                  fe_collection,
+				  quadrature_collection,
+				  update_values    |  update_gradients |
+				  update_q_points  |  update_JxW_values);
+
+  typename hp::DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+  for (; cell!=endc; ++cell)
+    {
+      hp_fe_values.reinit (cell,
+                           cell->active_fe_index(),
+                           cell->active_fe_index(),
+                           cell->active_fe_index());
+
+      const FEValues<dim> &fe_values = hp_fe_values.get_present_fe_values ();
+
+      ...  // assemble local contributions and copy them into global object
+    }
+@endcode
+
+In this tutorial program, we will always use a Q1 mapping, so the mapping
+collection argument to the hp::FEValues construction will be omitted. Inside
+the loop, we first initialize the hp::FEValues object for the current
+cell. The second, third and fourth arguments denote the index within their
+respective collections of the quadrature, mapping, and finite element objects
+we wish to use on this cell. These arguments can be omitted (and are in the
+program below), in which case <code>cell-@>active_fe_index()</code> is used
+for this index. The order of these arguments is chosen in this way because one
+may sometimes want to pick a different quadrature or mapping object from their
+respective collections, but hardly ever a different finite element than the
+one in use on this cell, i.e. one with an index different from
+<code>cell-@>active_fe_index()</code>. The finite element collection index is
+therefore the last default argument so that it can be conveniently omitted.
+
+What this <code>reinit</code> call does is the following: the
+hp::FEValues class checks whether it has previously already allocated a
+non-$hp$ FEValues object for this combination of finite element, quadrature,
+and mapping objects. If not, it allocates one. It then re-initializes this
+object for the current cell, after which there is now a FEValues object for
+the selected finite element, quadrature and mapping usable on the current
+cell. A reference to this object is then obtained using the call
+<code>hp_fe_values.get_present_fe_values()</code>, and will be used in the
+usual fashion to assemble local contributions.
+
+
+
+<h3>A simple indicator for hp refinement and estimating smoothness</h3>
+
+One of the central pieces of the adaptive finite element method is that we
+inspect the computed solution (a posteriori) with an indicator that tells us
+which are the cells where the error is largest, and then refine them. In many
+of the other tutorial programs, we use the KellyErrorEstimator class to get an
+indication of the size of the error on a cell, although we also discuss more
+complicated strategies in some programs, most importantly in step-14.
+
+In any case, as long as the decision is only "refine this cell" or "do not
+refine this cell", the actual refinement step is not particularly
+challenging. However, here we have a code that is capable of hp refinement,
+i.e. we suddenly have two choices whenever we detect that the error on a
+certain cell is too large for our liking: we can refine the cell by splitting
+it into several smaller ones, or we can increase the polynomial degree of the
+shape functions used on it. How do we know which is the more promising
+strategy? Answering this question is the central problem in $hp$ finite
+element research at the time of this writing.
+
+In short, the question does not appear to be settled in the literature at this
+time. There are a number of more or less complicated schemes that address it,
+but there is nothing like the KellyErrorEstimator that is universally accepted
+as a good, even if not optimal, indicator of the error. Most proposals use the
+fact that it is beneficial to increase the polynomial degree whenever the
+solution is locally smooth whereas it is better to refine the mesh wherever it
+is rough. However, the questions of how to determine the local smoothness of
+the solution as well as the decision when a solution is smooth enough to allow
+for an increase in $p$ are certainly big and important ones.
+
+In the following, we propose a simple estimator of the local smoothness of a
+solution. As we will see in the results section, this estimator has flaws, in
+particular as far as cells with local hanging nodes are concerned. We
+therefore do not intend to present the following ideas as a complete solution
+to the problem. Rather, it is intended as an idea to approach it that merits
+further research and investigation. In other words, we do not intend to enter
+a sophisticated proposal into the fray about answers to the general
+question. However, to demonstrate our approach to $hp$ finite elements, we
+need a simple indicator that does generate some useful information that is
+able to drive the simple calculations this tutorial program will perform.
+
+
+<h4>The idea</h4>
+
+Our approach here is simple: for a function $u({\bf x})$ to be in the
+Sobolev space $H^s(K)$ on a cell $K$, it has to satisfy the condition
+@f[
+	\int_K |\nabla^s u({\bf x})|^2 \; d{\bf x} < \infty.
+@f]
+Assuming that the cell $K$ is not degenerate, i.e. that the mapping from the
+unit cell to cell $K$ is sufficiently regular, the above condition is of course
+equivalent to
+@f[
+	\int_{\hat K} |\nabla^s \hat u(\hat{\bf x})|^2 \; d\hat{\bf x} < \infty
+@f]
+where $\hat u(\hat{\bf x})$ is the function $u({\bf x})$ mapped back onto the unit cell
+$\hat K$. From here, we can do the following: first, let us define the
+Fourier series of $\hat u$ as
+@f[
+	\hat U_{\bf k}
+	= \frac 1{(2\pi)^{d/2}} \int_{\hat K} e^{i {\bf k}\cdot \hat{\bf x}} \hat u(\hat{\bf x}) d\hat{\bf x}
+@f]
+with Fourier vectors ${\bf k}=(k_x,k_y)$ in 2d, ${\bf k}=(k_x,k_y,k_z)$
+in 3d, etc, and $k_x,k_y,k_z=0,\pi,2\pi,3\pi,\ldots$. If we re-compose $\hat u$
+from $\hat U$ using the formula
+@f[
+	\hat u(\hat{\bf x})
+	= \frac 1{(2\pi)^{d/2}} \sum_{\bf k} e^{-i {\bf k}\cdot \hat{\bf x}} \hat U_{\bf k},
+@f]
+then it becomes clear that we can write the $H^s$ norm of $\hat u$ as
+@f[
+	\int_{\hat K} |\nabla^s \hat u(\hat{\bf x})|^2 \; d\hat{\bf x}
+	=
+	\frac 1{(2\pi)^d}
+	\int_{\hat K}
+	\left|
+	  \sum_{\bf k} |{\bf k}|^s e^{-i{\bf k}\cdot \hat{\bf x}} \hat U_{\bf k}
+        \right|^2 \; d\hat{\bf x}
+	=
+	\sum_{\bf k}
+	  |{\bf k}|^{2s}
+	  |\hat U_{\bf k}|^2.
+@f]
+In other words, if this norm is to be finite (i.e. for $\hat u(\hat{\bf x})$ to be in $H^s(\hat K)$), we need that
+@f[
+	|\hat U_{\bf k}| = {\cal O}\left(|{\bf k}|^{-\left(s+1/2+\frac{d-1}{2}+\epsilon\right)}\right).
+@f]
+Put differently: the higher regularity $s$ we want, the faster the
+Fourier coefficients have to go to zero. (If you wonder where the
+additional exponent $\frac{d-1}2$ comes from: we would like to make
+use of the fact that $\sum_l a_l < \infty$ if the sequence $a_l =
+{\cal O}(l^{-1-\epsilon})$ for any $\epsilon>0$. The problem is that we
+here have a summation not only over a single variable, but over all
+the integer multiples of $\pi$ that are located inside the
+$d$-dimensional sphere, because we have vector components $k_x, k_y,
+\ldots$. In the same way as we prove that the sequence $a_l$ above
+converges by replacing the sum by an integral over the entire line, we
+can replace our $d$-dimensional sum by an integral over
+$d$-dimensional space. Now we have to note that between distance $|{\bf k}|$
+and $|{\bf k}|+d|{\bf k}|$, there are, up to a constant, $|{\bf k}|^{d-1}$ modes, in
+much the same way as we can transform the volume element $dx\;dy$ into
+$2\pi r\; dr$. Consequently, it is no longer $|{\bf k}|^{2s}|\hat
+U_{\bf k}|^2$ that has to decay as ${\cal O}(|{\bf k}|^{-1-\epsilon})$, but
+it is in fact $|{\bf k}|^{2s}|\hat U_{\bf k}|^2 |{\bf k}|^{d-1}$. A
+comparison of exponents yields the result.)
+
+We can turn this around: Assume we are given a function $\hat u$ of unknown
+smoothness. Let us compute its Fourier coefficients $\hat U_{\bf k}$
+and see how fast they decay. If they decay as
+@f[
+	|\hat U_{\bf k}| = {\cal O}(|{\bf k}|^{-\mu-\epsilon}),
+@f]
+then consequently the function we had here was in $H^{\mu-d/2}$.
+
+
+<h4>What we have to do</h4>
+
+So what do we have to do to estimate the local smoothness of $u({\bf x})$ on
+a cell $K$? Clearly, the first step is to compute the Fourier series
+of our solution. Fourier series being infinite series, we simplify our
+task by only computing the first few terms of the series, such that
+$|{\bf k}|\le N$ with a cut-off $N$. (Let us parenthetically remark
+that we want to choose $N$ large enough so that we capture at least
+the variation of those shape functions that vary the most. On the
+other hand, we should not choose $N$ too large: clearly, a finite
+element function, being a polynomial, is in $C^\infty$ on any given
+cell, so the coefficients will have to decay exponentially at one
+point; since we want to estimate the smoothness of the function this
+polynomial approximates, not of the polynomial itself, we need to
+choose a reasonable cutoff for $N$.) Either way, computing this series
+is not particularly hard: from the definition
+@f[
+	\hat U_{\bf k}
+	= \frac 1{(2\pi)^{d/2}} \int_{\hat K} e^{i {\bf k}\cdot \hat{\bf x}} \hat u(\hat{\bf x}) d\hat{\bf x}
+@f]
+we see that we can compute the coefficient $\hat U_{\bf k}$ as
+@f[
+	\hat U_{\bf k}
+	= \frac 1{(2\pi)^{d/2}}
+          \sum_{i=0}^{\textrm{\tiny dofs per cell}}
+          \left[\int_{\hat K} e^{i {\bf k}\cdot \hat{\bf x}} \hat \varphi_i(\hat{\bf x})
+	  d\hat{\bf x} \right] u_i,
+@f]
+where $u_i$ is the value of the $i$th degree of freedom on this
+cell. In other words, we can write it as a matrix-vector product
+@f[
+	\hat U_{\bf k}
+	= {\cal F}_{{\bf k},j} u_j,
+@f]
+with the matrix
+@f[
+	{\cal F}_{{\bf k},j}
+	= \frac 1{(2\pi)^{d/2}}
+	\int_{\hat K} e^{i {\bf k}\cdot \hat{\bf x}} \hat \varphi_j(\hat{\bf x}) d\hat{\bf x}.
+@f]
+This matrix is easily computed for a given number of shape functions
+$\varphi_j$ and Fourier modes $N$. Consequently, finding the
+coefficients $\hat U_{\bf k}$ is a rather trivial job.
+
+The next task is that we have to estimate how fast these coefficients
+decay with $|{\bf k}|$. The problem is that, of course, we have only
+finitely many of these coefficients in the first place. In other
+words, the best we can do is to fit a function $\alpha |{\bf k}|^{-\mu}$
+to our data points $\hat U_{\bf k}$, for example by
+determining $\alpha,\mu$ via a least-squares procedure:
+@f[
+	\min_{\alpha,\mu}
+	\frac 12 \sum_{{\bf k}, |{\bf k}|\le N}
+	\left( |\hat U_{\bf k}| - \alpha |{\bf k}|^{-\mu}\right)^2
+@f]
+However, the problem with this is that it leads to a nonlinear
+problem, a fact that we would like to avoid. On the other hand, we can
+transform the problem into a simpler one if we try to fit the
+logarithm of our coefficients to the logarithm of $\alpha |{\bf k}|^{-\mu}$,
+like this:
+@f[
+	\min_{\alpha,\mu}
+	Q(\alpha,\mu) =
+	\frac 12 \sum_{{\bf k}, |{\bf k}|\le N}
+	\left( \ln |\hat U_{\bf k}| - \ln (\alpha |{\bf k}|^{-\mu})\right)^2.
+@f]
+Using the usual facts about logarithms, we see that this yields the
+problem
+@f[
+	\min_{\beta,\mu}
+	Q(\beta,\mu) =
+	\frac 12 \sum_{{\bf k}, |{\bf k}|\le N}
+	\left( \ln |\hat U_{\bf k}| - \beta + \mu \ln |{\bf k}|\right)^2,
+@f]
+where $\beta=\ln \alpha$. This is now a problem for which the
+optimality conditions $\frac{\partial Q}{\partial\beta}=0,
+\frac{\partial Q}{\partial\mu}=0$, are linear in $\beta,\mu$. We can
+write these conditions as follows:
+@f[
+	\left(\begin{array}{cc}
+	\sum_{{\bf k}, |{\bf k}|\le N} 1 &
+	\sum_{{\bf k}, |{\bf k}|\le N} \ln |{\bf k}|
+	\\
+	\sum_{{\bf k}, |{\bf k}|\le N} \ln |{\bf k}| &
+	\sum_{{\bf k}, |{\bf k}|\le N} (\ln |{\bf k}|)^2
+	\end{array}\right)
+	\left(\begin{array}{c}
+	\beta \\ -\mu
+	\end{array}\right)
+	=
+	\left(\begin{array}{c}
+	\sum_{{\bf k}, |{\bf k}|\le N} \ln |\hat U_{{\bf k}}|
+	\\
+	\sum_{{\bf k}, |{\bf k}|\le N} \ln |\hat U_{{\bf k}}| \ln |{\bf k}|
+	\end{array}\right)
+@f]
+This linear system is readily inverted to yield
+@f[
+	\beta =
+	\frac 1{\left(\sum_{{\bf k}, |{\bf k}|\le N} 1\right)
+                \left(\sum_{{\bf k}, |{\bf k}|\le N} (\ln |{\bf k}|)^2\right)
+		-\left(\sum_{{\bf k}, |{\bf k}|\le N} \ln |{\bf k}|\right)^2}
+	\left[
+	  \left(\sum_{{\bf k}, |{\bf k}|\le N} (\ln |{\bf k}|)^2\right)
+	  \left(\sum_{{\bf k}, |{\bf k}|\le N} \ln |\hat U_{{\bf k}}|\right)
+	  -
+	  \left(\sum_{{\bf k}, |{\bf k}|\le N} \ln |{\bf k}|\right)
+	  \left(\sum_{{\bf k}, |{\bf k}|\le N} \ln |\hat U_{{\bf k}}| \ln |{\bf k}| \right)
+	\right]
+@f]
+and
+@f[
+	\mu =
+	\frac 1{\left(\sum_{{\bf k}, |{\bf k}|\le N} 1\right)
+                \left(\sum_{{\bf k}, |{\bf k}|\le N} (\ln |{\bf k}|)^2\right)
+		-\left(\sum_{{\bf k}, |{\bf k}|\le N} \ln |{\bf k}|\right)^2}
+	\left[
+	  \left(\sum_{{\bf k}, |{\bf k}|\le N} \ln |{\bf k}|\right)
+	  \left(\sum_{{\bf k}, |{\bf k}|\le N} \ln |\hat U_{{\bf k}}|\right)
+	  -
+	  \left(\sum_{{\bf k}, |{\bf k}|\le N} 1\right)
+	  \left(\sum_{{\bf k}, |{\bf k}|\le N} \ln |\hat U_{{\bf k}}| \ln |{\bf k}| \right)
+	\right].
+@f]
+
+While we are not particularly interested in the actual value of
+$\beta$, the formula above gives us a means to calculate the value of
+the exponent $\mu$ that we can then use to determine that
+$\hat u(\hat{\bf x})$ is in $H^s(\hat K)$ with $s=\mu-\frac d2$.
+
+
+<h4>Compensating for anisotropy</h4>
+
+In the formulas above, we have derived the Fourier coefficients $\hat U_{\bf
+k}$. Because ${\bf k}$ is a vector, we will get a number of Fourier
+coefficients $\hat U_{{\bf k}}$ for the same absolute value $|{\bf k}|$,
+corresponding to the Fourier transform in different directions. If we now
+consider a function like $|x|y^2$ then we will find lots of large Fourier
+coefficients in $x$-direction because the function is non-smooth in this
+direction, but fast-decaying Fourier coefficients in $y$-direction because the
+function is smooth there. The question that arises is this: if we simply fit
+our polynomial decay $\alpha |{\bf k}|^{-\mu}$ to <i>all</i> Fourier coefficients,
+we will fit it to a smoothness <i>averaged in all spatial directions</i>. Is
+this what we want? Or would it be better to only consider the largest
+coefficient $\hat U_{{\bf k}}$ for all ${\bf k}$ with the same magnitude,
+essentially trying to determine the smoothness of the solution in that spatial
+direction in which the solution appears to be roughest?
+
+One can probably argue for either case. The issue would be of more interest if
+deal.II had the ability to use anisotropic finite elements, i.e. ones that use
+different polynomial degrees in different spatial directions, as they would be
+able to exploit the directionally variable smoothness much better. Alas, this
+capability does not exist at the time of writing this tutorial program.
+
+Either way, because we only have isotropic finite element classes, we adopt the
+viewpoint that we should tailor the polynomial degree to the lowest amount of
+regularity, in order to keep numerical efforts low. Consequently, instead of
+using the formula
+@f[
+	\mu =
+	\frac 1{\left(\sum_{{\bf k}, |{\bf k}|\le N} 1\right)
+                \left(\sum_{{\bf k}, |{\bf k}|\le N} (\ln |{\bf k}|)^2\right)
+		-\left(\sum_{{\bf k}, |{\bf k}|\le N} \ln |{\bf k}|\right)^2}
+	\left[
+	  \left(\sum_{{\bf k}, |{\bf k}|\le N} \ln |{\bf k}|\right)
+	  \left(\sum_{{\bf k}, |{\bf k}|\le N} \ln |\hat U_{{\bf k}}|\right)
+	  -
+	  \left(\sum_{{\bf k}, |{\bf k}|\le N} 1\right)
+	  \left(\sum_{{\bf k}, |{\bf k}|\le N} \ln |\hat U_{{\bf k}}| \ln |{\bf k}| \right)
+	\right].
+@f]
+to calculate $\mu$ as shown above, we have to slightly modify all sums:
+instead of summing over all Fourier modes, we only sum over those for which
+the Fourier coefficient is the largest one among all $\hat U_{{\bf k}}$ with
+the same magnitude $|{\bf k}|$, i.e. all sums above have to be replaced by the
+following sums:
+@f[
+  \sum_{{\bf k}, |{\bf k}|\le N}
+  \longrightarrow
+  \sum_{\begin{matrix}{{\bf k}, |{\bf k}|\le N} \\ {|\hat U_{{\bf k}}| \ge |\hat U_{{\bf k}'}|
+  \ \textrm{for all}\ {\bf k}'\ \textrm{with}\ |{\bf k}'|=|{\bf k}|}\end{matrix}}
+@f]
+This is the form we will implement in the program.
+
+
+<h4>Questions about cell sizes</h4>
+
+One may ask whether it is a problem that we only compute the Fourier transform
+on the <i>reference cell</i> (rather than the real cell) of the
+solution. After all, we stretch the solution by a factor $\frac 1h$ during the
+transformation, thereby shifting the Fourier frequencies by a factor of
+$h$. This is of particular concern since we may have neighboring cells with
+mesh sizes $h$ that differ by a factor of 2 if one of them is more refined
+than the other. The concern is also motivated by the fact that, as we will see
+in the results section below, the estimated smoothness of the solution should
+be a more or less continuous function, but exhibits jumps at locations where
+the mesh size jumps. It therefore seems natural to ask whether we have to
+compensate for the transformation.
+
+The short answer is "no". In the process outlined above, we attempt to find
+coefficients $\beta,\mu$ that minimize the sum of squares of the terms
+@f[
+	\ln |\hat U_{{\bf k}}| - \beta + \mu \ln |{\bf k}|.
+@f]
+To compensate for the transformation means not attempting to fit a decay
+$|{\bf k}|^{-\mu}$ with respect to the Fourier frequencies ${\bf k}$ <i>on the unit
+cell</i>, but to fit the coefficients $\hat U_{{\bf k}}$ computed on the
+reference cell <i>to the Fourier frequencies on the real cell $|{\bf
+k}|h$</i>, where $h$ is the norm of the transformation operator (i.e. something
+like the diameter of the cell). In other words, we would have to minimize the
+sum of squares of the terms
+@f[
+	\ln |\hat U_{{\bf k}}| - \beta + \mu \ln (|{\bf k}|h).
+@f]
+instead. However, using fundamental properties of the logarithm, this is
+simply equivalent to minimizing
+@f[
+	\ln |\hat U_{{\bf k}}| - (\beta - \mu \ln h) + \mu \ln (|{\bf k}|).
+@f]
+In other words, this and the original least squares problem will produce the
+same best-fit exponent $\mu$, though the offset will in one case be $\beta$
+and in the other $\beta-\mu \ln h$. However, since we are not interested in
+the offset at all but only in the exponent, it doesn't matter whether we scale
+Fourier frequencies in order to account for mesh size effects or not, the
+estimated smoothness exponent will be the same in either case.
+
+
+
+<h3>Complications with linear systems for hp discretizations</h3>
+
+<h4>Creating the sparsity pattern</h4>
+
+One of the problems with $hp$ methods is that the high polynomial degree of
+shape functions together with the large number of constrained degrees of
+freedom leads to matrices with large numbers of nonzero entries in some
+rows. At the same time, there are areas where we use low polynomial
+degree and consequently matrix rows with relatively few nonzero
+entries. Consequently, allocating the sparsity pattern for these matrices is a
+challenge.
+
+Most programs built on deal.II use the DoFTools::make_sparsity_pattern
+function to allocate the sparsity pattern of a matrix, and later add a few
+more entries necessary to handle constrained degrees of freedom using
+ConstraintMatrix::condense. The sparsity pattern is then compressed using
+SparsityPattern::compress. This method is explained in step-6 and used in
+most tutorial programs. In order to work, it needs an initial upper estimate
+for the maximal number of nonzero entries per row, something that can be had
+from the DoFHandler::max_couplings_between_dofs function. This is necessary
+due to the data structure used in the SparsityPattern class.
+
+Unfortunately, DoFHandler::max_couplings_between_dofs is unable to produce an
+efficient upper estimate in 3d and for higher order elements. If used in these
+situations, it therefore leads the SparsityPattern class to allocate much too
+much memory, almost all of which will be released again when we call
+SparsityPattern::compress. This deficiency, caused by the fact that
+DoFHandler::max_couplings_between_dofs must produce a single number for the
+maximal number of elements per row even though most rows will be significantly
+shorter, can be so severe that the initial memory allocation for the
+SparsityPattern exceeds the actual need by a factor of 10 or larger, and can
+lead to a program running out of memory when in fact there would be plenty of
+memory for all computations.
+
+A solution to the problem has already been discussed in step-11
+and step-18. It used an intermediate object of type
+CompressedSparsityPattern. This class uses a different memory storage scheme
+that is optimized for <i>creating</i> a sparsity pattern when maximal numbers
+of entries per row are not accurately available, but is unsuitable for use as
+the sparsity pattern actually underlying a sparse matrix. After building the
+intermediate object, it is therefore copied into a true SparsityPattern
+object, something that can be done very efficiently and without having to
+over-allocate memory. Typical code doing this is shown in the documentation of
+the CompressedSparsityPattern class. This solution is slower than directly
+building a SparsityPattern object, but only uses as much memory as is really
+necessary.
+
+As it now turns out, the storage format used in the
+CompressedSparsityPattern class is not very good for matrices with
+truly large numbers of entries per row — where truly large
+numbers mean in the hundreds. This isn't typically the case for lower
+order elements even in 3d, but happens for high order elements in 3d;
+for example, a vertex degree of freedom of a $Q_5$ element in 3d may
+couple to as many as 1700 other degrees of freedom. In such a case
+CompressedSparsityPattern will work, but tuning the memory storage
+format used internally in that class a bit will make it work several
+times faster. This is what we did with the
+CompressedSetSparsityPattern class — it has exactly the same
+interface as the CompressedSparsityPattern class but internally stores
+things somewhat differently. For most cases, there is not much of a
+difference in performance in the classes (though the old class has a
+slight advantage for lower order elements in 3d), but for high order
+and $hp$ elements in 3d, the CompressedSetSparsityPattern has a
+definite edge. We will therefore use it later when we build the
+sparsity pattern in this tutorial program.
+
+
+<h4>Eliminating constrained degrees of freedom</h4>
+
+A second problem particular to $hp$ methods arises because we have so
+many constrained degrees of freedom: typically up to about one third
+of all degrees of freedom (in 3d) are constrained because they either
+belong to cells with hanging nodes or because they are on cells
+adjacent to cells with a higher or lower polynomial degree. This is,
+in fact, not much more than the fraction of constrained degrees of
+freedom in non-$hp$ mode, but the difference is that each constrained
+hanging node is constrained not only against the two adjacent degrees
+of freedom, but is constrained against many more degrees of freedom.
+
+It turns out that the strategy presented first in step-6 to eliminate the
+constraints while computing the element matrices and vectors with
+ConstraintMatrix::distribute_local_to_global is the most efficient approach
+also for this case. The alternative strategy to first build the matrix without
+constraints and then "condensing" away constrained degrees of freedom is
+considerably more expensive. It turns out that building the sparsity pattern
+by this inefficient algorithm requires at least ${\cal O}(N \log N)$ in the
+number of unknowns, whereas an ideal finite element program would of course
+only have algorithms that are linear in the number of unknowns. Timing the
+sparsity pattern creation as well as the matrix assembly shows that the
+algorithm presented in step-6 (and used in the code below) is indeed faster.
+
+In our program, we will also treat the boundary conditions as (possibly
+inhomogeneous) constraints and eliminate the matrix rows and columns corresponding to
+those as well. All we have to do for this is to call the function that
+interpolates the Dirichlet boundary conditions already in the setup phase in
+order to tell the ConstraintMatrix object about them, and then do the
+transfer from local to global data on matrix and vector simultaneously. This
+is exactly what we've shown in step-6.
+
+
+<h3>The test case</h3>
+
+The test case we will solve with this program is a re-take of the one we
+already looked at in step-14: we solve the Laplace equation
+@f[
+	-\Delta u = f
+@f]
+in 2d, with $f=(x+1)(y+1)$, and with zero Dirichlet boundary values for
+$u$. We do so on the domain $[-1,1]^2\backslash[-\frac 12,\frac 12]^2$, i.e. a
+square with a square hole in the middle.
+
+The difference to step-14 is of course that we use $hp$ finite
+elements for the solution. The testcase is of interest because it has
+re-entrant corners in the corners of the hole, at which the solution has
+singularities. We therefore expect that the solution will be smooth in the
+interior of the domain, and rough in the vicinity of the singularities. The
+hope is that our refinement and smoothness indicators will be able to see this
+behavior and refine the mesh close to the singularities, while the polynomial
+degree is increased away from them. As we will see in the results section, this
+is indeed the case.
diff --git a/examples/step-27/doc/kind b/examples/step-27/doc/kind
new file mode 100644
index 0000000..c1d9154
--- /dev/null
+++ b/examples/step-27/doc/kind
@@ -0,0 +1 @@
+techniques
diff --git a/examples/step-27/doc/results.dox b/examples/step-27/doc/results.dox
new file mode 100644
index 0000000..bc82a70
--- /dev/null
+++ b/examples/step-27/doc/results.dox
@@ -0,0 +1,144 @@
+<h1>Results</h1>
+
+In this section, we discuss a few results produced from running the
+current tutorial program. More results, in particular the extension to
+3d calculations and determining how much compute time the individual
+components of the program take, are given in the @ref hp_paper .
+
+When run, this is what the program produces:
+
+@code
+examples/\step-27> make run
+============================ Running \step-27
+Cycle 0:
+   Number of active cells:       768
+   Number of degrees of freedom: 3264
+   Number of constraints       : 384
+Cycle 1:
+   Number of active cells:       996
+   Number of degrees of freedom: 5327
+   Number of constraints       : 962
+Cycle 2:
+   Number of active cells:       1335
+   Number of degrees of freedom: 8947
+   Number of constraints       : 2056
+Cycle 3:
+   Number of active cells:       1626
+   Number of degrees of freedom: 12994
+   Number of constraints       : 3355
+Cycle 4:
+   Number of active cells:       1911
+   Number of degrees of freedom: 17988
+   Number of constraints       : 4860
+Cycle 5:
+   Number of active cells:       2577
+   Number of degrees of freedom: 26936
+   Number of constraints       : 7074
+@endcode
+
+The first thing we learn from this is that the number of constrained degrees
+of freedom is on the order of 20-25% of the total number of degrees of
+freedom, at least on the later grids when we have elements of relatively
+high order (in 3d, the fraction of constrained degrees of freedom can be up
+to 30%). This is, in fact, on the same order of magnitude as for non-$hp$
+discretizations. For example, in the last step of the step-6
+program, we have 18401 degrees of freedom, 4104 of which are
+constrained. The difference is that in the latter program, each constrained
+hanging node is constrained against only the two adjacent degrees of
+freedom, whereas in the $hp$ case, constrained nodes are constrained against
+many more degrees of freedom. Note also that the current program also
+includes nodes subject to Dirichlet boundary conditions in the list of
+constraints. In cycle 0, all the constraints are actually because of
+boundary conditions.
+
+Of maybe more interest is to look at the graphical output. First, here is the
+solution of the problem:
+
+<img src="http://www.dealii.org/images/steps/developer/step-27.solution.png" alt="">
+
+Secondly, let us look at the sequence of meshes generated:
+
+<table width="60%" align="center">
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-27.mesh-0.png" alt=""></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-27.mesh-1.png" alt=""></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-27.mesh-2.png" alt=""></td>
+  </tr>
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-27.mesh-3.png" alt=""></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-27.mesh-4.png" alt=""></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-27.mesh-5.png" alt=""></td>
+  </tr>
+</table>
+
+It is clearly visible how the mesh is refined near the corner singularities,
+as one would expect. More interestingly, we should be curious to see the
+distribution of finite element polynomial degrees to these mesh cells:
+
+<table width="60%" align="center">
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-27.fe_degree-0.png" alt=""></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-27.fe_degree-1.png" alt=""></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-27.fe_degree-2.png" alt=""></td>
+  </tr>
+
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-27.fe_degree-3.png" alt=""></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-27.fe_degree-4.png" alt=""></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-27.fe_degree-5.png" alt=""></td>
+  </tr>
+</table>
+
+While this is certainly not a perfect arrangement, it does make some sense: we
+use low order elements close to boundaries and corners where regularity is
+low. On the other hand, higher order elements are used where (i) the error was
+at one point fairly large, i.e. mainly in the general area around the corner
+singularities and in the top right corner where the solution is large, and
+(ii) where the solution is smooth, i.e. far away from the boundary.
+
+This arrangement of polynomial degrees of course follows from our smoothness
+estimator. Here is the estimated smoothness of the solution, with blue colors
+indicating least smoothness and red indicating the smoothest areas:
+
+<table width="60%" align="center">
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-27.smoothness-0.png" alt=""></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-27.smoothness-1.png" alt=""></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-27.smoothness-2.png" alt=""></td>
+  </tr>
+
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-27.smoothness-3.png" alt=""></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-27.smoothness-4.png" alt=""></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-27.smoothness-5.png" alt=""></td>
+  </tr>
+</table>
+
+The first conclusion one can draw from these images is that apparently the
+estimated smoothness is a fairly stable quantity under mesh refinement: what
+we get on the coarsest mesh is pretty close to what we get on the finest mesh.
+It is also obvious that the smoothness estimates are independent of the actual
+size of the solution (see the picture of the solution above), as it should be.
+A point of larger concern, however, is that one realizes on closer inspection
+that the estimator we have overestimates the smoothness of the solution on
+cells with hanging nodes. This in turn leads to higher polynomial degrees in
+these areas, skewing the allocation of finite elements onto cells.
+
+We have no good explanation for this effect at the moment. One theory is that
+the numerical solution on cells with hanging nodes is, of course, constrained
+and therefore not entirely free to explore the function space to get close to
+the exact solution. This lack of degrees of freedom may manifest itself by
+yielding numerical solutions on these cells with suppressed oscillation,
+meaning a higher degree of smoothness. The estimator picks this signal up and
+the estimated smoothness overestimates the actual value. However, a definite
+answer to what is going on currently eludes the authors of this program.
+
+The bigger question is, of course, how to avoid this problem. Possibilities
+include estimating the smoothness not on single cells, but cell assemblies or
+patches surrounding each cell. It may also be possible to find simple
+correction factors for each cell depending on the number of constrained
+degrees of freedom it has. In either case, there are ample opportunities for
+further research on finding good $hp$ refinement criteria. On the other hand,
+the main point of the current program was to demonstrate using the $hp$
+technology in deal.II, which is unaffected by our use of a possibly
+sub-optimal refinement criterion.
diff --git a/examples/step-27/doc/tooltip b/examples/step-27/doc/tooltip
new file mode 100644
index 0000000..ab90588
--- /dev/null
+++ b/examples/step-27/doc/tooltip
@@ -0,0 +1 @@
+hp-adaptive finite element methods.
diff --git a/examples/step-27/step-27.cc b/examples/step-27/step-27.cc
new file mode 100644
index 0000000..0adfe42
--- /dev/null
+++ b/examples/step-27/step-27.cc
@@ -0,0 +1,880 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2006 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Author: Wolfgang Bangerth, Texas A&M University, 2006, 2007
+ */
+
+
+// @sect3{Include files}
+
+// The first few files have already been covered in previous examples and will
+// thus not be further commented on.
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/precondition.h>
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/grid_refinement.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/matrix_tools.h>
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/numerics/error_estimator.h>
+
+// These are the new files we need. The first one provides an alternative to
+// the usual SparsityPattern class and the DynamicSparsityPattern class
+// already discussed in step-11 and step-18. The last two provide <i>hp</i>
+// versions of the DoFHandler and FEValues classes as described in the
+// introduction of this program.
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/hp/dof_handler.h>
+#include <deal.II/hp/fe_values.h>
+
+// The last set of include files are standard C++ headers. We need support for
+// complex numbers when we compute the Fourier transform.
+#include <fstream>
+#include <iostream>
+#include <complex>
+
+
+// Finally, this is as in previous programs:
+namespace Step27
+{
+  using namespace dealii;
+
+
+  // @sect3{The main class}
+
+  // The main class of this program looks very much like the one already used
+  // in the first few tutorial programs, for example the one in step-6. The
+  // main difference is that we have merged the refine_grid and output_results
+  // functions into one since we will also want to output some of the
+  // quantities used in deciding how to refine the mesh (in particular the
+  // estimated smoothness of the solution). There is also a function that
+  // computes this estimated smoothness, as discussed in the introduction.
+  //
+  // As far as member variables are concerned, we use the same structure as
+  // already used in step-6, but instead of a regular DoFHandler we use an
+  // object of type hp::DoFHandler, and we need collections instead of
+  // individual finite element, quadrature, and face quadrature objects. We
+  // will fill these collections in the constructor of the class. The last
+  // variable, <code>max_degree</code>, indicates the maximal polynomial
+  // degree of shape functions used.
+  template <int dim>
+  class LaplaceProblem
+  {
+  public:
+    LaplaceProblem ();
+    ~LaplaceProblem ();
+
+    void run ();                  // driver: the adaptive solve/refine loop
+
+  private:
+    void setup_system ();         // distribute DoFs, build constraints/matrix
+    void assemble_system ();      // fill system_matrix and system_rhs
+    void solve ();                // CG solve into solution
+    void create_coarse_grid ();   // specialized for dim==2 further below
+    void estimate_smoothness (Vector<float> &smoothness_indicators) const;
+    void postprocess (const unsigned int cycle);  // output, then h/p-refine
+
+    Triangulation<dim>   triangulation;  // declared before dof_handler,
+    //                                      which is constructed from it
+    hp::DoFHandler<dim>      dof_handler;
+    hp::FECollection<dim>    fe_collection;
+    hp::QCollection<dim>     quadrature_collection;
+    hp::QCollection<dim-1>   face_quadrature_collection;
+    // The three collections above are filled in the constructor.
+    ConstraintMatrix     constraints;    // rebuilt in setup_system()
+
+    SparsityPattern      sparsity_pattern;
+    SparseMatrix<double> system_matrix;
+
+    Vector<double>       solution;
+    Vector<double>       system_rhs;
+    // Highest polynomial degree put into fe_collection (set in constructor).
+    const unsigned int max_degree;
+  };
+
+
+
+  // @sect3{Equation data}
+  //
+  // Next, let us define the right hand side function for this problem. It is
+  // $x+1$ in 1d, $(x+1)(y+1)$ in 2d, and so on.
+  template <int dim>
+  class RightHandSide : public Function<dim>
+  {
+  public:
+    RightHandSide () : Function<dim> () {}
+    // Returns the product of (p[d]+1) over all d<dim; 'component' is unused.
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component) const;
+  };
+
+
+  template <int dim>
+  double
+  RightHandSide<dim>::value (const Point<dim>   &p,
+                             const unsigned int  /*component*/) const
+  {
+    double product = 1;                  // neutral element of the product
+    for (unsigned int d=0; d<dim; ++d)   // one factor per coordinate
+      product *= (p[d]+1);
+    return product;
+  }
+
+
+
+
+  // @sect3{Implementation of the main class}
+
+  // @sect4{LaplaceProblem::LaplaceProblem}
+
+  // The constructor of this class is fairly straightforward. It associates
+  // the hp::DoFHandler object with the triangulation, and then sets the
+  // maximal polynomial degree to 7 (in 1d and 2d) or 5 (in 3d and higher). We
+  // do so because using higher order polynomial degrees becomes prohibitively
+  // expensive, especially in higher space dimensions.
+  //
+  // Following this, we fill the collections of finite element, and cell and
+  // face quadrature objects. We start with quadratic elements, and each
+  // quadrature formula is chosen so that it is appropriate for the matching
+  // finite element in the hp::FECollection object.
+  template <int dim>
+  LaplaceProblem<dim>::LaplaceProblem ()
+    :
+    dof_handler (triangulation),
+    max_degree (dim <= 2 ? 7 : 5)
+  {
+    for (unsigned int degree=2; degree<=max_degree; ++degree)
+      {
+        // All three collections grow in lock-step, so entry i of each one
+        // belongs to polynomial degree i+2.
+        fe_collection.push_back (FE_Q<dim>(degree));
+        quadrature_collection.push_back (QGauss<dim>(degree+1));
+        face_quadrature_collection.push_back (QGauss<dim-1>(degree+1));
+      }
+  }
+
+
+  // @sect4{LaplaceProblem::~LaplaceProblem}
+
+  // The destructor is unchanged from what we already did in step-6:
+  template <int dim>
+  LaplaceProblem<dim>::~LaplaceProblem ()
+  {
+    dof_handler.clear ();  // NOTE(review): presumably drops its FE references
+  }
+
+
+  // @sect4{LaplaceProblem::setup_system}
+  //
+  // This function is again a verbatim copy of what we already did in
+  // step-6. Despite function calls with exactly the same names and arguments,
+  // the algorithms used internally are different in some aspect since the
+  // dof_handler variable here is an hp object.
+  template <int dim>
+  void LaplaceProblem<dim>::setup_system ()
+  {
+    dof_handler.distribute_dofs (fe_collection);
+    // Size both vectors to the new number of unknowns.
+    solution.reinit (dof_handler.n_dofs());
+    system_rhs.reinit (dof_handler.n_dofs());
+    // Rebuild the constraints from scratch on every call.
+    constraints.clear ();
+    DoFTools::make_hanging_node_constraints (dof_handler,
+                                             constraints);
+    VectorTools::interpolate_boundary_values (dof_handler,
+                                              0,  // presumably boundary id
+                                              ZeroFunction<dim>(),
+                                              constraints);
+    constraints.close ();
+    // 'false' below: presumably keep_constrained_dofs -- TODO confirm.
+    DynamicSparsityPattern dsp (dof_handler.n_dofs(),
+                                dof_handler.n_dofs());
+    DoFTools::make_sparsity_pattern (dof_handler, dsp, constraints, false);
+    sparsity_pattern.copy_from (dsp);
+    // Tie the matrix to the freshly built pattern.
+    system_matrix.reinit (sparsity_pattern);
+  }
+
+
+
+  // @sect4{LaplaceProblem::assemble_system}
+
+  // This is the function that assembles the global matrix and right hand side
+  // vector from the local contributions of each cell. Its main working is as
+  // has been described in many of the tutorial programs before. The
+  // significant deviations are the ones necessary for <i>hp</i> finite
+  // element methods. In particular, that we need to use a collection of
+  // FEValues object (implemented through the hp::FEValues class), and that we
+  // have to eliminate constrained degrees of freedom already when copying
+  // local contributions into global objects. Both of these are explained in
+  // detail in the introduction of this program.
+  //
+  // One other slight complication is the fact that because we use different
+  // polynomial degrees on different cells, the matrices and vectors holding
+  // local contributions do not have the same size on all cells. At the
+  // beginning of the loop over all cells, we therefore each time have to
+  // resize them to the correct size (given by
+  // <code>dofs_per_cell</code>). Because these classes are implemented in such
+  // a way that reducing the size of a matrix or vector does not release the
+  // currently allocated memory (unless the new size is zero), the process of
+  // resizing at the beginning of the loop will only require re-allocation of
+  // memory during the first few iterations. Once we have visited a cell with
+  // the maximal finite element degree, no more re-allocations will happen
+  // because all subsequent <code>reinit</code> calls will only set the size
+  // to something that fits the currently allocated memory. This is important
+  // since allocating memory is expensive, and doing so every time we visit a
+  // new cell would take significant compute time.
+  template <int dim>
+  void LaplaceProblem<dim>::assemble_system ()
+  {
+    hp::FEValues<dim> hp_fe_values (fe_collection,
+                                    quadrature_collection,
+                                    update_values    |  update_gradients |
+                                    update_quadrature_points  |  update_JxW_values);
+
+    const RightHandSide<dim> rhs_function;
+    // Local objects are left unsized here and resized for every cell below.
+    FullMatrix<double>   cell_matrix;
+    Vector<double>       cell_rhs;
+
+    std::vector<types::global_dof_index> local_dof_indices;
+
+    typename hp::DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+    for (; cell!=endc; ++cell)
+      {
+        const unsigned int   dofs_per_cell = cell->get_fe().dofs_per_cell;
+        // The element, and hence the local size, can differ between cells.
+        cell_matrix.reinit (dofs_per_cell, dofs_per_cell);
+        cell_matrix = 0;
+
+        cell_rhs.reinit (dofs_per_cell);
+        cell_rhs = 0;
+
+        hp_fe_values.reinit (cell);
+        // View of the FEValues object that hp_fe_values set up for this cell.
+        const FEValues<dim> &fe_values = hp_fe_values.get_present_fe_values ();
+        // Right hand side evaluated at this cell's quadrature points.
+        std::vector<double>  rhs_values (fe_values.n_quadrature_points);
+        rhs_function.value_list (fe_values.get_quadrature_points(),
+                                 rhs_values);
+        // Accumulate the gradient-gradient cell matrix and the load vector.
+        for (unsigned int q_point=0;
+             q_point<fe_values.n_quadrature_points;
+             ++q_point)
+          for (unsigned int i=0; i<dofs_per_cell; ++i)
+            {
+              for (unsigned int j=0; j<dofs_per_cell; ++j)
+                cell_matrix(i,j) += (fe_values.shape_grad(i,q_point) *
+                                     fe_values.shape_grad(j,q_point) *
+                                     fe_values.JxW(q_point));
+
+              cell_rhs(i) += (fe_values.shape_value(i,q_point) *
+                              rhs_values[q_point] *
+                              fe_values.JxW(q_point));
+            }
+
+        local_dof_indices.resize (dofs_per_cell);
+        cell->get_dof_indices (local_dof_indices);
+        // Constraints are resolved while copying into the global objects.
+        constraints.distribute_local_to_global (cell_matrix, cell_rhs,
+                                                local_dof_indices,
+                                                system_matrix, system_rhs);
+      }
+  }
+
+
+
+  // @sect4{LaplaceProblem::solve}
+
+  // The function solving the linear system is entirely unchanged from
+  // previous examples. We simply try to reduce the initial residual (which
+  // equals the $l_2$ norm of the right hand side) by a certain factor:
+  template <int dim>
+  void LaplaceProblem<dim>::solve ()
+  {
+    SolverControl           solver_control (system_rhs.size(),
+                                            1e-8*system_rhs.l2_norm());
+    SolverCG<>              cg (solver_control);
+    // NOTE(review): 1.2 is presumably the SSOR relaxation parameter.
+    PreconditionSSOR<> preconditioner;
+    preconditioner.initialize(system_matrix, 1.2);
+    // The result of the iteration is written into 'solution'.
+    cg.solve (system_matrix, solution, system_rhs,
+              preconditioner);
+    // NOTE(review): presumably fills in the constrained DoFs -- confirm.
+    constraints.distribute (solution);
+  }
+
+
+
+  // @sect4{LaplaceProblem::postprocess}
+
+  // After solving the linear system, we will want to postprocess the
+  // solution. Here, all we do is to estimate the error, estimate the local
+  // smoothness of the solution as described in the introduction, then write
+  // graphical output, and finally refine the mesh in both $h$ and $p$
+  // according to the indicators computed before. We do all this in the same
+  // function because we want the estimated error and smoothness indicators
+  // not only for refinement, but also include them in the graphical output.
+  template <int dim>
+  void LaplaceProblem<dim>::postprocess (const unsigned int cycle)
+  {
+    // Let us start with computing estimated error and smoothness indicators,
+    // which each are one number for each active cell of our
+    // triangulation. For the error indicator, we use the KellyErrorEstimator
+    // class as always. Estimating the smoothness is done in the respective
+    // function of this class; that function is discussed further down below:
+    Vector<float> estimated_error_per_cell (triangulation.n_active_cells());
+    KellyErrorEstimator<dim>::estimate (dof_handler,
+                                        face_quadrature_collection,
+                                        typename FunctionMap<dim>::type(),
+                                        solution,
+                                        estimated_error_per_cell);
+
+
+    Vector<float> smoothness_indicators (triangulation.n_active_cells());
+    estimate_smoothness (smoothness_indicators);
+
+    // Next we want to generate graphical output. In addition to the two
+    // estimated quantities derived above, we would also like to output the
+    // polynomial degree of the finite elements used on each of the elements
+    // on the mesh.
+    //
+    // The way to do that requires that we loop over all cells and poll the
+    // active finite element index of them using
+    // <code>cell-@>active_fe_index()</code>. We then use the result of this
+    // operation and query the finite element collection for the finite
+    // element with that index, and finally determine the polynomial degree of
+    // that element. The result we put into a vector with one element per
+    // cell. The DataOut class requires this to be a vector of
+    // <code>float</code> or <code>double</code>, even though our values are
+    // all integers, so that is what we use:
+    {
+      Vector<float> fe_degrees (triangulation.n_active_cells());
+      {
+        typename hp::DoFHandler<dim>::active_cell_iterator
+        cell = dof_handler.begin_active(),
+        endc = dof_handler.end();
+        for (; cell!=endc; ++cell)
+          fe_degrees(cell->active_cell_index())
+            = fe_collection[cell->active_fe_index()].degree;
+      }
+
+      // With now all data vectors available -- solution, estimated errors and
+      // smoothness indicators, and finite element degrees --, we create a
+      // DataOut object for graphical output and attach all data. Note that
+      // the DataOut class has a second template argument (which defaults to
+      // DoFHandler@<dim@>, which is why we have never seen it in previous
+      // tutorial programs) that indicates the type of DoF handler to be
+      // used. Here, we have to use the hp::DoFHandler class:
+      DataOut<dim,hp::DoFHandler<dim> > data_out;
+
+      data_out.attach_dof_handler (dof_handler);
+      data_out.add_data_vector (solution, "solution");
+      data_out.add_data_vector (estimated_error_per_cell, "error");
+      data_out.add_data_vector (smoothness_indicators, "smoothness");
+      data_out.add_data_vector (fe_degrees, "fe_degree");
+      data_out.build_patches ();
+
+      // The final step in generating output is to determine a file name, open
+      // the file, and write the data into it (here, we use VTK format):
+      const std::string filename = "solution-" +
+                                   Utilities::int_to_string (cycle, 2) +
+                                   ".vtk";
+      std::ofstream output (filename.c_str());
+      data_out.write_vtk (output);
+    }
+
+    // After this, we would like to actually refine the mesh, in both $h$ and
+    // $p$. The way we are going to do this is as follows: first, we use the
+    // estimated error to flag those cells for refinement that have the
+    // largest error. This is what we have always done:
+    {
+      GridRefinement::refine_and_coarsen_fixed_number (triangulation,
+                                                       estimated_error_per_cell,
+                                                       0.3, 0.03);
+
+      // Next we would like to figure out which of the cells that have been
+      // flagged for refinement should actually have $p$ increased instead of
+      // $h$ decreased. The strategy we choose here is that we look at the
+      // smoothness indicators of those cells that are flagged for refinement,
+      // and increase $p$ for those with a smoothness larger than a certain
+      // threshold. For this, we first have to determine the maximal and
+      // minimal values of the smoothness indicators of all flagged cells,
+      // which we do using a loop over all cells. (The initialization below is
+      // deliberately crossed: the running maximum starts at the minimum over
+      // <i>all</i> cells and the running minimum at the maximum over all
+      // cells, so the loop can only move them to the values on flagged cells.)
+      // Absent any better strategies, we then set the threshold above which we
+      // will increase $p$ instead of reducing $h$ as the mean value between
+      // minimal and maximal smoothness indicators on flagged cells:
+      float max_smoothness = *std::min_element (smoothness_indicators.begin(),
+                                                smoothness_indicators.end()),
+                             min_smoothness = *std::max_element (smoothness_indicators.begin(),
+                                                                 smoothness_indicators.end());
+      {
+        typename hp::DoFHandler<dim>::active_cell_iterator
+        cell = dof_handler.begin_active(),
+        endc = dof_handler.end();
+        for (; cell!=endc; ++cell)
+          if (cell->refine_flag_set())
+            {
+              max_smoothness = std::max (max_smoothness,
+                                         smoothness_indicators(cell->active_cell_index()));
+              min_smoothness = std::min (min_smoothness,
+                                         smoothness_indicators(cell->active_cell_index()));
+            }
+      }
+      const float threshold_smoothness = (max_smoothness + min_smoothness) / 2;
+
+      // With this, we can go back, loop over all cells again, and for those
+      // cells for which (i) the refinement flag is set, (ii) the smoothness
+      // indicator is larger than the threshold, and (iii) we still have a
+      // finite element with a polynomial degree higher than the current one
+      // in the finite element collection, we then increase the polynomial
+      // degree and in return remove the flag indicating that the cell should
+      // undergo bisection. For all other cells, the refinement flags remain
+      // untouched:
+      {
+        typename hp::DoFHandler<dim>::active_cell_iterator
+        cell = dof_handler.begin_active(),
+        endc = dof_handler.end();
+        for (; cell!=endc; ++cell)
+          if (cell->refine_flag_set()
+              &&
+              (smoothness_indicators(cell->active_cell_index()) > threshold_smoothness)
+              &&
+              (cell->active_fe_index()+1 < fe_collection.size()))
+            {
+              cell->clear_refine_flag();
+              cell->set_active_fe_index (cell->active_fe_index() + 1);
+            }
+      }
+
+      // At the end of this procedure, we then refine the mesh. During this
+      // process, children of cells undergoing bisection inherit their mother
+      // cell's finite element index:
+      triangulation.execute_coarsening_and_refinement ();
+    }
+  }
+
+
+  // @sect4{LaplaceProblem::create_coarse_grid}
+
+  // The following function is used when creating the initial grid. It is a
+  // specialization for the 2d case, i.e. a corresponding function needs to be
+  // implemented if the program is run in anything other than 2d. The function
+  // is actually stolen from step-14 and generates the same mesh used already
+  // there, i.e. the square domain with the square hole in the middle. The
+  // meaning of the different parts of this function are explained in the
+  // documentation of step-14:
+  template <>
+  void LaplaceProblem<2>::create_coarse_grid ()
+  {
+    const unsigned int dim = 2;  // stands in for the template parameter
+    // 5x5 lattice on [-1,1]^2 with spacing 1/2, minus the centre (0,0):
+    static const Point<2> vertices_1[]
+      = {  Point<2> (-1.,   -1.),
+           Point<2> (-1./2, -1.),
+           Point<2> (0.,    -1.),
+           Point<2> (+1./2, -1.),
+           Point<2> (+1,    -1.),
+
+           Point<2> (-1.,   -1./2.),
+           Point<2> (-1./2, -1./2.),
+           Point<2> (0.,    -1./2.),
+           Point<2> (+1./2, -1./2.),
+           Point<2> (+1,    -1./2.),
+
+           Point<2> (-1.,   0.),
+           Point<2> (-1./2, 0.),
+           Point<2> (+1./2, 0.),
+           Point<2> (+1,    0.),
+
+           Point<2> (-1.,   1./2.),
+           Point<2> (-1./2, 1./2.),
+           Point<2> (0.,    1./2.),
+           Point<2> (+1./2, 1./2.),
+           Point<2> (+1,    1./2.),
+
+           Point<2> (-1.,   1.),
+           Point<2> (-1./2, 1.),
+           Point<2> (0.,    1.),
+           Point<2> (+1./2, 1.),
+           Point<2> (+1,    1.)
+        };
+    const unsigned int
+    n_vertices = sizeof(vertices_1) / sizeof(vertices_1[0]);
+    const std::vector<Point<dim> > vertices (&vertices_1[0],
+                                             &vertices_1[n_vertices]);
+    static const int cell_vertices[][GeometryInfo<dim>::vertices_per_cell]
+    = {{0, 1, 5, 6},
+      {1, 2, 6, 7},
+      {2, 3, 7, 8},
+      {3, 4, 8, 9},
+      {5, 6, 10, 11},
+      {8, 9, 12, 13},
+      {10, 11, 14, 15},
+      {12, 13, 17, 18},
+      {14, 15, 19, 20},
+      {15, 16, 20, 21},
+      {16, 17, 21, 22},
+      {17, 18, 22, 23}
+    };
+    const unsigned int
+    n_cells = sizeof(cell_vertices) / sizeof(cell_vertices[0]);
+    // Copy the index table into CellData objects, all with material id 0.
+    std::vector<CellData<dim> > cells (n_cells, CellData<dim>());
+    for (unsigned int i=0; i<n_cells; ++i)
+      {
+        for (unsigned int j=0;
+             j<GeometryInfo<dim>::vertices_per_cell;
+             ++j)
+          cells[i].vertices[j] = cell_vertices[i][j];
+        cells[i].material_id = 0;
+      }
+    // Create the 12-cell coarse mesh, then refine it globally three times.
+    triangulation.create_triangulation (vertices,
+                                        cells,
+                                        SubCellData());
+    triangulation.refine_global (3);
+  }
+
+
+
+
+  // @sect4{LaplaceProblem::run}
+
+  // This function implements the logic of the program, as did the respective
+  // function in most of the previous programs already, see for example
+  // step-6.
+  //
+  // Basically, it contains the adaptive loop: in the first iteration create a
+  // coarse grid, and then set up the linear system, assemble it, solve, and
+  // postprocess the solution including mesh refinement. Then start over
+  // again. In the meantime, also output some information for those staring at
+  // the screen trying to figure out what the program does:
+  template <int dim>
+  void LaplaceProblem<dim>::run ()
+  {
+    for (unsigned int cycle=0; cycle<6; ++cycle)
+      {
+        std::cout << "Cycle " << cycle << ':' << std::endl;
+        // Later cycles reuse the mesh refined by the previous postprocess().
+        if (cycle == 0)
+          create_coarse_grid ();
+
+        setup_system ();
+
+        std::cout << "   Number of active cells:       "
+                  << triangulation.n_active_cells()
+                  << std::endl
+                  << "   Number of degrees of freedom: "
+                  << dof_handler.n_dofs()
+                  << std::endl
+                  << "   Number of constraints       : "
+                  << constraints.n_constraints()
+                  << std::endl;
+        // postprocess() also refines after the last cycle; unused afterwards.
+        assemble_system ();
+        solve ();
+        postprocess (cycle);
+      }
+  }
+
+
+  // @sect4{LaplaceProblem::estimate_smoothness}
+
+  // This last function of significance implements the algorithm to estimate
+  // the smoothness exponent using the algorithms explained in detail in the
+  // introduction. We will therefore only comment on those points that are of
+  // implementational importance.
+  template <int dim>
+  void
+  LaplaceProblem<dim>::
+  estimate_smoothness (Vector<float> &smoothness_indicators) const
+  {
+    // The first thing we need to do is to define the Fourier vectors ${\bf
+    // k}$ for which we want to compute Fourier coefficients of the solution
+    // on each cell. In 2d, we pick those vectors ${\bf k}=(\pi i, \pi j)^T$
+    // for which $\sqrt{i^2+j^2} < N$, with $i,j$ integers and $N$ being the
+    // maximal polynomial degree we use for the finite elements in this
+    // program. The 3d case is handled analogously. 1d and dimensions higher
+    // than 3 are not implemented, and we guard our implementation by making
+    // sure that we receive an exception in case someone tries to compile the
+    // program for any of these dimensions.
+    //
+    // We exclude ${\bf k}=0$ to avoid problems computing $|{\bf k}|^{-\mu}$
+    // and $\ln |{\bf k}|$. The other vectors are stored in the field
+    // <code>k_vectors</code>. In addition, we store the square of the
+    // magnitude of each of these vectors (up to a factor $\pi^2$) in the
+    // <code>k_vectors_magnitude</code> array -- we will need that when we
+    // attempt to find out which of those Fourier coefficients corresponding
+    // to Fourier vectors of the same magnitude is the largest:
+    const unsigned int N = max_degree;
+
+    std::vector<Tensor<1,dim> > k_vectors;
+    std::vector<unsigned int>   k_vectors_magnitude;
+    switch (dim)
+      {
+      case 2:
+      {
+        for (unsigned int i=0; i<N; ++i)
+          for (unsigned int j=0; j<N; ++j)
+            if (!((i==0) && (j==0))
+                &&
+                (i*i + j*j < N*N))
+              {
+                k_vectors.push_back (Point<dim>(numbers::PI * i,
+                                                numbers::PI * j));
+                k_vectors_magnitude.push_back (i*i+j*j);
+              }
+
+        break;
+      }
+
+      case 3:
+      {
+        for (unsigned int i=0; i<N; ++i)
+          for (unsigned int j=0; j<N; ++j)
+            for (unsigned int k=0; k<N; ++k)
+              if (!((i==0) && (j==0) && (k==0))
+                  &&
+                  (i*i + j*j + k*k < N*N))
+                {
+                  k_vectors.push_back (Point<dim>(numbers::PI * i,
+                                                  numbers::PI * j,
+                                                  numbers::PI * k));
+                  k_vectors_magnitude.push_back (i*i+j*j+k*k);
+                }
+
+        break;
+      }
+
+      default:
+        Assert (false, ExcNotImplemented());
+      }
+
+    // After we have set up the Fourier vectors, we also store their total
+    // number for simplicity, and compute the logarithm of the magnitude of
+    // each of these vectors since we will need it many times over further
+    // down below:
+    const unsigned n_fourier_modes = k_vectors.size();
+    std::vector<double> ln_k (n_fourier_modes);
+    for (unsigned int i=0; i<n_fourier_modes; ++i)
+      ln_k[i] = std::log (k_vectors[i].norm());
+
+
+    // Next, we need to assemble the matrices that do the Fourier transforms
+    // for each of the finite elements we deal with, i.e. the matrices ${\cal
+    // F}_{{\bf k},j}$ defined in the introduction. We have to do that for
+    // each of the finite elements in use. Note that these matrices are
+    // complex-valued, so we can't use the FullMatrix class. Instead, we use
+    // the Table class template.
+    std::vector<Table<2,std::complex<double> > >
+    fourier_transform_matrices (fe_collection.size());
+
+    // In order to compute them, we of course can't perform the Fourier
+    // transform analytically, but have to approximate it using quadrature. To
+    // this end, we use a quadrature formula that is obtained by iterating a
+    // 2-point Gauss formula as many times as the maximal exponent we use for
+    // the term $e^{i{\bf k}\cdot{\bf x}}$:
+    QGauss<1>      base_quadrature (2);
+    QIterated<dim> quadrature (base_quadrature, N);
+
+    // With this, we then loop over all finite elements in use, reinitialize
+    // the respective matrix ${\cal F}$ to the right size, and integrate each
+    // entry of the matrix numerically as ${\cal F}_{{\bf k},j}=\sum_q
+    // e^{i{\bf k}\cdot {\bf x}}\varphi_j({\bf x}_q) w_q$, where $x_q$ are the
+    // quadrature points and $w_q$ are the quadrature weights. Note that the
+    // imaginary unit $i=\sqrt{-1}$ is obtained from the standard C++ classes
+    // using <code>std::complex@<double@>(0,1)</code>.
+
+    // Because we work on the unit cell, we can do all this work without a
+    // mapping from reference to real cell and consequently do not need the
+    // FEValues class.
+    for (unsigned int fe=0; fe<fe_collection.size(); ++fe)
+      {
+        fourier_transform_matrices[fe].reinit (n_fourier_modes,
+                                               fe_collection[fe].dofs_per_cell);
+
+        for (unsigned int k=0; k<n_fourier_modes; ++k)
+          for (unsigned int j=0; j<fe_collection[fe].dofs_per_cell; ++j)
+            {
+              std::complex<double> sum = 0;
+              for (unsigned int q=0; q<quadrature.size(); ++q)
+                {
+                  const Point<dim> x_q = quadrature.point(q);
+                  sum += std::exp(std::complex<double>(0,1) *
+                                  (k_vectors[k] * x_q)) *
+                         fe_collection[fe].shape_value(j,x_q) *
+                         quadrature.weight(q);
+                }
+              fourier_transform_matrices[fe](k,j)
+                = sum / std::pow(2*numbers::PI, 1.*dim/2);
+            }
+      }
+
+    // The next thing is to loop over all cells and do our work there, i.e. to
+    // locally do the Fourier transform and estimate the decay coefficient. We
+    // will use the following two arrays as scratch arrays in the loop and
+    // allocate them here to avoid repeated memory allocations:
+    std::vector<std::complex<double> > fourier_coefficients (n_fourier_modes);
+    Vector<double>                     local_dof_values;
+
+    // Then here is the loop:
+    typename hp::DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+    for (; cell!=endc; ++cell)
+      {
+        // Inside the loop, we first need to get the values of the local
+        // degrees of freedom (which we put into the
+        // <code>local_dof_values</code> array after setting it to the right
+        // size) and then need to compute the Fourier transform by multiplying
+        // this vector with the matrix ${\cal F}$ corresponding to this finite
+        // element. We need to write out the multiplication by hand because
+        // the objects holding the data do not have <code>vmult</code>-like
+        // functions declared:
+        local_dof_values.reinit (cell->get_fe().dofs_per_cell);
+        cell->get_dof_values (solution, local_dof_values);
+
+        for (unsigned int f=0; f<n_fourier_modes; ++f)
+          {
+            fourier_coefficients[f] = 0;
+
+            for (unsigned int i=0; i<cell->get_fe().dofs_per_cell; ++i)
+              fourier_coefficients[f] +=
+                fourier_transform_matrices[cell->active_fe_index()](f,i)
+                *
+                local_dof_values(i);
+          }
+
+        // The next thing, as explained in the introduction, is that we wanted
+        // to only fit our exponential decay of Fourier coefficients to the
+        // largest coefficients for each possible value of $|{\bf k}|$. To
+        // this end, we create a map that for each magnitude $|{\bf k}|$
+        // stores the largest $|\hat U_{{\bf k}}|$ found so far, i.e. we
+        // overwrite the existing value (or add it to the map) if no value for
+        // the current $|{\bf k}|$ exists yet, or if the current value is
+        // larger than the previously stored one:
+        std::map<unsigned int, double> k_to_max_U_map;
+        for (unsigned int f=0; f<n_fourier_modes; ++f)
+          if ((k_to_max_U_map.find (k_vectors_magnitude[f]) ==
+               k_to_max_U_map.end())
+              ||
+              (k_to_max_U_map[k_vectors_magnitude[f]] <
+               std::abs (fourier_coefficients[f])))
+            k_to_max_U_map[k_vectors_magnitude[f]]
+              = std::abs (fourier_coefficients[f]);
+        // Note that it comes in handy here that we have stored the magnitudes
+        // of vectors as integers, since this way we do not have to deal with
+        // round-off-sized differences between different values of $|{\bf
+        // k}|$.
+
+        // As the final task, we have to calculate the various contributions
+        // to the formula for $\mu$. We'll only take those Fourier
+        // coefficients with the largest magnitude for a given value of $|{\bf
+        // k}|$ as explained above:
+        double  sum_1           = 0,
+                sum_ln_k        = 0,
+                sum_ln_k_square = 0,
+                sum_ln_U        = 0,
+                sum_ln_U_ln_k   = 0;
+        for (unsigned int f=0; f<n_fourier_modes; ++f)
+          if (k_to_max_U_map[k_vectors_magnitude[f]] ==
+              std::abs (fourier_coefficients[f]))
+            {
+              sum_1 += 1;
+              sum_ln_k += ln_k[f];
+              sum_ln_k_square += ln_k[f]*ln_k[f];
+              sum_ln_U += std::log (std::abs (fourier_coefficients[f]));
+              sum_ln_U_ln_k += std::log (std::abs (fourier_coefficients[f])) *
+                               ln_k[f];
+            }
+
+        // With these so-computed sums, we can now evaluate the formula for
+        // $\mu$ derived in the introduction:
+        const double mu
+          = (1./(sum_1*sum_ln_k_square - sum_ln_k*sum_ln_k)
+             *
+             (sum_ln_k*sum_ln_U - sum_1*sum_ln_U_ln_k));
+
+        // The final step is to compute the Sobolev index $s=\mu-\frac d2$ and
+        // store it in the vector of estimated values for each cell:
+        smoothness_indicators(cell->active_cell_index()) = mu - 1.*dim/2;
+      }
+  }
+}
+
+
+// @sect3{The main function}
+
+// The main function is again verbatim what we had before: create and run an
+// object of the main class inside a <code>try</code> block, and catch any
+// exception that escapes so that we can print a meaningful message and
+// return a nonzero exit code if anything should go wrong:
+int main ()
+{
+  try
+    {
+      using namespace dealii;
+      using namespace Step27;
+
+      LaplaceProblem<2> laplace_problem;
+      laplace_problem.run ();
+    }
+  catch (const std::exception &exc)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+
+      return 1;
+    }
+  catch (...)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    }
+
+  return 0;
+}
diff --git a/examples/step-28/CMakeLists.txt b/examples/step-28/CMakeLists.txt
new file mode 100644
index 0000000..725218e
--- /dev/null
+++ b/examples/step-28/CMakeLists.txt
@@ -0,0 +1,39 @@
+##
+#  CMake script for the step-28 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-28")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-28/doc/builds-on b/examples/step-28/doc/builds-on
new file mode 100644
index 0000000..1740273
--- /dev/null
+++ b/examples/step-28/doc/builds-on
@@ -0,0 +1 @@
+step-6
diff --git a/examples/step-28/doc/data-q1 b/examples/step-28/doc/data-q1
new file mode 100644
index 0000000..c06f6d7
--- /dev/null
+++ b/examples/step-28/doc/data-q1
@@ -0,0 +1,10 @@
+   0 2450  0.907410259645 4.077380000000
+   1 4289  0.907042395776 10.572393000000
+   2 8225  0.906937183449 25.223165000000
+   3 14757  0.906900660844 53.679839000000
+   4 26236  0.906886610239 108.345529000000
+   5 62804  0.906857043552 253.919398000000
+   6 86389  0.906844968762 466.115140000000
+   7 165629  0.906839125266 900.314131000000
+   8 301264  0.906836698748 1776.255968000000
+   9 444166  0.906835969493 3026.378920000000
diff --git a/examples/step-28/doc/data-q2 b/examples/step-28/doc/data-q2
new file mode 100644
index 0000000..df6c0e0
--- /dev/null
+++ b/examples/step-28/doc/data-q2
@@ -0,0 +1,10 @@
+   0 9522  0.906841960371 17.634319000000
+   1 15428  0.906837901031 33.703876000000
+   2 23566  0.906836075928 65.945975000000
+   3 33930  0.906835500110 120.341705000000
+   4 60358  0.906835001796 246.337551000000
+   5 86798  0.906834858174 403.059725000000
+   6 98538  0.906834824060 545.820023000000
+   7 117254  0.906834787555 734.265375000000
+   8 147302  0.906834761604 1014.816724000000
+   9 194442  0.906834746216 1366.371280000000
diff --git a/examples/step-28/doc/data-q3 b/examples/step-28/doc/data-q3
new file mode 100644
index 0000000..4b9d506
--- /dev/null
+++ b/examples/step-28/doc/data-q3
@@ -0,0 +1,10 @@
+   0 21218  0.906835491999 91.037161000000
+   1 34067  0.906834908620 131.201055000000
+   2 35069  0.906834859254 187.733460000000
+   3 50476  0.906834817384 279.669484000000
+   4 73375  0.906834783106 408.506898000000
+   5 90529  0.906834744225 590.097292000000
+   6 114417  0.906834730885 852.591387000000
+   7 151059  0.906834726355 1161.735390000000
+   8 215292  0.906834723745 1576.734300000000
+   9 275614  0.906834722268 2160.737518000000
diff --git a/examples/step-28/doc/data-q4 b/examples/step-28/doc/data-q4
new file mode 100644
index 0000000..15a4abe
--- /dev/null
+++ b/examples/step-28/doc/data-q4
@@ -0,0 +1,10 @@
+   0 37538  0.906834753543 373.753181000000
+   1 64160  0.906834728274 517.920264000000
+   2 88822  0.906834726164 1138.467927000000
+   3 157266  0.906834722755 2460.279981000000
+   4 175774  0.906834722205 4364.633475000000
+   5 227672  0.906834721746 4964.747243999999
+   6 264428  0.906834721654 5461.831674999999
+   7 328106  0.906834721515 6010.156317000000
+   8 395036  0.906834721416 12474.065654000000
+   9 455498  0.906834721380 19863.514286999998
diff --git a/examples/step-28/doc/data-q5 b/examples/step-28/doc/data-q5
new file mode 100644
index 0000000..c4aa83f
--- /dev/null
+++ b/examples/step-28/doc/data-q5
@@ -0,0 +1,10 @@
+   0 58482  0.906834731399 1121.873449000000
+   1 97921  0.906834723230 1409.121780000000
+   2 98655  0.906834723009 2989.811479000000
+   3 131934  0.906834722626 3469.306585000000
+   4 188259  0.906834722379 4011.864103000000
+   5 255493  0.906834721986 4578.781919000000
+   6 271824  0.906834721755 10632.535608000000
+   7 307409  0.906834721567 11699.187453000000
+   8 316691  0.906834721509 12528.627359000000
+   9 363748  0.906834721439 13773.087172000000
diff --git a/examples/step-28/doc/data-q6 b/examples/step-28/doc/data-q6
new file mode 100644
index 0000000..4c5094d
--- /dev/null
+++ b/examples/step-28/doc/data-q6
@@ -0,0 +1,15 @@
+   0 84050  0.906834722842531 3184.047951000000012
+   1 140828  0.906834721670094 3965.101212999999916
+   2 158782  0.906834721605666 8725.235561999999845
+   3 230198  0.906834721530541 16501.528386000001774
+   4 320258  0.906834721476372 18215.300853000000643
+   5 385470  0.906834721418083 21774.706741000001784
+   6 438090  0.906834721389658 25271.607131000000663
+   7 462942  0.906834721380828 27951.595711000001756
+   8 502434  0.906834721371133 33127.243892999998934
+   9 522814  0.906834721367467 36477.579565000000002
+   10 552538  0.906834721365696 39634.395656000000599
+   11 570310  0.906834721364977 43278.281700999999885
+   12 593214  0.906834721364745 47564.244135000000824
+   13 654882  0.906834721363022 53225.507492000004277
+   14 716338  0.906834721360716 60232.646245000003546
diff --git a/examples/step-28/doc/gnuplot.1 b/examples/step-28/doc/gnuplot.1
new file mode 100644
index 0000000..42a47f1
--- /dev/null
+++ b/examples/step-28/doc/gnuplot.1
@@ -0,0 +1,26 @@
+set term png
+set logsc xy
+set da sty lp
+
+set ylabel "Error"
+
+set output "error-vs-dofs.png"
+set xlabel "Degrees of freedom"
+
+pl "data-q1" us 2:(abs($3-0.906834721360716)) title "Q1 elements", \
+   "data-q2" us 2:(abs($3-0.906834721360716)) title "Q2 elements", \
+   "data-q3" us 2:(abs($3-0.906834721360716)) title "Q3 elements", \
+   "data-q4" us 2:(abs($3-0.906834721360716)) title "Q4 elements", \
+   "data-q5" us 2:(abs($3-0.906834721360716)) title "Q5 elements", \
+   "data-q6" us 2:(abs($3-0.906834721360716)) title "Q6 elements"
+
+
+set output "error-vs-time.png"
+set xlabel "Time in seconds"
+
+pl "data-q1" us 4:(abs($3-0.906834721360716)) title "Q1 elements", \
+   "data-q2" us 4:(abs($3-0.906834721360716)) title "Q2 elements", \
+   "data-q3" us 4:(abs($3-0.906834721360716)) title "Q3 elements", \
+   "data-q4" us 4:(abs($3-0.906834721360716)) title "Q4 elements", \
+   "data-q5" us 4:(abs($3-0.906834721360716)) title "Q5 elements", \
+   "data-q6" us 4:(abs($3-0.906834721360716)) title "Q6 elements"
diff --git a/examples/step-28/doc/intro.dox b/examples/step-28/doc/intro.dox
new file mode 100644
index 0000000..a9969b3
--- /dev/null
+++ b/examples/step-28/doc/intro.dox
@@ -0,0 +1,645 @@
+<br>
+
+<i>This program was contributed by Yaqi Wang and Wolfgang
+Bangerth. Results from this program are used and discussed in the publication
+"Three-dimensional $h$-adaptivity for the multigroup neutron diffusion
+equations" by Yaqi Wang, Wolfgang Bangerth and Jean Ragusa. The paper's full
+bibliographic details are as follows:
+@code
+@Article{WBR09,
+  author = 	 {Yaqi Wang and Wolfgang Bangerth and Jean Ragusa},
+  title = 	 {Three-dimensional $h$-adaptivity for the multigroup
+                  neutron diffusion equations},
+  journal = 	 {Progr. Nucl. Energy},
+  year = 	 2009,
+  volume = 	 51,
+  pages = 	 {543--555}
+}
+@endcode
+A preprint that mostly matches the final version of the paper is
+available <a target="_top"
+href="http://iamcs.tamu.edu/file_dl.php?type=preprint&preprint_id=19">here</a>.
+</i>
+
+<br>
+
+
+<a name="Intro"></a> <h1>Introduction</h1>
+
+
+
+In this example, we intend to solve the multigroup diffusion approximation of
+the neutron transport equation. Essentially, the way to view this is as follows: In a
+nuclear reactor, neutrons are speeding around at different energies, get
+absorbed or scattered, or start a new fission
+event. If viewed at long enough length scales, the movement of neutrons can be
+considered a diffusion process.
+
+A mathematical description of this would group neutrons into energy bins, and
+consider the balance equations for the neutron fluxes in each of these
+bins, or energy groups. The scattering, absorption, and fission events would
+then be operators within the diffusion equation describing the neutron
+fluxes. Assume we have energy groups $g=1,\ldots,G$, where by convention we
+assume that the neutrons with the highest energy are in group 1 and those with
+the lowest energy in group $G$. Then the neutron flux of each group satisfies the
+following equations:
+@f{eqnarray*}
+\frac 1{v_g}\frac{\partial \phi_g(x,t)}{\partial t}
+&=&
+\nabla \cdot(D_g(x) \nabla \phi_g(x,t))
+-
+\Sigma_{r,g}(x)\phi_g(x,t)
+\\
+&& \qquad
++
+\chi_g\sum_{g'=1}^G\nu\Sigma_{f,g'}(x)\phi_{g'}(x,t)
++
+\sum_{g'\ne g}\Sigma_{s,g'\to g}(x)\phi_{g'}(x,t)
++
+s_{\mathrm{ext},g}(x,t)
+@f}
+augmented by appropriate boundary conditions. Here, $v_g$ is the velocity of
+neutrons within group $g$. In other words, the change in
+time in flux of neutrons in group $g$ is governed by the following
+processes:
+<ul>
+<li> Diffusion $\nabla \cdot(D_g(x) \nabla \phi_g(x,t))$. Here, $D_g$ is the
+  (spatially variable) diffusion coefficient.
+<li> Absorption $\Sigma_{r,g}(x)\phi_g(x,t)$ (note the
+  negative sign). The coefficient $\Sigma_{r,g}$ is called the <i>removal
+  cross section</i>.
+<li> Nuclear fission $\chi_g\sum_{g'=1}^G\nu\Sigma_{f,g'}(x)\phi_{g'}(x,t)$.
+  The production of neutrons of energy $g$ is
+  proportional to the flux of neutrons of energy $g'$ times the
+  probability $\Sigma_{f,g'}$ that neutrons of energy $g'$ cause a fission
+  event times the number $\nu$ of neutrons produced in each fission event
+  times the probability that a neutron produced in this event has energy
+  $g$. $\nu\Sigma_{f,g'}$ is called the <i>fission cross section</i> and
+  $\chi_g$ the <i>fission spectrum</i>. We will denote the term
+  $\chi_g\nu\Sigma_{f,g'}$ as the <i>fission distribution cross
+    section</i> in the program.
+<li> Scattering $\sum_{g'\ne g}\Sigma_{s,g'\to g}(x)\phi_{g'}(x,t)$
+  of neutrons of energy $g'$ producing neutrons
+  of energy $g$. $\Sigma_{s,g'\to g}$ is called the <i>scattering cross
+    section</i>. The case of elastic, in-group scattering $g'=g$ exists, too, but
+  we subsume this into the removal cross section. The case $g'<g$ is called
+  down-scattering, since a neutron loses energy in such an event. On the
+  other hand, $g'>g$ corresponds to up-scattering: a neutron gains energy in
+  a scattering event from the thermal motion of the atoms surrounding it;
+  up-scattering is therefore only an important process for neutrons with
+  kinetic energies that are already on the same order as the thermal kinetic
+  energy (i.e. in the sub $eV$ range).
+<li> An extraneous source $s_{\mathrm{ext},g}$.
+</ul>
+
+For realistic simulations in reactor analysis, one may want to split the
+continuous spectrum of neutron energies into many energy groups, often up to
+100. However, if neutron energy spectra are known well enough for some type of
+reactor (for example Pressurized Water Reactors, PWR), it is possible to obtain
+satisfactory results with only 2 energy groups.
+
+In this tutorial program, we provide the structure to
+compute with as many energy groups as desired. However, to keep computing
+times moderate and in order to avoid tabulating hundreds of coefficients, we
+only provide the coefficients for the above equations for a two-group simulation,
+i.e. $g=1,2$. We do, however, consider a realistic situation by assuming that
+the coefficients are not constant, but rather depend on the materials that are
+assembled into reactor fuel assemblies in rather complicated ways (see
+below).
+
+
+<h3>The eigenvalue problem</h3>
+
+If we consider all energy groups at once, we may write the above equations in the
+following operator form:
+@f{eqnarray*}
+\frac 1v \frac{\partial \phi}{\partial t}
+=
+-L\phi
++
+F\phi
++
+X\phi
++
+s_{\mathrm{ext}},
+@f}
+where $L,F,X$ are sinking, fission, and scattering operators,
+respectively. $L$ here includes both the diffusion and removal terms. Note
+that $L$ is symmetric, whereas $F$ and $X$ are not.
+
+It is well known that this equation admits a stable solution if all
+eigenvalues of the operator $-L+F+X$ are negative. This can be readily seen by
+multiplying the equation by $\phi$ and integrating over the domain, leading to
+@f{eqnarray*}
+  \frac 1{2v} \frac{\partial}{\partial t}  \|\phi\|^2 = ((-L+F+X)\phi,\phi).
+@f}
+Stability means that the solution does not grow, i.e. we want the left hand
+side to be less than zero, which is the case if the eigenvalues of the
+operator on the right are all negative. For obvious reasons, it is
+not very desirable if a nuclear reactor produces neutron fluxes that grow
+exponentially, so eigenvalue analyses are the bread-and-butter of nuclear
+engineers. The main point of the program is therefore to consider the
+eigenvalue problem
+@f{eqnarray*}
+  (L-F-X) \phi = \lambda \phi,
+@f}
+where we want to make sure that all eigenvalues are positive. Note that $L$,
+being the diffusion operator plus the absorption (removal), is positive
+definite; the condition that all eigenvalues are positive therefore means that
+we want to make sure that fission and inter-group scattering are weak enough
+to not shift the spectrum into the negative.
+
+In nuclear engineering, one typically looks at a slightly different
+formulation of the eigenvalue problem. To this end, we do not just multiply
+with $\phi$ and integrate, but rather multiply with $\phi(L-X)^{-1}$. We then
+get the following evolution equation:
+@f{eqnarray*}
+  \frac 1{2v} \frac{\partial}{\partial t}  \|\phi\|^2_{(L-X)^{-1}} = ((L-X)^{-1}(-L+F+X)\phi,\phi).
+@f}
+Stability is then guaranteed if the eigenvalues of the following problem are
+all negative:
+@f{eqnarray*}
+  (L-X)^{-1}(-L+F+X)\phi = \lambda_F \phi,
+@f}
+which is equivalent to the eigenvalue problem
+@f{eqnarray*}
+  (L-X)\phi = \frac 1{\lambda_F+1} F \phi.
+@f}
+The typical formulation in nuclear engineering is to write this as
+@f{eqnarray*}
+  (L-X) \phi = \frac 1{k_{\mathrm{eff}}} F \phi,
+@f}
+where $k_{\mathrm{eff}}=\lambda_F+1$.
+Intuitively, $k_{\mathrm{eff}}$ is something like the multiplication
+factor for neutrons per typical time scale and should be less than or equal to
+one for stable operation of a reactor: if it is less than one, the chain reaction will
+die down, whereas nuclear bombs for example have a $k$-eigenvalue larger than
+one. A stable reactor should have $k_{\mathrm{eff}}=1$.
+
+[For those who wonder how this can be achieved in practice without
+inadvertently getting slightly larger than one and triggering a nuclear bomb:
+first, fission processes happen on different time scales. While most neutrons
+are released very quickly after a fission event, a small number of neutrons
+are only released by daughter nuclei after several further decays, up to 10-60
+seconds after the fission was initiated. If one is slightly beyond
+$k_{\mathrm{eff}}=1$, one therefore has many seconds to react until all the
+neutrons created in fission re-enter the fission cycle. Nevertheless, control
+rods in nuclear reactors absorbing neutrons -- and therefore reducing
+$k_{\mathrm{eff}}$ -- are designed in such a way that they are all the way in
+the reactor in at most 2 seconds.
+
+One therefore has on the order of 10-60 seconds to regulate the nuclear reaction
+if $k_{\mathrm{eff}}$ should be larger than one for some time, as indicated by
+a growing neutron flux. Regulation can be achieved by continuously monitoring
+the neutron flux, and if necessary increasing or reducing it by moving
+neutron-absorbing control rods a few millimeters into or out of the
+reactor. On a longer scale, the water cooling the reactor contains boron, a
+good neutron absorber. Every few hours, boron concentrations are adjusted by
+adding boron or diluting the coolant.
+
+Finally, some of the absorption and scattering reactions have some
+stability built in; for example, higher neutron fluxes result in locally
+higher temperatures, which lowers the density of water and therefore reduces
+the number of scatterers that are necessary to moderate neutrons from high to
+low energies before they can start fission events themselves.]
+
+In this tutorial program, we solve the above $k$-eigenvalue problem for two energy
+groups, and we are looking for the largest multiplication factor
+$k_{\mathrm{eff}}$, which is proportional to the inverse of the minimum
+eigenvalue plus one. To solve the eigenvalue problem, we generally
+use a modified version of the <i>inverse power method</i>. The algorithm looks
+like this:
+
+<ol>
+<li> Initialize $\phi_g$ and $k_{\mathrm{eff}}$ with $\phi_g^{(0)}$
+  and $k_{\mathrm{eff}}^{(0)}$ and let $n=1$.
+
+<li> Define the so-called <i>fission source</i> by
+  @f{eqnarray*}
+    s_f^{(n-1)}(x)
+    =
+    \frac{1}{k_{\mathrm{eff}}^{(n-1)}}
+    \sum_{g'=1}^G\nu\Sigma_{f,g'}(x)\phi_{g'}^{(n-1)}(x).
+  @f}
+
+<li> Solve for all group fluxes $\phi_g,g=1,\ldots,G$ using
+  @f{eqnarray*}
+    -\nabla \cdot D_g\nabla \phi_g^{(n)}
+    +
+    \Sigma_{r,g}\phi_g^{(n)}
+    =
+    \chi_g s_f^{(n-1)}
+    +
+    \sum_{g'< g} \Sigma_{s,g'\to g} \phi_{g'}^{(n)}
+    +
+    \sum_{g'> g}\Sigma_{s,g'\to g}\phi_{g'}^{(n-1)}.
+  @f}
+
+<li> Update
+  @f{eqnarray*}
+    k_{\mathrm{eff}}^{(n)}
+    =
+    \sum_{g'=1}^G
+    \int_{\Omega}\nu\Sigma_{f,g'}(x)
+    \phi_{g'}^{(n)}(x)dx.
+  @f}
+
+<li> Compare $k_{\mathrm{eff}}^{(n)}$ with $k_{\mathrm{eff}}^{(n-1)}$.
+  If the change is greater than a prescribed tolerance, then set $n=n+1$ and repeat
+  the iteration starting at step 2, otherwise end the iteration.
+</ol>
+
+Note that in this scheme, we do not solve group fluxes exactly in each power
+iteration, but rather consider previously computed $\phi_{g'}^{(n)}$ only for
+down-scattering events $g'<g$. Up-scattering is only treated by using old
+iterates $\phi_{g'}^{(n-1)}$, in essence assuming that the scattering
+operator is triangular. This is physically motivated since up-scattering does
+not play too important a role in neutron scattering. In addition, practice
+shows that the inverse power iteration is stable even using this
+simplification.
+
+Note also that one can use lots of extrapolation techniques to accelerate the
+power iteration laid out above. However, none of these are implemented in this
+example.
+
+
+<h3>Meshes and mesh refinement</h3>
+
+One may wonder whether it is appropriate to solve for the solutions of the
+individual energy group equations on the same meshes. The question boils down
+to this: will $\phi_g$ and $\phi_{g'}$ have similar smoothness properties? If
+this is the case, then it is appropriate to use the same mesh for the two; a
+typical application could be chemical combustion, where typically the
+concentrations of all or most chemical species change rapidly within the flame
+front. As it turns out, and as will be apparent by looking at the
+graphs shown in the results section of this tutorial program, this isn't the
+case here, however: since the diffusion coefficient is different for different
+energy groups, fast neutrons (in bins with a small group number $g$) have a very
+smooth flux function, whereas slow neutrons (in bins with a large group
+number) are much more affected by the local material properties and have a
+correspondingly rough solution if the coefficients are rough as in the case we
+compute here. Consequently, we will want to use different meshes to compute
+each energy group.
+
+This has two implications that we will have to consider: First, we need to
+find a way to refine the meshes individually. Second, assembling the source
+terms for the inverse power iteration, where we have to integrate solution
+$\phi_{g'}^{(n)}$ defined on mesh $g'$ against the shape functions defined on
+mesh $g$, becomes a much more complicated task.
+
+
+<h4>Mesh refinement</h4>
+
+We use the usual paradigm: solve on a given mesh, then evaluate an error
+indicator for each cell of each mesh we have. Because it is so convenient, we
+again use the a posteriori error estimator by Kelly, Gago, Zienkiewicz
+and Babuska which approximates the error per cell by integrating the jump of
+the gradient of the solution along the faces of each cell. Using this, we
+obtain indicators
+@f{eqnarray*}
+\eta_{g,K}, \qquad g=1,2,\ldots,G,\qquad K\in{\cal T}_g,
+@f}
+where ${\cal T}_g$ is the triangulation used in the solution of
+$\phi_g$. The question is what to do with this. For one, it is clear that
+refining only those cells with the highest error indicators might lead to bad
+results. To understand this, it is important to realize that $\eta_{g,K}$
+scales with the second derivative of $\phi_g$. In other words, if we have two
+energy groups $g=1,2$ whose solutions are equally smooth but where one is
+larger by a factor of 10,000, for example, then only the cells of that mesh
+will be refined, whereas the mesh for the solution of small magnitude will
+remain coarse. This is probably not what one wants, since we can consider both
+components of the solution equally important.
+
+In essence, we would therefore have to scale $\eta_{g,K}$ by an importance
+factor $z_g$ that says how important it is to resolve $\phi_g$ to any given
+accuracy. Such importance factors can be computed using duality techniques
+(see, for example, the step-14 tutorial program, and the
+reference to the book by Bangerth and Rannacher cited there). We won't go
+there, however, and simply assume that all energy groups are equally
+important, and will therefore normalize the error indicators $\eta_{g,K}$ for
+group $g$ by the maximum of the solution $\phi_g$. We then refine the cells
+whose errors satisfy
+@f{eqnarray*}
+  \frac{\eta_{g,K}}{\|\phi_g\|_\infty}
+  >
+  \alpha_1
+  \displaystyle{\max_{\begin{matrix}1\le g\le G \\ K\in {\cal T}_g\end{matrix}}
+    \frac{\eta_{g,K}}{\|\phi_g\|_\infty}}
+@f}
+and coarsen the cells where
+@f{eqnarray*}
+  \frac{\eta_{g,K}}{\|\phi_g\|_\infty}
+  <
+  \alpha_2
+  \displaystyle{\max_{\begin{matrix}1\le g\le G \\ K\in {\cal T}_g\end{matrix}}
+    \frac{\eta_{g,K}}{\|\phi_g\|_\infty}}.
+@f}
+We chose $\alpha_1=0.3$ and $\alpha_2=0.01$ in the code. Note that this will,
+of course, lead to different meshes for the different energy groups.
+
+The strategy above essentially means the following: If for energy group $g$
+there are many cells $K\in {\cal T}_g$ on which the error is large, for
+example because the solution is globally very rough, then many cells will be
+above the threshold. On the other hand, if there are a few cells with large
+and many with small errors, for example because the solution is overall rather
+smooth except at a few places, then only the few cells with large errors will
+be refined. Consequently, the strategy allows for meshes that track the global
+smoothness properties of the corresponding solutions rather well.
+
+
+<h4>Assembling terms on different meshes</h4>
+
+As pointed out above, the multigroup refinement strategy results in
+different meshes for the different solutions $\phi_g$. So what's the problem?
+In essence it goes like this: in step 3 of the eigenvalue iteration, we have
+to form the weak form for the equation to compute $\phi_g^{(n)}$ as usual by
+multiplication with test functions $\varphi_g^i$ defined on the mesh for
+energy group $g$; in the process, we have to
+compute the right hand side vector that contains terms of the following form:
+@f{eqnarray*}
+  F_i = \int_\Omega f(x) \varphi_g^i(x) \phi_{g'}(x) \ dx,
+@f}
+where $f(x)$ is one of the coefficient functions $\Sigma_{s,g'\to g}$ or
+$\nu\chi_g\Sigma_{f,g'}$ used in the right hand side
+of eigenvalue equation. The difficulty now is that $\phi_{g'}$ is defined on
+the mesh for energy group $g'$, i.e. it can be expanded as
+$\phi_{g'}(x)=\sum_j\phi_{g'}^j \varphi_{g'}^j(x)$, with basis functions
+$\varphi_{g'}^j(x)$ defined on mesh $g'$. The contribution to the right hand
+side can therefore be written as
+@f{eqnarray*}
+  F_i = \sum_j \left\{\int_\Omega f(x) \varphi_g^i(x) \varphi_{g'}^j(x)
+  \ dx \right\} \phi_{g'}^j .
+@f}
+On the other hand, the test functions $\varphi_g^i(x)$ are defined on mesh
+$g$. This means that we can't just split the integral over $\Omega$ into integrals
+over the cells of either mesh $g$ or $g'$, since the respectively other basis
+functions may not be defined on these cells.
+
+The solution to this problem lies in the fact that both the meshes for $g$ and
+$g'$ are derived by adaptive refinement from a common coarse mesh. We can
+therefore always find a set of cells, which we denote by ${\cal T}_g \cap
+{\cal T}_{g'}$, that satisfy the following conditions:
+<ul>
+<li> the union of the cells covers the entire domain, and
+<li> a cell $K \in {\cal T}_g \cap {\cal T}_{g'}$ is active on at least
+  one of the two meshes.
+</ul>
+A way to construct this set is to take each cell of the coarse mesh and do the
+following steps: (i) if the cell is active on either ${\cal T}_g$ or
+${\cal T}_{g'}$, then add this cell to the set; (ii) otherwise, i.e. if
+this cell has children on both meshes, then do step (i) for each of the
+children of this cell. In fact, deal.II has a function
+GridTools::get_finest_common_cells that computes exactly this set
+of cells that are active on at least one of two meshes.
+
+With this, we can write the above integral as follows:
+@f{eqnarray*}
+  F_i
+  =
+  \sum_{K \in {\cal T}_g \cap {\cal T}_{g'}}
+  \sum_j \left\{\int_K f(x) \varphi_g^i(x) \varphi_{g'}^j(x)
+  \ dx \right\} \phi_{g'}^j.
+@f}
+In the code, we
+compute the right hand side in the function
+<code>NeutronDiffusionProblem::assemble_rhs</code>, where (among other things) we
+loop over the set of common most refined cells, calling the function
+<code>NeutronDiffusionProblem::assemble_common_cell</code> on each pair of
+these cells.
+
+By construction, there are now three cases to be considered:
+<ol>
+<li> The cell $K$ is active on both meshes, i.e. both the basis
+  functions $\varphi_g^i$ as well as $\varphi_{g'}^j$ are defined on $K$.
+<li> The cell $K$ is active on mesh $g$, but not $g'$, i.e. the
+  $\varphi_g^i$  are defined on $K$, whereas the $\varphi_{g'}^j$ are defined
+  on children of $K$.
+<li> The cell $K$ is active on mesh $g'$, but not $g$, with the opposite
+  conclusions from (ii).
+</ol>
+
+To compute the right hand side above, we then need to have different code for
+these three cases, as follows:
+<ol>
+<li> If the cell $K$ is active on both meshes, then we can directly
+  evaluate the integral. In fact, we don't even have to bother with the basis
+  functions $\varphi_{g'}$, since all we need is the values of $\phi_{g'}$ at
+  the quadrature points. We can do this using the
+  FEValues::get_function_values function. This is done directly in
+  the <code>NeutronDiffusionProblem::assemble_common_cell</code> function.
+
+<li> If the cell $K$ is active on mesh $g$, but not $g'$, then the
+  basis functions $\varphi_{g'}^j$ are only defined either on the children
+  $K_c,0\le c<2^{\texttt{dim}}$, or on children of these children if cell $K$
+  is refined more than once on mesh $g'$.
+
+  Let us assume for a second that $K$ is only once more refined on mesh $g'$
+  than on mesh $g$. Using the fact that we use embedded finite element spaces
+  where each basis function on one mesh can be written as a linear combination
+  of basis functions on the next refined mesh, we can expand the restriction
+  of $\varphi_g^i$ to child cell $K_c$ into the basis functions defined on that
+  child cell (i.e. on cells on which the basis functions $\varphi_{g'}^l$ are
+  defined):
+  @f{eqnarray*}
+    \varphi_g^i|_{K_c} = B_c^{il} \varphi_{g'}^l|_{K_c}.
+  @f}
+  Here, and in the following, summation over indices appearing twice is
+  implied. The matrix $B_c$ is the matrix that interpolates data from a cell
+  to its $c$-th child.
+
+  Then we can write the contribution of cell $K$ to the right hand side
+  component $F_i$ as
+  @f{eqnarray*}
+    F_i|_K
+    &=&
+    \left\{ \int_K f(x) \varphi_g^i(x) \varphi_{g'}^j(x)
+    \ dx \right\} \phi_{g'}^j
+    \\
+    &=&
+    \left\{
+    \sum_{0\le c<2^{\texttt{dim}}}
+    B_c^{il} \int_{K_c} f(x) \varphi_{g'}^l(x) \varphi_{g'}^j(x)
+    \ dx \right\} \phi_{g'}^j.
+  @f}
+  In matrix notation, this can be written as
+  @f{eqnarray*}
+    F_i|_K
+    =
+    \sum_{0\le c<2^{\texttt{dim}}}
+    F_i|_{K_c},
+    \qquad
+    \qquad
+    F_i|_{K_c} = B_c^{il} M_{K_c}^{lj}  \phi_{g'}^j
+    = (B_c M_{K_c})^{ij} \phi_{g'}^j,
+  @f}
+  where $M_{K_c}^{lj}=\int_{K_c} f(x) \varphi_{g'}^l(x) \varphi_{g'}^j(x)$ is
+  the weighted mass matrix on child $c$ of cell $K$.
+
+  The next question is what happens if a child $K_c$ of $K$ is not
+  active. Then, we have to apply the process recursively, i.e. we have to
+  interpolate the basis functions $\varphi_g^i$ onto child $K_c$ of $K$, then
+  onto child $K_{cc'}$ of that cell, onto child $K_{cc'c''}$ of that one, etc,
+  until we find an active cell. We then have to sum up all the contributions
+  from all the children, grandchildren, etc, of cell $K$, with contributions
+  of the form
+  @f{eqnarray*}
+    F_i|_{K_{cc'}} = (B_cB_{c'} M_{K_{cc'}})^{ij}  \phi_{g'}^j,
+  @f}
+  or
+  @f{eqnarray*}
+    F_i|_{K_{cc'c''}} = (B_c B_{c'} B_{c''}M_{K_{cc'c''}})^{ij}
+    \phi_{g'}^j,
+  @f}
+  etc. We do this process recursively, i.e. if we sit on cell $K$ and see that
+  it has children on grid $g'$, then we call a function
+  <code>assemble_case_2</code> with an identity matrix; the function will
+  multiply its argument from the left with the prolongation matrix; if the
+  cell has further children, it will call itself with this new matrix,
+  otherwise it will perform the integration.
+
+<li> The last case is where $K$ is active on mesh $g'$ but not mesh
+  $g$. In that case, we have to express basis function $\varphi_{g'}^j$ in
+  terms of the basis functions defined on the children of cell $K$, rather
+  than $\varphi_g^i$ as before. This of course works in exactly the same
+  way. If the children of $K$ are active on mesh $g$, then this leads to
+  the expression
+  @f{eqnarray*}
+    F_i|_K
+    &=&
+    \left\{ \int_K f(x) \varphi_g^i(x) \varphi_{g'}^j(x)
+    \ dx \right\} \phi_{g'}^j
+    \\
+    &=&
+    \left\{
+    \sum_{0\le c<2^{\texttt{dim}}}
+    \int_{K_c} f(x) \varphi_g^i(x) B_c^{jl} \varphi_{g}^l(x)
+    \ dx \right\} \phi_{g'}^j.
+  @f}
+  In matrix notation, this expression now reads as
+  @f{eqnarray*}
+    F_i|_K
+    =
+    \sum_{0\le c<2^{\texttt{dim}}}
+    F_i|_{K_c},
+    \qquad
+    \qquad
+    F_i|_{K_c} = M_{K_c}^{il} B_c^{jl}  \phi_{g'}^j
+    =
+    (M_{K_c} B_c^T)^{ij} \phi_{g'}^j,
+  @f}
+  and correspondingly for cases where cell $K$ is refined more than once on
+  mesh $g$:
+  @f{eqnarray*}
+    F_i|_{K_{cc'}} = (M_{K_{cc'}} B_{c'}^T B_c^T)^{ij}  \phi_{g'}^j,
+  @f}
+  or
+  @f{eqnarray*}
+    F_i|_{K_{cc'c''}} = (M_{K_{cc'c''}} B_{c''}^T B_{c'}^T B_c^T)^{ij}
+    \phi_{g'}^j,
+  @f}
+  etc. In other words, the process works in exactly the same way as before,
+  except that we have to take the transpose of the prolongation matrices and
+  need to multiply them with the mass matrix from the other side.
+</ol>
+
+
+The expressions for cases (ii) and (iii) can be understood as repeatedly
+interpolating either the left or right basis functions in the scalar product
+$(f \varphi_g^i, \varphi_{g'}^j)_K$ onto child cells, and then finally
+forming the inner product (the mass matrix) on the final cell. To make the
+symmetry in these cases more obvious, we can write them like this: for case
+(ii), we have
+@f{eqnarray*}
+  F_i|_{K_{cc'\cdots c^{(k)}}}
+  = [B_c B_{c'} \cdots B_{c^{(k)}} M_{K_{cc'\cdots c^{(k)}}}]^{ij}
+    \phi_{g'}^j,
+@f}
+whereas for case (iii) we get
+@f{eqnarray*}
+  F_i|_{K_{cc'\cdots c^{(k)}}}
+  = [(B_c B_{c'} \cdots B_{c^{(k)}} M_{K_{cc'\cdots c^{(k)}}})^T]^{ij}
+    \phi_{g'}^j.
+@f}
+
+
+
+<h3>Description of the test case</h3>
+
+A nuclear reactor core is composed of different types of assemblies. An
+assembly is essentially the smallest unit that can be moved in and out of a
+reactor, and is usually rectangular or square. However, assemblies are not
+fixed units, as they are assembled from a complex lattice of different fuel
+rods, control rods, and instrumentation elements that are held in place
+relative to each other by spacers that are permanently attached to the rods.
+To make things more complicated, there are different kinds of assemblies that
+are used at the same time in a reactor, where assemblies differ in the type
+and arrangement of rods they are made up of.
+
+Obviously, the arrangement of assemblies as well as the arrangement of rods
+inside them affect the distribution of neutron fluxes in the reactor (a fact
+that will be obvious by looking at the solution shown below in the results
+sections of this program). Fuel rods, for example, differ from each other in
+the enrichment of U-235 or Pu-239. Control rods, on the other hand, have zero
+fission, but nonzero scattering and absorption cross sections.
+
+This whole arrangement would make the description of spatially dependent
+material parameters very complicated. It will not become much simpler, but we
+will make one approximation: we merge the volume inhabited by each cylindrical
+rod and the surrounding water into volumes of quadratic cross section, the
+so-called ``pin cells'', for which homogenized material data are obtained from
+nuclear databases and knowledge of the neutron spectrum. The homogenization makes
+all material data piecewise constant on the solution domain for a reactor with
+fresh fuel. Spatially dependent material parameters are then looked up for the
+quadratic assembly in which a point is located, and then for the quadratic pin
+cell within this assembly.
+
+In this tutorial program, we simulate a quarter of a reactor consisting of $4
+\times 4$ assemblies. We use symmetry (Neumann) boundary conditions to reduce
+the problem to one quarter of the domain, and consequently only simulate a
+$2\times 2$ set of assemblies. Two of them will be UO${}_2$ fuel, the other
+two of them MOX fuel. Each of these assemblies consists of $17\times 17$ rods
+of different compositions. In total, we therefore create a $34\times 34$
+lattice of rods. To make things simpler later on, we reflect this fact by
+creating a coarse mesh of $34\times 34$ cells (even though the domain is a
+square, for which we would usually use a single cell). In deal.II, each cell
+has a <code>material_id</code> which one may use to associate each cell with a
+particular number identifying the material from which this cell's volume is
+made; we will use this material ID to identify which of the 8 different
+kinds of rods that are used in this testcase make up a particular cell. Note
+that upon mesh refinement, the children of a cell inherit the material ID,
+making it simple to track the material even after mesh refinement.
+
+The arrangement of the rods will be clearly visible in the images shown in
+the results section. The cross sections for materials and for both energy
+groups are taken from an OECD/NEA benchmark problem. The detailed configuration
+and material data is given in the code.
+
+
+<h3>What the program does (and how it does that)</h3>
+
+As a coarse overview of what exactly the program does, here is the basic
+layout: starting on a coarse mesh that is the same for each energy group, we
+compute inverse eigenvalue iterations to compute the $k$-eigenvalue on a given
+set of meshes. We stop these iterations when the change in the eigenvalue
+drops below a certain tolerance, and then write out the meshes and solutions
+for each energy group for inspection by a graphics program. Because the meshes
+for the solutions are different, we have to generate a separate output file
+for each energy group, rather than being able to add all energy group
+solutions into the same file.
+
+After this, we evaluate the error indicators as explained in one of the sections
+above for each of the meshes, and refine and coarsen the cells of each mesh
+independently. Since the eigenvalue iterations are fairly expensive, we don't
+want to start all over on the new mesh; rather, we use the SolutionTransfer
+class to interpolate the solution on the previous mesh to the next one upon
+mesh refinement. A simple experiment will convince you that this is a lot
+cheaper than if we omitted this step. After doing so, we resume our eigenvalue
+iterations on the next set of meshes.
+
+The program is controlled by a parameter file, using the ParameterHandler
+class already mentioned in the step-19 example program. We will show a
+parameter file in the results section of this program. For the moment suffice
+it to say that it controls the polynomial degree of the finite elements used,
+the number of energy groups (even though all that is presently implemented are
+the coefficients for a 2-group problem), the tolerance where to stop the
+inverse eigenvalue iteration, and the number of refinement cycles we will do.
diff --git a/examples/step-28/doc/kind b/examples/step-28/doc/kind
new file mode 100644
index 0000000..c1d9154
--- /dev/null
+++ b/examples/step-28/doc/kind
@@ -0,0 +1 @@
+techniques
diff --git a/examples/step-28/doc/results.dox b/examples/step-28/doc/results.dox
new file mode 100644
index 0000000..edbacad
--- /dev/null
+++ b/examples/step-28/doc/results.dox
@@ -0,0 +1,80 @@
+<h1>Results</h1>
+
+
+The output of this program consists of the console output, a file
+named ``convergence_table'' that records the main results of each mesh iteration,
+the eps files showing the grids, and the solutions given in gnuplot format.
+
+When we set Polynomial_Order to 2, we get the following console output:
+@code
+Cycle 0:
+   Numbers of active cells:       1156 1156
+   Numbers of degrees of freedom: 4761 4761
+Iter number:1 k_eff=319.375676634307 flux ratio=6.836246075631 max_thermal=1.433899030144
+Iter number:2 k_eff=0.834072546055 flux ratio=5.204601882141 max_thermal=0.004630925876
+Iter number:3 k_eff=0.862826188043 flux ratio=4.645051765984 max_thermal=0.005380396338
+Iter number:4 k_eff=0.877887920967 flux ratio=4.318030683875 max_thermal=0.006005512201
+Iter number:5 k_eff=0.887161559547 flux ratio=4.256596788174 max_thermal=0.006639443035
+...
+Iter number:69 k_eff=0.906841960370 flux ratio=4.384056022578 max_thermal=0.008466414246
+Iter number:70 k_eff=0.906841960371 flux ratio=4.384056022582 max_thermal=0.008466414246
+Cycle 1:
+   Numbers of active cells:       1156 2380
+   Numbers of degrees of freedom: 4761 10667
+Iter number:1 k_eff=0.906838267472 flux ratio=4.385474405124 max_thermal=0.008463675976
+Iter number:2 k_eff=0.906837892433 flux ratio=4.385486158840 max_thermal=0.008463675386
+...
+Cycle 11:
+   Numbers of active cells:       11749 47074
+   Numbers of degrees of freedom: 50261 204523
+Iter number:1 k_eff=0.906805395149 flux ratio=4.384872231023 max_thermal=0.008464861813
+...
+Iter number:32 k_eff=0.906834736551 flux ratio=4.384846081796 max_thermal=0.008465019607
+Iter number:33 k_eff=0.906834736552 flux ratio=4.384846081800 max_thermal=0.008465019607
+@endcode
+
+We see that the power iteration converges faster after cycle 0 because it is
+initialized with the solution from the previous mesh iteration.
+The contents of ``convergence_table'' are:
+@code
+0 4761 4761 0.906841960371 4.384056022582
+1 4761 10667 0.906837901031 4.385489087760
+2 4761 18805 0.906836075928 4.385466647499
+3 6629 27301 0.906835500111 4.385404580865
+4 12263 48095 0.906835001796 4.385381798734
+5 17501 69297 0.906834858174 4.384853823414
+6 19933 78605 0.906834824060 4.384850658788
+7 23979 93275 0.906834787556 4.384848379257
+8 30285 117017 0.906834761604 4.384846544947
+9 40087 154355 0.906834746216 4.384846083191
+10 45467 179469 0.906834740155 4.384846005044
+11 50261 204523 0.906834736552 4.384846081800
+@endcode
+The columns are: the number of the mesh iteration, the number of degrees of
+freedom of the fast energy group, the number of DoFs of the thermal group, the
+converged k-effective, and the ratio between the maxima of the fast and thermal fluxes.
+
+The grids of the fast and thermal energy groups at mesh iteration #9 are shown
+in the following figures.
+
+<img src="http://www.dealii.org/images/steps/developer/step-28.grid-0.9.order2.png" alt="">
+
+<img src="http://www.dealii.org/images/steps/developer/step-28.grid-1.9.order2.png" alt="">
+
+We see that the grid of the thermal group is much finer than that of the fast group.
+The solutions on these grids are shown below. (Note: fluxes are normalized so
+that the total fission source equals 1.)
+
+<img src="http://www.dealii.org/images/steps/developer/step-28.solution-0.9.order2.png" alt="">
+
+<img src="http://www.dealii.org/images/steps/developer/step-28.solution-1.9.order2.png" alt="">
+
+Then we plot the convergence data for polynomial orders 1, 2 and 3.
+
+<img src="http://www.dealii.org/images/steps/developer/step-28.convergence.png" alt="">
+
+The estimated ``exact'' k-effective is 0.906834721253, which is simply the value
+from the last mesh iteration with polynomial order 3 minus 2e-10. We see that
+h-adaptive calculations deliver algebraic convergence, and that the higher the
+polynomial order, the faster the mesh iteration converges. In our problem, we need
+fewer DoFs to achieve the same accuracy with a higher polynomial order.
diff --git a/examples/step-28/doc/tooltip b/examples/step-28/doc/tooltip
new file mode 100644
index 0000000..699fce5
--- /dev/null
+++ b/examples/step-28/doc/tooltip
@@ -0,0 +1 @@
+Handling multiple meshes at the same time. Neutron transport.
diff --git a/examples/step-28/step-28.cc b/examples/step-28/step-28.cc
new file mode 100644
index 0000000..b73f907
--- /dev/null
+++ b/examples/step-28/step-28.cc
@@ -0,0 +1,1807 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2009 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Author: Yaqi Wang, Texas A&M University, 2009, 2010
+ */
+
+
+// @sect3{Include files}
+
+// We start with a bunch of include files that have already been explained in
+// previous tutorial programs:
+#include <deal.II/base/timer.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/thread_management.h>
+#include <deal.II/base/parameter_handler.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/sparsity_pattern.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/precondition.h>
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_refinement.h>
+#include <deal.II/grid/grid_out.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/tria_boundary_lib.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/matrix_tools.h>
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/numerics/error_estimator.h>
+
+#include <fstream>
+#include <iostream>
+
+#include <deal.II/base/utilities.h>
+
+// We use the next include file to access block vectors which provide us a
+// convenient way to manage solution and right hand side vectors of all energy
+// groups:
+#include <deal.II/lac/block_vector.h>
+
+// This include file is for transferring solutions from one mesh to another
+// different mesh. We use it when we are initializing solutions after each
+// mesh iteration:
+#include <deal.II/numerics/solution_transfer.h>
+
+// When integrating functions defined on one mesh against shape functions
+// defined on a different mesh, we need a function @p get_finest_common_cells
+// (as discussed in the introduction) which is defined in the following header
+// file:
+#include <deal.II/grid/grid_tools.h>
+
+// Here are two more C++ standard headers that we use to define list data
+// types as well as to fine-tune the output we generate:
+#include <list>
+#include <iomanip>
+
+// The last step is as in all previous programs:
+namespace Step28
+{
+  using namespace dealii;
+
+
+  // @sect3{Material data}
+
+  // First up, we need to define a class that provides material data
+  // (including diffusion coefficients, removal cross sections, scattering
+  // cross sections, fission cross sections and fission spectra) to the main
+  // class.
+  //
+  // The parameter to the constructor determines for how many energy groups we
+  // set up the relevant tables. At present, this program only includes data
+  // for 2 energy groups, but a more sophisticated program may be able to
+  // initialize the data structures for more groups as well, depending on how
+  // many energy groups are selected in the parameter file.
+  //
+  // For each of the different coefficient types, there is one function that
+  // returns the value of this coefficient for a particular energy group (or
+  // combination of energy groups, as for the distribution cross section
+  // $\chi_g\nu\Sigma_{f,g'}$ or scattering cross section $\Sigma_{s,g'\to
+  // g}$). In addition to the energy group or groups, these coefficients
+  // depend on the type of fuel or control rod, as explained in the
+  // introduction. The functions therefore take an additional parameter, @p
+  // material_id, that identifies the particular kind of rod. Within this
+  // program, we use <code>n_materials=8</code> different kinds of rods.
+  //
+  // Except for the scattering cross section, each of the coefficients
+  // therefore can be represented as an entry in a two-dimensional array of
+  // floating point values indexed by the energy group number as well as the
+  // material ID. The Table class template is the ideal way to store such
+  // data. Finally, the scattering coefficient depends on two energy
+  // group indices and therefore needs to be stored in a three-dimensional
+  // array, for which we again use the Table class, where this time the first
+  // template argument (denoting the dimensionality of the array) of course
+  // needs to be three:
+  class MaterialData
+  {
+  public:
+    MaterialData (const unsigned int n_groups);    // sizes all tables and fills in the 2-group data
+
+    double get_diffusion_coefficient (const unsigned int group,
+                                      const unsigned int material_id) const;
+    double get_removal_XS (const unsigned int group,
+                           const unsigned int material_id) const;
+    double get_fission_XS (const unsigned int group,
+                           const unsigned int material_id) const;
+    double get_fission_dist_XS (const unsigned int group_1,      // fission spectrum is taken for group_1,
+                                const unsigned int group_2,      // fission cross section for group_2
+                                const unsigned int material_id) const;
+    double get_scattering_XS (const unsigned int group_1,
+                              const unsigned int group_2,
+                              const unsigned int material_id) const;
+    double get_fission_spectrum (const unsigned int group,
+                                 const unsigned int material_id) const;
+
+  private:
+    const unsigned int n_groups;       // number of energy groups passed to the constructor
+    const unsigned int n_materials;    // set to 8 by the constructor; must stay declared before the tables it sizes
+
+    Table<2,double> diffusion;         // indexed [material_id][group]
+    Table<2,double> sigma_r;           // indexed [material_id][group]; read by get_removal_XS
+    Table<2,double> nu_sigma_f;        // indexed [material_id][group]; read by get_fission_XS
+    Table<3,double> sigma_s;           // indexed [material_id][group_1][group_2]; read by get_scattering_XS
+    Table<2,double> chi;               // indexed [material_id][group]; read by get_fission_spectrum
+  };
+
+  // The constructor of the class is used to initialize all the material data
+  // arrays. It takes the number of energy groups as an argument (and throws an
+  // error if that value is not equal to two, since at present only data for
+  // two energy groups is implemented; however, using this, the function
+  // remains flexible and extendable into the future). In the member
+  // initialization part at the beginning, it also resizes the arrays to their
+  // correct sizes.
+  //
+  // At present, material data is stored for 8 different types of
+  // material. This, as well, may easily be extended in the future.
+  MaterialData::MaterialData (const unsigned int n_groups)
+    :
+    n_groups (n_groups),
+    n_materials (8),
+    diffusion (n_materials, n_groups),           // all tables: first index material, then group(s)
+    sigma_r (n_materials, n_groups),
+    nu_sigma_f (n_materials, n_groups),
+    sigma_s (n_materials, n_groups, n_groups),
+    chi (n_materials, n_groups)
+  {
+    switch (n_groups)
+      {
+      case 2:
+      {
+        for (unsigned int m=0; m<n_materials; ++m)      // defaults that are the same for every material
+          {
+            diffusion[m][0] = 1.2;
+            diffusion[m][1] = 0.4;
+            chi[m][0]       = 1.0;
+            chi[m][1]       = 0.0;
+            sigma_r[m][0]   = 0.03;
+            for (unsigned int group_1=0; group_1<n_groups; ++group_1)
+              for (unsigned int group_2=0; group_2<n_groups; ++ group_2)
+                sigma_s[m][group_1][group_2]   = 0.0;   // only the [0][1] entries are overwritten below
+          }
+
+
+        diffusion[5][1]  = 0.2;       // from here on: per-material values replacing or completing the defaults
+
+        sigma_r[4][0]    = 0.026;     // group 0: materials 0-3 keep the default 0.03
+        sigma_r[5][0]    = 0.051;
+        sigma_r[6][0]    = 0.026;
+        sigma_r[7][0]    = 0.050;
+
+        sigma_r[0][1]    = 0.100;     // group 1 has no default, so all 8 materials are listed
+        sigma_r[1][1]    = 0.200;
+        sigma_r[2][1]    = 0.250;
+        sigma_r[3][1]    = 0.300;
+        sigma_r[4][1]    = 0.020;
+        sigma_r[5][1]    = 0.040;
+        sigma_r[6][1]    = 0.020;
+        sigma_r[7][1]    = 0.800;
+
+        nu_sigma_f[0][0] = 0.0050;    // nu_sigma_f has no default either; group 0 for all 8 materials
+        nu_sigma_f[1][0] = 0.0075;
+        nu_sigma_f[2][0] = 0.0075;
+        nu_sigma_f[3][0] = 0.0075;
+        nu_sigma_f[4][0] = 0.000;
+        nu_sigma_f[5][0] = 0.000;
+        nu_sigma_f[6][0] = 1e-7;
+        nu_sigma_f[7][0] = 0.00;
+
+        nu_sigma_f[0][1] = 0.125;     // nu_sigma_f, group 1 for all 8 materials
+        nu_sigma_f[1][1] = 0.300;
+        nu_sigma_f[2][1] = 0.375;
+        nu_sigma_f[3][1] = 0.450;
+        nu_sigma_f[4][1] = 0.000;
+        nu_sigma_f[5][1] = 0.000;
+        nu_sigma_f[6][1] = 3e-6;
+        nu_sigma_f[7][1] = 0.00;
+
+        sigma_s[0][0][1] = 0.020;     // the only nonzero scattering entries: [material][0][1]
+        sigma_s[1][0][1] = 0.015;
+        sigma_s[2][0][1] = 0.015;
+        sigma_s[3][0][1] = 0.015;
+        sigma_s[4][0][1] = 0.025;
+        sigma_s[5][0][1] = 0.050;
+        sigma_s[6][0][1] = 0.025;
+        sigma_s[7][0][1] = 0.010;
+
+        break;
+      }
+
+
+      default:                        // any group count other than 2 ends up here
+        Assert (false,                // NOTE(review): presumably inactive in optimized builds -- confirm whether AssertThrow is intended
+                ExcMessage ("Presently, only data for 2 groups is implemented"));
+      }
+  }
+
+
+  // Next are the functions that return the coefficient values for given
+  // materials and energy groups. All they do is to make sure that the given
+  // arguments are within the allowed ranges, and then look the respective
+  // value up in the corresponding tables:
+  double
+  MaterialData::get_diffusion_coefficient (const unsigned int group,
+                                           const unsigned int material_id) const
+  {
+    Assert (group < n_groups,                    // both indices are range-checked before the lookup
+            ExcIndexRange (group, 0, n_groups));
+    Assert (material_id < n_materials,
+            ExcIndexRange (material_id, 0, n_materials));
+
+    return diffusion[material_id][group];        // table order is [material][group], the reverse of the argument order
+  }
+
+
+
+  double
+  MaterialData::get_removal_XS (const unsigned int group,
+                                const unsigned int material_id) const
+  {
+    Assert (group < n_groups,                    // group index must be below n_groups
+            ExcIndexRange (group, 0, n_groups));
+    Assert (material_id < n_materials,           // material index must be below n_materials
+            ExcIndexRange (material_id, 0, n_materials));
+
+    return sigma_r[material_id][group];          // removal cross section is stored in the sigma_r table
+  }
+
+
+  double
+  MaterialData::get_fission_XS (const unsigned int group,
+                                const unsigned int material_id) const
+  {
+    Assert (group < n_groups,                    // group index must be below n_groups
+            ExcIndexRange (group, 0, n_groups));
+    Assert (material_id < n_materials,           // material index must be below n_materials
+            ExcIndexRange (material_id, 0, n_materials));
+
+    return nu_sigma_f[material_id][group];       // fission cross section is stored in the nu_sigma_f table
+  }
+
+
+
+  double
+  MaterialData::get_scattering_XS (const unsigned int group_1,
+                                   const unsigned int group_2,
+                                   const unsigned int material_id) const
+  {
+    Assert (group_1 < n_groups,                  // each of the two group indices is checked separately
+            ExcIndexRange (group_1, 0, n_groups));
+    Assert (group_2 < n_groups,
+            ExcIndexRange (group_2, 0, n_groups));
+    Assert (material_id < n_materials,
+            ExcIndexRange (material_id, 0, n_materials));
+
+    return sigma_s[material_id][group_1][group_2];   // rank-3 table: [material][group_1][group_2]
+  }
+
+
+
+  double
+  MaterialData::get_fission_spectrum (const unsigned int group,
+                                      const unsigned int material_id) const
+  {
+    Assert (group < n_groups,                    // group index must be below n_groups
+            ExcIndexRange (group, 0, n_groups));
+    Assert (material_id < n_materials,           // material index must be below n_materials
+            ExcIndexRange (material_id, 0, n_materials));
+
+    return chi[material_id][group];              // fission spectrum is stored in the chi table
+  }
+
+
+  // The function computing the fission distribution cross section is slightly
+  // different, since it computes its value as the product of two other
+  // coefficients. We don't need to check arguments here, since this already
+  // happens when we call the two other functions involved, even though it
+  // would probably not hurt either:
+  double
+  MaterialData::get_fission_dist_XS (const unsigned int group_1,
+                                     const unsigned int group_2,
+                                     const unsigned int material_id) const
+  {
+    return (get_fission_spectrum(group_1, material_id) *    // chi entry for group_1 ...
+            get_fission_XS(group_2, material_id));          // ... times nu_sigma_f entry for group_2; both calls range-check
+  }
+
+
+
+  // @sect3{The <code>EnergyGroup</code> class}
+
+  // The first interesting class is the one that contains everything that is
+  // specific to a single energy group. To group things that belong together
+  // into individual objects, we declare a structure that holds the
+  // Triangulation and DoFHandler objects for the mesh used for a single
+  // energy group, and a number of other objects and member functions that we
+  // will discuss in the following sections.
+  //
+  // The main reason for this class is as follows: for both the forward
+  // problem (with a specified right hand side) as well as for the eigenvalue
+  // problem, one typically solves a sequence of problems for a single energy
+  // group each, rather than the fully coupled problem. This becomes
+  // understandable once one realizes that the system matrix for a single
+  // energy group is symmetric and positive definite (it is simply a diffusion
+  // operator), whereas the matrix for the fully coupled problem is generally
+  // nonsymmetric and not definite. It is also very large and quite full if
+  // more than a few energy groups are involved.
+  //
+  // Let us first look at the equation to solve in the case of an external
+  // right hand side (for the time independent case): @f{eqnarray*} -\nabla
+  // \cdot(D_g(x) \nabla \phi_g(x)) + \Sigma_{r,g}(x)\phi_g(x) =
+  // \chi_g\sum_{g'=1}^G\nu\Sigma_{f,g'}(x)\phi_{g'}(x) + \sum_{g'\ne
+  // g}\Sigma_{s,g'\to g}(x)\phi_{g'}(x) + s_{\mathrm{ext},g}(x) @f}
+  //
+  // We would typically solve this equation by moving all the terms on the
+  // right hand side with $g'=g$ to the left hand side, and solving for
+  // $\phi_g$. Of course, we don't know $\phi_{g'}$ yet, since the equations
+  // for those variables include right hand side terms involving
+  // $\phi_g$. What one typically does in such situations is to iterate:
+  // compute @f{eqnarray*} -\nabla \cdot(D_g(x) \nabla \phi^{(n)}_g(x)) &+&
+  // \Sigma_{r,g}(x)\phi^{(n)}_g(x) \\ &=&
+  // \chi_g\sum_{g'=1}^{g-1}\nu\Sigma_{f,g'}(x)\phi^{(n)}_{g'}(x) +
+  // \chi_g\sum_{g'=g}^G\nu\Sigma_{f,g'}(x)\phi^{(n-1)}_{g'}(x) + \sum_{g'\ne
+  // g, g'<g}\Sigma_{s,g'\to g}(x)\phi^{(n)}_{g'}(x) + \sum_{g'\ne g,
+  // g'>g}\Sigma_{s,g'\to g}(x)\phi^{(n-1)}_{g'}(x) + s_{\mathrm{ext},g}(x)
+  // @f}
+  //
+  // In other words, we solve the equation one by one, using values for
+  // $\phi_{g'}$ from the previous iteration $n-1$ if $g'\ge g$ and already
+  // computed values for $\phi_{g'}$ from the present iteration if $g'<g$.
+  //
+  // When computing the eigenvalue, we do a very similar iteration, except
+  // that we have no external right hand side and that the solution is scaled
+  // after each iteration as explained in the introduction.
+  //
+  // In either case, these two cases can be treated jointly if all we do is to
+  // equip the following class with these abilities: (i) form the left hand
+  // side matrix, (ii) form the in-group right hand side contribution,
+  // i.e. involving the extraneous source, and (iii) form that contribution to
+  // the right hand side that stems from group $g'$. This class does exactly
+  // these tasks (as well as some book-keeping, such as mesh refinement,
+  // setting up matrices and vectors, etc). On the other hand, the class
+  // itself has no idea how many energy groups there are, and in particular
+  // how they interact, i.e. the decision of how the outer iteration looks
+  // (and consequently whether we solve an eigenvalue or a direct problem) is
+  // left to the NeutronDiffusionProblem class further down below in this
+  // program.
+  //
+  // So let us go through the class and its interface:
+  template <int dim>
+  class EnergyGroup
+  {
+  public:
+
+    // @sect5{Public member functions}
+    //
+    // The class has a good number of public member functions, since the
+    // way it operates is controlled from the outside, and therefore all
+    // functions that do something significant need to be called from another
+    // class. Let's start off with book-keeping: the class obviously needs to
+    // know which energy group it represents, which material data to use, and
+    // from what coarse grid to start. The constructor takes this information
+    // and initializes the relevant member variables with that (see below).
+    //
+    // Then we also need functions that set up the linear system,
+    // i.e. correctly size the matrix and its sparsity pattern, etc, given a
+    // finite element object to use. The <code>setup_linear_system</code>
+    // function does that. Finally, for this initial block, there are two
+    // functions that return the number of active cells and degrees of freedom
+    // used in this object -- using this, we can make the triangulation and
+    // DoF handler member variables private, and do not have to grant external
+    // use to it, enhancing encapsulation:
+    EnergyGroup (const unsigned int        group,
+                 const MaterialData       &material_data,
+                 const Triangulation<dim> &coarse_grid,
+                 const FiniteElement<dim> &fe);
+
+    void setup_linear_system ();
+
+    unsigned int n_active_cells () const;
+    unsigned int n_dofs () const;
+
+    // Then there are functions that assemble the linear system for each
+    // iteration and the present energy group. Note that the matrix is
+    // independent of the iteration number, so only has to be computed once
+    // for each refinement cycle. The situation is a bit more involved for the
+    // right hand side that has to be updated in each inverse power iteration,
+    // and that is further complicated by the fact that computing it may
+    // involve several different meshes as explained in the introduction. To
+    // make things more flexible with regard to solving the forward or the
+    // eigenvalue problem, we split the computation of the right hand side
+    // into a function that assembles the extraneous source and in-group
+    // contributions (which we will call with a zero function as source terms
+    // for the eigenvalue problem) and one that computes contributions to the
+    // right hand side from another energy group:
+    void assemble_system_matrix ();
+    void assemble_ingroup_rhs (const Function<dim> &extraneous_source);
+    void assemble_cross_group_rhs (const EnergyGroup<dim> &g_prime);
+
+    // Next we need a set of functions that actually compute the solution of a
+    // linear system, and do something with it (such as computing the fission
+    // source contribution mentioned in the introduction, writing graphical
+    // information to an output file, computing error indicators, or actually
+    // refining the grid based on these criteria and thresholds for refinement
+    // and coarsening). All these functions will later be called from the
+    // driver class <code>NeutronDiffusionProblem</code>, or any other class
+    // you may want to implement to solve a problem involving the neutron flux
+    // equations:
+    void   solve ();
+
+    double get_fission_source () const;
+
+    void   output_results (const unsigned int cycle) const;
+
+    void   estimate_errors (Vector<float> &error_indicators) const;
+
+    void   refine_grid (const Vector<float> &error_indicators,
+                        const double         refine_threshold,
+                        const double         coarsen_threshold);
+
+    // @sect5{Public data members}
+    //
+    // As is good practice in object oriented programming, we hide most data
+    // members by making them private. However, we have to grant the class
+    // that drives the process access to the solution vector as well as the
+    // solution of the previous iteration, since in the power iteration, the
+    // solution vector is scaled in every iteration by the present guess of
+    // the eigenvalue we are looking for:
+  public:
+
+    Vector<double> solution;
+    Vector<double> solution_old;
+
+
+    // @sect5{Private data members}
+    //
+    // The rest of the data members are private. Compared to all the previous
+    // tutorial programs, the only new data members are an integer storing
+    // which energy group this object represents, and a reference to the
+    // material data object that this object's constructor gets passed from
+    // the driver class. Likewise, the constructor gets a reference to the
+    // finite element object we are to use.
+    //
+    // Finally, we have to apply boundary values to the linear system in each
+    // iteration, i.e. quite frequently. Rather than interpolating them every
+    // time, we interpolate them once on each new mesh and then store them
+    // along with all the other data of this class:
+  private:
+
+    const unsigned int            group;
+    const MaterialData           &material_data;
+
+    Triangulation<dim>            triangulation;
+    const FiniteElement<dim>     &fe;
+    DoFHandler<dim>               dof_handler;
+
+    SparsityPattern               sparsity_pattern;
+    SparseMatrix<double>          system_matrix;
+
+    Vector<double>                system_rhs;
+
+    std::map<types::global_dof_index,double> boundary_values;
+    ConstraintMatrix              hanging_node_constraints;
+
+
+    // @sect5{Private member functions}
+    //
+    // There is one private member function in this class. It recursively
+    // walks over cells of two meshes to compute the cross-group right hand
+    // side terms. The algorithm for this is explained in the introduction to
+    // this program. The arguments to this function are a reference to an
+    // object representing the energy group against which we want to integrate
+    // a right hand side term, an iterator to a cell of the mesh used for the
+    // present energy group, an iterator to a corresponding cell on the other
+    // mesh, and the matrix that interpolates the degrees of freedom from the
+    // coarser of the two cells to the finer one:
+  private:
+
+    void
+    assemble_cross_group_rhs_recursive (const EnergyGroup<dim>                        &g_prime,
+                                        const typename DoFHandler<dim>::cell_iterator &cell_g,
+                                        const typename DoFHandler<dim>::cell_iterator &cell_g_prime,
+                                        const FullMatrix<double>                       prolongation_matrix);
+  };
+
+
+  // @sect4{Implementation of the <code>EnergyGroup</code> class}
+
+  // The first few functions of this class are mostly self-explanatory. The
+  // constructor only sets a few data members and creates a copy of the given
+  // triangulation as the base for the triangulation used for this energy
+  // group. The next two functions simply return data from private data
+  // members, thereby enabling us to make these data members private.
+  template <int dim>
+  EnergyGroup<dim>::EnergyGroup (const unsigned int        group,
+                                 const MaterialData       &material_data,
+                                 const Triangulation<dim> &coarse_grid,
+                                 const FiniteElement<dim> &fe)
+    :
+    group (group),
+    material_data (material_data),   // kept as a reference, not copied
+    fe (fe),                         // likewise kept as a reference
+    dof_handler (triangulation)      // attached to this object's own mesh
+  {
+    triangulation.copy_triangulation (coarse_grid);  // private copy of the coarse grid
+    dof_handler.distribute_dofs (fe);                // enumerate DoFs on that copy
+  }
+
+
+
+  template <int dim>
+  unsigned int EnergyGroup<dim>::n_active_cells () const
+  {
+    // Report the number of active cells of this group's own mesh.
+    const unsigned int n_cells = triangulation.n_active_cells ();
+    return n_cells;
+  }
+
+
+
+  template <int dim>
+  unsigned int EnergyGroup<dim>::n_dofs () const
+  {
+    // Report the number of degrees of freedom of this group's DoF handler.
+    const unsigned int n_group_dofs = dof_handler.n_dofs ();
+    return n_group_dofs;
+  }
+
+
+
+  // @sect5{<code>EnergyGroup::setup_linear_system</code>}
+  //
+  // The first "real" function is the one that sets up the mesh, matrices,
+  // etc, on the new mesh or after mesh refinement. We use this function to
+  // initialize sparse system matrices, and the right hand side vector. If the
+  // solution vector has never been set before (as indicated by a zero size),
+  // we also initialize it and set it to a default value. We don't do that if
+  // it already has a non-zero size (i.e. this function is called after mesh
+  // refinement) since in that case we want to preserve the solution across
+  // mesh refinement (something we do in the
+  // <code>EnergyGroup::refine_grid</code> function).
+  template <int dim>
+  void
+  EnergyGroup<dim>::setup_linear_system ()
+  {
+    const unsigned int n_dofs = dof_handler.n_dofs();
+
+    hanging_node_constraints.clear ();  // discard constraints of the previous mesh
+    DoFTools::make_hanging_node_constraints (dof_handler,
+                                             hanging_node_constraints);
+    hanging_node_constraints.close ();
+
+    system_matrix.clear ();
+
+    DynamicSparsityPattern dsp(n_dofs, n_dofs);
+    DoFTools::make_sparsity_pattern (dof_handler, dsp);
+    hanging_node_constraints.condense (dsp);
+    sparsity_pattern.copy_from (dsp);
+
+    system_matrix.reinit (sparsity_pattern);
+
+    system_rhs.reinit (n_dofs);
+
+    if (solution.size() == 0)  // first call only: no solution exists yet
+      {
+        solution.reinit (n_dofs);
+        solution_old.reinit(n_dofs);
+        solution_old = 1.0;
+        solution = solution_old;
+      }
+
+
+    // At the end of this function, we update the list of boundary nodes and
+    // their values, by first clearing this list and then re-interpolating
+    // boundary values (remember that this function is called after first
+    // setting up the mesh, and each time after mesh refinement).
+    //
+    // To understand the code, it is necessary to realize that we create the
+    // mesh using the <code>GridGenerator::subdivided_hyper_rectangle</code>
+    // function (in <code>NeutronDiffusionProblem::initialize_problem</code>)
+    // where we set the last parameter to <code>true</code>. This means that
+    // boundaries of the domain are "colored", i.e. the four (or six, in 3d)
+    // sides of the domain are assigned different boundary indicators. As it
+    // turns out, the bottom boundary gets indicator zero, the top one
+    // boundary indicator one, and left and right boundaries get indicators
+    // two and three, respectively.
+    //
+    // In this program, we simulate only one, namely the top right, quarter of
+    // a reactor. That is, we want to interpolate boundary conditions only on
+    // the top and right boundaries, while doing nothing on the bottom and left
+    // boundaries (i.e. impose natural, no-flux Neumann boundary
+    // conditions). This is most easily generalized to arbitrary dimension by
+    // saying that we want to interpolate on those boundaries with indicators
+    // 1, 3, ..., which we do in the following loop (note that calls to
+    // <code>VectorTools::interpolate_boundary_values</code> are additive,
+    // i.e. they do not first clear the boundary value map):
+    boundary_values.clear();
+
+    for (unsigned int i=0; i<dim; ++i)
+      VectorTools::interpolate_boundary_values (dof_handler,
+                                                2*i+1,  // odd indicators 1, 3, ...
+                                                ZeroFunction<dim>(),
+                                                boundary_values);
+  }
+
+
+
+  // @sect5{<code>EnergyGroup::assemble_system_matrix</code>}
+  //
+  // Next we need functions assembling the system matrix and right hand
+  // sides. Assembling the matrix is straightforward given the equations
+  // outlined in the introduction as well as what we've seen in previous
+  // example programs. Note the use of <code>cell->material_id()</code> to get
+  // at the kind of material of which a cell is made up. Note also how we
+  // set the order of the quadrature formula so that it is always appropriate
+  // for the finite element in use.
+  //
+  // Finally, note that since we only assemble the system matrix here, we
+  // can't yet eliminate boundary values (we need the right hand side vector
+  // for this). We defer this to the <code>EnergyGroup::solve</code> function,
+  // at which point all the information is available.
+  template <int dim>
+  void
+  EnergyGroup<dim>::assemble_system_matrix ()
+  {
+    // Assemble the diffusion-plus-removal matrix of this energy group into
+    // system_matrix. Boundary values are not applied in this function.
+    const QGauss<dim>  quadrature_formula(fe.degree + 1);
+
+    FEValues<dim> fe_values (fe, quadrature_formula,
+                             update_values    |  update_gradients |
+                             update_JxW_values);
+
+    const unsigned int dofs_per_cell = fe.dofs_per_cell;
+    const unsigned int n_q_points    = quadrature_formula.size();
+
+    // Only local matrix contributions are needed; this function assembles no
+    // right hand side.
+    FullMatrix<double> cell_matrix (dofs_per_cell, dofs_per_cell);
+
+    std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+
+    for (; cell!=endc; ++cell)
+      {
+        cell_matrix = 0;
+
+        fe_values.reinit (cell);
+
+        // The material coefficients are looked up once per cell, based on
+        // the cell's material id.
+        const double diffusion_coefficient
+          = material_data.get_diffusion_coefficient (group, cell->material_id());
+        const double removal_XS
+          = material_data.get_removal_XS (group,cell->material_id());
+
+        for (unsigned int q_point=0; q_point<n_q_points; ++q_point)
+          for (unsigned int i=0; i<dofs_per_cell; ++i)
+            for (unsigned int j=0; j<dofs_per_cell; ++j)
+              cell_matrix(i,j) += ((diffusion_coefficient *
+                                    fe_values.shape_grad(i,q_point) *
+                                    fe_values.shape_grad(j,q_point)
+                                    +
+                                    removal_XS *
+                                    fe_values.shape_value(i,q_point) *
+                                    fe_values.shape_value(j,q_point))
+                                   *
+                                   fe_values.JxW(q_point));
+
+        cell->get_dof_indices (local_dof_indices);
+
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          for (unsigned int j=0; j<dofs_per_cell; ++j)
+            system_matrix.add (local_dof_indices[i],
+                               local_dof_indices[j],
+                               cell_matrix(i,j));
+      }
+
+    // Eliminate hanging node constraints from the assembled matrix.
+    hanging_node_constraints.condense (system_matrix);
+  }
+
+
+
+  // @sect5{<code>EnergyGroup::assemble_ingroup_rhs</code>}
+  //
+  // As explained in the documentation of the <code>EnergyGroup</code> class,
+  // we split assembling the right hand side into two parts: the ingroup and
+  // the cross-group couplings. First, we need a function to assemble the
+  // right hand side of one specific group here, i.e. including an extraneous
+  // source (that we will set to zero for the eigenvalue problem) as well as
+  // the ingroup fission contributions.  (In-group scattering has already been
+  // accounted for with the definition of removal cross section.) The
+  // function's workings are pretty standard as far as assembling right hand
+  // sides go, and therefore do not require more comments except that we
+  // mention that the right hand side vector is set to zero at the beginning
+  // of the function -- something we are not going to do for the cross-group
+  // terms that simply add to the right hand side vector.
+  template <int dim>
+  void EnergyGroup<dim>::assemble_ingroup_rhs (const Function<dim> &extraneous_source)
+  {
+    // Start from a freshly sized right hand side; the in-group terms
+    // assembled below are accumulated into it.
+    system_rhs.reinit (dof_handler.n_dofs());
+
+    const QGauss<dim>  quadrature (fe.degree + 1);
+    const unsigned int n_local_dofs = fe.dofs_per_cell;
+    const unsigned int n_quad       = quadrature.size();
+
+    FEValues<dim> fe_values (fe, quadrature,
+                             update_values    |  update_quadrature_points  |
+                             update_JxW_values);
+
+    Vector<double>       local_rhs (n_local_dofs);
+    std::vector<double>  source_at_q (n_quad);
+    std::vector<double>  old_flux_at_q (n_quad);
+    std::vector<types::global_dof_index> dof_indices (n_local_dofs);
+
+    for (typename DoFHandler<dim>::active_cell_iterator
+         cell = dof_handler.begin_active(),
+         endc = dof_handler.end();
+         cell != endc; ++cell)
+      {
+        fe_values.reinit (cell);
+        local_rhs = 0;
+
+        // In-group fission coefficient for the material of this cell.
+        const double ingroup_fission_XS
+          = material_data.get_fission_dist_XS (group, group, cell->material_id());
+
+        // Sample the external source and the previous iterate at the
+        // quadrature points of this cell.
+        extraneous_source.value_list (fe_values.get_quadrature_points(),
+                                      source_at_q);
+        fe_values.get_function_values (solution_old, old_flux_at_q);
+
+        for (unsigned int q=0; q<n_quad; ++q)
+          for (unsigned int i=0; i<n_local_dofs; ++i)
+            local_rhs(i) += ((source_at_q[q]
+                              +
+                              ingroup_fission_XS *
+                              old_flux_at_q[q]) *
+                             fe_values.shape_value(i,q) *
+                             fe_values.JxW(q));
+
+        // Scatter the local vector into the global right hand side.
+        cell->get_dof_indices (dof_indices);
+        for (unsigned int i=0; i<n_local_dofs; ++i)
+          system_rhs(dof_indices[i]) += local_rhs(i);
+      }
+  }
+
+
+
+  // @sect5{<code>EnergyGroup::assemble_cross_group_rhs</code>}
+  //
+  // The more interesting function for assembling the right hand side vector
+  // for the equation of a single energy group is the one that couples energy
+  // group $g$ and $g'$. As explained in the introduction, we first have to
+  // find the set of cells common to the meshes of the two energy
+  // groups. First we call <code>get_finest_common_cells</code> to obtain this
+  // list of pairs of common cells from both meshes. The two cells of a pair
+  // need not both be active, but at least one of them is. We then hand each
+  // of these cell pairs off to a function that computes the right hand side
+  // terms recursively.
+  //
+  // Note that ingroup coupling is handled already before, so we exit the
+  // function early if $g=g'$.
+  template <int dim>
+  void EnergyGroup<dim>::assemble_cross_group_rhs (const EnergyGroup<dim> &g_prime)
+  {
+    // In-group coupling is assembled in assemble_ingroup_rhs(), so there is
+    // nothing to add when g_prime denotes the same group as this object.
+    if (group == g_prime.group)
+      return;
+
+    typedef typename DoFHandler<dim>::cell_iterator             cell_iterator;
+    typedef std::list<std::pair<cell_iterator, cell_iterator> > CellPairList;
+
+    // Pairs of matching cells on this group's mesh and on the mesh of
+    // g_prime.
+    const CellPairList cell_list
+      = GridTools::get_finest_common_cells (dof_handler,
+                                            g_prime.dof_handler);
+
+    // The recursion on every cell pair starts from the identity matrix. It
+    // does not depend on the pair, so it is built once here rather than
+    // once per loop iteration.
+    FullMatrix<double> unit_matrix (fe.dofs_per_cell);
+    for (unsigned int i=0; i<unit_matrix.m(); ++i)
+      unit_matrix(i,i) = 1;
+
+    for (typename CellPairList::const_iterator cell_iter = cell_list.begin();
+         cell_iter!=cell_list.end(); ++cell_iter)
+      assemble_cross_group_rhs_recursive (g_prime,
+                                          cell_iter->first,
+                                          cell_iter->second,
+                                          unit_matrix);
+  }
+
+
+
+  // @sect5{<code>EnergyGroup::assemble_cross_group_rhs_recursive</code>}
+  //
+  // This is finally the function that handles assembling right hand side
+  // terms on potentially different meshes recursively, using the algorithm
+  // described in the introduction. The function takes a reference to the
+  // object representing energy group $g'$, as well as iterators to
+  // corresponding cells in the meshes for energy groups $g$ and $g'$. At
+  // first, i.e. when this function is called from the one above, these two
+  // cells will be matching cells on two meshes; however, one of the two may
+  // be further refined, and we will call the function recursively with one of
+  // the two iterators replaced by one of the children of the original cell.
+  //
+  // The last argument is the matrix product $B_{c^{(k)}}^T \cdots
+  // B_{c'}^T B_c^T$ from the introduction that interpolates from the coarser
+  // of the two cells to the finer one. If the two cells match, then this is
+  // the identity matrix -- exactly what we pass to this function initially.
+  //
+  // The function has to consider two cases: that both of the two cells are
+  // not further refined, i.e. have no children, in which case we can finally
+  // assemble the right hand side contributions of this pair of cells; and
+  // that one of the two cells is further refined, in which case we have to
+  // keep recursing by looping over the children of the one cell that is not
+  // active. These two cases will be discussed below:
+  template <int dim>
+  void
+  EnergyGroup<dim>::
+  assemble_cross_group_rhs_recursive (const EnergyGroup<dim>                        &g_prime,
+                                      const typename DoFHandler<dim>::cell_iterator &cell_g,
+                                      const typename DoFHandler<dim>::cell_iterator &cell_g_prime,
+                                      const FullMatrix<double>                       prolongation_matrix)
+  {
+    // The first case is that both cells are not further refined. In that case,
+    // we can assemble the relevant terms (see the introduction). This
+    // involves assembling the mass matrix on the finer of the two cells (in
+    // fact there are two mass matrices with different coefficients, one for
+    // the fission distribution cross section $\chi_g\nu\Sigma_{f,g'}$ and one
+    // for the scattering cross section $\Sigma_{s,g'\to g}$). This is
+    // straightforward, but note how we determine which of the two cells is
+    // the finer one by looking at the refinement level of the two cells:
+    if (!cell_g->has_children() && !cell_g_prime->has_children())
+      {
+        const QGauss<dim>  quadrature_formula (fe.degree+1);
+        const unsigned int n_q_points = quadrature_formula.size();
+
+        FEValues<dim> fe_values (fe, quadrature_formula,
+                                 update_values  |  update_JxW_values);
+
+        if (cell_g->level() > cell_g_prime->level())  // cell_g is the finer cell
+          fe_values.reinit (cell_g);
+        else
+          fe_values.reinit (cell_g_prime);
+
+        const double fission_dist_XS
+          = material_data.get_fission_dist_XS (group, g_prime.group,
+                                               cell_g_prime->material_id());
+
+        const double scattering_XS
+          = material_data.get_scattering_XS (g_prime.group, group,
+                                             cell_g_prime->material_id());
+
+        FullMatrix<double>    local_mass_matrix_f (fe.dofs_per_cell,
+                                                   fe.dofs_per_cell);
+        FullMatrix<double>    local_mass_matrix_g (fe.dofs_per_cell,
+                                                   fe.dofs_per_cell);
+
+        for (unsigned int q_point=0; q_point<n_q_points; ++q_point)
+          for (unsigned int i=0; i<fe.dofs_per_cell; ++i)
+            for (unsigned int j=0; j<fe.dofs_per_cell; ++j)
+              {
+                local_mass_matrix_f(i,j) += (fission_dist_XS *
+                                             fe_values.shape_value(i,q_point) *
+                                             fe_values.shape_value(j,q_point) *
+                                             fe_values.JxW(q_point));
+                local_mass_matrix_g(i,j) += (scattering_XS *
+                                             fe_values.shape_value(i,q_point) *
+                                             fe_values.shape_value(j,q_point) *
+                                             fe_values.JxW(q_point));
+              }
+
+        // Now we have all the interpolation (prolongation) matrices as well
+        // as local mass matrices, so we only have to form the product @f[
+        // F_i|_{K_{cc'\cdots c^{(k)}}} = [B_c B_{c'} \cdots B_{c^{(k)}}
+        // M_{K_{cc'\cdots c^{(k)}}}]^{ij} \phi_{g'}^j, @f] or @f[
+        // F_i|_{K_{cc'\cdots c^{(k)}}} = [(B_c B_{c'} \cdots B_{c^{(k)}}
+        // M_{K_{cc'\cdots c^{(k)}}})^T]^{ij} \phi_{g'}^j, @f] depending on
+        // which of the two cells is the finer. We do this using either the
+        // matrix-vector product provided by the <code>vmult</code> function,
+        // or the product with the transpose matrix using <code>Tvmult</code>.
+        // After doing so, we transfer the result into the global right hand
+        // side vector of energy group $g$.
+        Vector<double>       g_prime_new_values (fe.dofs_per_cell);
+        Vector<double>       g_prime_old_values (fe.dofs_per_cell);
+        cell_g_prime->get_dof_values (g_prime.solution_old, g_prime_old_values);
+        cell_g_prime->get_dof_values (g_prime.solution,     g_prime_new_values);
+
+        Vector<double>       cell_rhs (fe.dofs_per_cell);
+        Vector<double>       tmp (fe.dofs_per_cell);
+
+        if (cell_g->level() > cell_g_prime->level())  // cell_g is the finer cell
+          {
+            prolongation_matrix.vmult (tmp, g_prime_old_values);  // fission term: old values of g'
+            local_mass_matrix_f.vmult (cell_rhs, tmp);
+
+            prolongation_matrix.vmult (tmp, g_prime_new_values);  // scattering term: current values of g'
+            local_mass_matrix_g.vmult_add (cell_rhs, tmp);
+          }
+        else  // cell_g_prime is the finer cell, or both are on the same level
+          {
+            local_mass_matrix_f.vmult (tmp, g_prime_old_values);  // fission term: old values of g'
+            prolongation_matrix.Tvmult (cell_rhs, tmp);
+
+            local_mass_matrix_g.vmult (tmp, g_prime_new_values);  // scattering term: current values of g'
+            prolongation_matrix.Tvmult_add (cell_rhs, tmp);
+          }
+
+        std::vector<types::global_dof_index> local_dof_indices (fe.dofs_per_cell);
+        cell_g->get_dof_indices (local_dof_indices);
+
+        for (unsigned int i=0; i<fe.dofs_per_cell; ++i)
+          system_rhs(local_dof_indices[i]) += cell_rhs(i);  // added to, not overwritten
+      }
+
+    // The alternative is that one of the two cells is further refined. In
+    // that case, we have to loop over all the children, multiply the existing
+    // interpolation (prolongation) product of matrices from the left with the
+    // interpolation from the present cell to its child (using the
+    // matrix-matrix multiplication function <code>mmult</code>), and then
+    // hand the result off to this very same function again, but with the cell
+    // that has children replaced by one of its children:
+    else
+      for (unsigned int child=0; child<GeometryInfo<dim>::max_children_per_cell; ++child)
+        {
+          FullMatrix<double>   new_matrix (fe.dofs_per_cell, fe.dofs_per_cell);
+          fe.get_prolongation_matrix(child).mmult (new_matrix,
+                                                   prolongation_matrix);
+
+          if (cell_g->has_children())  // descend on the mesh of group g
+            assemble_cross_group_rhs_recursive (g_prime,
+                                                cell_g->child(child), cell_g_prime,
+                                                new_matrix);
+          else  // otherwise cell_g_prime is the one with children
+            assemble_cross_group_rhs_recursive (g_prime,
+                                                cell_g, cell_g_prime->child(child),
+                                                new_matrix);
+        }
+  }
+
+
+  // @sect5{<code>EnergyGroup::get_fission_source</code>}
+  //
+  // In the (inverse) power iteration, we use the integrated fission source to
+  // update the $k$-eigenvalue. Given its definition, the following function
+  // is essentially self-explanatory:
+  template <int dim>
+  double EnergyGroup<dim>::get_fission_source () const
+  {
+    const QGauss<dim>  quadrature (fe.degree + 1);
+    const unsigned int n_quad = quadrature.size();
+
+    FEValues<dim> fe_values (fe, quadrature,
+                             update_values  |  update_JxW_values);
+
+    std::vector<double> flux_at_q (n_quad);
+
+    // Accumulate the integral of the fission cross section times the
+    // current solution over all active cells.
+    double integral = 0;
+    for (typename DoFHandler<dim>::active_cell_iterator
+         cell = dof_handler.begin_active(),
+         endc = dof_handler.end();
+         cell != endc; ++cell)
+      {
+        const double fission_XS
+          = material_data.get_fission_XS(group, cell->material_id());
+
+        fe_values.reinit (cell);
+        fe_values.get_function_values (solution, flux_at_q);
+
+        for (unsigned int q=0; q<n_quad; ++q)
+          integral += (fission_XS *
+                       flux_at_q[q] *
+                       fe_values.JxW(q));
+      }
+
+    return integral;
+  }
+
+
+  // @sect5{<code>EnergyGroup::solve</code>}
+  //
+  // Next a function that solves the linear system assembled before. Things
+  // are pretty much standard, except that we delayed applying boundary values
+  // until we get here, since in all the previous functions we were still
+  // adding up contributions to the right hand side vector.
+  template <int dim>
+  void
+  EnergyGroup<dim>::solve ()
+  {
+    hanging_node_constraints.condense (system_rhs);  // rhs only; the matrix was condensed at assembly
+    MatrixTools::apply_boundary_values (boundary_values,  // values interpolated in setup_linear_system()
+                                        system_matrix,
+                                        solution,
+                                        system_rhs);
+
+    SolverControl           solver_control (system_matrix.m(),
+                                            1e-12*system_rhs.l2_norm());  // tolerance scaled by the rhs norm
+    SolverCG<>              cg (solver_control);
+
+    PreconditionSSOR<> preconditioner;
+    preconditioner.initialize(system_matrix, 1.2);  // presumably 1.2 is the relaxation parameter -- confirm
+
+    cg.solve (system_matrix, solution, system_rhs, preconditioner);
+
+    hanging_node_constraints.distribute (solution);  // enforce hanging node constraints on the result
+  }
+
+
+
+  // @sect5{<code>EnergyGroup::estimate_errors</code>}
+  //
+  // Mesh refinement is split into two functions. The first estimates the
+  // error for each cell, normalizes it by the magnitude of the solution, and
+  // returns it in the vector given as an argument. The calling function
+  // collects all error indicators from all energy groups, and computes
+  // thresholds for refining and coarsening cells.
+  template <int dim>
+  void EnergyGroup<dim>::estimate_errors (Vector<float> &error_indicators) const
+  {
+    const QGauss<dim-1> face_quadrature (fe.degree + 1);
+
+    // Compute the raw indicators, handing an empty function map to the
+    // estimator.
+    KellyErrorEstimator<dim>::estimate (dof_handler,
+                                        face_quadrature,
+                                        typename FunctionMap<dim>::type(),
+                                        solution,
+                                        error_indicators);
+
+    // Scale the indicators by the maximum norm of the solution.
+    const double solution_max = solution.linfty_norm();
+    error_indicators /= solution_max;
+  }
+
+
+
+  // @sect5{<code>EnergyGroup::refine_grid</code>}
+  //
+  // The second part is to refine the grid given the error indicators computed
+  // in the previous function and error thresholds above which cells shall be
+  // refined or below which cells shall be coarsened. Note that we do not use
+  // any of the functions in <code>GridRefinement</code> here, but rather set
+  // refinement flags ourselves.
+  //
+  // After setting these flags, we use the SolutionTransfer class to move the
+  // solution vector from the old to the new mesh. The procedure used here is
+  // described in detail in the documentation of that class:
+  template <int dim>
+  void EnergyGroup<dim>::refine_grid (const Vector<float> &error_indicators,
+                                      const double         refine_threshold,
+                                      const double         coarsen_threshold)
+  {
+    typename Triangulation<dim>::active_cell_iterator
+    cell = triangulation.begin_active(),
+    endc = triangulation.end();
+
+    for (; cell!=endc; ++cell)  // indicators are indexed by active cell index
+      if (error_indicators(cell->active_cell_index()) > refine_threshold)
+        cell->set_refine_flag ();
+      else if (error_indicators(cell->active_cell_index()) < coarsen_threshold)
+        cell->set_coarsen_flag ();
+
+    SolutionTransfer<dim> soltrans(dof_handler);
+
+    triangulation.prepare_coarsening_and_refinement();
+    soltrans.prepare_for_coarsening_and_refinement(solution);  // registers `solution` on the old mesh
+
+    triangulation.execute_coarsening_and_refinement ();
+    dof_handler.distribute_dofs (fe);
+    this->setup_linear_system ();  // rebuilds constraints, matrix, rhs and boundary values
+
+    solution.reinit (dof_handler.n_dofs());
+    soltrans.interpolate(solution_old, solution);  // NOTE(review): reads solution_old, but solution was registered above -- confirm
+
+    // enforce constraints to make the interpolated solution conforming on
+    // the new mesh:
+    hanging_node_constraints.distribute(solution);
+
+    solution_old.reinit (dof_handler.n_dofs());
+    solution_old = solution;  // both vectors now agree on the new mesh
+  }
+
+
+  // @sect5{<code>EnergyGroup::output_results</code>}
+  //
+  // The last function of this class outputs meshes and solutions after each
+  // mesh iteration. This has been shown many times before. The only thing
+  // worth pointing out is the use of the
+  // <code>Utilities::int_to_string</code> function to convert an integer into
+  // its string representation. The second argument of that function denotes
+  // how many digits we shall use -- if this value was larger than one, then
+  // the number would be padded by leading zeros.
+  template <int dim>
+  void
+  EnergyGroup<dim>::output_results (const unsigned int cycle) const
+  {
+    // Both files carry the same "<group>.<cycle>" tag in their names; each
+    // number is written with at least one digit, i.e. without zero padding.
+    const std::string tag = Utilities::int_to_string(group,1) +
+                            "." +
+                            Utilities::int_to_string(cycle,1);
+
+    // First the mesh, written in EPS format. The stream lives in its own
+    // scope so that the file is closed before the next one is written.
+    {
+      const std::string grid_filename = "grid-" + tag + ".eps";
+      std::ofstream grid_file (grid_filename.c_str());
+
+      GridOut grid_out;
+      grid_out.write_eps (triangulation, grid_file);
+    }
+
+    // Then the solution of this energy group, written in GMV format.
+    {
+      const std::string solution_filename = "solution-" + tag + ".gmv";
+
+      DataOut<dim> data_out;
+      data_out.attach_dof_handler (dof_handler);
+      data_out.add_data_vector (solution, "solution");
+      data_out.build_patches ();
+
+      std::ofstream solution_file (solution_filename.c_str());
+      data_out.write_gmv (solution_file);
+    }
+  }
+
+
+
+  // @sect3{The <code>NeutronDiffusionProblem</code> class template}
+
+  // This is the main class of the program, not because it implements all the
+  // functionality (in fact, most of it is implemented in the
+  // <code>EnergyGroup</code> class) but because it contains the driving
+  // algorithm that determines what to compute and when. It is mostly as shown
+  // in many of the other tutorial programs in that it has a public
+  // <code>run</code> function and private functions doing all the rest. In
+  // several places, we have to do something for all energy groups, in which
+  // case we will start threads for each group to let these things run in
+  // parallel if deal.II was configured for multithreading.  For strategies of
+  // parallelization, take a look at the @ref threads module.
+  //
+  // The biggest difference to previous example programs is that we also
+  // declare a nested class that has member variables for all the run-time
+  // parameters that can be passed to the program in an input file. Right now,
+  // these are the number of energy groups, the number of refinement cycles,
+  // the polynomial degree of the finite element to be used, and the tolerance
+  // used to determine when convergence of the inverse power iteration has
+  // occurred. In addition, we have a constructor of this class that sets all
+  // these values to their default values, a function
+  // <code>declare_parameters</code> that describes to the ParameterHandler
+  // class already used in step-19 what parameters are accepted in the input
+  // file, and a function <code>get_parameters</code> that can extract the
+  // values of these parameters from a ParameterHandler object.
+  template <int dim>
+  class NeutronDiffusionProblem
+  {
+  public:
+    // Run-time parameters of the program. The values are either the
+    // defaults set by the constructor or those read from a ParameterHandler
+    // via get_parameters().
+    class Parameters
+    {
+    public:
+      Parameters ();
+
+      static void declare_parameters (ParameterHandler &prm);
+      void get_parameters (ParameterHandler &prm);
+
+      unsigned int n_groups;
+      unsigned int n_refinement_cycles;
+
+      unsigned int fe_degree;
+
+      double convergence_tolerance;
+    };
+
+
+
+    NeutronDiffusionProblem (const Parameters &parameters);
+    ~NeutronDiffusionProblem ();
+
+    void run ();
+
+  private:
+    // @sect5{Private member functions}
+
+    // There are not that many member functions in this class since most of
+    // the functionality has been moved into the <code>EnergyGroup</code>
+    // class and is simply called from the <code>run()</code> member function
+    // of this class. The ones that remain have self-explanatory names:
+    void initialize_problem();
+
+    void refine_grid ();
+
+    double get_total_fission_source () const;
+
+
+    // @sect5{Private member variables}
+
+    // Next, we have a few member variables. In particular, these are (i) a
+    // reference to the parameter object (owned by the main function of this
+    // program, and passed to the constructor of this class), (ii) an object
+    // describing the material parameters for the number of energy groups
+    // requested in the input file, and (iii) the finite element to be used by
+    // all energy groups:
+    const Parameters  &parameters;
+    const MaterialData material_data;
+    FE_Q<dim>          fe;
+
+    // Furthermore, we have (iv) the value of the computed eigenvalue at the
+    // present iteration. This is, in fact, the only part of the solution that
+    // is shared between all energy groups -- all other parts of the solution,
+    // such as neutron fluxes are particular to one or the other energy group,
+    // and are therefore stored in objects that describe a single energy
+    // group:
+    double k_eff;
+
+    // Finally, (v), we have an array of pointers to the energy group
+    // objects. The length of this array is, of course, equal to the number of
+    // energy groups specified in the parameter file. The objects are
+    // allocated in initialize_problem() and released by the destructor.
+    std::vector<EnergyGroup<dim>*> energy_groups;
+  };
+
+
+  // @sect4{Implementation of the <code>NeutronDiffusionProblem::Parameters</code> class}
+
+  // Before going on to the implementation of the outer class, we have to
+  // implement the functions of the parameters structure. This is pretty
+  // straightforward and, in fact, looks pretty much the same for all such
+  // parameters classes using the ParameterHandler capabilities. We will
+  // therefore not comment further on this:
+  template <int dim>
+  NeutronDiffusionProblem<dim>::Parameters::Parameters ()
+    : n_groups (2)
+    , n_refinement_cycles (5)
+    , fe_degree (2)
+    , convergence_tolerance (1e-12)
+  {
+    // Nothing else to do: all members receive their default values in the
+    // initializer list above.
+  }
+
+
+
+  // Declare all entries this program accepts in its input file, together
+  // with their default values and the range of admissible values.
+  //
+  // The integer entries are later stored in <code>unsigned int</code>
+  // variables, so each pattern carries a lower bound: a negative number in
+  // the input file is then rejected when the file is read instead of being
+  // silently converted into a huge unsigned value.
+  template <int dim>
+  void
+  NeutronDiffusionProblem<dim>::Parameters::
+  declare_parameters (ParameterHandler &prm)
+  {
+    // at least one energy group
+    prm.declare_entry ("Number of energy groups", "2",
+                       Patterns::Integer (1),
+                       "The number of energy different groups considered");
+    // zero cycles is allowed and simply means that nothing is computed
+    prm.declare_entry ("Refinement cycles", "5",
+                       Patterns::Integer (0),
+                       "Number of refinement cycles to be performed");
+    // polynomial degree of at least one
+    prm.declare_entry ("Finite element degree", "2",
+                       Patterns::Integer (1),
+                       "Polynomial degree of the finite element to be used");
+    // a tolerance can not be negative
+    prm.declare_entry ("Power iteration tolerance", "1e-12",
+                       Patterns::Double (0.),
+                       "Inner power iterations are stopped when the change in k_eff falls "
+                       "below this tolerance");
+  }
+
+
+
+  template <int dim>
+  void
+  NeutronDiffusionProblem<dim>::Parameters::
+  get_parameters (ParameterHandler &prm)
+  {
+    // The integer entries are stored in unsigned variables; the casts only
+    // spell out the conversion that the assignment performs anyway.
+    n_groups
+      = static_cast<unsigned int>(prm.get_integer ("Number of energy groups"));
+    n_refinement_cycles
+      = static_cast<unsigned int>(prm.get_integer ("Refinement cycles"));
+    fe_degree
+      = static_cast<unsigned int>(prm.get_integer ("Finite element degree"));
+
+    convergence_tolerance
+      = prm.get_double ("Power iteration tolerance");
+  }
+
+
+
+
+  // @sect4{Implementation of the <code>NeutronDiffusionProblem</code> class}
+
+  // Now for the <code>NeutronDiffusionProblem</code> class. The constructor
+  // and destructor have nothing of much interest:
+  // Store a reference to the parameter object and set up the material data
+  // and the finite element from it. The eigenvalue gets a defined start
+  // value so that no code path can ever read an indeterminate
+  // <code>k_eff</code> before the first power iteration has computed it.
+  template <int dim>
+  NeutronDiffusionProblem<dim>::
+  NeutronDiffusionProblem (const Parameters &parameters)
+    :
+    parameters (parameters),
+    material_data (parameters.n_groups),
+    fe (parameters.fe_degree),
+    k_eff (0.0)
+  {}
+
+
+
+  template <int dim>
+  NeutronDiffusionProblem<dim>::~NeutronDiffusionProblem ()
+  {
+    // Release every energy group object held in the array, then drop the
+    // now dangling pointers.
+    for (typename std::vector<EnergyGroup<dim>*>::iterator
+         group = energy_groups.begin();
+         group != energy_groups.end();
+         ++group)
+      delete *group;
+
+    energy_groups.clear ();
+  }
+
+  // @sect5{<code>NeutronDiffusionProblem::initialize_problem</code>}
+  //
+  // The first function of interest is the one that sets up the geometry of
+  // the reactor core. This is described in more detail in the introduction.
+  //
+  // The first part of the function defines geometry data, and then creates a
+  // coarse mesh that has as many cells as there are fuel rods (or pin cells,
+  // for that matter) in that part of the reactor core that we simulate. As
+  // mentioned when interpolating boundary values above, the last parameter to
+  // the <code>GridGenerator::subdivided_hyper_rectangle</code> function
+  // specifies that sides of the domain shall have unique boundary indicators
+  // that will later allow us to determine in a simple way which of the
+  // boundaries have Neumann and which have Dirichlet conditions attached to
+  // them.
+  template <int dim>
+  void NeutronDiffusionProblem<dim>::initialize_problem()
+  {
+    // Geometry constants: number of rods per assembly in each direction,
+    // the pitch between neighboring rods, and the height of an assembly.
+    const unsigned int rods_per_assembly_x = 17,
+                       rods_per_assembly_y = 17;
+    const double pin_pitch_x = 1.26,
+                 pin_pitch_y = 1.26;
+    const double assembly_height = 200;
+
+    // Number of assemblies in each coordinate direction.
+    const unsigned int assemblies_x = 2,
+                       assemblies_y = 2,
+                       assemblies_z = 1;
+
+    // Corner points of the domain. The lower left corner is the origin; the
+    // opposite corner follows from the constants above, using the two- or
+    // three-argument Point constructor depending on dim.
+    const Point<dim> bottom_left = Point<dim>();
+    const Point<dim> upper_right = (dim == 2
+                                    ?
+                                    Point<dim> (assemblies_x*rods_per_assembly_x*pin_pitch_x,
+                                                assemblies_y*rods_per_assembly_y*pin_pitch_y)
+                                    :
+                                    Point<dim> (assemblies_x*rods_per_assembly_x*pin_pitch_x,
+                                                assemblies_y*rods_per_assembly_y*pin_pitch_y,
+                                                assemblies_z*assembly_height));
+
+    // One coarse cell per rod in the x- and y-directions, and one cell per
+    // assembly in the z-direction.
+    std::vector<unsigned int> n_subdivisions;
+    n_subdivisions.push_back (assemblies_x*rods_per_assembly_x);
+    if (dim >= 2)
+      n_subdivisions.push_back (assemblies_y*rods_per_assembly_y);
+    if (dim >= 3)
+      n_subdivisions.push_back (assemblies_z);
+
+    Triangulation<dim> coarse_grid;
+    GridGenerator::subdivided_hyper_rectangle (coarse_grid,
+                                               n_subdivisions,
+                                               bottom_left,
+                                               upper_right,
+                                               true);
+
+
+    // The second part of the function deals with material numbers of pin
+    // cells of each type of assembly. Here, we define four different types of
+    // assembly, for which we describe the arrangement of fuel rods in the
+    // following tables.
+    //
+    // The assemblies described here are taken from the benchmark mentioned in
+    // the introduction and are (in this order): <ol> <li>'UX' Assembly: UO2
+    // fuel assembly with 24 guide tubes and a central Moveable Fission
+    // Chamber <li>'UA' Assembly: UO2 fuel assembly with 24 AIC and a central
+    // Moveable Fission Chamber <li>'PX' Assembly: MOX fuel assembly with 24
+    // guide tubes and a central Moveable Fission Chamber <li>'R' Assembly: a
+    // reflector.  </ol>
+    //
+    // Note that the numbers listed here and taken from the benchmark
+    // description are, in good old Fortran fashion, one-based. We will later
+    // subtract one from each number when assigning materials to individual
+    // cells to convert things into the C-style zero-based indexing.
+    const unsigned int n_assemblies=4;
+    const unsigned int
+    assembly_materials[n_assemblies][rods_per_assembly_x][rods_per_assembly_y]
+    =
+    {
+      {
+        { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+        { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+        { 1, 1, 1, 1, 1, 5, 1, 1, 5, 1, 1, 5, 1, 1, 1, 1, 1 },
+        { 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1 },
+        { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+        { 1, 1, 5, 1, 1, 5, 1, 1, 5, 1, 1, 5, 1, 1, 5, 1, 1 },
+        { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+        { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+        { 1, 1, 5, 1, 1, 5, 1, 1, 7, 1, 1, 5, 1, 1, 5, 1, 1 },
+        { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+        { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+        { 1, 1, 5, 1, 1, 5, 1, 1, 5, 1, 1, 5, 1, 1, 5, 1, 1 },
+        { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+        { 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1 },
+        { 1, 1, 1, 1, 1, 5, 1, 1, 5, 1, 1, 5, 1, 1, 1, 1, 1 },
+        { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+        { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }
+      },
+      {
+        { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+        { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+        { 1, 1, 1, 1, 1, 8, 1, 1, 8, 1, 1, 8, 1, 1, 1, 1, 1 },
+        { 1, 1, 1, 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 8, 1, 1, 1 },
+        { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+        { 1, 1, 8, 1, 1, 8, 1, 1, 8, 1, 1, 8, 1, 1, 8, 1, 1 },
+        { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+        { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+        { 1, 1, 8, 1, 1, 8, 1, 1, 7, 1, 1, 8, 1, 1, 8, 1, 1 },
+        { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+        { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+        { 1, 1, 8, 1, 1, 8, 1, 1, 8, 1, 1, 8, 1, 1, 8, 1, 1 },
+        { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+        { 1, 1, 1, 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 8, 1, 1, 1 },
+        { 1, 1, 1, 1, 1, 8, 1, 1, 8, 1, 1, 8, 1, 1, 1, 1, 1 },
+        { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+        { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }
+      },
+      {
+        { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
+        { 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2 },
+        { 2, 3, 3, 3, 3, 5, 3, 3, 5, 3, 3, 5, 3, 3, 3, 3, 2 },
+        { 2, 3, 3, 5, 3, 4, 4, 4, 4, 4, 4, 4, 3, 5, 3, 3, 2 },
+        { 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2 },
+        { 2, 3, 5, 4, 4, 5, 4, 4, 5, 4, 4, 5, 4, 4, 5, 3, 2 },
+        { 2, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 2 },
+        { 2, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 2 },
+        { 2, 3, 5, 4, 4, 5, 4, 4, 7, 4, 4, 5, 4, 4, 5, 3, 2 },
+        { 2, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 2 },
+        { 2, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 2 },
+        { 2, 3, 5, 4, 4, 5, 4, 4, 5, 4, 4, 5, 4, 4, 5, 3, 2 },
+        { 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2 },
+        { 2, 3, 3, 5, 3, 4, 4, 4, 4, 4, 4, 4, 3, 5, 3, 3, 2 },
+        { 2, 3, 3, 3, 3, 5, 3, 3, 5, 3, 3, 5, 3, 3, 3, 3, 2 },
+        { 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2 },
+        { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }
+      },
+      {
+        { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 },
+        { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 },
+        { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 },
+        { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 },
+        { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 },
+        { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 },
+        { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 },
+        { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 },
+        { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 },
+        { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 },
+        { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 },
+        { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 },
+        { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 },
+        { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 },
+        { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 },
+        { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 },
+        { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 }
+      }
+    };
+
+    // After the description of the materials that make up an assembly, we
+    // have to specify the arrangement of assemblies within the core. We use a
+    // symmetric pattern that in fact only uses the 'UX' and 'PX' assemblies:
+    const unsigned int core[assemblies_x][assemblies_y][assemblies_z]
+    =  {{{0}, {2}}, {{2}, {0}}};
+
+    // We are now in a position to actually set material IDs for each cell. To
+    // this end, we loop over all cells, look at the location of the cell's
+    // center, and determine which assembly and fuel rod this would be in. (We
+    // add a few checks to see that the locations we compute are within the
+    // bounds of the arrays in which we have to look up materials.) At the end
+    // of the loop, we set material identifiers accordingly:
+    for (typename Triangulation<dim>::active_cell_iterator
+         cell = coarse_grid.begin_active();
+         cell!=coarse_grid.end();
+         ++cell)
+      {
+        const Point<dim> cell_center = cell->center();
+
+        // tmp_x counts rods from the left edge of the domain; ax is the
+        // index of the assembly this rod belongs to, and cx the index of the
+        // rod within that assembly.
+        const unsigned int tmp_x = int(cell_center[0]/pin_pitch_x);
+        const unsigned int ax = tmp_x/rods_per_assembly_x;
+        const unsigned int cx = tmp_x - ax * rods_per_assembly_x;
+
+        // The same decomposition in the y-direction.
+        const unsigned tmp_y = int(cell_center[1]/pin_pitch_y);
+        const unsigned int ay = tmp_y/rods_per_assembly_y;
+        const unsigned int cy = tmp_y - ay * rods_per_assembly_y;
+
+        // In 2d there is only a single layer of assemblies; otherwise the
+        // layer follows from the last coordinate of the cell center.
+        const unsigned int az = (dim == 2
+                                 ?
+                                 0
+                                 :
+                                 int (cell_center[dim-1]/assembly_height));
+
+        Assert (ax < assemblies_x, ExcInternalError());
+        Assert (ay < assemblies_y, ExcInternalError());
+        Assert (az < assemblies_z, ExcInternalError());
+
+        Assert (core[ax][ay][az] < n_assemblies, ExcInternalError());
+
+        Assert (cx < rods_per_assembly_x, ExcInternalError());
+        Assert (cy < rods_per_assembly_y, ExcInternalError());
+
+        // The tables are one-based, material ids are zero-based.
+        cell->set_material_id(assembly_materials[core[ax][ay][az]][cx][cy] - 1);
+      }
+
+    // With the coarse mesh so initialized, we create the appropriate number
+    // of energy group objects and let them initialize their individual meshes
+    // with the coarse mesh generated above:
+    energy_groups.resize (parameters.n_groups);
+    for (unsigned int group=0; group<parameters.n_groups; ++group)
+      energy_groups[group] = new EnergyGroup<dim> (group, material_data,
+                                                   coarse_grid, fe);
+  }
+
+
+  // @sect5{<code>NeutronDiffusionProblem::get_total_fission_source</code>}
+  //
+  // In the eigenvalue computation, we need to calculate total fission neutron
+  // source after each power iteration. The total power then is used to renew
+  // k-effective.
+  //
+  // Since the total fission source is a sum over all the energy groups, and
+  // since each of these sums can be computed independently, we actually do
+  // this in parallel. One of the problems is that the function in the
+  // <code>EnergyGroup</code> class that computes the fission source returns a
+  // value. If we now simply spin off a new thread, we have to later capture
+  // the return value of the function run on that thread. The way this can be
+  // done is to use the return value of the Threads::new_thread function,
+  // which returns an object of type Threads::Thread@<double@> if the function
+  // spawned returns a double. We can then later ask this object for the
+  // returned value (when doing so, the Threads::Thread::return_value function
+  // first waits for the thread to finish if it hasn't done so already).
+  //
+  // The way this function then works is to first spawn one thread for each
+  // energy group we work with, then one-by-one collecting the returned values
+  // of each thread and return the sum.
+  template <int dim>
+  double NeutronDiffusionProblem<dim>::get_total_fission_source () const
+  {
+    const unsigned int n_groups = parameters.n_groups;
+
+    // Start one thread per energy group; each one computes the fission
+    // source of its group.
+    std::vector<Threads::Thread<double> > workers;
+    for (unsigned int g=0; g<n_groups; ++g)
+      {
+        EnergyGroup<dim> &energy_group = *energy_groups[g];
+        workers.push_back (Threads::new_thread (&EnergyGroup<dim>::get_fission_source,
+                                                energy_group));
+      }
+
+    // Collect the results in the order in which the threads were started
+    // and add them up.
+    double total = 0;
+    for (unsigned int g=0; g<n_groups; ++g)
+      total += workers[g].return_value ();
+
+    return total;
+  }
+
+
+
+
+  // @sect5{<code>NeutronDiffusionProblem::refine_grid</code>}
+  //
+  // The next function lets the individual energy group objects refine their
+  // meshes. Much of this, again, is a task that can be done independently in
+  // parallel: first, let all the energy group objects calculate their error
+  // indicators in parallel, then compute the maximum error indicator over all
+  // energy groups and determine thresholds for refinement and coarsening of
+  // cells, and then ask all the energy groups to refine their meshes
+  // accordingly, again in parallel.
+  template <int dim>
+  void NeutronDiffusionProblem<dim>::refine_grid ()
+  {
+    const unsigned int n_groups = parameters.n_groups;
+
+    // One block of error indicators per energy group, each with as many
+    // entries as that group's mesh has active cells.
+    std::vector<types::global_dof_index> block_sizes (n_groups);
+    for (unsigned int g=0; g<n_groups; ++g)
+      block_sizes[g] = energy_groups[g]->n_active_cells();
+
+    BlockVector<float>  indicators (block_sizes);
+
+    // Step 1: every group estimates its errors on a thread of its own,
+    // writing into its own block.
+    {
+      Threads::ThreadGroup<> estimators;
+      for (unsigned int g=0; g<n_groups; ++g)
+        estimators += Threads::new_thread (&EnergyGroup<dim>::estimate_errors,
+                                           *energy_groups[g],
+                                           indicators.block(g));
+      estimators.join_all ();
+    }
+
+    // Step 2: the thresholds are fixed fractions of the largest indicator
+    // found in any of the groups.
+    const float largest_indicator = indicators.linfty_norm();
+    const float refine_threshold  = 0.3*largest_indicator;
+    const float coarsen_threshold = 0.01*largest_indicator;
+
+    // Step 3: every group refines its mesh using these common thresholds,
+    // again one thread per group.
+    {
+      Threads::ThreadGroup<> refiners;
+      for (unsigned int g=0; g<n_groups; ++g)
+        refiners += Threads::new_thread (&EnergyGroup<dim>::refine_grid,
+                                         *energy_groups[g],
+                                         indicators.block(g),
+                                         refine_threshold,
+                                         coarsen_threshold);
+      refiners.join_all ();
+    }
+  }
+
+
+  // @sect5{<code>NeutronDiffusionProblem::run</code>}
+  //
+  // Finally, this is the function where the meat is: iterate on a sequence of
+  // meshes, and on each of them do a power iteration to compute the
+  // eigenvalue.
+  //
+  // Given the description of the algorithm in the introduction, there is
+  // actually not much to comment on:
+  template <int dim>
+  void NeutronDiffusionProblem<dim>::run ()
+  {
+    std::cout << std::setprecision (12) << std::fixed;
+
+    // No eigenvalue has been computed at this point, so start the history
+    // from zero instead of copying the member variable. With this start
+    // value, the relative change after the first iteration is one, which
+    // can not be mistaken for convergence.
+    double k_eff_old = 0.0;
+
+    Timer timer;
+    timer.start ();
+
+    for (unsigned int cycle=0; cycle<parameters.n_refinement_cycles; ++cycle)
+      {
+        std::cout << "Cycle " << cycle << ':' << std::endl;
+
+        // In the first cycle, build the coarse mesh and the energy group
+        // objects; in all later ones, refine the meshes and scale the
+        // transferred solutions by the eigenvalue of the previous cycle.
+        if (cycle == 0)
+          {
+            initialize_problem();
+            for (unsigned int group=0; group<parameters.n_groups; ++group)
+              energy_groups[group]->setup_linear_system ();
+          }
+
+        else
+          {
+            refine_grid ();
+            for (unsigned int group=0; group<parameters.n_groups; ++group)
+              energy_groups[group]->solution *= k_eff;
+          }
+
+
+        std::cout << "   Numbers of active cells:       ";
+        for (unsigned int group=0; group<parameters.n_groups; ++group)
+          std::cout << energy_groups[group]->n_active_cells()
+                    << ' ';
+        std::cout << std::endl;
+        std::cout << "   Numbers of degrees of freedom: ";
+        for (unsigned int group=0; group<parameters.n_groups; ++group)
+          std::cout << energy_groups[group]->n_dofs()
+                    << ' ';
+        std::cout << std::endl << std::endl;
+
+
+        // The system matrices of the individual groups are independent of
+        // each other and are assembled in parallel.
+        Threads::ThreadGroup<> threads;
+        for (unsigned int group=0; group<parameters.n_groups; ++group)
+          threads += Threads::new_thread
+                     (&EnergyGroup<dim>::assemble_system_matrix,
+                      *energy_groups[group]);
+        threads.join_all ();
+
+        // Power iteration: solve for each group in turn, update the
+        // eigenvalue, and stop once its relative change drops below the
+        // tolerance or the iteration counter reaches 500.
+        double error;
+        unsigned int iteration = 1;
+        do
+          {
+            for (unsigned int group=0; group<parameters.n_groups; ++group)
+              {
+                energy_groups[group]->assemble_ingroup_rhs (ZeroFunction<dim>());
+
+                for (unsigned int bgroup=0; bgroup<parameters.n_groups; ++bgroup)
+                  energy_groups[group]->assemble_cross_group_rhs (*energy_groups[bgroup]);
+
+                energy_groups[group]->solve ();
+              }
+
+            k_eff = get_total_fission_source();
+            error = fabs(k_eff-k_eff_old)/fabs(k_eff);
+            std::cout << "   Iteration " << iteration
+                      << ": k_eff=" << k_eff
+                      << std::endl;
+            k_eff_old=k_eff;
+
+            for (unsigned int group=0; group<parameters.n_groups; ++group)
+              {
+                energy_groups[group]->solution_old = energy_groups[group]->solution;
+                energy_groups[group]->solution_old /= k_eff;
+              }
+
+            ++iteration;
+          }
+        while ((error > parameters.convergence_tolerance)
+               &&
+               (iteration < 500));
+
+        for (unsigned int group=0; group<parameters.n_groups; ++group)
+          energy_groups[group]->output_results (cycle);
+
+        // Add up the degrees of freedom of all groups, however many there
+        // are, rather than assuming that there are exactly two of them.
+        types::global_dof_index total_n_dofs = 0;
+        for (unsigned int group=0; group<parameters.n_groups; ++group)
+          total_n_dofs += energy_groups[group]->n_dofs();
+
+        std::cout << std::endl;
+        std::cout << "   Cycle=" << cycle
+                  << ", n_dofs=" << total_n_dofs
+                  << ",  k_eff=" << k_eff
+                  << ", time=" << timer()
+                  << std::endl;
+
+
+        std::cout << std::endl << std::endl;
+      }
+  }
+}
+
+
+
+// @sect3{The <code>main()</code> function}
+//
+// The last thing in the program is the <code>main()</code> function. The
+// structure is as in most other tutorial programs, with the only exception
+// that we here handle a parameter file.  To this end, we first look at the
+// command line arguments passed to this function: if no input file is
+// specified on the command line, then use "project.prm", otherwise take the
+// filename given as the first argument on the command line.
+//
+// With this, we create a ParameterHandler object, let the
+// <code>NeutronDiffusionProblem::Parameters</code> class declare all the
+// parameters it wants to see in the input file (or, take the default values,
+// if nothing is listed in the parameter file), then read the input file, ask
+// the parameters object to extract the values, and finally hand everything
+// off to an object of type <code>NeutronDiffusionProblem</code> for
+// computation of the eigenvalue:
+int main (int argc, char **argv)
+{
+  try
+    {
+      using namespace dealii;
+      using namespace Step28;
+
+      const unsigned int dim = 2;
+      typedef NeutronDiffusionProblem<dim>::Parameters ProblemParameters;
+
+      // Take the name of the parameter file from the first command line
+      // argument if there is one, and fall back to "project.prm" otherwise.
+      const std::string filename = (argc < 2 ? "project.prm" : argv[1]);
+
+      // Declare the known parameters, read the file, and extract the
+      // values into the parameters object.
+      ParameterHandler parameter_handler;
+
+      ProblemParameters parameters;
+      ProblemParameters::declare_parameters (parameter_handler);
+
+      parameter_handler.read_input (filename);
+
+      parameters.get_parameters (parameter_handler);
+
+      // Hand everything over to the object that does the actual work.
+      NeutronDiffusionProblem<dim> neutron_diffusion_problem (parameters);
+      neutron_diffusion_problem.run ();
+    }
+  catch (const std::exception &exc)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl
+                << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+
+      return 1;
+    }
+  catch (...)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl
+                << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+
+      return 1;
+    }
+
+  return 0;
+}
diff --git a/examples/step-29/CMakeLists.txt b/examples/step-29/CMakeLists.txt
new file mode 100644
index 0000000..106950c
--- /dev/null
+++ b/examples/step-29/CMakeLists.txt
@@ -0,0 +1,50 @@
+##
+#  CMake script for the step-29 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-29")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+# Look for a deal.II installation (version 8.4 is requested) in the hinted
+# directories. QUIET suppresses CMake's own diagnostics; a failure is
+# reported by the explicit check that follows.
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+#
+# Are all dependencies fulfilled?
+#
+# This tutorial step needs UMFPACK support; stop with an error if
+# DEAL_II_WITH_UMFPACK is not set to a true value.
+IF(NOT DEAL_II_WITH_UMFPACK)
+  MESSAGE(FATAL_ERROR "
+Error! The deal.II library found at ${DEAL_II_PATH} was not configured with
+    DEAL_II_WITH_UMFPACK = ON
+One or all of these are OFF in your installation but are required for this tutorial step."
+    )
+ENDIF()
+
+# The two DEAL_II_* commands below are not defined in this file; presumably
+# they are macros made available by the deal.II package found above, so
+# they have to come after FIND_PACKAGE -- TODO confirm.
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-29/doc/builds-on b/examples/step-29/doc/builds-on
new file mode 100644
index 0000000..48a0f73
--- /dev/null
+++ b/examples/step-29/doc/builds-on
@@ -0,0 +1 @@
+step-4
diff --git a/examples/step-29/doc/intro.dox b/examples/step-29/doc/intro.dox
new file mode 100644
index 0000000..32c62b8
--- /dev/null
+++ b/examples/step-29/doc/intro.dox
@@ -0,0 +1,196 @@
+<br>
+
+<i>
+This program was contributed by Moritz Allmaras at Texas A&M
+University. Some of the work on this tutorial program has been funded
+by NSF under grant DMS-0604778.
+</i>
+
+<b>Note:</b> In order to run this program, deal.II must be configured to use
+the UMFPACK sparse direct solver. Refer to the <a
+href="../../readme.html#umfpack">ReadMe</a> for instructions how to do this.
+
+
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+
+A question that comes up frequently is how to solve problems involving complex
+valued functions with deal.II. For many problems, instead of working with
+complex valued finite elements directly, which are not readily available in
+the library, it is often much more convenient to split complex valued
+functions into their real and imaginary parts and use separate scalar finite
+element fields for discretizing each one of them. Basically this amounts to
+viewing a single complex valued equation as a system of two real valued
+equations. This short example demonstrates how this can be implemented in
+deal.II by using an <code>FESystem</code> object to stack two finite element
+fields representing real and imaginary parts. We also revisit the
+ParameterHandler class first used in step-19, which provides a
+convenient way for reading parameters from a configuration file at runtime
+without the need to recompile the program code.  
+
+The equations covered here fall into the class of vector-valued problems. A
+toplevel overview of this topic can be found in the @ref vector_valued module.
+
+
+<h3>Problem setting</h3>
+
+The original purpose of this program is to simulate the focussing properties
+of an ultrasound wave generated by a transducer lens with variable
+geometry. Recent applications in medical imaging use ultrasound waves not only
+for imaging purposes, but also to excite certain local effects in a
+material, like changes in optical properties, that can then be measured by
+other imaging techniques. A vital ingredient for these methods is the ability
+to focus the intensity of the ultrasound wave in a particular part of the
+material, ideally in a point, to be able to examine the properties of the
+material at that particular location.  
+
+To derive a model for this problem, we think of ultrasound as a pressure wave
+governed by the wave equation:  
+@f[
+  \frac{\partial^2 U}{\partial t^2} - c^2 \Delta U = 0
+@f]
+where $c$ is the wave speed (that for simplicity we assume to be constant), $U
+= U(x,t),\;x \in \Omega,\;t\in\mathrm{R}$. The boundary
+$\Gamma=\partial\Omega$ is divided into two parts $\Gamma_1$ and
+$\Gamma_2=\Gamma\setminus\Gamma_1$, with $\Gamma_1$ representing the
+transducer lens and $\Gamma_2$ an absorbing boundary (that is, we want to
+choose boundary conditions on $\Gamma_2$ in such a way that they imitate a
+larger domain). On $\Gamma_1$, the transducer generates a wave of constant
+frequency ${\omega}>0$ and constant amplitude (that we chose to be 1 here):  
+@f[
+U(x,t) = \cos{\omega t}, \qquad x\in \Gamma_1
+@f]
+
+If there are no other (interior or boundary) sources, and since the only
+source has frequency $\omega$, then the solution admits a separation of
+variables of the form $U(x,t) = \textrm{Re}\left(u(x)\,e^{i\omega
+t}\right)$. The complex-valued function $u(x)$ describes the spatial
+dependency of amplitude and phase (relative to the source) of the waves of
+frequency ${\omega}$, with the amplitude being the quantity that we are
+interested in. By plugging this form of the solution into the wave equation,
+we see that for $u$ we have
+@f{eqnarray*}
+-\omega^2 u(x) - c^2\Delta u(x) &=& 0, \qquad x\in\Omega,\\
+u(x) &=& 1,  \qquad x\in\Gamma_1.
+@f}
+
+For finding suitable conditions on $\Gamma_2$ that model an absorbing
+boundary, consider a wave of the form $V(x,t)=e^{i(k\cdot x -\omega t)}$ with
+frequency ${\omega}$ traveling in direction $k\in {\mathrm{R}^2}$. In order
+for $V$ to solve the wave equation, $|k|={\frac{\omega}{c}}$ must
+hold. Suppose that this wave hits the boundary in $x_0\in\Gamma_2$ at a right
+angle, i.e. $n=\frac{k}{|k|}$ with $n$ denoting the outer unit normal of
+$\Omega$ in $x_0$. Then at $x_0$, this wave satisfies the equation 
+@f[
+c (n\cdot\nabla V) + \frac{\partial V}{\partial t} = (i\, c\, |k| - i\, \omega) V = 0.
+@f]
+Hence, by enforcing the boundary condition 
+@f[
+c (n\cdot\nabla U) + \frac{\partial U}{\partial t} = 0, \qquad x\in\Gamma_2,
+@f]
+waves that hit the boundary $\Gamma_2$ at a right angle will be perfectly
+absorbed. On the other hand, those parts of the wave field that do not hit a
+boundary at a right angle do not satisfy this condition and enforcing it as a
+boundary condition will yield partial reflections, i.e. only parts of the wave
+will pass through the boundary as if it weren't there whereas the remaining
+fraction of the wave will be reflected back into the domain.
+
+If we are willing to accept this as a sufficient approximation to an absorbing boundary we finally arrive at the following problem for $u$: 
+@f{eqnarray*}
+-\omega^2 u - c^2\Delta u &=& 0, \qquad x\in\Omega,\\
+c (n\cdot\nabla u) + i\,\omega\,u &=&0, \qquad x\in\Gamma_2,\\
+u &=& 1,  \qquad x\in\Gamma_1.
+@f}
+This is a Helmholtz equation (similar to the one in step-7, but this time with ''the bad sign'') with Dirichlet data on $\Gamma_1$ and mixed boundary conditions on $\Gamma_2$. Because of the condition on $\Gamma_2$, we cannot just treat the equations for real and imaginary parts of $u$ separately. What we can do however is to view the PDE for $u$ as a system of two PDEs for the real and imaginary parts of $u$, with the boundary condition on $\Gamma_2$ representing the coupling terms between the two components of the system. This works along the following lines: Let $v=\textrm{Re}\;u,\; w=\textrm{Im}\;u$, then in terms of $v$ and $w$ we have the following system:
+@f{eqnarray*}
+  \left.\begin{array}{ccc}
+    -\omega^2 v - c^2\Delta v &=& 0 \quad\\ 
+    -\omega^2 w - c^2\Delta w &=& 0 \quad
+  \end{array}\right\} &\;& x\in\Omega,
+	\\
+  \left.\begin{array}{ccc}
+    c (n\cdot\nabla v) - \omega\,w &=& 0 \quad\\ 
+    c (n\cdot\nabla w) + \omega\,v &=& 0 \quad
+  \end{array}\right\} &\;& x\in\Gamma_2,
+	\\
+	\left.\begin{array}{ccc}
+    v &=& 1 \quad\\ 
+    w &=& 0 \quad
+  \end{array}\right\} &\;& x\in\Gamma_1.
+@f}
+
+For test functions $\phi,\psi$ with $\phi|_{\Gamma_1}=\psi|_{\Gamma_1}=0$, after the usual multiplication, integration over $\Omega$ and applying integration by parts, we get the weak formulation
+@f{eqnarray*}
+-\omega^2 \langle \phi, v \rangle_{\mathrm{L}^2(\Omega)} + c^2 \langle \nabla \phi, \nabla v \rangle_{\mathrm{L}^2(\Omega)} - c \omega \langle \phi, w \rangle_{\mathrm{L}^2(\Gamma_2)} &=& 0, \\
+-\omega^2 \langle \psi, w \rangle_{\mathrm{L}^2(\Omega)} + c^2 \langle \nabla \psi, \nabla w \rangle_{\mathrm{L}^2(\Omega)} + c \omega \langle \psi, v \rangle_{\mathrm{L}^2(\Gamma_2)} &=& 0.
+@f}
+
+We choose finite element spaces $V_h$ and $W_h$ with bases $\{\phi_j\}_{j=1}^n, \{\psi_j\}_{j=1}^n$ and look for approximate solutions 
+@f[
+v_h = \sum_{j=1}^n \alpha_j \phi_j, \;\; w_h = \sum_{j=1}^n \beta_j \psi_j.
+@f]
+Plugging into the variational form yields the equation system
+@f[
+\renewcommand{\arraystretch}{2.0}
+\left.\begin{array}{ccc}
+\sum_{j=1}^n \left(-\omega^2 \langle \phi_i, \phi_j \rangle_{\mathrm{L}^2(\Omega)} +c^2 \langle \nabla \phi_i, \nabla \phi_j \rangle_{\mathrm{L}^2(\Omega)}\right)\alpha_j - \left(c\omega \langle \phi_i,\psi_j\rangle_{\mathrm{L}^2(\Gamma_2)}\right)\beta_j &=& 0 \\ 
+\sum_{j=1}^n \left(-\omega^2 \langle \psi_i, \psi_j \rangle_{\mathrm{L}^2(\Omega)} +c^2 \langle \nabla \psi_i, \nabla \psi_j \rangle_{\mathrm{L}^2(\Omega)}\right)\beta_j + \left(c\omega \langle \psi_i,\phi_j\rangle_{\mathrm{L}^2(\Gamma_2)}\right)\alpha_j &=& 0
+\end{array}\right\}\;\;\forall\; i =1,\ldots,n.
+@f]
+In matrix notation: 
+@f[
+\renewcommand{\arraystretch}{2.0}
+\left(
+\begin{array}{cc}
+-\omega^2 \langle \phi_i, \phi_j \rangle_{\mathrm{L}^2(\Omega)} + c^2 \langle \nabla \phi_i, \nabla \phi_j \rangle_{\mathrm{L}^2(\Omega)} & -c\omega \langle \phi_i,\psi_j\rangle_{\mathrm{L}^2(\Gamma_2)} \\
+c\omega \langle \psi_i,\phi_j\rangle_{\mathrm{L}^2(\Gamma_2)} & -\omega^2 \langle \psi_{i}, \psi_j \rangle_{\mathrm{L}^2(\Omega)} + c^2 \langle \nabla \psi_{i}, \nabla \psi_j  \rangle_{\mathrm{L}^2(\Omega)}
+\end{array}
+\right)
+\left(
+\begin{array}{c}
+\alpha \\ \beta
+\end{array}
+\right)
+=
+\left(
+\begin{array}{c}
+0 \\ 0
+\end{array}
+\right)
+@f]
+(One should not be fooled by the right hand side being zero here, that is 
+because we haven't included the Dirichlet boundary data yet.) 
+Because of the alternating sign in the off-diagonal blocks, we can already 
+see that this system is non-symmetric, in fact it is even indefinite.
+Of course, there is no necessity to choose the spaces $V_h$ and $W_h$ to be
+the same. However, we expect real and imaginary part of the solution to
+have similar properties and will therefore indeed take $V_h=W_h$ in the 
+implementation, and also use the same basis functions $\phi_i = \psi_i$ for 
+both spaces. The reason for the notation using different symbols is just that 
+it allows us to distinguish between shape functions for $v$ and $w$, as this 
+distinction plays an important role in the implementation.  
+
+
+<h3>The test case</h3>
+
+For the computations, we will consider wave propagation in the unit square, 
+with ultrasound generated by a transducer lens that is shaped like a segment 
+of the circle with center at $(0.5, d)$ and a 
+radius slightly greater than $d$; this shape should lead to a focusing of the sound
+wave at the center of the circle. Varying $d$ changes the "focus" of the lens
+and affects the spatial distribution of the intensity of $u$, where our main
+concern is how well $|u|=\sqrt{v^2+w^2}$ is focussed. 
+
+In the program below, we will implement the complex-valued Helmholtz equations
+using the formulation with split real and imaginary parts. We will also
+discuss how to generate a domain that looks like a square with a slight bulge
+simulating the transducer (in the
+<code>UltrasoundProblem<dim>::make_grid()</code> function), and how to
+generate graphical output that not only contains the solution components $v$ and 
+$w$, but also the magnitude $\sqrt{v^2+w^2}$ directly in the output file (in
+<code>UltrasoundProblem<dim>::output_results()</code>). Finally, we use the 
+ParameterHandler class to easily read parameters like the focal distance $d$, 
+wave speed $c$, frequency $\omega$, and a number of other parameters from an 
+input file at run-time, rather than fixing those parameters in the source code 
+where we would have to re-compile every time we want to change parameters.
diff --git a/examples/step-29/doc/kind b/examples/step-29/doc/kind
new file mode 100644
index 0000000..c1d9154
--- /dev/null
+++ b/examples/step-29/doc/kind
@@ -0,0 +1 @@
+techniques
diff --git a/examples/step-29/doc/results.dox b/examples/step-29/doc/results.dox
new file mode 100644
index 0000000..9ff6752
--- /dev/null
+++ b/examples/step-29/doc/results.dox
@@ -0,0 +1,188 @@
+<a name="Results"></a>
+<h1>Results</h1>
+
+The current program reads its run-time parameters from an input file
+called <code>\step-29.prm</code> that looks like this:
+@code
+subsection Mesh & geometry parameters
+  # Distance of the focal point of the lens to the x-axis
+  set Focal distance        = 0.3
+
+  # Number of global mesh refinement steps applied to initial coarse grid
+  set Number of refinements = 5
+end
+
+
+subsection Physical constants
+  # Wave speed
+  set c     = 1.5e5
+
+  # Frequency
+  set omega = 3.0e7
+end
+
+
+subsection Output parameters
+  # Name of the output file (without extension)
+  set Output file   = solution
+
+  # A name for the output format to be used
+  set Output format = gmv
+end
+@endcode
+
+As can be seen, we set
+$d=0.3$, which amounts to a focus of the transducer lens
+at $x=0.5$, $y=0.3$. The coarse mesh is refined 5 times,
+resulting in 160x160 cells, and the output is written in gmv
+format. The parameter reader understands many more parameters
+pertaining in particular to the generation of output, see the
+explanation in step-19, but we need none of these
+parameters here and therefore stick with their default values.
+
+Here's the console output of the program in debug mode:
+
+@code
+examples/\step-29> make run
+============================ Running \step-29
+DEAL::Generating grid... done (1.11607s)
+DEAL::  Number of active cells:  25600
+DEAL::Setting up system... done (1.10807s)
+DEAL::  Number of degrees of freedom: 51842
+DEAL::Assembling system matrix... done (6.50841s)
+DEAL::Solving linear system... done (2.89218s)
+DEAL::Generating output... done (4.52428s)
+@endcode
+
+(Of course, execution times will differ if you run the program
+locally.) The fact that most of the time is spent on assembling
+the system matrix and generating output is due to the many assertions
+that need to be checked in debug mode. In release mode these parts
+of the program run much faster whereas solving the linear system is
+hardly sped up at all:
+
+@code
+============================ Running \step-29
+DEAL::Generating grid... done (0.0280020s)
+DEAL::  Number of active cells:  25600
+DEAL::Setting up system... done (0.112007s)
+DEAL::  Number of degrees of freedom: 51842
+DEAL::Assembling system matrix... done (0.160010s)
+DEAL::Solving linear system... done (2.21614s)
+DEAL::Generating output... done (0.880055s)
+@endcode
+
+The graphical output of the program looks as follows:
+
+
+<table align="center" border="1" cellspacing="3" cellpadding="3">
+  <tr>
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-29.v.png" alt="v = Re(u)">
+    </td>
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-29.w.png" alt="w = Im(u)">
+    </td>
+  </tr>
+
+  <tr>
+    <td colspan="2">
+      <img src="http://www.dealii.org/images/steps/developer/step-29.intensity.png" alt="|u|">
+    </td>
+  </tr>
+</table>
+
+The first two pictures show the real and imaginary parts of
+$u$, whereas the last shows the intensity $|u|$. One can clearly
+see that the intensity is focussed around the focal point of the
+lens (0.5, 0.3), and that the focus
+is rather sharp in $x$-direction but more blurred in $y$-direction, which is a
+consequence of the geometry of the focusing lens, its finite aperture,
+and the wave nature of the problem.
+
+Because colorful graphics are always fun, and to stress the focusing
+effects some more, here is another set of images highlighting how well
+the intensity is actually focused in $x$-direction:
+
+<table align="center" border="1" cellspacing="3" cellpadding="3">
+  <tr>
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-29.surface.png" alt="|u|">
+    </td>
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-29.contours.png" alt="|u|">
+    </td>
+  </tr>
+</table>
+
+
+As a final note, the structure of the program makes it easy to
+determine which parts of the program scale nicely as the mesh is
+refined and which parts don't. Here are the run times for 5, 6, and 7
+global refinements:
+
+@code
+DEAL::Generating grid... done (0.0320020s)
+DEAL::  Number of active cells:  25600
+DEAL::Setting up system... done (0.104006s)
+DEAL::  Number of degrees of freedom: 51842
+DEAL::Assembling system matrix... done (0.164011s)
+DEAL::Solving linear system... done (2.23214s)
+DEAL::Generating output... done (0.900056s)
+
+DEAL::Generating grid... done (0.132009s)
+DEAL::  Number of active cells:  102400
+DEAL::Setting up system... done (0.408025s)
+DEAL::  Number of degrees of freedom: 206082
+DEAL::Assembling system matrix... done (0.656041s)
+DEAL::Solving linear system... done (14.8849s)
+DEAL::Generating output... done (3.57222s)
+
+DEAL::Generating grid... done (0.504031s)
+DEAL::  Number of active cells:  409600
+DEAL::Setting up system... done (1.72011s)
+DEAL::  Number of degrees of freedom: 821762
+DEAL::Assembling system matrix... done (2.63216s)
+DEAL::Solving linear system... done (117.811s)
+DEAL::Generating output... done (15.1489s)
+@endcode
+
+Each time we refine the mesh once, so the number of cells and degrees
+of freedom roughly quadruples from each step to the next. As can be seen,
+generating the grid, setting up degrees of freedom, assembling the
+linear system, and generating output scale pretty closely to linear,
+whereas solving the linear system is an operation that requires 8
+times more time each time the number of degrees of freedom is
+increased by a factor of 4, i.e. it is ${\cal O}(N^{3/2})$. This can
+be explained by the fact that (using optimal ordering) the
+bandwidth of a finite element matrix is $B={\cal O}(N^{(dim-1)/dim})$,
+and the effort to solve a banded linear system using LU decomposition
+is ${\cal O}(BN)$. This also explains why the program does run in 3d
+as well (after changing the dimension on the
+<code>UltrasoundProblem</code> object), but scales very badly and
+takes extraordinary patience before it finishes solving the linear
+system on a mesh with appreciable resolution, even though all the
+other parts of the program scale very nicely.
+
+
+
+<a name="extensions"></a>
+<h3>Possibilities for extensions</h3>
+
+An obvious possible extension for this program is to run it in 3d
+— after all, the world around us is three-dimensional, and
+ultrasound beams propagate in three-dimensional media. You can try
+this by simply changing the template parameter of the principal class
+in <code>main()</code> and running it. This won't get you very far,
+though: certainly not if you do 5 global refinement steps as set in
+the parameter file. You'll simply run out of memory as both the mesh
+(with its $(2^5)^3 \cdot 5^3=2^{15}\cdot 125 \approx 4\cdot 10^6$ cells)
+and in particular the sparse direct solver take too much memory. You
+can solve with 3 global refinement steps, however, if you have a bit
+of time: in early 2011, the direct solve takes about half an
+hour. What you'll notice, however, is that the solution is completely
+wrong: the mesh size is simply not small enough to resolve the
+solution's waves accurately, and you can see this in plots of the
+solution. Consequently, this is one of the cases where adaptivity is
+indispensable if you don't just want to throw a bigger (presumably
+%parallel) machine at the problem.
diff --git a/examples/step-29/doc/tooltip b/examples/step-29/doc/tooltip
new file mode 100644
index 0000000..0ac422a
--- /dev/null
+++ b/examples/step-29/doc/tooltip
@@ -0,0 +1 @@
+A complex-valued Helmholtz equation. Sparse direct solvers.
diff --git a/examples/step-29/step-29.cc b/examples/step-29/step-29.cc
new file mode 100644
index 0000000..d5e0f23
--- /dev/null
+++ b/examples/step-29/step-29.cc
@@ -0,0 +1,974 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2007 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Author: Moritz Allmaras, Texas A&M University, 2007
+ */
+
+
+// @sect3{Include files}
+
+// The following header files are unchanged from step-7 and have been
+// discussed before:
+
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/logstream.h>
+
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/manifold_lib.h>
+
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_values.h>
+
+#include <deal.II/numerics/matrix_tools.h>
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/numerics/vector_tools.h>
+
+#include <iostream>
+#include <fstream>
+
+// This header file contains the necessary declarations for the
+// ParameterHandler class that we will use to read our parameters from a
+// configuration file:
+#include <deal.II/base/parameter_handler.h>
+
+// For solving the linear system, we'll use the sparse LU-decomposition
+// provided by UMFPACK (see the SparseDirectUMFPACK class), for which the
+// following header file is needed.  Note that in order to compile this
+// tutorial program, the deal.II-library needs to be built with UMFPACK
+// support, which is enabled by default:
+#include <deal.II/lac/sparse_direct.h>
+
+// The FESystem class allows us to stack several FE-objects to one compound,
+// vector-valued finite element field. The necessary declarations for this
+// class are provided in this header file:
+#include <deal.II/fe/fe_system.h>
+
+// Finally, include the header file that declares the Timer class that we will
+// use to determine how much time each of the operations of our program takes:
+#include <deal.II/base/timer.h>
+
+// As the last step at the beginning of this program, we put everything that
+// is in this program into its namespace and, within it, make everything that
+// is in the deal.II namespace globally available, without the need to prefix
+// everything with <code>dealii</code><code>::</code>:
+namespace Step29
+{
+  using namespace dealii;
+
+
+  // @sect3{The <code>DirichletBoundaryValues</code> class}
+
+  // First we define a class for the function representing the Dirichlet
+  // boundary values. This has been done many times before and therefore does
+  // not need much explanation.
+  //
+  // Since there are two values $v$ and $w$ that need to be prescribed at the
+  // boundary, we have to tell the base class that this is a vector-valued
+  // function with two components, and the <code>vector_value</code> function
+  // and its cousin <code>vector_value_list</code> must return vectors with
+  // two entries. In our case the function is very simple, it just returns 1
+  // for the real part $v$ and 0 for the imaginary part $w$ regardless of the
+  // point where it is evaluated.
+  template <int dim>
+  class DirichletBoundaryValues : public Function<dim>
+  {
+  public:
+    // Two components: the real part v and the imaginary part w.
+    DirichletBoundaryValues () : Function<dim>(2) {}
+    // Boundary values (v, w) at a single point ...
+    virtual void vector_value (const Point<dim> &p, Vector<double> &values) const;
+    // ... and at each of the given points.
+    virtual void vector_value_list (const std::vector<Point<dim> > &points,
+                                    std::vector<Vector<double> >   &value_list) const;
+  };
+
+
+  // The prescribed value is u = 1 + 0i at every point, so p is ignored.
+  template <int dim>
+  inline void
+  DirichletBoundaryValues<dim>::vector_value (const Point<dim> & /*p*/,
+                                              Vector<double>   &values) const
+  {
+    Assert (values.size() == 2, ExcDimensionMismatch (values.size(), 2));
+    values(0) = 1;   // real part v
+    values(1) = 0;   // imaginary part w
+  }
+
+
+  // Batch variant: fills value_list[i] with the boundary values at points[i].
+  template <int dim>
+  void DirichletBoundaryValues<dim>::vector_value_list (const std::vector<Point<dim> > &points,
+                                                        std::vector<Vector<double> >   &value_list) const
+  {
+    Assert (points.size() == value_list.size(),
+            ExcDimensionMismatch (value_list.size(), points.size()));
+    for (unsigned int i=0; i!=points.size(); ++i)
+      DirichletBoundaryValues<dim>::vector_value (points[i], value_list[i]);
+  }
+
+  // @sect3{The <code>ParameterReader</code> class}
+
+  // The next class is responsible for preparing the ParameterHandler object
+  // and reading parameters from an input file.  It includes a function
+  // <code>declare_parameters</code> that declares all the necessary
+  // parameters and a <code>read_parameters</code> function that is called
+  // from outside to initiate the parameter reading process.
+  class ParameterReader : public Subscriptor
+  {
+  public:
+    ParameterReader (ParameterHandler &paramhandler);
+    // Declares all parameters, then reads them from the named file.
+    void read_parameters (const std::string parameter_file);
+  private:
+    ParameterHandler &prm;   // the handler that was passed to the constructor
+    void declare_parameters ();
+  };
+
+  // The constructor stores a reference to the ParameterHandler object that is
+  // passed to it:
+  // Only the reference is stored here; nothing is declared or read yet.
+  ParameterReader::ParameterReader (ParameterHandler &paramhandler)
+    : prm (paramhandler)
+  {}
+
+  // @sect4{<code>ParameterReader::declare_parameters</code>}
+
+  // The <code>declare_parameters</code> function declares all the parameters
+  // that our ParameterHandler object will be able to read from input files,
+  // along with their types, range conditions and the subsections they appear
+  // in. We will wrap all the entries that go into a section in a pair of
+  // braces to force the editor to indent them by one level, making it simpler
+  // to read which entries together form a section:
+  void ParameterReader::declare_parameters ()
+  {
+    // Mesh and geometry: the number of global refinement steps applied to
+    // the initial coarse mesh, and the focal distance $d$ of the transducer
+    // lens. Both patterns get a lower bound of zero and no upper bound, so
+    // the refinement count may be any integer in $[0,\infty)$ and the focal
+    // distance any non-negative number:
+    prm.enter_subsection ("Mesh & geometry parameters");
+    {
+      prm.declare_entry ("Number of refinements",
+                         "6",
+                         Patterns::Integer (0),
+                         "Number of global mesh refinement steps "
+                         "applied to initial coarse grid");
+
+      prm.declare_entry ("Focal distance",
+                         "0.3",
+                         Patterns::Double (0),
+                         "Distance of the focal point of the lens "
+                         "to the x-axis");
+    }
+    prm.leave_subsection ();
+
+    // Physical constants of the equation: the wave speed $c$ and the
+    // frequency $\omega$. As above, Patterns::Double with only the left
+    // end-point given admits the half-open interval $[0,\infty)$:
+    prm.enter_subsection ("Physical constants");
+    {
+      prm.declare_entry ("c",
+                         "1.5e5",
+                         Patterns::Double (0),
+                         "Wave speed");
+
+      prm.declare_entry ("omega",
+                         "5.0e7",
+                         Patterns::Double (0),
+                         "Frequency");
+    }
+    prm.leave_subsection ();
+
+
+    // Last, some properties of the output, such as the file name and the
+    // format, should be changeable through entries in the configuration
+    // file; the final subsection takes care of that:
+    prm.enter_subsection ("Output parameters");
+    {
+      prm.declare_entry ("Output file",
+                         "solution",
+                         Patterns::Anything (),
+                         "Name of the output file (without extension)");
+
+      // Different output formats need different parameters; postscript
+      // output, for example, wants viewpoint angles, line widths, colors
+      // and so on. It would be cumbersome if we had to declare all of
+      // these by hand for every possible output format supported in the
+      // library. Instead, each output format has a
+      // <code>FormatFlags::declare_parameters</code> function that
+      // declares the parameters specific to that format in a subsection
+      // of its own. The call to DataOutInterface<1>::declare_parameters
+      // below executes <code>declare_parameters</code> for all available
+      // output formats, so that one subsection is created per format,
+      // holding the parameters declared for that particular format. (The
+      // actual value of the template parameter in the call,
+      // <code>@<1@></code>, does not matter here: the function does the
+      // same work independent of the dimension, but happens to be in a
+      // template-parameter-dependent class.) To find out which parameters
+      // exist for which output format, you can either consult the
+      // documentation of the DataOutBase class, or simply run this
+      // program without a parameter file present. It will then create a
+      // file with all declared parameters set to their default values,
+      // which can conveniently serve as a starting point for setting the
+      // parameters to the values you desire.
+      DataOutInterface<1>::declare_parameters (prm);
+    }
+    prm.leave_subsection ();
+  }
+
+  // @sect4{<code>ParameterReader::read_parameters</code>}
+
+  // This is the main function in the ParameterReader class.  It gets called
+  // from outside, first declares all the parameters, and then reads them from
+  // the input file whose filename is provided by the caller. After the call
+  // to this function is complete, the <code>prm</code> object can be used to
+  // retrieve the values of the parameters read in from the file:
+  void ParameterReader::read_parameters (const std::string parameter_file)
+  {
+    // Step 1: declare every entry; step 2: read their values from the file.
+    this->declare_parameters ();
+    this->prm.read_input (parameter_file);
+  }
+
+
+
+  // @sect3{The <code>ComputeIntensity</code> class}
+
+  // As mentioned in the introduction, the quantity that we are really after
+  // is the spatial distribution of the intensity of the ultrasound wave,
+  // which corresponds to $|u|=\sqrt{v^2+w^2}$. Now we could just be content
+  // with having $v$ and $w$ in our output, and use a suitable visualization
+  // or postprocessing tool to derive $|u|$ from the solution we
+  // computed. However, there is also a way to output data derived from the
+  // solution in deal.II, and we are going to make use of this mechanism here.
+
+  // So far we have always used the DataOut::add_data_vector function to add
+  // vectors containing output data to a DataOut object.  There is a special
+  // version of this function that in addition to the data vector has an
+  // additional argument of type DataPostprocessor. What happens when this
+  // function is used for output is that at each point where output data is to
+  // be generated, the DataPostprocessor::compute_derived_quantities_scalar or
+  // DataPostprocessor::compute_derived_quantities_vector function of the
+  // specified DataPostprocessor object is invoked to compute the output
+  // quantities from the values, the gradients and the second derivatives of
+  // the finite element function represented by the data vector (in the case
+  // of face related data, normal vectors are available as well). Hence, this
+  // allows us to output any quantity that can locally be derived from the
+  // values of the solution and its derivatives.  Of course, the ultrasound
+  // intensity $|u|$ is such a quantity and its computation doesn't even
+  // involve any derivatives of $v$ or $w$.
+
+  // In practice, the DataPostprocessor class only provides an interface to
+  // this functionality, and we need to derive our own class from it in order
+  // to implement the functions specified by the interface. In the most
+  // general case one has to implement several member functions but if the
+  // output quantity is a single scalar then some of this boilerplate code can
+  // be handled by a more specialized class, DataPostprocessorScalar and we
+  // can derive from that one instead. This is what the
+  // <code>ComputeIntensity</code> class does:
+  template <int dim>
+  class ComputeIntensity : public DataPostprocessorScalar<dim>
+  {
+  public:
+    ComputeIntensity ();
+
+    virtual
+    void
+    compute_derived_quantities_vector (const std::vector<Vector<double> >               &uh,
+                                       const std::vector<std::vector<Tensor<1, dim> > > &duh,
+                                       const std::vector<std::vector<Tensor<2, dim> > > &dduh,
+                                       const std::vector<Point<dim> >                   &normals,
+                                       const std::vector<Point<dim> >                   &evaluation_points,
+                                       std::vector<Vector<double> >                     &computed_quantities) const;
+  };
+
+  // In the constructor, we need to call the constructor of the base class
+  // with two arguments. The first denotes the name by which the single scalar
+  // quantity computed by this class should be represented in output files. In
+  // our case, the postprocessor has $|u|$ as output, so we use "Intensity".
+  //
+  // The second argument is a set of flags that indicate which data is needed
+  // by the postprocessor in order to compute the output quantities.  This can
+  // be any subset of update_values, update_gradients and update_hessians
+  // (and, in the case of face data, also update_normal_vectors), which are
+  // documented in UpdateFlags.  Of course, computation of the derivatives
+  // requires additional resources, so only the flags for data that is really
+  // needed should be given here, just as we do when we use FEValues objects.
+  // In our case, only the function values of $v$ and $w$ are needed to
+  // compute $|u|$, so we're good with the update_values flag.
+  template <int dim>
+  ComputeIntensity<dim>::ComputeIntensity ()
+    :
+    DataPostprocessorScalar<dim> ("Intensity",
+                                  update_values)
+  {}
+
+
+  // The actual postprocessing happens in the following function.  Its inputs
+  // are a vector representing values of the function (which is here
+  // vector-valued) representing the data vector given to
+  // DataOut::add_data_vector, evaluated at all evaluation points where we
+  // generate output, and some tensor objects representing derivatives (that
+  // we don't use here since $|u|$ is computed from just $v$ and $w$, and for
+  // which we assign no name to the corresponding function argument).  The
+  // derived quantities are returned in the <code>computed_quantities</code>
+  // vector.  Remember that this function may only use data for which the
+  // respective update flag was passed to the
+  // <code>DataPostprocessorScalar</code> base class constructor. For
+  // example, we may not use the derivatives here, since our constructor
+  // requests (via <code>update_values</code>) that only function values
+  // are provided.
+  template <int dim>
+  void
+  ComputeIntensity<dim>::compute_derived_quantities_vector (
+    const std::vector<Vector<double> >                 &uh,
+    const std::vector<std::vector<Tensor<1, dim> > >   & /*duh*/,
+    const std::vector<std::vector<Tensor<2, dim> > >   & /*dduh*/,
+    const std::vector<Point<dim> >                     & /*normals*/,
+    const std::vector<Point<dim> >                     & /*evaluation_points*/,
+    std::vector<Vector<double> >                       &computed_quantities
+  ) const
+  {
+    Assert(computed_quantities.size() == uh.size(),
+           ExcDimensionMismatch (computed_quantities.size(), uh.size()));
+
+    // The computation itself is straightforward: We iterate over each entry
+    // in the output vector and compute $|u|$ from the corresponding values of
+    // $v$ and $w$:
+    for (unsigned int i=0; i<computed_quantities.size(); i++)
+      {
+        Assert(computed_quantities[i].size() == 1,
+               ExcDimensionMismatch (computed_quantities[i].size(), 1));
+        Assert(uh[i].size() == 2, ExcDimensionMismatch (uh[i].size(), 2));
+
+        computed_quantities[i](0) = std::sqrt(uh[i](0)*uh[i](0) + uh[i](1)*uh[i](1));
+      }
+  }
+
+
+  // @sect3{The <code>UltrasoundProblem</code> class}
+
+  // Finally here is the main class of this program.  Its member functions
+  // are very similar to the previous examples, in particular step-4, and the
+  // list of member variables does not contain any major surprises either.
+  // The ParameterHandler object that is passed to the constructor is stored
+  // as a reference to allow easy access to the parameters from all functions
+  // of the class.  Since we are working with vector valued finite elements,
+  // the FE object we are using is of type FESystem.
+  template <int dim>
+  class UltrasoundProblem
+  {
+  public:
+    UltrasoundProblem (ParameterHandler &);
+    ~UltrasoundProblem ();
+    void run ();
+
+  private:
+    void make_grid ();
+    void setup_system ();
+    void assemble_system ();
+    void solve ();
+    void output_results () const;
+
+    ParameterHandler      &prm;
+
+    Triangulation<dim>     triangulation;
+    DoFHandler<dim>        dof_handler;
+    FESystem<dim>          fe;
+
+    SparsityPattern        sparsity_pattern;
+    SparseMatrix<double>   system_matrix;
+    Vector<double>         solution, system_rhs;
+  };
+
+
+
+  // The constructor takes the ParameterHandler object and stores it in a
+  // reference. It also initializes the DoF-Handler and the finite element
+  // system, which consists of two copies of the scalar Q1 field, one for $v$
+  // and one for $w$:
+  template <int dim>
+  UltrasoundProblem<dim>::UltrasoundProblem (ParameterHandler  &param)
+    :
+    prm(param),
+    dof_handler(triangulation),
+    fe(FE_Q<dim>(1), 2)
+  {}
+
+
+  template <int dim>
+  UltrasoundProblem<dim>::~UltrasoundProblem ()
+  {
+    dof_handler.clear();
+  }
+
+  // @sect4{<code>UltrasoundProblem::make_grid</code>}
+
+  // Here we set up the grid for our domain.  As mentioned in the exposition,
+  // the geometry is just a unit square (in 2d) with the part of the boundary
+  // that represents the transducer lens replaced by a sector of a circle.
+  template <int dim>
+  void UltrasoundProblem<dim>::make_grid ()
+  {
+    // First we generate some logging output and start a timer so we can
+    // compute execution time when this function is done:
+    deallog << "Generating grid... ";
+    Timer timer;
+    timer.start ();
+
+    // Then we query the values for the focal distance of the transducer lens
+    // and the number of mesh refinement steps from our ParameterHandler
+    // object:
+    prm.enter_subsection ("Mesh & geometry parameters");
+
+    const double                focal_distance = prm.get_double("Focal distance");
+    const unsigned int  n_refinements  = prm.get_integer("Number of refinements");
+
+    prm.leave_subsection ();
+
+    // Next, two points are defined for position and focal point of the
+    // transducer lens, which is the center of the circle whose segment will
+    // form the transducer part of the boundary. Notice that this is the only
+    // point in the program where things are slightly different in 2D and 3D.
+    // Even though this tutorial only deals with the 2D case, the necessary
+    // additions to make this program functional in 3D are so minimal that we
+    // opt for including them:
+    const Point<dim>    transducer = (dim == 2) ?
+                                     Point<dim> (0.5, 0.0) :
+                                     Point<dim> (0.5, 0.5, 0.0);
+    const Point<dim>   focal_point = (dim == 2) ?
+                                     Point<dim> (0.5, focal_distance) :
+                                     Point<dim> (0.5, 0.5, focal_distance);
+
+
+    // As initial coarse grid we take a simple unit square with 5 subdivisions
+    // in each direction. The number of subdivisions is chosen so that the
+    // line segment $[0.4,0.6]$ that we want to designate as the transducer
+    // boundary is spanned by a single face. Then we step through all cells to
+    // find the faces where the transducer is to be located, which in fact is
+    // just the single edge from 0.4 to 0.6 on the x-axis. This is where we
+    // want the refinements to be made according to a circle shaped boundary,
+    // so we mark this edge with a different manifold indicator. Since we will
+    // impose Dirichlet boundary conditions on the transducer, we also change
+    // its boundary indicator.
+    GridGenerator::subdivided_hyper_cube (triangulation, 5, 0, 1);
+
+    typename Triangulation<dim>::cell_iterator
+    cell = triangulation.begin (),
+    endc = triangulation.end();
+
+    for (; cell!=endc; ++cell)
+      for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+        if ( cell->face(face)->at_boundary() &&
+             ((cell->face(face)->center() - transducer).norm_square() < 0.01) )
+          {
+
+            cell->face(face)->set_boundary_id (1);
+            cell->face(face)->set_manifold_id (1);
+          }
+    // For the circle part of the transducer lens, a SphericalManifold object
+    // is used (which, of course, in 2D just represents a circle), with center
+    // computed as above. By marking this object as <code>static</code>, we
+    // ensure that it lives until the end of the program and thereby longer
+    // than the triangulation object we will associate with it. We then assign
+    // this object to the part of the boundary marked with manifold indicator 1:
+    static const SphericalManifold<dim> boundary(focal_point);
+    triangulation.set_manifold(1, boundary);
+
+    // Now global refinement is executed. Cells near the transducer location
+    // will be automatically refined according to the circle shaped boundary
+    // of the transducer lens:
+    triangulation.refine_global (n_refinements);
+
+    // Lastly, we generate some more logging output. We stop the timer and
+    // query the number of CPU seconds elapsed since the beginning of the
+    // function:
+    timer.stop ();
+    deallog << "done ("
+            << timer()
+            << "s)"
+            << std::endl;
+
+    deallog << "  Number of active cells:  "
+            << triangulation.n_active_cells()
+            << std::endl;
+  }
+
+
+  // @sect4{<code>UltrasoundProblem::setup_system</code>}
+  //
+  // Initialization of the system matrix, sparsity patterns and vectors is
+  // the same as in previous examples and therefore does not need further
+  // comment. As in the previous function, we also output the run time of what
+  // we do here:
+  template <int dim>
+  void UltrasoundProblem<dim>::setup_system ()
+  {
+    deallog << "Setting up system... ";
+    Timer timer;
+    timer.start();
+
+    dof_handler.distribute_dofs (fe);
+
+    DynamicSparsityPattern dsp(dof_handler.n_dofs(), dof_handler.n_dofs());
+    DoFTools::make_sparsity_pattern (dof_handler, dsp);
+    sparsity_pattern.copy_from (dsp);
+
+    system_matrix.reinit (sparsity_pattern);
+    system_rhs.reinit (dof_handler.n_dofs());
+    solution.reinit (dof_handler.n_dofs());
+
+    timer.stop ();
+    deallog << "done ("
+            << timer()
+            << "s)"
+            << std::endl;
+
+    deallog << "  Number of degrees of freedom: "
+            << dof_handler.n_dofs()
+            << std::endl;
+  }
+
+
+  // @sect4{<code>UltrasoundProblem::assemble_system</code>}
+
+  // As before, this function takes care of assembling the system matrix and
+  // right hand side vector:
+  template <int dim>
+  void UltrasoundProblem<dim>::assemble_system ()
+  {
+    deallog << "Assembling system matrix... ";
+    Timer timer;
+    timer.start ();
+
+    // First we query wavespeed and frequency from the ParameterHandler object
+    // and store them in local variables, as they will be used frequently
+    // throughout this function.
+
+    prm.enter_subsection ("Physical constants");
+
+    const double omega = prm.get_double("omega"),
+                 c     = prm.get_double("c");
+
+    prm.leave_subsection ();
+
+    // As usual, for computing integrals an ordinary Gauss quadrature rule is
+    // used. Since our bilinear form involves boundary integrals on
+    // $\Gamma_2$, we also need a quadrature rule for surface integration on
+    // the faces, which are $dim-1$ dimensional:
+    QGauss<dim>    quadrature_formula(2);
+    QGauss<dim-1>  face_quadrature_formula(2);
+
+    const unsigned int n_q_points       = quadrature_formula.size(),
+                       n_face_q_points  = face_quadrature_formula.size(),
+                       dofs_per_cell    = fe.dofs_per_cell;
+
+    // The FEValues objects will evaluate the shape functions for us.  For the
+    // part of the bilinear form that involves integration on $\Omega$, we'll
+    // need the values and gradients of the shape functions, and of course the
+    // quadrature weights.  For the terms involving the boundary integrals,
+    // only shape function values and the quadrature weights are necessary.
+    FEValues<dim>  fe_values (fe, quadrature_formula,
+                              update_values | update_gradients |
+                              update_JxW_values);
+
+    FEFaceValues<dim> fe_face_values (fe, face_quadrature_formula,
+                                      update_values | update_JxW_values);
+
+    // As usual, the system matrix is assembled cell by cell, and we need a
+    // matrix for storing the local cell contributions as well as an index
+    // vector to transfer the cell contributions to the appropriate location
+    // in the global system matrix afterwards.
+    FullMatrix<double> cell_matrix (dofs_per_cell, dofs_per_cell);
+    std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+
+    for (; cell!=endc; ++cell)
+      {
+
+        // On each cell, we first need to reset the local contribution matrix
+        // and request the FEValues object to compute the shape functions for
+        // the current cell:
+        cell_matrix = 0;
+        fe_values.reinit (cell);
+
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          {
+            for (unsigned int j=0; j<dofs_per_cell; ++j)
+              {
+
+                // At this point, it is important to keep in mind that we are
+                // dealing with a finite element system with two
+                // components. Due to the way we constructed this FESystem,
+                // namely as the Cartesian product of two scalar finite
+                // element fields, each shape function has only a single
+                // nonzero component (they are, in deal.II lingo, @ref
+                // GlossPrimitive "primitive").  Hence, each shape function
+                // can be viewed as one of the $\phi$'s or $\psi$'s from the
+                // introduction, and similarly the corresponding degrees of
+                // freedom can be attributed to either $\alpha$ or $\beta$.
+                // As we iterate through all the degrees of freedom on the
+                // current cell however, they do not come in any particular
+                // order, and so we cannot decide right away whether the DoFs
+                // with index $i$ and $j$ belong to the real or imaginary part
+                // of our solution.  On the other hand, if you look at the
+                // form of the system matrix in the introduction, this
+                // distinction is crucial since it will determine to which
+                // block in the system matrix the contribution of the current
+                // pair of DoFs will go and hence which quantity we need to
+                // compute from the given two shape functions.  Fortunately,
+                // the FESystem object can provide us with this information,
+                // namely it has a function
+                // FESystem::system_to_component_index, that for each local
+                // DoF index returns a pair of integers of which the first
+                // indicates to which component of the system the DoF
+                // belongs. The second integer of the pair indicates which
+                // index the DoF has in the scalar base finite element field,
+                // but this information is not relevant here. If you want to
+                // know more about this function and the underlying scheme
+                // behind primitive vector valued elements, take a look at
+                // step-8 or the @ref vector_valued module, where these topics
+                // are explained in depth.
+                if (fe.system_to_component_index(i).first ==
+                    fe.system_to_component_index(j).first)
+                  {
+
+                    // If both DoFs $i$ and $j$ belong to the same component,
+                    // i.e. their shape functions are both $\phi$'s or both
+                    // $\psi$'s, the contribution will end up in one of the
+                    // diagonal blocks in our system matrix, and since the
+                    // corresponding entries are computed by the same formula,
+                    // we do not bother if they actually are $\phi$ or $\psi$
+                    // shape functions. We can simply compute the entry by
+                    // iterating over all quadrature points and adding up
+                    // their contributions, where values and gradients of the
+                    // shape functions are supplied by our FEValues object.
+
+                    for (unsigned int q_point=0; q_point<n_q_points; ++q_point)
+                      cell_matrix(i,j) += (((fe_values.shape_value(i,q_point) *
+                                             fe_values.shape_value(j,q_point)) *
+                                            (- omega * omega)
+                                            +
+                                            (fe_values.shape_grad(i,q_point) *
+                                             fe_values.shape_grad(j,q_point)) *
+                                            c * c) *
+                                           fe_values.JxW(q_point));
+
+                    // You might think that we would have to specify which
+                    // component of the shape function we'd like to evaluate
+                    // when requesting shape function values or gradients from
+                    // the FEValues object. However, as the shape functions
+                    // are primitive, they have only one nonzero component,
+                    // and the FEValues class is smart enough to figure out
+                    // that we are definitely interested in this one nonzero
+                    // component.
+                  }
+              }
+          }
+
+
+        // We also have to add contributions due to boundary terms. To this
+        // end, we loop over all faces of the current cell and see if first it
+        // is at the boundary, and second has the correct boundary indicator
+        // associated with $\Gamma_2$, the part of the boundary where we have
+        // absorbing boundary conditions:
+        for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+          if (cell->face(face)->at_boundary() &&
+              (cell->face(face)->boundary_id() == 0) )
+            {
+
+
+              // These faces will certainly contribute to the off-diagonal
+              // blocks of the system matrix, so we ask the FEFaceValues
+              // object to provide us with the shape function values on this
+              // face:
+              fe_face_values.reinit (cell, face);
+
+
+              // Next, we loop through all DoFs of the current cell to find
+              // pairs that belong to different components and both have
+              // support on the current face:
+              for (unsigned int i=0; i<dofs_per_cell; ++i)
+                for (unsigned int j=0; j<dofs_per_cell; ++j)
+                  if ((fe.system_to_component_index(i).first !=
+                       fe.system_to_component_index(j).first) &&
+                      fe.has_support_on_face(i, face) &&
+                      fe.has_support_on_face(j, face))
+                    // The check whether shape functions have support on a
+                    // face is not strictly necessary: if we don't check for
+                    // it we would simply add up terms to the local cell
+                    // matrix that happen to be zero because at least one of
+                    // the shape functions happens to be zero. However, we can
+                    // save that work by adding the checks above.
+
+                    // In either case, these DoFs will contribute to the
+                    // boundary integrals in the off-diagonal blocks of the
+                    // system matrix. To compute the integral, we loop over
+                    // all the quadrature points on the face and sum up the
+                    // contribution weighted with the quadrature weights that
+                    // the face quadrature rule provides.  In contrast to the
+                    // entries on the diagonal blocks, here it does matter
+                    // which one of the shape functions is a $\psi$ and which
+                    // one is a $\phi$, since that will determine the sign of
+                    // the entry.  We account for this by a simple conditional
+                    // statement that determines the correct sign. Since we
+                    // already checked that DoF $i$ and $j$ belong to
+                    // different components, it suffices here to test for one
+                    // of them to which component it belongs.
+                    for (unsigned int q_point=0; q_point<n_face_q_points; ++q_point)
+                      cell_matrix(i,j) += ((fe.system_to_component_index(i).first == 0) ? -1 : 1) *
+                                          fe_face_values.shape_value(i,q_point) *
+                                          fe_face_values.shape_value(j,q_point) *
+                                          c *
+                                          omega *
+                                          fe_face_values.JxW(q_point);
+            }
+
+        // Now we are done with this cell and have to transfer its
+        // contributions from the local to the global system matrix. To this
+        // end, we first get a list of the global indices of this cell's
+        // DoFs...
+        cell->get_dof_indices (local_dof_indices);
+
+
+        // ...and then add the entries to the system matrix one by one:
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          for (unsigned int j=0; j<dofs_per_cell; ++j)
+            system_matrix.add (local_dof_indices[i],
+                               local_dof_indices[j],
+                               cell_matrix(i,j));
+      }
+
+
+    // The only thing left are the Dirichlet boundary values on $\Gamma_1$,
+    // which is characterized by the boundary indicator 1. The Dirichlet
+    // values are provided by the <code>DirichletBoundaryValues</code> class
+    // we defined above:
+    std::map<types::global_dof_index,double> boundary_values;
+    VectorTools::interpolate_boundary_values (dof_handler,
+                                              1,
+                                              DirichletBoundaryValues<dim>(),
+                                              boundary_values);
+
+    MatrixTools::apply_boundary_values (boundary_values,
+                                        system_matrix,
+                                        solution,
+                                        system_rhs);
+
+    timer.stop ();
+    deallog << "done ("
+            << timer()
+            << "s)"
+            << std::endl;
+  }
+
+
+
+  // @sect4{<code>UltrasoundProblem::solve</code>}
+
+  // As already mentioned in the introduction, the system matrix is neither
+  // symmetric nor definite, and so it is not quite obvious how to come up
+  // with an iterative solver and a preconditioner that do a good job on this
+  // matrix.  We chose instead to go a different way and solve the linear
+  // system with the sparse LU decomposition provided by UMFPACK. This is
+  // often a good first choice for 2D problems and works reasonably well even
+  // for a large number of DoFs.  The deal.II interface to UMFPACK is given by
+  // the SparseDirectUMFPACK class, which is very easy to use and allows us to
+  // solve our linear system with just 3 lines of code.
+
+  // Note again that for compiling this example program, you need to have the
+  // deal.II library built with UMFPACK support.
+  template <int dim>
+  void UltrasoundProblem<dim>::solve ()
+  {
+    deallog << "Solving linear system... ";
+    Timer timer;
+    timer.start ();
+
+    // The code to solve the linear system is short: First, we allocate an
+    // object of the right type. The following <code>initialize</code> call
+    // provides the matrix that we would like to invert to the
+    // SparseDirectUMFPACK object, and at the same time kicks off the
+    // LU-decomposition. Hence, this is also the point where most of the
+    // computational work in this program happens.
+    SparseDirectUMFPACK  A_direct;
+    A_direct.initialize(system_matrix);
+
+    // After the decomposition, we can use <code>A_direct</code> like a matrix
+    // representing the inverse of our system matrix, so to compute the
+    // solution we just have to multiply with the right hand side vector:
+    A_direct.vmult (solution, system_rhs);
+
+    timer.stop ();
+    deallog << "done ("
+            << timer ()
+            << "s)"
+            << std::endl;
+  }
+
+
+
+  // @sect4{<code>UltrasoundProblem::output_results</code>}
+
+  // Here we output our solution $v$ and $w$ as well as the derived quantity
+  // $|u|$ in the format specified in the parameter file. Most of the work for
+  // deriving $|u|$ from $v$ and $w$ was already done in the implementation of
+  // the <code>ComputeIntensity</code> class, so that the output routine is
+  // rather straightforward and very similar to what is done in the previous
+  // tutorials.
+  template <int dim>
+  void UltrasoundProblem<dim>::output_results () const
+  {
+    deallog << "Generating output... ";
+    Timer timer;
+    timer.start ();
+
+    // Define objects of our <code>ComputeIntensity</code> class and a DataOut
+    // object:
+    ComputeIntensity<dim> intensities;
+    DataOut<dim> data_out;
+
+    data_out.attach_dof_handler (dof_handler);
+
+    // Next we query the output-related parameters from the ParameterHandler.
+    // The DataOut::parse_parameters call acts as a counterpart to the
+    // DataOutInterface<1>::declare_parameters call in
+    // <code>ParameterReader::declare_parameters</code>. It collects all the
+    // output format related parameters from the ParameterHandler and sets the
+    // corresponding properties of the DataOut object accordingly.
+    prm.enter_subsection("Output parameters");
+
+    const std::string output_file    = prm.get("Output file");
+    data_out.parse_parameters(prm);
+
+    prm.leave_subsection ();
+
+    // Now we put together the filename from the base name provided by the
+    // ParameterHandler and the suffix which is provided by the DataOut class
+    // (the default suffix is set to the right type that matches the one set
+    // in the .prm file through parse_parameters()):
+    const std::string filename = output_file +
+                                 data_out.default_suffix();
+
+    std::ofstream output (filename.c_str());
+
+    // The solution vectors $v$ and $w$ are added to the DataOut object in the
+    // usual way:
+    std::vector<std::string> solution_names;
+    solution_names.push_back ("Re_u");
+    solution_names.push_back ("Im_u");
+
+    data_out.add_data_vector (solution, solution_names);
+
+    // For the intensity, we just call <code>add_data_vector</code> again, but
+    // this time with our <code>ComputeIntensity</code> object as the second
+    // argument, which effectively adds $|u|$ to the output data:
+    data_out.add_data_vector (solution, intensities);
+
+    // The last steps are as before. Note that the actual output format is now
+    // determined by what is stated in the input file, i.e. one can change the
+    // output format without having to re-compile this program:
+    data_out.build_patches ();
+    data_out.write (output);
+
+    timer.stop ();
+    deallog << "done ("
+            << timer()
+            << "s)"
+            << std::endl;
+  }
+
+
+
+  // @sect4{<code>UltrasoundProblem::run</code>}
+
+  // Here we simply execute our functions one after the other:
+  template <int dim>
+  void UltrasoundProblem<dim>::run ()
+  {
+    make_grid ();
+    setup_system ();
+    assemble_system ();
+    solve ();
+    output_results ();
+  }
+}
+
+
+// @sect4{The <code>main</code> function}
+
+// Finally the <code>main</code> function of the program. It has the same
+// structure as in almost all of the other tutorial programs. The only
+// exception is that we define ParameterHandler and
+// <code>ParameterReader</code> objects, and let the latter read in the
+// parameter values from a textfile called <code>step-29.prm</code>. The
+// values so read are then handed over to an instance of the UltrasoundProblem
+// class:
+int main ()
+{
+  try
+    {
+      using namespace dealii;
+      using namespace Step29;
+
+      ParameterHandler  prm;
+      ParameterReader   param(prm);
+      param.read_parameters("step-29.prm");
+
+      UltrasoundProblem<2>  ultrasound_problem (prm);
+      ultrasound_problem.run ();
+    }
+  catch (std::exception &exc)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    }
+  catch (...)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    }
+  return 0;
+}
diff --git a/examples/step-29/step-29.prm b/examples/step-29/step-29.prm
new file mode 100644
index 0000000..13b589c
--- /dev/null
+++ b/examples/step-29/step-29.prm
@@ -0,0 +1,28 @@
+# Listing of Parameters
+# ---------------------
+
+subsection Mesh & geometry parameters
+  # Distance of the focal point of the lens to the x-axis
+  set Focal distance        = 0.3
+
+  # Number of global mesh refinement steps applied to initial coarse grid
+  set Number of refinements = 5
+end
+
+
+subsection Physical constants
+  # Wave speed
+  set c     = 1.5e5
+
+  # Frequency
+  set omega = 3.0e7
+end
+
+
+subsection Output parameters
+  # Name of the output file (without extension)
+  set Output file   = solution
+
+  # A name for the output format to be used
+  set Output format = gmv
+end
diff --git a/examples/step-3/CMakeLists.txt b/examples/step-3/CMakeLists.txt
new file mode 100644
index 0000000..066b59b
--- /dev/null
+++ b/examples/step-3/CMakeLists.txt
@@ -0,0 +1,39 @@
+##
+#  CMake script for the step-3 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-3")
+
+# Declare all source files the target consists of. Here, this is only
+# the single ${TARGET}.cc file, but as you expand your project you may
+# wish to add other source files as well. If your project becomes much
+# larger, you may want to either replace the following statement by
+#    FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#    FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#    SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+# Look for deal.II 8.4 in the hinted locations; QUIET because failure is reported below:
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-3/doc/builds-on b/examples/step-3/doc/builds-on
new file mode 100644
index 0000000..6290b8e
--- /dev/null
+++ b/examples/step-3/doc/builds-on
@@ -0,0 +1 @@
+step-2
diff --git a/examples/step-3/doc/intro.dox b/examples/step-3/doc/intro.dox
new file mode 100644
index 0000000..ce011b2
--- /dev/null
+++ b/examples/step-3/doc/intro.dox
@@ -0,0 +1,387 @@
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+@dealiiVideoLecture{10}
+
+<h3>The basic functioning of finite elements</h3>
+
+This is the first example where we actually use finite elements to compute
+something. We
+will solve a simple version of Poisson's equation with zero boundary
+values, but a nonzero right hand side:
+@f{align*}
+  -\Delta u &= f \qquad\qquad & \text{in}\ \Omega,
+  \\
+  u &= 0 \qquad\qquad & \text{on}\ \partial\Omega.
+@f}
+We will solve this equation on the unit square, $\Omega=[0,1]^2$, for which
+you've already learned how to generate a mesh in step-1 and step-2. In
+this program, we will also only consider the particular case
+$f(\mathbf x)=1$ and come back to how to implement the more general
+case in the next tutorial program, step-4.
+
+If you've learned about the basics of the finite element method, you will
+remember the steps we need to take to approximate the solution $u$ by a finite
+dimensional approximation. Specifically, we first need to derive the weak form
+of the equation above, which we obtain by multiplying the equation by a test
+function $\varphi$ <i>from the left</i> (we will come back to the reason for
+multiplying from the left and not from the right below) and integrating over
+the domain $\Omega$:
+@f{align*}
+  -\int_\Omega \varphi \Delta u = \int_\Omega \varphi f.
+@f}
+This can be integrated by parts:
+@f{align*}
+  \int_\Omega \nabla\varphi \cdot \nabla u
+  -
+  \int_{\partial\Omega} \varphi \mathbf{n}\cdot \nabla u
+   = \int_\Omega \varphi f.
+@f}
+The test function $\varphi$ has to satisfy the same kind of boundary
+conditions (in mathematical terms: it needs to come from the tangent space of
+the set in which we seek the solution), so on the boundary $\varphi=0$ and
+consequently the weak form we are looking for reads
+@f{align*}
+  (\nabla\varphi, \nabla u)
+   = (\varphi, f),
+@f}
+where we have used the common notation $(a,b)=\int_\Omega a\; b$. The problem
+then asks for a function $u$ for which this statement is true for all test
+functions $\varphi$ from the appropriate space (which here is the space
+$H^1$).
+
+Of course we can't find such a function on a computer in the general case, and
+instead we seek an approximation $u_h(\mathbf x)=\sum_j U_j \varphi_j(\mathbf
+x)$, where the $U_j$ are unknown expansion coefficients we need to determine
+(the "degrees of freedom" of this problem), and $\varphi_i(\mathbf x)$ are the
+finite element shape functions we will use. To define these shape functions,
+we need the following:
+
+- A mesh on which to define shape functions. You have already seen how to
+  generate and manipulate the objects that describe meshes in step-1 and
+  step-2.
+- A finite element that describes the shape functions we want to use on the
+  reference cell (which in deal.II is always the unit interval $[0,1]$, the
+  unit square $[0,1]^2$ or the unit cube $[0,1]^3$, depending on which space
+  dimension you work in). In step-2, we had already used an object of type
+  FE_Q<2>, which denotes the usual Lagrange elements that define shape
+  functions by interpolation on support points. The simplest one is
+  FE_Q<2>(1), which uses polynomial degree 1. In 2d, these are often referred
+  to as <i>bilinear</i>, since they are linear in each of the two coordinates
+  of the reference cell. (In 1d, they would be <i>linear</i> and in 3d
+  <i>tri-linear</i>; however, in the deal.II documentation, we will frequently
+  not make this distinction and simply always call these functions "linear".)
+- A DoFHandler object that enumerates all the degrees of freedom on the mesh,
+  taking the reference cell description the finite element object provides as
+  the basis. You've also already seen how to do this in step-2.
+- A mapping that tells how the shape functions on the real cell are obtained
+  from the shape functions defined by the finite element class on the
+  reference cell. By default, unless you explicitly say otherwise, deal.II
+  will use a (bi-, tri-)linear mapping for this, so in most cases you don't
+  have to worry about this step.
+
+Through these steps, we now have a set of functions $\varphi_i$, and we can
+define the weak form of the discrete problem: Find a function $u_h$, i.e. find
+the expansion coefficients $U_i$ mentioned above, so that
+@f{align*}
+  (\nabla\varphi_i, \nabla u_h)
+   = (\varphi_i, f),
+   \qquad\qquad
+   i=0\ldots N-1.
+@f}
+Note that we here follow the convention that everything is counted starting at
+zero, as common in C and C++. This equation can be rewritten as a linear
+system by inserting the representation $u_h(\mathbf x)=\sum_j U_j
+\varphi_j(\mathbf x)$: Find a vector $U$ so that
+@f{align*}
+  A U = F,
+@f}
+where the matrix $A$ and the right hand side $F$ are defined as
+@f{align*}
+  A_{ij} &= (\nabla\varphi_i, \nabla \varphi_j),
+  \\
+  F_i &= (\varphi_i, f).
+@f}
+Before we move on with describing how these quantities can be computed, note
+that if we had multiplied the original equation from the <i>right</i> by a
+test function rather than from the left, then we would have obtained a linear
+system of the form
+@f{align*}
+  U^T A = F
+@f}
+with a row vector $F$. By transposing this system, this is of course
+equivalent to solving
+@f{align*}
+  A^T U = F
+@f}
+which here is the same as above since $A=A^T$ but in general is not. To avoid
+any sort of confusion, experience has shown that simply getting into the habit
+of multiplying the equation from the left rather than from the right (as is
+often done in the mathematical literature) avoids a common class of errors as
+the matrix is automatically correct and does not need to be transposed when
+comparing theory and implementation. See step-9 for the first example in this
+tutorial where we have a non-symmetric bilinear form for which it makes a
+difference whether we multiply from the right or from the left.
+
+Now we know what we need (namely objects that hold the matrix and
+vectors, as well as ways to compute $A_{ij},F_i$), and we can look at what it
+takes to make that happen:
+
+- The objects for $A,U,F$ are of type SparseMatrix and Vector, and we will see
+  in the program below what classes are used to solve linear systems.
+- We need a way to form the integrals. In the finite element method, this is
+  most commonly done using quadrature, i.e. the integrals are replaced by a
+  weighted sum over a set of points on each cell. That is, we first split the
+  integral over $\Omega$ into integrals over all cells,
+  @f{align*}
+    A_{ij} &= (\nabla\varphi_i, \nabla \varphi_j)
+    = \sum_{K \in {\mathbb T}} \int_K \nabla\varphi_i \cdot \nabla \varphi_j,
+    \\
+    F_i &= (\varphi_i, f)
+    = \sum_{K \in {\mathbb T}} \int_K \varphi_i f,
+  @f}
+  and then approximate each cell's contribution by quadrature:
+  @f{align*}
+    A^K_{ij} &=
+    \int_K \nabla\varphi_i \cdot \nabla \varphi_j
+    \approx
+    \sum_q \nabla\varphi_i(\mathbf x^K_q) \cdot \nabla
+    \varphi_j(\mathbf x^K_q) w_q^K,
+    \\
+    F^K_i &=
+    \int_K \varphi_i f
+    \approx
+    \sum_q \varphi_i(\mathbf x^K_q) f(\mathbf x^K_q) w^K_q,
+  @f}
+  where $\mathbf x^K_q$ is the $q$th quadrature point on cell $K$, and $w^K_q$
+  the $q$th quadrature weight. There are different parts to what is needed in
+  doing this, and we will discuss them in turn next.
+- First, we need a way to describe the location $\mathbf x_q^K$ of quadrature
+  points and their weights $w^K_q$. They are usually mapped from the reference
+  cell in the same way as shape functions, i.e., implicitly using the
+  MappingQ1 class or, if you explicitly say so, through one of the other
+  classes derived from Mapping. The locations and weights on the reference
+  cell are described by objects derived from the Quadrature base
+  class. Typically, one chooses a quadrature formula (i.e. a set of points and
+  weights) so that the quadrature exactly equals the integral in the matrix;
+  this can be achieved because all factors in the integral are polynomial, and
+  is done by Gaussian quadrature formulas, implemented in the QGauss class.
+- We then need something that can help us evaluate $\varphi_i(\mathbf x^K_q)$
+  on cell $K$. This is what the FEValues class does: it takes a finite element
+  object to describe $\varphi$ on the reference cell, a quadrature object to
+  describe the quadrature points and weights, and a mapping object (or
+  implicitly takes the MappingQ1 class) and provides values and derivatives of
+  the shape functions on the real cell $K$ as well as all sorts of other
+  information needed for integration, at the quadrature points located on $K$.
+
+FEValues really is the central class in the assembly process. One way you can
+view it is as follows: The FiniteElement and derived classes describe shape
+<i>functions</i>, i.e., infinite dimensional objects: functions have values at
+every point. We need this for theoretical reasons because we want to perform
+our analysis with integrals over functions. However, for a computer, this is a
+very difficult concept, since they can in general only deal with a finite
+amount of information, and so we replace integrals by sums over quadrature
+points that we obtain by mapping (the Mapping object) points defined on
+a reference cell (the Quadrature object) onto points on the real cell. In
+essence, we reduce the problem to one where we only need a finite amount of
+information, namely shape function values and derivatives, quadrature weights,
+normal vectors, etc, exclusively at a finite set of points. The FEValues class
+is the one that brings the three components together and provides this finite
+set of information on a particular cell $K$. You will see it in action when we
+assemble the linear system below.
+
+It is noteworthy that all of this could also be achieved if you simply created
+these three objects yourself in an application program, and juggled the
+information yourself. However, this would neither be simpler (the FEValues
+class provides exactly the kind of information you actually need) nor faster:
+the FEValues class is highly optimized to only compute on each cell the
+particular information you need; if anything can be re-used from the previous
+cell, then it will do so, and there is a lot of code in that class to make
+sure things are cached wherever this is advantageous.
+
+The final piece of this introduction is to mention that after a linear
+system is obtained, it is solved using an iterative solver and then
+postprocessed: we create an output file using the DataOut class that can then
+be visualized using one of the common visualization programs.
+
+@note The preceding overview of all the important steps of any finite element
+implementation has its counterpart in deal.II: The library can naturally be
+grouped into a number of "modules" that cover the basic concepts just
+outlined. You can access these modules through the tab at the top of this
+page. An overview of the most fundamental groups of concepts is also available
+on the <a href="index.html">front page of the deal.II manual</a>.
+
+
+<h3>About the implementation</h3>
+
+Although this is the simplest possible equation you can solve using the finite
+element method, this program shows the basic structure of most finite
+element programs and also serves as the template that almost all of the
+following programs will essentially follow. Specifically, the main class of
+this program looks like this:
+@code
+class Step3
+{
+  public:
+    Step3 ();
+    void run ();
+
+  private:
+    void make_grid ();
+    void setup_system ();
+    void assemble_system ();
+    void solve ();
+    void output_results () const;
+
+    Triangulation<2>     triangulation;
+    FE_Q<2>              fe;
+    DoFHandler<2>        dof_handler;
+
+    SparsityPattern      sparsity_pattern;
+    SparseMatrix<double> system_matrix;
+    Vector<double>       solution;
+    Vector<double>       system_rhs;
+};
+@endcode
+
+This follows the object oriented programming mantra of <a
+href="http://en.wikipedia.org/wiki/Encapsulation_(object-oriented_programming)">data
+encapsulation</a>, i.e. we do our best to hide almost all internal details of
+this class in private members that are not accessible to the outside.
+
+Let's start with the member variables: These follow the building blocks we
+have outlined above in the bullet points, namely we need a Triangulation and a
+DoFHandler object, and a finite element object that describes the kinds of
+shape functions we want to use. The second group of objects relate to the
+linear algebra: the system matrix and right hand side as well as the solution
+vector, and an object that describes the sparsity pattern of the matrix. This
+is all this class needs (and the essentials that any solver for a stationary
+PDE requires) and that needs to survive throughout the entire program. In
+contrast to this, the FEValues object we need for assembly is only required
+throughout assembly, and so we create it as a local object in the function
+that does that and destroy it again at its end.
+
+Secondly, let's look at the member functions. These, as well, already form the
+common structure that almost all following tutorial programs will use:
+<ul>
+  <li> <code>make_grid()</code>: This is what one could call a
+       <i>preprocessing function</i>. As its name suggests, it sets up the
+       object that stores the triangulation. In later examples, it could also
+       deal with boundary conditions, geometries, etc.
+  <li> <code>setup_system()</code>: This then is the function in which all the
+       other data structures are set up that are needed to solve the
+       problem. In particular, it will initialize the DoFHandler object and
+       correctly size the various objects that have to do with the linear
+       algebra. This function is often separated from the preprocessing
+       function above because, in a time dependent program, it may be called
+       at least every few time steps whenever the mesh
+       is adaptively refined (something we will see how to do in step-6). On
+       the other hand, setting up the mesh itself in the preprocessing
+       function above is done only once at the beginning of the program and
+       is, therefore, separated into its own function.
+  <li> <code>assemble_system()</code>: This, then is where the contents of the
+       matrix and right hand side are computed, as discussed at length in the
+       introduction above. Since doing something with this linear system is
+       conceptually very different from computing its entries, we separate it
+       from the following function.
+  <li> <code>solve()</code>: This then is the function in which we compute the
+       solution $U$ of the linear system $AU=F$. In the current program, this
+       is a simple task since the matrix is so simple, but it will become a
+       significant part of a program's size whenever the problem is not so
+       trivial any more (see, for example, step-20, step-22, or step-31 once
+       you've learned a bit more about the library).
+  <li> <code>output_results()</code>: Finally, when you have computed a
+       solution, you probably want to do something with it. For example, you
+       may want to output it in a format that can be visualized, or you may
+       want to compute quantities you are interested in: say, heat fluxes in a
+       heat exchanger, air friction coefficients of a wing, maximum bridge
+       loads, or simply the value of the numerical solution at a point. This
+       function is therefore the place for postprocessing your solution.
+</ul>
+All of this is held together by the single public function (other than the
+constructor), namely the <code>run()</code> function. It is the one that is
+called from the place where an object of this type is created, and it is the
+one that calls all the other functions in their proper order. Encapsulating
+this operation into the <code>run()</code> function, rather than calling all
+the other functions from <code>main()</code> makes sure that you
+can change how the separation of concerns within this class is
+implemented. For example, if one of the functions becomes too big, you can
+split it up into two, and the only places you have to be concerned about
+changing as a consequence are within this very same class, and not anywhere
+else.
+
+As mentioned above, you will see this general structure — sometimes with
+variants in spelling of the functions' names, but in essentially this order of
+separation of functionality — again in many of the
+following tutorial programs.
+
+
+<h3> A note on types </h3>
+
+deal.II defines a number of integral %types via <code>typedef</code>s in
+namespace types. In particular, in this program you will see
+types::global_dof_index in a couple of places: an integer type that is used to
+denote the <i>global</i> index of a degree of freedom, i.e., the index of a
+particular degree of freedom within the DoFHandler object that is defined on
+top of a triangulation (as opposed to the index of a particular degree of
+freedom within a particular cell). For the current program
+(as well as almost all of the tutorial programs), you will have a few thousand
+to maybe a few million unknowns globally (and, for $Q_1$ elements, you will
+have 4 <i>locally on each cell</i> in 2d and 8 in 3d). Consequently, a data
+type that can store sufficiently large numbers for global DoF indices is
+<code>unsigned int</code>, given that it can store numbers between 0 and
+slightly more than 4 billion (on most systems, where integers are 32-bit). In
+fact, this is what types::global_dof_index is.
+
+So, why not just use <code>unsigned int</code> right away? deal.II used to do
+this until version 7.3. However, deal.II supports very large computations (via
+the framework discussed in step-40) that may have more than 4 billion unknowns
+when spread across a few thousand processors. Consequently, there are
+situations where <code>unsigned int</code> is not sufficiently large and we
+need a 64-bit unsigned integral type. To make this possible, we introduced
+types::global_dof_index which by default is defined as simply <code>unsigned
+int</code> whereas it is possible to define it as <code>unsigned long long
+int</code> if necessary, by passing a particular flag during configuration
+(see the ReadMe file).
+
+This covers the technical aspect. But there is also a documentation purpose:
+everywhere in the library and codes that are built on it, if you see a place
+using the data type types::global_dof_index, you immediately know that the
+quantity that is being referenced is, in fact, a global dof index. No such
+meaning would be apparent if we had just used <code>unsigned int</code> (which
+may also be a local index, a boundary indicator, a material id,
+etc.). Immediately knowing what a variable refers to also helps avoid errors:
+it's quite clear that there must be a bug if you see an object of type
+types::global_dof_index being assigned to a variable of type
+types::subdomain_id, even though they are both represented by unsigned
+integers and the compiler will, consequently, not complain.
+
+In more practical terms what the presence of this type means is that during
+assembly, we create a $4\times 4$ matrix (in 2d, using a $Q_1$ element) of the
+contributions of the cell we are currently sitting on, and then we need to add
+the elements of this matrix to the appropriate elements of the global (system)
+matrix. For this, we need to get at the global indices of the degrees of
+freedom that are local to the current cell, for which we will always use the
+following piece of the code:
+@code
+  cell->get_dof_indices (local_dof_indices);
+@endcode
+where <code>local_dof_indices</code> is declared as
+@code
+  std::vector<types::global_dof_index> local_dof_indices (fe.dofs_per_cell);
+@endcode
+The name of this variable might be a bit of a misnomer -- it stands for "the
+global indices of those degrees of freedom locally defined on the current
+cell" -- but variables that hold this information are universally named this
+way throughout the library.
+
+@note types::global_dof_index is not the only type defined in this
+namespace. Rather, there is a whole family, including types::subdomain_id,
+types::boundary_id, and types::material_id. All of these are
+<code>typedef</code>s for integer data types but, as explained above, they are
+used throughout the library so that (i) the intent of a variable becomes more
+easily discerned, and (ii) so that it becomes possible to change the actual
+type to a larger one if necessary without having to go through the entire
+library and figure out whether a particular use of <code>unsigned int</code>
+corresponds to, say, a material indicator.
+
diff --git a/examples/step-3/doc/kind b/examples/step-3/doc/kind
new file mode 100644
index 0000000..15a13db
--- /dev/null
+++ b/examples/step-3/doc/kind
@@ -0,0 +1 @@
+basic
diff --git a/examples/step-3/doc/results.dox b/examples/step-3/doc/results.dox
new file mode 100644
index 0000000..a890cad
--- /dev/null
+++ b/examples/step-3/doc/results.dox
@@ -0,0 +1,218 @@
+<h1>Results</h1>
+
+The output of the program looks as follows:
+@code
+Number of active cells: 1024
+Number of degrees of freedom: 1089
+DEAL:cg::Starting value 0.121094
+DEAL:cg::Convergence step 48 value 5.33692e-13
+@endcode
+
+The first two lines are what we wrote to <code>cout</code>. The last
+two lines were generated without our intervention by the CG
+solver. The first of these states the residual at the start of the
+iteration, while the last line tells us that the solver needed 48
+iterations to bring the norm of the residual to 5.3e-13, i.e. below
+the threshold 1e-12 which we have set in the <code>solve()</code> function. We will
+show in the next program how to suppress this output, which is
+sometimes useful for debugging purposes, but often clutters up the
+screen display.
+
+Apart from the output shown above, the program generated the file
+<code>solution.gpl</code>, which is in GNUPLOT format. It can be
+viewed as follows: invoke GNUPLOT and enter the following sequence of
+commands at its prompt:
+@code
+examples/\step-3> gnuplot
+
+        G N U P L O T
+        Version 3.7 patchlevel 3
+        last modified Thu Dec 12 13:00:00 GMT 2002
+        System: Linux 2.6.11.4-21.10-default
+
+        Copyright(C) 1986 - 1993, 1998 - 2002
+        Thomas Williams, Colin Kelley and many others
+
+        Type `help` to access the on-line reference manual
+        The gnuplot FAQ is available from
+        http://www.gnuplot.info/gnuplot-faq.html
+
+        Send comments and requests for help to <info-gnuplot at dartmouth.edu>
+        Send bugs, suggestions and mods to <bug-gnuplot at dartmouth.edu>
+
+
+Terminal type set to 'x11'
+gnuplot> set style data lines
+gnuplot> splot "solution.gpl"
+@endcode
+This produces the picture of the solution below left. Alternatively,
+you can order GNUPLOT to do some hidden line removal by the command
+@code
+gnuplot> set hidden3d
+@endcode
+to get the result at the right:
+
+<TABLE WIDTH="60%" ALIGN="center">
+  <tr>
+    <td ALIGN="center">
+      <img src="http://www.dealii.org/images/steps/developer/step-3.solution-1.png" alt="">
+    </td>
+
+    <td ALIGN="center">
+      <img src="http://www.dealii.org/images/steps/developer/step-3.solution-2.png" alt="">
+    </td>
+  </tr>
+</table>
+
+
+
+<a name="extensions"></a>
+<h3>Possibilities for extensions</h3>
+
+If you want to play around a little bit with this program, here are a few
+suggestions:
+</p>
+
+<ul>
+  <li>
+  Change the geometry and mesh: In the program, we have generated a square
+  domain and mesh by using the <code>GridGenerator::hyper_cube</code>
+  function. However, the <code>GridGenerator</code> has a good number of other
+  functions as well. Try an L-shaped domain, a ring, or other domains you find
+  there.
+  </li>
+
+  <li>
+  Change the boundary condition: The code uses the <code>ZeroFunction</code>
+  function to generate zero boundary conditions. However, you may want to try
+  non-zero constant boundary values using <code>ConstantFunction<2>
+  (1)</code> instead of <code>ZeroFunction<2> ()</code> to have unit
+  Dirichlet boundary values. More exotic functions are described in the
+  documentation of the <code>Functions</code> namespace, and you may pick one
+  to describe your particular boundary values.
+  </li>
+
+  <li> Modify the type of boundary condition: Presently, what happens
+  is that we use Dirichlet boundary values all around, since the
+  default is that all boundary parts have boundary indicator zero, and
+  then we tell the
+  <code>VectorTools::interpolate_boundary_values</code> function to
+  interpolate boundary values to zero on all boundary components with
+  indicator zero.  <p> We can change this behavior if we assign parts
+  of the boundary different indicators. For example, try this
+  immediately after calling <code>GridGenerator::hyper_cube</code>:
+  @code
+  triangulation.begin_active()->face(0)->set_boundary_id(1);
+  @endcode
+  What this does is it first asks the triangulation to
+  return an iterator that points to the first active cell. Of course,
+  this being the coarse mesh for the triangulation of a square, the
+  triangulation has only a single cell at this moment, and it is
+  active. Next, we ask the cell to return an iterator to its first
+  face, and then we ask the face to reset the boundary indicator of
+  that face to 1. What then follows is this: When the mesh is refined,
+  faces of child cells inherit the boundary indicator of their
+  parents, i.e. even on the finest mesh, the faces on one side of the
+  square have boundary indicator 1. Later, when we get to
+  interpolating boundary conditions, the
+  <code>interpolate_boundary_values</code> will only produce boundary
+  values for those faces that have zero boundary indicator, and leave
+  those faces alone that have a different boundary indicator. What
+  this then does is to impose Dirichlet boundary conditions on the
+  former, and homogeneous Neumann conditions on the latter (i.e. zero
+  normal derivative of the solution, unless one adds additional terms
+  to the right hand side of the variational equality that deal with
+  potentially non-zero Neumann conditions). You will see this if you
+  run the program.
+
+  <li>
+  A slight variation of the last point would be to set different boundary
+  values as above, but then use a different boundary value function for
+  boundary indicator one. In practice, what you have to do is to add a second
+  call to <code>interpolate_boundary_values</code> for boundary indicator one:
+  @code
+  VectorTools::interpolate_boundary_values (dof_handler,
+					    1,
+					    ConstantFunction<2>(1.),
+					    boundary_values);
+  @endcode
+  If you have this call immediately after the first one to this function, then
+  it will interpolate boundary values on faces with boundary indicator 1 to the
+  unit value, and merge these interpolated values with those previously
+  computed for boundary indicator 0. The result will be that we will get
+  discontinuous boundary values, zero on three sides of the square, and one on
+  the fourth.
+
+  <li>
+  Observe convergence: We will only discuss computing errors in norms in
+  step-7, but it is easy to check that computations converge
+  already here. For example, we could evaluate the value of the solution in a
+  single point and compare the value for different %numbers of global
+  refinement (the number of global refinement steps is set in
+  <code>Step3::make_grid</code> above). To evaluate the
+  solution at a point, say at $(\frac 13, \frac 13)$, we could add the
+  following code to the <code>Step3::output_results</code> function:
+  @code
+    std::cout << "Solution at (1/3,1/3): "
+              << VectorTools::point_value (dof_handler, solution,
+                                           Point<2>(1./3, 1./3))
+              << std::endl;
+  @endcode
+  For 1 through 9 global refinement steps, we then get the following sequence
+  of point values:
+  <table align="center">
+    <tr> <td># of refinements</td> <td>$u_h(\frac 13,\frac13)$</td> </tr>
+    <tr> <td>1</td> <td>0.166667</td> </tr>
+    <tr> <td>2</td> <td>0.227381</td> </tr>
+    <tr> <td>3</td> <td>0.237375</td> </tr>
+    <tr> <td>4</td> <td>0.240435</td> </tr>
+    <tr> <td>5</td> <td>0.241140</td> </tr>
+    <tr> <td>6</td> <td>0.241324</td> </tr>
+    <tr> <td>7</td> <td>0.241369</td> </tr>
+    <tr> <td>8</td> <td>0.241380</td> </tr>
+    <tr> <td>9</td> <td>0.241383</td> </tr>
+  </table>
+  By noticing that the difference between each two consecutive values reduces
+  by about a factor of 4, we can conjecture that the "correct" value may be
+  $u(\frac 13, \frac 13)\approx 0.241384$. In fact, if we assumed this to be
+  the correct value, we could show that the sequence above indeed shows ${\cal
+  O}(h^2)$ convergence — theoretically, the convergence order should be
+  ${\cal O}(h^2 |\log h|)$ but the symmetry of the domain and the mesh may lead
+  to the better convergence order observed.
+
+  A slight variant of this would be to repeat the test with quadratic
+  elements. All you need to do is to set the polynomial degree of the finite
+  element to two in the constructor
+  <code>Step3::Step3</code>.
+
+  <li>Convergence of the mean: A different way to see that the solution
+  actually converges (to something — we can't tell whether it's really
+  the correct value!) is to compute the mean of the solution. To this end, add
+  the following code to <code>Step3::output_results</code>:
+  @code
+    std::cout << "Mean value: "
+              << VectorTools::compute_mean_value (dof_handler,
+						  QGauss<2>(3),
+						  solution,
+						  0)
+              << std::endl;
+  @endcode
+  The documentation of the function explains what the second and fourth
+  parameters mean, while the first and third should be obvious. Doing the same
+  study again where we change the number of global refinement steps, we get
+  the following result:
+  <table align="center">
+    <tr> <td># of refinements</td> <td>$\int_\Omega u_h(x)\; dx$</td> </tr>
+    <tr> <td>1</td> <td>0.093750</td> </tr>
+    <tr> <td>2</td> <td>0.127902</td> </tr>
+    <tr> <td>3</td> <td>0.137334</td> </tr>
+    <tr> <td>4</td> <td>0.139761</td> </tr>
+    <tr> <td>5</td> <td>0.140373</td> </tr>
+    <tr> <td>6</td> <td>0.140526</td> </tr>
+    <tr> <td>7</td> <td>0.140564</td> </tr>
+    <tr> <td>8</td> <td>0.140574</td> </tr>
+    <tr> <td>9</td> <td>0.140576</td> </tr>
+  </table>
+  Again, the difference between two adjacent values goes down by about a
+  factor of four, indicating convergence as ${\cal O}(h^2)$.
+</ul>
diff --git a/examples/step-3/doc/tooltip b/examples/step-3/doc/tooltip
new file mode 100644
index 0000000..1193c62
--- /dev/null
+++ b/examples/step-3/doc/tooltip
@@ -0,0 +1 @@
+Solving Poisson's equation.
diff --git a/examples/step-3/step-3.cc b/examples/step-3/step-3.cc
new file mode 100644
index 0000000..b71f63c
--- /dev/null
+++ b/examples/step-3/step-3.cc
@@ -0,0 +1,660 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 1999 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Authors: Wolfgang Bangerth, 1999,
+ *          Guido Kanschat, 2011
+ */
+
+
+// @sect3{Many new include files}
+
+// These include files are already known to you. They declare the classes
+// which handle triangulations and enumeration of degrees of freedom:
+#include <deal.II/grid/tria.h>
+#include <deal.II/dofs/dof_handler.h>
+// And this is the file in which the functions are declared that create grids:
+#include <deal.II/grid/grid_generator.h>
+
+// The next three files contain classes which are needed for loops over all
+// cells and to get the information from the cell objects. The first two have
+// been used before to get geometric information from cells; the last one is
+// new and provides information about the degrees of freedom local to a cell:
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_accessor.h>
+
+// This file contains the description of the Lagrange interpolation finite
+// element:
+#include <deal.II/fe/fe_q.h>
+
+// And this file is needed for the creation of sparsity patterns of sparse
+// matrices, as shown in previous examples:
+#include <deal.II/dofs/dof_tools.h>
+
+// The next two files are needed for assembling the matrix using quadrature on
+// each cell. The classes declared in them will be explained below:
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/base/quadrature_lib.h>
+
+// The following three include files we need for the treatment of boundary
+// values:
+#include <deal.II/base/function.h>
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/matrix_tools.h>
+
+// We're now almost to the end. The second to last group of include files is
+// for the linear algebra which we employ to solve the system of equations
+// arising from the finite element discretization of the Laplace equation. We
+// will use vectors and full matrices for assembling the system of equations
+// locally on each cell, and transfer the results into a sparse matrix. We
+// will then use a Conjugate Gradient solver to solve the problem, for which
+// we need a preconditioner (in this program, we use the identity
+// preconditioner which does nothing, but we need to include the file anyway):
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/precondition.h>
+
+// Finally, this is for output to a file and to the console:
+#include <deal.II/numerics/data_out.h>
+#include <fstream>
+#include <iostream>
+
+// ...and this is to import the deal.II namespace into the global scope:
+using namespace dealii;
+
+// @sect3{The <code>Step3</code> class}
+
+// Instead of the procedural programming of previous examples, we encapsulate
+// everything into a class for this program. The class consists of functions
+// which each perform certain aspects of a finite element program, a `main'
+// function which controls what is done first and what is done next, and a
+// list of member variables.
+
+// The public part of the class is rather short: it has a constructor and a
+// function `run' that is called from the outside and acts as something like
+// the `main' function: it coordinates which operations of this class shall be
+// run in which order. Everything else in the class, i.e. all the functions
+// that actually do anything, are in the private section of the class:
+class Step3
+{
+public:
+  Step3 ();
+
+  void run ();
+
+  // Then there are the member functions that mostly do what their names
+  // suggest and that have been discussed in the introduction already. Since
+  // they do not need to be called from outside, they are made private to this
+  // class.
+
+private:
+  void make_grid ();
+  void setup_system ();
+  void assemble_system ();
+  void solve ();
+  void output_results () const;
+
+  // And finally we have some member variables. There are variables describing
+  // the triangulation and the global numbering of the degrees of freedom (we
+  // will specify the exact polynomial degree of the finite element in the
+  // constructor of this class)...
+  Triangulation<2>     triangulation;
+  FE_Q<2>              fe;
+  DoFHandler<2>        dof_handler;   // declared after triangulation, which the constructor passes to it
+
+  // ...variables for the sparsity pattern and values of the system matrix
+  // resulting from the discretization of the Laplace equation...
+  SparsityPattern      sparsity_pattern;
+  SparseMatrix<double> system_matrix;
+
+  // ...and variables which will hold the right hand side and solution
+  // vectors.
+  Vector<double>       solution;
+  Vector<double>       system_rhs;
+};
+
+// @sect4{Step3::Step3}
+
+// Here comes the constructor. It does little more than specify
+// that we want bi-linear elements (denoted by the parameter to the finite
+// element object, which indicates the polynomial degree), and associate
+// the dof_handler variable with the triangulation we use. (Note that the
+// triangulation isn't set up with a mesh at all at the present time, but the
+// DoFHandler doesn't care: it only wants to know which triangulation it will
+// be associated with, and it only starts to care about an actual mesh once
+// you try to distribute degrees of freedom on the mesh using the
+// distribute_dofs() function.) All the other member variables of the Step3
+// class have a default constructor which does all we want.
+Step3::Step3 ()
+  :
+  fe (1),
+  dof_handler (triangulation)
+{}
+
+
+// @sect4{Step3::make_grid}
+
+// Now, the first thing we've got to do is to generate the triangulation on
+// which we would like to do our computation and number each vertex with a
+// degree of freedom. We have seen these two steps in step-1 and step-2
+// before, respectively.
+//
+// This function does the first part, creating the mesh.  We create the grid
+// and refine all cells five times. Since the initial grid (which is the
+// square $[-1,1] \times [-1,1]$) consists of only one cell, the final grid
+// has 32 times 32 cells, for a total of 1024.
+//
+// Unsure that 1024 is the correct number? We can check that by outputting the
+// number of cells using the <code>n_active_cells()</code> function on the
+// triangulation.
+void Step3::make_grid ()
+{
+  GridGenerator::hyper_cube (triangulation, -1, 1);   // a single cell: the square [-1,1] x [-1,1]
+  triangulation.refine_global (5);                    // five global refinements: 32 x 32 cells
+
+  std::cout << "Number of active cells: "
+            << triangulation.n_active_cells()
+            << std::endl;
+}
+
+// @note We call the Triangulation::n_active_cells() function, rather than
+// Triangulation::n_cells(). Here, <i>active</i> means the cells that aren't
+// refined any further. We stress the adjective "active" since there are more
+// cells, namely the parent cells of the finest cells, their parents, etc, up
+// to the one cell which made up the initial grid. Of course, on the next
+// coarser level, the number of cells is one quarter that of the cells on the
+// finest level, i.e. 256, then 64, 16, 4, and 1. If you called
+// <code>triangulation.n_cells()</code> instead in the code above, you would
+// consequently get a value of 1365 instead. On the other hand, the number of
+// cells (as opposed to the number of active cells) is not typically of much
+// interest, so there is no good reason to print it.
+
+
+// @sect4{Step3::setup_system}
+
+// Next we enumerate all the degrees of freedom and set up matrix and vector
+// objects to hold the system data. Enumerating is done by using
+// DoFHandler::distribute_dofs(), as we have seen in the step-2 example. Since
+// we use the FE_Q class and have set the polynomial degree to 1 in the
+// constructor, i.e. bilinear elements, this associates one degree of freedom
+// with each vertex. While we're generating output anyway, let us also look
+// at how many degrees of freedom are generated:
+void Step3::setup_system ()
+{
+  dof_handler.distribute_dofs (fe);
+  std::cout << "Number of degrees of freedom: "
+            << dof_handler.n_dofs()
+            << std::endl;
+  // There should be one DoF for each vertex. Since we have a 32 times 32
+  // grid, the number of DoFs should be 33 times 33, or 1089.
+
+  // As we have seen in the previous example, we set up a sparsity pattern by
+  // first creating a temporary structure, tagging those entries that might be
+  // nonzero, and then copying the data over to the SparsityPattern object
+  // that can then be used by the system matrix.
+  DynamicSparsityPattern dsp(dof_handler.n_dofs());
+  DoFTools::make_sparsity_pattern (dof_handler, dsp);
+  sparsity_pattern.copy_from(dsp);
+
+  // Note that the SparsityPattern object does not hold the values of the
+  // matrix, it only stores the places where entries are. The entries
+  // themselves are stored in objects of type SparseMatrix, of which our
+  // variable system_matrix is one.
+  //
+  // The distinction between sparsity pattern and matrix was made to allow
+  // several matrices to use the same sparsity pattern. This may not seem
+  // relevant here, but when you consider the size which matrices can have,
+  // and that it may take some time to build the sparsity pattern, this
+  // becomes important in large-scale problems if you have to store several
+  // matrices in your program.
+  system_matrix.reinit (sparsity_pattern);
+
+  // The last thing to do in this function is to set the sizes of the right
+  // hand side vector and the solution vector to the right values:
+  solution.reinit (dof_handler.n_dofs());
+  system_rhs.reinit (dof_handler.n_dofs());
+}
+
+// @sect4{Step3::assemble_system}
+
+
+// The next step is to compute the entries of the matrix and right hand side
+// that form the linear system from which we compute the solution. This is the
+// central function of each finite element program and we have discussed the
+// primary steps in the introduction already.
+//
+// The general approach to assemble matrices and vectors is to loop over all
+// cells, and on each cell compute the contribution of that cell to the global
+// matrix and right hand side by quadrature. The point to realize now is that
+// we need the values of the shape functions at the locations of quadrature
+// points on the real cell. However, both the finite element shape functions
+// as well as the quadrature points are only defined on the reference
+// cell. They are therefore of little help to us, and we will in fact hardly
+// ever query information about finite element shape functions or quadrature
+// points from these objects directly.
+//
+// Rather, what is required is a way to map this data from the reference cell
+// to the real cell. Classes that can do that are derived from the Mapping
+// class, though one again often does not have to deal with them directly:
+// many functions in the library can take a mapping object as argument, but
+// when it is omitted they simply resort to the standard bilinear Q1
+// mapping. We will go this route, and not bother with it for the moment (we
+// come back to this in step-10, step-11, and step-12).
+//
+// So what we now have is a collection of three classes to deal with: finite
+// element, quadrature, and mapping objects. That's too much, so there is one
+// type of class that orchestrates information exchange between these three:
+// the FEValues class. If given one instance of each of these three objects
+// (or two, and an implicit linear mapping), it will be able to provide you
+// with information about values and gradients of shape functions at
+// quadrature points on a real cell.
+//
+// Using all this, we will assemble the linear system for this problem in the
+// following function:
+void Step3::assemble_system ()
+{
+  // Ok, let's start: we need a quadrature formula for the evaluation of the
+  // integrals on each cell. Let's take a Gauss formula with two quadrature
+  // points in each direction, i.e. a total of four points since we are in
+  // 2D. This quadrature formula integrates polynomials of degrees up to three
+  // exactly (in 1D). It is easy to check that this is sufficient for the
+  // present problem:
+  QGauss<2>  quadrature_formula(2);
+  // And we initialize the object which we have briefly talked about above. It
+  // needs to be told which finite element we want to use, and the quadrature
+  // points and their weights (jointly described by a Quadrature object). As
+  // mentioned, we use the implied Q1 mapping, rather than specifying one
+  // ourselves explicitly. Finally, we have to tell it what we want it to
+  // compute on each cell: we need the values of the shape functions at the
+  // quadrature points (for the right hand side $(\varphi_i,f)$), their
+  // gradients (for the matrix entries $(\nabla \varphi_i, \nabla
+  // \varphi_j)$), and also the weights of the quadrature points and the
+  // determinants of the Jacobian transformations from the reference cell to
+  // the real cells.
+  //
+  // This list of what kind of information we actually need is given as a
+  // collection of flags as the third argument to the constructor of
+  // FEValues. Since these values have to be recomputed, or updated, every
+  // time we go to a new cell, all of these flags start with the prefix
+  // <code>update_</code> and then indicate what it actually is that we want
+  // updated. The flag to give if we want the values of the shape functions
+  // computed is #update_values; for the gradients it is
+  // #update_gradients. The determinants of the Jacobians and the quadrature
+  // weights are always used together, so only the products (Jacobians times
+  // weights, or short <code>JxW</code>) are computed; since we need them, we
+  // have to list #update_JxW_values as well:
+  FEValues<2> fe_values (fe, quadrature_formula,
+                         update_values | update_gradients | update_JxW_values);
+  // The advantage of this approach is that we can specify what kind of
+  // information we actually need on each cell. It is easily understandable
+  // that this approach can significantly speed up finite element computations,
+  // compared to approaches where everything, including second derivatives,
+  // normal vectors to cells, etc are computed on each cell, regardless of
+  // whether they are needed or not.
+  //
+  // @note The syntax <code>update_values | update_gradients |
+  // update_JxW_values</code> is not immediately obvious to anyone not
+  // used to programming bit operations in C for years already. First,
+  // <code>operator|</code> is the <i>bitwise or operator</i>, i.e.,
+  // it takes two integer arguments that are interpreted as bit
+  // patterns and returns an integer in which every bit is set for
+  // which the corresponding bit is set in at least one of the two
+  // arguments. For example, consider the operation
+  // <code>9|10</code>. In binary, <code>9=0b1001</code> (where the
+  // prefix <code>0b</code> indicates that the number is to be
+  // interpreted as a binary number) and <code>10=0b1010</code>. Going
+  // through each bit and seeing whether it is set in one of the
+  // arguments, we arrive at <code>0b1001|0b1010=0b1011</code> or, in
+  // decimal notation, <code>9|10=11</code>. The second piece of
+  // information you need to know is that the various
+  // <code>update_*</code> flags are all integers that have <i>exactly
+  // one bit set</i>. For example, assume that
+  // <code>update_values=0b00001=1</code>,
+  // <code>update_gradients=0b00010=2</code>,
+  // <code>update_JxW_values=0b10000=16</code>. Then
+  // <code>update_values | update_gradients | update_JxW_values =
+  // 0b10011 = 19</code>. In other words, we obtain a number that
+  // <i>encodes a binary mask representing all of the operations you
+  // want to happen</i>, where each operation corresponds to exactly
+  // one bit in the integer that, if equal to one, means that a
+  // particular piece should be updated on each cell and, if it is
+  // zero, means that we need not compute it. In other words, even
+  // though <code>operator|</code> is the <i>bitwise OR operation</i>,
+  // what it really represents is <i>I want this AND that AND the
+  // other</i>. Such binary masks are quite common in C programming,
+  // but maybe not so in higher level languages like C++, but serve
+  // the current purpose quite well.
+
+  // For use further down below, we define two shortcuts for values that will
+  // be used very frequently. First, an abbreviation for the number of degrees
+  // of freedom on each cell (since we are in 2D and degrees of freedom are
+  // associated with vertices only, this number is four, but we rather want to
+  // write the definition of this variable in a way that does not preclude us
+  // from later choosing a different finite element that has a different
+  // number of degrees of freedom per cell, or work in a different space
+  // dimension).
+  //
+  // Secondly, we also define an abbreviation for the number of quadrature
+  // points (here that should be four). In general, it is a good idea to use
+  // their symbolic names instead of hard-coding these numbers even if you know
+  // them, since you may want to change the quadrature formula and/or finite
+  // element at some time; the program will just work with these changes,
+  // without the need to change anything in this function.
+  //
+  // The shortcuts, finally, are only defined to make the following loops a
+  // bit more readable. You will see them in many places in larger programs,
+  // and `dofs_per_cell' and `n_q_points' are more or less by convention the
+  // standard names for these purposes:
+  const unsigned int   dofs_per_cell = fe.dofs_per_cell;
+  const unsigned int   n_q_points    = quadrature_formula.size();
+
+  // Now, we said that we wanted to assemble the global matrix and vector
+  // cell-by-cell. We could write the results directly into the global matrix,
+  // but this is not very efficient since access to the elements of a sparse
+  // matrix is slow. Rather, we first compute the contribution of each cell in
+  // a small matrix with the degrees of freedom on the present cell, and only
+  // transfer them to the global matrix when the computations are finished for
+  // this cell. We do the same for the right hand side vector. So let's first
+  // allocate these objects (these being local objects, all degrees of freedom
+  // are coupling with all others, and we should use a full matrix object
+  // rather than a sparse one for the local operations; everything will be
+  // transferred to a global sparse matrix later on):
+  FullMatrix<double>   cell_matrix (dofs_per_cell, dofs_per_cell);
+  Vector<double>       cell_rhs (dofs_per_cell);
+
+  // When assembling the contributions of each cell, we do this with the local
+  // numbering of the degrees of freedom (i.e. the number running from zero
+  // through dofs_per_cell-1). However, when we transfer the result into the
+  // global matrix, we have to know the global numbers of the degrees of
+  // freedom. When we query them, we need a scratch (temporary) array for
+  // these numbers (see the discussion at the end of the introduction for
+  // the type, types::global_dof_index, used here):
+  std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+
+  // Now for the loop over all cells. We have seen before how this works, so
+  // the following code should be familiar including the conventional names
+  // for these variables:
+  DoFHandler<2>::active_cell_iterator
+  cell = dof_handler.begin_active(),
+  endc = dof_handler.end();
+  for (; cell!=endc; ++cell)
+    {
+      // @note As already mentioned in step-1, there is a more convenient way
+      // of writing such loops if your compiler supports the C++11
+      // standard. See @ref CPP11 "the deal.II C++11 page" to see
+      // how this works.
+      //
+      // We are now sitting on one cell, and we would like the values and
+      // gradients of the shape functions be computed, as well as the
+      // determinants of the Jacobian matrices of the mapping between
+      // reference cell and true cell, at the quadrature points. Since all
+      // these values depend on the geometry of the cell, we have to have the
+      // FEValues object re-compute them on each cell:
+      fe_values.reinit (cell);
+
+      // Next, reset the local cell's contributions to global matrix and
+      // global right hand side to zero, before we fill them:
+      cell_matrix = 0;
+      cell_rhs = 0;
+
+      // Now it is time to start integration over the cell, which we
+      // do by looping over all quadrature points, which we will
+      // number by q_index.
+      for (unsigned int q_index=0; q_index<n_q_points; ++q_index)
+        {
+          // First assemble the matrix: For the Laplace problem, the
+          // matrix on each cell is the integral over the gradients of
+          // shape function i and j. Since we do not integrate, but
+          // rather use quadrature, this is the sum over all
+          // quadrature points of the integrands times the determinant
+          // of the Jacobian matrix at the quadrature point times the
+          // weight of this quadrature point. You can get the gradient
+          // of shape function $i$ at quadrature point with number q_index by
+          // using <code>fe_values.shape_grad(i,q_index)</code>; this
+          // gradient is a 2-dimensional vector (in fact it is of type
+          // Tensor@<1,dim@>, with here dim=2) and the product of two
+          // such vectors is the scalar product, i.e. the product of
+          // the two shape_grad function calls is the dot
+          // product. This is in turn multiplied by the Jacobian
+          // determinant and the quadrature point weight (that one
+          // gets together by the call to FEValues::JxW() ). Finally,
+          // this is repeated for all shape functions $i$ and $j$:
+          for (unsigned int i=0; i<dofs_per_cell; ++i)
+            for (unsigned int j=0; j<dofs_per_cell; ++j)
+              cell_matrix(i,j) += (fe_values.shape_grad (i, q_index) *
+                                   fe_values.shape_grad (j, q_index) *
+                                   fe_values.JxW (q_index));
+
+          // We then do the same thing for the right hand side. Here,
+          // the integral is over the shape function i times the right
+          // hand side function, which we choose to be the function
+          // with constant value one (more interesting examples will
+          // be considered in the following programs).
+          for (unsigned int i=0; i<dofs_per_cell; ++i)
+            cell_rhs(i) += (fe_values.shape_value (i, q_index) *
+                            1 *
+                            fe_values.JxW (q_index));
+        }
+      // Now that we have the contribution of this cell, we have to transfer
+      // it to the global matrix and right hand side. To this end, we first
+      // have to find out which global numbers the degrees of freedom on this
+      // cell have. Let's simply ask the cell for that information:
+      cell->get_dof_indices (local_dof_indices);
+
+      // Then again loop over all shape functions i and j and transfer the
+      // local elements to the global matrix. The global numbers can be
+      // obtained using local_dof_indices[i]:
+      for (unsigned int i=0; i<dofs_per_cell; ++i)
+        for (unsigned int j=0; j<dofs_per_cell; ++j)
+          system_matrix.add (local_dof_indices[i],
+                             local_dof_indices[j],
+                             cell_matrix(i,j));
+
+      // And again, we do the same thing for the right hand side vector.
+      for (unsigned int i=0; i<dofs_per_cell; ++i)
+        system_rhs(local_dof_indices[i]) += cell_rhs(i);
+    }
+
+
+  // Now almost everything is set up for the solution of the discrete
+  // system. However, we have not yet taken care of boundary values (in fact,
+  // Laplace's equation without Dirichlet boundary values is not even uniquely
+  // solvable, since you can add an arbitrary constant to the discrete
+  // solution). We therefore have to do something about the situation.
+  //
+  // For this, we first obtain a list of the degrees of freedom on the
+  // boundary and the value the shape function shall have there. For
+  // simplicity, we only interpolate the boundary value function, rather than
+  // projecting it onto the boundary. There is a function in the library which
+  // does exactly this: VectorTools::interpolate_boundary_values(). Its
+  // parameters are (omitting parameters for which default values exist and
+  // that we don't care about): the DoFHandler object to get the global
+  // numbers of the degrees of freedom on the boundary; the component of the
+  // boundary where the boundary values shall be interpolated; the boundary
+  // value function itself; and the output object.
+  //
+  // The component of the boundary is meant as follows: in many cases, you may
+  // want to impose certain boundary values only on parts of the boundary. For
+  // example, you may have inflow and outflow boundaries in fluid dynamics, or
+  // clamped and free parts of bodies in deformation computations of
+  // bodies. Then you will want to denote these different parts of the
+  // boundary by different numbers and tell the interpolate_boundary_values
+  // function to only compute the boundary values on a certain part of the
+  // boundary (e.g. the clamped part, or the inflow boundary). By default, all
+  // boundaries have the number `0', and since we have not changed that, this
+  // is still so; therefore, if we give `0' as the desired portion of the
+  // boundary, this means we get the whole boundary. If you have several
+  // kinds of boundaries, you have to number them differently. The
+  // function call below will then only determine boundary values for parts of
+  // the boundary.
+  //
+  // The function describing the boundary values is an object of type Function
+  // or of a derived class. One of the derived classes is ZeroFunction, which
+  // describes (not unexpectedly) a function which is zero everywhere. We
+  // create such an object in-place and pass it to the
+  // VectorTools::interpolate_boundary_values() function.
+  //
+  // Finally, the output object is a list of pairs of global degree of freedom
+  // numbers (i.e. the number of the degrees of freedom on the boundary) and
+  // their boundary values (which are zero here for all entries). This mapping
+  // of DoF numbers to boundary values is done by the <code>std::map</code>
+  // class.
+  std::map<types::global_dof_index,double> boundary_values;
+  VectorTools::interpolate_boundary_values (dof_handler,
+                                            0,
+                                            ZeroFunction<2>(),
+                                            boundary_values);
+  // Now that we got the list of boundary DoFs and their respective boundary
+  // values, let's use them to modify the system of equations
+  // accordingly. This is done by the following function call:
+  MatrixTools::apply_boundary_values (boundary_values,
+                                      system_matrix,
+                                      solution,
+                                      system_rhs);
+}
+
+
+// @sect4{Step3::solve}
+
+// The following function simply solves the discretized equation. As the
+// system is quite a large one for direct solvers such as Gauss elimination or
+// LU decomposition, we use a Conjugate Gradient algorithm. You should
+// remember that the number of variables here (only 1089) is a very small
+// number for finite element computations, where 100,000 is a more usual
+// number.  For this number of variables, direct methods are no longer usable
+// and you are forced to use methods like CG.
+void Step3::solve ()
+{
+  // First, we need to have an object that knows how to tell the CG algorithm
+  // when to stop. This is done by using a SolverControl object, and as
+  // stopping criterion we say: stop after a maximum of 1000 iterations (which
+  // is far more than is needed for 1089 variables; see the results section to
+  // find out how many were really used), and stop if the norm of the residual
+  // is below $10^{-12}$. In practice, the latter criterion will be the one
+  // which stops the iteration:
+  SolverControl           solver_control (1000, 1e-12);
+  // Then we need the solver itself. The template parameter to the SolverCG
+  // class is the type of the vectors, but the empty angle brackets indicate
+  // that we simply take the default argument (which is
+  // <code>Vector@<double@></code>):
+  SolverCG<>              solver (solver_control);
+
+  // Now solve the system of equations. The CG solver takes a preconditioner
+  // as its fourth argument. We don't feel ready to delve into this yet, so we
+  // tell it to use the identity operation as preconditioner:
+  solver.solve (system_matrix, solution, system_rhs,
+                PreconditionIdentity());
+  // Now that the solver has done its job, the solution variable contains the
+  // nodal values of the solution function.
+}
+
+
+// @sect4{Step3::output_results}
+
+// The last part of a typical finite element program is to output the results
+// and maybe do some postprocessing (for example compute the maximal stress
+// values at the boundary, or the average flux across the outflow, etc). We
+// have no such postprocessing here, but we would like to write the solution
+// to a file.
+void Step3::output_results () const
+{
+  // To write the output to a file, we need an object which knows about output
+  // formats and the like. This is the DataOut class, and we need an object of
+  // that type:
+  DataOut<2> data_out;
+  // Now we have to tell it where to take the values from which it shall
+  // write. We tell it which DoFHandler object to use, and the solution vector
+  // (and the name by which the solution variable shall appear in the output
+  // file). If we had more than one vector which we would like to look at in
+  // the output (for example right hand sides, errors per cell, etc) we would
+  // add them as well:
+  data_out.attach_dof_handler (dof_handler);
+  data_out.add_data_vector (solution, "solution");
+  // After the DataOut object knows which data it is to work on, we have to
+  // tell it to process them into something the back ends can handle. The
+  // reason is that we have separated the front end (which knows about how to
+  // treat DoFHandler objects and data vectors) from the back end (which knows
+  // many different output formats) and use an intermediate data format to
+  // transfer data from the front end to the back end. The data is transformed
+  // into this intermediate format by the following function:
+  data_out.build_patches ();
+
+  // Now we have everything in place for the actual output. Just open a file
+  // and write the data into it, using GNUPLOT format (there are other
+  // functions which write their data in PostScript, AVS, GMV, or some other
+  // format):
+  std::ofstream output ("solution.gpl");   // note: the stream is not checked for open failure
+  data_out.write_gnuplot (output);
+}
+
+
+// @sect4{Step3::run}
+
+// Finally, the last function of this class is the main function which calls
+// all the other functions of the <code>Step3</code> class. The order in which
+// this is done resembles the order in which most finite element programs
+// work. Since the names are mostly self-explanatory, there is not much to
+// comment about:
+void Step3::run ()
+{
+  make_grid ();         // create the square mesh and refine it globally
+  setup_system ();      // distribute DoFs; size sparsity pattern, matrix, vectors
+  assemble_system ();   // fill matrix and right hand side; apply boundary values
+  solve ();             // solve the linear system with CG
+  output_results ();    // write the solution to solution.gpl in GNUPLOT format
+}
+
+
+// @sect3{The <code>main</code> function}
+
+// This is the main function of the program. Since the concept of a main
+// function is mostly a remnant from the pre-object era in C/C++ programming,
+// it often does little more than create an object of the top-level class
+// and call its principal function.
+//
+// Finally, the first line of the function is used to enable output of the
+// deal.II logstream to the screen.  The deallog (which stands for deal-log,
+// not de-allog) variable represents a stream to which some parts of the
+// library write output. For example, iterative solvers will generate
+// diagnostics (starting residual, number of solver steps, final residual) as
+// can be seen when running this tutorial program.
+//
+// The output of deallog can be redirected to the console, to a file, or
+// both. But both are disabled by default. The output is nested in a way so
+// that each function can use a prefix string (separated by colons) for each
+// line of output; if it calls another function, that may also use its prefix
+// which is then printed after the one of the calling function. By running
+// this example (or looking at the "Results" section), you will see the solver
+// statistics prefixed with "DEAL:CG", which is two prefixes.  Since output
+// from functions which are nested deep below is usually not as important as
+// top-level output, you can give the deallog variable a maximal depth of
+// nested output for output to console and file. A depth of 0 (the default)
+// will disable output to the screen, while a value of 2 or higher will cause
+// the solver info in this example to be printed. Imagine that different
+// solvers can be nested, which we will see in step-22 for example, and you
+// might not want to see all this information.
+int main ()
+{
+  deallog.depth_console (2);
+
+  Step3 laplace_problem;
+  laplace_problem.run ();
+
+  return 0;
+}
diff --git a/examples/step-30/CMakeLists.txt b/examples/step-30/CMakeLists.txt
new file mode 100644
index 0000000..69c015a
--- /dev/null
+++ b/examples/step-30/CMakeLists.txt
@@ -0,0 +1,39 @@
+##
+#  CMake script for the step-30 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-30")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc  # expands to step-30.cc with the TARGET set above
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET  # QUIET: stay silent on failure; the IF() below reports it instead
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}  # CMake variables, parent directories, environment variable
+  )
+IF(NOT ${deal.II_FOUND})  # stop configuring and tell the user how to point CMake at deal.II
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()  # macro supplied by the deal.II package found above; presumably has to precede PROJECT() -- confirm in the deal.II CMake docs
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()  # macro supplied by the deal.II package; presumably sets up the build of TARGET from TARGET_SRC -- confirm in the deal.II CMake docs
diff --git a/examples/step-30/doc/builds-on b/examples/step-30/doc/builds-on
new file mode 100644
index 0000000..20c3064
--- /dev/null
+++ b/examples/step-30/doc/builds-on
@@ -0,0 +1 @@
+step-12
diff --git a/examples/step-30/doc/intro.dox b/examples/step-30/doc/intro.dox
new file mode 100644
index 0000000..3e21d47
--- /dev/null
+++ b/examples/step-30/doc/intro.dox
@@ -0,0 +1,490 @@
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+
+<h3>Overview</h3>
+
+This example is devoted to <em>anisotropic refinement</em>, which extends the
+possibilities of local refinement. In most parts, this is a modification of the
+step-12 tutorial program; we use the same DG method for a linear transport
+equation. This program will cover the following topics:
+<ol>
+  <li> <em>Anisotropic refinement</em>: What is the meaning of anisotropic refinement?
+  <li> <em>Implementation</em>: Necessary modifications of code to work with anisotropically refined meshes.
+  <li> <em>Jump indicator</em>: A simple indicator for anisotropic refinement in
+  the context of DG methods.
+</ol>
+The discretization itself will not be discussed, and neither will
+implementation techniques not specific to anisotropic refinement used
+here. Please refer to step-12 for this.
+
+Please note, at the moment of writing this tutorial program, anisotropic
+refinement is only fully implemented for discontinuous Galerkin Finite
+Elements. This may later change (or may already have).
+
+
+@note While this program is a modification of step-12, it is an adaptation of
+a version of step-12 written early on in the history of deal.II when the
+MeshWorker framework wasn't available yet. Consequently, it bears little
+resemblance to the step-12 as it exists now, apart from the fact that it
+solves the same equation with the same discretization.
+
+
+
+<h3>Anisotropic refinement</h3>
+
+All the adaptive processes in the preceding tutorial programs were based on
+<em>isotropic</em> refinement of cells, which cuts all edges in half and forms
+new cells of these split edges (plus some additional edges, faces and vertices,
+of course). In deal.II, <em>anisotropic refinement</em> refers to the process of
+splitting only part of the edges while leaving the others unchanged. Consider a
+simple square cell, for example:
+@code
+  *-------*
+  |       |
+  |       |
+  |       |
+  *-------*
+@endcode
+After the usual refinement it will consist of four children and look like this:
+@code
+  *---*---*
+  |   |   |
+  *---*---*     RefinementCase<2>::cut_xy
+  |   |   |
+  *---*---*
+@endcode
+The new anisotropic refinement may take two forms: either we can split the edges
+which are parallel to the horizontal x-axis, resulting in these two child cells:
+@code
+  *---*---*
+  |   |   |
+  |   |   |     RefinementCase<2>::cut_x
+  |   |   |
+  *---*---*
+@endcode
+or we can split the two edges which run along the y-axis, resulting again in two
+children, which look like this, however:
+@code
+  *-------*
+  |       |
+  *-------*     RefinementCase<2>::cut_y
+  |       |
+  *-------*
+@endcode
+All refinement cases of cells are described by an enumeration
+RefinementPossibilities::Possibilities, and the above anisotropic
+cases are called @p cut_x and @p cut_y for obvious reasons. The
+isotropic refinement case is called @p cut_xy in 2D and can be
+requested from the GeometryInfo class via
+GeometryInfo<2>::isotropic_refinement.
+
+In 3D, there is a third axis which can be split, the z-axis, and thus we
+have an additional refinement case @p cut_z here. Isotropic refinement will now
+refine a cell along the x-, y- and z-axes and thus be referred to as @p
+cut_xyz. Additional cases @p cut_xy, @p cut_xz and @p cut_yz exist, which refine
+a cell along two of the axes, but not along the third one. Given a hex cell with
+x-axis running to the right, y-axis 'into the page' and z-axis to the top,
+@code
+      *-----------*
+     /           /|
+    /           / |
+   /           /  |
+  *-----------*   |
+  |           |   |
+  |           |   *
+  |           |  /
+  |           | /
+  |           |/
+  *-----------*
+@endcode
+we have the isotropic refinement case,
+@code
+      *-----*-----*
+     /     /     /|
+    *-----*-----* |
+   /     /     /| *
+  *-----*-----* |/|
+  |     |     | * |
+  |     |     |/| *
+  *-----*-----* |/
+  |     |     | *
+  |     |     |/
+  *-----*-----*
+
+     cut_xyz
+@endcode
+three anisotropic cases which refine only one axis:
+@code
+      *-----*-----*           *-----------*           *-----------*
+     /     /     /|          /           /|          /           /|
+    /     /     / |         *-----------* |         /           / |
+   /     /     /  |        /           /| |        /           /  *
+  *-----*-----*   |       *-----------* | |       *-----------*  /|
+  |     |     |   |       |           | | |       |           | / |
+  |     |     |   *       |           | | *       |           |/  *
+  |     |     |  /        |           | |/        *-----------*  /
+  |     |     | /         |           | *         |           | /
+  |     |     |/          |           |/          |           |/
+  *-----*-----*           *-----------*           *-----------*
+
+     cut_x                   cut_y                   cut_z
+@endcode
+and three cases which refine two of the three axes:
+@code
+      *-----*-----*           *-----*-----*           *-----------*
+     /     /     /|          /     /     /|          /           /|
+    *-----*-----* |         /     /     / |         *-----------* |
+   /     /     /| |        /     /     /  *        /           /| *
+  *-----*-----* | |       *-----*-----*  /|       *-----------* |/|
+  |     |     | | |       |     |     | / |       |           | * |
+  |     |     | | *       |     |     |/  *       |           |/| *
+  |     |     | |/        *-----*-----*  /        *-----------* |/
+  |     |     | *         |     |     | /         |           | *
+  |     |     |/          |     |     |/          |           |/
+  *-----*-----*           *-----*-----*           *-----------*
+
+     cut_xy                  cut_xz                  cut_yz
+@endcode
+For 1D problems, anisotropic refinement can make no difference, as there is only
+one coordinate direction for a cell, so it is not possible to split it
+in any other way than isotropically.
+
+<h4>Motivation</h4>
+Adaptive local refinement is used to obtain fine meshes which are well adapted
+to solving the problem at hand efficiently. In short, the size of cells which
+produce a large error is reduced to obtain a better approximation of the
+solution to the problem at hand. However, a lot of problems contain anisotropic
+features. Prominent examples are shocks or boundary layers in compressible
+viscous flows. An efficient mesh approximates these features with cells of higher aspect ratio
+which are oriented according to the mentioned features. Using only isotropic
+refinement, the aspect ratios of the original mesh cells are preserved, as they
+are inherited by the children of a cell. Thus, starting from an isotropic mesh, a
+boundary layer will be refined in order to catch the rapid variation of the flow
+field in the wall normal direction, thus leading to cells with very small edge
+lengths both in normal and tangential direction. Usually, much higher edge
+lengths in tangential direction and thus significantly fewer cells could be used
+without a significant loss in approximation accuracy. An anisotropic
+refinement process can modify the aspect ratio from mother to child cells by a
+factor of two for each refinement step. In the course of several refinements,
+the aspect ratio of the fine cells can be optimized, saving a considerable
+number of cells and correspondingly degrees of freedom and thus computational
+resources, memory as well as CPU time.
+
+<h3>Implementation</h3>
+
+Most of the time, when we do finite element computations, we only consider one
+cell at a time, for example to calculate cell contributions to the global
+matrix, or to interpolate boundary values. However, sometimes we have to look
+at how cells are related in our algorithms. Relationships between cells come
+in two forms: neighborship and mother-child relationship. For the case of
+isotropic refinement, deal.II uses certain conventions (invariants) for cell
+relationships that are always maintained. For example, a refined cell always
+has exactly $2^{dim}$ children. And (except for the 1d case), two neighboring
+cells may differ by at most one refinement level: they are equally often
+refined or one of them is exactly once more refined, leaving exactly one
+hanging node on the common face. Almost all of the time these invariants are
+only of concern in the internal implementation of the library. However, there
+are cases where knowledge of them is also relevant to an application program.
+
+In the current context, it is worth noting that the kind of mesh refinement
+affects some of the most fundamental assumptions. Consequently, some of the
+usual code found in application programs will need modifications to exploit
+the features of meshes which were created using anisotropic
+refinement. For those interested in how deal.II evolved, it may be of
+interest that the loosening of such invariants required some
+incompatible changes. For example, the library used to have a member
+GeometryInfo<dim>::children_per_cell that specified how many children
+a cell has once it is refined. For isotropic refinement, this number
+is equal to $2^{dim}$, as mentioned above. However, for anisotropic refinement, this number
+does not exist, as it can be either two or four in 2D and two, four or eight in
+3D, and the member GeometryInfo<dim>::children_per_cell has
+consequently been removed. It has now been replaced by
+GeometryInfo<dim>::max_children_per_cell which specifies the
+<i>maximum</i> number of children a cell can have. How many children a
+refined cell has was previously available as static information, but
+now it depends on the actual refinement state of a cell and can be
+retrieved using the function call <code>cell-@>n_children()</code>,
+a call that works equally well for both isotropic and anisotropic
+refinement. A very similar situation can be found for
+faces and their subfaces: the previously available variable
+GeometryInfo<dim>::subfaces_per_face no
+longer exists; the pertinent information can now be queried using
+GeometryInfo<dim>::max_children_per_face or <code>face->n_children()</code>,
+depending on the context.
+
+Another important aspect, and the most important one in this tutorial, is
+the treatment of neighbor-relations when assembling jump terms on the
+faces between cells. Looking at the documentation of the
+assemble_system functions in step-12 we notice that we need to decide if a
+neighboring cell is coarser, finer or on the same (refinement) level as our
+current cell. These decisions do not work in the same way for anisotropic
+refinement as the information given by the <em>level</em> of a cell is not
+enough to completely characterize anisotropic cells; for example, are
+the terminal children of a two-dimensional
+cell that is first cut in $x$-direction and whose children are then
+cut in $y$-direction on level 2, or are they on level 1 as they would
+be if the cell would have been refined once isotropically, resulting
+in the same set of finest cells?
+
+After anisotropic refinement, a coarser neighbor is not necessarily
+exactly one level below ours, but can pretty much have any level
+relative to the current one; in fact, it can even be on a higher
+level even though it is coarser. Thus the decisions
+have to be made on a different basis, whereas the intention of the
+decisions stays the same.
+
+In the following, we will discuss the cases that can happen when we
+want to compute contributions to the matrix (or right hand side) of
+the form
+@f[
+  \int_{\partial K} \varphi_i(x) \varphi_j(x) \; dx
+@f]
+or similar; remember that we integrate terms like this using the
+FEFaceValues and FESubfaceValues classes. We will also show how to
+write code that works for both isotropic and anisotropic refinement:
+
+<ul>
+  <li> <em>Finer neighbor</em>: If we are on an active cell and want
+  to integrate over a face $f\subset \partial K$, the first
+  possibility is that the neighbor behind this face is more refined,
+  i.e. has children occupying only part of the
+  common face. In this case, the face
+  under consideration has to be a refined one, which we can determine by
+  asking <code>if(face->has_children())</code>. If this is true, we need to
+  loop over
+  all subfaces and get the neighbors' child behind this subface, so that we can
+  reinit an FEFaceValues object with the neighbor and an FESubfaceValues object
+  with our cell and the respective subface.
+
+  For isotropic refinement, this case is reasonably simple because we
+  know that an invariant of the isotropically refined adaptive meshes
+  in deal.II is that neighbors can only differ by exactly one
+  refinement level. However, this isn't quite true any more for
+  anisotropically refined meshes, in particular in 3d; there,
+  the active cell we are interested in on the other side of $f$ might not
+  actually be a child of our
+  neighbor, but perhaps a grandchild or even a farther offspring. Fortunately,
+  this complexity is hidden in the internals of the library. All we need to do
+  is call the <code>cell->neighbor_child_on_subface(face_no, subface_no)</code>
+  function. Still, in 3D there are two cases which need special consideration:
+  <ul>
+    <li> If the neighbor is refined more than once anisotropically, it might be
+  that there are not two or four but actually three subfaces to
+  consider. Imagine
+  the following refinement process of the (two-dimensional) face of
+  the (three-dimensional) neighbor cell we are considering: first the
+  face is refined along x, later on only the left subface is refined along y.
+@code
+   *-------*        *---*---*        *---*---*
+   |       |        |   |   |        |   |   |
+   |       |  --->  |   |   |  --->  *---*   |
+   |       |        |   |   |        |   |   |
+   *-------*        *---*---*        *---*---*
+@endcode
+     Here the number of subfaces is three. It is important to note the subtle
+  differences between <code>face->n_children()</code> and
+  <code>face->number_of_children()</code>. The first function returns the number of
+  immediate children, which would be two for the above example, whereas the
+  second returns the number of active offsprings, which is the correct three in
+  the example above. Using <code>face->number_of_children()</code> works for
+  isotropic and anisotropic as well as 2D and 3D cases, so it should always be
+  used. It should be noted that if any of the cells behind the two
+  small subfaces on the left side of the rightmost image is further
+  refined, then the current cell (i.e. the side from which we are
+  viewing this common face) is going to be refined as well: this is so
+  because otherwise the invariant of having only one hanging node per
+  edge would be violated.
+
+    <li> It might be that the neighbor is coarser, but still has children which
+  are finer than our current cell. This situation can occur if two equally
+  coarse cells are refined, where one of the cells has two children at the face
+  under consideration and the other one four. The cells in the next graphic are
+  only separated from each other to show the individual refinement cases.
+@code
+      *-----------*     *-----------*
+     /           /|    /           /|
+    ############# |   +++++++++++++ |
+   #           ## |  +           ++ *
+  ############# # | +++++++++++++ +/|
+  #           # # | +           + + |
+  #           # # * +           +++ *
+  #           # #/  +++++++++++++ +/
+  #           # #   +           + +
+  #           ##    +           ++
+  #############     +++++++++++++
+@endcode
+
+  Here, the left two cells resulted from an anisotropic bisection of
+  the mother cell in $y$-direction, whereas the right four cells
+  resulted from a simultaneous anisotropic refinement in both the $y$-
+  and $z$-directions.
+  The left cell marked with # has two finer neighbors marked with +, but the
+  actual neighbor of the left cell is the complete right mother cell, as the
+  two cells marked with + are finer and their direct mother is the one
+  large cell.
+  </ul>
+
+  However, it is comforting to know that
+  <code>cell->neighbor_child_on_subface(face_no,subface_no)</code> takes care of
+  these situations by itself, if you loop over the correct number of subfaces,
+  in the above example this is two. The FESubfaceValues<dim>::reinit function
+  takes care of this too, so that the resulting state is always correct. There
+  is one little aspect, however: For reiniting the neighbor's FEFaceValues object
+  you need to know the index of the face that points toward the current
+  cell. Usually you assume that the neighbor you get directly is as coarse or as
+  fine as you, if it has children, thus this information can be obtained by the
+  <code>cell->neighbor_of_neighbor(face_no)</code> function. If the neighbor is
+  coarser, however, you would have to use
+  <code>cell->neighbor_of_coarser_neighbor(face_no).first</code> instead. In order
+  to make this easy for you, there is the new
+  <code>cell->neighbor_face_no(face_no)</code> function which does the correct thing
+  for you and returns the desired result.
+
+  <li> <em>Neighbor is as fine as our cell</em>: After we ruled out all cases in
+  which there are finer children, we only need to decide, whether the neighbor
+  is coarser here. For this, there is the
+  <code>cell->coarser_neighbor(face_no)</code> function returning a bool value. In
+  order to get the relevant case of a neighbor of the same coarseness we would
+  use <code>else if (!cell->coarser_neighbor(face_no))</code>. The code inside this
+  block can be left untouched. However, there is one thing to mention here: If
+  we want to use a rule, which cell should assemble certain terms on a given
+  face we might think of the rule presented in step-12. We know that we have to
+  leave out the part about comparing our cell's level with that of the neighbor
+  and replace it with the test for a coarser neighbor presented above. However,
+  we also have to consider the possibility that neighboring cells of same
+  coarseness have the same index (on different levels). Thus we have to include
+  the case where the cells have the same index, and give an additional
+  condition, which of the cells should assemble the terms, e.g. we can choose
+  the cell with lower level. The details of this concept can be seen in the
+  implementation below.
+
+  <li> <em>Coarser neighbor</em>: The remaining case is obvious: If there are no
+  refined neighbors and the neighbor is not as fine as the current cell, then it needs
+  to be coarser. Thus we can leave the old condition phrase, simply using
+  <code>else</code>. The <code>cell->neighbor_of_coarser_neighbor(face_no)</code>
+  function takes care of all the complexity of anisotropic refinement combined
+  with possible non standard face orientation, flip and rotation on general 3D meshes.
+
+</ul>
+
+<h4>Mesh smoothing</h4>
+When a triangulation is refined, cells which were not flagged for refinement may
+be refined nonetheless. This is due to additional smoothing algorithms which are
+either necessary or requested explicitly. In particular, the restriction that there
+be at most one hanging node on each edge frequently forces the refinement of additional
+cells neighboring ones that are already finer and are flagged for
+further refinement.
+
+However, deal.II also implements a number of algorithms that make sure
+that resulting meshes are smoother than just the bare minimum, for
+example ensuring that there are no isolated refined cells surrounded
+by non-refined ones, since the additional degrees of freedom on these
+islands would almost all be constrained by hanging node
+constraints. (See the documentation of the Triangulation class and its
+Triangulation::MeshSmoothing member for more information on mesh
+smoothing.)
+
+Most of the smoothing algorithms that were originally developed for
+the isotropic case have been adapted to work in a very similar
+way for both anisotropic and isotropic refinement. There are two
+algorithms worth mentioning, however:
+<ol>
+  <li> <code>MeshSmoothing::limit_level_difference_at_vertices</code>: In an isotropic environment,
+  this algorithm tries to ensure a good approximation quality by reducing the
+  difference in refinement level of cells meeting at a common vertex. However,
+  there is no clear corresponding concept for anisotropic refinement, thus this
+  algorithm may not be used in combination with anisotropic refinement. This
+  restriction is enforced by an assertion which throws an error as soon as the
+  algorithm is called on a triangulation which has been refined anisotropically.
+
+  <li> <code>MeshSmoothing::allow_anisotropic_smoothing</code>: If refinement is introduced to
+  limit the number of hanging nodes, the additional cells are often not needed
+  to improve the approximation quality. This is especially true for DG
+  methods. If you set the flag <code>allow_anisotropic_smoothing</code> the
+  smoothing algorithm tries to minimize the number of probably unneeded
+  additional cells by using anisotropic refinement for the smoothing. If you set
+  this smoothing flag you might get anisotropically refined cells, even if you
+  never set a single refinement flag to anisotropic refinement. Be aware that
+  you should only use this flag, if your code respects the possibility of
+  anisotropic meshes. Combined with a suitable anisotropic indicator this flag
+  can help save additional cells and thus effort.
+</ol>
+
+
+<h3>Jump indicator</h3>
+
+Using the benefits of anisotropic refinement requires an indicator to catch
+anisotropic features of the solution and exploit them for the refinement
+process. Generally the anisotropic refinement process will consist of several
+steps:
+<ol>
+  <li> Calculate an error indicator.
+  <li> Use the error indicator to flag cells for refinement, e.g. using a fixed
+  number or fraction of cells. Those cells will be flagged for isotropic
+  refinement automatically.
+  <li> Evaluate a distinct anisotropic indicator only on the flagged cells.
+  <li> Use the anisotropic indicator to set a new, anisotropic refinement flag
+  for cells where this is appropriate, leave the flags unchanged otherwise.
+  <li> Call Triangulation<dim>::execute_coarsening_and_refinement to perform the
+  requested refinement, using the requested isotropic and anisotropic flags.
+</ol>
+This approach is similar to the one we have used in step-27
+for hp refinement and
+has the great advantage of flexibility: Any error indicator can be
+used in the anisotropic process, i.e. if you have quite involved a posteriori
+goal-oriented error indicators available you can use them as easily as a simple
+Kelly error estimator. The anisotropic part of the refinement process is not
+influenced by this choice. Furthermore, simply leaving out the third and fourth
+steps leads to the same isotropic refinement you used to get before any
+anisotropic changes in deal.II or your application program.
+As a last advantage, working only
+on cells flagged for refinement results in a faster evaluation of the
+anisotropic indicator, which can become noticeable on finer meshes with a lot of
+cells if the indicator is quite involved.
+
+Here, we use a very simple approach which is only applicable to DG
+methods. The general idea is quite simple: DG methods allow the discrete
+solution to jump over the faces of a cell, whereas it is smooth within each
+cell. Of course, in the limit we expect that the jumps tend to zero as
+we refine the mesh and approximate the true solution better and better.
+Thus, a large jump
+across a given face indicates that the cell should be refined (at least)
+orthogonal to that face, whereas a small jump does not lead to this
+conclusion. It is possible, of course, that the exact solution is not smooth and
+that it also features a jump. In that case, however, a large jump over one face
+indicates, that this face is more or less parallel to the jump and in the
+vicinity of it, thus again we would expect a refinement orthogonal to the face
+under consideration to be effective.
+
+The proposed indicator calculates the average jump $K_j$, i.e. the mean value of
+the absolute jump $|[u]|$ of the discrete solution $u$ over the two faces
+$f_i^j$, $i=1,2$, $j=1..d$ orthogonal to coordinate direction $j$ on the unit
+cell.
+@f[
+K_j = \frac{\sum_{i=1}^2 \int_{f_i^j}|[u]| dx}{\sum_{i=1}^2 |f_i^j|} .
+@f]
+If the average jump in one direction is larger than the average of the
+jumps in the other directions by a
+certain factor $\kappa$, i.e. if
+$K_i > \kappa \frac 1{d-1} \sum_{j=1, j\neq i}^d K_j$, the cell is refined only along that particular
+direction $i$, otherwise the cell is refined isotropically.
+
+Such a criterion is easily generalized to systems of equations: the
+absolute value of the jump would be replaced by an appropriate norm of
+the vector-valued jump.
+
+
+
+<h3>The problem</h3>
+
+We solve the linear transport equation presented in step-12. The domain is
+extended to cover $[-1,1]\times[0,1]$ in 2D, where the flow field $\beta$ describes a
+counterclockwise quarter circle around the origin in the right half of the
+domain and is parallel to the x-axis in the left part of the domain. The inflow
+boundary is again located at $x=1$ and along the positive part of the x-axis,
+and the boundary conditions are chosen as in step-12. Compared to step-12 we
+only use the more effective second assembling technique. In order to make
+comparisons more effective, we decided to keep function names like @p
+assemble_system2 even if there is only one of these routines in this tutorial
+program.
diff --git a/examples/step-30/doc/kind b/examples/step-30/doc/kind
new file mode 100644
index 0000000..c1d9154
--- /dev/null
+++ b/examples/step-30/doc/kind
@@ -0,0 +1 @@
+techniques
diff --git a/examples/step-30/doc/results.dox b/examples/step-30/doc/results.dox
new file mode 100644
index 0000000..a98bb18
--- /dev/null
+++ b/examples/step-30/doc/results.dox
@@ -0,0 +1,151 @@
+<h1>Results</h1>
+
+
+The output of this program consists of the console output, the eps
+files containing the grids, and the grids and solutions given in gnuplot format.
+@code
+Performing a 2D run with isotropic refinement...
+------------------------------------------------
+Cycle 0:
+   Number of active cells:       128
+   Number of degrees of freedom: 512
+Time of assemble_system2: 0.040003
+Writing grid to <grid-0.iso.eps>...
+Writing grid to <grid-0.iso.gnuplot>...
+Writing solution to <sol-0.iso.gnuplot>...
+Cycle 1:
+   Number of active cells:       239
+   Number of degrees of freedom: 956
+Time of assemble_system2: 0.072005
+Writing grid to <grid-1.iso.eps>...
+Writing grid to <grid-1.iso.gnuplot>...
+Writing solution to <sol-1.iso.gnuplot>...
+Cycle 2:
+   Number of active cells:       491
+   Number of degrees of freedom: 1964
+Time of assemble_system2: 0.144009
+Writing grid to <grid-2.iso.eps>...
+Writing grid to <grid-2.iso.gnuplot>...
+Writing solution to <sol-2.iso.gnuplot>...
+Cycle 3:
+   Number of active cells:       1031
+   Number of degrees of freedom: 4124
+Time of assemble_system2: 0.296019
+Writing grid to <grid-3.iso.eps>...
+Writing grid to <grid-3.iso.gnuplot>...
+Writing solution to <sol-3.iso.gnuplot>...
+Cycle 4:
+   Number of active cells:       2027
+   Number of degrees of freedom: 8108
+Time of assemble_system2: 0.576036
+Writing grid to <grid-4.iso.eps>...
+Writing grid to <grid-4.iso.gnuplot>...
+Writing solution to <sol-4.iso.gnuplot>...
+Cycle 5:
+   Number of active cells:       4019
+   Number of degrees of freedom: 16076
+Time of assemble_system2: 1.13607
+Writing grid to <grid-5.iso.eps>...
+Writing grid to <grid-5.iso.gnuplot>...
+Writing solution to <sol-5.iso.gnuplot>...
+
+Performing a 2D run with anisotropic refinement...
+--------------------------------------------------
+Cycle 0:
+   Number of active cells:       128
+   Number of degrees of freedom: 512
+Time of assemble_system2: 0.040003
+Writing grid to <grid-0.aniso.eps>...
+Writing grid to <grid-0.aniso.gnuplot>...
+Writing solution to <sol-0.aniso.gnuplot>...
+Cycle 1:
+   Number of active cells:       171
+   Number of degrees of freedom: 684
+Time of assemble_system2: 0.048003
+Writing grid to <grid-1.aniso.eps>...
+Writing grid to <grid-1.aniso.gnuplot>...
+Writing solution to <sol-1.aniso.gnuplot>...
+Cycle 2:
+   Number of active cells:       255
+   Number of degrees of freedom: 1020
+Time of assemble_system2: 0.072005
+Writing grid to <grid-2.aniso.eps>...
+Writing grid to <grid-2.aniso.gnuplot>...
+Writing solution to <sol-2.aniso.gnuplot>...
+Cycle 3:
+   Number of active cells:       397
+   Number of degrees of freedom: 1588
+Time of assemble_system2: 0.16401
+Writing grid to <grid-3.aniso.eps>...
+Writing grid to <grid-3.aniso.gnuplot>...
+Writing solution to <sol-3.aniso.gnuplot>...
+Cycle 4:
+   Number of active cells:       658
+   Number of degrees of freedom: 2632
+Time of assemble_system2: 0.192012
+Writing grid to <grid-4.aniso.eps>...
+Writing grid to <grid-4.aniso.gnuplot>...
+Writing solution to <sol-4.aniso.gnuplot>...
+Cycle 5:
+   Number of active cells:       1056
+   Number of degrees of freedom: 4224
+Time of assemble_system2: 0.304019
+Writing grid to <grid-5.aniso.eps>...
+Writing grid to <grid-5.aniso.gnuplot>...
+Writing solution to <sol-5.aniso.gnuplot>...
+@endcode
+
+This text output shows the reduction in the number of cells which results from
+the successive application of anisotropic refinement. After the last refinement
+step the savings have accumulated so much that almost four times as many cells
+and thus dofs are needed in the isotropic case. The time needed for assembly
+scales with a similar factor.
+
+Now we show the solutions on the mesh after one and after five adaptive
+refinement steps for both the isotropic (left) and anisotropic refinement
+algorithms (right).
+
+<TABLE WIDTH="60%" ALIGN="center">
+  <tr>
+    <td ALIGN="center">
+      <img src="http://www.dealii.org/images/steps/developer/step-30.sol-1.iso.png" alt="">
+    </td>
+
+    <td ALIGN="center">
+      <img src="http://www.dealii.org/images/steps/developer/step-30.sol-1.aniso.png" alt="">
+    </td>
+  </tr>
+  <tr>
+    <td ALIGN="center">
+      <img src="http://www.dealii.org/images/steps/developer/step-30.sol-5.iso.png" alt="">
+    </td>
+
+    <td ALIGN="center">
+      <img src="http://www.dealii.org/images/steps/developer/step-30.sol-5.aniso.png" alt="">
+    </td>
+  </tr>
+</table>
+
+We see that the solution on the anisotropically refined mesh is very similar to
+the solution obtained on the isotropically refined mesh. Thus the anisotropic
+indicator seems to effectively select the appropriate cells for anisotropic
+refinement. This observation is strengthened by the plot of an adapted
+anisotropic grid, e.g. the grid after three refinement steps.
+
+<img src="http://www.dealii.org/images/steps/developer/step-30.grid-3.aniso.png" alt="">
+
+In the whole left part of the domain refinement is only performed along the
+y-axis of cells. In the right part of the domain the refinement is dominated by
+isotropic refinement, as the anisotropic feature of the solution - the jump from
+one to zero - is not well aligned with the mesh. However, at the bottom and
+leftmost parts of the quarter circle this jump becomes more and more aligned
+with the mesh and the refinement algorithm reacts by creating anisotropic cells
+of increasing aspect ratio.
+
+It might seem that the necessary alignment of anisotropic features and the
+coarse mesh can decrease performance significantly for real world
+problems. However, that is not always the case. Considering boundary layers in
+compressible viscous flows, for example, the mesh is always aligned with the
+anisotropic features, thus anisotropic refinement will almost always increase the
+efficiency of computations on adapted grids for these cases.
+
diff --git a/examples/step-30/doc/tooltip b/examples/step-30/doc/tooltip
new file mode 100644
index 0000000..08a75b4
--- /dev/null
+++ b/examples/step-30/doc/tooltip
@@ -0,0 +1 @@
+Anisotropic refinement for DG methods.
diff --git a/examples/step-30/step-30.cc b/examples/step-30/step-30.cc
new file mode 100644
index 0000000..e721418
--- /dev/null
+++ b/examples/step-30/step-30.cc
@@ -0,0 +1,1018 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2007 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Author: Tobias Leicht, 2007
+ */
+
+
+// The deal.II include files have already been covered in previous examples
+// and will thus not be further commented on.
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/function.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/grid_out.h>
+#include <deal.II/grid/grid_refinement.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/fe/mapping_q1.h>
+#include <deal.II/fe/fe_dgq.h>
+#include <deal.II/lac/solver_richardson.h>
+#include <deal.II/lac/precondition_block.h>
+#include <deal.II/numerics/derivative_approximation.h>
+#include <deal.II/base/timer.h>
+
+// And this again is C++:
+#include <iostream>
+#include <fstream>
+
+// The last step is as in all previous programs:
+namespace Step30
+{
+  using namespace dealii;
+
+  // @sect3{Equation data}
+  //
+  // The classes describing equation data and the actual assembly of
+  // individual terms are almost entirely copied from step-12. We will comment
+  // on differences.
+  template <int dim>
+  class RHS:  public Function<dim>
+  {
+  public:
+    virtual void value_list (const std::vector<Point<dim> > &points,
+                             std::vector<double> &values,
+                             const unsigned int component=0) const;
+  };
+
+
+  template <int dim>
+  class BoundaryValues:  public Function<dim>
+  {
+  public:
+    virtual void value_list (const std::vector<Point<dim> > &points,
+                             std::vector<double> &values,
+                             const unsigned int component=0) const;
+  };
+
+
+  template <int dim>
+  class Beta
+  {
+  public:
+    Beta () {}
+    void value_list (const std::vector<Point<dim> > &points,
+                     std::vector<Point<dim> > &values) const;
+  };
+
+
+  template <int dim>
+  void RHS<dim>::value_list(const std::vector<Point<dim> > &points,
+                            std::vector<double> &values,
+                            const unsigned int) const
+  {
+    Assert(values.size()==points.size(),
+           ExcDimensionMismatch(values.size(),points.size()));
+
+    for (unsigned int i=0; i<values.size(); ++i)
+      values[i]=0;
+  }
+
+
+  // The flow field is chosen to be a quarter circle with counterclockwise
+  // flow direction and with the origin as midpoint for the right half of the
+  // domain with positive $x$ values, whereas the flow simply goes to the left
+  // in the left part of the domain at a velocity that matches the one coming
+  // in from the right. In the circular part the magnitude of the flow
+  // velocity is proportional to the distance from the origin. This is a
+  // difference to step-12, where the magnitude was 1 everywhere. The new
+  // definition leads to a linear variation of $\beta$ along each given face
+  // of a cell. On the other hand, the solution $u(x,y)$ is exactly the same
+  // as before.
+  template <int dim>
+  void Beta<dim>::value_list(const std::vector<Point<dim> > &points,
+                             std::vector<Point<dim> > &values) const
+  {
+    Assert(values.size()==points.size(),
+           ExcDimensionMismatch(values.size(),points.size()));
+
+    for (unsigned int i=0; i<points.size(); ++i)
+      {
+        if (points[i](0) > 0)
+          {
+            values[i](0) = -points[i](1);
+            values[i](1) = points[i](0);
+          }
+        else
+          {
+            values[i] = Point<dim>();
+            values[i](0) = -points[i](1);
+          }
+      }
+  }
+
+
+  template <int dim>
+  void BoundaryValues<dim>::value_list(const std::vector<Point<dim> > &points,
+                                       std::vector<double> &values,
+                                       const unsigned int) const
+  {
+    Assert(values.size()==points.size(),
+           ExcDimensionMismatch(values.size(),points.size()));
+
+    for (unsigned int i=0; i<values.size(); ++i)
+      {
+        if (points[i](0)<0.5)
+          values[i]=1.;
+        else
+          values[i]=0.;
+      }
+  }
+
+
+  // @sect3{Class: DGTransportEquation}
+  //
+  // The declaration of this class is utterly unaffected by our current
+  // changes.  The only substantial change is that we use only the second
+  // assembly scheme described in step-12.
+  template <int dim>
+  class DGTransportEquation
+  {
+  public:
+    DGTransportEquation();
+
+    void assemble_cell_term(const FEValues<dim> &fe_v,
+                            FullMatrix<double> &ui_vi_matrix,
+                            Vector<double> &cell_vector) const;
+
+    void assemble_boundary_term(const FEFaceValues<dim> &fe_v,
+                                FullMatrix<double> &ui_vi_matrix,
+                                Vector<double> &cell_vector) const;
+
+    void assemble_face_term2(const FEFaceValuesBase<dim> &fe_v,
+                             const FEFaceValuesBase<dim> &fe_v_neighbor,
+                             FullMatrix<double> &ui_vi_matrix,
+                             FullMatrix<double> &ue_vi_matrix,
+                             FullMatrix<double> &ui_ve_matrix,
+                             FullMatrix<double> &ue_ve_matrix) const;
+  private:
+    const Beta<dim> beta_function;
+    const RHS<dim> rhs_function;
+    const BoundaryValues<dim> boundary_function;
+  };
+
+
+  // Likewise, the constructor of the class as well as the functions
+  // assembling the terms corresponding to cell interiors and boundary faces
+  // are unchanged from before. The function that assembles face terms between
+  // cells also did not change because all it does is operate on two objects
+  // of type FEFaceValuesBase (which is the base class of both FEFaceValues
+  // and FESubfaceValues). Where these objects come from, i.e. how they are
+  // initialized, is of no concern to this function: it simply assumes that
+  // the quadrature points on faces or subfaces represented by the two objects
+  // correspond to the same points in physical space.
+  template <int dim>
+  DGTransportEquation<dim>::DGTransportEquation ()
+    :
+    beta_function (),
+    rhs_function (),
+    boundary_function ()
+  {}
+
+
+  template <int dim>
+  void DGTransportEquation<dim>::assemble_cell_term(
+    const FEValues<dim> &fe_v,
+    FullMatrix<double> &ui_vi_matrix,
+    Vector<double> &cell_vector) const
+  {
+    const std::vector<double> &JxW = fe_v.get_JxW_values ();
+
+    std::vector<Point<dim> > beta (fe_v.n_quadrature_points);
+    std::vector<double> rhs (fe_v.n_quadrature_points);
+
+    beta_function.value_list (fe_v.get_quadrature_points(), beta);
+    rhs_function.value_list (fe_v.get_quadrature_points(), rhs);
+
+    for (unsigned int point=0; point<fe_v.n_quadrature_points; ++point)
+      for (unsigned int i=0; i<fe_v.dofs_per_cell; ++i)
+        {
+          for (unsigned int j=0; j<fe_v.dofs_per_cell; ++j)
+            ui_vi_matrix(i,j) -= beta[point]*fe_v.shape_grad(i,point)*
+                                 fe_v.shape_value(j,point) *
+                                 JxW[point];
+
+          cell_vector(i) += rhs[point] * fe_v.shape_value(i,point) * JxW[point];
+        }
+  }
+
+
+  template <int dim>
+  void DGTransportEquation<dim>::assemble_boundary_term(
+    const FEFaceValues<dim> &fe_v,
+    FullMatrix<double> &ui_vi_matrix,
+    Vector<double> &cell_vector) const
+  {
+    const std::vector<double> &JxW = fe_v.get_JxW_values ();
+    const std::vector<Tensor<1,dim> > &normals = fe_v.get_all_normal_vectors ();
+
+    std::vector<Point<dim> > beta (fe_v.n_quadrature_points);
+    std::vector<double> g(fe_v.n_quadrature_points);
+
+    beta_function.value_list (fe_v.get_quadrature_points(), beta);
+    boundary_function.value_list (fe_v.get_quadrature_points(), g);
+
+    for (unsigned int point=0; point<fe_v.n_quadrature_points; ++point)
+      {
+        const double beta_n=beta[point] * normals[point];
+        if (beta_n>0)
+          for (unsigned int i=0; i<fe_v.dofs_per_cell; ++i)
+            for (unsigned int j=0; j<fe_v.dofs_per_cell; ++j)
+              ui_vi_matrix(i,j) += beta_n *
+                                   fe_v.shape_value(j,point) *
+                                   fe_v.shape_value(i,point) *
+                                   JxW[point];
+        else
+          for (unsigned int i=0; i<fe_v.dofs_per_cell; ++i)
+            cell_vector(i) -= beta_n *
+                              g[point] *
+                              fe_v.shape_value(i,point) *
+                              JxW[point];
+      }
+  }
+
+
+  template <int dim>
+  void DGTransportEquation<dim>::assemble_face_term2(
+    const FEFaceValuesBase<dim> &fe_v,
+    const FEFaceValuesBase<dim> &fe_v_neighbor,
+    FullMatrix<double> &ui_vi_matrix,
+    FullMatrix<double> &ue_vi_matrix,
+    FullMatrix<double> &ui_ve_matrix,
+    FullMatrix<double> &ue_ve_matrix) const
+  {
+    const std::vector<double> &JxW = fe_v.get_JxW_values ();
+    const std::vector<Tensor<1,dim> > &normals = fe_v.get_all_normal_vectors ();
+
+    std::vector<Point<dim> > beta (fe_v.n_quadrature_points);
+
+    beta_function.value_list (fe_v.get_quadrature_points(), beta);
+
+    for (unsigned int point=0; point<fe_v.n_quadrature_points; ++point)
+      {
+        const double beta_n=beta[point] * normals[point];
+        if (beta_n>0)
+          {
+            for (unsigned int i=0; i<fe_v.dofs_per_cell; ++i)
+              for (unsigned int j=0; j<fe_v.dofs_per_cell; ++j)
+                ui_vi_matrix(i,j) += beta_n *
+                                     fe_v.shape_value(j,point) *
+                                     fe_v.shape_value(i,point) *
+                                     JxW[point];
+
+            for (unsigned int k=0; k<fe_v_neighbor.dofs_per_cell; ++k)
+              for (unsigned int j=0; j<fe_v.dofs_per_cell; ++j)
+                ui_ve_matrix(k,j) -= beta_n *
+                                     fe_v.shape_value(j,point) *
+                                     fe_v_neighbor.shape_value(k,point) *
+                                     JxW[point];
+          }
+        else
+          {
+            for (unsigned int i=0; i<fe_v.dofs_per_cell; ++i)
+              for (unsigned int l=0; l<fe_v_neighbor.dofs_per_cell; ++l)
+                ue_vi_matrix(i,l) += beta_n *
+                                     fe_v_neighbor.shape_value(l,point) *
+                                     fe_v.shape_value(i,point) *
+                                     JxW[point];
+
+            for (unsigned int k=0; k<fe_v_neighbor.dofs_per_cell; ++k)
+              for (unsigned int l=0; l<fe_v_neighbor.dofs_per_cell; ++l)
+                ue_ve_matrix(k,l) -= beta_n *
+                                     fe_v_neighbor.shape_value(l,point) *
+                                     fe_v_neighbor.shape_value(k,point) *
+                                     JxW[point];
+          }
+      }
+  }
+
+
+  // @sect3{Class: DGMethod}
+  //
+  // Even the main class of this program stays more or less the same. We omit
+  // one of the assembly routines and use only the second, more effective one
+  // of the two presented in step-12. However, we introduce a new routine
+  // (set_anisotropic_flags) and modify another one (refine_grid).
+  template <int dim>
+  class DGMethod
+  {
+  public:
+    DGMethod (const bool anisotropic);
+    ~DGMethod ();
+
+    void run ();
+
+  private:
+    void setup_system ();
+    void assemble_system1 ();
+    void assemble_system2 ();
+    void solve (Vector<double> &solution);
+    void refine_grid ();
+    void set_anisotropic_flags ();
+    void output_results (const unsigned int cycle) const;
+
+    Triangulation<dim>   triangulation;
+    const MappingQ1<dim> mapping;
+    // Again we want to use DG elements of degree 1 (but this is only
+    // specified in the constructor). If you want to use a DG method of a
+    // different degree replace 1 in the constructor by the new degree.
+    const unsigned int   degree;
+    FE_DGQ<dim>          fe;
+    DoFHandler<dim>      dof_handler;
+
+    SparsityPattern      sparsity_pattern;
+    SparseMatrix<double> system_matrix;
+    // This is new, the threshold value used in the evaluation of the
+    // anisotropic jump indicator explained in the introduction. Its value is
+    // set to 3.0 in the constructor, but it can easily be changed to a
+    // different value greater than 1.
+    const double anisotropic_threshold_ratio;
+    // This is a bool flag indicating whether anisotropic refinement shall be
+    // used or not. It is set by the constructor, which takes an argument of
+    // the same name.
+    const bool anisotropic;
+
+    const QGauss<dim>   quadrature;
+    const QGauss<dim-1> face_quadrature;
+
+    Vector<double>       solution2;
+    Vector<double>       right_hand_side;
+
+    const DGTransportEquation<dim> dg;
+  };
+
+
+  template <int dim>
+  DGMethod<dim>::DGMethod (const bool anisotropic)
+    :
+    mapping (),
+    // Change here for DG methods of different degrees.
+    degree(1),
+    fe (degree),
+    dof_handler (triangulation),
+    anisotropic_threshold_ratio(3.),
+    anisotropic(anisotropic),
+    // As beta is a linear function, we can choose the degree of the
+    // quadrature for which the resulting integration is correct. Thus, we
+    // choose to use <code>degree+1</code> Gauss points, which enables us to
+    // integrate exactly polynomials of degree <code>2*degree+1</code>, enough
+    // for all the integrals we will perform in this program.
+    quadrature (degree+1),
+    face_quadrature (degree+1),
+    dg ()
+  {}
+
+
+  template <int dim>
+  DGMethod<dim>::~DGMethod ()
+  {
+    dof_handler.clear ();
+  }
+
+
+  template <int dim>
+  void DGMethod<dim>::setup_system ()
+  {
+    dof_handler.distribute_dofs (fe);
+    sparsity_pattern.reinit (dof_handler.n_dofs(),
+                             dof_handler.n_dofs(),
+                             (GeometryInfo<dim>::faces_per_cell
+                              *GeometryInfo<dim>::max_children_per_face+1)*fe.dofs_per_cell);
+
+    DoFTools::make_flux_sparsity_pattern (dof_handler, sparsity_pattern);
+
+    sparsity_pattern.compress();
+
+    system_matrix.reinit (sparsity_pattern);
+
+    solution2.reinit (dof_handler.n_dofs());
+    right_hand_side.reinit (dof_handler.n_dofs());
+  }
+
+
+  // @sect4{Function: assemble_system2}
+  //
+  // We proceed with the <code>assemble_system2</code> function that
+  // implements the DG discretization in its second version. This function is
+  // very similar to the <code>assemble_system2</code> function from step-12,
+  // even the four cases considered for the neighbor-relations of a cell are
+  // the same, namely a) cell is at the boundary, b) there are finer
+  // neighboring cells, c) the neighbor is neither coarser nor finer and d)
+  // the neighbor is coarser.  However, the way in which we decide upon which
+  // case we have is modified in the way described in the introduction.
+  template <int dim>
+  void DGMethod<dim>::assemble_system2 ()
+  {
+    const unsigned int dofs_per_cell = dof_handler.get_fe().dofs_per_cell;
+    std::vector<types::global_dof_index> dofs (dofs_per_cell);
+    std::vector<types::global_dof_index> dofs_neighbor (dofs_per_cell);
+
+    const UpdateFlags update_flags = update_values
+                                     | update_gradients
+                                     | update_quadrature_points
+                                     | update_JxW_values;
+
+    const UpdateFlags face_update_flags = update_values
+                                          | update_quadrature_points
+                                          | update_JxW_values
+                                          | update_normal_vectors;
+
+    const UpdateFlags neighbor_face_update_flags = update_values;
+
+    FEValues<dim> fe_v (
+      mapping, fe, quadrature, update_flags);
+    FEFaceValues<dim> fe_v_face (
+      mapping, fe, face_quadrature, face_update_flags);
+    FESubfaceValues<dim> fe_v_subface (
+      mapping, fe, face_quadrature, face_update_flags);
+    FEFaceValues<dim> fe_v_face_neighbor (
+      mapping, fe, face_quadrature, neighbor_face_update_flags);
+
+
+    FullMatrix<double> ui_vi_matrix (dofs_per_cell, dofs_per_cell);
+    FullMatrix<double> ue_vi_matrix (dofs_per_cell, dofs_per_cell);
+
+    FullMatrix<double> ui_ve_matrix (dofs_per_cell, dofs_per_cell);
+    FullMatrix<double> ue_ve_matrix (dofs_per_cell, dofs_per_cell);
+
+    Vector<double>  cell_vector (dofs_per_cell);
+
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+    for (; cell!=endc; ++cell)
+      {
+        ui_vi_matrix = 0;
+        cell_vector = 0;
+
+        fe_v.reinit (cell);
+
+        dg.assemble_cell_term(fe_v,
+                              ui_vi_matrix,
+                              cell_vector);
+
+        cell->get_dof_indices (dofs);
+
+        for (unsigned int face_no=0; face_no<GeometryInfo<dim>::faces_per_cell; ++face_no)
+          {
+            typename DoFHandler<dim>::face_iterator face=
+              cell->face(face_no);
+
+            // Case a)
+            if (face->at_boundary())
+              {
+                fe_v_face.reinit (cell, face_no);
+
+                dg.assemble_boundary_term(fe_v_face,
+                                          ui_vi_matrix,
+                                          cell_vector);
+              }
+            else
+              {
+                Assert (cell->neighbor(face_no).state() == IteratorState::valid,
+                        ExcInternalError());
+                typename DoFHandler<dim>::cell_iterator neighbor=
+                  cell->neighbor(face_no);
+                // Case b), we decide that there are finer cells as neighbors
+                // by asking the face whether it has children. If so, then
+                // there must also be finer cells which are children or
+                // farther offspring of our neighbor.
+                if (face->has_children())
+                  {
+                    // We need to know which of the neighbor's faces points in
+                    // the direction of our cell. Using the @p
+                    // neighbor_face_no function we get this information for
+                    // both coarser and non-coarser neighbors.
+                    const unsigned int neighbor2=
+                      cell->neighbor_face_no(face_no);
+
+                    // Now we loop over all subfaces, i.e. the children and
+                    // possibly grandchildren of the current face.
+                    for (unsigned int subface_no=0;
+                         subface_no<face->number_of_children(); ++subface_no)
+                      {
+                        // To get the cell behind the current subface we can
+                        // use the @p neighbor_child_on_subface function. It
+                        // takes care of all the complicated situations of
+                        // anisotropic refinement and non-standard faces.
+                        typename DoFHandler<dim>::cell_iterator neighbor_child
+                          = cell->neighbor_child_on_subface (face_no, subface_no);
+                        Assert (!neighbor_child->has_children(), ExcInternalError());
+
+                        // The remaining part of this case is unchanged.
+                        ue_vi_matrix = 0;
+                        ui_ve_matrix = 0;
+                        ue_ve_matrix = 0;
+
+                        fe_v_subface.reinit (cell, face_no, subface_no);
+                        fe_v_face_neighbor.reinit (neighbor_child, neighbor2);
+
+                        dg.assemble_face_term2(fe_v_subface,
+                                               fe_v_face_neighbor,
+                                               ui_vi_matrix,
+                                               ue_vi_matrix,
+                                               ui_ve_matrix,
+                                               ue_ve_matrix);
+
+                        neighbor_child->get_dof_indices (dofs_neighbor);
+
+                        for (unsigned int i=0; i<dofs_per_cell; ++i)
+                          for (unsigned int j=0; j<dofs_per_cell; ++j)
+                            {
+                              system_matrix.add(dofs[i], dofs_neighbor[j],
+                                                ue_vi_matrix(i,j));
+                              system_matrix.add(dofs_neighbor[i], dofs[j],
+                                                ui_ve_matrix(i,j));
+                              system_matrix.add(dofs_neighbor[i], dofs_neighbor[j],
+                                                ue_ve_matrix(i,j));
+                            }
+                      }
+                  }
+                else
+                  {
+                    // Case c). We simply ask, whether the neighbor is
+                    // coarser. If not, then it is neither coarser nor finer,
+                    // since any finer neighbor would have been treated above
+                    // with case b). Of all the cases with the same refinement
+                    // situation of our cell and the neighbor we want to treat
+                    // only one half, so that each face is considered only
+                    // once. Thus we have the additional condition, that the
+                    // cell with the lower index does the work. In the rare
+                    // case that both cells have the same index, the cell with
+                    // the higher level is selected.
+                    if (!cell->neighbor_is_coarser(face_no) &&
+                        (neighbor->index() > cell->index() ||
+                         (neighbor->level() < cell->level() &&
+                          neighbor->index() == cell->index())))
+                      {
+                        // Here we know, that the neighbor is not coarser so we
+                        // can use the usual @p neighbor_of_neighbor
+                        // function. However, we could also use the more
+                        // general @p neighbor_face_no function.
+                        const unsigned int neighbor2=cell->neighbor_of_neighbor(face_no);
+
+                        ue_vi_matrix = 0;
+                        ui_ve_matrix = 0;
+                        ue_ve_matrix = 0;
+
+                        fe_v_face.reinit (cell, face_no);
+                        fe_v_face_neighbor.reinit (neighbor, neighbor2);
+
+                        dg.assemble_face_term2(fe_v_face,
+                                               fe_v_face_neighbor,
+                                               ui_vi_matrix,
+                                               ue_vi_matrix,
+                                               ui_ve_matrix,
+                                               ue_ve_matrix);
+
+                        neighbor->get_dof_indices (dofs_neighbor);
+
+                        for (unsigned int i=0; i<dofs_per_cell; ++i)
+                          for (unsigned int j=0; j<dofs_per_cell; ++j)
+                            {
+                              system_matrix.add(dofs[i], dofs_neighbor[j],
+                                                ue_vi_matrix(i,j));
+                              system_matrix.add(dofs_neighbor[i], dofs[j],
+                                                ui_ve_matrix(i,j));
+                              system_matrix.add(dofs_neighbor[i], dofs_neighbor[j],
+                                                ue_ve_matrix(i,j));
+                            }
+                      }
+
+                    // We do not need to consider case d), as those faces are
+                    // treated 'from the other side' within case b).
+                  }
+              }
+          }
+
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          for (unsigned int j=0; j<dofs_per_cell; ++j)
+            system_matrix.add(dofs[i], dofs[j], ui_vi_matrix(i,j));
+
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          right_hand_side(dofs[i]) += cell_vector(i);
+      }
+  }
+
+
+  // @sect3{Solver}
+  //
+  // For this simple problem we use the simple Richardson iteration again. The
+  // solver is completely unaffected by our anisotropic changes.
+  template <int dim>
+  void DGMethod<dim>::solve (Vector<double> &solution)
+  {
+    SolverControl           solver_control (1000, 1e-12, false, false);
+    SolverRichardson<>      solver (solver_control);
+
+    PreconditionBlockSSOR<SparseMatrix<double> > preconditioner;
+
+    preconditioner.initialize(system_matrix, fe.dofs_per_cell);
+
+    solver.solve (system_matrix, solution, right_hand_side,
+                  preconditioner);
+  }
+
+
+  // @sect3{Refinement}
+  //
+  // We refine the grid according to the same simple refinement criterion used
+  // in step-12, namely an approximation to the gradient of the solution.
+  template <int dim>
+  void DGMethod<dim>::refine_grid ()
+  {
+    Vector<float> gradient_indicator (triangulation.n_active_cells());
+
+    // We approximate the gradient,
+    DerivativeApproximation::approximate_gradient (mapping,
+                                                   dof_handler,
+                                                   solution2,
+                                                   gradient_indicator);
+
+    // and scale it to obtain an error indicator.
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+    for (unsigned int cell_no=0; cell!=endc; ++cell, ++cell_no)
+      gradient_indicator(cell_no)*=std::pow(cell->diameter(), 1+1.0*dim/2);
+    // Then we use this indicator to flag the 30 percent of the cells with
+    // highest error indicator to be refined.
+    GridRefinement::refine_and_coarsen_fixed_number (triangulation,
+                                                     gradient_indicator,
+                                                     0.3, 0.1);
+    // Now the refinement flags are set for those cells with a large error
+    // indicator. If nothing is done to change this, those cells will be
+    // refined isotropically. If the @p anisotropic flag passed to the
+    // constructor is set, we now call the set_anisotropic_flags() function,
+    // which uses the jump indicator to reset some of the refinement flags to
+    // anisotropic refinement.
+    if (anisotropic)
+      set_anisotropic_flags();
+    // Now execute the refinement considering anisotropic as well as isotropic
+    // refinement flags.
+    triangulation.execute_coarsening_and_refinement ();
+  }
+
+  // Once an error indicator has been evaluated and the cells with largest
+  // error are flagged for refinement we want to loop over the flagged cells
+  // again to decide whether they need isotropic refinement or whether
+  // anisotropic refinement is more appropriate. This is the anisotropic jump
+  // indicator explained in the introduction.
+  template <int dim>
+  void DGMethod<dim>::set_anisotropic_flags ()
+  {
+    // We want to evaluate the jump over faces of the flagged cells, so we
+    // need some objects to evaluate values of the solution on faces.
+    UpdateFlags face_update_flags
+      = UpdateFlags(update_values | update_JxW_values);
+
+    FEFaceValues<dim> fe_v_face (mapping, fe, face_quadrature, face_update_flags);
+    FESubfaceValues<dim> fe_v_subface (mapping, fe, face_quadrature, face_update_flags);
+    FEFaceValues<dim> fe_v_face_neighbor (mapping, fe, face_quadrature, update_values);
+
+    // Now we need to loop over all active cells.
+    typename DoFHandler<dim>::active_cell_iterator cell=dof_handler.begin_active(),
+                                                   endc=dof_handler.end();
+
+    for (; cell!=endc; ++cell)
+      // We only need to consider cells which are flagged for refinement.
+      if (cell->refine_flag_set())
+        {
+          Point<dim> jump;
+          Point<dim> area;
+
+          for (unsigned int face_no=0; face_no<GeometryInfo<dim>::faces_per_cell; ++face_no)
+            {
+              typename DoFHandler<dim>::face_iterator face = cell->face(face_no);
+
+              if (!face->at_boundary())
+                {
+                  Assert (cell->neighbor(face_no).state() == IteratorState::valid, ExcInternalError());
+                  typename DoFHandler<dim>::cell_iterator neighbor = cell->neighbor(face_no);
+
+                  std::vector<double> u (fe_v_face.n_quadrature_points);
+                  std::vector<double> u_neighbor (fe_v_face.n_quadrature_points);
+
+                  // The four cases of different neighbor relations seen in
+                  // the assembly routines are repeated much in the same way
+                  // here.
+                  if (face->has_children())
+                    {
+                      // The neighbor is refined.  First we store the
+                      // information, which of the neighbor's faces points in
+                      // the direction of our current cell. This property is
+                      // inherited by the children.
+                      unsigned int neighbor2=cell->neighbor_face_no(face_no);
+                      // Now we loop over all subfaces,
+                      for (unsigned int subface_no=0; subface_no<face->number_of_children(); ++subface_no)
+                        {
+                          // get an iterator pointing to the cell behind the
+                          // present subface...
+                          typename DoFHandler<dim>::cell_iterator neighbor_child = cell->neighbor_child_on_subface(face_no,subface_no);
+                          Assert (!neighbor_child->has_children(), ExcInternalError());
+                          // ... and reinit the respective FEFaceValues and
+                          // FESubfaceValues objects.
+                          fe_v_subface.reinit (cell, face_no, subface_no);
+                          fe_v_face_neighbor.reinit (neighbor_child, neighbor2);
+                          // We obtain the function values
+                          fe_v_subface.get_function_values(solution2, u);
+                          fe_v_face_neighbor.get_function_values(solution2, u_neighbor);
+                          // as well as the quadrature weights, multiplied by
+                          // the Jacobian determinant.
+                          const std::vector<double> &JxW = fe_v_subface.get_JxW_values ();
+                          // Now we loop over all quadrature points
+                          for (unsigned int x=0; x<fe_v_subface.n_quadrature_points; ++x)
+                            {
+                              // and integrate the absolute value of the jump
+                              // of the solution, i.e. the absolute value of
+                              // the difference between the function value
+                              // seen from the current cell and the
+                              // neighboring cell, respectively. We know, that
+                              // the first two faces are orthogonal to the
+                              // first coordinate direction on the unit cell,
+                              // the second two faces are orthogonal to the
+                              // second coordinate direction and so on, so we
+                              // accumulate these values into vectors with
+                              // <code>dim</code> components.
+                              jump[face_no/2]+=std::fabs(u[x]-u_neighbor[x])*JxW[x];
+                              // We also sum up the scaled weights to obtain
+                              // the measure of the face.
+                              area[face_no/2]+=JxW[x];
+                            }
+                        }
+                    }
+                  else
+                    {
+                      if (!cell->neighbor_is_coarser(face_no))
+                        {
+                          // Our current cell and the neighbor have the same
+                          // refinement along the face under
+                          // consideration. Apart from that, we do much the
+                          // same as with one of the subcells in the above
+                          // case.
+                          unsigned int neighbor2=cell->neighbor_of_neighbor(face_no);
+
+                          fe_v_face.reinit (cell, face_no);
+                          fe_v_face_neighbor.reinit (neighbor, neighbor2);
+
+                          fe_v_face.get_function_values(solution2, u);
+                          fe_v_face_neighbor.get_function_values(solution2, u_neighbor);
+
+                          const std::vector<double> &JxW = fe_v_face.get_JxW_values ();
+
+                          for (unsigned int x=0; x<fe_v_face.n_quadrature_points; ++x)
+                            {
+                              jump[face_no/2]+=std::fabs(u[x]-u_neighbor[x])*JxW[x];
+                              area[face_no/2]+=JxW[x];
+                            }
+                        }
+                      else //i.e. neighbor is coarser than cell
+                        {
+                          // Now the neighbor is actually coarser. This case
+                          // is new, in that it did not occur in the assembly
+                          // routine. Here, we have to consider it, but this
+                          // is not overly complicated. We simply use the @p
+                          // neighbor_of_coarser_neighbor function, which
+                          // again takes care of anisotropic refinement and
+                          // non-standard face orientation by itself.
+                          std::pair<unsigned int,unsigned int> neighbor_face_subface
+                            = cell->neighbor_of_coarser_neighbor(face_no);
+                          Assert (neighbor_face_subface.first<GeometryInfo<dim>::faces_per_cell, ExcInternalError());
+                          Assert (neighbor_face_subface.second<neighbor->face(neighbor_face_subface.first)->number_of_children(),
+                                  ExcInternalError());
+                          Assert (neighbor->neighbor_child_on_subface(neighbor_face_subface.first, neighbor_face_subface.second)
+                                  == cell, ExcInternalError());
+
+                          fe_v_face.reinit (cell, face_no);
+                          fe_v_subface.reinit (neighbor, neighbor_face_subface.first,
+                                               neighbor_face_subface.second);
+
+                          fe_v_face.get_function_values(solution2, u);
+                          fe_v_subface.get_function_values(solution2, u_neighbor);
+
+                          const std::vector<double> &JxW = fe_v_face.get_JxW_values ();
+
+                          for (unsigned int x=0; x<fe_v_face.n_quadrature_points; ++x)
+                            {
+                              jump[face_no/2]+=std::fabs(u[x]-u_neighbor[x])*JxW[x];
+                              area[face_no/2]+=JxW[x];
+                            }
+                        }
+                    }
+                }
+            }
+          // Now we analyze the size of the mean jumps, which we get dividing
+          // the jumps by the measure of the respective faces.
+          double average_jumps[dim];
+          double sum_of_average_jumps=0.;
+          for (unsigned int i=0; i<dim; ++i)
+            {
+              average_jumps[i] = jump(i)/area(i);
+              sum_of_average_jumps += average_jumps[i];
+            }
+
+          // Now we loop over the <code>dim</code> coordinate directions of
+          // the unit cell and compare the average jump over the faces
+          // orthogonal to that direction with the average jumps over faces
+          // orthogonal to the remaining direction(s). If the first is larger
+          // than the latter by a given factor, we refine only along that
+          // axis. Otherwise we leave the refinement flag unchanged, resulting
+          // in isotropic refinement.
+          for (unsigned int i=0; i<dim; ++i)
+            if (average_jumps[i] > anisotropic_threshold_ratio*(sum_of_average_jumps-average_jumps[i]))
+              cell->set_refine_flag(RefinementCase<dim>::cut_axis(i));
+        }
+  }
+
+  // @sect3{The Rest}
+  //
+  // The remaining part of the program is again unmodified. Only the creation
+  // of the original triangulation is changed in order to reproduce the new
+  // domain.
+  template <int dim>
+  void DGMethod<dim>::output_results (const unsigned int cycle) const
+  {
+    // Writes three files for the given refinement cycle: the mesh in EPS
+    // format, the mesh in gnuplot format, and the solution in gnuplot
+    // format. The cycle number is encoded as a single decimal digit in each
+    // file name, so it must be smaller than 10; this is checked before the
+    // digit is formed rather than afterwards.
+    Assert (cycle < 10, ExcInternalError());
+
+    // Part shared by all three file names: the cycle digit followed by a tag
+    // telling isotropic and anisotropic runs apart. The conversion of the
+    // digit to <code>char</code> is spelled out explicitly.
+    std::string run_tag (1, static_cast<char>('0' + cycle));
+    run_tag += (anisotropic ? ".aniso" : ".iso");
+
+    GridOut grid_out;
+
+    // The mesh as an EPS picture...
+    const std::string eps_filename = "grid-" + run_tag + ".eps";
+    std::cout << "Writing grid to <" << eps_filename << ">..." << std::endl;
+    std::ofstream eps_output (eps_filename.c_str());
+    grid_out.write_eps (triangulation, eps_output);
+
+    // ...the same mesh in gnuplot format...
+    const std::string grid_filename = "grid-" + run_tag + ".gnuplot";
+    std::cout << "Writing grid to <" << grid_filename << ">..." << std::endl;
+    std::ofstream gnuplot_grid_output (grid_filename.c_str());
+    grid_out.write_gnuplot (triangulation, gnuplot_grid_output);
+
+    // ...and finally the solution vector, under the name "u", in gnuplot
+    // format.
+    const std::string solution_filename = "sol-" + run_tag + ".gnuplot";
+    std::cout << "Writing solution to <" << solution_filename << ">..."
+              << std::endl;
+    std::ofstream gnuplot_output (solution_filename.c_str());
+
+    DataOut<dim> data_out;
+    data_out.attach_dof_handler (dof_handler);
+    data_out.add_data_vector (solution2, "u");
+
+    data_out.build_patches (degree);
+
+    data_out.write_gnuplot(gnuplot_output);
+  }
+
+
+  template <int dim>
+  void DGMethod<dim>::run ()
+  {
+    // Driver of the computation: six cycles, each consisting of mesh
+    // creation (first cycle) or refinement (later cycles), system setup,
+    // assembly, solution and output.
+    for (unsigned int cycle=0; cycle<6; ++cycle)
+      {
+        std::cout << "Cycle " << cycle << ':' << std::endl;
+
+        if (cycle == 0)
+          {
+            // Create the rectangular domain. A default-constructed Point
+            // has all coordinates equal to zero, so for the lower corner
+            // only the first coordinate needs to be set; the upper corner
+            // has all coordinates equal to one. The domain is therefore
+            // twice as long in the first direction as in the other(s).
+            Point<dim> p1,p2;
+            p1(0)=-1;
+            for (unsigned int i=0; i<dim; ++i)
+              p2(i)=1.;
+            // Adjust the number of cells in different directions to obtain
+            // completely isotropic cells for the original mesh.
+            std::vector<unsigned int> repetitions(dim,1);
+            repetitions[0]=2;
+            GridGenerator::subdivided_hyper_rectangle (triangulation,
+                                                       repetitions,
+                                                       p1,
+                                                       p2);
+
+            triangulation.refine_global (5-dim);
+          }
+        else
+          refine_grid ();
+
+
+        std::cout << "   Number of active cells:       "
+                  << triangulation.n_active_cells()
+                  << std::endl;
+
+        setup_system ();
+
+        std::cout << "   Number of degrees of freedom: "
+                  << dof_handler.n_dofs()
+                  << std::endl;
+
+        // Report the time spent in the assembly routine.
+        Timer assemble_timer;
+        assemble_system2 ();
+        std::cout << "Time of assemble_system2: "
+                  << assemble_timer()
+                  << std::endl;
+        solve (solution2);
+
+        output_results (cycle);
+      }
+  }
+}
+
+
+
+int main ()
+{
+  // Rule printed above and below every error report on std::cerr.
+  const char *const error_rule
+    = "----------------------------------------------------";
+
+  try
+    {
+      using namespace dealii;
+      using namespace Step30;
+
+      // Space dimension used for both runs. To compute in 3D, change this
+      // line to <code>const unsigned int dim = 3;</code>.
+      const unsigned int dim = 2;
+
+      // Each run gets a scope of its own, so its problem object is
+      // destroyed before the next run starts.
+
+      // First run: isotropic refinement.
+      {
+        std::cout << "Performing a " << dim << "D run with isotropic refinement..." << std::endl
+                  << "------------------------------------------------" << std::endl;
+        DGMethod<dim> isotropic_problem(false);
+        isotropic_problem.run ();
+      }
+
+      // Second run: anisotropic refinement.
+      {
+        std::cout << std::endl
+                  << "Performing a " << dim << "D run with anisotropic refinement..." << std::endl
+                  << "--------------------------------------------------" << std::endl;
+        DGMethod<dim> anisotropic_problem(true);
+        anisotropic_problem.run ();
+      }
+    }
+  catch (std::exception &exc)
+    {
+      // A standard exception: report its message and signal failure.
+      std::cerr << std::endl << std::endl
+                << error_rule << std::endl
+                << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << error_rule << std::endl;
+      return 1;
+    }
+  catch (...)
+    {
+      // Anything else: there is no message to show, but fail all the same.
+      std::cerr << std::endl << std::endl
+                << error_rule << std::endl
+                << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << error_rule << std::endl;
+      return 1;
+    }
+
+  return 0;
+}
diff --git a/examples/step-31/CMakeLists.txt b/examples/step-31/CMakeLists.txt
new file mode 100644
index 0000000..8161b22
--- /dev/null
+++ b/examples/step-31/CMakeLists.txt
@@ -0,0 +1,50 @@
+##
+#  CMake script for the step-31 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-31")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+# Refuse to run with a CMake older than 2.8.8.
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+# Locate the deal.II package configuration, requesting version 8.4. QUIET
+# suppresses find_package's own diagnostics; a failure is reported by the
+# explicit check below instead. The search is hinted at, in this order, the
+# CMake variables deal.II_DIR and DEAL_II_DIR, the two parent directories,
+# and the environment variable DEAL_II_DIR.
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+# Stop with a message on how to point CMake to deal.II if the search failed.
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+#
+# Are all dependencies fulfilled?
+#
+# This tutorial requires a deal.II installation with Trilinos support.
+# DEAL_II_WITH_TRILINOS is not set in this script; presumably it is provided
+# by the deal.II configuration loaded above.
+IF(NOT DEAL_II_WITH_TRILINOS)
+  MESSAGE(FATAL_ERROR "
+Error! The deal.II library found at ${DEAL_II_PATH} was not configured with
+    DEAL_II_WITH_TRILINOS = ON
+One or all of these are OFF in your installation but are required for this tutorial step."
+    )
+ENDIF()
+
+# Hand the rest over to deal.II. The two DEAL_II_* macros are not defined in
+# this script; presumably they come from the deal.II configuration loaded by
+# FIND_PACKAGE above. The project is declared between the two calls.
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-31/doc/builds-on b/examples/step-31/doc/builds-on
new file mode 100644
index 0000000..80dd13d
--- /dev/null
+++ b/examples/step-31/doc/builds-on
@@ -0,0 +1 @@
+step-22
diff --git a/examples/step-31/doc/intro.dox b/examples/step-31/doc/intro.dox
new file mode 100644
index 0000000..2735157
--- /dev/null
+++ b/examples/step-31/doc/intro.dox
@@ -0,0 +1,1016 @@
+<br>
+
+<i>This program was contributed by Martin Kronbichler and Wolfgang
+Bangerth.
+<br>
+This material is based upon work partly supported by the National
+Science Foundation under Award No. EAR-0426271 and The California Institute of
+Technology. Any opinions, findings, and conclusions or recommendations
+expressed in this publication are those of the author and do not
+necessarily reflect the views of the National Science Foundation or of The
+California Institute of Technology.
+</i>
+
+
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+<h3>The Boussinesq equations</h3>
+
+This program deals with an interesting physical problem: how does a
+fluid (i.e. a liquid or gas) behave if it experiences differences in
+buoyancy caused by temperature differences? It is clear that those
+parts of the fluid that are hotter (and therefore lighter) are going
+to rise up and those that are cooler (and denser) are going to sink
+down with gravity.
+
+In cases where the fluid moves slowly enough such that inertia effects
+can be neglected, the equations that describe such behavior are the
+Boussinesq equations that read as follows:
+@f{eqnarray*}
+  -\nabla \cdot (2 \eta \varepsilon ({\mathbf u})) + \nabla p &=&
+  -\rho\; \beta \; T\; \mathbf{g},
+  \\
+  \nabla \cdot {\mathbf u} &=& 0,
+  \\
+  \frac{\partial T}{\partial t}
+  +
+  {\mathbf u} \cdot \nabla T
+  -
+  \nabla \cdot \kappa \nabla T &=& \gamma.
+@f}
+These equations fall into the class of vector-valued problems (a
+toplevel overview of this topic can be found in the @ref vector_valued module).
+Here, <b>u</b> is the velocity field, <i>p</i> the pressure, and <i>T</i>
+the temperature of the fluid. $\varepsilon ({\mathbf u}) = \frac 12
+[(\nabla{\mathbf u}) + (\nabla {\mathbf u})^T]$ is the symmetric
+gradient of the velocity. As can be seen, velocity and pressure
+solve a Stokes equation describing the motion of an incompressible
+fluid, an equation we have previously considered in step-22; we
+will draw extensively on the experience we have gained in that program, in
+particular with regard to efficient linear Stokes solvers.
+
+The forcing term of the fluid motion is the buoyancy of the
+fluid, expressed as the product of the density $\rho$, the thermal expansion
+coefficient $\beta$,
+the temperature <i>T</i> and the gravity vector <b>g</b> pointing
+downward. (A derivation of why the right hand side looks like it looks
+is given in the introduction of step-32.)
+While the first two equations describe how the fluid reacts to
+temperature differences by moving around, the third equation states
+how the fluid motion affects the temperature field: it is an advection
+diffusion equation, i.e. the temperature is attached to the fluid
+particles and advected along in the flow field, with an additional
+diffusion (heat conduction) term. In many applications, the diffusion
+coefficient is fairly small, and the temperature equation is in fact
+transport, not diffusion dominated and therefore in character more hyperbolic
+than elliptic; we will have to take this into account when developing a stable
+discretization.
+
+In the equations above, the term $\gamma$ on the right hand side denotes the
+heat sources and may be a spatially and temporally varying function. $\eta$
+and $\kappa$ denote the viscosity and diffusivity coefficients, which we assume
+constant for this tutorial program. The more general case when $\eta$ depends on
+the temperature is an important factor in physical applications: Most materials
+become more fluid as they get hotter (i.e., $\eta$ decreases with <i>T</i>);
+sometimes, as in the case of rock minerals at temperatures close to their
+melting point, $\eta$ may change by orders of magnitude over the typical range
+of temperatures.
+
+We note that the Stokes equation above could be nondimensionalized by
+introducing the <a target="_top"
+href="http://en.wikipedia.org/wiki/Rayleigh_number">Rayleigh
+number</a> $\mathrm{Ra}=\frac{\|g\| \beta \rho}{\eta \kappa} \delta T L^3$ using a
+typical length scale $L$, typical temperature difference $\delta T$, density
+$\rho$, viscosity $\eta$, and thermal diffusivity $\kappa$.
+$\mathrm{Ra}$ is a dimensionless number that describes the ratio of heat
+transport due to convection induced by buoyancy changes from
+temperature differences, and of heat transport due to thermal
+diffusion. A small Rayleigh number implies that buoyancy is not strong
+relative to viscosity and fluid motion <b>u</b> is slow enough so
+that heat diffusion $\kappa\nabla T$ is the dominant heat transport
+term. On the other hand, a fluid with a high Rayleigh number will show
+vigorous convection that dominates heat conduction.
+
+For most fluids for which we are interested in computing thermal
+convection, the Rayleigh number is very large, often $10^6$ or
+larger. From the structure of the equations, we see that this will
+lead to large pressure differences and large velocities. Consequently,
+the convection term in the convection-diffusion equation for <i>T</i> will
+also be very large and an accurate solution of this equation will
+require us to choose small time steps. Problems with large Rayleigh
+numbers are therefore hard to solve numerically for similar reasons
+that make solving the <a
+href="http://en.wikipedia.org/wiki/Navier-stokes_equations">Navier-Stokes
+equations</a> hard when the <a
+href="http://en.wikipedia.org/wiki/Reynolds_number">Reynolds number
+$\mathrm{Re}$</a> is large.
+
+Note that a large Rayleigh number does not necessarily involve large
+velocities in absolute terms. For example, the Rayleigh number in the
+earth mantle is larger than $10^6$. Yet the
+velocities are small: the material is in fact solid rock but it is so
+hot and under pressure that it can flow very slowly, on the order of
+at most a few centimeters per year. Nevertheless, this can lead to
+mixing over time scales of many million years, a time scale much
+shorter than for the same amount of heat to be distributed by thermal
+conductivity and a time scale of relevance to affect the evolution of the
+earth's interior and surface structure.
+
+@note If you are interested in using the program as the basis for your own
+experiments, you will also want to take a look at its continuation in
+step-32. Furthermore, step-32 later was developed into the much larger open
+source code Aspect (see http://aspect.dealii.org/ ) that can solve realistic
+problems and that you may want to investigate before trying to morph step-31
+into something that can solve whatever you want to solve.
+
+
+<h3>%Boundary and initial conditions</h3>
+
+Since the Boussinesq equations are derived under the assumption that inertia
+of the fluid's motion does not play a role, the flow field is at each time
+entirely determined by buoyancy difference at that time, not by the flow field
+at previous times. This is reflected by the fact that the first two equations
+above are the steady state Stokes equations that do not contain a time
+derivative. Consequently, we do not need initial conditions for either
+velocities or pressure. On the other hand, the temperature field does satisfy
+an equation with a time derivative, so we need initial conditions for <i>T</i>.
+
+As for boundary conditions: if $\kappa>0$ then the temperature
+satisfies a second order differential equation that requires
+boundary data all around the boundary for all times. These can either be a
+prescribed boundary temperature $T|_{\partial\Omega}=T_b$ (Dirichlet boundary
+conditions), or a prescribed thermal flux $\mathbf{n}\cdot\kappa\nabla
+T|_{\partial\Omega}=\phi$; in this program, we will use an insulated boundary
+condition, i.e. prescribe no thermal flux: $\phi=0$.
+
+Similarly, the velocity field requires us to pose boundary conditions. These
+may be no-slip no-flux conditions <b>u</b>=0 on $\partial\Omega$ if the fluid
+sticks to the boundary, or no normal flux conditions $\mathbf n \cdot \mathbf
+u = 0$ if the fluid can flow along but not across the boundary, or any number
+of other conditions that are physically reasonable. In this program, we will
+use no normal flux conditions.
+
+
+<h3>Solution approach</h3>
+
+Like the equations solved in step-21, we here have a
+system of differential-algebraic equations (DAE): with respect to the time
+variable, only the temperature equation is a differential equation
+whereas the Stokes system for <b>u</b> and <i>p</i> has no
+time-derivatives and is therefore of the sort of an algebraic
+constraint that has to hold at each time instant. The main difference
+to step-21 is that the algebraic constraint there was a
+mixed Laplace system of the form
+@f{eqnarray*}
+  \mathbf u + {\mathbf K}\lambda \nabla p &=& 0, \\
+  \nabla\cdot \mathbf u &=& f,
+@f}
+where now we have a Stokes system
+@f{eqnarray*}
+  -\nabla \cdot (2 \eta \varepsilon ({\mathbf u})) + \nabla p &=& f, \\
+  \nabla\cdot \mathbf u &=& 0,
+@f}
+where $\nabla \cdot \eta \varepsilon (\cdot)$ is an operator similar to the
+Laplacian $\Delta$ applied to a vector field.
+
+Given the similarity to what we have done in step-21,
+it may not come as a surprise that we choose a similar approach,
+although we will have to make adjustments for the change in operator
+in the top-left corner of the differential operator.
+
+
+<h4>Time stepping</h4>
+
+The structure of the problem as a DAE allows us to use the same strategy as
+we have already used in step-21, i.e. we use a time lag
+scheme: we first solve the temperature equation (using an extrapolated
+velocity field), and then insert the new temperature solution into the right
+hand side of the velocity equation. The way we implement this in our code
+looks at things from a slightly different perspective, though. We first
+solve the Stokes equations for velocity and pressure using the temperature
+field from the previous time step, which means that we get the velocity for
+the previous time step. In other words, we first solve the Stokes system for
+time step <i>n-1</i> as
+@f{eqnarray*}
+  -\nabla \cdot (2\eta \varepsilon ({\mathbf u}^{n-1})) + \nabla p^{n-1} &=&
+  -\rho\; \beta \; T^{n-1} \mathbf{g},
+  \\
+  \nabla \cdot {\mathbf u}^{n-1} &=& 0,
+@f}
+and then the temperature equation with an extrapolated velocity field to
+time <i>n</i>.
+
+In contrast to step-21, we'll use a higher order time
+stepping scheme here, namely the <a
+href="http://en.wikipedia.org/wiki/Backward_differentiation_formula">Backward
+Differentiation Formula scheme of order 2 (BDF-2 in short)</a> that replaces
+the time derivative $\frac{\partial T}{\partial t}$ by the (one-sided)
+difference quotient $\frac{\frac 32 T^{n}-2T^{n-1}+\frac 12 T^{n-2}}{k}$
+with <i>k</i> the time step size. This gives the discretized-in-time
+temperature equation
+@f{eqnarray*}
+  \frac 32 T^n
+  -
+  k\nabla \cdot \kappa \nabla T^n
+  &=&
+  2 T^{n-1}
+  -
+  \frac 12 T^{n-2}
+  -
+  k(2{\mathbf u}^{n-1} - {\mathbf u}^{n-2} ) \cdot \nabla (2T^{n-1}-T^{n-2})
+  +
+  k\gamma.
+@f}
+Note how the temperature equation is solved semi-explicitly: diffusion is
+treated implicitly whereas advection is treated explicitly using an
+extrapolation (or forward-projection) of temperature and velocity, including
+the just-computed velocity ${\mathbf u}^{n-1}$. The forward-projection to
+the current time level <i>n</i> is derived from a Taylor expansion, $T^n
+\approx T^{n-1} + k_n \frac{\partial T}{\partial t} \approx T^{n-1} + k_n
+\frac{T^{n-1}-T^{n-2}}{k_n} = 2T^{n-1}-T^{n-2}$. We need this projection for
+maintaining the order of accuracy of the BDF-2 scheme. In other words, the
+temperature fields we use in the explicit right hand side are second order
+approximations of the current temperature field &mdash; not quite an
+explicit time stepping scheme, but by character not too far away either.
+
+The introduction of the temperature extrapolation limits the time step by a
+<a href="http://en.wikipedia.org/wiki/Courant–Friedrichs–Lewy_condition">
+Courant-Friedrichs-Lewy (CFL) condition</a> just like it was in @ref step_21
+"step-21". (We wouldn't have had that stability condition if we treated the
+advection term implicitly since the BDF-2 scheme is A-stable, at the price
+that we needed to build a new temperature matrix at each time step.) We will
+discuss the exact choice of time step in the <a href="#Results">results
+section</a>, but for the moment of importance is that this CFL condition
+means that the time step size <i>k</i> may change from time step to time
+step, and that we have to modify the above formula slightly. If
+$k_n,k_{n-1}$ are the time step sizes of the current and previous time
+step, then we use the approximations
+
+$\frac{\partial T}{\partial t} \approx
+ \frac 1{k_n}
+ \left(
+       \frac{2k_n+k_{n-1}}{k_n+k_{n-1}} T^{n}
+       -
+       \frac{k_n+k_{n-1}}{k_{n-1}}T^{n-1}
+       +
+       \frac{k_n^2}{k_{n-1}(k_n+k_{n-1})} T^{n-2}
+ \right)$
+and
+$T^n \approx
+   T^{n-1} + k_n \frac{\partial T}{\partial t}
+   \approx
+   T^{n-1} + k_n
+   \frac{T^{n-1}-T^{n-2}}{k_{n-1}}
+   =
+   \left(1+\frac{k_n}{k_{n-1}}\right)T^{n-1}-\frac{k_n}{k_{n-1}}T^{n-2}$,
+and the above equation is generalized as follows:
+@f{eqnarray*}
+  \frac{2k_n+k_{n-1}}{k_n+k_{n-1}} T^n
+  -
+  k_n\nabla \cdot \kappa \nabla T^n
+  &=&
+  \frac{k_n+k_{n-1}}{k_{n-1}} T^{n-1}
+  -
+  \frac{k_n^2}{k_{n-1}(k_n+k_{n-1})} T^{n-2}
+  -
+  k_n{\mathbf u}^{*,n} \cdot \nabla T^{*,n}
+  +
+  k_n\gamma,
+@f}
+
+where ${(\cdot)}^{*,n} = \left(1+\frac{k_n}{k_{n-1}}\right)(\cdot)^{n-1} -
+\frac{k_n}{k_{n-1}}(\cdot)^{n-2}$ denotes the extrapolation of velocity
+<b>u</b> and temperature <i>T</i> to time level <i>n</i>, using the values
+at the two previous time steps. That's not an easy to read equation, but
+will provide us with the desired higher order accuracy. As a consistency
+check, it is easy to verify that it reduces to the same equation as above if
+$k_n=k_{n-1}$.
+
+As a final remark we note that the choice of a higher order time
+stepping scheme of course forces us to keep more time steps in memory;
+in particular, we here will need to have $T^{n-2}$ around, a vector
+that we could previously discard. This seems like a nuisance that we
+were able to avoid previously by using only a first order time
+stepping scheme, but as we will see below when discussing the topic of
+stabilization, we will need this vector anyway and so keeping it
+around for time discretization is essentially for free and gives us
+the opportunity to use a higher order scheme.
+
+
+<h4>Weak form and space discretization for the Stokes part</h4>
+
+Like solving the mixed Laplace equations, solving the Stokes equations
+requires us to choose particular pairs of finite elements for
+velocities and pressure variables. Because this has already been discussed in
+step-22, we only cover this topic briefly:
+Here, we use the
+stable pair $Q_{p+1}^d \times Q_p, p\ge 1$. These are continuous
+elements, so we can form the weak form of the Stokes equation without
+problem by integrating by parts and substituting continuous functions
+by their discrete counterparts:
+@f{eqnarray*}
+  (\nabla {\mathbf v}_h, 2\eta \varepsilon ({\mathbf u}^{n-1}_h))
+  -
+  (\nabla \cdot {\mathbf v}_h, p^{n-1}_h)
+  &=&
+  -({\mathbf v}_h, \rho\; \beta \; T^{n-1}_h \mathbf{g}),
+  \\
+  (q_h, \nabla \cdot {\mathbf u}^{n-1}_h) &=& 0,
+@f}
+for all test functions $\mathbf v_h, q_h$. The first term of the first
+equation is considered as the inner product between tensors, i.e.
+$(\nabla {\mathbf v}_h, \eta \varepsilon ({\mathbf u}^{n-1}_h))_\Omega
+ = \int_\Omega \sum_{i,j=1}^d [\nabla {\mathbf v}_h]_{ij}
+           \eta [\varepsilon ({\mathbf u}^{n-1}_h)]_{ij}\, dx$.
+Because the second tensor in this product is symmetric, the
+anti-symmetric component of $\nabla {\mathbf v}_h$ plays no role and
+it leads to the entirely same form if we use the symmetric gradient of
+$\mathbf v_h$ instead. Consequently, the formulation we consider and
+that we implement is
+@f{eqnarray*}
+  (\varepsilon({\mathbf v}_h), 2\eta \varepsilon ({\mathbf u}^{n-1}_h))
+  -
+  (\nabla \cdot {\mathbf v}_h, p^{n-1}_h)
+  &=&
+  -({\mathbf v}_h, \rho\; \beta \; T^{n-1}_h \mathbf{g}),
+  \\
+  (q_h, \nabla \cdot {\mathbf u}^{n-1}_h) &=& 0.
+@f}
+
+This is exactly the same as what we already discussed in
+step-22 and there is not much more to say about this here.
+
+
+<h4>Stabilization, weak form and space discretization for the temperature equation</h4>
+
+The more interesting question is what to do with the temperature
+advection-diffusion equation. By default, not all discretizations of
+this equation are equally stable unless we either do something like
+upwinding, stabilization, or all of this. One way to achieve this is
+to use discontinuous elements (i.e. the FE_DGQ class that we used, for
+example, in the discretization of the transport equation in
+step-12, or in discretizing the pressure in
+step-20 and step-21) and to define a
+flux at the interface between cells that takes into account
+upwinding. If we had a pure advection problem this would probably be
+the simplest way to go. However, here we have some diffusion as well,
+and the discretization of the Laplace operator with discontinuous
+elements is cumbersome because of the significant number of additional
+terms that need to be integrated on each face between
+cells. Discontinuous elements also have the drawback that the use of
+numerical fluxes introduces an additional numerical diffusion that
+acts everywhere, whereas we would really like to reduce the effect
+of numerical diffusion to a minimum and only apply it where it is
+necessary to stabilize the scheme.
+
+A better alternative is therefore to add some nonlinear viscosity to
+the model. Essentially, what this does is to transform the temperature
+equation from the form
+@f{eqnarray*}
+  \frac{\partial T}{\partial t}
+  +
+  {\mathbf u} \cdot \nabla T
+  -
+  \nabla \cdot \kappa \nabla T &=& \gamma
+@f}
+to something like
+@f{eqnarray*}
+  \frac{\partial T}{\partial t}
+  +
+  {\mathbf u} \cdot \nabla T
+  -
+  \nabla \cdot (\kappa+\nu(T)) \nabla T &=& \gamma,
+@f}
+where $\nu(T)$ is an additional viscosity (diffusion) term that only
+acts in the vicinity of shocks and other discontinuities. $\nu(T)$ is
+chosen in such a way that if <i>T</i> satisfies the original equations, the
+additional viscosity is zero.
+
+To achieve this, the literature contains a number of approaches. We
+will here follow one developed by Guermond and Popov that builds on a
+suitably defined residual and a limiting procedure for the additional
+viscosity. To this end, let us define a residual $R_\alpha(T)$ as follows:
+ at f{eqnarray*}
+  R_\alpha(T)
+  =
+  \left(
+  \frac{\partial T}{\partial t}
+  +
+  {\mathbf u} \cdot \nabla T
+  -
+  \nabla \cdot \kappa \nabla T - \gamma
+  \right)
+  T^{\alpha-1}
+@f}
+where we will later choose the stabilization exponent $\alpha$ from
+within the range $[1,2]$. Note that $R_\alpha(T)$ will be zero if $T$
+satisfies the temperature equation, since then the term in parentheses
+will be zero. Multiplying terms out, we get the following, entirely
+equivalent form:
+@f{eqnarray*}
+  R_\alpha(T)
+  =
+  \frac 1\alpha
+  \frac{\partial (T^\alpha)}{\partial t}
+  +
+  \frac 1\alpha
+  {\mathbf u} \cdot \nabla (T^\alpha)
+  -
+  \frac 1\alpha
+  \nabla \cdot \kappa \nabla (T^\alpha)
+  +
+  \kappa(\alpha-1)
+  T^{\alpha-2} |\nabla T|^2
+  -
+  \gamma
+  T^{\alpha-1}
+@f}
+
+With this residual, we can now define the artificial viscosity as
+a piecewise constant function defined on each cell $K$ with diameter
+$h_K$ separately as
+follows:
+@f{eqnarray*}
+  \nu_\alpha(T)|_K
+  =
+  \beta
+  \|\mathbf{u}\|_{L^\infty(K)}
+  \min\left\{
+    h_K,
+    h_K^\alpha
+    \frac{\|R_\alpha(T)\|_{L^\infty(K)}}{c(\mathbf{u},T)}
+  \right\}
+@f}
+
+Here, $\beta$ is a stabilization constant (a dimensional analysis
+reveals that it is unitless and therefore independent of scaling; we will
+discuss its choice in the <a href="#Results">results section</a>) and
+$c(\mathbf{u},T)$ is a normalization constant that must have units
+$\frac{m^{\alpha-1}K^\alpha}{s}$. We will choose it as
+$c(\mathbf{u},T) =
+ c_R\ \|\mathbf{u}\|_{L^\infty(\Omega)} \ \mathrm{var}(T)
+ \ |\mathrm{diam}(\Omega)|^{\alpha-2}$,
+where $\mathrm{var}(T)=\max_\Omega T - \min_\Omega T$ is the range of present
+temperature values (remember that buoyancy is driven by temperature
+variations, not the absolute temperature) and $c_R$ is a dimensionless
+constant. To understand why this method works consider this: If on a particular
+cell $K$ the temperature field is smooth, then we expect the residual
+to be small there (in fact to be on the order of ${\cal O}(h_K)$) and
+the stabilization term that injects artificial diffusion will there be
+of size $h_K^{\alpha+1}$ — i.e. rather small, just as we hope it to
+be when no additional diffusion is necessary. On the other hand, if we
+are on or close to a discontinuity of the temperature field, then the
+residual will be large; the minimum operation in the definition of
+$\nu_\alpha(T)$ will then ensure that the stabilization has size $h_K$
+— the optimal amount of artificial viscosity to ensure stability of
+the scheme.
+
+It is certainly a good question whether this scheme really works.
+Computations by Guermond and Popov have shown that this form of
+stabilization actually performs much better than most of the other
+stabilization schemes that are around (for example streamline
+diffusion, to name only the simplest one). Furthermore, for $\alpha\in
+[1,2)$ they can even prove that it produces better convergence orders
+for the linear transport equation than for example streamline
+diffusion. For $\alpha=2$, no theoretical results are currently
+available, but numerical tests indicate that the results
+are considerably better than for $\alpha=1$.
+
+A more practical question is how to introduce this artificial
+diffusion into the equations we would like to solve. Note that the
+numerical viscosity $\nu(T)$ is temperature-dependent, so the equation
+we want to solve is nonlinear in <i>T</i> — not what one desires from a
+simple method to stabilize an equation, and even less so if we realize
+that $\nu(T)$ is nondifferentiable in <i>T</i>. However, there is no
+reason to despair: we still have to discretize in time and we can
+treat the term explicitly.
+
+In the definition of the stabilization parameter, we approximate the time
+derivative by $\frac{\partial T}{\partial t} \approx
+\frac{T^{n-1}-T^{n-2}}{k^{n-1}}$. This approximation makes use only
+of available time data, and this is the reason why we need to store data of two
+previous time steps (which enabled us to use the BDF-2 scheme without
+additional storage cost). We could now simply evaluate the rest of the
+terms at $t_{n-1}$, but then the discrete residual would be nothing else than
+a backward Euler approximation, which is only first order accurate. So, in
+case of smooth solutions, the residual would be still of the order <i>h</i>,
+despite the second order time accuracy in the outer BDF-2 scheme and the
+spatial FE discretization. This is certainly not what we want to have
+(in fact, we desired to have small residuals in regions where the solution
+behaves nicely), so a bit more care is needed. The key to this problem
+is to observe that the first derivative as we constructed it is actually
+centered at $t_{n-\frac{3}{2}}$. We get the desired second order accurate
+residual calculation if we evaluate all spatial terms at $t_{n-\frac{3}{2}}$
+by using the approximation $\frac 12 T^{n-1}+\frac 12 T^{n-2}$, which means
+that we calculate the nonlinear viscosity as a function of this
+intermediate temperature, $\nu_\alpha =
+\nu_\alpha\left(\frac 12 T^{n-1}+\frac 12 T^{n-2}\right)$. Note that this
+evaluation of the residual is nothing else than a Crank-Nicolson scheme,
+so we can be sure that now everything is alright. One might wonder whether
+it is a problem that the numerical viscosity now is not evaluated at
+time <i>n</i> (as opposed to the rest of the equation). However, this offset
+is uncritical: For smooth solutions, $\nu_\alpha$ will vary continuously,
+so the error in time offset is <i>k</i> times smaller than the nonlinear
+viscosity itself, i.e., it is a small higher order contribution that is
+left out. That's fine because the term itself is already at the level of
+discretization error in smooth regions.
+
+Using the BDF-2 scheme introduced above,
+this yields for the simpler case of uniform time steps of size <i>k</i>:
+@f{eqnarray*}
+  \frac 32 T^n
+  -
+  k\nabla \cdot \kappa \nabla T^n
+  &=&
+  2 T^{n-1}
+  -
+  \frac 12 T^{n-2}
+  \\
+  &&
+  +
+  k\nabla \cdot
+  \left[
+    \nu_\alpha\left(\frac 12 T^{n-1}+\frac 12 T^{n-2}\right)
+    \ \nabla (2T^{n-1}-T^{n-2})
+  \right]
+  \\
+  &&
+  -
+  k(2{\mathbf u}^{n-1}-{\mathbf u}^{n-2}) \cdot \nabla (2T^{n-1}-T^{n-2})
+  \\
+  &&
+  +
+  k\gamma.
+@f}
+On the left side of this equation remains the term from the time
+derivative and the original (physical) diffusion which we treat
+implicitly (this is actually a nice term: the matrices that result
+from the left hand side are the mass matrix and a multiple of the
+Laplace matrix — both are positive definite and if the time step
+size <i>k</i> is small, the sum is simple to invert). On the right hand
+side, the terms in the first line result from the time derivative; in
+the second line is the artificial diffusion at time $t_{n-\frac
+32}$; the third line contains the
+advection term, and the fourth the sources. Note that the
+artificial diffusion operates on the extrapolated
+temperature at the current time in the same way as we have discussed
+the advection works in the section on time stepping.
+
+The form for nonuniform time steps that we will have to use in
+reality is a bit more complicated (which is why we showed the simpler
+form above first) and reads:
+@f{eqnarray*}
+  \frac{2k_n+k_{n-1}}{k_n+k_{n-1}} T^n
+  -
+  k_n\nabla \cdot \kappa \nabla T^n
+  &=&
+  \frac{k_n+k_{n-1}}{k_{n-1}} T^{n-1}
+  -
+  \frac{k_n^2}{k_{n-1}(k_n+k_{n-1})} T^{n-2}
+  \\
+  &&
+  +
+  k_n\nabla \cdot
+  \left[
+    \nu_\alpha\left(\frac 12 T^{n-1}+\frac 12 T^{n-2}\right)
+    \ \nabla  \left[
+    \left(1+\frac{k_n}{k_{n-1}}\right)T^{n-1}-\frac{k_n}{k_{n-1}}T^{n-2}
+  \right]
+  \right]
+  \\
+  &&
+  -
+  k_n
+  \left[
+    \left(1+\frac{k_n}{k_{n-1}}\right){\mathbf u}^{n-1} -
+    \frac{k_n}{k_{n-1}}{\mathbf u}^{n-2}
+  \right]
+  \cdot \nabla
+  \left[
+    \left(1+\frac{k_n}{k_{n-1}}\right)T^{n-1}-\frac{k_n}{k_{n-1}}T^{n-2}
+  \right]
+  \\
+  &&
+  +
+  k_n\gamma.
+@f}
+
+After settling all these issues, the weak form follows naturally from
+the strong form shown in the last equation, and we immediately arrive
+at the weak form of the discretized equations:
+@f{eqnarray*}
+  \frac{2k_n+k_{n-1}}{k_n+k_{n-1}} (\tau_h,T_h^n)
+  +
+  k_n (\nabla \tau_h, \kappa \nabla T_h^n)
+  &=&
+  \biggl(\tau_h,
+  \frac{k_n+k_{n-1}}{k_{n-1}} T_h^{n-1}
+  -
+  \frac{k_n^2}{k_{n-1}(k_n+k_{n-1})} T_h^{n-2}
+  \\
+  &&\qquad
+  -
+  k_n
+  \left[
+    \left(1+\frac{k_n}{k_{n-1}}\right){\mathbf u}^{n-1} -
+    \frac{k_n}{k_{n-1}}{\mathbf u}^{n-2}
+  \right]
+  \cdot \nabla
+  \left[
+    \left(1+\frac{k_n}{k_{n-1}}\right)T^{n-1}-\frac{k_n}{k_{n-1}}T^{n-2}
+  \right]
+  +
+  k_n\gamma \biggr)
+  \\
+  &&
+  -
+  k_n \left(\nabla \tau_h,
+    \nu_\alpha\left(\frac 12 T_h^{n-1}+\frac 12 T_h^{n-2}\right)
+    \ \nabla \left[
+    \left(1+\frac{k_n}{k_{n-1}}\right)T^{n-1}-\frac{k_n}{k_{n-1}}T^{n-2}
+  \right]
+  \right)
+@f}
+for all discrete test functions $\tau_h$. Here, the diffusion term has been
+integrated by parts, and we have used that we will impose no thermal flux,
+$\mathbf{n}\cdot\kappa\nabla T|_{\partial\Omega}=0$.
+
+This then results in a
+matrix equation of form
+@f{eqnarray*}
+  \left( \frac{2k_n+k_{n-1}}{k_n+k_{n-1}} M+k_n A_T\right) T_h^n
+  = F(U_h^{n-1}, U_h^{n-2},T_h^{n-1},T_h^{n-2}),
+@f}
+which given the structure of the matrix on the left (the sum of two
+positive definite matrices) is easily solved using the Conjugate
+Gradient method.
+
+
+
+<h4>Linear solvers</h4>
+
+As explained above, our approach to solving the joint system for
+velocities/pressure on the one hand and temperature on the other is to use an
+operator splitting where we first solve the Stokes system for the velocities
+and pressures using the old temperature field, and then solve for the new
+temperature field using the just computed velocity field.
+
+
+<h5>Linear solvers for the Stokes problem</h5>
+
+Solving the linear equations coming from the Stokes system has been
+discussed in great detail in step-22. In particular, in
+the results section of that program, we have discussed a number of
+alternative linear solver strategies that turned out to be more
+efficient than the original approach. The best alternative
+identified there was to use a GMRES solver preconditioned by a block
+matrix involving the Schur complement. Specifically, the Stokes
+operator leads to a block structured matrix
+@f{eqnarray*}
+  \left(\begin{array}{cc}
+    A & B^T \\ B & 0
+  \end{array}\right)
+@f}
+and as discussed there a good preconditioner is
+@f{eqnarray*}
+  P
+  =
+  \left(\begin{array}{cc}
+    A & 0 \\ B & -S
+  \end{array}\right),
+  \qquad
+  \text{or equivalently}
+  \qquad
+  P^{-1}
+  =
+  \left(\begin{array}{cc}
+    A^{-1} & 0 \\ S^{-1} B A^{-1} & -S^{-1}
+  \end{array}\right)
+@f}
+where <i>S</i> is the Schur complement of the Stokes operator
+$S=BA^{-1}B^T$. Of course, this preconditioner is not useful because we
+can't form the various inverses of matrices, but we can use the
+following as a preconditioner:
+@f{eqnarray*}
+  \tilde P^{-1}
+  =
+  \left(\begin{array}{cc}
+    \tilde A^{-1} & 0 \\ \tilde S^{-1} B \tilde A^{-1} & -\tilde S^{-1}
+  \end{array}\right)
+@f}
+where $\tilde A^{-1},\tilde S^{-1}$ are approximations to the inverse
+matrices. In particular, it turned out that <i>S</i> is spectrally
+equivalent to the mass matrix and consequently replacing $\tilde
+S^{-1}$ by a CG solver applied to the mass matrix on the pressure
+space was a good choice. In a small deviation from step-22, we
+here have a coefficient $\eta$ in the momentum equation, and by the same
+derivation as there we should arrive at the conclusion that it is the weighted
+mass matrix with entries $\tilde S_{ij}=(\eta^{-1}\varphi_i,\varphi_j)$ that
+we should be using.
+
+It was more complicated to come up with a good replacement $\tilde
+A^{-1}$, which corresponds to the discretized symmetric Laplacian of
+the vector-valued velocity field, i.e.
+$A_{ij} = (\varepsilon ({\mathbf v}_i), 2\eta \varepsilon ({\mathbf
+v}_j))$.
+In step-22 we used a sparse LU decomposition (using the
+SparseDirectUMFPACK class) of <i>A</i> for $\tilde A^{-1}$ — the
+perfect preconditioner — in 2d, but for 3d memory and compute
+time is not usually sufficient to actually compute this decomposition;
+consequently, we only use an incomplete LU decomposition (ILU, using
+the SparseILU class) in 3d.
+
+For this program, we would like to go a bit further. To this end, note
+that the symmetrized bilinear form on vector fields,
+$(\varepsilon ({\mathbf v}_i), 2 \eta \varepsilon ({\mathbf v}_j))$
+is not too far away from the nonsymmetrized version,
+$(\nabla {\mathbf v}_i, \eta \nabla {\mathbf v}_j)
+= \sum_{k,l=1}^d
+  (\partial_k ({\mathbf v}_i)_l, \eta \partial_k ({\mathbf v}_j)_l)
+$ (note that the factor 2 has disappeared in this form). The latter,
+however, has the advantage that the <code>dim</code> vector components
+of the test functions are not coupled (well, almost, see below),
+i.e. the resulting matrix is block-diagonal: one block for each vector
+component, and each of these blocks is equal to the Laplace matrix for
+this vector component. So assuming we order degrees of freedom in such
+a way that first all <i>x</i>-components of the velocity are numbered, then
+the <i>y</i>-components, and then the <i>z</i>-components, then the matrix
+$\hat A$ that is associated with this slightly different bilinear form has
+the form
+@f{eqnarray*}
+  \hat A =
+  \left(\begin{array}{ccc}
+    A_s & 0 & 0 \\ 0 & A_s & 0 \\ 0 & 0 & A_s
+  \end{array}\right)
+@f}
+where $A_s$ is a Laplace matrix of size equal to the number of shape functions
+associated with each component of the vector-valued velocity. With this
+matrix, one could be tempted to define our preconditioner for the
+velocity matrix <i>A</i> as follows:
+@f{eqnarray*}
+  \tilde A^{-1} =
+  \left(\begin{array}{ccc}
+    \tilde A_s^{-1} & 0 & 0 \\
+    0 & \tilde A_s^{-1} & 0 \\
+    0 & 0 & \tilde A_s^{-1}
+  \end{array}\right),
+@f}
+where $\tilde A_s^{-1}$ is a preconditioner for the Laplace matrix —
+something where we know very well how to build good preconditioners!
+
+In reality, the story is not quite as simple: To make the matrix
+$\tilde A$ definite, we need to make the individual blocks $\tilde
+A_s$ definite by applying boundary conditions. One can try to do so by
+applying Dirichlet boundary conditions all around the boundary, and
+then the so-defined preconditioner $\tilde A^{-1}$ turns out to be a
+good preconditioner for <i>A</i> if the latter matrix results from a Stokes
+problem where we also have Dirichlet boundary conditions on the
+velocity components all around the domain, i.e. if we enforce <b>u</b>=0.
+
+Unfortunately, this "if" is an "if and only if": in the program below
+we will want to use no-flux boundary conditions of the form $\mathbf u
+\cdot \mathbf n = 0$ (i.e. flow %parallel to the boundary is allowed,
+but no flux through the boundary). In this case, it turns out that the
+block diagonal matrix defined above is not a good preconditioner
+because it neglects the coupling of components at the boundary. A
+better way to do things is therefore if we build the matrix $\hat A$
+as the vector Laplace matrix $\hat A_{ij} = (\nabla {\mathbf v}_i,
+\eta \nabla {\mathbf v}_j)$ and then apply the same boundary condition
+as we applied to <i>A</i>. If this is a Dirichlet boundary condition all
+around the domain, then $\hat A$ will decouple to three diagonal blocks
+as above, and if the boundary conditions are of the form $\mathbf u
+\cdot \mathbf n = 0$ then this will introduce a coupling of degrees of
+freedom at the boundary but only there. This, in fact, turns out to be
+a much better preconditioner than the one introduced above, and has
+almost all the benefits of what we hoped to get.
+
+
+To sum this whole story up, we can observe:
+<ul>
+  <li> Compared to building a preconditioner from the original matrix <i>A</i>
+  resulting from the symmetric gradient as we did in step-22,
+  we have to expect that the preconditioner based on the Laplace bilinear form
+  performs worse since it does not take into account the coupling between
+  vector components.
+
+  <li>On the other hand, preconditioners for the Laplace matrix are typically
+  more mature and perform better than ones for vector problems. For example,
+  at the time of this writing, Algebraic %Multigrid (AMG) algorithms are very
+  well developed for scalar problems, but not so for vector problems.
+
+  <li>In building this preconditioner, we will have to build up the
+  matrix $\hat A$ and its preconditioner. While this means that we
+  have to store an additional matrix we didn't need before, the
+  preconditioner $\tilde A_s^{-1}$ is likely going to need much less
+  memory than storing a preconditioner for the coupled matrix
+  <i>A</i>. This is because the matrix $A_s$ has only a third of the
+  entries per row for all rows corresponding to interior degrees of
+  freedom, and contains coupling between vector components only on
+  those parts of the boundary where the boundary conditions introduce
+  such a coupling. Storing the matrix is therefore comparatively
+  cheap, and we can expect that computing and storing the
+  preconditioner $\tilde A_s$ will also be much cheaper compared to
+  doing so for the fully coupled matrix.
+</ul>
+
+
+
+<h5>Linear solvers for the temperature equation</h5>
+
+This is the easy part: The matrix for the temperature equation has the form
+$\alpha M + \beta A$, where $M,A$ are mass and stiffness matrices on the
+temperature space, and $\alpha,\beta$ are constants related to the time stepping
+scheme and the current and previous time step. This being the sum of a
+symmetric positive definite and a symmetric positive semidefinite matrix, the
+result is also symmetric positive definite. Furthermore, $\frac\beta\alpha$ is
+a number proportional to the time step, and so becomes small whenever the mesh
+is fine, damping the effect of the then ill-conditioned stiffness matrix.
+
+As a consequence, inverting this matrix with the Conjugate Gradient algorithm,
+using a simple preconditioner, is trivial and very cheap compared to inverting
+the Stokes matrix.
+
+
+
+<h3>Implementation details</h3>
+
+<h4>Using different DoFHandler objects</h4>
+
+One of the things worth explaining up front about the program below is the use
+of two different DoFHandler objects. If one looks at the structure of the
+equations above and the scheme for their solution, one realizes that there is
+little commonality that keeps the Stokes part and the temperature part
+together. In all previous tutorial programs in which we have discussed @ref
+vector_valued "vector-valued problems" we have always only used a single
+finite element with several vector components, and a single DoFHandler object.
+Sometimes, we have substructured the resulting matrix into blocks to
+facilitate particular solver schemes; this was, for example, the case in the
+step-22 program for the Stokes equations upon which the current
+program is based.
+
+We could of course do the same here. The linear system that we would get would
+look like this:
+@f{eqnarray*}
+  \left(\begin{array}{ccc}
+    A & B^T & 0 \\ B & 0 &0 \\ C & 0 & K
+  \end{array}\right)
+  \left(\begin{array}{ccc}
+    U^{n-1} \\ P^{n-1} \\ T^n
+  \end{array}\right)
+  =
+  \left(\begin{array}{ccc}
+    F_U(T^{n-1}) \\ 0 \\ F_T(U^{n-1},U^{n-2},T^{n-1},T^{n-2})
+  \end{array}\right).
+@f}
+The problem with this is: We never use the whole matrix at the same time. In
+fact, it never really exists at the same time: As explained above, $K$ and
+$F_T$ depend on the already computed solution $U^n$, in the first case through
+the time step (that depends on $U^n$ because it has to satisfy a CFL
+condition). So we can only assemble it once we've already solved the top left
+$2\times 2$ block Stokes system, and once we've moved on to the temperature
+equation we don't need the Stokes part any more; the fact that we
+build an object for a matrix that never exists as a whole in memory at
+any given time led us to jumping through some hoops in step-21, so
+let's not repeat this sort of error. Furthermore, we don't
+actually build the matrix $C$: Because by the time we get to the temperature
+equation we already know $U^n$, and because we have to assemble the right hand
+side $F_T$ at this time anyway, we simply move the term $CU^n$ to the right
+hand side and assemble it along with all the other terms there. What this
+means is that there does not remain a part of the matrix where temperature
+variables and Stokes variables couple, and so a global enumeration of all
+degrees of freedom is no longer important: It is enough if we have an
+enumeration of all Stokes degrees of freedom, and of all temperature degrees
+of freedom independently.
+
+In essence, there is consequently not much use in putting <i>everything</i>
+into a block matrix (though there are of course the same good reasons to do so
+for the $2\times 2$ Stokes part), or, for that matter, in putting everything
+into the same DoFHandler object.
+
+But are there <i>downsides</i> to doing so? These exist, though they may not
+be obvious at first. The main problem is that we need to create one global
+finite element that contains velocity, pressure, and temperature shape
+functions, and use this to initialize the DoFHandler. But we also use this
+finite element object to initialize all FEValues or FEFaceValues objects that
+we use. This may not appear to be that big a deal, but imagine what happens
+when, for example, we evaluate the residual
+$
+  R_\alpha(T)
+  =
+  \left(
+  \frac{\partial T}{\partial t}
+  +
+  {\mathbf u} \cdot \nabla T
+  -
+  \nabla \cdot \kappa \nabla T - \gamma
+  \right)
+  T^{\alpha-1}
+$
+that we need to compute the artificial viscosity $\nu_\alpha(T)|_K$.  For
+this, we need the Laplacian of the temperature, which we compute using the
+tensor of second derivatives (Hessians) of the shape functions (we have to
+give the <code>update_hessians</code> flag to the FEValues object for
+this). Now, if we have a finite element that contains the shape functions for
+velocities, pressures, and temperatures, that means that we have to compute
+the Hessians of <i>all</i> shape functions, including the many higher order
+shape functions for the velocities. That's a lot of computations that we don't
+need, and indeed if one were to do that (as we had in an early version of the
+program), assembling the right hand side took about a quarter of the overall
+compute time.
+
+So what we will do is to use two different finite element objects, one for the
+Stokes components and one for the temperatures. With this come two different
+DoFHandlers, two sparsity patterns and two matrices for the Stokes and
+temperature parts, etc. And whenever we have to assemble something that
+contains both temperature and Stokes shape functions (in particular the right
+hand sides of Stokes and temperature equations), then we use two FEValues
+objects initialized with two cell iterators that we walk in %parallel through
+the two DoFHandler objects associated with the same Triangulation object; for
+these two FEValues objects, we use of course the same quadrature objects so
+that we can iterate over the same set of quadrature points, but each FEValues
+object will get update flags only according to what it actually needs to
+compute. In particular, when we compute the residual as above, we ask only for
+the values of the Stokes shape functions, but additionally for the Hessians of the
+temperature shape functions — much cheaper indeed, and as it turns out:
+assembling the right hand side of the temperature equation is now a component
+of the program that is hardly measurable.
+
+With these changes, timing the program yields that only the following
+operations are relevant for the overall run time:
+<ul>
+  <li>Solving the Stokes system: 72% of the run time.
+  <li>Assembling the Stokes preconditioner and computing the algebraic
+      multigrid hierarchy using the Trilinos ML package: 11% of the
+      run time.
+  <li>The function <code>BoussinesqFlowProblem::setup_dofs</code>: 7%
+      of overall run time.
+  <li>Assembling the Stokes and temperature right hand side vectors as
+      well as assembling the matrices: 7%.
+</ul>
+In essence this means that all bottlenecks apart from the algebraic
+multigrid have been removed.
+
+
+
+<h4>Using Trilinos</h4>
+
+In much the same way as we used PETSc to support our linear algebra needs in
+step-17 and step-18, we use interfaces to the <a
+href="http://trilinos.sandia.gov">Trilinos</a> library (see the
+deal.II README file for installation instructions) in this program. Trilinos
+is a very large collection of
+everything that has to do with linear and nonlinear algebra, as well as all
+sorts of tools around that (and looks like it will grow in many other
+directions in the future as well).
+
+The main reason for using Trilinos, similar to our exploring PETSc, is that it
+is a very powerful library that provides a lot more tools than deal.II's own
+linear algebra library. That includes, in particular, the ability to work in
+%parallel on a cluster, using MPI, and a wider variety of preconditioners. In
+the latter class, one of the most interesting capabilities is the existence of
+the Trilinos ML package that implements an Algebraic Multigrid (AMG)
+method. We will use this preconditioner to precondition the second order
+operator part of the momentum equation. The ability to solve problems in
+%parallel will be explored in step-32, using the same problem as
+discussed here.
+
+PETSc, which we have used in step-17 and step-18, is certainly a powerful
+library, providing a large number of functions that deal with matrices,
+vectors, and iterative solvers and preconditioners, along with lots of other
+stuff, most of which runs quite well in %parallel. It is, however, a few years
+older than Trilinos, written in C, and generally not quite as easy to
+use as some other libraries. As a consequence, deal.II has also acquired
+interfaces to Trilinos, which shares a lot of the same functionality with
+PETSc. It is, however, a project that is several years younger, is written in
+C++ and by people who generally have put a significant emphasis on software
+design.
+
+
+<h3>The testcase</h3>
+
+The case we want to solve here is as follows: we solve the Boussinesq
+equations described above with $\kappa=10^{-6}, \eta=1, \rho=1, \beta=10$,
+i.e. a relatively slow moving fluid that has virtually no thermal diffusive
+conductivity and transports heat mainly through convection. On the
+boundary, we will require no-normal flux for the velocity
+($\mathrm{n}\cdot\mathrm{u}=0$) and for the temperature
+($\mathrm{n}\cdot\nabla T=0$). This is one of the cases discussed in the
+introduction of step-22 and fixes one component of the velocity
+while allowing flow to be %parallel to the boundary. There remain
+<code>dim-1</code> components to be fixed, namely the tangential components of
+the normal stress; for these, we choose homogeneous conditions which means that
+we do not have to do anything special. Initial conditions are only necessary for
+the temperature field, and we choose it to be constant zero.
+
+The evolution of the problem is then entirely driven by the right hand side
+$\gamma(\mathrm{x},t)$ of the temperature equation, i.e. by heat sources and
+sinks. Here, we choose a setup invented in advance of a Christmas lecture:
+real candles are of course prohibited in U.S. class rooms, but virtual ones
+are allowed. We therefore choose three spherical heat sources unequally spaced
+close to the bottom of the domain, imitating three candles. The fluid located
+at these sources, initially at rest, is then heated up and as the temperature
+rises gains buoyancy, rising up; more fluid is dragged up and through the
+sources, leading to three hot plumes that rise up until they are captured by
+the recirculation of fluid that sinks down on the outside, replacing the air
+that rises due to heating.
diff --git a/examples/step-31/doc/kind b/examples/step-31/doc/kind
new file mode 100644
index 0000000..e62f4e7
--- /dev/null
+++ b/examples/step-31/doc/kind
@@ -0,0 +1 @@
+fluids
diff --git a/examples/step-31/doc/results.dox b/examples/step-31/doc/results.dox
new file mode 100644
index 0000000..acccb42
--- /dev/null
+++ b/examples/step-31/doc/results.dox
@@ -0,0 +1,614 @@
+<h1>Results</h1>
+
+<h3> Results in 2d </h3>
+
+When you run the program in 2d, the output will look something like
+this:
+<code>
+<pre>
+Number of active cells: 256 (on 5 levels)
+Number of degrees of freedom: 3556 (2178+289+1089)
+
+Timestep 0:  t=0
+   Assembling...
+   Rebuilding Stokes preconditioner...
+   Solving...
+   0 GMRES iterations for Stokes subsystem.
+   Time step: 0.976562
+   19 CG iterations for temperature.
+   Temperature range: -0.17714 1.38103
+
+Number of active cells: 280 (on 6 levels)
+Number of degrees of freedom: 4062 (2490+327+1245)
+
+Timestep 0:  t=0
+   Assembling...
+   Rebuilding Stokes preconditioner...
+   Solving...
+   0 GMRES iterations for Stokes subsystem.
+   Time step: 0.488281
+   19 CG iterations for temperature.
+   Temperature range: -0.10423 0.635684
+
+Number of active cells: 520 (on 7 levels)
+Number of degrees of freedom: 7432 (4562+589+2281)
+
+Timestep 0:  t=0
+   Assembling...
+   Rebuilding Stokes preconditioner...
+   Solving...
+   0 GMRES iterations for Stokes subsystem.
+   Time step: 0.244141
+   18 CG iterations for temperature.
+   Temperature range: -0.0583624 0.312553
+
+Number of active cells: 1072 (on 8 levels)
+Number of degrees of freedom: 15294 (9398+1197+4699)
+
+Timestep 0:  t=0
+   Assembling...
+   Rebuilding Stokes preconditioner...
+   Solving...
+   0 GMRES iterations for Stokes subsystem.
+   Time step: 0.12207
+   17 CG iterations for temperature.
+   Temperature range: -0.0288897 0.166335
+
+Number of active cells: 2128 (on 9 levels)
+Number of degrees of freedom: 30270 (18614+2349+9307)
+
+Timestep 0:  t=0
+   Assembling...
+   Rebuilding Stokes preconditioner...
+   Solving...
+   0 GMRES iterations for Stokes subsystem.
+   Time step: 0.0610352
+   17 CG iterations for temperature.
+   Temperature range: -0.0156055 0.0781987
+
+Timestep 1:  t=0.0610352
+   Assembling...
+   Solving...
+   33 GMRES iterations for Stokes subsystem.
+   Time step: 0.0610352
+   16 CG iterations for temperature.
+   Temperature range: -0.0285735 0.153308
+
+...
+</pre>
+</code>
+
+In the beginning we refine the mesh several times adaptively and
+always return to time step zero to restart on the newly refined
+mesh. Only then do we start the actual time iteration.
+
+The program runs for a while. The temperature field for time steps 0,
+500, 1000, 1500, 2000, 3000, 4000, and 5000 looks like this (note that
+the color scale used for the temperature is not always the same):
+
+<table align="center" border="1" cellspacing="3" cellpadding="3">
+  <tr>
+    <td>
+        <img src="http://www.dealii.org/images/steps/developer/step-31.2d.solution.00.png" alt="">
+    </td>
+    <td>
+        <img src="http://www.dealii.org/images/steps/developer/step-31.2d.solution.01.png" alt="">
+    </td>
+    <td>
+        <img src="http://www.dealii.org/images/steps/developer/step-31.2d.solution.02.png" alt="">
+    </td>
+    <td>
+        <img src="http://www.dealii.org/images/steps/developer/step-31.2d.solution.03.png" alt="">
+    </td>
+  </tr>
+
+  <tr>
+    <td>
+        <img src="http://www.dealii.org/images/steps/developer/step-31.2d.solution.04.png" alt="">
+    </td>
+    <td>
+        <img src="http://www.dealii.org/images/steps/developer/step-31.2d.solution.05.png" alt="">
+    </td>
+    <td>
+        <img src="http://www.dealii.org/images/steps/developer/step-31.2d.solution.06.png" alt="">
+    </td>
+    <td>
+        <img src="http://www.dealii.org/images/steps/developer/step-31.2d.solution.07.png" alt="">
+    </td>
+  </tr>
+</table>
+
+As can be seen, we have three heat sources that heat fluid and
+therefore produce a buoyancy effect that lets hot pockets of fluid
+rise up and swirl around. By a chimney effect, the three streams are
+pressed together by fluid that comes from the outside and wants to
+join the updraft party. Note that because the fluid is initially at
+rest, those parts of the fluid that were initially over the sources
+receive a longer heating time than that fluid that is later dragged
+over the source by the fully developed flow field. It is therefore
+hotter, a fact that can be seen in the red tips of the three
+plumes. Note also the relatively fine features of the flow field, a
+result of the sophisticated transport stabilization of the temperature
+equation we have chosen.
+
+In addition to the pictures above, the following ones show the
+adaptive mesh and the flow field at the same time steps:
+
+<table align="center" border="1" cellspacing="3" cellpadding="3">
+  <tr>
+    <td>
+        <img src="http://www.dealii.org/images/steps/developer/step-31.2d.grid.00.png" alt="">
+    </td>
+    <td>
+        <img src="http://www.dealii.org/images/steps/developer/step-31.2d.grid.01.png" alt="">
+    </td>
+    <td>
+        <img src="http://www.dealii.org/images/steps/developer/step-31.2d.grid.02.png" alt="">
+    </td>
+    <td>
+        <img src="http://www.dealii.org/images/steps/developer/step-31.2d.grid.03.png" alt="">
+    </td>
+  </tr>
+
+  <tr>
+    <td>
+        <img src="http://www.dealii.org/images/steps/developer/step-31.2d.grid.04.png" alt="">
+    </td>
+    <td>
+        <img src="http://www.dealii.org/images/steps/developer/step-31.2d.grid.05.png" alt="">
+    </td>
+    <td>
+        <img src="http://www.dealii.org/images/steps/developer/step-31.2d.grid.06.png" alt="">
+    </td>
+    <td>
+        <img src="http://www.dealii.org/images/steps/developer/step-31.2d.grid.07.png" alt="">
+    </td>
+  </tr>
+</table>
+
+
+<h3> Results in 3d </h3>
+
+The same thing can of course be done in 3d by changing the template
+parameter to the BoussinesqFlowProblem object in <code>main()</code>
+from 2 to 3, so that the output now looks as follows:
+
+<code>
+<pre>
+Number of active cells: 64 (on 3 levels)
+Number of degrees of freedom: 3041 (2187+125+729)
+
+Timestep 0:  t=0
+   Assembling...
+   Rebuilding Stokes preconditioner...
+   Solving...
+   0 GMRES iterations for Stokes subsystem.
+   Time step: 2.60417
+   22 CG iterations for temperature.
+   Temperature range: -0.717838 5.25595
+
+Number of active cells: 288 (on 4 levels)
+Number of degrees of freedom: 12379 (8943+455+2981)
+
+Timestep 0:  t=0
+   Assembling...
+   Rebuilding Stokes preconditioner...
+   Solving...
+   0 GMRES iterations for Stokes subsystem.
+   Time step: 1.30208
+   27 CG iterations for temperature.
+   Temperature range: -0.560342 2.39811
+
+Number of active cells: 1296 (on 5 levels)
+Number of degrees of freedom: 51497 (37305+1757+12435)
+
+Timestep 0:  t=0
+   Assembling...
+   Rebuilding Stokes preconditioner...
+   Solving...
+   0 GMRES iterations for Stokes subsystem.
+   Time step: 0.651042
+   28 CG iterations for temperature.
+   Temperature range: -0.527408 0.900169
+
+Number of active cells: 5104 (on 6 levels)
+Number of degrees of freedom: 194273 (140913+6389+46971)
+
+Timestep 0:  t=0
+   Assembling...
+   Rebuilding Stokes preconditioner...
+   Solving...
+   0 GMRES iterations for Stokes subsystem.
+   Time step: 0.325521
+   28 CG iterations for temperature.
+   Temperature range: -0.283877 0.528447
+
+Timestep 1:  t=0.325521
+   Assembling...
+   Solving...
+   55 GMRES iterations for Stokes subsystem.
+   Time step: 0.325521
+   26 CG iterations for temperature.
+   Temperature range: -0.488053 1.02071
+
+...
+</pre>
+</code>
+
+Visualizing the temperature isocontours at time steps 0,
+50, 100, 150, 200, 300, 400, 500, 600, 700, and 800 yields the
+following plots:
+
+<table align="center" border="1" cellspacing="3" cellpadding="3">
+  <tr>
+    <td>
+        <img src="http://www.dealii.org/images/steps/developer/step-31.3d.solution.00.png" alt="">
+    </td>
+    <td>
+        <img src="http://www.dealii.org/images/steps/developer/step-31.3d.solution.01.png" alt="">
+    </td>
+    <td>
+        <img src="http://www.dealii.org/images/steps/developer/step-31.3d.solution.02.png" alt="">
+    </td>
+    <td>
+        <img src="http://www.dealii.org/images/steps/developer/step-31.3d.solution.03.png" alt="">
+    </td>
+  </tr>
+
+  <tr>
+    <td>
+        <img src="http://www.dealii.org/images/steps/developer/step-31.3d.solution.04.png" alt="">
+    </td>
+    <td>
+        <img src="http://www.dealii.org/images/steps/developer/step-31.3d.solution.05.png" alt="">
+    </td>
+    <td>
+        <img src="http://www.dealii.org/images/steps/developer/step-31.3d.solution.06.png" alt="">
+    </td>
+    <td>
+        <img src="http://www.dealii.org/images/steps/developer/step-31.3d.solution.07.png" alt="">
+    </td>
+  </tr>
+
+  <tr>
+    <td>
+        <img src="http://www.dealii.org/images/steps/developer/step-31.3d.solution.08.png" alt="">
+    </td>
+    <td>
+        <img src="http://www.dealii.org/images/steps/developer/step-31.3d.solution.09.png" alt="">
+    </td>
+    <td>
+        <img src="http://www.dealii.org/images/steps/developer/step-31.3d.solution.10.png" alt="">
+    </td>
+    <td>
+    </td>
+  </tr>
+</table>
+
+That the first picture looks like three hedgehogs stems from
+the fact that our scheme essentially projects the source times the
+first time step size onto the mesh to obtain the temperature field in
+the first time step. Since the source function is discontinuous, we
+need to expect over- and undershoots from this projection. This is in
+fact what happens (it's easier to check this in 2d) and leads to the
+crumpled appearance of the isosurfaces.
+
+
+
+<h3> Numerical experiments to determine optimal parameters </h3>
+
+The program as is has three parameters that we don't have much of a
+theoretical handle on how to choose in an optimal way. These are:
+<ul>
+  <li>The time step must satisfy a CFL condition
+      $k\le \min_K \frac{c_kh_K}{\|\mathbf{u}\|_{L^\infty(K)}}$. Here, $c_k$ is
+      dimensionless, but what is the right value?
+  <li>In the computation of the artificial viscosity,
+@f{eqnarray*}
+  \nu_\alpha(T)|_K
+  =
+  \beta
+  \|\mathbf{u}\|_{L^\infty(K)}
+  \min\left\{
+    h_K,
+    h_K^\alpha
+    \frac{\|R_\alpha(T)\|_{L^\infty(K)}}{c(\mathbf{u},T)}
+  \right\},
+@f}
+      with $c(\mathbf{u},T) =
+      c_R\ \|\mathbf{u}\|_{L^\infty(\Omega)} \ \mathrm{var}(T)
+      \ |\mathrm{diam}(\Omega)|^{\alpha-2}$.
+      Here, the choice of the dimensionless %numbers $\beta,c_R$ is of
+      interest.
+</ul>
+In all of these cases, we will have to expect that the correct choice of each
+value depends on that of the others, and most likely also on the space
+dimension and polynomial degree of the finite element used for the
+temperature. Below we'll discuss a few numerical experiments to choose
+constants $c_k$ and $\beta$.
+
+Below, we will not discuss the choice of $c_R$. In the program, we set
+it to $c_R=2^{\frac{4-2\alpha}{d}}$. The reason for this value is a
+bit complicated and has more to do with the history of the program
+than reasoning: while the correct formula for the global scaling
+parameter $c(\mathbf{u},T)$ is shown above, the program (including the
+version shipped with deal.II 6.2) initially had a bug in that we
+computed
+$c(\mathbf{u},T) =
+      \|\mathbf{u}\|_{L^\infty(\Omega)} \ \mathrm{var}(T)
+      \ \frac{1}{|\mathrm{diam}(\Omega)|^{\alpha-2}}$ instead, where
+we had set the scaling parameter to one. Since we only computed on the
+unit square/cube where $\mathrm{diam}(\Omega)=2^{1/d}$, this was
+entirely equivalent to using the correct formula with
+$c_R=\left(2^{1/d}\right)^{4-2\alpha}=2^{\frac{4-2\alpha}{d}}$. Since
+this value for $c_R$ appears to work just fine for the current
+program, we corrected the formula in the program and set $c_R$ to a
+value that reproduces exactly the results we had before. We will,
+however, revisit this issue again in step-32.
+
+Now, however, back to the discussion of what values of $c_k$ and
+$\beta$ to choose:
+
+
+<h4> Choosing <i>c<sub>k</sub></i> and beta </h4>
+
+These two constants are definitely linked in some way. The reason is easy to
+see: In the case of a pure advection problem,
+$\frac{\partial T}{\partial t} + \mathbf{u}\cdot\nabla T = \gamma$, any
+explicit scheme has to satisfy a CFL condition of the form
+$k\le \min_K \frac{c_k^a h_K}{\|\mathbf{u}\|_{L^\infty(K)}}$. On the other hand,
+for a pure diffusion problem,
+$\frac{\partial T}{\partial t} - \nu \Delta T = \gamma$,
+explicit schemes need to satisfy a condition
+$k\le \min_K \frac{c_k^d h_K^2}{\nu}$. So given the form of $\nu$ above, an
+advection diffusion problem like the one we have to solve here will result in
+a condition of the form
+$
+k\le \min_K \min \left\{
+  \frac{c_k^a h_K}{\|\mathbf{u}\|_{L^\infty(K)}},
+  \frac{c_k^d h_K^2}{\beta \|\mathbf{u}\|_{L^\infty(K)} h_K}\right\}
+  =
+  \min_K \left( \min \left\{
+  c_k^a,
+  \frac{c_k^d}{\beta}\right\}
+  \frac{h_K}{\|\mathbf{u}\|_{L^\infty(K)}} \right)
+$.
+It follows that we have to face the fact that we might want to choose $\beta$
+larger to improve the stability of the numerical scheme (by increasing the
+amount of artificial diffusion), but we have to pay a price in the form of
+smaller, and consequently more time steps. In practice, one would therefore
+like to choose $\beta$ as small as possible to keep the transport problem
+sufficiently stabilized while at the same time trying to choose the time step
+as large as possible to reduce the overall amount of work.
+
+To find the right balance, the only way is to do a few computational
+experiments. Here's what we did: We modified the program slightly to allow
+less mesh refinement (so we don't always have to wait that long) and to choose
+$
+  \nu(T)|_K
+  =
+  \beta
+  \|\mathbf{u}\|_{L^\infty(K)} h_K
+$ to eliminate the effect of the constant $c_R$ (we know that
+solutions are stable by using this version of $\nu(T)$ as an artificial
+viscosity, but that we can improve things -- i.e. make the solution
+sharper -- by using the more complicated formula for this artificial
+viscosity). We then run the program
+for different values $c_k,\beta$ and observe maximal and minimal temperatures
+in the domain. What we expect to see is this: If we choose the time step too
+big (i.e. choose a $c_k$ bigger than theoretically allowed) then we will get
+exponential growth of the temperature. If we choose $\beta$ too small, then
+the transport stabilization becomes insufficient and the solution will show
+significant oscillations but not exponential growth.
+
+
+<h5>Results for Q<sub>1</sub> elements</h5>
+
+Here is what we get for
+$\beta=0.01, \beta=0.03, \beta=0.1$, and $\beta=0.5$, different choices of $c_k$, and
+bilinear elements (<code>temperature_degree=1</code>) in 2d:
+
+<table align="center" border="1" cellspacing="3" cellpadding="3">
+  <tr>
+    <td>
+        <img src="http://www.dealii.org/images/steps/developer/step-31.timestep.q1.beta=0.01.png" alt="">
+    </td>
+    <td>
+        <img src="http://www.dealii.org/images/steps/developer/step-31.timestep.q1.beta=0.03.png" alt="">
+    </td>
+  </tr>
+
+  <tr>
+    <td>
+        <img src="http://www.dealii.org/images/steps/developer/step-31.timestep.q1.beta=0.1.png" alt="">
+    </td>
+    <td>
+        <img src="http://www.dealii.org/images/steps/developer/step-31.timestep.q1.beta=0.5.png" alt="">
+    </td>
+  </tr>
+</table>
+
+The way to interpret these graphs goes like this: for $\beta=0.01$ and
+$c_k=\frac 12,\frac 14$, we see exponential growth or at least large
+variations, but if we choose
+$k=\frac 18\frac{h_K}{\|\mathbf{u}\|_{L^\infty(K)}}$
+or smaller, then the scheme is
+stable though a bit wobbly. For more artificial diffusion, we can choose
+$k=\frac 14\frac{h_K}{\|\mathbf{u}\|_{L^\infty(K)}}$
+or smaller for $\beta=0.03$,
+$k=\frac 13\frac{h_K}{\|\mathbf{u}\|_{L^\infty(K)}}$
+or smaller for $\beta=0.1$, and again need
+$k=\frac 1{15}\frac{h_K}{\|\mathbf{u}\|_{L^\infty(K)}}$
+for $\beta=0.5$ (this time because much diffusion requires a small time
+step).
+
+So how to choose? If we were simply interested in a large time step, then we
+would go with $\beta=0.1$ and
+$k=\frac 13\frac{h_K}{\|\mathbf{u}\|_{L^\infty(K)}}$.
+On the other hand, we're also interested in accuracy and here it may be of
+interest to actually investigate what these curves show. To this end note that
+we start with a zero temperature and that our sources are positive — so
+we would intuitively expect that the temperature can never drop below
+zero. But it does, a consequence of the Gibbs phenomenon when using continuous
+elements to approximate a discontinuous solution. We can therefore see that
+choosing $\beta$ too small is bad: too little artificial diffusion leads to
+over- and undershoots that aren't diffused away. On the other hand, for large
+$\beta$, the minimum temperature drops below zero at the beginning but then
+quickly diffuses back to zero.
+
+On the other hand, let's also look at the maximum temperature. Watching the
+movie of the solution, we see that initially the fluid is at rest. The source
+keeps heating the same volume of fluid whose temperature increases linearly at
+the beginning until its buoyancy is able to move it upwards. The hottest part
+of the fluid is therefore transported away from the source and fluid taking
+its place is heated for only a short time before being moved out of the source
+region, therefore remaining cooler than the initial bubble. If $\kappa=0$
+(in the program it is nonzero but very small) then the hottest part of the
+fluid should be advected along with the flow with its temperature
+constant. That's what we can see in the graphs with the smallest $\beta$: Once
+the maximum temperature is reached, it hardly changes any more. On the other
+hand, the larger the artificial diffusion, the more the hot spot is
+diffused. Note that for this criterion, the time step size does not play a
+significant role.
+
+So to sum up, likely the best choice would appear to be $\beta=0.03$
+and $k=\frac 14\frac{h_K}{\|\mathbf{u}\|_{L^\infty(K)}}$. The curve is
+a bit wobbly, but the overall picture looks pretty reasonable with the
+exception of some over- and undershoots close to the start time due to
+the Gibbs phenomenon.
+
+
+<h5>Results for Q<sub>2</sub> elements</h5>
+
+One can repeat the same sequence of experiments for higher order
+elements as well. Here are the graphs for bi-quadratic shape functions
+(<code>temperature_degree=2</code>) for the temperature, while we
+retain the $Q_2/Q_1$ stable Taylor-Hood element for the Stokes system:
+
+<table align="center" border="1" cellspacing="3" cellpadding="3">
+  <tr>
+    <td>
+        <img src="http://www.dealii.org/images/steps/developer/step-31.timestep.q2.beta=0.01.png" alt="">
+    </td>
+    <td>
+        <img src="http://www.dealii.org/images/steps/developer/step-31.timestep.q2.beta=0.03.png" alt="">
+    </td>
+  </tr>
+
+  <tr>
+    <td>
+        <img src="http://www.dealii.org/images/steps/developer/step-31.timestep.q2.beta=0.1.png" alt="">
+    </td>
+  </tr>
+</table>
+
+Again, small values of $\beta$ lead to less diffusion but we have to
+choose the time step very small to keep things under control. Too
+large values of $\beta$ make for more diffusion, but again require
+small time steps. The best value would appear to be $\beta=0.03$, as
+for the $Q_1$ element, and then we have to choose
+$k=\frac 18\frac{h_K}{\|\mathbf{u}\|_{L^\infty(K)}}$ — exactly
+half the size for the $Q_1$ element, a fact that may not be surprising
+if we state the CFL condition as the requirement that the time step be
+small enough so that the distance transport advects in each time step
+is no longer than one <i>grid point</i> away (which for $Q_1$ elements
+is $h_K$, but for $Q_2$ elements is $h_K/2$). It turns out that $\beta$
+needs to be slightly larger for obtaining stable results also late in
+the simulation at times larger than 60, so we actually choose it as
+$\beta = 0.034$ in the code.
+
+
+<h5>Results for 3d</h5>
+
+One can repeat these experiments in 3d and find the optimal time step
+for each value of $\beta$ and find the best value of $\beta$. What one
+finds is that for the same $\beta$ already used in 2d, the time step
+needs to be a bit smaller, by around a factor of 1.2 or so. This is
+easily explained: the time step restriction is
+$k=\min_K \frac{ch_K}{\|\mathbf{u}\|_{L^\infty(K)}}$ where $h_K$ is
+the <i>diameter</i> of the cell. However, what is really needed is the
+distance between mesh points, which is $\frac{h_K}{\sqrt{d}}$. So a
+more appropriate form would be
+$k=\min_K \frac{ch_K}{\|\mathbf{u}\|_{L^\infty(K)}\sqrt{d}}$.
+
+The second finding is that one needs to choose $\beta$ slightly bigger
+(about $\beta=0.05$ or so). This then again reduces the time step we
+can take.
+
+
+
+
+<h5>Conclusions</h5>
+
+Concluding, from the simple computations above, $\beta=0.034$ appears to be a
+good choice for the stabilization parameter in 2d, and $\beta=0.05$ in 3d. In
+a dimension independent way, we can model this as $\beta=0.017d$. If one does
+longer computations (several thousand time steps) on finer meshes, one
+realizes that the time step size is not quite small enough and that for
+stability one will have to reduce the above values a bit more (by about a
+factor of $\frac 78$).
+
+As a consequence, a formula that reconciles 2d, 3d, and variable polynomial
+degree and takes all factors into account reads as follows:
+@f{eqnarray*}
+  k =
+  \frac 1{2 \cdot 1.7} \frac 1{\sqrt{d}}
+  \frac 2d
+  \frac 1{q_T}
+  \frac{h_K}{\|\mathbf{u}\|_{L^\infty(K)}}
+  =
+  \frac 1{1.7 d\sqrt{d}}
+  \frac 1{q_T}
+  \frac{h_K}{\|\mathbf{u}\|_{L^\infty(K)}}.
+@f}
+In the first form (in the center of the equation), $\frac
+1{2 \cdot 1.7}$ is a universal constant, $\frac 1{\sqrt{d}}$
+is the factor that accounts for the difference between cell diameter
+and grid point separation,
+$\frac 2d$ accounts for the increase in $\beta$ with space dimension,
+$\frac 1{q_T}$ accounts for the distance between grid points for
+higher order elements, and $\frac{h_K}{\|\mathbf{u}\|_{L^\infty(K)}}$
+for the local speed of transport relative to the cell size. This is
+the formula that we use in the program.
+
+As for the question of whether to use $Q_1$ or $Q_2$ elements for the
+temperature, the following considerations may be useful: First,
+solving the temperature equation is hardly a factor in the overall
+scheme since almost the entire compute time goes into solving the
+Stokes system in each time step. Higher order elements for the
+temperature equation are therefore not a significant drawback. On the
+other hand, if one compares the size of the over- and undershoots the
+solution produces due to the discontinuous source description, one
+notices that for the choice of $\beta$ and $k$ as above, the $Q_1$
+solution dips down to around $-0.47$, whereas the $Q_2$ solution only
+goes to $-0.13$ (remember that the exact solution should never become
+negative at all). This means that the $Q_2$ solution is significantly
+more accurate; the program therefore uses these higher order elements,
+despite the penalty we pay in terms of smaller time steps.
+
+
+<h3> Possible extensions </h3>
+
+There are various ways to extend the current program. Of particular interest
+is, of course, to make it faster and/or increase the resolution of the
+program, in particular in 3d. This is the topic of the step-32
+tutorial program which will implement strategies to solve this problem in
+%parallel on a cluster. It is also the basis of the much larger open
+source code Aspect (see http://aspect.dealii.org/ ) that can solve realistic
+problems and that constitutes the further development of step-32.
+
+Another direction would be to make the fluid flow more realistic. The program
+was initially written to simulate various cases simulating the convection of
+material in the earth's mantle, i.e. the zone between the outer earth core and
+the solid earth crust: there, material is heated from below and cooled from
+above, leading to thermal convection. The physics of this fluid are much more
+complicated than shown in this program, however: The viscosity of mantle
+material is strongly dependent on the temperature, i.e. $\eta=\eta(T)$, with
+the dependency frequently modeled as a viscosity that is reduced exponentially
+with rising temperature. Secondly, much of the dynamics of the mantle is
+determined by chemical reactions, primarily phase changes of the various
+crystals that make up the mantle; the buoyancy term on the right hand side of
+the Stokes equations then depends not only on the temperature, but also on the
+chemical composition at a given location which is advected by the flow field
+but also changes as a function of pressure and temperature. We will
+investigate some of these effects in later tutorial programs as well.
diff --git a/examples/step-31/doc/tooltip b/examples/step-31/doc/tooltip
new file mode 100644
index 0000000..4c363cf
--- /dev/null
+++ b/examples/step-31/doc/tooltip
@@ -0,0 +1 @@
+Boussinesq flow for thermal convection.
diff --git a/examples/step-31/step-31.cc b/examples/step-31/step-31.cc
new file mode 100644
index 0000000..4a0bb5a
--- /dev/null
+++ b/examples/step-31/step-31.cc
@@ -0,0 +1,2247 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2007 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Authors: Martin Kronbichler, Uppsala University,
+ *          Wolfgang Bangerth, Texas A&M University 2007, 2008
+ */
+
+
+// @sect3{Include files}
+
+// The first step, as always, is to include the functionality of these
+// well-known deal.II library files and some C++ header files.
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/utilities.h>
+
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/solver_gmres.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/block_sparsity_pattern.h>
+#include <deal.II/lac/constraint_matrix.h>
+
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/grid_tools.h>
+#include <deal.II/grid/grid_refinement.h>
+
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_renumbering.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_system.h>
+#include <deal.II/fe/fe_values.h>
+
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/numerics/error_estimator.h>
+#include <deal.II/numerics/solution_transfer.h>
+
+// Then we need to include some header files that provide vector, matrix, and
+// preconditioner classes that implement interfaces to the respective Trilinos
+// classes. In particular, we will need interfaces to the matrix and vector
+// classes based on Trilinos as well as Trilinos preconditioners:
+#include <deal.II/base/index_set.h>
+#include <deal.II/lac/trilinos_sparse_matrix.h>
+#include <deal.II/lac/trilinos_block_sparse_matrix.h>
+#include <deal.II/lac/trilinos_vector.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+#include <deal.II/lac/trilinos_precondition.h>
+
+// Finally, here are a few C++ headers that haven't been included yet by one of
+// the aforelisted header files:
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <limits>
+
+
+// At the end of this top-matter, we open a namespace for this program and
+// import all deal.II names into it:
+namespace Step31
+{
+  using namespace dealii;
+
+
+  // @sect3{Equation data}
+
+  // Again, the next stage in the program is the definition of the equation
+  // data, that is, the various boundary conditions, the right hand sides and
+  // the initial condition (remember that we're about to solve a
+  // time-dependent system). The basic strategy for this definition is the
+  // same as in step-22. Regarding the details, though, there are some
+  // differences.
+
+  // The first thing is that we don't set any nonhomogeneous boundary
+  // conditions on the velocity, since as is explained in the introduction we
+  // will use no-flux conditions $\mathbf{n}\cdot\mathbf{u}=0$. So what is
+  // left are <code>dim-1</code> conditions for the tangential part of the
+  // normal component of the stress tensor, $\textbf{n} \cdot [p \textbf{1} -
+  // \eta\varepsilon(\textbf{u})]$; we assume homogeneous values for these
+  // components, i.e. a natural boundary condition that requires no specific
+  // action (it appears as a zero term in the right hand side of the weak
+  // form).
+  //
+  // For the temperature <i>T</i>, we assume no thermal energy flux,
+  // i.e. $\mathbf{n} \cdot \kappa \nabla T=0$. This, again, is a boundary
+  // condition that does not require us to do anything in particular.
+  //
+  // Secondly, we have to set initial conditions for the temperature (no
+  // initial conditions are required for the velocity and pressure, since the
+  // Stokes equations for the quasi-stationary case we consider here have no
+  // time derivatives of the velocity or pressure). Here, we choose a very
+  // simple test case, where the initial temperature is zero, and all dynamics
+  // are driven by the temperature right hand side.
+  //
+  // Thirdly, we need to define the right hand side of the temperature
+  // equation. We choose it to be constant within three circles (or spheres in
+  // 3d) somewhere at the bottom of the domain, as explained in the
+  // introduction, and zero outside.
+  //
+  // Finally, or maybe firstly, at the top of this namespace, we define the
+  // various material constants we need ($\eta,\kappa$, density $\rho$ and the
+  // thermal expansion coefficient $\beta$):
+  namespace EquationData
+  {
+    const double eta = 1;       // viscosity $\eta$
+    const double kappa = 1e-6;  // thermal conductivity $\kappa$; nearly zero, so heat is transported mainly by convection
+    const double beta = 10;     // thermal expansion coefficient $\beta$
+    const double density = 1;   // density $\rho$
+
+
+    template <int dim>
+    class TemperatureInitialValues : public Function<dim>  // initial condition for the temperature
+    {
+    public:
+      TemperatureInitialValues () : Function<dim>(1) {}  // scalar function: a single component
+
+      virtual double value (const Point<dim>   &p,
+                            const unsigned int  component = 0) const;  // temperature at p for the given component
+
+      virtual void vector_value (const Point<dim> &p,
+                                 Vector<double>   &values) const;  // writes value(p,c) into values(c) for every component c
+    };
+
+
+    template <int dim>
+    double
+    TemperatureInitialValues<dim>::value (const Point<dim> &,
+                                          const unsigned int) const  // both arguments are unnamed and unused
+    {
+      return 0;  // the initial temperature is zero everywhere
+    }
+
+
+    template <int dim>
+    void
+    TemperatureInitialValues<dim>::vector_value (const Point<dim> &p,
+                                                 Vector<double>   &values) const  // values is the output vector
+    {
+      for (unsigned int c=0; c<this->n_components; ++c)  // one entry per component of this function
+        values(c) = TemperatureInitialValues<dim>::value (p, c);  // class-qualified call, so no virtual dispatch
+    }
+
+
+    template <int dim>
+    class TemperatureRightHandSide : public Function<dim>  // heat source term $\gamma$ of the temperature equation
+    {
+    public:
+      TemperatureRightHandSide () : Function<dim>(1) {}  // scalar function: a single component
+
+      virtual double value (const Point<dim>   &p,
+                            const unsigned int  component = 0) const;  // source strength at p; only component 0 is valid
+
+      virtual void vector_value (const Point<dim> &p,
+                                 Vector<double>   &values) const;  // writes value(p,c) into values(c) for every component c
+    };
+
+
+    template <int dim>
+    double
+    TemperatureRightHandSide<dim>::value (const Point<dim>  &p,
+                                          const unsigned int component) const
+    {
+      Assert (component == 0,
+              ExcMessage ("Invalid operation for a scalar function."));  // the function has only component 0
+
+      Assert ((dim==2) || (dim==3), ExcNotImplemented());  // source positions below exist only for 2d and 3d
+
+      static const Point<dim> source_centers[3]  // centers of the three heat sources; the last coordinate is .1 in 2d and 3d
+        = { (dim == 2 ? Point<dim>(.3,.1) : Point<dim>(.3,.5,.1)),
+            (dim == 2 ? Point<dim>(.45,.1) : Point<dim>(.45,.5,.1)),
+            (dim == 2 ? Point<dim>(.75,.1) : Point<dim>(.75,.5,.1))
+          };
+      static const double source_radius  // common radius of the sources: 1/32 in 2d, 1/8 in 3d
+        = (dim == 2 ? 1./32 : 1./8);
+
+      return ((source_centers[0].distance (p) < source_radius)  // is p strictly inside the first source ...
+              ||
+              (source_centers[1].distance (p) < source_radius)  // ... or the second ...
+              ||
+              (source_centers[2].distance (p) < source_radius)  // ... or the third?
+              ?
+              1  // inside a source: constant heating rate of one
+              :
+              0);  // outside all sources: no heating
+    }
+
+
+    template <int dim>
+    void
+    TemperatureRightHandSide<dim>::vector_value (const Point<dim> &p,
+                                                 Vector<double>   &values) const
+    {
+      for (unsigned int c=0; c<this->n_components; ++c)
+        values(c) = TemperatureRightHandSide<dim>::value (p, c);
+    }
+  }
+
+
+
+  // @sect3{Linear solvers and preconditioners}
+
+  // This section introduces some objects that are used for the solution of
+  // the linear equations of the Stokes system that we need to solve in each
+  // time step. Many of the ideas used here are the same as in step-20, where
+  // Schur complement based preconditioners and solvers have been introduced,
+  // with the actual interface taken from step-22 (in particular the
+  // discussion in the "Results" section of step-22, in which we introduce
+  // alternatives to the direct Schur complement approach). Note, however,
+  // that here we don't use the Schur complement to solve the Stokes
+  // equations, though an approximate Schur complement (the mass matrix on the
+  // pressure space) appears in the preconditioner.
+  namespace LinearSolvers
+  {
+
+    // @sect4{The <code>InverseMatrix</code> class template}
+
+    // This class is an interface to calculate the action of an "inverted"
+    // matrix on a vector (using the <code>vmult</code> operation) in the same
+    // way as the corresponding class in step-22: when the product of an
+    // object of this class is requested, we solve a linear equation system
+    // with that matrix using the CG method, accelerated by a preconditioner
+    // of (templated) class <code>Preconditioner</code>.
+    //
+    // In a minor deviation from the implementation of the same class in
+    // step-22 (and step-20), we make the <code>vmult</code> function take any
+    // kind of vector type (it will yield compiler errors, however, if the
+    // matrix does not allow a matrix-vector product with this kind of
+    // vector).
+    //
+    // Secondly, we catch any exceptions that the solver may have thrown. The
+    // reason is as follows: When debugging a program like this, one
+    // occasionally makes the mistake of passing an indefinite or nonsymmetric
+    // matrix or preconditioner to the current class. The solver will, in that
+    // case, not converge and throw a run-time exception. If not caught here
+    // it will propagate up the call stack and may end up in
+    // <code>main()</code> where we output an error message that will say that
+    // the CG solver failed. The question then becomes: Which CG solver? The
+    // one that inverted the mass matrix? The one that inverted the top left
+    // block with the Laplace operator? Or a CG solver in one of the several
+    // other nested places where we use linear solvers in the current code? No
+    // indication about this is present in a run-time exception because it
+    // doesn't store the stack of calls through which we got to the place
+    // where the exception was generated.
+    //
+    // So rather than letting the exception propagate freely up to
+    // <code>main()</code> we realize that there is little that an outer
+    // function can do if the inner solver fails and rather convert the
+    // run-time exception into an assertion that fails and triggers a call to
+    // <code>abort()</code>, allowing us to trace back in a debugger how we
+    // got to the current place.
+    template <class Matrix, class Preconditioner>
+    class InverseMatrix : public Subscriptor
+    {
+    public:
+      // Remember the matrix (through a SmartPointer) and the preconditioner
+      // (by reference). The preconditioner is not copied, so the object
+      // passed in has to stay alive as long as this object is used.
+      InverseMatrix (const Matrix         &m,
+                     const Preconditioner &preconditioner);
+
+
+      // Compute dst as the solution of the linear system with the stored
+      // matrix and right hand side src, using preconditioned CG.
+      template <typename VectorType>
+      void vmult (VectorType       &dst,
+                  const VectorType &src) const;
+
+    private:
+      const SmartPointer<const Matrix> matrix;
+      const Preconditioner &preconditioner;
+    };
+
+
+    template <class Matrix, class Preconditioner>
+    InverseMatrix<Matrix,Preconditioner>::
+    InverseMatrix (const Matrix &m,
+                   const Preconditioner &preconditioner)
+      :
+      matrix (&m),
+      preconditioner (preconditioner)
+    {}
+
+
+
+    template <class Matrix, class Preconditioner>
+    template <typename VectorType>
+    void
+    InverseMatrix<Matrix,Preconditioner>::
+    vmult (VectorType       &dst,
+           const VectorType &src) const
+    {
+      // The solver control object is built from the vector size and a
+      // tolerance relative to the norm of the right hand side (presumably
+      // the iteration limit and the stopping tolerance, in this order --
+      // confirm against the SolverControl constructor).
+      const double tolerance = 1e-7*src.l2_norm();
+      SolverControl control (src.size(), tolerance);
+      SolverCG<VectorType> solver (control);
+
+      // Start the iteration from a zero initial guess.
+      dst = 0;
+
+      try
+        {
+          solver.solve (*matrix, dst, src, preconditioner);
+        }
+      catch (const std::exception &exc)
+        {
+          // Turn the solver's exception into a failing assertion that
+          // carries the original message, as explained above.
+          Assert (false, ExcMessage(exc.what()));
+        }
+    }
+
+    // @sect4{Schur complement preconditioner}
+
+    // This is the implementation of the Schur complement preconditioner as
+    // described in detail in the introduction. As opposed to step-20 and
+    // step-22, we solve the block system all-at-once using GMRES, and use the
+    // Schur complement of the block structured matrix to build a good
+    // preconditioner instead.
+    //
+    // Let's have a look at the ideal preconditioner matrix
+    // $P=\left(\begin{array}{cc} A & 0 \\ B & -S \end{array}\right)$
+    // described in the introduction. If we apply this matrix in the solution
+    // of a linear system, convergence of an iterative GMRES solver will be
+    // governed by the matrix @f{eqnarray*} P^{-1}\left(\begin{array}{cc} A &
+    // B^T \\ B & 0 \end{array}\right) = \left(\begin{array}{cc} I & A^{-1}
+    // B^T \\ 0 & I \end{array}\right), @f} which indeed is very simple. A
+    // GMRES solver based on exact matrices would converge in one iteration,
+    // since all eigenvalues are equal (any Krylov method takes at most as
+    // many iterations as there are distinct eigenvalues). Such a
+    // preconditioner for the blocked Stokes system has been proposed by
+    // Silvester and Wathen ("Fast iterative solution of stabilised Stokes
+    // systems part II.  Using general block preconditioners", SIAM
+    // J. Numer. Anal., 31 (1994), pp. 1352-1367).
+    //
+    // Replacing <i>P</i> by $\tilde{P}$ keeps that spirit alive: the product
+    // of $\tilde{P}^{-1}$ and the Stokes system matrix will still be close to
+    // a matrix with eigenvalues 1 with a distribution that does not depend on
+    // the problem size. This lets us hope to be able to get a number of GMRES
+    // iterations that is problem-size independent.
+    //
+    // The deal.II users who have already gone through the step-20 and step-22
+    // tutorials can certainly imagine how we're going to implement this.  We
+    // replace the exact inverse matrices in $P^{-1}$ by some approximate
+    // inverses built from the InverseMatrix class, and the inverse Schur
+    // complement will be approximated by the pressure mass matrix $M_p$
+    // (weighted by $\eta^{-1}$ as mentioned in the introduction). As pointed
+    // out in the results section of step-22, we can replace the exact inverse
+    // of <i>A</i> by just the application of a preconditioner, in this case
+    // on a vector Laplace matrix as was explained in the introduction. This
+    // does increase the number of (outer) GMRES iterations, but is still
+    // significantly cheaper than an exact inverse, which would require
+    // between 20 and 35 CG iterations for <em>each</em> outer solver step
+    // (using the AMG preconditioner).
+    //
+    // Having the above explanations in mind, we define a preconditioner class
+    // with a <code>vmult</code> functionality, which is all we need for the
+    // interaction with the usual solver functions further below in the
+    // program code.
+    //
+    // First the declarations. These are similar to the definition of the
+    // Schur complement in step-20, with the difference that we need some more
+    // preconditioners in the constructor and that the matrices we use here
+    // are built upon Trilinos:
+    // Block preconditioner for the Stokes system. It offers a
+    // <code>vmult</code> function so that it can be handed to a solver as a
+    // preconditioner object.
+    template <class PreconditionerA, class PreconditionerMp>
+    class BlockSchurPreconditioner : public Subscriptor
+    {
+    public:
+      // S is the Stokes block matrix, Mpinv the InverseMatrix object applied
+      // to the pressure block, and Apreconditioner the preconditioner applied
+      // to the velocity block. S and Mpinv are stored through SmartPointers,
+      // Apreconditioner by reference; none of them is copied.
+      BlockSchurPreconditioner (
+        const TrilinosWrappers::BlockSparseMatrix     &S,
+        const InverseMatrix<TrilinosWrappers::SparseMatrix,
+        PreconditionerMp>         &Mpinv,
+        const PreconditionerA                         &Apreconditioner);
+
+      // Apply the preconditioner to src and write the result into dst.
+      void vmult (TrilinosWrappers::MPI::BlockVector       &dst,
+                  const TrilinosWrappers::MPI::BlockVector &src) const;
+
+    private:
+      // Note: the constructor sizes tmp through stokes_matrix, so
+      // stokes_matrix has to be declared (and hence initialized) before tmp.
+      const SmartPointer<const TrilinosWrappers::BlockSparseMatrix> stokes_matrix;
+      const SmartPointer<const InverseMatrix<TrilinosWrappers::SparseMatrix,
+            PreconditionerMp > > m_inverse;
+      const PreconditionerA &a_preconditioner;
+
+      // Scratch vector for the pressure block; mutable because the const
+      // function vmult() writes into it.
+      mutable TrilinosWrappers::MPI::Vector tmp;
+    };
+
+
+
+    // When using a TrilinosWrappers::MPI::Vector or a
+    // TrilinosWrappers::MPI::BlockVector, the Vector is initialized using an
+    // IndexSet. IndexSet is used not only to resize the
+    // TrilinosWrappers::MPI::Vector but it also associates an index in the
+    // TrilinosWrappers::MPI::Vector with a degree of freedom (see step-40 for
+    // a more detailed explanation). The function complete_index_set() creates
+    // an IndexSet where every valid index is part of the set. Note that this
+    // program can only be run sequentially and will throw an exception if used
+    // in parallel.
+    template <class PreconditionerA, class PreconditionerMp>
+    BlockSchurPreconditioner<PreconditionerA, PreconditionerMp>::
+    BlockSchurPreconditioner(const TrilinosWrappers::BlockSparseMatrix  &S,
+                             const InverseMatrix<TrilinosWrappers::SparseMatrix,
+                             PreconditionerMp>      &Mpinv,
+                             const PreconditionerA                      &Apreconditioner)
+      :
+      stokes_matrix (&S),
+      m_inverse (&Mpinv),
+      a_preconditioner (Apreconditioner),
+      // The temporary vector gets one entry per row of the (1,1) block of S;
+      // complete_index_set() yields the index set containing all of them.
+      tmp (complete_index_set(S.block(1,1).m()))
+    {}
+
+
+    // Next is the <code>vmult</code> function. We implement the action of
+    // $P^{-1}$ as described above in three successive steps.  In formulas, we
+    // want to compute $Y=P^{-1}X$ where $X,Y$ are both vectors with two block
+    // components.
+    //
+    // The first step multiplies the velocity part of the vector by a
+    // preconditioner of the matrix <i>A</i>, i.e. we compute $Y_0={\tilde
+    // A}^{-1}X_0$.  The resulting velocity vector is then multiplied by $B$
+    // and subtracted from the pressure, i.e. we want to compute $X_1-BY_0$.
+    // This second step only acts on the pressure vector and is accomplished
+    // by the residual function of our matrix classes, except that the sign is
+    // wrong. Consequently, we change the sign in the temporary pressure
+    // vector and finally multiply by the inverse pressure mass matrix to get
+    // the final pressure vector, completing our work on the Stokes
+    // preconditioner:
+    template <class PreconditionerA, class PreconditionerMp>
+    void
+    BlockSchurPreconditioner<PreconditionerA, PreconditionerMp>::
+    vmult (TrilinosWrappers::MPI::BlockVector       &dst,
+           const TrilinosWrappers::MPI::BlockVector &src) const
+    {
+      // Velocity block: apply the preconditioner of A to the velocity part
+      // of the source vector.
+      a_preconditioner.vmult (dst.block(0), src.block(0));
+
+      // Pressure block: form the residual with the (1,0) block of the Stokes
+      // matrix, the new velocity and the pressure part of the source vector,
+      // and flip its sign as described above.
+      const TrilinosWrappers::SparseMatrix &B = stokes_matrix->block(1,0);
+      B.residual (tmp, dst.block(0), src.block(1));
+      tmp *= -1;
+
+      // Finally apply the inverse of the pressure mass matrix.
+      m_inverse->vmult (dst.block(1), tmp);
+    }
+  }
+
+
+
+  // @sect3{The <code>BoussinesqFlowProblem</code> class template}
+
+  // The definition of the class that defines the top-level logic of solving
+  // the time-dependent Boussinesq problem is mainly based on the step-22
+  // tutorial program. The main differences are that now we also have to solve
+  // for the temperature equation, which forces us to have a second DoFHandler
+  // object for the temperature variable as well as matrices, right hand
+  // sides, and solution vectors for the current and previous time steps. As
+  // mentioned in the introduction, all linear algebra objects are going to
+  // use wrappers of the corresponding Trilinos functionality.
+  //
+  // The member functions of this class are reminiscent of step-21, where we
+  // also used a staggered scheme that first solves the flow equations (here
+  // the Stokes equations, in step-21 Darcy flow) and then updates the advected
+  // quantity (here the temperature, there the saturation). The functions that
+  // are new are mainly concerned with determining the time step, as well as
+  // the proper size of the artificial viscosity stabilization.
+  //
+  // The last three variables indicate whether the various matrices or
+  // preconditioners need to be rebuilt the next time the corresponding build
+  // functions are called. This allows us to move the corresponding
+  // <code>if</code> into the respective function and thereby keeping our main
+  // <code>run()</code> function clean and easy to read.
+  // Top-level class of the program: it owns the mesh, the finite element and
+  // linear algebra objects of the Stokes and temperature problems, and the
+  // time stepping state.
+  template <int dim>
+  class BoussinesqFlowProblem
+  {
+  public:
+    BoussinesqFlowProblem ();
+    void run ();
+
+  private:
+    // Set up, assembly, solver and postprocessing steps.
+    void setup_dofs ();
+    void assemble_stokes_preconditioner ();
+    void build_stokes_preconditioner ();
+    void assemble_stokes_system ();
+    void assemble_temperature_system (const double maximal_velocity);
+    void assemble_temperature_matrix ();
+    // Largest velocity magnitude found at the evaluation points of all cells.
+    double get_maximal_velocity () const;
+    // (minimum, maximum) of the temperature extrapolated from the two
+    // previous time steps.
+    std::pair<double,double> get_extrapolated_temperature_range () const;
+    void solve ();
+    void output_results () const;
+    void refine_mesh (const unsigned int max_grid_level);
+
+    // Artificial viscosity on one cell, computed from quantities given at
+    // the quadrature points of that cell and a few global numbers.
+    double
+    compute_viscosity(const std::vector<double>          &old_temperature,
+                      const std::vector<double>          &old_old_temperature,
+                      const std::vector<Tensor<1,dim> >  &old_temperature_grads,
+                      const std::vector<Tensor<1,dim> >  &old_old_temperature_grads,
+                      const std::vector<double>          &old_temperature_laplacians,
+                      const std::vector<double>          &old_old_temperature_laplacians,
+                      const std::vector<Tensor<1,dim> >  &old_velocity_values,
+                      const std::vector<Tensor<1,dim> >  &old_old_velocity_values,
+                      const std::vector<double>          &gamma_values,
+                      const double                        global_u_infty,
+                      const double                        global_T_variation,
+                      const double                        cell_diameter) const;
+
+
+    // Mesh shared by both DoFHandler objects below.
+    Triangulation<dim>                  triangulation;
+    // Read by compute_viscosity(); it is not set in the constructor's
+    // initializer list, so it presumably gets assigned elsewhere before
+    // its first use -- TODO confirm.
+    double                              global_Omega_diameter;
+
+    // Stokes part: element degree, finite element, degrees of freedom,
+    // constraints, matrices and vectors.
+    const unsigned int                  stokes_degree;
+    FESystem<dim>                       stokes_fe;
+    DoFHandler<dim>                     stokes_dof_handler;
+    ConstraintMatrix                    stokes_constraints;
+
+    std::vector<IndexSet>               stokes_partitioning;
+    TrilinosWrappers::BlockSparseMatrix stokes_matrix;
+    TrilinosWrappers::BlockSparseMatrix stokes_preconditioner_matrix;
+
+    TrilinosWrappers::MPI::BlockVector  stokes_solution;
+    TrilinosWrappers::MPI::BlockVector  old_stokes_solution;
+    TrilinosWrappers::MPI::BlockVector  stokes_rhs;
+
+
+    // Temperature part: element degree, finite element, degrees of freedom,
+    // constraints, matrices and the solution vectors of the current and the
+    // two previous time steps.
+    const unsigned int                  temperature_degree;
+    FE_Q<dim>                           temperature_fe;
+    DoFHandler<dim>                     temperature_dof_handler;
+    ConstraintMatrix                    temperature_constraints;
+
+    TrilinosWrappers::SparseMatrix      temperature_mass_matrix;
+    TrilinosWrappers::SparseMatrix      temperature_stiffness_matrix;
+    TrilinosWrappers::SparseMatrix      temperature_matrix;
+
+    TrilinosWrappers::MPI::Vector       temperature_solution;
+    TrilinosWrappers::MPI::Vector       old_temperature_solution;
+    TrilinosWrappers::MPI::Vector       old_old_temperature_solution;
+    TrilinosWrappers::MPI::Vector       temperature_rhs;
+
+
+    // Time stepping state: current and previous step length, step counter.
+    double                              time_step;
+    double                              old_time_step;
+    unsigned int                        timestep_number;
+
+    // Preconditioners, held through shared pointers.
+    std_cxx11::shared_ptr<TrilinosWrappers::PreconditionAMG> Amg_preconditioner;
+    std_cxx11::shared_ptr<TrilinosWrappers::PreconditionIC>  Mp_preconditioner;
+
+    // Flags telling the build functions whether the corresponding matrices
+    // or preconditioners have to be rebuilt on their next call.
+    bool                                rebuild_stokes_matrix;
+    bool                                rebuild_temperature_matrices;
+    bool                                rebuild_stokes_preconditioner;
+  };
+
+
+  // @sect3{BoussinesqFlowProblem class implementation}
+
+  // @sect4{BoussinesqFlowProblem::BoussinesqFlowProblem}
+  //
+  // The constructor of this class is an extension of the constructor in
+  // step-22. We need to add the various variables that concern the
+  // temperature. As discussed in the introduction, we are going to use
+  // $Q_2\times Q_1$ (Taylor-Hood) elements again for the Stokes part, and
+  // $Q_2$ elements for the temperature. However, by using variables that
+  // store the polynomial degree of the Stokes and temperature finite
+  // elements, it is easy to consistently modify the degree of the elements as
+  // well as all quadrature formulas used on them downstream. Moreover, we
+  // initialize the time stepping as well as the options for matrix assembly
+  // and preconditioning:
+  // Set the polynomial degrees of the Stokes and temperature elements, build
+  // the finite elements from them, attach both DoFHandler objects to the
+  // triangulation, and reset the time stepping state. All three rebuild
+  // flags start out as true.
+  template <int dim>
+  BoussinesqFlowProblem<dim>::BoussinesqFlowProblem ()
+    :
+    triangulation (Triangulation<dim>::maximum_smoothing),
+    // The diameter of the domain is not known here. Start from NaN rather
+    // than leaving the member uninitialized, so that a read before the real
+    // value has been assigned yields a well-defined value instead of
+    // indeterminate memory.
+    global_Omega_diameter (std::numeric_limits<double>::quiet_NaN()),
+
+    // Velocity uses degree stokes_degree+1 (dim components), pressure uses
+    // degree stokes_degree (one component).
+    stokes_degree (1),
+    stokes_fe (FE_Q<dim>(stokes_degree+1), dim,
+               FE_Q<dim>(stokes_degree), 1),
+    stokes_dof_handler (triangulation),
+
+    temperature_degree (2),
+    temperature_fe (temperature_degree),
+    temperature_dof_handler (triangulation),
+
+    time_step (0),
+    old_time_step (0),
+    timestep_number (0),
+    rebuild_stokes_matrix (true),
+    rebuild_temperature_matrices (true),
+    rebuild_stokes_preconditioner (true)
+  {}
+
+
+
+  // @sect4{BoussinesqFlowProblem::get_maximal_velocity}
+
+  // Starting the real functionality of this class is a helper function that
+  // determines the maximum ($L_\infty$) velocity in the domain (at the
+  // quadrature points, in fact). How it works should be relatively obvious to
+  // all who have gotten to this point of the tutorial. Note that since we are
+  // only interested in the velocity, rather than using
+  // <code>stokes_fe_values.get_function_values</code> to get the values of
+  // the entire Stokes solution (velocities and pressures) we use
+  // <code>stokes_fe_values[velocities].get_function_values</code> to extract
+  // only the velocities part. This has the additional benefit that we get it
+  // as a Tensor<1,dim>, rather than some components in a Vector<double>,
+  // allowing us to process it right away using the <code>norm()</code>
+  // function to get the magnitude of the velocity.
+  //
+  // The only point worth thinking about a bit is how to choose the quadrature
+  // points we use here. The goal of this function is to find the maximal
+  // velocity over a domain by looking at quadrature points on each cell, so
+  // we should ask how best to choose these quadrature points on each
+  // cell. To this end, recall that if we had a single $Q_1$ field
+  // (rather than the vector-valued field of higher order) then the maximum
+  // would be attained at a vertex of the mesh. In other words, we should use
+  // the QTrapez class that has quadrature points only at the vertices of
+  // cells.
+  //
+  // For higher order shape functions, the situation is more complicated: the
+  // maxima and minima may be attained at points between the support points of
+  // shape functions (for the usual $Q_p$ elements the support points are the
+  // equidistant Lagrange interpolation points); furthermore, since we are
+  // looking for the maximum magnitude of a vector-valued quantity, we can
+  // even less say with certainty where the set of potential maximal points
+  // are. Nevertheless, intuitively if not provably, the Lagrange
+  // interpolation points appear to be a better choice than the Gauss points.
+  //
+  // There are now different methods to produce a quadrature formula with
+  // quadrature points equal to the interpolation points of the finite
+  // element. One option would be to use the
+  // FiniteElement::get_unit_support_points() function, reduce the output to a
+  // unique set of points to avoid duplicate function evaluations, and create
+  // a Quadrature object using these points. Another option, chosen here, is
+  // to use the QTrapez class and combine it with the QIterated class that
+  // repeats the QTrapez formula on a number of sub-cells in each coordinate
+  // direction. To cover all support points, we need to iterate it
+  // <code>stokes_degree+1</code> times since this is the polynomial degree of
+  // the Stokes element in use:
+  // Return the largest velocity magnitude found at the points of an iterated
+  // trapezoidal rule on all active cells.
+  template <int dim>
+  double BoussinesqFlowProblem<dim>::get_maximal_velocity () const
+  {
+    // Trapezoidal rule repeated stokes_degree+1 times per direction.
+    const QIterated<dim> point_rule (QTrapez<1>(), stokes_degree+1);
+    const unsigned int   n_points = point_rule.size();
+
+    const FEValuesExtractors::Vector velocities (0);
+
+    FEValues<dim> fe_values (stokes_fe, point_rule, update_values);
+    std::vector<Tensor<1,dim> > cell_velocities (n_points);
+
+    double largest_magnitude = 0;
+
+    typedef typename DoFHandler<dim>::active_cell_iterator cell_iterator_type;
+    cell_iterator_type end_cell = stokes_dof_handler.end();
+    for (cell_iterator_type cell = stokes_dof_handler.begin_active();
+         cell != end_cell; ++cell)
+      {
+        // Evaluate only the velocity part of the Stokes solution on this
+        // cell.
+        fe_values.reinit (cell);
+        fe_values[velocities].get_function_values (stokes_solution,
+                                                   cell_velocities);
+
+        for (unsigned int point=0; point<n_points; ++point)
+          {
+            const double magnitude = cell_velocities[point].norm();
+            largest_magnitude = std::max (largest_magnitude, magnitude);
+          }
+      }
+
+    return largest_magnitude;
+  }
+
+
+
+
+  // @sect4{BoussinesqFlowProblem::get_extrapolated_temperature_range}
+
+  // Next a function that determines the minimum and maximum temperature at
+  // quadrature points inside $\Omega$ when extrapolated from the two previous
+  // time steps to the current one. We need this information in the
+  // computation of the artificial viscosity parameter $\nu$ as discussed in
+  // the introduction.
+  //
+  // The formula for the extrapolated temperature is
+  // $\left(1+\frac{k_n}{k_{n-1}} \right)T^{n-1} - \frac{k_n}{k_{n-1}}
+  // T^{n-2}$. The way to compute it is to loop over all quadrature points and
+  // update the maximum and minimum value if the current value is
+  // bigger/smaller than the previous one. We initialize the variables that
+  // store the max and min before the loop over all quadrature points by the
+  // smallest and the largest number representable as a double. Then we know
+  // for a fact that it is larger/smaller than the minimum/maximum and that
+  // the loop over all quadrature points is ultimately going to update the
+  // initial value with the correct one.
+  //
+  // The only other complication worth mentioning here is that in the first
+  // time step, $T^{n-2}$ is not yet available of course. In that case, we can
+  // only use $T^{n-1}$ which we have from the initial temperature. As
+  // quadrature points, we use the same choice as in the previous function
+  // though with the difference that now the number of repetitions is
+  // determined by the polynomial degree of the temperature field.
+  // Return the pair (minimum, maximum) of the temperature at the points of an
+  // iterated trapezoidal rule on all active cells. From the second time step
+  // on, the temperature is extrapolated from the two previous solutions; in
+  // the very first step only the previous solution is used.
+  template <int dim>
+  std::pair<double,double>
+  BoussinesqFlowProblem<dim>::get_extrapolated_temperature_range () const
+  {
+    const QIterated<dim> point_rule (QTrapez<1>(), temperature_degree);
+    const unsigned int   n_points = point_rule.size();
+
+    FEValues<dim> fe_values (temperature_fe, point_rule, update_values);
+    std::vector<double> previous_values (n_points);
+    std::vector<double> before_previous_values (n_points);
+
+    // Without a second-to-last solution there is nothing to extrapolate
+    // from. The step size ratio is only formed when it is needed.
+    const bool   extrapolate = (timestep_number != 0);
+    const double step_ratio  = (extrapolate ? time_step/old_time_step : 0.);
+
+    // Start from the widest possible range so that the first value seen
+    // replaces both bounds.
+    double lowest  = std::numeric_limits<double>::max();
+    double highest = -std::numeric_limits<double>::max();
+
+    typedef typename DoFHandler<dim>::active_cell_iterator cell_iterator_type;
+    cell_iterator_type end_cell = temperature_dof_handler.end();
+    for (cell_iterator_type cell = temperature_dof_handler.begin_active();
+         cell != end_cell; ++cell)
+      {
+        fe_values.reinit (cell);
+        fe_values.get_function_values (old_temperature_solution,
+                                       previous_values);
+        if (extrapolate)
+          fe_values.get_function_values (old_old_temperature_solution,
+                                         before_previous_values);
+
+        for (unsigned int point=0; point<n_points; ++point)
+          {
+            double temperature;
+            if (extrapolate)
+              temperature = (1. + step_ratio) * previous_values[point]-
+                            step_ratio * before_previous_values[point];
+            else
+              temperature = previous_values[point];
+
+            lowest  = std::min (lowest,  temperature);
+            highest = std::max (highest, temperature);
+          }
+      }
+
+    return std::make_pair(lowest, highest);
+  }
+
+
+
+  // @sect4{BoussinesqFlowProblem::compute_viscosity}
+
+  // The last of the tool functions computes the artificial viscosity
+  // parameter $\nu|_K$ on a cell $K$ as a function of the extrapolated
+  // temperature, its gradient and Laplacian (second derivatives), the velocity,
+  // the right hand side $\gamma$ all on the quadrature points of the current
+  // cell, and various other parameters as described in detail in the
+  // introduction.
+  //
+  // There are some universal constants worth mentioning here. First, we need
+  // to fix $\beta$; we choose $\beta=0.017\cdot dim$, a choice discussed in
+  // detail in the results section of this tutorial program. The second is the
+  // exponent $\alpha$; $\alpha=1$ appears to work fine for the current
+  // program, even though some additional benefit might be expected from
+  // choosing $\alpha = 2$. Finally, there is one thing that requires special
+  // casing: In the first time step, the velocity equals zero, and the formula
+  // for $\nu|_K$ is not defined. In that case, we return $\nu|_K=5\cdot 10^{-3}
+  // \cdot h_K$, a choice admittedly more motivated by heuristics than
+  // anything else (it is in the same order of magnitude, however, as the
+  // value returned for most cells on the second time step).
+  //
+  // The rest of the function should be mostly obvious based on the material
+  // discussed in the introduction:
+  // Compute the artificial viscosity on one cell. All vector arguments hold
+  // one entry per quadrature point of the cell; the number of points is taken
+  // from the size of <code>old_temperature</code>. If
+  // <code>global_u_infty</code> is exactly zero, a fixed multiple of the cell
+  // diameter is returned instead of evaluating the formula.
+  template <int dim>
+  double
+  BoussinesqFlowProblem<dim>::
+  compute_viscosity (const std::vector<double>          &old_temperature,
+                     const std::vector<double>          &old_old_temperature,
+                     const std::vector<Tensor<1,dim> >  &old_temperature_grads,
+                     const std::vector<Tensor<1,dim> >  &old_old_temperature_grads,
+                     const std::vector<double>          &old_temperature_laplacians,
+                     const std::vector<double>          &old_old_temperature_laplacians,
+                     const std::vector<Tensor<1,dim> >  &old_velocity_values,
+                     const std::vector<Tensor<1,dim> >  &old_old_velocity_values,
+                     const std::vector<double>          &gamma_values,
+                     const double                        global_u_infty,
+                     const double                        global_T_variation,
+                     const double                        cell_diameter) const
+  {
+    // Special case: with zero global velocity the scaling below would be
+    // zero, so fall back to a value proportional to the cell size.
+    if (global_u_infty == 0)
+      return 5e-3 * cell_diameter;
+
+    // Stabilization constant and exponent of the scheme.
+    const double beta  = 0.017 * dim;
+    const double alpha = 1;
+
+    double largest_residual = 0;
+    double largest_speed    = 0;
+
+    const unsigned int n_points = old_temperature.size();
+    for (unsigned int point=0; point<n_points; ++point)
+      {
+        // Averages of the two previous time levels at this point.
+        const Tensor<1,dim> mean_velocity
+          = (old_velocity_values[point] + old_old_velocity_values[point]) / 2;
+        const double mean_temperature
+          = (old_temperature[point]+old_old_temperature[point]) / 2;
+
+        // The individual terms of the temperature equation.
+        const double time_derivative
+          = (old_temperature[point] - old_old_temperature[point])
+            / old_time_step;
+        const double advection
+          = mean_velocity * (old_temperature_grads[point] +
+                             old_old_temperature_grads[point]) / 2;
+        const double diffusion
+          = EquationData::kappa
+            * (old_temperature_laplacians[point] +
+               old_old_temperature_laplacians[point]) / 2;
+
+        const double residual
+          = std::abs((time_derivative + advection - diffusion
+                      - gamma_values[point]) *
+                     std::pow(mean_temperature, alpha-1.));
+
+        largest_residual = std::max (residual, largest_residual);
+        largest_speed    = std::max (std::sqrt (mean_velocity*mean_velocity),
+                                     largest_speed);
+      }
+
+    // Global normalization of the residual.
+    const double c_R = std::pow (2., (4.-2*alpha)/dim);
+    const double global_scaling = c_R * global_u_infty * global_T_variation *
+                                  std::pow(global_Omega_diameter, alpha - 2.);
+
+    // The residual based length is capped by the cell diameter.
+    const double residual_length = std::pow(cell_diameter,alpha) *
+                                   largest_residual / global_scaling;
+
+    return beta * largest_speed * std::min (cell_diameter, residual_length);
+  }
+
+
+
+  // @sect4{BoussinesqFlowProblem::setup_dofs}
+  //
+  // This is the function that sets up the DoFHandler objects we have here
+  // (one for the Stokes part and one for the temperature part) as well as set
+  // to the right sizes the various objects required for the linear algebra in
+  // this program. Its basic operations are similar to what we do in step-22.
+  //
+  // The body of the function first enumerates all degrees of freedom for the
+  // Stokes and temperature systems. For the Stokes part, degrees of freedom
+  // are then sorted to ensure that velocities precede pressure DoFs so that
+  // we can partition the Stokes matrix into a $2\times 2$ matrix. As a
+  // difference to step-22, we do not perform any additional DoF
+  // renumbering. In that program, it paid off since our solver was heavily
+  // dependent on ILU's, whereas we use AMG here which is not sensitive to the
+  // DoF numbering. The IC preconditioner for the inversion of the pressure
+  // mass matrix would of course take advantage of a Cuthill-McKee like
+  // renumbering, but its costs are low compared to the velocity portion, so
+  // the additional work does not pay off.
+  //
+  // We then proceed with the generation of the hanging node constraints that
+  // arise from adaptive grid refinement for both DoFHandler objects. For the
+  // velocity, we impose no-flux boundary conditions $\mathbf{u}\cdot
+  // \mathbf{n}=0$ by adding constraints to the object that already stores the
+  // hanging node constraints matrix. The second parameter in the function
+  // describes the first of the velocity components in the total dof vector,
+  // which is zero here. The variable <code>no_normal_flux_boundaries</code>
+  // denotes the boundary indicators for which to set the no flux boundary
+  // conditions; here, this is boundary indicator zero.
+  //
+  // After having done so, we count the number of degrees of freedom in the
+  // various blocks:
+  template <int dim>
+  void BoussinesqFlowProblem<dim>::setup_dofs ()
+  {
+    // Block layout of the Stokes system: the <code>dim</code> velocity
+    // components make up block 0, the pressure (component <code>dim</code>)
+    // makes up block 1.
+    std::vector<unsigned int> stokes_sub_blocks (dim+1,0);
+    stokes_sub_blocks[dim] = 1;
+
+    // Stokes part: enumerate the degrees of freedom, sort them block-wise,
+    // and then collect the hanging node constraints together with the
+    // no-normal-flux constraints on boundary indicator zero in one object.
+    {
+      stokes_dof_handler.distribute_dofs (stokes_fe);
+      DoFRenumbering::component_wise (stokes_dof_handler, stokes_sub_blocks);
+
+      std::set<types::boundary_id> no_normal_flux_boundaries;
+      no_normal_flux_boundaries.insert (0);
+
+      stokes_constraints.clear ();
+      DoFTools::make_hanging_node_constraints (stokes_dof_handler,
+                                               stokes_constraints);
+      VectorTools::compute_no_normal_flux_constraints (stokes_dof_handler,
+                                                       0,
+                                                       no_normal_flux_boundaries,
+                                                       stokes_constraints);
+      stokes_constraints.close ();
+    }
+
+    // Temperature part: only hanging node constraints are needed here.
+    {
+      temperature_dof_handler.distribute_dofs (temperature_fe);
+
+      temperature_constraints.clear ();
+      DoFTools::make_hanging_node_constraints (temperature_dof_handler,
+                                               temperature_constraints);
+      temperature_constraints.close ();
+    }
+
+    // Count the degrees of freedom in the velocity, pressure and temperature
+    // parts and report them.
+    std::vector<types::global_dof_index> stokes_dofs_per_block (2);
+    DoFTools::count_dofs_per_block (stokes_dof_handler, stokes_dofs_per_block,
+                                    stokes_sub_blocks);
+
+    const unsigned int n_u = stokes_dofs_per_block[0];
+    const unsigned int n_p = stokes_dofs_per_block[1];
+    const unsigned int n_T = temperature_dof_handler.n_dofs();
+
+    std::cout << "Number of active cells: "
+              << triangulation.n_active_cells()
+              << " (on "
+              << triangulation.n_levels()
+              << " levels)"
+              << std::endl;
+    std::cout << "Number of degrees of freedom: "
+              << n_u + n_p + n_T
+              << " (" << n_u << '+' << n_p << '+'<< n_T <<')'
+              << std::endl
+              << std::endl;
+
+    // Now come the sparsity patterns for the Stokes matrix, the matrix the
+    // Stokes preconditioner is built from, and the temperature matrices. As
+    // in step-22, the patterns are collected in (Block)DynamicSparsityPattern
+    // objects: $2\times 2$ blocks for the two Stokes matrices, a plain
+    // DynamicSparsityPattern for the temperature part. In each case we first
+    // release the memory held by the matrices.
+    //
+    // The Stokes system matrix has no entries in the pressure-pressure block,
+    // while all velocity components couple with each other and with the
+    // pressure. The Stokes preconditioner matrix only has nonzero diagonal
+    // blocks, since it uses the vector Laplacian discussed in the
+    // introduction, which couples each vector component only with itself.
+    // (Applying the no-flux constraints couples the vector components at the
+    // boundary again, however.)
+    //
+    // The constraints from hanging nodes and no-flux boundary conditions are
+    // applied right while the pattern is generated, as in step-27 and unlike
+    // in the early tutorial programs. During assembly, constraints are
+    // distributed while copying local to global data, so nothing is ever
+    // written to positions of constrained degrees of freedom; passing
+    // <code>false</code> as last argument lets
+    // DoFTools::make_sparsity_pattern omit these entries. The Trilinos
+    // matrices store the sparsity pattern internally, so the pattern objects
+    // can go out of scope as soon as the matrices are initialized.
+    stokes_partitioning.resize (2);
+    stokes_partitioning[0] = complete_index_set (n_u);
+    stokes_partitioning[1] = complete_index_set (n_p);
+
+    const unsigned int stokes_block_sizes[2] = { n_u, n_p };
+
+    {
+      stokes_matrix.clear ();
+
+      BlockDynamicSparsityPattern block_pattern (2,2);
+      for (unsigned int r=0; r<2; ++r)
+        for (unsigned int c=0; c<2; ++c)
+          block_pattern.block(r,c).reinit (stokes_block_sizes[r],
+                                           stokes_block_sizes[c]);
+      block_pattern.collect_sizes ();
+
+      // Everything couples except pressure with pressure.
+      Table<2,DoFTools::Coupling> component_coupling (dim+1, dim+1);
+      for (unsigned int c=0; c<dim+1; ++c)
+        for (unsigned int d=0; d<dim+1; ++d)
+          component_coupling[c][d] = ((c==dim) && (d==dim)
+                                      ?
+                                      DoFTools::none
+                                      :
+                                      DoFTools::always);
+
+      DoFTools::make_sparsity_pattern (stokes_dof_handler, component_coupling,
+                                       block_pattern,
+                                       stokes_constraints, false);
+
+      stokes_matrix.reinit (block_pattern);
+    }
+
+    {
+      Amg_preconditioner.reset ();
+      Mp_preconditioner.reset ();
+      stokes_preconditioner_matrix.clear ();
+
+      BlockDynamicSparsityPattern block_pattern (2,2);
+      for (unsigned int r=0; r<2; ++r)
+        for (unsigned int c=0; c<2; ++c)
+          block_pattern.block(r,c).reinit (stokes_block_sizes[r],
+                                           stokes_block_sizes[c]);
+      block_pattern.collect_sizes ();
+
+      // Each component only couples with itself.
+      Table<2,DoFTools::Coupling> component_coupling (dim+1, dim+1);
+      for (unsigned int c=0; c<dim+1; ++c)
+        for (unsigned int d=0; d<dim+1; ++d)
+          component_coupling[c][d] = (c == d
+                                      ?
+                                      DoFTools::always
+                                      :
+                                      DoFTools::none);
+
+      DoFTools::make_sparsity_pattern (stokes_dof_handler, component_coupling,
+                                       block_pattern,
+                                       stokes_constraints, false);
+
+      stokes_preconditioner_matrix.reinit (block_pattern);
+    }
+
+    // The temperature matrices (a mass matrix and a stiffness matrix that
+    // are later added together for the time discretization, plus the sum
+    // itself) are simpler to set up since there are no blocks or component
+    // couplings to take care of. Only the first matrix is initialized from
+    // the sparsity pattern; the other two are initialized from that matrix,
+    // which allows Trilinos to reuse the sparsity pattern instead of
+    // generating a new one for each copy and thereby saves both time and
+    // memory.
+    {
+      temperature_mass_matrix.clear ();
+      temperature_stiffness_matrix.clear ();
+      temperature_matrix.clear ();
+
+      DynamicSparsityPattern temperature_pattern (n_T, n_T);
+      DoFTools::make_sparsity_pattern (temperature_dof_handler,
+                                       temperature_pattern,
+                                       temperature_constraints, false);
+
+      temperature_matrix.reinit (temperature_pattern);
+      temperature_mass_matrix.reinit (temperature_matrix);
+      temperature_stiffness_matrix.reinit (temperature_matrix);
+    }
+
+    // Finally, give the solution vectors for the Stokes system ($\mathbf
+    // u^{n-1}$ and $\mathbf u^{n-2}$) and for the temperature ($T^{n}$,
+    // $T^{n-1}$ and $T^{n-2}$, required for time stepping) as well as all
+    // right hand side vectors their correct sizes and block structure:
+    stokes_solution.reinit (stokes_partitioning, MPI_COMM_WORLD);
+    old_stokes_solution.reinit (stokes_partitioning, MPI_COMM_WORLD);
+    stokes_rhs.reinit (stokes_partitioning, MPI_COMM_WORLD);
+
+    const IndexSet temperature_partitioning (complete_index_set (n_T));
+    temperature_solution.reinit (temperature_partitioning, MPI_COMM_WORLD);
+    old_temperature_solution.reinit (temperature_partitioning, MPI_COMM_WORLD);
+    old_old_temperature_solution.reinit (temperature_partitioning, MPI_COMM_WORLD);
+
+    temperature_rhs.reinit (temperature_partitioning, MPI_COMM_WORLD);
+  }
+
+
+
+  // @sect4{BoussinesqFlowProblem::assemble_stokes_preconditioner}
+  //
+  // This function assembles the matrix we use for preconditioning the Stokes
+  // system. What we need are a vector Laplace matrix on the velocity
+  // components and a mass matrix weighted by $\eta^{-1}$ on the pressure
+  // component. We start by generating a quadrature object of appropriate
+  // order, the FEValues object that can give values and gradients at the
+  // quadrature points (together with quadrature weights). Next we create data
+  // structures for the cell matrix and the relation between local and global
+  // DoFs. The vectors <code>grad_phi_u</code> and <code>phi_p</code> are
+  // going to hold the velocity basis function gradients and the pressure
+  // basis function values so that the local matrices can be built up faster,
+  // as was already done in step-22. Before we start the loop over all active
+  // cells, we have to specify which components are pressure and which are
+  // velocity.
+  template <int dim>
+  void
+  BoussinesqFlowProblem<dim>::assemble_stokes_preconditioner ()
+  {
+    stokes_preconditioner_matrix = 0;
+
+    const QGauss<dim> quadrature_formula(stokes_degree+2);
+    FEValues<dim>     stokes_fe_values (stokes_fe, quadrature_formula,
+                                        update_values |
+                                        update_gradients |
+                                        update_JxW_values);
+
+    const unsigned int dofs_per_cell = stokes_fe.dofs_per_cell;
+    const unsigned int n_q_points    = quadrature_formula.size();
+
+    FullMatrix<double>                   cell_matrix (dofs_per_cell,
+                                                      dofs_per_cell);
+    std::vector<types::global_dof_index> cell_dof_indices (dofs_per_cell);
+
+    // Scratch arrays for the shape function data at one quadrature point.
+    std::vector<Tensor<2,dim> > grad_phi_u (dofs_per_cell);
+    std::vector<double>         phi_p      (dofs_per_cell);
+
+    const FEValuesExtractors::Vector velocities (0);
+    const FEValuesExtractors::Scalar pressure (dim);
+
+    const typename DoFHandler<dim>::active_cell_iterator
+    endc = stokes_dof_handler.end();
+    for (typename DoFHandler<dim>::active_cell_iterator
+         cell = stokes_dof_handler.begin_active();
+         cell!=endc; ++cell)
+      {
+        stokes_fe_values.reinit (cell);
+        cell_matrix = 0;
+
+        // The cell matrix only consists of two contributions: a Laplace
+        // term on the velocity and a mass matrix on the pressure that is
+        // weighted by $\eta^{-1}$. At each quadrature point we first cache
+        // the shape function data and then add both contributions for all
+        // rows and columns. Once the cell matrix is complete, we get the
+        // global DoF indices and copy the data into the global matrix. As in
+        // step-27, the constraints from hanging nodes are applied during
+        // this copy operation. That way nothing has to be done about them
+        // afterwards, and we do not write into matrix entries that would be
+        // set to zero again later when eliminating constraints.
+        for (unsigned int q=0; q<n_q_points; ++q)
+          {
+            for (unsigned int k=0; k<dofs_per_cell; ++k)
+              {
+                grad_phi_u[k] = stokes_fe_values[velocities].gradient(k,q);
+                phi_p[k]      = stokes_fe_values[pressure].value (k, q);
+              }
+
+            const double JxW = stokes_fe_values.JxW(q);
+
+            for (unsigned int i=0; i<dofs_per_cell; ++i)
+              for (unsigned int j=0; j<dofs_per_cell; ++j)
+                {
+                  const double laplace_term
+                    = EquationData::eta *
+                      scalar_product (grad_phi_u[i], grad_phi_u[j]);
+                  const double mass_term
+                    = (1./EquationData::eta) * phi_p[i] * phi_p[j];
+
+                  cell_matrix(i,j) += (laplace_term + mass_term) * JxW;
+                }
+          }
+
+        cell->get_dof_indices (cell_dof_indices);
+        stokes_constraints.distribute_local_to_global (cell_matrix,
+                                                       cell_dof_indices,
+                                                       stokes_preconditioner_matrix);
+      }
+  }
+
+
+
+  // @sect4{BoussinesqFlowProblem::build_stokes_preconditioner}
+  //
+  // This function generates the inner preconditioners that are going to be
+  // used for the Schur complement block preconditioner. Since the
+  // preconditioners need only to be regenerated when the matrices change,
+  // this function does not have to do anything in case the matrices have not
+  // changed (i.e., the flag <code>rebuild_stokes_preconditioner</code> has
+  // the value <code>false</code>). Otherwise its first task is to call
+  // <code>assemble_stokes_preconditioner</code> to generate the
+  // preconditioner matrices.
+  //
+  // Next, we set up the preconditioner for the velocity-velocity matrix
+  // <i>A</i>. As explained in the introduction, we are going to use an AMG
+  // preconditioner based on a vector Laplace matrix $\hat{A}$ (which is
+  // spectrally close to the Stokes matrix <i>A</i>). Usually, the
+  // TrilinosWrappers::PreconditionAMG class can be seen as a good black-box
+  // preconditioner which does not need any special knowledge. In this case,
+  // however, we have to be careful: since we build an AMG for a vector
+  // problem, we have to tell the preconditioner setup which dofs belong to
+  // which vector component. We do this using the function
+  // DoFTools::extract_constant_modes, a function that generates a set of
+  // <code>dim</code> vectors, where each one has ones in the respective
+  // component of the vector problem and zeros elsewhere. Hence, these are the
+  // constant modes on each component, which explains the name of the
+  // variable.
+  template <int dim>
+  void
+  BoussinesqFlowProblem<dim>::build_stokes_preconditioner ()
+  {
+    // Nothing to do as long as the preconditioner is still up to date.
+    if (!rebuild_stokes_preconditioner)
+      return;
+
+    std::cout << "   Rebuilding Stokes preconditioner..." << std::flush;
+
+    assemble_stokes_preconditioner ();
+
+    Amg_preconditioner.reset (new TrilinosWrappers::PreconditionAMG());
+
+    // Constant modes of the velocity components, needed by the AMG setup for
+    // the vector-valued problem.
+    std::vector<std::vector<bool> > constant_modes;
+    const FEValuesExtractors::Vector velocity_components(0);
+    DoFTools::extract_constant_modes (stokes_dof_handler,
+                                      stokes_fe.component_mask(velocity_components),
+                                      constant_modes);
+
+    // The remaining AMG options: we tell the setup that the velocity matrix
+    // stems from quadratic basis functions (more nonzero entries per row, so
+    // a more robust algorithm has to be chosen internally) and we control
+    // how the coarsening structure is built up. The Trilinos smoothed
+    // aggregation AMG looks at which matrix entries are of similar size as
+    // the diagonal entry in order to build a coarse-grid structure
+    // algebraically. With <code>aggregation_threshold</code> set to 0.02,
+    // all entries that are more than two percent of the size of some
+    // diagonal pivots in that row form one coarse grid point. The value is
+    // rather ad hoc, and fine-tuning it can influence the performance of the
+    // preconditioner. As a rule of thumb, larger values of
+    // <code>aggregation_threshold</code> decrease the number of iterations
+    // but increase the costs per iteration; the Trilinos documentation has
+    // more information on these parameters. With this data set, the
+    // preconditioner is initialized with the matrix it is to be applied to.
+    TrilinosWrappers::PreconditionAMG::AdditionalData amg_data;
+    amg_data.constant_modes        = constant_modes;
+    amg_data.elliptic              = true;
+    amg_data.higher_order_elements = true;
+    amg_data.smoother_sweeps       = 2;
+    amg_data.aggregation_threshold = 0.02;
+    Amg_preconditioner->initialize(stokes_preconditioner_matrix.block(0,0),
+                                   amg_data);
+
+    // The pressure mass matrix is symmetric and well-behaved, so we can
+    // choose a simple preconditioner for its inversion. We stick with an
+    // incomplete Cholesky (IC) factorization, which is designed for
+    // symmetric matrices. An SSOR preconditioner with relaxation factor
+    // around 1.2 would also have been possible, but IC is cheaper for our
+    // example. Both preconditioners live in
+    // <code>std_cxx11::shared_ptr</code> objects, which makes it easy to
+    // recreate them next time around since we do not have to care about
+    // destroying the previously used object.
+    Mp_preconditioner.reset (new TrilinosWrappers::PreconditionIC());
+    Mp_preconditioner->initialize(stokes_preconditioner_matrix.block(1,1));
+
+    std::cout << std::endl;
+
+    rebuild_stokes_preconditioner = false;
+  }
+
+
+
+  // @sect4{BoussinesqFlowProblem::assemble_stokes_system}
+  //
+  // The time lag scheme we use for advancing the coupled Stokes-temperature
+  // system forces us to split up the assembly (and the solution of linear
+  // systems) into two steps. The first one is to create the Stokes system
+  // matrix and right hand side, and the second is to create matrix and right
+  // hand sides for the temperature dofs, which depends on the result of the
+  // linear system for the velocity.
+  //
+  // This function is called at the beginning of each time step. In the first
+  // time step or if the mesh has changed, indicated by the
+  // <code>rebuild_stokes_matrix</code> flag, we need to assemble the Stokes
+  // matrix; on the other hand, if the mesh hasn't changed and the matrix is
+  // already available, this is not necessary and all we need to do is
+  // assemble the right hand side vector which changes in each time step.
+  //
+  // Regarding the technical details of implementation, not much has changed
+  // from step-22. We reset matrix and vector, create a quadrature formula on
+  // the cells, and then create the respective FEValues object. For the update
+  // flags, we require basis function derivatives only in case of a full
+  // assembly, since they are not needed for the right hand side; as always,
+  // choosing the minimal set of flags depending on what is currently needed
+  // makes the call to FEValues::reinit further down in the program more
+  // efficient.
+  //
+  // There is one thing that needs to be commented – since we have a
+  // separate finite element and DoFHandler for the temperature, we need to
+  // generate a second FEValues object for the proper evaluation of the
+  // temperature solution. This isn't too complicated to realize here: just
+  // use the temperature structures and set an update flag for the basis
+  // function values which we need for evaluation of the temperature
+  // solution. The only important part to remember here is that the same
+  // quadrature formula is used for both FEValues objects to ensure that we
+  // get matching information when we loop over the quadrature points of the
+  // two objects.
+  //
+  // The declarations proceed with some shortcuts for array sizes, the
+  // creation of the local matrix and right hand side as well as the vector
+  // for the indices of the local dofs compared to the global system.
+  template <int dim>
+  void BoussinesqFlowProblem<dim>::assemble_stokes_system ()
+  {
+    std::cout << "   Assembling..." << std::flush;
+
+    // The matrix is only reset (and reassembled below) when the
+    // <code>rebuild_stokes_matrix</code> flag is set; the right hand side is
+    // rebuilt on every call.
+    if (rebuild_stokes_matrix == true)
+      stokes_matrix=0;
+
+    stokes_rhs=0;
+
+    const QGauss<dim> quadrature_formula (stokes_degree+2);
+    FEValues<dim>     stokes_fe_values (stokes_fe, quadrature_formula,
+                                        update_values    |
+                                        update_quadrature_points  |
+                                        update_JxW_values |
+                                        (rebuild_stokes_matrix == true
+                                         ?
+                                         update_gradients
+                                         :
+                                         UpdateFlags(0)));
+
+    FEValues<dim>     temperature_fe_values (temperature_fe, quadrature_formula,
+                                             update_values);
+
+    const unsigned int   dofs_per_cell   = stokes_fe.dofs_per_cell;
+    const unsigned int   n_q_points      = quadrature_formula.size();
+
+    FullMatrix<double>   local_matrix (dofs_per_cell, dofs_per_cell);
+    Vector<double>       local_rhs    (dofs_per_cell);
+
+    std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+
+    // Next we need a vector that will contain the values of the temperature
+    // solution at the previous time level at the quadrature points to
+    // assemble the source term in the right hand side of the momentum
+    // equation. Let's call this vector <code>old_temperature_values</code>.
+    //
+    // The set of vectors we create next hold the evaluations of the basis
+    // functions as well as their gradients and symmetrized gradients that
+    // will be used for creating the matrices. Putting these into their own
+    // arrays rather than asking the FEValues object for this information each
+    // time it is needed is an optimization to accelerate the assembly
+    // process, see step-22 for details.
+    //
+    // The last two declarations are used to extract the individual blocks
+    // (velocity, pressure) from the Stokes FE system.
+    std::vector<double>               old_temperature_values(n_q_points);
+
+    std::vector<Tensor<1,dim> >          phi_u       (dofs_per_cell);
+    std::vector<SymmetricTensor<2,dim> > grads_phi_u (dofs_per_cell);
+    std::vector<double>                  div_phi_u   (dofs_per_cell);
+    std::vector<double>                  phi_p       (dofs_per_cell);
+
+    const FEValuesExtractors::Vector velocities (0);
+    const FEValuesExtractors::Scalar pressure (dim);
+
+    // The gravity vector points in the negative y-direction in 2d and in the
+    // negative z-direction otherwise. It does not depend on the cell or the
+    // quadrature point, so we create it once here instead of inside the
+    // loops below.
+    const Point<dim> gravity = -( (dim == 2) ? (Point<dim> (0,1)) :
+                                  (Point<dim> (0,0,1)) );
+
+    // Now start the loop over all cells in the problem. We are working on two
+    // different DoFHandlers for this assembly routine, so we must have two
+    // different cell iterators for the two objects in use. This might seem a
+    // bit peculiar, since both the Stokes system and the temperature system
+    // use the same grid, but that's the only way to keep degrees of freedom
+    // in sync. The first statements within the loop are again all very
+    // familiar, doing the update of the finite element data as specified by
+    // the update flags, zeroing out the local arrays and getting the values
+    // of the old solution at the quadrature points. Then we are ready to loop
+    // over the quadrature points on the cell.
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = stokes_dof_handler.begin_active(),
+    endc = stokes_dof_handler.end();
+    typename DoFHandler<dim>::active_cell_iterator
+    temperature_cell = temperature_dof_handler.begin_active();
+
+    for (; cell!=endc; ++cell, ++temperature_cell)
+      {
+        stokes_fe_values.reinit (cell);
+        temperature_fe_values.reinit (temperature_cell);
+
+        local_matrix = 0;
+        local_rhs = 0;
+
+        temperature_fe_values.get_function_values (old_temperature_solution,
+                                                   old_temperature_values);
+
+        for (unsigned int q=0; q<n_q_points; ++q)
+          {
+            const double old_temperature = old_temperature_values[q];
+
+            // Next we extract the values and gradients of basis functions
+            // relevant to the terms in the inner products. As shown in
+            // step-22 this helps accelerate assembly.
+            //
+            // Once this is done, we start the loop over the rows and columns
+            // of the local matrix and feed the matrix with the relevant
+            // products. The right hand side is filled with the forcing term
+            // driven by temperature in direction of gravity (which is
+            // vertical in our example).  Note that the right hand side term
+            // is always generated, whereas the matrix contributions are only
+            // updated when it is requested by the
+            // <code>rebuild_stokes_matrix</code> flag.
+            for (unsigned int k=0; k<dofs_per_cell; ++k)
+              {
+                phi_u[k] = stokes_fe_values[velocities].value (k,q);
+                if (rebuild_stokes_matrix)
+                  {
+                    grads_phi_u[k] = stokes_fe_values[velocities].symmetric_gradient(k,q);
+                    div_phi_u[k]   = stokes_fe_values[velocities].divergence (k, q);
+                    phi_p[k]       = stokes_fe_values[pressure].value (k, q);
+                  }
+              }
+
+            if (rebuild_stokes_matrix)
+              for (unsigned int i=0; i<dofs_per_cell; ++i)
+                for (unsigned int j=0; j<dofs_per_cell; ++j)
+                  local_matrix(i,j) += (EquationData::eta * 2 *
+                                        (grads_phi_u[i] * grads_phi_u[j])
+                                        - div_phi_u[i] * phi_p[j]
+                                        - phi_p[i] * div_phi_u[j])
+                                       * stokes_fe_values.JxW(q);
+
+            for (unsigned int i=0; i<dofs_per_cell; ++i)
+              local_rhs(i) += (-EquationData::density *
+                               EquationData::beta *
+                               gravity * phi_u[i] * old_temperature)*
+                              stokes_fe_values.JxW(q);
+          }
+
+        // The last step in the loop over all cells is to enter the local
+        // contributions into the global matrix and vector structures to the
+        // positions specified in <code>local_dof_indices</code>.  Again, we
+        // let the ConstraintMatrix class do the insertion of the cell matrix
+        // elements to the global matrix, which already condenses the hanging
+        // node constraints. If the matrix is not being rebuilt, only the
+        // right hand side is transferred.
+        cell->get_dof_indices (local_dof_indices);
+
+        if (rebuild_stokes_matrix == true)
+          stokes_constraints.distribute_local_to_global (local_matrix,
+                                                         local_rhs,
+                                                         local_dof_indices,
+                                                         stokes_matrix,
+                                                         stokes_rhs);
+        else
+          stokes_constraints.distribute_local_to_global (local_rhs,
+                                                         local_dof_indices,
+                                                         stokes_rhs);
+      }
+
+    // The matrix is now up to date; later calls only rebuild the right hand
+    // side until the flag is set again.
+    rebuild_stokes_matrix = false;
+
+    std::cout << std::endl;
+  }
+
+
+
+
+  // @sect4{BoussinesqFlowProblem::assemble_temperature_matrix}
+  //
+  // This function assembles the matrix in the temperature equation. The
+  // temperature matrix consists of two parts, a mass matrix and the time step
+  // size times a stiffness matrix given by a Laplace term times the amount of
+  // diffusion. Since the matrix depends on the time step size (which varies
+  // from one step to another), the temperature matrix needs to be updated
+  // every time step. We could simply regenerate the matrices in every time
+  // step, but this is not really efficient since mass and Laplace matrix do
+  // only change when we change the mesh. Hence, we do this more efficiently
+  // by generating two separate matrices in this function, one for the mass
+  // matrix and one for the stiffness (diffusion) matrix. We will then sum up
+  // the mass matrix plus the stiffness matrix times the time step size once
+  // we know the actual time step.
+  //
+  // So the details for this first step are very simple. In case we need to
+  // rebuild the matrix (i.e., the mesh has changed), we zero the data
+  // structures, get a quadrature formula and a FEValues object, and create
+  // local matrices, local dof indices and evaluation structures for the basis
+  // functions.
+  template <int dim>
+  void BoussinesqFlowProblem<dim>::assemble_temperature_matrix ()
+  {
+    // Nothing to do unless the matrices have been flagged for a rebuild.
+    if (!rebuild_temperature_matrices)
+      return;
+
+    temperature_mass_matrix = 0;
+    temperature_stiffness_matrix = 0;
+
+    const QGauss<dim> quadrature_formula (temperature_degree+2);
+    FEValues<dim>     temperature_fe_values (temperature_fe, quadrature_formula,
+                                             update_values |
+                                             update_gradients |
+                                             update_JxW_values);
+
+    const unsigned int dofs_per_cell = temperature_fe.dofs_per_cell;
+    const unsigned int n_q_points    = quadrature_formula.size();
+
+    FullMatrix<double> cell_mass_matrix (dofs_per_cell, dofs_per_cell);
+    FullMatrix<double> cell_stiffness_matrix (dofs_per_cell, dofs_per_cell);
+
+    std::vector<types::global_dof_index> cell_dof_indices (dofs_per_cell);
+
+    // Scratch arrays for the shape function data at one quadrature point.
+    std::vector<double>         phi_T       (dofs_per_cell);
+    std::vector<Tensor<1,dim> > grad_phi_T  (dofs_per_cell);
+
+    // Loop over all cells of the triangulation. On each cell we zero out the
+    // cell matrices, update the finite element evaluations, and then add up,
+    // quadrature point by quadrature point, the entries of the mass matrix
+    // and of the stiffness matrix (Laplace terms times the diffusion
+    // <code>EquationData::kappa</code>). Finally, the constraints object
+    // inserts these values into the global matrices and condenses the
+    // constraints into them at the same time.
+    const typename DoFHandler<dim>::active_cell_iterator
+    endc = temperature_dof_handler.end();
+    for (typename DoFHandler<dim>::active_cell_iterator
+         cell = temperature_dof_handler.begin_active();
+         cell!=endc; ++cell)
+      {
+        cell_mass_matrix = 0;
+        cell_stiffness_matrix = 0;
+
+        temperature_fe_values.reinit (cell);
+
+        for (unsigned int q=0; q<n_q_points; ++q)
+          {
+            for (unsigned int k=0; k<dofs_per_cell; ++k)
+              {
+                grad_phi_T[k] = temperature_fe_values.shape_grad (k,q);
+                phi_T[k]      = temperature_fe_values.shape_value (k, q);
+              }
+
+            const double JxW = temperature_fe_values.JxW(q);
+
+            for (unsigned int i=0; i<dofs_per_cell; ++i)
+              for (unsigned int j=0; j<dofs_per_cell; ++j)
+                {
+                  cell_mass_matrix(i,j)
+                  += phi_T[i] * phi_T[j] * JxW;
+                  cell_stiffness_matrix(i,j)
+                  += EquationData::kappa * grad_phi_T[i] * grad_phi_T[j] * JxW;
+                }
+          }
+
+        cell->get_dof_indices (cell_dof_indices);
+
+        temperature_constraints.distribute_local_to_global (cell_mass_matrix,
+                                                            cell_dof_indices,
+                                                            temperature_mass_matrix);
+        temperature_constraints.distribute_local_to_global (cell_stiffness_matrix,
+                                                            cell_dof_indices,
+                                                            temperature_stiffness_matrix);
+      }
+
+    rebuild_temperature_matrices = false;
+  }
+
+
+
+  // @sect4{BoussinesqFlowProblem::assemble_temperature_system}
+  //
+  // This function does the second part of the assembly work on the
+  // temperature matrix, the actual addition of temperature mass and stiffness
+  // matrices (where the time step size comes into play), as well as the
+  // creation of the velocity-dependent right hand side. The declarations for
+  // the right hand side assembly in this function are pretty much the same as
+  // the ones used in the other assembly routines, except that we restrict
+  // ourselves to vectors this time. We are going to calculate residuals on
+  // the temperature system, which means that we have to evaluate second
+  // derivatives, specified by the update flag <code>update_hessians</code>.
+  //
+  // The temperature equation is coupled to the Stokes system by means of the
+  // fluid velocity. These two parts of the solution are associated with
+  // different DoFHandlers, so we again need to create a second FEValues
+  // object for the evaluation of the velocity at the quadrature points.
+  template <int dim>
+  void BoussinesqFlowProblem<dim>::
+  assemble_temperature_system (const double maximal_velocity)
+  {
+    const bool use_bdf2_scheme = (timestep_number != 0);   // step 0 uses backward Euler
+
+    if (use_bdf2_scheme == true)
+      {
+        temperature_matrix.copy_from (temperature_mass_matrix);
+        temperature_matrix *= (2*time_step + old_time_step) /
+                              (time_step + old_time_step);
+        temperature_matrix.add (time_step, temperature_stiffness_matrix);
+      }
+    else
+      {
+        temperature_matrix.copy_from (temperature_mass_matrix);
+        temperature_matrix.add (time_step, temperature_stiffness_matrix);
+      }
+
+    temperature_rhs = 0;
+
+    const QGauss<dim> quadrature_formula(temperature_degree+2);
+    FEValues<dim>     temperature_fe_values (temperature_fe, quadrature_formula,
+                                             update_values    |
+                                             update_gradients |
+                                             update_hessians  |
+                                             update_quadrature_points  |
+                                             update_JxW_values);
+    FEValues<dim>     stokes_fe_values (stokes_fe, quadrature_formula,
+                                        update_values);
+
+    const unsigned int   dofs_per_cell   = temperature_fe.dofs_per_cell;
+    const unsigned int   n_q_points      = quadrature_formula.size();
+
+    Vector<double>       local_rhs (dofs_per_cell);
+
+    std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+
+    // Next comes the declaration of vectors to hold the old and older
+    // solution values (as a notation for time levels <i>n-1</i> and
+    // <i>n-2</i>, respectively) and gradients at quadrature points of the
+    // current cell. We also declare an object to hold the temperature right
+    // hand side values (<code>gamma_values</code>), and we again use
+    // shortcuts for the temperature basis functions. Finally, we need to
+    // find the temperature extrema and the diameter of the computational
+    // domain which will be used for the definition of the stabilization
+    // parameter (we got the maximal velocity as an input to this function).
+    std::vector<Tensor<1,dim> > old_velocity_values (n_q_points);
+    std::vector<Tensor<1,dim> > old_old_velocity_values (n_q_points);
+    std::vector<double>         old_temperature_values (n_q_points);
+    std::vector<double>         old_old_temperature_values(n_q_points);
+    std::vector<Tensor<1,dim> > old_temperature_grads(n_q_points);
+    std::vector<Tensor<1,dim> > old_old_temperature_grads(n_q_points);
+    std::vector<double>         old_temperature_laplacians(n_q_points);
+    std::vector<double>         old_old_temperature_laplacians(n_q_points);
+
+    EquationData::TemperatureRightHandSide<dim>  temperature_right_hand_side;
+    std::vector<double> gamma_values (n_q_points);
+
+    std::vector<double>         phi_T      (dofs_per_cell);
+    std::vector<Tensor<1,dim> > grad_phi_T (dofs_per_cell);
+
+    const std::pair<double,double>
+    global_T_range = get_extrapolated_temperature_range();
+
+    const FEValuesExtractors::Vector velocities (0);
+
+    // Now, let's start the loop over all cells in the triangulation. Again,
+    // we need two cell iterators that walk in parallel through the cells of
+    // the two involved DoFHandler objects for the Stokes and temperature
+    // part. Within the loop, we first set the local rhs to zero, and then get
+    // the values and derivatives of the old solution functions at the
+    // quadrature points, since they are going to be needed for the definition
+    // of the stabilization parameters and as coefficients in the equation,
+    // respectively. Note that since the temperature has its own DoFHandler
+    // and FEValues object we get the entire solution at the quadrature point
+    // (which is the scalar temperature field only anyway) whereas for the
+    // Stokes part we restrict ourselves to extracting the velocity part (and
+    // ignoring the pressure part) by using
+    // <code>stokes_fe_values[velocities].get_function_values</code>.
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = temperature_dof_handler.begin_active(),
+    endc = temperature_dof_handler.end();
+    typename DoFHandler<dim>::active_cell_iterator
+    stokes_cell = stokes_dof_handler.begin_active();
+
+    for (; cell!=endc; ++cell, ++stokes_cell)
+      {
+        local_rhs = 0;
+
+        temperature_fe_values.reinit (cell);
+        stokes_fe_values.reinit (stokes_cell);
+
+        temperature_fe_values.get_function_values (old_temperature_solution,
+                                                   old_temperature_values);
+        temperature_fe_values.get_function_values (old_old_temperature_solution,
+                                                   old_old_temperature_values);
+
+        temperature_fe_values.get_function_gradients (old_temperature_solution,
+                                                      old_temperature_grads);
+        temperature_fe_values.get_function_gradients (old_old_temperature_solution,
+                                                      old_old_temperature_grads);
+
+        temperature_fe_values.get_function_laplacians (old_temperature_solution,
+                                                       old_temperature_laplacians);
+        temperature_fe_values.get_function_laplacians (old_old_temperature_solution,
+                                                       old_old_temperature_laplacians);
+
+        temperature_right_hand_side.value_list (temperature_fe_values.get_quadrature_points(),
+                                                gamma_values);
+
+        stokes_fe_values[velocities].get_function_values (stokes_solution,
+                                                          old_velocity_values);
+        stokes_fe_values[velocities].get_function_values (old_stokes_solution,
+                                                          old_old_velocity_values);
+
+        // Next, we calculate the artificial viscosity for stabilization
+        // according to the discussion in the introduction using the dedicated
+        // function. With that at hand, we can get into the loop over
+        // quadrature points and local rhs vector components. The terms here
+        // are quite lengthy, but their definition follows the time-discrete
+        // system developed in the introduction of this program. The BDF-2
+        // scheme needs one more term from the old time step (and involves
+        // more complicated factors) than the backward Euler scheme that is
+        // used for the first time step. When all this is done, we distribute
+        // the local vector into the global one (including hanging node
+        // constraints).
+        const double nu
+          = compute_viscosity (old_temperature_values,
+                               old_old_temperature_values,
+                               old_temperature_grads,
+                               old_old_temperature_grads,
+                               old_temperature_laplacians,
+                               old_old_temperature_laplacians,
+                               old_velocity_values,
+                               old_old_velocity_values,
+                               gamma_values,
+                               maximal_velocity,
+                               global_T_range.second - global_T_range.first,
+                               cell->diameter());
+
+        for (unsigned int q=0; q<n_q_points; ++q)
+          {
+            for (unsigned int k=0; k<dofs_per_cell; ++k)
+              {
+                grad_phi_T[k] = temperature_fe_values.shape_grad (k,q);
+                phi_T[k]      = temperature_fe_values.shape_value (k, q);
+              }
+
+            const double T_term_for_rhs
+              = (use_bdf2_scheme ?
+                 (old_temperature_values[q] *
+                  (1 + time_step/old_time_step)
+                  -
+                  old_old_temperature_values[q] *
+                  (time_step * time_step) /
+                  (old_time_step * (time_step + old_time_step)))
+                 :
+                 old_temperature_values[q]);
+
+            const Tensor<1,dim> ext_grad_T
+              = (use_bdf2_scheme ?
+                 (old_temperature_grads[q] *
+                  (1 + time_step/old_time_step)
+                  -
+                  old_old_temperature_grads[q] *
+                  time_step/old_time_step)
+                 :
+                 old_temperature_grads[q]);
+
+            const Tensor<1,dim> extrapolated_u
+              = (use_bdf2_scheme ?
+                 (old_velocity_values[q] *
+                  (1 + time_step/old_time_step)
+                  -
+                  old_old_velocity_values[q] *
+                  time_step/old_time_step)
+                 :
+                 old_velocity_values[q]);
+
+            for (unsigned int i=0; i<dofs_per_cell; ++i)
+              local_rhs(i) += (T_term_for_rhs * phi_T[i]
+                               -
+                               time_step *
+                               extrapolated_u * ext_grad_T * phi_T[i]
+                               -
+                               time_step *
+                               nu * ext_grad_T * grad_phi_T[i]
+                               +
+                               time_step *
+                               gamma_values[q] * phi_T[i])
+                              *
+                              temperature_fe_values.JxW(q);
+          }
+
+        cell->get_dof_indices (local_dof_indices);
+        temperature_constraints.distribute_local_to_global (local_rhs,
+                                                            local_dof_indices,
+                                                            temperature_rhs);
+      }
+  }
+
+
+
+
+  // @sect4{BoussinesqFlowProblem::solve}
+  //
+  // This function solves the linear systems of equations. Following the
+  // introduction, we start with the Stokes system, where we need to generate
+  // our block Schur preconditioner. Since all the relevant actions are
+  // implemented in the class <code>BlockSchurPreconditioner</code>, all we
+  // have to do is to initialize the class appropriately. What we need to pass
+  // down is an <code>InverseMatrix</code> object for the pressure mass
+  // matrix, which we set up using the respective class together with the IC
+  // preconditioner we already generated, and the AMG preconditioner for the
+  // velocity-velocity matrix. Note that both <code>Mp_preconditioner</code>
+  // and <code>Amg_preconditioner</code> are only pointers, so we use
+  // <code>*</code> to pass down the actual preconditioner objects.
+  //
+  // Once the preconditioner is ready, we create a GMRES solver for the block
+  // system. Since we are working with Trilinos data structures, we have to
+  // set the respective template argument in the solver. GMRES needs to
+  // internally store temporary vectors for each iteration (see the discussion
+  // in the results section of step-22) – the more vectors it can use,
+  // the better it will generally perform. To keep memory demands in check, we
+  // set the number of vectors to 100. This means that up to 100 solver
+  // iterations, every temporary vector can be stored. If the solver needs to
+  // iterate more often to get the specified tolerance, it will work on a
+  // reduced set of vectors by restarting at every 100 iterations.
+  //
+  // With this all set up, we solve the system and distribute the constraints
+  // in the Stokes system, i.e. hanging nodes and no-flux boundary condition,
+  // in order to have the appropriate solution values even at constrained
+  // dofs. Finally, we write the number of iterations to the screen.
+  template <int dim>
+  void BoussinesqFlowProblem<dim>::solve ()
+  {
+    std::cout << "   Solving..." << std::endl;
+
+    {
+      const LinearSolvers::InverseMatrix<TrilinosWrappers::SparseMatrix,
+            TrilinosWrappers::PreconditionIC>
+            mp_inverse (stokes_preconditioner_matrix.block(1,1), *Mp_preconditioner);
+
+      const LinearSolvers::BlockSchurPreconditioner<TrilinosWrappers::PreconditionAMG,
+            TrilinosWrappers::PreconditionIC>
+            preconditioner (stokes_matrix, mp_inverse, *Amg_preconditioner);
+
+      SolverControl solver_control (stokes_matrix.m(),
+                                    1e-6*stokes_rhs.l2_norm());
+
+      SolverGMRES<TrilinosWrappers::MPI::BlockVector>
+      gmres (solver_control,
+             SolverGMRES<TrilinosWrappers::MPI::BlockVector >::AdditionalData(100));
+
+      for (unsigned int i=0; i<stokes_solution.size(); ++i)
+        if (stokes_constraints.is_constrained(i))
+          stokes_solution(i) = 0;
+
+      gmres.solve(stokes_matrix, stokes_solution, stokes_rhs, preconditioner);
+
+      stokes_constraints.distribute (stokes_solution);
+
+      std::cout << "   "
+                << solver_control.last_step()
+                << " GMRES iterations for Stokes subsystem."
+                << std::endl;
+    }
+
+    // Once we know the Stokes solution, we can determine the new time step
+    // from the maximal velocity. We have to do this to satisfy the CFL
+    // condition since convection terms are treated explicitly in the
+    // temperature equation, as discussed in the introduction. The exact form
+    // of the formula used here for the time step is discussed in the results
+    // section of this program.
+    //
+    // There is a snag here. The formula contains a division by the maximum
+    // value of the velocity. However, at the start of the computation, we
+    // have a constant temperature field (we start with a constant
+    // temperature, and it will be nonconstant only after the first time step
+    // during which the source acts). Constant temperature means that no
+    // buoyancy acts, and so the velocity is zero. Dividing by it will not
+    // likely lead to anything good.
+    //
+    // To avoid the resulting infinite time step, we ask whether the maximal
+    // velocity is very small (in particular smaller than the values we
+    // encounter during any of the following time steps) and if so rather than
+    // dividing by zero we just divide by a small value, resulting in a large
+    // but finite time step.
+    old_time_step = time_step;
+    const double maximal_velocity = get_maximal_velocity();
+
+    if (maximal_velocity >= 0.01)
+      time_step = 1./(1.7*dim*std::sqrt(1.*dim)) /
+                  temperature_degree *
+                  GridTools::minimal_cell_diameter(triangulation) /
+                  maximal_velocity;
+    else
+      time_step = 1./(1.7*dim*std::sqrt(1.*dim)) /
+                  temperature_degree *
+                  GridTools::minimal_cell_diameter(triangulation) /
+                  .01;
+
+    std::cout << "   " << "Time step: " << time_step
+              << std::endl;
+
+    temperature_solution = old_temperature_solution;
+
+    // Next we set up the temperature system and the right hand side using the
+    // function <code>assemble_temperature_system()</code>.  Knowing the
+    // matrix and right hand side of the temperature equation, we set up a
+    // preconditioner and a solver. The temperature matrix is a mass matrix
+    // (with eigenvalues around one) plus a Laplace matrix (with eigenvalues
+    // between zero and $ch^{-2}$) times a small number proportional to the
+    // time step $k_n$. Hence, the resulting symmetric and positive definite
+    // matrix has eigenvalues in the range $[1,1+k_nh^{-2}]$ (up to
+    // constants). This matrix is only moderately ill conditioned even for
+    // small mesh sizes and we get a reasonably good preconditioner by simple
+    // means, for example with an incomplete Cholesky decomposition
+    // preconditioner (IC) as we also use for preconditioning the pressure
+    // mass matrix solver. As a solver, we choose the conjugate gradient
+    // method CG. As before, we tell the solver to use Trilinos vectors via
+    // the template argument <code>TrilinosWrappers::MPI::Vector</code>.  Finally,
+    // we solve, distribute the hanging node constraints and write out the
+    // number of iterations.
+    assemble_temperature_system (maximal_velocity);
+    {
+
+      SolverControl solver_control (temperature_matrix.m(),
+                                    1e-8*temperature_rhs.l2_norm());
+      SolverCG<TrilinosWrappers::MPI::Vector> cg (solver_control);
+
+      TrilinosWrappers::PreconditionIC preconditioner;
+      preconditioner.initialize (temperature_matrix);
+
+      cg.solve (temperature_matrix, temperature_solution,
+                temperature_rhs, preconditioner);
+
+      temperature_constraints.distribute (temperature_solution);
+
+      std::cout << "   "
+                << solver_control.last_step()
+                << " CG iterations for temperature."
+                << std::endl;
+
+      // At the end of this function, we step through the vector and read out
+      // the maximum and minimum temperature value, which we also want to
+      // output. This will come in handy when determining the correct constant
+      // in the choice of time step as discussed in the results section of this
+      // program.
+      double min_temperature = temperature_solution(0),
+             max_temperature = temperature_solution(0);
+      for (unsigned int i=0; i<temperature_solution.size(); ++i)
+        {
+          min_temperature = std::min<double> (min_temperature,
+                                              temperature_solution(i));
+          max_temperature = std::max<double> (max_temperature,
+                                              temperature_solution(i));
+        }
+
+      std::cout << "   Temperature range: "
+                << min_temperature << ' ' << max_temperature
+                << std::endl;
+    }
+  }
+
+
+
+  // @sect4{BoussinesqFlowProblem::output_results}
+  //
+  // This function writes the solution to a VTK output file for visualization,
+  // which is done every tenth time step. This is usually quite a simple task,
+  // since the deal.II library provides functions that do almost all the job
+  // for us. There is one new function compared to previous examples: We want
+  // to visualize both the Stokes solution and the temperature as one data
+  // set, but we have done all the calculations based on two different
+  // DoFHandler objects. Luckily, the DataOut class is prepared to deal with
+  // it. All we have to do is to not attach one single DoFHandler at the
+  // beginning and then use that for all added vectors, but specify the
+  // DoFHandler to each vector separately. The rest is done as in step-22. We
+  // create solution names (that are going to appear in the visualization
+  // program for the individual components). The first <code>dim</code>
+  // components are the vector velocity, and then we have pressure for the
+  // Stokes part, whereas temperature is scalar. This information is read out
+  // using the DataComponentInterpretation helper class. Next, we actually
+  // attach the data vectors with their DoFHandler objects, build patches
+  // according to the degree of freedom, which are (sub-) elements that
+  // describe the data for visualization programs. Finally, we set a file name
+  // (that includes the time step number) and write the vtk file.
+  template <int dim>
+  void BoussinesqFlowProblem<dim>::output_results ()  const
+  {
+    if (timestep_number % 10 != 0)   // only every tenth time step is written
+      return;
+
+    std::vector<std::string> stokes_names (dim, "velocity");   // dim velocity components ...
+    stokes_names.push_back ("p");                              // ... followed by the pressure
+    std::vector<DataComponentInterpretation::DataComponentInterpretation>
+    stokes_component_interpretation
+    (dim+1, DataComponentInterpretation::component_is_scalar);
+    for (unsigned int i=0; i<dim; ++i)   // mark the first dim components as one vector
+      stokes_component_interpretation[i]
+        = DataComponentInterpretation::component_is_part_of_vector;
+
+    DataOut<dim> data_out;   // each vector is attached together with its own DoFHandler
+    data_out.add_data_vector (stokes_dof_handler, stokes_solution,
+                              stokes_names, stokes_component_interpretation);
+    data_out.add_data_vector (temperature_dof_handler, temperature_solution,
+                              "T");
+    data_out.build_patches (std::min(stokes_degree, temperature_degree));
+
+    std::ostringstream filename;   // file name embeds the time step number padded to 4 digits
+    filename << "solution-" << Utilities::int_to_string(timestep_number, 4) << ".vtk";
+
+    std::ofstream output (filename.str().c_str());
+    data_out.write_vtk (output);
+  }
+
+
+
+  // @sect4{BoussinesqFlowProblem::refine_mesh}
+  //
+  // This function takes care of the adaptive mesh refinement. The three tasks
+  // this function performs are to first find out which cells to
+  // refine/coarsen, then to actually do the refinement and eventually
+  // transfer the solution vectors between the two different grids. The first
+  // task is simply achieved by using the well-established Kelly error
+  // estimator on the temperature (it is the temperature we're mainly
+  // interested in for this program, and we need to be accurate in regions of
+  // high temperature gradients, also to not have too much numerical
+  // diffusion). The second task is to actually do the remeshing. That
+  // involves only basic functions as well, such as the
+  // <code>refine_and_coarsen_fixed_fraction</code> that refines those cells
+  // with the largest estimated error that together make up 80 per cent of the
+  // error, and coarsens those cells with the smallest error that make up for
+  // a combined 10 per cent of the error.
+  //
+  // If implemented like this, we would get a program that will not make much
+  // progress: Remember that we expect temperature fields that are nearly
+  // discontinuous (the diffusivity $\kappa$ is very small after all) and
+  // consequently we can expect that a freely adapted mesh will refine further
+  // and further into the areas of large gradients. This decrease in mesh size
+  // will then be accompanied by a decrease in time step, requiring an
+  // exceedingly large number of time steps to solve to a given final time. It
+  // will also lead to meshes that are much better at resolving
+  // discontinuities after several mesh refinement cycles than in the
+  // beginning.
+  //
+  // In particular to prevent the decrease in time step size and the
+  // correspondingly large number of time steps, we limit the maximal
+  // refinement depth of the mesh. To this end, after the refinement indicator
+  // has been applied to the cells, we simply loop over all cells on the
+  // finest level and unselect them from refinement if they would result in
+  // too high a mesh level.
+  template <int dim>
+  void BoussinesqFlowProblem<dim>::refine_mesh (const unsigned int max_grid_level)
+  {
+    Vector<float> estimated_error_per_cell (triangulation.n_active_cells());
+
+    KellyErrorEstimator<dim>::estimate (temperature_dof_handler,
+                                        QGauss<dim-1>(temperature_degree+1),
+                                        typename FunctionMap<dim>::type(),
+                                        temperature_solution,
+                                        estimated_error_per_cell);
+
+    GridRefinement::refine_and_coarsen_fixed_fraction (triangulation,
+                                                       estimated_error_per_cell,
+                                                       0.8, 0.1);
+    if (triangulation.n_levels() > max_grid_level)
+      for (typename Triangulation<dim>::active_cell_iterator
+           cell = triangulation.begin_active(max_grid_level);
+           cell != triangulation.end(); ++cell)
+        cell->clear_refine_flag ();
+
+    // As part of mesh refinement we need to transfer the solution vectors
+    // from the old mesh to the new one. To this end we use the
+    // SolutionTransfer class and we have to prepare the solution vectors that
+    // should be transferred to the new grid (we will lose the old grid once
+    // we have done the refinement so the transfer has to happen concurrently
+    // with refinement). What we definitely need are the current and the old
+    // temperature (BDF-2 time stepping requires two old solutions). Since the
+    // SolutionTransfer objects only support transferring one object per dof
+    // handler, we need to collect the two temperature solutions in one data
+    // structure. Moreover, we choose to transfer the Stokes solution, too,
+    // since we need the velocity at two previous time steps, of which only
+    // one is calculated on the fly.
+    //
+    // Consequently, we initialize two SolutionTransfer objects for the Stokes
+    // and temperature DoFHandler objects, by attaching them to the old dof
+    // handlers. With this in place, we can prepare the triangulation and the
+    // data vectors for refinement (in this order).
+    std::vector<TrilinosWrappers::MPI::Vector> x_temperature (2);
+    x_temperature[0] = temperature_solution;
+    x_temperature[1] = old_temperature_solution;
+    TrilinosWrappers::MPI::BlockVector x_stokes = stokes_solution;
+
+    SolutionTransfer<dim,TrilinosWrappers::MPI::Vector>
+    temperature_trans(temperature_dof_handler);
+    SolutionTransfer<dim,TrilinosWrappers::MPI::BlockVector>
+    stokes_trans(stokes_dof_handler);
+
+    triangulation.prepare_coarsening_and_refinement();
+    temperature_trans.prepare_for_coarsening_and_refinement(x_temperature);
+    stokes_trans.prepare_for_coarsening_and_refinement(x_stokes);
+
+    // Now everything is ready, so do the refinement and recreate the dof
+    // structure on the new grid, and initialize the matrix structures and the
+    // new vectors in the <code>setup_dofs</code> function. Next, we actually
+    // perform the interpolation of the solutions between the grids. We create
+    // another copy of temporary vectors for temperature (now corresponding to
+    // the new grid), and let the interpolate function do the job. Then, the
+    // resulting array of vectors is written into the respective vector member
+    // variables. For the Stokes vector, everything is just the same –
+    // except that we do not need another temporary vector since we just
+    // interpolate a single vector. In the end, we have to tell the program
+    // that the matrices and preconditioners need to be regenerated, since the
+    // mesh has changed.
+    triangulation.execute_coarsening_and_refinement ();
+    setup_dofs ();
+
+    std::vector<TrilinosWrappers::MPI::Vector> tmp (2);
+    tmp[0].reinit (temperature_solution);
+    tmp[1].reinit (temperature_solution);
+    temperature_trans.interpolate(x_temperature, tmp);
+
+    temperature_solution = tmp[0];
+    old_temperature_solution = tmp[1];
+
+    stokes_trans.interpolate (x_stokes, stokes_solution);
+
+    rebuild_stokes_matrix         = true;
+    rebuild_temperature_matrices  = true;
+    rebuild_stokes_preconditioner = true;
+  }
+
+
+
+  // @sect4{BoussinesqFlowProblem::run}
+  //
+  // This function performs all the essential steps in the Boussinesq
+  // program. It starts by setting up a grid (depending on the spatial
+  // dimension, we choose some different level of initial refinement and
+  // additional adaptive refinement steps, and then create a cube in
+  // <code>dim</code> dimensions) and sets up the dofs for the first time. Since
+  // we want to start the time stepping already with an adaptively refined
+  // grid, we perform some pre-refinement steps, consisting of all assembly,
+  // solution and refinement, but without actually advancing in time. Rather,
+  // we use the vilified <code>goto</code> statement to jump out of the time
+  // loop right after mesh refinement to start all over again on the new mesh
+  // beginning at the <code>start_time_iteration</code> label. (The use of the
+  // <code>goto</code> is discussed in step-26.)
+  //
+  // Before we start, we project the initial values to the grid and obtain the
+  // first data for the <code>old_temperature_solution</code> vector. Then, we
+  // initialize time step number and time step and start the time loop.
+  template <int dim>
+  void BoussinesqFlowProblem<dim>::run ()
+  {
+    const unsigned int initial_refinement = (dim == 2 ? 4 : 2);
+    const unsigned int n_pre_refinement_steps = (dim == 2 ? 4 : 3);
+
+
+    GridGenerator::hyper_cube (triangulation);
+    global_Omega_diameter = GridTools::diameter (triangulation);
+
+    triangulation.refine_global (initial_refinement);
+
+    setup_dofs();
+
+    unsigned int pre_refinement_step = 0;
+
+start_time_iteration:
+
+    VectorTools::project (temperature_dof_handler,
+                          temperature_constraints,
+                          QGauss<dim>(temperature_degree+2),
+                          EquationData::TemperatureInitialValues<dim>(),
+                          old_temperature_solution);
+
+    timestep_number           = 0;
+    time_step = old_time_step = 0;
+
+    double time = 0;
+
+    do
+      {
+        std::cout << "Timestep " << timestep_number
+                  << ":  t=" << time
+                  << std::endl;
+
+        // The first steps in the time loop are all obvious –
+        // assemble the Stokes system, the preconditioner, the temperature
+        // matrix (matrices and preconditioner do actually only change in case
+        // we've remeshed before), and then do the solve. Before going on with
+        // the next time step, we have to check whether we should first finish
+        // the pre-refinement steps or if we should remesh (every fifth time
+        // step), refining up to a level that is consistent with initial
+        // refinement and pre-refinement steps. Last in the loop is to advance
+        // the solutions, i.e. to copy the solutions to the next "older" time
+        // level.
+        assemble_stokes_system ();
+        build_stokes_preconditioner ();
+        assemble_temperature_matrix ();
+
+        solve ();
+
+        output_results ();
+
+        std::cout << std::endl;
+
+        if ((timestep_number == 0) &&
+            (pre_refinement_step < n_pre_refinement_steps))
+          {
+            refine_mesh (initial_refinement + n_pre_refinement_steps);
+            ++pre_refinement_step;
+            goto start_time_iteration;   // re-project initial values and restart at step 0 on the refined mesh
+          }
+        else if ((timestep_number > 0) && (timestep_number % 5 == 0))
+          refine_mesh (initial_refinement + n_pre_refinement_steps);   // periodic remesh, same level cap
+
+        time += time_step;
+        ++timestep_number;
+
+        old_stokes_solution          = stokes_solution;
+        old_old_temperature_solution = old_temperature_solution;
+        old_temperature_solution     = temperature_solution;
+      }
+    // Do all the above until we arrive at time 100.
+    while (time <= 100);
+  }
+}
+
+
+
+// @sect3{The <code>main</code> function}
+//
+// The main function looks almost the same as in all other programs.
+//
+// There is one difference we have to be careful about. This program uses
+// Trilinos and, typically, Trilinos is configured so that it can run in
+// %parallel using MPI. This doesn't mean that it <i>has</i> to run in
+// %parallel, and in fact this program (unlike step-32) makes no attempt at
+// all to do anything in %parallel using MPI. Nevertheless, Trilinos wants the
+// MPI system to be initialized. We do that by creating an object of type
+// Utilities::MPI::MPI_InitFinalize that initializes MPI (if available) using
+// the arguments given to main() (i.e., <code>argc</code> and
+// <code>argv</code>) and de-initializes it again when the object goes out of
+// scope.
+int main (int argc, char *argv[])
+{
+  try
+    {
+      using namespace dealii;
+      using namespace Step31;
+
+      Utilities::MPI::MPI_InitFinalize mpi_initialization (argc, argv,
+                                                           numbers::invalid_unsigned_int);
+
+      // This program can only be run in serial: throw an exception if it was started with more than one MPI process.
+      AssertThrow(Utilities::MPI::n_mpi_processes(MPI_COMM_WORLD)==1,
+                  ExcMessage("This program can only be run in serial, use ./step-31"));
+
+      BoussinesqFlowProblem<2> flow_problem;
+      flow_problem.run ();
+    }
+  catch (std::exception &exc)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+
+      return 1;
+    }
+  catch (...)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    }
+
+  return 0;
+}
diff --git a/examples/step-32/CMakeLists.txt b/examples/step-32/CMakeLists.txt
new file mode 100644
index 0000000..f1a36b8
--- /dev/null
+++ b/examples/step-32/CMakeLists.txt
@@ -0,0 +1,57 @@
+##
+#  CMake script for the step-32 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-32")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Define the output that should be cleaned:
+SET(CLEAN_UP_FILES *.vtu *.pvtu *.visit)
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+#
+# Are all dependencies fulfilled? This step needs MPI, p4est and Trilinos.
+#
+IF( NOT DEAL_II_WITH_MPI OR
+    NOT DEAL_II_WITH_P4EST OR
+    NOT DEAL_II_WITH_TRILINOS )
+  MESSAGE(FATAL_ERROR "
+Error! The deal.II library found at ${DEAL_II_PATH} was not configured with
+    DEAL_II_WITH_MPI = ON
+    DEAL_II_WITH_P4EST = ON
+    DEAL_II_WITH_TRILINOS = ON
+One or all of these are OFF in your installation but are required for this tutorial step."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-32/doc/builds-on b/examples/step-32/doc/builds-on
new file mode 100644
index 0000000..6349060
--- /dev/null
+++ b/examples/step-32/doc/builds-on
@@ -0,0 +1 @@
+step-31 step-17 step-40
diff --git a/examples/step-32/doc/intro.dox b/examples/step-32/doc/intro.dox
new file mode 100644
index 0000000..46a21c1
--- /dev/null
+++ b/examples/step-32/doc/intro.dox
@@ -0,0 +1,1310 @@
+<br>
+
+<i>This program was contributed by Martin Kronbichler, Wolfgang
+Bangerth, and Timo Heister.
+
+This material is based upon work partly supported by the National
+Science Foundation under Award No. EAR-0426271 and The California Institute of
+Technology; and in a continuation by the National Science
+Foundation under Award No. EAR-0949446 and The University of California
+– Davis. Any opinions, findings, and conclusions or recommendations
+expressed in this publication are those of the author and do not
+necessarily reflect the views of the National Science Foundation, The
+California Institute of Technology, or of The University of California
+– Davis.
+
+The work discussed here is also presented in the following publication:
+<b>
+  M. Kronbichler, T. Heister, W. Bangerth:
+  <i>High Accuracy Mantle Convection Simulation through Modern Numerical
+  Methods</i>, Geophysical Journal International, 2012, 191, 12-29.
+  <a href="http://dx.doi.org/10.1111/j.1365-246X.2012.05609.x">[DOI]</a>
+</b>
+
+The continuation of development of this program has led to the much larger
+open source code Aspect (see http://aspect.dealii.org/ ) which is much more
+flexible in solving many kinds of related problems.
+</i>
+
+
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+This program does pretty much exactly what step-31 already does: it
+solves the Boussinesq equations that describe the motion of a fluid
+whose temperature is not in equilibrium. As such, all the equations we
+have described in step-31 still hold: we solve the same general
+partial differential equation (with only minor modifications to adjust
+for more realism in the problem setting), using the same finite
+element scheme, the same time stepping algorithm, and more or less the
+same stabilization method for the temperature advection-diffusion
+equation. As a consequence, you may first want to understand that
+program — and its implementation — before you work on the
+current one.
+
+The difference between step-31 and the current program is that
+here we want to do things in %parallel, using both the availability of many
+machines in a cluster (with parallelization based on MPI) as well as many
+processor cores within a single machine (with parallelization based on
+threads). This program's main job is therefore to introduce the changes that are
+necessary to utilize the availability of these %parallel compute
+resources. In this regard, it builds on the step-40 program that first
+introduces the necessary classes for much of the %parallel functionality.
+
+In addition to these changes, we also use a slightly different
+preconditioner, and we will have to make a number of changes that have
+to do with the fact that we want to solve a <i>realistic</i> problem
+here, not a model problem. The latter, in particular, will require
+that we think about scaling issues as well as what all those
+parameters and coefficients in the equations under consideration
+actually mean. We will discuss first the issues that affect changes in
+the mathematical formulation and solver structure, then how to
+parallelize things, and finally the actual testcase we will consider.
+
+
+<h3> Using the "right" pressure </h3>
+
+In step-31, we used the following Stokes model for the
+velocity and pressure field:
+@f{eqnarray*}
+  -\nabla \cdot (2 \eta \varepsilon ({\mathbf u})) + \nabla p &=&
+  -\rho \; \beta \; T \mathbf{g},
+  \\
+  \nabla \cdot {\mathbf u} &=& 0.
+@f}
+The right hand side of the first equation appears a wee bit
+unmotivated. Here's how things should really be. We
+need the external forces that act on the fluid, which we assume are
+given by gravity only. In the current case, we assume that the fluid
+does expand slightly for the purposes of this gravity force, but not
+enough that we need to modify the incompressibility condition (the
+second equation). What this means is that for the purpose of the right
+hand side, we can assume that $\rho=\rho(T)$. An assumption that may
+not be entirely justified is that we can assume that the changes of
+density as a function of temperature are small, leading to an
+expression of the form $\rho(T) = \rho_{\text{ref}}
+[1-\beta(T-T_{\text{ref}})]$, i.e. the density equals
+$\rho_{\text{ref}}$ at reference temperature and decreases linearly as
+the temperature increases (as the material expands). The force balance
+equation then looks properly written like this:
+@f{eqnarray*}
+  -\nabla \cdot (2 \eta \varepsilon ({\mathbf u})) + \nabla p &=&
+  \rho_{\text{ref}} [1-\beta(T-T_{\text{ref}})] \mathbf{g}.
+@f}
+Now note that the gravity force results from a gravity potential as
+$\mathbf g=-\nabla \varphi$, so that we can re-write this as follows:
+@f{eqnarray*}
+  -\nabla \cdot (2 \eta \varepsilon ({\mathbf u})) + \nabla p &=&
+  -\rho_{\text{ref}} \; \beta\; T\; \mathbf{g}
+  -\rho_{\text{ref}} [1+\beta T_{\text{ref}}] \nabla\varphi.
+@f}
+The second term on the right is time independent, and so we could
+introduce a new "dynamic" pressure $p_{\text{dyn}}=p+\rho_{\text{ref}}
+[1+\beta T_{\text{ref}}] \varphi=p_{\text{total}}-p_{\text{static}}$
+with which the Stokes equations would read:
+@f{eqnarray*}
+  -\nabla \cdot (2 \eta \varepsilon ({\mathbf u})) + \nabla p_{\text{dyn}} &=&
+  -\rho_{\text{ref}} \; \beta \; T \; \mathbf{g},
+  \\
+  \nabla \cdot {\mathbf u} &=& 0.
+@f}
+This is exactly the form we used in step-31, and it was
+appropriate to do so because all changes in the fluid flow are only
+driven by the dynamic pressure that results from temperature
+differences. (In other words: Any contribution to the right hand side
+that results from taking the gradient of a scalar field has no effect
+on the velocity field.)
+
+On the other hand, we will here use the form of the Stokes equations
+that considers the total pressure instead:
+@f{eqnarray*}
+  -\nabla \cdot (2 \eta \varepsilon ({\mathbf u})) + \nabla p &=&
+  \rho(T)\; \mathbf{g},
+  \\
+  \nabla \cdot {\mathbf u} &=& 0.
+@f}
+There are several advantages to this:
+
+- This way we can plot the pressure in our program in such a way that it
+  actually shows the total pressure that includes the effects of
+  temperature differences as well as the static pressure of the
+  overlying rocks. Since the pressure does not appear any further in any
+  of the other equations, whether to use one or the other is more a
+  matter of taste than of correctness. The flow field is exactly the
+  same, but we get a pressure that we can now compare with values that
+  are given in geophysical books as those that hold at the bottom of the
+  earth mantle, for example.
+
+- If we wanted to make the model even more realistic, we would have to take
+  into account that many of the material parameters (e.g. the viscosity, the
+  density, etc) not only depend on the temperature but also the
+  <i>total</i> pressure.
+
+- The model above assumed a linear dependence $\rho(T) = \rho_{\text{ref}}
+  [1-\beta(T-T_{\text{ref}})]$ and assumed that $\beta$ is small. In
+  practice, this may not be so. In fact, realistic models are
+  certainly not linear, and $\beta$ may also not be small for at least
+  part of the temperature range because the density's behavior is
+  substantially dependent not only on thermal expansion but also on phase
+  changes.
+
+- A final reason to do this is discussed in the results section and
+  concerns possible extensions to the model we use here. It has to do
+  with the fact that the temperature equation (see below) we use here does not
+  include a term that contains the pressure. It should, however:
+  rock, like gas, heats up as you compress it. Consequently,
+  material that rises up cools adiabatically, and cold material that
+  sinks down heats adiabatically. We discuss this further below.
+
+@note There is, however, a downside to this procedure. In the earth,
+the dynamic pressure is several orders of magnitude smaller than the
+total pressure. If we use the equations above and solve all variables
+to, say, 4 digits of accuracy, then we may be able to get the velocity
+and the total pressure right, but we will have no accuracy at all if
+we compute the dynamic pressure by subtracting from the total pressure
+the static part $p_\text{static}=-\rho_{\text{ref}}
+[1+\beta T_{\text{ref}}] \varphi$. If, for example, the dynamic
+pressure is six orders of magnitude smaller than the static pressure,
+then we need to solve the overall pressure to at least seven digits of
+accuracy to get anything remotely accurate. That said, in practice
+this turns out not to be a limiting factor.
+
+
+
+<h3> The scaling of discretized equations </h3>
+
+Remember that we want to solve the following set of equations:
+@f{eqnarray*}
+  -\nabla \cdot (2 \eta \varepsilon ({\mathbf u})) + \nabla p &=&
+  \rho(T) \mathbf{g},
+  \\
+  \nabla \cdot {\mathbf u} &=& 0,
+  \\
+  \frac{\partial T}{\partial t}
+  +
+  {\mathbf u} \cdot \nabla T
+  -
+  \nabla \cdot \kappa \nabla T &=& \gamma,
+@f}
+augmented by appropriate boundary and initial conditions. As discussed
+in step-31, we will solve this set of equations by
+solving for a Stokes problem first in each time step, and then moving
+the temperature equation forward by one time interval.
+
+The problem under consideration in this current section is with the
+Stokes problem: if we discretize it as usual, we get a linear system
+@f{eqnarray*}
+  M \; X
+  =
+  \left(\begin{array}{cc}
+    A & B^T \\ B & 0
+  \end{array}\right)
+  \left(\begin{array}{c}
+    U \\ P
+  \end{array}\right)
+  =
+  \left(\begin{array}{c}
+    F_U \\ 0
+  \end{array}\right)
+  =
+  F
+@f}
+which in this program we will solve with a FGMRES solver. This solver
+iterates until the residual of these linear equations is below a
+certain tolerance, i.e. until
+@f[
+  \left\|
+  \left(\begin{array}{c}
+    F_U - A U^{(k)} - B^T P^{(k)}
+    \\
+    B U^{(k)}
+  \end{array}\right)
+  \right\|
+  < \text{Tol}.
+@f]
+This does not make any sense from the viewpoint of physical units: the
+quantities involved here have physical units so that the first part of
+the residual has units $\frac{\text{Pa}}{\text{m}}
+\text{m}^{\text{dim}}$ (most easily established by considering the
+term $(\nabla \cdot \mathbf v, p)_{\Omega}$ and considering that the
+pressure has units $\text{Pa}=\frac{\text{kg}}{\text{m\; s}^2}$ and
+the integration yields a factor of $\text{m}^{\text{dim}}$), whereas
+the second part of the residual has units
+$\frac{\text{m}^{\text{dim}}}{\text{s}}$. Taking the norm
+of this residual vector would yield a quantity with units
+$\text{m}^{\text{dim}-1} \sqrt{\left(\text{Pa}\right)^2 +
+       \left(\frac{\text{m}}{\text{s}}\right)^2}$. This,
+quite obviously, does not make sense, and we should not be surprised
+that doing so is eventually going to come back hurting us.
+
+So why is this an issue here, but not in step-31? The
+reason back there is that everything was nicely balanced: velocities
+were on the order of one, the pressure likewise, the viscosity was
+one, and the domain had a diameter of $\sqrt{2}$. As a result, while
+nonsensical, nothing bad happened. On the other hand, as we will explain
+below, things here will not be that simply scaled: $\eta$ will be around
+$10^{21}$, velocities on the order of $10^{-8}$, pressure around $10^8$, and
+the diameter of the domain is $10^7$. In other words, the order of magnitude
+for the first equation is going to be
+$\eta\text{div}\varepsilon(\mathbf u) \approx 10^{21} \frac{10^{-8}}{(10^7)^2}
+\approx 10^{-1}$, whereas the second equation will be around
+$\text{div}{\mathbf u}\approx \frac{10^{-8}}{10^7} \approx 10^{-15}$. Well, so
+what this will lead to is this: if the solver wants to make the residual small,
+it will almost entirely focus on the first set of equations because they are
+so much bigger, and ignore the divergence equation that describes mass
+conservation. That's exactly what happens: unless we set the tolerance to
+extremely small values, the resulting flow field is definitely not divergence
+free. As an auxiliary problem, it turns out that it is difficult to find a
+tolerance that always works; in practice, one often ends up with a tolerance
+that requires 30 or 40 iterations for most time steps, and 10,000 for some
+others.
+
+So what's a numerical analyst to do in a case like this? The answer is to
+start at the root and first make sure that everything is mathematically
+consistent. In our case, this means that if we want to solve the system
+of Stokes equations jointly, we have to scale them so that they all have the
+same physical dimensions. In our case, this means multiplying the second
+equation by something that has units $\frac{\text{Pa\; s}}{\text{m}}$; one
+choice is to multiply with $\frac{\eta}{L}$ where $L$ is a typical lengthscale
+in our domain (which experiments show is best chosen to be the diameter of
+plumes — around 10 km — rather than the diameter of the
+domain). Using these %numbers for $\eta$ and $L$, this factor is around
+$10^{17}$. So, we now get this for the Stokes system:
+@f{eqnarray*}
+  -\nabla \cdot (2 \eta \varepsilon ({\mathbf u})) + \nabla p &=&
+  \rho(T) \; \mathbf{g},
+  \\
+  \frac{\eta}{L} \nabla \cdot {\mathbf u} &=& 0.
+@f}
+The trouble with this is that the result is not symmetric any more (we have
+$\frac{\eta}{L} \nabla \cdot$ at the bottom left, but not its transpose
+operator at the top right). This, however, can be cured by introducing a
+scaled pressure $\hat p = \frac{L}{\eta}p$, and we get the scaled equations
+@f{eqnarray*}
+  -\nabla \cdot (2 \eta \varepsilon ({\mathbf u})) +
+  \nabla \left(\frac{\eta}{L} \hat p\right) &=&
+  \rho(T) \; \mathbf{g},
+  \\
+  \frac{\eta}{L} \nabla \cdot {\mathbf u} &=& 0.
+@f}
+This is now symmetric. Obviously, we can easily recover the original pressure
+$p$ from the scaled pressure $\hat p$ that we compute as a result of this
+procedure.
+
+In the program below, we will introduce a factor
+<code>EquationData::pressure_scaling</code> that corresponds to
+$\frac{\eta}{L}$, and we will use this factor in the assembly of the system
+matrix and preconditioner. Because working with a scaled pressure elsewhere in the program would be annoying and error prone, we will
+recover the unscaled pressure immediately following the solution of the linear
+system, i.e., the solution vector's pressure component will immediately be
+unscaled to retrieve the physical pressure. Since the solver uses the fact that
+we can use a good initial guess by extrapolating the previous solutions, we
+also have to scale the pressure immediately <i>before</i> solving.
+
+
+
+<h3> Changes to the Stokes preconditioner and solver </h3>
+
+In this tutorial program, we apply a variant of the preconditioner used in
+step-31. That preconditioner was built to operate on the
+system matrix <i>M</i> in block form such that the product matrix
+@f{eqnarray*}
+  P^{-1} M
+  =
+  \left(\begin{array}{cc}
+    A^{-1} & 0 \\ S^{-1} B A^{-1} & -S^{-1}
+  \end{array}\right)
+  \left(\begin{array}{cc}
+    A & B^T \\ B & 0
+  \end{array}\right)
+@f}
+is of a form that Krylov-based iterative solvers like GMRES can solve in a
+few iterations. We then replaced the exact inverse of <i>A</i> by the action
+of an AMG preconditioner $\tilde{A}$ based on a vector Laplace matrix,
+approximated the Schur complement $S = B A^{-1} B^T$ by a mass matrix $M_p$
+on the pressure space and wrote an <tt>InverseMatrix</tt> class for
+implementing the action of $M_p^{-1}\approx S^{-1}$ on vectors. In the
+InverseMatrix class, we used a CG solve with an incomplete Cholesky (IC)
+preconditioner for performing the inner solves.
+
+An observation one can make is that we use just the action of a
+preconditioner for approximating the velocity inverse $A^{-1}$ (and the
+outer GMRES iteration takes care of the approximate character of the
+inverse), whereas we use a more or less <i>exact</i> inverse for $M_p^{-1}$,
+realized by a fully converged CG solve. This appears unbalanced, but there's
+system to this madness: almost all the effort goes into the upper left block
+to which we apply the AMG preconditioner, whereas even an exact inversion of
+the pressure mass matrix costs basically nothing. Consequently, if it helps us
+reduce the overall number of iterations somewhat, then this effort is well
+spent.
+
+That said, even though the solver worked well for step-31, we have a problem
+here that is a bit more complicated (cells are deformed, the pressure varies
+by orders of magnitude, and we want to plan ahead for more complicated
+physics), and so we'll change a few things slightly:
+
+- For more complex problems, it turns out that using just a single AMG V-cycle
+  as preconditioner is not always sufficient. The outer solver converges just
+  fine most of the time in a reasonable number of iterations (say, less than
+  50) but there is the occasional time step where it suddenly takes 700 or
+  so. What exactly is going on there is hard to determine, but the problem can
+  be avoided by using a more accurate solver for the top left
+  block. Consequently, we'll want to use a CG iteration to invert the top left
+  block of the preconditioner matrix, and use the AMG as a preconditioner for
+  the CG solver.
+
+- The downside of this is that, of course, the Stokes preconditioner becomes
+  much more expensive (approximately 10 times more expensive than when we just
+  use a single V-cycle). Our strategy then is this: let's do up to 30 GMRES
+  iterations with just the V-cycle as a preconditioner and if that doesn't
+  yield convergence, then take the best approximation of the Stokes solution
+  obtained after this first round of iterations and use that as the starting
+  guess for iterations where we use the full inner solver with a rather
+  lenient tolerance as preconditioner. In all our experiments this leads to
+  convergence in only a few additional iterations.
+
+- One thing we need to pay attention to is that when using a CG with a lenient
+  tolerance in the preconditioner, then $y = \tilde A^{-1} r$ is no longer a
+  linear function of $r$ (it is, of course, if we have a very stringent
+  tolerance in our solver, or if we only apply a single V-cycle). This is a
+  problem since now our preconditioner is no longer a linear operator; in
+  other words, every time GMRES uses it the preconditioner looks
+  different. The standard GMRES solver can't deal with this, leading to slow
+  convergence or even breakdown, but the F-GMRES variant is designed to deal
+  with exactly this kind of situation and we consequently use it.
+
+- On the other hand, once we have settled on using F-GMRES we can relax the
+  tolerance used in inverting the preconditioner for $S$. In step-31, we ran a
+  preconditioned CG method on $\tilde S$ until the residual had been reduced
+  by 7 orders of magnitude. Here, we can again be more lenient because we know
+  that the outer preconditioner doesn't suffer.
+
+- In step-31, we used a left preconditioner in which we first invert the top
+  left block of the preconditioner matrix, then apply the bottom left
+  (divergence) one, and then invert the bottom right. In other words, the
+  application of the preconditioner acts as a lower left block triangular
+  matrix. Another option is to use a right preconditioner that here would be
+  upper right block triangular, i.e., we first invert the bottom right
+  Schur complement, apply the top right (gradient) operator and then invert
+  the elliptic top left block. To a degree, which one to choose is a matter of
+  taste. That said, there is one significant advantage to a right
+  preconditioner in GMRES-type solvers: the residual with which we determine
+  whether we should stop the iteration is the true residual, not the norm of
+  the preconditioned equations. Consequently, it is much simpler to compare it
+  to the stopping criterion we typically use, namely the norm of the right
+  hand side vector. In writing this code we found that the scaling issues we
+  discussed above also made it difficult to determine suitable stopping
+  criteria for left-preconditioned linear systems, and consequently this
+  program uses a right preconditioner.
+
+- In step-31, we used an IC (incomplete Cholesky) preconditioner for the
+  pressure mass matrix in the Schur complement preconditioner and for the
+  solution of the temperature system. Here, we could in principle do the same,
+  but we do choose an even simpler preconditioner, namely a Jacobi
+  preconditioner for both systems. This is because here we target massively
+  %parallel computations, where the decompositions for IC/ILU would have to be
+  performed block-wise for the locally owned degrees of freedom on each
+  processor. This means that the preconditioner becomes more like a Jacobi
+  preconditioner anyway, so we rather start from that variant straight
+  away. Note that we only use the Jacobi preconditioners for CG solvers with
+  mass matrices, where they give optimal (<i>h</i>-independent) convergence
+  anyway, even though they usually require about twice as many iterations as
+  an IC preconditioner.
+
+As a final note, let us remark that in step-31 we computed the
+Schur complement $S=B A^{-1} B^T$ by approximating
+$-\text{div}(-\eta\Delta)^{-1}\nabla \approx \frac 1{\eta} \mathbf{1}$. Now,
+however, we have re-scaled the $B$ and $B^T$ operators. So $S$ should now
+approximate
+$-\frac{\eta}{L}\text{div}(-\eta\Delta)^{-1}\nabla \frac{\eta}{L} \approx
+\left(\frac{\eta}{L}\right)^2 \frac 1{\eta} \mathbf{1}$.
+We use the discrete form of the right hand side of this as our approximation
+$\tilde S$ to $S$.
+
+
+<h3> Changes to the artificial viscosity stabilization </h3>
+
+Similarly to step-31, we will use an artificial viscosity for stabilization
+based on a residual of the equation.  As a difference to step-31, we will
+provide two slightly different definitions of the stabilization parameter. For
+$\alpha=1$, we use the same definition as in step-31:
+@f{eqnarray*}
+  \nu_\alpha(T)|_K
+  =
+  \nu_1(T)|_K
+  =
+  \beta
+  \|\mathbf{u}\|_{L^\infty(K)}
+  h_K
+  \min\left\{
+    1,
+    \frac{\|R_1(T)\|_{L^\infty(K)}}{c(\mathbf{u},T)}
+  \right\}
+@f}
+where we compute the viscosity from a residual $\|R_1(T)\|_{L^\infty(K)}$ of
+the equation, limited by a diffusion proportional to the mesh size $h_K$ in
+regions where the residual is large (around steep gradients). This definition
+has been shown to work well for the given case, $\alpha = 1$ in step-31, but
+it is usually less effective than the diffusion for $\alpha=2$. For that case, we
+choose a slightly more readable definition of the viscosity,
+@f{eqnarray*}
+  \nu_2(T)|_K = \min (\nu_h^\mathrm{max}|_K,\nu_h^\mathrm{E}|_K)
+@f}
+where the first term gives again the maximum dissipation (similarly to a first
+order upwind scheme),
+@f{eqnarray*}
+  \nu^\mathrm{max}_h|_K = \beta h_K \|\mathbf {u}\|_{L^\infty(K)}
+@f}
+and the entropy viscosity is defined as
+@f{eqnarray*}
+  \nu^\mathrm{E}_h|_K = c_R \frac{h_K^2 \|R_\mathrm{2,E}(T)\|_{L^\infty(K)}}
+  {\|E(T) - \bar{E}(T)\|_{L^\infty(\Omega)} }.
+@f}
+
+This formula is described in the article <i>J.-L. Guermond, R. Pasquetti, \&
+B. Popov, 2011.  Entropy viscosity method for nonlinear conservation laws, J.
+Comput. Phys., 230, 4248--4267.</i> Compared to the case $\alpha = 1$, the
+residual is computed from the temperature entropy, $E(T) = \frac12 (T-T_m)^2$
+with $T_m$ an average temperature (we choose the mean between the maximum and
+minimum temperature in the computation), which gives the following formula
+@f{eqnarray*}
+ R_\mathrm{E}(T) = \frac{\partial E(T)}{\partial t} +
+    (T-T_\mathrm{m}) \left(\mathbf{u} \cdot \nabla T -  \kappa \nabla^2 T - \gamma\right).
+@f}
+The denominator in the formula for $\nu^\mathrm{E}_h|_K$ is computed as the
+global deviation of the entropy from the space-averaged entropy $\bar{E}(T) =
+\int_\Omega E(T) d\mathbf{x}/\int_\Omega d\mathbf{x}$. As in step-31, we
+evaluate the artificial viscosity from the temperature and velocity at two
+previous time levels, in order to avoid a nonlinearity in its definition.
+
+The above definitions of the viscosity are simple, but depend on two
+parameters, namely $\beta$ and $c_R$.  For the current program, we want to go
+about this issue a bit more systematically for both parameters in the case
+$\alpha =1$, using the same line of reasoning with which we chose two other
+parameters in our discretization, $c_k$ and $\beta$, in the results section of
+step-31. In particular, remember that we would like to make the artificial
+viscosity as small as possible while keeping it as large as necessary. In the
+following, let us describe the general strategy one may follow. The
+computations shown here were done with an earlier version of the program and
+so the actual numerical values you get when running the program may no longer
+match those shown here; that said, the general approach remains valid and has
+been used to find the values of the parameters actually used in the program.
+
+To see what is happening, note that below we will impose
+boundary conditions for the temperature between 973 and 4273 Kelvin,
+and initial conditions are also chosen in this range; for these
+considerations, we run the program without %internal heat sources or sinks,
+and consequently the temperature should
+always be in this range, barring any %internal
+oscillations. If the minimal temperature drops below 973 Kelvin, then
+we need to add stabilization by either increasing $\beta$ or
+decreasing $c_R$.
+
+As we did in step-31, we first determine an optimal value of $\beta$
+by using the "traditional" formula
+@f{eqnarray*}
+  \nu_\alpha(T)|_K
+  =
+  \beta
+  \|\mathbf{u}\|_{L^\infty(K)}
+    h_K,
+@f}
+which we know to be stable if only $\beta$ is large enough. Doing a
+couple hundred time steps (on a coarser mesh than the one shown in the
+program, and with a different viscosity that affects transport
+velocities and therefore time step sizes) in 2d will produce the
+following graph:
+
+<img src="http://www.dealii.org/images/steps/developer/step-32.beta.2d.png" alt="">
+
+As can be seen, values $\beta \le 0.05$ are too small whereas
+$\beta=0.052$ appears to work, at least to the time horizon shown
+here. As a remark on the side, there are at least two questions one
+may wonder here: First, what happens at the time when the solution
+becomes unstable? Looking at the graphical output, we can see that
+with the unreasonably coarse mesh chosen for these experiments, around
+time $t=10^{15}$ seconds the plumes of hot material that have been
+rising towards the cold outer boundary and have then spread sideways
+are starting to get close to each other, squeezing out the cold
+material in-between. This creates a layer of cells into which fluid
+flows from two opposite sides and flows out toward a third, apparently
+a scenario that then produces these instabilities without sufficient
+stabilization. Second: In step-31, we used
+$\beta=0.015\cdot\text{dim}$; why does this not work here? The answer
+to this is not entirely clear -- stabilization parameters are
+certainly known to depend on things like the shape of cells, for which
+we had squares in step-31 but have trapezoids in the current
+program. Whatever the exact cause, we at least have a value of
+$\beta$, namely 0.052 for 2d, that works for the current program.
+A similar set of experiments can be made in 3d where we find that
+$\beta=0.078$ is a good choice — neatly leading to the formula
+$\beta=0.026 \cdot \textrm{dim}$.
+
+With this value fixed, we can go back to the original formula for the
+viscosity $\nu$ and play with the constant $c_R$, making it as large
+as possible in order to make $\nu$ as small as possible. This gives us
+a picture like this:
+
+<img src="http://www.dealii.org/images/steps/developer/step-32.beta_cr.2d.png" alt="">
+
+Consequently, $c_R=0.1$ would appear to be the right value here. While this
+graph has been obtained for an exponent $\alpha=1$, in the program we use
+$\alpha=2$ instead, and in that case one has to re-tune the parameter (and
+observe that $c_R$ appears in the numerator and not in the denominator). It
+turns out that $c_R=1$ works with $\alpha=2$.
+
+
+<h3> Locally conservative Stokes discretization </h3>
+
+The standard Taylor-Hood discretization for Stokes, using the $Q_{k+1}^d
+\times Q_k$ element, is globally conservative, i.e. $\int_{\partial\Omega}
+\mathbf n \cdot \mathbf u_h = 0$. This can easily be seen: the weak form of
+the divergence equation reads $(q_h, \textrm{div}\; \mathbf u_h)=0, \forall
+q_h\in Q_h$. Because the pressure space does contain the function $q_h=1$, we
+get
+@f{align*}
+  0 = (1, \textrm{div}\; \mathbf u_h)_\Omega
+  = \int_\Omega \textrm{div}\; \mathbf u_h
+  = \int_{\partial\Omega} \mathbf n \cdot \mathbf u_h
+@f}
+by the divergence theorem. This property is important: if we want to use the
+velocity field $u_h$ to transport along other quantities (such as the
+temperature in the current equations, but it could also be concentrations of
+chemical substances or entirely artificial tracer quantities) then the
+conservation property guarantees that the amount of the quantity advected
+remains constant.
+
+That said, there are applications where this <i>global</i> property is not
+enough. Rather, we would like that it holds <i>locally</i>, on every
+cell. This can be achieved by using the space
+$Q_{k+1}^d \times DGP_k$ for discretization, where we have replaced the
+<i>continuous</i> space of tensor product polynomials of degree $k$ for the
+pressure by the <i>discontinuous</i> space of the complete polynomials of the
+same degree. (Note that tensor product polynomials in 2d contain the functions
+$1, x, y, xy$, whereas the complete polynomials only have the functions $1,x,y$.)
+This space turns out to be stable for the Stokes equation.
+
+Because the space is discontinuous, we can now in particular choose the test
+function $q_h(\mathbf x)=\chi_K(\mathbf x)$, i.e. the characteristic function
+of cell $K$. We then get in a similar fashion as above
+@f{align*}
+  0
+  = (q_h, \textrm{div}\; \mathbf u_h)_\Omega
+  = (1, \textrm{div}\; \mathbf u_h)_K
+  = \int_K \textrm{div}\; \mathbf u_h
+  = \int_{\partial K} \mathbf n \cdot \mathbf u_h,
+@f}
+showing the conservation property for cell $K$. This clearly holds for each
+cell individually.
+
+There are good reasons to use this discretization. As mentioned above, this
+element guarantees conservation of advected quantities on each cell
+individually. A second advantage is that the pressure mass matrix we use as a
+preconditioner in place of the Schur complement becomes block diagonal and
+consequently very easy to invert. However, there are also downsides. For one,
+there are now more pressure variables, increasing the overall size of the
+problem, although this doesn't seem to cause much harm in practice. More
+importantly, though, the fact that now the divergence integrated over each
+cell is zero when it wasn't before does not guarantee that the divergence is
+pointwise smaller. In fact, as one can easily verify, the $L_2$ norm of the
+divergence is <i>larger</i> for this than for the standard Taylor-Hood
+discretization. (However, both converge at the same rate to zero, since it is
+easy to see that
+$\|\textrm{div}\; u_h\|=
+\|\textrm{div}\; (u-u_h)\|=
+\|\textrm{trace}\; \nabla (u-u_h)\|\le
+\|\nabla (u-u_h)\|={\cal O}(h^{k+1})$.) It is therefore not a priori clear
+that the error is indeed smaller just because we now have more degrees of
+freedom.
+
+Given these considerations, it remains unclear which discretization one should
+prefer. Consequently, we leave that up to the user and make it a parameter in
+the input file which one to use.
+
+
+<h3> Higher order mappings for curved boundaries </h3>
+
+In the program, we will use a spherical shell as domain. This means
+that the inner and outer boundary of the domain are no longer
+"straight" (by which we usually mean that they are bilinear surfaces
+that can be represented by the StraightBoundary class). Rather, they
+are curved and it seems prudent to use a curved approximation in the
+program if we are already using higher order finite elements for the
+velocity. Consequently, we will introduce a member variable of type
+MappingQ that
+denotes such a mapping (step-10 and step-11 introduce such mappings
+for the first time) and that we will use in all computations on cells
+that are adjacent to the boundary. Since this only affects a
+relatively small fraction of cells, the additional effort is not very
+large and we will take the luxury of using a quartic mapping for these
+cells.
+
+
+<h3> Parallelization on clusters </h3>
+
+Running convection codes in 3d with significant Rayleigh numbers requires a lot
+of computations — in the case of whole earth simulations on the order of
+one or several hundred million unknowns. This can obviously not be done with a
+single machine any more (at least not in 2010 when we started writing this
+code). Consequently, we need to parallelize it.
+Parallelization of scientific codes across multiple machines in a cluster of
+computers is almost always done using the Message Passing Interface
+(MPI). This program is no exception to that, and it follows the general spirit
+of the step-17 and step-18 programs in this, though in practice it borrows more
+from step-40 in which we first introduced the classes and strategies we use
+when we want to <i>completely</i> distribute all computations: including, for
+example, splitting the mesh up into a number of parts so that each processor
+only stores its own share plus some ghost cells, and using strategies where no
+processor potentially has enough memory to hold the entries of the combined
+solution vector locally. The goal is to run this code on hundreds or maybe
+even thousands of processors, at reasonable scalability.
+
+@note Even though it has a larger number, step-40 comes logically before the
+current program. You will probably want to look at step-40 before you try to
+understand what we do here.
+
+MPI is a rather awkward interface to program with. It is a semi-object
+oriented set of functions, and while one uses it to send data around a
+network, one needs to explicitly describe the data types because the MPI
+functions insist on getting the address of the data as <code>void*</code>
+objects rather than deducing the data type automatically through overloading
+or templates. We've already seen in step-17 and step-18 how to avoid almost
+all of MPI by putting all the communication necessary into either the deal.II
+library or, in those programs, into PETSc. We'll do something similar here:
+like in step-40, deal.II and the underlying p4est library are responsible for
+all the communication necessary for distributing the mesh, and we will let the
+Trilinos library (along with the wrappers in namespace TrilinosWrappers) deal
+with parallelizing the linear algebra components. We have already used
+Trilinos in step-31, and will do so again here, with the difference that we
+will use its %parallel capabilities.
+
+Trilinos consists of a significant number of packages, implementing basic
+%parallel linear algebra operations (the Epetra package), different solver and
+preconditioner packages, and on to things that are of less importance to
+deal.II (e.g., optimization, uncertainty quantification, etc).
+deal.II's Trilinos interfaces encapsulate many of the things Trilinos offers
+that are of relevance to PDE solvers, and
+provide wrapper classes (in namespace TrilinosWrappers) that make the
+Trilinos matrix, vector, solver and preconditioner classes look very much the
+same as deal.II's own implementations of this functionality. However, as
+opposed to deal.II's classes, they can be used in %parallel if we give them the
+necessary information. As a consequence, there are two Trilinos classes that
+we have to deal with directly (rather than through wrappers), both of which
+are part of Trilinos' Epetra library of basic linear algebra and tool classes:
+<ul>
+<li> The Epetra_Comm class is an abstraction of an MPI "communicator", i.e.
+  it describes how many and which machines can communicate with each other.
+  Each distributed object, such as a sparse matrix or a vector for which we
+  may want to store parts on different machines, needs to have a communicator
+  object to know how many parts there are, where they can be found, and how
+  they can be accessed.
+
+  In this program, we only really use one communicator object -- based on the
+  MPI variable <code>MPI_COMM_WORLD</code> -- that encompasses <i>all</i>
+  processes that work together. It would be perfectly legitimate to start a
+  process on $N$ machines but only store vectors on a subset of these by
+  producing a communicator object that only encompasses this subset of
+  machines; there is really no compelling reason to do so here, however.
+
+<li> The IndexSet class is used to describe which elements of a vector or which
+  rows of a matrix should reside on the current machine that is part of a
+  communicator. To create such an object, you need to know (i) the total
+  number of elements or rows, (ii) the indices of the elements you want to
+  store locally. We will set up
+  these <code>partitioners</code> in the
+  <code>BoussinesqFlowProblem::setup_dofs</code> function below and then hand
+  them to every %parallel object we create.
+
+  Unlike PETSc, Trilinos makes no assumption that the elements of a vector
+  need to be partitioned into contiguous chunks. At least in principle, we
+  could store all elements with even indices on one processor and all odd ones
+  on another. That's not very efficient, of course, but it's
+  possible. Furthermore, the elements of these partitionings do not
+  necessarily need to be mutually exclusive. This is important because when
+  postprocessing solutions, we need access to all locally relevant or at least
+  the locally active degrees of freedom (see the module on @ref distributed
+  for a definition, as well as the discussion in step-40). Which elements the
+  Trilinos vector considers as locally owned is not important to us then. All
+  we care about is that it stores those elements locally that we need.
+</ul>
+
+There are a number of other concepts relevant to distributing the mesh
+to a number of processors; you may want to take a look at the @ref
+distributed module and step-40 before trying to understand this
+program.  The rest of the program is almost completely agnostic about
+the fact that we don't store all objects completely locally. There
+will be a few points where we have to limit loops over all cells to
+those that are locally owned, or where we need to distinguish between
+vectors that store only locally owned elements and those that store
+everything that is locally relevant (see @ref GlossLocallyRelevantDof
+"this glossary entry"), but by and large the amount of heavy lifting
+necessary to make the program run in %parallel is well hidden in the
+libraries upon which this program builds. In any case, we will comment
+on these locations as we get to them in the program code.
+
+
+<h3> Parallelization within individual nodes of a cluster </h3>
+
+The second strategy to parallelize a program is to make use of the fact that
+most computers today have more than one processor that all have access to the
+same memory. In other words, in this model, we don't explicitly have to say
+which pieces of data reside where -- all of the data we need is directly
+accessible and all we have to do is split <i>processing</i> this data between
+the available processors. We will then couple this with the MPI
+parallelization outlined above, i.e. we will have all the processors on a
+machine work together to, for example, assemble the local contributions to the
+global matrix for the cells that this machine actually "owns" but not for
+those cells that are owned by other machines. We will use this strategy for
+four kinds of operations we frequently do in this program: assembly of the
+Stokes and temperature matrices, assembly of the matrix that forms the Stokes
+preconditioner, and assembly of the right hand side of the temperature system.
+
+All of these operations essentially look as follows: we need to loop over all
+cells for which <code>cell-@>subdomain_id()</code> equals the index our
+machine has within the communicator object used for all communication
+(i.e. <code>MPI_COMM_WORLD</code>, as explained above). The test we are
+actually going to use for this, and which describes in a concise way why we
+test this condition, is <code>cell-@>is_locally_owned()</code>. On each
+such cell we need to assemble the local contributions to the global matrix or
+vector, and then we have to copy each cell's contribution into the global
+matrix or vector. Note that the first part of this (the loop) defines a range
+of iterators on which something has to happen. The second part, assembly of
+local contributions, is something that takes the majority of CPU time in this
+sequence of steps, and is a typical example of things that can be done in
+%parallel: each cell's contribution is entirely independent of all other cells'
+contributions. The third part, copying into the global matrix, must not happen
+in %parallel since we are modifying one object and so several threads can not
+at the same time read an existing matrix element, add their contribution, and
+write the sum back into memory without danger of producing a <a
+href="http://en.wikipedia.org/wiki/Race_condition">race condition</a>.
+
+deal.II has a class that is made for exactly this workflow: WorkStream, first
+discussed in step-9 and step-13. Its
+use is also extensively documented in the module on @ref threads (in the section
+on @ref MTWorkStream "the WorkStream class") and we won't repeat here the
+rationale and detailed instructions laid out there, though you will want to
+read through this module to understand the distinction between scratch space
+and per-cell data. Suffice it to mention that we need the following:
+
+- An iterator range for those cells on which we are supposed to work. This is
+  provided by the FilteredIterator class which acts just like every other cell
+  iterator in deal.II with the exception that it skips all cells that do not
+  satisfy a particular predicate (i.e. a criterion that evaluates to true or
+  false). In our case, the predicate is whether a cell has the correct
+  subdomain id.
+
+- A function that does the work on each cell for each of the tasks identified
+  above, i.e. functions that assemble the local contributions to Stokes matrix
+  and preconditioner, temperature matrix, and temperature right hand
+  side. These are the
+  <code>BoussinesqFlowProblem::local_assemble_stokes_system</code>,
+  <code>BoussinesqFlowProblem::local_assemble_stokes_preconditioner</code>,
+  <code>BoussinesqFlowProblem::local_assemble_temperature_matrix</code>, and
+  <code>BoussinesqFlowProblem::local_assemble_temperature_rhs</code> functions in
+  the code below. These four functions can all have several instances
+  running in %parallel at the same time.
+
+- %Functions that copy the result of the previous ones into the global object
+  and that run sequentially to avoid race conditions. These are the
+  <code>BoussinesqFlowProblem::copy_local_to_global_stokes_system</code>,
+  <code>BoussinesqFlowProblem::copy_local_to_global_stokes_preconditioner</code>,
+  <code>BoussinesqFlowProblem::copy_local_to_global_temperature_matrix</code>, and
+  <code>BoussinesqFlowProblem::copy_local_to_global_temperature_rhs</code>
+  functions.
+
+We will comment on a few more points in the actual code, but in general
+their structure should be clear from the discussion in @ref threads.
+
+The underlying technology for WorkStream identifies "tasks" that need to be
+worked on (e.g. assembling local contributions on a cell) and schedules
+these tasks automatically to available processors. WorkStream creates these
+tasks automatically, by splitting the iterator range into suitable chunks.
+
+@note Using multiple threads within each MPI process only makes sense if you
+have fewer MPI processes running on each node of your cluster than there are
+processor cores on this machine. Otherwise, MPI will already keep your
+processors busy and you won't get any additional speedup from using
+threads. For example, if your cluster nodes have 8 cores as they often have at
+the time of writing this, and if your batch scheduler puts 8 MPI processes on
+each node, then using threads doesn't make the program any
+faster. Consequently, you probably want to either configure your deal.II without threads, or set the number of threads in MPI_InitFinalize to 1 (third argument), or "export DEAL_II_NUM_THREADS=1" before running.
+That said, at
+the time of writing this, we only use the WorkStream class for assembling
+(parts of) linear systems, while 75% or more of the run time of the program is
+spent in the linear solvers that are not parallelized — in other words,
+the best we could hope is to parallelize the remaining 25%.
+
+
+<h3> The testcase </h3>
+
+The setup for this program is mildly reminiscent of the problem we wanted to
+solve in the first place (see the introduction of step-31):
+convection in the earth mantle. As a consequence, we choose the following
+data, all of which appears in the program in units of meters and seconds (the
+SI system) even if we list them here in other units. We do note,
+however, that these choices are essentially still only exemplary, and
+not meant to result in a completely realistic description of
+convection in the earth mantle: for that, more and more difficult
+physics would have to be implemented, and several other aspects are
+currently missing from this program as well. We will come back to this
+issue in the results section again, but state for now that providing a
+realistic description is a goal of the <i>Aspect</i> code in
+development at the time of writing this.
+
+As a reminder, let us again state the equations we want to solve:
+@f{eqnarray*}
+  -\nabla \cdot (2 \eta \varepsilon ({\mathbf u})) +
+  \nabla \left( \frac{\eta}{L} \hat p\right) &=&
+  \rho(T) \mathbf{g},
+  \\
+  \frac{\eta}{L} \nabla \cdot {\mathbf u} &=& 0,
+  \\
+  \frac{\partial T}{\partial t}
+  +
+  {\mathbf u} \cdot \nabla T
+  -
+  \nabla \cdot \kappa \nabla T &=& \gamma,
+@f}
+augmented by boundary and initial conditions. We then have to choose data for
+the following quantities:
+<ul>
+  <li>The domain is an annulus (in 2d) or a spherical shell (in 3d) with inner
+  and outer radii that match that of the earth: the total radius of the earth
+  is 6371km, with the mantle starting at a depth of around 35km (just under
+  the solid earth <a target="_top"
+  href="http://en.wikipedia.org/wiki/Crust_(geology)">crust</a> composed of
+  <a target="_top"
+  href="http://en.wikipedia.org/wiki/Continental_crust">continental</a> and <a
+  target="_top" href="http://en.wikipedia.org/wiki/Oceanic_crust">oceanic
+  plates</a>) to a depth of 2890km (where the
+  <a target="_top" href="http://en.wikipedia.org/wiki/Outer_core">outer earth
+  core</a> starts). The radii are therefore $R_0=(6371-2890)\text{km},
+  R_1=(6371-35)\text{km}$. This domain is conveniently generated using the
+  GridGenerator::hyper_shell() function, and we use HyperShellBoundary
+  objects for the inner and outer boundary.
+
+  <li>At the interface between crust and mantle, the temperature is between
+  500 and 900 degrees Celsius, whereas at its bottom it is around 4000 degrees
+  Celsius (see, for example, <a target="_top"
+  href="http://en.wikipedia.org/wiki/Mantle_(geology)">this Wikipedia
+  entry</a>). In Kelvin, we therefore choose $T_0=(4000+273)\text{K}$,
+  $T_1=(700+273)\text{K}$ as boundary conditions at the inner and outer edge.
+
+  In addition to this, we also have to specify some initial conditions for
+  the temperature field. The real temperature field of the earth is quite
+  complicated as a consequence of the convection that has been going on for
+  more than four billion years -- in fact, it is the properties of this
+  temperature distribution that we want to explore with programs like
+  this. As a consequence, we
+  don't really have anything useful to offer here, but we can hope that if we
+  start with something and let things run for a while that the exact initial
+  conditions don't matter that much any more — as is in fact suggested
+  by looking at the pictures shown in the <a href="#Results">results section
+  below</a>. The initial temperature field we use here is given in terms of
+  the radius by
+  @f{align*}
+    s &= \frac{\|\mathbf x\|-R_0}{R_1-R_0}, \\
+    \varphi &= \arctan \frac{y}{x}, \\
+    \tau &= s + \frac 15 s(1-s) \sin(6\varphi) q(z), \\
+    T(\mathbf x) &= T_0(1-\tau) + T_1\tau,
+  @f}
+  where
+  @f{align*}
+    q(z) = \left\{
+    \begin{array}{ll}
+      1 & \text{in 2d} \\
+      \max\{0, \cos(\pi |z/R_1|)\} & \text{in 3d}
+    \end{array}
+    \right. .
+  @f}
+  This complicated function is essentially a perturbation of a linear profile
+  between the inner and outer temperatures. In 2d, the function
+  $\tau=\tau(\mathbf x)$ looks like this (I got the picture from
+  <a
+  href="http://www.wolframalpha.com/input/?i=plot+%28sqrt%28x^2%2By^2%29%2B0.2*%28sqrt%28x^2%2By^2%29*%281-sqrt%28x^2%2By^2%29%29*sin%286*atan2%28x%2Cy%29%29%29%2C+x%3D-1+to+1%2C+y%3D-1+to+1">this
+  page</a>):
+
+  <img src="http://www.dealii.org/images/steps/developer/step-32.2d-initial.png" alt="">
+
+  The point of this profile is that if we had used $s$ instead of $\tau$ in
+  the definition of $T(\mathbf x)$ then it would simply be a linear
+  interpolation. $\tau$ has the same function values as $s$ on the inner and
+  outer boundaries (zero and one, respectively), but it stretches the
+  temperature profile a bit depending on the angle and the $z$ value in 3d,
+  producing an angle-dependent perturbation of the linearly interpolating
+  field. We will see in the results section that this is an
+  entirely unphysical temperature field (though it will make for
+  interesting images) as the equilibrium state for the temperature
+  will be an almost constant temperature with boundary layers at the
+  inner and outer boundary.
+
+  <li>The right hand side of the temperature equation contains the rate of
+  %internal heating $\gamma$. The earth does heat naturally through several mechanisms:
+  radioactive decay, chemical separation (heavier elements sink to the bottom,
+  lighter ones rise to the top; the countercurrents dissipate energy equal to
+  the loss of potential energy by this separation process); heat release
+  by crystallization of liquid metal as the solid inner core of the earth
+  grows; and heat dissipation from viscous friction as the fluid moves.
+
+  Chemical separation is difficult to model since it requires modeling mantle
+  material as multiple phases; it is also a relatively small
+  effect. Crystallization heat is even more difficult since it is confined to
+  areas where temperature and pressure allow for phase changes, i.e. a
+  discontinuous process. Given the difficulties in modeling these two
+  phenomena, we will neglect them.
+
+  The other two are readily handled and, given the way we scaled the
+  temperature equation, lead to the equation
+  @f[
+    \gamma(\mathbf x)
+     =
+     \frac{\rho q+2\eta \varepsilon(\mathbf u):\varepsilon(\mathbf u)}
+     {\rho c_p},
+  @f]
+  where $q$ is the radiogenic heating in $\frac{W}{kg}$, and the second
+  term in the numerator is viscous friction heating. $\rho$ is the density
+  and $c_p$ is the specific heat. The literature provides the following
+  approximate values: $c_p=1250 \frac{J}{kg\; K}, q=7.4\cdot 10^{-12}\frac{W}{kg}$.
+  The other parameters are discussed elsewhere in this section.
+
+  We neglect one internal heat source, namely adiabatic heating here,
+  which will lead to a surprising temperature field. This point is
+  commented on in detail in the results section below.
+
+  <li>For the velocity we choose as boundary conditions $\mathbf{v}=0$ at the
+  inner radius (i.e. the fluid sticks to the earth core) and
+  $\mathbf{n}\cdot\mathbf{v}=0$ at the outer radius (i.e. the fluid flows
+  tangentially along the bottom of the earth crust). Neither of these is
+  physically overly correct: certainly, on both boundaries, fluids can flow
+  tangentially, but they will incur a shear stress through friction against
+  the medium at the other side of the interface (the metallic core and the
+  crust, respectively). Such a situation could be modeled by a Robin-type
+  boundary condition for the tangential velocity; in either case, the normal (vertical)
+  velocity would be zero, although even that is not entirely correct since
+  continental plates also have vertical motion (see, for example, the
+  phenomenon of <a
+  href="http://en.wikipedia.org/wiki/Postglacial_rebound">post-glacial
+  rebound</a>). But to already make things worse for the tangential velocity,
+  the medium on the other side is in motion as well, so the shear stress
+  would, in the simplest case, be proportional to the <i>velocity
+  difference</i>, leading to a boundary condition of the form
+  @f{align*}
+    \mathbf{n}\cdot [2\eta \varepsilon(\mathbf v)]
+    &=
+    s \mathbf{n} \times [\mathbf v - \mathbf v_0],
+    \\
+    \mathbf{n} \cdot \mathbf v &= 0,
+  @f}
+  with a proportionality constant $s$. Rather than going down this route,
+  however, we go with the choice of zero (stick) and tangential
+  flow boundary conditions.
+
+  As a side note of interest, we may also have chosen tangential flow
+  conditions on both inner and outer boundary. That has a significant
+  drawback, however: it leaves the velocity not uniquely defined. The reason
+  is that all velocity fields $\hat{\mathbf v}$ that correspond to a solid
+  body rotation around the center of the domain satisfy $\mathrm{div}\;
+  \varepsilon(\hat{\mathbf v})=0, \mathrm{div} \;\hat{\mathbf v} = 0$, and
+  $\mathbf{n} \cdot \hat{\mathbf v} = 0$. As a consequence, if $\mathbf v$
+  satisfies equations and boundary conditions, then so does $\mathbf v +
+  \hat{\mathbf v}$. That's certainly not a good situation, and one we would like
+  to avoid. The traditional way to work around this is to pick an arbitrary
+  point on the boundary and call this your fixed point by choosing the
+  velocity to be zero in all components there. (In 3d one has to choose two
+  points.) Since this program isn't meant to be too realistic to begin with,
+  we avoid this complication by simply fixing the velocity along the entire
+  interior boundary.
+
+  <li>To first order, the gravity vector always points downward. The question for
+  a body as big as the earth is just: where is "down". The naive answer of course is
+  "radially inward, towards the center of the earth". So at the surface of the
+  earth, we have
+  @f[
+    \mathbf g
+    =
+    -9.81 \frac{\text{m}}{\text{s}^2} \frac{\mathbf x}{\|\mathbf x\|},
+  @f]
+  where $9.81 \frac{\text{m}}{\text{s}^2}$ happens to be the average gravity
+  acceleration at the earth surface. But in the earth interior, the question
+  becomes a bit more complicated: at the (bary-)center of the earth, for
+  example, you have matter pulling equally hard in all directions, and so
+  $\mathbf g=0$. In between, the net force is described as follows: let us
+  define the <a target="_top"
+  href="http://en.wikipedia.org/wiki/Potential_energy#Gravitational_potential_energy">gravity
+  potential</a> by
+  @f[
+    \varphi(\mathbf x)
+    =
+    \int_{\text{earth}}
+    -G \frac{\rho(\mathbf y)}{\|\mathbf x-\mathbf y\|}
+    \ \text{d}y,
+  @f]
+  then $\mathbf g(\mathbf x) = -\nabla \varphi(\mathbf x)$. If we assume that
+  the density $\rho$ is constant throughout the earth, we can produce an
+  analytical expression for the gravity vector (don't try to integrate the above
+  equation somehow -- it leads to elliptic integrals; a simpler way is to
+  notice that $-\Delta\varphi(\mathbf x) = -4\pi G \rho
+  \chi_{\text{earth}}(\mathbf x)$ and solving this
+  partial differential equation in all of ${\mathbb R}^3$ exploiting the
+  radial symmetry):
+  @f[
+    \mathbf g(\mathbf x) =
+    \left\{
+      \begin{array}{ll}
+        -\frac{4}{3}\pi G \rho \|\mathbf x\| \frac{\mathbf x}{\|\mathbf x\|}
+        & \text{for} \ \|\mathbf x\|<R_1, \\
+        -\frac{4}{3}\pi G \rho R_1^3 \frac{1}{\|\mathbf x\|^2}
+        \frac{\mathbf x}{\|\mathbf x\|}
+        & \text{for} \ \|\mathbf x\|\ge R_1.
+      \end{array}
+    \right.
+  @f]
+  The factor $-\frac{\mathbf x}{\|\mathbf x\|}$ is the unit vector pointing
+  radially inward. Of course, within this problem, we are only interested in
+  the branch that pertains to within the earth, i.e. $\|\mathbf
+  x\|<R_1$. We would therefore only consider the expression
+  @f[
+    \mathbf g(\mathbf x) =
+        -\frac{4}{3}\pi G \rho \|\mathbf x\| \frac{\mathbf x}{\|\mathbf x\|}
+        =
+        -\frac{4}{3}\pi G \rho \mathbf x
+	=
+	- 9.81 \frac{\mathbf x}{R_1} \frac{\text{m}}{\text{s}^2},
+  @f]
+  where we can infer the last expression because we know Earth's gravity at
+  the surface (where $\|\mathbf x\|=R_1$).
+
+  One can derive a more general expression by integrating the
+  differential equation for $\varphi(r)$ in the case that the density
+  distribution is radially symmetric, i.e. $\rho(\mathbf
+  x)=\rho(\|\mathbf x\|)=\rho(r)$. In that case, one would get
+  @f[
+    \varphi(r)
+    = 4\pi G \int_0^r \frac 1{s^2} \int_0^s t^2 \rho(t) \; dt \; ds.
+  @f]
+
+
+  There are two problems with this, however: (i) The Earth is not homogeneous,
+  i.e. the density $\rho$ depends on $\mathbf x$; in fact it is not even a
+  function that only depends on the radius $r=\|\mathbf x\|$. In reality, gravity therefore
+  does not always decrease as we get deeper: because the earth core is so much
+  denser than the mantle, gravity actually peaks at around $10.7
+  \frac{\text{m}}{\text{s}^2}$ at the core mantle boundary (see <a
+  target="_top" href="http://en.wikipedia.org/wiki/Earth's_gravity">this
+  article</a>). (ii) The density, and by
+  consequence the gravity vector, is not even constant in time: after all, the
+  problem we want to solve is the time dependent upwelling of hot, less dense
+  material and the downwelling of cold dense material. This leads to a gravity
+  vector that varies with space and time, and does not always point straight
+  down.
+
+  In order to not make the situation more complicated than necessary, we could
+  use the approximation that at the inner boundary of the mantle,
+  gravity is $10.7 \frac{\text{m}}{\text{s}^2}$ and at the outer
+  boundary it is $9.81 \frac{\text{m}}{\text{s}^2}$, in each case
+  pointing radially inward, and that in between gravity varies
+  linearly with the radial distance from the earth center. That said, it isn't
+  that hard to actually be slightly more realistic and assume (as we do below)
+  that the earth mantle has constant density. In that case, the equation above
+  can be integrated and we get an expression for $\|\mathbf{g}\|$ where we
+  can fit constants to match the gravity at the top and bottom of the earth
+  mantle to obtain
+  @f[
+    \|\mathbf{g}\|
+    = 1.245\cdot 10^{-6} \frac{1}{\textrm{s}^2} r + 7.714\cdot 10^{13} \frac{\textrm{m}^3}{\textrm{s}^2}\frac{1}{r^2}.
+  @f]
+
+  <li>The density of the earth mantle varies spatially, but not by very
+  much. $\rho_{\text{ref}}=3300 \frac{\text{kg}}{\text{m}^3}$ is a relatively good average
+  value for the density at reference temperature $T_{\text{ref}}=293$ Kelvin.
+
+  <li>The thermal expansion coefficient $\beta$ also varies with depth
+  (through its dependence on temperature and pressure). Close to the surface,
+  it appears to be on the order of $\beta=45\cdot 10^{-6} \frac 1{\text{K}}$,
+  whereas at the core mantle boundary, it may be closer to $\beta=10\cdot
+  10^{-6} \frac 1{\text{K}}$. As a reasonable value, let us choose
+  $\beta=2\cdot 10^{-5} \frac 1{\text{K}}$. The density as a function
+  of temperature is then
+  $\rho(T)=[1-\beta(T-T_{\text{ref}})]\rho_{\text{ref}}$.
+
+  <li>The second to last parameter we need to specify is the viscosity
+  $\eta$. This is a tough one, because rocks at the temperatures and pressure
+  typical for the earth mantle flow so slowly that the viscosity cannot be
+  determined accurately in the laboratory. So how do we know about the
+  viscosity of the mantle? The most commonly used route is to consider that
+  during and after ice ages, ice shields form and disappear on time scales
+  that are shorter than the time scale of flow in the mantle. As a
+  consequence, continents slowly sink into the earth mantle under the added
+  weight of an ice shield, and they rise up again slowly after the ice shield
+  has disappeared again (this is called <a target="_top"
+  href="http://en.wikipedia.org/wiki/Postglacial_rebound"><i>postglacial
+  rebound</i></a>). By measuring the speed of this rebound, we can infer the
+  viscosity of the material that flows into the area vacated under the
+  rebounding continental plates.
+
+  Using this technique, values around $\eta=10^{21} \text{Pa\; s}
+  = 10^{21} \frac{\text{N\; s}}{\text{m}^2}
+  = 10^{21} \frac{\text{kg}}{\text{m\; s}}$ have been found as the most
+  likely, though the error bar on this is at least one order of magnitude.
+
+  While we will use this value, we again have to caution that there are many
+  physical reasons to assume that this is not the correct value. First, it
+  should really be made dependent on temperature: hotter material is most
+  likely to be less viscous than colder material. In reality, however, the
+  situation is even more complex. Most rocks in the mantle undergo phase
+  changes as temperature and pressure change: depending on temperature and
+  pressure, different crystal configurations are thermodynamically favored
+  over others, even if the chemical composition of the mantle were
+  homogeneous. For example, the common mantle material MgSiO<sub>3</sub> exists
+  in its <a target="_top"
+  href="http://en.wikipedia.org/wiki/Perovskite_(structure)">perovskite
+  structure</a> throughout most of the mantle, but in the lower mantle the
+  same substance is stable only as <a target="_top"
+  href="http://en.wikipedia.org/wiki/Postperovskite">post-perovskite</a>. Clearly,
+  to compute realistic viscosities, we would not only need to know the exact
+  chemical composition of the mantle and the viscosities of all materials, but
+  we would also have to compute the thermodynamically most stable
+  configurations for all materials at each quadrature point. This is at the
+  time of writing this program not a feasible suggestion.
+
+  <li>Our last material parameter is the thermal diffusivity $\kappa$, which
+  is defined as $\kappa=\frac{k}{\rho c_p}$ where $k$ is the thermal
+  conductivity, $\rho$ the density, and $c_p$ the specific heat. For
+  this, the literature indicates that it increases from around $0.7$ in the
+  upper mantle to around $1.7 \frac{\text{mm}^2}{\text{s}}$ in the lower
+  mantle, though the exact value
+  is not really all that important: heat transport through convection is
+  several orders of magnitude more important than through thermal
+  conduction. It may be of interest to know that perovskite, the most abundant
+  material in the earth mantle, appears to become transparent at pressures
+  above around 120 GPa (see, for example, J. Badro et al., Science 305,
+  383-386 (2004)); in the lower mantle, it may therefore be that heat
+  transport through radiative transfer is more efficient than through thermal
+  conduction.
+
+  In view of these considerations, let us choose
+  $\kappa=1 \frac{\text{mm}^2}{\text{s}} =10^{-6} \frac{\text{m}^2}{\text{s}}$
+  for the purpose of this program.
+</ul>
+
+All of these pieces of equation data are defined in the program in the
+<code>EquationData</code> namespace. When run, the program produces
+long-term maximal velocities around 10-40 centimeters per year (see
+the results section below), approximately the physically correct order
+of magnitude. We will set the end time to 1 billion years.
+
+@note The choice of the constants and material parameters above follows in
+large part the comprehensive book "Mantle Convection in the Earth and Planets,
+Part 1" by G. Schubert and D. L. Turcotte and P. Olson (Cambridge, 2001). It
+contains extensive discussion of ways to make the program more realistic.
+
+
+<h3> Implementation details </h3>
+
+Compared to step-31, this program has a number of noteworthy differences:
+
+- The <code>EquationData</code> namespace is significantly larger, reflecting
+  the fact that we now have much more physics to deal with. That said, most of
+  this additional physical detail is rather self-contained in functions in
+  this one namespace, and does not proliferate throughout the rest of the
+  program.
+
+- Of more obvious visibility is the fact that we have put a good number of
+  parameters into an input file handled by the ParameterHandler class (see,
+  for example, step-29, for ways to set up run-time parameter files with this
+  class). This often makes sense when one wants to avoid re-compiling the
+  program just because one wants to play with a single parameter (think, for
+  example, of parameter studies determining the best values of the
+  stabilization constants discussed above), in particular given that it takes
+  a nontrivial amount of time to re-compile programs of the current size. To
+  just give an overview of the kinds of parameters we have moved from fixed
+  values into the input file, here is a listing of a typical
+  <code>\step-32.prm</code> file:
+  @code
+# Listing of Parameters
+# ---------------------
+# The end time of the simulation in years.
+set End time                            = 1e8
+
+# Whether graphical output is to be generated or not. You may not want to get
+# graphical output if the number of processors is large.
+set Generate graphical output           = false
+
+# The number of adaptive refinement steps performed after initial global
+# refinement.
+set Initial adaptive refinement         = 1
+
+# The number of global refinement steps performed on the initial coarse mesh,
+# before the problem is first solved there.
+set Initial global refinement           = 1
+
+# The number of time steps between each generation of graphical output files.
+set Time steps between graphical output = 50
+
+# The number of time steps after which the mesh is to be adapted based on
+# computed error indicators.
+set Time steps between mesh refinement  = 10
+
+
+subsection Discretization
+  # The polynomial degree to use for the velocity variables in the Stokes
+  # system.
+  set Stokes velocity polynomial degree       = 2
+
+  # The polynomial degree to use for the temperature variable.
+  set Temperature polynomial degree           = 2
+
+  # Whether to use a Stokes discretization that is locally conservative at the
+  # expense of a larger number of degrees of freedom, or to go with a cheaper
+  # discretization that does not locally conserve mass (although it is
+  # globally conservative.
+  set Use locally conservative discretization = true
+end
+
+
+subsection Stabilization parameters
+  # The exponent in the entropy viscosity stabilization.
+  set alpha = 2
+
+  # The beta factor in the artificial viscosity stabilization. An appropriate
+  # value for 2d is 0.052 and 0.078 for 3d.
+  set beta  = 0.078
+
+  # The c_R factor in the entropy viscosity stabilization.
+  set c_R   = 0.5
+end
+  @endcode
+
+- There are, obviously, a good number of changes that have to do with the fact
+  that we want to run our program on a possibly very large number of
+  machines. Although one may suspect that this requires us to completely
+  re-structure our code, that isn't in fact the case (although the classes
+  that implement much of this functionality in deal.II certainly look very
+  different from an implementation viewpoint, but this doesn't reflect in
+  their public interface). Rather, the changes are mostly subtle, and the
+  overall structure of the main class is pretty much unchanged. That said, the
+  devil is in the detail: getting %parallel computing right, without
+  deadlocks, ensuring that the right data is available at the right place
+  (see, for example, the discussion on fully distributed vectors vs. vectors
+  with ghost elements), and avoiding bottlenecks is difficult and discussions
+  on this topic will appear in a good number of places in this program.
+
+
+<h3> Outlook </h3>
+
+This is a tutorial program. That means that at least most of its focus needs
+to lie on demonstrating ways of using deal.II and associated libraries, and
+not diluting this teaching lesson by focusing overly much on physical
+details. Despite the lengthy section above on the choice of physical
+parameters, the part of the program devoted to this is actually quite short
+and self-contained.
+
+That said, both step-31 and the current step-32 have not come about by chance
+but are certainly meant as wayposts along the path to a more comprehensive
+program that will simulate convection in the earth mantle. We call this code
+<i>Aspect</i> (short for <i>Advanced %Solver for Problems in Earth's
+ConvecTion</i>); its development is funded by the <a
+href="http://www.geodynamics.org">Computational Infrastructure in
+Geodynamics</a> initiative with support from the National Science
+Foundation. We hope to release this code not long after this tutorial program
+will officially be released as part of deal.II 7.1.
diff --git a/examples/step-32/doc/kind b/examples/step-32/doc/kind
new file mode 100644
index 0000000..e62f4e7
--- /dev/null
+++ b/examples/step-32/doc/kind
@@ -0,0 +1 @@
+fluids
diff --git a/examples/step-32/doc/results.dox b/examples/step-32/doc/results.dox
new file mode 100644
index 0000000..722fbfb
--- /dev/null
+++ b/examples/step-32/doc/results.dox
@@ -0,0 +1,492 @@
+<h1>Results</h1>
+
+When run, the program simulates convection in 3d in much the same way
+as step-31 did, though with an entirely different testcase.
+
+
+<h3>Comparison of results with \step-31</h3>
+
+Before we go to this testcase, however, let us show a few results from a
+slightly earlier version of this program that was solving exactly the
+testcase we used in step-31, just that we now solve it in parallel and with
+much higher resolution. We show these results mainly for comparison.
+
+Here are two images that show this higher resolution if we choose a 3d
+computation in <code>main()</code> and if we set
+<code>initial_refinement=3</code> and
+<code>n_pre_refinement_steps=4</code>. At the time steps shown, the
+meshes had around 72,000 and 236,000 cells, for a total of 2,680,000
+and 8,250,000 degrees of freedom, respectively, more than an order of
+magnitude more than we had available in step-31:
+
+<table align="center" border="1" cellspacing="3" cellpadding="3">
+  <tr>
+    <td>
+        <img src="http://www.dealii.org/images/steps/developer/step-32.3d.cube.0.png" alt="">
+    </td>
+  </tr>
+  <tr>
+    <td>
+        <img src="http://www.dealii.org/images/steps/developer/step-32.3d.cube.1.png" alt="">
+    </td>
+  </tr>
+</table>
+
+The computation was done on a subset of 50 processors of the Brazos
+cluster at Texas A&M University.
+
+
+<h3>Results for a 2d circular shell testcase</h3>
+
+Next, we will run step-32 with the parameter file in the directory. Here we
+are using 50 processors. The command to launch is (note that step-32.prm is
+the default):
+
+<code>
+<pre>
+\$ mpirun -np 50 ./step-32
+</pre>
+</code>
+
+Note that running a job on a cluster typically requires going through a job
+scheduler, which we won't discuss here. The output will look roughly like
+this:
+
+<code>
+<pre>
+Number of active cells: 12,288 (on 6 levels)
+Number of degrees of freedom: 186,624 (99,840+36,864+49,920)
+
+Timestep 0:  t=0 years
+
+   Rebuilding Stokes preconditioner...
+   Solving Stokes system... 40 iterations.
+   Maximal velocity: 60.4935 cm/year
+   Time step: 18166.9 years
+   17 CG iterations for temperature
+   Temperature range: 973 4273.16
+
+Number of active cells: 16,389 (on 7 levels)
+Number of degrees of freedom: 259,617 (140,300+49,167+70,150)
+
+Timestep 0:  t=0 years
+
+   Rebuilding Stokes preconditioner...
+   Solving Stokes system... 49 iterations.
+   Maximal velocity: 60.5202 cm/year
+   Time step: 10619.2 years
+   19 CG iterations for temperature
+   Temperature range: 973 4273.16
+
+Number of active cells: 20,124 (on 8 levels)
+Number of degrees of freedom: 323,145 (175,182+60,372+87,591)
+
+Timestep 0:  t=0 years
+
+   Rebuilding Stokes preconditioner...
+   Solving Stokes system... 51 iterations.
+   Maximal velocity: 59.9101 cm/year
+   Time step: 5334.14 years
+   18 CG iterations for temperature
+   Temperature range: 973 4273.43
+
+Timestep 1:  t=5334.14 years
+
+   Solving Stokes system... 49 iterations.
+   Maximal velocity: 61.0837 cm/year
+   Time step: 5232.24 years
+   18 CG iterations for temperature
+   Temperature range: 973 4273.43
+
+Timestep 2:  t=10566.4 years
+
+   Solving Stokes system... 24 iterations.
+   Maximal velocity: 62.265 cm/year
+   Time step: 5133.56 years
+   18 CG iterations for temperature
+   Temperature range: 973 4273.43
+
+[...]
+
+Timestep 100:  t=270918 years
+
+   Solving Stokes system... 22 iterations.
+   Maximal velocity: 161.391 cm/year
+   Time step: 1683.55 years
+   17 CG iterations for temperature
+   Temperature range: 973 4282.45
+
+Number of active cells: 54,717 (on 8 levels)
+Number of degrees of freedom: 880,626 (477,650+164,151+238,825)
+
+
+
++---------------------------------------------+------------+------------+
+| Total wallclock time elapsed since start    |       170s |            |
+|                                             |            |            |
+| Section                         | no. calls |  wall time | % of total |
++---------------------------------+-----------+------------+------------+
+| Assemble Stokes system          |       103 |      2.25s |       1.3% |
+| Assemble temperature matrices   |        12 |     0.296s |      0.17% |
+| Assemble temperature rhs        |       103 |      9.66s |       5.7% |
+| Build Stokes preconditioner     |        12 |      1.78s |         1% |
+| Solve Stokes system             |       103 |       149s |        88% |
+| Solve temperature system        |       103 |      1.84s |       1.1% |
+| Postprocessing                  |         3 |     0.398s |      0.23% |
+| Refine mesh structure, part 1   |        12 |     0.599s |      0.35% |
+| Refine mesh structure, part 2   |        12 |     0.216s |      0.13% |
+| Setup dof systems               |        13 |      2.15s |       1.3% |
++---------------------------------+-----------+------------+------------+
+
+[...]
+
+Timestep 6946:  t=1.00001e+08 years
+
+   Solving Stokes system... 12 iterations.
+   Maximal velocity: 8.94391 cm/year
+   Time step: 31505.4 years
+   18 CG iterations for temperature
+   Temperature range: 973 4273.04
+
+
+
++---------------------------------------------+------------+------------+
+| Total wallclock time elapsed since start    |  8.86e+03s |            |
+|                                             |            |            |
+| Section                         | no. calls |  wall time | % of total |
++---------------------------------+-----------+------------+------------+
+| Assemble Stokes system          |      6949 |       153s |       1.7% |
+| Assemble temperature matrices   |       697 |      18.2s |       0.2% |
+| Assemble temperature rhs        |      6949 |       600s |       6.8% |
+| Build Stokes preconditioner     |       697 |       115s |       1.3% |
+| Solve Stokes system             |      6949 |  7.56e+03s |        85% |
+| Solve temperature system        |      6949 |       123s |       1.4% |
+| Postprocessing                  |       140 |      18.1s |       0.2% |
+| Refine mesh structure, part 1   |       696 |      34.6s |      0.39% |
+| Refine mesh structure, part 2   |       696 |      12.6s |      0.14% |
+| Setup dof systems               |       697 |       130s |       1.5% |
++---------------------------------+-----------+------------+------------+
+</pre>
+</code>
+
+The simulation terminates when the time reaches the 100 million years
+selected in the input file.  You can extrapolate from this how long a
+simulation would take for a different final time (the time step size
+ultimately settles on somewhere around 20,000 years, so computing for
+one billion years will take 50,000 time steps, give or take 20%).  As
+can be seen here, we spend most of the compute time in assembling
+linear systems and — above all — in solving Stokes
+systems.
+
+We can clearly not show all output files produced by this program when
+run for a longer period of time, so let us
+only show the output from every 2500th time step here (these figures
+are from a version of this tutorial program before the introduction of
+manifolds, when it was not yet possible to arrange vertices along
+circles; the figures may therefore look slightly different if you run
+the current version):
+<table>
+  <tr>
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-32.2d.temperature.0000.png" alt="">
+    </td>
+
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-32.2d.temperature.0100.png" alt="">
+    </td>
+
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-32.2d.temperature.0200.png" alt="">
+    </td>
+  </tr>
+
+  <tr>
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-32.2d.temperature.0300.png" alt="">
+    </td>
+
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-32.2d.temperature.0400.png" alt="">
+    </td>
+
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-32.2d.temperature.0500.png" alt="">
+    </td>
+  </tr>
+
+  <tr>
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-32.2d.temperature.0600.png" alt="">
+    </td>
+
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-32.2d.temperature.0700.png" alt="">
+    </td>
+
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-32.2d.temperature.0800.png" alt="">
+    </td>
+  </tr>
+
+  <tr>
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-32.2d.temperature.0900.png" alt="">
+    </td>
+
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-32.2d.temperature.1000.png" alt="">
+    </td>
+
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-32.2d.temperature.1100.png" alt="">
+    </td>
+  </tr>
+
+  <tr>
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-32.2d.temperature.1200.png" alt="">
+    </td>
+
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-32.2d.temperature.1300.png" alt="">
+    </td>
+
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-32.2d.temperature.1400.png" alt="">
+    </td>
+  </tr>
+
+  <tr>
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-32.2d.temperature.1500.png" alt="">
+    </td>
+
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-32.2d.temperature.1600.png" alt="">
+    </td>
+
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-32.2d.temperature.1700.png" alt="">
+    </td>
+  </tr>
+
+  <tr>
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-32.2d.temperature.1800.png" alt="">
+    </td>
+
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-32.2d.temperature.1900.png" alt="">
+    </td>
+
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-32.2d.temperature.2000.png" alt="">
+    </td>
+  </tr>
+
+
+  <tr>
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-32.2d.temperature.2100.png" alt="">
+    </td>
+
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-32.2d.grid.2100.png" alt="">
+    </td>
+
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-32.2d.partition.2100.png" alt="">
+    </td>
+  </tr>
+</table>
+
+The last two images show the grid as well as the partitioning of the
+mesh for a computation with 10 subdomains on 10 processors. The full dynamics
+of this simulation are really only visible by looking at
+an animation, for example the one <a
+href="http://www.math.tamu.edu/~bangerth/images/pictures/convection-outward/\step-32.2d.convection.gif">shown on
+this site</a>. Beware that this animation is
+about 20MB large, though it is well worth watching due to its almost
+artistic quality.
+
+If you watch the movie, you'll see that the convection pattern goes
+through several stages: First, it gets rid of the unstable temperature
+layering with the hot material overlain by the dense cold
+material. After this great driver is removed and we have a sort of
+stable situation, a few blobs start to separate from the hot boundary
+layer at the inner ring and rise up, with a few cold fingers also
+dropping down from the outer boundary layer. During this phase, the solution
+remains mostly symmetric, reflecting the 12-fold symmetry of the
+original mesh. In a final phase, the fluid enters vigorous chaotic
+stirring in which all symmetries are lost. This is a pattern that then
+continues to dominate flow.
+
+These different phases can also be identified if we look at the
+maximal velocity as a function of time in the simulation:
+
+<img src="http://www.dealii.org/images/steps/developer/step-32.2d.t_vs_vmax.png" alt="">
+
+Here, the velocity (shown in centimeters per year) becomes very large
+(on the order of several meters per year) at the beginning when the
+temperature layering is unstable. It then calms down to relatively
+small values before picking up again in the chaotic stirring
+regime. There, it remains in the range of 10-40 centimeters per year,
+quite within the physically expected region.
+
+
+<h3>Results for a 3d spherical shell testcase</h3>
+
+3d computations are very expensive computationally. Furthermore, as
+seen above, interesting behavior only starts after quite a long time
+requiring more CPU hours than are available on a typical
+cluster. Consequently, rather than showing a complete simulation here,
+let us simply show a couple of pictures we have obtained using the
+successor to this program, called <i>Aspect</i> (short for <i>Advanced
+%Solver for Problems in Earth's ConvecTion</i>), that is being
+developed independently of deal.II and that already incorporates some
+of the extensions discussed below. The following two pictures show
+isocontours of the temperature and the partition of the domain (along
+with the mesh) onto 512 processors:
+
+<p align="center">
+<img src="http://www.dealii.org/images/steps/developer/step-32.3d-sphere.solution.png" alt="">
+
+<img src="http://www.dealii.org/images/steps/developer/step-32.3d-sphere.partition.png" alt="">
+</p>
+
+
+<a name="extensions"></a>
+<h3>Possibilities for extensions</h3>
+
+There are many directions in which this program could be extended. As
+mentioned at the end of the introduction, most of these are under active
+development in the <i>Aspect</i> (short for <i>Advanced %Solver for Problems
+in Earth's ConvecTion</i>) code at the time this tutorial program is being
+finished. Specifically, the following are certainly topics that one should
+address to make the program more useful:
+
+<ul>
+  <li> <b>Adiabatic heating/cooling:</b>
+  The temperature field we get in our simulations after a while
+  is mostly constant with boundary layers at the inner and outer
+  boundary, and streamers of cold and hot material mixing
+  everything. Yet, this doesn't match our expectation that things
+  closer to the earth core should be hotter than closer to the
+  surface. The reason is that the energy equation we have used does
+  not include a term that describes adiabatic cooling and heating:
+  rock, like gas, heats up as you compress it. Consequently, material
+  that rises up cools adiabatically, and cold material that sinks down
+  heats adiabatically. The correct temperature equation would
+  therefore look somewhat like this:
+  @f{eqnarray*}
+    \frac{D T}{Dt}
+    -
+    \nabla \cdot \kappa \nabla T &=& \gamma + \tau\frac{Dp}{Dt},
+  @f}
+  or, expanding the advected derivative $\frac{D}{Dt} =
+  \frac{\partial}{\partial t} + \mathbf u \cdot \nabla$:
+  @f{eqnarray*}
+    \frac{\partial T}{\partial t}
+    +
+    {\mathbf u} \cdot \nabla T
+    -
+    \nabla \cdot \kappa \nabla T &=& \gamma +
+    \tau\left\{\frac{\partial
+    p}{\partial t} + \mathbf u \cdot \nabla p \right\}.
+  @f}
+  In other words, as pressure increases in a rock volume
+  ($\frac{Dp}{Dt}>0$) we get an additional heat source, and vice
+  versa.
+
+  The time derivative of the pressure is a bit awkward to
+  implement. If necessary, one could approximate using the fact
+  outlined in the introduction that the pressure can be decomposed
+  into a dynamic component due to temperature differences and the
+  resulting flow, and a static component that results solely from the
+  static pressure of the overlying rock. Since the latter is much
+  bigger, one may approximate $p\approx p_{\text{static}}=-\rho_{\text{ref}}
+  [1+\beta T_{\text{ref}}] \varphi$, and consequently
+  $\frac{Dp}{Dt} \approx \left\{- \mathbf u \cdot \nabla \rho_{\text{ref}}
+  [1+\beta T_{\text{ref}}]\varphi\right\} = \rho_{\text{ref}}
+  [1+\beta T_{\text{ref}}] \mathbf u \cdot \mathbf g$.
+  In other words, if the fluid is moving in the direction of gravity
+  (downward) it will be compressed and because in that case $\mathbf u
+  \cdot \mathbf g > 0$ we get a positive heat source. Conversely, the
+  fluid will cool down if it moves against the direction of gravity.
+
+<li> <b>Compressibility:</b>
+  As already hinted at in the temperature model above,
+  mantle rocks are not incompressible. Rather, given the enormous pressures in
+  the earth mantle (at the core-mantle boundary, the pressure is approximately
+  140 GPa, equivalent to 1,400,000 times atmospheric pressure), rock actually
+  does compress to something around 1.5 times the density it would have
+  at surface pressure. Modeling this presents any number of
+  difficulties. Primarily, the mass conservation equation is no longer
+  $\textrm{div}\;\mathbf u=0$ but should read
+  $\textrm{div}(\rho\mathbf u)=0$ where the density $\rho$ is now no longer
+  spatially constant but depends on temperature and pressure. A consequence is
+  that the model is now no longer linear; a linearized version of the Stokes
+  equation is also no longer symmetric requiring us to rethink preconditioners
+  and, possibly, even the discretization. We won't go into detail here as to
+  how this can be resolved.
+
+<li> <b>Nonlinear material models:</b> As already hinted at in various places,
+  material parameters such as the density, the viscosity, and the various
+  thermal parameters are not constant throughout the earth mantle. Rather,
+  they nonlinearly depend on the pressure and temperature, and in the case of
+  the viscosity on the strain rate $\varepsilon(\mathbf u)$. For complicated
+  models, the only way to solve such models accurately may be to actually
+  iterate this dependence out in each time step, rather than simply freezing
+  coefficients at values extrapolated from the previous time step(s).
+
+<li> <b>Checkpoint/restart:</b> Running this program in 2d on a number of
+  processors allows solving realistic models in a day or two. However, in 3d,
+  compute times are so large that one runs into two typical problems: (i) On
+  most compute clusters, the queuing system limits run times for individual
+  jobs to 2 or 3 days; (ii) losing the results of a computation due to
+  hardware failures, misconfigurations, or power outages is a shame when
+  running on hundreds of processors for a couple of days. Both of these
+  problems can be addressed by periodically saving the state of the program
+  and, if necessary, restarting the program at this point. This technique is
+  commonly called <i>checkpoint/restart</i> and it requires that the entire
+  state of the program is written to a permanent storage location (e.g. a hard
+  drive). Given the complexity of the data structures of this program, this is
+  not entirely trivial (it may also involve writing gigabytes or more of
+  data), but it can be made easier by realizing that one can save the state
+  between two time steps where it essentially only consists of the mesh and
+  solution vectors; during restart one would then first re-enumerate degrees
+  of freedom in the same way as done before and then re-assemble
+  matrices. Nevertheless, given the distributed nature of the data structures
+  involved here, saving and restoring the state of a program is not
+  trivial. An additional complexity is introduced by the fact that one may
+  want to change the number of processors between runs, for example because
+  one may wish to continue computing on a mesh that is finer than the one used
+  to precompute a starting temperature field at an intermediate time.
+
+<li> <b>Predictive postprocessing:</b> The point of computations like this is
+  not simply to solve the equations. Rather, it is typically the exploration
+  of different physical models and their comparison with things that we can
+  measure at the earth surface, in order to find which models are realistic
+  and which are contradicted by reality. To this end, we need to compute
+  quantities from our solution vectors that are related to what we can
+  observe. Among these are, for example, heat fluxes at the surface of the
+  earth, as well as seismic velocities throughout the mantle as these affect
+  earthquake waves that are recorded by seismographs.
+
+<li> <b>Better refinement criteria:</b> As can be seen above for the
+3d case, the mesh in 3d is primarily refined along the inner
+boundary. This is because the boundary layer there is stronger than
+any other transition in the domain, leading us to refine there almost
+exclusively and basically not at all following the plumes. One
+certainly needs better refinement criteria to track the parts of the
+solution we are really interested in better than the criterion used
+here, namely the KellyErrorEstimator applied to the temperature, is
+able to.
+</ul>
+
+
+There are many other ways to extend the current program. However, rather than
+discussing them here, let us point to the much larger open
+source code Aspect (see http://aspect.dealii.org/ ) that constitutes the
+further development of step-32 and that already includes many such possible
+extensions.
diff --git a/examples/step-32/doc/tooltip b/examples/step-32/doc/tooltip
new file mode 100644
index 0000000..fa82da6
--- /dev/null
+++ b/examples/step-32/doc/tooltip
@@ -0,0 +1 @@
+A parallel Boussinesq flow solver for thermal convection in the earth mantle.
diff --git a/examples/step-32/step-32.cc b/examples/step-32/step-32.cc
new file mode 100644
index 0000000..7acc4f3
--- /dev/null
+++ b/examples/step-32/step-32.cc
@@ -0,0 +1,3778 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2008 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Authors: Martin Kronbichler, Uppsala University,
+ *          Wolfgang Bangerth, Texas A&M University,
+ *          Timo Heister, University of Goettingen, 2008-2011
+ */
+
+
+// @sect3{Include files}
+
+// The first task as usual is to include the functionality of these well-known
+// deal.II library files and some C++ header files.
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/conditional_ostream.h>
+#include <deal.II/base/work_stream.h>
+#include <deal.II/base/timer.h>
+#include <deal.II/base/parameter_handler.h>
+
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/solver_bicgstab.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/solver_gmres.h>
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/lac/block_sparsity_pattern.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+#include <deal.II/lac/trilinos_sparse_matrix.h>
+#include <deal.II/lac/trilinos_block_sparse_matrix.h>
+#include <deal.II/lac/trilinos_precondition.h>
+#include <deal.II/lac/trilinos_solver.h>
+
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/filtered_iterator.h>
+#include <deal.II/grid/manifold_lib.h>
+#include <deal.II/grid/tria_boundary_lib.h>
+#include <deal.II/grid/grid_tools.h>
+#include <deal.II/grid/grid_refinement.h>
+
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_renumbering.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_dgq.h>
+#include <deal.II/fe/fe_dgp.h>
+#include <deal.II/fe/fe_system.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/mapping_q.h>
+
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/matrix_tools.h>
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/numerics/error_estimator.h>
+#include <deal.II/numerics/solution_transfer.h>
+
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <limits>
+#include <locale>
+#include <string>
+
+// This is the only include file that is new: It introduces the
+// parallel::distributed::SolutionTransfer equivalent of the
+// dealii::SolutionTransfer class to take a solution from one mesh to the next
+// one upon mesh refinement, but in the case of parallel distributed
+// triangulations:
+#include <deal.II/distributed/solution_transfer.h>
+
+// The following classes are used in parallel distributed computations and
+// have all already been introduced in step-40:
+#include <deal.II/base/index_set.h>
+#include <deal.II/distributed/tria.h>
+#include <deal.II/distributed/grid_refinement.h>
+
+
+// The next step is like in all previous tutorial programs: We put everything
+// into a namespace of its own and then import the deal.II classes and
+// functions into it:
+namespace Step32
+{
+  using namespace dealii;
+
+  // @sect3{Equation data}
+
+  // In the following namespace, we define the various pieces of equation data
+  // that describe the problem. This corresponds to the various aspects of
+  // making the problem at least slightly realistic and that were exhaustively
+  // discussed in the description of the testcase in the introduction.
+  //
+  // We start with a few coefficients that have constant values (the comment
+  // after the value indicates its physical units):
+  namespace EquationData
+  {
+    const double eta                   = 1e21;    /* Pa s       */
+    const double kappa                 = 1e-6;    /* m^2 / s    */
+    const double reference_density     = 3300;    /* kg / m^3   */
+    const double reference_temperature = 293;     /* K          */
+    const double expansion_coefficient = 2e-5;    /* 1/K        */
+    const double specific_heat         = 1250;    /* J / K / kg */
+    const double radiogenic_heating    = 7.4e-12; /* W / kg     */
+
+
+    const double R0      = 6371000.-2890000.;     /* m          */
+    const double R1      = 6371000.-  35000.;     /* m          */
+
+    const double T0      = 4000+273;              /* K          */
+    const double T1      =  700+273;              /* K          */
+
+
+    // The next set of definitions are for functions that encode the density
+    // as a function of temperature, the gravity vector, and the initial
+    // values for the temperature. Again, all of these (along with the values
+    // they compute) are discussed in the introduction:
+    double density (const double temperature)
+    {
+      return (reference_density *
+              (1 - expansion_coefficient * (temperature -
+                                            reference_temperature)));
+    }
+
+
+    template <int dim>
+    Tensor<1,dim> gravity_vector (const Point<dim> &p)
+    {
+      const double r = p.norm();  // divides below: p must not be the origin
+      return -(1.245e-6 * r + 7.714e13/r/r) * p / r;  // magnitude times -p/r
+    }
+
+
+
+    template <int dim>
+    class TemperatureInitialValues : public Function<dim>
+    {
+    public:
+      TemperatureInitialValues () : Function<dim>(1) {}
+
+      virtual double value (const Point<dim>   &p,
+                            const unsigned int  component = 0) const;
+
+      virtual void vector_value (const Point<dim> &p,
+                                 Vector<double>   &value) const;
+    };
+
+
+
+    template <int dim>
+    double
+    TemperatureInitialValues<dim>::value (const Point<dim>  &p,
+                                          const unsigned int) const
+    {
+      const double r = p.norm();  // distance of p from the origin
+      const double h = R1-R0;     // radial distance between R0 and R1
+      // Use std::abs/std::cos: unqualified abs may bind to the C int overload.
+      const double s = (r-R0)/h;  // 0 at r=R0, 1 at r=R1
+      const double q = (dim==3)?std::max(0.0,std::cos(numbers::PI*std::abs(p(2)/R1))):1.0;
+      const double phi   = std::atan2(p(0),p(1));
+      const double tau = s
+                         +
+                         0.2 * s * (1-s) * std::sin(6*phi) * q;
+
+      return T0*(1.0-tau) + T1*tau;  // T0 where tau=0, T1 where tau=1
+    }
+
+
+    template <int dim>
+    void
+    TemperatureInitialValues<dim>::vector_value (const Point<dim> &p,
+                                                 Vector<double>   &values) const
+    {
+      for (unsigned int c=0; c<this->n_components; ++c)
+        values(c) = TemperatureInitialValues<dim>::value (p, c);
+    }
+
+
+    // As mentioned in the introduction we need to rescale the pressure to
+    // avoid the relative ill-conditioning of the momentum and mass
+    // conservation equations. The scaling factor is $\frac{\eta}{L}$ where
+    // $L$ is a typical length scale. By experimenting it turns out that a
+    // good length scale is the diameter of plumes, which is around 10 km:
+    const double pressure_scaling = eta / 10000;
+
+    // The final number in this namespace is a constant that denotes the
+    // number of seconds per (average, tropical) year. We use this only when
+    // generating screen output: internally, all computations of this program
+    // happen in SI units (kilogram, meter, seconds) but writing geological
+    // times in seconds yields numbers that one can't relate to reality, and
+    // so we convert to years using the factor defined here:
+    const double year_in_seconds  = 60*60*24*365.2425;
+
+  }
+
+
+
+  // @sect3{Preconditioning the Stokes system}
+
+  // This namespace implements the preconditioner. As discussed in the
+  // introduction, this preconditioner differs in a number of key portions
+  // from the one used in step-31. Specifically, it is a right preconditioner,
+  // implementing the matrix
+  // @f{align*}
+  //   \left(\begin{array}{cc}A^{-1} & B^T
+  //                        \\0 & S^{-1}
+  // \end{array}\right)
+  // @f}
+  // where the two inverse matrix operations
+  // are approximated by linear solvers or, if the right flag is given to the
+  // constructor of this class, by a single AMG V-cycle for the velocity
+  // block. The three code blocks of the <code>vmult</code> function implement
+  // the multiplications with the three blocks of this preconditioner matrix
+  // and should be self explanatory if you have read through step-31 or the
+  // discussion of composing solvers in step-20.
+  namespace LinearSolvers
+  {
+    template <class PreconditionerA, class PreconditionerMp>
+    class BlockSchurPreconditioner : public Subscriptor
+    {
+    public:
+      BlockSchurPreconditioner (const TrilinosWrappers::BlockSparseMatrix  &S,
+                                const TrilinosWrappers::BlockSparseMatrix  &Spre,
+                                const PreconditionerMp                     &Mppreconditioner,
+                                const PreconditionerA                      &Apreconditioner,
+                                const bool                                  do_solve_A)
+        :  // only pointers/references are stored; the arguments must outlive this object
+        stokes_matrix     (&S),
+        stokes_preconditioner_matrix     (&Spre),
+        mp_preconditioner (Mppreconditioner),
+        a_preconditioner  (Apreconditioner),
+        do_solve_A        (do_solve_A)
+      {}
+
+      void vmult (TrilinosWrappers::MPI::BlockVector       &dst,
+                  const TrilinosWrappers::MPI::BlockVector &src) const
+      {
+        TrilinosWrappers::MPI::Vector utmp(src.block(0));  // work vector, overwritten below
+
+        {  // step 1: CG solve with Spre(1,1) for dst(1) from src(1), then flip the sign
+          SolverControl solver_control(5000, 1e-6 * src.block(1).l2_norm());
+
+          SolverCG<TrilinosWrappers::MPI::Vector> solver(solver_control);
+
+          solver.solve(stokes_preconditioner_matrix->block(1,1),
+                       dst.block(1), src.block(1),
+                       mp_preconditioner);
+
+          dst.block(1) *= -1.0;
+        }
+
+        {  // step 2: utmp = src(0) - S(0,1) * dst(1)
+          stokes_matrix->block(0,1).vmult(utmp, dst.block(1));
+          utmp*=-1.0;
+          utmp.add(src.block(0));
+        }
+
+        if (do_solve_A == true)  // step 3: dst(0) from utmp, by a solve or one preconditioner call
+          {
+            SolverControl solver_control(5000, utmp.l2_norm()*1e-2);
+            TrilinosWrappers::SolverCG solver(solver_control);
+            solver.solve(stokes_matrix->block(0,0), dst.block(0), utmp,
+                         a_preconditioner);
+          }
+        else
+          a_preconditioner.vmult (dst.block(0), utmp);
+      }
+
+    private:
+      const SmartPointer<const TrilinosWrappers::BlockSparseMatrix> stokes_matrix;  // blocks (0,0), (0,1) used
+      const SmartPointer<const TrilinosWrappers::BlockSparseMatrix> stokes_preconditioner_matrix;  // block (1,1) used
+      const PreconditionerMp &mp_preconditioner;  // preconditions the block (1,1) CG solve
+      const PreconditionerA  &a_preconditioner;   // preconditions, or replaces, the block (0,0) solve
+      const bool do_solve_A;  // true: CG solve with block (0,0); false: one a_preconditioner.vmult
+    };
+  }
+
+
+
+  // @sect3{Definition of assembly data structures}
+  //
+  // As described in the introduction, we will use the WorkStream mechanism
+  // discussed in the @ref threads module to parallelize operations among the
+  // processors of a single machine. The WorkStream class requires that data
+  // is passed around in two kinds of data structures, one for scratch data
+  // and one to pass data from the assembly function to the function that
+  // copies local contributions into global objects.
+  //
+  // The following namespace (and the two sub-namespaces) contains a
+  // collection of data structures that serve this purpose, one pair for each
+  // of the four operations discussed in the introduction that we will want to
+  // parallelize. Each assembly routine gets two sets of data: a Scratch array
+  // that collects all the classes and arrays that are used for the
+  // calculation of the cell contribution, and a CopyData array that keeps
+  // local matrices and vectors which will be written into the global
+  // matrix. Whereas CopyData is a container for the final data that is
+  // written into the global matrices and vector (and, thus, absolutely
+  // necessary), the Scratch arrays are merely there for performance reasons
+  // — it would be much more expensive to set up a FEValues object on
+  // each cell, than creating it only once and updating some derivative data.
+  //
+  // Step-31 had four assembly routines: One for the preconditioner matrix of
+  // the Stokes system, one for the Stokes matrix and right hand side, one for
+  // the temperature matrices and one for the right hand side of the
+  // temperature equation. We here organize the scratch arrays and CopyData
+  // objects for each of those four assembly components using a
+  // <code>struct</code> environment (since we consider these as temporary
+  // objects we pass around, rather than classes that implement functionality
+  // of their own, though this is a more subjective point of view to
+  // distinguish between <code>struct</code>s and <code>class</code>es).
+  //
+  // Regarding the Scratch objects, each struct is equipped with a constructor
+  // that creates an @ref FEValues object using the @ref FiniteElement,
+  // Quadrature, @ref Mapping (which describes the interpolation of curved
+  // boundaries), and @ref UpdateFlags instances. Moreover, we manually
+  // implement a copy constructor (since the FEValues class is not copyable by
+  // itself), and provide some additional vector fields that are used to hold
+  // intermediate data during the computation of local contributions.
+  //
+  // Let us start with the scratch arrays and, specifically, the one used for
+  // assembly of the Stokes preconditioner:
+  namespace Assembly
+  {
+    namespace Scratch
+    {
+      template <int dim>
+      struct StokesPreconditioner
+      {
+        StokesPreconditioner (const FiniteElement<dim> &stokes_fe,
+                              const Quadrature<dim>    &stokes_quadrature,
+                              const Mapping<dim>       &mapping,
+                              const UpdateFlags         update_flags);
+
+        StokesPreconditioner (const StokesPreconditioner &data);
+
+
+        FEValues<dim>               stokes_fe_values;
+
+        std::vector<Tensor<2,dim> > grad_phi_u;
+        std::vector<double>         phi_p;
+      };
+
+      template <int dim>
+      StokesPreconditioner<dim>::
+      StokesPreconditioner (const FiniteElement<dim> &stokes_fe,
+                            const Quadrature<dim>    &stokes_quadrature,
+                            const Mapping<dim>       &mapping,
+                            const UpdateFlags         update_flags)
+        :
+        stokes_fe_values (mapping, stokes_fe, stokes_quadrature,
+                          update_flags),
+        grad_phi_u (stokes_fe.dofs_per_cell),
+        phi_p (stokes_fe.dofs_per_cell)
+      {}
+
+
+
+      template <int dim>
+      StokesPreconditioner<dim>::
+      StokesPreconditioner (const StokesPreconditioner &scratch)
+        :
+        stokes_fe_values (scratch.stokes_fe_values.get_mapping(),
+                          scratch.stokes_fe_values.get_fe(),
+                          scratch.stokes_fe_values.get_quadrature(),
+                          scratch.stokes_fe_values.get_update_flags()),
+        grad_phi_u (scratch.grad_phi_u),
+        phi_p (scratch.phi_p)
+      {}
+
+
+
+      // The next one is the scratch object used for the assembly of the full
+      // Stokes system. Observe that we derive the StokesSystem scratch class
+      // from the StokesPreconditioner class above. We do this because all the
+      // objects that are necessary for the assembly of the preconditioner are
+      // also needed for the actual matrix system and right hand side, plus
+      // some extra data. This makes the program more compact. Note also that
+      // the assembly of the Stokes system and the temperature right hand side
+      // further down requires data from temperature and velocity,
+      // respectively, so we actually need two FEValues objects for those two
+      // cases.
+      template <int dim>
+      struct StokesSystem : public StokesPreconditioner<dim>
+      {
+        StokesSystem (const FiniteElement<dim> &stokes_fe,
+                      const Mapping<dim>       &mapping,
+                      const Quadrature<dim>    &stokes_quadrature,
+                      const UpdateFlags         stokes_update_flags,
+                      const FiniteElement<dim> &temperature_fe,
+                      const UpdateFlags         temperature_update_flags);
+
+        StokesSystem (const StokesSystem<dim> &data);
+
+
+        FEValues<dim>                        temperature_fe_values;
+
+        std::vector<Tensor<1,dim> >          phi_u;
+        std::vector<SymmetricTensor<2,dim> > grads_phi_u;
+        std::vector<double>                  div_phi_u;
+
+        std::vector<double>                  old_temperature_values;
+      };
+
+
+      template <int dim>
+      StokesSystem<dim>::
+      StokesSystem (const FiniteElement<dim> &stokes_fe,
+                    const Mapping<dim>       &mapping,
+                    const Quadrature<dim>    &stokes_quadrature,
+                    const UpdateFlags         stokes_update_flags,
+                    const FiniteElement<dim> &temperature_fe,
+                    const UpdateFlags         temperature_update_flags)
+        :
+        StokesPreconditioner<dim> (stokes_fe, stokes_quadrature,  // base takes quadrature before mapping
+                                   mapping,
+                                   stokes_update_flags),
+        temperature_fe_values (mapping, temperature_fe, stokes_quadrature,  // same quadrature as the Stokes part
+                               temperature_update_flags),
+        phi_u (stokes_fe.dofs_per_cell),        // these three: one entry per Stokes dof on a cell
+        grads_phi_u (stokes_fe.dofs_per_cell),
+        div_phi_u (stokes_fe.dofs_per_cell),
+        old_temperature_values (stokes_quadrature.size())  // one entry per quadrature point
+      {}
+
+
+      template <int dim>
+      StokesSystem<dim>::
+      StokesSystem (const StokesSystem<dim> &scratch)
+        :
+        StokesPreconditioner<dim> (scratch),
+        temperature_fe_values (scratch.temperature_fe_values.get_mapping(),
+                               scratch.temperature_fe_values.get_fe(),
+                               scratch.temperature_fe_values.get_quadrature(),
+                               scratch.temperature_fe_values.get_update_flags()),
+        phi_u (scratch.phi_u),
+        grads_phi_u (scratch.grads_phi_u),
+        div_phi_u (scratch.div_phi_u),
+        old_temperature_values (scratch.old_temperature_values)
+      {}
+
+
+      // After defining the objects used in the assembly of the Stokes system,
+      // we do the same for the assembly of the matrices necessary for the
+      // temperature system. The general structure is very similar:
+      template <int dim>
+      struct TemperatureMatrix
+      {
+        TemperatureMatrix (const FiniteElement<dim> &temperature_fe,
+                           const Mapping<dim>       &mapping,
+                           const Quadrature<dim>    &temperature_quadrature);
+
+        TemperatureMatrix (const TemperatureMatrix &data);
+
+
+        FEValues<dim>               temperature_fe_values;
+
+        std::vector<double>         phi_T;
+        std::vector<Tensor<1,dim> > grad_phi_T;
+      };
+
+
+      template <int dim>
+      TemperatureMatrix<dim>::
+      TemperatureMatrix (const FiniteElement<dim> &temperature_fe,
+                         const Mapping<dim>       &mapping,
+                         const Quadrature<dim>    &temperature_quadrature)
+        :
+        temperature_fe_values (mapping,
+                               temperature_fe, temperature_quadrature,
+                               update_values    | update_gradients |
+                               update_JxW_values),
+        phi_T (temperature_fe.dofs_per_cell),
+        grad_phi_T (temperature_fe.dofs_per_cell)
+      {}
+
+
+      template <int dim>
+      TemperatureMatrix<dim>::
+      TemperatureMatrix (const TemperatureMatrix &scratch)
+        :
+        temperature_fe_values (scratch.temperature_fe_values.get_mapping(),
+                               scratch.temperature_fe_values.get_fe(),
+                               scratch.temperature_fe_values.get_quadrature(),
+                               scratch.temperature_fe_values.get_update_flags()),
+        phi_T (scratch.phi_T),
+        grad_phi_T (scratch.grad_phi_T)
+      {}
+
+
+      // The final scratch object is used in the assembly of the right hand
+      // side of the temperature system. This object is significantly larger
+      // than the ones above because a lot more quantities enter the
+      // computation of the right hand side of the temperature equation. In
+      // particular, the temperature values and gradients of the previous two
+      // time steps need to be evaluated at the quadrature points, as well as
+      // the velocities and the strain rates (i.e. the symmetric gradients of
+      // the velocity) that enter the right hand side as friction heating
+      // terms. Despite the number of terms, the following should be rather
+      // self explanatory:
+      template <int dim>
+      struct TemperatureRHS
+      {
+        TemperatureRHS (const FiniteElement<dim> &temperature_fe,
+                        const FiniteElement<dim> &stokes_fe,
+                        const Mapping<dim>       &mapping,
+                        const Quadrature<dim>    &quadrature);
+
+        TemperatureRHS (const TemperatureRHS &data);
+
+
+        FEValues<dim>                        temperature_fe_values;
+        FEValues<dim>                        stokes_fe_values;
+
+        std::vector<double>                  phi_T;
+        std::vector<Tensor<1,dim> >          grad_phi_T;
+
+        std::vector<Tensor<1,dim> >          old_velocity_values;
+        std::vector<Tensor<1,dim> >          old_old_velocity_values;
+
+        std::vector<SymmetricTensor<2,dim> > old_strain_rates;
+        std::vector<SymmetricTensor<2,dim> > old_old_strain_rates;
+
+        std::vector<double>                  old_temperature_values;
+        std::vector<double>                  old_old_temperature_values;
+        std::vector<Tensor<1,dim> >          old_temperature_grads;
+        std::vector<Tensor<1,dim> >          old_old_temperature_grads;
+        std::vector<double>                  old_temperature_laplacians;
+        std::vector<double>                  old_old_temperature_laplacians;
+      };
+
+
+      template <int dim>
+      TemperatureRHS<dim>::
+      TemperatureRHS (const FiniteElement<dim> &temperature_fe,
+                      const FiniteElement<dim> &stokes_fe,
+                      const Mapping<dim>       &mapping,
+                      const Quadrature<dim>    &quadrature)
+        :
+        temperature_fe_values (mapping,
+                               temperature_fe, quadrature,
+                               update_values    |
+                               update_gradients |
+                               update_hessians  |
+                               update_quadrature_points |
+                               update_JxW_values),
+        stokes_fe_values (mapping,
+                          stokes_fe, quadrature,
+                          update_values | update_gradients),
+        phi_T (temperature_fe.dofs_per_cell),
+        grad_phi_T (temperature_fe.dofs_per_cell),
+
+        old_velocity_values (quadrature.size()),
+        old_old_velocity_values (quadrature.size()),
+        old_strain_rates (quadrature.size()),
+        old_old_strain_rates (quadrature.size()),
+
+        old_temperature_values (quadrature.size()),
+        old_old_temperature_values(quadrature.size()),
+        old_temperature_grads(quadrature.size()),
+        old_old_temperature_grads(quadrature.size()),
+        old_temperature_laplacians(quadrature.size()),
+        old_old_temperature_laplacians(quadrature.size())
+      {}
+
+
+      template <int dim>
+      TemperatureRHS<dim>::
+      TemperatureRHS (const TemperatureRHS &scratch)
+        :
+        temperature_fe_values (scratch.temperature_fe_values.get_mapping(),
+                               scratch.temperature_fe_values.get_fe(),
+                               scratch.temperature_fe_values.get_quadrature(),
+                               scratch.temperature_fe_values.get_update_flags()),
+        stokes_fe_values (scratch.stokes_fe_values.get_mapping(),
+                          scratch.stokes_fe_values.get_fe(),
+                          scratch.stokes_fe_values.get_quadrature(),
+                          scratch.stokes_fe_values.get_update_flags()),
+        phi_T (scratch.phi_T),
+        grad_phi_T (scratch.grad_phi_T),
+
+        old_velocity_values (scratch.old_velocity_values),
+        old_old_velocity_values (scratch.old_old_velocity_values),
+        old_strain_rates (scratch.old_strain_rates),
+        old_old_strain_rates (scratch.old_old_strain_rates),
+
+        old_temperature_values (scratch.old_temperature_values),
+        old_old_temperature_values (scratch.old_old_temperature_values),
+        old_temperature_grads (scratch.old_temperature_grads),
+        old_old_temperature_grads (scratch.old_old_temperature_grads),
+        old_temperature_laplacians (scratch.old_temperature_laplacians),
+        old_old_temperature_laplacians (scratch.old_old_temperature_laplacians)
+      {}
+    }
+
+
+    // The CopyData objects are even simpler than the Scratch objects as all
+    // they have to do is to store the results of local computations until
+    // they can be copied into the global matrix or vector objects. These
+    // structures therefore only need to provide a constructor, a copy
+    // operation, and some arrays for local matrix, local vectors and the
+    // relation between local and global degrees of freedom (a.k.a.
+    // <code>local_dof_indices</code>). Again, we have one such structure for
+    // each of the four operations we will parallelize using the WorkStream
+    // class:
+    namespace CopyData
+    {
+      template <int dim>
+      struct StokesPreconditioner
+      {
+        StokesPreconditioner (const FiniteElement<dim> &stokes_fe);
+        StokesPreconditioner (const StokesPreconditioner &data);
+
+        FullMatrix<double>          local_matrix;
+        std::vector<types::global_dof_index> local_dof_indices;
+      };
+
+      template <int dim>
+      StokesPreconditioner<dim>::
+      StokesPreconditioner (const FiniteElement<dim> &stokes_fe)
+        :
+        local_matrix (stokes_fe.dofs_per_cell,
+                      stokes_fe.dofs_per_cell),
+        local_dof_indices (stokes_fe.dofs_per_cell)
+      {}
+
+      template <int dim>
+      StokesPreconditioner<dim>::
+      StokesPreconditioner (const StokesPreconditioner &data)
+        :
+        local_matrix (data.local_matrix),
+        local_dof_indices (data.local_dof_indices)
+      {}
+
+
+
+      template <int dim>
+      struct StokesSystem : public StokesPreconditioner<dim>
+      {
+        StokesSystem (const FiniteElement<dim> &stokes_fe);
+        StokesSystem (const StokesSystem<dim> &data);
+
+        Vector<double> local_rhs;
+      };
+
+      template <int dim>
+      StokesSystem<dim>::
+      StokesSystem (const FiniteElement<dim> &stokes_fe)
+        :
+        StokesPreconditioner<dim> (stokes_fe),
+        local_rhs (stokes_fe.dofs_per_cell)
+      {}
+
+      template <int dim>
+      StokesSystem<dim>::
+      StokesSystem (const StokesSystem<dim> &data)
+        :
+        StokesPreconditioner<dim> (data),
+        local_rhs (data.local_rhs)
+      {}
+
+
+
+      template <int dim>
+      struct TemperatureMatrix
+      {
+        TemperatureMatrix (const FiniteElement<dim> &temperature_fe);
+        TemperatureMatrix (const TemperatureMatrix &data);
+
+        FullMatrix<double>          local_mass_matrix;
+        FullMatrix<double>          local_stiffness_matrix;
+        std::vector<types::global_dof_index>   local_dof_indices;
+      };
+
+      template <int dim>
+      TemperatureMatrix<dim>::
+      TemperatureMatrix (const FiniteElement<dim> &temperature_fe)
+        :
+        local_mass_matrix (temperature_fe.dofs_per_cell,
+                           temperature_fe.dofs_per_cell),
+        local_stiffness_matrix (temperature_fe.dofs_per_cell,
+                                temperature_fe.dofs_per_cell),
+        local_dof_indices (temperature_fe.dofs_per_cell)
+      {}
+
+      template <int dim>
+      TemperatureMatrix<dim>::
+      TemperatureMatrix (const TemperatureMatrix &data)
+        :
+        local_mass_matrix (data.local_mass_matrix),
+        local_stiffness_matrix (data.local_stiffness_matrix),
+        local_dof_indices (data.local_dof_indices)
+      {}
+
+
+
+      template <int dim>
+      struct TemperatureRHS
+      {
+        TemperatureRHS (const FiniteElement<dim> &temperature_fe);
+        TemperatureRHS (const TemperatureRHS &data);
+
+        Vector<double>              local_rhs;
+        std::vector<types::global_dof_index> local_dof_indices;
+        FullMatrix<double>          matrix_for_bc;
+      };
+
+      template <int dim>
+      TemperatureRHS<dim>::
+      TemperatureRHS (const FiniteElement<dim> &temperature_fe)
+        :
+        local_rhs (temperature_fe.dofs_per_cell),
+        local_dof_indices (temperature_fe.dofs_per_cell),
+        matrix_for_bc (temperature_fe.dofs_per_cell,
+                       temperature_fe.dofs_per_cell)
+      {}
+
+      template <int dim>
+      TemperatureRHS<dim>::
+      TemperatureRHS (const TemperatureRHS &data)
+        :
+        local_rhs (data.local_rhs),
+        local_dof_indices (data.local_dof_indices),
+        matrix_for_bc (data.matrix_for_bc)
+      {}
+    }
+  }
+
+
+
+  // @sect3{The <code>BoussinesqFlowProblem</code> class template}
+  //
+  // This is the declaration of the main class. It is very similar to step-31
+  // but there are a number of differences we will comment on below.
+  //
+  // The top of the class is essentially the same as in step-31, listing the
+  // public methods and a set of private functions that do the heavy
+  // lifting. Compared to step-31 there are only two additions to this
+  // section: the function <code>get_cfl_number()</code> that computes the
+  // maximum CFL number over all cells which we then compute the global time
+  // step from, and the function <code>get_entropy_variation()</code> that is
+  // used in the computation of the entropy stabilization. It is akin to the
+  // <code>get_extrapolated_temperature_range()</code> we have used in step-31
+  // for this purpose, but works on the entropy rather than on the
+  // temperature.
+  template <int dim>
+  class BoussinesqFlowProblem
+  {
+  public:
+    struct Parameters;
+    BoussinesqFlowProblem (Parameters &parameters);
+    void run ();
+
+  private:
+    void setup_dofs ();
+    void assemble_stokes_preconditioner ();
+    void build_stokes_preconditioner ();
+    void assemble_stokes_system ();
+    void assemble_temperature_matrix ();
+    void assemble_temperature_system (const double maximal_velocity);
+    void project_temperature_field ();
+    double get_maximal_velocity () const;
+    double get_cfl_number () const;
+    double get_entropy_variation (const double average_temperature) const;
+    std::pair<double,double> get_extrapolated_temperature_range () const;
+    void solve ();
+    void output_results ();
+    void refine_mesh (const unsigned int max_grid_level);
+
+    double
+    compute_viscosity(const std::vector<double>          &old_temperature,
+                      const std::vector<double>          &old_old_temperature,
+                      const std::vector<Tensor<1,dim> >  &old_temperature_grads,
+                      const std::vector<Tensor<1,dim> >  &old_old_temperature_grads,
+                      const std::vector<double>          &old_temperature_laplacians,
+                      const std::vector<double>          &old_old_temperature_laplacians,
+                      const std::vector<Tensor<1,dim> >  &old_velocity_values,
+                      const std::vector<Tensor<1,dim> >  &old_old_velocity_values,
+                      const std::vector<SymmetricTensor<2,dim> >  &old_strain_rates,
+                      const std::vector<SymmetricTensor<2,dim> >  &old_old_strain_rates,
+                      const double                        global_u_infty,
+                      const double                        global_T_variation,
+                      const double                        average_temperature,
+                      const double                        global_entropy_variation,
+                      const double                        cell_diameter) const;
+
+  public:
+
+    // The first significant new component is the definition of a struct for
+    // the parameters according to the discussion in the introduction. This
+    // structure is initialized by reading from a parameter file during
+    // construction of this object.
+    struct Parameters
+    {
+      Parameters (const std::string &parameter_filename);
+
+      static void declare_parameters (ParameterHandler &prm);
+      void parse_parameters (ParameterHandler &prm);
+
+      double       end_time;
+
+      unsigned int initial_global_refinement;
+      unsigned int initial_adaptive_refinement;
+
+      bool         generate_graphical_output;
+      unsigned int graphical_output_interval;
+
+      unsigned int adaptive_refinement_interval;
+
+      double       stabilization_alpha;
+      double       stabilization_c_R;
+      double       stabilization_beta;
+
+      unsigned int stokes_velocity_degree;
+      bool         use_locally_conservative_discretization;
+
+      unsigned int temperature_degree;
+    };
+
+  private:
+    Parameters                               &parameters;
+
+    // The <code>pcout</code> (for <i>%parallel <code>std::cout</code></i>)
+    // object is used to simplify writing output: each MPI process can use
+    // this to generate output as usual, but since each of these processes
+    // will (hopefully) produce the same output it will just be replicated
+    // many times over; with the ConditionalOStream class, only the output
+    // generated by one MPI process will actually be printed to screen,
+    // whereas the output by all the other processes will simply be forgotten.
+    ConditionalOStream                        pcout;
+
+    // The following member variables will then again be similar to those in
+    // step-31 (and to other tutorial programs). As mentioned in the
+    // introduction, we fully distribute computations, so we will have to use
+    // the parallel::distributed::Triangulation class (see step-40) but the
+    // remainder of these variables is rather standard with two exceptions:
+    //
+    // - The <code>mapping</code> variable is used to denote a higher-order
+    // polynomial mapping. As mentioned in the introduction, we use this
+    // mapping when forming integrals through quadrature for all cells that
+    // are adjacent to either the inner or outer boundaries of our domain
+    // where the boundary is curved.
+    //
+    // - In a bit of naming confusion, you will notice below that some of the
+    // variables from namespace TrilinosWrappers are taken from namespace
+    // TrilinosWrappers::MPI (such as the right hand side vectors) whereas
+    // others are not (such as the various matrices). This is due to legacy
+    // reasons. We will frequently have to query velocities
+    // and temperatures at arbitrary quadrature points; consequently, rather
+    // than importing ghost information of a vector whenever we need access
+    // to degrees of freedom that are relevant locally but owned by another
+    // processor, we solve linear systems in %parallel but then immediately
+    // initialize a vector including ghost entries of the solution for further
+    // processing. The various <code>*_solution</code> vectors are therefore
+    // filled immediately after solving their respective linear system in
+    // %parallel and will always contain values for all
+    // @ref GlossLocallyRelevantDof "locally relevant degrees of freedom";
+    // the fully distributed vectors that we obtain from the solution process
+    // and that only ever contain the
+    // @ref GlossLocallyOwnedDof "locally owned degrees of freedom" are
+    // destroyed immediately after the solution process and after we have
+    // copied the relevant values into the member variable vectors.
+    parallel::distributed::Triangulation<dim> triangulation;
+    double                                    global_Omega_diameter;
+
+    const MappingQ<dim>                       mapping;
+
+    const FESystem<dim>                       stokes_fe;
+    DoFHandler<dim>                           stokes_dof_handler;
+    ConstraintMatrix                          stokes_constraints;
+
+    TrilinosWrappers::BlockSparseMatrix       stokes_matrix;
+    TrilinosWrappers::BlockSparseMatrix       stokes_preconditioner_matrix;
+
+    TrilinosWrappers::MPI::BlockVector        stokes_solution;
+    TrilinosWrappers::MPI::BlockVector        old_stokes_solution;
+    TrilinosWrappers::MPI::BlockVector        stokes_rhs;
+
+
+    FE_Q<dim>                                 temperature_fe;
+    DoFHandler<dim>                           temperature_dof_handler;
+    ConstraintMatrix                          temperature_constraints;
+
+    TrilinosWrappers::SparseMatrix            temperature_mass_matrix;
+    TrilinosWrappers::SparseMatrix            temperature_stiffness_matrix;
+    TrilinosWrappers::SparseMatrix            temperature_matrix;
+
+    TrilinosWrappers::MPI::Vector             temperature_solution;
+    TrilinosWrappers::MPI::Vector             old_temperature_solution;
+    TrilinosWrappers::MPI::Vector             old_old_temperature_solution;
+    TrilinosWrappers::MPI::Vector             temperature_rhs;
+
+
+    double                                    time_step;
+    double                                    old_time_step;
+    unsigned int                              timestep_number;
+
+    std_cxx11::shared_ptr<TrilinosWrappers::PreconditionAMG>    Amg_preconditioner;
+    std_cxx11::shared_ptr<TrilinosWrappers::PreconditionJacobi> Mp_preconditioner;
+    std_cxx11::shared_ptr<TrilinosWrappers::PreconditionJacobi> T_preconditioner;
+
+    bool                                      rebuild_stokes_matrix;
+    bool                                      rebuild_stokes_preconditioner;
+    bool                                      rebuild_temperature_matrices;
+    bool                                      rebuild_temperature_preconditioner;
+
+    // The next member variable, <code>computing_timer</code> is used to
+    // conveniently account for compute time spent in certain "sections" of
+    // the code that are repeatedly entered. For example, we will enter (and
+    // leave) sections for Stokes matrix assembly and would like to accumulate
+    // the run time spent in this section over all time steps. Every so many
+    // time steps as well as at the end of the program (through the destructor
+    // of the TimerOutput class) we will then produce a nice summary of the
+    // times spent in the different sections into which we categorize the
+    // run-time of this program.
+    TimerOutput                               computing_timer;
+
+    // After these member variables we have a number of auxiliary functions
+    // that have been broken out of the ones listed above. Specifically, there
+    // are first three functions that we call from <code>setup_dofs</code> and
+    // then the ones that do the assembling of linear systems:
+    void setup_stokes_matrix (const std::vector<IndexSet> &stokes_partitioning,
+                              const std::vector<IndexSet> &stokes_relevant_partitioning);
+    void setup_stokes_preconditioner (const std::vector<IndexSet> &stokes_partitioning,
+                                      const std::vector<IndexSet> &stokes_relevant_partitioning);
+    void setup_temperature_matrices (const IndexSet &temperature_partitioning,
+                                     const IndexSet &temperature_relevant_partitioning);
+
+
+    // Following the @ref MTWorkStream "task-based parallelization" paradigm,
+    // we split all the assembly routines into two parts: a first part that
+    // can do all the calculations on a certain cell without taking care of
+    // other threads, and a second part (which is writing the local data into
+    // the global matrices and vectors) which can be entered by only one
+    // thread at a time. In order to implement that, we provide functions for
+    // each of those two steps for all the four assembly routines that we use
+    // in this program. The following eight functions do exactly this:
+    void
+    local_assemble_stokes_preconditioner (const typename DoFHandler<dim>::active_cell_iterator &cell,
+                                          Assembly::Scratch::StokesPreconditioner<dim> &scratch,
+                                          Assembly::CopyData::StokesPreconditioner<dim> &data);
+
+    void
+    copy_local_to_global_stokes_preconditioner (const Assembly::CopyData::StokesPreconditioner<dim> &data);
+
+
+    void
+    local_assemble_stokes_system (const typename DoFHandler<dim>::active_cell_iterator &cell,
+                                  Assembly::Scratch::StokesSystem<dim>  &scratch,
+                                  Assembly::CopyData::StokesSystem<dim> &data);
+
+    void
+    copy_local_to_global_stokes_system (const Assembly::CopyData::StokesSystem<dim> &data);
+
+
+    void
+    local_assemble_temperature_matrix (const typename DoFHandler<dim>::active_cell_iterator &cell,
+                                       Assembly::Scratch::TemperatureMatrix<dim>  &scratch,
+                                       Assembly::CopyData::TemperatureMatrix<dim> &data);
+
+    void
+    copy_local_to_global_temperature_matrix (const Assembly::CopyData::TemperatureMatrix<dim> &data);
+
+
+
+    void
+    local_assemble_temperature_rhs (const std::pair<double,double> global_T_range,
+                                    const double                   global_max_velocity,
+                                    const double                   global_entropy_variation,
+                                    const typename DoFHandler<dim>::active_cell_iterator &cell,
+                                    Assembly::Scratch::TemperatureRHS<dim> &scratch,
+                                    Assembly::CopyData::TemperatureRHS<dim> &data);
+
+    void
+    copy_local_to_global_temperature_rhs (const Assembly::CopyData::TemperatureRHS<dim> &data);
+
+    // Finally, we forward declare a member class that we will define later on
+    // and that will be used to compute a number of quantities from our
+    // solution vectors that we'd like to put into the output files for
+    // visualization.
+    class Postprocessor;
+  };
+
+
+  // @sect3{BoussinesqFlowProblem class implementation}
+
+  // @sect4{BoussinesqFlowProblem::Parameters}
+  //
+  // Here comes the definition of the parameters for the Stokes problem. We
+  // allow to set the end time for the simulation, the level of refinements
+  // (both global and adaptive, which in the sum specify what maximum level
+  // the cells are allowed to have), and the interval between refinements in
+  // the time stepping.
+  //
+  // Then, we let the user specify constants for the stabilization parameters
+  // (as discussed in the introduction), the polynomial degree for the Stokes
+  // velocity space, whether to use the locally conservative discretization
+  // based on FE_DGP elements for the pressure or not (FE_Q elements for
+  // pressure), and the polynomial degree for the temperature interpolation.
+  //
+  // The constructor checks for a valid input file (if not, a file with
+  // default parameters for the quantities is written), and eventually parses
+  // the parameters.
+  template <int dim>
+  BoussinesqFlowProblem<dim>::Parameters::Parameters (const std::string &parameter_filename)
+    :
+    end_time (1e8),
+    initial_global_refinement (2),
+    initial_adaptive_refinement (2),
+    adaptive_refinement_interval (10),
+    stabilization_alpha (2),
+    stabilization_c_R (0.11),
+    stabilization_beta (0.078),
+    stokes_velocity_degree (2),
+    use_locally_conservative_discretization (true),
+    temperature_degree (2)
+  {
+    // Register all entries first: the handler is needed both for writing a
+    // template file and for reading an existing one.
+    ParameterHandler prm;
+    BoussinesqFlowProblem<dim>::Parameters::declare_parameters (prm);
+
+    std::ifstream parameter_file (parameter_filename.c_str());
+    if (parameter_file.fail())
+      {
+        parameter_file.close ();
+
+        // Leave a file with the declared defaults behind, then abort.
+        std::ofstream parameter_out (parameter_filename.c_str());
+        prm.print_parameters (parameter_out, ParameterHandler::Text);
+
+        std::ostringstream message;
+        message << "Input parameter file <" << parameter_filename
+                << "> not found. Creating a" << std::endl
+                << "template file of the same name." << std::endl;
+        AssertThrow (false, ExcMessage (message.str().c_str()));
+      }
+
+    // The stream opened fine: read it, reject malformed input, and copy
+    // the values into the members of this structure.
+    const bool success = prm.read_input (parameter_file);
+    AssertThrow (success, ExcMessage ("Invalid input parameter file."));
+
+    parse_parameters (prm);
+  }
+
+
+
+  // Next we have a function that declares the parameters that we expect in
+  // the input file, together with their data types, default values and a
+  // description:
+  template <int dim>
+  void BoussinesqFlowProblem<dim>::Parameters::
+  declare_parameters (ParameterHandler &prm)
+  {
+    prm.declare_entry ("End time", "1e8",
+                       Patterns::Double (0),
+                       "The end time of the simulation in years.");
+    prm.declare_entry ("Initial global refinement", "2",
+                       Patterns::Integer (0),
+                       "The number of global refinement steps performed on "
+                       "the initial coarse mesh, before the problem is first "
+                       "solved there.");
+    prm.declare_entry ("Initial adaptive refinement", "2",
+                       Patterns::Integer (0),
+                       "The number of adaptive refinement steps performed after "
+                       "initial global refinement.");
+    prm.declare_entry ("Time steps between mesh refinement", "10",
+                       Patterns::Integer (1),
+                       "The number of time steps after which the mesh is to be "
+                       "adapted based on computed error indicators.");
+    prm.declare_entry ("Generate graphical output", "false",
+                       Patterns::Bool (),
+                       "Whether graphical output is to be generated or not. "
+                       "You may not want to get graphical output if the number "
+                       "of processors is large.");
+    prm.declare_entry ("Time steps between graphical output", "50",
+                       Patterns::Integer (1),
+                       "The number of time steps between each generation of "
+                       "graphical output files.");
+
+    // Constants of the entropy viscosity stabilization.
+    prm.enter_subsection ("Stabilization parameters");
+    {
+      prm.declare_entry ("alpha", "2",
+                         Patterns::Double (1, 2),
+                         "The exponent in the entropy viscosity stabilization.");
+      prm.declare_entry ("c_R", "0.11",
+                         Patterns::Double (0),
+                         "The c_R factor in the entropy viscosity stabilization.");
+      prm.declare_entry ("beta", "0.078",
+                         Patterns::Double (0),
+                         "The beta factor in the artificial viscosity "
+                         "stabilization. An appropriate value for 2d is 0.052 "
+                         "and 0.078 for 3d.");
+    }
+    prm.leave_subsection ();
+
+    // Polynomial degrees and the choice of pressure element.
+    prm.enter_subsection ("Discretization");
+    {
+      prm.declare_entry ("Stokes velocity polynomial degree", "2",
+                         Patterns::Integer (1),
+                         "The polynomial degree to use for the velocity variables "
+                         "in the Stokes system.");
+      prm.declare_entry ("Temperature polynomial degree", "2",
+                         Patterns::Integer (1),
+                         "The polynomial degree to use for the temperature variable.");
+      prm.declare_entry ("Use locally conservative discretization", "true",
+                         Patterns::Bool (),
+                         "Whether to use a Stokes discretization that is locally "
+                         "conservative at the expense of a larger number of degrees "
+                         "of freedom, or to go with a cheaper discretization "
+                         "that does not locally conserve mass (although it is "
+                         "globally conservative).");
+    }
+    prm.leave_subsection ();
+  }
+
+
+
+  // And then we need a function that reads the contents of the
+  // ParameterHandler object we get by reading the input file and puts the
+  // results into variables that store the values of the parameters we have
+  // previously declared:
+  template <int dim>
+  void BoussinesqFlowProblem<dim>::Parameters::
+  parse_parameters (ParameterHandler &prm)
+  {
+    // Entries declared at the top level of the handler.
+    end_time = prm.get_double ("End time");
+
+    initial_global_refinement   = prm.get_integer ("Initial global refinement");
+    initial_adaptive_refinement = prm.get_integer ("Initial adaptive refinement");
+    adaptive_refinement_interval
+      = prm.get_integer ("Time steps between mesh refinement");
+
+    generate_graphical_output = prm.get_bool ("Generate graphical output");
+    graphical_output_interval
+      = prm.get_integer ("Time steps between graphical output");
+
+    // Entries of the "Stabilization parameters" subsection.
+    prm.enter_subsection ("Stabilization parameters");
+    stabilization_alpha = prm.get_double ("alpha");
+    stabilization_c_R   = prm.get_double ("c_R");
+    stabilization_beta  = prm.get_double ("beta");
+    prm.leave_subsection ();
+
+    // Entries of the "Discretization" subsection.
+    prm.enter_subsection ("Discretization");
+    stokes_velocity_degree = prm.get_integer ("Stokes velocity polynomial degree");
+    temperature_degree     = prm.get_integer ("Temperature polynomial degree");
+    use_locally_conservative_discretization
+      = prm.get_bool ("Use locally conservative discretization");
+    prm.leave_subsection ();
+  }
+
+
+
+
+  // @sect4{BoussinesqFlowProblem::BoussinesqFlowProblem}
+  //
+  // The constructor of the problem is very similar to the constructor in
+  // step-31. What is different is the %parallel communication: Trilinos uses
+  // a message passing interface (MPI) for data distribution. When entering
+  // the BoussinesqFlowProblem class, we have to decide how the parallelization
+  // is to be done. We choose a rather simple strategy and let all processors
+  // that are running the program work together, specified by the communicator
+  // <code>MPI_COMM_WORLD</code>. Next, we create the output stream (as we
+  // already did in step-18) that only generates output on the first MPI
+  // process and is completely forgetful on all others. The implementation of
+  // this idea is to check the process number when <code>pcout</code> gets a
+  // true argument, and it uses the <code>std::cout</code> stream for
+  // output. If we are on processor five, for instance, then we will give a
+  // <code>false</code> argument to <code>pcout</code>, which means that the
+  // output of that processor will not be printed. With the exception of the
+  // mapping object (for which we use polynomials of degree 4) all but the
+  // final member variable are exactly the same as in step-31.
+  //
+  // This final object, the TimerOutput object, is then told to restrict
+  // output to the <code>pcout</code> stream (processor 0), and then we
+  // specify that we want to get a summary table at the end of the program
+  // which shows us wallclock times (as opposed to CPU times). We will
+  // manually also request intermediate summaries every so many time steps in
+  // the <code>run()</code> function below.
+  template <int dim>
+  BoussinesqFlowProblem<dim>::BoussinesqFlowProblem (Parameters &parameters_)
+    :
+    parameters (parameters_),
+    pcout (std::cout,
+           (Utilities::MPI::this_mpi_process(MPI_COMM_WORLD)
+            == 0)),
+    // Mesh on MPI_COMM_WORLD; smoothing on both refinement and coarsening.
+    triangulation (MPI_COMM_WORLD,
+                   typename Triangulation<dim>::MeshSmoothing
+                   (Triangulation<dim>::smoothing_on_refinement |
+                    Triangulation<dim>::smoothing_on_coarsening)),
+    // Polynomial degree of the mapping.
+    mapping (4),
+    // dim velocity copies, one pressure (FE_DGP or FE_Q) of one degree less:
+    stokes_fe (FE_Q<dim>(parameters.stokes_velocity_degree),
+               dim,
+               (parameters.use_locally_conservative_discretization
+                ?
+                static_cast<const FiniteElement<dim> &>
+                (FE_DGP<dim>(parameters.stokes_velocity_degree-1))
+                :
+                static_cast<const FiniteElement<dim> &>
+                (FE_Q<dim>(parameters.stokes_velocity_degree-1))),
+               1),
+
+    stokes_dof_handler (triangulation),
+
+    temperature_fe (parameters.temperature_degree),
+    temperature_dof_handler (triangulation),
+    // Time stepping starts from zero; all rebuild flags start out set.
+    time_step (0),
+    old_time_step (0),
+    timestep_number (0),
+    rebuild_stokes_matrix (true),
+    rebuild_stokes_preconditioner (true),
+    rebuild_temperature_matrices (true),
+    rebuild_temperature_preconditioner (true),
+    // The timer writes through pcout: summary output, wall clock times.
+    computing_timer (MPI_COMM_WORLD,
+                     pcout,
+                     TimerOutput::summary,
+                     TimerOutput::wall_times)
+  {}
+
+
+
+  // @sect4{The BoussinesqFlowProblem helper functions}
+  // @sect5{BoussinesqFlowProblem::get_maximal_velocity}
+
+  // Except for two small details, the function to compute the global maximum
+  // of the velocity is the same as in step-31. The first detail is actually
+  // common to all functions that implement loops over all cells in the
+  // triangulation: When operating in %parallel, each processor can only work
+  // on a chunk of cells since each processor only has a certain part of the
+  // entire triangulation. This chunk of cells that we want to work on is
+  // identified via a so-called <code>subdomain_id</code>, as we also did in
+  // step-18. All we need to change is hence to perform the cell-related
+  // operations only on cells that are owned by the current process (as
+  // opposed to ghost or artificial cells), i.e. for which the subdomain id
+  // equals the number of the process ID. Since this is a commonly used
+  // operation, there is a shortcut for this operation: we can ask whether the
+  // cell is owned by the current processor using
+  // <code>cell-@>is_locally_owned()</code>.
+  //
+  // The second difference is the way we calculate the maximum value. Before,
+  // we could simply have a <code>double</code> variable that we checked
+  // against on each quadrature point for each cell. Now, we have to be a bit
+  // more careful since each processor only operates on a subset of
+  // cells. What we do is to first let each processor calculate the maximum
+  // among its cells, and then do a global communication operation
+  // <code>Utilities::MPI::max</code> that computes the maximum value among
+  // all the maximum values of the individual processors. MPI provides such a
+  // call, but it's even simpler to use the respective function in namespace
+  // Utilities::MPI using the MPI communicator object since that will do the
+  // right thing even if we work without MPI and on a single machine only. The
+  // call to <code>Utilities::MPI::max</code> needs two arguments, namely the
+  // local maximum (input) and the MPI communicator, which is MPI_COMM_WORLD
+  // in this example.
+  template <int dim>
+  double BoussinesqFlowProblem<dim>::get_maximal_velocity () const
+  {
+    // Velocities are sampled at the points of a QTrapez rule that is iterated
+    // according to the Stokes velocity degree.
+    const QIterated<dim> quadrature_formula (QTrapez<1>(),
+                                             parameters.stokes_velocity_degree);
+    const unsigned int n_q_points = quadrature_formula.size();
+    const FEValuesExtractors::Vector velocities (0);
+
+    FEValues<dim> fe_values (mapping, stokes_fe, quadrature_formula, update_values);
+    std::vector<Tensor<1,dim> > velocity_values (n_q_points);
+
+    // Largest velocity magnitude on the cells owned by this process.
+    double local_maximum = 0;
+    const typename DoFHandler<dim>::active_cell_iterator
+    endc = stokes_dof_handler.end();
+    for (typename DoFHandler<dim>::active_cell_iterator
+         cell = stokes_dof_handler.begin_active(); cell!=endc; ++cell)
+      {
+        if (!cell->is_locally_owned())
+          continue;
+        fe_values.reinit (cell);
+        fe_values[velocities].get_function_values (stokes_solution, velocity_values);
+        for (unsigned int q=0; q<n_q_points; ++q)
+          local_maximum = std::max (local_maximum,
+                                    velocity_values[q].norm());
+      }
+
+    return Utilities::MPI::max (local_maximum, MPI_COMM_WORLD);
+  }
+
+
+  // @sect5{BoussinesqFlowProblem::get_cfl_number}
+
+  // The next function does something similar, but we now compute the CFL
+  // number, i.e., maximal velocity on a cell divided by the cell
+  // diameter. This number is necessary to determine the time step size, as we
+  // use a semi-explicit time stepping scheme for the temperature equation
+  // (see step-31 for a discussion). We compute it in the same way as above:
+  // Compute the local maximum over all locally owned cells, then exchange it
+  // via MPI to find the global maximum.
+  template <int dim>
+  double BoussinesqFlowProblem<dim>::get_cfl_number () const
+  {
+    const QIterated<dim> quadrature_formula (QTrapez<1>(),
+                                             parameters.stokes_velocity_degree);
+    const unsigned int n_q_points = quadrature_formula.size();
+    const FEValuesExtractors::Vector velocities (0);
+
+    FEValues<dim> fe_values (mapping, stokes_fe, quadrature_formula, update_values);
+    std::vector<Tensor<1,dim> > velocity_values (n_q_points);
+
+    double max_local_cfl = 0;
+
+    const typename DoFHandler<dim>::active_cell_iterator
+    endc = stokes_dof_handler.end();
+    for (typename DoFHandler<dim>::active_cell_iterator
+         cell = stokes_dof_handler.begin_active(); cell!=endc; ++cell)
+      {
+        if (!cell->is_locally_owned())
+          continue;
+        fe_values.reinit (cell);
+        fe_values[velocities].get_function_values (stokes_solution, velocity_values);
+
+        // The speed on a cell is floored at 1e-10 before it is divided by the
+        // cell diameter.
+        double cell_speed = 1e-10;
+        for (unsigned int q=0; q<n_q_points; ++q)
+          cell_speed = std::max (cell_speed, velocity_values[q].norm());
+
+        max_local_cfl = std::max (max_local_cfl, cell_speed / cell->diameter());
+      }
+
+    return Utilities::MPI::max (max_local_cfl, MPI_COMM_WORLD);
+  }
+
+
+  // @sect5{BoussinesqFlowProblem::get_entropy_variation}
+
+  // Next comes the computation of the global entropy variation
+  // $\|E(T)-\bar{E}(T)\|_\infty$ where the entropy $E$ is defined as
+  // discussed in the introduction.  This is needed for the evaluation of the
+  // stabilization in the temperature equation as explained in the
+  // introduction. The entropy variation is actually only needed if we use
+  // $\alpha=2$ as a power in the residual computation. The infinity norm is
+  // computed by the maxima over quadrature points, as usual in discrete
+  // computations.
+  //
+  // In order to compute this quantity, we first have to find the
+  // space-average $\bar{E}(T)$ and then evaluate the maximum. However, that
+  // means that we would need to perform two loops. We can avoid the overhead
+  // by noting that $\|E(T)-\bar{E}(T)\|_\infty =
+  // \max\big(E_{\textrm{max}}(T)-\bar{E}(T),
+  // \bar{E}(T)-E_{\textrm{min}}(T)\big)$, i.e., the maximum out of the
+  // deviation from the average entropy in positive and negative
+  // directions. The four quantities we need for the latter formula (maximum
+  // entropy, minimum entropy, average entropy, area) can all be evaluated in
+  // the same loop over all cells, so we choose this simpler variant.
+  template <int dim>
+  double
+  BoussinesqFlowProblem<dim>::get_entropy_variation (const double average_temperature) const
+  {
+    if (parameters.stabilization_alpha != 2)
+      return 1.;
+
+    const QGauss<dim> quadrature_formula (parameters.temperature_degree+1);
+    const unsigned int n_q_points = quadrature_formula.size();
+
+    FEValues<dim> fe_values (temperature_fe, quadrature_formula,
+                             update_values | update_JxW_values);
+    std::vector<double> old_temperature_values(n_q_points);
+    std::vector<double> old_old_temperature_values(n_q_points);
+
+    // In the two functions above we computed the maximum of numbers that were
+    // all non-negative, so we knew that zero was certainly a lower bound. On
+    // the other hand, here we need to find the maximum deviation from the
+    // average value, i.e., we will need to know the maximal and minimal
+    // values of the entropy for which we don't a priori know the sign.
+    //
+    // To compute it, we can therefore start with the largest and smallest
+    // possible values we can store in a double precision number: The minimum
+    // is initialized with a bigger and the maximum with a smaller number than
+    // any one that is going to appear. We are then guaranteed that these
+    // numbers will be overwritten in the loop on the first cell or, if this
+    // processor does not own any cells, in the communication step at the
+    // latest. The following loop then computes the minimum and maximum local
+    // entropy as well as keeps track of the area/volume of the part of the
+    // domain we locally own and the integral over the entropy on it:
+    double min_entropy = std::numeric_limits<double>::max(),
+           max_entropy = -std::numeric_limits<double>::max(),
+           area = 0,
+           entropy_integrated = 0;
+
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = temperature_dof_handler.begin_active(),
+    endc = temperature_dof_handler.end();
+    for (; cell!=endc; ++cell)
+      if (cell->is_locally_owned())
+        {
+          fe_values.reinit (cell);
+          fe_values.get_function_values (old_temperature_solution,
+                                         old_temperature_values);
+          fe_values.get_function_values (old_old_temperature_solution,
+                                         old_old_temperature_values);
+          for (unsigned int q=0; q<n_q_points; ++q)
+            {
+              const double T = (old_temperature_values[q] +
+                                old_old_temperature_values[q]) / 2;
+              const double entropy = ((T-average_temperature) *
+                                      (T-average_temperature));
+
+              min_entropy = std::min (min_entropy, entropy);
+              max_entropy = std::max (max_entropy, entropy);
+              area += fe_values.JxW(q);
+              entropy_integrated += fe_values.JxW(q) * entropy;
+            }
+        }
+
+    // Now we only need to exchange data between processors: we need to sum
+    // the two integrals (<code>area</code>, <code>entropy_integrated</code>),
+    // and get the extrema for maximum and minimum. We could do this through
+    // four different data exchanges, but we can do it with two:
+    // Utilities::MPI::sum also exists in a variant that takes an array of
+    // values that are all to be summed up. And we can also utilize the
+    // Utilities::MPI::max function by realizing that forming the minimum over
+    // the minimal entropies equals forming the negative of the maximum over
+    // the negative of the minimal entropies; this maximum can then be
+    // combined with forming the maximum over the maximal entropies.
+    const double local_sums[2]   = { entropy_integrated, area },
+                                   local_maxima[2] = { -min_entropy, max_entropy };
+    double global_sums[2], global_maxima[2];
+
+    Utilities::MPI::sum (local_sums,   MPI_COMM_WORLD, global_sums);
+    Utilities::MPI::max (local_maxima, MPI_COMM_WORLD, global_maxima);
+
+    // Having computed everything this way, we can then compute the average
+    // entropy and find the $L^\infty$ norm by taking the larger of the
+    // deviation of the maximum or minimum from the average:
+    const double average_entropy = global_sums[0] / global_sums[1];
+    const double entropy_diff = std::max(global_maxima[1] - average_entropy,
+                                         average_entropy - (-global_maxima[0]));
+    return entropy_diff;
+  }
+
+
+
+  // @sect5{BoussinesqFlowProblem::get_extrapolated_temperature_range}
+
+  // The next function computes the minimal and maximal value of the
+  // extrapolated temperature over the entire domain. Again, this is only a
+  // slightly modified version of the respective function in step-31. As in
+  // the function above, we collect local minima and maxima and then compute
+  // the global extrema using the same trick as above.
+  //
+  // As already discussed in step-31, the function needs to distinguish
+  // between the first and all following time steps because it uses a higher
+  // order temperature extrapolation scheme when at least two previous time
+  // steps are available.
+  template <int dim>
+  std::pair<double,double>
+  BoussinesqFlowProblem<dim>::get_extrapolated_temperature_range () const
+  {
+    // Returns the pair (minimum, maximum) of the temperature field over all
+    // MPI processes. For every time step but the very first one, the field
+    // that is sampled is the linear extrapolation of the two previous
+    // temperature solutions; in the first time step it is simply
+    // <code>old_temperature_solution</code>.
+    //
+    // The field is sampled at the points of a quadrature rule obtained by
+    // iterating the one-dimensional trapezoidal rule
+    // <code>parameters.temperature_degree</code> times:
+    const QIterated<dim> quadrature_formula (QTrapez<1>(),
+                                             parameters.temperature_degree);
+    const unsigned int n_q_points = quadrature_formula.size();
+
+    FEValues<dim> fe_values (mapping, temperature_fe, quadrature_formula,
+                             update_values);
+    std::vector<double> old_temperature_values(n_q_points);
+    std::vector<double> old_old_temperature_values(n_q_points);
+
+    // Initialize the running extrema with the most extreme representable
+    // values so that any sampled temperature replaces them:
+    double min_local_temperature = std::numeric_limits<double>::max();
+    double max_local_temperature = -std::numeric_limits<double>::max();
+
+    // Two previous solutions are only used from the second time step on:
+    const bool extrapolate = (timestep_number != 0);
+
+    typename DoFHandler<dim>::active_cell_iterator
+    endc = temperature_dof_handler.end();
+    for (typename DoFHandler<dim>::active_cell_iterator
+         cell = temperature_dof_handler.begin_active(); cell!=endc; ++cell)
+      {
+        // Only locally owned cells contribute on this process.
+        if (!cell->is_locally_owned())
+          continue;
+
+        fe_values.reinit (cell);
+        fe_values.get_function_values (old_temperature_solution,
+                                       old_temperature_values);
+        if (extrapolate)
+          fe_values.get_function_values (old_old_temperature_solution,
+                                         old_old_temperature_values);
+
+        for (unsigned int q=0; q<n_q_points; ++q)
+          {
+            double temperature;
+            if (extrapolate)
+              temperature =
+                (1. + time_step/old_time_step) * old_temperature_values[q]-
+                time_step/old_time_step * old_old_temperature_values[q];
+            else
+              temperature = old_temperature_values[q];
+
+            min_local_temperature = std::min (min_local_temperature,
+                                              temperature);
+            max_local_temperature = std::max (max_local_temperature,
+                                              temperature);
+          }
+      }
+
+    // The minimum is stored with its sign flipped: the maximum over the
+    // negated minima is the negated global minimum, so a single max
+    // reduction over both entries yields both global extrema.
+    double local_extrema[2] = { -min_local_temperature,
+                                max_local_temperature
+                              };
+    double global_extrema[2];
+    Utilities::MPI::max (local_extrema, MPI_COMM_WORLD, global_extrema);
+
+    // Flip the sign of the first entry back to obtain the global minimum:
+    return std::make_pair(-global_extrema[0], global_extrema[1]);
+  }
+
+
+  // @sect5{BoussinesqFlowProblem::compute_viscosity}
+
+  // The function that calculates the viscosity is purely local and so needs
+  // no communication at all. It is mostly the same as in step-31 but with an
+  // updated formulation of the viscosity if $\alpha=2$ is chosen:
+  template <int dim>
+  double
+  BoussinesqFlowProblem<dim>::
+  compute_viscosity (const std::vector<double>          &old_temperature,
+                     const std::vector<double>          &old_old_temperature,
+                     const std::vector<Tensor<1,dim> >  &old_temperature_grads,
+                     const std::vector<Tensor<1,dim> >  &old_old_temperature_grads,
+                     const std::vector<double>          &old_temperature_laplacians,
+                     const std::vector<double>          &old_old_temperature_laplacians,
+                     const std::vector<Tensor<1,dim> >  &old_velocity_values,
+                     const std::vector<Tensor<1,dim> >  &old_old_velocity_values,
+                     const std::vector<SymmetricTensor<2,dim> >  &old_strain_rates,
+                     const std::vector<SymmetricTensor<2,dim> >  &old_old_strain_rates,
+                     const double                        global_u_infty,
+                     const double                        global_T_variation,
+                     const double                        average_temperature,
+                     const double                        global_entropy_variation,
+                     const double                        cell_diameter) const
+  {
+    // With a zero global velocity, return a value that depends on the cell size only.
+    if (global_u_infty == 0)
+      return 5e-3 * cell_diameter;
+
+    const bool weight_by_entropy = (parameters.stabilization_alpha == 2);
+    const unsigned int n_q_points = old_temperature.size();
+
+    // Maxima over the quadrature points; fields are averaged over both time levels.
+    double max_residual = 0;
+    double max_velocity = 0;
+    for (unsigned int q=0; q < n_q_points; ++q)
+      {
+        const Tensor<1,dim> velocity = (old_velocity_values[q] +
+                                        old_old_velocity_values[q]) / 2;
+        const SymmetricTensor<2,dim> strain = (old_strain_rates[q] +
+                                               old_old_strain_rates[q]) / 2;
+        const double temperature = (old_temperature[q] +
+                                    old_old_temperature[q]) / 2;
+
+        const double time_derivative = (old_temperature[q] -
+                                        old_old_temperature[q])
+                                       / old_time_step;
+        const double advection = velocity * (old_temperature_grads[q] +
+                                             old_old_temperature_grads[q]) / 2;
+        const double diffusion = EquationData::kappa
+                                 * (old_temperature_laplacians[q] +
+                                    old_old_temperature_laplacians[q]) / 2;
+        const double heating
+          = ((EquationData::radiogenic_heating
+              * EquationData::density(temperature)
+              +
+              2 * EquationData::eta * strain * strain) /
+             (EquationData::density(temperature)
+              * EquationData::specific_heat));
+
+        // For alpha == 2 the residual is weighted by the temperature deviation.
+        const double pde_residual
+          = std::abs(time_derivative + advection - diffusion - heating);
+        const double residual
+          = (weight_by_entropy
+             ? pde_residual * std::abs(temperature - average_temperature)
+             : pde_residual);
+
+        max_residual = std::max (residual, max_residual);
+        max_velocity = std::max (std::sqrt (velocity*velocity), max_velocity);
+      }
+
+    // Upper bound for the returned viscosity:
+    const double max_viscosity = (parameters.stabilization_beta *
+                                  max_velocity * cell_diameter);
+    if (timestep_number == 0)
+      return max_viscosity;
+
+    Assert (old_time_step > 0, ExcInternalError());
+    const double entropy_viscosity
+      = (weight_by_entropy
+         ? (parameters.stabilization_c_R * cell_diameter * cell_diameter *
+            max_residual / global_entropy_variation)
+         : (parameters.stabilization_c_R * cell_diameter *
+            global_Omega_diameter * max_velocity * max_residual /
+            (global_u_infty * global_T_variation)));
+    return std::min (max_viscosity, entropy_viscosity);
+  }
+
+
+
+  // @sect5{BoussinesqFlowProblem::project_temperature_field}
+
+  // This function is new compared to step-31. What it does is to re-implement
+  // the library function <code>VectorTools::project()</code> for an MPI-based
+  // parallelization, a function we used for generating an initial vector for
+  // temperature based on some initial function. The library function only
+  // works with shared memory but doesn't know how to utilize multiple
+  // machines coupled through MPI to compute the projected field. The details
+  // of a <code>project()</code> function are not very difficult. All we do is
+  // to use a mass matrix and put the evaluation of the initial value function
+  // on the right hand side. The mass matrix for temperature we can simply
+  // generate using the respective assembly function, so all we need to do
+  // here is to create the right hand side and do a CG solve. The assembly
+  // function does a loop over all cells and evaluates the function in the
+  // <code>EquationData</code> namespace, and does this only on cells owned by
+  // the respective processor. The implementation of this assembly differs
+  // from the assembly we do for the principal assembly functions further down
+  // (which include thread-based parallelization with the WorkStream
+  // concept). Here we chose to keep things simple (keeping in mind that this
+  // function is also only called once at the beginning of the program, not in
+  // every time step), and generating the right hand side is cheap anyway so
+  // we won't even notice that this part is not parallelized by threads.
+  //
+  // Regarding the implementation of inhomogeneous Dirichlet boundary
+  // conditions: Since we use the temperature ConstraintMatrix, we could apply
+  // the boundary conditions directly when building the respective matrix and
+  // right hand side. In this case, the boundary conditions are inhomogeneous,
+  // which makes this procedure somewhat tricky since we get the matrix from
+  // some other function that uses its own integration and assembly
+  // loop. However, the correct imposition of boundary conditions needs the
+  // matrix data we work on plus the right hand side simultaneously, since the
+  // right hand side is created by Gaussian elimination on the matrix rows. In
+  // order to not introduce the matrix assembly at this place, but still
+  // having the matrix data available, we choose to create a dummy matrix
+  // <code>matrix_for_bc</code> that we only fill with data when we need it
+  // for imposing boundary conditions. These positions are exactly those where
+  // we have an inhomogeneous entry in the ConstraintMatrix. There are only a
+  // few such positions (on the boundary DoFs), so it is still much cheaper to
+  // use this function than to create the full matrix here. To implement this,
+  // we ask the constraint matrix whether the DoF under consideration is
+  // inhomogeneously constrained. In that case, we generate the respective
+  // matrix column that we need for creating the correct right hand side. Note
+  // that this (manually generated) matrix entry needs to be exactly the entry
+  // that we would fill the matrix with — otherwise, this will not work.
+  template <int dim>
+  void BoussinesqFlowProblem<dim>::project_temperature_field ()  // projects TemperatureInitialValues and stores the result in all three temperature vectors
+  {
+    assemble_temperature_matrix ();  // NOTE(review): presumably fills temperature_mass_matrix, which is used below -- confirm
+
+    QGauss<dim> quadrature(parameters.temperature_degree+2);
+    UpdateFlags update_flags = UpdateFlags(update_values   |
+                                           update_quadrature_points |
+                                           update_JxW_values);
+    FEValues<dim> fe_values (mapping, temperature_fe, quadrature, update_flags);
+
+    const unsigned int dofs_per_cell = fe_values.dofs_per_cell,
+                       n_q_points    = fe_values.n_quadrature_points;
+
+    std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+    Vector<double> cell_vector (dofs_per_cell);  // right hand side contributions of one cell
+    FullMatrix<double> matrix_for_bc (dofs_per_cell, dofs_per_cell);  // only filled in columns of inhomogeneously constrained DoFs
+
+    std::vector<double> rhs_values(n_q_points);  // initial temperature at the quadrature points of one cell
+
+    IndexSet row_temp_matrix_partitioning(temperature_mass_matrix.n());  // index set built from the local row range of the mass matrix
+    row_temp_matrix_partitioning.add_range(temperature_mass_matrix.local_range().first,
+                                           temperature_mass_matrix.local_range().second);
+    TrilinosWrappers::MPI::Vector rhs (row_temp_matrix_partitioning),
+                     solution (row_temp_matrix_partitioning);
+
+
+    const EquationData::TemperatureInitialValues<dim> initial_temperature;
+
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = temperature_dof_handler.begin_active(),
+    endc = temperature_dof_handler.end();
+
+    for (; cell!=endc; ++cell)
+      if (cell->is_locally_owned())  // cells of other processes are skipped
+        {
+          cell->get_dof_indices (local_dof_indices);
+          fe_values.reinit (cell);
+
+          initial_temperature.value_list (fe_values.get_quadrature_points(),
+                                          rhs_values);
+
+          cell_vector = 0;  // both local objects are reset for every cell
+          matrix_for_bc = 0;
+          for (unsigned int point=0; point<n_q_points; ++point)
+            for (unsigned int i=0; i<dofs_per_cell; ++i)
+              {
+                cell_vector(i) += rhs_values[point] *  // function value times shape function i times quadrature weight
+                                  fe_values.shape_value(i,point) *
+                                  fe_values.JxW(point);
+                if (temperature_constraints.is_inhomogeneously_constrained(local_dof_indices[i]))
+                  {
+                    for (unsigned int j=0; j<dofs_per_cell; ++j)
+                      matrix_for_bc(j,i) += fe_values.shape_value(i,point) *  // column i of the local mass matrix
+                                            fe_values.shape_value(j,point) *
+                                            fe_values.JxW(point);
+                  }
+              }
+
+          temperature_constraints.distribute_local_to_global (cell_vector,  // matrix_for_bc is passed along for the inhomogeneous constraints
+                                                              local_dof_indices,
+                                                              rhs,
+                                                              matrix_for_bc);
+        }
+
+    rhs.compress (VectorOperation::add);  // finish the additive writes into rhs before it is used
+
+    // Now that we have the right linear system, we solve it using the CG
+    // method with a simple Jacobi preconditioner:
+    SolverControl solver_control(5*rhs.size(), 1e-12*rhs.l2_norm());  // arguments: 5*size of rhs, and 1e-12 times the l2 norm of rhs
+    SolverCG<TrilinosWrappers::MPI::Vector> cg(solver_control);
+
+    TrilinosWrappers::PreconditionJacobi preconditioner_mass;
+    preconditioner_mass.initialize(temperature_mass_matrix, 1.3);  // NOTE(review): 1.3 is presumably the relaxation parameter -- confirm
+
+    cg.solve (temperature_mass_matrix, solution, rhs, preconditioner_mass);
+
+    temperature_constraints.distribute (solution);  // NOTE(review): presumably sets the constrained entries of solution -- confirm
+
+    // Having so computed the current temperature field, let us set the member
+    // variable that holds the temperature nodes. Strictly speaking, we really
+    // only need to set <code>old_temperature_solution</code> since the first
+    // thing we will do is to compute the Stokes solution that only requires
+    // the previous time step's temperature field. That said, nothing good can
+    // come from not initializing the other vectors as well (especially since
+    // it's a relatively cheap operation and we only have to do it once at the
+    // beginning of the program) if we ever want to extend our numerical
+    // method or physical model, and so we initialize
+    // <code>temperature_solution</code> and
+    // <code>old_old_temperature_solution</code> as well. As a sidenote, while
+    // the <code>solution</code> vector is strictly distributed (i.e. each
+    // processor only stores a mutually exclusive subset of elements), the
+    // assignment makes sure that the vectors on the left hand side (which
+    // were initialized to contain ghost elements as well) also get the
+    // correct ghost elements. In other words, the assignment here requires
+    // communication between processors:
+    temperature_solution = solution;
+    old_temperature_solution = solution;
+    old_old_temperature_solution = solution;
+  }
+
+
+
+
+  // @sect4{The BoussinesqFlowProblem setup functions}
+
+  // The following three functions set up the Stokes matrix, the matrix used
+  // for the Stokes preconditioner, and the temperature matrix. The code is
+  // mostly the same as in step-31, but it has been broken out into three
+  // functions of their own for simplicity.
+  //
+  // The main functional difference between the code here and that in step-31
+  // is that the matrices we want to set up are distributed across multiple
+  // processors. Since we still want to build up the sparsity pattern first
+  // for efficiency reasons, we could continue to build the <i>entire</i>
+  // sparsity pattern as a BlockDynamicSparsityPattern, as we did in
+  // step-31. However, that would be inefficient: every processor would build
+  // the same sparsity pattern, but only initialize a small part of the matrix
+  // using it. It also violates the principle that every processor should only
+  // work on those cells it owns (and, if necessary the layer of ghost cells
+  // around it).
+  //
+  // Rather, we use an object of type TrilinosWrappers::BlockSparsityPattern,
+  // which is (obviously) a wrapper around a sparsity pattern object provided
+  // by Trilinos. The advantage is that the Trilinos sparsity pattern class
+  // can communicate across multiple processors: if this processor fills in
+  // all the nonzero entries that result from the cells it owns, and every
+  // other processor does so as well, then at the end after some MPI
+  // communication initiated by the <code>compress()</code> call, we will have
+  // the globally assembled sparsity pattern available with which the global
+  // matrix can be initialized.
+  //
+  // There is one important aspect when initializing Trilinos sparsity
+  // patterns in parallel: In addition to specifying the locally owned rows
+  // and columns of the matrices via the @p stokes_partitioning index set, we
+  // also supply information about all the rows we are possibly going to write
+  // into when assembling on a certain processor. The set of locally relevant
+  // rows contains all such rows (possibly also a few unnecessary ones, but it
+  // is difficult to find the exact row indices before actually getting
+  // indices on all cells and resolving constraints). This additional
+  // information allows us to determine exactly the structure of the
+  // off-processor data found during assembly. While Trilinos matrices are
+  // able to collect this information on the fly as well (when initializing
+  // them from some other reinit method), it is less efficient and leads to
+  // problems when assembling matrices with multiple threads. In this program,
+  // we pessimistically assume that only one processor at a time can write
+  // into the matrix during assembly (whereas the computation is parallel),
+  // which is fine for Trilinos matrices. In practice, one can do better by
+  // hinting WorkStream at cells that do not share vertices, allowing for
+  // parallelism among those cells (see the graph coloring algorithms and
+  // WorkStream with colored iterators argument). However, that only works
+  // when only one MPI processor is present because Trilinos' internal data
+  // structures for accumulating off-processor data on the fly are not thread
+  // safe. With the initialization presented here, there is no such problem
+  // and one could safely introduce graph coloring for this algorithm.
+  //
+  // The only other change we need to make is to tell the
+  // DoFTools::make_sparsity_pattern() function that it is only supposed to
+  // work on a subset of cells, namely the ones whose
+  // <code>subdomain_id</code> equals the number of the current processor, and
+  // to ignore all other cells.
+  //
+  // This strategy is replicated across all three of the following functions.
+  //
+  // Note that Trilinos matrices store the information contained in the
+  // sparsity patterns, so we can safely release the <code>sp</code> variable
+  // once the matrix has been given the sparsity structure.
+  template <int dim>
+  void BoussinesqFlowProblem<dim>::
+  setup_stokes_matrix (const std::vector<IndexSet> &stokes_partitioning,
+                       const std::vector<IndexSet> &stokes_relevant_partitioning)
+  {
+    stokes_matrix.clear ();
+
+    // Everything couples except component dim (the pressure) with itself:
+    const unsigned int n_components = dim+1;
+    Table<2,DoFTools::Coupling> coupling (n_components, n_components);
+    for (unsigned int row=0; row<n_components; ++row)
+      for (unsigned int col=0; col<n_components; ++col)
+        coupling[row][col] = ((row==dim) && (col==dim)
+                              ? DoFTools::none
+                              : DoFTools::always);
+
+    // Owned rows and columns, plus the rows this process may write to:
+    TrilinosWrappers::BlockSparsityPattern sp(stokes_partitioning, stokes_partitioning,
+                                              stokes_relevant_partitioning,
+                                              MPI_COMM_WORLD);
+    const unsigned int this_process
+      = Utilities::MPI::this_mpi_process(MPI_COMM_WORLD);
+    DoFTools::make_sparsity_pattern (stokes_dof_handler, coupling, sp,
+                                     stokes_constraints, false, this_process);
+    sp.compress();
+
+    stokes_matrix.reinit (sp);
+  }
+
+
+
+  template <int dim>
+  void BoussinesqFlowProblem<dim>::
+  setup_stokes_preconditioner (const std::vector<IndexSet> &stokes_partitioning,
+                               const std::vector<IndexSet> &stokes_relevant_partitioning)
+  {
+    Amg_preconditioner.reset ();
+    Mp_preconditioner.reset ();
+
+    stokes_preconditioner_matrix.clear ();
+
+    // Only the diagonal entries of the coupling table are set to always:
+    const unsigned int n_components = dim+1;
+    Table<2,DoFTools::Coupling> coupling (n_components, n_components);
+    for (unsigned int row=0; row<n_components; ++row)
+      for (unsigned int col=0; col<n_components; ++col)
+        coupling[row][col] = (row == col
+                              ? DoFTools::always
+                              : DoFTools::none);
+
+    // Owned rows and columns, plus the rows this process may write to:
+    TrilinosWrappers::BlockSparsityPattern sp(stokes_partitioning, stokes_partitioning,
+                                              stokes_relevant_partitioning,
+                                              MPI_COMM_WORLD);
+    const unsigned int this_process
+      = Utilities::MPI::this_mpi_process(MPI_COMM_WORLD);
+    DoFTools::make_sparsity_pattern (stokes_dof_handler, coupling, sp,
+                                     stokes_constraints, false, this_process);
+    sp.compress();
+
+    stokes_preconditioner_matrix.reinit (sp);
+  }
+
+
+  template <int dim>
+  void BoussinesqFlowProblem<dim>::
+  setup_temperature_matrices (const IndexSet &temperature_partitioner,
+                              const IndexSet &temperature_relevant_partitioner)
+  {
+    // Release the preconditioner and the old matrices before rebuilding them.
+    T_preconditioner.reset ();
+    temperature_mass_matrix.clear ();
+    temperature_stiffness_matrix.clear ();
+    temperature_matrix.clear ();
+
+    const unsigned int this_process
+      = Utilities::MPI::this_mpi_process(MPI_COMM_WORLD);
+    TrilinosWrappers::SparsityPattern sp(temperature_partitioner, temperature_partitioner,
+                                         temperature_relevant_partitioner, MPI_COMM_WORLD);
+    DoFTools::make_sparsity_pattern (temperature_dof_handler, sp,
+                                     temperature_constraints, false, this_process);
+    sp.compress();
+
+    // All three temperature matrices are initialized with the same pattern.
+    temperature_matrix.reinit (sp);
+    temperature_mass_matrix.reinit (sp);
+    temperature_stiffness_matrix.reinit (sp);
+  }
+
+
+
+  // The remainder of the setup function (after splitting out the three
+  // functions above) mostly has to deal with the things we need to do for
+  // parallelization across processors. Because setting all of this up is a
+  // significant compute time expense of the program, we put everything we do
+  // here into a timer group so that we can get summary information about the
+  // fraction of time spent in this part of the program at its end.
+  //
+  // At the top as usual we enumerate degrees of freedom and sort them by
+  // component/block, followed by writing their numbers to the screen from
+  // processor zero. The DoFHandler::distribute_dofs() function, when applied
+  // to a parallel::distributed::Triangulation object, sorts degrees of
+  // freedom in such a way that all degrees of freedom associated with
+  // subdomain zero come before all those associated with subdomain one,
+  // etc. For the Stokes part, this entails, however, that velocities and
+  // pressures become intermixed, but this is trivially solved by sorting
+  // again by blocks; it is worth noting that this latter operation leaves the
+  // relative ordering of all velocities and pressures alone, i.e. within the
+  // velocity block we will still have all those associated with subdomain
+  // zero before all velocities associated with subdomain one, etc. This is
+  // important since we store each of the blocks of this matrix distributed
+  // across all processors and want this to be done in such a way that each
+  // processor stores that part of the matrix that is roughly equal to the
+  // degrees of freedom located on those cells that it will actually work on.
+  //
+  // When printing the numbers of degrees of freedom, note that these numbers
+  // are going to be large if we use many processors. Consequently, we let the
+  // stream put a comma separator in between every three digits. The state of
+  // the stream, using the locale, is saved from before to after this
+  // operation. While slightly opaque, the code works because the default
+  // locale (which we get using the constructor call
+  // <code>std::locale("")</code>) implies printing numbers with a comma
+  // separator for every third digit (i.e., thousands, millions, billions).
+  template <int dim>
+  void BoussinesqFlowProblem<dim>::setup_dofs ()
+  {
+    computing_timer.enter_section("Setup dof systems");
+
+    std::vector<unsigned int> stokes_sub_blocks (dim+1,0);
+    stokes_sub_blocks[dim] = 1;
+    stokes_dof_handler.distribute_dofs (stokes_fe);
+    DoFRenumbering::component_wise (stokes_dof_handler, stokes_sub_blocks);
+
+    temperature_dof_handler.distribute_dofs (temperature_fe);
+
+    std::vector<types::global_dof_index> stokes_dofs_per_block (2);
+    DoFTools::count_dofs_per_block (stokes_dof_handler, stokes_dofs_per_block,
+                                    stokes_sub_blocks);
+
+    const unsigned int n_u = stokes_dofs_per_block[0],
+                       n_p = stokes_dofs_per_block[1],
+                       n_T = temperature_dof_handler.n_dofs();
+
+    std::locale s = pcout.get_stream().getloc();
+    pcout.get_stream().imbue(std::locale(""));
+    pcout << "Number of active cells: "
+          << triangulation.n_global_active_cells()
+          << " (on "
+          << triangulation.n_levels()
+          << " levels)"
+          << std::endl
+          << "Number of degrees of freedom: "
+          << n_u + n_p + n_T
+          << " (" << n_u << '+' << n_p << '+'<< n_T <<')'
+          << std::endl
+          << std::endl;
+    pcout.get_stream().imbue(s);
+
+
+    // After this, we have to set up the various partitioners (of type
+    // <code>IndexSet</code>, see the introduction) that describe which parts
+    // of each matrix or vector will be stored where, then call the functions
+    // that actually set up the matrices, and at the end also resize the
+    // various vectors we keep around in this program.
+    std::vector<IndexSet> stokes_partitioning, stokes_relevant_partitioning;
+    IndexSet temperature_partitioning (n_T), temperature_relevant_partitioning (n_T);
+    IndexSet stokes_relevant_set;
+    {
+      IndexSet stokes_index_set = stokes_dof_handler.locally_owned_dofs();
+      stokes_partitioning.push_back(stokes_index_set.get_view(0,n_u));
+      stokes_partitioning.push_back(stokes_index_set.get_view(n_u,n_u+n_p));
+
+      DoFTools::extract_locally_relevant_dofs (stokes_dof_handler,
+                                               stokes_relevant_set);
+      stokes_relevant_partitioning.push_back(stokes_relevant_set.get_view(0,n_u));
+      stokes_relevant_partitioning.push_back(stokes_relevant_set.get_view(n_u,n_u+n_p));
+
+      temperature_partitioning = temperature_dof_handler.locally_owned_dofs();
+      DoFTools::extract_locally_relevant_dofs (temperature_dof_handler,
+                                               temperature_relevant_partitioning);
+    }
+
+    // Following this, we can compute constraints for the solution vectors,
+    // including hanging node constraints and homogeneous and inhomogeneous
+    // boundary values for the Stokes and temperature fields. Note that as for
+    // everything else, the constraint objects can not hold <i>all</i>
+    // constraints on every processor. Rather, each processor needs to store
+    // only those that are actually necessary for correctness given that it
+    // only assembles linear systems on cells it owns. As discussed in
+    // @ref distributed_paper "this paper", the set of constraints we need to
+    // know about is exactly the set of constraints on all locally relevant
+    // degrees of freedom, so this is what we use to initialize the constraint
+    // objects.
+    {
+      stokes_constraints.clear ();
+      stokes_constraints.reinit (stokes_relevant_set);
+
+      DoFTools::make_hanging_node_constraints (stokes_dof_handler,
+                                               stokes_constraints);
+
+      FEValuesExtractors::Vector velocity_components(0);
+      VectorTools::interpolate_boundary_values (stokes_dof_handler,
+                                                0,
+                                                ZeroFunction<dim>(dim+1),
+                                                stokes_constraints,
+                                                stokes_fe.component_mask(velocity_components));
+
+      std::set<types::boundary_id> no_normal_flux_boundaries;
+      no_normal_flux_boundaries.insert (1);
+      VectorTools::compute_no_normal_flux_constraints (stokes_dof_handler, 0,
+                                                       no_normal_flux_boundaries,
+                                                       stokes_constraints,
+                                                       mapping);
+      stokes_constraints.close ();
+    }
+    {
+      temperature_constraints.clear ();
+      temperature_constraints.reinit (temperature_relevant_partitioning);
+
+      DoFTools::make_hanging_node_constraints (temperature_dof_handler,
+                                               temperature_constraints);
+      VectorTools::interpolate_boundary_values (temperature_dof_handler,
+                                                0,
+                                                EquationData::TemperatureInitialValues<dim>(),
+                                                temperature_constraints);
+      VectorTools::interpolate_boundary_values (temperature_dof_handler,
+                                                1,
+                                                EquationData::TemperatureInitialValues<dim>(),
+                                                temperature_constraints);
+      temperature_constraints.close ();
+    }
+
+    // All this done, we can then initialize the various matrix and vector
+    // objects to their proper sizes. At the end, we also record that all
+    // matrices and preconditioners have to be re-computed at the beginning of
+    // the next time step. Note how we initialize the vectors for the Stokes
+    // and temperature right hand sides: These are writable vectors (last
+    // boolean argument set to @p true) that have the correct one-to-one
+    // partitioning of locally owned elements but are still given the relevant
+    // partitioning for means of figuring out the vector entries that are
+    // going to be set right away. As for matrices, this allows for writing
+    // local contributions into the vector with multiple threads (always
+    // assuming that the same vector entry is not accessed by multiple threads
+    // at the same time). The other vectors only allow for read access of
+    // individual elements, including ghosts, but are not suitable for
+    // solvers.
+    setup_stokes_matrix (stokes_partitioning, stokes_relevant_partitioning);
+    setup_stokes_preconditioner (stokes_partitioning,
+                                 stokes_relevant_partitioning);
+    setup_temperature_matrices (temperature_partitioning,
+                                temperature_relevant_partitioning);
+
+    stokes_rhs.reinit (stokes_partitioning, stokes_relevant_partitioning,
+                       MPI_COMM_WORLD, true);
+    stokes_solution.reinit (stokes_relevant_partitioning, MPI_COMM_WORLD);
+    old_stokes_solution.reinit (stokes_solution);
+
+    temperature_rhs.reinit (temperature_partitioning,
+                            temperature_relevant_partitioning,
+                            MPI_COMM_WORLD, true);
+    temperature_solution.reinit (temperature_relevant_partitioning, MPI_COMM_WORLD);
+    old_temperature_solution.reinit (temperature_solution);
+    old_old_temperature_solution.reinit (temperature_solution);
+
+    rebuild_stokes_matrix              = true;
+    rebuild_stokes_preconditioner      = true;
+    rebuild_temperature_matrices       = true;
+    rebuild_temperature_preconditioner = true;
+
+    computing_timer.exit_section();
+  }
+
+
+
+  // @sect4{The BoussinesqFlowProblem assembly functions}
+  //
+  // Following the discussion in the introduction and in the @ref threads
+  // module, we split the assembly functions into different parts:
+  //
+  // <ul> <li> The local calculations of matrices and right hand sides, given
+  // a certain cell as input (these functions are named
+  // <code>local_assemble_*</code> below). The resulting function is, in other
+  // words, essentially the body of the loop over all cells in step-31. Note,
+  // however, that these functions store the result from the local
+  // calculations in variables of classes from the CopyData namespace.
+  //
+  // <li>These objects are then given to the second step which writes the
+  // local data into the global data structures (these functions are named
+  // <code>copy_local_to_global_*</code> below). These functions are pretty
+  // trivial.
+  //
+  // <li>These two subfunctions are then used in the respective assembly
+  // routine (called <code>assemble_*</code> below), where a WorkStream object
+  // is set up and runs over all the cells that belong to the processor's
+  // subdomain.  </ul>
+
+  // @sect5{Stokes preconditioner assembly}
+  //
+  // Let us start with the functions that build the Stokes
+  // preconditioner. The first two of these are pretty trivial, given the
+  // discussion above. Note in particular that the main point in using the
+  // scratch data object is that we want to avoid allocating any objects on
+  // the free store each time we visit a new cell. As a consequence, the
+  // assembly function below only has automatic local variables, and
+  // everything else is accessed through the scratch data object, which is
+  // allocated only once before we start the loop over all cells:
+  template <int dim>
+  void
+  BoussinesqFlowProblem<dim>::
+  local_assemble_stokes_preconditioner (const typename DoFHandler<dim>::active_cell_iterator &cell,
+                                        Assembly::Scratch::StokesPreconditioner<dim> &scratch,
+                                        Assembly::CopyData::StokesPreconditioner<dim> &data)
+  {
+    // Compute the cell contribution to the Stokes preconditioner matrix for
+    // @p cell. Work arrays live in @p scratch; the resulting local matrix and
+    // the cell's dof indices are stored in @p data.
+    const FEValuesExtractors::Vector velocity_extractor (0);
+    const FEValuesExtractors::Scalar pressure_extractor (dim);
+
+    const unsigned int n_local_dofs  = stokes_fe.dofs_per_cell;
+    const unsigned int n_quad_points = scratch.stokes_fe_values.n_quadrature_points;
+
+    // Attach the scratch FEValues object to this cell, remember where the
+    // local entries go globally, and start from a zeroed local matrix.
+    scratch.stokes_fe_values.reinit (cell);
+    cell->get_dof_indices (data.local_dof_indices);
+    data.local_matrix = 0;
+
+    for (unsigned int point=0; point<n_quad_points; ++point)
+      {
+        const double weight = scratch.stokes_fe_values.JxW(point);
+
+        // Cache the shape function data at this quadrature point so that the
+        // double loop below only reads from the scratch arrays.
+        for (unsigned int dof=0; dof<n_local_dofs; ++dof)
+          {
+            scratch.grad_phi_u[dof]
+              = scratch.stokes_fe_values[velocity_extractor].gradient (dof, point);
+            scratch.phi_p[dof]
+              = scratch.stokes_fe_values[pressure_extractor].value (dof, point);
+          }
+
+        // Velocity gradient term weighted by eta plus a pressure mass term
+        // weighted by pressure_scaling^2 / eta.
+        for (unsigned int row=0; row<n_local_dofs; ++row)
+          for (unsigned int col=0; col<n_local_dofs; ++col)
+            data.local_matrix(row,col)
+            += (EquationData::eta *
+                scalar_product (scratch.grad_phi_u[row],
+                                scratch.grad_phi_u[col])
+                +
+                (1./EquationData::eta) *
+                EquationData::pressure_scaling *
+                EquationData::pressure_scaling *
+                (scratch.phi_p[row] * scratch.phi_p[col]))
+               * weight;
+      }
+  }
+
+
+
+  template <int dim>
+  void
+  BoussinesqFlowProblem<dim>::
+  copy_local_to_global_stokes_preconditioner (const Assembly::CopyData::StokesPreconditioner<dim> &data)
+  {
+    // Add the local matrix held in @p data to the global Stokes
+    // preconditioner matrix, going through the Stokes constraints object.
+    stokes_constraints.distribute_local_to_global (
+      data.local_matrix,
+      data.local_dof_indices,
+      stokes_preconditioner_matrix);
+  }
+
+
+  // Now for the function that actually puts things together, using the
+  // WorkStream functions.  WorkStream::run needs a start and end iterator to
+  // enumerate the cells it is supposed to work on. Typically, one would use
+  // DoFHandler::begin_active() and DoFHandler::end() for that but here we
+  // actually only want the subset of cells that in fact are owned by the
+  // current processor. This is where the FilteredIterator class comes into
+  // play: you give it a range of cells and it provides an iterator that only
+  // iterates over that subset of cells that satisfy a certain predicate (a
+  // predicate is a function of one argument that either returns true or
+  // false). The predicate we use here is IteratorFilters::LocallyOwnedCell,
+  // i.e., it returns true exactly if the cell is owned by the current
+  // processor. The resulting iterator range is then exactly what we need.
+  //
+  // With this obstacle out of the way, we call the WorkStream::run
+  // function with this set of cells, scratch and copy objects, and
+  // with pointers to two functions: the local assembly and
+  // copy-local-to-global function. These functions need to have very
+  // specific signatures: three arguments in the first and one
+  // argument in the latter case (see the documentation of the
+  // WorkStream::run function for the meaning of these arguments).
+  // Note how we use the construct <code>std_cxx11::bind</code> to
+  // create a function object that satisfies this requirement. It uses
+  // placeholders <code>std_cxx11::_1, std_cxx11::_2,
+  // std_cxx11::_3</code> for the local assembly function that specify
+  // cell, scratch data, and copy data, as well as the placeholder
+  // <code>std_cxx11::_1</code> for the copy function that expects the
+  // data to be written into the global matrix (for placeholder
+  // arguments, also see the discussion in step-13's
+  // <code>assemble_linear_system()</code> function). On the other
+  // hand, the implicit zeroth argument of member functions (namely
+  // the <code>this</code> pointer of the object on which that member
+  // function is to operate) is <i>bound</i> to the
+  // <code>this</code> pointer of the current function. The
+  // WorkStream::run function, as a consequence, does not need to know
+  // anything about the object these functions work on.
+  //
+  // When the WorkStream is executed, it will create several local assembly
+  // routines of the first kind for several cells and let some available
+  // processors work on them. The function that needs to be synchronized,
+  // i.e., the write operation into the global matrix, however, is executed by
+  // only one thread at a time in the prescribed order. Of course, this only
+  // holds for the parallelization on a single MPI process. Different MPI
+  // processes will have their own WorkStream objects and do that work
+  // completely independently (and in different memory spaces). In a
+  // distributed calculation, some data will accumulate at degrees of freedom
+  // that are not owned by the respective processor. It would be inefficient
+  // to send data around every time we encounter such a dof. What happens
+  // instead is that the Trilinos sparse matrix will keep that data and send
+  // it to the owner at the end of assembly, by calling the
+  // <code>compress()</code> command.
+  template <int dim>
+  void
+  BoussinesqFlowProblem<dim>::assemble_stokes_preconditioner ()
+  {
+    // Reassemble the Stokes preconditioner matrix from scratch over all
+    // locally owned cells and finish with a compress() in add mode.
+    stokes_preconditioner_matrix = 0;
+
+    const QGauss<dim> quadrature_formula (parameters.stokes_velocity_degree+1);
+
+    // Iterator range restricted to the cells selected by the
+    // LocallyOwnedCell predicate.
+    typedef
+    FilteredIterator<typename DoFHandler<dim>::active_cell_iterator>
+    CellFilter;
+
+    const CellFilter first_owned_cell (IteratorFilters::LocallyOwnedCell(),
+                                       stokes_dof_handler.begin_active());
+    const CellFilter past_last_cell (IteratorFilters::LocallyOwnedCell(),
+                                     stokes_dof_handler.end());
+
+    // Sample scratch and copy objects handed to WorkStream::run.
+    Assembly::Scratch::StokesPreconditioner<dim>
+    sample_scratch (stokes_fe, quadrature_formula,
+                    mapping,
+                    update_JxW_values |
+                    update_values |
+                    update_gradients);
+    Assembly::CopyData::StokesPreconditioner<dim>
+    sample_copy (stokes_fe);
+
+    WorkStream::
+    run (first_owned_cell,
+         past_last_cell,
+         std_cxx11::bind (&BoussinesqFlowProblem<dim>::
+                          local_assemble_stokes_preconditioner,
+                          this,
+                          std_cxx11::_1,
+                          std_cxx11::_2,
+                          std_cxx11::_3),
+         std_cxx11::bind (&BoussinesqFlowProblem<dim>::
+                          copy_local_to_global_stokes_preconditioner,
+                          this,
+                          std_cxx11::_1),
+         sample_scratch,
+         sample_copy);
+
+    stokes_preconditioner_matrix.compress (VectorOperation::add);
+  }
+
+
+
+  // The final function in this block initiates assembly of the Stokes
+  // preconditioner matrix and then in fact builds the Stokes
+  // preconditioner. It is mostly the same as in the serial case. The only
+  // difference to step-31 is that we use a Jacobi preconditioner for the
+  // pressure mass matrix instead of IC, as discussed in the introduction.
+  template <int dim>
+  void
+  BoussinesqFlowProblem<dim>::build_stokes_preconditioner ()
+  {
+    // Nothing to do unless a rebuild has been requested.
+    if (!rebuild_stokes_preconditioner)
+      return;
+
+    computing_timer.enter_section ("   Build Stokes preconditioner");
+    pcout << "   Rebuilding Stokes preconditioner..." << std::flush;
+
+    assemble_stokes_preconditioner ();
+
+    // Constant modes of the velocity components, passed to the AMG
+    // preconditioner through its additional data below.
+    std::vector<std::vector<bool> > velocity_constant_modes;
+    {
+      const FEValuesExtractors::Vector velocity_components (0);
+      DoFTools::extract_constant_modes (stokes_dof_handler,
+                                        stokes_fe.component_mask (velocity_components),
+                                        velocity_constant_modes);
+    }
+
+    TrilinosWrappers::PreconditionAMG::AdditionalData Amg_data;
+    Amg_data.elliptic              = true;
+    Amg_data.higher_order_elements = true;
+    Amg_data.smoother_sweeps       = 2;
+    Amg_data.aggregation_threshold = 0.02;
+    Amg_data.constant_modes        = velocity_constant_modes;
+
+    // Replace both preconditioner objects by fresh ones, then initialize
+    // them: Jacobi on the (1,1) block, AMG on the (0,0) block.
+    Mp_preconditioner.reset  (new TrilinosWrappers::PreconditionJacobi());
+    Amg_preconditioner.reset (new TrilinosWrappers::PreconditionAMG());
+
+    Mp_preconditioner->initialize  (stokes_preconditioner_matrix.block(1,1));
+    Amg_preconditioner->initialize (stokes_preconditioner_matrix.block(0,0),
+                                    Amg_data);
+
+    rebuild_stokes_preconditioner = false;
+
+    pcout << std::endl;
+    computing_timer.exit_section();
+  }
+
+
+  // @sect5{Stokes system assembly}
+
+  // The next three functions implement the assembly of the Stokes system,
+  // again split up into a part performing local calculations, one for writing
+  // the local data into the global matrix and vector, and one for actually
+  // running the loop over all cells with the help of the WorkStream
+  // class. Note that the assembly of the Stokes matrix needs only to be done
+  // in case we have changed the mesh. Otherwise, just the
+  // (temperature-dependent) right hand side needs to be calculated
+  // here. Since we are working with distributed matrices and vectors, we have
+  // to call the respective <code>compress()</code> functions at the end of
+  // the assembly in order to send non-local data to the owner process.
+  template <int dim>
+  void
+  BoussinesqFlowProblem<dim>::
+  local_assemble_stokes_system (const typename DoFHandler<dim>::active_cell_iterator &cell,
+                                Assembly::Scratch::StokesSystem<dim> &scratch,
+                                Assembly::CopyData::StokesSystem<dim> &data)
+  {
+    // Compute the local Stokes right hand side for @p cell and, if
+    // rebuild_stokes_matrix is set, the local Stokes matrix as well. Results
+    // and the cell's dof indices are stored in @p data.
+    const FEValuesExtractors::Vector velocity_extractor (0);
+    const FEValuesExtractors::Scalar pressure_extractor (dim);
+
+    const unsigned int n_local_dofs  = scratch.stokes_fe_values.get_fe().dofs_per_cell;
+    const unsigned int n_quad_points = scratch.stokes_fe_values.n_quadrature_points;
+
+    // Read the flag once; it decides whether matrix terms are computed.
+    const bool assemble_matrix = rebuild_stokes_matrix;
+
+    cell->get_dof_indices (data.local_dof_indices);
+    scratch.stokes_fe_values.reinit (cell);
+
+    // Build an iterator to the cell with the same level and index in the
+    // temperature DoFHandler, so the old temperature can be evaluated at the
+    // quadrature points.
+    typename DoFHandler<dim>::active_cell_iterator
+    temperature_cell (&triangulation,
+                      cell->level(),
+                      cell->index(),
+                      &temperature_dof_handler);
+    scratch.temperature_fe_values.reinit (temperature_cell);
+    scratch.temperature_fe_values.get_function_values (old_temperature_solution,
+                                                       scratch.old_temperature_values);
+
+    if (assemble_matrix)
+      data.local_matrix = 0;
+    data.local_rhs = 0;
+
+    for (unsigned int point=0; point<n_quad_points; ++point)
+      {
+        const double weight          = scratch.stokes_fe_values.JxW(point);
+        const double old_temperature = scratch.old_temperature_values[point];
+
+        // Velocity values are always needed for the right hand side; the
+        // remaining shape function data only for the matrix.
+        for (unsigned int dof=0; dof<n_local_dofs; ++dof)
+          {
+            scratch.phi_u[dof]
+              = scratch.stokes_fe_values[velocity_extractor].value (dof, point);
+            if (assemble_matrix)
+              {
+                scratch.grads_phi_u[dof]
+                  = scratch.stokes_fe_values[velocity_extractor].symmetric_gradient (dof, point);
+                scratch.div_phi_u[dof]
+                  = scratch.stokes_fe_values[velocity_extractor].divergence (dof, point);
+                scratch.phi_p[dof]
+                  = scratch.stokes_fe_values[pressure_extractor].value (dof, point);
+              }
+          }
+
+        if (assemble_matrix)
+          for (unsigned int row=0; row<n_local_dofs; ++row)
+            for (unsigned int col=0; col<n_local_dofs; ++col)
+              data.local_matrix(row,col)
+              += (EquationData::eta * 2 *
+                  (scratch.grads_phi_u[row] * scratch.grads_phi_u[col])
+                  - (EquationData::pressure_scaling *
+                     scratch.div_phi_u[row] * scratch.phi_p[col])
+                  - (EquationData::pressure_scaling *
+                     scratch.phi_p[row] * scratch.div_phi_u[col]))
+                 * weight;
+
+        // Right hand side: density at the old temperature times gravity,
+        // tested with the velocity shape functions.
+        const Tensor<1,dim>
+        gravity = EquationData::gravity_vector (scratch.stokes_fe_values
+                                                .quadrature_point(point));
+
+        for (unsigned int row=0; row<n_local_dofs; ++row)
+          data.local_rhs(row) += (EquationData::density(old_temperature) *
+                                  gravity  *
+                                  scratch.phi_u[row]) *
+                                 weight;
+      }
+  }
+
+
+
+  template <int dim>
+  void
+  BoussinesqFlowProblem<dim>::
+  copy_local_to_global_stokes_system (const Assembly::CopyData::StokesSystem<dim> &data)
+  {
+    // When the matrix is not being rebuilt, only the right hand side
+    // contribution is transferred.
+    if (!rebuild_stokes_matrix)
+      {
+        stokes_constraints.distribute_local_to_global (data.local_rhs,
+                                                       data.local_dof_indices,
+                                                       stokes_rhs);
+        return;
+      }
+
+    // Otherwise matrix and right hand side are transferred together.
+    stokes_constraints.distribute_local_to_global (data.local_matrix,
+                                                   data.local_rhs,
+                                                   data.local_dof_indices,
+                                                   stokes_matrix,
+                                                   stokes_rhs);
+  }
+
+
+
+  template <int dim>
+  void BoussinesqFlowProblem<dim>::assemble_stokes_system ()
+  {
+    // Assemble the Stokes right hand side and, if rebuild_stokes_matrix is
+    // set, the Stokes matrix, over all locally owned cells.
+    computing_timer.enter_section ("   Assemble Stokes system");
+
+    if (rebuild_stokes_matrix)
+      stokes_matrix = 0;
+    stokes_rhs = 0;
+
+    const QGauss<dim> quadrature_formula (parameters.stokes_velocity_degree+1);
+
+    // Shape function gradients are only requested when the matrix is
+    // going to be assembled.
+    const UpdateFlags stokes_update_flags
+      = (update_values    |
+         update_quadrature_points  |
+         update_JxW_values |
+         (rebuild_stokes_matrix
+          ?
+          update_gradients
+          :
+          UpdateFlags(0)));
+
+    typedef
+    FilteredIterator<typename DoFHandler<dim>::active_cell_iterator>
+    CellFilter;
+
+    const CellFilter first_owned_cell (IteratorFilters::LocallyOwnedCell(),
+                                       stokes_dof_handler.begin_active());
+    const CellFilter past_last_cell (IteratorFilters::LocallyOwnedCell(),
+                                     stokes_dof_handler.end());
+
+    // Sample scratch and copy objects handed to WorkStream::run.
+    Assembly::Scratch::StokesSystem<dim>
+    sample_scratch (stokes_fe, mapping, quadrature_formula,
+                    stokes_update_flags,
+                    temperature_fe,
+                    update_values);
+    Assembly::CopyData::StokesSystem<dim>
+    sample_copy (stokes_fe);
+
+    WorkStream::
+    run (first_owned_cell,
+         past_last_cell,
+         std_cxx11::bind (&BoussinesqFlowProblem<dim>::
+                          local_assemble_stokes_system,
+                          this,
+                          std_cxx11::_1,
+                          std_cxx11::_2,
+                          std_cxx11::_3),
+         std_cxx11::bind (&BoussinesqFlowProblem<dim>::
+                          copy_local_to_global_stokes_system,
+                          this,
+                          std_cxx11::_1),
+         sample_scratch,
+         sample_copy);
+
+    if (rebuild_stokes_matrix)
+      stokes_matrix.compress (VectorOperation::add);
+    stokes_rhs.compress (VectorOperation::add);
+
+    // The matrix is now up to date until someone requests a rebuild again.
+    rebuild_stokes_matrix = false;
+
+    pcout << std::endl;
+    computing_timer.exit_section();
+  }
+
+
+  // @sect5{Temperature matrix assembly}
+
+  // The task to be performed by the next three functions is to calculate a
+  // mass matrix and a Laplace matrix on the temperature system. These will be
+  // combined in order to yield the semi-implicit time stepping matrix that
+  // consists of the mass matrix plus a time step-dependent weight factor
+  // times the Laplace matrix. This function is again essentially the body of
+  // the loop over all cells from step-31.
+  //
+  // The two following functions perform similar services as the ones above.
+  template <int dim>
+  void BoussinesqFlowProblem<dim>::
+  local_assemble_temperature_matrix (const typename DoFHandler<dim>::active_cell_iterator &cell,
+                                     Assembly::Scratch::TemperatureMatrix<dim> &scratch,
+                                     Assembly::CopyData::TemperatureMatrix<dim> &data)
+  {
+    // Compute the local temperature mass matrix and the local stiffness
+    // matrix (scaled by kappa) for @p cell and store them, together with the
+    // cell's dof indices, in @p data.
+    const unsigned int n_local_dofs  = scratch.temperature_fe_values.get_fe().dofs_per_cell;
+    const unsigned int n_quad_points = scratch.temperature_fe_values.n_quadrature_points;
+
+    scratch.temperature_fe_values.reinit (cell);
+    cell->get_dof_indices (data.local_dof_indices);
+
+    data.local_mass_matrix      = 0;
+    data.local_stiffness_matrix = 0;
+
+    for (unsigned int point=0; point<n_quad_points; ++point)
+      {
+        const double weight = scratch.temperature_fe_values.JxW(point);
+
+        // Cache shape values and gradients at this quadrature point.
+        for (unsigned int dof=0; dof<n_local_dofs; ++dof)
+          {
+            scratch.grad_phi_T[dof] = scratch.temperature_fe_values.shape_grad (dof, point);
+            scratch.phi_T[dof]      = scratch.temperature_fe_values.shape_value (dof, point);
+          }
+
+        for (unsigned int row=0; row<n_local_dofs; ++row)
+          for (unsigned int col=0; col<n_local_dofs; ++col)
+            {
+              data.local_mass_matrix(row,col)
+              += (scratch.phi_T[row] * scratch.phi_T[col]
+                  *
+                  weight);
+              data.local_stiffness_matrix(row,col)
+              += (EquationData::kappa * scratch.grad_phi_T[row] * scratch.grad_phi_T[col]
+                  *
+                  weight);
+            }
+      }
+  }
+
+
+
+  template <int dim>
+  void
+  BoussinesqFlowProblem<dim>::
+  copy_local_to_global_temperature_matrix (const Assembly::CopyData::TemperatureMatrix<dim> &data)
+  {
+    // Transfer the local mass matrix first, then the local stiffness matrix,
+    // each into its global counterpart through the temperature constraints.
+    temperature_constraints.distribute_local_to_global (
+      data.local_mass_matrix,
+      data.local_dof_indices,
+      temperature_mass_matrix);
+
+    temperature_constraints.distribute_local_to_global (
+      data.local_stiffness_matrix,
+      data.local_dof_indices,
+      temperature_stiffness_matrix);
+  }
+
+
+  template <int dim>
+  void BoussinesqFlowProblem<dim>::assemble_temperature_matrix ()
+  {
+    // Nothing to do unless the temperature matrices were flagged for a
+    // rebuild.
+    if (!rebuild_temperature_matrices)
+      return;
+
+    computing_timer.enter_section ("   Assemble temperature matrices");
+
+    temperature_mass_matrix      = 0;
+    temperature_stiffness_matrix = 0;
+
+    const QGauss<dim> quadrature_formula (parameters.temperature_degree+2);
+
+    // Iterator range restricted to the cells selected by the
+    // LocallyOwnedCell predicate.
+    typedef
+    FilteredIterator<typename DoFHandler<dim>::active_cell_iterator>
+    CellFilter;
+
+    const CellFilter first_owned_cell (IteratorFilters::LocallyOwnedCell(),
+                                       temperature_dof_handler.begin_active());
+    const CellFilter past_last_cell (IteratorFilters::LocallyOwnedCell(),
+                                     temperature_dof_handler.end());
+
+    // Sample scratch and copy objects handed to WorkStream::run.
+    Assembly::Scratch::TemperatureMatrix<dim>
+    sample_scratch (temperature_fe, mapping, quadrature_formula);
+    Assembly::CopyData::TemperatureMatrix<dim>
+    sample_copy (temperature_fe);
+
+    WorkStream::
+    run (first_owned_cell,
+         past_last_cell,
+         std_cxx11::bind (&BoussinesqFlowProblem<dim>::
+                          local_assemble_temperature_matrix,
+                          this,
+                          std_cxx11::_1,
+                          std_cxx11::_2,
+                          std_cxx11::_3),
+         std_cxx11::bind (&BoussinesqFlowProblem<dim>::
+                          copy_local_to_global_temperature_matrix,
+                          this,
+                          std_cxx11::_1),
+         sample_scratch,
+         sample_copy);
+
+    temperature_mass_matrix.compress (VectorOperation::add);
+    temperature_stiffness_matrix.compress (VectorOperation::add);
+
+    // The matrices are current now; the preconditioner built from them is
+    // flagged for a rebuild.
+    rebuild_temperature_matrices       = false;
+    rebuild_temperature_preconditioner = true;
+
+    computing_timer.exit_section();
+  }
+
+
+  // @sect5{Temperature right hand side assembly}
+
+  // This is the last assembly function. It calculates the right hand side of
+  // the temperature system, which includes the convection and the
+  // stabilization terms. It includes a lot of evaluations of old solutions at
+  // the quadrature points (which are necessary for calculating the artificial
+  // viscosity of stabilization), but is otherwise similar to the other
+  // assembly functions. Notice, once again, how we resolve the dilemma of
+  // having inhomogeneous boundary conditions, by just making a right hand
+  // side at this point (compare the comments for the <code>project()</code>
+  // function above): We create some matrix columns with exactly the values
+  // that would be entered for the temperature stiffness matrix, in case we
+  // have inhomogeneously constrained dofs. That will account for the correct
+  // balance of the right hand side vector with the matrix system of
+  // temperature.
+  template <int dim>
+  void BoussinesqFlowProblem<dim>::
+  local_assemble_temperature_rhs (const std::pair<double,double> global_T_range,
+                                  const double                   global_max_velocity,
+                                  const double                   global_entropy_variation,
+                                  const typename DoFHandler<dim>::active_cell_iterator &cell,
+                                  Assembly::Scratch::TemperatureRHS<dim> &scratch,
+                                  Assembly::CopyData::TemperatureRHS<dim> &data)
+  {
+    const bool use_bdf2_scheme = (timestep_number != 0);
+
+    const unsigned int dofs_per_cell = scratch.temperature_fe_values.get_fe().dofs_per_cell;
+    const unsigned int n_q_points    = scratch.temperature_fe_values.n_quadrature_points;
+
+    const FEValuesExtractors::Vector velocities (0);
+
+    data.local_rhs = 0;
+    data.matrix_for_bc = 0;
+    cell->get_dof_indices (data.local_dof_indices);
+
+    scratch.temperature_fe_values.reinit (cell);
+
+    typename DoFHandler<dim>::active_cell_iterator
+    stokes_cell (&triangulation,
+                 cell->level(),
+                 cell->index(),
+                 &stokes_dof_handler);
+    scratch.stokes_fe_values.reinit (stokes_cell);
+
+    scratch.temperature_fe_values.get_function_values (old_temperature_solution,
+                                                       scratch.old_temperature_values);
+    scratch.temperature_fe_values.get_function_values (old_old_temperature_solution,
+                                                       scratch.old_old_temperature_values);
+
+    scratch.temperature_fe_values.get_function_gradients (old_temperature_solution,
+                                                          scratch.old_temperature_grads);
+    scratch.temperature_fe_values.get_function_gradients (old_old_temperature_solution,
+                                                          scratch.old_old_temperature_grads);
+
+    scratch.temperature_fe_values.get_function_laplacians (old_temperature_solution,
+                                                           scratch.old_temperature_laplacians);
+    scratch.temperature_fe_values.get_function_laplacians (old_old_temperature_solution,
+                                                           scratch.old_old_temperature_laplacians);
+
+    scratch.stokes_fe_values[velocities].get_function_values (stokes_solution,
+                                                              scratch.old_velocity_values);
+    scratch.stokes_fe_values[velocities].get_function_values (old_stokes_solution,
+                                                              scratch.old_old_velocity_values);
+    scratch.stokes_fe_values[velocities].get_function_symmetric_gradients (stokes_solution,
+        scratch.old_strain_rates);
+    scratch.stokes_fe_values[velocities].get_function_symmetric_gradients (old_stokes_solution,
+        scratch.old_old_strain_rates);
+
+    const double nu
+      = compute_viscosity (scratch.old_temperature_values,
+                           scratch.old_old_temperature_values,
+                           scratch.old_temperature_grads,
+                           scratch.old_old_temperature_grads,
+                           scratch.old_temperature_laplacians,
+                           scratch.old_old_temperature_laplacians,
+                           scratch.old_velocity_values,
+                           scratch.old_old_velocity_values,
+                           scratch.old_strain_rates,
+                           scratch.old_old_strain_rates,
+                           global_max_velocity,
+                           global_T_range.second - global_T_range.first,
+                           0.5 * (global_T_range.second + global_T_range.first),
+                           global_entropy_variation,
+                           cell->diameter());
+
+    for (unsigned int q=0; q<n_q_points; ++q)
+      {
+        for (unsigned int k=0; k<dofs_per_cell; ++k)
+          {
+            scratch.phi_T[k]      = scratch.temperature_fe_values.shape_value (k, q);
+            scratch.grad_phi_T[k] = scratch.temperature_fe_values.shape_grad (k,q);
+          }
+
+
+        const double T_term_for_rhs
+          = (use_bdf2_scheme ?
+             (scratch.old_temperature_values[q] *
+              (1 + time_step/old_time_step)
+              -
+              scratch.old_old_temperature_values[q] *
+              (time_step * time_step) /
+              (old_time_step * (time_step + old_time_step)))
+             :
+             scratch.old_temperature_values[q]);
+
+        const double ext_T
+          = (use_bdf2_scheme ?
+             (scratch.old_temperature_values[q] *
+              (1 + time_step/old_time_step)
+              -
+              scratch.old_old_temperature_values[q] *
+              time_step/old_time_step)
+             :
+             scratch.old_temperature_values[q]);
+
+        const Tensor<1,dim> ext_grad_T
+          = (use_bdf2_scheme ?
+             (scratch.old_temperature_grads[q] *
+              (1 + time_step/old_time_step)
+              -
+              scratch.old_old_temperature_grads[q] *
+              time_step/old_time_step)
+             :
+             scratch.old_temperature_grads[q]);
+
+        const Tensor<1,dim> extrapolated_u
+          = (use_bdf2_scheme ?
+             (scratch.old_velocity_values[q] *
+              (1 + time_step/old_time_step)
+              -
+              scratch.old_old_velocity_values[q] *
+              time_step/old_time_step)
+             :
+             scratch.old_velocity_values[q]);
+
+        const SymmetricTensor<2,dim> extrapolated_strain_rate
+          = (use_bdf2_scheme ?
+             (scratch.old_strain_rates[q] *
+              (1 + time_step/old_time_step)
+              -
+              scratch.old_old_strain_rates[q] *
+              time_step/old_time_step)
+             :
+             scratch.old_strain_rates[q]);
+
+        const double gamma
+          = ((EquationData::radiogenic_heating * EquationData::density(ext_T)
+              +
+              2 * EquationData::eta * extrapolated_strain_rate * extrapolated_strain_rate) /
+             (EquationData::density(ext_T) * EquationData::specific_heat));
+
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          {
+            data.local_rhs(i) += (T_term_for_rhs * scratch.phi_T[i]
+                                  -
+                                  time_step *
+                                  extrapolated_u * ext_grad_T * scratch.phi_T[i]
+                                  -
+                                  time_step *
+                                  nu * ext_grad_T * scratch.grad_phi_T[i]
+                                  +
+                                  time_step *
+                                  gamma * scratch.phi_T[i])
+                                 *
+                                 scratch.temperature_fe_values.JxW(q);
+
+            if (temperature_constraints.is_inhomogeneously_constrained(data.local_dof_indices[i]))
+              {
+                for (unsigned int j=0; j<dofs_per_cell; ++j)
+                  data.matrix_for_bc(j,i) += (scratch.phi_T[i] * scratch.phi_T[j] *
+                                              (use_bdf2_scheme ?
+                                               ((2*time_step + old_time_step) /
+                                                (time_step + old_time_step)) : 1.)
+                                              +
+                                              scratch.grad_phi_T[i] *
+                                              scratch.grad_phi_T[j] *
+                                              EquationData::kappa *
+                                              time_step)
+                                             *
+                                             scratch.temperature_fe_values.JxW(q);
+              }
+          }
+      }
+  }
+
+
+  // Copy step for the temperature right hand side: the cell-local vector
+  // stored in <code>data</code> is handed to the temperature constraints
+  // object, which distributes it into <code>temperature_rhs</code>. The
+  // <code>matrix_for_bc</code> member is passed along as the local matrix
+  // argument of the call.
+  template <int dim>
+  void
+  BoussinesqFlowProblem<dim>::copy_local_to_global_temperature_rhs (
+    const Assembly::CopyData::TemperatureRHS<dim> &data)
+  {
+    temperature_constraints.distribute_local_to_global (
+      data.local_rhs,
+      data.local_dof_indices,
+      temperature_rhs,
+      data.matrix_for_bc);
+  }
+
+
+
+  // In the function that runs the WorkStream for actually calculating the
+  // right hand side, we also generate the final matrix. As mentioned above,
+  // it is a sum of the mass matrix and the Laplace matrix, times some time
+  // step-dependent weight. This weight is specified by the BDF-2 time
+  // integration scheme, see the introduction in step-31. What is new in this
+  // tutorial program (in addition to the use of MPI parallelization and the
+  // WorkStream class), is that we now precompute the temperature
+  // preconditioner as well. The reason is that the setup of the Jacobi
+  // preconditioner takes a noticeable time compared to the solver because we
+  // usually only need between 10 and 20 iterations for solving the
+  // temperature system (this might sound strange, as Jacobi really only
+  // consists of a diagonal, but in Trilinos it is derived from a more general
+  // framework for point relaxation preconditioners which is a bit
+  // inefficient). Hence, it is more efficient to precompute the
+  // preconditioner, even though the matrix entries may slightly change
+  // because the time step might change. This is not too big a problem because
+  // we remesh every few time steps (and regenerate the preconditioner then).
+  template <int dim>
+  void BoussinesqFlowProblem<dim>::assemble_temperature_system (const double maximal_velocity)
+  {
+    // Every time step except the one with number zero uses the BDF-2
+    // weight on the mass matrix.
+    const bool use_bdf2_scheme = (timestep_number != 0);
+
+    // The system matrix always starts from the mass matrix and always gets
+    // time_step times the stiffness matrix added; only the BDF-2 case
+    // rescales the mass matrix in between.
+    temperature_matrix.copy_from (temperature_mass_matrix);
+    if (use_bdf2_scheme)
+      temperature_matrix *= (2*time_step + old_time_step) /
+                            (time_step + old_time_step);
+    temperature_matrix.add (time_step, temperature_stiffness_matrix);
+
+    // Rebuild the Jacobi preconditioner only when it has been flagged as
+    // out of date, and clear the flag afterwards.
+    if (rebuild_temperature_preconditioner)
+      {
+        T_preconditioner.reset (new TrilinosWrappers::PreconditionJacobi());
+        T_preconditioner->initialize (temperature_matrix);
+        rebuild_temperature_preconditioner = false;
+      }
+
+    // Now the right hand side. The entropy variation is evaluated around
+    // the midpoint of the extrapolated temperature range; together with
+    // that range and the maximal velocity it is bound as a fixed argument
+    // of <code>local_assemble_temperature_rhs</code> for every cell that
+    // WorkStream::run visits.
+    temperature_rhs = 0;
+
+    const QGauss<dim> quadrature_formula(parameters.temperature_degree+2);
+
+    const std::pair<double,double>
+    global_T_range = get_extrapolated_temperature_range();
+    const double average_temperature = 0.5 * (global_T_range.first +
+                                              global_T_range.second);
+    const double global_entropy_variation =
+      get_entropy_variation (average_temperature);
+
+    // Only locally owned cells take part in the assembly.
+    typedef
+    FilteredIterator<typename DoFHandler<dim>::active_cell_iterator>
+    CellFilter;
+
+    const CellFilter first_owned_cell (IteratorFilters::LocallyOwnedCell(),
+                                       temperature_dof_handler.begin_active());
+    const CellFilter past_last_cell   (IteratorFilters::LocallyOwnedCell(),
+                                       temperature_dof_handler.end());
+
+    WorkStream::
+    run (first_owned_cell,
+         past_last_cell,
+         std_cxx11::bind (&BoussinesqFlowProblem<dim>::
+                          local_assemble_temperature_rhs,
+                          this,
+                          global_T_range,
+                          maximal_velocity,
+                          global_entropy_variation,
+                          std_cxx11::_1,
+                          std_cxx11::_2,
+                          std_cxx11::_3),
+         std_cxx11::bind (&BoussinesqFlowProblem<dim>::
+                          copy_local_to_global_temperature_rhs,
+                          this,
+                          std_cxx11::_1),
+         Assembly::Scratch::
+         TemperatureRHS<dim> (temperature_fe, stokes_fe, mapping,
+                              quadrature_formula),
+         Assembly::CopyData::
+         TemperatureRHS<dim> (temperature_fe));
+
+    temperature_rhs.compress(VectorOperation::add);
+  }
+
+
+
+
+  // @sect4{BoussinesqFlowProblem::solve}
+
+  // This function solves the linear systems in each time step of the
+  // Boussinesq problem. First, we work on the Stokes system and then on the
+  // temperature system. In essence, it does the same things as the respective
+  // function in step-31. However, there are a few changes here.
+  //
+  // The first change is related to the way we store our solution: we keep the
+  // vectors with locally owned degrees of freedom plus ghost nodes on each
+  // MPI node. When we enter a solver which is supposed to perform
+  // matrix-vector products with a distributed matrix, this is not the
+  // appropriate form, though. There, we will want to have the solution vector
+  // to be distributed in the same way as the matrix, i.e. without any
+  // ghosts. So what we do first is to generate a distributed vector called
+  // <code>distributed_stokes_solution</code> and put only the locally owned
+  // dofs into that, which is neatly done by the <code>operator=</code> of the
+  // Trilinos vector.
+  //
+  // Next, we scale the pressure solution (or rather, the initial guess) for
+  // the solver so that it matches with the length scales in the matrices, as
+  // discussed in the introduction. We also immediately scale the pressure
+  // solution back to the correct units after the solution is completed.  We
+  // also need to set the pressure values at hanging nodes to zero. This we
+  // also did in step-31 in order not to disturb the Schur complement by some
+  // vector entries that actually are irrelevant during the solve stage. As a
+  // difference to step-31, here we do it only for the locally owned pressure
+  // dofs. After solving for the Stokes solution, each processor copies the
+  // distributed solution back into the solution vector that also includes
+  // ghost elements.
+  //
+  // The third and most obvious change is that we have two variants for the
+  // Stokes solver: A fast solver that sometimes breaks down, and a robust
+  // solver that is slower. This is what we already discussed in the
+  // introduction. Here is how we realize it: First, we perform 30 iterations
+  // with the fast solver based on the simple preconditioner based on the AMG
+  // V-cycle instead of an approximate solve (this is indicated by the
+  // <code>false</code> argument to the
+  // <code>LinearSolvers::BlockSchurPreconditioner</code> object). If we
+  // converge, everything is fine. If we do not converge, the solver control
+  // object will throw an exception SolverControl::NoConvergence. Usually,
+  // this would abort the program because we don't catch them in our usual
+  // <code>solve()</code> functions. This is certainly not what we want to
+  // happen here. Rather, we want to switch to the strong solver and continue
+  // the solution process with whatever vector we got so far. Hence, we catch
+  // the exception with the C++ try/catch mechanism. We then simply go through
+  // the same solver sequence again in the <code>catch</code> clause, this
+  // time passing the @p true flag to the preconditioner for the strong
+  // solver, signaling an approximate CG solve.
+  template <int dim>
+  void BoussinesqFlowProblem<dim>::solve ()
+  {
+    computing_timer.enter_section ("   Solve Stokes system");
+
+    {
+      pcout << "   Solving Stokes system... " << std::flush;
+
+      // Work on a vector that is laid out like the right hand side and
+      // filled from the current Stokes solution.
+      TrilinosWrappers::MPI::BlockVector
+      distributed_stokes_solution (stokes_rhs);
+      distributed_stokes_solution = stokes_solution;
+
+      // Scale the pressure block for the solve; the inverse scaling is
+      // applied again once the solve and the constraint distribution are
+      // done.
+      distributed_stokes_solution.block(1) /= EquationData::pressure_scaling;
+
+      // Zero the constrained entries in the locally stored range of block
+      // 1. Its indices are offset by the size of block 0 in the block
+      // vector's global numbering.
+      const unsigned int
+      start = (distributed_stokes_solution.block(0).size() +
+               distributed_stokes_solution.block(1).local_range().first),
+              end   = (distributed_stokes_solution.block(0).size() +
+                       distributed_stokes_solution.block(1).local_range().second);
+      for (unsigned int i=start; i<end; ++i)
+        if (stokes_constraints.is_constrained (i))
+          distributed_stokes_solution(i) = 0;
+
+
+      PrimitiveVectorMemory<TrilinosWrappers::MPI::BlockVector> mem;
+
+      unsigned int n_iterations = 0;
+      const double solver_tolerance = 1e-8 * stokes_rhs.l2_norm();
+      SolverControl solver_control (30, solver_tolerance);
+
+      // First attempt: at most 30 iterations with the preconditioner
+      // constructed with the flag <code>false</code>.
+      try
+        {
+          const LinearSolvers::BlockSchurPreconditioner<TrilinosWrappers::PreconditionAMG,
+                TrilinosWrappers::PreconditionJacobi>
+                preconditioner (stokes_matrix, stokes_preconditioner_matrix,
+                                *Mp_preconditioner, *Amg_preconditioner,
+                                false);
+
+          SolverFGMRES<TrilinosWrappers::MPI::BlockVector>
+          solver(solver_control, mem,
+                 SolverFGMRES<TrilinosWrappers::MPI::BlockVector>::
+                 AdditionalData(30, true));
+          solver.solve(stokes_matrix, distributed_stokes_solution, stokes_rhs,
+                       preconditioner);
+
+          n_iterations = solver_control.last_step();
+        }
+
+      // Second attempt if the first one did not converge: continue from
+      // the vector obtained so far, now with the preconditioner flag set
+      // to <code>true</code> and a larger iteration limit. The exception
+      // is caught by const reference so that the exception object is
+      // neither copied nor sliced.
+      catch (const SolverControl::NoConvergence &)
+        {
+          const LinearSolvers::BlockSchurPreconditioner<TrilinosWrappers::PreconditionAMG,
+                TrilinosWrappers::PreconditionJacobi>
+                preconditioner (stokes_matrix, stokes_preconditioner_matrix,
+                                *Mp_preconditioner, *Amg_preconditioner,
+                                true);
+
+          SolverControl solver_control_refined (stokes_matrix.m(), solver_tolerance);
+          SolverFGMRES<TrilinosWrappers::MPI::BlockVector>
+          solver(solver_control_refined, mem,
+                 SolverFGMRES<TrilinosWrappers::MPI::BlockVector>::
+                 AdditionalData(50, true));
+          solver.solve(stokes_matrix, distributed_stokes_solution, stokes_rhs,
+                       preconditioner);
+
+          // Report the iterations of both attempts together.
+          n_iterations = (solver_control.last_step() +
+                          solver_control_refined.last_step());
+        }
+
+
+      stokes_constraints.distribute (distributed_stokes_solution);
+
+      // Undo the pressure scaling applied before the solve.
+      distributed_stokes_solution.block(1) *= EquationData::pressure_scaling;
+
+      stokes_solution = distributed_stokes_solution;
+      pcout << n_iterations  << " iterations."
+            << std::endl;
+    }
+    computing_timer.exit_section();
+
+
+    // Now let's turn to the temperature part: First, we compute the time step
+    // size. We found that we need smaller time steps for 3D than for 2D for
+    // the shell geometry. This is because the cells are more distorted in
+    // that case (it is the smallest edge length that determines the CFL
+    // number). Instead of computing the time step from maximum velocity and
+    // minimal mesh size as in step-31, we compute local CFL numbers, i.e., on
+    // each cell we compute the maximum velocity times the mesh size, and
+    // compute the maximum of them. Hence, we need to choose the factor in
+    // front of the time step slightly smaller.
+    //
+    // After temperature right hand side assembly, we solve the linear system
+    // for temperature (with fully distributed vectors without any ghosts),
+    // apply constraints and copy the vector back to one with ghosts.
+    //
+    // In the end, we extract the temperature range similarly to step-31 to
+    // produce some output (for example in order to help us choose the
+    // stabilization constants, as discussed in the introduction). The only
+    // difference is that we need to exchange maxima over all processors.
+    computing_timer.enter_section ("   Assemble temperature rhs");
+    {
+      // Remember the previous step size before computing the new one.
+      old_time_step = time_step;
+
+      const double scaling = (dim==3 ? 0.25 : 1.0);
+      time_step = (scaling/(2.1*dim*std::sqrt(1.*dim)) /
+                   (parameters.temperature_degree *
+                    get_cfl_number()));
+
+      const double maximal_velocity = get_maximal_velocity();
+      pcout << "   Maximal velocity: "
+            << maximal_velocity *EquationData::year_in_seconds * 100
+            << " cm/year"
+            << std::endl;
+      pcout << "   " << "Time step: "
+            << time_step/EquationData::year_in_seconds
+            << " years"
+            << std::endl;
+
+      temperature_solution = old_temperature_solution;
+      assemble_temperature_system (maximal_velocity);
+    }
+    computing_timer.exit_section ();
+
+    computing_timer.enter_section ("   Solve temperature system");
+    {
+      SolverControl solver_control (temperature_matrix.m(),
+                                    1e-12*temperature_rhs.l2_norm());
+      SolverCG<TrilinosWrappers::MPI::Vector>   cg (solver_control);
+
+      // As for the Stokes part: a vector laid out like the right hand side,
+      // filled from the current temperature solution.
+      TrilinosWrappers::MPI::Vector
+      distributed_temperature_solution (temperature_rhs);
+      distributed_temperature_solution = temperature_solution;
+
+      cg.solve (temperature_matrix, distributed_temperature_solution,
+                temperature_rhs, *T_preconditioner);
+
+      temperature_constraints.distribute (distributed_temperature_solution);
+      temperature_solution = distributed_temperature_solution;
+
+      pcout << "   "
+            << solver_control.last_step()
+            << " CG iterations for temperature" << std::endl;
+      // The timer section ends here; the range computation below is not
+      // part of it.
+      computing_timer.exit_section();
+
+      // temperature[0] collects the local minimum, temperature[1] the
+      // local maximum over the locally stored range of the vector.
+      double temperature[2] = { std::numeric_limits<double>::max(),
+                                -std::numeric_limits<double>::max()
+                              };
+      double global_temperature[2];
+
+      for (unsigned int i=distributed_temperature_solution.local_range().first;
+           i < distributed_temperature_solution.local_range().second; ++i)
+        {
+          temperature[0] = std::min<double> (temperature[0],
+                                             distributed_temperature_solution(i));
+          temperature[1] = std::max<double> (temperature[1],
+                                             distributed_temperature_solution(i));
+        }
+
+      // Negate the minimum so that a single max reduction yields both
+      // extrema, then flip the sign back.
+      temperature[0] *= -1.0;
+      Utilities::MPI::max (temperature, MPI_COMM_WORLD, global_temperature);
+      global_temperature[0] *= -1.0;
+
+      pcout << "   Temperature range: "
+            << global_temperature[0] << ' ' << global_temperature[1]
+            << std::endl;
+    }
+  }
+
+
+  // @sect4{BoussinesqFlowProblem::output_results}
+
+  // Next comes the function that generates the output. The quantities to
+  // output could be introduced manually like we did in step-31. An
+  // alternative is to hand this task over to a class Postprocessor that
+  // inherits from the class DataPostprocessor, which can be attached to
+  // DataOut. This allows us to output derived quantities from the solution,
+  // like the friction heating included in this example. It overloads the
+  // virtual function DataPostprocessor::compute_derived_quantities_vector,
+  // which is then internally called from DataOut::build_patches. We have to
+  // give it values of the numerical solution, its derivatives, normals to the
+  // cell, the actual evaluation points and any additional quantities. This
+  // follows the same procedure as discussed in step-29 and other programs.
+  // Postprocessor handed to DataOut in <code>output_results()</code>. It
+  // stores the two values given to its constructor and implements the
+  // DataPostprocessor interface functions declared below.
+  template <int dim>
+  class BoussinesqFlowProblem<dim>::Postprocessor : public DataPostprocessor<dim>
+  {
+  public:
+    Postprocessor (const unsigned int partition,
+                   const double       minimal_pressure);
+
+    // Fill <code>computed_quantities</code> at every evaluation point from
+    // the solution values <code>uh</code> and gradients <code>duh</code>.
+    virtual void
+    compute_derived_quantities_vector (
+      const std::vector<Vector<double> >              &uh,
+      const std::vector<std::vector<Tensor<1,dim> > > &duh,
+      const std::vector<std::vector<Tensor<2,dim> > > &dduh,
+      const std::vector<Point<dim> >                  &normals,
+      const std::vector<Point<dim> >                  &evaluation_points,
+      std::vector<Vector<double> >                    &computed_quantities) const;
+
+    // Names of the output components.
+    virtual std::vector<std::string>
+    get_names () const;
+
+    // Interpretation (vector part or scalar) of each output component.
+    virtual std::vector<DataComponentInterpretation::DataComponentInterpretation>
+    get_data_component_interpretation () const;
+
+    // Update flags this postprocessor asks for.
+    virtual UpdateFlags
+    get_needed_update_flags () const;
+
+  private:
+    // Value written to the "partition" output component.
+    const unsigned int partition;
+    // Value subtracted from the pressure before it is written out.
+    const double       minimal_pressure;
+  };
+
+
+  // The constructor only stores its two arguments in the members of the
+  // same name.
+  template <int dim>
+  BoussinesqFlowProblem<dim>::Postprocessor::Postprocessor (
+    const unsigned int partition,
+    const double       minimal_pressure)
+    : partition (partition),
+      minimal_pressure (minimal_pressure)
+  {}
+
+
+  // Here we define the names for the variables we want to output. These are
+  // the actual solution values for velocity, pressure, and temperature, as
+  // well as the friction heating and to each cell the number of the processor
+  // that owns it. This allows us to visualize the partitioning of the domain
+  // among the processors. Except for the velocity, which is vector-valued,
+  // all other quantities are scalar.
+  template <int dim>
+  std::vector<std::string>
+  BoussinesqFlowProblem<dim>::Postprocessor::get_names() const
+  {
+    std::vector<std::string> solution_names;
+
+    // One "velocity" entry per space dimension ...
+    for (unsigned int d=0; d<dim; ++d)
+      solution_names.push_back ("velocity");
+
+    // ... followed by the four scalar fields.
+    solution_names.push_back ("p");
+    solution_names.push_back ("T");
+    solution_names.push_back ("friction_heating");
+    solution_names.push_back ("partition");
+
+    return solution_names;
+  }
+
+
+  // The first <code>dim</code> components form a vector, the remaining
+  // four components are scalars.
+  template <int dim>
+  std::vector<DataComponentInterpretation::DataComponentInterpretation>
+  BoussinesqFlowProblem<dim>::Postprocessor::
+  get_data_component_interpretation () const
+  {
+    std::vector<DataComponentInterpretation::DataComponentInterpretation>
+    interpretation (dim,
+                    DataComponentInterpretation::component_is_part_of_vector);
+
+    // Growing the vector by four appends four copies of the fill value.
+    interpretation.resize (dim + 4,
+                           DataComponentInterpretation::component_is_scalar);
+
+    return interpretation;
+  }
+
+
+  // Request values, gradients and quadrature point locations.
+  template <int dim>
+  UpdateFlags
+  BoussinesqFlowProblem<dim>::Postprocessor::get_needed_update_flags() const
+  {
+    const UpdateFlags needed_flags = update_values |
+                                     update_gradients |
+                                     update_q_points;
+    return needed_flags;
+  }
+
+
+  // Now we implement the function that computes the derived quantities. As we
+  // also did for the output, we rescale the velocity from its SI units to
+  // something more readable, namely cm/year. Next, the pressure is scaled to
+  // be between 0 and the maximum pressure. This makes it more easily
+  // comparable -- in essence making all pressure variables positive or
+  // zero. Temperature is taken as is, and the friction heating is computed as
+  // $2 \eta \varepsilon(\mathbf{u}) \cdot \varepsilon(\mathbf{u})$.
+  //
+  // The quantities we output here are more for illustration, rather than for
+  // actual scientific value. We come back to this briefly in the results
+  // section of this program and explain what one may in fact be interested
+  // in.
+  template <int dim>
+  void
+  BoussinesqFlowProblem<dim>::Postprocessor::
+  compute_derived_quantities_vector (const std::vector<Vector<double> >              &uh,
+                                     const std::vector<std::vector<Tensor<1,dim> > > &duh,
+                                     const std::vector<std::vector<Tensor<2,dim> > > &/*dduh*/,
+                                     const std::vector<Point<dim> >                  &/*normals*/,
+                                     const std::vector<Point<dim> >                  &/*evaluation_points*/,
+                                     std::vector<Vector<double> >                    &computed_quantities) const
+  {
+    // All per-point arrays must have the same length, and every input
+    // vector must carry dim+2 components.
+    const unsigned int n_quadrature_points = uh.size();
+    Assert (duh.size() == n_quadrature_points,                  ExcInternalError());
+    Assert (computed_quantities.size() == n_quadrature_points,  ExcInternalError());
+    Assert (uh[0].size() == dim+2,                              ExcInternalError());
+
+    for (unsigned int q=0; q<n_quadrature_points; ++q)
+      {
+        const Vector<double> &solution_at_q = uh[q];
+        Vector<double>       &output_at_q   = computed_quantities[q];
+
+        // Components 0..dim-1: velocity, multiplied by the seconds per
+        // year and by 100.
+        for (unsigned int d=0; d<dim; ++d)
+          output_at_q(d)
+            = (solution_at_q(d) *  EquationData::year_in_seconds * 100);
+
+        // Component dim: pressure shifted by the stored minimal pressure.
+        output_at_q(dim) = (solution_at_q(dim)-minimal_pressure);
+
+        // Component dim+1: temperature, copied unchanged.
+        output_at_q(dim+1) = solution_at_q(dim+1);
+
+        // Component dim+2: friction heating, computed from the symmetrized
+        // gradient of the first dim solution components.
+        Tensor<2,dim> grad_u;
+        for (unsigned int d=0; d<dim; ++d)
+          grad_u[d] = duh[q][d];
+        const SymmetricTensor<2,dim> strain_rate = symmetrize (grad_u);
+        output_at_q(dim+2) = 2 * EquationData::eta *
+                             strain_rate * strain_rate;
+
+        // Component dim+3: the stored partition number.
+        output_at_q(dim+3) = partition;
+      }
+  }
+
+
+  // The <code>output_results()</code> function has a similar task to the one
+  // in step-31. However, here we are going to demonstrate a different
+  // technique on how to merge output from different DoFHandler objects. The
+  // way we're going to achieve this recombination is to create a joint
+  // DoFHandler that collects both components, the Stokes solution and the
+  // temperature solution. This can be nicely done by combining the finite
+  // elements from the two systems to form one FESystem, and let this
+  // collective system define a new DoFHandler object. To be sure that
+  // everything was done correctly, we perform a sanity check that ensures
+  // that we got all the dofs from both Stokes and temperature even in the
+  // combined system. We then combine the data vectors. Unfortunately, there
+  // is no straight-forward relation that tells us how to sort Stokes and
+  // temperature vector into the joint vector. The way we can get around this
+  // trouble is to rely on the information collected in the FESystem. For each
+  // dof on a cell, the joint finite element knows to which equation component
+  // (velocity component, pressure, or temperature) it belongs – that's the
+  // information we need! So we step through all cells (with iterators into
+  // all three DoFHandlers moving in sync), and for each joint cell dof, we
+  // read out that component using the FiniteElement::system_to_base_index
+  // function (see there for a description of what the various parts of its
+  // return value contain). We also need to keep track whether we're on a
+  // Stokes dof or a temperature dof, which is contained in
+  // joint_fe.system_to_base_index(i).first.first. Eventually, the dof_indices
+  // data structures on each of the three systems tell us what the relation
+  // between global vector and local dofs looks like on the present cell,
+  // which concludes this tedious work. We make sure that each processor only
+  // works on the subdomain it owns locally (and not on ghost or artificial
+  // cells) when building the joint solution vector. The same will then have
+  // to be done in DataOut::build_patches(), but that function does so
+  // automatically.
+  //
+  // What we end up with is a set of patches that we can write using the
+  // functions in DataOutBase in a variety of output formats. Here, we then
+  // have to pay attention that what each processor writes is really only its
+  // own part of the domain, i.e. we will want to write each processor's
+  // contribution into a separate file. This we do by adding an additional
+  // number to the filename when we write the solution. This is not really
+  // new, we did it similarly in step-40. Note that we write in the compressed
+  // format @p .vtu instead of plain vtk files, which saves quite some
+  // storage.
+  //
+  // All the rest of the work is done in the Postprocessor class.
+  template <int dim>
+  void BoussinesqFlowProblem<dim>::output_results ()
+  {
+    computing_timer.enter_section ("Postprocessing");
+
+    // Joint element: base element 0 is the Stokes element, base element 1
+    // is the temperature element.
+    const FESystem<dim> joint_fe (stokes_fe, 1,
+                                  temperature_fe, 1);
+
+    DoFHandler<dim> joint_dof_handler (triangulation);
+    joint_dof_handler.distribute_dofs (joint_fe);
+    Assert (joint_dof_handler.n_dofs() ==
+            stokes_dof_handler.n_dofs() + temperature_dof_handler.n_dofs(),
+            ExcInternalError());
+
+    TrilinosWrappers::MPI::Vector joint_solution;
+    joint_solution.reinit (joint_dof_handler.locally_owned_dofs(), MPI_COMM_WORLD);
+
+    // Fill the joint vector cell by cell, walking the three DoFHandlers in
+    // lock step.
+    {
+      std::vector<types::global_dof_index> joint_indices (joint_fe.dofs_per_cell);
+      std::vector<types::global_dof_index> local_stokes_dof_indices (stokes_fe.dofs_per_cell);
+      std::vector<types::global_dof_index> local_temperature_dof_indices (temperature_fe.dofs_per_cell);
+
+      typename DoFHandler<dim>::active_cell_iterator
+      joint_cell       = joint_dof_handler.begin_active(),
+      stokes_cell      = stokes_dof_handler.begin_active(),
+      temperature_cell = temperature_dof_handler.begin_active();
+      const typename DoFHandler<dim>::active_cell_iterator
+      joint_endc       = joint_dof_handler.end();
+
+      for (; joint_cell!=joint_endc;
+           ++joint_cell, ++stokes_cell, ++temperature_cell)
+        {
+          // Cells that are not locally owned are skipped.
+          if (!joint_cell->is_locally_owned())
+            continue;
+
+          joint_cell->get_dof_indices (joint_indices);
+          stokes_cell->get_dof_indices (local_stokes_dof_indices);
+          temperature_cell->get_dof_indices (local_temperature_dof_indices);
+
+          for (unsigned int i=0; i<joint_fe.dofs_per_cell; ++i)
+            {
+              // Which base element does joint dof i belong to, and which
+              // index does it have inside that base element?
+              const unsigned int base_element
+                = joint_fe.system_to_base_index(i).first.first;
+              const unsigned int index_in_base
+                = joint_fe.system_to_base_index(i).second;
+
+              if (base_element == 0)
+                {
+                  Assert (joint_fe.system_to_base_index(i).second
+                          <
+                          local_stokes_dof_indices.size(),
+                          ExcInternalError());
+
+                  joint_solution(joint_indices[i])
+                    = stokes_solution(local_stokes_dof_indices[index_in_base]);
+                }
+              else
+                {
+                  Assert (joint_fe.system_to_base_index(i).first.first == 1,
+                          ExcInternalError());
+                  Assert (joint_fe.system_to_base_index(i).second
+                          <
+                          local_temperature_dof_indices.size(),
+                          ExcInternalError());
+
+                  joint_solution(joint_indices[i])
+                    = temperature_solution(local_temperature_dof_indices[index_in_base]);
+                }
+            }
+        }
+    }
+
+    joint_solution.compress(VectorOperation::insert);
+
+    // Copy the joint solution into a vector indexed by the locally
+    // relevant dofs; that vector is what gets attached to DataOut.
+    IndexSet locally_relevant_joint_dofs(joint_dof_handler.n_dofs());
+    DoFTools::extract_locally_relevant_dofs (joint_dof_handler, locally_relevant_joint_dofs);
+    TrilinosWrappers::MPI::Vector locally_relevant_joint_solution;
+    locally_relevant_joint_solution.reinit (locally_relevant_joint_dofs, MPI_COMM_WORLD);
+    locally_relevant_joint_solution = joint_solution;
+
+    const unsigned int this_process
+      = Utilities::MPI::this_mpi_process(MPI_COMM_WORLD);
+    Postprocessor postprocessor (this_process,
+                                 stokes_solution.block(1).min());
+
+    DataOut<dim> data_out;
+    data_out.attach_dof_handler (joint_dof_handler);
+    data_out.add_data_vector (locally_relevant_joint_solution, postprocessor);
+    data_out.build_patches ();
+
+    // Every file written by this call shares the stem
+    // "solution-<out_index>"; the counter is increased at the very end.
+    static int out_index=0;
+    const std::string file_stem = ("solution-" +
+                                   Utilities::int_to_string (out_index, 5));
+
+    // This process' own piece of the output.
+    const std::string filename = (file_stem +
+                                  "." +
+                                  Utilities::int_to_string
+                                  (triangulation.locally_owned_subdomain(), 4) +
+                                  ".vtu");
+    std::ofstream output (filename.c_str());
+    data_out.write_vtu (output);
+
+
+    // Process zero additionally writes the two master records that list
+    // the per-process files, one entry per MPI process.
+    if (this_process == 0)
+      {
+        std::vector<std::string> filenames;
+        for (unsigned int i=0; i<Utilities::MPI::n_mpi_processes(MPI_COMM_WORLD); ++i)
+          filenames.push_back (file_stem +
+                               "." +
+                               Utilities::int_to_string(i, 4) +
+                               ".vtu");
+
+        const std::string pvtu_master_filename = (file_stem + ".pvtu");
+        std::ofstream pvtu_master (pvtu_master_filename.c_str());
+        data_out.write_pvtu_record (pvtu_master, filenames);
+
+        const std::string visit_master_filename = (file_stem + ".visit");
+        std::ofstream visit_master (visit_master_filename.c_str());
+        data_out.write_visit_record (visit_master, filenames);
+      }
+
+    computing_timer.exit_section ();
+    out_index++;
+  }
+
+
+
+  // @sect4{BoussinesqFlowProblem::refine_mesh}
+
+  // This function isn't really new either. Since the <code>setup_dofs</code>
+  // function that we call in the middle has its own timer section, we split
+  // timing this function into two sections. It will also allow us to easily
+  // identify which of the two is more expensive.
+  //
+  // One thing of note, however, is that we only want to compute error
+  // indicators on the locally owned subdomain. In order to achieve this, we
+  // pass one additional argument to the KellyErrorEstimator::estimate
+  // function. Note that the vector for error estimates is resized to the
+  // number of active cells present on the current process, which is less than
+  // the total number of active cells on all processors (but more than the
+  // number of locally owned active cells); each processor only has a few
+  // coarse cells around the locally owned ones, as also explained in step-40.
+  //
+  // The local error estimates are then handed to a %parallel version of
+  // GridRefinement (in namespace parallel::distributed::GridRefinement, see
+  // also step-40) which looks at the errors and finds the cells that need
+  // refinement by comparing the error values across processors. As in
+  // step-31, we want to limit the maximum grid level. So in case some cells
+  // have been marked that are already at the finest level, we simply clear
+  // the refine flags.
+  template <int dim>
+  void BoussinesqFlowProblem<dim>::refine_mesh (const unsigned int max_grid_level)
+  {
+    computing_timer.enter_section ("Refine mesh structure, part 1");
+    Vector<float> estimated_error_per_cell (triangulation.n_active_cells());
+
+    KellyErrorEstimator<dim>::estimate (temperature_dof_handler,
+                                        QGauss<dim-1>(parameters.temperature_degree+1),
+                                        typename FunctionMap<dim>::type(),
+                                        temperature_solution,
+                                        estimated_error_per_cell,
+                                        ComponentMask(),
+                                        0,
+                                        0,
+                                        triangulation.locally_owned_subdomain());
+
+    parallel::distributed::GridRefinement::
+    refine_and_coarsen_fixed_fraction (triangulation,
+                                       estimated_error_per_cell,
+                                       0.3, 0.1);
+
+    if (triangulation.n_levels() > max_grid_level)
+      for (typename Triangulation<dim>::active_cell_iterator
+           cell = triangulation.begin_active(max_grid_level);
+           cell != triangulation.end(); ++cell)
+        cell->clear_refine_flag ();
+
+    // With all flags marked as necessary, we set up two
+    // parallel::distributed::SolutionTransfer objects -- one for the
+    // temperature and one for the Stokes system -- to transfer the solutions
+    // for the current time level and the next older one. The syntax is
+    // similar to the non-%parallel solution transfer (with the exception that
+    // here a vector of pointers to the solution vectors is enough). The
+    // remainder of the function sets up the data structures again after mesh
+    // refinement and restores the solution vectors on the new mesh.
+    std::vector<const TrilinosWrappers::MPI::Vector *> x_temperature (2);
+    x_temperature[0] = &temperature_solution;
+    x_temperature[1] = &old_temperature_solution;
+    std::vector<const TrilinosWrappers::MPI::BlockVector *> x_stokes (2);
+    x_stokes[0] = &stokes_solution;
+    x_stokes[1] = &old_stokes_solution;
+
+    parallel::distributed::SolutionTransfer<dim,TrilinosWrappers::MPI::Vector>
+    temperature_trans(temperature_dof_handler);
+    parallel::distributed::SolutionTransfer<dim,TrilinosWrappers::MPI::BlockVector>
+    stokes_trans(stokes_dof_handler);
+
+    triangulation.prepare_coarsening_and_refinement();
+    temperature_trans.prepare_for_coarsening_and_refinement(x_temperature);
+    stokes_trans.prepare_for_coarsening_and_refinement(x_stokes);
+
+    triangulation.execute_coarsening_and_refinement ();
+    computing_timer.exit_section();
+
+    setup_dofs ();
+
+    computing_timer.enter_section ("Refine mesh structure, part 2");
+
+    {
+      TrilinosWrappers::MPI::Vector distributed_temp1 (temperature_rhs);
+      TrilinosWrappers::MPI::Vector distributed_temp2 (temperature_rhs);
+
+      std::vector<TrilinosWrappers::MPI::Vector *> tmp (2);
+      tmp[0] = &(distributed_temp1);
+      tmp[1] = &(distributed_temp2);
+      temperature_trans.interpolate(tmp);
+
+      // enforce constraints to make the interpolated solution conforming on
+      // the new mesh:
+      temperature_constraints.distribute(distributed_temp1);
+      temperature_constraints.distribute(distributed_temp2);
+
+      temperature_solution     = distributed_temp1;
+      old_temperature_solution = distributed_temp2;
+    }
+
+    {
+      TrilinosWrappers::MPI::BlockVector distributed_stokes (stokes_rhs);
+      TrilinosWrappers::MPI::BlockVector old_distributed_stokes (stokes_rhs);
+
+      std::vector<TrilinosWrappers::MPI::BlockVector *> stokes_tmp (2);
+      stokes_tmp[0] = &(distributed_stokes);
+      stokes_tmp[1] = &(old_distributed_stokes);
+
+      stokes_trans.interpolate (stokes_tmp);
+
+      // enforce constraints to make the interpolated solution conforming on
+      // the new mesh:
+      stokes_constraints.distribute(distributed_stokes);
+      stokes_constraints.distribute(old_distributed_stokes);
+
+      stokes_solution     = distributed_stokes;
+      old_stokes_solution = old_distributed_stokes;
+    }
+
+    computing_timer.exit_section();
+  }
+
+
+
+  // @sect4{BoussinesqFlowProblem::run}
+
+  // This is the final and controlling function in this class. It, in fact,
+  // runs the entire rest of the program and is, once more, very similar to
+  // step-31. We use a different mesh now (a GridGenerator::hyper_shell
+  // instead of a simple cube geometry), and use the
+  // <code>project_temperature_field()</code> function instead of the library
+  // function <code>VectorTools::project</code>.
+  // In this example, however, we define both a SphericalManifold() and a
+  // HyperShellBoundary() object to describe the geometry of the domain.
+  // The reason we do so here, is because we want to impose no normal flux
+  // boundary conditions, and they require knowledge of the normals to a boundary,
+  // which a SphericalManifold() alone cannot compute. Consequently, we set
+  // all manifold indicators of cells and adjacent edges to zero, then overwrite
+  // the manifold indicators of all boundary objects by one. We then associate
+  // a SphericalManifold object with the former, and the HyperShellBoundary
+  // object that can also provide normal vectors with the latter.
+  template <int dim>
+  void BoussinesqFlowProblem<dim>::run ()
+  {
+    GridGenerator::hyper_shell (triangulation,
+                                Point<dim>(),
+                                EquationData::R0,
+                                EquationData::R1,
+                                (dim==3) ? 96 : 12,
+                                true);
+    triangulation.set_all_manifold_ids(0);
+    triangulation.set_all_manifold_ids_on_boundary(1);
+    static SphericalManifold<dim> manifold;
+    static HyperShellBoundary<dim> boundary;
+    triangulation.set_manifold (0, manifold);
+    triangulation.set_manifold (1, boundary);
+
+    global_Omega_diameter = GridTools::diameter (triangulation);
+
+    triangulation.refine_global (parameters.initial_global_refinement);
+
+    setup_dofs();
+
+    unsigned int pre_refinement_step = 0;
+
+start_time_iteration:
+
+    project_temperature_field ();
+
+    timestep_number           = 0;
+    time_step = old_time_step = 0;
+
+    double time = 0;
+
+    do
+      {
+        pcout << "Timestep " << timestep_number
+              << ":  t=" << time/EquationData::year_in_seconds
+              << " years"
+              << std::endl;
+
+        assemble_stokes_system ();
+        build_stokes_preconditioner ();
+        assemble_temperature_matrix ();
+
+        solve ();
+
+        pcout << std::endl;
+
+        if ((timestep_number == 0) &&
+            (pre_refinement_step < parameters.initial_adaptive_refinement))
+          {
+            refine_mesh (parameters.initial_global_refinement +
+                         parameters.initial_adaptive_refinement);
+            ++pre_refinement_step;
+            goto start_time_iteration;
+          }
+        else if ((timestep_number > 0)
+                 &&
+                 (timestep_number % parameters.adaptive_refinement_interval == 0))
+          refine_mesh (parameters.initial_global_refinement +
+                       parameters.initial_adaptive_refinement);
+
+        if ((parameters.generate_graphical_output == true)
+            &&
+            (timestep_number % parameters.graphical_output_interval == 0))
+          output_results ();
+
+        // In order to speed up linear solvers, we extrapolate the solutions
+        // from the old time levels to the new one. This gives a very good
+        // initial guess, cutting the number of iterations needed in solvers
+        // by more than one half. We do not need to extrapolate in the last
+        // iteration, so if we reached the final time, we stop here.
+        //
+        // As the last thing during a time step (before actually bumping up
+        // the number of the time step), we check whether the current time
+        // step number is divisible by 100, and if so we let the computing
+        // timer print a summary of CPU times spent so far.
+        if (time > parameters.end_time * EquationData::year_in_seconds)
+          break;
+
+        TrilinosWrappers::MPI::BlockVector old_old_stokes_solution;
+        old_old_stokes_solution      = old_stokes_solution;
+        old_stokes_solution          = stokes_solution;
+        old_old_temperature_solution = old_temperature_solution;
+        old_temperature_solution     = temperature_solution;
+        if (old_time_step > 0)
+          {
+            //Trilinos sadd does not like ghost vectors even as input. Copy
+            //into distributed vectors for now:
+            {
+              TrilinosWrappers::MPI::BlockVector distr_solution (stokes_rhs);
+              distr_solution = stokes_solution;
+              TrilinosWrappers::MPI::BlockVector distr_old_solution (stokes_rhs);
+              distr_old_solution = old_old_stokes_solution;
+              distr_solution .sadd (1.+time_step/old_time_step, -time_step/old_time_step,
+                                    distr_old_solution);
+              stokes_solution = distr_solution;
+            }
+            {
+              TrilinosWrappers::MPI::Vector distr_solution (temperature_rhs);
+              distr_solution = temperature_solution;
+              TrilinosWrappers::MPI::Vector distr_old_solution (temperature_rhs);
+              distr_old_solution = old_old_temperature_solution;
+              distr_solution .sadd (1.+time_step/old_time_step, -time_step/old_time_step,
+                                    distr_old_solution);
+              temperature_solution = distr_solution;
+            }
+          }
+
+        if ((timestep_number > 0) && (timestep_number % 100 == 0))
+          computing_timer.print_summary ();
+
+        time += time_step;
+        ++timestep_number;
+      }
+    while (true);
+
+    // Also write graphical output for the final step unless the loop body just
+    // did (we leave via 'break' before timestep_number is incremented).
+    if ((parameters.generate_graphical_output == true)
+        &&
+        (timestep_number % parameters.graphical_output_interval != 0))
+      output_results ();
+  }
+}
+
+
+
+// @sect3{The <code>main</code> function}
+
+// The main function is short as usual and very similar to the one in
+// step-31. Since we use a parameter file whose name is specified as an
+// argument on the command line, we have to pick that name up here and pass it
+// on to the Parameters class for parsing. If no filename is given on the
+// command line, we simply use the <code>\step-32.prm</code> file which is
+// distributed together with the program.
+//
+// Because 3d computations are simply very slow unless you throw a lot of
+// processors at them, the program defaults to 2d. You can get the 3d version
+// by changing the constant <code>dim</code> below to 3.
+int main (int argc, char *argv[])
+{
+  using namespace Step32;
+  using namespace dealii;
+
+  Utilities::MPI::MPI_InitFinalize mpi_initialization(argc, argv,
+                                                      numbers::invalid_unsigned_int);
+
+  try
+    {
+      std::string parameter_filename;
+      if (argc>=2)
+        parameter_filename = argv[1];
+      else
+        parameter_filename = "step-32.prm";
+
+      const int dim = 2;
+      BoussinesqFlowProblem<dim>::Parameters  parameters(parameter_filename);
+      BoussinesqFlowProblem<dim> flow_problem (parameters);
+      flow_problem.run ();
+    }
+  catch (std::exception &exc)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+
+      return 1;
+    }
+  catch (...)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    }
+
+  return 0;
+}
diff --git a/examples/step-32/step-32.prm b/examples/step-32/step-32.prm
new file mode 100644
index 0000000..6c2abfc
--- /dev/null
+++ b/examples/step-32/step-32.prm
@@ -0,0 +1,54 @@
+# Listing of Parameters
+# ---------------------
+# The end time of the simulation in years.
+set End time                            = 1e8
+
+# Whether graphical output is to be generated or not. You may not want to get
+# graphical output if the number of processors is large.
+set Generate graphical output           = true
+
+# The number of adaptive refinement steps performed after initial global
+# refinement.
+set Initial adaptive refinement         = 2
+
+# The number of global refinement steps performed on the initial coarse mesh,
+# before the problem is first solved there.
+set Initial global refinement           = 5
+
+# The number of time steps between each generation of graphical output files.
+set Time steps between graphical output = 50
+
+# The number of time steps after which the mesh is to be adapted based on
+# computed error indicators.
+set Time steps between mesh refinement  = 10
+
+
+subsection Discretization
+  # The polynomial degree to use for the velocity variables in the Stokes
+  # system.
+  set Stokes velocity polynomial degree       = 2
+
+  # The polynomial degree to use for the temperature variable.
+  set Temperature polynomial degree           = 2
+
+  # Whether to use a Stokes discretization that is locally conservative at the
+  # expense of a larger number of degrees of freedom, or to go with a cheaper
+  # discretization that does not locally conserve mass (although it is
+  # globally conservative).
+  set Use locally conservative discretization = true
+end
+
+
+subsection Stabilization parameters
+  # The exponent in the entropy viscosity stabilization.
+  set alpha = 2
+
+  # The beta factor in the artificial viscosity stabilization. An appropriate
+  # value for 2d is 0.052 and 0.078 for 3d.
+  set beta  = 0.078
+
+  # The c_R factor in the entropy viscosity stabilization.
+  set c_R   = 0.5
+end
+
+
diff --git a/examples/step-33/CMakeLists.txt b/examples/step-33/CMakeLists.txt
new file mode 100644
index 0000000..cb8a6ba
--- /dev/null
+++ b/examples/step-33/CMakeLists.txt
@@ -0,0 +1,53 @@
+##
+#  CMake script for the step-33 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-33")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# A custom command line to run the program
+SET(TARGET_RUN ${TARGET} input.prm)
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+#
+# Are all dependencies fulfilled?
+#
+IF(NOT DEAL_II_WITH_TRILINOS)
+  MESSAGE(FATAL_ERROR "
+Error! The deal.II library found at ${DEAL_II_PATH} was not configured with
+    DEAL_II_WITH_TRILINOS = ON
+One or all of these are OFF in your installation but are required for this tutorial step."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-33/doc/builds-on b/examples/step-33/doc/builds-on
new file mode 100644
index 0000000..20c3064
--- /dev/null
+++ b/examples/step-33/doc/builds-on
@@ -0,0 +1 @@
+step-12
diff --git a/examples/step-33/doc/intro.dox b/examples/step-33/doc/intro.dox
new file mode 100644
index 0000000..54c511e
--- /dev/null
+++ b/examples/step-33/doc/intro.dox
@@ -0,0 +1,363 @@
+<br>
+
+<i>
+This program was written for fun by David Neckels (NCAR) while working
+at Sandia (on the Wyoming Express bus to and from Corrales each day).
+The main purpose was to better understand Euler flow.
+The code solves the basic Euler equations of gas dynamics, by using a
+fully implicit Newton iteration (inspired by Sandia's Aria code).  The
+code may be configured by an input deck to run different simulations
+on different meshes, with differing boundary conditions.
+<br>
+The original code and documentation was later slightly modified by Wolfgang
+Bangerth to make it more modular and allow replacing the parts that are
+specific to the Euler equations by other hyperbolic conservation laws without
+too much trouble.
+</i>
+
+<b>Note:</b> The program uses the <a
+href="http://trilinos.sandia.gov">Trilinos</a> linear solvers (which
+are part of the Aztec/Amesos package of Trilinos) and an automatic
+differentiation package, Sacado, also part of Trilinos. deal.II must
+be configured to use this package. Refer to the <a
+href="../../readme.html#trilinos">ReadMe</a> file for instructions how to
+do this.
+
+
+
+<a name="Intro"></a> <h1>Introduction</h1>
+
+<h3>Euler flow</h3>
+
+The equations that describe the movement of a compressible, inviscid
+gas (the so-called Euler equations of gas dynamics) are
+a basic system of conservation laws. In spatial dimension $d$ they read
+@f[
+\partial_t \mathbf{w} + \nabla \cdot \mathbf{F}(\mathbf{w}) =
+\mathbf{G}(\mathbf w),
+@f]
+with the solution $\mathbf{w}=(\rho v_1,\ldots,\rho v_d,\rho,
+E)^{\top}$ consisting of $\rho$ the fluid density, ${\mathbf v}=(v_1,\ldots, v_d)^T$ the
+flow velocity (and thus $\rho\mathbf v$ being the linear momentum
+density), and
+$E$ the energy density of the gas. We interpret the equations above as
+$\partial_t \mathbf{w}_i + \nabla \cdot \mathbf{F}_i(\mathbf{w}) = \mathbf
+G_i(\mathbf w)$, $i=1,\ldots,d+2$.
+
+For the Euler equations, the flux matrix $\mathbf F$ (or system of flux functions)
+is defined as (shown here for the case $d=3$)
+@f{eqnarray*}
+  \mathbf F(\mathbf w)
+  =
+  \left(
+  \begin{array}{ccc}
+    \rho v_1^2+p & \rho v_2v_1  & \rho v_3v_1 \\
+    \rho v_1v_2  & \rho v_2^2+p & \rho v_3v_2 \\
+    \rho v_1v_3  & \rho v_2v_3  & \rho v_3^2+p \\
+    \rho v_1 & \rho v_2 & \rho v_3 \\
+    (E+p) v_1 & (E+p) v_2 & (E+p) v_3
+  \end{array}
+  \right),
+@f}
+and we will choose as particular right hand side forcing only the effects of
+gravity, described by
+@f{eqnarray*}
+  \mathbf G(\mathbf w)
+  =
+  \left(
+  \begin{array}{c}
+    g_1\rho \\
+    g_2\rho \\
+    g_3\rho \\
+    0 \\
+    \rho \mathbf g \cdot \mathbf v
+  \end{array}
+  \right),
+@f}
+where $\mathbf g=(g_1,g_2,g_3)^T$ denotes the gravity vector.
+With this, the entire system of equations reads:
+@f{eqnarray*}
+  \partial_t (\rho v_i) + \sum_{s=1}^d \frac{\partial(\rho v_i v_s +
+  \delta_{is} p)}{\partial x_s} &=& g_i \rho, \qquad i=1,\dots,d, \\
+  \partial_t \rho + \sum_{s=1}^d \frac{\partial(\rho v_s)}{\partial x_s} &=& 0,  \\
+  \partial_t E + \sum_{s=1}^d \frac{\partial((E+p)v_s)}{\partial x_s} &=&
+  \rho \mathbf g \cdot \mathbf v.
+@f}
+These equations describe, respectively, the conservation of momentum,
+mass, and energy.
+The system is closed by a relation that defines the pressure: $p =
+(\gamma -1)(E-\frac{1}{2} \rho |\mathbf v|^2)$. For the constituents
+of air (mainly nitrogen and oxygen) and other diatomic gases, the ratio of
+specific heats is $\gamma=1.4$.
+
+This problem obviously falls into the class of vector-valued
+problems. A general overview of how to deal with these problems in
+deal.II can be found in the @ref vector_valued module.
+
+<h3>Discretization</h3>
+
+Discretization happens in the usual way, taking into account that this
+is a hyperbolic problem in the same style as the simple one discussed
+in step-12:
+We choose a finite element space $V_h$, and integrate our conservation law against
+our (vector-valued) test function $\mathbf{z} \in V_h$.  We then integrate by parts and approximate the
+boundary flux with a <i> numerical </i> flux $\mathbf{H}$,
+@f{eqnarray*}
+&&\int_{\Omega} (\partial_t \mathbf{w}, \mathbf{z}) + (\nabla \cdot \mathbf{F}(\mathbf{w}), \mathbf{z}) \\
+&\approx &\int_{\Omega} (\partial_t \mathbf{w}, \mathbf{z}) - (\mathbf{F}(\mathbf{w}), \nabla \mathbf{z}) + h^{\eta}(\nabla \mathbf{w} , \nabla \mathbf{z}) + \int_{\partial \Omega} (\mathbf{H}(\mathbf{w}^+, \mathbf{w}^-, \mathbf{n}), \mathbf{z}^+),
+@f}
+where a superscript $+$ denotes the interior trace of a function, and $-$ represents the outer trace.
+The diffusion term $h^{\eta}(\nabla \mathbf{w} , \nabla \mathbf{z})$ is introduced strictly for stability,
+ where $h$ is the mesh size and $\eta$ is a parameter prescribing how
+ much diffusion to add.
+
+On the boundary, we have to say what the outer trace $\mathbf{w}^-$ is.
+Depending on the boundary condition, we prescribe either of the following:
+<ul>
+<li> Inflow boundary: $\mathbf{w}^-$ is prescribed to be the desired value.
+<li> Supersonic outflow boundary: $\mathbf{w}^- = \mathbf{w}^+$
+<li> Subsonic outflow boundary: $\mathbf{w}^- = \mathbf{w}^+$ except that the energy variable
+is modified to support a prescribed pressure $p_o$, i.e.
+$\mathbf{w}^- =(\rho v_1^+, \dots, \rho v_d^+, \rho^+, p_o/(\gamma -1) + 0.5 \rho |\mathbf{v}^+|^2)$
+<li> Reflective boundary: we set $\mathbf{w}^-$ so that $(\mathbf{v}^+ + \mathbf{v}^-) \cdot \mathbf{n} = 0$ and
+$\rho^- = \rho^+,E^-=E^+$.
+</ul>
+
+More information on these issues can be found, for example, in Ralf
+Hartmann's PhD thesis ("Adaptive Finite Element Methods for the
+Compressible Euler Equations", PhD thesis, University of Heidelberg, 2002).
+
+We use a time stepping scheme to substitute the time derivative in the
+above equations. For simplicity, we define $ \mathbf{B}({\mathbf{w}_{n}})(\mathbf z) $ as the spatial residual at time step $n$ :
+
+@f{eqnarray*}
+ \mathbf{B}(\mathbf{w}_{n})(\mathbf z)  &=&
+- \int_{\Omega} \left(\mathbf{F}(\mathbf{w}_n),
+\nabla\mathbf{z}\right) +  h^{\eta}(\nabla \mathbf{w}_n , \nabla \mathbf{z}) \\
+&& +
+\int_{\partial \Omega} \left(\mathbf{H}(\mathbf{w}_n^+,
+\mathbf{w}^-(\mathbf{w}_n^+), \mathbf{n}), \mathbf{z}\right)
+-
+\int_{\Omega} \left(\mathbf{G}(\mathbf{w}_n),
+\mathbf{z}\right) .
+@f}
+
+At each time step, our full discretization is thus
+that the residual applied to any test
+function $\mathbf z$ equals zero:
+@f{eqnarray*}
+R(\mathbf{W}_{n+1})(\mathbf z) &=&
+\int_{\Omega} \left(\frac{{\mathbf w}_{n+1} - \mathbf{w}_n}{\delta t},
+\mathbf{z}\right)+
+\theta \mathbf{B}({\mathbf{w}}_{n+1})(\mathbf z) +  (1-\theta) \mathbf{B}({\mathbf w}_{n})(\mathbf z) \\
+&=& 0
+@f}
+where $ \theta \in [0,1] $ and
+$\mathbf{w}_i = \sum_k \mathbf{W}_i^k \mathbf{\phi}_k$. Choosing
+$\theta=0$ results in the explicit (forward) Euler scheme, $\theta=1$
+in the stable implicit (backward) Euler scheme, and $\theta=\frac 12$
+in the Crank-Nicolson scheme.
+
+In the implementation below, we choose the Lax-Friedrichs flux for the
+function $\mathbf H$, i.e.  $\mathbf{H}(\mathbf{a},\mathbf{b},\mathbf{n}) =
+\frac{1}{2}(\mathbf{F}(\mathbf{a})\cdot \mathbf{n} +
+\mathbf{F}(\mathbf{b})\cdot \mathbf{n} + \alpha (\mathbf{a} - \mathbf{b}))$,
+where $\alpha$ is either a fixed number specified in the input file, or where
+$\alpha$ is a mesh dependent value. In the latter case, it is chosen as
+$\frac{h}{2\delta t}$ with $h$ the diameter of the face to which the flux is
+applied, and $\delta t$ the current time step.
+
+With these choices, equating the residual to zero results in a
+nonlinear system of equations $R(\mathbf{W}_{n+1})=0$. We solve this nonlinear system by a
+Newton iteration (in the same way as explained in step-15), i.e. by iterating
+@f{eqnarray*}
+R'(\mathbf{W}^k_{n+1},\delta \mathbf{W}_{n+1}^k)(\mathbf z) & = & -
+R(\mathbf{W}^{k}_{n+1})(\mathbf z) \qquad \qquad \forall \mathbf z\in V_h \\
+\mathbf{W}^{k+1}_{n+1} &=& \mathbf{W}^k_{n+1} + \delta \mathbf{W}^k_{n+1},
+@f}
+until $|R(\mathbf{W}^k_{n+1})|$ (the residual) is sufficiently small. By
+testing with the nodal basis of a finite element space instead of all
+$\mathbf z$, we arrive at a linear system for $\delta \mathbf W$:
+@f{eqnarray*}
+\mathbf R'(\mathbf{W}^k_{n+1})\delta \mathbf{W}^k_{n+1} & = & -
+\mathbf R(\mathbf{W}^{k}_{n+1}).
+@f}
+This linear system is, in general, neither symmetric nor has any
+particular definiteness properties. We will either use a direct solver
+or Trilinos' GMRES implementation to solve it. As will become apparent from
+the <a href="#Results">results shown below</a>, this fully implicit iteration
+converges very rapidly (typically in 3 steps) and with the quadratic
+convergence order expected from a Newton method.
+
+
+<h3> Automatic differentiation </h3>
+
+Since computing the Jacobian matrix $\mathbf R'(\mathbf W^k)$ is a
+terrible beast, we use an automatic differentiation package, Sacado,
+to do this.  Sacado is a package within the <a
+href="http://trilinos.sandia.gov" target="_top">Trilinos</a> framework
+and offers a C++ template class <code>Sacado::Fad::DFad</code>
+(<code>Fad</code> standing for "forward automatic
+differentiation") that supports basic arithmetic operators and
+functions such as <code> sqrt, sin, cos, pow, </code> etc. In order to
+use this feature, one declares a collection of variables of this type
+and then denotes some of this collection as degrees of freedom, the rest of
+the variables being functions of the independent variables.  These
+variables are used in an algorithm, and as the variables are used,
+their sensitivities with respect to the degrees of freedom are
+continuously updated.
+
+One can imagine that for the full Jacobian matrix as a whole,
+this could be prohibitively expensive: the independent variables are the
+elements of $\mathbf W^k$, the dependent variables the elements of the vector $\mathbf
+R(\mathbf W^k)$. Both of these vectors can easily have tens of thousands of
+elements or more.  However, it is important to note that not all elements of
+$\mathbf R$ depend on all elements of $\mathbf W^k$: in fact, an entry in
+$\mathbf R$ only depends on an element of $\mathbf W^k$ if the two
+corresponding shape functions overlap and couple in the weak form.
+
+Specifically, it is wise to define a minimum set of
+independent AD variables that the residual on the current cell may possibly
+depend on: on every element, we define those variables as
+independent that correspond to the degrees of freedom defined on this
+cell (or, if we have to compute jump terms between cells, that
+correspond to degrees of freedom defined on either of the two adjacent
+cells), and the dependent variables are the elements of the local
+residual vector. Not doing this, i.e. defining <i>all</i> elements of
+$\mathbf W^k$ as independent, will result in a very expensive computation
+of a lot of zeros: the elements of the local residual vector are
+independent of almost all elements of the solution vector, and
+consequently their derivatives are zero; however, trying to compute
+these zeros can easily take 90% or more of the compute time of the
+entire program, as shown in an experiment inadvertently made by a student a few
+years after this program was first written.
+
+
+Coming back to the question of computing the Jacobian automatically:
+The author has used this approach side by side with a hand coded Jacobian for
+the incompressible Navier-Stokes problem and found the Sacado approach to be
+just as fast as using a hand coded Jacobian, but infinitely simpler and less
+error prone: Since using the auto-differentiation requires only that one code
+the residual $R(\mathbf{W})$, ensuring code correctness and maintaining code
+becomes tremendously more simple -- the Jacobian matrix $\mathbf R'$ is
+computed by essentially the same code that also computes the residual $\mathbf
+R$.
+
+All this said, here's a very simple example showing how Sacado can be
+used:
+
+ at code
+#include <Sacado.hpp>
+#include <iostream>
+
+typedef Sacado::Fad::DFad<double> fad_double;
+
+main() {
+
+  fad_double a,b,c;
+
+  a = 1; b = 2;
+
+  a.diff(0,2);  // Set a to be dof 0, in a 2-dof system.
+
+  b.diff(1,2);  // Set b to be dof 1, in a 2-dof system.
+
+  c = 2*a+cos(a*b);
+
+  double *derivs = &c.fastAccessDx(0); // Access derivatives
+
+  std::cout << "dc/da = " << derivs[0] << ", dc/db=" << derivs[1] << std::endl;
+
+}
+@endcode
+
+The output consists of the derivatives $\frac{\partial c(a,b)}{\partial a},
+\frac{\partial c(a,b)}{\partial b}$ of $c(a,b)=2a+\cos(ab)$ at $a=1,b=2$.
+
+It should be noted that Sacado provides more auto-differentiation capabilities than the small subset
+used in this program.  However, understanding the example above is
+enough to understand the use of Sacado in this Euler flow program.
+
+<h3> Trilinos solvers </h3>
+The program uses either the Aztec iterative solvers, or the Amesos
+sparse direct solver, both provided by
+the Trilinos package.  This package is inherently designed to be used in a parallel program; however,
+it may be used in serial just as easily, as is done here.  The Epetra package is the basic
+vector/matrix library upon which the solvers are built.  This very powerful package can be used
+to describe the parallel distribution of a vector, and to define sparse matrices that operate
+on these vectors.  Please view the commented code for more details on how these solvers are used
+within the example.
+
+<h3> Adaptivity </h3>
+The example uses an ad hoc refinement indicator that shows some usefulness in shock-type problems, and
+in the downhill flow example included.  We refine according to the squared gradient of the density.
+Hanging nodes are handled by computing the numerical flux across cells that are of differing
+refinement levels, rather than using the ConstraintMatrix class as in
+all other tutorial programs so far.  In this way, the example combines
+the continuous and DG methodologies. It also simplifies the generation
+of the Jacobian because we do not have to track constrained degrees of
+freedom through the automatic differentiation used to compute it.
+
+Further, we enforce a maximum number of refinement levels to keep refinement in check.  It is the
+author's experience that, when using adaptivity for a time dependent problem, refinement can easily bring the
+simulation to a screeching halt if care is not taken, because of time step restrictions when the mesh
+becomes too fine in any part of the domain.  The amount of refinement is
+limited in the example by letting the user specify the
+maximum level of refinement that will be present anywhere in the mesh.  In this way, refinement
+tends not to slow the simulation to a halt.  This, of course, is purely a heuristic strategy, and
+if the author's advisor heard about it, the author would likely be exiled forever from the finite
+ element error estimation community.
+
+<h3>Input deck, initial and boundary conditions</h3>
+
+We use an input file deck to drive the simulation.  In this way, we can alter the boundary conditions
+and other important properties of the simulation without having to recompile.  For more information on
+the format, look at the <a href="#Results">results section</a>, where we
+describe an example input file in more detail.
+
+In previous example programs, we have usually hard-coded the initial
+and boundary conditions. In this program, we instead use the
+expression parser class FunctionParser so that we can specify a
+generic expression in the input file and have it parsed at run time —
+this way, we can change initial conditions without the need to
+recompile the program. Consequently, no classes named
+InitialConditions or BoundaryConditions will be declared in the
+program below.
+
+
+<h3>Implementation</h3>
+
+The implementation of this program is split into three essential parts:
+<ul>
+  <li>The <code>EulerEquations</code> class that encapsulates everything that
+  completely describes the specifics of the Euler equations. This includes the
+  flux matrix $\mathbf F(\mathbf W)$, the numerical flux $\mathbf F(\mathbf
+  W^+,\mathbf W^-,\mathbf n)$, the right hand side $\mathbf G(\mathbf W)$,
+  boundary conditions, refinement indicators, postprocessing the output, and
+  similar things that require knowledge of the meaning of the individual
+  components of the solution vectors and the equations.
+
+  <li>A namespace that deals with everything that has to do with run-time
+  parameters.
+
+  <li>The <code>ConservationLaw</code> class that deals with time stepping,
+  outer nonlinear and inner linear solves, assembling the linear systems, and
+  the top-level logic that drives all this.
+</ul>
+
+The reason for this approach is that it separates the various concerns in a
+program: the <code>ConservationLaw</code> is written in such a way that it
+would be relatively straightforward to adapt it to a different set of
+equations: One would simply re-implement the members of the
+<code>EulerEquations</code> class for some other hyperbolic equation, or
+augment the existing equations by additional ones (for example by advecting
+additional variables, or by adding chemistry, etc). Such modifications,
+however, would not affect the time stepping, or the nonlinear solvers if
+correctly done, and consequently nothing in the <code>ConservationLaw</code>
+would have to be modified.
+
+Similarly, if we wanted to improve on the linear or nonlinear solvers, or on
+the time stepping scheme (as hinted at at the end of the <a
+href="#Results">results section</a>), then this would not require changes in
+the <code>EulerEquations</code> at all.
+
diff --git a/examples/step-33/doc/kind b/examples/step-33/doc/kind
new file mode 100644
index 0000000..e62f4e7
--- /dev/null
+++ b/examples/step-33/doc/kind
@@ -0,0 +1 @@
+fluids
diff --git a/examples/step-33/doc/results.dox b/examples/step-33/doc/results.dox
new file mode 100644
index 0000000..f25a996
--- /dev/null
+++ b/examples/step-33/doc/results.dox
@@ -0,0 +1,291 @@
+<a name="Results"></a>
+<h1>Results</h1>
+
+We run the problem with the mesh <code>slide.inp</code> (this file is in the
+same directory as the source code for this program) and the following input
+deck (available as <code>input.prm</code> in the same directory):
+@verbatim
+# Listing of Parameters
+# ---------------------
+
+# The input grid
+set mesh = slide.inp
+
+# Stabilization parameter
+set diffusion power = 2.0
+
+# --------------------------------------------------
+# Boundary conditions
+# We may specify boundary conditions for up to MAX_BD boundaries.
+# Your .inp file should have these boundaries designated.
+subsection boundary_1
+  set no penetration = true # reflective boundary condition
+end
+
+subsection boundary_2
+  # outflow boundary
+  # set w_2 = pressure
+  # set w_2 value = 1.5 - y
+end
+
+subsection boundary_3
+  set no penetration = true # reflective
+  # set w_3 = pressure
+  # set w_3 value = 1.0
+end
+
+subsection boundary_4
+  set no penetration = true #reflective
+end
+
+# --------------------------------------------------
+# Initial Conditions
+# We set the initial conditions of the conservative variables.  These lines
+# are passed to the expression parsing function.  You should use x,y,z for
+# the coordinate variables.
+
+subsection initial condition
+  set w_0 value = 0
+  set w_1 value = 0
+  set w_2 value = 10*(x<-0.7)*(y> 0.3)*(y< 0.45) + (1-(x<-0.7)*(y> 0.3)*(y< 0.45))*1.0
+  set w_3 value = (1.5-(1.0*1.0*y))/0.4
+end
+
+# --------------------------------------------------
+# Time stepping control
+subsection time stepping
+  set final time = 10.0 # simulation end time
+  set time step  = 0.02 # simulation time step
+  set theta scheme value = 0.5
+end
+
+subsection linear solver
+  set output         = quiet
+  set method         = gmres
+  set ilut fill      = 1.5
+  set ilut drop tolerance = 1e-6
+  set ilut absolute tolerance = 1e-6
+  set ilut relative tolerance = 1.0
+end
+
+# --------------------------------------------------
+# Output frequency and kind
+subsection output
+  set step           = 0.01
+  set schlieren plot = true
+end
+
+# --------------------------------------------------
+# Refinement control
+subsection refinement
+  set refinement = true # none only other option
+  set shock value = 1.5
+  set shock levels = 1 # how many levels of refinement to allow
+end
+
+# --------------------------------------------------
+# Flux parameters
+subsection flux
+ set stab = constant
+ #set stab value = 1.0
+end
+@endverbatim
+
+When we run the program, we get the following kind of output:
+@verbatim
+...
+T=0.14
+   Number of active cells:       1807
+   Number of degrees of freedom: 7696
+
+   NonLin Res     Lin Iter       Lin Res
+   _____________________________________
+   7.015e-03        0008        3.39e-13
+   2.150e-05        0008        1.56e-15
+   2.628e-09        0008        5.09e-20
+   5.243e-16        (converged)
+
+T=0.16
+   Number of active cells:       1807
+   Number of degrees of freedom: 7696
+
+   NonLin Res     Lin Iter       Lin Res
+   _____________________________________
+   7.145e-03        0008        3.80e-13
+   2.548e-05        0008        7.20e-16
+   4.063e-09        0008        2.49e-19
+   5.970e-16        (converged)
+
+T=0.18
+   Number of active cells:       1807
+   Number of degrees of freedom: 7696
+
+   NonLin Res     Lin Iter       Lin Res
+   _____________________________________
+   7.395e-03        0008        6.69e-13
+   2.867e-05        0008        1.33e-15
+   4.091e-09        0008        3.35e-19
+   5.617e-16        (converged)
+...
+@endverbatim
+
+This output reports the progress of the Newton iterations and the time
+stepping. Note that our implementation of the Newton iteration indeed shows
+the expected quadratic convergence order: the norm of the nonlinear residual
+in each step is roughly the norm of the previous step squared. This leads to
+the very rapid convergence we can see here. This holds up to
+time $t=1.9$, at which point the nonlinear iteration reports a
+lack of convergence:
+@verbatim
+...
+
+T=1.88
+   Number of active cells:       2119
+   Number of degrees of freedom: 9096
+
+   NonLin Res     Lin Iter       Lin Res
+   _____________________________________
+   2.251e-01        0012        9.78e-12
+   5.698e-03        0012        2.04e-13
+   3.896e-05        0012        1.48e-15
+   3.915e-09        0012        1.94e-19
+   8.800e-16        (converged)
+
+T=1.9
+   Number of active cells:       2140
+   Number of degrees of freedom: 9184
+
+   NonLin Res     Lin Iter       Lin Res
+   _____________________________________
+   2.320e-01        0013        3.94e-12
+   1.235e-01        0016        6.62e-12
+   8.494e-02        0016        6.05e-12
+   1.199e+01        0026        5.72e-10
+   1.198e+03        0002        1.20e+03
+   7.030e+03        0001        nan
+   7.030e+03        0001        nan
+   7.030e+03        0001        nan
+   7.030e+03        0001        nan
+   7.030e+03        0001        nan
+   7.030e+03        0001        nan
+
+
+----------------------------------------------------
+Exception on processing:
+
+--------------------------------------------------------
+An error occurred in line <2476> of file <\step-33.cc> in function
+    void Step33::ConservationLaw<dim>::run() [with int dim = 2]
+The violated condition was:
+    nonlin_iter <= 10
+The name and call sequence of the exception was:
+    ExcMessage ("No convergence in nonlinear solver")
+Additional Information:
+No convergence in nonlinear solver
+--------------------------------------------------------
+
+Aborting!
+----------------------------------------------------
+@endverbatim
+
+We may find out the cause and possible remedies by looking at the animation of the solution.
+
+The result of running these computations is a bunch of output files that we
+can pass to our visualization program of choice. When we collate them into a
+movie, the results of the last several time steps look like this:
+
+<img src="https://www.dealii.org/images/steps/developer/step-33.oscillation.gif " alt="" height="300">
+
+As we see, when the heavy mass of fluid hits the bottom left corner,
+some oscillation occurs and leads to the divergence of the iteration. A lazy solution to
+this issue is to add more viscosity. If we set the diffusion power $\eta = 1.5$ instead of $2.0$,
+the simulation is able to survive this crisis. Then, the result looks like this:
+
+
+<img src="https://www.dealii.org/images/steps/developer/step-33.slide.ed2.gif " alt="" height="300">
+
+The heavy mass of fluid is drawn down the slope by gravity, where
+it collides with the ski lodge and is flung into the air!  Hopefully everyone
+escapes! We can also see that the boundary between the heavy and light masses blurs quickly
+due to the artificial viscosity.
+
+We can also visualize the evolution of the adaptively refined grid:
+
+<img src="https://www.dealii.org/images/steps/developer/step-33.slide.adapt.ed2.gif " alt="" height="300">
+
+The adaptivity follows and precedes the flow pattern, based on the heuristic
+refinement scheme discussed above.
+
+
+
+
+<a name="extensions"></a>
+<h3>Possibilities for extensions</h3>
+
+<h4>Stabilization</h4>
+
+The numerical scheme we have chosen is not particularly
+stable when the artificial viscosity is small, while it is too diffusive when
+the artificial viscosity is large. Furthermore, it is known that there are more
+advanced techniques to stabilize the solution, for example streamline
+diffusion, least-squares stabilization terms, or entropy viscosity.
+
+
+
+<h4>Better linear solvers</h4>
+
+While the Newton method as a nonlinear solver appears to work very
+well if the time step is small enough, the linear solver can be
+improved. For example, in the current scheme whenever we use an
+iterative solver, an ILU is computed anew for each Newton step;
+likewise, for the direct solver, an LU decomposition of the Newton
+matrix is computed in each step. This is obviously wasteful: from one
+Newton step to another, and probably also between time steps, the
+Newton matrix does not radically change: an ILU or a sparse LU
+decomposition for one Newton step is probably still a very good
+preconditioner for the next Newton or time step. Avoiding the
+recomputation would therefore be a good way to reduce the amount of
+compute time.
+
+One could drive this a step further: since close to convergence the
+Newton matrix changes only a little bit, one may be able to define a
+quasi-Newton scheme where we only re-compute the residual (i.e. the
+right hand side vector) in each Newton iteration, and re-use the
+Newton matrix. The resulting scheme will likely not be of quadratic
+convergence order, and we have to expect to do a few more nonlinear
+iterations; however, given that we don't have to spend the time to
+build the Newton matrix each time, the resulting scheme may well be
+faster.
+
+
+<h4>Cache the explicit part of residual</h4>
+
+The residual calculated in the ConservationLaw::assemble_cell_term function
+reads
+   $R_i = \left(\frac{\mathbf{w}^{k}_{n+1} - \mathbf{w}_n}{\delta t}
+    , \mathbf{z}_i \right)_K  +
+      \theta \mathbf{B}({\mathbf{w}^{k}_{n+1}})(\mathbf{z}_i)_K +
+      (1-\theta) \mathbf{B}({\mathbf{w}_{n}}) (\mathbf{z}_i)_K $
+This means that we calculate the spatial residual twice in each Newton
+iteration step: once with respect to the current solution $\mathbf{w}^{k}_{n+1}$
+and once more with respect to the solution of the last time step, $\mathbf{w}_{n}$, which
+remains the same during all Newton iterations within one time step.
+Caching the explicit part of the residual
+ $ \mathbf{B}({\mathbf{w}_{n}}) (\mathbf{z}_i)_K$
+during the Newton iteration will save a lot of work.
+
+
+<h4>Other conservation laws</h4>
+
+Finally, as a direction beyond the immediate solution of the Euler
+equations, this program tries very hard to separate the implementation
+of everything that is specific to the Euler equations into one class
+(the <code>EulerEquations</code> class), and everything that is
+specific to assembling the matrices and vectors, nonlinear and linear
+solvers, and the general top-level logic into another (the
+<code>ConservationLaw</code> class).
+
+By replacing the definitions of flux matrices and numerical fluxes in
+the <code>EulerEquations</code> class, as well as the various other parts defined there, it
+should be possible to apply the <code>ConservationLaw</code> class to
+other hyperbolic conservation laws as well.
diff --git a/examples/step-33/doc/tooltip b/examples/step-33/doc/tooltip
new file mode 100644
index 0000000..869bdff
--- /dev/null
+++ b/examples/step-33/doc/tooltip
@@ -0,0 +1 @@
+Hyperbolic conservation laws: the Euler equations of gas dynamics.
diff --git a/examples/step-33/input.prm b/examples/step-33/input.prm
new file mode 100644
index 0000000..b9e2e73
--- /dev/null
+++ b/examples/step-33/input.prm
@@ -0,0 +1,84 @@
+# Listing of Parameters
+# ---------------------
+
+# The input grid 
+set mesh = slide.inp
+
+# Stabilization parameter
+set diffusion power = 2.0
+
+# --------------------------------------------------
+# Boundary conditions
+# We may specify boundary conditions for up to MAX_BD boundaries.
+# Your .inp file should have these boundaries designated.
+subsection boundary_1
+  set no penetration = true # reflective boundary condition
+end
+
+subsection boundary_2
+  # outflow boundary
+  # set w_2 = pressure
+  # set w_2 value = 1.5 - y
+end
+
+subsection boundary_3
+  set no penetration = true # reflective
+  # set w_3 = pressure
+  # set w_3 value = 1.0
+end
+
+subsection boundary_4
+  set no penetration = true #reflective
+end
+
+# --------------------------------------------------
+# Initial Conditions
+# We set the initial conditions of the conservative variables.  These lines
+# are passed to the expression parsing function.  You should use x,y,z for
+# the coordinate variables.
+
+subsection initial condition
+  set w_0 value = 0
+  set w_1 value = 0
+  set w_2 value = 10*(x<-0.7)*(y> 0.3)*(y< 0.45) + (1-(x<-0.7)*(y> 0.3)*(y< 0.45))*1.0
+  set w_3 value = (1.5-(1.0*1.0*y))/0.4
+end
+
+# --------------------------------------------------
+# Time stepping control
+subsection time stepping
+  set final time = 10.0 # simulation end time
+  set time step  = 0.02 # simulation time step
+  set theta scheme value = 0.5
+end
+
+subsection linear solver
+  set output         = quiet
+  set method         = gmres
+  set ilut fill      = 1.5
+  set ilut drop tolerance = 1e-6
+  set ilut absolute tolerance = 1e-6
+  set ilut relative tolerance = 1.0
+end
+
+# --------------------------------------------------
+# Output frequency and kind
+subsection output
+  set step           = 0.01
+  set schlieren plot = true
+end
+
+# --------------------------------------------------
+# Refinement control
+subsection refinement
+  set refinement = true # none only other option
+  set shock value = 1.5
+  set shock levels = 1 # how many levels of refinement to allow
+end
+
+# --------------------------------------------------
+# Flux parameters
+subsection flux
+ set stab = constant
+ #set stab value = 1.0
+end
diff --git a/examples/step-33/slide.inp b/examples/step-33/slide.inp
new file mode 100644
index 0000000..66684bc
--- /dev/null
+++ b/examples/step-33/slide.inp
@@ -0,0 +1,3560 @@
+1731 1827 0 0 0
+195 -0.964976 0.485465 0.000000
+133 -0.970149 0.500000 0.000000
+67 -1.000000 0.500000 0.000000
+180 -0.978723 0.478723 0.000000
+196 -0.945882 0.477585 0.000000
+132 -0.940299 0.500000 0.000000
+179 -0.957447 0.457447 0.000000
+197 -0.919731 0.466897 0.000000
+131 -0.910448 0.500000 0.000000
+178 -0.936170 0.436170 0.000000
+181 0.150000 -0.500000 0.000000
+187 0.150000 -0.471429 0.000000
+198 0.119014 -0.470800 0.000000
+185 0.120000 -0.500000 0.000000
+188 0.150000 -0.442857 0.000000
+199 0.117550 -0.441479 0.000000
+189 0.150000 -0.414286 0.000000
+200 0.117242 -0.412206 0.000000
+190 0.150000 -0.385714 0.000000
+201 0.118129 -0.383064 0.000000
+191 0.150000 -0.357143 0.000000
+202 0.120210 -0.354337 0.000000
+192 0.150000 -0.328571 0.000000
+203 0.121793 -0.325840 0.000000
+186 0.150000 -0.300000 0.000000
+204 0.122717 -0.296869 0.000000
+206 0.153091 -0.267708 0.000000
+205 0.123759 -0.266752 0.000000
+193 0.183333 -0.300000 0.000000
+207 0.184560 -0.267950 0.000000
+194 0.216667 -0.300000 0.000000
+208 0.216606 -0.268111 0.000000
+1 0.250000 -0.300000 0.000000
+209 0.248350 -0.268410 0.000000
+211 0.279437 -0.298917 0.000000
+210 0.278782 -0.268564 0.000000
+3 0.250000 -0.328571 0.000000
+212 0.279638 -0.328111 0.000000
+4 0.250000 -0.357143 0.000000
+213 0.279710 -0.356801 0.000000
+5 0.250000 -0.385714 0.000000
+214 0.279677 -0.385395 0.000000
+6 0.250000 -0.414286 0.000000
+215 0.279641 -0.413938 0.000000
+7 0.250000 -0.442857 0.000000
+216 0.279666 -0.442534 0.000000
+8 0.250000 -0.471429 0.000000
+217 0.279811 -0.471266 0.000000
+2 0.250000 -0.500000 0.000000
+10 0.280000 -0.500000 0.000000
+34 1.000000 0.500000 0.000000
+68 0.970149 0.500000 0.000000
+218 0.970096 0.469780 0.000000
+66 1.000000 0.469697 0.000000
+69 0.940299 0.500000 0.000000
+219 0.940231 0.469840 0.000000
+70 0.910448 0.500000 0.000000
+220 0.910371 0.469856 0.000000
+71 0.880597 0.500000 0.000000
+221 0.880527 0.469909 0.000000
+72 0.850746 0.500000 0.000000
+222 0.850684 0.469926 0.000000
+73 0.820896 0.500000 0.000000
+223 0.820842 0.469978 0.000000
+74 0.791045 0.500000 0.000000
+224 0.791001 0.470016 0.000000
+75 0.761194 0.500000 0.000000
+225 0.761159 0.470055 0.000000
+76 0.731343 0.500000 0.000000
+226 0.731316 0.470091 0.000000
+77 0.701493 0.500000 0.000000
+227 0.701472 0.470118 0.000000
+78 0.671642 0.500000 0.000000
+228 0.671627 0.470134 0.000000
+79 0.641791 0.500000 0.000000
+229 0.641780 0.470142 0.000000
+80 0.611940 0.500000 0.000000
+230 0.611934 0.470065 0.000000
+81 0.582090 0.500000 0.000000
+231 0.582089 0.470002 0.000000
+82 0.552239 0.500000 0.000000
+232 0.552248 0.469853 0.000000
+83 0.522388 0.500000 0.000000
+233 0.522413 0.469718 0.000000
+84 0.492537 0.500000 0.000000
+234 0.492577 0.469656 0.000000
+85 0.462687 0.500000 0.000000
+235 0.462743 0.469571 0.000000
+86 0.432836 0.500000 0.000000
+236 0.432895 0.469630 0.000000
+87 0.402985 0.500000 0.000000
+237 0.403046 0.469669 0.000000
+88 0.373134 0.500000 0.000000
+238 0.373191 0.469745 0.000000
+89 0.343284 0.500000 0.000000
+239 0.343335 0.469723 0.000000
+90 0.313433 0.500000 0.000000
+240 0.313478 0.469741 0.000000
+91 0.283582 0.500000 0.000000
+241 0.283622 0.469783 0.000000
+92 0.253731 0.500000 0.000000
+242 0.253768 0.469742 0.000000
+93 0.223881 0.500000 0.000000
+243 0.223918 0.469737 0.000000
+94 0.194030 0.500000 0.000000
+244 0.194064 0.469810 0.000000
+95 0.164179 0.500000 0.000000
+245 0.164210 0.469881 0.000000
+96 0.134328 0.500000 0.000000
+246 0.134355 0.469940 0.000000
+97 0.104478 0.500000 0.000000
+247 0.104500 0.469997 0.000000
+98 0.074627 0.500000 0.000000
+248 0.074645 0.470051 0.000000
+99 0.044776 0.500000 0.000000
+249 0.044790 0.470093 0.000000
+100 0.014925 0.500000 0.000000
+250 0.014935 0.470120 0.000000
+101 -0.014925 0.500000 0.000000
+251 -0.014918 0.470135 0.000000
+102 -0.044776 0.500000 0.000000
+252 -0.044771 0.470143 0.000000
+103 -0.074627 0.500000 0.000000
+253 -0.074623 0.470146 0.000000
+104 -0.104478 0.500000 0.000000
+254 -0.104496 0.469979 0.000000
+105 -0.134328 0.500000 0.000000
+255 -0.134359 0.469935 0.000000
+106 -0.164179 0.500000 0.000000
+256 -0.164225 0.469839 0.000000
+107 -0.194030 0.500000 0.000000
+257 -0.194083 0.469797 0.000000
+108 -0.223881 0.500000 0.000000
+258 -0.223940 0.469753 0.000000
+109 -0.253731 0.500000 0.000000
+259 -0.253798 0.469819 0.000000
+110 -0.283582 0.500000 0.000000
+260 -0.283647 0.470019 0.000000
+111 -0.313433 0.500000 0.000000
+261 -0.313491 0.470151 0.000000
+112 -0.343284 0.500000 0.000000
+262 -0.343348 0.470381 0.000000
+113 -0.373134 0.500000 0.000000
+263 -0.373195 0.470395 0.000000
+114 -0.402985 0.500000 0.000000
+264 -0.403049 0.470404 0.000000
+115 -0.432836 0.500000 0.000000
+265 -0.432937 0.470297 0.000000
+116 -0.462687 0.500000 0.000000
+266 -0.462848 0.469995 0.000000
+117 -0.492537 0.500000 0.000000
+267 -0.492807 0.469765 0.000000
+118 -0.522388 0.500000 0.000000
+268 -0.522760 0.469580 0.000000
+119 -0.552239 0.500000 0.000000
+269 -0.552730 0.469652 0.000000
+120 -0.582090 0.500000 0.000000
+270 -0.582662 0.469731 0.000000
+121 -0.611940 0.500000 0.000000
+271 -0.612594 0.469999 0.000000
+122 -0.641791 0.500000 0.000000
+272 -0.642647 0.470419 0.000000
+123 -0.671642 0.500000 0.000000
+273 -0.672638 0.470543 0.000000
+124 -0.701493 0.500000 0.000000
+274 -0.702968 0.469800 0.000000
+125 -0.731343 0.500000 0.000000
+275 -0.733232 0.468149 0.000000
+126 -0.761194 0.500000 0.000000
+276 -0.765399 0.466448 0.000000
+127 -0.791045 0.500000 0.000000
+277 -0.795702 0.469563 0.000000
+128 -0.820896 0.500000 0.000000
+278 -0.822797 0.472070 0.000000
+129 -0.850746 0.500000 0.000000
+279 -0.850723 0.467038 0.000000
+130 -0.880597 0.500000 0.000000
+303 -0.887028 0.454038 0.000000
+11 0.310000 -0.500000 0.000000
+280 0.309694 -0.471123 0.000000
+12 0.340000 -0.500000 0.000000
+281 0.339686 -0.471033 0.000000
+13 0.370000 -0.500000 0.000000
+282 0.369721 -0.471055 0.000000
+14 0.400000 -0.500000 0.000000
+283 0.399756 -0.470957 0.000000
+15 0.430000 -0.500000 0.000000
+284 0.429805 -0.470822 0.000000
+16 0.460000 -0.500000 0.000000
+285 0.459835 -0.470712 0.000000
+17 0.490000 -0.500000 0.000000
+286 0.489873 -0.470554 0.000000
+18 0.520000 -0.500000 0.000000
+287 0.519906 -0.470379 0.000000
+19 0.550000 -0.500000 0.000000
+288 0.549930 -0.470248 0.000000
+20 0.580000 -0.500000 0.000000
+289 0.579948 -0.470155 0.000000
+21 0.610000 -0.500000 0.000000
+290 0.609962 -0.470091 0.000000
+22 0.640000 -0.500000 0.000000
+291 0.639972 -0.470050 0.000000
+23 0.670000 -0.500000 0.000000
+292 0.669980 -0.470026 0.000000
+24 0.700000 -0.500000 0.000000
+293 0.699986 -0.470013 0.000000
+25 0.730000 -0.500000 0.000000
+294 0.729990 -0.470006 0.000000
+26 0.760000 -0.500000 0.000000
+295 0.759994 -0.470003 0.000000
+27 0.790000 -0.500000 0.000000
+296 0.789980 -0.469916 0.000000
+28 0.820000 -0.500000 0.000000
+297 0.819956 -0.469877 0.000000
+29 0.850000 -0.500000 0.000000
+298 0.849929 -0.469833 0.000000
+30 0.880000 -0.500000 0.000000
+299 0.879924 -0.469805 0.000000
+31 0.910000 -0.500000 0.000000
+300 0.909913 -0.469792 0.000000
+32 0.940000 -0.500000 0.000000
+301 0.939922 -0.469775 0.000000
+33 0.970000 -0.500000 0.000000
+302 0.969956 -0.469740 0.000000
+9 1.000000 -0.500000 0.000000
+35 1.000000 -0.469697 0.000000
+177 -0.914894 0.414894 0.000000
+176 -0.893617 0.393617 0.000000
+304 -0.867787 0.420844 0.000000
+175 -0.872340 0.372340 0.000000
+305 -0.847479 0.396796 0.000000
+174 -0.851064 0.351064 0.000000
+306 -0.826567 0.374510 0.000000
+173 -0.829787 0.329787 0.000000
+307 -0.806205 0.352141 0.000000
+172 -0.808511 0.308511 0.000000
+308 -0.786305 0.329477 0.000000
+171 -0.787234 0.287234 0.000000
+309 -0.766015 0.307496 0.000000
+170 -0.765957 0.265957 0.000000
+310 -0.745195 0.286345 0.000000
+169 -0.744681 0.244681 0.000000
+311 -0.723868 0.265704 0.000000
+168 -0.723404 0.223404 0.000000
+312 -0.702394 0.245107 0.000000
+167 -0.702128 0.202128 0.000000
+313 -0.680976 0.224300 0.000000
+166 -0.680851 0.180851 0.000000
+314 -0.659597 0.203254 0.000000
+165 -0.659574 0.159574 0.000000
+315 -0.638233 0.182030 0.000000
+164 -0.638298 0.138298 0.000000
+316 -0.616915 0.160630 0.000000
+163 -0.617021 0.117021 0.000000
+317 -0.595665 0.139192 0.000000
+162 -0.595745 0.095745 0.000000
+318 -0.574431 0.117764 0.000000
+161 -0.574468 0.074468 0.000000
+319 -0.553204 0.096309 0.000000
+160 -0.553191 0.053191 0.000000
+320 -0.531840 0.075023 0.000000
+159 -0.531915 0.031915 0.000000
+321 -0.510492 0.053733 0.000000
+158 -0.510638 0.010638 0.000000
+322 -0.489127 0.032461 0.000000
+157 -0.489362 -0.010638 0.000000
+323 -0.467791 0.011178 0.000000
+156 -0.468085 -0.031915 0.000000
+324 -0.446536 -0.010172 0.000000
+155 -0.446809 -0.053191 0.000000
+325 -0.425240 -0.031466 0.000000
+154 -0.425532 -0.074468 0.000000
+326 -0.404012 -0.052824 0.000000
+153 -0.404255 -0.095745 0.000000
+327 -0.382783 -0.074175 0.000000
+152 -0.382979 -0.117021 0.000000
+328 -0.361545 -0.095511 0.000000
+151 -0.361702 -0.138298 0.000000
+329 -0.340288 -0.116831 0.000000
+150 -0.340426 -0.159574 0.000000
+330 -0.319034 -0.138159 0.000000
+149 -0.319149 -0.180851 0.000000
+331 -0.297766 -0.159484 0.000000
+148 -0.297872 -0.202128 0.000000
+332 -0.276489 -0.180797 0.000000
+147 -0.276596 -0.223404 0.000000
+333 -0.255087 -0.202105 0.000000
+146 -0.255319 -0.244681 0.000000
+334 -0.233674 -0.223466 0.000000
+145 -0.234043 -0.265957 0.000000
+335 -0.212119 -0.244916 0.000000
+144 -0.212766 -0.287234 0.000000
+336 -0.190657 -0.266392 0.000000
+143 -0.191489 -0.308511 0.000000
+337 -0.169261 -0.287934 0.000000
+142 -0.170213 -0.329787 0.000000
+338 -0.147854 -0.309624 0.000000
+141 -0.148936 -0.351064 0.000000
+339 -0.126431 -0.331364 0.000000
+140 -0.127660 -0.372340 0.000000
+340 -0.105256 -0.353368 0.000000
+139 -0.106383 -0.393617 0.000000
+341 -0.083924 -0.375263 0.000000
+138 -0.085106 -0.414894 0.000000
+342 -0.062097 -0.396824 0.000000
+137 -0.063830 -0.436170 0.000000
+343 -0.040132 -0.418076 0.000000
+136 -0.042553 -0.457447 0.000000
+344 -0.019646 -0.439891 0.000000
+135 -0.021277 -0.478723 0.000000
+345 -0.002475 -0.461393 0.000000
+134 0.000000 -0.500000 0.000000
+346 0.009599 -0.478226 0.000000
+182 0.030000 -0.500000 0.000000
+347 0.029922 -0.474701 0.000000
+183 0.060000 -0.500000 0.000000
+348 0.057397 -0.471689 0.000000
+184 0.090000 -0.500000 0.000000
+349 0.087960 -0.470463 0.000000
+36 1.000000 -0.439394 0.000000
+350 0.969923 -0.439465 0.000000
+37 1.000000 -0.409091 0.000000
+351 0.969894 -0.409159 0.000000
+38 1.000000 -0.378788 0.000000
+352 0.969889 -0.378860 0.000000
+39 1.000000 -0.348485 0.000000
+353 0.969858 -0.348548 0.000000
+40 1.000000 -0.318182 0.000000
+354 0.969843 -0.318237 0.000000
+41 1.000000 -0.287879 0.000000
+355 0.969809 -0.287925 0.000000
+42 1.000000 -0.257576 0.000000
+356 0.969784 -0.257613 0.000000
+43 1.000000 -0.227273 0.000000
+357 0.969757 -0.227302 0.000000
+44 1.000000 -0.196970 0.000000
+358 0.969734 -0.196993 0.000000
+45 1.000000 -0.166667 0.000000
+359 0.969717 -0.166684 0.000000
+46 1.000000 -0.136364 0.000000
+360 0.969707 -0.136377 0.000000
+47 1.000000 -0.106061 0.000000
+361 0.969701 -0.106070 0.000000
+48 1.000000 -0.075758 0.000000
+362 0.969699 -0.075764 0.000000
+49 1.000000 -0.045455 0.000000
+363 0.969698 -0.045459 0.000000
+50 1.000000 -0.015152 0.000000
+364 0.969697 -0.015155 0.000000
+51 1.000000 0.015152 0.000000
+365 0.969697 0.015149 0.000000
+52 1.000000 0.045455 0.000000
+366 0.969697 0.045453 0.000000
+53 1.000000 0.075758 0.000000
+367 0.969697 0.075757 0.000000
+54 1.000000 0.106061 0.000000
+368 0.969697 0.106060 0.000000
+55 1.000000 0.136364 0.000000
+369 0.969697 0.136363 0.000000
+56 1.000000 0.166667 0.000000
+370 0.969697 0.166666 0.000000
+57 1.000000 0.196970 0.000000
+371 0.969697 0.196970 0.000000
+58 1.000000 0.227273 0.000000
+372 0.969697 0.227273 0.000000
+59 1.000000 0.257576 0.000000
+373 0.969795 0.257643 0.000000
+60 1.000000 0.287879 0.000000
+374 0.969874 0.287995 0.000000
+61 1.000000 0.318182 0.000000
+375 0.969958 0.318338 0.000000
+62 1.000000 0.348485 0.000000
+376 0.969973 0.348658 0.000000
+63 1.000000 0.378788 0.000000
+377 0.970000 0.378980 0.000000
+64 1.000000 0.409091 0.000000
+378 0.970010 0.409285 0.000000
+65 1.000000 0.439394 0.000000
+379 0.970062 0.439531 0.000000
+503 -0.841822 0.439272 0.000000
+380 -0.823383 0.418799 0.000000
+381 -0.801366 0.398075 0.000000
+382 -0.781899 0.373291 0.000000
+383 -0.763952 0.349541 0.000000
+384 -0.745019 0.327062 0.000000
+385 -0.724690 0.306382 0.000000
+386 -0.703265 0.286716 0.000000
+387 -0.681523 0.267067 0.000000
+388 -0.660018 0.246574 0.000000
+389 -0.638443 0.225771 0.000000
+390 -0.616833 0.204679 0.000000
+391 -0.595551 0.183139 0.000000
+392 -0.574409 0.161470 0.000000
+393 -0.553222 0.139792 0.000000
+394 -0.531865 0.118362 0.000000
+395 -0.510475 0.096979 0.000000
+396 -0.489009 0.075696 0.000000
+397 -0.467590 0.054392 0.000000
+398 -0.446270 0.033017 0.000000
+399 -0.424906 0.011735 0.000000
+400 -0.403621 -0.009619 0.000000
+401 -0.382400 -0.031023 0.000000
+402 -0.361173 -0.052410 0.000000
+403 -0.339922 -0.073800 0.000000
+404 -0.318715 -0.095237 0.000000
+405 -0.297453 -0.116646 0.000000
+406 -0.276156 -0.138018 0.000000
+407 -0.254723 -0.159496 0.000000
+408 -0.233335 -0.180914 0.000000
+409 -0.211596 -0.202409 0.000000
+410 -0.189882 -0.223912 0.000000
+411 -0.168328 -0.245541 0.000000
+412 -0.146829 -0.267432 0.000000
+413 -0.125347 -0.289413 0.000000
+414 -0.103926 -0.311617 0.000000
+415 -0.082894 -0.334299 0.000000
+416 -0.061472 -0.356952 0.000000
+417 -0.038982 -0.378982 0.000000
+418 -0.016521 -0.400436 0.000000
+419 0.006313 -0.423591 0.000000
+421 0.051537 -0.441884 0.000000
+420 0.021625 -0.449713 0.000000
+422 0.085122 -0.440903 0.000000
+423 0.082947 -0.409815 0.000000
+424 0.086501 -0.379229 0.000000
+425 0.090943 -0.350788 0.000000
+426 0.094089 -0.322730 0.000000
+427 0.095265 -0.294581 0.000000
+428 0.094987 -0.265825 0.000000
+430 0.123468 -0.236299 0.000000
+429 0.093679 -0.236645 0.000000
+431 0.153728 -0.236070 0.000000
+432 0.184970 -0.235916 0.000000
+433 0.216576 -0.236122 0.000000
+434 0.247804 -0.236880 0.000000
+435 0.278352 -0.237921 0.000000
+437 0.308849 -0.268892 0.000000
+436 0.308561 -0.238863 0.000000
+438 0.309174 -0.298672 0.000000
+439 0.309365 -0.327764 0.000000
+440 0.309409 -0.356452 0.000000
+441 0.309369 -0.385004 0.000000
+442 0.309363 -0.413591 0.000000
+443 0.309512 -0.442324 0.000000
+444 0.940150 0.439639 0.000000
+445 0.910293 0.439719 0.000000
+446 0.880439 0.439764 0.000000
+447 0.850606 0.439851 0.000000
+448 0.820773 0.439905 0.000000
+449 0.790942 0.439997 0.000000
+450 0.761110 0.440072 0.000000
+451 0.731277 0.440144 0.000000
+452 0.701441 0.440206 0.000000
+453 0.671603 0.440248 0.000000
+454 0.641769 0.440131 0.000000
+455 0.611935 0.440030 0.000000
+456 0.582116 0.439803 0.000000
+457 0.552311 0.439544 0.000000
+458 0.522496 0.439372 0.000000
+459 0.492687 0.439151 0.000000
+460 0.462850 0.439112 0.000000
+461 0.433008 0.439157 0.000000
+462 0.403152 0.439190 0.000000
+463 0.373296 0.439261 0.000000
+464 0.343430 0.439326 0.000000
+465 0.313568 0.439388 0.000000
+466 0.283709 0.439386 0.000000
+467 0.253856 0.439367 0.000000
+468 0.223995 0.439416 0.000000
+469 0.194134 0.439557 0.000000
+470 0.164274 0.439658 0.000000
+471 0.134410 0.439807 0.000000
+472 0.104545 0.439947 0.000000
+473 0.074682 0.440045 0.000000
+474 0.044821 0.440112 0.000000
+475 0.014962 0.440171 0.000000
+476 -0.014897 0.440222 0.000000
+477 -0.044754 0.440258 0.000000
+478 -0.074665 0.440098 0.000000
+479 -0.104552 0.439944 0.000000
+480 -0.134459 0.439734 0.000000
+481 -0.164338 0.439555 0.000000
+482 -0.194223 0.439404 0.000000
+483 -0.224113 0.439388 0.000000
+484 -0.253960 0.439636 0.000000
+485 -0.283803 0.439999 0.000000
+486 -0.313653 0.440461 0.000000
+487 -0.343481 0.440757 0.000000
+488 -0.373319 0.440824 0.000000
+489 -0.403191 0.440764 0.000000
+490 -0.433152 0.440474 0.000000
+491 -0.463150 0.439907 0.000000
+492 -0.493171 0.439333 0.000000
+493 -0.523226 0.438998 0.000000
+494 -0.553219 0.439109 0.000000
+495 -0.583244 0.439382 0.000000
+496 -0.613191 0.439846 0.000000
+497 -0.642956 0.440951 0.000000
+498 -0.672668 0.441524 0.000000
+499 -0.702061 0.440023 0.000000
+500 -0.733531 0.435747 0.000000
+501 -0.771806 0.426884 0.000000
+553 -0.803350 0.441135 0.000000
+502 -0.822712 0.450765 0.000000
+504 0.939858 -0.439508 0.000000
+505 0.939812 -0.409239 0.000000
+506 0.939773 -0.378933 0.000000
+507 0.939746 -0.348626 0.000000
+508 0.939690 -0.318305 0.000000
+509 0.939650 -0.287984 0.000000
+510 0.939589 -0.257662 0.000000
+511 0.939539 -0.227343 0.000000
+512 0.939492 -0.197025 0.000000
+513 0.939452 -0.166709 0.000000
+514 0.939425 -0.136395 0.000000
+515 0.939409 -0.106084 0.000000
+516 0.939401 -0.075774 0.000000
+517 0.939397 -0.045467 0.000000
+518 0.939395 -0.015160 0.000000
+519 0.939394 0.015146 0.000000
+520 0.939394 0.045451 0.000000
+521 0.939394 0.075755 0.000000
+522 0.939394 0.106059 0.000000
+523 0.939394 0.136362 0.000000
+524 0.939394 0.166666 0.000000
+525 0.939394 0.196969 0.000000
+526 0.939509 0.227355 0.000000
+527 0.939655 0.257739 0.000000
+528 0.939814 0.288143 0.000000
+529 0.939913 0.318502 0.000000
+530 0.939975 0.348849 0.000000
+531 0.940009 0.379160 0.000000
+532 0.940091 0.409414 0.000000
+533 0.339432 -0.442176 0.000000
+534 0.369451 -0.442035 0.000000
+535 0.399523 -0.441897 0.000000
+536 0.429589 -0.441684 0.000000
+537 0.459678 -0.441399 0.000000
+538 0.489748 -0.441062 0.000000
+539 0.519800 -0.440764 0.000000
+540 0.549844 -0.440522 0.000000
+541 0.579882 -0.440330 0.000000
+542 0.609913 -0.440196 0.000000
+543 0.639937 -0.440110 0.000000
+544 0.669955 -0.440059 0.000000
+545 0.699968 -0.440030 0.000000
+546 0.729978 -0.440014 0.000000
+547 0.759947 -0.439909 0.000000
+548 0.789937 -0.439803 0.000000
+549 0.819882 -0.439709 0.000000
+550 0.849843 -0.439644 0.000000
+551 0.879819 -0.439597 0.000000
+552 0.909827 -0.439560 0.000000
+590 0.040432 -0.405830 0.000000
+670 -0.755455 0.393182 0.000000
+554 -0.740317 0.367025 0.000000
+555 -0.724443 0.344933 0.000000
+556 -0.704864 0.325740 0.000000
+557 -0.683231 0.307949 0.000000
+558 -0.661196 0.289226 0.000000
+559 -0.639331 0.269137 0.000000
+560 -0.617409 0.248586 0.000000
+561 -0.595465 0.227639 0.000000
+562 -0.574245 0.205848 0.000000
+563 -0.553198 0.183785 0.000000
+564 -0.531973 0.162012 0.000000
+565 -0.510563 0.140460 0.000000
+566 -0.489049 0.119070 0.000000
+567 -0.467535 0.097696 0.000000
+568 -0.446086 0.076355 0.000000
+569 -0.424583 0.055151 0.000000
+570 -0.403151 0.033875 0.000000
+571 -0.381891 0.012449 0.000000
+572 -0.360645 -0.008987 0.000000
+573 -0.339410 -0.030497 0.000000
+574 -0.318218 -0.052043 0.000000
+575 -0.296914 -0.073549 0.000000
+576 -0.275621 -0.095064 0.000000
+577 -0.254107 -0.116660 0.000000
+578 -0.232698 -0.138235 0.000000
+579 -0.211016 -0.159840 0.000000
+580 -0.189307 -0.181488 0.000000
+581 -0.167499 -0.203012 0.000000
+582 -0.145658 -0.224717 0.000000
+583 -0.124325 -0.246942 0.000000
+584 -0.102865 -0.269240 0.000000
+585 -0.081284 -0.291521 0.000000
+586 -0.060814 -0.314997 0.000000
+587 -0.039868 -0.338366 0.000000
+588 -0.017866 -0.359895 0.000000
+589 0.007087 -0.381124 0.000000
+591 0.053625 -0.372025 0.000000
+592 0.062247 -0.344115 0.000000
+593 0.067251 -0.318078 0.000000
+594 0.068547 -0.292412 0.000000
+595 0.066596 -0.265244 0.000000
+596 0.064013 -0.237360 0.000000
+598 0.092058 -0.207144 0.000000
+597 0.061821 -0.208936 0.000000
+599 0.122455 -0.205662 0.000000
+600 0.153304 -0.204437 0.000000
+601 0.184778 -0.203622 0.000000
+602 0.216464 -0.203889 0.000000
+603 0.247666 -0.205271 0.000000
+604 0.278273 -0.207077 0.000000
+605 0.308444 -0.208860 0.000000
+607 0.338640 -0.239619 0.000000
+606 0.338501 -0.210215 0.000000
+608 0.338849 -0.269123 0.000000
+609 0.339046 -0.298501 0.000000
+610 0.339149 -0.327425 0.000000
+611 0.339145 -0.356028 0.000000
+612 0.339123 -0.384605 0.000000
+613 0.339265 -0.413346 0.000000
+614 0.910184 0.409525 0.000000
+615 0.880341 0.409627 0.000000
+616 0.850499 0.409699 0.000000
+617 0.820678 0.409821 0.000000
+618 0.790857 0.409913 0.000000
+619 0.761038 0.410043 0.000000
+620 0.731217 0.410148 0.000000
+621 0.701394 0.410247 0.000000
+622 0.671582 0.410190 0.000000
+623 0.641769 0.410064 0.000000
+624 0.611986 0.409820 0.000000
+625 0.582223 0.409499 0.000000
+626 0.552442 0.409166 0.000000
+627 0.522678 0.408817 0.000000
+628 0.492873 0.408592 0.000000
+629 0.463046 0.408514 0.000000
+630 0.433212 0.408428 0.000000
+631 0.403368 0.408486 0.000000
+632 0.373497 0.408591 0.000000
+633 0.343636 0.408704 0.000000
+634 0.313765 0.408808 0.000000
+635 0.283900 0.408804 0.000000
+636 0.254031 0.408855 0.000000
+637 0.224157 0.409018 0.000000
+638 0.194295 0.409133 0.000000
+639 0.164411 0.409379 0.000000
+640 0.134527 0.409617 0.000000
+641 0.104649 0.409788 0.000000
+642 0.074780 0.409914 0.000000
+643 0.044908 0.410046 0.000000
+644 0.015035 0.410160 0.000000
+645 -0.014837 0.410241 0.000000
+646 -0.044786 0.410157 0.000000
+647 -0.074715 0.410004 0.000000
+648 -0.104689 0.409724 0.000000
+649 -0.134629 0.409413 0.000000
+650 -0.164573 0.409102 0.000000
+651 -0.194543 0.408821 0.000000
+652 -0.224425 0.408905 0.000000
+653 -0.254313 0.409349 0.000000
+654 -0.284136 0.410098 0.000000
+655 -0.313927 0.410835 0.000000
+656 -0.343709 0.411217 0.000000
+657 -0.373516 0.411287 0.000000
+658 -0.403429 0.411111 0.000000
+659 -0.433418 0.410580 0.000000
+660 -0.463454 0.409742 0.000000
+661 -0.493588 0.408650 0.000000
+662 -0.523716 0.408015 0.000000
+663 -0.553867 0.408390 0.000000
+664 -0.583894 0.408825 0.000000
+665 -0.613736 0.409400 0.000000
+666 -0.642923 0.411762 0.000000
+667 -0.671337 0.413661 0.000000
+668 -0.699264 0.412614 0.000000
+669 -0.726520 0.405751 0.000000
+671 0.369212 -0.413104 0.000000
+672 0.399261 -0.412881 0.000000
+673 0.429386 -0.412541 0.000000
+674 0.459495 -0.412121 0.000000
+675 0.489590 -0.411630 0.000000
+676 0.519678 -0.411233 0.000000
+677 0.549757 -0.410839 0.000000
+678 0.579820 -0.410560 0.000000
+679 0.609867 -0.410349 0.000000
+680 0.639903 -0.410201 0.000000
+681 0.669930 -0.410109 0.000000
+682 0.699950 -0.410056 0.000000
+683 0.729910 -0.409917 0.000000
+684 0.759884 -0.409777 0.000000
+685 0.789837 -0.409624 0.000000
+686 0.819781 -0.409515 0.000000
+687 0.849736 -0.409429 0.000000
+688 0.879719 -0.409378 0.000000
+689 0.909756 -0.409298 0.000000
+690 0.909695 -0.379025 0.000000
+691 0.909632 -0.348708 0.000000
+692 0.909581 -0.318392 0.000000
+693 0.909498 -0.288059 0.000000
+694 0.909434 -0.257728 0.000000
+695 0.909350 -0.227398 0.000000
+696 0.909280 -0.197070 0.000000
+697 0.909216 -0.166745 0.000000
+698 0.909165 -0.136423 0.000000
+699 0.909131 -0.106105 0.000000
+700 0.909111 -0.075791 0.000000
+701 0.909100 -0.045478 0.000000
+702 0.909095 -0.015168 0.000000
+703 0.909093 0.015140 0.000000
+704 0.909072 0.045446 0.000000
+705 0.909067 0.075752 0.000000
+706 0.909073 0.106056 0.000000
+707 0.909080 0.136361 0.000000
+708 0.909086 0.166664 0.000000
+709 0.909215 0.197066 0.000000
+710 0.909385 0.227464 0.000000
+711 0.909604 0.257898 0.000000
+712 0.909779 0.288313 0.000000
+713 0.909917 0.318693 0.000000
+714 0.909986 0.349031 0.000000
+715 0.910101 0.379296 0.000000
+716 0.024590 -0.357790 0.000000
+717 0.036820 -0.334922 0.000000
+718 0.044018 -0.313436 0.000000
+719 0.043162 -0.290917 0.000000
+720 0.038764 -0.265690 0.000000
+721 0.034906 -0.239103 0.000000
+722 0.031729 -0.211118 0.000000
+724 0.059321 -0.180222 0.000000
+723 0.028990 -0.183087 0.000000
+725 0.089882 -0.177435 0.000000
+726 0.120857 -0.174840 0.000000
+727 0.152193 -0.172458 0.000000
+728 0.184221 -0.171049 0.000000
+729 0.216369 -0.171146 0.000000
+730 0.247776 -0.173286 0.000000
+731 0.278417 -0.176312 0.000000
+732 0.308664 -0.179103 0.000000
+733 0.338547 -0.181101 0.000000
+735 0.368461 -0.210821 0.000000
+734 0.368350 -0.182042 0.000000
+736 0.368647 -0.239838 0.000000
+737 0.368823 -0.269040 0.000000
+738 0.368963 -0.298178 0.000000
+739 0.368962 -0.326977 0.000000
+740 0.368943 -0.355559 0.000000
+741 0.369058 -0.384300 0.000000
+742 0.880206 0.379422 0.000000
+743 0.850374 0.379547 0.000000
+744 0.820545 0.379654 0.000000
+745 0.790741 0.379815 0.000000
+746 0.760936 0.379938 0.000000
+747 0.731131 0.380103 0.000000
+748 0.701348 0.380156 0.000000
+749 0.671569 0.380011 0.000000
+750 0.641834 0.379835 0.000000
+751 0.612110 0.379504 0.000000
+752 0.582403 0.379062 0.000000
+753 0.552705 0.378593 0.000000
+754 0.522968 0.378130 0.000000
+755 0.493191 0.377826 0.000000
+756 0.463421 0.377567 0.000000
+757 0.433611 0.377484 0.000000
+758 0.403752 0.377581 0.000000
+759 0.373895 0.377725 0.000000
+760 0.344005 0.377927 0.000000
+761 0.314132 0.378023 0.000000
+762 0.284270 0.378127 0.000000
+763 0.254375 0.378254 0.000000
+764 0.224504 0.378365 0.000000
+765 0.194586 0.378654 0.000000
+766 0.164677 0.379020 0.000000
+767 0.134778 0.379321 0.000000
+768 0.104899 0.379529 0.000000
+769 0.074998 0.379743 0.000000
+770 0.045096 0.379901 0.000000
+771 0.015198 0.380037 0.000000
+772 -0.014766 0.380029 0.000000
+773 -0.044758 0.379983 0.000000
+774 -0.074799 0.379773 0.000000
+775 -0.104855 0.379434 0.000000
+776 -0.134915 0.378975 0.000000
+777 -0.165034 0.378407 0.000000
+778 -0.195055 0.378036 0.000000
+779 -0.225064 0.378155 0.000000
+780 -0.254915 0.379009 0.000000
+781 -0.284684 0.380269 0.000000
+782 -0.314377 0.381326 0.000000
+783 -0.344063 0.381861 0.000000
+784 -0.373781 0.381860 0.000000
+785 -0.403636 0.381479 0.000000
+786 -0.433541 0.380809 0.000000
+787 -0.463618 0.379545 0.000000
+788 -0.493878 0.377655 0.000000
+789 -0.524338 0.376897 0.000000
+790 -0.554662 0.377591 0.000000
+791 -0.584521 0.378512 0.000000
+792 -0.614901 0.378305 0.000000
+793 -0.644852 0.383150 0.000000
+794 -0.671012 0.387415 0.000000
+795 -0.694363 0.388117 0.000000
+796 -0.716721 0.380258 0.000000
+797 0.399067 -0.383941 0.000000
+798 0.429159 -0.383498 0.000000
+799 0.459314 -0.382900 0.000000
+800 0.489457 -0.382364 0.000000
+801 0.519590 -0.381760 0.000000
+802 0.549686 -0.381311 0.000000
+803 0.579760 -0.380889 0.000000
+804 0.609819 -0.380588 0.000000
+805 0.639864 -0.380360 0.000000
+806 0.669900 -0.380203 0.000000
+807 0.699859 -0.379982 0.000000
+808 0.729822 -0.379783 0.000000
+809 0.759760 -0.379578 0.000000
+810 0.789702 -0.379418 0.000000
+811 0.819647 -0.379286 0.000000
+812 0.849617 -0.379205 0.000000
+813 0.879641 -0.379103 0.000000
+814 -0.704962 0.359795 0.000000
+815 -0.687415 0.344916 0.000000
+816 -0.665253 0.330013 0.000000
+817 -0.640678 0.312888 0.000000
+818 -0.618867 0.291654 0.000000
+819 -0.596620 0.271672 0.000000
+820 -0.574345 0.250779 0.000000
+821 -0.552849 0.228703 0.000000
+822 -0.532003 0.206108 0.000000
+823 -0.510760 0.184088 0.000000
+824 -0.489162 0.162574 0.000000
+825 -0.467617 0.141083 0.000000
+826 -0.446103 0.119735 0.000000
+827 -0.424459 0.098611 0.000000
+828 -0.402806 0.077505 0.000000
+829 -0.381337 0.056223 0.000000
+830 -0.359980 0.034804 0.000000
+831 -0.338720 0.013213 0.000000
+832 -0.317525 -0.008472 0.000000
+833 -0.296233 -0.030164 0.000000
+834 -0.274944 -0.051850 0.000000
+835 -0.253374 -0.073554 0.000000
+836 -0.231843 -0.095322 0.000000
+837 -0.210150 -0.117070 0.000000
+838 -0.188532 -0.138903 0.000000
+839 -0.167016 -0.160779 0.000000
+840 -0.144837 -0.182173 0.000000
+841 -0.122682 -0.203880 0.000000
+842 -0.101823 -0.226595 0.000000
+843 -0.081098 -0.248884 0.000000
+844 -0.058277 -0.270876 0.000000
+845 -0.037972 -0.296496 0.000000
+1723 -0.019092 -0.320042 0.000000
+1725 0.000735 -0.340540 0.000000
+848 0.879560 -0.348818 0.000000
+849 0.879472 -0.318487 0.000000
+850 0.879399 -0.288159 0.000000
+851 0.879291 -0.257815 0.000000
+852 0.879205 -0.227473 0.000000
+853 0.879100 -0.197133 0.000000
+854 0.879013 -0.166796 0.000000
+855 0.878936 -0.136464 0.000000
+856 0.878856 -0.106138 0.000000
+857 0.878795 -0.075817 0.000000
+858 0.878746 -0.045502 0.000000
+859 0.878725 -0.015190 0.000000
+860 0.878647 0.015119 0.000000
+861 0.878640 0.045426 0.000000
+862 0.878639 0.075733 0.000000
+863 0.878671 0.106040 0.000000
+864 0.878713 0.136347 0.000000
+865 0.878883 0.166766 0.000000
+866 0.879094 0.197181 0.000000
+867 0.879362 0.227644 0.000000
+868 0.879600 0.258094 0.000000
+869 0.879805 0.288518 0.000000
+870 0.879937 0.318888 0.000000
+871 0.880089 0.349176 0.000000
+846 0.020727 -0.293356 0.000000
+847 0.026838 -0.309682 0.000000
+872 0.012547 -0.270085 0.000000
+873 0.004882 -0.242473 0.000000
+874 0.002208 -0.213381 0.000000
+875 -0.000861 -0.185685 0.000000
+877 0.026244 -0.155284 0.000000
+876 -0.002631 -0.158212 0.000000
+878 0.056250 -0.151549 0.000000
+879 0.087018 -0.147720 0.000000
+880 0.118301 -0.143854 0.000000
+881 0.150274 -0.140350 0.000000
+882 0.183089 -0.137656 0.000000
+883 0.216064 -0.138164 0.000000
+884 0.248380 -0.141045 0.000000
+885 0.279257 -0.145764 0.000000
+886 0.309290 -0.149826 0.000000
+887 0.338917 -0.152404 0.000000
+888 0.368368 -0.153538 0.000000
+890 0.398029 -0.182007 0.000000
+889 0.397803 -0.153563 0.000000
+891 0.398367 -0.210697 0.000000
+892 0.398637 -0.239587 0.000000
+893 0.398861 -0.268621 0.000000
+894 0.398942 -0.297627 0.000000
+895 0.398900 -0.326384 0.000000
+896 0.398951 -0.355111 0.000000
+897 -0.691273 0.370756 0.000000
+898 -0.674493 0.364355 0.000000
+1040 -0.650656 0.354888 0.000000
+976 -0.617833 0.341638 0.000000
+899 -0.598837 0.312875 0.000000
+900 -0.576909 0.294643 0.000000
+901 -0.552428 0.275224 0.000000
+902 -0.530818 0.250993 0.000000
+903 -0.510792 0.227821 0.000000
+904 -0.489498 0.205790 0.000000
+905 -0.467802 0.184485 0.000000
+906 -0.446323 0.163103 0.000000
+907 -0.424713 0.141919 0.000000
+908 -0.402894 0.120971 0.000000
+909 -0.381057 0.100028 0.000000
+910 -0.359274 0.078918 0.000000
+911 -0.337845 0.057399 0.000000
+912 -0.316626 0.035561 0.000000
+913 -0.295418 0.013637 0.000000
+914 -0.274138 -0.008245 0.000000
+915 -0.252589 -0.030100 0.000000
+916 -0.230965 -0.052030 0.000000
+917 -0.209177 -0.074014 0.000000
+918 -0.187347 -0.095982 0.000000
+919 -0.165790 -0.118137 0.000000
+920 -0.144877 -0.140439 0.000000
+921 -0.122656 -0.161118 0.000000
+922 -0.098962 -0.182854 0.000000
+923 -0.079724 -0.207401 0.000000
+924 -0.061150 -0.228771 0.000000
+925 -0.031281 -0.247098 0.000000
+926 -0.013937 -0.279764 0.000000
+927 0.850209 0.349326 0.000000
+928 0.820397 0.349486 0.000000
+929 0.790584 0.349621 0.000000
+930 0.760799 0.349820 0.000000
+931 0.731036 0.349946 0.000000
+932 0.701303 0.349866 0.000000
+933 0.671610 0.349779 0.000000
+934 0.641932 0.349508 0.000000
+935 0.612330 0.349074 0.000000
+936 0.582719 0.348493 0.000000
+937 0.553104 0.347828 0.000000
+938 0.523441 0.347216 0.000000
+939 0.493783 0.346743 0.000000
+940 0.464050 0.346432 0.000000
+941 0.434236 0.346369 0.000000
+942 0.404391 0.346489 0.000000
+943 0.374528 0.346684 0.000000
+944 0.344649 0.346945 0.000000
+945 0.314797 0.347163 0.000000
+946 0.284881 0.347290 0.000000
+947 0.254985 0.347409 0.000000
+948 0.225047 0.347654 0.000000
+949 0.195107 0.348125 0.000000
+950 0.165173 0.348601 0.000000
+951 0.135276 0.348956 0.000000
+952 0.105340 0.349258 0.000000
+953 0.075403 0.349451 0.000000
+954 0.045475 0.349683 0.000000
+955 0.015506 0.349758 0.000000
+956 -0.014542 0.349888 0.000000
+957 -0.044633 0.349863 0.000000
+958 -0.074856 0.349598 0.000000
+959 -0.105110 0.349093 0.000000
+960 -0.135408 0.348361 0.000000
+961 -0.165677 0.347487 0.000000
+962 -0.195905 0.346884 0.000000
+963 -0.226027 0.347092 0.000000
+964 -0.255914 0.348502 0.000000
+965 -0.285596 0.350414 0.000000
+966 -0.315123 0.352026 0.000000
+967 -0.344543 0.352795 0.000000
+968 -0.374040 0.352571 0.000000
+969 -0.403574 0.351874 0.000000
+970 -0.433297 0.351311 0.000000
+971 -0.463227 0.349634 0.000000
+972 -0.493249 0.346733 0.000000
+973 -0.525130 0.344192 0.000000
+974 -0.556069 0.347386 0.000000
+975 -0.584638 0.349299 0.000000
+977 0.429047 -0.354564 0.000000
+978 0.459194 -0.353904 0.000000
+979 0.489384 -0.353164 0.000000
+980 0.519514 -0.352512 0.000000
+981 0.549620 -0.351866 0.000000
+982 0.579698 -0.351370 0.000000
+983 0.609762 -0.350919 0.000000
+984 0.639814 -0.350586 0.000000
+985 0.669775 -0.350217 0.000000
+986 0.699737 -0.349897 0.000000
+987 0.729661 -0.349597 0.000000
+988 0.759593 -0.349367 0.000000
+989 0.789532 -0.349176 0.000000
+990 0.819498 -0.349051 0.000000
+991 0.849515 -0.348917 0.000000
+992 0.849408 -0.318619 0.000000
+993 0.849295 -0.288272 0.000000
+994 0.849198 -0.257931 0.000000
+995 0.849065 -0.227574 0.000000
+996 0.848962 -0.197220 0.000000
+997 0.848836 -0.166870 0.000000
+998 0.848656 -0.136533 0.000000
+999 0.848506 -0.106206 0.000000
+1000 0.848367 -0.075894 0.000000
+1001 0.848296 -0.045579 0.000000
+1002 0.848134 -0.015275 0.000000
+1003 0.848083 0.015034 0.000000
+1004 0.848033 0.045346 0.000000
+1005 0.848096 0.075663 0.000000
+1006 0.848193 0.105984 0.000000
+1007 0.848437 0.136431 0.000000
+1008 0.848728 0.166875 0.000000
+1009 0.849076 0.197378 0.000000
+1010 0.849382 0.227866 0.000000
+1011 0.849654 0.258328 0.000000
+1012 0.849850 0.288735 0.000000
+1013 0.850055 0.319057 0.000000
+1014 -0.026262 -0.214203 0.000000
+1015 -0.029383 -0.187423 0.000000
+1016 -0.032824 -0.158817 0.000000
+1018 -0.003084 -0.132754 0.000000
+1017 -0.029552 -0.130056 0.000000
+1019 0.022484 -0.128711 0.000000
+1020 0.051752 -0.123251 0.000000
+1021 0.082401 -0.118191 0.000000
+1022 0.113973 -0.113014 0.000000
+1023 0.146621 -0.107888 0.000000
+1024 0.180362 -0.103934 0.000000
+1025 0.216332 -0.102481 0.000000
+1026 0.250760 -0.108923 0.000000
+1027 0.282088 -0.115954 0.000000
+1028 0.310848 -0.121366 0.000000
+1029 0.339795 -0.124483 0.000000
+1030 0.368570 -0.125488 0.000000
+1031 0.397434 -0.125259 0.000000
+1033 0.427150 -0.152702 0.000000
+1032 0.426416 -0.124184 0.000000
+1034 0.427780 -0.181299 0.000000
+1035 0.428281 -0.210062 0.000000
+1036 0.428674 -0.238927 0.000000
+1037 0.428954 -0.267922 0.000000
+1038 0.429022 -0.296882 0.000000
+1039 0.429009 -0.325704 0.000000
+1041 0.820193 0.319232 0.000000
+1042 0.790405 0.319425 0.000000
+1043 0.760627 0.319590 0.000000
+1044 0.730930 0.319648 0.000000
+1045 0.701264 0.319642 0.000000
+1046 0.671658 0.319493 0.000000
+1047 0.642145 0.319171 0.000000
+1048 0.612661 0.318582 0.000000
+1049 0.583227 0.317841 0.000000
+1050 0.553746 0.316951 0.000000
+1051 0.524252 0.316219 0.000000
+1052 0.494666 0.315548 0.000000
+1053 0.464954 0.315177 0.000000
+1054 0.435153 0.315100 0.000000
+1055 0.405377 0.315185 0.000000
+1056 0.375483 0.315504 0.000000
+1057 0.345638 0.315877 0.000000
+1058 0.315734 0.316161 0.000000
+1059 0.285817 0.316366 0.000000
+1060 0.255889 0.316515 0.000000
+1061 0.225925 0.316910 0.000000
+1062 0.195940 0.317535 0.000000
+1063 0.166024 0.318176 0.000000
+1064 0.136095 0.318655 0.000000
+1065 0.106116 0.318949 0.000000
+1066 0.076161 0.319226 0.000000
+1067 0.046155 0.319410 0.000000
+1068 0.016077 0.319689 0.000000
+1069 -0.014080 0.319906 0.000000
+1070 -0.044435 0.319837 0.000000
+1071 -0.074887 0.319446 0.000000
+1072 -0.105364 0.318757 0.000000
+1073 -0.135875 0.317703 0.000000
+1074 -0.166440 0.316418 0.000000
+1075 -0.197098 0.315318 0.000000
+1076 -0.227516 0.315492 0.000000
+1077 -0.257562 0.317644 0.000000
+1078 -0.287116 0.320624 0.000000
+1079 -0.316202 0.323175 0.000000
+1080 -0.345132 0.324256 0.000000
+1081 -0.374089 0.323465 0.000000
+1082 -0.403254 0.322262 0.000000
+1083 -0.432294 0.322314 0.000000
+1084 -0.460563 0.320631 0.000000
+1085 -0.490322 0.315176 0.000000
+1086 -0.527019 0.305092 0.000000
+1087 -0.560434 0.318601 0.000000
+1088 -0.581094 0.326893 0.000000
+1089 0.459215 -0.324955 0.000000
+1090 0.489364 -0.324151 0.000000
+1091 0.519476 -0.323335 0.000000
+1092 0.549546 -0.322613 0.000000
+1093 0.579618 -0.321924 0.000000
+1094 0.609675 -0.321348 0.000000
+1095 0.639635 -0.320779 0.000000
+1096 0.669600 -0.320274 0.000000
+1097 0.699520 -0.319810 0.000000
+1098 0.729446 -0.319441 0.000000
+1099 0.759383 -0.319141 0.000000
+1100 0.789349 -0.318940 0.000000
+1101 0.819365 -0.318752 0.000000
+1213 -0.506862 0.272229 0.000000
+1102 -0.488578 0.247543 0.000000
+1103 -0.468378 0.226625 0.000000
+1104 -0.446529 0.206460 0.000000
+1105 -0.425174 0.185182 0.000000
+1106 -0.403551 0.164089 0.000000
+1107 -0.381461 0.143421 0.000000
+1108 -0.359047 0.122895 0.000000
+1109 -0.336992 0.101927 0.000000
+1110 -0.315505 0.080197 0.000000
+1111 -0.294359 0.058038 0.000000
+1112 -0.273215 0.035830 0.000000
+1113 -0.251740 0.013783 0.000000
+1114 -0.230105 -0.008250 0.000000
+1115 -0.208280 -0.030424 0.000000
+1116 -0.186340 -0.052658 0.000000
+1117 -0.164096 -0.074999 0.000000
+1118 -0.142249 -0.098075 0.000000
+1119 -0.124600 -0.120870 0.000000
+1120 -0.101689 -0.138969 0.000000
+1121 -0.070069 -0.159405 0.000000
+1143 -0.056720 -0.191602 0.000000
+1122 -0.048349 -0.211338 0.000000
+1123 0.819230 -0.288441 0.000000
+1124 0.819089 -0.258075 0.000000
+1125 0.818971 -0.227720 0.000000
+1126 0.818800 -0.197352 0.000000
+1127 0.818537 -0.167025 0.000000
+1128 0.818297 -0.136701 0.000000
+1129 0.818075 -0.106416 0.000000
+1130 0.817914 -0.076099 0.000000
+1131 0.817666 -0.045814 0.000000
+1132 0.817482 -0.015504 0.000000
+1133 0.817346 0.014821 0.000000
+1134 0.817349 0.045170 0.000000
+1135 0.817459 0.075531 0.000000
+1136 0.817790 0.106031 0.000000
+1137 0.818215 0.136528 0.000000
+1138 0.818711 0.167083 0.000000
+1139 0.819128 0.197617 0.000000
+1140 0.819470 0.228123 0.000000
+1141 0.819728 0.258571 0.000000
+1142 0.819993 0.288931 0.000000
+1244 -0.059543 -0.125203 0.000000
+1145 -0.022658 -0.104274 0.000000
+1144 -0.050604 -0.097371 0.000000
+1146 -0.003277 -0.113375 0.000000
+1147 0.015569 -0.103577 0.000000
+1148 0.043900 -0.096090 0.000000
+1149 0.074890 -0.089260 0.000000
+1150 0.106866 -0.082653 0.000000
+1151 0.139199 -0.075782 0.000000
+1152 0.174640 -0.068119 0.000000
+1153 0.218123 -0.058538 0.000000
+1154 0.257570 -0.075952 0.000000
+1155 0.287428 -0.087324 0.000000
+1156 0.314488 -0.094635 0.000000
+1157 0.341036 -0.097765 0.000000
+1158 0.368607 -0.097995 0.000000
+1159 0.396531 -0.096956 0.000000
+1160 0.424895 -0.095330 0.000000
+1162 0.455499 -0.122538 0.000000
+1161 0.453743 -0.093457 0.000000
+1163 0.456665 -0.151337 0.000000
+1164 0.457569 -0.180155 0.000000
+1165 0.458233 -0.209057 0.000000
+1166 0.458740 -0.238043 0.000000
+1167 0.459073 -0.267080 0.000000
+1168 0.459174 -0.296028 0.000000
+1169 0.790159 0.289139 0.000000
+1170 0.760439 0.289302 0.000000
+1171 0.730789 0.289384 0.000000
+1172 0.701251 0.289345 0.000000
+1173 0.671804 0.289210 0.000000
+1174 0.642476 0.288781 0.000000
+1175 0.613248 0.288165 0.000000
+1176 0.584018 0.287164 0.000000
+1177 0.554753 0.286148 0.000000
+1178 0.525405 0.285084 0.000000
+1179 0.495870 0.284318 0.000000
+1180 0.466191 0.283786 0.000000
+1181 0.436505 0.283622 0.000000
+1182 0.406661 0.283753 0.000000
+1183 0.376803 0.284156 0.000000
+1184 0.346949 0.284678 0.000000
+1185 0.317067 0.285146 0.000000
+1186 0.287175 0.285413 0.000000
+1187 0.257218 0.285557 0.000000
+1188 0.227216 0.286008 0.000000
+1189 0.197271 0.286938 0.000000
+1190 0.167376 0.287870 0.000000
+1191 0.137356 0.288464 0.000000
+1192 0.107379 0.288752 0.000000
+1193 0.077319 0.288996 0.000000
+1194 0.047198 0.289325 0.000000
+1195 0.016953 0.289780 0.000000
+1196 -0.013452 0.290044 0.000000
+1197 -0.044033 0.289941 0.000000
+1198 -0.074662 0.289458 0.000000
+1199 -0.105330 0.288557 0.000000
+1200 -0.136186 0.287158 0.000000
+1201 -0.167320 0.285224 0.000000
+1202 -0.198610 0.283265 0.000000
+1203 -0.229792 0.283738 0.000000
+1204 -0.260249 0.286423 0.000000
+1205 -0.289549 0.291136 0.000000
+1206 -0.317900 0.295204 0.000000
+1207 -0.345821 0.296788 0.000000
+1208 -0.373271 0.294906 0.000000
+1209 -0.402699 0.292329 0.000000
+1210 -0.431306 0.294539 0.000000
+1211 -0.456225 0.294743 0.000000
+1212 -0.480543 0.286589 0.000000
+1214 0.489324 -0.295158 0.000000
+1215 0.519386 -0.294275 0.000000
+1216 0.549442 -0.293393 0.000000
+1217 0.579473 -0.292561 0.000000
+1218 0.609427 -0.291788 0.000000
+1219 0.639393 -0.291062 0.000000
+1220 0.669310 -0.290383 0.000000
+1221 0.699237 -0.289798 0.000000
+1222 0.729178 -0.289297 0.000000
+1223 0.759153 -0.288937 0.000000
+1224 0.789181 -0.288640 0.000000
+1225 -0.468048 0.263946 0.000000
+1226 -0.449064 0.247078 0.000000
+1227 -0.425672 0.229002 0.000000
+1228 -0.403972 0.207099 0.000000
+1229 -0.382701 0.186041 0.000000
+1230 -0.360028 0.165890 0.000000
+1231 -0.336966 0.146039 0.000000
+1232 -0.314349 0.125401 0.000000
+1233 -0.292863 0.103310 0.000000
+1234 -0.271955 0.080617 0.000000
+1235 -0.250761 0.058067 0.000000
+1236 -0.229184 0.035846 0.000000
+1237 -0.207348 0.013734 0.000000
+1238 -0.185635 -0.008675 0.000000
+1239 -0.163704 -0.031228 0.000000
+1240 -0.140931 -0.053778 0.000000
+1241 -0.113524 -0.078759 0.000000
+1242 -0.108485 -0.104935 0.000000
+1243 -0.087393 -0.114472 0.000000
+1245 0.789012 -0.258320 0.000000
+1246 0.788816 -0.227953 0.000000
+1247 0.788528 -0.197675 0.000000
+1248 0.788213 -0.167365 0.000000
+1249 0.787924 -0.137121 0.000000
+1250 0.787641 -0.106821 0.000000
+1251 0.787323 -0.076569 0.000000
+1252 0.787001 -0.046263 0.000000
+1253 0.786721 -0.015923 0.000000
+1254 0.786547 0.014478 0.000000
+1255 0.786533 0.044918 0.000000
+1256 0.786809 0.075509 0.000000
+1257 0.787410 0.106098 0.000000
+1258 0.788167 0.136724 0.000000
+1259 0.788811 0.167320 0.000000
+1260 0.789306 0.197873 0.000000
+1261 0.789646 0.228368 0.000000
+1262 0.789928 0.258780 0.000000
+1263 -0.079885 -0.088544 0.000000
+1264 -0.068853 -0.063035 0.000000
+1354 -0.096410 -0.050332 0.000000
+1265 -0.040079 -0.071151 0.000000
+1266 -0.003904 -0.078351 0.000000
+1267 0.033276 -0.068635 0.000000
+1268 0.065450 -0.060908 0.000000
+1269 0.097012 -0.053165 0.000000
+1270 0.129040 -0.044661 0.000000
+1271 0.161078 -0.033951 0.000000
+1272 0.194763 -0.019351 0.000000
+1274 0.297376 -0.062036 0.000000
+1273 0.273270 -0.047050 0.000000
+1730 0.318766 -0.071448 0.000000
+1275 0.342006 -0.072798 0.000000
+1276 0.367455 -0.071083 0.000000
+1277 0.394160 -0.068575 0.000000
+1278 0.422042 -0.066164 0.000000
+1279 0.450643 -0.063564 0.000000
+1281 0.482961 -0.091446 0.000000
+1280 0.480092 -0.061554 0.000000
+1282 0.484919 -0.120729 0.000000
+1283 0.486360 -0.149771 0.000000
+1284 0.487432 -0.178843 0.000000
+1285 0.488230 -0.207943 0.000000
+1286 0.488820 -0.237072 0.000000
+1287 0.489125 -0.266135 0.000000
+1288 0.247345 -0.025834 0.000000
+1289 0.224714 0.002070 0.000000
+1290 0.760232 0.258961 0.000000
+1291 0.730702 0.259023 0.000000
+1292 0.701273 0.259041 0.000000
+1293 0.672070 0.258884 0.000000
+1294 0.643041 0.258580 0.000000
+1295 0.614100 0.257855 0.000000
+1296 0.585158 0.256709 0.000000
+1297 0.556141 0.255290 0.000000
+1298 0.526914 0.253967 0.000000
+1299 0.497488 0.252938 0.000000
+1300 0.467901 0.252297 0.000000
+1301 0.438185 0.251985 0.000000
+1302 0.408350 0.252105 0.000000
+1303 0.378486 0.252635 0.000000
+1304 0.348689 0.253392 0.000000
+1305 0.318849 0.254180 0.000000
+1306 0.288993 0.254506 0.000000
+1307 0.259104 0.254448 0.000000
+1308 0.229125 0.254911 0.000000
+1309 0.199160 0.256428 0.000000
+1310 0.169139 0.257831 0.000000
+1311 0.139113 0.258524 0.000000
+1312 0.109056 0.258739 0.000000
+1313 0.078905 0.258871 0.000000
+1314 0.048551 0.259413 0.000000
+1315 0.018071 0.260052 0.000000
+1316 -0.012555 0.260381 0.000000
+1317 -0.043290 0.260233 0.000000
+1318 -0.074004 0.259633 0.000000
+1319 -0.104865 0.258556 0.000000
+1320 -0.136045 0.256823 0.000000
+1321 -0.167724 0.254059 0.000000
+1322 -0.199349 0.251178 0.000000
+1323 -0.232977 0.249469 0.000000
+1324 -0.265381 0.255203 0.000000
+1325 -0.294998 0.262571 0.000000
+1326 -0.321525 0.268904 0.000000
+1327 -0.345370 0.271940 0.000000
+1328 -0.370494 0.266937 0.000000
+1329 -0.403103 0.256984 0.000000
+1330 -0.434068 0.268653 0.000000
+1331 0.519208 -0.265168 0.000000
+1332 0.549223 -0.264174 0.000000
+1333 0.579169 -0.263217 0.000000
+1334 0.609116 -0.262295 0.000000
+1335 0.639018 -0.261389 0.000000
+1336 0.668940 -0.260559 0.000000
+1337 0.698881 -0.259811 0.000000
+1338 0.728862 -0.259217 0.000000
+1339 0.758922 -0.258684 0.000000
+1340 -0.452781 0.275781 0.000000
+1341 -0.382340 0.228180 0.000000
+1342 -0.361422 0.207003 0.000000
+1343 -0.338786 0.188092 0.000000
+1344 -0.314784 0.169671 0.000000
+1345 -0.291764 0.149035 0.000000
+1346 -0.269929 0.126678 0.000000
+1347 -0.249214 0.103062 0.000000
+1348 -0.228144 0.080017 0.000000
+1349 -0.206339 0.057748 0.000000
+1350 -0.184396 0.035681 0.000000
+1351 -0.163152 0.013135 0.000000
+1352 -0.142212 -0.009361 0.000000
+1353 -0.120583 -0.030889 0.000000
+1355 0.758631 -0.228471 0.000000
+1356 0.758281 -0.198203 0.000000
+1357 0.757927 -0.168007 0.000000
+1358 0.757568 -0.137774 0.000000
+1359 0.757184 -0.107553 0.000000
+1360 0.756766 -0.077283 0.000000
+1361 0.756298 -0.046951 0.000000
+1362 0.755911 -0.016494 0.000000
+1363 0.755578 0.014071 0.000000
+1364 0.755530 0.044794 0.000000
+1365 0.756009 0.075565 0.000000
+1366 0.757080 0.106305 0.000000
+1367 0.758211 0.136989 0.000000
+1368 0.759083 0.167591 0.000000
+1369 0.759636 0.198105 0.000000
+1370 0.760021 0.228532 0.000000
+1371 -0.360079 0.243309 0.000000
+1372 -0.341982 0.226476 0.000000
+1373 -0.319699 0.210923 0.000000
+1374 -0.294047 0.194220 0.000000
+1375 -0.267776 0.174727 0.000000
+1376 -0.245955 0.149439 0.000000
+1377 -0.226125 0.124759 0.000000
+1378 -0.205333 0.101062 0.000000
+1379 -0.183133 0.079096 0.000000
+1380 -0.161064 0.056981 0.000000
+1381 -0.140320 0.034408 0.000000
+1382 -0.120789 0.011646 0.000000
+1383 -0.101307 -0.009581 0.000000
+1384 -0.080345 -0.027959 0.000000
+1385 -0.059504 -0.040679 0.000000
+1386 -0.035185 -0.045016 0.000000
+1387 -0.006250 -0.045396 0.000000
+1388 0.025205 -0.040029 0.000000
+1389 0.055491 -0.032869 0.000000
+1390 0.085590 -0.024698 0.000000
+1391 0.115671 -0.015090 0.000000
+1392 0.145238 -0.002575 0.000000
+1393 0.175423 0.012389 0.000000
+1394 0.204047 0.030821 0.000000
+1396 0.252433 0.023671 0.000000
+1395 0.231976 0.050390 0.000000
+1397 0.272784 -0.001727 0.000000
+1398 0.292997 -0.024140 0.000000
+1399 -0.344301 0.253627 0.000000
+1476 -0.327649 0.245851 0.000000
+1475 -0.303967 0.235261 0.000000
+1474 -0.275606 0.222900 0.000000
+1473 -0.238652 0.207257 0.000000
+1400 -0.218940 0.171385 0.000000
+1401 -0.201026 0.143814 0.000000
+1402 -0.182515 0.120273 0.000000
+1403 -0.160481 0.099745 0.000000
+1404 -0.136284 0.078764 0.000000
+1405 -0.115924 0.053849 0.000000
+1406 -0.097288 0.029482 0.000000
+1407 -0.080305 0.007789 0.000000
+1408 -0.064615 -0.010938 0.000000
+1409 -0.053222 -0.024526 0.000000
+1410 -0.035546 -0.021961 0.000000
+1411 -0.011462 -0.018372 0.000000
+1412 0.015936 -0.012857 0.000000
+1413 0.044720 -0.005800 0.000000
+1414 0.073013 0.002618 0.000000
+1415 0.101137 0.013052 0.000000
+1416 0.129049 0.026082 0.000000
+1417 0.156365 0.041756 0.000000
+1418 0.184159 0.058506 0.000000
+1419 0.210898 0.077145 0.000000
+1421 0.258587 0.071409 0.000000
+1420 0.235912 0.097537 0.000000
+1422 0.279603 0.044767 0.000000
+1423 0.299087 0.018547 0.000000
+1424 0.315969 -0.006303 0.000000
+1731 0.310464 -0.042060 0.000000
+1425 0.330327 -0.029348 0.000000
+1428 0.363244 -0.044857 0.000000
+1427 0.340084 -0.050081 0.000000
+1429 0.389469 -0.040337 0.000000
+1430 0.417204 -0.036504 0.000000
+1431 0.446200 -0.033641 0.000000
+1432 0.475860 -0.030939 0.000000
+1434 0.509909 -0.059392 0.000000
+1433 0.506170 -0.028724 0.000000
+1435 0.512650 -0.089356 0.000000
+1436 0.514653 -0.118826 0.000000
+1437 0.516198 -0.148172 0.000000
+1438 0.517372 -0.177496 0.000000
+1439 0.518251 -0.206797 0.000000
+1440 0.518809 -0.236000 0.000000
+1441 0.730529 0.228662 0.000000
+1442 0.701323 0.228754 0.000000
+1443 0.672423 0.228800 0.000000
+1444 0.643741 0.228640 0.000000
+1445 0.615194 0.227932 0.000000
+1446 0.586654 0.226401 0.000000
+1447 0.557941 0.224612 0.000000
+1448 0.528929 0.223020 0.000000
+1449 0.499611 0.221647 0.000000
+1450 0.470054 0.220728 0.000000
+1451 0.440302 0.220268 0.000000
+1452 0.410353 0.220340 0.000000
+1453 0.380500 0.220966 0.000000
+1454 0.350669 0.222138 0.000000
+1455 0.320983 0.223466 0.000000
+1456 0.291443 0.223868 0.000000
+1457 0.261694 0.223241 0.000000
+1458 0.231496 0.223560 0.000000
+1459 0.201231 0.226194 0.000000
+1460 0.171137 0.228421 0.000000
+1461 0.141193 0.229123 0.000000
+1462 0.111175 0.228865 0.000000
+1463 0.080816 0.228761 0.000000
+1464 0.050208 0.229564 0.000000
+1465 0.019472 0.230514 0.000000
+1466 -0.011343 0.230954 0.000000
+1467 -0.042117 0.230757 0.000000
+1468 -0.072814 0.229991 0.000000
+1469 -0.103589 0.228826 0.000000
+1470 -0.134529 0.227009 0.000000
+1471 -0.165046 0.223412 0.000000
+1472 -0.197706 0.217242 0.000000
+1477 0.548816 -0.234928 0.000000
+1478 0.578757 -0.233868 0.000000
+1479 0.608646 -0.232776 0.000000
+1480 0.638544 -0.231722 0.000000
+1481 0.668475 -0.230745 0.000000
+1482 0.698449 -0.229883 0.000000
+1483 0.728502 -0.229106 0.000000
+1484 0.728097 -0.198990 0.000000
+1485 0.727706 -0.168870 0.000000
+1486 0.727268 -0.138716 0.000000
+1487 0.726781 -0.108502 0.000000
+1488 0.726187 -0.078232 0.000000
+1489 0.725568 -0.047739 0.000000
+1490 0.724892 -0.017056 0.000000
+1491 0.724335 0.013822 0.000000
+1492 0.724164 0.044888 0.000000
+1493 0.724932 0.075908 0.000000
+1494 0.726778 0.106759 0.000000
+1495 0.728488 0.137429 0.000000
+1496 0.729633 0.167949 0.000000
+1497 0.730244 0.198323 0.000000
+1624 -0.176223 0.159072 0.000000
+1565 -0.188636 0.185770 0.000000
+1498 -0.161949 0.137398 0.000000
+1499 -0.140110 0.121576 0.000000
+1500 -0.106545 0.104454 0.000000
+1501 -0.087526 0.070737 0.000000
+1502 -0.072361 0.044417 0.000000
+1503 -0.057196 0.020338 0.000000
+1504 -0.044389 -0.001210 0.000000
+1505 -0.020536 0.005897 0.000000
+1506 0.005896 0.012768 0.000000
+1507 0.032832 0.019947 0.000000
+1508 0.059602 0.028562 0.000000
+1509 0.085773 0.039645 0.000000
+1510 0.111268 0.053907 0.000000
+1511 0.136949 0.069672 0.000000
+1512 0.163708 0.085560 0.000000
+1513 0.189111 0.102843 0.000000
+1514 0.212292 0.123719 0.000000
+1516 0.261207 0.118056 0.000000
+1515 0.235876 0.152123 0.000000
+1517 0.283993 0.091978 0.000000
+1518 0.308470 0.065373 0.000000
+1519 0.327160 0.035468 0.000000
+1520 0.342353 0.007735 0.000000
+1522 0.381592 -0.011628 0.000000
+1521 0.354627 -0.019214 0.000000
+1523 0.410351 -0.006674 0.000000
+1524 0.440111 -0.003133 0.000000
+1525 0.470506 -0.000324 0.000000
+1526 0.501426 0.002030 0.000000
+1528 0.536865 -0.026396 0.000000
+1527 0.532740 0.004491 0.000000
+1529 0.540168 -0.057063 0.000000
+1530 0.542678 -0.087141 0.000000
+1531 0.544636 -0.116906 0.000000
+1532 0.546173 -0.146556 0.000000
+1533 0.547355 -0.176132 0.000000
+1534 0.548177 -0.205590 0.000000
+1535 0.701238 0.198603 0.000000
+1536 0.672622 0.198890 0.000000
+1537 0.644404 0.199062 0.000000
+1538 0.616485 0.198422 0.000000
+1539 0.588583 0.196609 0.000000
+1540 0.560344 0.194273 0.000000
+1541 0.531588 0.192198 0.000000
+1542 0.502343 0.190448 0.000000
+1543 0.472715 0.189284 0.000000
+1544 0.442689 0.188639 0.000000
+1545 0.412525 0.188524 0.000000
+1546 0.382371 0.189211 0.000000
+1547 0.352532 0.190926 0.000000
+1548 0.323383 0.193301 0.000000
+1549 0.294480 0.194119 0.000000
+1550 0.265376 0.192145 0.000000
+1551 0.233926 0.191285 0.000000
+1552 0.201869 0.196460 0.000000
+1553 0.172943 0.200159 0.000000
+1554 0.143553 0.200482 0.000000
+1555 0.113782 0.199148 0.000000
+1556 0.083231 0.198393 0.000000
+1557 0.052135 0.199830 0.000000
+1558 0.021092 0.201266 0.000000
+1559 -0.009922 0.201866 0.000000
+1560 -0.040700 0.201511 0.000000
+1561 -0.071228 0.200508 0.000000
+1562 -0.101438 0.199427 0.000000
+1563 -0.131223 0.198305 0.000000
+1564 -0.159535 0.194395 0.000000
+1566 0.578134 -0.204375 0.000000
+1567 0.608049 -0.203173 0.000000
+1568 0.637979 -0.201995 0.000000
+1569 0.667952 -0.200914 0.000000
+1570 0.698007 -0.199916 0.000000
+1571 0.697532 -0.169905 0.000000
+1572 0.697003 -0.139791 0.000000
+1573 0.696380 -0.109617 0.000000
+1574 0.695597 -0.079155 0.000000
+1575 0.694688 -0.048469 0.000000
+1576 0.693667 -0.017488 0.000000
+1577 0.692727 0.013854 0.000000
+1578 0.692318 0.045364 0.000000
+1579 0.693432 0.076853 0.000000
+1580 0.696657 0.107710 0.000000
+1581 0.699224 0.138222 0.000000
+1582 0.700642 0.168450 0.000000
+1583 0.371214 0.017358 0.000000
+1584 0.401698 0.024012 0.000000
+1585 0.432909 0.027812 0.000000
+1586 0.464397 0.030558 0.000000
+1587 0.495928 0.033147 0.000000
+1588 0.527675 0.035870 0.000000
+1590 0.564245 0.007571 0.000000
+1589 0.559782 0.038972 0.000000
+1591 0.567852 -0.023731 0.000000
+1592 0.570714 -0.054539 0.000000
+1593 0.572988 -0.084893 0.000000
+1594 0.574798 -0.114991 0.000000
+1595 0.576239 -0.144944 0.000000
+1596 0.577286 -0.174743 0.000000
+1597 0.672269 0.169162 0.000000
+1598 0.644670 0.169920 0.000000
+1599 0.617885 0.169958 0.000000
+1600 0.591267 0.167167 0.000000
+1601 0.563698 0.164256 0.000000
+1602 0.535150 0.161558 0.000000
+1603 0.505769 0.159594 0.000000
+1604 0.475777 0.158082 0.000000
+1605 0.445365 0.156971 0.000000
+1606 0.414493 0.156592 0.000000
+1607 0.383621 0.157125 0.000000
+1608 0.353035 0.160313 0.000000
+1609 0.324219 0.164732 0.000000
+1610 0.298724 0.166543 0.000000
+1611 0.271789 0.160670 0.000000
+1651 0.199126 0.167293 0.000000
+1612 0.173017 0.174362 0.000000
+1613 0.147220 0.172881 0.000000
+1614 0.118370 0.169386 0.000000
+1615 0.086388 0.167114 0.000000
+1616 0.053408 0.170284 0.000000
+1617 0.022505 0.172511 0.000000
+1618 -0.008623 0.173287 0.000000
+1619 -0.039412 0.172568 0.000000
+1620 -0.069931 0.170982 0.000000
+1621 -0.100028 0.170519 0.000000
+1622 -0.128149 0.170934 0.000000
+1623 -0.152764 0.169059 0.000000
+1625 0.607304 -0.173381 0.000000
+1626 0.637335 -0.172134 0.000000
+1627 0.667402 -0.170981 0.000000
+1628 0.666783 -0.140930 0.000000
+1629 0.665958 -0.110595 0.000000
+1630 0.664919 -0.080023 0.000000
+1631 0.663654 -0.049157 0.000000
+1632 0.662211 -0.017858 0.000000
+1633 0.660734 0.013978 0.000000
+1634 0.659723 0.046364 0.000000
+1635 0.661116 0.078961 0.000000
+1636 0.667246 0.110337 0.000000
+1637 0.670745 0.139548 0.000000
+1638 -0.147799 0.150716 0.000000
+1674 -0.128684 0.145691 0.000000
+1673 -0.100884 0.140613 0.000000
+1640 -0.044570 0.054253 0.000000
+1639 -0.056446 0.080725 0.000000
+1641 -0.032234 0.029928 0.000000
+1642 -0.006024 0.037236 0.000000
+1643 0.020359 0.044478 0.000000
+1644 0.045874 0.052989 0.000000
+1645 0.069878 0.064328 0.000000
+1646 0.092192 0.079845 0.000000
+1647 0.116360 0.097485 0.000000
+1648 0.143869 0.111631 0.000000
+1649 0.168436 0.126174 0.000000
+1650 0.188283 0.142853 0.000000
+1652 0.358550 0.049279 0.000000
+1653 0.392847 0.055605 0.000000
+1654 0.425498 0.059383 0.000000
+1655 0.457980 0.061988 0.000000
+1656 0.489964 0.064583 0.000000
+1657 0.521756 0.067277 0.000000
+1658 0.553712 0.070546 0.000000
+1660 0.592058 0.043198 0.000000
+1659 0.586126 0.075499 0.000000
+1661 0.596024 0.010863 0.000000
+1662 0.599068 -0.020945 0.000000
+1663 0.601524 -0.052102 0.000000
+1664 0.603505 -0.082800 0.000000
+1665 0.605107 -0.113223 0.000000
+1666 0.606324 -0.143417 0.000000
+1667 0.171493 0.155016 0.000000
+1696 0.153180 0.147834 0.000000
+1695 0.126680 0.139244 0.000000
+1694 0.090766 0.129758 0.000000
+1668 0.053205 0.140969 0.000000
+1669 0.022718 0.144596 0.000000
+1670 -0.007526 0.145267 0.000000
+1671 -0.037468 0.143511 0.000000
+1672 -0.068877 0.141356 0.000000
+1675 0.643744 0.141837 0.000000
+1676 0.619340 0.143192 0.000000
+1677 0.595381 0.139507 0.000000
+1678 0.568637 0.134721 0.000000
+1679 0.539884 0.131489 0.000000
+1680 0.510058 0.128996 0.000000
+1681 0.479577 0.126939 0.000000
+1682 0.448267 0.125455 0.000000
+1683 0.416247 0.124554 0.000000
+1684 0.383738 0.125001 0.000000
+1685 0.350239 0.128717 0.000000
+1686 0.321325 0.138246 0.000000
+1687 0.301817 0.146183 0.000000
+1688 0.285401 0.133759 0.000000
+1689 0.636514 -0.142039 0.000000
+1719 0.635429 -0.111764 0.000000
+1718 0.634140 -0.081198 0.000000
+1717 0.632516 -0.050293 0.000000
+1716 0.630582 -0.018910 0.000000
+1715 0.628349 0.013302 0.000000
+1714 0.625872 0.047113 0.000000
+1713 0.623071 0.085699 0.000000
+1690 0.639907 0.114926 0.000000
+1707 0.307163 0.113588 0.000000
+1706 0.342214 0.089227 0.000000
+1711 -0.017903 0.061881 0.000000
+1710 0.008821 0.068462 0.000000
+1691 0.033317 0.075470 0.000000
+1692 0.053991 0.085182 0.000000
+1693 0.071183 0.102698 0.000000
+1697 0.620610 0.123584 0.000000
+1698 0.602483 0.113360 0.000000
+1699 0.575576 0.106296 0.000000
+1700 0.546046 0.101572 0.000000
+1701 0.515442 0.098403 0.000000
+1702 0.484200 0.095912 0.000000
+1703 0.452238 0.093868 0.000000
+1704 0.419736 0.091956 0.000000
+1705 0.384875 0.090284 0.000000
+1708 0.047562 0.116186 0.000000
+1709 0.022481 0.118825 0.000000
+1727 -0.004797 0.117886 0.000000
+1729 -0.034224 0.114874 0.000000
+1712 -0.065866 0.110365 0.000000
+1721 0.041866 0.099182 0.000000
+1720 0.024047 0.096178 0.000000
+1722 0.000778 -0.304198 0.000000
+1724 0.015932 -0.322856 0.000000
+1726 -0.000448 0.092548 0.000000
+1728 -0.027125 0.087460 0.000000
+1426 0.323105 -0.054530 0.000000
+1 1 quad 195 133 67 180 
+2 1 quad 196 132 133 195 
+3 1 quad 196 195 180 179 
+4 1 quad 132 196 197 131 
+5 1 quad 196 179 178 197 
+6 1 quad 181 187 198 185 
+7 1 quad 187 188 199 198 
+8 1 quad 188 189 200 199 
+9 1 quad 189 190 201 200 
+10 1 quad 190 191 202 201 
+11 1 quad 191 192 203 202 
+12 1 quad 192 186 204 203 
+13 1 quad 186 206 205 204 
+14 1 quad 186 193 207 206 
+15 1 quad 193 194 208 207 
+16 1 quad 194 1 209 208 
+17 1 quad 1 211 210 209 
+18 1 quad 1 3 212 211 
+19 1 quad 3 4 213 212 
+20 1 quad 4 5 214 213 
+21 1 quad 5 6 215 214 
+22 1 quad 6 7 216 215 
+23 1 quad 7 8 217 216 
+24 1 quad 8 2 10 217 
+25 1 quad 34 68 218 66 
+26 1 quad 68 69 219 218 
+27 1 quad 69 70 220 219 
+28 1 quad 70 71 221 220 
+29 1 quad 71 72 222 221 
+30 1 quad 72 73 223 222 
+31 1 quad 73 74 224 223 
+32 1 quad 74 75 225 224 
+33 1 quad 75 76 226 225 
+34 1 quad 76 77 227 226 
+35 1 quad 77 78 228 227 
+36 1 quad 78 79 229 228 
+37 1 quad 79 80 230 229 
+38 1 quad 80 81 231 230 
+39 1 quad 81 82 232 231 
+40 1 quad 82 83 233 232 
+41 1 quad 83 84 234 233 
+42 1 quad 84 85 235 234 
+43 1 quad 85 86 236 235 
+44 1 quad 86 87 237 236 
+45 1 quad 87 88 238 237 
+46 1 quad 88 89 239 238 
+47 1 quad 89 90 240 239 
+48 1 quad 90 91 241 240 
+49 1 quad 91 92 242 241 
+50 1 quad 92 93 243 242 
+51 1 quad 93 94 244 243 
+52 1 quad 94 95 245 244 
+53 1 quad 95 96 246 245 
+54 1 quad 96 97 247 246 
+55 1 quad 97 98 248 247 
+56 1 quad 98 99 249 248 
+57 1 quad 99 100 250 249 
+58 1 quad 100 101 251 250 
+59 1 quad 101 102 252 251 
+60 1 quad 102 103 253 252 
+61 1 quad 103 104 254 253 
+62 1 quad 104 105 255 254 
+63 1 quad 105 106 256 255 
+64 1 quad 106 107 257 256 
+65 1 quad 107 108 258 257 
+66 1 quad 108 109 259 258 
+67 1 quad 109 110 260 259 
+68 1 quad 110 111 261 260 
+69 1 quad 111 112 262 261 
+70 1 quad 112 113 263 262 
+71 1 quad 113 114 264 263 
+72 1 quad 114 115 265 264 
+73 1 quad 115 116 266 265 
+74 1 quad 116 117 267 266 
+75 1 quad 117 118 268 267 
+76 1 quad 118 119 269 268 
+77 1 quad 119 120 270 269 
+78 1 quad 120 121 271 270 
+79 1 quad 121 122 272 271 
+80 1 quad 122 123 273 272 
+81 1 quad 123 124 274 273 
+82 1 quad 124 125 275 274 
+83 1 quad 125 126 276 275 
+84 1 quad 126 127 277 276 
+85 1 quad 127 128 278 277 
+86 1 quad 128 129 279 278 
+87 1 quad 129 130 303 279 
+88 1 quad 130 131 197 303 
+89 1 quad 10 11 280 217 
+90 1 quad 11 12 281 280 
+91 1 quad 12 13 282 281 
+92 1 quad 13 14 283 282 
+93 1 quad 14 15 284 283 
+94 1 quad 15 16 285 284 
+95 1 quad 16 17 286 285 
+96 1 quad 17 18 287 286 
+97 1 quad 18 19 288 287 
+98 1 quad 19 20 289 288 
+99 1 quad 20 21 290 289 
+100 1 quad 21 22 291 290 
+101 1 quad 22 23 292 291 
+102 1 quad 23 24 293 292 
+103 1 quad 24 25 294 293 
+104 1 quad 25 26 295 294 
+105 1 quad 26 27 296 295 
+106 1 quad 27 28 297 296 
+107 1 quad 28 29 298 297 
+108 1 quad 29 30 299 298 
+109 1 quad 30 31 300 299 
+110 1 quad 31 32 301 300 
+111 1 quad 32 33 302 301 
+112 1 quad 33 9 35 302 
+113 1 quad 178 177 303 197 
+114 1 quad 177 176 304 303 
+115 1 quad 176 175 305 304 
+116 1 quad 175 174 306 305 
+117 1 quad 174 173 307 306 
+118 1 quad 173 172 308 307 
+119 1 quad 172 171 309 308 
+120 1 quad 171 170 310 309 
+121 1 quad 170 169 311 310 
+122 1 quad 169 168 312 311 
+123 1 quad 168 167 313 312 
+124 1 quad 167 166 314 313 
+125 1 quad 166 165 315 314 
+126 1 quad 165 164 316 315 
+127 1 quad 164 163 317 316 
+128 1 quad 163 162 318 317 
+129 1 quad 162 161 319 318 
+130 1 quad 161 160 320 319 
+131 1 quad 160 159 321 320 
+132 1 quad 159 158 322 321 
+133 1 quad 158 157 323 322 
+134 1 quad 157 156 324 323 
+135 1 quad 156 155 325 324 
+136 1 quad 155 154 326 325 
+137 1 quad 154 153 327 326 
+138 1 quad 153 152 328 327 
+139 1 quad 152 151 329 328 
+140 1 quad 151 150 330 329 
+141 1 quad 150 149 331 330 
+142 1 quad 149 148 332 331 
+143 1 quad 148 147 333 332 
+144 1 quad 147 146 334 333 
+145 1 quad 146 145 335 334 
+146 1 quad 145 144 336 335 
+147 1 quad 144 143 337 336 
+148 1 quad 143 142 338 337 
+149 1 quad 142 141 339 338 
+150 1 quad 141 140 340 339 
+151 1 quad 140 139 341 340 
+152 1 quad 139 138 342 341 
+153 1 quad 138 137 343 342 
+154 1 quad 137 136 344 343 
+155 1 quad 136 135 345 344 
+156 1 quad 135 134 346 345 
+157 1 quad 134 182 347 346 
+158 1 quad 182 183 348 347 
+159 1 quad 183 184 349 348 
+160 1 quad 184 185 198 349 
+161 1 quad 35 36 350 302 
+162 1 quad 36 37 351 350 
+163 1 quad 37 38 352 351 
+164 1 quad 38 39 353 352 
+165 1 quad 39 40 354 353 
+166 1 quad 40 41 355 354 
+167 1 quad 41 42 356 355 
+168 1 quad 42 43 357 356 
+169 1 quad 43 44 358 357 
+170 1 quad 44 45 359 358 
+171 1 quad 45 46 360 359 
+172 1 quad 46 47 361 360 
+173 1 quad 47 48 362 361 
+174 1 quad 48 49 363 362 
+175 1 quad 49 50 364 363 
+176 1 quad 50 51 365 364 
+177 1 quad 51 52 366 365 
+178 1 quad 52 53 367 366 
+179 1 quad 53 54 368 367 
+180 1 quad 54 55 369 368 
+181 1 quad 55 56 370 369 
+182 1 quad 56 57 371 370 
+183 1 quad 57 58 372 371 
+184 1 quad 58 59 373 372 
+185 1 quad 59 60 374 373 
+186 1 quad 60 61 375 374 
+187 1 quad 61 62 376 375 
+188 1 quad 62 63 377 376 
+189 1 quad 63 64 378 377 
+190 1 quad 64 65 379 378 
+191 1 quad 65 66 218 379 
+192 1 quad 303 304 503 279 
+193 1 quad 304 305 380 503 
+194 1 quad 305 306 381 380 
+195 1 quad 306 307 382 381 
+196 1 quad 307 308 383 382 
+197 1 quad 308 309 384 383 
+198 1 quad 309 310 385 384 
+199 1 quad 310 311 386 385 
+200 1 quad 311 312 387 386 
+201 1 quad 312 313 388 387 
+202 1 quad 313 314 389 388 
+203 1 quad 314 315 390 389 
+204 1 quad 315 316 391 390 
+205 1 quad 316 317 392 391 
+206 1 quad 317 318 393 392 
+207 1 quad 318 319 394 393 
+208 1 quad 319 320 395 394 
+209 1 quad 320 321 396 395 
+210 1 quad 321 322 397 396 
+211 1 quad 322 323 398 397 
+212 1 quad 323 324 399 398 
+213 1 quad 324 325 400 399 
+214 1 quad 325 326 401 400 
+215 1 quad 326 327 402 401 
+216 1 quad 327 328 403 402 
+217 1 quad 328 329 404 403 
+218 1 quad 329 330 405 404 
+219 1 quad 330 331 406 405 
+220 1 quad 331 332 407 406 
+221 1 quad 332 333 408 407 
+222 1 quad 333 334 409 408 
+223 1 quad 334 335 410 409 
+224 1 quad 335 336 411 410 
+225 1 quad 336 337 412 411 
+226 1 quad 337 338 413 412 
+227 1 quad 338 339 414 413 
+228 1 quad 339 340 415 414 
+229 1 quad 340 341 416 415 
+230 1 quad 341 342 417 416 
+231 1 quad 342 343 418 417 
+232 1 quad 343 344 419 418 
+233 1 quad 347 348 421 420 
+234 1 quad 348 349 422 421 
+235 1 quad 349 198 199 422 
+236 1 quad 199 200 423 422 
+237 1 quad 200 201 424 423 
+238 1 quad 201 202 425 424 
+239 1 quad 202 203 426 425 
+240 1 quad 203 204 427 426 
+241 1 quad 204 205 428 427 
+242 1 quad 205 430 429 428 
+243 1 quad 205 206 431 430 
+244 1 quad 206 207 432 431 
+245 1 quad 207 208 433 432 
+246 1 quad 208 209 434 433 
+247 1 quad 209 210 435 434 
+248 1 quad 210 437 436 435 
+249 1 quad 210 211 438 437 
+250 1 quad 211 212 439 438 
+251 1 quad 212 213 440 439 
+252 1 quad 213 214 441 440 
+253 1 quad 214 215 442 441 
+254 1 quad 215 216 443 442 
+255 1 quad 216 217 280 443 
+256 1 quad 218 219 444 379 
+257 1 quad 219 220 445 444 
+258 1 quad 220 221 446 445 
+259 1 quad 221 222 447 446 
+260 1 quad 222 223 448 447 
+261 1 quad 223 224 449 448 
+262 1 quad 224 225 450 449 
+263 1 quad 225 226 451 450 
+264 1 quad 226 227 452 451 
+265 1 quad 227 228 453 452 
+266 1 quad 228 229 454 453 
+267 1 quad 229 230 455 454 
+268 1 quad 230 231 456 455 
+269 1 quad 231 232 457 456 
+270 1 quad 232 233 458 457 
+271 1 quad 233 234 459 458 
+272 1 quad 234 235 460 459 
+273 1 quad 235 236 461 460 
+274 1 quad 236 237 462 461 
+275 1 quad 237 238 463 462 
+276 1 quad 238 239 464 463 
+277 1 quad 239 240 465 464 
+278 1 quad 240 241 466 465 
+279 1 quad 241 242 467 466 
+280 1 quad 242 243 468 467 
+281 1 quad 243 244 469 468 
+282 1 quad 244 245 470 469 
+283 1 quad 245 246 471 470 
+284 1 quad 246 247 472 471 
+285 1 quad 247 248 473 472 
+286 1 quad 248 249 474 473 
+287 1 quad 249 250 475 474 
+288 1 quad 250 251 476 475 
+289 1 quad 251 252 477 476 
+290 1 quad 252 253 478 477 
+291 1 quad 253 254 479 478 
+292 1 quad 254 255 480 479 
+293 1 quad 255 256 481 480 
+294 1 quad 256 257 482 481 
+295 1 quad 257 258 483 482 
+296 1 quad 258 259 484 483 
+297 1 quad 259 260 485 484 
+298 1 quad 260 261 486 485 
+299 1 quad 261 262 487 486 
+300 1 quad 262 263 488 487 
+301 1 quad 263 264 489 488 
+302 1 quad 264 265 490 489 
+303 1 quad 265 266 491 490 
+304 1 quad 266 267 492 491 
+305 1 quad 267 268 493 492 
+306 1 quad 268 269 494 493 
+307 1 quad 269 270 495 494 
+308 1 quad 270 271 496 495 
+309 1 quad 271 272 497 496 
+310 1 quad 272 273 498 497 
+311 1 quad 273 274 499 498 
+312 1 quad 274 275 500 499 
+313 1 quad 275 276 501 500 
+314 1 quad 276 277 553 501 
+315 1 quad 277 278 502 553 
+316 1 quad 278 279 503 502 
+317 1 quad 302 350 504 301 
+318 1 quad 350 351 505 504 
+319 1 quad 351 352 506 505 
+320 1 quad 352 353 507 506 
+321 1 quad 353 354 508 507 
+322 1 quad 354 355 509 508 
+323 1 quad 355 356 510 509 
+324 1 quad 356 357 511 510 
+325 1 quad 357 358 512 511 
+326 1 quad 358 359 513 512 
+327 1 quad 359 360 514 513 
+328 1 quad 360 361 515 514 
+329 1 quad 361 362 516 515 
+330 1 quad 362 363 517 516 
+331 1 quad 363 364 518 517 
+332 1 quad 364 365 519 518 
+333 1 quad 365 366 520 519 
+334 1 quad 366 367 521 520 
+335 1 quad 367 368 522 521 
+336 1 quad 368 369 523 522 
+337 1 quad 369 370 524 523 
+338 1 quad 370 371 525 524 
+339 1 quad 371 372 526 525 
+340 1 quad 372 373 527 526 
+341 1 quad 373 374 528 527 
+342 1 quad 374 375 529 528 
+343 1 quad 375 376 530 529 
+344 1 quad 376 377 531 530 
+345 1 quad 377 378 532 531 
+346 1 quad 378 379 444 532 
+347 1 quad 280 281 533 443 
+348 1 quad 281 282 534 533 
+349 1 quad 282 283 535 534 
+350 1 quad 283 284 536 535 
+351 1 quad 284 285 537 536 
+352 1 quad 285 286 538 537 
+353 1 quad 286 287 539 538 
+354 1 quad 287 288 540 539 
+355 1 quad 288 289 541 540 
+356 1 quad 289 290 542 541 
+357 1 quad 290 291 543 542 
+358 1 quad 291 292 544 543 
+359 1 quad 292 293 545 544 
+360 1 quad 293 294 546 545 
+361 1 quad 294 295 547 546 
+362 1 quad 295 296 548 547 
+363 1 quad 296 297 549 548 
+364 1 quad 297 298 550 549 
+365 1 quad 298 299 551 550 
+366 1 quad 299 300 552 551 
+367 1 quad 300 301 504 552 
+368 1 quad 420 421 590 419 
+369 1 quad 421 422 423 590 
+370 1 quad 503 380 553 502 
+371 1 quad 380 381 501 553 
+372 1 quad 381 382 670 501 
+373 1 quad 382 383 554 670 
+374 1 quad 383 384 555 554 
+375 1 quad 384 385 556 555 
+376 1 quad 385 386 557 556 
+377 1 quad 386 387 558 557 
+378 1 quad 387 388 559 558 
+379 1 quad 388 389 560 559 
+380 1 quad 389 390 561 560 
+381 1 quad 390 391 562 561 
+382 1 quad 391 392 563 562 
+383 1 quad 392 393 564 563 
+384 1 quad 393 394 565 564 
+385 1 quad 394 395 566 565 
+386 1 quad 395 396 567 566 
+387 1 quad 396 397 568 567 
+388 1 quad 397 398 569 568 
+389 1 quad 398 399 570 569 
+390 1 quad 399 400 571 570 
+391 1 quad 400 401 572 571 
+392 1 quad 401 402 573 572 
+393 1 quad 402 403 574 573 
+394 1 quad 403 404 575 574 
+395 1 quad 404 405 576 575 
+396 1 quad 405 406 577 576 
+397 1 quad 406 407 578 577 
+398 1 quad 407 408 579 578 
+399 1 quad 408 409 580 579 
+400 1 quad 409 410 581 580 
+401 1 quad 410 411 582 581 
+402 1 quad 411 412 583 582 
+403 1 quad 412 413 584 583 
+404 1 quad 413 414 585 584 
+405 1 quad 414 415 586 585 
+406 1 quad 415 416 587 586 
+407 1 quad 416 417 588 587 
+408 1 quad 417 418 589 588 
+409 1 quad 418 419 590 589 
+410 1 quad 423 424 591 590 
+411 1 quad 424 425 592 591 
+412 1 quad 425 426 593 592 
+413 1 quad 426 427 594 593 
+414 1 quad 427 428 595 594 
+415 1 quad 428 429 596 595 
+416 1 quad 429 598 597 596 
+417 1 quad 429 430 599 598 
+418 1 quad 430 431 600 599 
+419 1 quad 431 432 601 600 
+420 1 quad 432 433 602 601 
+421 1 quad 433 434 603 602 
+422 1 quad 434 435 604 603 
+423 1 quad 435 436 605 604 
+424 1 quad 436 607 606 605 
+425 1 quad 436 437 608 607 
+426 1 quad 437 438 609 608 
+427 1 quad 438 439 610 609 
+428 1 quad 439 440 611 610 
+429 1 quad 440 441 612 611 
+430 1 quad 441 442 613 612 
+431 1 quad 442 443 533 613 
+432 1 quad 444 445 614 532 
+433 1 quad 445 446 615 614 
+434 1 quad 446 447 616 615 
+435 1 quad 447 448 617 616 
+436 1 quad 448 449 618 617 
+437 1 quad 449 450 619 618 
+438 1 quad 450 451 620 619 
+439 1 quad 451 452 621 620 
+440 1 quad 452 453 622 621 
+441 1 quad 453 454 623 622 
+442 1 quad 454 455 624 623 
+443 1 quad 455 456 625 624 
+444 1 quad 456 457 626 625 
+445 1 quad 457 458 627 626 
+446 1 quad 458 459 628 627 
+447 1 quad 459 460 629 628 
+448 1 quad 460 461 630 629 
+449 1 quad 461 462 631 630 
+450 1 quad 462 463 632 631 
+451 1 quad 463 464 633 632 
+452 1 quad 464 465 634 633 
+453 1 quad 465 466 635 634 
+454 1 quad 466 467 636 635 
+455 1 quad 467 468 637 636 
+456 1 quad 468 469 638 637 
+457 1 quad 469 470 639 638 
+458 1 quad 470 471 640 639 
+459 1 quad 471 472 641 640 
+460 1 quad 472 473 642 641 
+461 1 quad 473 474 643 642 
+462 1 quad 474 475 644 643 
+463 1 quad 475 476 645 644 
+464 1 quad 476 477 646 645 
+465 1 quad 477 478 647 646 
+466 1 quad 478 479 648 647 
+467 1 quad 479 480 649 648 
+468 1 quad 480 481 650 649 
+469 1 quad 481 482 651 650 
+470 1 quad 482 483 652 651 
+471 1 quad 483 484 653 652 
+472 1 quad 484 485 654 653 
+473 1 quad 485 486 655 654 
+474 1 quad 486 487 656 655 
+475 1 quad 487 488 657 656 
+476 1 quad 488 489 658 657 
+477 1 quad 489 490 659 658 
+478 1 quad 490 491 660 659 
+479 1 quad 491 492 661 660 
+480 1 quad 492 493 662 661 
+481 1 quad 493 494 663 662 
+482 1 quad 494 495 664 663 
+483 1 quad 495 496 665 664 
+484 1 quad 496 497 666 665 
+485 1 quad 497 498 667 666 
+486 1 quad 498 499 668 667 
+487 1 quad 499 500 669 668 
+488 1 quad 500 501 670 669 
+489 1 quad 533 534 671 613 
+490 1 quad 534 535 672 671 
+491 1 quad 535 536 673 672 
+492 1 quad 536 537 674 673 
+493 1 quad 537 538 675 674 
+494 1 quad 538 539 676 675 
+495 1 quad 539 540 677 676 
+496 1 quad 540 541 678 677 
+497 1 quad 541 542 679 678 
+498 1 quad 542 543 680 679 
+499 1 quad 543 544 681 680 
+500 1 quad 544 545 682 681 
+501 1 quad 545 546 683 682 
+502 1 quad 546 547 684 683 
+503 1 quad 547 548 685 684 
+504 1 quad 548 549 686 685 
+505 1 quad 549 550 687 686 
+506 1 quad 550 551 688 687 
+507 1 quad 551 552 689 688 
+508 1 quad 552 504 505 689 
+509 1 quad 505 506 690 689 
+510 1 quad 506 507 691 690 
+511 1 quad 507 508 692 691 
+512 1 quad 508 509 693 692 
+513 1 quad 509 510 694 693 
+514 1 quad 510 511 695 694 
+515 1 quad 511 512 696 695 
+516 1 quad 512 513 697 696 
+517 1 quad 513 514 698 697 
+518 1 quad 514 515 699 698 
+519 1 quad 515 516 700 699 
+520 1 quad 516 517 701 700 
+521 1 quad 517 518 702 701 
+522 1 quad 518 519 703 702 
+523 1 quad 519 520 704 703 
+524 1 quad 520 521 705 704 
+525 1 quad 521 522 706 705 
+526 1 quad 522 523 707 706 
+527 1 quad 523 524 708 707 
+528 1 quad 524 525 709 708 
+529 1 quad 525 526 710 709 
+530 1 quad 526 527 711 710 
+531 1 quad 527 528 712 711 
+532 1 quad 528 529 713 712 
+533 1 quad 529 530 714 713 
+534 1 quad 530 531 715 714 
+535 1 quad 531 532 614 715 
+536 1 quad 590 591 716 589 
+537 1 quad 591 592 717 716 
+538 1 quad 592 593 718 717 
+539 1 quad 593 594 719 718 
+540 1 quad 594 595 720 719 
+541 1 quad 595 596 721 720 
+542 1 quad 596 597 722 721 
+543 1 quad 597 724 723 722 
+544 1 quad 597 598 725 724 
+545 1 quad 598 599 726 725 
+546 1 quad 599 600 727 726 
+547 1 quad 600 601 728 727 
+548 1 quad 601 602 729 728 
+549 1 quad 602 603 730 729 
+550 1 quad 603 604 731 730 
+551 1 quad 604 605 732 731 
+552 1 quad 605 606 733 732 
+553 1 quad 606 735 734 733 
+554 1 quad 606 607 736 735 
+555 1 quad 607 608 737 736 
+556 1 quad 608 609 738 737 
+557 1 quad 609 610 739 738 
+558 1 quad 610 611 740 739 
+559 1 quad 611 612 741 740 
+560 1 quad 612 613 671 741 
+561 1 quad 614 615 742 715 
+562 1 quad 615 616 743 742 
+563 1 quad 616 617 744 743 
+564 1 quad 617 618 745 744 
+565 1 quad 618 619 746 745 
+566 1 quad 619 620 747 746 
+567 1 quad 620 621 748 747 
+568 1 quad 621 622 749 748 
+569 1 quad 622 623 750 749 
+570 1 quad 623 624 751 750 
+571 1 quad 624 625 752 751 
+572 1 quad 625 626 753 752 
+573 1 quad 626 627 754 753 
+574 1 quad 627 628 755 754 
+575 1 quad 628 629 756 755 
+576 1 quad 629 630 757 756 
+577 1 quad 630 631 758 757 
+578 1 quad 631 632 759 758 
+579 1 quad 632 633 760 759 
+580 1 quad 633 634 761 760 
+581 1 quad 634 635 762 761 
+582 1 quad 635 636 763 762 
+583 1 quad 636 637 764 763 
+584 1 quad 637 638 765 764 
+585 1 quad 638 639 766 765 
+586 1 quad 639 640 767 766 
+587 1 quad 640 641 768 767 
+588 1 quad 641 642 769 768 
+589 1 quad 642 643 770 769 
+590 1 quad 643 644 771 770 
+591 1 quad 644 645 772 771 
+592 1 quad 645 646 773 772 
+593 1 quad 646 647 774 773 
+594 1 quad 647 648 775 774 
+595 1 quad 648 649 776 775 
+596 1 quad 649 650 777 776 
+597 1 quad 650 651 778 777 
+598 1 quad 651 652 779 778 
+599 1 quad 652 653 780 779 
+600 1 quad 653 654 781 780 
+601 1 quad 654 655 782 781 
+602 1 quad 655 656 783 782 
+603 1 quad 656 657 784 783 
+604 1 quad 657 658 785 784 
+605 1 quad 658 659 786 785 
+606 1 quad 659 660 787 786 
+607 1 quad 660 661 788 787 
+608 1 quad 661 662 789 788 
+609 1 quad 662 663 790 789 
+610 1 quad 663 664 791 790 
+611 1 quad 664 665 792 791 
+612 1 quad 665 666 793 792 
+613 1 quad 666 667 794 793 
+614 1 quad 667 668 795 794 
+615 1 quad 668 669 796 795 
+616 1 quad 669 670 554 796 
+617 1 quad 671 672 797 741 
+618 1 quad 672 673 798 797 
+619 1 quad 673 674 799 798 
+620 1 quad 674 675 800 799 
+621 1 quad 675 676 801 800 
+622 1 quad 676 677 802 801 
+623 1 quad 677 678 803 802 
+624 1 quad 678 679 804 803 
+625 1 quad 679 680 805 804 
+626 1 quad 680 681 806 805 
+627 1 quad 681 682 807 806 
+628 1 quad 682 683 808 807 
+629 1 quad 683 684 809 808 
+630 1 quad 684 685 810 809 
+631 1 quad 685 686 811 810 
+632 1 quad 686 687 812 811 
+633 1 quad 687 688 813 812 
+634 1 quad 688 689 690 813 
+635 1 quad 554 555 814 796 
+636 1 quad 555 556 815 814 
+637 1 quad 556 557 816 815 
+638 1 quad 557 558 817 816 
+639 1 quad 558 559 818 817 
+640 1 quad 559 560 819 818 
+641 1 quad 560 561 820 819 
+642 1 quad 561 562 821 820 
+643 1 quad 562 563 822 821 
+644 1 quad 563 564 823 822 
+645 1 quad 564 565 824 823 
+646 1 quad 565 566 825 824 
+647 1 quad 566 567 826 825 
+648 1 quad 567 568 827 826 
+649 1 quad 568 569 828 827 
+650 1 quad 569 570 829 828 
+651 1 quad 570 571 830 829 
+652 1 quad 571 572 831 830 
+653 1 quad 572 573 832 831 
+654 1 quad 573 574 833 832 
+655 1 quad 574 575 834 833 
+656 1 quad 575 576 835 834 
+657 1 quad 576 577 836 835 
+658 1 quad 577 578 837 836 
+659 1 quad 578 579 838 837 
+660 1 quad 579 580 839 838 
+661 1 quad 580 581 840 839 
+662 1 quad 581 582 841 840 
+663 1 quad 582 583 842 841 
+664 1 quad 583 584 843 842 
+665 1 quad 584 585 844 843 
+666 1 quad 585 586 845 844 
+667 1 quad 586 587 1723 845 
+668 1 quad 587 588 1725 1723 
+669 1 quad 588 589 716 1725 
+670 1 quad 690 691 848 813 
+671 1 quad 691 692 849 848 
+672 1 quad 692 693 850 849 
+673 1 quad 693 694 851 850 
+674 1 quad 694 695 852 851 
+675 1 quad 695 696 853 852 
+676 1 quad 696 697 854 853 
+677 1 quad 697 698 855 854 
+678 1 quad 698 699 856 855 
+679 1 quad 699 700 857 856 
+680 1 quad 700 701 858 857 
+681 1 quad 701 702 859 858 
+682 1 quad 702 703 860 859 
+683 1 quad 703 704 861 860 
+684 1 quad 704 705 862 861 
+685 1 quad 705 706 863 862 
+686 1 quad 706 707 864 863 
+687 1 quad 707 708 865 864 
+688 1 quad 708 709 866 865 
+689 1 quad 709 710 867 866 
+690 1 quad 710 711 868 867 
+691 1 quad 711 712 869 868 
+692 1 quad 712 713 870 869 
+693 1 quad 713 714 871 870 
+694 1 quad 714 715 742 871 
+695 1 quad 718 719 846 847 
+696 1 quad 719 720 872 846 
+697 1 quad 720 721 873 872 
+698 1 quad 721 722 874 873 
+699 1 quad 722 723 875 874 
+700 1 quad 723 877 876 875 
+701 1 quad 723 724 878 877 
+702 1 quad 724 725 879 878 
+703 1 quad 725 726 880 879 
+704 1 quad 726 727 881 880 
+705 1 quad 727 728 882 881 
+706 1 quad 728 729 883 882 
+707 1 quad 729 730 884 883 
+708 1 quad 730 731 885 884 
+709 1 quad 731 732 886 885 
+710 1 quad 732 733 887 886 
+711 1 quad 733 734 888 887 
+712 1 quad 734 890 889 888 
+713 1 quad 734 735 891 890 
+714 1 quad 735 736 892 891 
+715 1 quad 736 737 893 892 
+716 1 quad 737 738 894 893 
+717 1 quad 738 739 895 894 
+718 1 quad 739 740 896 895 
+719 1 quad 740 741 797 896 
+720 1 quad 796 814 897 795 
+721 1 quad 814 815 898 897 
+722 1 quad 815 816 1040 898 
+723 1 quad 816 817 976 1040 
+724 1 quad 817 818 899 976 
+725 1 quad 818 819 900 899 
+726 1 quad 819 820 901 900 
+727 1 quad 820 821 902 901 
+728 1 quad 821 822 903 902 
+729 1 quad 822 823 904 903 
+730 1 quad 823 824 905 904 
+731 1 quad 824 825 906 905 
+732 1 quad 825 826 907 906 
+733 1 quad 826 827 908 907 
+734 1 quad 827 828 909 908 
+735 1 quad 828 829 910 909 
+736 1 quad 829 830 911 910 
+737 1 quad 830 831 912 911 
+738 1 quad 831 832 913 912 
+739 1 quad 832 833 914 913 
+740 1 quad 833 834 915 914 
+741 1 quad 834 835 916 915 
+742 1 quad 835 836 917 916 
+743 1 quad 836 837 918 917 
+744 1 quad 837 838 919 918 
+745 1 quad 838 839 920 919 
+746 1 quad 839 840 921 920 
+747 1 quad 840 841 922 921 
+748 1 quad 841 842 923 922 
+749 1 quad 842 843 924 923 
+750 1 quad 843 844 925 924 
+751 1 quad 844 845 926 925 
+752 1 quad 742 743 927 871 
+753 1 quad 743 744 928 927 
+754 1 quad 744 745 929 928 
+755 1 quad 745 746 930 929 
+756 1 quad 746 747 931 930 
+757 1 quad 747 748 932 931 
+758 1 quad 748 749 933 932 
+759 1 quad 749 750 934 933 
+760 1 quad 750 751 935 934 
+761 1 quad 751 752 936 935 
+762 1 quad 752 753 937 936 
+763 1 quad 753 754 938 937 
+764 1 quad 754 755 939 938 
+765 1 quad 755 756 940 939 
+766 1 quad 756 757 941 940 
+767 1 quad 757 758 942 941 
+768 1 quad 758 759 943 942 
+769 1 quad 759 760 944 943 
+770 1 quad 760 761 945 944 
+771 1 quad 761 762 946 945 
+772 1 quad 762 763 947 946 
+773 1 quad 763 764 948 947 
+774 1 quad 764 765 949 948 
+775 1 quad 765 766 950 949 
+776 1 quad 766 767 951 950 
+777 1 quad 767 768 952 951 
+778 1 quad 768 769 953 952 
+779 1 quad 769 770 954 953 
+780 1 quad 770 771 955 954 
+781 1 quad 771 772 956 955 
+782 1 quad 772 773 957 956 
+783 1 quad 773 774 958 957 
+784 1 quad 774 775 959 958 
+785 1 quad 775 776 960 959 
+786 1 quad 776 777 961 960 
+787 1 quad 777 778 962 961 
+788 1 quad 778 779 963 962 
+789 1 quad 779 780 964 963 
+790 1 quad 780 781 965 964 
+791 1 quad 781 782 966 965 
+792 1 quad 782 783 967 966 
+793 1 quad 783 784 968 967 
+794 1 quad 784 785 969 968 
+795 1 quad 785 786 970 969 
+796 1 quad 786 787 971 970 
+797 1 quad 787 788 972 971 
+798 1 quad 788 789 973 972 
+799 1 quad 789 790 974 973 
+800 1 quad 790 791 975 974 
+801 1 quad 791 792 976 975 
+802 1 quad 797 798 977 896 
+803 1 quad 798 799 978 977 
+804 1 quad 799 800 979 978 
+805 1 quad 800 801 980 979 
+806 1 quad 801 802 981 980 
+807 1 quad 802 803 982 981 
+808 1 quad 803 804 983 982 
+809 1 quad 804 805 984 983 
+810 1 quad 805 806 985 984 
+811 1 quad 806 807 986 985 
+812 1 quad 807 808 987 986 
+813 1 quad 808 809 988 987 
+814 1 quad 809 810 989 988 
+815 1 quad 810 811 990 989 
+816 1 quad 811 812 991 990 
+817 1 quad 812 813 848 991 
+818 1 quad 848 849 992 991 
+819 1 quad 849 850 993 992 
+820 1 quad 850 851 994 993 
+821 1 quad 851 852 995 994 
+822 1 quad 852 853 996 995 
+823 1 quad 853 854 997 996 
+824 1 quad 854 855 998 997 
+825 1 quad 855 856 999 998 
+826 1 quad 856 857 1000 999 
+827 1 quad 857 858 1001 1000 
+828 1 quad 858 859 1002 1001 
+829 1 quad 859 860 1003 1002 
+830 1 quad 860 861 1004 1003 
+831 1 quad 861 862 1005 1004 
+832 1 quad 862 863 1006 1005 
+833 1 quad 863 864 1007 1006 
+834 1 quad 864 865 1008 1007 
+835 1 quad 865 866 1009 1008 
+836 1 quad 866 867 1010 1009 
+837 1 quad 867 868 1011 1010 
+838 1 quad 868 869 1012 1011 
+839 1 quad 869 870 1013 1012 
+840 1 quad 870 871 927 1013 
+841 1 quad 872 873 925 926 
+842 1 quad 873 874 1014 925 
+843 1 quad 874 875 1015 1014 
+844 1 quad 875 876 1016 1015 
+845 1 quad 876 1018 1017 1016 
+846 1 quad 876 877 1019 1018 
+847 1 quad 877 878 1020 1019 
+848 1 quad 878 879 1021 1020 
+849 1 quad 879 880 1022 1021 
+850 1 quad 880 881 1023 1022 
+851 1 quad 881 882 1024 1023 
+852 1 quad 882 883 1025 1024 
+853 1 quad 883 884 1026 1025 
+854 1 quad 884 885 1027 1026 
+855 1 quad 885 886 1028 1027 
+856 1 quad 886 887 1029 1028 
+857 1 quad 887 888 1030 1029 
+858 1 quad 888 889 1031 1030 
+859 1 quad 889 1033 1032 1031 
+860 1 quad 889 890 1034 1033 
+861 1 quad 890 891 1035 1034 
+862 1 quad 891 892 1036 1035 
+863 1 quad 892 893 1037 1036 
+864 1 quad 893 894 1038 1037 
+865 1 quad 894 895 1039 1038 
+866 1 quad 895 896 977 1039 
+867 1 quad 795 897 898 794 
+868 1 quad 792 793 1040 976 
+869 1 quad 1040 793 794 898 
+870 1 quad 927 928 1041 1013 
+871 1 quad 928 929 1042 1041 
+872 1 quad 929 930 1043 1042 
+873 1 quad 930 931 1044 1043 
+874 1 quad 931 932 1045 1044 
+875 1 quad 932 933 1046 1045 
+876 1 quad 933 934 1047 1046 
+877 1 quad 934 935 1048 1047 
+878 1 quad 935 936 1049 1048 
+879 1 quad 936 937 1050 1049 
+880 1 quad 937 938 1051 1050 
+881 1 quad 938 939 1052 1051 
+882 1 quad 939 940 1053 1052 
+883 1 quad 940 941 1054 1053 
+884 1 quad 941 942 1055 1054 
+885 1 quad 942 943 1056 1055 
+886 1 quad 943 944 1057 1056 
+887 1 quad 944 945 1058 1057 
+888 1 quad 945 946 1059 1058 
+889 1 quad 946 947 1060 1059 
+890 1 quad 947 948 1061 1060 
+891 1 quad 948 949 1062 1061 
+892 1 quad 949 950 1063 1062 
+893 1 quad 950 951 1064 1063 
+894 1 quad 951 952 1065 1064 
+895 1 quad 952 953 1066 1065 
+896 1 quad 953 954 1067 1066 
+897 1 quad 954 955 1068 1067 
+898 1 quad 955 956 1069 1068 
+899 1 quad 956 957 1070 1069 
+900 1 quad 957 958 1071 1070 
+901 1 quad 958 959 1072 1071 
+902 1 quad 959 960 1073 1072 
+903 1 quad 960 961 1074 1073 
+904 1 quad 961 962 1075 1074 
+905 1 quad 962 963 1076 1075 
+906 1 quad 963 964 1077 1076 
+907 1 quad 964 965 1078 1077 
+908 1 quad 965 966 1079 1078 
+909 1 quad 966 967 1080 1079 
+910 1 quad 967 968 1081 1080 
+911 1 quad 968 969 1082 1081 
+912 1 quad 969 970 1083 1082 
+913 1 quad 970 971 1084 1083 
+914 1 quad 971 972 1085 1084 
+915 1 quad 972 973 1086 1085 
+916 1 quad 973 974 1087 1086 
+917 1 quad 974 975 1088 1087 
+918 1 quad 975 976 899 1088 
+919 1 quad 977 978 1089 1039 
+920 1 quad 978 979 1090 1089 
+921 1 quad 979 980 1091 1090 
+922 1 quad 980 981 1092 1091 
+923 1 quad 981 982 1093 1092 
+924 1 quad 982 983 1094 1093 
+925 1 quad 983 984 1095 1094 
+926 1 quad 984 985 1096 1095 
+927 1 quad 985 986 1097 1096 
+928 1 quad 986 987 1098 1097 
+929 1 quad 987 988 1099 1098 
+930 1 quad 988 989 1100 1099 
+931 1 quad 989 990 1101 1100 
+932 1 quad 990 991 992 1101 
+933 1 quad 899 900 1087 1088 
+934 1 quad 900 901 1086 1087 
+935 1 quad 901 902 1213 1086 
+936 1 quad 902 903 1102 1213 
+937 1 quad 903 904 1103 1102 
+938 1 quad 904 905 1104 1103 
+939 1 quad 905 906 1105 1104 
+940 1 quad 906 907 1106 1105 
+941 1 quad 907 908 1107 1106 
+942 1 quad 908 909 1108 1107 
+943 1 quad 909 910 1109 1108 
+944 1 quad 910 911 1110 1109 
+945 1 quad 911 912 1111 1110 
+946 1 quad 912 913 1112 1111 
+947 1 quad 913 914 1113 1112 
+948 1 quad 914 915 1114 1113 
+949 1 quad 915 916 1115 1114 
+950 1 quad 916 917 1116 1115 
+951 1 quad 917 918 1117 1116 
+952 1 quad 918 919 1118 1117 
+953 1 quad 919 920 1119 1118 
+954 1 quad 920 921 1120 1119 
+955 1 quad 921 922 1121 1120 
+956 1 quad 922 923 1143 1121 
+957 1 quad 923 924 1122 1143 
+958 1 quad 924 925 1014 1122 
+959 1 quad 992 993 1123 1101 
+960 1 quad 993 994 1124 1123 
+961 1 quad 994 995 1125 1124 
+962 1 quad 995 996 1126 1125 
+963 1 quad 996 997 1127 1126 
+964 1 quad 997 998 1128 1127 
+965 1 quad 998 999 1129 1128 
+966 1 quad 999 1000 1130 1129 
+967 1 quad 1000 1001 1131 1130 
+968 1 quad 1001 1002 1132 1131 
+969 1 quad 1002 1003 1133 1132 
+970 1 quad 1003 1004 1134 1133 
+971 1 quad 1004 1005 1135 1134 
+972 1 quad 1005 1006 1136 1135 
+973 1 quad 1006 1007 1137 1136 
+974 1 quad 1007 1008 1138 1137 
+975 1 quad 1008 1009 1139 1138 
+976 1 quad 1009 1010 1140 1139 
+977 1 quad 1010 1011 1141 1140 
+978 1 quad 1011 1012 1142 1141 
+979 1 quad 1012 1013 1041 1142 
+980 1 quad 1014 1015 1143 1122 
+981 1 quad 1015 1016 1121 1143 
+982 1 quad 1016 1017 1244 1121 
+983 1 quad 1017 1145 1144 1244 
+984 1 quad 1017 1018 1146 1145 
+985 1 quad 1018 1019 1147 1146 
+986 1 quad 1019 1020 1148 1147 
+987 1 quad 1020 1021 1149 1148 
+988 1 quad 1021 1022 1150 1149 
+989 1 quad 1022 1023 1151 1150 
+990 1 quad 1023 1024 1152 1151 
+991 1 quad 1024 1025 1153 1152 
+992 1 quad 1025 1026 1154 1153 
+993 1 quad 1026 1027 1155 1154 
+994 1 quad 1027 1028 1156 1155 
+995 1 quad 1028 1029 1157 1156 
+996 1 quad 1029 1030 1158 1157 
+997 1 quad 1030 1031 1159 1158 
+998 1 quad 1031 1032 1160 1159 
+999 1 quad 1032 1162 1161 1160 
+1000 1 quad 1032 1033 1163 1162 
+1001 1 quad 1033 1034 1164 1163 
+1002 1 quad 1034 1035 1165 1164 
+1003 1 quad 1035 1036 1166 1165 
+1004 1 quad 1036 1037 1167 1166 
+1005 1 quad 1037 1038 1168 1167 
+1006 1 quad 1038 1039 1089 1168 
+1007 1 quad 1041 1042 1169 1142 
+1008 1 quad 1042 1043 1170 1169 
+1009 1 quad 1043 1044 1171 1170 
+1010 1 quad 1044 1045 1172 1171 
+1011 1 quad 1045 1046 1173 1172 
+1012 1 quad 1046 1047 1174 1173 
+1013 1 quad 1047 1048 1175 1174 
+1014 1 quad 1048 1049 1176 1175 
+1015 1 quad 1049 1050 1177 1176 
+1016 1 quad 1050 1051 1178 1177 
+1017 1 quad 1051 1052 1179 1178 
+1018 1 quad 1052 1053 1180 1179 
+1019 1 quad 1053 1054 1181 1180 
+1020 1 quad 1054 1055 1182 1181 
+1021 1 quad 1055 1056 1183 1182 
+1022 1 quad 1056 1057 1184 1183 
+1023 1 quad 1057 1058 1185 1184 
+1024 1 quad 1058 1059 1186 1185 
+1025 1 quad 1059 1060 1187 1186 
+1026 1 quad 1060 1061 1188 1187 
+1027 1 quad 1061 1062 1189 1188 
+1028 1 quad 1062 1063 1190 1189 
+1029 1 quad 1063 1064 1191 1190 
+1030 1 quad 1064 1065 1192 1191 
+1031 1 quad 1065 1066 1193 1192 
+1032 1 quad 1066 1067 1194 1193 
+1033 1 quad 1067 1068 1195 1194 
+1034 1 quad 1068 1069 1196 1195 
+1035 1 quad 1069 1070 1197 1196 
+1036 1 quad 1070 1071 1198 1197 
+1037 1 quad 1071 1072 1199 1198 
+1038 1 quad 1072 1073 1200 1199 
+1039 1 quad 1073 1074 1201 1200 
+1040 1 quad 1074 1075 1202 1201 
+1041 1 quad 1075 1076 1203 1202 
+1042 1 quad 1076 1077 1204 1203 
+1043 1 quad 1077 1078 1205 1204 
+1044 1 quad 1078 1079 1206 1205 
+1045 1 quad 1079 1080 1207 1206 
+1046 1 quad 1080 1081 1208 1207 
+1047 1 quad 1081 1082 1209 1208 
+1048 1 quad 1082 1083 1210 1209 
+1049 1 quad 1083 1084 1211 1210 
+1050 1 quad 1084 1085 1212 1211 
+1051 1 quad 1085 1086 1213 1212 
+1052 1 quad 1089 1090 1214 1168 
+1053 1 quad 1090 1091 1215 1214 
+1054 1 quad 1091 1092 1216 1215 
+1055 1 quad 1092 1093 1217 1216 
+1056 1 quad 1093 1094 1218 1217 
+1057 1 quad 1094 1095 1219 1218 
+1058 1 quad 1095 1096 1220 1219 
+1059 1 quad 1096 1097 1221 1220 
+1060 1 quad 1097 1098 1222 1221 
+1061 1 quad 1098 1099 1223 1222 
+1062 1 quad 1099 1100 1224 1223 
+1063 1 quad 1100 1101 1123 1224 
+1064 1 quad 1213 1102 1225 1212 
+1065 1 quad 1102 1103 1226 1225 
+1066 1 quad 1103 1104 1227 1226 
+1067 1 quad 1104 1105 1228 1227 
+1068 1 quad 1105 1106 1229 1228 
+1069 1 quad 1106 1107 1230 1229 
+1070 1 quad 1107 1108 1231 1230 
+1071 1 quad 1108 1109 1232 1231 
+1072 1 quad 1109 1110 1233 1232 
+1073 1 quad 1110 1111 1234 1233 
+1074 1 quad 1111 1112 1235 1234 
+1075 1 quad 1112 1113 1236 1235 
+1076 1 quad 1113 1114 1237 1236 
+1077 1 quad 1114 1115 1238 1237 
+1078 1 quad 1115 1116 1239 1238 
+1079 1 quad 1116 1117 1240 1239 
+1080 1 quad 1117 1118 1241 1240 
+1081 1 quad 1118 1119 1242 1241 
+1082 1 quad 1119 1120 1243 1242 
+1083 1 quad 1120 1121 1244 1243 
+1084 1 quad 1123 1124 1245 1224 
+1085 1 quad 1124 1125 1246 1245 
+1086 1 quad 1125 1126 1247 1246 
+1087 1 quad 1126 1127 1248 1247 
+1088 1 quad 1127 1128 1249 1248 
+1089 1 quad 1128 1129 1250 1249 
+1090 1 quad 1129 1130 1251 1250 
+1091 1 quad 1130 1131 1252 1251 
+1092 1 quad 1131 1132 1253 1252 
+1093 1 quad 1132 1133 1254 1253 
+1094 1 quad 1133 1134 1255 1254 
+1095 1 quad 1134 1135 1256 1255 
+1096 1 quad 1135 1136 1257 1256 
+1097 1 quad 1136 1137 1258 1257 
+1098 1 quad 1137 1138 1259 1258 
+1099 1 quad 1138 1139 1260 1259 
+1100 1 quad 1139 1140 1261 1260 
+1101 1 quad 1140 1141 1262 1261 
+1102 1 quad 1141 1142 1169 1262 
+1103 1 quad 1244 1144 1263 1243 
+1104 1 quad 1243 1263 1241 1242 
+1105 1 quad 1263 1264 1354 1241 
+1106 1 quad 1263 1144 1265 1264 
+1107 1 quad 1144 1145 1266 1265 
+1108 1 quad 1146 1147 1266 1145 
+1109 1 quad 1147 1148 1267 1266 
+1110 1 quad 1148 1149 1268 1267 
+1111 1 quad 1149 1150 1269 1268 
+1112 1 quad 1150 1151 1270 1269 
+1113 1 quad 1151 1152 1271 1270 
+1114 1 quad 1152 1153 1272 1271 
+1115 1 quad 1154 1155 1274 1273 
+1116 1 quad 1155 1156 1730 1274 
+1117 1 quad 1156 1157 1275 1730 
+1118 1 quad 1157 1158 1276 1275 
+1119 1 quad 1158 1159 1277 1276 
+1120 1 quad 1159 1160 1278 1277 
+1121 1 quad 1160 1161 1279 1278 
+1122 1 quad 1161 1281 1280 1279 
+1123 1 quad 1161 1162 1282 1281 
+1124 1 quad 1162 1163 1283 1282 
+1125 1 quad 1163 1164 1284 1283 
+1126 1 quad 1164 1165 1285 1284 
+1127 1 quad 1165 1166 1286 1285 
+1128 1 quad 1166 1167 1287 1286 
+1129 1 quad 1167 1168 1214 1287 
+1130 1 quad 1153 1154 1273 1288 
+1131 1 quad 1272 1153 1288 1289 
+1132 1 quad 1169 1170 1290 1262 
+1133 1 quad 1170 1171 1291 1290 
+1134 1 quad 1171 1172 1292 1291 
+1135 1 quad 1172 1173 1293 1292 
+1136 1 quad 1173 1174 1294 1293 
+1137 1 quad 1174 1175 1295 1294 
+1138 1 quad 1175 1176 1296 1295 
+1139 1 quad 1176 1177 1297 1296 
+1140 1 quad 1177 1178 1298 1297 
+1141 1 quad 1178 1179 1299 1298 
+1142 1 quad 1179 1180 1300 1299 
+1143 1 quad 1180 1181 1301 1300 
+1144 1 quad 1181 1182 1302 1301 
+1145 1 quad 1182 1183 1303 1302 
+1146 1 quad 1183 1184 1304 1303 
+1147 1 quad 1184 1185 1305 1304 
+1148 1 quad 1185 1186 1306 1305 
+1149 1 quad 1186 1187 1307 1306 
+1150 1 quad 1187 1188 1308 1307 
+1151 1 quad 1188 1189 1309 1308 
+1152 1 quad 1189 1190 1310 1309 
+1153 1 quad 1190 1191 1311 1310 
+1154 1 quad 1191 1192 1312 1311 
+1155 1 quad 1192 1193 1313 1312 
+1156 1 quad 1193 1194 1314 1313 
+1157 1 quad 1194 1195 1315 1314 
+1158 1 quad 1195 1196 1316 1315 
+1159 1 quad 1196 1197 1317 1316 
+1160 1 quad 1197 1198 1318 1317 
+1161 1 quad 1198 1199 1319 1318 
+1162 1 quad 1199 1200 1320 1319 
+1163 1 quad 1200 1201 1321 1320 
+1164 1 quad 1201 1202 1322 1321 
+1165 1 quad 1202 1203 1323 1322 
+1166 1 quad 1203 1204 1324 1323 
+1167 1 quad 1204 1205 1325 1324 
+1168 1 quad 1205 1206 1326 1325 
+1169 1 quad 1206 1207 1327 1326 
+1170 1 quad 1207 1208 1328 1327 
+1171 1 quad 1208 1209 1329 1328 
+1172 1 quad 1209 1210 1330 1329 
+1173 1 quad 1214 1215 1331 1287 
+1174 1 quad 1215 1216 1332 1331 
+1175 1 quad 1216 1217 1333 1332 
+1176 1 quad 1217 1218 1334 1333 
+1177 1 quad 1218 1219 1335 1334 
+1178 1 quad 1219 1220 1336 1335 
+1179 1 quad 1220 1221 1337 1336 
+1180 1 quad 1221 1222 1338 1337 
+1181 1 quad 1222 1223 1339 1338 
+1182 1 quad 1223 1224 1245 1339 
+1183 1 quad 1212 1225 1340 1211 
+1184 1 quad 1211 1340 1330 1210 
+1185 1 quad 1225 1226 1330 1340 
+1186 1 quad 1226 1227 1329 1330 
+1187 1 quad 1227 1228 1341 1329 
+1188 1 quad 1228 1229 1342 1341 
+1189 1 quad 1229 1230 1343 1342 
+1190 1 quad 1230 1231 1344 1343 
+1191 1 quad 1231 1232 1345 1344 
+1192 1 quad 1232 1233 1346 1345 
+1193 1 quad 1233 1234 1347 1346 
+1194 1 quad 1234 1235 1348 1347 
+1195 1 quad 1235 1236 1349 1348 
+1196 1 quad 1236 1237 1350 1349 
+1197 1 quad 1237 1238 1351 1350 
+1198 1 quad 1238 1239 1352 1351 
+1199 1 quad 1239 1240 1353 1352 
+1200 1 quad 1240 1241 1354 1353 
+1201 1 quad 1245 1246 1355 1339 
+1202 1 quad 1246 1247 1356 1355 
+1203 1 quad 1247 1248 1357 1356 
+1204 1 quad 1248 1249 1358 1357 
+1205 1 quad 1249 1250 1359 1358 
+1206 1 quad 1250 1251 1360 1359 
+1207 1 quad 1251 1252 1361 1360 
+1208 1 quad 1252 1253 1362 1361 
+1209 1 quad 1253 1254 1363 1362 
+1210 1 quad 1254 1255 1364 1363 
+1211 1 quad 1255 1256 1365 1364 
+1212 1 quad 1256 1257 1366 1365 
+1213 1 quad 1257 1258 1367 1366 
+1214 1 quad 1258 1259 1368 1367 
+1215 1 quad 1259 1260 1369 1368 
+1216 1 quad 1260 1261 1370 1369 
+1217 1 quad 1261 1262 1290 1370 
+1218 1 quad 1329 1341 1371 1328 
+1219 1 quad 1341 1342 1372 1371 
+1220 1 quad 1342 1343 1373 1372 
+1221 1 quad 1343 1344 1374 1373 
+1222 1 quad 1344 1345 1375 1374 
+1223 1 quad 1345 1346 1376 1375 
+1224 1 quad 1346 1347 1377 1376 
+1225 1 quad 1347 1348 1378 1377 
+1226 1 quad 1348 1349 1379 1378 
+1227 1 quad 1349 1350 1380 1379 
+1228 1 quad 1350 1351 1381 1380 
+1229 1 quad 1351 1352 1382 1381 
+1230 1 quad 1352 1353 1383 1382 
+1231 1 quad 1353 1354 1384 1383 
+1232 1 quad 1354 1264 1385 1384 
+1233 1 quad 1264 1265 1386 1385 
+1234 1 quad 1265 1266 1387 1386 
+1235 1 quad 1266 1267 1388 1387 
+1236 1 quad 1267 1268 1389 1388 
+1237 1 quad 1268 1269 1390 1389 
+1238 1 quad 1269 1270 1391 1390 
+1239 1 quad 1270 1271 1392 1391 
+1240 1 quad 1271 1272 1393 1392 
+1241 1 quad 1272 1289 1394 1393 
+1242 1 quad 1289 1396 1395 1394 
+1243 1 quad 1289 1288 1397 1396 
+1244 1 quad 1288 1273 1398 1397 
+1245 1 quad 1328 1371 1399 1327 
+1246 1 quad 1371 1372 1476 1399 
+1247 1 quad 1372 1373 1475 1476 
+1248 1 quad 1373 1374 1474 1475 
+1249 1 quad 1374 1375 1473 1474 
+1250 1 quad 1375 1376 1400 1473 
+1251 1 quad 1376 1377 1401 1400 
+1252 1 quad 1377 1378 1402 1401 
+1253 1 quad 1378 1379 1403 1402 
+1254 1 quad 1379 1380 1404 1403 
+1255 1 quad 1380 1381 1405 1404 
+1256 1 quad 1381 1382 1406 1405 
+1257 1 quad 1382 1383 1407 1406 
+1258 1 quad 1383 1384 1408 1407 
+1259 1 quad 1384 1385 1409 1408 
+1260 1 quad 1385 1386 1410 1409 
+1261 1 quad 1386 1387 1411 1410 
+1262 1 quad 1387 1388 1412 1411 
+1263 1 quad 1388 1389 1413 1412 
+1264 1 quad 1389 1390 1414 1413 
+1265 1 quad 1390 1391 1415 1414 
+1266 1 quad 1391 1392 1416 1415 
+1267 1 quad 1392 1393 1417 1416 
+1268 1 quad 1393 1394 1418 1417 
+1269 1 quad 1394 1395 1419 1418 
+1270 1 quad 1395 1421 1420 1419 
+1271 1 quad 1395 1396 1422 1421 
+1272 1 quad 1396 1397 1423 1422 
+1273 1 quad 1397 1398 1424 1423 
+1274 1 quad 1398 1731 1425 1424 
+1275 1 quad 1398 1273 1274 1731 
+1276 1 quad 1275 1276 1428 1427 
+1277 1 quad 1276 1277 1429 1428 
+1278 1 quad 1277 1278 1430 1429 
+1279 1 quad 1278 1279 1431 1430 
+1280 1 quad 1279 1280 1432 1431 
+1281 1 quad 1280 1434 1433 1432 
+1282 1 quad 1280 1281 1435 1434 
+1283 1 quad 1281 1282 1436 1435 
+1284 1 quad 1282 1283 1437 1436 
+1285 1 quad 1283 1284 1438 1437 
+1286 1 quad 1284 1285 1439 1438 
+1287 1 quad 1285 1286 1440 1439 
+1288 1 quad 1286 1287 1331 1440 
+1289 1 quad 1290 1291 1441 1370 
+1290 1 quad 1291 1292 1442 1441 
+1291 1 quad 1292 1293 1443 1442 
+1292 1 quad 1293 1294 1444 1443 
+1293 1 quad 1294 1295 1445 1444 
+1294 1 quad 1295 1296 1446 1445 
+1295 1 quad 1296 1297 1447 1446 
+1296 1 quad 1297 1298 1448 1447 
+1297 1 quad 1298 1299 1449 1448 
+1298 1 quad 1299 1300 1450 1449 
+1299 1 quad 1300 1301 1451 1450 
+1300 1 quad 1301 1302 1452 1451 
+1301 1 quad 1302 1303 1453 1452 
+1302 1 quad 1303 1304 1454 1453 
+1303 1 quad 1304 1305 1455 1454 
+1304 1 quad 1305 1306 1456 1455 
+1305 1 quad 1306 1307 1457 1456 
+1306 1 quad 1307 1308 1458 1457 
+1307 1 quad 1308 1309 1459 1458 
+1308 1 quad 1309 1310 1460 1459 
+1309 1 quad 1310 1311 1461 1460 
+1310 1 quad 1311 1312 1462 1461 
+1311 1 quad 1312 1313 1463 1462 
+1312 1 quad 1313 1314 1464 1463 
+1313 1 quad 1314 1315 1465 1464 
+1314 1 quad 1315 1316 1466 1465 
+1315 1 quad 1316 1317 1467 1466 
+1316 1 quad 1317 1318 1468 1467 
+1317 1 quad 1318 1319 1469 1468 
+1318 1 quad 1319 1320 1470 1469 
+1319 1 quad 1320 1321 1471 1470 
+1320 1 quad 1321 1322 1472 1471 
+1321 1 quad 1322 1323 1473 1472 
+1322 1 quad 1323 1324 1474 1473 
+1323 1 quad 1324 1325 1475 1474 
+1324 1 quad 1325 1326 1476 1475 
+1325 1 quad 1476 1326 1327 1399 
+1326 1 quad 1331 1332 1477 1440 
+1327 1 quad 1332 1333 1478 1477 
+1328 1 quad 1333 1334 1479 1478 
+1329 1 quad 1334 1335 1480 1479 
+1330 1 quad 1335 1336 1481 1480 
+1331 1 quad 1336 1337 1482 1481 
+1332 1 quad 1337 1338 1483 1482 
+1333 1 quad 1338 1339 1355 1483 
+1334 1 quad 1355 1356 1484 1483 
+1335 1 quad 1356 1357 1485 1484 
+1336 1 quad 1357 1358 1486 1485 
+1337 1 quad 1358 1359 1487 1486 
+1338 1 quad 1359 1360 1488 1487 
+1339 1 quad 1360 1361 1489 1488 
+1340 1 quad 1361 1362 1490 1489 
+1341 1 quad 1362 1363 1491 1490 
+1342 1 quad 1363 1364 1492 1491 
+1343 1 quad 1364 1365 1493 1492 
+1344 1 quad 1365 1366 1494 1493 
+1345 1 quad 1366 1367 1495 1494 
+1346 1 quad 1367 1368 1496 1495 
+1347 1 quad 1368 1369 1497 1496 
+1348 1 quad 1369 1370 1441 1497 
+1349 1 quad 1400 1401 1624 1565 
+1350 1 quad 1401 1402 1498 1624 
+1351 1 quad 1402 1403 1499 1498 
+1352 1 quad 1403 1404 1500 1499 
+1353 1 quad 1404 1405 1501 1500 
+1354 1 quad 1405 1406 1502 1501 
+1355 1 quad 1406 1407 1503 1502 
+1356 1 quad 1407 1408 1504 1503 
+1357 1 quad 1410 1411 1505 1504 
+1358 1 quad 1411 1412 1506 1505 
+1359 1 quad 1412 1413 1507 1506 
+1360 1 quad 1413 1414 1508 1507 
+1361 1 quad 1414 1415 1509 1508 
+1362 1 quad 1415 1416 1510 1509 
+1363 1 quad 1416 1417 1511 1510 
+1364 1 quad 1417 1418 1512 1511 
+1365 1 quad 1418 1419 1513 1512 
+1366 1 quad 1419 1420 1514 1513 
+1367 1 quad 1420 1516 1515 1514 
+1368 1 quad 1420 1421 1517 1516 
+1369 1 quad 1421 1422 1518 1517 
+1370 1 quad 1422 1423 1519 1518 
+1371 1 quad 1423 1424 1520 1519 
+1372 1 quad 1428 1429 1522 1521 
+1373 1 quad 1429 1430 1523 1522 
+1374 1 quad 1430 1431 1524 1523 
+1375 1 quad 1431 1432 1525 1524 
+1376 1 quad 1432 1433 1526 1525 
+1377 1 quad 1433 1528 1527 1526 
+1378 1 quad 1433 1434 1529 1528 
+1379 1 quad 1434 1435 1530 1529 
+1380 1 quad 1435 1436 1531 1530 
+1381 1 quad 1436 1437 1532 1531 
+1382 1 quad 1437 1438 1533 1532 
+1383 1 quad 1438 1439 1534 1533 
+1384 1 quad 1439 1440 1477 1534 
+1385 1 quad 1441 1442 1535 1497 
+1386 1 quad 1442 1443 1536 1535 
+1387 1 quad 1443 1444 1537 1536 
+1388 1 quad 1444 1445 1538 1537 
+1389 1 quad 1445 1446 1539 1538 
+1390 1 quad 1446 1447 1540 1539 
+1391 1 quad 1447 1448 1541 1540 
+1392 1 quad 1448 1449 1542 1541 
+1393 1 quad 1449 1450 1543 1542 
+1394 1 quad 1450 1451 1544 1543 
+1395 1 quad 1451 1452 1545 1544 
+1396 1 quad 1452 1453 1546 1545 
+1397 1 quad 1453 1454 1547 1546 
+1398 1 quad 1454 1455 1548 1547 
+1399 1 quad 1455 1456 1549 1548 
+1400 1 quad 1456 1457 1550 1549 
+1401 1 quad 1457 1458 1551 1550 
+1402 1 quad 1458 1459 1552 1551 
+1403 1 quad 1459 1460 1553 1552 
+1404 1 quad 1460 1461 1554 1553 
+1405 1 quad 1461 1462 1555 1554 
+1406 1 quad 1462 1463 1556 1555 
+1407 1 quad 1463 1464 1557 1556 
+1408 1 quad 1464 1465 1558 1557 
+1409 1 quad 1465 1466 1559 1558 
+1410 1 quad 1466 1467 1560 1559 
+1411 1 quad 1467 1468 1561 1560 
+1412 1 quad 1468 1469 1562 1561 
+1413 1 quad 1469 1470 1563 1562 
+1414 1 quad 1470 1471 1564 1563 
+1415 1 quad 1471 1472 1565 1564 
+1416 1 quad 1477 1478 1566 1534 
+1417 1 quad 1478 1479 1567 1566 
+1418 1 quad 1479 1480 1568 1567 
+1419 1 quad 1480 1481 1569 1568 
+1420 1 quad 1481 1482 1570 1569 
+1421 1 quad 1482 1483 1484 1570 
+1422 1 quad 1484 1485 1571 1570 
+1423 1 quad 1485 1486 1572 1571 
+1424 1 quad 1486 1487 1573 1572 
+1425 1 quad 1487 1488 1574 1573 
+1426 1 quad 1488 1489 1575 1574 
+1427 1 quad 1489 1490 1576 1575 
+1428 1 quad 1490 1491 1577 1576 
+1429 1 quad 1491 1492 1578 1577 
+1430 1 quad 1492 1493 1579 1578 
+1431 1 quad 1493 1494 1580 1579 
+1432 1 quad 1494 1495 1581 1580 
+1433 1 quad 1495 1496 1582 1581 
+1434 1 quad 1496 1497 1535 1582 
+1435 1 quad 1521 1522 1583 1520 
+1436 1 quad 1522 1523 1584 1583 
+1437 1 quad 1523 1524 1585 1584 
+1438 1 quad 1524 1525 1586 1585 
+1439 1 quad 1525 1526 1587 1586 
+1440 1 quad 1526 1527 1588 1587 
+1441 1 quad 1527 1590 1589 1588 
+1442 1 quad 1527 1528 1591 1590 
+1443 1 quad 1528 1529 1592 1591 
+1444 1 quad 1529 1530 1593 1592 
+1445 1 quad 1530 1531 1594 1593 
+1446 1 quad 1531 1532 1595 1594 
+1447 1 quad 1532 1533 1596 1595 
+1448 1 quad 1533 1534 1566 1596 
+1449 1 quad 1535 1536 1597 1582 
+1450 1 quad 1536 1537 1598 1597 
+1451 1 quad 1537 1538 1599 1598 
+1452 1 quad 1538 1539 1600 1599 
+1453 1 quad 1539 1540 1601 1600 
+1454 1 quad 1540 1541 1602 1601 
+1455 1 quad 1541 1542 1603 1602 
+1456 1 quad 1542 1543 1604 1603 
+1457 1 quad 1543 1544 1605 1604 
+1458 1 quad 1544 1545 1606 1605 
+1459 1 quad 1545 1546 1607 1606 
+1460 1 quad 1546 1547 1608 1607 
+1461 1 quad 1547 1548 1609 1608 
+1462 1 quad 1548 1549 1610 1609 
+1463 1 quad 1549 1550 1611 1610 
+1464 1 quad 1550 1551 1515 1611 
+1465 1 quad 1551 1552 1651 1515 
+1466 1 quad 1552 1553 1612 1651 
+1467 1 quad 1553 1554 1613 1612 
+1468 1 quad 1554 1555 1614 1613 
+1469 1 quad 1555 1556 1615 1614 
+1470 1 quad 1556 1557 1616 1615 
+1471 1 quad 1557 1558 1617 1616 
+1472 1 quad 1558 1559 1618 1617 
+1473 1 quad 1559 1560 1619 1618 
+1474 1 quad 1560 1561 1620 1619 
+1475 1 quad 1561 1562 1621 1620 
+1476 1 quad 1562 1563 1622 1621 
+1477 1 quad 1563 1564 1623 1622 
+1478 1 quad 1564 1565 1624 1623 
+1479 1 quad 1566 1567 1625 1596 
+1480 1 quad 1567 1568 1626 1625 
+1481 1 quad 1568 1569 1627 1626 
+1482 1 quad 1569 1570 1571 1627 
+1483 1 quad 1571 1572 1628 1627 
+1484 1 quad 1572 1573 1629 1628 
+1485 1 quad 1573 1574 1630 1629 
+1486 1 quad 1574 1575 1631 1630 
+1487 1 quad 1575 1576 1632 1631 
+1488 1 quad 1576 1577 1633 1632 
+1489 1 quad 1577 1578 1634 1633 
+1490 1 quad 1578 1579 1635 1634 
+1491 1 quad 1579 1580 1636 1635 
+1492 1 quad 1580 1581 1637 1636 
+1493 1 quad 1581 1582 1597 1637 
+1494 1 quad 1624 1498 1638 1623 
+1495 1 quad 1498 1499 1674 1638 
+1496 1 quad 1499 1500 1673 1674 
+1497 1 quad 1501 1502 1640 1639 
+1498 1 quad 1502 1503 1641 1640 
+1499 1 quad 1505 1506 1642 1641 
+1500 1 quad 1506 1507 1643 1642 
+1501 1 quad 1507 1508 1644 1643 
+1502 1 quad 1508 1509 1645 1644 
+1503 1 quad 1509 1510 1646 1645 
+1504 1 quad 1510 1511 1647 1646 
+1505 1 quad 1511 1512 1648 1647 
+1506 1 quad 1512 1513 1649 1648 
+1507 1 quad 1513 1514 1650 1649 
+1508 1 quad 1514 1515 1651 1650 
+1509 1 quad 1520 1583 1652 1519 
+1510 1 quad 1583 1584 1653 1652 
+1511 1 quad 1584 1585 1654 1653 
+1512 1 quad 1585 1586 1655 1654 
+1513 1 quad 1586 1587 1656 1655 
+1514 1 quad 1587 1588 1657 1656 
+1515 1 quad 1588 1589 1658 1657 
+1516 1 quad 1589 1660 1659 1658 
+1517 1 quad 1589 1590 1661 1660 
+1518 1 quad 1590 1591 1662 1661 
+1519 1 quad 1591 1592 1663 1662 
+1520 1 quad 1592 1593 1664 1663 
+1521 1 quad 1593 1594 1665 1664 
+1522 1 quad 1594 1595 1666 1665 
+1523 1 quad 1595 1596 1625 1666 
+1524 1 quad 1651 1612 1667 1650 
+1525 1 quad 1612 1613 1696 1667 
+1526 1 quad 1613 1614 1695 1696 
+1527 1 quad 1614 1615 1694 1695 
+1528 1 quad 1615 1616 1668 1694 
+1529 1 quad 1616 1617 1669 1668 
+1530 1 quad 1617 1618 1670 1669 
+1531 1 quad 1618 1619 1671 1670 
+1532 1 quad 1619 1620 1672 1671 
+1533 1 quad 1620 1621 1673 1672 
+1534 1 quad 1621 1622 1674 1673 
+1535 1 quad 1674 1622 1623 1638 
+1536 1 quad 1597 1598 1675 1637 
+1537 1 quad 1598 1599 1676 1675 
+1538 1 quad 1599 1600 1677 1676 
+1539 1 quad 1600 1601 1678 1677 
+1540 1 quad 1601 1602 1679 1678 
+1541 1 quad 1602 1603 1680 1679 
+1542 1 quad 1603 1604 1681 1680 
+1543 1 quad 1604 1605 1682 1681 
+1544 1 quad 1605 1606 1683 1682 
+1545 1 quad 1606 1607 1684 1683 
+1546 1 quad 1607 1608 1685 1684 
+1547 1 quad 1608 1609 1686 1685 
+1548 1 quad 1609 1610 1687 1686 
+1549 1 quad 1610 1611 1688 1687 
+1550 1 quad 1611 1515 1516 1688 
+1551 1 quad 1625 1626 1689 1666 
+1552 1 quad 1626 1627 1628 1689 
+1553 1 quad 1628 1629 1719 1689 
+1554 1 quad 1629 1630 1718 1719 
+1555 1 quad 1630 1631 1717 1718 
+1556 1 quad 1631 1632 1716 1717 
+1557 1 quad 1632 1633 1715 1716 
+1558 1 quad 1633 1634 1714 1715 
+1559 1 quad 1634 1635 1713 1714 
+1560 1 quad 1635 1636 1690 1713 
+1561 1 quad 1636 1637 1675 1690 
+1562 1 quad 1516 1517 1707 1688 
+1563 1 quad 1517 1518 1706 1707 
+1564 1 quad 1518 1519 1652 1706 
+1565 1 quad 1641 1642 1711 1640 
+1566 1 quad 1642 1643 1710 1711 
+1567 1 quad 1643 1644 1691 1710 
+1568 1 quad 1644 1645 1692 1691 
+1569 1 quad 1645 1646 1693 1692 
+1570 1 quad 1646 1647 1694 1693 
+1571 1 quad 1647 1648 1695 1694 
+1572 1 quad 1648 1649 1696 1695 
+1573 1 quad 1696 1649 1650 1667 
+1574 1 quad 1675 1676 1697 1690 
+1575 1 quad 1676 1677 1698 1697 
+1576 1 quad 1677 1678 1699 1698 
+1577 1 quad 1678 1679 1700 1699 
+1578 1 quad 1679 1680 1701 1700 
+1579 1 quad 1680 1681 1702 1701 
+1580 1 quad 1681 1682 1703 1702 
+1581 1 quad 1682 1683 1704 1703 
+1582 1 quad 1683 1684 1705 1704 
+1583 1 quad 1684 1685 1706 1705 
+1584 1 quad 1685 1686 1707 1706 
+1585 1 quad 1688 1707 1686 1687 
+1586 1 quad 1694 1668 1708 1693 
+1587 1 quad 1668 1669 1709 1708 
+1588 1 quad 1669 1670 1727 1709 
+1589 1 quad 1670 1671 1729 1727 
+1590 1 quad 1671 1672 1712 1729 
+1591 1 quad 1652 1653 1705 1706 
+1592 1 quad 1653 1654 1704 1705 
+1593 1 quad 1654 1655 1703 1704 
+1594 1 quad 1655 1656 1702 1703 
+1595 1 quad 1656 1657 1701 1702 
+1596 1 quad 1657 1658 1700 1701 
+1597 1 quad 1658 1659 1699 1700 
+1598 1 quad 1699 1659 1713 1698 
+1599 1 quad 1659 1660 1714 1713 
+1600 1 quad 1660 1661 1715 1714 
+1601 1 quad 1661 1662 1716 1715 
+1602 1 quad 1662 1663 1717 1716 
+1603 1 quad 1663 1664 1718 1717 
+1604 1 quad 1664 1665 1719 1718 
+1605 1 quad 1719 1665 1666 1689 
+1606 1 quad 1713 1690 1697 1698 
+1607 1 quad 1691 1692 1721 1720 
+1608 1 quad 1693 1708 1721 1692 
+1609 1 quad 1708 1709 1720 1721 
+1610 1 quad 420 419 344 345 
+1611 1 quad 345 346 347 420 
+1612 1 quad 872 926 1722 846 
+1613 1 quad 926 845 1723 1722 
+1614 1 quad 1722 1723 1725 1724 
+1615 1 quad 846 1722 1724 847 
+1616 1 quad 847 1724 717 718 
+1617 1 quad 1724 1725 716 717 
+1618 1 quad 1504 1505 1641 1503 
+1619 1 quad 1504 1408 1409 1410 
+1620 1 quad 1521 1520 1424 1425 
+1621 1 quad 1425 1427 1428 1521 
+1622 1 quad 1473 1400 1565 1472 
+1623 1 quad 1691 1720 1726 1710 
+1624 1 quad 1720 1709 1727 1726 
+1625 1 quad 1726 1727 1729 1728 
+1626 1 quad 1710 1726 1728 1711 
+1627 1 quad 1711 1728 1639 1640 
+1628 1 quad 1728 1729 1712 1639 
+1629 1 quad 1274 1730 1426 1731 
+1630 1 quad 1730 1275 1427 1426 
+1631 1 quad 1426 1427 1425 1731 
+1632 1 quad 1712 1672 1673 1500 
+1633 1 quad 1712 1500 1501 1639 
+1633 1 line 67 180 
+1634 1 line 180 179 
+1635 1 line 179 178 
+1636 1 line 178 177 
+1637 1 line 177 176 
+1638 1 line 176 175 
+1639 1 line 175 174 
+1640 1 line 174 173 
+1641 1 line 173 172 
+1642 1 line 172 171 
+1643 1 line 171 170 
+1644 1 line 170 169 
+1645 1 line 169 168 
+1646 1 line 168 167 
+1647 1 line 167 166 
+1648 1 line 166 165 
+1649 1 line 165 164 
+1650 1 line 164 163 
+1651 1 line 163 162 
+1652 1 line 162 161 
+1653 1 line 161 160 
+1654 1 line 160 159 
+1655 1 line 159 158 
+1656 1 line 158 157 
+1657 1 line 157 156 
+1658 1 line 156 155 
+1659 1 line 155 154 
+1660 1 line 154 153 
+1661 1 line 153 152 
+1662 1 line 152 151 
+1663 1 line 151 150 
+1664 1 line 150 149 
+1665 1 line 149 148 
+1666 1 line 148 147 
+1667 1 line 147 146 
+1668 1 line 146 145 
+1669 1 line 145 144 
+1670 1 line 144 143 
+1671 1 line 143 142 
+1672 1 line 142 141 
+1673 1 line 141 140 
+1674 1 line 140 139 
+1675 1 line 139 138 
+1676 1 line 138 137 
+1677 1 line 137 136 
+1678 1 line 136 135 
+1679 1 line 135 134 
+1680 2 line 66 34 
+1681 2 line 9 35 
+1682 2 line 35 36 
+1683 2 line 36 37 
+1684 2 line 37 38 
+1685 2 line 38 39 
+1686 2 line 39 40 
+1687 2 line 40 41 
+1688 2 line 41 42 
+1689 2 line 42 43 
+1690 2 line 43 44 
+1691 2 line 44 45 
+1692 2 line 45 46 
+1693 2 line 46 47 
+1694 2 line 47 48 
+1695 2 line 48 49 
+1696 2 line 49 50 
+1697 2 line 50 51 
+1698 2 line 51 52 
+1699 2 line 52 53 
+1700 2 line 53 54 
+1701 2 line 54 55 
+1702 2 line 55 56 
+1703 2 line 56 57 
+1704 2 line 57 58 
+1705 2 line 58 59 
+1706 2 line 59 60 
+1707 2 line 60 61 
+1708 2 line 61 62 
+1709 2 line 62 63 
+1710 2 line 63 64 
+1711 2 line 64 65 
+1712 2 line 65 66 
+1713 3 line 133 67 
+1714 3 line 132 133 
+1715 3 line 131 132 
+1716 3 line 34 68 
+1717 3 line 68 69 
+1718 3 line 69 70 
+1719 3 line 70 71 
+1720 3 line 71 72 
+1721 3 line 72 73 
+1722 3 line 73 74 
+1723 3 line 74 75 
+1724 3 line 75 76 
+1725 3 line 76 77 
+1726 3 line 77 78 
+1727 3 line 78 79 
+1728 3 line 79 80 
+1729 3 line 80 81 
+1730 3 line 81 82 
+1731 3 line 82 83 
+1732 3 line 83 84 
+1733 3 line 84 85 
+1734 3 line 85 86 
+1735 3 line 86 87 
+1736 3 line 87 88 
+1737 3 line 88 89 
+1738 3 line 89 90 
+1739 3 line 90 91 
+1740 3 line 91 92 
+1741 3 line 92 93 
+1742 3 line 93 94 
+1743 3 line 94 95 
+1744 3 line 95 96 
+1745 3 line 96 97 
+1746 3 line 97 98 
+1747 3 line 98 99 
+1748 3 line 99 100 
+1749 3 line 100 101 
+1750 3 line 101 102 
+1751 3 line 102 103 
+1752 3 line 103 104 
+1753 3 line 104 105 
+1754 3 line 105 106 
+1755 3 line 106 107 
+1756 3 line 107 108 
+1757 3 line 108 109 
+1758 3 line 109 110 
+1759 3 line 110 111 
+1760 3 line 111 112 
+1761 3 line 112 113 
+1762 3 line 113 114 
+1763 3 line 114 115 
+1764 3 line 115 116 
+1765 3 line 116 117 
+1766 3 line 117 118 
+1767 3 line 118 119 
+1768 3 line 119 120 
+1769 3 line 120 121 
+1770 3 line 121 122 
+1771 3 line 122 123 
+1772 3 line 123 124 
+1773 3 line 124 125 
+1774 3 line 125 126 
+1775 3 line 126 127 
+1776 3 line 127 128 
+1777 3 line 128 129 
+1778 3 line 129 130 
+1779 3 line 130 131 
+1780 4 line 185 181 
+1781 4 line 134 182 
+1782 4 line 182 183 
+1783 4 line 183 184 
+1784 4 line 184 185 
+1785 4 line 181 187 
+1786 4 line 187 188 
+1787 4 line 188 189 
+1788 4 line 189 190 
+1789 4 line 190 191 
+1790 4 line 191 192 
+1791 4 line 192 186 
+1792 4 line 186 193 
+1793 4 line 193 194 
+1794 4 line 194 1 
+1795 4 line 1 3 
+1796 4 line 3 4 
+1797 4 line 4 5 
+1798 4 line 5 6 
+1799 4 line 6 7 
+1800 4 line 7 8 
+1801 4 line 8 2 
+1802 4 line 2 10 
+1803 4 line 10 11 
+1804 4 line 11 12 
+1805 4 line 12 13 
+1806 4 line 13 14 
+1807 4 line 14 15 
+1808 4 line 15 16 
+1809 4 line 16 17 
+1810 4 line 17 18 
+1811 4 line 18 19 
+1812 4 line 19 20 
+1813 4 line 20 21 
+1814 4 line 21 22 
+1815 4 line 22 23 
+1816 4 line 23 24 
+1817 4 line 24 25 
+1818 4 line 25 26 
+1819 4 line 26 27 
+1820 4 line 27 28 
+1821 4 line 28 29 
+1822 4 line 29 30 
+1823 4 line 30 31 
+1824 4 line 31 32 
+1825 4 line 32 33 
+1826 4 line 33 9 
+
diff --git a/examples/step-33/step-33.cc b/examples/step-33/step-33.cc
new file mode 100644
index 0000000..b78ade3
--- /dev/null
+++ b/examples/step-33/step-33.cc
@@ -0,0 +1,2550 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2007 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Author: David Neckels, Boulder, Colorado, 2007, 2008
+ */
+
+
+// @sect3{Include files}
+
+// First a standard set of deal.II includes. Nothing special to comment on
+// here:
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/parameter_handler.h>
+#include <deal.II/base/function_parser.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/conditional_ostream.h>
+#include <deal.II/base/std_cxx11/array.h>
+
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/grid_out.h>
+#include <deal.II/grid/grid_refinement.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/grid_in.h>
+
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/fe_system.h>
+#include <deal.II/fe/mapping_q1.h>
+#include <deal.II/fe/fe_q.h>
+
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/solution_transfer.h>
+
+// Then, as mentioned in the introduction, we use various Trilinos packages as
+// linear solvers as well as for automatic differentiation. These are in the
+// following include files.
+//
+// Since deal.II provides interfaces to the basic Trilinos matrices,
+// preconditioners and solvers, we include them similarly as deal.II linear
+// algebra structures.
+#include <deal.II/lac/trilinos_sparse_matrix.h>
+#include <deal.II/lac/trilinos_precondition.h>
+#include <deal.II/lac/trilinos_solver.h>
+
+
+// Sacado is the automatic differentiation package within Trilinos, which is
+// used to find the Jacobian for a fully implicit Newton iteration. The
+// Trilinos::Sacado package (at least until version 11.10.2) triggers
+// warnings when compiling this file. Since we are not responsible for this,
+// we just suppress the warnings by wrapping the <code>#include</code>
+// directive into a pair of macros that disable and re-enable them:
+DEAL_II_DISABLE_EXTRA_DIAGNOSTICS
+#include <Sacado.hpp>
+DEAL_II_ENABLE_EXTRA_DIAGNOSTICS
+
+// And this again is C++:
+#include <iostream>
+#include <fstream>
+#include <vector>
+#include <memory>
+
+// To end this section, introduce everything in the dealii library into the
+// namespace into which the contents of this program will go:
+namespace Step33
+{
+  using namespace dealii;
+
+
+  // @sect3{Euler equation specifics}
+
+  // Here we define the flux function for this particular system of
+  // conservation laws, as well as pretty much everything else that's specific
+  // to the Euler equations for gas dynamics, for reasons discussed in the
+  // introduction. We group all this into a structure that defines everything
+  // that has to do with the flux. All members of this structure are static,
+  // i.e. the structure has no actual state specified by instance member
+  // variables. The better way to do this, rather than a structure with all
+  // static members would be to use a namespace -- but namespaces can't be
+  // templatized and we want some of the member variables of the structure to
+  // depend on the space dimension, which we in our usual way introduce using
+  // a template parameter.
+  template <int dim>
+  struct EulerEquations
+  {
+    // @sect4{Component description}
+
+    // First a few variables that describe the various components of our
+    // solution vector in a generic way. This includes the number of
+    // components in the system (Euler's equations have one entry for momenta
+    // in each spatial direction, plus the energy and density components, for
+    // a total of <code>dim+2</code> components), as well as functions that
+    // describe the index within the solution vector of the first momentum
+    // component, the density component, and the energy density
+    // component. Note that all these %numbers depend on the space dimension;
+    // defining them in a generic way (rather than by implicit convention)
+    // makes our code more flexible and makes it easier to later extend it,
+    // for example by adding more components to the equations.
+    static const unsigned int n_components             = dim + 2; // dim momenta + density + energy
+    static const unsigned int first_momentum_component = 0;       // momenta come first
+    static const unsigned int density_component        = dim;     // density follows the momenta
+    static const unsigned int energy_component         = dim+1;   // energy density comes last
+
+    // When generating graphical output way down in this program, we need to
+    // specify the names of the solution variables as well as how the various
+    // components group into vector and scalar fields. We could describe this
+    // there, but in order to keep things that have to do with the Euler
+    // equation localized here and the rest of the program as generic as
+    // possible, we provide this sort of information in the following two
+    // functions:
+    static std::vector<std::string>
+    component_names ()
+    {
+      // dim "momentum" labels first, then the two scalar fields.
+      std::vector<std::string> labels;
+      labels.assign (dim, "momentum");
+      labels.push_back ("density");
+      labels.push_back ("energy_density");
+      return labels;
+    }
+
+
+    static
+    std::vector<DataComponentInterpretation::DataComponentInterpretation>
+    component_interpretation ()
+    {
+      // The dim momenta make up one vector field; the other two are scalars.
+      typedef DataComponentInterpretation::DataComponentInterpretation Kind;
+      const Kind vector_part = DataComponentInterpretation::component_is_part_of_vector;
+      const Kind scalar      = DataComponentInterpretation::component_is_scalar;
+
+      std::vector<Kind> kinds (dim, vector_part);
+      kinds.push_back (scalar);   // density
+      kinds.push_back (scalar);   // energy density
+      return kinds;
+    }
+
+
+    // @sect4{Transformations between variables}
+
+    // Next, we define the gas constant. We will set it to 1.4 in its
+    // definition immediately following the declaration of this class (unlike
+    // integer variables, like the ones above, static const floating point
+    // member variables cannot be initialized within the class declaration in
+    // C++). This value of 1.4 is representative of a gas that consists of
+    // molecules composed of two atoms, such as air which consists up to small
+    // traces almost entirely of $N_2$ and $O_2$.
+    static const double gas_gamma;  // defined right after this struct
+
+
+    // In the following, we will need to compute the kinetic energy and the
+    // pressure from a vector of conserved variables. This we can do based on
+    // the energy density and the kinetic energy $\frac 12 \rho |\mathbf v|^2
+    // = \frac{|\rho \mathbf v|^2}{2\rho}$ (note that the independent
+    // variables contain the momentum components $\rho v_i$, not the
+    // velocities $v_i$).
+    template <typename InputVector>
+    static
+    typename InputVector::value_type
+    compute_kinetic_energy (const InputVector &W)
+    {
+      typename InputVector::value_type energy = 0;
+      for (unsigned int m=first_momentum_component;
+           m<first_momentum_component+dim; ++m)
+        energy += W[m] * W[m];
+      // So far this is |rho v|^2; scale it by 1/(2 rho).
+      energy *= 1./(2 * W[density_component]);
+      return energy;
+    }
+
+
+    template <typename InputVector>
+    static typename InputVector::value_type
+    compute_pressure (const InputVector &W)
+    {
+      // p = (gamma-1) (E - kinetic energy)
+      const typename InputVector::value_type kinetic = compute_kinetic_energy(W);
+      return (gas_gamma-1.0) * (W[energy_component] - kinetic);
+    }
+
+
+    // @sect4{EulerEquations::compute_flux_matrix}
+
+    // We define the flux function $F(W)$ as one large matrix.  Each row of
+    // this matrix represents a scalar conservation law for the component in
+    // that row.  The exact form of this matrix is given in the
+    // introduction. Note that we know the size of the matrix: it has as many
+    // rows as the system has components, and <code>dim</code> columns; rather
+    // than using a FullMatrix object for such a matrix (which has a variable
+    // number of rows and columns and must therefore allocate memory on the
+    // heap each time such a matrix is created), we use a rectangular array of
+    // numbers right away.
+    //
+    // We templatize the numerical type of the flux function so that we may
+    // use the automatic differentiation type here.  Similarly, we will call
+    // the function with different input vector data types, so we templatize
+    // on it as well:
+    template <typename InputVector>
+    static
+    void compute_flux_matrix (const InputVector &W,
+                              std_cxx11::array <std_cxx11::array
+                              <typename InputVector::value_type, dim>,
+                              EulerEquations<dim>::n_components > &flux)
+    {
+      typedef typename InputVector::value_type Number;
+      // The pressure shows up on the diagonal of the momentum rows as well
+      // as in the energy row, so evaluate it once up front.
+      const Number p = compute_pressure(W);
+
+      // Momentum rows: entry (i,j) is m_i m_j / rho, plus p if i == j.
+      for (unsigned int i=0; i<dim; ++i)
+        {
+          const unsigned int row = first_momentum_component+i;
+          for (unsigned int j=0; j<dim; ++j)
+            flux[row][j] = W[row] *
+                           W[first_momentum_component+j] /
+                           W[density_component];
+          flux[row][i] += p;
+        }
+
+      // Density row (mass conservation): simply the momentum. Energy row:
+      // the velocity m_j / rho times (E + p).
+      for (unsigned int j=0; j<dim; ++j)
+        {
+          flux[density_component][j] = W[first_momentum_component+j];
+          flux[energy_component][j]  = W[first_momentum_component+j] /
+                                       W[density_component] *
+                                       (W[energy_component] + p);
+        }
+    }
+
+
+    // @sect4{EulerEquations::numerical_normal_flux}
+
+    // On the boundaries of the domain and across hanging nodes we use a
+    // numerical flux function to enforce boundary conditions.  This routine
+    // is the basic Lax-Friedrichs flux with a stabilization parameter
+    // $\alpha$. Its form has also been given already in the introduction:
+    template <typename InputVector>
+    static
+    void numerical_normal_flux (const Tensor<1,dim>                &normal,
+                                const InputVector                  &Wplus,
+                                const InputVector                  &Wminus,
+                                const double                        alpha,
+                                std_cxx11::array
+                                <typename InputVector::value_type, n_components>
+                                &normal_flux)
+    {
+      typedef std_cxx11::array
+      <std_cxx11::array <typename InputVector::value_type, dim>,
+      EulerEquations<dim>::n_components > FluxMatrix;
+      // Flux matrices evaluated at the Wplus and at the Wminus state.
+      FluxMatrix flux_plus, flux_minus;
+      compute_flux_matrix (Wplus,  flux_plus);
+      compute_flux_matrix (Wminus, flux_minus);
+      // Average of the two normal fluxes plus alpha/2 times the jump.
+      for (unsigned int c=0; c<n_components; ++c)
+        {
+          normal_flux[c] = 0;
+          for (unsigned int d=0; d<dim; ++d)
+            normal_flux[c] += 0.5*(flux_plus[c][d] + flux_minus[c][d]) * normal[d];
+          normal_flux[c] += 0.5*alpha*(Wplus[c] - Wminus[c]);
+        }
+    }
+
+    // @sect4{EulerEquations::compute_forcing_vector}
+
+    // In the same way as describing the flux function $\mathbf F(\mathbf w)$,
+    // we also need to have a way to describe the right hand side forcing
+    // term. As mentioned in the introduction, we consider only gravity here,
+    // which leads to the specific form $\mathbf G(\mathbf w) = \left(
+    // g_1\rho, g_2\rho, g_3\rho, 0, \rho \mathbf g \cdot \mathbf v
+    // \right)^T$, shown here for the 3d case. More specifically, we will
+    // consider only $\mathbf g=(0,0,-1)^T$ in 3d, or $\mathbf g=(0,-1)^T$ in
+    // 2d. This naturally leads to the following function:
+    template <typename InputVector>
+    static
+    void compute_forcing_vector (const InputVector &W,
+                                 std_cxx11::array
+                                 <typename InputVector::value_type, n_components>
+                                 &forcing)
+    {
+      // Gravity has unit strength and points in the negative direction of
+      // the last coordinate axis.
+      const double gravity = -1.0;
+      const unsigned int last_momentum_component = first_momentum_component+dim-1;
+
+      // Only two entries are nonzero: the last momentum equation (g rho) and
+      // the energy equation (g times the last momentum component).
+      for (unsigned int c=0; c<n_components; ++c)
+        if (c == last_momentum_component)
+          forcing[c] = gravity * W[density_component];
+        else if (c == energy_component)
+          forcing[c] = gravity * W[last_momentum_component];
+        else
+          forcing[c] = 0;
+    }
+
+
+    // @sect4{Dealing with boundary conditions}
+
+    // Another thing we have to deal with is boundary conditions. To this end,
+    // let us first define the kinds of boundary conditions we currently know
+    // how to deal with:
+    enum BoundaryKind
+    {
+      inflow_boundary,          // component takes the prescribed boundary value
+      outflow_boundary,         // component is copied from the inside state
+      no_penetration_boundary,  // momentum component is reflected about the normal
+      pressure_boundary         // energy is derived from a prescribed pressure
+    };
+
+
+    // The next part is to actually decide what to do at each kind of
+    // boundary. To this end, remember from the introduction that boundary
+    // conditions are specified by choosing a value $\mathbf w^-$ on the
+    // outside of a boundary given an inhomogeneity $\mathbf j$ and possibly
+    // the solution's value $\mathbf w^+$ on the inside. Both are then passed
+    // to the numerical flux $\mathbf H(\mathbf{w}^+, \mathbf{w}^-,
+    // \mathbf{n})$ to define boundary contributions to the bilinear form.
+    //
+    // Boundary conditions can in some cases be specified for each component
+    // of the solution vector independently. For example, if component $c$ is
+    // marked for inflow, then $w^-_c = j_c$. If it is an outflow, then $w^-_c
+    // = w^+_c$. These two simple cases are handled first in the function
+    // below.
+    //
+    // There is a little snag that makes this function unpleasant from a C++
+    // language viewpoint: The output vector <code>Wminus</code> will of
+    // course be modified, so it shouldn't be a <code>const</code>
+    // argument. Yet it is in the implementation below, and needs to be in
+    // order to allow the code to compile. The reason is that we call this
+    // function at a place where <code>Wminus</code> is of type
+    // <code>Table@<2,Sacado::Fad::DFad@<double@> @></code>, this being 2d
+    // table with indices representing the quadrature point and the vector
+    // component, respectively. We call this function with
+    // <code>Wminus[q]</code> as last argument; subscripting a 2d table yields
+    // a temporary accessor object representing a 1d vector, just what we want
+    // here. The problem is that a temporary accessor object can't be bound to
+    // a non-const reference argument of a function, as we would like here,
+    // according to the C++ 1998 and 2003 standards (something that will be
+    // fixed with the next standard in the form of rvalue references).  We get
+    // away with making the output argument here a constant because it is the
+    // <i>accessor</i> object that's constant, not the table it points to:
+    // that one can still be written to. The hack is unpleasant nevertheless
+    // because it restricts the kind of data types that may be used as
+    // template argument to this function: a regular vector isn't going to do
+    // because that one can not be written to when marked
+    // <code>const</code>. With no good solution around at the moment, we'll
+    // go with the pragmatic, even if not pretty, solution shown here:
+    template <typename DataVector>
+    static
+    void
+    compute_Wminus (const BoundaryKind  (&boundary_kind)[n_components],
+                    const Tensor<1,dim>  &normal_vector,
+                    const DataVector     &Wplus,
+                    const Vector<double> &boundary_values,
+                    const DataVector     &Wminus)
+    {
+      // Fill the outside state Wminus one component at a time from the kind of
+      // condition requested for it, the inside state Wplus and the prescribed
+      // boundary_values. Momentum entries are addressed relative to
+      // first_momentum_component, not by assuming they occupy slots 0..dim-1.
+      for (unsigned int c = 0; c < n_components; c++)
+        switch (boundary_kind[c])
+          {
+          case inflow_boundary:
+            Wminus[c] = boundary_values(c);
+            break;
+
+          case outflow_boundary:
+            Wminus[c] = Wplus[c];
+            break;
+
+          // Prescribed pressure boundary conditions are a bit more
+          // complicated by the fact that even though the pressure is
+          // prescribed, we really are setting the energy component here,
+          // which will depend on velocity and pressure. So even though this
+          // seems like a Dirichlet type boundary condition, we get
+          // sensitivities of energy to velocity and density (unless these are
+          // also prescribed):
+          case pressure_boundary:
+          {
+            const typename DataVector::value_type
+            density = (boundary_kind[density_component] ==
+                       inflow_boundary
+                       ?
+                       boundary_values(density_component)
+                       :
+                       Wplus[density_component]);
+
+            typename DataVector::value_type kinetic_energy = 0;
+            for (unsigned int d=0; d<dim; ++d)
+              {
+                const unsigned int m = first_momentum_component+d;
+                if (boundary_kind[m] == inflow_boundary)
+                  kinetic_energy += boundary_values(m)*boundary_values(m);
+                else
+                  kinetic_energy += Wplus[m]*Wplus[m];
+              }
+            kinetic_energy *= 1./2./density;
+            Wminus[c] = boundary_values(c) / (gas_gamma-1.0) + kinetic_energy;
+            break;
+          }
+
+          case no_penetration_boundary:
+          {
+            // Reflect the momentum about the boundary: subtracting twice its
+            // normal part makes the average of the inside and outside momenta
+            // tangential. This creates sensitivities across the momentum
+            // components. The condition only makes sense for a momentum
+            // component because normal_vector has just dim entries.
+            Assert (c - first_momentum_component < dim, ExcInternalError());
+            typename DataVector::value_type momentum_dot_n = 0;
+            for (unsigned int d = 0; d < dim; d++)
+              momentum_dot_n += Wplus[first_momentum_component+d]*normal_vector[d];
+            Wminus[c] = Wplus[c] -
+                        2.0*momentum_dot_n*normal_vector[c-first_momentum_component];
+            break;
+          }
+
+          default:
+            Assert (false, ExcNotImplemented());
+          }
+    }
+
+
+    // @sect4{EulerEquations::compute_refinement_indicators}
+
+    // In this class, we also want to specify how to refine the mesh. The
+    // class <code>ConservationLaw</code> that will use all the information we
+    // provide here in the <code>EulerEquations</code> class is pretty agnostic
+    // about the particular conservation law it solves: it doesn't even really
+    // care how many components a solution vector has. Consequently, it can't
+    // know what a reasonable refinement indicator would be. On the other
+    // hand, here we do, or at least we can come up with a reasonable choice:
+    // we simply look at the gradient of the density, and compute
+    // $\eta_K=\log\left(1+|\nabla\rho(x_K)|\right)$, where $x_K$ is the
+    // center of cell $K$.
+    //
+    // There are certainly a number of equally reasonable refinement
+    // indicators, but this one does the job, and it is easy to compute:
+    static
+    void
+    compute_refinement_indicators (const DoFHandler<dim> &dof_handler,
+                                   const Mapping<dim>    &mapping,
+                                   const Vector<double>  &solution,
+                                   Vector<double>        &refinement_indicators)
+    {
+      // Gradients are only needed at the cell midpoint, so a one-point
+      // quadrature rule and the update_gradients flag are all we request.
+      const QMidpoint<dim>  quadrature_formula;
+      const UpdateFlags update_flags = update_gradients;
+      FEValues<dim> fe_v (mapping, dof_handler.get_fe(),
+                          quadrature_formula, update_flags);
+
+      // Gradients of all solution components at the single quadrature point.
+      std::vector<std::vector<Tensor<1,dim> > >
+      dU (1, std::vector<Tensor<1,dim> >(n_components));
+
+      typename DoFHandler<dim>::active_cell_iterator
+      cell = dof_handler.begin_active(),
+      endc = dof_handler.end();
+      for (unsigned int cell_no=0; cell!=endc; ++cell, ++cell_no)
+        {
+          fe_v.reinit(cell);
+          fe_v.get_function_gradients (solution, dU);
+
+          refinement_indicators(cell_no)
+            = std::log(1+
+                       std::sqrt(dU[0][density_component] *
+                                 dU[0][density_component]));
+        }
+    }
+
+
+
+    // @sect4{EulerEquations::Postprocessor}
+
+    // Finally, we declare a class that implements a postprocessing of data
+    // components. The problem this class solves is that the variables in the
+    // formulation of the Euler equations we use are in conservative rather
+    // than physical form: they are momentum densities $\mathbf m=\rho\mathbf
+    // v$, density $\rho$, and energy density $E$. What we would like to also
+    // put into our output file are velocities $\mathbf v=\frac{\mathbf
+    // m}{\rho}$ and pressure $p=(\gamma-1)(E-\frac{1}{2} \rho |\mathbf
+    // v|^2)$.
+    //
+    // In addition, we would like to add the possibility to generate schlieren
+    // plots. Schlieren plots are a way to visualize shocks and other sharp
+    // interfaces. The word "schlieren" is a German word that may be
+    // translated as "striae" -- it may be simpler to explain it by an
+    // example, however: schlieren is what you see when you, for example, pour
+    // highly concentrated alcohol, or a transparent saline solution, into
+    // water; the two have the same color, but they have different refractive
+    // indices and so before they are fully mixed light goes through the
+    // mixture along bent rays that lead to brightness variations if you look
+    // at it. That's "schlieren". A similar effect happens in compressible
+    // flow because the refractive index depends on the pressure (and
+    // therefore the density) of the gas.
+    //
+    // The origin of the word refers to two-dimensional projections of a
+    // three-dimensional volume (we see a 2d picture of the 3d fluid). In
+    // computational fluid dynamics, we can get an idea of this effect by
+    // considering what causes it: density variations. Schlieren plots are
+    // therefore produced by plotting $s=|\nabla \rho|^2$; obviously, $s$ is
+    // large in shocks and at other highly dynamic places. If so desired by
+    // the user (by specifying this in the input file), we would like to
+    // generate these schlieren plots in addition to the other derived
+    // quantities listed above.
+    //
+    // The implementation of the algorithms to compute derived quantities from
+    // the ones that solve our problem, and to output them into data file,
+    // rests on the DataPostprocessor class. It has extensive documentation,
+    // and other uses of the class can also be found in step-29. We therefore
+    // refrain from extensive comments.
+    class Postprocessor : public DataPostprocessor<dim>
+    {
+    public:
+      Postprocessor (const bool do_schlieren_plot);
+      // Computes velocities, pressure and, if requested, the schlieren value:
+      virtual
+      void
+      compute_derived_quantities_vector (const std::vector<Vector<double> >              &uh,
+                                         const std::vector<std::vector<Tensor<1,dim> > > &duh,
+                                         const std::vector<std::vector<Tensor<2,dim> > > &dduh,
+                                         const std::vector<Point<dim> >                  &normals,
+                                         const std::vector<Point<dim> >                  &evaluation_points,
+                                         std::vector<Vector<double> >                    &computed_quantities) const;
+      // Names of the computed output quantities:
+      virtual std::vector<std::string> get_names () const;
+      // Which of the outputs form a vector field and which are scalars:
+      virtual
+      std::vector<DataComponentInterpretation::DataComponentInterpretation>
+      get_data_component_interpretation () const;
+      // Which data (values, gradients) the computation above needs:
+      virtual UpdateFlags get_needed_update_flags () const;
+
+    private:
+      const bool do_schlieren_plot;  // also output the squared density gradient?
+    };
+  };
+
+
+  template <int dim>
+  const double EulerEquations<dim>::gas_gamma = 1.4;  // see the declaration for why 1.4
+
+
+
+  template <int dim>
+  EulerEquations<dim>::Postprocessor::Postprocessor (const bool do_schlieren_plot)
+    : do_schlieren_plot (do_schlieren_plot)
+  {
+    // Nothing else to do: the flag is the only member this class adds.
+  }
+
+
+  // This is the only function worth commenting on. When generating graphical
+  // output, the DataOut and related classes will call this function on each
+  // cell, with values, gradients, Hessians, and normal vectors (in case we're
+  // working on faces) at each quadrature point. Note that the data at each
+  // quadrature point is itself vector-valued, namely the conserved
+  // variables. What we're going to do here is to compute the quantities we're
+  // interested in at each quadrature point. Note that for this we can ignore
+  // the Hessians ("dduh") and normal vectors; to avoid compiler warnings
+  // about unused variables, we comment out their names.
+  template <int dim>
+  void
+  EulerEquations<dim>::Postprocessor::
+  compute_derived_quantities_vector (const std::vector<Vector<double> >              &uh,
+                                     const std::vector<std::vector<Tensor<1,dim> > > &duh,
+                                     const std::vector<std::vector<Tensor<2,dim> > > &/*dduh*/,
+                                     const std::vector<Point<dim> >                  &/*normals*/,
+                                     const std::vector<Point<dim> >                  &/*evaluation_points*/,
+                                     std::vector<Vector<double> >                    &computed_quantities) const
+  {
+    // At the beginning of the function, let us make sure that all variables
+    // have the correct sizes, so that we can access individual vector
+    // elements without having to wonder whether we might read or write
+    // invalid elements; we also check that the <code>duh</code> vector only
+    // contains data if we really need it (the system knows about this because
+    // we say so in the <code>get_needed_update_flags()</code> function
+    // below). For the inner vectors, we check that at least the first element
+    // of the outer vector has the correct inner size:
+    const unsigned int n_quadrature_points = uh.size();
+    if (do_schlieren_plot == true)
+      Assert (duh.size() == n_quadrature_points, ExcInternalError());
+
+    Assert (computed_quantities.size() == n_quadrature_points, ExcInternalError());
+    Assert (uh[0].size() == n_components, ExcInternalError());
+
+    // Braces keep the if/else well-formed no matter how Assert expands.
+    if (do_schlieren_plot == true)
+      {
+        Assert (computed_quantities[0].size() == dim+2, ExcInternalError());
+      }
+    else
+      {
+        Assert (computed_quantities[0].size() == dim+1, ExcInternalError());
+      }
+
+    // Then loop over all quadrature points and do our work there. The code
+    // should be pretty self-explanatory. The order of output variables is
+    // first <code>dim</code> velocities, then the pressure, and if so desired
+    // the schlieren plot. Note that we try to be generic about the order of
+    // variables in the input vector, using the
+    // <code>first_momentum_component</code> and
+    // <code>density_component</code> information:
+    for (unsigned int q=0; q<n_quadrature_points; ++q)
+      {
+        const double density = uh[q](density_component);
+
+        for (unsigned int d=0; d<dim; ++d)
+          computed_quantities[q](d)
+            = uh[q](first_momentum_component+d) / density;
+
+        computed_quantities[q](dim) = compute_pressure (uh[q]);
+
+        if (do_schlieren_plot == true)
+          computed_quantities[q](dim+1) = duh[q][density_component] *
+                                          duh[q][density_component];
+      }
+  }
+
+
+  template <int dim>
+  std::vector<std::string>
+  EulerEquations<dim>::Postprocessor::
+  get_names () const
+  {
+    // dim velocity components, then the pressure, then (only if requested)
+    // the schlieren quantity -- the same order in which
+    // compute_derived_quantities_vector() fills its output.
+    std::vector<std::string> labels (dim, "velocity");
+    labels.push_back ("pressure");
+    if (do_schlieren_plot)
+      labels.push_back ("schlieren_plot");
+
+    return labels;
+  }
+
+
+  template <int dim>
+  std::vector<DataComponentInterpretation::DataComponentInterpretation>
+  EulerEquations<dim>::Postprocessor::
+  get_data_component_interpretation () const
+  {
+    typedef DataComponentInterpretation::DataComponentInterpretation Kind;
+
+    // The velocity components belong together as one vector field ...
+    std::vector<Kind> kinds;
+    for (unsigned int d=0; d<dim; ++d)
+      kinds.push_back (DataComponentInterpretation::component_is_part_of_vector);
+
+    // ... whereas the pressure and the optional schlieren output are scalars.
+    const unsigned int n_scalars = (do_schlieren_plot ? 2 : 1);
+    for (unsigned int s=0; s<n_scalars; ++s)
+      kinds.push_back (DataComponentInterpretation::component_is_scalar);
+    return kinds;
+  }
+
+
+
+  template <int dim>
+  UpdateFlags
+  EulerEquations<dim>::Postprocessor::
+  get_needed_update_flags () const
+  {
+    // Solution values are always needed; the schlieren output additionally
+    // needs solution gradients.
+    return (do_schlieren_plot ? (update_values | update_gradients)
+                              : update_values);
+  }
+
+
+  // @sect3{Run time parameter handling}
+
+  // Our next job is to define a few classes that will contain run-time
+  // parameters (for example solver tolerances, number of iterations,
+  // stabilization parameter, and the like). One could do this in the main
+  // class, but we separate it from that one to make the program more modular
+  // and easier to read: Everything that has to do with run-time parameters
+  // will be in the following namespace, whereas the program logic is in the
+  // main class.
+  //
+  // We will split the run-time parameters into a few separate structures,
+  // which we will all put into a namespace <code>Parameters</code>. Of these
+  // classes, there are a few that group the parameters for individual groups,
+  // such as for solvers, mesh refinement, or output. Each of these classes
+  // has functions <code>declare_parameters()</code> and
+  // <code>parse_parameters()</code> that declare parameter subsections and
+  // entries in a ParameterHandler object, and retrieve actual parameter
+  // values from such an object, respectively. These classes declare all their
+  // parameters in subsections of the ParameterHandler.
+  //
+  // The final class of the following namespace combines all the previous
+  // classes by deriving from them and taking care of a few more entries at
+  // the top level of the input file, as well as a few odd other entries in
+  // subsections that are too short to warrant a structure by themselves.
+  //
+  // It is worth pointing out one thing here: None of the classes below have a
+  // constructor that would initialize the various member variables. This
+  // isn't a problem, however, since we will read all variables declared in
+  // these classes from the input file (or indirectly: a ParameterHandler
+  // object will read it from there, and we will get the values from this
+  // object), and they will be initialized this way. In case a certain
+  // variable is not specified at all in the input file, this isn't a problem
+  // either: The ParameterHandler class will in this case simply take the
+  // default value that was specified when declaring an entry in the
+  // <code>declare_parameters()</code> functions of the classes below.
+  namespace Parameters
+  {
+
+    // @sect4{Parameters::Solver}
+    //
+    // The first of these classes deals with parameters for the linear inner
+    // solver. It offers parameters that indicate which solver to use (GMRES
+    // as a solver for general non-symmetric indefinite systems, or a sparse
+    // direct solver), the amount of output to be produced, as well as various
+    // parameters that tweak the thresholded incomplete LU decomposition
+    // (ILUT) that we use as a preconditioner for GMRES.
+    //
+    // In particular, the ILUT takes the following parameters:
+    // - ilut_fill: the number of extra entries to add when forming the ILU
+    //   decomposition
+    // - ilut_atol, ilut_rtol: When forming the preconditioner, for certain
+    //   problems bad conditioning (or just bad luck) can cause the
+    //   preconditioner to be very poorly conditioned.  Hence it can help to
+    //   add diagonal perturbations to the original matrix and form the
+    //   preconditioner for this slightly better matrix.  ATOL is an absolute
+    //   perturbation that is added to the diagonal before forming the prec,
+    //   and RTOL is a scaling factor $rtol \geq 1$.
+    // - ilut_drop: The ILUT will drop any values that have magnitude less
+    //   than this value.  This is a way to manage the amount of memory used
+    //   by this preconditioner.
+    //
+    // The meaning of each parameter is also briefly described in the third
+    // argument of the ParameterHandler::declare_entry call in
+    // <code>declare_parameters()</code>.
+    struct Solver
+    {
+      // Run-time parameters of the "linear solver" subsection. None of the
+      // members has an initializer; they are assigned in parse_parameters().
+
+      // Linear solver chosen through the "method" entry.
+      enum SolverType { gmres, direct };
+      SolverType solver;
+
+      // Amount of solver output chosen through the "output" entry.
+      enum  OutputType { quiet, verbose };
+      OutputType output;
+
+      // Values of the "residual" and "max iters" entries.
+      double linear_residual;
+      int max_iterations;
+
+      // Values of the "ilut fill", "ilut absolute tolerance", "ilut relative
+      // tolerance" and "ilut drop tolerance" entries, in this order.
+      double ilut_fill;
+      double ilut_atol;
+      double ilut_rtol;
+      double ilut_drop;
+
+      // Declare the entries of the "linear solver" subsection in prm.
+      static void declare_parameters (ParameterHandler &prm);
+      // Read the entries of that subsection from prm into the members above.
+      void parse_parameters (ParameterHandler &prm);
+    };
+
+
+
+    // Register every entry of the "linear solver" subsection together with
+    // its default value, the pattern it has to match, and its help text.
+    void Solver::declare_parameters (ParameterHandler &prm)
+    {
+      prm.enter_subsection("linear solver");
+
+      // Verbosity and kind of the solver: one out of a fixed set of strings.
+      prm.declare_entry("output",
+                        "quiet",
+                        Patterns::Selection("quiet|verbose"),
+                        "State whether output from solver runs should be "
+                        "printed. Choices are <quiet|verbose>.");
+      prm.declare_entry("method",
+                        "gmres",
+                        Patterns::Selection("gmres|direct"),
+                        "The kind of solver for the linear system. Choices "
+                        "are <gmres|direct>.");
+
+      // Stopping criteria.
+      prm.declare_entry("residual",
+                        "1e-10",
+                        Patterns::Double(),
+                        "Linear solver residual");
+      prm.declare_entry("max iters",
+                        "300",
+                        Patterns::Integer(),
+                        "Maximum solver iterations");
+
+      // The ILUT settings are all real numbers and only differ in their
+      // name, default value and help text, so they are declared from a
+      // table (columns: name, default, documentation).
+      static const char *const ilut_entries[][3] =
+      {
+        { "ilut fill",               "2",     "Ilut preconditioner fill" },
+        { "ilut absolute tolerance", "1e-9",  "Ilut preconditioner tolerance" },
+        { "ilut relative tolerance", "1.1",   "Ilut relative tolerance" },
+        { "ilut drop tolerance",     "1e-10", "Ilut drop tolerance" }
+      };
+      const unsigned int n_ilut_entries
+        = sizeof(ilut_entries) / sizeof(ilut_entries[0]);
+
+      for (unsigned int e=0; e<n_ilut_entries; ++e)
+        prm.declare_entry(ilut_entries[e][0],
+                          ilut_entries[e][1],
+                          Patterns::Double(),
+                          ilut_entries[e][2]);
+
+      prm.leave_subsection();
+    }
+
+
+
+
+    // Read the entries of the "linear solver" subsection from prm and store
+    // them in the members of this object.
+    //
+    // The two selection entries are translated into their enum values. A
+    // string that matches none of the known choices raises an exception (the
+    // same way Flux::parse_parameters() treats its selection entry) rather
+    // than silently leaving the corresponding member uninitialized.
+    void Solver::parse_parameters (ParameterHandler &prm)
+    {
+      prm.enter_subsection("linear solver");
+      {
+        const std::string op = prm.get("output");
+        if (op == "verbose")
+          output = verbose;
+        else if (op == "quiet")
+          output = quiet;
+        else
+          AssertThrow (false, ExcNotImplemented());
+
+        const std::string sv = prm.get("method");
+        if (sv == "direct")
+          solver = direct;
+        else if (sv == "gmres")
+          solver = gmres;
+        else
+          AssertThrow (false, ExcNotImplemented());
+
+        // The remaining entries are plain numbers.
+        linear_residual = prm.get_double("residual");
+        max_iterations  = prm.get_integer("max iters");
+        ilut_fill       = prm.get_double("ilut fill");
+        ilut_atol       = prm.get_double("ilut absolute tolerance");
+        ilut_rtol       = prm.get_double("ilut relative tolerance");
+        ilut_drop       = prm.get_double("ilut drop tolerance");
+      }
+      prm.leave_subsection();
+    }
+
+
+
+    // @sect4{Parameters::Refinement}
+    //
+    // Similarly, here are a few parameters that determine how the mesh is to
+    // be refined (and if it is to be refined at all). For what exactly the
+    // shock parameters do, see the mesh refinement functions further down.
+    struct Refinement
+    {
+      // Run-time parameters of the "refinement" subsection; assigned in
+      // parse_parameters().
+
+      // Value of the "refinement" entry.
+      bool do_refine;
+      // Values of the "shock value" and "shock levels" entries. Note that
+      // the number of levels is stored as a double.
+      double shock_val;
+      double shock_levels;
+
+      // Declare the entries of the "refinement" subsection in prm.
+      static void declare_parameters (ParameterHandler &prm);
+      // Read the entries listed above from prm.
+      void parse_parameters (ParameterHandler &prm);
+    };
+
+
+
+    // Register the entries of the "refinement" subsection. Note that
+    // Refinement::parse_parameters() only reads "refinement", "shock value"
+    // and "shock levels" back; the other entries are merely declared here.
+    void Refinement::declare_parameters (ParameterHandler &prm)
+    {
+      prm.enter_subsection("refinement");
+
+      // The on/off switch is the only entry that is not a real number.
+      prm.declare_entry("refinement",
+                        "true",
+                        Patterns::Bool(),
+                        "Whether to perform mesh refinement or not");
+
+      // All other entries are declared as doubles (columns: name, default,
+      // documentation).
+      static const char *const double_entries[][3] =
+      {
+        { "refinement fraction",   "0.1",     "Fraction of high refinement" },
+        { "unrefinement fraction", "0.1",     "Fraction of low unrefinement" },
+        { "max elements",          "1000000", "maximum number of elements" },
+        { "shock value",           "4.0",     "value for shock indicator" },
+        { "shock levels",          "3.0",     "number of shock refinement levels" }
+      };
+      const unsigned int n_double_entries
+        = sizeof(double_entries) / sizeof(double_entries[0]);
+
+      for (unsigned int e=0; e<n_double_entries; ++e)
+        prm.declare_entry(double_entries[e][0],
+                          double_entries[e][1],
+                          Patterns::Double(),
+                          double_entries[e][2]);
+
+      prm.leave_subsection();
+    }
+
+
+    // Copy the refinement switch and the two shock parameters from the
+    // "refinement" subsection of prm into this object.
+    void Refinement::parse_parameters (ParameterHandler &prm)
+    {
+      prm.enter_subsection("refinement");
+
+      do_refine    = prm.get_bool ("refinement");
+      shock_val    = prm.get_double("shock value");
+      shock_levels = prm.get_double("shock levels");
+
+      prm.leave_subsection();
+    }
+
+
+
+    // @sect4{Parameters::Flux}
+    //
+    // Next a section on flux modifications to make it more stable. In
+    // particular, two options are offered to stabilize the Lax-Friedrichs
+    // flux: either choose $\mathbf{H}(\mathbf{a},\mathbf{b},\mathbf{n}) =
+    // \frac{1}{2}(\mathbf{F}(\mathbf{a})\cdot \mathbf{n} +
+    // \mathbf{F}(\mathbf{b})\cdot \mathbf{n} + \alpha (\mathbf{a} -
+    // \mathbf{b}))$ where $\alpha$ is either a fixed number specified in the
+    // input file, or where $\alpha$ is a mesh dependent value. In the latter
+    // case, it is chosen as $\frac{h}{2\delta T}$ with $h$ the diameter of
+    // the face to which the flux is applied, and $\delta T$ the current time
+    // step.
+    struct Flux
+    {
+      // Run-time parameters of the "flux" subsection; assigned in
+      // parse_parameters().
+
+      // Kind of stabilization chosen through the "stab" entry ("constant" or
+      // "mesh").
+      enum StabilizationKind { constant, mesh_dependent };
+      StabilizationKind stabilization_kind;
+
+      // Value of the "stab value" entry.
+      double stabilization_value;
+
+      // Declare the entries of the "flux" subsection in prm.
+      static void declare_parameters (ParameterHandler &prm);
+      // Read the entries of that subsection from prm.
+      void parse_parameters (ParameterHandler &prm);
+    };
+
+
+    // Register the two entries of the "flux" subsection: the kind of
+    // stabilization and the value of the stabilization parameter.
+    void Flux::declare_parameters (ParameterHandler &prm)
+    {
+      prm.enter_subsection("flux");
+
+      prm.declare_entry("stab",
+                        "mesh",
+                        Patterns::Selection("constant|mesh"),
+                        "Whether to use a constant stabilization parameter "
+                        "or a mesh-dependent one");
+      prm.declare_entry("stab value",
+                        "1",
+                        Patterns::Double(),
+                        "alpha stabilization");
+
+      prm.leave_subsection();
+    }
+
+
+    // Read the "flux" subsection of prm: translate the "stab" string into
+    // the corresponding enum value (throwing for any other string) and store
+    // the numerical stabilization value.
+    void Flux::parse_parameters (ParameterHandler &prm)
+    {
+      prm.enter_subsection("flux");
+
+      const std::string kind_name = prm.get("stab");
+      if (kind_name == "mesh")
+        stabilization_kind = mesh_dependent;
+      else if (kind_name == "constant")
+        stabilization_kind = constant;
+      else
+        AssertThrow (false, ExcNotImplemented());
+
+      stabilization_value = prm.get_double("stab value");
+
+      prm.leave_subsection();
+    }
+
+
+
+    // @sect4{Parameters::Output}
+    //
+    // Then a section on output parameters. We offer to produce Schlieren
+    // plots (the squared gradient of the density, a tool to visualize shock
+    // fronts), and a time interval between graphical output in case we don't
+    // want an output file every time step.
+    struct Output
+    {
+      // Run-time parameters of the "output" subsection; assigned in
+      // parse_parameters().
+
+      // Value of the "schlieren plot" entry.
+      bool schlieren_plot;
+      // Value of the "step" entry (declared with a default of -1).
+      double output_step;
+
+      // Declare the entries of the "output" subsection in prm.
+      static void declare_parameters (ParameterHandler &prm);
+      // Read the entries of that subsection from prm.
+      void parse_parameters (ParameterHandler &prm);
+    };
+
+
+
+    // Register the two entries of the "output" subsection.
+    void Output::declare_parameters (ParameterHandler &prm)
+    {
+      prm.enter_subsection("output");
+
+      prm.declare_entry("schlieren plot",
+                        "true",
+                        Patterns::Bool (),
+                        "Whether or not to produce schlieren plots");
+      prm.declare_entry("step",
+                        "-1",
+                        Patterns::Double(),
+                        "Output once per this period");
+
+      prm.leave_subsection();
+    }
+
+
+
+    // Copy the two entries of the "output" subsection of prm into this
+    // object.
+    void Output::parse_parameters (ParameterHandler &prm)
+    {
+      prm.enter_subsection("output");
+
+      schlieren_plot = prm.get_bool("schlieren plot");
+      output_step    = prm.get_double("step");
+
+      prm.leave_subsection();
+    }
+
+
+
+    // @sect4{Parameters::AllParameters}
+    //
+    // Finally the class that brings it all together. It declares a number of
+    // parameters itself, mostly ones at the top level of the parameter file
+    // as well as several in sections too small to warrant their own
+    // classes. It also contains everything that is actually space dimension
+    // dependent, like initial or boundary conditions.
+    //
+    // Since this class is derived from all the ones above, the
+    // <code>declare_parameters()</code> and <code>parse_parameters()</code>
+    // functions call the respective functions of the base classes as well.
+    //
+    // Note that this class also handles the declaration of initial and
+    // boundary conditions specified in the input file. To this end, in both
+    // cases, there are entries like "w_0 value" which represent an expression
+    // in terms of $x,y,z$ that describe the initial or boundary condition as
+    // a formula that will later be parsed by the FunctionParser
+    // class. Similar expressions exist for "w_1", "w_2", etc, denoting the
+    // <code>dim+2</code> conserved variables of the Euler system. Similarly,
+    // we allow up to <code>max_n_boundaries</code> boundary indicators to be
+    // used in the input file, and each of these boundary indicators can be
+    // associated with an inflow, outflow, or pressure boundary condition,
+    // with homogeneous boundary conditions being specified for each
+    // component and each boundary indicator separately.
+    //
+    // The data structure used to store the boundary indicators is a bit
+    // complicated. It is an array of <code>max_n_boundaries</code> elements
+    // indicating the range of boundary indicators that will be accepted. For
+    // each entry in this array, we store a pair of data in the
+    // <code>BoundaryCondition</code> structure: first, an array of size
+    // <code>n_components</code> that for each component of the solution
+    // vector indicates whether it is an inflow, outflow, or other kind of
+    // boundary, and second a FunctionParser object that describes all
+    // components of the solution vector for this boundary id at once.
+    //
+    // The <code>BoundaryCondition</code> structure requires a constructor
+    // since we need to tell the function parser object at construction time
+    // how many vector components it is to describe. This initialization can
+    // therefore not wait till we actually set the formulas the FunctionParser
+    // object represents later in
+    // <code>AllParameters::parse_parameters()</code>
+    //
+    // For the same reason of having to tell Function objects their vector
+    // size at construction time, we have to have a constructor of the
+    // <code>AllParameters</code> class that at least initializes the other
+    // FunctionParser object, i.e. the one describing initial conditions.
+    template <int dim>
+    struct AllParameters : public Solver,
+      public Refinement,
+      public Flux,
+      public Output
+    {
+      // Number of "boundary_<id>" subsections that are declared and parsed;
+      // the ids run from 0 to max_n_boundaries-1.
+      static const unsigned int max_n_boundaries = 10;
+
+      // Description of the boundary condition for one boundary id.
+      struct BoundaryConditions
+      {
+        // Kind of boundary condition, one entry per solution component.
+        typename EulerEquations<dim>::BoundaryKind
+        kind[EulerEquations<dim>::n_components];
+
+        // Parsed expressions for the boundary values of all components.
+        FunctionParser<dim> values;
+
+        // Sizes <code>values</code> for n_components vector components.
+        BoundaryConditions ();
+      };
+
+
+      // Sizes <code>initial_conditions</code> for n_components vector
+      // components; all other members are set in parse_parameters().
+      AllParameters ();
+
+      // Value of the top-level "diffusion power" entry.
+      double diffusion_power;
+
+      // Contents of the "time stepping" subsection. is_stationary is set to
+      // true in parse_parameters() if the "time step" entry equals zero.
+      double time_step, final_time;
+      double theta;
+      bool is_stationary;
+
+      // Value of the top-level "mesh" entry.
+      std::string mesh_filename;
+
+      // Parsed expressions of the "initial condition" subsection and, for
+      // each boundary id, the data of the matching "boundary_<id>"
+      // subsection.
+      FunctionParser<dim> initial_conditions;
+      BoundaryConditions  boundary_conditions[max_n_boundaries];
+
+      // Declare the entries handled by this class as well as those of all
+      // four base classes.
+      static void declare_parameters (ParameterHandler &prm);
+      // Read all of these entries from prm.
+      void parse_parameters (ParameterHandler &prm);
+    };
+
+
+
+    // The function parser has to know at construction time how many vector
+    // components it describes; the formulas themselves are set later.
+    template <int dim>
+    AllParameters<dim>::BoundaryConditions::BoundaryConditions ()
+      : values (EulerEquations<dim>::n_components)
+    {
+    }
+
+
+    // Only the function parser for the initial conditions needs to be
+    // initialized here, namely with its number of vector components.
+    template <int dim>
+    AllParameters<dim>::AllParameters ()
+      : initial_conditions (EulerEquations<dim>::n_components)
+    {
+    }
+
+
+    // Declare all entries of the parameter file: the top-level entries, the
+    // "time stepping" subsection, one "boundary_<id>" subsection for each of
+    // the max_n_boundaries boundary ids, the "initial condition" subsection,
+    // and finally the subsections owned by the four base classes.
+    template <int dim>
+    void
+    AllParameters<dim>::declare_parameters (ParameterHandler &prm)
+    {
+      prm.declare_entry("mesh", "grid.inp",
+                        Patterns::Anything(),
+                        "input file name");
+
+      prm.declare_entry("diffusion power", "2.0",
+                        Patterns::Double(),
+                        "power of mesh size for diffusion");
+
+      prm.enter_subsection("time stepping");
+      {
+        prm.declare_entry("time step", "0.1",
+                          Patterns::Double(0),
+                          "simulation time step");
+        prm.declare_entry("final time", "10.0",
+                          Patterns::Double(0),
+                          "simulation end time");
+        prm.declare_entry("theta scheme value", "0.5",
+                          Patterns::Double(0,1),
+                          "value for theta that interpolates between explicit "
+                          "Euler (theta=0), Crank-Nicolson (theta=0.5), and "
+                          "implicit Euler (theta=1).");
+      }
+      prm.leave_subsection();
+
+
+      // One subsection per boundary id. Each one holds a wall switch and, for
+      // every solution component, the kind of boundary condition plus an
+      // expression for the boundary value.
+      for (unsigned int b=0; b<max_n_boundaries; ++b)
+        {
+          prm.enter_subsection("boundary_" +
+                               Utilities::int_to_string(b));
+          {
+            prm.declare_entry("no penetration", "false",
+                              Patterns::Bool(),
+                              "whether the named boundary allows gas to "
+                              "penetrate or is a rigid wall");
+
+            for (unsigned int di=0; di<EulerEquations<dim>::n_components; ++di)
+              {
+                prm.declare_entry("w_" + Utilities::int_to_string(di),
+                                  "outflow",
+                                  Patterns::Selection("inflow|outflow|pressure"),
+                                  "<inflow|outflow|pressure>");
+
+                prm.declare_entry("w_" + Utilities::int_to_string(di) +
+                                  " value", "0.0",
+                                  Patterns::Anything(),
+                                  "expression in x,y,z");
+              }
+          }
+          prm.leave_subsection();
+        }
+
+      // Expressions for the initial values of every solution component.
+      prm.enter_subsection("initial condition");
+      {
+        for (unsigned int di=0; di<EulerEquations<dim>::n_components; ++di)
+          prm.declare_entry("w_" + Utilities::int_to_string(di) + " value",
+                            "0.0",
+                            Patterns::Anything(),
+                            "expression in x,y,z");
+      }
+      prm.leave_subsection();
+
+      // Let the base classes declare their own subsections.
+      Parameters::Solver::declare_parameters (prm);
+      Parameters::Refinement::declare_parameters (prm);
+      Parameters::Flux::declare_parameters (prm);
+      Parameters::Output::declare_parameters (prm);
+    }
+
+
+    // Read all run-time parameters from prm: first the top-level entries and
+    // the "time stepping", "boundary_<id>" and "initial condition"
+    // subsections handled by this class, then the subsections of the four
+    // base classes.
+    template <int dim>
+    void
+    AllParameters<dim>::parse_parameters (ParameterHandler &prm)
+    {
+      mesh_filename = prm.get("mesh");
+      diffusion_power = prm.get_double("diffusion power");
+
+      prm.enter_subsection("time stepping");
+      {
+        time_step = prm.get_double("time step");
+        if (time_step == 0)
+          {
+            // A time step of zero selects the stationary mode. In that case
+            // both the step size and the final time are fixed to 1.0, so the
+            // "final time" entry must not be read afterwards: doing so would
+            // overwrite the value assigned here.
+            is_stationary = true;
+            time_step = 1.0;
+            final_time = 1.0;
+          }
+        else
+          {
+            is_stationary = false;
+            final_time = prm.get_double("final time");
+          }
+
+        theta = prm.get_double("theta scheme value");
+      }
+      prm.leave_subsection();
+
+      for (unsigned int boundary_id=0; boundary_id<max_n_boundaries;
+           ++boundary_id)
+        {
+          prm.enter_subsection("boundary_" +
+                               Utilities::int_to_string(boundary_id));
+          {
+            std::vector<std::string>
+            expressions(EulerEquations<dim>::n_components, "0.0");
+
+            const bool no_penetration = prm.get_bool("no penetration");
+
+            for (unsigned int di=0; di<EulerEquations<dim>::n_components; ++di)
+              {
+                const std::string boundary_type
+                  = prm.get("w_" + Utilities::int_to_string(di));
+
+                // For the first dim components, the "no penetration" switch
+                // takes precedence over the kind given for the component.
+                if ((di < dim) && (no_penetration == true))
+                  boundary_conditions[boundary_id].kind[di]
+                    = EulerEquations<dim>::no_penetration_boundary;
+                else if (boundary_type == "inflow")
+                  boundary_conditions[boundary_id].kind[di]
+                    = EulerEquations<dim>::inflow_boundary;
+                else if (boundary_type == "pressure")
+                  boundary_conditions[boundary_id].kind[di]
+                    = EulerEquations<dim>::pressure_boundary;
+                else if (boundary_type == "outflow")
+                  boundary_conditions[boundary_id].kind[di]
+                    = EulerEquations<dim>::outflow_boundary;
+                else
+                  AssertThrow (false, ExcNotImplemented());
+
+                expressions[di] = prm.get("w_" + Utilities::int_to_string(di) +
+                                          " value");
+              }
+
+            // Hand the expressions of all components to the function parser
+            // of this boundary id at once.
+            boundary_conditions[boundary_id].values
+            .initialize (FunctionParser<dim>::default_variable_names(),
+                         expressions,
+                         std::map<std::string, double>());
+          }
+          prm.leave_subsection();
+        }
+
+      prm.enter_subsection("initial condition");
+      {
+        std::vector<std::string> expressions (EulerEquations<dim>::n_components,
+                                              "0.0");
+        for (unsigned int di = 0; di < EulerEquations<dim>::n_components; di++)
+          expressions[di] = prm.get("w_" + Utilities::int_to_string(di) +
+                                    " value");
+        initial_conditions.initialize (FunctionParser<dim>::default_variable_names(),
+                                       expressions,
+                                       std::map<std::string, double>());
+      }
+      prm.leave_subsection();
+
+      // Let the base classes read their own subsections.
+      Parameters::Solver::parse_parameters (prm);
+      Parameters::Refinement::parse_parameters (prm);
+      Parameters::Flux::parse_parameters (prm);
+      Parameters::Output::parse_parameters (prm);
+    }
+  }
+
+
+
+
+  // @sect3{Conservation law class}
+
+  // Here finally comes the class that actually does something with all the
+  // Euler equation and parameter specifics we've defined above. The public
+  // interface is pretty much the same as always (the constructor now takes
+  // the name of a file from which to read parameters, which is passed on the
+  // command line). The private function interface is also pretty similar to
+  // the usual arrangement, with the <code>assemble_system</code> function
+  // split into three parts: one that contains the main loop over all cells
+  // and that then calls the other two for integrals over cells and faces,
+  // respectively.
+  template <int dim>
+  class ConservationLaw
+  {
+  public:
+    // The constructor declares all run-time parameters, reads the file
+    // <code>input_filename</code> and parses the parameters from it.
+    ConservationLaw (const char *input_filename);
+    void run ();
+
+  private:
+    // Rebuilds the sparsity pattern from the DoFHandler and resizes the
+    // system matrix accordingly.
+    void setup_system ();
+
+    // Assembly: the main loop over all cells, and the two functions it calls
+    // for the integrals over cells and faces.
+    void assemble_system ();
+    void assemble_cell_term (const FEValues<dim>             &fe_v,
+                             const std::vector<types::global_dof_index> &dofs);
+    void assemble_face_term (const unsigned int               face_no,
+                             const FEFaceValuesBase<dim>     &fe_v,
+                             const FEFaceValuesBase<dim>     &fe_v_neighbor,
+                             const std::vector<types::global_dof_index> &dofs,
+                             const std::vector<types::global_dof_index> &dofs_neighbor,
+                             const bool                       external_face,
+                             const unsigned int               boundary_id,
+                             const double                     face_diameter);
+
+    std::pair<unsigned int, double> solve (Vector<double> &solution);
+
+    void compute_refinement_indicators (Vector<double> &indicator) const;
+    void refine_grid (const Vector<double> &indicator);
+
+    void output_results () const;
+
+
+
+    // The first few member variables are also rather standard. Note that we
+    // define a mapping object to be used throughout the program when
+    // assembling terms (we will hand it to every FEValues and FEFaceValues
+    // object); the mapping we use is just the standard $Q_1$ mapping --
+    // nothing fancy, in other words -- but declaring one here and using it
+    // throughout the program will make it simpler later on to change it if
+    // that should become necessary. This is, in fact, rather pertinent: it is
+    // known that for transonic simulations with the Euler equations,
+    // computations do not converge even as $h\rightarrow 0$ if the boundary
+    // approximation is not of sufficiently high order.
+    Triangulation<dim>   triangulation;
+    const MappingQ1<dim> mapping;
+
+    const FESystem<dim>  fe;
+    DoFHandler<dim>      dof_handler;
+
+    const QGauss<dim>    quadrature;
+    const QGauss<dim-1>  face_quadrature;
+
+    // Next come a number of data vectors that correspond to the solution of
+    // the previous time step (<code>old_solution</code>), the best guess of
+    // the current solution (<code>current_solution</code>; we say
+    // <i>guess</i> because the Newton iteration to compute it may not have
+    // converged yet, whereas <code>old_solution</code> refers to the fully
+    // converged final result of the previous time step), and a predictor for
+    // the solution at the next time step, computed by extrapolating the
+    // current and previous solution one time step into the future:
+    Vector<double>       old_solution;
+    Vector<double>       current_solution;
+    Vector<double>       predictor;
+
+    Vector<double>       right_hand_side;
+
+    // This final set of member variables (except for the object holding all
+    // run-time parameters at the very bottom and a screen output stream that
+    // only prints something if verbose output has been requested) deals with
+    // the interface we have in this program to the Trilinos library that
+    // provides us with linear solvers. Similarly to including PETSc matrices
+    // in step-17, step-18, and step-19, all we need to do is to create a
+    // Trilinos sparse matrix instead of the standard deal.II class. The
+    // system matrix is used for the Jacobian in each Newton step. Since we do
+    // not intend to run this program in parallel (which wouldn't be too hard
+    // with Trilinos data structures, though), we don't have to think about
+    // anything else like distributing the degrees of freedom.
+    TrilinosWrappers::SparseMatrix system_matrix;
+
+    Parameters::AllParameters<dim>  parameters;
+    ConditionalOStream              verbose_cout;
+  };
+
+
+  // @sect4{ConservationLaw::ConservationLaw}
+  //
+  // There is nothing much to say about the constructor. Essentially, it reads
+  // the input file and fills the parameter object with the parsed values:
+  template <int dim>
+  ConservationLaw<dim>::ConservationLaw (const char *input_filename)
+    :
+    mapping (),
+    fe (FE_Q<dim>(1), EulerEquations<dim>::n_components),
+    dof_handler (triangulation),
+    quadrature (2),
+    face_quadrature (2),
+    verbose_cout (std::cout, false)
+  {
+    // Declare everything we know about, then let the handler read the file
+    // and hand the values over to the parameter object.
+    ParameterHandler parameter_handler;
+    Parameters::AllParameters<dim>::declare_parameters (parameter_handler);
+    parameter_handler.read_input (input_filename);
+
+    parameters.parse_parameters (parameter_handler);
+
+    // The stream was created silent above; switch it on only if verbose
+    // solver output was requested.
+    const bool want_verbose_output
+      = (parameters.output == Parameters::Solver::verbose);
+    verbose_cout.set_condition (want_verbose_output);
+  }
+
+
+
+  // @sect4{ConservationLaw::setup_system}
+  //
+  // The following (easy) function is called each time the mesh is
+  // changed. All it does is to resize the Trilinos matrix according to a
+  // sparsity pattern that we generate as in all the previous tutorial
+  // programs.
+  template <int dim>
+  void ConservationLaw<dim>::setup_system ()
+  {
+    // The matrix is square, with one row and one column per degree of
+    // freedom.
+    const types::global_dof_index n_dofs = dof_handler.n_dofs();
+
+    DynamicSparsityPattern sparsity_pattern (n_dofs, n_dofs);
+    DoFTools::make_sparsity_pattern (dof_handler, sparsity_pattern);
+
+    system_matrix.reinit (sparsity_pattern);
+  }
+
+
+  // @sect4{ConservationLaw::assemble_system}
+  //
+  // This and the following two functions are the meat of this program: They
+  // assemble the linear system that results from applying Newton's method to
+  // the nonlinear system of conservation equations.
+  //
+  // This first function puts all of the assembly pieces together in a routine
+  // that dispatches the correct piece for each cell/face.  The actual
+  // implementation of the assembly on these objects is done in the following
+  // functions.
+  //
+  // At the top of the function we do the usual housekeeping: allocate
+  // FEValues, FEFaceValues, and FESubfaceValues objects necessary to do the
+  // integrations on cells, faces, and subfaces (in case of adjoining cells on
+  // different refinement levels). Note that we don't need all information
+  // (like values, gradients, or real locations of quadrature points) for all
+  // of these objects, so we only let the FEValues classes compute whatever
+  // is actually necessary by specifying the minimal set of UpdateFlags. For
+  // example, when using a FEFaceValues object for the neighboring cell we
+  // only need the shape values: Given a specific face, the quadrature points
+  // and <code>JxW</code> values are the same as for the current cells, and
+  // the normal vectors are known to be the negative of the normal vectors of
+  // the current cell.
+  template <int dim>
+  void ConservationLaw<dim>::assemble_system ()
+  {
+    const unsigned int dofs_per_cell = dof_handler.get_fe().dofs_per_cell;
+
+    std::vector<types::global_dof_index> dof_indices (dofs_per_cell);
+    std::vector<types::global_dof_index> dof_indices_neighbor (dofs_per_cell);
+
+    const UpdateFlags update_flags               = update_values
+                                                   | update_gradients
+                                                   | update_q_points
+                                                   | update_JxW_values,
+                                                   face_update_flags          = update_values
+                                                       | update_q_points
+                                                       | update_JxW_values
+                                                       | update_normal_vectors,
+                                                       neighbor_face_update_flags = update_values;
+
+    FEValues<dim>        fe_v                  (mapping, fe, quadrature,
+                                                update_flags);
+    FEFaceValues<dim>    fe_v_face             (mapping, fe, face_quadrature,
+                                                face_update_flags);
+    FESubfaceValues<dim> fe_v_subface          (mapping, fe, face_quadrature,
+                                                face_update_flags);
+    FEFaceValues<dim>    fe_v_face_neighbor    (mapping, fe, face_quadrature,
+                                                neighbor_face_update_flags);
+    FESubfaceValues<dim> fe_v_subface_neighbor (mapping, fe, face_quadrature,
+                                                neighbor_face_update_flags);
+
+    // Then loop over all cells, initialize the FEValues object for the
+    // current cell and call the function that assembles the problem on this
+    // cell.
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+    for (; cell!=endc; ++cell)
+      {
+        fe_v.reinit (cell);
+        cell->get_dof_indices (dof_indices);
+
+        assemble_cell_term(fe_v, dof_indices);
+
+        // Then loop over all the faces of this cell.  If a face is part of
+        // the external boundary, then assemble boundary conditions there (the
+        // sixth argument to <code>assemble_face_term</code> indicates
+        // whether we are working on an external or internal face; if it is an
+        // external face, the fifth argument denoting the degrees of freedom
+        // indices of the neighbor is ignored, so we pass an empty vector):
+        for (unsigned int face_no=0; face_no<GeometryInfo<dim>::faces_per_cell;
+             ++face_no)
+          if (cell->at_boundary(face_no))
+            {
+              fe_v_face.reinit (cell, face_no);
+              assemble_face_term (face_no, fe_v_face,
+                                  fe_v_face,
+                                  dof_indices,
+                                  std::vector<types::global_dof_index>(),
+                                  true,
+                                  cell->face(face_no)->boundary_id(),
+                                  cell->face(face_no)->diameter());
+            }
+
+        // The alternative is that we are dealing with an internal face. There
+        // are two cases that we need to distinguish: that this is a normal
+        // face between two cells at the same refinement level, and that it is
+        // a face between two cells of the different refinement levels.
+        //
+        // In the first case, there is nothing we need to do: we are using a
+        // continuous finite element, and face terms do not appear in the
+        // bilinear form in this case. The second case usually does not lead
+        // to face terms either if we enforce hanging node constraints
+        // strongly (as in all previous tutorial programs so far whenever we
+        // used continuous finite elements -- this enforcement is done by the
+        // ConstraintMatrix class together with
+        // DoFTools::make_hanging_node_constraints). In the current program,
+        // however, we opt to enforce continuity weakly at faces between cells
+        // of different refinement level, for two reasons: (i) because we can,
+        // and more importantly (ii) because we would have to thread the
+        // automatic differentiation we use to compute the elements of the
+        // Newton matrix from the residual through the operations of the
+        // ConstraintMatrix class. This would be possible, but is not trivial,
+        // and so we choose this alternative approach.
+        //
+        // What needs to be decided is which side of an interface between two
+        // cells of different refinement level we are sitting on.
+        //
+        // Let's take the case where the neighbor is more refined first. We
+        // then have to loop over the children of the face of the current cell
+        // and integrate on each of them. We sprinkle a couple of assertions
+        // into the code to ensure that our reasoning trying to figure out
+        // which of the neighbor's children's faces coincides with a given
+        // subface of the current cell's faces is correct -- a bit of
+        // defensive programming never hurts.
+        //
+        // We then call the function that integrates over faces; since this is
+        // an internal face, the sixth argument is false, and the seventh one
+        // (the boundary id) is ignored so we pass an invalid value again:
+          else
+            {
+              if (cell->neighbor(face_no)->has_children())
+                {
+                  const unsigned int neighbor2=
+                    cell->neighbor_of_neighbor(face_no);
+
+                  for (unsigned int subface_no=0;
+                       subface_no < cell->face(face_no)->n_children();
+                       ++subface_no)
+                    {
+                      const typename DoFHandler<dim>::active_cell_iterator
+                      neighbor_child
+                        = cell->neighbor_child_on_subface (face_no, subface_no);
+
+                      Assert (neighbor_child->face(neighbor2) ==
+                              cell->face(face_no)->child(subface_no),
+                              ExcInternalError());
+                      Assert (neighbor_child->has_children() == false,
+                              ExcInternalError());
+
+                      fe_v_subface.reinit (cell, face_no, subface_no);
+                      fe_v_face_neighbor.reinit (neighbor_child, neighbor2);
+
+                      neighbor_child->get_dof_indices (dof_indices_neighbor);
+
+                      assemble_face_term (face_no, fe_v_subface,
+                                          fe_v_face_neighbor,
+                                          dof_indices,
+                                          dof_indices_neighbor,
+                                          false,
+                                          numbers::invalid_unsigned_int,
+                                          neighbor_child->face(neighbor2)->diameter());
+                    }
+                }
+
+              // The other possibility we have to care for is if the neighbor
+              // is coarser than the current cell (in particular, because of
+              // the usual restriction of only one hanging node per face, the
+              // neighbor must be exactly one level coarser than the current
+              // cell, something that we check with an assertion). Again, we
+              // then integrate over this interface:
+              else if (cell->neighbor(face_no)->level() != cell->level())
+                {
+                  const typename DoFHandler<dim>::cell_iterator
+                  neighbor = cell->neighbor(face_no);
+                  Assert(neighbor->level() == cell->level()-1,
+                         ExcInternalError());
+
+                  neighbor->get_dof_indices (dof_indices_neighbor);
+
+                  const std::pair<unsigned int, unsigned int>
+                  faceno_subfaceno = cell->neighbor_of_coarser_neighbor(face_no);
+                  const unsigned int neighbor_face_no    = faceno_subfaceno.first,
+                                     neighbor_subface_no = faceno_subfaceno.second;
+
+                  Assert (neighbor->neighbor_child_on_subface (neighbor_face_no,
+                                                               neighbor_subface_no)
+                          == cell,
+                          ExcInternalError());
+
+                  fe_v_face.reinit (cell, face_no);
+                  fe_v_subface_neighbor.reinit (neighbor,
+                                                neighbor_face_no,
+                                                neighbor_subface_no);
+
+                  assemble_face_term (face_no, fe_v_face,
+                                      fe_v_subface_neighbor,
+                                      dof_indices,
+                                      dof_indices_neighbor,
+                                      false,
+                                      numbers::invalid_unsigned_int,
+                                      cell->face(face_no)->diameter());
+                }
+            }
+      }
+  }
+
+
+  // @sect4{ConservationLaw::assemble_cell_term}
+  //
+  // This function assembles the cell term by computing the cell part of the
+  // residual, adding its negative to the right hand side vector, and adding
+  // its derivative with respect to the local variables to the Jacobian
+  // (i.e. the Newton matrix). Recall that the cell contributions to the
+  // residual read
+  // $R_i = \left(\frac{\mathbf{w}^{k}_{n+1} - \mathbf{w}_n}{\delta t} ,
+  // \mathbf{z}_i \right)_K $ $ +
+  // \theta \mathbf{B}(\mathbf{w}^{k}_{n+1})(\mathbf{z}_i)_K $ $ +
+  // (1-\theta) \mathbf{B}(\mathbf{w}_{n}) (\mathbf{z}_i)_K $ where
+  // $\mathbf{B}(\mathbf{w})(\mathbf{z}_i)_K =
+  // - \left(\mathbf{F}(\mathbf{w}),\nabla\mathbf{z}_i\right)_K $ $
+  // + h^{\eta}(\nabla \mathbf{w} , \nabla \mathbf{z}_i)_K $ $
+  // - (\mathbf{G}(\mathbf {w}), \mathbf{z}_i)_K $ for both
+  // $\mathbf{w} = \mathbf{w}^k_{n+1}$ and $\mathbf{w} = \mathbf{w}_{n}$ ,
+  // $\mathbf{z}_i$ is the $i$th vector valued test function.
+  //   Furthermore, the scalar product
+  // $\left(\mathbf{F}(\mathbf{w}), \nabla\mathbf{z}_i\right)_K$ is
+  // understood as $\int_K \sum_{c=1}^{\text{n\_components}}
+  // \sum_{d=1}^{\text{dim}} \mathbf{F}(\mathbf{w})_{cd}
+  // \frac{\partial z^c_i}{\partial x_d}$ where $z^c_i$ is the $c$th component of
+  // the $i$th test function.
+  //
+  //
+  // At the top of this function, we do the usual housekeeping in terms of
+  // allocating some local variables that we will need later. In particular,
+  // we will allocate variables that will hold the values of the current
+  // solution $W_{n+1}^k$ after the $k$th Newton iteration (variable
+  // <code>W</code>) and the previous time step's solution $W_{n}$ (variable
+  // <code>W_old</code>).
+  //
+  // In addition to these, we need the gradients of the current variables.  It
+  // is a bit of a shame that we have to compute these; we almost don't.  The
+  // nice thing about a simple conservation law is that the flux doesn't
+  // generally involve any gradients.  We do need these, however, for the
+  // diffusion stabilization.
+  //
+  // The actual format in which we store these variables requires some
+  // explanation. First, we need values at each quadrature point for each of
+  // the <code>EulerEquations::n_components</code> components of the solution
+  // vector. This makes for a two-dimensional table for which we use deal.II's
+  // Table class (this is more efficient than
+  // <code>std::vector@<std::vector@<T@> @></code> because it only needs to
+  // allocate memory once, rather than once for each element of the outer
+  // vector). Similarly, the gradient is a three-dimensional table, which the
+  // Table class also supports.
+  //
+  // Secondly, we want to use automatic differentiation. To this end, we use
+  // the Sacado::Fad::DFad template for everything that is computed from the
+  // variables with respect to which we would like to compute
+  // derivatives. This includes the current solution and gradient at the
+  // quadrature points (which are linear combinations of the degrees of
+  // freedom) as well as everything that is computed from them such as the
+  // residual, but not the previous time step's solution. These variables are
+  // all found in the first part of the function, along with a variable that
+  // we will use to store the derivatives of a single component of the
+  // residual:
+  template <int dim>
+  void
+  ConservationLaw<dim>::
+  assemble_cell_term (const FEValues<dim>             &fe_v,
+                      const std::vector<types::global_dof_index> &dof_indices)
+  {
+    const unsigned int dofs_per_cell = fe_v.dofs_per_cell;
+    const unsigned int n_q_points    = fe_v.n_quadrature_points;
+
+    Table<2,Sacado::Fad::DFad<double> >
+    W (n_q_points, EulerEquations<dim>::n_components);
+
+    Table<2,double>
+    W_old (n_q_points, EulerEquations<dim>::n_components);
+
+    Table<3,Sacado::Fad::DFad<double> >
+    grad_W (n_q_points, EulerEquations<dim>::n_components, dim);
+
+    Table<3,double>
+    grad_W_old(n_q_points, EulerEquations<dim>::n_components, dim);
+
+    std::vector<double> residual_derivatives (dofs_per_cell);
+
+    // Next, we have to define the independent variables that we will try to
+    // determine by solving a Newton step. These independent variables are the
+    // values of the local degrees of freedom which we extract here:
+    std::vector<Sacado::Fad::DFad<double> > independent_local_dof_values(dofs_per_cell);
+    for (unsigned int i=0; i<dofs_per_cell; ++i)
+      independent_local_dof_values[i] = current_solution(dof_indices[i]);
+
+    // The next step incorporates all the magic: we declare a subset of the
+    // autodifferentiation variables as independent degrees of freedom,
+    // whereas all the other ones remain dependent functions. These are
+    // precisely the local degrees of freedom just extracted. All calculations
+    // that reference them (either directly or indirectly) will accumulate
+    // sensitivities with respect to these variables.
+    //
+    // In order to mark the variables as independent, the following does the
+    // trick, marking <code>independent_local_dof_values[i]</code> as the
+    // $i$th independent variable out of a total of
+    // <code>dofs_per_cell</code>:
+    for (unsigned int i=0; i<dofs_per_cell; ++i)
+      independent_local_dof_values[i].diff (i, dofs_per_cell);
+
+    // After all these declarations, let us actually compute something. First,
+    // the values of <code>W</code>, <code>W_old</code>, <code>grad_W</code>
+    // and <code>grad_W_old</code>, which we can compute from the local DoF values
+    // by using the formula $W(x_q)=\sum_i \mathbf W_i \Phi_i(x_q)$, where
+    // $\mathbf W_i$ is the $i$th entry of the (local part of the) solution
+    // vector, and $\Phi_i(x_q)$ the value of the $i$th vector-valued shape
+    // function evaluated at quadrature point $x_q$. The gradient can be
+    // computed in a similar way.
+    //
+    // Ideally, we could compute this information using a call into something
+    // like FEValues::get_function_values and FEValues::get_function_gradients,
+    // but since (i) we would have to extend the FEValues class for this, and
+    // (ii) we don't want to make the entire <code>old_solution</code> vector
+    // fad types, only the local cell variables, we explicitly code the loop
+    // below. Before that loop, we add another one that initializes all the
+    // variables to zero:
+    for (unsigned int q=0; q<n_q_points; ++q)
+      for (unsigned int c=0; c<EulerEquations<dim>::n_components; ++c)
+        {
+          W[q][c]       = 0;
+          W_old[q][c]   = 0;
+          for (unsigned int d=0; d<dim; ++d)
+            {
+              grad_W[q][c][d] = 0;
+              grad_W_old[q][c][d] = 0;
+            }
+        }
+
+    for (unsigned int q=0; q<n_q_points; ++q)
+      for (unsigned int i=0; i<dofs_per_cell; ++i)
+        {
+          const unsigned int c = fe_v.get_fe().system_to_component_index(i).first;
+
+          W[q][c] += independent_local_dof_values[i] *
+                     fe_v.shape_value_component(i, q, c);
+          W_old[q][c] += old_solution(dof_indices[i]) *
+                         fe_v.shape_value_component(i, q, c);
+
+          for (unsigned int d = 0; d < dim; d++)
+            {
+              grad_W[q][c][d] += independent_local_dof_values[i] *
+                                 fe_v.shape_grad_component(i, q, c)[d];
+              grad_W_old[q][c][d] += old_solution(dof_indices[i]) *
+                                     fe_v.shape_grad_component(i, q, c)[d];
+            }
+        }
+
+
+    // Next, in order to compute the cell contributions, we need to evaluate
+    // $\mathbf{F}({\mathbf w}^k_{n+1})$, $\mathbf{G}({\mathbf w}^k_{n+1})$ and
+    // $\mathbf{F}({\mathbf w}_n)$, $\mathbf{G}({\mathbf w}_n)$ at all quadrature
+    // points. To store these, we also need to allocate a bit of memory. Note
+    // that we compute the flux matrices and right hand sides in terms of
+    // autodifferentiation variables, so that the Jacobian contributions can
+    // later easily be computed from it:
+
+    std::vector <
+    std_cxx11::array <std_cxx11::array <Sacado::Fad::DFad<double>, dim>, EulerEquations<dim>::n_components >
+    > flux(n_q_points);
+
+    std::vector <
+    std_cxx11::array <std_cxx11::array <double, dim>, EulerEquations<dim>::n_components >
+    > flux_old(n_q_points);
+
+    std::vector < std_cxx11::array< Sacado::Fad::DFad<double>, EulerEquations<dim>::n_components> > forcing(n_q_points);
+
+    std::vector < std_cxx11::array< double, EulerEquations<dim>::n_components> > forcing_old(n_q_points);
+
+    for (unsigned int q=0; q<n_q_points; ++q)
+      {
+        EulerEquations<dim>::compute_flux_matrix (W_old[q], flux_old[q]);
+        EulerEquations<dim>::compute_forcing_vector (W_old[q], forcing_old[q]);
+        EulerEquations<dim>::compute_flux_matrix (W[q], flux[q]);
+        EulerEquations<dim>::compute_forcing_vector (W[q], forcing[q]);
+      }
+
+
+    // We now have all of the pieces in place, so perform the assembly.  We
+    // have an outer loop through the local degrees of freedom, and an inner
+    // loop over the quadrature points, where we accumulate contributions to
+    // the $i$th residual $R_i$. The general formula for this residual is
+    // given in the introduction and at the top of this function. We can,
+    // however, simplify it a bit taking into account that the $i$th
+    // (vector-valued) test function $\mathbf{z}_i$ has in reality only a
+    // single nonzero component (more on this topic can be found in the @ref
+    // vector_valued module). It will be represented by the variable
+    // <code>component_i</code> below. With this, the residual term can be
+    // re-written as
+    // @f{eqnarray*}
+    // R_i &=&
+    // \left(\frac{(\mathbf{w}^k_{n+1} -
+    // \mathbf{w}_n)_{\text{component\_i}}}{\delta
+    // t},(\mathbf{z}_i)_{\text{component\_i}}\right)_K
+    // \\ &-& \sum_{d=1}^{\text{dim}} \left(  \theta \mathbf{F}
+    // ({\mathbf{w}^k_{n+1}})_{\text{component\_i},d} + (1-\theta)
+    // \mathbf{F} ({\mathbf{w}_{n}})_{\text{component\_i},d}  ,
+    // \frac{\partial(\mathbf{z}_i)_{\text{component\_i}}} {\partial
+    // x_d}\right)_K
+    // \\ &+& \sum_{d=1}^{\text{dim}} h^{\eta} \left( \theta \frac{\partial
+    // (\mathbf{w}^k_{n+1})_{\text{component\_i}}}{\partial x_d} + (1-\theta)
+    // \frac{\partial (\mathbf{w}_n)_{\text{component\_i}}}{\partial x_d} ,
+    // \frac{\partial (\mathbf{z}_i)_{\text{component\_i}}}{\partial x_d} \right)_K
+    // \\ &-& \left( \theta\mathbf{G}({\mathbf{w}^k_{n+1}} )_{\text{component\_i}} +
+    // (1-\theta)\mathbf{G}({\mathbf{w}_n})_{\text{component\_i}} ,
+    // (\mathbf{z}_i)_{\text{component\_i}} \right)_K ,
+    // @f}
+    // where integrals are
+    // understood to be evaluated through summation over quadrature points.
+    //
+    // We initially sum all contributions of the residual in the positive
+    // sense, so that we don't need to negate the Jacobian entries.  Then,
+    // when we sum into the <code>right_hand_side</code> vector, we negate
+    // this residual.
+    for (unsigned int i=0; i<fe_v.dofs_per_cell; ++i)
+      {
+        Sacado::Fad::DFad<double> R_i = 0;
+
+        const unsigned int
+        component_i = fe_v.get_fe().system_to_component_index(i).first;
+
+        // The residual for each row (i) is accumulated into the fad variable
+        // <code>R_i</code>.  At the end of the assembly for this row, we query
+        // for the sensitivities of this variable and add them into the
+        // Jacobian.
+
+        for (unsigned int point=0; point<fe_v.n_quadrature_points; ++point)
+          {
+            if (parameters.is_stationary == false)
+              R_i += 1.0 / parameters.time_step *
+                     (W[point][component_i] - W_old[point][component_i]) *
+                     fe_v.shape_value_component(i, point, component_i) *
+                     fe_v.JxW(point);
+
+            for (unsigned int d=0; d<dim; d++)
+              R_i -= ( parameters.theta * flux[point][component_i][d] +
+                       (1.0-parameters.theta) * flux_old[point][component_i][d] ) *
+                     fe_v.shape_grad_component(i, point, component_i)[d] *
+                     fe_v.JxW(point);
+
+            for (unsigned int d=0; d<dim; d++)
+              R_i += 1.0*std::pow(fe_v.get_cell()->diameter(),
+                                  parameters.diffusion_power) *
+                     ( parameters.theta * grad_W[point][component_i][d] +
+                       (1.0-parameters.theta) * grad_W_old[point][component_i][d] ) *
+                     fe_v.shape_grad_component(i, point, component_i)[d] *
+                     fe_v.JxW(point);
+
+            R_i -= ( parameters.theta  * forcing[point][component_i] +
+                     (1.0 - parameters.theta) * forcing_old[point][component_i] ) *
+                   fe_v.shape_value_component(i, point, component_i) *
+                   fe_v.JxW(point);
+          }
+
+        // At the end of the loop, we have to add the sensitivities to the
+        // matrix and subtract the residual from the right hand side. Trilinos'
+        // FAD data type gives us access to the derivatives using
+        // <code>R_i.fastAccessDx(k)</code>, so we store the data in a
+        // temporary array. This information about the whole row of local dofs
+        // is then added to the Trilinos matrix at once (which supports the
+        // data types we have chosen).
+        for (unsigned int k=0; k<dofs_per_cell; ++k)
+          residual_derivatives[k] = R_i.fastAccessDx(k);
+        system_matrix.add(dof_indices[i], dof_indices, residual_derivatives);
+        right_hand_side(dof_indices[i]) -= R_i.val();
+      }
+  }
+
+
+  // @sect4{ConservationLaw::assemble_face_term}
+  //
+  // Here, we do essentially the same as in the previous function. At the top,
+  // we introduce the independent variables. Because the current function is
+  // also used if we are working on an internal face between two cells, the
+  // independent variables are not only the degrees of freedom on the current
+  // cell but in the case of an interior face also the ones on the neighbor.
+  template <int dim>
+  void
+  ConservationLaw<dim>::assemble_face_term(const unsigned int           face_no,
+                                           const FEFaceValuesBase<dim> &fe_v,
+                                           const FEFaceValuesBase<dim> &fe_v_neighbor,
+                                           const std::vector<types::global_dof_index>   &dof_indices,
+                                           const std::vector<types::global_dof_index>   &dof_indices_neighbor,
+                                           const bool                   external_face,
+                                           const unsigned int           boundary_id,
+                                           const double                 face_diameter)
+  {
+    const unsigned int n_q_points = fe_v.n_quadrature_points;
+    const unsigned int dofs_per_cell = fe_v.dofs_per_cell;
+
+    std::vector<Sacado::Fad::DFad<double> >
+    independent_local_dof_values (dofs_per_cell),
+                                 independent_neighbor_dof_values (external_face == false ?
+                                     dofs_per_cell :
+                                     0);
+
+    const unsigned int n_independent_variables = (external_face == false ?
+                                                  2 * dofs_per_cell :
+                                                  dofs_per_cell);
+
+    for (unsigned int i = 0; i < dofs_per_cell; i++)
+      {
+        independent_local_dof_values[i] = current_solution(dof_indices[i]);
+        independent_local_dof_values[i].diff(i, n_independent_variables);
+      }
+
+    if (external_face == false)
+      for (unsigned int i = 0; i < dofs_per_cell; i++)
+        {
+          independent_neighbor_dof_values[i]
+            = current_solution(dof_indices_neighbor[i]);
+          independent_neighbor_dof_values[i]
+          .diff(i+dofs_per_cell, n_independent_variables);
+        }
+
+
+    // Next, we need to define the values of the conservative variables
+    // ${\mathbf W}$ on this side of the face ($ {\mathbf W}^+$)
+    // and on the opposite side (${\mathbf W}^-$), for both ${\mathbf W} =
+    // {\mathbf W}^k_{n+1}$ and  ${\mathbf W} = {\mathbf W}_n$.
+    // The "this side" values can be
+    // computed in exactly the same way as in the previous function, but note
+    // that the <code>fe_v</code> variable now is of type FEFaceValues or
+    // FESubfaceValues:
+    Table<2,Sacado::Fad::DFad<double> >
+    Wplus (n_q_points, EulerEquations<dim>::n_components),
+          Wminus (n_q_points, EulerEquations<dim>::n_components);
+    Table<2,double>
+    Wplus_old(n_q_points, EulerEquations<dim>::n_components),
+              Wminus_old(n_q_points, EulerEquations<dim>::n_components);
+
+    for (unsigned int q=0; q<n_q_points; ++q)
+      for (unsigned int i=0; i<dofs_per_cell; ++i)
+        {
+          const unsigned int component_i = fe_v.get_fe().system_to_component_index(i).first;
+          Wplus[q][component_i] +=  independent_local_dof_values[i] *
+                                    fe_v.shape_value_component(i, q, component_i);
+          Wplus_old[q][component_i] +=  old_solution(dof_indices[i]) *
+                                        fe_v.shape_value_component(i, q, component_i);
+        }
+
+    // Computing the "opposite side" values is a bit more complicated. If this
+    // is an internal face, we can compute them as above by simply using the
+    // independent variables from the neighbor:
+    if (external_face == false)
+      {
+        for (unsigned int q=0; q<n_q_points; ++q)
+          for (unsigned int i=0; i<dofs_per_cell; ++i)
+            {
+              const unsigned int component_i = fe_v_neighbor.get_fe().
+                                               system_to_component_index(i).first;
+              Wminus[q][component_i] += independent_neighbor_dof_values[i] *
+                                        fe_v_neighbor.shape_value_component(i, q, component_i);
+              Wminus_old[q][component_i] += old_solution(dof_indices_neighbor[i])*
+                                            fe_v_neighbor.shape_value_component(i, q, component_i);
+            }
+      }
+    // On the other hand, if this is an external boundary face, then the
+    // values of $\mathbf{W}^-$ will be either functions of $\mathbf{W}^+$, or they will be
+    // prescribed, depending on the kind of boundary condition imposed here.
+    //
+    // To start the evaluation, let us ensure that the boundary id specified
+    // for this boundary is one for which we actually have data in the
+    // parameters object. Next, we evaluate the function object for the
+    // inhomogeneity.  This is a bit tricky: a given boundary might have both
+    // prescribed and implicit values.  If a particular component is not
+    // prescribed, the values evaluate to zero and are ignored below.
+    //
+    // The rest is done by a function that actually knows the specifics of
+    // Euler equation boundary conditions. Note that since we are using fad
+    // variables here, sensitivities will be updated appropriately, a process
+    // that would otherwise be tremendously complicated.
+    else
+      {
+        Assert (boundary_id < Parameters::AllParameters<dim>::max_n_boundaries,
+                ExcIndexRange (boundary_id, 0,
+                               Parameters::AllParameters<dim>::max_n_boundaries));
+
+        std::vector<Vector<double> >
+        boundary_values(n_q_points, Vector<double>(EulerEquations<dim>::n_components));
+        parameters.boundary_conditions[boundary_id]
+        .values.vector_value_list(fe_v.get_quadrature_points(),
+                                  boundary_values);
+
+        for (unsigned int q = 0; q < n_q_points; q++)
+          {
+            EulerEquations<dim>::compute_Wminus (parameters.boundary_conditions[boundary_id].kind,
+                                                 fe_v.normal_vector(q),
+                                                 Wplus[q],
+                                                 boundary_values[q],
+                                                 Wminus[q]);
+            // Here we assume that the boundary type, boundary normal vector and boundary
+            // data values remain the same during time advancing.
+            EulerEquations<dim>::compute_Wminus (parameters.boundary_conditions[boundary_id].kind,
+                                                 fe_v.normal_vector(q),
+                                                 Wplus_old[q],
+                                                 boundary_values[q],
+                                                 Wminus_old[q]);
+          }
+      }
+
+
+    // Now that we have $\mathbf w^+$ and $\mathbf w^-$, we can go about
+    // computing the numerical flux function $\mathbf H(\mathbf w^+,\mathbf
+    // w^-, \mathbf n)$ for each quadrature point. Before calling the function
+    // that does so, we also need to determine the Lax-Friedrichs stability
+    // parameter:
+
+    std::vector< std_cxx11::array < Sacado::Fad::DFad<double>, EulerEquations<dim>::n_components> >  normal_fluxes(n_q_points);
+    std::vector< std_cxx11::array < double, EulerEquations<dim>::n_components> >  normal_fluxes_old(n_q_points);
+
+    double alpha;
+
+    switch (parameters.stabilization_kind)
+      {
+      case Parameters::Flux::constant:
+        alpha = parameters.stabilization_value;
+        break;
+      case Parameters::Flux::mesh_dependent:
+        alpha = face_diameter/(2.0*parameters.time_step);
+        break;
+      default:
+        Assert (false, ExcNotImplemented());
+        alpha = 1;
+      }
+
+    for (unsigned int q=0; q<n_q_points; ++q)
+      {
+        EulerEquations<dim>::numerical_normal_flux(fe_v.normal_vector(q),
+                                                   Wplus[q], Wminus[q], alpha,
+                                                   normal_fluxes[q]);
+        EulerEquations<dim>::numerical_normal_flux(fe_v.normal_vector(q),
+                                                   Wplus_old[q], Wminus_old[q], alpha,
+                                                   normal_fluxes_old[q]);
+      }
+
+    // Now assemble the face term in exactly the same way as for the cell
+    // contributions in the previous function. The only difference is that if
+    // this is an internal face, we also have to take into account the
+    // sensitivities of the residual contributions to the degrees of freedom on
+    // the neighboring cell:
+    std::vector<double> residual_derivatives (dofs_per_cell);
+    for (unsigned int i=0; i<fe_v.dofs_per_cell; ++i)
+      if (fe_v.get_fe().has_support_on_face(i, face_no) == true)
+        {
+          Sacado::Fad::DFad<double> R_i = 0;
+
+          for (unsigned int point=0; point<n_q_points; ++point)
+            {
+              const unsigned int
+              component_i = fe_v.get_fe().system_to_component_index(i).first;
+
+              R_i += ( parameters.theta * normal_fluxes[point][component_i] +
+                       (1.0 - parameters.theta) * normal_fluxes_old[point][component_i] ) *
+                     fe_v.shape_value_component(i, point, component_i) *
+                     fe_v.JxW(point);
+            }
+
+          for (unsigned int k=0; k<dofs_per_cell; ++k)
+            residual_derivatives[k] = R_i.fastAccessDx(k);
+          system_matrix.add(dof_indices[i], dof_indices, residual_derivatives);
+
+          if (external_face == false)
+            {
+              for (unsigned int k=0; k<dofs_per_cell; ++k)
+                residual_derivatives[k] = R_i.fastAccessDx(dofs_per_cell+k);
+              system_matrix.add (dof_indices[i], dof_indices_neighbor,
+                                 residual_derivatives);
+            }
+
+          right_hand_side(dof_indices[i]) -= R_i.val();
+        }
+  }
+
+
+  // @sect4{ConservationLaw::solve}
+  //
+  // Here, we actually solve the linear system, using either of Trilinos'
+  // Aztec or Amesos linear solvers. The result of the computation will be
+  // written into the argument vector passed to this function. The result is a
+  // pair of number of iterations and the final linear residual.
+
+  template <int dim>
+  std::pair<unsigned int, double>
+  ConservationLaw<dim>::solve (Vector<double> &newton_update)
+  {
+    switch (parameters.solver)
+      {
+      // If the parameter file specified that a direct solver shall be used,
+      // then we'll get here. The process is straightforward, since deal.II
+      // provides a wrapper class to the Amesos direct solver within
+      // Trilinos. All we have to do is to create a solver control object
+      // (which is just a dummy object here, since we won't perform any
+      // iterations), and then create the direct solver object. When
+      // actually doing the solve, note that we don't pass a
+      // preconditioner. That wouldn't make much sense for a direct solver
+      // anyway.  At the end we return the solver control statistics —
+      // which will tell that no iterations have been performed and that the
+      // final linear residual is zero, absent any better information that
+      // may be provided here:
+      case Parameters::Solver::direct:
+      {
+        SolverControl solver_control (1,0);
+        TrilinosWrappers::SolverDirect::AdditionalData data (
+          parameters.output == Parameters::Solver::verbose);
+        TrilinosWrappers::SolverDirect direct (solver_control, data);
+
+        direct.solve (system_matrix, newton_update, right_hand_side);
+
+        return std::pair<unsigned int, double> (solver_control.last_step(),
+                                                solver_control.last_value());
+      }
+
+      // Likewise, if we are to use an iterative solver, we use Aztec's GMRES
+      // solver. We could use the Trilinos wrapper classes for iterative
+      // solvers and preconditioners here as well, but we choose to use an
+      // Aztec solver directly. For the given problem, Aztec's internal
+      // preconditioner implementations are superior over the ones deal.II has
+      // wrapper classes to, so we use ILU-T preconditioning within the
+      // AztecOO solver and set a bunch of options that can be changed from
+      // the parameter file.
+      //
+      // There are two more practicalities: Since we have built our right hand
+      // side and solution vector as deal.II Vector objects (as opposed to the
+      // matrix, which is a Trilinos object), we must hand the solvers
+      // Trilinos Epetra vectors.  Luckily, they support the concept of a
+      // 'view', so we just send in a pointer to our deal.II vectors. We have
+      // to provide an Epetra_Map for the vector that sets the parallel
+      // distribution, which is just a dummy object in serial. The easiest way
+      // is to ask the matrix for its map, and we're going to be ready for
+      // matrix-vector products with it.
+      //
+      // Secondly, the Aztec solver wants us to pass a Trilinos
+      // Epetra_CrsMatrix in, not the deal.II wrapper class itself. So we
+      // access the actual Trilinos matrix inside the wrapper class through
+      // its trilinos_matrix() function. Trilinos wants the matrix to be
+      // non-constant, so we have to manually remove the constness using a
+      // const_cast.
+      case Parameters::Solver::gmres:
+      {
+        Epetra_Vector x(View, system_matrix.trilinos_matrix().DomainMap(),
+                        newton_update.begin());
+        Epetra_Vector b(View, system_matrix.trilinos_matrix().RangeMap(),
+                        right_hand_side.begin());
+
+        AztecOO solver;
+        solver.SetAztecOption(AZ_output,
+                              (parameters.output ==
+                               Parameters::Solver::quiet
+                               ?
+                               AZ_none
+                               :
+                               AZ_all));
+        solver.SetAztecOption(AZ_solver, AZ_gmres);
+        solver.SetRHS(&b);
+        solver.SetLHS(&x);
+
+        solver.SetAztecOption(AZ_precond,         AZ_dom_decomp);
+        solver.SetAztecOption(AZ_subdomain_solve, AZ_ilut);
+        solver.SetAztecOption(AZ_overlap,         0);
+        solver.SetAztecOption(AZ_reorder,         0);
+
+        solver.SetAztecParam(AZ_drop,      parameters.ilut_drop);
+        solver.SetAztecParam(AZ_ilut_fill, parameters.ilut_fill);
+        solver.SetAztecParam(AZ_athresh,   parameters.ilut_atol);
+        solver.SetAztecParam(AZ_rthresh,   parameters.ilut_rtol);
+
+        solver.SetUserMatrix(const_cast<Epetra_CrsMatrix *>
+                             (&system_matrix.trilinos_matrix()));
+
+        solver.Iterate(parameters.max_iterations, parameters.linear_residual);
+
+        return std::pair<unsigned int, double> (solver.NumIters(),
+                                                solver.TrueResidual());
+      }
+      }
+
+    Assert (false, ExcNotImplemented());
+    return std::pair<unsigned int, double> (0,0);
+  }
+
+
+  // @sect4{ConservationLaw::compute_refinement_indicators}
+
+  // This function is really simple: We don't pretend that we know here what a
+  // good refinement indicator would be. Rather, we assume that the
+  // <code>EulerEquations</code> class would know about this, and so we simply
+  // defer to the respective function we've implemented there:
+  template <int dim>
+  void
+  ConservationLaw<dim>::
+  compute_refinement_indicators (Vector<double> &refinement_indicators) const
+  {
+    EulerEquations<dim>::compute_refinement_indicators (dof_handler,
+                                                        mapping,
+                                                        predictor,
+                                                        refinement_indicators);
+  }
+
+
+
+  // @sect4{ConservationLaw::refine_grid}
+
+  // Here, we use the refinement indicators computed before and refine the
+  // mesh. At the beginning, we loop over all active cells and flag those that
+  // we think should be refined or coarsened:
+  template <int dim>
+  void
+  ConservationLaw<dim>::refine_grid (const Vector<double> &refinement_indicators)
+  {
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+
+    for (unsigned int cell_no=0; cell!=endc; ++cell, ++cell_no)
+      {
+        cell->clear_coarsen_flag();
+        cell->clear_refine_flag();
+
+        if ((cell->level() < parameters.shock_levels) &&
+            (std::fabs(refinement_indicators(cell_no)) > parameters.shock_val))
+          cell->set_refine_flag();
+        else if ((cell->level() > 0) &&
+                 (std::fabs(refinement_indicators(cell_no)) < 0.75*parameters.shock_val))
+          cell->set_coarsen_flag();
+      }
+
+    // Then we need to transfer the various solution vectors from the old to
+    // the new grid while we do the refinement. The SolutionTransfer class is
+    // our friend here; it has a fairly extensive documentation, including
+    // examples, so we won't comment much on the following code. The last
+    // three lines simply re-set the current solution and the right hand side
+    // vector to the now correct size:
+    std::vector<Vector<double> > transfer_in;
+    std::vector<Vector<double> > transfer_out;
+
+    transfer_in.push_back(old_solution);
+    transfer_in.push_back(predictor);
+
+    triangulation.prepare_coarsening_and_refinement();
+
+    SolutionTransfer<dim> soltrans(dof_handler);
+    soltrans.prepare_for_coarsening_and_refinement(transfer_in);
+
+    triangulation.execute_coarsening_and_refinement ();
+
+    dof_handler.clear();
+    dof_handler.distribute_dofs (fe);
+
+    {
+      Vector<double> new_old_solution(1);
+      Vector<double> new_predictor(1);
+
+      transfer_out.push_back(new_old_solution);
+      transfer_out.push_back(new_predictor);
+      transfer_out[0].reinit(dof_handler.n_dofs());
+      transfer_out[1].reinit(dof_handler.n_dofs());
+    }
+
+    soltrans.interpolate(transfer_in, transfer_out);
+
+    old_solution.reinit (transfer_out[0].size());
+    old_solution = transfer_out[0];
+
+    predictor.reinit (transfer_out[1].size());
+    predictor = transfer_out[1];
+
+    current_solution.reinit(dof_handler.n_dofs());
+    current_solution = old_solution;
+    right_hand_side.reinit (dof_handler.n_dofs());
+  }
+
+
+  // @sect4{ConservationLaw::output_results}
+
+  // This function now is rather straightforward. All the magic, including
+  // transforming data from conservative variables to physical ones has been
+  // abstracted and moved into the EulerEquations class so that it can be
+  // replaced in case we want to solve some other hyperbolic conservation law.
+  //
+  // Note that the number of the output file (used in the name
+  // <code>solution-NNN.vtk</code>) is determined by keeping a counter in the
+  // form of a static variable that is set to zero the first time we come to
+  // this function and is incremented by one at the end of each invocation.
+  template <int dim>
+  void ConservationLaw<dim>::output_results () const
+  {
+    typename EulerEquations<dim>::Postprocessor
+    postprocessor (parameters.schlieren_plot);
+
+    DataOut<dim> data_out;
+    data_out.attach_dof_handler (dof_handler);
+
+    data_out.add_data_vector (current_solution,
+                              EulerEquations<dim>::component_names (),
+                              DataOut<dim>::type_dof_data,
+                              EulerEquations<dim>::component_interpretation ());
+
+    data_out.add_data_vector (current_solution, postprocessor);
+
+    data_out.build_patches ();
+
+    static unsigned int output_file_number = 0;
+    std::string filename = "solution-" +
+                           Utilities::int_to_string (output_file_number, 3) +
+                           ".vtk";
+    std::ofstream output (filename.c_str());
+    data_out.write_vtk (output);
+
+    ++output_file_number;
+  }
+
+
+
+
+  // @sect4{ConservationLaw::run}
+
+  // This function contains the top-level logic of this program:
+  // initialization, the time loop, and the inner Newton iteration.
+  //
+  // At the beginning, we read the mesh file specified by the parameter file
+  // (throwing an exception if it cannot be opened), set up the DoFHandler and
+  // various vectors, and interpolate the given initial conditions on this
+  // mesh. We then perform a number of mesh refinements, based on the initial
+  // conditions, to obtain a mesh that is already well adapted to the starting
+  // solution. At the end of this process, we output the initial solution.
+  template <int dim>
+  void ConservationLaw<dim>::run ()
+  {
+    {
+      GridIn<dim> grid_in;
+      grid_in.attach_triangulation(triangulation);
+
+      std::ifstream input_file(parameters.mesh_filename.c_str());
+      AssertThrow (input_file, ExcFileNotOpen(parameters.mesh_filename.c_str()));
+
+      grid_in.read_ucd(input_file);
+    }
+
+    dof_handler.clear();
+    dof_handler.distribute_dofs (fe);
+
+    // Size all of the fields.
+    old_solution.reinit (dof_handler.n_dofs());
+    current_solution.reinit (dof_handler.n_dofs());
+    predictor.reinit (dof_handler.n_dofs());
+    right_hand_side.reinit (dof_handler.n_dofs());
+
+    setup_system();
+
+    VectorTools::interpolate(dof_handler,
+                             parameters.initial_conditions, old_solution);
+    current_solution = old_solution;
+    predictor = old_solution;
+
+    if (parameters.do_refine == true)
+      for (unsigned int i=0; i<parameters.shock_levels; ++i)
+        {
+          Vector<double> refinement_indicators (triangulation.n_active_cells());
+
+          compute_refinement_indicators(refinement_indicators);
+          refine_grid(refinement_indicators);
+
+          setup_system();
+
+          VectorTools::interpolate(dof_handler,
+                                   parameters.initial_conditions, old_solution);
+          current_solution = old_solution;
+          predictor = old_solution;
+        }
+
+    output_results ();
+
+    // We then enter into the main time stepping loop. At the top we simply
+    // output some status information so one can keep track of where a
+    // computation is, as well as the header for a table that indicates
+    // progress of the nonlinear inner iteration:
+    Vector<double> newton_update (dof_handler.n_dofs());
+
+    double time = 0;
+    double next_output = time + parameters.output_step;
+
+    predictor = old_solution;
+    while (time < parameters.final_time)
+      {
+        std::cout << "T=" << time << std::endl
+                  << "   Number of active cells:       "
+                  << triangulation.n_active_cells()
+                  << std::endl
+                  << "   Number of degrees of freedom: "
+                  << dof_handler.n_dofs()
+                  << std::endl
+                  << std::endl;
+
+        std::cout << "   NonLin Res     Lin Iter       Lin Res" << std::endl
+                  << "   _____________________________________" << std::endl;
+
+        // Then comes the inner Newton iteration to solve the nonlinear
+        // problem in each time step. The way it works is to reset matrix and
+        // right hand side to zero, then assemble the linear system. If the
+        // norm of the right hand side is small enough, then we declare that
+        // the Newton iteration has converged. Otherwise, we solve the linear
+        // system, update the current solution with the Newton increment, and
+        // output convergence information. At the end, we check that the
+        // number of Newton iterations is not beyond a limit of 10 -- if it
+        // is, it appears likely that iterations are diverging and further
+        // iterations would do no good. If that happens, we throw an exception
+        // that will be caught in <code>main()</code> with status information
+        // being displayed before the program aborts.
+        //
+        // Note that the way we write the AssertThrow macro below is by and
+        // large equivalent to writing something like <code>if (!(nonlin_iter
+        // @<= 10)) throw ExcMessage ("No convergence in nonlinear
+        // solver");</code>. The only significant difference is that
+        // AssertThrow also makes sure that the exception being thrown carries
+        // with it information about the location (file name and line number)
+        // where it was generated. This is not overly critical here, because
+        // there is only a single place where this sort of exception can
+        // happen; however, it is generally a very useful tool when one wants
+        // to find out where an error occurred.
+        unsigned int nonlin_iter = 0;
+        current_solution = predictor;
+        while (true)
+          {
+            system_matrix = 0;
+
+            right_hand_side = 0;
+            assemble_system ();
+
+            const double res_norm = right_hand_side.l2_norm();
+            if (std::fabs(res_norm) < 1e-10)
+              {
+                std::printf("   %-16.3e (converged)\n\n", res_norm);
+                break;
+              }
+            else
+              {
+                newton_update = 0;
+
+                std::pair<unsigned int, double> convergence
+                  = solve (newton_update);
+
+                current_solution += newton_update;
+
+                std::printf("   %-16.3e %04d        %-5.2e\n",
+                            res_norm, convergence.first, convergence.second);
+              }
+
+            ++nonlin_iter;
+            AssertThrow (nonlin_iter <= 10,
+                         ExcMessage ("No convergence in nonlinear solver"));
+          }
+
+        // We only get to this point if the Newton iteration has converged, so
+        // do various post convergence tasks here:
+        //
+        // First, we update the time and produce graphical output if so
+        // desired. Then we update a predictor for the solution at the next
+        // time step by approximating $\mathbf w^{n+1}\approx \mathbf w^n +
+        // \delta t \frac{\partial \mathbf w}{\partial t} \approx \mathbf w^n
+        // + \delta t \; \frac{\mathbf w^n-\mathbf w^{n-1}}{\delta t} = 2
+        // \mathbf w^n - \mathbf w^{n-1}$ to try and make adaptivity work
+        // better.  The idea is to try and refine ahead of a front, rather
+        // than stepping into a coarse set of elements and smearing the
+        // old_solution.  This simple time extrapolator does the job. With
+        // this, we then refine the mesh if so desired by the user, and
+        // finally continue on with the next time step:
+        time += parameters.time_step;
+
+        if (parameters.output_step < 0)
+          output_results ();
+        else if (time >= next_output)
+          {
+            output_results ();
+            next_output += parameters.output_step;
+          }
+
+        predictor = current_solution;
+        predictor.sadd (2.0, -1.0, old_solution);
+
+        old_solution = current_solution;
+
+        if (parameters.do_refine == true)
+          {
+            Vector<double> refinement_indicators (triangulation.n_active_cells());
+            compute_refinement_indicators(refinement_indicators);
+
+            refine_grid(refinement_indicators);
+            setup_system();
+
+            newton_update.reinit (dof_handler.n_dofs());
+          }
+      }
+  }
+}
+
+// @sect3{main()}
+
+// The following ``main'' function is similar to previous examples and need
+// not be commented on. Note that the program prints a usage message and
+// exits unless exactly one argument (the input file name) is given.
+int main (int argc, char *argv[])
+{
+  try
+    {
+      using namespace dealii;
+      using namespace Step33;
+
+      if (argc != 2)
+        {
+          std::cout << "Usage:" << argv[0] << " input_file" << std::endl;
+          std::exit(1);
+        }
+
+      Utilities::MPI::MPI_InitFinalize mpi_initialization(argc, argv, dealii::numbers::invalid_unsigned_int);
+
+      ConservationLaw<2> cons (argv[1]);
+      cons.run ();
+    }
+  catch (std::exception &exc)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    }
+  catch (...)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    };
+
+  return 0;
+}
diff --git a/examples/step-34/CMakeLists.txt b/examples/step-34/CMakeLists.txt
new file mode 100644
index 0000000..4bcda21
--- /dev/null
+++ b/examples/step-34/CMakeLists.txt
@@ -0,0 +1,50 @@
+##
+#  CMake script for the step-34 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-34")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+#
+# Are all dependencies fulfilled? This step requires muParser support.
+#
+IF(NOT DEAL_II_WITH_MUPARSER)
+  MESSAGE(FATAL_ERROR "
+Error! The deal.II library found at ${DEAL_II_PATH} was not configured with
+    DEAL_II_WITH_MUPARSER = ON
+One or all of these are OFF in your installation but are required for this tutorial step."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-34/coarse_circle.inp b/examples/step-34/coarse_circle.inp
new file mode 100644
index 0000000..dda97cd
--- /dev/null
+++ b/examples/step-34/coarse_circle.inp
@@ -0,0 +1,21 @@
+10 10 0 0 0
+1	0 1.0 0
+2	0.587785252292 0.809016994375 0
+3	0.951056516295 0.309016994375 0
+4	0.951056516295 -0.309016994375 0
+5	0.587785252292 -0.809016994375 0
+6	0 -1.0 0
+7	-0.587785252292 -0.809016994375 0
+8	-0.951056516295 -0.309016994375 0
+9	-0.951056516295 0.309016994375 0
+10	-0.587785252292 0.809016994375 0
+1 1 line 1 2
+2 1 line 2 3
+3 1 line 3 4
+4 1 line 4 5
+5 1 line 5 6
+6 1 line 6 7
+7 1 line 7 8
+8 1 line 8 9
+9 1 line 9 10
+10 1 line 10 1
diff --git a/examples/step-34/coarse_sphere.inp b/examples/step-34/coarse_sphere.inp
new file mode 100644
index 0000000..5ea249c
--- /dev/null
+++ b/examples/step-34/coarse_sphere.inp
@@ -0,0 +1,15 @@
+8 6 0 0 0
+1	-0.577350269 -0.577350269 -0.577350269
+2	 0.577350269 -0.577350269 -0.577350269
+3	-0.577350269  0.577350269 -0.577350269
+4	 0.577350269  0.577350269 -0.577350269
+5	-0.577350269 -0.577350269  0.577350269
+6	 0.577350269 -0.577350269  0.577350269
+7	-0.577350269  0.577350269  0.577350269
+8	 0.577350269  0.577350269  0.577350269
+1 1 quad 3 4 2 1
+2 1 quad 5 6 8 7
+3 1 quad 1 2 6 5 
+4 1 quad 3 7 8 4
+5 1 quad 5 7 3 1
+6 1 quad 2 4 8 6
diff --git a/examples/step-34/doc/builds-on b/examples/step-34/doc/builds-on
new file mode 100644
index 0000000..2397751
--- /dev/null
+++ b/examples/step-34/doc/builds-on
@@ -0,0 +1 @@
+step-4 step-38
diff --git a/examples/step-34/doc/intro.dox b/examples/step-34/doc/intro.dox
new file mode 100644
index 0000000..0dce094
--- /dev/null
+++ b/examples/step-34/doc/intro.dox
@@ -0,0 +1,687 @@
+<br>
+
+<i>This program was contributed by Luca Heltai (thanks to Michael
+Gratton for pointing out what the exact solution should have been in
+the three dimensional case).  </i>
+
+<a name="Intro"></a>
+
+<h1>Introduction</h1>
+
+<h3> Irrotational flow </h3>
+The incompressible motion of an inviscid fluid past a body (for
+example air past an airplane wing, or air or water past a propeller) is
+usually modeled by the Euler equations of fluid dynamics:
+
+\f{align*}
+  \frac{\partial }{\partial t}\mathbf{v} + (\mathbf{v}\cdot\nabla)\mathbf{v}
+  &=
+  -\frac{1}{\rho}\nabla p + \mathbf{g}
+  \qquad &\text{in } \mathbb{R}^n \backslash \Omega
+  \\
+  \nabla \cdot \mathbf{v}&=0
+  &\text{in } \mathbb{R}^n\backslash\Omega
+\f}
+where the fluid density $\rho$ and the acceleration $\mathbf{g}$ due
+to external forces are given and the velocity $\mathbf{v}$ and the
+pressure $p$ are the unknowns. Here $\Omega$ is a closed bounded
+region representing the body around which the fluid moves.
+
+The above equations can be derived from Navier-Stokes equations
+assuming that the effects due to viscosity are negligible compared to
+those due to the pressure gradient, inertial forces and the external
+forces. This is the opposite case of the Stokes equations discussed in
+step-22 which are the limit case of dominant viscosity,
+i.e. where the velocity is so small that inertia forces can be
+neglected. On the other hand, owing to the assumed incompressibility,
+the equations are not suited for very high speed gas flows where
+compressibility and the equation of state of the gas have to be taken
+into account, leading to the Euler equations of gas dynamics, a
+hyperbolic system.
+
+For the purpose of this tutorial program, we will consider only stationary
+flow without external forces:
+\f{align*}
+  (\mathbf{v}\cdot\nabla)\mathbf{v}
+  &=
+  -\frac{1}{\rho}\nabla p
+  \qquad &\text{in } \mathbb{R}^n \backslash \Omega
+  \\
+  \nabla \cdot \mathbf{v}&=0
+  &\text{in } \mathbb{R}^n\backslash\Omega
+\f}
+
+
+Uniqueness of the solution of the Euler equations is ensured by adding the
+boundary conditions
+\f[
+  \label{eq:boundary-conditions}
+  \begin{aligned}
+    \mathbf{n}\cdot\mathbf{v}& = 0 \qquad && \text{ on } \partial\Omega \\
+    \mathbf{v}& = \mathbf{v}_\infty && \text{ when } |\mathbf{x}| \to \infty,
+  \end{aligned}
+\f]
+
+which is to say that the body is at rest in our coordinate systems and
+is not permeable, and that the fluid has (constant) velocity
+$\mathbf{v}_\infty$ at infinity. An alternative viewpoint is that our
+coordinate system moves along with the body whereas the background
+fluid is at rest at infinity. Notice that we define the normal
+$\mathbf{n}$ as the <i>outer</i> normal to the domain $\Omega$, which
+is the opposite of the outer normal to the integration domain.
+
+For both stationary and non stationary flow, the solution process
+starts by solving for the velocity in the second equation and
+substituting in the first equation in order to find the pressure.
+The solution of the stationary Euler equations is typically performed
+in order to understand the behavior of the given (possibly complex)
+geometry when a prescribed motion is enforced on the system.
+
+The first step in this process is to change the frame of reference from a
+coordinate system moving along with the body to one in which the body moves
+through a fluid that is at rest at infinity. This can be expressed by
+introducing a new velocity $\mathbf{\tilde{v}}=\mathbf{v}-\mathbf{v}_\infty$ for
+which we find that the same equations hold (because $\nabla\cdot
+\mathbf{v}_\infty=0$) and we have boundary conditions
+\f[
+  \label{eq:boundary-conditions-tilde}
+  \begin{aligned}
+    \mathbf{n}\cdot\mathbf{\tilde{v}}& = -\mathbf{n}\cdot\mathbf{v}_\infty \qquad && \text{ on } \partial\Omega \\
+    \mathbf{\tilde{v}}& = 0 && \text{ when } |\mathbf{x}| \to \infty,
+  \end{aligned}
+\f]
+
+If we assume that the fluid is irrotational, i.e., $\nabla \times
+\mathbf{v}=0$ in $\mathbb{R}^n\backslash\Omega$, we can represent the
+velocity, and consequently also the perturbation velocity, as the
+gradient of a scalar function:
+\f[
+  \mathbf{\tilde{v}}=\nabla\phi,
+\f]
+and so the second part of Euler equations above can be rewritten
+as the homogeneous Laplace equation for the unknown $\phi$:
+\f{align*}
+\label{laplace}
+\Delta\phi &= 0 \qquad &&\text{in}\ \mathbb{R}^n\backslash\Omega,
+	   \\
+	   \mathbf{n}\cdot\nabla\phi &= -\mathbf{n}\cdot\mathbf{v}_\infty
+	   && \text{on}\ \partial\Omega
+\f}
+while the momentum equation reduces to Bernoulli's equation that expresses the
+pressure $p$ as a function of the potential $\phi$:
+\f[
+\frac{p}{\rho} +\frac{1}{2} | \nabla \phi |^2 = 0 \qquad \text{in } \mathbb{R}^n\backslash\Omega.
+\f]
+
+So we can solve the problem by solving the Laplace equation for the
+potential.  We recall that the following functions, called fundamental
+solutions of the Laplace equation,
+
+\f[ \begin{aligned}
+\label{eq:3} G(\mathbf{y}-\mathbf{x}) = &
+-\frac{1}{2\pi}\ln|\mathbf{y}-\mathbf{x}| \qquad && \text{for } n=2 \\
+G(\mathbf{y}-\mathbf{x}) = &
+\frac{1}{4\pi}\frac{1}{|\mathbf{y}-\mathbf{x}|}&& \text{for } n=3,
+\end{aligned}
+\f]
+
+satisfy in a distributional sense the equation:
+
+\f[
+-\Delta_y G(\mathbf{y}-\mathbf{x}) = \delta(\mathbf{y}-\mathbf{x}),
+\f]
+
+where the derivatives are taken with respect to $\mathbf{y}$. By using
+the usual Green identities, our problem can be written on the boundary
+$\partial\Omega = \Gamma$ only. We recall the general definition of
+the second Green %identity:
+
+\f[\label{green}
+  \int_{\omega}
+  (-\Delta u)v\,dx + \int_{\partial\omega} \frac{\partial u}{\partial \tilde{\mathbf{n}} }v \,ds
+  =
+  \int_{\omega}
+  (-\Delta v)u\,dx + \int_{\partial\omega} u\frac{\partial v}{\partial \tilde{\mathbf{n}}} \,ds,
+\f]
+
+where $\tilde{\mathbf{n}}$ is the normal to the surface of $\omega$ pointing
+outwards from the domain of integration $\omega$.
+
+In our case the domain of integration is the domain
+$\mathbb{R}^n\backslash\Omega$, whose boundary is $ \Gamma_\infty \cup
+\Gamma$, where the "boundary" at infinity is defined as
+
+\f[
+\Gamma_\infty := \lim_{r\to\infty} \partial B_r(0).
+\f]
+
+In our program the normals are defined as <i>outer</i> to the domain
+$\Omega$, that is, they are in fact <i>inner</i> to the integration
+domain, and some care is required in defining the various integrals
+with the correct signs for the normals, i.e. replacing $\tilde{\mathbf{n}}$
+by $-\mathbf{n}$.
+
+If we substitute $u$ and $v$ in the Green %identity with the solution
+$\phi$ and with the fundamental solution of the Laplace equation
+respectively, as long as $\mathbf{x}$ is chosen in the region
+$\mathbb{R}^n\backslash\Omega$, we obtain:
+\f[
+  \phi(\mathbf{x}) -
+  \int_{\Gamma\cup\Gamma_\infty}\frac{\partial G(\mathbf{y}-\mathbf{x})}{\partial \mathbf{n}_y}\phi(\mathbf{y})\,ds_y
+  =
+  -\int_{\Gamma\cup\Gamma_\infty}G(\mathbf{y}-\mathbf{x})\frac{\partial \phi}{\partial \mathbf{n}_y}(\mathbf{y})\,ds_y
+  \qquad \forall\mathbf{x}\in \mathbb{R}^n\backslash\Omega
+\f]
+
+where the normals now point <i>inward</i> with respect to the domain of
+integration.
+
+Notice that in the above equation, we also have the integrals on the
+portion of the boundary at $\Gamma_\infty$. Using the boundary
+conditions of our problem, we have that $\nabla \phi$ is zero at
+infinity (which simplifies the integral on $\Gamma_\infty$ on the
+right hand side).
+
+The integral on $\Gamma_\infty$ that appears on the left hand side can
+be treated by observing that $\nabla\phi=0$ implies that $\phi$ at
+infinity is necessarily constant. We define its value to be
+$\phi_\infty$.  It is an easy exercise to prove that
+
+\f[
+-\int_{\Gamma_\infty} \frac{\partial G(\mathbf{y}-\mathbf{x})}
+{\partial \mathbf{n}_y}\phi_\infty \,ds_y =
+\lim_{r\to\infty} \int_{\partial B_r(0)} \frac{\mathbf{r}}{r} \cdot \nabla G(\mathbf{y}-\mathbf{x})
+\phi_\infty \,ds_y = -\phi_\infty.
+\f]
+
+Using this result, we can reduce the above equation to one that only
+involves integrals over the boundary $\Gamma$, using the so-called
+Single and Double Layer Potential operators:
+
+\f[\label{integral}
+  \phi(\mathbf{x}) - (D\phi)(\mathbf{x}) = \phi_\infty
+  -\left(S \frac{\partial \phi}{\partial n_y}\right)(\mathbf{x})
+  \qquad \forall\mathbf{x}\in \mathbb{R}^n\backslash\Omega.
+\f]
+
+(The name of these operators comes from the fact that they describe the
+electric potential in $\mathbb{R}^n$ due to a single thin sheet of charges
+along a surface, and due to a double sheet of charges and anti-charges along
+the surface, respectively.)
+
+In our case, we know the Neumann values of $\phi$ on the boundary:
+$\mathbf{n}\cdot\nabla\phi = -\mathbf{n}\cdot\mathbf{v}_\infty$.
+Consequently,
+\f[
+  \phi(\mathbf{x}) - (D\phi)(\mathbf{x}) = \phi_\infty +
+   \left(S[\mathbf{n}\cdot\mathbf{v}_\infty]\right)(\mathbf{x})
+   \qquad \forall\mathbf{x} \in \mathbb{R}^n\backslash\Omega.
+\f]
+If we take the limit for $\mathbf{x}$ tending to $\Gamma$ of
+the above equation, using well known properties of the single and double layer
+operators, we obtain an equation for $\phi$ just on the boundary $\Gamma$ of
+$\Omega$:
+
+\f[\label{SD}
+  \alpha(\mathbf{x})\phi(\mathbf{x}) - (D\phi)(\mathbf{x}) = \phi_\infty +
+  \left(S [\mathbf{n}\cdot\mathbf{v}_\infty]\right)(\mathbf{x})
+  \quad \mathbf{x}\in \partial\Omega,
+\f]
+
+which is the %Boundary Integral Equation (BIE) we were looking for,
+where the quantity $\alpha(\mathbf{x})$ is the fraction of angle or
+solid angle by which the point $\mathbf{x}$ sees the domain of
+integration $\mathbb{R}^n\backslash\Omega$.
+
+In particular, at points $\mathbf{x}$ where the boundary
+$\partial\Omega$ is differentiable (i.e. smooth) we have
+$\alpha(\mathbf{x})=\frac 12$, but the value may be smaller or larger
+at points where the boundary has a corner or an edge.
+
+Substituting the single and double layer operators we get:
+\f[
+  \alpha(\mathbf{x}) \phi(\mathbf{x})
+  + \frac{1}{2\pi}\int_{\partial \Omega}  \frac{
+  (\mathbf{y}-\mathbf{x})\cdot\mathbf{n}_y  }{ |\mathbf{y}-\mathbf{x}|^2 }
+  \phi(\mathbf{y}) \,ds_y
+  = \phi_\infty
+    -\frac{1}{2\pi}\int_{\partial \Omega}  \ln|\mathbf{y}-\mathbf{x}| \, \mathbf{n}\cdot\mathbf{v_\infty}\,ds_y
+\f]
+for two dimensional flows and
+\f[
+  \alpha(\mathbf{x}) \phi(\mathbf{x})
+   + \frac{1}{4\pi}\int_{\partial \Omega} \frac{ (\mathbf{y}-\mathbf{x})\cdot\mathbf{n}_y  }{ |\mathbf{y}-\mathbf{x}|^3 }\phi(\mathbf{y})\,ds_y
+  = \phi_\infty +
+  \frac{1}{4\pi}\int_{\partial \Omega} \frac{1}{|\mathbf{y}-\mathbf{x}|} \, \mathbf{n}\cdot\mathbf{v_\infty}\,ds_y
+\f]
+for three dimensional flows, where the normal derivatives of the fundamental
+solutions have been written in a form that makes computation easier. In either
+case, $\phi$ is the solution of an integral equation posed entirely on the
+boundary since both $\mathbf{x},\mathbf{y}\in\partial\Omega$.
+
+Notice that the fraction of angle (in 2d) or solid angle (in 3d)
+$\alpha(\mathbf{x})$ by which the point $\mathbf{x}$ sees the domain of integration
+$\mathbb{R}^n\backslash\Omega$ can be defined using the double layer potential itself:
+\f[
+\alpha(\mathbf{x}) := 1 -
+\frac{1}{2(n-1)\pi}\int_{\partial \Omega} \frac{ (\mathbf{y}-\mathbf{x})\cdot\mathbf{n}_y  }
+{ |\mathbf{y}-\mathbf{x}|^{n} }\,ds_y = 1+
+\int_{\partial \Omega} \frac{ \partial G(\mathbf{y}-\mathbf{x}) }{\partial \mathbf{n}_y} \, ds_y.
+\f]
+
+The reason why this is possible can be understood if we consider the
+fact that the solution of a pure Neumann problem is known up to an
+arbitrary constant $c$, which means that, if we set the Neumann data
+to be zero, then any constant $\phi = \phi_\infty$ will be a solution.
+Inserting the constant solution and the Neumann boundary condition in the 
+boundary integral equation, we have
+@f{align*}
+\alpha\left(\mathbf{x}\right)\phi\left(\mathbf{x}\right)
+&=\int_{\Omega}\phi\left(\mathbf{y}\right)\delta\left(\mathbf{y}-\mathbf{x}\right)\, dy\\
+\Rightarrow
+\alpha\left(\mathbf{x}\right)\phi_\infty
+&=\phi_\infty\int_{\Gamma\cup\Gamma_\infty}\frac{ \partial G(\mathbf{y}-\mathbf{x}) }{\partial \mathbf{n}_y} \, ds_y
+=\phi_\infty\left[\int_{\Gamma_\infty}\frac{ \partial G(\mathbf{y}-\mathbf{x}) }{\partial \mathbf{n}_y} \, ds_y
++\int_{\Gamma}\frac{ \partial G(\mathbf{y}-\mathbf{x}) }{\partial \mathbf{n}_y} \, ds_y
+\right]
+@f}
+The integral on $\Gamma_\infty$ is unity, see above, so division by the constant $\phi_\infty$ gives us the explicit 
+expression above for $\alpha(\mathbf{x})$.
+
+While this example program is really only focused on the solution of the
+boundary integral equation, in a realistic setup one would still need to solve
+for the velocities. To this end, note that we have just computed
+$\phi(\mathbf{x})$ for all $\mathbf{x}\in\partial\Omega$. In the next step, we
+can compute (analytically, if we want) the solution $\phi(\mathbf{x})$ in all
+of $\mathbb{R}^n\backslash\Omega$. To this end, recall that we had
+\f[
+  \phi(\mathbf{x})
+  =
+  \phi_\infty +
+  (D\phi)(\mathbf{x})
+  +
+  \left(S[\mathbf{n}\cdot\mathbf{v}_\infty]\right)(\mathbf{x})
+  \qquad \forall\mathbf{x}\in \mathbb{R}^n\backslash\Omega.
+\f]
+where now we have everything that is on the right hand side ($S$ and $D$ are
+integrals we can evaluate, the normal velocity on the boundary is given, and
+$\phi$ on the boundary we have just computed). Finally, we can then recover
+the velocity as $\mathbf{\tilde v}=\nabla \phi$.
+
+Notice that the evaluation of the above formula for $\mathbf{x} \in
+\Omega$ should yield zero as a result, since the integration of
+the Dirac delta $\delta(\mathbf{x})$ in the domain
+$\mathbb{R}^n\backslash\Omega$ is always zero by definition.
+
+As a final test, let us verify that this velocity indeed satisfies the
+momentum balance equation for a stationary flow field, i.e., whether
+$\mathbf{v}\cdot\nabla\mathbf{v} = -\frac 1\rho \nabla p$ where
+$\mathbf{v}=\mathbf{\tilde
+v}+\mathbf{v}_\infty=\nabla\phi+\mathbf{v}_\infty$ for some (unknown) pressure
+$p$ and a given constant $\rho$. In other words, we would like to verify that
+Bernoulli's law as stated above indeed holds. To show this, we use that
+the left hand side of this equation equates to
+@f{align*}
+  \mathbf{v}\cdot\nabla\mathbf{v}
+  &=
+  [(\nabla\phi+\mathbf{v}_\infty)\cdot\nabla] (\nabla\phi+\mathbf{v}_\infty)
+  \\
+  &=
+  [(\nabla\phi+\mathbf{v}_\infty)\cdot\nabla] (\nabla\phi)
+@f}
+where we have used that $\mathbf{v}_\infty$ is constant. We would like to
+write this expression as the gradient of something (remember that $\rho$ is a
+constant). The next step is more
+convenient if we consider the components of the equation individually
+(summation over indices that appear twice is implied):
+@f{align*}
+  [\mathbf{v}\cdot\nabla\mathbf{v}]_i
+  &=
+  (\partial_j\phi+v_{\infty,j}) \partial_j \partial_i\phi
+  \\
+  &=
+  \partial_j [(\partial_j\phi+v_{\infty,j}) \partial_i\phi]
+  -
+  \partial_j [(\partial_j\phi+v_{\infty,j})] \partial_i\phi
+  \\
+  &=
+  \partial_j [(\partial_j\phi+v_{\infty,j}) \partial_i\phi]
+@f}
+because $\partial_j \partial_j\phi = \Delta \phi = 0$ and $\textrm{div}
+\ \mathbf{v}_\infty=0$. Next,
+@f{align*}
+  [\mathbf{v}\cdot\nabla\mathbf{v}]_i
+  &=
+  \partial_j [(\partial_j\phi+v_{\infty,j}) \partial_i\phi]
+  \\
+  &=
+  \partial_j [(\partial_j\phi) (\partial_i\phi)]
+  +
+  \partial_j [v_{\infty,j} \partial_i\phi]
+  \\
+  &=
+  \partial_j [(\partial_j\phi) (\partial_i\phi)]
+  +
+  \partial_j [v_{\infty,j}] \partial_i\phi
+  +
+  v_{\infty,j} \partial_j \partial_i\phi
+  \\
+  &=
+  \partial_j [(\partial_j\phi) (\partial_i\phi)]
+  +
+  v_{\infty,j} \partial_j \partial_i\phi
+  \\
+  &=
+  \partial_i \partial_j [(\partial_j\phi) \phi]
+  -
+  \partial_j [\partial_i (\partial_j\phi) \phi]
+  +
+  \partial_i [v_{\infty,j} \partial_j \phi]
+  -
+  \partial_i [v_{\infty,j}] \partial_j \phi
+@f}
+Again, the last term disappears because $\mathbf{v}_\infty$ is constant and we
+can merge the first and third term into one:
+@f{align*}
+  [\mathbf{v}\cdot\nabla\mathbf{v}]_i
+  &=
+  \partial_i (\partial_j [(\partial_j\phi) \phi] + v_{\infty,j} \partial_j \phi)
+  -
+  \partial_j [\partial_i (\partial_j\phi) \phi]
+  \\
+  &=
+  \partial_i [(\partial_j\phi)(\partial_j \phi) + v_{\infty,j} \partial_j \phi]
+  -
+  \partial_j [\partial_i (\partial_j\phi) \phi]
+@f}
+
+We now only need to massage that last term a bit more. Using the product rule,
+we get
+@f{align*}
+  \partial_j [\partial_i (\partial_j\phi) \phi]
+  &=
+  \partial_i [\partial_j \partial_j\phi] \phi
+  +
+  \partial_i [\partial_j \phi] (\partial_j \phi).
+@f}
+The first of these terms is zero (because, again, the summation over $j$ gives
+$\Delta\phi$, which is zero). The last term can be written as $\frac 12
+\partial_i [(\partial_j\phi)(\partial_j\phi)]$ which is in the desired gradient
+form. As a consequence, we can now finally state that
+@f{align*}
+  [\mathbf{v}\cdot\nabla\mathbf{v}]_i
+  &=
+  \partial_i (\partial_j [(\partial_j\phi) \phi] + v_{\infty,j} \partial_j \phi)
+  -
+  \partial_j [\partial_i (\partial_j\phi) \phi]
+  \\
+  &=
+  \partial_i
+  \left[
+    (\partial_j\phi)(\partial_j \phi) + v_{\infty,j} \partial_j \phi
+    -
+    \frac 12 (\partial_j\phi)(\partial_j\phi)
+  \right],
+  \\
+  &=
+  \partial_i
+  \left[
+    \frac 12 (\partial_j\phi)(\partial_j \phi) + v_{\infty,j} \partial_j \phi
+  \right],
+@f}
+or in vector form:
+@f[
+  \mathbf{v}\cdot\nabla\mathbf{v}
+  =
+  \nabla
+  \left[
+    \frac 12 \mathbf{\tilde v}^2
+    + \mathbf{v}_{\infty} \cdot \mathbf{\tilde v}
+  \right],
+@f]
+or in other words:
+@f[
+  p
+  =
+  -\rho
+  \left[
+    \frac 12 \mathbf{\tilde v}^2
+    + \mathbf{v}_{\infty} \cdot \mathbf{\tilde v}
+  \right]
+  =
+  -\rho
+  \left[
+    \frac 12 \mathbf{v}^2
+    -
+    \frac 12 \mathbf{v}_{\infty}^2
+  \right]
+  .
+@f]
+Because the pressure is only determined up to a constant (it appears only with
+a gradient in the equations), an equally valid definition is
+@f[
+  p
+  =
+  -\frac 12 \rho \mathbf{v}^2
+  .
+@f]
+This is exactly Bernoulli's law mentioned above.
+
+
+<h3>The numerical approximation</h3>
+
+Numerical approximations of %Boundary Integral Equations (BIE) are commonly
+referred to as the boundary element method or panel method (the latter
+expression being used mostly in the computational fluid dynamics community).
+The goal of the following test problem is to solve the integral
+formulation of the Laplace equation with Neumann boundary conditions,
+using a circle and a sphere respectively in two and three space
+dimensions, illustrating along the way the features that allow one to
+treat boundary element problems almost as easily as finite element
+problems using the deal.II library.
+
+To this end, let $\mathcal{T}_h = \bigcup_i K_i$ be a subdivision of the
+manifold $\Gamma = \partial \Omega$ into $M$ line segments if $n=2$, or $M$
+quadrilaterals if $n=3$. We will call each individual segment or
+quadrilateral an <i>element</i> or <i>cell</i>, independently of the
+dimension $n$ of the surrounding space $\mathbb{R}^n$.
+We define the finite dimensional space $V_h$ as
+\f[
+  \label{eq:definition-Vh}
+  V_h := \{ v \in C^0(\Gamma) \text{ s.t. } v|_{K_i} \in \mathcal{Q}^1(K_i),
+  \forall i\},
+\f]
+with basis functions $\psi_i(\mathbf{x})$ for which we will use the usual FE_Q
+finite element, with the catch that this time it is defined on a manifold of
+codimension one (which we do by using the second template argument that is
+usually defaulted to equal the first; here, we will create objects
+<code>FE_Q@<dim-1,dim@></code> to indicate that we have <code>dim-1</code>
+dimensional cells in a <code>dim</code> dimensional space).
+An element $\phi_h$ of $V_h$ is uniquely
+identified by the vector $\boldsymbol{\phi}$ of its coefficients
+$\phi_i$, that is:
+\f[
+  \label{eq:definition-of-element}
+  \phi_h(\mathbf{x}) := \phi_i \psi_i(\mathbf{x}), \qquad
+  \boldsymbol{\phi} := \{ \phi_i \},
+\f]
+where summation  is implied over repeated indexes. Note that we could use
+discontinuous elements here — in fact, there is no real reason to use
+continuous ones since the integral formulation does not
+imply any derivatives on our trial functions so continuity is unnecessary,
+and often in the literature only piecewise constant elements are used.
+
+<h3> Collocation boundary element method </h3>
+
+By far, the most common approximation of boundary integral equations
+is by use of the collocation based boundary element method.
+
+This method requires the evaluation of the boundary integral equation
+at a number of collocation points which is equal to the number of
+unknowns of the system. The choice of these points is a delicate
+matter, that requires a careful study. Assume that these points are
+known for the moment, and call them $\mathbf x_i$ with $i=0...n\_dofs$.
+
+The problem then becomes:
+Given the datum $\mathbf{v}_\infty$, find a function $\phi_h$ in $V_h$
+such that the following $n\_dofs$ equations are satisfied:
+
+\f{align*}
+    \alpha(\mathbf{x}_i) \phi_h(\mathbf{x}_i)
+    - \int_{\Gamma_y} \frac{ \partial G(\mathbf{y}-\mathbf{x}_i)}{\partial\mathbf{n}_y }
+    \phi_h(\mathbf{y}) \,ds_y =
+    \int_{\Gamma_y} G(\mathbf{y}-\mathbf{x}_i) \,
+    \mathbf{n}_y\cdot\mathbf{v_\infty} \,ds_y
+    ,
+\f}
+
+where the quantity $\alpha(\mathbf{x}_i)$ is the fraction of (solid)
+angle by which the point $\mathbf{x}_i$ sees the domain $\Omega$, as
+explained above, and we set $\phi_\infty$ to be zero.  If the support
+points $\mathbf{x}_i$ are chosen appropriately, then the problem can
+be written as the following linear system:
+
+\f[
+\label{eq:linear-system}
+(\mathbf{A}+\mathbf{N})\boldsymbol\phi = \mathbf{b},
+\f]
+
+where
+
+\f[
+\begin{aligned}
+\mathbf{A}_{ij}&=
+\alpha(\mathbf{x}_i) \psi_j(\mathbf{x}_i)
+= \left(1+\int_\Gamma
+\frac{\partial G(\mathbf{y}-\mathbf{x}_i)}{\partial \mathbf{n}_y}\,ds_y\right)
+\psi_j(\mathbf{x}_i)
+\\
+\mathbf{N}_{ij}&= - \int_\Gamma
+  \frac{\partial G(\mathbf{y}-\mathbf{x}_i)}{\partial \mathbf{n}_y}
+  \psi_j(\mathbf{y}) \,ds_y
+\\
+\mathbf{b}_i&= \int_\Gamma
+   G(\mathbf{y}-\mathbf{x}_i)  \, \mathbf{n}_y\cdot\mathbf{v_\infty}
+   ds_y.
+\end{aligned}
+\f]
+
+From a linear algebra point of view, the best possible choice of the
+collocation points is the one that renders the matrix
+$\mathbf{A}+\mathbf{N}$ the most diagonally dominant. A natural choice
+is then to select the $\mathbf{x}_i$ collocation points to be the
+support points of the nodal basis functions $\psi_i(\mathbf{x})$. In that
+case, $\psi_j(\mathbf{x}_i)=\delta_{ij}$, and as a consequence the matrix
+$\mathbf{A}$ is diagonal with entries
+\f[
+  \mathbf{A}_{ii}
+  =
+  1+\int_\Gamma
+  \frac{\partial G(\mathbf{y}-\mathbf{x}_i)}{\partial \mathbf{n}_y}\,ds_y
+  =
+  1-\sum_j \mathbf{N}_{ij},
+\f]
+where we have used that $\sum_j \psi_j(\mathbf{y})=1$ for the usual Lagrange
+elements.
+With this choice of collocation points, the computation of the entries
+of the matrices $\mathbf{A}$, $\mathbf{N}$ and of the right hand side
+$\mathbf{b}$ requires the evaluation of singular integrals on the
+elements $K_i$ of the triangulation $\mathcal{T}_h$.
+As usual in these cases, all integrations are performed on a reference
+simple domain, i.e., we assume that each element $K_i$ of
+$\mathcal{T}_h$ can be expressed as a linear (in two dimensions) or
+bi-linear (in three dimensions) transformation of the reference
+boundary element $\hat K := [0,1]^{n-1}$, and we perform the integrations after a
+change of variables from the real element $K_i$ to the reference
+element $\hat K$.
+
+<h3> Treating the singular integrals. </h3>
+
+In two dimensions it is not necessary to compute the diagonal elements
+$\mathbf{N}_{ii}$ of the system matrix, since, even if the denominator
+goes to zero when $\mathbf{x}=\mathbf{y}$, the numerator is always
+zero because $\mathbf{n}_y$ and $(\mathbf{y}-\mathbf{x})$ are
+orthogonal (on our polygonal approximation of the boundary of $\Omega$), and
+the only singular integral arises in the computation
+of $\mathbf{b}_i$ on the i-th element of $\mathcal{T}_h$:
+\f[
+  \frac{1}{\pi}
+  \int_{K_i}
+  \ln|\mathbf{y}-\mathbf{x}_i| \, \mathbf{n}_y\cdot\mathbf{v_\infty} \,ds_y.
+\f]
+
+This can be easily treated by the QGaussLogR quadrature
+formula.
+
+Similarly, it is possible to use the QGaussOneOverR quadrature formula
+to perform the singular integrations in three dimensions. The
+interested reader will find detailed explanations on how these
+quadrature rules work in their documentation.
+
+The resulting matrix $\mathbf{A}+\mathbf{N}$ is full. Depending on its
+size, it might be convenient to use a direct solver or an iterative
+one. For the purpose of this example code, we chose to use only an
+iterative solver, without providing any preconditioner.
+
+If this were a production code rather than a demonstration of principles,
+there are techniques that are available to not store full matrices but instead
+store only those entries that are large and/or relevant. In the literature on
+boundary element methods, a plethora of methods is available that allows one to
+determine which elements are important and which are not, leading to a
+significantly sparser representation of these matrices that also facilitates
+rapid evaluations of the scalar product between vectors and matrices. This not
+being the goal of this program, we leave this for more sophisticated
+implementations.
+
+
+<h3>Implementation</h3>
+
+The implementation is rather straightforward. The main point that hasn't been
+used in any of the previous tutorial programs is that most classes in deal.II
+are not only templated on the dimension, but in fact on the dimension of the
+manifold on which we pose the differential equation as well as the dimension
+of the space into which this manifold is embedded. By default, the second
+template argument equals the first, meaning for example that we want to solve
+on a two-dimensional region of two-dimensional space. The triangulation class
+to use in this case would be <code>Triangulation@<2@></code>, which is an
+equivalent way of writing <code>Triangulation@<2,2@></code>.
+
+However, this doesn't have to be so: in the current example, we will for
+example want to solve on the surface of a sphere, which is a two-dimensional
+manifold embedded in a three-dimensional space. Consequently, the right class
+will be <code>Triangulation@<2,3@></code>, and correspondingly we will use
+<code>DoFHandler@<2,3@></code> as the DoF handler class and
+<code>FE_Q@<2,3@></code> for finite elements.
+
+Some further details on what one can do with things that live on
+curved manifolds can be found in the report
+<a target="_top"
+href="http://www.dealii.org/reports/codimension-one/desimone-heltai-manigrasso.pdf"><i>Tools
+for the Solution of PDEs Defined on Curved Manifolds with the deal.II
+Library</i> by A. DeSimone, L. Heltai, C. Manigrasso</a>. In addition, the
+step-38 tutorial program extends what we show here to cases where the equation
+posed on the manifold is not an integral operator but in fact involves
+derivatives.
+
+
+<h3>Testcase</h3>
+
+The testcase we will be solving is for a circular (in 2d) or spherical
+(in 3d) obstacle. Meshes for these geometries will be read in from
+files in the current directory and an object of type HyperBallBoundary
+will then be attached to the triangulation to allow mesh refinement
+that respects the continuous geometry behind the discrete initial
+mesh.
+
+For a sphere of radius $a$ translating at a velocity of $U$ in the $x$ direction, the potential reads
+@f{align*}
+\phi = -\frac{1}{2}U \left(\frac{a}{r}\right)^3 r \cos\theta
+@f}
+see, e.g. J. N. Newman, <i>Marine Hydrodynamics</i>, 1977,
+p. 127. For unit speed and radius, and restricting $(x,y,z)$ to lie
+on the surface of the sphere,
+$\phi = -x/2$. In the test problem,
+the flow is $(1,1,1)$, so the appropriate exact solution on the
+surface of the sphere is the superposition of the above solution with
+the analogous solution along the $y$ and $z$ axes, or $\phi =
+\frac{1}{2}(x + y + z)$.
diff --git a/examples/step-34/doc/kind b/examples/step-34/doc/kind
new file mode 100644
index 0000000..e62f4e7
--- /dev/null
+++ b/examples/step-34/doc/kind
@@ -0,0 +1 @@
+fluids
diff --git a/examples/step-34/doc/results.dox b/examples/step-34/doc/results.dox
new file mode 100644
index 0000000..dec4c54
--- /dev/null
+++ b/examples/step-34/doc/results.dox
@@ -0,0 +1,219 @@
+<h1>Results</h1>
+
+We ran the program using the following <code>parameters.prm</code> file (which
+can also be found in the directory in which all the other source files are):
+@verbatim
+# Listing of Parameters
+# ---------------------
+set Extend solution on the -2,2 box = true
+set External refinement             = 5
+set Number of cycles                = 4
+set Run 2d simulation               = true
+set Run 3d simulation               = true
+
+
+subsection Exact solution 2d
+  # Any constant used inside the function which is not a variable name.
+  set Function constants  = 
+
+  # Separate vector valued expressions by ';' as ',' is used internally by the
+  # function parser.
+  set Function expression = x+y   # default: 0
+
+  # The name of the variables as they will be used in the function, separated
+  # by ','.
+  set Variable names      = x,y,t
+end
+
+
+subsection Exact solution 3d
+  # Any constant used inside the function which is not a variable name.
+  set Function constants  = 
+
+  # Separate vector valued expressions by ';' as ',' is used internally by the
+  # function parser.
+  set Function expression = .5*(x+y+z)   # default: 0
+
+  # The name of the variables as they will be used in the function, separated
+  # by ','.
+  set Variable names      = x,y,z,t
+end
+
+
+subsection Quadrature rules
+  set Quadrature order          = 4
+  set Quadrature type           = gauss
+  set Singular quadrature order = 5
+end
+
+
+subsection Solver
+  set Log frequency = 1
+  set Log history   = false
+  set Log result    = true
+  set Max steps     = 100
+  set Tolerance     = 1.e-10
+end
+
+
+subsection Wind function 2d
+  # Any constant used inside the function which is not a variable name.
+  set Function constants  = 
+
+  # Separate vector valued expressions by ';' as ',' is used internally by the
+  # function parser.
+  set Function expression = 1; 1  # default: 0; 0
+
+  # The name of the variables as they will be used in the function, separated
+  # by ','.
+  set Variable names      = x,y,t
+end
+
+
+subsection Wind function 3d
+  # Any constant used inside the function which is not a variable name.
+  set Function constants  = 
+
+  # Separate vector valued expressions by ';' as ',' is used internally by the
+  # function parser.
+  set Function expression = 1; 1; 1 # default: 0; 0; 0
+
+  # The name of the variables as they will be used in the function, separated
+  # by ','.
+  set Variable names      = x,y,z,t
+end
+@endverbatim
+
+When we run the program, the following is printed on screen:
+@verbatim
+DEAL::
+DEAL::Parsing parameter file parameters.prm
+DEAL::for a 2 dimensional simulation. 
+DEAL:GMRES::Starting value 2.21576
+DEAL:GMRES::Convergence step 1 value 2.37635e-13
+DEAL::Cycle 0:
+DEAL::   Number of active cells:       20
+DEAL::   Number of degrees of freedom: 20
+DEAL:GMRES::Starting value 3.15543
+DEAL:GMRES::Convergence step 1 value 2.89310e-13
+DEAL::Cycle 1:
+DEAL::   Number of active cells:       40
+DEAL::   Number of degrees of freedom: 40
+DEAL:GMRES::Starting value 4.46977
+DEAL:GMRES::Convergence step 1 value 3.11815e-13
+DEAL::Cycle 2:
+DEAL::   Number of active cells:       80
+DEAL::   Number of degrees of freedom: 80
+DEAL:GMRES::Starting value 6.32373
+DEAL:GMRES::Convergence step 1 value 3.22474e-13
+DEAL::Cycle 3:
+DEAL::   Number of active cells:       160
+DEAL::   Number of degrees of freedom: 160
+DEAL::
+cycle cells dofs  L2(phi)  Linfty(alpha) 
+    0    20   20 4.465e-02             - 5.000e-02    - 
+    1    40   40 1.081e-02          2.05 2.500e-02 1.00 
+    2    80   80 2.644e-03          2.03 1.250e-02 1.00 
+    3   160  160 6.529e-04          2.02 6.250e-03 1.00 
+DEAL::
+DEAL::Parsing parameter file parameters.prm
+DEAL::for a 3 dimensional simulation. 
+DEAL:GMRES::Starting value 2.84666
+DEAL:GMRES::Convergence step 3 value 8.68638e-18
+DEAL::Cycle 0:
+DEAL::   Number of active cells:       24
+DEAL::   Number of degrees of freedom: 26
+DEAL:GMRES::Starting value 6.34288
+DEAL:GMRES::Convergence step 5 value 1.38740e-11
+DEAL::Cycle 1:
+DEAL::   Number of active cells:       96
+DEAL::   Number of degrees of freedom: 98
+DEAL:GMRES::Starting value 12.9780
+DEAL:GMRES::Convergence step 5 value 3.29225e-11
+DEAL::Cycle 2:
+DEAL::   Number of active cells:       384
+DEAL::   Number of degrees of freedom: 386
+DEAL:GMRES::Starting value 26.0874
+DEAL:GMRES::Convergence step 6 value 1.47271e-12
+DEAL::Cycle 3:
+DEAL::   Number of active cells:       1536
+DEAL::   Number of degrees of freedom: 1538
+DEAL::
+cycle cells dofs  L2(phi)  Linfty(alpha) 
+    0    24   26 6.873e-01             - 2.327e-01    - 
+    1    96   98 1.960e-01          1.81 1.239e-01 0.91 
+    2   384  386 4.837e-02          2.02 6.319e-02 0.97 
+    3  1536 1538 1.176e-02          2.04 3.176e-02 0.99 
+@endverbatim
+
+As we can see from the convergence table in 2d, if we choose
+quadrature formulas which are accurate enough, then the error we
+obtain for $\alpha(\mathbf{x})$ should be exactly the inverse of the
+number of elements. The approximation of the circle with N segments of
+equal size generates a regular polygon with N faces, whose angles are
+exactly $\pi-\frac {2\pi}{N}$, therefore the error we commit should be
+exactly $\frac 12 - (\frac 12 -\frac 1N) = \frac 1N$. In fact this is
+a very good indicator that we are performing the singular integrals in
+an appropriate manner. 
+
+The error in the approximation of the potential $\phi$ is largely due
+to approximation of the domain. A much better approximation could be
+obtained by using higher order mappings. 
+
+If we modify the main() function, setting fe_degree and mapping_degree
+to two, and raise the order of the quadrature formulas  in
+the parameter file, we obtain the following convergence table for the
+two dimensional simulation
+
+@verbatim
+cycle cells dofs  L2(phi)  Linfty(alpha) 
+    0    20   40 5.404e-05             - 2.306e-04    - 
+    1    40   80 3.578e-06          3.92 1.738e-05 3.73 
+    2    80  160 2.479e-07          3.85 1.253e-05 0.47 
+    3   160  320 1.856e-08          3.74 7.670e-06 0.71 
+@endverbatim
+
+and
+
+@verbatim
+cycle cells dofs  L2(phi)  Linfty(alpha) 
+    0    24   98 9.187e-03             - 8.956e-03    - 
+    1    96  386 3.991e-04          4.52 1.182e-03 2.92 
+    2   384 1538 2.113e-05          4.24 1.499e-04 2.98 
+    3  1536 6146 1.247e-06          4.08 1.896e-05 2.98 
+@endverbatim
+
+for the three dimensional case. As we can see, convergence results are
+much better with higher order mapping, mainly due to a better
+resolution of the curved geometry. Notice that, given the same number
+of degrees of freedom, for example in step 3 of the Q1 case and step 2
+of Q2 case in the three dimensional simulation, the error is roughly
+three orders of magnitude lower.
+
+The result of running these computations is a bunch of output files that we
+can pass to our visualization program of choice. 
+The output files are of two kinds: the potential on the boundary
+element surface, and the potential extended to the outer and inner
+domain. The combination of the two for the two dimensional case looks
+like
+
+<img src="http://www.dealii.org/images/steps/developer/step-34_2d.png" alt="">
+
+while in three dimensions we show first the potential on the surface,
+together with a contour plot,
+
+<img src="http://www.dealii.org/images/steps/developer/step-34_3d.png" alt="">
+
+and then the external contour plot of the potential, with opacity set to 25%:
+
+<img src="http://www.dealii.org/images/steps/developer/step-34_3d-2.png" alt="">
+
+
+<a name="extensions"></a>
+<h3>Possibilities for extensions</h3>
+
+This is the first tutorial program that considers solving equations defined on
+surfaces embedded in higher dimensional spaces. But the equation discussed
+here was relatively simple because it only involved an integral operator, not
+derivatives which are more difficult to define on the surface. The step-38
+tutorial program considers such problems and provides the necessary tools.
diff --git a/examples/step-34/doc/tooltip b/examples/step-34/doc/tooltip
new file mode 100644
index 0000000..35e3d59
--- /dev/null
+++ b/examples/step-34/doc/tooltip
@@ -0,0 +1 @@
+Boundary element methods for potential flow.
diff --git a/examples/step-34/parameters.prm b/examples/step-34/parameters.prm
new file mode 100644
index 0000000..60edd7c
--- /dev/null
+++ b/examples/step-34/parameters.prm
@@ -0,0 +1,81 @@
+# Listing of Parameters
+# ---------------------
+set Extend solution on the -2,2 box = true
+set External refinement             = 5
+set Number of cycles                = 4
+set Run 2d simulation               = true
+set Run 3d simulation               = true
+
+
+subsection Exact solution 2d
+  # Any constant used inside the function which is not a variable name.
+  set Function constants  = 
+
+  # Separate vector valued expressions by ';' as ',' is used internally by the
+  # function parser.
+  set Function expression = x+y   # default: 0
+
+  # The name of the variables as they will be used in the function, separated
+  # by ','.
+  set Variable names      = x,y,t
+end
+
+
+subsection Exact solution 3d
+  # Any constant used inside the function which is not a variable name.
+  set Function constants  = 
+
+  # Separate vector valued expressions by ';' as ',' is used internally by the
+  # function parser.
+  set Function expression = .5*(x+y+z)
+
+  # The name of the variables as they will be used in the function, separated
+  # by ','.
+  set Variable names      = x,y,z,t
+end
+
+
+subsection Quadrature rules
+  set Quadrature order          = 4
+  set Quadrature type           = gauss
+  set Singular quadrature order = 5
+end
+
+
+subsection Solver
+  set Log frequency = 1
+  set Log history   = false
+  set Log result    = true
+  set Max steps     = 100
+  set Tolerance     = 1.e-10
+end
+
+
+subsection Wind function 2d
+  # Any constant used inside the function which is not a variable name.
+  set Function constants  = 
+
+  # Separate vector valued expressions by ';' as ',' is used internally by the
+  # function parser.
+  set Function expression = 1; 1  # default: 0; 0
+
+  # The name of the variables as they will be used in the function, separated
+  # by ','.
+  set Variable names      = x,y,t
+end
+
+
+subsection Wind function 3d
+  # Any constant used inside the function which is not a variable name.
+  set Function constants  = 
+
+  # Separate vector valued expressions by ';' as ',' is used internally by the
+  # function parser.
+  set Function expression = 1; 1; 1
+
+  # The name of the variables as they will be used in the function, separated
+  # by ','.
+  set Variable names      = x,y,z,t
+end
+
+
diff --git a/examples/step-34/step-34.cc b/examples/step-34/step-34.cc
new file mode 100644
index 0000000..dd54cbb
--- /dev/null
+++ b/examples/step-34/step-34.cc
@@ -0,0 +1,1131 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2009 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Author: Luca Heltai, Cataldo Manigrasso, 2009
+ */
+
+
+// @sect3{Include files}
+
+// The program starts with including a bunch of include files that we will use
+// in the various parts of the program. Most of them have been discussed in
+// previous tutorials already:
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/base/convergence_table.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/quadrature_selector.h>
+#include <deal.II/base/parsed_function.h>
+#include <deal.II/base/utilities.h>
+
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/solver_control.h>
+#include <deal.II/lac/solver_gmres.h>
+#include <deal.II/lac/precondition.h>
+
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/grid_in.h>
+#include <deal.II/grid/grid_out.h>
+#include <deal.II/grid/manifold_lib.h>
+
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/mapping_q.h>
+
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/numerics/vector_tools.h>
+
+// And here are a few C++ standard header files that we will need:
+#include <cmath>
+#include <iostream>
+#include <fstream>
+#include <string>
+
+// The last part of this preamble is to import everything in the dealii
+// namespace into the one into which everything in this program will go:
+namespace Step34
+{
+  using namespace dealii;
+
+
+  // @sect3{Single and double layer operator kernels}
+
+  // First, let us define a bit of the boundary integral equation machinery.
+
+  // The following two functions are the actual calculations of the single and
+  // double layer potential kernels, that is $G$ and $\nabla G$. They are well
+  // defined only if the vector $R = \mathbf{y}-\mathbf{x}$ is different from
+  // zero.
+  namespace LaplaceKernel
+  {
+    // Single layer kernel G(R): -log|R|/(2 pi) in 2d and 1/(4 pi |R|) in 3d.
+    template <int dim>
+    double single_layer(const Tensor<1,dim> &R)
+    {
+      const double distance = R.norm();
+
+      if (dim == 2)
+        return (-std::log(distance) / (2*numbers::PI) );
+
+      if (dim == 3)
+        return (1./( distance*4*numbers::PI ) );
+
+      // No fundamental solution is provided for any other dimension.
+      Assert(false, ExcInternalError());
+      return 0.;
+    }
+
+
+
+    // Double layer kernel, i.e. the gradient of G: -R/(2 pi |R|^2) in 2d and
+    // -R/(4 pi |R|^3) in 3d.
+    template <int dim>
+    Tensor<1,dim> double_layer(const Tensor<1,dim> &R)
+    {
+      if (dim == 2)
+        return R / ( -2*numbers::PI * R.norm_square());
+
+      if (dim == 3)
+        return R / ( -4*numbers::PI * R.norm_square() * R.norm() );
+
+      // No fundamental solution is provided for any other dimension.
+      Assert(false, ExcInternalError());
+      return Tensor<1,dim>();
+    }
+  }
+
+
+  // @sect3{The BEMProblem class}
+
+  // The structure of a boundary element method code is very similar to the
+  // structure of a finite element code, and so the member functions of this
+  // class are like those of most of the other tutorial programs. In
+  // particular, by now you should be familiar with reading parameters from an
+  // external file, and with the splitting of the different tasks into
+  // different modules. The same applies to boundary element methods, and we
+  // won't comment too much on them, except on the differences.
+  template <int dim>
+  class BEMProblem
+  {
+  public:
+    BEMProblem(const unsigned int fe_degree = 1,       // degree of the FE_Q space
+               const unsigned int mapping_degree = 1); // degree of the MappingQ
+
+    void run();
+
+  private:
+
+    void read_parameters (const std::string &filename);
+
+    void read_domain();
+
+    void refine_and_resize();
+
+    // The only really different function that we find here is the assembly
+    // routine. We wrote this function in the most general way possible, in
+    // order to allow for easy generalization to higher order methods and to
+    // different fundamental solutions (e.g., Stokes or Maxwell).
+    //
+    // The most noticeable difference is the fact that the final matrix is
+    // full, and that we have a nested loop inside the usual loop on cells
+    // that visits all support points of the degrees of freedom.  Moreover,
+    // when the support point lies inside the cell which we are visiting, then
+    // the integral we perform becomes singular.
+    //
+    // The practical consequence is that we have two sets of quadrature
+    // formulas, finite element values and temporary storage, one for standard
+    // integration and one for the singular integration, which are used where
+    // necessary.
+    void assemble_system();
+
+    // There are two options for the solution of this problem. The first is to
+    // use a direct solver, and the second is to use an iterative solver. We
+    // opt for the second option.
+    //
+    // The matrix that we assemble is not symmetric, and we opt to use the
+    // GMRES method; however the construction of an efficient preconditioner
+    // for boundary element methods is not a trivial issue. Here we use an
+    // unpreconditioned GMRES solver. The options for the iterative solver, such
+    // as the tolerance, the maximum number of iterations, are selected
+    // through the parameter file.
+    void solve_system();
+
+    // Once we obtained the solution, we compute the $L^2$ error of the
+    // computed potential as well as the $L^\infty$ error of the approximation
+    // of the solid angle. The mesh we are using is an approximation of a
+    // smooth curve, therefore the computed diagonal matrix of fraction of
+    // angles or solid angles $\alpha(\mathbf{x})$ should be constantly equal
+    // to $\frac 12$. In this routine we output the error on the potential and
+    // the error in the approximation of the computed angle. Notice that the
+    // latter error is actually not the error in the computation of the angle,
+    // but a measure of how well we are approximating the sphere and the
+    // circle.
+    //
+    // Experimenting a little with the computation of the angles gives very
+    // accurate results for simpler geometries. To verify this you can comment
+    // out, in the read_domain() method, the tria.set_manifold(1, manifold)
+    // line, and check the alpha that is generated by the program. By removing
+    // this call, whenever the mesh is refined new nodes will be placed along
+    // the straight lines that make up the coarse mesh, rather than be pulled
+    // onto the surface that we really want to approximate. In the three
+    // dimensional case, the coarse grid of the sphere is obtained starting
+    // from a cube, and the obtained values of alphas are exactly $\frac 12$
+    // on the nodes of the faces, $\frac 34$ on the nodes of the edges and
+    // $\frac 78$ on the 8 nodes of the vertices.
+    void compute_errors(const unsigned int cycle);
+
+    // Once we obtained a solution on the codimension one domain, we want to
+    // interpolate it to the rest of the space. This is done by performing
+    // again the convolution of the solution with the kernel in the
+    // compute_exterior_solution() function.
+    //
+    // We would like to plot the velocity variable which is the gradient of
+    // the potential solution. The potential solution is only known on the
+    // boundary, but we use the convolution with the fundamental solution to
+    // interpolate it on a standard dim dimensional continuous finite element
+    // space. The plot of the gradient of the extrapolated solution will give
+    // us the velocity we want.
+    //
+    // In addition to the solution on the exterior domain, we also output the
+    // solution on the domain's boundary in the output_results() function, of
+    // course.
+    void compute_exterior_solution();
+
+    void output_results(const unsigned int cycle);
+
+    // To allow for dimension independent programming, we specialize this
+    // single function to extract the singular quadrature formula needed to
+    // integrate the singular kernels in the interior of the cells.
+    const Quadrature<dim-1> & get_singular_quadrature(
+      const typename DoFHandler<dim-1, dim>::active_cell_iterator &cell,
+      const unsigned int index) const;
+
+
+    // The usual deal.II classes can be used for boundary element methods by
+    // specifying the "codimension" of the problem. This is done by setting
+    // the optional second template arguments to Triangulation, FiniteElement
+    // and DoFHandler to the dimension of the embedding space. In our case we
+    // generate either 1 or 2 dimensional meshes embedded in 2 or 3
+    // dimensional spaces.
+    //
+    // The optional argument by default is equal to the first argument, and
+    // produces the usual finite element classes that we saw in all previous
+    // examples.
+    //
+    // The class is constructed in a way to allow for arbitrary order of
+    // approximation of both the domain (through high order mapping) and the
+    // finite element space. The order of the finite element space and of the
+    // mapping can be selected in the constructor of the class.
+
+    Triangulation<dim-1, dim>   tria;
+    FE_Q<dim-1,dim>             fe;
+    DoFHandler<dim-1,dim>       dh;
+    MappingQ<dim-1, dim>      mapping;
+
+    // In BEM methods, the matrix that is generated is dense. Depending on the
+    // size of the problem, the final system might be solved by direct LU
+    // decomposition, or by iterative methods. In this example we use an
+    // unpreconditioned GMRES method. Building a preconditioner for BEM methods
+    // is nontrivial, and we don't treat this subject here.
+
+    FullMatrix<double>    system_matrix;
+    Vector<double>        system_rhs;
+
+    // The next two variables will denote the solution $\phi$ as well as a
+    // vector that will hold the values of $\alpha(\mathbf x)$ (the fraction
+    // of $\Omega$ visible from a point $\mathbf x$) at the support points of
+    // our shape functions.
+
+    Vector<double>              phi;
+    Vector<double>              alpha;
+
+    // The convergence table is used to output errors in the exact solution
+    // and in the computed alphas.
+
+    ConvergenceTable  convergence_table;
+
+    // The following variables are the ones that we fill through a parameter
+    // file.  The new objects that we use in this example are the
+    // Functions::ParsedFunction object and the QuadratureSelector object.
+    //
+    // The Functions::ParsedFunction class allows us to easily and quickly
+    // define new function objects via parameter files, with custom
+    // definitions which can be very complex (see the documentation of that
+    // class for all the available options).
+    //
+    // We will allocate the quadrature object using the QuadratureSelector
+    // class that allows us to generate quadrature formulas based on an
+    // identifying string and on the possible degree of the formula itself. We
+    // use this to allow custom selection of the quadrature formulas for the
+    // standard integration, and to define the order of the singular
+    // quadrature rule.
+    //
+    // We also define a couple of parameters which are used in case we want
+    // to extend the solution to the entire domain.
+
+    Functions::ParsedFunction<dim> wind;  // constructed with dim components
+    Functions::ParsedFunction<dim> exact_solution;
+
+    unsigned int singular_quadrature_order;  // "Singular quadrature order"
+    std_cxx11::shared_ptr<Quadrature<dim-1> > quadrature;  // regular quadrature
+
+    SolverControl solver_control;  // filled from subsection "Solver"
+
+    unsigned int n_cycles;  // "Number of cycles"
+    unsigned int external_refinement;  // "External refinement"
+
+    bool run_in_this_dimension;  // "Run <dim>d simulation"
+    bool extend_solution;  // "Extend solution on the -2,2 box"
+  };
+
+
+  // @sect4{BEMProblem::BEMProblem and BEMProblem::read_parameters}
+
+  // The constructor initializes the various objects in much the same way as
+  // done in the finite element programs such as step-4 or step-6. The only
+  // new ingredient here is the ParsedFunction object, which needs, at
+  // construction time, the specification of the number of components.
+  //
+  // For the exact solution the number of vector components is one, and no
+  // action is required since one is the default value for a ParsedFunction
+  // object. The wind, however, requires dim components to be
+  // specified. Notice that when declaring entries in a parameter file for the
+  // expression of the Functions::ParsedFunction, we need to specify the
+  // number of components explicitly, since the function
+  // Functions::ParsedFunction::declare_parameters is static, and has no
+  // knowledge of the number of components.
+  template <int dim>
+  BEMProblem<dim>::BEMProblem(const unsigned int fe_degree,
+                              const unsigned int mapping_degree)
+    : fe(fe_degree), dh(tria), mapping(mapping_degree, true), wind(dim),
+    // The scalar settings below are placeholders equal to the defaults that
+    // read_parameters() declares; without them they would be indeterminate.
+    singular_quadrature_order(5), n_cycles(4), external_refinement(5),
+    run_in_this_dimension(true), extend_solution(true)
+  {}
+
+
+  template <int dim>
+  void BEMProblem<dim>::read_parameters (const std::string &filename)
+  {
+    deallog << std::endl << "Parsing parameter file " << filename << std::endl
+            << "for a " << dim << " dimensional simulation. " << std::endl;
+
+    ParameterHandler prm;  // local: declarations first, parsed values later
+
+    prm.declare_entry("Number of cycles", "4",
+                      Patterns::Integer());
+    prm.declare_entry("External refinement", "5",
+                      Patterns::Integer());
+    prm.declare_entry("Extend solution on the -2,2 box", "true",
+                      Patterns::Bool());
+    prm.declare_entry("Run 2d simulation", "true",
+                      Patterns::Bool());
+    prm.declare_entry("Run 3d simulation", "true",
+                      Patterns::Bool());
+
+    prm.enter_subsection("Quadrature rules");
+    {
+      prm.declare_entry("Quadrature type", "gauss",
+                        Patterns::Selection(QuadratureSelector<(dim-1)>::get_quadrature_names()));
+      prm.declare_entry("Quadrature order", "4", Patterns::Integer());
+      prm.declare_entry("Singular quadrature order", "5", Patterns::Integer());
+    }
+    prm.leave_subsection();
+
+    // For both two and three dimensions, we set the default input data to be
+    // such that the solution is $x+y$ or $x+y+z$. The actually computed
+    // solution will have value zero at infinity. In this case, this coincides
+    // with the exact solution, and no additional corrections are needed, but
+    // you should be aware of the fact that we arbitrarily set $\phi_\infty$,
+    // and the exact solution we pass to the program needs to have the same
+    // value at infinity for the error to be computed correctly.
+    //
+    // The use of the Functions::ParsedFunction object is pretty
+    // straightforward. The Functions::ParsedFunction::declare_parameters function
+    // takes an additional integer argument that specifies the number of
+    // components of the given function. Its default value is one. When the
+    // corresponding Functions::ParsedFunction::parse_parameters method is
+    // called, the calling object has to have the same number of components
+    // defined here, otherwise an exception is thrown.
+    //
+    // When declaring entries, we declare both two and three dimensional
+    // functions. However only the dim-dimensional one is ultimately
+    // parsed. This allows us to have only one parameter file for both 2 and 3
+    // dimensional problems.
+    //
+    // Notice that from a mathematical point of view, the wind function on the
+    // boundary should satisfy the condition $\int_{\partial\Omega}
+    // \mathbf{v}\cdot \mathbf{n} d \Gamma = 0$, for the problem to have a
+    // solution. If this condition is not satisfied, then no solution can be
+    // found, and the solver will not converge.
+    prm.enter_subsection("Wind function 2d");
+    {
+      Functions::ParsedFunction<2>::declare_parameters(prm, 2);
+      prm.set("Function expression", "1; 1");
+    }
+    prm.leave_subsection();
+
+    prm.enter_subsection("Wind function 3d");
+    {
+      Functions::ParsedFunction<3>::declare_parameters(prm, 3);
+      prm.set("Function expression", "1; 1; 1");
+    }
+    prm.leave_subsection();
+
+    prm.enter_subsection("Exact solution 2d");
+    {
+      Functions::ParsedFunction<2>::declare_parameters(prm);
+      prm.set("Function expression", "x+y");
+    }
+    prm.leave_subsection();
+
+    prm.enter_subsection("Exact solution 3d");
+    {
+      Functions::ParsedFunction<3>::declare_parameters(prm);
+      prm.set("Function expression", "x+y+z");
+    }
+    prm.leave_subsection();
+
+
+    // In the solver section, we set all SolverControl parameters. The object
+    // will then be fed to the GMRES solver in the solve_system() function.
+    prm.enter_subsection("Solver");
+    SolverControl::declare_parameters(prm);
+    prm.leave_subsection();
+
+    // With everything declared, read the input file that gives the parameters
+    // their values, then extract them from the ParameterHandler object.
+    // NOTE(review): failures of read_input are not handled here -- confirm.
+    prm.read_input(filename);
+
+    n_cycles = prm.get_integer("Number of cycles");
+    external_refinement = prm.get_integer("External refinement");
+    extend_solution = prm.get_bool("Extend solution on the -2,2 box");
+
+    prm.enter_subsection("Quadrature rules");
+    {
+      quadrature =
+        std_cxx11::shared_ptr<Quadrature<dim-1> >
+        (new QuadratureSelector<dim-1> (prm.get("Quadrature type"),
+                                        prm.get_integer("Quadrature order")));
+      singular_quadrature_order = prm.get_integer("Singular quadrature order");
+    }
+    prm.leave_subsection();
+
+    prm.enter_subsection(std::string("Wind function ")+
+                         Utilities::int_to_string(dim)+std::string("d"));
+    {
+      wind.parse_parameters(prm);
+    }
+    prm.leave_subsection();
+
+    prm.enter_subsection(std::string("Exact solution ")+
+                         Utilities::int_to_string(dim)+std::string("d"));
+    {
+      exact_solution.parse_parameters(prm);
+    }
+    prm.leave_subsection();
+
+    prm.enter_subsection("Solver");
+    solver_control.parse_parameters(prm);
+    prm.leave_subsection();
+
+
+    // Finally, here's another example of how to use parameter files in
+    // dimension independent programming.  If we wanted to switch off one of
+    // the two simulations, we could do this by setting the corresponding "Run
+    // 2d simulation" or "Run 3d simulation" flag to false:
+    run_in_this_dimension = prm.get_bool("Run " +
+                                         Utilities::int_to_string(dim) +
+                                         "d simulation");
+  }
+
+
+  // @sect4{BEMProblem::read_domain}
+
+  // A boundary element method triangulation is basically the same as a
+  // (dim-1) dimensional triangulation, with the difference that the vertices
+  // belong to a (dim) dimensional space.
+  //
+  // Some of the mesh formats supported in deal.II use by default three
+  // dimensional points to describe meshes. These are the formats which are
+  // compatible with the boundary element method capabilities of deal.II. In
+  // particular we can use either UCD or GMSH formats. In both cases, we have
+  // to be particularly careful with the orientation of the mesh, because,
+  // unlike in the standard finite element case, no reordering or
+  // compatibility check is performed here.  All meshes are considered as
+  // oriented, because they are embedded in a higher dimensional space. (See
+  // the documentation of the GridIn and of the Triangulation for further
+  // details on orientation of cells in a triangulation.) In our case, the
+  // normals to the mesh are external to both the circle in 2d or the sphere
+  // in 3d.
+  //
+  // The other detail that is required for appropriate refinement of
+  // the boundary element mesh, is an accurate description of the
+  // manifold that the mesh is approximating. We already saw this
+  // several times for the boundary of standard finite element meshes
+  // (for example in step-5 and step-6), and here the principle and
+  // usage is the same, except that the SphericalManifold class takes
+  // an additional template parameter that specifies the embedding
+  // space dimension. The function object still has to be static to
+  // live at least as long as the triangulation object to which it is
+  // attached.
+
+  template <int dim>
+  void BEMProblem<dim>::read_domain()
+  {
+    // The manifold description is static so that it lives at least as long as
+    // the triangulation it gets attached to.
+    static const Point<dim> origin = Point<dim>();
+    static const SphericalManifold<dim-1, dim> sphere(origin);
+
+    // Pick the coarse mesh that matches the space dimension.
+    std::ifstream mesh_file;
+    if (dim == 2)
+      mesh_file.open ("coarse_circle.inp");
+    else if (dim == 3)
+      mesh_file.open ("coarse_sphere.inp");
+    else
+      {
+        Assert (false, ExcNotImplemented());
+      }
+
+    // Read the UCD description of the mesh straight into the triangulation.
+    GridIn<dim-1, dim> reader;
+    reader.attach_triangulation (tria);
+    reader.read_ucd (mesh_file);
+
+    // Every cell gets manifold id 1, which is then bound to the sphere.
+    tria.set_all_manifold_ids(1);
+    tria.set_manifold(1, sphere);
+  }
+
+
+  // @sect4{BEMProblem::refine_and_resize}
+
+  // This function globally refines the mesh, distributes degrees of freedom,
+  // and resizes matrices and vectors.
+
+  template <int dim>
+  void BEMProblem<dim>::refine_and_resize()
+  {
+    // One more global refinement step, followed by a fresh dof distribution.
+    tria.refine_global(1);
+    dh.distribute_dofs(fe);
+
+    // The dense matrix is square; all vectors have one entry per dof.
+    const unsigned int size = dh.n_dofs();
+    system_matrix.reinit(size, size);
+
+    alpha.reinit(size);
+    phi.reinit(size);
+    system_rhs.reinit(size);
+  }
+
+
+  // @sect4{BEMProblem::assemble_system}
+
+  // The following is the main function of this program, assembling the matrix
+  // that corresponds to the boundary integral equation.
+  template <int dim>
+  void BEMProblem<dim>::assemble_system()
+  {
+
+    // First we initialize an FEValues object with the quadrature formula for
+    // the integration of the kernel in non singular cells. This quadrature is
+    // selected with the parameter file, and needs to be quite precise, since
+    // the functions we are integrating are not polynomial functions.
+    FEValues<dim-1,dim> fe_v(mapping, fe, *quadrature,
+                             update_values |
+                             update_cell_normal_vectors |
+                             update_quadrature_points |
+                             update_JxW_values);
+
+    const unsigned int n_q_points = fe_v.n_quadrature_points;
+
+    std::vector<types::global_dof_index> local_dof_indices(fe.dofs_per_cell);
+
+    std::vector<Vector<double> > cell_wind(n_q_points, Vector<double>(dim) );
+    double normal_wind;  // wind times normal at the current quadrature point
+
+    // Unlike in finite element methods, if we use a collocation boundary
+    // element method, then in each assembly loop we only assemble the
+    // information that refers to the coupling between one degree of freedom
+    // (the degree associated with support point $i$) and the current
+    // cell. This is done using a vector of fe.dofs_per_cell elements, which
+    // will then be distributed to the matrix in the global row $i$. The
+    // following object will hold this information:
+    Vector<double>      local_matrix_row_i(fe.dofs_per_cell);
+
+    // The index $i$ runs on the collocation points, which are the support
+    // points of the $i$th basis function, while $j$ runs on inner integration
+    // points.
+
+    // We construct a vector of support points which will be used in the local
+    // integrations:
+    std::vector<Point<dim> > support_points(dh.n_dofs());
+    DoFTools::map_dofs_to_support_points<dim-1, dim>( mapping, dh, support_points);
+
+
+    // After doing so, we can start the integration loop over all cells, where
+    // we first initialize the FEValues object and get the values of
+    // $\mathbf{\tilde v}$ at the quadrature points (this vector field should
+    // be constant, but it doesn't hurt to be more general):
+    typename DoFHandler<dim-1,dim>::active_cell_iterator
+    cell = dh.begin_active(),
+    endc = dh.end();
+
+    for (cell = dh.begin_active(); cell != endc; ++cell)
+      {
+        fe_v.reinit(cell);
+        cell->get_dof_indices(local_dof_indices);
+
+        const std::vector<Point<dim> >    &q_points = fe_v.get_quadrature_points();
+        const std::vector<Tensor<1,dim> > &normals  = fe_v.get_all_normal_vectors();
+        wind.vector_value_list(q_points, cell_wind);
+
+        // We then form the integral over the current cell for all degrees of
+        // freedom (note that this includes degrees of freedom not located on
+        // the current cell, a deviation from the usual finite element
+        // integrals). The integral that we need to perform is singular if one
+        // of the local degrees of freedom is the same as the support point
+        // $i$. At the beginning of the loop we therefore check whether this is
+        // the case, and we store which one is the singular index:
+        for (unsigned int i=0; i<dh.n_dofs() ; ++i)
+          {
+
+            local_matrix_row_i = 0;
+
+            bool is_singular = false;
+            unsigned int singular_index = numbers::invalid_unsigned_int;
+
+            for (unsigned int j=0; j<fe.dofs_per_cell; ++j)
+              if (local_dof_indices[j] == i)
+                {
+                  singular_index = j;
+                  is_singular = true;
+                  break;
+                }
+
+            // We then perform the integral. If the index $i$ is not one of
+            // the local degrees of freedom, we simply have to add the single
+            // layer terms to the right hand side, and the double layer terms
+            // to the matrix:
+            if (is_singular == false)
+              {
+                for (unsigned int q=0; q<n_q_points; ++q)
+                  {
+                    normal_wind = 0;
+                    for (unsigned int d=0; d<dim; ++d)
+                      normal_wind += normals[q][d]*cell_wind[q](d);
+
+                    const Tensor<1,dim> R = q_points[q] - support_points[i];
+
+                    system_rhs(i) += ( LaplaceKernel::single_layer(R)   *
+                                       normal_wind                      *
+                                       fe_v.JxW(q) );
+
+                    for (unsigned int j=0; j<fe.dofs_per_cell; ++j)
+
+                      local_matrix_row_i(j) -= ( ( LaplaceKernel::double_layer(R)     *
+                                                   normals[q] )            *
+                                                 fe_v.shape_value(j,q)     *
+                                                 fe_v.JxW(q)       );
+                  }
+              }
+            else
+              {
+                // Now we treat the more delicate case. If we are here, this
+                // means that the cell that runs on the $j$ index contains
+                // support_points[i]. In this case both the single and the
+                // double layer potential are singular, and they require
+                // special treatment: a special quadrature formula is used
+                // that allows one to integrate arbitrary functions against a
+                // singular weight on the reference cell.
+                //
+                // The correct quadrature formula is selected by the
+                // get_singular_quadrature function, which is explained in
+                // detail below.
+                //
+                // NOTE(review): fe_v_singular.JxW(q) is used below although
+                // update_JxW_values is not requested explicitly -- confirm.
+                Assert(singular_index != numbers::invalid_unsigned_int,
+                       ExcInternalError());
+
+                const Quadrature<dim-1> & singular_quadrature =
+                  get_singular_quadrature(cell, singular_index);
+
+                FEValues<dim-1,dim> fe_v_singular (mapping, fe, singular_quadrature,
+                                                   update_jacobians |
+                                                   update_values |
+                                                   update_cell_normal_vectors |
+                                                   update_quadrature_points );
+
+                fe_v_singular.reinit(cell);
+
+                std::vector<Vector<double> > singular_cell_wind( singular_quadrature.size(),
+                                                                 Vector<double>(dim) );
+
+                const std::vector<Tensor<1,dim> > &singular_normals  = fe_v_singular.get_all_normal_vectors();
+                const std::vector<Point<dim> >    &singular_q_points = fe_v_singular.get_quadrature_points();
+
+                wind.vector_value_list(singular_q_points, singular_cell_wind);
+
+                for (unsigned int q=0; q<singular_quadrature.size(); ++q)
+                  {
+                    const Tensor<1,dim> R = singular_q_points[q] - support_points[i];
+                    double normal_wind = 0;  // shadows the function-level normal_wind
+                    for (unsigned int d=0; d<dim; ++d)
+                      normal_wind += (singular_cell_wind[q](d)*
+                                      singular_normals[q][d]);
+
+                    system_rhs(i) += ( LaplaceKernel::single_layer(R) *
+                                       normal_wind                         *
+                                       fe_v_singular.JxW(q) );
+
+                    for (unsigned int j=0; j<fe.dofs_per_cell; ++j)
+                      {
+                        local_matrix_row_i(j) -= (( LaplaceKernel::double_layer(R) *
+                                                    singular_normals[q])                *
+                                                  fe_v_singular.shape_value(j,q)        *
+                                                  fe_v_singular.JxW(q)       );
+                      }
+                  }
+              }
+
+            // Finally, we need to add the contributions of the current cell
+            // to the global matrix.
+            for (unsigned int j=0; j<fe.dofs_per_cell; ++j)
+              system_matrix(i,local_dof_indices[j])
+              += local_matrix_row_i(j);
+          }
+      }
+
+    // The second part of the integral operator is the term
+    // $\alpha(\mathbf{x}_i) \phi_j(\mathbf{x}_i)$. Since we use a collocation
+    // scheme, $\phi_j(\mathbf{x}_i)=\delta_{ij}$ and the corresponding matrix
+    // is a diagonal one with entries equal to $\alpha(\mathbf{x}_i)$.
+
+    // One quick way to compute this diagonal matrix of the solid angles, is
+    // to use the Neumann matrix itself. It is enough to multiply the matrix
+    // with a vector of elements all equal to -1, to get the diagonal matrix
+    // of the alpha angles, or solid angles (see the formula in the
+    // introduction for this). The result is then added back onto the system
+    // matrix object to yield the final form of the matrix:
+    Vector<double> ones(dh.n_dofs());
+    ones.add(-1.);  // despite its name, this vector is the one "all equal to -1"
+
+    system_matrix.vmult(alpha, ones);
+    alpha.add(1);
+    for (unsigned int i = 0; i<dh.n_dofs(); ++i)
+      system_matrix(i,i) +=  alpha(i);
+  }
+
+
+  // @sect4{BEMProblem::solve_system}
+
+  // Solves the dense system for phi with GMRES and no preconditioner.
+  template <int dim>
+  void BEMProblem<dim>::solve_system()
+  {
+    SolverGMRES<Vector<double> >(solver_control)
+    .solve (system_matrix, phi, system_rhs, PreconditionIdentity());
+  }
+
+
+  // @sect4{BEMProblem::compute_errors}
+
+  // The computation of the errors is exactly the same as in all other example
+  // programs, and we won't comment too much. Notice how the same methods that
+  // are used in the finite element methods can be used here.
+  template <int dim>
+  void BEMProblem<dim>::compute_errors(const unsigned int cycle)
+  {
+    Vector<float> difference_per_cell (tria.n_active_cells());
+    VectorTools::integrate_difference (mapping, dh, phi,
+                                       exact_solution,
+                                       difference_per_cell,
+                                       QGauss<(dim-1)>(2*fe.degree+1),
+                                       VectorTools::L2_norm);
+    const double L2_error = difference_per_cell.l2_norm();  // combine the cell-wise L2 errors into one number
+
+
+    // The error in the alpha vector can be computed directly using the
+    // Vector::linfty_norm() function, since on each node, the value should be
+    // $\frac 12$. The cell and dof counts are then logged, and all values are
+    // appended to our ConvergenceTable object for later computation of rates:
+    Vector<double> difference_per_node(alpha);
+    difference_per_node.add(-.5);
+
+    const double alpha_error = difference_per_node.linfty_norm();
+    const unsigned int n_active_cells=tria.n_active_cells();
+    const unsigned int n_dofs=dh.n_dofs();
+
+    deallog << "Cycle " << cycle << ':'
+            << std::endl
+            << "   Number of active cells:       "
+            << n_active_cells
+            << std::endl
+            << "   Number of degrees of freedom: "
+            << n_dofs
+            << std::endl;
+
+    convergence_table.add_value("cycle", cycle);
+    convergence_table.add_value("cells", n_active_cells);
+    convergence_table.add_value("dofs", n_dofs);
+    convergence_table.add_value("L2(phi)", L2_error);
+    convergence_table.add_value("Linfty(alpha)", alpha_error);
+  }
+
+
+  // Singular integration requires a careful selection of the quadrature
+  // rules. In particular the deal.II library provides quadrature rules which
+  // are tailored for logarithmic singularities (QGaussLog, QGaussLogR), as
+  // well as for 1/R singularities (QGaussOneOverR).
+  //
+  // Singular integration is typically obtained by constructing weighted
+  // quadrature formulas with singular weights, so that it is possible to
+  // write
+  //
+  // \f[ \int_K f(x) s(x) dx = \sum_{i=1}^N w_i f(q_i) \f]
+  //
+  // where $s(x)$ is a given singularity, and the weights and quadrature
+  // points $w_i,q_i$ are carefully selected to make the formula above an
+  // equality for a certain class of functions $f(x)$.
+  //
+  // In all the finite element examples we have seen so far, the weight of the
+  // quadrature itself (namely, the function $s(x)$), was always constantly
+  // equal to 1.  For singular integration, we have two choices: we can use
+  // the definition above, factoring out the singularity from the integrand
+  // (i.e., integrating $f(x)$ with the special quadrature rule), or we can
+  // ask the quadrature rule to "normalize" the weights $w_i$ with $s(q_i)$:
+  //
+  // \f[ \int_K f(x) s(x) dx = \int_K g(x) dx = \sum_{i=1}^N
+  //   \frac{w_i}{s(q_i)} g(q_i) \f]
+  //
+  // We use this second option, through the @p factor_out_singularity
+  // parameter of both QGaussLogR and QGaussOneOverR.
+  //
+  // These integrals are somewhat delicate, especially in two dimensions, due
+  // to the transformation from the real to the reference cell, where the
+  // variable of integration is scaled with the determinant of the
+  // transformation.
+  //
+  // In two dimensions this process does not result only in a factor appearing
+  // as a constant factor on the entire integral, but also on an additional
+  // integral altogether that needs to be evaluated:
+  //
+  // \f[ \int_0^1 f(x)\ln(x/\alpha) dx = \int_0^1 f(x)\ln(x) dx - \int_0^1
+  //  f(x) \ln(\alpha) dx.  \f]
+  //
+  // This process is taken care of by the constructor of the QGaussLogR class,
+  // which adds additional quadrature points and weights to take into
+  // consideration also the second part of the integral.
+  //
+  // A similar reasoning should be done in the three dimensional case, since
+  // the singular quadrature is tailored on the inverse of the radius $r$ in
+  // the reference cell, while our singular function lives in real space,
+  // however in the three dimensional case everything is simpler because the
+  // singularity scales linearly with the determinant of the
+  // transformation. This allows us to build the singular two dimensional
+  // quadrature rules only once and reuse them over all cells.
+  //
+  // In the one dimensional singular integration this is not possible, since
+  // we need to know the scaling parameter for the quadrature, which is not
+  // known a priori. Here, the quadrature rule itself depends also on the size
+  // of the current cell. For this reason, it is necessary to create a new
+  // quadrature for each singular integration.
+  //
+  // The different quadrature rules are built inside the
+  // get_singular_quadrature, which is specialized for dim=2 and dim=3, and
+  // they are retrieved inside the assemble_system function. The index given
+  // as an argument is the index of the unit support point where the
+  // singularity is located.
+
+  template<>
+  const Quadrature<2> &BEMProblem<3>::get_singular_quadrature(
+    const DoFHandler<2,3>::active_cell_iterator &,  // the cell is not used in this specialization
+    const unsigned int index) const
+  {
+    Assert(index < fe.dofs_per_cell,
+           ExcIndexRange(0, fe.dofs_per_cell, index));
+
+    static std::vector<QGaussOneOverR<2> > quadratures;  // one rule per unit support point, shared by all calls
+    if (quadratures.size() == 0)  // filled on the first call only, from the fe and order of that caller
+      for (unsigned int i=0; i<fe.dofs_per_cell; ++i)
+        quadratures.push_back(QGaussOneOverR<2>(singular_quadrature_order,
+                                                fe.get_unit_support_points()[i],
+                                                true));  // presumably the factor_out_singularity flag -- confirm against QGaussOneOverR
+    return quadratures[index];
+  }
+
+
+  template<>
+  const Quadrature<1> &BEMProblem<2>::get_singular_quadrature(
+    const DoFHandler<1,2>::active_cell_iterator &cell,
+    const unsigned int index) const
+  {
+    Assert(index < fe.dofs_per_cell,
+           ExcIndexRange(0, fe.dofs_per_cell, index));
+
+    // The rule depends on the cell size, so it is rebuilt on every call and
+    // kept in a function-local static object (no raw new/delete, no leak).
+    static Quadrature<1> singular_quadrature;
+    singular_quadrature = QGaussLogR<1>(singular_quadrature_order,
+                                        fe.get_unit_support_points()[index],
+                                        1./cell->measure(), true);
+    return singular_quadrature;
+  }
+
+
+
+  // @sect4{BEMProblem::compute_exterior_solution}
+
+  // We'd like to also know something about the value of the potential $\phi$
+  // in the exterior domain: after all our motivation to consider the boundary
+  // integral problem was that we wanted to know the velocity in the exterior
+  // domain!
+  //
+  // To this end, let us assume here that the boundary element domain is
+  // contained in the box $[-2,2]^{\text{dim}}$, and we extrapolate the actual
+  // solution inside this box using the convolution with the fundamental
+  // solution. The formula for this is given in the introduction.
+  //
+  // The reconstruction of the solution in the entire space is done on a
+  // continuous finite element grid of dimension dim. These are the usual
+  // ones, and we don't comment any further on them. At the end of the
+  // function, we output this exterior solution in, again, much the usual way.
+  template <int dim>
+  void BEMProblem<dim>::compute_exterior_solution()
+  {
+    Triangulation<dim>  external_tria;
+    GridGenerator::hyper_cube(external_tria, -2, 2);  // the box [-2,2]^dim mentioned above
+
+    FE_Q<dim>           external_fe(1);
+    DoFHandler<dim>     external_dh (external_tria);
+    Vector<double>      external_phi;
+
+    external_tria.refine_global(external_refinement);
+    external_dh.distribute_dofs(external_fe);
+    external_phi.reinit(external_dh.n_dofs());
+
+    typename DoFHandler<dim-1,dim>::active_cell_iterator
+    cell = dh.begin_active(),
+    endc = dh.end();
+
+
+    FEValues<dim-1,dim> fe_v(mapping, fe, *quadrature,
+                             update_values |
+                             update_cell_normal_vectors |
+                             update_quadrature_points |
+                             update_JxW_values);
+
+    const unsigned int n_q_points = fe_v.n_quadrature_points;
+
+    std::vector<types::global_dof_index> dofs(fe.dofs_per_cell);  // NOTE(review): filled below but never read in this function
+
+    std::vector<double> local_phi(n_q_points);
+    std::vector<double> normal_wind(n_q_points);
+    std::vector<Vector<double> > local_wind(n_q_points, Vector<double>(dim) );
+
+    std::vector<Point<dim> > external_support_points(external_dh.n_dofs());
+    DoFTools::map_dofs_to_support_points<dim>(StaticMappingQ1<dim>::mapping,
+                                              external_dh, external_support_points);
+
+    for (cell = dh.begin_active(); cell != endc; ++cell)  // loop over the cells of the boundary mesh
+      {
+        fe_v.reinit(cell);
+
+        const std::vector<Point<dim> >    &q_points = fe_v.get_quadrature_points();
+        const std::vector<Tensor<1,dim> > &normals  = fe_v.get_all_normal_vectors();
+
+        cell->get_dof_indices(dofs);
+        fe_v.get_function_values(phi, local_phi);  // boundary solution phi at the quadrature points
+
+        wind.vector_value_list(q_points, local_wind);
+
+        for (unsigned int q=0; q<n_q_points; ++q)
+          {
+            normal_wind[q] = 0;
+            for (unsigned int d=0; d<dim; ++d)
+              normal_wind[q] += normals[q][d]*local_wind[q](d);  // accumulates the dot product normal . wind
+          }
+
+        for (unsigned int i=0; i<external_dh.n_dofs(); ++i)  // every support point of the exterior grid
+          for (unsigned int q=0; q<n_q_points; ++q)
+            {
+
+              const Tensor<1,dim> R = q_points[q] - external_support_points[i];  // from the evaluation point to the quadrature point
+
+              external_phi(i) += ( ( LaplaceKernel::single_layer(R) *
+                                     normal_wind[q]
+                                     +
+                                     (LaplaceKernel::double_layer(R) *
+                                      normals[q] )            *
+                                     local_phi[q] )           *
+                                   fe_v.JxW(q) );
+            }
+      }
+
+    DataOut<dim> data_out;
+
+    data_out.attach_dof_handler(external_dh);
+    data_out.add_data_vector(external_phi, "external_phi");
+    data_out.build_patches();
+
+    const std::string
+    filename = Utilities::int_to_string(dim) + "d_external.vtk";
+    std::ofstream file(filename.c_str());
+
+    data_out.write_vtk(file);
+  }
+
+
+  // @sect4{BEMProblem::output_results}
+
+  // Outputting the results of our computations is a rather mechanical
+  // task. All the components of this function have been discussed before.
+  template <int dim>
+  void BEMProblem<dim>::output_results(const unsigned int cycle)
+  {
+    DataOut<dim-1, DoFHandler<dim-1, dim> > dataout;
+
+    dataout.attach_dof_handler(dh);
+    dataout.add_data_vector(phi, "phi",
+                            DataOut<dim-1, DoFHandler<dim-1, dim> >::type_dof_data);
+    dataout.add_data_vector(alpha, "alpha",
+                            DataOut<dim-1, DoFHandler<dim-1, dim> >::type_dof_data);
+    dataout.build_patches(mapping,
+                          mapping.get_degree(),
+                          DataOut<dim-1, DoFHandler<dim-1, dim> >::curved_inner_cells);
+
+    std::string filename = ( Utilities::int_to_string(dim) +
+                             "d_boundary_solution_" +
+                             Utilities::int_to_string(cycle) +
+                             ".vtk" );  // one VTK file per dimension and cycle
+    std::ofstream file(filename.c_str());
+
+    dataout.write_vtk(file);
+
+    if (cycle == n_cycles-1)  // after the last cycle, also finalize and print the convergence table
+      {
+        convergence_table.set_precision("L2(phi)", 3);
+        convergence_table.set_precision("Linfty(alpha)", 3);
+
+        convergence_table.set_scientific("L2(phi)", true);
+        convergence_table.set_scientific("Linfty(alpha)", true);
+
+        convergence_table
+        .evaluate_convergence_rates("L2(phi)", ConvergenceTable::reduction_rate_log2);
+        convergence_table
+        .evaluate_convergence_rates("Linfty(alpha)", ConvergenceTable::reduction_rate_log2);
+        deallog << std::endl;
+        convergence_table.write_text(std::cout);  // the table goes to std::cout, not to deallog
+      }
+  }
+
+
+  // @sect4{BEMProblem::run}
+
+  // This is the main function. It should be self explanatory in its
+  // brevity:
+  template <int dim>
+  void BEMProblem<dim>::run()
+  {
+
+    read_parameters("parameters.prm");
+
+    if (run_in_this_dimension == false)  // per the message below, this is controlled by the parameter file
+      {
+        deallog << "Run in dimension " << dim
+                << " explicitly disabled in parameter file. "
+                << std::endl;
+        return;
+      }
+
+    read_domain();
+
+    for (unsigned int cycle=0; cycle<n_cycles; ++cycle)  // one refine/assemble/solve/evaluate pass per cycle
+      {
+        refine_and_resize();
+        assemble_system();
+        solve_system();
+        compute_errors(cycle);
+        output_results(cycle);
+      }
+
+    if (extend_solution == true)  // optional evaluation away from the boundary, done once after the last cycle
+      compute_exterior_solution();
+  }
+}
+
+
+// @sect3{The main() function}
+
+// This is the main function of this program. It runs the 2d and then the 3d
+// problem and is otherwise exactly like in all previous tutorial programs:
+int main ()
+{
+  try
+    {
+      using namespace dealii;
+      using namespace Step34;
+
+      const unsigned int degree = 1;
+      const unsigned int mapping_degree = 1;
+
+      deallog.depth_console (3);
+      BEMProblem<2> laplace_problem_2d(degree, mapping_degree);
+      laplace_problem_2d.run();
+
+      BEMProblem<3> laplace_problem_3d(degree, mapping_degree);
+      laplace_problem_3d.run();
+    }
+  catch (std::exception &exc)  // report the message of standard exceptions and exit with status 1
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+
+      return 1;
+    }
+  catch (...)  // anything else: no message is available, still exit with status 1
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    }
+
+  return 0;
+}
diff --git a/examples/step-35/CMakeLists.txt b/examples/step-35/CMakeLists.txt
new file mode 100644
index 0000000..ae97f17
--- /dev/null
+++ b/examples/step-35/CMakeLists.txt
@@ -0,0 +1,50 @@
+##
+#  CMake script for the step-35 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-35")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+#
+# Are all dependencies fulfilled? This step needs deal.II built with UMFPACK.
+#
+IF(NOT DEAL_II_WITH_UMFPACK)
+  MESSAGE(FATAL_ERROR "
+Error! The deal.II library found at ${DEAL_II_PATH} was not configured with
+    DEAL_II_WITH_UMFPACK = ON
+One or all of these are OFF in your installation but are required for this tutorial step."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-35/doc/builds-on b/examples/step-35/doc/builds-on
new file mode 100644
index 0000000..80dd13d
--- /dev/null
+++ b/examples/step-35/doc/builds-on
@@ -0,0 +1 @@
+step-22
diff --git a/examples/step-35/doc/intro.dox b/examples/step-35/doc/intro.dox
new file mode 100644
index 0000000..6a05992
--- /dev/null
+++ b/examples/step-35/doc/intro.dox
@@ -0,0 +1,236 @@
+<br>
+
+<i>
+This program grew out of a student project by Abner Salgado at Texas A&M
+University. Most of the work for this program is by him.
+</i>
+
+<a name="Intro"></a>
+<h1> Introduction </h1>
+
+<a name="Motivation"></a>
+<h3> Motivation </h3>
+The purpose of this program is to show how to effectively solve the incompressible time-dependent
+Navier-Stokes equations. These equations describe the flow of a viscous incompressible fluid and read
+@f{align*}
+  u_t + u \cdot \nabla u - \nu \Delta u + \nabla p = f, \\
+  \nabla \cdot u = 0,
+@f}
+where $u$ represents the velocity of the flow and $p$ the pressure. This system of equations is supplemented by
+the initial condition
+@f[
+  u |_{t=0} = u_0,
+@f]
+with $u_0$ sufficiently smooth and solenoidal, and suitable boundary conditions. For instance, an admissible boundary
+condition, is
+@f[
+  u|_{\partial\Omega} = u_b.
+@f]
+It is possible to prescribe other boundary conditions as well. In the test case that we solve here the boundary
+is partitioned into two disjoint subsets $\partial\Omega = \Gamma_1 \cup \Gamma_2$ and we have
+@f[
+  u|_{\Gamma_1} = u_b,
+@f]
+and
+@f[
+ u\times n|_{\Gamma_2} = 0, \quad p|_{\Gamma_2} = 0
+@f]
+where $n$ is the outer unit normal. The boundary conditions on $\Gamma_2$ are often
+used to model outflow conditions.
+
+In previous tutorial programs (see for instance step-20 and
+step-22) we have seen
+how to solve the time-independent Stokes equations using a Schur complement approach. For the
+time-dependent case, after time discretization, we would arrive at a system like
+@f{align*}
+  \frac1\tau u^k - \nu \Delta u^k + \nabla p^k = F^k, \\
+  \nabla \cdot u^k = 0,
+@f}
+where $\tau$ is the time-step. Although the structure of this system is similar to the Stokes system and thus
+it could be solved using a Schur complement approach, it turns out that the condition number of the
+Schur complement is proportional to $\tau^{-2}$. This makes the system very
+difficult to solve, and means that for the Navier-Stokes equations, this is
+not a useful avenue to the solution.
+
+<a name="Projection"></a>
+<h3> Projection methods </h3>
+
+Rather, we need to come up with a different approach to solve the time-dependent Navier-Stokes
+equations. The difficulty in their solution comes from the fact that the velocity and the pressure are coupled
+through the constraint
+@f[
+  \nabla \cdot u = 0,
+@f]
+for which the pressure is the Lagrange multiplier.
+Projection methods aim at decoupling this constraint from the diffusion (Laplace) operator.
+
+Let us briefly describe what the projection methods look like in a semi-discrete setting. The objective is to
+obtain a sequence of velocities $\{u^k\}$ and pressures $\{p^k\}$. We will
+also obtain a sequence $\{\phi^k\}$ of auxiliary variables.
+Suppose that from the initial conditions, and an application of a first order method we have found
+$(u^0,p^0,\phi^0=0)$ and $(u^1,p^1,\phi^1=p^1-p^0)$. Then the projection method consists of the following steps:
+<ul>
+  <li> <b>Step 0</b>: Extrapolation. Define:
+  @f[
+    u^\star = 2u^k - u^{k-1}, \quad p^\sharp = p^k + \frac43 \phi^k - \frac13 \phi^{k-1}.
+  @f]
+  <li> <b>Step 1</b>: Diffusion step. We find $u^{k+1}$ that solves the single
+  linear equation
+  @f[
+    \frac1{2\tau}\left( 3u^{k+1} - 4u^k + u^{k-1} \right)
+    + u^\star \cdot\nabla u^{k+1} + \frac12 \left( \nabla \cdot u^\star \right) u^{k+1}
+    -\nu \Delta u^{k+1} + \nabla p^\sharp
+    = f^{k+1},
+    \quad
+    u^{k+1}|_{\Gamma_1} = u_b,
+    \quad
+    u^{k+1} \times n|_{\Gamma_2} = 0.
+  @f]
+
+  <li> <b>Step 2</b>: Projection. Find $\phi^{k+1}$ that solves
+  @f[
+    \Delta \phi^{k+1} = \frac3{2\tau} \nabla \cdot u^{k+1},
+    \quad
+    \partial_n \phi^{k+1}|_{\Gamma_1} = 0,
+    \quad
+    \phi^{k+1}|_{\Gamma_2} = 0
+  @f]
+  <li> <b>Step 3</b>: Pressure correction. Here we have two options:
+    <ul>
+      <li> <i>Incremental Method in Standard Form</i>. The pressure is updated by:
+      @f[
+        p^{k+1} = p^k + \phi^{k+1}.
+      @f]
+      <li> <i>Incremental Method in Rotational Form</i>. In this case
+      @f[
+        p^{k+1} = p^k + \phi^{k+1} - \nu \nabla \cdot u^{k+1}.
+      @f]
+    </ul>
+</ul>
+
+Without going into details, which can be found in the corresponding literature, let us remark a few things about the
+projection methods that we have just described:
+<ul>
+  <li> The advection term $u\cdot\nabla u$ is replaced by its <i>skew symmetric form</i>
+  @f[
+    u \cdot \nabla u + \frac12 \left( \nabla\cdot u \right) u.
+  @f]
+  This is consistent with the continuous equation (because $\nabla\cdot u = 0$,
+  though this is not true pointwise for the discrete solution) and it is needed to
+  guarantee unconditional stability of the
+  time-stepping scheme. Moreover, to linearize the term we use the second order extrapolation $u^\star$ of
+  $u^{k+1}$.
+  <li> The projection step is a realization of the Helmholtz decomposition
+  @f[
+    L^2(\Omega)^d = H \oplus \nabla H^1_{\Gamma_2}(\Omega),
+  @f]
+  where
+  @f[
+    H = \left\{ v \in L^2(\Omega)^d:\  \nabla\cdot v =0, \  v\cdot n|_{\Gamma_1} = 0 \right\},
+  @f]
+  and
+  @f[
+    H^1_{\Gamma_2}(\Omega) = \left\{ q \in H^1(\Omega):\ q|_{\Gamma_2} = 0 \right\}.
+  @f]
+  Indeed, if we use this decomposition on $u^{k+1}$ we obtain
+  @f[
+    u^{k+1} = v^{k+1} + \nabla \left( \frac{2\tau}{3}  \phi^{k+1} \right),
+  @f]
+  with $v^{k+1}\in H$. Taking the divergence of this equation we arrive at the projection equation.
+  <li> The more accurate of the two variants outlined above is the rotational
+  one. However, the program below implements both variants. Moreover, in the author's experience,
+  the standard form is the one that should be used if, for instance, the viscosity $\nu$ is variable.
+</ul>
+
+
+<a name ="fullydiscrete"></a>
+<h3> The Fully Discrete Setting </h3>
+To obtain a fully discrete setting of the method we, as always, need a variational formulation. There is one
+subtle issue here given the nature of the boundary conditions. When we multiply the equation by a suitable test
+function one of the terms that arise is
+@f[
+  -\nu \int_\Omega \Delta u \cdot v.
+@f]
+If we, say, had Dirichlet boundary conditions on the whole boundary then after integration by parts we would
+obtain
+@f[
+  -\nu \int_\Omega \Delta u \cdot v = \nu \int_\Omega \nabla u : \nabla v
+                                    - \nu \int_{\partial\Omega} \partial_n u \cdot v
+                                    = \nu \int_\Omega \nabla u : \nabla v.
+@f]
+One of the advantages of this formulation is that it fully decouples the components of the velocity. Moreover,
+they all share the same system matrix. This can be exploited in the program.
+
+However, given the nonstandard boundary conditions, to be able to take them into account we need to use
+the following %identity
+@f[
+  \Delta u = \nabla\nabla\cdot u - \nabla\times\nabla\times u,
+@f]
+so that when we integrate by parts and take into account the boundary conditions we obtain
+@f[
+  -\nu \int_\Omega \Delta u \cdot v = \nu \int_\Omega \left[ \nabla \cdot u \nabla \cdot v
+                                    + \nabla \times u \nabla \times v \right],
+@f]
+which is the form that we would have to use. Notice that this couples the components of the velocity.
+Moreover, to enforce the boundary condition on the pressure, we need to rewrite
+@f[
+  \int_\Omega \nabla p \cdot v = -\int_\Omega p \nabla \cdot v + \int_{\Gamma_1} p v\cdot n
+                                + \int_{\Gamma_2} p v\cdot n
+                               = -\int_\Omega p \nabla \cdot v,
+@f]
+where the boundary integral in $\Gamma_1$ equals zero given the boundary conditions for the velocity,
+and the one in $\Gamma_2$ given the boundary conditions for the pressure.
+
+In the simplified case where the boundary $\Gamma_2$ is %parallel to a coordinate axis, which holds for
+the testcase that we carry out below, it can actually be shown that
+@f[
+  \nu \int_\Omega \nabla u : \nabla v = \nu \int_\Omega \left[ \nabla \cdot u \nabla \cdot v
+                                    + \nabla \times u \nabla \times v \right].
+@f]
+This issue is not very often addressed in the literature. For more information the reader can consult, for
+instance,
+<ul>
+  <li> J.-L. GUERMOND, L. QUARTAPELLE, On the approximation of the unsteady Navier-Stokes equations by
+  finite element projection methods, Numer. Math., 80  (1998) 207-238
+  <li> J.-L. GUERMOND, P. MINEV, J. SHEN, Error analysis of pressure-correction schemes for the
+  Navier-Stokes equations with open boundary conditions, SIAM J. Numer. Anal., 43  1 (2005) 239--258.
+</ul>
+
+
+
+<a name = "implementation"></a>
+<h3> Implementation </h3>
+
+Our implementation of the projection methods follows <i>verbatim</i> the description given above. We must note,
+however, that as opposed to most other problems that have several solution components, we do not use
+vector-valued finite elements. Instead, we use separate finite elements for the components of the velocity
+and the pressure, respectively, and use different <code>DoFHandler</code>'s for those as well. The main
+reason for doing this is that, as we see from the description of the scheme, the <code>dim</code> components
+of the velocity and the pressure are decoupled. As a consequence, the equations for all the velocity components
+look all the same, have the same system matrix, and can be solved in %parallel. Obviously, this approach
+has also its disadvantages. For instance, we need to keep several <code>DoFHandler</code>s and iterators
+synchronized when assembling matrices and right hand sides; obtaining quantities that are inherent to
+vector-valued functions (e.g. divergences) becomes a little awkward, and others.
+
+<a name ="testcase"></a>
+<h3> The Testcase </h3>
+
+The testcase that we use for this program consists of the flow around a square obstacle. The geometry is
+as follows:
+
+<img src="http://www.dealii.org/images/steps/developer/step-35.geometry.png" alt="">
+
+with $H=4.1$, making the geometry slightly non-symmetric.
+
+We impose no-slip boundary conditions on both the top and bottom walls and the obstacle. On the left side we
+have the inflow boundary condition
+@f[
+  u =
+  \left( \begin{array}{c} 4 U_m y (H-y)/H^2 \\ 0 \end{array} \right),
+@f]
+with $U_m = 1.5$, i.e. the inflow boundary conditions correspond to Poiseuille flow for this configuration.
+Finally, on the right vertical wall we impose the condition that the vertical component of the velocity
+and the pressure should both be zero.
+The final time $T=10$.
+
+
diff --git a/examples/step-35/doc/kind b/examples/step-35/doc/kind
new file mode 100644
index 0000000..e62f4e7
--- /dev/null
+++ b/examples/step-35/doc/kind
@@ -0,0 +1 @@
+fluids
diff --git a/examples/step-35/doc/results.dox b/examples/step-35/doc/results.dox
new file mode 100644
index 0000000..c7da3a3
--- /dev/null
+++ b/examples/step-35/doc/results.dox
@@ -0,0 +1,176 @@
+<a name="results"></a>
+<h1>Results</h1>
+
+<a name="Re100"></a>
+<h3> Re = 100 </h3>
+
+We run the code with the following <code>parameter-file.prm</code>, which can be found in the
+same directory as the source:
+@verbatim
+  # First a global definition
+  # the type of method we want to use
+  set Method_Form = rotational
+
+  subsection Physical data
+    # In this subsection we declare the physical data
+    # The initial and final time, and the Reynolds number
+    set initial_time = 0.
+    set final_time   = 25.
+    set Reynolds     = 100
+  end
+
+  subsection Time step data
+    # In this subsection we declare the data that is to be used for time discretization,
+    # i.e. the time step dt
+    set dt = 5e-3
+  end
+
+  subsection Space discretization
+    # In this subsection we declare the data that is relevant to the space discretization
+    # we set the number of global refines the triangulation must have
+    # and the degree k of the pair Q_(k+1)--Q_k of velocity--pressure finite element spaces
+    set n_of_refines = 3
+    set pressure_fe_degree = 1
+  end
+
+  subsection Data solve velocity
+    # In this section we declare the parameters that are going to control the solution process
+    # for the velocity.
+    set max_iterations = 1000  # maximal number of iterations that GMRES must make
+    set eps            = 1e-6  # stopping criterion
+    set Krylov_size    = 30    # size of the Krylov subspace to be used in GMRES
+    set off_diagonals  = 60    # number of off diagonals that ILU must compute
+    set diag_strength  = 0.01  # diagonal strengthening value
+    set update_prec    = 10    # this number indicates how often the preconditioner must be updated
+  end
+
+  #The output frequency
+  set output = 50
+
+  #Finally we set the verbosity level
+  set verbose = false
+@endverbatim
+
+Since the <code>verbose</code> parameter is set to <code>false</code>,
+we do not get any kind of output besides the number of the time step
+the program is currently working on.
+If we were to set it to <code>true</code> we would get information on what the program is doing and
+how many steps each iterative process had to make to converge, etc.
+
+Let us plot the obtained results for $t=1,5,12,20,25$ (i.e. time steps
+200, 1000, 2400, 4000, and 5000), where in the left column we show the
+vorticity and in the right the velocity field:
+
+<table>
+  <tr>
+    <td> <img src="http://www.dealii.org/images/steps/developer/step-35.Re_100.vorticity.0.png" alt=""> </td>
+    <td> <img src="http://www.dealii.org/images/steps/developer/step-35.Re_100.velocity.0.png" alt=""> </td>
+  </tr>
+  <tr>
+    <td> <img src="http://www.dealii.org/images/steps/developer/step-35.Re_100.vorticity.1.png" alt=""> </td>
+    <td> <img src="http://www.dealii.org/images/steps/developer/step-35.Re_100.velocity.1.png" alt=""> </td>
+  </tr>
+  <tr>
+    <td> <img src="http://www.dealii.org/images/steps/developer/step-35.Re_100.vorticity.2.png" alt=""> </td>
+    <td> <img src="http://www.dealii.org/images/steps/developer/step-35.Re_100.velocity.2.png" alt=""> </td>
+  </tr>
+  <tr>
+    <td> <img src="http://www.dealii.org/images/steps/developer/step-35.Re_100.vorticity.3.png" alt=""> </td>
+    <td> <img src="http://www.dealii.org/images/steps/developer/step-35.Re_100.velocity.3.png" alt=""> </td>
+  </tr>
+  <tr>
+    <td> <img src="http://www.dealii.org/images/steps/developer/step-35.Re_100.vorticity.4.png" alt=""> </td>
+    <td> <img src="http://www.dealii.org/images/steps/developer/step-35.Re_100.velocity.4.png" alt=""> </td>
+  </tr>
+</table>
+
+The images show nicely the development and extension of a vortex chain
+behind the obstacles, with the sign of the vorticity indicating
+whether this is a left or right turning vortex.
+
+
+<a name="Re500"></a>
+<h3> Re = 500 </h3>
+
+We can change the Reynolds number, $Re$, in the parameter file to a
+value of $500$. Doing so, and reducing the time step somewhat as well,
+yields the following images at times $t=20,40$:
+
+<table>
+  <tr>
+    <td> <img src="http://www.dealii.org/images/steps/developer/step-35.Re_500.vorticity.0.png" alt=""> </td>
+    <td> <img src="http://www.dealii.org/images/steps/developer/step-35.Re_500.velocity.0.png" alt=""> </td>
+  </tr>
+  <tr>
+    <td> <img src="http://www.dealii.org/images/steps/developer/step-35.Re_500.vorticity.1.png" alt=""> </td>
+    <td> <img src="http://www.dealii.org/images/steps/developer/step-35.Re_500.velocity.1.png" alt=""> </td>
+  </tr>
+</table>
+
+What does look right is that with the higher Reynolds number, the flow
+is much more unorganized, swaying between the bottom and top boundary
+of the domain. On the other hand, the vorticity can't be right: it
+oscillates rapidly, not something we would expect for a Reynolds
+number that is still only moderate and that would not produce
+small scale dynamics. The riddle is easily solved,
+however, by zooming in on the region behind the obstacle and
+looking at the mesh size we have there:
+
+<img src="http://www.dealii.org/images/steps/developer/step-35.Re_500.zoom.png" alt="">
+
+It is clear here that the flow is still reasonably well resolved in
+the immediate wake of the obstacle, where the mesh is relatively fine,
+but that the problem appears where the mesh becomes too coarse. That
+shouldn't surprise us: if we increase the Reynolds number, the scales
+on which we expect flow variation decrease and we will have to have a
+finer mesh.
+
+A rerun with one more mesh refinement set in the parameter file
+confirms that results look much more sensible now:
+
+<img src="http://www.dealii.org/images/steps/developer/step-35.Re_500.zoom_2.png" alt="">
+
+Here, the zone where the mesh becomes coarser is immediately
+downstream of the big red blob at the bottom of the wake behind the
+obstacle. While there are still quite visible oscillations beyond that
+point, it is clear that mesh refinement has already significantly
+helped, and we can expect that further refining the mesh will suppress
+the remaining oscillations as well.
+
+
+<a name="extensions"></a>
+<h3> Possible Extensions </h3>
+
+This program can be extended in the following directions:
+<ul>
+  <li> Adaptive mesh refinement: As we have seen, we computed everything on a single fixed mesh.
+  Using adaptive mesh refinement can lead to increased accuracy while not significantly increasing the
+  computational time.
+
+  <li> Adaptive time-stepping: Although there apparently is currently no theory about
+  projection methods with variable time step,
+  practice shows that they perform very well.
+
+  <li> High Reynolds %numbers: As we can see from the results, increasing the Reynolds number significantly
+  changes the behavior of the flow. Using well-known stabilization techniques we should be able to
+  compute the flow in this, and many other problems, even when the Reynolds number is very large.
+
+  <li> Variable density incompressible flows: There are projection-like methods for the case of incompressible
+  flows with variable density. Such flows play a role if fluids of different
+  density mix, for example fresh water and salt water, or alcohol and water.
+
+  <li> Compressible Navier-Stokes equations: These equations are relevant for
+  cases where
+  velocities are high enough so that the fluid becomes compressible, but not
+  fast enough that we get into a regime where viscosity becomes negligible
+  and the Navier-Stokes equations need to be replaced by the hyperbolic Euler
+  equations of gas dynamics. Compressibility starts to become a factor if the
+  velocity becomes greater than about one third of the speed of sound, so it
+  is not a factor for almost all terrestrial vehicles. On the other hand,
+  commercial jetliners fly at about 85 per cent of the speed of sound, and
+  flow over the wings becomes significantly supersonic, a regime in which the
+  compressible Navier-Stokes equations are not applicable any more
+  either. There are significant applications for the range in between,
+  however, such as for small aircraft or the fast trains in many European and
+  East Asian countries.
+</ul>
diff --git a/examples/step-35/doc/tooltip b/examples/step-35/doc/tooltip
new file mode 100644
index 0000000..0cdaefd
--- /dev/null
+++ b/examples/step-35/doc/tooltip
@@ -0,0 +1 @@
+A projection solver for the Navier-Stokes equations.
diff --git a/examples/step-35/nsbench2.inp b/examples/step-35/nsbench2.inp
new file mode 100644
index 0000000..80c46df
--- /dev/null
+++ b/examples/step-35/nsbench2.inp
@@ -0,0 +1,187 @@
+82 104 0 0 0
+ 0 01.75 02.25 0.00
+ 1 01.1 01.1 0.00
+ 2 02.0 01.0 0.00
+ 3 03.0 0.0 0.00
+ 4 02.0 0.0 0.00
+ 5 01.0 02.0 0.00
+ 6 00 03.075 0.00
+ 7 00 02.050 0.00
+ 8 01.0 00 0.00
+ 9 0.0 01.025 0.00
+ 10 0.0 0.0 0.00
+ 11 03.0 02.0 0.00
+ 12 01.1 02.9 0.00
+ 13 02.0 03.0 0.00
+ 14 02.9 02.9 0.00
+ 15 04.0 03.0 0.00
+ 16 06.0 02.0 0.00
+ 17 04.0 02.0 0.00
+ 18 06.0 01.0 0.00
+ 19 08.0 01.0 0.00
+ 20 10.00 0.0 0.00
+ 21 08.0 0.0 0.00
+ 22 02.9 01.1 0.00
+ 23 04.0 01.0 0.00
+ 24 06.0 0.0 0.00
+ 25 04.0 0.0 0.00
+ 26 10.00 02.0 0.00
+ 27 06.0 03.0 0.00
+ 28 08.0 03.0 0.00
+ 29 08.0 02.0 0.00
+ 30 10.00 03.0 0.00
+ 31 12.0 03.0 0.00
+ 32 14.0 02.0 0.00
+ 33 12.0 02.0 0.00
+ 34 14.0 01.0 0.00
+ 35 10.0 01.0 0.00
+ 36 12.0 01.0 0.00
+ 37 14.0 0.0 0.00
+ 38 12.0 0.0 0.00
+ 39 14.0 03.0 0.00
+ 40 03.0 04.1 0.00
+ 41 02.0 04.1 0.00
+ 42 01.0 04.1 0.00
+ 43 00. 04.1 0.00
+ 44 10.0 04.1 0.00
+ 45 08.0 04.1 0.00
+ 46 06.0 04.1 0.00
+ 47 04.0 04.1 0.00
+ 48 14.0 04.1 0.00
+ 49 12.0 04.1 0.00
+ 50 01.5 02.0 0.00
+ 51 01.75 01.75 0.00
+ 52 02.0 01.5 0.00
+ 53 02.25 01.75 0.00
+ 54 02.5 02.0 0.00
+ 55 02.25 02.25 0.00
+ 56 02.0 02.5 0.00
+57 16.2 0 0.0
+58 16.2 01. 0.0
+59 16.2 02. 0.0
+60 16.2 03. 0.0
+61 16.2 04.1 0.0
+62 18.4 0 0.0
+63 18.4 01. 0.0
+64 18.4 02. 0.0
+65 18.4 03. 0.0
+66 18.4 04.1 0.0
+67 20.6 0 0.0
+68 20.6 01. 0.0
+69 20.6 02. 0.0
+70 20.6 03. 0.0
+71 20.6 04.1 0.0
+72 22.8 0 0.0
+73 22.8 01. 0.0
+74 22.8 02. 0.0
+75 22.8 03. 0.0
+76 22.8 04.1 0.0
+77 25. 0 0.0
+78 25. 01. 0.0
+79 25. 02. 0.0
+80 25. 03. 0.0
+81 25. 04.1 0.0
+ 1 1 quad 10 8 1 9
+ 2 1 quad 8 4 2 1
+ 3 1 quad 9 1 5 7
+ 4 1 quad 4 3 22 2
+ 5 1 quad 3 25 23 22
+ 6 1 quad 22 23 17 11
+ 7 1 quad 11 17 15 14
+ 8 1 quad 14 15 47 40
+ 9 1 quad 13 14 40 41
+ 10 1 quad 7 5 12 6
+ 11 1 quad 12 13 41 42
+ 12 1 quad 6 12 42 43
+ 13 1 quad 25 24 18 23
+ 14 1 quad 24 21 19 18
+ 15 1 quad 18 19 29 16
+ 16 1 quad 23 18 16 17
+ 17 1 quad 21 20 35 19
+ 18 1 quad 20 38 36 35
+ 19 1 quad 35 36 33 26
+ 20 1 quad 19 35 26 29
+ 21 1 quad 29 26 30 28
+ 22 1 quad 26 33 31 30
+ 23 1 quad 30 31 49 44
+ 24 1 quad 28 30 44 45
+ 25 1 quad 17 16 27 15
+ 26 1 quad 16 29 28 27
+ 27 1 quad 27 28 45 46
+ 28 1 quad 15 27 46 47
+ 29 1 quad 38 37 34 36
+ 30 1 quad 36 34 32 33
+ 31 1 quad 33 32 39 31
+ 32 1 quad 31 39 48 49
+ 33 1 quad 1 51 50 5
+ 34 1 quad 2 52 51 1
+ 35 1 quad 22 53 52 2
+ 36 1 quad 11 54 53 22
+ 37 1 quad 14 55 54 11
+ 38 1 quad 13 56 55 14
+ 39 1 quad 12 0 56 13
+ 40 1 quad 5 50 0 12
+41 1 quad 57 58 34 37
+42 1 quad 58 59 32 34
+43 1 quad 59 60 39 32
+44 1 quad 60 61 48 39
+45 1 quad 62 63 58 57
+46 1 quad 63 64 59 58
+47 1 quad 64 65 60 59
+48 1 quad 65 66 61 60
+49 1 quad 67 68 63 62
+50 1 quad 68 69 64 63
+51 1 quad 69 70 65 64
+52 1 quad 70 71 66 65
+53 1 quad 72 73 68 67
+54 1 quad 73 74 69 68
+55 1 quad 74 75 70 69
+56 1 quad 75 76 71 70
+57 1 quad 77 78 73 72
+58 1 quad 78 79 74 73
+59 1 quad 79 80 75 74
+60 1 quad 80 81 76 75
+ 41  1 line 10 8
+ 42  2 line 10 9
+ 43  1 line 8 4
+ 44  2 line 9 7
+ 45  1 line 4 3
+ 46  1 line 3 25
+ 47  1 line 40 47
+ 48  1 line 41 40
+ 49  2 line 7 6
+ 50  1 line 42 41
+ 51  1 line 43 42
+ 52  2 line 6 43
+ 53  1 line 25 24
+ 54  1 line 24 21
+ 55  1 line 21 20
+ 56  1 line 20 38
+ 57  1 line 44 49
+ 58  1 line 45 44
+ 59  1 line 46 45
+ 60  1 line 47 46
+ 61  1 line 38 37
+ 62  3 line 77 78
+ 63  3 line 78 79
+ 64  3 line 79 80
+ 65  3 line 80 81
+ 66  1 line 49 48
+ 67  4 line 51 50
+ 68  4 line 52 51
+ 69  4 line 53 52
+ 70  4 line 54 53
+ 71  4 line 55 54
+ 72  4 line 56 55
+ 73  4 line 0  56
+ 74  4 line 50 0
+75 1 line 37 57
+76 1 line 48 61
+77 1 line 57 62
+78 1 line 61 66
+79 1 line 62 67
+80 1 line 66 71
+81 1 line 67 72
+82 1 line 71 76
+83 1 line 72 77
+84 1 line 76 81
diff --git a/examples/step-35/parameter-file.prm b/examples/step-35/parameter-file.prm
new file mode 100644
index 0000000..81a946e
--- /dev/null
+++ b/examples/step-35/parameter-file.prm
@@ -0,0 +1,42 @@
+# First a global definition
+# the type of method we want to use
+set Method_Form = rotational
+
+subsection Physical data
+  # In this subsection we declare the physical data
+  # The initial and final time, and the Reynolds number
+  set initial_time = 0.
+  set final_time   = 10.
+  set Reynolds     = 100
+end
+
+subsection Time step data
+  # In this subsection we declare the data that is to be used for time discretization,
+  # i.e. the time step dt
+  set dt = 5e-3
+end
+
+subsection Space discretization
+  # In this subsection we declare the data that is relevant to the space discretization
+  # we set the number of global refines the triangulation must have
+  # and the degree k of the pair Q_(k+1)--Q_k of velocity--pressure finite element spaces
+  set n_of_refines = 3
+  set pressure_fe_degree = 1
+end
+
+subsection Data solve velocity
+  # In this section we declare the parameters that are going to control the solution process
+  # for the velocity.
+  set max_iterations = 1000  # maximal number of iterations that GMRES may make
+  set eps            = 1e-8  # stopping criterion
+  set Krylov_size    = 30    # size of the Krylov subspace to be used in GMRES
+  set off_diagonals  = 70    # number of off diagonals that ILU must compute
+  set diag_strength  = 0.1  # diagonal strengthening value
+  set update_prec    = 10    # this number indicates how often the preconditioner must be updated
+end
+
+#The output frequency
+set output_interval = 50
+
+#Finally we set the verbosity level
+set verbose = false
diff --git a/examples/step-35/step-35.cc b/examples/step-35/step-35.cc
new file mode 100644
index 0000000..f15cb5d
--- /dev/null
+++ b/examples/step-35/step-35.cc
@@ -0,0 +1,1455 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2009 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Author: Abner Salgado, Texas A&M University 2009
+ */
+
+
+// @sect3{Include files}
+
+// We start by including all the necessary deal.II header files and some C++
+// related ones. Each one of them has been discussed in previous tutorial
+// programs, so we will not get into details here.
+#include <deal.II/base/parameter_handler.h>
+#include <deal.II/base/point.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/multithread_info.h>
+#include <deal.II/base/thread_management.h>
+#include <deal.II/base/work_stream.h>
+#include <deal.II/base/parallel.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/conditional_ostream.h>
+
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/precondition.h>
+#include <deal.II/lac/solver_gmres.h>
+#include <deal.II/lac/sparse_ilu.h>
+#include <deal.II/lac/sparse_direct.h>
+#include <deal.II/lac/constraint_matrix.h>
+
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/grid_refinement.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/tria_boundary_lib.h>
+#include <deal.II/grid/grid_in.h>
+
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/dofs/dof_renumbering.h>
+
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/fe_tools.h>
+#include <deal.II/fe/fe_system.h>
+
+#include <deal.II/numerics/matrix_tools.h>
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/data_out.h>
+
+#include <fstream>
+#include <cmath>
+#include <iostream>
+
+// Finally this is as in all previous programs:
+namespace Step35
+{
+  using namespace dealii;
+
+
+
+  // @sect3{Run time parameters}
+  //
+  // Since our method has several parameters that can be fine-tuned we put
+  // them into an external file, so that they can be determined at run-time.
+  //
+  // This includes, in particular, the formulation of the equation for the
+  // auxiliary variable $\phi$, for which we declare an <code>enum</code>.
+  // Next, we declare a class that is going to read and store all the
+  // parameters that our program needs to run.
+  namespace RunTimeParameters
+  {
+    // The formulations of the projection method that can be selected with the
+    // <code>Method_Form</code> entry of the parameter file.
+    enum MethodFormulation
+    {
+      METHOD_STANDARD,
+      METHOD_ROTATIONAL
+    };
+
+    // This class declares all run-time parameters, reads them from a file and
+    // keeps the values in public member variables so that the rest of the
+    // program can simply copy what it needs.
+    class Data_Storage
+    {
+    public:
+      Data_Storage();
+      ~Data_Storage();
+
+      // Read the parameter file with the given name and fill the member
+      // variables below.
+      void read_data (const char *filename);
+
+      // Global choice of the method.
+      MethodFormulation form;
+
+      // Entries of the "Physical data" subsection.
+      double initial_time;
+      double final_time;
+      double Reynolds;
+
+      // Entry of the "Time step data" subsection.
+      double dt;
+
+      // Entries of the "Space discretization" subsection.
+      unsigned int n_global_refines;
+      unsigned int pressure_degree;
+
+      // Entries of the "Data solve velocity" subsection.
+      unsigned int vel_max_iterations;
+      unsigned int vel_Krylov_size;
+      unsigned int vel_off_diagonals;
+      unsigned int vel_update_prec;
+      double vel_eps;
+      double vel_diag_strength;
+
+      // Global output settings.
+      bool verbose;
+      unsigned int output_interval;
+
+    protected:
+      // The object that knows the declared entries and parses the input.
+      ParameterHandler prm;
+    };
+
+    // In the constructor of this class we declare all the parameters. The
+    // details of how this works have been discussed elsewhere, for example in
+    // step-19 and step-29.
+    // All plain data members are first given the same defaults that are
+    // declared for the corresponding parameters below, so that an object is
+    // in a well-defined state even before <code>read_data()</code> has been
+    // called. The initializers are listed in the order in which the members
+    // are declared in the class.
+    Data_Storage::Data_Storage()
+      :
+      form (METHOD_ROTATIONAL),
+      initial_time (0.),
+      final_time (1.),
+      Reynolds (1.),
+      dt (5e-4),
+      n_global_refines (0),
+      pressure_degree (1),
+      vel_max_iterations (1000),
+      vel_Krylov_size (30),
+      vel_off_diagonals (60),
+      vel_update_prec (15),
+      vel_eps (1e-12),
+      vel_diag_strength (0.01),
+      verbose (true),
+      output_interval (1)
+    {
+      // Global entry: which formulation of the method to use.
+      prm.declare_entry ("Method_Form", "rotational",
+                         Patterns::Selection ("rotational|standard"),
+                         " Used to select the type of method that we are going "
+                         "to use. ");
+      prm.enter_subsection ("Physical data");
+      {
+        prm.declare_entry ("initial_time", "0.",
+                           Patterns::Double (0.),
+                           " The initial time of the simulation. ");
+        prm.declare_entry ("final_time", "1.",
+                           Patterns::Double (0.),
+                           " The final time of the simulation. ");
+        prm.declare_entry ("Reynolds", "1.",
+                           Patterns::Double (0.),
+                           " The Reynolds number. ");
+      }
+      prm.leave_subsection();
+
+      prm.enter_subsection ("Time step data");
+      {
+        prm.declare_entry ("dt", "5e-4",
+                           Patterns::Double (0.),
+                           " The time step size. ");
+      }
+      prm.leave_subsection();
+
+      prm.enter_subsection ("Space discretization");
+      {
+        prm.declare_entry ("n_of_refines", "0",
+                           Patterns::Integer (0, 15),
+                           " The number of global refines we do on the mesh. ");
+        prm.declare_entry ("pressure_fe_degree", "1",
+                           Patterns::Integer (1, 5),
+                           " The polynomial degree for the pressure space. ");
+      }
+      prm.leave_subsection();
+
+      prm.enter_subsection ("Data solve velocity");
+      {
+        prm.declare_entry ("max_iterations", "1000",
+                           Patterns::Integer (1, 1000),
+                           " The maximal number of iterations GMRES must make. ");
+        prm.declare_entry ("eps", "1e-12",
+                           Patterns::Double (0.),
+                           " The stopping criterion. ");
+        prm.declare_entry ("Krylov_size", "30",
+                           Patterns::Integer(1),
+                           " The size of the Krylov subspace to be used. ");
+        prm.declare_entry ("off_diagonals", "60",
+                           Patterns::Integer(0),
+                           " The number of off-diagonal elements ILU must "
+                           "compute. ");
+        prm.declare_entry ("diag_strength", "0.01",
+                           Patterns::Double (0.),
+                           " Diagonal strengthening coefficient. ");
+        prm.declare_entry ("update_prec", "15",
+                           Patterns::Integer(1),
+                           " This number indicates how often we need to "
+                           "update the preconditioner");
+      }
+      prm.leave_subsection();
+
+      // Global entries that control the output.
+      prm.declare_entry ("verbose", "true",
+                         Patterns::Bool(),
+                         " This indicates whether the output of the solution "
+                         "process should be verbose. ");
+
+      prm.declare_entry ("output_interval", "1",
+                         Patterns::Integer(1),
+                         " This indicates between how many time steps we print "
+                         "the solution. ");
+    }
+
+
+
+    // The destructor has an empty body; nothing is released explicitly here.
+    Data_Storage::~Data_Storage()
+    {
+    }
+
+
+
+    // Open the file <code>filename</code>, let the ParameterHandler parse it
+    // and then copy every entry into the member variable of this class that
+    // corresponds to it. The AssertThrow below reports
+    // <code>ExcFileNotOpen</code> if the stream could not be opened.
+    void Data_Storage::read_data (const char *filename)
+    {
+      std::ifstream parameter_stream (filename);
+      AssertThrow (parameter_stream, ExcFileNotOpen (filename));
+
+      prm.read_input (parameter_stream);
+
+      // Anything that is not "rotational" selects the standard formulation.
+      const bool use_rotational_form
+        = (prm.get ("Method_Form") == std::string ("rotational"));
+      form = (use_rotational_form ? METHOD_ROTATIONAL : METHOD_STANDARD);
+
+      prm.enter_subsection ("Physical data");
+      initial_time = prm.get_double ("initial_time");
+      final_time   = prm.get_double ("final_time");
+      Reynolds     = prm.get_double ("Reynolds");
+      prm.leave_subsection();
+
+      prm.enter_subsection ("Time step data");
+      dt = prm.get_double ("dt");
+      prm.leave_subsection();
+
+      prm.enter_subsection ("Space discretization");
+      n_global_refines = prm.get_integer ("n_of_refines");
+      pressure_degree  = prm.get_integer ("pressure_fe_degree");
+      prm.leave_subsection();
+
+      prm.enter_subsection ("Data solve velocity");
+      vel_max_iterations = prm.get_integer ("max_iterations");
+      vel_eps            = prm.get_double ("eps");
+      vel_Krylov_size    = prm.get_integer ("Krylov_size");
+      vel_off_diagonals  = prm.get_integer ("off_diagonals");
+      vel_diag_strength  = prm.get_double ("diag_strength");
+      vel_update_prec    = prm.get_integer ("update_prec");
+      prm.leave_subsection();
+
+      // The two remaining entries live outside of any subsection.
+      verbose         = prm.get_bool ("verbose");
+      output_interval = prm.get_integer ("output_interval");
+    }
+  }
+
+
+
+  // @sect3{Equation data}
+
+  // In the next namespace, we declare the initial and boundary conditions:
+  namespace EquationData
+  {
+    // As we have chosen a completely decoupled formulation, we will not take
+    // advantage of deal.II's capabilities to handle vector valued
+    // problems. We do, however, want to use an interface for the equation
+    // data that is somehow dimension independent. To be able to do that, our
+    // functions should be able to know on which spatial component we are
+    // currently working, and we should be able to have a common interface to
+    // do that. The following class is an attempt in that direction.
+    template <int dim>
+    class MultiComponentFunction: public Function<dim>
+    {
+    public:
+      // The argument is the time with which the Function base class is
+      // initialized; the selected component starts out as zero.
+      MultiComponentFunction (const double initial_time = 0.);
+      // Select the spatial component that subsequent evaluations refer to;
+      // <code>d</code> has to be smaller than <code>dim</code>.
+      void set_component (const unsigned int d);
+    protected:
+      // The currently selected spatial component.
+      unsigned int comp;
+    };
+
+    // The base class is set up as a function with a single component and the
+    // given initial time; the selected spatial component starts at zero.
+    template <int dim>
+    MultiComponentFunction<dim>::
+    MultiComponentFunction (const double initial_time)
+      :
+      Function<dim> (1, initial_time),
+      comp (0)
+    {
+    }
+
+
+    // Remember the spatial component to work on. The index is checked
+    // against the valid range [0,dim) with an assertion first.
+    template <int dim>
+    void MultiComponentFunction<dim>::set_component(const unsigned int d)
+    {
+      Assert (d<dim,
+              ExcIndexRange (d, 0, dim));
+
+      this->comp = d;
+    }
+
+
+    // With this class defined, we declare classes that describe the boundary
+    // conditions for velocity and pressure:
+    template <int dim>
+    class Velocity : public MultiComponentFunction<dim>
+    {
+    public:
+      // The argument is handed on to the MultiComponentFunction base class.
+      Velocity (const double initial_time = 0.0);
+
+      // Value of the currently selected velocity component at the point
+      // <code>p</code>. Which component is meant is determined by
+      // <code>set_component()</code>.
+      virtual double value (const Point<dim> &p,
+                            const unsigned int component = 0) const;
+
+      // Same as above, but for a whole list of points at once;
+      // <code>values</code> must have the same size as <code>points</code>.
+      virtual void value_list (const std::vector< Point<dim> > &points,
+                               std::vector<double> &values,
+                               const unsigned int component = 0) const;
+    };
+
+
+    // Nothing to do here except passing the initial time to the base class.
+    template <int dim>
+    Velocity<dim>::Velocity (const double initial_time)
+      : MultiComponentFunction<dim> (initial_time)
+    {
+    }
+
+
+    // Evaluate the velocity at every point of <code>points</code> and store
+    // the results, in the same order, in <code>values</code>. The unnamed
+    // last argument is not used. The call to <code>value()</code> is
+    // explicitly qualified with the class name.
+    template <int dim>
+    void Velocity<dim>::value_list (const std::vector<Point<dim> > &points,
+                                    std::vector<double> &values,
+                                    const unsigned int) const
+    {
+      const unsigned int n_points = points.size();
+      Assert (values.size() == n_points,
+              ExcDimensionMismatch (values.size(), n_points));
+
+      std::vector<double>::iterator result = values.begin();
+      for (typename std::vector<Point<dim> >::const_iterator
+           point = points.begin();
+           point != points.end();
+           ++point, ++result)
+        *result = Velocity<dim>::value (*point);
+    }
+
+
+    // Every component other than the zeroth one vanishes. The zeroth
+    // component is a parabola in the second coordinate that is zero at
+    // p(1)=0 and p(1)=H and equals Um at p(1)=H/2. The unnamed second
+    // argument is not used; the component is taken from the base class.
+    template <int dim>
+    double Velocity<dim>::value (const Point<dim> &p,
+                                 const unsigned int) const
+    {
+      if (this->comp != 0)
+        return 0.;
+
+      const double Um = 1.5;
+      const double H  = 4.1;
+      return 4.*Um*p(1)*(H - p(1))/(H*H);
+    }
+
+
+
+    template <int dim>
+    class Pressure: public Function<dim>
+    {
+    public:
+      // The argument is the time with which the Function base class is
+      // initialized.
+      Pressure (const double initial_time = 0.0);
+
+      // Value of the pressure at the point <code>p</code>.
+      virtual double value (const Point<dim> &p,
+                            const unsigned int component = 0) const;
+
+      // Same as above, but for a whole list of points at once;
+      // <code>values</code> must have the same size as <code>points</code>.
+      virtual void value_list (const std::vector< Point<dim> > &points,
+                               std::vector<double> &values,
+                               const unsigned int component = 0) const;
+    };
+
+    // The pressure is a function with one component that starts at the given
+    // initial time.
+    template <int dim>
+    Pressure<dim>::Pressure (const double initial_time)
+      : Function<dim> (1, initial_time)
+    {
+    }
+
+
+    // The pressure decreases linearly in the first coordinate and is zero
+    // where that coordinate equals 25. The unnamed second argument is not
+    // used.
+    template <int dim>
+    double Pressure<dim>::value (const Point<dim> &p,
+                                 const unsigned int) const
+    {
+      const double x = p(0);
+      return 25.-x;
+    }
+
+    // Evaluate the pressure at every point of <code>points</code> and store
+    // the results, in the same order, in <code>values</code>. The unnamed
+    // last argument is not used. The call to <code>value()</code> is
+    // explicitly qualified with the class name.
+    template <int dim>
+    void Pressure<dim>::value_list (const std::vector<Point<dim> > &points,
+                                    std::vector<double> &values,
+                                    const unsigned int) const
+    {
+      const unsigned int n_points = points.size();
+      Assert (values.size() == n_points, ExcDimensionMismatch (values.size(), n_points));
+
+      std::vector<double>::iterator result = values.begin();
+      for (typename std::vector<Point<dim> >::const_iterator
+           point = points.begin();
+           point != points.end();
+           ++point, ++result)
+        *result = Pressure<dim>::value (*point);
+    }
+  }
+
+
+
+  // @sect3{The <code>NavierStokesProjection</code> class}
+
+  // Now for the main class of the program. It implements the various versions
+  // of the projection method for Navier-Stokes equations.  The names for all
+  // the methods and member variables should be self-explanatory, taking into
+  // account the implementation details given in the introduction.
+  template <int dim>
+  class NavierStokesProjection
+  {
+  public:
+    // The constructor copies everything it needs from the run-time
+    // parameters stored in <code>data</code>.
+    NavierStokesProjection (const RunTimeParameters::Data_Storage &data);
+
+    // Run the simulation. NOTE(review): <code>n_plots</code> presumably
+    // controls how often output is produced -- confirm against the
+    // definition of this function.
+    void run (const bool         verbose    = false,
+              const unsigned int n_plots = 10);
+  protected:
+    // The formulation of the method (standard or rotational).
+    RunTimeParameters::MethodFormulation type;
+
+    // Polynomial degree of the pressure space, time step size, initial time,
+    // final time and Reynolds number, all taken from the run-time
+    // parameters.
+    const unsigned int deg;
+    const double       dt;
+    const double       t_0, T, Re;
+
+    // Velocity boundary data, the map of constrained degrees of freedom to
+    // their values, and the list of boundary indicators.
+    EquationData::Velocity<dim>       vel_exact;
+    std::map<types::global_dof_index, double>    boundary_values;
+    std::vector<types::boundary_id> boundary_ids;
+
+    Triangulation<dim> triangulation;
+
+    // Scalar finite elements for one velocity component (degree
+    // <code>deg+1</code>) and for the pressure (degree <code>deg</code>).
+    FE_Q<dim>          fe_velocity;
+    FE_Q<dim>          fe_pressure;
+
+    // Both DoFHandler objects are attached to the same triangulation.
+    DoFHandler<dim>    dof_handler_velocity;
+    DoFHandler<dim>    dof_handler_pressure;
+
+    // Gauss quadrature formulas with <code>deg+1</code> and
+    // <code>deg+2</code> points per direction, respectively.
+    QGauss<dim>        quadrature_pressure;
+    QGauss<dim>        quadrature_velocity;
+
+    SparsityPattern    sparsity_pattern_velocity;
+    SparsityPattern    sparsity_pattern_pressure;
+    SparsityPattern    sparsity_pattern_pres_vel;
+
+    // The matrices of the scheme. Objects declared as arrays of length
+    // <code>dim</code> exist once per space dimension.
+    SparseMatrix<double> vel_Laplace_plus_Mass;
+    SparseMatrix<double> vel_it_matrix[dim];
+    SparseMatrix<double> vel_Mass;
+    SparseMatrix<double> vel_Laplace;
+    SparseMatrix<double> vel_Advection;
+    SparseMatrix<double> pres_Laplace;
+    SparseMatrix<double> pres_Mass;
+    SparseMatrix<double> pres_Diff[dim];
+    SparseMatrix<double> pres_iterative;
+
+    // Solution vectors at the current and previous time level, together
+    // with auxiliary and temporary vectors.
+    Vector<double> pres_n;
+    Vector<double> pres_n_minus_1;
+    Vector<double> phi_n;
+    Vector<double> phi_n_minus_1;
+    Vector<double> u_n[dim];
+    Vector<double> u_n_minus_1[dim];
+    Vector<double> u_star[dim];
+    Vector<double> force[dim];
+    Vector<double> v_tmp;
+    Vector<double> pres_tmp;
+    Vector<double> rot_u;
+
+    // Incomplete LU preconditioners and sparse direct (UMFPACK) solvers.
+    SparseILU<double> prec_velocity[dim];
+    SparseILU<double> prec_pres_Laplace;
+    SparseDirectUMFPACK prec_mass;
+    SparseDirectUMFPACK prec_vel_mass;
+
+    // Exception that reports a time step size outside of the permitted
+    // range; the first argument is the offending value, the second one the
+    // upper bound of the range.
+    DeclException2 (ExcInvalidTimeStep,
+                    double, double,
+                    << " The time step " << arg1 << " is out of range."
+                    << std::endl
+                    << " The permitted range is (0," << arg2 << "]");
+
+    void create_triangulation_and_dofs (const unsigned int n_refines);
+
+    void initialize();
+
+    void interpolate_velocity ();
+
+    void diffusion_step (const bool reinit_prec);
+
+    void projection_step (const bool reinit_prec);
+
+    void update_pressure (const bool reinit_prec);
+
+  private:
+    // Settings for the velocity solve, copied from the "Data solve
+    // velocity" entries of the run-time parameters.
+    unsigned int vel_max_its;
+    unsigned int vel_Krylov_size;
+    unsigned int vel_off_diagonals;
+    unsigned int vel_update_prec;
+    double       vel_eps;
+    double       vel_diag_strength;
+
+    void initialize_velocity_matrices();
+
+    void initialize_pressure_matrices();
+
+    // The next few structures and functions are for doing various things in
+    // parallel. They follow the scheme laid out in @ref threads, using the
+    // WorkStream class. As explained there, this requires us to declare two
+    // structures for each of the assemblers, a per-task data and a scratch
+    // data structure. These are then handed over to functions that assemble
+    // local contributions and that copy these local contributions to the
+    // global objects.
+    //
+    // One of the things that are specific to this program is that we don't
+    // just have a single DoFHandler object that represents both the
+    // velocities and the pressure, but we use individual DoFHandler objects
+    // for these two kinds of variables. We pay for this optimization when we
+    // want to assemble terms that involve both variables, such as the
+    // divergence of the velocity and the gradient of the pressure, times the
+    // respective test functions. When doing so, we can no longer use just a
+    // single FEValues object, but rather we need two, and they need to be
+    // initialized with cell iterators that point to the same cell in the
+    // triangulation but different DoFHandlers.
+    //
+    // To do this in practice, we declare a "synchronous" iterator -- an
+    // object that internally consists of several (in our case two) iterators,
+    // and each time the synchronous iteration is moved up one step, each of
+    // the iterators stored internally is moved up one step as well, thereby
+    // always staying in sync. As it so happens, there is a deal.II class that
+    // facilitates this sort of thing.
+    typedef std_cxx11::tuple< typename DoFHandler<dim>::active_cell_iterator,
+            typename DoFHandler<dim>::active_cell_iterator
+            > IteratorTuple;
+
+    typedef SynchronousIterators<IteratorTuple> IteratorPair;
+
+    void initialize_gradient_operator();
+
+    // Per-task data for the assembly of the gradient operator: the
+    // component <code>d</code>, the number of velocity and pressure degrees
+    // of freedom per cell, the local matrix and the global indices of the
+    // local degrees of freedom.
+    struct InitGradPerTaskData
+    {
+      unsigned int              d;
+      unsigned int              vel_dpc;
+      unsigned int              pres_dpc;
+      FullMatrix<double>        local_grad;
+      std::vector<types::global_dof_index> vel_local_dof_indices;
+      std::vector<types::global_dof_index> pres_local_dof_indices;
+
+      InitGradPerTaskData (const unsigned int dd,
+                           const unsigned int vdpc,
+                           const unsigned int pdpc)
+        :
+        d(dd),
+        vel_dpc (vdpc),
+        pres_dpc (pdpc),
+        local_grad (vdpc, pdpc),
+        vel_local_dof_indices (vdpc),
+        pres_local_dof_indices (pdpc)
+      {}
+    };
+
+    // Scratch data for the assembly of the gradient operator: the number of
+    // quadrature points and one FEValues object each for velocity and
+    // pressure, both built on the same quadrature formula.
+    struct InitGradScratchData
+    {
+      unsigned int  nqp;
+      FEValues<dim> fe_val_vel;
+      FEValues<dim> fe_val_pres;
+      InitGradScratchData (const FE_Q<dim> &fe_v,
+                           const FE_Q<dim> &fe_p,
+                           const QGauss<dim> &quad,
+                           const UpdateFlags flags_v,
+                           const UpdateFlags flags_p)
+        :
+        nqp (quad.size()),
+        fe_val_vel (fe_v, quad, flags_v),
+        fe_val_pres (fe_p, quad, flags_p)
+      {}
+      // The copy constructor builds new FEValues objects from the finite
+      // element, quadrature formula and update flags of the source object
+      // rather than copying the FEValues objects themselves.
+      InitGradScratchData (const InitGradScratchData &data)
+        :
+        nqp (data.nqp),
+        fe_val_vel (data.fe_val_vel.get_fe(),
+                    data.fe_val_vel.get_quadrature(),
+                    data.fe_val_vel.get_update_flags()),
+        fe_val_pres (data.fe_val_pres.get_fe(),
+                     data.fe_val_pres.get_quadrature(),
+                     data.fe_val_pres.get_update_flags())
+      {}
+    };
+
+    void assemble_one_cell_of_gradient (const IteratorPair  &SI,
+                                        InitGradScratchData &scratch,
+                                        InitGradPerTaskData &data);
+
+    void copy_gradient_local_to_global (const InitGradPerTaskData &data);
+
+    // The same general layout also applies to the following classes and
+    // functions implementing the assembly of the advection term:
+    void assemble_advection_term();
+
+    // Per-task data for the advection term: a square local matrix and the
+    // global indices of the local degrees of freedom.
+    struct AdvectionPerTaskData
+    {
+      FullMatrix<double>        local_advection;
+      std::vector<types::global_dof_index> local_dof_indices;
+      AdvectionPerTaskData (const unsigned int dpc)
+        :
+        local_advection (dpc, dpc),
+        local_dof_indices (dpc)
+      {}
+    };
+
+    // Scratch data for the advection term: the number of quadrature points
+    // and of degrees of freedom per cell, work arrays with one entry per
+    // quadrature point, and the FEValues object.
+    struct AdvectionScratchData
+    {
+      unsigned int                 nqp;
+      unsigned int                 dpc;
+      std::vector< Point<dim> >    u_star_local;
+      std::vector< Tensor<1,dim> > grad_u_star;
+      std::vector<double>          u_star_tmp;
+      FEValues<dim>                fe_val;
+      AdvectionScratchData (const FE_Q<dim> &fe,
+                            const QGauss<dim> &quad,
+                            const UpdateFlags flags)
+        :
+        nqp (quad.size()),
+        dpc (fe.dofs_per_cell),
+        u_star_local (nqp),
+        grad_u_star (nqp),
+        u_star_tmp (nqp),
+        fe_val (fe, quad, flags)
+      {}
+
+      // As above, the copy constructor creates a new FEValues object from
+      // the settings of the source; the work arrays are only sized, their
+      // contents are not copied.
+      AdvectionScratchData (const AdvectionScratchData &data)
+        :
+        nqp (data.nqp),
+        dpc (data.dpc),
+        u_star_local (nqp),
+        grad_u_star (nqp),
+        u_star_tmp (nqp),
+        fe_val (data.fe_val.get_fe(),
+                data.fe_val.get_quadrature(),
+                data.fe_val.get_update_flags())
+      {}
+    };
+
+    void assemble_one_cell_of_advection (const typename DoFHandler<dim>::active_cell_iterator &cell,
+                                         AdvectionScratchData &scratch,
+                                         AdvectionPerTaskData &data);
+
+    void copy_advection_local_to_global (const AdvectionPerTaskData &data);
+
+    // The final few functions implement the diffusion solve as well as
+    // postprocessing the output, including computing the curl of the
+    // velocity:
+    void diffusion_component_solve (const unsigned int d);
+
+    void output_results (const unsigned int step);
+
+    void assemble_vorticity (const bool reinit_prec);
+  };
+
+
+
+  // @sect4{ <code>NavierStokesProjection::NavierStokesProjection</code> }
+
+  // In the constructor, we just read all the data from the
+  // <code>Data_Storage</code> object that is passed as an argument, verify
+  // that the data we read is reasonable and, finally, create the
+  // triangulation and load the initial data.
+  template <int dim>
+  NavierStokesProjection<dim>::NavierStokesProjection(const RunTimeParameters::Data_Storage &data)
+    : type (data.form),
+      deg (data.pressure_degree),
+      dt (data.dt),
+      t_0 (data.initial_time),
+      T (data.final_time),
+      Re (data.Reynolds),
+      vel_exact (data.initial_time),
+      fe_velocity (deg+1),
+      fe_pressure (deg),
+      dof_handler_velocity (triangulation),
+      dof_handler_pressure (triangulation),
+      quadrature_pressure (deg+1),
+      quadrature_velocity (deg+2),
+      vel_max_its (data.vel_max_iterations),
+      vel_Krylov_size (data.vel_Krylov_size),
+      vel_off_diagonals (data.vel_off_diagonals),
+      vel_update_prec (data.vel_update_prec),
+      vel_eps (data.vel_eps),
+      vel_diag_strength (data.vel_diag_strength)
+  {
+    // A pressure degree below one is only warned about, not rejected.
+    if (deg < 1)
+      std::cout << " WARNING: The chosen pair of finite element spaces is not stable." << std::endl
+                << " The obtained results will be nonsense" << std::endl;
+
+    // The time step must be positive and at most half the final time.
+    AssertThrow (!  ( (dt <= 0.) || (dt > .5*T)), ExcInvalidTimeStep (dt, .5*T));
+
+    // Build the mesh and all mesh-dependent data, then load the initial state.
+    create_triangulation_and_dofs (data.n_global_refines);
+    initialize();
+  }
+
+
+  // @sect4{ <code>NavierStokesProjection::create_triangulation_and_dofs</code> }
+
+  // The method that creates the triangulation and refines it the needed
+  // number of times.  After creating the triangulation, it creates the mesh
+  // dependent data, i.e. it distributes degrees of freedom and renumbers
+  // them, and initializes the matrices and vectors that we will use.
+  template <int dim>
+  void
+  NavierStokesProjection<dim>::
+  create_triangulation_and_dofs (const unsigned int n_refines)
+  {
+    // Read the coarse mesh from the UCD file "nsbench2.inp".
+    GridIn<dim> grid_in;
+    grid_in.attach_triangulation (triangulation);
+    {
+      std::string filename = "nsbench2.inp";
+      std::ifstream file (filename.c_str());
+      // AssertThrow, not Assert: a missing file must also be caught in release builds.
+      AssertThrow (file, ExcFileNotOpen (filename.c_str()));
+      grid_in.read_ucd (file);
+    }
+
+    std::cout << "Number of refines = " << n_refines << std::endl;
+    triangulation.refine_global (n_refines);
+    std::cout << "Number of active cells: " << triangulation.n_active_cells()
+              << std::endl;
+
+    boundary_ids = triangulation.get_boundary_ids();
+
+    dof_handler_velocity.distribute_dofs (fe_velocity);
+    DoFRenumbering::boost::Cuthill_McKee (dof_handler_velocity);
+    dof_handler_pressure.distribute_dofs (fe_pressure);
+    DoFRenumbering::boost::Cuthill_McKee (dof_handler_pressure);
+
+    initialize_velocity_matrices();
+    initialize_pressure_matrices();
+    initialize_gradient_operator();
+
+    pres_n.reinit (dof_handler_pressure.n_dofs());
+    pres_n_minus_1.reinit (dof_handler_pressure.n_dofs());
+    phi_n.reinit (dof_handler_pressure.n_dofs());
+    phi_n_minus_1.reinit (dof_handler_pressure.n_dofs());
+    pres_tmp.reinit (dof_handler_pressure.n_dofs());
+    for (unsigned int d=0; d<dim; ++d)
+      {
+        u_n[d].reinit (dof_handler_velocity.n_dofs());
+        u_n_minus_1[d].reinit (dof_handler_velocity.n_dofs());
+        u_star[d].reinit (dof_handler_velocity.n_dofs());
+        force[d].reinit (dof_handler_velocity.n_dofs());
+      }
+    v_tmp.reinit (dof_handler_velocity.n_dofs());
+    rot_u.reinit (dof_handler_velocity.n_dofs());
+
+    std::cout << "dim (X_h) = " << (dof_handler_velocity.n_dofs()*dim)
+              << std::endl
+              << "dim (M_h) = " << dof_handler_pressure.n_dofs()
+              << std::endl
+              << "Re        = " << Re
+              << std::endl
+              << std::endl;
+  }
+
+
+  // @sect4{ <code>NavierStokesProjection::initialize</code> }
+
+  // This method creates the constant matrices and loads the initial data
+  template <int dim>
+  void NavierStokesProjection<dim>::initialize()
+  {
+    // Constant part of the velocity matrix: Laplace/Re + 1.5/dt * Mass.
+    vel_Laplace_plus_Mass = 0.;
+    vel_Laplace_plus_Mass.add (1./Re, vel_Laplace);
+    vel_Laplace_plus_Mass.add (1.5/dt, vel_Mass);
+    // Pressure at t_0 and at t_0 + dt; both increments phi start at zero.
+    EquationData::Pressure<dim> pressure (t_0);
+    VectorTools::interpolate (dof_handler_pressure, pressure, pres_n_minus_1);
+    pressure.advance_time (dt);
+    VectorTools::interpolate (dof_handler_pressure, pressure, pres_n);
+    phi_n_minus_1 = 0.;
+    phi_n = 0.;
+    for (unsigned int comp=0; comp<dim; ++comp) // both velocity levels start from the zero function
+      {
+        vel_exact.set_time (t_0);
+        vel_exact.set_component (comp);
+        VectorTools::interpolate (dof_handler_velocity, ZeroFunction<dim>(), u_n_minus_1[comp]);
+        vel_exact.advance_time (dt);
+        VectorTools::interpolate (dof_handler_velocity, ZeroFunction<dim>(), u_n[comp]);
+      }
+  }
+
+
+  // @sect4{ The <code>NavierStokesProjection::initialize_*_matrices</code> methods }
+
+  // In this set of methods we initialize the sparsity patterns, the
+  // constraints (if any) and assemble the matrices that do not depend on the
+  // timestep <code>dt</code>. Note that for the Laplace and mass matrices, we
+  // can use functions in the library that do this. Because the expensive
+  // operations of this function -- creating the two matrices -- are entirely
+  // independent, we could in principle mark them as tasks that can be worked
+  // on in %parallel using the Threads::new_task functions. We won't do that
+  // here since these functions internally already are parallelized, and in
+  // particular because the current function is only called once per program
+  // run and so does not incur a cost in each time step. The necessary
+  // modifications would be quite straightforward, however.
+  template <int dim>
+  void NavierStokesProjection<dim>::initialize_velocity_matrices()
+  {
+    // One square sparsity pattern serves every matrix on the velocity space.
+    {
+      const types::global_dof_index n_dofs = dof_handler_velocity.n_dofs();
+      DynamicSparsityPattern dsp (n_dofs, n_dofs);
+      DoFTools::make_sparsity_pattern (dof_handler_velocity, dsp);
+      sparsity_pattern_velocity.copy_from (dsp);
+    }
+    vel_Mass.reinit (sparsity_pattern_velocity);
+    vel_Laplace.reinit (sparsity_pattern_velocity);
+    vel_Advection.reinit (sparsity_pattern_velocity);
+    vel_Laplace_plus_Mass.reinit (sparsity_pattern_velocity);
+    for (unsigned int d=0; d<dim; ++d)
+      vel_it_matrix[d].reinit (sparsity_pattern_velocity);
+
+    // Mass and Laplace matrices are assembled once by library functions.
+    MatrixCreator::create_mass_matrix (dof_handler_velocity,
+                                       quadrature_velocity, vel_Mass);
+    MatrixCreator::create_laplace_matrix (dof_handler_velocity,
+                                          quadrature_velocity, vel_Laplace);
+  }
+
+  // The initialization of the matrices that act on the pressure space is
+  // similar to the ones that act on the velocity space.
+  template <int dim>
+  void NavierStokesProjection<dim>::initialize_pressure_matrices()
+  {
+    // One square sparsity pattern serves every matrix on the pressure space.
+    {
+      const types::global_dof_index n_dofs = dof_handler_pressure.n_dofs();
+      DynamicSparsityPattern dsp (n_dofs, n_dofs);
+      DoFTools::make_sparsity_pattern (dof_handler_pressure, dsp);
+      sparsity_pattern_pressure.copy_from (dsp);
+    }
+
+    pres_Laplace.reinit (sparsity_pattern_pressure);
+    pres_iterative.reinit (sparsity_pattern_pressure);
+    pres_Mass.reinit (sparsity_pattern_pressure);
+
+    // Laplace and mass matrices are assembled once by library functions.
+    MatrixCreator::create_laplace_matrix (dof_handler_pressure,
+                                          quadrature_pressure, pres_Laplace);
+    MatrixCreator::create_mass_matrix (dof_handler_pressure,
+                                       quadrature_pressure, pres_Mass);
+  }
+
+
+  // For the gradient operator, we start by initializing the sparsity pattern
+  // and compressing it.  It is important to notice here that the gradient
+  // operator acts from the pressure space into the velocity space, so we have
+  // to deal with two different finite element spaces. To keep the loops
+  // synchronized, we use the <code>typedef</code>s that we have defined
+  // before, namely <code>IteratorTuple</code> and
+  // <code>IteratorPair</code>.
+  template <int dim>
+  void NavierStokesProjection<dim>::initialize_gradient_operator()
+  {
+    // Rectangular pattern: velocity dofs index the rows, pressure dofs the columns.
+    {
+      DynamicSparsityPattern dsp (dof_handler_velocity.n_dofs(), dof_handler_pressure.n_dofs());
+      DoFTools::make_sparsity_pattern (dof_handler_velocity, dof_handler_pressure, dsp);
+      sparsity_pattern_pres_vel.copy_from (dsp);
+    }
+
+    InitGradPerTaskData per_task_data (0, fe_velocity.dofs_per_cell,
+                                       fe_pressure.dofs_per_cell);
+    InitGradScratchData scratch_data (fe_velocity,
+                                      fe_pressure,
+                                      quadrature_velocity,
+                                      update_gradients | update_JxW_values,
+                                      update_values);
+
+    // Paired iterators that step through both DoFHandlers simultaneously.
+    const IteratorPair first_cells (IteratorTuple (dof_handler_velocity.begin_active(),
+                                                   dof_handler_pressure.begin_active()));
+    const IteratorPair end_cells (IteratorTuple (dof_handler_velocity.end(),
+                                                 dof_handler_pressure.end()));
+
+    // One matrix per direction d, which the workers read from per_task_data.d.
+    for (unsigned int d=0; d<dim; ++d)
+      {
+        pres_Diff[d].reinit (sparsity_pattern_pres_vel);
+        per_task_data.d = d;
+        WorkStream::run (first_cells,
+                         end_cells,
+                         *this,
+                         &NavierStokesProjection<dim>::assemble_one_cell_of_gradient,
+                         &NavierStokesProjection<dim>::copy_gradient_local_to_global,
+                         scratch_data,
+                         per_task_data);
+      }
+  }
+
+  template <int dim>
+  void NavierStokesProjection<dim>::
+  assemble_one_cell_of_gradient (const IteratorPair  &SI,
+                                 InitGradScratchData &scratch,
+                                 InitGradPerTaskData &data)
+  {
+    // Element 0 of the pair is the velocity cell, element 1 the pressure cell.
+    scratch.fe_val_vel.reinit (std_cxx11::get<0> (SI.iterators));
+    scratch.fe_val_pres.reinit (std_cxx11::get<1> (SI.iterators));
+    std_cxx11::get<0> (SI.iterators)->get_dof_indices (data.vel_local_dof_indices);
+    std_cxx11::get<1> (SI.iterators)->get_dof_indices (data.pres_local_dof_indices);
+    // local_grad(i,j) = - sum_q JxW(q) * d(phi_i)/dx_d * psi_j, with d = data.d.
+    data.local_grad = 0.;
+    for (unsigned int q=0; q<scratch.nqp; ++q)
+      {
+        const double minus_JxW = -scratch.fe_val_vel.JxW(q);
+        for (unsigned int i=0; i<data.vel_dpc; ++i)
+          for (unsigned int j=0; j<data.pres_dpc; ++j)
+            data.local_grad (i, j) += minus_JxW *
+                                      scratch.fe_val_vel.shape_grad (i, q)[data.d] *
+                                      scratch.fe_val_pres.shape_value (j, q);
+      }
+  }
+
+
+  template <int dim>
+  void NavierStokesProjection<dim>::
+  copy_gradient_local_to_global(const InitGradPerTaskData &data)
+  {
+    // Add the cell matrix into pres_Diff[data.d]: velocity rows, pressure columns.
+    for (unsigned int row=0; row<data.vel_dpc; ++row)
+      for (unsigned int col=0; col<data.pres_dpc; ++col)
+        pres_Diff[data.d].add (data.vel_local_dof_indices[row], data.pres_local_dof_indices[col],
+                               data.local_grad (row, col));
+  }
+
+
+  // @sect4{ <code>NavierStokesProjection::run</code> }
+
+  // This is the time marching function, which starting at <code>t_0</code>
+  // advances in time using the projection method with time step
+  // <code>dt</code> until <code>T</code>.
+  //
+  // Its first parameter, <code>verbose</code>, indicates whether the function
+  // should output information about what it is doing at any given moment: for
+  // example, it will say whether we are working on the diffusion, projection
+  // substep; updating preconditioners etc. Rather than implementing this
+  // output using code like
+  // @code
+  //   if (verbose) std::cout << "something";
+  // @endcode
+  // we use the ConditionalOStream class to do that for us. That
+  // class takes an output stream and a condition that indicates whether the
+  // things you pass to it should be passed through to the given output
+  // stream, or should just be ignored. This way, the above code simply becomes
+  // @code
+  //   verbose_cout << "something";
+  // @endcode
+  // and does the right thing in either case.
+  template <int dim>
+  void NavierStokesProjection<dim>::run (const bool verbose,
+                                         const unsigned int output_interval)
+  {
+    ConditionalOStream verbose_cout (std::cout, verbose);
+    const unsigned int n_steps =  static_cast<unsigned int>((T - t_0)/dt);
+    vel_exact.set_time (2.*dt);
+    output_results(1);
+    for (unsigned int n = 2; n<=n_steps; ++n)
+      {
+        // The first computed step (n == 2) always sets up the preconditioners.
+        const bool first_step = (n == 2);
+        const bool reinit_vel_prec = (n % vel_update_prec == 0) || first_step;
+        if (n % output_interval == 0)
+          {
+            verbose_cout << "Plotting Solution" << std::endl;
+            output_results(n);
+          }
+        std::cout << "Step = " << n << " Time = " << (n*dt) << std::endl;
+        verbose_cout << "  Interpolating the velocity " << std::endl;
+        interpolate_velocity();
+        verbose_cout << "  Diffusion Step" << std::endl;
+        // Announce the reinitialization whenever it happens, including at n == 2.
+        if (reinit_vel_prec)
+          verbose_cout << "    With reinitialization of the preconditioner" << std::endl;
+        diffusion_step (reinit_vel_prec);
+        verbose_cout << "  Projection Step" << std::endl;
+        projection_step (first_step);
+        verbose_cout << "  Updating the Pressure" << std::endl;
+        update_pressure (first_step);
+        vel_exact.advance_time(dt);
+      }
+    output_results (n_steps);
+  }
+
+
+
+  template <int dim>
+  void NavierStokesProjection<dim>::interpolate_velocity()
+  {
+    // Linear extrapolation in time: u_star = 2 u_n - u_n_minus_1.
+    for (unsigned int comp=0; comp<dim; ++comp)
+      {
+        u_star[comp].equ (2., u_n[comp]);
+        u_star[comp] -= u_n_minus_1[comp];
+      }
+  }
+
+
+  // @sect4{<code>NavierStokesProjection::diffusion_step</code>}
+
+  // The implementation of a diffusion step. Note that the expensive operation
+  // is the diffusion solve at the end of the function, which we have to do
+  // once for each velocity component. To accelerate things a bit, we allow
+  // to do this in %parallel, using the Threads::new_task function which makes
+  // sure that the <code>dim</code> solves are all taken care of and are
+  // scheduled to available processors: if your machine has more than one
+  // processor core and no other parts of this program are using resources
+  // currently, then the diffusion solves will run in %parallel. On the other
+  // hand, if your system has only one processor core then running things in
+  // %parallel would be inefficient (since it leads, for example, to cache
+  // congestion) and things will be executed sequentially.
+  template <int dim>
+  void NavierStokesProjection<dim>::diffusion_step (const bool reinit_prec)
+  {
+    // Pressure term of the right-hand side:
+    // pres_tmp = -pres_n - 4/3 phi_n + 1/3 phi_n_minus_1.
+    pres_tmp.equ (-1., pres_n);
+    pres_tmp.add (-4./3., phi_n, 1./3., phi_n_minus_1);
+
+    assemble_advection_term();
+
+    for (unsigned int d=0; d<dim; ++d)
+      {
+        // force[d] = vel_Mass*(2/dt u_n - .5/dt u_n_minus_1) + pres_Diff[d]*pres_tmp
+        force[d] = 0.;
+        v_tmp.equ (2./dt, u_n[d]);
+        v_tmp.add (-.5/dt, u_n_minus_1[d]);
+        vel_Mass.vmult_add (force[d], v_tmp);
+        pres_Diff[d].vmult_add (force[d], pres_tmp);
+
+        // u_n_minus_1[d] has been used above, so it can take over u_n[d] now.
+        u_n_minus_1[d] = u_n[d];
+        // System matrix: constant part plus the current advection matrix.
+        vel_it_matrix[d].copy_from (vel_Laplace_plus_Mass);
+        vel_it_matrix[d].add (1., vel_Advection);
+
+        // Gather the boundary values of component d, boundary id by boundary id.
+        vel_exact.set_component (d);
+        boundary_values.clear();
+        for (std::vector<types::boundary_id>::const_iterator
+             id = boundary_ids.begin(); id != boundary_ids.end(); ++id)
+          {
+            switch (*id)
+              {
+              case 1:
+              case 4:
+                // Zero values on boundaries 1 and 4.
+                VectorTools::
+                interpolate_boundary_values (dof_handler_velocity,
+                                             *id,
+                                             ZeroFunction<dim>(),
+                                             boundary_values);
+                break;
+              case 2:
+                // Values of vel_exact on boundary 2.
+                VectorTools::
+                interpolate_boundary_values (dof_handler_velocity,
+                                             *id,
+                                             vel_exact,
+                                             boundary_values);
+                break;
+              case 3:
+                // Boundary 3: zero values for every component except the first.
+                if (d != 0)
+                  VectorTools::
+                  interpolate_boundary_values (dof_handler_velocity,
+                                               *id,
+                                               ZeroFunction<dim>(),
+                                               boundary_values);
+                break;
+              default:
+                Assert (false, ExcNotImplemented());
+              }
+          }
+        MatrixTools::apply_boundary_values (boundary_values,
+                                            vel_it_matrix[d],
+                                            u_n[d],
+                                            force[d]);
+      }
+
+    // Rebuild the preconditioners if requested, then start one solve task per
+    // velocity component and wait for all of them.
+    Threads::TaskGroup<void> tasks;
+    for (unsigned int d=0; d<dim; ++d)
+      {
+        if (reinit_prec)
+          prec_velocity[d].initialize (vel_it_matrix[d],
+                                       SparseILU<double>::
+                                       AdditionalData (vel_diag_strength,
+                                                       vel_off_diagonals));
+        tasks += Threads::new_task (&NavierStokesProjection<dim>::
+                                    diffusion_component_solve,
+                                    *this, d);
+      }
+    tasks.join_all();
+  }
+
+
+
+  template <int dim>
+  void NavierStokesProjection<dim>::diffusion_component_solve (const unsigned int d)
+  {
+    // GMRES solve for component d; the tolerance is relative to the norm of force[d].
+    const double tolerance = vel_eps*force[d].l2_norm();
+    SolverControl solver_control (vel_max_its, tolerance);
+    SolverGMRES<> gmres (solver_control, SolverGMRES<>::AdditionalData (vel_Krylov_size));
+    gmres.solve (vel_it_matrix[d], u_n[d], force[d], prec_velocity[d]);
+  }
+
+
+  // @sect4{ The <code>NavierStokesProjection::assemble_advection_term</code> method and related}
+
+  // The following few functions deal with assembling the advection terms,
+  // which is the part of the system matrix for the diffusion step that
+  // changes at every time step. As mentioned above, we will run the assembly
+  // loop over all cells in %parallel, using the WorkStream class and other
+  // facilities as described in the documentation module on @ref threads.
+  template <int dim>
+  void NavierStokesProjection<dim>::assemble_advection_term()
+  {
+    // Start from zero; the copier adds every cell contribution to vel_Advection.
+    vel_Advection = 0.;
+
+    const UpdateFlags flags = update_values | update_JxW_values | update_gradients;
+    AdvectionPerTaskData per_task_data (fe_velocity.dofs_per_cell);
+    AdvectionScratchData scratch_data (fe_velocity, quadrature_velocity, flags);
+
+    WorkStream::run (dof_handler_velocity.begin_active(),
+                     dof_handler_velocity.end(),
+                     *this,
+                     &NavierStokesProjection<dim>::assemble_one_cell_of_advection,
+                     &NavierStokesProjection<dim>::copy_advection_local_to_global,
+                     scratch_data, per_task_data);
+  }
+
+
+
+  template <int dim>
+  void NavierStokesProjection<dim>::
+  assemble_one_cell_of_advection(const typename DoFHandler<dim>::active_cell_iterator &cell,
+                                 AdvectionScratchData &scratch,
+                                 AdvectionPerTaskData &data)
+  {
+    scratch.fe_val.reinit(cell);
+    cell->get_dof_indices (data.local_dof_indices);
+    // Gather u_star at the quadrature points, one component at a time.
+    for (unsigned int d=0; d<dim; ++d)
+      {
+        scratch.fe_val.get_function_values (u_star[d], scratch.u_star_tmp);
+        for (unsigned int q=0; q<scratch.nqp; ++q)
+          scratch.u_star_local[q](d) = scratch.u_star_tmp[q];
+      }
+
+    // From here on u_star_tmp holds the divergence of u_star instead.
+    for (unsigned int q=0; q<scratch.nqp; ++q)
+      scratch.u_star_tmp[q] = 0.;
+    for (unsigned int d=0; d<dim; ++d)
+      {
+        scratch.fe_val.get_function_gradients (u_star[d], scratch.grad_u_star);
+        for (unsigned int q=0; q<scratch.nqp; ++q)
+          scratch.u_star_tmp[q] += scratch.grad_u_star[q][d];
+      }
+
+    // Entry (i,j): (u_star . grad phi_j) phi_i + 1/2 (div u_star) phi_i phi_j.
+    data.local_advection = 0.;
+    for (unsigned int q=0; q<scratch.nqp; ++q)
+      {
+        const double weight = scratch.fe_val.JxW(q);
+        for (unsigned int i=0; i<scratch.dpc; ++i)
+          for (unsigned int j=0; j<scratch.dpc; ++j)
+            data.local_advection(i,j) += (scratch.u_star_local[q] *
+                                          scratch.fe_val.shape_grad (j, q) *
+                                          scratch.fe_val.shape_value (i, q)
+                                          +
+                                          0.5 * scratch.u_star_tmp[q] *
+                                          scratch.fe_val.shape_value (i, q) *
+                                          scratch.fe_val.shape_value (j, q)) * weight;
+      }
+  }
+
+
+
+  template <int dim>
+  void NavierStokesProjection<dim>::
+  copy_advection_local_to_global(const AdvectionPerTaskData &data)
+  {
+    // Add the cell matrix into vel_Advection at the cell's global dof indices.
+    const unsigned int dpc = fe_velocity.dofs_per_cell;
+    for (unsigned int row=0; row<dpc; ++row)
+      for (unsigned int col=0; col<dpc; ++col)
+        vel_Advection.add (data.local_dof_indices[row], data.local_dof_indices[col],
+                           data.local_advection(row,col));
+  }
+
+
+
+  // @sect4{<code>NavierStokesProjection::projection_step</code>}
+
+  // This implements the projection step:
+  template <int dim>
+  void NavierStokesProjection<dim>::projection_step (const bool reinit_prec)
+  {
+    pres_iterative.copy_from (pres_Laplace);
+    // Right-hand side: pres_tmp = sum over d of pres_Diff[d]^T * u_n[d].
+    pres_tmp = 0.;
+    for (unsigned d=0; d<dim; ++d)
+      pres_Diff[d].Tvmult_add (pres_tmp, u_n[d]);
+    phi_n_minus_1 = phi_n;
+
+    // bval is static, i.e. it outlives this call: empty it before refilling so
+    // that entries from an earlier fill cannot leak into the new constraints.
+    static std::map<types::global_dof_index, double> bval;
+    if (reinit_prec)
+      {
+        bval.clear();
+        VectorTools::interpolate_boundary_values (dof_handler_pressure, 3,
+                                                  ZeroFunction<dim>(), bval);
+      }
+    MatrixTools::apply_boundary_values (bval, pres_iterative, phi_n, pres_tmp);
+
+    if (reinit_prec)
+      prec_pres_Laplace.initialize (pres_iterative,
+                                    SparseILU<double>::AdditionalData (vel_diag_strength, vel_off_diagonals));
+
+    SolverControl solvercontrol (vel_max_its, vel_eps*pres_tmp.l2_norm());
+    SolverCG<> cg (solvercontrol);
+    cg.solve (pres_iterative, phi_n, pres_tmp, prec_pres_Laplace);
+    phi_n *= 1.5/dt;
+  }
+
+
+  // @sect4{ <code>NavierStokesProjection::update_pressure</code> }
+
+  // This is the pressure update step of the projection method. It implements
+  // the standard formulation of the method, that is @f[ p^{n+1} = p^n +
+  // \phi^{n+1}, @f] or the rotational form, which is @f[ p^{n+1} = p^n +
+  // \phi^{n+1} - \frac{1}{Re} \nabla\cdot u^{n+1}.  @f]
+  template <int dim>
+  void NavierStokesProjection<dim>::update_pressure (const bool reinit_prec)
+  {
+    // Keep the old pressure, then update pres_n according to the chosen form.
+    pres_n_minus_1 = pres_n;
+
+    if (type == RunTimeParameters::METHOD_STANDARD)
+      pres_n += phi_n;
+    else if (type == RunTimeParameters::METHOD_ROTATIONAL)
+      {
+        // pres_n = 1/Re * (prec_mass applied to pres_tmp) + pres_n_minus_1 + phi_n,
+        // where prec_mass is initialized from pres_Mass on request.
+        if (reinit_prec)
+          prec_mass.initialize (pres_Mass);
+        pres_n = pres_tmp;
+        prec_mass.solve (pres_n);
+        pres_n.sadd(1./Re, 1., pres_n_minus_1);
+        pres_n += phi_n;
+      }
+    else
+      Assert (false, ExcNotImplemented());
+  }
+
+
+  // @sect4{ <code>NavierStokesProjection::output_results</code> }
+
+  // This method plots the current solution. The main difficulty is that we
+  // want to create a single output file that contains the data for all
+  // velocity components, the pressure, and also the vorticity of the flow. On
+  // the other hand, velocities and the pressure live on separate DoFHandler
+  // objects, and so can't be written to the same file using a single DataOut
+  // object. As a consequence, we have to work a bit harder to get the various
+  // pieces of data into a single DoFHandler object, and then use that to
+  // drive graphical output.
+  //
+  // We will not elaborate on this process here, but rather refer to step-32,
+  // where a similar procedure is used (and is documented) to
+  // create a joint DoFHandler object for all variables.
+  //
+  // Let us also note that we here compute the vorticity as a scalar quantity
+  // in a separate function, using the $L^2$ projection of the quantity
+  // $\text{curl} u$ onto the finite element space used for the components of
+  // the velocity. In principle, however, we could also have computed it as a
+  // pointwise quantity from the velocity, and done so through the
+  // DataPostprocessor mechanism discussed in step-29 and step-33.
+  template <int dim>
+  void NavierStokesProjection<dim>::output_results (const unsigned int step)
+  {
+    assemble_vorticity ( (step == 1));
+    // Joint element: dim velocity components, then pressure, then vorticity.
+    const FESystem<dim> joint_fe (fe_velocity, dim,
+                                  fe_pressure, 1,
+                                  fe_velocity, 1);
+    DoFHandler<dim> joint_dof_handler (triangulation);
+    joint_dof_handler.distribute_dofs (joint_fe);
+    Assert (joint_dof_handler.n_dofs() ==
+            ((dim + 1)*dof_handler_velocity.n_dofs() +
+             dof_handler_pressure.n_dofs()),
+            ExcInternalError());
+    // Not static: a function-local static would be sized only on the first call.
+    Vector<double> joint_solution (joint_dof_handler.n_dofs());
+    std::vector<types::global_dof_index> loc_joint_dof_indices (joint_fe.dofs_per_cell),
+        loc_vel_dof_indices (fe_velocity.dofs_per_cell),
+        loc_pres_dof_indices (fe_pressure.dofs_per_cell);
+    typename DoFHandler<dim>::active_cell_iterator
+    joint_cell = joint_dof_handler.begin_active(),
+    joint_endc = joint_dof_handler.end(),
+    vel_cell   = dof_handler_velocity.begin_active(),
+    pres_cell  = dof_handler_pressure.begin_active();
+    // All three DoFHandlers are built on `triangulation`; walk them together.
+    for (; joint_cell != joint_endc; ++joint_cell, ++vel_cell, ++pres_cell)
+      {
+        joint_cell->get_dof_indices (loc_joint_dof_indices);
+        vel_cell->get_dof_indices (loc_vel_dof_indices);
+        pres_cell->get_dof_indices (loc_pres_dof_indices);
+        // Base element 0 is the velocity, 1 the pressure, 2 the vorticity.
+        for (unsigned int i=0; i<joint_fe.dofs_per_cell; ++i)
+          switch (joint_fe.system_to_base_index(i).first.first)
+            {
+            case 0:
+              Assert (joint_fe.system_to_base_index(i).first.second < dim,
+                      ExcInternalError());
+              joint_solution (loc_joint_dof_indices[i]) =
+                u_n[ joint_fe.system_to_base_index(i).first.second ]
+                (loc_vel_dof_indices[ joint_fe.system_to_base_index(i).second ]);
+              break;
+            case 1:
+              Assert (joint_fe.system_to_base_index(i).first.second == 0,
+                      ExcInternalError());
+              joint_solution (loc_joint_dof_indices[i]) =
+                pres_n (loc_pres_dof_indices[ joint_fe.system_to_base_index(i).second ]);
+              break;
+            case 2:
+              Assert (joint_fe.system_to_base_index(i).first.second == 0,
+                      ExcInternalError());
+              joint_solution (loc_joint_dof_indices[i]) =
+                rot_u (loc_vel_dof_indices[ joint_fe.system_to_base_index(i).second ]);
+              break;
+            default:
+              Assert (false, ExcInternalError());
+            }
+      }
+    std::vector<std::string> joint_solution_names (dim, "v");
+    joint_solution_names.push_back ("p");
+    joint_solution_names.push_back ("rot_u");
+    DataOut<dim> data_out;
+    data_out.attach_dof_handler (joint_dof_handler);
+    std::vector< DataComponentInterpretation::DataComponentInterpretation >
+    component_interpretation (dim+2,
+                              DataComponentInterpretation::component_is_part_of_vector);
+    component_interpretation[dim]   = DataComponentInterpretation::component_is_scalar;
+    component_interpretation[dim+1] = DataComponentInterpretation::component_is_scalar;
+    data_out.add_data_vector (joint_solution, joint_solution_names,
+                              DataOut<dim>::type_dof_data,
+                              component_interpretation);
+    data_out.build_patches (deg + 1);
+    std::ofstream output (("solution-" + Utilities::int_to_string (step, 5) +
+                           ".vtk").c_str());
+    data_out.write_vtk (output);
+  }
+
+
+
+  // Following is the helper function that computes the vorticity by
+  // projecting the term $\text{curl} u$ onto the finite element space used
+  // for the components of the velocity. The function is only called whenever
+  // we generate graphical output, so not very often, and as a consequence we
+  // didn't bother parallelizing it using the WorkStream concept as we do for
+  // the other assembly functions. That should not be overly complicated,
+  // however, if needed. Moreover, the implementation that we have here only
+  // works for 2d, so we bail if that is not the case.
+  template <int dim>
+  void NavierStokesProjection<dim>::assemble_vorticity (const bool reinit_prec)
+  {
+    Assert (dim == 2, ExcNotImplemented());
+    if (reinit_prec)
+      prec_vel_mass.initialize (vel_Mass);
+
+    FEValues<dim> fe_val_vel (fe_velocity, quadrature_velocity,
+                              update_gradients | update_JxW_values | update_values);
+    const unsigned int dpc = fe_velocity.dofs_per_cell;
+    const unsigned int nqp = quadrature_velocity.size();
+    std::vector<types::global_dof_index> cell_dof_indices (dpc);
+    Vector<double> cell_rhs (dpc);
+    std::vector< Tensor<1,dim> > grad_u1 (nqp), grad_u2 (nqp);
+
+    // Assemble the load vector (d u_2/dx - d u_1/dy, phi_i) into rot_u.
+    rot_u = 0.;
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler_velocity.begin_active(),
+    end  = dof_handler_velocity.end();
+    for (; cell != end; ++cell)
+      {
+        fe_val_vel.reinit (cell);
+        cell->get_dof_indices (cell_dof_indices);
+        fe_val_vel.get_function_gradients (u_n[0], grad_u1);
+        fe_val_vel.get_function_gradients (u_n[1], grad_u2);
+        cell_rhs = 0.;
+        for (unsigned int q=0; q<nqp; ++q)
+          {
+            const double curl = grad_u2[q][0] - grad_u1[q][1];
+            for (unsigned int i=0; i<dpc; ++i)
+              cell_rhs(i) += curl * fe_val_vel.shape_value (i, q) * fe_val_vel.JxW(q);
+          }
+
+        for (unsigned int i=0; i<dpc; ++i)
+          rot_u (cell_dof_indices[i]) += cell_rhs(i);
+      }
+
+    // Solve with prec_vel_mass (initialized from vel_Mass) to obtain the projection.
+    prec_vel_mass.solve (rot_u);
+  }
+}
+
+
+// @sect3{ The main function }
+
+// The main function looks very much like in all the other tutorial programs,
+// so there is little to comment on here:
+int main()
+{
+  // Separator line printed around every status message below.
+  const char *const separator = "----------------------------------------------------";
+
+  try
+    {
+      using namespace dealii;
+      using namespace Step35;
+
+      // Run-time parameters come from "parameter-file.prm".
+      RunTimeParameters::Data_Storage data;
+      data.read_data ("parameter-file.prm");
+
+      deallog.depth_console (data.verbose ? 2 : 0);
+
+      // Two-dimensional simulation.
+      NavierStokesProjection<2> test (data);
+      test.run (data.verbose, data.output_interval);
+    }
+  catch (const std::exception &exc)
+    {
+      std::cerr << std::endl << std::endl
+                << separator << std::endl;
+      std::cerr << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << separator << std::endl;
+      return 1;
+    }
+  catch (...)
+    {
+      std::cerr << std::endl << std::endl
+                << separator << std::endl;
+      std::cerr << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << separator << std::endl;
+      return 1;
+    }
+
+  std::cout << separator << std::endl
+            << "Apparently everything went fine!" << std::endl
+            << "Don't forget to brush your teeth :-)" << std::endl
+            << std::endl;
+  return 0;
+}
diff --git a/examples/step-35/zigzag.inp b/examples/step-35/zigzag.inp
new file mode 100644
index 0000000..ceeb717
--- /dev/null
+++ b/examples/step-35/zigzag.inp
@@ -0,0 +1,102 @@
+45 56 0 0 0
+0  0  1   0.
+1  3  1   0.
+2  4.5  0   0.
+3  6  1   0.
+4  7.5  0   0.
+5  9  1   0.
+6  10.5  0   0.
+7  12  1   0.
+8  15  1   0.
+9  0  1.6   0.
+10  3  1.6   0.
+11  4.5  1   0.
+12  6  1.6   0.
+13  7.5  1   0.
+14  9  1.6   0.
+15  10.5  1   0.
+16  12  1.6   0.
+17  15  1.6   0.
+18  0  2.5   0.
+19  3  2.5   0.
+20  4.5  2.5   0.
+21  6  2.5   0.
+22  7.5  2.5   0.
+23  9  2.5   0.
+24  10.5  2.5   0.
+25  12  2.5   0.
+26  15  2.5   0.
+27  0  3.4   0.
+28  3  3.4   0.
+29  4.5  4   0.
+30  6  3.4   0.
+31  7.5  4   0.
+32  9  3.4   0.
+33  10.5  4   0.
+34  12  3.4   0.
+35  15  3.4   0.
+36  0  4   0.
+37  3  4   0.
+38  4.5  5   0.
+39  6  4   0.
+40  7.5  5   0.
+41  9  4   0.
+42  10.5  5   0.
+43  12  4   0.
+44  15  4   0.
+1  1 quad 0 1 10 9
+2  1 quad 1 2 11 10
+3  1 quad 2 3 12 11
+4  1 quad 3 4 13 12
+5  1 quad 4 5 14 13
+6  1 quad 5 6 15 14
+7  1 quad 6 7 16 15
+8  1 quad 7 8 17 16
+9  1 quad 9 10 19 18
+10  1 quad 10 11 20 19
+11  1 quad 11 12 21 20
+12  1 quad 12 13 22 21
+13  1 quad 13 14 23 22
+14  1 quad 14 15 24 23
+15  1 quad 15 16 25 24
+16  1 quad 16 17 26 25
+17  1 quad 18 19 28 27
+18  1 quad 19 20 29 28
+19  1 quad 20 21 30 29
+20  1 quad 21 22 31 30
+21  1 quad 22 23 32 31
+22  1 quad 23 24 33 32
+23  1 quad 24 25 34 33
+24  1 quad 25 26 35 34
+25  1 quad 27 28 37 36
+26  1 quad 28 29 38 37
+27  1 quad 29 30 39 38
+28  1 quad 30 31 40 39
+29  1 quad 31 32 41 40
+30  1 quad 32 33 42 41
+31  1 quad 33 34 43 42
+32  1 quad 34 35 44 43
+33 0 line 0 1
+34 0 line 1 2
+35 0 line 2 3
+36 0 line 3 4
+37 0 line 4 5
+38 0 line 5 6
+39 0 line 6 7
+40 0 line 7 8
+41 0 line 44 43
+42 0 line 43 42
+43 0 line 42 41
+44 0 line 41 40
+45 0 line 40 39
+46 0 line 39 38
+47 0 line 38 37
+48 0 line 37 36
+49 1 line 0 9
+50 1 line 9 18
+51 1 line 18 27
+52 1 line 27 36
+53 2 line 8 17
+54 2 line 17 26
+55 2 line 26 35
+56 2 line 35 44
diff --git a/examples/step-36/CMakeLists.txt b/examples/step-36/CMakeLists.txt
new file mode 100644
index 0000000..daecdc8
--- /dev/null
+++ b/examples/step-36/CMakeLists.txt
@@ -0,0 +1,51 @@
+##
+#  CMake script for the step-36 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-36")
+
+# Declare all source files the target consists of. Here, this is only
+# the one ${TARGET}.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+#
+# Are all dependencies fulfilled? This step needs both PETSc and SLEPc.
+#
+IF(NOT DEAL_II_WITH_PETSC OR NOT DEAL_II_WITH_SLEPC)
+  MESSAGE(FATAL_ERROR "
+Error! The deal.II library found at ${DEAL_II_PATH} was not configured with
+    DEAL_II_WITH_PETSC = ON
+    DEAL_II_WITH_SLEPC = ON
+One or all of these are OFF in your installation but are required for this tutorial step."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-36/doc/builds-on b/examples/step-36/doc/builds-on
new file mode 100644
index 0000000..48a0f73
--- /dev/null
+++ b/examples/step-36/doc/builds-on
@@ -0,0 +1 @@
+step-4
diff --git a/examples/step-36/doc/intro.dox b/examples/step-36/doc/intro.dox
new file mode 100644
index 0000000..799fd13
--- /dev/null
+++ b/examples/step-36/doc/intro.dox
@@ -0,0 +1,269 @@
+<br>
+
+<i>This program was contributed by Toby D. Young and Wolfgang
+Bangerth.  </i>
+
+<a name="Preamble"></a> 
+<h1>Preamble</h1>
+
+The problem we want to solve in this example is an eigenspectrum
+problem. Eigenvalue problems appear in a wide context of problems, for
+example in the computation of electromagnetic standing waves in
+cavities, vibration modes of drum membranes, or oscillations of lakes
+and estuaries. One of the most enigmatic applications is probably the
+computation of stationary or quasi-static wave functions in quantum
+mechanics. The latter application is what we would like to investigate
+here, though the general techniques outlined in this program are of
+course equally applicable to the other applications above.
+
+Eigenspectrum problems have the general form
+@f{align*}
+	L \Psi &= \varepsilon \Psi \qquad &&\text{in}\ \Omega\quad,
+	\\
+	\Psi &= 0 &&\text{on}\ \partial\Omega\quad,
+@f}
+where the Dirichlet boundary condition on $\Psi=\Psi(\mathbf x)$ could also be
+replaced by Neumann or Robin conditions; $L$ is an operator that generally
+also contains differential operators.
+
+Under suitable conditions, the above equations have a set of solutions
+$\Psi_\ell,\varepsilon_\ell$, $\ell\in {\cal I}$, where $\cal I$ can
+be a finite or infinite set (and in the latter case it may be a discrete or
+sometimes at least in part a continuous set). In either case, let us note that
+there is 
+no longer just a single solution, but a set of solutions (the various
+eigenfunctions and corresponding eigenvalues) that we want to
+compute. The problem of numerically finding all eigenvalues
+(eigenfunctions) of such eigenvalue problems is a formidable
+challenge. In fact, if the set $\cal I$ is infinite, the challenge is
+of course intractable.  Most of the time however we are really only
+interested in a small subset of these values (functions); and
+fortunately, the interface to the SLEPc library that we will use for
+this tutorial program allows us to select which portion of the
+eigenspectrum and how many solutions we want to solve for.
+
+In this program, the eigenspectrum solvers we use are classes provided
+by deal.II that wrap around the linear algebra implementation of the
+<a href="http://www.grycap.upv.es/slepc/" target="_top">SLEPc</a>
+library; SLEPc itself builds on the <a
+href="http://www.mcs.anl.gov/petsc/" target="_top">PETSc</a> library
+for linear algebra contents.
+
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+The basic equation of stationary quantum mechanics is the
+Schrödinger equation which models the motion of particles in an
+external potential $V(\mathbf x)$. The particle is described by a wave
+function $\Psi(\mathbf x)$ that satisfies a relation of the
+(nondimensionalized) form
+@f{align*} [-\Delta + V(\mathbf x)]
+\Psi(\mathbf x) &= \varepsilon \Psi(\mathbf x) \qquad &&\text{in}\
+\Omega\quad, \\ \Psi &= 0 &&\text{on}\ \partial\Omega\quad.  
+@f} 
+As a consequence, this particle can only exist in a certain number of
+eigenstates that correspond to the energy eigenvalues
+$\varepsilon_\ell$ admitted as solutions of this equation. The
+orthodox (Copenhagen) interpretation of quantum mechanics posits that, if a
+particle has energy $\varepsilon_\ell$ then the probability of finding
+it at location $\mathbf x$ is proportional to $|\Psi_\ell(\mathbf
+x)|^2$ where $\Psi_\ell$ is the eigenfunction that corresponds to this
+eigenvalue.
+
+In order to numerically find solutions to this equation, i.e. a set of
+pairs of eigenvalues/eigenfunctions, we use the usual finite element
+approach of multiplying the equation from the left with test functions,
+integrating by parts, and searching for solutions in finite
+dimensional spaces by approximating $\Psi(\mathbf
+x)\approx\Psi_h(\mathbf x)=\sum_{j}\phi_j(\mathbf x)\tilde\psi_j$,
+where $\tilde\psi$ is a vector of expansion coefficients. We then
+immediately arrive at the following equation that discretizes the
+continuous eigenvalue problem: @f[ \sum_j [(\nabla\phi_i,
+\nabla\phi_j)+(V(\mathbf x)\phi_i,\phi_j)] \tilde{\psi}_j =
+\varepsilon_h \sum_j (\phi_i, \phi_j) \tilde{\psi}_j\quad.  @f] In
+matrix and vector notation, this equation then reads: @f[ A
+\tilde{\Psi} = \varepsilon_h M \tilde{\Psi} \quad, @f] where $A$ is
+the stiffness matrix arising from the differential operator $L$, and
+$M$ is the mass matrix. The solution to the eigenvalue problem is an
+eigenspectrum $\varepsilon_{h,\ell}$, with associated eigenfunctions
+$\Psi_\ell=\sum_j \phi_j\tilde{\psi}_j$.
+
+
+<h3>Eigenvalues and Dirichlet boundary conditions</h3>
+
+In this program, we use Dirichlet boundary conditions for the wave
+function $\Psi$. What this means, from the perspective of a finite
+element code, is that only the interior degrees of freedom are real
+degrees of <i>freedom</i>: the ones on the boundary are not free but
+are forced to have a zero value, after all. On the other hand, the
+finite element method gains much of its power and simplicity from
+the fact that we just do the same thing on every cell, without
+having to think too much about where a cell is, whether it bounds
+on a less refined cell and consequently has a hanging node, or is 
+adjacent to the boundary. All such checks would make the assembly
+of finite element linear systems unbearably difficult to write and
+even more so to read. 
+
+Consequently, of course, when you distribute degrees of freedom with
+your DoFHandler object, you don't care whether some of the degrees
+of freedom you enumerate are at a Dirichlet boundary. They all get
+numbers. We just have to take care of these degrees of freedom at a
+later time when we apply boundary values. There are two basic ways
+of doing this (either using MatrixTools::apply_boundary_values() 
+<i>after</i> assembling the linear system, or using
+ConstraintMatrix::distribute_local_to_global() <i>during</i> assembly;
+see the @ref constraints "constraints module" for more information),
+but both result in the same: a linear system that has a total
+number of rows equal to the number of <i>all</i> degrees of freedom,
+including those that lie on the boundary. However, degrees of 
+freedom that are constrained by Dirichlet conditions are separated
+from the rest of the linear system by zeroing out the corresponding
+row and column, putting a single positive entry on the diagonal,
+and the corresponding Dirichlet value on the right hand side.
+
+If you assume for a moment that we had renumbered degrees of freedom
+in such a way that all of those on the Dirichlet boundary come last,
+then the linear system we would get when solving a regular PDE with
+a right hand side would look like this:
+@f{align*}
+  \begin{pmatrix}
+    A_i & 0 \\ 0 & D_b
+  \end{pmatrix}
+  \begin{pmatrix}
+    U_i \\ U_b
+  \end{pmatrix}
+  =
+  \begin{pmatrix}
+    F_i \\ F_b
+  \end{pmatrix}.
+@f}
+Here, subscripts $i$ and $b$ correspond to interior and boundary 
+degrees of freedom, respectively. The interior degrees of freedom
+satisfy the linear system $A_i U_i=F_i$ which yields the correct
+solution in the interior, and boundary values are determined by
+$U_b = D_b^{-1} F_b$ where $D_b$ is a diagonal matrix that results
+from the process of eliminating boundary degrees of freedom, and
+$F_b$ is chosen in such a way that $U_{b,j}=D_{b,jj}^{-1} F_{b,j}$
+has the correct boundary values for every boundary degree of freedom
+$j$. (For the curious, the entries of the 
+matrix $D_b$ result from adding modified local contributions to the
+global matrix where for the local matrices the diagonal elements, if non-zero, 
+are set to their absolute value; otherwise, they are set to the average of  
+absolute values of the diagonal. This process guarantees that the entries
+of $D_b$ are positive and of a size comparable to the rest of the diagonal
+entries, ensuring that the resulting matrix does not incur unreasonable
+losses of accuracy due to roundoff involving matrix entries of drastically
+different size. The actual values that end up on the diagonal are difficult
+to predict and you should treat them as arbitrary and unpredictable, but
+positive.)
+
+For "regular" linear systems, this all leads to the correct solution.
+On the other hand, for eigenvalue problems, this is not so trivial.
+There, eliminating boundary values affects both matrices
+$A$ and $M$ that we will solve with in the current tutorial program.
+After elimination of boundary values, we then receive an eigenvalue
+problem that can be partitioned like this:
+@f{align*}
+  \begin{pmatrix}
+    A_i & 0 \\ 0 & D_A
+  \end{pmatrix}
+  \begin{pmatrix}
+    \tilde\Psi_i \\ \tilde\Psi_b
+  \end{pmatrix}
+  =
+  \epsilon_h
+  \begin{pmatrix}
+    M_i & 0 \\ 0 & D_M
+  \end{pmatrix}
+  \begin{pmatrix}
+    \tilde\Psi_i \\ \tilde\Psi_b
+  \end{pmatrix}.
+@f}
+This form makes it clear that there are two sets of eigenvalues:
+the ones we care about, and spurious eigenvalues from the
+separated problem
+@f[
+  D_A \tilde \Psi_b = \epsilon_h D_M \tilde \Psi_b.
+@f]
+These eigenvalues are spurious since they result from an eigenvalue
+system that operates only on boundary nodes -- nodes that are not
+real degrees of <i>freedom</i>.
+Of course, since the two matrices $D_A,D_M$ are diagonal, we can
+exactly quantify these spurious eigenvalues: they are
+$\varepsilon_{h,j}=A_{jj}/M_{jj}$ (where the indices
+$j$ correspond exactly to the degrees of freedom that are constrained
+by Dirichlet boundary values).
+
+So how does one deal with them? The first part is to recognize when our
+eigenvalue solver finds one of them. To this end, the program computes
+and prints an interval within which these eigenvalues lie, by computing
+the minimum and maximum of the expression $\varepsilon_{h,j}=A_{jj}/M_{jj}$
+over all constrained degrees of freedom. In the program below, this
+already suffices: we find that this interval lies outside the set of
+smallest eigenvalues and corresponding eigenfunctions we are interested
+in and compute, so there is nothing we need to do here.
+
+On the other hand, it may happen that we find that one of the eigenvalues
+we compute in this program happens to be in this interval, and in that
+case we would not know immediately whether it is a spurious or a true
+eigenvalue. In that case, one could simply scale the diagonal elements of
+either matrix after computing the two matrices, 
+thus shifting them away from the frequency of interest in the eigen-spectrum.
+This can be done by using the following code, making sure that all spurious
+eigenvalues are exactly equal to $1.234\cdot 10^5$:
+@code
+    for (unsigned int i = 0; i < dof_handler.n_dofs(); ++i)
+      if (constraints.is_constrained(i))
+        {
+          stiffness_matrix.set(i, i, 1.234e5);
+          mass_matrix.set(i, i, 1);
+        }
+@endcode 
+However, this strategy is not pursued here as the spurious eigenvalues
+we get from our program as-is happen to be greater than the lowest 
+five that we will calculate and are interested in.
+
+
+<h3>Implementation details</h3>
+
+The program below is essentially just a slightly modified version of 
+step-4. The things that are different are the following:
+
+<ul> 
+
+<li>The main class (named <code>EigenvalueProblem</code>) now no
+longer has a single solution vector, but a whole set of vectors for
+the various eigenfunctions we want to compute. Moreover, the
+<code>main</code> function, which has the top-level control over
+everything here, initializes and finalizes the interface to SLEPc and
+PETSc simultaneously via <code>SlepcInitialize</code> and
+<code>SlepcFinalize</code>.</li>
+
+<li>We use PETSc matrices and vectors as in step-17 and
+step-18 since that is what the SLEPc eigenvalue solvers
+require.</li>
+
+<li>The function <code>EigenvalueProblem::solve</code> is entirely
+different from anything seen so far in the tutorial, as it does not
+just solve a linear system but actually solves the eigenvalue problem.
+It is built on the SLEPc library, and more immediately on the deal.II
+SLEPc wrappers in the class SLEPcWrappers::SolverKrylovSchur.</li>
+
+<li>We use the ParameterHandler class to describe a few input
+parameters, such as the exact form of the potential $V({\mathbf
+x})$, the number of global refinement steps of the mesh,
+or the number of eigenvalues we want to solve for. We could go much
+further with this but stop at making only a few of the things that one
+could select at run time actual input file parameters. In order to see
+what could be done in this regard, take a look at @ref step_29
+"step-29", step-33, and in particular @ref step_19
+"step-19".</li>
+
+<li>We use the FunctionParser class to make the potential $V(\mathbf
+x)$ a run-time parameter that can be specified in the input file as a
+formula.</li>
+
+</ul>
+
+The rest of the program follows in a pretty straightforward way from 
+step-4.
diff --git a/examples/step-36/doc/kind b/examples/step-36/doc/kind
new file mode 100644
index 0000000..c1d9154
--- /dev/null
+++ b/examples/step-36/doc/kind
@@ -0,0 +1 @@
+techniques
diff --git a/examples/step-36/doc/results.dox b/examples/step-36/doc/results.dox
new file mode 100644
index 0000000..dfe6597
--- /dev/null
+++ b/examples/step-36/doc/results.dox
@@ -0,0 +1,252 @@
+<h1>Results</h1>
+
+<h2>Running the problem</h2>
+
+The problem's input is parameterized by an input file <code>\step-36.prm</code>
+which could, for example, contain the following text:
+
+@code
+set Global mesh refinement steps         = 5
+set Number of eigenvalues/eigenfunctions = 5
+set Potential                            = 0
+@endcode
+
+Here, the potential is zero inside the domain, and we know that the
+eigenvalues are given by $\lambda_{(mn)}=\frac{\pi^2}{4}(m^2+n^2)$ where
+$m,n\in{\mathbb N^+}$. Eigenfunctions are sines and cosines with $m$ and $n$
+periods in $x$ and $y$ directions. This matches the output our program
+generates:
+@code
+examples/\step-36> make run
+============================ Running \step-36
+   Number of active cells:       1024
+   Number of degrees of freedom: 1089
+   Solver converged in 67 iterations.
+
+      Eigenvalue 0 : 4.93877
+      Eigenvalue 1 : 12.3707
+      Eigenvalue 2 : 12.3707
+      Eigenvalue 3 : 19.8027
+      Eigenvalue 4 : 24.837
+
+   Job done.  @endcode These eigenvalues are exactly the ones that
+correspond to pairs $(m,n)=(1,1)$, $(1,2)$ and $(2,1)$, $(2,2)$, and
+$(3,1)$. A visualization of the corresponding eigenfunctions would
+look like this:
+
+<table width="80%">
+<tr>
+<td><img src="http://www.dealii.org/images/steps/developer/step-36.default.eigenfunction.0.png" alt=""></td>
+<td><img src="http://www.dealii.org/images/steps/developer/step-36.default.eigenfunction.1.png" alt=""></td>
+<td><img src="http://www.dealii.org/images/steps/developer/step-36.default.eigenfunction.2.png" alt=""></td>
+</tr>
+
+<tr>
+<td><img src="http://www.dealii.org/images/steps/developer/step-36.default.eigenfunction.3.png" alt=""></td>
+<td><img src="http://www.dealii.org/images/steps/developer/step-36.default.eigenfunction.4.png" alt=""></td>
+<td></td>
+</tr>
+</table>
+
+<h2>Possibilities for extensions</h2>
+
+It is always worth playing a few games in the playground! So here goes
+with a few suggestions:
+
+<ul>
+
+<li> The potential used above (called the <i>infinite well</i> because
+it is a flat potential surrounded by infinitely high walls) is
+interesting because it allows for analytically known solutions. Apart
+from that, it is rather boring, however. That said, it is trivial to
+play around with the potential by just setting it to something
+different in the input file. For example, let us assume that we wanted
+to work with the following potential in
+2d:
+@f[
+  V(x,y) = \left\{
+       \begin{array}{ll}
+         -100 & \text{if}\ \sqrt{x^2+y^2}<\frac 34 \ \text{and}
+                         \ xy>0
+         \\
+         -5 & \text{if}\ \sqrt{x^2+y^2}<\frac 34 \ \text{and}
+                         \ xy\le 0
+         \\
+         0 & \text{otherwise}
+      \end{array} \right.\quad.
+@f]
+In other words, the potential is -100 in two sectors of a circle of radius
+0.75, -5 in the other two sectors, and zero outside the circle. We can achieve
+this by using the following in the input file:
+@code
+set Potential = if (x^2 + y^2 < 0.75^2, if (x*y > 0, -100, -5), 0)
+@endcode
+If in addition we also increase the mesh refinement by one level, we get the
+following results:
+@code
+examples/\step-36> make run
+============================ Running \step-36
+   Number of active cells:       4096
+   Number of degrees of freedom: 4225
+
+   Eigenvalue 0 : -74.2562
+   Eigenvalue 1 : -72.7322
+   Eigenvalue 2 : -42.7406
+   Eigenvalue 3 : -42.2232
+   Eigenvalue 4 : -37.0744
+@endcode
+
+The output file also contains an interpolated version of the potential, which
+looks like this (note that as expected the lowest few eigenmodes have
+probability densities $|\Psi(\mathbf x)|^2$ that are significant only where the
+potential is the lowest, i.e. in the top right and bottom left sectors of the
+inner circle of the potential):
+
+<img src="http://www.dealii.org/images/steps/developer/step-36.mod.potential.png" alt="">
+
+The first five eigenfunctions are now like this:
+
+<table width="80%">
+<tr>
+<td><img src="http://www.dealii.org/images/steps/developer/step-36.mod.eigenfunction.0.png" alt=""></td>
+<td><img src="http://www.dealii.org/images/steps/developer/step-36.mod.eigenfunction.1.png" alt=""></td>
+<td><img src="http://www.dealii.org/images/steps/developer/step-36.mod.eigenfunction.2.png" alt=""></td>
+</tr>
+
+<tr>
+<td><img src="http://www.dealii.org/images/steps/developer/step-36.mod.eigenfunction.3.png" alt=""></td>
+<td><img src="http://www.dealii.org/images/steps/developer/step-36.mod.eigenfunction.4.png" alt=""></td>
+<td></td>
+</tr>
+</table>
+
+<li> In our derivation of the problem we have assumed that the
+particle is confined to a domain $\Omega$ and that at the boundary of
+this domain its probability $|\Psi|^2$ of being found is zero. This is
+equivalent to solving the eigenvalue problem on all of ${\mathbb R}^d$
+and assuming that the energy potential is finite only inside a region
+$\Omega$ and infinite outside. It is relatively easy to show that
+$|\Psi(\mathbf x)|^2=0$ at all locations $\mathbf x$ where $V(\mathbf
+x)=\infty$. So the question is what happens if our potential is not of
+this form, i.e. there is no bounded domain outside of which the
+potential is infinite? In that case, it may be worthwhile to just consider
+a very large domain at the boundary of which $V(\mathbf x)$ is at
+least very large, if not infinite. Play around with a few cases like
+this and explore how the spectrum and eigenfunctions change as we make
+the computational region larger and larger.
+
+<li> What happens if we investigate the simple harmonic oscillator
+problem $V(\mathbf x)=c|\mathbf x|^2$? This potential is exactly of
+the form discussed in the previous paragraph and has hyper spherical
+symmetry. One may want to use a large spherical domain with a large
+outer radius, to approximate the whole-space problem (say, by invoking
+GridGenerator::hyper_ball).
+
+<li> The plots above show the wave function $\Psi(\mathbf x)$, but the
+physical quantity of interest is actually the probability density
+$|\Psi(\mathbf x)|^2$ for the particle to be at location $\mathbf x$.
+Some visualization programs can compute derived quantities from the data in
+an input file, but we can also do so right away when creating the output
+file. The facility to do that is the DataPostprocessor class that can
+be used in conjunction with the DataOut class. Examples of how this
+can be done can be found in step-29 and
+step-33.
+
+<li> What happens if the particle in the box has %internal degrees of
+freedom? For example, if the particle were a spin-$1/2$ particle? In
+that case, we may want to start solving a vector-valued problem
+instead.
+
+<li> Our implementation of the deal.II library here uses the
+PETScWrappers and SLEPcWrappers and is suitable for running on serial
+machine architecture. However, for larger grids and with a larger
+number of degrees-of-freedom, we may want to run our application on
+parallel architectures. A parallel implementation of the above code
+can be particularly useful here since the generalized eigenspectrum
+problem is somewhat more expensive to solve than the standard problems
+considered in most of the earlier tutorials. Fortunately, modifying the above
+program to be MPI compliant is a relatively straightforward
+procedure. A sketch of how this can be done can be found in @ref
+step_17 "step-17".
+
+<li> Finally, there are alternatives to using the SLEPc eigenvalue
+solvers. deal.II has interfaces to one of them, ARPACK (see
+http://www.dealii.org/developer/external-libs/arpack.html), implemented in the
+ArpackSolver class. Here is a short and quick overview of what one would need
+to change to use it, provided you have a working installation of ARPACK and
+deal.II has been configured properly for it (see the deal.II ReadMe file
+at http://www.dealii.org/readme.html):
+
+First, in order to use the ARPACK interfaces, we can go back to using standard
+deal.II matrices and vectors, so we start by replacing the PETSc and SLEPc
+headers
+@code
+#include <deal.II/lac/petsc_sparse_matrix.h>
+#include <deal.II/lac/petsc_vector.h>
+#include <deal.II/lac/slepc_solver.h>
+@endcode
+with these:
+@code
+#include <deal.II/lac/arpack_solver.h>
+#include <deal.II/lac/sparse_direct.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/compressed_sparsity_pattern.h>
+@endcode
+ARPACK allows complex eigenvalues, so we will also need
+@code
+#include <complex>
+@endcode
+
+Secondly, we switch back to the deal.II matrix and vector definitions in the
+main class:
+@code
+    SparsityPattern                     sparsity_pattern;
+    SparseMatrix<double>                stiffness_matrix, mass_matrix;
+    std::vector<Vector<double> >        eigenfunctions;
+    std::vector<std::complex<double>>   eigenvalues;
+@endcode
+and initialize them as usual in <code>make_grid_and_dofs()</code>:
+@code
+    sparsity_pattern.reinit (dof_handler.n_dofs(),
+                             dof_handler.n_dofs(),
+                             dof_handler.max_couplings_between_dofs());
+
+    DoFTools::make_sparsity_pattern (dof_handler, sparsity_pattern);
+    constraints.condense (sparsity_pattern);
+    sparsity_pattern.compress();
+
+    stiffness_matrix.reinit (sparsity_pattern);
+    mass_matrix.reinit (sparsity_pattern);
+@endcode
+
+For solving the eigenvalue problem with ARPACK, we finally need to modify
+<code>solve()</code>:
+@code
+  template <int dim>
+  unsigned int EigenvalueProblem<dim>::solve ()
+  {
+    SolverControl solver_control (dof_handler.n_dofs(), 1e-9);
+
+    SparseDirectUMFPACK inverse;
+    inverse.initialize (stiffness_matrix);
+
+    const unsigned int num_arnoldi_vectors = 2*eigenvalues.size() + 2;
+    ArpackSolver::AdditionalData additional_data(num_arnoldi_vectors);
+
+    ArpackSolver eigensolver (solver_control, additional_data);
+    eigensolver.solve (stiffness_matrix,
+                       mass_matrix,
+                       inverse,
+                       eigenvalues,
+                       eigenfunctions,
+                       eigenvalues.size());
+
+    for (unsigned int i=0; i<eigenfunctions.size(); ++i)
+      eigenfunctions[i] /= eigenfunctions[i].linfty_norm ();
+
+    return solver_control.last_step ();
+  }
+@endcode
+Note how we have used an exact decomposition (using SparseDirectUMFPACK) as a
+preconditioner to ARPACK.
+</ul>
diff --git a/examples/step-36/doc/tooltip b/examples/step-36/doc/tooltip
new file mode 100644
index 0000000..1e1a5da
--- /dev/null
+++ b/examples/step-36/doc/tooltip
@@ -0,0 +1 @@
+Finding eigenvalues of the Schrödinger equation.
diff --git a/examples/step-36/step-36.cc b/examples/step-36/step-36.cc
new file mode 100644
index 0000000..13ab66d
--- /dev/null
+++ b/examples/step-36/step-36.cc
@@ -0,0 +1,524 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2009 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Authors: Toby D. Young, Polish Academy of Sciences,
+ *          Wolfgang Bangerth, Texas A&M University
+ */
+
+// @sect3{Include files}
+
+// As mentioned in the introduction, this program is essentially only a
+// slightly revised version of step-4. As a consequence, most of the following
+// include files are as used there, or at least as used already in previous
+// tutorial programs:
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/function_parser.h>
+#include <deal.II/base/parameter_handler.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/matrix_tools.h>
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/lac/full_matrix.h>
+
+// IndexSet is used to set the size of each PETScWrappers::MPI::Vector:
+#include <deal.II/base/index_set.h>
+
+// PETSc appears here because SLEPc depends on this library:
+#include <deal.II/lac/petsc_sparse_matrix.h>
+#include <deal.II/lac/petsc_parallel_vector.h>
+
+// And then we need to actually import the interfaces for solvers that SLEPc
+// provides:
+#include <deal.II/lac/slepc_solver.h>
+
+// We also need some standard C++:
+#include <fstream>
+#include <iostream>
+
+// Finally, as in previous programs, we import all the deal.II class and
+// function names into the namespace into which everything in this program
+// will go:
+namespace Step36
+{
+  using namespace dealii;
+
+  // @sect3{The <code>EigenvalueProblem</code> class template}
+
+  // Following is the class declaration for the main class template. It looks
+  // pretty much exactly like what has already been shown in step-4:
+  template <int dim>
+  class EigenvalueProblem
+  {
+  public:
+    EigenvalueProblem (const std::string &prm_file);
+    void run ();
+
+  private:
+    void make_grid_and_dofs ();
+    void assemble_system ();
+    unsigned int solve ();
+    void output_results () const;
+
+    Triangulation<dim> triangulation;
+    FE_Q<dim>          fe;
+    DoFHandler<dim>    dof_handler;
+
+    // With these exceptions: For our eigenvalue problem, we need both a
+    // stiffness matrix for the left hand side as well as a mass matrix for
+    // the right hand side. We also need not just one solution function, but a
+    // whole set of these for the eigenfunctions we want to compute, along
+    // with the corresponding eigenvalues:
+    PETScWrappers::SparseMatrix             stiffness_matrix, mass_matrix;
+    std::vector<PETScWrappers::MPI::Vector> eigenfunctions;
+    std::vector<double>                     eigenvalues;
+
+    // And then we need an object that will store several run-time parameters
+    // that we will specify in an input file:
+    ParameterHandler parameters;
+
+    // Finally, we will have an object that contains "constraints" on our
+    // degrees of freedom. This could include hanging node constraints if we
+    // had adaptively refined meshes (which we don't have in the current
+    // program). Here, we will store the constraints for boundary nodes
+    // $U_i=0$.
+    ConstraintMatrix constraints;
+  };
+
+  // @sect3{Implementation of the <code>EigenvalueProblem</code> class}
+
+  // @sect4{EigenvalueProblem::EigenvalueProblem}
+
+  // First up, the constructor. The main new part is handling the run-time
+  // input parameters. We need to declare their existence first, and then read
+  // their values from the input file whose name is specified as an argument
+  // to this function:
+  template <int dim>
+  EigenvalueProblem<dim>::EigenvalueProblem (const std::string &prm_file)
+    :
+    fe (1),
+    dof_handler (triangulation)
+  {
+//TODO investigate why the minimum number of refinement steps required to obtain the correct eigenvalue degeneracies is 6
+    parameters.declare_entry ("Global mesh refinement steps", "5",
+                              Patterns::Integer (0, 20),
+                              "The number of times the 1-cell coarse mesh should "
+                              "be refined globally for our computations.");
+    parameters.declare_entry ("Number of eigenvalues/eigenfunctions", "5",
+                              Patterns::Integer (0, 100),
+                              "The number of eigenvalues/eigenfunctions "
+                              "to be computed.");
+    parameters.declare_entry ("Potential", "0",
+                              Patterns::Anything(),
+                              "A functional description of the potential.");
+
+    parameters.read_input (prm_file);
+  }
+
+
+  // @sect4{EigenvalueProblem::make_grid_and_dofs}
+
+  // The next function creates a mesh on the domain $[-1,1]^d$, refines it as
+  // many times as the input file calls for, and then attaches a DoFHandler to
+  // it and initializes the matrices and vectors to their correct sizes. We
+  // also build the constraints that correspond to the boundary values
+  // $u|_{\partial\Omega}=0$.
+  //
+  // For the matrices, we use the PETSc wrappers. These have the ability to
+  // allocate memory as necessary as non-zero entries are added. This seems
+  // inefficient: we could as well first compute the sparsity pattern,
+  // initialize the matrices with it, and as we then insert entries we can be
+  // sure that we do not need to re-allocate memory and free the one used
+  // previously. One way to do that would be to use code like this:
+  // @code
+  //   DynamicSparsityPattern
+  //      dsp (dof_handler.n_dofs(),
+  //           dof_handler.n_dofs());
+  //   DoFTools::make_sparsity_pattern (dof_handler, dsp);
+  //   dsp.compress ();
+  //   stiffness_matrix.reinit (dsp);
+  //   mass_matrix.reinit (dsp);
+  // @endcode
+  // instead of the two <code>reinit()</code> calls for the
+  // stiffness and mass matrices below.
+  //
+  // This doesn't quite work, unfortunately. The code above may lead to a few
+  // entries in the non-zero pattern to which we only ever write zero entries;
+  // most notably, this holds true for off-diagonal entries for those rows and
+  // columns that belong to boundary nodes. This shouldn't be a problem, but
+  // for whatever reason, PETSc's ILU preconditioner, which we use to solve
+  // linear systems in the eigenvalue solver, doesn't like these extra entries
+  // and aborts with an error message.
+  //
+  // In the absence of any obvious way to avoid this, we simply settle for the
+  // second best option, which is to have PETSc allocate memory as
+  // necessary. That said, since this is not a time critical part, this whole
+  // affair is of no further importance.
+  template <int dim>
+  void EigenvalueProblem<dim>::make_grid_and_dofs ()
+  {
+    GridGenerator::hyper_cube (triangulation, -1, 1);
+    triangulation.refine_global (parameters.get_integer ("Global mesh refinement steps"));
+    dof_handler.distribute_dofs (fe);
+
+    DoFTools::make_zero_boundary_constraints (dof_handler, constraints);
+    constraints.close ();
+
+    stiffness_matrix.reinit (dof_handler.n_dofs(),
+                             dof_handler.n_dofs(),
+                             dof_handler.max_couplings_between_dofs());
+    mass_matrix.reinit (dof_handler.n_dofs(),
+                        dof_handler.n_dofs(),
+                        dof_handler.max_couplings_between_dofs());
+
+    // The next step is to take care of the eigenspectrum. In this case, the
+    // outputs are eigenvalues and eigenfunctions, so we set the size of the
+    // list of eigenfunctions and eigenvalues to be as large as we asked for
+    // in the input file. When using a PETScWrappers::MPI::Vector, the Vector
+    // is initialized using an IndexSet. IndexSet is used not only to resize the
+    // PETScWrappers::MPI::Vector but it also associates an index in the
+    // PETScWrappers::MPI::Vector with a degree of freedom (see step-40 for
+    // a more detailed explanation). DoFHandler::locally_owned_dofs() returns
+    // an IndexSet which, in a sequential run, contains every valid index. Note
+    // that this program can only be run sequentially and will throw an
+    // exception if used in parallel.
+    IndexSet eigenfunction_index_set = dof_handler.locally_owned_dofs ();
+    eigenfunctions
+    .resize (parameters.get_integer ("Number of eigenvalues/eigenfunctions"));
+    for (unsigned int i=0; i<eigenfunctions.size (); ++i)
+      eigenfunctions[i].reinit (eigenfunction_index_set, MPI_COMM_WORLD);
+
+    eigenvalues.resize (eigenfunctions.size ());
+  }
+
+
+  // @sect4{EigenvalueProblem::assemble_system}
+
+  // Here, we assemble the global stiffness and mass matrices from local
+  // contributions $A^K_{ij} = \int_K \nabla\varphi_i(\mathbf x) \cdot
+  // \nabla\varphi_j(\mathbf x) + V(\mathbf x)\varphi_i(\mathbf
+  // x)\varphi_j(\mathbf x)$ and $M^K_{ij} = \int_K \varphi_i(\mathbf
+  // x)\varphi_j(\mathbf x)$ respectively. This function should be immediately
+  // familiar if you've seen previous tutorial programs. The only thing new
+  // would be setting up an object that describes the potential $V(\mathbf x)$
+  // using the expression that we got from the input file. We then need to
+  // evaluate this object at the quadrature points on each cell. If you've
+  // seen how to evaluate function objects (see, for example the coefficient
+  // in step-5), the code here will also look rather familiar.
+  template <int dim>
+  void EigenvalueProblem<dim>::assemble_system ()
+  {
+    QGauss<dim>   quadrature_formula(2);
+
+    FEValues<dim> fe_values (fe, quadrature_formula,
+                             update_values | update_gradients |
+                             update_quadrature_points | update_JxW_values);
+
+    const unsigned int dofs_per_cell = fe.dofs_per_cell;
+    const unsigned int n_q_points    = quadrature_formula.size();
+
+    FullMatrix<double> cell_stiffness_matrix (dofs_per_cell, dofs_per_cell);
+    FullMatrix<double> cell_mass_matrix (dofs_per_cell, dofs_per_cell);
+
+    std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+
+    FunctionParser<dim> potential;
+    potential.initialize (FunctionParser<dim>::default_variable_names (),
+                          parameters.get ("Potential"),
+                          typename FunctionParser<dim>::ConstMap());
+
+    std::vector<double> potential_values (n_q_points);
+
+
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active (),
+    endc = dof_handler.end ();
+    for (; cell!=endc; ++cell)
+      {
+        fe_values.reinit (cell);
+        cell_stiffness_matrix = 0;
+        cell_mass_matrix      = 0;
+
+        potential.value_list (fe_values.get_quadrature_points(),
+                              potential_values);
+
+        for (unsigned int q_point=0; q_point<n_q_points; ++q_point)
+          for (unsigned int i=0; i<dofs_per_cell; ++i)
+            for (unsigned int j=0; j<dofs_per_cell; ++j)
+              {
+                cell_stiffness_matrix (i, j)
+                += (fe_values.shape_grad (i, q_point) *
+                    fe_values.shape_grad (j, q_point)
+                    +
+                    potential_values[q_point] *
+                    fe_values.shape_value (i, q_point) *
+                    fe_values.shape_value (j, q_point)
+                   ) * fe_values.JxW (q_point);
+
+                cell_mass_matrix (i, j)
+                += (fe_values.shape_value (i, q_point) *
+                    fe_values.shape_value (j, q_point)
+                   ) * fe_values.JxW (q_point);
+              }
+
+        // Now that we have the local matrix contributions, we transfer them
+        // into the global objects and take care of zero boundary constraints:
+        cell->get_dof_indices (local_dof_indices);
+
+        constraints
+        .distribute_local_to_global (cell_stiffness_matrix,
+                                     local_dof_indices,
+                                     stiffness_matrix);
+        constraints
+        .distribute_local_to_global (cell_mass_matrix,
+                                     local_dof_indices,
+                                     mass_matrix);
+      }
+
+    // At the end of the function, we tell PETSc that the matrices have now
+    // been fully assembled and that the sparse matrix representation can now
+    // be compressed as no more entries will be added:
+    stiffness_matrix.compress (VectorOperation::add);
+    mass_matrix.compress (VectorOperation::add);
+
+
+    // Before leaving the function, we calculate spurious eigenvalues,
+    // introduced to the system by zero Dirichlet constraints. As
+    // discussed in the introduction, the use of Dirichlet boundary
+    // conditions coupled with the fact that the degrees of freedom
+    // located at the boundary of the domain remain part of the linear
+    // system we solve, introduces a number of spurious eigenvalues.
+    // Below, we output the interval within which they all lie to
+    // ensure that we can ignore them should they show up in our
+    // computations.
+    double min_spurious_eigenvalue = std::numeric_limits<double>::max(),
+           max_spurious_eigenvalue = -std::numeric_limits<double>::max();
+
+    for (unsigned int i = 0; i < dof_handler.n_dofs(); ++i)
+      if (constraints.is_constrained(i))
+        {
+          const double ev = stiffness_matrix(i,i)/mass_matrix(i,i);
+          min_spurious_eigenvalue = std::min (min_spurious_eigenvalue, ev);
+          max_spurious_eigenvalue = std::max (max_spurious_eigenvalue, ev);
+        }
+
+    std::cout << "   Spurious eigenvalues are all in the interval "
+              << "[" << min_spurious_eigenvalue << "," << max_spurious_eigenvalue << "]"
+              << std::endl;
+
+  }
+
+
+  // @sect4{EigenvalueProblem::solve}
+
+  // This is the key new functionality of the program. Now that the system is
+  // set up, here is a good time to actually solve the problem: As with other
+  // examples this is done using a "solve" routine. Essentially, it works as
+  // in other programs: you set up a SolverControl object that describes the
+  // accuracy to which we want to solve the linear systems, and then we select
+  // the kind of solver we want. Here we choose the Krylov-Schur solver of
+  // SLEPc, a pretty fast and robust choice for this kind of problem:
+  template <int dim>
+  unsigned int EigenvalueProblem<dim>::solve ()
+  {
+    // We start here, as we normally do, by assigning convergence control we
+    // want:
+    SolverControl solver_control (dof_handler.n_dofs(), 1e-9);
+    SLEPcWrappers::SolverKrylovSchur eigensolver (solver_control);
+
+    // Before we actually solve for the eigenfunctions and -values, we have to
+    // also select which set of eigenvalues to solve for. Let's select those
+    // eigenvalues and corresponding eigenfunctions with the smallest real
+    // part (in fact, the problem we solve here is symmetric and so the
+    // eigenvalues are purely real). After that, we can actually let SLEPc do
+    // its work:
+    eigensolver.set_which_eigenpairs (EPS_SMALLEST_REAL);
+
+    eigensolver.set_problem_type (EPS_GHEP);
+
+    eigensolver.solve (stiffness_matrix, mass_matrix,
+                       eigenvalues, eigenfunctions,
+                       eigenfunctions.size());
+
+    // The output of the call above is a set of vectors and values. In
+    // eigenvalue problems, the eigenfunctions are only determined up to a
+    // constant that can be fixed pretty arbitrarily. Knowing nothing about
+    // the origin of the eigenvalue problem, SLEPc has no other choice than to
+    // normalize the eigenvectors to one in the $l_2$ (vector)
+    // norm. Unfortunately this norm has little to do with any norm we may be
+    // interested in from an eigenfunction perspective: the $L_2(\Omega)$ norm,
+    // or maybe the $L_\infty(\Omega)$ norm.
+    //
+    // Let us choose the latter and rescale eigenfunctions so that they have
+    // $\|\phi_i(\mathbf x)\|_{L^\infty(\Omega)}=1$ instead of
+    // $\|\Phi_i\|_{l_2}=1$ (where $\phi_i$ is the $i$th eigen<i>function</i>
+    // and $\Phi_i$ the corresponding vector of nodal values). For the $Q_1$
+    // elements chosen here, we know that the maximum of the function
+    // $\phi_i(\mathbf x)$ is attained at one of the nodes, so $\max_{\mathbf
+    // x}\phi_i(\mathbf x)=\max_j (\Phi_i)_j$, making the normalization in the
+    // $L_\infty$ norm trivial. Note that this doesn't work as easily if we
+    // had chosen $Q_k$ elements with $k>1$: there, the maximum of a function
+    // does not necessarily have to be attained at a node, and so
+    // $\max_{\mathbf x}\phi_i(\mathbf x)\ge\max_j (\Phi_i)_j$ (although the
+    // equality is usually nearly true).
+    for (unsigned int i=0; i<eigenfunctions.size(); ++i)
+      eigenfunctions[i] /= eigenfunctions[i].linfty_norm ();
+
+    // Finally return the number of iterations it took to converge:
+    return solver_control.last_step ();
+  }
+
+
+  // @sect4{EigenvalueProblem::output_results}
+
+  // This is the last significant function of this program. It uses the
+  // DataOut class to generate graphical output from the eigenfunctions for
+  // later visualization. It works as in many of the other tutorial programs.
+  //
+  // The whole collection of functions is then output as a single VTK file.
+  template <int dim>
+  void EigenvalueProblem<dim>::output_results () const
+  {
+    DataOut<dim> data_out;
+
+    data_out.attach_dof_handler (dof_handler);
+
+    for (unsigned int i=0; i<eigenfunctions.size(); ++i)
+      data_out.add_data_vector (eigenfunctions[i],
+                                std::string("eigenfunction_") +
+                                Utilities::int_to_string(i));
+
+    // The only thing worth discussing may be that because the potential is
+    // specified as a function expression in the input file, it would be nice
+    // to also have it as a graphical representation along with the
+    // eigenfunctions. The process to achieve this is relatively
+    // straightforward: we build an object that represents $V(\mathbf x)$ and
+    // then we interpolate this continuous function onto the finite element
+    // space. The result we also attach to the DataOut object for
+    // visualization.
+    Vector<double> projected_potential (dof_handler.n_dofs());
+    {
+      FunctionParser<dim> potential;
+      potential.initialize (FunctionParser<dim>::default_variable_names (),
+                            parameters.get ("Potential"),
+                            typename FunctionParser<dim>::ConstMap());
+      VectorTools::interpolate (dof_handler, potential, projected_potential);
+    }
+    data_out.add_data_vector (projected_potential, "interpolated_potential");
+
+    data_out.build_patches ();
+
+    std::ofstream output ("eigenvectors.vtk");
+    data_out.write_vtk (output);
+  }
+
+
+  // @sect4{EigenvalueProblem::run}
+
+  // This is the function which has the top-level control over everything. It
+  // is almost exactly the same as in step-4:
+  template <int dim>
+  void EigenvalueProblem<dim>::run ()
+  {
+    make_grid_and_dofs ();
+
+    std::cout << "   Number of active cells:       "
+              << triangulation.n_active_cells ()
+              << std::endl
+              << "   Number of degrees of freedom: "
+              << dof_handler.n_dofs ()
+              << std::endl;
+
+    assemble_system ();
+
+    const unsigned int n_iterations = solve ();
+    std::cout << "   Solver converged in " << n_iterations
+              << " iterations." << std::endl;
+
+    output_results ();
+
+    std::cout << std::endl;
+    for (unsigned int i=0; i<eigenvalues.size(); ++i)
+      std::cout << "      Eigenvalue " << i
+                << " : " << eigenvalues[i]
+                << std::endl;
+  }
+}
+
+// @sect3{The <code>main</code> function}
+int main (int argc, char **argv)
+{
+  try
+    {
+      using namespace dealii;
+      using namespace Step36;
+
+      Utilities::MPI::MPI_InitFinalize mpi_initialization(argc, argv, 1);
+
+
+      // This program can only be run in serial. Otherwise, throw an exception.
+      AssertThrow(Utilities::MPI::n_mpi_processes(MPI_COMM_WORLD)==1,
+                  ExcMessage("This program can only be run in serial, use ./step-36"));
+
+      EigenvalueProblem<2> problem ("step-36.prm");
+      problem.run ();
+    }
+
+  // All the while, we are watching out if any exceptions should have been
+  // generated. If that is so, we panic...
+  catch (std::exception &exc)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+
+      return 1;
+    }
+  catch (...)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    }
+
+  // If no exceptions are thrown, then we tell the program to stop monkeying
+  // around and exit nicely:
+  std::cout << std::endl
+            << "   Job done."
+            << std::endl;
+
+  return 0;
+}
diff --git a/examples/step-36/step-36.prm b/examples/step-36/step-36.prm
new file mode 100644
index 0000000..9280d72
--- /dev/null
+++ b/examples/step-36/step-36.prm
@@ -0,0 +1,11 @@
+# Listing of Parameters
+# ---------------------
+# The number of times the 1-cell coarse mesh should be refined globally for
+# our computations.
+set Global mesh refinement steps         = 5
+
+# The number of eigenvalues/eigenfunctions to be computed.
+set Number of eigenvalues/eigenfunctions = 5
+
+# A functional description of the potential.
+set Potential                            = 0
diff --git a/examples/step-37/CMakeLists.txt b/examples/step-37/CMakeLists.txt
new file mode 100644
index 0000000..6a4158d
--- /dev/null
+++ b/examples/step-37/CMakeLists.txt
@@ -0,0 +1,50 @@
+##
+#  CMake script for the step-37 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-37")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+#
+# Are all dependencies fulfilled?
+#
+IF(NOT DEAL_II_WITH_LAPACK)
+  MESSAGE(FATAL_ERROR "
+Error! The deal.II library found at ${DEAL_II_PATH} was not configured with
+    DEAL_II_WITH_LAPACK = ON
+One or all of these are OFF in your installation but are required for this tutorial step."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-37/doc/builds-on b/examples/step-37/doc/builds-on
new file mode 100644
index 0000000..42c2846
--- /dev/null
+++ b/examples/step-37/doc/builds-on
@@ -0,0 +1 @@
+step-16
diff --git a/examples/step-37/doc/intro.dox b/examples/step-37/doc/intro.dox
new file mode 100644
index 0000000..1cd4efa
--- /dev/null
+++ b/examples/step-37/doc/intro.dox
@@ -0,0 +1,475 @@
+<br>
+
+<i>
+This program was contributed by Katharina Kormann and Martin
+Kronbichler.
+
+The algorithm for the matrix-vector product is based on the article <a
+href="http://dx.doi.org/10.1016/j.compfluid.2012.04.012">A generic
+interface for parallel cell-based finite element operator
+application</a> by Martin Kronbichler and Katharina Kormann, Computers
+and Fluids 63:135–147, 2012, and the paper "Parallel finite element operator
+application: Graph partitioning and coloring" by Katharina
+Kormann and Martin Kronbichler in: Proceedings of the 7th IEEE
+International Conference on e-Science, 2011.  </i>
+
+
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+This example shows how to implement a matrix-free method, that is, a
+method that does not explicitly store the matrix elements, for a
+second-order Poisson equation with variable coefficients on a
+hypercube. The elliptic equation will be solved with a multigrid
+method.
+
+The major motivation for matrix-free methods is the fact that today
+access to main memory (i.e., for objects that don't fit in the cache)
+has become the bottleneck in scientific computing: To perform a
+matrix-vector product, modern CPUs spend far more time waiting for
+data to arrive from memory than on actually doing the floating point
+multiplications and additions. Thus, if we could substitute looking up
+matrix elements in memory by re-computing them — or rather, the
+operator represented by these entries —, we may win in terms of
+overall run-time (even if this requires a significant number of
+additional floating point operations). That said, to realize this with
+a trivial implementation is not enough and one needs to really look at
+what it takes to make this happen. This tutorial program (and the
+papers referenced above) shows how one can implement such a scheme and
+demonstrates the speedup that can be obtained.
+
+
+<h3>The test case</h3>
+
+In this example, we consider the Poisson problem @f{eqnarray*} -
+\nabla \cdot a(\mathbf x) \nabla u &=& 1, \\ u &=& 0 \quad \text{on}\
+\partial \Omega @f} where $a(\mathbf x)$ is a variable coefficient.
+Below, we explain how to implement a matrix-vector product for this
+problem without explicitly forming the matrix. The construction can,
+of course, be done in a similar way for other equations as well.
+
+We choose as domain $\Omega=[0,1]^3$ and $a(\mathbf x)=\frac{1}{0.05 +
+2\|\mathbf x\|^2}$. Since the coefficient is symmetric around the
+origin but the domain is not, we will end up with a non-symmetric
+solution.
+
+
+<h3>Matrix-vector product implementation</h3>
+
+In order to find out how we can write a code that performs a matrix-vector
+product, but does not need to store the matrix elements, let us start by
+looking at how a finite element matrix <i>A</i> is assembled:
+@f{eqnarray*}
+A = \sum_{\mathrm{cell}=1}^{\mathrm{n\_cells}}
+P_{\mathrm{cell,{loc-glob}}}^T A_{\mathrm{cell}} P_{\mathrm{cell,{loc-glob}}}.
+@f}
+In this formula, the matrix <i>P</i><sub>cell,loc-glob</sub> is a rectangular
+matrix that defines the index mapping from local degrees of freedom in the
+current cell to the global degrees of freedom. The information from which this
+operator can be built is usually encoded in the <code>local_dof_indices</code>
+variable we have always used in the assembly of matrices. Moreover,
+<i>A</i><sub>cell</sub> denotes the cell-operation associated with <i>A</i>.
+
+If we are to perform a matrix-vector product, we can hence use that
+@f{eqnarray*}
+y &=& A\cdot u = \left(\sum_{\mathrm{cell}=1}^{\mathrm{n\_cells}} P_\mathrm{cell,{loc-glob}}^T
+A_\mathrm{cell} P_\mathrm{cell,{loc-glob}}\right) \cdot u
+\\
+&=& \sum_{\mathrm{cell}=1}^{\mathrm{n\_cells}} P_\mathrm{cell,{loc-glob}}^T
+A_\mathrm{cell} u_\mathrm{cell}
+\\
+&=& \sum_{\mathrm{cell}=1}^{\mathrm{n\_cells}} P_\mathrm{cell,{loc-glob}}^T
+v_\mathrm{cell},
+@f}
+where <i>u</i><sub>cell</sub> are the values of <i>u</i> at the degrees of freedom
+of the respective cell, and
+<i>v</i><sub>cell</sub>=<i>A</i><sub>cell</sub><i>u</i><sub>cell</sub>
+correspondingly for the result.
+A naive attempt to implement the local action of the Laplacian would hence be
+to use the following code:
+@code
+MatrixFree<dim>::vmult (Vector<double>       &dst,
+		        const Vector<double> &src) const
+{
+  dst = 0;
+
+  QGauss<dim>  quadrature_formula(fe.degree+1);
+  FEValues<dim> fe_values (fe, quadrature_formula,
+                           update_gradients | update_JxW_values|
+			   update_quadrature_points);
+
+  const unsigned int   dofs_per_cell = fe.dofs_per_cell;
+  const unsigned int   n_q_points    = quadrature_formula.size();
+
+  FullMatrix<double>   cell_matrix (dofs_per_cell, dofs_per_cell);
+  Vector<double>       cell_src (dofs_per_cell),
+   		       cell_dst (dofs_per_cell);
+  const Coefficient<dim> coefficient;
+  std::vector<double> coefficient_values(n_q_points);
+
+  std::vector<unsigned int> local_dof_indices (dofs_per_cell);
+
+  typename DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+  for (; cell!=endc; ++cell)
+    {
+      cell_matrix = 0;
+      fe_values.reinit (cell);
+      coefficient.value_list(fe_values.get_quadrature_points(),
+                             coefficient_values);
+
+      for (unsigned int q=0; q<n_q_points; ++q)
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+	  for (unsigned int j=0; j<dofs_per_cell; ++j)
+            cell_matrix(i,j) += (fe_values.shape_grad(i,q) *
+                                 fe_values.shape_grad(j,q) *
+                                 fe_values.JxW(q)*
+				 coefficient_values[q]);
+
+      cell->get_dof_indices (local_dof_indices);
+
+      for (unsigned int i=0; i<dofs_per_cell; ++i)
+        cell_src(i) = src(local_dof_indices[i]);
+
+      cell_matrix.vmult (cell_dst, cell_src);
+
+      for (unsigned int i=0; i<dofs_per_cell; ++i)
+        dst(local_dof_indices[i]) += cell_dst(i);
+    }
+}
+@endcode
+
+Here we neglected boundary conditions as well as any hanging nodes we may
+have, though neither would be very difficult to include using the
+ConstraintMatrix class. Note how we first generate the local matrix in the
+usual way as a sum over all quadrature points for each local matrix entry.
+To form the actual product as expressed in the above formula, we
+extract the values of <code>src</code> of the cell-related degrees of freedom
+(the action of <i>P</i><sub>cell,loc-glob</sub>), multiply by the local matrix
+(the action of <i>A</i><sub>cell</sub>), and finally add the result to the
+destination vector <code>dst</code> (the action of
+<i>P</i><sub>cell,loc-glob</sub><sup>T</sup>, added over all the elements). It
+is not more difficult than that, in principle.
+
+While this code is completely correct, it is very slow. For every cell, we
+generate a local matrix, which takes three nested loops with loop length equal
+to the number of local degrees of freedom to compute. The
+multiplication itself is then done by two nested loops, which means that it
+is much cheaper.
+
+One way to improve this is to realize that conceptually the local
+matrix can be thought of as the product of three matrices,
+@f{eqnarray*}
+A_\mathrm{cell} = B_\mathrm{cell}^T D_\mathrm{cell} B_\mathrm{cell},
+@f}
+where for the example of the Laplace operator the (<i>q</i>*dim+<i>d,i</i>)-th
+element of <i>B</i><sub>cell</sub> is given by
+<code>fe_values.shape_grad(i,q)[d]</code>. The matrix consists of
+<code>dim*n_q_points</code> rows and @p dofs_per_cell columns. The matrix
+<i>D</i><sub>cell</sub> is diagonal and contains the values
+<code>fe_values.JxW(q) * coefficient_values[q]</code> (or, rather, @p
+dim copies of each of these values). This kind of representation of
+finite element matrices can often be found in the engineering literature.
+
+When the cell-based matrix is applied to a vector @f{eqnarray*}
+A_\mathrm{cell}\cdot u_\mathrm{cell} = B_\mathrm{cell}^T
+D_\mathrm{cell} B_\mathrm{cell} \cdot u_\mathrm{cell}, @f} one would
+then never form the matrix-matrix products, but rather multiply with
+the vector from right to left so that only three successive
+matrix-vector products are formed.  This removes the three nested
+loops in the calculation of the local matrix. What happens is as
+follows: We first transform the vector of values on the local dofs to
+a vector of gradients on the quadrature points. In the second loop, we
+multiply these gradients by the integration weight. The third loop
+applies the second gradient (in transposed form), so that we get back
+to a vector of (Laplacian) values on the cell dofs.  This reduces the
+complexity of the work on one cell from something like $\mathcal
+{O}(\mathrm{dofs\_per\_cell}^3)$ to $\mathcal
+{O}(\mathrm{dofs\_per\_cell}^2)$.
+
+The bottleneck in the above code is the operations done by the call to
+FEValues::reinit for every <code>cell</code>, which take about as much time as the
+other steps together (at least if the mesh is unstructured; deal.II can
+recognize that the gradients are often unchanged on structured meshes). That
+is certainly not ideal and we would like to do better than this. What the
+reinit function does is to calculate the gradient in real space by
+transforming the gradient on the reference cell using the Jacobian of the
+transformation from real to reference cell. This is done for each basis
+function on the cell, for each quadrature point. The Jacobian does not depend
+on the basis function, but it is different on different quadrature points in
+general. If you only build the matrix once as we've done in all
+previous tutorial programs, there is nothing one can do about the need
+to call FEValues::reinit on every cell since this transformation has
+to be done when we want to compute the local matrix elements.
+
+However, in a matrix-free implementation, we are not interested in
+applying the matrix only once. Rather, in iterative solvers, we need
+to expect that we have to apply the matrix many times, and so we can
+think about whether we may be able to cache something between
+different applications. On the other hand, we realize that we must not
+cache too much data since otherwise we get back to the situation where
+memory access becomes the dominating factor.
+
+The trick is now to factor out the Jacobian transformation and first
+apply the gradient on the reference cell only. That transforms the vector of
+values on the local dofs to a vector of gradients on the quadrature
+points. There, we first apply the Jacobian that we factored out from the
+gradient, then we apply the weights of the quadrature, and we apply the
+transposed Jacobian for preparing the third loop which again uses the
+gradients on the unit cell.
+
+Let us again write this in terms of matrices. Let the matrix
+<i>B</i><sub>cell</sub> denote the cell-related gradient matrix, with each row
+containing the values on the quadrature points. It is constructed by a
+matrix-matrix product as
+@f{eqnarray*}
+B_\mathrm{cell} = J_\mathrm{cell} B_\mathrm{ref\_cell},
+@f}
+where <i>B</i><sub>ref_cell</sub> denotes the gradient on the reference cell
+and <i>J</i><sub>cell</sub> denotes the Jacobian transformation from unit to
+real cell (in the language of transformations, the operation represented by
+<i>J</i><sub>cell</sub> represents a covariant
+transformation). <i>J</i><sub>cell</sub> is block-diagonal, and the block
+size is equal to the dimension of the problem. Each diagonal block is the
+Jacobian transformation that goes from the reference cell to the real cell.
+
+Putting things together, we find that
+@f{eqnarray*}
+A_\mathrm{cell} = B_\mathrm{cell}^T D_\mathrm{cell} B_\mathrm{cell}
+		= B_\mathrm{ref\_cell}^T J_\mathrm{cell}^T
+		  D_\mathrm{cell}
+		  J_\mathrm{cell} B_\mathrm{ref\_cell},
+@f}
+so we calculate the product (starting the local product from the right)
+@f{eqnarray*}
+v_\mathrm{cell} = B_\mathrm{ref\_cell}^T J_\mathrm{cell}^T D_\mathrm{cell} J_\mathrm{cell}
+B_\mathrm{ref\_cell} u_\mathrm{cell}, \quad
+v = \sum_{\mathrm{cell}=1}^{\mathrm{n\_cells}} P_\mathrm{cell,{loc-glob}}^T
+v_\mathrm{cell}.
+@f}
+@code
+...
+  FEValues<dim> fe_values_reference (fe, quadrature_formula,
+                           	     update_gradients);
+  Triangulation<dim> reference_cell;
+  GridGenerator::hyper_cube(reference_cell, 0., 1.);
+  fe_values_reference.reinit (reference_cell.begin());
+
+  FEValues<dim> fe_values (fe, quadrature_formula,
+                           update_inverse_jacobians | update_JxW_values |
+			   update_quadrature_points);
+
+  for (; cell!=endc; ++cell)
+    {
+      fe_values.reinit (cell);
+      coefficient.value_list(fe_values.get_quadrature_points(),
+                             coefficient_values);
+
+      cell->get_dof_indices (local_dof_indices);
+
+      for (unsigned int i=0; i<dofs_per_cell; ++i)
+        cell_src(i) = src(local_dof_indices[i]);
+
+      temp_vector = 0;
+      for (unsigned int q=0; q<n_q_points; ++q)
+        for (unsigned int d=0; d<dim; ++d)
+          for (unsigned int i=0; i<dofs_per_cell; ++i)
+	    temp_vector(q*dim+d) +=
+	      fe_values_reference.shape_grad(i,q)[d] * cell_src(i);
+
+      for (unsigned int q=0; q<n_q_points; ++q)
+        {
+          // apply the transposed inverse Jacobian of the mapping
+	  Tensor<1,dim> temp;
+	  for (unsigned int d=0; d<dim; ++d)
+	    temp[d] = temp_vector(q*dim+d);
+	  for (unsigned int d=0; d<dim; ++d)
+	    {
+	      double sum = 0;
+	      for (unsigned int e=0; e<dim; ++e)
+	      	sum += fe_values.inverse_jacobian(q)[e][d] *
+	       	       temp[e];
+	      temp_vector(q*dim+d) = sum;
+	    }
+
+          // multiply by coefficient and integration weight
+	  for (unsigned int d=0; d<dim; ++d)
+	    temp_vector(q*dim+d) *= fe_values.JxW(q) * coefficient_values[q];
+
+          // apply the inverse Jacobian of the mapping
+	  for (unsigned int d=0; d<dim; ++d)
+	    temp[d] = temp_vector(q*dim+d);
+	  for (unsigned int d=0; d<dim; ++d)
+	    {
+	      double sum = 0;
+	      for (unsigned int e=0; e<dim; ++e)
+	      	sum += fe_values.inverse_jacobian(q)[d][e] *
+	    	       temp[e];
+	      temp_vector(q*dim+d) = sum;
+	    }
+        }
+
+      cell_dst = 0;
+      for (unsigned int i=0; i<dofs_per_cell; ++i)
+        for (unsigned int q=0; q<n_q_points; ++q)
+          for (unsigned int d=0; d<dim; ++d)
+	    cell_dst(i) += fe_values_reference.shape_grad(i,q)[d] *
+	               	   temp_vector(q*dim+d);
+
+      for (unsigned int i=0; i<dofs_per_cell; ++i)
+        dst(local_dof_indices[i]) += cell_dst(i);
+    }
+}
+@endcode
+
+Note how we create an additional FEValues object for the reference cell
+gradients and how we initialize it to the reference cell. The actual
+derivative data is then applied by the inverse, transposed Jacobians (deal.II
+calls the Jacobian matrix from real to unit cell inverse_jacobian, because the
+forward transformation in deal.II is from unit to real cell).
+
+Finally, we are using tensor product basis functions and now that we have
+separated out the gradient on the reference cell <i>B</i><sub>ref_cell</sub>,
+we can exploit the tensor-product structure to further reduce the
+complexity. We illustrate this in two space dimensions, but the same technique
+can be used in higher dimensions. On the reference cell, the basis functions
+are of the tensor product form $\phi(x,y) = \varphi_i(x) \varphi_j(y)$. The
+part of the matrix <i>B</i><sub>ref_cell</sub> that computes the first
+component has the form $B_\mathrm{sub\_cell}^x = B_\mathrm{grad,x} \otimes
+B_\mathrm{val,y}$, where <i>B</i><sub>grad,x</sub> and
+<i>B</i><sub>val,y</sub> contain the evaluation of all the 1D basis functions
+on all the 1D quadrature points. Forming a matrix <i>U</i> with <i>U(j,i)</i>
+containing the coefficient belonging to basis function $\varphi_i(x)
+\varphi_j(y)$, we get $(B_\mathrm{grad,x} \otimes
+B_\mathrm{val,y})u_\mathrm{cell} = B_\mathrm{val,y} U B_\mathrm{grad,x}$. This
+reduces the complexity for computing this product from $p^4$ to $2 p^3$, where
+<i>p</i>-1 is the degree of the finite element (i.e., equivalently,
+<i>p</i> is the number of shape functions in each coordinate
+direction), or $p^{2d}$ to $d p^{d+1}$ in general.
+
+Implementing a matrix-free and cell-based finite element operator requires a
+somewhat different design compared to the usual matrix assembly codes shown in
+previous tutorial programs. The data structures for doing this are the
+MatrixFree class that collects all data and issues a (parallel) loop over all
+cells and the FEEvaluation class that evaluates finite element basis functions
+by making use of the tensor product structure.
+
+
+The implementation of the matrix-free matrix-vector product shown in this
+tutorial is slower than a matrix-vector product using a sparse matrix for
+linear elements, but faster for all higher order elements thanks to the
+reduced complexity due to the tensor product structure and due to less memory
+transfer during computations. The impact of reduced memory transfer is
+particularly beneficial when working on a multicore processor where several
+processing units share access to memory. In that case, an algorithm which is
+computation bound will show almost perfect parallel speedup, whereas an
+algorithm that is bound by memory transfer might not achieve similar speedup
+(even when the work is perfectly parallel and one could expect perfect scaling
+like in sparse matrix-vector products). An additional gain with this
+implementation is that we do not have to build the sparse matrix itself, which
+can also be quite expensive depending on the underlying differential
+equation. Moreover, the above framework is simple to generalize to nonlinear
+operations, as we demonstrate in step-48.
+
+
+<h3>Combination with multigrid</h3>
+
+Above, we have gone to significant lengths to implement a matrix-vector
+product that does not actually store the matrix elements. In many user codes,
+however, one wants more than just performing some number of
+matrix-vector products — one wants to do as few of these operations
+as possible when solving linear equation systems. In theory, we could use the
+CG method without preconditioning; however, that would not be very
+efficient. Rather, one uses preconditioners for improving speed. On the other
+hand, most of the more frequently used preconditioners such as SSOR, ILU or
+algebraic multigrid (AMG) cannot be used here because their
+implementation requires knowledge of the elements of the system matrix.
+
+One solution is to use multigrid methods as shown in step-16. They are known
+to be very fast, and they are suitable for our purpose since they can be
+designed based purely on matrix-vector products. All one needs to do is to
+find a smoother that works with matrix-vector products only (our choice
+requires knowledge of the diagonal entries of the matrix, though). One such
+candidate would be a damped Jacobi iteration, but that is often not
+sufficiently good in damping high-frequency errors.  A Chebyshev
+preconditioner, eventually, is what we use here. It can be seen as an
+extension of the Jacobi method by using Chebyshev polynomials. With degree
+zero, the Jacobi method with optimal damping parameter is retrieved, whereas
+higher order corrections improve the smoothing properties if some parameters
+are suitably chosen. The effectiveness of Chebyshev smoothing in multigrid has
+been demonstrated, e.g., in the article <a
+href="http://www.sciencedirect.com/science/article/pii/S0021999103001943">
+<i>M. Adams, M. Brezina, J. Hu, R. Tuminaro. Parallel multigrid smoothers:
+polynomial versus Gauss–Seidel, J. Comput. Phys. 188:593–610,
+2003</i> </a>. This publication also identifies one more advantage of
+Chebyshev smoothers that we exploit here, namely that they are easy to
+parallelize, whereas SOR/Gauss–Seidel smoothing relies on substitutions,
+for which a naive parallelization works on diagonal sub-blocks of the matrix,
+thereby decreasing efficiency (for more detail see e.g. Y. Saad,
+Iterative Methods for Sparse Linear Systems, SIAM, 2nd edition, 2003, chapters
+11 & 12).
+
+The implementation into the multigrid framework is then straightforward. The
+multigrid implementation in this program is based on a simplified version of
+step-16 that disregards adaptivity.
+
+
+<h3>Using CPU-dependent instructions (vectorization)</h3>
+
+The computational kernels for evaluation in FEEvaluation are written in a way
+to optimally use computational resources. Indeed, they operate not on double
+data types, but something we call VectorizedArray (check e.g. the return type
+of FEEvaluationBase::get_value, which is VectorizedArray for a scalar element
+and a Tensor of VectorizedArray for a vector finite element). VectorizedArray
+is a short array of doubles or floats whose length depends on the particular
+computer system in use. For example, systems based on x86-64 support the
+streaming SIMD extensions (SSE), where the processor's vector units can
+process two doubles (or four single-precision floats) by one CPU
+instruction. Newer processors with support for the so-called advanced vector
+extensions (AVX) with 256 bit operands can use four doubles and eight floats,
+respectively. Vectorization is a single-instruction/multiple-data (SIMD)
+concept, that is, one CPU instruction is used to process multiple data values
+at once. Often, finite element programs do not use vectorization explicitly as
+the benefits of this concept are only in arithmetic intensive operations. The
+bulk of typical finite element workloads are memory bandwidth limited
+(operations on sparse matrices and vectors) where the additional computational
+power is useless.
+
+Behind the scenes, optimized BLAS packages might heavily rely on
+vectorization, though. Also, optimizing compilers might automatically
+transform loops involving standard code into more efficient vectorized
+form. However, the data flow must be very regular in order for compilers to
+produce efficient code. For example, already the automatic vectorization of
+the prototype operation that benefits from vectorization, matrix-matrix
+products, fails on most compilers (as of writing this tutorial in early 2012,
+neither gcc-4.6 nor the Intel compiler v. 12 manage to produce useful
+vectorized code for the FullMatrix::mmult function, and not even on the
+simpler case where the matrix bounds are compile-time constants instead of
+run-time constants as in FullMatrix::mmult). The main reason for this is that
+the information to be processed at the innermost loop (that is where
+vectorization is applied) is not necessarily a multiple of the vector length,
+leaving parts of the resources unused. Moreover, the data that can potentially
+be processed together might not be laid out in a contiguous way in memory or
+not with the necessary alignment to address boundaries that are needed by the
+processor. Or the compiler might not be able to prove that data arrays do not
+overlap when loading several elements at once.
+
+In the matrix-free implementation in deal.II, we have therefore chosen to
+apply vectorization at the level which is most appropriate for finite element
+computations: The cell-wise computations are typically exactly the same for
+all cells (except for reading from and writing to vectors), and hence SIMD can
+be used to process several cells at once. In all that follows, you can think
+of a VectorizedArray to hold data from several cells. Remember that it is not
+related to the spatial dimension and the number of elements e.g. in a Tensor
+or Point.
+
+Note that vectorization depends on the CPU that is used for deal.II. In order
+to generate the fastest kernels of FEEvaluation for your computer, you should
+compile deal.II with the so-called <i>native</i> processor variant. When using
+the gcc compiler, it can be enabled by setting the variable
+<tt>CMAKE_CXX_FLAGS</tt> to <tt>"-march=native"</tt> in the cmake build
+settings (on the command line, specify
+<tt>-DCMAKE_CXX_FLAGS="-march=native"</tt>, see the deal.II README for more
+information). Similar options exist for other compilers.
+
diff --git a/examples/step-37/doc/kind b/examples/step-37/doc/kind
new file mode 100644
index 0000000..c1d9154
--- /dev/null
+++ b/examples/step-37/doc/kind
@@ -0,0 +1 @@
+techniques
diff --git a/examples/step-37/doc/results.dox b/examples/step-37/doc/results.dox
new file mode 100644
index 0000000..0244754
--- /dev/null
+++ b/examples/step-37/doc/results.dox
@@ -0,0 +1,239 @@
+<h1>Results</h1>
+
+<h3>Program output</h3>
+
+Since this example solves the same problem as step-5 (except for
+a different coefficient), there is little to say about the
+solution. We show a picture anyway, illustrating the size of the
+solution through both isocontours and volume rendering:
+
+<img src="http://www.dealii.org/images/steps/developer/step-37.solution.png" alt="">
+
+Of more interest is to evaluate some aspects of the multigrid solver.
+When we run this program in 2D for quadratic ($Q_2$) elements, we get the
+following output:
+@code
+Cycle 0
+Number of degrees of freedom: 81
+System matrix memory consumption:     0.008982 MB.
+Multigrid objects memory consumption: 0.02617 MB.
+Total setup time               (wall) 0.001811s
+Time solve (5 iterations)  (CPU/wall) 0s/0.0002651s
+
+Cycle 1
+Number of degrees of freedom: 289
+System matrix memory consumption:     0.01817 MB.
+Multigrid objects memory consumption: 0.05779 MB.
+Total setup time               (wall) 0.001223s
+Time solve (5 iterations)  (CPU/wall) 0s/0.000926s
+
+Cycle 2
+Number of degrees of freedom: 1089
+System matrix memory consumption:     0.05286 MB.
+Multigrid objects memory consumption: 0.1581 MB.
+Total setup time               (wall) 0.003045s
+Time solve (6 iterations)  (CPU/wall) 0.012s/0.003393s
+
+Cycle 3
+Number of degrees of freedom: 4225
+System matrix memory consumption:     0.1957 MB.
+Multigrid objects memory consumption: 0.5228 MB.
+Total setup time               (wall) 0.008561s
+Time solve (6 iterations)  (CPU/wall) 0.02s/0.01133s
+
+Cycle 4
+Number of degrees of freedom: 16641
+System matrix memory consumption:     0.7343 MB.
+Multigrid objects memory consumption: 1.925 MB.
+Total setup time               (wall) 0.02938s
+Time solve (6 iterations)  (CPU/wall) 0.068s/0.03312s
+
+Cycle 5
+Number of degrees of freedom: 66049
+System matrix memory consumption:     2.856 MB.
+Multigrid objects memory consumption: 7.435 MB.
+Total setup time               (wall) 0.1128s
+Time solve (6 iterations)  (CPU/wall) 0.228s/0.09577s
+
+Cycle 6
+Number of degrees of freedom: 263169
+System matrix memory consumption:     11.28 MB.
+Multigrid objects memory consumption: 29.3 MB.
+Total setup time               (wall) 0.4553s
+Time solve (6 iterations)  (CPU/wall) 1.272s/0.3955s
+@endcode
+
+As in step-16, we see that the number of CG iterations remains constant with
+increasing number of degrees of freedom. We can also see that the various
+objects we have to store for the multigrid method on the individual levels of
+our mesh together make up more than twice as much as the matrix on the finest
+level. For the present example, about half the memory consumption of the
+multigrid objects are the level transfer matrices, and the other half is
+consumed by the matrix-free objects (and there, mainly the indices and the
+variable coefficient).
+
+Not much changes if we run the program in three spatial dimensions, with the
+exception that the multilevel objects now take up some more memory (because
+the level transfer matrices are denser) and the computing times are somewhat
+larger:
+
+@code
+Cycle 0
+Number of degrees of freedom: 125
+System matrix memory consumption:     0.01093 MB.
+Multigrid objects memory consumption: 0.03094 MB.
+Total setup time               (wall) 0.002481s
+Time solve (5 iterations)  (CPU/wall) 0s/0.000334s
+
+Cycle 1
+Number of degrees of freedom: 729
+System matrix memory consumption:     0.04105 MB.
+Multigrid objects memory consumption: 0.1274 MB.
+Total setup time               (wall) 0.004471s
+Time solve (5 iterations)  (CPU/wall) 0.004s/0.001979s
+
+Cycle 2
+Number of degrees of freedom: 4913
+System matrix memory consumption:     0.2821 MB.
+Multigrid objects memory consumption: 0.8048 MB.
+Total setup time               (wall) 0.01651s
+Time solve (4 iterations)  (CPU/wall) 0.036s/0.01295s
+
+Cycle 3
+Number of degrees of freedom: 35937
+System matrix memory consumption:     1.948 MB.
+Multigrid objects memory consumption: 5.734 MB.
+Total setup time               (wall) 0.1072s
+Time solve (5 iterations)  (CPU/wall) 0.16s/0.0709s
+
+Cycle 4
+Number of degrees of freedom: 274625
+System matrix memory consumption:     14.49 MB.
+Multigrid objects memory consumption: 44.41 MB.
+Total setup time               (wall) 0.8173s
+Time solve (5 iterations)  (CPU/wall) 1.52s/0.5093s
+
+Cycle 5
+Number of degrees of freedom: 2146689
+System matrix memory consumption:     115.9 MB.
+Multigrid objects memory consumption: 342.6 MB.
+Total setup time               (wall) 6.387s
+Time solve (5 iterations)  (CPU/wall) 12.45s/3.767s
+@endcode
+
+
+<h3>Comparison with a sparse matrix</h3>
+
+In order to understand the capabilities of the matrix-free implementation, we
+compare the performance on the 3d example above with a SparseMatrix
+implementation and we measure the computation times for both initialization of
+the problem (distribute DoFs, setup and assemble matrices, setup multigrid
+structures) and the actual solution for the matrix-free variant and the
+variant based on sparse matrices. We base the preconditioner on float
+numbers and the actual matrix and vectors on double numbers, as shown
+above. Tests are run on an Intel Core i7-2620M notebook processor (two cores
+and <a href="http://en.wikipedia.org/wiki/Advanced_Vector_Extensions">AVX</a>
+support, i.e., four operations on doubles can be done with one CPU
+instruction, which is heavily used in FEEvaluation) and optimized mode. The
+example makes use of multithreading, so both cores are actually used.
+
+<table align="center" border="1">
+  <tr>
+    <th> </th>
+    <th colspan="2">Sparse matrix</th>
+    <th colspan="2">Matrix-free implementation</th>
+  </tr>
+  <tr>
+    <th>n_dofs</th>
+    <th>Setup + assemble</th>
+    <th> Solve </th>
+    <th>Setup + assemble</th>
+    <th> Solve </th>
+  </tr>
+  <tr>
+    <td align="right">125</td>
+    <td align="center">0.0048s</td>
+    <td align="center">0.00075s</td>
+    <td align="center">0.0025s</td>
+    <td align="center">0.00033s</td>
+  </tr>
+  <tr>
+    <td align="right">729</td>
+    <td align="center">0.014s</td>
+    <td align="center">0.0022s</td>
+    <td align="center">0.0026s</td>
+    <td align="center">0.0018s</td>
+  </tr>
+  <tr>
+    <td align="right">4,913</td>
+    <td align="center">0.10s</td>
+    <td align="center">0.012s</td>
+    <td align="center">0.017s</td>
+    <td align="center">0.013s</td>
+  </tr>
+  <tr>
+    <td align="right">35,937</td>
+    <td align="center">0.80s</td>
+    <td align="center">0.14s</td>
+    <td align="center">0.11s</td>
+    <td align="center">0.071s</td>
+  </tr>
+  <tr>
+    <td align="right">274,625</td>
+    <td align="center">5.93s</td>
+    <td align="center">1.05s</td>
+    <td align="center">0.82s</td>
+    <td align="center">0.51s</td>
+  </tr>
+  <tr>
+    <td align="right">2,146,689</td>
+    <td align="center">46.7s</td>
+    <td align="center">8.44s</td>
+    <td align="center">6.39s</td>
+    <td align="center">3.77s</td>
+  </tr>
+</table>
+
+The table clearly shows that the matrix-free implementation is twice as fast
+for the solver, and more than six times as fast when it comes to
+initialization costs. As the problem size is made a factor 8 larger, we note
+that the times usually go up by a factor eight, too (as the solver iterations
+are constant at 5). There are two deviations. The first is in the sparse
+matrix between 5k and 36k degrees of freedom, where the time increases by a
+factor 12. This is the threshold when the cache in the processor can no longer
+hold all data necessary for the matrix-vector products and all matrix elements
+must be fetched from main memory. The second deviation is the times for the
+matrix-free solve which increase by less than a factor 8. This is because of
+more parallelism from more cells, exploited by the (involved) dynamic task
+scheduling approach taken in the cell loop of the MatrixFree class. Note
+that about 30% of the time in the
+matrix-free solver is spent on restriction and prolongation, which use sparse
+matrices. So the speedup could be even better if all parts were done
+efficiently.
+
+Of course, this picture does not necessarily translate to all cases, as there
+are problems where knowledge of matrix entries enables much better solvers (as
+happens when the coefficient is varying more strongly than in the above
+example). Moreover, it also depends on the computer system. The present system
+has good memory performance, so sparse matrices perform comparably
+well. Nonetheless, the matrix-free implementation gives a nice speedup already
+for the <i>Q</i><sub>2</sub> elements used in this example. This becomes
+particularly apparent for time-dependent or nonlinear problems where sparse
+matrices would need to be reassembled over and over again, which becomes much
+easier with this class. And of course, thanks to the better complexity of the
+products, the method gains increasingly larger advantages when the order of the
+elements increases (the matrix-free implementation has costs
+4<i>d</i><sup>2</sup><i>p</i> per degree of freedom, compared to
+2<i>p<sup>d</sup></i> for the sparse matrix, so it will win anyway for order 4
+and higher in 3d).
+
+<h3>Possibilities for extensions</h3>
+
+Above, we have shown figures for second-order finite elements. Our
+implementation gains more compared to sparse matrices if higher order elements
+are used. However, FE_Q elements with equidistant nodes are badly conditioned
+if the order increases. In this case, the smoother and the multigrid solver
+break down. Node clustering close to the element boundaries resolves this
+problem (and the multigrid solver converges in 5 or 6 iterations also for very
+high order). Elements with these properties are the Gauss-Lobatto FE_Q
+elements, which are presented in step-48.
diff --git a/examples/step-37/doc/tooltip b/examples/step-37/doc/tooltip
new file mode 100644
index 0000000..83658ea
--- /dev/null
+++ b/examples/step-37/doc/tooltip
@@ -0,0 +1 @@
+Matrix-free methods. Multigrid. Fast assembly techniques.
diff --git a/examples/step-37/step-37.cc b/examples/step-37/step-37.cc
new file mode 100644
index 0000000..fd7af71
--- /dev/null
+++ b/examples/step-37/step-37.cc
@@ -0,0 +1,1149 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2009 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Authors: Katharina Kormann, Martin Kronbichler, Uppsala University, 2009-2012
+ */
+
+
+// First include the necessary files from the deal.II library.
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/timer.h>
+
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/precondition.h>
+
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_values.h>
+
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/tria_boundary_lib.h>
+#include <deal.II/grid/grid_generator.h>
+
+#include <deal.II/multigrid/multigrid.h>
+#include <deal.II/multigrid/mg_transfer.h>
+#include <deal.II/multigrid/mg_tools.h>
+#include <deal.II/multigrid/mg_coarse.h>
+#include <deal.II/multigrid/mg_smoother.h>
+#include <deal.II/multigrid/mg_matrix.h>
+
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/numerics/vector_tools.h>
+
+// This includes the data structures for the efficient implementation of
+// matrix-free methods or more generic finite element operators with the class
+// MatrixFree.
+#include <deal.II/matrix_free/matrix_free.h>
+#include <deal.II/matrix_free/fe_evaluation.h>
+
+#include <iostream>
+#include <fstream>
+#include <sstream>
+
+
+namespace Step37
+{
+  using namespace dealii;
+
+
+  // To be efficient, the operations performed in the matrix-free
+  // implementation require knowledge of loop lengths at compile time, which
+  // are given by the degree of the finite element. Hence, we collect the
+  // values of the two template parameters that can be changed at one place in
+  // the code. Of course, one could make the degree of the finite element a
+  // run-time parameter by compiling the computational kernels for all degrees
+  // that are likely (say, between 1 and 6) and selecting the appropriate
+  // kernel at run time. Here, we simply choose second order $Q_2$ elements
+  // and choose dimension 3 as standard.
+  const unsigned int degree_finite_element = 2; // degree of the finite element (Q_2)
+  const unsigned int dimension = 3; // space dimension (3d)
+
+
+  // @sect3{Equation data}
+
+  // We define a variable coefficient function for the Poisson problem. It is
+  // similar to the function in step-5 but we use the form $a(\mathbf
+  // x)=\frac{1}{0.05 + 2\|\bf x\|^2}$ instead of a discontinuous one. It is
+  // merely to demonstrate the possibilities of this implementation, rather
+  // than making much sense physically. We define the coefficient in the same
+  // way as functions in earlier tutorial programs. There is one new function,
+  // namely a @p value method with template argument @p number.
+  template <int dim>
+  class Coefficient : public Function<dim>
+  {
+  public:
+    Coefficient ()  : Function<dim>() {}
+    // Coefficient value at a single point, in double precision.
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+    // Same formula for a generic number type such as VectorizedArray.
+    template <typename number>
+    number value (const Point<dim,number> &p,
+                  const unsigned int component = 0) const;
+    // Coefficient values at all @p points, written into @p values.
+    virtual void value_list (const std::vector<Point<dim> > &points,
+                             std::vector<double>            &values,
+                             const unsigned int              component = 0) const;
+  };
+
+
+
+  // This is the new function mentioned above: Evaluate the coefficient for
+  // abstract type @p number. It might be just a usual double, but it can also
+  // be a somewhat more complicated type that we call VectorizedArray. This
+  // data type is essentially a short array of doubles as discussed in the
+  // introduction that holds data from several cells. For example, we evaluate
+  // the coefficient shown here not on a simple point as usually done, but we
+  // hand it a Point<dim,VectorizedArray<double> > point, which is actually a
+  // collection of two points in the case of SSE2. Do not confuse the entries
+  // in VectorizedArray<double> with the different coordinates of the
+  // point. Indeed, the data is laid out such that <code>p[0]</code> returns a
+  // VectorizedArray<double>, which in turn contains the x-coordinate for the
+  // first point and the second point. You may access the coordinates
+  // individually using e.g. <code>p[0][j]</code>, j=0,1, but it is
+  // recommended to define operations on a VectorizedArray as much as possible
+  // in order to make use of vectorized operations.
+  //
+  // The implementation assumes that @p number overloads the basic arithmetic
+  // operations, so the formula is written as usual. The virtual functions
+  // @p value and @p value_list inherited from the base class simply forward
+  // to this template with <code>double</code> type to avoid duplicate code.
+  template <int dim>
+  template <typename number>
+  number Coefficient<dim>::value (const Point<dim,number> &p,
+                                  const unsigned int /*component*/) const
+  {
+    const number denominator = 0.05 + 2.*p.square();
+    return 1. / denominator;
+  }
+
+
+
+  template <int dim>
+  double
+  Coefficient<dim>::value (const Point<dim> &p, const unsigned int component) const
+  {
+    return this->template value<double>(p, component); // reuse generic formula
+  }
+
+
+
+  template <int dim>
+  void
+  Coefficient<dim>::value_list (const std::vector<Point<dim> > &points,
+                                std::vector<double>            &values,
+                                const unsigned int              component) const
+  {
+    // Consistency checks: one output slot per point, scalar function only.
+    Assert (values.size() == points.size(),
+            ExcDimensionMismatch (values.size(), points.size()));
+    Assert (component == 0,
+            ExcIndexRange (component, 0, 1));
+    for (unsigned int i=0, n_points=points.size(); i!=n_points; ++i)
+      values[i] = this->template value<double>(points[i], component);
+  }
+
+
+
+  // @sect3{Matrix-free implementation}
+
+  // The following class, called <code>LaplaceOperator</code>, implements the
+  // differential operator. For all practical purposes, it is a matrix, i.e.,
+  // you can ask it for its size (member functions <code>m(), n()</code>) and
+  // you can apply it to a vector (the various variants of the
+  // <code>vmult()</code> function). The difference to a real matrix of course
+  // lies in the fact that this class doesn't actually store the
+  // <i>elements</i> of the matrix, but only knows how to compute the action
+  // of the operator when applied to a vector.
+
+  // In this program, we want to make use of the data cache for finite element
+  // operator application that is integrated in deal.II. The main class that
+  // collects all data is called MatrixFree. It contains mapping information
+  // (Jacobians) and index relations between local and global degrees of
+  // freedom. It also contains constraints like the ones from Dirichlet
+  // boundary conditions (or hanging nodes, if we had any). Moreover, it can
+  // issue a loop over all cells in %parallel, where it makes sure that only
+  // cells are worked on that do not share any degree of freedom (this makes
+  // the loop thread-safe when writing into destination vectors). This is a
+  // more advanced strategy compared to the WorkStream class described in the
+  // @ref threads module that serializes operations that might not be
+  // thread-safe. Of course, to not destroy thread-safety, we have to be
+  // careful when writing into class-global structures.
+  //
+  // First comes the implementation of the matrix-free class. It provides some
+  // standard information we expect for matrices (like returning the
+  // dimensions of the matrix), it implements matrix-vector multiplications in
+  // several forms (transposed and untransposed), and it provides functions
+  // for initializing the structure with data. The class has three template
+  // arguments, one for the dimension (as many deal.II classes carry), one for
+  // the degree of the finite element (which we need to enable efficient
+  // computations through the FEEvaluation class), and one for the underlying
+  // scalar type. We want to use <code>double</code> numbers (i.e., double
+  // precision, 64-bit floating point) for the final matrix, but floats
+  // (single precision, 32-bit floating point numbers) for the multigrid level
+  // matrices (as that is only a preconditioner, and floats can be worked with
+  // twice as fast).
+  //
+  // In this class, we store the actual MatrixFree object, the variable
+  // coefficient that is evaluated at all quadrature points (so that we don't
+  // have to recompute it during matrix-vector products), and a vector that
+  // contains the diagonal of the matrix that we need for the multigrid
+  // smoother. We choose to let the user provide the diagonal in this program,
+  // but we could also integrate a function in this class to evaluate the
+  // diagonal. Unfortunately, this forces us to define matrix entries at two
+  // places, once when we evaluate the product and once for the diagonal, but
+  // the work is still much less than when we compute sparse matrices.
+  //
+  // As a sidenote, if we implemented several different operations on the same
+  // grid and degrees of freedom (like a mass matrix and a Laplace matrix), we
+  // would have to have two classes like the current one for each of the
+  // operators (maybe with a common base class). However, in that case, we
+  // would not store a MatrixFree object in this class to avoid doing the
+  // expensive work of precomputing everything MatrixFree stores
+  // twice. Rather, we would keep this object in the main class and simply
+  // store a reference.
+  //
+  // @note Note that storing values of type
+  // <code>VectorizedArray<number></code> requires care: Here, we use the
+  // deal.II table class which is prepared to hold the data with correct
+  // alignment. However, storing it in e.g.
+  // <code>std::vector<VectorizedArray<number> ></code> is not possible with
+  // vectorization: A certain alignment of the data with the memory address
+  // boundaries is required (essentially, a VectorizedArray of 16 bytes length
+  // as in SSE needs to start at a memory address that is divisible by
+  // 16). The table class (as well as the AlignedVector class it is based on)
+  // makes sure that this alignment is respected, whereas std::vector can in
+  // general not, which may lead to segmentation faults at strange places for
+  // some systems or suboptimal performance for other systems.
+  template <int dim, int fe_degree, typename number>
+  class LaplaceOperator : public Subscriptor
+  {
+  public:
+    LaplaceOperator ();
+    // Release the MatrixFree data and forget the stored diagonal.
+    void clear();
+    // Set up the MatrixFree data for the active cells or one multigrid level.
+    void reinit (const DoFHandler<dim>  &dof_handler,
+                 const ConstraintMatrix  &constraints,
+                 const unsigned int      level = numbers::invalid_unsigned_int);
+    // Number of rows and columns of the (square) operator.
+    unsigned int m () const;
+    unsigned int n () const;
+    // Operator application; the transposed variants call the plain ones.
+    void vmult (Vector<double> &dst,
+                const Vector<double> &src) const;
+    void Tvmult (Vector<double> &dst,
+                 const Vector<double> &src) const;
+    void vmult_add (Vector<double> &dst,
+                    const Vector<double> &src) const;
+    void Tvmult_add (Vector<double> &dst,
+                     const Vector<double> &src) const;
+    // Read access to diagonal entries and the setter that provides them.
+    number el (const unsigned int row,
+               const unsigned int col) const;
+    void set_diagonal (const Vector<number> &diagonal);
+    // Memory estimate summed over all member variables.
+    std::size_t memory_consumption () const;
+
+  private:
+    void local_apply (const MatrixFree<dim,number>    &data,
+                      Vector<double>                      &dst,
+                      const Vector<double>                &src,
+                      const std::pair<unsigned int,unsigned int> &cell_range) const;
+    // Tabulate @p function at the quadrature points of all macro cells.
+    void evaluate_coefficient(const Coefficient<dim> &function);
+    // Matrix-free data cache and the coefficient at the quadrature points.
+    MatrixFree<dim,number>      data;
+    Table<2, VectorizedArray<number> > coefficient;
+    // Diagonal given to set_diagonal() and a flag telling whether it was set.
+    Vector<number>  diagonal_values;
+    bool            diagonal_is_available;
+  };
+
+
+
+  // Constructor. Besides subscribing to the deal.II @p Subscriptor scheme
+  // (which guards against deleting an object that is still in use, e.g. by a
+  // preconditioner), mark the diagonal as not yet set so el() can check it.
+  template <int dim, int fe_degree, typename number>
+  LaplaceOperator<dim,fe_degree,number>::LaplaceOperator ()
+    :
+    Subscriptor(),
+    diagonal_is_available(false)
+  {}
+
+
+
+  // The next functions return the number of rows and columns of the global
+  // matrix (i.e. the dimensions of the operator this class represents, the
+  // point of this tutorial program was, after all, that we don't actually
+  // store the elements of the rows and columns of this operator). Since the
+  // matrix is square, the returned numbers are the same. We get the number
+  // from the vector partitioner stored in the data field (a partitioner
+  // distributes elements of a vector onto a number of different machines if
+  // programs are run in %parallel; since this program is written to run on
+  // only a single machine, the partitioner will simply say that all elements
+  // of the vector -- or, in the current case, all rows and columns of a
+  // matrix -- are stored on the current machine).
+  template <int dim, int fe_degree, typename number>
+  unsigned int LaplaceOperator<dim,fe_degree,number>::m () const
+  {
+    // Row count as reported by the vector partitioner of the MatrixFree data.
+    return (*data.get_vector_partitioner()).size();
+  }
+
+
+
+  template <int dim, int fe_degree, typename number>
+  unsigned int LaplaceOperator<dim,fe_degree,number>::n () const
+  {
+    // The operator is square, so this is the same partitioner size as m().
+    return (*data.get_vector_partitioner()).size();
+  }
+
+
+
+  template <int dim, int fe_degree, typename number>
+  void LaplaceOperator<dim,fe_degree,number>::clear ()
+  {
+    data.clear();
+    coefficient.reinit (0, 0); // also drop the tabulated coefficient values
+    diagonal_values.reinit(0);
+    diagonal_is_available = false;
+  }
+
+
+  // @sect4{Initialization}
+
+  // Once we have created the multigrid dof_handler and the constraints, we
+  // can call the reinit function for each level of the multigrid routine
+  // (and the active cells). The main purpose of the reinit function is to
+  // set up the <code> MatrixFree </code> instance for the problem. Also, the
+  // coefficient is evaluated. For this, we need to activate the update flag
+  // in the AdditionalData field of MatrixFree that enables the storage of
+  // quadrature point coordinates in real space (by default, it only caches
+  // data for gradients (inverse transposed Jacobians) and JxW values). Note
+  // that if we call the reinit function without specifying the level (i.e.,
+  // giving <code>level = numbers::invalid_unsigned_int</code>), we have told
+  // the class to loop over the active cells.
+  //
+  // We also set one option regarding task parallelism. We choose to use the
+  // @p partition_color strategy, which is based on subdivision of cells into
+  // partitions where cells in partition $k$ (or, more precisely, the degrees
+  // of freedom on these cells) only interact with cells in partitions $k-1$,
+  // $k$, and $k+1$. Within each partition, cells are colored in such a way
+  // that cells with the same color do not share degrees of freedom and can,
+  // therefore, be worked on at the same time without interference. This
+  // determines a task dependency graph that is scheduled by the Intel
+  // Threading Building Blocks library. Another option would be the strategy
+  // @p partition_partition, which performs better when the grid is more
+  // unstructured. We could also manually set the size of chunks that form one
+  // task in the scheduling process by setting @p tasks_block_size, but the
+  // default strategy to let the function decide works well already.
+  //
+  // To initialize the coefficient, we directly give it the Coefficient class
+  // defined above and then select the method
+  // <code>coefficient_function.value</code> with vectorized number (which the
+  // compiler can deduce from the point data type). The use of the
+  // FEEvaluation class (and its template arguments) will be explained below.
+  template <int dim, int fe_degree, typename number>
+  void
+  LaplaceOperator<dim,fe_degree,number>::reinit (const DoFHandler<dim>  &dof_handler,
+                                                 const ConstraintMatrix  &constraints,
+                                                 const unsigned int      level)
+  {
+    typedef typename MatrixFree<dim,number>::AdditionalData SetupOptions;
+    // An invalid level number (the default) selects the active cells.
+    SetupOptions options;
+    options.level_mg_handler      = level;
+    options.mapping_update_flags  = (update_gradients | update_JxW_values |
+                                     update_quadrature_points);
+    options.tasks_parallel_scheme = SetupOptions::partition_color;
+    data.reinit (dof_handler, constraints, QGauss<1>(fe_degree+1), options);
+    evaluate_coefficient (Coefficient<dim>());
+  }
+
+
+
+  template <int dim, int fe_degree, typename number>
+  void
+  LaplaceOperator<dim,fe_degree,number>::
+  evaluate_coefficient (const Coefficient<dim> &coefficient_function)
+  {
+    // One table row per macro cell, one column per quadrature point.
+    FEEvaluation<dim,fe_degree,fe_degree+1,1,number> fe_eval (data);
+    coefficient.reinit (data.n_macro_cells(), fe_eval.n_q_points);
+    for (unsigned int cell=0; cell<data.n_macro_cells(); ++cell)
+      {
+        fe_eval.reinit (cell);
+        for (unsigned int q=0; q<fe_eval.n_q_points; ++q)
+          coefficient(cell,q) =
+            coefficient_function.value (fe_eval.quadrature_point(q));
+      }
+  }
+
+
+
+  // @sect4{Local evaluation of Laplace operator}
+
+  // Here comes the main function of this class, the evaluation of the
+  // matrix-vector product (or, in general, a finite element operator
+  // evaluation). This is done in a function that takes exactly four
+  // arguments, the MatrixFree object, the destination and source vectors, and
+  // a range of cells that are to be worked on. The method
+  // <code>cell_loop</code> in the MatrixFree class will internally call this
+  // function with some range of cells that is obtained by checking which
+  // cells are possible to work on simultaneously so that write operations do
+  // not cause any race condition. Note that the total range of cells as
+  // visible in this class is usually not equal to the number of (active)
+  // cells in the triangulation.  In fact, "cell" may be the wrong term to
+  // begin with, since it is rather a collection of quadrature points from
+  // several cells, and the MatrixFree class groups the quadrature points of
+  // several cells into one block to enable a higher degree of vectorization.
+  // The number of such "cells" is stored in MatrixFree and can be queried
+  // through MatrixFree::n_macro_cells(). Compared to the
+  // deal.II cell iterators, in this class all cells are laid out in a plain
+  // array with no direct knowledge of level or neighborship relations, which
+  // makes it possible to index the cells by unsigned integers.
+  //
+  // The implementation of the Laplace operator is quite simple: First, we
+  // need to create an object FEEvaluation that contains the computational
+  // kernels and has data fields to store temporary results (e.g. gradients
+  // evaluated on all quadrature points on a collection of a few cells). Note
+  // that temporary results do not use a lot of memory, and since we specify
+  // template arguments with the element order, the data is stored on the
+  // stack (without expensive memory allocation). Usually, one only needs to
+  // set two template arguments, the dimension as first argument and the
+  // degree of the finite element as the second argument (this is equal to the
+  // number of degrees of freedom per dimension minus one for FE_Q
+  // elements). However, here we also want to be able to use float numbers for
+  // the multigrid preconditioner, which is the last (fifth) template
+  // argument. Therefore, we cannot rely on the default template arguments and
+  // must also fill the third and fourth field, consequently. The third
+  // argument specifies the number of quadrature points per direction and has
+  // a default value equal to the degree of the element plus one. The fourth
+  // argument sets the number of components (one can also evaluate
+  // vector-valued functions in systems of PDEs, but the default is a scalar
+  // element), and finally the last argument sets the number type.
+  //
+  // Next, we loop over the given cell range and then we continue with the
+  // actual implementation: <ol> <li>Tell the FEEvaluation object the (macro)
+  // cell we want to work on.  <li>Read in the values of the source vectors
+  // (@p read_dof_values), including the resolution of constraints. This
+  // stores $u_\mathrm{cell}$ as described in the introduction.  <li>Compute
+  // the unit-cell gradient (the evaluation of finite element
+  // functions). Since FEEvaluation can combine value computations with
+  // gradient computations, it uses a unified interface to all kinds of
+  // derivatives of order between zero and two. We only want gradients, no
+  // values and no second derivatives, so we set the function arguments to
+  // true in the gradient slot (second slot), and to false in the values slot
+  // (first slot) and Hessian slot (third slot). Note that the FEEvaluation
+  // class internally evaluates shape functions in an efficient way where one
+  // dimension is worked on at a time (using the tensor product form of shape
+  // functions and quadrature points as mentioned in the introduction). This
+  // gives complexity equal to $\mathcal O(d^2 (p+1)^{d+1})$ for polynomial
+  // degree $p$ in $d$ dimensions, compared to the naive approach with loops
+  // over all local degrees of freedom and quadrature points that is used in
+  // FEValues and costs $\mathcal O(d (p+1)^{2d})$.  <li>Next comes the
+  // application of the Jacobian transformation, the multiplication by the
+  // variable coefficient and the quadrature weight. FEEvaluation has an
+  // access function @p get_gradient that applies the Jacobian and returns the
+  // gradient in real space. Then, we just need to multiply by the (scalar)
+  // coefficient, and let the function @p submit_gradient apply the second
+  // Jacobian (for the test function) and the quadrature weight and Jacobian
+  // determinant (JxW). Note that the submitted gradient is stored in the same
+  // data field as where it is read from in @p get_gradient. Therefore, you
+  // need to make sure to not read from the same quadrature point again after
+  // having called @p submit_gradient on that particular quadrature point. In
+  // general, it is a good idea to copy the result of @p get_gradient when it
+  // is used more often than once.  <li>Next follows the summation over
+  // quadrature points for all test functions that corresponds to the actual
+  // integration step. For the Laplace operator, we just multiply by the
+  // gradient, so we call the integrate function with the respective argument
+  // set. If you have an equation where you test by both the values of the
+  // test functions and the gradients, both function arguments need to be set
+  // to true. Calling first the integrate function for values and then
+  // gradients in a separate call leads to wrong results, since the second
+  // call will internally overwrite the results from the first call. Note that
+  // there is no function argument for the second derivative for the integrate
+  // step.  <li>Eventually, the local contributions in the vector
+  // $v_\mathrm{cell}$ as mentioned in the introduction need to be added into
+  // the result vector (and constraints are applied). This is done with a call
+  // to @p distribute_local_to_global, the same name as the corresponding
+  // function in the ConstraintMatrix (only that we now store the local vector
+  // in the FEEvaluation object, as are the indices between local and global
+  // degrees of freedom).  </ol>
+  template <int dim, int fe_degree, typename number>
+  void
+  LaplaceOperator<dim,fe_degree,number>::
+  local_apply (const MatrixFree<dim,number>         &data,
+               Vector<double>                       &dst,
+               const Vector<double>                 &src,
+               const std::pair<unsigned int,unsigned int> &cell_range) const
+  {
+    // Evaluator object; it is reused for every macro cell of the range.
+    FEEvaluation<dim,fe_degree,fe_degree+1,1,number> fe_eval (data);
+    for (unsigned int cell=cell_range.first; cell<cell_range.second; ++cell)
+      {
+        fe_eval.reinit (cell);
+        fe_eval.read_dof_values (src);
+        fe_eval.evaluate (false, true, false); // gradients only
+        // Scale the gradient by the tabulated coefficient at each point.
+        for (unsigned int q=0; q<fe_eval.n_q_points; ++q)
+          fe_eval.submit_gradient (coefficient(cell,q) * fe_eval.get_gradient(q), q);
+        fe_eval.integrate (false, true);       // test by gradients only
+        fe_eval.distribute_local_to_global (dst);
+      }
+  }
+
+
+
+  // @sect4{vmult functions}
+
+  // Now to the @p vmult function that is called externally: In addition to
+  // what we do in a @p vmult_add function further down, we set the
+  // destination to zero first. The transposed matrix-vector product is needed
+  // for well-defined multigrid preconditioner operations. Since we solve a
+  // Laplace problem, this is the same operation, and we just refer to the
+  // vmult operation.
+  template <int dim, int fe_degree, typename number>
+  void
+  LaplaceOperator<dim,fe_degree,number>::
+  vmult (Vector<double> &dst, const Vector<double> &src) const
+  {
+    dst = 0.;                   // overwrite semantics: start from zero
+    this->vmult_add (dst, src); // then accumulate the operator action
+  }
+
+
+
+  template <int dim, int fe_degree, typename number>
+  void
+  LaplaceOperator<dim,fe_degree,number>::
+  Tvmult (Vector<double> &dst, const Vector<double> &src) const
+  {
+    // The operator is symmetric, so the transposed product is the plain one.
+    this->vmult (dst, src);
+  }
+
+
+
+  template <int dim, int fe_degree, typename number>
+  void
+  LaplaceOperator<dim,fe_degree,number>::
+  Tvmult_add (Vector<double> &dst, const Vector<double> &src) const
+  {
+    this->vmult_add (dst, src); // symmetric operator: same as untransposed
+  }
+
+
+
+  // This function implements the loop over all cells. This is done with the
+  // @p cell_loop of the MatrixFree class, which takes @p local_apply of this
+  // class with arguments MatrixFree, OutVector, InVector, cell_range. Note
+  // that we could also use a simple function as local operation in case we
+  // had constant coefficients (all we need then is the MatrixFree, the
+  // vectors and the cell range), but since the coefficient is stored in a
+  // variable of this class, we cannot use that variant here. The cell loop is
+  // automatically performed on several threads if multithreading is enabled
+  // (this class uses a quite elaborate algorithm to work on cells that do not
+  // share any degrees of freedom that could possibly give rise to race
+  // conditions, using the dynamic task scheduler of the Intel Threading
+  // Building Blocks).
+  //
+  // After the cell loop, we need to touch the constrained degrees of freedom:
+  // Since the assembly loop automatically resolves constraints (just as the
+  // ConstraintMatrix::distribute_local_to_global call does), it does not
+  // compute any contribution for constrained degrees of freedom. In other
+  // words, the entries for constrained DoFs remain zero after the first part
+  // of this function, as if the matrix had empty rows and columns for
+  // constrained degrees of freedom. On the other hand, iterative solvers like
+  // CG only work for non-singular matrices, so we have to modify the
+  // operation on constrained DoFs. The easiest way to do that is to pretend
+  // that the sub-block of the matrix that corresponds to constrained DoFs is
+  // the identity matrix, in which case application of the matrix would simply
+  // copy the elements of the right hand side vector into the left hand
+  // side. In general, however, one needs to make sure that the diagonal
+  // entries of this sub-block are of the same order of magnitude as the
+  // diagonal elements of the rest of the matrix.  Here, the domain extent is
+  // of unit size, so we can simply choose unit size. If we had domains that
+  // are far away from unit size, we would need to choose a number that is
+  // close to the size of other diagonal matrix entries, so that these
+  // artificial eigenvalues do not change the eigenvalue spectrum (and make
+  // convergence with CG more difficult).
+  template <int dim, int fe_degree, typename number>
+  void
+  LaplaceOperator<dim,fe_degree,number>::
+  vmult_add (Vector<double> &dst, const Vector<double> &src) const
+  {
+    data.cell_loop (&LaplaceOperator::local_apply, this, dst, src);
+    // Treat the constrained block as identity: add src to dst on those DoFs.
+    const std::vector<unsigned int> &constrained = data.get_constrained_dofs();
+    for (std::vector<unsigned int>::const_iterator dof = constrained.begin();
+         dof != constrained.end(); ++dof)
+      dst(*dof) += src(*dof);
+  }
+
+
+
+  // The next function is used to return entries of the matrix. Since this
+  // class is intended not to store the matrix entries, it would make no sense
+  // to provide access to all those elements. However, diagonal entries are
+  // explicitly needed for the implementation of the Chebyshev smoother that
+  // we intend to use in the multigrid preconditioner. This matrix is equipped
+  // with a vector that stores the diagonal.
+  template <int dim, int fe_degree, typename number>
+  number
+  LaplaceOperator<dim,fe_degree,number>::
+  el (const unsigned int row, const unsigned int col) const
+  {
+    Assert (row == col, ExcNotImplemented()); // only the diagonal is stored
+    Assert (diagonal_is_available == true, ExcNotInitialized());
+    return diagonal_values(row);
+  }
+
+
+
+  // Regarding the calculation of the diagonal, we expect the user to provide
+  // a vector with the diagonal entries (and we will compute them in the code
+  // below). We only need it for the level matrices of multigrid, not the
+  // system matrix (since we only need these diagonals for the multigrid
+  // smoother). Since we fill only elements into unconstrained entries, we
+  // have to set constrained entries to one in order to avoid the same
+  // problems as discussed above.
+  template <int dim, int fe_degree, typename number>
+  void
+  LaplaceOperator<dim,fe_degree,number>::
+  set_diagonal (const Vector<number> &diagonal)
+  {
+    AssertDimension (m(), diagonal.size());
+    diagonal_values = diagonal;
+
+    // Constrained rows act as identity in vmult_add(), so their diagonal is 1.
+    const std::vector<unsigned int> &constrained = data.get_constrained_dofs();
+    for (std::vector<unsigned int>::const_iterator dof = constrained.begin();
+         dof != constrained.end(); ++dof)
+      diagonal_values(*dof) = 1.0;
+    diagonal_is_available = true;
+  }
+
+
+
+  // Eventually, we provide a function that calculates how much memory this
+  // class uses. We just need to sum up the memory consumption in the
+  // MatrixFree object and the memory for storing the other member
+  // variables. As a remark: In 3D and for Cartesian meshes, most memory is
+  // consumed for storing the vector indices on the local cells (corresponding
+  // to local_dof_indices). For general (non-Cartesian) meshes, the cached
+  // Jacobian transformation consumes most memory.
+  template <int dim, int fe_degree, typename number>
+  std::size_t
+  LaplaceOperator<dim,fe_degree,number>::memory_consumption () const
+  {
+    return (data.memory_consumption () +  // 'data': presumably the MatrixFree object
+            coefficient.memory_consumption() +
+            diagonal_values.memory_consumption() +  // diagonal stored by set_diagonal()
+            MemoryConsumption::memory_consumption(diagonal_is_available));  // the flag itself
+  }
+
+
+
+  // @sect3{LaplaceProblem class}
+
+  // This class is based on the one in step-16. However, we replaced the
+  // SparseMatrix<double> class by our matrix-free implementation, which means
+  // that we can also skip the sparsity patterns. Notice that we define the
+  // LaplaceOperator class with the degree of finite element as template
+  // argument (the value is defined at the top of the file), and that we use
+  // float numbers for the multigrid level matrices.
+  //
+  // The class also has a member variable to keep track of all the time we
+  // spend on setting up the entire chain of data before we actually go about
+  // solving the problem. In addition, there is an output stream (that is
+  // disabled by default) that can be used to output details for the
+  // individual setup operations instead of the summary only that is printed
+  // out by default.
+  template <int dim>
+  class LaplaceProblem
+  {
+  public:
+    LaplaceProblem ();
+    void run ();
+
+  private:
+    void setup_system ();
+    void assemble_system ();
+    void assemble_multigrid ();
+    void solve ();
+    void output_results (const unsigned int cycle) const;
+
+    typedef LaplaceOperator<dim,degree_finite_element,double> SystemMatrixType;
+    typedef LaplaceOperator<dim,degree_finite_element,float>  LevelMatrixType;
+
+    Triangulation<dim>               triangulation;
+    FE_Q<dim>                        fe;
+    DoFHandler<dim>                  dof_handler;
+    ConstraintMatrix                 constraints;
+
+    SystemMatrixType                 system_matrix;
+    MGLevelObject<LevelMatrixType>   mg_matrices;  // one matrix-free operator per multigrid level
+    FullMatrix<float>                coarse_matrix;  // explicitly assembled matrix on level 0
+    MGLevelObject<ConstraintMatrix>  mg_constraints;  // Dirichlet constraints on each level
+
+    Vector<double>                   solution;
+    Vector<double>                   system_rhs;
+
+    double                           setup_time;  // accumulated wall time of the setup stages
+    ConditionalOStream               time_details;  // per-stage timings; switched off in the constructor
+  };
+
+
+
+  // When we initialize the finite element, we of course have to use the
+  // degree specified at the top of the file as well (otherwise, an exception
+  // will be thrown at some point, since the computational kernel defined in
+  // the templated LaplaceOperator class and the information from the finite
+  // element read out by MatrixFree will not match).
+  template <int dim>
+  LaplaceProblem<dim>::LaplaceProblem ()
+    :
+    fe (degree_finite_element),
+    dof_handler (triangulation),
+    time_details (std::cout, false)  // pass true instead to print the per-stage setup timings
+  {}
+
+
+
+  // @sect4{LaplaceProblem::setup_system}
+
+  // This is the function of step-16 with relevant changes due to the
+  // LaplaceOperator class. We do not use adaptive grids, so we do not have to
+  // compute edge matrices. Thus, all we do is to implement Dirichlet boundary
+  // conditions through the ConstraintMatrix, set up the (one-dimensional)
+  // quadrature that should be used by the matrix-free class, and call the
+  // initialization functions.
+  //
+  // In the process, we output data on both the run time of the program as
+  // well as on memory consumption, where we output memory data in megabytes
+  // (1 million bytes).
+  template <int dim>
+  void LaplaceProblem<dim>::setup_system ()
+  {
+    Timer time;
+    time.start ();
+    setup_time = 0;  // each call (one per refinement cycle) starts a fresh setup-time total
+
+    system_matrix.clear();
+    mg_matrices.clear();
+    mg_constraints.clear();
+
+    dof_handler.distribute_dofs (fe);
+    dof_handler.distribute_mg_dofs (fe);
+
+    std::cout << "Number of degrees of freedom: "
+              << dof_handler.n_dofs()
+              << std::endl;
+
+    constraints.clear();
+    VectorTools::interpolate_boundary_values (dof_handler,
+                                              0,
+                                              ZeroFunction<dim>(),
+                                              constraints);
+    constraints.close();
+    setup_time += time.wall_time();
+    time_details << "Distribute DoFs & B.C.     (CPU/wall) "
+                 << time() << "s/" << time.wall_time() << "s" << std::endl;
+    time.restart();
+
+    system_matrix.reinit (dof_handler, constraints);
+    std::cout.precision(4);  // sticky: also applies to everything printed to std::cout afterwards
+    std::cout << "System matrix memory consumption:     "
+              << system_matrix.memory_consumption()*1e-6  // bytes -> MB (1 MB = 1e6 bytes)
+              << " MB."
+              << std::endl;
+
+    solution.reinit (dof_handler.n_dofs());
+    system_rhs.reinit (dof_handler.n_dofs());
+
+    setup_time += time.wall_time();
+    time_details << "Setup matrix-free system   (CPU/wall) "
+                 << time() << "s/" << time.wall_time() << "s" << std::endl;
+    time.restart();
+
+    // Next, initialize the matrices for the multigrid method on all the
+    // levels. The function MGTools::make_boundary_list returns for each
+    // multigrid level which degrees of freedom are located on a Dirichlet
+    // boundary; we force these DoFs to have value zero by adding to the
+    // ConstraintMatrix object a zero condition by using the command
+    // ConstraintMatrix::add_line. Once this is done, we close the
+    // ConstraintMatrix on each level so it can be used to read out indices
+    // internally in the MatrixFree.
+    const unsigned int nlevels = triangulation.n_levels();
+    mg_matrices.resize(0, nlevels-1);
+    mg_constraints.resize (0, nlevels-1);
+
+    typename FunctionMap<dim>::type dirichlet_boundary;
+    ZeroFunction<dim>               homogeneous_dirichlet_bc (1);
+    dirichlet_boundary[0] = &homogeneous_dirichlet_bc;
+    std::vector<std::set<types::global_dof_index> > boundary_indices(triangulation.n_levels());
+    MGTools::make_boundary_list (dof_handler,
+                                 dirichlet_boundary,
+                                 boundary_indices);
+    for (unsigned int level=0; level<nlevels; ++level)
+      {
+        std::set<types::global_dof_index>::iterator bc_it = boundary_indices[level].begin();
+        for ( ; bc_it != boundary_indices[level].end(); ++bc_it)
+          mg_constraints[level].add_line(*bc_it);
+
+        mg_constraints[level].close();
+        mg_matrices[level].reinit(dof_handler,
+                                  mg_constraints[level],
+                                  level);
+      }
+    coarse_matrix.reinit (dof_handler.n_dofs(0),  // square matrix over the level-0 DoFs
+                          dof_handler.n_dofs(0));
+    setup_time += time.wall_time();
+    time_details << "Setup matrix-free levels   (CPU/wall) "
+                 << time() << "s/" << time.wall_time() << "s" << std::endl;
+  }
+
+
+
+  // @sect4{LaplaceProblem::assemble_system}
+
+  // The assemble function is significantly reduced compared to step-16. All
+  // we need to do is to assemble the right hand side. That is the same as in
+  // many other tutorial programs. In the end, we condense the constraints
+  // from Dirichlet boundary conditions away from the right hand side.
+  template <int dim>
+  void LaplaceProblem<dim>::assemble_system ()
+  {
+    Timer time;
+    QGauss<dim>  quadrature_formula(fe.degree+1);
+    FEValues<dim> fe_values (fe, quadrature_formula,
+                             update_values   | update_JxW_values);
+
+    const unsigned int   dofs_per_cell = fe.dofs_per_cell;
+    const unsigned int   n_q_points    = quadrature_formula.size();
+
+    std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+
+    typename DoFHandler<dim>::active_cell_iterator cell = dof_handler.begin_active(),
+                                                   endc = dof_handler.end();
+    for (; cell!=endc; ++cell)
+      {
+        cell->get_dof_indices (local_dof_indices);
+        fe_values.reinit (cell);
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          {
+            double rhs_val = 0;
+            for (unsigned int q=0; q<n_q_points; ++q)
+              rhs_val += (fe_values.shape_value(i,q) * 1.0 *  // 1.0: presumably the constant right hand side f
+                          fe_values.JxW(q));
+            system_rhs(local_dof_indices[i]) += rhs_val;
+          }
+      }
+    constraints.condense(system_rhs);
+    setup_time += time.wall_time();  // right hand side assembly counts as setup time
+    time_details << "Assemble right hand side   (CPU/wall) "
+                 << time() << "s/" << time.wall_time() << "s" << std::endl;
+  }
+
+
+  // @sect4{LaplaceProblem::assemble_multigrid}
+
+  // Here is another assemble function. Again, it is simpler than assembling
+  // matrices. We need to compute the diagonal of the Laplace matrices on the
+  // individual levels, send the final matrices to the LaplaceOperator class,
+  // and we need to compute the full matrix on the coarsest level (since that
+  // is inverted exactly in the deal.II multigrid implementation).
+  template <int dim>
+  void LaplaceProblem<dim>::assemble_multigrid ()
+  {
+    Timer time;
+    coarse_matrix = 0;
+    QGauss<dim>  quadrature_formula(fe.degree+1);
+    FEValues<dim> fe_values (fe, quadrature_formula,
+                             update_gradients  | update_inverse_jacobians |
+                             update_quadrature_points | update_JxW_values);  // points feed coefficient.value_list below
+
+    const unsigned int   dofs_per_cell = fe.dofs_per_cell;
+    const unsigned int   n_q_points    = quadrature_formula.size();
+
+    std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+    const Coefficient<dim>    coefficient;
+    std::vector<double>       coefficient_values (n_q_points);
+    FullMatrix<float>         local_matrix (dofs_per_cell, dofs_per_cell);
+    Vector<double>            local_diagonal (dofs_per_cell);
+
+    const unsigned int n_levels = triangulation.n_levels();
+    std::vector<Vector<float> > diagonals (n_levels);
+    for (unsigned int level=0; level<n_levels; ++level)
+      diagonals[level].reinit (dof_handler.n_dofs(level));
+
+    std::vector<unsigned int> cell_no(triangulation.n_levels());  // NOTE(review): never used in this function
+    typename DoFHandler<dim>::cell_iterator cell = dof_handler.begin(),
+                                            endc = dof_handler.end();
+    for (; cell!=endc; ++cell)
+      {
+        const unsigned int level = cell->level();
+        cell->get_mg_dof_indices (local_dof_indices);
+        fe_values.reinit (cell);
+        coefficient.value_list (fe_values.get_quadrature_points(),
+                                coefficient_values);
+
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          {
+            double local_diag = 0;
+            for (unsigned int q=0; q<n_q_points; ++q)
+              local_diag += ((fe_values.shape_grad(i,q) *
+                              fe_values.shape_grad(i,q)) *
+                             coefficient_values[q] * fe_values.JxW(q));
+            local_diagonal(i) = local_diag;
+          }
+        mg_constraints[level].distribute_local_to_global(local_diagonal,
+                                                         local_dof_indices,
+                                                         diagonals[level]);
+
+        if (level == 0)  // only the coarsest level needs the full matrix
+          {
+            local_matrix = 0;
+
+            for (unsigned int i=0; i<dofs_per_cell; ++i)
+              for (unsigned int j=0; j<dofs_per_cell; ++j)
+                {
+                  double add_value = 0;
+                  for (unsigned int q=0; q<n_q_points; ++q)
+                    add_value += (fe_values.shape_grad(i,q) *
+                                  fe_values.shape_grad(j,q) *
+                                  coefficient_values[q] *
+                                  fe_values.JxW(q));
+                  local_matrix(i,j) = add_value;
+                }
+            mg_constraints[0].distribute_local_to_global (local_matrix,
+                                                          local_dof_indices,
+                                                          coarse_matrix);
+          }
+      }
+
+    for (unsigned int level=0; level<n_levels; ++level)
+      mg_matrices[level].set_diagonal (diagonals[level]);
+
+    setup_time += time.wall_time();
+    time_details << "Assemble MG diagonal       (CPU/wall) "
+                 << time() << "s/" << time.wall_time() << "s" << std::endl;
+  }
+
+
+
+  // @sect4{LaplaceProblem::solve}
+
+  // The solution process again looks like step-16. We now use a Chebyshev
+  // smoother instead of SOR (SOR would be very difficult to implement because
+  // we do not have the matrix elements available explicitly, and it is
+  // difficult to make it work efficiently in %parallel). The multigrid
+  // classes provide a simple interface for using the Chebyshev smoother which
+  // is defined in a preconditioner class: MGSmootherPrecondition.
+  template <int dim>
+  void LaplaceProblem<dim>::solve ()
+  {
+    Timer time;
+    MGTransferPrebuilt<Vector<double> > mg_transfer;
+    mg_transfer.build_matrices(dof_handler);
+    setup_time += time.wall_time();
+    time_details << "MG build transfer time     (CPU/wall) " << time()
+                 << "s/" << time.wall_time() << "s\n";
+    time.restart();
+
+    MGCoarseGridHouseholder<float, Vector<double> > mg_coarse;
+    mg_coarse.initialize(coarse_matrix);
+    setup_time += time.wall_time();
+    time_details << "MG coarse time             (CPU/wall) " << time()
+                 << "s/" << time.wall_time() << "s\n";
+    time.restart();
+
+    typedef PreconditionChebyshev<LevelMatrixType,Vector<double> > SMOOTHER;
+    MGSmootherPrecondition<LevelMatrixType, SMOOTHER, Vector<double> >
+    mg_smoother;
+
+    // Then, we initialize the smoother with our level matrices and the
+    // mandatory additional data for the Chebyshev smoother. We use quite a
+    // high degree here (6), since matrix-vector products are comparably cheap
+    // and more parallel than the level-transfer operations. We choose to
+    // smooth out a range of $[1.2 \hat{\lambda}_{\max}/10,1.2
+    // \hat{\lambda}_{\max}]$ in the smoother where $\hat{\lambda}_{\max}$ is
+    // an estimate of the largest eigenvalue. In order to compute that
+    // eigenvalue, the Chebyshev initialization performs a few steps of a CG
+    // algorithm without preconditioner. Since the highest eigenvalue is
+    // usually the easiest one to find and a rough estimate is enough, we
+    // choose 10 iterations.
+    typename SMOOTHER::AdditionalData smoother_data;
+    smoother_data.smoothing_range = 10.;
+    smoother_data.degree = 6;
+    smoother_data.eig_cg_n_iterations = 10;
+    mg_smoother.initialize(mg_matrices, smoother_data);
+
+    mg::Matrix<Vector<double> > mg_matrix(mg_matrices);
+
+    Multigrid<Vector<double> > mg(dof_handler,
+                                  mg_matrix,
+                                  mg_coarse,
+                                  mg_transfer,
+                                  mg_smoother,
+                                  mg_smoother);
+    PreconditionMG<dim, Vector<double>,
+                   MGTransferPrebuilt<Vector<double> > >
+                   preconditioner(dof_handler, mg, mg_transfer);
+
+    // Finally, write out the memory consumption of the Multigrid object (or
+    // rather, of its most significant components, since there is no built-in
+    // function for the total multigrid object), then create the solver object
+    // and solve the system. This is very easy, and we didn't even see any
+    // difference in the solve process compared to step-16. The magic is all
+    // hidden behind the implementation of the LaplaceOperator::vmult
+    // operation. Note that we print out the solve time and the accumulated
+    // setup time through standard out, i.e., in any case, whereas detailed
+    // times for the setup operations are only printed in case the flag for
+    // time_details in the constructor is changed.
+    const std::size_t multigrid_memory
+      = (mg_matrices.memory_consumption() +
+         mg_transfer.memory_consumption() +
+         coarse_matrix.memory_consumption());
+    std::cout << "Multigrid objects memory consumption: "
+              << multigrid_memory * 1e-6
+              << " MB."
+              << std::endl;
+
+    SolverControl           solver_control (1000, 1e-12*system_rhs.l2_norm());
+    SolverCG<>              cg (solver_control);
+    setup_time += time.wall_time();
+    time_details << "MG build smoother time     (CPU/wall) " << time()
+                 << "s/" << time.wall_time() << "s\n";
+    std::cout << "Total setup time               (wall) " << setup_time
+              << "s\n";
+
+    time.reset();
+    time.start();
+    cg.solve (system_matrix, solution, system_rhs,
+              preconditioner);
+
+
+    std::cout << "Time solve ("
+              << solver_control.last_step()
+              << " iterations)  (CPU/wall) " << time() << "s/"
+              << time.wall_time() << "s\n";
+  }
+
+
+
+  // @sect4{LaplaceProblem::output_results}
+
+  // Here is the data output, which is a simplified version of step-5. We use
+  // the standard VTU (= compressed VTK) output for each grid produced in the
+  // refinement process.
+  template <int dim>
+  void LaplaceProblem<dim>::output_results (const unsigned int cycle) const
+  {
+    DataOut<dim> data_out;
+
+    data_out.attach_dof_handler (dof_handler);
+    data_out.add_data_vector (solution, "solution");
+    data_out.build_patches ();
+
+    std::ostringstream filename;
+    filename << "solution-"  // one file per refinement cycle, e.g. solution-0.vtu
+             << cycle
+             << ".vtu";
+
+    std::ofstream output (filename.str().c_str());
+    data_out.write_vtu (output);
+  }
+
+
+
+  // @sect4{LaplaceProblem::run}
+
+  // The function that runs the program is very similar to the one in
+  // step-16. We make fewer refinement steps in 3D compared to 2D, but that's
+  // it.
+  template <int dim>
+  void LaplaceProblem<dim>::run ()
+  {
+    for (unsigned int cycle=0; cycle<9-dim; ++cycle)  // 7 cycles for dim==2, 6 for dim==3
+      {
+        std::cout << "Cycle " << cycle << std::endl;
+
+        if (cycle == 0)
+          {
+            GridGenerator::hyper_cube (triangulation, 0., 1.);
+            triangulation.refine_global (3-dim);  // one initial refinement for dim==2, none for dim==3
+          }
+        triangulation.refine_global (1);  // every cycle, including the first, refines once more
+        setup_system ();
+        assemble_system ();
+        assemble_multigrid ();
+        solve ();
+        output_results (cycle);
+        std::cout << std::endl;
+      };
+  }
+}
+
+
+
+// @sect3{The <code>main</code> function}
+
+// This is as in most other programs.
+int main ()
+{
+  try
+    {
+      using namespace Step37;
+
+      LaplaceProblem<dimension> laplace_problem;
+      laplace_problem.run ();
+    }
+  catch (std::exception &exc)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;  // non-zero exit status signals the failure
+    }
+  catch (...)  // anything that is not derived from std::exception
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;  // non-zero exit status signals the failure
+    }
+
+  return 0;
+}
diff --git a/examples/step-38/CMakeLists.txt b/examples/step-38/CMakeLists.txt
new file mode 100644
index 0000000..3195f54
--- /dev/null
+++ b/examples/step-38/CMakeLists.txt
@@ -0,0 +1,39 @@
+##
+#  CMake script for the step-38 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-38")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})  # FIND_PACKAGE above was QUIET, so report a missing deal.II explicitly
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-38/doc/builds-on b/examples/step-38/doc/builds-on
new file mode 100644
index 0000000..8a0072d
--- /dev/null
+++ b/examples/step-38/doc/builds-on
@@ -0,0 +1 @@
+step-4 step-34
diff --git a/examples/step-38/doc/intro.dox b/examples/step-38/doc/intro.dox
new file mode 100644
index 0000000..8ff7796
--- /dev/null
+++ b/examples/step-38/doc/intro.dox
@@ -0,0 +1,233 @@
+<br>
+
+<i>This program was contributed by Andrea Bonito and M. Sebastian Pauletti,
+with editing and writing by Wolfgang Bangerth.
+<br>
+This material is based upon work supported by the National Science
+Foundation under Grant No. DMS-0914977. Any opinions, findings and conclusions
+or recommendations expressed in this material are those of the author(s) and
+do not necessarily reflect the views of the National Science Foundation
+(NSF).
+</i>
+
+<a name="Intro"></a>
+
+<h1>Introduction</h1>
+
+In this example, we show how to solve a partial differential equation (PDE)
+on a codimension one surface $\Gamma \subset \mathbb R^3$
+made of quadrilaterals, i.e. on a surface in 3d or a line in 2d.
+We focus on the following elliptic second order PDE
+@f{align*}
+-\Delta_\Gamma u &= f \qquad \text{on } \qquad \Gamma,\\
+u  &= g \qquad \text{on} \qquad \partial \Gamma,
+@f}
+which generalizes the Laplace equation we have previously solved in several of
+the early tutorial programs. Our implementation is based on step-4. step-34
+also solves problems on lower dimensional surfaces; however, there we only
+consider integral equations that do not involve derivatives on the solution
+variable, while here we actually have to investigate what it means to take
+derivatives of a function only defined on a (possibly curved) surface.
+
+In order to define the above operator, we start by introducing some notations.
+Let $\mathbf x_S:\hat S \rightarrow S$ be a parametrization of
+a surface $S$ from a reference element $\hat S \subset \mathbb R^2$,
+i.e. each point $\hat{\mathbf x}\in\hat S$ induces a point ${\mathbf
+  x}_S(\hat{\mathbf x}) \in S$. Then let
+@f[
+G_S:= (D \mathbf{x}_S)^T \ D \mathbf{x}_S
+@f]
+denote the corresponding first fundamental form, where $D
+\mathbf{x}_S=\left(\frac{\partial x_{S,i}(\hat{\mathbf x})}{\partial \hat x_j}\right)_{ij}$ is the 
+derivative (Jacobian) of the mapping.
+In the following, $S$ will be either the entire surface $\Gamma$ or,
+more convenient for the finite element method, any face $S \in
+{\mathbb T}$, where ${\mathbb T}$ is a partition (triangulation) of $\Gamma$
+constituted of quadrilaterals. 
+We are now in position to define the tangential gradient of a function $v : S \rightarrow \mathbb
+R$ by
+@f[
+(\nabla_S v)\circ \mathbf x_S :=  D \mathbf x_S \ G_S^{-1} \ \nabla (v \circ \mathbf x_S).
+@f]
+The surface Laplacian (also called the Laplace-Beltrami operator) is then
+defined as  $\Delta_S:= \nabla_S \cdot \nabla_S$.
+Note that an alternate way to compute the surface gradient on smooth surfaces $\Gamma$ is 
+@f[
+\nabla_S v = \nabla \tilde v - \mathbf n (\mathbf n \cdot \nabla \tilde v),
+@f]
+where $\tilde v$ is a "smooth" extension of $v$ in a tubular neighborhood of $\Gamma$ and
+$\mathbf n$ is the normal of $\Gamma$. 
+Since $\Delta_S = \nabla_S \cdot \nabla_S$, we deduce
+@f[
+\Delta_S v = \Delta \tilde v - \mathbf n^T \ D^2 \tilde v \ \mathbf n - (\mathbf n \cdot \nabla \tilde v) (\nabla \cdot \mathbf n - \mathbf n^T \ D \mathbf n \ \mathbf n ).
+@f]
+It is worth mentioning that the term $\nabla \cdot \mathbf n - \mathbf n^T \ D \mathbf n \ \mathbf n$ appearing in the above expression is the total curvature of the surface (sum of principal curvatures).
+
+As usual, we are only interested in weak solutions for which we can use $C^0$
+finite elements (rather than requiring $C^1$ continuity as for strong
+solutions). We therefore resort to the weak formulation
+@f[
+\int_\Gamma \nabla_\Gamma u \cdot
+\nabla_\Gamma v = \int_\Gamma f \ v  \qquad \forall v \in H^1_0(\Gamma)
+@f]
+and take advantage of the partition ${\mathbb T}$ to further write
+@f[
+\sum_{K\in  {\mathbb T}}\int_K \nabla_{K} u \cdot \nabla_{K} v = \sum_{K\in
+  {\mathbb T}} \int_K f \ v  \qquad \forall v \in H^1_0(\Gamma).
+@f]
+Moreover, each integral in the above expression is computed in the reference
+element $\hat K:= [0,1]^2$ 
+so that
+@f{align*}
+\int_{K} \nabla_{K} u \cdot \nabla_{K} v 
+&= 
+\int_{\hat K} \nabla (u \circ \mathbf x_K)^T G_K^{-1} (D \mathbf
+  x_K)^T D \mathbf x_K G_K^{-1} \nabla (v \circ \mathbf x_K) \sqrt{\det
+    (G_K)}
+\\
+&= 
+\int_{\hat K} \nabla (u \circ \mathbf x_K)^T G_K^{-1} \nabla (v \circ \mathbf x_K) \sqrt{\det
+    (G_K)}
+@f}
+and
+@f[
+\int_{K} f \ v = \int_{\hat K} (f \circ \mathbf x_K) (v \circ \mathbf
+x_K)  \sqrt{\det
+    (G_K)}.
+@f]
+Finally, we use a quadrature formula defined by points $\{p_l\}_{l=1}^N\subset
+\hat K$ and weights $\{w_l\}_{l=1}^N \subset \mathbb R^+_*$ to
+evaluate the above integrals and
+obtain
+@f[\int_{K} \nabla_{K} u \cdot \nabla_{K} v \approx \sum_{l=1}^N
+ (\nabla (u \circ \mathbf x_K)(p_l))^T G^{-1}(p_l)  \nabla (v \circ \mathbf x_K)
+(p_l) \sqrt{\det (G(p_l))} \ w_l
+@f]
+and
+@f[
+\int_{K} f \ v \approx \sum_{l=1}^N (f \circ \mathbf x_K)(p_l) \ (v \circ \mathbf x_K)(p_l) \sqrt{\det (G(p_l))} \ w_l.
+@f]
+
+
+Fortunately, deal.II already has all the tools to compute the above
+expressions.
+In fact, they barely differ from the ways in which we solve the usual
+Laplacian, only requiring the surface coordinate mapping to be provided in the
+constructor of the FEValues class.
+Given this surface description, in the codimension one surface case, the two
+routines FEValues::shape_grad and FEValues::JxW
+return
+@f{align*}
+\text{FEValues::shape\_grad}(i,l)&=D \mathbf x_K(p_l) G^{-1}(p_l)D(\varphi_i \circ \mathbf x_K)
+  (p_l)
+\\
+\text{FEValues::JxW}(l) &=  \sqrt{\det (G(p_l))} \ w_l.
+@f}
+This provides exactly the terms we need for our computations.
+
+On a more general note, details for the finite element approximation on
+surfaces can be found for instance in 
+[Dziuk, in Partial differential equations and calculus of
+variations 1357, Lecture Notes in Math., 1988],
+[Demlow, SIAM J. Numer. Anal.  47(2), 2009]
+and
+[Bonito, Nochetto, and Pauletti, SIAM J. Numer. Anal. 48(5), 2010].
+
+
+
+<h3>Testcase</h3>
+
+In general when you want to test numerically the accuracy and/or order of
+convergence of an algorithm you need to provide an exact solution. The usual
+trick is to pick a function that we want to be the solution, then apply the
+differential operator to it that defines a forcing term for the right hand
+side. This is what we do in this example. In the current case, the form of the
+domain is obviously also essential.
+
+We produce one test case for a 2d problem and another one for 3d:
+
+<ul>
+<li>
+  In 2d, let's choose as domain a half circle. On this domain, we choose the
+  function $u(\mathbf x)=-2x_1x_2$ as the solution. To compute the right hand
+  side, we have to compute the surface Laplacian of the
+  solution function. There are (at least) two ways to do that. The first one
+  is to project away the normal derivative as described above using the natural extension of $u(\mathbf x)$ (still denoted by $u$) over $\mathbb R^d$, i.e. to compute
+  @f[
+    \Delta_\Gamma u =  \Delta u - \mathbf n^T \ D^2 u \ \mathbf n - (\mathbf n \cdot \nabla u)\ \kappa,
+  @f]	
+  where $\kappa$ is the total curvature of $\Gamma$.
+  Since we are on the unit circle, $\mathbf n=\mathbf x$ and $\kappa = 1$ so that
+  @f[
+    -\Delta_\Gamma u = -8 x_1x_2.
+  @f]	
+
+  A somewhat simpler way, at least for the current case of a curve in
+  two-dimensional space, is to note that we can map the interval $t \in
+  [0,\pi]$ onto the domain $\Gamma$ using the transformation
+  $\mathbf x(t)= \left(\begin{array}{c} \cos t \\ \sin t \end{array}\right)$.
+  At position $\mathbf x=\mathbf x(t)$, the value of the solution is then
+  $u(\mathbf x(t)) = -2\cos t \sin t$.
+  Taking into account that the transformation is length preserving, i.e. a
+  segment of length $dt$ is mapped onto a piece of curve of exactly the same
+  length, the tangential Laplacian then satisfies
+  @f{align*}
+    \Delta_\Gamma u 
+    &= \frac{d^2}{dt^2}(-2\cos t \sin t)
+    = -2 \frac{d}{dt}(-\sin^2 t + \cos^2 t)
+    = -2 (-2 \sin t \cos t - 2 \cos t \sin t)
+    \\
+    &= 8 \sin t \cos t
+    \\
+    &= 8 x_1x_2,
+  @f}
+  which is of course the same result as we had above.
+</li>
+<li>
+  In 3d, the domain is again half of the surface of the unit ball, i.e. a half
+  sphere or dome. We choose $u(\mathbf x)=-2\sin(\pi x_1)\cos(\pi x_2)e^{x_3}$ as
+  the solution. We can compute the right hand side of the
+  equation, $f=-\Delta_\Gamma u$, in the same way as the method above (with $\kappa = 2$), yielding an
+  awkward and lengthy expression. You can find the full expression in the
+  source code.
+</li>
+</ul> 
+
+In the program, we will also compute the $H^1$ seminorm error of the
+solution. Since the solution function and its numerical approximation are only
+defined on the manifold, the obvious definition of this error functional is
+$| e |_{H^1(\Gamma)} 
+  = | \nabla_\Gamma e |_{L_2(\Gamma)} 
+  = \left( \int_\Gamma | \nabla_\Gamma (u-u_h) |^2 \right)^{1/2}$. This requires us to provide the
+<i>tangential</i> gradient $\nabla_\Gamma u$ to the function VectorTools::integrate_difference
+(first introduced in step-7), which we
+will do by implementing the function <code>Solution::gradient</code> in the
+program below.
+
+
+<h3>Implementation</h3>
+
+If you've read through step-4 and understand the discussion above of how
+solution and right hand side correspond to each other, you will be immediately
+familiar with this program as well. In fact, there are only two things that
+are of significance:
+
+- The way we generate the mesh that triangulates the computational domain.
+
+- The way we use Mapping objects to describe that the domain on which we solve
+  the partial differential equation is not planar but in fact curved.
+
+Mapping objects were already introduced in step-10 and step-11 and as
+explained there, there is usually not a whole lot you have to know about how
+they work as long as you have a working description of how the boundary
+looks. In essence, we will simply declare an appropriate object of type
+MappingQ that will automatically obtain the boundary description from the
+Triangulation. The mapping object will then be passed to the appropriate
+functions, and we will get a boundary description for half circles or half
+spheres that is predefined in the library.
+
+The rest of the program follows closely step-4 and, as far as computing the
+error, step-7. Some aspects of this program, in particular the use of two
+template arguments on the classes Triangulation, DoFHandler, and similar, are
+already described in detail in step-34; you may wish to read through this
+tutorial program as well.
diff --git a/examples/step-38/doc/kind b/examples/step-38/doc/kind
new file mode 100644
index 0000000..c1d9154
--- /dev/null
+++ b/examples/step-38/doc/kind
@@ -0,0 +1 @@
+techniques
diff --git a/examples/step-38/doc/results.dox b/examples/step-38/doc/results.dox
new file mode 100644
index 0000000..1ea337f
--- /dev/null
+++ b/examples/step-38/doc/results.dox
@@ -0,0 +1,161 @@
+<h1>Results</h1>
+
+When you run the program, the following output should be printed on screen:
+
+@verbatim
+Surface mesh has 1280 cells.
+Surface mesh has 5185 degrees of freedom.
+H1 error = 0.0221245
+@endverbatim
+
+
+By playing around with the number of global refinements in the
+<code>LaplaceBeltramiProblem::make_grid_and_dofs</code> function you increase or decrease mesh
+refinement. For example, doing one more refinement and only running the 3d surface
+problem yields the following
+output:
+
+@verbatim
+Surface mesh has 5120 cells.
+Surface mesh has 20609 degrees of freedom.
+H1 error = 0.00552639
+@endverbatim
+
+This is what we expect: make the mesh size smaller by a factor of two and the
+error goes down by a factor of four (remember that we use bi-quadratic
+elements). The full sequence of errors from one to five refinements looks like
+this, neatly following the theoretically predicted pattern:
+@verbatim
+0.360759
+0.0888008
+0.0221245
+0.00552639
+0.0013813
+@endverbatim
+
+Finally, the program produces graphical output that we can visualize. Here is
+a plot of the results:
+
+<img src="http://www.dealii.org/images/steps/developer/step-38.solution-3d.png" alt="">
+
+The program also works for 1d curves in 2d, not just 2d surfaces in 3d. You
+can test this by changing the template argument in <code>main()</code> like
+so:
+@code
+      LaplaceBeltramiProblem<2> laplace_beltrami;
+@endcode
+The domain is a curve in 2d, and we can visualize the solution by using the
+third dimension (and color) to denote the value of the function $u(x)$. This
+then looks like so (the white curve is the domain, the colored curve is the
+solution extruded into the third dimension, clearly showing the change in sign
+as the curve moves from one quadrant of the domain into the adjacent one):
+
+<img src="http://www.dealii.org/images/steps/developer/step-38.solution-2d.png" alt="">
+
+
+<a name="extensions"></a>
+<h3>Possibilities for extensions</h3>
+
+Computing on surfaces only becomes interesting if the surface is more
+interesting than just a half sphere. To achieve this, deal.II can read
+meshes that describe surfaces through the usual GridIn class. Or, in case you
+have an analytic description, a simple mesh can sometimes be stretched and
+bent into a shape we are interested in.
+
+Let us consider a relatively simple example: we take the half sphere we used
+before, we stretch it by a factor of 10 in the z-direction, and then we jumble
+the x- and y-coordinates a bit. Let's show the computational domain and the
+solution first before we go into details of the implementation below:
+
+<img src="http://www.dealii.org/images/steps/developer/step-38.warp-1.png" alt="">
+
+<img src="http://www.dealii.org/images/steps/developer/step-38.warp-2.png" alt="">
+
+The way to produce such a mesh is by using the GridTools::transform
+function. It needs a way to transform each individual mesh point to a
+different position. Let us here use the following, rather simple function
+(remember: stretch in one direction, jumble in the other two):
+
+@code
+template <int spacedim>
+Point<spacedim> warp (const Point<spacedim> &p)
+{
+  Point<spacedim> q = p;
+  q[spacedim-1] *= 10;
+
+  if (spacedim >= 2)
+    q[0] += 2*std::sin(q[spacedim-1]);
+  if (spacedim >= 3)
+    q[1] += 2*std::cos(q[spacedim-1]);
+
+  return q;
+}
+@endcode
+
+If we followed the <code>LaplaceBeltramiProblem::make_grid_and_dofs</code> function, we would
+extract the half spherical surface mesh as before, warp it into the shape we
+want, and refine as often as necessary. This is not quite as simple as we'd
+like here, though: refining requires that we have an appropriate manifold
+object attached to the triangulation that describes where new vertices of the
+mesh should be located upon refinement. I'm sure it's possible to describe
+this manifold in a not-too-complicated way by simply undoing the
+transformation above (yielding the spherical surface again), finding the
+location of a new point on the sphere, and then re-warping the result. But I'm
+a lazy person, and since doing this is not really the point here, let's just
+make our lives a bit easier: we'll extract the half sphere, refine it as
+often as necessary, get rid of the object that describes the manifold since we
+now no longer need it, and then finally warp the mesh. With the function
+above, this would look as follows:
+
+@code
+template <int spacedim>
+void LaplaceBeltrami<spacedim>::make_grid_and_dofs ()
+{
+  static HyperBallBoundary<dim,spacedim> surface_description;
+  triangulation.set_boundary (0, surface_description);
+
+  {
+    HyperBallBoundary<spacedim> boundary_description;
+    Triangulation<spacedim> volume_mesh;
+    GridGenerator::half_hyper_ball(volume_mesh);
+
+    volume_mesh.set_boundary (0, boundary_description);
+    volume_mesh.refine_global (4);
+
+    std::set<unsigned char> boundary_ids;
+    boundary_ids.insert (0);
+
+    GridGenerator::extract_boundary_mesh (volume_mesh, triangulation,
+                                          boundary_ids);
+    triangulation.set_boundary (1);                            /* ** */
+    triangulation.set_boundary (0);                            /* ** */
+    GridTools::transform (&warp<spacedim>, triangulation);     /* ** */
+    std::ofstream x("x"), y("y");
+    GridOut().write_gnuplot (volume_mesh, x);
+    GridOut().write_gnuplot (triangulation, y);
+  }
+
+  std::cout << "Surface mesh has " << triangulation.n_active_cells()
+	    << " cells."
+	    << std::endl;
+
+  ...
+@endcode
+
+Note that the only essential addition has been the three lines marked with
+asterisks. It is worth pointing out one other thing here, though: because we
+detach the manifold description from the surface mesh, whenever we use a
+mapping object in the rest of the program, it has no curved boundary
+description to go on any more. Rather, it will have to use the implicit,
+StraightBoundary class that is used on all parts of the boundary not
+explicitly assigned a different manifold object. Consequently, whether we use
+MappingQ(2), MappingQ(15) or MappingQ1, each cell of our mesh will be mapped
+using a bilinear approximation.
+
+All these drawbacks aside, the resulting pictures are still pretty. The only
+other differences to what's in step-38 are that we changed the right hand side
+to $f(\mathbf x)=\sin x_3$ and the boundary values (through the
+<code>Solution</code> class) to $u(\mathbf x)|_{\partial\Omega}=\cos x_3$. Of
+course, we now no longer know the exact solution, so the computation of the
+error at the end of <code>LaplaceBeltramiProblem::run</code> will yield a meaningless
+number.
diff --git a/examples/step-38/doc/tooltip b/examples/step-38/doc/tooltip
new file mode 100644
index 0000000..550a7eb
--- /dev/null
+++ b/examples/step-38/doc/tooltip
@@ -0,0 +1 @@
+Solve the Laplace-Beltrami equation on a half sphere.
diff --git a/examples/step-38/step-38.cc b/examples/step-38/step-38.cc
new file mode 100644
index 0000000..fe84666
--- /dev/null
+++ b/examples/step-38/step-38.cc
@@ -0,0 +1,590 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2010 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Authors: Andrea Bonito, Sebastian Pauletti.
+ */
+
+
+// @sect3{Include files}
+
+// If you've read through step-4 and step-7, you will recognize that we have
+// used all of the following include files there already. Consequently, we
+// will not explain their meaning here again.
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/function.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/manifold_lib.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/solver_control.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/precondition.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/matrix_tools.h>
+
+#include <fstream>
+#include <iostream>
+
+
+namespace Step38
+{
+  using namespace dealii;
+
+  // @sect3{The <code>LaplaceBeltramiProblem</code> class template}
+
+  // This class is almost identical to the <code>LaplaceProblem</code>
+  // class in step-4.
+
+  // The essential differences are these:
+  //
+  // - The template parameter now denotes the dimensionality of the embedding
+  //   space, which is no longer the same as the dimensionality of the domain
+  //   and the triangulation on which we compute. We indicate this by calling
+  //   the parameter @p spacedim , and introducing a constant @p dim equal to
+  //   the dimensionality of the domain -- here equal to
+  //   <code>spacedim-1</code>.
+  // - All member variables that have geometric aspects now need to know about
+  //   both their own dimensionality and that of the embedding
+  //   space. Consequently, we need to specify both of their template
+  //   parameters: one for the dimension of the mesh, @p dim, and the other for
+  //   the dimension of the embedding space, @p spacedim. This is exactly what
+  //   we did in step-34; take a look there for a deeper explanation.
+  // - We need an object that describes which kind of mapping to use from the
+  //   reference cell to the cells that the triangulation is composed of. The
+  //   classes derived from the Mapping base class do exactly this. Throughout
+  //   most of deal.II, if you don't do anything at all, the library assumes
+  //   that you want an object of kind MappingQ1 that uses a (bi-, tri-)linear
+  //   mapping. In many cases, this is quite sufficient, which is why the use
+  //   of these objects is mostly optional: for example, if you have a
+  //   polygonal two-dimensional domain in two-dimensional space, a bilinear
+  //   mapping of the reference cell to the cells of the triangulation yields
+  //   an exact representation of the domain. If you have a curved domain, one
+  //   may want to use a higher order mapping for those cells that lie at the
+  //   boundary of the domain -- this is what we did in step-11, for
+  //   example. However, here we have a curved domain, not just a curved
+  //   boundary, and while we can approximate it with bilinearly mapped cells,
+  //   it is really only prudent to use a higher order mapping for all
+  //   cells. Consequently, this class has a member variable of type MappingQ;
+  //   we will choose the polynomial degree of the mapping equal to the
+  //   polynomial degree of the finite element used in the computations to
+  //   ensure optimal approximation, though this iso-parametricity is not
+  //   required.
+  template <int spacedim>                                  // spacedim: dimension of the embedding space
+  class LaplaceBeltramiProblem
+  {
+  public:
+    LaplaceBeltramiProblem (const unsigned degree = 2);    // degree is handed to both fe and mapping by the constructor
+    void run ();                                           // calls the five private steps below in their declaration order
+
+  private:
+    static const unsigned int dim = spacedim-1;            // dimension of the surface mesh, one less than the embedding space
+
+    void make_grid_and_dofs ();
+    void assemble_system ();
+    void solve ();
+    void output_results () const;
+    void compute_error () const;
+
+
+    Triangulation<dim,spacedim>   triangulation;           // surface mesh, filled in make_grid_and_dofs
+    FE_Q<dim,spacedim>            fe;
+    DoFHandler<dim,spacedim>      dof_handler;             // constructed on triangulation
+    MappingQ<dim, spacedim>       mapping;                 // passed to assembly, boundary interpolation, output and error computation
+
+    SparsityPattern               sparsity_pattern;
+    SparseMatrix<double>          system_matrix;           // reinitialized on sparsity_pattern in make_grid_and_dofs
+
+    Vector<double>                solution;
+    Vector<double>                system_rhs;
+  };
+
+
+  // @sect3{Equation data}
+
+  // Next, let us define the classes that describe the exact solution and the
+  // right hand sides of the problem. This is in analogy to step-4 and step-7
+  // where we also defined such objects. Given the discussion in the
+  // introduction, the actual formulas should be self-explanatory. A point of
+  // interest may be how we define the value and gradient functions for the 2d
+  // and 3d cases separately, using explicit specializations of the general
+  // template. An alternative to doing it this way might have been to define
+  // the general template and have a <code>switch</code> statement (or a
+  // sequence of <code>if</code>s) for each possible value of the spatial
+  // dimension.
+  template <int dim>                                                   // used in this program as Solution<spacedim>, so dim is the embedding dimension here
+  class Solution  : public Function<dim>
+  {
+  public:
+    Solution () : Function<dim>() {}
+
+    virtual double value (const Point<dim>   &p,                       // defined only through the explicit specializations for 2 and 3
+                          const unsigned int  component = 0) const;    // the specializations leave this argument unnamed and unused
+
+    virtual Tensor<1,dim> gradient (const Point<dim>   &p,             // likewise specialized for 2 and 3 only
+                                    const unsigned int  component = 0) const;
+
+  };
+
+
+  template <>
+  double
+  Solution<2>::value (const Point<2> &p,
+                      const unsigned int) const          // component argument is unused
+  {
+    return ( -2. * p(0) * p(1) );                        // u(x,y) = -2xy
+  }
+
+
+  template <>
+  Tensor<1,2>
+  Solution<2>::gradient (const Point<2>   &p,
+                         const unsigned int) const            // component argument is unused
+  {
+    Tensor<1,2> return_value;                                 // entries equal grad u - (grad u . n) n for u = -2xy, grad u = (-2y,-2x), n = p; assumes p has unit length
+    return_value[0] = -2. * p(1) * (1 - 2. * p(0) * p(0));    // -2y (1 - 2x^2)
+    return_value[1] = -2. * p(0) * (1 - 2. * p(1) * p(1));    // -2x (1 - 2y^2)
+
+    return return_value;
+  }
+
+
+  template <>
+  double
+  Solution<3>::value (const Point<3> &p,
+                      const unsigned int) const              // component argument is unused
+  {
+    return (std::sin(numbers::PI * p(0)) *                   // u(x,y,z) = sin(PI x) cos(PI y) exp(z)
+            std::cos(numbers::PI * p(1))*exp(p(2)));
+  }
+
+
+  template <>
+  Tensor<1,3>
+  Solution<3>::gradient (const Point<3>   &p,
+                         const unsigned int) const                      // component argument is unused
+  {
+    using numbers::PI;
+
+    Tensor<1,3> return_value;                                           // NOTE(review): filled with the plain partial derivatives of sin(PI x) cos(PI y) exp(z); unlike the 2d specialization no normal component is removed here -- confirm this is intended
+
+    return_value[0] = PI *cos(PI * p(0))*cos(PI * p(1))*exp(p(2));      // d/dx
+    return_value[1] = -PI *sin(PI * p(0))*sin(PI * p(1))*exp(p(2));     // d/dy
+    return_value[2] = sin(PI * p(0))*cos(PI * p(1))*exp(p(2));          // d/dz
+
+    return return_value;
+  }
+
+
+
+  template <int dim>                                                   // used in this program as RightHandSide<spacedim>
+  class RightHandSide : public Function<dim>
+  {
+  public:
+    RightHandSide () : Function<dim>() {}
+
+    virtual double value (const Point<dim>   &p,                       // defined only through the explicit specializations for 2 and 3
+                          const unsigned int  component = 0) const;    // the specializations do not use this argument
+  };
+
+  template <>
+  double
+  RightHandSide<2>::value (const Point<2> &p,
+                           const unsigned int /*component*/) const
+  {
+    return ( -8. * p(0) * p(1) );                        // f(x,y) = -8xy, i.e. four times Solution<2>::value at the same point
+  }
+
+
+  template <>
+  double
+  RightHandSide<3>::value (const Point<3> &p,
+                           const unsigned int /*component*/) const
+  {
+    using numbers::PI;
+
+    Tensor<2,3> hessian;                                              // second partial derivatives of sin(PI x) cos(PI y) exp(z)
+
+    hessian[0][0] = -PI*PI*sin(PI*p(0))*cos(PI*p(1))*exp(p(2));
+    hessian[1][1] = -PI*PI*sin(PI*p(0))*cos(PI*p(1))*exp(p(2));
+    hessian[2][2] = sin(PI*p(0))*cos(PI*p(1))*exp(p(2));
+
+    hessian[0][1] = -PI*PI*cos(PI*p(0))*sin(PI*p(1))*exp(p(2));       // off-diagonal entries are filled symmetrically in pairs
+    hessian[1][0] = -PI*PI*cos(PI*p(0))*sin(PI*p(1))*exp(p(2));
+
+    hessian[0][2] = PI*cos(PI*p(0))*cos(PI*p(1))*exp(p(2));
+    hessian[2][0] = PI*cos(PI*p(0))*cos(PI*p(1))*exp(p(2));
+
+    hessian[1][2] = -PI*sin(PI*p(0))*sin(PI*p(1))*exp(p(2));
+    hessian[2][1] = -PI*sin(PI*p(0))*sin(PI*p(1))*exp(p(2));
+
+    Tensor<1,3> gradient;                                             // same three expressions as in Solution<3>::gradient
+    gradient[0] = PI * cos(PI*p(0))*cos(PI*p(1))*exp(p(2));
+    gradient[1] = - PI * sin(PI*p(0))*sin(PI*p(1))*exp(p(2));
+    gradient[2] = sin(PI*p(0))*cos(PI*p(1))*exp(p(2));
+
+    Point<3> normal = p;                                              // p rescaled to unit length; divides by p.norm(), so p must not be the origin
+    normal /= p.norm();
+
+    return (- trace(hessian)                                          // minus the sum of the diagonal second derivatives
+            + 2 * (gradient * normal)                                 // presumably the factor 2 is the curvature term of the unit sphere -- confirm against intro.dox
+            + (hessian * normal) * normal);                           // second derivative in the direction of normal
+  }
+
+
+  // @sect3{Implementation of the <code>LaplaceBeltramiProblem</code> class}
+
+  // The rest of the program is actually quite unspectacular if you know
+  // step-4. Our first step is to define the constructor, setting the
+  // polynomial degree of the finite element and mapping, and associating the
+  // DoF handler to the triangulation:
+  template <int spacedim>
+  LaplaceBeltramiProblem<spacedim>::
+  LaplaceBeltramiProblem (const unsigned degree)
+    :
+    fe (degree),                      // finite element of the requested polynomial degree
+    dof_handler(triangulation),       // bound to the member triangulation, which gets its mesh later in make_grid_and_dofs
+    mapping (degree)                  // mapping uses the same degree as fe
+  {}
+
+
+  // @sect4{LaplaceBeltramiProblem::make_grid_and_dofs}
+
+  // The next step is to create the mesh, distribute degrees of freedom, and
+  // set up the various variables that describe the linear system. All of
+  // these steps are standard with the exception of how to create a mesh that
+  // describes a surface. We could generate a mesh for the domain we are
+  // interested in, generate a triangulation using a mesh generator, and read
+  // it in using the GridIn class. Or, as we do here, we generate the mesh
+  // using the facilities in the GridGenerator namespace.
+  //
+  // In particular, what we're going to do is this (enclosed between the set
+  // of braces below): we generate a <code>spacedim</code> dimensional mesh
+  // for the half disk (in 2d) or half ball (in 3d), using the
+  // GridGenerator::half_hyper_ball function. This function sets the boundary
+  // indicators of all boundary faces: to zero for the ones located on the
+  // perimeter of the disk/ball, and to one for those on the straight part
+  // that splits the full disk/ball into two halves. The next step is the
+  // main point: The GridGenerator::extract_boundary_mesh function creates a mesh
+  // that consists of those cells that are the faces of the previous mesh,
+  // i.e. it describes the <i>surface</i> cells of the original (volume)
+  // mesh. However, we do not want all faces: only those on the perimeter of
+  // the disk or ball which carry boundary indicator zero; we can select these
+  // cells using a set of boundary indicators that we pass to
+  // GridGenerator::extract_boundary_mesh.
+  //
+  // There is one point that needs to be mentioned. In order to refine a
+  // surface mesh appropriately if the manifold is curved (similarly to
+  // refining the faces of cells that are adjacent to a curved boundary), the
+  // triangulation has to have an object attached to it that describes where
+  // new vertices should be located. If you don't attach such a manifold
+  // object, they will be located halfway between existing vertices; this is
+  // appropriate if you have a domain with straight boundaries (e.g. a
+  // polygon) but not when, as here, the manifold has curvature. So for things
+  // to work properly, we need to attach a manifold object to our (surface)
+  // triangulation, in much the same way as we've already done in 1d for the
+  // boundary. We create such an object (with indefinite, <code>static</code>,
+  // lifetime) at the top of the function; once the surface mesh has been
+  // extracted, we assign manifold indicator zero to all of its cells and
+  // attach the object to the triangulation for that indicator.
+  //
+  // The final step in creating the mesh is to refine it a number of
+  // times. The rest of the function is the same as in previous tutorial
+  // programs.
+  template <int spacedim>
+  void LaplaceBeltramiProblem<spacedim>::make_grid_and_dofs ()
+  {
+    static SphericalManifold<dim,spacedim> surface_description;      // static so that it outlives this call; it is attached to the member triangulation below
+
+    {                                                                 // volume_mesh only lives inside this scope
+      Triangulation<spacedim> volume_mesh;
+      GridGenerator::half_hyper_ball(volume_mesh);
+
+      std::set<types::boundary_id> boundary_ids;
+      boundary_ids.insert (0);                                        // only faces with boundary indicator 0 are requested for extraction
+
+      GridGenerator::extract_boundary_mesh (volume_mesh, triangulation,
+                                            boundary_ids);
+    }
+    triangulation.set_all_manifold_ids(0);                            // give every cell of the surface mesh manifold id 0 ...
+    triangulation.set_manifold (0, surface_description);              // ... and associate that id with the spherical description
+
+    triangulation.refine_global(4);                                   // refinement happens after the manifold has been attached
+
+    std::cout << "Surface mesh has " << triangulation.n_active_cells()
+              << " cells."
+              << std::endl;
+
+    dof_handler.distribute_dofs (fe);
+
+    std::cout << "Surface mesh has " << dof_handler.n_dofs()
+              << " degrees of freedom."
+              << std::endl;
+
+    DynamicSparsityPattern dsp (dof_handler.n_dofs(), dof_handler.n_dofs());   // temporary pattern, copied into the member below
+    DoFTools::make_sparsity_pattern (dof_handler, dsp);
+    sparsity_pattern.copy_from (dsp);
+
+    system_matrix.reinit (sparsity_pattern);
+
+    solution.reinit (dof_handler.n_dofs());                           // both vectors get one entry per degree of freedom
+    system_rhs.reinit (dof_handler.n_dofs());
+  }
+
+
+  // @sect4{LaplaceBeltramiProblem::assemble_system}
+
+  // The following is the central function of this program, assembling the
+  // matrix that corresponds to the surface Laplacian (Laplace-Beltrami
+  // operator). Maybe surprisingly, it actually looks exactly the same as for
+  // the regular Laplace operator discussed in, for example, step-4. The key
+  // is that the FEValues::shape_gradient function does the magic: It returns
+  // the surface gradient $\nabla_K \phi_i(x_q)$ of the $i$th shape function
+  // at the $q$th quadrature point. The rest then does not need any changes
+  // either:
+  template <int spacedim>
+  void LaplaceBeltramiProblem<spacedim>::assemble_system ()
+  {
+    system_matrix = 0;                                                    // start from a zero matrix and right hand side
+    system_rhs = 0;
+
+    const QGauss<dim>  quadrature_formula(2*fe.degree);                   // quadrature rule on the dim-dimensional cell, sized from the element degree
+    FEValues<dim,spacedim> fe_values (mapping, fe, quadrature_formula,
+                                      update_values              |
+                                      update_gradients           |
+                                      update_quadrature_points   |
+                                      update_JxW_values);
+
+    const unsigned int        dofs_per_cell = fe.dofs_per_cell;
+    const unsigned int        n_q_points    = quadrature_formula.size();
+
+    FullMatrix<double>        cell_matrix (dofs_per_cell, dofs_per_cell);
+    Vector<double>            cell_rhs (dofs_per_cell);
+
+    std::vector<double>       rhs_values(n_q_points);
+    std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+
+    const RightHandSide<spacedim> rhs;                                    // evaluated at points of the spacedim-dimensional embedding space
+
+    for (typename DoFHandler<dim,spacedim>::active_cell_iterator
+         cell = dof_handler.begin_active(),
+         endc = dof_handler.end();
+         cell!=endc; ++cell)
+      {
+        cell_matrix = 0;                                                  // local contributions are rebuilt from zero on every cell
+        cell_rhs = 0;
+
+        fe_values.reinit (cell);
+
+        rhs.value_list (fe_values.get_quadrature_points(), rhs_values);   // right hand side at all quadrature points of this cell
+
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          for (unsigned int j=0; j<dofs_per_cell; ++j)
+            for (unsigned int q_point=0; q_point<n_q_points; ++q_point)
+              cell_matrix(i,j) += fe_values.shape_grad(i,q_point) *       // gradient of shape function i times gradient of shape function j, weighted by JxW
+                                  fe_values.shape_grad(j,q_point) *
+                                  fe_values.JxW(q_point);
+
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          for (unsigned int q_point=0; q_point<n_q_points; ++q_point)
+            cell_rhs(i) += fe_values.shape_value(i,q_point) *             // shape function i times right hand side value, weighted by JxW
+                           rhs_values[q_point]*
+                           fe_values.JxW(q_point);
+
+        cell->get_dof_indices (local_dof_indices);
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          {
+            for (unsigned int j=0; j<dofs_per_cell; ++j)
+              system_matrix.add (local_dof_indices[i],                    // scatter the local entry to its global row and column
+                                 local_dof_indices[j],
+                                 cell_matrix(i,j));
+
+            system_rhs(local_dof_indices[i]) += cell_rhs(i);
+          }
+      }
+
+    std::map<types::global_dof_index,double> boundary_values;
+    VectorTools::interpolate_boundary_values (mapping,
+                                              dof_handler,
+                                              0,                          // boundary indicator whose degrees of freedom are constrained
+                                              Solution<spacedim>(),       // boundary values are taken from the exact solution
+                                              boundary_values);
+
+    MatrixTools::apply_boundary_values (boundary_values,
+                                        system_matrix,
+                                        solution,
+                                        system_rhs,false);                // presumably false is the eliminate_columns flag -- confirm against MatrixTools docs
+  }
+
+
+
+  // @sect4{LaplaceBeltramiProblem::solve}
+
+  // The next function is the one that solves the linear system. Here, too, no
+  // changes are necessary:
+  template <int spacedim>
+  void LaplaceBeltramiProblem<spacedim>::solve ()
+  {
+    SolverControl solver_control (solution.size(),                 // first argument is the number of unknowns, presumably used as the iteration limit
+                                  1e-7 * system_rhs.l2_norm());    // second argument scales 1e-7 by the norm of the right hand side, presumably the stopping tolerance
+    SolverCG<>    cg (solver_control);
+
+    PreconditionSSOR<> preconditioner;
+    preconditioner.initialize(system_matrix, 1.2);                 // presumably 1.2 is the SSOR relaxation parameter -- confirm against PreconditionSSOR docs
+
+    cg.solve (system_matrix, solution, system_rhs,                 // the member vector solution is passed as the unknown
+              preconditioner);
+  }
+
+
+
+  // @sect4{LaplaceBeltramiProblem::output_results}
+
+  // This is the function that generates graphical output from the
+  // solution. Most of it is boilerplate code, but there are two points worth
+  // pointing out:
+  //
+  // - The DataOut::add_data_vector function can take two kinds of vectors:
+  //   either vectors that have one value per degree of freedom defined by the
+  //   DoFHandler object previously attached via DataOut::attach_dof_handler,
+  //   or vectors that have one value for each cell of the triangulation, for
+  //   example to output estimated errors for each cell. Typically, the
+  //   DataOut class knows to tell these two kinds of vectors apart: there are
+  //   almost always more degrees of freedom than cells, so we can
+  //   differentiate between the two kinds by looking at the length of a vector. We
+  //   could do the same here, but only because we got lucky: we use a half
+  //   sphere. If we had used the whole sphere as domain and $Q_1$ elements,
+  //   we would have the same number of cells as vertices and consequently the
+  //   two kinds of vectors would have the same number of elements. To avoid
+  //   the resulting confusion, we have to tell the DataOut::add_data_vector
+  //   function which kind of vector we have: DoF data. This is what the third
+  //   argument to the function does.
+  // - The DataOut::build_patches function can generate output that subdivides
+  //   each cell so that visualization programs can resolve curved manifolds
+  //   or higher polynomial degree shape functions better. We here subdivide
+  //   each element in each coordinate direction as many times as the
+  //   polynomial degree of the finite element in use.
+  template <int spacedim>
+  void LaplaceBeltramiProblem<spacedim>::output_results () const
+  {
+    DataOut<dim,DoFHandler<dim,spacedim> > data_out;
+    data_out.attach_dof_handler (dof_handler);
+    data_out.add_data_vector (solution,
+                              "solution",
+                              DataOut<dim,DoFHandler<dim,spacedim> >::type_dof_data);   // states explicitly that solution holds one value per degree of freedom
+    data_out.build_patches (mapping,
+                            mapping.get_degree());                                       // number of subdivisions per cell equals the mapping degree
+
+    std::string filename ("solution-");
+    filename += static_cast<char>('0'+spacedim);                                         // appends spacedim as one character; only a digit for spacedim below 10
+    filename += "d.vtk";
+    std::ofstream output (filename.c_str());                                             // file is created in the current working directory
+    data_out.write_vtk (output);
+  }
+
+
+
+  // @sect4{LaplaceBeltramiProblem::compute_error}
+
+  // This is the last piece of functionality: we want to compute the error in
+  // the numerical solution. It is a verbatim copy of the code previously
+  // shown and discussed in step-7. As mentioned in the introduction, the
+  // <code>Solution</code> class provides the (tangential) gradient of the
+  // solution. To avoid evaluating the error only at superconvergence points,
+  // we choose a quadrature rule of sufficiently high order.
+  template <int spacedim>
+  void LaplaceBeltramiProblem<spacedim>::compute_error () const
+  {
+    Vector<float> difference_per_cell (triangulation.n_active_cells());   // one entry per active cell
+    VectorTools::integrate_difference (mapping, dof_handler, solution,
+                                       Solution<spacedim>(),              // reference function the discrete solution is compared with
+                                       difference_per_cell,
+                                       QGauss<dim>(2*fe.degree+1),        // argument is one larger than for the quadrature used in assemble_system
+                                       VectorTools::H1_norm);
+
+    std::cout << "H1 error = "
+              << difference_per_cell.l2_norm()                            // the per-cell values are combined into one number by their l2 norm
+              << std::endl;
+  }
+
+
+
+  // @sect4{LaplaceBeltramiProblem::run}
+
+  // The last function provides the top-level logic. Its contents are
+  // self-explanatory:
+  template <int spacedim>
+  void LaplaceBeltramiProblem<spacedim>::run ()
+  {
+    make_grid_and_dofs();      // mesh, degrees of freedom, sparsity pattern, matrix and vectors
+    assemble_system ();        // matrix, right hand side and boundary values
+    solve ();                  // fills the member vector solution
+    output_results ();         // writes the vtk file
+    compute_error ();          // prints the H1 error to std::cout
+  }
+}
+
+
+// @sect3{The main() function}
+
+// The remainder of the program is taken up by the <code>main()</code>
+// function. It follows exactly the general layout first introduced in step-6
+// and used in all following tutorial programs:
+int main ()
+{
+  try
+    {
+      using namespace dealii;
+      using namespace Step38;
+
+      LaplaceBeltramiProblem<3> laplace_beltrami;            // spacedim = 3 with the default polynomial degree 2
+      laplace_beltrami.run();
+    }
+  catch (std::exception &exc)                                // standard exceptions: print what() to std::cerr and return 1
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    }
+  catch (...)                                                // anything else: print a generic message and return 1
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    }
+
+  return 0;                                                  // reached only when run() finished without throwing
+}
diff --git a/examples/step-39/CMakeLists.txt b/examples/step-39/CMakeLists.txt
new file mode 100644
index 0000000..5fb6909
--- /dev/null
+++ b/examples/step-39/CMakeLists.txt
@@ -0,0 +1,39 @@
+##
+#  CMake script for the step-39 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-39")   # reused below as the stem of the source file name and as the project name
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})   # stop configuring with the instructions below when the FIND_PACKAGE call above found nothing
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-39/doc/builds-on b/examples/step-39/doc/builds-on
new file mode 100644
index 0000000..7167d6a
--- /dev/null
+++ b/examples/step-39/doc/builds-on
@@ -0,0 +1 @@
+step-6 step-12
diff --git a/examples/step-39/doc/intro.dox b/examples/step-39/doc/intro.dox
new file mode 100644
index 0000000..8b35bce
--- /dev/null
+++ b/examples/step-39/doc/intro.dox
@@ -0,0 +1,89 @@
+<a name="Intro"></a>
+
+In this program, we use the interior penalty method and Nitsche's weak
+boundary conditions to solve Poisson's equation. We use multigrid
+methods on locally refined meshes, which are generated using a bulk
+criterion and a standard error estimator based on cell and face
+residuals. All operators are implemented using the MeshWorker interface.
+
+Like in step-12, the discretization relies on finite element spaces,
+which are polynomial inside the mesh cells $K\in \mathbb T_h$, but
+have no continuity between cells. Since such functions have two values
+on each interior face $F\in \mathbb F_h^i$, one from each side, we
+define mean value and jump operators as follows: let
+<i>K</i><sub>1</sub> and <i>K</i><sub>2</sub> be the two cells sharing
+a face, and let the traces of functions <i>u<sub>i</sub></i> and the
+outer normal vectors <b>n</b><i><sub>i</sub></i> be labeled
+accordingly. Then, on the face, we let
+@f[
+	\{\!\{ u \}\!\} = \frac{u_1 + u_2}2
+@f]
+
+Note that if such an expression contains a normal vector, the
+averaging operator turns into a jump. The interior penalty method for the problem
+@f[
+  -\Delta u = f \text{ in }\Omega \qquad u = u^D \text{ on } \partial\Omega
+@f]
+becomes
+@f{multline*}
+  \sum_{K\in \mathbb T_h} (\nabla u, \nabla v)_K
+  \\
+  + \sum_{F \in F_h^i} \biggl\{4\sigma_F (\{\!\{ u \mathbf n\}\!\}, \{\!\{ v \mathbf n \}\!\})_F
+  - 2 (\{\!\{ \nabla u \}\!\},\{\!\{ v\mathbf n \}\!\})_F
+  - 2 (\{\!\{ \nabla v \}\!\},\{\!\{ u\mathbf n \}\!\})_F
+  \biggr\}
+  \\
+  + \sum_{F \in F_h^b} \biggl\{2\sigma_F (u, v)_F
+  - (\partial_n u,v)_F
+  - (\partial_n v,u)_F
+  \biggr\}
+  \\
+  = (f, v)_\Omega + \sum_{F \in F_h^b} \biggl\{
+  2\sigma_F (u^D, v)_F - (\partial_n v,u^D)_F
+  \biggr\}.
+@f}
+
+Here, $\sigma_F$ is the penalty parameter, which is chosen as follows:
+for a face <i>F</i> of a cell <i>K</i>, compute the value
+@f[
+\sigma_{F,K} = p(p+1) \frac{|F|_{d-1}}{|K|_d},
+@f]
+where <i>p</i> is the polynomial degree of the finite element
+functions and $|\cdot|_d$ and $|\cdot|_{d-1}$ denote the $d$ and $d-1$
+dimensional Hausdorff measure of the corresponding
+object. If the face is at the boundary, choose $\sigma_F = \sigma_{F,K}$.
+For an interior face, we take the average of the two values at this face.
+
+In our finite element program, we distinguish three different
+integrals, corresponding to the sums over cells, interior faces and
+boundary faces above. Since the MeshWorker::loop organizes the sums
+for us, we only need to implement the integrals over each mesh
+element. The class MatrixIntegrator below has these three functions
+for the left hand side of the formula, the class RHSIntegrator for the
+right.
+
+As we will see below, even the error estimate is of the same
+structure, since it can be written as
+@f{align*}
+  \eta^2 &= \eta_K^2 + \eta_F^2 + \eta_B^2
+  \\
+  \eta_K^2 &= \sum_{K\in \mathbb T_h} h^2 \|f + \Delta u_h\|^2
+  \\
+  \eta_F^2 &= \sum_{F \in F_h^i} \biggl\{
+    4 \sigma_F \| \{\!\{u_h\mathbf n\}\!\} \|^2 + h \|\{\!\{\partial_n u_h\}\!\}\|^2 \biggr\}
+  \\
+  \eta_B^2 &= \sum_{F \in F_h^b} 2\sigma_F \| u_h-u^D \|^2.
+@f}
+
+Thus, the functions for assembling matrices, right hand side and error
+estimates below exhibit that these loops are all generic and can be
+programmed in the same way.
+
+This program is related to step-12, in that it uses MeshWorker and
+discontinuous Galerkin methods. While there, we solved an advection
+problem, here it is a diffusion problem. Here, we also use multigrid
+preconditioning and a theoretically justified error estimator, see
+Karakashian and Pascal (2003). The multilevel scheme was discussed in
+detail in Kanschat (2004). The adaptive iteration and its convergence
+have been discussed (for triangular meshes) in Hoppe, Kanschat, and
+Warburton (2009).
diff --git a/examples/step-39/doc/kind b/examples/step-39/doc/kind
new file mode 100644
index 0000000..c1d9154
--- /dev/null
+++ b/examples/step-39/doc/kind
@@ -0,0 +1 @@
+techniques
diff --git a/examples/step-39/doc/results.dox b/examples/step-39/doc/results.dox
new file mode 100644
index 0000000..1e7de4a
--- /dev/null
+++ b/examples/step-39/doc/results.dox
@@ -0,0 +1,93 @@
+<h1>Results</h1>
+
+<h2>Logfile output</h2>
+First, the program produces the usual logfile, here stored in <tt>deallog</tt>. It reads (with intermediate steps omitted):
+
+@code
+DEAL::Element: FE_DGQ<2>(3)
+DEAL::Step 0
+DEAL::Triangulation 4 cells, 1 levels
+DEAL::DoFHandler 64 dofs, level dofs 64
+DEAL::Assemble matrix
+DEAL::Assemble multilevel matrix
+DEAL::Assemble right hand side
+DEAL::Solve
+DEAL:cg::Starting value 27.1275
+DEAL:cg::Convergence step 1 value 1.97998e-14
+DEAL::Error    0.161172
+DEAL::Estimate 1.35839
+DEAL::Writing solution to <sol-00.gnuplot>...
+DEAL::
+DEAL::Step 1
+DEAL::Triangulation 10 cells, 2 levels
+DEAL::DoFHandler 160 dofs, level dofs 64 128
+DEAL::Assemble matrix
+DEAL::Assemble multilevel matrix
+DEAL::Assemble right hand side
+DEAL::Solve
+DEAL:cg::Starting value 35.5356
+DEAL:cg::Convergence step 14 value 3.21479e-13
+DEAL::Error    0.164760
+DEAL::Estimate 1.08528
+DEAL::Writing solution to <sol-01.gnuplot>...
+DEAL::
+DEAL::Step 2
+DEAL::Triangulation 16 cells, 2 levels
+DEAL::DoFHandler 256 dofs, level dofs 64 256
+DEAL::Assemble matrix
+DEAL::Assemble multilevel matrix
+DEAL::Assemble right hand side
+DEAL::Solve
+DEAL:cg::Starting value 37.0552
+DEAL:cg::Convergence step 14 value 6.05416e-13
+DEAL::Error    0.113503
+DEAL::Estimate 0.990460
+DEAL::Writing solution to <sol-02.gnuplot>...
+
+...
+
+DEAL::Step 10
+DEAL::Triangulation 124 cells, 9 levels
+DEAL::DoFHandler 1984 dofs, level dofs 64 256 512 512 256 256 256 256 256
+DEAL::Assemble matrix
+DEAL::Assemble multilevel matrix
+DEAL::Assemble right hand side
+DEAL::Solve
+DEAL:cg::Starting value 38.5798
+DEAL:cg::Convergence step 17 value 2.64999e-13
+DEAL::Error    0.0101278
+DEAL::Estimate 0.0957571
+DEAL::Writing solution to <sol-10.gnuplot>...
+DEAL::
+DEAL::Step 11
+DEAL::Triangulation 163 cells, 10 levels
+DEAL::DoFHandler 2608 dofs, level dofs 64 256 768 576 512 256 256 256 256 256
+DEAL::Assemble matrix
+DEAL::Assemble multilevel matrix
+DEAL::Assemble right hand side
+DEAL::Solve
+DEAL:cg::Starting value 44.1721
+DEAL:cg::Convergence step 17 value 3.18657e-13
+DEAL::Error    0.00716962
+DEAL::Estimate 0.0681646
+DEAL::Writing solution to <sol-11.gnuplot>...
+DEAL::
+@endcode
+
+This log for instance shows that the number of conjugate gradient
+iteration steps is constant at approximately 17.
+
+<h2>Postprocessing of the logfile</h2>
+
+<img src="http://www.dealii.org/images/steps/developer/step-39-convergence.png" alt="">
+Using the perl script <tt>postprocess.pl</tt>, we extract relevant
+data into <tt>output.dat</tt>, which can be used to plot graphs with
+<tt>gnuplot</tt>. The graph above for instance was produced with
+
+@code
+set style data linespoints
+set logscale
+set xrange [50:3000]
+plot "output.dat" using 2:3 title "error", "" using 2:4 title "estimate", \
+     "" using 2:(3000*$2**-1.5) title "3rd order"
+@endcode
diff --git a/examples/step-39/doc/tooltip b/examples/step-39/doc/tooltip
new file mode 100644
index 0000000..09884e0
--- /dev/null
+++ b/examples/step-39/doc/tooltip
@@ -0,0 +1 @@
+Interior Penalty for the Laplace equation. Adaptive refinement. Multigrid.
diff --git a/examples/step-39/output.reference.dat b/examples/step-39/output.reference.dat
new file mode 100644
index 0000000..35de96d
--- /dev/null
+++ b/examples/step-39/output.reference.dat
@@ -0,0 +1,13 @@
+#step dofs error estimate l2error iterations efficiency order l2order
+0  	256    	2.974190e-01	9.904600e-01	4.524470e-03	14	0.300284	0.000000	0.000000
+1  	400    	2.585590e-01	7.386240e-01	2.885100e-03	16	0.350055	0.627480	2.016374
+2  	544    	1.892340e-01	6.575070e-01	1.479540e-03	16	0.287805	2.030277	4.343815
+3  	688    	1.503620e-01	4.547120e-01	9.005740e-04	16	0.330675	1.958261	4.228028
+4  	832    	1.053170e-01	3.548790e-01	4.750150e-04	17	0.296769	3.747254	6.731991
+5  	1024   	7.459510e-02	2.539050e-01	2.362060e-04	17	0.293791	3.322106	6.729380
+6  	1216   	5.291920e-02	1.836100e-01	1.224410e-04	17	0.288215	3.995439	7.647065
+7  	1504   	3.756820e-02	1.335280e-01	6.804430e-05	17	0.281351	3.223616	5.527534
+8  	1984   	2.657860e-02	9.576030e-02	3.998000e-05	17	0.277553	2.498686	3.839747
+9  	2608   	1.880990e-02	6.816520e-02	1.951950e-05	17	0.275946	2.528429	5.243492
+10 	3472   	1.328020e-02	4.780300e-02	1.006070e-05	17	0.277811	2.433078	4.632422
+11 	4672   	9.367220e-03	3.336360e-02	5.766390e-06	17	0.280762	2.351695	3.749897
diff --git a/examples/step-39/postprocess.pl b/examples/step-39/postprocess.pl
new file mode 100644
index 0000000..90e2190
--- /dev/null
+++ b/examples/step-39/postprocess.pl
@@ -0,0 +1,41 @@
+######################################################################
+# Postprocess logstream output and create a data file for gnuplot: reads
+# the log from the named files or stdin, prints one row per step to stdout.
+######################################################################
+
+use strict;
+
+my $step;     # The iteration step in the adaptive loop
+my @dofs;     # The number of degrees of freedom in each step
+my @error;    # The energy error of the solution
+my @l2error;  # The L2-error of the solution
+my @estimate; # The a posteriori error estimate
+my @steps;    # The solver iteration count taken from the "Convergence step" line
+
+my $energy_order = 0.;  # convergence orders; they keep this value for step 0
+my $l2_order = 0.;
+# Collect the quantities of interest; each value is filed under the most recent step number.
+while(<>)
+{
+    $step = $1 if m/DEAL::Step\s*(\d+)/;
+    $dofs[$step] = $1 if m/DEAL::DoFHandler\s*(\d+)/;
+    $error[$step] = $1 if m/DEAL::energy-error:\s*(\S+)/;
+    $l2error[$step] = $1 if m/DEAL::L2-error:\s*(\S+)/;
+    $estimate[$step] = $1 if m/DEAL::Estimate\s*(\S+)/;
+    $steps[$step] = $1 if m/DEAL:\w+::Convergence step\s*(\S+)/;
+}
+
+print '#step dofs error estimate l2error iterations efficiency order l2order', "\n";
+# One row per step; the orders compare each step with the previous one.
+for (my $i=0;$i<=$step;++$i)
+{
+    if ($i>0)
+    {
+	my $hlog = -1/2.* log($dofs[$i-1]/$dofs[$i]);  # presumably the log of the mesh size ratio, assuming h ~ dofs^(-1/2)
+	$energy_order = log($error[$i-1]/$error[$i]) / $hlog;
+	$l2_order = log($l2error[$i-1]/$l2error[$i]) / $hlog;
+    }
+    my $eff = $error[$i]/$estimate[$i];  # efficiency: error divided by estimate
+    printf "%-3d\t%-7d\t%e\t%e\t%e\t%d\t%f\t%f\t%f\n", $i, $dofs[$i], $error[$i], $estimate[$i], $l2error[$i],
+    $steps[$i], $eff, $energy_order, $l2_order;
+}
diff --git a/examples/step-39/step-39.cc b/examples/step-39/step-39.cc
new file mode 100644
index 0000000..b880cf5
--- /dev/null
+++ b/examples/step-39/step-39.cc
@@ -0,0 +1,970 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2010 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Author: Guido Kanschat, Texas A&M University, 2009
+ */
+
+
+// The include files for the linear algebra: A regular SparseMatrix, which in
+// turn will include the necessary files for SparsityPattern and Vector
+// classes.
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/precondition.h>
+#include <deal.II/lac/precondition_block.h>
+#include <deal.II/lac/block_vector.h>
+
+// Include files for setting up the mesh
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/grid_refinement.h>
+
+// Include files for FiniteElement classes and DoFHandler.
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_dgp.h>
+#include <deal.II/fe/fe_dgq.h>
+#include <deal.II/dofs/dof_tools.h>
+
+// The include files for using the MeshWorker framework
+#include <deal.II/meshworker/dof_info.h>
+#include <deal.II/meshworker/integration_info.h>
+#include <deal.II/meshworker/assembler.h>
+#include <deal.II/meshworker/loop.h>
+
+// The include file for local integrators associated with the Laplacian
+#include <deal.II/integrators/laplace.h>
+
+// Support for multigrid methods
+#include <deal.II/multigrid/mg_tools.h>
+#include <deal.II/multigrid/multigrid.h>
+#include <deal.II/multigrid/mg_matrix.h>
+#include <deal.II/multigrid/mg_transfer.h>
+#include <deal.II/multigrid/mg_coarse.h>
+#include <deal.II/multigrid/mg_smoother.h>
+
+// Finally, we take our exact solution from the library as well as quadrature
+// and additional tools.
+#include <deal.II/base/function_lib.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/data_out.h>
+
+#include <iostream>
+#include <fstream>
+
+// All classes of the deal.II library are in the namespace dealii. In order to
+// save typing, we tell the compiler to search names in there as well.
+namespace Step39
+{
+  using namespace dealii;
+
+  // The exact solution of the problem. It supplies the boundary values and is
+  // the reference in the error computation. Note that it is fixed to 2d.
+  Functions::SlitSingularityFunction<2> exact_solution;
+
+  // @sect3{The local integrators}
+
+  // MeshWorker separates local integration from the loops over cells and
+  // faces. Thus, we have to write local integration classes for generating
+  // matrices, the right hand side and the error estimator.
+
+  // All these classes have the same three functions for integrating over
+  // cells, boundary faces and interior faces, respectively. All the
+  // information needed for the local integration is provided by
+  // MeshWorker::IntegrationInfo<dim>. Note that the signature of the
+  // functions cannot be changed, because it is expected by
+  // MeshWorker::integration_loop().
+
+  // The first class defining local integrators is responsible for computing
+  // cell and face matrices. It is used to assemble the global matrix as well
+  // as the level matrices.
+  template <int dim>
+  class MatrixIntegrator : public MeshWorker::LocalIntegrator<dim>
+  {
+  public:
+    // Volume contribution of a single cell.
+    void cell(MeshWorker::DoFInfo<dim> &dinfo, typename MeshWorker::IntegrationInfo<dim> &info) const;
+    // Contribution of a face on the boundary of the domain.
+    void boundary(MeshWorker::DoFInfo<dim> &dinfo, typename MeshWorker::IntegrationInfo<dim> &info) const;
+    // Contribution of an interior face with a cell on either side.
+    void face(MeshWorker::DoFInfo<dim> &dinfo1, MeshWorker::DoFInfo<dim> &dinfo2,
+              typename MeshWorker::IntegrationInfo<dim> &info1,
+              typename MeshWorker::IntegrationInfo<dim> &info2) const;
+  };
+
+
+  // On each cell, we integrate the Dirichlet form. We use the library of
+  // ready made integrals in LocalIntegrators to avoid writing these loops
+  // ourselves. Similarly, we implement Nitsche boundary conditions and the
+  // interior penalty fluxes between cells.
+  //
+  // The boundary and flux terms need a penalty parameter, which should be
+  // adjusted to the cell size and the polynomial degree. A safe choice of
+  // this parameter for constant coefficients can be found in
+  // LocalIntegrators::Laplace::compute_penalty() and we use this below.
+  template <int dim>
+  void MatrixIntegrator<dim>::cell(MeshWorker::DoFInfo<dim> &dinfo,
+                                   typename MeshWorker::IntegrationInfo<dim> &info) const
+  {
+    // Delegate to the ready-made Laplace cell integrator of the library.
+    LocalIntegrators::Laplace::cell_matrix(dinfo.matrix(0,false).matrix, info.fe_values());
+  }
+
+
+  template <int dim>
+  void MatrixIntegrator<dim>::boundary(MeshWorker::DoFInfo<dim> &dinfo,
+                                       typename MeshWorker::IntegrationInfo<dim> &info) const
+  {
+    // There is no neighbor at the boundary, so the same cell enters the penalty twice.
+    const unsigned int degree = info.fe_values(0).get_fe().tensor_degree();
+    const double penalty = LocalIntegrators::Laplace::compute_penalty(dinfo, dinfo, degree, degree);
+
+    LocalIntegrators::Laplace::nitsche_matrix(dinfo.matrix(0,false).matrix, info.fe_values(0), penalty);
+  }
+
+  // Interior faces use the interior penalty method
+  template <int dim>
+  void MatrixIntegrator<dim>::face(MeshWorker::DoFInfo<dim> &dinfo1,
+                                   MeshWorker::DoFInfo<dim> &dinfo2,
+                                   typename MeshWorker::IntegrationInfo<dim> &info1,
+                                   typename MeshWorker::IntegrationInfo<dim> &info2) const
+  {
+    // The polynomial degree is read from the element of the first cell and used for both sides.
+    const unsigned int degree = info1.fe_values(0).get_fe().tensor_degree();
+    const double penalty = LocalIntegrators::Laplace::compute_penalty(dinfo1, dinfo2, degree, degree);
+
+    LocalIntegrators::Laplace::ip_matrix(dinfo1.matrix(0,false).matrix, dinfo1.matrix(0,true).matrix,
+                                         dinfo2.matrix(0,true).matrix, dinfo2.matrix(0,false).matrix,
+                                         info1.fe_values(0), info2.fe_values(0), penalty);
+  }
+
+  // The second local integrator builds the right hand side. In our example,
+  // the right hand side function is zero, such that only the boundary
+  // condition is set here in weak form.
+  template <int dim>
+  class RHSIntegrator : public MeshWorker::LocalIntegrator<dim>
+  {
+  public:
+    // Only boundary() contributes; cell() and face() are implemented as no-ops.
+    void cell(MeshWorker::DoFInfo<dim> &dinfo, typename MeshWorker::IntegrationInfo<dim> &info) const;
+    void boundary(MeshWorker::DoFInfo<dim> &dinfo, typename MeshWorker::IntegrationInfo<dim> &info) const;
+    void face(MeshWorker::DoFInfo<dim> &dinfo1, MeshWorker::DoFInfo<dim> &dinfo2,
+              typename MeshWorker::IntegrationInfo<dim> &info1,
+              typename MeshWorker::IntegrationInfo<dim> &info2) const;
+  };
+
+
+  template <int dim>
+  void RHSIntegrator<dim>::cell(MeshWorker::DoFInfo<dim> &, typename MeshWorker::IntegrationInfo<dim> &) const
+  { /* intentionally empty: the right hand side function is zero */ }
+
+
+  template <int dim>
+  void RHSIntegrator<dim>::boundary(MeshWorker::DoFInfo<dim> &dinfo,
+                                    typename MeshWorker::IntegrationInfo<dim> &info) const
+  {
+    const FEValuesBase<dim> &fe_face = info.fe_values();
+    Vector<double> &rhs = dinfo.vector(0).block(0);
+    const unsigned int degree = fe_face.get_fe().tensor_degree();
+    const double sigma = 2. * degree * (degree+1) * dinfo.face->measure() / dinfo.cell->measure();
+
+    // Boundary data: the exact solution evaluated in the quadrature points.
+    std::vector<double> g(fe_face.n_quadrature_points);
+    exact_solution.value_list(fe_face.get_quadrature_points(), g);
+    // Each entry accumulates -(sigma g v_i - g dv_i/dn); mind the overall sign.
+    for (unsigned int q=0; q<fe_face.n_quadrature_points; ++q)
+      for (unsigned int i=0; i<fe_face.dofs_per_cell; ++i)
+        rhs(i) += (- fe_face.shape_value(i,q) * sigma * g[q]
+                   + (fe_face.normal_vector(q) * fe_face.shape_grad(i,q)) * g[q]) * fe_face.JxW(q);
+  }
+
+
+  template <int dim>
+  void RHSIntegrator<dim>::face(MeshWorker::DoFInfo<dim> &,
+                                MeshWorker::DoFInfo<dim> &,
+                                typename MeshWorker::IntegrationInfo<dim> &,
+                                typename MeshWorker::IntegrationInfo<dim> &) const
+  { /* intentionally empty: interior faces add nothing to the right hand side */ }
+
+
+  // The third local integrator is responsible for the contributions to the
+  // error estimate. This is the standard energy estimator due to Karakashian
+  // and Pascal (2003).
+  template <int dim>
+  class Estimator : public MeshWorker::LocalIntegrator<dim>
+  {
+  public:
+    // Each function adds its contribution to value(0) of the DoFInfo object(s) it is given.
+    void cell(MeshWorker::DoFInfo<dim> &dinfo, typename MeshWorker::IntegrationInfo<dim> &info) const;
+    void boundary(MeshWorker::DoFInfo<dim> &dinfo, typename MeshWorker::IntegrationInfo<dim> &info) const;
+    void face(MeshWorker::DoFInfo<dim> &dinfo1, MeshWorker::DoFInfo<dim> &dinfo2,
+              typename MeshWorker::IntegrationInfo<dim> &info1,
+              typename MeshWorker::IntegrationInfo<dim> &info2) const;
+  };
+
+
+  // The cell contribution is the Laplacian of the discrete solution, since
+  // the right hand side is zero.
+  template <int dim>
+  void Estimator<dim>::cell(MeshWorker::DoFInfo<dim> &dinfo, typename MeshWorker::IntegrationInfo<dim> &info) const
+  {
+    const FEValuesBase<dim> &fe = info.fe_values();
+    const std::vector<Tensor<2,dim> > &DDuh = info.hessians[0][0];
+    const double h = dinfo.cell->diameter();   // loop-invariant, hence evaluated only once
+    for (unsigned int k=0; k<fe.n_quadrature_points; ++k)
+      {
+        const double t = h * trace(DDuh[k]);   // h * Laplace(u_h): the trace of the Hessian
+        dinfo.value(0) +=  t*t * fe.JxW(k);
+      }
+    dinfo.value(0) = std::sqrt(dinfo.value(0));   // store the root of the accumulated sum
+  }
+
+  // At the boundary, we use simply a weighted form of the boundary residual,
+  // namely the norm of the difference between the finite element solution and
+  // the correct boundary condition.
+  template <int dim>
+  void Estimator<dim>::boundary(MeshWorker::DoFInfo<dim> &dinfo,
+                                typename MeshWorker::IntegrationInfo<dim> &info) const
+  {
+    const FEValuesBase<dim> &fe_face = info.fe_values();
+    const std::vector<double> &uh = info.values[0][0];
+    const unsigned int degree = fe_face.get_fe().tensor_degree();
+    const double sigma = 2. * degree * (degree+1) * dinfo.face->measure() / dinfo.cell->measure();
+
+    // The prescribed boundary values are those of the exact solution.
+    std::vector<double> g(fe_face.n_quadrature_points);
+    exact_solution.value_list(fe_face.get_quadrature_points(), g);
+
+    // Penalty-weighted squared mismatch between boundary data and discrete solution.
+    for (unsigned int q=0; q<fe_face.n_quadrature_points; ++q)
+      dinfo.value(0) += sigma * (g[q] - uh[q]) * (g[q] - uh[q]) * fe_face.JxW(q);
+    dinfo.value(0) = std::sqrt(dinfo.value(0));
+  }
+
+
+  // Finally, on interior faces, the estimator consists of the jumps of the
+  // solution and its normal derivative, weighted appropriately.
+  template <int dim>
+  void Estimator<dim>::face(MeshWorker::DoFInfo<dim> &dinfo1,
+                            MeshWorker::DoFInfo<dim> &dinfo2,
+                            typename MeshWorker::IntegrationInfo<dim> &info1,
+                            typename MeshWorker::IntegrationInfo<dim> &info2) const
+  {
+    // Quadrature weights and normal vectors are taken from the first of the two cells.
+    const FEValuesBase<dim> &fe_face = info1.fe_values();
+    const std::vector<double> &values1 = info1.values[0][0];
+    const std::vector<double> &values2 = info2.values[0][0];
+    const std::vector<Tensor<1,dim> > &gradients1 = info1.gradients[0][0];
+    const std::vector<Tensor<1,dim> > &gradients2 = info2.gradients[0][0];
+    // The penalty is the sum of the contributions of the two adjacent cells.
+    const unsigned int degree = fe_face.get_fe().tensor_degree();
+    const double sigma1 = degree * (degree+1) * dinfo1.face->measure() / dinfo1.cell->measure();
+    const double sigma2 = degree * (degree+1) * dinfo2.face->measure() / dinfo2.cell->measure();
+    const double sigma = sigma1 + sigma2;
+    const double h = dinfo1.face->measure();
+    for (unsigned int q=0; q<fe_face.n_quadrature_points; ++q)
+      {
+        const double value_jump = values1[q] - values2[q];
+        const double flux_jump = fe_face.normal_vector(q) * gradients1[q] - fe_face.normal_vector(q) * gradients2[q];
+        dinfo1.value(0) += (sigma * value_jump*value_jump + h * flux_jump*flux_jump) * fe_face.JxW(q);
+      }
+    // Both cells adjacent to the face receive the same value, namely the root of the sum.
+    dinfo1.value(0) = std::sqrt(dinfo1.value(0));
+    dinfo2.value(0) = dinfo1.value(0);
+  }
+
+  // Finally we have an integrator for the error. Since the energy norm for
+  // discontinuous Galerkin problems not only involves the difference of the
+  // gradient inside the cells, but also the jump terms across faces and at
+  // the boundary, we cannot just use VectorTools::integrate_difference().
+  // Instead, we use the MeshWorker interface to compute the error ourselves.
+
+  // There are several different ways to define this energy norm, but all of
+  // them are equivalent to each other uniformly with mesh size (some not
+  // uniformly with polynomial degree). Here, we choose @f[ \|u\|_{1,h}^2 =
+  // \sum_{K\in \mathbb T_h} \|\nabla u\|_K^2 + \sum_{F \in F_h^i}
+  // 4\sigma_F\|\{\!\{ u \mathbf n\}\!\}\|^2_F + \sum_{F \in F_h^b}
+  // 2\sigma_F\|u\|^2_F @f]
+
+  template <int dim>
+  class ErrorIntegrator : public MeshWorker::LocalIntegrator<dim>
+  {
+  public:
+    // cell() fills value(0) and value(1); boundary() and face() only contribute to value(0).
+    void cell(MeshWorker::DoFInfo<dim> &dinfo, typename MeshWorker::IntegrationInfo<dim> &info) const;
+    void boundary(MeshWorker::DoFInfo<dim> &dinfo, typename MeshWorker::IntegrationInfo<dim> &info) const;
+    void face(MeshWorker::DoFInfo<dim> &dinfo1, MeshWorker::DoFInfo<dim> &dinfo2,
+              typename MeshWorker::IntegrationInfo<dim> &info1,
+              typename MeshWorker::IntegrationInfo<dim> &info2) const;
+  };
+
+  // Here we have the integration on cells. There is currently no good
+  // interface in MeshWorker that would allow us to access values of regular
+  // functions in the quadrature points. Thus, we have to create the vectors
+  // for the exact function's values and gradients inside the cell
+  // integrator. After that, everything is as before and we just add up the
+  // squares of the differences.
+
+  // Additionally to computing the error in the energy norm, we use the
+  // capability of the mesh worker to compute two functionals at the same time
+  // and compute the <i>L<sup>2</sup></i>-error in the same loop. Obviously,
+  // this one does not have any jump terms and only appears in the integration
+  // on cells.
+  template <int dim>
+  void ErrorIntegrator<dim>::cell(MeshWorker::DoFInfo<dim> &dinfo,
+                                  typename MeshWorker::IntegrationInfo<dim> &info) const
+  {
+    const FEValuesBase<dim> &fe_cell = info.fe_values();
+    const unsigned int n_q_points = fe_cell.n_quadrature_points;
+    // Discrete solution and its gradient in the quadrature points.
+    const std::vector<double> &uh = info.values[0][0];
+    const std::vector<Tensor<1,dim> > &grad_uh = info.gradients[0][0];
+    // Their exact counterparts have to be evaluated by hand.
+    std::vector<double> u(n_q_points);
+    std::vector<Tensor<1,dim> > grad_u(n_q_points);
+    exact_solution.value_list(fe_cell.get_quadrature_points(), u);
+    exact_solution.gradient_list(fe_cell.get_quadrature_points(), grad_u);
+
+    for (unsigned int q=0; q<n_q_points; ++q)
+      {
+        double grad_error_square = 0;
+        for (unsigned int d=0; d<dim; ++d)
+          {
+            const double component_error = grad_u[q][d] - grad_uh[q][d];
+            grad_error_square += component_error*component_error;
+          }
+        const double value_error = u[q] - uh[q];
+        dinfo.value(0) +=  grad_error_square * fe_cell.JxW(q);         // gradient part
+        dinfo.value(1) +=  value_error*value_error * fe_cell.JxW(q);   // L2 part
+      }
+    dinfo.value(0) = std::sqrt(dinfo.value(0));
+    dinfo.value(1) = std::sqrt(dinfo.value(1));
+  }
+
+
+  template <int dim>
+  void ErrorIntegrator<dim>::boundary(MeshWorker::DoFInfo<dim> &dinfo,
+                                      typename MeshWorker::IntegrationInfo<dim> &info) const
+  {
+    const FEValuesBase<dim> &fe_face = info.fe_values();
+    const unsigned int n_q_points = fe_face.n_quadrature_points;
+    const std::vector<double> &uh = info.values[0][0];
+
+    // Exact solution in the quadrature points of the boundary face.
+    std::vector<double> u(n_q_points);
+    exact_solution.value_list(fe_face.get_quadrature_points(), u);
+
+    // Weight of the boundary term; note the factor two.
+    const unsigned int degree = fe_face.get_fe().tensor_degree();
+    const double sigma = 2. * degree * (degree+1) * dinfo.face->measure() / dinfo.cell->measure();
+
+    // Add up the weighted squared error and store its root.
+    for (unsigned int q=0; q<n_q_points; ++q)
+      dinfo.value(0) += sigma * (u[q] - uh[q]) * (u[q] - uh[q]) * fe_face.JxW(q);
+
+    dinfo.value(0) = std::sqrt(dinfo.value(0));
+  }
+
+
+  template <int dim>
+  void ErrorIntegrator<dim>::face(MeshWorker::DoFInfo<dim> &dinfo1,
+                                  MeshWorker::DoFInfo<dim> &dinfo2,
+                                  typename MeshWorker::IntegrationInfo<dim> &info1,
+                                  typename MeshWorker::IntegrationInfo<dim> &info2) const
+  {
+    const FEValuesBase<dim> &fe_face = info1.fe_values();
+    const std::vector<double> &values1 = info1.values[0][0];
+    const std::vector<double> &values2 = info2.values[0][0];
+
+    // The weight is the sum of the penalty contributions of both cells.
+    const unsigned int degree = fe_face.get_fe().tensor_degree();
+    const double sigma1 = degree * (degree+1) * dinfo1.face->measure() / dinfo1.cell->measure();
+    const double sigma2 = degree * (degree+1) * dinfo2.face->measure() / dinfo2.cell->measure();
+    const double sigma = sigma1 + sigma2;
+
+    // Only the jump of the discrete solution is integrated here.
+    for (unsigned int q=0; q<fe_face.n_quadrature_points; ++q)
+      {
+        const double jump = values1[q] - values2[q];
+        dinfo1.value(0) += (sigma * jump*jump) * fe_face.JxW(q);
+      }
+    dinfo1.value(0) = std::sqrt(dinfo1.value(0));
+    dinfo2.value(0) = dinfo1.value(0);
+  }
+
+
+
+  // @sect3{The main class}
+
+  // This class does the main job, like in previous examples. For a
+  // description of the functions declared here, please refer to the
+  // implementation below.
+  template <int dim>
+  class InteriorPenaltyProblem
+  {
+  public:
+    typedef MeshWorker::IntegrationInfo<dim> CellInfo;
+    // The element is kept by reference only, so it must outlive this object.
+    InteriorPenaltyProblem(const FiniteElement<dim> &fe);
+
+    void run(unsigned int n_steps);
+
+  private:
+    void setup_system ();   // distributes dofs, sizes vectors, builds sparsity patterns and matrices
+    void assemble_matrix ();
+    void assemble_mg_matrix ();
+    void assemble_right_hand_side ();
+    void error ();
+    double estimate ();
+    void solve ();
+    void output_results (const unsigned int cycle) const;
+
+    // The member objects related to the discretization are here.
+    Triangulation<dim>        triangulation;
+    const MappingQ1<dim>      mapping;
+    const FiniteElement<dim> &fe;   // not owned, see the constructor
+    DoFHandler<dim>           dof_handler;
+
+    // Then, we have the matrices and vectors related to the global discrete
+    // system.
+    SparsityPattern      sparsity;
+    SparseMatrix<double> matrix;
+    Vector<double>       solution;
+    Vector<double>       right_hand_side;
+    BlockVector<double>  estimates;   // created with a single block by the constructor
+
+    // Finally, we have a group of sparsity patterns and sparse matrices
+    // related to the multilevel preconditioner.  First, we have a level
+    // matrix and its sparsity pattern.
+    MGLevelObject<SparsityPattern> mg_sparsity;
+    MGLevelObject<SparseMatrix<double> > mg_matrix;
+
+    // When we perform multigrid with local smoothing on locally refined
+    // meshes, additional matrices are required; see Kanschat (2004). Here is
+    // the sparsity pattern for these edge matrices. We only need one, because
+    // the pattern of the up matrix is the transpose of that of the down
+    // matrix. Actually, we do not care too much about these details, since
+    // the MeshWorker is filling these matrices.
+    MGLevelObject<SparsityPattern> mg_sparsity_dg_interface;
+    // The flux matrix at the refinement edge, coupling fine level degrees of
+    // freedom to coarse level.
+    MGLevelObject<SparseMatrix<double> > mg_matrix_dg_down;
+    // The transpose of the flux matrix at the refinement edge, coupling
+    // coarse level degrees of freedom to fine level.
+    MGLevelObject<SparseMatrix<double> > mg_matrix_dg_up;
+  };
+
+
+  // The constructor simply sets up the coarse grid and the DoFHandler. The
+  // FiniteElement is provided as a parameter to allow flexibility.
+  template <int dim>
+  InteriorPenaltyProblem<dim>::InteriorPenaltyProblem(const FiniteElement<dim> &fe)
+    : mapping(),
+      fe(fe),
+      dof_handler(triangulation),
+      estimates(1)
+  {
+    // Create the coarse grid: a hypercube with a slit, generated with the bounds -1 and 1.
+    GridGenerator::hyper_cube_slit(triangulation, -1, 1);
+  }
+
+
+  // In this function, we set up the dimension of the linear system and the
+  // sparsity patterns for the global matrix as well as the level matrices.
+  template <int dim>
+  void
+  InteriorPenaltyProblem<dim>::setup_system()
+  {
+    // Enumerate the degrees of freedom on the active mesh as well as on the
+    // individual levels of the mesh hierarchy.
+    dof_handler.distribute_dofs(fe);
+    dof_handler.distribute_mg_dofs(fe);
+
+    // Their total number determines the size of the global vectors.
+    const unsigned int n_global_dofs = dof_handler.n_dofs();
+    solution.reinit(n_global_dofs);
+    right_hand_side.reinit(n_global_dofs);
+
+    // Global matrix: as the row lengths are not known in advance, the flux
+    // couplings are collected in a DynamicSparsityPattern first and then
+    // copied into the SparsityPattern the matrix is built upon.
+    DynamicSparsityPattern global_dsp(n_global_dofs);
+    DoFTools::make_flux_sparsity_pattern(dof_handler, global_dsp);
+    sparsity.copy_from(global_dsp);
+    matrix.reinit(sparsity);
+
+    // Level matrices: every container is resized to hold one matrix per
+    // mesh level, i.e. for the levels 0 to n_levels-1, and its entries are
+    // cleared.
+    const unsigned int finest_level = triangulation.n_levels()-1;
+    mg_matrix.resize(0, finest_level);
+    mg_matrix.clear();
+    mg_matrix_dg_up.resize(0, finest_level);
+    mg_matrix_dg_up.clear();
+    mg_matrix_dg_down.resize(0, finest_level);
+    mg_matrix_dg_down.clear();
+
+    // The sparsity patterns must only be touched after the matrices have
+    // been cleared: a matrix locks its pattern through the SmartPointer and
+    // Subscriptor mechanism.
+    mg_sparsity.resize(0, finest_level);
+    mg_sparsity_dg_interface.resize(0, finest_level);
+
+    // All containers are prepared now; what remains is to build the
+    // sparsity patterns level by level and to attach the matrices to them.
+    for (unsigned int level=mg_sparsity.min_level();
+         level<=mg_sparsity.max_level(); ++level)
+      {
+        // Same procedure as for the global matrix, restricted to one level.
+        DynamicSparsityPattern level_dsp(dof_handler.n_dofs(level));
+        MGTools::make_flux_sparsity_pattern(dof_handler, level_dsp, level);
+        mg_sparsity[level].copy_from(level_dsp);
+        mg_matrix[level].reinit(mg_sparsity[level]);
+
+        // The matrices at the refinement edge between two levels are stored
+        // under the index of the finer one of the two levels involved.
+        // Consequently, no such object exists on level 0 and there is
+        // nothing left to do for it.
+        if (level == 0)
+          continue;
+
+        DynamicSparsityPattern edge_dsp;
+        edge_dsp.reinit(dof_handler.n_dofs(level-1), dof_handler.n_dofs(level));
+        MGTools::make_flux_sparsity_pattern_edge(dof_handler, edge_dsp, level);
+        mg_sparsity_dg_interface[level].copy_from(edge_dsp);
+        mg_matrix_dg_up[level].reinit(mg_sparsity_dg_interface[level]);
+        mg_matrix_dg_down[level].reinit(mg_sparsity_dg_interface[level]);
+      }
+  }
+
+
+  // In this function, we assemble the global system matrix, where by global
+  // we indicate that this is the matrix of the discrete system we solve and
+  // it is covering the whole mesh.
+  template <int dim>
+  void
+  InteriorPenaltyProblem<dim>::assemble_matrix()
+  {
+    // First, we need to set up the object providing the values we
+    // integrate. This object contains all FEValues and FEFaceValues objects
+    // needed and also maintains them automatically such that they always
+    // point to the current cell. To this end, we need to tell it first, where
+    // and what to compute. Since we are not doing anything fancy, we can rely
+    // on their standard choice for quadrature rules.
+    //
+    // Since their default update flags are minimal, we add what we need
+    // additionally, namely the values and gradients of shape functions on all
+    // objects (cells, boundary and interior faces). Afterwards, we are ready
+    // to initialize the container, which will create all necessary
+    // FEValuesBase objects for integration.
+    MeshWorker::IntegrationInfoBox<dim> info_box;
+    UpdateFlags update_flags = update_values | update_gradients;
+    info_box.add_update_flags_all(update_flags);
+    info_box.initialize(fe, mapping);
+
+    // This is the object into which we integrate local data. It is filled by
+    // the local integration routines in MatrixIntegrator and then used by the
+    // assembler to distribute the information into the global matrix.
+    MeshWorker::DoFInfo<dim> dof_info(dof_handler);
+
+    // Furthermore, we need an object that assembles the local matrix into the
+    // global matrix. These assembler objects have all the knowledge
+    // of the structures of the target object, in this case a
+    // SparseMatrix, possible constraints and the mesh structure.
+    MeshWorker::Assembler::MatrixSimple<SparseMatrix<double> > assembler;
+    assembler.initialize(matrix);
+
+    // Now comes the part we coded ourselves, the local
+    // integrator. This is the only part which is problem dependent.
+    MatrixIntegrator<dim> integrator;
+    // Now, we throw everything into a MeshWorker::loop(), which here
+    // traverses all active cells of the mesh, computes cell and face matrices
+    // and assembles them into the global matrix. We use the variable
+    // <tt>dof_handler</tt> here in order to use the global numbering of
+    // degrees of freedom.
+    MeshWorker::integration_loop<dim, dim>(
+      dof_handler.begin_active(), dof_handler.end(),
+      dof_info, info_box,
+      integrator, assembler);
+  }
+
+
+  // Now, we do the same for the level matrices. Not too surprisingly, this
+  // function looks like a twin of the previous one. Indeed, there are only
+  // two minor differences.
+  template <int dim>
+  void
+  InteriorPenaltyProblem<dim>::assemble_mg_matrix()
+  {
+    MeshWorker::IntegrationInfoBox<dim> info_box;
+    UpdateFlags update_flags = update_values | update_gradients;
+    info_box.add_update_flags_all(update_flags);
+    info_box.initialize(fe, mapping);
+
+    MeshWorker::DoFInfo<dim> dof_info(dof_handler);
+
+    // Obviously, the assembler needs to be replaced by one filling level
+    // matrices. Note that it automatically fills the edge matrices as well.
+    MeshWorker::Assembler::MGMatrixSimple<SparseMatrix<double> > assembler;
+    assembler.initialize(mg_matrix);
+    assembler.initialize_fluxes(mg_matrix_dg_up, mg_matrix_dg_down);
+
+    MatrixIntegrator<dim> integrator;
+    // Here is the other difference to the previous function: we run
+    // over all cells, not only the active ones. And we use the iterator
+    // functions ending in <code>_mg</code> since we need the degrees of
+    // freedom on each level, not the global numbering.
+    MeshWorker::integration_loop<dim, dim> (
+      dof_handler.begin_mg(), dof_handler.end_mg(),
+      dof_info, info_box,
+      integrator, assembler);
+  }
+
+
+  // Here we have another clone of the assemble function. The difference to
+  // assembling the system matrix consists in that we assemble a vector here.
+  template <int dim>
+  void
+  InteriorPenaltyProblem<dim>::assemble_right_hand_side()
+  {
+    MeshWorker::IntegrationInfoBox<dim> info_box;
+    UpdateFlags update_flags = update_quadrature_points | update_values | update_gradients;
+    info_box.add_update_flags_all(update_flags);
+    info_box.initialize(fe, mapping);
+
+    MeshWorker::DoFInfo<dim> dof_info(dof_handler);
+
+    // Since this assembler allows us to fill several vectors, the interface is
+    // a little more complicated than above. The pointers to the vectors have
+    // to be stored in an AnyData object. While this seems to cause two extra
+    // lines of code here, it actually comes in handy in more complex
+    // applications.
+    MeshWorker::Assembler::ResidualSimple<Vector<double> > assembler;
+    AnyData data;
+    data.add<Vector<double>*>(&right_hand_side, "RHS");
+    assembler.initialize(data);
+
+    RHSIntegrator<dim> integrator;
+    MeshWorker::integration_loop<dim, dim>(
+      dof_handler.begin_active(), dof_handler.end(),
+      dof_info, info_box,
+      integrator, assembler);
+
+    right_hand_side *= -1.; // sign flip; presumably RHSIntegrator assembles the negated vector -- TODO confirm
+  }
+
+
+  // Now that we have coded all functions building the discrete linear system,
+  // it is about time that we actually solve it.
+  template <int dim>
+  void
+  InteriorPenaltyProblem<dim>::solve()
+  {
+    // The solver of choice is conjugate gradient.
+    SolverControl control(1000, 1.e-12);
+    SolverCG<Vector<double> > solver(control);
+
+    // Now we are setting up the components of the multilevel
+    // preconditioner. First, we need transfer between grid levels. The object
+    // we are using here generates sparse matrices for these transfers.
+    MGTransferPrebuilt<Vector<double> > mg_transfer;
+    mg_transfer.build_matrices(dof_handler);
+
+    // Then, we need an exact solver for the matrix on the coarsest level.
+    FullMatrix<double> coarse_matrix;
+    coarse_matrix.copy_from (mg_matrix[0]);
+    MGCoarseGridHouseholder<double, Vector<double> > mg_coarse;
+    mg_coarse.initialize(coarse_matrix);
+
+    // While transfer and coarse grid solver are pretty much generic, more
+    // flexibility is offered for the smoother. First, we choose Gauss-Seidel
+    // as our smoothing method.
+    GrowingVectorMemory<Vector<double> > mem; // NOTE(review): never referenced again in this function
+    typedef PreconditionSOR<SparseMatrix<double> > RELAXATION;
+    mg::SmootherRelaxation<RELAXATION, Vector<double> >
+    mg_smoother;
+    RELAXATION::AdditionalData smoother_data(1.);
+    mg_smoother.initialize(mg_matrix, smoother_data);
+
+    // Do two smoothing steps on each level.
+    mg_smoother.set_steps(2);
+    // Since the SOR method is not symmetric, but we use conjugate gradient
+    // iteration below, here is a trick to make the multilevel preconditioner
+    // a symmetric operator even for nonsymmetric smoothers.
+    mg_smoother.set_symmetric(true);
+    // The smoother class optionally implements the variable V-cycle, which we
+    // do not want here.
+    mg_smoother.set_variable(false);
+
+    // Finally, we must wrap our matrices in an object having the required
+    // multiplication functions.
+    mg::Matrix<Vector<double> > mgmatrix(mg_matrix);
+    mg::Matrix<Vector<double> > mgdown(mg_matrix_dg_down);
+    mg::Matrix<Vector<double> > mgup(mg_matrix_dg_up);
+
+    // Now, we are ready to set up the V-cycle operator and the multilevel
+    // preconditioner.
+    Multigrid<Vector<double> > mg(dof_handler, mgmatrix,
+                                  mg_coarse, mg_transfer,
+                                  mg_smoother, mg_smoother);
+    // Let us not forget the edge matrices needed because of the adaptive
+    // refinement.
+    mg.set_edge_flux_matrices(mgdown, mgup);
+
+    // After all preparations, wrap the Multigrid object into another object,
+    // which can be used as a regular preconditioner,
+    PreconditionMG<dim, Vector<double>,
+                   MGTransferPrebuilt<Vector<double> > >
+                   preconditioner(dof_handler, mg, mg_transfer);
+    // and use it to solve the system.
+    solver.solve(matrix, solution, right_hand_side, preconditioner);
+  }
+
+
+  // Another clone of the assemble function. The big difference to the
+  // previous ones is here that we also have an input vector.
+  template <int dim>
+  double
+  InteriorPenaltyProblem<dim>::estimate()
+  {
+    // The results of the estimator are stored in a vector with one entry per
+    // cell. Since cells in deal.II are not numbered, we have to create our
+    // own numbering in order to use this vector.
+    //
+    // On the other hand, somebody might have used the user indices
+    // already. So, let's be good citizens and save them before tampering with
+    // them.
+    std::vector<unsigned int> old_user_indices;
+    triangulation.save_user_indices(old_user_indices);
+
+    estimates.block(0).reinit(triangulation.n_active_cells());
+    unsigned int i=0;
+    for (typename Triangulation<dim>::active_cell_iterator cell = triangulation.begin_active();
+         cell != triangulation.end(); ++cell,++i)
+      cell->set_user_index(i);
+
+    // This starts like before,
+    MeshWorker::IntegrationInfoBox<dim> info_box;
+    const unsigned int n_gauss_points = dof_handler.get_fe().tensor_degree()+1;
+    info_box.initialize_gauss_quadrature(n_gauss_points, n_gauss_points+1, n_gauss_points);
+
+    // but now we need to notify the info box of the finite element function we
+    // want to evaluate in the quadrature points. First, we create an AnyData
+    // object with this vector, which is the solution we just computed.
+    AnyData solution_data;
+    solution_data.add<const Vector<double>*>(&solution, "solution");
+
+    // Then, we tell the MeshWorker::VectorSelector for cells that we need
+    // the second derivatives of this solution (to compute the
+    // Laplacian). Therefore, the Boolean arguments selecting function values
+    // and first derivatives are false, only the last one selecting second
+    // derivatives is true.
+    info_box.cell_selector.add("solution", false, false, true);
+    // On interior and boundary faces, we need the function values and the
+    // first derivatives, but not second derivatives.
+    info_box.boundary_selector.add("solution", true, true, false);
+    info_box.face_selector.add("solution", true, true, false);
+
+    // And we continue as before, with the exception that the default update
+    // flags are already adjusted to the values and derivatives we requested
+    // above.
+    info_box.add_update_flags_boundary(update_quadrature_points);
+    info_box.initialize(fe, mapping, solution_data, solution);
+
+    MeshWorker::DoFInfo<dim> dof_info(dof_handler);
+
+    // The assembler stores one number per cell, but else this is the same as
+    // in the computation of the right hand side.
+    MeshWorker::Assembler::CellsAndFaces<double> assembler;
+    AnyData out_data;
+    out_data.add<BlockVector<double>*>(&estimates, "cells");
+    assembler.initialize(out_data, false);
+
+    Estimator<dim> integrator;
+    MeshWorker::integration_loop<dim, dim> (
+      dof_handler.begin_active(), dof_handler.end(),
+      dof_info, info_box,
+      integrator, assembler);
+
+    // Right before we return the result of the error estimate, we restore the
+    // old user indices.
+    triangulation.load_user_indices(old_user_indices);
+    return estimates.block(0).l2_norm();
+  }
+
+  // Here we compare our finite element solution with the (known) exact
+  // solution and compute the mean quadratic error of the gradient and the
+  // function itself. This function is a clone of estimate() right above,
+  // except that it overwrites the user indices without saving them first.
+
+  // Since we compute the error in the energy and the
+  // <i>L<sup>2</sup></i>-norm, respectively, our block vector needs two
+  // blocks here.
+  template <int dim>
+  void
+  InteriorPenaltyProblem<dim>::error()
+  {
+    BlockVector<double> errors(2);
+    errors.block(0).reinit(triangulation.n_active_cells());
+    errors.block(1).reinit(triangulation.n_active_cells());
+    unsigned int i=0;
+    for (typename Triangulation<dim>::active_cell_iterator cell = triangulation.begin_active();
+         cell != triangulation.end(); ++cell,++i)
+      cell->set_user_index(i);
+
+    MeshWorker::IntegrationInfoBox<dim> info_box;
+    const unsigned int n_gauss_points = dof_handler.get_fe().tensor_degree()+1;
+    info_box.initialize_gauss_quadrature(n_gauss_points, n_gauss_points+1, n_gauss_points);
+
+    AnyData solution_data;
+    solution_data.add<Vector<double>*>(&solution, "solution");
+
+    info_box.cell_selector.add("solution", true, true, false);
+    info_box.boundary_selector.add("solution", true, false, false);
+    info_box.face_selector.add("solution", true, false, false);
+
+    info_box.add_update_flags_cell(update_quadrature_points);
+    info_box.add_update_flags_boundary(update_quadrature_points);
+    info_box.initialize(fe, mapping, solution_data, solution);
+
+    MeshWorker::DoFInfo<dim> dof_info(dof_handler);
+
+    MeshWorker::Assembler::CellsAndFaces<double> assembler;
+    AnyData out_data;
+    out_data.add<BlockVector<double>* >(&errors, "cells");
+    assembler.initialize(out_data, false);
+
+    ErrorIntegrator<dim> integrator;
+    MeshWorker::integration_loop<dim, dim> (
+      dof_handler.begin_active(), dof_handler.end(),
+      dof_info, info_box,
+      integrator, assembler);
+
+    deallog << "energy-error: " << errors.block(0).l2_norm() << std::endl;
+    deallog << "L2-error:     " << errors.block(1).l2_norm() << std::endl;
+  }
+
+
+  // Some graphical output: solution and cell-wise estimates in gnuplot format.
+  template <int dim>
+  void InteriorPenaltyProblem<dim>::output_results (const unsigned int cycle) const
+  {
+    // Build the name sol-NN.gnuplot in a stack buffer, so that nothing has to
+    // be freed; 100 characters are ample for "sol-" plus any unsigned int.
+    char fn[100];
+    sprintf(fn, "sol-%02u", cycle);
+
+    const std::string filename = std::string(fn) + ".gnuplot";
+    deallog << "Writing solution to <" << filename << ">..."
+            << std::endl << std::endl;
+    std::ofstream gnuplot_output (filename.c_str());
+
+    DataOut<dim> data_out;
+    data_out.attach_dof_handler (dof_handler);
+    data_out.add_data_vector (solution, "u");
+    data_out.add_data_vector (estimates.block(0), "est");
+
+    data_out.build_patches ();
+
+    data_out.write_gnuplot(gnuplot_output);
+  }
+
+  // And finally the adaptive loop, more or less like in previous examples.
+  template <int dim>
+  void
+  InteriorPenaltyProblem<dim>::run(unsigned int n_steps)
+  {
+    deallog << "Element: " << fe.get_name() << std::endl;
+    for (unsigned int s=0; s<n_steps; ++s)
+      {
+        deallog << "Step " << s << std::endl;
+        if (estimates.block(0).size() == 0) // estimate() has not sized the vector yet: refine uniformly
+          triangulation.refine_global(1);
+        else
+          {
+            GridRefinement::refine_and_coarsen_fixed_fraction (triangulation,
+                                                               estimates.block(0),
+                                                               0.5, 0.0);
+            triangulation.execute_coarsening_and_refinement ();
+          }
+
+        deallog << "Triangulation "
+                << triangulation.n_active_cells() << " cells, "
+                << triangulation.n_levels() << " levels" << std::endl;
+
+        setup_system();
+        deallog << "DoFHandler " << dof_handler.n_dofs() << " dofs, level dofs";
+        for (unsigned int l=0; l<triangulation.n_levels(); ++l)
+          deallog << ' ' << dof_handler.n_dofs(l);
+        deallog << std::endl;
+
+        deallog << "Assemble matrix" << std::endl;
+        assemble_matrix();
+        deallog << "Assemble multilevel matrix" << std::endl;
+        assemble_mg_matrix();
+        deallog << "Assemble right hand side" << std::endl;
+        assemble_right_hand_side();
+        deallog << "Solve" << std::endl;
+        solve();
+        error();
+        deallog << "Estimate " << estimate() << std::endl;
+        output_results(s);
+      }
+  }
+}
+
+
+
+int main() // returns 0 on success and 1 if any exception escaped the run
+{
+  try
+    {
+      using namespace dealii;
+      using namespace Step39;
+
+      deallog.depth_console(2);
+      std::ofstream logfile("deallog"); // the log is also written to this file
+      deallog.attach(logfile);
+      FE_DGQ<2> fe1(3);
+      InteriorPenaltyProblem<2> test1(fe1);
+      test1.run(12); // twelve steps of the refinement loop
+    }
+  catch (const std::exception &exc) // by const reference: read-only, no slicing
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    }
+  catch (...)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    }
+
+  return 0;
+}
diff --git a/examples/step-4/CMakeLists.txt b/examples/step-4/CMakeLists.txt
new file mode 100644
index 0000000..bbe64fb
--- /dev/null
+++ b/examples/step-4/CMakeLists.txt
@@ -0,0 +1,39 @@
+##
+#  CMake script for the step-4 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-4")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#    FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#    FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#    SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET # QUIET: a failed search is reported by the check below
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-4/doc/builds-on b/examples/step-4/doc/builds-on
new file mode 100644
index 0000000..9406e4b
--- /dev/null
+++ b/examples/step-4/doc/builds-on
@@ -0,0 +1 @@
+step-3
diff --git a/examples/step-4/doc/intro.dox b/examples/step-4/doc/intro.dox
new file mode 100644
index 0000000..f8eadbc
--- /dev/null
+++ b/examples/step-4/doc/intro.dox
@@ -0,0 +1,141 @@
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+@dealiiVideoLecture{12,13}
+
+deal.II has a unique feature which we call
+``dimension independent programming''. You may have noticed in the
+previous examples that many classes had a number in angle brackets
+suffixed to them. This is to indicate that for example the
+triangulation in two and three space dimensions are different, but
+related data %types. We could as well have called them
+<code>Triangulation2d</code> and <code>Triangulation3d</code> instead
+of <code>Triangulation@<2@></code> and
+<code>Triangulation@<3@></code> to name the two classes, but this
+has an important drawback: assume you have a function which does
+exactly the same functionality, but on 2d or 3d triangulations,
+depending on which dimension we would like to solve the equation in
+presently (if you don't believe that it is the common case that a
+function does something that is the same in all dimensions, just take
+a look at the code below - there are almost no distinctions between 2d
+and 3d!). We would have to write the same function twice, once
+working on <code>Triangulation2d</code> and once working with a
+<code>Triangulation3d</code>. This is an unnecessary obstacle in
+programming and leads to a nuisance to keep the two functions in sync
+(at best) or difficult to find errors if the two versions get out of
+sync (at worst; this would probably be the more common case).
+
+
+
+
+Such obstacles can be circumvented by using some template magic as
+provided by the C++ language: templatized classes and functions are
+not really classes or functions but only a pattern depending on an
+as-yet undefined data type parameter or on a numerical value which is
+also unknown at the point of definition. However, the compiler can
+build proper classes or functions from these templates if you provide
+it with the information that is needed for that. Of course, parts of
+the template can depend on the template parameters, and they will be
+resolved at the time of compilation for a specific template
+parameter. For example, consider the following piece of code:
+@code
+  template <int dim>
+  void make_grid (Triangulation<dim> &triangulation)
+  {
+    GridGenerator::hyper_cube (triangulation, -1, 1);
+  };
+@endcode
+
+
+
+At the point where the compiler sees this function, it does not know
+anything about the actual value of <code>dim</code>. The only thing the compiler has is
+a template, i.e. a blueprint, to generate
+functions <code>make_grid</code> if given a particular value of
+<code>dim</code>. Since <code>dim</code> has an unknown value, there is no
+code the compiler can generate for the moment.
+
+
+
+However, if later down the compiler would encounter code that looks, for
+example, like this,
+@code
+  Triangulation<2> triangulation;
+  make_grid (triangulation);
+@endcode
+then the compiler will deduce that the function <code>make_grid</code> for
+<code>dim==2</code> was
+requested and will compile the template above into a function with dim replaced
+by 2 everywhere, i.e. it will compile the function as if it were defined
+as
+@code
+  void make_grid (Triangulation<2> &triangulation)
+  {
+    GridGenerator::hyper_cube (triangulation, -1, 1);
+  };
+@endcode
+
+
+
+However, it is worth noting that the function
+<code>GridGenerator::hyper_cube</code> depends on the dimension as
+well, so in this case, the compiler will call the function
+<code>GridGenerator::hyper_cube@<2@></code> while if dim were 3,
+it would call <code>GridGenerator::hyper_cube@<3@></code> which
+might be (and actually is) a totally unrelated  function.
+
+
+
+The same can be done with member variables. Consider the following
+function, which might in turn call the above one:
+@code
+  template <int dim>
+  void make_grid_and_dofs (Triangulation<dim> &triangulation)
+  {
+    make_grid (triangulation);
+
+    DoFHandler<dim> dof_handler(triangulation);
+    ...
+  };
+@endcode
+This function has a member variable of type
+<code>DoFHandler@<dim@></code>. Again, the compiler can't
+compile this function until it knows for which dimension. If you call
+this function for a specific dimension as above, the compiler will
+take the template, replace all occurrences of dim by the dimension for
+which it was called, and compile it. If you call the function several
+times for different dimensions, it will compile it several times, each
+time calling the right <code>make_grid</code> function and reserving the right
+amount of memory for the member variable; note that the size of a
+<code>DoFHandler</code> might, and indeed does, depend on the space dimension.
+
+
+
+The deal.II library is built around this concept
+of dimension-independent programming, and therefore allows you to program in
+a way that will not need to
+distinguish between the space dimensions. It should be noted that in
+only a very few places is it necessary to actually compare the
+dimension using <code>if</code>s or <code>switch</code>es. However, since the compiler
+has to compile each function for each dimension separately, even there
+it knows the value of <code>dim</code> at the time of compilation and will
+therefore be able to optimize away the <code>if</code> statement along with the
+unused branch.
+
+
+
+In this example program, we will show how to program dimension
+independently (which in fact is even simpler than if you had to take
+care of the dimension) and we will extend the Laplace problem of
+the last example to a program that runs in two and three space
+dimensions at the same time. Other extensions are the use of a
+non-constant right hand side function and of non-zero boundary values.
+
+
+@note When using templates, C++ imposes all sorts of syntax constraints that
+make it sometimes a bit difficult to understand why exactly something has to
+be written this way. A typical example is the need to use the keyword
+<code>typename</code> in so many places. If you are not entirely familiar with
+this already, then several of these difficulties are explained in the deal.II
+Frequently Asked Questions (FAQ) linked to from the <a
+href="http://www.dealii.org/">deal.II homepage</a>.
diff --git a/examples/step-4/doc/kind b/examples/step-4/doc/kind
new file mode 100644
index 0000000..15a13db
--- /dev/null
+++ b/examples/step-4/doc/kind
@@ -0,0 +1 @@
+basic
diff --git a/examples/step-4/doc/results.dox b/examples/step-4/doc/results.dox
new file mode 100644
index 0000000..3b9a95c
--- /dev/null
+++ b/examples/step-4/doc/results.dox
@@ -0,0 +1,110 @@
+<h1>Results</h1>
+
+
+The output of the program looks as follows (the number of iterations
+may vary by one or two, depending on your computer, since this is
+often dependent on the round-off accuracy of floating point
+operations, which differs between processors):
+@code
+Solving problem in 2 space dimensions.
+   Number of active cells: 256
+   Total number of cells: 341
+   Number of degrees of freedom: 289
+   26 CG iterations needed to obtain convergence.
+Solving problem in 3 space dimensions.
+   Number of active cells: 4096
+   Total number of cells: 4681
+   Number of degrees of freedom: 4913
+   30 CG iterations needed to obtain convergence.
+@endcode
+It is obvious that in three spatial dimensions the number of cells and
+therefore also the number of degrees of freedom is
+much higher. What cannot be seen here, is that besides this higher
+number of rows and columns in the matrix, there are also significantly
+more entries per row of the matrix in three space
+dimensions. Together, this leads to a much higher numerical effort for
+solving the system of equations, which you can feel in the run time of the two
+solution steps when you actually run the program.
+
+
+
+The program produces two files: <code>solution-2d.vtk</code> and
+<code>solution-3d.vtk</code>, which can be viewed using the programs
+Visit or Paraview (in case you do not have these programs, you can easily
+change the 
+output format in the program to something which you can view more
+easily). Visualizing solutions is a bit of an art, but it can also be fun, so
+you should play around with your favorite visualization tool to get familiar
+with its functionality. Here's what I have come up with for the 2d solution:
+
+<p align="center">
+  <img src="http://www.dealii.org/images/steps/developer/step-4.solution-2d.png" alt="">
+</p>
+
+(@dealiiVideoLectureSeeAlso{11,32})
+The picture shows the solution of the problem under consideration as
+a 3D plot. As can be seen, the solution is almost flat in the interior
+of the domain and has a higher curvature near the boundary. This, of
+course, is due to the fact that for Laplace's equation the curvature
+of the solution is equal to the right hand side and that was chosen as
+a quartic polynomial which is nearly zero in the interior and is only
+rising sharply when approaching the boundaries of the domain; the
+maximal values of the right hand side function are at the corners of
+the domain, where also the solution is moving most rapidly.
+It is also nice to see that the solution follows the desired quadratic
+boundary values along the boundaries of the domain.
+
+On the other hand, even though the picture does not show the mesh lines
+explicitly, you can see them as little kinks in the solution. This clearly
+indicates that the solution hasn't been computed to very high accuracy and
+that to get a better solution, we may have to compute on a finer mesh.
+
+In three spatial dimensions, visualization is a bit more difficult. The left
+picture shows the solution, and the mesh on which it was computed, on the surface of
+the domain. This is nice, but it has the drawback that it completely hides
+what is happening on the inside. The picture on the right is an attempt at
+visualizing the interior as well, by showing surfaces where the solution has
+constant values (as indicated by the legend at the top left). Isosurface
+pictures look best if one makes the individual surfaces slightly transparent
+so that it is possible to see through them and see what's behind.
+
+<TABLE WIDTH="60%" ALIGN="center">
+  <tr>
+    <td ALIGN="center">
+      <img src="http://www.dealii.org/images/steps/developer/step-4.solution-3d.png" alt="">
+    </td>
+
+    <td ALIGN="center">
+      <img src="http://www.dealii.org/images/steps/developer/step-4.contours-3d.png" alt="">
+    </td>
+  </tr>
+</table>
+
+@note
+A final remark on visualization: The idea of visualization is to give insight,
+which is not the same as displaying information. In particular, it is easy to
+overload a picture with information, but while it shows more information it
+makes it also more difficult to glean insight. As an example, the program I
+used to generate these pictures, Visit, by default puts tick marks on every
+axis, puts a big fat label "X Axis" on the $x$ axis and similar for the other
+axes, shows the file name from which the data was taken in the top left and
+the name of the user doing so and the time and date on the bottom right. None
+of this is important 
+here: the axes are equally easy to make out because the tripod at the bottom
+left is still visible, and we know from the program that the domain is
+$[-1,1]^3$, so there is no need for tick marks. As a consequence, I have
+switched off all the extraneous stuff in the picture: The art of visualization
+is to reduce the picture to those parts that are important to see what one
+wants to see, but no more.
+
+
+
+<a name="extensions"></a>
+<h3>Possibilities for extensions</h3>
+
+
+Essentially the possibilities for playing around with the program are the same
+as for the previous one, except that they will now also apply to the 3d
+case. For inspiration read up on <a href="step_3.html#extensions"
+target="body">possible extensions in the documentation of step 3</a>.
+
diff --git a/examples/step-4/doc/tooltip b/examples/step-4/doc/tooltip
new file mode 100644
index 0000000..1f94394
--- /dev/null
+++ b/examples/step-4/doc/tooltip
@@ -0,0 +1 @@
+Dimension independent programming. Boundary conditions.
diff --git a/examples/step-4/step-4.cc b/examples/step-4/step-4.cc
new file mode 100644
index 0000000..cf08984
--- /dev/null
+++ b/examples/step-4/step-4.cc
@@ -0,0 +1,540 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 1999 - 2016 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Author: Wolfgang Bangerth, University of Heidelberg, 1999
+ */
+
+
+// @sect3{Include files}
+
+// The first few (many?) include files have already been used in the previous
+// example, so we will not explain their meaning here again.
+#include <deal.II/grid/tria.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/function.h>
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/matrix_tools.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/precondition.h>
+
+#include <deal.II/numerics/data_out.h>
+#include <fstream>
+#include <iostream>
+
+// This is new, however: in the previous example we got some unwanted output
+// from the linear solvers. If we want to suppress it, we have to include this
+// file and add a single line somewhere to the program (see the main()
+// function below for that):
+#include <deal.II/base/logstream.h>
+
+// The final step, as in previous programs, is to import all the deal.II class
+// and function names into the global namespace:
+using namespace dealii;
+
+// @sect3{The <code>Step4</code> class template}
+
+// This is again the same <code>Step4</code> class as in the previous
+// example. The only difference is that we have now declared it as a class
+// with a template parameter, and the template parameter is of course the
+// spatial dimension in which we would like to solve the Laplace equation. Of
+// course, several of the member variables depend on this dimension as well,
+// in particular the Triangulation class, which has to represent
+// quadrilaterals or hexahedra, respectively. Apart from this, everything is
+// as before.
+template <int dim>
+class Step4
+{
+public:
+  Step4 ();
+  void run ();
+
+private:
+  void make_grid ();
+  void setup_system();
+  void assemble_system ();
+  void solve ();
+  void output_results () const;
+
+  Triangulation<dim>   triangulation;
+  FE_Q<dim>            fe;
+  DoFHandler<dim>      dof_handler;
+
+  SparsityPattern      sparsity_pattern;
+  SparseMatrix<double> system_matrix;
+
+  Vector<double>       solution;
+  Vector<double>       system_rhs;
+};
+
+
+// @sect3{Right hand side and boundary values}
+
+// In the following, we declare two more classes denoting the right hand side
+// and the non-homogeneous Dirichlet boundary values. Both are functions of a
+// dim-dimensional space variable, so we declare them as templates as well.
+//
+// Each of these classes is derived from a common, abstract base class
+// Function, which declares the common interface which all functions have to
+// follow. In particular, concrete classes have to override the
+// <code>value</code> function, which takes a point in dim-dimensional space
+// as its parameter and shall return the value at that point as a
+// <code>double</code> variable.
+//
+// The <code>value</code> function takes a second argument, which we have here
+// named <code>component</code>: This is only meant for vector valued
+// functions, where you may want to access a certain component of the vector
+// at the point <code>p</code>. However, our functions are scalar, so we need
+// not worry about this parameter and we will not use it in the implementation
+// of the functions. Inside the library's header files, the Function base
+// class's declaration of the <code>value</code> function has a default value
+// of zero for the component, so we will access the <code>value</code>
+// function of the right hand side with only one parameter, namely the point
+// where we want to evaluate the function. A value for the component can then
+// simply be omitted for scalar functions.
+//
+// Note that the C++ language forces us to declare and define a constructor to
+// the following classes even though they are empty. This is due to the fact
+// that the base class has no default constructor (i.e. one without
+// arguments), even though it has a constructor which has default values for
+// all arguments.
+template <int dim>
+class RightHandSide : public Function<dim>
+{
+public:
+  RightHandSide () : Function<dim>() {}
+
+  virtual double value (const Point<dim>   &p,
+                        const unsigned int  component = 0) const;
+};
+
+
+
+template <int dim>
+class BoundaryValues : public Function<dim>
+{
+public:
+  BoundaryValues () : Function<dim>() {}
+
+  virtual double value (const Point<dim>   &p,
+                        const unsigned int  component = 0) const;
+};
+
+
+
+
+// For this example, we choose as right hand side the function
+// $4(x^4+y^4)$ in 2D, or $4(x^4+y^4+z^4)$ in 3D. We could write this
+// distinction using an if-statement on the space dimension, but here is a
+// simple way that also allows us to use the same function in 1D (or in 4D, if
+// you should desire to do so), by using a short loop.  Fortunately, the
+// compiler knows the size of the loop at compile time (remember that at the
+// time when you define the template, the compiler doesn't know the value of
+// <code>dim</code>, but when it later encounters a statement or declaration
+// <code>RightHandSide@<2@></code>, it will take the template, replace all
+// occurrences of dim by 2 and compile the resulting function); in other
+// words, at the time of compiling this function, the number of times the body
+// will be executed is known, and the compiler can optimize away the overhead
+// needed for the loop and the result will be as fast as if we had used the
+// formulas above right away.
+//
+// The last thing to note is that a <code>Point@<dim@></code> denotes a point
+// in dim-dimensional space, and its individual components (i.e. $x$, $y$,
+// ... coordinates) can be accessed using the () operator (in fact, the []
+// operator will work just as well) with indices starting at zero as usual in
+// C and C++.
+template <int dim>
+double RightHandSide<dim>::value (const Point<dim> &p,
+                                  const unsigned int /*component*/) const
+{
+  double return_value = 0.0;
+  for (unsigned int i=0; i<dim; ++i)
+    return_value += 4.0 * std::pow(p(i), 4.0);
+
+  return return_value;
+}
+
+
+// As boundary values, we choose $x^2+y^2$ in 2D, and $x^2+y^2+z^2$ in 3D. This
+// happens to be equal to the square of the vector from the origin to the
+// point at which we would like to evaluate the function, irrespective of the
+// dimension. So that is what we return:
+template <int dim>
+double BoundaryValues<dim>::value (const Point<dim> &p,
+                                   const unsigned int /*component*/) const
+{
+  return p.square();
+}
+
+
+
+// @sect3{Implementation of the <code>Step4</code> class}
+
+// Next for the implementation of the class template that makes use of the
+// functions above. As before, we will write everything as templates that have
+// a formal parameter <code>dim</code> that we assume unknown at the time we
+// define the template functions. Only later, the compiler will find a
+// declaration of <code>Step4@<2@></code> (in the <code>main</code> function,
+// actually) and compile the entire class with <code>dim</code> replaced by 2,
+// a process referred to as `instantiation of a template'. When doing so, it
+// will also replace instances of <code>RightHandSide@<dim@></code> by
+// <code>RightHandSide@<2@></code> and instantiate the latter class from the
+// class template.
+//
+// In fact, the compiler will also find a declaration <code>Step4@<3@></code>
+// in <code>main()</code>. This will cause it to again go back to the general
+// <code>Step4@<dim@></code> template, replace all occurrences of
+// <code>dim</code>, this time by 3, and compile the class a second time. Note
+// that the two instantiations <code>Step4@<2@></code> and
+// <code>Step4@<3@></code> are completely independent classes; their only
+// common feature is that they are both instantiated from the same general
+// template, but they are not convertible into each other, for example, and
+// share no code (both instantiations are compiled completely independently).
+
+
+// @sect4{Step4::Step4}
+
+// After this introduction, here is the constructor of the <code>Step4</code>
+// class. It specifies the desired polynomial degree of the finite elements
+// and associates the DoFHandler to the triangulation just as in the previous
+// example program, step-3:
+template <int dim>
+Step4<dim>::Step4 ()
+  :
+  fe (1),
+  dof_handler (triangulation)
+{}
+
+
+// @sect4{Step4::make_grid}
+
+// Grid creation is something inherently dimension dependent. However, as long
+// as the domains are sufficiently similar in 2D or 3D, the library can
+// abstract for you. In our case, we would like to again solve on the square
+// $[-1,1]\times [-1,1]$ in 2D, or on the cube $[-1,1] \times [-1,1] \times
+// [-1,1]$ in 3D; both can be termed GridGenerator::hyper_cube(), so we may
+// use the same function in whatever dimension we are. Of course, the
+// functions that create a hypercube in two and three dimensions are very much
+// different, but that is something you need not care about. Let the library
+// handle the difficult things.
+template <int dim>
+void Step4<dim>::make_grid ()
+{
+  GridGenerator::hyper_cube (triangulation, -1, 1);
+  triangulation.refine_global (4);
+
+  std::cout << "   Number of active cells: "
+            << triangulation.n_active_cells()
+            << std::endl
+            << "   Total number of cells: "
+            << triangulation.n_cells()
+            << std::endl;
+}
+
+// @sect4{Step4::setup_system}
+
+// This function looks exactly like in the previous example, although it
+// performs actions that in their details are quite different if
+// <code>dim</code> happens to be 3. The only significant difference from a
+// user's perspective is the number of cells resulting, which is much higher
+// in three than in two space dimensions!
+template <int dim>
+void Step4<dim>::setup_system ()
+{
+  dof_handler.distribute_dofs (fe);
+
+  std::cout << "   Number of degrees of freedom: "
+            << dof_handler.n_dofs()
+            << std::endl;
+
+  DynamicSparsityPattern dsp(dof_handler.n_dofs());
+  DoFTools::make_sparsity_pattern (dof_handler, dsp);
+  sparsity_pattern.copy_from(dsp);
+
+  system_matrix.reinit (sparsity_pattern);
+
+  solution.reinit (dof_handler.n_dofs());
+  system_rhs.reinit (dof_handler.n_dofs());
+}
+
+
+// @sect4{Step4::assemble_system}
+
+// Unlike in the previous example, we would now like to use a non-constant
+// right hand side function and non-zero boundary values. Both are tasks that
+// are readily achieved with only a few new lines of code in the assemblage of
+// the matrix and right hand side.
+//
+// More interesting, though, is the way we assemble matrix and right hand side
+// vector dimension independently: there is simply no difference to the
+// two-dimensional case. Since the important objects used in this function
+// (quadrature formula, FEValues) depend on the dimension by way of a template
+// parameter as well, they can take care of setting up properly everything for
+// the dimension for which this function is compiled. By declaring all classes
+// which might depend on the dimension using a template parameter, the library
+// can do nearly all the work for you and you don't have to care about most
+// things.
+template <int dim>
+void Step4<dim>::assemble_system ()
+{
+  QGauss<dim>  quadrature_formula(2);
+
+  // We wanted to have a non-constant right hand side, so we use an object of
+  // the class declared above to generate the necessary data. Since this right
+  // hand side object is only used locally in the present function, we declare
+  // it here as a local variable:
+  const RightHandSide<dim> right_hand_side;
+
+  // Compared to the previous example, in order to evaluate the non-constant
+  // right hand side function we now also need the quadrature points on the
+  // cell we are presently on (previously, we only required values and
+  // gradients of the shape function from the FEValues object, as well as the
+  // quadrature weights, FEValues::JxW() ). We can tell the FEValues object to
+  // do this for us by also giving it the #update_quadrature_points flag:
+  FEValues<dim> fe_values (fe, quadrature_formula,
+                           update_values   | update_gradients |
+                           update_quadrature_points | update_JxW_values);
+
+  // We then again define a few abbreviations. The values of these variables
+  // of course depend on the dimension which we are presently using. However,
+  // the FE and Quadrature classes do all the necessary work for you and you
+  // don't have to care about the dimension dependent parts:
+  const unsigned int   dofs_per_cell = fe.dofs_per_cell;
+  const unsigned int   n_q_points    = quadrature_formula.size();
+
+  FullMatrix<double>   cell_matrix (dofs_per_cell, dofs_per_cell);
+  Vector<double>       cell_rhs (dofs_per_cell);
+
+  std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+
+  // Next, we again have to loop over all cells and assemble local
+  // contributions.  Note that a cell is a quadrilateral in two space
+  // dimensions, but a hexahedron in 3D. In fact, the
+  // <code>active_cell_iterator</code> data type is something different,
+  // depending on the dimension we are in, but to the outside world they look
+  // alike and you will probably never see a difference although the classes
+  // that this typedef stands for are in fact completely unrelated:
+  typename DoFHandler<dim>::active_cell_iterator
+  cell = dof_handler.begin_active(),
+  endc = dof_handler.end();
+
+  for (; cell!=endc; ++cell)
+    {
+      fe_values.reinit (cell);
+      cell_matrix = 0;
+      cell_rhs = 0;
+
+      // Now we have to assemble the local matrix and right hand side. This is
+      // done exactly like in the previous example, but now we reverse the
+      // order of the loops (which we can safely do since they are independent
+      // of each other) and merge the loops for the local matrix and the local
+      // vector as far as possible to make things a bit faster.
+      //
+      // Assembling the right hand side presents the only significant
+      // difference to how we did things in step-3: Instead of using a
+      // constant right hand side with value 1, we use the object representing
+      // the right hand side and evaluate it at the quadrature points:
+      for (unsigned int q_index=0; q_index<n_q_points; ++q_index)
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          {
+            for (unsigned int j=0; j<dofs_per_cell; ++j)
+              cell_matrix(i,j) += (fe_values.shape_grad (i, q_index) *
+                                   fe_values.shape_grad (j, q_index) *
+                                   fe_values.JxW (q_index));
+
+            cell_rhs(i) += (fe_values.shape_value (i, q_index) *
+                            right_hand_side.value (fe_values.quadrature_point (q_index)) *
+                            fe_values.JxW (q_index));
+          }
+      // As a final remark to these loops: when we assemble the local
+      // contributions into <code>cell_matrix(i,j)</code>, we have to multiply
+      // the gradients of shape functions $i$ and $j$ at point number
+      // q_index and
+      // multiply it with the scalar weights JxW. This is what actually
+      // happens: <code>fe_values.shape_grad(i,q_index)</code> returns a
+      // <code>dim</code> dimensional vector, represented by a
+      // <code>Tensor@<1,dim@></code> object, and the operator* that
+      // multiplies it with the result of
+      // <code>fe_values.shape_grad(j,q_index)</code> makes sure that the
+      // <code>dim</code> components of the two vectors are properly
+      // contracted, and the result is a scalar floating point number that
+      // then is multiplied with the weights. Internally, this operator* makes
+      // sure that this happens correctly for all <code>dim</code> components
+      // of the vectors, whether <code>dim</code> be 2, 3, or any other space
+      // dimension; from a user's perspective, this is not something worth
+      // bothering with, however, making things a lot simpler if one wants to
+      // write code dimension independently.
+
+      // With the local systems assembled, the transfer into the global matrix
+      // and right hand side is done exactly as before, but here we have again
+      // merged some loops for efficiency:
+      cell->get_dof_indices (local_dof_indices);
+      for (unsigned int i=0; i<dofs_per_cell; ++i)
+        {
+          for (unsigned int j=0; j<dofs_per_cell; ++j)
+            system_matrix.add (local_dof_indices[i],
+                               local_dof_indices[j],
+                               cell_matrix(i,j));
+
+          system_rhs(local_dof_indices[i]) += cell_rhs(i);
+        }
+    }
+
+
+  // As the final step in this function, we wanted to have non-homogeneous
+  // boundary values in this example, unlike the one before. This is a simple
+  // task, we only have to replace the ZeroFunction used there by an object of
+  // the class which describes the boundary values we would like to use
+  // (i.e. the <code>BoundaryValues</code> class declared above):
+  std::map<types::global_dof_index,double> boundary_values;
+  VectorTools::interpolate_boundary_values (dof_handler,
+                                            0,
+                                            BoundaryValues<dim>(),
+                                            boundary_values);
+  MatrixTools::apply_boundary_values (boundary_values,
+                                      system_matrix,
+                                      solution,
+                                      system_rhs);
+}
+
+
+// @sect4{Step4::solve}
+
+// Solving the linear system of equations is something that looks almost
+// identical in most programs. In particular, it is dimension independent, so
+// this function is copied verbatim from the previous example.
+template <int dim>
+void Step4<dim>::solve ()
+{
+  SolverControl           solver_control (1000, 1e-12);
+  SolverCG<>              solver (solver_control);
+  solver.solve (system_matrix, solution, system_rhs,
+                PreconditionIdentity());
+
+  // We have made one addition, though: since we suppress output from the
+  // linear solvers, we have to print the number of iterations by hand.
+  std::cout << "   " << solver_control.last_step()
+            << " CG iterations needed to obtain convergence."
+            << std::endl;
+}
+
+
+// @sect4{Step4::output_results}
+
+// This function also does what the respective one did in step-3. No changes
+// here for dimension independence either.
+//
+// The only difference to the previous example is that we want to write output
+// in VTK format, rather than for gnuplot. VTK format is currently the most
+// widely used one and is supported by a number of visualization programs such
+// as Visit and Paraview (for ways to obtain these programs see the ReadMe
+// file of deal.II). To write data in this format, we simply replace the
+// <code>data_out.write_gnuplot</code> call by
+// <code>data_out.write_vtk</code>.
+//
+// Since the program will run both 2d and 3d versions of the Laplace solver,
+// we use the dimension in the filename to generate distinct filenames for
+// each run (in a better program, one would check whether <code>dim</code> can
+// have other values than 2 or 3, but we neglect this here for the sake of
+// brevity).
+template <int dim>
+void Step4<dim>::output_results () const
+{
+  DataOut<dim> data_out;
+
+  data_out.attach_dof_handler (dof_handler);
+  data_out.add_data_vector (solution, "solution");
+
+  data_out.build_patches ();
+
+  std::ofstream output (dim == 2 ?
+                        "solution-2d.vtk" :
+                        "solution-3d.vtk");
+  data_out.write_vtk (output);
+}
+
+
+
+// @sect4{Step4::run}
+
+// This is the function which has the top-level control over everything. Apart
+// from one line of additional output, it is the same as for the previous
+// example.
+template <int dim>
+void Step4<dim>::run ()
+{
+  std::cout << "Solving problem in " << dim << " space dimensions." << std::endl;
+
+  make_grid();
+  setup_system ();
+  assemble_system ();
+  solve ();
+  output_results ();
+}
+
+
+// @sect3{The <code>main</code> function}
+
+// And this is the main function. It also looks mostly like in step-3, but if
+// you look at the code below, note how we first create a variable of type
+// <code>Step4@<2@></code> (forcing the compiler to compile the class template
+// with <code>dim</code> replaced by <code>2</code>) and run a 2d simulation,
+// and then we do the whole thing over in 3d.
+//
+// In practice, this is probably not what you would do very frequently (you
+// probably either want to solve a 2d problem, or one in 3d, but not both at
+// the same time). However, it demonstrates the mechanism by which we can
+// simply change which dimension we want in a single place, and thereby force
+// the compiler to recompile the dimension independent class templates for the
+// dimension we request. The emphasis here lies on the fact that we only need
+// to change a single place. This makes it rather trivial to debug the program
+// in 2d where computations are fast, and then switch a single place to a 3 to
+// run the much more computing intensive program in 3d for `real'
+// computations.
+//
+// Each of the two blocks is enclosed in braces to make sure that the
+// <code>laplace_problem_2d</code> variable goes out of scope (and releases
+// the memory it holds) before we move on to allocate memory for the 3d
+// case. Without the additional braces, the <code>laplace_problem_2d</code>
+// variable would only be destroyed at the end of the function, i.e. after
+// running the 3d problem, and would needlessly hog memory while the 3d run
+// could actually use it.
+int main ()
+{
+  deallog.depth_console (0);
+  {
+    Step4<2> laplace_problem_2d;
+    laplace_problem_2d.run ();
+  }
+
+  {
+    Step4<3> laplace_problem_3d;
+    laplace_problem_3d.run ();
+  }
+
+  return 0;
+}
diff --git a/examples/step-40/CMakeLists.txt b/examples/step-40/CMakeLists.txt
new file mode 100644
index 0000000..d04cc77
--- /dev/null
+++ b/examples/step-40/CMakeLists.txt
@@ -0,0 +1,55 @@
+##
+#  CMake script for the step-40 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-40")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+#
+# Are all dependencies fulfilled?
+#
+IF(NOT (DEAL_II_WITH_PETSC OR DEAL_II_WITH_TRILINOS) OR NOT DEAL_II_WITH_P4EST)
+  MESSAGE(FATAL_ERROR "
+Error! The deal.II library found at ${DEAL_II_PATH} was not configured with
+    DEAL_II_WITH_PETSC = ON
+    DEAL_II_WITH_P4EST = ON
+or
+    DEAL_II_WITH_TRILINOS = ON
+    DEAL_II_WITH_P4EST = ON
+One or both of these combinations are OFF in your installation but at least one is required for this tutorial step."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+SET(CLEAN_UP_FILES *.log *.gmv *.gnuplot *.gpl *.eps *.pov *.vtk *.ucd *.d2 *.vtu *.pvtu)
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-40/doc/builds-on b/examples/step-40/doc/builds-on
new file mode 100644
index 0000000..c27eb15
--- /dev/null
+++ b/examples/step-40/doc/builds-on
@@ -0,0 +1 @@
+step-6 step-17
diff --git a/examples/step-40/doc/intro.dox b/examples/step-40/doc/intro.dox
new file mode 100644
index 0000000..6ed0e30
--- /dev/null
+++ b/examples/step-40/doc/intro.dox
@@ -0,0 +1,148 @@
+<br>
+
+<i>This program was contributed by Timo Heister, Martin Kronbichler and Wolfgang
+Bangerth.
+<br>
+This material is based upon work partly supported by the National
+Science Foundation under Award No. EAR-0426271 and The California Institute of
+Technology. Any opinions, findings, and conclusions or recommendations
+expressed in this publication are those of the author and do not
+necessarily reflect the views of the National Science Foundation or of The
+California Institute of Technology.
+</i>
+
+
+@note As a prerequisite of this program, you need to have both PETSc and the
+p4est library installed. The installation of deal.II
+together with these two additional libraries is described in the <a
+href="../../readme.html" target="body">README</a> file. Note also that
+to work properly, this program needs access to the Hypre
+preconditioner package implementing algebraic multigrid; it can be
+installed as part of PETSc but has to be explicitly enabled during
+PETSc configuration; see the page linked to from the installation
+instructions for PETSc.
+
+
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+@dealiiVideoLecture{41.5,41.75}
+
+Given today's computers, most finite element computations can be done on
+a single machine. The majority of previous tutorial programs therefore
+show only this, possibly splitting up work among a number of
+processors that, however, can all access the same, shared memory
+space. That said, there are problems that are simply too big for a
+single machine and in that case the problem has to be split up in a
+suitable way among multiple machines each of which contributes its
+part to the whole. A simple way to do that was shown in step-17 and
+step-18, where we show how a program can use <a
+href="http://www.mpi-forum.org/" target="_top">MPI</a> to parallelize
+assembling the linear system, storing it, solving it, and computing
+error estimators. All of these operations scale relatively trivially
+(for a definition of what it means for an operation to "scale", see 
+@ref GlossParallelScaling "this glossary entry"),
+but there was one significant drawback: for this to be moderately
+simple to implement, each MPI processor had to keep its own copy of
+the entire Triangulation and DoFHandler objects. Consequently, while
+we can suspect (with good reasons) that the operations listed above
+can scale to thousands of computers and problem sizes of billions of
+cells and billions of degrees of freedom, building the one big mesh for the
+entire problem these thousands of computers are solving on every last
+processor is clearly not going to scale: it is going to take forever,
+and maybe more importantly no single machine will have enough memory
+to store a mesh that has a billion cells (at least not at the time of
+writing this). In reality, programs like step-17 and step-18 can
+therefore not be run on more than maybe 100 or 200 processors and even
+there storing the Triangulation and DoFHandler objects consumes the
+vast majority of memory on each machine.
+
+Consequently, we need to approach the problem differently: to scale to
+very large problems each processor can only store its own little piece
+of the Triangulation and DoFHandler objects. deal.II implements such a
+scheme in the parallel::distributed namespace and the classes
+therein. It builds on an external library, <a
+href="http://www.p4est.org/">p4est</a> (a play on the expression
+<i>parallel forest</i> that describes the parallel storage of a
+hierarchically constructed mesh as a forest of quad- or
+oct-trees). You need to <a
+href="../../external-libs/p4est.html">install and configure p4est</a>
+but apart from that all of its workings are hidden under the surface
+of deal.II.
+
+In essence, what the parallel::distributed::Triangulation class and
+code inside the DoFHandler class do is to split
+the global mesh so that every processor only stores a small bit it
+"owns" along with one layer of "ghost" cells that surround the ones it
+owns. What happens in the rest of the domain on which we want to solve
+the partial differential equation is unknown to each processor and can
+only be inferred through communication with other machines if such
+information is needed. This implies that we also have to think about
+problems in a different way than we did in, for example, step-17 and
+step-18: no processor can have the entire solution vector for
+postprocessing, for example, and every part of a program has to be
+parallelized because no processor has all the information necessary
+for sequential operations.
+
+A general overview of how this parallelization happens is described in
+the @ref distributed documentation module. You should read it for a
+top-level overview before reading through the source code of this
+program. A concise discussion of many terms we will use in the program
+is also provided in the @ref distributed_paper "Distributed Computing paper".
+It is probably worthwhile reading it for background information on how
+things work internally in this program.
+
+
+<h3>The testcase</h3>
+
+This program essentially re-solves what we already do in
+step-6, i.e. it solves the Laplace equation
+@f{align*}
+  -\Delta u &= f \qquad &&\text{in}\ \Omega=[0,1]^2, \\
+  u &= 0 \qquad &&\text{on}\ \partial\Omega.
+@f}
+The difference of course is now that we want to do so on a mesh that
+may have a billion cells, with a billion or so degrees of
+freedom. There is no doubt that doing so is completely silly for such
+a simple problem, but the point of a tutorial program is, after all,
+not to do something useful but to show how useful programs can be
+implemented using deal.II. Be that as it may, to make things at least
+a tiny bit interesting, we choose the right hand side as a
+discontinuous function,
+@f{align*}
+  f(x,y)
+  =
+  \left\{
+  \begin{array}{ll}
+    1 & \text{if}\ y < \frac 12 + \frac 14 \sin(4\pi x), \\
+    -1 & \text{otherwise},
+  \end{array}
+  \right.
+@f}
+so that the solution has a singularity along the sinusoidal line
+snaking its way through the domain. As a consequence, mesh refinement
+will be concentrated along this line. You can see this in the mesh
+picture shown below in the results section.
+
+Rather than continuing here and giving a long introduction, let us go
+straight to the program code. If you have read through step-6 and the
+@ref distributed documentation module, most of the things that are going
+to happen should be familiar to you already. In fact, comparing the two
+programs you will notice that the additional effort necessary to make things
+work in %parallel is almost insignificant: the two programs have about the
+same number of lines of code (though step-6 spends more space on dealing with
+coefficients and output). In either case, the comments below will only be on
+the things that set step-40 apart from step-6 and that aren't already covered
+in the @ref distributed documentation module.
+
+
+@note This program will be able to compute on as many processors as you want
+to throw at it, and for as large a problem as you have the memory and patience
+to solve. However, there <i>is</i> a limit: the number of unknowns can not
+exceed the largest number that can be stored with an object of type
+types::global_dof_index. By default, this is a typedef for <code>unsigned
+int</code>, which on most machines today is a 32-bit integer, limiting you to
+some 4 billion (in reality, since this program uses PETSc, you will be limited
+to half that as PETSc uses signed integers). However, this can be changed
+during configuration to use 64-bit integers, see the ReadMe file. This will
+give problem sizes you are unlikely to exceed anytime soon.
diff --git a/examples/step-40/doc/kind b/examples/step-40/doc/kind
new file mode 100644
index 0000000..c1d9154
--- /dev/null
+++ b/examples/step-40/doc/kind
@@ -0,0 +1 @@
+techniques
diff --git a/examples/step-40/doc/results.dox b/examples/step-40/doc/results.dox
new file mode 100644
index 0000000..8e887c0
--- /dev/null
+++ b/examples/step-40/doc/results.dox
@@ -0,0 +1,174 @@
+<h1>Results</h1>
+
+When you run the program, on a single processor or with your local MPI
+installation on a few, you should get output like this:
+@code
+Cycle 0:
+   Number of active cells:       1024
+   Number of degrees of freedom: 4225
+   Solved in 10 iterations.
+
+
++---------------------------------------------+------------+------------+
+| Total wallclock time elapsed since start    |     0.176s |            |
+|                                             |            |            |
+| Section                         | no. calls |  wall time | % of total |
++---------------------------------+-----------+------------+------------+
+| assembly                        |         1 |    0.0209s |        12% |
+| output                          |         1 |    0.0189s |        11% |
+| setup                           |         1 |    0.0299s |        17% |
+| solve                           |         1 |    0.0419s |        24% |
++---------------------------------+-----------+------------+------------+
+
+
+Cycle 1:
+   Number of active cells:       1954
+   Number of degrees of freedom: 8399
+   Solved in 10 iterations.
+
+
++---------------------------------------------+------------+------------+
+| Total wallclock time elapsed since start    |     0.327s |            |
+|                                             |            |            |
+| Section                         | no. calls |  wall time | % of total |
++---------------------------------+-----------+------------+------------+
+| assembly                        |         1 |    0.0368s |        11% |
+| output                          |         1 |    0.0208s |       6.4% |
+| refine                          |         1 |     0.157s |        48% |
+| setup                           |         1 |    0.0452s |        14% |
+| solve                           |         1 |    0.0668s |        20% |
++---------------------------------+-----------+------------+------------+
+
+
+Cycle 2:
+   Number of active cells:       3664
+   Number of degrees of freedom: 16183
+   Solved in 11 iterations.
+
+...
+@endcode
+
+The exact numbers differ, depending on how many processors we use;
+this is due to the fact that the preconditioner depends on the
+partitioning of the problem, the solution then differs in the last few
+digits, and consequently the mesh refinement differs slightly.
+The primary thing to notice here, though, is that the number of
+iterations does not increase with the size of the problem. This
+guarantees that we can efficiently solve even the largest problems.
+
+When run on a sufficiently large number of machines (say a few
+thousand), this program can relatively easily solve problems with well
+over one billion unknowns in less than a minute. On the other hand,
+such big problems can no longer be visualized, so we also ran the
+program on only 16 processors. Here are a mesh, along with its
+partitioning onto the 16 processors, and the corresponding solution:
+
+<TABLE WIDTH="100%">
+<tr>
+<td>
+  <img src="http://www.dealii.org/images/steps/developer/step-40.mesh.png" alt="">
+</td>
+<td>
+  <img src="http://www.dealii.org/images/steps/developer/step-40.solution.png" alt="">
+</td>
+</tr>
+</table>
+
+The mesh on the left has a mere 7,069 cells. This is of course a
+problem we would easily have been able to solve already on a single
+processor using step-6, but the point of the program was to show how
+to write a program that scales to many more machines. For example,
+here are two graphs that show how the run time of a large number of parts
+of the program scales on problems with around 52 and 375 million degrees of
+freedom if we take more and more processors (these and the next couple of
+graphs are taken from an earlier version of the
+@ref distributed_paper "Distributed Computing paper"; updated graphs showing
+data of runs on even larger numbers of processors, and a lot
+more interpretation can be found in the final version of the paper):
+
+<TABLE WIDTH="100%">
+<tr>
+<td>
+  <img src="http://www.dealii.org/images/steps/developer/step-40.strong2.png" alt="">
+</td>
+<td>
+  <img src="http://www.dealii.org/images/steps/developer/step-40.strong.png" alt="">
+</td>
+</tr>
+</table>
+
+As can clearly be seen, the program scales nicely to very large
+numbers of processors. The curves, in particular the linear solver, become a
+bit wobbly at the right end of the graphs since each processor has too little
+to do to offset the cost of communication (the part of the whole problem each
+processor has to solve in the above two examples is only 13,000 and 90,000
+degrees of freedom when 4,096 processors are used).
+
+While the strong scaling graphs above show that we can solve a problem of
+fixed size faster and faster if we take more and more processors, the more
+interesting question may be how big problems can become so that they can still
+be solved within a reasonable time on a machine of a particular size. We show
+this in the following two graphs for 256 and 4096 processors:
+
+<TABLE WIDTH="100%">
+<tr>
+<td>
+  <img src="http://www.dealii.org/images/steps/developer/step-40.256.png" alt="">
+</td>
+<td>
+  <img src="http://www.dealii.org/images/steps/developer/step-40.4096.png" alt="">
+</td>
+</tr>
+</table>
+
+What these graphs show is that all parts of the program scale linearly with
+the number of degrees of freedom. This time, lines are wobbly at the left as
+the size of local problems is too small. For more discussions of these results
+we refer to the @ref distributed_paper "Distributed Computing paper".
+
+So how large are the largest problems one can solve? At the time of writing
+this program, the
+limiting factor is that the program uses the BoomerAMG algebraic
+multigrid method from the <a
+href="http://acts.nersc.gov/hypre/" target="_top">Hypre package</a> as
+a preconditioner, which unfortunately uses signed 32-bit integers to
+index the elements of a %distributed matrix. This limits the size of
+problems to $2^{31}-1=2,147,483,647$ degrees of freedom. From the graphs
+above it is obvious that the scalability would extend beyond this
+number, and one could expect that using more than the 4,096 machines
+shown above would also further reduce the compute time. That said, one
+can certainly expect that this limit will eventually be lifted by the
+hypre developers.
+
+
+
+<a name="extensions"></a>
+<h3>Possibilities for extensions</h3>
+
+In a sense, this program is the ultimate solver for the Laplace
+equation: it can essentially solve the equation to whatever accuracy
+you want, if only you have enough processors available. Since the
+Laplace equation by itself is not terribly interesting at this level
+of accuracy, the more interesting possibilities for extension
+therefore concern not so much this program but what comes beyond
+it. For example, several of the other programs in this tutorial have
+significant run times, especially in 3d. It would therefore be
+interesting to use the techniques explained here to extend other
+programs to support parallel distributed computations. We have done
+this for step-31 in the step-32 tutorial program, but the same would
+apply to, for example, step-23 and step-25 for hyperbolic time
+dependent problems, step-33 for gas dynamics, or step-35 for the
+Navier-Stokes equations.
+
+Maybe equally interesting is the problem of postprocessing. As
+mentioned above, we only show pictures of the solution and the mesh
+for 16 processors because 4,096 processors solving 1 billion unknowns
+would produce graphical output on the order of several tens of
+gigabytes. Currently, no program is able to visualize this amount of
+data in any reasonable way unless it also runs on at least several
+hundred processors. There are, however, approaches where visualization
+programs directly communicate with solvers on each processor with each
+visualization process rendering the part of the scene computed by the
+solver on this processor. Implementing such an interface would allow one
+to quickly visualize things that are otherwise not amenable to
+graphical display.
diff --git a/examples/step-40/doc/tooltip b/examples/step-40/doc/tooltip
new file mode 100644
index 0000000..a8090e4
--- /dev/null
+++ b/examples/step-40/doc/tooltip
@@ -0,0 +1 @@
+Solving the Laplace equation on adaptive meshes on thousands of processors.
diff --git a/examples/step-40/step-40.cc b/examples/step-40/step-40.cc
new file mode 100644
index 0000000..a2e7dd8
--- /dev/null
+++ b/examples/step-40/step-40.cc
@@ -0,0 +1,743 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2009 - 2016 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Author: Wolfgang Bangerth, Texas A&M University, 2009, 2010
+ *         Timo Heister, University of Goettingen, 2009, 2010
+ */
+
+
+// @sect3{Include files}
+//
+// Most of the include files we need for this program have already been
+// discussed in previous programs. In particular, all of the following should
+// already be familiar friends:
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/timer.h>
+
+#include <deal.II/lac/generic_linear_algebra.h>
+
+// uncomment the following #define if you have PETSc and Trilinos installed
+// and you prefer using Trilinos in this example:
+// #define FORCE_USE_OF_TRILINOS
+
+// This will either import PETSc or TrilinosWrappers into the namespace
+// LA. Note that we are defining the macro USE_PETSC_LA so that we can detect
+// if we are using PETSc (see solve() for an example where this is necessary)
+namespace LA  // alias namespace for whichever linear algebra backend is selected below
+{
+#if defined(DEAL_II_WITH_PETSC) && !(defined(DEAL_II_WITH_TRILINOS) && defined(FORCE_USE_OF_TRILINOS))
+  using namespace dealii::LinearAlgebraPETSc;  // PETSc is used unless Trilinos is available and forced
+#  define USE_PETSC_LA  // lets later code (e.g. solve()) detect the PETSc backend
+#elif defined(DEAL_II_WITH_TRILINOS)
+  using namespace dealii::LinearAlgebraTrilinos;  // otherwise fall back to Trilinos
+#else
+#  error DEAL_II_WITH_PETSC or DEAL_II_WITH_TRILINOS required
+#endif
+}  // namespace LA
+
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+
+#include <deal.II/lac/petsc_parallel_sparse_matrix.h>
+#include <deal.II/lac/petsc_parallel_vector.h>
+#include <deal.II/lac/petsc_solver.h>
+#include <deal.II/lac/petsc_precondition.h>
+
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/numerics/error_estimator.h>
+
+// The following, however, will be new or be used in new roles. Let's walk
+// through them. The first of these will provide the tools of the
+// Utilities::System namespace that we will use to query things like the
+// number of processors associated with the current MPI universe, or the
+// number within this universe the processor this job runs on is:
+#include <deal.II/base/utilities.h>
+// The next one provides a class, ConditionalOStream, that allows us to write
+// code that would output things to a stream (such as <code>std::cout</code>)
+// on every processor but throws the text away on all but one of them. We
+// could achieve the same by simply putting an <code>if</code> statement in
+// front of each place where we may generate output, but this doesn't make the
+// code any prettier. In addition, the condition whether this processor should
+// or should not produce output to the screen is the same every time -- and
+// consequently it should be simple enough to put it into the statements that
+// generate output itself.
+#include <deal.II/base/conditional_ostream.h>
+// After these preliminaries, here is where it becomes more interesting. As
+// mentioned in the @ref distributed module, one of the fundamental truths of
+// solving problems on large numbers of processors is that there is no way for
+// any processor to store everything (e.g. information about all cells in the
+// mesh, all degrees of freedom, or the values of all elements of the solution
+// vector). Rather, every processor will <i>own</i> a few of each of these
+// and, if necessary, may <i>know</i> about a few more, for example the ones
+// that are located on cells adjacent to the ones this processor owns
+// itself. We typically call the latter <i>ghost cells</i>, <i>ghost nodes</i>
+// or <i>ghost elements of a vector</i>. The point of this discussion here is
+// that we need to have a way to indicate which elements a particular
+// processor owns or needs to know of. This is the realm of the IndexSet class:
+// if there are a total of $N$ cells, degrees of freedom, or vector elements,
+// associated with (non-negative) integral indices $[0,N)$, then both the set
+// of elements the current processor owns as well as the (possibly larger) set
+// of indices it needs to know about are subsets of the set $[0,N)$. IndexSet
+// is a class that stores subsets of this set in an efficient format:
+#include <deal.II/base/index_set.h>
+// The next header file is necessary for a single function,
+// SparsityTools::distribute_sparsity_pattern. The role of this function will
+// be explained below.
+#include <deal.II/lac/sparsity_tools.h>
+// The final two, new header files provide the class
+// parallel::distributed::Triangulation that provides meshes distributed
+// across a potentially very large number of processors, while the second
+// provides the namespace parallel::distributed::GridRefinement that offers
+// functions that can adaptively refine such distributed meshes:
+#include <deal.II/distributed/tria.h>
+#include <deal.II/distributed/grid_refinement.h>
+
+#include <fstream>
+#include <iostream>
+
+namespace Step40
+{
+  using namespace dealii;
+
+  // @sect3{The <code>LaplaceProblem</code> class template}
+
+  // Next let's declare the main class of this program. Its structure is
+  // almost exactly that of the step-6 tutorial program. The only significant
+  // differences are:
+  // - The <code>mpi_communicator</code> variable that
+  //   describes the set of processors we want this code to run on. In practice,
+  //   this will be MPI_COMM_WORLD, i.e. all processors the batch scheduling
+  //   system has assigned to this particular job.
+  // - The presence of the <code>pcout</code> variable of type ConditionalOStream.
+  // - The obvious use of parallel::distributed::Triangulation instead of Triangulation.
+  // - The presence of two IndexSet objects that denote which sets of degrees of
+  //   freedom (and associated elements of solution and right hand side vectors)
+  //   we own on the current processor and which we need (as ghost elements) for
+  //   the algorithms in this program to work.
+  // - The fact that all matrices and vectors are now distributed. We use
+  //   their PETScWrapper versions for this since deal.II's own classes do not
+  //   provide %parallel functionality. Note that as part of this class, we
+  //   store a solution vector that does not only contain the degrees of freedom
+  //   the current processor owns, but also (as ghost elements) all those vector
+  //   elements that correspond to "locally relevant" degrees of freedom
+  //   (i.e. all those that live on locally owned cells or the layer of ghost
+  //   cells that surround it).
+  template <int dim>
+  class LaplaceProblem  // parallel adaptive Laplace solver; dim is the space dimension
+  {
+  public:
+    LaplaceProblem ();
+    ~LaplaceProblem ();
+
+    void run ();
+
+  private:
+    void setup_system ();     // distribute DoFs, build constraints, size matrix and vectors
+    void assemble_system ();  // assemble matrix and right hand side on locally owned cells
+    void solve ();            // CG solve with an AMG preconditioner
+    void refine_grid ();
+    void output_results (const unsigned int cycle) const;
+
+    MPI_Comm                                  mpi_communicator;  // set of processes this problem runs on
+
+    parallel::distributed::Triangulation<dim> triangulation;  // mesh distributed across the processes
+
+    DoFHandler<dim>                           dof_handler;  // declared before fe, so destroyed after it
+    FE_Q<dim>                                 fe;
+
+    IndexSet                                  locally_owned_dofs;     // DoFs owned by this process
+    IndexSet                                  locally_relevant_dofs;  // owned DoFs plus those needed as ghosts
+
+    ConstraintMatrix                          constraints;  // hanging node and boundary value constraints
+
+    LA::MPI::SparseMatrix                     system_matrix;
+    LA::MPI::Vector                           locally_relevant_solution;  // solution including ghost entries
+    LA::MPI::Vector                           system_rhs;  // right hand side, locally owned entries only
+
+    ConditionalOStream                        pcout;  // prints only on MPI process 0
+    TimerOutput                               computing_timer;  // wall-time summary per program section
+  };
+
+
+  // @sect3{The <code>LaplaceProblem</code> class implementation}
+
+  // @sect4{Constructors and destructors}
+
+  // Constructors and destructors are rather trivial. In addition to what we
+  // do in step-6, we set the set of processors we want to work on to all
+  // machines available (MPI_COMM_WORLD); ask the triangulation to ensure that
+  // the mesh remains smooth and free of refined islands, for example; and
+  // initialize the <code>pcout</code> variable to only allow processor zero
+  // to output anything. The final piece is to initialize a timer that we
+  // use to determine how much compute time the different parts of the program
+  // take:
+  template <int dim>
+  LaplaceProblem<dim>::LaplaceProblem ()  // members are initialized in declaration order
+    :
+    mpi_communicator (MPI_COMM_WORLD),  // run on all available processes
+    triangulation (mpi_communicator,
+                   typename Triangulation<dim>::MeshSmoothing
+                   (Triangulation<dim>::smoothing_on_refinement |
+                    Triangulation<dim>::smoothing_on_coarsening)),  // both smoothing flags combined
+    dof_handler (triangulation),
+    fe (2),  // argument 2 is presumably the polynomial degree of FE_Q -- confirm against FE_Q docs
+    pcout (std::cout,
+           (Utilities::MPI::this_mpi_process(mpi_communicator)
+            == 0)),  // output is enabled only on process 0
+    computing_timer (mpi_communicator,
+                     pcout,  // timer output goes through pcout
+                     TimerOutput::summary,
+                     TimerOutput::wall_times)
+  {}
+
+
+
+  template <int dim>
+  LaplaceProblem<dim>::~LaplaceProblem ()
+  {  // fe is declared after dof_handler in the class, so fe is destroyed first;
+    dof_handler.clear ();  // presumably this detaches the handler from fe beforehand -- confirm
+  }
+
+
+  // @sect4{LaplaceProblem::setup_system}
+
+  // The following function is, arguably, the most interesting one in the
+  // entire program since it goes to the heart of what distinguishes %parallel
+  // step-40 from sequential step-6.
+  //
+  // At the top we do what we always do: tell the DoFHandler object to
+  // distribute degrees of freedom. Since the triangulation we use here is
+  // distributed, the DoFHandler object is smart enough to recognize that on
+  // each processor it can only distribute degrees of freedom on cells it
+  // owns; this is followed by an exchange step in which processors tell each
+  // other about degrees of freedom on ghost cells. The result is a DoFHandler
+  // that knows about the degrees of freedom on locally owned cells and ghost
+  // cells (i.e. cells adjacent to locally owned cells) but nothing about
+  // cells that are further away, consistent with the basic philosophy of
+  // distributed computing that no processor can know everything.
+  template <int dim>
+  void LaplaceProblem<dim>::setup_system ()
+  {
+    TimerOutput::Scope t(computing_timer, "setup");  // accounted under "setup" in the timer summary
+
+    dof_handler.distribute_dofs (fe);
+
+    // The next two lines extract some information we will need later on,
+    // namely two index sets that provide information about which degrees of
+    // freedom are owned by the current processor (this information will be
+    // used to initialize solution and right hand side vectors, and the system
+    // matrix, indicating which elements to store on the current processor and
+    // which to expect to be stored somewhere else); and an index set that
+    // indicates which degrees of freedom are locally relevant (i.e. live on
+    // cells that the current processor owns or on the layer of ghost cells
+    // around the locally owned cells; we need all of these degrees of
+    // freedom, for example, to estimate the error on the local cells).
+    locally_owned_dofs = dof_handler.locally_owned_dofs ();
+    DoFTools::extract_locally_relevant_dofs (dof_handler,
+                                             locally_relevant_dofs);  // output argument
+
+    // Next, let us initialize the solution and right hand side vectors. As
+    // mentioned above, the solution vector we seek does not only store
+    // elements we own, but also ghost entries; on the other hand, the right
+    // hand side vector only needs to have the entries the current processor
+    // owns since all we will ever do is write into it, never read from it on
+    // locally owned cells (of course the linear solvers will read from it,
+    // but they do not care about the geometric location of degrees of
+    // freedom).
+    locally_relevant_solution.reinit (locally_owned_dofs,
+                                      locally_relevant_dofs, mpi_communicator);
+    system_rhs.reinit (locally_owned_dofs, mpi_communicator);  // no ghost entries here
+
+    // The next step is to compute hanging node and boundary value
+    // constraints, which we combine into a single object storing all
+    // constraints.
+    //
+    // As with all other things in %parallel, the mantra must be that no
+    // processor can store all information about the entire universe. As a
+    // consequence, we need to tell the constraints object for which degrees
+    // of freedom it can store constraints and for which it may not expect any
+    // information to store. In our case, as explained in the @ref distributed
+    // module, the degrees of freedom we need to care about on each processor
+    // are the locally relevant ones, so we pass this to the
+    // ConstraintMatrix::reinit function. As a side note, if you forget to
+    // pass this argument, the ConstraintMatrix class will allocate an array
+    // with length equal to the largest DoF index it has seen so far. For
+    // processors with high MPI process number, this may be very large --
+    // maybe on the order of billions. The program would then allocate more
+    // memory than for likely all other operations combined for this single
+    // array.
+    constraints.clear ();
+    constraints.reinit (locally_relevant_dofs);
+    DoFTools::make_hanging_node_constraints (dof_handler, constraints);
+    VectorTools::interpolate_boundary_values (dof_handler,
+                                              0,
+                                              ZeroFunction<dim>(),  // zero boundary values
+                                              constraints);
+    constraints.close ();  // no constraints are added after this point
+
+    // The last part of this function deals with initializing the matrix with
+    // accompanying sparsity pattern. As in previous tutorial programs, we use
+    // the DynamicSparsityPattern as an intermediate with which we
+    // then initialize the PETSc matrix. To do so we have to tell the sparsity
+    // pattern its size but as above there is no way the resulting object will
+    // be able to store even a single pointer for each global degree of
+    // freedom; the best we can hope for is that it stores information about
+    // each locally relevant degree of freedom, i.e. all those that we may
+    // ever touch in the process of assembling the matrix (the @ref
+    // distributed_paper "distributed computing paper" has a long discussion
+    // why one really needs the locally relevant, and not the smaller set of
+    // locally active degrees of freedom in this context).
+    //
+    // So we tell the sparsity pattern its size and what DoFs to store
+    // anything for and then ask DoFTools::make_sparsity_pattern to fill it
+    // (this function ignores all cells that are not locally owned, mimicking
+    // what we will do below in the assembly process). After this, we call a
+    // function that exchanges entries in these sparsity patterns between
+    // processors so that in the end each processor really knows about all the
+    // entries that will exist in that part of the finite element matrix that
+    // it will own. The final step is to initialize the matrix with the
+    // sparsity pattern.
+    DynamicSparsityPattern dsp (locally_relevant_dofs);
+
+    DoFTools::make_sparsity_pattern (dof_handler, dsp,
+                                     constraints, false);
+    SparsityTools::distribute_sparsity_pattern (dsp,
+                                                dof_handler.n_locally_owned_dofs_per_processor(),
+                                                mpi_communicator,
+                                                locally_relevant_dofs);
+
+    system_matrix.reinit (locally_owned_dofs,
+                          locally_owned_dofs,  // the same index set is passed twice
+                          dsp,
+                          mpi_communicator);
+  }
+
+
+
+  // @sect4{LaplaceProblem::assemble_system}
+
+  // The function that then assembles the linear system is comparatively
+  // boring, being almost exactly what we've seen before. The points to watch
+  // out for are:
+  // - Assembly must only loop over locally owned cells. There
+  //   are multiple ways to test that; for example, we could compare a cell's
+  //   subdomain_id against information from the triangulation as in
+  //   <code>cell->subdomain_id() ==
+  //   triangulation.locally_owned_subdomain()</code>, or skip all cells for
+  //   which the condition <code>cell->is_ghost() ||
+  //   cell->is_artificial()</code> is true. The simplest way, however, is to
+  //   simply ask the cell whether it is owned by the local processor.
+  // - Copying local contributions into the global matrix must include
+  //   distributing constraints and boundary values. In other words, we cannot
+  //   (as we did in step-6) first copy every local contribution into the global
+  //   matrix and only in a later step take care of hanging node constraints and
+  //   boundary values. The reason is, as discussed in step-17, that PETSc does
+  //   not provide access to arbitrary elements of the matrix once they have
+  //   been assembled into it -- in part because they may simply no longer
+  //   reside on the current processor but have instead been shipped to a
+  //   different machine.
+  // - The way we compute the right hand side (given the
+  //   formula stated in the introduction) may not be the most elegant but will
+  //   do for a program whose focus lies somewhere entirely different.
+  template <int dim>
+  void LaplaceProblem<dim>::assemble_system ()
+  {
+    TimerOutput::Scope t(computing_timer, "assembly");  // accounted under "assembly" in the timer summary
+
+    const QGauss<dim>  quadrature_formula(3);
+
+    FEValues<dim> fe_values (fe, quadrature_formula,
+                             update_values    |  update_gradients |
+                             update_quadrature_points |
+                             update_JxW_values);
+
+    const unsigned int   dofs_per_cell = fe.dofs_per_cell;
+    const unsigned int   n_q_points    = quadrature_formula.size();
+
+    FullMatrix<double>   cell_matrix (dofs_per_cell, dofs_per_cell);
+    Vector<double>       cell_rhs (dofs_per_cell);
+
+    std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+    for (; cell!=endc; ++cell)
+      if (cell->is_locally_owned())  // cells not owned by this processor are skipped
+        {
+          cell_matrix = 0;  // reset the local contributions for this cell
+          cell_rhs = 0;
+
+          fe_values.reinit (cell);
+
+          for (unsigned int q_point=0; q_point<n_q_points; ++q_point)
+            {
+              const double
+              rhs_value
+                = (fe_values.quadrature_point(q_point)[1]
+                   >
+                   0.5+0.25*std::sin(4.0 * numbers::PI *
+                                     fe_values.quadrature_point(q_point)[0])
+                   ? 1 : -1);  // +1 where y > 0.5+0.25*sin(4*pi*x), -1 otherwise
+
+              for (unsigned int i=0; i<dofs_per_cell; ++i)
+                {
+                  for (unsigned int j=0; j<dofs_per_cell; ++j)
+                    cell_matrix(i,j) += (fe_values.shape_grad(i,q_point) *
+                                         fe_values.shape_grad(j,q_point) *
+                                         fe_values.JxW(q_point));
+
+                  cell_rhs(i) += (rhs_value *
+                                  fe_values.shape_value(i,q_point) *
+                                  fe_values.JxW(q_point));
+                }
+            }
+
+          cell->get_dof_indices (local_dof_indices);
+          constraints.distribute_local_to_global (cell_matrix,
+                                                  cell_rhs,
+                                                  local_dof_indices,
+                                                  system_matrix,
+                                                  system_rhs);  // local data goes into the global objects via the constraints
+        }
+
+    // Notice that the assembling above is just a local operation. So, to
+    // form the "global" linear system, a synchronization between all
+    // processors is needed. This could be done by invoking the function
+    // compress(). See @ref GlossCompress  "Compressing distributed objects"
+    // for more information on what compress() is designed to do.
+    system_matrix.compress (VectorOperation::add);
+    system_rhs.compress (VectorOperation::add);
+  }
+
+
+
+  // @sect4{LaplaceProblem::solve}
+
+  // Even though solving linear systems on potentially tens of thousands of
+  // processors is by far not a trivial job, the function that does this is --
+  // at least at the outside -- relatively simple. Most of the parts you've
+  // seen before. There are really only two things worth mentioning:
+  // - Solvers and preconditioners are built on the deal.II wrappers of PETSc
+  //   functionality. It is relatively well known that the primary bottleneck of
+  //   massively %parallel linear solvers is not actually the communication
+  //   between processors, but the fact that it is difficult to produce
+  //   preconditioners that scale well to large numbers of processors. Over the
+  //   second half of the first decade of the 21st century, it has become clear
+  //   that algebraic multigrid (AMG) methods turn out to be extremely efficient
+  //   in this context, and we will use one of them -- the BoomerAMG
+  //   implementation of the Hypre package that can be interfaced to through
+  //   PETSc -- for the current program. The rest of the solver itself is
+  //   boilerplate and has been shown before. Since the linear system is
+  //   symmetric and positive definite, we can use the CG method as the outer
+  //   solver.
+  // - Ultimately, we want a vector that stores not only the elements
+  //   of the solution for degrees of freedom the current processor owns, but
+  //   also all other locally relevant degrees of freedom. On the other hand,
+  //   the solver itself needs a vector that is uniquely split between
+  //   processors, without any overlap. We therefore create a vector at the
+  //   beginning of this function that has these properties, use it to solve the
+  //   linear system, and only assign it to the vector we want at the very
+  //   end. This last step ensures that all ghost elements are also copied as
+  //   necessary.
+  template <int dim>
+  void LaplaceProblem<dim>::solve ()
+  {
+    TimerOutput::Scope t(computing_timer, "solve");  // accounted under "solve" in the timer summary
+    LA::MPI::Vector
+    completely_distributed_solution (locally_owned_dofs, mpi_communicator);  // owned entries only, no ghosts
+
+    SolverControl solver_control (dof_handler.n_dofs(), 1e-12);  // presumably (max iterations, tolerance) -- confirm
+
+#ifdef USE_PETSC_LA
+    LA::SolverCG solver(solver_control, mpi_communicator);  // PETSc variant also takes the communicator
+#else
+    LA::SolverCG solver(solver_control);  // Trilinos variant
+#endif
+
+    LA::MPI::PreconditionAMG preconditioner;
+
+    LA::MPI::PreconditionAMG::AdditionalData data;
+
+#ifdef USE_PETSC_LA
+    data.symmetric_operator = true;  // set only in the PETSc branch
+#else
+    /* Trilinos defaults are good */
+#endif
+    preconditioner.initialize(system_matrix, data);
+
+    solver.solve (system_matrix, completely_distributed_solution, system_rhs,
+                  preconditioner);
+
+    pcout << "   Solved in " << solver_control.last_step()
+          << " iterations." << std::endl;
+
+    constraints.distribute (completely_distributed_solution);  // presumably fills in values of constrained DoFs -- confirm
+
+    locally_relevant_solution = completely_distributed_solution;  // assign to the ghosted solution vector
+  }
+
+
+
+  // @sect4{LaplaceProblem::refine_grid}
+
+  // The function that estimates the error and refines the grid is again
+  // almost exactly like the one in step-6. The only difference is that the
+  // function that flags cells to be refined is now in namespace
+  // parallel::distributed::GridRefinement -- a namespace that has functions
+  // that can communicate between all involved processors and determine global
+  // thresholds to use in deciding which cells to refine and which to coarsen.
+  //
+  // Note that we didn't have to do anything special about the
+  // KellyErrorEstimator class: we just give it a vector with as many elements
+  // as the local triangulation has cells (locally owned cells, ghost cells,
+  // and artificial ones), but it only fills those entries that correspond to
+  // cells that are locally owned.
+  template <int dim>
+  void LaplaceProblem<dim>::refine_grid ()
+  {
+    TimerOutput::Scope timer_section (computing_timer, "refine");
+
+    // One indicator entry per active cell of the local triangulation.
+    Vector<float> cell_indicators (triangulation.n_active_cells());
+    const QGauss<dim-1> gauss_quadrature (3);
+    KellyErrorEstimator<dim>::estimate (dof_handler, gauss_quadrature,
+                                        typename FunctionMap<dim>::type(),
+                                        locally_relevant_solution,
+                                        cell_indicators);
+    namespace refinement = parallel::distributed::GridRefinement;
+    refinement::refine_and_coarsen_fixed_number (triangulation, cell_indicators,
+                                                 0.3, 0.03);
+    triangulation.execute_coarsening_and_refinement ();
+  }
+
+
+
+  // @sect4{LaplaceProblem::output_results}
+
+  // Compared to the corresponding function in step-6, the one here is a tad
+  // more complicated. There are two reasons: the first one is that we do not
+  // just want to output the solution but also for each cell which processor
+  // owns it (i.e. which "subdomain" it is in). Secondly, as discussed at
+  // length in step-17 and step-18, generating graphical data can be a
+  // bottleneck in parallelizing. In step-18, we have moved this step out of
+  // the actual computation but shifted it into a separate program that later
+  // combined the output from various processors into a single file. But this
+  // doesn't scale: if the number of processors is large, this may mean that
+  // the step of combining data on a single processor later becomes the
+  // longest running part of the program, or it may produce a file that's so
+  // large that it can't be visualized any more. We here follow a more
+  // sensible approach, namely creating individual files for each MPI process
+  // and leaving it to the visualization program to make sense of that.
+  //
+  // To start, the top of the function looks like always. In addition to
+  // attaching the solution vector (the one that has entries for all locally
+  // relevant, not only the locally owned, elements), we attach a data vector
+  // that stores, for each cell, the subdomain the cell belongs to. This is
+  // slightly tricky, because of course not every processor knows about every
+  // cell. The vector we attach therefore has an entry for every cell that the
+  // current processor has in its mesh (locally owned ones, ghost cells, and
+  // artificial cells), but the DataOut class will ignore all entries that
+  // correspond to cells that are not owned by the current processor. As a
+  // consequence, it doesn't actually matter what values we write into these
+  // vector entries: we simply fill the entire vector with the number of the
+  // current MPI process (i.e. the subdomain_id of the current process); this
+  // correctly sets the values we care for, i.e. the entries that correspond
+  // to locally owned cells, while providing the wrong value for all other
+  // elements -- but these are then ignored anyway.
+  template <int dim>
+  void LaplaceProblem<dim>::output_results (const unsigned int cycle) const
+  {
+    // Register the solution vector under the name "u".
+    DataOut<dim> data_out;
+    data_out.attach_dof_handler (dof_handler);
+    data_out.add_data_vector (locally_relevant_solution, "u");
+
+    // Register a second, per-cell vector named "subdomain". Every entry is
+    // set to the subdomain id of the current process, irrespective of
+    // whether the corresponding cell is locally owned.
+    Vector<float> subdomain_ids (triangulation.n_active_cells());
+    for (unsigned int slot=0; slot<subdomain_ids.size(); ++slot)
+      subdomain_ids(slot) = triangulation.locally_owned_subdomain();
+    data_out.add_data_vector (subdomain_ids, "subdomain");
+
+    data_out.build_patches ();
+
+    // All files written for one refinement cycle share the stem
+    // <code>solution-XX</code>, where <code>XX</code> is the cycle number
+    // written with two digits. The data of the current process goes to
+    // <code>solution-XX.PPPP.vtu</code>, with <code>PPPP</code> being its
+    // subdomain id written with four digits and <code>.vtu</code> denoting
+    // the XML-based Visualization Toolkit (VTK) file format.
+    const std::string cycle_stem = "solution-" +
+                                   Utilities::int_to_string (cycle, 2);
+    const std::string piece_name =
+      cycle_stem + "." +
+      Utilities::int_to_string (triangulation.locally_owned_subdomain(), 4) +
+      ".vtu";
+    std::ofstream piece_output (piece_name.c_str());
+    data_out.write_vtu (piece_output);
+
+    // What remains is the "master record"
+    // <code>solution-XX.pvtu</code>, which lists for the visualization
+    // program the names of the <code>.vtu</code> files that combined
+    // represent the graphical data for the entire domain. A single
+    // process suffices for this; we arbitrarily pick process zero and
+    // let all others return here.
+    if (Utilities::MPI::this_mpi_process(mpi_communicator) != 0)
+      return;
+
+    const unsigned int n_processes
+      = Utilities::MPI::n_mpi_processes(mpi_communicator);
+
+    std::vector<std::string> piece_names;
+    for (unsigned int rank=0; rank<n_processes; ++rank)
+      piece_names.push_back (cycle_stem + "." +
+                             Utilities::int_to_string (rank, 4) +
+                             ".vtu");
+
+    std::ofstream master_output ((cycle_stem + ".pvtu").c_str());
+    data_out.write_pvtu_record (master_output, piece_names);
+  }
+
+
+
+  // @sect4{LaplaceProblem::run}
+
+  // The function that controls the overall behavior of the program is again
+  // like the one in step-6. The minor differences are the use of
+  // <code>pcout</code> instead of <code>std::cout</code> for output to the
+  // console (see also step-17) and that we only generate graphical output if
+  // at most 32 processors are involved. Without this limit, it would be just
+  // too easy for people carelessly running this program without reading it
+  // first to bring down the cluster interconnect and fill any file system
+  // available :-)
+  //
+  // A functional difference to step-6 is the use of a square domain and that
+  // we start with a slightly finer mesh (5 global refinement cycles) -- there
+  // just isn't much of a point showing a massively %parallel program starting
+  // on 4 cells (although admittedly the point is only slightly stronger
+  // starting on 1024).
+  template <int dim>
+  void LaplaceProblem<dim>::run ()
+  {
+    const unsigned int n_cycles     = 8;
+    const unsigned int output_limit = 32;   // max. processes writing graphics
+    for (unsigned int cycle=0; cycle<n_cycles; ++cycle)
+      {
+        pcout << "Cycle " << cycle << ':' << std::endl;
+
+        // Later cycles refine the existing mesh; cycle 0 creates it.
+        if (cycle > 0)
+          refine_grid ();
+        else
+          {
+            GridGenerator::hyper_cube (triangulation);
+            triangulation.refine_global (5);
+          }
+
+        setup_system ();
+
+        pcout << "   Number of active cells:       "
+              << triangulation.n_global_active_cells() << std::endl;
+        pcout << "   Number of degrees of freedom: "
+              << dof_handler.n_dofs() << std::endl;
+
+        assemble_system ();
+        solve ();
+
+        if (Utilities::MPI::n_mpi_processes(mpi_communicator) <= output_limit)
+          {
+            TimerOutput::Scope timer_section (computing_timer, "output");
+            output_results (cycle);
+          }
+
+        computing_timer.print_summary ();
+        computing_timer.reset ();
+
+        pcout << std::endl;
+      }
+  }
+}
+
+
+
+// @sect4{main()}
+
+// The final function, <code>main()</code>, again has the same structure as in
+// all other programs, in particular step-6. Like in the other programs that
+// use PETSc, we have to initialize and finalize PETSc, which is done using the
+// helper object MPI_InitFinalize.
+//
+// Note how we enclose the use of the LaplaceProblem class in a pair
+// of braces. This makes sure that all member variables of the object are
+// destroyed by the time we destroy the mpi_initialization object. Not doing
+// this will lead to strange and hard to debug errors when
+// <code>PetscFinalize</code> first deletes all PETSc vectors that are still
+// around, and the destructor of the LaplaceProblem class then tries to delete
+// them again.
+int main(int argc, char *argv[])
+{
+  // Horizontal rule that frames the error reports printed below.
+  const char *const separator
+    = "----------------------------------------------------";
+  try
+    {
+      using namespace dealii;
+      using namespace Step40;
+
+      // Declared before the problem object and therefore destroyed after
+      // it, i.e. only once all members of the problem object are gone.
+      Utilities::MPI::MPI_InitFinalize mpi_initialization(argc, argv, 1);
+
+      LaplaceProblem<2> laplace_problem_2d;
+      laplace_problem_2d.run ();
+    }
+  catch (std::exception &error)
+    {
+      std::cerr << std::endl << std::endl
+                << separator << std::endl
+                << "Exception on processing: " << std::endl
+                << error.what() << std::endl
+                << "Aborting!" << std::endl
+                << separator << std::endl;
+      return 1;
+    }
+  catch (...)
+    {
+      std::cerr << std::endl << std::endl
+                << separator << std::endl
+                << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << separator << std::endl;
+      return 1;
+    }
+
+  return 0;
+}
diff --git a/examples/step-41/CMakeLists.txt b/examples/step-41/CMakeLists.txt
new file mode 100644
index 0000000..e8ccded
--- /dev/null
+++ b/examples/step-41/CMakeLists.txt
@@ -0,0 +1,50 @@
+##
+#  CMake script for the step-41 tutorial program:
+##
+
+# Name shared by the project and the build target:
+set(TARGET "step-41")
+
+# Source files that make up the target. For now this is the single
+# step-X.cc file; add further files here as your project grows. For a
+# much larger project you may instead collect the files with
+# something like
+#  file(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  file(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  set(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or move on to the large project CMakeLists.txt file described on
+# the "CMake in user projects" page, reachable from the "User info"
+# page of the documentation.
+set(TARGET_SRC ${TARGET}.cc)
+
+# Usually, you will not need to modify anything beyond this point...
+
+cmake_minimum_required(VERSION 2.8.8)
+
+# Look for deal.II (requested version: 8.4) in the hinted locations.
+find_package(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+if(NOT ${deal.II_FOUND})
+  message(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+endif()
+
+#
+# This tutorial step requires DEAL_II_WITH_TRILINOS to be set:
+#
+if(NOT DEAL_II_WITH_TRILINOS)
+  message(FATAL_ERROR "
+Error! The deal.II library found at ${DEAL_II_PATH} was not configured with
+    DEAL_II_WITH_TRILINOS = ON
+One or all of these are OFF in your installation but are required for this tutorial step."
+    )
+endif()
+
+# The remaining setup is delegated to commands provided by deal.II.
+deal_ii_initialize_cached_variables()
+project(${TARGET})
+deal_ii_invoke_autopilot()
diff --git a/examples/step-41/doc/builds-on b/examples/step-41/doc/builds-on
new file mode 100644
index 0000000..78300ce
--- /dev/null
+++ b/examples/step-41/doc/builds-on
@@ -0,0 +1 @@
+step-15
diff --git a/examples/step-41/doc/intro.dox b/examples/step-41/doc/intro.dox
new file mode 100644
index 0000000..62e8876
--- /dev/null
+++ b/examples/step-41/doc/intro.dox
@@ -0,0 +1,418 @@
+<br>
+
+<i>This program was contributed by Jörg Frohne (University of Siegen,
+Germany) while on a long-term visit to Texas A&M University.
+<br>
+This material is based upon work partly supported by ThyssenKrupp Steel Europe.
+</i>
+
+
+<a name="Intro"></a>
+<h3>Introduction</h3>
+
+This example is based on the Laplace equation in 2d and deals with the
+question what happens if a membrane is deflected by some external force but is
+also constrained by an obstacle. In other words, think of an elastic membrane
+clamped at the boundary to a rectangular frame (we choose $\Omega =
+\left[-1,1\right]^2$) and that sags through due to gravity acting on it. What
+happens now if there is an obstacle under the membrane that prevents it from
+reaching its equilibrium position if gravity was the only existing force? In
+the current example program, we will consider that under the membrane is a
+stair step obstacle against which gravity pushes the membrane.
+
+This problem is typically called the "obstacle problem" (see also <a
+href="http://en.wikipedia.org/wiki/Obstacle_problem">this Wikipedia article</a>), and it results in a
+variational inequality, rather than a variational equation when put into the
+weak form. We will below derive it from the classical formulation, but before we
+go on to discuss the mathematics let us show how the solution of the problem we
+will consider in this tutorial program looks to gain some intuition of what
+we should expect:
+
+<table align="center" class="tutorial" cellspacing="3" cellpadding="3">
+  <tr>
+    <td align="center">
+        <img src="http://www.dealii.org/images/steps/developer/step-41.displacement.png" alt="">
+    </td>
+    <td align="center">
+        <img src="http://www.dealii.org/images/steps/developer/step-41.active-set.png" alt="">
+    </td>
+  </tr>
+</table>
+
+Here, at the left, we see the displacement of the membrane. The shape
+of the obstacle underneath is clearly visible. On the right, we overlay which
+parts of the membrane are in contact with the obstacle. We will later call
+this set of points the "active set" to indicate that an inequality constraint
+is active there.
+
+
+<h3>Classical formulation</h3>
+
+The classical formulation of the problem possesses the following form:
+@f{align*}
+ -\textrm{div}\ \sigma &\geq f & &\quad\text{in } \Omega,\\
+ \sigma &= \nabla u & &\quad\text{in } \Omega,\\
+ u(\mathbf x) &= 0 & &\quad\text{on }\partial\Omega,\\
+(-\Delta u - f)(u - g) &= 0 & &\quad\text{in } \Omega,\\
+ u(\mathbf x) &\geq g(\mathbf x) & &\quad\text{in } \Omega
+@f}
+with $u\in H^2(\Omega)$.  $u$ is a scalar valued function that denotes the
+vertical displacement of the membrane. The first equation is called equilibrium
+condition with a force of areal density $f$. Here, we will consider this force
+to be gravity. The second one is known as Hooke's Law that says that the stresses
+$\sigma$ are proportional to the gradient of the displacements $u$ (the
+proportionality constant, often denoted by $E$, has been set to one here,
+without loss of generality; if it is constant, it can be put into the right
+hand side function). At the boundary we have zero Dirichlet
+conditions. Obviously, the first two equations can be combined to yield
+$-\Delta u \ge f$.
+
+Intuitively, gravity acts downward and so $f(\mathbf x)$ is a negative
+function (we choose $f=-10$ in this program). The first condition then means
+that the total force acting on the membrane is gravity plus something
+positive: namely the upward force that the obstacle exerts on the membrane at
+those places where the two of them are in contact. How big is this additional
+force? We don't know yet (and neither do we know "where" it actually acts) but
+it must be so that the membrane doesn't penetrate the obstacle.
+
+The fourth equality above together with the last inequality forms the obstacle
+condition which has to hold at every point of the whole domain. The latter of
+these two means that the membrane must be above the obstacle $g(\mathbf x)$
+everywhere. The second to last equation, often called the "complementarity
+condition" says that where the membrane is not in contact with the obstacle
+(i.e., those $\mathbf x$ where $u(\mathbf x) - g(\mathbf x) \neq 0$), then
+$-\Delta u=f$ at these locations; in other words, no additional forces act
+there, as expected. On the other hand, where $u=g$ we can have $-\Delta u-f
+\neq 0$, i.e., there can be additional forces (though there don't have to be:
+it is possible for the membrane to just touch, not press against, the
+obstacle).
+
+
+<h3>Derivation of the variational inequality</h3>
+
+An obvious way to obtain the variational formulation of the obstacle problem is to consider the total potential energy:
+@f{equation*}
+ E(u):=\dfrac{1}{2}\int\limits_{\Omega} \nabla u \cdot \nabla u - \int\limits_{\Omega} fu.
+@f}
+We have to find a solution $u\in G$ of the following minimization problem:
+@f{equation*}
+ E(u)\leq E(v)\quad \forall v\in G,
+@f}
+with the convex set of admissible displacements:
+@f{equation*}
+ G:=\lbrace v\in V: v\geq g \text{ a.e. in } \Omega\rbrace,\quad V:=H^1_0(\Omega).
+@f}
+This set takes care of the third and fifth conditions above (the boundary
+values and the obstacle inequality $u\geq g$).
+
+Consider now the minimizer $u\in G$ of $E$ and any other function $v\in
+G$. Then the function
+@f{equation*}
+ F(\varepsilon) := E(u+\varepsilon(v-u)),\quad\varepsilon\in\left[0,1\right],
+@f}
+takes its minimum at $\varepsilon = 0$ (because $u$ is a minimizer of the
+energy functional $E(\cdot)$), so that $F'(0)\geq 0$ for any choice
+of $v$. Note that
+$u+\varepsilon(v-u) = (1-\varepsilon)u+\varepsilon v\in G$ because of the
+convexity of $G$. If we compute $F'(\varepsilon)\vert_{\varepsilon=0}$ it
+yields the variational formulation we are searching for:
+
+<i>Find a function $u\in G$ with</i>
+@f{equation*}
+ \left(\nabla u, \nabla(v-u)\right) \geq \left(f,v-u\right) \quad \forall v\in G.
+@f}
+
+This is the typical form of variational inequalities, where not just $v$
+appears in the bilinear form but in fact $v-u$. The reason is this: if $u$ is
+not constrained, then we can find test functions $v$ in $G$ so that $v-u$ can have
+any sign. By choosing test functions $v_1,v_2$ so that $v_1-u = -(v_2-u)$ it
+follows that the inequality can only hold for both $v_1$ and $v_2$ if the two
+sides are in fact equal, i.e., we obtain a variational equality.
+
+On the other hand, if $u=g$ then $G$ only allows test functions $v$ so that in fact
+$v-u\ge 0$. This means that we can't test the equation with both $v-u$ and
+$-(v-u)$ as above, and so we can no longer conclude that the two sides are in
+fact equal. Thus, this mimics the way we have discussed the complementarity
+condition above.
+
+
+
+<h3>Formulation as a saddle point problem</h3>
+
+The variational inequality above is awkward to work with. We would therefore
+like to reformulate it as an equivalent saddle point problem. We introduce a
+Lagrange multiplier $\lambda$ and the convex cone $K\subset V'$, $V'$
+dual space of $V$, $K:=\{\mu\in V': \langle\mu,v\rangle\geq 0,\quad \forall
+v\in V, v \le 0 \}$ of
+Lagrange multipliers, where $\langle\cdot,\cdot\rangle$ denotes the duality
+pairing between $V'$ and $V$. Intuitively, $K$ is the cone of all "non-positive
+functions", except that $K\subset (H_0^1)'$ and so contains other objects
+besides regular functions as well.
+This yields:
+
+<i>Find $u\in V$ and $\lambda\in K$ such that</i>
+@f{align*}
+ a(u,v) + b(v,\lambda) &= f(v),\quad &&v\in V\\
+ b(u,\mu - \lambda) &\leq \langle g,\mu - \lambda\rangle,\quad&&\mu\in K,
+@f}
+<i>with</i>
+@f{align*}
+ a(u,v) &:= \left(\nabla u, \nabla v\right),\quad &&u,v\in V\\
+ b(u,\mu) &:= \langle u,\mu\rangle,\quad &&u\in V,\quad\mu\in V'.
+@f}
+In other words, we can consider $\lambda$ as the negative of the additional, positive force that the
+obstacle exerts on the membrane. The inequality in the second line of the
+statement above only appears to have the wrong sign because we have
+$\mu-\lambda<0$ at points where $\lambda=0$, given the definition of $K$.
+
+The existence and uniqueness of a solution $(u,\lambda)\in V\times K$ of this saddle
+point problem have been stated in Glowinski, Lions and Trémolières: Numerical Analysis of Variational
+Inequalities, North-Holland, 1981.
+
+
+
+<h3>Active Set methods to solve the saddle point problem</h3>
+
+There are different methods to solve the variational inequality. As one
+possibility you can understand the saddle point problem as a convex quadratic program (QP) with
+inequality constraints.
+
+To get there, let us assume that we discretize both $u$ and $\lambda$ with the
+same finite element space, for example the usual $Q_k$ spaces. We would then
+get the equations
+@f{eqnarray*}
+ &A U + B\Lambda = F,&\\
+ &[BU-G]_i \geq 0, \quad \Lambda_i \leq 0,\quad \Lambda_i[BU-G]_i = 0
+\qquad \forall i.&
+@f}
+where $B$ is the mass matrix on the chosen finite element space and the
+indices $i$ above are for all degrees of freedom in the set $\cal S$ of degrees of
+freedom located in the interior of the domain
+(we have Dirichlet conditions on the perimeter). However, we
+can make our life simpler if we use a particular quadrature rule when
+assembling all terms that yield this mass matrix, namely a quadrature formula
+where quadrature points are only located at the interpolation points at
+which shape functions are defined; since all but one shape function are zero
+at these locations, we get a diagonal mass matrix with
+@f{align*}
+  B_{ii} = \int_\Omega \varphi_i(\mathbf x)^2\ \textrm{d}x,
+  \qquad
+  B_{ij}=0 \ \text{for } i\neq j.
+@f}
+To define $G$ we use the same technique as for $B$. In other words, we
+define
+@f{align*}
+  G_{i} = \int_\Omega g_h(x) \varphi_i(\mathbf x)\ \textrm{d}x,
+@f}
+where $g_h$ is a suitable approximation of $g$. The integrals in the definitions
+of $B_{ii}$ and $G_i$ are then approximated by the trapezoidal rule.
+With this, the equations above can be restated as
+@f{eqnarray*}
+ &A U + B\Lambda = F,&\\
+ &U_i-B_{ii}^{-1}G_i \ge 0, \quad \Lambda_i \leq 0,\quad \Lambda_i[U_i-B_{ii}^{-1}G_i] = 0
+\qquad \forall i\in{\cal S}.&
+@f}
+
+Now we define for each degree of freedom $i$ the function
+@f{equation*}
+ C([BU]_i,\Lambda_i):=-\Lambda_i + \min\lbrace 0, \Lambda_i + c([BU]_i - G_i) \rbrace,
+@f}
+with some $c>0$. (In this program we choose $c = 100$. It is a kind of a
+penalty parameter which depends on the problem itself and needs to be chosen
+large enough; for example there is no convergence for $c = 1$ using the
+current program if we use 7 global refinements.)
+
+After some head-scratching one can then convince oneself that the inequalities
+above can equivalently be rewritten as
+@f{equation*}
+ C([BU]_i,\Lambda_i) = 0, \qquad \forall i\in{\cal S}.
+@f}
+The primal-dual active set strategy we will use here is an iterative scheme which is based on
+this condition to predict the next active and inactive sets $\mathcal{A}_k$ and
+$\mathcal{F}_k$ (that is, those complementary sets of indices $i$ for which
+$U_i$ is either equal to or not equal to the value of the obstacle
+$B^{-1}G$). For a more in depth treatment of this approach, see Hintermueller, Ito, Kunisch: The primal-dual active set
+strategy as a semismooth Newton method, SIAM J. Optim., 2003, Vol. 13, No. 3,
+pp. 865-888.
+
+<h3>The primal-dual active set algorithm</h3>
+
+The algorithm for the primal-dual active set method works as follows (NOTE: $B = B^T$):
+
+- [(0)] Initialize $\mathcal{A}_k$ and $\mathcal{F}_k$, such that
+ $\mathcal{S}=\mathcal{A}_k\cup\mathcal{F}_k$ and
+ $\mathcal{A}_k\cap\mathcal{F}_k=\emptyset$ and set $k=1$.
+- [(1)] Find the primal-dual pair $(U^k,\Lambda^k)$ that satisfies
+ @f{align*}
+  AU^k + B\Lambda^k &= F,\\
+  [BU^k]_i &= G_i\quad&&\forall i\in\mathcal{A}_k,\\
+  \Lambda_i^k &= 0\quad&&\forall i\in\mathcal{F}_k.
+ @f}
+ Note that the second and third conditions imply that exactly $|S|$ unknowns
+ are fixed, with the first condition yielding the remaining $|S|$ equations
+ necessary to determine both $U$ and $\Lambda$.
+- [(2)] Define the new active and inactive sets by
+ @f{equation*}
+ \begin{split}
+  \mathcal{A}_{k+1}:=\lbrace i\in\mathcal{S}:\Lambda^k_i + c([BU^k]_i - G_i)< 0\rbrace,\\
+  \mathcal{F}_{k+1}:=\lbrace i\in\mathcal{S}:\Lambda^k_i + c([BU^k]_i - G_i)\geq 0\rbrace.
+ \end{split}
+ @f}
+- [(3)] If $\mathcal{A}_{k+1}=\mathcal{A}_k$ (and then, obviously, also
+ $\mathcal{F}_{k+1}=\mathcal{F}_k$) then stop, else set $k=k+1$ and go to step
+ (1).
+
+The method is called "primal-dual" because it uses both primal (the
+displacement $U$) as well as dual variables (the Lagrange multiplier
+$\Lambda$) to determine the next active set.
+
+At the end of this section, let us add two observations. First,
+for any primal-dual pair $(U^k,\Lambda^k)$ that satisfies these
+conditions, we can distinguish the following cases:
+
+- [1.] $\Lambda^k_i + c([BU^k]_i - G_i) < 0$ (i active):
+  <br>
+  Then either $[BU^k]_i<G_i$ and $\Lambda^k_i=0$ (penetration) or $\Lambda^k_i<0$ and $[BU^k]_i=G_i$ (pressing load).
+- [2.] $\Lambda^k_i + c([BU^k]_i - G_i)\geq 0$ (i inactive):
+  <br>
+  Then either $[BU^k]_i\geq G_i$ and $\Lambda^k_i=0$ (no contact) or $\Lambda^k_i\geq0$ and $[BU^k]_i=G_i$ (unpressing load).
+
+Second, the method above appears intuitively correct and useful but a bit ad
+hoc. However, it can be derived concisely in the following way. To this
+end, note that we'd like to solve the nonlinear system
+@f{eqnarray*}
+ &A U + B\Lambda = F,&\\
+ &C([BU]_i, \Lambda_i) = 0,
+\qquad \forall i.&
+@f}
+We can iteratively solve this by always linearizing around the previous
+iterate (i.e., applying a Newton method), but for this we need to linearize
+the function $C(\cdot,\cdot)$ that is not differentiable. That said, it is
+slantly differentiable, and in fact we have
+@f{equation*}
+ \dfrac{\partial}{\partial U^k_i}C([BU^k]_i,\Lambda^k_i) = \begin{cases}
+                                   cB_{ii},& \text{if}\ \Lambda^k_i + c([BU^k]_i - G_i)< 0\\
+                                   0,& \text{if}\ \Lambda^k_i + c([BU^k]_i - G_i)\geq 0.
+                                  \end{cases}
+@f}
+@f{equation*}
+ \dfrac{\partial}{\partial\Lambda^k_i}C([BU^k]_i,\Lambda^k_i) = \begin{cases}
+                                   0,& \text{if}\ \Lambda^k_i + c([BU^k]_i - G_i)< 0\\
+                                   -1,& \text{if}\ \Lambda^k_i + c([BU^k]_i - G_i)\geq 0.
+                                  \end{cases}
+@f}
+This suggests a semismooth Newton step of the form
+@f{equation*}
+ \begin{pmatrix}
+ A_{\mathcal{F}_k\mathcal{F}_k} & A_{\mathcal{F}_k\mathcal{A}_k} & B_{\mathcal{F}_k} & 0\\
+ A_{\mathcal{A}_k\mathcal{F}_k} & A_{\mathcal{A}_k\mathcal{A}_k} & 0 & B_{\mathcal{A}_k}\\
+ 0 & 0 & -Id_{\mathcal{F}_k} & 0\\
+ 0 & cB_{\mathcal{A}_k} & 0 & 0
+\end{pmatrix}
+\begin{pmatrix}
+ \delta U^k_{\mathcal{F}_k}\\ \delta U^k_{\mathcal{A}_k}\\ \delta \Lambda^k_{\mathcal{F}_k}\\ \delta \Lambda^k_{\mathcal{A}_k}
+\end{pmatrix}
+=
+-\begin{pmatrix}
+ (AU^k + B\Lambda^k - F)_{\mathcal{F}_k}\\ (AU^k + B\Lambda^k - F)_{\mathcal{A}_k}\\ -\Lambda^k_{\mathcal{F}_k}\\ c(B_{\mathcal{A}_k} U^k - G)_{\mathcal{A}_k}
+\end{pmatrix},
+@f}
+where we have split matrices $A,B$ as well as vectors in the natural way into
+rows and columns whose indices belong to either the active set
+${\mathcal{A}_k}$ or the inactive set ${\mathcal{F}_k}$.
+
+Rather than solving for updates $\delta U, \delta \Lambda$, we can also solve
+for the variables we are interested in right away by setting $\delta U^k :=
+U^{k+1} - U^k$ and $\delta \Lambda^k := \Lambda^{k+1} - \Lambda^k$ and
+bringing all known terms to the right hand side. This yields
+@f{equation*}
+\begin{pmatrix}
+ A_{\mathcal{F}_k\mathcal{F}_k} & A_{\mathcal{F}_k\mathcal{A}_k} & B_{\mathcal{F}_k} & 0\\
+ A_{\mathcal{A}_k\mathcal{F}_k} & A_{\mathcal{A}_k\mathcal{A}_k} & 0 & B_{\mathcal{A}_k}\\
+ 0 & 0 & Id_{\mathcal{F}_k} & 0\\
+ 0 & B_{\mathcal{A}_k} & 0 & 0
+\end{pmatrix}
+\begin{pmatrix}
+ U^k_{\mathcal{F}_k}\\ U^k_{\mathcal{A}_k}\\ \Lambda^k_{\mathcal{F}_k}\\ \Lambda^k_{\mathcal{A}_k}
+\end{pmatrix}
+=
+\begin{pmatrix}
+ F_{\mathcal{F}_k}\\ F_{\mathcal{A}_k}\\ 0\\ G_{\mathcal{A}_k}
+\end{pmatrix}.
+@f}
+These are the equations outlined above in the description of the basic algorithm.
+
+We could even drive this a bit further.
+It's easy to see that we can eliminate the third row and the third column
+because it implies $\Lambda_{\mathcal{F}_k} = 0$:
+@f{equation*}
+\begin{pmatrix}
+ A_{\mathcal{F}_k\mathcal{F}_k} & A_{\mathcal{F}_k\mathcal{A}_k} & 0\\
+ A_{\mathcal{A}_k\mathcal{F}_k} & A_{\mathcal{A}_k\mathcal{A}_k} & B_{\mathcal{A}_k}\\
+ 0 & B_{\mathcal{A}_k} & 0
+\end{pmatrix}
+\begin{pmatrix}
+ U^k_{\mathcal{F}_k}\\ U^k_{\mathcal{A}_k}\\ \Lambda^k_{\mathcal{A}_k}
+\end{pmatrix}
+=
+\begin{pmatrix}
+ F_{\mathcal{F}_k}\\ F_{\mathcal{A}_k}\\ G_{\mathcal{A}_k}
+\end{pmatrix}.
+@f}
+This shows that one in fact only needs to solve for the Lagrange multipliers
+located on the active set. By considering the second row one would then recover
+the full Lagrange multiplier vector through
+@f{equation*}
+ \Lambda^k_{\mathcal{S}} = B^{-1}\left(F_{\mathcal{S}} - A_{\mathcal{S}}U^k_{\mathcal{S}}\right).
+@f}
+Because of the third row and the fact that $B_{\mathcal{A}_k}$ is a diagonal matrix we are able
+to calculate $U^k_{\mathcal{A}_k}=B^{-1}_{\mathcal{A}_k}G_{\mathcal{A}_k}$ directly. We can therefore also write the
+linear system as follows:
+@f{equation*}
+\begin{pmatrix}
+ A_{\mathcal{F}_k\mathcal{F}_k} & 0\\
+ 0 & Id_{\mathcal{A}_k} \\
+\end{pmatrix}
+\begin{pmatrix}
+ U^k_{\mathcal{F}_k}\\ U^k_{\mathcal{A}_k}
+\end{pmatrix}
+=
+\begin{pmatrix}
+ F_{\mathcal{F}_k} - A_{\mathcal{F}_k\mathcal{A}_k}B^{-1}_{\mathcal{A}_k}G_{\mathcal{A}_k}
+ \\
+ B_{\mathcal{A}_k}^{-1}G_{\mathcal{A}_k}
+\end{pmatrix}.
+@f}
+Fortunately, this form is easy to arrive at: we simply build the usual Laplace
+linear system
+@f{equation*}
+\begin{pmatrix}
+ A_{\mathcal{F}_k\mathcal{F}_k} & A_{\mathcal{F}_k\mathcal{A}_k} \\
+ A_{\mathcal{A}_k\mathcal{F}_k} & A_{\mathcal{A}_k\mathcal{A}_k}
+\end{pmatrix}
+\begin{pmatrix}
+ U^k_{\mathcal{F}_k}\\ U^k_{\mathcal{A}_k}
+\end{pmatrix}
+=
+\begin{pmatrix}
+ F_{\mathcal{F}_k}\\ F_{\mathcal{A}_k}
+\end{pmatrix},
+@f}
+and then let the ConstraintMatrix class eliminate all constrained degrees of
+freedom, namely $U^k_{\mathcal{A}_k}=B^{-1}_{\mathcal{A}_k}G_{\mathcal{A}_k}$,
+in the same way as if the dofs in $\mathcal{A}_k$ were Dirichlet data. The
+resulting linear system (the second to last one above) is symmetric and positive
+definite and we solve it with a CG-method
+and the AMG preconditioner from Trilinos.
+
+
+<h3>Implementation</h3>
+
+This tutorial is quite similar to step-4. The general structure of the program
+follows step-4 with minor differences:
+- We need two new methods, <code>assemble_mass_matrix_diagonal</code> and
+  <code>update_solution_and_constraints</code>.
+- We need new member variables that denote the constraints we have here.
+- We change the preconditioner for the solver.
+
+
diff --git a/examples/step-41/doc/kind b/examples/step-41/doc/kind
new file mode 100644
index 0000000..56e049c
--- /dev/null
+++ b/examples/step-41/doc/kind
@@ -0,0 +1 @@
+solids
diff --git a/examples/step-41/doc/results.dox b/examples/step-41/doc/results.dox
new file mode 100644
index 0000000..7810a56
--- /dev/null
+++ b/examples/step-41/doc/results.dox
@@ -0,0 +1,255 @@
+<h1>Results</h1>
+
+Running the program produces output like this:
+@code
+Number of active cells: 16384
+Total number of cells: 21845
+Number of degrees of freedom: 16641
+
+Newton iteration 0
+   Assembling system...
+   Solving system...
+      Error: 0.310059 -> 5.16619e-05 in 5 CG iterations.
+   Updating active set...
+      Size of active set: 13164
+   Residual of the non-contact part of the system: 1.61863e-05
+   Writing graphical output...
+
+Newton iteration 1
+   Assembling system...
+   Solving system...
+      Error: 1.11987 -> 0.00109377 in 6 CG iterations.
+   Updating active set...
+      Size of active set: 12363
+   Residual of the non-contact part of the system: 3.9373
+   Writing graphical output...
+
+...
+
+Newton iteration 17
+   Assembling system...
+   Solving system...
+      Error: 0.00713308 -> 2.29249e-06 in 4 CG iterations.
+   Updating active set...
+      Size of active set: 5399
+   Residual of the non-contact part of the system: 0.000957525
+   Writing graphical output...
+
+Newton iteration 18
+   Assembling system...
+   Solving system...
+      Error: 0.000957525 -> 2.8033e-07 in 4 CG iterations.
+   Updating active set...
+      Size of active set: 5399
+   Residual of the non-contact part of the system: 2.8033e-07
+   Writing graphical output...
+@endcode
+
+The iterations end once the active set doesn't change any more (it has
+5,399 constrained degrees of freedom at that point). The algebraic
+preconditioner is apparently working nicely since we only need 4-6 CG
+iterations to solve the linear system (although this also has a lot to
+do with the fact that we are not asking for very high accuracy of the
+linear solver).
+
+More revealing is to look at a sequence of graphical output files
+(every third step is shown, with the number of the iteration in the
+leftmost column):
+
+<table align="center">
+  <tr>
+    <td valign="top">
+      0  
+    </td>
+    <td valign="top">
+      <img src="http://www.dealii.org/images/steps/developer/step-41.displacement.00.png" alt="">
+    </td>
+    <td valign="top">
+      <img src="http://www.dealii.org/images/steps/developer/step-41.active-set.00.png" alt="">
+    </td>
+    <td valign="top">
+      <img src="http://www.dealii.org/images/steps/developer/step-41.displacement.3d.00.png" alt="">
+    </td>
+  </tr>
+
+  <tr>
+    <td valign="top">
+      3  
+    </td>
+    <td valign="top">
+      <img src="http://www.dealii.org/images/steps/developer/step-41.displacement.03.png" alt="">
+    </td>
+    <td valign="top">
+      <img src="http://www.dealii.org/images/steps/developer/step-41.active-set.03.png" alt="">
+    </td>
+    <td valign="top">
+      <img src="http://www.dealii.org/images/steps/developer/step-41.displacement.3d.03.png" alt="">
+    </td>
+  </tr>
+
+  <tr>
+    <td valign="top">
+      6  
+    </td>
+    <td valign="top">
+      <img src="http://www.dealii.org/images/steps/developer/step-41.displacement.06.png" alt="">
+    </td>
+    <td valign="top">
+      <img src="http://www.dealii.org/images/steps/developer/step-41.active-set.06.png" alt="">
+    </td>
+    <td valign="top">
+      <img src="http://www.dealii.org/images/steps/developer/step-41.displacement.3d.06.png" alt="">
+    </td>
+  </tr>
+
+  <tr>
+    <td valign="top">
+      9  
+    </td>
+    <td valign="top">
+      <img src="http://www.dealii.org/images/steps/developer/step-41.displacement.09.png" alt="">
+    </td>
+    <td valign="top">
+      <img src="http://www.dealii.org/images/steps/developer/step-41.active-set.09.png" alt="">
+    </td>
+    <td valign="top">
+      <img src="http://www.dealii.org/images/steps/developer/step-41.displacement.3d.09.png" alt="">
+    </td>
+  </tr>
+
+  <tr>
+    <td valign="top">
+      12  
+    </td>
+    <td valign="top">
+      <img src="http://www.dealii.org/images/steps/developer/step-41.displacement.12.png" alt="">
+    </td>
+    <td valign="top">
+      <img src="http://www.dealii.org/images/steps/developer/step-41.active-set.12.png" alt="">
+    </td>
+    <td valign="top">
+      <img src="http://www.dealii.org/images/steps/developer/step-41.displacement.3d.12.png" alt="">
+    </td>
+  </tr>
+
+  <tr>
+    <td valign="top">
+      15  
+    </td>
+    <td valign="top">
+      <img src="http://www.dealii.org/images/steps/developer/step-41.displacement.15.png" alt="">
+    </td>
+    <td valign="top">
+      <img src="http://www.dealii.org/images/steps/developer/step-41.active-set.15.png" alt="">
+    </td>
+    <td valign="top">
+      <img src="http://www.dealii.org/images/steps/developer/step-41.displacement.3d.15.png" alt="">
+    </td>
+  </tr>
+
+  <tr>
+    <td valign="top">
+      18  
+    </td>
+    <td valign="top">
+      <img src="http://www.dealii.org/images/steps/developer/step-41.displacement.18.png" alt="">
+    </td>
+    <td valign="top">
+      <img src="http://www.dealii.org/images/steps/developer/step-41.active-set.18.png" alt="">
+    </td>
+    <td valign="top">
+      <img src="http://www.dealii.org/images/steps/developer/step-41.displacement.3d.18.png" alt="">
+    </td>
+  </tr>
+</table>
+
+The pictures show that in the first step, the solution (which has been
+computed without any of the constraints active) bends through so much
+that pretty much every interior point has to be bounced back to the
+stairstep function, producing a discontinuous solution. Over the
+course of the active set iterations, this unphysical membrane shape is
+smoothed out, the contact with the lower-most stair step disappears,
+and the solution stabilizes.
+
+In addition to this, the program also outputs the values of the
+Lagrange multipliers. Remember that these are the contact forces and
+so should only be positive on the contact set, and zero outside. If,
+on the other hand, a Lagrange multiplier is negative in the active
+set, then this degree of freedom must be removed from the active
+set. The following pictures show the multipliers in iterations 1, 9
+and 18, where we use reds and browns to indicate positive values, and
+blue for negative values.
+
+<table align="center">
+  <tr>
+    <td valign="top">
+      <img src="http://www.dealii.org/images/steps/developer/step-41.forces.01.png" alt="">
+    </td>
+    <td valign="top">
+      <img src="http://www.dealii.org/images/steps/developer/step-41.forces.09.png" alt="">
+    </td>
+    <td valign="top">
+      <img src="http://www.dealii.org/images/steps/developer/step-41.forces.18.png" alt="">
+    </td>
+  </tr>
+
+  <tr>
+    <td align="center">
+      Iteration 1
+    </td>
+    <td align="center">
+      Iteration 9
+    </td>
+    <td align="center">
+      Iteration 18
+    </td>
+  </tr>
+</table>
+
+It is easy to see that the positive values converge nicely to moderate
+values in the interior of the contact set and large upward forces at
+the edges of the steps, as one would expect (to support the large
+curvature of the membrane there); at the fringes of the active set,
+multipliers are initially negative, causing the set to shrink until,
+in iteration 18, there are no more negative multipliers and the
+algorithm has converged.
+
+
+
+<a name="extensions"></a>
+<h3>Possibilities for extensions</h3>
+
+As with any of the programs of this tutorial, there are a number of
+obvious possibilities for extensions and experiments. The first one is
+clear: introduce adaptivity. Contact problems are prime candidates for
+adaptive meshes because the solution has lines along which it is less
+regular (the places where contact is established between membrane and
+obstacle) and other areas where the solution is very smooth (or, in
+the present context, constant wherever it is in contact with the
+obstacle). Adding this to the current program should not pose too many
+difficulties, but it is not trivial to find a good error estimator for
+that purpose.
+
+A more challenging task would be an extension to 3d. The problem here
+is not so much to simply make everything run in 3d. Rather, it is that
+when a 3d body is deformed and gets into contact with an obstacle,
+then the obstacle does not act as a constraining body force within the
+domain as is the case here. Rather, the contact force only acts on the
+boundary of the object. The inequality then is not in the differential
+equation but in fact in the (Neumann-type) boundary conditions, though
+this leads to a similar kind of variational
+inequality. Mathematically, this means that the Lagrange multiplier
+only lives on the surface, though it can of course be extended by zero
+into the domain if that is convenient. As in the current program, one
+does not need to form and store this Lagrange multiplier explicitly.
+
+A further interesting problem for the 3d case is to consider contact problems
+with friction. In almost every mechanical process friction has a big influence.
+For the modelling we have to take into account tangential stresses at the contact
+surface. Also we have to observe that friction adds another nonlinearity to
+our problem.
+
+Another nontrivial modification is to implement a more complex constitutive
+law like nonlinear elasticity or elasto-plastic material behavior.
+The difficulty here is to handle the additional nonlinearity arising
+through the nonlinear constitutive law.
diff --git a/examples/step-41/doc/tooltip b/examples/step-41/doc/tooltip
new file mode 100644
index 0000000..0f580b9
--- /dev/null
+++ b/examples/step-41/doc/tooltip
@@ -0,0 +1 @@
+Solving the obstacle problem (a variational inequality)
diff --git a/examples/step-41/step-41.cc b/examples/step-41/step-41.cc
new file mode 100644
index 0000000..68a889c
--- /dev/null
+++ b/examples/step-41/step-41.cc
@@ -0,0 +1,706 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2011 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Authors: Joerg Frohne, Texas A&M University and
+ *                        University of Siegen, 2011, 2012
+ *          Wolfgang Bangerth, Texas A&M University, 2012
+ */
+
+
+// @sect3{Include files}
+
+// As usual, at the beginning we include all the header files we need in
+// here. With the exception of the various files that provide interfaces to
+// the Trilinos library, there are no surprises:
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/index_set.h>
+
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/trilinos_sparse_matrix.h>
+#include <deal.II/lac/trilinos_vector.h>
+#include <deal.II/lac/trilinos_precondition.h>
+
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_values.h>
+
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/data_out.h>
+
+#include <fstream>
+#include <iostream>
+#include <list>
+
+
+namespace Step41
+{
+  using namespace dealii;
+
+  // @sect3{The <code>ObstacleProblem</code> class template}
+
+  // This class supplies all functions and variables needed to describe the
+  // obstacle problem. It is close to what we had to do in step-4, and so
+  // relatively simple. The only real new components are the
+  // update_solution_and_constraints function that computes the active set and
+  // a number of variables that are necessary to describe the original
+  // (unconstrained) form of the linear system
+  // (<code>complete_system_matrix</code> and
+  // <code>complete_system_rhs</code>) as well as the active set itself and
+  // the diagonal of the mass matrix $B$ used in scaling Lagrange multipliers
+  // in the active set formulation. The rest is as in step-4:
+  template <int dim>
+  class ObstacleProblem
+  {
+  public:
+    ObstacleProblem ();
+    void run ();
+
+  private:
+    void make_grid ();                      // build and refine the mesh
+    void setup_system();                    // DoFs, constraints, matrices, vectors
+    void assemble_system ();                // Laplace matrix and rhs, constraints applied
+    void assemble_mass_matrix_diagonal (TrilinosWrappers::SparseMatrix &mass_matrix);
+    void update_solution_and_constraints ();  // recompute the active set
+    void solve ();                          // CG with AMG preconditioner
+    void output_results (const unsigned int iteration) const;
+
+    Triangulation<dim>   triangulation;
+    FE_Q<dim>            fe;            // constructed with degree 1
+    DoFHandler<dim>      dof_handler;
+    ConstraintMatrix     constraints;   // boundary values plus active-set DoFs
+    IndexSet             active_set;    // DoFs currently constrained to the obstacle
+
+    TrilinosWrappers::SparseMatrix system_matrix;           // assembled with constraints
+    TrilinosWrappers::SparseMatrix complete_system_matrix;  // used for the residual lambda
+
+    TrilinosWrappers::MPI::Vector  solution;
+    TrilinosWrappers::MPI::Vector  system_rhs;
+    TrilinosWrappers::MPI::Vector  complete_system_rhs;      // rhs paired with complete_system_matrix
+    TrilinosWrappers::MPI::Vector  diagonal_of_mass_matrix;  // diagonal entries of the mass matrix B
+    TrilinosWrappers::MPI::Vector  contact_force;            // -lambda / diagonal_of_mass_matrix
+  };
+
+
+  // @sect3{Right hand side, boundary values, and the obstacle}
+
+  // In the following, we define classes that describe the right hand side
+  // function, the Dirichlet boundary values, and the height of the obstacle
+  // as a function of $\mathbf x$. In all three cases, we derive these classes
+  // from Function@<dim@>, although in the case of <code>RightHandSide</code>
+  // and <code>Obstacle</code> this is more out of convention than necessity
+  // since we never pass such objects to the library. In any case, the
+  // definition of the right hand side and boundary values classes is obvious
+  // given our choice of $f=-10$, $u|_{\partial\Omega}=0$:
+  template <int dim>
+  class RightHandSide : public Function<dim>  // the load f of the problem
+  {
+  public:
+    RightHandSide () : Function<dim>() {}
+
+    virtual double value (const Point<dim>   &p,  // evaluation point (unused)
+                          const unsigned int  component = 0) const;  // must be 0
+  };
+
+  template <int dim>
+  double RightHandSide<dim>::value (const Point<dim> &,
+                                    const unsigned int component) const
+  {
+    Assert (component == 0, ExcNotImplemented());  // only component 0 is supported
+
+    return -10;  // constant right hand side f = -10, independent of the point
+  }
+
+
+
+  template <int dim>
+  class BoundaryValues : public Function<dim>  // Dirichlet data on the boundary
+  {
+  public:
+    BoundaryValues () : Function<dim>() {}
+
+    virtual double value (const Point<dim>   &p,  // evaluation point (unused)
+                          const unsigned int  component = 0) const;  // must be 0
+  };
+
+  template <int dim>
+  double BoundaryValues<dim>::value (const Point<dim> &,
+                                     const unsigned int component) const
+  {
+    Assert (component == 0, ExcNotImplemented());  // only component 0 is supported
+
+    return 0;  // homogeneous boundary values u = 0
+  }
+
+
+
+  // We describe the obstacle function by a cascaded barrier (think: stair
+  // steps):
+  template <int dim>
+  class Obstacle : public Function<dim>  // height of the obstacle below the membrane
+  {
+  public:
+    Obstacle () : Function<dim>() {}
+
+    virtual double value (const Point<dim>   &p,  // only p(0) is evaluated
+                          const unsigned int  component = 0) const;  // must be 0
+  };
+
+  template <int dim>
+  double Obstacle<dim>::value (const Point<dim> &p,
+                               const unsigned int component) const
+  {
+    Assert (component == 0, ExcNotImplemented());
+    // Stair steps in x; each else-branch already implies the lower bound.
+    if (p (0) < -0.5)
+      return -0.2;
+    else if (p (0) < 0.0)
+      return -0.4;
+    else if (p (0) < 0.5)
+      return -0.6;
+    else
+      return -0.8;
+  }
+
+
+
+  // @sect3{Implementation of the <code>ObstacleProblem</code> class}
+
+
+  // @sect4{ObstacleProblem::ObstacleProblem}
+
+  // To everyone who has taken a look at the first few tutorial programs, the
+  // constructor is completely obvious:
+  template <int dim>
+  ObstacleProblem<dim>::ObstacleProblem ()
+    :
+    fe (1),                       // FE_Q of degree 1
+    dof_handler (triangulation)   // attach the DoF handler to the mesh
+  {}
+
+
+  // @sect4{ObstacleProblem::make_grid}
+
+  // We solve our obstacle problem on the square $[-1,1]\times [-1,1]$ in
+  // 2D. This function therefore just sets up one of the simplest possible
+  // meshes.
+  template <int dim>
+  void ObstacleProblem<dim>::make_grid ()
+  {
+    GridGenerator::hyper_cube (triangulation, -1, 1);  // hyper cube from -1 to 1
+    triangulation.refine_global (7);                   // seven global refinements
+
+    std::cout << "Number of active cells: "
+              << triangulation.n_active_cells()
+              << std::endl
+              << "Total number of cells: "
+              << triangulation.n_cells()
+              << std::endl;
+  }
+
+
+  // @sect4{ObstacleProblem::setup_system}
+
+  // In this first function of note, we set up the degrees of freedom handler,
+  // resize vectors and matrices, and deal with the constraints. Initially,
+  // the constraints are, of course, only given by boundary values, so we
+  // interpolate them towards the top of the function.
+  template <int dim>
+  void ObstacleProblem<dim>::setup_system ()
+  {
+    dof_handler.distribute_dofs (fe);
+    active_set.set_size (dof_handler.n_dofs());  // one slot per DoF, initially empty
+
+    std::cout << "Number of degrees of freedom: "
+              << dof_handler.n_dofs()
+              << std::endl
+              << std::endl;
+
+    VectorTools::interpolate_boundary_values (dof_handler,
+                                              0,
+                                              BoundaryValues<dim>(),
+                                              constraints);
+    constraints.close ();
+
+    DynamicSparsityPattern dsp(dof_handler.n_dofs());
+    DoFTools::make_sparsity_pattern (dof_handler,
+                                     dsp,
+                                     constraints,
+                                     false);
+
+    system_matrix.reinit (dsp);
+    complete_system_matrix.reinit (dsp);  // same pattern as system_matrix
+
+    IndexSet solution_index_set = dof_handler.locally_owned_dofs();
+    solution.reinit (solution_index_set, MPI_COMM_WORLD);
+    system_rhs.reinit (solution_index_set, MPI_COMM_WORLD);
+    complete_system_rhs.reinit (solution_index_set, MPI_COMM_WORLD);
+    contact_force.reinit (solution_index_set, MPI_COMM_WORLD);
+
+    // The only other thing to do here is to compute the factors in the $B$
+    // matrix which is used to scale the residual. As discussed in the
+    // introduction, we'll use a little trick to make this mass matrix
+    // diagonal, and in the following then first compute all of this as a
+    // matrix and then extract the diagonal elements for later use:
+    TrilinosWrappers::SparseMatrix mass_matrix;
+    mass_matrix.reinit (dsp);
+    assemble_mass_matrix_diagonal (mass_matrix);
+    diagonal_of_mass_matrix.reinit (solution_index_set, MPI_COMM_WORLD);
+    for (types::global_dof_index j=0; j<solution.size (); j++)  // index type matches size()
+      diagonal_of_mass_matrix (j) = mass_matrix.diag_element (j);
+  }
+
+
+  // @sect4{ObstacleProblem::assemble_system}
+
+  // This function at once assembles the system matrix and right-hand-side and
+  // applies the constraints (both due to the active set as well as from
+  // boundary values) to our system. Otherwise, it is functionally equivalent
+  // to the corresponding function in, for example, step-4.
+  template <int dim>
+  void ObstacleProblem<dim>::assemble_system ()
+  {
+    std::cout << "   Assembling system..." << std::endl;
+
+    system_matrix = 0;   // start from a zero matrix and right hand side
+    system_rhs    = 0;
+
+    const QGauss<dim>         quadrature_formula(fe.degree+1);
+    const RightHandSide<dim>  right_hand_side;
+
+    FEValues<dim>             fe_values (fe, quadrature_formula,
+                                         update_values   | update_gradients |
+                                         update_quadrature_points |
+                                         update_JxW_values);
+
+    const unsigned int        dofs_per_cell = fe.dofs_per_cell;
+    const unsigned int        n_q_points    = quadrature_formula.size();
+
+    FullMatrix<double>        cell_matrix (dofs_per_cell, dofs_per_cell);
+    Vector<double>            cell_rhs (dofs_per_cell);
+
+    std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+
+    for (; cell!=endc; ++cell)
+      {
+        fe_values.reinit (cell);
+        cell_matrix = 0;   // reset the per-cell contributions
+        cell_rhs = 0;
+
+        for (unsigned int q_point=0; q_point<n_q_points; ++q_point)
+          for (unsigned int i=0; i<dofs_per_cell; ++i)
+            {
+              for (unsigned int j=0; j<dofs_per_cell; ++j)  // Laplace term
+                cell_matrix(i,j) += (fe_values.shape_grad (i, q_point) *
+                                     fe_values.shape_grad (j, q_point) *
+                                     fe_values.JxW (q_point));
+
+              cell_rhs(i) += (fe_values.shape_value (i, q_point) *  // load term
+                              right_hand_side.value (fe_values.quadrature_point (q_point)) *
+                              fe_values.JxW (q_point));
+            }
+
+        cell->get_dof_indices (local_dof_indices);
+        // Copy to the global objects while applying the current constraints.
+        constraints.distribute_local_to_global (cell_matrix,
+                                                cell_rhs,
+                                                local_dof_indices,
+                                                system_matrix,
+                                                system_rhs,
+                                                true);  // presumably use_inhomogeneities_for_rhs -- confirm
+      }
+  }
+
+
+
+  // @sect4{ObstacleProblem::assemble_mass_matrix_diagonal}
+
+  // The next function is used in the computation of the diagonal mass matrix
+  // $B$ used to scale variables in the active set method. As discussed in the
+  // introduction, we get the mass matrix to be diagonal by choosing the
+  // trapezoidal rule for quadrature. Doing so we don't really need the triple
+  // loop over quadrature points, indices $i$ and indices $j$ any more and
+  // can, instead, just use a double loop. The rest of the function is obvious
+  // given what we have discussed in many of the previous tutorial programs.
+  //
+  // Note that at the time this function is called, the constraints object
+  // only contains boundary value constraints; we therefore do not have to pay
+  // attention in the last copy-local-to-global step to preserve the values of
+  // matrix entries that may later on be constrained by the active set.
+  //
+  // Note also that the trick with the trapezoidal rule only works if we have
+  // in fact $Q_1$ elements. For higher order elements, one would need to use
+  // a quadrature formula that has quadrature points at all the support points
+  // of the finite element. Constructing such a quadrature formula isn't
+  // really difficult, but not the point here, and so we simply assert at the
+  // top of the function that our implicit assumption about the finite element
+  // is in fact satisfied.
+  template <int dim>
+  void
+  ObstacleProblem<dim>::
+  assemble_mass_matrix_diagonal (TrilinosWrappers::SparseMatrix &mass_matrix)
+  {
+    Assert (fe.degree == 1, ExcNotImplemented());  // only implemented for degree 1
+
+    const QTrapez<dim>        quadrature_formula;  // trapezoidal rule
+    FEValues<dim>             fe_values (fe,
+                                         quadrature_formula,
+                                         update_values   |
+                                         update_JxW_values);
+
+    const unsigned int        dofs_per_cell = fe.dofs_per_cell;
+    const unsigned int        n_q_points    = quadrature_formula.size();
+
+    FullMatrix<double>        cell_matrix (dofs_per_cell, dofs_per_cell);
+    std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+
+    for (; cell!=endc; ++cell)
+      {
+        fe_values.reinit (cell);
+        cell_matrix = 0;
+        // Only the diagonal entries (i,i) of the cell matrix are accumulated.
+        for (unsigned int q_point=0; q_point<n_q_points; ++q_point)
+          for (unsigned int i=0; i<dofs_per_cell; ++i)
+            cell_matrix(i,i) += (fe_values.shape_value (i, q_point) *
+                                 fe_values.shape_value (i, q_point) *
+                                 fe_values.JxW (q_point));
+
+        cell->get_dof_indices (local_dof_indices);
+        // Add the cell contribution to the caller-provided mass_matrix.
+        constraints.distribute_local_to_global (cell_matrix,
+                                                local_dof_indices,
+                                                mass_matrix);
+      }
+  }
+
+
+  // @sect4{ObstacleProblem::update_solution_and_constraints}
+
+  // In a sense, this is the central function of this program.  It updates the
+  // active set of constrained degrees of freedom as discussed in the
+  // introduction and computes a ConstraintMatrix object from it that can then
+  // be used to eliminate constrained degrees of freedom from the solution of
+  // the next iteration. At the same time we set the constrained degrees of
+  // freedom of the solution to the correct value, namely the height of the
+  // obstacle.
+  //
+  // Fundamentally, the function is rather simple: We have to loop over all
+  // degrees of freedom and check the sign of the function $\Lambda^k_i +
+  // c([BU^k]_i - G_i) = \Lambda^k_i + cB_i(U^k_i - [g_h]_i)$ because in our
+  // case $G_i = B_i[g_h]_i$. To this end, we use the formula given in the
+  // introduction by which we can compute the Lagrange multiplier as the
+  // residual of the original linear system (given via the variables
+  // <code>complete_system_matrix</code> and <code>complete_system_rhs</code>).
+  // At the top of this function, we compute this residual using a function
+  // that is part of the matrix classes.
+  template <int dim>
+  void
+  ObstacleProblem<dim>::update_solution_and_constraints ()
+  {
+    std::cout << "   Updating active set..." << std::endl;
+
+    const double penalty_parameter = 100.0;  // the constant c in the active set test
+
+    TrilinosWrappers::MPI::Vector lambda (complete_index_set(dof_handler.n_dofs()));
+    complete_system_matrix.residual (lambda,
+                                     solution, complete_system_rhs);
+    contact_force.ratio (lambda, diagonal_of_mass_matrix);  // element-wise quotient
+    contact_force *= -1;
+
+    // The next step is to reset the active set and constraints objects and to
+    // start the loop over all degrees of freedom. This is made slightly more
+    // complicated by the fact that we can't just loop over all elements of
+    // the solution vector since there is no way for us then to find out what
+    // location a DoF is associated with; however, we need this location to
+    // test whether the displacement of a DoF is larger or smaller than the
+    // height of the obstacle at this location.
+    //
+    // We work around this by looping over all cells and DoFs defined on each
+    // of these cells. We use here that the displacement is described using a
+    // $Q_1$ function for which degrees of freedom are always located on the
+    // vertices of the cell; thus, we can get the index of each degree of
+    // freedom and its location by asking the vertex for this information. On
+    // the other hand, this clearly wouldn't work for higher order elements,
+    // and so we add an assertion that makes sure that we only deal with
+    // elements for which all degrees of freedom are located in vertices to
+    // avoid tripping ourselves with non-functional code in case someone wants
+    // to play with increasing the polynomial degree of the solution.
+    //
+    // The price to pay for having to loop over cells rather than DoFs is that
+    // we may encounter some degrees of freedom more than once, namely each
+    // time we visit one of the cells adjacent to a given vertex. We will
+    // therefore have to keep track which vertices we have already touched and
+    // which we haven't so far. We do so by using an array of flags
+    // <code>dof_touched</code>:
+    constraints.clear();
+    active_set.clear ();
+
+    const Obstacle<dim> obstacle;
+    std::vector<bool>   dof_touched (dof_handler.n_dofs(), false);
+
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+    for (; cell!=endc; ++cell)
+      for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+        {
+          Assert (dof_handler.get_fe().dofs_per_cell ==
+                  GeometryInfo<dim>::vertices_per_cell,
+                  ExcNotImplemented());
+          // Use the global DoF index type so that the index is never narrowed.
+          const types::global_dof_index dof_index = cell->vertex_dof_index (v,0);
+
+          if (dof_touched[dof_index] == false)
+            dof_touched[dof_index] = true;
+          else
+            continue;
+
+          // Now that we know that we haven't touched this DoF yet, let's get
+          // the value of the displacement function there as well as the value
+          // of the obstacle function and use this to decide whether the
+          // current DoF belongs to the active set. For that we use the
+          // function given above and in the introduction.
+          //
+          // If we decide that the DoF should be part of the active set, we
+          // add its index to the active set, introduce an inhomogeneous
+          // equality constraint in the ConstraintMatrix object, and reset the
+          // solution value to the height of the obstacle. Finally, the
+          // residual of the non-contact part of the system serves as an
+          // additional control (the residual equals the remaining,
+          // unaccounted forces, and should be zero outside the contact zone),
+          // so we zero out the components of the residual vector (i.e., the
+          // Lagrange multiplier lambda) that correspond to the area where the
+          // body is in contact; at the end of the loop over all cells, the
+          // residual will therefore only consist of the residual in the
+          // non-contact zone. We output the norm of this residual along with
+          // the size of the active set after the loop.
+          const double obstacle_value = obstacle.value (cell->vertex(v));
+          const double solution_value = solution (dof_index);
+
+          if (lambda (dof_index) +
+              penalty_parameter *
+              diagonal_of_mass_matrix(dof_index) *
+              (solution_value - obstacle_value)
+              <
+              0)
+            {
+              active_set.add_index (dof_index);
+              constraints.add_line (dof_index);
+              constraints.set_inhomogeneity (dof_index, obstacle_value);
+
+              solution (dof_index) = obstacle_value;
+
+              lambda (dof_index) = 0;
+            }
+        }
+    std::cout << "      Size of active set: " << active_set.n_elements()
+              << std::endl;
+
+    std::cout << "   Residual of the non-contact part of the system: "
+              << lambda.l2_norm()
+              << std::endl;
+
+    // In a final step, we add to the set of constraints on DoFs we have so
+    // far from the active set those that result from Dirichlet boundary
+    // values, and close the constraints object:
+    VectorTools::interpolate_boundary_values (dof_handler,
+                                              0,
+                                              BoundaryValues<dim>(),
+                                              constraints);
+    constraints.close ();
+  }
+
+  // @sect4{ObstacleProblem::solve}
+
+  // There is nothing to say really about the solve function. In the context
+  // of a Newton method, we are not typically interested in very high accuracy
+  // (why ask for a highly accurate solution of a linear problem that we know
+  // only gives us an approximation of the solution of the nonlinear problem),
+  // and so we use the ReductionControl class that stops iterations when
+  // either an absolute tolerance is reached (for which we choose $10^{-12}$)
+  // or when the residual is reduced by a certain factor (here, $10^{-3}$).
+  template <int dim>
+  void ObstacleProblem<dim>::solve ()
+  {
+    std::cout << "   Solving system..." << std::endl;
+
+    // Algebraic multigrid preconditioner built from the current matrix.
+    TrilinosWrappers::PreconditionAMG amg_preconditioner;
+    amg_preconditioner.initialize (system_matrix);
+
+    // Stop at a residual of 1e-12 or once it has been reduced by 1e-3.
+    ReductionControl solver_control (100, 1e-12, 1e-3);
+    SolverCG<TrilinosWrappers::MPI::Vector> cg (solver_control);
+    cg.solve (system_matrix, solution, system_rhs, amg_preconditioner);
+    constraints.distribute (solution);
+
+    std::cout << "      Error: " << solver_control.initial_value()
+              << " -> " << solver_control.last_value()
+              << " in " << solver_control.last_step()
+              << " CG iterations." << std::endl;
+  }
+
+
+  // @sect4{ObstacleProblem::output_results}
+
+  // We use the vtk-format for the output.  The file contains the displacement
+  // and a numerical representation of the active set. The function looks
+  // standard but note that we can add an IndexSet object to the DataOut
+  // object in exactly the same way as a regular solution vector: it is simply
+  // interpreted as a function that is either zero (when a degree of freedom
+  // is not part of the IndexSet) or one (if it is).
+  template <int dim>
+  void ObstacleProblem<dim>::output_results (const unsigned int iteration) const
+  {
+    std::cout << "   Writing graphical output..." << std::endl;
+
+    // One file per iteration, named output_<3-digit iteration>.vtk.
+    const std::string filename = std::string("output_") +
+                                 Utilities::int_to_string (iteration, 3) +
+                                 ".vtk";
+
+    DataOut<dim> vtk_writer;
+    vtk_writer.attach_dof_handler (dof_handler);
+    vtk_writer.add_data_vector (solution, "displacement");
+    vtk_writer.add_data_vector (active_set, "active_set");
+    vtk_writer.add_data_vector (contact_force, "lambda");
+    vtk_writer.build_patches ();
+    std::ofstream vtk_file (filename.c_str ());
+    vtk_writer.write_vtk (vtk_file);
+  }
+
+
+
+  // @sect4{ObstacleProblem::run}
+
+  // This is the function which has the top-level control over everything.  It
+  // is not very long, and in fact rather straightforward: in every iteration
+  // of the active set method, we assemble the linear system, solve it, update
+  // the active set and project the solution back to the feasible set, and
+  // then output the results. The iteration is terminated whenever the active
+  // set has not changed in the previous iteration.
+  //
+  // The only trickier part is that we have to save the linear system (i.e.,
+  // the matrix and right hand side) after assembling it in the first
+  // iteration. The reason is that this is the only step where we can access
+  // the linear system as built without any of the contact constraints
+  // active. We need this to compute the residual of the solution at other
+  // iterations, but in other iterations that linear system we form has the
+  // rows and columns that correspond to constrained degrees of freedom
+  // eliminated, and so we can no longer access the full residual of the
+  // original equation.
+  template <int dim>
+  void ObstacleProblem<dim>::run ()
+  {
+    make_grid();
+    setup_system ();
+
+    // Active set of the previous iteration, used to detect convergence.
+    IndexSet previous_active_set (active_set);
+    for (unsigned int step=0; step<=solution.size (); ++step)
+      {
+        std::cout << "Newton iteration " << step << std::endl;
+        assemble_system ();
+
+        // Only the first system is free of contact constraints; keep a copy.
+        if (step == 0)
+          {
+            complete_system_matrix.copy_from (system_matrix);
+            complete_system_rhs = system_rhs;
+          }
+
+        solve ();
+        update_solution_and_constraints ();
+        output_results (step);
+
+        // Stop as soon as the active set no longer changes.
+        if (active_set == previous_active_set)
+          break;
+        previous_active_set = active_set;
+        std::cout << std::endl;
+      }
+  }
+}
+
+
+// @sect3{The <code>main</code> function}
+
+// And this is the main function. It follows the pattern of all other main
+// functions. The call to initialize MPI exists because the Trilinos library
+// upon which we build our linear solvers in this program requires it.
+int main (int argc, char *argv[])
+{
+  // Horizontal rule that frames the error reports printed below.
+  const char *const rule = "----------------------------------------------------";
+
+  try
+    {
+      using namespace dealii;
+      using namespace Step41;
+
+      // Trilinos, which provides our linear solvers, needs MPI initialized.
+      Utilities::MPI::MPI_InitFinalize mpi_initialization (argc, argv,
+                                                           numbers::invalid_unsigned_int);
+
+      // This program can only be run in serial. Otherwise, throw an exception.
+      AssertThrow(Utilities::MPI::n_mpi_processes(MPI_COMM_WORLD)==1,
+                  ExcMessage("This program can only be run in serial, use ./step-41"));
+
+      ObstacleProblem<2> obstacle_problem;
+      obstacle_problem.run ();
+    }
+  catch (std::exception &exc)
+    {
+      // Standard exceptions carry a message we can show to the user.
+      std::cerr << std::endl << std::endl << rule << std::endl;
+      std::cerr << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << rule << std::endl;
+
+      return 1;
+    }
+  catch (...)
+    {
+      // Anything else offers no description; report it generically.
+      std::cerr << std::endl << std::endl << rule << std::endl;
+      std::cerr << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << rule << std::endl;
+      return 1;
+    }
+
+  return 0;
+}
diff --git a/examples/step-42/CMakeLists.txt b/examples/step-42/CMakeLists.txt
new file mode 100644
index 0000000..6c4a8dd
--- /dev/null
+++ b/examples/step-42/CMakeLists.txt
@@ -0,0 +1,58 @@
+##
+#  CMake script for the step-42 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-42")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Define the output that should be cleaned:
+SET(CLEAN_UP_FILES *.vtu *.pvtu *.visit)
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+#
+# Are all dependencies fulfilled? This program needs MPI, p4est and Trilinos.
+#
+IF( NOT DEAL_II_WITH_MPI OR
+    NOT DEAL_II_WITH_P4EST OR
+    NOT DEAL_II_WITH_TRILINOS )
+  MESSAGE(FATAL_ERROR "
+Error! The deal.II library found at ${DEAL_II_PATH} was not configured with
+    DEAL_II_WITH_MPI = ON
+    DEAL_II_WITH_P4EST = ON
+    DEAL_II_WITH_TRILINOS = ON
+One or all of these are OFF in your installation but are required for this tutorial step."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-42/doc/builds-on b/examples/step-42/doc/builds-on
new file mode 100644
index 0000000..8defe7d
--- /dev/null
+++ b/examples/step-42/doc/builds-on
@@ -0,0 +1 @@
+step-41 step-40
diff --git a/examples/step-42/doc/intro.dox b/examples/step-42/doc/intro.dox
new file mode 100644
index 0000000..93573cb
--- /dev/null
+++ b/examples/step-42/doc/intro.dox
@@ -0,0 +1,425 @@
+<br>
+
+<i>This program was contributed by Jörg Frohne (University of Siegen,
+Germany) while on a long-term visit to Texas A&M University, with significant
+contributions by Timo Heister and Wolfgang Bangerth.
+<br>
+<br>
+The code described here provides the basis for the numerical experiments shown
+in the following paper:
+<br>
+  J. Frohne, T. Heister, W. Bangerth: <b>Efficient numerical methods for the large-scale, parallel
+                  solution of elastoplastic contact problems</b>.
+  Accepted for publication in International Journal for Numerical Methods in Engineering, 2015.
+</i>
+
+
+
+<a name="Intro"></a>
+<h3>Introduction</h3>
+
+This example is an extension of step-41, considering a 3d contact problem with an
+elasto-plastic material behavior with isotropic hardening.
+In other words, it considers how a three-dimensional body deforms if one pushes
+into it a rigid obstacle (the contact problem) where deformation is governed
+by an elasto-plastic material law (a material that can only accommodate a certain
+maximal stress) that hardens as deformation accumulates. To show what we intend to
+do before going into too many details, let us just show a picture of what the
+solution will look like (the deformable body is a cube - only half of
+which is actually shown -, the obstacle corresponds
+to a Chinese character that is discussed below):
+
+<img src="http://www.dealii.org/images/steps/developer/step-42.CellConstitutionLi2.png" alt="">
+
+
+This problem description implies that we have to take care of an additional
+nonlinearity compared to step-41: the
+material behavior. Since we consider a three dimensional problem here, we also
+have to account for the fact that the contact area is at the boundary of
+the deformable body now, rather than in the interior. Finally, compared to
+step-41, we also have to deal with hanging nodes in both the handling of the linear
+system as well as of the inequality constraints as we would like to use an
+adaptive mesh; in the latter case, we will
+have to deal with prioritizing whether the constraints from the hanging nodes
+or from the inequalities are more important.
+
+Since you can very easily reach a few million degrees of freedom in three
+dimensions, even with adaptive mesh refinement, we decided to use Trilinos and
+p4est to run our code in parallel, building on the framework of step-40 for
+the parallelization. Additional pointers for parallelization can be found in
+step-32.
+
+
+<h3>Classical formulation</h3>
+
+The classical formulation of the problem possesses the following form:
+@f{align*}
+ \varepsilon(\mathbf u) &= A\sigma + \varepsilon^p & &\quad\text{in } \Omega,\\
+  -\textrm{div}\ \sigma &= \mathbf f & &\quad\text{in } \Omega,\\
+  \varepsilon^p:(\tau - \sigma) &\geq 0\quad\forall\tau\text{ with
+  }\mathcal{F}(\tau)\leq 0 & &\quad\text{in } \Omega,\\
+  \mathbf u &= 0 & &\quad\text{on }\Gamma_D,\\
+  \sigma \mathbf n - [\mathbf n \cdot(\sigma \mathbf n)]\mathbf n &= 0,
+  \quad \mathbf n \cdot (\sigma
+  \mathbf n) \leq 0 & &\quad\text{on }\Gamma_C,\\
+  (\mathbf n \cdot (\sigma
+  \mathbf n))(\mathbf n \cdot \mathbf u - g) &= 0,\quad \mathbf n
+  \cdot \mathbf u - g \leq 0 & &\quad\text{on } \Gamma_C.
+@f}
+Here, the first of these equations defines the
+relationship between strain $\varepsilon(\mathbf u)=\frac{1}{2}\left(\nabla \mathbf u
+  + \nabla \mathbf u^T\right)$ and stress $\sigma$ via
+the fourth-order compliance tensor $A$; $\varepsilon^p$ provides the plastic
+component of the strain to ensure that the stress does not exceed the yield
+stress. We will only consider isotropic
+materials for which $A$ can be expressed in terms of the Lam\'e moduli
+$\lambda$ and $\mu$ or alternatively in terms of the bulk modulus
+$\kappa$ and $\mu$.
+The second equation is the force balance; we will here
+not consider any body forces and henceforth assume that $\mathbf f=0$. The
+complementarity condition in the third line implies that $\varepsilon^p=0$ if
+$\mathcal{F}(\sigma)< 0$ but that $\varepsilon^p$ may be a nonzero tensor if and
+only if $\mathcal{F}(\sigma) = 0$, and in particular that in this case
+$\varepsilon^p$ must point in the direction $\partial
+\mathcal{F}(\sigma)/\partial \sigma$. The inequality $\mathcal{F}(\sigma)\le 0$ is
+a statement of the fact that plastic materials can only support a finite amount
+of stress; in other words, they react with plastic deformations $\varepsilon^p$
+if external forces would otherwise result in a stress $\sigma$ with $\mathcal{F}(\sigma)> 0$.
+A typical form for this <i>yield function</i> is
+$\mathcal{F}(\sigma)=|\sigma^D|-\sigma_{\text{yield}}$ where $\tau^D
+= \tau - \dfrac{1}{3}tr(\tau)I$ is the deviatoric part of a tensor
+and $|\cdot|$ denotes the Frobenius norm.
+
+Further equations describe a
+fixed, zero displacement on $\Gamma_D$ and
+that on the surface $\Gamma_C=\partial\Omega\backslash\Gamma_D$ where contact may appear, the normal
+force $\sigma_n=\mathbf n \cdot (\sigma(\mathbf u)
+  \mathbf n)$ exerted by the obstacle is inward (no "pull" by the obstacle on our
+body) and with zero tangential component $\mathbf \sigma_t= \sigma \mathbf n - \mathbf \sigma_n \mathbf n
+= \sigma \mathbf n - [\mathbf n \cdot(\sigma \mathbf n)]\mathbf n$.
+The last condition is again a complementarity condition that
+implies that on $\Gamma_C$, the normal
+force can only be nonzero if the body is in contact with the obstacle; the
+second part describes the impenetrability of the obstacle and the body.
+The last two equations are commonly referred to as the Signorini contact
+conditions.
+
+Most materials - especially metals - have the property that they show some hardening as a result of
+deformation. In other words, $\sigma_{\text{yield}}$ increases with deformation.
+In practice, it is not the elastic deformation that results in hardening,
+but the plastic component.
+There are different constitutive laws to describe those material behaviors. The
+simplest one is called linear isotropic hardening described by the flow function
+$\mathcal{F}(\sigma,\varepsilon^p) = \vert\sigma^D\vert - (\sigma_0 +
+\gamma^{\text{iso}}|\varepsilon^p|)$.
+
+
+<h3>Reformulation as a variational inequality</h3>
+
+It is generally rather awkward to deal with inequalities. Here, we have to deal with
+two: plasticity and the contact problem.
+As described in more detail in the paper mentioned at the top of this page, one
+can at least reformulate the plasticity in a way that makes it look like a
+nonlinearity that we can then treat with Newton's method. This is slightly
+tricky mathematically since the nonlinearity is not just some smooth
+function but instead has kinks where the stress reaches the yield stress;
+however, it can be shown for such <i>semismooth</i> functions that Newton's
+method still converges.
+
+Without going into details, we will also get rid of the stress as an independent
+variable and instead work exclusively with the displacements $\mathbf u$. Ultimately,
+the goal of this reformulation is that we will want to end up with a symmetric,
+positive definite problem - such as a linearized elasticity problem with spatially
+variable coefficients resulting from the plastic behavior - that needs to be solved
+in each Newton step. We want this because there are efficient and scalable methods
+for the solution of such linear systems, such as CG preconditioned with an
+algebraic multigrid. This is opposed to the saddle point problem akin to the mixed
+Laplace (see step-20) we would get were we to continue with the mixed formulation
+containing both displacements and stresses, and for which step-20 already gives a
+hint at how difficult it is to construct good solvers and preconditioners.
+
+With this said, let us simply state the problem we obtain after reformulation
+(again, details can be found in the paper): Find a displacement $\mathbf u \in
+V^+$ so that
+@f{align*}
+\left(P_{\Pi}(C\varepsilon(\mathbf u)),\varepsilon(\varphi) - \varepsilon(\mathbf u)\right) \geq 0,\quad \forall \varphi\in V^+.
+@f}
+where the projector $P_\Pi$ is defined as
+@f{align*}
+ P_{\Pi}(\tau):=\begin{cases}
+    \tau, & \text{if }\vert\tau^D\vert \leq \sigma_0,\\
+    \left[
+      \dfrac{\gamma^{\text{iso}}}{2\mu + \gamma^{\text{iso}}} +
+      \left(1-\dfrac{\gamma^{\text{iso}}}{2\mu + \gamma^{\text{iso}}}\right)\dfrac{\sigma_0}{\vert\tau^D\vert}
+    \right]\tau^D
+    + \dfrac{1}{3}\text{trace}(\tau) I, & \text{if }\vert\tau^D\vert >
+    \sigma_0,
+  \end{cases}
+@f}
+and the space $V^+$ is the space of all displacements that satisfy the contact
+condition:
+@f{align*}
+  V
+  &=
+  \left\{ \mathbf u\in \left[H^1(\Omega)\right]^{d}:
+    \mathbf u = 0 \text{ on } \Gamma_D\right\},
+  \\
+  V^+
+  &=
+  \left\{ \mathbf u\in V: \mathbf n \cdot \mathbf u\leq g \text{ on } \Gamma_C \right\}.
+@f}
+
+In the actual code, we will use the abbreviation $\gamma=\dfrac{\gamma^{\text{iso}}}{2\mu + \gamma^{\text{iso}}}$.
+
+Given this formulation, we will apply two techniques:
+- Run a Newton method to iterate out the nonlinearity in the projector.
+- Run an active set method for the contact condition, in much the same
+  way as we did in step-41.
+
+A strict approach would keep the active set fixed while we iterate
+the Newton method to convergence (or maybe the other way around: find the
+final active set before moving on to the next Newton iteration).
+In practice, it turns out that it is sufficient to do only a single
+Newton step per active set iteration, and so we will iterate over them
+concurrently. We will also, every once in a while, refine the mesh.
+
+
+<h3>A Newton method for the plastic nonlinearity</h3>
+
+As mentioned, we will treat the nonlinearity of the operator $P_\Pi$ by
+applying a Newton method, despite the fact that the operator is not differentiable
+in the strict sense. However, it satisfies the conditions of <i>slant</i>
+differentiability and this turns out to be enough for Newton's method to work.
+The resulting method then goes by the name <i>semi-smooth Newton method</i>,
+which sounds impressive but is, in reality, just a Newton method applied to
+a semi-smooth function with an appropriately chosen "derivative".
+
+In the current case, we will run our iteration by solving in each iteration $i$
+the following equation (still an inequality, but linearized):
+@f{align*}
+  \label{eq:linearization}
+  \left(I_{\Pi}\varepsilon(\tilde {\mathbf u}^{i}),
+    \varepsilon(\varphi) - \varepsilon(\tilde {\mathbf u}^{i})\right) \geq
+  \left(\left(I_{\Pi}\varepsilon({\mathbf u}^{i-1}),
+    \varepsilon(\varphi) - \varepsilon(\tilde {\mathbf u}^{i})\right) -
+  \left(P_{\Pi}(C\varepsilon({\mathbf u}^{i-1})),
+    \varepsilon(\varphi) - \varepsilon(\tilde {\mathbf u}^{i})\right)\right),
+  \quad \forall \varphi\in V^+,
+@f}
+where the rank-4 tensor $I_\Pi=I_\Pi(\varepsilon^D(\mathbf u^{i-1}))$ given by
+@f{align}
+  I_\Pi = \begin{cases}
+    C_{\mu} + C_{\kappa}, & \hspace{-8em} \text{if } \vert C\varepsilon^D(\mathbf u^{i-1}) \vert \leq \sigma_0,
+    \\
+    \frac{\gamma^{\text{iso}}}{2\mu + \gamma^{\text{iso}}} C_{\mu} + \frac{\left(1-\frac{\gamma^{\text{iso}}}{2\mu + \gamma^{\text{iso}}}\right)\sigma_0}{\vert C\varepsilon^D(\mathbf u^{i-1}) \vert}\left(C_{\mu} -
+      2\mu\dfrac{C\varepsilon^D(\mathbf u^{i-1})\otimes C\varepsilon^D(\mathbf
+        u^{i-1})}{\vert C\varepsilon^D(\mathbf u^{i-1})\vert^2}\right) + C_{\kappa}, & \text{ else.}
+\end{cases}
+@f}
+This tensor is the (formal) linearization of $P_\Pi(C\cdot)$ around $\varepsilon^D(\mathbf u^{i-1})$.
+For the linear isotropic material we consider here,
+the bulk and shear components of the projector are given by
+@f{gather*}
+  C_{\kappa} = \kappa I\otimes I,
+  \qquad\qquad\qquad\qquad
+  C_{\mu} = 2\mu\left(\mathbb{I}  - \dfrac{1}{3} I\otimes
+    I\right),
+@f}
+where $I$
+and $\mathbb{I}$ are the identity tensors of rank 2 and 4, respectively.
+
+Note that this problem corresponds to a linear elastic contact problem
+where $I_\Pi$ plays the role of the elasticity tensor $C=A^{-1}$. Indeed,
+if the material is not plastic at a point, then $I_\Pi=C$. However, at
+places where the material is plastic, $I_\Pi$ is a spatially varying
+function. In any case, the system we have to solve for the Newton iterate
+$\tilde {\mathbf u}^{i}$ gets us closer to the goal of rewriting our problem in
+a way that allows us to use well-known solvers and preconditioners for
+elliptic systems.
+
+As a final note about the Newton method let us mention that as is common with
+Newton methods we need to globalize it by controlling the step length. In
+other words, while the system above solves for $\tilde {\mathbf u}^{i}$, the final
+iterate will rather be
+@f{align*}
+  {\mathbf u}^{i} = {\mathbf u}^{i-1} + \alpha_i (\tilde {\mathbf u}^{i} - {\mathbf u}^{i-1})
+@f}
+where the difference in parentheses on the right takes the role of the
+traditional Newton direction, $\delta {\mathbf u}^{i}$. We will determine
+$\alpha_i$ using a standard line search.
+
+
+<h3>Active Set methods to solve the saddle point problem</h3>
+
+This linearized problem to be solved in each Newton step is essentially like
+in step-41. The only difference consists in the fact that the contact area
+is at the boundary instead of in the domain. But this has no further consequence
+so that we refer to the documentation of step-41 with the only hint that
+$\mathcal{S}$ contains all the vertices at the contact boundary $\Gamma_C$ this
+time. As there, what we need to do is keep a subset of degrees of freedom fixed,
+leading to additional constraints that one can write as a saddle point problem.
+However, as discussed in the paper, by writing these constraints in an
+appropriate way that removes the coupling between degrees of freedom,
+we end up with a set of nodes that essentially just have Dirichlet values
+attached to them.
+
+
+<h3>Overall algorithm</h3>
+
+The algorithm outlined above combines the damped semismooth Newton-method,
+which we use for the nonlinear constitutive law, with the semismooth Newton
+method for the contact. It works as follows:
+<ol>
+ <li> Initialize the active and inactive sets $\mathcal{A}_i$ and $\mathcal{F}_i$
+ such that $\mathcal{S} = \mathcal{A}_i \cup \mathcal{F}_i$ and $\mathcal{A}_i \cap
+ \mathcal{F}_i = \emptyset$ and set $i = 1$. Here, $\mathcal{S}$ is the set of
+ all degrees of freedom located at the surface of the domain where contact
+ may happen.
+ The start value $\hat U^0 :=
+ P_{\mathcal{A}_i}(0)$ fulfills our obstacle condition, i.e., we project an
+ initial zero displacement onto the set of feasible displacements.
+
+ <li> Assemble the Newton matrix $A_{pq} := a'(
+ U^{i-1};\varphi_p,\varphi_q)$ and the right-hand-side $F(\hat U^{i-1})$.
+ These correspond to the linearized Newton step, ignoring for the moment
+ the contact inequality.
+
+ <li> Find the primal-dual pair $(\tilde U^i,\Lambda^i)$ that satisfies
+ @f{align*}
+ A\tilde U^i + B\Lambda^i & = F, &\\
+ \left[B^T\tilde U^i\right]_p & = G_p & \forall p\in\mathcal{A}_i,\\
+ \Lambda^i_p & = 0 & \forall p\in\mathcal{F}_i.
+ @f}
+ As in step-41, we can obtain the solution to this problem by eliminating
+ those degrees of freedom in ${\cal A}_i$ from the first equation and
+ obtain a linear system $\hat {\hat A}(U^{i-1}) \tilde U^i = \hat {\hat H}(U^{i-1})$.
+
+
+
+ <li> Damp the Newton iteration for $i>2$ by applying a line search and
+ calculating a linear combination of $U^{i-1}$ and $\tilde U^i$. This
+ requires finding an
+ $\alpha^i_l:=2^{-l},(l=0,\ldots,10)$ so that
+ @f{gather*}U^i := \alpha^i_l\tilde U^i +
+ (1-\alpha^i_l)U^{i-1}@f}
+ satisfies
+ @f{gather*}
+   \vert {\hat R}\left({\mathbf u}^{i}\right) \vert < \vert {\hat R}\left({\mathbf u}^{i-1}\right) \vert.
+ \f}
+ with ${\hat R}\left({\mathbf u}\right)=\left(P_{\Pi}(C\varepsilon(u)),\varepsilon(\varphi^{i}_p)\right)$ with
+ the exceptions of (i) elements $p\in\mathcal{A}_i$ where we set ${\hat R}\left({\mathbf u}\right)=0$,
+ and (ii) elements that correspond to hanging nodes, which we eliminate in the usual manner.
+
+ <li> Define the new active and inactive sets by
+ @f{gather*}\mathcal{A}_{i+1}:=\lbrace p\in\mathcal{S}:\Lambda^i_p +
+ c\left(\left[B^TU^i\right]_p - G_p\right) > 0\rbrace,@f}
+ @f{gather*}\mathcal{F}_{i+1}:=\lbrace p\in\mathcal{S}:\Lambda^i_p +
+ c\left(\left[B^TU^i\right]_p - G_p\right) \leq 0\rbrace.@f}
+
+ <li>Project $U^i$ so that it satisfies the contact inequality,
+ @f{gather*}\hat U^i := P_{\mathcal{A}_{i+1}}(U^i).@f}
+ Here,
+ $P_{\mathcal{A}}(U)$ is the projection of the active
+ components in $\mathcal{A}$ to the gap
+ @f{gather*}P_{\mathcal{A}}(U)_p:=\begin{cases}
+ U_p, & \textrm{if}\quad p\notin\mathcal{A}\\
+ g_{h,p}, & \textrm{if}\quad
+ p\in\mathcal{A},
+ \end{cases}@f}
+ where $g_{h,p}$ is the <i>gap</i> denoting the distance of the obstacle
+ from the undisplaced configuration of the body.
+
+ <li> If $\mathcal{A}_{i+1} = \mathcal{A}_i$ and $\left\|
+ {\hat R}\left({\mathbf u}^{i}\right) \right\|_{\ell_2} < \delta$ then stop, else set $i=i+1$ and go to
+ step (2). This step ensures that we only stop iterations if both the correct
+ active set has been found and the plasticity has been iterated to sufficient
+ accuracy.
+</ol>
+
+In step 3 of this algorithm,
+the matrix $B\in\mathbb{R}^{n\times m}$, $n>m$ describes the coupling of the
+bases for the displacements and Lagrange multiplier (contact forces)
+and it is not square in our situation since $\Lambda^i$ is only defined on
+$\Gamma_C$, i.e., the surface where contact may happen. As shown in the paper,
+we can choose $B$ to be a matrix that has only one entry per row,
+(see also Hüeber, Wohlmuth: A primal-dual active
+set strategy for non-linear multibody contact problems, Comput. Methods Appl. Mech. Engrg.
+194, 2005, pp. 3147-3166).
+The vector $G$ is defined by a suitable approximation $g_h$ of the gap $g$
+@f{gather*}G_p = \begin{cases}
+g_{h,p}, & \text{if}\quad p\in\mathcal{S}\\
+0, & \text{if}\quad p\notin\mathcal{S}.
+\end{cases}@f}
+
+
+<h3>Adaptive mesh refinement</h3>
+
+Since we run our program in 3d, the computations the program performs are
+expensive. Consequently using adaptive mesh refinement is an important step towards
+staying within acceptable run-times. To make our lives easier we simply choose the
+KellyErrorEstimator that is already implemented in deal.II. We hand the
+solution vector to it which contains the displacement $u$. As we will see in the
+results it yields a quite reasonable adaptive mesh for the contact zone as well
+as for plasticity.
+
+
+<h3>Implementation</h3>
+
+This tutorial is essentially a mixture of step-40 and step-41 but instead of
+PETSc we let the Trilinos library deal with parallelizing the linear algebra
+(like in step-32). Since we are trying to solve a problem similar to the one in
+step-41, we will use the same methods, but now in parallel.
+
+A difficulty is handling of the constraints from
+the Dirichlet conditions, hanging nodes and the inequality condition that
+arises from the contact. For this purpose we create three objects of type
+ConstraintMatrix that describe the various constraints and that we will
+combine as appropriate in each iteration.
+
+Compared to step-41, the program has a few new classes:
+
+<ul>
+<li> <code>ConstitutiveLaw</code> describes the plastic behavior of the
+  material
+
+<li> <code>SphereObstacle</code> describes a sphere that serves as the
+  obstacle that is pushed into the deformable, elastoplastic body.
+  Whether this or the next class is used to describe the obstacle is
+  determined from the input parameter file.
+
+<li> <code>ChineseObstacle</code> (and a helper class) is a class that
+  allows us to read in an obstacle from a file. In the example we
+  will show in the results section, this file will be
+  <code>'obstacle_file.dat'</code> and will correspond to data that shows the
+  Chinese, Japanese or
+  Korean symbol for force or power (see http://www.orientaloutpost.com/ :
+  "This word can be used for motivation - it
+  can also mean power/motion/propulsion/force. It can be anything
+  internal or external that keeps you going. This is the safest way to express
+  motivation in Chinese. If your audience is Japanese, please see the other entry
+  for motivation. This is a word in Japanese and Korean, but it means "motive
+  power" or "kinetic energy" (without the motivation meaning that you are
+  probably looking for)"). In essence, we will pretend that we have a stamp
+  (i.e., a mask that corresponds to a flat bottomed obstacle with no pieces
+  of intermediate height) that we press into the body. The symbol in question
+  looks as follows (see also the picture at
+  the top of this section for what the end result looks like):
+
+  <img src="http://www.dealii.org/images/steps/developer/step-42.character.png" alt="" width="25%">
+</ul>
+
+Other than that, let us comment only on the following aspects:
+<ul>
+<li> The program allows you to select from two different coarse meshes
+  through the parameter file. These are either a cube $[0,1]^3$ or
+  a half sphere with the open side facing the positive $z$ direction.
+  
+<li>In either case, we will assume the convention that the part of the
+  boundary that may be in contact with the obstacle has boundary
+  indicator one. For both kinds of meshes, we assume that this is a free
+  surface, i.e., the body is either in contact there or there is no force
+  acting on it. For the half sphere, the curved part has boundary
+  indicator zero and we impose zero displacement there. For the box,
+  we impose zero displacement along the bottom but allow vertical
+  displacement along the sides (though no horizontal displacement).
+</ul>
diff --git a/examples/step-42/doc/kind b/examples/step-42/doc/kind
new file mode 100644
index 0000000..56e049c
--- /dev/null
+++ b/examples/step-42/doc/kind
@@ -0,0 +1 @@
+solids
diff --git a/examples/step-42/doc/results.dox b/examples/step-42/doc/results.dox
new file mode 100644
index 0000000..6f34dec
--- /dev/null
+++ b/examples/step-42/doc/results.dox
@@ -0,0 +1,221 @@
+<h1>Results</h1>
+
+The directory that contains this program also contains a number of input
+parameter files that can be used to create various different
+simulations. For example, running the program with the
+<code>p1_adaptive.prm</code> parameter file (using a ball as obstacle and the
+box as domain) on 16 cores produces output like this:
+@code
+    Using output directory 'p1adaptive/'
+    FE degree 1
+    transfer solution false
+
+Cycle 0:
+   Number of active cells: 512
+   Number of degrees of freedom: 2187
+ 
+   Newton iteration 1
+      Updating active set...
+         Size of active set: 1
+      Assembling system... 
+      Solving system... 
+         Error: 173.076 -> 3.06875e-07 in 8 Bicgstab iterations.
+      Accepting Newton solution with residual: 3.06875e-07
+      Active set did not change!
+ 
+   Newton iteration 2
+      Updating active set...
+         Size of active set: 1
+      Assembling system... 
+      Solving system... 
+         Error: 57.3622 -> 2.46266e-07 in 8 Bicgstab iterations.
+      Accepting Newton solution with residual: 24.9028
+      Active set did not change!
+ 
+   Newton iteration 3
+      Updating active set...
+         Size of active set: 1
+      Assembling system... 
+      Solving system... 
+         Error: 24.9028 -> 1.83021e-07 in 7 Bicgstab iterations.
+      Residual of the non-contact part of the system: 1.63333
+         with a damping parameter alpha = 1
+      Active set did not change!
+
+...
+
+  Newton iteration 6
+      Updating active set...
+         Size of active set: 1
+      Assembling system... 
+      Solving system... 
+         Error: 1.43188e-07 -> 4.61326e-16 in 8 Bicgstab iterations.
+      Residual of the non-contact part of the system: 5.67016e-14
+         with a damping parameter alpha = 1
+      Active set did not change!
+      Writing graphical output... p1adaptive/solution-00.pvtu
+
+
++---------------------------------------------+------------+------------+
+| Total wallclock time elapsed since start    |      1.13s |            |
+|                                             |            |            |
+| Section                         | no. calls |  wall time | % of total |
++---------------------------------+-----------+------------+------------+
+| Assembling                      |         6 |     0.463s |        41% |
+| Graphical output                |         1 |    0.0257s |       2.3% |
+| Residual and lambda             |         4 |    0.0754s |       6.7% |
+| Setup                           |         1 |     0.227s |        20% |
+| Setup: constraints              |         1 |    0.0347s |       3.1% |
+| Setup: distribute DoFs          |         1 |    0.0441s |       3.9% |
+| Setup: matrix                   |         1 |    0.0119s |       1.1% |
+| Setup: vectors                  |         1 |   0.00155s |      0.14% |
+| Solve                           |         6 |     0.246s |        22% |
+| Solve: iterate                  |         6 |    0.0631s |       5.6% |
+| Solve: setup preconditioner     |         6 |     0.167s |        15% |
+| update active set               |         6 |    0.0401s |       3.6% |
++---------------------------------+-----------+------------+------------+
+
+Peak virtual memory used, resident in kB: 541884 77464
+Contact force = 37.3058
+
+...
+
+Cycle 3:
+   Number of active cells: 14652
+   Number of degrees of freedom: 52497
+ 
+   Newton iteration 1
+      Updating active set...
+         Size of active set: 145
+      Assembling system... 
+      Solving system... 
+         Error: 296.309 -> 2.45532e-06 in 9 Bicgstab iterations.
+      Accepting Newton solution with residual: 2.45532e-06
+      Active set did not change!
+ 
+...
+
+  Newton iteration 10
+      Updating active set...
+         Size of active set: 145
+      Assembling system... 
+      Solving system... 
+         Error: 2.71542e-07 -> 2.69366e-15 in 21 Bicgstab iterations.
+      Residual of the non-contact part of the system: 1.73713e-13
+         with a damping parameter alpha = 1
+      Active set did not change!
+      Writing graphical output... p1adaptive/solution-03.pvtu
+
+
++---------------------------------------------+------------+------------+
+| Total wallclock time elapsed since start    |      38.4s |            |
+|                                             |            |            |
+| Section                         | no. calls |  wall time | % of total |
++---------------------------------+-----------+------------+------------+
+| Assembling                      |        10 |      22.5s |        58% |
+| Graphical output                |         1 |     0.327s |      0.85% |
+| Residual and lambda             |         9 |      3.75s |       9.8% |
+| Setup                           |         1 |      4.83s |        13% |
+| Setup: constraints              |         1 |     0.578s |       1.5% |
+| Setup: distribute DoFs          |         1 |      0.71s |       1.8% |
+| Setup: matrix                   |         1 |     0.111s |      0.29% |
+| Setup: refine mesh              |         1 |      4.83s |        13% |
+| Setup: vectors                  |         1 |   0.00548s |     0.014% |
+| Solve                           |        10 |      5.49s |        14% |
+| Solve: iterate                  |        10 |       3.5s |       9.1% |
+| Solve: setup preconditioner     |        10 |      1.84s |       4.8% |
+| update active set               |        10 |     0.662s |       1.7% |
++---------------------------------+-----------+------------+------------+
+
+Peak virtual memory used, resident in kB: 566052 105788
+Contact force = 56.794
+
+...
+@endcode
+
+The tables at the end of each cycle show information about computing time
+and the number of calls of different parts of the program like assembly or
+calculating the residual, for the most recent mesh refinement cycle. Some of
+the numbers above can be improved by transferring the solution from one mesh to
+the next, an option we have not exercised here. Of course, you can also make
+the program run faster, especially on the later refinement cycles, by just
+using more processors: the accompanying paper shows good scaling to at least
+1000 cores.
+
+In a typical run, you can observe that for every refinement step, the active
+set - the contact points - is iterated out at first. After that the Newton
+method has only to resolve the plasticity. For the finer meshes,
+quadratic convergence can be observed for the last 4 or 5 Newton iterations.
+
+We will not discuss here in all detail what happens with each of the input
+files. Rather, let us just show pictures of the solution (the left half of the
+domain is omitted if cells have zero quadrature points at which the plastic
+inequality is active):
+
+<table align="center">
+  <tr>
+    <td>
+    <img src="http://www.dealii.org/images/steps/developer/step-42.CellConstitutionColorbar.png">
+    </td>
+    <td>
+    <img src="http://www.dealii.org/images/steps/developer/step-42.CellConstitutionBall2.png" alt="" width="70%">
+    </td>
+    <td valign="top">
+       
+    </td>
+    <td>
+    <img src="http://www.dealii.org/images/steps/developer/step-42.CellConstitutionLi2.png" alt="" width="70%">
+    </td>
+  </tr>
+</table>
+
+The picture shows the adaptive refinement as well as how much a cell is
+plastified during the contact with the ball. Remember that we consider the
+norm of the deviator part of the stress in each quadrature point to
+see if there is elastic or plastic behavior. 
+The blue
+color means that this cell contains only elastic quadrature points in
+contrast to the red cells in which all quadrature points are plastified.
+In the middle of the top surface - 
+where the mesh is finest - a very close look shows the dimple caused by the
+obstacle. This is the result of the <code>move_mesh()</code>
+function. However, because the indentation of the obstacles we consider here
+is so small, it is hard to discern this effect; one could play with displacing
+vertices of the mesh by a multiple of the computed displacement.
+
+Further discussion of results that can be obtained using this program is
+provided in the publication mentioned at the very top of this page.
+
+
+<a name="extensions"></a>
+<h1>Possibilities for extensions</h1>
+
+There are, as always, multiple possibilities for extending this program. From
+an algorithmic perspective, this program goes about as far as one can at the
+time of writing, using the best available algorithms for the contact
+inequality, the plastic nonlinearity, and the linear solvers. However, there
+are things one would like to do with this program as far as more realistic
+situations are concerned:
+<ul>
+<li> Extend the program from a static to a quasi-static situation, perhaps by
+choosing a backward-Euler-scheme for the time discretization. Some theoretical
+results can be found in the PhD thesis by Jörg Frohne, <i>FEM-Simulation
+der Umformtechnik metallischer Oberflächen im Mikrokosmos</i>, University
+of Siegen, Germany, 2011.
+
+<li> It would also be an interesting advance to consider a contact problem
+with friction. In almost every mechanical process friction has a big
+influence.  To model this situation, we have to take into account tangential
+stresses at the contact surface. Friction also adds another inequality to
+our problem since body and obstacle will typically stick together as long as
+the tangential stress does not exceed a certain limit, beyond which the two
+bodies slide past each other.
+
+<li> If we already simulate a frictional contact, the next step to consider
+is heat generation over the contact zone. The heat that is
+caused by friction between two bodies raises the temperature in the
+deformable body and entails a change of some material parameters.
+
+<li> It might be of interest to implement more accurate, problem-adapted error
+estimators for contact as well as for the plasticity.
+</ul>
diff --git a/examples/step-42/doc/tooltip b/examples/step-42/doc/tooltip
new file mode 100644
index 0000000..d93e7df
--- /dev/null
+++ b/examples/step-42/doc/tooltip
@@ -0,0 +1 @@
+An adaptive, 3d solver for an elasto-plastic contact problem
diff --git a/examples/step-42/obstacle.pbm b/examples/step-42/obstacle.pbm
new file mode 100644
index 0000000..7fae69a
--- /dev/null
+++ b/examples/step-42/obstacle.pbm
@@ -0,0 +1,13602 @@
+P1
+680 780
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 
+1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 
+0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 
+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 
+1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
diff --git a/examples/step-42/p1_adaptive.prm b/examples/step-42/p1_adaptive.prm
new file mode 100644
index 0000000..54bc9e9
--- /dev/null
+++ b/examples/step-42/p1_adaptive.prm
@@ -0,0 +1,6 @@
+set polynomial degree             = 1
+set number of initial refinements = 3
+set number of cycles              = 12
+set refinement strategy           = percentage
+set obstacle                      = sphere
+set output directory              = p1_adaptive
diff --git a/examples/step-42/p1_chinese.prm b/examples/step-42/p1_chinese.prm
new file mode 100644
index 0000000..c7a6d06
--- /dev/null
+++ b/examples/step-42/p1_chinese.prm
@@ -0,0 +1,6 @@
+set polynomial degree             = 1
+set number of initial refinements = 3
+set number of cycles              = 12
+set refinement strategy           = percentage
+set obstacle                      = read from file
+set output directory              = p1_chinese
diff --git a/examples/step-42/p1_global.prm b/examples/step-42/p1_global.prm
new file mode 100644
index 0000000..fd15bdb
--- /dev/null
+++ b/examples/step-42/p1_global.prm
@@ -0,0 +1,6 @@
+set polynomial degree             = 1
+set number of initial refinements = 3
+set number of cycles              = 7
+set refinement strategy           = global
+set obstacle                      = sphere
+set output directory              = p1_global
diff --git a/examples/step-42/p2_adaptive.prm b/examples/step-42/p2_adaptive.prm
new file mode 100644
index 0000000..3c06265
--- /dev/null
+++ b/examples/step-42/p2_adaptive.prm
@@ -0,0 +1,25 @@
+# Listing of Parameters
+# ---------------------
+
+# polynomial degree of the FE_Q finite element space, typically 1 or 2
+set polynomial degree             = 2
+
+# number of initial global refinements before the first computation
+set number of initial refinements = 2
+
+# number of adaptive cycles to run
+set number of cycles              = 11
+
+# refinement strategy for each cycle:
+# global: one global refinement
+# percentage: fixed percentage gets refined using kelly
+# fix dofs: tries to achieve 2^initial_refinement*300 dofs after cycle 1 (only
+# use 2 cycles!). Changes the coarse mesh!
+set refinement strategy           = percentage
+
+# obstacle file to read, leave empty to use a sphere or 'obstacle_file.pbm'
+set obstacle filename             = 
+
+# directory to put output files (graphical output and benchmark statistics);
+# leave empty to put into current directory
+set output directory              = p2adaptive
diff --git a/examples/step-42/p2_global.prm b/examples/step-42/p2_global.prm
new file mode 100644
index 0000000..652d079
--- /dev/null
+++ b/examples/step-42/p2_global.prm
@@ -0,0 +1,6 @@
+set polynomial degree             = 2
+set number of initial refinements = 3
+set number of cycles              = 6
+set refinement strategy           = global
+set obstacle                      = sphere
+set output directory              = p2_global
diff --git a/examples/step-42/step-42.cc b/examples/step-42/step-42.cc
new file mode 100644
index 0000000..54860f5
--- /dev/null
+++ b/examples/step-42/step-42.cc
@@ -0,0 +1,2248 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2012 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Authors: Joerg Frohne, Texas A&M University and
+ *                        University of Siegen, 2012, 2013
+ *          Wolfgang Bangerth, Texas A&M University, 2012, 2013
+ *          Timo Heister, Texas A&M University, 2013
+ */
+
+// @sect3{Include files}
+// The set of include files is not much of a surprise any more at this time:
+#include <deal.II/base/conditional_ostream.h>
+#include <deal.II/base/parameter_handler.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/index_set.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/timer.h>
+
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/sparsity_tools.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/block_sparsity_pattern.h>
+#include <deal.II/lac/solver_bicgstab.h>
+#include <deal.II/lac/precondition.h>
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/lac/trilinos_sparse_matrix.h>
+#include <deal.II/lac/trilinos_block_sparse_matrix.h>
+#include <deal.II/lac/trilinos_vector.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+#include <deal.II/lac/trilinos_precondition.h>
+#include <deal.II/lac/trilinos_solver.h>
+
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/grid_tools.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/manifold_lib.h>
+
+#include <deal.II/distributed/tria.h>
+#include <deal.II/distributed/grid_refinement.h>
+#include <deal.II/distributed/solution_transfer.h>
+
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_renumbering.h>
+#include <deal.II/dofs/dof_tools.h>
+
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_system.h>
+#include <deal.II/fe/fe_values.h>
+
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/matrix_tools.h>
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/numerics/error_estimator.h>
+#include <deal.II/numerics/fe_field_function.h>
+
+#include <fstream>
+#include <iostream>
+
+// This final include file provides the <code>mkdir</code> function
+// that we will use to create a directory for output files, if necessary:
+#include <sys/stat.h>
+
+namespace Step42
+{
+  using namespace dealii;
+
+  // @sect3{The <code>ConstitutiveLaw</code> class template}
+
+  // This class provides an interface for a constitutive law, i.e., for the
+  // relationship between strain $\varepsilon(\mathbf u)$ and stress
+  // $\sigma$. In this example we are using an elastoplastic material behavior
+  // with linear, isotropic hardening. Such materials are characterized by
+  // Young's modulus $E$, Poisson's ratio $\nu$, the initial yield stress
+  // $\sigma_0$ and the isotropic hardening parameter $\gamma$.  For $\gamma =
+  // 0$ we obtain perfect elastoplastic behavior.
+  //
+  // As explained in the paper that describes this program, the first Newton
+  // steps are solved with a completely elastic material model to avoid having
+  // to deal with both nonlinearities (plasticity and contact) at once. To this
+  // end, this class has a function <code>set_sigma_0()</code> that we use later
+  // on to simply set $\sigma_0$ to a very large value -- essentially
+  // guaranteeing that the actual stress will not exceed it, and thereby
+  // producing an elastic material. When we are ready to use a plastic model, we
+  // set $\sigma_0$ back to its proper value, using the same function.  As a
+  // result of this approach, we need to leave <code>sigma_0</code> as the only
+  // non-const member variable of this class.
+  template <int dim>
+  class ConstitutiveLaw   // elastoplastic constitutive law with linear isotropic hardening
+  {
+  public:
+    ConstitutiveLaw (const double E,          // Young's modulus
+                     const double nu,         // Poisson's ratio
+                     const double sigma_0,    // initial yield stress
+                     const double gamma);     // isotropic hardening parameter
+    // Replaces the stored yield stress sigma_0 by the given value.
+    void
+    set_sigma_0 (double sigma_zero);
+    // Writes the stress-strain tensor at the given strain; returns true if the point is plastic.
+    bool
+    get_stress_strain_tensor (const SymmetricTensor<2, dim> &strain_tensor,
+                              SymmetricTensor<4, dim> &stress_strain_tensor) const;
+    // Writes both the stress-strain tensor and its linearization at the given strain.
+    void
+    get_linearized_stress_strain_tensors (const SymmetricTensor<2, dim> &strain_tensor,
+                                          SymmetricTensor<4, dim> &stress_strain_tensor_linearized,
+                                          SymmetricTensor<4, dim> &stress_strain_tensor) const;
+
+  private:
+    const double kappa;      // bulk modulus, E / (3 (1 - 2 nu))
+    const double mu;         // shear modulus, E / (2 (1 + nu))
+    double       sigma_0;    // yield stress; the only non-const member, changed by set_sigma_0()
+    const double gamma;      // hardening parameter
+    // The two tensors below are built from kappa and mu, so they must be declared after them.
+    const SymmetricTensor<4, dim> stress_strain_tensor_kappa;
+    const SymmetricTensor<4, dim> stress_strain_tensor_mu;
+  };
+
+  // The constructor of the ConstitutiveLaw class sets the required material
+  // parameters for our deformable body. Material parameters for elastic
+  // isotropic media can be defined in a variety of ways, such as the pair $E,
+  // \nu$ (elastic modulus and Poisson's number), using the Lame parameters
+  // $\lambda,\mu$ or several other commonly used conventions. Here, the
+  // constructor takes a description of material parameters in the form of
+  // $E,\nu$, but since it turns out that these are not the coefficients that
+  // appear in the equations of the plastic projector, we immediately convert
+  // them into the more suitable set $\kappa,\mu$ of bulk and shear moduli.  In
+  // addition, the constructor takes $\sigma_0$ (the yield stress absent any
+  // plastic strain) and $\gamma$ (the hardening parameter) as arguments. In
+  // this constructor, we also compute the two principal components of the
+  // stress-strain relation and its linearization.
+  template <int dim>
+  ConstitutiveLaw<dim>::ConstitutiveLaw (const double E,
+                                         const double nu,
+                                         const double sigma_0,
+                                         const double gamma)
+    :
+    kappa (E / (3.0 * (1.0 - 2.0 * nu))),   // bulk modulus
+    mu (E / (2.0 * (1.0 + nu))),            // shear modulus
+    sigma_0 (sigma_0),                      // the parameter shadows the member of the same name
+    gamma (gamma),
+    // volumetric part, kappa * (I (x) I); reads the member kappa initialized above
+    stress_strain_tensor_kappa (kappa * outer_product(unit_symmetric_tensor<dim>(),
+                                                      unit_symmetric_tensor<dim>())),
+    stress_strain_tensor_mu ((2.0 * mu)     // deviatoric part, reads the member mu
+                             * (identity_tensor<dim>()
+                                - outer_product(unit_symmetric_tensor<dim>(),
+                                                unit_symmetric_tensor<dim>()) / 3.0))
+  {}
+
+
+  template <int dim>
+  void
+  ConstitutiveLaw<dim>::set_sigma_0 (double sigma_zero)
+  {
+    this->sigma_0 = sigma_zero;   // overwrite the stored yield stress
+  }
+
+
+  // @sect4{ConstitutiveLaw::get_stress_strain_tensor}
+
+  // This is the principal component of the constitutive law. It
+  // computes the fourth order symmetric tensor that relates the
+  // strain to the stress according to the projection given above,
+  // when evaluated at a particular strain point. We need this
+  // function to calculate the nonlinear residual in
+  // <code>PlasticityContactProblem::residual_nl_system()</code> where
+  // we multiply this tensor with the strain given in a quadrature
+  // point. The computations follow the formulas laid out in the
+  // introduction. In comparing the formulas there with the
+  // implementation below, recall that $C_\mu : \varepsilon = \tau_D$
+  // and that $C_\kappa : \varepsilon = \kappa
+  // \text{trace}(\varepsilon) I = \frac 13 \text{trace}(\tau) I$.
+  //
+  // The function returns whether the quadrature point is plastic to allow for
+  // some statistics downstream on how many of the quadrature points are
+  // plastic and how many are elastic.
+  template <int dim>
+  bool
+  ConstitutiveLaw<dim>::
+  get_stress_strain_tensor (const SymmetricTensor<2, dim> &strain_tensor,
+                            SymmetricTensor<4, dim> &stress_strain_tensor) const
+  {
+    Assert (dim == 3, ExcNotImplemented());
+
+    // Stress of the purely elastic law at the given strain.
+    const SymmetricTensor<2, dim> trial_stress
+      = (stress_strain_tensor_kappa + stress_strain_tensor_mu) * strain_tensor;
+
+    // The point counts as plastic when the norm of the deviatoric part
+    // of that stress exceeds the yield stress sigma_0.
+    const double deviatoric_norm = deviator(trial_stress).norm();
+    const bool   is_plastic      = (deviatoric_norm > sigma_0);
+
+    // Start from the deviatoric operator; at a plastic point it is
+    // scaled by gamma + (1 - gamma) * sigma_0 / |dev(stress)|.
+    stress_strain_tensor = stress_strain_tensor_mu;
+    if (is_plastic)
+      stress_strain_tensor *= (gamma + (1 - gamma) * (sigma_0 / deviatoric_norm));
+    // The volumetric part is added unscaled in either case.
+    stress_strain_tensor += stress_strain_tensor_kappa;
+    return is_plastic;
+  }
+
+
+  // @sect4{ConstitutiveLaw::get_linearized_stress_strain_tensors}
+
+  // This function returns the linearized stress strain tensor, linearized
+  // around the solution $u^{i-1}$ of the previous Newton step $i-1$.  The
+  // parameter <code>strain_tensor</code> (commonly denoted
+  // $\varepsilon(u^{i-1})$) must be passed as an argument, and serves as the
+  // linearization point. The function returns the derivative of the nonlinear
+  // constitutive law in the variable stress_strain_tensor, as well as the
+  // stress-strain tensor of the linearized problem in
+  // stress_strain_tensor_linearized.  See
+  // PlasticityContactProblem::assemble_nl_system where this function is used.
+  template <int dim>
+  void
+  ConstitutiveLaw<dim>::
+  get_linearized_stress_strain_tensors (const SymmetricTensor<2, dim> &strain_tensor,
+                                        SymmetricTensor<4, dim> &stress_strain_tensor_linearized,
+                                        SymmetricTensor<4, dim> &stress_strain_tensor) const
+  {
+    Assert (dim == 3, ExcNotImplemented());
+
+    // Elastic stress at the linearization point and its deviatoric part.
+    const SymmetricTensor<2, dim> elastic_stress
+      = (stress_strain_tensor_kappa + stress_strain_tensor_mu) * strain_tensor;
+    SymmetricTensor<2, dim> deviator_direction = deviator(elastic_stress);
+    const double            deviator_norm      = deviator_direction.norm();
+
+    stress_strain_tensor_linearized = stress_strain_tensor_mu;
+    stress_strain_tensor            = stress_strain_tensor_mu;
+    if (deviator_norm > sigma_0)
+      {
+        // plastic point: scale both operators, then correct only the linearized one
+        const double beta    = sigma_0 / deviator_norm;
+        const double scaling = gamma + (1 - gamma) * beta;
+        stress_strain_tensor            *= scaling;
+        stress_strain_tensor_linearized *= scaling;
+        deviator_direction /= deviator_norm;
+        stress_strain_tensor_linearized -= (1 - gamma) * beta * 2 * mu
+                                           * outer_product(deviator_direction,
+                                                           deviator_direction);
+      }
+
+    stress_strain_tensor            += stress_strain_tensor_kappa;
+    stress_strain_tensor_linearized += stress_strain_tensor_kappa;
+  }
+
+  // <h3>Equation data: boundary forces, boundary values, obstacles</h3>
+  //
+  // The following should be relatively standard. We need classes for
+  // the boundary forcing term (which we here choose to be zero)
+  // and boundary values on those parts of the boundary that are not part
+  // of the contact surface (also chosen to be zero here).
+  namespace EquationData
+  {
+    template <int dim>
+    class BoundaryForce : public Function<dim>
+    {
+    public:
+      // Constructs the Function<dim> base with argument dim.
+      BoundaryForce ();
+
+      // Value of a single component; zero for every point and component.
+      virtual double value (const Point<dim>   &p,
+                            const unsigned int  component = 0) const;
+
+      // All components at once, filled by one value() call per component.
+      virtual void vector_value (const Point<dim> &p,
+                                 Vector<double>   &values) const;
+    };
+
+    template <int dim>
+    BoundaryForce<dim>::BoundaryForce ()
+      : Function<dim>(dim)
+    {}
+
+    template <int dim>
+    double
+    BoundaryForce<dim>::value (const Point<dim> & /*p*/,
+                               const unsigned int /*component*/) const
+    {
+      // the boundary forcing term is chosen to be zero
+      return 0.0;
+    }
+
+    template <int dim>
+    void
+    BoundaryForce<dim>::vector_value (const Point<dim> &p,
+                                      Vector<double> &values) const
+    {
+      for (unsigned int comp = 0; comp < this->n_components; ++comp)
+        values(comp) = BoundaryForce<dim>::value(p, comp);
+    }
+
+
+
+    template <int dim>
+    class BoundaryValues : public Function<dim>
+    {
+    public:
+      // Constructs the Function<dim> base with argument dim.
+      BoundaryValues ();
+
+      // Value of a single component; zero for every point and component.
+      virtual double value (const Point<dim>   &p,
+                            const unsigned int  component = 0) const;
+
+      // All components at once, filled by one value() call per component.
+      virtual void vector_value (const Point<dim> &p,
+                                 Vector<double>   &values) const;
+    };
+
+    template <int dim>
+    BoundaryValues<dim>::BoundaryValues ()
+      : Function<dim>(dim)
+    {}
+
+    template <int dim>
+    double
+    BoundaryValues<dim>::value (const Point<dim> & /*p*/,
+                                const unsigned int /*component*/) const
+    {
+      // the prescribed boundary values are chosen to be zero
+      return 0.0;
+    }
+
+    template <int dim>
+    void
+    BoundaryValues<dim>::vector_value (const Point<dim> &p,
+                                       Vector<double> &values) const
+    {
+      for (unsigned int comp = 0; comp < this->n_components; ++comp)
+        values(comp) = BoundaryValues<dim>::value(p, comp);
+    }
+
+
+    // @sect4{The <code>SphereObstacle</code> class}
+
+    // The following class is the first of two obstacles that can be
+    // selected from the input file. It describes a sphere centered
+    // at position $x=y=0.5, z=z_{\text{surface}}+0.59$ and radius $r=0.6$,
+    // where $z_{\text{surface}}$ is the vertical position of the (flat)
+    // surface of the deformable body. The function's <code>value</code>
+    // returns the location of the obstacle for a given $x,y$ value if the
+    // point actually lies below the sphere, or a large positive value that
+    // can't possibly interfere with the deformation if it lies outside
+    // the "shadow" of the sphere.
+    template <int dim>
+    class SphereObstacle : public Function<dim>   // spherical obstacle above the body's surface
+    {
+    public:
+      SphereObstacle (const double z_surface);
+      // Components 0 and 1 return p(0) and p(1); component 2 returns the obstacle height.
+      virtual
+      double value (const Point<dim> &p,
+                    const unsigned int component = 0) const;
+      // Fills values(c) with value(p, c) for every component c.
+      virtual
+      void vector_value (const Point<dim> &p,
+                         Vector<double> &values) const;
+
+    private:
+      const double z_surface;   // vertical position of the (flat) surface of the deformable body
+    };
+
+    // The constructor passes dim to the Function<dim> base and stores z_surface unchanged.
+    template <int dim>
+    SphereObstacle<dim>::SphereObstacle (const double z_surface)
+      :
+      Function<dim>(dim),
+      z_surface(z_surface)
+    {}
+
+
+    template <int dim>
+    double
+    SphereObstacle<dim>::value (const Point<dim>   &p,
+                                const unsigned int  component) const
+    {
+      switch (component)
+        {
+        case 0:
+          return p(0);
+        case 1:
+          return p(1);
+        case 2:
+        {
+          // squared horizontal offsets from the sphere's axis x = y = 0.5
+          const double dx2 = (p(0) - 0.5) * (p(0) - 0.5);
+          const double dy2 = (p(1) - 0.5) * (p(1) - 0.5);
+          // below the sphere (r^2 = 0.36): its lower surface; elsewhere a large value
+          return (dx2 + dy2 < 0.36) ? (-std::sqrt(0.36 - dx2 - dy2) + z_surface + 0.59) : 1000;
+        }
+        }
+      Assert(false, ExcNotImplemented());
+      return 1e9; // unreasonable fallback; irrelevant in debug mode because of the Assert above
+    }
+
+
+    template <int dim>
+    void SphereObstacle<dim>::vector_value (const Point<dim> &p,
+                                            Vector<double>   &values) const
+    {
+      // one value() call per vector component
+      for (unsigned int comp = 0; comp < this->n_components; ++comp)
+        values(comp) = SphereObstacle<dim>::value(p, comp);
+    }
+
+    // @sect4{The <code>BitmapFile</code> and <code>ChineseObstacle</code> classes}
+
+    // The following two classes describe the obstacle outlined in the introduction,
+    // i.e., the Chinese character. The first of the two, <code>BitmapFile</code>
+    // is responsible for reading in data from a picture file
+    // stored in pbm ascii format. This data will be bilinearly interpolated and
+    // thereby provides a function that describes the obstacle. (The code below
+    // shows how one can construct a function by interpolating between given
+    // data points. One could use the Functions::InterpolatedUniformGridData,
+    // introduced after this tutorial program was written, which does exactly
+    // what we want here, but it is instructive to see how to do it by hand.)
+    //
+    // The data which we read from the file will be stored in a std::vector<double>
+    // named obstacle_data.  This vector provides the data points from which we
+    // compute a piecewise bilinear interpolant. The data we will
+    // read from a file consists of zeros (white) and ones (black).
+    //
+    // The <code>hx,hy</code> variables denote the spacing between pixels in $x$
+    // and $y$ directions. <code>nx,ny</code> are the numbers of pixels in each of
+    // these directions.  <code>get_value()</code> returns the value of the image
+    // at a given location, interpolated from the adjacent pixel values.
+    template <int dim>
+    class BitmapFile   // image data read from a file and evaluated by interpolation
+    {
+    public:
+      BitmapFile(const std::string &name);   // reads the image stored in the file <name>
+      // Image value at (x,y), interpolated from the adjacent pixel values.
+      double
+      get_value(const double x, const double y) const;
+
+    private:
+      std::vector<double> obstacle_data;   // pixel values in the order they appear in the file
+      double hx, hy;                       // pixel spacing, set to 1/(nx-1) and 1/(ny-1)
+      int nx, ny;                          // number of pixels in x and y direction
+      // Value of pixel (i,j); j = 0 addresses the last row stored in obstacle_data.
+      double
+      get_pixel_value(const int i, const int j) const;
+    };
+
+    // Reads the obstacle image from the file <name>: a format tag, the pixel counts nx and ny,
+    // then nx*ny pixel values. Throws if the file can't be opened, is truncated, or nx or ny < 2.
+    template <int dim>
+    BitmapFile<dim>::BitmapFile(const std::string &name)
+      :
+      obstacle_data(0),
+      hx(0),
+      hy(0),
+      nx(0),
+      ny(0)
+    {
+      std::ifstream f(name.c_str());
+      AssertThrow (f, ExcMessage (std::string("Can't read from file <") +
+                                  name + ">!"));
+
+      std::string temp;
+      f >> temp >> nx >> ny;   // format tag (not inspected), width, height
+      // interpolation needs at least two pixels per direction, since hx, hy = 1/(n-1)
+      AssertThrow(f && nx > 1 && ny > 1, ExcMessage("Invalid file format."));
+
+      for (int k = 0; k < nx * ny; k++)
+        {
+          double val = 0;
+          f >> val;
+          // a failed read means the file holds fewer than nx*ny values
+          AssertThrow(f, ExcMessage("Invalid file format."));
+          obstacle_data.push_back(val);
+        }
+      hx = 1.0 / (nx - 1);
+      hy = 1.0 / (ny - 1);
+      // report only once, from the process with MPI rank 0
+      if (Utilities::MPI::this_mpi_process(MPI_COMM_WORLD) == 0)
+        std::cout << "Read obstacle from file <" << name << ">" << std::endl
+                  << "Resolution of the scanned obstacle picture: " << nx
+                  << " x " << ny << std::endl;
+    }
+
+    // The following two functions return the value of a given pixel with
+    // coordinates $i,j$, which we identify with the values of a function
+    // defined at positions <code>i*hx, j*hy</code>, and at arbitrary
+    // coordinates $x,y$ where we do a bilinear interpolation between
+    // point values returned by the first of the two functions. In the
+    // second function, for each $x,y$, we first compute the (integer)
+    // location of the nearest pixel coordinate to the bottom left of
+    // $x,y$, and then compute the coordinates $\xi,\eta$ within this
+    // pixel. We truncate both kinds of variables from both below
+    // and above to avoid problems when evaluating the function outside
+    // of its defined range as may happen due to roundoff errors.
+    template <int dim>
+    double
+    BitmapFile<dim>::get_pixel_value(const int i,
+                                     const int j) const
+    {
+      // Both indices have to address a pixel inside the image.
+      assert(0 <= i && i < nx);
+      assert(0 <= j && j < ny);
+
+      // Rows are addressed in reverse order: j=0 refers to the last row of
+      // nx values stored in obstacle_data, j=ny-1 to the first one.
+      const int stored_row   = ny - 1 - j;
+      const int linear_index = nx * stored_row + i;
+      return obstacle_data[linear_index];
+    }
+
+    template <int dim>
+    double
+    BitmapFile<dim>::get_value(const double x,
+                               const double y) const
+    {
+      // Index of the pixel to the bottom left of (x,y), clamped so that
+      // (ix+1,iy+1) still refers to a valid pixel.
+      const int ix = std::min(std::max(static_cast<int>(x / hx), 0), nx - 2);
+      const int iy = std::min(std::max(static_cast<int>(y / hy), 0), ny - 2);
+
+      // Local coordinates of (x,y) within the cell spanned by the four
+      // surrounding pixels, clamped to [0,1]. The order of the bounds
+      // matters: max(.,0) enforces the lower bound and min(.,1) the upper
+      // one. (Using max(.,1) followed by min(.,0) would always yield zero
+      // and thereby disable the interpolation altogether.)
+      const double xi  = std::min(std::max((x-ix*hx)/hx, 0.), 1.);
+      const double eta = std::min(std::max((y-iy*hy)/hy, 0.), 1.);
+
+      // Bilinear interpolation between the four pixel values.
+      return ((1-xi)*(1-eta)*get_pixel_value(ix,iy)
+              +
+              xi*(1-eta)*get_pixel_value(ix+1,iy)
+              +
+              (1-xi)*eta*get_pixel_value(ix,iy+1)
+              +
+              xi*eta*get_pixel_value(ix+1,iy+1));
+    }
+
+    // Finally, this is the class that actually uses the class above. It
+    // has a BitmapFile object as a member that describes the height of the
+    // obstacle. As mentioned above, the BitmapFile class will provide us
+    // with a mask, i.e., values that are either zero or one (and, if you
+    // ask for locations between pixels, values that are interpolated between
+    // zero and one). This class translates this to heights that are either
+    // 0.001 below the surface of the deformable body (if the BitmapFile
+    // class reports a one at this location) or 0.999 above that surface (if
+    // the BitmapFile class reports a zero). The following function should then
+    // be self-explanatory.
+    template <int dim>
+    class ChineseObstacle : public Function<dim>
+    {
+    public:
+      // Read the obstacle mask from the file <code>filename</code> and
+      // remember the height <code>z_surface</code> relative to which the
+      // obstacle height is computed.
+      ChineseObstacle(const std::string &filename,
+                      const double z_surface);
+
+      // Return a single component of the function at the point
+      // <code>p</code>.
+      virtual
+      double value (const Point<dim> &p,
+                    const unsigned int component = 0) const;
+
+      // Fill <code>values</code> with all components of the function at the
+      // point <code>p</code>.
+      virtual
+      void vector_value (const Point<dim> &p,
+                         Vector<double> &values) const;
+
+    private:
+      // The image from which the obstacle mask is interpolated.
+      const BitmapFile<dim> input_obstacle;
+      // Reference height used when converting mask values to heights.
+      double z_surface;
+    };
+
+
+    // The constructor passes <code>dim</code> to the Function base class
+    // (presumably the number of vector components; vector_value() loops over
+    // <code>n_components</code>), lets the BitmapFile member read the given
+    // file, and stores the reference height.
+    template <int dim>
+    ChineseObstacle<dim>::ChineseObstacle(const std::string &filename,
+                                          const double z_surface)
+      :
+      Function<dim>(dim),
+      input_obstacle(filename),
+      z_surface(z_surface)
+    {}
+
+
+    template <int dim>
+    double
+    ChineseObstacle<dim>::value (const Point<dim> &p,
+                                 const unsigned int component) const
+    {
+      // The first two components simply return the x- and y-coordinate of
+      // the point; the third one is the height of the obstacle, which is
+      // only available for points inside the unit square.
+      switch (component)
+        {
+        case 0:
+          return p(0);
+
+        case 1:
+          return p(1);
+
+        case 2:
+        {
+          const bool inside_unit_square = (p(0) >= 0.0 && p(0) <= 1.0 &&
+                                           p(1) >= 0.0 && p(1) <= 1.0);
+          if (inside_unit_square)
+            return z_surface + 0.999 - input_obstacle.get_value(p(0), p(1));
+          break;
+        }
+
+        default:
+          break;
+        }
+
+      // Everything that was not handled above ends up here.
+      Assert (false, ExcNotImplemented());
+      return 1e9; // an unreasonable value; ignored in debug mode because of the preceding Assert
+    }
+
+    template <int dim>
+    void
+    ChineseObstacle<dim>::vector_value (const Point<dim> &p,
+                                        Vector<double> &values) const
+    {
+      // Evaluate the scalar value() function of this class once for every
+      // vector component.
+      const unsigned int n_comp = this->n_components;
+      for (unsigned int comp = 0; comp != n_comp; ++comp)
+        {
+          const double component_value = ChineseObstacle<dim>::value(p, comp);
+          values(comp) = component_value;
+        }
+    }
+  }
+
+  // @sect3{The <code>PlasticityContactProblem</code> class template}
+
+  // This is the main class of this program and supplies all functions
+  // and variables needed to describe
+  // the nonlinear contact problem. It is
+  // close to step-41 but with some additional
+  // features like handling hanging nodes,
+  // a Newton method, using Trilinos and p4est
+  // for parallel distributed computing.
+  // To deal with hanging nodes makes
+  // life a bit more complicated since
+  // we need another ConstraintMatrix now.
+  // We create a Newton method for the
+  // active set method for the contact
+  // situation and to handle the nonlinear
+  // operator for the constitutive law.
+  //
+  // The general layout of this class is very much like for most other tutorial programs.
+  // To make our life a bit easier, this class reads a set of input parameters from an input file. These
+  // parameters, using the ParameterHandler class, are declared in the <code>declare_parameters</code>
+  // function (which is static so that it can be called before we even create an object of the current
+  // type), and a ParameterHandler object that has been used to read an input file will then be passed
+  // to the constructor of this class.
+  //
+  // The remaining member functions are by and large as we have seen in several of the other tutorial
+  // programs, though with additions for the current nonlinear system. We will comment on their purpose
+  // as we get to them further below.
+  template <int dim>
+  class PlasticityContactProblem
+  {
+  public:
+    // Construct the problem from a ParameterHandler object from which the
+    // run-time parameters declared in declare_parameters() are read.
+    PlasticityContactProblem (const ParameterHandler &prm);
+
+    void run ();
+
+    // Declare the run-time parameters that the constructor reads.
+    static void declare_parameters (ParameterHandler &prm);
+
+  private:
+    // The individual steps of the algorithm; each of these functions is
+    // discussed where it is implemented.
+    void make_grid ();
+    void setup_system ();
+    void compute_dirichlet_constraints ();
+    void update_solution_and_constraints ();
+    void assemble_mass_matrix_diagonal (TrilinosWrappers::SparseMatrix &mass_matrix);
+    void assemble_newton_system (const TrilinosWrappers::MPI::Vector &linearization_point);
+    void compute_nonlinear_residual (const TrilinosWrappers::MPI::Vector &linearization_point);
+    void solve_newton_system ();
+    void solve_newton ();
+    void refine_grid ();
+    void move_mesh (const TrilinosWrappers::MPI::Vector &displacement) const;
+    void output_results (const std::string &filename_base);
+    void output_contact_force () const;
+
+    // As far as member variables are concerned, we start with ones that we use to
+    // indicate the MPI universe this program runs on, a stream we use to let
+    // exactly one processor produce output to the console (see step-17) and
+    // a variable that is used to time the various sections of the program:
+    MPI_Comm           mpi_communicator;
+    ConditionalOStream pcout;
+    TimerOutput        computing_timer;
+
+    // The next group describes the mesh and the finite element space.
+    // In particular, for this parallel program, the finite element
+    // space has associated with it variables that indicate which degrees
+    // of freedom live on the current processor (the index sets, see
+    // also step-40 and the @ref distributed documentation module) as
+    // well as a variety of constraints: those imposed by hanging nodes,
+    // by Dirichlet boundary conditions, and by the active set of
+    // contact nodes. Of the three ConstraintMatrix variables defined
+    // here, the first only contains hanging node constraints, the
+    // second also those associated with Dirichlet boundary conditions,
+    // and the third these plus the contact constraints.
+    //
+    // The variable <code>active_set</code> consists of those degrees
+    // of freedom constrained by the contact, and we use
+    // <code>fraction_of_plastic_q_points_per_cell</code> to keep
+    // track of the fraction of quadrature points on each cell where
+    // the stress equals the yield stress. The latter is only used to
+    // create graphical output showing the plastic zone, but not for
+    // any further computation; the variable is a member variable of
+    // this class since the information is computed as a by-product
+    // of computing the residual, but is used only much later. (Note
+    // that the vector is a vector of length equal to the number of
+    // active cells on the <i>local mesh</i>; it is never used to
+    // exchange information between processors and can therefore be
+    // a regular deal.II vector.)
+    const unsigned int                        n_initial_global_refinements;
+    parallel::distributed::Triangulation<dim> triangulation;
+
+    const unsigned int fe_degree;
+    FESystem<dim>      fe;
+    DoFHandler<dim>    dof_handler;
+
+    IndexSet           locally_owned_dofs;
+    IndexSet           locally_relevant_dofs;
+
+    ConstraintMatrix   constraints_hanging_nodes;
+    ConstraintMatrix   constraints_dirichlet_and_hanging_nodes;
+    ConstraintMatrix   all_constraints;
+
+    IndexSet           active_set;
+    Vector<float>      fraction_of_plastic_q_points_per_cell;
+
+
+    // The next block of variables corresponds to the solution
+    // and the linear systems we need to form. In particular, this
+    // includes the Newton matrix and right hand side; the vector
+    // that corresponds to the residual (i.e., the Newton right hand
+    // side) but from which we have not eliminated the various
+    // constraints and that is used to determine which degrees of
+    // freedom need to be constrained in the next iteration; and
+    // a vector that corresponds to the diagonal of the $B$ matrix
+    // briefly mentioned in the introduction and discussed in the
+    // accompanying paper.
+    TrilinosWrappers::SparseMatrix    newton_matrix;
+
+    TrilinosWrappers::MPI::Vector     solution;
+    TrilinosWrappers::MPI::Vector     newton_rhs;
+    TrilinosWrappers::MPI::Vector     newton_rhs_uncondensed;
+    TrilinosWrappers::MPI::Vector     diag_mass_matrix_vector;
+
+    // The next block contains the variables that describe the material
+    // response:
+    const double         e_modulus, nu, gamma, sigma_0;
+    ConstitutiveLaw<dim> constitutive_law;
+
+    // And then there is an assortment of other variables that are used
+    // to identify the mesh we are asked to build as selected by the
+    // parameter file, the obstacle that is being pushed into the
+    // deformable body, the mesh refinement strategy, whether to transfer
+    // the solution from one mesh to the next, and how many mesh
+    // refinement cycles to perform. As possible, we mark these kinds
+    // of variables as <code>const</code> to help the reader identify
+    // which ones may or may not be modified later on (the output directory
+    // being an exception -- it is never modified outside the constructor
+    // but it is awkward to initialize in the member-initializer-list
+    // following the colon in the constructor since there we have only
+    // one shot at setting it; the same is true for the mesh refinement
+    // criterion):
+    const std::string                                  base_mesh;
+    const std_cxx11::shared_ptr<const Function<dim> >  obstacle;
+
+    // The refinement strategies this class distinguishes. The enum is
+    // wrapped into a struct so that its values have to be qualified with
+    // <code>RefinementStrategy::</code>.
+    struct RefinementStrategy
+    {
+      enum value
+      {
+        refine_global,
+        refine_percentage,
+        refine_fix_dofs
+      };
+    };
+    typename RefinementStrategy::value                 refinement_strategy;
+
+    const bool                                         transfer_solution;
+    std::string                                        output_dir;
+    const unsigned int                                 n_refinement_cycles;
+    unsigned int                                       current_refinement_cycle;
+  };
+
+
+  // @sect3{Implementation of the <code>PlasticityContactProblem</code> class}
+
+  // @sect4{PlasticityContactProblem::declare_parameters}
+
+  // Let us start with the declaration of run-time parameters that can be
+  // selected in the input file. These values will be read back in the
+  // constructor of this class to initialize the member variables of this
+  // class:
+  template <int dim>
+  void
+  PlasticityContactProblem<dim>::declare_parameters (ParameterHandler &prm)
+  {
+    // The integer-valued parameters below are stored in <code>unsigned
+    // int</code> member variables by the constructor. We therefore give
+    // the patterns a lower bound so that negative values are rejected when
+    // the input file is read rather than being converted to huge unsigned
+    // numbers. The polynomial degree has to be at least one because the
+    // element is built from a Gauss-Lobatto formula with degree+1 points.
+    prm.declare_entry("polynomial degree", "1",
+                      Patterns::Integer(1),
+                      "Polynomial degree of the FE_Q finite element space, typically 1 or 2.");
+    prm.declare_entry("number of initial refinements", "2",
+                      Patterns::Integer(0),
+                      "Number of initial global mesh refinement steps before "
+                      "the first computation.");
+    prm.declare_entry("refinement strategy", "percentage",
+                      Patterns::Selection("global|percentage"),
+                      "Mesh refinement strategy:\n"
+                      " global: one global refinement\n"
+                      " percentage: a fixed percentage of cells gets refined using the Kelly estimator.");
+    prm.declare_entry("number of cycles", "5",
+                      Patterns::Integer(0),
+                      "Number of adaptive mesh refinement cycles to run.");
+    prm.declare_entry("obstacle", "sphere",
+                      Patterns::Selection("sphere|read from file"),
+                      "The name of the obstacle to use. This may either be 'sphere' if we should "
+                      "use a spherical obstacle, or 'read from file' in which case the obstacle "
+                      "will be read from a file named 'obstacle.pbm' that is supposed to be in "
+                      "ASCII PBM format.");
+    prm.declare_entry("output directory", "",
+                      Patterns::Anything(),
+                      "Directory for output files (graphical output and benchmark "
+                      "statistics). If empty, use the current directory.");
+    prm.declare_entry("transfer solution", "false",
+                      Patterns::Bool(),
+                      "Whether the solution should be used as a starting guess "
+                      "for the next finer mesh. If false, then the iteration starts at "
+                      "zero on every mesh.");
+    prm.declare_entry("base mesh", "box",
+                      Patterns::Selection("box|half sphere"),
+                      "Select the shape of the domain: 'box' or 'half sphere'");
+  }
+
+
+  // @sect4{The <code>PlasticityContactProblem</code> constructor}
+
+  // Given the declarations of member variables as well as the
+  // declarations of run-time parameters that are read from the input
+  // file, there is nothing surprising in this constructor. In the body
+  // we initialize the mesh refinement strategy and the output directory,
+  // creating such a directory if necessary.
+  template <int dim>
+  PlasticityContactProblem<dim>::
+  PlasticityContactProblem (const ParameterHandler &prm)
+    :
+    mpi_communicator(MPI_COMM_WORLD),
+    pcout(std::cout,
+          (Utilities::MPI::this_mpi_process(mpi_communicator) == 0)),
+    computing_timer(mpi_communicator, pcout, TimerOutput::never,
+                    TimerOutput::wall_times),
+
+    n_initial_global_refinements (prm.get_integer("number of initial refinements")),
+    triangulation(mpi_communicator),
+    fe_degree (prm.get_integer("polynomial degree")),
+    fe(FE_Q<dim>(QGaussLobatto<1>(fe_degree+1)), dim),
+    dof_handler(triangulation),
+
+    e_modulus (200000),
+    nu (0.3),
+    gamma (0.01),
+    sigma_0(400.0),
+    constitutive_law (e_modulus,
+                      nu,
+                      sigma_0,
+                      gamma),
+
+    // Note that <code>base_mesh</code> is declared (and hence initialized)
+    // before <code>obstacle</code>, so it can be used in the latter's
+    // initializer.
+    base_mesh (prm.get("base mesh")),
+    obstacle (prm.get("obstacle") == "read from file"
+              ?
+              static_cast<const Function<dim>*>
+              (new EquationData::ChineseObstacle<dim>("obstacle.pbm", (base_mesh == "box" ? 1.0 : 0.5)))
+              :
+              static_cast<const Function<dim>*>
+              (new EquationData::SphereObstacle<dim>(base_mesh == "box" ? 1.0 : 0.5))),
+
+    transfer_solution (prm.get_bool("transfer solution")),
+    n_refinement_cycles (prm.get_integer("number of cycles")),
+    // Give the cycle counter a defined value so that it is never read
+    // uninitialized.
+    current_refinement_cycle (0)
+  {
+    std::string strat = prm.get("refinement strategy");
+    if (strat == "global")
+      refinement_strategy = RefinementStrategy::refine_global;
+    else if (strat == "percentage")
+      refinement_strategy = RefinementStrategy::refine_percentage;
+    else
+      AssertThrow (false, ExcNotImplemented());
+
+    // Make sure a non-empty directory name ends in a slash so that file
+    // names can simply be appended to it.
+    output_dir = prm.get("output directory");
+    if (output_dir != "" && *(output_dir.rbegin()) != '/')
+      output_dir += "/";
+
+    // An empty name denotes the current directory, which does not need to
+    // be created. Otherwise, try to create the directory; we do not check
+    // the return value of mkdir() because the directory may already exist.
+    if (output_dir != "")
+      mkdir(output_dir.c_str(), 0777);
+
+    pcout << "    Using output directory '" << output_dir << "'" << std::endl;
+    pcout << "    FE degree " << fe_degree << std::endl;
+    pcout << "    transfer solution "
+          << (transfer_solution ? "true" : "false") << std::endl;
+  }
+
+
+
+  // @sect4{PlasticityContactProblem::make_grid}
+
+  // The next block deals with constructing the starting mesh.
+  // We will use the following helper function and the first
+  // block of the <code>make_grid()</code> to construct a
+  // mesh that corresponds to a half sphere. deal.II has a function
+  // that creates such a mesh, but it is in the wrong location
+  // and facing the wrong direction, so we need to shift and rotate
+  // it a bit before using it.
+  //
+  // For later reference, as described in the documentation of
+  // GridGenerator::half_hyper_ball(), the curved surface of the halfsphere
+  // has boundary indicator zero, while the flat (cut) surface has boundary
+  // indicator one.
+  Point<3>
+  rotate_half_sphere (const Point<3> &in)
+  {
+    // Map (x,y,z) to (z,y,-x).
+    const double x = in(0);
+    const double y = in(1);
+    const double z = in(2);
+
+    return Point<3>(z, y, -x);
+  }
+
+  template <int dim>
+  void
+  PlasticityContactProblem<dim>::make_grid ()
+  {
+    if (base_mesh == "half sphere")
+      {
+        // Create a half ball of radius 0.8 around the origin, rotate it
+        // using the helper function above, and then move it so that its
+        // center is at (0.5, 0.5, 0.5).
+        const Point<dim> center(0, 0, 0);
+        const double radius = 0.8;
+        GridGenerator::half_hyper_ball(triangulation, center, radius);
+
+        GridTools::transform(&rotate_half_sphere, triangulation);
+        GridTools::shift(Point<dim>(0.5, 0.5, 0.5), triangulation);
+
+        // Attach a spherical manifold around the shifted center to manifold
+        // indicator zero, after copying boundary indicators to manifold
+        // indicators. (The manifold object is static, presumably because
+        // the triangulation keeps referring to it after this function
+        // returns.)
+        static SphericalManifold<dim> manifold_description(Point<dim>(0.5, 0.5, 0.5));
+        GridTools::copy_boundary_to_manifold_id(triangulation);
+        triangulation.set_manifold(0, manifold_description);
+      }
+    // Alternatively, create a hypercube mesh. After creating it,
+    // assign boundary indicators as follows:
+    // @code
+    // >     _______
+    // >    /  1    /|
+    // >   /______ / |
+    // >  |       | 8|
+    // >  |   8   | /
+    // >  |_______|/
+    // >      6
+    // @endcode
+    // In other words, the boundary indicators of the sides of the cube are 8.
+    // The boundary indicator of the bottom is 6 and the top has indicator 1.
+    // We set these by looping over all faces of all cells and looking at
+    // coordinate values of the face center, and will make use of these
+    // indicators later when evaluating which boundary will carry Dirichlet
+    // boundary conditions or will be subject to potential contact.
+    // (In the current case, the mesh contains only a single cell, and all of
+    // its faces are on the boundary, so both the loop over all cells and the
+    // query whether a face is on the boundary are, strictly speaking,
+    // unnecessary; we retain them simply out of habit: this kind of code can
+    // be found in many programs in essentially this form.)
+    else
+      {
+        const Point<dim> p1(0, 0, 0);
+        const Point<dim> p2(1.0, 1.0, 1.0);
+
+        GridGenerator::hyper_rectangle(triangulation, p1, p2);
+
+        Triangulation<3>::active_cell_iterator
+        cell = triangulation.begin_active(),
+        endc = triangulation.end();
+        for (; cell != endc; ++cell)
+          for (unsigned int face_no = 0; face_no < GeometryInfo<dim>::faces_per_cell;
+               ++face_no)
+            if (cell->face(face_no)->at_boundary())
+              {
+                // top face (z equal to that of p2)
+                if (std::fabs(cell->face(face_no)->center()[2] - p2[2]) < 1e-12)
+                  cell->face(face_no)->set_boundary_id(1);
+                // the four side faces
+                if (std::fabs(cell->face(face_no)->center()[0] - p1[0]) < 1e-12
+                    || std::fabs(cell->face(face_no)->center()[0] - p2[0]) < 1e-12
+                    || std::fabs(cell->face(face_no)->center()[1] - p1[1]) < 1e-12
+                    || std::fabs(cell->face(face_no)->center()[1] - p2[1]) < 1e-12)
+                  cell->face(face_no)->set_boundary_id(8);
+                // bottom face (z equal to that of p1)
+                if (std::fabs(cell->face(face_no)->center()[2] - p1[2]) < 1e-12)
+                  cell->face(face_no)->set_boundary_id(6);
+              }
+      }
+
+    // In either case, refine the resulting coarse mesh globally as often as
+    // requested in the parameter file.
+    triangulation.refine_global(n_initial_global_refinements);
+  }
+
+
+
+  // @sect4{PlasticityContactProblem::setup_system}
+
+  // The next piece in the puzzle is to set up the DoFHandler, resize
+  // vectors and take care of various other status variables such as
+  // index sets and constraint matrices.
+  //
+  // In the following, each group of operations is put into a brace-enclosed
+  // block that is being timed by the variable declared at the top of the
+  // block (the constructor of the TimerOutput::Scope variable starts the
+  // timed section, the destructor that is called at the end of the block
+  // stops it again).
+  template <int dim>
+  void
+  PlasticityContactProblem<dim>::setup_system ()
+  {
+    /* setup dofs and get index sets for locally owned and relevant dofs */
+    {
+      TimerOutput::Scope t(computing_timer, "Setup: distribute DoFs");
+      dof_handler.distribute_dofs(fe);
+
+      locally_owned_dofs = dof_handler.locally_owned_dofs();
+      locally_relevant_dofs.clear();
+      DoFTools::extract_locally_relevant_dofs(dof_handler,
+                                              locally_relevant_dofs);
+    }
+
+    /* setup hanging nodes and Dirichlet constraints */
+    {
+      TimerOutput::Scope t(computing_timer, "Setup: constraints");
+      constraints_hanging_nodes.reinit(locally_relevant_dofs);
+      DoFTools::make_hanging_node_constraints(dof_handler,
+                                              constraints_hanging_nodes);
+      constraints_hanging_nodes.close();
+
+      pcout << "   Number of active cells: "
+            << triangulation.n_global_active_cells() << std::endl
+            << "   Number of degrees of freedom: " << dof_handler.n_dofs()
+            << std::endl;
+
+      compute_dirichlet_constraints();
+    }
+
+    /* initialization of vectors and the active set */
+    {
+      TimerOutput::Scope t(computing_timer, "Setup: vectors");
+      solution.reinit(locally_relevant_dofs, mpi_communicator);
+      newton_rhs.reinit(locally_owned_dofs, mpi_communicator);
+      newton_rhs_uncondensed.reinit(locally_owned_dofs, mpi_communicator);
+      diag_mass_matrix_vector.reinit(locally_owned_dofs, mpi_communicator);
+      fraction_of_plastic_q_points_per_cell.reinit(triangulation.n_active_cells());
+
+      active_set.clear();
+      active_set.set_size(dof_handler.n_dofs());
+    }
+
+    // Finally, we set up sparsity patterns and matrices.
+    // We temporarily (ab)use the system matrix to also build the (diagonal)
+    // matrix that we use in eliminating degrees of freedom that are in contact
+    // with the obstacle, but we then immediately set the Newton matrix back
+    // to zero.
+    {
+      TimerOutput::Scope t(computing_timer, "Setup: matrix");
+      TrilinosWrappers::SparsityPattern sp(locally_owned_dofs,
+                                           mpi_communicator);
+
+      DoFTools::make_sparsity_pattern(dof_handler, sp,
+                                      constraints_dirichlet_and_hanging_nodes, false,
+                                      Utilities::MPI::this_mpi_process(mpi_communicator));
+      sp.compress();
+      newton_matrix.reinit(sp);
+
+
+      TrilinosWrappers::SparseMatrix &mass_matrix = newton_matrix;
+
+      assemble_mass_matrix_diagonal(mass_matrix);
+
+      // Copy the diagonal entries of the locally owned rows into a vector.
+      // The bounds of the local range are global degree of freedom indices,
+      // so we store them in the corresponding index type rather than in
+      // <code>unsigned int</code>, which would truncate them if deal.II is
+      // configured with 64-bit indices.
+      const types::global_dof_index start = (newton_rhs.local_range().first),
+                                    end   = (newton_rhs.local_range().second);
+      for (types::global_dof_index j = start; j < end; ++j)
+        diag_mass_matrix_vector(j) = mass_matrix.diag_element(j);
+      diag_mass_matrix_vector.compress(VectorOperation::insert);
+
+      mass_matrix = 0;
+    }
+  }
+
+
+  // @sect4{PlasticityContactProblem::compute_dirichlet_constraints}
+
+  // This function, broken out of the preceding one, computes the constraints
+  // associated with Dirichlet-type boundary conditions and puts them into the
+  // <code>constraints_dirichlet_and_hanging_nodes</code> variable by merging
+  // with the constraints that come from hanging nodes.
+  //
+  // As laid out in the introduction, we need to distinguish between two
+  // cases:
+  // - If the domain is a box, we set the displacement to zero at the bottom,
+  //   and allow vertical movement in z-direction along the sides. As
+  //   shown in the <code>make_grid()</code> function, the former corresponds
+  //   to boundary indicator 6, the latter to 8.
+  // - If the domain is a half sphere, then we impose zero displacement along
+  //   the curved part of the boundary, associated with boundary indicator zero.
+  template <int dim>
+  void
+  PlasticityContactProblem<dim>::compute_dirichlet_constraints ()
+  {
+    // Start from the hanging node constraints and add the boundary value
+    // constraints to them below.
+    constraints_dirichlet_and_hanging_nodes.reinit(locally_relevant_dofs);
+    constraints_dirichlet_and_hanging_nodes.merge(constraints_hanging_nodes);
+
+    if (base_mesh == "box")
+      {
+        // interpolate all components of the solution
+        // on the part of the boundary with indicator 6
+        VectorTools::interpolate_boundary_values(dof_handler,
+                                                 6,
+                                                 EquationData::BoundaryValues<dim>(),
+                                                 constraints_dirichlet_and_hanging_nodes,
+                                                 ComponentMask());
+
+        // interpolate x- and y-components of the
+        // solution (this is a bit mask, so apply
+        // operator| ) on the part of the boundary
+        // with indicator 8
+        const FEValuesExtractors::Scalar x_displacement(0);
+        const FEValuesExtractors::Scalar y_displacement(1);
+        VectorTools::interpolate_boundary_values(dof_handler,
+                                                 8,
+                                                 EquationData::BoundaryValues<dim>(),
+                                                 constraints_dirichlet_and_hanging_nodes,
+                                                 (fe.component_mask(x_displacement) | fe.component_mask(y_displacement)));
+      }
+    else
+      // for any other mesh, interpolate all components on the part of the
+      // boundary with indicator 0
+      VectorTools::interpolate_boundary_values(dof_handler,
+                                               0,
+                                               EquationData::BoundaryValues<dim>(),
+                                               constraints_dirichlet_and_hanging_nodes,
+                                               ComponentMask());
+
+    constraints_dirichlet_and_hanging_nodes.close();
+  }
+
+
+
+  // @sect4{PlasticityContactProblem::assemble_mass_matrix_diagonal}
+
+  // The next helper function computes the (diagonal) mass matrix that
+  // is used to determine the active set of the active set method we use in
+  // the contact algorithm. This matrix is of mass matrix type, but unlike
+  // the standard mass matrix, we can make it diagonal (even in the case of
+  // higher order elements) by using a quadrature formula that has its
+  // quadrature points at exactly the same locations as the interpolation points
+  // for the finite element are located. We achieve this by using a
+  // QGaussLobatto quadrature formula here, along with initializing the finite
+  // element with a set of interpolation points derived from the same quadrature
+  // formula. The remainder of the function is relatively straightforward: we
+  // put the resulting matrix into the given argument; because we know the
+  // matrix is diagonal, it is sufficient to have a loop over only $i$ and
+  // not over $j$. Strictly speaking, we could even avoid multiplying the
+  // shape function's values at quadrature point <code>q_point</code> by itself
+  // because we know the shape value to be a vector with exactly one one which
+  // when dotted with itself yields one. Since this function is not time
+  // critical we add this term for clarity.
+  template <int dim>
+  void
+  PlasticityContactProblem<dim>::
+  assemble_mass_matrix_diagonal (TrilinosWrappers::SparseMatrix &mass_matrix)
+  {
+    // Use a Gauss-Lobatto formula on faces with one more point than the
+    // polynomial degree of the element.
+    QGaussLobatto<dim-1> face_quadrature_formula(fe.degree + 1);
+
+    FEFaceValues<dim> fe_values_face(fe, face_quadrature_formula,
+                                     update_values | update_JxW_values);
+
+    const unsigned int dofs_per_cell = fe.dofs_per_cell;
+    const unsigned int n_face_q_points = face_quadrature_formula.size();
+
+    FullMatrix<double> cell_matrix(dofs_per_cell, dofs_per_cell);
+    std::vector<types::global_dof_index> local_dof_indices(dofs_per_cell);
+
+    const FEValuesExtractors::Vector displacement(0);
+
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+
+    // Only faces of locally owned cells that are at the boundary and carry
+    // boundary indicator 1 contribute to the matrix.
+    for (; cell != endc; ++cell)
+      if (cell->is_locally_owned())
+        for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell;
+             ++face)
+          if (cell->face(face)->at_boundary()
+              &&
+              cell->face(face)->boundary_id() == 1)
+            {
+              fe_values_face.reinit(cell, face);
+              cell_matrix = 0;
+
+              // Only the diagonal entries of the local matrix are computed.
+              for (unsigned int q_point = 0; q_point<n_face_q_points; ++q_point)
+                for (unsigned int i = 0; i < dofs_per_cell; ++i)
+                  cell_matrix(i, i) += (fe_values_face[displacement].value(i, q_point) *
+                                        fe_values_face[displacement].value(i, q_point) *
+                                        fe_values_face.JxW(q_point));
+
+              cell->get_dof_indices(local_dof_indices);
+
+              // Add these diagonal entries to the diagonal of the global
+              // matrix.
+              for (unsigned int i = 0; i < dofs_per_cell; i++)
+                mass_matrix.add(local_dof_indices[i],
+                                local_dof_indices[i],
+                                cell_matrix(i, i));
+            }
+    mass_matrix.compress(VectorOperation::add);
+  }
+
+
+  // @sect4{PlasticityContactProblem::update_solution_and_constraints}
+
+  // The following function is the first function we call in each Newton
+  // iteration in the <code>solve_newton()</code> function. What it does is
+  // to project the solution onto the feasible set and update the active set
+  // for the degrees of freedom that touch or penetrate the obstacle.
+  //
+  // In order to function, we first need to do some bookkeeping: We need
+  // to write into the solution vector (which we can only do with fully
+  // distributed vectors without ghost elements) and we need to read
+  // the Lagrange multiplier and the elements of the diagonal mass matrix
+  // from their respective vectors (which we can only do with vectors that
+  // do have ghost elements), so we create the respective vectors. We then
+  // also initialize the constraints object that will contain constraints
+  // from contact and all other sources, as well as an object that contains
+  // an index set of all locally owned degrees of freedom that are part of
+  // the contact:
+  template <int dim>
+  void
+  PlasticityContactProblem<dim>::update_solution_and_constraints ()
+  {
+    std::vector<bool> dof_touched(dof_handler.n_dofs(), false);
+
+    TrilinosWrappers::MPI::Vector distributed_solution(locally_owned_dofs, mpi_communicator);
+    distributed_solution = solution;
+
+    TrilinosWrappers::MPI::Vector lambda(locally_relevant_dofs, mpi_communicator);
+    lambda = newton_rhs_uncondensed;
+
+    TrilinosWrappers::MPI::Vector diag_mass_matrix_vector_relevant(locally_relevant_dofs, mpi_communicator);
+    diag_mass_matrix_vector_relevant = diag_mass_matrix_vector;
+
+
+    all_constraints.reinit(locally_relevant_dofs);
+    active_set.clear();
+
+    // The second part is a loop over all cells in which we look at each
+    // point where a degree of freedom is defined whether the active set
+    // condition is true and we need to add this degree of freedom to
+    // the active set of contact nodes. As we always do, if we want to
+    // evaluate functions at individual points, we do this with an
+    // FEValues object (or, here, an FEFaceValues object since we need to
+    // check contact at the surface) with an appropriately chosen quadrature
+    // object. We create this face quadrature object by choosing the
+    // "support points" of the shape functions defined on the faces
+    // of cells (for more on support points, see this
+    // @ref GlossSupport "glossary entry"). As a consequence, we have as
+    // many quadrature points as there are shape functions per face and
+    // looping over quadrature points is equivalent to looping over shape
+    // functions defined on a face. With this, the code looks as follows:
+    Quadrature<dim-1> face_quadrature(fe.get_unit_face_support_points());
+    FEFaceValues<dim> fe_values_face(fe, face_quadrature,
+                                     update_quadrature_points);
+
+    const unsigned int dofs_per_face = fe.dofs_per_face;
+    const unsigned int n_face_q_points = face_quadrature.size();
+
+    std::vector<types::global_dof_index> dof_indices(dofs_per_face);
+
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+
+    for (; cell != endc; ++cell)
+      if (!cell->is_artificial())
+        for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+          if (cell->face(face)->at_boundary()
+              &&
+              cell->face(face)->boundary_id() == 1)
+            {
+              fe_values_face.reinit(cell, face);
+              cell->face(face)->get_dof_indices(dof_indices);
+
+              for (unsigned int q_point=0; q_point<n_face_q_points; ++q_point)
+                {
+                  // At each quadrature point (i.e., at each support point of a degree
+                  // of freedom located on the contact boundary), we then ask whether
+                  // it is part of the z-displacement degrees of freedom and if we
+                  // haven't encountered this degree of freedom yet (which can happen
+                  // for those on the edges between faces), we need to evaluate the gap
+                  // between the deformed object and the obstacle. If the active set
+                  // condition is true, then we add a constraint to the ConstraintMatrix
+                  // object that the next Newton update needs to satisfy, set the solution
+                  // vector's corresponding element to the correct value, and add the
+                  // index to the IndexSet object that stores which degree of freedom is
+                  // part of the contact:
+                  const unsigned int
+                  component = fe.face_system_to_component_index(q_point).first;
+                  // Use the global index type so that 64-bit indices are not truncated:
+                  const types::global_dof_index index_z = dof_indices[q_point];
+
+                  if ((component == 2) && (dof_touched[index_z] == false))
+                    {
+                      dof_touched[index_z] = true;
+
+                      const Point<dim> this_support_point = fe_values_face.quadrature_point(q_point);
+
+                      const double obstacle_value = obstacle->value(this_support_point, 2);
+                      const double solution_here = solution(index_z);
+                      const double undeformed_gap = obstacle_value - this_support_point(2);
+
+                      const double c = 100.0 * e_modulus;
+                      if ((lambda(index_z) / diag_mass_matrix_vector_relevant(index_z)
+                           +
+                           c * (solution_here - undeformed_gap)
+                           > 0)
+                          &&
+                          !constraints_hanging_nodes.is_constrained(index_z))
+                        {
+                          all_constraints.add_line(index_z);
+                          all_constraints.set_inhomogeneity(index_z, undeformed_gap);
+                          distributed_solution(index_z) = undeformed_gap;
+
+                          active_set.add_index(index_z);
+                        }
+                    }
+                }
+            }
+
+    // At the end of this function, we exchange data between processors updating
+    // those ghost elements in the <code>solution</code> variable that have been
+    // written by other processors. We then merge the Dirichlet constraints and
+    // those from hanging nodes into the ConstraintMatrix object that already
+    // contains the active set. We finish the function by outputting the total
+    // number of actively constrained degrees of freedom for which we sum over
+    // the number of actively constrained degrees of freedom owned by each
+    // of the processors. This number of locally owned constrained degrees of
+    // freedom is of course the number of elements of the intersection of the
+    // active set and the set of locally owned degrees of freedom, which
+    // we can get by using <code>operator&</code> on two IndexSets:
+    distributed_solution.compress(VectorOperation::insert);
+    solution = distributed_solution;
+
+    all_constraints.close();
+    all_constraints.merge(constraints_dirichlet_and_hanging_nodes);
+
+    pcout << "         Size of active set: "
+          << Utilities::MPI::sum((active_set & locally_owned_dofs).n_elements(),
+                                 mpi_communicator)
+          << std::endl;
+  }
+
+
+  // @sect4{PlasticityContactProblem::assemble_newton_system}
+
+  // Given the complexity of the problem, it may come as a bit of a surprise
+  // that assembling the linear system we have to solve in each Newton iteration
+  // is actually fairly straightforward. The following function builds the Newton
+  // right hand side and Newton matrix. It looks fairly innocent because the
+  // heavy lifting happens in the call to
+  // <code>ConstitutiveLaw::get_linearized_stress_strain_tensors()</code> and in
+  // particular in ConstraintMatrix::distribute_local_to_global(), using the
+  // constraints we have previously computed.
+  template <int dim>
+  void
+  PlasticityContactProblem<dim>::
+  assemble_newton_system (const TrilinosWrappers::MPI::Vector &linearization_point)
+  {
+    TimerOutput::Scope t(computing_timer, "Assembling");
+
+    QGauss<dim> quadrature_formula(fe.degree + 1);
+    QGauss<dim - 1> face_quadrature_formula(fe.degree + 1);
+
+    FEValues<dim> fe_values(fe, quadrature_formula,
+                            update_values | update_gradients | update_JxW_values);
+
+    FEFaceValues<dim> fe_values_face(fe, face_quadrature_formula,
+                                     update_values | update_quadrature_points | update_JxW_values);
+
+    const unsigned int dofs_per_cell   = fe.dofs_per_cell;
+    const unsigned int n_q_points      = quadrature_formula.size();
+    const unsigned int n_face_q_points = face_quadrature_formula.size();
+
+    const EquationData::BoundaryForce<dim> boundary_force;
+    std::vector<Vector<double> >           boundary_force_values(n_face_q_points,
+        Vector<double>(dim));
+
+    FullMatrix<double>                     cell_matrix(dofs_per_cell, dofs_per_cell);
+    Vector<double>                         cell_rhs(dofs_per_cell);
+
+    std::vector<types::global_dof_index>   local_dof_indices(dofs_per_cell);
+
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+
+    const FEValuesExtractors::Vector displacement(0);
+
+    for (; cell != endc; ++cell)
+      if (cell->is_locally_owned())
+        {
+          fe_values.reinit(cell);
+          cell_matrix = 0;
+          cell_rhs = 0;
+
+          std::vector<SymmetricTensor<2, dim> > strain_tensor(n_q_points);
+          fe_values[displacement].get_function_symmetric_gradients(linearization_point,
+                                                                   strain_tensor);
+
+          for (unsigned int q_point = 0; q_point < n_q_points; ++q_point)
+            {
+              SymmetricTensor<4, dim> stress_strain_tensor_linearized;
+              SymmetricTensor<4, dim> stress_strain_tensor;
+              constitutive_law.get_linearized_stress_strain_tensors(strain_tensor[q_point],
+                                                                    stress_strain_tensor_linearized,
+                                                                    stress_strain_tensor);
+
+              for (unsigned int i = 0; i < dofs_per_cell; ++i)
+                {
+                  // Having computed the stress-strain tensor and its linearization,
+                  // we can now put together the parts of the matrix and right hand side.
+                  // In both, we need the linearized stress-strain tensor times the
+                  // symmetric gradient of $\varphi_i$, i.e. the term $I_\Pi\varepsilon(\varphi_i)$,
+                  // so we introduce an abbreviation of this term. Recall that the
+                  // matrix corresponds to the bilinear form
+                  // $A_{ij}=(I_\Pi\varepsilon(\varphi_i),\varepsilon(\varphi_j))$ in the
+                  // notation of the accompanying publication, whereas the right
+                  // hand side is $F_i=([I_\Pi-P_\Pi C]\varepsilon(\varphi_i),\varepsilon(\mathbf u))$
+                  // where $u$ is the current linearization point (typically the last solution).
+                  // This might suggest that the right hand side will be zero if the material
+                  // is completely elastic (where $I_\Pi=P_\Pi$) but this ignores the fact
+                  // that the right hand side will also contain contributions from
+                  // non-homogeneous constraints due to the contact.
+                  //
+                  // The code block that follows this adds contributions that are due to
+                  // boundary forces, should there be any.
+                  const SymmetricTensor<2, dim>
+                  stress_phi_i = stress_strain_tensor_linearized
+                                 * fe_values[displacement].symmetric_gradient(i, q_point);
+
+                  for (unsigned int j = 0; j < dofs_per_cell; ++j)
+                    cell_matrix(i, j) += (stress_phi_i
+                                          * fe_values[displacement].symmetric_gradient(j, q_point)
+                                          * fe_values.JxW(q_point));
+
+                  cell_rhs(i) += ((stress_phi_i
+                                   -
+                                   stress_strain_tensor
+                                   * fe_values[displacement].symmetric_gradient(i, q_point))
+                                  * strain_tensor[q_point]
+                                  * fe_values.JxW(q_point));
+                }
+            }
+          // Boundary force on faces with boundary indicator 1; only its component 2 (z) is used:
+          for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+            if (cell->face(face)->at_boundary()
+                &&
+                cell->face(face)->boundary_id() == 1)
+              {
+                fe_values_face.reinit(cell, face);
+
+                boundary_force.vector_value_list(fe_values_face.get_quadrature_points(),
+                                                 boundary_force_values);
+
+                for (unsigned int q_point=0; q_point<n_face_q_points; ++q_point)
+                  {
+                    Tensor<1, dim> rhs_values;
+                    rhs_values[2] = boundary_force_values[q_point][2];
+                    for (unsigned int i = 0; i < dofs_per_cell; ++i)
+                      cell_rhs(i) += (fe_values_face[displacement].value(i, q_point)
+                                      * rhs_values
+                                      * fe_values_face.JxW(q_point));
+                  }
+              }
+          // Add the local matrix and vector to the global system through all_constraints:
+          cell->get_dof_indices(local_dof_indices);
+          all_constraints.distribute_local_to_global(cell_matrix, cell_rhs,
+                                                     local_dof_indices,
+                                                     newton_matrix,
+                                                     newton_rhs,
+                                                     true);
+
+        }
+
+    newton_matrix.compress(VectorOperation::add);
+    newton_rhs.compress(VectorOperation::add);
+  }
+
+
+
+  // @sect4{PlasticityContactProblem::compute_nonlinear_residual}
+
+  // The following function computes the nonlinear residual of the equation
+  // given the current solution (or any other linearization point). This
+  // is needed in the line search algorithm where we need to try various
+  // linear combinations of previous and current (trial) solution to
+  // compute the (real, globalized) solution of the current Newton step.
+  //
+  // That said, in a slight abuse of the name of the function, it actually
+  // does significantly more. For example, it also computes the vector
+  // that corresponds to the Newton residual but without eliminating
+  // constrained degrees of freedom. We need this vector to compute contact
+  // forces and, ultimately, to compute the next active set. Likewise, by
+  // keeping track of how many quadrature points we encounter on each cell
+  // that show plastic yielding, we also compute the
+  // <code>fraction_of_plastic_q_points_per_cell</code> vector that we
+  // can later output to visualize the plastic zone. In both of these cases,
+  // the results are not necessary as part of the line search, and so we may
+  // be wasting a small amount of time computing them. At the same time, this
+  // information appears as a natural by-product of what we need to do here
+  // anyway, and we want to collect it once at the end of each Newton
+  // step, so we may as well do it here.
+  //
+  // The actual implementation of this function should be rather obvious:
+  template <int dim>
+  void
+  PlasticityContactProblem<dim>::
+  compute_nonlinear_residual (const TrilinosWrappers::MPI::Vector &linearization_point)
+  {
+    QGauss<dim>   quadrature_formula(fe.degree + 1);
+    QGauss<dim-1> face_quadrature_formula(fe.degree + 1);
+
+    FEValues<dim> fe_values(fe, quadrature_formula,
+                            update_values | update_gradients |
+                            update_JxW_values);
+
+    FEFaceValues<dim> fe_values_face(fe, face_quadrature_formula,
+                                     update_values | update_quadrature_points |
+                                     update_JxW_values);
+
+    const unsigned int dofs_per_cell   = fe.dofs_per_cell;
+    const unsigned int n_q_points      = quadrature_formula.size();
+    const unsigned int n_face_q_points = face_quadrature_formula.size();
+
+    const EquationData::BoundaryForce<dim> boundary_force;
+    std::vector<Vector<double> >           boundary_force_values(n_face_q_points,
+        Vector<double>(dim));
+
+    Vector<double> cell_rhs(dofs_per_cell);
+
+    std::vector<types::global_dof_index> local_dof_indices(dofs_per_cell);
+
+    const FEValuesExtractors::Vector displacement(0);
+    // Start from zero: everything below is accumulated cell by cell.
+    newton_rhs                            = 0;
+    newton_rhs_uncondensed                = 0;
+
+    fraction_of_plastic_q_points_per_cell = 0;
+
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+    unsigned int cell_number = 0;
+    for (; cell != endc; ++cell, ++cell_number)
+      if (cell->is_locally_owned())
+        {
+          fe_values.reinit(cell);
+          cell_rhs = 0;
+
+          std::vector<SymmetricTensor<2, dim> > strain_tensors(n_q_points);
+          fe_values[displacement].get_function_symmetric_gradients(linearization_point,
+                                                                   strain_tensors);
+
+          for (unsigned int q_point = 0; q_point < n_q_points; ++q_point)
+            {
+              SymmetricTensor<4, dim> stress_strain_tensor;
+              const bool q_point_is_plastic
+                = constitutive_law.get_stress_strain_tensor(strain_tensors[q_point],
+                                                            stress_strain_tensor);
+              if (q_point_is_plastic)
+                ++fraction_of_plastic_q_points_per_cell(cell_number);
+
+              for (unsigned int i = 0; i < dofs_per_cell; ++i)
+                {
+                  cell_rhs(i) -= (strain_tensors[q_point]
+                                  * stress_strain_tensor
+                                  * fe_values[displacement].symmetric_gradient(i, q_point)
+                                  * fe_values.JxW(q_point));
+                  // rhs_values is zeroed right here, so this volume term adds nothing:
+                  Tensor<1, dim> rhs_values;
+                  rhs_values = 0;
+                  cell_rhs(i) += (fe_values[displacement].value(i, q_point)
+                                  * rhs_values
+                                  * fe_values.JxW(q_point));
+                }
+            }
+
+          for (unsigned int face = 0; face < GeometryInfo<dim>::faces_per_cell; ++face)
+            if (cell->face(face)->at_boundary()
+                && cell->face(face)->boundary_id() == 1)
+              {
+                fe_values_face.reinit(cell, face);
+
+                boundary_force.vector_value_list(fe_values_face.get_quadrature_points(),
+                                                 boundary_force_values);
+
+                for (unsigned int q_point = 0; q_point < n_face_q_points;
+                     ++q_point)
+                  {
+                    Tensor<1, dim> rhs_values;
+                    rhs_values[2] = boundary_force_values[q_point][2];
+                    for (unsigned int i = 0; i < dofs_per_cell; ++i)
+                      cell_rhs(i) += (fe_values_face[displacement].value(i, q_point) * rhs_values
+                                      * fe_values_face.JxW(q_point));
+                  }
+              }
+
+          cell->get_dof_indices(local_dof_indices);
+          constraints_dirichlet_and_hanging_nodes.distribute_local_to_global(cell_rhs,
+              local_dof_indices,
+              newton_rhs);
+          // The same local vector is also added, entry by entry, to newton_rhs_uncondensed:
+          for (unsigned int i = 0; i < dofs_per_cell; ++i)
+            newton_rhs_uncondensed(local_dof_indices[i]) += cell_rhs(i);
+        }
+    // Convert the per-cell counts of plastic quadrature points into fractions:
+    fraction_of_plastic_q_points_per_cell /= quadrature_formula.size();
+    newton_rhs.compress(VectorOperation::add);
+    newton_rhs_uncondensed.compress(VectorOperation::add);
+  }
+
+
+
+
+
+  // @sect4{PlasticityContactProblem::solve_newton_system}
+
+  // The last piece before we can discuss the actual Newton iteration
+  // on a single mesh is the solver for the linear systems. There are
+  // a couple of complications that slightly obscure the code, but
+  // mostly it is just setup then solve. Among the complications are:
+  //
+  // - For the hanging nodes we have to apply
+  //   the ConstraintMatrix::set_zero function to newton_rhs.
+  //   This is necessary if a hanging node with solution value $x_0$
+  //   has one neighbor with value $x_1$ which is in contact with the
+  //   obstacle and one neighbor $x_2$ which is not in contact. Because
+  //   the update for the former will be prescribed, the hanging node constraint
+  //   will have an inhomogeneity and will look like $x_0 = x_2/2 + \text{gap}/2$.
+  //   So the corresponding entries in the
+  //   right-hand side are non-zero with a
+  //   meaningless value. These values we have
+  //   to set to zero.
+  // - Like in step-40, we need to shuffle between vectors that do and
+  //   do not have ghost elements when solving or using the solution.
+  //
+  // The rest of the function is similar to step-40 and
+  // step-41 except that we use a BiCGStab solver
+  // instead of CG. This is due to the fact that for very small hardening
+  // parameters $\gamma$, the linear system becomes almost semidefinite though
+  // still symmetric. BiCGStab appears to have an easier time with such linear
+  // systems.
+  template <int dim>
+  void
+  PlasticityContactProblem<dim>::solve_newton_system ()
+  {
+    TimerOutput::Scope solve_timer(computing_timer, "Solve");
+
+    TrilinosWrappers::MPI::Vector owned_solution(locally_owned_dofs, mpi_communicator);
+    owned_solution = solution;
+
+    // Apply set_zero() for the hanging node constraints to start vector and right hand side:
+    constraints_hanging_nodes.set_zero(owned_solution);
+    constraints_hanging_nodes.set_zero(newton_rhs);
+
+    TrilinosWrappers::PreconditionAMG amg_preconditioner;
+    {
+      TimerOutput::Scope setup_timer(computing_timer, "Solve: setup preconditioner");
+
+      std::vector<std::vector<bool> > constant_modes;
+      DoFTools::extract_constant_modes(dof_handler, ComponentMask(),
+                                       constant_modes);
+
+      TrilinosWrappers::PreconditionAMG::AdditionalData amg_options;
+      amg_options.constant_modes        = constant_modes;
+      amg_options.elliptic              = true;
+      amg_options.n_cycles              = 1;
+      amg_options.w_cycle               = false;
+      amg_options.output_details        = false;
+      amg_options.smoother_sweeps       = 2;
+      amg_options.aggregation_threshold = 1e-2;
+
+      amg_preconditioner.initialize(newton_matrix, amg_options);
+    }
+
+    {
+      TimerOutput::Scope iterate_timer(computing_timer, "Solve: iterate");
+
+      // The tolerance is 1e-8 times the residual computed for the start vector:
+      TrilinosWrappers::MPI::Vector residual_scratch(locally_owned_dofs, mpi_communicator);
+      const double initial_residual  = newton_matrix.residual(residual_scratch, owned_solution,
+                                                              newton_rhs);
+      const double relative_accuracy = 1e-8;
+      const double solver_tolerance  = relative_accuracy * initial_residual;
+
+      SolverControl bicgstab_control(newton_matrix.m(), solver_tolerance);
+      SolverBicgstab<TrilinosWrappers::MPI::Vector> bicgstab(bicgstab_control);
+      bicgstab.solve(newton_matrix, owned_solution,
+                     newton_rhs, amg_preconditioner);
+
+      pcout << "         Error: " << bicgstab_control.initial_value()
+            << " -> " << bicgstab_control.last_value() << " in "
+            << bicgstab_control.last_step() << " Bicgstab iterations."
+            << std::endl;
+    }
+
+    // Apply all constraints to the result, then copy it back into the solution member:
+    all_constraints.distribute(owned_solution);
+    solution = owned_solution;
+  }
+
+
+  // @sect4{PlasticityContactProblem::solve_newton}
+
+  // This is, finally, the function that implements the damped Newton method
+  // on the current mesh. There are two nested loops: the outer loop for the Newton
+  // iteration and the inner loop for the line search which
+  // will be used only if necessary. To obtain a good and reasonable
+  // starting value we solve an elastic problem in the very first Newton step on each
+  // mesh (or only on the first mesh if we transfer solutions between meshes). We
+  // do so by setting the yield stress to an unreasonably large value in these
+  // iterations and then setting it back to the correct value in subsequent
+  // iterations.
+  //
+  // Other than this, the top part of this function should be
+  // reasonably obvious. We initialize the variable
+  // <code>previous_residual_norm</code> to the most negative value
+  // representable with double precision numbers so that the
+  // comparison whether the current residual is less than that of the
+  // previous step will always fail in the first step.
+  template <int dim>
+  void
+  PlasticityContactProblem<dim>::solve_newton ()
+  {
+    TrilinosWrappers::MPI::Vector old_solution(locally_owned_dofs, mpi_communicator);
+    TrilinosWrappers::MPI::Vector residual(locally_owned_dofs, mpi_communicator);
+    TrilinosWrappers::MPI::Vector tmp_vector(locally_owned_dofs, mpi_communicator);
+    TrilinosWrappers::MPI::Vector locally_relevant_tmp_vector(locally_relevant_dofs, mpi_communicator);
+    TrilinosWrappers::MPI::Vector distributed_solution(locally_owned_dofs, mpi_communicator);
+
+    double residual_norm;
+    double previous_residual_norm = -std::numeric_limits<double>::max();
+
+    const double correct_sigma = sigma_0;
+
+    IndexSet old_active_set(active_set);
+
+    for (unsigned int newton_step = 1; newton_step <= 100; ++newton_step)
+      {
+        if (newton_step == 1
+            &&
+            ((transfer_solution && current_refinement_cycle == 0)
+             ||
+             !transfer_solution))
+          constitutive_law.set_sigma_0(1e+10);
+        else if (newton_step == 2
+                 ||
+                 current_refinement_cycle > 0
+                 ||
+                 !transfer_solution)
+          constitutive_law.set_sigma_0(correct_sigma);
+
+        pcout << " " << std::endl;
+        pcout << "   Newton iteration " << newton_step << std::endl;
+        pcout << "      Updating active set..." << std::endl;
+
+        {
+          TimerOutput::Scope t(computing_timer, "update active set");
+          update_solution_and_constraints();
+        }
+
+        pcout << "      Assembling system... " << std::endl;
+        newton_matrix = 0;
+        newton_rhs = 0;
+        assemble_newton_system(solution);
+
+        pcout << "      Solving system... " << std::endl;
+        solve_newton_system();
+
+        // It gets a bit more hairy after we have computed the
+        // trial solution $\tilde{\mathbf u}$ of the current Newton step.
+        // We handle a highly nonlinear problem so we have to damp
+        // Newton's method using a line search. To understand how we do this,
+        // recall that in our formulation, we compute a trial solution
+        // in each Newton step and not the update between old and new solution.
+        // Since the solution set is a convex set, we will use a line
+        // search that tries linear combinations of the
+        // previous and the trial solution to guarantee that the
+        // damped solution is in our solution set again.
+        // At most we apply 5 damping steps.
+        //
+        // There are exceptions to when we use a line search. First,
+        // if this is the first Newton step on any mesh, then we don't have
+        // any point to compare the residual to, so we always accept a full
+        // step. Likewise, if this is the second Newton step on the first mesh (or
+        // the second on any mesh if we don't transfer solutions from
+        // mesh to mesh), then we have computed the first of these steps using
+        // just an elastic model (see how we set the yield stress sigma to
+        // an unreasonably large value above). In this case, the first Newton
+        // solution was a purely elastic one, the second one a plastic one,
+        // and any linear combination would not necessarily be expected to
+        // lie in the feasible set -- so we just accept the solution we just
+        // got.
+        //
+        // In either of these two cases, we bypass the line search and just
+        // update residual and other vectors as necessary.
+        if ((newton_step==1)
+            ||
+            (transfer_solution && newton_step == 2 && current_refinement_cycle == 0)
+            ||
+            (!transfer_solution && newton_step == 2))
+          {
+            compute_nonlinear_residual(solution);
+            old_solution = solution;
+
+            residual = newton_rhs;
+            const types::global_dof_index start_res = residual.local_range().first,
+                                          end_res   = residual.local_range().second;
+            for (types::global_dof_index n = start_res; n < end_res; ++n)
+              if (all_constraints.is_inhomogeneously_constrained(n))
+                residual(n) = 0;
+
+            residual.compress(VectorOperation::insert);
+
+            residual_norm = residual.l2_norm();
+
+            pcout << "      Accepting Newton solution with residual: "
+                  << residual_norm << std::endl;
+          }
+        else
+          {
+            for (unsigned int i = 0; i < 5; i++)
+              {
+                distributed_solution = solution;
+
+                const double alpha = std::pow(0.5, static_cast<double>(i));
+                tmp_vector = old_solution;
+                tmp_vector.sadd(1 - alpha, alpha, distributed_solution);
+
+                TimerOutput::Scope t(computing_timer, "Residual and lambda");
+
+                locally_relevant_tmp_vector = tmp_vector;
+                compute_nonlinear_residual(locally_relevant_tmp_vector);
+                residual = newton_rhs;
+
+                const types::global_dof_index start_res = residual.local_range().first,
+                                              end_res   = residual.local_range().second;
+                for (types::global_dof_index n = start_res; n < end_res; ++n)
+                  if (all_constraints.is_inhomogeneously_constrained(n))
+                    residual(n) = 0;
+
+                residual.compress(VectorOperation::insert);
+
+                residual_norm = residual.l2_norm();
+
+                pcout << "      Residual of the non-contact part of the system: "
+                      << residual_norm << std::endl
+                      << "         with a damping parameter alpha = " << alpha
+                      << std::endl;
+
+                if (residual_norm < previous_residual_norm)
+                  break;
+              }
+
+            solution = tmp_vector;
+            old_solution = solution;
+          }
+
+        previous_residual_norm = residual_norm;
+
+        // The final step is to check for convergence. If the active set has not
+        // changed on any processor since the previous Newton step and the residual
+        // is below $10^{-10}$, we terminate the iteration on the current mesh.
+        // Only afterwards do we store the active set for the next comparison:
+        if (Utilities::MPI::sum((active_set == old_active_set) ? 0 : 1,
+                                mpi_communicator) == 0)
+          {
+            pcout << "      Active set did not change!" << std::endl;
+            if (residual_norm < 1e-10)
+              break;
+          }
+
+        old_active_set = active_set;
+      }
+  }
+
+  // @sect4{PlasticityContactProblem::refine_grid}
+
+  // If you've made it this far into the deal.II tutorial, the following
+  // function refining the mesh should not pose any challenges to you
+  // any more. It refines the mesh, either globally or using the Kelly
+  // error estimator, and if so asked also transfers the solution from
+  // the previous to the next mesh. In the latter case, we also need
+  // to compute the active set and other quantities again, for which we
+  // need the information computed by <code>compute_nonlinear_residual()</code>.
+  template <int dim>
+  void
+  PlasticityContactProblem<dim>::refine_grid ()
+  {
+    // Step 1: flag cells, either from the Kelly error indicators or all owned cells.
+    if (refinement_strategy != RefinementStrategy::refine_global)
+      {
+        Vector<float> error_indicators(triangulation.n_active_cells());
+        KellyErrorEstimator<dim>::estimate(dof_handler,
+                                           QGauss<dim - 1>(fe.degree + 2),
+                                           typename FunctionMap<dim>::type(),
+                                           solution,
+                                           error_indicators);
+
+        parallel::distributed::GridRefinement
+        ::refine_and_coarsen_fixed_number(triangulation,
+                                          error_indicators,
+                                          0.3, 0.03);
+      }
+    else
+      {
+        typename Triangulation<dim>::active_cell_iterator
+        cell = triangulation.begin_active();
+        for (; cell != triangulation.end(); ++cell)
+          if (cell->is_locally_owned())
+            cell->set_refine_flag ();
+      }
+
+    triangulation.prepare_coarsening_and_refinement();
+
+    // Step 2: register the old solution (if requested), then change the mesh.
+    parallel::distributed::SolutionTransfer<dim,
+             TrilinosWrappers::MPI::Vector> mesh_transfer(dof_handler);
+    if (transfer_solution)
+      mesh_transfer.prepare_for_coarsening_and_refinement(solution);
+
+    triangulation.execute_coarsening_and_refinement();
+    setup_system();
+
+    // Step 3: without a transfer there is nothing left to do; otherwise
+    // interpolate, make the result conforming on the new mesh by applying
+    // the hanging node constraints, and recompute the residual vectors.
+    if (!transfer_solution)
+      return;
+
+    TrilinosWrappers::MPI::Vector interpolated(locally_owned_dofs, mpi_communicator);
+    mesh_transfer.interpolate(interpolated);
+    constraints_hanging_nodes.distribute(interpolated);
+    solution = interpolated;
+    compute_nonlinear_residual(solution);
+  }
+
+
+  // @sect4{PlasticityContactProblem::move_mesh}
+
+  // The remaining three functions before we get to <code>run()</code>
+  // have to do with generating output. The following one is an attempt
+  // at showing the deformed body in its deformed configuration. To this
+  // end, this function takes a displacement vector field and moves every
+  // vertex of the (local part) of the mesh by the previously computed
+  // displacement. We will call this function with the current
+  // displacement field before we generate graphical output, and we will
+  // call it again after generating graphical output with the negative
+  // displacement field to undo the changes to the mesh so made.
+  //
+  // The function itself is pretty straightforward. All we have to do
+  // is keep track of which vertices we have already touched, as we
+  // encounter the same vertices multiple times as we loop over cells.
+  template <int dim>
+  void
+  PlasticityContactProblem<dim>::
+  move_mesh (const TrilinosWrappers::MPI::Vector &displacement) const
+  {
+    // One flag per vertex so that a vertex shared by several cells is
+    // shifted exactly once.
+    std::vector<bool> already_moved(triangulation.n_vertices(), false);
+
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+    for (; cell != endc; ++cell)
+      {
+        if (!cell->is_locally_owned())
+          continue;
+
+        for (unsigned int v = 0; v < GeometryInfo<dim>::vertices_per_cell; ++v)
+          {
+            const unsigned int global_vertex = cell->vertex_index(v);
+            if (already_moved[global_vertex])
+              continue;
+            already_moved[global_vertex] = true;
+
+            // Gather the displacement components stored at this vertex
+            // and shift the vertex by them.
+            Point<dim> shift;
+            for (unsigned int d = 0; d < dim; ++d)
+              shift[d] = displacement(cell->vertex_dof_index(v, d));
+
+            cell->vertex(v) += shift;
+          }
+      }
+  }
+
+
+
+  // @sect4{PlasticityContactProblem::output_results}
+
+  // Next is the function we use to actually generate graphical output. The
+  // function is a bit tedious, but not actually particularly complicated.
+  // It moves the mesh at the top (and moves it back at the end), then
+  // computes the contact forces along the contact surface. We can do
+  // so (as shown in the accompanying paper) by taking the untreated
+  // residual vector and identifying which degrees of freedom
+  // correspond to those with contact by asking whether they have an
+  // inhomogeneous constraint associated with them. As always, we need
+  // to be mindful that we can only write into completely distributed
+  // vectors (i.e., vectors without ghost elements) but that when we
+  // want to generate output, we need vectors that do indeed have
+  // ghost entries for all locally relevant degrees of freedom.
+  template <int dim>
+  void
+  PlasticityContactProblem<dim>::output_results (const std::string &filename_base)
+  {
+    TimerOutput::Scope timer_section(computing_timer, "Graphical output");
+
+    pcout << "      Writing graphical output... " << std::flush;
+
+    // Show the body in its deformed configuration; this is undone at the
+    // very end of the function.
+    move_mesh(solution);
+
+    // Contact forces: for every locally stored row that carries an
+    // inhomogeneous constraint, scale the uncondensed residual by the
+    // corresponding diagonal mass matrix entry.
+    TrilinosWrappers::MPI::Vector distributed_lambda(locally_owned_dofs, mpi_communicator);
+    const unsigned int first_local_row = newton_rhs_uncondensed.local_range().first;
+    const unsigned int end_local_row   = newton_rhs_uncondensed.local_range().second;
+    for (unsigned int row = first_local_row; row < end_local_row; ++row)
+      {
+        if (!all_constraints.is_inhomogeneously_constrained(row))
+          continue;
+
+        distributed_lambda(row) = newton_rhs_uncondensed(row) /
+                                  diag_mass_matrix_vector(row);
+      }
+    distributed_lambda.compress(VectorOperation::insert);
+    constraints_hanging_nodes.distribute(distributed_lambda);
+
+    // Copy into a vector built on the locally relevant index set, which
+    // is what the output machinery below is handed.
+    TrilinosWrappers::MPI::Vector lambda(locally_relevant_dofs, mpi_communicator);
+    lambda = distributed_lambda;
+
+
+    DataOut<dim> data_out;
+    data_out.attach_dof_handler(dof_handler);
+
+    // All three DoF-based fields are dim-component vector fields.
+    const std::vector<DataComponentInterpretation::DataComponentInterpretation>
+    vector_interpretation(dim, DataComponentInterpretation::component_is_part_of_vector);
+    const std::vector<std::string> displacement_names (dim, "displacement");
+    const std::vector<std::string> contact_force_names (dim, "contact_force");
+    const std::vector<std::string> active_set_names (dim, "active_set");
+
+    data_out.add_data_vector(solution, displacement_names,
+                             DataOut<dim>::type_dof_data, vector_interpretation);
+    data_out.add_data_vector(lambda, contact_force_names,
+                             DataOut<dim>::type_dof_data, vector_interpretation);
+    data_out.add_data_vector(active_set, active_set_names,
+                             DataOut<dim>::type_dof_data, vector_interpretation);
+
+    // Cell-wise data: the id of the owning subdomain and the fraction of
+    // plastic quadrature points.
+    Vector<float> subdomain(triangulation.n_active_cells());
+    const float this_subdomain = triangulation.locally_owned_subdomain();
+    for (unsigned int i = 0; i < subdomain.size(); ++i)
+      subdomain(i) = this_subdomain;
+    data_out.add_data_vector(subdomain, "subdomain");
+
+    data_out.add_data_vector(fraction_of_plastic_q_points_per_cell,
+                             "fraction_of_plastic_q_points");
+
+    data_out.build_patches();
+
+    // Every processor writes its own VTU file, tagged with its subdomain
+    // id. The first processor additionally writes a <code>.pvtu</code>
+    // record (for Paraview) and a <code>.visit</code> record (for Visit)
+    // that list <i>all</i> of the VTU files, so that the complete output
+    // of the parallel computation can be opened at once.
+    const std::string output_stem = output_dir + filename_base;
+    const std::string local_piece =
+      output_stem + "-"
+      + Utilities::int_to_string(triangulation.locally_owned_subdomain(), 4);
+
+    std::ofstream output_vtu((local_piece + ".vtu").c_str());
+    data_out.write_vtu(output_vtu);
+    pcout << output_stem << ".pvtu" << std::endl;
+
+    if (Utilities::MPI::this_mpi_process(mpi_communicator) == 0)
+      {
+        // The records list the pieces without the output directory, i.e.
+        // relative to the location of the record files themselves.
+        const unsigned int n_pieces =
+          Utilities::MPI::n_mpi_processes(mpi_communicator);
+        std::vector<std::string> piece_names;
+        for (unsigned int piece = 0; piece < n_pieces; ++piece)
+          piece_names.push_back(filename_base + "-" +
+                                Utilities::int_to_string(piece, 4) +
+                                ".vtu");
+
+        std::ofstream pvtu_master_output((output_stem + ".pvtu").c_str());
+        data_out.write_pvtu_record(pvtu_master_output, piece_names);
+
+        std::ofstream visit_master_output((output_stem + ".visit").c_str());
+        data_out.write_visit_record(visit_master_output, piece_names);
+      }
+
+    // Move the mesh back by applying the negative displacement.
+    TrilinosWrappers::MPI::Vector negative_displacement(solution);
+    negative_displacement *= -1;
+    move_mesh(negative_displacement);
+  }
+
+
+  // @sect4{PlasticityContactProblem::output_contact_force}
+
+  // This last auxiliary function computes the contact force by
+  // calculating an integral over the contact pressure in z-direction
+  // over the contact area. For this purpose we set the contact
+  // pressure lambda to 0 for all inactive dofs (whether a degree
+  // of freedom is part of the contact is determined just as
+  // we did in the previous function). For all
+  // active dofs, lambda contains the quotient of the nonlinear
+  // residual (newton_rhs_uncondensed) and corresponding diagonal entry
+  // of the mass matrix (diag_mass_matrix_vector). Because it is
+  // not unlikely that hanging nodes show up in the contact area
+  // it is important to apply constraints_hanging_nodes.distribute
+  // to the distributed_lambda vector.
+  template <int dim>
+  void
+  PlasticityContactProblem<dim>::output_contact_force () const
+  {
+    // Contact pressure on the locally stored rows: residual divided by
+    // the diagonal mass matrix entry where a degree of freedom carries an
+    // inhomogeneous constraint, zero everywhere else.
+    TrilinosWrappers::MPI::Vector distributed_lambda(locally_owned_dofs, mpi_communicator);
+    const unsigned int start_res = (newton_rhs_uncondensed.local_range().first),
+                       end_res = (newton_rhs_uncondensed.local_range().second);
+    for (unsigned int n = start_res; n < end_res; ++n)
+      if (all_constraints.is_inhomogeneously_constrained(n))
+        distributed_lambda(n) = newton_rhs_uncondensed(n) / diag_mass_matrix_vector(n);
+      else
+        distributed_lambda(n) = 0;
+    distributed_lambda.compress(VectorOperation::insert);
+    constraints_hanging_nodes.distribute(distributed_lambda);
+
+    // Evaluating the field on cells needs a vector built on the locally
+    // relevant index set.
+    TrilinosWrappers::MPI::Vector lambda(locally_relevant_dofs, mpi_communicator);
+    lambda = distributed_lambda;
+
+    double contact_force = 0.0;
+
+    QGauss<dim-1> face_quadrature_formula(fe.degree + 1);
+    FEFaceValues<dim> fe_values_face(fe, face_quadrature_formula,
+                                     update_values | update_JxW_values);
+
+    const unsigned int n_face_q_points = face_quadrature_formula.size();
+
+    const FEValuesExtractors::Vector displacement(0);
+
+    // Scratch array for the values of lambda at the quadrature points of
+    // one face. Allocated once and filled anew for every face instead of
+    // being re-allocated inside the loop.
+    std::vector<Tensor<1, dim> > lambda_values(n_face_q_points);
+
+    // Integrate the z-component of lambda over all locally owned faces
+    // with boundary indicator 1.
+    // NOTE(review): component index 2 is only valid for dim == 3, which
+    // is the only instantiation used by main() -- confirm before using
+    // this function with another dimension.
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+    for (; cell != endc; ++cell)
+      if (cell->is_locally_owned())
+        for (unsigned int face = 0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+          if (cell->face(face)->at_boundary()
+              &&
+              cell->face(face)->boundary_id() == 1)
+            {
+              fe_values_face.reinit(cell, face);
+
+              fe_values_face[displacement].get_function_values(lambda,
+                                                               lambda_values);
+
+              for (unsigned int q_point=0; q_point<n_face_q_points; ++q_point)
+                contact_force += lambda_values[q_point][2]
+                                 * fe_values_face.JxW(q_point);
+            }
+
+    // Add up the contributions of all processes. Use the communicator the
+    // rest of this class works with rather than a hard-coded
+    // MPI_COMM_WORLD, so that the reduction involves exactly the
+    // processes that own parts of the vectors above.
+    contact_force = Utilities::MPI::sum(contact_force, mpi_communicator);
+
+    pcout << "Contact force = " << contact_force << std::endl;
+  }
+
+
+  // @sect4{PlasticityContactProblem::run}
+
+  // As in all other tutorial programs, the <code>run()</code> function contains
+  // the overall logic. There is not very much to it here: in essence, it
+  // performs the loops over all mesh refinement cycles, and within each, hands
+  // things over to the Newton solver in <code>solve_newton()</code> on the
+  // current mesh and calls the function that creates graphical output for
+  // the so-computed solution. It then outputs some statistics concerning both
+  // run times and memory consumption that has been collected over the course of
+  // computations on this mesh.
+  template <int dim>
+  void
+  PlasticityContactProblem<dim>::run ()
+  {
+    computing_timer.reset();
+    for (current_refinement_cycle = 0;
+         current_refinement_cycle < n_refinement_cycles;
+         ++current_refinement_cycle)
+      {
+        // Mesh and system setup. The braces limit the lifetime of the
+        // "Setup" timer section to this part of the cycle.
+        {
+          TimerOutput::Scope setup_section(computing_timer, "Setup");
+
+          pcout << std::endl;
+          pcout << "Cycle " << current_refinement_cycle << ':' << std::endl;
+
+          if (current_refinement_cycle != 0)
+            {
+              // Later cycles start from the refined previous mesh.
+              TimerOutput::Scope refine_section(computing_timer, "Setup: refine mesh");
+              refine_grid();
+            }
+          else
+            {
+              // The first cycle starts from a fresh coarse mesh.
+              make_grid();
+              setup_system();
+            }
+        }
+
+        solve_newton();
+
+        const std::string cycle_label =
+          Utilities::int_to_string(current_refinement_cycle, 2);
+        output_results("solution-" + cycle_label);
+
+        // Report run times for this cycle, then start over for the next.
+        computing_timer.print_summary();
+        computing_timer.reset();
+
+        Utilities::System::MemoryStats memory_stats;
+        Utilities::System::get_memory_stats(memory_stats);
+        pcout << "Peak virtual memory used, resident in kB: " << memory_stats.VmSize << " "
+              << memory_stats.VmRSS << std::endl;
+
+        if (base_mesh == "box")
+          output_contact_force();
+      }
+  }
+}
+
+// @sect3{The <code>main</code> function}
+
+// There really isn't much to the <code>main()</code> function. It looks
+// like it always does:
+int main (int argc, char *argv[])
+{
+  using namespace dealii;
+  using namespace Step42;
+
+  // Separator line shared by both error reports below.
+  const char *const separator =
+    "----------------------------------------------------";
+
+  try
+    {
+      ParameterHandler prm;
+      PlasticityContactProblem<3>::declare_parameters(prm);
+
+      // The name of the parameter file is the one and only argument.
+      if (argc != 2)
+        {
+          std::cerr << "*** Call this program as <./step-42 input.prm>" << std::endl;
+          return 1;
+        }
+
+      prm.read_input(argv[1]);
+      Utilities::MPI::MPI_InitFinalize mpi_initialization (argc, argv,
+                                                           numbers::invalid_unsigned_int);
+
+      // The extra braces make sure the problem object is destroyed before
+      // mpi_initialization goes out of scope.
+      {
+        PlasticityContactProblem<3> problem(prm);
+        problem.run();
+      }
+    }
+  // Catch by reference to const: the handler only reads the message.
+  catch (const std::exception &exc)
+    {
+      std::cerr << std::endl << std::endl
+                << separator
+                << std::endl;
+      std::cerr << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << separator
+                << std::endl;
+
+      return 1;
+    }
+  catch (...)
+    {
+      std::cerr << std::endl << std::endl
+                << separator
+                << std::endl;
+      std::cerr << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << separator
+                << std::endl;
+      return 1;
+    }
+
+  return 0;
+}
diff --git a/examples/step-43/CMakeLists.txt b/examples/step-43/CMakeLists.txt
new file mode 100644
index 0000000..98ff06f
--- /dev/null
+++ b/examples/step-43/CMakeLists.txt
@@ -0,0 +1,50 @@
+##
+#  CMake script for the step-43 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-43")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+# Look for deal.II 8.4 in the hinted directories and in $ENV{DEAL_II_DIR}:
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+#
+# Are all dependencies fulfilled? Stop with an error unless the deal.II
+# installation found above has DEAL_II_WITH_TRILINOS switched on.
+#
+IF(NOT DEAL_II_WITH_TRILINOS)
+  MESSAGE(FATAL_ERROR "
+Error! The deal.II library found at ${DEAL_II_PATH} was not configured with
+    DEAL_II_WITH_TRILINOS = ON
+One or all of these are OFF in your installation but are required for this tutorial step."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-43/doc/builds-on b/examples/step-43/doc/builds-on
new file mode 100644
index 0000000..b27e0d3
--- /dev/null
+++ b/examples/step-43/doc/builds-on
@@ -0,0 +1 @@
+step-21 step-31
diff --git a/examples/step-43/doc/intro.dox b/examples/step-43/doc/intro.dox
new file mode 100644
index 0000000..658f197
--- /dev/null
+++ b/examples/step-43/doc/intro.dox
@@ -0,0 +1,609 @@
+<br>
+
+<i>
+This program was contributed by Chih-Che Chueh (University of Victoria) and
+Wolfgang Bangerth. Results from this program are used and discussed in the
+following publications (in particular in the second one):
+- Chih-Che Chueh, Marc Secanell, Wolfgang Bangerth, Ned Djilali. Multi-level
+  adaptive simulation of transient two-phase flow in heterogeneous porous
+  media. Computers & Fluids, 39:1585-1596, 2010
+- Chih-Che Chueh, Ned Djilali, Wolfgang Bangerth. An h-adaptive operator
+  splitting method for two-phase flow in 3D heterogeneous porous
+  media. SIAM Journal on Scientific Computing, 35:B149-B175, 2013.
+
+The implementation discussed here uses and extends
+parts of the step-21 and step-31 tutorial programs.
+
+The work of Chih-Che Chueh was funded through the Canada Research Chairs
+Program and the MITACS Network of Centres of Excellence. Parts of the work by
+Wolfgang Bangerth were funded through Award No. KUS-C1-016-04, made by the King
+Abdullah University of Science and Technology, and through an Alfred P. Sloan
+Research Fellowship.
+This material is also in parts based upon work supported by the National
+Science Foundation under Award No. EAR-0426271 and The California Institute of
+Technology; and in a continuation by the National Science
+Foundation under Award No. EAR-0949446 and The University of California
+– Davis. Any opinions, findings, and conclusions or recommendations
+expressed in this publication are those of the author and do not
+necessarily reflect the views of the National Science Foundation, The
+California Institute of Technology, or of The University of California
+– Davis.
+</i>
+
+
+<a name="Intro"></a> <h1>Introduction</h1>
+
+The simulation of multiphase flow in porous media is a ubiquitous problem, and
+we have previously addressed it already in some form in step-20 and
+step-21. However, as was easy to see there, it faces two major difficulties:
+numerical accuracy and efficiency. The first is easy to see in the stationary
+solver step-20: using lowest order Raviart-Thomas elements can not be expected
+to yield highly accurate solutions. We need more accurate methods. The second
+reason is apparent from the time dependent step-21: that program is
+excruciatingly slow, and there is no hope to get highly accurate solutions in
+3d within reasonable time frames.
+
+In this
+program, in order to overcome these two problems, there are five areas which
+we are trying to improve for a high performance simulator:
+
+<ul>
+<li> Higher order spatial discretizations
+<li> Adaptive mesh refinement
+<li> Adaptive time stepping
+<li> Operator splitting
+<li> Efficient solver and preconditioning
+</ul>
+
+Much inspiration for this program comes from step-31 but several of the
+techniques discussed here are original.
+
+
+<h3>Advection-dominated two-phase flow mathematical model.</h3>
+
+We consider the flow of a two-phase immiscible, incompressible
+fluid. Capillary and gravity effects are neglected, and viscous
+effects are assumed dominant. The governing equations for such a
+flow are identical to those used in step-21 and are
+@f{align*}
+  \mathbf{u}_t &= - \mathbf{K} \lambda_t \left(S\right) \nabla p, \\
+  \nabla \cdot \mathbf{u}_t &= q, \\
+  \epsilon \frac{\partial S}{\partial t} + \nabla \cdot \left( \mathbf{u}_t  F\left( S \right) \right)&=0,
+@f}
+where $S$ is the saturation (volume fraction between zero and one) of the second (wetting) phase, $p$ is the pressure, $\mathbf{K}$ is the permeability tensor, $\lambda_t$ is the total mobility, $\epsilon$ is the porosity, $F$ is the fractional flow of the wetting phase, $q$ is the source term and $\mathbf{u}_t$ is the total velocity. The total mobility, fractional flow of the wetting phase and total velocity are respectively given by
+@f{align*}
+   \lambda_t(S)&= \lambda_w + \lambda_{nw} = \frac{k_{rw}(S)}{\mu_w} + \frac{k_{rnw}(S)}{\mu_{nw}}, \\
+   F(S) &= \frac{\lambda_w}{\lambda_t} = \frac{\lambda_w}{\lambda_w + \lambda_{nw}} = \frac{k_{rw}(S)/\mu_w}{k_{rw}(S)/\mu_w + k_{rnw}(S)/\mu_{nw}}, \\
+   \mathbf{u}_t &= \mathbf{u}_w + \mathbf{u}_{nw} = -\lambda_t(S)\mathbf{K} \cdot \nabla p,
+@f}
+where subscripts $w, nw$ represent the wetting and non-wetting phases,
+respectively.
+
+For convenience, the
+porosity $\epsilon$ in the saturation equation, which can be considered a
+scaling factor for the time variable, is set to
+one. Following a commonly used prescription for the dependence of the relative
+permeabilities $k_{rw}$ and $k_{rnw}$ on saturation, we use
+@f{align*}
+   k_{rw}  &= S^2, \qquad&\qquad
+   k_{rnw} &= \left( 1-S \right)^2.
+@f}
+
+The porous media equations above are
+augmented by initial conditions for the saturation and boundary conditions for
+the pressure. Since saturation and the gradient of the pressure uniquely
+determine the velocity, no boundary conditions are necessary for the velocity.
+Since the flow equations do not contain time derivatives, initial conditions for the velocity and pressure
+variables are not required. The flow field separates the boundary into inflow or outflow
+parts. Specifically,
+@f[
+   \mathbf{\Gamma}_{in}(t) = \left\{\mathbf{x} \in \partial \Omega:\mathbf{n} \cdot \mathbf{u}_t<0\right\},
+@f]
+and we arrive at a complete model by also imposing boundary values for the
+saturation variable on the inflow boundary $\mathbf{\Gamma}_{in}$.
+
+
+<h3>Adaptive operator splitting and time stepping.</h3>
+
+As seen in step-21, solving the flow equations for velocity and pressure is
+the part of the program that takes far longer than the (explicit) updating
+step for the saturation variable once we know the flow variables. On the other
+hand,  the pressure and velocity depend only weakly on saturation, so one may
+think about only solving for pressure and velocity every few time steps while
+updating the saturation in every step. If we can find a criterion for when the
+flow variables need to be updated, we call this splitting an "adaptive
+operator splitting" scheme.
+
+Here, we use the following a posteriori criterion to decide when to re-compute
+pressure and velocity variables
+(detailed derivations and descriptions can be found in [Chueh, Djilali
+and Bangerth 2011]):
+@f{align*}
+  \theta(n,n_p)
+  =
+    \max_{\kappa\in{\mathbb T}}
+    \left(
+    \left\|
+      \frac 1{\lambda_t\left(S^{(n-1)}\right)}
+      - \frac 1{\lambda_t\left(S^{(n_p)}\right)} \right\|_{L^\infty(\kappa)}
+    \left\|\|\mathbf{K}^{-1}\|_1\right\|_{L^\infty(\kappa)}
+    \right),
+@f}
+where superscripts in parentheses denote the number of the saturation time
+step at which any quantity is defined and $n_p<n$ represents the last step
+where we actually computed the pressure and velocity. If $\theta(n,n_p)$
+exceeds a certain threshold we re-compute the flow variables; otherwise, we
+skip this computation in time step $n$ and only move the saturation variable
+one time step forward.
+
+In short, the algorithm allows us to perform a number of
+saturation time steps of length $\Delta t_c^{(n)}=t^{(n)}_c-t^{(n-1)}_c$ until
+the criterion above tells us to re-compute velocity and pressure
+variables, leading to a macro time step of length
+@f[
+   \Delta t_p^{(n)} = \sum_{i=n_p+1}^{n} \Delta t_c^{(i)}.
+@f]
+We choose the length of (micro) steps subject to the Courant-Friedrichs-Lewy
+(CFL) restriction according to the criterion
+@f[
+  \Delta t_c = \frac{\textrm{min}_{K}h_{K}}{7 \|\mathbf{u}_t\|_{L^{\infty}\left(\Omega\right)}},
+@f]
+which we have confirmed to be stable for the choice of finite element and time
+stepping scheme for the saturation equation discussed below ($h_K$ denotes the
+diameter of cell $K$).
+The result is a scheme where neither micro nor macro time
+steps are of uniform length, and both are chosen adaptively.
+
+<h3>Time discretization.</h3>
+Using this time discretization, we obtain the following set of equations for
+each time step from the IMPES approach (see step-21):
+@f{align*}
+   \mathbf{u}^{(n)}_t + \lambda_t\left(S^{(n-1)}\right) \mathbf{K} \nabla p^{(n)} =0, \\
+   \nabla \cdot \mathbf{u}^{(n)}_t = q, \\
+   \epsilon \left( \frac{S^{(n)}-S^{(n-1)}}{\Delta t^{(n)}_c} \right) + \mathbf{u}^{(n)}_t \cdot \nabla F\left(S^{(n-1)}\right) + F\left(S^{(n-1)}\right) \nabla \cdot \mathbf{u}^{(n)}_t =0.
+@f}
+
+
+Using the fact that $\nabla \cdot \mathbf{u}_t = q$, the time discrete
+saturation equation becomes
+@f{align*}
+  &\epsilon \left( \frac{S^{(n)}-S^{(n-1)}}{\Delta t^{(n)}_c} \right) + \mathbf{u}^{(n)}_t \cdot \nabla F\left(S^{(n-1)}\right) + F\left(S^{(n-1)}\right)q=0.
+@f}
+
+<h3>Weak form, space discretization for the pressure-velocity part.</h3>
+
+By multiplying the equations defining the total velocity $\mathbf u_t^{(n)}$ and
+the equation that expresses its divergence in terms of source terms, with test
+functions $\mathbf{v}$ and $w$
+respectively and then integrating terms by parts as necessary, the weak form
+of the problem reads: Find $\mathbf u, p$ so that for all test functions
+$\mathbf{v}, w$ there holds
+@f{gather*}
+   \left( \left( \mathbf{K} \lambda_t\left(S^{(n-1)}\right) \right)^{-1} \mathbf{u}^{(n)}_t, \mathbf{v}\right)_{\Omega} - \left(p^{(n)}, \nabla \cdot \mathbf{v}\right)_{\Omega} = -\left(p^{(n)}, \mathbf{n} \cdot \mathbf{v} \right)_{\partial \Omega}, \\
+   - \left( \nabla \cdot \mathbf{u}^{(n)}_t,w\right)_{\Omega} = - \big(q,w\big)_{\Omega}.
+@f}
+Here, $\mathbf{n}$ represents the unit outward normal vector to $\partial
+\Omega$ and the pressure $p^{(n)}$ can be prescribed weakly on the open part
+of the boundary $\partial \Omega$ whereas on those parts where a velocity is
+prescribed (for example impermeable boundaries with $\mathbf n \cdot \mathbf
+u=0$) the term disappears altogether because $\mathbf n \cdot \mathbf
+v=0$.
+
+We use continuous finite elements to discretize the velocity and pressure
+equations. Specifically, we use mixed finite elements to ensure high order approximation
+for both vector (e.g. a fluid velocity) and scalar variables (e.g. pressure)
+simultaneously. For saddle point problems, it is well established that
+the so-called Babuska-Brezzi or Ladyzhenskaya-Babuska-Brezzi (LBB) conditions
+[Brezzi 1991, Chen 2005] need to be satisfied to ensure stability of
+the pressure-velocity system. These stability conditions are satisfied in the
+present work by using elements for velocity that are one order higher than for
+the pressure, i.e. $u_h \in Q^d_{p+1}$ and $p_h \in Q_p$, where $p=1$, $d$ is
+the space dimension, and $Q_s$ denotes the space of tensor product Lagrange
+polynomials of degree $s$ in each variable.
+
+<h3>Stabilization, weak form and space discretization for the saturation transport equation.</h3>
+The chosen $Q_1$ elements for the saturation equation do not lead to a stable
+discretization without upwinding or other kinds of stabilization, and spurious
+oscillations will appear in the numerical solution. Adding an artificial
+diffusion term is one approach to eliminating these oscillations
+[Chen 2005]. On the other hand, adding too much diffusion smears sharp
+fronts in the solution and suffers from grid-orientation difficulties
+[Chen 2005]. To avoid these effects, we use the artificial diffusion
+term proposed by [Guermond and Pasquetti 2008] and
+validated in [Chueh, Djilali, Bangerth 2011] and
+[Kronbichler, Heister and Bangerth, 2011], as well as in step-31.
+
+This method modifies the (discrete) weak form of the saturation equation
+to read
+@f{align*}
+  \left(\epsilon \frac{\partial S_h}{\partial t},\sigma_h\right)
+  -
+  \left(\mathbf{u}_t  F\left( S_h \right),
+    \nabla \sigma_h\right)
+  +
+  \left(\mathbf n \cdot \mathbf{u}_t  \hat F\left( S_h \right),
+    \sigma_h\right)_{\partial\Omega}
+  +
+  (\nu(S_h) \nabla S_h, \nabla \sigma_h)
+  &=0
+  \qquad
+  \forall \sigma_h,
+@f}
+where $\nu$ is the artificial diffusion parameter and $\hat F$ is an
+appropriately chosen numerical flux on the boundary of the domain (we choose
+the obvious full upwind flux for this).
+
+Following [Guermond and Pasquetti 2008] (and as detailed in
+[Chueh, Djilali and Bangerth 2011]), we use
+the parameter as a piecewise
+constant function set on each cell $K$ with the diameter $h_{K}$ as
+@f[
+   \nu(S_h)|_{K} = \beta \| \mathbf{u}_t \max\{F'(S_h),1\} \|_{L^{\infty}(K)} \textrm{min} \left\{ h_{K},h^{\alpha}_{K} \frac{\|\textrm{Res}(S_h)\|_{L^{\infty}(K)}}{c(\mathbf{u}_t,S)} \right\}
+@f]
+where $\alpha$ is a stabilization exponent and $\beta$ is a dimensionless
+user-defined stabilization constant. Following [Guermond and Pasquetti 2008]
+as well as the implementation in step-31, the velocity and saturation global
+normalization constant, $c(\mathbf{u}_t,S)$, and the residual $\textrm{Res}(S)$
+are respectively given by
+@f[
+   c(\mathbf{u}_t,S) = c_R \|\mathbf{u}_t \max\{F'(S),1\}\|_{L^{\infty}(\Omega)} \textrm{var}(S)^\alpha | \textrm{diam} (\Omega) |^{\alpha - 2}
+@f]
+and
+@f[
+   \textrm{Res}(S) = \left( \epsilon \frac{\partial S}{\partial t} + \mathbf{u}_t \cdot \nabla F(S) + F(S)q \right) \cdot S^{\alpha - 1}
+@f]
+where $c_R$ is a second dimensionless user-defined constant,
+$\textrm{diam}(\Omega)$ is the diameter of the domain and $\textrm{var}(S) =
+\textrm{max}_{\Omega} S - \textrm{min}_{\Omega} S$ is the range of the present
+saturation values in the entire computational domain $\Omega$.
+
+This stabilization scheme has a number of advantages over simpler schemes such
+as finite volume (or discontinuous Galerkin) methods or streamline upwind
+Petrov Galerkin (SUPG) discretizations. In particular, the artificial
+diffusion term acts primarily in the vicinity of discontinuities
+since the residual is small in areas where the saturation is smooth. It
+therefore provides for a higher degree of accuracy. On the other hand, it is
+nonlinear since $\nu$ depends on the saturation $S$. We avoid this difficulty
+by treating all nonlinear terms explicitly, which leads to the following
+fully discrete problem at time step $n$:
+@f{align*}
+   &\left( \epsilon S_h^{(n)},\sigma_h\right)_{\Omega} - \Delta t^{(n)}_c \Big(F\left(S_h^{(n-1)}\right)\mathbf{u}^{*}_t,\nabla\sigma_h\Big)_{\Omega} + \Delta t^{(n)}_c \Big(F\left(S_h^{(n-1)}\right)\left(\mathbf{n}\cdot\mathbf{u}^{*}_t\right),\sigma_h\Big)_{\partial\Omega} \nonumber \\
+   & \quad = \left( \epsilon S_h^{(n-1)},\sigma_h\right)_{\Omega} - \Delta t^{(n)}_c \bigg(\nu\left(S_h^{(n-1)}\right)\nabla S_h^{(n-1)},\nabla\sigma_h\bigg)_{\Omega} \nonumber \\
+   & \qquad + \Delta t^{(n)}_c \bigg(\mathbf{n}\cdot\nu\left(S_h^{(n-1)}\right)\nabla S_h^{(n-1)},\sigma_h\bigg)_{\partial\Omega}
+@f}
+where $\mathbf{u}_t^{*}$ is the velocity linearly extrapolated from
+$\mathbf{u}^{(n_p)}_t$ and $\mathbf{u}^{(n_{pp})}_t$ to the current time $t^{(n)}$ if $\theta<\theta^*$ while $\mathbf{u}_t^{*}$ is $\mathbf{u}^{(n_p)}_t$ if $\theta>\theta^*$.
+Consequently, the equation is linear in $S_h^{(n)}$ and all that is required
+is to solve with a mass matrix on the saturation space.
+
+Since the Dirichlet boundary conditions for saturation are only imposed on the
+inflow boundaries, the third term on the left hand side of the equation above
+needs to be split further into two parts:
+@f{align*}
+  &\Delta t^{(n)}_c \Big(F\left(S_h^{(n-1)}\right)\left(\mathbf{n}\cdot\mathbf{u}^{(n)}_t\right),\sigma_h\Big)_{\partial\Omega} \nonumber \\
+  &\qquad= \Delta t^{(n)}_c \Big(F\left(S^{(n-1)}_{(+)}\right)\left(\mathbf{n}\cdot\mathbf{u}^{(n)}_{t(+)}\right),\sigma_h\Big)_{\partial\Omega_{(+)}} + \Delta t^{(n)}_c \Big(F\left(S^{(n-1)}_{(-)}\right)\left(\mathbf{n}\cdot\mathbf{u}^{(n)}_{t(-)}\right),\sigma_h\Big)_{\partial\Omega_{(-)}}
+@f}
+where $\partial\Omega_{(-)} = \left\{\mathbf{x} \in \partial\Omega : \mathbf{n}
+  \cdot \mathbf{u}_t<0\right\}$ and
+$\partial\Omega_{(+)} = \left\{\mathbf{x} \in \partial\Omega : \mathbf{n} \cdot
+  \mathbf{u}_t>0\right\}$ represent inflow and outflow boundaries,
+respectively. We choose values using an
+upwind formulation, i.e. $S^{(n-1)}_{(+)}$ and $\mathbf{u}^{(n)}_{t(+)}$
+correspond to the values taken from the present cell, while the values of
+$S^{(n-1)}_{(-)}$ and $\mathbf{u}^{(n)}_{t(-)}$ are those taken from the
+neighboring boundary $\partial\Omega_{(-)}$.
+
+
+<h3>Adaptive mesh refinement.</h3>
+
+Choosing meshes adaptively to resolve sharp
+saturation fronts is an essential ingredient to achieve efficiency in our
+algorithm. Here, we use the same shock-type refinement approach used in
+[Chueh, Djilali and Bangerth 2011] to select those cells that should be refined or
+coarsened. The refinement indicator for each cell $K$ of the triangulation is
+computed by
+@f[
+   \eta_{K} = |\nabla S_h(\mathbf x_K)|
+@f]
+where $\nabla S_h(\mathbf x_K)$ is the gradient of the discrete saturation
+variable evaluated at the center $\mathbf x_K$ of cell $K$. This approach is
+analogous to ones frequently used in compressible flow problems, where density
+gradients are used to indicate refinement. That said, as we will
+discuss at the end of the <a href="#Results">results section</a>, this turns
+out to not be a very useful criterion since it leads to refinement basically
+everywhere. We only show it here for illustrative purposes.
+
+
+<h3>Linear system and its preconditioning.</h3>
+
+Following the discretization of the governing equations
+discussed above, we
+obtain a linear system of equations in time step $(n)$ of the following form:
+@f[
+ \left(
+  \begin{array}{ccc}
+   \mathbf{M}^{\mathbf{u}} & \mathbf{B}^{T} & \mathbf{0}  \\
+   \mathbf{B}           & \mathbf{0}     & \mathbf{0}   \\
+   \mathbf{H}           & \mathbf{0}     & \mathbf{M}^{S}
+  \end{array}
+ \right)
+ \left(
+  \begin{array}{c}
+   \mathbf{U}^{(n)} \\
+   \mathbf{P}^{(n)} \\
+   \mathbf{S}^{(n)}
+  \end{array}
+ \right)
+ =
+ \left(
+  \begin{array}{c}
+   0 \\
+   \mathbf{F}_{2} \\
+   \mathbf{F}_{3}
+  \end{array}
+ \right)
+@f]
+where the individual matrices and vectors are defined as follows using shape functions $\mathbf{v}_i$ for velocity, and $\phi_i$ for both pressure and saturation:
+@f{align*}
+  \mathbf{M}^{\mathbf{u}}_{ij}
+  &= \left( \left( \mathbf{K} \lambda_t\left(S^{(n-1)}\right) \right)^{-1}
+  \mathbf{v}_{i},\mathbf{v}_{j}\right)_{\Omega},
+  &
+  \mathbf{M}^{S}_{ij}           &= \left(\epsilon \phi_i,\phi_j\right)_{\Omega}
+  \\
+  \mathbf{B}_{ij}
+  &= - \left( \nabla \cdot \mathbf{v}_{j},\phi_{i}\right)_{\Omega},
+  &
+  \mathbf{H}_{ij}
+  &= - \Delta t^{(n)}_c \Big( F\left(S^{(n-1)}\right) \mathbf{v}_i,\nabla\phi_j\Big)_{\Omega}
+  \\
+  \left(\mathbf{F}_{2}\right)_i
+  &= - \big(F\left(S^{(n-1)}\right)q,\phi_i\big)_{\Omega},
+@f}
+and $\mathbf{F}_{3}$ as given in the definition of the stabilized transport
+equation.
+
+The linear system above is of block triangular form if we consider the top
+left $2\times 2$ panel of matrices as one block. We can therefore first solve
+for the velocity and pressure (unless we decide to use $\mathbf U^{(n_p)}$ in
+place of the velocity)
+followed by a solve for the saturation variable. The first of these steps
+requires us to solve
+@f[
+ \left(
+  \begin{array}{cc}
+   \mathbf{M}^{\mathbf{u}} & \mathbf{B}^{T}  \\
+   \mathbf{B}           & \mathbf{0}
+  \end{array}
+ \right)
+ \left(
+  \begin{array}{c}
+   \mathbf{U}^{(n)} \\
+   \mathbf{P}^{(n)}
+  \end{array}
+ \right)
+ =
+ \left(
+  \begin{array}{c}
+   0 \\
+   \mathbf{F}_{2}
+  \end{array}
+ \right)
+@f]
+We apply the Generalized Minimal Residual (GMRES) method [Saad and Schultz
+1986] to this linear system. The ideal preconditioner for the
+velocity-pressure system is
+@f{align*}
+\mathbf{P} =
+ \left(
+  \begin{array}{cc}
+   \mathbf{M}^{\mathbf{u}} &  \mathbf{0}  \\
+   \mathbf{B}           & -\mathbf{S}
+  \end{array}
+ \right),
+ & \qquad
+ \mathbf{P}^{-1} =
+ \left(
+  \begin{array}{cc}
+   \left(\mathbf{M}^{\mathbf{u}}\right)^{-1}                              &  \mathbf{0}  \\
+   \mathbf{S}^{-1} \mathbf{B} \left(\mathbf{M}^{\mathbf{u}}\right)^{-1}   & -\mathbf{S}^{-1}
+  \end{array}
+ \right)
+ @f}
+where
+$\mathbf{S}=\mathbf{B}\left(\mathbf{M}^{\mathbf{u}}\right)^{-1}\mathbf{B}^T$ is
+the Schur complement [Zhang 2005] of the system. This preconditioner is
+optimal since
+@f{align*}
+ \mathbf{P}^{-1}
+ \left(
+  \begin{array}{cc}
+   \mathbf{M}^{\mathbf{u}} & \mathbf{B}^{T}  \\
+   \mathbf{B}           & \mathbf{0}
+  \end{array}
+ \right)
+ =
+  \left(
+  \begin{array}{cc}
+   \mathbf{I}         &  \left(\mathbf{M}^{\mathbf{u}}\right)^{-1} \mathbf{B}^{T}  \\
+   \mathbf{0}         &  \mathbf{I}
+  \end{array}
+ \right),
+@f}
+for which it can be shown that GMRES converges in two iterations.
+
+However, we cannot of course expect to use exact inverses of the
+velocity mass matrix and the Schur complement. We therefore follow the
+approach by [Silvester and Wathen 1994] originally proposed for
+the Stokes system. Adapting it to the current set of equations yields the
+preconditioner
+@f{align*}
+ \mathbf{\tilde{P}}^{-1} =
+ \left(
+  \begin{array}{cc}
+   \widetilde{\left(\mathbf{{M}}^{\mathbf{u}}\right)^{-1}}
+                              &  \mathbf{0}  \\
+   \widetilde{\mathbf{{S}}^{-1}} \mathbf{B} \widetilde{\left(\mathbf{{M}}^{\mathbf{u}}\right)^{-1}}   & -\widetilde{\mathbf{{S}}^{-1}}
+  \end{array}
+ \right)
+@f}
+where a tilde indicates an approximation of the exact inverse matrix. In
+particular, since $\mathbf{{M}}^{\mathbf{u}}=\left( \left(
+    \mathbf{K} \lambda_t \right)^{-1}
+  \mathbf{v}_{i},\mathbf{v}_{j}\right)_{\Omega}$
+is a sparse symmetric and positive definite matrix, we choose for
+$\widetilde{\left(\mathbf{{M}}^{\mathbf{u}}\right)^{-1}}$ a single application of
+a sparse incomplete Cholesky decomposition of this matrix
+[Golub and Van Loan 1996].
+We note that the Schur complement corresponds to the porous
+media flow operator in non-mixed form, $-\nabla \cdot [\mathbf K
+\lambda_t(S)]\nabla$, and that
+$\mathbf{\tilde {S}} = \left( \left( \mathbf{K} \lambda_t \right) \nabla \phi_{i},\nabla \phi_{j}\right)_{\Omega}$
+should therefore be a good approximation of the actual Schur complement matrix $\mathbf
+S$. Since both of these matrices are again symmetric and positive definite, we
+use an incomplete Cholesky decomposition of $\mathbf{\tilde S}$ for $\widetilde
+{\mathbf{{S}}^{-1}}$. It is important to note that $\mathbf{\tilde S}$ needs
+to be built with Dirichlet boundary conditions to ensure its invertibility.
+
+Once the velocity $\mathbf{U}^{(n)} \equiv \mathbf{u}^*_t$  is available, we
+can assemble $\mathbf{H}$ and
+$\mathbf{F}_{3}$ and solve for the saturations using
+@f{align*}
+  \mathbf{M}^{S} \mathbf{S}^{(n)} = \mathbf{F}_{3} - \mathbf{H} \mathbf{U}^{(n)},
+@f}
+where the mass matrix $\mathbf{M}^{S}$ is solved by the conjugate gradient
+method, using an incomplete Cholesky decomposition as preconditioner once
+more.
+
+<h3>The test cases.</h3>
+
+@note
+The implementation discussed here uses and extends
+parts of the step-21, step-31 and step-33 tutorial programs of this
+library. In particular, if you want to understand how it works, please
+consult step-21 for a discussion of the mathematical problem, and
+step-31 from which most of the implementation is derived. We will not
+discuss aspects of the implementation that have already been discussed
+in step-31.
+
+We show numerical results for some two-phase flow equations augmented by
+appropriate initial and boundary conditions in conjunction with two different
+choices of the permeability model. In the problems considered, there is no
+internal source term ($q=0$). As mentioned above, quantitative numerical
+results are presented in [Chueh, Djilali and Bangerth 2011].
+
+For simplicity, we choose $\Omega=[0,1]^d,d=2,3$, though all methods (as well
+as our implementation) should work equally well on general unstructured meshes.
+
+Initial conditions are only required for the saturation variable, and we
+choose $S(\mathbf{x},0)=0.2$, i.e. the porous medium is initially filled by a
+mixture of the non-wetting (80%) and wetting (20%) phases. This differs from
+the initial condition in step-21 where we had taken $S(\mathbf{x},0)=0$, but
+for complicated mathematical reasons that are mentioned there in a longish
+remark, the current method using an entropy-based artificial diffusion term
+does not converge to the viscosity solution with this initial condition
+without additional modifications to the method. We therefore choose this
+modified version for the current program.
+
+Furthermore, we prescribe a linear pressure on
+the boundaries:
+@f[
+   p(\mathbf{x},t) = 1 - x \qquad
+   \textrm{on} \quad \partial \Omega \times [0,T].
+@f]
+Pressure and saturation uniquely
+determine a velocity, and the velocity determines whether a boundary segment
+is an inflow or outflow boundary. On the inflow part of the boundary,
+$\mathbf{\Gamma}_{in}(t)$, we impose
+@f{align*}
+   S(\mathbf{x},t) = 1 \qquad & \textrm{on} \quad \mathbf{\Gamma}_{in}(t) \cap \left\{x = 0\right\}, \\
+   S(\mathbf{x},t) = 0 \qquad & \textrm{on} \quad \mathbf{\Gamma}_{in}(t) \backslash \left\{x = 0\right\}.
+@f}
+In other words, the domain is flooded by the wetting phase from the left.
+No boundary conditions for the saturation are required for the outflow parts
+of the boundary.
+
+All the numerical and physical parameters used for the 2D/3D
+cases are listed in the following table:
+
+<table align="center" class="tutorial" width="50%">
+<tr>
+    <th>Parameter                           </th><th>Symbol          </th><th>Value               </th><th>Units     </th></tr><tr>
+    <td>Porosity                            </td><td>$\epsilon$      </td><td>1.0                 </td><td>-                   </td></tr><tr>
+    <td>Viscosity (wetting)                 </td><td>$\mu_w$         </td><td>0.2                 </td><td>$kg \cdot m^{-1} \cdot sec^{-1}$   </td></tr><tr>
+    <td>Viscosity (nonwetting)              </td><td>$\mu_{nw}$      </td><td>1.0                 </td><td>$kg \cdot m^{-1} \cdot sec^{-1}$      </td></tr><tr>
+    <td>Stabilization exponent              </td><td>$\alpha$        </td><td>1.0                 </td><td>-     </td></tr><tr>
+    <td>Stabilization constant              </td><td>$\beta$         </td><td>2D: 0.3; 3D: 0.27   </td><td>- </td></tr><tr>
+    <td>Normalization constant              </td><td>$c_R$           </td><td>1.0                 </td><td>- </td></tr><tr>
+    <td>Number of high-permeability regions </td><td>$N$             </td><td>2D: 40; 3D: 100     </td><td>- </td></tr><tr>
+    <td>Operator splitting threshold        </td><td>$\theta^\ast$   </td><td>5.0              </td><td>- </td></tr>
+</table>
+
+
+<h3>List of references</h3>
+
+
+<ol>
+<li>
+CC Chueh, N Djilali and W Bangerth.
+<br> An h-adaptive operator splitting method for two-phase flow in 3D
+  heterogeneous porous media.
+<br> SIAM Journal on Scientific Computing, vol. 35 (2013), pp. B149-B175
+
+<li>
+M. Kronbichler, T. Heister, and W. Bangerth
+<br> High Accuracy Mantle Convection Simulation through Modern Numerical
+Methods.
+<br> Geophysical Journal International, vol. 191 (2012), pp. 12-29
+
+<li>
+F Brezzi and M Fortin.
+<br> <i>Mixed and Hybrid Finite Element Methods</i>.
+<br> Springer-Verlag, 1991.
+
+<li>
+Z Chen.
+<br> <i>Finite Element Methods and Their Applications</i>.
+<br> Springer, 2005.
+
+<li>
+JL Guermond and R Pasquetti.
+<br> Entropy-based nonlinear viscosity for Fourier approximations of
+  conservation laws.
+<br> <i>Comptes Rendus Mathematique</i>, 346(13-14):801-806, 2008.
+
+<li>
+CC Chueh, M Secanell, W Bangerth, and N Djilali.
+<br> Multi-level adaptive simulation of transient two-phase flow in
+  heterogeneous porous media.
+<br> <i>Computers and Fluids</i>, 39:1585-1596, 2010.
+
+<li>
+Y Saad and MH Schultz.
+<br> GMRES: A generalized minimal residual algorithm for solving
+  nonsymmetric linear systems.
+<br> <i>SIAM Journal on Scientific and Statistical Computing</i>,
+  7(3):856-869, 1986.
+
+<li>
+F Zhang.
+<br> <i>The Schur Complement and its Applications</i>.
+<br> Springer, 2005.
+
+<li>
+D Silvester and A Wathen.
+<br> Fast iterative solution of stabilised Stokes systems part ii: Using
+  general block preconditioners.
+<br> <i>SIAM Journal on Numerical Analysis</i>, 31(5):1352-1367, 1994.
+
+<li>
+GH Golub and CF van Loan.
+<br> <i>Matrix Computations</i>.
+<br> 3rd Edition, Johns Hopkins, 1996.
+
+<li>
+SE Buckley and MC Leverett.
+<br> Mechanism of fluid displacements in sands.
+<br> <i>AIME Trans.</i>, 146:107-116, 1942.
+
+</ol>
diff --git a/examples/step-43/doc/kind b/examples/step-43/doc/kind
new file mode 100644
index 0000000..e62f4e7
--- /dev/null
+++ b/examples/step-43/doc/kind
@@ -0,0 +1 @@
+fluids
diff --git a/examples/step-43/doc/results.dox b/examples/step-43/doc/results.dox
new file mode 100644
index 0000000..eec7e77
--- /dev/null
+++ b/examples/step-43/doc/results.dox
@@ -0,0 +1,111 @@
+<h1>Results</h1>
+
+
+The output of this program is not really much different from that of
+step-21: it solves the same problem, after all. Of more importance are
+quantitative metrics such as the accuracy of the solution as well as
+the time needed to compute it. These are documented in detail in the
+two publications listed at the top of this page and we won't repeat
+them here.
+
+That said, no tutorial program is complete without a couple of good
+pictures, so here is some output of a run in 3d:
+
+<table align="center" class="tutorial" cellspacing="3" cellpadding="3">
+  <tr>
+    <td align="center">
+        <img src="http://www.dealii.org/images/steps/developer/step-43.3d.velocity.png" alt="">
+	<p align="center">
+        Velocity vectors of flow through the porous medium with random
+        permeability model. Streaming paths of high permeability and resulting
+        high velocity are clearly visible.
+	</p>
+    </td>
+    <td align="center">
+        <img src="http://www.dealii.org/images/steps/developer/step-43.3d.streamlines.png" alt="">
+	<p align="center">
+        Streamlines colored by the saturation along the streamline path. Blue
+        streamlines indicate low saturations, i.e., the flow along these
+	streamlines must be slow or else more fluid would have been
+        transported along them. On the other hand, green paths indicate high
+        velocities since the fluid front has already reached further into the
+        domain.
+	</p>
+    </td>
+  </tr>
+
+  <tr>
+    <td align="center">
+        <img src="http://www.dealii.org/images/steps/developer/step-43.3d.saturation.png" alt="">
+	<p align="center">
+        Streamlines with a volume rendering of the saturation, showing how far
+        the fluid front has advanced at this time.
+	</p>
+    </td>
+    <td align="center">
+        <img src="http://www.dealii.org/images/steps/developer/step-43.3d.mesh.png" alt="">
+	<p align="center">
+	Surface of the mesh showing the adaptive refinement along the front.
+	</p>
+    </td>
+  </tr>
+</table>
+
+
+<a name="extensions"></a>
+<h3>Possibilities for extensions</h3>
+
+The primary objection one may have to this program is that it is still too
+slow: 3d computations on reasonably fine meshes are simply too expensive to be
+done routinely and with reasonably quick turn-around. This is similar to the
+situation we were in when we wrote step-31, from which this program has taken
+much inspiration. The solution is similar to the one there as well: We need to
+parallelize the program in a way similar to how we derived step-32 out of
+step-31. In fact, all of the techniques used in step-32 would be transferable
+to this program as well, making the program run on dozens or hundreds of
+processors immediately.
+
+A different direction is to make the program more relevant to many other
+porous media applications. Specifically, one avenue is to go to the primary
+user of porous media flow simulators, namely the oil industry. There,
+applications in this area are dominated by multiphase flow (i.e., more than
+the two phases we have here), and the reactions they may have with each other
+(or any other way phases may exchange mass, such as through dissolution in and
+bubbling out of gas from the oil phase). Furthermore, the presence of gas
+often leads to compressibility effects of the fluid. Jointly, these effects
+are typically formulated in the widely-used "black oil model". True reactions
+between multiple phases also play a role in oil reservoir modeling when
+considering controlled burns of oil in the reservoir to raise pressure and
+temperature. These are much more complex problems, though, and left for future
+projects.
+
+Finally, from a mathematical perspective, we have derived the
+criterion for re-computing the velocity/pressure solution at a given
+time step under the assumption that we want to compare the solution we
+would get at the current time step with that computed the last time we
+actually solved this system. However, in the program, whenever we did
+not re-compute the solution, we didn't just use the previously
+computed solution but instead extrapolated from the previous two times
+we solved the system. Consequently, the criterion was pessimistically
+stated: what we should really compare is the solution we would get at
+the current time step with the extrapolated one. Re-stating the
+theorem in this regard is left as an exercise.
+
+There are also other ways to extend the mathematical foundation of
+this program; for example, one may say that it isn't the velocity we
+care about, but in fact the saturation. Thus, one may ask whether the
+criterion we use here to decide whether $\mathbf u$ needs to be
+recomputed is appropriate; one may, for example, suggest that it is
+also important to decide whether (and by how much) a wrong velocity
+field in fact affects the solution of the saturation equation. This
+would then naturally lead to a sensitivity analysis.
+
+From an algorithmic viewpoint, we have here used a criterion for refinement
+that is often used in engineering, namely by looking at the gradient of
+the solution. However, if you inspect the solution, you will find that
+it quickly leads to refinement almost everywhere, even in regions where it
+is clearly not necessary: that a criterion is frequently used therefore does
+not imply that it is a useful criterion to begin with. On the other hand, replacing
+this criterion by a different and better one should not be very difficult.
+For example, the KellyErrorEstimator class used in many other programs
+should certainly be applicable to the current problem as well.
diff --git a/examples/step-43/doc/tooltip b/examples/step-43/doc/tooltip
new file mode 100644
index 0000000..48a5223
--- /dev/null
+++ b/examples/step-43/doc/tooltip
@@ -0,0 +1 @@
+Efficient ways to solve two-phase flow problems on adaptive meshes in 2d and 3d.
diff --git a/examples/step-43/step-43.cc b/examples/step-43/step-43.cc
new file mode 100644
index 0000000..89910f9
--- /dev/null
+++ b/examples/step-43/step-43.cc
@@ -0,0 +1,2285 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2010 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Authors: Chih-Che Chueh, University of Victoria, 2010
+ *          Wolfgang Bangerth, Texas A&M University, 2010
+ */
+
+
+// @sect3{Include files}
+
+// The first step, as always, is to include the functionality of a number of
+// deal.II and C++ header files.
+//
+// The list includes some header files that provide vector, matrix, and
+// preconditioner classes that implement interfaces to the respective Trilinos
+// classes; some more information on these may be found in step-31.
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/tensor_function.h>
+#include <deal.II/base/std_cxx11/shared_ptr.h>
+#include <deal.II/base/index_set.h>
+
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/solver_gmres.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/block_sparsity_pattern.h>
+#include <deal.II/lac/constraint_matrix.h>
+
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/grid_tools.h>
+
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_renumbering.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_system.h>
+#include <deal.II/fe/fe_values.h>
+
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/numerics/solution_transfer.h>
+
+#include <deal.II/lac/trilinos_sparse_matrix.h>
+#include <deal.II/lac/trilinos_block_sparse_matrix.h>
+#include <deal.II/lac/trilinos_vector.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+#include <deal.II/lac/trilinos_precondition.h>
+
+#include <iostream>
+#include <fstream>
+#include <sstream>
+
+
+// At the end of this top-matter, we open a namespace for the current project
+// into which all the following material will go, and then import all deal.II
+// names into this namespace:
+namespace Step43
+{
+  using namespace dealii;
+
+
+  // @sect3{Pressure right hand side, pressure boundary values and saturation initial value classes}
+
+  // The following part is taken directly from step-21 so there is no need to
+  // repeat the descriptions found there.
+  template <int dim>
+  class PressureRightHandSide : public Function<dim>
+  {
+  public:
+    PressureRightHandSide () : Function<dim>(1) {}
+
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+  };
+
+
+
+  template <int dim>
+  double
+  PressureRightHandSide<dim>::value (const Point<dim>  &/*p*/,
+                                     const unsigned int /*component*/) const
+  {
+    return 0;
+  }
+
+
+  template <int dim>
+  class PressureBoundaryValues : public Function<dim>
+  {
+  public:
+    PressureBoundaryValues () : Function<dim>(1) {}
+
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+  };
+
+
+  template <int dim>
+  double
+  PressureBoundaryValues<dim>::value (const Point<dim>  &p,
+                                      const unsigned int /*component*/) const
+  {
+    return 1-p[0];
+  }
+
+
+  template <int dim>
+  class SaturationBoundaryValues : public Function<dim>
+  {
+  public:
+    SaturationBoundaryValues () : Function<dim>(1) {}
+
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+  };
+
+
+
+  template <int dim>
+  double
+  SaturationBoundaryValues<dim>::value (const Point<dim> &p,
+                                        const unsigned int /*component*/) const
+  {
+    if (p[0] == 0)
+      return 1;
+    else
+      return 0;
+  }
+
+
+  template <int dim>
+  class SaturationInitialValues : public Function<dim>
+  {
+  public:
+    SaturationInitialValues () : Function<dim>(1) {}
+
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+
+    virtual void vector_value (const Point<dim> &p,
+                               Vector<double>   &value) const;
+  };
+
+
+  template <int dim>
+  double
+  SaturationInitialValues<dim>::value (const Point<dim>  &/*p*/,
+                                       const unsigned int /*component*/) const
+  {
+    return 0.2;
+  }
+
+
+  template <int dim>
+  void
+  SaturationInitialValues<dim>::vector_value (const Point<dim> &p,
+                                              Vector<double>   &values) const
+  {
+    for (unsigned int c=0; c<this->n_components; ++c)
+      values(c) = SaturationInitialValues<dim>::value (p,c);
+  }
+
+
+  // @sect3{Permeability models}
+
+  // In this tutorial, we still use the two permeability models previously
+  // used in step-21 so we again refrain from commenting in detail about them.
+  namespace SingleCurvingCrack
+  {
+    template <int dim>
+    class KInverse : public TensorFunction<2,dim>
+    {
+    public:
+      KInverse ()
+        :
+        TensorFunction<2,dim> ()
+      {}
+
+      virtual void value_list (const std::vector<Point<dim> > &points,
+                               std::vector<Tensor<2,dim> >    &values) const;
+    };
+
+
+    template <int dim>
+    void
+    KInverse<dim>::value_list (const std::vector<Point<dim> > &points,
+                               std::vector<Tensor<2,dim> >    &values) const
+    {
+      Assert (points.size() == values.size(),
+              ExcDimensionMismatch (points.size(), values.size()));
+
+      for (unsigned int p=0; p<points.size(); ++p)
+        {
+          values[p].clear ();
+
+          const double distance_to_flowline
+            = std::fabs(points[p][1]-0.5-0.1*std::sin(10*points[p][0]));
+
+          const double permeability = std::max(std::exp(-(distance_to_flowline*
+                                                          distance_to_flowline)
+                                                        / (0.1 * 0.1)),
+                                               0.01);
+
+          for (unsigned int d=0; d<dim; ++d)
+            values[p][d][d] = 1./permeability;
+        }
+    }
+  }
+
+
+  namespace RandomMedium
+  {
+    template <int dim>
+    class KInverse : public TensorFunction<2,dim>
+    {
+    public:
+      KInverse ()
+        :
+        TensorFunction<2,dim> ()
+      {}
+
+      virtual void value_list (const std::vector<Point<dim> > &points,
+                               std::vector<Tensor<2,dim> >    &values) const;
+
+    private:
+      static std::vector<Point<dim> > centers;
+
+      static std::vector<Point<dim> > get_centers ();
+    };
+
+
+
+    template <int dim>
+    std::vector<Point<dim> >
+    KInverse<dim>::centers = KInverse<dim>::get_centers();
+
+
+    template <int dim>
+    std::vector<Point<dim> >
+    KInverse<dim>::get_centers ()
+    {
+      const unsigned int N = (dim == 2 ?
+                              40 :
+                              (dim == 3 ?
+                               100 :
+                               throw ExcNotImplemented()));
+
+      std::vector<Point<dim> > centers_list (N);
+      for (unsigned int i=0; i<N; ++i)
+        for (unsigned int d=0; d<dim; ++d)
+          centers_list[i][d] = static_cast<double>(rand())/RAND_MAX;
+
+      return centers_list;
+    }
+
+
+
+    template <int dim>
+    void
+    KInverse<dim>::value_list (const std::vector<Point<dim> > &points,
+                               std::vector<Tensor<2,dim> >    &values) const
+    {
+      Assert (points.size() == values.size(),
+              ExcDimensionMismatch (points.size(), values.size()));
+
+      for (unsigned int p=0; p<points.size(); ++p)
+        {
+          values[p].clear ();
+
+          double permeability = 0;
+          for (unsigned int i=0; i<centers.size(); ++i)
+            permeability += std::exp(-(points[p]-centers[i]).norm_square()
+                                     / (0.05 * 0.05));
+
+          const double normalized_permeability
+            = std::min (std::max(permeability, 0.01), 4.);
+
+          for (unsigned int d=0; d<dim; ++d)
+            values[p][d][d] = 1./normalized_permeability;
+        }
+    }
+  }
+
+
+  // @sect3{Physical quantities}
+
+  // The implementations of all the physical quantities such as total mobility
+  // $\lambda_t$ and fractional flow of water $F$ are taken from step-21 so
+  // again we don't have to comment on them. Compared to step-21 we
+  // have added checks that the saturation passed to these functions is in
+  // fact within the physically valid range. Furthermore, given that the
+  // wetting phase moves at speed $\mathbf u F'(S)$ it is clear that $F'(S)$
+  // must be greater or equal to zero, so we assert that as well to make sure
+  // that our calculations to get at the formula for the derivative made
+  // sense.
+  double mobility_inverse (const double S,
+                           const double viscosity)
+  {
+    return 1.0 / (1.0/viscosity * S * S + (1-S) * (1-S));
+  }
+
+
+  double fractional_flow (const double S,
+                          const double viscosity)
+  {
+    Assert ((S >= 0) && (S<=1),
+            ExcMessage ("Saturation is outside its physically valid range."));
+
+    return S*S / ( S * S + viscosity * (1-S) * (1-S));
+  }
+
+
+  double fractional_flow_derivative (const double S,
+                                     const double viscosity)
+  {
+    Assert ((S >= 0) && (S<=1),
+            ExcMessage ("Saturation is outside its physically valid range."));
+
+    const double temp = ( S * S + viscosity * (1-S) * (1-S) );
+
+    const double numerator   =  2.0 * S * temp
+                                -
+                                S * S *
+                                ( 2.0 * S - 2.0 * viscosity * (1-S) );
+    const double denominator =  std::pow(temp, 2.0);
+
+    const double F_prime = numerator / denominator;
+
+    Assert (F_prime >= 0, ExcInternalError());
+
+    return F_prime;
+  }
+
+
+  // @sect3{Helper classes for solvers and preconditioners}
+
+  // In this first part we define a number of classes that we need in the
+  // construction of linear solvers and preconditioners. This part is
+  // essentially the same as that used in step-31. The only difference is that
+  // the original variable name stokes_matrix is replaced by another name
+  // darcy_matrix to match our problem.
+  namespace LinearSolvers
+  {
+    // Represents the action of the inverse of a matrix: vmult() does not
+    // multiply with a stored inverse but runs a preconditioned CG solve
+    // with the matrix and preconditioner given to the constructor.
+    template <class Matrix, class Preconditioner>
+    class InverseMatrix : public Subscriptor
+    {
+    public:
+      // Stores the address of m and a reference to preconditioner; neither
+      // object is copied.
+      InverseMatrix (const Matrix         &m,
+                     const Preconditioner &preconditioner);
+
+
+      // Sets dst to zero and then solves (*matrix) * dst = src with CG.
+      template <typename VectorType>
+      void vmult (VectorType       &dst,
+                  const VectorType &src) const;
+
+    private:
+      // The matrix whose inverse is applied.
+      const SmartPointer<const Matrix> matrix;
+      // Kept by reference, so the preconditioner object has to outlive
+      // this object.
+      const Preconditioner &preconditioner;
+    };
+
+
+    // Remember where the matrix lives and bind the preconditioner
+    // reference; nothing is copied here.
+    template <class Matrix, class Preconditioner>
+    InverseMatrix<Matrix,Preconditioner>::
+    InverseMatrix (const Matrix         &system_matrix,
+                   const Preconditioner &precond)
+      :
+      matrix (&system_matrix),
+      preconditioner (precond)
+    {}
+
+
+
+    // Apply the inverse of the stored matrix to src by solving
+    // (*matrix) * dst = src with preconditioned CG. dst is overwritten; its
+    // previous content is not used as a starting guess.
+    template <class Matrix, class Preconditioner>
+    template <typename VectorType>
+    void
+    InverseMatrix<Matrix,Preconditioner>::
+    vmult (VectorType       &dst,
+           const VectorType &src) const
+    {
+      // Allow at most src.size() iterations and stop once the solver's
+      // convergence criterion drops below 1e-7 times the norm of src.
+      SolverControl solver_control (src.size(), 1e-7*src.l2_norm());
+      SolverCG<VectorType> cg (solver_control);
+
+      dst = 0;
+
+      try
+        {
+          cg.solve (*matrix, dst, src, preconditioner);
+        }
+      // Catch by const reference: the handler only reads the message.
+      //
+      // NOTE(review): deal.II documents Assert as being active only in
+      // debug mode, so in optimized builds a failed solve appears to be
+      // ignored here -- confirm whether AssertThrow is wanted instead.
+      catch (const std::exception &e)
+        {
+          Assert (false, ExcMessage(e.what()));
+        }
+    }
+
+    // Block preconditioner for the 2x2 Darcy block system. vmult() applies
+    // a_preconditioner to block 0 and m_inverse to a combination of block 1
+    // of the source and the (1,0) block of darcy_matrix applied to the new
+    // block 0.
+    template <class PreconditionerA, class PreconditionerMp>
+    class BlockSchurPreconditioner : public Subscriptor
+    {
+    public:
+      // Stores pointers to S and Mpinv and a reference to Apreconditioner,
+      // and sizes the temporary vector to the number of rows of the (1,1)
+      // block of S.
+      BlockSchurPreconditioner (
+        const TrilinosWrappers::BlockSparseMatrix     &S,
+        const InverseMatrix<TrilinosWrappers::SparseMatrix,
+        PreconditionerMp>         &Mpinv,
+        const PreconditionerA                         &Apreconditioner);
+
+      // Applies the preconditioner to src and writes the result to dst.
+      void vmult (TrilinosWrappers::MPI::BlockVector       &dst,
+                  const TrilinosWrappers::MPI::BlockVector &src) const;
+
+    private:
+      const SmartPointer<const TrilinosWrappers::BlockSparseMatrix> darcy_matrix;
+      const SmartPointer<const InverseMatrix<TrilinosWrappers::SparseMatrix,
+            PreconditionerMp > > m_inverse;
+      // Kept by reference, so the preconditioner object has to outlive
+      // this object.
+      const PreconditionerA &a_preconditioner;
+
+      // Scratch vector used by vmult(); mutable because vmult() is const.
+      mutable TrilinosWrappers::MPI::Vector tmp;
+    };
+
+
+
+    // Keep handles to the three operators and give the scratch vector one
+    // entry per row of the (1,1) block of the system matrix.
+    template <class PreconditionerA, class PreconditionerMp>
+    BlockSchurPreconditioner<PreconditionerA, PreconditionerMp>::
+    BlockSchurPreconditioner(const TrilinosWrappers::BlockSparseMatrix  &system_matrix,
+                             const InverseMatrix<TrilinosWrappers::SparseMatrix,
+                             PreconditionerMp>      &schur_inverse,
+                             const PreconditionerA                      &velocity_preconditioner)
+      :
+      darcy_matrix            (&system_matrix),
+      m_inverse               (&schur_inverse),
+      a_preconditioner        (velocity_preconditioner),
+      tmp                     (complete_index_set(system_matrix.block(1,1).m()))
+    {}
+
+
+    // Apply the block preconditioner: first the preconditioner for block 0,
+    // then the inverse operator for block 1 acting on the negated residual
+    // of the (1,0) block.
+    template <class PreconditionerA, class PreconditionerMp>
+    void BlockSchurPreconditioner<PreconditionerA, PreconditionerMp>::vmult (
+      TrilinosWrappers::MPI::BlockVector       &dst,
+      const TrilinosWrappers::MPI::BlockVector &src) const
+    {
+      TrilinosWrappers::MPI::Vector       &dst_0 = dst.block(0);
+      TrilinosWrappers::MPI::Vector       &dst_1 = dst.block(1);
+      const TrilinosWrappers::MPI::Vector &src_0 = src.block(0);
+      const TrilinosWrappers::MPI::Vector &src_1 = src.block(1);
+
+      const TrilinosWrappers::SparseMatrix &lower_left_block
+        = darcy_matrix->block(1,0);
+
+      // Block 0 of the result.
+      a_preconditioner.vmult (dst_0, src_0);
+
+      // Residual of the (1,0) block with respect to the new block 0 and the
+      // right hand side src_1, with its sign flipped afterwards.
+      lower_left_block.residual(tmp, dst_0, src_1);
+      tmp *= -1;
+
+      // Block 1 of the result.
+      m_inverse->vmult (dst_1, tmp);
+    }
+  }
+
+
+  // @sect3{The TwoPhaseFlowProblem class}
+
+  // The definition of the class that defines the top-level logic of solving
+  // the time-dependent advection-dominated two-phase flow problem (or
+  // Buckley-Leverett problem [Buckley 1942]) is mainly based on tutorial
+  // programs step-21 and step-33, and in particular on step-31 where we have
+  // used basically the same general structure as done here. As in step-31,
+  // the key routines to look for in the implementation below are the
+  // <code>run()</code> and <code>solve()</code> functions.
+  //
+  // The main difference to step-31 is that, since adaptive operator splitting
+  // is considered, we need a couple more member variables to hold the last
+  // two computed Darcy (velocity/pressure) solutions in addition to the
+  // current one (which is either computed directly, or extrapolated from the
+  // previous two), and we need to remember the last two times we computed the
+  // Darcy solution. We also need a helper function that figures out whether
+  // we do indeed need to recompute the Darcy solution.
+  //
+  // Unlike step-31, this step uses one more ConstraintMatrix object called
+  // darcy_preconditioner_constraints. This constraint object is used only for
+  // assembling the matrix for the Darcy preconditioner and includes hanging
+  // node constraints as well as Dirichlet boundary value constraints for the
+  // pressure variable. We need this because we are building a Laplace matrix
+  // for the pressure (as an approximation of the Schur complement) which is
+  // only positive definite if boundary conditions are applied.
+  //
+  // The collection of member functions and variables thus declared in this
+  // class is then rather similar to those in step-31:
+  // Top-level class of the program. It owns the mesh, the two DoFHandler
+  // objects (one for the Darcy velocity/pressure system, one for the
+  // saturation), the associated matrices and vectors, and the time stepping
+  // state. The public interface is the constructor and run().
+  template <int dim>
+  class TwoPhaseFlowProblem
+  {
+  public:
+    TwoPhaseFlowProblem (const unsigned int degree);
+    void run ();
+
+  private:
+    // Setup and assembly functions.
+    void setup_dofs ();
+    void assemble_darcy_preconditioner ();
+    void build_darcy_preconditioner ();
+    void assemble_darcy_system ();
+    void assemble_saturation_system ();
+    void assemble_saturation_matrix ();
+    void assemble_saturation_rhs ();
+    void assemble_saturation_rhs_cell_term (const FEValues<dim>             &saturation_fe_values,
+                                            const FEValues<dim>             &darcy_fe_values,
+                                            const double                     global_max_u_F_prime,
+                                            const double                     global_S_variation,
+                                            const std::vector<types::global_dof_index> &local_dof_indices);
+    void assemble_saturation_rhs_boundary_term (const FEFaceValues<dim>             &saturation_fe_face_values,
+                                                const FEFaceValues<dim>             &darcy_fe_face_values,
+                                                const std::vector<types::global_dof_index>     &local_dof_indices);
+    void solve ();
+    void refine_mesh (const unsigned int              min_grid_level,
+                      const unsigned int              max_grid_level);
+    void output_results () const;
+
+    // We follow with a number of helper functions that are used in a variety
+    // of places throughout the program:
+    double                   get_max_u_F_prime () const;
+    std::pair<double,double> get_extrapolated_saturation_range () const;
+    bool                     determine_whether_to_solve_for_pressure_and_velocity () const;
+    void                     project_back_saturation ();
+    double                   compute_viscosity (const std::vector<double>          &old_saturation,
+                                                const std::vector<double>          &old_old_saturation,
+                                                const std::vector<Tensor<1,dim> >  &old_saturation_grads,
+                                                const std::vector<Tensor<1,dim> >  &old_old_saturation_grads,
+                                                const std::vector<Vector<double> > &present_darcy_values,
+                                                const double                        global_max_u_F_prime,
+                                                const double                        global_S_variation,
+                                                const double                        cell_diameter) const;
+
+
+    // This all is followed by the member variables, most of which are similar
+    // to the ones in step-31, with the exception of the ones that pertain to
+    // the macro time stepping for the velocity/pressure system:
+    Triangulation<dim>                   triangulation;
+    double                               global_Omega_diameter;
+
+    const unsigned int degree;
+
+    // Darcy (velocity/pressure) system: element, DoF handler, constraints,
+    // block matrices and block vectors.
+    const unsigned int                   darcy_degree;
+    FESystem<dim>                        darcy_fe;
+    DoFHandler<dim>                      darcy_dof_handler;
+    ConstraintMatrix                     darcy_constraints;
+
+    // Constraints used only when assembling the preconditioner matrix.
+    ConstraintMatrix                     darcy_preconditioner_constraints;
+
+    TrilinosWrappers::BlockSparseMatrix  darcy_matrix;
+    TrilinosWrappers::BlockSparseMatrix  darcy_preconditioner_matrix;
+
+    TrilinosWrappers::MPI::BlockVector   darcy_solution;
+    TrilinosWrappers::MPI::BlockVector   darcy_rhs;
+
+    TrilinosWrappers::MPI::BlockVector   last_computed_darcy_solution;
+    TrilinosWrappers::MPI::BlockVector   second_last_computed_darcy_solution;
+
+
+    // Saturation system: element, DoF handler, constraints, matrix and
+    // vectors.
+    const unsigned int                   saturation_degree;
+    FE_Q<dim>                            saturation_fe;
+    DoFHandler<dim>                      saturation_dof_handler;
+    ConstraintMatrix                     saturation_constraints;
+
+    TrilinosWrappers::SparseMatrix       saturation_matrix;
+
+
+    TrilinosWrappers::MPI::Vector        saturation_solution;
+    TrilinosWrappers::MPI::Vector        old_saturation_solution;
+    TrilinosWrappers::MPI::Vector        old_old_saturation_solution;
+    TrilinosWrappers::MPI::Vector        saturation_rhs;
+
+    TrilinosWrappers::MPI::Vector        saturation_matching_last_computed_darcy_solution;
+
+    const double                         saturation_refinement_threshold;
+
+    // Time stepping state. The "macro" time steps belong to the
+    // velocity/pressure system, the plain ones to the saturation.
+    double                               time;
+    const double                         end_time;
+
+    double                               current_macro_time_step;
+    double                               old_macro_time_step;
+
+    double                               time_step;
+    double                               old_time_step;
+    unsigned int                         timestep_number;
+
+    // Model parameters, fixed at construction time.
+    const double                         viscosity;
+    const double                         porosity;
+    const double                         AOS_threshold;
+
+    // Inner preconditioners for the (0,0) and (1,1) blocks of the
+    // preconditioner matrix. Note that, despite its name, Amg_preconditioner
+    // is declared as a PreconditionIC object.
+    std_cxx11::shared_ptr<TrilinosWrappers::PreconditionIC> Amg_preconditioner;
+    std_cxx11::shared_ptr<TrilinosWrappers::PreconditionIC> Mp_preconditioner;
+
+    bool                                rebuild_saturation_matrix;
+
+    // At the very end we declare a variable that denotes the material
+    // model. Compared to step-21, we do this here as a member variable since
+    // we will want to use it in a variety of places and so having a central
+    // place where such a variable is declared will make it simpler to replace
+    // one class by another (e.g. replace RandomMedium::KInverse by
+    // SingleCurvingCrack::KInverse).
+    const RandomMedium::KInverse<dim>   k_inverse;
+  };
+
+
+  // @sect3{TwoPhaseFlowProblem<dim>::TwoPhaseFlowProblem}
+
+  // The constructor of this class is an extension of the constructors in
+  // step-21 and step-31. We need to add the various variables that concern
+  // the saturation. As discussed in the introduction, we are going to use
+  // $Q_2 \times Q_1$ (Taylor-Hood) elements again for the Darcy system, an
+  // element combination that fulfills the Ladyzhenskaya-Babuska-Brezzi (LBB)
+  // conditions [Brezzi and Fortin 1991, Chen 2005], and $Q_1$ elements for
+  // the saturation. However, by using variables that store the polynomial
+  // degree of the Darcy and saturation finite elements, it is easy to
+  // consistently modify the degree of the elements as well as all quadrature
+  // formulas used on them downstream. Moreover, we initialize the time
+  // stepping variables related to operator splitting as well as the option
+  // for matrix assembly and preconditioning:
+  // Set up the finite elements and DoF handlers for the Darcy and the
+  // saturation systems from the given polynomial degree, and give every
+  // scalar member a defined starting value. The initializers are listed in
+  // the order in which the members are declared in the class.
+  template <int dim>
+  TwoPhaseFlowProblem<dim>::TwoPhaseFlowProblem (const unsigned int degree)
+    :
+    triangulation (Triangulation<dim>::maximum_smoothing),
+    // Start from a defined value so that the member is never read in an
+    // indeterminate state.
+    global_Omega_diameter (0),
+
+    degree (degree),
+    darcy_degree (degree),
+    // dim velocity components of degree darcy_degree+1 and one pressure
+    // component of degree darcy_degree.
+    darcy_fe (FE_Q<dim>(darcy_degree+1), dim,
+              FE_Q<dim>(darcy_degree), 1),
+    darcy_dof_handler (triangulation),
+
+    saturation_degree (degree+1),
+    saturation_fe (saturation_degree),
+    saturation_dof_handler (triangulation),
+
+    saturation_refinement_threshold (0.5),
+
+    time (0),
+    end_time (10),
+
+    current_macro_time_step (0),
+    old_macro_time_step (0),
+
+    time_step (0),
+    old_time_step (0),
+    // The step counter previously had no initializer here; reading or
+    // incrementing it before an assignment would be undefined behavior.
+    timestep_number (0),
+    viscosity (0.2),
+    porosity (1.0),
+    AOS_threshold (3.0),
+
+    rebuild_saturation_matrix (true)
+  {}
+
+
+  // @sect3{TwoPhaseFlowProblem<dim>::setup_dofs}
+
+  // This is the function that sets up the DoFHandler objects we have here
+  // (one for the Darcy part and one for the saturation part) as well as set
+  // to the right sizes the various objects required for the linear algebra in
+  // this program. Its basic operations are similar to what step-31 did.
+  //
+  // The body of the function first enumerates all degrees of freedom for the
+  // Darcy and saturation systems. For the Darcy part, degrees of freedom are
+  // then sorted to ensure that velocities precede pressure DoFs so that we
+  // can partition the Darcy matrix into a $2 \times 2$ matrix.
+  //
+  // Then, we need to incorporate hanging node constraints and Dirichlet
+  // boundary value constraints into darcy_preconditioner_constraints.  The
+  // boundary condition constraints are only set on the pressure component
+  // since the Schur complement preconditioner that corresponds to the porous
+  // media flow operator in non-mixed form, $-\nabla \cdot [\mathbf K
+  // \lambda_t(S)]\nabla$, acts only on the pressure variable. Therefore, we
+  // use a component_mask that filters out the velocity component, so that the
+  // condensation is performed on pressure degrees of freedom only.
+  //
+  // After having done so, we count the number of degrees of freedom in the
+  // various blocks. This information is then used to create the sparsity
+  // pattern for the Darcy and saturation system matrices as well as the
+  // preconditioner matrix from which we build the Darcy preconditioner. As in
+  // step-31, we choose to create the pattern using the blocked version of
+  // DynamicSparsityPattern. So, for this, we follow the same way as step-31
+  // did and we don't have to repeat descriptions again for the rest of the
+  // member function.
+  // Distribute the degrees of freedom of both systems, rebuild the three
+  // constraint objects, and resize all matrices and vectors accordingly.
+  template <int dim>
+  void TwoPhaseFlowProblem<dim>::setup_dofs ()
+  {
+    // Map the dim velocity components to block 0 and the pressure to
+    // block 1.
+    std::vector<unsigned int> darcy_block_component (dim+1,0);
+    darcy_block_component[dim] = 1;
+    {
+      darcy_dof_handler.distribute_dofs (darcy_fe);
+      DoFRenumbering::Cuthill_McKee (darcy_dof_handler);
+      DoFRenumbering::component_wise (darcy_dof_handler, darcy_block_component);
+
+      darcy_constraints.clear ();
+      DoFTools::make_hanging_node_constraints (darcy_dof_handler, darcy_constraints);
+      darcy_constraints.close ();
+    }
+    {
+      saturation_dof_handler.distribute_dofs (saturation_fe);
+
+      saturation_constraints.clear ();
+      DoFTools::make_hanging_node_constraints (saturation_dof_handler, saturation_constraints);
+      saturation_constraints.close ();
+    }
+    {
+      // Hanging node constraints plus zero boundary constraints restricted
+      // to the pressure component.
+      darcy_preconditioner_constraints.clear ();
+
+      FEValuesExtractors::Scalar pressure(dim);
+
+      DoFTools::make_hanging_node_constraints (darcy_dof_handler, darcy_preconditioner_constraints);
+      DoFTools::make_zero_boundary_constraints (darcy_dof_handler, darcy_preconditioner_constraints,
+                                                darcy_fe.component_mask(pressure));
+
+      darcy_preconditioner_constraints.close ();
+    }
+
+
+    // Keep the counts in the index type they are reported in. Storing them
+    // in unsigned int would narrow the values whenever
+    // types::global_dof_index is wider than unsigned int.
+    std::vector<types::global_dof_index> darcy_dofs_per_block (2);
+    DoFTools::count_dofs_per_block (darcy_dof_handler, darcy_dofs_per_block, darcy_block_component);
+    const types::global_dof_index n_u = darcy_dofs_per_block[0],
+                                  n_p = darcy_dofs_per_block[1],
+                                  n_s = saturation_dof_handler.n_dofs();
+
+    std::cout << "Number of active cells: "
+              << triangulation.n_active_cells()
+              << " (on "
+              << triangulation.n_levels()
+              << " levels)"
+              << std::endl
+              << "Number of degrees of freedom: "
+              << n_u + n_p + n_s
+              << " (" << n_u << '+' << n_p << '+'<< n_s <<')'
+              << std::endl
+              << std::endl;
+
+    // Darcy system matrix: every pair of components couples except
+    // pressure with pressure.
+    {
+      darcy_matrix.clear ();
+
+      BlockDynamicSparsityPattern dsp (2,2);
+
+      dsp.block(0,0).reinit (n_u, n_u);
+      dsp.block(0,1).reinit (n_u, n_p);
+      dsp.block(1,0).reinit (n_p, n_u);
+      dsp.block(1,1).reinit (n_p, n_p);
+
+      dsp.collect_sizes ();
+
+      Table<2,DoFTools::Coupling> coupling (dim+1, dim+1);
+
+      for (unsigned int c=0; c<dim+1; ++c)
+        for (unsigned int d=0; d<dim+1; ++d)
+          if (! ((c==dim) && (d==dim)))
+            coupling[c][d] = DoFTools::always;
+          else
+            coupling[c][d] = DoFTools::none;
+
+
+      DoFTools::make_sparsity_pattern (darcy_dof_handler, coupling, dsp,
+                                       darcy_constraints, false);
+
+      darcy_matrix.reinit (dsp);
+    }
+
+    // Darcy preconditioner matrix: each component couples only with
+    // itself. The inner preconditioners are released before the matrix
+    // they were built from is cleared.
+    {
+      Amg_preconditioner.reset ();
+      Mp_preconditioner.reset ();
+      darcy_preconditioner_matrix.clear ();
+
+      BlockDynamicSparsityPattern dsp (2,2);
+
+      dsp.block(0,0).reinit (n_u, n_u);
+      dsp.block(0,1).reinit (n_u, n_p);
+      dsp.block(1,0).reinit (n_p, n_u);
+      dsp.block(1,1).reinit (n_p, n_p);
+
+      dsp.collect_sizes ();
+
+      Table<2,DoFTools::Coupling> coupling (dim+1, dim+1);
+      for (unsigned int c=0; c<dim+1; ++c)
+        for (unsigned int d=0; d<dim+1; ++d)
+          if (c == d)
+            coupling[c][d] = DoFTools::always;
+          else
+            coupling[c][d] = DoFTools::none;
+
+      DoFTools::make_sparsity_pattern (darcy_dof_handler, coupling, dsp,
+                                       darcy_constraints, false);
+
+      darcy_preconditioner_matrix.reinit (dsp);
+    }
+
+
+    // Saturation matrix.
+    {
+      saturation_matrix.clear ();
+
+      DynamicSparsityPattern dsp (n_s, n_s);
+
+      DoFTools::make_sparsity_pattern (saturation_dof_handler, dsp,
+                                       saturation_constraints, false);
+
+
+      saturation_matrix.reinit (dsp);
+    }
+
+    // Size the Darcy block vectors: one block of n_u and one of n_p
+    // entries.
+    std::vector<IndexSet> darcy_partitioning(2);
+    darcy_partitioning[0] = complete_index_set (n_u);
+    darcy_partitioning[1] = complete_index_set (n_p);
+    darcy_solution.reinit (darcy_partitioning, MPI_COMM_WORLD);
+    darcy_solution.collect_sizes ();
+
+    last_computed_darcy_solution.reinit (darcy_partitioning, MPI_COMM_WORLD);
+    last_computed_darcy_solution.collect_sizes ();
+
+    second_last_computed_darcy_solution.reinit (darcy_partitioning, MPI_COMM_WORLD);
+    second_last_computed_darcy_solution.collect_sizes ();
+
+    darcy_rhs.reinit (darcy_partitioning, MPI_COMM_WORLD);
+    darcy_rhs.collect_sizes ();
+
+    // Size the saturation vectors with n_s entries each.
+    IndexSet saturation_partitioning = complete_index_set(n_s);
+    saturation_solution.reinit (saturation_partitioning, MPI_COMM_WORLD);
+    old_saturation_solution.reinit (saturation_partitioning, MPI_COMM_WORLD);
+    old_old_saturation_solution.reinit (saturation_partitioning, MPI_COMM_WORLD);
+
+    saturation_matching_last_computed_darcy_solution.reinit (saturation_partitioning,
+                                                             MPI_COMM_WORLD);
+
+    saturation_rhs.reinit (saturation_partitioning, MPI_COMM_WORLD);
+  }
+
+
+  // @sect3{Assembling matrices and preconditioners}
+
+  // The next few functions are devoted to setting up the various system and
+  // preconditioner matrices and right hand sides that we have to deal with in
+  // this program.
+
+  // @sect4{TwoPhaseFlowProblem<dim>::assemble_darcy_preconditioner}
+
+  // This function assembles the matrix we use for preconditioning the Darcy
+  // system. What we need are a vector mass matrix weighted by
+  // $\left(\mathbf{K} \lambda_t\right)^{-1}$ on the velocity components and a
+  // mass matrix weighted by $\left(\mathbf{K} \lambda_t\right)$ on the
+  // pressure component. We start by generating a quadrature object of
+  // appropriate order, the FEValues object that can give values and gradients
+  // at the quadrature points (together with quadrature weights). Next we
+  // create data structures for the cell matrix and the relation between local
+  // and global DoFs. The vectors phi_u and grad_phi_p are going to hold the
+  // values of the basis functions in order to faster build up the local
+  // matrices, as was already done in step-22. Before we start the loop over
+  // all active cells, we have to specify which components are pressure and
+  // which are velocity.
+  //
+  // The creation of the local matrix is rather simple. There is only a term
+  // weighted by $\left(\mathbf{K} \lambda_t\right)^{-1}$ (on the velocity)
+  // and a Laplace matrix weighted by $\left(\mathbf{K} \lambda_t\right)$ to
+  // be generated, so the creation of the local matrix is done in essentially
+  // two lines. Since the material model functions at the top of this file
+  // only provide the inverses of the permeability and mobility, we have to
+  // compute $\mathbf K$ and $\lambda_t$ by hand from the given values, once
+  // per quadrature point.
+  //
+  // Once the local matrix is ready (loop over rows and columns in the local
+  // matrix on each quadrature point), we get the local DoF indices and write
+  // the local information into the global matrix. We do this by directly
+  // applying the constraints (i.e. darcy_preconditioner_constraints) that
+  // take care of hanging node and zero Dirichlet boundary condition
+  // constraints. By doing so, we don't have to do that afterwards, and we
+  // later don't have to use ConstraintMatrix::condense and
+  // MatrixTools::apply_boundary_values, both functions that would need to
+  // modify matrix and vector entries and so are difficult to write for the
+  // Trilinos classes where we don't immediately have access to individual
+  // memory locations.
+  template <int dim>
+  void
+  TwoPhaseFlowProblem<dim>::assemble_darcy_preconditioner ()
+  {
+    std::cout << "   Rebuilding darcy preconditioner..." << std::endl;
+
+    darcy_preconditioner_matrix = 0;
+
+    // Both FEValues objects share one quadrature rule so that the index q
+    // refers to the same point in each of them.
+    const QGauss<dim> quadrature_formula(darcy_degree+2);
+    FEValues<dim>     darcy_fe_values (darcy_fe, quadrature_formula,
+                                       update_JxW_values |
+                                       update_values |
+                                       update_gradients |
+                                       update_quadrature_points);
+    FEValues<dim>     saturation_fe_values (saturation_fe, quadrature_formula,
+                                            update_values);
+
+    const unsigned int n_local_dofs        = darcy_fe.dofs_per_cell;
+    const unsigned int n_quadrature_points = quadrature_formula.size();
+
+    // Per-cell work arrays, allocated once and reused on every cell.
+    std::vector<Tensor<2,dim> >          k_inverse_values (n_quadrature_points);
+    std::vector<double>                  old_saturation_values (n_quadrature_points);
+
+    FullMatrix<double>                   cell_matrix (n_local_dofs, n_local_dofs);
+    std::vector<types::global_dof_index> cell_dof_indices (n_local_dofs);
+
+    std::vector<Tensor<1,dim> >          phi_u (n_local_dofs);
+    std::vector<Tensor<1,dim> >          grad_phi_p (n_local_dofs);
+
+    const FEValuesExtractors::Vector velocities (0);
+    const FEValuesExtractors::Scalar pressure (dim);
+
+    // Walk the cells of both DoF handlers in lockstep.
+    typename DoFHandler<dim>::active_cell_iterator
+    darcy_cell      = darcy_dof_handler.begin_active(),
+    darcy_end_cell  = darcy_dof_handler.end();
+    typename DoFHandler<dim>::active_cell_iterator
+    saturation_cell = saturation_dof_handler.begin_active();
+
+    for (; darcy_cell!=darcy_end_cell; ++darcy_cell, ++saturation_cell)
+      {
+        darcy_fe_values.reinit (darcy_cell);
+        saturation_fe_values.reinit (saturation_cell);
+
+        cell_matrix = 0;
+
+        saturation_fe_values.get_function_values (old_saturation_solution,
+                                                  old_saturation_values);
+
+        k_inverse.value_list (darcy_fe_values.get_quadrature_points(),
+                              k_inverse_values);
+
+        for (unsigned int q=0; q<n_quadrature_points; ++q)
+          {
+            // Coefficients at this quadrature point. Only the inverses are
+            // provided, so mobility and permeability are obtained by
+            // inverting them here.
+            const double        inverse_mobility
+              = mobility_inverse(old_saturation_values[q],viscosity);
+            const double        mobility         = 1.0 / inverse_mobility;
+            const Tensor<2,dim> permeability     = invert(k_inverse_values[q]);
+
+            // Weights of the velocity and the pressure term.
+            const Tensor<2,dim> velocity_weight
+              = k_inverse_values[q] * inverse_mobility;
+            const Tensor<2,dim> pressure_weight
+              = permeability * mobility;
+
+            // Cache the shape function data for this quadrature point.
+            for (unsigned int k=0; k<n_local_dofs; ++k)
+              {
+                phi_u[k]      = darcy_fe_values[velocities].value (k,q);
+                grad_phi_p[k] = darcy_fe_values[pressure].gradient (k,q);
+              }
+
+            for (unsigned int row=0; row<n_local_dofs; ++row)
+              for (unsigned int col=0; col<n_local_dofs; ++col)
+                cell_matrix(row,col) += (velocity_weight *
+                                         phi_u[row] * phi_u[col]
+                                         +
+                                         pressure_weight *
+                                         grad_phi_p[row] * grad_phi_p[col])
+                                        * darcy_fe_values.JxW(q);
+          }
+
+        // Transfer to the global matrix, applying the preconditioner
+        // constraints on the way.
+        darcy_cell->get_dof_indices (cell_dof_indices);
+        darcy_preconditioner_constraints.distribute_local_to_global (cell_matrix,
+            cell_dof_indices,
+            darcy_preconditioner_matrix);
+      }
+  }
+
+
+  // @sect4{TwoPhaseFlowProblem<dim>::build_darcy_preconditioner}
+
+  // After calling the above functions to assemble the preconditioner matrix,
+  // this function generates the inner preconditioners that are going to be
+  // used for the Schur complement block preconditioner. The preconditioners
+  // need to be regenerated at every saturation time step since they depend on
+  // the saturation $S$ that varies with time.
+  //
+  // In here, we set up the preconditioner for the velocity-velocity matrix
+  // $\mathbf{M}^{\mathbf{u}}$ and the Schur complement $\mathbf{S}$. As
+  // explained in the introduction, we are going to use an IC preconditioner
+  // based on the vector matrix $\mathbf{M}^{\mathbf{u}}$ and another based on
+  // the scalar Laplace matrix $\tilde{\mathbf{S}}^p$ (which is spectrally
+  // close to the Schur complement of the Darcy matrix). Usually, the
+  // TrilinosWrappers::PreconditionIC class can be seen as a good black-box
+  // preconditioner which does not need any special knowledge of the matrix
+  // structure and/or the operator that's behind it.
+  template <int dim>
+  void
+  TwoPhaseFlowProblem<dim>::build_darcy_preconditioner ()
+  {
+    // Bring the preconditioner matrix up to date first; both inner
+    // preconditioners below are initialized from its diagonal blocks.
+    assemble_darcy_preconditioner ();
+
+    // Fresh preconditioner object for the (0,0) block.
+    Amg_preconditioner.reset (new TrilinosWrappers::PreconditionIC());
+    Amg_preconditioner->initialize(darcy_preconditioner_matrix.block(0,0));
+
+    // Fresh preconditioner object for the (1,1) block.
+    Mp_preconditioner.reset (new TrilinosWrappers::PreconditionIC());
+    Mp_preconditioner->initialize(darcy_preconditioner_matrix.block(1,1));
+
+  }
+
+
+  // @sect4{TwoPhaseFlowProblem<dim>::assemble_darcy_system}
+
+  // This is the function that assembles the linear system for the Darcy
+  // system.
+  //
+  // Regarding the technical details of implementation, the procedures are
+  // similar to those in step-22 and step-31. We reset matrix and vector,
+  // create a quadrature formula on the cells, and then create the respective
+  // FEValues object.
+  //
+  // There is one thing that needs to be commented: since we have a separate
+  // finite element and DoFHandler for the saturation, we need to generate a
+  // second FEValues object for the proper evaluation of the saturation
+  // solution. This isn't too complicated to realize here: just use the
+  // saturation structures and set an update flag for the basis function
+  // values which we need for evaluation of the saturation solution. The only
+  // important part to remember here is that the same quadrature formula is
+  // used for both FEValues objects to ensure that we get matching information
+  // when we loop over the quadrature points of the two objects.
+  //
+  // The declarations proceed with some shortcuts for array sizes, the
+  // creation of the local matrix, right hand side as well as the vector for
+  // the indices of the local dofs compared to the global system.
+  template <int dim>
+  void TwoPhaseFlowProblem<dim>::assemble_darcy_system ()
+  {
+    darcy_matrix = 0;
+    darcy_rhs    = 0;
+
+    QGauss<dim>   quadrature_formula(darcy_degree+2);
+    QGauss<dim-1> face_quadrature_formula(darcy_degree+2);
+
+    FEValues<dim> darcy_fe_values (darcy_fe, quadrature_formula,
+                                   update_values    | update_gradients |
+                                   update_quadrature_points  | update_JxW_values);
+
+    FEValues<dim> saturation_fe_values (saturation_fe, quadrature_formula,
+                                        update_values);
+
+    FEFaceValues<dim> darcy_fe_face_values (darcy_fe, face_quadrature_formula,
+                                            update_values    | update_normal_vectors |
+                                            update_quadrature_points  | update_JxW_values);
+
+    const unsigned int   dofs_per_cell   = darcy_fe.dofs_per_cell;
+
+    const unsigned int   n_q_points      = quadrature_formula.size();
+    const unsigned int   n_face_q_points = face_quadrature_formula.size();
+
+    FullMatrix<double>   local_matrix (dofs_per_cell, dofs_per_cell);
+    Vector<double>       local_rhs (dofs_per_cell);
+
+    std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+
+    const PressureRightHandSide<dim>  pressure_right_hand_side;
+    const PressureBoundaryValues<dim> pressure_boundary_values;
+
+    std::vector<double>               pressure_rhs_values (n_q_points);
+    std::vector<double>               boundary_values (n_face_q_points);
+    std::vector<Tensor<2,dim> >       k_inverse_values (n_q_points);
+
+    // Next we need a vector that will contain the values of the saturation
+    // solution at the previous time level at the quadrature points to
+    // assemble the saturation dependent coefficients in the Darcy equations.
+    //
+    // The set of vectors we create next hold the evaluations of the basis
+    // functions as well as their gradients that will be used for creating the
+    // matrices. Putting these into their own arrays rather than asking the
+    // FEValues object for this information each time it is needed is an
+    // optimization to accelerate the assembly process, see step-22 for
+    // details.
+    //
+    // The last two declarations are used to extract the individual blocks
+    // (velocity, pressure, saturation) from the total FE system.
+    std::vector<double>               old_saturation_values (n_q_points);
+
+    std::vector<Tensor<1,dim> >       phi_u (dofs_per_cell);
+    std::vector<double>               div_phi_u (dofs_per_cell);
+    std::vector<double>               phi_p (dofs_per_cell);
+
+    const FEValuesExtractors::Vector  velocities (0);
+    const FEValuesExtractors::Scalar  pressure (dim);
+
+    // Now start the loop over all cells in the problem. We are working on two
+    // different DoFHandlers for this assembly routine, so we must have two
+    // different cell iterators for the two objects in use. This might seem a
+    // bit peculiar, but since both the Darcy system and the saturation system
+    // use the same grid we can assume that the two iterators run in sync over
+    // the cells of the two DoFHandler objects.
+    //
+    // The first statements within the loop are again all very familiar, doing
+    // the update of the finite element data as specified by the update flags,
+    // zeroing out the local arrays and getting the values of the old solution
+    // at the quadrature points.  At this point we also have to get the values
+    // of the saturation function of the previous time step at the quadrature
+    // points. To this end, we can use the FEValues::get_function_values
+    // (previously already used in step-9, step-14 and step-15), a function
+    // that takes a solution vector and returns a list of function values at
+    // the quadrature points of the present cell. In fact, it returns the
+    // complete vector-valued solution at each quadrature point, i.e. not only
+    // the saturation but also the velocities and pressure.
+    //
+    // Then we are ready to loop over the quadrature points on the cell to do
+    // the integration. The formula for this follows in a straightforward way
+    // from what has been discussed in the introduction.
+    //
+    // Once this is done, we start the loop over the rows and columns of the
+    // local matrix and feed the matrix with the relevant products.
+    //
+    // The last step in the loop over all cells is to enter the local
+    // contributions into the global matrix and vector structures to the
+    // positions specified in local_dof_indices. Again, we let the
+    // ConstraintMatrix class do the insertion of the cell matrix elements to
+    // the global matrix, which already condenses the hanging node
+    // constraints.
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = darcy_dof_handler.begin_active(),
+    endc = darcy_dof_handler.end();
+    typename DoFHandler<dim>::active_cell_iterator
+    saturation_cell = saturation_dof_handler.begin_active();
+
+    for (; cell!=endc; ++cell, ++saturation_cell)
+      {
+        darcy_fe_values.reinit (cell);
+        saturation_fe_values.reinit (saturation_cell);
+
+        local_matrix = 0;
+        local_rhs = 0;
+
+        saturation_fe_values.get_function_values (old_saturation_solution, old_saturation_values);
+
+        pressure_right_hand_side.value_list (darcy_fe_values.get_quadrature_points(),
+                                             pressure_rhs_values);
+        k_inverse.value_list (darcy_fe_values.get_quadrature_points(),
+                              k_inverse_values);
+
+        for (unsigned int q=0; q<n_q_points; ++q)
+          {
+            for (unsigned int k=0; k<dofs_per_cell; ++k)
+              {
+                phi_u[k]     = darcy_fe_values[velocities].value (k,q);
+                div_phi_u[k] = darcy_fe_values[velocities].divergence (k,q);
+                phi_p[k]     = darcy_fe_values[pressure].value (k,q);
+              }
+            for (unsigned int i=0; i<dofs_per_cell; ++i)
+              {
+                const double old_s = old_saturation_values[q];
+                for (unsigned int j=0; j<=i; ++j)
+                  {
+                    local_matrix(i,j) += (phi_u[i] * k_inverse_values[q] *
+                                          mobility_inverse(old_s,viscosity) * phi_u[j]
+                                          - div_phi_u[i] * phi_p[j]
+                                          - phi_p[i] * div_phi_u[j])
+                                         * darcy_fe_values.JxW(q);
+                  }
+
+                local_rhs(i) += (-phi_p[i] * pressure_rhs_values[q])*
+                                darcy_fe_values.JxW(q);
+              }
+          }
+
+        for (unsigned int face_no=0;
+             face_no<GeometryInfo<dim>::faces_per_cell;
+             ++face_no)
+          if (cell->at_boundary(face_no))
+            {
+              darcy_fe_face_values.reinit (cell, face_no);
+
+              pressure_boundary_values
+              .value_list (darcy_fe_face_values.get_quadrature_points(),
+                           boundary_values);
+
+              for (unsigned int q=0; q<n_face_q_points; ++q)
+                for (unsigned int i=0; i<dofs_per_cell; ++i)
+                  {
+                    const Tensor<1,dim>
+                    phi_i_u = darcy_fe_face_values[velocities].value (i, q);
+
+                    local_rhs(i) += -(phi_i_u *
+                                      darcy_fe_face_values.normal_vector(q) *
+                                      boundary_values[q] *
+                                      darcy_fe_face_values.JxW(q));
+                  }
+            }
+
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          for (unsigned int j=i+1; j<dofs_per_cell; ++j)
+            local_matrix(i,j) = local_matrix(j,i);
+
+        cell->get_dof_indices (local_dof_indices);
+
+        darcy_constraints.distribute_local_to_global (local_matrix,
+                                                      local_rhs,
+                                                      local_dof_indices,
+                                                      darcy_matrix,
+                                                      darcy_rhs);
+
+      }
+  }
+
+
+  // @sect4{TwoPhaseFlowProblem<dim>::assemble_saturation_system}
+
+  // This function is to assemble the linear system for the saturation
+  // transport equation. It calls, if necessary, two other member functions:
+  // assemble_saturation_matrix() and assemble_saturation_rhs(). The former
+  // function then assembles the saturation matrix that only needs to be
+  // changed occasionally. On the other hand, the latter function that
+  // assembles the right hand side must be called at every saturation time
+  // step.
+  template <int dim>
+  void TwoPhaseFlowProblem<dim>::assemble_saturation_system ()
+  {
+    // The matrix is only reassembled on request: it is zeroed first and then
+    // filled again by assemble_saturation_matrix().
+    //
+    // NOTE(review): this function does not clear rebuild_saturation_matrix
+    // after reassembling; confirm that the flag is reset elsewhere, otherwise
+    // the matrix is rebuilt on every call.
+    if (rebuild_saturation_matrix)
+      {
+        saturation_matrix = 0;
+        assemble_saturation_matrix ();
+      }
+
+    // The right hand side, in contrast, is rebuilt from scratch on every
+    // call.
+    saturation_rhs = 0;
+    assemble_saturation_rhs ();
+  }
+
+
+
+  // @sect4{TwoPhaseFlowProblem<dim>::assemble_saturation_matrix}
+
+  // This function is easily understood since it only forms a simple mass
+  // matrix for the left hand side of the saturation linear system by basis
+  // functions phi_i_s and phi_j_s only. Finally, as usual, we enter the local
+  // contribution into the global matrix by specifying the position in
+  // local_dof_indices. This is done by letting the ConstraintMatrix class do
+  // the insertion of the cell matrix elements to the global matrix, which
+  // already condenses the hanging node constraints.
+  template <int dim>
+  void TwoPhaseFlowProblem<dim>::assemble_saturation_matrix ()
+  {
+    QGauss<dim> quadrature_formula(saturation_degree+2);
+
+    // A mass matrix only needs shape function values and the quadrature
+    // weights, so these are the only update flags requested.
+    FEValues<dim> saturation_fe_values (saturation_fe, quadrature_formula,
+                                        update_values | update_JxW_values);
+
+    const unsigned int dofs_per_cell = saturation_fe.dofs_per_cell;
+    const unsigned int n_q_points    = quadrature_formula.size();
+
+    // This function assembles a matrix only, so no local right hand side
+    // vector is allocated.
+    FullMatrix<double> local_matrix (dofs_per_cell, dofs_per_cell);
+
+    std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = saturation_dof_handler.begin_active(),
+    endc = saturation_dof_handler.end();
+    for (; cell!=endc; ++cell)
+      {
+        saturation_fe_values.reinit (cell);
+        local_matrix = 0;
+
+        // Accumulate porosity * phi_i * phi_j * JxW over all quadrature
+        // points of the current cell.
+        for (unsigned int q=0; q<n_q_points; ++q)
+          for (unsigned int i=0; i<dofs_per_cell; ++i)
+            {
+              const double phi_i_s = saturation_fe_values.shape_value (i,q);
+              for (unsigned int j=0; j<dofs_per_cell; ++j)
+                {
+                  const double phi_j_s = saturation_fe_values.shape_value (j,q);
+                  local_matrix(i,j) += porosity * phi_i_s * phi_j_s * saturation_fe_values.JxW(q);
+                }
+            }
+
+        // Let the constraints object add the cell contribution to the global
+        // matrix at the positions given by local_dof_indices.
+        cell->get_dof_indices (local_dof_indices);
+
+        saturation_constraints.distribute_local_to_global (local_matrix,
+                                                           local_dof_indices,
+                                                           saturation_matrix);
+      }
+  }
+
+
+
+  // @sect4{TwoPhaseFlowProblem<dim>::assemble_saturation_rhs}
+
+  // This function is to assemble the right hand side of the saturation
+  // transport equation. Before going about it, we have to create two FEValues
+  // objects for the Darcy and saturation systems respectively and, in
+  // addition, two FEFaceValues objects for the two systems because we have a
+  // boundary integral term in the weak form of saturation equation. For the
+  // FEFaceValues object of the saturation system, we also require normal
+  // vectors, which we request using the update_normal_vectors flag.
+  //
+  // Next, before looping over all the cells, we have to compute some
+  // global parameters (global_max_u_F_prime and global_S_variation) that
+  // the artificial viscosity $\nu$ needs. This is largely the same as was
+  // done in step-31, so you may see there for more information.
+  //
+  // The real work starts with the loop over all the saturation and Darcy
+  // cells to put the local contributions into the global vector. In this
+  // loop, in order to simplify the implementation, we split some of the work
+  // into two helper functions: assemble_saturation_rhs_cell_term and
+  // assemble_saturation_rhs_boundary_term.  We note that we insert cell or
+  // boundary contributions into the global vector in the two functions rather
+  // than in this present function.
+  template <int dim>
+  void TwoPhaseFlowProblem<dim>::assemble_saturation_rhs ()
+  {
+    QGauss<dim>   quadrature_formula(saturation_degree+2);
+    QGauss<dim-1> face_quadrature_formula(saturation_degree+2);
+
+    // Cell and face evaluation objects for both systems. Only the saturation
+    // objects request geometric information (quadrature points, JxW and, on
+    // faces, normal vectors); the Darcy objects are only asked for values.
+    FEValues<dim> saturation_fe_values          (saturation_fe, quadrature_formula,
+                                                 update_values    | update_gradients |
+                                                 update_quadrature_points  | update_JxW_values);
+    FEValues<dim> darcy_fe_values               (darcy_fe, quadrature_formula,
+                                                 update_values);
+    FEFaceValues<dim> saturation_fe_face_values (saturation_fe, face_quadrature_formula,
+                                                 update_values    | update_normal_vectors |
+                                                 update_quadrature_points  | update_JxW_values);
+    FEFaceValues<dim> darcy_fe_face_values      (darcy_fe, face_quadrature_formula,
+                                                 update_values);
+
+    const unsigned int dofs_per_cell = saturation_dof_handler.get_fe().dofs_per_cell;
+    std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+
+    // Global quantities that are handed to the cell term for the computation
+    // of the artificial viscosity.
+    const double                   global_max_u_F_prime = get_max_u_F_prime ();
+    const std::pair<double,double> global_S_range       = get_extrapolated_saturation_range ();
+    const double                   global_S_variation   = global_S_range.second - global_S_range.first;
+
+    // Walk over the cells of both DoFHandler objects in lock step; the two
+    // iterators are advanced together.
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = saturation_dof_handler.begin_active(),
+    endc = saturation_dof_handler.end();
+    typename DoFHandler<dim>::active_cell_iterator
+    darcy_cell = darcy_dof_handler.begin_active();
+    for (; cell!=endc; ++cell, ++darcy_cell)
+      {
+        saturation_fe_values.reinit (cell);
+        darcy_fe_values.reinit (darcy_cell);
+
+        cell->get_dof_indices (local_dof_indices);
+
+        // The helper integrates the cell term and adds it to saturation_rhs.
+        assemble_saturation_rhs_cell_term (saturation_fe_values,
+                                           darcy_fe_values,
+                                           global_max_u_F_prime,
+                                           global_S_variation,
+                                           local_dof_indices);
+
+        // Boundary contributions are only assembled on faces that lie on the
+        // boundary of the domain.
+        for (unsigned int face_no=0; face_no<GeometryInfo<dim>::faces_per_cell;
+             ++face_no)
+          if (cell->at_boundary(face_no))
+            {
+              darcy_fe_face_values.reinit (darcy_cell, face_no);
+              saturation_fe_face_values.reinit (cell, face_no);
+              assemble_saturation_rhs_boundary_term (saturation_fe_face_values,
+                                                     darcy_fe_face_values,
+                                                     local_dof_indices);
+            }
+      }
+  }
+
+
+
+  // @sect4{TwoPhaseFlowProblem<dim>::assemble_saturation_rhs_cell_term}
+
+  // This function takes care of integrating the cell terms of the right hand
+  // side of the saturation equation, and then assembling it into the global
+  // right hand side vector. Given the discussion in the introduction, the
+  // form of these contributions is clear. The only tricky part is getting the
+  // artificial viscosity and all that is necessary to compute it. The first
+  // half of the function is devoted to this task.
+  //
+  // The last part of the function is copying the local contributions into the
+  // global vector with position specified in local_dof_indices.
+  template <int dim>
+  void
+  TwoPhaseFlowProblem<dim>::
+  assemble_saturation_rhs_cell_term (const FEValues<dim>             &saturation_fe_values,
+                                     const FEValues<dim>             &darcy_fe_values,
+                                     const double                     global_max_u_F_prime,
+                                     const double                     global_S_variation,
+                                     const std::vector<types::global_dof_index> &local_dof_indices)
+  {
+    const unsigned int dofs_per_cell = saturation_fe_values.dofs_per_cell;
+    const unsigned int n_q_points    = saturation_fe_values.n_quadrature_points;
+
+    // Values and gradients of the two previous saturation solutions as well
+    // as the full (dim+1 component) Darcy solution at the quadrature points
+    // of the current cell.
+    std::vector<double>          old_saturation_solution_values(n_q_points);
+    std::vector<double>          old_old_saturation_solution_values(n_q_points);
+    std::vector<Tensor<1,dim> >  old_grad_saturation_solution_values(n_q_points);
+    std::vector<Tensor<1,dim> >  old_old_grad_saturation_solution_values(n_q_points);
+    std::vector<Vector<double> > present_darcy_solution_values(n_q_points, Vector<double>(dim+1));
+
+    saturation_fe_values.get_function_values (old_saturation_solution, old_saturation_solution_values);
+    saturation_fe_values.get_function_values (old_old_saturation_solution, old_old_saturation_solution_values);
+    saturation_fe_values.get_function_gradients (old_saturation_solution, old_grad_saturation_solution_values);
+    saturation_fe_values.get_function_gradients (old_old_saturation_solution, old_old_grad_saturation_solution_values);
+    darcy_fe_values.get_function_values (darcy_solution, present_darcy_solution_values);
+
+    // One artificial viscosity value per cell, computed from the data
+    // gathered above and the diameter of the current cell.
+    const double nu
+      = compute_viscosity (old_saturation_solution_values,
+                           old_old_saturation_solution_values,
+                           old_grad_saturation_solution_values,
+                           old_old_grad_saturation_solution_values,
+                           present_darcy_solution_values,
+                           global_max_u_F_prime,
+                           global_S_variation,
+                           saturation_fe_values.get_cell()->diameter());
+
+    Vector<double> local_rhs (dofs_per_cell);
+
+    for (unsigned int q=0; q<n_q_points; ++q)
+      {
+        // Everything in this scope depends on the quadrature point only, so
+        // it is evaluated once per point rather than once per test function.
+        const double old_s   = old_saturation_solution_values[q];
+        const double f_old_s = fractional_flow(old_s,viscosity);
+
+        // The first dim components of the Darcy solution are copied into a
+        // tensor so that they can be contracted with the shape gradients.
+        Tensor<1,dim> present_u;
+        for (unsigned int d=0; d<dim; ++d)
+          present_u[d] = present_darcy_solution_values[q](d);
+
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          {
+            const double        phi_i_s      = saturation_fe_values.shape_value (i, q);
+            const Tensor<1,dim> grad_phi_i_s = saturation_fe_values.shape_grad (i, q);
+
+            local_rhs(i) += (time_step *
+                             f_old_s *
+                             present_u *
+                             grad_phi_i_s
+                             -
+                             time_step *
+                             nu *
+                             old_grad_saturation_solution_values[q] * grad_phi_i_s
+                             +
+                             porosity * old_s * phi_i_s)
+                            *
+                            saturation_fe_values.JxW(q);
+          }
+      }
+
+    // Add the local vector to the global right hand side through the
+    // constraints object.
+    saturation_constraints.distribute_local_to_global (local_rhs,
+                                                       local_dof_indices,
+                                                       saturation_rhs);
+  }
+
+
+  // @sect4{TwoPhaseFlowProblem<dim>::assemble_saturation_rhs_boundary_term}
+
+  // The next function is responsible for the boundary integral terms in the
+  // right hand side form of the saturation equation.  For these, we have to
+  // compute the upwinding flux on the global boundary faces, i.e. we impose
+  // Dirichlet boundary conditions weakly only on inflow parts of the global
+  // boundary. As before, this has been described in step-21 so we refrain
+  // from giving more descriptions about that.
+  template <int dim>
+  void
+  TwoPhaseFlowProblem<dim>::
+  assemble_saturation_rhs_boundary_term (const FEFaceValues<dim>             &saturation_fe_face_values,
+                                         const FEFaceValues<dim>             &darcy_fe_face_values,
+                                         const std::vector<types::global_dof_index>     &local_dof_indices)
+  {
+    const unsigned int dofs_per_cell      = saturation_fe_face_values.dofs_per_cell;
+    const unsigned int n_face_q_points    = saturation_fe_face_values.n_quadrature_points;
+
+    Vector<double> local_rhs (dofs_per_cell);
+
+    // Old saturation and current Darcy solution on the face, plus the
+    // prescribed boundary saturation that plays the role of the "neighbor"
+    // value at inflow points.
+    std::vector<double>          old_saturation_solution_values_face(n_face_q_points);
+    std::vector<Vector<double> > present_darcy_solution_values_face(n_face_q_points,
+        Vector<double>(dim+1));
+    std::vector<double>          neighbor_saturation (n_face_q_points);
+
+    saturation_fe_face_values.get_function_values (old_saturation_solution,
+                                                   old_saturation_solution_values_face);
+    darcy_fe_face_values.get_function_values (darcy_solution,
+                                              present_darcy_solution_values_face);
+
+    SaturationBoundaryValues<dim> saturation_boundary_values;
+    saturation_boundary_values
+    .value_list (saturation_fe_face_values.get_quadrature_points(),
+                 neighbor_saturation);
+
+    for (unsigned int q=0; q<n_face_q_points; ++q)
+      {
+        // The first dim components of the Darcy solution are copied into a
+        // tensor to form the normal flux.
+        Tensor<1,dim> present_u_face;
+        for (unsigned int d=0; d<dim; ++d)
+          present_u_face[d] = present_darcy_solution_values_face[q](d);
+
+        const double normal_flux = present_u_face *
+                                   saturation_fe_face_values.normal_vector(q);
+
+        // Upwinding: at outflow points (non-negative normal flux) use the
+        // interior value, otherwise the boundary value.
+        const bool   is_outflow_q_point = (normal_flux >= 0);
+        const double upwind_saturation  = (is_outflow_q_point
+                                           ?
+                                           old_saturation_solution_values_face[q]
+                                           :
+                                           neighbor_saturation[q]);
+
+        // This factor does not depend on the test function, so it is
+        // evaluated once per quadrature point. The order of the
+        // multiplications is the same as when written out in one expression.
+        const double flux_factor = time_step *
+                                   normal_flux *
+                                   fractional_flow(upwind_saturation, viscosity);
+
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          local_rhs(i) -= flux_factor *
+                          saturation_fe_face_values.shape_value (i,q) *
+                          saturation_fe_face_values.JxW(q);
+      }
+
+    // Add the face contribution to the global right hand side through the
+    // constraints object.
+    saturation_constraints.distribute_local_to_global (local_rhs,
+                                                       local_dof_indices,
+                                                       saturation_rhs);
+  }
+
+
+  // @sect3{TwoPhaseFlowProblem<dim>::solve}
+
+  // This function implements the operator splitting algorithm, i.e. in each
+  // time step it either re-computes the solution of the Darcy system or
+  // extrapolates velocity/pressure from previous time steps, then determines
+  // the size of the time step, and then updates the saturation variable. The
+  // implementation largely follows similar code in step-31. It is, next to
+  // the run() function, the central one in this program.
+  //
+  // At the beginning of the function, we ask whether to solve the
+  // pressure-velocity part by evaluating the a posteriori criterion (see the
+  // following function). If necessary, we will solve the pressure-velocity
+  // part using the GMRES solver with the Schur complement block
+  // preconditioner as is described in the introduction.
+  template <int dim>
+  void TwoPhaseFlowProblem<dim>::solve ()
+  {
+    const bool
+    solve_for_pressure_and_velocity = determine_whether_to_solve_for_pressure_and_velocity ();
+
+    if (solve_for_pressure_and_velocity == true)
+      {
+        std::cout << "   Solving Darcy (pressure-velocity) system..." << std::endl;
+
+        assemble_darcy_system ();
+        build_darcy_preconditioner ();
+
+        {
+          const LinearSolvers::InverseMatrix<TrilinosWrappers::SparseMatrix,
+                TrilinosWrappers::PreconditionIC>
+                mp_inverse (darcy_preconditioner_matrix.block(1,1), *Mp_preconditioner);
+
+          const LinearSolvers::BlockSchurPreconditioner<TrilinosWrappers::PreconditionIC,
+                TrilinosWrappers::PreconditionIC>
+                preconditioner (darcy_matrix, mp_inverse, *Amg_preconditioner);
+
+          SolverControl solver_control (darcy_matrix.m(),
+                                        1e-16*darcy_rhs.l2_norm());
+
+          SolverGMRES<TrilinosWrappers::MPI::BlockVector>
+          gmres (solver_control,
+                 SolverGMRES<TrilinosWrappers::MPI::BlockVector >::AdditionalData(100));
+
+          // Zero out the constrained entries of the starting vector. The
+          // index uses the global DoF index type so that it cannot be
+          // narrower than the value returned by size().
+          for (types::global_dof_index i=0; i<darcy_solution.size(); ++i)
+            if (darcy_constraints.is_constrained(i))
+              darcy_solution(i) = 0;
+
+          gmres.solve(darcy_matrix, darcy_solution, darcy_rhs, preconditioner);
+
+          darcy_constraints.distribute (darcy_solution);
+
+          std::cout << "        ..."
+                    << solver_control.last_step()
+                    << " GMRES iterations."
+                    << std::endl;
+        }
+
+        // Remember the two most recently computed Darcy solutions and the
+        // saturation that belongs to the newest one.
+        {
+          second_last_computed_darcy_solution              = last_computed_darcy_solution;
+          last_computed_darcy_solution                     = darcy_solution;
+
+          saturation_matching_last_computed_darcy_solution = saturation_solution;
+        }
+      }
+    // On the other hand, if we have decided that we don't want to compute the
+    // solution of the Darcy system for the current time step, then we need to
+    // simply extrapolate the previous two Darcy solutions to the same time as
+    // we would have computed the velocity/pressure at. We do a simple linear
+    // extrapolation, i.e. given the current length $dt$ of the macro time
+    // step from the time when we last computed the Darcy solution to now
+    // (given by <code>current_macro_time_step</code>), and $DT$ the length of
+    // the last macro time step (given by <code>old_macro_time_step</code>),
+    // then we get $u^\ast = u_p + dt \frac{u_p-u_{pp}}{DT} = (1+dt/DT)u_p -
+    // dt/DT u_{pp}$, where $u_p$ and $u_{pp}$ are the last two computed Darcy
+    // solutions. We can implement this formula using just two lines of code.
+    //
+    // Note that the algorithm here only works if we have at least two
+    // previously computed Darcy solutions from which we can extrapolate to
+    // the current time, and this is ensured by requiring re-computation of
+    // the Darcy solution for the first 2 time steps.
+    else
+      {
+        darcy_solution = last_computed_darcy_solution;
+        darcy_solution.sadd (1 + current_macro_time_step / old_macro_time_step,
+                             -current_macro_time_step / old_macro_time_step,
+                             second_last_computed_darcy_solution);
+      }
+
+
+    // With the so computed velocity vector, compute the optimal time step
+    // based on the CFL criterion discussed in the introduction...
+    {
+      old_time_step = time_step;
+
+      const double max_u_F_prime = get_max_u_F_prime();
+      if (max_u_F_prime > 0)
+        time_step = porosity *
+                    GridTools::minimal_cell_diameter(triangulation) /
+                    saturation_degree /
+                    max_u_F_prime / 50;
+      else
+        time_step = end_time - time;
+    }
+
+
+
+    // ...and then also update the length of the macro time steps we use while
+    // we're dealing with time step sizes. In particular, this involves: (i)
+    // If we have just recomputed the Darcy solution, then the length of the
+    // previous macro time step is now fixed and the length of the current
+    // macro time step is, up to now, simply the length of the current (micro)
+    // time step. (ii) If we have not recomputed the Darcy solution, then the
+    // length of the current macro time step has just grown by
+    // <code>time_step</code>.
+    if (solve_for_pressure_and_velocity == true)
+      {
+        old_macro_time_step     = current_macro_time_step;
+        current_macro_time_step = time_step;
+      }
+    else
+      current_macro_time_step += time_step;
+
+    // The last step in this function is to recompute the saturation solution
+    // based on the velocity field we've just obtained. This naturally happens
+    // in every time step, and we don't skip any of these computations. At the
+    // end of computing the saturation, we project back into the allowed
+    // interval $[0,1]$ to make sure our solution remains physical.
+    {
+      std::cout << "   Solving saturation transport equation..." << std::endl;
+
+      assemble_saturation_system ();
+
+      SolverControl solver_control (saturation_matrix.m(),
+                                    1e-16*saturation_rhs.l2_norm());
+      SolverCG<TrilinosWrappers::MPI::Vector> cg (solver_control);
+
+      TrilinosWrappers::PreconditionIC preconditioner;
+      preconditioner.initialize (saturation_matrix);
+
+      cg.solve (saturation_matrix, saturation_solution,
+                saturation_rhs, preconditioner);
+
+      saturation_constraints.distribute (saturation_solution);
+      project_back_saturation ();
+
+      std::cout << "        ..."
+                << solver_control.last_step()
+                << " CG iterations."
+                << std::endl;
+    }
+  }
+
+
+  // @sect3{TwoPhaseFlowProblem<dim>::refine_mesh}
+
+  // The next function does the refinement and coarsening of the mesh. It does
+  // its work in three blocks: (i) Compute refinement indicators by looking at
+  // the gradient of a solution vector extrapolated linearly from the previous
+  // two using the respective sizes of the time step (or taking the only
+  // solution we have if this is the first time step). (ii) Flagging those
+  // cells for refinement and coarsening where the gradient is larger or
+  // smaller than a certain threshold, preserving minimal and maximal levels
+  // of mesh refinement. (iii) Transferring the solution from the old to the
+  // new mesh. None of this is particularly difficult.
+  template <int dim>
+  void
+  TwoPhaseFlowProblem<dim>::
+  refine_mesh (const unsigned int              min_grid_level,
+               const unsigned int              max_grid_level)
+  {
+    // Block (i): one refinement indicator per active cell, namely the norm
+    // of the gradient of the extrapolated saturation at the cell midpoint.
+    Vector<double> refinement_indicators (triangulation.n_active_cells());
+    {
+      const QMidpoint<dim> quadrature_formula;
+      FEValues<dim> fe_values (saturation_fe, quadrature_formula, update_gradients);
+      std::vector<Tensor<1,dim> > grad_saturation (1);
+
+      // Linear extrapolation from the last two saturation solutions:
+      //   S^* = (1 + dt/DT) S - dt/DT S_old,
+      // i.e. the weight of the older solution has to be negative (the same
+      // form that solve() uses to extrapolate the Darcy solution). In the
+      // very first time step only one solution exists and it is used as is.
+      TrilinosWrappers::MPI::Vector extrapolated_saturation_solution (saturation_solution);
+      if (timestep_number != 0)
+        extrapolated_saturation_solution.sadd ((1. + time_step/old_time_step),
+                                               -time_step/old_time_step, old_saturation_solution);
+
+      typename DoFHandler<dim>::active_cell_iterator
+      cell = saturation_dof_handler.begin_active(),
+      endc = saturation_dof_handler.end();
+      for (unsigned int cell_no=0; cell!=endc; ++cell, ++cell_no)
+        {
+          fe_values.reinit(cell);
+          fe_values.get_function_gradients (extrapolated_saturation_solution,
+                                            grad_saturation);
+
+          refinement_indicators(cell_no) = grad_saturation[0].norm();
+        }
+    }
+
+    // Block (ii): flag cells. A cell is refined if its indicator exceeds the
+    // threshold and it is below the maximal level; it is coarsened if the
+    // indicator is below half the threshold and it is above the minimal
+    // level. Previously set flags are cleared first.
+    {
+      typename DoFHandler<dim>::active_cell_iterator
+      cell = saturation_dof_handler.begin_active(),
+      endc = saturation_dof_handler.end();
+
+      for (unsigned int cell_no=0; cell!=endc; ++cell, ++cell_no)
+        {
+          cell->clear_coarsen_flag();
+          cell->clear_refine_flag();
+
+          if ((static_cast<unsigned int>(cell->level()) < max_grid_level) &&
+              (std::fabs(refinement_indicators(cell_no)) > saturation_refinement_threshold))
+            cell->set_refine_flag();
+          else if ((static_cast<unsigned int>(cell->level()) > min_grid_level) &&
+                   (std::fabs(refinement_indicators(cell_no)) < 0.5 * saturation_refinement_threshold))
+            cell->set_coarsen_flag();
+        }
+    }
+
+    // The flags are finalized once here, before the solution transfer
+    // objects are prepared; no flags are changed after this call.
+    triangulation.prepare_coarsening_and_refinement ();
+
+    // Block (iii): transfer all solution vectors that are still needed from
+    // the old to the new mesh.
+    {
+      std::vector<TrilinosWrappers::MPI::Vector> x_saturation (3);
+      x_saturation[0] = saturation_solution;
+      x_saturation[1] = old_saturation_solution;
+      x_saturation[2] = saturation_matching_last_computed_darcy_solution;
+
+      std::vector<TrilinosWrappers::MPI::BlockVector> x_darcy (2);
+      x_darcy[0] = last_computed_darcy_solution;
+      x_darcy[1] = second_last_computed_darcy_solution;
+
+      SolutionTransfer<dim,TrilinosWrappers::MPI::Vector> saturation_soltrans(saturation_dof_handler);
+
+      SolutionTransfer<dim,TrilinosWrappers::MPI::BlockVector> darcy_soltrans(darcy_dof_handler);
+
+      saturation_soltrans.prepare_for_coarsening_and_refinement(x_saturation);
+
+      darcy_soltrans.prepare_for_coarsening_and_refinement(x_darcy);
+
+      triangulation.execute_coarsening_and_refinement ();
+      setup_dofs ();
+
+      // Interpolate into temporaries sized like the vectors on the new mesh
+      // and then copy them into the member variables.
+      std::vector<TrilinosWrappers::MPI::Vector> tmp_saturation (3);
+      tmp_saturation[0].reinit (saturation_solution);
+      tmp_saturation[1].reinit (saturation_solution);
+      tmp_saturation[2].reinit (saturation_solution);
+      saturation_soltrans.interpolate(x_saturation, tmp_saturation);
+
+      saturation_solution = tmp_saturation[0];
+      old_saturation_solution = tmp_saturation[1];
+      saturation_matching_last_computed_darcy_solution = tmp_saturation[2];
+
+      std::vector<TrilinosWrappers::MPI::BlockVector> tmp_darcy (2);
+      tmp_darcy[0].reinit (darcy_solution);
+      tmp_darcy[1].reinit (darcy_solution);
+      darcy_soltrans.interpolate(x_darcy, tmp_darcy);
+
+      last_computed_darcy_solution        = tmp_darcy[0];
+      second_last_computed_darcy_solution = tmp_darcy[1];
+
+      // The mesh has changed, so the saturation matrix has to be
+      // reassembled.
+      rebuild_saturation_matrix    = true;
+    }
+  }
+
+
+
+  // @sect3{TwoPhaseFlowProblem<dim>::output_results}
+
+  // This function generates graphical output. It is in essence a copy of the
+  // implementation in step-31.
+  template <int dim>
+  void TwoPhaseFlowProblem<dim>::output_results ()  const
+  { // Copies the Darcy and saturation solutions into one joint vector and writes it to a "solution-*.vtu" file.
+    const FESystem<dim> joint_fe (darcy_fe, 1,
+                                  saturation_fe, 1);
+    DoFHandler<dim> joint_dof_handler (triangulation);
+    joint_dof_handler.distribute_dofs (joint_fe);
+    Assert (joint_dof_handler.n_dofs() ==
+            darcy_dof_handler.n_dofs() + saturation_dof_handler.n_dofs(),
+            ExcInternalError()); // the joint space must hold exactly the DoFs of both sub-problems
+
+    Vector<double> joint_solution (joint_dof_handler.n_dofs());
+
+    {
+      std::vector<types::global_dof_index> local_joint_dof_indices (joint_fe.dofs_per_cell);
+      std::vector<types::global_dof_index> local_darcy_dof_indices (darcy_fe.dofs_per_cell);
+      std::vector<types::global_dof_index> local_saturation_dof_indices (saturation_fe.dofs_per_cell);
+
+      typename DoFHandler<dim>::active_cell_iterator
+      joint_cell      = joint_dof_handler.begin_active(),
+      joint_endc      = joint_dof_handler.end(),
+      darcy_cell      = darcy_dof_handler.begin_active(),
+      saturation_cell = saturation_dof_handler.begin_active();
+
+      for (; joint_cell!=joint_endc; ++joint_cell, ++darcy_cell, ++saturation_cell) // all three iterators advance together
+        {
+          joint_cell->get_dof_indices (local_joint_dof_indices);
+          darcy_cell->get_dof_indices (local_darcy_dof_indices);
+          saturation_cell->get_dof_indices (local_saturation_dof_indices);
+
+          for (unsigned int i=0; i<joint_fe.dofs_per_cell; ++i)
+            if (joint_fe.system_to_base_index(i).first.first == 0) // base element 0 of joint_fe is darcy_fe
+              {
+                Assert (joint_fe.system_to_base_index(i).second
+                        <
+                        local_darcy_dof_indices.size(),
+                        ExcInternalError());
+                joint_solution(local_joint_dof_indices[i])
+                  = darcy_solution(local_darcy_dof_indices[joint_fe.system_to_base_index(i).second]);
+              }
+            else // base element 1 of joint_fe is saturation_fe
+              {
+                Assert (joint_fe.system_to_base_index(i).first.first == 1,
+                        ExcInternalError());
+                Assert (joint_fe.system_to_base_index(i).second
+                        <
+                        local_saturation_dof_indices.size(), // must bound the saturation index vector that is subscripted below
+                        ExcInternalError());
+                joint_solution(local_joint_dof_indices[i])
+                  = saturation_solution(local_saturation_dof_indices[joint_fe.system_to_base_index(i).second]);
+              }
+
+        }
+    }
+    std::vector<std::string> joint_solution_names (dim, "velocity"); // dim velocity components, then pressure, then saturation
+    joint_solution_names.push_back ("pressure");
+    joint_solution_names.push_back ("saturation");
+
+    std::vector<DataComponentInterpretation::DataComponentInterpretation>
+    data_component_interpretation
+    (dim, DataComponentInterpretation::component_is_part_of_vector);
+    data_component_interpretation
+    .push_back (DataComponentInterpretation::component_is_scalar);
+    data_component_interpretation
+    .push_back (DataComponentInterpretation::component_is_scalar);
+
+    DataOut<dim> data_out;
+
+    data_out.attach_dof_handler (joint_dof_handler);
+    data_out.add_data_vector (joint_solution, joint_solution_names,
+                              DataOut<dim>::type_dof_data,
+                              data_component_interpretation);
+
+    data_out.build_patches ();
+
+    std::string filename = "solution-" +
+                           Utilities::int_to_string (timestep_number, 5) + ".vtu"; // file name is derived from timestep_number
+    std::ofstream output (filename.c_str());
+    data_out.write_vtu (output);
+  }
+
+
+
+  // @sect3{Tool functions}
+
+  // @sect4{TwoPhaseFlowProblem<dim>::determine_whether_to_solve_for_pressure_and_velocity}
+
+  // This function implements the a posteriori criterion for adaptive operator
+  // splitting. The function is relatively straightforward given the way we
+  // have implemented other functions above and given the formula for the
+  // criterion derived in the paper.
+  //
+  // If one decides that one wants the original IMPES method in which the
+  // Darcy equation is solved in every time step, then this can be achieved by
+  // setting the threshold value <code>AOS_threshold</code> (with a default of
+  // $5.0$) to zero, thereby forcing the function to always return true.
+  //
+  // Finally, note that the function returns true unconditionally for the
+  // first two time steps to ensure that we have always solved the Darcy
+  // system at least twice when skipping its solution, thereby allowing us to
+  // extrapolate the velocity from the last two solutions in
+  // <code>solve()</code>.
+  template <int dim>
+  bool
+  TwoPhaseFlowProblem<dim>::determine_whether_to_solve_for_pressure_and_velocity () const
+  { // Returns true if the Darcy system should be re-solved: always for early steps, otherwise when the indicator exceeds AOS_threshold.
+    if (timestep_number <= 2) // NOTE(review): true for timestep_number 0, 1 and 2
+      return true;
+
+    const QGauss<dim>  quadrature_formula(saturation_degree+2);
+    const unsigned int n_q_points = quadrature_formula.size();
+
+    FEValues<dim> fe_values (saturation_fe, quadrature_formula,
+                             update_values | update_quadrature_points);
+
+    std::vector<double> old_saturation_after_solving_pressure (n_q_points);
+    std::vector<double> present_saturation (n_q_points);
+
+    std::vector<Tensor<2,dim> > k_inverse_values (n_q_points);
+
+    double max_global_aop_indicator = 0.0; // running maximum of the per-cell indicator over all active cells
+
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = saturation_dof_handler.begin_active(),
+    endc = saturation_dof_handler.end();
+    for (; cell!=endc; ++cell)
+      {
+        double max_local_mobility_reciprocal_difference = 0.0;
+        double max_local_permeability_inverse_l1_norm = 0.0;
+
+        fe_values.reinit(cell);
+        fe_values.get_function_values (saturation_matching_last_computed_darcy_solution,
+                                       old_saturation_after_solving_pressure);
+        fe_values.get_function_values (saturation_solution,
+                                       present_saturation);
+
+        k_inverse.value_list (fe_values.get_quadrature_points(),
+                              k_inverse_values);
+
+        for (unsigned int q=0; q<n_q_points; ++q)
+          {
+            const double mobility_reciprocal_difference // |mobility_inverse(current S) - mobility_inverse(S at last Darcy solve)|
+              = std::fabs(mobility_inverse(present_saturation[q],viscosity)
+                          -
+                          mobility_inverse(old_saturation_after_solving_pressure[q],viscosity));
+
+            max_local_mobility_reciprocal_difference = std::max(max_local_mobility_reciprocal_difference,
+                                                                mobility_reciprocal_difference);
+
+            max_local_permeability_inverse_l1_norm = std::max(max_local_permeability_inverse_l1_norm,
+                                                              l1_norm(k_inverse_values[q]));
+          }
+
+        max_global_aop_indicator = std::max(max_global_aop_indicator, // cell indicator = product of the two per-cell maxima
+                                            (max_local_mobility_reciprocal_difference *
+                                             max_local_permeability_inverse_l1_norm));
+      }
+
+    return (max_global_aop_indicator > AOS_threshold); // strict comparison: an indicator equal to the threshold does not trigger a solve
+  }
+
+
+
+  // @sect4{TwoPhaseFlowProblem<dim>::project_back_saturation}
+
+  // The next function simply makes sure that the saturation values always
+  // remain within the physically reasonable range of $[0,1]$. While the
+  // continuous equations guarantee that this is so, the discrete equations
+  // don't. However, if we allow the discrete solution to escape this range we
+  // get into trouble because terms like $F(S)$ and $F'(S)$ will produce
+  // unreasonable results (e.g. $F'(S)<0$ for $S<0$, which would imply that
+  // the wetting fluid phase flows <i>against</i> the direction of the bulk
+  // fluid velocity). Consequently, at the end of each time step, we simply
+  // project the saturation field back into the physically reasonable region.
+  template <int dim>
+  void
+  TwoPhaseFlowProblem<dim>::project_back_saturation ()
+  { // Clamps every entry of saturation_solution, in place, to the interval [0.2, 1].
+    for (unsigned int i=0; i<saturation_solution.size(); ++i)
+      if (saturation_solution(i) < 0.2) // NOTE(review): lower bound is 0.2, not the 0 that a [0,1] range would suggest -- confirm intent
+        saturation_solution(i) = 0.2;
+      else if (saturation_solution(i) > 1)
+        saturation_solution(i) = 1;
+  }
+
+
+
+  // @sect4{TwoPhaseFlowProblem<dim>::get_max_u_F_prime}
+  //
+  // Another simpler helper function: Compute the maximum of the total
+  // velocity times the derivative of the fraction flow function, i.e.,
+  // compute $\|\mathbf{u} F'(S)\|_{L_\infty(\Omega)}$. This term is used in
+  // both the computation of the time step as well as in normalizing the
+  // entropy-residual term in the artificial viscosity.
+  template <int dim>
+  double
+  TwoPhaseFlowProblem<dim>::get_max_u_F_prime () const
+  { // Returns the maximum over all quadrature points of |velocity| * fractional_flow_derivative(saturation).
+    const QGauss<dim>  quadrature_formula(darcy_degree+2);
+    const unsigned int n_q_points = quadrature_formula.size();
+
+    FEValues<dim> darcy_fe_values (darcy_fe, quadrature_formula, // both FEValues objects share the same quadrature rule
+                                   update_values);
+    FEValues<dim> saturation_fe_values (saturation_fe, quadrature_formula,
+                                        update_values);
+
+    std::vector<Vector<double> > darcy_solution_values(n_q_points,
+                                                       Vector<double>(dim+1)); // dim+1 components; only the first dim are read below
+    std::vector<double>          saturation_values (n_q_points);
+
+    double max_velocity_times_dF_dS = 0;
+
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = darcy_dof_handler.begin_active(),
+    endc = darcy_dof_handler.end();
+    typename DoFHandler<dim>::active_cell_iterator
+    saturation_cell = saturation_dof_handler.begin_active();
+    for (; cell!=endc; ++cell, ++saturation_cell) // Darcy and saturation cell iterators advance together
+      {
+        darcy_fe_values.reinit (cell);
+        saturation_fe_values.reinit (saturation_cell);
+
+        darcy_fe_values.get_function_values (darcy_solution, darcy_solution_values);
+        saturation_fe_values.get_function_values (old_saturation_solution, saturation_values); // evaluates the *old* saturation
+
+        for (unsigned int q=0; q<n_q_points; ++q)
+          {
+            Tensor<1,dim> velocity;
+            for (unsigned int i=0; i<dim; ++i)
+              velocity[i] = darcy_solution_values[q](i); // first dim components are taken as the velocity
+
+            const double dF_dS = fractional_flow_derivative(saturation_values[q],viscosity);
+
+            max_velocity_times_dF_dS = std::max (max_velocity_times_dF_dS,
+                                                 velocity.norm() * dF_dS);
+          }
+      }
+
+    return max_velocity_times_dF_dS;
+  }
+
+
+  // @sect4{TwoPhaseFlowProblem<dim>::get_extrapolated_saturation_range}
+  //
+  // For computing the stabilization term, we need to know the range of the
+  // saturation variable. Unlike in step-31, this range is trivially bounded
+  // by the interval $[0,1]$ but we can do a bit better by looping over a
+  // collection of quadrature points and seeing what the values are there. If
+  // we can, i.e., if there are at least two timesteps around, we can even
+  // take the values extrapolated to the next time step.
+  //
+  // As before, the function is taken with minimal modifications from step-31.
+  template <int dim>
+  std::pair<double,double>
+  TwoPhaseFlowProblem<dim>::get_extrapolated_saturation_range () const
+  { // Returns (min, max) of the saturation sampled at quadrature points; extrapolated in time unless timestep_number is 0.
+    const QGauss<dim>  quadrature_formula(saturation_degree+2);
+    const unsigned int n_q_points = quadrature_formula.size();
+
+    FEValues<dim> fe_values (saturation_fe, quadrature_formula,
+                             update_values);
+    std::vector<double> old_saturation_values(n_q_points);
+    std::vector<double> old_old_saturation_values(n_q_points);
+
+    if (timestep_number != 0) // two previous solutions are used: extrapolate linearly in time
+      {
+        double min_saturation = std::numeric_limits<double>::max(), // start from an empty range so the first sample sets both bounds
+               max_saturation = -std::numeric_limits<double>::max();
+
+        typename DoFHandler<dim>::active_cell_iterator
+        cell = saturation_dof_handler.begin_active(),
+        endc = saturation_dof_handler.end();
+        for (; cell!=endc; ++cell)
+          {
+            fe_values.reinit (cell);
+            fe_values.get_function_values (old_saturation_solution,
+                                           old_saturation_values);
+            fe_values.get_function_values (old_old_saturation_solution,
+                                           old_old_saturation_values);
+
+            for (unsigned int q=0; q<n_q_points; ++q)
+              {
+                const double saturation = // (1 + dt/dt_old) * S_old - (dt/dt_old) * S_old_old; divides by old_time_step
+                  (1. + time_step/old_time_step) * old_saturation_values[q]-
+                  time_step/old_time_step * old_old_saturation_values[q];
+
+                min_saturation = std::min (min_saturation, saturation);
+                max_saturation = std::max (max_saturation, saturation);
+              }
+          }
+
+        return std::make_pair(min_saturation, max_saturation);
+      }
+    else // timestep_number == 0: use the values of old_saturation_solution directly
+      {
+        double min_saturation = std::numeric_limits<double>::max(),
+               max_saturation = -std::numeric_limits<double>::max();
+
+        typename DoFHandler<dim>::active_cell_iterator
+        cell = saturation_dof_handler.begin_active(),
+        endc = saturation_dof_handler.end();
+        for (; cell!=endc; ++cell)
+          {
+            fe_values.reinit (cell);
+            fe_values.get_function_values (old_saturation_solution,
+                                           old_saturation_values);
+
+            for (unsigned int q=0; q<n_q_points; ++q)
+              {
+                const double saturation = old_saturation_values[q];
+
+                min_saturation = std::min (min_saturation, saturation);
+                max_saturation = std::max (max_saturation, saturation);
+              }
+          }
+
+        return std::make_pair(min_saturation, max_saturation);
+      }
+  }
+
+
+
+  // @sect4{TwoPhaseFlowProblem<dim>::compute_viscosity}
+  //
+  // The final tool function is used to compute the artificial viscosity on a
+  // given cell. This isn't particularly complicated if you have the formula
+  // for it in front of you, and looking at the implementation in step-31. The
+  // major difference to that tutorial program is that the velocity here is
+  // not simply $\mathbf u$ but $\mathbf u F'(S)$ and some of the formulas
+  // need to be adjusted accordingly.
+  template <int dim>
+  double
+  TwoPhaseFlowProblem<dim>::
+  compute_viscosity (const std::vector<double>          &old_saturation,
+                     const std::vector<double>          &old_old_saturation,
+                     const std::vector<Tensor<1,dim> >  &old_saturation_grads,
+                     const std::vector<Tensor<1,dim> >  &old_old_saturation_grads,
+                     const std::vector<Vector<double> > &present_darcy_values,
+                     const double                        global_max_u_F_prime,
+                     const double                        global_S_variation,
+                     const double                        cell_diameter) const
+  { // Returns the artificial viscosity for one cell from per-quadrature-point data; all vectors are indexed by quadrature point.
+    const double beta = .4 * dim;
+    const double alpha = 1; // with alpha == 1 the std::pow(..., alpha-1.) factor below has exponent 0
+
+    if (global_max_u_F_prime == 0) // fallback that also avoids dividing by a zero global_scaling below
+      return 5e-3 * cell_diameter;
+
+    const unsigned int n_q_points = old_saturation.size(); // number of quadrature points is taken from the input vector
+
+    double max_residual = 0;
+    double max_velocity_times_dF_dS = 0;
+
+    const bool use_dF_dS = true; // compile-time switch; the ": 1" branch below is currently never taken
+
+    for (unsigned int q=0; q < n_q_points; ++q)
+      {
+        Tensor<1,dim> u;
+        for (unsigned int d=0; d<dim; ++d)
+          u[d] = present_darcy_values[q](d); // first dim components are taken as the velocity
+
+        const double dS_dt = porosity * (old_saturation[q] - old_old_saturation[q])
+                             / old_time_step;
+
+        const double dF_dS = fractional_flow_derivative ((old_saturation[q] + old_old_saturation[q]) / 2.0,viscosity); // evaluated at the mean of the two old saturations
+
+        const double u_grad_S = u * dF_dS *
+                                (old_saturation_grads[q] + old_old_saturation_grads[q]) / 2.0;
+
+        const double residual
+          = std::abs((dS_dt + u_grad_S) *
+                     std::pow((old_saturation[q]+old_old_saturation[q]) / 2,
+                              alpha-1.));
+
+        max_residual = std::max (residual,        max_residual);
+        max_velocity_times_dF_dS = std::max (std::sqrt (u*u) * // |u| times max(dF_dS, 1)
+                                             (use_dF_dS
+                                              ?
+                                              std::max(dF_dS, 1.)
+                                              :
+                                              1),
+                                             max_velocity_times_dF_dS);
+      }
+
+    const double c_R = 1.0;
+    const double global_scaling = c_R * porosity * (global_max_u_F_prime) * global_S_variation /
+                                  std::pow(global_Omega_diameter, alpha - 2.); // exponent is -1 for alpha == 1
+
+    return (beta *
+            (max_velocity_times_dF_dS) *
+            std::min (cell_diameter, // the result is capped at beta * max_velocity_times_dF_dS * cell_diameter
+                      std::pow(cell_diameter,alpha) *
+                      max_residual / global_scaling));
+  }
+
+
+  // @sect3{TwoPhaseFlowProblem<dim>::run}
+
+  // This function is, besides <code>solve()</code>, the primary function of
+  // this program as it controls the time iteration as well as when the
+  // solution is written into output files and when to do mesh refinement.
+  //
+  // With the exception of the startup code that loops back to the beginning
+  // of the function through the <code>goto start_time_iteration</code> label,
+  // everything should be relatively straightforward. In any case, it mimics
+  // the corresponding function in step-31.
+  template <int dim>
+  void TwoPhaseFlowProblem<dim>::run ()
+  { // Builds the mesh, then runs the time loop with periodic output and mesh refinement until time exceeds end_time.
+    const unsigned int initial_refinement     = (dim == 2 ? 5 : 2);
+    const unsigned int n_pre_refinement_steps = (dim == 2 ? 3 : 2);
+
+
+    GridGenerator::hyper_cube (triangulation, 0, 1);
+    triangulation.refine_global (initial_refinement);
+    global_Omega_diameter = GridTools::diameter (triangulation);
+
+    setup_dofs ();
+
+    unsigned int pre_refinement_step = 0;
+
+start_time_iteration:
+
+    VectorTools::project (saturation_dof_handler, // (re)project the initial saturation; repeated after every jump back to the label
+                          saturation_constraints,
+                          QGauss<dim>(saturation_degree+2),
+                          SaturationInitialValues<dim>(),
+                          old_saturation_solution);
+
+    timestep_number = 0;
+    time_step = old_time_step = 0;
+    current_macro_time_step = old_macro_time_step = 0;
+
+    time = 0;
+
+    do
+      {
+        std::cout << "Timestep " << timestep_number
+                  << ":  t=" << time
+                  << ", dt=" << time_step
+                  << std::endl;
+
+        solve ();
+
+        std::cout << std::endl;
+
+        if (timestep_number % 200 == 0) // write output every 200th step, including step 0
+          output_results ();
+
+        if (timestep_number % 25 == 0) // refine every 25th step, including step 0
+          refine_mesh (initial_refinement,
+                       initial_refinement + n_pre_refinement_steps);
+
+        if ((timestep_number == 0) && // after refining at step 0, restart from the initial condition
+            (pre_refinement_step < n_pre_refinement_steps))
+          {
+            ++pre_refinement_step;
+            goto start_time_iteration;
+          }
+
+        time += time_step;
+        ++timestep_number;
+
+        old_old_saturation_solution = old_saturation_solution; // shift the solution history by one step
+        old_saturation_solution = saturation_solution;
+      }
+    while (time <= end_time);
+  }
+}
+
+
+
+// @sect3{The <code>main()</code> function}
+//
+// The main function looks almost the same as in all other programs. The need
+// to initialize the MPI subsystem for a program that uses Trilinos -- even
+// for programs that do not actually run in parallel -- is explained in
+// step-31.
+int main (int argc, char *argv[])
+{ // Runs the 2d problem; returns 0 on success and 1 if any exception escapes.
+  try
+    {
+      using namespace dealii;
+      using namespace Step43;
+
+      Utilities::MPI::MPI_InitFinalize mpi_initialization (argc, argv,
+                                                           numbers::invalid_unsigned_int);
+
+      // This program can only be run in serial. Otherwise, throw an exception.
+      AssertThrow(Utilities::MPI::n_mpi_processes(MPI_COMM_WORLD)==1,
+                  ExcMessage("This program can only be run in serial, use ./step-43"));
+
+      TwoPhaseFlowProblem<2> two_phase_flow_problem(1); // dim = 2, constructor argument 1
+      two_phase_flow_problem.run ();
+    }
+  catch (std::exception &exc) // report the message of standard exceptions on stderr
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+
+      return 1;
+    }
+  catch (...) // anything not derived from std::exception
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    }
+
+  return 0;
+}
diff --git a/examples/step-44/CMakeLists.txt b/examples/step-44/CMakeLists.txt
new file mode 100644
index 0000000..1176109
--- /dev/null
+++ b/examples/step-44/CMakeLists.txt
@@ -0,0 +1,39 @@
+##
+#  CMake script for the step-44 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-44")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET # QUIET: a failed lookup is reported by the FATAL_ERROR below instead
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND}) # stop configuring when no suitable deal.II was found
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES() # called before PROJECT(); this order is kept as is
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-44/doc/builds-on b/examples/step-44/doc/builds-on
new file mode 100644
index 0000000..fa2a1ea
--- /dev/null
+++ b/examples/step-44/doc/builds-on
@@ -0,0 +1 @@
+step-8 step-18
diff --git a/examples/step-44/doc/intro.dox b/examples/step-44/doc/intro.dox
new file mode 100644
index 0000000..d33136b
--- /dev/null
+++ b/examples/step-44/doc/intro.dox
@@ -0,0 +1,681 @@
+<br>
+
+<i>This program was contributed by Jean-Paul Pelteret and Andrew McBride.
+<br>
+This material is based upon work supported by  the German Science Foundation (Deutsche
+Forschungsgemeinschaft, DFG), grant STE 544/39-1,  and the National Research Foundation of South Africa. 
+</i>
+
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+The subject of this tutorial is nonlinear solid mechanics. 
+Classical single-field approaches (see e.g. step-18) cannot correctly describe the response of quasi-incompressible materials.
+The response is overly stiff; a phenomenon known as locking.
+Locking problems can be circumvented using a variety of alternative strategies. 
+One such strategy is the  three-field formulation.
+It is used here  to model the three-dimensional, fully-nonlinear (geometrical and material) response of an isotropic continuum body.
+The material response is approximated as hyperelastic.
+Additionally, the three-field formulation employed is valid for quasi-incompressible as well as compressible materials.
+
+The objective of this presentation is to provide a basis for using deal.II for problems in nonlinear solid mechanics.
+The linear problem was addressed in step-8.
+A non-standard, hypoelastic-type form of the geometrically nonlinear problem was partially considered in step-18: a rate form of the linearised constitutive relations is used and the problem domain evolves with the motion.
+Important concepts surrounding the nonlinear kinematics are absent in the theory and implementation.
+Step-18 does, however, describe many of the key concepts to implement elasticity within the framework of deal.II.
+
+We begin with a crash-course in nonlinear kinematics.
+For the sake of simplicity, we restrict our attention to the quasi-static problem.
+Thereafter, various key stress measures are introduced and the constitutive model described.
+We then describe the three-field formulation in detail prior to explaining the structure of the class used to manage the material. 
+The setup of the example problem is then presented.
+
+@note This tutorial has been developed for the problem of elasticity in three dimensions.
+ While the space dimension could be changed in the main() routine, care needs to be taken.
+ Two-dimensional elasticity problems, in general, exist only as idealisations of three-dimensional ones.
+ That is, they are either plane strain or plane stress.
+ The assumptions that follow either of these choices need to be consistently imposed.
+ For more information see the note in step-8.
+
+<h3>List of references</h3>
+
+The three-field formulation implemented here was pioneered by Simo et al. (1985) and is known as the mixed Jacobian-pressure formulation.
+Important related contributions include those by Simo and Taylor (1991), and Miehe (1994).
+The notation adopted here draws heavily on the excellent overview of the theoretical aspects of nonlinear solid mechanics by Holzapfel (2001). 
+A nice overview of issues pertaining to incompressible elasticity (at small strains) is given in Hughes (2000).
+
+<ol>
+	<li> J.C. Simo, R.L. Taylor and K.S. Pister (1985),
+		Variational and projection methods for the volume constraint in finite deformation elasto-plasticity,
+		<em> Computer Methods in Applied Mechanics and Engineering </em>,
+		<strong> 51 </strong>, 1-3,
+		177-208;
+	<li> J.C. Simo and R.L. Taylor (1991),
+  		Quasi-incompressible finite elasticity in principal stretches. Continuum
+			basis and numerical algorithms,
+		<em> Computer Methods in Applied Mechanics and Engineering </em>,
+		<strong> 85 </strong>, 3,
+		273-310;
+	<li> C. Miehe (1994),
+		Aspects of the formulation and finite element implementation of large strain isotropic elasticity,
+		<em> International Journal for Numerical Methods in Engineering </em>,
+		<strong> 37 </strong>, 12,
+		1981-2004;
+	<li> G.A. Holzapfel (2001),
+		Nonlinear Solid Mechanics. A Continuum Approach for Engineering,
+		John Wiley & Sons;
+	<li> T.J.R. Hughes (2000),
+		The Finite Element Method: Linear Static and Dynamic Finite Element Analysis,
+		Dover.
+</ol>
+
+
+<h3> Notation </h3>
+
+One can think of fourth-order tensors as linear operators mapping second-order
+tensors (matrices) onto themselves in much the same way as matrices map
+vectors onto vectors.
+There are various fourth-order unit tensors that will be required in the forthcoming presentation.
+The fourth-order unit tensors $\mathcal{I}$ and $\overline{\mathcal{I}}$ are defined by
+@f[
+	\mathbf{A} = \mathcal{I}:\mathbf{A}
+		\qquad \text{and} \qquad
+	\mathbf{A}^T = \overline{\mathcal{I}}:\mathbf{A} \, .
+@f]
+Note $\mathcal{I} \neq \overline{\mathcal{I}}^T$.
+Furthermore, we define the symmetric and skew-symmetric fourth-order unit tensors by
+@f[
+	\mathcal{S} := \dfrac{1}{2}[\mathcal{I} + \overline{\mathcal{I}}]
+		\qquad \text{and} \qquad
+	\mathcal{W} := \dfrac{1}{2}[\mathcal{I} - \overline{\mathcal{I}}] \, ,
+@f]
+such that
+@f[
+	\dfrac{1}{2}[\mathbf{A} + \mathbf{A}^T] = \mathcal{S}:\mathbf{A}
+		\qquad \text{and} \qquad
+	\dfrac{1}{2}[\mathbf{A} - \mathbf{A}^T] = \mathcal{W}:\mathbf{A} \, .
+@f]
+The fourth-order <code>SymmetricTensor</code> returned by identity_tensor() is $\mathcal{S}$.
+
+
+<h3>Kinematics</h3>
+
+Let the time domain be denoted $\mathbb{T} = [0,T_{\textrm{end}}]$, where $t \in \mathbb{T}$ and $T_{\textrm{end}}$ is the total problem duration.
+Consider a continuum body that occupies the reference configuration $\Omega_0$ at time $t=0$.
+Particles in the reference configuration are identified by the position vector $\mathbf{X}$.
+The configuration of the body at a later time $t>0$ is termed the current configuration, denoted $\Omega$, with particles identified by the vector $\mathbf{x}$.
+The nonlinear map between the reference and current configurations, denoted $\boldsymbol{\varphi}$, acts as follows:
+@f[
+	\mathbf{x} = \boldsymbol{\varphi}(\mathbf{X},t) \, .
+@f]
+The material description of the displacement of a particle is defined by
+@f[
+	\mathbf{U}(\mathbf{X},t) = \mathbf{x}(\mathbf{X},t) - \mathbf{X} \, .
+@f]
+
+The deformation gradient $\mathbf{F}$ is defined as the material gradient of the motion:
+@f[
+	\mathbf{F}(\mathbf{X},t)
+		:= \dfrac{\partial \boldsymbol{\varphi}(\mathbf{X},t)}{\partial \mathbf{X}}
+		= \textrm{Grad}\ \mathbf{x}(\mathbf{X},t)
+		= \mathbf{I} + \textrm{Grad}\ \mathbf{U} \, .
+@f]
+The determinant of the deformation gradient
+$J(\mathbf{X},t):= \textrm{det}\ \mathbf{F}(\mathbf{X},t) > 0$
+maps corresponding volume elements in the reference and current configurations, denoted
+$\textrm{d}V$ and $\textrm{d}v$,
+respectively, as
+@f[
+	\textrm{d}v = J(\mathbf{X},t)\; \textrm{d}V \, .
+@f]
+
+Two important measures of the deformation in terms of the spatial and material coordinates are the left and right Cauchy-Green tensors, respectively, 
+and denoted $\mathbf{b} := \mathbf{F}\mathbf{F}^T$ and $\mathbf{C} := \mathbf{F}^T\mathbf{F}$.
+They are both symmetric and positive definite.
+
+The Green-Lagrange strain tensor is defined by
+@f[
+	\mathbf{E}:= \frac{1}{2}[\mathbf{C} - \mathbf{I} ]
+		= \underbrace{\frac{1}{2}[\textrm{Grad}^T \mathbf{U} + \textrm{Grad}\mathbf{U}]}_{\boldsymbol{\varepsilon}}
+			+ \frac{1}{2}[\textrm{Grad}^T\ \mathbf{U}][\textrm{Grad}\ \mathbf{U}] \, .
+@f]
+If the assumption of infinitesimal deformations is made, then the second term
+on the right can be neglected, and $\boldsymbol{\varepsilon}$ (the linearised
+strain tensor) is the only component of the strain tensor.
+This assumption is, looking at the setup of the problem, not valid in step-18,
+making the use of the linearized $\boldsymbol{\varepsilon}$ as the strain
+measure in that tutorial program questionable.
+
+In order to handle the different response that materials exhibit when subjected to bulk and shear type deformations we consider the following decomposition of the deformation gradient $\mathbf{F}$  and the left Cauchy-Green tensor $\mathbf{b}$ into volume-changing (volumetric) and volume-preserving (isochoric) parts:
+@f[
+	\mathbf{F}
+		= (J^{1/3}\mathbf{I})\overline{\mathbf{F}}
+	\qquad \text{and} \qquad
+	\mathbf{b}
+        = (J^{2/3}\mathbf{I})\overline{\mathbf{F}}\,\overline{\mathbf{F}}^T
+		=  (J^{2/3}\mathbf{I})\overline{\mathbf{b}} \, .
+@f]
+Clearly, $\textrm{det}\ \mathbf{F} = \textrm{det}\ (J^{1/3}\mathbf{I}) = J$.
+
+The spatial velocity field is denoted $\mathbf{v}(\mathbf{x},t)$.
+The derivative of the spatial velocity field with respect to the spatial coordinates gives the spatial velocity gradient $\mathbf{l}(\mathbf{x},t)$, that is
+@f[
+	\mathbf{l}(\mathbf{x},t)
+		:= \dfrac{\partial \mathbf{v}(\mathbf{x},t)}{\partial \mathbf{x}}
+		= \textrm{grad}\ \mathbf{v}(\mathbf{x},t) \, ,
+@f]
+where $\textrm{grad} \{\bullet \}
+= \frac{\partial \{ \bullet \} }{ \partial \mathbf{x}}
+= \frac{\partial \{ \bullet \} }{ \partial \mathbf{X}}\frac{\partial \mathbf{X} }{ \partial \mathbf{x}}
+= \textrm{Grad} \{ \bullet \} \mathbf{F}^{-1}$.
+
+
+<h3>Kinetics</h3>
+
+Cauchy's stress theorem equates the Cauchy traction $\mathbf{t}$ acting on an infinitesimal surface element in the current configuration $\mathrm{d}a$ to the product of the Cauchy stress tensor $\boldsymbol{\sigma}$ (a spatial quantity)  and the outward unit normal to the surface $\mathbf{n}$ as
+ at f[
+	\mathbf{t}(\mathbf{x},t, \mathbf{n}) = \boldsymbol{\sigma}\mathbf{n} \, .
+ at f]
+The Cauchy stress is symmetric.
+Similarly,  the first Piola-Kirchhoff traction $\mathbf{T}$ which acts on an infinitesimal surface element in the reference configuration $\mathrm{d}A$ is the product of the first Piola-Kirchhoff stress tensor $\mathbf{P}$ (a two-point tensor)  and the outward unit normal to the surface $\mathbf{N}$ as
+ at f[
+	\mathbf{T}(\mathbf{X},t, \mathbf{N}) = \mathbf{P}\mathbf{N} \, .
+ at f]
+The Cauchy traction $\mathbf{t}$ and the first Piola-Kirchhoff traction $\mathbf{T}$ are related as
+ at f[
+	\mathbf{t}\mathrm{d}a = \mathbf{T}\mathrm{d}A \, .
+ at f]
+This can be demonstrated using <a href="http://en.wikipedia.org/wiki/Finite_strain_theory">Nanson's formula</a>.
+
+The first Piola-Kirchhoff stress tensor is related to the Cauchy stress as
+ at f[
+	\mathbf{P} = J \boldsymbol{\sigma}\mathbf{F}^{-T} \, .
+ at f]
+Further important stress measures are the (spatial) Kirchhoff stress  $\boldsymbol{\tau} = J \boldsymbol{\sigma}$
+and the (referential) second Piola-Kirchhoff stress
+$\mathbf{S} = {\mathbf{F}}^{-1} \boldsymbol{\tau} {\mathbf{F}}^{-T}$.
+
+
+<h3> Push-forward and pull-back operators </h3>
+
+Push-forward and pull-back operators allow one to transform various measures between the material and spatial settings.
+The stress measures used here are contravariant, while the strain measures are covariant.
+
+The push-forward and pull-back operations for second-order covariant tensors $(\bullet)^{\text{cov}}$ are respectively given by:
+ at f[
+	\chi_{*}(\bullet)^{\text{cov}}:= \mathbf{F}^{-T} (\bullet)^{\text{cov}} \mathbf{F}^{-1}
+	\qquad \text{and} \qquad
+	\chi^{-1}_{*}(\bullet)^{\text{cov}}:= \mathbf{F}^{T} (\bullet)^{\text{cov}} \mathbf{F} \, .
+ at f]
+
+The push-forward and pull-back operations for second-order contravariant tensors $(\bullet)^{\text{con}}$ are respectively given by:
+ at f[
+	\chi_{*}(\bullet)^{\text{con}}:= \mathbf{F} (\bullet)^{\text{con}} \mathbf{F}^T
+	\qquad \text{and} \qquad
+	\chi^{-1}_{*}(\bullet)^{\text{con}}:= \mathbf{F}^{-1} (\bullet)^{\text{con}} \mathbf{F}^{-T} \, .
+ at f]
+For example $\boldsymbol{\tau} = \chi_{*}(\mathbf{S})$.
+
+
+<h3>Hyperelastic materials</h3>
+
+A hyperelastic material response is governed by a Helmholtz free energy function $\Psi = \Psi(\mathbf{F}) = \Psi(\mathbf{C}) = \Psi(\mathbf{b})$ which serves as a potential for the stress.
+For example, if the Helmholtz free energy depends on the right Cauchy-Green tensor $\mathbf{C}$ then the isotropic hyperelastic response is
+ at f[
+	\mathbf{S}
+		= 2 \dfrac{\partial \Psi(\mathbf{C})}{\partial \mathbf{C}} \, .
+ at f]
+If the Helmholtz free energy depends on the left Cauchy-Green tensor $\mathbf{b}$ then the isotropic hyperelastic response is
+ at f[
+	\boldsymbol{\tau}
+		= 2 \dfrac{\partial \Psi(\mathbf{b})}{\partial \mathbf{b}} \mathbf{b}
+		=  2 \mathbf{b} \dfrac{\partial \Psi(\mathbf{b})}{\partial \mathbf{b}} \, .
+ at f]
+
+Following the multiplicative decomposition of the deformation gradient, the Helmholtz free energy can be decomposed as
+ at f[
+	\Psi(\mathbf{b}) = \Psi_{\text{vol}}(J) + \Psi_{\text{iso}}(\overline{\mathbf{b}}) \, .
+ at f]
+Similarly, the Kirchhoff stress can be decomposed into volumetric and isochoric parts as $\boldsymbol{\tau} = \boldsymbol{\tau}_{\text{vol}} + \boldsymbol{\tau}_{\text{iso}}$ where:
+ at f{align*}
+	\boldsymbol{\tau}_{\text{vol}} &=
+		2 \mathbf{b} \dfrac{\partial \Psi_{\textrm{vol}}(J)}{\partial \mathbf{b}}
+		\\
+		&= p J\mathbf{I} \, ,
+		\\
+	\boldsymbol{\tau}_{\text{iso}} &=
+		2 \mathbf{b} \dfrac{\partial \Psi_{\textrm{iso}} (\overline{\mathbf{b}})}{\partial \mathbf{b}}
+		\\
+		&= \underbrace{( \mathcal{I} - \dfrac{1}{3} \mathbf{I} \otimes \mathbf{I})}_{\mathbb{P}} : \overline{\boldsymbol{\tau}} \, ,
+ at f}
+where
+$p := \dfrac{\partial \Psi_{\text{vol}}(J)}{\partial J}$ is the pressure response. 
+$\mathbb{P}$ is the projection tensor which provides the deviatoric operator in the Eulerian setting. 
+The fictitious Kirchhoff stress tensor $\overline{\boldsymbol{\tau}}$ is defined by
+ at f[
+	\overline{\boldsymbol{\tau}}
+		:= 2 \overline{\mathbf{b}} \dfrac{\partial \Psi_{\textrm{iso}}(\overline{\mathbf{b}})}{\partial \overline{\mathbf{b}}} \, .
+ at f]
+
+
+ at note The pressure response as defined above differs from the widely-used definition of the
+pressure in solid mechanics as
+$p = - 1/3 \textrm{tr} \boldsymbol{\sigma} = - 1/3 J^{-1} \textrm{tr} \boldsymbol{\tau}$.
+Here $p$ is the hydrostatic pressure.
+We make use of the pressure response throughout this tutorial (although we refer to it as the pressure).
+
+<h4> Neo-Hookean materials </h4>
+
+The Helmholtz free energy corresponding to a compressible <a href="http://en.wikipedia.org/wiki/Neo-Hookean_solid">neo-Hookean material</a> is given by
+ at f[
+    \Psi \equiv
+        \underbrace{\kappa [ \mathcal{G}(J) ] }_{\Psi_{\textrm{vol}}(J)}
+        + \underbrace{\bigl[c_1 [ \overline{I}_1 - 3] \bigr]}_{\Psi_{\text{iso}}(\overline{\mathbf{b}})} \, ,
+ at f]
+where $\kappa := \lambda + 2/3 \mu$ is the bulk modulus ($\lambda$ and $\mu$ are the Lame parameters)
+and $\overline{I}_1 := \textrm{tr}\ \overline{\mathbf{b}}$.
+The function $\mathcal{G}(J)$ is required to be strictly convex and satisfy the condition $\mathcal{G}(1) = 0$, 
+among others, see Holzapfel (2001) for further details.
+In this work $\mathcal{G}:=\frac{1}{4} [ J^2 - 1 - 2\textrm{ln}J ]$.
+
+Incompressibility imposes the isochoric constraint that $J=1$ for all motions $\boldsymbol{\varphi}$.
+The Helmholtz free energy corresponding to an incompressible neo-Hookean material is given by
+ at f[
+    \Psi \equiv
+        \underbrace{\bigl[ c_1 [ I_1 - 3] \bigr] }_{\Psi_{\textrm{iso}}(\mathbf{b})} \, ,
+ at f]
+where $ I_1 := \textrm{tr}\mathbf{b} $.
+Thus, the incompressible response is obtained by removing the volumetric component from the compressible free energy and enforcing $J=1$.
+
+
+<h3>Elasticity tensors</h3>
+
+We will use a Newton-Raphson strategy to solve the nonlinear boundary value problem.
+Thus, we will need to linearise the constitutive relations.
+
+The fourth-order elasticity tensor in the material description is defined by
+ at f[
+	\mathfrak{C}
+		= 2\dfrac{\partial \mathbf{S}(\mathbf{C})}{\partial \mathbf{C}}
+		= 4\dfrac{\partial^2 \Psi(\mathbf{C})}{\partial \mathbf{C} \partial \mathbf{C}} \, .
+ at f]
+The fourth-order elasticity tensor in the spatial description $\mathfrak{c}$ is obtained from the push-forward of $\mathfrak{C}$ as
+ at f[
+	\mathfrak{c} = J^{-1} \chi_{*}(\mathfrak{C})
+		\qquad \text{and thus} \qquad
+	J\mathfrak{c} = 4 \mathbf{b} \dfrac{\partial^2 \Psi(\mathbf{b})} {\partial \mathbf{b} \partial \mathbf{b}} \mathbf{b}	\, .
+ at f]
+The fourth-order elasticity tensors (for hyperelastic materials) possess both major and minor symmetries.
+
+The fourth-order spatial elasticity tensor can be written in the following decoupled form:
+ at f[
+	\mathfrak{c} = \mathfrak{c}_{\text{vol}} + \mathfrak{c}_{\text{iso}} \, ,
+ at f]
+where
+ at f{align*}
+	J \mathfrak{c}_{\text{vol}}
+		&= 4 \mathbf{b} \dfrac{\partial^2 \Psi_{\text{vol}}(J)} {\partial \mathbf{b} \partial \mathbf{b}} \mathbf{b}
+		\\
+		&= J[\widehat{p}\, \mathbf{I} \otimes \mathbf{I} - 2p \mathcal{I}]
+			\qquad \text{where} \qquad
+		\widehat{p} := p + J \dfrac{\textrm{d} p}{\textrm{d}J} \, ,
+		\\
+	J \mathfrak{c}_{\text{iso}}
+		&=  4 \mathbf{b} \dfrac{\partial^2 \Psi_{\text{iso}}(\overline{\mathbf{b}})} {\partial \mathbf{b} \partial \mathbf{b}} \mathbf{b}
+		\\
+		&= \mathbb{P} : \mathfrak{\overline{c}} : \mathbb{P}
+			+ \dfrac{2}{3}[\overline{\boldsymbol{\tau}}:\mathbf{I}]\mathbb{P}
+			- \dfrac{2}{3}[ \mathbf{I}\otimes\boldsymbol{\tau}_{\text{iso}}
+				+ \boldsymbol{\tau}_{\text{iso}} \otimes \mathbf{I} ] \, ,
+ at f}
+where the fictitious elasticity tensor $\overline{\mathfrak{c}}$ in the spatial description is defined by
+ at f[
+	\overline{\mathfrak{c}}
+		= 4 \overline{\mathbf{b}} \dfrac{ \partial^2 \Psi_{\textrm{iso}}(\overline{\mathbf{b}})} {\partial \overline{\mathbf{b}} \partial \overline{\mathbf{b}}} \overline{\mathbf{b}} \, .
+ at f]
+
+<h3>Principle of stationary potential energy and the three-field formulation</h3>
+
+The total potential energy of the system $\Pi$ is the sum of the internal and external potential energies, denoted $\Pi_{\textrm{int}}$ and $\Pi_{\textrm{ext}}$, respectively.
+We wish to find the equilibrium configuration by minimising the potential energy.
+
+As mentioned above, we adopt a three-field formulation.
+We denote the set of primary unknowns by
+$\mathbf{\Xi}:= \{ \mathbf{u}, \widetilde{p}, \widetilde{J} \}$.
+The independent kinematic variable $\widetilde{J}$ enters the formulation as a constraint on $J$ enforced by the Lagrange multiplier $\widetilde{p}$ (the pressure, as we shall see).
+
+The three-field variational principle used here is given by
+ at f[
+	\Pi(\mathbf{\Xi}) := \int_\Omega \bigl[
+		\Psi_{\textrm{vol}}(\widetilde{J})
+		+ \widetilde{p}\,[J(\mathbf{u}) - \widetilde{J}]
+		+ \Psi_{\textrm{iso}}(\overline{\mathbf{b}}(\mathbf{u}))
+		\bigr] \textrm{d}v
+	+ 	\Pi_{\textrm{ext}} \, ,
+ at f]
+where the external potential is defined by
+ at f[
+	\Pi_{\textrm{ext}}
+		= - \int_\Omega \mathbf{b}^\text{p} \cdot \mathbf{u}~\textrm{d}v
+			- \int_{\partial \Omega_{\sigma}} \mathbf{t}^\text{p} \cdot \mathbf{u}~\textrm{d}a \, .
+ at f]
+The boundary of the current configuration $\partial \Omega$ is decomposed into two parts as
+$\partial \Omega = \partial \Omega_{\mathbf{u}} \cup \partial \Omega_{\sigma}$,
+where
+$\partial \Omega_{\mathbf{u}} \cap \partial \Omega_{\boldsymbol{\sigma}} = \emptyset$.
+The prescribed Cauchy traction, denoted $\mathbf{t}^\text{p}$, is applied to $ \partial \Omega_{\boldsymbol{\sigma}}$ while the motion is prescribed on the remaining portion of the boundary $\partial \Omega_{\mathbf{u}}$.
+The body force per unit current volume is denoted $\mathbf{b}^\text{p}$.
+
+
+
+The stationarity of the potential follows as
+ at f{align*}
+	R(\mathbf\Xi;\delta \mathbf{\Xi})
+		&= D_{\delta \mathbf{\Xi}}\Pi(\mathbf{\Xi})
+		\\
+		&= \dfrac{\partial \Pi(\mathbf{\Xi})}{\partial \mathbf{u}} \cdot \delta \mathbf{u}
+			+ \dfrac{\partial \Pi(\mathbf{\Xi})}{\partial \widetilde{p}} \delta \widetilde{p}
+			+ \dfrac{\partial \Pi(\mathbf{\Xi})}{\partial \widetilde{J}} \delta \widetilde{J}
+			\\
+		&= \int_{\Omega_0}  \left[
+			\textrm{grad}\ \delta\mathbf{u} : [ \underbrace{[\widetilde{p} J \mathbf{I}]}_{\equiv \boldsymbol{\tau}_{\textrm{vol}}}
+            +  \boldsymbol{\tau}_{\textrm{iso}}]
+			+ \delta \widetilde{p}\, [ J(\mathbf{u}) - \widetilde{J}]
+			+ \delta \widetilde{J}\left[ \dfrac{\textrm{d} \Psi_{\textrm{vol}}(\widetilde{J})}{\textrm{d} \widetilde{J}}
+            -\widetilde{p}\right]
+			\right]~\textrm{d}V
+			\\
+		&\quad - \int_{\Omega_0} \delta \mathbf{u} \cdot \mathbf{B}^\text{p}~\textrm{d}V
+			- \int_{\partial \Omega_{0,\boldsymbol{\sigma}}} \delta \mathbf{u} \cdot \mathbf{T}^\text{p}~\textrm{d}A
+			\\
+		&=0 \, ,
+ at f}
+for all virtual displacements $\delta \mathbf{u} \in H^1(\Omega)$ subject to the constraint that $\delta \mathbf{u} = \mathbf{0}$ on $\partial \Omega_{\mathbf{u}}$, and all virtual pressures $\delta \widetilde{p} \in L^2(\Omega)$ and virtual dilatations $\delta \widetilde{J} \in L^2(\Omega)$.
+
+One should note that the definitions of the volumetric Kirchhoff stress in the three-field formulation
+$\boldsymbol{\tau}_{\textrm{vol}} \equiv \widetilde{p} J \mathbf{I}$
+ and the subsequent volumetric tangent differ slightly from the general form given in the section on hyperelastic materials where
+$\boldsymbol{\tau}_{\textrm{vol}} \equiv p J\mathbf{I}$.
+This is because the pressure $\widetilde{p}$ is now a primary field as opposed to a constitutively derived quantity.
+One needs to carefully distinguish between the primary fields and those obtained from the constitutive relations. 
+
+ at note Although the variables are all expressed in terms of spatial quantities, the domain of integration is the initial configuration.
+This approach is called a <em> total-Lagrangian formulation </em>.
+The approach given in step-18, where the domain of integration is the current configuration, could be called an <em> updated Lagrangian formulation </em>. 
+The various merits of these two approaches are discussed widely in the literature.
+It should be noted however that they are equivalent.
+
+
+The Euler-Lagrange equations corresponding to the residual are:
+ at f{align*}
+	&\textrm{div}\ \boldsymbol{\sigma} + \mathbf{b}^\text{p} = \mathbf{0} && \textrm{[equilibrium]}
+		\\
+	&J(\mathbf{u}) = \widetilde{J} 		&& \textrm{[dilatation]}
+		\\
+	&\widetilde{p} = \dfrac{\textrm{d} \Psi_{\textrm{vol}}(\widetilde{J})}{\textrm{d} \widetilde{J}} && \textrm{[pressure]} \, .
+ at f}
+The first equation is the (quasi-static) equilibrium equation in the spatial setting.
+The second is the constraint that $J(\mathbf{u}) = \widetilde{J}$.
+The third is the definition of the pressure $\widetilde{p}$.
+
+ at note The simplified single-field derivation ($\mathbf{u}$ is the only primary variable) below makes it clear how we transform the limits of integration to the reference domain:
+ at f{align*}
+\int_{\Omega}\delta \mathbf{u} \cdot [ \textrm{div}\ \boldsymbol{\sigma} + \mathbf{b}^\text{p}]~\mathrm{d}v
+&=
+\int_{\Omega} [-\mathrm{grad}\delta \mathbf{u}:\boldsymbol{\sigma} + \delta \mathbf{u} \cdot\mathbf{b}^\text{p}]~\mathrm{d}v
+  + \int_{\partial \Omega} \delta \mathbf{u} \cdot \mathbf{t}^\text{p}~\mathrm{d}a \\
+&=
+- \int_{\Omega_0} \mathrm{grad}\delta \mathbf{u}:\boldsymbol{\tau}~\mathrm{d}V 
++ \int_{\Omega_0} \delta \mathbf{u} \cdot J\mathbf{b}^\text{p}~\mathrm{d}V
+ + \int_{\partial \Omega_0} \delta \mathbf{u} \cdot \mathbf{T}^\text{p}~\mathrm{d}A \\
+&=
+- \int_{\Omega_0} \mathrm{grad}\delta \mathbf{u}:\boldsymbol{\tau}~\mathrm{d}V 
++ \int_{\Omega_0} \delta \mathbf{u} \cdot \mathbf{B}^\text{p}~\mathrm{d}V
+ + \int_{\partial \Omega_{0,\sigma}} \delta \mathbf{u} \cdot \mathbf{T}^\text{p}~\mathrm{d}A \\
+&=
+- \int_{\Omega_0} [\mathrm{grad}\delta\mathbf{u}]^{\text{sym}} :\boldsymbol{\tau}~\mathrm{d}V 
++ \int_{\Omega_0} \delta \mathbf{u} \cdot \mathbf{B}^\text{p}~\mathrm{d}V
+ + \int_{\partial \Omega_{0,\sigma}} \delta \mathbf{u} \cdot \mathbf{T}^\text{p}~\mathrm{d}A \, ,
+ at f}
+where 
+$[\mathrm{grad}\delta\mathbf{u}]^{\text{sym}} = 1/2[ \mathrm{grad}\delta\mathbf{u} + [\mathrm{grad}\delta\mathbf{u}]^T] $.
+
+We will use an iterative Newton-Raphson method to solve the nonlinear residual equation $R$.
+For the sake of simplicity we assume dead loading, i.e. the loading does not change due to the deformation.
+
+The change in a quantity between the known state at $t_{\textrm{n}-1}$
+and the currently unknown state at $t_{\textrm{n}}$ is denoted 
+$\varDelta \{ \bullet \} = { \{ \bullet \} }^{\textrm{n}} - { \{ \bullet \} }^{\textrm{n-1}}$.
+The value of a quantity at the current iteration $\textrm{i}$ is denoted 
+${ \{ \bullet \} }^{\textrm{n}}_{\textrm{i}} = { \{ \bullet \} }_{\textrm{i}}$.
+The incremental change between iterations $\textrm{i}$ and $\textrm{i}+1$ is denoted
+$d \{ \bullet \} := \{ \bullet \}_{\textrm{i}+1} - \{ \bullet \}_{\textrm{i}}$.
+
+Assume that the state of the system is known for some iteration $\textrm{i}$.
+The linearised approximation to the nonlinear governing equations to be solved using the Newton-Raphson method is:
+Find $d \mathbf{\Xi}$ such that
+ at f[
+	R(\mathbf{\Xi}_{\mathsf{i}+1}) =
+		R(\mathbf{\Xi}_{\mathsf{i}})
+		+ D^2_{d \mathbf{\Xi}, \delta \mathbf{\Xi}} \Pi(\mathbf{\Xi_{\mathsf{i}}}) \cdot d \mathbf{\Xi} \equiv 0 \, ,
+ at f]
+then set
+$\mathbf{\Xi}_{\textrm{i}+1} = \mathbf{\Xi}_{\textrm{i}}
++ d \mathbf{\Xi}$.
+The tangent is given by
+
+ at f[
+	D^2_{d \mathbf{\Xi}, \delta \mathbf{\Xi}} \Pi( \mathbf{\Xi}_{\mathsf{i}} )
+		= D_{d \mathbf{\Xi}} R( \mathbf{\Xi}_{\mathsf{i}}; \delta \mathbf{\Xi})
+		=: K(\mathbf{\Xi}_{\mathsf{i}}; d \mathbf{\Xi}, \delta \mathbf{\Xi}) \, .
+ at f]
+Thus,
+ at f{align*}
+ 	K(\mathbf{\Xi}_{\mathsf{i}}; d \mathbf{\Xi}, \delta \mathbf{\Xi})
+ 		&=
+ 			D_{d \mathbf{u}} R( \mathbf{\Xi}_{\mathsf{i}}; \delta \mathbf{\Xi}) \cdot d \mathbf{u}
+ 			\\
+ 				&\quad +
+ 			 	D_{d \widetilde{p}} R( \mathbf{\Xi}_{\mathsf{i}}; \delta \mathbf{\Xi})  d \widetilde{p}
+ 			 \\
+ 			 	&\quad +
+ 			  D_{d \widetilde{J}} R( \mathbf{\Xi}_{\mathsf{i}}; \delta \mathbf{\Xi})  d \widetilde{J} \, ,
+ at f}
+where
+ at f{align*}
+	D_{d \mathbf{u}} R( \mathbf{\Xi}; \delta \mathbf{\Xi})
+ 	&=
+ 	\int_{\Omega_0} \bigl[ \textrm{grad}\ \delta \mathbf{u} :
+ 			\textrm{grad}\ d \mathbf{u} [\boldsymbol{\tau}_{\textrm{iso}} + \boldsymbol{\tau}_{\textrm{vol}}]
+ 			+ \textrm{grad}\ \delta \mathbf{u} :[
+             \underbrace{[\widetilde{p}J[\mathbf{I}\otimes\mathbf{I} - 2 \mathcal{I}]]}_{\equiv J\mathfrak{c}_{\textrm{vol}}} +
+             J\mathfrak{c}_{\textrm{iso}}] :\textrm{grad}\ d \mathbf{u}
+ 		\bigr]~\textrm{d}V
+ 		\\
+ 	&\quad + \int_{\Omega_0} \delta \widetilde{p} J \mathbf{I} : \textrm{grad}\ d \mathbf{u} ~\textrm{d}V \, ,
+ 	\\
+ 	D_{d \widetilde{p}} R( \mathbf{\Xi}; \delta \mathbf{\Xi})
+ 	&=
+ 	\int_{\Omega_0} \textrm{grad}\ \delta \mathbf{u} : J \mathbf{I} d \widetilde{p} ~\textrm{d}V
+ 		-  \int_{\Omega_0} \delta \widetilde{J} d \widetilde{p}  ~\textrm{d}V \, ,
+ 	\\
+ 	D_{d \widetilde{J}} R( \mathbf{\Xi}; \delta \mathbf{\Xi})
+ 	&=  -\int_{\Omega_0} \delta \widetilde{p} d \widetilde{J}~\textrm{d}V
+ 	 + \int_{\Omega_0} \delta \widetilde{J}  \dfrac{\textrm{d}^2 \Psi_{\textrm{vol}}(\widetilde{J})}{\textrm{d} \widetilde{J}\textrm{d}\widetilde{J}} d \widetilde{J} ~\textrm{d}V \, .
+ at f}
+
+Note that the following terms are termed the geometrical stress and  the material contributions to the tangent matrix:
+ at f{align*}
+& \int_{\Omega_0} \textrm{grad}\ \delta \mathbf{u} :
+ 			\textrm{grad}\ d \mathbf{u} [\boldsymbol{\tau}_{\textrm{iso}} +  \boldsymbol{\tau}_{\textrm{vol}}]~\textrm{d}V
+ 			&& \quad {[\textrm{Geometrical stress}]} \, ,
+ 		\\
+& \int_{\Omega_0} \textrm{grad} \delta \mathbf{u} :
+ 			[J\mathfrak{c}_{\textrm{vol}} + J\mathfrak{c}_{\textrm{iso}}] :\textrm{grad}\ d \mathbf{u}
+ 		~\textrm{d}V
+ 		&& \quad {[\textrm{Material}]} \, .
+ at f}
+
+
+<h3> Discretisation of governing equations </h3>
+
+The three-field formulation used here is effective for quasi-incompressible materials,
+that is where $\nu \rightarrow 0.5$ (where $\nu$ is <a
+href="http://en.wikipedia.org/wiki/Poisson's_ratio">Poisson's ratio</a>), subject to a good choice of the interpolation fields
+for $\mathbf{u},~\widetilde{p}$ and $\widetilde{J}$.
+Typically a choice of $Q_n \times DGPM_{n-1} \times DGPM_{n-1}$ is made. 
+Here $DGPM$ is the FE_DGPMonomial class.
+A popular choice is $Q_1 \times DGPM_0 \times DGPM_0$ which is known as the mean dilatation method (see Hughes (2000) for an intuitive discussion).
+This code can accommodate a $Q_n \times DGPM_{n-1} \times DGPM_{n-1}$ formulation.
+The discontinuous approximation
+allows $\widetilde{p}$ and $\widetilde{J}$ to be condensed out
+and a classical displacement based method is recovered.
+
+For fully-incompressible materials $\nu = 0.5$ and the three-field formulation will still exhibit
+locking behaviour.
+This can be overcome by introducing an additional constraint into the free energy of the form
+$\int_{\Omega_0} \Lambda [ \widetilde{J} - 1]~\textrm{d}V$.
+Here $\Lambda$ is a Lagrange multiplier to enforce the isochoric constraint.
+For further details see Miehe (1994).
+
+The linearised problem can be written as
+ at f[
+	\mathbf{\mathsf{K}}( \mathbf{\Xi}_{\textrm{i}}) d\mathbf{\Xi}
+	=
+	\mathbf{ \mathsf{F}}(\mathbf{\Xi}_{\textrm{i}})
+ at f]
+where
+ at f{align*}
+		\underbrace{\begin{bmatrix}
+			\mathbf{\mathsf{K}}_{uu}	&	\mathbf{\mathsf{K}}_{u\widetilde{p}}	& \mathbf{0}
+			\\
+			\mathbf{\mathsf{K}}_{\widetilde{p}u}	&	\mathbf{0}	&	\mathbf{\mathsf{K}}_{\widetilde{p}\widetilde{J}}
+			\\
+			\mathbf{0}	& 	\mathbf{\mathsf{K}}_{\widetilde{J}\widetilde{p}}		& \mathbf{\mathsf{K}}_{\widetilde{J}\widetilde{J}}
+		\end{bmatrix}}_{\mathbf{\mathsf{K}}(\mathbf{\Xi}_{\textrm{i}})}
+		\underbrace{\begin{bmatrix}
+			d \mathbf{\mathsf{u}}\\
+            d \widetilde{\mathbf{\mathsf{p}}} \\
+            d \widetilde{\mathbf{\mathsf{J}}}
+		\end{bmatrix}}_{d \mathbf{\Xi}}
+        =
+        \underbrace{\begin{bmatrix}
+			-\mathbf{\mathsf{R}}_{u}(\mathbf{u}_{\textrm{i}}) \\
+            -\mathbf{\mathsf{R}}_{\widetilde{p}}(\widetilde{p}_{\textrm{i}}) \\
+           -\mathbf{\mathsf{R}}_{\widetilde{J}}(\widetilde{J}_{\textrm{i}})
+		\end{bmatrix}}_{ -\mathbf{\mathsf{R}}(\mathbf{\Xi}_{\textrm{i}}) }
+=
+        \underbrace{\begin{bmatrix}
+			\mathbf{\mathsf{F}}_{u}(\mathbf{u}_{\textrm{i}}) \\
+            \mathbf{\mathsf{F}}_{\widetilde{p}}(\widetilde{p}_{\textrm{i}}) \\
+           \mathbf{\mathsf{F}}_{\widetilde{J}}(\widetilde{J}_{\textrm{i}})
+		\end{bmatrix}}_{ \mathbf{\mathsf{F}}(\mathbf{\Xi}_{\textrm{i}}) } \, .
+ at f}
+
+There are no derivatives of the pressure and dilatation (primary) variables present in the formulation. 
+Thus the discontinuous finite element interpolation of the pressure and dilatation yields a block
+diagonal matrix for
+$\mathbf{\mathsf{K}}_{\widetilde{p}\widetilde{J}}$,
+$\mathbf{\mathsf{K}}_{\widetilde{J}\widetilde{p}}$ and 
+$\mathbf{\mathsf{K}}_{\widetilde{J}\widetilde{J}}$.
+Therefore we can easily express the fields $\widetilde{p}$ and $\widetilde{J}$ on each cell simply
+by inverting a local matrix and multiplying it by the local right hand
+side. We can then insert the result into the remaining equations and recover
+a classical displacement-based method.
+In order to condense out the pressure and dilatation contributions at the element level we need the following results:
+ at f{align*}
+		d \widetilde{\mathbf{\mathsf{p}}}
+		& = \mathbf{\mathsf{K}}_{\widetilde{J}\widetilde{p}}^{-1} \bigl[
+			 \mathbf{\mathsf{F}}_{\widetilde{J}}
+			 - \mathbf{\mathsf{K}}_{\widetilde{J}\widetilde{J}} d \widetilde{\mathbf{\mathsf{J}}} \bigr]
+			\\
+		d \widetilde{\mathbf{\mathsf{J}}}
+		& = \mathbf{\mathsf{K}}_{\widetilde{p}\widetilde{J}}^{-1} \bigl[
+			\mathbf{\mathsf{F}}_{\widetilde{p}}
+			- \mathbf{\mathsf{K}}_{\widetilde{p}u} d \mathbf{\mathsf{u}}
+			\bigr]
+		\\
+		 \Rightarrow d \widetilde{\mathbf{\mathsf{p}}}
+		&=  \mathbf{\mathsf{K}}_{\widetilde{J}\widetilde{p}}^{-1} \mathbf{\mathsf{F}}_{\widetilde{J}}
+		- \underbrace{\bigl[\mathbf{\mathsf{K}}_{\widetilde{J}\widetilde{p}}^{-1} \mathbf{\mathsf{K}}_{\widetilde{J}\widetilde{J}}
+		\mathbf{\mathsf{K}}_{\widetilde{p}\widetilde{J}}^{-1}\bigr]}_{\overline{\mathbf{\mathsf{K}}}}\bigl[ \mathbf{\mathsf{F}}_{\widetilde{p}}
+ 		- \mathbf{\mathsf{K}}_{\widetilde{p}u} d \mathbf{\mathsf{u}} \bigr]
+ at f}
+and thus
+ at f[
+		\underbrace{\bigl[ \mathbf{\mathsf{K}}_{uu} + \overline{\overline{\mathbf{\mathsf{K}}}}~ \bigr]
+		}_{\mathbf{\mathsf{K}}_{\textrm{con}}} d \mathbf{\mathsf{u}}
+		=
+        \underbrace{
+		\Bigl[
+		\mathbf{\mathsf{F}}_{u}
+			- \mathbf{\mathsf{K}}_{u\widetilde{p}} \bigl[ \mathbf{\mathsf{K}}_{\widetilde{J}\widetilde{p}}^{-1} \mathbf{\mathsf{F}}_{\widetilde{J}}
+			- \overline{\mathbf{\mathsf{K}}}\mathbf{\mathsf{F}}_{\widetilde{p}} \bigr]
+		\Bigr]}_{\mathbf{\mathsf{F}}_{\textrm{con}}}
+ at f]
+where
+ at f[
+		\overline{\overline{\mathbf{\mathsf{K}}}} :=
+			\mathbf{\mathsf{K}}_{u\widetilde{p}} \overline{\mathbf{\mathsf{K}}} \mathbf{\mathsf{K}}_{\widetilde{p}u} \, .
+ at f]
+Note that due to the choice of $\widetilde{p}$ and $\widetilde{J}$ as discontinuous at the element level, all matrices that need to be inverted are defined at the element level.
+
+The procedure to construct the various contributions is as follows:
+- Construct $\mathbf{\mathsf{K}}$. 
+- Form $\mathbf{\mathsf{K}}_{\widetilde{p}\widetilde{J}}^{-1}$ for each element and store it where $\mathbf{\mathsf{K}}_{\widetilde{p}\widetilde{J}}$ was stored in $\mathbf{\mathsf{K}}$.
+- Form $\overline{\overline{\mathbf{\mathsf{K}}}}$ and add to $\mathbf{\mathsf{K}}_{uu}$ to get $\mathbf{\mathsf{K}}_{\textrm{con}}$.
+- The modified system matrix is called ${\mathbf{\mathsf{K}}}_{\textrm{store}}$. 
+  That is
+  @f[
+        \mathbf{\mathsf{K}}_{\textrm{store}}
+:=
+        \begin{bmatrix}
+			\mathbf{\mathsf{K}}_{\textrm{con}}	&	\mathbf{\mathsf{K}}_{u\widetilde{p}}	& \mathbf{0}
+			\\
+			\mathbf{\mathsf{K}}_{\widetilde{p}u}	&	\mathbf{0}	&	\mathbf{\mathsf{K}}_{\widetilde{p}\widetilde{J}}^{-1}
+			\\
+			\mathbf{0}	& 	\mathbf{\mathsf{K}}_{\widetilde{J}\widetilde{p}}		& \mathbf{\mathsf{K}}_{\widetilde{J}\widetilde{J}}
+		\end{bmatrix} \, .
+  @f]
+
+
+<h3> The material class </h3>
+
+A good object-oriented design of a Material class would facilitate the extension of this tutorial to a wide range of material types. 
+In this tutorial we simply have one Material class named Material_Compressible_Neo_Hook_Three_Field.
+Ideally this class would derive from a class HyperelasticMaterial which would derive from the base class Material.
+The three-field nature of the formulation used here also complicates the matter. 
+
+The Helmholtz free energy function for the three field formulation is $\Psi = \Psi_\text{vol}(\widetilde{J}) + \Psi_\text{iso}(\overline{\mathbf{b}})$. 
+The isochoric part of the Kirchhoff stress ${\boldsymbol{\tau}}_{\text{iso}}(\overline{\mathbf{b}})$ is identical to that obtained using a one-field formulation for a hyperelastic material. 
+However, the volumetric part of the free energy is now a function of the primary variable $\widetilde{J}$. 
+Thus, for a three field formulation the constitutive response for the volumetric part of the Kirchhoff stress ${\boldsymbol{\tau}}_{\text{vol}}$ (and the tangent) is not given by the hyperelastic constitutive law as in a one-field formulation. 
+One can label the term
+$\boldsymbol{\tau}_{\textrm{vol}} \equiv \widetilde{p} J \mathbf{I}$
+as the volumetric Kirchhoff stress, but the pressure $\widetilde{p}$ is not derived from the free energy; it is a primary field.  
+
+In order to have a flexible approach, it was decided that the Material_Compressible_Neo_Hook_Three_Field would still be able to calculate and return a volumetric Kirchhoff stress and tangent. 
+In order to do this, we choose to store the interpolated primary fields $\widetilde{p}$ and $\widetilde{J}$ in the Material_Compressible_Neo_Hook_Three_Field class associated with the quadrature point. 
+This decision should be revisited at a later stage when the tutorial is extended to account for other materials.
+
+
+<h3> Numerical example </h3>
+
+The numerical example considered here is a nearly-incompressible block under compression.
+This benchmark problem is taken from
+- S. Reese, P. Wriggers, B.D. Reddy (2000),
+  A new locking-free brick element technique for large deformation problems in elasticity,
+  <em> Computers and Structures </em>,
+  <strong> 75 </strong>,
+  291-304.
+
+ <img src="http://www.dealii.org/images/steps/developer/step-44.setup.png" alt="">
+
+The material is quasi-incompressible neo-Hookean with <a href="http://en.wikipedia.org/wiki/Shear_modulus">shear modulus</a> $\mu = 80.194e6$ and $\nu = 0.4999$.
+For such a choice of material properties a conventional single-field $Q_1$ approach would lock.
+That is, the response would be overly stiff.
+The initial and final configurations are shown in the image above.
+Using symmetry, we solve for only one quarter of the geometry (i.e. a cube with dimension $0.001$).
+The inner-quarter of the upper surface of the domain is subject to a load of $p_0$. 
+ 
+ 
diff --git a/examples/step-44/doc/kind b/examples/step-44/doc/kind
new file mode 100644
index 0000000..56e049c
--- /dev/null
+++ b/examples/step-44/doc/kind
@@ -0,0 +1 @@
+solids
diff --git a/examples/step-44/doc/results.dox b/examples/step-44/doc/results.dox
new file mode 100644
index 0000000..22756cc
--- /dev/null
+++ b/examples/step-44/doc/results.dox
@@ -0,0 +1,279 @@
+<h1>Results</h1>
+
+Firstly, we present a comparison of a series of results with those
+in the literature (see Reese et al (2000)) to demonstrate that the program works as expected.
+
+We begin with a comparison of the convergence with mesh refinement for the $Q_1-DGPM_0-DGPM_0$ and
+$Q_2-DGPM_1-DGPM_1$ formulations, as summarised in the figure below.
+The vertical displacement of the midpoint of the upper surface of the block is used to assess convergence.
+Both schemes demonstrate good convergence properties for varying values of the load parameter $p/p_0$.
+The results agree with those in the literature.
+The lower-order formulation typically overestimates the displacement for low levels of refinement,
+while the higher-order interpolation scheme underestimates it, but to a lesser degree.
+This benchmark, and a series of others not shown here, give us confidence that the code is working
+as it should.
+
+<table align="center" class="tutorial" cellspacing="3" cellpadding="3">
+  <tr>
+     <td align="center">
+        <img src="http://www.dealii.org/images/steps/developer/step-44.Q1-P0_convergence.png" alt="">
+	<p align="center">
+        Convergence of the $Q_1-DGPM_0-DGPM_0$ formulation.
+	</p>
+    </td>
+    <td align="center">
+        <img src="http://www.dealii.org/images/steps/developer/step-44.Q2-P1_convergence.png" alt="">
+	<p align="center">
+        Convergence of the $Q_2-DGPM_1-DGPM_1$ formulation.
+	</p>
+    </td>
+  </tr>
+</table>
+
+
+A typical screen output generated by running the problem is shown below.
+The particular case demonstrated is that of the $Q_2-DGPM_1-DGPM_1$ formulation.
+It is clear that, using the Newton-Raphson method, quadratic convergence of the solution is obtained.
+Solution convergence is achieved within 5 Newton increments for all time-steps.
+The converged displacement's $L_2$-norm is several orders of magnitude less than the geometry scale.
+
+@code
+Grid:
+	 Reference volume: 1e-09
+Triangulation:
+	 Number of active cells: 64
+	 Number of degrees of freedom: 2699
+    Setting up quadrature point data...
+
+Timestep 1 @ 0.1s
+___________________________________________________________________________________________________________________________________________________________
+                 SOLVER STEP                   |  LIN_IT   LIN_RES    RES_NORM     RES_U     RES_P      RES_J     NU_NORM      NU_U       NU_P       NU_J
+___________________________________________________________________________________________________________________________________________________________
+  0  ASM_R  ASM_K  CST  ASM_SC  SLV  PP  UQPH  |     786  2.118e-06  1.000e+00  1.000e+00  0.000e+00  0.000e+00  1.000e+00  1.000e+00  1.000e+00  1.000e+00
+  1  ASM_R  ASM_K  CST  ASM_SC  SLV  PP  UQPH  |     552  1.031e-03  8.563e-02  8.563e-02  9.200e-13  3.929e-08  1.060e-01  3.816e-02  1.060e-01  1.060e-01
+  2  ASM_R  ASM_K  CST  ASM_SC  SLV  PP  UQPH  |     667  5.602e-06  2.482e-03  2.482e-03  3.373e-15  2.982e-10  2.936e-03  2.053e-04  2.936e-03  2.936e-03
+  3  ASM_R  ASM_K  CST  ASM_SC  SLV  PP  UQPH  |     856  6.469e-10  2.129e-06  2.129e-06  2.245e-19  1.244e-13  1.887e-06  7.289e-07  1.887e-06  1.887e-06
+  4  ASM_R  CONVERGED!
+___________________________________________________________________________________________________________________________________________________________
+Relative errors:
+Displacement:	7.289e-07
+Force: 		2.451e-10
+Dilatation:	1.353e-07
+v / V_0:	1.000e-09 / 1.000e-09 = 1.000e+00
+
+
+[...]
+
+Timestep 10 @ 1.000e+00s
+___________________________________________________________________________________________________________________________________________________________
+                 SOLVER STEP                   |  LIN_IT   LIN_RES    RES_NORM     RES_U     RES_P      RES_J     NU_NORM      NU_U       NU_P       NU_J
+___________________________________________________________________________________________________________________________________________________________
+  0  ASM_R  ASM_K  CST  ASM_SC  SLV  PP  UQPH  |     874  2.358e-06  1.000e+00  1.000e+00  1.000e+00  1.000e+00  1.000e+00  1.000e+00  1.000e+00  1.000e+00
+  1  ASM_R  ASM_K  CST  ASM_SC  SLV  PP  UQPH  |     658  2.942e-04  1.544e-01  1.544e-01  1.208e+13  1.855e+06  6.014e-02  7.398e-02  6.014e-02  6.014e-02
+  2  ASM_R  ASM_K  CST  ASM_SC  SLV  PP  UQPH  |     790  2.206e-06  2.908e-03  2.908e-03  7.302e+10  2.067e+03  2.716e-03  1.433e-03  2.716e-03  2.717e-03
+  3  ASM_R  ASM_K  CST  ASM_SC  SLV  PP  UQPH  |     893  2.374e-09  1.919e-06  1.919e-06  4.527e+07  4.100e+00  1.672e-06  6.842e-07  1.672e-06  1.672e-06
+  4  ASM_R  CONVERGED!
+___________________________________________________________________________________________________________________________________________________________
+Relative errors:
+Displacement:	6.842e-07
+Force: 		8.995e-10
+Dilatation:	1.528e-06
+v / V_0:	1.000e-09 / 1.000e-09 = 1.000e+00
+@endcode
+
+
+
+Using the Timer class, we can discern which parts of the code require the highest computational expense.
+For a case with a large number of degrees-of-freedom (i.e. a high level of refinement), a typical output of the Timer is given below.
+Much of the code in the tutorial has been developed based on the optimisations described,
+discussed and demonstrated in Step-18 and others.
+With over 93% of the time being spent in the linear solver, it is obvious that it may be necessary
+to invest in a better solver for large three-dimensional problems.
+The SSOR preconditioner is not multithreaded but is effective for this class of solid problems.
+It may be beneficial to investigate the use of another solver such as those available through the Trilinos library.
+
+
+@code
++---------------------------------------------+------------+------------+
+| Total wallclock time elapsed since start    | 9.874e+02s |            |
+|                                             |            |            |
+| Section                         | no. calls |  wall time | % of total |
++---------------------------------+-----------+------------+------------+
+| Assemble system right-hand side |        53 | 1.727e+00s |  1.75e-01% |
+| Assemble tangent matrix         |        43 | 2.707e+01s |  2.74e+00% |
+| Linear solver                   |        43 | 9.248e+02s |  9.37e+01% |
+| Linear solver postprocessing    |        43 | 2.743e-02s |  2.78e-03% |
+| Perform static condensation     |        43 | 1.437e+01s |  1.46e+00% |
+| Setup system                    |         1 | 3.897e-01s |  3.95e-02% |
+| Update QPH data                 |        43 | 5.770e-01s |  5.84e-02% |
++---------------------------------+-----------+------------+------------+
+@endcode
+
+
+We then used ParaView to visualise the results for two cases.
+The first was for the coarsest grid and the lowest-order interpolation method: $Q_1-DGPM_0-DGPM_0$.
+The second was on a refined grid using a $Q_2-DGPM_1-DGPM_1$ formulation.
+The vertical component of the displacement, the pressure $\widetilde{p}$ and the dilatation $\widetilde{J}$ fields
+are shown below.
+
+
+For the first case it is clear that the coarse spatial discretisation coupled with large displacements leads to a low quality solution
+(the loading ratio is  $p/p_0=80$).
+Additionally, the pressure difference between elements is very large.
+The constant pressure field on the element means that the large pressure gradient is not captured.
+However, it should be noted that locking, which would be present in a standard $Q_1$ displacement formulation, does not arise
+even in this poorly discretised case.
+The final vertical displacement of the tracked node on the top surface of the block is still within 12.5% of the converged solution.
+The pressure solution is very coarse and has large jumps between adjacent cells.
+It is clear that the volume nearest to the applied traction undergoes compression while the outer extents
+of the domain are in a state of expansion.
+The dilatation solution field and pressure field are clearly linked,
+with positive dilatation indicating regions of positive pressure and negative showing regions placed in compression.
+As discussed in the Introduction, a compressive pressure has a negative sign
+while an expansive pressure takes a positive sign.
+This stems from the definition of the volumetric strain energy function
+and is opposite to the physically realistic interpretation of pressure.
+
+
+<table align="center" class="tutorial" cellspacing="3" cellpadding="3">
+  <tr>
+    <td align="center">
+        <img src="http://www.dealii.org/images/steps/developer/step-44.Q1-P0_gr_1_p_ratio_80-displacement.png" alt="">
+	<p align="center">
+        Z-displacement solution.
+	</p>
+    </td>
+    <td align="center">
+        <img src="http://www.dealii.org/images/steps/developer/step-44.Q1-P0_gr_1_p_ratio_80-pressure.png" alt="">
+	<p align="center">
+        Discontinuous piece-wise constant pressure field.
+	</p>
+    </td>
+     <td align="center">
+        <img src="http://www.dealii.org/images/steps/developer/step-44.Q1-P0_gr_1_p_ratio_80-dilatation.png" alt="">
+	<p align="center">
+        Discontinuous piece-wise constant dilatation field.
+	</p>
+    </td>
+  </tr>
+</table>
+
+Combining spatial refinement and a higher-order interpolation scheme results in a high-quality solution.
+Three grid refinements coupled with a $Q_2-DGPM_1-DGPM_1$ formulation produce
+a result that clearly captures the mechanics of the problem.
+The deformation of the traction surface is well resolved.
+We can now observe the actual extent of the applied traction, with the maximum force being applied
+at the central point of the surface causing the largest compression.
+Even though very high strains are experienced in the domain,
+especially at the boundary of the region of applied traction,
+the solution remains accurate.
+The pressure field is captured in far greater detail than before.
+There is a clear distinction and transition between regions of compression and expansion,
+and the linear approximation of the pressure field allows a refined visualisation
+of the pressure at the sub-element scale.
+It should however be noted that the pressure field remains discontinuous
+ and could be smoothed on a continuous grid for post-processing purposes.
+
+
+
+<table align="center" class="tutorial" cellspacing="3" cellpadding="3">
+
+  <tr>
+    <td align="center">
+        <img src="http://www.dealii.org/images/steps/developer/step-44.Q2-P1_gr_3_p_ratio_80-displacement.png" alt="">
+	<p align="center">
+        Z-displacement solution.
+	</p>
+    </td>
+    <td align="center">
+        <img src="http://www.dealii.org/images/steps/developer/step-44.Q2-P1_gr_3_p_ratio_80-pressure.png" alt="">
+	<p align="center">
+        Discontinuous linear pressure field.
+	</p>
+    </td>
+    <td align="center">
+        <img src="http://www.dealii.org/images/steps/developer/step-44.Q2-P1_gr_3_p_ratio_80-dilatation.png" alt="">
+	<p align="center">
+        Discontinuous linear dilatation field.
+	</p>
+    </td>
+  </tr>
+</table>
+
+This brief analysis of the results demonstrates that the three-field formulation is effective
+in circumventing volumetric locking for highly-incompressible media.
+The mixed formulation is able to accurately simulate the displacement of a
+near-incompressible block under compression.
+The command-line output indicates that extreme compression resulted in a volume change of
+less than 0.01% for a Poisson's ratio of 0.4999.
+
+In terms of run-time, the $Q_2-DGPM_1-DGPM_1$ formulation tends to be more computationally expensive
+than the $Q_1-DGPM_0-DGPM_0$ for a similar number of degrees-of-freedom
+(produced by adding an extra grid refinement level for the lower-order interpolation).
+This is shown in the graph below for a batch of tests run consecutively on a single 4-core (8-thread) machine.
+The increase in computational time for the higher-order method is likely due to
+the increased band-width required for the higher-order elements.
+As previously mentioned, the use of a better solver and preconditioner may mitigate the
+expense of using a higher-order formulation.
+It was observed that for the given problem using the multithreaded Jacobi preconditioner can reduce the
+computational runtime by up to 72% (for the worst case being a higher-order formulation with a large number
+of degrees-of-freedom) in comparison to the single-thread SSOR preconditioner.
+However, it is the author's experience that the Jacobi method of preconditioning may not be suitable for
+some finite-strain problems involving alternative constitutive models.
+
+
+<table align="center" class="tutorial" cellspacing="3" cellpadding="3">
+  <tr>
+     <td align="center">
+        <img src="http://www.dealii.org/images/steps/developer/step-44.Normalised_runtime.png" alt="">
+	<p align="center">
+        Runtime on a 4-core machine, normalised against the lowest grid resolution $Q_1-DGPM_0-DGPM_0$ solution that utilised a SSOR preconditioner.
+	</p>
+    </td>
+  </tr>
+</table>
+
+<a name="extensions"></a>
+<h3>Possibilities for extensions</h3>
+
+There are a number of obvious extensions for this work:
+
+- Firstly, an additional constraint could be added to the free-energy
+  function in order to enforce a high degree of incompressibility in
+  materials. An additional Lagrange multiplier would be introduced,
+  but this could most easily be dealt with using the principle of
+  augmented Lagrange multipliers. This is demonstrated in <em>Simo and
+  Taylor (1991) </em>.
+- The constitutive relationship used in this
+  model is relatively basic. It may be beneficial to split the material
+  class into two separate classes, one dealing with the volumetric
+  response and the other the isochoric response, and produce a generic
+  materials class (i.e. having abstract virtual functions that derived
+  classes have to implement) that would allow for the addition of more complex
+  material models. Such models could include other hyperelastic
+  materials, plasticity and viscoelastic materials and others.
+- The program has been developed for solving problems on single-node
+  multicore machines. With a little effort, the program could be
+  extended to a large-scale computing environment through the use of
+  Petsc or Trilinos, using a similar technique to that demonstrated in
+  step-40. This would mostly involve changes to the setup, assembly,
+  <code>PointHistory</code> and linear solver routines.
+- As this program assumes quasi-static equilibrium, extensions to
+  include dynamic effects would be necessary to study problems where
+  inertial effects are important, e.g. problems involving impact.
+- Load and solution limiting procedures may be necessary for highly
+  nonlinear problems. It is possible to add a linesearch algorithm to
+  limit the step size within a Newton increment to ensure optimum
+  convergence. It may also be necessary to use a load limiting method,
+  such as the Riks method, to solve unstable problems involving
+  geometric instability such as buckling and snap-through.
+- Many physical problems involve contact. It is possible to include
+  the effect of frictional or frictionless contact between objects
+  into this program. This would involve the addition of an extra term
+  in the free-energy functional and therefore an addition to the
+  assembly routine. One would also need to manage the contact problem
+  (detection and stress calculations) itself. An alternative to
+  additional penalty terms in the free-energy functional would be to
+  use active set methods such as the one used in step-41.
+- Finally, adaptive mesh refinement, as demonstrated in step-6 and
+  step-18, could provide additional solution accuracy.
diff --git a/examples/step-44/doc/tooltip b/examples/step-44/doc/tooltip
new file mode 100644
index 0000000..2e08a00
--- /dev/null
+++ b/examples/step-44/doc/tooltip
@@ -0,0 +1 @@
+Quasi-static finite-strain elasticity
diff --git a/examples/step-44/parameters.prm b/examples/step-44/parameters.prm
new file mode 100644
index 0000000..6c1aca3
--- /dev/null
+++ b/examples/step-44/parameters.prm
@@ -0,0 +1,71 @@
+# Listing of Parameters
+# ---------------------
+subsection Finite element system
+  # Displacement system polynomial order
+  set Polynomial degree = 2
+
+  # Gauss quadrature order
+  set Quadrature order  = 3
+end
+
+
+subsection Geometry
+  # Global refinement level
+  set Global refinement   = 2
+
+  # Global grid scaling factor
+  set Grid scale          = 1e-3
+
+  # Ratio of applied pressure to reference pressure
+  set Pressure ratio p/p0 = 100
+end
+
+
+subsection Linear solver
+  # Linear solver iterations (multiples of the system matrix size)
+  set Max iteration multiplier = 1
+
+  # Linear solver residual (scaled by residual norm)
+  set Residual                 = 1e-6
+
+ # Preconditioner type
+ set Preconditioner type  = ssor
+
+ # Preconditioner relaxation value
+ set Preconditioner relaxation  = 0.65
+
+  # Type of solver used to solve the linear system
+  set Solver type              = CG
+end
+
+
+subsection Material properties
+  # Poisson's ratio
+  set Poisson's ratio = 0.4999
+
+  # Shear modulus
+  set Shear modulus   = 80.194e6
+end
+
+
+subsection Nonlinear solver
+  # Number of Newton-Raphson iterations allowed
+  set Max iterations Newton-Raphson = 10
+
+  # Displacement error tolerance
+  set Tolerance displacement        = 1.0e-6
+
+  # Force residual tolerance
+  set Tolerance force               = 1.0e-9
+end
+
+
+subsection Time
+  # End time
+  set End time       = 1
+
+  # Time step size
+  set Time step size = 0.1
+end
+
+
diff --git a/examples/step-44/step-44.cc b/examples/step-44/step-44.cc
new file mode 100644
index 0000000..2ccb74b
--- /dev/null
+++ b/examples/step-44/step-44.cc
@@ -0,0 +1,3205 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2010 - 2015 by the deal.II authors and
+ *                              & Jean-Paul Pelteret and Andrew McBride
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Authors: Jean-Paul Pelteret, University of Cape Town,
+ *          Andrew McBride, University of Erlangen-Nuremberg, 2010
+ */
+
+
+// We start by including all the necessary deal.II header files and some C++
+// related ones. They have been discussed in detail in previous tutorial
+// programs, so you need only refer to past tutorials for details.
+#include <deal.II/base/function.h>
+#include <deal.II/base/parameter_handler.h>
+#include <deal.II/base/point.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/symmetric_tensor.h>
+#include <deal.II/base/tensor.h>
+#include <deal.II/base/timer.h>
+#include <deal.II/base/work_stream.h>
+
+#include <deal.II/dofs/dof_renumbering.h>
+#include <deal.II/dofs/dof_tools.h>
+
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/grid_tools.h>
+#include <deal.II/grid/grid_in.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_boundary_lib.h>
+
+#include <deal.II/fe/fe_dgp_monomial.h>
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_system.h>
+#include <deal.II/fe/fe_tools.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/mapping_q_eulerian.h>
+
+#include <deal.II/lac/block_sparse_matrix.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/precondition_selector.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/sparse_direct.h>
+#include <deal.II/lac/constraint_matrix.h>
+
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/numerics/vector_tools.h>
+
+#include <iostream>
+#include <fstream>
+
+
+// We then stick everything that relates to this tutorial program into a
+// namespace of its own, and import all the deal.II function and class names
+// into it:
+namespace Step44
+{
+  using namespace dealii;
+
+// @sect3{Run-time parameters}
+//
+// There are several parameters that can be set in the code so we set up a
+// ParameterHandler object to read in the choices at run-time.
+  namespace Parameters
+  {
+// @sect4{Finite Element system}
+
+// As mentioned in the introduction, a different order interpolation should be
+// used for the displacement $\mathbf{u}$ than for the pressure
+// $\widetilde{p}$ and the dilatation $\widetilde{J}$.  Choosing
+// $\widetilde{p}$ and $\widetilde{J}$ as discontinuous (constant) functions
+// at the element level leads to the mean-dilatation method. The discontinuous
+// approximation allows $\widetilde{p}$ and $\widetilde{J}$ to be condensed
+// out and a classical displacement based method is recovered.  Here we
+// specify the polynomial order used to approximate the solution.  The
+// quadrature order should be adjusted accordingly.
+    struct FESystem // Holds the run-time values of the "Finite element system" parameter subsection.
+    {
+      unsigned int poly_degree; // filled from the "Polynomial degree" entry
+      unsigned int quad_order; // filled from the "Quadrature order" entry
+
+      static void
+      declare_parameters(ParameterHandler &prm); // declares the subsection's entries on prm
+
+      void
+      parse_parameters(ParameterHandler &prm); // copies the entries' values from prm into the members
+    };
+
+
+    void FESystem::declare_parameters(ParameterHandler &prm) // Declares the two entries of this group on prm.
+    {
+      prm.enter_subsection("Finite element system");
+      {
+        prm.declare_entry("Polynomial degree", "2", // entry name, default value
+                          Patterns::Integer(0), // presumably restricts input to integers >= 0 -- confirm against Patterns::Integer
+                          "Displacement system polynomial order"); // help text for the entry
+
+        prm.declare_entry("Quadrature order", "3",
+                          Patterns::Integer(0),
+                          "Gauss quadrature order");
+      }
+      prm.leave_subsection(); // balances the enter_subsection() call above
+    }
+
+    void FESystem::parse_parameters(ParameterHandler &prm) // Reads this group's values from prm into the members.
+    {
+      prm.enter_subsection("Finite element system"); // same subsection name as in declare_parameters()
+      {
+        poly_degree = prm.get_integer("Polynomial degree");
+        quad_order = prm.get_integer("Quadrature order");
+      }
+      prm.leave_subsection();
+    }
+
+// @sect4{Geometry}
+
+// Make adjustments to the problem geometry and the applied load.  Since the
+// problem modelled here is quite specific, the load scale can be altered to
+// specific values to compare with the results given in the literature.
+    struct Geometry // Holds the run-time values of the "Geometry" parameter subsection.
+    {
+      unsigned int global_refinement; // filled from the "Global refinement" entry
+      double       scale; // filled from the "Grid scale" entry
+      double       p_p0; // filled from the "Pressure ratio p/p0" entry
+
+      static void
+      declare_parameters(ParameterHandler &prm); // declares the subsection's entries on prm
+
+      void
+      parse_parameters(ParameterHandler &prm); // copies the entries' values from prm into the members
+    };
+
+    void Geometry::declare_parameters(ParameterHandler &prm) // Declares the three entries of this group on prm.
+    {
+      prm.enter_subsection("Geometry");
+      {
+        prm.declare_entry("Global refinement", "2", // entry name, default value
+                          Patterns::Integer(0),
+                          "Global refinement level");
+
+        prm.declare_entry("Grid scale", "1e-3",
+                          Patterns::Double(0.0),
+                          "Global grid scaling factor");
+
+        prm.declare_entry("Pressure ratio p/p0", "100",
+                          Patterns::Selection("20|40|60|80|100"), // explicit list of values; parse_parameters() reads the choice with get_double()
+                          "Ratio of applied pressure to reference pressure");
+      }
+      prm.leave_subsection(); // balances the enter_subsection() call above
+    }
+
+    void Geometry::parse_parameters(ParameterHandler &prm) // Reads this group's values from prm into the members.
+    {
+      prm.enter_subsection("Geometry"); // same subsection name as in declare_parameters()
+      {
+        global_refinement = prm.get_integer("Global refinement");
+        scale = prm.get_double("Grid scale");
+        p_p0 = prm.get_double("Pressure ratio p/p0"); // declared as a selection of numeric strings, read here as a double
+      }
+      prm.leave_subsection();
+    }
+
+// @sect4{Materials}
+
+// We also need the shear modulus $ \mu $ and Poisson ratio $ \nu $ for the
+// neo-Hookean material.
+    struct Materials // Holds the run-time values of the "Material properties" parameter subsection.
+    {
+      double nu; // filled from the "Poisson's ratio" entry
+      double mu; // filled from the "Shear modulus" entry
+
+      static void
+      declare_parameters(ParameterHandler &prm); // declares the subsection's entries on prm
+
+      void
+      parse_parameters(ParameterHandler &prm); // copies the entries' values from prm into the members
+    };
+
+    void Materials::declare_parameters(ParameterHandler &prm) // Declares the two entries of this group on prm.
+    {
+      prm.enter_subsection("Material properties");
+      {
+        prm.declare_entry("Poisson's ratio", "0.4999", // entry name, default value
+                          Patterns::Double(-1.0,0.5), // bounds passed for the accepted range
+                          "Poisson's ratio");
+
+        prm.declare_entry("Shear modulus", "80.194e6",
+                          Patterns::Double(), // no bounds are passed for the shear modulus
+                          "Shear modulus");
+      }
+      prm.leave_subsection(); // balances the enter_subsection() call above
+    }
+
+    void Materials::parse_parameters(ParameterHandler &prm) // Reads this group's values from prm into the members.
+    {
+      prm.enter_subsection("Material properties"); // same subsection name as in declare_parameters()
+      {
+        nu = prm.get_double("Poisson's ratio");
+        mu = prm.get_double("Shear modulus");
+      }
+      prm.leave_subsection();
+    }
+
+// @sect4{Linear solver}
+
+// Next, we choose both solver and preconditioner settings.  The use of an
+// effective preconditioner is critical to ensure convergence when a large
+// nonlinear motion occurs within a Newton increment.
+    struct LinearSolver // Holds the run-time values of the "Linear solver" parameter subsection.
+    {
+      std::string type_lin; // filled from "Solver type" (declared choices: CG, Direct)
+      double      tol_lin; // filled from "Residual"
+      double      max_iterations_lin; // filled from "Max iteration multiplier"; a multiplier, hence a double
+      std::string preconditioner_type; // filled from "Preconditioner type" (declared choices: jacobi, ssor)
+      double      preconditioner_relaxation; // filled from "Preconditioner relaxation"
+
+      static void
+      declare_parameters(ParameterHandler &prm); // declares the subsection's entries on prm
+
+      void
+      parse_parameters(ParameterHandler &prm); // copies the entries' values from prm into the members
+    };
+
+    void LinearSolver::declare_parameters(ParameterHandler &prm) // Declares the five entries of this group on prm.
+    {
+      prm.enter_subsection("Linear solver");
+      {
+        prm.declare_entry("Solver type", "CG", // entry name, default value
+                          Patterns::Selection("CG|Direct"), // explicit list of accepted solver names
+                          "Type of solver used to solve the linear system");
+
+        prm.declare_entry("Residual", "1e-6",
+                          Patterns::Double(0.0),
+                          "Linear solver residual (scaled by residual norm)");
+
+        prm.declare_entry("Max iteration multiplier", "1",
+                          Patterns::Double(0.0), // declared as a double, not an integer
+                          "Linear solver iterations (multiples of the system matrix size)");
+
+        prm.declare_entry("Preconditioner type", "ssor",
+                          Patterns::Selection("jacobi|ssor"), // explicit list of accepted preconditioner names
+                          "Type of preconditioner");
+
+        prm.declare_entry("Preconditioner relaxation", "0.65",
+                          Patterns::Double(0.0),
+                          "Preconditioner relaxation value");
+      }
+      prm.leave_subsection(); // balances the enter_subsection() call above
+    }
+
+    void LinearSolver::parse_parameters(ParameterHandler &prm) // Reads this group's values from prm into the members.
+    {
+      prm.enter_subsection("Linear solver"); // same subsection name as in declare_parameters()
+      {
+        type_lin = prm.get("Solver type"); // string-valued entries are read with get()
+        tol_lin = prm.get_double("Residual");
+        max_iterations_lin = prm.get_double("Max iteration multiplier");
+        preconditioner_type = prm.get("Preconditioner type");
+        preconditioner_relaxation = prm.get_double("Preconditioner relaxation");
+      }
+      prm.leave_subsection();
+    }
+
+// @sect4{Nonlinear solver}
+
+// A Newton-Raphson scheme is used to solve the nonlinear system of governing
+// equations.  We now define the tolerances and the maximum number of
+// iterations for the Newton-Raphson nonlinear solver.
+    struct NonlinearSolver // Holds the run-time values of the "Nonlinear solver" parameter subsection.
+    {
+      unsigned int max_iterations_NR; // filled from "Max iterations Newton-Raphson"
+      double       tol_f; // filled from "Tolerance force"
+      double       tol_u; // filled from "Tolerance displacement"
+
+      static void
+      declare_parameters(ParameterHandler &prm); // declares the subsection's entries on prm
+
+      void
+      parse_parameters(ParameterHandler &prm); // copies the entries' values from prm into the members
+    };
+
+    void NonlinearSolver::declare_parameters(ParameterHandler &prm) // Declares the three entries of this group on prm.
+    {
+      prm.enter_subsection("Nonlinear solver");
+      {
+        prm.declare_entry("Max iterations Newton-Raphson", "10", // entry name, default value
+                          Patterns::Integer(0),
+                          "Number of Newton-Raphson iterations allowed");
+
+        prm.declare_entry("Tolerance force", "1.0e-9",
+                          Patterns::Double(0.0),
+                          "Force residual tolerance");
+
+        prm.declare_entry("Tolerance displacement", "1.0e-6",
+                          Patterns::Double(0.0),
+                          "Displacement error tolerance");
+      }
+      prm.leave_subsection(); // balances the enter_subsection() call above
+    }
+
+    void NonlinearSolver::parse_parameters(ParameterHandler &prm) // Reads this group's values from prm into the members.
+    {
+      prm.enter_subsection("Nonlinear solver"); // same subsection name as in declare_parameters()
+      {
+        max_iterations_NR = prm.get_integer("Max iterations Newton-Raphson");
+        tol_f = prm.get_double("Tolerance force");
+        tol_u = prm.get_double("Tolerance displacement");
+      }
+      prm.leave_subsection();
+    }
+
+// @sect4{Time}
+
+// Set the timestep size $ \varDelta t $ and the simulation end-time.
+    struct Time // Holds the run-time values of the "Time" parameter subsection.
+    {
+      double delta_t; // filled from the "Time step size" entry
+      double end_time; // filled from the "End time" entry
+
+      static void
+      declare_parameters(ParameterHandler &prm); // declares the subsection's entries on prm
+
+      void
+      parse_parameters(ParameterHandler &prm); // copies the entries' values from prm into the members
+    };
+
+    void Time::declare_parameters(ParameterHandler &prm) // Declares the two entries of this group on prm.
+    {
+      prm.enter_subsection("Time");
+      {
+        prm.declare_entry("End time", "1", // entry name, default value
+                          Patterns::Double(), // no bounds are passed for the end time
+                          "End time");
+
+        prm.declare_entry("Time step size", "0.1",
+                          Patterns::Double(), // no bounds are passed for the step size
+                          "Time step size");
+      }
+      prm.leave_subsection(); // balances the enter_subsection() call above
+    }
+
+    void Time::parse_parameters(ParameterHandler &prm) // Reads this group's values from prm into the members.
+    {
+      prm.enter_subsection("Time"); // same subsection name as in declare_parameters()
+      {
+        end_time = prm.get_double("End time");
+        delta_t = prm.get_double("Time step size");
+      }
+      prm.leave_subsection();
+    }
+
+// @sect4{All parameters}
+
+// Finally we consolidate all of the above structures into a single container
+// that holds all of our run-time selections.
+    struct AllParameters : public FESystem, // Aggregates all six parameter groups by public inheritance.
+      public Geometry,
+      public Materials,
+      public LinearSolver,
+      public NonlinearSolver,
+      public Time
+
+    {
+      AllParameters(const std::string &input_file); // input_file is handed to ParameterHandler::read_input()
+
+      static void
+      declare_parameters(ParameterHandler &prm); // forwards to each base's declare_parameters()
+
+      void
+      parse_parameters(ParameterHandler &prm); // forwards to each base's parse_parameters()
+    };
+
+    AllParameters::AllParameters(const std::string &input_file) // Fills every inherited member from input_file.
+    {
+      ParameterHandler prm; // local handler; only the parsed values are kept in the members
+      declare_parameters(prm); // entries are declared before the input is read
+      prm.read_input(input_file);
+      parse_parameters(prm); // copy the values held by prm into the base-class members
+    }
+
+    void AllParameters::declare_parameters(ParameterHandler &prm) // Declares the entries of all six groups on prm.
+    {
+      FESystem::declare_parameters(prm); // each call declares its own subsection
+      Geometry::declare_parameters(prm);
+      Materials::declare_parameters(prm);
+      LinearSolver::declare_parameters(prm);
+      NonlinearSolver::declare_parameters(prm);
+      Time::declare_parameters(prm);
+    }
+
+    void AllParameters::parse_parameters(ParameterHandler &prm) // Reads the values of all six groups from prm.
+    {
+      FESystem::parse_parameters(prm); // each call fills the members of the corresponding base struct
+      Geometry::parse_parameters(prm);
+      Materials::parse_parameters(prm);
+      LinearSolver::parse_parameters(prm);
+      NonlinearSolver::parse_parameters(prm);
+      Time::parse_parameters(prm);
+    }
+  }
+
+// @sect3{Some standard tensors}
+
+// Now we define some frequently used second and fourth-order tensors:
+  template <int dim>
+  class StandardTensors // Collection of constant tensors shared by all users of a given dim.
+  {
+  public:
+
+    // $\mathbf{I}$
+    static const SymmetricTensor<2, dim> I;
+    // $\mathbf{I} \otimes \mathbf{I}$
+    static const SymmetricTensor<4, dim> IxI;
+    // $\mathcal{S}$; note that we only use this fourth-order unit tensor
+    // to operate on symmetric second-order tensors.  To maintain notation
+    // consistent with Holzapfel (2001) we name the tensor $\mathcal{I}$
+    static const SymmetricTensor<4, dim> II;
+    // Fourth-order deviatoric tensor such that
+    // $\textrm{dev} \{ \bullet \} = \{ \bullet \} -
+    //  [1/\textrm{dim}][ \{ \bullet\} :\mathbf{I}]\mathbf{I}$
+    static const SymmetricTensor<4, dim> dev_P;
+  };
+
+  template <int dim>
+  const SymmetricTensor<2, dim>
+  StandardTensors<dim>::I = unit_symmetric_tensor<dim>();
+
+  template <int dim>
+  const SymmetricTensor<4, dim>
+  StandardTensors<dim>::IxI = outer_product(I, I);
+
+  template <int dim>
+  const SymmetricTensor<4, dim>
+  StandardTensors<dim>::II = identity_tensor<dim>();
+
+  template <int dim>
+  const SymmetricTensor<4, dim>
+  StandardTensors<dim>::dev_P = deviator_tensor<dim>();
+
+// @sect3{Time class}
+
+// A simple class to store time data. Its functioning is transparent so no
+// discussion is necessary. For simplicity we assume a constant time step
+// size.
+  class Time
+  {
+  public:
+    Time (const double time_end,
+          const double delta_t)
+      :
+      timestep(0),
+      time_current(0.0),
+      time_end(time_end),
+      delta_t(delta_t)
+    {}
+
+    virtual ~Time()
+    {}
+
+    double current() const
+    {
+      return time_current;
+    }
+    double end() const
+    {
+      return time_end;
+    }
+    double get_delta_t() const
+    {
+      return delta_t;
+    }
+    unsigned int get_timestep() const
+    {
+      return timestep;
+    }
+    void increment()
+    {
+      time_current += delta_t;
+      ++timestep;
+    }
+
+  private:
+    unsigned int timestep;
+    double       time_current;
+    const double time_end;
+    const double delta_t;
+  };
+
+// @sect3{Compressible neo-Hookean material within a three-field formulation}
+
+// As discussed in the Introduction, Neo-Hookean materials are a type of
+// hyperelastic materials.  The entire domain is assumed to be composed of a
+// compressible neo-Hookean material.  This class defines the behaviour of
+// this material within a three-field formulation.  Compressible neo-Hookean
+// materials can be described by a strain-energy function (SEF) $ \Psi =
+// \Psi_{\text{iso}}(\overline{\mathbf{b}}) + \Psi_{\text{vol}}(\widetilde{J})
+// $.
+//
+// The isochoric response is given by $
+// \Psi_{\text{iso}}(\overline{\mathbf{b}}) = c_{1} [\overline{I}_{1} - 3] $
+// where $ c_{1} = \frac{\mu}{2} $ and $\overline{I}_{1}$ is the first
+// invariant of the left- or right-isochoric Cauchy-Green deformation tensors.
+// That is $\overline{I}_1 :=\textrm{tr}(\overline{\mathbf{b}})$.  In this
+// example the SEF that governs the volumetric response is defined as $
+// \Psi_{\text{vol}}(\widetilde{J}) = \kappa \frac{1}{4} [ \widetilde{J}^2 - 1
+// - 2\textrm{ln}\; \widetilde{J} ]$,  where $\kappa:= \lambda + 2/3 \mu$ is
+// the <a href="http://en.wikipedia.org/wiki/Bulk_modulus">bulk modulus</a>
+// and $\lambda$ is <a
+// href="http://en.wikipedia.org/wiki/Lam%C3%A9_parameters">Lame's first
+// parameter</a>.
+//
+// The following class will be used to characterize the material we work with,
+// and provides a central point that one would need to modify if one were to
+// implement a different material model. For it to work, we will store one
+// object of this type per quadrature point, and in each of these objects
+// store the current state (characterized by the values or measures of the three fields)
+// so that we can compute the elastic coefficients linearized around the
+// current state.
+  template <int dim>
+  class Material_Compressible_Neo_Hook_Three_Field
+  {
+  public:
+    Material_Compressible_Neo_Hook_Three_Field(const double mu,
+                                               const double nu)
+      :
+      kappa((2.0 * mu * (1.0 + nu)) / (3.0 * (1.0 - 2.0 * nu))),
+      c_1(mu / 2.0),
+      det_F(1.0),
+      p_tilde(0.0),
+      J_tilde(1.0),
+      b_bar(StandardTensors<dim>::I)
+    {
+      Assert(kappa > 0, ExcInternalError());
+    }
+
+    ~Material_Compressible_Neo_Hook_Three_Field()
+    {}
+
+    // We update the material model with various deformation dependent data
+    // based on $F$ and the pressure $\widetilde{p}$ and dilatation
+    // $\widetilde{J}$, and at the end of the function include a physical
+    // check for internal consistency:
+    void update_material_data(const Tensor<2, dim> &F,
+                              const double p_tilde_in,
+                              const double J_tilde_in)
+    {
+      det_F = determinant(F);
+      b_bar = std::pow(det_F, -2.0 / 3.0) * symmetrize(F * transpose(F));
+      p_tilde = p_tilde_in;
+      J_tilde = J_tilde_in;
+
+      Assert(det_F > 0, ExcInternalError());
+    }
+
+    // The second function determines the Kirchhoff stress $\boldsymbol{\tau}
+    // = \boldsymbol{\tau}_{\textrm{iso}} + \boldsymbol{\tau}_{\textrm{vol}}$ (read-only, like get_Jc()):
+    SymmetricTensor<2, dim> get_tau() const
+    {
+      return get_tau_iso() + get_tau_vol();
+    }
+
+    // The fourth-order elasticity tensor in the spatial setting
+    // $\mathfrak{c}$ is calculated from the SEF $\Psi$ as $ J
+    // \mathfrak{c}_{ijkl} = F_{iA} F_{jB} \mathfrak{C}_{ABCD} F_{kC} F_{lD}$
+    // where $ \mathfrak{C} = 4 \frac{\partial^2 \Psi(\mathbf{C})}{\partial
+    // \mathbf{C} \partial \mathbf{C}}$
+    SymmetricTensor<4, dim> get_Jc() const
+    {
+      return get_Jc_vol() + get_Jc_iso();
+    }
+
+    // Derivative of the volumetric free energy with respect to
+    // $\widetilde{J}$, i.e., return $\frac{\partial
+    // \Psi_{\text{vol}}(\widetilde{J})}{\partial \widetilde{J}}$:
+    double get_dPsi_vol_dJ() const
+    {
+      return (kappa / 2.0) * (J_tilde - 1.0 / J_tilde);
+    }
+
+    // Second derivative of the volumetric free energy wrt $\widetilde{J}$. We
+    // need the following computation explicitly in the tangent so we make it
+    // public.  We calculate $\frac{\partial^2
+    // \Psi_{\textrm{vol}}(\widetilde{J})}{\partial \widetilde{J} \partial
+    // \widetilde{J}}$
+    double get_d2Psi_vol_dJ2() const
+    {
+      return ( (kappa / 2.0) * (1.0 + 1.0 / (J_tilde * J_tilde)));
+    }
+
+    // The next few functions return various data that we choose to store with
+    // the material:
+    double get_det_F() const
+    {
+      return det_F;
+    }
+
+    double get_p_tilde() const
+    {
+      return p_tilde;
+    }
+
+    double get_J_tilde() const
+    {
+      return J_tilde;
+    }
+
+  protected:
+    // Define constitutive model parameters $\kappa$ (bulk modulus) and the
+    // neo-Hookean model parameter $c_1$:
+    const double kappa;
+    const double c_1;
+
+    // Model specific data that is convenient to store with the material:
+    double det_F;
+    double p_tilde;
+    double J_tilde;
+    SymmetricTensor<2, dim> b_bar;
+
+    // The following functions are used internally in determining the result
+    // of some of the public functions above. The first one determines the
+    // volumetric Kirchhoff stress $\boldsymbol{\tau}_{\textrm{vol}}$:
+    SymmetricTensor<2, dim> get_tau_vol() const
+    {
+      return p_tilde * det_F * StandardTensors<dim>::I;
+    }
+
+    // Next, determine the isochoric Kirchhoff stress
+    // $\boldsymbol{\tau}_{\textrm{iso}} =
+    // \mathcal{P}:\overline{\boldsymbol{\tau}}$:
+    SymmetricTensor<2, dim> get_tau_iso() const
+    {
+      return StandardTensors<dim>::dev_P * get_tau_bar();
+    }
+
+    // Then, determine the fictitious Kirchhoff stress
+    // $\overline{\boldsymbol{\tau}}$:
+    SymmetricTensor<2, dim> get_tau_bar() const
+    {
+      return 2.0 * c_1 * b_bar;
+    }
+
+    // Calculate the volumetric part of the tangent $J
+    // \mathfrak{c}_\textrm{vol}$:
+    SymmetricTensor<4, dim> get_Jc_vol() const
+    {
+
+      return p_tilde * det_F
+             * ( StandardTensors<dim>::IxI
+                 - (2.0 * StandardTensors<dim>::II) );
+    }
+
+    // Calculate the isochoric part of the tangent $J
+    // \mathfrak{c}_\textrm{iso}$:
+    SymmetricTensor<4, dim> get_Jc_iso() const
+    {
+      const SymmetricTensor<2, dim> tau_bar = get_tau_bar();
+      const SymmetricTensor<2, dim> tau_iso = get_tau_iso();
+      const SymmetricTensor<4, dim> tau_iso_x_I
+        = outer_product(tau_iso,
+                        StandardTensors<dim>::I);
+      const SymmetricTensor<4, dim> I_x_tau_iso
+        = outer_product(StandardTensors<dim>::I,
+                        tau_iso);
+      const SymmetricTensor<4, dim> c_bar = get_c_bar();
+
+      return (2.0 / 3.0) * trace(tau_bar)
+             * StandardTensors<dim>::dev_P
+             - (2.0 / 3.0) * (tau_iso_x_I + I_x_tau_iso)
+             + StandardTensors<dim>::dev_P * c_bar
+             * StandardTensors<dim>::dev_P;
+    }
+
+    // Calculate the fictitious elasticity tensor $\overline{\mathfrak{c}}$.
+    // For the material model chosen this is simply zero:
+    SymmetricTensor<4, dim> get_c_bar() const
+    {
+      return SymmetricTensor<4, dim>();
+    }
+  };
+
+// @sect3{Quadrature point history}
+
+// As seen in step-18, the <code> PointHistory </code> class offers a method
+// for storing data at the quadrature points.  Here each quadrature point
+// holds a pointer to a material description.  Thus, different material models
+// can be used in different regions of the domain.  Among other data, we
+// choose to store the Kirchhoff stress $\boldsymbol{\tau}$ and the tangent
+// $J\mathfrak{c}$ for the quadrature points.
+  template <int dim>
+  class PointHistory
+  {
+  public:
+    PointHistory()
+      :
+      material(NULL),
+      F_inv(StandardTensors<dim>::I),
+      tau(SymmetricTensor<2, dim>()),
+      d2Psi_vol_dJ2(0.0),
+      dPsi_vol_dJ(0.0),
+      Jc(SymmetricTensor<4, dim>())
+    {}
+
+    virtual ~PointHistory()
+    {
+      delete material;
+      material = NULL;
+    }
+
+    // The first function is used to create a material object and to
+    // initialize all tensors correctly. The second one updates the stored
+    // values and stresses based on the current deformation measure
+    // $\textrm{Grad}\mathbf{u}_{\textrm{n}}$, pressure $\widetilde{p}$ and
+    // dilatation $\widetilde{J}$ field values.
+    void setup_lqp (const Parameters::AllParameters &parameters)
+    {
+      material = new Material_Compressible_Neo_Hook_Three_Field<dim>(parameters.mu,
+          parameters.nu);
+      update_values(Tensor<2, dim>(), 0.0, 1.0);
+    }
+
+    // To this end, we calculate the deformation gradient $\mathbf{F}$ from
+    // the displacement gradient $\textrm{Grad}\ \mathbf{u}$, i.e.
+    // $\mathbf{F}(\mathbf{u}) = \mathbf{I} + \textrm{Grad}\ \mathbf{u}$ and
+    // then let the material model associated with this quadrature point
+    // update itself. When computing the deformation gradient, we have to take
+    // care with which data types we compute the sum $\mathbf{I} +
+    // \textrm{Grad}\ \mathbf{u}$: Since $I$ has data type SymmetricTensor,
+    // just writing <code>I + Grad_u_n</code> would convert the second
+    // argument to a symmetric tensor, perform the sum, and then cast the
+    // result to a Tensor (i.e., the type of a possibly nonsymmetric
+    // tensor). However, since <code>Grad_u_n</code> is nonsymmetric in
+    // general, the conversion to SymmetricTensor will fail. We can avoid this
+    // back and forth by converting $I$ to Tensor first, and then performing
+    // the addition as between nonsymmetric tensors:
+    void update_values (const Tensor<2, dim> &Grad_u_n,
+                        const double p_tilde,
+                        const double J_tilde)
+    {
+      const Tensor<2, dim> F
+        = (Tensor<2, dim>(StandardTensors<dim>::I) +
+           Grad_u_n);
+      material->update_material_data(F, p_tilde, J_tilde);
+
+      // The material has been updated so we now calculate the Kirchhoff
+      // stress $\mathbf{\tau}$, the tangent $J\mathfrak{c}$ and the first and
+      // second derivatives of the volumetric free energy.
+      //
+      // We also store the inverse of the deformation gradient since we
+      // frequently use it:
+      F_inv = invert(F);
+      tau = material->get_tau();
+      Jc = material->get_Jc();
+      dPsi_vol_dJ = material->get_dPsi_vol_dJ();
+      d2Psi_vol_dJ2 = material->get_d2Psi_vol_dJ2();
+
+    }
+
+    // We offer an interface to retrieve certain data.  Here are the kinematic
+    // variables:
+    double get_J_tilde() const
+    {
+      return material->get_J_tilde();
+    }
+
+    double get_det_F() const
+    {
+      return material->get_det_F();
+    }
+
+    const Tensor<2, dim> &get_F_inv() const
+    {
+      return F_inv;
+    }
+
+    // ...and the kinetic variables.  These are used in the material and
+    // global tangent matrix and residual assembly operations:
+    double get_p_tilde() const
+    {
+      return material->get_p_tilde();
+    }
+
+    const SymmetricTensor<2, dim> &get_tau() const
+    {
+      return tau;
+    }
+
+    double get_dPsi_vol_dJ() const
+    {
+      return dPsi_vol_dJ;
+    }
+
+    double get_d2Psi_vol_dJ2() const
+    {
+      return d2Psi_vol_dJ2;
+    }
+
+    // And finally the tangent:
+    const SymmetricTensor<4, dim> &get_Jc() const
+    {
+      return Jc;
+    }
+
+    // In terms of member variables, this class stores for the quadrature
+    // point it represents a pointer to its own material object in case
+    // different materials are used in different regions of the domain, as
+    // well as the inverse of the deformation gradient...
+  private:
+    Material_Compressible_Neo_Hook_Three_Field<dim> *material;
+
+    Tensor<2, dim> F_inv;
+
+    // ... and stress-type variables along with the tangent $J\mathfrak{c}$:
+    SymmetricTensor<2, dim> tau;
+    double                  d2Psi_vol_dJ2;
+    double                  dPsi_vol_dJ;
+
+    SymmetricTensor<4, dim> Jc;
+  };
+
+
+// @sect3{Quasi-static quasi-incompressible finite-strain solid}
+
+// The Solid class is the central class in that it represents the problem at
+// hand. It follows the usual scheme in that all it really has is a
+// constructor, destructor and a <code>run()</code> function that dispatches
+// all the work to private functions of this class:
+  template <int dim>
+  class Solid
+  {
+  public:
+    Solid(const std::string &input_file);
+
+    virtual
+    ~Solid();
+
+    void
+    run();
+
+  private:
+
+    // In the private section of this class, we first forward declare a number
+    // of objects that are used in parallelizing work using the WorkStream
+    // object (see the @ref threads module for more information on this).
+    //
+    // We declare such structures for the computation of tangent (stiffness)
+    // matrix, right hand side, static condensation, and for updating
+    // quadrature points:
+    struct PerTaskData_K;
+    struct ScratchData_K;
+
+    struct PerTaskData_RHS;
+    struct ScratchData_RHS;
+
+    struct PerTaskData_SC;
+    struct ScratchData_SC;
+
+    struct PerTaskData_UQPH;
+    struct ScratchData_UQPH;
+
+    // We start the collection of member functions with one that builds the
+    // grid:
+    void
+    make_grid();
+
+    // Set up the finite element system to be solved:
+    void
+    system_setup();
+
+    void
+    determine_component_extractors();
+
+    // Several functions to assemble the tangent matrix, the right hand side
+    // vector and the static condensation using multithreading. Each comes as
+    // a wrapper function, one that is executed to do the work in the
+    // WorkStream model on one cell, and one that copies the work done on
+    // this one cell into the global object that represents it:
+    void
+    assemble_system_tangent();
+
+    void
+    assemble_system_tangent_one_cell(const typename DoFHandler<dim>::active_cell_iterator &cell,
+                                     ScratchData_K &scratch,
+                                     PerTaskData_K &data);
+
+    void
+    copy_local_to_global_K(const PerTaskData_K &data);
+
+    void
+    assemble_system_rhs();
+
+    void
+    assemble_system_rhs_one_cell(const typename DoFHandler<dim>::active_cell_iterator &cell,
+                                 ScratchData_RHS &scratch,
+                                 PerTaskData_RHS &data);
+
+    void
+    copy_local_to_global_rhs(const PerTaskData_RHS &data);
+
+    void
+    assemble_sc();
+
+    void
+    assemble_sc_one_cell(const typename DoFHandler<dim>::active_cell_iterator &cell,
+                         ScratchData_SC &scratch,
+                         PerTaskData_SC &data);
+
+    void
+    copy_local_to_global_sc(const PerTaskData_SC &data);
+
+    // Apply Dirichlet boundary conditions on the displacement field
+    void
+    make_constraints(const int &it_nr);
+
+    // Create and update the quadrature points. Here, no data needs to be
+    // copied into a global object, so the copy_local_to_global function is
+    // empty:
+    void
+    setup_qph();
+
+    void
+    update_qph_incremental(const BlockVector<double> &solution_delta);
+
+    void
+    update_qph_incremental_one_cell(const typename DoFHandler<dim>::active_cell_iterator &cell,
+                                    ScratchData_UQPH &scratch,
+                                    PerTaskData_UQPH &data);
+
+    void
+    copy_local_to_global_UQPH(const PerTaskData_UQPH &/*data*/)
+    {}
+
+    // Solve for the displacement using a Newton-Raphson method. We break this
+    // function into the nonlinear loop and the function that solves the
+    // linearized Newton-Raphson step:
+    void
+    solve_nonlinear_timestep(BlockVector<double> &solution_delta);
+
+    std::pair<unsigned int, double>
+    solve_linear_system(BlockVector<double> &newton_update);
+
+    // Solution retrieval as well as post-processing and writing data to file:
+    BlockVector<double>
+    get_total_solution(const BlockVector<double> &solution_delta) const;
+
+    void
+    output_results() const;
+
+    // Finally, some member variables that describe the current state: A
+    // collection of the parameters used to describe the problem setup...
+    Parameters::AllParameters        parameters;
+
+    // ...the volume of the reference and current configurations...
+    double                           vol_reference;
+    double                           vol_current;
+
+    // ...and description of the geometry on which the problem is solved:
+    Triangulation<dim>               triangulation;
+
+    // Also, keep track of the current time and the time spent evaluating
+    // certain functions
+    Time                             time;
+    TimerOutput                      timer;
+
+    // A storage object for quadrature point information.  See step-18 for
+    // more on this:
+    std::vector<PointHistory<dim> >  quadrature_point_history;
+
+    // A description of the finite-element system including the displacement
+    // polynomial degree, the degree-of-freedom handler, number of DoFs per
+    // cell and the extractor objects used to retrieve information from the
+    // solution vectors:
+    const unsigned int               degree;
+    const FESystem<dim>              fe;
+    DoFHandler<dim>                  dof_handler_ref;
+    const unsigned int               dofs_per_cell;
+    const FEValuesExtractors::Vector u_fe;
+    const FEValuesExtractors::Scalar p_fe;
+    const FEValuesExtractors::Scalar J_fe;
+
+    // Description of how the block-system is arranged. There are 3 blocks,
+    // the first contains a vector DOF $\mathbf{u}$ while the other two
+    // describe scalar DOFs, $\widetilde{p}$ and $\widetilde{J}$.
+    static const unsigned int        n_blocks = 3;
+    static const unsigned int        n_components = dim + 2;
+    static const unsigned int        first_u_component = 0;
+    static const unsigned int        p_component = dim;
+    static const unsigned int        J_component = dim + 1;
+
+    enum
+    {
+      u_dof = 0,
+      p_dof = 1,
+      J_dof = 2
+    };
+
+    std::vector<types::global_dof_index>  dofs_per_block;
+    std::vector<types::global_dof_index>        element_indices_u;
+    std::vector<types::global_dof_index>        element_indices_p;
+    std::vector<types::global_dof_index>        element_indices_J;
+
+    // Rules for Gauss-quadrature on both the cell and faces. The number of
+    // quadrature points on both cells and faces is recorded.
+    const QGauss<dim>                qf_cell;
+    const QGauss<dim - 1>            qf_face;
+    const unsigned int               n_q_points;
+    const unsigned int               n_q_points_f;
+
+    // Objects that store the converged solution and right-hand side vectors,
+    // as well as the tangent matrix. There is a ConstraintMatrix object used
+    // to keep track of constraints.  We make use of a sparsity pattern
+    // designed for a block system.
+    ConstraintMatrix                 constraints;
+    BlockSparsityPattern             sparsity_pattern;
+    BlockSparseMatrix<double>        tangent_matrix;
+    BlockVector<double>              system_rhs;
+    BlockVector<double>              solution_n;
+
+    // Then define a number of variables to store norms and update norms and
+    // normalisation factors.
+    struct Errors
+    {
+      Errors()
+        :
+        norm(1.0), u(1.0), p(1.0), J(1.0)
+      {}
+
+      void reset()
+      {
+        norm = 1.0;
+        u = 1.0;
+        p = 1.0;
+        J = 1.0;
+      }
+      void normalise(const Errors &rhs)
+      {
+        if (rhs.norm != 0.0)
+          norm /= rhs.norm;
+        if (rhs.u != 0.0)
+          u /= rhs.u;
+        if (rhs.p != 0.0)
+          p /= rhs.p;
+        if (rhs.J != 0.0)
+          J /= rhs.J;
+      }
+
+      double norm, u, p, J;
+    };
+
+    Errors error_residual, error_residual_0, error_residual_norm, error_update,
+           error_update_0, error_update_norm;
+
+    // Methods to calculate error measures
+    void
+    get_error_residual(Errors &error_residual);
+
+    void
+    get_error_update(const BlockVector<double> &newton_update,
+                     Errors &error_update);
+
+    std::pair<double, double>
+    get_error_dilation();
+
+    // Print information to screen in a pleasing way...
+    static
+    void
+    print_conv_header();
+
+    void
+    print_conv_footer();
+  };
+
+// @sect3{Implementation of the <code>Solid</code> class}
+
+// @sect4{Public interface}
+
+// We initialise the Solid class using data extracted from the parameter file.
+  template <int dim>
+  Solid<dim>::Solid(const std::string &input_file)
+    :
+    parameters(input_file),
+    triangulation(Triangulation<dim>::maximum_smoothing),
+    time(parameters.end_time, parameters.delta_t),
+    timer(std::cout,
+          TimerOutput::summary,
+          TimerOutput::wall_times),
+    degree(parameters.poly_degree),
+    // The Finite Element System is composed of dim continuous displacement
+    // DOFs, and discontinuous pressure and dilatation DOFs. In an attempt to
+    // satisfy the Babuska-Brezzi or LBB stability conditions (see Hughes
+    // (2000)), we setup a $Q_n \times DGPM_{n-1} \times DGPM_{n-1}$
+    // system. $Q_2 \times DGPM_1 \times DGPM_1$ elements satisfy this
+    // condition, while $Q_1 \times DGPM_0 \times DGPM_0$ elements do
+    // not. However, it has been shown that the latter demonstrate good
+    // convergence characteristics nonetheless.
+    fe(FE_Q<dim>(parameters.poly_degree), dim, // displacement
+       FE_DGPMonomial<dim>(parameters.poly_degree - 1), 1, // pressure
+       FE_DGPMonomial<dim>(parameters.poly_degree - 1), 1), // dilatation
+    dof_handler_ref(triangulation),
+    dofs_per_cell (fe.dofs_per_cell),
+    u_fe(first_u_component),
+    p_fe(p_component),
+    J_fe(J_component),
+    dofs_per_block(n_blocks),
+    qf_cell(parameters.quad_order),
+    qf_face(parameters.quad_order),
+    n_q_points (qf_cell.size()),
+    n_q_points_f (qf_face.size())
+  {
+    determine_component_extractors();
+  }
+
+// The class destructor simply clears the data held by the DoFHandler.
+  template <int dim>
+  Solid<dim>::~Solid()
+  {
+    dof_handler_ref.clear();
+  }
+
+
+// In solving the quasi-static problem, the time becomes a loading parameter,
+// i.e. we increase the loading linearly with time, making the two concepts
+// interchangeable. We choose to increment time linearly using a constant time
+// step size.
+//
+// We start the function with preprocessing, setting the initial dilatation
+// values, and then output the initial grid before starting the simulation
+// proper with the first time (and loading)
+// increment.
+//
+// Care must be taken (or at least some thought given) when imposing the
+// constraint $\widetilde{J}=1$ on the initial solution field. The constraint
+// corresponds to the determinant of the deformation gradient in the undeformed
+// configuration, which is the identity tensor.
+// We use FE_DGPMonomial bases to interpolate the dilatation field, thus we can't
+// simply set the corresponding dof to unity as they correspond to the
+// monomial coefficients. Thus we use the VectorTools::project function to do
+// the work for us. The VectorTools::project function requires an argument
+// indicating the hanging node constraints. We have none in this program,
+// so we have to create an empty constraint object. In its original state, a
+// constraint object is unsorted, and has to be sorted (using the ConstraintMatrix::close
+// function) before it can be used. Have a look at step-21 for more information.
+// We only need to enforce the initial condition on the dilatation.
+// In order to do this, we make use of a ComponentSelectFunction which acts
+// as a mask and sets the J_component of n_components to 1. This is exactly what
+// we want. Have a look at its usage in step-20 for more information.
+  template <int dim>
+  void Solid<dim>::run()
+  {
+    make_grid();
+    system_setup();
+    {
+      ConstraintMatrix constraints;
+      constraints.close();
+
+      const ComponentSelectFunction<dim>
+      J_mask (J_component, n_components);
+
+      VectorTools::project (dof_handler_ref,
+                            constraints,
+                            QGauss<dim>(degree+2),
+                            J_mask,
+                            solution_n);
+    }
+    output_results();
+    time.increment();
+
+    // We then declare the incremental solution update $\varDelta
+    // \mathbf{\Xi}:= \{\varDelta \mathbf{u},\varDelta \widetilde{p},
+    // \varDelta \widetilde{J} \}$ and start the loop over the time domain.
+    //
+    // At the beginning, we reset the solution update for this time step...
+    BlockVector<double> solution_delta(dofs_per_block);
+    while (time.current() < time.end())
+      {
+        solution_delta = 0.0;
+
+        // ...solve the current time step and update total solution vector
+        // $\mathbf{\Xi}_{\textrm{n}} = \mathbf{\Xi}_{\textrm{n-1}} +
+        // \varDelta \mathbf{\Xi}$...
+        solve_nonlinear_timestep(solution_delta);
+        solution_n += solution_delta;
+
+        // ...and plot the results before moving on happily to the next time
+        // step:
+        output_results();
+        time.increment();
+      }
+  }
+
+
+// @sect3{Private interface}
+
+// @sect4{Threading-building-blocks structures}
+
+// The first group of private member functions is related to parallelization.
+// We use the Threading Building Blocks library (TBB) to perform as many
+// computationally intensive distributed tasks as possible. In particular, we
+// assemble the tangent matrix and right hand side vector, the static
+// condensation contributions, and update data stored at the quadrature points
+// using TBB. Our main tool for this is the WorkStream class (see the @ref
+// threads module for more information).
+
+// Firstly we deal with the tangent matrix assembly structures.  The
+// PerTaskData object stores local contributions.
+  template <int dim>
+  struct Solid<dim>::PerTaskData_K
+  {
+    FullMatrix<double>        cell_matrix;
+    std::vector<types::global_dof_index> local_dof_indices;
+
+    PerTaskData_K(const unsigned int dofs_per_cell)
+      :
+      cell_matrix(dofs_per_cell, dofs_per_cell),
+      local_dof_indices(dofs_per_cell)
+    {}
+
+    void reset()
+    {
+      cell_matrix = 0.0;
+    }
+  };
+
+
+// On the other hand, the ScratchData object stores the larger objects such as
+// the shape-function values array (<code>Nx</code>) and a shape function
+// gradient and symmetric gradient vector which we will use during the
+// assembly.
+  template <int dim>
+  struct Solid<dim>::ScratchData_K
+  {
+    FEValues<dim> fe_values_ref;
+
+    std::vector<std::vector<double> >                   Nx;
+    std::vector<std::vector<Tensor<2, dim> > >          grad_Nx;
+    std::vector<std::vector<SymmetricTensor<2, dim> > > symm_grad_Nx;
+
+    ScratchData_K(const FiniteElement<dim> &fe_cell,
+                  const QGauss<dim> &qf_cell,
+                  const UpdateFlags uf_cell)
+      :
+      fe_values_ref(fe_cell, qf_cell, uf_cell),
+      Nx(qf_cell.size(),
+         std::vector<double>(fe_cell.dofs_per_cell)),
+      grad_Nx(qf_cell.size(),
+              std::vector<Tensor<2, dim> >(fe_cell.dofs_per_cell)),
+      symm_grad_Nx(qf_cell.size(),
+                   std::vector<SymmetricTensor<2, dim> >
+                   (fe_cell.dofs_per_cell))
+    {}
+
+    ScratchData_K(const ScratchData_K &rhs)
+      :
+      fe_values_ref(rhs.fe_values_ref.get_fe(),
+                    rhs.fe_values_ref.get_quadrature(),
+                    rhs.fe_values_ref.get_update_flags()),
+      Nx(rhs.Nx),
+      grad_Nx(rhs.grad_Nx),
+      symm_grad_Nx(rhs.symm_grad_Nx)
+    {}
+
+    void reset()
+    {
+      const unsigned int n_q_points = Nx.size();
+      const unsigned int n_dofs_per_cell = Nx[0].size();
+      for (unsigned int q_point = 0; q_point < n_q_points; ++q_point)
+        {
+          Assert( Nx[q_point].size() == n_dofs_per_cell, ExcInternalError());
+          Assert( grad_Nx[q_point].size() == n_dofs_per_cell,
+                  ExcInternalError());
+          Assert( symm_grad_Nx[q_point].size() == n_dofs_per_cell,
+                  ExcInternalError());
+          for (unsigned int k = 0; k < n_dofs_per_cell; ++k)
+            {
+              Nx[q_point][k] = 0.0;
+              grad_Nx[q_point][k] = 0.0;
+              symm_grad_Nx[q_point][k] = 0.0;
+            }
+        }
+    }
+
+  };
+
+// Next, the same approach is used for the right-hand side assembly.  The
+// PerTaskData object again stores local contributions and the ScratchData
+// object the shape function object and precomputed values vector:
+  template <int dim>
+  struct Solid<dim>::PerTaskData_RHS
+  {
+    Vector<double>            cell_rhs;
+    std::vector<types::global_dof_index> local_dof_indices;
+
+    PerTaskData_RHS(const unsigned int dofs_per_cell)
+      :
+      cell_rhs(dofs_per_cell),
+      local_dof_indices(dofs_per_cell)
+    {}
+
+    void reset()
+    {
+      cell_rhs = 0.0;
+    }
+  };
+
+
+  template <int dim>
+  struct Solid<dim>::ScratchData_RHS
+  {
+    // Shape function evaluators for cell interiors and for cell faces.
+    FEValues<dim>     fe_values_ref;
+    FEFaceValues<dim> fe_face_values_ref;
+
+    // Tables of precomputed values, addressed as [quadrature point][cell DOF].
+    std::vector<std::vector<double> >                   Nx;
+    std::vector<std::vector<SymmetricTensor<2, dim> > > symm_grad_Nx;
+
+    // Build both evaluators and size the tables to (number of cell
+    // quadrature points) x (DOFs per cell).
+    ScratchData_RHS(const FiniteElement<dim> &fe_cell,
+                    const QGauss<dim>        &qf_cell,
+                    const UpdateFlags         uf_cell,
+                    const QGauss<dim - 1>    &qf_face,
+                    const UpdateFlags         uf_face)
+      : fe_values_ref(fe_cell, qf_cell, uf_cell),
+        fe_face_values_ref(fe_cell, qf_face, uf_face),
+        Nx(qf_cell.size(), std::vector<double>(fe_cell.dofs_per_cell)),
+        symm_grad_Nx(qf_cell.size(),
+                     std::vector<SymmetricTensor<2, dim> >(fe_cell.dofs_per_cell))
+    {}
+
+    // Copy constructor: the evaluators are constructed afresh from the
+    // finite element, quadrature and update flags of <code>rhs</code>,
+    // while the tables are copied.
+    ScratchData_RHS(const ScratchData_RHS &rhs)
+      : fe_values_ref(rhs.fe_values_ref.get_fe(),
+                      rhs.fe_values_ref.get_quadrature(),
+                      rhs.fe_values_ref.get_update_flags()),
+        fe_face_values_ref(rhs.fe_face_values_ref.get_fe(),
+                           rhs.fe_face_values_ref.get_quadrature(),
+                           rhs.fe_face_values_ref.get_update_flags()),
+        Nx(rhs.Nx),
+        symm_grad_Nx(rhs.symm_grad_Nx)
+    {}
+
+    // Overwrite every table entry with zero. In debug mode we also verify
+    // that all rows of both tables have the same length.
+    void reset()
+    {
+      const unsigned int n_rows = Nx.size();
+      const unsigned int row_length = Nx[0].size();
+      for (unsigned int q = 0; q < n_rows; ++q)
+        {
+          Assert(Nx[q].size() == row_length, ExcInternalError());
+          Assert(symm_grad_Nx[q].size() == row_length, ExcInternalError());
+
+          for (unsigned int i = 0; i < row_length; ++i)
+            {
+              Nx[q][i] = 0.0;
+              symm_grad_Nx[q][i] = 0.0;
+            }
+        }
+    }
+  };
+
+// Then we define structures to assemble the statically condensed tangent
+// matrix. Recall that we wish to solve for a displacement-based formulation.
+// We do the condensation at the element level as the $\widetilde{p}$ and
+// $\widetilde{J}$ fields are element-wise discontinuous.  As these operations
+// are matrix-based, we need to setup a number of matrices to store the local
+// contributions from a number of the tangent matrix sub-blocks.  We place
+// these in the PerTaskData struct.
+//
+// We choose not to reset any data in the <code>reset()</code> function as the
+// matrix extraction and replacement tools will take care of this
+  template <int dim>
+  struct Solid<dim>::PerTaskData_SC
+  {
+    // Full cell matrix and one global DOF index per cell DOF.
+    FullMatrix<double>                   cell_matrix;
+    std::vector<types::global_dof_index> local_dof_indices;
+
+    // Work matrices for the sub-blocks; their dimensions are fixed by the
+    // constructor below.
+    FullMatrix<double> k_orig;
+    FullMatrix<double> k_pu;
+    FullMatrix<double> k_pJ;
+    FullMatrix<double> k_JJ;
+    FullMatrix<double> k_pJ_inv;
+    FullMatrix<double> k_bbar;
+    FullMatrix<double> A;
+    FullMatrix<double> B;
+    FullMatrix<double> C;
+
+    // Allocate all matrices. <code>n_u</code>, <code>n_p</code> and
+    // <code>n_J</code> give the row/column counts of the sub-block
+    // matrices; <code>dofs_per_cell</code> sizes the full cell matrices
+    // and the index list.
+    PerTaskData_SC(const unsigned int dofs_per_cell,
+                   const unsigned int n_u,
+                   const unsigned int n_p,
+                   const unsigned int n_J)
+      : cell_matrix(dofs_per_cell, dofs_per_cell),
+        local_dof_indices(dofs_per_cell),
+        k_orig(dofs_per_cell, dofs_per_cell),
+        k_pu(n_p, n_u),
+        k_pJ(n_p, n_J),
+        k_JJ(n_J, n_J),
+        k_pJ_inv(n_p, n_J),
+        k_bbar(n_u, n_u),
+        A(n_J, n_u),
+        B(n_J, n_u),
+        C(n_p, n_u)
+    {}
+
+    // Deliberately a no-op: no stored data is cleared here.
+    void reset() {}
+  };
+
+
+// The ScratchData object for the operations we wish to perform here is empty
+// since we need no temporary data, but it still needs to be defined for the
+// current implementation of TBB in deal.II.  So we create a dummy struct for
+// this purpose.
+  template <int dim>
+  struct Solid<dim>::ScratchData_SC
+  {
+    // Placeholder type without state, so there is nothing to reset.
+    void reset() {}
+  };
+
+
+// And finally we define the structures to assist with updating the quadrature
+// point information. Similar to the SC assembly process, we do not need the
+// PerTaskData object (since there is nothing to store here) but must define
+// one nonetheless. Note that this is because for the operation that we have
+// here -- updating the data on quadrature points -- the operation is purely
+// local: the things we do on every cell get consumed on every cell, without
+// any global aggregation operation as is usually the case when using the
+// WorkStream class. The fact that we still have to define a per-task data
+// structure points to the fact that the WorkStream class may be ill-suited to
+// this operation (we could, in principle simply create a new task using
+// Threads::new_task for each cell) but there is not much harm done to doing
+// it this way anyway.
+// Furthermore, should there be different material models associated with a
+// quadrature point, requiring varying levels of computational expense, then
+// the method used here could be advantageous.
+  template <int dim>
+  struct Solid<dim>::PerTaskData_UQPH
+  {
+    // Placeholder type without state, so there is nothing to reset.
+    void reset() {}
+  };
+
+
+// The ScratchData object will be used to store an alias for the solution
+// vector so that we don't have to copy this large data structure. We then
+// define a number of vectors to extract the solution values and gradients at
+// the quadrature points.
+  template <int dim>
+  struct Solid<dim>::ScratchData_UQPH
+  {
+    // Alias (not a copy) of the total solution vector. The referenced
+    // vector must outlive this object.
+    const BlockVector<double>   &solution_total;
+
+    // Solution data extracted at the quadrature points of a cell, one
+    // entry per quadrature point.
+    std::vector<Tensor<2, dim> > solution_grads_u_total;
+    std::vector<double>          solution_values_p_total;
+    std::vector<double>          solution_values_J_total;
+
+    FEValues<dim>                fe_values_ref;
+
+    // Bind the solution alias, size the three buffers to the number of
+    // quadrature points and create the shape function evaluator.
+    ScratchData_UQPH(const FiniteElement<dim>  &fe_cell,
+                     const QGauss<dim>         &qf_cell,
+                     const UpdateFlags          uf_cell,
+                     const BlockVector<double> &solution_total)
+      : solution_total(solution_total),
+        solution_grads_u_total(qf_cell.size()),
+        solution_values_p_total(qf_cell.size()),
+        solution_values_J_total(qf_cell.size()),
+        fe_values_ref(fe_cell, qf_cell, uf_cell)
+    {}
+
+    // Copy constructor: the copy aliases the same solution vector, gets
+    // copies of the buffers and a freshly constructed FEValues object with
+    // the finite element, quadrature and update flags of <code>rhs</code>.
+    ScratchData_UQPH(const ScratchData_UQPH &rhs)
+      : solution_total(rhs.solution_total),
+        solution_grads_u_total(rhs.solution_grads_u_total),
+        solution_values_p_total(rhs.solution_values_p_total),
+        solution_values_J_total(rhs.solution_values_J_total),
+        fe_values_ref(rhs.fe_values_ref.get_fe(),
+                      rhs.fe_values_ref.get_quadrature(),
+                      rhs.fe_values_ref.get_update_flags())
+    {}
+
+    // Zero the three quadrature point buffers. The loop bound is taken
+    // from the gradient buffer; all three are given the same length by
+    // the constructors.
+    void reset()
+    {
+      const unsigned int n_entries = solution_grads_u_total.size();
+      for (unsigned int i = 0; i < n_entries; ++i)
+        {
+          solution_grads_u_total[i] = 0.0;
+          solution_values_p_total[i] = 0.0;
+          solution_values_J_total[i] = 0.0;
+        }
+    }
+  };
+
+
+// @sect4{Solid::make_grid}
+
+// On to the first of the private member functions. Here we create the
+// triangulation of the domain, for which we choose the scaled cube with each
+// face given a boundary ID number.  The grid must be refined at least once
+// for the indentation problem.
+//
+// We then determine the volume of the reference configuration and print it
+// for comparison:
+  template <int dim>
+  void Solid<dim>::make_grid()
+  {
+    // Create the box between the origin and (1,1,1), scale it by the
+    // user-supplied factor and refine it globally at least once.
+    // NOTE(review): the three-coordinate Point constructor and the use of
+    // coordinate index 2 below presume dim == 3 -- confirm before
+    // instantiating for another dimension.
+    GridGenerator::hyper_rectangle(triangulation,
+                                   Point<dim>(0.0, 0.0, 0.0),
+                                   Point<dim>(1.0, 1.0, 1.0),
+                                   true);
+    GridTools::scale(parameters.scale, triangulation);
+    triangulation.refine_global(std::max (1U, parameters.global_refinement));
+
+    // No deformation has happened yet, so the current volume starts out
+    // equal to the reference volume.
+    vol_reference = GridTools::volume(triangulation);
+    vol_current = vol_reference;
+    std::cout << "Grid:\n\t Reference volume: " << vol_reference << std::endl;
+
+    // Mark the patch that receives the Neumann BC with boundary ID 6. A
+    // face belongs to the patch if it lies on the boundary, the third
+    // coordinate of its center equals the scaled upper bound of the box,
+    // and the first two coordinates of its center are below half the
+    // scaled edge length.
+    for (typename Triangulation<dim>::active_cell_iterator
+         cell = triangulation.begin_active();
+         cell != triangulation.end(); ++cell)
+      for (unsigned int f = 0; f < GeometryInfo<dim>::faces_per_cell; ++f)
+        {
+          if (!cell->face(f)->at_boundary())
+            continue;
+
+          const Point<dim> face_center = cell->face(f)->center();
+          if (face_center[2] == 1.0 * parameters.scale
+              && face_center[0] < 0.5 * parameters.scale
+              && face_center[1] < 0.5 * parameters.scale)
+            cell->face(f)->set_boundary_id(6);
+        }
+  }
+
+
+// @sect4{Solid::system_setup}
+
+// Next we describe how the FE system is setup.  We first determine the number
+// of components per block. Since the displacement is a vector component, the
+// first dim components belong to it, while the next two describe scalar
+// pressure and dilatation DOFs.
+  template <int dim>
+  void Solid<dim>::system_setup()
+  {
+    // Everything in this function is accounted to the "Setup system"
+    // timer section, which is closed again at the very end.
+    timer.enter_subsection("Setup system");
+
+    // Map every vector component to its block: all components default to
+    // the displacement block, then the pressure and dilatation components
+    // are reassigned to their own blocks.
+    std::vector<unsigned int> block_component(n_components, u_dof); // Displacement
+    block_component[p_component] = p_dof; // Pressure
+    block_component[J_component] = J_dof; // Dilatation
+
+    // The DOF handler is then initialised and we renumber the grid in an
+    // efficient manner. We also record the number of DOFs per block.
+    dof_handler_ref.distribute_dofs(fe);
+    DoFRenumbering::Cuthill_McKee(dof_handler_ref);
+    DoFRenumbering::component_wise(dof_handler_ref, block_component);
+    DoFTools::count_dofs_per_block(dof_handler_ref, dofs_per_block,
+                                   block_component);
+
+    std::cout << "Triangulation:"
+              << "\n\t Number of active cells: " << triangulation.n_active_cells()
+              << "\n\t Number of degrees of freedom: " << dof_handler_ref.n_dofs()
+              << std::endl;
+
+    // Setup the sparsity pattern and tangent matrix
+    tangent_matrix.clear();
+    {
+      const types::global_dof_index n_dofs_u = dofs_per_block[u_dof];
+      const types::global_dof_index n_dofs_p = dofs_per_block[p_dof];
+      const types::global_dof_index n_dofs_J = dofs_per_block[J_dof];
+
+      BlockDynamicSparsityPattern dsp(n_blocks, n_blocks);
+
+      // Give each of the 3x3 blocks its dimensions: rows from the first
+      // block index, columns from the second.
+      dsp.block(u_dof, u_dof).reinit(n_dofs_u, n_dofs_u);
+      dsp.block(u_dof, p_dof).reinit(n_dofs_u, n_dofs_p);
+      dsp.block(u_dof, J_dof).reinit(n_dofs_u, n_dofs_J);
+
+      dsp.block(p_dof, u_dof).reinit(n_dofs_p, n_dofs_u);
+      dsp.block(p_dof, p_dof).reinit(n_dofs_p, n_dofs_p);
+      dsp.block(p_dof, J_dof).reinit(n_dofs_p, n_dofs_J);
+
+      dsp.block(J_dof, u_dof).reinit(n_dofs_J, n_dofs_u);
+      dsp.block(J_dof, p_dof).reinit(n_dofs_J, n_dofs_p);
+      dsp.block(J_dof, J_dof).reinit(n_dofs_J, n_dofs_J);
+      dsp.collect_sizes();
+
+      // The global system matrix initially has the following structure
+      // @f{align*}
+      // \underbrace{\begin{bmatrix}
+      //   \mathbf{\mathsf{K}}_{uu}  & \mathbf{\mathsf{K}}_{u\widetilde{p}} & \mathbf{0}
+      //   \\ \mathbf{\mathsf{K}}_{\widetilde{p}u} & \mathbf{0} & \mathbf{\mathsf{K}}_{\widetilde{p}\widetilde{J}}
+      //   \\ \mathbf{0} & \mathbf{\mathsf{K}}_{\widetilde{J}\widetilde{p}} & \mathbf{\mathsf{K}}_{\widetilde{J}\widetilde{J}}
+      // \end{bmatrix}}_{\mathbf{\mathsf{K}}(\mathbf{\Xi}_{\textrm{i}})}
+      //      \underbrace{\begin{bmatrix}
+      //          d \mathbf{\mathsf{u}}
+      //      \\  d \widetilde{\mathbf{\mathsf{p}}}
+      //      \\  d \widetilde{\mathbf{\mathsf{J}}}
+      //      \end{bmatrix}}_{d \mathbf{\Xi}}
+      // =
+      // \underbrace{\begin{bmatrix}
+      //  \mathbf{\mathsf{F}}_{u}(\mathbf{u}_{\textrm{i}})
+      //  \\ \mathbf{\mathsf{F}}_{\widetilde{p}}(\widetilde{p}_{\textrm{i}})
+      //  \\ \mathbf{\mathsf{F}}_{\widetilde{J}}(\widetilde{J}_{\textrm{i}})
+      //\end{bmatrix}}_{ \mathbf{\mathsf{F}}(\mathbf{\Xi}_{\textrm{i}}) } \, .
+      // @f}
+      // We optimise the sparsity pattern to reflect this structure
+      // and prevent unnecessary data creation for the block components
+      // that are identically zero: the coupling table below switches off
+      // the displacement-dilatation, dilatation-displacement and
+      // pressure-pressure couplings and keeps all others.
+      Table<2, DoFTools::Coupling> coupling(n_components, n_components);
+      for (unsigned int ii = 0; ii < n_components; ++ii)
+        for (unsigned int jj = 0; jj < n_components; ++jj)
+          if (((ii < p_component) && (jj == J_component))
+              || ((ii == J_component) && (jj < p_component))
+              || ((ii == p_component) && (jj == p_component)))
+            coupling[ii][jj] = DoFTools::none;
+          else
+            coupling[ii][jj] = DoFTools::always;
+      DoFTools::make_sparsity_pattern(dof_handler_ref,
+                                      coupling,
+                                      dsp,
+                                      constraints,
+                                      false);
+      sparsity_pattern.copy_from(dsp);
+    }
+
+    // The matrix takes its layout from the sparsity pattern just built.
+    tangent_matrix.reinit(sparsity_pattern);
+
+    // We then set up storage vectors
+    system_rhs.reinit(dofs_per_block);
+    system_rhs.collect_sizes();
+
+    solution_n.reinit(dofs_per_block);
+    solution_n.collect_sizes();
+
+    // ...and finally set up the quadrature
+    // point history:
+    setup_qph();
+
+    timer.leave_subsection();
+  }
+
+
+// @sect4{Solid::determine_component_extractors}
+// Next we compute some information from the FE system that describes which local
+// element DOFs are attached to which block component.  This is used later to
+// extract sub-blocks from the global matrix.
+//
+// In essence, all we need is for the FESystem object to indicate to which
+// block component a DOF on the reference cell is attached to.  Currently, the
+// interpolation fields are setup such that 0 indicates a displacement DOF, 1
+// a pressure DOF and 2 a dilatation DOF.
+  template <int dim>
+  void
+  Solid<dim>::determine_component_extractors()
+  {
+    // Start from empty lists so that a repeated call does not append to
+    // the result of an earlier one.
+    element_indices_u.clear();
+    element_indices_p.clear();
+    element_indices_J.clear();
+
+    // File every cell-local DOF index under the base element (block) that
+    // the FE system reports for it.
+    for (unsigned int local_dof = 0; local_dof < fe.dofs_per_cell; ++local_dof)
+      {
+        const unsigned int block = fe.system_to_base_index(local_dof).first.first;
+
+        if (block == u_dof)
+          {
+            element_indices_u.push_back(local_dof);
+            continue;
+          }
+        if (block == p_dof)
+          {
+            element_indices_p.push_back(local_dof);
+            continue;
+          }
+        if (block == J_dof)
+          {
+            element_indices_J.push_back(local_dof);
+            continue;
+          }
+
+        // None of the three known blocks matched: flag this in debug mode
+        // if the index lies beyond the dilatation block.
+        Assert(block <= J_dof, ExcInternalError());
+      }
+  }
+
+// @sect4{Solid::setup_qph}
+// The method used to store quadrature information is already described in
+// step-18. Here we implement a similar setup for an SMP machine.
+//
+// Firstly the actual QPH data objects are created. This must be done only
+// once the grid is refined to its finest level.
+  template <int dim>
+  void Solid<dim>::setup_qph()
+  {
+    typedef typename Triangulation<dim>::active_cell_iterator ActiveCellIterator;
+
+    std::cout << "    Setting up quadrature point data..." << std::endl;
+
+    // Forget all user pointers stored in the triangulation and release the
+    // memory of the old history by swapping it into a temporary that is
+    // destroyed immediately.
+    triangulation.clear_user_data();
+    std::vector<PointHistory<dim> >().swap(quadrature_point_history);
+
+    // One PointHistory object per quadrature point of every active cell.
+    quadrature_point_history
+    .resize(triangulation.n_active_cells() * n_q_points);
+
+    // Hand every active cell a pointer to the first of its n_q_points
+    // consecutive entries in the history array.
+    unsigned int first_entry = 0;
+    for (ActiveCellIterator cell = triangulation.begin_active();
+         cell != triangulation.end();
+         ++cell, first_entry += n_q_points)
+      cell->set_user_pointer(&quadrature_point_history[first_entry]);
+
+    // All entries must have been handed out exactly once.
+    Assert(first_entry == quadrature_point_history.size(),
+           ExcInternalError());
+
+    // Now initialise the data stored at each quadrature point from the
+    // run-time parameters.
+    for (ActiveCellIterator cell = triangulation.begin_active();
+         cell != triangulation.end(); ++cell)
+      {
+        PointHistory<dim> *const cell_history =
+          reinterpret_cast<PointHistory<dim>*>(cell->user_pointer());
+
+        // The pointer has to point into the history array.
+        Assert(cell_history >= &quadrature_point_history.front(),
+               ExcInternalError());
+        Assert(cell_history <= &quadrature_point_history.back(),
+               ExcInternalError());
+
+        for (unsigned int q = 0; q < n_q_points; ++q)
+          cell_history[q].setup_lqp(parameters);
+      }
+  }
+
+// @sect4{Solid::update_qph_incremental}
+// As the update of QP information occurs frequently and involves a number of
+// expensive operations, we define a multithreaded approach to distributing
+// the task across a number of CPU cores.
+//
+// To start this, we first need to obtain the total solution as it stands
+// at this Newton increment and then create the initial copy of the scratch and
+// copy data objects:
+  template <int dim>
+  void Solid<dim>::update_qph_incremental(const BlockVector<double> &solution_delta)
+  {
+    timer.enter_subsection("Update QPH data");
+    std::cout << " UQPH " << std::flush;
+
+    // Total solution for the given increment. The scratch object only
+    // stores a reference to it, so this local has to stay alive until
+    // WorkStream::run() has returned.
+    const BlockVector<double> solution_total(get_total_solution(solution_delta));
+
+    // Initial scratch and per-task objects handed to the WorkStream.
+    ScratchData_UQPH scratch_data_UQPH(fe,
+                                       qf_cell,
+                                       update_values | update_gradients,
+                                       solution_total);
+    PerTaskData_UQPH per_task_data_UQPH;
+
+    // Process all active cells with the one-cell update function.
+    WorkStream::run(dof_handler_ref.begin_active(),
+                    dof_handler_ref.end(),
+                    *this,
+                    &Solid::update_qph_incremental_one_cell,
+                    &Solid::copy_local_to_global_UQPH,
+                    scratch_data_UQPH,
+                    per_task_data_UQPH);
+
+    timer.leave_subsection();
+  }
+
+
+// Now we describe how we extract data from the solution vector and pass it
+// along to each QP storage object for processing.
+  template <int dim>
+  void
+  Solid<dim>::update_qph_incremental_one_cell(const typename DoFHandler<dim>::active_cell_iterator &cell,
+                                              ScratchData_UQPH &scratch,
+                                              PerTaskData_UQPH &/*data*/)
+  {
+    // Quadrature point history entries belonging to this cell.
+    PointHistory<dim> *const cell_history =
+      reinterpret_cast<PointHistory<dim>*>(cell->user_pointer());
+
+    // Sanity checks (debug mode only): the pointer lies inside the history
+    // array and the scratch buffers have one entry per quadrature point.
+    Assert(cell_history >= &quadrature_point_history.front(),
+           ExcInternalError());
+    Assert(cell_history <= &quadrature_point_history.back(),
+           ExcInternalError());
+    Assert(scratch.solution_grads_u_total.size() == n_q_points,
+           ExcInternalError());
+    Assert(scratch.solution_values_p_total.size() == n_q_points,
+           ExcInternalError());
+    Assert(scratch.solution_values_J_total.size() == n_q_points,
+           ExcInternalError());
+
+    scratch.reset();
+
+    // Evaluate the total solution on this cell: displacement gradients as
+    // well as pressure and dilatation values at all quadrature points.
+    FEValues<dim> &fe_values = scratch.fe_values_ref;
+    fe_values.reinit(cell);
+    fe_values[u_fe].get_function_gradients(scratch.solution_total,
+                                           scratch.solution_grads_u_total);
+    fe_values[p_fe].get_function_values(scratch.solution_total,
+                                        scratch.solution_values_p_total);
+    fe_values[J_fe].get_function_values(scratch.solution_total,
+                                        scratch.solution_values_J_total);
+
+    // Pass the extracted data on to each quadrature point of the cell.
+    for (unsigned int q = 0; q < n_q_points; ++q)
+      cell_history[q].update_values(scratch.solution_grads_u_total[q],
+                                    scratch.solution_values_p_total[q],
+                                    scratch.solution_values_J_total[q]);
+  }
+
+
+// @sect4{Solid::solve_nonlinear_timestep}
+
+// The next function is the driver method for the Newton-Raphson scheme. At
+// its top we create a new vector to store the current Newton update step,
+// reset the error storage objects and print solver header.
+  template <int dim>
+  void
+  Solid<dim>::solve_nonlinear_timestep(BlockVector<double> &solution_delta)
+  {
+    // On entry <code>solution_delta</code> holds the solution increment
+    // accumulated so far for this time step; every Newton update computed
+    // below is added to it. An exception is thrown if the iteration limit
+    // is reached without the convergence check succeeding.
+    std::cout << std::endl << "Timestep " << time.get_timestep() << " @ "
+              << time.current() << "s" << std::endl;
+
+    BlockVector<double> newton_update(dofs_per_block);
+
+    error_residual.reset();
+    error_residual_0.reset();
+    error_residual_norm.reset();
+    error_update.reset();
+    error_update_0.reset();
+    error_update_norm.reset();
+
+    print_conv_header();
+
+    // We now perform a number of Newton iterations to iteratively solve the
+    // nonlinear problem.  Since the problem is fully nonlinear and we are
+    // using a full Newton method, the data stored in the tangent matrix and
+    // right-hand side vector is not reusable and must be cleared at each
+    // Newton step.  We then initially build the right-hand side vector to
+    // check for convergence (and store this value in the first iteration).
+    // The unconstrained DOFs of the rhs vector hold the out-of-balance
+    // forces. The building is done before assembling the system matrix as the
+    // latter is an expensive operation and we can potentially avoid an extra
+    // assembly process by not assembling the tangent matrix when convergence
+    // is attained.
+    unsigned int newton_iteration = 0;
+    for (; newton_iteration < parameters.max_iterations_NR;
+         ++newton_iteration)
+      {
+        std::cout << " " << std::setw(2) << newton_iteration << " " << std::flush;
+
+        tangent_matrix = 0.0;
+        system_rhs = 0.0;
+
+        assemble_system_rhs();
+        get_error_residual(error_residual);
+
+        if (newton_iteration == 0)
+          error_residual_0 = error_residual;
+
+        // We can now determine the normalised residual error and check for
+        // solution convergence:
+        error_residual_norm = error_residual;
+        error_residual_norm.normalise(error_residual_0);
+
+        // Convergence can only be declared from the second iteration on,
+        // since the update norm is not available before the first solve.
+        if (newton_iteration > 0 && error_update_norm.u <= parameters.tol_u
+            && error_residual_norm.u <= parameters.tol_f)
+          {
+            std::cout << " CONVERGED! " << std::endl;
+            print_conv_footer();
+
+            break;
+          }
+
+        // If we have decided that we want to continue with the iteration, we
+        // assemble the tangent, make and impose the Dirichlet constraints,
+        // and do the solve of the linearised system:
+        assemble_system_tangent();
+        make_constraints(newton_iteration);
+        constraints.condense(tangent_matrix, system_rhs);
+
+        const std::pair<unsigned int, double>
+        lin_solver_output = solve_linear_system(newton_update);
+
+        get_error_update(newton_update, error_update);
+        if (newton_iteration == 0)
+          error_update_0 = error_update;
+
+        // We can now determine the normalised Newton update error, and
+        // perform the actual update of the solution increment for the current
+        // time step, update all quadrature point information pertaining to
+        // this new displacement and stress state and continue iterating:
+        error_update_norm = error_update;
+        error_update_norm.normalise(error_update_0);
+
+        solution_delta += newton_update;
+        update_qph_incremental(solution_delta);
+
+        std::cout << " | " << std::fixed << std::setprecision(3) << std::setw(7)
+                  << std::scientific << lin_solver_output.first << "  "
+                  << lin_solver_output.second << "  " << error_residual_norm.norm
+                  << "  " << error_residual_norm.u << "  "
+                  << error_residual_norm.p << "  " << error_residual_norm.J
+                  << "  " << error_update_norm.norm << "  " << error_update_norm.u
+                  << "  " << error_update_norm.p << "  " << error_update_norm.J
+                  << "  " << std::endl;
+      }
+
+    // At the end, if it turns out that we have used up all iterations the
+    // parameter file allowed without converging, we raise an exception that
+    // can be caught in the main() function. The call
+    // <code>AssertThrow(condition, exc_object)</code> is in essence
+    // equivalent to <code>if (!cond) throw exc_object;</code> but the former
+    // form fills certain fields in the exception object that identify the
+    // location (filename and line number) where the exception was raised to
+    // make it simpler to identify where the problem happened.
+    //
+    // The loop leaves the counter strictly below the limit only when it was
+    // exited through the <code>break</code> on convergence; if the loop ran
+    // to completion the counter equals the limit. The comparison therefore
+    // has to be strict, otherwise the check could never fail.
+    AssertThrow (newton_iteration < parameters.max_iterations_NR,
+                 ExcMessage("No convergence in nonlinear solver!"));
+  }
+
+
+// @sect4{Solid::print_conv_header and Solid::print_conv_footer}
+
+// This program prints out data in a nice table that is updated
+// on a per-iteration basis. The next two functions set up the table
+// header and footer:
+  template <int dim>
+  void Solid<dim>::print_conv_header()
+  {
+    // Number of underscore characters in a horizontal rule.
+    static const unsigned int l_width = 155;
+
+    // Rule above the column titles.
+    for (unsigned int remaining = l_width; remaining > 0; --remaining)
+      std::cout << "_";
+    std::cout << std::endl;
+
+    // Column titles of the convergence table.
+    std::cout << "                 SOLVER STEP                  "
+              << " |  LIN_IT   LIN_RES    RES_NORM    "
+              << " RES_U     RES_P      RES_J     NU_NORM     "
+              << " NU_U       NU_P       NU_J " << std::endl;
+
+    // Rule below the column titles.
+    for (unsigned int remaining = l_width; remaining > 0; --remaining)
+      std::cout << "_";
+    std::cout << std::endl;
+  }
+
+
+
+  template <int dim>
+  void Solid<dim>::print_conv_footer()
+  {
+    // Number of underscore characters in the horizontal rule.
+    static const unsigned int l_width = 155;
+
+    // Rule that closes the convergence table.
+    for (unsigned int remaining = l_width; remaining > 0; --remaining)
+      std::cout << "_";
+    std::cout << std::endl;
+
+    // Dilatation error and volume ratio. This call also recomputes
+    // vol_current, which is printed below.
+    const std::pair <double,double> dilation_result = get_error_dilation();
+
+    std::cout << "Relative errors:" << std::endl;
+    std::cout << "Displacement:\t" << error_update.u / error_update_0.u << std::endl;
+    std::cout << "Force: \t\t" << error_residual.u / error_residual_0.u << std::endl;
+    std::cout << "Dilatation:\t" << dilation_result.first << std::endl;
+    std::cout << "v / V_0:\t" << vol_current << " / " << vol_reference
+              << " = " << dilation_result.second << std::endl;
+  }
+
+
+// @sect4{Solid::get_error_dilation}
+
+// Calculate how well the dilatation $\widetilde{J}$ agrees with $J :=
+// \textrm{det}\ \mathbf{F}$ from the $L^2$ error $ \bigl[ \int_{\Omega_0} {[ J
+// - \widetilde{J}]}^{2}\textrm{d}V \bigr]^{1/2}$.
+// We also return the ratio of the current volume of the
+// domain to the reference volume. This is of interest for incompressible
+// media where we want to check how well the isochoric constraint has been
+// enforced.
+  template <int dim>
+  std::pair<double, double>
+  Solid<dim>::get_error_dilation()
+  {
+    typedef typename Triangulation<dim>::active_cell_iterator ActiveCellIterator;
+
+    // Running sums: the squared L2 error and the current volume. The
+    // latter is a member variable that is rebuilt from zero on every call.
+    double squared_error = 0.0;
+    vol_current = 0.0;
+
+    // Only the quadrature weights (JxW) are needed from the FEValues object.
+    FEValues<dim> fe_values_ref(fe, qf_cell, update_JxW_values);
+
+    for (ActiveCellIterator cell = triangulation.begin_active();
+         cell != triangulation.end(); ++cell)
+      {
+        fe_values_ref.reinit(cell);
+
+        PointHistory<dim> *const cell_history =
+          reinterpret_cast<PointHistory<dim>*>(cell->user_pointer());
+
+        // The pointer has to point into the history array.
+        Assert(cell_history >= &quadrature_point_history.front(),
+               ExcInternalError());
+        Assert(cell_history <= &quadrature_point_history.back(),
+               ExcInternalError());
+
+        for (unsigned int q = 0; q < n_q_points; ++q)
+          {
+            const double weight   = fe_values_ref.JxW(q);
+            const double det_F    = cell_history[q].get_det_F();
+            const double mismatch = det_F - cell_history[q].get_J_tilde();
+
+            squared_error += std::pow(mismatch, 2) * weight;
+            vol_current += det_F * weight;
+          }
+
+        // The accumulated volume must be positive after every cell.
+        Assert(vol_current > 0, ExcInternalError());
+      }
+
+    // first: L2 error of the dilatation, second: current / reference volume
+    return std::pair<double, double>(std::sqrt(squared_error),
+                                     vol_current / vol_reference);
+  }
+
+
+// @sect4{Solid::get_error_residual}
+
+// Determine the true residual error for the problem.  That is, determine the
+// error in the residual for the unconstrained degrees of freedom.  Note that to
+// do so, we need to ignore constrained DOFs by setting the residual in these
+// vector components to zero.
+  template <int dim>
+  void Solid<dim>::get_error_residual(Errors &error_residual)
+  {
+    // Copy of the right-hand side in which only the entries of
+    // unconstrained DOFs are filled in; all other entries are never
+    // written.
+    BlockVector<double> unconstrained_rhs(dofs_per_block);
+
+    for (unsigned int dof = 0; dof < dof_handler_ref.n_dofs(); ++dof)
+      {
+        if (constraints.is_constrained(dof))
+          continue;
+        unconstrained_rhs(dof) = system_rhs(dof);
+      }
+
+    // l2 norms of the whole vector and of each of its three blocks.
+    error_residual.norm = unconstrained_rhs.l2_norm();
+    error_residual.u    = unconstrained_rhs.block(u_dof).l2_norm();
+    error_residual.p    = unconstrained_rhs.block(p_dof).l2_norm();
+    error_residual.J    = unconstrained_rhs.block(J_dof).l2_norm();
+  }
+
+
+// @sect4{Solid::get_error_update}
+
+// Determine the true Newton update error for the problem
+  template <int dim>
+  void Solid<dim>::get_error_update(const BlockVector<double> &newton_update,
+                                    Errors &error_update)
+  {
+    // Copy of the Newton update in which only the entries of
+    // unconstrained DOFs are filled in; all other entries are never
+    // written.
+    BlockVector<double> unconstrained_update(dofs_per_block);
+
+    for (unsigned int dof = 0; dof < dof_handler_ref.n_dofs(); ++dof)
+      {
+        if (constraints.is_constrained(dof))
+          continue;
+        unconstrained_update(dof) = newton_update(dof);
+      }
+
+    // l2 norms of the whole vector and of each of its three blocks.
+    error_update.norm = unconstrained_update.l2_norm();
+    error_update.u    = unconstrained_update.block(u_dof).l2_norm();
+    error_update.p    = unconstrained_update.block(p_dof).l2_norm();
+    error_update.J    = unconstrained_update.block(J_dof).l2_norm();
+  }
+
+
+
+// @sect4{Solid::get_total_solution}
+
+// This function provides the total solution, which is valid at any Newton step.
+// This is required as, to reduce computational error, the total solution is
+// only updated at the end of the timestep.
+  template <int dim>
+  BlockVector<double>
+  Solid<dim>::get_total_solution(const BlockVector<double> &solution_delta) const
+  {
+    // Start from a copy of the stored solution and add the increment;
+    // neither the stored solution nor the argument is modified.
+    BlockVector<double> total(solution_n);
+    total += solution_delta;
+    return total;
+  }
+
+
+// @sect4{Solid::assemble_system_tangent}
+
+// Since we use TBB for assembly, we simply setup a copy of the
+// data structures required for the process and pass them, along
+// with the memory addresses of the assembly functions to the
+// WorkStream object for processing. Note that we must ensure that
+// the matrix is reset before any assembly operations can occur.
+  template <int dim>
+  void Solid<dim>::assemble_system_tangent()
+  {
+    timer.enter_subsection("Assemble tangent matrix");
+    std::cout << " ASM_K " << std::flush;
+
+    // The matrix has to be empty before contributions are added to it.
+    tangent_matrix = 0.0;
+
+    // Initial scratch and per-task objects handed to the WorkStream. The
+    // scratch object is asked for shape function values, gradients and
+    // quadrature weights.
+    ScratchData_K scratch_data(fe,
+                               qf_cell,
+                               update_values | update_gradients | update_JxW_values);
+    PerTaskData_K per_task_data(dofs_per_cell);
+
+    // Visit all active cells: the first member function works on one
+    // cell, the second one adds the result to the global matrix.
+    WorkStream::run(dof_handler_ref.begin_active(),
+                    dof_handler_ref.end(),
+                    *this,
+                    &Solid::assemble_system_tangent_one_cell,
+                    &Solid::copy_local_to_global_K,
+                    scratch_data,
+                    per_task_data);
+
+    timer.leave_subsection();
+  }
+
+// This function adds the local contribution to the system matrix.
+// Note that we choose not to use the constraint matrix to do the
+// job for us because the tangent matrix and residual processes have
+// been split up into two separate functions.
+  template <int dim>
+  void Solid<dim>::copy_local_to_global_K(const PerTaskData_K &data)
+  {
+    // Add every entry of the cell matrix to the global tangent matrix at
+    // the position given by the global indices of its row and column.
+    for (unsigned int row = 0; row < dofs_per_cell; ++row)
+      {
+        const types::global_dof_index global_row = data.local_dof_indices[row];
+
+        for (unsigned int col = 0; col < dofs_per_cell; ++col)
+          tangent_matrix.add(global_row,
+                             data.local_dof_indices[col],
+                             data.cell_matrix(row, col));
+      }
+  }
+
+// Of course, we still have to define how we assemble the tangent matrix
+// contribution for a single cell.  We first need to reset and initialise some
+// of the scratch data structures and retrieve some basic information
+// regarding the DOF numbering on this cell.  We can precalculate the cell
+// shape function values and gradients. Note that the shape function gradients
+// are defined with regard to the current configuration.  That is
+// $\textrm{grad}\ \boldsymbol{\varphi} = \textrm{Grad}\ \boldsymbol{\varphi}
+// \ \mathbf{F}^{-1}$.
+  template <int dim>
+  void
+  Solid<dim>::assemble_system_tangent_one_cell(const typename DoFHandler<dim>::active_cell_iterator &cell,
+                                               ScratchData_K &scratch,
+                                               PerTaskData_K &data)
+  { // Computes the tangent matrix contribution of `cell` into data.cell_matrix.
+    data.reset();    // reset the per-task output before accumulating into it
+    scratch.reset(); // reset the scratch arrays that are filled below
+    scratch.fe_values_ref.reinit(cell);
+    cell->get_dof_indices(data.local_dof_indices);
+    PointHistory<dim> *lqph = // quadrature point data reached through the cell's user pointer
+      reinterpret_cast<PointHistory<dim>*>(cell->user_pointer());
+    // First pass: tabulate the shape function data for every quadrature point.
+    for (unsigned int q_point = 0; q_point < n_q_points; ++q_point)
+      {
+        const Tensor<2, dim> F_inv = lqph[q_point].get_F_inv();
+        for (unsigned int k = 0; k < dofs_per_cell; ++k)
+          {
+            const unsigned int k_group = fe.system_to_base_index(k).first.first; // group (u, p or J) of DoF k
+
+            if (k_group == u_dof)
+              {
+                scratch.grad_Nx[q_point][k] = scratch.fe_values_ref[u_fe].gradient(k, q_point)
+                                              * F_inv; // reference gradient times F^{-1}
+                scratch.symm_grad_Nx[q_point][k] = symmetrize(scratch.grad_Nx[q_point][k]);
+              }
+            else if (k_group == p_dof)
+              scratch.Nx[q_point][k] = scratch.fe_values_ref[p_fe].value(k,
+                                                                         q_point);
+            else if (k_group == J_dof)
+              scratch.Nx[q_point][k] = scratch.fe_values_ref[J_fe].value(k,
+                                                                         q_point);
+            else
+              Assert(k_group <= J_dof, ExcInternalError()); // reached only if k_group matches none of the three groups
+          }
+      }
+
+    // Now we build the local cell stiffness matrix. Since the global and
+    // local system matrices are symmetric, we only compute the lower half of
+    // the local matrix (entries with $j \le i$) and mirror it into the upper
+    // half afterwards.  So only half of the diagonal blocks
+    // $\mathsf{\mathbf{k}}_{uu}$, $\mathsf{\mathbf{k}}_{\widetilde{p}
+    // \widetilde{p}} = \mathbf{0}$, $\mathsf{\mathbf{k}}_{\widetilde{J}
+    // \widetilde{J}}$ is assembled, while the coupling blocks
+    // $\mathsf{\mathbf{k}}_{\widetilde{p} \widetilde{J}}$,
+    // $\mathsf{\mathbf{k}}_{\mathbf{u} \widetilde{J}} = \mathbf{0}$,
+    // $\mathsf{\mathbf{k}}_{\mathbf{u} \widetilde{p}}$ are obtained in full.
+    //
+    // In doing so, we first extract some configuration dependent variables
+    // from our QPH history objects for the current quadrature point.
+    for (unsigned int q_point = 0; q_point < n_q_points; ++q_point)
+      {
+        const Tensor<2, dim> tau         = lqph[q_point].get_tau();
+        const SymmetricTensor<4, dim> Jc = lqph[q_point].get_Jc();
+        const double d2Psi_vol_dJ2       = lqph[q_point].get_d2Psi_vol_dJ2();
+        const double det_F               = lqph[q_point].get_det_F();
+
+        // Next we define some aliases (references into the scratch arrays
+        // filled in the first pass) to make the assembly easier to follow
+        const std::vector<double>
+        &N = scratch.Nx[q_point];
+        const std::vector<SymmetricTensor<2, dim> >
+        &symm_grad_Nx = scratch.symm_grad_Nx[q_point];
+        const std::vector<Tensor<2, dim> >
+        &grad_Nx = scratch.grad_Nx[q_point];
+        const double JxW = scratch.fe_values_ref.JxW(q_point);
+
+        for (unsigned int i = 0; i < dofs_per_cell; ++i)
+          {
+            const unsigned int component_i = fe.system_to_component_index(i).first;
+            const unsigned int i_group     = fe.system_to_base_index(i).first.first;
+
+            for (unsigned int j = 0; j <= i; ++j) // lower triangle only, diagonal included
+              {
+                const unsigned int component_j = fe.system_to_component_index(j).first;
+                const unsigned int j_group     = fe.system_to_base_index(j).first.first;
+
+                // This is the $\mathsf{\mathbf{k}}_{\mathbf{u} \mathbf{u}}$
+                // contribution. It comprises a material contribution, and a
+                // geometrical stress contribution which is only added when
+                // both shape functions belong to the same component:
+                if ((i_group == j_group) && (i_group == u_dof))
+                  {
+                    data.cell_matrix(i, j) += symm_grad_Nx[i] * Jc // The material contribution:
+                                              * symm_grad_Nx[j] * JxW;
+                    if (component_i == component_j) // geometrical stress contribution
+                      data.cell_matrix(i, j) += grad_Nx[i][component_i] * tau
+                                                * grad_Nx[j][component_j] * JxW;
+                  }
+                // Next is the $\mathsf{\mathbf{k}}_{ \widetilde{p} \mathbf{u}}$ contribution
+                else if ((i_group == p_dof) && (j_group == u_dof))
+                  {
+                    data.cell_matrix(i, j) += N[i] * det_F
+                                              * (symm_grad_Nx[j]
+                                                 * StandardTensors<dim>::I)
+                                              * JxW;
+                  }
+                // and lastly the $\mathsf{\mathbf{k}}_{ \widetilde{J} \widetilde{p}}$
+                // and $\mathsf{\mathbf{k}}_{ \widetilde{J} \widetilde{J}}$
+                // contributions:
+                else if ((i_group == J_dof) && (j_group == p_dof))
+                  data.cell_matrix(i, j) -= N[i] * N[j] * JxW;
+                else if ((i_group == j_group) && (i_group == J_dof))
+                  data.cell_matrix(i, j) += N[i] * d2Psi_vol_dJ2 * N[j] * JxW;
+                else
+                  Assert((i_group <= J_dof) && (j_group <= J_dof), // remaining pairs add nothing; only the group indices are checked
+                         ExcInternalError());
+              }
+          }
+      }
+
+    // Finally, we need to copy the lower half of the local matrix into the
+    // upper half, i.e. entry (i, j) with j > i is taken from entry (j, i):
+    for (unsigned int i = 0; i < dofs_per_cell; ++i)
+      for (unsigned int j = i + 1; j < dofs_per_cell; ++j)
+        data.cell_matrix(i, j) = data.cell_matrix(j, i);
+  }
+
+// @sect4{Solid::assemble_system_rhs}
+// The assembly of the right-hand side process is similar to the
+// tangent matrix, so we will not describe it in too much detail.
+// Note that since we are describing a problem with Neumann BCs,
+// we will need the face normals and so must specify this in the
+// update flags.
+  template <int dim>
+  void Solid<dim>::assemble_system_rhs()
+  {
+    timer.enter_subsection("Assemble system right-hand side");
+    std::cout << " ASM_R " << std::flush;
+
+    // Every cell adds its share to the vector, so it has to be zeroed first.
+    system_rhs = 0.0;
+
+    // Quantities requested on cells, and on faces for the Neumann term
+    // (which is why the face flags ask for normal vectors).
+    const UpdateFlags cell_flags = update_values | update_gradients | update_JxW_values;
+    const UpdateFlags face_flags = update_values | update_normal_vectors | update_JxW_values;
+
+    // Sample copy-data and scratch objects that are handed to WorkStream.
+    PerTaskData_RHS copy_sample(dofs_per_cell);
+    ScratchData_RHS scratch_sample(fe, qf_cell, cell_flags, qf_face, face_flags);
+
+    // Visit all active cells: assemble locally, then add to system_rhs.
+    WorkStream::run(dof_handler_ref.begin_active(), dof_handler_ref.end(),
+                    *this,
+                    &Solid::assemble_system_rhs_one_cell,
+                    &Solid::copy_local_to_global_rhs,
+                    scratch_sample,
+                    copy_sample);
+
+    timer.leave_subsection();
+  }
+
+
+
+  template <int dim>
+  void Solid<dim>::copy_local_to_global_rhs(const PerTaskData_RHS &data)
+  {
+    for (unsigned int k = 0; k != dofs_per_cell; ++k) // add local entry k to its global DoF
+      system_rhs(data.local_dof_indices[k]) += data.cell_rhs(k);
+  }
+
+
+
+  template <int dim>
+  void
+  Solid<dim>::assemble_system_rhs_one_cell(const typename DoFHandler<dim>::active_cell_iterator &cell,
+                                           ScratchData_RHS &scratch,
+                                           PerTaskData_RHS &data)
+  { // Computes the right-hand side contribution of `cell` into data.cell_rhs.
+    data.reset();    // reset the per-task output before accumulating into it
+    scratch.reset(); // reset the scratch arrays that are filled below
+    scratch.fe_values_ref.reinit(cell);
+    cell->get_dof_indices(data.local_dof_indices);
+    PointHistory<dim> *lqph = // quadrature point data reached through the cell's user pointer
+      reinterpret_cast<PointHistory<dim>*>(cell->user_pointer());
+    // First pass: tabulate the shape function data for every quadrature point.
+    for (unsigned int q_point = 0; q_point < n_q_points; ++q_point)
+      {
+        const Tensor<2, dim> F_inv = lqph[q_point].get_F_inv();
+
+        for (unsigned int k = 0; k < dofs_per_cell; ++k)
+          {
+            const unsigned int k_group = fe.system_to_base_index(k).first.first; // group (u, p or J) of DoF k
+
+            if (k_group == u_dof)
+              scratch.symm_grad_Nx[q_point][k]
+                = symmetrize(scratch.fe_values_ref[u_fe].gradient(k, q_point)
+                             * F_inv);
+            else if (k_group == p_dof)
+              scratch.Nx[q_point][k] = scratch.fe_values_ref[p_fe].value(k,
+                                                                         q_point);
+            else if (k_group == J_dof)
+              scratch.Nx[q_point][k] = scratch.fe_values_ref[J_fe].value(k,
+                                                                         q_point);
+            else
+              Assert(k_group <= J_dof, ExcInternalError()); // reached only if k_group matches none of the three groups
+          }
+      }
+    // Second pass: the volume integrals.
+    for (unsigned int q_point = 0; q_point < n_q_points; ++q_point)
+      {
+        const SymmetricTensor<2, dim> tau = lqph[q_point].get_tau();
+        const double det_F = lqph[q_point].get_det_F();
+        const double J_tilde = lqph[q_point].get_J_tilde();
+        const double p_tilde = lqph[q_point].get_p_tilde();
+        const double dPsi_vol_dJ = lqph[q_point].get_dPsi_vol_dJ();
+
+        const std::vector<double>
+        &N = scratch.Nx[q_point];
+        const std::vector<SymmetricTensor<2, dim> >
+        &symm_grad_Nx = scratch.symm_grad_Nx[q_point];
+        const double JxW = scratch.fe_values_ref.JxW(q_point);
+
+        // We first compute the contributions
+        // from the internal forces.  Note, by
+        // definition of the rhs as the negative
+        // of the residual, these contributions
+        // are subtracted.
+        for (unsigned int i = 0; i < dofs_per_cell; ++i)
+          {
+            const unsigned int i_group = fe.system_to_base_index(i).first.first;
+
+            if (i_group == u_dof)
+              data.cell_rhs(i) -= (symm_grad_Nx[i] * tau) * JxW;
+            else if (i_group == p_dof)
+              data.cell_rhs(i) -= N[i] * (det_F - J_tilde) * JxW;
+            else if (i_group == J_dof)
+              data.cell_rhs(i) -= N[i] * (dPsi_vol_dJ - p_tilde) * JxW;
+            else
+              Assert(i_group <= J_dof, ExcInternalError());
+          }
+      }
+
+    // Next we assemble the Neumann contribution. We first check to see if the
+    // cell face lies on the boundary on which a traction is applied (boundary
+    // ID 6) and add the contribution if this is the case.
+    for (unsigned int face = 0; face < GeometryInfo<dim>::faces_per_cell;
+         ++face)
+      if (cell->face(face)->at_boundary() == true
+          && cell->face(face)->boundary_id() == 6)
+        {
+          scratch.fe_face_values_ref.reinit(cell, face);
+
+          for (unsigned int f_q_point = 0; f_q_point < n_q_points_f;
+               ++f_q_point)
+            {
+              const Tensor<1, dim> &N =
+                scratch.fe_face_values_ref.normal_vector(f_q_point);
+
+              // Using the face normal at this quadrature point we specify the
+              // traction in reference configuration. For this problem, a
+              // defined pressure is applied in the reference configuration.
+              // The direction of the applied traction is assumed not to
+              // evolve with the deformation of the domain. The traction,
+              // defined using the first Piola-Kirchhoff stress, is simply
+              // $\mathbf{t} = \mathbf{P}\mathbf{N} = [p_0 \mathbf{I}]
+              // \mathbf{N} = p_0 \mathbf{N}$. We use the time variable to
+              // linearly ramp up the pressure load. Note that p0 depends on
+              // the run-time parameters of this object, so it is an ordinary
+              // local rather than a function-local static. The contributions
+              // to the right hand side vector computed here only exist in the
+              // displacement components of the vector.
+              const double         p0        = -4.0
+                                               /
+                                               (parameters.scale * parameters.scale);
+              const double         time_ramp = (time.current() / time.end());
+              const double         pressure  = p0 * parameters.p_p0 * time_ramp;
+              const Tensor<1, dim> traction  = pressure * N;
+
+              for (unsigned int i = 0; i < dofs_per_cell; ++i)
+                {
+                  const unsigned int i_group =
+                    fe.system_to_base_index(i).first.first;
+
+                  if (i_group == u_dof) // only displacement DoFs receive a traction term
+                    {
+                      const unsigned int component_i =
+                        fe.system_to_component_index(i).first;
+                      const double Ni =
+                        scratch.fe_face_values_ref.shape_value(i,
+                                                               f_q_point);
+                      const double JxW = scratch.fe_face_values_ref.JxW(
+                                           f_q_point);
+
+                      data.cell_rhs(i) += (Ni * traction[component_i])
+                                          * JxW;
+                    }
+                }
+            }
+        }
+  }
+
+// @sect4{Solid::make_constraints}
+// The constraints for this problem are simple to describe.
+// However, since we are dealing with an iterative Newton method,
+// it should be noted that any displacement constraints should only
+// be specified at the zeroth iteration and subsequently no
+// additional contributions are to be made since the constraints
+// are already exactly satisfied.
+  template <int dim>
+  void Solid<dim>::make_constraints(const int &it_nr)
+  {
+    std::cout << " CST " << std::flush;
+
+    // The constraint object only needs to be (re)built for Newton iterations
+    // 0 and 1; from the second iteration onwards the constraints no longer
+    // change, so we return early and keep what was built before.
+    if (it_nr > 1)
+      return;
+
+    constraints.clear();
+    const bool zeroth_iteration = (it_nr == 0);
+
+    // Boundary conditions of the indentation problem:
+    //  - faces -x, -y, -z (IDs 0, 2, 4): symmetry condition, i.e. only
+    //    in-plane movement is allowed;
+    //  - faces +x, +y (IDs 1, 3): traction free, nothing to constrain;
+    //  - part of face +z (ID 5): no motion in the x- and y-directions;
+    //  - remaining part of face +z (ID 6): carries the applied pressure and
+    //    is likewise fixed in the x- and y-directions.
+    //
+    // VectorTools::interpolate_boundary_values has to be told which solution
+    // components to constrain.  This is done with ComponentMask objects (see
+    // @ref GlossComponentMask), which the finite element produces from an
+    // extractor for the component in question.  So we first create the
+    // extractors and then turn them into masks where they are needed.
+    //
+    // Each boundary keeps separate branches for the zeroth and for later
+    // iterations, although both currently impose the same zero values.
+    const FEValuesExtractors::Scalar u_x(0);
+    const FEValuesExtractors::Scalar u_y(1);
+    const FEValuesExtractors::Scalar u_z(2);
+
+    // Boundary ID 0 (-x face): symmetry plane, so the x-displacement is
+    // constrained.
+    {
+      const int face_id = 0;
+
+      if (zeroth_iteration)
+        VectorTools::interpolate_boundary_values(dof_handler_ref, face_id,
+                                                 ZeroFunction<dim>(n_components),
+                                                 constraints,
+                                                 fe.component_mask(u_x));
+      else
+        VectorTools::interpolate_boundary_values(dof_handler_ref, face_id,
+                                                 ZeroFunction<dim>(n_components),
+                                                 constraints,
+                                                 fe.component_mask(u_x));
+    }
+
+    // Boundary ID 2 (-y face): symmetry plane, so the y-displacement is
+    // constrained.
+    {
+      const int face_id = 2;
+
+      if (zeroth_iteration)
+        VectorTools::interpolate_boundary_values(dof_handler_ref, face_id,
+                                                 ZeroFunction<dim>(n_components),
+                                                 constraints,
+                                                 fe.component_mask(u_y));
+      else
+        VectorTools::interpolate_boundary_values(dof_handler_ref, face_id,
+                                                 ZeroFunction<dim>(n_components),
+                                                 constraints,
+                                                 fe.component_mask(u_y));
+    }
+
+    // Boundary ID 4 (-z face): symmetry plane, so the z-displacement is
+    // constrained.
+    {
+      const int face_id = 4;
+
+      if (zeroth_iteration)
+        VectorTools::interpolate_boundary_values(dof_handler_ref, face_id,
+                                                 ZeroFunction<dim>(n_components),
+                                                 constraints,
+                                                 fe.component_mask(u_z));
+      else
+        VectorTools::interpolate_boundary_values(dof_handler_ref, face_id,
+                                                 ZeroFunction<dim>(n_components),
+                                                 constraints,
+                                                 fe.component_mask(u_z));
+    }
+
+    // Boundary ID 5 (part of the +z face): the x- and y-displacements are
+    // constrained.
+    {
+      const int face_id = 5;
+
+      if (zeroth_iteration)
+        VectorTools::interpolate_boundary_values(dof_handler_ref, face_id,
+                                                 ZeroFunction<dim>(n_components),
+                                                 constraints,
+                                                 fe.component_mask(u_x) |
+                                                 fe.component_mask(u_y));
+      else
+        VectorTools::interpolate_boundary_values(dof_handler_ref, face_id,
+                                                 ZeroFunction<dim>(n_components),
+                                                 constraints,
+                                                 fe.component_mask(u_x) |
+                                                 fe.component_mask(u_y));
+    }
+
+    // Boundary ID 6 (loaded part of the +z face): the x- and y-displacements
+    // are constrained here as well.
+    {
+      const int face_id = 6;
+
+      if (zeroth_iteration)
+        VectorTools::interpolate_boundary_values(dof_handler_ref, face_id,
+                                                 ZeroFunction<dim>(n_components),
+                                                 constraints,
+                                                 fe.component_mask(u_x) |
+                                                 fe.component_mask(u_y));
+      else
+        VectorTools::interpolate_boundary_values(dof_handler_ref, face_id,
+                                                 ZeroFunction<dim>(n_components),
+                                                 constraints,
+                                                 fe.component_mask(u_x) |
+                                                 fe.component_mask(u_y));
+    }
+
+    constraints.close();
+  }
+
+// @sect4{Solid::solve_linear_system}
+// Solving the entire block system is a bit problematic as there are no
+// contributions to the $\mathsf{\mathbf{k}}_{ \widetilde{p} \widetilde{p}}$
+// block, rendering it noninvertible.
+// Since the pressure and dilatation variables DOFs are discontinuous, we can
+// condense them out to form a smaller displacement-only system which
+// we will then solve and subsequently post-process to retrieve the
+// pressure and dilatation solutions.
+//
+// At the top, we allocate two temporary vectors to help with the static
+// condensation, and variables to store the number of linear solver iterations
+// and the (hopefully converged) residual.
+//
+// For the following, recall that
+// @f{align*}
+//  \mathbf{\mathsf{K}}_{\textrm{store}}
+//:=
+//  \begin{bmatrix}
+//      \mathbf{\mathsf{K}}_{\textrm{con}}      &       \mathbf{\mathsf{K}}_{u\widetilde{p}}    & \mathbf{0}
+//  \\  \mathbf{\mathsf{K}}_{\widetilde{p}u}    &       \mathbf{0}      &       \mathbf{\mathsf{K}}_{\widetilde{p}\widetilde{J}}^{-1}
+//  \\  \mathbf{0}      &       \mathbf{\mathsf{K}}_{\widetilde{J}\widetilde{p}}                & \mathbf{\mathsf{K}}_{\widetilde{J}\widetilde{J}}
+//  \end{bmatrix} \, .
+// @f}
+// and
+//  @f{align*}
+//              d \widetilde{\mathbf{\mathsf{p}}}
+//              & = \mathbf{\mathsf{K}}_{\widetilde{J}\widetilde{p}}^{-1} \bigl[
+//                       \mathbf{\mathsf{F}}_{\widetilde{J}}
+//                       - \mathbf{\mathsf{K}}_{\widetilde{J}\widetilde{J}} d \widetilde{\mathbf{\mathsf{J}}} \bigr]
+//              \\ d \widetilde{\mathbf{\mathsf{J}}}
+//              & = \mathbf{\mathsf{K}}_{\widetilde{p}\widetilde{J}}^{-1} \bigl[
+//                      \mathbf{\mathsf{F}}_{\widetilde{p}}
+//                      - \mathbf{\mathsf{K}}_{\widetilde{p}u} d \mathbf{\mathsf{u}}
+//                      \bigr]
+//               \\ \Rightarrow d \widetilde{\mathbf{\mathsf{p}}}
+//              &=  \mathbf{\mathsf{K}}_{\widetilde{J}\widetilde{p}}^{-1} \mathbf{\mathsf{F}}_{\widetilde{J}}
+//              - \underbrace{\bigl[\mathbf{\mathsf{K}}_{\widetilde{J}\widetilde{p}}^{-1} \mathbf{\mathsf{K}}_{\widetilde{J}\widetilde{J}}
+//              \mathbf{\mathsf{K}}_{\widetilde{p}\widetilde{J}}^{-1}\bigr]}_{\overline{\mathbf{\mathsf{K}}}}\bigl[ \mathbf{\mathsf{F}}_{\widetilde{p}}
+//              - \mathbf{\mathsf{K}}_{\widetilde{p}u} d \mathbf{\mathsf{u}} \bigr]
+//  @f}
+//  and thus
+//  @f[
+//              \underbrace{\bigl[ \mathbf{\mathsf{K}}_{uu} + \overline{\overline{\mathbf{\mathsf{K}}}}~ \bigr]
+//              }_{\mathbf{\mathsf{K}}_{\textrm{con}}} d \mathbf{\mathsf{u}}
+//              =
+//          \underbrace{
+//              \Bigl[
+//              \mathbf{\mathsf{F}}_{u}
+//                      - \mathbf{\mathsf{K}}_{u\widetilde{p}} \bigl[ \mathbf{\mathsf{K}}_{\widetilde{J}\widetilde{p}}^{-1} \mathbf{\mathsf{F}}_{\widetilde{J}}
+//                      - \overline{\mathbf{\mathsf{K}}}\mathbf{\mathsf{F}}_{\widetilde{p}} \bigr]
+//              \Bigr]}_{\mathbf{\mathsf{F}}_{\textrm{con}}}
+//  @f]
+//  where
+//  @f[
+//              \overline{\overline{\mathbf{\mathsf{K}}}} :=
+//                      \mathbf{\mathsf{K}}_{u\widetilde{p}} \overline{\mathbf{\mathsf{K}}} \mathbf{\mathsf{K}}_{\widetilde{p}u} \, .
+//  @f]
+  template <int dim>
+  std::pair<unsigned int, double>
+  Solid<dim>::solve_linear_system(BlockVector<double> &newton_update)
+  { // Fills newton_update block by block; returns (linear iterations, linear residual).
+    BlockVector<double> A(dofs_per_block); // temporaries that hold the intermediate
+    BlockVector<double> B(dofs_per_block); // products of the static condensation
+
+    unsigned int lin_it = 0;
+    double lin_res = 0.0;
+
+    // In the first step of this function, we solve for the incremental
+    // displacement $d\mathbf{u}$.  To this end, we perform static
+    // condensation to make
+    //    $\mathbf{\mathsf{K}}_{\textrm{con}}
+    //    = \bigl[ \mathbf{\mathsf{K}}_{uu} + \overline{\overline{\mathbf{\mathsf{K}}}}~ \bigr]$
+    // and put
+    // $\mathsf{\mathbf{k}}^{-1}_{\widetilde{p} \widetilde{J}}$
+    // in the original $\mathsf{\mathbf{k}}_{\widetilde{p} \widetilde{J}}$ block.
+    // That is, we make $\mathbf{\mathsf{K}}_{\textrm{store}}$.
+    {
+      assemble_sc();
+
+      //              $
+      //      \mathsf{\mathbf{A}}_{\widetilde{J}}
+      //      =
+      //              \mathsf{\mathbf{K}}^{-1}_{\widetilde{p} \widetilde{J}}
+      //              \mathsf{\mathbf{F}}_{\widetilde{p}}
+      //              $
+      tangent_matrix.block(p_dof, J_dof).vmult(A.block(J_dof),
+                                               system_rhs.block(p_dof));
+      //      $
+      //      \mathsf{\mathbf{B}}_{\widetilde{J}}
+      //      =
+      //      \mathsf{\mathbf{K}}_{\widetilde{J} \widetilde{J}}
+      //      \mathsf{\mathbf{K}}^{-1}_{\widetilde{p} \widetilde{J}}
+      //      \mathsf{\mathbf{F}}_{\widetilde{p}}
+      //      $
+      tangent_matrix.block(J_dof, J_dof).vmult(B.block(J_dof),
+                                               A.block(J_dof));
+      //      $
+      //      \mathsf{\mathbf{A}}_{\widetilde{J}}
+      //      =
+      //      \mathsf{\mathbf{F}}_{\widetilde{J}}
+      //      -
+      //      \mathsf{\mathbf{K}}_{\widetilde{J} \widetilde{J}}
+      //      \mathsf{\mathbf{K}}^{-1}_{\widetilde{p} \widetilde{J}}
+      //      \mathsf{\mathbf{F}}_{\widetilde{p}}
+      //      $
+      A.block(J_dof) = system_rhs.block(J_dof);
+      A.block(J_dof) -= B.block(J_dof);
+      //      $
+      //      \mathsf{\mathbf{A}}_{\widetilde{p}}
+      //      =
+      //      \mathsf{\mathbf{K}}^{-1}_{\widetilde{J} \widetilde{p}}
+      //      [
+      //      \mathsf{\mathbf{F}}_{\widetilde{J}}
+      //      -
+      //      \mathsf{\mathbf{K}}_{\widetilde{J} \widetilde{J}}
+      //      \mathsf{\mathbf{K}}^{-1}_{\widetilde{p} \widetilde{J}}
+      //      \mathsf{\mathbf{F}}_{\widetilde{p}}
+      //      ]
+      //      $
+      tangent_matrix.block(p_dof, J_dof).Tvmult(A.block(p_dof), // transpose product of the stored (p,J) block
+                                                A.block(J_dof));
+      //      $
+      //      \mathsf{\mathbf{A}}_{\mathbf{u}}
+      //      =
+      //      \mathsf{\mathbf{K}}_{\mathbf{u} \widetilde{p}}
+      //      \mathsf{\mathbf{K}}^{-1}_{\widetilde{J} \widetilde{p}}
+      //      [
+      //      \mathsf{\mathbf{F}}_{\widetilde{J}}
+      //      -
+      //      \mathsf{\mathbf{K}}_{\widetilde{J} \widetilde{J}}
+      //      \mathsf{\mathbf{K}}^{-1}_{\widetilde{p} \widetilde{J}}
+      //      \mathsf{\mathbf{F}}_{\widetilde{p}}
+      //      ]
+      //      $
+      tangent_matrix.block(u_dof, p_dof).vmult(A.block(u_dof),
+                                               A.block(p_dof));
+      //      $
+      //      \mathsf{\mathbf{F}}_{\text{con}}
+      //      =
+      //      \mathsf{\mathbf{F}}_{\mathbf{u}}
+      //      -
+      //      \mathsf{\mathbf{K}}_{\mathbf{u} \widetilde{p}}
+      //      \mathsf{\mathbf{K}}^{-1}_{\widetilde{J} \widetilde{p}}
+      //      [
+      //      \mathsf{\mathbf{F}}_{\widetilde{J}}
+      //      -
+      //      \mathsf{\mathbf{K}}_{\widetilde{J} \widetilde{J}}
+      //      \mathsf{\mathbf{K}}^{-1}_{\widetilde{p} \widetilde{J}}
+      //      \mathsf{\mathbf{F}}_{\widetilde{p}}
+      //      ]
+      //      $
+      system_rhs.block(u_dof) -= A.block(u_dof); // the u-block of system_rhs now holds F_con
+
+      timer.enter_subsection("Linear solver");
+      std::cout << " SLV " << std::flush;
+      if (parameters.type_lin == "CG")
+        {
+          const int solver_its = tangent_matrix.block(u_dof, u_dof).m()
+                                 * parameters.max_iterations_lin; // iteration cap scales with the row count of the (u,u) block
+          const double tol_sol = parameters.tol_lin
+                                 * system_rhs.block(u_dof).l2_norm(); // tolerance relative to the norm of the condensed rhs
+
+          SolverControl solver_control(solver_its, tol_sol);
+
+          GrowingVectorMemory<Vector<double> > GVM;
+          SolverCG<Vector<double> > solver_CG(solver_control, GVM);
+
+          // We've chosen by default a SSOR preconditioner as it appears to
+          // provide the fastest solver convergence characteristics for this
+          // problem on a single-thread machine.  However, this might not be
+          // true for different problem sizes.
+          PreconditionSelector<SparseMatrix<double>, Vector<double> >
+          preconditioner (parameters.preconditioner_type,
+                          parameters.preconditioner_relaxation);
+          preconditioner.use_matrix(tangent_matrix.block(u_dof, u_dof));
+
+          solver_CG.solve(tangent_matrix.block(u_dof, u_dof),
+                          newton_update.block(u_dof),
+                          system_rhs.block(u_dof),
+                          preconditioner);
+
+          lin_it = solver_control.last_step();
+          lin_res = solver_control.last_value();
+        }
+      else if (parameters.type_lin == "Direct")
+        {
+          // Otherwise if the problem is small
+          // enough, a direct solver can be
+          // utilised.
+          SparseDirectUMFPACK A_direct;
+          A_direct.initialize(tangent_matrix.block(u_dof, u_dof));
+          A_direct.vmult(newton_update.block(u_dof), system_rhs.block(u_dof));
+
+          lin_it = 1;    // a direct solve is reported as one iteration
+          lin_res = 0.0; // with zero residual
+        }
+      else
+        Assert (false, ExcMessage("Linear solver type not implemented")); // NOTE(review): if Assert is a debug-only check, an unknown type falls through unsolved in optimized builds -- confirm
+
+      timer.leave_subsection();
+    }
+
+    // Now that we have the displacement update, distribute the constraints
+    // back to the Newton update:
+    constraints.distribute(newton_update);
+
+    timer.enter_subsection("Linear solver postprocessing");
+    std::cout << " PP " << std::flush;
+
+    // The next step after solving the displacement
+    // problem is to post-process to get the
+    // dilatation solution from the
+    // substitution:
+    //    $
+    //     d \widetilde{\mathbf{\mathsf{J}}}
+    //      = \mathbf{\mathsf{K}}_{\widetilde{p}\widetilde{J}}^{-1} \bigl[
+    //       \mathbf{\mathsf{F}}_{\widetilde{p}}
+    //     - \mathbf{\mathsf{K}}_{\widetilde{p}u} d \mathbf{\mathsf{u}}
+    //      \bigr]
+    //    $
+    {
+      //      $
+      //      \mathbf{\mathsf{A}}_{\widetilde{p}}
+      //      =
+      //      \mathbf{\mathsf{K}}_{\widetilde{p}u} d \mathbf{\mathsf{u}}
+      //      $
+      tangent_matrix.block(p_dof, u_dof).vmult(A.block(p_dof),
+                                               newton_update.block(u_dof));
+      //      $
+      //      \mathbf{\mathsf{A}}_{\widetilde{p}}
+      //      =
+      //      -\mathbf{\mathsf{K}}_{\widetilde{p}u} d \mathbf{\mathsf{u}}
+      //      $
+      A.block(p_dof) *= -1.0;
+      //      $
+      //      \mathbf{\mathsf{A}}_{\widetilde{p}}
+      //      =
+      //      \mathbf{\mathsf{F}}_{\widetilde{p}}
+      //      -\mathbf{\mathsf{K}}_{\widetilde{p}u} d \mathbf{\mathsf{u}}
+      //      $
+      A.block(p_dof) += system_rhs.block(p_dof);
+      //      $
+      //      d\mathbf{\mathsf{\widetilde{J}}}
+      //      =
+      //      \mathbf{\mathsf{K}}^{-1}_{\widetilde{p}\widetilde{J}}
+      //      [
+      //      \mathbf{\mathsf{F}}_{\widetilde{p}}
+      //      -\mathbf{\mathsf{K}}_{\widetilde{p}u} d \mathbf{\mathsf{u}}
+      //      ]
+      //      $
+      tangent_matrix.block(p_dof, J_dof).vmult(newton_update.block(J_dof),
+                                               A.block(p_dof));
+    }
+
+    // we ensure here that any Dirichlet constraints
+    // are distributed on the updated solution:
+    constraints.distribute(newton_update);
+
+    // Finally we solve for the pressure
+    // update with the substitution:
+    //    $
+    //    d \widetilde{\mathbf{\mathsf{p}}}
+    //     =
+    //    \mathbf{\mathsf{K}}_{\widetilde{J}\widetilde{p}}^{-1}
+    //    \bigl[
+    //     \mathbf{\mathsf{F}}_{\widetilde{J}}
+    //      - \mathbf{\mathsf{K}}_{\widetilde{J}\widetilde{J}}
+    //    d \widetilde{\mathbf{\mathsf{J}}}
+    //    \bigr]
+    //    $
+    {
+      //      $
+      //      \mathsf{\mathbf{A}}_{\widetilde{J}}
+      //       =
+      //      \mathbf{\mathsf{K}}_{\widetilde{J}\widetilde{J}}
+      //      d \widetilde{\mathbf{\mathsf{J}}}
+      //      $
+      tangent_matrix.block(J_dof, J_dof).vmult(A.block(J_dof),
+                                               newton_update.block(J_dof));
+      //      $
+      //      \mathsf{\mathbf{A}}_{\widetilde{J}}
+      //       =
+      //      -\mathbf{\mathsf{K}}_{\widetilde{J}\widetilde{J}}
+      //      d \widetilde{\mathbf{\mathsf{J}}}
+      //      $
+      A.block(J_dof) *= -1.0;
+      //      $
+      //      \mathsf{\mathbf{A}}_{\widetilde{J}}
+      //       =
+      //      \mathsf{\mathbf{F}}_{\widetilde{J}}
+      //      -
+      //      \mathbf{\mathsf{K}}_{\widetilde{J}\widetilde{J}}
+      //      d \widetilde{\mathbf{\mathsf{J}}}
+      //      $
+      A.block(J_dof) += system_rhs.block(J_dof);
+      // and finally....
+      //    $
+      //    d \widetilde{\mathbf{\mathsf{p}}}
+      //     =
+      //    \mathbf{\mathsf{K}}_{\widetilde{J}\widetilde{p}}^{-1}
+      //    \bigl[
+      //     \mathbf{\mathsf{F}}_{\widetilde{J}}
+      //      - \mathbf{\mathsf{K}}_{\widetilde{J}\widetilde{J}}
+      //    d \widetilde{\mathbf{\mathsf{J}}}
+      //    \bigr]
+      //    $
+      tangent_matrix.block(p_dof, J_dof).Tvmult(newton_update.block(p_dof), // transpose product of the stored (p,J) block
+                                                A.block(J_dof));
+    }
+
+    // We are now at the end, so we distribute all
+    // constrained dofs back to the Newton
+    // update:
+    constraints.distribute(newton_update);
+
+    timer.leave_subsection();
+
+    return std::make_pair(lin_it, lin_res); // values recorded by whichever solver branch ran
+  }
+
+// @sect4{Solid::assemble_sc}
+
+// The static condensation process could be performed at a global level but we
+// need the inverse of one of the blocks. However, since the pressure and
+// dilatation variables are discontinuous, the static condensation (SC)
+// operation can be done on a per-cell basis and we can produce the inverse of
+// the block-diagonal $ \mathbf{\mathsf{K}}_{\widetilde{p}\widetilde{J}}$
+// block by inverting the local blocks. We can again
+// use TBB to do this since each operation will be independent of one another.
+//
+// Using the TBB via the WorkStream class, we assemble the contributions to form
+//  $
+//  \mathbf{\mathsf{K}}_{\textrm{con}}
+//  = \bigl[ \mathbf{\mathsf{K}}_{uu} + \overline{\overline{\mathbf{\mathsf{K}}}}~ \bigr]
+//  $
+// from each element's contributions. These
+// contributions are then added to the global stiffness matrix. Given this
+// description, the following two functions should be clear:
+  template <int dim>
+  void Solid<dim>::assemble_sc()
+  {
+    timer.enter_subsection("Perform static condensation");
+    std::cout << " ASM_SC " << std::flush;
+
+    PerTaskData_SC per_task_data(dofs_per_cell, element_indices_u.size(),
+                                 element_indices_p.size(),
+                                 element_indices_J.size());
+    ScratchData_SC scratch_data;
+
+    WorkStream::run(dof_handler_ref.begin_active(),
+                    dof_handler_ref.end(),
+                    *this,
+                    &Solid::assemble_sc_one_cell,
+                    &Solid::copy_local_to_global_sc,
+                    scratch_data,
+                    per_task_data);
+
+    timer.leave_subsection();
+  }
+
+
+  template <int dim>
+  void Solid<dim>::copy_local_to_global_sc(const PerTaskData_SC &data)
+  {
+    for (unsigned int i = 0; i < dofs_per_cell; ++i)
+      for (unsigned int j = 0; j < dofs_per_cell; ++j)
+        tangent_matrix.add(data.local_dof_indices[i],
+                           data.local_dof_indices[j],
+                           data.cell_matrix(i, j));
+  }
+
+
+// Now we describe the static condensation process.  As per usual, we must
+// first find out which global numbers the degrees of freedom on this cell
+// have and reset some data structures:
+  template <int dim>
+  void
+  Solid<dim>::assemble_sc_one_cell(const typename DoFHandler<dim>::active_cell_iterator &cell,
+                                   ScratchData_SC &scratch,
+                                   PerTaskData_SC &data)
+  {
+    data.reset();
+    scratch.reset();
+    cell->get_dof_indices(data.local_dof_indices);
+
+    // We now extract the contribution of the dofs associated with the current
+    // cell to the global stiffness matrix.  The discontinuous nature of the
+    // $\widetilde{p}$ and $\widetilde{J}$ interpolations means that there is
+    // no coupling of the local contributions at the global level. This is not
+    // the case with the u dof.  In other words,
+    // $\mathsf{\mathbf{k}}_{\widetilde{J} \widetilde{p}}$,
+    // $\mathsf{\mathbf{k}}_{\widetilde{p} \widetilde{J}}$ and
+    // $\mathsf{\mathbf{k}}_{\widetilde{J} \widetilde{J}}$, when extracted
+    // from the global stiffness matrix are the element contributions.  This
+    // is not the case for $\mathsf{\mathbf{k}}_{\mathbf{u} \mathbf{u}}$.
+    //
+    // Note: A lower-case symbol is used to denote element stiffness matrices.
+
+    // Currently the matrix corresponding to
+    // the dof associated with the current element
+    // (denoted somewhat loosely as $\mathsf{\mathbf{k}}$)
+    // is of the form:
+    // @f{align*}
+    //    \begin{bmatrix}
+    //       \mathbf{\mathsf{k}}_{uu}  &  \mathbf{\mathsf{k}}_{u\widetilde{p}}    & \mathbf{0}
+    //    \\ \mathbf{\mathsf{k}}_{\widetilde{p}u} & \mathbf{0}  &  \mathbf{\mathsf{k}}_{\widetilde{p}\widetilde{J}}
+    //    \\ \mathbf{0}  &  \mathbf{\mathsf{k}}_{\widetilde{J}\widetilde{p}}  & \mathbf{\mathsf{k}}_{\widetilde{J}\widetilde{J}}
+    //    \end{bmatrix}
+    // @f}
+    //
+    // We now need to modify it such that it appears as
+    // @f{align*}
+    //    \begin{bmatrix}
+    //       \mathbf{\mathsf{k}}_{\textrm{con}}   & \mathbf{\mathsf{k}}_{u\widetilde{p}}    & \mathbf{0}
+    //    \\ \mathbf{\mathsf{k}}_{\widetilde{p}u} & \mathbf{0} & \mathbf{\mathsf{k}}_{\widetilde{p}\widetilde{J}}^{-1}
+    //    \\ \mathbf{0} & \mathbf{\mathsf{k}}_{\widetilde{J}\widetilde{p}} & \mathbf{\mathsf{k}}_{\widetilde{J}\widetilde{J}}
+    //    \end{bmatrix}
+    // @f}
+    // with $\mathbf{\mathsf{k}}_{\textrm{con}} = \bigl[ \mathbf{\mathsf{k}}_{uu} +\overline{\overline{\mathbf{\mathsf{k}}}}~ \bigr]$
+    // where
+    // $               \overline{\overline{\mathbf{\mathsf{k}}}} :=
+    // \mathbf{\mathsf{k}}_{u\widetilde{p}} \overline{\mathbf{\mathsf{k}}} \mathbf{\mathsf{k}}_{\widetilde{p}u}
+    // $
+    // and
+    // $
+    //    \overline{\mathbf{\mathsf{k}}} =
+    //     \mathbf{\mathsf{k}}_{\widetilde{J}\widetilde{p}}^{-1} \mathbf{\mathsf{k}}_{\widetilde{J}\widetilde{J}}
+    //    \mathbf{\mathsf{k}}_{\widetilde{p}\widetilde{J}}^{-1}
+    // $.
+    //
+    // At this point, we need to take note of
+    // the fact that global data already exists
+    // in the $\mathsf{\mathbf{K}}_{uu}$,
+    // $\mathsf{\mathbf{K}}_{\widetilde{p} \widetilde{J}}$
+    // and
+    //  $\mathsf{\mathbf{K}}_{\widetilde{J} \widetilde{p}}$
+    // sub-blocks.  So if we are to modify them, we must account for the data
+    // that is already there (i.e. simply add to it or remove it if
+    // necessary).  Since the copy_local_to_global operation is a "+="
+    // operation, we need to take this into account.
+    //
+    // For the $\mathsf{\mathbf{K}}_{uu}$ block in particular, this means that
+    // contributions have been added from the surrounding cells, so we need to
+    // be careful when we manipulate this block.  We can't just erase the
+    // sub-blocks.
+    //
+    // This is the strategy we will employ to get the sub-blocks we want:
+    //
+    // - $ {\mathbf{\mathsf{k}}}_{\textrm{store}}$:
+    // Since we don't have access to $\mathsf{\mathbf{k}}_{uu}$,
+    // but we know its contribution is added to
+    // the global $\mathsf{\mathbf{K}}_{uu}$ matrix, we just want
+    // to add the element wise
+    // static-condensation $\overline{\overline{\mathbf{\mathsf{k}}}}$.
+    //
+    // - $\mathsf{\mathbf{k}}^{-1}_{\widetilde{p} \widetilde{J}}$:
+    //          Similarly, $\mathsf{\mathbf{k}}_{\widetilde{p} \widetilde{J}}$ exists in
+    //          the subblock. Since the copy
+    //          operation is a += operation, we
+    //          need to subtract the existing
+    //          $\mathsf{\mathbf{k}}_{\widetilde{p} \widetilde{J}}$
+    //          submatrix in addition to
+    //          "adding" that which we wish to
+    //          replace it with.
+    //
+    // - $\mathsf{\mathbf{k}}^{-1}_{\widetilde{J} \widetilde{p}}$:
+    //          Since the global matrix
+    //          is symmetric, this block is the
+    //          same as the one above and we
+    //          can simply use
+    //          $\mathsf{\mathbf{k}}^{-1}_{\widetilde{p} \widetilde{J}}$
+    //          as a substitute for this one.
+    //
+    // We first extract element data from the
+    // system matrix. So first we get the
+    // entire subblock for the cell, then
+    // extract $\mathsf{\mathbf{k}}$
+    // for the dofs associated with
+    // the current element
+    data.k_orig.extract_submatrix_from(tangent_matrix,
+                                       data.local_dof_indices,
+                                       data.local_dof_indices);
+    // and next the local matrices for
+    // $\mathsf{\mathbf{k}}_{ \widetilde{p} \mathbf{u}}$,
+    // $\mathsf{\mathbf{k}}_{ \widetilde{p} \widetilde{J}}$
+    // and
+    // $\mathsf{\mathbf{k}}_{ \widetilde{J} \widetilde{J}}$:
+    data.k_pu.extract_submatrix_from(data.k_orig,
+                                     element_indices_p,
+                                     element_indices_u);
+    data.k_pJ.extract_submatrix_from(data.k_orig,
+                                     element_indices_p,
+                                     element_indices_J);
+    data.k_JJ.extract_submatrix_from(data.k_orig,
+                                     element_indices_J,
+                                     element_indices_J);
+
+    // To get the inverse of $\mathsf{\mathbf{k}}_{\widetilde{p}
+    // \widetilde{J}}$, we invert it directly.  This operation is relatively
+    // inexpensive since $\mathsf{\mathbf{k}}_{\widetilde{p} \widetilde{J}}$
+    // is block-diagonal.
+    data.k_pJ_inv.invert(data.k_pJ);
+
+    // Now we can make condensation terms to
+    // add to the $\mathsf{\mathbf{k}}_{\mathbf{u} \mathbf{u}}$
+    // block and put them in
+    // the cell local matrix
+    //    $
+    //    \mathsf{\mathbf{A}}
+    //    =
+    //    \mathsf{\mathbf{k}}^{-1}_{\widetilde{p} \widetilde{J}}
+    //    \mathsf{\mathbf{k}}_{\widetilde{p} \mathbf{u}}
+    //    $:
+    data.k_pJ_inv.mmult(data.A, data.k_pu);
+    //      $
+    //      \mathsf{\mathbf{B}}
+    //      =
+    //      \mathsf{\mathbf{k}}_{\widetilde{J} \widetilde{J}}
+    //      \mathsf{\mathbf{k}}^{-1}_{\widetilde{p} \widetilde{J}}
+    //      \mathsf{\mathbf{k}}_{\widetilde{p} \mathbf{u}}
+    //      $
+    data.k_JJ.mmult(data.B, data.A);
+    //    $
+    //    \mathsf{\mathbf{C}}
+    //    =
+    //    \mathsf{\mathbf{k}}^{-1}_{\widetilde{J} \widetilde{p}}
+    //    \mathsf{\mathbf{k}}_{\widetilde{J} \widetilde{J}}
+    //    \mathsf{\mathbf{k}}^{-1}_{\widetilde{p} \widetilde{J}}
+    //    \mathsf{\mathbf{k}}_{\widetilde{p} \mathbf{u}}
+    //    $
+    data.k_pJ_inv.Tmmult(data.C, data.B);
+    //    $
+    //    \overline{\overline{\mathsf{\mathbf{k}}}}
+    //    =
+    //    \mathsf{\mathbf{k}}_{\mathbf{u} \widetilde{p}}
+    //    \mathsf{\mathbf{k}}^{-1}_{\widetilde{J} \widetilde{p}}
+    //    \mathsf{\mathbf{k}}_{\widetilde{J} \widetilde{J}}
+    //    \mathsf{\mathbf{k}}^{-1}_{\widetilde{p} \widetilde{J}}
+    //    \mathsf{\mathbf{k}}_{\widetilde{p} \mathbf{u}}
+    //    $
+    data.k_pu.Tmmult(data.k_bbar, data.C);
+    data.k_bbar.scatter_matrix_to(element_indices_u,
+                                  element_indices_u,
+                                  data.cell_matrix);
+
+    // Next we place
+    // $\mathsf{\mathbf{k}}^{-1}_{ \widetilde{p} \widetilde{J}}$
+    // in the
+    // $\mathsf{\mathbf{k}}_{ \widetilde{p} \widetilde{J}}$
+    // block for post-processing.  Note again
+    // that we need to remove the
+    // contribution that already exists there.
+    data.k_pJ_inv.add(-1.0, data.k_pJ);
+    data.k_pJ_inv.scatter_matrix_to(element_indices_p,
+                                    element_indices_J,
+                                    data.cell_matrix);
+  }
+
+// @sect4{Solid::output_results}
+// Here we present how the results are written to file to be viewed
+// using ParaView or VisIt. The method is similar to that shown in previous
+// tutorials so will not be discussed in detail.
+  template <int dim>
+  void Solid<dim>::output_results() const
+  {
+    DataOut<dim> data_out;
+    std::vector<DataComponentInterpretation::DataComponentInterpretation>
+    data_component_interpretation(dim,
+                                  DataComponentInterpretation::component_is_part_of_vector);
+    data_component_interpretation.push_back(DataComponentInterpretation::component_is_scalar);
+    data_component_interpretation.push_back(DataComponentInterpretation::component_is_scalar);
+
+    std::vector<std::string> solution_name(dim, "displacement");
+    solution_name.push_back("pressure");
+    solution_name.push_back("dilatation");
+
+    data_out.attach_dof_handler(dof_handler_ref);
+    data_out.add_data_vector(solution_n,
+                             solution_name,
+                             DataOut<dim>::type_dof_data,
+                             data_component_interpretation);
+
+    // Since we are dealing with a large deformation problem, it would be nice
+    // to display the result on a displaced grid!  The MappingQEulerian class
+    // linked with the DataOut class provides an interface through which this
+    // can be achieved without physically moving the grid points in the
+    // Triangulation object ourselves.  We first need to copy the solution to
+    // a temporary vector and then create the Eulerian mapping. We also
+    // specify the polynomial degree to the DataOut object in order to produce
+    // a more refined output data set when higher order polynomials are used.
+    Vector<double> soln(solution_n.size());
+    for (unsigned int i = 0; i < soln.size(); ++i)
+      soln(i) = solution_n(i);
+    MappingQEulerian<dim> q_mapping(degree, dof_handler_ref, soln);
+    data_out.build_patches(q_mapping, degree);
+
+    std::ostringstream filename;
+    filename << "solution-" << time.get_timestep() << ".vtk";
+
+    std::ofstream output(filename.str().c_str());
+    data_out.write_vtk(output);
+  }
+
+}
+
+
+// @sect3{Main function}
+// Lastly we provide the main driver function which appears
+// no different to the other tutorials.
+int main ()
+{
+  using namespace dealii;
+  using namespace Step44;
+
+  try
+    {
+      Solid<3> solid_3d("parameters.prm");
+      solid_3d.run();
+    }
+  catch (std::exception &exc)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Exception on processing: " << std::endl << exc.what()
+                << std::endl << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+
+      return 1;
+    }
+  catch (...)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Unknown exception!" << std::endl << "Aborting!"
+                << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    }
+
+  return 0;
+}
diff --git a/examples/step-45/CMakeLists.txt b/examples/step-45/CMakeLists.txt
new file mode 100644
index 0000000..8a525dd
--- /dev/null
+++ b/examples/step-45/CMakeLists.txt
@@ -0,0 +1,54 @@
+##
+#  CMake script for the step-45 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-45")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+#
+# Are all dependencies fulfilled?
+#
+IF( NOT DEAL_II_WITH_MPI OR
+    NOT DEAL_II_WITH_P4EST OR
+    NOT DEAL_II_WITH_TRILINOS )
+  MESSAGE(FATAL_ERROR "
+Error! The deal.II library found at ${DEAL_II_PATH} was not configured with
+    DEAL_II_WITH_MPI = ON
+    DEAL_II_WITH_P4EST = ON
+    DEAL_II_WITH_TRILINOS = ON
+One or all of these are OFF in your installation but are required for this tutorial step."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-45/doc/builds-on b/examples/step-45/doc/builds-on
new file mode 100644
index 0000000..1740273
--- /dev/null
+++ b/examples/step-45/doc/builds-on
@@ -0,0 +1 @@
+step-6
diff --git a/examples/step-45/doc/intro.dox b/examples/step-45/doc/intro.dox
new file mode 100644
index 0000000..475c33d
--- /dev/null
+++ b/examples/step-45/doc/intro.dox
@@ -0,0 +1,149 @@
+<br>
+
+<i>This program was contributed by Daniel Arndt and Matthias Maier.</i>
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+In this example we present how to use periodic boundary conditions in
+deal.II. Periodic boundary conditions are algebraic constraints that
+typically occur in computations on representative regions of a larger
+domain that repeat in one or more directions.
+
+An example is the simulation of the electronic structure of photonic
+crystals, because they have a lattice-like structure and, thus, it often
+suffices to do the actual computation on only one box of the lattice. To
+be able to proceed this way one has to assume that the computation can be
+periodically extended to the other boxes; this requires the solution to
+have a periodic structure.
+
+<a name="Procedure"></a>
+<h1>Procedure</h1>
+
+deal.II provides a number of high level entry points to impose periodic
+boundary conditions.
+The general approach to apply periodic boundary conditions consists of
+four steps (see also the
+@ref GlossPeriodicConstraints "Glossary entry on periodic boundary conditions"):
+-# Create a mesh
+-# Identify those pairs of faces on different parts of the boundary across which
+   the solution should be symmetric, using GridTools::collect_periodic_faces()
+-# Add the periodicity information to the mesh
+   using parallel::distributed::Triangulation::add_periodicity()
+-# Add periodicity constraints using DoFTools::make_periodicity_constraints()
+
+The second and third step are necessary for distributed meshes
+to ensure that cells on opposite sides of the domain but connected by periodic
+faces are part of the ghost layer if one of them is stored on the local processor.
+If the Triangulation is not a parallel::distributed::Triangulation,
+these steps have to be omitted.
+
+The face-matching step consists of collecting matching periodic faces and storing them in
+a <code>std::vector</code> of GridTools::PeriodicFacePair. This is done with the
+function GridTools::collect_periodic_faces() that can be invoked for example
+like this:
+@code
+GridTools::collect_periodic_faces(dof_handler,
+                                  b_id1,
+                                  b_id2,
+                                  direction,
+                                  matched_pairs,
+                                  offset = <default value>,
+                                  matrix = <default value>,
+                                  first_vector_components = <default value>);
+@endcode
+
+This call loops over all faces of the container dof_handler on the opposing
+boundaries with boundary indicator @p b_id1 and @p b_id2,
+respectively. If $\text{vertices}_{1/2}$ are the vertices of $\text{face}_{1/2}$,
+it matches pairs of faces (and dofs) such that the difference between $\text{vertices}_2$
+and $\text{matrix}\cdot \text{vertices}_1+\text{offset}$ vanishes in every component apart from direction
+and stores the resulting pairs with associated data in @p matched_pairs. (See
+GridTools::orthogonal_equality() for detailed information about the
+matching process.)
+
+Consider, for example, the colored unit square $\Omega=[0,1]^2$ with boundary
+indicator 0 on the left, 1 on the right, 2 on the bottom and 3 on the top
+faces. Then,
+@code
+GridTools::collect_periodic_faces(dof_handler,
+                                  /*b_id1*/ 0,
+                                  /*b_id2*/ 1,
+                                  /*direction*/ 0,
+                                  matched_pairs);
+@endcode
+would yield periodicity constraints such that $u(0,y)=u(1,y)$ for all
+$y\in[0,1]$.
+
+If we instead consider the parallelogram given by the convex hull of
+$(0,0)$, $(1,1)$, $(1,2)$, $(0,1)$ we can achieve the constraints
+$u(0,y)=u(1,y+1)$ by specifying an @p offset:
+@code
+GridTools::collect_periodic_faces(dof_handler,
+                                  /*b_id1*/ 0,
+                                  /*b_id2*/ 1,
+                                  /*direction*/ 0,
+                                  matched_pairs,
+                                  Tensor<1, 2>(0.,1.));
+@endcode
+or
+@code
+GridTools::collect_periodic_faces(dof_handler,
+                                  /*b_id1*/ 0,
+                                  /*b_id2*/ 1,
+                                  /*arbitrary direction*/ 0,
+                                  matched_pairs,
+                                  Tensor<1, 2>(1.,1.));
+@endcode
+
+The resulting @p matched_pairs can be used in
+DoFTools::make_periodicity_constraints for populating a ConstraintMatrix
+with periodicity constraints:
+@code
+DoFTools::make_periodicity_constraints(matched_pairs, constraints);
+@endcode
+
+Apart from this high level interface there are also variants of
+DoFTools::make_periodicity_constraints available that combine those two
+steps (see the variants of DoFTools::make_periodicity_constraints).
+
+There is also a low level interface to
+DoFTools::make_periodicity_constraints if more flexibility is needed. The
+low level variant allows one to directly specify two faces that shall be
+constrained:
+@code
+using namespace DoFTools;
+make_periodicity_constraints(face_1,
+                             face_2,
+                             constraint_matrix,
+                             component_mask = <default value>,
+                             face_orientation = <default value>,
+                             face_flip = <default value>,
+                             face_rotation = <default value>,
+                             matrix = <default value>);
+@endcode
+Here, we need to specify the orientation of the two faces using
+@p face_orientation, @p face_flip and @p face_rotation. For a closer description
+have a look at the documentation of DoFTools::make_periodicity_constraints.
+The remaining parameters are the same as for the high level interface apart
+from the self-explaining @p component_mask and @p constraint_matrix.
+
+<a name="problem"></a>
+<h1>A practical example</h1>
+
+In the following, we show how to use the above functions in a more involved
+example. The task is to enforce rotated periodicity constraints for the
+velocity component of a Stokes flow.
+
+On a quarter-circle defined by $\Omega=\{{\bf x}\in(0,1)^2:\|{\bf x}\|\in (0.5,1)\}$ we are
+going to solve the Stokes problem
+@f{eqnarray*}
+  -\Delta \; \textbf{u} + \nabla p &=& (\exp(-100*\|{\bf x}-(.75,0.1)^T\|^2),0)^T, \\
+  -\textrm{div}\;  \textbf{u}&=&0,\\
+  \textbf{u}|_{\Gamma_1}&=&{\bf 0},
+@f}
+where the boundary $\Gamma_1$ is defined as $\Gamma_1:=\{x\in \partial\Omega: \|x\|\in\{0.5,1\}\}$.
+For the remaining parts of the boundary we are going to use periodic boundary conditions, i.e.
+@f{align*}
+  u_x(0,\nu)&=-u_y(\nu,0)&\nu&\in[0,1]\\
+  u_y(0,\nu)&=u_x(\nu,0)&\nu&\in[0,1].
+@f}
diff --git a/examples/step-45/doc/kind b/examples/step-45/doc/kind
new file mode 100644
index 0000000..c1d9154
--- /dev/null
+++ b/examples/step-45/doc/kind
@@ -0,0 +1 @@
+techniques
diff --git a/examples/step-45/doc/results.dox b/examples/step-45/doc/results.dox
new file mode 100644
index 0000000..900ae31
--- /dev/null
+++ b/examples/step-45/doc/results.dox
@@ -0,0 +1,10 @@
+<h1>Results</h1>
+
+The created output is not very surprising. We simply see that the solution is
+periodic with respect to the left and lower boundary:
+
+<img src="http://www.dealii.org/images/steps/developer/step-45.periodic.png" alt="">
+
+Without the periodicity constraints we would have ended up with the following solution:
+
+<img src="http://www.dealii.org/images/steps/developer/step-45.non_periodic.png" alt="">
diff --git a/examples/step-45/doc/tooltip b/examples/step-45/doc/tooltip
new file mode 100644
index 0000000..26a6489
--- /dev/null
+++ b/examples/step-45/doc/tooltip
@@ -0,0 +1 @@
+Periodic boundary conditions
diff --git a/examples/step-45/step-45.cc b/examples/step-45/step-45.cc
new file mode 100644
index 0000000..f6b30e5
--- /dev/null
+++ b/examples/step-45/step-45.cc
@@ -0,0 +1,797 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2008 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Author: Daniel Arndt, Matthias Maier, 2015
+ *
+ * Based on step-22 by Wolfgang Bangerth and Martin Kronbichler
+ */
+
+// This example program is a slight modification of step-22 running in parallel
+// using Trilinos to demonstrate the usage of periodic boundary conditions in deal.II.
+// We therefore do not discuss the majority of the source code and only comment on the
+// parts that deal with periodicity constraints. For the rest, have a look at step-22
+// and the full source code at the bottom.
+
+// In order to implement periodic boundary conditions only two functions
+// have to be modified:
+// - <code>StokesProblem<dim>::setup_dofs()</code>: To populate a ConstraintMatrix
+//   object with periodicity constraints
+// - <code>StokesProblem<dim>::run()</code>: To supply a distributed triangulation with
+//   periodicity information.
+
+
+// @cond SKIP
+#include <deal.II/base/conditional_ostream.h>
+
+#include <deal.II/distributed/grid_refinement.h>
+
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/constraint_matrix.h>
+
+#include <deal.II/lac/trilinos_solver.h>
+#include <deal.II/lac/trilinos_precondition.h>
+#include <deal.II/lac/trilinos_block_sparse_matrix.h>
+#include <deal.II/lac/trilinos_parallel_block_vector.h>
+#include <deal.II/lac/block_sparsity_pattern.h>
+
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/tria_boundary_lib.h>
+#include <deal.II/grid/grid_tools.h>
+
+#include <deal.II/dofs/dof_renumbering.h>
+#include <deal.II/dofs/dof_tools.h>
+
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_system.h>
+
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/numerics/error_estimator.h>
+
+namespace Step45
+{
+  using namespace dealii;
+
+  template <int dim>
+  class StokesProblem
+  {
+  public:
+    StokesProblem (const unsigned int degree);
+    void run ();
+
+  private:
+    void create_mesh();
+    void setup_dofs ();
+    void assemble_system ();
+    void solve ();
+    void output_results (const unsigned int refinement_cycle) const;
+    void refine_mesh ();
+
+    const unsigned int   degree;
+
+    MPI_Comm                                    mpi_communicator;
+
+    HyperShellBoundary<dim>                     boundary;
+    parallel::distributed::Triangulation<dim>   triangulation;
+    FESystem<dim>                               fe;
+    DoFHandler<dim>                             dof_handler;
+
+    ConstraintMatrix                            constraints;
+    std::vector<IndexSet>                       owned_partitioning;
+    std::vector<IndexSet>                       relevant_partitioning;
+
+    TrilinosWrappers::BlockSparsityPattern      sparsity_pattern;
+    TrilinosWrappers::BlockSparseMatrix         system_matrix;
+
+    TrilinosWrappers::MPI::BlockVector          solution;
+    TrilinosWrappers::MPI::BlockVector          system_rhs;
+
+    ConditionalOStream                          pcout;
+
+    MappingQ<dim>                               mapping;
+  };
+
+
+
+  template <int dim>
+  class BoundaryValues : public Function<dim>
+  {
+  public:
+    BoundaryValues () : Function<dim>(dim+1) {}
+
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+
+    virtual void vector_value (const Point<dim> &p,
+                               Vector<double>   &value) const;
+  };
+
+
+  template <int dim>
+  double
+  BoundaryValues<dim>::value (const Point<dim>  &/*p*/,
+                              const unsigned int component) const
+  {
+    Assert (component < this->n_components,
+            ExcIndexRange (component, 0, this->n_components));
+
+    return 0;
+  }
+
+
+  template <int dim>
+  void
+  BoundaryValues<dim>::vector_value (const Point<dim> &p,
+                                     Vector<double>   &values) const
+  {
+    for (unsigned int c=0; c<this->n_components; ++c)
+      values(c) = BoundaryValues<dim>::value (p, c);
+  }
+
+
+  template <int dim>
+  class RightHandSide : public Function<dim>
+  {
+  public:
+    RightHandSide () : Function<dim>(dim+1) {}
+
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+
+    virtual void vector_value (const Point<dim> &p,
+                               Vector<double>   &value) const;
+
+  };
+
+
+  template <int dim>
+  double
+  RightHandSide<dim>::value (const Point<dim>  &p,
+                             const unsigned int component) const
+  {
+    const Point<dim> center(0.75, 0.1);
+    const double r = (p-center).norm();
+
+    if (component==0)
+      return std::exp(-100.*r*r);
+    return 0;
+  }
+
+
+  // Fill values with the right hand side of every component at p by
+  // calling RightHandSide<dim>::value() once per component.
+  template <int dim>
+  void
+  RightHandSide<dim>::vector_value (const Point<dim> &p,
+                                    Vector<double>   &values) const
+  {
+    unsigned int comp = 0;
+    while (comp < this->n_components)
+      {
+        values(comp) = RightHandSide<dim>::value (p, comp);
+        ++comp;
+      }
+  }
+
+
+
+  // Operator whose vmult() applies an approximate inverse of a matrix by
+  // running a preconditioned CG iteration. The matrix and the
+  // preconditioner are referenced through SmartPointers, not copied, so
+  // both have to outlive this object.
+  template <class Matrix, class Preconditioner>
+  class InverseMatrix : public Subscriptor
+  {
+  public:
+    // m and preconditioner are the objects used in the CG solve;
+    // locally_owned and mpi_communicator describe the layout of the
+    // internal scratch vector.
+    InverseMatrix (const Matrix &m, const Preconditioner &preconditioner,
+                   const IndexSet &locally_owned,
+                   const MPI_Comm &mpi_communicator);
+
+    // Solve matrix * dst = src approximately.
+    void vmult (TrilinosWrappers::MPI::Vector &dst,
+                const TrilinosWrappers::MPI::Vector &src) const;
+
+  private:
+    const SmartPointer<const Matrix>         matrix;
+    const SmartPointer<const Preconditioner> preconditioner;
+
+    // Address of the communicator passed to the constructor.
+    const MPI_Comm *mpi_communicator;
+
+    // Scratch vector for the CG solution; mutable because vmult() is const.
+    mutable TrilinosWrappers::MPI::Vector tmp;
+  };
+
+
+
+  // Store pointers to the matrix, the preconditioner and the communicator,
+  // and size the scratch vector according to the locally owned indices.
+  template <class Matrix, class Preconditioner>
+  InverseMatrix<Matrix,Preconditioner>::
+  InverseMatrix (const Matrix         &m,
+                 const Preconditioner &preconditioner,
+                 const IndexSet       &locally_owned,
+                 const MPI_Comm       &mpi_communicator)
+    :
+    matrix (&m), preconditioner (&preconditioner),
+    mpi_communicator (&mpi_communicator),
+    tmp (locally_owned, mpi_communicator)
+  {}
+
+
+
+  // Solve matrix * dst = src with preconditioned CG. The iteration stops
+  // after at most src.size() steps or once the residual has dropped below
+  // 1e-6 times the l2 norm of src. The solve runs on the internal scratch
+  // vector, started from zero, and the result is then copied into dst.
+  template <class Matrix, class Preconditioner>
+  void InverseMatrix<Matrix,Preconditioner>::vmult
+  (TrilinosWrappers::MPI::Vector       &dst,
+   const TrilinosWrappers::MPI::Vector &src) const
+  {
+    const double tolerance = 1e-6*src.l2_norm();
+    SolverControl control (src.size(), tolerance);
+
+    TrilinosWrappers::SolverCG solver (control,
+                                       TrilinosWrappers::SolverCG::AdditionalData());
+
+    tmp = 0.;
+    solver.solve (*matrix, tmp, src, *preconditioner);
+
+    dst = tmp;
+  }
+
+
+
+  // Operator representing the product block(1,0) * A^{-1} * block(0,1) of
+  // the block system, where A^{-1} is applied through an InverseMatrix
+  // object. Neither the system matrix nor the inverse is copied; both are
+  // referenced through SmartPointers and have to outlive this object.
+  template <class Preconditioner>
+  class SchurComplement : public TrilinosWrappers::SparseMatrix
+  {
+  public:
+    // owned_vel is the index set used to size the two scratch vectors.
+    // They hold the intermediate results block(0,1)*src and A^{-1} applied
+    // to it, i.e. vectors with the row layout of block(0,1). The parameter
+    // carries the same name as in the constructor definition.
+    SchurComplement ( const TrilinosWrappers::BlockSparseMatrix &system_matrix,
+                      const InverseMatrix<TrilinosWrappers::SparseMatrix,
+                      Preconditioner> &A_inverse,
+                      const IndexSet &owned_vel,
+                      const MPI_Comm &mpi_communicator);
+
+    // Compute dst = block(1,0) * A^{-1} * block(0,1) * src.
+    void vmult (TrilinosWrappers::MPI::Vector       &dst,
+                const TrilinosWrappers::MPI::Vector &src) const;
+
+  private:
+    const SmartPointer<const TrilinosWrappers::BlockSparseMatrix> system_matrix;
+    const SmartPointer<const InverseMatrix<TrilinosWrappers::SparseMatrix,
+          Preconditioner> > A_inverse;
+
+    // Scratch vectors; mutable because vmult() is const.
+    mutable TrilinosWrappers::MPI::Vector tmp1, tmp2;
+  };
+
+
+
+  // Store pointers to the system matrix and the inverse of its (0,0)
+  // block, and create the two scratch vectors: tmp1 is sized from
+  // owned_vel, tmp2 is constructed as a copy of tmp1.
+  template <class Preconditioner>
+  SchurComplement<Preconditioner>::SchurComplement
+  (const TrilinosWrappers::BlockSparseMatrix &system_matrix,
+   const InverseMatrix<TrilinosWrappers::SparseMatrix,
+   Preconditioner>                           &A_inverse,
+   const IndexSet                            &owned_vel,
+   const MPI_Comm                            &mpi_communicator)
+    :
+    system_matrix (&system_matrix), A_inverse (&A_inverse),
+    tmp1 (owned_vel, mpi_communicator), tmp2 (tmp1)
+  {}
+
+
+
+  // Apply the Schur complement to src in three steps, using the two
+  // scratch vectors for the intermediate results.
+  template <class Preconditioner>
+  void SchurComplement<Preconditioner>::vmult
+  (TrilinosWrappers::MPI::Vector       &dst,
+   const TrilinosWrappers::MPI::Vector &src) const
+  {
+    const TrilinosWrappers::SparseMatrix &upper_right = system_matrix->block(0,1);
+    const TrilinosWrappers::SparseMatrix &lower_left  = system_matrix->block(1,0);
+
+    // tmp1 = block(0,1) * src
+    upper_right.vmult (tmp1, src);
+    // tmp2 = A^{-1} * tmp1
+    A_inverse->vmult (tmp2, tmp1);
+    // dst = block(1,0) * tmp2
+    lower_left.vmult (dst, tmp2);
+  }
+
+
+
+  // Set up the problem for the given polynomial degree: the finite element
+  // combines dim copies of FE_Q(degree+1) with one copy of FE_Q(degree),
+  // the mapping has degree degree+1, the triangulation lives on
+  // MPI_COMM_WORLD, and pcout only prints on MPI process 0.
+  template <int dim>
+  StokesProblem<dim>::StokesProblem (const unsigned int degree)
+    :
+    degree (degree),
+    mpi_communicator (MPI_COMM_WORLD),
+    triangulation (mpi_communicator),
+    fe (FE_Q<dim>(degree+1), dim, FE_Q<dim>(degree), 1),
+    dof_handler (triangulation),
+    pcout (std::cout,
+           (Utilities::MPI::this_mpi_process(mpi_communicator) == 0)),
+    mapping (degree+1)
+  {}
+// @endcond
+//
+// @sect3{Setting up periodicity constraints on distributed triangulations}
+  template <int dim>
+  void StokesProblem<dim>::create_mesh()
+  {
+    Point<dim> origin;
+    const double inner_radius = .5;
+    const double outer_radius = 1.;
+
+    GridGenerator::quarter_hyper_shell (triangulation, origin,
+                                        inner_radius, outer_radius,
+                                        0, true);
+
+// Before we can prescribe periodicity constraints, we need to ensure that
+// cells on opposite sides of the domain but connected by periodic faces are
+// part of the ghost layer if one of them is stored on the local processor.
+// At this point we need to think about how we want to prescribe periodicity.
+// The vertices $\text{vertices}_2$ of a face on the left boundary should be
+// matched to the vertices $\text{vertices}_1$ of a face on the lower boundary
+// given by $\text{vertices}_2=R\cdot \text{vertices}_1+b$ where the rotation
+// matrix $R$ and the offset $b$ are given by
+// @f{align*}
+// R=\begin{pmatrix}
+// 0&1\\-1&0
+// \end{pmatrix},
+// \quad
+// b=\begin{pmatrix}0&0\end{pmatrix}.
+// @f}
+// The data structure into which we save the resulting information is here
+// based on the Triangulation.
+    typedef typename parallel::distributed::Triangulation<dim>::cell_iterator
+    CellIterator;
+    std::vector<GridTools::PeriodicFacePair<CellIterator> > periodic_faces;
+
+    FullMatrix<double> rotation(dim);
+    rotation[0][1]=1.;
+    rotation[1][0]=-1.;
+
+    Tensor<1,dim> no_offset;
+    GridTools::collect_periodic_faces(triangulation, 2, 3, 1,
+                                      periodic_faces, no_offset,
+                                      rotation);
+
+// Now telling the triangulation about the desired periodicity is
+// particularly easy by just calling
+// parallel::distributed::Triangulation::add_periodicity.
+    triangulation.add_periodicity(periodic_faces);
+
+    triangulation.set_boundary(0, boundary);
+    triangulation.set_boundary(1, boundary);
+
+    triangulation.refine_global (4-dim);
+  }
+
+
+// @sect3{Setting up periodicity constraints on distributed triangulations}
+  template <int dim>
+  void StokesProblem<dim>::setup_dofs ()
+  {
+    dof_handler.distribute_dofs (fe);
+
+    std::vector<unsigned int> block_component (dim+1,0);
+    block_component[dim] = 1;
+    DoFRenumbering::component_wise (dof_handler, block_component);
+
+// The sizes of the velocity and pressure blocks are kept as
+// types::global_dof_index, the type in which they are counted, so that they
+// are not truncated when 64-bit indices are in use.
+    std::vector<types::global_dof_index> dofs_per_block (2);
+    DoFTools::count_dofs_per_block (dof_handler, dofs_per_block, block_component);
+    const types::global_dof_index n_u = dofs_per_block[0],
+                                  n_p = dofs_per_block[1];
+
+    {
+      owned_partitioning.clear();
+      IndexSet locally_owned_dofs = dof_handler.locally_owned_dofs();
+      owned_partitioning.push_back(locally_owned_dofs.get_view(0, n_u));
+      owned_partitioning.push_back(locally_owned_dofs.get_view(n_u, n_u+n_p));
+
+      relevant_partitioning.clear();
+      IndexSet locally_relevant_dofs;
+      DoFTools::extract_locally_relevant_dofs (dof_handler, locally_relevant_dofs);
+      relevant_partitioning.push_back(locally_relevant_dofs.get_view(0, n_u));
+      relevant_partitioning.push_back(locally_relevant_dofs.get_view(n_u, n_u+n_p));
+
+      constraints.clear ();
+      constraints.reinit(locally_relevant_dofs);
+
+      FEValuesExtractors::Vector velocities(0);
+
+      DoFTools::make_hanging_node_constraints (dof_handler,
+                                               constraints);
+      VectorTools::interpolate_boundary_values (mapping,
+                                                dof_handler,
+                                                0,
+                                                BoundaryValues<dim>(),
+                                                constraints,
+                                                fe.component_mask(velocities));
+      VectorTools::interpolate_boundary_values (mapping,
+                                                dof_handler,
+                                                1,
+                                                BoundaryValues<dim>(),
+                                                constraints,
+                                                fe.component_mask(velocities));
+
+// After we provided the mesh with the necessary information for the periodicity
+// constraints, we are now able to actually create them. For describing the
+// matching we are using the same approach as before, i.e., the $\text{vertices}_2$
+// of a face on the left boundary should be matched to the vertices
+// $\text{vertices}_1$ of a face on the lower boundary given by
+// $\text{vertices}_2=R\cdot \text{vertices}_1+b$ where the rotation matrix $R$
+// and the offset $b$ are given by
+// @f{align*}
+// R=\begin{pmatrix}
+// 0&1\\-1&0
+// \end{pmatrix},
+// \quad
+// b=\begin{pmatrix}0&0\end{pmatrix}.
+// @f}
+// These two objects not only describe how faces should be matched but also
+// in which sense the solution should be transformed from $\text{face}_2$ to
+// $\text{face}_1$.
+      FullMatrix<double> rotation_matrix(dim);
+      rotation_matrix[0][1]=1.;
+      rotation_matrix[1][0]=-1.;
+
+      Tensor<1,dim> offset;
+
+// For setting up the constraints, we first store the periodicity
+// information in an auxiliary object of type
+// <code>std::vector@<GridTools::PeriodicFacePair<typename
+// DoFHandler@<dim@>::cell_iterator@> </code>. The periodic boundaries have the
+// boundary indicators 2 (x=0) and 3 (y=0). All the other parameters we
+// have set up before. In this case the direction does not matter. Due to
+// $\text{vertices}_2=R\cdot \text{vertices}_1+b$ this is exactly what we want.
+      std::vector<GridTools::PeriodicFacePair<typename DoFHandler<dim>::cell_iterator> >
+      periodicity_vector;
+
+      const unsigned int direction = 1;
+
+      GridTools::collect_periodic_faces(dof_handler, 2, 3, direction,
+                                        periodicity_vector, offset,
+                                        rotation_matrix);
+
+// Next we need to provide information on which vector valued components of
+// the solution should be rotated. Since we choose here to just constrain the
+// velocity and this starts at the first component of the solution vector we
+// simply insert a 0:
+      std::vector<unsigned int> first_vector_components;
+      first_vector_components.push_back(0);
+
+// After setting up all the information in periodicity_vector all we have
+// to do is to tell make_periodicity_constraints to create the desired
+// constraints.
+      DoFTools::make_periodicity_constraints<DoFHandler<dim> >
+      (periodicity_vector, constraints, fe.component_mask(velocities),
+       first_vector_components);
+
+      VectorTools::interpolate_boundary_values (mapping,
+                                                dof_handler,
+                                                0,
+                                                BoundaryValues<dim>(),
+                                                constraints,
+                                                fe.component_mask(velocities));
+      VectorTools::interpolate_boundary_values (mapping,
+                                                dof_handler,
+                                                1,
+                                                BoundaryValues<dim>(),
+                                                constraints,
+                                                fe.component_mask(velocities));
+
+    }
+
+    constraints.close ();
+
+    {
+      TrilinosWrappers::BlockSparsityPattern bsp
+      (owned_partitioning, owned_partitioning,
+       relevant_partitioning, mpi_communicator);
+
+      DoFTools::make_sparsity_pattern
+      (dof_handler, bsp, constraints, false,
+       Utilities::MPI::this_mpi_process(mpi_communicator));
+
+      bsp.compress();
+
+      system_matrix.reinit (bsp);
+    }
+
+    system_rhs.reinit (owned_partitioning,
+                       mpi_communicator);
+    solution.reinit (owned_partitioning, relevant_partitioning,
+                     mpi_communicator);
+  }
+
+
+// @cond SKIP
+  // Assemble the global matrix and right hand side from the locally owned
+  // cells. The local matrix contains the symmetric-gradient term, the two
+  // divergence/pressure coupling terms and a pressure mass term; only its
+  // lower triangle is computed in the quadrature loop and then mirrored.
+  template <int dim>
+  void StokesProblem<dim>::assemble_system ()
+  {
+    system_matrix=0.;
+    system_rhs=0.;
+
+    QGauss<dim>   quadrature_formula(degree+2);
+
+    FEValues<dim> fe_values (mapping, fe, quadrature_formula,
+                             update_values | update_gradients |
+                             update_quadrature_points | update_JxW_values);
+
+    const unsigned int   dofs_per_cell   = fe.dofs_per_cell;
+    const unsigned int   n_q_points      = quadrature_formula.size();
+
+    FullMatrix<double>   cell_matrix (dofs_per_cell, dofs_per_cell);
+    Vector<double>       cell_rhs (dofs_per_cell);
+
+    std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+
+    const RightHandSide<dim>          right_hand_side;
+    std::vector<Vector<double> >      rhs_values (n_q_points,
+                                                  Vector<double>(dim+1));
+
+    const FEValuesExtractors::Vector velocities (0);
+    const FEValuesExtractors::Scalar pressure (dim);
+
+    // Shape function quantities at the current quadrature point.
+    std::vector<SymmetricTensor<2,dim> > symgrad_phi_u (dofs_per_cell);
+    std::vector<double>                  div_phi_u   (dofs_per_cell);
+    std::vector<double>                  phi_p       (dofs_per_cell);
+
+    typename DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active();
+    const typename DoFHandler<dim>::active_cell_iterator
+    endc = dof_handler.end();
+    for (; cell!=endc; ++cell)
+      {
+        // Only cells owned by this process contribute.
+        if (!cell->is_locally_owned())
+          continue;
+
+        fe_values.reinit (cell);
+        cell_matrix = 0;
+        cell_rhs = 0;
+
+        right_hand_side.vector_value_list(fe_values.get_quadrature_points(),
+                                          rhs_values);
+
+        for (unsigned int q=0; q<n_q_points; ++q)
+          {
+            const double JxW = fe_values.JxW(q);
+
+            for (unsigned int k=0; k<dofs_per_cell; ++k)
+              {
+                symgrad_phi_u[k] = fe_values[velocities].symmetric_gradient (k, q);
+                div_phi_u[k]     = fe_values[velocities].divergence (k, q);
+                phi_p[k]         = fe_values[pressure].value (k, q);
+              }
+
+            for (unsigned int i=0; i<dofs_per_cell; ++i)
+              {
+                // Lower triangle (including the diagonal) only.
+                for (unsigned int j=0; j<=i; ++j)
+                  cell_matrix(i,j) += (symgrad_phi_u[i] * symgrad_phi_u[j]
+                                       - div_phi_u[i] * phi_p[j]
+                                       - phi_p[i] * div_phi_u[j]
+                                       + phi_p[i] * phi_p[j])
+                                      * JxW;
+
+                const unsigned int component_i =
+                  fe.system_to_component_index(i).first;
+                cell_rhs(i) += fe_values.shape_value(i,q) *
+                               rhs_values[q](component_i) *
+                               JxW;
+              }
+          }
+
+        // Mirror the lower triangle into the upper one.
+        for (unsigned int row=1; row<dofs_per_cell; ++row)
+          for (unsigned int col=0; col<row; ++col)
+            cell_matrix(col,row) = cell_matrix(row,col);
+
+        cell->get_dof_indices (local_dof_indices);
+        constraints.distribute_local_to_global (cell_matrix, cell_rhs,
+                                                local_dof_indices,
+                                                system_matrix, system_rhs);
+      }
+
+    system_matrix.compress (VectorOperation::add);
+    system_rhs.compress (VectorOperation::add);
+
+    pcout << "   Computing preconditioner..." << std::endl << std::flush;
+  }
+
+
+
+  // Solve the block system with a Schur complement approach. A_inverse
+  // applies an approximate inverse of block(0,0) by Jacobi-preconditioned
+  // CG. First the second solution block is computed from the Schur
+  // complement equation, then the first block is recovered by one more
+  // application of A_inverse.
+  template <int dim>
+  void StokesProblem<dim>::solve ()
+  {
+    TrilinosWrappers::PreconditionJacobi A_preconditioner;
+    A_preconditioner.initialize(system_matrix.block(0,0));
+
+    const InverseMatrix<TrilinosWrappers::SparseMatrix,
+          TrilinosWrappers::PreconditionJacobi>
+          A_inverse (system_matrix.block(0,0),
+                     A_preconditioner,
+                     owned_partitioning[0],
+                     mpi_communicator);
+
+    // Non-ghosted work vector holding both blocks.
+    TrilinosWrappers::MPI::BlockVector tmp (owned_partitioning,
+                                            mpi_communicator);
+
+    {
+      // schur_rhs = block(1,0) * A^{-1} * rhs_0 - rhs_1
+      TrilinosWrappers::MPI::Vector schur_rhs (owned_partitioning[1],
+                                               mpi_communicator);
+      A_inverse.vmult (tmp.block(0), system_rhs.block(0));
+      system_matrix.block(1,0).vmult (schur_rhs, tmp.block(0));
+      schur_rhs -= system_rhs.block(1);
+
+      SchurComplement<TrilinosWrappers::PreconditionJacobi>
+      schur_complement (system_matrix, A_inverse,
+                        owned_partitioning[0],
+                        mpi_communicator);
+
+      SolverControl solver_control (solution.block(1).size(),
+                                    1e-6*schur_rhs.l2_norm());
+      SolverCG<TrilinosWrappers::MPI::Vector> cg(solver_control);
+
+      // The Schur complement is preconditioned with an AMG preconditioner
+      // built from block(1,1).
+      // NOTE(review): the original also constructed an InverseMatrix
+      // around block(1,1) here but never used it. That unused object was
+      // removed; the preconditioner passed to the solver is unchanged.
+      TrilinosWrappers::PreconditionAMG preconditioner;
+      preconditioner.initialize (system_matrix.block(1,1));
+
+      cg.solve (schur_complement,
+                tmp.block(1),
+                schur_rhs,
+                preconditioner);
+
+      constraints.distribute (tmp);
+      solution.block(1)=tmp.block(1);
+    }
+
+    {
+      // tmp_0 = A^{-1} * (rhs_0 - block(0,1) * tmp_1)
+      system_matrix.block(0,1).vmult (tmp.block(0), tmp.block(1));
+      tmp.block(0) *= -1;
+      tmp.block(0) += system_rhs.block(0);
+
+      // Passing the same vector as source and destination works here
+      // because InverseMatrix::vmult solves into its own scratch vector
+      // and only copies into the destination afterwards.
+      A_inverse.vmult (tmp.block(0), tmp.block(0));
+
+      constraints.distribute (tmp);
+      solution.block(0)=tmp.block(0);
+    }
+  }
+
+
+
+  // Write the solution and the owning subdomain of this process to
+  // "solution-CC.SS.vtu" (CC = refinement cycle, SS = subdomain id, both
+  // two digits). MPI process 0 additionally writes "solution-CC.pvtu",
+  // which lists the files of all processes.
+  template <int dim>
+  void
+  StokesProblem<dim>::output_results (const unsigned int refinement_cycle)  const
+  {
+    // dim velocity components forming one vector, then the scalar pressure.
+    std::vector<std::string> solution_names (dim, "velocity");
+    solution_names.push_back ("pressure");
+
+    std::vector<DataComponentInterpretation::DataComponentInterpretation>
+    interpretation (dim, DataComponentInterpretation::component_is_part_of_vector);
+    interpretation.push_back (DataComponentInterpretation::component_is_scalar);
+
+    const unsigned int this_subdomain = triangulation.locally_owned_subdomain();
+
+    DataOut<dim> data_out;
+    data_out.attach_dof_handler (dof_handler);
+    data_out.add_data_vector (solution, solution_names,
+                              DataOut<dim>::type_dof_data,
+                              interpretation);
+
+    Vector<float> subdomain (triangulation.n_active_cells());
+    for (unsigned int cell_no=0; cell_no<subdomain.size(); ++cell_no)
+      subdomain(cell_no) = this_subdomain;
+    data_out.add_data_vector (subdomain, "subdomain");
+    data_out.build_patches (mapping, degree+1);
+
+    // Common prefix of all file names of this refinement cycle.
+    const std::string file_stem
+      = "solution-" + Utilities::int_to_string (refinement_cycle, 2);
+
+    const std::string local_filename
+      = file_stem + "." + Utilities::int_to_string (this_subdomain, 2) + ".vtu";
+    std::ofstream output (local_filename.c_str());
+    data_out.write_vtu (output);
+
+    if (Utilities::MPI::this_mpi_process(MPI_COMM_WORLD) == 0)
+      {
+        std::vector<std::string> filenames;
+        for (unsigned int rank=0;
+             rank<Utilities::MPI::n_mpi_processes(MPI_COMM_WORLD);
+             ++rank)
+          filenames.push_back (file_stem + "." +
+                               Utilities::int_to_string(rank, 2) +
+                               ".vtu");
+
+        const std::string pvtu_master_filename = file_stem + ".pvtu";
+        std::ofstream pvtu_master (pvtu_master_filename.c_str());
+        data_out.write_pvtu_record (pvtu_master, filenames);
+      }
+  }
+
+
+
+  template <int dim>
+  void
+  StokesProblem<dim>::refine_mesh ()
+  {
+
+    Vector<float> estimated_error_per_cell (triangulation.n_active_cells());
+
+    FEValuesExtractors::Scalar pressure(dim);
+    KellyErrorEstimator<dim>::estimate (dof_handler,
+                                        QGauss<dim-1>(degree+1),
+                                        typename FunctionMap<dim>::type(),
+                                        solution,
+                                        estimated_error_per_cell,
+                                        fe.component_mask(pressure));
+
+    parallel::distributed::GridRefinement::
+    refine_and_coarsen_fixed_number (triangulation,
+                                     estimated_error_per_cell,
+                                     0.3, 0.0);
+    triangulation.execute_coarsening_and_refinement ();
+  }
+
+
+  // Driver: create the mesh once, then run nine cycles of
+  // (refine, except in the first cycle) - setup - assemble - solve - output.
+  template <int dim>
+  void StokesProblem<dim>::run ()
+  {
+    create_mesh();
+
+    const unsigned int n_cycles = 9;
+    for (unsigned int cycle = 0; cycle<n_cycles; ++cycle)
+      {
+        pcout << "Refinement cycle " << cycle << std::endl;
+
+        // The first cycle works on the initial mesh.
+        if (cycle != 0)
+          refine_mesh ();
+
+        setup_dofs ();
+
+        pcout << "   Assembling..." << std::endl << std::flush;
+        assemble_system ();
+
+        pcout << "   Solving..." << std::flush;
+        solve ();
+
+        output_results (cycle);
+
+        pcout << std::endl;
+      }
+  }
+}
+
+
+// Initialize MPI, run the two-dimensional Stokes problem with degree 1 and
+// report any exception on std::cerr. Returns 0 on success and 1 if an
+// exception was caught.
+int main (int argc, char *argv[])
+{
+  try
+    {
+      using namespace dealii;
+      using namespace Step45;
+
+      Utilities::MPI::MPI_InitFinalize mpi_initialization (argc, argv, 1);
+
+      StokesProblem<2> flow_problem(1);
+      flow_problem.run ();
+    }
+  catch (std::exception &exc)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl
+                << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+
+      return 1;
+    }
+  catch (...)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl
+                << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+
+      return 1;
+    }
+
+  return 0;
+}
+// @endcond
diff --git a/examples/step-46/CMakeLists.txt b/examples/step-46/CMakeLists.txt
new file mode 100644
index 0000000..379ae58
--- /dev/null
+++ b/examples/step-46/CMakeLists.txt
@@ -0,0 +1,50 @@
+##
+#  CMake script for the step-46 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-46")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+#
+# Are all dependencies fulfilled? This step requires deal.II with UMFPACK.
+#
+IF(NOT DEAL_II_WITH_UMFPACK)
+  MESSAGE(FATAL_ERROR "
+Error! The deal.II library found at ${DEAL_II_PATH} was not configured with
+    DEAL_II_WITH_UMFPACK = ON
+One or all of these are OFF in your installation but are required for this tutorial step."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-46/doc/builds-on b/examples/step-46/doc/builds-on
new file mode 100644
index 0000000..3c3e873
--- /dev/null
+++ b/examples/step-46/doc/builds-on
@@ -0,0 +1 @@
+step-8 step-22 step-27
diff --git a/examples/step-46/doc/intro.dox b/examples/step-46/doc/intro.dox
new file mode 100644
index 0000000..45d3618
--- /dev/null
+++ b/examples/step-46/doc/intro.dox
@@ -0,0 +1,685 @@
+<br>
+
+<i>This program was contributed by Wolfgang Bangerth.
+<br>
+This material is based upon work partly supported by the National Science
+Foundation under Award No. EAR-0949446 and The University of California
+– Davis. Any opinions, findings, and conclusions or recommendations
+expressed in this publication are those of the author and do not necessarily
+reflect the views of the National Science Foundation or of The University of
+California – Davis.  </i>
+
+
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+This program deals with the problem of coupling different physics in different
+parts of the domain. Specifically, let us consider the following
+situation that couples a Stokes fluid with an elastic solid (these two
+problems were previously discussed separately in step-22 and step-8,
+where you may want to read up on the individual equations):
+
+- In a part $\Omega_f$ of $\Omega$, we have a fluid flowing that satisfies the
+  time independent Stokes equations (in the form that involves the strain
+  tensor):
+  @f{align*}
+    -2\eta\nabla \cdot \varepsilon(\mathbf v) + \nabla p &= 0,
+          \qquad \qquad && \text{in}\ \Omega_f\\
+    -\nabla \cdot \mathbf v &= 0  && \text{in}\ \Omega_f.
+  @f}
+  Here, $\mathbf v, p$ are the fluid velocity and pressure, respectively.
+  We prescribe the velocity on part of the external boundary,
+  @f{align*}
+    \mathbf v = \mathbf v_0 \qquad\qquad
+     \text{on}\ \Gamma_{f,1} \subset \partial\Omega \cap \partial\Omega_f
+  @f}
+  while we assume free-flow conditions on the remainder of the external
+  boundary,
+  @f{align*}
+    (2\eta \varepsilon(\mathbf v) + p \mathbf 1) \cdot \mathbf n = 0
+     \qquad\qquad
+     \text{on}\ \Gamma_{f,2} = \partial\Omega \cap \partial\Omega_f \backslash
+     \Gamma_{f,1}.
+  @f}
+- The remainder of the domain, $\Omega_s = \Omega \backslash \Omega_f$ is
+  occupied by a solid whose deformation field $\mathbf u$ satisfies the
+  elasticity equation,
+  @f{align*}
+    -\nabla \cdot C \varepsilon(\mathbf u) = 0 \qquad\qquad
+    & \text{in}\ \Omega_s,
+  @f}
+  where $C$ is the rank-4 elasticity tensor (for which we will use a
+  particularly simple form by assuming that the solid is isotropic).
+  It deforms in reaction to the forces exerted by the
+  fluid flowing along the boundary of the solid. We assume this deformation to
+  be so small that it has no feedback effect on the fluid, i.e. the coupling
+  is only in one direction. For simplicity, we will assume that the
+  solid's external boundary is clamped, i.e.
+  @f{align*}
+    \mathbf u = \mathbf 0 \qquad\qquad
+     \text{on}\ \Gamma_{s,1} = \partial\Omega \cap \partial\Omega_s
+  @f}
+- As a consequence of the small displacement assumption, we will pose the
+  following boundary conditions on the interface between the fluid and solid:
+  first, we have no slip boundary conditions for the fluid,
+  @f{align*}
+    \mathbf v = \mathbf 0 \qquad\qquad
+     \text{on}\ \Gamma_{i} = \partial\Omega_s \cap \partial\Omega_f.
+  @f}
+  Secondly, the forces (traction) on the solid equal the normal stress from the fluid,
+  @f{align*}
+    (C \varepsilon(\mathbf u)) \mathbf n =
+    (2 \eta \varepsilon(\mathbf v) + p \mathbf 1) \mathbf n \qquad\qquad
+     \text{on}\ \Gamma_{i} = \partial\Omega_s \cap \partial\Omega_f.
+  @f}
+
+We get a weak formulation of this problem by following our usual rule of
+multiplying from the left by a test function and integrating over the
+domain. It then looks like this: Find $y = \{\mathbf v, p,
+\mathbf u\} \in Y \subset H^1(\Omega_f)^d \times L_2(\Omega_f) \times
+H^1(\Omega_s)^d$ such that
+@f{multline*}
+	2 \eta (\varepsilon(\mathbf a), \varepsilon(\mathbf v))_{\Omega_f}
+	- (\nabla \cdot \mathbf a, p)_{\Omega_f}
+	- (q, \nabla \cdot \mathbf v)_{\Omega_f}
+	\\
+	+ (\varepsilon(\mathbf b), C \varepsilon(\mathbf u))_{\Omega_s}
+	\\
+	- (\mathbf b,
+           (2 \eta \varepsilon(\mathbf v) + p \mathbf 1) \mathbf n)_{\Gamma_i}
+	=
+	0,
+@f}
+for all test functions $\mathbf a, q, \mathbf b$.
+Note that $Y$ is only a subspace of the spaces listed above to accommodate for
+the various Dirichlet boundary conditions.
+
+This sort of coupling is of course possible by simply having two Triangulation
+and two DoFHandler objects, one each for each of the two subdomains. On the
+other hand, deal.II is much simpler to use if there is a single DoFHandler
+object that knows about the discretization of the entire problem.
+
+This program is about how this can be achieved. Note that the goal is not to
+present a particularly useful physical model (a realistic fluid-structure
+interaction model would have to take into account the finite deformation of
+the solid and the effect this has on the fluid): this is, after all, just a
+tutorial program intended to demonstrate techniques, not to solve actual
+problems. Furthermore, we will make the assumption that the interface between
+the subdomains is aligned with coarse mesh cell faces.
+
+
+<h3>The general idea</h3>
+
+Before going into more details let us state the obvious: this is a
+problem with multiple solution variables; for this, you will probably
+want to read the @ref vector_valued documentation module first, which
+presents the basic philosophical framework in which we address
+problems with more than one solution variable. But back to the problem
+at hand:
+
+The fundamental idea to implement this sort of problem in deal.II goes as
+follows: in the problem formulation, the velocity and pressure variables
+$\mathbf v, p$ only live in the fluid subdomain $\Omega_f$. But let's assume
+that we extend them by zero to the entire domain $\Omega$ (in the general case
+this means that they will be discontinuous along $\Gamma_i$). So what is the
+appropriate function space for these variables? We know that on $\Omega_f$ we
+should require $\mathbf v \in H^1(\Omega_f)^d, p \in L_2(\Omega_f)$, so for
+the extensions $\tilde{\mathbf v}, \tilde p$ to the whole domain the following
+appears to be a useful set of function spaces:
+@f{align*}
+  \tilde {\mathbf v} &\in V
+   = \{\tilde {\mathbf v}|_{\Omega_f} \in H^1(\Omega_f)^d, \quad
+       \tilde {\mathbf v}|_{\Omega_s} = 0 \}
+  \\
+  \tilde p &\in P
+  = \{\tilde p|_{\Omega_f} \in L_2(\Omega_f), \quad
+       \tilde p|_{\Omega_s} = 0 \}.
+@f}
+(Since this is not important for the current discussion, we have omitted the
+question of boundary values from the choice of function spaces; this question
+also affects whether we can choose $L_2$ for the pressure or whether we have
+to choose the space $L_{2,0}(\Omega_f)=\{q\in L_2(\Omega_f): \int_{\Omega_f} q
+= 0\}$ for the pressure. None of these questions are relevant to the following
+discussion, however.)
+
+Note that these are indeed linear function spaces with obvious norms. Since no
+confusion is possible in practice, we will henceforth omit the tilde again to
+denote the extension of a function to the whole domain and simply refer by
+$\mathbf v, p$ to both the original and the extended function.
+
+For discretization, we need finite dimensional subspaces $V_h,P_h$ of
+$V, P$. For Stokes, we know from step-22 that an appropriate choice is
+$Q_{p+1}^d\times Q_p$ but this only holds for that part of the domain
+occupied by the fluid. For the extended field, let's use the following
+subspaces defined on the triangulation $\mathbb T$:
+@f{align*}
+  V_h
+   &= \{{\mathbf v}_h \quad | \quad
+       \forall K \in {\mathbb T}:
+       {\mathbf v}_h|_K \in Q_{p+1}^d\  \text{if}\ K\subset {\Omega_f}, \quad
+       {\mathbf v}_h|_{\Omega_f}\ \text{is continuous}, \quad
+       {\mathbf v}_h|_K = 0\ \text{if}\ K\subset {\Omega_s}\}
+   && \subset V
+  \\
+  P_h
+  &= \{ p_h \quad | \quad
+       \forall K \in {\mathbb T}:
+       p_h|_K \in Q_p\  \text{if}\ K\subset {\Omega_f}, \quad
+       p_h|_{\Omega_f}\ \text{is continuous}, \quad
+       p_h|_K = 0\ \text{if}\ K\subset {\Omega_s}\ \}
+   && \subset P.
+@f}
+In other words, on $\Omega_f$ we choose the usual discrete spaces but
+we keep the (discontinuous) extension by zero. The point to make is
+that we now need a description of a finite element space for functions
+that are zero on a cell — and this is where the FE_Nothing class
+comes in: it describes a finite dimensional function space of
+functions that are constant zero. A particular property of this
+peculiar linear vector space is that it has no degrees of freedom: it
+isn't just finite dimensional, it is in fact zero dimensional, and
+consequently for objects of this type, FiniteElement::dofs_per_cell
+will return zero. For discussion below, let us give this space a
+proper symbol:
+@f[
+  Z = \{ \varphi: \varphi(x)=0 \}.
+@f]
+The symbol $Z$ reminds us of the fact that functions in this space are
+zero. Obviously, we choose $Z_h=Z$.
+
+This entire discussion above can be repeated for the variables we use to
+describe the elasticity equation. Here, for the extended variables, we
+have
+@f{align*}
+  \tilde {\mathbf u} &\in U
+   = \{\tilde {\mathbf u}|_{\Omega_s} \in H^1(\Omega_s)^d, \quad
+       \tilde {\mathbf u}|_{\Omega_f} \in Z(\Omega_f)^d \},
+@f}
+and we will typically use a finite element space of the kind
+@f{align*}
+  U_h
+   &= \{{\mathbf u}_h \quad | \quad
+       \forall K \in {\mathbb T}:
+       {\mathbf u}_h|_K \in Q_r^d\  \text{if}\ K\subset {\Omega_s}, \quad
+       {\mathbf u}_h|_{\Omega_s}\ \text{is continuous}, \quad
+       {\mathbf u}_h|_K \in Z^d\ \text{if}\ K\subset {\Omega_f}\}
+   && \subset U
+@f}
+of polynomial degree $r$.
+
+So to sum up, we are going to look for a discrete vector-valued
+solution $y_h = \{\mathbf v_h, p_h, \mathbf u_h\}$ in the following
+space:
+@f{align*}
+  Y_h = \{
+      & y_h = \{\mathbf v_h, p_h, \mathbf u_h\} : \\
+      & y_h|_{\Omega_f} \in Q_{p+1}^d \times Q_p \times Z^d, \\
+      & y_h|_{\Omega_s} \in Z^d \times Z \times Q_r^d \}.
+@f}
+
+
+
+<h3>Implementation</h3>
+
+So how do we implement this sort of thing? First, we realize that the discrete
+space $Y_h$ essentially calls for two different finite elements: First, on the
+fluid subdomain, we need the element $Q_{p+1}^d \times Q_p \times Z^d$ which
+in deal.II is readily implemented by
+@code
+  FESystem<dim> (FE_Q<dim>(p+1), dim,
+		 FE_Q<dim>(p), 1,
+		 FE_Nothing<dim>(), dim),
+@endcode
+where <code>FE_Nothing</code> implements the space of functions that are
+always zero. Second, on the solid subdomain, we need the element
+$Z^d \times Z \times Q_r^d$, which we get using
+@code
+  FESystem<dim> (FE_Nothing<dim>(), dim,
+		 FE_Nothing<dim>(), 1,
+		 FE_Q<dim>(r), dim),
+@endcode
+
+The next step is that we associate each of these two elements with the cells
+that occupy each of the two subdomains. For this we realize that in a sense
+the two elements are just variations of each other in that they have the same
+number of vector components but have different polynomial degrees — this
+smells very much like what one would do in $hp$ finite element methods, and it
+is exactly what we are going to do here: we are going to (ab)use the classes
+and facilities of the hp namespace to assign different elements to different
+cells. In other words, we will collect the two finite elements in an
+hp::FECollection, will integrate with an appropriate hp::QCollection using an
+hp::FEValues object, and our DoF handler will be of type hp::DoFHandler. You
+may wish to take a look at step-27 for an overview of all of these concepts.
+
+Before going on describing the testcase, let us clarify a bit <i>why</i> this
+approach of extending the functions by zero to the entire domain and then
+mapping the problem on to the hp framework makes sense:
+
+- It makes things uniform: On all cells, the number of vector components is
+  the same (here, <code>2*dim+1</code>). This makes all sorts of
+  things possible since a uniform description allows for code
+  re-use. For example, counting degrees of freedom per vector
+  component (DoFTools::count_dofs_per_component), sorting degrees of
+  freedom by component (DoFRenumbering::component_wise), subsequent
+  partitioning of matrices and vectors into blocks and many other
+  functions work as they always did without the need to add special
+  logic to them that describes cases where some of the variables only
+  live on parts of the domain. Consequently, you have all sorts of
+  tools already available to you in programs like the current one that
+  weren't originally written for the multiphysics case but work just
+  fine in the current context.
+
+- It allows for easy graphical output: All graphical output formats we support
+  require that each field in the output is defined on all nodes of the
+  mesh. But given that now all solution components live everywhere,
+  our existing DataOut routines work as they always did, and produce
+  graphical output suitable for visualization -- the fields will
+  simply be extended by zero, a value that can easily be filtered out
+  by visualization programs if not desired.
+
+- There is essentially no cost: The trick with the FE_Nothing does not add any
+  degrees of freedom to the overall problem, nor do we ever have to handle a
+  shape function that belongs to these components — the FE_Nothing has
+  no degrees of freedom, nor does it have shape functions, all it does is take
+  up vector components.
+
+
+<h3> Specifics of the implementation </h3>
+
+More specifically, in the program we have to address the following
+points:
+- Implementing the bilinear form, and in particular dealing with the
+  interface term, both in the matrix and the sparsity pattern.
+- Implementing Dirichlet boundary conditions on the external and
+  internal parts of the boundaries
+  $\partial\Omega_f,\partial\Omega_s$.
+
+
+<h4>Dealing with the interface terms</h4>
+
+Let us first discuss implementing the bilinear form, which at the
+discrete level we recall to be
+@f{multline*}
+	2 \eta (\varepsilon(\mathbf a_h), \varepsilon(\mathbf v_h))_{\Omega_f}
+	- (\nabla \cdot \mathbf a_h, p_h)_{\Omega_f}
+	- (q_h, \nabla \cdot \mathbf v_h)_{\Omega_f}
+	\\
+	+ (\varepsilon(\mathbf b_h), C \varepsilon(\mathbf u_h))_{\Omega_s}
+	\\
+	- (\mathbf b_h,
+           (2 \eta \varepsilon(\mathbf v_h) + p_h \mathbf 1) \mathbf n)_{\Gamma_i}
+	=
+	0,
+@f}
+Given that we have extended the fields by zero, we could in principle
+write the integrals over subdomains to the entire domain $\Omega$,
+though it is little additional effort to first ask whether a cell is
+part of the elastic or fluid region before deciding which terms to
+integrate. Actually integrating these terms is not very difficult; for
+the Stokes equations, the relevant steps have been shown in step-22,
+whereas for the elasticity equation we take essentially the form shown
+in the @ref vector_valued module (rather than the one from step-8).
+
+The term that is of more interest is the interface term,
+@f[
+	(\mathbf b_h,
+           (2 \eta \varepsilon(\mathbf v_h) + p_h \mathbf 1) \mathbf n)_{\Gamma_i}.
+@f]
+Based on our assumption that the interface $\Gamma_i$ coincides with
+cell boundaries, this can in fact be written as a set of face
+integrals. If we denote the velocity, pressure and displacement
+components of shape function $\psi_i\in Y_h$ using the extractor
+notation $\psi_i[\mathbf v],\psi_i[p], \psi_i[\mathbf u]$, then the
+term above yields the following contribution to the global matrix
+entry $i,j$:
+@f[
+	\sum_K (\psi_i[\mathbf u],
+           (2 \eta \varepsilon(\psi_j[\mathbf v]) + \psi_j[p] \mathbf 1)
+	   \mathbf n)_{\partial K \cap \Gamma_i}.
+@f]
+Although it isn't immediately obvious, this term presents a slight
+complication: while $\psi_i[\mathbf u]$ and $\mathbf n$ are evaluated
+on the solid side of the interface (they are test functions for the
+displacement and the normal vector to $\Omega_s$, respectively), we
+need to evaluate $\psi_j[\mathbf v],\psi_j[p]$ on the fluid
+side of the interface since they correspond to the stress/force
+exerted by the fluid. In other words, in our implementation, we will
+need FEFaceValues objects for both sides of the interface. To make
+things slightly worse, we may also have to deal with the fact that one
+side or the other may be refined, leaving us with the need to
+integrate over parts of a face. Take a look at the implementation
+below on how to deal with this.
+
+As an additional complication, the matrix entries that result from this term
+need to be added to the sparsity pattern of the matrix somehow. This is the
+realm of various functions in the DoFTools namespace like
+DoFTools::make_sparsity_pattern and
+DoFTools::make_flux_sparsity_pattern. Essentially, what these functions do is
+simulate what happens during assembly of the system matrix: whenever assembly
+would write a nonzero entry into the global matrix, the functions in DoFTools
+would add an entry to the sparsity pattern. We could therefore do the
+following: let DoFTools::make_sparsity_pattern add all those entries to the
+sparsity pattern that arise from the regular cell-by-cell integration, and
+then do the same by hand for those that arise from the interface terms. If you look at
+the implementation of the interface integrals in the program below, it should
+be obvious how to do that and would require no more than maybe 100 lines of
+code at most.
+
+But we're lazy people: the interface term couples degrees of freedom from two
+adjacent cells along a face, which is exactly the kind of thing one would do
+in discontinuous Galerkin schemes for which the function
+DoFTools::make_flux_sparsity_pattern was written. This is a superset of matrix
+entries compared to the usual DoFTools::make_sparsity_pattern: it will also
+add all entries that result from computing terms coupling the degrees of
+freedom from both sides of all faces. Unfortunately, for the simplest version
+of this function, this is a pretty big superset. Consider for example the
+following mesh with two cells and a $Q_1$ finite element:
+@code
+  2---3---5
+  |   |   |
+  0---1---4
+@endcode
+Here, the sparsity pattern produced by DoFTools::make_sparsity_pattern will
+only have entries for degrees of freedom that couple on a cell. However, it
+will not have sparsity pattern entries $(0,4),(0,5),(2,4),(2,5)$. The sparsity
+pattern generated by DoFTools::make_flux_sparsity_pattern will have these
+entries, however: it assumes that you want to build a sparsity pattern for a
+bilinear form that couples <i>all</i> degrees of freedom from adjacent
+cells. This is not what we want: our interface term acts only on a small
+subset of cells, and we certainly don't need all the extra couplings between
+two adjacent fluid cells, or two adjacent solid cells. Furthermore, the fact that we
+use higher order elements means that we would really generate many many more
+entries than we actually need: on the coarsest mesh, in 2d, 44,207 nonzero
+entries instead of 16,635 for DoFTools::make_sparsity_pattern, leading to
+plenty of zeros in the matrix we later build (of course, the 16,635 are not
+enough since they don't include the interface entries). This ratio would be
+even worse in 3d.
+
+So being extremely lazy comes with a cost: too many entries in the matrix. But
+we can get away with being moderately lazy: there is a variant of
+DoFTools::make_flux_sparsity_pattern that allows us
+to specify which vector components of the finite element couple with which
+other components, both in cell terms as well as in face terms. For cells that
+are in the solid subdomain, we couple all displacements with each other; for
+fluid cells, all velocities with all velocities and the pressure, but not the
+pressure with itself. Since no cell has both sets of
+variables, there is no need to distinguish between the two kinds of cells, so
+we can write the mask like this:
+@code
+    Table<2,DoFTools::Coupling> cell_coupling (fe_collection.n_components(),
+					       fe_collection.n_components());
+
+    for (unsigned int c=0; c<fe_collection.n_components(); ++c)
+      for (unsigned int d=0; d<fe_collection.n_components(); ++d)
+	if (((c<dim+1) && (d<dim+1)
+	     && !((c==dim) && (d==dim)))
+	    ||
+	    ((c>=dim+1) && (d>=dim+1)))
+	  cell_coupling[c][d] = DoFTools::Coupling::always;
+@endcode
+Here, we have used the fact that the first <code>dim</code> components of the
+finite element are the velocities, then the pressure, and then the
+<code>dim</code> displacements. (We could as well have stated that the
+velocities/pressure also couple with the displacements since no cell ever has
+both sets of variables.) On the other hand, the interface terms require a mask
+like this:
+@code
+    Table<2,DoFTools::Coupling> face_coupling (fe_collection.n_components(),
+					       fe_collection.n_components());
+
+    for (unsigned int c=0; c<fe_collection.n_components(); ++c)
+      for (unsigned int d=0; d<fe_collection.n_components(); ++d)
+	if ((c>=dim+1) && (d<dim+1))
+	  face_coupling[c][d] = DoFTools::Coupling::always;
+@endcode
+In other words, all displacement test functions (components
+<code>c@>=dim+1</code>) couple with all velocity and pressure shape functions
+on the other side of an interface. This is not entirely true, though close: in
+fact, the exact form of the interface term only couples those pressure and displacement
+shape functions that are indeed nonzero on the common interface, which is not
+true for all shape functions; on the other hand, it really couples all
+velocities (since the integral involves gradients of the velocity shape
+functions, which are all nonzero on all faces of the cell). However, the mask we
+build above is not capable of these subtleties. Nevertheless, through these
+masks we manage to get the number of sparsity pattern entries down to 21,028
+— good enough for now.
+
+
+
+<h4>Velocity boundary conditions on the interface</h4>
+
+The second difficulty is that while we know how to enforce a zero
+velocity or stress on the external boundary (using
+VectorTools::interpolate_boundary_values, called with an appropriate
+component mask and setting different boundary indicators for solid and
+fluid external boundaries), we now also need the velocity to be zero
+on the interior interface, i.e. $\mathbf v|_{\Gamma_i}=0$. At the time
+of writing this, there is no function in deal.II that handles this
+part, but it isn't particularly difficult to implement by hand:
+essentially, we just have to loop over all cells, and if it is a fluid
+cell and its neighbor is a solid cell, then add constraints that
+ensure that the velocity degrees of freedom on this face are
+zero. Some care is necessary to deal with the case that the adjacent
+solid cell is refined, yielding the following code:
+@code
+    std::vector<unsigned int> local_face_dof_indices (stokes_fe.dofs_per_face);
+    for (typename hp::DoFHandler<dim>::active_cell_iterator
+	   cell = dof_handler.begin_active();
+	 cell != dof_handler.end(); ++cell)
+      if (cell_is_in_fluid_domain (cell))
+	for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+	  if (!cell->at_boundary(f))
+	    {
+	      bool face_is_on_interface = false;
+
+	      if ((cell->neighbor(f)->has_children() == false)
+		  &&
+		  (cell_is_in_solid_domain (cell->neighbor(f))))
+		face_is_on_interface = true;
+	      else if (cell->neighbor(f)->has_children() == true)
+		{
+						   // The neighbor does
+						   // have
+						   // children. See if
+						   // any of the cells
+						   // on the other
+						   // side are elastic
+		  for (unsigned int sf=0; sf<cell->face(f)->n_children(); ++sf)
+		    if (cell_is_in_solid_domain (cell->neighbor_child_on_subface(f, sf)))
+		      {
+			face_is_on_interface = true;
+			break;
+		      }
+		}
+
+	      if (face_is_on_interface)
+		{
+		  cell->face(f)->get_dof_indices (local_face_dof_indices, 0);
+		  for (unsigned int i=0; i<local_face_dof_indices.size(); ++i)
+		    if (stokes_fe.face_system_to_component_index(i).first < dim)
+ 		      constraints.add_line (local_face_dof_indices[i]);
+		}
+	    }
+@endcode
+
+The call <code>constraints.add_line(t)</code> tells the
+ConstraintMatrix to start a new constraint for degree of freedom
+<code>t</code> of the form $x_t=\sum_{l=0}^{N-1} c_{tl} x_l +
+b_t$. Typically, one would then proceed to set individual coefficients
+$c_{tl}$ to nonzero values (using ConstraintMatrix::add_entry) or set
+$b_t$ to something nonzero (using
+ConstraintMatrix::set_inhomogeneity); doing nothing as above, funny as
+it looks, simply leaves the constraint to be $x_t=0$, which is exactly
+what we need in the current context. The call to
+FiniteElement::face_system_to_component_index makes sure that we only set
+boundary values to zero for velocity but not pressure components.
+
+Note that there are cases where this may yield incorrect results:
+notably, once we find a solid neighbor child to a current fluid cell,
+we assume that all neighbor children on the common face are in the
+solid subdomain. But that need not be so; consider, for example, the
+following mesh:
+@code
++---------+----+----+
+|         | f  |    |
+|    f    +----+----+
+|         | s  |    |
++---------+----+----+
+@endcode
+
+In this case, we would set all velocity degrees of freedom on the
+right face of the left cell to zero, which is incorrect for the top
+degree of freedom on that face. That said, that can only happen if the
+fluid and solid subdomains do not coincide with a set of complete
+coarse mesh cells — but this is a contradiction to the
+assumption stated at the end of the first section of this
+introduction.
+
+
+
+<h3>The testcase</h3>
+
+We will consider the following situation as a testcase:
+
+<img src="http://www.dealii.org/images/steps/developer/step-46.layout.png" alt="">
+
+As discussed at the top of this document, we need to assume in a few places
+that a cell is either entirely in the fluid or solid part of the domain and,
+furthermore, that all children of an inactive cell also belong to the same
+subdomain. This can definitely be ensured if the coarse mesh already
+subdivides the mesh into solid and fluid coarse mesh cells; given the geometry
+outlined above, we can do that by using an $8\times 8$ coarse mesh,
+conveniently provided by the GridGenerator::subdivided_hyper_rectangle
+function.
+
+The fixed boundary at the bottom implies $\mathbf u=0$, and we also
+prescribe Dirichlet conditions for the flow at the top so that we get
+inflow at the left and outflow at the right. At the left and right
+boundaries, no boundary conditions are imposed explicitly for the
+flow, yielding the implicit no-stress condition $(2\eta
+\varepsilon(\mathbf v) + p \mathbf 1) \cdot \mathbf n = 0$.
+The conditions on the interface between the two domains have already been
+discussed above.
+
+For simplicity, we choose the material parameters to be
+$\eta=\lambda=\mu=1$. In the results section below, we will also show
+a 3d simulation that can be obtained from the same program. The
+boundary conditions and geometry are defined nearly analogously to the
+2d situation above.
+
+
+<h4>Identifying which subdomain a cell is in</h4>
+
+In the program, we need a way to identify which part of the domain a cell is
+in. There are many different ways of doing this. A typical way would be to use
+the @ref GlossSubdomainId "subdomain_id" tag available with each cell, though
+this field has a special meaning in %parallel computations. An alternative
+is the @ref GlossMaterialId "material_id" field also available with
+every cell. It has the additional advantage that it is inherited from the
+mother to the child cell upon mesh refinement; in other words, we would set
+the material id once upon creating the mesh and it will be correct for all
+active cells even after several refinement cycles. We therefore go with this
+alternative: we define an <code>enum</code> with symbolic names for
+material_id numbers and will use them to identify which part of the domain a
+cell is on.
+
+Secondly, we use an object of type hp::DoFHandler. This class needs to know
+which cells will use the Stokes and which the elasticity finite element. At
+the beginning of each refinement cycle we will therefore have to walk over
+all cells and set the (in hp parlance) active FE index to whatever is
+appropriate in the current situation. While we can use symbolic names for the
+material id, the active FE index is in fact a number that will frequently be
+used to index into collections of objects (e.g. of type hp::FECollection and
+hp::QCollection); that means that the active FE index actually has to have
+value zero for the fluid and one for the elastic part of the domain.
+
+
+<h4>Linear solvers</h4>
+
+This program is primarily intended to show how to deal with different
+physics in different parts of the domain, and how to implement such
+models in deal.II. As a consequence, we won't bother coming up with a
+good solver: we'll just use the SparseDirectUMFPACK class which always
+works, even if not with optimal complexity. We will, however, comment
+on possible other solvers in the <a href="#Results">results</a> section.
+
+
+<h4>Mesh refinement</h4>
+
+One of the trickier aspects of this program is how to estimate the
+error. Because it works on almost any program, we'd like to use the
+KellyErrorEstimator, and we can relatively easily do that here as well using
+code like the following:
+@code
+  Vector<float> stokes_estimated_error_per_cell (triangulation.n_active_cells());
+  Vector<float> elasticity_estimated_error_per_cell (triangulation.n_active_cells());
+
+  std::vector<bool> stokes_component_mask (dim+1+dim, false);
+  for (unsigned int d=0; d<dim; ++d)
+    stokes_component_mask[d] = true;
+  KellyErrorEstimator<dim>::estimate (dof_handler,
+                                      face_q_collection,
+                                      typename FunctionMap<dim>::type(),
+                                      solution,
+                                      stokes_estimated_error_per_cell,
+                                      stokes_component_mask);
+
+  std::vector<bool> elasticity_component_mask (dim+1+dim, false);
+  for (unsigned int d=0; d<dim; ++d)
+    elasticity_component_mask[dim+1+d] = true;
+  KellyErrorEstimator<dim>::estimate (dof_handler,
+                                      face_q_collection,
+                                      typename FunctionMap<dim>::type(),
+                                      solution,
+                                      elasticity_estimated_error_per_cell,
+                                      elasticity_component_mask);
+@endcode
+This gives us two sets of error indicators for each cell. We would then
+somehow combine them into one for mesh refinement, for example using something
+like the following (note that we normalize the squared error indicator in the
+two vectors because error quantities have physical units that do not match in
+the current situation, leading to error indicators that may differ by orders
+of magnitude between the two subdomains):
+@code
+  stokes_estimated_error_per_cell /= stokes_estimated_error_per_cell.l2_norm();
+  elasticity_estimated_error_per_cell /= elasticity_estimated_error_per_cell.l2_norm();
+
+  Vector<float> estimated_error_per_cell (triangulation.n_active_cells());
+  estimated_error_per_cell += stokes_estimated_error_per_cell;
+  estimated_error_per_cell += elasticity_estimated_error_per_cell;
+@endcode
+(In the code, we actually weigh the error indicators 4:1 in favor of the ones
+computed on the Stokes subdomain since refinement is otherwise heavily biased
+towards the elastic subdomain, but this is just a technicality. The factor 4
+has been determined heuristically to work reasonably well.)
+
+While this principle is sound, it doesn't quite work as expected. The reason
+is that the KellyErrorEstimator class computes error indicators by integrating
+the jump in the solution's gradient around the faces of each cell. This jump
+is likely to be very large at the locations where the solution is
+discontinuous and extended by zero; it also doesn't become smaller as the mesh
+is refined. The KellyErrorEstimator class can't just ignore the interface
+because it essentially only sees an hp::DoFHandler where the element type
+changes from one cell to another — precisely the thing that the
+hp::DoFHandler was designed for, the interface in the current program looks no
+different than the interfaces in step-27, for example, and certainly no less
+legitimate. Be that as it may, the end result is that there is a layer of
+cells on both sides of the interface between the two subdomains where error
+indicators are irrationally large. Consequently, most of the mesh refinement
+is focused on the interface.
+
+This clearly wouldn't happen if we had a refinement indicator that actually
+understood something about the problem and simply ignored the interface between
+subdomains when integrating jump terms.
+On the other hand, this program is
+about showing how to represent problems where we have different physics in
+different subdomains, not about the peculiarities of the KellyErrorEstimator,
+and so we resort to the big hammer called "heuristics": we simply set the
+error indicators of cells at the interface to zero. This cuts off the spikes
+in the error indicators. At first sight one would also think that it prevents
+the mesh from being refined at the interface, but the requirement that
+neighboring cells may only differ by one level of refinement will still lead
+to a reasonably refined mesh.
+
+While this is clearly a suboptimal solution, it works for now and leaves room
+for future improvement.
+
diff --git a/examples/step-46/doc/kind b/examples/step-46/doc/kind
new file mode 100644
index 0000000..c1d9154
--- /dev/null
+++ b/examples/step-46/doc/kind
@@ -0,0 +1 @@
+techniques
diff --git a/examples/step-46/doc/results.dox b/examples/step-46/doc/results.dox
new file mode 100644
index 0000000..560002d
--- /dev/null
+++ b/examples/step-46/doc/results.dox
@@ -0,0 +1,322 @@
+<a name="Results"></a>
+<h1>Results</h1>
+
+<h3>2d results</h3>
+
+
+When running the program, you should get output like the following:
+@code
+Refinement cycle 0
+   Number of active cells: 64
+   Number of degrees of freedom: 531
+   Assembling...
+   Solving...
+   Writing output...
+
+Refinement cycle 1
+   Number of active cells: 136
+   Number of degrees of freedom: 1260
+   Assembling...
+   Solving...
+   Writing output...
+
+Refinement cycle 2
+   Number of active cells: 412
+   Number of degrees of freedom: 3667
+   Assembling...
+   Solving...
+   Writing output...
+
+Refinement cycle 3
+   Number of active cells: 1216
+   Number of degrees of freedom: 9999
+   Assembling...
+   Solving...
+   Writing output...
+
+Refinement cycle 4
+   Number of active cells: 2788
+   Number of degrees of freedom: 18537
+   Assembling...
+   Solving...
+   Writing output...
+
+Refinement cycle 5
+   Number of active cells: 6496
+   Number of degrees of freedom: 35985
+   Assembling...
+   Solving...
+   Writing output...
+@endcode
+
+The results are easily visualized:
+
+<TABLE WIDTH="60%" ALIGN="center">
+  <tr valign="top">
+    <td valign="top" align="center">
+      <img src="http://www.dealii.org/images/steps/developer/step-46.velocity-magnitude.png" alt="">
+
+      <p align="center">
+        Magnitude of the fluid velocity.
+      </p>
+    </td>
+
+    <td valign="top" align="center">
+      <img src="http://www.dealii.org/images/steps/developer/step-46.pressure.png" alt="">
+
+      <p align="center">
+        Fluid pressure. The dynamic range has been truncated to cut off the
+        pressure singularities at the top left and right corners of the domain
+        as well as the top corners of the solid that forms re-entrant corners
+        into the fluid domain.
+      </p>
+    </td>
+  </tr>
+  <tr valign="top">
+    <td valign="top" align="center">
+      <img src="http://www.dealii.org/images/steps/developer/step-46.velocity.png" alt="">
+
+      <p align="center">
+        Fluid velocity.
+      </p>
+    </td>
+
+    <td valign="top" align="center">
+      <img src="http://www.dealii.org/images/steps/developer/step-46.displacement.png" alt="">
+
+      <p align="center">
+        Solid displacement.
+      </p>
+    </td>
+  </tr>
+</table>
+
+In all figures, we have applied a mask to only show the original field, not
+the one extended by zero: for example, to plot the pressure, we have selected
+that part of the domain where the magnitude of the velocity is greater than
+$10^{-7}$.
+
+The plots are easily interpreted: as the flow drives down on the left side and
+up on the right side of the upright part of the solid, it produces a shear
+force that pulls the left side down and the right side up. An additional part
+of the force comes from the pressure, which bears down on the left side of the top
+and pulls up on the right side. Both forces yield a net torque on the solid
+that bends it to the left, as confirmed by the plot of the displacement
+vectors.
+
+
+<h3>3d results</h3>
+
+By changing the dimension of the <code>FluidStructureProblem</code>
+class in <code>main()</code> to 3, we can also run the same problem
+in 3d. You'd get output along the following lines:
+@code
+Refinement cycle 0
+   Number of active cells: 512
+   Number of degrees of freedom: 11631
+   Assembling...
+   Solving...
+   Writing output...
+
+Refinement cycle 1
+   Number of active cells: 1716
+   Number of degrees of freedom: 48984
+   Assembling...
+   Solving...
+
+Refinement cycle 2
+   Number of active cells: 8534
+   Number of degrees of freedom: 245647
+   Assembling...
+   Solving...
+@endcode
+You'll notice that the big bottleneck is the solver: SparseDirectUmfpack needs
+approximately 8 hours and some 42 GB of memory to solve the last iteration of
+this problem on a 2010 workstation (the second to last iteration took only 6
+minutes). Clearly a better solver is needed here, a topic discussed below.
+
+The results can also be visualized and yield some good pictures:
+
+<TABLE WIDTH="60%" ALIGN="center">
+  <tr valign="top">
+    <td valign="top" align="center">
+      <img src="http://www.dealii.org/images/steps/developer/step-46.3d.velocity.png" alt="">
+
+      <p align="center">
+        Vectors of the fluid velocity and magnitude of the
+        displacement of the solid part.
+      </p>
+    </td>
+
+    <td valign="top" align="center">
+      <img src="http://www.dealii.org/images/steps/developer/step-46.3d.streamlines.png" alt="">
+
+      <p align="center">
+        Streamlines of the velocity, with the mesh superimposed.
+      </p>
+    </td>
+  </tr>
+  <tr valign="top">
+    <td valign="top" align="center" colspan="2">
+      <img src="http://www.dealii.org/images/steps/developer/step-46.3d.displacement.png" alt="">
+
+      <p align="center">
+        Solid displacement.
+      </p>
+    </td>
+  </tr>
+</table>
+
+In addition to the lack of a good solver, the mesh is a bit
+unbalanced: mesh refinement heavily favors the fluid subdomain (in 2d,
+it was the other way around, prompting us to weigh the fluid error
+indicators higher). Clearly, some tweaking of the relative importance
+of error indicators in the two subdomains is important if one wanted
+to go on doing more 3d computations.
+
+
+<a name="extensions"></a>
+<h3>Possibilities for extensions</h3>
+
+<h4>Linear solvers and preconditioners</h4>
+
+An obvious place to improve the program would be to use a more
+sophisticated solver — in particular one that scales well and
+will also work for realistic 3d problems. This shouldn't actually be
+too hard to achieve here, because of the one-way coupling from fluid
+into solid. To this end, assume we had re-ordered degrees of freedom
+in such a way that we first have all velocity and pressure degrees of
+freedom, and then all displacements (this is easily possible using
+DoFRenumbering::component_wise). Then the system matrix could be split
+into the following block form:
+@f[
+  A_\text{global}
+  =
+  \begin{pmatrix}
+    A_{\text{fluid}} & 0 \\
+    B & A_{\text{solid}}
+  \end{pmatrix}
+@f]
+where $A_{\text{fluid}}$ is the Stokes matrix for velocity and pressure (it
+could be further subdivided into a $2\times 2$ matrix as in step-22, though
+this is immaterial for the current purpose),
+$A_{\text{solid}}$ results from the elasticity equations for the
+displacements, and $B$ is the matrix that comes from the interface
+conditions. Now notice that the matrix
+@f[
+  A_\text{global}^{-1}
+  =
+  \begin{pmatrix}
+    A_{\text{fluid}}^{-1} & 0 \\
+    -A_\text{solid}^{-1} B
+      A_\text{fluid}^{-1} & A_{\text{solid}}^{-1}
+  \end{pmatrix}
+@f]
+is the inverse of $A_\text{global}$. Applying this matrix requires
+only one solve with $A_\text{fluid}$ and $A_\text{solid}$ each since
+@f[
+  \begin{pmatrix}
+    p_x \\ p_y
+  \end{pmatrix}
+  =
+  \begin{pmatrix}
+    A_{\text{fluid}}^{-1} & 0 \\
+    -A_\text{solid}^{-1} B
+      A_\text{fluid}^{-1} & A_{\text{solid}}^{-1}
+  \end{pmatrix}
+  \begin{pmatrix}
+    x \\ y
+  \end{pmatrix}
+@f]
+can be computed as $p_x = A_{\text{fluid}}^{-1} x$ followed by
+$p_y = A_{\text{solid}}^{-1} (y-Bp_x)$.
+
+One can therefore expect that
+@f[
+  \widetilde{A_\text{global}^{-1}}
+  =
+  \begin{pmatrix}
+    \widetilde{A_{\text{fluid}}^{-1}} & 0 \\
+    -\widetilde{A_\text{solid}^{-1}} B
+      \widetilde{A_\text{fluid}^{-1}} & \widetilde{A_{\text{solid}}^{-1}}
+  \end{pmatrix}
+@f]
+would be a good preconditioner if $\widetilde{A_{\text{fluid}}^{-1}}
+\approx A_{\text{fluid}}^{-1}, \widetilde{A_{\text{solid}}^{-1}}
+\approx A_{\text{solid}}^{-1}$.
+
+That means, we only need good preconditioners for Stokes and the
+elasticity equations separately. These are well known: for
+Stokes, we can use the preconditioner discussed in the results section
+of step-22; for elasticity, a good preconditioner would be a single
+V-cycle of a geometric or algebraic multigrid. There are more open
+questions, however: For an "optimized" solver block-triangular
+preconditioner built from two sub-preconditioners, one point that
+often comes up is that, when choosing parameters for the
+sub-preconditioners, values that work well when solving the two
+problems separately may not be optimal when combined into a
+multiphysics preconditioner.  In particular, when solving just a solid
+or fluid mechanics problem separately, the balancing act between the
+number of iterations to convergence and the cost of applying the
+preconditioner on a per iteration basis may lead one to choose an
+expensive preconditioner for the Stokes problem and a cheap
+preconditioner for the elasticity problem (or vice versa).  When
+combined, however, there is the additional constraint that you want
+the two sub-preconditioners to converge at roughly the same rate, or
+else the cheap one may drive up the global number of iterations while
+the expensive one drives up the cost-per-iteration. For example, while a single AMG
+V-cycle is a good approach for elasticity by itself, when combined
+into a multiphysics problem there may be an incentive to using a full
+W-cycle or multiple cycles to help drive down the total solve time.
+
+
+<h4>Refinement indicators</h4>
+
+As mentioned in the introduction, the refinement indicator we use for this
+program is rather ad hoc. A better one would understand that the jump in the
+gradient of the solution across the interface is not indicative of the error
+but to be expected and ignore the interface when integrating the jump
+terms. Nevertheless, this is not what the KellyErrorEstimator class
+does. Another, bigger question, is whether this kind of estimator is a good
+strategy in the first place: for example, if we want to have maximal accuracy
+in one particular aspect of the displacement (e.g. the displacement at the top
+right corner of the solid), then is it appropriate to scale the error
+indicators for fluid and solid to the same magnitude? Maybe it is necessary to
+solve the fluid problem with more accuracy than the solid because the fluid
+solution directly affects the solid's solution? Maybe the other way around?
+
+Consequently, an obvious possibility for improving the program would be to
+implement a better refinement criterion. There is some literature on this
+topic; one of a variety of possible starting points would be the paper by
+Thomas Wick on "Adaptive finite elements for monolithic fluid-structure
+interaction on a prolongated domain: Applied to an heart valve simulation",
+Proceedings of the Computer Methods in Mechanics Conference 2011 (CMM-2011),
+9-12 May 2011, Warsaw, Poland.
+
+
+<h4>Verification</h4>
+
+The results above are purely qualitative as there is no evidence that our
+scheme in fact converges. An obvious thing to do would therefore be to add
+some quantitative measures to check that the scheme at least converges to
+<i>something</i>. For example, we could output for each refinement cycle the
+deflection of the top right corner of the part of the solid that protrudes
+into the fluid subdomain. Or we could compute the net force vector or torque
+the fluid exerts on the solid.
+
+
+<h4>Better models</h4>
+
+In reality, most fluid structure interaction problems are such that the movement
+of the solid does affect the flow of the fluid. For example, the forces of the
+air around an airfoil cause it to flex and to change its shape. Likewise, a
+flag flaps in the wind, completely changing its shape.
+
+Such problems where the coupling goes both ways are typically handled in an
+Arbitrary Lagrangian Eulerian (ALE) framework, in which the displacement of
+the solid is extended into the fluid domain in some smooth way, rather than by
+zero as we do here. The extended displacement field is then used to deform the
+mesh on which we compute the fluid flow. Furthermore, the boundary conditions
+for the fluid on the interface are no longer that the velocity is zero;
+rather, in a time dependent program, the fluid velocity must be equal to the
+time derivative of the displacement along the interface.
diff --git a/examples/step-46/doc/step-46.layout.fig b/examples/step-46/doc/step-46.layout.fig
new file mode 100644
index 0000000..b193840
--- /dev/null
+++ b/examples/step-46/doc/step-46.layout.fig
@@ -0,0 +1,43 @@
+#FIG 3.2  Produced by xfig version 3.2.5b
+Landscape
+Center
+Metric
+A4      
+100.00
+Single
+-2
+1200 2
+2 2 0 0 0 7 50 -1 45 0.000 0 0 -1 0 0 5
+	 1800 4500 5400 4500 5400 5400 1800 5400 1800 4500
+2 2 0 0 0 7 50 -1 45 0.000 0 0 -1 0 0 5
+	 3150 2700 4050 2700 4050 4500 3150 4500 3150 2700
+2 2 0 2 0 7 48 -1 -1 0.000 0 0 -1 0 0 5
+	 1800 1800 5400 1800 5400 5400 1800 5400 1800 1800
+2 1 0 1 0 7 48 -1 -1 0.000 0 0 -1 0 0 6
+	 1800 4500 3150 4500 3150 2700 4050 2700 4050 4500 5400 4500
+2 2 0 0 0 7 49 -1 -1 0.000 0 0 -1 0 0 5
+	 3240 4635 4005 4635 4005 4950 3240 4950 3240 4635
+2 2 0 0 0 7 48 -1 20 0.000 0 0 -1 0 0 5
+	 3240 4680 4005 4680 4005 4950 3240 4950 3240 4680
+2 1 0 2 0 7 49 -1 -1 0.000 0 0 -1 0 1 2
+	3 1 2.00 60.00 120.00
+	 4500 1575 4500 2025
+2 1 0 0 0 7 49 -1 -1 0.000 0 0 -1 0 0 2
+	 2250 1575 2250 2025
+2 1 0 2 0 7 49 -1 -1 0.000 0 0 -1 1 0 2
+	3 1 2.00 60.00 120.00
+	 2250 1575 2250 2025
+2 1 0 2 0 7 49 -1 -1 0.000 0 0 -1 1 0 2
+	3 1 2.00 60.00 120.00
+	 2700 1575 2700 2025
+2 1 0 2 0 7 49 -1 -1 0.000 0 0 -1 0 1 2
+	3 1 2.00 60.00 120.00
+	 4950 1575 4950 2025
+2 1 0 4 0 7 49 -1 -1 0.000 0 0 -1 0 0 4
+	 1800 4500 1800 5400 5400 5400 5400 4500
+4 1 0 49 -1 19 16 0.0000 4 195 570 2250 3150 fluid\001
+4 1 0 49 -1 19 16 0.0000 4 195 570 4950 3150 fluid\001
+4 1 0 46 -1 19 16 0.0000 4 195 630 3600 4905 solid\001
+4 1 0 49 -1 19 14 0.0000 4 240 1725 3600 5670 fixed boundary\001
+4 1 0 49 -1 19 14 0.0000 4 180 855 4725 1350 outflow\001
+4 1 0 49 -1 19 14 0.0000 4 180 690 2475 1350 inflow\001
diff --git a/examples/step-46/doc/tooltip b/examples/step-46/doc/tooltip
new file mode 100644
index 0000000..7cddc5c
--- /dev/null
+++ b/examples/step-46/doc/tooltip
@@ -0,0 +1 @@
+Coupling different physical models (flow, elasticity) in different parts of the domain
diff --git a/examples/step-46/step-46.cc b/examples/step-46/step-46.cc
new file mode 100644
index 0000000..d5f1602
--- /dev/null
+++ b/examples/step-46/step-46.cc
@@ -0,0 +1,1133 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2011 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Author: Wolfgang Bangerth, Texas A&M University, 2011
+ */
+
+
+// @sect3{Include files}
+
+// The include files for this program are the same as for many others
+// before. The only new one is the one that declares FE_Nothing as discussed
+// in the introduction. The ones in the hp directory have already been
+// discussed in step-27.
+
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/utilities.h>
+
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/sparse_direct.h>
+#include <deal.II/lac/constraint_matrix.h>
+
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/grid_refinement.h>
+
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/dofs/dof_accessor.h>
+
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_nothing.h>
+#include <deal.II/fe/fe_system.h>
+#include <deal.II/fe/fe_values.h>
+
+#include <deal.II/hp/dof_handler.h>
+#include <deal.II/hp/fe_collection.h>
+#include <deal.II/hp/fe_values.h>
+
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/numerics/error_estimator.h>
+
+#include <iostream>
+#include <fstream>
+#include <sstream>
+
+
+namespace Step46
+{
+  using namespace dealii;
+
+  // @sect3{The <code>FluidStructureProblem</code> class template}
+
+  // This is the main class. It is, if you want, a combination of step-8 and
+  // step-22 in that it has member variables that either address the global
+  // problem (the Triangulation and hp::DoFHandler objects, as well as the
+  // hp::FECollection and various linear algebra objects) or that pertain to
+  // either the elasticity or Stokes sub-problems. The general structure of
+  // the class, however, is like that of most of the other programs
+  // implementing stationary problems.
+  //
+  // There are a few helper functions (<code>cell_is_in_fluid_domain,
+  // cell_is_in_solid_domain</code>) of self-explanatory nature (operating on
+  // the symbolic names for the two subdomains that will be used as
+  // material_ids for cells belonging to the subdomains, as explained in the
+  // introduction) and a few functions (<code>make_grid,
+  // set_active_fe_indices, assemble_interface_term</code>) that have been
+  // broken out of other functions that can be found in many of the other
+  // tutorial programs and that will be discussed as we get to their
+  // implementation.
+  //
+  // The final set of variables (<code>viscosity, lambda, mu</code>)
+  // describes the material properties used for the two physics models.
+  template <int dim>
+  class FluidStructureProblem
+  {
+  public:
+    FluidStructureProblem (const unsigned int stokes_degree,
+                           const unsigned int elasticity_degree);
+    void run ();
+
+  private:
+    enum
+    {
+      fluid_domain_id,   // stored as the material_id of fluid cells
+      solid_domain_id    // stored as the material_id of solid cells
+    };
+
+    static bool
+    cell_is_in_fluid_domain (const typename hp::DoFHandler<dim>::cell_iterator &cell);
+
+    static bool
+    cell_is_in_solid_domain (const typename hp::DoFHandler<dim>::cell_iterator &cell);
+
+
+    void make_grid ();
+    void set_active_fe_indices ();
+    void setup_dofs ();
+    void assemble_system ();
+    void assemble_interface_term (const FEFaceValuesBase<dim>          &elasticity_fe_face_values,
+                                  const FEFaceValuesBase<dim>          &stokes_fe_face_values,
+                                  std::vector<Tensor<1,dim> >          &elasticity_phi,
+                                  std::vector<SymmetricTensor<2,dim> > &stokes_symgrad_phi_u,
+                                  std::vector<double>                  &stokes_phi_p,
+                                  FullMatrix<double>                   &local_interface_matrix) const;
+    void solve ();
+    void output_results (const unsigned int refinement_cycle) const;
+    void refine_mesh ();
+
+    const unsigned int    stokes_degree;
+    const unsigned int    elasticity_degree;
+
+    Triangulation<dim>    triangulation;
+    FESystem<dim>         stokes_fe;       // pushed first into fe_collection (index 0)
+    FESystem<dim>         elasticity_fe;   // pushed second into fe_collection (index 1)
+    hp::FECollection<dim> fe_collection;
+    hp::DoFHandler<dim>   dof_handler;
+
+    ConstraintMatrix      constraints;
+
+    SparsityPattern       sparsity_pattern;
+    SparseMatrix<double>  system_matrix;
+
+    Vector<double>        solution;
+    Vector<double>        system_rhs;
+
+    const double          viscosity;
+    const double          lambda;
+    const double          mu;
+  };
+
+
+  // @sect3{Boundary values and right hand side}
+
+  // The following classes do as their names suggest. The boundary values for
+  // the velocity are $\mathbf u=(0, \sin(\pi x))^T$ in 2d and $\mathbf u=(0,
+  // 0, \sin(\pi x)\sin(\pi y))^T$ in 3d, respectively. The remaining boundary
+  // conditions for this problem are all homogeneous and have been discussed in
+  // the introduction. The right hand side forcing term is zero for both the
+  // fluid and the solid.
+  template <int dim>
+  class StokesBoundaryValues : public Function<dim>
+  {
+  public:
+    StokesBoundaryValues () : Function<dim>(dim+1+dim) {}  // dim velocities + 1 pressure + dim displacements
+
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+
+    virtual void vector_value (const Point<dim> &p,
+                               Vector<double>   &value) const;  // fills all components by calling value()
+  };
+
+
+  template <int dim>
+  double
+  StokesBoundaryValues<dim>::value (const Point<dim>  &p,
+                                    const unsigned int component) const
+  {
+    Assert (component < this->n_components,
+            ExcIndexRange (component, 0, this->n_components));
+
+    if (component == dim-1)  // only the last velocity component is nonzero
+      switch (dim)
+        {
+        case 2:
+          return std::sin(numbers::PI*p[0]);
+        case 3:
+          return std::sin(numbers::PI*p[0]) * std::sin(numbers::PI*p[1]);
+        default:
+          Assert (false, ExcNotImplemented());  // if this does not stop execution, control reaches 'return 0' below
+        }
+
+    return 0;  // every other component has zero boundary value
+  }
+
+
+  template <int dim>
+  void
+  StokesBoundaryValues<dim>::vector_value (const Point<dim> &p,
+                                           Vector<double>   &values) const
+  {
+    for (unsigned int c=0; c<this->n_components; ++c)  // assumes values holds at least n_components entries; not checked here
+      values(c) = StokesBoundaryValues<dim>::value (p, c);  // qualified call to this class's value()
+  }
+
+
+
+  template <int dim>
+  class RightHandSide : public Function<dim>
+  {
+  public:
+    RightHandSide () : Function<dim>(dim+1) {}  // NOTE(review): dim+1 components here vs. dim+1+dim in StokesBoundaryValues -- confirm intended
+
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+
+    virtual void vector_value (const Point<dim> &p,
+                               Vector<double>   &value) const;  // fills all components by calling value()
+
+  };
+
+
+  template <int dim>
+  double
+  RightHandSide<dim>::value (const Point<dim>  &/*p*/,
+                             const unsigned int /*component*/) const
+  {
+    return 0;  // zero forcing regardless of point and component
+  }
+
+
+  template <int dim>
+  void
+  RightHandSide<dim>::vector_value (const Point<dim> &p,
+                                    Vector<double>   &values) const
+  {
+    for (unsigned int c=0; c<this->n_components; ++c)  // assumes values holds at least n_components entries; not checked here
+      values(c) = RightHandSide<dim>::value (p, c);  // always 0, see value() above
+  }
+
+
+
+  // @sect3{The <code>FluidStructureProblem</code> implementation}
+
+  // @sect4{Constructors and helper functions}
+
+  // Let's now get to the implementation of the primary class of this
+  // program. The first few functions are the constructor and the helper
+  // functions that can be used to determine which part of the domain a cell
+  // is in. Given the discussion of these topics in the introduction, their
+  // implementation is rather obvious. In the constructor, note that we have
+  // to construct the hp::FECollection object from the base elements for
+  // Stokes and elasticity; using the hp::FECollection::push_back function
+  // assigns them spots zero and one in this collection, an order that we have
+  // to remember and use consistently in the rest of the program.
+  template <int dim>
+  FluidStructureProblem<dim>::
+  FluidStructureProblem (const unsigned int stokes_degree,
+                         const unsigned int elasticity_degree)
+    :
+    stokes_degree (stokes_degree),
+    elasticity_degree (elasticity_degree),
+    triangulation (Triangulation<dim>::maximum_smoothing),
+    stokes_fe (FE_Q<dim>(stokes_degree+1), dim,     // velocity components 0..dim-1
+               FE_Q<dim>(stokes_degree), 1,         // pressure component dim
+               FE_Nothing<dim>(), dim),             // displacement components: FE_Nothing on fluid cells
+    elasticity_fe (FE_Nothing<dim>(), dim,          // velocity components: FE_Nothing on solid cells
+                   FE_Nothing<dim>(), 1,            // pressure component: FE_Nothing on solid cells
+                   FE_Q<dim>(elasticity_degree), dim),  // displacement components dim+1..2*dim
+    dof_handler (triangulation),
+    viscosity (2),
+    lambda (1),
+    mu (1)
+  {
+    fe_collection.push_back (stokes_fe);      // index 0: selected for fluid cells in set_active_fe_indices
+    fe_collection.push_back (elasticity_fe);  // index 1: selected for solid cells in set_active_fe_indices
+  }
+
+
+
+
+  template <int dim>
+  bool
+  FluidStructureProblem<dim>::
+  cell_is_in_fluid_domain (const typename hp::DoFHandler<dim>::cell_iterator &cell)
+  {
+    return (cell->material_id() == fluid_domain_id);  // material ids are assigned in make_grid
+  }
+
+
+  template <int dim>
+  bool
+  FluidStructureProblem<dim>::
+  cell_is_in_solid_domain (const typename hp::DoFHandler<dim>::cell_iterator &cell)
+  {
+    return (cell->material_id() == solid_domain_id);  // material ids are assigned in make_grid
+  }
+
+
+  // @sect4{Meshes and assigning subdomains}
+
+  // The next pair of functions deals with generating a mesh and making sure
+  // all flags that denote subdomains are correct. <code>make_grid</code>, as
+  // discussed in the introduction, generates an $8\times 8$ mesh (or an
+  // $8\times 8\times 8$ mesh in 3d) to make sure that each coarse mesh cell
+  // is completely within one of the subdomains. After generating this mesh,
+  // we loop over its boundary and set the boundary indicator to one at the
+  // top boundary, the only place where we set nonzero Dirichlet boundary
+  // conditions. After this, we loop again over all cells to set the material
+  // indicator — used to denote which part of the domain we are in — to
+  // either the fluid or solid indicator.
+  template <int dim>
+  void
+  FluidStructureProblem<dim>::make_grid ()
+  {
+    GridGenerator::subdivided_hyper_cube (triangulation, 8, -1, 1);
+
+    for (typename Triangulation<dim>::active_cell_iterator
+         cell = triangulation.begin_active();
+         cell != triangulation.end(); ++cell)
+      for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+        if (cell->face(f)->at_boundary()
+            &&
+            (cell->face(f)->center()[dim-1] == 1))  // top face of the [-1,1]^dim cube
+          cell->face(f)->set_all_boundary_ids(1);
+
+
+    for (typename Triangulation<dim>::active_cell_iterator
+         cell = triangulation.begin_active();       // iterate the mesh directly; no DoFs are distributed yet
+         cell != triangulation.end(); ++cell)
+      if (((std::fabs(cell->center()[0]) < 0.25)
+           &&
+           (cell->center()[dim-1] > 0.5))
+          ||
+          ((std::fabs(cell->center()[0]) >= 0.25)
+           &&
+           (cell->center()[dim-1] > -0.5)))
+        cell->set_material_id (fluid_domain_id);
+      else
+        cell->set_material_id (solid_domain_id);
+  }
+
+
+  // The second part of this pair of functions determines which finite element
+  // to use on each cell. Above we have set the material indicator for each
+  // coarse mesh cell, and as mentioned in the introduction, this information
+  // is inherited from mother to child cell upon mesh refinement.
+  //
+  // In other words, whenever we have refined (or created) the mesh, we can
+  // rely on the material indicators to be a correct description of which part
+  // of the domain a cell is in. We then use this to set the active FE index
+  // of the cell to the corresponding element of the hp::FECollection member
+  // variable of this class: zero for fluid cells, one for solid cells.
+  template <int dim>
+  void
+  FluidStructureProblem<dim>::set_active_fe_indices ()
+  {
+    for (typename hp::DoFHandler<dim>::active_cell_iterator
+         cell = dof_handler.begin_active();
+         cell != dof_handler.end(); ++cell)
+      {
+        if (cell_is_in_fluid_domain(cell))
+          cell->set_active_fe_index (0);  // stokes_fe, first entry of fe_collection
+        else if (cell_is_in_solid_domain(cell))
+          cell->set_active_fe_index (1);  // elasticity_fe, second entry of fe_collection
+        else
+          Assert (false, ExcNotImplemented());  // material id matches neither subdomain
+      }
+  }
+
+
+  // @sect4{<code>FluidStructureProblem::setup_dofs</code>}
+
+  // The next step is to setup the data structures for the linear system. To
+  // this end, we first have to set the active FE indices with the function
+  // immediately above, then distribute degrees of freedom, and then determine
+  // constraints on the linear system. The latter includes hanging node
+  // constraints as usual, but also the inhomogeneous boundary values at the
+  // top fluid boundary, and zero boundary values along the perimeter of the
+  // solid subdomain.
+  template <int dim>
+  void
+  FluidStructureProblem<dim>::setup_dofs ()
+  {
+    set_active_fe_indices ();
+    dof_handler.distribute_dofs (fe_collection);
+
+    {
+      constraints.clear ();
+      DoFTools::make_hanging_node_constraints (dof_handler,
+                                               constraints);
+
+      const FEValuesExtractors::Vector velocities(0);
+      VectorTools::interpolate_boundary_values (dof_handler,
+                                                1,  // boundary id 1: top boundary, set in make_grid
+                                                StokesBoundaryValues<dim>(),
+                                                constraints,
+                                                fe_collection.component_mask(velocities));
+
+      const FEValuesExtractors::Vector displacements(dim+1);
+      VectorTools::interpolate_boundary_values (dof_handler,
+                                                0,  // boundary id 0: presumably every face not re-tagged in make_grid -- confirm
+                                                ZeroFunction<dim>(dim+1+dim),
+                                                constraints,
+                                                fe_collection.component_mask(displacements));
+    }
+
+    // There are more constraints we have to handle, though: we have to make
+    // sure that the velocity is zero at the interface between fluid and
+    // solid. The following piece of code was already presented in the
+    // introduction:
+    {
+      std::vector<types::global_dof_index> local_face_dof_indices (stokes_fe.dofs_per_face);
+      for (typename hp::DoFHandler<dim>::active_cell_iterator
+           cell = dof_handler.begin_active();
+           cell != dof_handler.end(); ++cell)
+        if (cell_is_in_fluid_domain (cell))
+          for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+            if (!cell->at_boundary(f))
+              {
+                bool face_is_on_interface = false;
+
+                if ((cell->neighbor(f)->has_children() == false)  // neighbor has no children: test it directly
+                    &&
+                    (cell_is_in_solid_domain (cell->neighbor(f))))
+                  face_is_on_interface = true;
+                else if (cell->neighbor(f)->has_children() == true)  // neighbor is refined: test its children on each subface
+                  {
+                    for (unsigned int sf=0; sf<cell->face(f)->n_children(); ++sf)
+                      if (cell_is_in_solid_domain (cell->neighbor_child_on_subface
+                                                   (f, sf)))
+                        {
+                          face_is_on_interface = true;
+                          break;
+                        }
+                  }
+
+                if (face_is_on_interface)
+                  {
+                    cell->face(f)->get_dof_indices (local_face_dof_indices, 0);  // second argument presumably selects FE index 0 (stokes_fe) -- confirm
+                    for (unsigned int i=0; i<local_face_dof_indices.size(); ++i)
+                      if (stokes_fe.face_system_to_component_index(i).first < dim)  // components below dim are velocities
+                        constraints.add_line (local_face_dof_indices[i]);
+                  }
+              }
+    }
+
+    // At the end of all this, we can declare to the constraints object that
+    // we now have all constraints ready to go and that the object can rebuild
+    // its internal data structures for better efficiency:
+    constraints.close ();
+
+    std::cout << "   Number of active cells: "
+              << triangulation.n_active_cells()
+              << std::endl
+              << "   Number of degrees of freedom: "
+              << dof_handler.n_dofs()
+              << std::endl;
+
+    // In the rest of this function we create a sparsity pattern as discussed
+    // extensively in the introduction, and use it to initialize the matrix;
+    // then also set vectors to their correct sizes:
+    {
+      DynamicSparsityPattern dsp (dof_handler.n_dofs(),
+                                  dof_handler.n_dofs());
+
+      Table<2,DoFTools::Coupling> cell_coupling (fe_collection.n_components(),
+                                                 fe_collection.n_components());
+      Table<2,DoFTools::Coupling> face_coupling (fe_collection.n_components(),
+                                                 fe_collection.n_components());
+
+      for (unsigned int c=0; c<fe_collection.n_components(); ++c)
+        for (unsigned int d=0; d<fe_collection.n_components(); ++d)
+          {
+            if (((c<dim+1) && (d<dim+1)              // velocity/pressure block ...
+                 && !((c==dim) && (d==dim)))         // ... except the pressure-pressure entry
+                ||
+                ((c>=dim+1) && (d>=dim+1)))          // displacement-displacement block
+              cell_coupling[c][d] = DoFTools::always;
+
+            if ((c>=dim+1) && (d<dim+1))             // displacement rows against velocity/pressure columns
+              face_coupling[c][d] = DoFTools::always;
+          }
+
+      DoFTools::make_flux_sparsity_pattern (dof_handler, dsp,
+                                            cell_coupling, face_coupling);
+      constraints.condense (dsp);
+      sparsity_pattern.copy_from (dsp);
+    }
+
+    system_matrix.reinit (sparsity_pattern);
+
+    solution.reinit (dof_handler.n_dofs());
+    system_rhs.reinit (dof_handler.n_dofs());
+  }
+
+
+
+  // @sect4{<code>FluidStructureProblem::assemble_system</code>}
+
+  // Following is the central function of this program: the one that assembles
+  // the linear system. It has a long section of setting up auxiliary
+  // functions at the beginning: from creating the quadrature formulas and
+  // setting up the FEValues, FEFaceValues and FESubfaceValues objects
+  // necessary to integrate the cell terms as well as the interface terms for
+  // the case where cells along the interface come together at same size or
+  // with differing levels of refinement...
+  template <int dim>
+  void FluidStructureProblem<dim>::assemble_system ()
+  {
+    system_matrix=0;
+    system_rhs=0;
+
+    const QGauss<dim> stokes_quadrature(stokes_degree+2);
+    const QGauss<dim> elasticity_quadrature(elasticity_degree+2);
+
+    hp::QCollection<dim>  q_collection;
+    q_collection.push_back (stokes_quadrature);
+    q_collection.push_back (elasticity_quadrature);
+
+    hp::FEValues<dim> hp_fe_values (fe_collection, q_collection,
+                                    update_values    |
+                                    update_quadrature_points  |
+                                    update_JxW_values |
+                                    update_gradients);
+
+    // Both sides of an interface face are evaluated with the same face
+    // quadrature rule so that quadrature points on the two sides match up:
+    const QGauss<dim-1> common_face_quadrature(std::max (stokes_degree+2,
+                                                         elasticity_degree+2));
+
+    // The Stokes face objects request <code>update_values</code> in addition
+    // to gradients, normals and JxW values because the interface term needs
+    // the pressure shape function values at the face quadrature points:
+    FEFaceValues<dim>    stokes_fe_face_values (stokes_fe,
+                                                common_face_quadrature,
+                                                update_values |
+                                                update_JxW_values |
+                                                update_normal_vectors |
+                                                update_gradients);
+    FEFaceValues<dim>    elasticity_fe_face_values (elasticity_fe,
+                                                    common_face_quadrature,
+                                                    update_values);
+    FESubfaceValues<dim> stokes_fe_subface_values (stokes_fe,
+                                                   common_face_quadrature,
+                                                   update_values |
+                                                   update_JxW_values |
+                                                   update_normal_vectors |
+                                                   update_gradients);
+    FESubfaceValues<dim> elasticity_fe_subface_values (elasticity_fe,
+                                                       common_face_quadrature,
+                                                       update_values);
+
+    // ...to objects that are needed to describe the local contributions to
+    // the global linear system...
+    const unsigned int        stokes_dofs_per_cell     = stokes_fe.dofs_per_cell;
+    const unsigned int        elasticity_dofs_per_cell = elasticity_fe.dofs_per_cell;
+
+    // The cell matrix and right hand side are resized for every cell since
+    // their size depends on the element active there; the interface matrix
+    // always couples elasticity rows with Stokes columns:
+    FullMatrix<double>        local_matrix;
+    FullMatrix<double>        local_interface_matrix (elasticity_dofs_per_cell,
+                                                      stokes_dofs_per_cell);
+    Vector<double>            local_rhs;
+
+    std::vector<types::global_dof_index> local_dof_indices;
+    std::vector<types::global_dof_index> neighbor_dof_indices (stokes_dofs_per_cell);
+
+    const RightHandSide<dim>  right_hand_side;
+
+    // ...to variables that allow us to extract certain components of the
+    // shape functions and cache their values rather than having to recompute
+    // them at every quadrature point:
+    const FEValuesExtractors::Vector     velocities (0);
+    const FEValuesExtractors::Scalar     pressure (dim);
+    const FEValuesExtractors::Vector     displacements (dim+1);
+
+    std::vector<SymmetricTensor<2,dim> > stokes_symgrad_phi_u (stokes_dofs_per_cell);
+    std::vector<double>                  stokes_div_phi_u     (stokes_dofs_per_cell);
+    std::vector<double>                  stokes_phi_p         (stokes_dofs_per_cell);
+
+    std::vector<Tensor<2,dim> >          elasticity_grad_phi (elasticity_dofs_per_cell);
+    std::vector<double>                  elasticity_div_phi  (elasticity_dofs_per_cell);
+    std::vector<Tensor<1,dim> >          elasticity_phi      (elasticity_dofs_per_cell);
+
+    // Then comes the main loop over all cells and, as in step-27, the
+    // initialization of the hp::FEValues object for the current cell and the
+    // extraction of a FEValues object that is appropriate for the current
+    // cell:
+    typename hp::DoFHandler<dim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+    for (; cell!=endc; ++cell)
+      {
+        hp_fe_values.reinit (cell);
+
+        const FEValues<dim> &fe_values = hp_fe_values.get_present_fe_values();
+
+        local_matrix.reinit (cell->get_fe().dofs_per_cell,
+                             cell->get_fe().dofs_per_cell);
+        local_rhs.reinit (cell->get_fe().dofs_per_cell);
+
+        // With all of this done, we continue to assemble the cell terms for
+        // cells that are part of the Stokes and elastic regions. While we
+        // could in principle do this in one formula, in effect implementing
+        // the one bilinear form stated in the introduction, we realize that
+        // our finite element spaces are chosen in such a way that on each
+        // cell, one set of variables (either velocities and pressure, or
+        // displacements) are always zero, and consequently a more efficient
+        // way of computing local integrals is to do only what's necessary
+        // based on an <code>if</code> clause that tests which part of the
+        // domain we are in.
+        //
+        // The actual computation of the local matrix is the same as in
+        // step-22 as well as that given in the @ref vector_valued
+        // documentation module for the elasticity equations:
+        if (cell_is_in_fluid_domain (cell))
+          {
+            const unsigned int dofs_per_cell = cell->get_fe().dofs_per_cell;
+            Assert (dofs_per_cell == stokes_dofs_per_cell,
+                    ExcInternalError());
+
+            for (unsigned int q=0; q<fe_values.n_quadrature_points; ++q)
+              {
+                for (unsigned int k=0; k<dofs_per_cell; ++k)
+                  {
+                    stokes_symgrad_phi_u[k] = fe_values[velocities].symmetric_gradient (k, q);
+                    stokes_div_phi_u[k]     = fe_values[velocities].divergence (k, q);
+                    stokes_phi_p[k]         = fe_values[pressure].value (k, q);
+                  }
+
+                for (unsigned int i=0; i<dofs_per_cell; ++i)
+                  for (unsigned int j=0; j<dofs_per_cell; ++j)
+                    local_matrix(i,j) += (2 * viscosity * stokes_symgrad_phi_u[i] * stokes_symgrad_phi_u[j]
+                                          - stokes_div_phi_u[i] * stokes_phi_p[j]
+                                          - stokes_phi_p[i] * stokes_div_phi_u[j])
+                                         * fe_values.JxW(q);
+              }
+          }
+        else
+          {
+            const unsigned int dofs_per_cell = cell->get_fe().dofs_per_cell;
+            Assert (dofs_per_cell == elasticity_dofs_per_cell,
+                    ExcInternalError());
+
+            for (unsigned int q=0; q<fe_values.n_quadrature_points; ++q)
+              {
+                for (unsigned int k=0; k<dofs_per_cell; ++k)
+                  {
+                    elasticity_grad_phi[k] = fe_values[displacements].gradient (k, q);
+                    elasticity_div_phi[k]  = fe_values[displacements].divergence (k, q);
+                  }
+
+                for (unsigned int i=0; i<dofs_per_cell; ++i)
+                  for (unsigned int j=0; j<dofs_per_cell; ++j)
+                    {
+                      local_matrix(i,j)
+                      +=  (lambda *
+                           elasticity_div_phi[i] * elasticity_div_phi[j]
+                           +
+                           mu *
+                           scalar_product(elasticity_grad_phi[i], elasticity_grad_phi[j])
+                           +
+                           mu *
+                           scalar_product(elasticity_grad_phi[i], transpose(elasticity_grad_phi[j]))
+                          )
+                          *
+                          fe_values.JxW(q);
+                    }
+              }
+          }
+
+        // Once we have the contributions from cell integrals, we copy them
+        // into the global matrix (taking care of constraints right away,
+        // through the ConstraintMatrix::distribute_local_to_global
+        // function). Note that we have not written anything into the
+        // <code>local_rhs</code> variable, though we still need to pass it
+        // along since the elimination of nonzero boundary values requires the
+        // modification of local and consequently also global right hand side
+        // values:
+        local_dof_indices.resize (cell->get_fe().dofs_per_cell);
+        cell->get_dof_indices (local_dof_indices);
+        constraints.distribute_local_to_global (local_matrix, local_rhs,
+                                                local_dof_indices,
+                                                system_matrix, system_rhs);
+
+        // The more interesting part of this function is where we see about
+        // face terms along the interface between the two subdomains. To this
+        // end, we first have to make sure that we only assemble them once
+        // even though a loop over all faces of all cells would encounter each
+        // part of the interface twice. We arbitrarily make the decision that
+        // we will only evaluate interface terms if the current cell is part
+        // of the solid subdomain and if, consequently, a face is not at the
+        // boundary and the potential neighbor behind it is part of the fluid
+        // domain. Let's start with these conditions:
+        if (cell_is_in_solid_domain (cell))
+          for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+            if (cell->at_boundary(f) == false)
+              {
+                // At this point we know that the current cell is a candidate
+                // for integration and that a neighbor behind face
+                // <code>f</code> exists. There are now three possibilities:
+                //
+                // - The neighbor is at the same refinement level and has no
+                //   children.
+                // - The neighbor has children.
+                // - The neighbor is coarser.
+                //
+                // In all three cases, we are only interested in it if it is
+                // part of the fluid subdomain. So let us start with the first
+                // and simplest case: if the neighbor is at the same level,
+                // has no children, and is a fluid cell, then the two cells
+                // share a boundary that is part of the interface along which
+                // we want to integrate interface terms. All we have to do is
+                // initialize two FEFaceValues object with the current face
+                // and the face of the neighboring cell (note how we find out
+                // which face of the neighboring cell borders on the current
+                // cell) and pass things off to the function that evaluates
+                // the interface terms (the third through fifth arguments to
+                // this function provide it with scratch arrays). The result
+                // is then again copied into the global matrix, using a
+                // function that knows that the DoF indices of rows and
+                // columns of the local matrix result from different cells:
+                if ((cell->neighbor(f)->level() == cell->level())
+                    &&
+                    (cell->neighbor(f)->has_children() == false)
+                    &&
+                    cell_is_in_fluid_domain (cell->neighbor(f)))
+                  {
+                    elasticity_fe_face_values.reinit (cell, f);
+                    stokes_fe_face_values.reinit (cell->neighbor(f),
+                                                  cell->neighbor_of_neighbor(f));
+
+                    assemble_interface_term (elasticity_fe_face_values, stokes_fe_face_values,
+                                             elasticity_phi, stokes_symgrad_phi_u, stokes_phi_p,
+                                             local_interface_matrix);
+
+                    cell->neighbor(f)->get_dof_indices (neighbor_dof_indices);
+                    constraints.distribute_local_to_global(local_interface_matrix,
+                                                           local_dof_indices,
+                                                           neighbor_dof_indices,
+                                                           system_matrix);
+                  }
+
+                // The second case is if the neighbor has further children. In
+                // that case, we have to loop over all the children of the
+                // neighbor to see if they are part of the fluid subdomain. If
+                // they are, then we integrate over the common interface,
+                // which is a face for the neighbor and a subface of the
+                // current cell, requiring us to use an FEFaceValues for the
+                // neighbor and an FESubfaceValues for the current cell:
+                else if ((cell->neighbor(f)->level() == cell->level())
+                         &&
+                         (cell->neighbor(f)->has_children() == true))
+                  {
+                    for (unsigned int subface=0;
+                         subface<cell->face(f)->n_children();
+                         ++subface)
+                      if (cell_is_in_fluid_domain (cell->neighbor_child_on_subface
+                                                   (f, subface)))
+                        {
+                          elasticity_fe_subface_values.reinit (cell,
+                                                               f,
+                                                               subface);
+                          stokes_fe_face_values.reinit (cell->neighbor_child_on_subface (f, subface),
+                                                        cell->neighbor_of_neighbor(f));
+
+                          assemble_interface_term (elasticity_fe_subface_values,
+                                                   stokes_fe_face_values,
+                                                   elasticity_phi,
+                                                   stokes_symgrad_phi_u, stokes_phi_p,
+                                                   local_interface_matrix);
+
+                          cell->neighbor_child_on_subface (f, subface)
+                          ->get_dof_indices (neighbor_dof_indices);
+                          constraints.distribute_local_to_global(local_interface_matrix,
+                                                                 local_dof_indices,
+                                                                 neighbor_dof_indices,
+                                                                 system_matrix);
+                        }
+                  }
+
+                // The last option is that the neighbor is coarser. In that
+                // case we have to use an FESubfaceValues object for the
+                // neighbor and a FEFaceValues for the current cell; the rest
+                // is the same as before:
+                else if (cell->neighbor_is_coarser(f)
+                         &&
+                         cell_is_in_fluid_domain(cell->neighbor(f)))
+                  {
+                    elasticity_fe_face_values.reinit (cell, f);
+                    stokes_fe_subface_values.reinit (cell->neighbor(f),
+                                                     cell->neighbor_of_coarser_neighbor(f).first,
+                                                     cell->neighbor_of_coarser_neighbor(f).second);
+
+                    assemble_interface_term (elasticity_fe_face_values,
+                                             stokes_fe_subface_values,
+                                             elasticity_phi,
+                                             stokes_symgrad_phi_u, stokes_phi_p,
+                                             local_interface_matrix);
+
+                    cell->neighbor(f)->get_dof_indices (neighbor_dof_indices);
+                    constraints.distribute_local_to_global(local_interface_matrix,
+                                                           local_dof_indices,
+                                                           neighbor_dof_indices,
+                                                           system_matrix);
+
+                  }
+              }
+      }
+  }
+
+
+
+  // In the function that assembles the global system, we passed computing
+  // interface terms to a separate function we discuss here. The key is that
+  // even though we can't predict the combination of FEFaceValues and
+  // FESubfaceValues objects, they are both derived from the FEFaceValuesBase
+  // class and consequently we don't have to care: the function is simply
+  // called with two such objects denoting the values of the shape functions
+  // on the quadrature points of the two sides of the face. We then do what we
+  // always do: we fill the scratch arrays with the values of shape functions
+  // and their derivatives, and then loop over all entries of the matrix to
+  // compute the local integrals. The details of the bilinear form we evaluate
+  // here are given in the introduction.
+  //
+  // Arguments:
+  // - <code>elasticity_fe_face_values</code>: solid side of the face; only
+  //   shape function values are read from it.
+  // - <code>stokes_fe_face_values</code>: fluid side of the face; shape
+  //   function values, gradients, normal vectors and JxW values are read
+  //   from it, so it must have been created with the corresponding update
+  //   flags.
+  // - <code>elasticity_phi</code>, <code>stokes_symgrad_phi_u</code>,
+  //   <code>stokes_phi_p</code>: scratch arrays that are overwritten at
+  //   every quadrature point; they need at least as many entries as the
+  //   respective element has degrees of freedom per cell.
+  // - <code>local_interface_matrix</code>: output; it is zeroed first, its
+  //   rows correspond to elasticity and its columns to Stokes shape
+  //   functions.
+  template <int dim>
+  void
+  FluidStructureProblem<dim>::
+  assemble_interface_term (const FEFaceValuesBase<dim>          &elasticity_fe_face_values,
+                           const FEFaceValuesBase<dim>          &stokes_fe_face_values,
+                           std::vector<Tensor<1,dim> >          &elasticity_phi,
+                           std::vector<SymmetricTensor<2,dim> > &stokes_symgrad_phi_u,
+                           std::vector<double>                  &stokes_phi_p,
+                           FullMatrix<double>                   &local_interface_matrix) const
+  {
+    Assert (stokes_fe_face_values.n_quadrature_points ==
+            elasticity_fe_face_values.n_quadrature_points,
+            ExcInternalError());
+    const unsigned int n_face_quadrature_points
+      = elasticity_fe_face_values.n_quadrature_points;
+
+    const FEValuesExtractors::Vector velocities (0);
+    const FEValuesExtractors::Scalar pressure (dim);
+    const FEValuesExtractors::Vector displacements (dim+1);
+
+    local_interface_matrix = 0;
+    for (unsigned int q=0; q<n_face_quadrature_points; ++q)
+      {
+        const Tensor<1,dim> normal_vector = stokes_fe_face_values.normal_vector(q);
+
+        // Evaluate the Stokes shape functions on the face. The pressure
+        // values have to be recomputed here as well: the scratch array
+        // passed in by the caller otherwise still holds whatever was stored
+        // in it during an earlier cell-interior integration, which has
+        // nothing to do with the current face quadrature point.
+        for (unsigned int k=0; k<stokes_fe_face_values.dofs_per_cell; ++k)
+          {
+            stokes_symgrad_phi_u[k] = stokes_fe_face_values[velocities].symmetric_gradient (k, q);
+            stokes_phi_p[k]         = stokes_fe_face_values[pressure].value (k, q);
+          }
+        for (unsigned int k=0; k<elasticity_fe_face_values.dofs_per_cell; ++k)
+          elasticity_phi[k] = elasticity_fe_face_values[displacements].value (k,q);
+
+        // NOTE(review): the pressure enters with a plus sign here, whereas
+        // the cell terms in assemble_system() subtract the
+        // pressure-divergence products; confirm the sign against the
+        // bilinear form stated in the introduction.
+        for (unsigned int i=0; i<elasticity_fe_face_values.dofs_per_cell; ++i)
+          for (unsigned int j=0; j<stokes_fe_face_values.dofs_per_cell; ++j)
+            local_interface_matrix(i,j) += -((2 * viscosity *
+                                              (stokes_symgrad_phi_u[j] *
+                                               normal_vector)
+                                              +
+                                              stokes_phi_p[j] *
+                                              normal_vector) *
+                                             elasticity_phi[i] *
+                                             stokes_fe_face_values.JxW(q));
+      }
+  }
+
+
+  // @sect4{<code>FluidStructureProblem::solve</code>}
+
+  // As discussed in the introduction, we use a rather trivial solver here: we
+  // just pass the linear system off to the SparseDirectUMFPACK direct solver
+  // (see, for example, step-29). The only thing we have to do after solving
+  // is ensure that hanging node and boundary value constraints are correct.
+  template <int dim>
+  void
+  FluidStructureProblem<dim>::solve ()
+  {
+    // Hand the assembled matrix to the UMFPACK-based direct solver and apply
+    // it to the right hand side; the result is written into
+    // <code>solution</code>.
+    SparseDirectUMFPACK umfpack_solver;
+    umfpack_solver.initialize (system_matrix);
+    umfpack_solver.vmult (solution, system_rhs);
+
+    // Constrained degrees of freedom are not determined by the solve itself,
+    // so let the constraints object fill in their values afterwards.
+    constraints.distribute (solution);
+  }
+
+
+
+  // @sect4{<code>FluidStructureProblem::output_results</code>}
+
+  // Generating graphical output is rather trivial here: all we have to do is
+  // identify which components of the solution vector belong to scalars and/or
+  // vectors (see, for example, step-22 for a previous example), and then pass
+  // it all on to the DataOut class (with the second template argument equal
+  // to hp::DoFHandler instead of the usual default DoFHandler):
+  template <int dim>
+  void
+  FluidStructureProblem<dim>::
+  output_results (const unsigned int refinement_cycle)  const
+  {
+    typedef DataComponentInterpretation::DataComponentInterpretation Interpretation;
+    typedef DataOut<dim,hp::DoFHandler<dim> >                        HpDataOut;
+
+    // Build the name and the interpretation of every solution component side
+    // by side, in the order in which the components are stored: dim
+    // velocities, one pressure, dim displacements.
+    std::vector<std::string>    component_names;
+    std::vector<Interpretation> component_kinds;
+
+    for (unsigned int d=0; d<dim; ++d)
+      {
+        component_names.push_back ("velocity");
+        component_kinds.push_back (DataComponentInterpretation::component_is_part_of_vector);
+      }
+
+    component_names.push_back ("pressure");
+    component_kinds.push_back (DataComponentInterpretation::component_is_scalar);
+
+    for (unsigned int d=0; d<dim; ++d)
+      {
+        component_names.push_back ("displacement");
+        component_kinds.push_back (DataComponentInterpretation::component_is_part_of_vector);
+      }
+
+    // Attach the hp DoFHandler and the solution, then generate the patches:
+    HpDataOut data_out;
+    data_out.attach_dof_handler (dof_handler);
+    data_out.add_data_vector (solution, component_names,
+                              HpDataOut::type_dof_data,
+                              component_kinds);
+    data_out.build_patches ();
+
+    // One VTK file per refinement cycle, with the cycle number padded to two
+    // digits:
+    const std::string filename = ("solution-"
+                                  + Utilities::int_to_string (refinement_cycle, 2)
+                                  + ".vtk");
+
+    std::ofstream vtk_file (filename.c_str());
+    data_out.write_vtk (vtk_file);
+  }
+
+
+  // @sect4{<code>FluidStructureProblem::refine_mesh</code>}
+
+  // The next step is to refine the mesh. As was discussed in the
+  // introduction, this is a bit tricky primarily because the fluid and the
+  // solid subdomains use variables that have different physical dimensions
+  // and for which the absolute magnitude of error estimates is consequently
+  // not directly comparable. We will therefore have to scale them. At the top
+  // of the function, we therefore first compute error estimates for the
+  // different variables separately (using the velocities but not the pressure
+  // for the fluid domain, and the displacements in the solid domain):
+  template <int dim>
+  void
+  FluidStructureProblem<dim>::refine_mesh ()
+  {
+    typedef typename hp::DoFHandler<dim>::active_cell_iterator ActiveCellIterator;
+    typedef typename hp::DoFHandler<dim>::cell_iterator        CellIterator;
+
+    // One Gauss face rule per element of the collection, Stokes first and
+    // elasticity second:
+    hp::QCollection<dim-1> face_quadratures;
+    face_quadratures.push_back (QGauss<dim-1>(stokes_degree+2));
+    face_quadratures.push_back (QGauss<dim-1>(elasticity_degree+2));
+
+    // Kelly indicators based on the velocity components only (the pressure
+    // is left out)...
+    Vector<float> stokes_indicators (triangulation.n_active_cells());
+    const FEValuesExtractors::Vector velocity_components (0);
+    KellyErrorEstimator<dim>::estimate (dof_handler,
+                                        face_quadratures,
+                                        typename FunctionMap<dim>::type(),
+                                        solution,
+                                        stokes_indicators,
+                                        fe_collection.component_mask(velocity_components));
+
+    // ...and a second set based on the displacement components:
+    Vector<float> elasticity_indicators (triangulation.n_active_cells());
+    const FEValuesExtractors::Vector displacement_components (dim+1);
+    KellyErrorEstimator<dim>::estimate (dof_handler,
+                                        face_quadratures,
+                                        typename FunctionMap<dim>::type(),
+                                        solution,
+                                        elasticity_indicators,
+                                        fe_collection.component_mask(displacement_components));
+
+    // The two sets of indicators are not directly comparable, so each is
+    // divided by its own l2 norm; the fluid indicators are additionally
+    // weighted by a factor of 4. Their sum is the indicator vector used for
+    // refinement.
+    const double stokes_scaling     = 4. / stokes_indicators.l2_norm();
+    const double elasticity_scaling = 1. / elasticity_indicators.l2_norm();
+    stokes_indicators     *= stokes_scaling;
+    elasticity_indicators *= elasticity_scaling;
+
+    Vector<float> estimated_error_per_cell (triangulation.n_active_cells());
+    estimated_error_per_cell += stokes_indicators;
+    estimated_error_per_cell += elasticity_indicators;
+
+    // Heuristic: the indicator of every cell that touches the interface
+    // between the two subdomains is reset to zero. For each interior face we
+    // test, in this order, whether
+    //  - the neighbor is on the same level, has no children and lies in the
+    //    other subdomain, or
+    //  - the neighbor is on the same level, has children, and its child
+    //    behind subface 0 lies in the other subdomain, or
+    //  - the neighbor is coarser and lies in the other subdomain.
+    // Later tests are only evaluated if the earlier ones did not succeed.
+    const ActiveCellIterator end_cell = dof_handler.end();
+    for (ActiveCellIterator cell = dof_handler.begin_active();
+         cell != end_cell; ++cell)
+      {
+        const bool cell_is_solid = cell_is_in_solid_domain (cell);
+
+        for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+          {
+            if (cell->at_boundary(f))
+              continue;
+
+            const CellIterator neighbor   = cell->neighbor(f);
+            const bool         same_level = (neighbor->level() == cell->level());
+
+            bool face_is_on_interface = false;
+
+            if (same_level && !neighbor->has_children())
+              face_is_on_interface
+                = (cell_is_solid
+                   ?
+                   cell_is_in_fluid_domain (neighbor)
+                   :
+                   cell_is_in_solid_domain (neighbor));
+
+            if (!face_is_on_interface
+                && same_level && neighbor->has_children())
+              face_is_on_interface
+                = (cell_is_solid
+                   ?
+                   cell_is_in_fluid_domain (cell->neighbor_child_on_subface (f, 0))
+                   :
+                   cell_is_in_solid_domain (cell->neighbor_child_on_subface (f, 0)));
+
+            if (!face_is_on_interface
+                && cell->neighbor_is_coarser(f))
+              face_is_on_interface
+                = (cell_is_solid
+                   ?
+                   cell_is_in_fluid_domain (neighbor)
+                   :
+                   cell_is_in_solid_domain (neighbor));
+
+            if (face_is_on_interface)
+              estimated_error_per_cell(cell->active_cell_index()) = 0;
+          }
+      }
+
+    // Flag the 30 per cent of cells with the largest indicators for
+    // refinement (none for coarsening) and carry out the refinement:
+    GridRefinement::refine_and_coarsen_fixed_number (triangulation,
+                                                     estimated_error_per_cell,
+                                                     0.3, 0.0);
+    triangulation.execute_coarsening_and_refinement ();
+  }
+
+
+
+  // @sect4{<code>FluidStructureProblem::run</code>}
+
+  // This is, as usual, the function that controls the overall flow of
+  // operation. If you've read through tutorial programs step-1 through
+  // step-6, for example, then you are already quite familiar with the
+  // following structure:
+  template <int dim>
+  void FluidStructureProblem<dim>::run ()
+  {
+    make_grid ();
+
+    // The number of cycles depends on the space dimension: 6 in 2d, 4 in 3d.
+    const unsigned int n_refinement_cycles = 10-2*dim;
+
+    unsigned int refinement_cycle = 0;
+    while (refinement_cycle < n_refinement_cycles)
+      {
+        std::cout << "Refinement cycle " << refinement_cycle << std::endl;
+
+        // The initial mesh is used as is; every later cycle starts by
+        // refining the mesh of the previous one.
+        if (refinement_cycle != 0)
+          refine_mesh ();
+
+        setup_dofs ();
+
+        std::cout << "   Assembling..." << std::endl;
+        assemble_system ();
+
+        std::cout << "   Solving..." << std::endl;
+        solve ();
+
+        std::cout << "   Writing output..." << std::endl;
+        output_results (refinement_cycle);
+
+        std::cout << std::endl;
+
+        ++refinement_cycle;
+      }
+  }
+}
+
+
+
+// @sect4{The <code>main()</code> function}
+
+// This, final, function contains pretty much exactly what most of the other
+// tutorial programs have:
+int main ()
+{
+  // Separator line framing the error reports below.
+  const char *const separator
+    = "----------------------------------------------------";
+
+  try
+    {
+      using namespace dealii;
+      using namespace Step46;
+
+      // Two-dimensional problem with degree 1 for both the Stokes and the
+      // elasticity element.
+      FluidStructureProblem<2> flow_problem(1, 1);
+      flow_problem.run ();
+    }
+  catch (std::exception &exc)
+    {
+      // Exceptions derived from std::exception carry a message we can show.
+      std::cerr << std::endl << std::endl
+                << separator
+                << std::endl
+                << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << separator
+                << std::endl;
+
+      return 1;
+    }
+  catch (...)
+    {
+      // Anything else: all we can report is that something went wrong.
+      std::cerr << std::endl << std::endl
+                << separator
+                << std::endl
+                << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << separator
+                << std::endl;
+
+      return 1;
+    }
+
+  return 0;
+}
diff --git a/examples/step-48/CMakeLists.txt b/examples/step-48/CMakeLists.txt
new file mode 100644
index 0000000..177810d
--- /dev/null
+++ b/examples/step-48/CMakeLists.txt
@@ -0,0 +1,39 @@
+##
+#  CMake script for the step-48 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-48") # reused below for the source file name (${TARGET}.cc) and for PROJECT()
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET # QUIET: a failed lookup is reported by the FATAL_ERROR below instead
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES() # macro presumably provided by the deal.II package found above
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT() # NOTE(review): presumably sets up the build of ${TARGET} from ${TARGET_SRC} -- confirm in the deal.II CMake docs
diff --git a/examples/step-48/doc/builds-on b/examples/step-48/doc/builds-on
new file mode 100644
index 0000000..017d954
--- /dev/null
+++ b/examples/step-48/doc/builds-on
@@ -0,0 +1 @@
+step-25 step-37 step-40
diff --git a/examples/step-48/doc/intro.dox b/examples/step-48/doc/intro.dox
new file mode 100644
index 0000000..a4715e6
--- /dev/null
+++ b/examples/step-48/doc/intro.dox
@@ -0,0 +1,176 @@
+
+<i>
+This program was contributed by Katharina Kormann and Martin
+Kronbichler.
+
+The algorithm for the matrix-vector product is based on the article <a
+href="http://dx.doi.org/10.1016/j.compfluid.2012.04.012">A generic
+interface for parallel cell-based finite element operator
+application</a> by Martin Kronbichler and Katharina Kormann, Computers
+and Fluids 63:135–147, 2012, and the paper "Parallel finite element operator
+application: Graph partitioning and coloring" by Katharina
+Kormann and Martin Kronbichler in: Proceedings of the 7th IEEE
+International Conference on e-Science, 2011.  </i>
+
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+This program demonstrates how to use the cell-based implementation of
+finite element operators with the MatrixFree class,
+first introduced in step-37, to solve nonlinear partial
+differential equations. Moreover, we demonstrate how the
+MatrixFree class handles constraints and how it can be
+parallelized over distributed nodes. Finally, we will use an explicit
+time-stepping method to solve the problem and introduce Gauss-Lobatto
+finite elements that are very convenient in this case since they have
+a diagonal, and thus trivially invertible, mass matrix. Moreover,
+this type of elements clusters the nodes towards the element
+boundaries which is why they have good properties for high-order
+discretization methods. Indeed, the condition number of standard FE_Q
+elements with equidistant nodes grows exponentially with the degree,
+which destroys any benefit for orders of about five and higher.
+
+<h3> Problem statement and discretization </h3>
+
+As an example, we choose to solve the sine-Gordon soliton equation
+\f{eqnarray*}
+u_{tt} &=& \Delta u -\sin(u) \quad\mbox{for}\quad (x,t) \in
+\Omega \times (t_0,t_f],\\
+{\mathbf n} \cdot \nabla u &=& 0
+\quad\mbox{for}\quad (x,t) \in \partial\Omega \times (t_0,t_f],\\
+u(x,t_0) &=& u_0(x).
+\f}
+
+that was already introduced in step-25. As a simple explicit time
+integration method, we choose the leap frog scheme using the second-order
+formulation of the equation. Then, the scheme reads in weak form
+
+\f{eqnarray*}
+(v,u^{n+1}) = (v,2 u^n-u^{n-1} -
+(\Delta t)^2 \sin(u^n)) - (\nabla v, (\Delta t)^2 \nabla u^n),
+\f}
+where <i> v</i> denotes a test function and the index <i>n</i> stands for
+the time step number.
+
+For the spatial discretization, we choose FE_Q elements
+with basis functions defined to interpolate the support points of the
+Gauss-Lobatto quadrature rule. Moreover, when we compute the integrals
+over the basis functions to form the mass matrix and the operator on
+the right hand side of the equation above, we use the
+Gauss-Lobatto quadrature rule with the same support points as the
+node points of the finite element to evaluate the integrals. Since the
+finite element is Lagrangian, this will yield a diagonal mass matrix
+on the left hand side of the equation, making the solution of the
+linear system in each time step trivial.
+
+Using this quadrature rule, for a <i>p</i>th order finite element, we
+use a <i>(2p-1)</i>th order accurate formula to evaluate the
+integrals. Since the product of two <i>p</i>th order basis functions
+when computing a mass matrix gives a function with polynomial degree
+<i>2p</i> in each direction, the integrals are not exactly
+evaluated. However, considering the fact that the interpolation order
+of finite elements of degree <i>p</i> is <i>p+1</i>, the overall
+convergence properties are not disturbed by the quadrature error, in
+particular not when we use high orders.
+
+Apart from the fact that we avoid solving linear systems with this
+type of elements when using explicit time-stepping, they come with two
+other advantages. When we are using the sum-factorization approach to
+evaluate the finite element operator (cf. step-37), we have to
+evaluate the function at the quadrature points. In the case of
+Gauss-Lobatto elements, where quadrature points and node points of the
+finite element coincide, this operation is trivial since the value
+of the function at the quadrature points is given by its one-dimensional
+coefficients. In this way, the complexity of a finite element operator
+evaluation is further reduced compared to equidistant elements.
+
+The third advantage is the fact that these elements are better conditioned than
+equidistant Lagrange polynomials for increasing order so that we can use
+higher order elements for an accurate solution of the equation. Lagrange
+elements FE_Q with equidistant points should not be used for polynomial
+degrees four and higher.
+
+To sum up the discussion, by using the right finite element and
+quadrature rule combination, we end up with a scheme where we in each
+time step need to compute the right hand side vector corresponding
+to the formulation above and then multiply it by the inverse of the
+diagonal mass matrix. In practice, of course, we extract the diagonal
+elements and invert them only once at the beginning of the program.
+
+<h3>Implementation of constraints</h3>
+
+The usual way to handle constraints in <code>deal.II</code> is to use
+the ConstraintMatrix class that builds a sparse matrix storing
+information about which degrees of freedom (DoF) are constrained and
+how they are constrained. This format uses an unnecessarily large
+amount of memory since there are not so many different types of
+constraints: for example, in the case of hanging nodes when using
+linear finite elements on every cell, most constraints have the form
+$x_k = \frac 12 x_i + \frac 12 x_j$ where the coefficients $\frac 12$
+are always the same and only $i,j,k$ are different. While storing this
+redundant information is not a problem in general because it is only
+needed once during matrix and right hand side assembly, it becomes a
+problem when we want to use the matrix-free approach since there this
+information has to be accessed every time we apply the operator. Thus,
+instead of a ConstraintMatrix, we use a variable that we call
+<code>constraint_pool</code> that collects the weights of the
+different constraints. Then, we only have to store an identifier of
+each constraint in the mesh instead of all the weights. Moreover, we
+do not want to apply the constraints in a pre- and postprocessing step
+but want to take care of constraints as we evaluate the finite element
+operator. Therefore, we embed the constraint information into the
+variable <code>indices_local_to_global</code> that is used to extract
+the cell information from the global vector. If a DoF is constrained,
+the <code>indices_local_to_global</code> variable contains the global
+indices of the DoFs that it is constrained to. Then, we have another
+variable <code>constraint_indicator</code> at hand that holds, for
+each cell, the local indices of DoFs that are constrained as well as
+the identifier of the type of constraint. Actually, you will not see
+these data structures in the example program since the class
+<code>FEEvaluation</code> takes care of the constraints without user
+interaction.
+
+
+<h3> Parallelization </h3>
+
+The MatrixFree class comes with the option to be parallelized on three levels:
+MPI parallelization on clusters of distributed nodes, thread parallelization
+scheduled by the Threading Building Blocks library, and finally with a
+vectorization by clustering of two (or more) cells into a SIMD data type for
+the operator application. As we have already discussed in step-37, you will
+get best performance by using an instruction set specific to your system,
+e.g. with the cmake variable
+<tt>-DCMAKE_CXX_FLAGS="-march=native"</tt>. Shared memory (thread)
+parallelization was also exploited in step-37. Here, we demonstrate MPI
+parallelization.
+
+To facilitate parallelism with distributed memory (MPI), we use a special
+vector type parallel::distributed::Vector that holds the
+processor-local part of the solution as well as information on and data
+fields for the ghost DoFs, i.e. DoFs that are owned by a remote
+processor but needed on cells that are treated by the present
+processor. Moreover, it holds the MPI-send information for DoFs that
+are owned locally but needed by other processors. This is similar to
+the PETScWrappers::MPI::Vector and TrilinosWrappers::MPI::Vector data
+types we have used in step-40 and step-32 before, but since we do not
+need any other parallel functionality of these libraries, we use the
+parallel::distributed::Vector class of deal.II instead of linking in
+another large library.
+
+Note that this program is designed to be run with a distributed triangulation
+(parallel::distributed::Triangulation), which requires deal.II to be
+configured with <a href="http://www.p4est.org/">p4est</a> as described
+in the <a href="../../readme.html">deal.II ReadMe</a> file. However, a
+non-distributed triangulation is also supported, in which case the
+computation will be run in serial.
+
+<h3> The test case </h3>
+
+In our example, we choose the initial value to be \f{eqnarray*} u(x,t) =
+\prod_{i=1}^{d} -4 \arctan \left(
+\frac{m}{\sqrt{1-m^2}}\frac{\sin\left(\sqrt{1-m^2} t +c_2\right)}{\cosh(mx_i+c_1)}\right)
+\f} and solve the equation over the time interval [-10,10]. The
+constants are chosen to be $c_1=c_2=0$ and <i> m=0.5</i>. As mentioned
+in step-25, in one dimension <i>u</i> as a function of <i>t</i> is the exact
+solution of the sine-Gordon equation. For higher dimension, this is however
+not the case.
diff --git a/examples/step-48/doc/kind b/examples/step-48/doc/kind
new file mode 100644
index 0000000..86a44aa
--- /dev/null
+++ b/examples/step-48/doc/kind
@@ -0,0 +1 @@
+time dependent
diff --git a/examples/step-48/doc/results.dox b/examples/step-48/doc/results.dox
new file mode 100644
index 0000000..213aced
--- /dev/null
+++ b/examples/step-48/doc/results.dox
@@ -0,0 +1,174 @@
+<h1>Results</h1>
+
+<h3>Comparison with a sparse matrix</h3>
+
+In order to demonstrate the gain in using the MatrixFree class instead of
+the standard <code>deal.II</code> assembly routines for evaluating the
+information from old time steps, we study a simple serial run of the code on a
+nonadaptive mesh. Since much time is spent on evaluating the sine function, we
+show the numbers not only for the full sine-Gordon equation but also for the
+wave equation (the sine-Gordon equation with the sine term dropped). We use
+both second and fourth order elements. The results are summarized in the
+following table.
+
+<table align="center" border="1">
+  <tr>
+    <th> </th>
+    <th colspan="3">wave equation</th>
+    <th colspan="2">sine-Gordon</th>
+  </tr>
+  <tr>
+    <th> </th>
+    <th>MF</th>
+    <th>SpMV</th>
+    <th>dealii</th>
+    <th>MF</th>
+    <th>dealii</th>
+  </tr>
+  <tr>
+    <td>2D, $\mathcal{Q}_2$</td>
+    <td align="right"> 0.0106</td>
+    <td align="right"> 0.00971</td>
+    <td align="right"> 0.109</td>
+    <td align="right"> 0.0243</td>
+    <td align="right"> 0.124</td>
+  </tr>
+  <tr>
+    <td>2D, $\mathcal{Q}_4$</td>
+    <td align="right"> 0.0328</td>
+    <td align="right"> 0.0706</td>
+    <td align="right"> 0.528</td>
+    <td align="right"> 0.0714</td>
+    <td align="right"> 0.502</td>
+   </tr>
+   <tr>
+    <td>3D, $\mathcal{Q}_2$</td>
+    <td align="right"> 0.0151</td>
+    <td align="right"> 0.0320</td>
+    <td align="right"> 0.331</td>
+    <td align="right"> 0.0376</td>
+    <td align="right"> 0.364</td>
+   </tr>
+   <tr>
+    <td>3D, $\mathcal{Q}_4$</td>
+    <td align="right"> 0.0918</td>
+    <td align="right"> 0.844</td>
+    <td align="right"> 6.83</td>
+    <td align="right"> 0.194</td>
+    <td align="right"> 6.95</td>
+   </tr>
+</table>
+
+It is apparent that the matrix-free code outperforms the standard assembly
+routines in deal.II by far. In 3D and for fourth order elements, one operator
+application is also almost ten times as fast as a sparse matrix-vector
+product.
+
+<h3>Parallel run in 3D</h3>
+
+To demonstrate how the example scales for a parallel run and to demonstrate
+that hanging node constraints can be handled in an efficient way, we run the
+example in 3D with $\mathcal{Q}_4$ elements. First, we run it on a notebook
+with 2 cores (Sandy Bridge CPU) at 2.7 GHz.
+@code
+\$ make debug-mode=off run
+   Number of global active cells: 17592
+   Number of degrees of freedom: 1193881
+   Time step size: 0.0117233, finest cell: 0.46875
+
+   Time:     -10, solution norm:  29.558
+   Time:   -7.66, solution norm:  129.13
+   Time:   -5.31, solution norm:  67.753
+   Time:   -2.97, solution norm:  79.245
+   Time:  -0.621, solution norm:  123.52
+   Time:    1.72, solution norm:  43.525
+   Time:    4.07, solution norm:  93.285
+   Time:    6.41, solution norm:  97.722
+   Time:    8.76, solution norm:  36.734
+   Time:      10, solution norm:  94.115
+
+   Performed 1706 time steps.
+   Average wallclock time per time step: 0.038261s
+   Spent 11.977s on output and 65.273s on computations.
+@endcode
+
+It takes 0.04 seconds for one time step on a notebook with more than a million
+degrees of freedom (note that we would need many processors to reach such
+numbers when solving linear systems). If we run the same 3D code on a
+cluster with 2 nodes and each node runs 8 threads, we get the following times:
+
+@code
+\$ mpirun --bynode -n 2 ./\step-48
+...
+   Performed 1706 time steps.
+   Average wallclock time per time step: 0.0123188s
+   Spent 6.74378s on output and 21.0158s on computations.
+@endcode
+
+We observe a considerable speedup over the notebook (16 cores versus 2 cores;
+nonetheless, one notebook core is considerably faster than one core of the
+cluster because of a newer processor architecture). If we run the same program
+on 4 nodes with 8 threads on each node, we get:
+@code
+\$ mpirun --bynode -n 4 ./\step-48
+...
+   Performed 1706 time steps.
+   Average wallclock time per time step: 0.00689865s
+   Spent 3.54145s on output and 11.7691s on computations.
+@endcode
+
+By comparing the times for two nodes and four nodes, we observe the nice
+scaling behavior of the implementation. Of course, the code can also be run in
+MPI-mode only by disabling the multithreading flag in the code. If we use the
+same 32 cores as for the hybrid parallelization above, we observe the
+following run-time:
+
+@code
+\$ mpirun -n 32 ./\step-48
+...
+   Performed 1706 time steps.
+   Average wallclock time per time step: 0.0189041s
+   Spent 0.968967s on output and 32.2504s on computations.
+@endcode
+
+We observe slower speed for computations, but faster output (which makes
+sense, as output is only parallelized by MPI and not threads), whereas the
+computations are faster if we use hybrid parallelism in the given case.
+
+<h3>Possibilities for extensions</h3>
+
+There are several things in this program that could be improved to make it
+even more efficient (besides improved boundary conditions and physical
+stuff as discussed in step-25):
+
+<ul> <li> <b>Faster evaluation of sine terms:</b> As becomes obvious
+  from the comparison of the plain wave equation and the sine-Gordon
+  equation above, the evaluation of the sine terms dominates the total
+  time for the finite element operator application. There are a few
+  reasons for this: Firstly, the deal.II sine computation of a
+  VectorizedArray field is not vectorized (as opposed to the rest of
+  the operator application). This could be cured by handing the sine
+  computation to a library with vectorized sine computations like
+  Intel's math kernel library (MKL). By using the function
+  <code>vdSin</code> in MKL, the program uses half the computing time
+  in 2D and 40 percent less time in 3D. On the other hand, the sine
+  computation is structurally much more complicated than the simple
+  arithmetic operations like additions and multiplications in the rest
+  of the local operation.
+
+  <li> <b>Higher order time stepping:</b> While the implementation allows for
+  arbitrary order in the spatial part (by adjusting the degree of the finite
+  element), the time stepping scheme is a standard second-order leap-frog
+  scheme. Since solutions in wave propagation problems are usually very
+  smooth, the error is likely dominated by the time stepping part. Of course,
+  this could be cured by using smaller time steps (at a fixed spatial
+  resolution), but it would be more efficient to use higher order time
+  stepping as well. While it would be straight-forward to do so for a
+  first-order system (use some Runge–Kutta scheme of higher order,
+  probably combined with adaptive time step selection like the <a
+  href="http://en.wikipedia.org/wiki/Dormand%E2%80%93Prince_method">Dormand–Prince
+  method</a>), it is more challenging for the second-order formulation. At
+  least in the finite difference community, people usually use the PDE to find
+  spatial correction terms that improve the temporal error.
+
+</ul>
diff --git a/examples/step-48/doc/tooltip b/examples/step-48/doc/tooltip
new file mode 100644
index 0000000..ffe23ae
--- /dev/null
+++ b/examples/step-48/doc/tooltip
@@ -0,0 +1 @@
+Parallelization via MPI. The wave equation, in linear and nonlinear variants. Mass lumping. Fast assembly techniques.
diff --git a/examples/step-48/step-48.cc b/examples/step-48/step-48.cc
new file mode 100644
index 0000000..bd3d0b5
--- /dev/null
+++ b/examples/step-48/step-48.cc
@@ -0,0 +1,664 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2011 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Author: Katharina Kormann, Martin Kronbichler, Uppsala University, 2011-2012
+ */
+
+
+// The necessary files from the deal.II library.
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/conditional_ostream.h>
+#include <deal.II/base/timer.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/distributed/tria.h>
+
+// This includes the data structures for the efficient implementation of
+// matrix-free methods.
+#include <deal.II/lac/parallel_vector.h>
+#include <deal.II/matrix_free/matrix_free.h>
+#include <deal.II/matrix_free/fe_evaluation.h>
+
+#include <fstream>
+#include <iostream>
+#include <iomanip>
+
+
+namespace Step48
+{
+  using namespace dealii;
+
+  // We start by defining two global variables to collect all parameters
+  // subject to changes at one place: One for the dimension and one for the
+  // finite element degree. The dimension is used in the main function as a
+  // template argument for the actual classes (like in all other deal.II
+  // programs), whereas the degree of the finite element is more crucial, as
+  // it is passed as a template argument to the implementation of the
+  // Sine-Gordon operator. Therefore, it needs to be a compile-time constant.
+  const unsigned int dimension = 2;
+  const unsigned int fe_degree = 4;
+
+
+  // @sect3{SineGordonOperation}
+
+  // The <code>SineGordonOperation</code> class implements the cell-based
+  // operation that is needed in each time step. This nonlinear operation can
+  // be implemented straight-forwardly based on the <code>MatrixFree</code>
+  // class, in the same way as a linear operation would be treated by this
+  // implementation of the finite element operator application. We apply two
+  // template arguments to the class, one for the dimension and one for the
+  // degree of the finite element. This is a difference to other functions in
+  // deal.II where only the dimension is a template argument. This is
+  // necessary to provide the inner loops in @p FEEvaluation with information
+  // about loop lengths etc., which is essential for efficiency. On the other
+  // hand, it makes it more challenging to implement the degree as a run-time
+  // parameter.
+  template <int dim, int fe_degree>
+  class SineGordonOperation
+  {
+  public:
+    SineGordonOperation(const MatrixFree<dim,double> &data_in, // kept by reference in the member `data`
+                        const double                  time_step); // only its square is stored (delta_t_sqr)
+
+    void apply (parallel::distributed::Vector<double>                     &dst, // overwritten with the result
+                const std::vector<parallel::distributed::Vector<double>*> &src) const; // two vectors: {current, old}
+
+  private:
+    const MatrixFree<dim,double>         &data; // reference member: the referenced object must outlive this one
+    const VectorizedArray<double>         delta_t_sqr; // time_step*time_step as a VectorizedArray
+    parallel::distributed::Vector<double> inv_mass_matrix; // inverted diagonal mass matrix entries (0 where the entry was <= 1e-15)
+
+    void local_apply (const MatrixFree<dim,double>               &data, // cell-range worker passed to data.cell_loop() by apply()
+                      parallel::distributed::Vector<double>      &dst,
+                      const std::vector<parallel::distributed::Vector<double>*> &src,
+                      const std::pair<unsigned int,unsigned int> &cell_range) const;
+  };
+
+
+
+  // @sect4{SineGordonOperation::SineGordonOperation}
+
+  // This is the constructor of the SineGordonOperation class. It receives a
+  // reference to the MatrixFree holding the problem information and the time
+  // step size as input parameters. The initialization routine sets up the
+  // mass matrix. Since we use Gauss-Lobatto elements, the mass matrix is a
+  // diagonal matrix and can be stored as a vector. The computation of the
+  // mass matrix diagonal is simple to achieve with the data structures
+  // provided by FEEvaluation: Just loop over all (macro-) cells and integrate
+  // over the function that is constant one on all quadrature points by using
+  // the <code>integrate</code> function with @p true argument at the slot for
+  // values. Finally, we invert the diagonal entries since we have to multiply
+  // by the inverse mass matrix in each time step.
+  template <int dim, int fe_degree>
+  SineGordonOperation<dim,fe_degree>::
+  SineGordonOperation(const MatrixFree<dim,double> &data_in,
+                      const double                  time_step)
+    :
+    data(data_in),
+    delta_t_sqr(make_vectorized_array(time_step *time_step))
+  {
+    VectorizedArray<double> one = make_vectorized_array (1.); // integrand: the constant function 1
+
+    data.initialize_dof_vector (inv_mass_matrix);
+
+    FEEvaluation<dim,fe_degree>   fe_eval(data);
+    const unsigned int            n_q_points = fe_eval.n_q_points;
+
+    for (unsigned int cell=0; cell<data.n_macro_cells(); ++cell) // accumulate the mass matrix diagonal cell by cell
+      {
+        fe_eval.reinit(cell);
+        for (unsigned int q=0; q<n_q_points; ++q)
+          fe_eval.submit_value(one,q);
+        fe_eval.integrate (true,false); // values only, no gradients
+        fe_eval.distribute_local_to_global (inv_mass_matrix); // at this point the vector still holds the diagonal, not its inverse
+      }
+
+    inv_mass_matrix.compress(VectorOperation::add);
+    for (unsigned int k=0; k<inv_mass_matrix.local_size(); ++k) // invert the locally stored entries in place
+      if (inv_mass_matrix.local_element(k)>1e-15) // avoid dividing by entries that are (numerically) zero or negative
+        inv_mass_matrix.local_element(k) = 1./inv_mass_matrix.local_element(k);
+      else
+        inv_mass_matrix.local_element(k) = 0; // such entries get inverse 0, so apply() zeroes them when scaling
+  }
+
+
+
+  // @sect4{SineGordonOperation::local_apply}
+
+  // This operator implements the core operation of the program, the
+  // integration over a range of cells for the nonlinear operator of the
+  // Sine-Gordon problem. The implementation is based on the FEEvaluation
+  // class as in step-37. Due to the special structure in Gauss-Lobatto
+  // elements, certain operations become simpler, in particular the evaluation
+  // of shape function values on quadrature points which is simply the
+  // injection of the values of cell degrees of freedom. The MatrixFree class
+  // detects possible structure of the finite element at quadrature points
+  // when initializing, which is then used by FEEvaluation for selecting the
+  // most appropriate numerical kernel.
+
+  // The nonlinear function that we have to evaluate for the time stepping
+  // routine includes the value of the function at the present time @p current
+  // as well as the value at the previous time step @p old. Both values are
+  // passed to the operator in the collection of source vectors @p src, which is
+  // simply a <tt>std::vector</tt> of pointers to the actual solution
+  // vectors. This construct of collecting several source vectors into one is
+  // necessary as the cell loop in @p MatrixFree takes exactly one source and
+  // one destination vector, even if we happen to use many vectors like the two
+  // in this case. Note that the cell loop accepts any valid class for input and
+  // output, which does not only include vectors but general data types.
+  // However, only in case it encounters a parallel::distributed::Vector<Number>
+  // or a <tt>std::vector</tt> collecting these vectors, it calls functions that
+  // exchange data at the beginning and the end of the loop. In the loop over
+  // the cells, we first have to read in the values in the vectors related to
+  // the local values. Then, we evaluate the value and the gradient of the
+  // current solution vector and the values of the old vector at the quadrature
+  // points. Then, we combine the terms in the scheme in the loop over the
+  // quadrature points. Finally, we integrate the result against the test
+  // function and accumulate the result to the global solution vector @p dst.
+  template <int dim, int fe_degree>
+  void SineGordonOperation<dim, fe_degree>::
+  local_apply (const MatrixFree<dim>                      &data, // NOTE(review): the in-class declaration spells this MatrixFree<dim,double>; presumably the same type via a default template argument -- confirm
+               parallel::distributed::Vector<double>      &dst, // results are added into dst, not assigned
+               const std::vector<parallel::distributed::Vector<double>*> &src, // src[0] = current solution, src[1] = old solution
+               const std::pair<unsigned int,unsigned int> &cell_range) const // half-open range [first, second) of cell indices
+  {
+    AssertDimension (src.size(), 2); // exactly the two vectors read below are expected
+    FEEvaluation<dim,fe_degree> current (data), old (data);
+    for (unsigned int cell=cell_range.first; cell<cell_range.second; ++cell)
+      {
+        current.reinit (cell);
+        old.reinit (cell);
+
+        current.read_dof_values (*src[0]);
+        old.read_dof_values     (*src[1]);
+
+        current.evaluate (true, true, false); // values and gradients of the current solution
+        old.evaluate (true, false, false); // values only of the old solution
+
+        for (unsigned int q=0; q<current.n_q_points; ++q)
+          {
+            const VectorizedArray<double> current_value = current.get_value(q);
+            const VectorizedArray<double> old_value     = old.get_value(q);
+
+            current.submit_value (2.*current_value - old_value - // value term: 2 u^n - u^{n-1} - dt^2 sin(u^n)
+                                  delta_t_sqr * std::sin(current_value),q);
+            current.submit_gradient (- delta_t_sqr * // gradient term: -dt^2 grad u^n
+                                     current.get_gradient(q), q);
+          }
+
+        current.integrate (true,true); // test against both values and gradients
+        current.distribute_local_to_global (dst);
+      }
+  }
+
+
+
+  //@sect4{SineGordonOperation::apply}
+
+  // This function performs the time stepping routine based on the cell-local
+  // strategy. First the destination vector is set to zero, then the cell-loop
+  // is called, and finally the solution is multiplied by the inverse mass
+  // matrix. The structure of the cell loop is implemented in the cell finite
+  // element operator class. On each cell it applies the routine defined as
+  // the <code>local_apply()</code> method of the class
+  // <code>SineGordonOperation</code>, i.e., <code>this</code>. One could also
+  // provide a function with the same signature that is not part of a class.
+  template <int dim, int fe_degree>
+  void SineGordonOperation<dim, fe_degree>::
+  apply (parallel::distributed::Vector<double>                     &dst, // receives the new solution; previous content is discarded
+         const std::vector<parallel::distributed::Vector<double>*> &src) const // {current, old}, in the order local_apply() reads them
+  {
+    dst = 0; // local_apply() only adds into dst, so start from zero
+    data.cell_loop (&SineGordonOperation<dim,fe_degree>::local_apply,
+                    this, dst, src);
+    dst.scale(inv_mass_matrix); // entry-wise multiplication by the inverted mass matrix diagonal
+  }
+
+
+  //@sect3{Equation data}
+
+  // We define a time-dependent function that is used as initial
+  // value. Different solutions can be obtained by varying the starting
+  // time. This function has already been explained in step-25.
+  template <int dim>
+  class ExactSolution : public Function<dim>
+  {
+  public:
+    ExactSolution (const unsigned int n_components = 1,
+                   const double time = 0.) : Function<dim>(n_components, time) {} // both arguments are forwarded unchanged to the base class
+    virtual double value (const Point<dim> &p, // evaluated at the time returned by get_time()
+                          const unsigned int component = 0) const; // component is not used by the implementation
+  };
+
+  template <int dim>
+  double ExactSolution<dim>::value (const Point<dim> &p,
+                                    const unsigned int /* component */) const
+  {
+    double t = this->get_time ();
+
+    const double m = 0.5; // parameter m of the formula; hard-coded
+    const double c1 = 0.; // shift added to m*p[d] inside cosh
+    const double c2 = 0.; // shift added to sqrt(1-m^2)*t inside sin
+    const double factor = (m / std::sqrt(1.-m*m) * // depends on time only, so computed once outside the loop
+                           std::sin(std::sqrt(1.-m*m)*t+c2));
+    double result = 1.;
+    for (unsigned int d=0; d<dim; ++d) // product over the coordinate directions
+      result *= -4. * std::atan (factor / std::cosh(m*p[d]+c1));
+    return result;
+  }
+
+
+
+  // @sect3{SineGordonProblem class}
+
+  // This is the main class that builds on the class in step-25.  However, we
+  // replaced the SparseMatrix<double> class by the MatrixFree class to store
+  // the geometry data. Also, we use a distributed triangulation in this
+  // example.
+  template <int dim>
+  class SineGordonProblem
+  {
+  public:
+    SineGordonProblem ();
+    void run ();
+
+  private:
+    ConditionalOStream pcout; // wraps std::cout; constructed with the condition "MPI rank == 0"
+
+    void make_grid_and_dofs ();
+    void output_results (const unsigned int timestep_number);
+
+#ifdef DEAL_II_WITH_P4EST
+    parallel::distributed::Triangulation<dim>   triangulation; // distributed mesh when deal.II is configured with p4est
+#else
+    Triangulation<dim>   triangulation; // non-distributed fallback otherwise
+#endif
+    FE_Q<dim>            fe; // built from QGaussLobatto<1>(fe_degree+1) in the constructor
+    DoFHandler<dim>      dof_handler; // attached to `triangulation` in the constructor
+    ConstraintMatrix     constraints;
+    IndexSet             locally_relevant_dofs;
+
+    MatrixFree<dim,double> matrix_free_data;
+
+    parallel::distributed::Vector<double> solution, old_solution, old_old_solution;
+
+    const unsigned int n_global_refinements; // 10-2*dim
+    double time, time_step; // time starts at -10; time_step is not set by the constructor
+    const double final_time; // 10
+    const double cfl_number; // .1/fe_degree
+    const unsigned int output_timestep_skip; // 200; NOTE(review): presumably the number of time steps between outputs -- confirm in run()
+  };
+
+
+  //@sect4{SineGordonProblem::SineGordonProblem}
+
+  // This is the constructor of the SineGordonProblem class. The time interval
+  // and time step size are defined here. Moreover, we use the degree of the
+  // finite element that we defined at the top of the program to initialize a
+  // FE_Q finite element based on Gauss-Lobatto support points. These points
+  // are convenient because in conjunction with a QGaussLobatto quadrature
+  // rule of the same order they give a diagonal mass matrix without
+  // compromising accuracy too much (note that the integration is inexact,
+  // though), see also the discussion in the introduction.
+  template <int dim>
+  SineGordonProblem<dim>::SineGordonProblem ()
+    :
+    pcout (std::cout,                    // output is enabled on MPI process 0 only
+           Utilities::MPI::this_mpi_process(MPI_COMM_WORLD)==0),
+#ifdef DEAL_II_WITH_P4EST
+    triangulation (MPI_COMM_WORLD),
+#endif
+    fe (QGaussLobatto<1>(fe_degree+1)),  // Gauss-Lobatto support points
+    dof_handler (triangulation),
+    n_global_refinements (10-2*dim),     // i.e. 6 in 2d and 4 in 3d
+    time (-10),                          // start time of the simulation
+    final_time (10),
+    cfl_number (.1/fe_degree),
+    output_timestep_skip (200)           // time steps between two outputs
+  {}
+
+  //@sect4{SineGordonProblem::make_grid_and_dofs}
+
+  // As in step-25 this function sets up a cube grid in <code>dim</code>
+  // dimensions of extent $[-15,15]$. We refine the mesh more in the center of
+  // the domain since the solution is concentrated there. We first refine all
+  // cells whose center is within a radius of 11, and then refine once more
+  // for a radius 6.  This simple ad hoc refinement could be done better by
+  // adapting the mesh to the solution using error estimators during the time
+  // stepping as done in other example programs, and using
+  // parallel::distributed::SolutionTransfer to transfer the solution to the
+  // new mesh.
+  template <int dim>
+  void SineGordonProblem<dim>::make_grid_and_dofs ()
+  {
+    // Start from the cube with extent [-15,15], refined uniformly.
+    GridGenerator::hyper_cube (triangulation, -15, 15);
+    triangulation.refine_global (n_global_refinements);
+
+    // Two passes of local refinement around the origin: the first pass flags
+    // all locally owned cells whose center is closer than 11 to the origin,
+    // the second one those closer than 6. The flagged cells are refined at
+    // the end of each pass.
+    const double refinement_radii[2] = {11., 6.};
+    for (unsigned int pass=0; pass<2; ++pass)
+      {
+        typename Triangulation<dim>::active_cell_iterator
+        cell = triangulation.begin_active(),
+        end_cell = triangulation.end();
+        for ( ; cell != end_cell; ++cell)
+          if (cell->is_locally_owned() &&
+              cell->center().norm() < refinement_radii[pass])
+            cell->set_refine_flag();
+        triangulation.execute_coarsening_and_refinement();
+      }
+
+    // Report the number of cells in the resulting mesh.
+    pcout << "   Number of global active cells: "
+#ifdef DEAL_II_WITH_P4EST
+          << triangulation.n_global_active_cells()
+#else
+          << triangulation.n_active_cells()
+#endif
+          << std::endl;
+
+    dof_handler.distribute_dofs (fe);
+
+    pcout << "   Number of degrees of freedom: "
+          << dof_handler.n_dofs()
+          << std::endl;
+
+    // Hanging node constraints ensure continuity of the solution. As in
+    // step-40, the constraint matrix is equipped with the IndexSet of locally
+    // relevant degrees of freedom so that it does not consume too much
+    // memory for big problems.
+    DoFTools::extract_locally_relevant_dofs (dof_handler,
+                                             locally_relevant_dofs);
+    constraints.clear();
+    constraints.reinit (locally_relevant_dofs);
+    DoFTools::make_hanging_node_constraints (dof_handler, constraints);
+    constraints.close();
+
+    // Next, the <code>MatrixFree</code> object for the problem is set up. We
+    // specify the MPI communicator to be used and also ask for shared-memory
+    // parallelization (multithreading for intra-node parallelism instead of
+    // MPI). The scheme selected here is the standard option; to disable
+    // shared memory parallelization one would choose @p none. The quadrature
+    // is a one-dimensional Gauss-Lobatto formula.
+    typename MatrixFree<dim>::AdditionalData setup_options;
+    setup_options.mpi_communicator = MPI_COMM_WORLD;
+    setup_options.tasks_parallel_scheme =
+      MatrixFree<dim>::AdditionalData::partition_partition;
+
+    const QGaussLobatto<1> quadrature_1d (fe_degree+1);
+    matrix_free_data.reinit (dof_handler, constraints,
+                             quadrature_1d, setup_options);
+
+    // Finally, the three solution vectors are initialized. MatrixFree stores
+    // the layout that is to be used by distributed vectors, so we ask it to
+    // initialize the first vector and give the other two the same layout.
+    matrix_free_data.initialize_dof_vector (solution);
+    old_solution.reinit (solution);
+    old_old_solution.reinit (solution);
+  }
+
+
+
+  //@sect4{SineGordonProblem::output_results}
+
+  // This function prints the norm of the solution and writes the solution
+  // vector to a file. The norm is standard (except for the fact that we need
+  // to be sure to only count norms on locally owned cells), and the second is
+  // similar to what we did in step-40. Note that we can use the same vector
+  // for output as we used for computation: The vectors in the matrix-free
+  // framework always provide full information on all locally owned cells
+  // (this is what is needed in the local evaluations, too), including ghost
+  // vector entries on these cells. This is the only data that is needed in
+  // the integrate_difference function as well as in DataOut. We only need to
+  // make sure that we tell the vector to update its ghost values before we
+  // read them. This is a feature present only in the
+  // parallel::distributed::Vector class. Distributed vectors with PETSc and
+  // Trilinos, on the other hand, need to be copied to special vectors
+  // including ghost values (see the relevant section in step-40). If we
+  // wanted to access all degrees of freedom on ghost cells, too (e.g. when
+  // computing error estimators that use the jump of solution over cell
+  // boundaries), we would need more information and create a vector
+  // initialized with locally relevant dofs just as in step-40. Observe also
+  // that we need to distribute constraints for output - they are not filled
+  // during computations (rather, they are distributed on the fly in the
+  // matrix-free method read_dof_values).
+  template <int dim>
+  void
+  SineGordonProblem<dim>::output_results (const unsigned int timestep_number)
+  {
+    // Fill the constrained entries, then make the ghost values readable.
+    constraints.distribute (solution);
+    solution.update_ghost_values();
+
+    // L2 norm of the solution, accumulated cell by cell and then summed over
+    // all MPI processes.
+    Vector<float> cellwise_norms (triangulation.n_active_cells());
+    VectorTools::integrate_difference (dof_handler, solution,
+                                       ZeroFunction<dim>(), cellwise_norms,
+                                       QGauss<dim>(fe_degree+1),
+                                       VectorTools::L2_norm);
+    const double solution_norm =
+      std::sqrt(Utilities::MPI::sum (cellwise_norms.norm_sqr(), MPI_COMM_WORLD));
+
+    pcout << "   Time:"
+          << std::setw(8) << std::setprecision(3) << time
+          << ", solution norm: "
+          << std::setprecision(5) << std::setw(7) << solution_norm
+          << std::endl;
+
+    // Every process writes its own part of the solution to a .vtu file.
+    DataOut<dim> data_out;
+    data_out.attach_dof_handler (dof_handler);
+    data_out.add_data_vector (solution, "solution");
+    data_out.build_patches ();
+
+    const std::string filename =
+      "solution-" + Utilities::int_to_string (timestep_number, 3);
+    const unsigned int this_process =
+      Utilities::MPI::this_mpi_process(MPI_COMM_WORLD);
+    const std::string piece_name =
+      filename + "." + Utilities::int_to_string (this_process, 4) + ".vtu";
+    std::ofstream output (piece_name.c_str());
+    data_out.write_vtu (output);
+
+    // Process 0 additionally writes the .pvtu record listing all the pieces.
+    if (this_process == 0)
+      {
+        const unsigned int n_processes =
+          Utilities::MPI::n_mpi_processes (MPI_COMM_WORLD);
+        std::vector<std::string> piece_names;
+        for (unsigned int i=0; i<n_processes; ++i)
+          piece_names.push_back (filename + "." +
+                                 Utilities::int_to_string (i, 4) + ".vtu");
+        std::ofstream master_output ((filename + ".pvtu").c_str());
+        data_out.write_pvtu_record (master_output, piece_names);
+      }
+  }
+
+
+  // @sect4{SineGordonProblem::run}
+
+  // This function is called by the main function and calls the subroutines of
+  // the class.
+  //
+  // The first step is to set up the grid and the cell operator. Then, the
+  // time step is computed from the CFL number given in the constructor and
+  // the finest mesh size. The finest mesh size is computed as the diameter of
+  // the last cell in the triangulation, which is the last cell on the finest
+  // level of the mesh. This is only possible for Cartesian meshes, otherwise,
+  // one needs to loop over all cells. Note that we need to query all the
+  // processors for their finest cell since not all processors might hold
+  // a region where the mesh is at the finest level. Then, we readjust the
+  // time step a little to hit the final time exactly.
+  template <int dim>
+  void
+  SineGordonProblem<dim>::run ()
+  {
+    make_grid_and_dofs();
+
+    const double local_min_cell_diameter =
+      triangulation.last()->diameter()/std::sqrt(dim);
+    const double global_min_cell_diameter
+      = -Utilities::MPI::max(-local_min_cell_diameter, MPI_COMM_WORLD);
+    time_step = cfl_number * global_min_cell_diameter;
+    time_step = (final_time-time)/(int((final_time-time)/time_step));
+    pcout << "   Time step size: " << time_step << ", finest cell: "
+          << global_min_cell_diameter << std::endl << std::endl;
+
+    // Next the initial value is set. Since we have a two-step time stepping
+    // method, we also need a value of the solution at time-time_step. For
+    // accurate results, one would need to compute this from the time
+    // derivative of the solution at initial time, but here we ignore this
+    // difficulty and just set it to the initial value function at that
+    // artificial time.
+
+    // We create an output of the initial value. Then we also need to collect
+    // the two starting solutions in a <tt>std::vector</tt> of pointers field
+    // and to set up an instance of the <code> SineGordonOperation class </code>
+    // based on the finite element degree specified at the top of this file.
+    VectorTools::interpolate (dof_handler,
+                              ExactSolution<dim> (1, time),
+                              solution);
+    VectorTools::interpolate (dof_handler,
+                              ExactSolution<dim> (1, time-time_step),
+                              old_solution);
+    output_results (0);
+
+    std::vector<parallel::distributed::Vector<double>*> previous_solutions;
+    previous_solutions.push_back(&old_solution);
+    previous_solutions.push_back(&old_old_solution);
+
+    SineGordonOperation<dim,fe_degree> sine_gordon_op (matrix_free_data,
+                                                       time_step);
+
+    // Now loop over the time steps. In each iteration, we shift the solution
+    // vectors by one and call the <code> apply </code> function of the <code>
+    // SineGordonOperation </code>. Then, we write the solution to a file. We
+    // clock the wall times for the computational time needed as well as the
+    // time needed to create the output and report the numbers when the time
+    // stepping is finished.
+    //
+    // Note how this shift is implemented: We simply call the swap method on
+    // the two vectors which swaps only some pointers without the need to copy
+    // data around. Obviously, this is a more efficient way to update the
+    // vectors during time stepping. Let us see what happens in more detail:
+    // First, we exchange <code>old_solution</code> with
+    // <code>old_old_solution</code>, which means that
+    // <code>old_old_solution</code> gets <code>old_solution</code>, which is
+    // what we expect. Similarly, <code>old_solution</code> gets the content
+    // from <code>solution</code> in the next step. Afterward,
+    // <code>solution</code> holds <code>old_old_solution</code>, but that
+    // will be overwritten during this step.
+    unsigned int timestep_number = 1;
+
+    Timer timer;
+    double wtime = 0;
+    double output_time = 0;
+    for (time+=time_step; time<=final_time; time+=time_step, ++timestep_number)
+      {
+        timer.restart();
+        old_old_solution.swap (old_solution);
+        old_solution.swap (solution);
+        sine_gordon_op.apply (solution, previous_solutions);
+        wtime += timer.wall_time();
+
+        timer.restart();
+        if (timestep_number % output_timestep_skip == 0)
+          output_results(timestep_number / output_timestep_skip);
+
+        output_time += timer.wall_time();
+      }
+    timer.restart();
+    output_results(timestep_number / output_timestep_skip + 1);
+    output_time += timer.wall_time();
+
+    // The loop advances timestep_number once more after the last completed
+    // step, so the number of steps performed is one less than its value.
+    const unsigned int n_steps_performed = timestep_number - 1;
+    pcout << std::endl
+          << "   Performed " << n_steps_performed << " time steps." << std::endl;
+    pcout << "   Average wallclock time per time step: "
+          << wtime / n_steps_performed << "s" << std::endl;
+    pcout << "   Spent " << output_time << "s on output and "
+          << wtime << "s on computations." << std::endl;
+  }
+}
+
+
+
+// @sect3{The <code>main</code> function}
+
+// As in step-40, we initialize MPI at the start of the program. Since we will
+// in general mix MPI parallelization with threads, we also set the third
+// argument in MPI_InitFinalize that controls the number of threads to an
+// invalid number, which means that the TBB library chooses the number of
+// threads automatically, typically to the number of available cores in the
+// system. As an alternative, you can also set this number manually if you
+// want to set a specific number of threads (e.g. when MPI-only is required).
+int main (int argc, char **argv)
+{
+  using namespace Step48;
+  using namespace dealii;
+
+  // Horizontal rule that frames the error reports printed below.
+  const char *const separator =
+    "----------------------------------------------------";
+
+  Utilities::MPI::MPI_InitFinalize mpi_initialization (argc, argv,
+                                                       numbers::invalid_unsigned_int);
+
+  try
+    {
+      SineGordonProblem<dimension> sg_problem;
+      sg_problem.run ();
+    }
+  catch (std::exception &exc)
+    {
+      // Report what went wrong and signal failure to the caller.
+      std::cerr << std::endl << std::endl
+                << separator << std::endl
+                << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << separator << std::endl;
+      return 1;
+    }
+  catch (...)
+    {
+      std::cerr << std::endl << std::endl
+                << separator << std::endl
+                << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << separator << std::endl;
+      return 1;
+    }
+
+  return 0;
+}
diff --git a/examples/step-49/CMakeLists.txt b/examples/step-49/CMakeLists.txt
new file mode 100644
index 0000000..61c4a84
--- /dev/null
+++ b/examples/step-49/CMakeLists.txt
@@ -0,0 +1,39 @@
+##
+#  CMake script for the step-49 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-49")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#   FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#   FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#   SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-49/doc/builds-on b/examples/step-49/doc/builds-on
new file mode 100644
index 0000000..4512741
--- /dev/null
+++ b/examples/step-49/doc/builds-on
@@ -0,0 +1 @@
+step-1
diff --git a/examples/step-49/doc/intro.dox b/examples/step-49/doc/intro.dox
new file mode 100644
index 0000000..83f3239
--- /dev/null
+++ b/examples/step-49/doc/intro.dox
@@ -0,0 +1,294 @@
+<i>This program was contributed by Timo Heister. Parts of the results section
+were contributed by Yuhan Zhou and Wolfgang Bangerth.</i>
+
+<a name="Intro"></a>
+<h1> Introduction </h1>
+This tutorial is an extension to step-1 and demonstrates several ways to
+obtain more involved meshes than the ones shown there.
+
+Generating complex geometries is a challenging task, especially in three space
+dimensions. We will discuss several ways to do this, but this list is not
+exhaustive. Additionally, there is not one approach that fits all problems.
+
+This example program shows some of the ways to create and modify meshes for
+computations and outputs them as <code>.eps</code> files in much the same way
+as we do in step-1. No other computations or adaptive
+refinements are done; the idea is that you can use the techniques used here as
+building blocks in other, more involved simulators. Please note that the
+example program does not show all the ways to generate meshes that are
+discussed in this introduction.
+
+
+<h3>General concerns about meshes</h3>
+
+When you use adaptive mesh refinement, you definitely want the initial mesh to
+be as coarse as possible. The reason is that you can make it as fine as you
+want using adaptive refinement as long as you have memory and CPU time
+available. However, this requires that you don't waste mesh cells in parts of
+the domain where they don't pay off. As a consequence, you don't want to start
+with a mesh that is too fine to start with, because that takes up a good part
+of your cell budget already, and because you can't coarsen away cells that are
+in the initial mesh.
+
+That said, your mesh needs to capture the given geometry adequately.
+
+
+<h3>How to create meshes</h3>
+
+There are several ways to create an initial mesh. Meshes can be modified or
+combined in many ways as discussed later on.
+
+<h4>Using GridGenerator</h4>
+
+The easiest way to generate meshes is to use the functions in namespace
+GridGenerator, as already discussed in step-1.  There are many different
+helper functions
+available, including GridGenerator::hyper_cube(),
+GridGenerator::hyper_shell(), GridGenerator::hyper_ball(),
+GridGenerator::hyper_cube_with_cylindrical_hole(), ...
+
+
+<h4>Constructing your own mesh programmatically</h4>
+
+If there is no good fit in the GridGenerator namespace for what you want to
+do, you can always create a
+Triangulation in your program "by hand". For that, you need a list of vertices
+with their coordinates and a list of cells referencing those vertices. You can
+find an example in the function create_coarse_grid in step-14.
+All the functions in GridGenerator are implemented in this fashion.
+
+We are happy to accept more functions to be added to GridGenerator. So, if
+you end up writing a function that might be useful for a larger audience,
+please contribute it.
+
+
+<h4>Importing from external programs</h4>
+
+The class GridIn can read many different mesh formats from a file from
+disk. How this is done is explained in step-5 and can be seen in the function
+<code>grid_1</code> in this example, see the code below.
+
+Meshes can be generated from different tools like <a
+href="http://geuz.org/gmsh/" target="_top">gmsh</a>, <a
+href="https://lagrit.lanl.gov/" target="_top">lagrit</a> and <a
+href="http://cubit.sandia.gov/" target="_top">cubit</a>. See the
+documentation of GridIn for more information. The problem is that deal.II
+needs meshes that only consist of quads and hexas -- tetrahedral meshes won't
+work (this means tools like tetgen can not be used directly).
+
+We will describe a possible workflow using Gmsh. Gmsh is the smallest and
+most quickly set up open source tool we are aware of. It can generate
+unstructured 2d quad meshes, but in 3d it can only extrude 2d meshes to
+get hexahedral meshes. 3D meshing of unstructured geometry into hexahedra is not
+supported at the time of writing this tutorial (early 2013).
+
+In gmsh, a mesh is described in a text based <code>.geo</code> file, that can
+contain computations, loops, variables, etc. It is very flexible. The mesh is
+generated from a surface representation, which is built from a list of line
+loops, which is built from a list of lines, which are in turn built from
+points. The <code>.geo</code> script can be written and edited by hand or it
+can be generated automatically by creating objects graphically inside gmsh. In
+many cases it is best to combine both approaches. The file can be easily
+reloaded by pressing "reload" under the "Geometry" tab.
+
+This tutorial contains an example <code>.geo</code> file, that describes a box
+with two objects cut out in the interior. This is what
+<code>untitled.geo</code> looks like in gmsh (displaying the boundary
+indicators as well as the mesh discussed further down below):
+
+<img src="http://www.dealii.org/images/steps/developer/step-49.gmsh_picture.png" alt="">
+
+You might want to open the <code>untitled.geo</code> file in a text editor (it
+is located in the same directory as the <code>step-49.cc</code> source file) to
+see how it is structured. You can see how the boundary of the domain is
+composed of a number of lines and how later on we combine several lines into
+"physical lines" (or "physical surfaces") that list the logical lines'
+numbers. "Physical" objects are the ones that carry information about the
+boundary indicator (see @ref GlossBoundaryIndicator "this glossary entry").
+
+@note It is important that this file contain "physical lines" and "physical
+surfaces". These give the boundary indicators and material ids for use
+in deal.II. Without these physical entities, nothing will be imported into
+deal.II.
+
+deal.II's GridIn class can read the <code>.msh</code> format written by
+gmsh and that contains a mesh created for the geometry described by the
+<code>.geo</code> file. You generate the <code>.msh</code> from the
+<code>.geo</code> by running the commands
+
+@code
+gmsh -2 untitled.geo
+@endcode
+
+on the command line, or by clicking "Mesh" and then "2D" inside Gmsh after
+loading the file.  Now this is the mesh read from the <code>.msh</code> file
+and saved again by deal.II as an image (see the <code>grid_1</code> function
+of the current program):
+
+<img src="http://www.dealii.org/images/steps/developer/step-49.grid-1.png" alt="">
+
+
+<h3>Modifying a Mesh</h3>
+
+After acquiring one (or several) meshes in the ways described above, there are
+many ways to manipulate them before using them in a finite element
+computation.
+
+
+<h4>Transformations</h4>
+
+The GridTools namespace contains a collection of small functions to transform
+a given mesh in various ways. The usage of the functions GridTools::shift,
+GridTools::rotate, GridTools::scale is fairly obvious, so we won't discuss
+those functions here.
+
+The function GridTools::transform allows you to transform the vertices of a
+given mesh using a smooth function. An example of its use is also given in the
+results section of step-38 but let us show a simpler example here:
+In the function <code>grid_5()</code> of the current program, we perturb the y
+coordinate of a mesh with a sine curve:
+
+<TABLE WIDTH="60%" ALIGN="center">
+  <tr>
+    <td ALIGN="center">
+        <img src="http://www.dealii.org/images/steps/developer/step-49.grid-5a.png" alt=""> regular input mesh
+    </td>
+    <td ALIGN="center">
+        <img src="http://www.dealii.org/images/steps/developer/step-49.grid-5.png" alt=""> output mesh
+    </td>
+  </tr>
+</TABLE>
+
+Similarly, we can transform a regularly refined
+unit square to a wall-adapted mesh in y direction using the formula
+$(x,y) \mapsto (x,\tanh(2*y)/\tanh(2))$. This is done in <code>grid_6()</code>
+of this tutorial:
+<TABLE WIDTH="60%" ALIGN="center">
+  <tr>
+    <td ALIGN="center">
+        <img src="http://www.dealii.org/images/steps/developer/step-49.grid-6a.png" alt=""> regular input mesh
+    </td>
+    <td ALIGN="center">
+        <img src="http://www.dealii.org/images/steps/developer/step-49.grid-6.png" alt=""> wall-adapted output mesh
+    </td>
+  </tr>
+</TABLE>
+
+Finally, the function GridTools::distort_random allows you to move vertices in the
+mesh (optionally ignoring boundary nodes) by a random amount. This is
+demonstrated in <code>grid_7()</code> and the result is as follows:
+
+<TABLE WIDTH="60%" ALIGN="center">
+  <tr>
+    <td ALIGN="center">
+        <img src="http://www.dealii.org/images/steps/developer/step-49.grid-7a.png" alt=""> regular input mesh
+    </td>
+    <td ALIGN="center">
+        <img src="http://www.dealii.org/images/steps/developer/step-49.grid-7.png" alt=""> perturbed output mesh
+    </td>
+  </tr>
+</TABLE>
+
+This function is primarily intended to negate some of the superconvergence
+effects one gets when studying convergence on regular meshes, as well as to
+suppress some optimizations in deal.II that can exploit the fact that cells
+are similar in shape. In practice, it is of course always better to
+work with a sequence of unstructured meshes (see possible extensions at the
+end of this section).
+
+
+<h4>Merging Meshes</h4>
+
+The function GridGenerator::merge_triangulations() allows you to merge two
+given Triangulation objects into a single one.  For this to work, the vertices
+of the shared edge or face have to match exactly.  Lining up the two meshes
+can be achieved using GridTools::shift and GridTools::scale.  In the function
+<code>grid_2()</code> of this tutorial, we merge a square with a round hole
+(generated with GridGenerator::hyper_cube_with_cylindrical_hole()) and a
+rectangle (generated with GridGenerator::subdivided_hyper_rectangle()). The
+function GridGenerator::subdivided_hyper_rectangle() allows you to specify the
+number of repetitions and the positions of the corners, so there is no need to
+shift the triangulation manually here. You should inspect the mesh graphically
+to make sure that cells line up correctly and no unpaired nodes exist in the
+merged Triangulation.
+
+These are the input meshes and the output mesh:
+
+<table width="80%" align="center">
+  <tr>
+    <td ALIGN="center"><img src="http://www.dealii.org/images/steps/developer/step-49.grid-2a.png" alt="" height="200px">input mesh 1</td>
+    <td ALIGN="center"><img src="http://www.dealii.org/images/steps/developer/step-49.grid-2b.png" alt="" height="200px">input mesh 2</td>
+    <td ALIGN="center"><img src="http://www.dealii.org/images/steps/developer/step-49.grid-2.png" alt="" height="200px">merged mesh</td>
+  </tr>
+</table>
+
+
+<h4>Moving Vertices</h4>
+
+The function <code>grid_3()</code> demonstrates the ability to pick individual vertices and
+move them around in an existing mesh. Note that this has the potential to produce degenerate
+or inverted cells and you shouldn't expect anything useful to come of using
+such meshes. Here, we create a box with a cylindrical hole that is not exactly
+centered by moving the top vertices upwards:
+
+<TABLE WIDTH="60%" ALIGN="center">
+  <tr>
+    <td ALIGN="center">
+        <img src="http://www.dealii.org/images/steps/developer/step-49.grid-3a.png" alt="" height="200px"> input mesh
+    </td>
+
+    <td ALIGN="center">
+        <img src="http://www.dealii.org/images/steps/developer/step-49.grid-3.png" alt="" height="200px"> top vertices moved upwards
+    </td>
+  </tr>
+</table>
+
+For the exact way how this is done, see the code below.
+
+
+<h4>Extruding Meshes</h4>
+
+If you need a 3d mesh that can be created by extruding a given 2d mesh (that
+can be created in any of the ways given above), you can use the function
+GridGenerator::extrude_triangulation(). See the <code>grid_4()</code> function
+in this tutorial for an example. Note that for this particular case, the given
+result could also be achieved using the 3d version of
+GridGenerator::hyper_cube_with_cylindrical_hole(). The main usage is a 2d
+mesh, generated for example with gmsh, that is read in from a
+<code>.msh</code> file as described above. This is the output from grid_4():
+
+<TABLE WIDTH="60%" ALIGN="center">
+  <tr>
+    <td ALIGN="center">
+      <img src="http://www.dealii.org/images/steps/developer/step-49.grid-4base.png" alt=""> input mesh
+    </td>
+
+    <td ALIGN="center">
+      <img src="http://www.dealii.org/images/steps/developer/step-49.grid-4.png" alt=""> extruded output mesh
+    </td>
+  </tr>
+</table>
+
+
+<h3> After you have a coarse mesh </h3>
+
+Creating a coarse mesh using the methods discussed above is only the first
+step. When you have it, it will typically serve as the basis for further mesh
+refinement. This is not difficult — in fact, there is nothing else to do
+— if your geometry consists of only straight faces. However, this is
+often not the case if you have a more complex geometry and more steps than
+just creating the mesh are necessary. We will go over some of these steps in
+the <a href="#Results">results section</a> below.
+
+
+<!--
+
+Possible Extensions
+
+- Modify a mesh:
+  - change boundary indicators
+  - relax inner vertices
+- Database of unstructured meshes for convergence studies
+- discuss GridTools::extract_boundary_mesh
+- how to remove or disable a cell inside a mesh
+-->
diff --git a/examples/step-49/doc/kind b/examples/step-49/doc/kind
new file mode 100644
index 0000000..c1d9154
--- /dev/null
+++ b/examples/step-49/doc/kind
@@ -0,0 +1 @@
+techniques
diff --git a/examples/step-49/doc/results.dox b/examples/step-49/doc/results.dox
new file mode 100644
index 0000000..ed20909
--- /dev/null
+++ b/examples/step-49/doc/results.dox
@@ -0,0 +1,382 @@
+<h1>Results</h1>
+
+The program produces a series of <code>.eps</code> files of the
+triangulations. The methods are discussed above.
+
+
+<h3>Next steps: Curved boundaries</h3>
+
+As mentioned in the introduction,
+creating a coarse mesh using the methods discussed here is only the first
+step. In order to refine a mesh, the Triangulation needs to know where to put
+new vertices on the mid-points of edges and faces. By default, these new
+points will be placed at the centers of the old edges but this isn't what you
+want if you need curved boundaries that aren't already adequately resolved by
+the coarse mesh. Several of the meshes shown in the introduction section fall
+into this category. For example, for this mesh the central hole is supposed to
+be round:
+
+<img src="http://www.dealii.org/images/steps/developer/step-49.grid-2a.png" alt="" height="200px">
+
+On the other hand, if you simply refine it, the Triangulation class can not
+know whether you wanted the hole to be round or to be an octagon. The default
+is to place new points along existing edges. After two mesh refinement steps,
+this would yield the following mesh, which is not what we wanted:
+
+<img src="http://www.dealii.org/images/steps/developer/step-49.grid-2d-refined.png" alt="" height="200px">
+
+What needs to happen is that you tell the triangulation that you in fact want
+to use a curved boundary. The way to do this requires three steps:
+- Create an object that describes the boundary in terms that allow the
+  Triangulation::execute_coarsening_and_refinement function to ask the
+  boundary description where a new point should be located upon mesh
+  refinement.
+- Tell the triangulation object that you want this object to be used for all
+  boundaries with boundary indicators equal to a particular value (for more
+  information on boundary indicators, see the
+  @ref GlossBoundaryIndicator "glossary entry on this topic").
+- Mark those parts of the boundary of the domain for which you want the
+  boundary to be so treated with the value of the boundary indicator used in
+  the previous step. (The order of this step and the previous one does not
+  matter.)
+
+To illustrate this process in more detail, let us consider an example created
+by Yuhan Zhou as part of a 2013 semester project at Texas A&M University.
+The goal was to generate (and use) a geometry that describes a
+microstructured electric device. In a CAD program, the geometry looks like
+this:
+
+<img src="http://www.dealii.org/images/steps/developer/step-49.yuhan.1.png" alt="">
+
+In the following, we will walk you through the entire process of creating a
+mesh for this geometry, including a number of common pitfalls by showing the
+things that can go wrong.
+
+The first step in getting there was to create a coarse mesh, which was done
+by creating a 2d coarse mesh for each of the two cross sections, extruding them
+into the third direction, and gluing them together. The following code does
+this, using the techniques previously described:
+
+@code
+// Given a list of points and how vertices connect to cells,
+// create a mesh. This is done in the same way as in step-14.
+void create_2d_grid (const Point<2> vertices_1[],
+	  	     const unsigned int n_vertices,
+		     const int cell_vertices[][4],
+		     const unsigned int n_cells,
+                     Triangulation<2> &coarse_grid)
+{
+  const std::vector<Point<2> > vertices (&vertices_1[0],
+			    	         &vertices_1[n_vertices]);
+
+  std::vector<CellData<2> > cells (n_cells, CellData<2>());
+  for (unsigned int i=0; i<n_cells; ++i)
+    {
+      for (unsigned int j=0;
+	   j<GeometryInfo<2>::vertices_per_cell;
+	   ++j)
+	cells[i].vertices[j] = cell_vertices[i][j];
+    }
+
+  coarse_grid.create_triangulation (vertices,
+				    cells,
+				    SubCellData());
+}
+
+
+// Create a triangulation that covers the entire volume
+void create_3d_grid (Triangulation<3> &triangulation)
+{
+  // Generate first cross section
+  const Point<2> vertices_1[]
+    = {  Point<2> (-1.5,   0.),
+	 Point<2> (-0.5,   0.),
+	 Point<2> ( 0.5,   0.),
+	 Point<2> ( 1.5,   0.),
+
+	 Point<2> (-1.5,  1.5),
+	 Point<2> (-0.5,  1.5),
+	 Point<2> ( 0.5,  1.5),
+	 Point<2> ( 1.5,  1.5),
+
+	 Point<2> (-1.5,   3.),
+	 Point<2> (-0.5,   3.),
+	 Point<2> ( 0.5,   3.),
+	 Point<2> ( 1.5,   3.),
+
+	 Point<2> (-0.5,   3+0.5*sqrt(3)),
+	 Point<2> ( 0.5,   3+0.5*sqrt(3)),
+
+	 Point<2> (-0.75,  3+0.75*sqrt(3)),
+	 Point<2> ( 0.75,  3+0.75*sqrt(3))
+  };
+  const int cell_vertices_1[][GeometryInfo<2>::vertices_per_cell]
+    = {{0, 1, 4, 5},
+       {1, 2, 5, 6},
+       {3, 7, 2, 6},
+       {4, 5, 8, 9},
+       {5, 6, 9, 10},
+       {7,11, 6,10},
+       {8, 9, 14,12},
+       {9, 10,12,13},
+       {11,15,10,13},
+       {14,12,15,13}
+  };
+
+  // Copy vertices into a 2d triangulation
+  Triangulation<2> triangulation_2d_1;
+  create_2d_grid (vertices_1,
+                  sizeof(vertices_1)/sizeof(vertices_1[0]),
+		  cell_vertices_1,
+                  sizeof(cell_vertices_1)/sizeof(cell_vertices_1[0]),
+                  triangulation_2d_1);
+
+  // Then extrude it into a 3d piece
+  Triangulation<3> triangulation_3d_1;
+  GridGenerator::extrude_triangulation (triangulation_2d_1,
+                                        5, 2.5,
+                                        triangulation_3d_1);
+
+  // Now do the same with the second volume
+  const Point<2> vertices_2[]
+    = {  Point<2> (-2.5,   0.),
+	 Point<2> (-1.5,   0.),
+	 Point<2> (-0.5,   0.),
+	 Point<2> ( 0.5,   0.),
+	 Point<2> ( 1.5,   0.),
+	 Point<2> ( 2.5,   0.),
+
+	 Point<2> (-2.5,  1.5),
+	 Point<2> (-1.5,  1.5),
+	 Point<2> (-0.5,  1.5),
+	 Point<2> ( 0.5,  1.5),
+	 Point<2> ( 1.5,  1.5),
+	 Point<2> ( 2.5,  1.5),
+
+	 Point<2> (-2.5,  3.),
+	 Point<2> (-1.5,  3.),
+	 Point<2> (-0.5,  3.),
+	 Point<2> ( 0.5,  3.),
+	 Point<2> ( 1.5,  3.),
+	 Point<2> ( 2.5,  3.),
+
+	 Point<2> (-0.5,   3.+0.5*sqrt(3)),
+	 Point<2> ( 0.5,   3.+0.5*sqrt(3)),
+
+	 Point<2> (-0.75,  3.+0.75*sqrt(3)),
+	 Point<2> ( 0.75,  3.+0.75*sqrt(3)),
+
+	 Point<2> (-1.25,  3.+1.25*sqrt(3)),
+	 Point<2> ( 1.25,  3.+1.25*sqrt(3))
+  };
+  const int cell_vertices_2[][GeometryInfo<2>::vertices_per_cell]
+    = {{0, 1, 6, 7},
+       {1, 2, 7, 8},
+       {2, 3, 8, 9},
+       {4, 10, 3, 9},
+       {5, 11, 4, 10},
+       {6, 7, 12, 13},
+       {7, 8, 13, 14},
+       {8, 9, 14, 15},
+       {10, 16, 9, 15},
+       {11, 17, 10, 16},
+       {12, 13, 22, 20},
+       {13, 14, 20, 18},
+       {14, 15, 18, 19},
+       {16, 21, 15, 19},
+       {17, 23, 16, 21},
+       {20, 18, 21, 19},
+       {22, 20, 23, 21}
+  };
+
+  Triangulation<2> triangulation_2d_2;
+  create_2d_grid (vertices_2,
+                  sizeof(vertices_2)/sizeof(vertices_2[0]),
+		  cell_vertices_2,
+                  sizeof(cell_vertices_2)/sizeof(cell_vertices_2[0]),
+                  triangulation_2d_2);
+
+  Triangulation<3> triangulation_3d_2;
+  GridGenerator::extrude_triangulation (triangulation_2d_2,
+                                        5, 2.5,
+                                        triangulation_3d_2);
+
+  // Also shift this triangulation in the z-direction so
+  // that it matches the end face of the first part
+  GridTools::shift (Point<3>(0,0,2.5),
+                    triangulation_3d_2);
+
+
+  // Now first merge these two pieces, then shift the
+  // first piece in z-direction beyond the second, and
+  // merge the shifted piece with the two previously
+  // merged ones into the final one:
+  Triangulation<3> triangulation_3d_tmp;
+  GridGenerator::merge_triangulations (triangulation_3d_1,
+                                       triangulation_3d_2,
+                                       triangulation_3d_tmp);
+
+  GridTools::shift (Point<3>(0,0,5),
+                    triangulation_3d_1);
+
+  GridGenerator::merge_triangulations (triangulation_3d_tmp,
+                                       triangulation_3d_1,
+                                       triangulation);
+}
+@endcode
+
+With this code, you get a mesh that looks like this:
+
+<img src="http://www.dealii.org/images/steps/developer/step-49.yuhan.2.png" alt="">
+
+The next step is to teach each of the top surfaces that they should be
+curved. We can do this by creating CylinderBoundary objects that
+describe this. A first attempt looks like this:
+
+@code
+  Triangulation<3> triangulation;
+  create_3d_grid (triangulation);
+
+  // Create the objects that describe the boundaries and attach them
+  // to the triangulation as the ones to use on boundaries marked
+  // with boundary indicators 8 and 9
+  const double inner_radius = 1.5;
+  const double outer_radius = 2.5;
+
+  static const CylinderBoundary<3> inner_cylinder(inner_radius, 2);
+  static const CylinderBoundary<3> outer_cylinder(outer_radius, 2);
+
+  triangulation.set_boundary (8, inner_cylinder);
+  triangulation.set_boundary (9, outer_cylinder);
+
+  // Then loop over all faces of the domain and, if for the position
+  // of the center of a face the following holds then set boundary
+  // indicators:
+  // - if y>=3 and (z<=2.5 or z>=5) then use boundary indicator 8
+  // - if y>=3 and 2.5<=z<=5 then use boundary indicator 9
+  typename Triangulation<3>::active_cell_iterator
+    cell = triangulation.begin_active(),
+    endc = triangulation.end();
+  for (; cell!=endc; ++cell)
+    for (unsigned int f=0;
+	 f < GeometryInfo<3>::faces_per_cell;
+	 ++f)
+      {
+	const Point<3> face_center = cell->face(f)->center();
+
+	if (cell->face(f)->at_boundary())
+	  {
+	    if ((face_center[2] <= 2.5 || face_center[2] >= 5) &&
+	        face_center[1] >= 3)
+	      cell->face(f)->set_boundary_id(8);
+
+	    if (face_center[2] >= 2.5 &&
+	        face_center[2] <= 5
+	        && face_center[1] >= 3)
+	      cell->face(f)->set_boundary_id(9);
+	  }
+      }
+
+  // Then refine the mesh once
+  triangulation.refine_global (1);
+@endcode
+
+With this code, we get a mesh that looks like this:
+
+<img src="http://www.dealii.org/images/steps/developer/step-49.yuhan.3.png" alt="">
+
+This is clearly not correct: The new vertices that have been entered at
+mid-edge and mid-face points are not where they should have been. Upon some
+reflection, it turns out that while the radii of the cylinders are correct,
+the axes of the two cylinder objects should not have been along coordinate
+axes but shifted. This can be corrected by creating them as follows, the
+two points given as arguments indicating the direction and a point on the
+axis:
+
+@code
+  static const CylinderBoundary<3> inner_cylinder (inner_radius,
+                                                   Point<3>(0,0,1),
+                                                   Point<3>(0,3,0));
+  static const CylinderBoundary<3> outer_cylinder (outer_radius,
+                                                   Point<3>(0,0,1),
+                                                   Point<3>(0,3,0));
+  triangulation.set_boundary (9, outer_cylinder);
+@endcode
+
+This yields an improvement, though it is still not quite correct:
+
+<img src="http://www.dealii.org/images/steps/developer/step-49.yuhan.4.png" alt="">
+
+Looking closely at this mesh, we realize that the new points on mid-face
+vertices are where they should be, though the new vertices inserted at
+mid-edge points are in the wrong place (you see this by comparing the
+picture with the one of the coarse mesh). What is happening is that we
+are only telling the triangulation to use these geometry objects for
+the <i>faces</i> but not for the adjacent <i>edges</i> as well. This is
+easily fixed by using the function TriaAccessor::set_all_boundary_ids()
+instead of TriaAccessor::set_boundary_id() used above. With this change,
+the grid now looks like this:
+
+<img src="http://www.dealii.org/images/steps/developer/step-49.yuhan.5.png" alt="">
+
+This is already better. However, something is still going wrong on the
+front left face. On second look, we can also see that the faces where
+the geometry widens have been refined at the bottom, that there is one
+transition face that looks wrong because it has a triangle rather than
+a quadrilateral, and that finally the transition faces in the cylindrical
+region appear not to have been refined at all in radial direction.
+
+This is due to the fact that we have (erroneously) marked all boundary faces
+between $0\le z\le 2.5$ with the boundary indicator for the small cylinder
+and similarly for the other regions. This condition includes the faces parallel
+to the x-y plane. To fix it, we need to exclude faces whose center points have
+$z$ values equal to (or at least close to, since we should not compare
+for equality in floating point arithmetic) 0, 2.5, 5 or 7.5. This replacement
+code does the trick:
+
+@code
+  // Then loop over all faces of the domain and, if for the position
+  // of the center of a face the following holds then set boundary
+  // indicators:
+  // - if y>=3 and (z<2.5 or z>5) then use boundary indicator 8
+  // - if y>=3 and 2.5<z<5 then use boundary indicator 9
+  // In this process, exclude faces whose z-coordinates are
+  // within a small distance of z=0, z=2.5, z=5 or z=7.5.
+  typename Triangulation<3>::active_cell_iterator
+    cell = triangulation.begin_active(),
+    endc = triangulation.end();
+  for (; cell!=endc; ++cell)
+    for (unsigned int f=0;
+         f < GeometryInfo<3>::faces_per_cell;
+         ++f)
+      {
+        const Point<3> face_center = cell->face(f)->center();
+
+        if (cell->face(f)->at_boundary())
+          if ((face_center[2]>1e-6) &&
+              (face_center[2]<7.5-1e-6) &&
+              (std::fabs(face_center[2]-2.5)>1e-6) &&
+              (std::fabs(face_center[2]-5.0)>1e-6))
+            {
+              if ((face_center[2] < 2.5 || face_center[2] > 5)
+                  && face_center[1] >= 3)
+                cell->face(f)->set_all_boundary_ids(8);
+
+              if (face_center[2] > 2.5 && face_center[2] < 5
+                  && face_center[1] >= 3)
+                cell->face(f)->set_all_boundary_ids(9);
+            }
+      }
+}
+@endcode
+
+With this, we finally get a mesh that looks good:
+
+<img src="http://www.dealii.org/images/steps/developer/step-49.yuhan.6.png" alt="">
+
+We can then refine the mesh two more times to see in more detail what
+happens to the curved part of the boundary:
+
+ <img src="http://www.dealii.org/images/steps/developer/step-49.yuhan.7.png" alt="">
+
+ So, yes!, this is finally what we were looking for!
diff --git a/examples/step-49/doc/tooltip b/examples/step-49/doc/tooltip
new file mode 100644
index 0000000..9975ca8
--- /dev/null
+++ b/examples/step-49/doc/tooltip
@@ -0,0 +1 @@
+How to create and modify meshes.
diff --git a/examples/step-49/step-49.cc b/examples/step-49/step-49.cc
new file mode 100644
index 0000000..cb38466
--- /dev/null
+++ b/examples/step-49/step-49.cc
@@ -0,0 +1,358 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2013 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Author: Timo Heister, Texas A&M University, 2013
+ */
+
+
+// This tutorial program is odd in the sense that, unlike for most other
+// steps, the introduction already provides most of the information on how to
+// use the various strategies to generate meshes. Consequently, there is
+// little that remains to be commented on here, and we intersperse the code
+// with relatively little text. In essence, the code here simply provides a
+// reference implementation of what has already been described in the
+// introduction.
+
+// @sect3{Include files}
+
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/grid_tools.h>
+#include <deal.II/grid/manifold_lib.h>
+#include <deal.II/grid/grid_out.h>
+#include <deal.II/grid/grid_in.h>
+
+#include <iostream>
+#include <fstream>
+
+#include <map>
+
+using namespace dealii;
+
+// @sect3{Generating output for a given mesh}
+
+// The following function generates some output for any of the meshes we will
+// be generating in the remainder of this program. In particular, it generates
+// the following information:
+//
+// - Some general information about the number of space dimensions in which
+//   this mesh lives and its number of cells.
+// - The number of boundary faces that use each boundary indicator, so that
+//   it can be compared with what we expect.
+//
+// Finally, the function outputs the mesh in encapsulated postscript (EPS)
+// format that can easily be visualized in the same way as was done in step-1.
+template <int dim>
+void print_mesh_info(const Triangulation<dim> &tria,
+                     const std::string        &filename)
+{
+  std::cout << "Mesh info:" << std::endl
+            << " dimension: " << dim << std::endl
+            << " no. of cells: " << tria.n_active_cells() << std::endl;
+
+  // Next loop over all faces of all cells and find how often each
+  // boundary indicator is used (recall that if you access an element
+  // of a std::map object that doesn't exist, it is implicitly created
+  // and default initialized -- to zero, in the current case -- before
+  // we then increment it):
+  {
+    std::map<unsigned int, unsigned int> boundary_count;
+    typename Triangulation<dim>::active_cell_iterator
+    cell = tria.begin_active(),
+    endc = tria.end();
+    for (; cell!=endc; ++cell)
+      {
+        for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+          {
+            if (cell->face(face)->at_boundary())
+              boundary_count[cell->face(face)->boundary_id()]++;
+          }
+      }
+
+    std::cout << " boundary indicators: ";
+    for (std::map<unsigned int, unsigned int>::iterator it=boundary_count.begin();
+         it!=boundary_count.end();
+         ++it)
+      {
+        std::cout << it->first << "(" << it->second << " times) ";
+      }
+    std::cout << std::endl;
+  }
+
+  // Finally, produce a graphical representation of the mesh to an output
+  // file:
+  std::ofstream out (filename.c_str());
+  GridOut grid_out;
+  grid_out.write_eps (tria, out);
+  std::cout << " written to " << filename
+            << std::endl
+            << std::endl;
+}
+
+// @sect3{Main routines}
+
+// @sect4{grid_1: Loading a mesh generated by gmsh}
+
+// In this first example, we show how to load the mesh whose generation we
+// discussed in the introduction. This follows the same pattern as used in
+// step-5 to load a mesh, although there it was written in a different file
+// format (UCD instead of MSH).
+void grid_1 ()
+{
+  Triangulation<2> triangulation;
+
+  GridIn<2> gridin;
+  gridin.attach_triangulation(triangulation);
+  std::ifstream f("untitled.msh");
+  gridin.read_msh(f);
+
+  print_mesh_info(triangulation, "grid-1.eps");
+}
+
+
+// @sect4{grid_2: Merging triangulations}
+
+// Here, we first create two triangulations and then merge them into one.  As
+// discussed in the introduction, it is important to ensure that the vertices
+// at the common interface are located at the same coordinates.
+void grid_2 ()
+{
+  Triangulation<2> tria1;
+  GridGenerator::hyper_cube_with_cylindrical_hole (tria1, 0.25, 1.0);
+
+  Triangulation<2> tria2;
+  std::vector< unsigned int > repetitions(2);
+  repetitions[0]=3;
+  repetitions[1]=2;
+  GridGenerator::subdivided_hyper_rectangle (tria2, repetitions,
+                                             Point<2>(1.0,-1.0),
+                                             Point<2>(4.0,1.0));
+
+  Triangulation<2> triangulation;
+  GridGenerator::merge_triangulations (tria1, tria2, triangulation);
+
+  print_mesh_info(triangulation, "grid-2.eps");
+}
+
+
+// @sect4{grid_3: Moving vertices}
+
+// In this function, we move vertices of a mesh. This is simpler than one
+// usually expects: if you ask a cell using <code>cell-@>vertex(i)</code> for
+// the coordinates of its <code>i</code>th vertex, it doesn't just provide the
+// location of this vertex but in fact a reference to the location where these
+// coordinates are stored. We can then modify the value stored there.
+//
+// So this is what we do in the first part of this function: We create a
+// square of geometry $[-1,1]^2$ with a circular hole with radius 0.25 located
+// at the origin. We then loop over all cells and all vertices and if a vertex
+// has a $y$ coordinate equal to one, we move it upward by 0.5.
+//
+// Note that this sort of procedure does not usually work this way because one
+// will typically encounter the same vertices multiple times and may move them
+// more than once. It works here because we select the vertices we want to use
+// based on their geometric location, and a vertex moved once will fail this
+// test in the future. A more general approach to this problem would have been
+// to keep a std::set of those vertex indices that we have already moved
+// (which we can obtain using <code>cell-@>vertex_index(i)</code>) and only
+// move those vertices whose index isn't in the set yet.
+void grid_3 ()
+{
+  Triangulation<2> triangulation;
+  GridGenerator::hyper_cube_with_cylindrical_hole (triangulation, 0.25, 1.0);
+
+  Triangulation<2>::active_cell_iterator
+  cell = triangulation.begin_active(),
+  endc = triangulation.end();
+  for (; cell!=endc; ++cell)
+    {
+      for (unsigned int i=0; i<GeometryInfo<2>::vertices_per_cell; ++i)
+        {
+          Point<2> &v = cell->vertex(i);
+          if (std::abs(v(1)-1.0)<1e-5)
+            v(1) += 0.5;
+        }
+    }
+
+  // In the second step we will refine the mesh twice. To do this
+  // correctly, we have to associate a geometry object with the
+  // boundary of the hole; since the boundary of the hole has boundary
+  // indicator 1 (see the documentation of the function that generates
+  // the mesh), we need to create an object that describes a spherical
+  // manifold (i.e., a hyper ball) with appropriate center and assign
+  // it to the triangulation. Notice that the function that generates
+  // the triangulation sets the boundary indicators of the inner mesh,
+  // but leaves unchanged the manifold indicator. We copy the boundary
+  // indicator to the manifold indicators in order for the object to
+  // be refined accordingly.
+  // We can then refine twice:
+  GridTools::copy_boundary_to_manifold_id(triangulation);
+  const SphericalManifold<2> boundary_description(Point<2>(0,0));
+  triangulation.set_manifold (1, boundary_description);
+  triangulation.refine_global(2);
+
+  // The mesh so generated is then passed to the function that generates
+  // output. In a final step we remove the boundary object again so that it is
+  // no longer in use by the triangulation when it is destroyed (the boundary
+  // object is destroyed first in this function since it was declared after
+  // the triangulation).
+  print_mesh_info (triangulation, "grid-3.eps");
+  triangulation.set_manifold (1);
+}
+
+// There is one snag to doing things as shown above: If one moves the nodes on
+// the boundary as shown here, one often ends up with cells in the interior
+// that are badly distorted since the interior nodes were not moved around. This
+// is not that much of a problem in the current case since the mesh did not
+// contain any internal nodes when the nodes were moved -- it was the coarse
+// mesh and it so happened that all vertices are at the boundary. It's also
+// the case that the movement we had here was, compared to the average cell
+// size, not overly dramatic. Nevertheless, sometimes one does want to move
+// vertices by a significant distance, and in that case one needs to move
+// internal nodes as well. One way to do that automatically is to call the
+// function GridTools::laplace_transform that takes a set of transformed
+// vertex coordinates and moves all of the other vertices in such a way that the
+// resulting mesh has, in some sense, a small distortion.
+
+
+
+// @sect4{grid_4: Demonstrating extrude_triangulation}
+
+// This example takes the initial grid from the previous function and simply extrudes it into the third space dimension:
+void grid_4()
+{
+  Triangulation<2> triangulation;
+  Triangulation<3> out;
+  GridGenerator::hyper_cube_with_cylindrical_hole (triangulation, 0.25, 1.0);
+
+  GridGenerator::extrude_triangulation (triangulation, 3, 2.0, out);
+  print_mesh_info(out, "grid-4.eps");
+}
+
+
+// @sect4{grid_5: Demonstrating GridTools::transform, part 1}
+
+// This and the next example first create a mesh and then transform it by
+// moving every node of the mesh according to a function that takes a point
+// and returns a mapped point. In this case, we transform $(x,y) \mapsto
+// (x,y+\sin(\pi x/5))$.
+//
+// GridTools::transform takes a triangulation and any kind of object that can
+// be called like a function as arguments. This function-like argument can be
+// simply the address of a function as in the current case, or an object that
+// has an <code>operator()</code> as in the next example, or for example a
+// <code>std::function@<Point@<2@>(const Point@<2@>)@></code> object one can get
+// via <code>std::bind</code> in more complex cases.
+Point<2> grid_5_transform (const Point<2> &in)
+{
+  return Point<2>(in(0),
+                  in(1) + std::sin(in(0)/5.0*3.14159));
+}
+
+
+void grid_5()
+{
+  Triangulation<2> tria;
+  std::vector<unsigned int> repetitions(2);
+  repetitions[0] = 14;
+  repetitions[1] = 2;
+  GridGenerator::subdivided_hyper_rectangle (tria, repetitions,
+                                             Point<2>(0.0,0.0),
+                                             Point<2>(10.0,1.0));
+
+  GridTools::transform(&grid_5_transform, tria);
+  print_mesh_info(tria, "grid-5.eps");
+}
+
+
+
+// @sect4{grid_6: Demonstrating GridTools::transform, part 2}
+
+// In this second example of transforming points from an original to a new
+// mesh, we will use the mapping $(x,y) \mapsto (x,\tanh(2y)/\tanh(2))$. To
+// make things more interesting, rather than doing so in a single function as
+// in the previous example, we here create an object with an
+// <code>operator()</code> that will be called by GridTools::transform. Of
+// course, this object may in reality be much more complex: the object may
+// have member variables that play a role in computing the new locations of
+// vertices.
+struct Grid6Func
+{
+  double trans(const double y) const
+  {
+    return std::tanh(2*y)/tanh(2);
+  }
+
+  Point<2> operator() (const Point<2> &in) const
+  {
+    return Point<2> (in(0),
+                     trans(in(1)));
+  }
+};
+
+
+void grid_6()
+{
+  Triangulation<2> tria;
+  std::vector< unsigned int > repetitions(2);
+  repetitions[0] = repetitions[1] = 40;
+  GridGenerator::subdivided_hyper_rectangle (tria, repetitions,
+                                             Point<2>(0.0,0.0),
+                                             Point<2>(1.0,1.0));
+
+  GridTools::transform(Grid6Func(), tria);
+  print_mesh_info(tria, "grid-6.eps");
+}
+
+
+// @sect4{grid_7: Demonstrating distort_random}
+
+// In this last example, we create a mesh and then distort its (interior)
+// vertices by a random perturbation. This is not something you want to do for
+// production computations, but it is a useful tool for testing
+// discretizations and codes to make sure they don't work just by accident
+// because the mesh happens to be uniformly structured and supporting
+// super-convergence properties.
+void grid_7()
+{
+  Triangulation<2> tria;
+  std::vector<unsigned int> repetitions(2);
+  repetitions[0] = repetitions[1] = 16;
+  GridGenerator::subdivided_hyper_rectangle (tria, repetitions,
+                                             Point<2>(0.0,0.0),
+                                             Point<2>(1.0,1.0));
+
+  GridTools::distort_random (0.3, tria, true);
+  print_mesh_info(tria, "grid-7.eps");
+}
+
+
+// @sect3{The main function}
+
+// Finally, the main function. There isn't much to do here, only to call the
+// subfunctions.
+int main ()
+{
+  grid_1 ();
+  grid_2 ();
+  grid_3 ();
+  grid_4 ();
+  grid_5 ();
+  grid_6 ();
+  grid_7 ();
+}
diff --git a/examples/step-49/untitled.geo b/examples/step-49/untitled.geo
new file mode 100644
index 0000000..95e422d
--- /dev/null
+++ b/examples/step-49/untitled.geo
@@ -0,0 +1,56 @@
+cl1 = 1;
+
+Point(1) = {-1, 0.3, 0, 1};
+Point(2) = {0.5, 0.3, 0, 1};
+Point(3) = {-1, -0.5, 0, 1};
+Point(4) = {0.5, -0.5, 0, 1};
+
+Point(7) = {-0.3, -0.1, 0, 1};
+Point(8) = {-0.2, -0.1, 0, 1};
+Point(9) = {-0.3, 0.1, -0, 1};
+Point(10) = {-0.4, -0.1, 0, 1};
+Point(11) = {-0.3, -0.3, 0, 1};
+
+Point(12) = {0.1, -0.1, 0, 1};
+Point(13) = {0.2, 0.0, 0, 1};
+Point(14) = {0.3, -0.1, 0, 1};
+
+// lines of the outer box:
+Line(1) = {1, 2};
+Line(2) = {4, 2};
+Line(3) = {1, 3};
+Line(4) = {3, 4};
+
+// the first cutout:
+Ellipse(5) = {8, 7, 11, 9};
+Ellipse(6) = {9, 7, 11, 10};
+Ellipse(7) = {8, 7, 10, 11};
+Ellipse(8) = {11, 7, 8, 10};
+
+// the second cutout:
+Line(9) = {12, 13};
+Line(10) = {13, 14};
+Line(11) = {14, 12};
+
+// loops of the outside and the two cutouts
+Line Loop(12) = {1, -2, -4, -3};
+Line Loop(14) = {5, 6, -8, -7};
+Line Loop(15) = {9,10,11};
+
+// these define the boundary indicators in deal.II:
+Physical Line(0) = {1, 2, 4, 3};
+Physical Line(1) = {6, 5, 8, 7};
+Physical Line(2) = {9, 10, 11};
+
+
+// you need the physical surface, because that is what deal.II reads in
+Plane Surface(16) = {12, 14, 15};
+Physical Surface(17) = {16};
+
+// some parameters for the meshing:
+Mesh.Algorithm = 8;
+Mesh.RecombineAll = 1;
+Mesh.CharacteristicLengthFactor = 0.09;
+Mesh.SubdivisionAlgorithm = 1;
+Mesh.Smoothing = 20;
+Show "*";
diff --git a/examples/step-49/untitled.msh b/examples/step-49/untitled.msh
new file mode 100644
index 0000000..c98a099
--- /dev/null
+++ b/examples/step-49/untitled.msh
@@ -0,0 +1,1150 @@
+$MeshFormat
+2.2 0 8
+$EndMeshFormat
+$Nodes
+539
+1 -1 0.3 0
+2 0.5 0.3 0
+3 -1 -0.5 0
+4 0.5 -0.5 0
+5 -0.2 -0.1 0
+6 -0.3 0.1 -0
+7 -0.4 -0.1 0
+8 -0.3 -0.3 0
+9 0.1 -0.1 0
+10 0.2 0 0
+11 0.3 -0.1 0
+12 -0.9531250000001519 0.3 0
+13 -0.9062500000003468 0.3 0
+14 -0.8593750000004925 0.3 0
+15 -0.8125000000006937 0.3 0
+16 -0.7656250000008764 0.3 0
+17 -0.7187500000010403 0.3 0
+18 -0.67187500000122 0.3 0
+19 -0.6250000000013871 0.3 0
+20 -0.5781250000015825 0.3 0
+21 -0.5312500000017341 0.3 0
+22 -0.4843750000019432 0.3 0
+23 -0.4375000000020811 0.3 0
+24 -0.3906250000022451 0.3 0
+25 -0.3437500000024281 0.3 0
+26 -0.2968750000025728 0.3 0
+27 -0.2500000000027534 0.3 0
+28 -0.2031250000025706 0.3 0
+29 -0.1562500000024281 0.3 0
+30 -0.1093750000022452 0.3 0
+31 -0.06250000000208122 0.3 0
+32 -0.01562500000189826 0.3 0
+33 0.03124999999826561 0.3 0
+34 0.0781249999983642 0.3 0
+35 0.1249999999986124 0.3 0
+36 0.1718749999989115 0.3 0
+37 0.2187499999989593 0.3 0
+38 0.2656249999991656 0.3 0
+39 0.3124999999993063 0.3 0
+40 0.3593749999994047 0.3 0
+41 0.4062499999996529 0.3 0
+42 0.4531249999999201 0.3 0
+43 0.5 -0.4500000000001266 0
+44 0.5 -0.4000000000002087 0
+45 0.5 -0.3500000000003457 0
+46 0.5 -0.3000000000004826 0
+47 0.5 -0.2500000000006304 0
+48 0.5 -0.2000000000007781 0
+49 0.5 -0.1500000000009204 0
+50 0.5 -0.1000000000010627 0
+51 0.5 -0.05000000000093285 0
+52 0.5 -8.051892486093948e-13 0
+53 0.5 0.04999999999932914 0
+54 0.5 0.0999999999994633 0
+55 0.5 0.1499999999996039 0
+56 0.5 0.1999999999997427 0
+57 0.5 0.249999999999861 0
+58 -1 0.2500000000001076 0
+59 -1 0.2000000000001668 0
+60 -1 0.1500000000003152 0
+61 -1 0.1000000000004636 0
+62 -1 0.05000000000061477 0
+63 -1 7.659428646888955e-13 0
+64 -1 -0.04999999999908838 0
+65 -1 -0.09999999999894271 0
+66 -1 -0.1499999999990865 0
+67 -1 -0.1999999999991935 0
+68 -1 -0.2499999999993186 0
+69 -1 -0.2999999999994552 0
+70 -1 -0.3499999999996415 0
+71 -1 -0.3999999999997279 0
+72 -1 -0.4499999999998711 0
+73 -0.9531250000001519 -0.5 0
+74 -0.9062500000003468 -0.5 0
+75 -0.8593750000004925 -0.5 0
+76 -0.8125000000006937 -0.5 0
+77 -0.7656250000008764 -0.5 0
+78 -0.7187500000010403 -0.5 0
+79 -0.67187500000122 -0.5 0
+80 -0.6250000000013871 -0.5 0
+81 -0.5781250000015825 -0.5 0
+82 -0.5312500000017341 -0.5 0
+83 -0.4843750000019432 -0.5 0
+84 -0.4375000000020811 -0.5 0
+85 -0.3906250000022451 -0.5 0
+86 -0.3437500000024281 -0.5 0
+87 -0.2968750000025728 -0.5 0
+88 -0.2500000000027534 -0.5 0
+89 -0.2031250000025706 -0.5 0
+90 -0.1562500000024281 -0.5 0
+91 -0.1093750000022452 -0.5 0
+92 -0.06250000000208122 -0.5 0
+93 -0.01562500000189826 -0.5 0
+94 0.03124999999826561 -0.5 0
+95 0.0781249999983642 -0.5 0
+96 0.1249999999986124 -0.5 0
+97 0.1718749999989115 -0.5 0
+98 0.2187499999989593 -0.5 0
+99 0.2656249999991656 -0.5 0
+100 0.3124999999993063 -0.5 0
+101 0.3593749999994047 -0.5 0
+102 0.4062499999996529 -0.5 0
+103 0.4531249999999201 -0.5 0
+104 -0.2046552515335552 -0.03968817894076274 0
+105 -0.2195883623691181 0.01889438226459211 0
+106 -0.2486906772479457 0.07166657681360722 0
+107 -0.3513093227524131 0.07166657681317815 0
+108 -0.3804116376307094 0.01889438226505884 0
+109 -0.3953447484664305 -0.03968817894067206 0
+110 -0.2046552515917311 -0.1603118214271105 0
+111 -0.2195883626274184 -0.2188943829633752 0
+112 -0.248690677965047 -0.2716665776709432 0
+113 -0.3513093227525148 -0.2716665768130566 0
+114 -0.3804116376306984 -0.2188943822650886 0
+115 -0.3953447484664183 -0.1603118210594052 0
+116 0.1249999999999542 -0.07500000000004578 0
+117 0.1499999999998638 -0.05000000000013619 0
+118 0.1749999999999431 -0.02500000000005694 0
+119 0.2249999999999409 -0.02499999999994093 0
+120 0.2499999999998745 -0.04999999999987455 0
+121 0.2749999999999421 -0.07499999999994209 0
+122 0.2500000000001383 -0.1 0
+123 0.2000000000002766 -0.1 0
+124 0.1500000000001331 -0.1 0
+125 -0.9033134529127611 0.0009279283689537796 0
+126 0.1056840344532033 -0.03059238097234286 0
+127 0.1329470343745991 -0.4064733662967535 0
+128 -0.4974081242184328 -0.09974790781516674 0
+129 -0.6209098083537731 -0.4105390417591004 0
+130 -0.9033106598168849 -0.2008404664612156 0
+131 -0.6209453715945851 0.210783788751528 0
+132 0.1057767353338423 0.2096217856743312 0
+133 0.407452196177108 -0.310240435546468 0
+134 -0.05438727050785021 -0.4351160765312596 0
+135 -0.1505983338068013 0.2323218867908677 0
+136 -0.4079436836074805 -0.3717558360125787 0
+137 -0.2130881523143215 -0.3591068442289977 0
+138 -0.4082886386645511 0.1734939970920804 0
+139 0.4059594361456419 -0.2178217131655568 0
+140 0.4004290339816127 -0.1292214458349392 0
+141 0.3691751941203575 -0.03515626665003391 0
+142 0.3151105104374043 -0.4081830749632446 0
+143 -0.9051149044947018 0.2008847418724897 0
+144 -0.9040734470980513 -0.3012032519312854 0
+145 -0.7151841063579403 -0.4050055645801979 0
+146 -0.8101478808109693 -0.4022384847434944 0
+147 0.3865687383511582 0.08918799281037987 0
+148 0.1965665207117662 0.2046632714094421 0
+149 0.292949426821332 0.1973723774562355 0
+150 0.3960450532783039 0.1963555245692347 0
+151 -0.08021530801204914 0.2159939976245657 0
+152 0.4072141460255509 -0.4046121882977527 0
+153 -0.905112121158652 -0.4008470418122601 0
+154 -0.8101549564630695 0.2023209272257964 0
+155 -0.2171797667128769 0.1788651136144886 0
+156 0.218984182851411 -0.1701737160108267 0
+157 -0.4610056759041399 -0.4357674458069065 0
+158 -0.5302644076150214 0.2212305143258244 0
+159 -0.04276806009063583 -0.01994244229985313 0
+160 -0.6143240522781491 -0.3163772884327728 0
+161 0.1390496222876248 -0.1978461725853969 0
+162 -0.8078839296756867 -0.3032164558448147 0
+163 -0.067328241500502 -0.1551066914615729 0
+164 -0.5194409484339509 0.1354588183004715 0
+165 0.1432169875111249 0.03768194104198446 0
+166 0.08301780946366345 -0.07948530505208366 0
+167 -0.5193219148563174 -0.3347355561430876 0
+168 -0.1152102028428986 0.1010823184666092 0
+169 -0.8078942260925337 0.103364400115844 0
+170 0.3099381016143418 -0.1502447769707067 0
+171 0.2240198121867453 -0.4094569351224969 0
+172 0.01395361697461602 0.2105501774789584 0
+173 0.02632380625840756 -0.3932928550063711 0
+174 -0.9040773173311365 0.1012715507754214 0
+175 -0.9030270303063508 -0.09995308514281143 0
+176 -0.7152005395259696 0.2051506618156246 0
+177 -0.5301926136732626 -0.4208207505638644 0
+178 -0.2905034631772817 0.1778597630963602 0
+179 0.2897766248860165 -0.006075784670611182 0
+180 0.08763760765979685 0.1241875167922535 0
+181 0.3165585219793023 -0.3185691860890915 0
+182 -0.5001365741626347 0.0340008752718627 0
+183 -0.1169732586528348 -0.4054336053944584 0
+184 -0.1396591952075576 -0.2588406695042229 0
+185 0.1694367779534013 0.1152800340702304 0
+186 0.261596433653527 0.07758040886641067 0
+187 -0.4611183693292731 0.2363440391786482 0
+188 0.1541211525474303 -0.2436630957357367 0
+189 -0.6143783360417686 0.1167979220562566 0
+190 0.06816743222956195 0.04856262365960533 0
+191 0.0473237886756602 -0.2107344335750746 0
+192 -0.6071689133831988 -0.212836923948426 0
+193 -0.8061312794772055 0.002468849118730037 0
+194 0.03411381029467352 -0.09348450398787239 0
+195 -0.8061240166154311 -0.2022821153082574 0
+196 0.2292394242264159 -0.2433794764606136 0
+197 -0.05292867330974841 -0.3178686584911756 0
+198 -0.5000385683282311 -0.2332641409741138 0
+199 -0.7112442892465903 -0.3073790576613057 0
+200 -0.7112680902269539 0.1076337452325555 0
+201 0.2286250646145996 -0.3228771129954415 0
+202 0.1440970459551938 -0.3202361403772453 0
+203 0.3150380983196349 -0.2327133622163994 0
+204 -0.6072096239822308 0.01331263540733966 0
+205 0.003693268944608279 0.1203828433480361 0
+206 0.04509901209674554 -0.02235879387749332 0
+207 -0.6042790773748262 -0.09976721676533161 0
+208 -0.8054490418964394 -0.09990047909209698 0
+209 -0.7077911141327923 -0.2054036380728836 0
+210 0.07519281766359141 -0.3137479833235459 0
+211 -0.7078088806076444 0.005714505058565612 0
+212 0.009604611758956853 0.0553324412178114 0
+213 -0.7064017558563392 -0.09983760196223063 0
+214 0.1977962077032731 0.02852914647217401 0
+215 0.2804203530535676 0.02351624765053963 0
+216 0.2717874725963128 -0.02541161036790307 0
+217 0.2459472476787362 0.002064508004240215 0
+218 0.156547764408105 0.07574029251098816 0
+219 0.2098628805408967 0.1029798262073474 0
+220 0.1891827595475258 0.07167148639763916 0
+221 0.1075128358607772 -0.1517842496451334 0
+222 0.1602420053072623 -0.2185869394425716 0
+223 0.1146169217735123 -0.2324038065418674 0
+224 0.03670431575136057 -0.1363880990037906 0
+225 0.06433928464235994 -0.08669261703874735 0
+226 0.09319538260732002 -0.0915938175845129 0
+227 0.07232672244303819 -0.1180508038793629 0
+228 -0.2523535676751602 -0.4189854059611055 0
+229 -0.162715949854048 -0.3850805738158907 0
+230 -0.1391075348828126 -0.4514005347013539 0
+231 -0.1875772762510924 -0.4396223837793893 0
+232 -0.6558734744873089 -0.09980231327279056 0
+233 -0.6050008408179098 -0.1570759122181456 0
+234 -0.6578793171536932 -0.2082922692408035 0
+235 -0.7067582934933287 -0.1528912551476548 0
+236 -0.6563877670390236 -0.1544971659032877 0
+237 -0.525550059959925 -0.3791647856175708 0
+238 -0.471955841389365 -0.3464028387358708 0
+239 -0.4423383563342458 -0.4154853023434004 0
+240 -0.4895476227210242 -0.428433464187789 0
+241 -0.4814371159050623 -0.3918374800454812 0
+242 0.3086606295264157 -0.100814620264855 0
+243 0.3907034443436858 -0.08744235428838394 0
+244 0.3551031093989661 -0.1410648871872618 0
+245 0.3542744297482685 -0.1081402830332978 0
+246 -0.04532282597920551 0.1120017590829687 0
+247 0.005442917137097258 0.0810073348010094 0
+248 -0.01069984678944677 0.03164204931318011 0
+249 -0.08436148739166101 0.03147674770418169 0
+250 -0.03455258633965184 0.06409345945349011 0
+251 -0.1751061264147883 0.1415272165617647 0
+252 -0.1752214761455878 0.05240332544622112 0
+253 -0.2739267903116944 0.1351299060231787 0
+254 -0.2164902212536593 0.100024829981679 0
+255 -0.5100739413314819 -0.2877864250461893 0
+256 -0.4254023003564598 -0.2679562049698027 0
+257 -0.4713516915870031 -0.3048937271776742 0
+258 0.03725822758536431 -0.05743929912775716 0
+259 0.07726358501967136 -0.02540437506983998 0
+260 0.09133441938874079 -0.05902299174993161 0
+261 0.06735464631338589 -0.0569190339744973 0
+262 -0.6104996208176845 -0.2660105987493264 0
+263 -0.5551372301363793 -0.2202282009394987 0
+264 -0.5661982618409271 -0.3241442876570492 0
+265 -0.5603882794971785 -0.2744620929512951 0
+266 -0.08791933398487439 -0.3617631739140857 0
+267 -0.1766835297874687 -0.3092464633495194 0
+268 -0.09505079797409979 -0.2870443812433464 0
+269 -0.1306177533469599 -0.3356257803971124 0
+270 -0.02896007085168698 -0.06125924130905632 0
+271 -0.04011622831208577 -0.12707311922204 0
+272 -0.1087227321903851 -0.09265485501975078 0
+273 -0.03938661858174743 -0.09356847866724131 0
+274 -0.1829855987303169 -0.2353758661684148 0
+275 -0.2196526904935378 -0.285128739156362 0
+276 0.2639383104969536 -0.1603519114822477 0
+277 0.2098619560118975 -0.1346525545471456 0
+278 0.2578187366713578 -0.1260038693212363 0
+279 -0.3365984299757152 -0.3689435493738567 0
+280 -0.3690999315586024 -0.4418953687204333 0
+281 -0.3167065311567923 -0.4334631236276932 0
+282 -0.3694087863727465 0.2432376965214948 0
+283 -0.2553426359919454 0.2272752053164272 0
+284 -0.3374673358027612 0.1732246469354434 0
+285 -0.3175608418683333 0.2365690651497333 0
+286 0.008850703388214479 -0.01991633335901061 0
+287 -0.001218728385269719 -0.09549213347220226 0
+288 0.002406985415346704 -0.05813005717296095 0
+289 -0.9045711401853331 -0.3511095501629792 0
+290 -0.8560264413202144 -0.3020447179060484 0
+291 -0.8089871469726755 -0.3529471485029519 0
+292 -0.8576409198931736 -0.4014328831249685 0
+293 -0.8568052555584925 -0.3518808734814977 0
+294 0.3172104436976491 -0.01611502345616553 0
+295 0.2961887323673169 -0.05215947956752492 0
+296 -0.1436268837999887 -0.008073762404093306 0
+297 -0.7596240500136783 -0.3049051894798189 0
+298 -0.7132399750615483 -0.3566655936622771 0
+299 -0.7626467551267595 -0.4033784894106121 0
+300 -0.7611209118846552 -0.3544690910603182 0
+301 -0.0790805288764406 -0.422714065852822 0
+302 -0.05750896173024642 -0.4623311016806863 0
+303 -0.09668934292041458 -0.459452811071282 0
+304 0.1509212121154961 0.2077848374668743 0
+305 0.1836828707613349 0.1583418024019007 0
+306 0.1286140158341375 0.1211455629251016 0
+307 0.09652466197720637 0.1658702066709334 0
+308 0.1399063131894998 0.1633367249061549 0
+309 -0.4967052699289158 -0.1680954607817837 0
+310 -0.4455863871877834 -0.09983362082364024 0
+311 -0.4419944429954236 -0.1703583035669841 0
+312 0.4399385296268339 -0.01480981599751474 0
+313 0.4488854705295473 -0.1145822402599836 0
+314 0.4440544412727954 -0.0665036364457209 0
+315 -0.7571529621827666 -0.2035259996663414 0
+316 -0.8056210978029794 -0.1511957574280544 0
+317 -0.7561834791762537 -0.09987041919485985 0
+318 -0.7564316170847615 -0.151865517817573 0
+319 0.1045271459110808 0.04397954972877061 0
+320 0.0785635595921011 0.0852021314126377 0
+321 0.1170762896178348 0.08148781143980693 0
+322 -0.6050230124520853 -0.04245360931612332 0
+323 -0.706768033525384 -0.04678693877789675 0
+324 -0.6579061026483358 0.008679234839006648 0
+325 -0.6564018364000489 -0.04510872021777437 0
+326 0.05629755565715045 0.01298988797842564 0
+327 0.03425840040256539 0.0522397232792762 0
+328 0.02145585825736532 0.01795935898243648 0
+329 0.452712189225738 -0.2091246069478791 0
+330 0.4040232577519153 -0.1727664164243717 0
+331 0.4514685343576281 -0.1616654217760329 0
+332 0.4431216297949669 0.0939449572435696 0
+333 0.3813449639831981 0.03324456543265554 0
+334 0.440379027709377 0.03989937046530734 0
+335 0.2436715478033035 0.2007307850258904 0
+336 0.2805777751576197 0.1431834844812172 0
+337 0.2296755891230483 0.1504476318460616 0
+338 0.1859544588755713 -0.3225737691342332 0
+339 0.2265396764920814 -0.3654553470422299 0
+340 0.178665718577793 -0.4086808657455717 0
+341 0.1387268474324107 -0.362147092517086 0
+342 0.1824509014877034 -0.3647275649522855 0
+343 -0.9056776248804862 0.2504767439967071 0
+344 -0.9525633370051564 0.2004111546004803 0
+345 -0.9528413625667851 0.2502216932047778 0
+346 0.06033876788292724 0.2103404548317719 0
+347 0.04654245948919494 0.1242617451960501 0
+348 0.006961988863520622 0.1649871265707444 0
+349 0.05264540668462143 0.1663056722429792 0
+350 -0.5551996147243393 0.02081228492985406 0
+351 -0.4967559359177801 -0.03131526710838064 0
+352 -0.5512740018354556 -0.09974021903041519 0
+353 -0.5521276905692779 -0.03813293153647686 0
+354 0.4480560292877221 0.1979403772868061 0
+355 0.3910453198787736 0.1432755284778359 0
+356 0.4455880042648426 0.1463221447988414 0
+357 0.4011920952600672 0.2484603035935394 0
+358 0.4505867221193388 0.2490964129459936 0
+359 -0.4420426088624309 -0.02919066868346985 0
+360 -0.09461580976806189 0.1683893193005361 0
+361 -0.03345653863318529 0.2117239099166231 0
+362 -0.04255710796838254 0.1635182507849549 0
+363 0.02244515737387976 0.2555321971815035 0
+364 0.1153276536642179 0.2545638407255845 0
+365 0.06903668810980047 0.2550872133225062 0
+366 -0.6627742486555354 -0.3110215627452389 0
+367 -0.7093553766207289 -0.2569645512691488 0
+368 -0.6601276339085735 -0.2605473010105935 0
+369 0.2080036288962619 0.2521726249603491 0
+370 0.1615279512433943 0.2536107591643126 0
+371 0.4067736801327207 -0.4522404047645152 0
+372 0.4535608449381135 -0.4023692506433797 0
+373 0.4533650615629829 -0.4511525003447613 0
+374 -0.8069123105196332 0.05315953249385553 0
+375 -0.8548184461610144 0.001572210866932038 0
+376 -0.9036477026460118 0.0511894786402976 0
+377 -0.856033452761093 0.1021507910447329 0
+378 -0.8553527990154516 0.05201460090337671 0
+379 -0.02389528817512852 -0.1847825176038125 0
+380 -0.006866951689854774 -0.1366953755079541 0
+381 0.4536148678219512 -0.3052418336925722 0
+382 0.4070041533957115 -0.2637237885159185 0
+383 0.4533391647602582 -0.2570200123092205 0
+384 0.4074781971602873 -0.3572407476166842 0
+385 0.4536634392021606 -0.3537123726000089 0
+386 0.1485891094597923 -0.2802080085377704 0
+387 0.1043450115668061 -0.316574270532763 0
+388 0.06413318914224587 -0.2820176859086687 0
+389 0.1070057290026125 -0.2758977391779172 0
+390 -0.9525620624585559 -0.4003927197529595 0
+391 -0.9056761932559764 -0.450457444198259 0
+392 -0.9528406407743424 -0.4502122271860289 0
+393 0.08533727993238593 -0.4019867898914997 0
+394 0.1283367036550898 -0.4528245183296776 0
+395 0.03033962569320452 -0.452250621222814 0
+396 0.07998257428074815 -0.4511572177524582 0
+397 0.2213759752444606 -0.4544650772518872 0
+398 0.1751014076412781 -0.4539889712745865 0
+399 -0.4425155233639313 0.2163736403451548 0
+400 -0.4507954935070622 0.2628554092023328 0
+401 -0.4131121192292684 0.2545642425095516 0
+402 0.3031527625931779 0.2490967563569192 0
+403 0.2551385906644782 0.2505030766716961 0
+404 0.2694481338460449 -0.4092035598857057 0
+405 0.2721547896521269 -0.3213931550726695 0
+406 0.3160779834837667 -0.3629795393574789 0
+407 0.2710607389809552 -0.3647612340793691 0
+408 -0.1035653352604332 -0.2044842605346114 0
+409 -0.01227971339367681 -0.2713950410296468 0
+410 -0.05685006699402992 -0.236496096181459 0
+411 0.0567224087876325 -0.3417035739692363 0
+412 0.09572994502199662 -0.3565452296207258 0
+413 0.3610431723283473 -0.4065979774655465 0
+414 0.3617095081774879 -0.3147610523221558 0
+415 0.3615783810179587 -0.3603963399126824 0
+416 -0.6678899125006312 0.2075030616485266 0
+417 -0.6179797597273983 0.1646947883097049 0
+418 -0.6628100920803135 0.1113485573428164 0
+419 -0.7132618541436707 0.1568718715267484 0
+420 -0.6654720343892375 0.1600998646407859 0
+421 -0.9520536125952824 -0.3005564560645043 0
+422 -0.9522964686527513 -0.3505139000296544 0
+423 -0.8069030283127105 -0.2529887309677519 0
+424 -0.7582589453215287 -0.2545872549834342 0
+425 0.3440950074007083 0.1959038712126979 0
+426 0.3519476833162895 0.2483874983655644 0
+427 -0.903644034907426 -0.2511100259437735 0
+428 -0.8548144294065865 -0.2014374701824486 0
+429 -0.8553464046710568 -0.2518916080054454 0
+430 -0.7596406842544865 0.1051021581732425 0
+431 -0.7093783582344169 0.05725415993678134 0
+432 -0.7571650836163084 0.003771073525906968 0
+433 -0.7582736896895523 0.05481275956927414 0
+434 -0.01481801560885839 -0.3513043633344067 0
+435 0.02361501072631281 -0.3107054446564863 0
+436 -0.951685039286628 -0.200387298081232 0
+437 -0.9518457062710525 -0.2505125709290499 0
+438 0.3138656895294811 -0.453946642029053 0
+439 0.2675825239413352 -0.4544049565600234 0
+440 -0.9045747126559834 0.1511638102078595 0
+441 -0.9520553717178326 0.1005899458591781 0
+442 -0.9522981211662438 0.1505404565853773 0
+443 0.3602648131448942 -0.4531965135399142 0
+444 -0.9030987467953839 -0.1504338854054128 0
+445 -0.8543570162983474 -0.09992777457289652 0
+446 -0.8544735940678891 -0.1507469539724091 0
+447 0.3162907293872416 -0.2751205845211 0
+448 0.3600318302091939 -0.2257407570242921 0
+449 0.3612546876187391 -0.2698352314609165 0
+450 -0.02602843930910637 -0.4199006069063608 0
+451 -0.01641445870082838 -0.4581139258750331 0
+452 -0.5520932648527178 -0.1613243625575299 0
+453 -0.9515486251638957 -0.0999768936304521 0
+454 -0.9515832963917333 -0.1501990849447876 0
+455 -0.8056249141090809 -0.04860812053724763 0
+456 -0.9031003894421947 -0.04947380601328975 0
+457 -0.8544763125296403 -0.0491109868786193 0
+458 -0.07010735352474724 0.2586055963514451 0
+459 -0.02425050482752994 0.2565223714448963 0
+460 0.1247673753961651 0.001705686756119401 0
+461 0.09065388019342856 0.008320022461929439 0
+462 -0.9516863080510799 0.0004302710258955425 0
+463 -0.9518471593836025 0.05055150760639032 0
+464 -0.8113238580642472 -0.4512049023585757 0
+465 -0.8585033677350856 -0.4507729093530499 0
+466 -0.756438156053141 -0.04787862047141542 0
+467 -0.951584013139347 -0.0497555117147115 0
+468 -0.762657640333006 0.2034891469336877 0
+469 -0.8113278690226757 0.2512474027994662 0
+470 -0.7170195754387109 0.2527500617314798 0
+471 -0.7641553504984928 0.2518703440248712 0
+472 -0.8576456881396742 0.2014916700250431 0
+473 -0.8585059071269067 0.2508030556119411 0
+474 -0.1522040914805329 -0.1765408815012435 0
+475 0.3295867631797828 0.08518530340051468 0
+476 0.327019535214101 0.03143332045598474 0
+477 -0.5318633548378588 -0.4606009300961785 0
+478 -0.6231864686385054 -0.4555578685236892 0
+479 -0.5747381969708022 -0.4149381028087382 0
+480 -0.5769313843781129 -0.457795983150587 0
+481 0.3356462875136635 0.1412995287148972 0
+482 -0.1536121738060832 0.2604449186279605 0
+483 -0.1764144812665558 0.2123102634550833 0
+484 -0.1959182184847739 0.2509373354711786 0
+485 -0.8089971116418125 0.1530656272518927 0
+486 -0.8568116644147861 0.1519653489558748 0
+487 -0.6232052594286518 0.2556843409543333 0
+488 -0.66998928549547 0.2539839588043611 0
+489 -0.4507238963838467 -0.4624949390234637 0
+490 -0.4129811226227236 -0.4539394975688619 0
+491 -0.5747895428395321 0.2152558094780136 0
+492 -0.5256521111163802 0.1797574484337065 0
+493 -0.5662792981051168 0.1246916070083734 0
+494 -0.5712023529739348 0.1710891211581412 0
+495 -0.4896433891511788 0.2289433921729284 0
+496 -0.531900311206985 0.2608091695776251 0
+497 -0.489463245450992 0.2635657797803316 0
+498 -0.7170108740325225 -0.4526751944553407 0
+499 -0.7641495043082897 -0.4518133784739797 0
+500 -0.4894139628150242 -0.463303856855738 0
+501 -0.7611354905114098 0.1546270834998305 0
+502 0.3127359341776296 -0.1912754390728661 0
+503 0.2712174231191717 -0.2386659376373855 0
+504 0.2258007352453115 -0.2063580088239624 0
+505 0.2683781304317485 -0.1991490110930109 0
+506 -0.1221992217718819 0.2240798894202508 0
+507 -0.1133461613236452 0.2612147550922374 0
+508 -0.6678656860098373 -0.4073145991678367 0
+509 -0.6699765864111431 -0.453886597450658 0
+510 0.1296543789765991 -0.03904033620470677 0
+511 0.1097551030489606 -0.06578249848847872 0
+512 -0.05113984009488316 -0.388588939525596 0
+513 -0.6179319662280429 -0.364349299106822 0
+514 -0.6654401282749955 -0.3598333235214998 0
+515 -0.5711317603228333 -0.370638500243041 0
+516 -0.4721349056985737 0.1473981152803444 0
+517 -0.4815833562825206 0.1926225674481079 0
+518 -0.6105515200116438 0.06647411877902022 0
+519 -0.5101942784625602 0.08857137651614656 0
+520 -0.5604672741188895 0.07505559551467471 0
+521 0.1544427085925376 -0.009209785786451647 0
+522 0.3579276707365492 -0.1826403629447318 0
+523 -0.5769580974011784 0.2579592972468382 0
+524 -0.6601622524577578 0.06091406732579768 0
+525 0.1902592643906305 -0.246060359871225 0
+526 0.1765195159623006 -0.1797399253319203 0
+527 0.1872491382317506 -0.213044054513281 0
+528 0.1621511081378988 -0.1416974233908054 0
+529 -0.425591039222277 0.06911936575611695 0
+530 -0.3463160924911299 0.1140440878571036 0
+531 -0.141657377958181 0.1850732162869781 0
+532 0.2297912599016763 -0.2820981495222479 0
+533 0.2723590392103513 -0.2793383823651852 0
+534 -0.2733842766185236 -0.3260641459033662 0
+535 -0.2889290129607631 -0.3701547377263648 0
+536 0.04155799186107693 0.08615753036408318 0
+537 -0.3458894655207208 -0.3108387664856851 0
+538 0.1887693767942723 -0.2826680295938668 0
+539 -0.4715227914031501 0.1059034382805981 0
+$EndNodes
+$Elements
+602
+1 1 2 0 1 1 12
+2 1 2 0 1 12 13
+3 1 2 0 1 13 14
+4 1 2 0 1 14 15
+5 1 2 0 1 15 16
+6 1 2 0 1 16 17
+7 1 2 0 1 17 18
+8 1 2 0 1 18 19
+9 1 2 0 1 19 20
+10 1 2 0 1 20 21
+11 1 2 0 1 21 22
+12 1 2 0 1 22 23
+13 1 2 0 1 23 24
+14 1 2 0 1 24 25
+15 1 2 0 1 25 26
+16 1 2 0 1 26 27
+17 1 2 0 1 27 28
+18 1 2 0 1 28 29
+19 1 2 0 1 29 30
+20 1 2 0 1 30 31
+21 1 2 0 1 31 32
+22 1 2 0 1 32 33
+23 1 2 0 1 33 34
+24 1 2 0 1 34 35
+25 1 2 0 1 35 36
+26 1 2 0 1 36 37
+27 1 2 0 1 37 38
+28 1 2 0 1 38 39
+29 1 2 0 1 39 40
+30 1 2 0 1 40 41
+31 1 2 0 1 41 42
+32 1 2 0 1 42 2
+33 1 2 0 2 4 43
+34 1 2 0 2 43 44
+35 1 2 0 2 44 45
+36 1 2 0 2 45 46
+37 1 2 0 2 46 47
+38 1 2 0 2 47 48
+39 1 2 0 2 48 49
+40 1 2 0 2 49 50
+41 1 2 0 2 50 51
+42 1 2 0 2 51 52
+43 1 2 0 2 52 53
+44 1 2 0 2 53 54
+45 1 2 0 2 54 55
+46 1 2 0 2 55 56
+47 1 2 0 2 56 57
+48 1 2 0 2 57 2
+49 1 2 0 3 1 58
+50 1 2 0 3 58 59
+51 1 2 0 3 59 60
+52 1 2 0 3 60 61
+53 1 2 0 3 61 62
+54 1 2 0 3 62 63
+55 1 2 0 3 63 64
+56 1 2 0 3 64 65
+57 1 2 0 3 65 66
+58 1 2 0 3 66 67
+59 1 2 0 3 67 68
+60 1 2 0 3 68 69
+61 1 2 0 3 69 70
+62 1 2 0 3 70 71
+63 1 2 0 3 71 72
+64 1 2 0 3 72 3
+65 1 2 0 4 3 73
+66 1 2 0 4 73 74
+67 1 2 0 4 74 75
+68 1 2 0 4 75 76
+69 1 2 0 4 76 77
+70 1 2 0 4 77 78
+71 1 2 0 4 78 79
+72 1 2 0 4 79 80
+73 1 2 0 4 80 81
+74 1 2 0 4 81 82
+75 1 2 0 4 82 83
+76 1 2 0 4 83 84
+77 1 2 0 4 84 85
+78 1 2 0 4 85 86
+79 1 2 0 4 86 87
+80 1 2 0 4 87 88
+81 1 2 0 4 88 89
+82 1 2 0 4 89 90
+83 1 2 0 4 90 91
+84 1 2 0 4 91 92
+85 1 2 0 4 92 93
+86 1 2 0 4 93 94
+87 1 2 0 4 94 95
+88 1 2 0 4 95 96
+89 1 2 0 4 96 97
+90 1 2 0 4 97 98
+91 1 2 0 4 98 99
+92 1 2 0 4 99 100
+93 1 2 0 4 100 101
+94 1 2 0 4 101 102
+95 1 2 0 4 102 103
+96 1 2 0 4 103 4
+97 1 2 1 5 5 104
+98 1 2 1 5 104 105
+99 1 2 1 5 105 106
+100 1 2 1 5 106 6
+101 1 2 1 6 6 107
+102 1 2 1 6 107 108
+103 1 2 1 6 108 109
+104 1 2 1 6 109 7
+105 1 2 1 7 5 110
+106 1 2 1 7 110 111
+107 1 2 1 7 111 112
+108 1 2 1 7 112 8
+109 1 2 1 8 8 113
+110 1 2 1 8 113 114
+111 1 2 1 8 114 115
+112 1 2 1 8 115 7
+113 1 2 2 9 9 116
+114 1 2 2 9 116 117
+115 1 2 2 9 117 118
+116 1 2 2 9 118 10
+117 1 2 2 10 10 119
+118 1 2 2 10 119 120
+119 1 2 2 10 120 121
+120 1 2 2 10 121 11
+121 1 2 2 11 11 122
+122 1 2 2 11 122 123
+123 1 2 2 11 123 124
+124 1 2 2 11 124 9
+125 3 2 17 16 254 253 155 251
+126 3 2 17 16 10 214 217 119
+127 3 2 17 16 214 186 215 217
+128 3 2 17 16 217 215 179 216
+129 3 2 17 16 119 217 216 120
+130 3 2 17 16 186 214 220 219
+131 3 2 17 16 220 214 165 218
+132 3 2 17 16 219 220 218 185
+133 3 2 17 16 191 221 161 223
+134 3 2 17 16 223 161 222 188
+135 3 2 17 16 9 221 227 226
+136 3 2 17 16 221 191 224 227
+137 3 2 17 16 227 224 194 225
+138 3 2 17 16 226 227 225 166
+139 3 2 17 16 88 228 231 89
+140 3 2 17 16 228 137 229 231
+141 3 2 17 16 231 229 183 230
+142 3 2 17 16 89 231 230 90
+143 3 2 17 16 213 232 236 235
+144 3 2 17 16 232 207 233 236
+145 3 2 17 16 236 233 192 234
+146 3 2 17 16 235 236 234 209
+147 3 2 17 16 177 237 241 240
+148 3 2 17 16 237 167 238 241
+149 3 2 17 16 241 238 136 239
+150 3 2 17 16 240 241 239 157
+151 3 2 17 16 170 242 245 244
+152 3 2 17 16 245 242 141 243
+153 3 2 17 16 244 245 243 140
+154 3 2 17 16 168 246 250 249
+155 3 2 17 16 246 205 247 250
+156 3 2 17 16 250 247 212 248
+157 3 2 17 16 249 250 248 159
+158 3 2 17 16 251 168 252 254
+159 3 2 17 16 254 252 105 106
+160 3 2 17 16 253 254 106 6
+161 3 2 17 16 136 238 257 256
+162 3 2 17 16 238 167 255 257
+163 3 2 17 16 257 255 198 256
+164 3 2 17 16 194 258 261 225
+165 3 2 17 16 258 206 259 261
+166 3 2 17 16 261 259 126 260
+167 3 2 17 16 225 261 260 166
+168 3 2 17 16 160 262 265 264
+169 3 2 17 16 262 192 263 265
+170 3 2 17 16 265 263 198 255
+171 3 2 17 16 264 265 255 167
+172 3 2 17 16 197 266 269 268
+173 3 2 17 16 266 183 229 269
+174 3 2 17 16 269 229 137 267
+175 3 2 17 16 268 269 267 184
+176 3 2 17 16 159 270 273 272
+177 3 2 17 16 273 271 163 272
+178 3 2 17 16 184 267 275 274
+179 3 2 17 16 267 137 534 275
+180 3 2 17 16 275 534 8 112
+181 3 2 17 16 274 275 112 111
+182 3 2 17 16 123 122 278 277
+183 3 2 17 16 122 11 242 278
+184 3 2 17 16 278 242 170 276
+185 3 2 17 16 277 278 276 156
+186 3 2 17 16 136 279 281 280
+187 3 2 17 16 279 535 228 281
+188 3 2 17 16 281 228 88 87
+189 3 2 17 16 280 281 87 86
+190 3 2 17 16 138 282 285 284
+191 3 2 17 16 282 25 26 285
+192 3 2 17 16 285 26 27 283
+193 3 2 17 16 284 285 283 178
+194 3 2 17 16 159 286 288 270
+195 3 2 17 16 286 206 258 288
+196 3 2 17 16 288 258 194 287
+197 3 2 17 16 270 288 287 273
+198 3 2 17 16 153 289 293 292
+199 3 2 17 16 289 144 290 293
+200 3 2 17 16 293 290 162 291
+201 3 2 17 16 292 293 291 146
+202 3 2 17 16 141 242 295 294
+203 3 2 17 16 242 11 121 295
+204 3 2 17 16 295 121 120 216
+205 3 2 17 16 294 295 216 179
+206 3 2 17 16 168 249 296 252
+207 3 2 17 16 249 159 272 296
+208 3 2 17 16 296 272 5 104
+209 3 2 17 16 252 296 104 105
+210 3 2 17 16 146 291 300 299
+211 3 2 17 16 291 162 297 300
+212 3 2 17 16 300 297 199 298
+213 3 2 17 16 299 300 298 145
+214 3 2 17 16 90 230 303 91
+215 3 2 17 16 230 183 301 303
+216 3 2 17 16 303 301 134 302
+217 3 2 17 16 91 303 302 92
+218 3 2 17 16 132 304 308 307
+219 3 2 17 16 304 148 305 308
+220 3 2 17 16 308 305 185 306
+221 3 2 17 16 307 308 306 180
+222 3 2 17 16 114 256 311 115
+223 3 2 17 16 256 198 309 311
+224 3 2 17 16 311 309 128 310
+225 3 2 17 16 115 311 310 7
+226 3 2 17 16 141 312 314 243
+227 3 2 17 16 312 52 51 314
+228 3 2 17 16 314 51 50 313
+229 3 2 17 16 243 314 313 140
+230 3 2 17 16 209 315 318 235
+231 3 2 17 16 315 195 316 318
+232 3 2 17 16 318 316 208 317
+233 3 2 17 16 235 318 317 213
+234 3 2 17 16 185 218 321 306
+235 3 2 17 16 218 165 319 321
+236 3 2 17 16 321 319 190 320
+237 3 2 17 16 306 321 320 180
+238 3 2 17 16 204 322 325 324
+239 3 2 17 16 322 207 232 325
+240 3 2 17 16 325 232 213 323
+241 3 2 17 16 324 325 323 211
+242 3 2 17 16 190 326 328 327
+243 3 2 17 16 326 206 286 328
+244 3 2 17 16 328 286 159 248
+245 3 2 17 16 327 328 248 212
+246 3 2 17 16 140 313 331 330
+247 3 2 17 16 313 50 49 331
+248 3 2 17 16 331 49 48 329
+249 3 2 17 16 330 331 329 139
+250 3 2 17 16 147 332 334 333
+251 3 2 17 16 332 54 53 334
+252 3 2 17 16 334 53 52 312
+253 3 2 17 16 333 334 312 141
+254 3 2 17 16 185 305 337 219
+255 3 2 17 16 305 148 335 337
+256 3 2 17 16 337 335 149 336
+257 3 2 17 16 219 337 336 186
+258 3 2 17 16 202 338 342 341
+259 3 2 17 16 338 201 339 342
+260 3 2 17 16 342 339 171 340
+261 3 2 17 16 341 342 340 127
+262 3 2 17 16 1 12 345 58
+263 3 2 17 16 12 13 343 345
+264 3 2 17 16 345 343 143 344
+265 3 2 17 16 58 345 344 59
+266 3 2 17 16 172 346 349 348
+267 3 2 17 16 346 132 307 349
+268 3 2 17 16 349 307 180 347
+269 3 2 17 16 348 349 347 205
+270 3 2 17 16 207 322 353 352
+271 3 2 17 16 322 204 350 353
+272 3 2 17 16 353 350 182 351
+273 3 2 17 16 352 353 351 128
+274 3 2 17 16 150 354 356 355
+275 3 2 17 16 354 56 55 356
+276 3 2 17 16 356 55 54 332
+277 3 2 17 16 355 356 332 147
+278 3 2 17 16 56 354 358 57
+279 3 2 17 16 354 150 357 358
+280 3 2 17 16 358 357 41 42
+281 3 2 17 16 57 358 42 2
+282 3 2 17 16 7 310 359 109
+283 3 2 17 16 310 128 351 359
+284 3 2 17 16 359 351 182 529
+285 3 2 17 16 109 359 529 108
+286 3 2 17 16 172 348 362 361
+287 3 2 17 16 348 205 246 362
+288 3 2 17 16 362 246 168 360
+289 3 2 17 16 361 362 360 151
+290 3 2 17 16 172 363 365 346
+291 3 2 17 16 363 33 34 365
+292 3 2 17 16 365 34 35 364
+293 3 2 17 16 346 365 364 132
+294 3 2 17 16 192 262 368 234
+295 3 2 17 16 262 160 366 368
+296 3 2 17 16 368 366 199 367
+297 3 2 17 16 234 368 367 209
+298 3 2 17 16 148 304 370 369
+299 3 2 17 16 304 132 364 370
+300 3 2 17 16 370 364 35 36
+301 3 2 17 16 369 370 36 37
+302 3 2 17 16 44 43 373 372
+303 3 2 17 16 43 4 103 373
+304 3 2 17 16 373 103 102 371
+305 3 2 17 16 372 373 371 152
+306 3 2 17 16 169 374 378 377
+307 3 2 17 16 374 193 375 378
+308 3 2 17 16 378 375 125 376
+309 3 2 17 16 377 378 376 174
+310 3 2 17 16 194 224 380 287
+311 3 2 17 16 224 191 379 380
+312 3 2 17 16 380 379 163 271
+313 3 2 17 16 287 380 271 273
+314 3 2 17 16 139 329 383 382
+315 3 2 17 16 329 48 47 383
+316 3 2 17 16 383 47 46 381
+317 3 2 17 16 382 383 381 133
+318 3 2 17 16 152 384 385 372
+319 3 2 17 16 384 133 381 385
+320 3 2 17 16 385 381 46 45
+321 3 2 17 16 372 385 45 44
+322 3 2 17 16 191 223 389 388
+323 3 2 17 16 223 188 386 389
+324 3 2 17 16 389 386 202 387
+325 3 2 17 16 388 389 387 210
+326 3 2 17 16 71 390 392 72
+327 3 2 17 16 390 153 391 392
+328 3 2 17 16 392 391 74 73
+329 3 2 17 16 72 392 73 3
+330 3 2 17 16 173 393 396 395
+331 3 2 17 16 393 127 394 396
+332 3 2 17 16 396 394 96 95
+333 3 2 17 16 395 396 95 94
+334 3 2 17 16 171 397 398 340
+335 3 2 17 16 397 98 97 398
+336 3 2 17 16 398 97 96 394
+337 3 2 17 16 340 398 394 127
+338 3 2 17 16 138 399 401 282
+339 3 2 17 16 399 187 400 401
+340 3 2 17 16 401 400 23 24
+341 3 2 17 16 282 401 24 25
+342 3 2 17 16 149 335 403 402
+343 3 2 17 16 335 148 369 403
+344 3 2 17 16 403 369 37 38
+345 3 2 17 16 402 403 38 39
+346 3 2 17 16 142 404 407 406
+347 3 2 17 16 404 171 339 407
+348 3 2 17 16 407 339 201 405
+349 3 2 17 16 406 407 405 181
+350 3 2 17 16 197 268 410 409
+351 3 2 17 16 268 184 408 410
+352 3 2 17 16 410 408 163 379
+353 3 2 17 16 409 410 379 191
+354 3 2 17 16 127 393 412 341
+355 3 2 17 16 393 173 411 412
+356 3 2 17 16 412 411 210 387
+357 3 2 17 16 341 412 387 202
+358 3 2 17 16 133 384 415 414
+359 3 2 17 16 384 152 413 415
+360 3 2 17 16 415 413 142 406
+361 3 2 17 16 414 415 406 181
+362 3 2 17 16 176 416 420 419
+363 3 2 17 16 416 131 417 420
+364 3 2 17 16 420 417 189 418
+365 3 2 17 16 419 420 418 200
+366 3 2 17 16 153 390 422 289
+367 3 2 17 16 390 71 70 422
+368 3 2 17 16 422 70 69 421
+369 3 2 17 16 289 422 421 144
+370 3 2 17 16 199 297 424 367
+371 3 2 17 16 297 162 423 424
+372 3 2 17 16 424 423 195 315
+373 3 2 17 16 367 424 315 209
+374 3 2 17 16 150 425 426 357
+375 3 2 17 16 425 149 402 426
+376 3 2 17 16 426 402 39 40
+377 3 2 17 16 357 426 40 41
+378 3 2 17 16 195 423 429 428
+379 3 2 17 16 423 162 290 429
+380 3 2 17 16 429 290 144 427
+381 3 2 17 16 428 429 427 130
+382 3 2 17 16 169 430 433 374
+383 3 2 17 16 430 200 431 433
+384 3 2 17 16 433 431 211 432
+385 3 2 17 16 374 433 432 193
+386 3 2 17 16 173 434 435 411
+387 3 2 17 16 434 197 409 435
+388 3 2 17 16 435 409 191 388
+389 3 2 17 16 411 435 388 210
+390 3 2 17 16 144 421 437 427
+391 3 2 17 16 421 69 68 437
+392 3 2 17 16 437 68 67 436
+393 3 2 17 16 427 437 436 130
+394 3 2 17 16 171 404 439 397
+395 3 2 17 16 404 142 438 439
+396 3 2 17 16 439 438 100 99
+397 3 2 17 16 397 439 99 98
+398 3 2 17 16 143 440 442 344
+399 3 2 17 16 440 174 441 442
+400 3 2 17 16 442 441 61 60
+401 3 2 17 16 344 442 60 59
+402 3 2 17 16 152 371 443 413
+403 3 2 17 16 371 102 101 443
+404 3 2 17 16 443 101 100 438
+405 3 2 17 16 413 443 438 142
+406 3 2 17 16 130 444 446 428
+407 3 2 17 16 444 175 445 446
+408 3 2 17 16 446 445 208 316
+409 3 2 17 16 428 446 316 195
+410 3 2 17 16 139 382 449 448
+411 3 2 17 16 382 133 414 449
+412 3 2 17 16 449 414 181 447
+413 3 2 17 16 448 449 447 203
+414 3 2 17 16 173 395 451 450
+415 3 2 17 16 395 94 93 451
+416 3 2 17 16 451 93 92 302
+417 3 2 17 16 450 451 302 134
+418 3 2 17 16 192 233 452 263
+419 3 2 17 16 233 207 352 452
+420 3 2 17 16 452 352 128 309
+421 3 2 17 16 263 452 309 198
+422 3 2 17 16 67 66 454 436
+423 3 2 17 16 66 65 453 454
+424 3 2 17 16 454 453 175 444
+425 3 2 17 16 436 454 444 130
+426 3 2 17 16 193 455 457 375
+427 3 2 17 16 455 208 445 457
+428 3 2 17 16 457 445 175 456
+429 3 2 17 16 375 457 456 125
+430 3 2 17 16 172 361 459 363
+431 3 2 17 16 361 151 458 459
+432 3 2 17 16 459 458 31 32
+433 3 2 17 16 363 459 32 33
+434 3 2 17 16 206 326 461 259
+435 3 2 17 16 326 190 319 461
+436 3 2 17 16 461 319 165 460
+437 3 2 17 16 259 461 460 126
+438 3 2 17 16 174 376 463 441
+439 3 2 17 16 376 125 462 463
+440 3 2 17 16 463 462 63 62
+441 3 2 17 16 441 463 62 61
+442 3 2 17 16 153 292 465 391
+443 3 2 17 16 292 146 464 465
+444 3 2 17 16 465 464 76 75
+445 3 2 17 16 391 465 75 74
+446 3 2 17 16 208 455 466 317
+447 3 2 17 16 455 193 432 466
+448 3 2 17 16 466 432 211 323
+449 3 2 17 16 317 466 323 213
+450 3 2 17 16 63 462 467 64
+451 3 2 17 16 462 125 456 467
+452 3 2 17 16 467 456 175 453
+453 3 2 17 16 64 467 453 65
+454 3 2 17 16 176 468 471 470
+455 3 2 17 16 468 154 469 471
+456 3 2 17 16 471 469 15 16
+457 3 2 17 16 470 471 16 17
+458 3 2 17 16 143 343 473 472
+459 3 2 17 16 343 13 14 473
+460 3 2 17 16 473 14 15 469
+461 3 2 17 16 472 473 469 154
+462 3 2 17 16 5 272 474 110
+463 3 2 17 16 272 163 408 474
+464 3 2 17 16 474 408 184 274
+465 3 2 17 16 110 474 274 111
+466 3 2 17 16 141 294 476 333
+467 3 2 17 16 294 179 215 476
+468 3 2 17 16 476 215 186 475
+469 3 2 17 16 333 476 475 147
+470 3 2 17 16 177 477 480 479
+471 3 2 17 16 477 82 81 480
+472 3 2 17 16 480 81 80 478
+473 3 2 17 16 479 480 478 129
+474 3 2 17 16 147 475 481 355
+475 3 2 17 16 475 186 336 481
+476 3 2 17 16 481 336 149 425
+477 3 2 17 16 355 481 425 150
+478 3 2 17 16 155 283 484 483
+479 3 2 17 16 283 27 28 484
+480 3 2 17 16 484 28 29 482
+481 3 2 17 16 483 484 482 135
+482 3 2 17 16 174 440 486 377
+483 3 2 17 16 440 143 472 486
+484 3 2 17 16 486 472 154 485
+485 3 2 17 16 377 486 485 169
+486 3 2 17 16 176 470 488 416
+487 3 2 17 16 470 17 18 488
+488 3 2 17 16 488 18 19 487
+489 3 2 17 16 416 488 487 131
+490 3 2 17 16 283 155 253 178
+491 3 2 17 16 136 280 490 239
+492 3 2 17 16 280 86 85 490
+493 3 2 17 16 490 85 84 489
+494 3 2 17 16 239 490 489 157
+495 3 2 17 16 189 417 494 493
+496 3 2 17 16 417 131 491 494
+497 3 2 17 16 494 491 158 492
+498 3 2 17 16 493 494 492 164
+499 3 2 17 16 187 495 497 400
+500 3 2 17 16 495 158 496 497
+501 3 2 17 16 497 496 21 22
+502 3 2 17 16 400 497 22 23
+503 3 2 17 16 78 77 499 498
+504 3 2 17 16 77 76 464 499
+505 3 2 17 16 499 464 146 299
+506 3 2 17 16 498 499 299 145
+507 3 2 17 16 177 240 500 477
+508 3 2 17 16 240 157 489 500
+509 3 2 17 16 500 489 84 83
+510 3 2 17 16 477 500 83 82
+511 3 2 17 16 200 430 501 419
+512 3 2 17 16 430 169 485 501
+513 3 2 17 16 501 485 154 468
+514 3 2 17 16 419 501 468 176
+515 3 2 17 16 156 276 505 504
+516 3 2 17 16 276 170 502 505
+517 3 2 17 16 505 502 203 503
+518 3 2 17 16 504 505 503 196
+519 3 2 17 16 29 30 507 482
+520 3 2 17 16 30 31 458 507
+521 3 2 17 16 507 458 151 506
+522 3 2 17 16 482 507 506 135
+523 3 2 17 16 80 79 509 478
+524 3 2 17 16 79 78 498 509
+525 3 2 17 16 509 498 145 508
+526 3 2 17 16 478 509 508 129
+527 3 2 17 16 117 116 511 510
+528 3 2 17 16 116 9 226 511
+529 3 2 17 16 511 226 166 260
+530 3 2 17 16 510 511 260 126
+531 3 2 17 16 173 450 512 434
+532 3 2 17 16 450 134 301 512
+533 3 2 17 16 512 301 183 266
+534 3 2 17 16 434 512 266 197
+535 3 2 17 16 145 298 514 508
+536 3 2 17 16 298 199 366 514
+537 3 2 17 16 514 366 160 513
+538 3 2 17 16 508 514 513 129
+539 3 2 17 16 167 237 515 264
+540 3 2 17 16 237 177 479 515
+541 3 2 17 16 515 479 129 513
+542 3 2 17 16 264 515 513 160
+543 3 2 17 16 138 516 517 399
+544 3 2 17 16 516 164 492 517
+545 3 2 17 16 517 492 158 495
+546 3 2 17 16 399 517 495 187
+547 3 2 17 16 182 350 520 519
+548 3 2 17 16 350 204 518 520
+549 3 2 17 16 520 518 189 493
+550 3 2 17 16 519 520 493 164
+551 3 2 17 16 165 214 521 460
+552 3 2 17 16 214 10 118 521
+553 3 2 17 16 521 118 117 510
+554 3 2 17 16 460 521 510 126
+555 3 2 17 16 140 330 522 244
+556 3 2 17 16 330 139 448 522
+557 3 2 17 16 522 448 203 502
+558 3 2 17 16 244 522 502 170
+559 3 2 17 16 19 20 523 487
+560 3 2 17 16 20 21 496 523
+561 3 2 17 16 523 496 158 491
+562 3 2 17 16 487 523 491 131
+563 3 2 17 16 200 418 524 431
+564 3 2 17 16 418 189 518 524
+565 3 2 17 16 524 518 204 324
+566 3 2 17 16 431 524 324 211
+567 3 2 17 16 156 504 527 526
+568 3 2 17 16 504 196 525 527
+569 3 2 17 16 527 525 188 222
+570 3 2 17 16 526 527 222 161
+571 3 2 17 16 123 277 528 124
+572 3 2 17 16 277 156 526 528
+573 3 2 17 16 528 526 161 221
+574 3 2 17 16 124 528 221 9
+575 3 2 17 16 108 529 530 107
+576 3 2 17 16 529 138 284 530
+577 3 2 17 16 530 284 178 253
+578 3 2 17 16 107 530 253 6
+579 3 2 17 16 151 360 531 506
+580 3 2 17 16 360 168 251 531
+581 3 2 17 16 531 251 155 483
+582 3 2 17 16 506 531 483 135
+583 3 2 17 16 201 532 533 405
+584 3 2 17 16 532 196 503 533
+585 3 2 17 16 533 503 203 447
+586 3 2 17 16 405 533 447 181
+587 3 2 17 16 228 535 534 137
+588 3 2 17 16 180 320 536 347
+589 3 2 17 16 320 190 327 536
+590 3 2 17 16 536 327 212 247
+591 3 2 17 16 347 536 247 205
+592 3 2 17 16 114 113 537 256
+593 3 2 17 16 113 8 534 537
+594 3 2 17 16 537 534 535 279
+595 3 2 17 16 256 537 279 136
+596 3 2 17 16 201 338 538 532
+597 3 2 17 16 338 202 386 538
+598 3 2 17 16 538 386 188 525
+599 3 2 17 16 532 538 525 196
+600 3 2 17 16 529 182 519 539
+601 3 2 17 16 539 519 164 516
+602 3 2 17 16 529 539 516 138
+$EndElements
diff --git a/examples/step-5/CMakeLists.txt b/examples/step-5/CMakeLists.txt
new file mode 100644
index 0000000..7f3d861
--- /dev/null
+++ b/examples/step-5/CMakeLists.txt
@@ -0,0 +1,39 @@
+##
+#  CMake script for the step-5 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-5")
+
+# Declare all source files the target consists of. Here, this is only
+# the one ${TARGET}.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#    FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#    FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#    SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+# Locate deal.II. QUIET is passed; a failed search is reported by the explicit FATAL_ERROR below.
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+# The DEAL_II_* macros come from the deal.II CMake configuration found above; per its docs the first must precede PROJECT().
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-5/TODO b/examples/step-5/TODO
new file mode 100644
index 0000000..09b0612
--- /dev/null
+++ b/examples/step-5/TODO
@@ -0,0 +1 @@
+Text following 414 is wrong
diff --git a/examples/step-5/circle-grid.inp b/examples/step-5/circle-grid.inp
new file mode 100644
index 0000000..f28a7a2
--- /dev/null
+++ b/examples/step-5/circle-grid.inp
@@ -0,0 +1,46 @@
+25 20 0 0 0
+1  -0.7071 -0.7071 0
+2  0.7071 -0.7071 0
+3  -0.2668 -0.2668 0
+4  0.2668 -0.2668 0
+5  -0.2668 0.2668 0
+6  0.2668 0.2668 0
+7  -0.7071 0.7071 0
+8  0.7071 0.7071 0
+9  0 -1 0
+10  0.5 -0.5 0
+11  0 -0.3139 0
+12  -0.5 -0.5 0
+13  0 -0.6621 0
+14  -0.3139 0 0
+15  -0.5 0.5 0
+16  -1 0 0
+17  -0.6621 0 0
+18  0.3139 0 0
+19  0 0.3139 0
+20  0 0 0
+21  1 0 0
+22  0.5 0.5 0
+23  0.6621 0 0
+24  0 1 0
+25  0 0.6621 0
+1 0 quad    1 9 13 12 
+2 0 quad    9 2 10 13 
+3 0 quad    13 10 4 11 
+4 0 quad    12 13 11 3 
+5 0 quad    1 12 17 16 
+6 0 quad    12 3 14 17 
+7 0 quad    17 14 5 15 
+8 0 quad    16 17 15 7 
+9 0 quad    3 11 20 14 
+10 0 quad    11 4 18 20 
+11 0 quad    20 18 6 19 
+12 0 quad    14 20 19 5 
+13 0 quad    2 21 23 10 
+14 0 quad    21 8 22 23 
+15 0 quad    23 22 6 18 
+16 0 quad    10 23 18 4 
+17 0 quad    7 15 25 24 
+18 0 quad    15 5 19 25 
+19 0 quad    25 19 6 22 
+20 0 quad    24 25 22 8 
diff --git a/examples/step-5/doc/builds-on b/examples/step-5/doc/builds-on
new file mode 100644
index 0000000..48a0f73
--- /dev/null
+++ b/examples/step-5/doc/builds-on
@@ -0,0 +1 @@
+step-4
diff --git a/examples/step-5/doc/intro.dox b/examples/step-5/doc/intro.dox
new file mode 100644
index 0000000..9e02ee8
--- /dev/null
+++ b/examples/step-5/doc/intro.dox
@@ -0,0 +1,74 @@
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+@dealiiVideoLecture{14}
+
+This example does not show revolutionary new things, but it shows many
+small improvements over the previous examples, and also many small
+things that can usually be found in finite element programs. Among
+them are:
+<ul>
+  <li> Computations on successively refined grids. At least in the
+       mathematical sciences, it is common to compute solutions on
+       a hierarchy of grids, in order to get a feeling for the accuracy
+       of the solution; if you only have one solution on a single grid, you
+       usually can't guess the accuracy of the
+       solution. Furthermore, deal.II is designed to support adaptive
+       algorithms where iterative solution on successively refined
+       grids is at the heart of algorithms. Although adaptive grids
+       are not used in this example, the foundations for them are laid
+       here.
+  <li> In practical applications, the domains are often subdivided
+       into triangulations by automatic mesh generators. In order to
+       use them, it is important to read coarse grids from a file. In
+       this example, we will read a coarse grid in UCD (unstructured
+       cell data) format. When this program was first written around
+       2000, UCD format was what the AVS Explorer used -- a program
+       reasonably widely used at the time but now no longer of
+       importance. (Nonetheless, the file format has survived and is
+       still understood by a number of programs.)
+  <li> Finite element programs usually use extensive amounts of
+       computing time, so some optimizations are sometimes
+       necessary. We will show some of them.
+  <li> On the other hand, finite element programs tend to be rather
+       complex, so debugging is an important aspect. We support safe
+       programming by using assertions that check the validity of
+       parameters and %internal states in a debug mode, but are removed
+       in optimized mode. (@dealiiVideoLectureSeeAlso{18})
+  <li> Regarding the mathematical side, we show how to support a
+       variable coefficient in the elliptic operator and how to use
+       preconditioned iterative solvers for the linear systems of
+       equations.
+</ul>
+
+The equation to solve here is as follows:
+@f{align*}
+  -\nabla \cdot a(\mathbf x) \nabla u(\mathbf x) &= 1 \qquad\qquad & \text{in}\ \Omega,
+  \\
+  u &= 0 \qquad\qquad & \text{on}\ \partial\Omega.
+@f}
+If $a(\mathbf x)$ was a constant coefficient, this would simply be the Poisson
+equation. However, if it is indeed spatially variable, it is a more complex
+equation (often referred to as the "extended Poisson equation"). Depending on
+what the variable $u$ refers to, it models a variety of situations with wide
+applicability:
+
+- If $u$ is the electric potential, then $-a\nabla u$ is the electric current
+  in a medium and the coefficient $a$ is the conductivity of the medium at any
+  given point. (In this situation, the right hand side of the equation would
+  be the electric source density and would usually be zero or consist of
+  localized, Delta-like, functions.)
+- If $u$ is the vertical deflection of a thin membrane, then $a$ would be a
+  measure of the local stiffness. This is the interpretation that will allow
+  us to interpret the images shown in the results section below.
+
+Since the Laplace/Poisson equation appears in so many contexts, there are many
+more interpretations than just the two listed above.
+
+When assembling the linear system for this equation, we need the weak form
+which here reads as follows:
+@f{align*}
+  (a \nabla \varphi, \nabla u) &= (\varphi, 1) \qquad \qquad \forall \varphi.
+@f}
+The implementation in the <code>assemble_system</code> function follows
+immediately from this.
diff --git a/examples/step-5/doc/kind b/examples/step-5/doc/kind
new file mode 100644
index 0000000..15a13db
--- /dev/null
+++ b/examples/step-5/doc/kind
@@ -0,0 +1 @@
+basic
diff --git a/examples/step-5/doc/results.dox b/examples/step-5/doc/results.dox
new file mode 100644
index 0000000..dec392e
--- /dev/null
+++ b/examples/step-5/doc/results.dox
@@ -0,0 +1,162 @@
+<h1>Results</h1>
+
+
+When the last block in <code>main()</code> is commented in, the output
+of the program looks as follows:
+@code
+Cycle 0:
+   Number of active cells: 20
+   Total number of cells: 20
+   Number of degrees of freedom: 25
+   13 CG iterations needed to obtain convergence.
+Cycle 1:
+   Number of active cells: 80
+   Total number of cells: 100
+   Number of degrees of freedom: 89
+   18 CG iterations needed to obtain convergence.
+Cycle 2:
+   Number of active cells: 320
+   Total number of cells: 420
+   Number of degrees of freedom: 337
+   29 CG iterations needed to obtain convergence.
+Cycle 3:
+   Number of active cells: 1280
+   Total number of cells: 1700
+   Number of degrees of freedom: 1313
+   52 CG iterations needed to obtain convergence.
+Cycle 4:
+   Number of active cells: 5120
+   Total number of cells: 6820
+   Number of degrees of freedom: 5185
+   95 CG iterations needed to obtain convergence.
+Cycle 5:
+   Number of active cells: 20480
+   Total number of cells: 27300
+   Number of degrees of freedom: 20609
+   182 CG iterations needed to obtain convergence.
+--------------------------------------------------------
+An error occurred in line <273> of file <\step-5.cc> in function
+    void Coefficient<dim>::value_list(const std::vector<Point<dim>, std::allocator<Point<dim> > >&, std::vector<double, std::allocator<double> >&, unsigned int)
+ const [with int dim = 2]
+The violated condition was:
+    values.size() == points.size()
+The name and call sequence of the exception was:
+    ExcDimensionMismatch (values.size(), points.size())
+Additional Information:
+Dimension 1 not equal to 2
+
+Stacktrace:
+-----------
+#0  ./\step-5: Coefficient<2>::value_list(std::vector<Point<2>, std::allocator<Point<2> > > const&, std::vector<double, std::allocator<double> >&, unsigned) const
+#1  ./\step-5: main
+--------------------------------------------------------
+make: *** [run] Aborted
+@endcode
+
+
+
+Let's first focus on the things before the error:
+In each cycle, the number of cells quadruples and the number of CG
+iterations roughly doubles.
+Also, in each cycle, the program writes one output graphic file in EPS
+format. They are depicted in the following:
+
+
+
+<TABLE WIDTH="100%">
+  <tr>
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-5.solution-0.png" alt="">
+    </td>
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-5.solution-1.png" alt="">
+    </td>
+  </tr>
+
+  <tr>
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-5.solution-2.png" alt="">
+    </td>
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-5.solution-3.png" alt="">
+    </td>
+  </tr>
+
+  <tr>
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-5.solution-4.png" alt="">
+    </td>
+    <td>
+      <img src="http://www.dealii.org/images/steps/developer/step-5.solution-5.png" alt="">
+    </td>
+  </tr>
+</table>
+
+
+
+Due to the variable coefficient (the curvature there is reduced by the
+same factor by which the coefficient is increased), the top region of
+the solution is flattened. The gradient of the solution is
+discontinuous there, although this is not very clearly visible in the
+pictures above. We will look at this in more detail in the next
+example.
+
+
+
+
+As for the error — let's look at it again:
+@code
+--------------------------------------------------------
+An error occurred in line <273> of file <\step-5.cc> in function
+    void Coefficient<dim>::value_list(const std::vector<Point<dim>, std::allocator<Point<dim> > >&, std::vector<double, std::allocator<double> >&, unsigned int)
+ const [with int dim = 2]
+The violated condition was:
+    values.size() == points.size()
+The name and call sequence of the exception was:
+    ExcDimensionMismatch (values.size(), points.size())
+Additional Information:
+Dimension 1 not equal to 2
+
+Stacktrace:
+-----------
+#0  ./\step-5: Coefficient<2>::value_list(std::vector<Point<2>, std::allocator<Point<2> > > const&, std::vector<double, std::allocator<double> >&, unsigned) const
+#1  ./\step-5: main
+--------------------------------------------------------
+make: *** [run] Aborted
+@endcode
+
+
+
+What we see is that the error was triggered in line 273 of the
+step-5.cc file (as we modify tutorial programs over time, these line
+numbers change, so you should check what line number you actually get
+in your output). That's already good information if you want to look up
+in the code what exactly happened. But the text tells you even
+more. First, it prints the function this happens in, and then the
+plain text version of the condition that was violated. This will
+almost always be enough already to let you know what exactly went wrong.
+
+
+
+But that's not all yet. You get to see the name of the exception
+(<code>ExcDimensionMismatch</code>) and this exception even prints the
+values of the two array sizes. If you go back to the code in
+<code>main()</code>, you will remember that we gave the two variables
+sizes 1 and 2, which of course are the ones that you find in the
+output again.
+
+
+
+So now we know pretty exactly where the error happened and what went
+wrong. What we don't know yet is how exactly we got there. The
+stacktrace at the bottom actually tells us what happened: the problem
+happened in
+<code>Coefficient::value_list</code> (stack frame 0) and it was
+called from <code>main()</code> (stack frame 1). In realistic programs,
+there would be many more functions in between these two. For example,
+we might have made the mistake in the <code>assemble_system</code>
+function, in which case stack frame 1 would be
+<code>Step5::assemble_system</code>, stack frame 2
+would be <code>Step5::run</code>, and stack frame 3
+would be <code>main()</code> — you get the idea.
+
diff --git a/examples/step-5/doc/tooltip b/examples/step-5/doc/tooltip
new file mode 100644
index 0000000..51b43ab
--- /dev/null
+++ b/examples/step-5/doc/tooltip
@@ -0,0 +1 @@
+Reading a grid from disk. Computations on successively refined grids. Variable coefficients.
diff --git a/examples/step-5/step-5.cc b/examples/step-5/step-5.cc
new file mode 100644
index 0000000..964a761
--- /dev/null
+++ b/examples/step-5/step-5.cc
@@ -0,0 +1,654 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 1999 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Author: Wolfgang Bangerth, University of Heidelberg, 1999
+ */
+
+
+// @sect3{Include files}
+
+// Again, the first few include files are already known, so we won't comment
+// on them:
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/precondition.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/matrix_tools.h>
+#include <deal.II/numerics/data_out.h>
+
+// This one is new. We want to read a triangulation from disk, and the class
+// which does this is declared in the following file:
+#include <deal.II/grid/grid_in.h>
+
+// We will use a circular domain, and the object describing the boundary of it
+// comes from this file:
+#include <deal.II/grid/manifold_lib.h>
+
+// This is C++ ...
+#include <fstream>
+#include <iostream>
+// ... and this is too: We will convert integers to strings using the C++
+// stringstream class <code>ostringstream</code>:
+#include <sstream>
+
+// Finally, this has been discussed in previous tutorial programs:
+using namespace dealii;
+
+
+// @sect3{The <code>Step5</code> class template}
+
+// The main class is mostly as in the previous example. The most visible
+// change is that the function <code>make_grid_and_dofs</code> has been
+// removed, since creating the grid is now done in the <code>run</code>
+// function and the rest of its functionality is now in
+// <code>setup_system</code>. Apart from this, everything is as before.
+template <int dim>
+class Step5
+{
+public:
+  Step5 ();
+  void run ();
+
+private:
+  void setup_system ();
+  void assemble_system ();
+  void solve ();
+  void output_results (const unsigned int cycle) const;
+
+  Triangulation<dim>   triangulation;
+  FE_Q<dim>            fe;               // constructed as fe(1) by the constructor
+  DoFHandler<dim>      dof_handler;      // constructed from triangulation; members are initialized in declaration order, so keep it after triangulation
+
+  SparsityPattern      sparsity_pattern; // copied from a DynamicSparsityPattern in setup_system()
+  SparseMatrix<double> system_matrix;    // reinit'ed on sparsity_pattern in setup_system()
+
+  Vector<double>       solution;         // resized to dof_handler.n_dofs() in setup_system()
+  Vector<double>       system_rhs;
+};
+
+
+// @sect3{Nonconstant coefficients, using <code>Assert</code>}
+
+// In step-4, we showed how to use non-constant boundary values and right hand
+// side.  In this example, we want to use a variable coefficient in the
+// elliptic operator instead. Of course, the suitable object is a
+// <code>Function</code>, as we have used for the right hand side and boundary
+// values in the last example. We will use it again, but we implement another
+// function <code>value_list</code> which takes a list of points and returns
+// the values of the function at these points as a list. The reason why such a
+// function is reasonable although we can get all the information from the
+// <code>value</code> function as well will be explained below when assembling
+// the matrix.
+//
+// The need to declare a seemingly useless default constructor exists here
+// just as in the previous example.
+template <int dim>
+class Coefficient : public Function<dim>
+{
+public:
+  Coefficient ()  : Function<dim>() {}
+
+  virtual double value (const Point<dim>   &p,
+                        const unsigned int  component = 0) const; // the implementation ignores component
+
+  virtual void value_list (const std::vector<Point<dim> > &points,
+                           std::vector<double>            &values,
+                           const unsigned int              component = 0) const; // asserts values.size()==points.size() and component==0
+};
+
+
+
+// This is the implementation of the coefficient function for a single
+// point. We let it return 20 if the distance to the origin is less than 0.5,
+// and 1 otherwise. As in the previous example, we simply ignore the second
+// parameter of the function that is used to denote different components of
+// vector-valued functions (we deal only with a scalar function here, after
+// all):
+template <int dim>
+double Coefficient<dim>::value (const Point<dim> &p,
+                                const unsigned int /*component*/) const
+{
+  if (p.square() < 0.5*0.5)   // squared form of the "distance to the origin less than 0.5" test
+    return 20;
+  else
+    return 1;
+}
+
+
+
+// And this is the function that returns the value of the coefficient at a
+// whole list of points at once. Of course, we need to make sure that the
+// values are the same as if we would ask the <code>value</code> function for
+// each point individually.
+//
+// This method takes three parameters: a list of points at which to evaluate
+// the function, a list that will hold the values at these points, and the
+// vector component that should be zero here since we only have a single
+// scalar function.  Now, of course the size of the output array
+// (<code>values</code>) must be the same as that of the input array
+// (<code>points</code>), and we could simply assume that. However, in
+// practice, it turns out that more than 90 per cent of programming errors are
+// invalid function parameters such as invalid array sizes, etc, so we should
+// try to make sure that the parameters are valid. For this, the
+// <code>Assert</code> macro is a good means, since it makes sure that the
+// condition which is given as first argument is valid, and if not throws an
+// exception (its second argument) which will usually terminate the program
+// giving information where the error occurred and what the reason was. This
+// generally reduces the time to find programming errors dramatically and we
+// have found assertions an invaluable means to program fast.
+//
+// On the other hand, all these checks (there are more than 4200 of them in
+// the library at present) should not slow down the program too much if you
+// want to do large computations. To this end, the <code>Assert</code> macro
+// is only used in debug mode and expands to nothing if in optimized
+// mode. Therefore, while you test your program on small problems and debug
+// it, the assertions will tell you where the problems are.  Once your program
+// is stable, you can switch off debugging and the program will run your real
+// computations without the assertions and at maximum speed. (In fact, it
+// turns out that switching off all the checks in the library that prevent you
+// from calling functions with the wrong arguments, by switching to optimized
+// mode, makes most programs run faster by about a factor of four. This
+// should, however, not induce you to always run in optimized mode:
+// Most people who have tried that soon realize that they introduce lots of
+// errors that would have easily been caught had they run the program in debug
+// mode while developing.) For those who want to try: The way to switch from
+// debug mode to optimized mode is to recompile your program with the command
+// <code>make release</code>. The output of the <code>make</code> program
+// should now indicate to you that the program is compiled in optimized mode,
+// and it will later also be linked to libraries that have been compiled for
+// optimized mode. In order to switch back to debug mode, simply recompile
+// with the command <code>make debug</code>.
+//
+// Here, as has been said above, we would like to make sure that the size of
+// the two arrays is equal, and if not throw an exception. Comparing the sizes
+// of two arrays is one of the most frequent checks, which is why there is
+// already an exception class <code>ExcDimensionMismatch</code> that takes the
+// sizes of two vectors and prints some output in case the condition is
+// violated:
+
+template <int dim>
+void Coefficient<dim>::value_list (const std::vector<Point<dim> > &points,
+                                   std::vector<double>            &values,
+                                   const unsigned int              component) const
+{
+  Assert (values.size() == points.size(),
+          ExcDimensionMismatch (values.size(), points.size()));
+  // Since examples are not very good if they do not demonstrate their point,
+  // we will show how to trigger this exception at the end of the main
+  // program, and what output results from this (see the <code>Results</code>
+  // section of this example program). You will certainly notice that the
+  // output is quite well suited to quickly find what the problem is and what
+  // parameters are expected. An additional plus is that if the program is run
+  // inside a debugger, it will stop at the point where the exception is
+  // triggered, so you can go up the call stack to immediately find the place
+  // where the array with the wrong size was set up.
+
+  // While we're at it, we can do another check: the coefficient is a scalar,
+  // but the <code>Function</code> class also represents vector-valued
+  // functions. A scalar function must therefore be considered as a
+  // vector-valued function with only one component, so the only valid
+  // component for which a user might ask is zero (we always count from
+  // zero). The following assertion checks this. If the condition in the
+  // <code>Assert</code> call is violated, an exception of type
+  // <code>ExcIndexRange</code> will be triggered; that class takes the
+  // violating index as first argument, and the second and third arguments
+  // denote a range that includes the left point but is open at the right,
+  // i.e. here the interval [0,1). For integer arguments, this means that the
+  // only value in the range is the zero, of course. (The interval is half
+  // open since we also want to write exceptions like
+  // <code>ExcIndexRange(i,0,v.size())</code>, where an index must be at least
+  // zero but less than the size of an array. To save us the effort of writing
+  // <code>v.size()-1</code> in many places, the range is defined as half-open.)
+  Assert (component == 0,
+          ExcIndexRange (component, 0, 1));
+
+  // The rest of the function is uneventful: we define <code>n_points</code>
+  // as an abbreviation for the number of points for which function values are
+  // requested, and then simply fill the output values:
+  const unsigned int n_points = points.size();
+
+  for (unsigned int i=0; i<n_points; ++i)
+    {
+      if (points[i].square() < 0.5*0.5)   // same threshold test as in value()
+        values[i] = 20;
+      else
+        values[i] = 1;
+    }
+}
+
+
+// @sect3{The <code>Step5</code> class implementation}
+
+// @sect4{Step5::Step5}
+
+// This function is as before.
+template <int dim>
+Step5<dim>::Step5 () :
+  fe (1),
+  dof_handler (triangulation)   // bound to the member triangulation that run() fills from the grid file
+{}
+
+
+
+// @sect4{Step5::setup_system}
+
+// This is the function <code>make_grid_and_dofs</code> from the previous
+// example, minus the generation of the grid. Everything else is unchanged:
+template <int dim>
+void Step5<dim>::setup_system ()
+{
+  dof_handler.distribute_dofs (fe);
+
+  std::cout << "   Number of degrees of freedom: "
+            << dof_handler.n_dofs()
+            << std::endl;
+
+  DynamicSparsityPattern dsp(dof_handler.n_dofs());
+  DoFTools::make_sparsity_pattern (dof_handler, dsp);
+  sparsity_pattern.copy_from(dsp);   // final pattern; system_matrix is initialized from it below
+
+  system_matrix.reinit (sparsity_pattern);
+
+  solution.reinit (dof_handler.n_dofs());
+  system_rhs.reinit (dof_handler.n_dofs());
+}
+
+
+
+// @sect4{Step5::assemble_system}
+
+// As in the previous examples, this function is not changed much with regard
+// to its functionality, but there are still some optimizations which we will
+// show. For this, it is important to note that if efficient solvers are used
+// (such as the preconditioned CG method), assembling the matrix and right hand
+// side can take a comparable time, and you should think about using one or
+// two optimizations at some places.
+//
+// What we will show here is how we can avoid calls to the shape_value,
+// shape_grad, and quadrature_point functions of the FEValues object, and in
+// particular optimize away most of the virtual function calls of the Function
+// object. The way to do so will be explained in the following, while those
+// parts of this function that are not changed with respect to the previous
+// example are not commented on.
+//
+// The first parts of the function are completely unchanged from before:
+template <int dim>
+void Step5<dim>::assemble_system ()
+{
+  QGauss<dim>  quadrature_formula(2);
+
+  FEValues<dim> fe_values (fe, quadrature_formula,
+                           update_values    |  update_gradients |
+                           update_quadrature_points  |  update_JxW_values);
+
+  const unsigned int   dofs_per_cell = fe.dofs_per_cell;
+  const unsigned int   n_q_points    = quadrature_formula.size();
+
+  FullMatrix<double>   cell_matrix (dofs_per_cell, dofs_per_cell);
+  Vector<double>       cell_rhs (dofs_per_cell);
+
+  std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+
+  // Here is one difference: for this program, we will again use a constant
+  // right hand side function and zero boundary values, but a variable
+  // coefficient. We have already declared the class that represents this
+  // coefficient above, so we only have to declare a corresponding object
+  // here.
+  //
+  // Then, below, we will ask the <code>coefficient</code> function object to
+  // compute the values of the coefficient at all quadrature points on one
+  // cell at once. The reason for this is that, if you look back at how we did
+  // this in step-4, you will realize that we called the function computing
+  // the right hand side value inside nested loops over all degrees of freedom
+  // and over all quadrature points, i.e. dofs_per_cell*n_q_points times. For
+  // the coefficient that is used inside the matrix, this would actually be
+  // dofs_per_cell*dofs_per_cell*n_q_points. On the other hand, the function
+  // will of course return the same value every time it is called with the
+  // same quadrature point, independently of what shape function we presently
+  // treat; secondly, these are virtual function calls, so are rather
+  // expensive. Obviously, there are only n_q_points different values, and we
+  // shouldn't call the function more often than that. Or, even better than
+  // this, compute all of these values at once, and get away with a single
+  // function call per cell.
+  //
+  // This is exactly what we are going to do. For this, we need some space to
+  // store the values in. We therefore also have to declare an array to hold
+  // these values:
+  const Coefficient<dim> coefficient;
+  std::vector<double>    coefficient_values (n_q_points);
+
+  // Next is the typical loop over all cells to compute local contributions
+  // and then to transfer them into the global matrix and vector.
+  //
+  // This loop differs from step-4 in only two things. The first is that we
+  // want to compute the value of the coefficient in all quadrature points on
+  // the present cell at the beginning, and then use it in the computation of
+  // the local contributions. This is what we do in the call to
+  // <code>coefficient.value_list</code> in the fourth line of the loop.
+  //
+  // The second change is how we make use of this coefficient in computing the
+  // cell matrix contributions. This is in the obvious way, and not worth more
+  // comments. For the right hand side, we use a constant value again.
+  typename DoFHandler<dim>::active_cell_iterator
+  cell = dof_handler.begin_active(),
+  endc = dof_handler.end();
+  for (; cell!=endc; ++cell)
+    {
+      cell_matrix = 0;
+      cell_rhs = 0;
+
+      fe_values.reinit (cell);
+
+      coefficient.value_list (fe_values.get_quadrature_points(),
+                              coefficient_values);
+
+      for (unsigned int q_index=0; q_index<n_q_points; ++q_index)
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          {
+            for (unsigned int j=0; j<dofs_per_cell; ++j)
+              cell_matrix(i,j) += (coefficient_values[q_index] *
+                                   fe_values.shape_grad(i,q_index) *
+                                   fe_values.shape_grad(j,q_index) *
+                                   fe_values.JxW(q_index));
+
+            cell_rhs(i) += (fe_values.shape_value(i,q_index) *
+                            1.0 *   // the constant right hand side value
+                            fe_values.JxW(q_index));
+          }
+
+
+      cell->get_dof_indices (local_dof_indices);
+      for (unsigned int i=0; i<dofs_per_cell; ++i)
+        {
+          for (unsigned int j=0; j<dofs_per_cell; ++j)
+            system_matrix.add (local_dof_indices[i],
+                               local_dof_indices[j],
+                               cell_matrix(i,j));
+
+          system_rhs(local_dof_indices[i]) += cell_rhs(i);
+        }
+    }
+
+  // With the matrix so built, we use zero boundary values again:
+  std::map<types::global_dof_index,double> boundary_values;
+  VectorTools::interpolate_boundary_values (dof_handler,
+                                            0,
+                                            ZeroFunction<dim>(),
+                                            boundary_values);
+  MatrixTools::apply_boundary_values (boundary_values,
+                                      system_matrix,
+                                      solution,
+                                      system_rhs);
+}
+
+
+// @sect4{Step5::solve}
+
+// The solution process again looks mostly like in the previous
+// examples. However, we will now use a preconditioned conjugate gradient
+// algorithm. It is not very difficult to make this change. In fact, the only
+// thing we have to alter is that we need an object which will act as a
+// preconditioner. We will use SSOR (symmetric successive overrelaxation),
+// with a relaxation factor of 1.2. For this purpose, the
+// <code>SparseMatrix</code> class has a function which does one SSOR step,
+// and we need to package the address of this function together with the
+// matrix on which it should act (which is the matrix to be inverted) and the
+// relaxation factor into one object. The <code>PreconditionSSOR</code> class
+// does this for us. (The <code>PreconditionSSOR</code> class takes a template
+// argument denoting the matrix type it is supposed to work on. The default
+// value is <code>SparseMatrix@<double@></code>, which is exactly what we need
+// here, so we simply stick with the default and do not specify anything in
+// the angle brackets.)
+//
+// Note that for the present case, SSOR doesn't really perform much better
+// than most other preconditioners (though better than no preconditioning at
+// all). A brief comparison of different preconditioners is presented in the
+// Results section of the next tutorial program, step-6.
+//
+// With this, the rest of the function is trivial: instead of the
+// <code>PreconditionIdentity</code> object we have created before, we now use
+// the preconditioner we have declared, and the CG solver will do the rest for
+// us:
+template <int dim>
+void Step5<dim>::solve ()
+{
+  SolverControl           solver_control (1000, 1e-12);
+  SolverCG<>              solver (solver_control);
+
+  PreconditionSSOR<> preconditioner;
+  preconditioner.initialize(system_matrix, 1.2);   // 1.2 is the SSOR relaxation factor
+
+  solver.solve (system_matrix, solution, system_rhs,
+                preconditioner);
+
+  std::cout << "   " << solver_control.last_step()
+            << " CG iterations needed to obtain convergence."
+            << std::endl;
+}
+
+
+// @sect4{Step5::output_results and setting output flags}
+
+// Writing output to a file is mostly the same as for the previous example,
+// but here we will show how to modify some output options and how to
+// construct a different filename for each refinement cycle.
+template <int dim>
+void Step5<dim>::output_results (const unsigned int cycle) const
+{
+  DataOut<dim> data_out;
+
+  data_out.attach_dof_handler (dof_handler);
+  data_out.add_data_vector (solution, "solution");
+
+  data_out.build_patches ();
+
+  // For this example, we would like to write the output directly to a file in
+  // Encapsulated Postscript (EPS) format. The library supports this, but
+  // things may be a bit more difficult sometimes, since EPS is a printing
+  // format, unlike most other supported formats which serve as input for
+  // graphical tools. Therefore, you can't scale or rotate the image after it
+  // has been written to disk, and you have to decide about the viewpoint or
+  // the scaling in advance.
+  //
+  // The defaults in the library are usually quite reasonable, and regarding
+  // viewpoint and scaling they coincide with the defaults of
+  // Gnuplot. However, since this is a tutorial, we will demonstrate how to
+  // change them. For this, we first have to generate an object describing the
+  // flags for EPS output (similar flag classes exist for all supported output
+  // formats):
+  DataOutBase::EpsFlags eps_flags;
+  // They are initialized with the default values, so we only have to change
+  // those that we don't like. For example, we would like to scale the z-axis
+  // differently (stretch each data point in z-direction by a factor of four):
+  eps_flags.z_scaling = 4;
+  // Then we would also like to alter the viewpoint from which we look at the
+  // solution surface. The default is at an angle of 60 degrees down from the
+  // vertical axis, and 30 degrees rotated against it in mathematical positive
+  // sense. We raise our viewpoint a bit and look more along the y-axis:
+  eps_flags.azimut_angle = 40;
+  eps_flags.turn_angle   = 10;
+  // That shall suffice. There are more flags, for example whether to draw the
+  // mesh lines, which data vectors to use for colorization of the interior of
+  // the cells, and so on. You may want to take a look at the documentation of
+  // the EpsFlags structure to get an overview of what is possible.
+  //
+  // The only thing still to be done, is to tell the output object to use
+  // these flags:
+  data_out.set_flags (eps_flags);
+  // The above way to modify flags requires recompilation each time we would
+  // like to use different flags. This is inconvenient, and we will see more
+  // advanced ways in step-19 where the output flags are determined at run
+  // time using an input file (step-19 doesn't show many other things; you
+  // should feel free to read over it even if you haven't done step-6 to
+  // step-18 yet).
+
+  // Finally, we need the filename to which the results are to be written. We
+  // would like to have it of the form <code>solution-N.eps</code>, where N is
+  // the number of the refinement cycle. Thus, we have to convert an integer
+  // to a part of a string; this can be done using the <code>sprintf</code>
+  // function, but in C++ there is a more elegant way: write everything into a
+  // special stream (just like writing into a file or to the screen) and
+  // retrieve what you wrote as a string. This applies the usual conversions
+  // from integer to strings, and one could as well use stream modifiers such
+  // as <code>setw</code>, <code>setprecision</code>, and so on. In C++, you
+  // can do this by using the so-called stringstream classes:
+  std::ostringstream filename;
+
+  // In order to now actually generate a filename, we fill the stringstream
+  // variable with the base of the filename, then the number part, and finally
+  // the suffix indicating the file type:
+  filename << "solution-"
+           << cycle
+           << ".eps";
+
+  // We can get whatever we wrote to the stream using the <code>str()</code>
+  // function. The result is a string which we have to convert to a char*
+  // using the <code>c_str()</code> function. Use that as filename for the
+  // output stream and then write the data to the file:
+  std::ofstream output (filename.str().c_str());
+
+  data_out.write_eps (output);   // uses the EPS flags registered via set_flags() above
+}
+
+
+
+// @sect4{Step5::run}
+
+// The second to last thing in this program is the definition of the
+// <code>run()</code> function. In contrast to the previous programs, we will
+// compute on a sequence of meshes that after each iteration is globally
+// refined. The function therefore consists of a loop over 6 cycles. In each
+// cycle, we first print the cycle number, and then have to decide what to do
+// with the mesh. If this is not the first cycle, we simply refine the
+// existing mesh once globally. Before running through these cycles, however,
+// we have to generate a mesh:
+
+// In previous examples, we have already used some of the functions from the
+// <code>GridGenerator</code> class. Here we would like to read a grid from a
+// file where the cells are stored and which may originate from someone else,
+// or may be the product of a mesh generator tool.
+//
+// In order to read a grid from a file, we generate an object of data type
+// GridIn and associate the triangulation to it (i.e. we tell it to fill our
+// triangulation object when we ask it to read the file). Then we open the
+// respective file and initialize the triangulation with the data in the file:
+template <int dim>
+void Step5<dim>::run ()
+{
+  GridIn<dim> grid_in;
+  grid_in.attach_triangulation (triangulation);
+  std::ifstream input_file("circle-grid.inp");
+  // We would now like to read the file. However, the input file is only for a
+  // two-dimensional triangulation, while this function is a template for
+  // arbitrary dimension. Since this is only a demonstration program, we will
+  // not use different input files for the different dimensions, but rather
+  // kill the whole program if we are not in 2D:
+  Assert (dim==2, ExcInternalError());
+  // ExcInternalError is a globally defined exception, which may be thrown
+  // whenever something is terribly wrong. Usually, one would like to use more
+  // specific exceptions, and in particular in this case one would of course
+  // try to do something else if <code>dim</code> is not equal to two, e.g.
+  // create a grid using library functions. Aborting a program is usually not
+  // a good idea and assertions should really only be used for exceptional
+  // cases which should not occur, but might due to stupidity of the
+  // programmer, user, or someone else. The situation above is not a very
+  // clever use of Assert, but again: this is a tutorial and it might be worth
+  // showing what not to do, after all.
+
+  // So if we got past the assertion, we know that dim==2, and we can now
+  // actually read the grid. It is in UCD (unstructured cell data) format (though
+  // the convention is to use the suffix <code>inp</code> for UCD files):
+  grid_in.read_ucd (input_file);
+  // If you like to use another input format, you have to use one of the other
+  // <code>grid_in.read_xxx</code> functions. (See the documentation of the
+  // <code>GridIn</code> class to find out what input formats are presently
+  // supported.)
+
+  // The grid in the file describes a circle. Therefore we have to use
+  // a manifold object which tells the triangulation where to put new
+  // points on the boundary when the grid is refined. This works in
+  // the same way as in the first example, but in this case we only
+  // set the manifold ids of the boundary.
+  static const SphericalManifold<dim> boundary;
+  triangulation.set_all_manifold_ids_on_boundary(0);
+  triangulation.set_manifold (0, boundary);
+
+  for (unsigned int cycle=0; cycle<6; ++cycle)
+    {
+      std::cout << "Cycle " << cycle << ':' << std::endl;
+
+      if (cycle != 0)
+        triangulation.refine_global (1);
+
+      // Now that we have a mesh for sure, we write some output and do all the
+      // things that we have already seen in the previous examples.
+      std::cout << "   Number of active cells: "
+                << triangulation.n_active_cells()
+                << std::endl
+                << "   Total number of cells: "
+                << triangulation.n_cells()
+                << std::endl;
+
+      setup_system ();
+      assemble_system ();
+      solve ();
+      output_results (cycle);
+    }
+}
+
+
+// @sect3{The <code>main</code> function}
+
+// The main function looks mostly like the one in the previous example, so we
+// won't comment on it further:
+int main ()
+{
+  Step5<2> laplace_problem_2d;   // run() asserts dim==2, so only the 2d case is instantiated
+  laplace_problem_2d.run ();
+
+  // Finally, we have promised to trigger an exception in the
+  // <code>Coefficient</code> class through the <code>Assert</code> macro we
+  // have introduced there. For this, we have to call its
+  // <code>value_list</code> function with two arrays of different size (the
+  // number in parentheses behind the declaration of the object). We have
+  // commented out these lines in order to allow the program to exit
+  // gracefully in normal situations (we use the program in day-to-day testing
+  // of changes to the library as well), so you will only get the exception by
+  // un-commenting the following lines. Take a look at the Results section of
+  // the program to see what happens when the code is actually run:
+  /*
+    Coefficient<2>    coefficient;
+    std::vector<Point<2> > points (2);
+    std::vector<double>    coefficient_values (1);
+    coefficient.value_list (points, coefficient_values);
+  */
+
+  return 0;
+}
diff --git a/examples/step-51/CMakeLists.txt b/examples/step-51/CMakeLists.txt
new file mode 100644
index 0000000..aad640c
--- /dev/null
+++ b/examples/step-51/CMakeLists.txt
@@ -0,0 +1,39 @@
+##
+#  CMake script for the step-51 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-51")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#   FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#   FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#   SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-51/doc/builds-on b/examples/step-51/doc/builds-on
new file mode 100644
index 0000000..f1818eb
--- /dev/null
+++ b/examples/step-51/doc/builds-on
@@ -0,0 +1 @@
+step-7 step-9
diff --git a/examples/step-51/doc/intro.dox b/examples/step-51/doc/intro.dox
new file mode 100644
index 0000000..0bca103
--- /dev/null
+++ b/examples/step-51/doc/intro.dox
@@ -0,0 +1,350 @@
+<br>
+
+<i>
+This program was contributed by Martin Kronbichler and Scott Miller.
+</i>
+
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+This tutorial program presents the implementation of a hybridizable
+discontinuous Galerkin method for the convection-diffusion equation.
+
+<h3>  Hybridizable discontinuous Galerkin methods</h3>
+
+One common argument against the use of discontinuous Galerkin elements
+is the large number of globally coupled degrees of freedom that one
+must solve in an implicit system.  This is because, unlike continuous finite
+elements, in typical discontinuous elements there is one degree of freedom at
+each vertex <i>for each of the adjacent elements</i>, rather than just one,
+and similarly for edges and faces.  As an example of how fast the number of
+unknowns grows,
+consider the <code>FE_DGP_Monomial</code> basis:  each
+scalar solution component is represented by polynomials of degree $p$
+with $(1/dim!)*\prod_{i=1}^{dim}(p+i)$ degrees of freedom per
+element. Typically, all degrees of freedom in an element are coupled
+to all of the degrees of freedom in the adjacent elements.  The resulting
+discrete equations yield very large linear systems very quickly, especially
+for systems of equations in 2 or 3 dimensions.
+
+<h4> Reducing the size of the linear system </h4>
+To alleviate the computational cost of solving such large linear systems,
+the hybridizable discontinuous Galerkin (HDG) methodology was introduced
+by Cockburn and co-workers
+(see the references in the recent HDG overview article by
+ N.C. Nguyen and J. Peraire:
+   <i>Hybridizable discontinuous Galerkin methods for partial differential
+   equations in continuum mechanics</i>, Journal of Computational Physics,
+   2012, 231:18, 5955-5988.
+   <a href="http://dx.doi.org/10.1016/j.jcp.2012.02.033">[DOI]</a>).
+The HDG method achieves
+this goal by formulating the mathematical problem using Dirichlet-to-Neumann
+mappings.  The partial differential equations are first written as a first
+order system, and each field is then discretized via a DG method.  At this
+point the  single-valued "trace" values on the skeleton of the
+mesh, i.e. element faces, are taken to be independent unknown quantities.
+The Dirichlet-to-Neumann map concept then permits the following solution procedure:
+<ol>
+  <li>  Use local element interior data to enforce a Neumann condition on the
+skeleton of the triangulation.  The global problem is then to solve for the
+trace values, which are the only globally coupled unknowns.
+  <li>  Use the known skeleton values as Dirichlet data for solving local
+element-level solutions.  This is known as the
+'local solver', and is an <i>embarrassingly parallel</i> element-by-element
+solution process.
+</ol>
+
+The above procedure also has a linear algebra interpretation and is referred to
+as static condensation. Let us write the complete linear system associated to
+the HDG problem as a block system with the discrete DG variables $U$ as
+first block and the skeleton variables $\Lambda$ as the second block:
+@f{eqnarray*}
+\begin{pmatrix} A & B \\ C & D \end{pmatrix}
+\begin{pmatrix} U \\ \Lambda \end{pmatrix}
+=
+\begin{pmatrix} F \\ G \end{pmatrix}.
+@f}
+Our aim is now to eliminate the $U$ block with a Schur complement
+approach similar to step-20, which results in the following two steps:
+@f{eqnarray*}
+(D - C A^{-1} B) \Lambda &=& G - C A^{-1} F, \\
+A U &=& F - B \Lambda.
+@f}
+The point is that the presence of $A^{-1}$ is not a problem because $A$ is a
+block diagonal matrix where each block corresponds to one cell and is
+therefore easy enough to invert.
+The coupling to other cells is introduced by the matrices
+$B$ and $C$ over the skeleton variable. The block-diagonality of
+$A$ and the structure in $B$ and $C$ allow us to invert the
+matrix $A$ element by element (the local solution of the Dirichlet
+problem) and subtract $CA^{-1}B$ from $D$. The steps in the Dirichlet-to-Neumann map concept hence correspond to
+<ol>
+  <li> constructing the Schur complement matrix $D-C A^{-1} B$ and right hand side $G - C A^{-1} F$  <i>locally on each cell</i>
+  and inserting the contribution into the global trace matrix in the usual way,
+  <li> solving the Schur complement system for $\Lambda$, and
+  <li> solving for $U$ using the second equation, given $\Lambda$.
+</ol>
+
+
+<h4> Solution quality and rates of convergence</h4>
+Another criticism of traditional DG methods is that the approximate fluxes
+converge suboptimally.  The local HDG solutions can be shown to converge
+as $\mathcal{O}(h^{p+1})$, i.e., at optimal order.  Additionally, a
+super-convergence property can be used to post-process a new approximate
+solution that converges at the rate $\mathcal{O}(h^{p+2})$.
+
+
+
+<h3> HDG applied to the convection-diffusion problem </h3>
+
+The HDG formulation used for this example is taken from
+<br>
+<b>
+  N.C. Nguyen, J. Peraire, B. Cockburn:
+  <i>An implicit high-order hybridizable discontinuous Galerkin method
+  for linear convection–diffusion equations</i>,
+  Journal of Computational Physics, 2009, 228:9, 3232-3254.
+  <a href="http://dx.doi.org/10.1016/j.jcp.2009.01.030">[DOI]</a>
+</b>
+
+We consider the convection-diffusion equation over the domain $\Omega$
+with Dirichlet boundary $\partial \Omega_D$ and Neumann boundary
+$\partial \Omega_N$:
+@f{eqnarray*}
+	\nabla \cdot (\mathbf{c} u) - \nabla \cdot (\kappa \nabla u) &=& f,
+	\quad \text{ in } \Omega, \\
+	u &=& g_D, \quad \text{ on } \partial \Omega_D, \\
+	(\mathbf{c} u - \kappa \nabla u)\cdot \mathbf{n} &=& g_N,
+	\quad \text{ on }  \partial \Omega_N.
+@f}
+
+Introduce the auxiliary variable $\mathbf{q}=-\kappa \nabla u$ and rewrite
+the above equation as the first order system:
+@f{eqnarray*}
+  \mathbf{q} + \kappa \nabla u &=& 0, \quad \text{ in } \Omega, \\
+  \nabla \cdot (\mathbf{c} u + \mathbf{q}) &=& f, \quad \text{ in } \Omega, \\
+  u &=& g_D, \quad \text{ on } \partial \Omega_D, \\
+  (\mathbf{q} + \mathbf{c}u)\cdot\mathbf{n}  &=& g_N,
+	\quad \text{ on }  \partial \Omega_N.
+@f}
+
+We multiply these equations by the weight functions $\mathbf{v}, w$
+and integrate by parts over every element $K$ to obtain:
+@f{eqnarray*}
+  (\mathbf{v}, \kappa^{-1} \mathbf{q})_K - (\nabla\cdot\mathbf{v}, u)_K
+    + \left<\mathbf{v}\cdot\mathbf{n}, \hat{u}\right>_{\partial K} &=& 0, \\
+  - (\nabla w, \mathbf{c} u + \mathbf{q})_K
+    + \left<w, (\widehat{\mathbf{c} u}+\hat{\mathbf{q}})\cdot\mathbf{n}\right>_{\partial K}
+    &=& (w,f)_K.
+@f}
+
+The terms decorated with a hat denote the numerical traces (also commonly referred
+to as numerical fluxes).  They are approximations
+to the interior values on the boundary of the element.  To ensure conservation,
+these terms must be single-valued on any given element edge $\partial K$ even
+though, with discontinuous shape functions, there may of course be multiple
+values coming from the cells adjacent to an interface.
+We eliminate the numerical trace $\hat{\mathbf{q}}$ by using traces of the form:
+@f{eqnarray*}
+  \widehat{\mathbf{c} u}+\hat{\mathbf{q}} = \mathbf{c}\hat{u} + \mathbf{q}
+  + \tau(u - \hat{u})\mathbf{n} \quad \text{ on } \partial K.
+@f}
+
+The variable $\hat {u}$ is introduced as an additional independent variable
+and is the one for which we finally set up a globally coupled linear
+system. As mentioned above, it is defined on the element faces and
+discontinuous from one face to another wherever faces meet (at
+vertices in 2d, and at edges and vertices in 3d).
+Values for $u$ and $\mathbf{q}$ appearing in the numerical trace function
+are taken to be the cell's interior solution restricted
+to the boundary $\partial K$.
+
+The local stabilization parameter $\tau$ has effects on stability and accuracy
+of HDG solutions; see the literature for a further discussion. A stabilization
+parameter of unity is reported to be the choice which gives best results. A
+stabilization parameter $\tau$ that tends to infinity prohibits jumps in the
+solution over the element boundaries, making the HDG solution approach the
+approximation with continuous finite elements. In the program below, we choose
+the stabilization parameter as
+@f{eqnarray*}
+  \tau = \frac{\kappa}{\ell} + |\mathbf{c} \cdot \mathbf{n}|
+@f}
+where we set the diffusion $\kappa=1$ and the diffusion length scale to
+$\ell = \frac{1}{5}$.
+
+The trace/skeleton variables in HDG methods are single-valued on element
+faces.  As such, they must strongly represent the Dirichlet data on
+$\partial\Omega_D$.  This means that
+@f{equation*}
+  \hat{u}|_{\partial \Omega_D} = g_D,
+@f}
+where the equal sign actually means an $L_2$ projection of the boundary
+function $g_D$ onto the space of the face variables (e.g. linear functions on
+the faces). This constraint is then applied to the skeleton variable $\hat{u}$
+using inhomogeneous constraints by the method
+VectorTools::project_boundary_values.
+
+Summing the elemental
+contributions across all elements in the triangulation, enforcing the normal
+component of the numerical flux, and integrating by parts
+on the equation weighted by $w$, we arrive at the final form of the problem:
+Find $(\mathbf{q}_h, u_h, \hat{u}_h) \in
+\mathcal{V}_h^p \times \mathcal{W}_h^p \times \mathcal{M}_h^p$ such that
+ at f{align*}
+  (\mathbf{v}, \kappa^{-1} \mathbf{q}_h)_{\mathcal{T}}
+    - ( \nabla\cdot\mathbf{v}, u_h)_{\mathcal{T}}
+    + \left<\mathbf{v}\cdot\mathbf{n}, \hat{u}_h\right>_{\partial\mathcal{T}}
+    &= 0,
+    \quad &&\forall \mathbf{v} \in \mathcal{V}_h^p,
+\\
+   - (\nabla w, \mathbf{c} u_h)_{\mathcal{T}}
+   + (w, \nabla \cdot \mathbf{q}_h)_{\mathcal{T}}
+   + (w, (\mathbf{c}\cdot\mathbf{n}) \hat{u}_H)_{\partial \mathcal{T}}
+    + \left<w, \tau (u_h - \hat{u}_h)\right>_{\partial \mathcal{T}}
+    &=
+    (w, f)_{\mathcal{T}},
+    \quad &&\forall w \in \mathcal{W}_h^p,
+\\
+  \left< \mu, \hat{u}_h\mathbf{c} \cdot \mathbf{n}
+  		+ \mathbf{q}_h\cdot \mathbf{n}
+  	    + \tau (u_h - \hat{u}_h)\right>_{\partial \mathcal{T}}
+    &=
+    \left<\mu, g_N\right>_{\partial\Omega_N},
+    \quad &&\forall \mu \in \mathcal{M}_h^p.
+@f}
+
+The unknowns $(\mathbf{q}_h, u_h)$ are referred to as local variables; they are
+represented as standard DG variables.  The unknown $\hat{u}_h$ is the skeleton
+variable which has support on the codimension-1 surfaces (faces) of the mesh.
+
+We use the notation $(\cdot, \cdot)_{\mathcal{T}} = \sum_K (\cdot, \cdot)_K$
+to denote the sum of integrals over all cells and $\left<\cdot,
+\cdot\right>_{\partial \mathcal{T}} = \sum_K \left<\cdot,
+\cdot\right>_{\partial K}$ to denote integration over all faces of all cells,
+i.e., interior faces are visited twice, once from each side and with
+the corresponding normal vectors. When combining the contribution from
+both elements sharing a face, the above equation yields terms familiar
+from the DG method, with jumps of the solution over the cell boundaries.
+
+In the equation above, the space $\mathcal {W}_h^{p}$ for the scalar variable
+$u_h$ is defined as the space of functions that are tensor
+product polynomials of degree $p$ on each cell and discontinuous over the
+element boundaries $\mathcal Q_{-p}$, i.e., the space described by
+<code>FE_DGQ<dim>(p)</code>. The space for the gradient or flux variable
+$\mathbf{q}_h$ is a vector element space where each component is
+a locally polynomial and discontinuous $\mathcal Q_{-p}$. In the code below,
+we collect these two local parts together in one FESystem where the first @p
+dim components denote the gradient part and the last scalar component
+corresponds to the scalar variable. For the skeleton component $\hat{u}_h$, we
+define a space that consists of discontinuous tensor product polynomials that
+live on the element faces, which in deal.II is implemented by the class
+FE_FaceQ. This space is otherwise similar to FE_DGQ, i.e., the solution
+function is not continuous between two neighboring faces, see also the results
+section below for an illustration.
+
+In the weak form given above, we can note the following coupling patterns:
+<ol>
+  <li> The matrix $A$ consists of local-local coupling terms.  These arise when the
+  local weighting functions $(\mathbf{v}, w)$ multiply the local solution terms
+  $(\mathbf{q}_h, u_h)$. Because the elements are discontinuous, $A$
+  is block diagonal.
+  <li> The matrix $B$ represents the local-face coupling.  These are the terms
+  with weighting functions $(\mathbf{v}, w)$ multiplying the skeleton variable
+  $\hat{u}_h$.
+  <li> The matrix $C$ represents the face-local coupling, which involves the
+  weighting function $\mu$ multiplying the local solutions $(\mathbf{q}_h, u_h)$.
+  <li>  The matrix $D$ is the face-face coupling;
+  terms involve both $\mu$ and $\hat{u}_h$.
+</ol>
+
+<h4> Post-processing and super-convergence </h4>
+
+One special feature of the HDG methods is that they typically allow for
+constructing an enriched solution that gains accuracy. This post-processing
+takes the HDG solution in an element-by-element fashion and combines it such
+that one can get $\mathcal O(h^{p+2})$ order of accuracy when using
+polynomials of degree $p$. For this to happen, there are two necessary
+ingredients:
+<ol>
+  <li> The computed solution gradient $\mathbf{q}_h$ converges at optimal rate,
+   i.e., $\mathcal{O}(h^{p+1})$.
+  <li> The average of the scalar part of the solution, <i>u<sub>h</sub></i>,
+   on each cell $K$ super-converges at rate $\mathcal{O}(h^{p+2})$.
+</ol>
+
+We now introduce a new variable $u_h^* \in \mathcal{W}_h^{p+1}$, which we find
+by minimizing the expression $|\kappa \nabla u_h^* + \mathbf{q}_h|^2$ over the cell
+$K$ under the constraint $\left(1, u_h^*\right)_K = \left(1,
+u_h\right)_K$. The constraint is necessary because the minimization
+functional does not determine the constant part of $u_h^*$. This
+translates to the following system of equations:
+@f{eqnarray*}
+\left(1, u_h^*\right)_K &=& \left(1, u_h\right)_K\\
+\left(\nabla w_h^*, \kappa \nabla u_h^*\right)_K &=&
+-\left(\nabla w_h^*, \mathbf{q}_h\right)_K
+\quad \text{for all } w_h^* \in \mathcal Q^{p+1}.
+@f}
+
+Since we test by the whole set of basis functions in the space of tensor
+product polynomials of degree $p+1$ in the second set of equations, this
+is an overdetermined system with one more equation than unknowns. We fix this
+in the code below by omitting one of these equations (since the rows in the
+Laplacian are linearly dependent when representing a constant function). As we
+will see below, this form of the post-processing gives the desired
+super-convergence result with rate $\mathcal {O}(h^{p+2})$.  It should be
+noted that there is some freedom in constructing $u_h^*$ and this minimization
+approach to extract the information from the gradient is not the only one. In
+particular, the post-processed solution defined here does not satisfy the
+convection-diffusion equation in any sense. As an alternative, the paper by
+Nguyen, Peraire and Cockburn cited above suggests another somewhat more
+involved formula for convection-diffusion that can also post-process the flux
+variable into an $H(\Omega,\mathrm{div})$-conforming variant and better
+represents the local convection-diffusion operator when the diffusion is
+small. We leave the implementation of a more sophisticated post-processing as
+a possible extension to the interested reader.
+
+Note that for vector-valued problems, the post-processing works similarly. One
+simply sets the constraint for the mean value of each vector component
+separately and uses the gradient as the main source of information.
+
+<h3> Problem specific data </h3>
+
+For this tutorial program, we consider almost the same test case as in
+step-7. The computational domain is $\Omega := [-1,1]^d$ and the exact
+solution corresponds to the one in step-7, except for a scaling. We use the
+following source centers $x_i$ for the exponentials
+<ul>
+  <li> 1D:  $\{x_i\}^1 = \{ -\frac{1}{3}, 0, \frac{1}{3} \}$,
+  <li> 2D: $\{\mathbf{x}_i\}^2 = \{ (-\frac{1}{2},\frac{1}{2}),
+                        		 (-\frac{1}{2},-\frac{1}{2}),
+  					 (\frac{1}{2},-\frac{1}{2})
+  				   \}$,
+  <li> 3D: $\{\mathbf{x}_i\}^3 = \{ (-\frac{1}{2},\frac{1}{2}, \frac{1}{4}),
+  				      (-\frac{3}{5},-\frac{1}{2}, -\frac{1}{8}),
+  				      (\frac{1}{2},-\frac{1}{2}, \frac{1}{2})
+  				   \}$.
+</ul>
+
+With the exact solution given, we then choose the forcing on the right hand
+side and the Neumann boundary condition such that we obtain this solution
+(manufactured solution technique). In this example, we choose the diffusion
+equal to one and the convection as
+\f[
+\mathbf{c} = \begin{cases}
+1, & \textrm{dim}=1 \\
+(y, -x), & \textrm{dim}=2 \\
+(y, -x, 1), & \textrm{dim}=3
+\end{cases}
+\f]
+Note that the convection is divergence-free, $\nabla \cdot \mathbf{c} = 0$.
+
+<h3> Implementation </h3>
+
+Besides implementing the above equations, the implementation below provides the following features:
+<ul>
+  <li> WorkStream to parallelize local solvers. WorkStream has been presented
+  in detail in step-9.
+  <li> Reconstruct the local DG solution from the trace.
+  <li> Post-processing the solution for superconvergence.
+  <li> DataOutFaces for direct output of the global skeleton solution.
+</ul>
diff --git a/examples/step-51/doc/kind b/examples/step-51/doc/kind
new file mode 100644
index 0000000..c1d9154
--- /dev/null
+++ b/examples/step-51/doc/kind
@@ -0,0 +1 @@
+techniques
diff --git a/examples/step-51/doc/results.dox b/examples/step-51/doc/results.dox
new file mode 100644
index 0000000..06ef541
--- /dev/null
+++ b/examples/step-51/doc/results.dox
@@ -0,0 +1,384 @@
+<h1>Results</h1>
+
+<h3>Program output</h3>
+
+We first have a look at the output generated by the program when run in 2D. In
+the four images below, we show the solution for polynomial degree $p=1$
+and cycles 2, 3, 4, and 8 of the program. In the plots, we overlay the data
+generated from the internal data (DG part) with the skeleton part ($\hat{u}$)
+into the same plot. We had to generate two different data sets because cells
+and faces represent different geometric entities, the combination of which (in
+the same file) is not supported in the VTK output of deal.II.
+
+The images show the distinctive features of HDG: The cell solution (colored
+surfaces) is discontinuous between the cells. The solution on the skeleton
+variable sits on the faces and ties together the local parts. The skeleton
+solution is not continuous on the vertices where the faces meet, even though
+its values are quite close along lines in the same coordinate direction. The
+skeleton solution can be interpreted as a rubber spring between the two sides
+that balances the jumps in the solution (or rather, the flux $\kappa \nabla u
++ \mathbf{c} u$). From the picture at the top left, it is clear that
+the bulk solution frequently over- and undershoots and that the
+skeleton variable is indeed a better approximation to the exact
+solution; this explains why we can get a better solution using a
+postprocessing step.
+
+As the mesh is refined, the jumps between the cells get
+small (we represent a smooth solution), and the skeleton solution approaches
+the interior parts. For cycle 8, there is no visible difference in the two
+variables. We also see how boundary conditions are implemented weakly and that
+the interior variables do not exactly satisfy boundary conditions. On the
+lower and left boundaries, we set Neumann boundary conditions, whereas we set
+Dirichlet conditions on the right and top boundaries.
+
+<table align="center">
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-51.sol_2.png" alt=""></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-51.sol_3.png" alt=""></td>
+  </tr>
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-51.sol_4.png" alt=""></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-51.sol_8.png" alt=""></td>
+  </tr>
+</table>
+
+Next, we have a look at the post-processed solution, again at cycles 2, 3, 4,
+and 8. This is a discontinuous solution that is locally described by second
+order polynomials. While the solution does not look very good on the mesh of
+cycle two, it looks much better for cycles three and four. As shown by the
+convergence table below, we find that it also converges more quickly to the
+analytical solution.
+
+<table align="center">
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-51.post_2.png" alt=""></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-51.post_3.png" alt=""></td>
+  </tr>
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-51.post_4.png" alt=""></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-51.post_8.png" alt=""></td>
+  </tr>
+</table>
+
+Finally, we look at the solution for $p=3$ at cycle 2. Despite the coarse
+mesh with only 64 cells, the post-processed solution is similar in quality
+to the linear solution (not post-processed) at cycle 8 with 4,096
+cells. This clearly shows the superiority of high order methods for smooth
+solutions.
+
+<table align="center">
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-51.sol_q3_2.png" alt=""></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-51.post_q3_2.png" alt=""></td>
+  </tr>
+</table>
+
+<h4>Convergence tables</h4>
+
+When the program is run, it also outputs information about the respective
+steps and convergence tables with errors in the various components in the
+end. In 2D, the convergence tables look as follows:
+
+@code
+Q1 elements, adaptive refinement:
+cells dofs   val L2    grad L2  val L2-post
+   16    80 1.804e+01 2.207e+01   1.798e+01
+   31   170 9.874e+00 1.322e+01   9.798e+00
+   61   314 7.452e-01 3.793e+00   4.891e-01
+  121   634 3.240e-01 1.511e+00   2.616e-01
+  238  1198 8.585e-02 8.212e-01   1.808e-02
+  454  2290 4.802e-02 5.178e-01   2.195e-02
+  898  4378 2.561e-02 2.947e-01   4.318e-03
+ 1720  7864 1.306e-02 1.664e-01   2.978e-03
+ 3271 14638 7.025e-03 9.815e-02   1.075e-03
+ 6217 27214 4.119e-03 6.407e-02   9.975e-04
+
+Q1 elements, global refinement:
+cells dofs      val L2        grad L2      val L2-post
+   16    80 1.804e+01    - 2.207e+01    - 1.798e+01    -
+   36   168 6.125e+00 2.66 9.472e+00 2.09 6.084e+00 2.67
+   64   288 9.785e-01 6.38 4.260e+00 2.78 7.102e-01 7.47
+  144   624 2.730e-01 3.15 1.866e+00 2.04 6.115e-02 6.05
+  256  1088 1.493e-01 2.10 1.046e+00 2.01 2.880e-02 2.62
+  576  2400 6.965e-02 1.88 4.846e-01 1.90 9.204e-03 2.81
+ 1024  4224 4.018e-02 1.91 2.784e-01 1.93 4.027e-03 2.87
+ 2304  9408 1.831e-02 1.94 1.264e-01 1.95 1.236e-03 2.91
+ 4096 16640 1.043e-02 1.96 7.185e-02 1.96 5.306e-04 2.94
+ 9216 37248 4.690e-03 1.97 3.228e-02 1.97 1.599e-04 2.96
+
+Q3 elements, global refinement:
+cells dofs      val L2        grad L2      val L2-post
+   16   160 3.613e-01    - 1.891e+00    - 3.020e-01    -
+   36   336 6.411e-02 4.26 5.081e-01 3.24 3.238e-02 5.51
+   64   576 3.480e-02 2.12 2.533e-01 2.42 5.277e-03 6.31
+  144  1248 8.297e-03 3.54 5.924e-02 3.58 6.330e-04 5.23
+  256  2176 2.254e-03 4.53 1.636e-02 4.47 1.403e-04 5.24
+  576  4800 4.558e-04 3.94 3.277e-03 3.96 1.844e-05 5.01
+ 1024  8448 1.471e-04 3.93 1.052e-03 3.95 4.378e-06 5.00
+ 2304 18816 2.956e-05 3.96 2.104e-04 3.97 5.750e-07 5.01
+ 4096 33280 9.428e-06 3.97 6.697e-05 3.98 1.362e-07 5.01
+ 9216 74496 1.876e-06 3.98 1.330e-05 3.99 1.788e-08 5.01
+@endcode
+
+
+One can see the error reduction upon grid refinement, and for the cases where
+global refinement was performed, also the convergence rates. The quadratic
+convergence rates of Q1 elements in the $L_2$ norm for both the scalar
+variable and the gradient variable are apparent, as is the cubic rate for the
+postprocessed scalar variable in the $L_2$ norm. Note this distinctive
+feature of an HDG solution. In typical continuous finite elements, the
+gradient of the solution of order $p$ converges at rate $p$ only, as
+opposed to $p+1$ for the actual solution. Even though superconvergence
+results for finite elements are also available (e.g. superconvergent patch
+recovery first introduced by Zienkiewicz and Zhu), these are typically limited
+to structured meshes and other special cases. For Q3 HDG variables, the scalar
+variable and gradient converge at fourth order and the postprocessed scalar
+variable at fifth order.
+
+The same convergence rates are observed in 3d.
+@code
+Q1 elements, adaptive refinement:
+cells   dofs    val L2    grad L2  val L2-post
+     8     144 7.122e+00 1.941e+01   6.102e+00
+    29     500 3.309e+00 1.023e+01   2.145e+00
+   113    1792 2.204e+00 1.023e+01   1.912e+00
+   379    5732 6.085e-01 5.008e+00   2.233e-01
+  1317   19412 1.543e-01 1.464e+00   4.196e-02
+  4579   64768 5.058e-02 5.611e-01   9.521e-03
+ 14596  199552 2.129e-02 3.122e-01   4.569e-03
+ 46180  611400 1.033e-02 1.622e-01   1.684e-03
+144859 1864212 5.007e-03 8.371e-02   7.364e-04
+451060 5684508 2.518e-03 4.562e-02   3.070e-04
+
+Q1 elements, global refinement:
+cells   dofs       val L2          grad L2       val L2-post
+     8     144 7.122e+00    - 1.941e+01     - 6.102e+00    -
+    27     432 5.491e+00 0.64 2.184e+01 -0.29 4.448e+00 0.78
+    64     960 3.646e+00 1.42 1.299e+01  1.81 3.306e+00 1.03
+   216    3024 1.595e+00 2.04 8.550e+00  1.03 1.441e+00 2.05
+   512    6912 6.922e-01 2.90 5.306e+00  1.66 2.511e-01 6.07
+  1728   22464 2.915e-01 2.13 2.490e+00  1.87 8.588e-02 2.65
+  4096   52224 1.684e-01 1.91 1.453e+00  1.87 4.055e-02 2.61
+ 13824  172800 7.972e-02 1.84 6.861e-01  1.85 1.335e-02 2.74
+ 32768  405504 4.637e-02 1.88 3.984e-01  1.89 5.932e-03 2.82
+110592 1354752 2.133e-02 1.92 1.830e-01  1.92 1.851e-03 2.87
+
+Q3 elements, global refinement:
+cells   dofs       val L2        grad L2      val L2-post
+     8     576 5.670e+00    - 1.868e+01    - 5.462e+00    -
+    27    1728 1.048e+00 4.16 6.988e+00 2.42 8.011e-01 4.73
+    64    3840 2.831e-01 4.55 2.710e+00 3.29 1.363e-01 6.16
+   216   12096 7.883e-02 3.15 7.721e-01 3.10 2.158e-02 4.55
+   512   27648 3.642e-02 2.68 3.305e-01 2.95 5.231e-03 4.93
+  1728   89856 8.546e-03 3.58 7.581e-02 3.63 7.640e-04 4.74
+  4096  208896 2.598e-03 4.14 2.313e-02 4.13 1.783e-04 5.06
+ 13824  691200 5.314e-04 3.91 4.697e-03 3.93 2.355e-05 4.99
+ 32768 1622016 1.723e-04 3.91 1.517e-03 3.93 5.602e-06 4.99
+110592 5419008 3.482e-05 3.94 3.055e-04 3.95 7.374e-07 5.00
+@endcode
+
+<h3>Comparison with continuous finite elements</h3>
+
+<h4>Results for 2D</h4>
+
+The convergence tables verify the expected convergence rates stated in the
+introduction. Now, we want to show a quick comparison of the computational
+efficiency of the HDG method compared to a usual finite element (continuous
+Galerkin) method on the problem of this tutorial. Of course, stability aspects
+of the HDG method compared to continuous finite elements for
+transport-dominated problems are also important in practice, which is an
+aspect not seen on a problem with smooth analytic solution. In the picture
+below, we compare the $L_2$ error as a function of the number of degrees of
+freedom (left) and of the computing time spent in the linear solver (right)
+for two space dimensions of continuous finite elements (CG) and the hybridized
+discontinuous Galerkin method presented in this tutorial. As opposed to the
+tutorial where we only use unpreconditioned BiCGStab, the times shown in the
+figures below use the Trilinos algebraic multigrid preconditioner in
+TrilinosWrappers::PreconditionAMG. For the HDG part, a wrapper around
+ChunkSparseMatrix for the trace variable has been used in order to utilize the
+block structure in the matrix on the finest level.
+
+<table align="center">
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-51.2d_plain.png" width="400" alt=""></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-51.2dt_plain.png" width="400" alt=""></td>
+  </tr>
+</table>
+
+The results in the graphs show that the HDG method is slower than continuous
+finite elements at $p=1$, about equally fast for cubic elements and
+faster for sixth order elements. However, we have seen above that the HDG
+method actually produces solutions which are more accurate than what is
+represented in the original variables. Therefore, in the next two plots below
+we instead display the error of the post-processed solution for HDG (denoted
+by $p=1^*$ for example). We now see a clear advantage of HDG for the same
+amount of work for both $p=3$ and $p=6$, and about the same quality
+for $p=1$.
+
+<table align="center">
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-51.2d_post.png" width="400" alt=""></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-51.2dt_post.png" width="400" alt=""></td>
+  </tr>
+</table>
+
+Since the HDG method actually produces results converging as
+$h^{p+2}$, we should compare it to a continuous Galerkin
+solution with the same asymptotic convergence behavior, i.e., FE_Q with degree
+$p+1$. If we do this, we get the convergence curves below. We see that
+CG with second order polynomials is again clearly better than HDG with
+linears. However, the advantage of HDG for higher orders remains.
+
+<table align="center">
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-51.2d_postb.png" width="400" alt=""></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-51.2dt_postb.png" width="400" alt=""></td>
+  </tr>
+</table>
+
+The results are in line with properties of DG methods in general: Best
+performance is typically not achieved for linear elements, but rather at
+somewhat higher order, usually around $p=3$. This is because of a
+volume-to-surface effect for discontinuous solutions with too much of the
+solution living on the surfaces and hence duplicating work when the elements
+are linear. Put in other words, DG methods are often most efficient when used
+at relatively high order, despite their focus on a discontinuous (and hence,
+seemingly less accurate) representation of solutions.
+
+<h4>Results for 3D</h4>
+
+We now show the same figures in 3D: The first row shows the number of degrees
+of freedom and computing time versus the $L_2$ error in the scalar variable
+$u$ for CG and HDG at order $p$, the second row shows the
+post-processed HDG solution instead of the original one, and the third row
+compares the post-processed HDG solution with CG at order $p+1$. In 3D,
+the volume-to-surface effect makes the cost of HDG somewhat higher and the CG
+solution is clearly better than HDG for linears by any metric. For cubics, HDG
+and CG are of similar quality, whereas HDG is again more efficient for sixth
+order polynomials. One can alternatively also use the combination of FE_DGP
+and FE_FaceP instead of (FE_DGQ, FE_FaceQ), which do not use tensor product
+polynomials of degree $p$ but Legendre polynomials of <i>complete</i>
+degree $p$. There are fewer degrees of freedom on the skeleton variable
+for FE_FaceP for a given mesh size, but the solution quality (error vs. number
+of DoFs) is very similar to the results for FE_FaceQ.
+
+<table align="center">
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-51.3d_plain.png" width="400" alt=""></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-51.3dt_plain.png" width="400" alt=""></td>
+  </tr>
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-51.3d_post.png" width="400" alt=""></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-51.3dt_post.png" width="400" alt=""></td>
+  </tr>
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-51.3d_postb.png" width="400" alt=""></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-51.3dt_postb.png" width="400" alt=""></td>
+  </tr>
+</table>
+
+One final note on the efficiency comparison: We tried to use general-purpose
+sparse matrix structures and similar solvers (optimal AMG preconditioners for
+both without particular tuning of the AMG parameters on any of them) to give a
+fair picture of the cost versus accuracy of two methods, on a toy example. It
+should be noted however that geometric multigrid (GMG) for continuous finite elements is about a
+factor four to five faster for $p=3$ and $p=6$. The authors of this
+tutorial have not seen similarly advanced solvers for the HDG linear
+systems. Also, there are other implementation aspects for CG available such as
+fast matrix-free approaches as shown in step-37 that make higher order
+continuous elements more competitive. Again, it is not clear to the authors of
+the tutorial whether similar improvements could be made for HDG.
+
+
+<h3>Possibilities for improvements</h3>
+
+As already mentioned in the introduction, one possibility is to implement
+another post-processing technique as discussed in the literature.
+
+A second item that is not done optimally relates to the performance of this
+program, which is of course an issue in practical applications (weighing in
+also the better solution quality of (H)DG methods for transport-dominated
+problems). Let us look at
+the computing time of the tutorial program and the share of the individual
+components:
+
+<table align="center" border="1">
+  <tr>
+    <th> </th>
+    <th> </th>
+    <th>Setup</th>
+    <th>Assemble</th>
+    <th>Solve</th>
+    <th>Trace reconstruct</th>
+    <th>Post-processing</th>
+    <th>Output</th>
+  </tr>
+  <tr>
+    <th> </th>
+    <th>Total time</th>
+    <th colspan="6">Relative share</th>
+  </tr>
+  <tr>
+    <td align="left">2D, Q1, cycle 9, 37,248 dofs</td>
+    <td align="center">5.34s</td>
+    <td align="center">0.7%</td>
+    <td align="center">1.2%</td>
+    <td align="center">89.5%</td>
+    <td align="center">0.9%</td>
+    <td align="center">2.3%</td>
+    <td align="center">5.4%</td>
+  </tr>
+  <tr>
+    <td align="left">2D, Q3, cycle 9, 74,496 dofs</td>
+    <td align="center">22.2s</td>
+    <td align="center">0.4%</td>
+    <td align="center">4.3%</td>
+    <td align="center">84.1%</td>
+    <td align="center">4.1%</td>
+    <td align="center">3.5%</td>
+    <td align="center">3.6%</td>
+  </tr>
+  <tr>
+    <td align="left">3D, Q1, cycle 7, 172,800 dofs</td>
+    <td align="center">9.06s</td>
+    <td align="center">3.1%</td>
+    <td align="center">8.9%</td>
+    <td align="center">42.7%</td>
+    <td align="center">7.0%</td>
+    <td align="center">20.6%</td>
+    <td align="center">17.7%</td>
+  </tr>
+  <tr>
+    <td align="left">3D, Q3, cycle 7, 691,200 dofs</td>
+    <td align="center">516s</td>
+    <td align="center">0.6%</td>
+    <td align="center">34.5%</td>
+    <td align="center">13.4%</td>
+    <td align="center">32.8%</td>
+    <td align="center">17.1%</td>
+    <td align="center">1.5%</td>
+  </tr>
+</table>
+
+As can be seen from the table, the solver and assembly calls dominate the
+runtime of the program. This also gives a clear indication of where
+improvements would make the most sense:
+
+<ol>
+  <li> Better linear solvers: We use a BiCGStab iterative solver without
+  preconditioner, where the number of iterations increases with increasing
+  problem size (the number of iterations for Q1 elements and global
+  refinements starts at 35 for the small sizes but increases up to 701 for the
+  largest size). To do better, one could for example use an algebraic
+  multigrid preconditioner from Trilinos. For diffusion-dominated
+  problems such as
+  the problem at hand with finer meshes, such a solver can be designed that
+  uses the matrix-vector products from the more efficient ChunkSparseMatrix on
+  the finest level, as long as we are not working in parallel with MPI. For
+  MPI-parallelized computation, a standard TrilinosWrappers::SparseMatrix can
+  be used.
+
+  <li> Speed up assembly by pre-assembling parts that do not change from one
+  cell to another (those that contain neither variable coefficients nor
+  mapping-dependent terms).
+</ol>
diff --git a/examples/step-51/doc/tooltip b/examples/step-51/doc/tooltip
new file mode 100644
index 0000000..784e257
--- /dev/null
+++ b/examples/step-51/doc/tooltip
@@ -0,0 +1 @@
+Convection-diffusion equation. Hybridizable discontinuous Galerkin methods. Face elements.
diff --git a/examples/step-51/step-51.cc b/examples/step-51/step-51.cc
new file mode 100644
index 0000000..80434c8
--- /dev/null
+++ b/examples/step-51/step-51.cc
@@ -0,0 +1,1449 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2013 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Author: Martin Kronbichler, Technische Universität München,
+ *         Scott T. Miller, The Pennsylvania State University, 2013
+ */
+
+// @sect3{Include files}
+//
+// Most of the deal.II include files have already been covered in previous
+// examples and are not commented on.
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/tensor_function.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/work_stream.h>
+#include <deal.II/base/convergence_table.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/lac/solver_bicgstab.h>
+#include <deal.II/lac/precondition.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/grid_refinement.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/tria_boundary_lib.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_renumbering.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/fe/fe_dgq.h>
+#include <deal.II/fe/fe_system.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/error_estimator.h>
+#include <deal.II/numerics/matrix_tools.h>
+#include <deal.II/numerics/data_out.h>
+
+// However, we do have a few new includes for the example.
+// The first one defines finite element spaces on the faces
+// of the triangulation, which we refer to as the 'skeleton'.
+// These finite elements do not have any support on the element
+// interior, and they represent polynomials that have a single
+// value on each codimension-1 surface, but admit discontinuities
+// on codimension-2 surfaces.
+#include <deal.II/fe/fe_face.h>
+
+// The second new file we include defines a new type of sparse matrix.  The
+// regular <code>SparseMatrix</code> type stores indices to all non-zero
+// entries.  The <code>ChunkSparseMatrix</code> takes advantage of the coupled
+// nature of DG solutions.  It stores an index to a matrix sub-block of a
+// specified size.  In the HDG context, this sub-block-size is actually the
+// number of degrees of freedom per face defined by the skeleton solution
+// field. This reduces the memory consumption of the matrix by up to one third
+// and results in similar speedups when using the matrix in solvers.
+#include <deal.II/lac/chunk_sparse_matrix.h>
+
+// The final new include for this example deals with data output.  Since
+// we have a finite element field defined on the skeleton of the mesh,
+// we would like to visualize what that solution actually is.
+// DataOutFaces does exactly this; the interface is almost the same
+// as the familiar DataOut, but the output only has codimension-1 data for
+// the simulation.
+#include <deal.II/numerics/data_out_faces.h>
+
+#include <iostream>
+
+
+
+// We start by putting the class into its own namespace.
+namespace Step51
+{
+
+  using namespace dealii;
+
+// @sect3{Equation data}
+//
+// The structure of the analytic solution is the same as in step-7. There are
+// two exceptions. Firstly, we also create a solution for the 3d case, and
+// secondly, we scale the solution so its norm is of order unity for all
+// values of the solution width.
+  template <int dim>
+  class SolutionBase
+  {
+  protected:
+    static const unsigned int  n_source_centers = 3;
+    static const Point<dim>    source_centers[n_source_centers];
+    static const double        width;
+  };
+
+  // Source centers used when dim == 1.
+  template <>
+  const Point<1>
+  SolutionBase<1>::source_centers[SolutionBase<1>::n_source_centers]
+    = { Point<1>(-1.0 / 3.0),
+        Point<1>(0.0),
+        Point<1>(+1.0 / 3.0)
+      };
+
+  // Source centers used when dim == 2.
+  template <>
+  const Point<2>
+  SolutionBase<2>::source_centers[SolutionBase<2>::n_source_centers]
+    = { Point<2>(-0.5, +0.5),
+        Point<2>(-0.5, -0.5),
+        Point<2>(+0.5, -0.5)
+      };
+  // Source centers used when dim == 3.
+  template <>
+  const Point<3>
+  SolutionBase<3>::source_centers[SolutionBase<3>::n_source_centers]
+    = { Point<3>(-0.5, +0.5, 0.25),
+        Point<3>(-0.6, -0.5, -0.125),
+        Point<3>(+0.5, -0.5, 0.5)
+      };
+  // The width is defined once and is the same for every dimension.
+  template <int dim>
+  const double SolutionBase<dim>::width = 1./5.;
+
+
+  template <int dim>
+  class Solution : public Function<dim>,
+    protected SolutionBase<dim>
+  {
+  public:
+    Solution () : Function<dim>() {}
+    // Sum of exponentials around the source centers; @p component is unused.
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+    // Gradient of the function returned by value(); @p component is unused.
+    virtual Tensor<1,dim> gradient (const Point<dim>   &p,
+                                    const unsigned int  component = 0) const;
+  };
+
+
+
+  template <int dim>
+  double Solution<dim>::value (const Point<dim>   &p,
+                               const unsigned int) const
+  {
+    // Sum one exponential bump per source center, then normalize the sum.
+    const double width_squared = this->width * this->width;
+    double bump_sum = 0;
+    for (unsigned int center=0; center<this->n_source_centers; ++center)
+      {
+        const Tensor<1,dim> offset = p - this->source_centers[center];
+        bump_sum += std::exp(-offset.norm_square() / width_squared);
+      }
+    const double scaling = std::sqrt(2. * numbers::PI) * this->width;
+    return bump_sum / Utilities::fixed_power<dim>(scaling);
+  }
+
+
+
+  template <int dim>
+  Tensor<1,dim> Solution<dim>::gradient (const Point<dim>   &p,
+                                         const unsigned int) const
+  {
+    // Each bump contributes -2/width^2 * exp(-|x-x_i|^2/width^2) * (x-x_i);
+    // the common normalization is applied once to the accumulated sum.
+    const double width_squared = this->width * this->width;
+    Tensor<1,dim> gradient_sum;
+
+    for (unsigned int center=0; center<this->n_source_centers; ++center)
+      {
+        const Tensor<1,dim> offset = p - this->source_centers[center];
+        const double weight = -2 / width_squared *
+                              std::exp(-offset.norm_square() / width_squared);
+        gradient_sum += weight * offset;
+      }
+    const double scaling = std::sqrt(2 * numbers::PI) * this->width;
+    return gradient_sum / Utilities::fixed_power<dim>(scaling);
+  }
+
+
+
+// This class implements a function where the negative gradient and the scalar
+// solution are collected together, in that order, giving dim+1 components.
+// It is used when computing the error of the HDG approximation and simply
+// calls the value and gradient functions of the Solution class.
+  template <int dim>
+  class SolutionAndGradient : public Function<dim>,
+    protected SolutionBase<dim>
+  {
+  public:
+    SolutionAndGradient () : Function<dim>(dim+1) {}
+
+    virtual void vector_value (const Point<dim>   &p,
+                               Vector<double>     &v) const;
+  };
+
+  template <int dim>
+  void SolutionAndGradient<dim>::vector_value (const Point<dim> &p,
+                                               Vector<double>   &v) const
+  {
+    // Layout of v: negative gradient in entries [0,dim), value in entry dim.
+    AssertDimension(v.size(), dim+1);
+    const Tensor<1,dim> exact_gradient = Solution<dim>().gradient(p);
+    for (unsigned int component=0; component<dim; ++component)
+      v[component] = -exact_gradient[component];
+    v[dim] = Solution<dim>().value(p);
+  }
+
+
+
+// Next comes the convection velocity. As described in the introduction, it
+// is $(y, -x)$ in 2D and $(y, -x, 1)$ in 3D, which is divergence-free; the
+// 1D case simply uses the constant velocity $1$.
+  template <int dim>
+  class ConvectionVelocity : public TensorFunction<1,dim>
+  {
+  public:
+    ConvectionVelocity() : TensorFunction<1,dim>() {}
+
+    virtual Tensor<1,dim> value (const Point<dim> &p) const;
+  };
+
+
+
+  template <int dim>
+  Tensor<1,dim>
+  ConvectionVelocity<dim>::value(const Point<dim> &p) const
+  {
+    // Constant velocity in 1d; $(y, -x)$ in 2d, with a constant third
+    // component added in 3d. Other dimensions are rejected by the assertion.
+    Tensor<1,dim> velocity;
+    if (dim == 1)
+      {
+        velocity[0] = 1;
+      }
+    else if (dim == 2 || dim == 3)
+      {
+        velocity[0] = p[1];
+        velocity[1] = -p[0];
+        if (dim == 3)
+          velocity[2] = 1;
+      }
+    else
+      {
+        Assert(false, ExcNotImplemented());
+      }
+    return velocity;
+  }
+
+
+
+// The last function we implement is the right hand side for the manufactured
+// solution. It is very similar to step-7, with the exception that we now have
+// a convection term instead of the reaction term. Since the velocity field is
+// incompressible, i.e. $\nabla \cdot \mathbf{c} = 0$, this term simply reads
+// $\mathbf{c} \cdot \nabla u$.
+  template <int dim>
+  class RightHandSide : public Function<dim>,
+    protected SolutionBase<dim>
+  {
+  public:
+    RightHandSide () : Function<dim>() {}
+
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+
+  private:
+    const ConvectionVelocity<dim> convection_velocity;
+  };
+
+
+  template <int dim>
+  double RightHandSide<dim>::value (const Point<dim>   &p,
+                                    const unsigned int) const
+  {
+    // Every source center contributes its exponential times a polynomial
+    // factor that combines the diffusion and convection terms.
+    const Tensor<1,dim> velocity = convection_velocity.value(p);
+    const double width_squared = this->width * this->width;
+    double bump_sum = 0;
+    for (unsigned int center=0; center<this->n_source_centers; ++center)
+      {
+        const Tensor<1,dim> offset = p - this->source_centers[center];
+        const double distance_squared = offset.norm_square();
+        const double factor = 2*dim - 2*velocity*offset
+                              - 4*distance_squared/width_squared;
+        bump_sum += factor / width_squared *
+                    std::exp(-distance_squared / width_squared);
+      }
+    const double scaling = std::sqrt(2 * numbers::PI) * this->width;
+    return bump_sum / Utilities::fixed_power<dim>(scaling);
+  }
+
+// @sect3{The HDG solver class}
+
+// The HDG solution procedure follows closely that of step-7. The major
+// difference is the use of three different sets of <code>DoFHandler</code> and FE
+// objects, along with the <code>ChunkSparseMatrix</code> and the
+// corresponding solution vectors. We also use WorkStream to enable a
+// multithreaded local solution process which exploits the embarrassingly
+// parallel nature of the local solver. For WorkStream, we define the local
+// operations on a cell and a copy function into the global matrix and
+// vector. We do this both for the assembly (which is run twice, once when we
+// generate the system matrix and once when we compute the element-interior
+// solutions from the skeleton values) and for the postprocessing where
+// we extract a solution that converges at higher order.
+  template <int dim>
+  class HDG
+  {
+  public:
+    enum RefinementMode
+    {
+      global_refinement, adaptive_refinement
+    };
+
+    HDG (const unsigned int degree,
+         const RefinementMode refinement_mode);
+    void run ();
+
+  private:
+
+    void setup_system ();
+    void assemble_system (const bool trace_reconstruct = false);
+    void solve ();
+    void postprocess ();
+    void refine_grid (const unsigned int cycle);
+    void output_results (const unsigned int cycle);
+
+    // Data for the assembly and solution of the primal variables.
+    struct PerTaskData;
+    struct ScratchData;
+
+    // Post-processing the solution to obtain $u^*$ is an element-by-element
+    // procedure; as such, we do not need to assemble any global data and do
+    // not declare any 'task data' for WorkStream to use.
+    struct PostProcessScratchData;
+
+    // The following three functions are used by WorkStream to do the actual
+    // work of the program.
+    void assemble_system_one_cell (const typename DoFHandler<dim>::active_cell_iterator &cell,
+                                   ScratchData &scratch,
+                                   PerTaskData &task_data);
+
+    void copy_local_to_global(const PerTaskData &data);
+
+    void postprocess_one_cell (const typename DoFHandler<dim>::active_cell_iterator &cell,
+                               PostProcessScratchData &scratch,
+                               unsigned int &empty_data);
+
+
+    Triangulation<dim>   triangulation;
+
+    // The 'local' solutions are interior to each element.  These
+    // represent the primal solution field $u$ as well as the auxiliary
+    // field $\mathbf{q}$.
+    FESystem<dim>        fe_local;
+    DoFHandler<dim>      dof_handler_local;
+    Vector<double>       solution_local;
+
+    // The new finite element type and corresponding <code>DoFHandler</code> are
+    // used for the global skeleton solution that couples the element-level local
+    // solutions.
+    FE_FaceQ<dim>        fe;
+    DoFHandler<dim>      dof_handler;
+    Vector<double>       solution;
+    Vector<double>       system_rhs;
+
+    // As stated in the introduction, HDG solutions can be post-processed to
+    // attain superconvergence rates of $\mathcal{O}(h^{p+2})$.  The
+    // post-processed solution is a discontinuous finite element solution
+    // representing the primal variable on the interior of each cell.  We define
+    // a FE type of degree $p+1$ to represent this post-processed solution,
+    // which we only use for output after constructing it.
+    FE_DGQ<dim>          fe_u_post;
+    DoFHandler<dim>      dof_handler_u_post;
+    Vector<double>       solution_u_post;
+
+    // The degrees of freedom corresponding to the skeleton strongly enforce
+    // Dirichlet boundary conditions, just as in a continuous Galerkin finite
+    // element method.  We can enforce the boundary conditions in an analogous
+    // manner through the use of ConstraintMatrix constructs. In
+    // addition, hanging nodes are handled in the same way as for
+    // continuous finite elements: For the face elements which
+    // only define degrees of freedom on the face, this process sets the
+    // solution on the refined side to be the one from the coarse side.
+    ConstraintMatrix     constraints;
+
+    // The usage of the ChunkSparseMatrix class is similar to the usual sparse
+    // matrices: You need a sparsity pattern of type ChunkSparsityPattern and
+    // the actual matrix object. When creating the sparsity pattern, we just
+    // have to additionally pass the size of local blocks.
+    ChunkSparsityPattern sparsity_pattern;
+    ChunkSparseMatrix<double> system_matrix;
+
+    // Same as step-7:
+    const RefinementMode refinement_mode;
+    ConvergenceTable     convergence_table;
+  };
+
+  // @sect3{The HDG class implementation}
+
+  // @sect4{Constructor}
+  // The constructor is similar to those in other examples, with the
+  // exception of handling multiple <code>DoFHandler</code> and
+  // <code>FiniteElement</code> objects. Note that the local DG part is a
+  // system of @p dim copies of FE_DGQ for the gradient/flux part plus one
+  // copy for the scalar part, all of the same polynomial degree.
+  template <int dim>
+  HDG<dim>::HDG (const unsigned int degree,
+                 const RefinementMode refinement_mode) :
+    fe_local (FE_DGQ<dim>(degree), dim,
+              FE_DGQ<dim>(degree), 1),
+    dof_handler_local (triangulation),
+    fe (degree),
+    dof_handler (triangulation),
+    fe_u_post (degree+1),
+    dof_handler_u_post (triangulation),
+    refinement_mode (refinement_mode)
+  {}
+
+
+
+  // @sect4{HDG::setup_system}
+  // The system for an HDG solution is set up in an analogous manner to most
+  // of the other tutorial programs.  We are careful to distribute dofs with
+  // all of our <code>DoFHandler</code> objects.  The @p solution and @p system_matrix
+  // objects go with the global skeleton solution.
+  template <int dim>
+  void
+  HDG<dim>::setup_system ()
+  {
+    dof_handler_local.distribute_dofs(fe_local);
+    dof_handler.distribute_dofs(fe);
+    dof_handler_u_post.distribute_dofs(fe_u_post);
+
+    std::cout << "   Number of degrees of freedom: "
+              << dof_handler.n_dofs()
+              << std::endl;
+    // Only the skeleton dofs are reported; they size the global vectors.
+    solution.reinit (dof_handler.n_dofs());
+    system_rhs.reinit (dof_handler.n_dofs());
+    // The element-interior and post-processed vectors use their own sizes.
+    solution_local.reinit (dof_handler_local.n_dofs());
+    solution_u_post.reinit (dof_handler_u_post.n_dofs());
+    // Hanging-node constraints first, then boundary values for boundary id 0.
+    constraints.clear ();
+    DoFTools::make_hanging_node_constraints (dof_handler, constraints);
+    typename FunctionMap<dim>::type boundary_functions;
+    Solution<dim> solution_function;
+    boundary_functions[0] = &solution_function;
+    VectorTools::project_boundary_values (dof_handler,
+                                          boundary_functions,
+                                          QGauss<dim-1>(fe.degree+1),
+                                          constraints);
+    constraints.close ();
+
+    // When creating the chunk sparsity pattern, we first create the usual
+    // compressed sparsity pattern and then set the chunk size, which is equal
+    // to the number of dofs on a face, when copying this into the final
+    // sparsity pattern.
+    {
+      DynamicSparsityPattern dsp (dof_handler.n_dofs());
+      DoFTools::make_sparsity_pattern (dof_handler, dsp,
+                                       constraints, false);
+      sparsity_pattern.copy_from(dsp, fe.dofs_per_face);
+    }
+    system_matrix.reinit (sparsity_pattern);
+  }
+
+
+
+  // @sect4{HDG::PerTaskData}
+  // Next comes the definition of the local data structures for the parallel
+  // assembly. The first structure @p PerTaskData contains the local vector
+  // and matrix that are written into the global matrix, whereas the
+  // ScratchData contains all data that we need for the local assembly. There
+  // is one variable worth noting here, namely the boolean variable @p
+  // trace_reconstruct. As mentioned in the introduction, we solve the HDG
+  // system in two steps. First, we create a linear system for the skeleton
+  // system where we condense the local part into it via the Schur complement
+  // $D-CA^{-1}B$. Then, we solve for the local part using the skeleton
+  // solution. For these two steps, we need the same matrices on the elements
+  // twice, which we want to compute by two assembly steps. Since most of the
+  // code is similar, we do this with the same function but only switch
+  // between the two based on a flag that we set when starting the
+  // assembly. Since we need to pass this information on to the local worker
+  // routines, we store it once in the task data.
+  template <int dim>
+  struct HDG<dim>::PerTaskData
+  {
+    FullMatrix<double> cell_matrix;
+    Vector<double>     cell_vector;
+    std::vector<types::global_dof_index> dof_indices;
+    // false: assemble the skeleton system; true: reconstruct the local part.
+    bool trace_reconstruct;
+    // All three containers are sized by @p n_dofs; the flag is stored as given.
+    PerTaskData(const unsigned int n_dofs, const bool trace_reconstruct)
+      : cell_matrix(n_dofs, n_dofs),
+        cell_vector(n_dofs),
+        dof_indices(n_dofs),
+        trace_reconstruct(trace_reconstruct)
+    {}
+  };
+
+
+
+  // @sect4{HDG::ScratchData}
+  // @p ScratchData contains persistent data for each
+  // thread within <code>WorkStream</code>.  The <code>FEValues</code>, matrix,
+  // and vector objects should be familiar by now.  There are two objects that
+  // need to be discussed: @p std::vector<std::vector<unsigned int> >
+  // fe_local_support_on_face and @p std::vector<std::vector<unsigned int> >
+  // fe_support_on_face.  These are used to indicate whether or not the finite
+  // elements chosen have support (non-zero values) on a given face of the
+  // reference cell for the local part associated to @p fe_local and the
+  // skeleton part @p fe. We extract this information in the
+  // constructor and store it once for all cells that we work on.  Had we not
+  // stored this information, we would be forced to assemble a large number of
+  // zero terms on each cell, which would significantly slow the program.
+  template <int dim>
+  struct HDG<dim>::ScratchData
+  {
+    FEValues<dim>     fe_values_local;
+    FEFaceValues<dim> fe_face_values_local;
+    FEFaceValues<dim> fe_face_values;
+    // Local-local, local-face and face-local blocks plus temporaries.
+    FullMatrix<double> ll_matrix;
+    FullMatrix<double> lf_matrix;
+    FullMatrix<double> fl_matrix;
+    FullMatrix<double> tmp_matrix;
+    Vector<double>     l_rhs;
+    Vector<double>     tmp_rhs;
+    // Shape function data cached per quadrature point during assembly.
+    std::vector<Tensor<1,dim> > q_phi;
+    std::vector<double>         q_phi_div;
+    std::vector<double>         u_phi;
+    std::vector<Tensor<1,dim> > u_phi_grad;
+    std::vector<double>         tr_phi;
+    std::vector<double>         trace_values;
+    // For each face, the indices of the shape functions with support there.
+    std::vector<std::vector<unsigned int> > fe_local_support_on_face;
+    std::vector<std::vector<unsigned int> > fe_support_on_face;
+
+    ConvectionVelocity<dim> convection_velocity;
+    RightHandSide<dim> right_hand_side;
+    const Solution<dim> exact_solution;
+    // The three function objects above are default-constructed by both ctors.
+    ScratchData(const FiniteElement<dim> &fe,
+                const FiniteElement<dim> &fe_local,
+                const QGauss<dim>   &quadrature_formula,
+                const QGauss<dim-1> &face_quadrature_formula,
+                const UpdateFlags local_flags,
+                const UpdateFlags local_face_flags,
+                const UpdateFlags flags)
+      :
+      fe_values_local (fe_local, quadrature_formula, local_flags),
+      fe_face_values_local (fe_local, face_quadrature_formula, local_face_flags),
+      fe_face_values (fe, face_quadrature_formula, flags),
+      ll_matrix (fe_local.dofs_per_cell, fe_local.dofs_per_cell),
+      lf_matrix (fe_local.dofs_per_cell, fe.dofs_per_cell),
+      fl_matrix (fe.dofs_per_cell, fe_local.dofs_per_cell),
+      tmp_matrix (fe.dofs_per_cell, fe_local.dofs_per_cell),
+      l_rhs (fe_local.dofs_per_cell),
+      tmp_rhs (fe_local.dofs_per_cell),
+      q_phi (fe_local.dofs_per_cell),
+      q_phi_div (fe_local.dofs_per_cell),
+      u_phi (fe_local.dofs_per_cell),
+      u_phi_grad (fe_local.dofs_per_cell),
+      tr_phi (fe.dofs_per_cell),
+      trace_values(face_quadrature_formula.size()),
+      fe_local_support_on_face(GeometryInfo<dim>::faces_per_cell),
+      fe_support_on_face(GeometryInfo<dim>::faces_per_cell)
+    {
+      for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+        for (unsigned int i=0; i<fe_local.dofs_per_cell; ++i)
+          {
+            if (fe_local.has_support_on_face(i,face))
+              fe_local_support_on_face[face].push_back(i);
+          }
+
+      for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+        for (unsigned int i=0; i<fe.dofs_per_cell; ++i)
+          {
+            if (fe.has_support_on_face(i,face))
+              fe_support_on_face[face].push_back(i);
+          }
+    }
+    // Copies rebuild the FEValues objects from the source's FE, quadrature, flags.
+    ScratchData(const ScratchData &sd)
+      :
+      fe_values_local (sd.fe_values_local.get_fe(),
+                       sd.fe_values_local.get_quadrature(),
+                       sd.fe_values_local.get_update_flags()),
+      fe_face_values_local (sd.fe_face_values_local.get_fe(),
+                            sd.fe_face_values_local.get_quadrature(),
+                            sd.fe_face_values_local.get_update_flags()),
+      fe_face_values (sd.fe_face_values.get_fe(),
+                      sd.fe_face_values.get_quadrature(),
+                      sd.fe_face_values.get_update_flags()),
+      ll_matrix (sd.ll_matrix),
+      lf_matrix (sd.lf_matrix),
+      fl_matrix (sd.fl_matrix),
+      tmp_matrix (sd.tmp_matrix),
+      l_rhs (sd.l_rhs),
+      tmp_rhs (sd.tmp_rhs),
+      q_phi (sd.q_phi),
+      q_phi_div (sd.q_phi_div),
+      u_phi (sd.u_phi),
+      u_phi_grad (sd.u_phi_grad),
+      tr_phi (sd.tr_phi),
+      trace_values(sd.trace_values),
+      fe_local_support_on_face(sd.fe_local_support_on_face),
+      fe_support_on_face(sd.fe_support_on_face)
+    {}
+  };
+
+
+
+  // @sect4{HDG::PostProcessScratchData}
+  // @p PostProcessScratchData contains the data used by <code>WorkStream</code>
+  // when post-processing the local solution $u^*$.  It is similar to, but much
+  // simpler than, @p ScratchData.
+  template <int dim>
+  struct HDG<dim>::PostProcessScratchData
+  {
+    FEValues<dim> fe_values_local;
+    FEValues<dim> fe_values;
+
+    std::vector<double> u_values;
+    std::vector<Tensor<1,dim> > u_gradients;
+    FullMatrix<double> cell_matrix;
+
+    Vector<double> cell_rhs;
+    Vector<double> cell_sol;
+    // Per-point buffers are sized by the quadrature, the rest by @p fe.
+    PostProcessScratchData(const FiniteElement<dim> &fe,
+                           const FiniteElement<dim> &fe_local,
+                           const QGauss<dim>   &quadrature_formula,
+                           const UpdateFlags local_flags,
+                           const UpdateFlags flags)
+      :
+      fe_values_local (fe_local, quadrature_formula, local_flags),
+      fe_values (fe, quadrature_formula, flags),
+      u_values (quadrature_formula.size()),
+      u_gradients (quadrature_formula.size()),
+      cell_matrix (fe.dofs_per_cell, fe.dofs_per_cell),
+      cell_rhs (fe.dofs_per_cell),
+      cell_sol (fe.dofs_per_cell)
+    {}
+    // Copies rebuild the FEValues objects from the source's FE, quadrature, flags.
+    PostProcessScratchData(const PostProcessScratchData &sd)
+      :
+      fe_values_local (sd.fe_values_local.get_fe(),
+                       sd.fe_values_local.get_quadrature(),
+                       sd.fe_values_local.get_update_flags()),
+      fe_values (sd.fe_values.get_fe(),
+                 sd.fe_values.get_quadrature(),
+                 sd.fe_values.get_update_flags()),
+      u_values (sd.u_values),
+      u_gradients (sd.u_gradients),
+      cell_matrix (sd.cell_matrix),
+      cell_rhs (sd.cell_rhs),
+      cell_sol (sd.cell_sol)
+    {}
+  };
+
+
+
+  // @sect4{HDG::assemble_system}
+  // The @p assemble_system function is similar to the one in step-32, where
+  // the quadrature formula and the update flags are set up, and then
+  // <code>WorkStream</code> is used to do the work in a multi-threaded
+  // manner.  The @p trace_reconstruct input parameter is used to decide
+  // whether we are solving for the global skeleton solution (false) or the
+  // local solution (true).
+  template <int dim>
+  void
+  HDG<dim>::assemble_system (const bool trace_reconstruct)
+  {
+    const QGauss<dim>   quadrature_formula(fe.degree+1);
+    const QGauss<dim-1> face_quadrature_formula(fe.degree+1);
+
+    const UpdateFlags local_flags (update_values | update_gradients |
+                                   update_JxW_values | update_quadrature_points);
+    // On faces, the local element is only asked for its shape values ...
+    const UpdateFlags local_face_flags (update_values);
+    // ... while the skeleton element also supplies the face geometry data.
+    const UpdateFlags flags ( update_values | update_normal_vectors |
+                              update_quadrature_points |
+                              update_JxW_values);
+
+    PerTaskData task_data (fe.dofs_per_cell,
+                           trace_reconstruct);
+    ScratchData scratch (fe, fe_local,
+                         quadrature_formula,
+                         face_quadrature_formula,
+                         local_flags,
+                         local_face_flags,
+                         flags);
+    // WorkStream is handed the active cells and the two member functions.
+    WorkStream::run(dof_handler.begin_active(),
+                    dof_handler.end(),
+                    *this,
+                    &HDG<dim>::assemble_system_one_cell,
+                    &HDG<dim>::copy_local_to_global,
+                    scratch,
+                    task_data);
+  }
+
+
+
+  // @sect4{HDG::assemble_system_one_cell}
+  // The real work of the HDG program is done by @p assemble_system_one_cell.
+  // Assembling the local matrices $A, B, C$ is done here, along with the
+  // local contributions of the global matrix $D$.
+  template <int dim>
+  void
+  HDG<dim>::assemble_system_one_cell (const typename DoFHandler<dim>::active_cell_iterator &cell,
+                                      ScratchData &scratch,
+                                      PerTaskData &task_data)
+  {
+    // Construct iterator for dof_handler_local for FEValues reinit function.
+    typename DoFHandler<dim>::active_cell_iterator
+    loc_cell (&triangulation,
+              cell->level(),
+              cell->index(),
+              &dof_handler_local);
+
+    const unsigned int n_q_points    = scratch.fe_values_local.get_quadrature().size();
+    const unsigned int n_face_q_points = scratch.fe_face_values_local.get_quadrature().size();
+
+    const unsigned int loc_dofs_per_cell = scratch.fe_values_local.get_fe().dofs_per_cell;
+
+    const FEValuesExtractors::Vector fluxes (0);
+    const FEValuesExtractors::Scalar scalar (dim);
+
+    scratch.ll_matrix = 0;
+    scratch.l_rhs = 0;
+    if (!task_data.trace_reconstruct)
+      {
+        scratch.lf_matrix = 0;
+        scratch.fl_matrix = 0;
+        task_data.cell_matrix = 0;
+        task_data.cell_vector = 0;
+      }
+    scratch.fe_values_local.reinit (loc_cell);
+
+    // We first compute the cell-interior contribution to @p ll_matrix matrix
+    // (referred to as matrix $A$ in the introduction) corresponding to
+    // local-local coupling, as well as the local right-hand-side vector.  We
+    // store the values at each quadrature point for the basis functions, the
+    // right-hand-side value, and the convection velocity, in order to have
+    // quick access to these fields.
+    for (unsigned int q=0; q<n_q_points; ++q)
+      {
+        const double rhs_value
+          = scratch.right_hand_side.value(scratch.fe_values_local.quadrature_point(q));
+        const Tensor<1,dim> convection
+          = scratch.convection_velocity.value(scratch.fe_values_local.quadrature_point(q));
+        const double JxW = scratch.fe_values_local.JxW(q);
+        for (unsigned int k=0; k<loc_dofs_per_cell; ++k)
+          {
+            scratch.q_phi[k] = scratch.fe_values_local[fluxes].value(k,q);
+            scratch.q_phi_div[k] = scratch.fe_values_local[fluxes].divergence(k,q);
+            scratch.u_phi[k] = scratch.fe_values_local[scalar].value(k,q);
+            scratch.u_phi_grad[k] = scratch.fe_values_local[scalar].gradient(k,q);
+          }
+        for (unsigned int i=0; i<loc_dofs_per_cell; ++i)
+          {
+            for (unsigned int j=0; j<loc_dofs_per_cell; ++j)
+              scratch.ll_matrix(i,j) += (
+                                          scratch.q_phi[i] * scratch.q_phi[j]
+                                          -
+                                          scratch.q_phi_div[i] * scratch.u_phi[j]
+                                          +
+                                          scratch.u_phi[i] * scratch.q_phi_div[j]
+                                          -
+                                          (scratch.u_phi_grad[i] * convection) * scratch.u_phi[j]
+                                        ) * JxW;
+            scratch.l_rhs(i) += scratch.u_phi[i] * rhs_value * JxW;
+          }
+      }
+
+    // Face terms are assembled on all faces of all elements. This is in
+    // contrast to more traditional DG methods, where each face is only visited
+    // once in the assembly procedure.
+    for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+      {
+        scratch.fe_face_values_local.reinit(loc_cell, face);
+        scratch.fe_face_values.reinit(cell, face);
+
+        // The already obtained $\hat{u}$ values are needed when solving for the
+        // local variables.
+        if (task_data.trace_reconstruct)
+          scratch.fe_face_values.get_function_values (solution, scratch.trace_values);
+
+        for (unsigned int q=0; q<n_face_q_points; ++q)
+          {
+            const double JxW = scratch.fe_face_values.JxW(q);
+            const Point<dim> quadrature_point =
+              scratch.fe_face_values.quadrature_point(q);
+            const Tensor<1,dim> normal = scratch.fe_face_values.normal_vector(q);
+            const Tensor<1,dim> convection
+              = scratch.convection_velocity.value(quadrature_point);
+
+            // Here we compute the stabilization parameter discussed in the
+            // introduction: since the diffusion is one and the diffusion
+            // length scale is set to 1/5, it simply results in a contribution
+            // of 5 for the diffusion part and the magnitude of convection
+            // through the element boundary in a centered scheme for the
+            // convection part.
+            const double tau_stab = (5. +
+                                     std::abs(convection * normal));
+
+            // We store the non-zero flux and scalar values, making use of the
+            // support_on_face information we created in @p ScratchData.
+            for (unsigned int k=0; k<scratch.fe_local_support_on_face[face].size(); ++k)
+              {
+                const unsigned int kk=scratch.fe_local_support_on_face[face][k];
+                scratch.q_phi[k] = scratch.fe_face_values_local[fluxes].value(kk,q);
+                scratch.u_phi[k] = scratch.fe_face_values_local[scalar].value(kk,q);
+              }
+
+            // When @p trace_reconstruct=false, we are preparing to assemble the
+            // system for the skeleton variable $\hat{u}$. If this is the case,
+            // we must assemble all local matrices associated with the problem:
+            // local-local, local-face, face-local, and face-face.  The
+            // face-face matrix is stored as @p TaskData::cell_matrix, so that
+            // it can be assembled into the global system by @p
+            // copy_local_to_global.
+            if (!task_data.trace_reconstruct)
+              {
+                for (unsigned int k=0; k<scratch.fe_support_on_face[face].size(); ++k)
+                  scratch.tr_phi[k] =
+                    scratch.fe_face_values.shape_value(scratch.fe_support_on_face[face][k],q);
+                for (unsigned int i=0; i<scratch.fe_local_support_on_face[face].size(); ++i)
+                  for (unsigned int j=0; j<scratch.fe_support_on_face[face].size(); ++j)
+                    {
+                      const unsigned int ii=scratch.fe_local_support_on_face[face][i];
+                      const unsigned int jj=scratch.fe_support_on_face[face][j];
+                      scratch.lf_matrix(ii,jj) += (
+                                                    (scratch.q_phi[i] * normal
+                                                     +
+                                                     (convection * normal -
+                                                      tau_stab) * scratch.u_phi[i])
+                                                    * scratch.tr_phi[j]
+                                                  ) * JxW;
+
+                      // Note the sign of the face-local matrix.  We negate the
+                      // sign during assembly here so that we can use the
+                      // FullMatrix::mmult with addition when computing the
+                      // Schur complement.
+                      scratch.fl_matrix(jj,ii) -= (
+                                                    (scratch.q_phi[i] * normal
+                                                     +
+                                                     tau_stab * scratch.u_phi[i])
+                                                    * scratch.tr_phi[j]
+                                                  ) * JxW;
+                    }
+
+                for (unsigned int i=0; i<scratch.fe_support_on_face[face].size(); ++i)
+                  for (unsigned int j=0; j<scratch.fe_support_on_face[face].size(); ++j)
+                    {
+                      const unsigned int ii=scratch.fe_support_on_face[face][i];
+                      const unsigned int jj=scratch.fe_support_on_face[face][j];
+                      task_data.cell_matrix(ii,jj) += (
+                                                        (convection * normal - tau_stab) *
+                                                        scratch.tr_phi[i] * scratch.tr_phi[j]
+                                                      ) * JxW;
+                    }
+
+                if (cell->face(face)->at_boundary()
+                    &&
+                    (cell->face(face)->boundary_id() == 1))
+                  {
+                    const double neumann_value =
+                      - scratch.exact_solution.gradient (quadrature_point) * normal
+                      + convection * normal * scratch.exact_solution.value(quadrature_point);
+                    for (unsigned int i=0; i<scratch.fe_support_on_face[face].size(); ++i)
+                      {
+                        const unsigned int ii=scratch.fe_support_on_face[face][i];
+                        task_data.cell_vector(ii) += scratch.tr_phi[i] * neumann_value * JxW;
+                      }
+                  }
+              }
+
+            // This last term adds the contribution of the term $\left<w,\tau
+            // u_h\right>_{\partial \mathcal T}$ to the local matrix. As opposed
+            // to the face matrices above, we need it in both assembly stages.
+            for (unsigned int i=0; i<scratch.fe_local_support_on_face[face].size(); ++i)
+              for (unsigned int j=0; j<scratch.fe_local_support_on_face[face].size(); ++j)
+                {
+                  const unsigned int ii=scratch.fe_local_support_on_face[face][i];
+                  const unsigned int jj=scratch.fe_local_support_on_face[face][j];
+                  scratch.ll_matrix(ii,jj) += tau_stab * scratch.u_phi[i] * scratch.u_phi[j] * JxW;
+                }
+
+            // When @p trace_reconstruct=true, we are solving for the local
+            // solutions on an element by element basis.  The local
+            // right-hand-side is calculated by replacing the basis functions @p
+            // tr_phi in the @p lf_matrix computation by the computed values @p
+            // trace_values.  Of course, the sign of the matrix is now minus
+            // since we have moved everything to the other side of the equation.
+            if (task_data.trace_reconstruct)
+              for (unsigned int i=0; i<scratch.fe_local_support_on_face[face].size(); ++i)
+                {
+                  const unsigned int ii=scratch.fe_local_support_on_face[face][i];
+                  scratch.l_rhs(ii) -= (scratch.q_phi[i] * normal
+                                        +
+                                        scratch.u_phi[i] * (convection * normal - tau_stab)
+                                       ) * scratch.trace_values[q] * JxW;
+                }
+          }
+      }
+
+    // Once assembly of all of the local contributions is complete, we must either:
+    // (1) assemble the global system, or (2) compute the local solution values and
+    // save them.
+    // In either case, the first step is to invert the local-local matrix.
+    scratch.ll_matrix.gauss_jordan();
+
+    // For (1), we compute the Schur complement and add it to the @p
+    // cell_matrix, matrix $D$ in the introduction.
+    if (task_data.trace_reconstruct == false)
+      {
+        scratch.fl_matrix.mmult(scratch.tmp_matrix, scratch.ll_matrix);
+        scratch.tmp_matrix.vmult_add(task_data.cell_vector, scratch.l_rhs);
+        scratch.tmp_matrix.mmult(task_data.cell_matrix, scratch.lf_matrix, true);
+        cell->get_dof_indices(task_data.dof_indices);
+      }
+    // For (2), we are simply solving (ll_matrix).(solution_local) = (l_rhs).
+    // Hence, we multiply @p l_rhs by our already inverted local-local matrix
+    // and store the result using the <code>set_dof_values</code> function.
+    else
+      {
+        scratch.ll_matrix.vmult(scratch.tmp_rhs, scratch.l_rhs);
+        loc_cell->set_dof_values(scratch.tmp_rhs, solution_local);
+      }
+  }
+
+
+
+  // @sect4{HDG::copy_local_to_global}
+  // If we are in the first step of the solution, i.e. @p trace_reconstruct=false,
+  // then we add the local matrix and vector to the global system; otherwise nothing is done.
+  template <int dim>
+  void HDG<dim>::copy_local_to_global(const PerTaskData &data)
+  {
+    if (data.trace_reconstruct == false)
+      constraints.distribute_local_to_global (data.cell_matrix,
+                                              data.cell_vector,
+                                              data.dof_indices,
+                                              system_matrix, system_rhs);
+  }
+
+
+
+  // @sect4{HDG::solve}
+  // The skeleton solution is solved for by using a BiCGStab solver with
+  // identity preconditioner; afterwards the local solutions are reconstructed.
+  template <int dim>
+  void HDG<dim>::solve ()
+  {
+    SolverControl solver_control (system_matrix.m()*10,         // iteration limit: 10 per matrix row
+                                  1e-11*system_rhs.l2_norm());  // tolerance relative to the rhs norm
+    SolverBicgstab<> solver (solver_control);
+    solver.solve (system_matrix, solution, system_rhs,
+                  PreconditionIdentity());
+
+    std::cout << "   Number of BiCGStab iterations: " << solver_control.last_step()
+              << std::endl;
+
+    system_matrix.clear();             // drop the matrix once the solve is done ...
+    sparsity_pattern.reinit(0,0,0,1);  // ... and shrink its sparsity pattern to size 0x0
+
+    constraints.distribute(solution);
+
+    // Once we have solved for the skeleton solution,
+    // we can solve for the local solutions in an element-by-element
+    // fashion.  We do this by re-using the same @p assemble_system function
+    // but switching @p trace_reconstruct to true.
+    assemble_system(true);
+  }
+
+
+
+  // @sect4{HDG::postprocess}
+
+  // The postprocess method serves two purposes. First, we want to construct a
+  // post-processed scalar variable in the element space of degree $p+1$ that
+  // we hope will converge at order $p+2$. This is again an element-by-element
+  // process and only involves the scalar solution as well as the gradient on
+  // the local cell. To do this, we introduce the already defined scratch data
+  // together with some update flags and run the work stream to do this in
+  // parallel.
+  //
+  // Secondly, we want to compute discretization errors just as we did in
+  // step-7. The overall procedure is similar with calls to
+  // VectorTools::integrate_difference. The difference is in how we compute
+  // the errors for the scalar variable and the gradient variable. In step-7,
+  // we did this by computing @p L2_norm or @p H1_seminorm
+  // contributions. Here, we have a DoFHandler with these two contributions
+  // computed and sorted by their vector component, <code>[0, dim)</code> for the
+  // gradient and @p dim for the scalar. To compute their value, we hence use
+  // a ComponentSelectFunction with either of them, together with the @p
+  // SolutionAndGradient class introduced above that contains the analytic
+  // parts of either of them. Finally, we also compute the L2-error of the
+  // post-processed solution and add the results into the convergence table.
+  template <int dim>
+  void
+  HDG<dim>::postprocess()
+  {
+    {
+      const QGauss<dim>   quadrature_formula(fe_u_post.degree+1);
+      const UpdateFlags local_flags (update_values);
+      const UpdateFlags flags ( update_values | update_gradients |
+                                update_JxW_values);
+
+      PostProcessScratchData scratch (fe_u_post, fe_local,
+                                      quadrature_formula,
+                                      local_flags,
+                                      flags);
+
+      WorkStream::run(dof_handler_u_post.begin_active(),
+                      dof_handler_u_post.end(),
+                      std_cxx11::bind (&HDG<dim>::postprocess_one_cell,
+                                       std_cxx11::ref(*this),
+                                       std_cxx11::_1, std_cxx11::_2, std_cxx11::_3),
+                      std_cxx11::function<void(const unsigned int &)>(),  // empty copier function object
+                      scratch,
+                      0U);  // placeholder copy data; the worker leaves this argument unnamed
+    }
+
+    Vector<float> difference_per_cell (triangulation.n_active_cells());
+
+    ComponentSelectFunction<dim> value_select (dim, dim+1);  // component dim: the scalar
+    VectorTools::integrate_difference (dof_handler_local,
+                                       solution_local,
+                                       SolutionAndGradient<dim>(),
+                                       difference_per_cell,
+                                       QGauss<dim>(fe.degree+2),
+                                       VectorTools::L2_norm,
+                                       &value_select);
+    const double L2_error = difference_per_cell.l2_norm();
+
+    ComponentSelectFunction<dim> gradient_select (std::pair<unsigned int,unsigned int>(0, dim),
+                                                  dim+1);  // components [0,dim): the gradient
+    VectorTools::integrate_difference (dof_handler_local,
+                                       solution_local,
+                                       SolutionAndGradient<dim>(),
+                                       difference_per_cell,
+                                       QGauss<dim>(fe.degree+2),
+                                       VectorTools::L2_norm,
+                                       &gradient_select);
+    const double grad_error = difference_per_cell.l2_norm();
+
+    VectorTools::integrate_difference (dof_handler_u_post,
+                                       solution_u_post,
+                                       Solution<dim>(),
+                                       difference_per_cell,
+                                       QGauss<dim>(fe.degree+3),
+                                       VectorTools::L2_norm);
+    const double post_error = difference_per_cell.l2_norm();
+
+    convergence_table.add_value("cells",     triangulation.n_active_cells());
+    convergence_table.add_value("dofs",      dof_handler.n_dofs());
+    convergence_table.add_value("val L2",    L2_error);
+    convergence_table.add_value("grad L2",   grad_error);
+    convergence_table.add_value("val L2-post", post_error);
+  }
+
+
+
+  // @sect4{HDG::postprocess_one_cell}
+  //
+  // This is the actual work done for the postprocessing. According to the
+  // discussion in the introduction, we need to set up a system that projects
+  // the gradient part of the DG solution onto the gradient of the
+  // post-processed variable. Moreover, we need to set the average of the new
+  // post-processed variable to equal the average of the scalar DG solution
+  // on the cell.
+  //
+  // More technically speaking, the projection of the gradient is a system
+  // that would potentially fill our @p dofs_per_cell times @p dofs_per_cell
+  // matrix but is singular (the sum of all rows would be zero because the
+  // constant function has zero gradient). Therefore, we take one row away and
+  // use it for imposing the average of the scalar value. We pick the first
+  // row for the scalar part, even though we could pick any row for $\mathcal
+  // Q_{-p}$ elements. However, had we used FE_DGP elements instead, the first
+  // row would correspond to the constant part already and deleting e.g. the
+  // last row would give us a singular system. This way, our program can also
+  // be used for those elements.
+  template <int dim>
+  void
+  HDG<dim>::postprocess_one_cell (const typename DoFHandler<dim>::active_cell_iterator &cell,
+                                  PostProcessScratchData &scratch,
+                                  unsigned int &)
+  {
+    typename DoFHandler<dim>::active_cell_iterator
+    loc_cell (&triangulation,
+              cell->level(),
+              cell->index(),
+              &dof_handler_local);  // same level/index as cell, but attached to dof_handler_local
+
+    scratch.fe_values_local.reinit (loc_cell);
+    scratch.fe_values.reinit(cell);
+
+    FEValuesExtractors::Vector fluxes(0);
+    FEValuesExtractors::Scalar scalar(dim);
+
+    const unsigned int n_q_points = scratch.fe_values.get_quadrature().size();
+    const unsigned int dofs_per_cell = scratch.fe_values.dofs_per_cell;
+
+    scratch.fe_values_local[scalar].get_function_values(solution_local, scratch.u_values);
+    scratch.fe_values_local[fluxes].get_function_values(solution_local, scratch.u_gradients);
+
+    double sum = 0;
+    for (unsigned int i=1; i<dofs_per_cell; ++i)  // row 0 is filled separately below
+      {
+        for (unsigned int j=0; j<dofs_per_cell; ++j)
+          {
+            sum = 0;
+            for (unsigned int q=0; q<n_q_points; ++q)
+              sum += (scratch.fe_values.shape_grad(i,q) *
+                      scratch.fe_values.shape_grad(j,q)
+                     ) * scratch.fe_values.JxW(q);
+            scratch.cell_matrix(i,j) = sum;
+          }
+
+        sum = 0;
+        for (unsigned int q=0; q<n_q_points; ++q)
+          sum -= (scratch.fe_values.shape_grad(i,q) * scratch.u_gradients[q]
+                 ) * scratch.fe_values.JxW(q);
+        scratch.cell_rhs(i) = sum;
+      }
+    for (unsigned int j=0; j<dofs_per_cell; ++j)
+      {
+        sum = 0;
+        for (unsigned int q=0; q<n_q_points; ++q)
+          sum += scratch.fe_values.shape_value(j,q) * scratch.fe_values.JxW(q);
+        scratch.cell_matrix(0,j) = sum;  // integral of shape function j over the cell
+      }
+    {
+      sum = 0;
+      for (unsigned int q=0; q<n_q_points; ++q)
+        sum += scratch.u_values[q] * scratch.fe_values.JxW(q);
+      scratch.cell_rhs(0) = sum;  // integral of the scalar part of solution_local over the cell
+    }
+
+    // Having assembled all terms, we can again go on and solve the linear
+    // system. We invert the matrix and then multiply the inverse by the
+    // right hand side. An alternative (and more numerically stable) method would have
+    // been to only factorize the matrix and apply the factorization.
+    scratch.cell_matrix.gauss_jordan();
+    scratch.cell_matrix.vmult(scratch.cell_sol, scratch.cell_rhs);
+    cell->distribute_local_to_global(scratch.cell_sol, solution_u_post);
+  }
+
+
+
+  // @sect4{HDG::output_results}
+  // We have three sets of results that we would like to output: the local solution,
+  // the post-processed local solution, and the skeleton solution.  The former two
+  // both 'live' on element volumes, whereas the last one lives on codimension-1 surfaces
+  // of the triangulation.  Our @p output_results function writes all local solutions
+  // to the same vtk file, even though they correspond to different <code>DoFHandler</code>
+  // objects.  The graphical output for the skeleton variable is done through
+  // use of the <code>DataOutFaces</code> class and goes to a second vtk file.
+  template <int dim>
+  void HDG<dim>::output_results (const unsigned int cycle)
+  {
+    std::string filename;
+    switch (refinement_mode)
+      {
+      case global_refinement:
+        filename = "solution-global";
+        break;
+      case adaptive_refinement:
+        filename = "solution-adaptive";
+        break;
+      default:
+        Assert (false, ExcNotImplemented());
+      }
+
+    std::string face_out(filename);
+    face_out += "-face";
+
+    filename += "-q" + Utilities::int_to_string(fe.degree,1);
+    filename += "-" + Utilities::int_to_string(cycle,2);
+    filename += ".vtk";  // e.g. "solution-global-q1-00.vtk"
+    std::ofstream output (filename.c_str());
+
+    DataOut<dim> data_out;
+
+    // We first define the names and types of the local solution,
+    // and add the data to @p data_out.
+    std::vector<std::string> names (dim, "gradient");
+    names.push_back ("solution");
+    std::vector<DataComponentInterpretation::DataComponentInterpretation>
+    component_interpretation
+    (dim+1, DataComponentInterpretation::component_is_part_of_vector);
+    component_interpretation[dim]
+      = DataComponentInterpretation::component_is_scalar;
+    data_out.add_data_vector (dof_handler_local, solution_local,
+                              names, component_interpretation);
+
+    // The second data item we add is the post-processed solution.
+    // In this case, it is a single scalar variable belonging to
+    // a different DoFHandler.
+    std::vector<std::string> post_name(1,"u_post");
+    std::vector<DataComponentInterpretation::DataComponentInterpretation>
+    post_comp_type(1, DataComponentInterpretation::component_is_scalar);
+    data_out.add_data_vector (dof_handler_u_post, solution_u_post,
+                              post_name, post_comp_type);
+
+    data_out.build_patches (fe.degree);
+    data_out.write_vtk (output);
+
+    face_out += "-q" + Utilities::int_to_string(fe.degree,1);
+    face_out += "-" + Utilities::int_to_string(cycle,2);
+    face_out += ".vtk";  // e.g. "solution-global-face-q1-00.vtk"
+    std::ofstream face_output (face_out.c_str());
+
+    // The <code>DataOutFaces</code> class works analogously to the <code>DataOut</code>
+    // class when we have a <code>DoFHandler</code> that defines the solution on
+    // the skeleton of the triangulation.  We treat it as such here, and the code is
+    // similar to that above.
+    DataOutFaces<dim> data_out_face(false);
+    std::vector<std::string> face_name(1,"u_hat");
+    std::vector<DataComponentInterpretation::DataComponentInterpretation>
+    face_component_type(1, DataComponentInterpretation::component_is_scalar);
+
+    data_out_face.add_data_vector (dof_handler,
+                                   solution,
+                                   face_name,
+                                   face_component_type);
+
+    data_out_face.build_patches (fe.degree);
+    data_out_face.write_vtk (face_output);
+  }
+
+  // @sect4{HDG::refine_grid}
+
+  // We implement two different refinement cases for HDG, just as in
+  // <code>Step-7</code>: adaptive_refinement and global_refinement.  The
+  // global_refinement option recreates the entire triangulation every
+  // time. This is because we want to use a finer sequence of meshes than what
+  // we would get with one refinement step, namely 2, 3, 4, 6, 8, 12, 16, ...
+  // elements per direction (times $2^{3-dim}$ due to the extra global refinements).
+
+  // The adaptive_refinement mode uses the <code>KellyErrorEstimator</code> to
+  // give a decent indication of the non-regular regions in the scalar local
+  // solutions.
+  template <int dim>
+  void HDG<dim>::refine_grid (const unsigned int cycle)
+  {
+    if (cycle == 0)
+      {
+        GridGenerator::subdivided_hyper_cube (triangulation, 2, -1, 1);
+        triangulation.refine_global(3-dim);
+      }
+    else
+      switch (refinement_mode)
+        {
+        case global_refinement:
+        {
+          triangulation.clear();
+          GridGenerator::subdivided_hyper_cube (triangulation, 2+(cycle%2), -1, 1);  // alternates 3, 2, 3, 2, ...
+          triangulation.refine_global(3-dim+cycle/2);
+          break;
+        }
+
+        case adaptive_refinement:
+        {
+          Vector<float> estimated_error_per_cell (triangulation.n_active_cells());
+
+          FEValuesExtractors::Scalar scalar(dim);
+          typename FunctionMap<dim>::type neumann_boundary;  // left empty
+          KellyErrorEstimator<dim>::estimate (dof_handler_local,
+                                              QGauss<dim-1>(3),
+                                              neumann_boundary,
+                                              solution_local,
+                                              estimated_error_per_cell,
+                                              fe_local.component_mask(scalar));  // estimate on the scalar component only
+
+          GridRefinement::refine_and_coarsen_fixed_number (triangulation,
+                                                           estimated_error_per_cell,
+                                                           0.3, 0.);  // refine fraction 0.3, coarsen fraction 0
+
+          triangulation.execute_coarsening_and_refinement ();
+
+          break;
+        }
+
+        default:
+        {
+          Assert (false, ExcNotImplemented());
+        }
+        }
+
+    // Just as in step-7, we set the boundary indicator of two of the faces to 1
+    // where we want to specify Neumann boundary conditions instead of Dirichlet
+    // conditions. Since we re-create the triangulation every time for global
+    // refinement, the flags are set in every refinement step, not just at the
+    // beginning. The faces concerned are those whose center has x=-1 or y=-1.
+    typename Triangulation<dim>::cell_iterator
+    cell = triangulation.begin (),
+    endc = triangulation.end();
+    for (; cell!=endc; ++cell)
+      for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+        if (cell->face(face)->at_boundary())
+          if ((std::fabs(cell->face(face)->center()(0) - (-1)) < 1e-12)
+              ||
+              (std::fabs(cell->face(face)->center()(1) - (-1)) < 1e-12))
+            cell->face(face)->set_boundary_id (1);
+  }
+
+  // @sect4{HDG::run}
+  // The functionality here is basically the same as <code>Step-7</code>.
+  // We loop over 10 cycles, refining the grid on each one.  At the end,
+  // the convergence table is formatted and written to <code>std::cout</code>.
+  template <int dim>
+  void HDG<dim>::run ()
+  {
+    for (unsigned int cycle=0; cycle<10; ++cycle)
+      {
+        std::cout << "Cycle " << cycle << ':' << std::endl;
+
+        refine_grid (cycle);
+        setup_system ();
+        assemble_system (false);  // first pass: trace_reconstruct=false
+        solve ();                 // also runs the second pass, assemble_system(true)
+        postprocess();
+        output_results (cycle);
+      }
+
+
+
+    convergence_table.set_precision("val L2", 3);
+    convergence_table.set_scientific("val L2", true);
+    convergence_table.set_precision("grad L2", 3);
+    convergence_table.set_scientific("grad L2", true);
+    convergence_table.set_precision("val L2-post", 3);
+    convergence_table.set_scientific("val L2-post", true);
+
+    // There is one minor change for the convergence table compared to step-7:
+    // Since we did not refine our mesh by a factor two in each cycle (but
+    // rather used the sequence 2, 3, 4, 6, 8, 12, ...), we need to tell the
+    // convergence rate evaluation about this. We do this by setting the
+    // number of cells as a reference column and additionally specifying the
+    // dimension of the problem, which gives the necessary information for the
+    // relation between number of cells and mesh size.
+    if (refinement_mode == global_refinement)
+      {
+        convergence_table
+        .evaluate_convergence_rates("val L2", "cells", ConvergenceTable::reduction_rate_log2, dim);
+        convergence_table
+        .evaluate_convergence_rates("grad L2", "cells", ConvergenceTable::reduction_rate_log2, dim);
+        convergence_table
+        .evaluate_convergence_rates("val L2-post", "cells", ConvergenceTable::reduction_rate_log2, dim);
+      }
+    convergence_table.write_text(std::cout);
+  }
+
+} // end of namespace Step51
+
+
+
+int main ()
+{
+  const unsigned int dim = 2;  // space dimension used for all three runs below
+
+  try
+    {
+      using namespace dealii;
+
+      // Now for the three calls to the main class in complete analogy to
+      // step-7. Each run lives in its own scope and prints a banner first.
+      {
+        std::cout << "Solving with Q1 elements, adaptive refinement" << std::endl
+                  << "=============================================" << std::endl
+                  << std::endl;
+
+        Step51::HDG<dim> hdg_problem (1, Step51::HDG<dim>::adaptive_refinement);
+        hdg_problem.run ();
+
+        std::cout << std::endl;
+      }
+
+      {
+        std::cout << "Solving with Q1 elements, global refinement" << std::endl
+                  << "===========================================" << std::endl
+                  << std::endl;
+
+        Step51::HDG<dim> hdg_problem (1, Step51::HDG<dim>::global_refinement);
+        hdg_problem.run ();
+
+        std::cout << std::endl;
+      }
+
+      {
+        std::cout << "Solving with Q3 elements, global refinement" << std::endl
+                  << "===========================================" << std::endl
+                  << std::endl;
+
+        Step51::HDG<dim> hdg_problem (3, Step51::HDG<dim>::global_refinement);
+        hdg_problem.run ();
+
+        std::cout << std::endl;
+      }
+
+    }
+  catch (std::exception &exc)  // report the message and exit with status 1
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    }
+  catch (...)  // anything not derived from std::exception; also exits with status 1
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    }
+
+  return 0;
+}
diff --git a/examples/step-52/CMakeLists.txt b/examples/step-52/CMakeLists.txt
new file mode 100644
index 0000000..39fffde
--- /dev/null
+++ b/examples/step-52/CMakeLists.txt
@@ -0,0 +1,42 @@
+##
+#  CMake script for the step-52 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-52")
+
+# Declare all source files the target consists of. Here, this is only
+# the one ${TARGET}.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Define the output files that should be cleaned:
+SET(CLEAN_UP_FILES *.vtu)
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})  # FIND_PACKAGE ran QUIET, so the failure is reported here
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-52/doc/builds-on b/examples/step-52/doc/builds-on
new file mode 100644
index 0000000..1aabbdf
--- /dev/null
+++ b/examples/step-52/doc/builds-on
@@ -0,0 +1 @@
+step-26
diff --git a/examples/step-52/doc/intro.dox b/examples/step-52/doc/intro.dox
new file mode 100644
index 0000000..ba3cc54
--- /dev/null
+++ b/examples/step-52/doc/intro.dox
@@ -0,0 +1,251 @@
+<br>
+
+<i>This program was contributed by Bruno Turcksin and Damien Lebrun-Grandie.</i>
+
+@note In order to run this program, deal.II must be configured to use
+the UMFPACK sparse direct solver. Refer to the <a
+href="../../readme.html#umfpack">ReadMe</a> for instructions how to do this.
+
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+This program shows how to use Runge-Kutta methods to solve a time-dependent
+problem. It solves a small variation of the heat equation discussed first in
+step-26 but, since the purpose of this program is only to demonstrate using
+more advanced ways to interface with deal.II's time stepping algorithms, only
+solves a simple problem on a uniformly refined mesh. 
+
+
+<h3>Problem statement</h3>
+
+In this example, we solve the one-group time-dependent diffusion
+approximation of the neutron transport equation (see step-28 for the
+time-independent multigroup diffusion). This is a model for how neutrons move
+around highly scattering media, and consequently it is a variant of the
+time-dependent diffusion equation -- which is just a different name for the
+heat equation discussed in step-26, plus some extra terms.
+We assume that the medium is not
+fissile and therefore, the neutron flux satisfies the following equation:
+@f{eqnarray*}
+\frac{1}{v}\frac{\partial \phi(x,t)}{\partial t} = \nabla \cdot D(x) \nabla \phi(x,t)
+- \Sigma_a(x) \phi(x,t) + S(x,t)
+@f}
+augmented by appropriate boundary conditions. Here, $v$ is the velocity of
+neutrons (for simplicity we assume it is equal to 1 which can be achieved by
+simply scaling the time variable), $D$ is the diffusion coefficient, 
+$\Sigma_a$ is the absorption cross section, and $S$ is a source. Because we are
+only interested in the time dependence, we assume that $D$ and $\Sigma_a$ are
+constant. 
+
+Since this program only intends to demonstrate how to use advanced time
+stepping algorithms, we will only look for the solutions of relatively simple
+problems. Specifically, we are looking for a solution on a square domain
+$[0,b]\times[0,b]$ of the form
+@f{eqnarray*}
+\phi(x,t) = A\sin(\omega t)(bx-x^2).
+@f}
+By using quadratic finite elements, we can represent this function exactly at
+any particular time, and all the error will be due to the time
+discretization. We do this because it is then easy to observe the order of
+convergence of the various time stepping schemes we will consider, without
+having to separate spatial and temporal errors.
+
+We impose the following boundary conditions: homogeneous Dirichlet for $x=0$ and
+$x=b$ and homogeneous Neumann conditions for $y=0$ and $y=b$. We choose the
+source term so that the corresponding solution is
+in fact of the form stated above:
+@f{eqnarray*}
+S=A\left(\frac{1}{v}\omega \cos(\omega t)(bx -x^2) + \sin(\omega t)
+\left(\Sigma_a (bx-x^2)+2D\right) \right).
+@f}
+Because the solution is a sine in time, we know that the exact solution
+satisfies $\phi\left(x,\pi\right) = 0$.
+Therefore, the error at time $t=\pi$ is simply the norm of the numerical
+solution, i.e., $\|e(\cdot,t=\pi)\|_{L_2} = \|\phi_h(\cdot,t=\pi)\|_{L_2}$,
+and is particularly easily evaluated. In the code, we evaluate the $l_2$ norm
+of the vector of nodal values of $\phi_h$ instead of the $L_2$ norm of the
+associated spatial function, since the former is simpler to compute; however,
+on uniform meshes, the two are just related by a constant and we can
+consequently observe the temporal convergence order with either.
+
+
+<h3>Runge-Kutta methods</h3>
+
+The Runge-Kutta methods implemented in deal.II assume that the equation to be
+solved can be written as:
+@f{eqnarray*}
+\frac{dy}{dt} = g(t,y).
+@f}
+On the other hand, when using finite elements, discretized time derivatives always result in the
+presence of a mass matrix on the left hand side. This can easily be seen by
+considering that if the solution vector $y(t)$ in the equation above is in fact the vector
+of nodal coefficients $U(t)$ for a variable of the form
+@f{eqnarray*}
+  u_h(x,t) = \sum_j U_j(t) \varphi_j(x)
+@f}
+with spatial shape functions $\varphi_j(x)$, then multiplying an equation of
+the form 
+@f{eqnarray*}
+  \frac{\partial u(x,t)}{\partial t} = q(t,u(x,t))
+@f}
+by test functions, integrating over $\Omega$, substituting $u\rightarrow u_h$
+and restricting the test functions to the $\varphi_i(x)$ from above, then this
+spatially discretized equation has the form
+@f{eqnarray*}
+M\frac{dU}{dt} = f(t,U),
+@f}
+where $M$ is the mass matrix and $f(t,U)$ is the spatially discretized version
+of $q(t,u(x,t))$ (where $q$ is typically the place where spatial
+derivatives appear, but this is not of much concern for the moment given that
+we only consider time derivatives). In other words, this form fits the general
+scheme above if we write 
+@f{eqnarray*}
+\frac{dy}{dt} = g(t,y) = M^{-1}f(t,y).
+@f}
+
+Runge-Kutta methods are time stepping schemes that approximate $y(t_n)\approx
+y_{n}$ through a particular one-step approach. They are typically written in the form
+@f{eqnarray*}
+y_{n+1} = y_n + \sum_{i=1}^s b_i k_i
+@f}
+where for the form of the right hand side above
+@f{eqnarray*}
+k_i = h M^{-1} f\left(t_n+c_ih,y_n+\sum_{j=1}^sa_{ij}k_j\right).
+@f}
+Here $a_{ij}$, $b_i$, and $c_i$ are known coefficients that identify which
+particular Runge-Kutta scheme you want to use, and $h=t_{n+1}-t_n$ is the time step
+used. Different time stepping methods of the Runge-Kutta class differ in the
+number of stages $s$ and the values they use for the coefficients $a_{ij}$,
+$b_i$, and $c_i$ but are otherwise easy to implement since one can look up
+tabulated values for these coefficients. (These tables are often called
+Butcher tableaus.)
+
+At the time of the writing of this tutorial, the methods implemented in
+deal.II can be divided in three categories:
+<ol>
+<li> Explicit Runge-Kutta; in order for a method to be explicit, it is
+necessary that in the formula above defining $k_i$, $k_i$ does not appear
+on the right hand side. In other words, these methods have to satisfy
+$a_{ii}=0, i=1,\ldots,s$.
+<li> Embedded (or adaptive) Runge-Kutta; we will discuss their properties below.
+<li> Implicit Runge-Kutta; this class of methods requires the solution of a
+possibly nonlinear system for the stages $k_i$ above, i.e., they have
+$a_{ii}\neq 0$ for at least one of the stages $i=1,\ldots,s$.
+</ol> 
+Many well known time stepping schemes that one does not typically associate
+with the names Runge or Kutta can in fact be written in a way so that they,
+too, can be expressed in these categories. They oftentimes represent the
+lowest-order members of these families.
+
+
+<h4>Explicit Runge-Kutta methods</h4> 
+
+These methods only require a function to evaluate $M^{-1}f(t,y)$ but not
+(as implicit methods) to solve an equation that involves
+$f(t,y)$ for $y$. Like all explicit time stepping methods, they become unstable
+when the time step chosen is too large.
+
+Well known methods in this class include forward Euler, third order
+Runge-Kutta, and fourth order Runge-Kutta (often abbreviated as RK4).
+
+
+<h4>Embedded Runge-Kutta methods</h4>
+
+These methods use both a lower and a higher order method to
+estimate the error and decide if the time step needs to be shortened or can be
+increased. The term "embedded" refers to the fact that the lower-order method
+does not require additional evaluations of the function $M^{-1}f(\cdot,\cdot)$
+but reuses data that has to be computed for the high order method anyway. It
+is, in other words, essentially free, and we get the error estimate as a side
+product of using the higher order method.
+
+This class of methods includes Heun-Euler, Bogacki-Shampine, Dormand-Prince (ode45 in
+Matlab and often abbreviated as RK45 to indicate that the lower and higher order methods
+used here are 4th and 5th order Runge-Kutta methods, respectively), Fehlberg,
+and Cash-Karp.
+ 
+At the time of the writing, only embedded explicit methods have been implemented.
+
+
+<h4>Implicit Runge-Kutta methods</h4>
+
+Implicit methods require the solution of (possibly nonlinear) systems of the
+form $\alpha y = f(t,y)$
+for $y$ in each (sub-)timestep. Internally, this is
+done using a Newton-type method and, consequently, they require that the user
+provide functions that can evaluate $M^{-1}f(t,y)$ and
+$\left(I-\tau M^{-1} \frac{\partial f}{\partial y}\right)^{-1}$ or equivalently 
+$\left(M - \tau \frac{\partial f}{\partial y}\right)^{-1} M$. 
+
+The particular form of this operator results from the fact that each Newton
+step requires the solution of an equation of the form
+@f{align*}
+  \left(M - \tau \frac{\partial f}{\partial y}\right) \Delta y
+  = -M h(t,y)
+@f}
+for some (given) $h(t,y)$. Implicit methods are 
+always stable, regardless of the time step size, but too large time steps of
+course affect the <i>accuracy</i> of the solution, even if the numerical
+solution remains stable and bounded.
+
+Methods in this class include backward Euler, implicit midpoint,
+Crank-Nicolson, and the two stage SDIRK method (short for "singly diagonally
+implicit Runge-Kutta", a term coined to indicate that the diagonal elements
+$a_{ii}$ defining the time stepping method are all equal; this property
+allows for the Newton matrix $I-\tau M^{-1}\frac{\partial f}{\partial y}$ to
+be re-used between stages because $\tau$ is the same every time).
+
+
+<h3>Spatially discrete formulation</h3>
+
+By expanding the solution of our model problem
+as always using shape functions $\psi_j$ and writing
+@f{eqnarray*}
+\phi_h(x,t) = \sum_j U_j(t) \psi_j(x),
+@f}
+we immediately get the spatially discretized version of the diffusion equation as
+@f{eqnarray*}
+  M \frac{dU(t)}{dt}
+  = -{\cal D} U(t) - {\cal A} U(t) + {\cal S}(t)
+@f}
+where
+@f{eqnarray*}
+  M_{ij}  &=& (\psi_i,\psi_j)_\Omega, \\
+  {\cal D}_{ij}  &=& (D\nabla\psi_i,\nabla\psi_j)_\Omega, \\
+  {\cal A}_{ij}  &=& (\Sigma_a\psi_i,\psi_j)_\Omega, \\
+  {\cal S}_{i}(t)  &=& (\psi_i,S(x,t))_\Omega.
+@f}
+See also step-24 and step-26 to understand how we arrive here.
+%Boundary terms are not necessary due to the chosen boundary conditions for
+the current problem. To use the Runge-Kutta methods, we recast this
+as follows:
+@f{eqnarray*}
+f(y) = -{\cal D}y - {\cal A}y + {\cal S}.
+@f}
+In the code, we will need to be able to evaluate this function $f(U)$ along
+with its derivative,
+@f{eqnarray*}
+\frac{\partial f}{\partial y} = -{\cal D} - {\cal A}.
+@f}
+
+
+<h3>Notes on the testcase</h3>
+
+To simplify the problem, the domain is two dimensional and the mesh is
+uniformly refined (there is no need to adapt the mesh since we use quadratic
+finite elements and the exact solution is quadratic). Going from a two
+dimensional domain to a three dimensional domain is not very
+challenging. However if you intend to solve more complex problems where the
+mesh must be adapted (as is done, for example, in step-26), then it is
+important to remember the following issues:
+
+<ol>
+<li> You will need to project the solution to the new mesh when the mesh is changed. Of course,
+     the mesh 
+     used should be the same from the beginning to the end of each time step,
+     a question that arises because Runge-Kutta methods use multiple
+     evaluations of the equations within each time step.
+<li> You will need to update the mass matrix and its inverse every time the
+     mesh is changed.
+</ol>
+The techniques for these steps are readily available by looking at step-26.
diff --git a/examples/step-52/doc/kind b/examples/step-52/doc/kind
new file mode 100644
index 0000000..86a44aa
--- /dev/null
+++ b/examples/step-52/doc/kind
@@ -0,0 +1 @@
+time dependent
diff --git a/examples/step-52/doc/results.dox b/examples/step-52/doc/results.dox
new file mode 100644
index 0000000..4c2d9ea
--- /dev/null
+++ b/examples/step-52/doc/results.dox
@@ -0,0 +1,40 @@
+<h1>Results</h1>
+
+The point of this program is less to show particular results, but instead to
+show how it is done. This we have already demonstrated simply by discussing
+the code above. Consequently, the output the program yields is relatively
+sparse and consists only of the console output and the solutions given in VTU
+format for visualization.
+
+The console output contains both errors and, for some of the methods, the
+number of steps they performed:
+@code
+Explicit methods:
+Forward Euler:            error=1.00883
+Third order Runge-Kutta:  error=0.000227982
+Fourth order Runge-Kutta: error=1.90541e-06
+
+Implicit methods:
+Backward Euler:           error=1.03428
+Implicit Midpoint:        error=0.00862702
+Crank-Nicolson:           error=0.00862675
+SDIRK:                    error=0.0042349
+
+Embedded %explicit methods:
+Heun-Euler:               error=0.0073012
+                steps performed=284
+Bogacki-Shampine:         error=0.000207511
+                steps performed=200
+Dopri:                    error=4.01774e-09
+                steps performed=200
+Fehlberg:                 error=9.89504e-09
+                steps performed=200
+Cash-Karp:                error=2.55791e-10
+                steps performed=200
+@endcode
+
+As expected the higher order methods give (much) more accurate solutions. We
+also see that the (rather inaccurate) Heun-Euler method adapted the number of
+time steps in order to satisfy the tolerance. On the other hand, the other
+embedded methods did not need to change the number of time steps as they are
+easily able to reach the desired tolerance with the given time step size.
diff --git a/examples/step-52/doc/tooltip b/examples/step-52/doc/tooltip
new file mode 100644
index 0000000..7754b44
--- /dev/null
+++ b/examples/step-52/doc/tooltip
@@ -0,0 +1 @@
+Time-dependent diffusion equation. Time stepping methods.
diff --git a/examples/step-52/step-52.cc b/examples/step-52/step-52.cc
new file mode 100644
index 0000000..d913d52
--- /dev/null
+++ b/examples/step-52/step-52.cc
@@ -0,0 +1,764 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2014 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Authors: Damien Lebrun-Grandie, Bruno Turcksin, 2014
+ */
+
+// @sect3{Include files}
+
+// The first task as usual is to include the functionality of these well-known
+// deal.II library files and some C++ header files.
+#include <deal.II/base/function.h>
+#include <deal.II/base/quadrature_lib.h>
+
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/tria_boundary_lib.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_out.h>
+
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_values.h>
+
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/lac/sparse_direct.h>
+
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/data_out.h>
+
+#include <fstream>
+#include <iostream>
+#include <cmath>
+#include <map>
+
+// This is the only include file that is new: It includes all the Runge-Kutta
+// methods.
+#include <deal.II/base/time_stepping.h>
+
+
+// The next step is like in all previous tutorial programs: We put everything
+// into a namespace of its own and then import the deal.II classes and functions
+// into it.
+namespace Step52
+{
+  using namespace dealii;
+
+  // @sect3{The <code>Diffusion</code> class}
+
+  // The next piece is the declaration of the main class. Most of the
+  // functions in this class are not new and have been explained in previous
+  // tutorials. The only interesting functions are
+  // <code>evaluate_diffusion()</code> and
+  // <code>id_minus_tau_J_inverse()</code>. <code>evaluate_diffusion()</code>
+  // evaluates the diffusion equation, $M^{-1}(f(t,y))$, at a given time and a
+  // given $y$. <code>id_minus_tau_J_inverse()</code> evaluates $\left(I-\tau
+  // M^{-1} \frac{\partial f(t,y)}{\partial y}\right)^{-1}$ or equivalently
+  // $\left(M-\tau \frac{\partial f}{\partial y}\right)^{-1} M$ at a given
+  // time, for a given $\tau$ and $y$. This function is needed when an
+  // implicit method is used.
+  // Solver for the time-dependent diffusion model problem of this tutorial on
+  // a two-dimensional mesh. It owns the mesh, the finite element
+  // discretization and the assembled matrices, and offers one driver per
+  // family of Runge-Kutta time stepping schemes.
+  class Diffusion
+  {
+  public:
+    Diffusion();
+
+    void run();
+
+  private:
+    // Distribute degrees of freedom, build the constraints and the sparsity
+    // pattern, and size the matrices and the solution vector.
+    void setup_system();
+
+    // Assemble <code>system_matrix</code> and <code>mass_matrix</code> and
+    // set up <code>inverse_mass_matrix</code> from the latter.
+    void assemble_system();
+
+    // Value of the source term at the given time and point.
+    double get_source (const double time,
+                       const Point<2> &point) const;
+
+    // Evaluate $M^{-1}f(t,y)$ at the given time for the given vector $y$.
+    Vector<double> evaluate_diffusion (const double time,
+                                       const Vector<double> &y) const;
+
+    // Apply $\left(M-\tau \frac{\partial f}{\partial y}\right)^{-1} M$ to
+    // $y$. Not <code>const</code> because it uses
+    // <code>mass_minus_tau_Jacobian</code> as scratch storage.
+    Vector<double> id_minus_tau_J_inverse (const double time,
+                                           const double tau,
+                                           const Vector<double> &y);
+
+    // Write the current solution to a vtu file whose name encodes the time
+    // stepping method and the number of the time step.
+    void output_results (const unsigned int time_step,
+                         TimeStepping::runge_kutta_method method) const;
+
+    // The next three functions are the drivers for the explicit methods, the
+    // implicit methods, and the embedded explicit methods respectively. The
+    // driver function for embedded explicit methods returns the number of
+    // steps executed given that it only takes the number of time steps passed
+    // as an argument as a hint, but internally computes the optimal time step
+    // itself.
+    void explicit_method (const TimeStepping::runge_kutta_method method,
+                          const unsigned int                     n_time_steps,
+                          const double                           initial_time,
+                          const double                           final_time);
+
+    void implicit_method (const TimeStepping::runge_kutta_method method,
+                          const unsigned int                     n_time_steps,
+                          const double                           initial_time,
+                          const double                           final_time);
+
+    unsigned int embedded_explicit_method (const TimeStepping::runge_kutta_method method,
+                                           const unsigned int                     n_time_steps,
+                                           const double                     initial_time,
+                                           const double                     final_time);
+
+
+    // Polynomial degree of the finite element. It must stay declared before
+    // <code>fe</code>, which is constructed from it: members are initialized
+    // in declaration order.
+    unsigned int                 fe_degree;
+
+    // The coefficients $D$ and $\Sigma_a$ of the equation; both are constants.
+    double                       diffusion_coefficient;
+    double                       absorption_cross_section;
+
+    // Must stay declared before <code>dof_handler</code>, which is
+    // constructed from it.
+    Triangulation<2>             triangulation;
+
+    FE_Q<2>                      fe;
+
+    DoFHandler<2>                dof_handler;
+
+    // Holds the homogeneous Dirichlet boundary values set up in
+    // <code>setup_system()</code>.
+    ConstraintMatrix             constraint_matrix;
+
+    // Shared by the three sparse matrices below.
+    SparsityPattern              sparsity_pattern;
+
+    // $-{\cal D} - {\cal A}$, i.e. the Jacobian $\partial f/\partial y$.
+    SparseMatrix<double>         system_matrix;
+    // The mass matrix $M$.
+    SparseMatrix<double>         mass_matrix;
+    // Scratch matrix that <code>id_minus_tau_J_inverse()</code> fills with
+    // $M - \tau \frac{\partial f}{\partial y}$ on every call.
+    SparseMatrix<double>         mass_minus_tau_Jacobian;
+
+    // Direct solver initialized with <code>mass_matrix</code>; its
+    // <code>vmult()</code> is used to apply $M^{-1}$.
+    SparseDirectUMFPACK          inverse_mass_matrix;
+
+    // Current solution; reset to zero at the start of each driver shown here
+    // and advanced in place.
+    Vector<double>               solution;
+  };
+
+
+
+  // We choose quadratic finite elements and we initialize the parameters.
+  // Constructor: fixes the polynomial degree and the two material
+  // coefficients, then builds the finite element from the degree and
+  // attaches the DoF handler to the (still empty) triangulation.
+  Diffusion::Diffusion()
+    :
+    fe_degree(2),                   // quadratic elements
+    diffusion_coefficient(1./30.),  // D
+    absorption_cross_section(1.),   // Sigma_a
+    fe(fe_degree),                  // relies on fe_degree being declared before fe
+    dof_handler(triangulation)
+  {}
+
+
+
+  // @sect4{<code>Diffusion::setup_system</code>}
+  // Now, we create the constraint matrix and the sparsity pattern. Then, we
+  // initialize the matrices and the solution vector.
+  void Diffusion::setup_system ()
+  {
+    dof_handler.distribute_dofs(fe);
+    const types::global_dof_index n_dofs = dof_handler.n_dofs();
+
+    // Record zero Dirichlet values on the boundary with indicator 1 as
+    // constraints and finalize the constraint object.
+    VectorTools::interpolate_boundary_values(dof_handler,
+                                             1,
+                                             ZeroFunction<2>(),
+                                             constraint_matrix);
+    constraint_matrix.close();
+
+    // The final sparsity pattern is built via a dynamic intermediate object
+    // that is only needed until it has been copied.
+    {
+      DynamicSparsityPattern dynamic_pattern(n_dofs);
+      DoFTools::make_sparsity_pattern(dof_handler,
+                                      dynamic_pattern,
+                                      constraint_matrix);
+      sparsity_pattern.copy_from(dynamic_pattern);
+    }
+
+    // All three matrices share the same pattern; the solution vector has one
+    // entry per degree of freedom.
+    solution.reinit(n_dofs);
+    mass_minus_tau_Jacobian.reinit(sparsity_pattern);
+    mass_matrix.reinit(sparsity_pattern);
+    system_matrix.reinit(sparsity_pattern);
+  }
+
+
+
+  // @sect4{<code>Diffusion::assemble_system</code>}
+  // In this function, we compute $-\int D \nabla b_i \cdot \nabla b_j
+  // d\boldsymbol{r} - \int \Sigma_a b_i b_j d\boldsymbol{r}$ and the mass
+  // matrix $\int b_i b_j d\boldsymbol{r}$. The mass matrix is then
+  // inverted using a direct solver; the <code>inverse_mass_matrix</code>
+  // variable will then store the inverse of the mass matrix so that
+  // $M^{-1}$ can be applied to a vector using the <code>vmult()</code>
+  // function of that object. (Internally, UMFPACK does not really store
+  // the inverse of the matrix, but its LU factors; applying the inverse
+  // matrix is then equivalent to doing one forward and one backward solves
+  // with these two factors, which has the same complexity as applying an
+  // explicit inverse of the matrix).
+  void Diffusion::assemble_system ()
+  {
+    mass_matrix = 0.;
+    system_matrix = 0.;
+
+    const QGauss<2> quadrature(fe_degree+1);
+    FEValues<2> fe_values(fe, quadrature,
+                          update_values | update_gradients | update_JxW_values);
+
+    const unsigned int n_local_dofs  = fe.dofs_per_cell;
+    const unsigned int n_quad_points = quadrature.size();
+
+    // Per-cell contributions and the map from local to global indices.
+    FullMatrix<double> local_system (n_local_dofs, n_local_dofs);
+    FullMatrix<double> local_mass (n_local_dofs, n_local_dofs);
+    std::vector<types::global_dof_index> dof_indices (n_local_dofs);
+
+    const DoFHandler<2>::active_cell_iterator end_cell = dof_handler.end();
+    for (DoFHandler<2>::active_cell_iterator cell = dof_handler.begin_active();
+         cell!=end_cell; ++cell)
+      {
+        fe_values.reinit (cell);
+
+        local_system = 0.;
+        local_mass = 0.;
+
+        for (unsigned int q=0; q<n_quad_points; ++q)
+          {
+            const double JxW = fe_values.JxW(q);
+
+            for (unsigned int i=0; i<n_local_dofs; ++i)
+              {
+                const double          phi_i  = fe_values.shape_value(i,q);
+                const Tensor<1,2>    &grad_i = fe_values.shape_grad(i,q);
+
+                for (unsigned int j=0; j<n_local_dofs; ++j)
+                  {
+                    const double          phi_j  = fe_values.shape_value(j,q);
+                    const Tensor<1,2>    &grad_j = fe_values.shape_grad(j,q);
+
+                    // Diffusion and absorption terms, both with a minus sign
+                    // because they sit on the right hand side.
+                    local_system(i,j) += ((-diffusion_coefficient *
+                                           grad_i *
+                                           grad_j
+                                           - absorption_cross_section *
+                                           phi_i *
+                                           phi_j) *
+                                          JxW);
+                    local_mass(i,j) += phi_i *
+                                       phi_j *
+                                       JxW;
+                  }
+              }
+          }
+
+        cell->get_dof_indices(dof_indices);
+
+        constraint_matrix.distribute_local_to_global(local_system,dof_indices,system_matrix);
+        constraint_matrix.distribute_local_to_global(local_mass,dof_indices,mass_matrix);
+      }
+
+    // Hand the assembled mass matrix to the direct solver so that M^{-1} can
+    // later be applied through vmult().
+    inverse_mass_matrix.initialize(mass_matrix);
+  }
+
+
+
+  // @sect4{<code>Diffusion::get_source</code>}
+  //
+  // In this function, the source term of the equation for a given time and a
+  // given point is computed.
+  double Diffusion::get_source (const double time,
+                                const Point<2> &point) const
+  {
+    const double intensity = 10.;
+    const double frequency = numbers::PI/10.;
+    const double b = 5.;
+    const double x = point(0);
+
+    // Contribution of the time derivative:
+    // frequency * cos(frequency*t) * (b*x - x^2)
+    const double time_derivative_part =
+      frequency*std::cos(frequency*time)*(b*x-x*x);
+
+    // Contribution of absorption and diffusion:
+    // sin(frequency*t) * (Sigma_a * (b*x - x^2) + 2*D)
+    const double spatial_part =
+      std::sin(frequency*time) * (absorption_cross_section*(b*x-x*x)
+                                  +
+                                  2.*diffusion_coefficient);
+
+    return intensity* (time_derivative_part
+                       +
+                       spatial_part);
+  }
+
+
+
+  // @sect4{<code>Diffusion::evaluate_diffusion</code>}
+  //
+  // Next, we evaluate the weak form of the diffusion equation at a given time
+  // $t$ and for a given vector $y$. In other words, as outlined in the
+  // introduction, we evaluate $M^{-1}(-{\cal D}y - {\cal A}y + {\cal
+  // S})$. For this, we have to apply the matrix $-{\cal D} - {\cal A}$
+  // (previously computed and stored in the variable
+  // <code>system_matrix</code>) to $y$ and then add the source term which we
+  // integrate as we usually do. (Integrating up the source term could be done
+  // using VectorTools::create_right_hand_side() if you wanted to save a few
+  // lines of code, or wanted to take advantage of doing the integration in
+  // parallel.) The result is then multiplied by $M^{-1}$.
+  Vector<double> Diffusion::evaluate_diffusion (const double time,
+                                                const Vector<double> &y) const
+  {
+    // Start the right hand side with the matrix part, system_matrix * y ...
+    Vector<double> rhs(dof_handler.n_dofs());
+    system_matrix.vmult(rhs,y);
+
+    // ... and then add the integrated source term cell by cell.
+    const QGauss<2> quadrature(fe_degree+1);
+    FEValues<2> fe_values(fe,
+                          quadrature,
+                          update_values | update_quadrature_points | update_JxW_values);
+
+    const unsigned int n_local_dofs  = fe.dofs_per_cell;
+    const unsigned int n_quad_points = quadrature.size();
+
+    Vector<double>  local_source(n_local_dofs);
+    std::vector<types::global_dof_index> dof_indices (n_local_dofs);
+
+    const DoFHandler<2>::active_cell_iterator end_cell = dof_handler.end();
+    for (DoFHandler<2>::active_cell_iterator cell = dof_handler.begin_active();
+         cell!=end_cell; ++cell)
+      {
+        fe_values.reinit (cell);
+        local_source = 0.;
+
+        for (unsigned int q=0; q<n_quad_points; ++q)
+          {
+            const double source_value = get_source(time,
+                                                   fe_values.quadrature_point(q));
+            for (unsigned int i=0; i<n_local_dofs; ++i)
+              local_source(i) += source_value *
+                                 fe_values.shape_value(i,q) *
+                                 fe_values.JxW(q);
+          }
+
+        cell->get_dof_indices(dof_indices);
+        constraint_matrix.distribute_local_to_global(local_source,
+                                                     dof_indices,
+                                                     rhs);
+      }
+
+    // Finally apply the inverse of the mass matrix.
+    Vector<double> result(dof_handler.n_dofs());
+    inverse_mass_matrix.vmult(result,rhs);
+
+    return result;
+  }
+
+
+  // @sect4{<code>Diffusion::id_minus_tau_J_inverse</code>}
+  //
+  // We compute $\left(M-\tau \frac{\partial f}{\partial y}\right)^{-1} M$. This
+  // is done in several steps:
+  //   - compute $M-\tau \frac{\partial f}{\partial y}$
+  //   - invert the matrix to get $\left(M-\tau \frac{\partial f}{\partial y}\right)^{-1}$
+  //   - compute $tmp=My$
+  //   - compute $z=\left(M-\tau \frac{\partial f}{\partial y}\right)^{-1} tmp = \left(M-\tau \frac{\partial f}{\partial y}\right)^{-1} My$
+  //   - return z.
+  Vector<double> Diffusion::id_minus_tau_J_inverse (const double time,
+                                                    const double tau,
+                                                    const Vector<double> &y)
+  {
+    // The matrices used here do not depend on time; the parameter is only
+    // part of the signature.
+    (void) time;
+
+    // Right hand side of the linear solve: M*y.
+    Vector<double> mass_times_y(dof_handler.n_dofs());
+    mass_matrix.vmult(mass_times_y, y);
+
+    // Build M - tau*J in the scratch matrix, J being system_matrix, and hand
+    // it to a direct solver that lives only for this call.
+    mass_minus_tau_Jacobian.copy_from(mass_matrix);
+    mass_minus_tau_Jacobian.add(-tau, system_matrix);
+
+    SparseDirectUMFPACK shifted_matrix_solver;
+    shifted_matrix_solver.initialize(mass_minus_tau_Jacobian);
+
+    // z = (M - tau*J)^{-1} M y
+    Vector<double> z(y);
+    shifted_matrix_solver.vmult(z,mass_times_y);
+
+    return z;
+  }
+
+
+
+  // @sect4{<code>Diffusion::output_results</code>}
+  //
+  // The following function then outputs the solution in vtu files indexed by
+  // the number of the time step and the name of the time stepping method. Of
+  // course, the (exact) result should really be the same for all time
+  // stepping methods, but the output here at least allows us to compare them.
+  // Write the current <code>solution</code> to the file
+  // <code>solution-METHOD-NNN.vtu</code>, where METHOD is a tag derived from
+  // @p method and NNN is @p time_step padded to three digits.
+  //
+  // Every method that the tutorial runs has its own tag, including
+  // Crank-Nicolson. For a value without a case below the tag stays empty and
+  // the file name degenerates to <code>solution--NNN.vtu</code>.
+  void Diffusion::output_results (const unsigned int time_step,
+                                  TimeStepping::runge_kutta_method method) const
+  {
+    std::string method_name;
+
+    switch (method)
+      {
+      case TimeStepping::FORWARD_EULER:
+      {
+        method_name = "forward_euler";
+        break;
+      }
+      case TimeStepping::RK_THIRD_ORDER:
+      {
+        method_name = "rk3";
+        break;
+      }
+      case TimeStepping::RK_CLASSIC_FOURTH_ORDER:
+      {
+        method_name = "rk4";
+        break;
+      }
+      case TimeStepping::BACKWARD_EULER:
+      {
+        method_name = "backward_euler";
+        break;
+      }
+      case TimeStepping::IMPLICIT_MIDPOINT:
+      {
+        method_name = "implicit_midpoint";
+        break;
+      }
+      case TimeStepping::CRANK_NICOLSON:
+      {
+        // Gives Crank-Nicolson output its own tag so that its files do not
+        // end up under the empty tag of the default branch.
+        method_name = "crank_nicolson";
+        break;
+      }
+      case TimeStepping::SDIRK_TWO_STAGES:
+      {
+        method_name = "sdirk";
+        break;
+      }
+      case TimeStepping::HEUN_EULER:
+      {
+        method_name = "heun_euler";
+        break;
+      }
+      case TimeStepping::BOGACKI_SHAMPINE:
+      {
+        // (sic) The spelling is kept so that the names of the output files
+        // do not change.
+        method_name = "bocacki_shampine";
+        break;
+      }
+      case TimeStepping::DOPRI:
+      {
+        method_name = "dopri";
+        break;
+      }
+      case TimeStepping::FEHLBERG:
+      {
+        method_name = "fehlberg";
+        break;
+      }
+      case TimeStepping::CASH_KARP:
+      {
+        method_name = "cash_karp";
+        break;
+      }
+      default:
+      {
+        break;
+      }
+      }
+
+    DataOut<2> data_out;
+
+    data_out.attach_dof_handler(dof_handler);
+    data_out.add_data_vector(solution, "solution");
+
+    data_out.build_patches();
+
+    const std::string filename = "solution-" + method_name + "-" +
+                                 Utilities::int_to_string (time_step, 3) +
+                                 ".vtu";
+    std::ofstream output(filename.c_str());
+    data_out.write_vtu(output);
+  }
+
+
+  // @sect4{<code>Diffusion::explicit_method</code>}
+  //
+  // This function is the driver for all the explicit methods. It calls
+  // <code>evolve_one_time_step</code> which performs one time step.  For
+  // explicit methods, <code>evolve_one_time_step</code> needs to evaluate
+  // $M^{-1}(f(t,y))$, i.e., it needs <code>evaluate_diffusion</code>. Because
+  // <code>evaluate_diffusion</code> is a member function, it needs to be bound
+  // to <code>this</code>. Finally, the solution is output every 10 time steps.
+  void Diffusion::explicit_method (const TimeStepping::runge_kutta_method method,
+                                   const unsigned int                     n_time_steps,
+                                   const double                           initial_time,
+                                   const double                           final_time)
+  {
+    // Uniform step size over [initial_time, final_time]; start from a zero
+    // solution.
+    const double time_step = (final_time-initial_time)/static_cast<double> (n_time_steps);
+    solution = 0.;
+
+    TimeStepping::ExplicitRungeKutta<Vector<double> > explicit_runge_kutta(method);
+
+    // Output of the initial state.
+    output_results(0,method);
+
+    double time = initial_time;
+    for (unsigned int step=0; step<n_time_steps; ++step)
+      {
+        // evaluate_diffusion is a member function and therefore has to be
+        // bound to this object before it can be passed on.
+        time = explicit_runge_kutta.evolve_one_time_step(
+                 std_cxx11::bind(&Diffusion::evaluate_diffusion,
+                                 this,
+                                 std_cxx11::_1,
+                                 std_cxx11::_2),
+                 time,time_step,solution);
+
+        // Write the solution after every tenth completed step.
+        const unsigned int steps_completed = step+1;
+        if (steps_completed%10==0)
+          output_results(steps_completed,method);
+      }
+  }
+
+
+
+  // @sect4{<code>Diffusion::implicit_method</code>}
+  // This function is equivalent to <code>explicit_method</code> but for implicit
+  // methods. When using implicit methods, we need to evaluate $M^{-1}(f(t,y))$
+  // and $\left(I-\tau M^{-1} \frac{\partial f(t,y)}{\partial y}\right)^{-1}$
+  // for which we use the two member functions previously introduced.
+  void Diffusion::implicit_method (const TimeStepping::runge_kutta_method method,
+                                   const unsigned int                     n_time_steps,
+                                   const double                           initial_time,
+                                   const double                           final_time)
+  {
+    const double time_step = (final_time-initial_time)/static_cast<double> (n_time_steps); // uniform step size over the whole interval
+    double time = initial_time;
+    solution = 0.; // every run starts from a zero solution vector
+
+    TimeStepping::ImplicitRungeKutta<Vector<double> > implicit_runge_kutta(method);
+    output_results(0,method); // write the initial state as step 0
+    for (unsigned int i=0; i<n_time_steps; ++i)
+      {
+        time = implicit_runge_kutta.evolve_one_time_step( // the returned value becomes the new current time
+                 std_cxx11::bind(&Diffusion::evaluate_diffusion, // first callback: two placeholder arguments
+                                 this,
+                                 std_cxx11::_1,
+                                 std_cxx11::_2),
+                 std_cxx11::bind(&Diffusion::id_minus_tau_J_inverse, // second callback: three placeholder arguments
+                                 this,
+                                 std_cxx11::_1,
+                                 std_cxx11::_2,
+                                 std_cxx11::_3),
+                 time,time_step,solution);
+
+        if ((i+1)%10==0) // output after every tenth completed step
+          output_results(i+1,method);
+      }
+  }
+
+
+
+  // @sect4{<code>Diffusion::embedded_explicit_method</code>}
+  // This function is the driver for the embedded explicit methods. It requires
+  // more parameters:
+  //   - coarsen_param: factor multiplying the current time step when the error
+  //   is below the threshold.
+  //   - refine_param: factor multiplying the current time step when the error
+  //   is above the threshold.
+  //   - min_delta: smallest time step acceptable.
+  //   - max_delta: largest time step acceptable.
+  //   - refine_tol: threshold above which the time step is refined.
+  //   - coarsen_tol: threshold below which the time step is coarsened.
+  // Embedded methods use a guessed time step. If the error using this time step
+  // is too large, the time step will be reduced. If the error is below the
+  // threshold, a larger time step will be tried for the next time step.
+  // <code>delta_t_guess</code> is the guessed time step produced by the embedded method.
+  unsigned int Diffusion::embedded_explicit_method(const TimeStepping::runge_kutta_method method,
+                                                   const unsigned int                     n_time_steps,
+                                                   const double                           initial_time,
+                                                   const double                           final_time)
+  {
+    double time_step = (final_time-initial_time)/static_cast<double> (n_time_steps); // initial guess only; overwritten after every step
+    double time = initial_time;
+    const double coarsen_param = 1.2; // factor (>1) applied to the step size when coarsening
+    const double refine_param = 0.8; // factor (<1) applied to the step size when refining
+    const double min_delta = 1e-8;
+    const double max_delta = 10*time_step; // ten times the initial step guess
+    const double refine_tol = 1e-1;
+    const double coarsen_tol = 1e-5;
+    solution = 0.; // every run starts from a zero solution vector
+
+    TimeStepping::EmbeddedExplicitRungeKutta<Vector<double> >
+    embedded_explicit_runge_kutta(method,
+                                  coarsen_param,
+                                  refine_param,
+                                  min_delta,
+                                  max_delta,
+                                  refine_tol,
+                                  coarsen_tol);
+    output_results (0, method); // write the initial state as step 0
+
+    // Now for the time loop. The last time step is chosen such that the final
+    // time is exactly reached.
+    unsigned int n_steps=0;
+    while (time<final_time)
+      {
+        if (time+time_step>final_time)
+          time_step = final_time-time; // shorten the step so it ends at final_time
+
+        time = embedded_explicit_runge_kutta.evolve_one_time_step( // the returned value becomes the new current time
+                 std_cxx11::bind(&Diffusion::evaluate_diffusion,this,std_cxx11::_1,std_cxx11::_2),
+                 time,time_step,solution);
+
+        if ((n_steps+1)%10==0) // output after every tenth completed step
+          output_results(n_steps+1,method);
+
+        time_step = embedded_explicit_runge_kutta.get_status().delta_t_guess; // next step uses the size suggested by the method
+        ++n_steps;
+      }
+
+    return n_steps; // number of passes through the time loop
+  }
+
+
+
+  // @sect4{<code>Diffusion::run</code>}
+  //
+  // The following is the main function of the program. At the top, we create
+  // the grid (a [0,5]x[0,5] square) and refine it four times to get a mesh
+  // that has 16 by 16 cells, for a total of 256.  We then set the boundary
+  // indicator to 1 for those parts of the boundary where $x=0$ and $x=5$.
+  void Diffusion::run ()
+  {
+    GridGenerator::hyper_cube(triangulation, 0., 5.);
+    triangulation.refine_global(4);
+
+    Triangulation<2>::active_cell_iterator
+    cell = triangulation.begin_active(),
+    endc = triangulation.end();
+
+    for (; cell!=endc; ++cell)
+      for (unsigned int f=0; f<GeometryInfo<2>::faces_per_cell; ++f)
+        if (cell->face(f)->at_boundary()) // only boundary faces receive an id
+          {
+            if ((cell->face(f)->center()[0]==0.) || (cell->face(f)->center()[0]==5.)) // face center lies on x=0 or x=5
+              cell->face(f)->set_boundary_id(1);
+            else
+              cell->face(f)->set_boundary_id(0);
+          }
+
+    // Next, we set up the linear systems and fill them with content so that
+    // they can be used throughout the time stepping process:
+    setup_system();
+
+    assemble_system();
+
+    // Finally, we solve the diffusion problem using several of the
+    // Runge-Kutta methods implemented in namespace TimeStepping, each time
+    // outputting the error at the end time. (As explained in the
+    // introduction, since the exact solution is zero at the final time, the
+    // error equals the numerical solution and can be computed by just taking
+    // the $l_2$ norm of the solution vector.)
+    unsigned int       n_steps      = 0; // receives the step count returned by the embedded methods
+    const unsigned int n_time_steps = 200; // step count for fixed-step methods; initial step guess for embedded ones
+    const double       initial_time = 0.;
+    const double       final_time   = 10.;
+
+    std::cout << "Explicit methods:" << std::endl;
+    explicit_method (TimeStepping::FORWARD_EULER,
+                     n_time_steps,
+                     initial_time,
+                     final_time);
+    std::cout << "Forward Euler:            error=" << solution.l2_norm() << std::endl;
+
+    explicit_method (TimeStepping::RK_THIRD_ORDER,
+                     n_time_steps,
+                     initial_time,
+                     final_time);
+    std::cout << "Third order Runge-Kutta:  error=" << solution.l2_norm() << std::endl;
+
+    explicit_method (TimeStepping::RK_CLASSIC_FOURTH_ORDER,
+                     n_time_steps,
+                     initial_time,
+                     final_time);
+    std::cout << "Fourth order Runge-Kutta: error=" << solution.l2_norm() << std::endl;
+    std::cout << std::endl;
+
+
+    std::cout << "Implicit methods:" << std::endl;
+    implicit_method (TimeStepping::BACKWARD_EULER,
+                     n_time_steps,
+                     initial_time,
+                     final_time);
+    std::cout << "Backward Euler:           error=" << solution.l2_norm() << std::endl;
+
+    implicit_method (TimeStepping::IMPLICIT_MIDPOINT,
+                     n_time_steps,
+                     initial_time,
+                     final_time);
+    std::cout << "Implicit Midpoint:        error=" << solution.l2_norm() << std::endl;
+
+    implicit_method (TimeStepping::CRANK_NICOLSON,
+                     n_time_steps,
+                     initial_time,
+                     final_time);
+    std::cout << "Crank-Nicolson:           error=" << solution.l2_norm() << std::endl;
+
+    implicit_method (TimeStepping::SDIRK_TWO_STAGES,
+                     n_time_steps,
+                     initial_time,
+                     final_time);
+    std::cout << "SDIRK:                    error=" << solution.l2_norm() << std::endl;
+    std::cout << std::endl;
+
+
+    std::cout << "Embedded explicit methods:" << std::endl;
+    n_steps = embedded_explicit_method (TimeStepping::HEUN_EULER,
+                                        n_time_steps,
+                                        initial_time,
+                                        final_time);
+    std::cout << "Heun-Euler:               error=" << solution.l2_norm() << std::endl;
+    std::cout << "                steps performed=" << n_steps << std::endl;
+
+    n_steps = embedded_explicit_method (TimeStepping::BOGACKI_SHAMPINE,
+                                        n_time_steps,
+                                        initial_time,
+                                        final_time);
+    std::cout << "Bogacki-Shampine:         error=" << solution.l2_norm() << std::endl;
+    std::cout << "                steps performed=" << n_steps << std::endl;
+
+    n_steps = embedded_explicit_method (TimeStepping::DOPRI,
+                                        n_time_steps,
+                                        initial_time,
+                                        final_time);
+    std::cout << "Dopri:                    error=" << solution.l2_norm() << std::endl;
+    std::cout << "                steps performed=" << n_steps << std::endl;
+
+    n_steps = embedded_explicit_method (TimeStepping::FEHLBERG,
+                                        n_time_steps,
+                                        initial_time,
+                                        final_time);
+    std::cout << "Fehlberg:                 error=" << solution.l2_norm() << std::endl;
+    std::cout << "                steps performed=" << n_steps << std::endl;
+
+    n_steps = embedded_explicit_method (TimeStepping::CASH_KARP,
+                                        n_time_steps,
+                                        initial_time,
+                                        final_time);
+    std::cout << "Cash-Karp:                error=" << solution.l2_norm() << std::endl;
+    std::cout << "                steps performed=" << n_steps << std::endl;
+  }
+}
+
+
+
+// @sect3{The <code>main()</code> function}
+//
+// The following <code>main</code> function is similar to previous examples
+// and need not be commented on.
+int main ()
+{
+  try
+    {
+      Step52::Diffusion diffusion;
+      diffusion.run();
+    }
+  catch (std::exception &exc) // standard exceptions: print what() and fail
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1; // nonzero exit status on failure
+    }
+  catch (...) // anything not derived from std::exception
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1; // nonzero exit status on failure
+    };
+
+  return 0;
+}
diff --git a/examples/step-53/CMakeLists.txt b/examples/step-53/CMakeLists.txt
new file mode 100644
index 0000000..799e962
--- /dev/null
+++ b/examples/step-53/CMakeLists.txt
@@ -0,0 +1,50 @@
+##
+#  CMake script for the step-53 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-53")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#   FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#   FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#   SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+#
+# Are all dependencies fulfilled?
+#
+IF(NOT DEAL_II_WITH_ZLIB)
+  MESSAGE(FATAL_ERROR "
+Error! The deal.II library found at ${DEAL_II_PATH} was not configured with
+  DEAL_II_WITH_ZLIB = ON
+One or all of these are OFF in your installation but are required for this tutorial step."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-53/doc/builds-on b/examples/step-53/doc/builds-on
new file mode 100644
index 0000000..2302bdb
--- /dev/null
+++ b/examples/step-53/doc/builds-on
@@ -0,0 +1 @@
+step-49
diff --git a/examples/step-53/doc/intro.dox b/examples/step-53/doc/intro.dox
new file mode 100644
index 0000000..93519dc
--- /dev/null
+++ b/examples/step-53/doc/intro.dox
@@ -0,0 +1,371 @@
+<br>
+
+<i>This program was contributed by Wolfgang Bangerth and Luca Heltai, using
+data provided by D. Sarah Stamps.</i>
+
+@note This program elaborates on concepts of geometry and the classes that
+implement it. These classes are grouped into the documentation module on @ref
+manifold "Manifold description for triangulations". See there for additional
+information.
+
+
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+Partial differential equations for realistic problems are often posed on
+domains with complicated geometries. To provide just a few examples, consider
+these cases:
+- Among the two arguably most important industrial applications for the finite
+  element method, aerodynamics and more generally fluid dynamics is
+  one. Computer simulations today are used in the design of every airplane,
+  car, train and ship. The domain in which the partial differential equation
+  is posed is, in these cases, the air surrounding the plane with its wings,
+  flaps and engines; the air surrounding the car with its wheel, wheel wells,
+  mirrors and, in the case of race cars, all sorts of aerodynamic equipment;
+  the air surrounding the train with its wheels and gaps between cars. In the
+  case of ships, the domain is the water surrounding the ship with its rudders
+  and propellers.
+- The other of the two big applications of the finite element method is
+  structural engineering in which the domains are bridges, airplane nacelles
+  and wings, and other solid bodies of often complicated shapes.
+- Finite element modeling is also often used to describe the generation and
+  propagation of earthquake waves. In these cases, one needs to accurately
+  represent the geometry of faults in the Earth crust. Since faults intersect,
+  dip at angles, and are often not completely straight, domains are frequently
+  very complex.
+One could cite many more examples of complicated geometries in which one wants
+to pose and solve a partial differential equation. What this shows is that the
+"real" world is much more complicated than what we have shown in almost all of
+the tutorial programs preceding this one. 
+
+This program is therefore devoted to showing how one deals with complex
+geometries using a concrete application. In particular, what it shows is how
+we make a mesh fit the domain we want to solve on. On the other hand, what the
+program does not show is how to create a coarse mesh for a domain. The process to
+arrive at a coarse mesh is called "mesh generation" and there are a number of
+high-quality programs that do this much better than we could ever
+implement. However, deal.II does have the ability to read in meshes in many
+formats generated by mesh generators and then make them fit a given shape,
+either by deforming a mesh or refining it a number of times until it fits. The
+deal.II Frequently Asked Questions page referenced from http://www.dealii.org/
+provides resources to mesh generators.
+
+
+<h2>Where geometry and meshes intersect</h2>
+
+Let us assume that you have a complex domain and that you already have a
+coarse mesh that somehow represents the general features of the domain. Then
+there are two situations in which it is necessary to describe to a deal.II
+program the details of your geometry:
+
+- Mesh refinement: Whenever a cell is refined, it is necessary to introduce
+  new vertices in the Triangulation. In the simplest case, one assumes that
+  the objects that make up the Triangulation are straight line segments, a
+  bi-linear surface or a tri-linear volume. The next vertex is then simply put
+  into the middle of the old ones. However, for curved boundaries or if we
+  want to solve a PDE on a curved, lower-dimensional manifold embedded in a
+  higher-dimensional space, this is insufficient since it will not respect the
+  actual geometry. We will therefore have to tell Triangulation where to put
+  new points.
+
+- Integration: When using higher order finite element methods, it is often
+  necessary to compute integrals using curved approximations of the boundary,
+  i.e., describe each edge or face of cells as curves, instead of straight
+  line segments or bilinear patches. The same is, of course, true when
+  integrating boundary terms (e.g., inhomogeneous Neumann boundary
+  conditions). For the purpose of integration, the various Mapping classes
+  then provide the transformation from the reference cell to the actual cell.
+
+In both cases, we need a way to provide information about the geometry of the
+domain at the level of an individual cell, its faces and edges. This is where
+the Manifold class comes into play. Manifold is an abstract base class that
+only defines an interface by which the Triangulation and Mapping classes can
+query geometric information about the domain. Conceptually, Manifold sees the
+world in a way not dissimilar to how the mathematical subdiscipline geometry
+sees it: a domain is essentially just a collection of points that is somehow
+equipped with the notion of a distance between points so that we can obtain a
+point "in the middle" of some other points.
+
+deal.II provides a number of classes that implement the interface provided by
+Manifold for a variety of common geometries. On the other hand, in this
+program we will consider only a very common and much simpler case, namely the
+situation where (a part of) the domain we want to solve on can be described by
+transforming a much simpler domain (we will call this the "reference domain").
+In the language of mathematics, this means
+that the (part of the) domain is a <a
+href="http://en.wikipedia.org/wiki/Chart_%28topology%29">chart</a>. Charts are
+described by a smooth function that maps from the simpler domain to the chart
+(the "push-forward" function) and its inverse (the "pull-back" function). If
+the domain as a whole is not a chart (e.g., the surface of a sphere), then it
+can often be described as a collection of charts (e.g., the northern
+hemisphere and the southern hemisphere are each charts) and the domain can then
+be described by an <a
+href="http://en.wikipedia.org/wiki/Atlas_%28topology%29">atlas</a>. 
+
+If a domain can be decomposed into an atlas, all we need to do is provide the
+pull-back and push-forward functions for each of the charts. In deal.II, this
+means providing a class derived from ChartManifold, and this is precisely what
+we will do in this program.
+
+
+<h2>The example case</h2>
+
+To illustrate how one describes geometries using charts in deal.II, we will
+consider a case that originates in an application of the <a
+href="http://aspect.dealii.org">ASPECT mantle convection code</a>, using a
+data set provided by D. Sarah Stamps. In the concrete application, we were
+interested in describing flow in the Earth mantle under the <a
+href="http://en.wikipedia.org/wiki/East_African_rift">East African Rift</a>, a
+zone where two continental plates drift apart. Not to beat around the bush,
+the geometry we want to describe looks like this:
+
+<img src="http://www.dealii.org/images/steps/developer/step-53.topo.png" alt="">
+
+In particular, though you cannot see this here, the top surface is not
+just colored by the elevation but is, in fact, deformed to follow the 
+correct topography.
+While the actual application is not relevant here, the geometry is. The domain
+we are interested in is a part of the Earth that ranges from the surface to a
+depth of 500km, from 26 to 35 degrees East of the Greenwich meridian, and from
+5 degrees North of the equator to 10 degrees South.
+
+This description of the geometry suggests to start with a box 
+$\hat U=[26,35]\times[-10,5]\times[-500000,0]$ (measured in degrees,
+degrees, and meters) and to provide a map $\varphi$ so
+that $\varphi^{-1}(\hat U)=\Omega$ where $\Omega$ is the domain we
+seek. $(\Omega,\varphi)$ is then a chart, $\varphi$ the pull-back operator, and
+$\varphi^{-1}$ the push-forward operator. If we need a point $q$ that is the
+"average" of other points $q_i\in\Omega$, the ChartManifold class then first
+applies the pull-back to obtain $\hat q_i=\varphi(q_i)$, averages these to a
+point $\hat q$ and then computes $q=\varphi^{-1}(\hat q)$.
+
+Our goal here is therefore to implement a class that describes $\varphi$ and
+$\varphi^{-1}$. If Earth was a sphere, then this would not be difficult: if we
+denote by $(\hat \phi,\hat \theta,\hat d)$ the points of $\hat U$ (i.e.,
+longitude counted eastward, latitude counted northward, and elevation relative
+to zero depth), then
+@f[
+  \mathbf x = \varphi^{-1}(\hat \phi,\hat \theta,\hat d)
+  = (R+\hat d) (\cos\hat \phi\cos\hat \theta, \sin\hat \phi\cos\hat \theta, \sin\hat \theta)^T
+@f]
+provides coordinates in a Cartesian coordinate system, where $R$ is the radius
+of the sphere. However, the Earth is not a sphere:
+
+<ol>
+<li> It is flattened at the poles and larger at the equator: the semi-major axis
+  is approximately 22km longer than the semi-minor axis. We will account for
+  this using the <a href="http://en.wikipedia.org/wiki/WGS84">WGS 84</a>
+  reference standard for the Earth shape. The formula used in WGS 84 to obtain
+  a position in Cartesian coordinates from longitude, latitude, and elevation
+  is 
+@f[
+  \mathbf x = \varphi_\text{WGS84}^{-1}(\phi,\theta,d)
+  = \left(
+    \begin{array}{c}
+     (\bar R(\theta)+d) \cos\phi\cos\theta, \\
+     (\bar R(\theta)+d) \sin\phi\cos\theta, \\
+     ((1-e^2)\bar R(\theta)+d) \sin\theta
+    \end{array}
+    \right),
+@f]
+  where $\bar R(\theta)=\frac{R}{\sqrt{1-(e \sin\theta)^2}}$, and radius and
+  ellipticity are given by $R=6378137\text{m}, e=0.081819190842622$. In this formula, 
+  we assume that the arguments to sines and cosines are evaluated in degrees, not
+  radians (though we will have to change this assumption in the code).
+
+<li> It has topography in the form of mountains and valleys. We will account for
+  this using real topography data (see below for a description of where
+  this data comes from). Using this data set, we can look up elevations on a
+  latitude-longitude mesh laid over the surface of the Earth. Starting with
+  the box $\hat U=[26,35]\times[-10,5]\times[-500000,0]$, we will therefore 
+  first stretch it in vertical direction before handing it off to the WGS 84
+  function: if $h(\hat\phi,\hat\theta)$ is the height at longitude $\hat\phi$
+  and latitude $\hat\theta$, then we define
+@f[
+  (\phi,\theta,d) =
+  \varphi_\text{topo}^{-1}(\hat\phi,\hat\theta,\hat d)
+  = \left(
+      \hat\phi,
+      \hat\theta,
+      \hat d + \frac{\hat d+500000}{500000}h(\hat\phi,\hat\theta)
+    \right).
+@f]
+  Using this function, the top surface of the box $\hat U$ is displaced to the
+  correct topography, the bottom surface remains where it was, and points in
+  between are linearly interpolated.
+</ol>
+
+Using these two functions, we can then define the entire push-forward function
+$\varphi^{-1}: \hat U \rightarrow \Omega$ as
+@f[
+  \mathbf x
+  =
+  \varphi^{-1}(\hat\phi,\hat\theta,\hat d)
+  =
+  \varphi_\text{WGS84}^{-1}(\varphi_\text{topo}^{-1}(\hat\phi,\hat\theta,\hat d)).
+@f]
+In addition, we will have to define the inverse of this function, the
+pull-back operation, which we can write as
+@f[
+  (\hat\phi,\hat\theta,\hat d)
+  =
+  \varphi(\mathbf x)
+  =
+  \varphi_\text{topo}(\varphi_\text{WGS84}(\mathbf x)).
+@f]
+We can obtain one of the components of this function by inverting the formula above:
+@f[
+  (\hat\phi,\hat\theta,\hat d) =
+  \varphi_\text{topo}(\phi,\theta,d)
+  = \left(
+      \phi,
+      \theta,
+      500000\frac{d-h(\phi,\theta)}{500000+h(\phi,\theta)}
+    \right).
+@f]
+Computing $\varphi_\text{WGS84}(\mathbf x)$ is also possible though a lot more
+awkward. We won't show the formula here but instead only provide the implementation
+in the program.
+
+
+<h2>Implementation</h2>
+
+There are a number of issues we need to address in the program. At the largest scale, 
+we need to write a class that implements the interface of ChartManifold. This involves
+a function <code>push_forward()</code> that takes a point
+in the reference domain $\hat U$ and transforms it into real space using the function
+$\varphi^{-1}$ outlined above, and its inverse function <code>pull_back()</code>
+implementing $\varphi$. We will do so in the <code>AfricaGeometry</code> class below
+that looks, in essence, like this:
+@code
+  class AfricaGeometry : public ChartManifold<3,3>
+  {
+  public:
+    virtual
+    Point<3>
+    pull_back(const Point<3> &space_point) const;
+
+    virtual
+    Point<3>
+    push_forward(const Point<3> &chart_point) const;
+
+  private:
+    ... some member variables and other member functions...;
+  };
+@endcode
+
+The transformations above have two parts: the WGS 84 transformations and the topography
+transformation. Consequently, the <code>AfricaGeometry</code> class will have 
+additional (non-virtual) member functions 
+<code>AfricaGeometry::push_forward_wgs84()</code> and
+<code>AfricaGeometry::push_forward_topo()</code> that implement these two pieces, and
+corresponding pull back functions.
+
+The WGS 84 transformation functions are not particularly interesting (even though the
+formulas they implement are impressive). The more interesting part is the topography
+transformation. Recall that for this, we needed to evaluate the elevation function
+$h(\hat\phi,\hat\theta)$. There is of course no formula for this: Earth is what it is,
+the best one can do is look up the altitude from some table. This is, in fact, what we
+will do.
+
+The data we use was originally created by the  <a
+href="http://en.wikipedia.org/wiki/Shuttle_Radar_Topography_Mission">Shuttle
+Radar Topography Mission</a>, was downloaded from the US Geologic Survey
+(USGS) and processed by D. Sarah Stamps who also wrote the initial version of
+the WGS 84 transformation functions. The topography data so processed is
+stored in a file <code>topography.txt.gz</code> that, when unpacked
+looks like this:
+@code
+6.983333 25.000000 700
+6.983333 25.016667 692
+6.983333 25.033333 701
+6.983333 25.050000 695
+6.983333 25.066667 710
+6.983333 25.083333 702
+...
+-11.983333 35.950000 707
+-11.983333 35.966667 687
+-11.983333 35.983333 659
+@endcode
+The data is formatted as <code>latitude longitude elevation</code> where the first two
+columns are provided in degrees North of the equator and degrees East of the Greenwich
+meridian. The final column is given in meters above the WGS 84 zero elevation. 
+
+In the transformation functions, we need to evaluate $h(\hat\phi,\hat\theta)$ for a given
+longitude $\hat\phi$ and latitude $\hat\theta$. In general, this data point will not be
+available and we will have to interpolate between adjacent data points. Writing such an
+interpolation routine is not particularly difficult, but it is a bit tedious and error
+prone. Fortunately, we can somehow shoehorn this data set into an existing class:
+Functions::InterpolatedUniformGridData . Unfortunately, the class does not fit the bill
+quite exactly and so we need to work around it a bit. The problem comes from the way
+we initialize this class: in its simplest form, it takes a stream of values that it 
+assumes form an equispaced mesh in the $x-y$ plane (or, here, the $\phi-\theta$ plane).
+Which is what they do here, sort of: they are ordered latitude first, longitude second;
+and more awkwardly, the first column starts at the largest values and counts down,
+rather than the usual other way around. 
+
+Now, while tutorial programs are meant to illustrate how to code with deal.II, they do
+not necessarily have to satisfy the same quality standards as one would have to do
+with production codes. In a production code, we would write a function that reads the
+data and (i) automatically determines the extents of the first and second column,
+(ii) automatically determines the number of data points in each direction, (iii) does
+the interpolation regardless of the order in which data is arranged, if necessary
+by switching the order between reading and presenting it to the
+Functions::InterpolatedUniformGridData class.
+
+On the other hand, tutorial programs are best if they are short and demonstrate key
+points rather than dwell on unimportant aspects and, thereby, obscure what we really
+want to show. Consequently, we will allow ourselves a bit of leeway:
+- since this program is intended solely for a particular geometry around the area 
+  of the East-African rift and since this is precisely the area described by the data 
+  file, we will hardcode in the program that there are 
+  $1139\times 660$ pieces of data;
+- we will hardcode the boundaries of the data 
+  $[-11.98333^\circ,6.983333^\circ]\times[25^\circ,35.98333^\circ]$;
+- we will lie to the Functions::InterpolatedUniformGridData class: the class will
+  only see the data in the last column of this data file, and we will pretend that
+  the data is arranged in a way that there are 1139 data points in the first
+  coordinate direction that are arranged in <i>ascending</i> order but in an
+  interval $[-6.983333^\circ,11.98333^\circ]$ (note the negated bounds). Then,
+  when we need to look something up for a latitude $\hat\theta$, we can ask the
+  interpolating table class for a value at $-\hat\theta$. With this little
+  trick, we can avoid having to switch around the order of data as read from
+  file.
+  
+All of this then calls for a class that essentially looks like this:
+@code
+  class AfricaTopography
+  {
+  public:
+    AfricaTopography ()
+      :
+      topography_data (...initialize somehow...)
+    {}
+
+    double value (const double lon, const double lat) const
+    {
+      return topography_data.value (Point<2>(-lat * 180/numbers::PI,
+                                             lon * 180/numbers::PI));
+    }
+
+  private:
+    const Functions::InterpolatedUniformGridData<2> topography_data;
+  };
+@endcode
+
+Note how the <code>value()</code> function negates the latitude. It also switches
+from the format $\phi,\theta$ that we use everywhere else to the latitude-longitude
+format used in the table. Finally, it takes its arguments in radians as that is what
+we do everywhere else in the program, but then converts them to the degree-based
+system used for table lookup. As you will see in the implementation below, the class
+has a few more (static) member functions that we will call in the initialization
+of the <code>topography_data</code> member variable: the class type of this variable
+has a constructor that allows us to set everything right at construction time,
+rather than having to fill data later on, but this constructor takes a number of
+objects that can't be constructed in-place (at least not in C++98). Consequently,
+the construction of each of the objects we want to pass in the initialization happens
+in a number of static member functions.
+
+Having discussed the general outline of how we want to implement things, let us go
+to the program and show how it is done in practice.
+
diff --git a/examples/step-53/doc/kind b/examples/step-53/doc/kind
new file mode 100644
index 0000000..c1d9154
--- /dev/null
+++ b/examples/step-53/doc/kind
@@ -0,0 +1 @@
+techniques
diff --git a/examples/step-53/doc/results.dox b/examples/step-53/doc/results.dox
new file mode 100644
index 0000000..beaaf74
--- /dev/null
+++ b/examples/step-53/doc/results.dox
@@ -0,0 +1,282 @@
+<h1>Results</h1>
+
+Running the program produces a mesh file <code>mesh.vtu</code> that we can
+visualize with any of the usual visualization programs that can read the VTU
+file format. If one just looks at the mesh itself, it is actually very difficult
+to see anything that doesn't just look like a perfectly round piece of a
+sphere (though if one modified the program so that it does produce a sphere and
+looked at them at the same time, the difference between the overall sphere and
+WGS 84 shape is quite apparent). Apparently, Earth is actually quite a flat place.
+Of course we already know this from satellite pictures. 
+However, we can tease out something more by
+coloring cells by their volume. This both produces slight variations in hue
+along the top surface and something for the visualization programs to apply
+their shading algorithms to (because the top surfaces of the cells are now no
+longer just tangential to a sphere but tilted):
+
+<img src="http://www.dealii.org/images/steps/developer/step-53.mesh.png" alt="">
+
+Yet, at least as far as visualizations are concerned, this is still not too
+impressive. Rather, let us visualize things in a way so that we show the
+actual elevation along the top surface. In other words, we want a picture like
+this, with an incredible amount of detail:
+
+<img src="http://www.dealii.org/images/steps/developer/step-53.topo.png" alt="">
+
+A zoom-in of this picture shows the vertical displacement quite clearly (here,
+looking from the West-Northwest over the rift valley, the triple peaks
+of 
+<a href="http://en.wikipedia.org/wiki/Mount_Stanley">Mount Stanley</a>,
+<a href="http://en.wikipedia.org/wiki/Mount_Speke">Mount Speke</a>, and
+<a href="http://en.wikipedia.org/wiki/Mount_Baker_%28Uganda%29">Mount Baker</a>
+in the
+<a href="http://en.wikipedia.org/wiki/Rwenzori_Mountains">Rwenzori Range</a>,
+<a href="http://en.wikipedia.org/wiki/Lake_George_%28Uganda%29">Lake
+George</a>
+and toward the great flatness of
+<a href="http://en.wikipedia.org/wiki/Lake_Victoria">Lake Victoria</a>):
+
+<img src="http://www.dealii.org/images/steps/developer/step-53.topozoom.png" alt="">
+
+
+These images were produced with three small modifications:
+<ol>
+  <li> An additional seventh mesh refinement towards the top surface for the
+  first of these two pictures, and a total of nine for the second. In the
+  second image, the horizontal mesh size is approximately 1.5km, and just
+  under 1km in vertical direction. (The picture was also created using a
+  more resolved data set; however, it is too big to distribute as part of
+  the tutorial.)
+  
+  <li> The addition of the following function that, given a point 
+  <code>x</code> computes the elevation by converting the point to
+  reference WGS 84 coordinates and only keeping the depth variable (the
+  function is, consequently, a simplified version of the
+  <code>AfricaGeometry::pull_back_wgs84()</code> function):
+
+@code
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/numerics/vector_tools.h>
+
+
+double get_elevation (const Point<3> &x)   // depth variable of x in WGS 84 reference coordinates
+  {
+    const double R           = 6378137;               // same value as AfricaGeometry::R
+    const double ellipticity = 8.1819190842622e-2;    // same value as AfricaGeometry::ellipticity
+    // The steps below mirror AfricaGeometry::pull_back_wgs84(), minus the longitude.
+    const double b     = std::sqrt(R * R * (1 - ellipticity * ellipticity));
+    const double ep    = std::sqrt((R * R - b * b) / (b * b));
+    const double p     = std::sqrt(x(0) * x(0) + x(1) * x(1));   // distance of x from the z axis
+    const double th    = std::atan2(R * x(2), b * p);
+    const double theta = std::atan2((x(2) + ep * ep * b * std::sin(th) * std::sin(th) * std::sin(th)),
+                                      (p - (ellipticity * ellipticity * R  * (std::cos(th) * std::cos(th) * std::cos(th)))));
+    const double R_bar = R / (std::sqrt(1 - ellipticity * ellipticity * std::sin(theta) * std::sin(theta)));
+    const double R_plus_d = p / std::cos(theta);
+    // Only the depth component is kept: (R_bar + d) - R_bar.
+    return R_plus_d - R_bar;
+  }
+@endcode
+
+  <li>Adding the following piece to the bottom of the <code>run()</code> function:
+
+@code
+      FE_Q<3>       fe(1);
+      DoFHandler<3> dof_handler (triangulation);
+      dof_handler.distribute_dofs(fe);
+
+      Vector<double> elevation (dof_handler.n_dofs());
+      {
+        std::map<unsigned int,double> boundary_values;
+        VectorTools::interpolate_boundary_values(dof_handler,
+                                                 5,
+                                                 ScalarFunctionFromFunctionObject<3>(get_elevation),
+                                                 boundary_values);
+        for (std::map<unsigned int,double>::const_iterator p = boundary_values.begin(); 
+             p!=boundary_values.end(); ++p)
+          elevation[p->first] = p->second;
+      }
+
+      DataOut<3>    data_out;
+      data_out.attach_dof_handler(dof_handler);
+      data_out.add_data_vector (elevation, "elevation");
+      data_out.build_patches();
+
+      std::ofstream out ("data.vtu");
+      data_out.write_vtu (out);
+@endcode
+</ol>
+This last piece of code first creates a $Q_1$ finite element space on the mesh.
+It then (ab)uses VectorTools::interpolate_boundary_values() to evaluate the
+elevation function for every node at the top boundary (the one with boundary
+indicator 5). We here wrap the call to <code>get_elevation()</code> with the
+ScalarFunctionFromFunctionObject class to make a regular C++ function look
+like an object of a class derived from the Function class that we want
+to use in VectorTools::interpolate_boundary_values(). Having so gotten a list
+of degrees of freedom located at the top boundary and corresponding elevation
+values, we just go down this list and set these elevations in the 
+<code>elevation</code> vector (leaving all interior degrees of freedom at
+their original zero value). This vector is then output using DataOut as
+usual and can be visualized as shown above.
+
+
+<h3>Issues with adaptively refined meshes generated this way</h3>
+
+If you zoomed in on the mesh shown above and looked closely enough, you would
+find that at hanging nodes, the two small edges connecting to the hanging
+nodes are not in exactly the same location as the large edge of the
+neighboring cell. This can be shown more clearly by using a different surface
+description in which we enlarge the vertical topography to enhance the effect
+(courtesy of Alexander Grayver):
+
+<img src="http://www.dealii.org/images/steps/developer/step-53.smooth-geometry.png" alt="">
+
+So what is happening here? Partly, this is only a result of visualization, but
+there is an underlying real cause as well:
+
+<ul>
+  <li>When you visualize a mesh using any of the common visualization
+  programs, what they really show you is just a set of edges that are plotted
+  as straight lines in three-dimensional space. This is so because almost all
+  data file formats for visualizing data only describe hexahedral cells as a
+  collection of eight vertices in 3d space, and do not allow for any more
+  complicated descriptions. (This is the main reason why
+  DataOut::build_patches() takes an argument that can be set to something
+  larger than one.) These linear edges may be the edges of the cell you do
+  actual computations on, or they may not, depending on what kind of mapping
+  you use when you do your integrations using FEValues. By default, of course,
+  FEValues uses a linear mapping (i.e., an object of class MappingQ1) and in
+  that case a 3d cell is indeed described exclusively by its 8 vertices and
+  the volume it fills is a trilinear interpolation between these points,
+  resulting in linear edges. But, you could also have used tri-quadratic,
+  tri-cubic, or even higher order mappings and in these cases the volume of
+  each cell will be bounded by quadratic, cubic or higher order polynomial
+  curves. Yet, you only get to see these with linear edges in the
+  visualization program because, as mentioned, file formats do not allow to
+  describe the real geometry of cells.
+
+  <li>That said, let us for simplicity assume that you are indeed using a
+  trilinear mapping, then the image shown above is a faithful representation
+  of the cells on which you form your integrals. In this case, indeed the
+  small cells at a hanging node do not, in general, snugly fit against the
+  large cell but leave a gap or may intersect the larger cell. Why is this?
+  Because when the triangulation needs a new vertex on an edge it wants to
+  refine, it asks the manifold description where this new vertex is supposed
+  to be, and the manifold description duly returns such a point by (in the
+  case of a geometry derived from ChartManifold) pulling the adjacent points
+  of the line back to the reference domain, averaging their locations, and
+  pushing forward this new location to the real domain. But this new location
+  is not usually along a straight line (in real space) between the adjacent
+  vertices and consequently the two small straight lines forming the refined
+  edge do not lie exactly on the one large straight line forming the unrefined
+  side of the hanging node.
+</ul>
+
+The situation is slightly more complicated if you use a higher order mapping
+using the MappingQ class, but not fundamentally different. Let's take a
+quadratic mapping for the moment (nothing fundamental changes with even higher
+order mappings). Then you need to imagine each edge of the cells you integrate
+on as a quadratic curve despite the fact that you will never actually see it
+plotted that way by a visualization program. But imagine it that way for a
+second. So which quadratic curve does MappingQ take? It is the quadratic curve
+that goes through the two vertices at the end of the edge as well as a point
+in the middle that it queries from the manifold. In the case of the long edge
+on the unrefined side, that's of course exactly the location of the hanging
+node, so the quadratic curve describing the long edge does go through the
+hanging node, unlike in the case of the linear mapping. But the two small
+edges are also quadratic curves; for example, the left small edge will go
+through the left vertex of the long edge and the hanging node, plus a point it
+queries halfway in between from the manifold. Because, as before, the point
+the manifold returns halfway along the left small edge is rarely exactly on
+the quadratic curve describing the long edge, the quadratic short edge will
+typically not coincide with the left half of the quadratic long edge, and the
+same is true for the right short edge. In other words, again, the geometries
+of the large cell and its smaller neighbors at hanging nodes do not touch
+snugly.
+
+This all begs two questions: first, does it matter, and second, could this be
+fixed. Let us discuss these in the following:
+
+<ul>
+  <li>Does it matter? It is almost certainly true that this depends on the
+  equation you are solving. For example, it is known that solving the Euler
+  equations of gas dynamics on complex geometries requires highly accurate
+  boundary descriptions to ensure convergence of quantities that measure
+  the flow close to the boundary. On the other hand, equations with elliptic
+  components (e.g., the Laplace or Stokes equations) are typically rather
+  forgiving of these issues: one does quadrature anyway to approximate
+  integrals, and further approximating the geometry may not do as much harm as
+  one could fear given that the volume of the overlaps or gaps at every
+  hanging node is only ${\cal O}(h^d)$ even with a linear mapping and ${\cal
+  O}(h^{d+p-1})$ for a mapping of degree $p$. (You can see this by considering
+  that in 2d the gap/overlap is a triangle with base $h$ and height ${\cal
+  O}(h)$; in 3d, it is a pyramid-like structure with base area $h^2$ and
+  height ${\cal O}(h)$. Similar considerations apply for higher order mappings
+  where the height of the gaps/overlaps is ${\cal O}(h^p)$.) In other words,
+  if you use a linear mapping with linear elements, the error in the volume
+  you integrate over is already at the same level as the integration error
+  using the usual Gauss quadrature. Of course, for higher order elements one
+  would have to choose matching mapping objects.
+
+  Another point of view on why it is probably not worth worrying too much
+  about the issue is that there is certainly no narrative in the community of
+  numerical analysts that these issues are a major concern one needs to watch
+  out for when using complex geometries. If it does not seem to be discussed
+  often among practitioners, if ever at all, then it is at least not something
+  people have identified as a common problem.
+
+  This issue is not dissimilar to having hanging nodes at curved boundaries
+  where the geometry description of the boundary typically pulls a hanging
+  node onto the boundary whereas the large edge remains straight, making the
+  adjacent small and large cells not match each other. Although this behavior
+  existed in deal.II since its beginning, 15 years before manifold
+  descriptions became available, it did not ever come up in mailing list
+  discussions or conversations with colleagues.
+
+  <li>Could it be fixed? In principle, yes, but it's a complicated
+  issue. Let's assume for the moment that we would only ever use the MappingQ1
+  class, i.e., linear mappings. In that case, whenever the triangulation class
+  requires a new vertex along an edge that would become a hanging node, it
+  would just take the mean value of the adjacent vertices <i>in real
+  space</i>, i.e., without asking the manifold description. This way, the
+  point lies on the long straight edge and the two short straight edges would
+  match the one long edge. Only when all adjacent cells have been refined and
+  the point is no longer a hanging node would we replace its coordinates by
+  coordinates we get by a manifold. This may be awkward to implement, but it
+  would certainly be possible.
+
+  The more complicated issue arises because people may want to use a higher
+  order MappingQ object. In that case, the Triangulation class may freely
+  choose the location of the hanging node (because the quadratic curve for the
+  long edge can be chosen in such a way that it goes through the hanging node)
+  but the MappingQ class, when determining the location of mid-edge points
+  must make sure that if the edge is one half of a long edge of a neighboring
+  coarser cell, then the midpoint cannot be obtained from the manifold but
+  must be chosen along the long quadratic edge. For cubic (and all other odd)
+  mappings, the matter is again a bit complicated because one typically
+  arranges the cubic edge to go through points 1/3 and 2/3 along the edge, and
+  thus necessarily through the hanging node, but this could probably be worked
+  out. In any case, even then, there are two problems with this:
+
+  - When refining the triangulation, the Triangulation class can not know what
+    mapping will be used. In fact it is not uncommon for a triangulation to be
+    used differently in different contexts within the same program. If the
+    mapping used determines whether we can freely choose a point or not, how,
+    then, should the triangulation locate new vertices?
+  - Mappings are purely local constructs: they only work on a cell in
+    isolation, and this is one of the important features of the finite element
+    method. Having to ask whether one of the vertices of an edge is a hanging
+    node requires querying the neighborhood of a cell; furthermore, such a
+    query does not just involve the 6 face neighbors of a cell in 3d, but may
+    require traversing a possibly very large number of other cells that
+    connect to an edge. Even if it can be done, one still needs to do
+    different things depending on what the neighborhood looks like, producing
+    code that is likely very complex, hard to maintain, and possibly slow.
+
+  Consequently, at least for the moment, none of these ideas are
+  implemented. This leads to the undesirable consequence of discontinuous
+  geometries, but, as discussed above, the effects of this do not appear to
+  pose a problem in actual practice.
+
+</ul>
diff --git a/examples/step-53/doc/tooltip b/examples/step-53/doc/tooltip
new file mode 100644
index 0000000..5cd4028
--- /dev/null
+++ b/examples/step-53/doc/tooltip
@@ -0,0 +1 @@
+Geometry: Dealing with deformed domains.
diff --git a/examples/step-53/step-53.cc b/examples/step-53/step-53.cc
new file mode 100644
index 0000000..157bf74
--- /dev/null
+++ b/examples/step-53/step-53.cc
@@ -0,0 +1,480 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2014 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Authors: Wolfgang Bangerth, Texas A&M University, 2014
+ *          Luca Heltai, SISSA, 2014
+ *          D. Sarah Stamps, MIT, 2014
+ */
+
+// Let us start with the include files we need here. Obviously, we need the
+// ones that describe the triangulation (<code>tria.h</code>), and that allow
+// us to create and output triangulations (<code>grid_generator.h</code> and
+// <code>grid_out.h</code>). Furthermore, we need the header file that
+// declares the Manifold and ChartManifold classes that we will need to
+// describe the geometry (<code>manifold.h</code>). We will then also need
+// the GridTools::transform() function from the last of the following header
+// files; the purpose of this function will be discussed at the point
+// where we use it.
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/grid_out.h>
+#include <deal.II/grid/manifold.h>
+#include <deal.II/grid/grid_tools.h>
+
+// The remainder of the include files relate to reading the topography data.
+// As explained in the introduction, we will read it from a file and then use
+// the Functions::InterpolatedUniformGridData class that is declared in the
+// first of the following header files. Because the data is large, the file we
+// read from is stored as gzip compressed data and we make use of some
+// BOOST-provided functionality to read directly from gzipped data. We wrap
+// the BOOST includes with a preprocessor macro that disables certain annoying
+// compiler warnings.
+#include <deal.II/base/function_lib.h>
+
+DEAL_II_DISABLE_EXTRA_DIAGNOSTICS
+#include <boost/iostreams/filtering_stream.hpp>
+#include <boost/iostreams/filter/gzip.hpp>
+#include <boost/iostreams/device/file.hpp>
+DEAL_II_ENABLE_EXTRA_DIAGNOSTICS
+
+#include <iostream>
+#include <fstream>
+
+
+// The final part of the top matter is to open a namespace into which to put
+// everything, and then to import the dealii namespace into it.
+namespace Step53
+{
+  using namespace dealii;
+
+
+  // @sect3{Describing topography: AfricaTopography}
+  //
+  // The first significant part of this program is the class that describes
+  // the topography $h(\hat\phi,\hat\theta)$ as a function of longitude
+  // and latitude. As discussed in the introduction, we will make our life
+  // a bit easier here by not writing the class in the most general way
+  // possible but by only writing it for the particular purpose we are
+  // interested in here: interpolating data obtained from one very specific
+  // data file that contains information about a particular area of the
+  // world for which we know the extents.
+  //
+  // The general layout of the class has been discussed already above.
+  // Following is its declaration, including three static member functions
+  // that we will need in initializing the <code>topography_data</code>
+  // member variable.
+  class AfricaTopography
+  {
+  public:
+    // Reads the topography data file and sets up the interpolation table.
+    AfricaTopography ();
+    // Interpolated table value at the given longitude and latitude (radians).
+    double value (const double lon, const double lat) const;
+
+  private:
+    // Static helpers that build the constructor arguments of topography_data.
+    static std::vector<double>                           get_data ();
+    static std_cxx11::array<unsigned int,2>             n_intervals ();
+    static std_cxx11::array<std::pair<double,double>,2> get_endpoints ();
+    const Functions::InterpolatedUniformGridData<2> topography_data;
+  };
+
+
+  // Let us move to the implementation of the class. The interesting parts
+  // of the class are the constructor and the <code>value()</code> function.
+  // The former initializes the Functions::InterpolatedUniformGridData member
+  // variable and we will use the constructor that requires us to pass in
+  // the end points of the 2-dimensional data set we want to interpolate
+  // (which are here given by the intervals $[-6.983333, 11.966667]$,
+  // using the trick of switching end points discussed in the introduction,
+  // and $[25, 35.95]$, both given in degrees), the number of intervals
+  // into which the data is split (379 in latitude direction and 219 in
+  // longitude direction, for a total of $380\times 220$ data points), and
+  // a Table object that contains the data. The data then of course has
+  // size $380\times 220$ and we initialize it by providing an iterator
+  // to the first of the 83,600 elements of a std::vector object returned
+  // by the <code>get_data()</code> function below. Note that all of the
+  // member functions we call here are static because (i) they do not
+  // access any member variables of the class, and (ii) because they are
+  // called at a time when the object is not initialized fully anyway.
+  // All work happens in the initializer list: the table is built from the grid
+  // end points, the interval counts, and the 380 x 220 values read from file.
+  AfricaTopography::AfricaTopography ()
+    :
+    topography_data (get_endpoints(), n_intervals(),
+                     Table<2,double> (380, 220, get_data().begin()))
+  {}
+
+
+  double
+  AfricaTopography::value (const double lon, const double lat) const
+  {
+    const double neg_lat_deg = -lat * 180/numbers::PI; // the table is indexed by negated latitude
+    const double lon_deg     = lon * 180/numbers::PI;  // radians -> degrees
+    return topography_data.value (Point<2>(neg_lat_deg, lon_deg));
+  }
+
+
+  std_cxx11::array<std::pair<double,double>,2>
+  AfricaTopography::get_endpoints ()
+  {
+    std_cxx11::array<std::pair<double,double>,2> extents;
+    extents[0] = std::pair<double,double> (-6.983333, 11.966667); // first table axis: negated latitude, degrees
+    extents[1] = std::pair<double,double> (25., 35.95);           // second table axis: longitude, degrees
+    return extents;
+  }
+
+
+  std_cxx11::array<unsigned int,2>
+  AfricaTopography::n_intervals ()
+  {
+    std_cxx11::array<unsigned int,2> counts;
+    counts[0] = 379;   // 380 data points along the first table axis
+    counts[1] = 219;   // 220 data points along the second table axis
+    return counts;
+  }
+
+
+  // The only other function of greater interest is the <code>get_data()</code>
+  // function. It returns a temporary vector that contains all 83,600 data
+  // points describing the altitude and is read from the file
+  // <code>topography.txt.gz</code>. Because the file is compressed by gzip,
+  // we cannot just read it through an object of type std::ifstream, but
+  // there are convenient methods in the BOOST library (see
+  // http://www.boost.org) that allow us to read from compressed files
+  // without first having to uncompress them on disk. The result is, basically,
+  // just another input stream that, for all practical purposes, looks just like
+  // the ones we always use.
+  //
+  // When reading the data, we read the three columns but ignore the first
+  // two. The datum in the last column is appended to an array that we then
+  // return and that will be copied into the table from which
+  // <code>topography_data</code> is initialized. A stream does not, by
+  // default, throw when a read fails: it only sets its fail or bad bit (and
+  // swallows exceptions raised by the BOOST.iostreams filters). We therefore
+  // ask the stream to throw in these cases, catch whatever it produces if the
+  // file does not exist, is not readable, or is too short, and create our
+  // own exception. To this end, in the <code>catch</code>
+  // clause, we let the program run into an <code>AssertThrow(false, ...)</code>
+  // statement. Since the condition is always false, this always triggers an
+  // exception. In other words, this is equivalent to writing
+  // <code>throw ExcMessage("...")</code> but it also fills certain fields
+  // in the exception object that identify where the exception happened.
+  std::vector<double>
+  AfricaTopography::get_data ()
+  {
+    std::vector<double> data;
+
+    // create a stream where we read from gzipped data
+    boost::iostreams::filtering_istream in;
+    in.push(boost::iostreams::basic_gzip_decompressor<>());
+    in.push(boost::iostreams::file_source("topography.txt.gz"));
+    try
+      {
+        // without this, a failed read would go unnoticed and the handler below never run
+        in.exceptions (std::ios_base::failbit | std::ios_base::badbit);
+        for (unsigned int line=0; line<83600; ++line)
+          {
+            double lat, lon, elevation;
+            in >> lat >> lon >> elevation;
+            data.push_back (elevation);
+          }
+      }
+    catch (...)
+      {
+        AssertThrow (false,
+                     ExcMessage ("Could not read all 83,600 data points "
+                                 "from the file <topography.txt.gz>!"));
+      }
+
+    return data;
+  }
+
+
+  // @sect3{Describing the geometry: AfricaGeometry}
+  //
+  // The following class is then the main one of this program. Its structure
+  // has been described in much detail in the introduction and does not need
+  // much introduction any more.
+  class AfricaGeometry : public ChartManifold<3,3>
+  {
+  public:
+    // Map a point in real space back to the reference domain:
+    // first undo the WGS 84 mapping, then the topography mapping.
+    virtual Point<3> pull_back(const Point<3> &space_point) const;
+
+    // Map a point of the reference domain to real space:
+    // first apply the topography mapping, then the WGS 84 mapping.
+    virtual Point<3> push_forward(const Point<3> &chart_point) const;
+
+  private:
+    // The two component mappings and their inverses.
+    Point<3> pull_back_wgs84 (const Point<3> &x) const;
+    Point<3> push_forward_wgs84 (const Point<3> &phi_theta_d) const;
+    Point<3> pull_back_topo (const Point<3> &phi_theta_d) const;
+    Point<3> push_forward_topo (const Point<3> &phi_theta_d_hat) const;
+
+    // Parameters used by the WGS 84 mappings; defined below the class.
+    static const double    R;
+    static const double    ellipticity;
+    const AfricaTopography topography;
+  };
+
+
+  const double AfricaGeometry::R           = 6378137;            // presumably the WGS 84 equatorial radius in meters -- confirm
+  const double AfricaGeometry::ellipticity = 8.1819190842622e-2; // enters the WGS 84 formulas only as ellipticity^2
+
+
+  // The implementation, as well, is pretty straightforward if you have
+  // read the introduction. In particular, both of the pull back and
+  // push forward functions are just concatenations of the respective
+  // functions of the WGS 84 and topography mappings:
+  Point<3> AfricaGeometry::pull_back(const Point<3> &space_point) const
+  {
+    const Point<3> phi_theta_d = pull_back_wgs84 (space_point); // undo the WGS 84 map first,
+    return pull_back_topo (phi_theta_d);                        // then the topography map
+  }
+
+  Point<3> AfricaGeometry::push_forward(const Point<3> &chart_point) const
+  {
+    const Point<3> phi_theta_d = push_forward_topo (chart_point); // apply the topography map first,
+    return push_forward_wgs84 (phi_theta_d);                      // then the WGS 84 map
+  }
+
+
+  // The following two functions then define the forward and inverse
+  // transformations that correspond to the WGS 84 reference shape of
+  // Earth. The forward transform follows the formula shown in the
+  // introduction. The inverse transform is significantly more complicated
+  // and is, at the very least, not intuitive. It also suffers from the
+  // fact that it returns an angle that at the end of the function we
+  // need to clip back into the interval $[0,2\pi]$ if it should have
+  // escaped from there.
+  Point<3>
+  AfricaGeometry::push_forward_wgs84(const Point<3> &phi_theta_d) const
+  {
+    const double longitude = phi_theta_d[0];
+    const double latitude  = phi_theta_d[1];
+    const double depth     = phi_theta_d[2];
+    const double sin_lat   = std::sin(latitude);
+    const double cos_lat   = std::cos(latitude);
+    // latitude-dependent radius of the reference shape (equals R if ellipticity is zero)
+    const double radius = R / std::sqrt(1 - (ellipticity * ellipticity * sin_lat * sin_lat));
+    return Point<3> ((radius + depth) * std::cos(longitude) * cos_lat,
+                     (radius + depth) * std::sin(longitude) * cos_lat,
+                     ((1 - ellipticity * ellipticity) * radius + depth) * sin_lat);
+  }
+
+  Point<3>
+  AfricaGeometry::pull_back_wgs84(const Point<3> &x) const   // Cartesian x -> (phi, theta, d)
+  {
+    const double b     = std::sqrt(R * R * (1 - ellipticity * ellipticity));   // = R*sqrt(1-ellipticity^2)
+    const double ep    = std::sqrt((R * R - b * b) / (b * b));                 // = sqrt(R^2-b^2)/b
+    const double p     = std::sqrt(x(0) * x(0) + x(1) * x(1));                 // distance of x from the z axis
+    const double th    = std::atan2(R * x(2), b * p);                          // auxiliary angle for the theta formula
+    const double phi   = std::atan2(x(1), x(0));                               // atan2 yields a value in [-pi,pi]
+    const double theta = std::atan2(x(2) + ep * ep * b * std::pow(std::sin(th),3),
+                                    (p - (ellipticity * ellipticity * R  * std::pow(std::cos(th),3))));
+    const double R_bar = R / (std::sqrt(1 - ellipticity * ellipticity * std::sin(theta) * std::sin(theta)));
+    const double R_plus_d = p / std::cos(theta);                               // inverts p = (R_bar+d)*cos(theta)
+    // Assemble (phi, theta, d), shifting a negative phi up by 2*pi.
+    Point<3> phi_theta_d;
+    if (phi < 0)
+      phi_theta_d[0] = phi + 2*numbers::PI;
+    else if (phi > 2*numbers::PI)            // defensive only: cannot occur for an atan2 result
+      phi_theta_d[0] = phi - 2*numbers::PI;
+    else
+      phi_theta_d[0] = phi;
+    phi_theta_d[1] = theta;
+    phi_theta_d[2] = R_plus_d - R_bar;       // depth d = (R_bar+d) - R_bar
+    return phi_theta_d;
+  }
+
+
+  // In contrast, the topography transformations follow exactly the
+  // description in the introduction. There is consequently not
+  // much to add:
+  Point<3>
+  AfricaGeometry::push_forward_topo(const Point<3> &phi_theta_d_hat) const
+  {
+    const double phi       = phi_theta_d_hat[0];
+    const double theta     = phi_theta_d_hat[1];
+    const double depth_hat = phi_theta_d_hat[2];
+
+    // Stretch the depth linearly: the full elevation is added at depth_hat=0,
+    // nothing at depth_hat=-500000.
+    const double elevation = topography.value(phi, theta);
+    return Point<3> (phi, theta, depth_hat + (depth_hat + 500000)/500000*elevation);
+  }
+
+  Point<3>
+  AfricaGeometry::pull_back_topo(const Point<3> &phi_theta_d) const
+  {
+    const double phi   = phi_theta_d[0];
+    const double theta = phi_theta_d[1];
+    const double depth = phi_theta_d[2];
+
+    // Solve depth = depth_hat + (depth_hat + 500000)/500000*elevation
+    // for depth_hat.
+    const double elevation = topography.value(phi, theta);
+    return Point<3> (phi, theta, 500000 * (depth-elevation)/(500000+elevation));
+  }
+
+
+  // @sect3{Creating the mesh}
+  //
+  // Having so described the properties of the geometry, now it is
+  // time to deal with the mesh used to discretize it. To this end,
+  // we create objects for the geometry and triangulation, and then
+  // proceed to create a $1\times 2\times 1$ rectangular mesh that
+  // corresponds to the reference domain
+  // $\hat U=[26,35]\times[-10,5]\times[-500000,0]$. We choose
+  // this number of subdivisions because it leads to cells that
+  // are roughly like cubes instead of stretched in one direction or
+  // another.
+  //
+  // Of course, we are not actually interested in meshing the
+  // reference domain. We are interested in meshing the real domain.
+  // Consequently, we will use the GridTools::transform() function
+  // that simply moves every point of a triangulation according to
+  // a given transformation. The transformation function it wants is
+  // a function that takes as its single argument a point in the reference
+  // domain and returns the corresponding location in the domain that we
+  // want to map to. This is, of course, exactly the push forward
+  // function of the geometry we use. However,
+  // <code>AfricaGeometry::push_forward()</code> requires two arguments:
+  // the <code>AfricaGeometry</code> object to work with via its implicit
+  // <code>this</code> pointer, and the point. We bind the first of these
+  // to the geometry object we have created at the top of the function
+  // and leave the second one open, obtaining the desired object to
+  // do the transformation.
+  void run ()
+  {
+    AfricaGeometry   geometry;
+    Triangulation<3> triangulation;
+
+    {
+      const Point<3> corner_points[2] = { Point<3>(26*numbers::PI/180,
+                                                   -10*numbers::PI/180,
+                                                   -500000),
+                                          Point<3>(35*numbers::PI/180,
+                                                   5*numbers::PI/180,
+                                                   0)
+                                        };  // first two coordinates: degrees converted to radians
+      std::vector<unsigned int> subdivisions(3);  // number of cells per coordinate direction
+      subdivisions[0] = 1;
+      subdivisions[1] = 2;
+      subdivisions[2] = 1;
+      GridGenerator::subdivided_hyper_rectangle (triangulation, subdivisions,
+                                                 corner_points[0], corner_points[1],
+                                                 true);  // "color" the boundaries (see the refinement step below)
+
+      GridTools::transform (std_cxx11::bind(&AfricaGeometry::push_forward,
+                                            std_cxx11::cref(geometry),
+                                            std_cxx11::_1),
+                            triangulation);
+    }
+
+    // The next step is to explain to the triangulation to use our geometry
+    // object whenever a new point is needed upon refining the mesh. We do
+    // this by telling the triangulation to use our geometry for everything
+    // that has manifold indicator zero, and then proceed to mark all cells
+    // and their bounding faces and edges with manifold indicator zero. This
+    // ensures that the triangulation consults our geometry object every time
+    // a new vertex is needed. Since manifold indicators are inherited from
+    // mother to children, this also happens after several recursive
+    // refinement steps.
+    triangulation.set_manifold(0, geometry);
+    for (Triangulation<3>::active_cell_iterator cell=triangulation.begin_active();
+         cell!=triangulation.end(); ++cell)
+      cell->set_all_manifold_ids(0);
+
+    // The last step is to refine the mesh beyond its initial $1\times 2\times 1$
+    // coarse mesh. We could just refine globally a number of times, but since for
+    // the purpose of this tutorial program we're really only interested in what
+    // is happening close to the surface, we just refine 6 times all of the cells
+    // that have a face at a boundary with indicator 5. Looking this up in the
+    // documentation of the GridGenerator::subdivided_hyper_rectangle() function
+    // we have used above reveals that boundary indicator 5 corresponds to the top
+    // surface of the domain (and this is what the last <code>true</code> argument
+    // in the call to GridGenerator::subdivided_hyper_rectangle() above meant: to
+    // "color" the boundaries by assigning each boundary a unique boundary indicator).
+    for (unsigned int i=0; i<6; ++i)
+      {
+        for (Triangulation<3>::active_cell_iterator cell=triangulation.begin_active();
+             cell!=triangulation.end(); ++cell)
+          for (unsigned int f=0; f<GeometryInfo<3>::faces_per_cell; ++f)
+            if (cell->face(f)->boundary_id() == 5)
+              {
+                cell->set_refine_flag ();
+                break;  // one matching face suffices; stop scanning this cell's faces
+              }
+        triangulation.execute_coarsening_and_refinement();
+
+        std::cout << "Refinement step " << i+1 << ": "
+                  << triangulation.n_active_cells() << " cells, "
+                  << GridTools::minimal_cell_diameter (triangulation)/1000  // printed as km, hence /1000
+                  << "km minimal cell diameter"
+                  << std::endl;
+      }
+
+    // Having done all of this, we can now write the mesh into a file of its own:
+    const std::string filename = "mesh.vtu";
+    std::ofstream out (filename.c_str());
+    GridOut grid_out;
+    grid_out.write_vtu (triangulation, out);
+  }
+}
+
+
+
+// @sect3{The main function}
+
+// Finally, the main function, which follows the same scheme used in all
+// tutorial programs starting with step-6. There isn't much to do here, only
+// to call the single <code>run()</code> function.
+int main ()
+{
+  try
+    {
+      Step53::run ();
+    }
+  catch (std::exception &exc)  // anything derived from std::exception: print its message
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+
+      return 1;  // nonzero exit status after reporting the exception
+    }
+  catch (...)  // any other thrown object: no message is available to print
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;  // nonzero exit status here as well
+    }
+}
+
diff --git a/examples/step-53/topography.license b/examples/step-53/topography.license
new file mode 100644
index 0000000..d54582a
--- /dev/null
+++ b/examples/step-53/topography.license
@@ -0,0 +1,227 @@
+topography.txt.gz contains data from the Shuttle Radar Topography Mission and
+was originally downloaded from http://dds.cr.usgs.gov/srtm/version1/,
+followed by some data processing by D. Sarah Stamps (MIT). The documentation
+accompanying SRTM data can be found at
+http://dds.cr.usgs.gov/srtm/version1/Documentation/SRTM_Topo.txt and contains
+further information about measurement method, data layout (of the original,
+not the processed data), copyright holders and conditions and is reproduced in
+full below. In particular, note that the SRTM level 1 data corresponds to the
+statement "In advance of that, the unedited data are being released for public
+use subject to the caveats discussed below." This implies that the data is in
+the public domain and can be used for the purpose of this tutorial program and
+all modifications you may want to make to it. Similar statements about the
+data being in the public domain can be found at
+  http://dds.cr.usgs.gov/srtm/version1/Documentation/SRTM_Topo.txt
+  http://wiki.openstreetmap.org/wiki/SRTM
+  http://www.earthmodels.org/data-and-tools/topography/srtm
+  https://groups.google.com/forum/#!topic/osmand/9nj3GY1KBGI
+
+................................................
+
+SRTM_Topo     (last update 11/05/03)
+
+SRTM Documentation (best viewed with mono-spaced font, such as courier)
+
+1.0  Introduction
+
+The SRTM data sets result from a collaborative effort by the National
+Aeronautics and Space Administration (NASA) and the National Imagery and
+Mapping Agency (NIMA), as well as the participation of the German and
+Italian space agencies, to generate a near-global digital elevation model
+(DEM) of the Earth using radar interferometry. The SRTM instrument
+consisted of the Spaceborne Imaging Radar-C (SIR-C) hardware set modified
+with a Space Station-derived mast and additional antennae to form an
+interferometer with a 60 meter long baseline. A description of the SRTM
+mission, can be found in Farr and Kobrick (2000).
+
+Synthetic aperture radars are side-looking instruments and acquire data
+along continuous swaths. The SRTM swaths extended from about 30 degrees
+off-nadir to about 58 degrees off-nadir from an altitude of 233 km, and
+thus were about 225 km wide. During the data flight the instrument was
+operated at all times the orbiter was over land and about 1000 individual
+swaths were acquired over the ten days of mapping operations. Length of the
+acquired swaths range from a few hundred to several thousand km. Each
+individual data acquisition is referred to as a "data take."
+
+SRTM was the primary (and pretty much only) payload on the STS-99 mission
+of the Space Shuttle Endeavour, which launched February 11, 2000 and flew
+for 11 days. Following several hours for instrument deployment, activation
+and checkout, systematic interferometric data were collected for 222.4
+consecutive hours. The instrument operated virtually flawlessly and imaged
+99.96% of the targeted landmass at least one time, 94.59% at least twice
+and about 50% at least three or more times. The goal was to image each
+terrain segment at least twice from different angles (on ascending, or
+north-going, and descending orbit passes) to fill in areas shadowed from
+the radar beam by terrain.
+
+This 'targeted landmass' consisted of all land between 56 degrees south and
+60 degrees north latitude, which comprises almost exactly 80% of the total
+landmass.
+
+2.0 Data Set Characteristics
+
+2.1 General
+
+SRTM data were processed in a systematic fashion using the SRTM Ground
+Data Processing System (GDPS) supercomputer system at the Jet Propulsion
+Laboratory. Data were mosaicked into approximately 15,000 one degree by
+one degree cells and formatted according to the Digital Terrain Elevation
+Data (DTED) specification for delivery to NIMA, who will use it to update
+and extend their DTED products. Data were processed on a
+continent-by-continent basis beginning with North America. NIMA is applying
+several post-processing steps to these data including editing, spike and well
+removal, water body leveling and coastline definition. Following these
+"finishing" steps data will be returned to NASA for distribution to the
+scientific and civil user communities, as well as the public. In advance of
+that, the unedited data are being released for public use subject to the
+caveats discussed below.
+
+2.2 Organization
+
+SRTM data are organized into individual rasterized cells, or tiles, each
+covering one degree by one degree in latitude and longitude. Sample spacing
+for individual data points is either 1 arc-second or 3 arc-seconds,
+referred to as SRTM-1 and SRTM-3, respectively. Since one arc-second at the
+equator corresponds to roughly 30 meters in horizontal extent, the sets are
+sometimes referred to as "30 meter" or "90 meter" data.
+
+Unedited SRTM-3 data are being released continent-by-continent, with the
+definitions of the continents displayed in the file Continent_def.gif.
+By agreement with NIMA unedited SRTM-1 data for the United States and its
+territories and possessions are also being released and can be found in
+the directory /United_States_1arcsec./ Cells that straddle the border with
+neighboring countries have been masked with quarter degree quantization
+such that data outside the U.S. have the void value.
+
+2.3 Elevation mosaics
+
+Each SRTM data tile contains a mosaic of elevations generated by averaging
+all data takes that fall within that tile. Since the primary error source
+in synthetic aperture radar data is speckle, which has the characteristics
+of random noise, combining data through averaging reduces the error by the
+square root of the number of data takes used. In the case of SRTM the
+number of data takes could range from a minimum of one (in a very few
+cases) up to as many as ten or more.
+
+3.0 Data Formats
+
+The names of individual data tiles refer to the longitude and latitude of
+the lower-left (southwest) corner of the tile (this follows the DTED
+convention as opposed to the GTOPO30 standard). For example, the
+coordinates of the lower-left corner of tile N40W118 are 40 degrees north
+latitude and 118 degrees west longitude. To be more exact, these
+coordinates refer to the geometric center of the lower left pixel, which in
+the case of SRTM-1 data will be about 30 meters in extent.
+
+SRTM-1 data are sampled at one arc-second of latitude and longitude and
+each file contains 3601 lines and 3601 samples. The rows at the north
+and south edges as well as the columns at the east and west edges of each
+cell overlap and are identical to the edge rows and columns in the adjacent
+cell.
+
+SRTM-3 data are sampled at three arc-seconds and contain 1201 lines and
+1201 samples with similar overlapping rows and columns. This organization
+also follows the DTED convention. Unlike DTED, however, 3 arc-second data
+are generated in each case by 3x3 averaging of the 1 arc-second data - thus
+9 samples are combined in each 3 arc-second data point. Since the primary
+error source in the elevation data has the characteristics of random noise
+this reduces that error by roughly a factor of three.
+
+This sampling scheme is sometimes called a "geographic projection", but of
+course it is not actually a projection in the mapping sense. It does not
+possess any of the characteristics usually present in true map projections,
+for example it is not conformal, so that if it is displayed as an image
+geographic features will be distorted. However it is quite easy to handle
+mathematically, can be easily imported into most image processing and GIS
+software packages, and multiple cells can be assembled easily into a larger
+mosaic (unlike the pesky UTM projection, for example.)
+
+3.1 DEM File (.HGT)
+
+The DEM is provided as 16-bit signed integer data in a simple binary
+raster. There are no header or trailer bytes embedded in the file. The data
+are stored in row major order (all the data for row 1, followed by all the
+data for row 2, etc.).
+
+All elevations are in meters referenced to the WGS84 EGM96 geoid as
+documented at http://www.nima.mil/GandG/wgsegm/.
+
+Byte order is Motorola ("big-endian") standard with the most significant
+byte first. Since they are signed integers elevations can range from -32767
+to 32767 meters, encompassing the range of elevation to be found on the
+Earth.
+
+In these preliminary data there commonly will be data voids from a number of
+causes such as shadowing, phase unwrapping anomalies, or other
+radar-specific causes. Voids are flagged with the value -32768.
+
+
+4.0  Notes and Hints for SRTM Data Users
+
+4.1 Data Encoding
+
+Because the DEM data are stored in a 16-bit binary format, users must be
+aware of how the bytes are addressed on their computers. The DEM data are
+provided in Motorola or IEEE byte order, which stores the most significant
+byte first ("big endian"). Systems such as Sun SPARC and Silicon Graphics
+workstations use the Motorola byte order. The Intel byte order, which
+stores the least significant byte first ("little endian"), is used on DEC
+Alpha systems and most PCs. Users with systems that address bytes in the
+Intel byte order may have to "swap bytes" of the DEM data unless their
+application software performs the conversion during ingest. 
+
+4.3 SRTM Caveats
+
+As with all digital geospatial data sets, users of SRTM must be aware of
+certain characteristics of the data set (resolution, accuracy, method of
+production and any resulting artifacts, etc.) in order to better judge its
+suitability for a specific application. A characteristic of SRTM that
+renders it unsuitable for one application may have no relevance as a
+limiting factor for its use in a different application.
+
+In particular, data produced by the PI processor should be considered as
+"research grade" data suitable for scientific investigations and
+development and testing of various civil applications.
+
+No editing has been performed on the data, and the elevation data in
+particular contain numerous voids and other spurious points such as
+anomalously high (spike) or low (well) values. Water bodies will generally
+not be well-defined - in fact since water surfaces generally produce very
+low radar backscatter they will appear quite "noisy" or rough, in the
+elevations data. Similarly, coastlines will not be well-defined.
+
+5.0 References
+
+Farr, T.G., M. Kobrick, 2000, Shuttle Radar Topography Mission produces a
+wealth of data, Amer. Geophys. Union Eos, v. 81, p. 583-585.
+
+Rosen, P.A., S. Hensley, I.R. Joughin, F.K. Li, S.N. Madsen, E. Rodriguez,
+R.M. Goldstein, 2000, Synthetic aperture radar interferometry, Proc. IEEE,
+v. 88, p. 333-382.
+
+DMATR 8350.2, Dept. of Defense World Geodetic System 1984, Its Definition
+and Relationship with Local Geodetic Systems, Third Edition, 4 July 1997.
+http://164.214.2.59/GandG/tr8350_2.html
+
+Lemoine, F.G. et al, NASA/TP-1998-206861, The Development of the Joint NASA
+GSFC and NIMA Geopotential Model EGM96, NASA Goddard Space Flight Center,
+Greenbelt, MD 20771, U.S.A., July 1998.
+
+Other Web sites of interest:
+
+NASA/JPL SRTM: http://www.jpl.nasa.gov/srtm/
+
+NIMA: http://164.214.2.59/nimahome.html
+
+STS-99 Press Kit: http://www.shuttlepresskit.com/STS-99/index.htm
+
+Johnson Space Center STS-99:
+http://spaceflight.nasa.gov/shuttle/archives/sts-99/index.html
+
+German Space Agency: http://www.dlr.de/srtm
+
+Italian Space Agency: http://srtm.det.unifi.it/index.htm
+
+U.S. Geological Survey, EROS Data Center: http://edc.usgs.gov/
+
+Note: DTED is a trademark of the National Imagery and Mapping Agency
diff --git a/examples/step-53/topography.txt.gz b/examples/step-53/topography.txt.gz
new file mode 100644
index 0000000..223a85b
Binary files /dev/null and b/examples/step-53/topography.txt.gz differ
diff --git a/examples/step-54/CMakeLists.txt b/examples/step-54/CMakeLists.txt
new file mode 100644
index 0000000..34e5131
--- /dev/null
+++ b/examples/step-54/CMakeLists.txt
@@ -0,0 +1,50 @@
+##
+#  CMake script for the step-54 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-54")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#  FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#  FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#  SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+#
+# Are all dependencies fulfilled? (Requires DEAL_II_WITH_OPENCASCADE to be set.)
+#
+IF(NOT DEAL_II_WITH_OPENCASCADE)
+  MESSAGE(FATAL_ERROR "
+Error! The deal.II library found at ${DEAL_II_PATH} was not configured with
+    DEAL_II_WITH_OPENCASCADE = ON
+One or all of these are OFF in your installation but are required for this tutorial step."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-54/DTMB-5415_bulbous_bow.iges b/examples/step-54/DTMB-5415_bulbous_bow.iges
new file mode 100644
index 0000000..4dbd780
--- /dev/null
+++ b/examples/step-54/DTMB-5415_bulbous_bow.iges
@@ -0,0 +1,549 @@
+                                                                        S0000001
+,,31HOpen CASCADE IGES processor 6.5,13HFilename.iges,                  G0000001
+16HOpen CASCADE 6.5,31HOpen CASCADE IGES processor 6.5,32,308,15,308,15,G0000002
+,1.,6,1HM,1,0.01,15H20140926.165225,1.4463E-05,3.063474,5Hamola,,11,0,  G0000003
+15H20140926.165225,;                                                    G0000004
+     510       1       0       0       0       0       0       000010000D0000001
+     510       0       0       1       1                               0D0000002
+     128       2       0       0       0       0       0       000010000D0000003
+     128       0       0     166       0                               0D0000004
+     508     168       0       0       0       0       0       000010000D0000005
+     508       0       0       2       1                               0D0000006
+     504     170       0       0       0       0       0       000010001D0000007
+     504       0       0       1       1                               0D0000008
+     126     171       0       0       0       0       0       000010000D0000009
+     126       0       0      73       0                               0D0000010
+     502     244       0       0       0       0       0       000010000D0000011
+     502       0       0       2       1                               0D0000012
+     126     246       0       0       0       0       0       000010000D0000013
+     126       0       0      10       0                               0D0000014
+     126     256       0       0       0       0       0       000010000D0000015
+     126       0       0       9       0                               0D0000016
+     126     265       0       0       0       0       0       000010000D0000017
+     126       0       0      14       0                               0D0000018
+     126     279       0       0       0       0       0       000010000D0000019
+     126       0       0     232       0                               0D0000020
+     126     511       0       0       0       0       0       000010000D0000021
+     126       0       0       4       0                               0D0000022
+     126     515       0       0       0       0       0       000010000D0000023
+     126       0       0       2       0                               0D0000024
+     110     517       0       0       0       0       0       000010000D0000025
+     110       0       0       1       0                               0D0000026
+510,3,1,1,5;                                                     0000001P0000001
+128,18,13,3,3,0,0,1,0,0,0.,0.,0.,0.,1.,2.,2.5,2.5,2.5,3.,3.5,    0000003P0000002
+3.5,3.5,4.,5.,6.,7.,8.,9.,10.,10.,10.,10.,0.,0.,0.,0.,1.,2.,3.,  0000003P0000003
+4.,5.,6.,7.,8.,9.,10.,11.,11.,11.,11.,1.,1.,1.,1.,1.,1.,1.,1.,   0000003P0000004
+1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,  0000003P0000005
+1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,  0000003P0000006
+1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,  0000003P0000007
+1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,  0000003P0000008
+1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,  0000003P0000009
+1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,  0000003P0000010
+1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,  0000003P0000011
+1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,  0000003P0000012
+1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,  0000003P0000013
+1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,  0000003P0000014
+1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,  0000003P0000015
+1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,  0000003P0000016
+1.,1.,1.,1.,1.,1.,-1.8411253,0.,-0.247075893,-1.8735103,0.,      0000003P0000017
+-0.247080363,-1.9382801,0.,-0.247089304,-1.9817766,0.,           0000003P0000018
+-0.247094968,-2.0045777,0.,-0.247096822,-2.0094125,0.,           0000003P0000019
+-0.247098092,-2.0150569,0.,-0.256362056,-2.0231086,0.,           0000003P0000020
+-0.27006388,-2.0299576,0.,-0.28255951,-2.0351962,0.,-0.29223011, 0000003P0000021
+-2.0420465,0.,-0.29384303,-2.0783114,0.,-0.30069283,-2.1701788,  0000003P0000022
+0.,-0.3188375,-2.3136179,0.,-0.34504804,-2.4542424,0.,           0000003P0000023
+-0.3656138,-2.56746892,0.,-0.3712712,-2.61219314,0.,-0.3624128,  0000003P0000024
+-2.62871465,0.,-0.3531455,-2.63758308,0.,-0.34670038,-1.8411299, 0000003P0000025
+4.028084E-03,-0.245868605,-1.8735149,4.028084E-03,-0.245867361,  0000003P0000026
+-1.9382783,4.028084E-03,-0.245069547,-1.9824499,2.351634E-03,    0000003P0000027
+-0.245080291,-2.0045777,1.64785E-03,-0.247096822,-2.0094125,     0000003P0000028
+2.014042E-03,-0.247098092,-2.0150569,2.014042E-03,-0.256362056,  0000003P0000029
+-2.0231086,2.014042E-03,-0.27006388,-2.0299576,2.014042E-03,     0000003P0000030
+-0.28255951,-2.0351962,2.014042E-03,-0.29223011,-2.0420465,      0000003P0000031
+1.613535E-03,-0.29384303,-2.0783114,1.027621E-02,-0.30069283,    0000003P0000032
+-2.1701788,2.337321E-02,-0.3188375,-2.3136179,4.472666E-02,      0000003P0000033
+-0.34504804,-2.4542424,5.92312E-02,-0.3656138,-2.56746892,       0000003P0000034
+6.124524E-02,-0.3712712,-2.61219314,4.029232E-02,-0.3624128,     0000003P0000035
+-2.63717686,3.42502E-02,-0.34670076,-2.64685205,0.,-0.34025493,  0000003P0000036
+-1.8411301,1.128321E-02,-0.239820764,-1.873515,1.087699E-02,     0000003P0000037
+-0.239825209,-1.9382791,1.087699E-02,-0.239834175,-1.9848368,    0000003P0000038
+9.669704E-03,-0.239842735,-2.0069992,6.848881E-03,-0.242259748,  0000003P0000039
+-2.0118352,5.641619E-03,-0.24346828,-2.0182799,2.014042E-03,     0000003P0000040
+-0.251930493,-2.0259314,2.014042E-03,-0.26603892,-2.0335879,     0000003P0000041
+2.014042E-03,-0.27933483,-2.0384195,2.014042E-03,-0.28819905,    0000003P0000042
+-2.0448693,2.014042E-03,-0.28981236,-2.0794531,2.155942E-02,     0000003P0000043
+-0.29787668,-2.1738078,5.278283E-02,-0.30796289,-2.3136218,      0000003P0000044
+8.945331E-02,-0.32449563,-2.4542456,0.1140338,-0.34426036,       0000003P0000045
+-2.57714109,0.1104062,-0.3491078,-2.63556549,7.817007E-02,       0000003P0000046
+-0.33662631,-2.660955046,3.182991E-02,-0.32051344,-2.660955046,  0000003P0000047
+0.,-0.32051344,-1.8411296,2.014616E-02,-0.23310917,-1.8735145,   0000003P0000048
+1.933941E-02,-0.23311364,-1.9382844,1.853263E-02,-0.23312258,    0000003P0000049
+-1.9872519,1.571755E-02,-0.23419888,-2.0094147,1.128321E-02,     0000003P0000050
+-0.23702214,-2.014251,9.669704E-03,-0.23863692,-2.0215018,       0000003P0000051
+2.780741E-03,-0.246291617,-2.0307661,2.014042E-03,-0.25959179,   0000003P0000052
+-2.038823,2.014042E-03,-0.27288737,-2.0440647,2.014042E-03,      0000003P0000053
+-0.27973714,-2.0513153,2.014042E-03,-0.28094918,-2.0855003,      0000003P0000054
+2.169102E-02,-0.28471115,-2.1818662,5.358958E-02,-0.27935259,    0000003P0000055
+-2.3140284,0.1003303,-0.2813821,-2.4542511,0.1341799,            0000003P0000056
+-0.28825046,-2.58319229,0.1386086,-0.29068343,-2.64685584,       0000003P0000057
+8.381169E-02,-0.29472149,-2.666596513,3.182991E-02,-0.29553292,  0000003P0000058
+-2.666596513,0.,-0.29553292,-1.8411293,2.820805E-02,-0.22652348, 0000003P0000059
+-1.8735142,2.659454E-02,-0.22652795,-1.9382841,2.458049E-02,     0000003P0000060
+-0.22653689,-1.9888655,2.055241E-02,-0.22815525,-2.0118358,      0000003P0000061
+1.651859E-02,-0.23178453,-2.0166665,1.369779E-02,-0.23339931,    0000003P0000062
+-2.0243224,6.042127E-03,-0.239846367,-2.0346502,2.014042E-03,    0000003P0000063
+-0.252390284,-2.0440621,2.014042E-03,-0.26442582,-2.0488986,     0000003P0000064
+2.014042E-03,-0.27248326,-2.0565547,2.014042E-03,-0.27288819,    0000003P0000065
+-2.0895265,7.792974E-03,-0.27645102,-2.1879153,2.458049E-02,     0000003P0000066
+-0.26202737,-2.3136316,5.358958E-02,-0.254387384,-2.4538562,     0000003P0000067
+0.1019438,-0.244731422,-2.58319533,0.112014,-0.244348949,        0000003P0000068
+-2.64363816,6.165149E-02,-0.255233153,-2.661364494,2.900911E-02, 0000003P0000069
+-0.26813071,-2.661364494,0.,-0.26813071,-1.8411329,3.787775E-02, 0000003P0000070
+-0.21806107,-1.8735178,3.586371E-02,-0.21806551,-1.938282,       0000003P0000071
+3.263666E-02,-0.21807448,-1.9918544,2.961561E-02,-0.21808228,    0000003P0000072
+-2.0157525,2.458049E-02,-0.22262047,-2.0215042,2.21602E-02,      0000003P0000073
+-0.22413141,-2.0308896,1.853263E-02,-0.22607957,-2.0452261,      0000003P0000074
+1.168375E-02,-0.22997428,-2.0600373,6.614312E-03,-0.23360536,    0000003P0000075
+-2.0687585,5.584393E-03,-0.23505064,-2.0771076,4.548759E-03,     0000003P0000076
+-0.23622732,-2.1075961,4.29702E-03,-0.239723812,-2.1955747,      0000003P0000077
+2.578778E-02,-0.23503096,-2.3136336,3.868453E-02,-0.23182093,    0000003P0000078
+-2.4538556,6.285878E-02,-0.22539201,-2.57393393,7.534928E-02,    0000003P0000079
+-0.2217796,-2.62711813,1.531132E-02,-0.23508504,-2.6444467,      0000003P0000080
+1.613535E-03,-0.244755933,-2.6444467,0.,-0.244755933,-1.8411331, 0000003P0000081
+4.351937E-02,-0.21201891,-1.8740329,4.150533E-02,-0.21202293,    0000003P0000082
+-1.9382821,3.747153E-02,-0.21203233,-1.9928616,3.411286E-02,     0000003P0000083
+-0.21203921,-2.0199368,3.270532E-02,-0.21204287,-2.0271461,      0000003P0000084
+3.223616E-02,-0.21204193,-2.0373079,3.182991E-02,-0.21204401,    0000003P0000085
+-2.0539924,3.102887E-02,-0.21204579,-2.0704824,3.022211E-02,     0000003P0000086
+-0.21204774,-2.0797402,2.983304E-02,-0.21205066,-2.0889979,      0000003P0000087
+2.944967E-02,-0.21204782,-2.1198208,2.833967E-02,-0.21205374,    0000003P0000088
+-2.2024253,2.699504E-02,-0.21206339,-2.3136383,3.344342E-02,     0000003P0000089
+-0.21208099,-2.4538605,4.634017E-02,-0.21209473,-2.5674881,      0000003P0000090
+3.062262E-02,-0.21211013,-2.62027095,1.007595E-02,-0.21211866,   0000003P0000091
+-2.63558227,1.613535E-03,-0.21211599,-2.63558227,0.,-0.21211599, 0000003P0000092
+-1.8411358,7.534928E-02,-0.17776872,-1.8735207,7.051441E-02,     0000003P0000093
+-0.17777319,-1.9382906,6.406604E-02,-0.17778213,-1.9967497,      0000003P0000094
+5.970039E-02,-0.178123,-2.0261365,5.73831E-02,-0.17818175,       0000003P0000095
+-2.0356002,5.658777E-02,-0.17819018,-2.0450697,5.579245E-02,     0000003P0000096
+-0.17819861,-2.0633448,5.41446E-02,-0.17819892,-2.0815228,       0000003P0000097
+5.216489E-02,-0.17820504,-2.0914099,5.107203E-02,-0.17820166,    0000003P0000098
+-2.1012913,4.99792E-02,-0.17820397,-2.1333272,4.640311E-02,      0000003P0000099
+-0.17820877,-2.2108908,3.868453E-02,-0.17821736,-2.3132352,      0000003P0000100
+2.981587E-02,-0.17823168,-2.4538636,1.813212E-02,-0.17825078,    0000003P0000101
+-2.56668444,5.641619E-03,-0.17826117,-2.61181166,2.820797E-03,   0000003P0000102
+-0.17827105,-2.63075053,1.613535E-03,-0.17827077,-2.63075053,0., 0000003P0000103
+-0.17827077,-1.8411412,0.1136276,-0.1278067,-1.8735262,          0000003P0000104
+0.1091989,-0.1278111,-1.938296,9.831624E-02,-0.1278201,          0000003P0000105
+-1.996566,8.824603E-02,-0.1278236,-2.0288765,8.340545E-02,       0000003P0000106
+-0.1278281,-2.0394846,8.186631E-02,-0.1278298,-2.0500927,        0000003P0000107
+8.032145E-02,-0.1278314,-2.070977,7.736332E-02,-0.127835,        0000003P0000108
+-2.0917296,7.441093E-02,-0.1278388,-2.1026181,7.286605E-02,      0000003P0000109
+-0.1278402,-2.1135065,7.132119E-02,-0.1278416,-2.1477052,        0000003P0000110
+6.648633E-02,-0.1278444,-2.2205656,5.64104E-02,-0.1278516,       0000003P0000111
+-2.317274,3.908504E-02,-0.1278654,-2.4550759,2.055241E-02,       0000003P0000112
+-0.1278814,-2.56911053,9.269171E-03,-0.1287031,-2.60819559,      0000003P0000113
+6.042127E-03,-0.1287071,-2.627535,4.028084E-03,-0.1287122,       0000003P0000114
+-2.627535,0.,-0.1287122,-1.8411536,0.1692312,-4.19809E-02,       0000003P0000115
+-1.8735386,0.1607745,-4.19853E-02,-1.9383084,0.1462642,          0000003P0000116
+-4.19943E-02,-1.9967672,0.1328353,-4.19976E-02,-2.0312921,       0000003P0000117
+0.1250767,-4.20058E-02,-2.0427469,0.1225248,-4.20067E-02,        0000003P0000118
+-2.0542018,0.1199729,-4.20076E-02,-2.0770086,0.1149378,          0000003P0000119
+-4.20094E-02,-2.0996552,0.1100343,-4.20114E-02,-2.1113618,       0000003P0000120
+0.1074653,-4.20121E-02,-2.1230684,0.1048962,-4.20127E-02,        0000003P0000121
+-2.1593327,9.684004E-02,-4.20193E-02,-2.2318612,7.937736E-02,    0000003P0000122
+-4.20268E-02,-2.3289702,5.600987E-02,-4.20403E-02,-2.4655648,    0000003P0000123
+2.900911E-02,-4.20574E-02,-2.58846162,1.047646E-02,-4.20756E-02, 0000003P0000124
+-2.62794719,5.641619E-03,-4.20793E-02,-2.64527255,4.028084E-03,  0000003P0000125
+-4.20805E-02,-2.64527255,0.,-4.20805E-02,-1.8411636,0.211137,    0000003P0000126
+4.02173E-02,-1.8735486,0.2034814,4.02129E-02,-1.938317,          0000003P0000127
+0.1869628,4.18175E-02,-1.9967793,0.1715199,4.41601E-02,          0000003P0000128
+-2.0331165,0.1627027,4.57213E-02,-2.045269,0.1597389,            0000003P0000129
+4.62074E-02,-2.0574215,0.156775,4.66878E-02,-2.0818066,          0000003P0000130
+0.1508016,4.75914E-02,-2.1059632,0.1444218,4.81629E-02,          0000003P0000131
+-2.1184477,0.1411032,4.84548E-02,-2.1309379,0.1377904,           0000003P0000132
+4.87467E-02,-2.1696216,0.1274627,4.96463E-02,-2.2471825,         0000003P0000133
+0.1079859,5.26588E-02,-2.3515437,8.09909E-02,5.58677E-02,        0000003P0000134
+-2.4921706,4.593394E-02,5.74621E-02,-2.61627429,1.933941E-02,    0000003P0000135
+5.78512E-02,-2.672284183,1.087699E-02,5.78456E-02,-2.69485063,   0000003P0000136
+4.028084E-03,5.78436E-02,-2.69485063,0.,5.78436E-02,-1.8411737,  0000003P0000137
+0.246194,0.1224213,-1.8735586,0.2385383,0.1224168,-1.9383255,    0000003P0000138
+0.2244343,0.1256292,-1.9967878,0.21235,0.1279718,-2.034938,      0000003P0000139
+0.2032811,0.1304387,-2.0478282,0.2001513,0.131326,-2.0607184,    0000003P0000140
+0.1970216,0.1322133,-2.0868535,0.1905389,0.1341256,-2.1130285,   0000003P0000141
+0.1839932,0.1361008,-2.1266452,0.1806231,0.137189,-2.1402676,    0000003P0000142
+0.1772588,0.138283,-2.1827252,0.1668853,0.1418525,-2.2693611,    0000003P0000143
+0.147071,0.1505722,-2.3954806,0.1172551,0.1614339,-2.5437597,    0000003P0000144
+7.897685E-02,0.1730998,-2.68519165,4.150533E-02,0.1827512,       0000003P0000145
+-2.75248379,1.813212E-02,0.1899997,-2.7734371,4.028084E-03,      0000003P0000146
+0.1895956,-2.7734367,0.,0.1899962,-1.8411863,0.2844723,          0000003P0000147
+0.2017987,-1.8735713,0.2776234,0.2017942,-1.9383381,0.2643261,   0000003P0000148
+0.2050066,-1.9967992,0.2508972,0.2086938,-2.0370607,0.2448036,   0000003P0000149
+0.2112084,-2.0508606,0.2427781,0.2121538,-2.0646605,0.2407526,   0000003P0000150
+0.2131049,-2.0930096,0.2367245,0.2152137,-2.1220676,0.2318897,   0000003P0000151
+0.218067,-2.1374174,0.2292405,0.219752,-2.1527672,0.2265857,     0000003P0000152
+0.2214368,-2.2012706,0.217923,0.227272,-2.3040248,0.1994533,     0000003P0000153
+0.2420315,-2.4551299,0.173265,0.2665911,-2.61066653,0.1345805,   0000003P0000154
+0.2879278,-2.7710322,8.13914E-02,0.303221,-2.8673392,            0000003P0000155
+4.029232E-02,0.310462,-2.9197186,4.028084E-03,0.3136801,         0000003P0000156
+-2.9225432,0.,0.3156911,-1.8410594,0.3328265,0.3146194,          0000003P0000157
+-1.8736931,0.3296052,0.3178422,-1.9383544,0.3231568,0.3238753,   0000003P0000158
+-1.9966245,0.3161019,0.329914,-2.0399682,0.3109695,0.334291,     0000003P0000159
+-2.0551008,0.3091615,0.335804,-2.0702276,0.3073591,0.3373228,    0000003P0000160
+-2.1018601,0.3035656,0.340476,-2.1352376,0.2994346,0.3437625,    0000003P0000161
+-2.1530419,0.2970886,0.345524,-2.1708463,0.2947485,0.3472855,    0000003P0000162
+-2.2276119,0.2868925,0.3529166,-2.3491699,0.2667464,0.3653246,   0000003P0000163
+-2.5192064,0.2280619,0.3794081,-2.68159585,0.1817274,0.3902689,  0000003P0000164
+-2.8443828,0.1273311,0.3983035,-2.9801719,6.447229E-02,          0000003P0000165
+0.4031263,-3.0595482,4.028084E-03,0.4043209,-3.0595482,0.,       0000003P0000166
+0.4043209,1.961052526,10.,0.,11.;                                0000003P0000167
+508,4,0,7,1,0,1,0,19,0,7,2,1,1,0,21,0,7,3,1,1,0,23,0,7,4,0,1,0,  0000005P0000168
+25;                                                              0000005P0000169
+504,4,9,11,1,11,2,13,11,1,11,3,15,11,3,11,4,17,11,2,11,4;        0000007P0000170
+126,80,5,1,0,1,0,0.,0.,0.,0.,0.,0.,1.294475869E-02,              0000009P0000171
+1.294475869E-02,1.294475869E-02,3.677477935E-02,3.677477935E-02, 0000009P0000172
+3.677477935E-02,6.37318215E-02,6.37318215E-02,6.37318215E-02,    0000009P0000173
+9.214808598E-02,9.214808598E-02,9.214808598E-02,0.121332981,     0000009P0000174
+0.121332981,0.121332981,0.151632798,0.151632798,0.151632798,     0000009P0000175
+0.1758443,0.1758443,0.1758443,0.210359082,0.210359082,           0000009P0000176
+0.210359082,0.246682918,0.246682918,0.246682918,0.284528737,     0000009P0000177
+0.284528737,0.284528737,0.32376227,0.32376227,0.32376227,        0000009P0000178
+0.364138741,0.364138741,0.364138741,0.405236432,0.405236432,     0000009P0000179
+0.405236432,0.436342998,0.436342998,0.436342998,0.478039961,     0000009P0000180
+0.478039961,0.478039961,0.519962761,0.519962761,0.519962761,     0000009P0000181
+0.562315329,0.562315329,0.562315329,0.605309669,0.605309669,     0000009P0000182
+0.605309669,0.649262383,0.649262383,0.649262383,0.68324956,      0000009P0000183
+0.68324956,0.68324956,0.730724691,0.730724691,0.730724691,       0000009P0000184
+0.780978326,0.780978326,0.780978326,0.83357938,0.83357938,       0000009P0000185
+0.83357938,0.887890113,0.887890113,0.887890113,0.943462915,      0000009P0000186
+0.943462915,0.943462915,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,  0000009P0000187
+1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,  0000009P0000188
+1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,  0000009P0000189
+1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,  0000009P0000190
+1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,-2.010663,-5.551115123E-17,     0000009P0000191
+-0.249154631,-2.010663,2.192562401E-03,-0.248088702,-2.010663,   0000009P0000192
+2.470301091E-04,-0.249728099,-2.010663,3.029158552E-03,          0000009P0000193
+-0.246771216,-2.010663,3.365191399E-03,-0.24532699,-2.010663,    0000009P0000194
+5.118457775E-03,-0.244005691,-2.010663,5.873359039E-03,          0000009P0000195
+-0.243052269,-2.010663,7.654583196E-03,-0.241150896,-2.010663,   0000009P0000196
+9.260894273E-03,-0.239319174,-2.010663,1.010271957E-02,          0000009P0000197
+-0.238369708,-2.010663,1.181875623E-02,-0.236455061,-2.010663,   0000009P0000198
+1.366229627E-02,-0.234480774,-2.010663,1.466167086E-02,          0000009P0000199
+-0.233419917,-2.010663,1.683693655E-02,-0.231118987,-2.010663,   0000009P0000200
+1.927946131E-02,-0.228503427,-2.010663,2.055775019E-02,          0000009P0000201
+-0.227114481,-2.010663,2.317581602E-02,-0.22422026,-2.010663,    0000009P0000202
+2.577881373E-02,-0.221216062,-2.010663,2.700662073E-02,          0000009P0000203
+-0.219762714,-2.010663,2.933857512E-02,-0.216890233,-2.010663,   0000009P0000204
+3.198439543E-02,-0.213478303,-2.010663,3.324359497E-02,          0000009P0000205
+-0.211833043,-2.010663,3.649498241E-02,-0.207551568,-2.010663,   0000009P0000206
+4.020176937E-02,-0.202626255,-2.010663,4.245514765E-02,          0000009P0000207
+-0.199578673,-2.010663,4.719737423E-02,-0.193029949,-2.010663,   0000009P0000208
+5.208215231E-02,-0.185939422,-2.010663,5.460381E-02,-0.1821745,  0000009P0000209
+-2.010663,5.974289917E-02,-0.174236858,-2.010663,                0000009P0000210
+6.481581795E-02,-0.165808396,-2.010663,6.739620648E-02,          0000009P0000211
+-0.161387569,-2.010663,7.264475388E-02,-0.152160249,-2.010663,   0000009P0000212
+7.790443011E-02,-0.142521964,-2.010663,8.059095806E-02,          0000009P0000213
+-0.137520136,-2.010663,8.606806939E-02,-0.127200057,-2.010663,   0000009P0000214
+9.161316932E-02,-0.116564868,-2.010663,9.444582827E-02,          0000009P0000215
+-0.111101167,-2.010663,0.100168309,-9.9976354E-02,-2.010663,     0000009P0000216
+0.105899822,-8.8671986E-02,-2.010663,0.108786628,-8.2933649E-02, 0000009P0000217
+-2.010663,0.113846214,-7.2796344E-02,-2.010663,0.118870191,      0000009P0000218
+-6.2576161E-02,-2.010663,0.121026173,-5.8159836E-02,-2.010663,   0000009P0000219
+0.126048143,-4.7797163E-02,-2.010663,0.131002538,-3.7381703E-02, 0000009P0000220
+-2.010663,0.13381038,-3.1401874E-02,-2.010663,0.139406089,       0000009P0000221
+-1.938662E-02,-2.010663,0.144935488,-7.346199E-03,-2.010663,     0000009P0000222
+0.147694038,-1.311825E-03,-2.010663,0.153216264,1.0798475E-02,   0000009P0000223
+-2.010663,0.158704444,2.2838304E-02,-2.010663,0.16145713,        0000009P0000224
+2.8863258E-02,-2.010663,0.166999357,4.09423E-02,-2.010663,       0000009P0000225
+0.172546846,5.2866785E-02,-2.010663,0.17534571,5.8833209E-02,    0000009P0000226
+-2.010663,0.1810135,7.0837885E-02,-2.010663,0.186703971,         0000009P0000227
+8.2735759E-02,-2.010663,0.189584341,8.8724639E-02,-2.010663,     0000009P0000228
+0.194712178,9.9331819E-02,-2.010663,0.199840153,0.109860809,     0000009P0000229
+-2.010663,0.202072482,0.114430279,-2.010663,0.207403955,         0000009P0000230
+0.125321152,-2.010663,0.212701313,0.136082592,-2.010663,         0000009P0000231
+0.215756708,0.142292757,-2.010663,0.222083463,0.155063531,       0000009P0000232
+-2.010663,0.228501245,0.167835869,-2.010663,0.231834725,         0000009P0000233
+0.174416948,-2.010663,0.238710655,0.187900455,-2.010663,         0000009P0000234
+0.245687148,0.201417533,-2.010663,0.24927514,0.208333764,        0000009P0000235
+-2.010663,0.256599596,0.222391991,-2.010663,0.26397988,          0000009P0000236
+0.236452081,-2.010663,0.267739885,0.243592745,-2.010663,         0000009P0000237
+0.275365647,0.258035896,-2.010663,0.283020109,0.272464637,       0000009P0000238
+-2.010663,0.286896874,0.279757443,-2.010663,0.294726199,         0000009P0000239
+0.294459216,-2.010663,0.302569582,0.309140062,-2.010663,         0000009P0000240
+0.306527711,0.316538185,-2.010663,0.310488219,0.32393137,        0000009P0000241
+-2.010663,0.314450974,0.331320309,0.,1.,1.,6.160081347E-14,      0000009P0000242
+1.026680225E-14;                                                 0000009P0000243
+502,4,-2.010663,0.,-0.249154631,-2.010663,0.314450974,           0000011P0000244
+0.331320309,-2.63758308,0.,-0.34670038,-3.0595482,0.,0.4043209;  0000011P0000245
+126,13,3,1,0,1,0,2.537723945,2.537723945,2.537723945,            0000013P0000246
+2.537723945,3.,3.5,3.5,3.5,4.,5.,6.,7.,8.,9.,10.,10.,10.,10.,1., 0000013P0000247
+1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,-2.010663,0.,             0000013P0000248
+-0.249154631,-2.015662672,0.,-0.257394113,-2.023366971,0.,       0000013P0000249
+-0.270535264,-2.0299576,0.,-0.28255951,-2.0351962,0.,            0000013P0000250
+-0.29223011,-2.0420465,0.,-0.29384303,-2.0783114,0.,-0.30069283, 0000013P0000251
+-2.1701788,0.,-0.3188375,-2.3136179,0.,-0.34504804,-2.4542424,   0000013P0000252
+0.,-0.3656138,-2.56746892,0.,-0.3712712,-2.61219314,0.,          0000013P0000253
+-0.3624128,-2.62871465,0.,-0.3531455,-2.63758308,0.,-0.34670038, 0000013P0000254
+2.537723945,10.,0.,1.,0.;                                        0000013P0000255
+126,13,3,1,0,1,0,0.,0.,0.,0.,1.,2.,3.,4.,5.,6.,7.,8.,9.,10.,11., 0000015P0000256
+11.,11.,11.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,           0000015P0000257
+-2.63758308,0.,-0.34670038,-2.64685205,0.,-0.34025493,           0000015P0000258
+-2.660955046,0.,-0.32051344,-2.666596513,0.,-0.29553292,         0000015P0000259
+-2.661364494,0.,-0.26813071,-2.6444467,0.,-0.244755933,          0000015P0000260
+-2.63558227,0.,-0.21211599,-2.63075053,0.,-0.17827077,-2.627535, 0000015P0000261
+0.,-0.1287122,-2.64527255,0.,-4.20805E-02,-2.69485063,0.,        0000015P0000262
+5.78436E-02,-2.7734367,0.,0.1899962,-2.9225432,0.,0.3156911,     0000015P0000263
+-3.0595482,0.,0.4043209,0.,11.,0.,-1.,0.;                        0000015P0000264
+126,17,3,0,0,1,0,1.983431199,1.983431199,1.983431199,            0000017P0000265
+1.983431199,2.,2.5,2.5,2.5,3.,3.5,3.5,3.5,4.,5.,6.,7.,8.,9.,10., 0000017P0000266
+10.,10.,10.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,  0000017P0000267
+1.,-2.010663,0.314450974,0.331320309,-2.0111089,0.31439781,      0000017P0000268
+0.331365701,-2.025041531,0.312736992,0.332783652,-2.0399682,     0000017P0000269
+0.3109695,0.334291,-2.0551008,0.3091615,0.335804,-2.0702276,     0000017P0000270
+0.3073591,0.3373228,-2.1018601,0.3035656,0.340476,-2.1352376,    0000017P0000271
+0.2994346,0.3437625,-2.1530419,0.2970886,0.345524,-2.1708463,    0000017P0000272
+0.2947485,0.3472855,-2.2276119,0.2868925,0.3529166,-2.3491699,   0000017P0000273
+0.2667464,0.3653246,-2.5192064,0.2280619,0.3794081,-2.68159585,  0000017P0000274
+0.1817274,0.3902689,-2.8443828,0.1273311,0.3983035,-2.9801719,   0000017P0000275
+6.447229E-02,0.4031263,-3.0595482,4.028084E-03,0.4043209,        0000017P0000276
+-3.0595482,0.,0.4043209,1.983431199,10.,0.115177732,             0000017P0000277
+-0.166179628,0.979345915;                                        0000017P0000278
+126,312,8,1,0,1,0,0.,0.,0.,0.,0.,0.,0.,0.,0.,8.516307792E-02,    0000019P0000279
+8.516307792E-02,8.516307792E-02,8.516307792E-02,8.516307792E-02, 0000019P0000280
+8.516307792E-02,8.516307792E-02,8.516307792E-02,0.170326155,     0000019P0000281
+0.170326155,0.170326155,0.170326155,0.170326155,0.170326155,     0000019P0000282
+0.170326155,0.170326155,0.227101539,0.227101539,0.227101539,     0000019P0000283
+0.227101539,0.227101539,0.227101539,0.227101539,0.227101539,     0000019P0000284
+0.259544616,0.259544616,0.259544616,0.259544616,0.259544616,     0000019P0000285
+0.259544616,0.259544616,0.259544616,0.296622419,0.296622419,     0000019P0000286
+0.296622419,0.296622419,0.296622419,0.296622419,0.296622419,     0000019P0000287
+0.296622419,0.306190884,0.306190884,0.306190884,0.306190884,     0000019P0000288
+0.306190884,0.306190884,0.306190884,0.306190884,0.316068009,     0000019P0000289
+0.316068009,0.316068009,0.316068009,0.316068009,0.316068009,     0000019P0000290
+0.316068009,0.316068009,0.337139209,0.337139209,0.337139209,     0000019P0000291
+0.337139209,0.337139209,0.337139209,0.337139209,0.337139209,     0000019P0000292
+0.359615157,0.359615157,0.359615157,0.359615157,0.359615157,     0000019P0000293
+0.359615157,0.359615157,0.359615157,0.3835895,0.3835895,         0000019P0000294
+0.3835895,0.3835895,0.3835895,0.3835895,0.3835895,0.3835895,     0000019P0000295
+0.409162134,0.409162134,0.409162134,0.409162134,0.409162134,     0000019P0000296
+0.409162134,0.409162134,0.409162134,0.436439609,0.436439609,     0000019P0000297
+0.436439609,0.436439609,0.436439609,0.436439609,0.436439609,     0000019P0000298
+0.436439609,0.450518306,0.450518306,0.450518306,0.450518306,     0000019P0000299
+0.450518306,0.450518306,0.450518306,0.450518306,0.514878064,     0000019P0000300
+0.514878064,0.514878064,0.514878064,0.514878064,0.514878064,     0000019P0000301
+0.514878064,0.514878064,0.549203268,0.549203268,0.549203268,     0000019P0000302
+0.549203268,0.549203268,0.549203268,0.549203268,0.549203268,     0000019P0000303
+0.566919503,0.566919503,0.566919503,0.566919503,0.566919503,     0000019P0000304
+0.566919503,0.566919503,0.566919503,0.585207229,0.585207229,     0000019P0000305
+0.585207229,0.585207229,0.585207229,0.585207229,0.585207229,     0000019P0000306
+0.585207229,0.624221044,0.624221044,0.624221044,0.624221044,     0000019P0000307
+0.624221044,0.624221044,0.624221044,0.624221044,0.644357206,     0000019P0000308
+0.644357206,0.644357206,0.644357206,0.644357206,0.644357206,     0000019P0000309
+0.644357206,0.644357206,0.665142923,0.665142923,0.665142923,     0000019P0000310
+0.665142923,0.665142923,0.665142923,0.665142923,0.665142923,     0000019P0000311
+0.675700747,0.675700747,0.675700747,0.675700747,0.675700747,     0000019P0000312
+0.675700747,0.675700747,0.675700747,0.686426156,0.686426156,     0000019P0000313
+0.686426156,0.686426156,0.686426156,0.686426156,0.686426156,     0000019P0000314
+0.686426156,0.708568935,0.708568935,0.708568935,0.708568935,     0000019P0000315
+0.708568935,0.708568935,0.708568935,0.708568935,0.731425997,     0000019P0000316
+0.731425997,0.731425997,0.731425997,0.731425997,0.731425997,     0000019P0000317
+0.731425997,0.731425997,0.755020384,0.755020384,0.755020384,     0000019P0000318
+0.755020384,0.755020384,0.755020384,0.755020384,0.755020384,     0000019P0000319
+0.760965426,0.760965426,0.760965426,0.760965426,0.760965426,     0000019P0000320
+0.760965426,0.760965426,0.760965426,0.785512698,0.785512698,     0000019P0000321
+0.785512698,0.785512698,0.785512698,0.785512698,0.785512698,     0000019P0000322
+0.785512698,0.797981154,0.797981154,0.797981154,0.797981154,     0000019P0000323
+0.797981154,0.797981154,0.797981154,0.797981154,0.823722481,     0000019P0000324
+0.823722481,0.823722481,0.823722481,0.823722481,0.823722481,     0000019P0000325
+0.823722481,0.823722481,0.826952766,0.826952766,0.826952766,     0000019P0000326
+0.826952766,0.826952766,0.826952766,0.826952766,0.826952766,     0000019P0000327
+0.833464205,0.833464205,0.833464205,0.833464205,0.833464205,     0000019P0000328
+0.833464205,0.833464205,0.833464205,0.846693795,0.846693795,     0000019P0000329
+0.846693795,0.846693795,0.846693795,0.846693795,0.846693795,     0000019P0000330
+0.846693795,0.853360675,0.853360675,0.853360675,0.853360675,     0000019P0000331
+0.853360675,0.853360675,0.853360675,0.853360675,0.866906083,     0000019P0000332
+0.866906083,0.866906083,0.866906083,0.866906083,0.866906083,     0000019P0000333
+0.866906083,0.866906083,0.870305715,0.870305715,0.870305715,     0000019P0000334
+0.870305715,0.870305715,0.870305715,0.870305715,0.870305715,     0000019P0000335
+0.994635102,0.994635102,0.994635102,0.994635102,0.994635102,     0000019P0000336
+0.994635102,0.994635102,0.994635102,0.995607375,0.995607375,     0000019P0000337
+0.995607375,0.995607375,0.995607375,0.995607375,0.995607375,     0000019P0000338
+0.995607375,0.999511718,0.999511718,0.999511718,0.999511718,     0000019P0000339
+0.999511718,0.999511718,0.999511718,0.999511718,1.,1.,1.,1.,1.,  0000019P0000340
+1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,  0000019P0000341
+1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,  0000019P0000342
+1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,  0000019P0000343
+1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,  0000019P0000344
+1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,  0000019P0000345
+1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,  0000019P0000346
+1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,  0000019P0000347
+1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,  0000019P0000348
+1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,  0000019P0000349
+1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,  0000019P0000350
+1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,  0000019P0000351
+1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,  0000019P0000352
+1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,  0000019P0000353
+1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,  0000019P0000354
+1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,  0000019P0000355
+1.,1.,1.983431198,10.999999998,0.,1.984239761,10.980949557,0.,   0000019P0000356
+1.985055869,10.961335918,0.,1.985879815,10.94111261,0.,          0000019P0000357
+1.986711913,10.920228083,0.,1.987552497,10.898623092,0.,         0000019P0000358
+1.98840192,10.876228074,0.,1.989260547,10.852960527,0.,          0000019P0000359
+1.990128755,10.828722388,0.,1.990996967,10.804484071,0.,         0000019P0000360
+1.991874745,10.779275206,0.,1.992762521,10.752995452,0.,         0000019P0000361
+1.993660679,10.725528191,0.,1.994569596,10.696736577,0.,         0000019P0000362
+1.995489774,10.666451536,0.,1.996421846,10.634460177,0.,         0000019P0000363
+1.997366229,10.600503086,0.,1.997995822,10.577864908,0.,         0000019P0000364
+1.998630919,10.554351929,0.,1.999271649,10.529886582,0.,         0000019P0000365
+1.99991834,10.504374634,0.,2.000570641,10.477728763,0.,          0000019P0000366
+2.001229385,10.449817468,0.,2.001893615,10.420543263,0.,         0000019P0000367
+2.002563803,10.389765756,0.,2.002946762,10.372178802,0.,         0000019P0000368
+2.003331639,10.354101413,0.,2.003718407,10.335510738,0.,         0000019P0000369
+2.00410702,10.316383231,0.,2.004497414,10.296696507,0.,          0000019P0000370
+2.004889508,10.276431198,0.,2.005283204,10.255572817,0.,         0000019P0000371
+2.005678388,10.234113617,0.,2.00613004,10.209588818,0.,          0000019P0000372
+2.006583525,10.184280468,0.,2.007038973,10.158176055,0.,         0000019P0000373
+2.007496196,10.131278482,0.,2.007952471,10.103629097,0.,         0000019P0000374
+2.008411397,10.075250463,0.,2.008870947,10.046243496,0.,         0000019P0000375
+2.009330215,10.016732675,0.,2.009448733,10.009116986,0.,         0000019P0000376
+2.009567213,10.001468179,0.,2.0096857,9.993785556,0.,            0000019P0000377
+2.009803887,9.986082806,0.,2.009922268,9.978340462,0.,           0000019P0000378
+2.010040498,9.970573418,0.,2.010158793,9.962773369,0.,           0000019P0000379
+2.010277097,9.954942907,0.,2.01039922,9.946859787,0.,            0000019P0000380
+2.010521364,9.938743722,0.,2.010643547,9.93059447,0.,            0000019P0000381
+2.010765773,9.92241189,0.,2.010888037,9.914195943,0.,            0000019P0000382
+2.011010325,9.905946692,0.,2.011132608,9.897664301,0.,           0000019P0000383
+2.011254851,9.889349036,0.,2.011515635,9.871609745,0.,           0000019P0000384
+2.01177614,9.853721796,0.,2.012036498,9.835683479,0.,            0000019P0000385
+2.012296796,9.817494013,0.,2.012557079,9.799153554,0.,           0000019P0000386
+2.012817345,9.780663189,0.,2.013077552,9.762024938,0.,           0000019P0000387
+2.013337612,9.743241754,0.,2.013615012,9.723206388,0.,           0000019P0000388
+2.013892316,9.703005814,0.,2.014169365,9.68264438,0.,2.01444617, 0000019P0000389
+9.662125722,0.,2.014722763,9.641455413,0.,2.014999124,           0000019P0000390
+9.620641256,0.,2.015275179,9.599691231,0.,2.015550874,           0000019P0000391
+9.578609086,0.,2.015844955,9.556121517,0.,2.016138657,           0000019P0000392
+9.53348401,0.,2.016431969,9.510702805,0.,2.016724922,            0000019P0000393
+9.487782881,0.,2.017017576,9.464728446,0.,2.017310008,           0000019P0000394
+9.441543434,0.,2.017602294,9.418231991,0.,2.017894501,           0000019P0000395
+9.394798975,0.,2.018206182,9.369803695,0.,2.018517785,           0000019P0000396
+9.344670595,0.,2.018829412,9.319403822,0.,2.019140967,           0000019P0000397
+9.294012557,0.,2.019452517,9.268506214,0.,2.019764366,           0000019P0000398
+9.242892519,0.,2.020076824,9.217178435,0.,2.020389673,           0000019P0000399
+9.191373965,0.,2.020723403,9.163849185,0.,2.021057661,           0000019P0000400
+9.136220573,0.,2.021392708,9.108502014,0.,2.021728027,           0000019P0000401
+9.080694383,0.,2.022064986,9.052832337,0.,2.02240181,            0000019P0000402
+9.024893084,0.,2.022741238,8.996932592,0.,2.023081799,           0000019P0000403
+8.968928405,0.,2.023257582,8.954474807,0.,2.023433699,           0000019P0000404
+8.940010385,0.,2.023610182,8.925535466,0.,2.023787062,           0000019P0000405
+8.911050379,0.,2.023964372,8.896555453,0.,2.024142145,           0000019P0000406
+8.882051015,0.,2.024320413,8.867537393,0.,2.024499207,           0000019P0000407
+8.853014917,0.,2.025316517,8.786626368,0.,2.026144673,           0000019P0000408
+8.720052423,0.,2.026986203,8.653323171,0.,2.027841491,           0000019P0000409
+8.586463654,0.,2.02871436,8.519512223,0.,2.029608015,            0000019P0000410
+8.452511908,0.,2.030521909,8.385500032,0.,2.031460064,           0000019P0000411
+8.318521268,0.,2.03196038,8.28279932,0.,2.032467484,8.247085811, 0000019P0000412
+0.,2.032982203,8.211389961,0.,2.033504066,8.17571571,0.,         0000019P0000413
+2.034033132,8.140064498,0.,2.034570488,8.104437115,0.,           0000019P0000414
+2.035117402,8.068834639,0.,2.035673156,8.03325845,0.,            0000019P0000415
+2.035960004,8.014896741,0.,2.036249353,7.996540641,0.,           0000019P0000416
+2.036540862,7.978195581,0.,2.036834779,7.959856222,0.,           0000019P0000417
+2.037131438,7.941517524,0.,2.037430827,7.923176816,0.,           0000019P0000418
+2.037732608,7.904831636,0.,2.03803699,7.886478127,0.,            0000019P0000419
+2.038351187,7.867532619,0.,2.038668107,7.848578451,0.,           0000019P0000420
+2.038987765,7.829612197,0.,2.039310182,7.810630377,0.,           0000019P0000421
+2.03963538,7.791629457,0.,2.039963388,7.772605852,0.,            0000019P0000422
+2.040294239,7.75355592,0.,2.040627968,7.734475966,0.,            0000019P0000423
+2.041339901,7.693772116,0.,2.042065016,7.652931214,0.,           0000019P0000424
+2.042803537,7.611917448,0.,2.043555817,7.57069243,0.,2.04432234, 0000019P0000425
+7.529214466,0.,2.045103728,7.487437821,0.,2.045900751,           0000019P0000426
+7.445311995,0.,2.046714329,7.402780986,0.,2.047134242,           0000019P0000427
+7.380829428,0.,2.047558487,7.358770448,0.,2.04798735,            0000019P0000428
+7.336594957,0.,2.048420786,7.314296127,0.,2.04885883,            0000019P0000429
+7.291866066,0.,2.049301739,7.269294569,0.,2.049749877,           0000019P0000430
+7.246569973,0.,2.050203343,7.223682109,0.,2.050671436,           0000019P0000431
+7.200055919,0.,2.051145218,7.176255733,0.,2.051624881,           0000019P0000432
+7.152269558,0.,2.052110591,7.12808507,0.,2.052602533,            0000019P0000433
+7.103688945,0.,2.053100947,7.079066185,0.,2.053606168,           0000019P0000434
+7.05419944,0.,2.05411867,7.029068342,0.,2.054378989,7.016303367, 0000019P0000435
+0.,2.054641168,7.003469673,0.,2.054905394,6.990567981,0.,        0000019P0000436
+2.0551713,6.977583519,0.,2.055439763,6.964534522,0.,2.055710452, 0000019P0000437
+6.951406354,0.,2.055983861,6.938205931,0.,2.056260137,           0000019P0000438
+6.924930307,0.,2.056540821,6.911444062,0.,2.056824446,           0000019P0000439
+6.897880621,0.,2.057111295,6.884239407,0.,2.057401653,           0000019P0000440
+6.87051984,0.,2.057695804,6.856721341,0.,2.057994031,            0000019P0000441
+6.842843333,0.,2.058296619,6.828885237,0.,2.058603851,           0000019P0000442
+6.814846474,0.,2.059238173,6.785863293,0.,2.059892477,           0000019P0000443
+6.756536938,0.,2.060569494,6.726862904,0.,2.061272233,           0000019P0000444
+6.696837644,0.,2.062003984,6.666458567,0.,2.062768318,           0000019P0000445
+6.635724041,0.,2.063569082,6.604633392,0.,2.064410408,           0000019P0000446
+6.573186902,0.,2.065278904,6.540725963,0.,2.066190591,6.5078865, 0000019P0000447
+0.,2.067150126,6.474668219,0.,2.068162484,6.441073231,0.,        0000019P0000448
+2.069232962,6.407106049,0.,2.070367174,6.372773589,0.,           0000019P0000449
+2.071571058,6.338085169,0.,2.072850869,6.303052512,0.,           0000019P0000450
+2.074171939,6.266889902,0.,2.075573948,6.230360304,0.,           0000019P0000451
+2.077063446,6.19347863,0.,2.078648047,6.156258644,0.,            0000019P0000452
+2.080334801,6.118722383,0.,2.082131124,6.08089451,0.,            0000019P0000453
+2.084045402,6.042798064,0.,2.086084429,6.004468098,0.,           0000019P0000454
+2.086598181,5.994810133,0.,2.087119881,5.985137851,0.,           0000019P0000455
+2.087649489,5.975450784,0.,2.088186969,5.965748459,0.,           0000019P0000456
+2.088732283,5.956030407,0.,2.089285392,5.946296157,0.,           0000019P0000457
+2.089846259,5.936545239,0.,2.090414847,5.926777183,0.,           0000019P0000458
+2.092762564,5.886444393,0.,2.095241888,5.845819299,0.,           0000019P0000459
+2.097850175,5.80486631,0.,2.100584782,5.763548375,0.,            0000019P0000460
+2.103443131,5.721824703,0.,2.106422769,5.679648479,0.,           0000019P0000461
+2.109521433,5.636964582,0.,2.112737111,5.593707303,0.,           0000019P0000462
+2.11437048,5.571735231,0.,2.116034006,5.549615468,0.,            0000019P0000463
+2.117727522,5.527338289,0.,2.119450763,5.504894408,0.,           0000019P0000464
+2.121203412,5.482274285,0.,2.122985145,5.459467439,0.,           0000019P0000465
+2.124795681,5.436461755,0.,2.126634825,5.413242798,0.,           0000019P0000466
+2.130431756,5.365306965,0.,2.134350486,5.316463793,0.,           0000019P0000467
+2.138388831,5.266589867,0.,2.142544404,5.215541767,0.,           0000019P0000468
+2.146814514,5.163135359,0.,2.15119473,5.109154538,0.,            0000019P0000469
+2.155677975,5.053322892,0.,2.160255935,4.995171767,0.,           0000019P0000470
+2.160830434,4.987874404,0.,2.161406401,4.980541279,0.,           0000019P0000471
+2.161983857,4.973172191,0.,2.162562822,4.965766941,0.,           0000019P0000472
+2.163143319,4.958325329,0.,2.163725366,4.950847154,0.,           0000019P0000473
+2.164308986,4.943332216,0.,2.1648942,4.935780315,0.,2.166073846, 0000019P0000474
+4.920557578,0.,2.167259972,4.905184733,0.,2.168452757,           0000019P0000475
+4.889660236,0.,2.169652377,4.873982759,0.,2.170859011,           0000019P0000476
+4.858151192,0.,2.172072839,4.842164642,0.,2.173294042,           0000019P0000477
+4.826022432,0.,2.174522802,4.809724104,0.,2.17701933,            0000019P0000478
+4.776610035,0.,2.179547051,4.742851778,0.,2.182107486,           0000019P0000479
+4.708445723,0.,2.184702175,4.673393135,0.,2.187332641,           0000019P0000480
+4.637701151,0.,2.190000361,4.601383791,0.,2.192706738,           0000019P0000481
+4.564462963,0.,2.195453066,4.526969466,0.,2.196837055,           0000019P0000482
+4.50807492,0.,2.19823111,4.489036014,0.,2.199635679,4.469852715, 0000019P0000483
+0.,2.201050659,4.450533095,0.,2.202475917,4.43108612,0.,         0000019P0000484
+2.203911519,4.411518341,0.,2.205357687,4.391834486,0.,           0000019P0000485
+2.206814475,4.372041949,0.,2.209774303,4.331828494,0.,           0000019P0000486
+2.212778096,4.291164565,0.,2.215826947,4.250103437,0.,           0000019P0000487
+2.218922543,4.208694744,0.,2.22206608,4.166999273,0.,            0000019P0000488
+2.225257285,4.125099511,0.,2.228493528,4.083105959,0.,           0000019P0000489
+2.231769062,4.041159195,0.,2.232591154,4.030631439,0.,           0000019P0000490
+2.233415764,4.020105744,0.,2.234242548,4.00958967,0.,            0000019P0000491
+2.235072246,3.99906767,0.,2.235903005,3.988579271,0.,            0000019P0000492
+2.236736759,3.978082853,0.,2.237572614,3.967597442,0.,           0000019P0000493
+2.238410965,3.957114247,0.,2.268686384,3.586585898,0.,           0000019P0000494
+2.306476754,3.097603196,0.,2.327287014,3.075685891,0.,           0000019P0000495
+2.399342021,1.942197312,0.,2.359616172,2.143616437,0.,           0000019P0000496
+2.445786875,1.323805042,0.,2.478905511,0.768445753,0.,           0000019P0000497
+2.52286648,0.197888699,0.,2.523203055,0.193405813,0.,2.52353963, 0000019P0000498
+0.188922927,0.,2.523876204,0.184440041,0.,2.524212779,           0000019P0000499
+0.179957155,0.,2.524549354,0.175474269,0.,2.524885928,           0000019P0000500
+0.170991382,0.,2.525222503,0.166508496,0.,2.525559078,           0000019P0000501
+0.16202561,0.,2.526910656,0.144023746,0.,2.528262234,            0000019P0000502
+0.126021881,0.,2.529613813,0.108020017,0.,2.530965391,           0000019P0000503
+9.001815282E-02,0.,2.532316969,7.201628848E-02,0.,2.533668548,   0000019P0000504
+5.401442415E-02,0.,2.535020126,3.601255982E-02,0.,2.536371704,   0000019P0000505
+1.801069549E-02,0.,2.536539497,1.58040887E-02,0.,2.536718137,    0000019P0000506
+1.320519093E-02,0.,2.53685457,1.213278926E-02,0.,2.537086413,    0000019P0000507
+7.609639345E-03,0.,2.53718069,8.061970397E-03,0.,2.537405247,    0000019P0000508
+3.802333079E-03,0.,2.537550024,2.428190827E-03,0.,2.53772394,    0000019P0000509
+1.401645235E-07,0.,0.,1.,0.,0.,1.;                               0000019P0000510
+126,4,2,0,0,1,0,2.537723945,2.537723945,2.537723945,3.5,3.5,10., 0000021P0000511
+10.,10.,1.,1.,1.,1.,1.,2.537723945,0.,0.,3.018861968,0.,0.,3.5,  0000021P0000512
+0.,0.,6.75,-1.434890022E-17,-0.,10.,0.,0.,2.537723945,10.,0.,0., 0000021P0000513
+1.;                                                              0000021P0000514
+126,2,2,0,0,1,0,0.,0.,0.,11.,11.,11.,1.,1.,1.,10.,               0000023P0000515
+1.690495512E-13,0.,10.,5.5,0.,10.,11.,0.,0.,11.,0.,0.,1.;        0000023P0000516
+110,10.,11.,0.,1.983431199,11.,0.;                               0000025P0000517
+S      1G      4D     26P    517                                        T0000001
diff --git a/examples/step-54/doc/builds-on b/examples/step-54/doc/builds-on
new file mode 100644
index 0000000..4673935
--- /dev/null
+++ b/examples/step-54/doc/builds-on
@@ -0,0 +1 @@
+step-49 step-53
diff --git a/examples/step-54/doc/intro.dox b/examples/step-54/doc/intro.dox
new file mode 100644
index 0000000..bcd257c
--- /dev/null
+++ b/examples/step-54/doc/intro.dox
@@ -0,0 +1,190 @@
+<br>
+
+<i>This program was contributed by Andrea Mola and Luca Heltai.</i>
+
+@note This program elaborates on concepts of industrial geometry, using tools
+that interface with the OpenCASCADE library (http://www.opencascade.org) that
+allow the specification of arbitrary IGES files to describe the boundaries for
+your geometries.
+
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+
+In some of the previous tutorial programs (step-1, step-3, step-5, step-6 and
+step-49 among others) we have learned how to use the mesh refinement methods
+provided in deal.II. These tutorials have shown how to employ such tools to
+produce a fine grid for a single simulation, as done in step-3; or to start
+from a coarse grid and carry out a series of simulations on adaptively refined
+grids, as is the case of step-6. Regardless of which approach is taken, the
+mesh refinement requires a suitable geometrical description of the
+computational domain boundary in order to place, at each refinement, the new
+mesh nodes onto the boundary surface. For instance, step-5 shows how to assign
+a circular shape to the boundary of the computational domain, so that the
+faces lying on the boundary are refined onto the circle. step-53 shows how to
+do this with a boundary defined by experimentally obtained data.  But, at
+least as far as elementary boundary shapes are concerned, deal.II really only
+provides circles, spheres, boxes and various combinations. In this tutorial,
+we will show how to use a set of classes developed to import arbitrary CAD
+geometries, assign them to the desired boundary of the computational domain,
+and refine a computational grid on such complex shapes.
+
+
+<h3> CAD surfaces </h3>
+
+In the most common industrial practice, the geometrical models of arbitrarily
+shaped objects are realized by means of Computer Aided Design (CAD) tools. The
+use of CAD modelers has spread in the last decades, as they allow for the
+generation of a full virtual model of each designed object, which through a
+computer can be visualized, inspected, and analyzed in its finest details well
+before it is physically crafted.  From a mathematical perspective, the engine
+lying under the hood of CAD modelers is represented by analytical geometry,
+and in particular by parametric curves and surfaces such as B-splines and
+NURBS that are rich enough that they can represent most surfaces of practical
+interest.  Once a virtual model is ready, all the geometrical features of the
+desired object are stored in files which materially contain the coefficients
+of the parametric surfaces and curves composing the object. Depending on the
+specific CAD tool used to define the geometrical model, there are of course
+several different file formats in which the information of a CAD model can be
+organized. To provide a common ground to exchange data across CAD tools, the
+U.S. National Bureau of Standards published in 1980 the Initial Graphics
+Exchange Representation (IGES) neutral file format, which is used in this
+example.
+
+<h3> The boundary projector classes </h3>
+
+To import and interrogate CAD models, the deal.II library contains a series of
+wrapper functions for the OpenCASCADE open source library for CAD
+modeling. These functions allow importing IGES files into OpenCASCADE native
+objects, and wrap them inside a series of Manifold classes.
+
+Once imported from an IGES file, the model is stored in a
+<code>TopoDS_Shape</code>, which is the generic topological entity defined in
+the OpenCASCADE framework. From a <code>TopoDS_Shape</code>, it is then
+possible to access all the sub-shapes (such as vertices, edges and faces)
+composing it, along with their geometrical description.  In the deal.II
+framework, the topological entities composing a shape are used to create
+objects of the Manifold or Boundary classes. In step-6 we saw how to build a
+HyperBallBoundary and assign it to a set of faces (or cells, for co-dimension
+1) of a Triangulation, to have cells and faces refined on a sphere or circle.
+The functions of the CAD modeling interface have been designed to retain the
+same structure, allowing the user to build a projector object using the
+imported CAD shapes, maintaining the very same procedure we use with
+HyperBallBoundary, i.e., assigning such projector object to cells, faces or
+edges of a coarse mesh. At each refinement cycle, the new mesh nodes will be
+then automatically generated by projecting a midpoint of an existing object
+onto the specified geometry.
+
+Differently from a spherical or circular boundary, a boundary with a complex
+geometry poses problems as to where it is best to place the new nodes created
+upon refinement on the prescribed shape.  HyperBallBoundary first creates the
+new nodes on the face or edge to be refined by averaging the surrounding
+points in the same way as FlatManifold does. Then, it goes on to project such
+nodes on the circle or sphere along the radial direction. On such a geometry,
+the radial direction ensures that the newly generated nodes remain evenly
+spaced when remaining on a given refinement level.
+
+In the case of an arbitrary and complex shape though, the best direction of
+the projection cannot be identified that easily.  The OpenCASCADE wrappers in
+deal.II provide several projector classes that employ different projection
+strategies. A first projector, implemented in the
+OpenCASCADE::ArclengthProjectionLineManifold class, is to be used only for
+edge refinement. It is built assigning it a topological shape of dimension
+one, either a <code>TopoDS_Edge</code> or a <code>TopoDS_Wire</code> (which is
+a compound shape, made of several connected <code>TopoDS_Edge</code>s) and
+refines a mesh edge finding the new vertex as the point splitting in two even
+parts the curvilinear length of the CAD curve portion that lies between the
+vertices of the original edge.
+
+<img src="http://www.dealii.org/images/steps/developer/step-54.CurveSplit.png" alt="" width="500">
+
+
+A different projection strategy has been implemented in the
+OpenCASCADE::NormalProjectionBoundary class. The <code>TopoDS_Shape</code>
+assigned at construction time can be arbitrary (a collection of shapes, faces,
+edges or a single face or edge will all work). The new cell nodes are first
+computed by averaging the surrounding points in the same way as FlatManifold
+does. In a second step, all the new nodes will be projected onto the
+<code>TopoDS_Shape</code> along the direction normal to the shape. If no
+normal projection is available, the point which is closest to the
+shape---typically lying on the shape boundary---is selected.  If the shape is
+composed of several sub-shapes, the projection is carried out onto every
+single sub-shape, and the closest projection point is selected.
+
+<img src="http://www.dealii.org/images/steps/developer/step-54.NormalProjectionEdge.png" alt="" width="500">
+<img src="http://www.dealii.org/images/steps/developer/step-54.NormalProjection.png" alt="" width="500">
+
+As we are about to experience, for some shapes, setting the projection
+direction as that normal to the CAD surface will not lead to surface mesh
+elements of suitable quality. This is because the direction normal to the CAD
+surface has in principle nothing to do with the direction along which the mesh
+needs the new nodes to be located. The
+OpenCASCADE::DirectionalProjectionBoundary class, in this case, can help. This
+class is constructed assigning a <code>TopoDS_Shape</code> (containing at
+least a face) and a direction along which all the projections will be carried
+out. New points will be computed by first averaging the surrounding points (as
+in the FlatManifold case), and then taking the closest intersection between
+the topological shape and the line passing through the resulting point, along
+the direction used at construction time.  In this way, the user will have
+more control over the projection direction to be enforced to ensure good mesh
+quality.
+
+<img src="http://www.dealii.org/images/steps/developer/step-54.DirectionalProjection.png" alt="" width="500">
+
+
+Of course the latter approach is effective only when the orientation of the
+surface is rather uniform, so that a single projection direction can be
+identified. In cases in which the surface direction is approaching the
+projection direction, it is even possible that the directional projection is
+not found. To overcome these problems, the
+OpenCASCADE::NormalToMeshProjectionBoundary class implements a third
+projection algorithm. The OpenCASCADE::NormalToMeshProjectionBoundary class is
+built assigning a <code>TopoDS_Shape</code> (containing at least one face) to
+the constructor, and works exactly like an
+OpenCASCADE::DirectionalProjectionBoundary. But, as the name of the class suggests,
+OpenCASCADE::NormalToMeshProjectionBoundary tries to come up with a suitable
+estimate of the direction normal to the mesh elements to be refined, and uses
+it for the projection of the new nodes onto the CAD surface. If we consider a
+mesh edge in a 2D space, the direction of its axis is a direction along which
+to split it in order to give rise to two new cells of the same length. We here
+extend this concept to 3D, and project all new nodes in a direction that
+approximates the cell normal.
+
+In the next figure, which is inspired by the geometry considered in this
+tutorial, we make an attempt to compare the behavior of the three projectors
+considered. As can be seen on the left, given the original cell (in blue), the
+new point found with the normal projection is in a position which does not
+allow for the generation of evenly spaced new elements (in red). The situation
+will get worse in further refinement steps.  Since the geometry we considered
+is somewhat perpendicular to the horizontal direction, the directional
+projection (central image) defined with horizontal direction as the projection
+direction, does a rather good job in getting the new mesh point. Yet, since
+the surface is almost horizontal at the bottom of the picture, we can expect
+problems in those regions when further refinement steps are carried
+out. Finally, the picture on the right shows that a node located on the cell
+axis will result in two new cells having the same length. Of course the
+situation in 3D gets a little more complicated than that described in this
+simple 2D case. Nevertheless, the results of this test confirm that the normal
+to the mesh direction is the best approach among the three tested, when
+arbitrarily shaped surfaces are considered, and unless you have a geometry for
+which a more specific approach is known to be appropriate.
+
+
+<img src="http://www.dealii.org/images/steps/developer/step-54.ProjectionComparisons.png" alt="" width="700">
+
+
+<h3> The testcase </h3>
+
+In this program, we will consider creating a surface mesh for a real geometry
+describing the bow of a ship (this geometry is frequently used in CAD and mesh
+generation comparisons and is freely available). The surface mesh we get from
+this could then be used to solve a boundary element equation to simulate the
+flow of water around the ship (in a way similar to step-34) but we will not
+try to do this here. To already give you an idea of the geometry we consider,
+here is a picture:
+
+<img src="http://www.dealii.org/images/steps/developer/step-54.bare.png" alt="" width="500">
+
+In the program, we read both the geometry and a coarse mesh from files, and
+then employ several of the options discussed above to place new vertices for a
+sequence of mesh refinement steps.
diff --git a/examples/step-54/doc/kind b/examples/step-54/doc/kind
new file mode 100644
index 0000000..c1d9154
--- /dev/null
+++ b/examples/step-54/doc/kind
@@ -0,0 +1 @@
+techniques
diff --git a/examples/step-54/doc/results.dox b/examples/step-54/doc/results.dox
new file mode 100644
index 0000000..ebf94fb
--- /dev/null
+++ b/examples/step-54/doc/results.dox
@@ -0,0 +1,92 @@
+<h1>Results</h1>
+
+The program execution produces a series of mesh files <code>3d_mesh_*.vtk</code> 
+that we can visualize with any of the usual visualization programs that can read the VTK
+file format. 
+
+The following table illustrates the results obtained employing the normal projection strategy. The first two
+rows of the table show side views of the grids obtained for progressive levels
+of refinement, overlain on a very fine rendering of the exact geometry. The
+dark and light red areas simply indicate whether the current mesh or the fine
+geometry is closer to the observer; the distinction does not carry any
+particularly deep meaning. The last row
+of pictures depicts front views (mirrored to both sides of the geometry) of the
+same grids shown in the second row.
+
+
+<table style="width:90%">
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-54.common_0.png" alt="" width="400"></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-54.normal_1.png" alt="" width="400"></td>		
+    <td><img src="http://www.dealii.org/images/steps/developer/step-54.normal_2.png" alt="" width="400"></td>
+  </tr>
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-54.normal_3.png" alt="" width="400"></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-54.normal_4.png" alt="" width="400"></td>		
+    <td><img src="http://www.dealii.org/images/steps/developer/step-54.normal_5.png" alt="" width="400"></td>
+  </tr>
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-54.normal_front_3.png" alt="" width="400"></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-54.normal_front_4.png" alt="" width="400"></td>		
+    <td><img src="http://www.dealii.org/images/steps/developer/step-54.normal_front_5.png" alt="" width="400"></td>
+  </tr>
+</table>
+
+As can be seen in the pictures---and as we anticipated---the normal refinement strategy is unable to produce nicely shaped elements
+when applied to surfaces with significant curvature changes. This is
+particularly apparent at the bulb of the hull where all new points have been
+placed in the upper part of the bulb and the lower part remains completely
+unresolved.
+
+The following table, which is arranged as the previous one, illustrates
+the results obtained adopting the directional projection approach, in which the projection direction selected was the y-axis (which
+is indicated with a small yellow arrow at the bottom left of each image).
+
+
+<table style="width:90%">
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-54.common_0.png" alt="" width="400"></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-54.directional_1.png" alt="" width="400"></td>		
+    <td><img src="http://www.dealii.org/images/steps/developer/step-54.directional_2.png" alt="" width="400"></td>
+  </tr>
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-54.directional_3.png" alt="" width="400"></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-54.directional_4.png" alt="" width="400"></td>		
+    <td><img src="http://www.dealii.org/images/steps/developer/step-54.directional_5.png" alt="" width="400"></td>
+  </tr>
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-54.directional_front_3.png" alt="" width="400"></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-54.directional_front_4.png" alt="" width="400"></td>		
+    <td><img src="http://www.dealii.org/images/steps/developer/step-54.directional_front_5.png" alt="" width="400"></td>
+  </tr>
+</table>
+
+The images confirm that the quality of the mesh obtained with a directional projection is noticeably higher than that obtained projecting along the
+surface normal. Yet, a number of elements elongated in the y-direction are observed around the bottom of the bulb, where the surface is almost parallel to the
+direction chosen for the projection. 
+
+The final test shows results using instead the projection normal to the faces:
+
+<table style="width:90%">
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-54.common_0.png" alt="" width="400"></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-54.normal_to_mesh_1.png" alt="" width="400"></td>		
+    <td><img src="http://www.dealii.org/images/steps/developer/step-54.normal_to_mesh_2.png" alt="" width="400"></td>
+  </tr>
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-54.normal_to_mesh_3.png" alt="" width="400"></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-54.normal_to_mesh_4.png" alt="" width="400"></td>		
+    <td><img src="http://www.dealii.org/images/steps/developer/step-54.normal_to_mesh_5.png" alt="" width="400"></td>
+  </tr>
+  <tr>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-54.normal_to_mesh_front_3.png" alt="" width="400"></td>
+    <td><img src="http://www.dealii.org/images/steps/developer/step-54.normal_to_mesh_front_4.png" alt="" width="400"></td>		
+    <td><img src="http://www.dealii.org/images/steps/developer/step-54.normal_to_mesh_front_5.png" alt="" width="400"></td>
+  </tr>
+</table>
+
+The pictures confirm that the normal to mesh projection approach leads to grids that remain evenly spaced
+throughout the refinement steps. At the same time, these meshes represent rather well the original geometry even in the bottom region
+of the bulb, which is not well recovered employing the directional projector or the normal projector.
+ 
+
diff --git a/examples/step-54/doc/tooltip b/examples/step-54/doc/tooltip
new file mode 100644
index 0000000..9afee0b
--- /dev/null
+++ b/examples/step-54/doc/tooltip
@@ -0,0 +1 @@
+Geometry: Using industry standard IGES files as boundary descriptors.
diff --git a/examples/step-54/input/initial_mesh_3d.vtk b/examples/step-54/input/initial_mesh_3d.vtk
new file mode 100644
index 0000000..ddf377e
--- /dev/null
+++ b/examples/step-54/input/initial_mesh_3d.vtk
@@ -0,0 +1,17 @@
+# vtk DataFile Version 3.0
+#This file was generated by the deal.II library on 2014/10/15 at 14:32:28
+ASCII
+DATASET UNSTRUCTURED_GRID
+
+POINTS 4 double
+-3.05955 0 0.404321
+-2.63758 0 -0.3467
+-2.01066 0.314451 0.33132
+-2.01066 0 -0.249155
+
+CELLS 1 5
+4	0	1	3	2
+
+CELL_TYPES 1
+ 9
+POINT_DATA 4
diff --git a/examples/step-54/step-54.cc b/examples/step-54/step-54.cc
new file mode 100644
index 0000000..e77fad5
--- /dev/null
+++ b/examples/step-54/step-54.cc
@@ -0,0 +1,472 @@
+/* ---------------------------------------------------------------------
+ * $Id$
+ *
+ * Copyright (C) 2009 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+ *  Authors: Andrea Mola, Luca Heltai, 2014
+ */
+
+
+// @sect3{Include files}
+
+// We start with including a bunch of files that we will use in the
+// various parts of the program. Most of them have been discussed in
+// previous tutorials already:
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/grid_in.h>
+#include <deal.II/grid/grid_out.h>
+#include <deal.II/grid/tria_boundary_lib.h>
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/numerics/vector_tools.h>
+
+// These are the headers of the opencascade support classes and
+// functions. Notice that these will contain sensible data only if you
+// compiled your deal.II library with support for OpenCASCADE, i.e.,
+// specifying <code>-DDEAL_II_WITH_OPENCASCADE=ON</code> and
+// <code>-DOPENCASCADE_DIR=/path/to/your/opencascade/installation</code>
+// when calling <code>cmake</code> during deal.II configuration.
+#include <deal.II/opencascade/boundary_lib.h>
+#include <deal.II/opencascade/utilities.h>
+
+
+// Finally, a few C++ standard header files
+#include <cmath>
+#include <iostream>
+#include <fstream>
+#include <string>
+
+// We isolate the rest of the program in its own namespace
+namespace Step54
+{
+  using namespace dealii;
+
+
+
+  // @sect3{The TriangulationOnCAD class}
+
+  // This is the main class. All it really does is store names for
+  // input and output files, and a triangulation. It then provides
+  // a function that generates such a triangulation from a coarse
+  // mesh, using one of the strategies discussed in the introduction
+  // and listed in the enumeration type at the top of the class.
+  //
+  // The member functions of this class are similar to what you can
+  // find in most of the other tutorial programs in the setup stage of
+  // the grid for the simulations.
+
+  class TriangulationOnCAD
+  {
+  public:
+    enum ProjectionType
+    {
+      NormalProjection = 0,        // read_domain() uses OpenCASCADE::NormalProjectionBoundary
+      DirectionalProjection = 1,   // read_domain() uses OpenCASCADE::DirectionalProjectionBoundary
+      NormalToMeshProjection = 2   // read_domain() uses OpenCASCADE::NormalToMeshProjectionBoundary
+    };
+
+    // Stores the three file names and the projection kind; no file is opened here.
+    TriangulationOnCAD(const std::string &initial_mesh_filename,
+                       const std::string &cad_file_name,
+                       const std::string &output_filename,
+                       const ProjectionType surface_projection_kind = NormalProjection);
+
+
+    ~TriangulationOnCAD();
+
+    void run();   // read_domain(), then five rounds of refine_mesh() + output_results()
+
+  private:
+    // Reads the CAD file and the coarse mesh, attaches the projectors, writes output 0.
+    void read_domain();
+    // Performs one global refinement of tria.
+    void refine_mesh();
+    // Writes tria to "<output_filename>_<cycle>.vtk".
+    void output_results(const unsigned int cycle);
+
+    Triangulation<2, 3>   tria;   // two-dimensional cells embedded in three-dimensional space
+
+    const std::string     initial_mesh_filename;   // VTK file read in read_domain()
+    const std::string     cad_file_name;           // IGES file read in read_domain()
+    const std::string     output_filename;         // prefix of the files written by output_results()
+
+    const ProjectionType  surface_projection_kind; // selects the projector attached to manifold id 1
+  };
+
+
+  // @sect4{TriangulationOnCAD::TriangulationOnCAD}
+
+  // The constructor of the TriangulationOnCAD class is very simple.
+  // The input arguments are strings for the input and output file
+  // names, and the enumeration type that determines which kind of
+  // surface projector is used in the mesh refinement cycles (see
+  // below for details).
+
+  TriangulationOnCAD::TriangulationOnCAD(const std::string &initial_mesh_filename,
+                                         const std::string &cad_file_name,
+                                         const std::string &output_filename,
+                                         const ProjectionType surface_projection_kind)
+    :
+    initial_mesh_filename(initial_mesh_filename),
+    cad_file_name(cad_file_name),
+    output_filename(output_filename),
+    surface_projection_kind(surface_projection_kind)
+  { // Nothing else to do: the arguments were copied above and tria is default-constructed.
+  }
+
+  TriangulationOnCAD::~TriangulationOnCAD()
+  { // Intentionally empty: this destructor performs no explicit cleanup.
+  }
+
+
+
+  // @sect4{TriangulationOnCAD::read_domain}
+
+
+  // The following function represents the core of this program.  In
+  // this function we import the CAD shape upon which we want to
+  // generate and refine our triangulation. We assume that the CAD
+  // surface is contained in the @p cad_file_name file (we provide an
+  // example IGES file in this directory called
+  // "DTMB-5415_bulbous_bow.iges" that represents the bulbous bow of a
+  // ship). The presence of several convex and concave high curvature
+  // regions makes the geometry we provided a particularly meaningful
+  // example.
+  //
+  // After importing the hull bow surface, we extract some of the
+  // curves and surfaces composing it, and use them to generate a set
+  // of projectors. Such projectors define the rules the Triangulation
+  // has to follow to position each new node during cell refinement.
+  //
+  // To initialize the Triangulation, as done in previous tutorial
+  // programs, we import a pre-existing grid saved in VTK format. We
+  // assume here that the user has generated a coarse mesh
+  // externally, which matches the IGES geometry. At the moment of
+  // writing this tutorial, the
+  // deal.II library does not automatically support generation of such
+  // meshes, but there are several tools which can provide you with
+  // reasonable initial meshes starting from CAD files.
+  // In our example, the imported mesh is composed of a single
+  // quadrilateral cell whose vertices have been placed on the CAD
+  // shape.
+  //
+  // After importing both the IGES geometry and the initial mesh, we
+  // assign the projectors previously discussed to each of the edges
+  // and cells which will have to be refined on the CAD surface.
+  //
+  // In this tutorial, we will test the three different CAD surface
+  // projectors described in the introduction, and will analyze the
+  // results obtained with each of them.  As mentioned, each of these
+  // projection strategies has been implemented in a different class,
+  // and objects of these types can be assigned to a triangulation
+  // using the Triangulation::set_manifold method.
+  //
+  // The following function then first imports the given CAD file.
+  // The function arguments are a string containing the desired file
+  // name, and a scale factor. In this example, the scale factor is
+  // set to 1e-3, as the original geometry is written in millimeters
+  // (which is the typical unit of measure for most IGES files),
+  // while we prefer to work in meters.  The output of the function
+  // is an object of OpenCASCADE generic topological shape class,
+  // namely a @p TopoDS_Shape.
+  void TriangulationOnCAD::read_domain()
+  {
+    TopoDS_Shape bow_surface = OpenCASCADE::read_IGES(cad_file_name, 1e-3);
+
+    // Each CAD geometrical object is defined along with a tolerance,
+    // which indicates possible inaccuracy of its placement. For
+    // instance, the tolerance @p tol of a vertex indicates that it can
+    // be located in any point contained in a sphere centered in the
+    // nominal position and having radius @p tol. While projecting a
+    // point onto a surface (which will in turn have its tolerance) we
+    // must keep in mind that the precision of the projection will be
+    // limited by the tolerance with which the surface is built.
+
+    // The following method extracts the tolerance of the given shape and
+    // makes it a bit bigger to stay out of trouble:
+    const double tolerance = OpenCASCADE::get_shape_tolerance(bow_surface) * 5;
+
+    // We now want to extract a set of composite sub-shapes from the
+    // generic shape. In particular, each face of the CAD file
+    // is composed of a trimming curve of type @p TopoDS_Wire, which is
+    // the collection of @p TopoDS_Edges that compose the boundary of a
+    // surface, and a NURBS description of the surface itself. We will
+    // use a line projector to associate the boundary of our
+    // Triangulation to the wire delimiting the surface.  To extract
+    // all compound sub-shapes, like wires, shells, or solids, we
+    // resort to a method of the OpenCASCADE namespace.  The input of
+    // OpenCASCADE::extract_compound_shapes is a shape and a set of empty
+    // std::vectors of subshapes, which will be filled with all
+    // compound shapes found in the given topological shape:
+    std::vector<TopoDS_Compound>  compounds;
+    std::vector<TopoDS_CompSolid> compsolids;
+    std::vector<TopoDS_Solid>     solids;
+    std::vector<TopoDS_Shell>     shells;
+    std::vector<TopoDS_Wire>      wires;
+
+    OpenCASCADE::extract_compound_shapes(bow_surface,
+                                         compounds,
+                                         compsolids,
+                                         solids,
+                                         shells,
+                                         wires);
+
+    // The next few steps are more familiar, and allow us to import an existing
+    // mesh from an external VTK file, and convert it to a deal.II triangulation.
+    std::ifstream in;
+
+    in.open(initial_mesh_filename.c_str());
+
+    GridIn<2,3> gi;
+    gi.attach_triangulation(tria);
+    gi.read_vtk(in);
+
+    // We output this initial mesh saving it as the refinement step 0.
+    output_results(0);
+
+    // The mesh imported has a single, two-dimensional cell located in
+    // three-dimensional space. We now want to ensure that it is refined
+    // according to the CAD geometry imported above. To this end, we get an
+    // iterator to that cell and assign to it the manifold_id 1 (see
+    // @ref GlossManifoldIndicator "this glossary entry").
+    // We also get an iterator to its four faces, and assign each of them
+    // the manifold_id 2:
+    Triangulation<2,3>::active_cell_iterator cell = tria.begin_active();
+    cell->set_manifold_id(1);
+
+    for (unsigned int f=0; f<GeometryInfo<2>::faces_per_cell; ++f)
+      cell->face(f)->set_manifold_id(2);
+
+    // Once both the CAD geometry and the initial mesh have been
+    // imported and digested, we use the CAD surfaces and curves to
+    // define the projectors and assign them to the manifold ids just
+    // specified.
+
+    // A first projector is defined using the single wire contained in
+    // our CAD file.  The ArclengthProjectionLineManifold will make
+    // sure that every mesh edge located on the wire is refined with a
+    // point that lies on the wire and splits it into two equal arcs
+    // lying between the edge vertices. We first check (with AssertThrow,
+    // since the outcome depends on the input file and must also be
+    // verified in release mode) that the wires vector is not empty, and
+    // then create a boundary object for it. (The object is @p static so
+    // that it survives past the end of the current function, as we want
+    // to attach it to the triangulation object for later use).
+    //
+    // Once the projector is created, we then assign it to all the parts of
+    // the triangulation with manifold_id = 2:
+    AssertThrow(wires.size() > 0,
+                ExcMessage("I could not find any wire in the CAD file you gave me. Bailing out."));
+
+    static OpenCASCADE::ArclengthProjectionLineManifold<2,3>
+    line_projector (wires[0], tolerance);
+
+    tria.set_manifold(2, line_projector);
+
+    // The surface projector is created according to what is specified
+    // with the @p surface_projection_kind option of the constructor. In particular,
+    // if the surface_projection_kind value equals @p NormalProjection, we select the
+    // OpenCASCADE::NormalProjectionBoundary. The new mesh points will
+    // then initially be generated at the barycenter of the cell/edge
+    // considered, and then projected on the CAD surface along its
+    // normal direction.  The NormalProjectionBoundary constructor
+    // only needs a shape and a tolerance, and we then assign it to
+    // the triangulation for use with all parts having manifold id 1:
+    switch (surface_projection_kind)
+      {
+      case NormalProjection:
+        static OpenCASCADE::NormalProjectionBoundary<2,3>
+        normal_projector(bow_surface, tolerance);
+
+        tria.set_manifold(1,normal_projector);
+
+        break;
+
+      // If the @p surface_projection_kind value is @p DirectionalProjection, we select the
+      // OpenCASCADE::DirectionalProjectionBoundary class. The new mesh points will
+      // then initially be generated at the barycenter of the cell/edge
+      // considered, and then projected on the CAD surface along a
+      // direction that is specified to the
+      // OpenCASCADE::DirectionalProjectionBoundary constructor. In this case,
+      // the projection is done along the y-axis.
+      case DirectionalProjection:
+        static OpenCASCADE::DirectionalProjectionBoundary<2,3>
+        directional_projector(bow_surface, Point<3>(0.0,1.0,0.0), tolerance);
+
+        tria.set_manifold(1,directional_projector);
+
+        break;
+
+      // As a third option, if @p surface_projection_kind value
+      // is @p NormalToMeshProjection, we select the
+      // OpenCASCADE::NormalToMeshProjectionBoundary. The new mesh points will
+      // again initially be generated at the barycenter of the cell/edge
+      // considered, and then projected on the CAD surface along a
+      // direction that is an estimate of the mesh normal direction.
+      // The OpenCASCADE::NormalToMeshProjectionBoundary constructor only
+      // requires a shape (containing at least a face) and a
+      // tolerance.
+      case NormalToMeshProjection:
+        static OpenCASCADE::NormalToMeshProjectionBoundary<2,3>
+        normal_to_mesh_projector(bow_surface, tolerance);
+
+        tria.set_manifold(1,normal_to_mesh_projector);
+
+        break;
+
+      // Finally, we use good software cleanliness by ensuring that this
+      // really covers all possible options of the @p case statement. If we
+      // get any other value, we simply abort the program:
+      default:
+        AssertThrow(false, ExcInternalError());
+      }
+  }
+
+
+  // @sect4{TriangulationOnCAD::refine_mesh}
+
+  // This function globally refines the mesh. In other tutorials, it
+  // would typically also distribute degrees of freedom, and resize
+  // matrices and vectors. These tasks are not carried out here, since
+  // we are not running any simulation on the Triangulation produced.
+  //
+  // While the function looks innocent, this is where most of the work we are
+  // interested in for this tutorial program actually happens. In particular,
+  // when refining the quads and lines that define the surface of the ship's
+  // hull, the Triangulation class will ask the various objects we have
+  // assigned to handle individual manifold ids for where the new vertices
+  // should lie.
+  void TriangulationOnCAD::refine_mesh()
+  {
+    this->tria.refine_global(/* number of refinement passes = */ 1);
+  }
+
+
+
+  // @sect4{TriangulationOnCAD::output_results}
+
+  // Outputting the results of our computations is a rather mechanical
+  // task. All the components of this function have been discussed
+  // before:
+  void TriangulationOnCAD::output_results(const unsigned int cycle)
+  {
+    // The target file is named "<output_filename>_<cycle>.vtk".
+    const std::string vtk_name =
+      output_filename + "_" + Utilities::int_to_string(cycle) + ".vtk";
+    std::ofstream vtk_stream(vtk_name.c_str());
+
+    GridOut().write_vtk(tria, vtk_stream);
+  }
+
+
+  // @sect4{TriangulationOnCAD::run}
+
+  // This is the main function. It should be self explanatory in its
+  // briefness:
+  void TriangulationOnCAD::run()
+  {
+    read_domain();
+    // Output number 0 was written by read_domain(); now produce 1 through 5:
+    const unsigned int n_refinements = 5;
+    for (unsigned int step=1; step<=n_refinements; ++step)
+      {
+        refine_mesh();
+        output_results(step);
+      }
+  }
+}
+
+
+// @sect3{The main() function}
+
+// This is the main function of this program. It is in its basic structure
+// like all previous tutorial programs, but runs the main class through the
+// three possibilities of new vertex placement:
+int main ()
+{
+  try
+    {
+      using namespace dealii;
+      using namespace Step54;
+
+      const std::string in_mesh_filename = "input/initial_mesh_3d.vtk";
+      const std::string cad_file_name = "DTMB-5415_bulbous_bow.iges";
+      // std::cout/std::endl are qualified: this file has no using-directive for std.
+      std::cout << "----------------------------------------------------------" << std::endl;
+      std::cout << "Testing projection in direction normal to CAD surface" << std::endl;
+      std::cout << "----------------------------------------------------------" << std::endl;
+      std::string out_mesh_filename = ( "3d_mesh_normal_projection" );
+      TriangulationOnCAD tria_on_cad_norm(in_mesh_filename,
+                                          cad_file_name,
+                                          out_mesh_filename,
+                                          TriangulationOnCAD::NormalProjection);
+      tria_on_cad_norm.run();
+      std::cout << "----------------------------------------------------------" << std::endl;
+      std::cout << std::endl;
+      std::cout << std::endl;
+
+      std::cout << "----------------------------------------------------------" << std::endl;
+      std::cout << "Testing projection in y-axis direction" << std::endl;
+      std::cout << "----------------------------------------------------------" << std::endl;
+      out_mesh_filename = ( "3d_mesh_directional_projection" );
+      TriangulationOnCAD tria_on_cad_dir(in_mesh_filename,
+                                         cad_file_name,
+                                         out_mesh_filename,
+                                         TriangulationOnCAD::DirectionalProjection);
+      tria_on_cad_dir.run();
+      std::cout << "----------------------------------------------------------" << std::endl;
+      std::cout << std::endl;
+      std::cout << std::endl;
+
+      std::cout << "----------------------------------------------------------" << std::endl;
+      std::cout << "Testing projection in direction normal to mesh elements" << std::endl;
+      std::cout << "----------------------------------------------------------" << std::endl;
+      out_mesh_filename = ( "3d_mesh_normal_to_mesh_projection" );
+      TriangulationOnCAD tria_on_cad_norm_to_mesh(in_mesh_filename,
+                                                  cad_file_name,
+                                                  out_mesh_filename,
+                                                  TriangulationOnCAD::NormalToMeshProjection);
+      tria_on_cad_norm_to_mesh.run();
+      std::cout << "----------------------------------------------------------" << std::endl;
+      std::cout << std::endl;
+      std::cout << std::endl;
+    }
+  catch (std::exception &exc)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+
+      return 1;
+    }
+  catch (...)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    }
+
+  return 0;
+}
diff --git a/examples/step-6/CMakeLists.txt b/examples/step-6/CMakeLists.txt
new file mode 100644
index 0000000..23253c6
--- /dev/null
+++ b/examples/step-6/CMakeLists.txt
@@ -0,0 +1,39 @@
+##
+#  CMake script for the step-6 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-6")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#    FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#    FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#    SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC}) 
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-6/doc/builds-on b/examples/step-6/doc/builds-on
new file mode 100644
index 0000000..9fdd726
--- /dev/null
+++ b/examples/step-6/doc/builds-on
@@ -0,0 +1 @@
+step-5
diff --git a/examples/step-6/doc/intro.dox b/examples/step-6/doc/intro.dox
new file mode 100644
index 0000000..7cf7b16
--- /dev/null
+++ b/examples/step-6/doc/intro.dox
@@ -0,0 +1,72 @@
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+@dealiiVideoLecture{15,16,17}
+
+The main emphasis in this example is the handling of locally refined
+grids. The approach to adaptivity chosen in deal.II is to use grids in which
+neighboring cells may be refined a different number of times. This then
+results in nodes on the interfaces of cells which belong to one
+side, but are unbalanced on the other. The common term for these is
+“hanging nodes”.
+
+To guarantee that the global solution is continuous at these nodes as well, we
+have to state some additional constraints on the values of the solution at
+these nodes. In the program below, we will show how we can get these
+constraints from the library, and how to use them in the solution of the
+linear system of equations. Before going over the details of the program
+below, you may want to take a look at the @ref constraints documentation
+module that explains how these constraints can be computed and what classes in
+deal.II work on them.
+
+The locally refined grids are produced using an error estimator class
+which estimates the energy error with respect to the Laplace
+operator. This error estimator, although developed for Laplace's
+equation has proven to be a suitable tool to generate locally refined
+meshes for a wide range of equations, not restricted to elliptic
+problems. Although it will create non-optimal meshes for other
+equations, it is often a good way to quickly produce meshes that are
+well adapted to the features of solutions, such as regions of great
+variation or discontinuities. Since it was developed by Kelly and
+co-workers, we often refer to it as the “Kelly refinement
+indicator” in the library, documentation, and mailing list. The
+class that implements it is called
+KellyErrorEstimator. Although the error estimator (and
+its
+implementation in the deal.II library) is capable of handling variable
+coefficients in the equation, we will not use this feature since we
+are only interested in a quick and simple way to generate locally
+refined grids.
+
+
+
+Since the concepts used for locally refined grids are so important,
+we do not show much additional new stuff in this example. The most
+important exception is that we show how to use biquadratic elements
+instead of the bilinear ones which we have used in all previous
+examples. In fact, the use of higher order elements is accomplished by
+only replacing three lines of the program, namely the declaration of
+the <code>fe</code> variable, and the use of an appropriate quadrature formula
+in two places. The rest of the program is unchanged.
+
+
+
+The only other new thing is a method to catch exceptions in the
+<code>main</code> function in order to output some information in case the
+program crashes for some reason.
+
+
+<h3>The ConstraintMatrix</h3>
+
+As explained above, we are going to use an object called ConstraintMatrix
+that will store the constraints at the hanging nodes to ensure the solution
+is continuous at these nodes. We could first assemble the system as normal
+and then condense out the degrees of freedom that need to be constrained.
+This is also explained in the @ref constraints documentation
+module. Instead we will go the more efficient route and eliminate the
+constrained entries while we are copying from the local to the global system.
+Because boundary conditions can be treated in the same way, we will
+incorporate them as constraints in the same ConstraintMatrix object.
+This way, we don't need to apply the boundary conditions after assembly 
+(like we did in the earlier steps).
+ 
diff --git a/examples/step-6/doc/kind b/examples/step-6/doc/kind
new file mode 100644
index 0000000..15a13db
--- /dev/null
+++ b/examples/step-6/doc/kind
@@ -0,0 +1 @@
+basic
diff --git a/examples/step-6/doc/results.dox b/examples/step-6/doc/results.dox
new file mode 100644
index 0000000..eae0cb6
--- /dev/null
+++ b/examples/step-6/doc/results.dox
@@ -0,0 +1,488 @@
+<h1>Results</h1>
+
+
+The output of the program looks as follows:
+@code
+Cycle 0:
+   Number of active cells:       20
+   Number of degrees of freedom: 89
+Cycle 1:
+   Number of active cells:       44
+   Number of degrees of freedom: 209
+Cycle 2:
+   Number of active cells:       92
+   Number of degrees of freedom: 449
+Cycle 3:
+   Number of active cells:       200
+   Number of degrees of freedom: 961
+Cycle 4:
+   Number of active cells:       440
+   Number of degrees of freedom: 2033
+Cycle 5:
+   Number of active cells:       932
+   Number of degrees of freedom: 4465
+Cycle 6:
+   Number of active cells:       1916
+   Number of degrees of freedom: 9113
+Cycle 7:
+   Number of active cells:       3884
+   Number of degrees of freedom: 18401
+@endcode
+
+
+
+As intended, the number of cells roughly doubles in each cycle.  The
+number of degrees of freedom is slightly more than four times the number of
+cells; one would expect a factor of exactly four in two spatial
+dimensions on an infinite grid (since the spacing between the degrees
+of freedom is half the cell width: one additional degree of freedom on
+each edge and one in the middle of each cell), but it is larger than
+that factor due to the finite size of the mesh and due to additional
+degrees of freedom which are introduced by hanging nodes and local
+refinement.
+
+
+
+The final solution, as written by the program at the end of the
+<code>%run()</code> function, looks as follows:
+
+
+
+<img src="http://www.dealii.org/images/steps/developer/step-6.solution.png" alt="">
+
+
+
+In each cycle, the program furthermore writes the grid in EPS
+format. These are shown in the following:
+
+
+
+<TABLE WIDTH="100%">
+<tr>
+<td>
+  <img src="http://www.dealii.org/images/steps/developer/step-6.grid-0.png" alt="">
+</td>
+<td>
+  <img src="http://www.dealii.org/images/steps/developer/step-6.grid-1.png" alt="">
+</td>
+</tr>
+
+<tr>
+<td>
+  <img src="http://www.dealii.org/images/steps/developer/step-6.grid-2.png" alt="">
+</td>
+<td>
+  <img src="http://www.dealii.org/images/steps/developer/step-6.grid-3.png" alt="">
+</td>
+</tr>
+
+<tr>
+<td>
+  <img src="http://www.dealii.org/images/steps/developer/step-6.grid-4.png" alt="">
+</td>
+<td>
+  <img src="http://www.dealii.org/images/steps/developer/step-6.grid-5.png" alt="">
+</td>
+</tr>
+
+<tr>
+<td>
+  <img src="http://www.dealii.org/images/steps/developer/step-6.grid-6.png" alt="">
+</td>
+<td>
+  <img src="http://www.dealii.org/images/steps/developer/step-6.grid-7.png" alt="">
+</td>
+</tr>
+</table>
+
+
+
+It is clearly visible that the region where the solution has a kink,
+i.e. the circle at radial distance 0.5 from the center, is
+refined most. Furthermore, the central region where the solution is
+very smooth and almost flat, is almost not refined at all, but this
+results from the fact that we did not take into account that the
+coefficient is large there. The region outside is refined rather
+randomly, since the second derivative is constant there and refinement
+is therefore mostly based on the size of the cells and their deviation
+from the optimal square.
+
+
+
+
+For completeness, we show what happens if the code we commented about
+in the destructor of the <code>Step6</code> class is omitted
+from this example.
+
+@code
+--------------------------------------------------------
+An error occurred in line <79> of file <source/subscriptor.cc> in function
+    virtual Subscriptor::~Subscriptor()
+The violated condition was:
+    counter == 0
+The name and call sequence of the exception was:
+    ExcInUse(counter, object_info->name(), infostring)
+Additional Information:
+Object of class 4FE_QILi2EE is still used by 1 other objects.
+  from Subscriber 10DoFHandlerILi2EE
+
+Stacktrace:
+-----------
+#0  /u/bangerth/p/deal.II/1/deal.II/lib/libbase.g.so: Subscriptor::~Subscriptor()
+#1  /u/bangerth/p/deal.II/1/deal.II/lib/libdeal_II_2d.g.so: FiniteElement<2>::~FiniteElement()
+#2  ./\step-6: FE_Poly<TensorProductPolynomials<2>, 2>::~FE_Poly()
+#3  ./\step-6: FE_Q<2>::~FE_Q()
+#4  ./\step-6: Step6<2>::~Step6()
+#5  ./\step-6: main
+--------------------------------------------------------
+make: *** [run] Aborted
+@endcode
+
+
+
+From the above error message, we conclude that an object of type
+<code>10DoFHandlerILi2EE</code> is still using the object of type
+<code>4FE_QILi2EE</code>. These are of course <a
+href="http://en.wikipedia.org/wiki/Name_mangling">"mangled" names</a> for
+<code>DoFHandler</code> and <code>FE_Q</code>. The mangling works as
+follows: the first number indicates the number of characters of the
+template class, i.e. 10 for <code>DoFHandler</code> and 4
+for <code>FE_Q</code>; the rest of the text is then template
+arguments. From this we can already glean a little bit who's the
+culprit here, and who the victim:
+The one object that still uses the finite element is the
+<code>dof_handler</code> object.
+
+
+
+The stacktrace gives an indication of where the problem happened. We
+see that the exception was triggered in the
+destructor of the <code>FiniteElement</code> class that was called
+through a few more functions from the destructor of the
+<code>Step6</code> class, exactly where we have commented out
+the call to <code>DoFHandler::clear()</code>.
+
+
+
+<a name="extensions"></a>
+<h3>Possibilities for extensions</h3>
+
+<h4>Solvers and preconditioners</h4>
+
+
+One thing that is always worth playing around with if one solves
+problems of appreciable size (much bigger than the one we have here)
+is to try different solvers or preconditioners. In the current case,
+the linear system is symmetric and positive definite, which makes the
+CG algorithm pretty much the canonical choice for solving. However,
+the SSOR preconditioner we use in the <code>solve()</code> function is
+up for grabs.
+
+In deal.II, it is relatively simple to change the preconditioner. For
+example, by changing the existing lines of code
+@code
+  PreconditionSSOR<> preconditioner;
+  preconditioner.initialize(system_matrix, 1.2);
+@endcode
+into
+@code
+  PreconditionSSOR<> preconditioner;
+  preconditioner.initialize(system_matrix, 1.0);
+@endcode
+we can try out different relaxation parameters for SSOR. By using
+(you have to also add the header file <code>lac/sparse_ilu.h</code> to
+the include list at the top of the file)
+ at code
+  PreconditionJacobi<> preconditioner;
+  preconditioner.initialize(system_matrix);
+ at endcode
+we can use Jacobi as a preconditioner. And by using
+ at code
+  SparseILU<double> preconditioner;
+  preconditioner.initialize(system_matrix);
+ at endcode
+we can use a very simply incomplete LU decomposition without any
+thresholding or strengthening of the diagonal.
+
+Using these various different preconditioners, we can compare the
+number of CG iterations needed (available through the
+<code>solver_control.last_step()</code> call, see
+step-4) as well as CPU time needed (using the Timer class,
+discussed, for example, in step-12) and get the
+following results (left: iterations; right: CPU time):
+
+<TABLE WIDTH="60%" ALIGN="center">
+  <tr>
+    <td ALIGN="center">
+      <img src="http://www.dealii.org/images/steps/developer/step-6.q2.dofs_vs_iterations.png" alt="">
+    </td>
+
+    <td ALIGN="center">
+      <img src="http://www.dealii.org/images/steps/developer/step-6.q2.dofs_vs_time.png" alt="">
+    </td>
+  </tr>
+</table>
+
+As we can see, all preconditioners behave pretty much the same on this
+simple problem, with the number of iterations growing like ${\cal
+O}(N^{1/2})$ and because each iteration requires around ${\cal
+O}(N)$ operations the total CPU time grows like ${\cal
+O}(N^{3/2})$ (for the few smallest meshes, the CPU time is so small
+that it doesn't record). Note that even though it is the simplest
+method, Jacobi is the fastest for this problem. 
+
+The situation changes slightly when the finite element is not a
+bi-quadratic one as set in the constructor of this program, but a
+bi-linear one. If one makes this change, the results are as follows:
+
+<TABLE WIDTH="60%" ALIGN="center">
+  <tr>
+    <td ALIGN="center">
+      <img src="http://www.dealii.org/images/steps/developer/step-6.q1.dofs_vs_iterations.png" alt="">
+    </td>
+
+    <td ALIGN="center">
+      <img src="http://www.dealii.org/images/steps/developer/step-6.q1.dofs_vs_time.png" alt="">
+    </td>
+  </tr>
+</table>
+
+In other words, while the increase in iterations and CPU time is as
+before, Jacobi is now the method that requires the most iterations; it
+is still the fastest one, however, owing to the simplicity of the
+operations it has to perform. This is not to say that Jacobi
+is actually a good preconditioner -- for problems of appreciable size, it is
+definitely not, and other methods will be substantially better -- but really
+only that it is fast because its implementation is so simple that it can
+compensate for a larger number of iterations. 
+
+The message to take away from this is not that simplicity in
+preconditioners is always best. While this may be true for the current
+problem, it definitely is not once we move to more complicated
+problems (elasticity or Stokes, for example step-8 or
+step-22). Secondly, all of these preconditioners still
+lead to an increase in the number of iterations as the number $N$ of
+degrees of freedom grows, for example ${\cal O}(N^\alpha)$; this, in
+turn, leads to a total growth in effort as ${\cal O}(N^{1+\alpha})$
+since each iteration takes ${\cal O}(N)$ work. This behavior is
+undesirable: we would really like to solve linear systems with $N$
+unknowns in a total of ${\cal O}(N)$ work; there is a class
+of preconditioners that can achieve this, namely geometric (step-16,
+step-37, step-39)
+or algebraic multigrid (step-31, step-40, and several others)
+preconditioners. They are, however, significantly more complex than
+the preconditioners outlined above. 
+
+Finally, the last message to take
+home is that when the data shown above was generated (in 2008), linear
+systems with 100,000 unknowns were
+easily solved on a desktop machine in well under 10 seconds, making
+the solution of relatively simple 2d problems even to very high
+accuracy not that big a task as it used to be even in the recent
+past. At the time, the situation for 3d problems was entirely different,
+but even that has changed substantially in the intervening time -- though
+solving problems in 3d to high accuracy remains a challenge.
+
+
+<h4>A better mesh</h4>
+
+If you look at the meshes above, you will see that even though the domain is the
+unit disk, and the jump in the coefficient lies along a circle, the cells 
+that make up the mesh do not track this geometry well. The reason, already hinted
+at in step-1, is that by default the Triangulation class only sees a bunch of
+coarse grid cells but has, of course, no real idea what kind of geometry they
+might represent when looked at together. For this reason, we need to tell
+the Triangulation what to do when a cell is refined: where should the new
+vertices at the edge midpoints and the cell midpoint be located so that the
+child cells better represent the desired geometry than the parent cell.
+
+In the code above, we already do this for faces that sit at the boundary:
+we use the code
+@code
+          static const SphericalManifold<dim> boundary;
+          triangulation.set_all_manifold_ids_on_boundary(0);
+          triangulation.set_manifold (0, boundary);
+@endcode
+to tell the Triangulation where to ask when refining a boundary face.
+To make the mesh <i>interior</i> also track a circular domain, we need to work
+a bit harder, though.
+
+First, recall that our coarse mesh consists of a central square
+cell and four cells around it. Now first consider what would happen if we
+also attached the SphericalManifold object not only to the four exterior faces
+but also the four cells at the perimeter as well as all of their faces. We can
+do this by replacing the existing mesh creation code
+@code
+          GridGenerator::hyper_ball (triangulation);
+
+          static const SphericalManifold<dim> boundary;
+          triangulation.set_all_manifold_ids_on_boundary(0);
+          triangulation.set_manifold (0, boundary);
+
+          triangulation.refine_global (1);
+@endcode
+by the following snippet (testing that the center of a cell is further
+than a small multiple, say one tenth, of the cell diameter away from the
+center of the mesh only fails for the central square of the mesh):
+@code
+          GridGenerator::hyper_ball (triangulation);
+
+          static const SphericalManifold<dim> boundary;
+          triangulation.set_all_manifold_ids_on_boundary(0);
+          triangulation.set_manifold (0, boundary);
+
+          const Point<dim> mesh_center;
+          for (typename Triangulation<dim>::active_cell_iterator cell = triangulation.begin_active();
+               cell != triangulation.end(); ++cell)
+            if (mesh_center.distance (cell->center()) > cell->diameter()/10)
+              cell->set_all_manifold_ids (0);
+	  
+          triangulation.refine_global (1);
+@endcode
+
+After a few global refinement steps, this would lead to a mesh of the following
+kind:
+
+<img src="http://www.dealii.org/images/steps/developer/step-6.manifold-grid-4-bad.png" alt="">
+
+This is not a good mesh: the central cell has been refined in such a way that
+the children located in the four corners of the original central cell
+<i>degenerate</i>: they all tend towards triangles as mesh refinement
+continues. This means that the Jacobian matrix of the transformation from
+reference cell to actual cell degenerates for these cells, and because
+all error estimates for finite element solutions contain the norm of the
+inverse of the Jacobian matrix, you will get very large errors on these
+cells and, in the limit of mesh refinement, a loss of convergence order because
+the cells in these corners become worse and worse under mesh refinement.
+
+So we need something smarter. To this end, consider the following solution
+originally developed by Konstantin Ladutenko. We will use the following code:
+@code
+          GridGenerator::hyper_ball (triangulation);
+
+          static const SphericalManifold<dim> boundary;
+          triangulation.set_all_manifold_ids_on_boundary(0);
+          triangulation.set_manifold (0, boundary);
+
+          const Point<dim> mesh_center;
+          const double core_radius  = 1.0/5.0,
+                       inner_radius = 1.0/3.0;
+
+          // Step 1: Shrink the inner cell
+          //
+          // We cannot get a circle out of the inner cell because of
+          // the degeneration problem mentioned above. Rather, shrink
+          // the inner cell to a core radius of 1/5 that stays
+          // sufficiently far away from the place where the
+          // coefficient will have a discontinuity and where we want
+          // to have cell interfaces that actually lie on a circle.
+          // We do this shrinking by just scaling the location of each
+          // of the vertices, given that the center of the circle is
+          // simply the origin of the coordinate system.
+          for (typename Triangulation<dim>::active_cell_iterator cell = triangulation.begin_active();
+               cell != triangulation.end(); ++cell)
+            if (mesh_center.distance (cell->center()) < 1e-5)
+              {
+                for (unsigned int v=0;
+                     v < GeometryInfo<dim>::vertices_per_cell;
+                     ++v)
+                  cell->vertex(v) *= core_radius/mesh_center.distance (cell->vertex(v));
+              }
+
+          // Step 2: Refine all cells except the central one
+          for (typename Triangulation<dim>::active_cell_iterator cell = triangulation.begin_active();
+               cell != triangulation.end(); ++cell)
+            if (mesh_center.distance (cell->center()) >= 1e-5)
+              cell->set_refine_flag ();
+          triangulation.execute_coarsening_and_refinement ();
+
+
+          // Step 3: Resize the inner children of the outer cells
+          //
+          // The previous step replaced each of the four outer cells
+          // by its four children, but the radial distance at which we
+          // have intersected is not what we want for later refinement
+          // steps. Consequently, move the vertices that were just
+          // created in radial direction to a place where we need
+          // them.
+          for (typename Triangulation<dim>::active_cell_iterator cell = triangulation.begin_active();
+               cell != triangulation.end(); ++cell)
+            for (unsigned int v=0; v < GeometryInfo<dim>::vertices_per_cell; ++v)
+              {
+                const double dist = mesh_center.distance (cell->vertex(v));
+                if (dist > core_radius*1.0001 && dist < 0.9999)
+                  cell->vertex(v) *= inner_radius/dist;
+              }
+
+          // Step 4: Apply curved manifold description
+          //
+          // As discussed above, we can not expect to subdivide the
+          // inner four cells (or their faces) onto concentric rings,
+          // but we can do so for all other cells that are located
+          // outside the inner radius. To this end, we loop over all
+          // cells and determine whether it is in this zone. If it
+          // isn't, then we set the manifold description of the cell
+          // and all of its bounding faces to the one that describes
+          // the spherical manifold already introduced above and that
+          // will be used for all further mesh refinement.
+          for (typename Triangulation<dim>::active_cell_iterator cell = triangulation.begin_active();
+               cell != triangulation.end(); ++cell)
+            {
+              bool is_in_inner_circle = false;
+              for (unsigned int v=0; v < GeometryInfo<2>::vertices_per_cell; ++v)
+                if (mesh_center.distance (cell->vertex(v)) < inner_radius)
+                  {
+                    is_in_inner_circle = true;
+                    break;
+                  }
+
+              if (is_in_inner_circle == false)
+                cell->set_all_manifold_ids (0);
+            }
+@endcode
+
+This code then generates the following, much better sequence of meshes:
+
+<TABLE WIDTH="100%">
+<tr>
+<td>
+  <img src="http://www.dealii.org/images/steps/developer/step-6.manifold-grid-0.png" alt="">
+</td>
+<td>
+  <img src="http://www.dealii.org/images/steps/developer/step-6.manifold-grid-1.png" alt="">
+</td>
+</tr>
+
+<tr>
+<td>
+  <img src="http://www.dealii.org/images/steps/developer/step-6.manifold-grid-2.png" alt="">
+</td>
+<td>
+  <img src="http://www.dealii.org/images/steps/developer/step-6.manifold-grid-3.png" alt="">
+</td>
+</tr>
+
+<tr>
+<td>
+  <img src="http://www.dealii.org/images/steps/developer/step-6.manifold-grid-4.png" alt="">
+</td>
+<td>
+  <img src="http://www.dealii.org/images/steps/developer/step-6.manifold-grid-5.png" alt="">
+</td>
+</tr>
+
+<tr>
+<td>
+  <img src="http://www.dealii.org/images/steps/developer/step-6.manifold-grid-6.png" alt="">
+</td>
+<td>
+  <img src="http://www.dealii.org/images/steps/developer/step-6.manifold-grid-7.png" alt="">
+</td>
+</tr>
+</table>
+ 
+Creating good meshes, and in particular making them fit the geometry you
+want, is a complex topic in itself. You can find much more on this in
+step-49, step-53, and step-54, among other tutorial programs that cover
+the issue. Information on curved domains can also be found in the
+documentation module on @ref manifold "Manifold descriptions".
diff --git a/examples/step-6/doc/tooltip b/examples/step-6/doc/tooltip
new file mode 100644
index 0000000..00262bf
--- /dev/null
+++ b/examples/step-6/doc/tooltip
@@ -0,0 +1 @@
+Adaptive local refinement. Higher order elements.
\ No newline at end of file
diff --git a/examples/step-6/step-6.cc b/examples/step-6/step-6.cc
new file mode 100644
index 0000000..2699df2
--- /dev/null
+++ b/examples/step-6/step-6.cc
@@ -0,0 +1,808 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2000 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Author: Wolfgang Bangerth, University of Heidelberg, 2000
+ */
+
+
+// @sect3{Include files}
+
+// The first few files have already been covered in previous examples and will
+// thus not be further commented on.
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/precondition.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/manifold_lib.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/matrix_tools.h>
+#include <deal.II/numerics/data_out.h>
+
+#include <fstream>
+#include <iostream>
+
+// From the following include file we will import the declaration of
+// H1-conforming finite element shape functions. This family of finite
+// elements is called <code>FE_Q</code>, and was used in all examples before
+// already to define the usual bi- or tri-linear elements, but we will now use
+// it for bi-quadratic elements:
+#include <deal.II/fe/fe_q.h>
+// We will not read the grid from a file as in the previous example, but
+// generate it using a function of the library. However, we will want to write
+// out the locally refined grids (just the grid, not the solution) in each
+// step, so we need the following include file instead of
+// <code>grid_in.h</code>:
+#include <deal.II/grid/grid_out.h>
+
+
+// When using locally refined grids, we will get so-called <code>hanging
+// nodes</code>. However, the standard finite element method assumes that the
+// discrete solution spaces be continuous, so we need to make sure that the
+// degrees of freedom on hanging nodes conform to some constraints such that
+// the global solution is continuous. We are also going to store the boundary
+// conditions in this object. The following file contains a class which is
+// used to handle these constraints:
+#include <deal.II/lac/constraint_matrix.h>
+
+// In order to refine our grids locally, we need a function from the library
+// that decides which cells to flag for refinement or coarsening based on the
+// error indicators we have computed. This function is defined here:
+#include <deal.II/grid/grid_refinement.h>
+
+// Finally, we need a simple way to actually compute the refinement indicators
+// based on some error estimate. While in general, adaptivity is very
+// problem-specific, the error indicator in the following file often yields
+// quite nicely adapted grids for a wide class of problems.
+#include <deal.II/numerics/error_estimator.h>
+
+// Finally, this is as in previous programs:
+using namespace dealii;
+
+
+// @sect3{The <code>Step6</code> class template}
+
+// The main class is again almost unchanged. Two additions, however, are made:
+// we have added the <code>refine_grid</code> function, which is used to
+// adaptively refine the grid (instead of the global refinement in the
+// previous examples), and a variable which will hold the constraints. In
+// addition, we have added a destructor to the class for reasons that will
+// become clear when we discuss its implementation.
+template <int dim>
+class Step6
+{
+public:
+  Step6 ();
+  ~Step6 ();
+
+  void run ();
+
+private:
+  void setup_system ();
+  void assemble_system ();
+  void solve ();
+  void refine_grid ();
+  void output_results (const unsigned int cycle) const;
+
+  Triangulation<dim>   triangulation;
+
+  DoFHandler<dim>      dof_handler;
+  FE_Q<dim>            fe;
+
+  // This is the new variable in the main class. We need an object which holds
+  // a list of constraints to hold the hanging nodes and the boundary
+  // conditions.
+  ConstraintMatrix     constraints;
+
+  SparsityPattern      sparsity_pattern;
+  SparseMatrix<double> system_matrix;
+
+  Vector<double>       solution;
+  Vector<double>       system_rhs;
+};
+
+
+// @sect3{Nonconstant coefficients}
+
+// The implementation of nonconstant coefficients is copied verbatim from
+// step-5:
+
+template <int dim>
+class Coefficient : public Function<dim>
+{
+public:
+  Coefficient () : Function<dim>() {}
+
+  virtual double value (const Point<dim>   &p,
+                        const unsigned int  component = 0) const;
+
+  virtual void value_list (const std::vector<Point<dim> > &points,
+                           std::vector<double>            &values,
+                           const unsigned int              component = 0) const;
+};
+
+
+
+template <int dim>
+double Coefficient<dim>::value (const Point<dim> &p,
+                                const unsigned int) const
+{
+  if (p.square() < 0.5*0.5)
+    return 20;
+  else
+    return 1;
+}
+
+
+
+template <int dim>
+void Coefficient<dim>::value_list (const std::vector<Point<dim> > &points,
+                                   std::vector<double>            &values,
+                                   const unsigned int              component) const
+{
+  const unsigned int n_points = points.size();
+
+  Assert (values.size() == n_points,
+          ExcDimensionMismatch (values.size(), n_points));
+
+  Assert (component == 0,
+          ExcIndexRange (component, 0, 1));
+
+  for (unsigned int i=0; i<n_points; ++i)
+    {
+      if (points[i].square() < 0.5*0.5)
+        values[i] = 20;
+      else
+        values[i] = 1;
+    }
+}
+
+
+// @sect3{The <code>Step6</code> class implementation}
+
+// @sect4{Step6::Step6}
+
+// The constructor of this class is mostly the same as before, but this time
+// we want to use the quadratic element. To do so, we only have to replace the
+// constructor argument (which was <code>1</code> in all previous examples) by
+// the desired polynomial degree (here <code>2</code>):
+template <int dim>
+Step6<dim>::Step6 ()
+  :
+  dof_handler (triangulation),
+  fe (2)
+{}
+
+
+// @sect4{Step6::~Step6}
+
+// Here comes the added destructor of the class. The reason why we want to add
+// it is a subtle change in the order of data elements in the class as
+// compared to all previous examples: the <code>dof_handler</code> object was
+// defined before and not after the <code>fe</code> object. Of course we could
+// have left this order unchanged, but we would like to show what happens if
+// the order is reversed since this produces a rather nasty side-effect and
+// results in an error which is difficult to track down if one does not know
+// what happens.
+//
+// Basically what happens is the following: when we distribute the degrees of
+// freedom using the function call <code>dof_handler.distribute_dofs()</code>,
+// the <code>dof_handler</code> also stores a pointer to the finite element in
+// use. Since this pointer is used every now and then until either the degrees
+// of freedom are re-distributed using another finite element object or until
+// the <code>dof_handler</code> object is destroyed, it would be unwise if we
+// would allow the finite element object to be deleted before the
+// <code>dof_handler</code> object. To disallow this, the DoF handler
+// increases a counter inside the finite element object which counts how many
+// objects use that finite element (this is what the
+// <code>Subscriptor</code>/<code>SmartPointer</code> class pair is used for,
+// in case you want something like this for your own programs; see step-7 for
+// a more complete discussion of this topic). The finite element object will
+// refuse its destruction if that counter is larger than zero, since then some
+// other objects might rely on the persistence of the finite element
+// object. An exception will then be thrown and the program will usually abort
+// upon the attempt to destroy the finite element.
+//
+// To be fair, such exceptions about still used objects are not particularly
+// popular among programmers using deal.II, since they only tell us that
+// something is wrong, namely that some other object is still using the object
+// that is presently being destructed, but most of the time not who this user
+// is. It is therefore often rather time-consuming to find out where the
+// problem exactly is, although it is then usually straightforward to remedy
+// the situation. However, we believe that the effort to find invalid
+// references to objects that no longer exist is less if the problem is
+// detected once the reference becomes invalid, rather than when non-existent
+// objects are actually accessed again, since then usually only invalid data
+// is accessed, but no error is immediately raised.
+//
+// Coming back to the present situation, if we did not write this destructor,
+// the compiler will generate code that triggers exactly the behavior sketched
+// above. The reason is that member variables of the <code>Step6</code> class
+// are destructed bottom-up (i.e. in reverse order of their declaration in the
+// class), as always in C++. Thus, the finite element object will be
+// destructed before the DoF handler object, since its declaration is below
+// the one of the DoF handler. This triggers the situation above, and an
+// exception will be raised when the <code>fe</code> object is
+// destructed. What needs to be done is to tell the <code>dof_handler</code>
+// object to release its lock to the finite element. Of course, the
+// <code>dof_handler</code> will only release its lock if it really does not
+// need the finite element any more, i.e. when all finite element related data
+// is deleted from it. For this purpose, the <code>DoFHandler</code> class has
+// a function <code>clear</code> which deletes all degrees of freedom, and
+// releases its lock to the finite element. After this, you can safely
+// destruct the finite element object since its internal counter is then zero.
+//
+// For completeness, we add the output of the exception that would have been
+// triggered without this destructor, to the end of the results section of
+// this example.
+template <int dim>
+Step6<dim>::~Step6 ()
+{
+  dof_handler.clear ();
+}
+
+
+// @sect4{Step6::setup_system}
+
+// The next function is setting up all the variables that describe the linear
+// finite element problem, such as the DoF handler, the matrices, and
+// vectors. The difference to what we did in step-5 is only that we now also
+// have to take care of hanging node constraints. These constraints are
+// handled almost transparently by the library, i.e. you only need to know
+// that they exist and how to get them, but you do not have to know how they
+// are formed or what exactly is done with them.
+//
+// At the beginning of the function, you find all the things that are the same
+// as in step-5: setting up the degrees of freedom (this time we have
+// quadratic elements, but there is no difference from a user code perspective
+// to the linear -- or cubic, for that matter -- case), generating the
+// sparsity pattern, and initializing the solution and right hand side
+// vectors. Note that the sparsity pattern will have significantly more
+// entries per row now, since there are now 9 degrees of freedom per cell, not
+// only four, that can couple with each other.
+template <int dim>
+void Step6<dim>::setup_system ()
+{
+  dof_handler.distribute_dofs (fe);
+
+  solution.reinit (dof_handler.n_dofs());
+  system_rhs.reinit (dof_handler.n_dofs());
+
+
+  // After setting up all the degrees of freedom, here are now the
+  // differences compared to step-5, all of which are related to constraints
+  // associated with the hanging nodes. In the class declaration, we have
+  // already allocated space for an object <code>constraints</code> that will
+  // hold a list of these constraints (they form a matrix, which is reflected
+  // in the name of the class, but that is immaterial for the moment). Now we
+  // have to fill this object. This is done using the following function calls
+  // (the first clears the contents of the object that may still be left over
+  // from computations on the previous mesh before the last adaptive
+  // refinement):
+  constraints.clear ();
+  DoFTools::make_hanging_node_constraints (dof_handler,
+                                           constraints);
+
+
+  // Now we are ready to interpolate the ZeroFunction to our boundary with
+  // indicator 0 (the whole boundary) and store the resulting constraints in
+  // our <code>constraints</code> object. Note that we do not need to apply
+  // the boundary conditions after assembly, like we did in earlier steps. As
+  // with almost everything else, the interpolation of boundary values also
+  // works for higher order elements without the need to change your code. We
+  // note that for proper results, it is important that the elimination of
+  // boundary nodes from the system of equations happens *after* the
+  // elimination of hanging nodes. For that reason we are filling the boundary
+  // values into the ConstraintMatrix after the hanging node constraints.
+  VectorTools::interpolate_boundary_values (dof_handler,
+                                            0,
+                                            ZeroFunction<dim>(),
+                                            constraints);
+
+
+  // The next step is <code>closing</code> this object. After all constraints
+  // have been added, they need to be sorted and rearranged to perform some
+  // actions more efficiently. This postprocessing is done using the
+  // <code>close()</code> function, after which no further constraints may be
+  // added any more:
+  constraints.close ();
+
+  // Now we first build our compressed sparsity pattern like we did in the
+  // previous examples. Nevertheless, we do not copy it to the final sparsity
+  // pattern immediately.  Note that we call a variant of
+  // make_sparsity_pattern that takes the ConstraintMatrix as the third
+  // argument. We are letting the routine know that we will never write into
+  // the locations given by <code>constraints</code> by setting the argument
+  // <code>keep_constrained_dofs</code> to false (in other words, that we will
+  // never write into entries of the matrix that correspond to constrained
+  // degrees of freedom). If we were to condense the
+  // constraints after assembling, we would have to pass <code>true</code>
+  // instead because then we would first write into these locations only to
+  // later set them to zero again during condensation.
+  DynamicSparsityPattern dsp(dof_handler.n_dofs());
+  DoFTools::make_sparsity_pattern(dof_handler,
+                                  dsp,
+                                  constraints,
+                                  /*keep_constrained_dofs = */ false);
+
+  // Now all non-zero entries of the matrix are known (i.e. those from
+  // regularly assembling the matrix and those that were introduced by
+  // eliminating constraints). We can thus copy our intermediate object to the
+  // sparsity pattern:
+  sparsity_pattern.copy_from(dsp);
+
+  // Finally, the so-constructed sparsity pattern serves as the basis on top
+  // of which we will create the sparse matrix:
+  system_matrix.reinit (sparsity_pattern);
+}
+
+
+// @sect4{Step6::assemble_system}
+
+// Next, we have to assemble the matrix again. There are two code changes
+// compared to step-5:
+//
+// First, we have to use a higher-order quadrature formula to account for the
+// higher polynomial degree in the finite element shape functions. This is
+// easy to change: the constructor of the <code>QGauss</code> class takes the
+// number of quadrature points in each space direction. Previously, we had two
+// points for bilinear elements. Now we should use three points for
+// biquadratic elements.
+//
+// Second, to copy the local matrix and vector on each cell into the global
+// system, we are no longer using a hand-written loop. Instead, we use
+// <code>ConstraintMatrix::distribute_local_to_global</code> that internally
+// executes this loop and eliminates all the constraints at the same time.
+//
+// The rest of the code that forms the local contributions remains
+// unchanged. It is worth noting, however, that under the hood several things
+// are different than before. First, the variables <code>dofs_per_cell</code>
+// and <code>n_q_points</code> now are 9 each, where they were 4
+// before. Introducing such variables as abbreviations is a good strategy to
+// make code work with different elements without having to change too much
+// code. Secondly, the <code>fe_values</code> object of course needs to do
+// other things as well, since the shape functions are now quadratic, rather
+// than linear, in each coordinate variable. Again, however, this is something
+// that is completely transparent to user code and nothing that you have to
+// worry about.
+template <int dim>
+void Step6<dim>::assemble_system ()
+{
+  // Three Gauss points per direction, matching the biquadratic elements.
+  const QGauss<dim> quadrature (3);
+
+  FEValues<dim> fe_values (fe, quadrature,
+                           update_values | update_gradients |
+                           update_quadrature_points | update_JxW_values);
+
+  const unsigned int dofs_per_cell = fe.dofs_per_cell;
+  const unsigned int n_q_points    = quadrature.size();
+
+  // Per-cell scratch objects, reused on every cell.
+  FullMatrix<double>                   local_matrix (dofs_per_cell, dofs_per_cell);
+  Vector<double>                       local_rhs (dofs_per_cell);
+  std::vector<types::global_dof_index> dof_indices (dofs_per_cell);
+
+  const Coefficient<dim> coefficient;
+  std::vector<double>    coefficient_at_q (n_q_points);
+
+  typename DoFHandler<dim>::active_cell_iterator
+  endc = dof_handler.end();
+  for (typename DoFHandler<dim>::active_cell_iterator
+       cell = dof_handler.begin_active(); cell!=endc; ++cell)
+    {
+      fe_values.reinit (cell);
+      coefficient.value_list (fe_values.get_quadrature_points(),
+                              coefficient_at_q);
+
+      local_matrix = 0;
+      local_rhs = 0;
+
+      for (unsigned int q=0; q<n_q_points; ++q)
+        {
+          const double JxW = fe_values.JxW(q);
+
+          for (unsigned int i=0; i<dofs_per_cell; ++i)
+            {
+              for (unsigned int j=0; j<dofs_per_cell; ++j)
+                local_matrix(i,j) += (coefficient_at_q[q] *
+                                      fe_values.shape_grad(i,q) *
+                                      fe_values.shape_grad(j,q) *
+                                      JxW);
+
+              local_rhs(i) += (fe_values.shape_value(i,q) *
+                               1.0 *
+                               JxW);
+            }
+        }
+
+      // Hand the contributions of this cell to the constraint object, which
+      // copies them into the global matrix and right hand side.
+      cell->get_dof_indices (dof_indices);
+      constraints.distribute_local_to_global (local_matrix, local_rhs,
+                                              dof_indices,
+                                              system_matrix, system_rhs);
+    }
+  // The linear system is now assembled. While copying, the constraint
+  // object applied the boundary conditions and eliminated the hanging node
+  // constraints. Constrained degrees of freedom remain part of the system,
+  // but the values computed for them are not meaningful; the correct values
+  // are set at the end of the <code>solve</code> function.
+}
+
+
+// @sect4{Step6::solve}
+
+// We continue with gradual improvements. The function that solves the linear
+// system again uses the SSOR preconditioner, and is again unchanged except
+// that we have to incorporate hanging node constraints. As mentioned above,
+// the degrees of freedom from the ConstraintMatrix corresponding to hanging
+// node constraints and boundary values have been removed from the linear
+// system by giving the rows and columns of the matrix a special
+// treatment. This way, the values for these degrees of freedom have wrong,
+// but well-defined values after solving the linear system. What we then have
+// to do is to use the constraints to assign to them the values that they
+// should have. This process, called <code>distributing</code> constraints,
+// computes the values of constrained nodes from the values of the
+// unconstrained ones, and requires only a single additional function call
+// that you find at the end of this function:
+
+template <int dim>
+void Step6<dim>::solve ()
+{
+  PreconditionSSOR<> ssor;
+  ssor.initialize (system_matrix, 1.2);
+
+  SolverControl control (1000, 1e-12);
+  SolverCG<>    cg (control);
+  cg.solve (system_matrix, solution, system_rhs, ssor);
+
+  // The entries of constrained degrees of freedom are not meaningful after
+  // the solve; compute them from the unconstrained ones.
+  constraints.distribute (solution);
+}
+
+
+// @sect4{Step6::refine_grid}
+
+// Instead of global refinement, we now use a slightly more elaborate
+// scheme. We will use the <code>KellyErrorEstimator</code> class which
+// implements an error estimator for the Laplace equation; it can in principle
+// handle variable coefficients, but we will not use these advanced features,
+// but rather use its most simple form since we are not interested in
+// quantitative results but only in a quick way to generate locally refined
+// grids.
+//
+// Although the error estimator derived by Kelly et al. was originally
+// developed for the Laplace equation, we have found that it is also well
+// suited to quickly generate locally refined grids for a wide class of
+// problems. Basically, it looks at the jumps of the gradients of the solution
+// over the faces of cells (which is a measure for the second derivatives) and
+// scales it by the size of the cell. It is therefore a measure for the local
+// smoothness of the solution at the place of each cell and it is thus
+// understandable that it yields reasonable grids also for hyperbolic
+// transport problems or the wave equation as well, although these grids are
+// certainly suboptimal compared to approaches specially tailored to the
+// problem. This error estimator may therefore be understood as a quick way to
+// test an adaptive program.
+//
+// The way the estimator works is to take a <code>DoFHandler</code> object
+// describing the degrees of freedom and a vector of values for each degree of
+// freedom as input and compute a single indicator value for each active cell
+// of the triangulation (i.e. one value for each of the
+// <code>triangulation.n_active_cells()</code> cells). To do so, it needs two
+// additional pieces of information: a quadrature formula on the faces
+// (i.e. quadrature formula on <code>dim-1</code> dimensional objects). We use
+// a 3-point Gauss rule again, a pick that is consistent and appropriate with
+// the choice of bi-quadratic finite element shape functions in this program.
+// (What constitutes a suitable quadrature rule here of course depends on
+// knowledge of the way the error estimator evaluates the solution field. As
+// said above, the jump of the gradient is integrated over each face, which
+// would be a quadratic function on each face for the quadratic elements in
+// use in this example. In fact, however, it is the square of the jump of the
+// gradient, as explained in the documentation of that class, and that is a
+// quartic function, for which a 3 point Gauss formula is sufficient since it
+// integrates polynomials up to order 5 exactly.)
+//
+// Secondly, the function wants a list of boundary indicators for those
+// boundaries where we have imposed Neumann values of the kind
+// $\partial_n u(\mathbf x) = h(\mathbf x)$, along with a function $h(\mathbf x)$
+// for each such boundary. This information is
+// represented by an object of type <code>FunctionMap::type</code> that is
+// a typedef to a map from boundary indicators to function objects describing
+// the Neumann boundary values. In the present example program, we do not use
+// Neumann boundary values, so this map is empty, and in fact constructed
+// using the default constructor of the map in the place where the function
+// call expects the respective function argument.
+//
+// The output, as mentioned, is a vector of values for all cells. While it may
+// make sense to compute the <b>value</b> of a solution degree of freedom
+// very accurately, it is usually not necessary to compute the <b>error indicator</b>
+// corresponding to the solution on a cell particularly accurately. We therefore
+// typically use a vector of floats instead of a vector of doubles to represent
+// error indicators.
+template <int dim>
+void Step6<dim>::refine_grid ()
+{
+  // Quadrature rule on the faces, which the estimator uses to integrate the
+  // jumps of the solution gradient.
+  const QGauss<dim-1> face_quadrature (3);
+
+  // One indicator per active cell; no Neumann boundaries, hence the empty
+  // function map.
+  Vector<float> error_indicators (triangulation.n_active_cells());
+  KellyErrorEstimator<dim>::estimate (dof_handler,
+                                      face_quadrature,
+                                      typename FunctionMap<dim>::type(),
+                                      solution,
+                                      error_indicators);
+
+  // With one error indicator per cell in <code>error_indicators</code>, we
+  // mark the 30 per cent of cells with the largest indicators for
+  // refinement and the 3 per cent with the smallest ones for coarsening.
+  //
+  // If the coarsening fraction were zero, the number of cells would roughly
+  // double in each step in two space dimensions: each of the refined 30 per
+  // cent of cells is replaced by four children while the other 70 per cent
+  // stay as they are. In practice somewhat more cells result, because a cell
+  // may not be refined twice while its neighbor is not refined at all; in
+  // that case the neighbor gets refined as well.
+  //
+  // Many applications coarsen more than three per cent of the cells, which
+  // is useful in particular when the initial grid is already rather fine.
+  // Here the initial grid is very coarse, so coarsening only matters in the
+  // few places that may have been over-refined, and a small non-zero value
+  // is appropriate.
+  const double refine_fraction  = 0.3;
+  const double coarsen_fraction = 0.03;
+
+  // Translate the indicators into refinement and coarsening flags on the
+  // cells of the triangulation, following the scheme just described.
+  // GridRefinement offers several other flagging strategies as well.
+  GridRefinement::refine_and_coarsen_fixed_number (triangulation,
+                                                   error_indicators,
+                                                   refine_fraction,
+                                                   coarsen_fraction);
+
+  // So far cells are only flagged; nothing has been refined or coarsened
+  // yet, which would allow the flags to be adjusted first. We have no need
+  // for that here, so we let the triangulation carry out the flagged
+  // actions right away:
+  triangulation.execute_coarsening_and_refinement ();
+}
+
+
+// @sect4{Step6::output_results}
+
+// At the end of computations on each grid, and just before we continue the
+// next cycle with mesh refinement, we want to output the results from this
+// cycle.
+//
+// In the present program, we will not write the solution (except for in the
+// last step, see the next function), but only the meshes that we generated,
+// as a two-dimensional Encapsulated Postscript (EPS) file.
+//
+// We have already seen in step-1 how this can be achieved. The only thing we
+// have to change is the generation of the file name, since it should contain
+// the number of the present refinement cycle provided to this function as an
+// argument. The most general way is to use the std::stringstream class as
+// shown in step-5, but here's a little hack that makes it simpler if we know
+// that we have less than 10 iterations: assume that the %numbers `0' through
+// `9' are represented consecutively in the character set used on your machine
+// (this is in fact the case in all known character sets), then '0'+cycle
+// gives the character corresponding to the present cycle number. Of course,
+// this will only work if the number of cycles is actually less than 10, and
+// rather than waiting for the disaster to happen, we safeguard our little
+// hack with an explicit assertion at the beginning of the function. If this
+// assertion is triggered, i.e. when <code>cycle</code> is larger than or
+// equal to 10, an exception of type <code>ExcNotImplemented</code> is raised,
+// indicating that some functionality is not implemented for this case (the
+// functionality that is missing, of course, is the generation of file names
+// for that case):
+template <int dim>
+void Step6<dim>::output_results (const unsigned int cycle) const
+{
+  // The single-character trick below only covers cycles 0 through 9.
+  Assert (cycle < 10, ExcNotImplemented());
+
+  // Assemble the name "grid-<cycle>.eps" for the mesh of this cycle.
+  const char digit = '0' + cycle;
+  const std::string eps_name = std::string("grid-") + digit + ".eps";
+
+  std::ofstream eps_file (eps_name.c_str());
+  GridOut mesh_writer;
+  mesh_writer.write_eps (triangulation, eps_file);
+}
+
+
+// @sect4{Step6::run}
+
+// The final function before <code>main()</code> is again the main driver of
+// the class, <code>run()</code>. It is similar to the one of step-5, except
+// that we generate the mesh in the program again instead of reading it from
+// disk, that we adaptively instead of globally refine the mesh, and that
+// we output the solution on the final mesh in the present function.
+//
+// The first block in the main loop of the function deals with mesh
+// generation. If this is the first cycle of the program, instead of reading
+// the grid from a file on disk as in the previous example, we now again
+// create it using a library function. The domain is again a circle, which is
+// why we have to provide a suitable boundary object as well. We place the
+// center of the circle at the origin and have the radius be one (these are
+// the two hidden arguments to the function, which have default values).
+//
+// You will notice by looking at the coarse grid that it is of inferior
+// quality than the one which we read from the file in the previous example:
+// the cells are less equally formed. However, using the library function this
+// program works in any space dimension, which was not the case before.
+//
+// In case we find that this is not the first cycle, we want to refine the
+// grid. Unlike the global refinement employed in the last example program, we
+// now use the adaptive procedure described above.
+//
+// The rest of the loop looks as before:
+template <int dim>
+void Step6<dim>::run ()
+{
+  const unsigned int n_cycles = 8;
+
+  for (unsigned int cycle=0; cycle<n_cycles; ++cycle)
+    {
+      std::cout << "Cycle " << cycle << ':' << std::endl;
+
+      if (cycle != 0)
+        refine_grid ();
+      else
+        {
+          // First cycle: create the coarse mesh of the ball, describe its
+          // boundary by a spherical manifold, and refine globally once.
+          GridGenerator::hyper_ball (triangulation);
+
+          static const SphericalManifold<dim> sphere;
+          triangulation.set_all_manifold_ids_on_boundary(0);
+          triangulation.set_manifold (0, sphere);
+
+          triangulation.refine_global (1);
+        }
+
+      std::cout << "   Number of active cells:       "
+                << triangulation.n_active_cells() << std::endl;
+
+      setup_system ();
+
+      std::cout << "   Number of degrees of freedom: "
+                << dof_handler.n_dofs() << std::endl;
+
+      assemble_system ();
+      solve ();
+      output_results (cycle);
+    }
+
+  // All cycles are done and every grid has been written to disk. What is
+  // left is to write the solution on the final, finest mesh to a file. We
+  // use the EPS format for this and, to obtain a reasonable view on the
+  // solution, stretch the z-axis by a factor of four through the
+  // corresponding output flags.
+  DataOutBase::EpsFlags flags;
+  flags.z_scaling = 4;
+
+  DataOut<dim> data_out;
+  data_out.set_flags (flags);
+  data_out.attach_dof_handler (dof_handler);
+  data_out.add_data_vector (solution, "solution");
+  data_out.build_patches ();
+
+  std::ofstream eps_file ("final-solution.eps");
+  data_out.write_eps (eps_file);
+}
+
+
+// @sect3{The <code>main</code> function}
+
+// The main function is unaltered in its functionality from the previous
+// example, but we have taken a step of additional caution. Sometimes,
+// something goes wrong (such as insufficient disk space upon writing an
+// output file, not enough memory when trying to allocate a vector or a
+// matrix, or if we can't read from or write to a file for whatever reason),
+// and in these cases the library will throw exceptions. Since these are
+// run-time problems, not programming errors that can be fixed once and for
+// all, this kind of exceptions is not switched off in optimized mode, in
+// contrast to the <code>Assert</code> macro which we have used to test
+// against programming errors. If uncaught, these exceptions propagate the
+// call tree up to the <code>main</code> function, and if they are not caught
+// there either, the program is aborted. In many cases, like if there is not
+// enough memory or disk space, we can't do anything but we can at least print
+// some text trying to explain the reason why the program failed. A way to do
+// so is shown in the following. It is certainly useful to write any larger
+// program in this way, and you can do so by more or less copying this
+// function except for the <code>try</code> block that actually encodes the
+// functionality particular to the present application.
+int main ()
+{
+
+  // The general idea behind the layout of this function is as follows: let's
+  // try to run the program as we did before...
+  try
+    {
+      Step6<2> laplace_problem_2d;
+      laplace_problem_2d.run ();
+    }
+  // ...and if this should fail, try to gather as much information as
+  // possible. Specifically, if the exception that was thrown is an object of
+  // a class that is derived from the C++ standard class
+  // <code>exception</code>, then we can use the <code>what</code> member
+  // function to get a string which describes the reason why the exception was
+  // thrown.
+  //
+  // The deal.II exception classes are all derived from the standard class,
+  // and in particular, the <code>exc.what()</code> function will return
+  // approximately the same string as would be generated if the exception was
+  // thrown using the <code>Assert</code> macro. You have seen the output of
+  // such an exception in the previous example, and you then know that it
+  // contains the file and line number of where the exception occurred, and
+  // some other information. This is also what the following statements would
+  // print.
+  //
+  // Apart from this, there isn't much that we can do except exiting the
+  // program with an error code (<code>return 1;</code>). Since we only read
+  // from the exception object, we catch it by reference to const:
+  catch (const std::exception &exc)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+
+      return 1;
+    }
+  // If the exception that was thrown somewhere was not an object of a class
+  // derived from the standard <code>exception</code> class, then we can't do
+  // anything at all. We then simply print an error message and exit.
+  catch (...)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    }
+
+  // If we got to this point, there was no exception which propagated up to
+  // the main function (there may have been exceptions, but they were caught
+  // somewhere in the program or the library). Therefore, the program
+  // performed as was expected and we can return without error.
+  return 0;
+}
diff --git a/examples/step-7/CMakeLists.txt b/examples/step-7/CMakeLists.txt
new file mode 100644
index 0000000..e53c5a6
--- /dev/null
+++ b/examples/step-7/CMakeLists.txt
@@ -0,0 +1,39 @@
+##
+#  CMake script for the step-7 tutorial program:
+##
+
+# Name shared by the project and the executable target:
+SET(TARGET "step-7")
+
+# Source files that make up the target. For this tutorial that is just the
+# single step-X.cc file; add further files here as your project grows. For
+# much larger projects, either replace the statement below by something
+# like
+#    FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#    FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#    SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or move to the large-project CMakeLists.txt file described on the
+# "CMake in user projects" page, reachable from the "User info" page of
+# the documentation.
+SET(TARGET_SRC ${TARGET}.cc)
+
+# There is usually no need to change anything below this line.
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+# Look for a sufficiently recent deal.II (version 8.4 is requested) in the
+# hinted locations and stop with an explanatory message if none is found.
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-7/doc/builds-on b/examples/step-7/doc/builds-on
new file mode 100644
index 0000000..1740273
--- /dev/null
+++ b/examples/step-7/doc/builds-on
@@ -0,0 +1 @@
+step-6
diff --git a/examples/step-7/doc/intro.dox b/examples/step-7/doc/intro.dox
new file mode 100644
index 0000000..a2cd244
--- /dev/null
+++ b/examples/step-7/doc/intro.dox
@@ -0,0 +1,221 @@
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+In this program, we will mainly consider two aspects:
+<ol>
+  <li> Verification of correctness of the program and generation of convergence
+  tables;
+  <li> Non-homogeneous Neumann boundary conditions for the Helmholtz equation.
+</ol>
+Besides these topics, again a variety of improvements and tricks will be
+shown. 
+
+<h3>Verification of correctness</h3>
+
+There has probably never been a
+non-trivial finite element program that worked right from the start. It is
+therefore necessary to find ways to verify whether a computed solution is
+correct or not. Usually, this is done by choosing the set-up of a simulation
+in such a way that we know the exact continuous solution and evaluate the difference
+between continuous and computed discrete solution. If this difference
+converges to zero with the right order of convergence, this is already a good
+indication of correctness, although there may be other sources of error
+persisting which have only a small contribution to the total error or are of
+higher order. In the context of finite element simulations, this technique
+is often called the <i>Method of Manufactured Solutions</i>.
+
+In this example, we will not go into the theories of systematic software
+verification which is a very complicated problem. Rather we will demonstrate
+the tools which deal.II can offer in this respect. This is basically centered
+around the functionality of a single function, VectorTools::integrate_difference().
+This function computes the difference between a given continuous function and
+a finite element field in various norms on each cell. At the time of writing
+this tutorial program, the norms this function can compute are the following,
+where $u$ denotes the continuous function
+and $u_h$ the finite element field, and $K$ is an element of the
+triangulation:
+@f{eqnarray*}
+  {\| u-u_h \|}_{L_1(K)} &=& \int_K |u-u_h| \; dx,
+  \\
+  {\| u-u_h \|}_{L_2(K)} &=& \left( \int_K |u-u_h|^2 \; dx \right)^{1/2},
+  \\
+  {\| u-u_h \|}_{L_\infty(K)} &=& \max_{x  \in K} |u(x) - u_h(x)|,
+  \\
+  {| u-u_h |}_{H^1(K)} &=& \left( \int_K |\nabla(u-u_h)|^2 \; dx \right)^{1/2},
+  \\
+  {\| u-u_h \|}_{H^1(K)} &=& \left( {\| u-u_h \|}^2_{L_2(K)}
+                                   +{| u-u_h |}^2_{H^1(K)}    \right)^{1/2}.
+@f}
+(All these norms and semi-norms can also be evaluated with weighting functions,
+for example in order to exclude singularities from the determination of the
+global error, and the function also works for vector-valued functions.) Of
+course, like with any other integral, we can only evaluate these norms using quadrature formulas;
+the choice of the right quadrature formula is therefore crucial to the
+accurate evaluation of the error. This holds in particular for the $L_\infty$
+norm, where we evaluate the maximal deviation of numerical and exact solution
+only at the quadrature points; one should then not try to use a quadrature
+rule with points only at points where super-convergence might occur, such as
+the Gauss points of the lowest-order Gauss quadrature formula for which the
+integrals in the assembly of the matrix are correct (e.g., for linear elements,
+do not use the QGauss(2) quadrature formula). In fact, this is generally good 
+advice also for the other norms: if your quadrature points are fortuitously
+chosen at locations where the error happens to be particularly small due to
+superconvergence, the computed error will look like it is much smaller than
+it really is and may even suggest a higher convergence order. Consequently,
+we will choose a different quadrature formula for the integration of these
+error norms than for the assembly of the linear system.
+
+The function VectorTools::integrate_difference() evaluates the desired norm on each
+cell $K$ of the triangulation and returns a vector which holds these
+values for each cell. From the local values, we can then obtain the global error. For
+example, if the vector $(e_i)$ contains the local $L_2$ norms, then
+@f[
+  E = \| {\mathbf e} \| = \left( \sum_i e_i^2 \right)^{1/2}
+@f]
+is the global $L_2$ error.
+
+In the program, we will show how to evaluate and use these quantities, and we
+will monitor their values under mesh refinement. Of course, we have to choose
+the problem at hand such that we can explicitly state the solution and its
+derivatives, but since we want to evaluate the correctness of the program,
+this is only reasonable. If we know that the program produces the correct
+solution for one (or, if one wants to be really sure: many) specifically
+chosen right hand sides, we can be rather confident that it will also compute
+the correct solution for problems where we don't know the exact values.
+
+In addition to simply computing these quantities, we will show how to generate
+nicely formatted tables from the data generated by this program that
+automatically computes convergence rates etc. In addition, we will compare
+different strategies for mesh refinement.
+
+
+<h3>Non-homogeneous Neumann boundary conditions</h3>
+
+The second, totally
+unrelated, subject of this example program is the use of non-homogeneous
+boundary conditions. These are included into the variational form using
+boundary integrals which we have to evaluate numerically when assembling the
+right hand side vector.
+
+Before we go into programming, let's have a brief look at the mathematical
+formulation. The equation that we want to solve here is the Helmholtz equation
+"with the nice sign":
+@f[
+  -\Delta u + u = f,
+@f]
+on the square $[-1,1]^2$, augmented by boundary conditions
+@f[
+  u = g_1
+@f]
+on some part $\Gamma_1$ of the boundary $\Gamma$, and
+@f[
+  {\mathbf n}\cdot \nabla u = g_2
+@f]
+on the rest $\Gamma_2 = \Gamma \backslash \Gamma_1$.
+In our particular testcase, we will use $\Gamma_1=\Gamma \cap\{\{x=1\} \cup \{y=1\}\}$.
+
+Because we want to verify the convergence of our numerical solution $u_h$,
+we want a setup so that we know the exact solution $u$. This is where
+the Method of Manufactured Solutions comes in. To this end, let us 
+choose a function
+@f[
+  \bar u(x) = \sum_{i=1}^3 \exp\left(-\frac{|x-x_i|^2}{\sigma^2}\right)
+@f]
+where the centers $x_i$ of the exponentials are 
+  $x_1=(-\frac 12,\frac 12)$,
+  $x_2=(-\frac 12,-\frac 12)$, and
+  $x_3=(\frac 12,-\frac 12)$,
+and the half width is set to $\sigma=\frac {1}{8}$. The method of manufactured
+solutions then says: choose
+@f{align*}
+  f &= -\Delta \bar u + \bar u, \\
+  g_1 &= \bar u|_{\Gamma_1}, \\
+  g_2 &= {\mathbf n}\cdot \nabla\bar u|_{\Gamma_2}.
+@f}
+With this particular choice, we infer that of course the solution of the
+original problem happens to be $u=\bar u$. In other words, by choosing
+the right hand sides of the equation and the boundary conditions in a
+particular way, we have manufactured ourselves a problem to which we
+know the solution. This allows us then to compute the error of our
+numerical solution. In the code below, we represent $\bar u$ by the
+<code>Solution</code> class, and other classes will be used to
+denote $\bar u|_{\Gamma_1}$ and ${\mathbf n}\cdot \nabla\bar u|_{\Gamma_2}$.
+
+Using the above definitions, we can state the weak formulation of the
+equation, which reads: find $u\in H^1_g=\{v\in H^1: v|_{\Gamma_1}=g_1\}$ such
+that
+@f[
+  {(\nabla u, \nabla v)}_\Omega + {(u,v)}_\Omega
+  =
+  {(f,v)}_\Omega + {(g_2,v)}_{\Gamma_2}
+@f]
+for all test functions $v\in H^1_0=\{v\in H^1: v|_{\Gamma_1}=0\}$. The
+boundary term ${(g_2,v)}_{\Gamma_2}$ has appeared by integration by parts and
+using $\partial_n u=g_2$ on $\Gamma_2$ and $v=0$ on $\Gamma_1$. The cell
+matrices and vectors which we use to build the global matrices and right hand
+side vectors in the discrete formulation therefore look like this:
+@f{eqnarray*}
+  A_{ij}^K &=& \left(\nabla \varphi_i, \nabla \varphi_j\right)_K
+              +\left(\varphi_i, \varphi_j\right)_K,
+  \\
+  f_i^K &=& \left(f,\varphi_i\right)_K
+           +\left(g_2, \varphi_i\right)_{\partial K\cap \Gamma_2}.
+@f}
+Since the generation of the domain integrals has been shown in previous
+examples several times, only the generation of the contour integral is of
+interest here. It basically works along the following lines: for domain
+integrals we have the <code>FEValues</code> class that provides values and
+gradients of the shape functions, as well as Jacobian determinants and other
+information and specified quadrature points in the cell; likewise, there is a
+class <code>FEFaceValues</code> that performs these tasks for integrations on
+faces of cells. One provides it with a quadrature formula for a manifold with
+dimension one less than the dimension of the domain is, and the cell and the
+number of its face on which we want to perform the integration. The class will
+then compute the values, gradients, normal vectors, weights, etc. at the
+quadrature points on this face, which we can then use in the same way as for
+the domain integrals. The details of how this is done are shown in the
+following program.
+
+
+<h3>A note on good programming practice</h3>
+
+Besides the mathematical topics outlined above, we also want to use this
+program to illustrate one aspect of good programming practice, namely the use
+of namespaces. In programming the deal.II library, we have taken great care not
+to use names for classes and global functions that are overly generic, say
+<code>f(), sz(), rhs()</code> etc. Furthermore, we have put everything into
+namespace <code>dealii</code>. But when one writes application programs that
+aren't meant for others to use, one doesn't always pay this much attention. If
+you follow the programming style of step-1 through step-6, these functions
+then end up in the global namespace where, unfortunately, a lot of other stuff
+also lives (basically everything the C language provides, along with
+everything you get from the operating system through header files). To make
+things a bit worse, the designers of the C language were also not always
+careful in avoiding generic names; for example, the symbols <code>j1,
+jn</code> are defined in C header files (they denote Bessel functions).
+
+To avoid the problems that result if names of different functions or variables
+collide (often with confusing error messages), it is good practice to put
+everything you do into a <a
+href="http://en.wikipedia.org/wiki/Namespace_(computer_science)">namespace</a>. Following
+this style, we will open a namespace <code>Step7</code> at the top of the
+program, import the deal.II namespace into it, put everything that's specific
+to this program (with the exception of <code>main()</code>, which must be in
+the global namespace) into it, and only close it at the bottom of the file. In
+other words, the structure of the program is of the kind
+@code
+  ... #includes
+
+  namespace Step7
+  {
+    using namespace dealii;
+
+    ...everything to do with the program...
+  }
+
+  int main ()
+  {
+    ...do whatever main() does...
+  }
+@endcode
+We will follow this scheme throughout the remainder of the deal.II tutorial.
diff --git a/examples/step-7/doc/kind b/examples/step-7/doc/kind
new file mode 100644
index 0000000..c1d9154
--- /dev/null
+++ b/examples/step-7/doc/kind
@@ -0,0 +1 @@
+techniques
diff --git a/examples/step-7/doc/results.dox b/examples/step-7/doc/results.dox
new file mode 100644
index 0000000..e5fbede
--- /dev/null
+++ b/examples/step-7/doc/results.dox
@@ -0,0 +1,218 @@
+<h1>Results</h1>
+
+
+The program generates two kinds of output. The first are the output
+files <code>solution-adaptive-q1.vtk</code>,
+<code>solution-global-q1.vtk</code>, and
+<code>solution-global-q2.vtk</code>. We show the latter in a 3d view
+here:
+
+
+<img src="http://www.dealii.org/images/steps/developer/step-7.solution.png" alt="">
+
+
+
+
+Secondly, the program writes tables not only to disk, but also to the
+screen while running. The output looks like the following (recall that
+columns labeled as "<code>H1</code>" actually show the $H^1$ <i>semi-</i>norm
+of the error, not the full $H^1$ norm):
+
+
+@code
+examples/\step-7> make run
+Solving with Q1 elements, adaptive refinement
+=============================================
+
+Cycle 0:
+   Number of active cells:       64
+   Number of degrees of freedom: 81
+Cycle 1:
+   Number of active cells:       124
+   Number of degrees of freedom: 157
+Cycle 2:
+   Number of active cells:       280
+   Number of degrees of freedom: 341
+Cycle 3:
+   Number of active cells:       577
+   Number of degrees of freedom: 690
+Cycle 4:
+   Number of active cells:       1099
+   Number of degrees of freedom: 1264
+Cycle 5:
+   Number of active cells:       2191
+   Number of degrees of freedom: 2452
+Cycle 6:
+   Number of active cells:       4165
+   Number of degrees of freedom: 4510
+Cycle 7:
+   Number of active cells:       7915
+   Number of degrees of freedom: 8440
+Cycle 8:
+   Number of active cells:       15196
+   Number of degrees of freedom: 15912
+
+cycle cells dofs     L2        H1      Linfty   
+    0    64    81 1.576e-01 1.418e+00 2.707e-01 
+    1   124   157 4.285e-02 1.285e+00 1.469e-01 
+    2   280   341 1.593e-02 7.909e-01 8.034e-02 
+    3   577   690 9.359e-03 5.096e-01 2.784e-02 
+    4  1099  1264 2.865e-03 3.038e-01 9.822e-03 
+    5  2191  2452 1.480e-03 2.106e-01 5.679e-03 
+    6  4165  4510 6.907e-04 1.462e-01 2.338e-03 
+    7  7915  8440 4.743e-04 1.055e-01 1.442e-03 
+    8 15196 15912 1.920e-04 7.468e-02 7.259e-04 
+
+Solving with Q1 elements, global refinement
+===========================================
+
+Cycle 0:
+   Number of active cells:       64
+   Number of degrees of freedom: 81
+Cycle 1:
+   Number of active cells:       256
+   Number of degrees of freedom: 289
+Cycle 2:
+   Number of active cells:       1024
+   Number of degrees of freedom: 1089
+Cycle 3:
+   Number of active cells:       4096
+   Number of degrees of freedom: 4225
+Cycle 4:
+   Number of active cells:       16384
+   Number of degrees of freedom: 16641
+
+cycle cells dofs     L2        H1      Linfty   
+    0    64    81 1.576e-01 1.418e+00 2.707e-01 
+    1   256   289 4.280e-02 1.285e+00 1.444e-01 
+    2  1024  1089 1.352e-02 7.556e-01 7.772e-02 
+    3  4096  4225 3.423e-03 3.822e-01 2.332e-02 
+    4 16384 16641 8.586e-04 1.917e-01 6.097e-03 
+
+n cells         H1                  L2          
+0    64 1.418e+00    -    - 1.576e-01    -    - 
+1   256 1.285e+00 1.10 0.14 4.280e-02 3.68 1.88 
+2  1024 7.556e-01 1.70 0.77 1.352e-02 3.17 1.66 
+3  4096 3.822e-01 1.98 0.98 3.423e-03 3.95 1.98 
+4 16384 1.917e-01 1.99 1.00 8.586e-04 3.99 2.00 
+
+Solving with Q2 elements, global refinement
+===========================================
+
+Cycle 0:
+   Number of active cells:       64
+   Number of degrees of freedom: 289
+Cycle 1:
+   Number of active cells:       256
+   Number of degrees of freedom: 1089
+Cycle 2:
+   Number of active cells:       1024
+   Number of degrees of freedom: 4225
+Cycle 3:
+   Number of active cells:       4096
+   Number of degrees of freedom: 16641
+Cycle 4:
+   Number of active cells:       16384
+   Number of degrees of freedom: 66049
+
+cycle cells dofs     L2        H1      Linfty   
+    0    64   289 1.606e-01 1.278e+00 3.029e-01 
+    1   256  1089 7.638e-03 5.248e-01 4.816e-02 
+    2  1024  4225 8.601e-04 1.086e-01 4.827e-03 
+    3  4096 16641 1.107e-04 2.756e-02 7.802e-04 
+    4 16384 66049 1.393e-05 6.915e-03 9.971e-05 
+
+n cells         H1                   L2          
+0    64 1.278e+00    -    - 1.606e-01     -    - 
+1   256 5.248e-01 2.43 1.28 7.638e-03 21.03 4.39 
+2  1024 1.086e-01 4.83 2.27 8.601e-04  8.88 3.15 
+3  4096 2.756e-02 3.94 1.98 1.107e-04  7.77 2.96 
+4 16384 6.915e-03 3.99 1.99 1.393e-05  7.94 2.99 
+
+Solving with Q2 elements, adaptive refinement
+===========================================
+
+Cycle 0:
+   Number of active cells:       64
+   Number of degrees of freedom: 289
+Cycle 1:
+   Number of active cells:       124
+   Number of degrees of freedom: 577
+Cycle 2:
+   Number of active cells:       289
+   Number of degrees of freedom: 1353
+Cycle 3:
+   Number of active cells:       547
+   Number of degrees of freedom: 2531
+Cycle 4:
+   Number of active cells:       1057
+   Number of degrees of freedom: 4919
+Cycle 5:
+   Number of active cells:       2059
+   Number of degrees of freedom: 9223
+Cycle 6:
+   Number of active cells:       3913
+   Number of degrees of freedom: 17887
+Cycle 7:
+   Number of active cells:       7441
+   Number of degrees of freedom: 33807
+Cycle 8:
+   Number of active cells:       14212
+   Number of degrees of freedom: 64731
+
+cycle cells dofs     L2        H1      Linfty   
+    0    64   289 1.606e-01 1.278e+00 3.029e-01 
+    1   124   577 7.891e-03 5.256e-01 4.852e-02 
+    2   289  1353 1.070e-03 1.155e-01 4.868e-03 
+    3   547  2531 5.962e-04 5.101e-02 1.876e-03 
+    4  1057  4919 1.977e-04 3.094e-02 7.923e-04 
+    5  2059  9223 7.738e-05 1.974e-02 7.270e-04 
+    6  3913 17887 2.925e-05 8.772e-03 1.463e-04 
+    7  7441 33807 1.024e-05 4.121e-03 8.567e-05 
+    8 14212 64731 3.761e-06 2.108e-03 2.167e-05 
+@endcode
+
+
+One can see the error reduction upon grid refinement, and for the
+cases where global refinement was performed, also the convergence
+rates can be seen. The linear and quadratic convergence rates of Q1
+and Q2 elements in the $H^1$ semi-norm can clearly be seen, as
+are the quadratic and cubic rates in the $L_2$ norm.
+
+
+
+
+Finally, the program also generated LaTeX versions of the tables (not shown
+here).
+
+
+<h3> Possible extensions </h3>
+
+<h4> Higher Order Elements </h4>
+
+Go ahead and run the program with higher order elements (Q3, Q4, ...). You
+will notice that assertions in several parts of the code will trigger (for
+example in the generation of the filename for the data output). After fixing
+these you will not see the correct convergence orders that the theory
+predicts. This is because the orders for the quadrature formulas are
+hard-coded in this program and this order is not enough for higher order
+discretizations. What is a good way to pick the orders dynamically?
+
+<h4> Convergence Comparison </h4>
+
+Is Q1 or Q2 better? What about adaptive versus global refinement? A (somewhat
+unfair but typical) metric to compare them is to look at the error as a
+function of the number of unknowns.
+
+To see this, create a plot in log-log style with the number of unknowns on the
+x axis and the L2 error on the y axis. You can add reference lines for
+$h^2=N^{-1}$ and $h^3=N^{-3/2}$ and check that global and adaptive refinement
+follow those. 
+
+Note that changing the half width of the peaks influences whether adaptive or
+global refinement is more efficient (if the solution is very smooth, local
+refinement does not give any advantage over global refinement). Verify this.
+
+Finally, a fairer comparison would be to plot runtime (switch to release
+mode first!) instead of number of unknowns on the x axis. Picking a better
+linear solver might be appropriate though.
diff --git a/examples/step-7/doc/tooltip b/examples/step-7/doc/tooltip
new file mode 100644
index 0000000..cc5ef58
--- /dev/null
+++ b/examples/step-7/doc/tooltip
@@ -0,0 +1 @@
+Helmholtz equation. Computing errors. Boundary integrals.
diff --git a/examples/step-7/step-7.cc b/examples/step-7/step-7.cc
new file mode 100644
index 0000000..69a19b9
--- /dev/null
+++ b/examples/step-7/step-7.cc
@@ -0,0 +1,1404 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2000 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Author: Wolfgang Bangerth and Ralf Hartmann, University of Heidelberg, 2000
+ */
+
+
+// @sect3{Include files}
+
+// These first include files have all been treated in previous examples, so we
+// won't explain what is in them again.
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/precondition.h>
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/grid_refinement.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/numerics/matrix_tools.h>
+#include <deal.II/numerics/error_estimator.h>
+#include <deal.II/numerics/data_out.h>
+
+// In this example, we will not use the numbering scheme that the DoFHandler
+// class applies by default, but will renumber the degrees of freedom using the
+// Cuthill-McKee algorithm. As has already been explained in step-2, the
+// necessary functions are declared in the following file:
+#include <deal.II/dofs/dof_renumbering.h>
+// Then we will show a little trick how we can make sure that objects are not
+// deleted while they are still in use. For this purpose, deal.II has the
+// SmartPointer helper class, which is declared in this file:
+#include <deal.II/base/smartpointer.h>
+// Next, we will want to use the function VectorTools::integrate_difference()
+// mentioned in the introduction, and we are going to use a ConvergenceTable
+// that collects all important data during a run and prints it at the end as a
+// table. These come from the following two files:
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/base/convergence_table.h>
+// And finally, we need to use the FEFaceValues class, which is declared in
+// the same file as the FEValues class:
+#include <deal.II/fe/fe_values.h>
+
+// We need one more include from standard C++, which is necessary when we try
+// to find out the actual type behind a pointer to a base class. We will
+// explain this in slightly more detail below. The other two include files are
+// obvious then:
+#include <typeinfo>
+#include <fstream>
+#include <iostream>
+
+// The last step before we go on with the actual implementation is to open a
+// namespace <code>Step7</code> into which we will put everything, as
+// discussed at the end of the introduction, and to import the members of
+// namespace <code>dealii</code> into it:
+namespace Step7
+{
+  using namespace dealii;
+
+  // @sect3{Equation data}
+
+  // Before implementing the classes that actually solve something, we first
+  // declare and define some function classes that represent right hand side
+  // and solution classes. Since we want to compare the numerically obtained
+  // solution to the exact continuous one, we need a function object that
+  // represents the continuous solution. On the other hand, we need the right
+  // hand side function, and that one of course shares some characteristics
+  // with the solution. In order to reduce dependencies which arise if we have
+  // to change something in both classes at the same time, we move the common
+  // characteristics of both functions into a base class.
+  //
+  // The common characteristics for solution (as explained in the
+  // introduction, we choose a sum of three exponentials) and right hand side,
+  // are these: the number of exponentials, their centers, and their half
+  // width. We declare them in the following class. Since the number of
+  // exponentials is a constant scalar integral quantity, C++ allows its
+  // definition (i.e. assigning a value) right at the place of declaration
+  // (i.e. where we declare that such a variable exists).
+  template <int dim>
+  class SolutionBase
+  {
+  protected:
+    static const unsigned int n_source_centers = 3;  // number of exponentials
+    static const Point<dim>   source_centers[n_source_centers];  // their centers
+    static const double       width;  // their common half-width
+  };
+
+
+  // The variables which denote the centers and the width of the exponentials
+  // have just been declared, now we still need to assign values to
+  // them. Here, we can show another small piece of template sorcery, namely
+  // how we can assign different values to these variables depending on the
+  // dimension. We will only use the 2d case in the program, but we show the
+  // 1d case for exposition of a useful technique.
+  //
+  // First we assign values to the centers for the 1d case, where we place the
+  // centers equidistantly at -1/3, 0, and 1/3. The <code>template
+  // <></code> header for this definition indicates an explicit
+  // specialization. This means, that the variable belongs to a template, but
+  // that instead of providing the compiler with a template from which it can
+  // specialize a concrete variable by substituting <code>dim</code> with some
+  // concrete value, we provide a specialization ourselves, in this case for
+  // <code>dim=1</code>. If the compiler then sees a reference to this
+  // variable in a place where the template argument equals one, it knows that
+  // it doesn't have to generate the variable from a template by substituting
+  // <code>dim</code>, but can immediately use the following definition:
+  template <>
+  const Point<1>
+  SolutionBase<1>::source_centers[SolutionBase<1>::n_source_centers] =
+  {
+    // three equidistant centers on the x axis
+    Point<1>(-1. / 3.), Point<1>(0.), Point<1>(+1. / 3.)
+  };
+
+  // Likewise, we can provide an explicit specialization for
+  // <code>dim=2</code>. We place the centers for the 2d case as follows:
+  template <>
+  const Point<2>
+  SolutionBase<2>::source_centers[SolutionBase<2>::n_source_centers] =
+  {
+    // centers in the upper-left, lower-left and lower-right quadrants
+    Point<2>(-.5, +.5), Point<2>(-.5, -.5), Point<2>(+.5, -.5)
+  };
+
+  // There remains to assign a value to the half-width of the exponentials. We
+  // would like to use the same value for all dimensions. In this case, we
+  // simply provide the compiler with a template from which it can generate a
+  // concrete instantiation by substituting <code>dim</code> with a concrete
+  // value:
+  template <int dim>
+  const double SolutionBase<dim>::width = 0.125;  // 1/8, for every dim
+
+
+
+  // After declaring and defining the characteristics of solution and right
+  // hand side, we can declare the classes representing these two. They both
+  // represent continuous functions, so they are derived from the
+  // Function<dim> base class, and they also inherit the characteristics
+  // defined in the SolutionBase class.
+  //
+  // The actual classes are declared in the following. Note that in order to
+  // compute the error of the numerical solution against the continuous one in
+  // the L2 and H1 (semi-)norms, we have to provide value and gradient of the exact
+  // solution. This is more than we have done in previous examples, where all
+  // we provided was the value at one or a list of points. Fortunately, the
+  // Function class also has virtual functions for the gradient, so we can
+  // simply override the respective virtual member functions in the Function
+  // base class. Note that the gradient of a function in <code>dim</code>
+  // space dimensions is a vector of size <code>dim</code>, i.e. a tensor of
+  // rank 1 and dimension <code>dim</code>. As for so many other things, the
+  // library provides a suitable class for this.
+  //
+  // Just as in previous examples, we are forced by the C++ language
+  // specification to declare a seemingly useless default constructor.
+  template <int dim>
+  class Solution : public Function<dim>,
+    protected SolutionBase<dim>
+  {
+  public:
+    Solution () : Function<dim>() {}
+    // Value of the exact solution at the point <code>p</code>:
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+    // Gradient of the exact solution at the point <code>p</code>:
+    virtual Tensor<1,dim> gradient (const Point<dim>   &p,
+                                    const unsigned int  component = 0) const;
+  };
+
+
+  // The actual definition of the values and gradients of the exact solution
+  // class is according to their mathematical definition and does not need
+  // much explanation.
+  //
+  // The only thing that is worth mentioning is that if we access elements of
+  // a base class that is template dependent (in this case the elements of
+  // SolutionBase<dim>), then the C++ language forces us to write
+  // <code>this->n_source_centers</code> (for example). Note that the
+  // <code>this-></code> qualification is not necessary if the base class
+  // is not template dependent, and also that the gcc compilers prior to
+  // version 3.4 don't enforce this requirement of the C++ standard. The
+  // reason why this is necessary is complicated; some books on C++ may
+  // explain it, so if you are interested you can look it up under the phrase
+  // <code>two-stage (name) lookup</code>.
+  template <int dim>
+  double Solution<dim>::value (const Point<dim>   &p,
+                               const unsigned int) const
+  {
+    // All exponentials share the same squared half-width.
+    const double width_squared = this->width * this->width;
+
+    // Add up one exponential per source center.
+    double sum = 0;
+    for (unsigned int k=0; k<this->n_source_centers; ++k)
+      sum += std::exp(-(p - this->source_centers[k]).norm_square() /
+                      width_squared);
+    return sum;
+  }
+
+
+  // Likewise, this is the computation of the gradient of the solution.  In
+  // order to accumulate the gradient from the contributions of the
+  // exponentials, we allocate an object <code>return_value</code> that
+  // denotes the mathematical quantity of a tensor of rank <code>1</code> and
+  // dimension <code>dim</code>. Its default constructor sets it to the vector
+  // containing only zeroes, so we need not explicitly care for its
+  // initialization.
+  //
+  // Note that we could as well have taken the type of the object to be
+  // Point<dim> instead of Tensor<1,dim>. Tensors of rank 1 and
+  // points are almost exchangeable, and have only very slightly different
+  // mathematical meanings. In fact, the Point<dim> class is derived
+  // from the Tensor<1,dim> class, which is what makes them largely
+  // interchangeable. Their main difference is in what they logically mean:
+  // points are points in space, such as the location at which we want to
+  // evaluate a function (see the type of the first argument of this function
+  // for example). On the other hand, tensors of rank 1 share the same
+  // transformation properties, for example that they need to be rotated in a
+  // certain way when we change the coordinate system; however, they do not
+  // share the same connotation that points have and are only objects in a
+  // more abstract space than the one spanned by the coordinate
+  // directions. (In fact, gradients live in `reciprocal' space, since the
+  // dimension of their components is not that of a length, but of one over
+  // length).
+  template <int dim>
+  Tensor<1,dim> Solution<dim>::gradient (const Point<dim>   &p,
+                                         const unsigned int) const
+  {
+    // Squared half-width shared by all exponentials.
+    const double width_squared = this->width * this->width;
+    // Default construction yields the zero tensor, the neutral element for
+    // the accumulation below.
+    Tensor<1,dim> gradient_sum;
+    for (unsigned int k=0; k<this->n_source_centers; ++k)
+      {
+        const Tensor<1,dim> offset = p - this->source_centers[k];
+
+        // Each exponential contributes a multiple of its offset vector; the
+        // scalar factor is -2/width^2 times the value of the exponential.
+        const double factor = (-2 / width_squared *
+                               std::exp(-offset.norm_square() / width_squared));
+        gradient_sum += factor * offset;
+      }
+    return gradient_sum;
+  }
+
+
+
+  // Besides the function that represents the exact solution, we also need a
+  // function which we can use as right hand side when assembling the linear
+  // system of discretized equations. This is accomplished using the following
+  // class and the following definition of its function. Note that here we
+  // only need the value of the function, not its gradients or higher
+  // derivatives.
+  template <int dim>
+  class RightHandSide : public Function<dim>,
+    protected SolutionBase<dim>
+  {
+  public:
+    RightHandSide () : Function<dim>() {}
+    // Value of the right hand side at the point <code>p</code>:
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+  };
+
+
+  // The value of the right hand side is given by the negative Laplacian of
+  // the solution plus the solution itself, since we wanted to solve
+  // Helmholtz's equation:
+  template <int dim>
+  double RightHandSide<dim>::value (const Point<dim>   &p,
+                                    const unsigned int) const
+  {
+    // Squared half-width shared by all exponentials.
+    const double width_squared = this->width * this->width;
+
+    double sum = 0;
+    for (unsigned int k=0; k<this->n_source_centers; ++k)
+      {
+        const double distance_squared
+          = (p - this->source_centers[k]).norm_square();
+        const double exponential = std::exp(-distance_squared / width_squared);
+
+        // The first contribution is the negative Laplacian of the exponential:
+        sum += ((2*dim - 4*distance_squared/width_squared) /
+                width_squared * exponential);
+        // And the second is the exponential, i.e. the solution term, itself:
+        sum += exponential;
+      }
+    return sum;
+  }
+
+
+  // @sect3{The Helmholtz solver class}
+
+  // Then we need the class that does all the work. Except for its name, its
+  // interface is mostly the same as in previous examples.
+  //
+  // One of the differences is that we will use this class in several modes:
+  // for different finite elements, as well as for adaptive and global
+  // refinement. The decision whether global or adaptive refinement shall be
+  // used is communicated to the constructor of this class through an
+  // enumeration type declared at the top of the class. The constructor then
+  // takes a finite element object and the refinement mode as arguments.
+  //
+  // The rest of the member functions are as before except for the
+  // <code>process_solution</code> function: After the solution has been
+  // computed, we perform some analysis on it, such as computing the error in
+  // various norms. To enable some output, it requires the number of the
+  // refinement cycle, and consequently gets it as an argument.
+  template <int dim>
+  class HelmholtzProblem
+  {
+  public:
+    enum RefinementMode
+    {
+      global_refinement, adaptive_refinement
+    };
+
+    HelmholtzProblem (const FiniteElement<dim> &fe,
+                      const RefinementMode      refinement_mode);
+
+    ~HelmholtzProblem ();
+
+    void run ();
+
+  private:
+    void setup_system ();
+    void assemble_system ();
+    void solve ();
+    void refine_grid ();
+    void process_solution (const unsigned int cycle);  // cycle: refinement cycle
+
+    // Now for the data elements of this class. Among the variables that we
+    // have already used in previous examples, only the finite element object
+    // differs: The finite elements which the objects of this class operate on
+    // are passed to the constructor of this class. It has to store a pointer
+    // to the finite element for the member functions to use. Now, for the
+    // present class there is no big deal in that, but since we want to show
+    // techniques rather than solutions in these programs, we will here point
+    // out a problem that often occurs -- and of course the right solution as
+    // well.
+    //
+    // Consider the following situation that occurs in all the example
+    // programs: we have a triangulation object, and we have a finite element
+    // object, and we also have an object of type DoFHandler that uses both of
+    // the first two. These three objects all have a lifetime that is rather
+    // long compared to most other objects: they are basically set at the
+    // beginning of the program or an outer loop, and they are destroyed at
+    // the very end. The question is: can we guarantee that the two objects
+    // which the DoFHandler uses, live at least as long as they are in use?
+    // This means that the DoFHandler must have some kind of lock on the
+    // destruction of the other objects, and it can only release this lock
+    // once it has cleared all active references to these objects. We have
+    // seen what happens if we violate this order of destruction in the
+    // previous example program: an exception is thrown that terminates the
+    // program in order to notify the programmer of this potentially dangerous
+    // state where an object is pointed to that no longer persists.
+    //
+    // We will show here how the library manages to find out that there are
+    // still active references to an object. Basically, the method is along
+    // the following line: all objects that are subject to such potentially
+    // dangerous pointers are derived from a class called Subscriptor. For
+    // example, the Triangulation, DoFHandler, and a base class of the
+    // FiniteElement class are derived from Subscriptor. This latter class
+    // does not offer much functionality, but it has a built-in counter which
+    // we can subscribe to, thus the name of the class. Whenever we initialize
+    // a pointer to that object, we can increase its use counter, and when we
+    // move away our pointer or do not need it any more, we decrease the
+    // counter again. This way, we can always check how many objects still use
+    // that object.
+    //
+    // On the other hand, if an object of a class that is derived from the
+    // Subscriptor class is destroyed, it also has to call the destructor of
+    // the Subscriptor class. In this destructor, there will then be a check
+    // whether the counter is really zero. If yes, then there are no active
+    // references to this object any more, and we can safely destroy it. If
+    // the counter is non-zero, however, then the destruction would result in
+    // stale and thus potentially dangerous pointers, and we rather throw an
+    // exception to alert the programmer that this is doing something
+    // dangerous and the program better be fixed.
+    //
+    // While this certainly all sounds very well, it has some problems in
+    // terms of usability: what happens if I forget to increase the counter
+    // when I let a pointer point to such an object? And what happens if I
+    // forget to decrease it again? Note that this may lead to extremely
+    // difficult to find bugs, since the place where we have forgotten
+    // something may be far away from the place where the check for zeroness
+    // of the counter upon destruction actually fails. This kind of bug is
+    // rather annoying and usually very hard to fix.
+    //
+    // The solution to this problem is to again use some C++ trickery: we
+    // create a class that acts just like a pointer, i.e. can be dereferenced,
+    // can be assigned to and from other pointers, and so on. This can be done
+    // by overloading the several dereferencing operators of that
+    // class. Within the constructors, destructors, and assignment operators
+    // of that class, we can however also manage increasing or decreasing the
+    // use counters of the objects we point to. Objects of that class
+    // therefore can be used just like ordinary pointers to objects, but they
+    // also serve to change the use counters of those objects without the need
+    // for the programmer to do so herself. The class that actually does all
+    // this is called SmartPointer and takes as template parameter the data
+    // type of the object which it shall point to. The latter type may be any
+    // class, as long as it is derived from the Subscriptor class.
+    //
+    // In the present example program, we want to protect the finite element
+    // object from the situation that for some reason the finite element
+    // pointed to is destroyed while still in use. We therefore use a
+    // SmartPointer to the finite element object; since the finite element
+    // object is actually never changed in our computations, we pass a const
+    // FiniteElement<dim> as template argument to the SmartPointer
+    // class. Note that the pointer so declared is assigned at construction
+    // time of the solver object, and destroyed upon destruction, so the lock
+    // on the destruction of the finite element object extends throughout the
+    // lifetime of this HelmholtzProblem object.
+    Triangulation<dim>                      triangulation;
+    DoFHandler<dim>                         dof_handler;
+
+    SmartPointer<const FiniteElement<dim> > fe;
+
+    ConstraintMatrix                        hanging_node_constraints;
+
+    SparsityPattern                         sparsity_pattern;
+    SparseMatrix<double>                    system_matrix;
+
+    Vector<double>                          solution;
+    Vector<double>                          system_rhs;
+
+    // The second to last variable stores the refinement mode passed to the
+    // constructor. Since it is only set in the constructor, we can declare
+    // this variable constant, to avoid that someone sets it involuntarily
+    // (e.g. in an `if'-statement where == was written as = by chance).
+    const RefinementMode                    refinement_mode;
+
+    // For each refinement level some data (like the number of cells, or the
+    // L2 error of the numerical solution) will be generated and later
+    // printed. The TableHandler can be used to collect all this data and to
+    // output it at the end of the run as a table in a simple text or in LaTeX
+    // format. Here we don't only use the TableHandler but we use the derived
+    // class ConvergenceTable that additionally evaluates rates of
+    // convergence:
+    ConvergenceTable                        convergence_table;
+  };
+
+
+  // @sect3{The HelmholtzProblem class implementation}
+
+  // @sect4{HelmholtzProblem::HelmholtzProblem}
+
+  // In the constructor of this class, we only set the variables passed as
+  // arguments, and associate the DoF handler object with the triangulation
+  // (which is empty at present, however).
+  template <int dim>
+  HelmholtzProblem<dim>::HelmholtzProblem (const FiniteElement<dim> &fe,
+                                           const RefinementMode refinement_mode) :
+    dof_handler (triangulation),        // attach to the still empty mesh
+    fe (&fe),                           // SmartPointer to the caller's element
+    refinement_mode (refinement_mode)   // const member, set only here
+  {}
+
+
+  // @sect4{HelmholtzProblem::~HelmholtzProblem}
+
+  // This is no different than before:
+  template <int dim>
+  HelmholtzProblem<dim>::~HelmholtzProblem ()
+  {
+    dof_handler.clear ();  // NOTE(review): presumably drops the handler's hold on *fe
+  }
+
+
+  // @sect4{HelmholtzProblem::setup_system}
+
+  // The following function sets up the degrees of freedom, sizes of matrices
+  // and vectors, etc. Most of its functionality has been shown in previous
+  // examples, the only difference being the renumbering step immediately
+  // after first distributing degrees of freedom.
+  //
+  // Renumbering the degrees of freedom is not overly difficult, as long as
+  // you use one of the algorithms included in the library. It requires only a
+  // single line of code. Some more information on this can be found in
+  // step-2.
+  //
+  // Note, however, that when you renumber the degrees of freedom, you must do
+  // so immediately after distributing them, since such things as hanging
+  // nodes, the sparsity pattern etc. depend on the absolute numbers which are
+  // altered by renumbering.
+  //
+  // The reason why we introduce renumbering here is that it is a relatively
+  // cheap operation but often has a beneficial effect: While the CG iteration
+  // itself is independent of the actual ordering of degrees of freedom, we
+  // will use SSOR as a preconditioner. SSOR goes through all degrees of
+  // freedom and does some operations that depend on what happened before; the
+  // SSOR operation is therefore not independent of the numbering of degrees
+  // of freedom, and it is known that its performance improves by using
+  // renumbering techniques. A little experiment shows that indeed, for
+  // example, the number of CG iterations for the fifth refinement cycle of
+  // adaptive refinement with the Q1 program used here is 40 without, but 36
+  // with renumbering. Similar savings can generally be observed for all the
+  // computations in this program.
+  template <int dim>
+  void HelmholtzProblem<dim>::setup_system ()
+  {
+    // Enumerate the unknowns and renumber them at once: all that follows
+    // depends on the final numbering.
+    dof_handler.distribute_dofs (*fe);
+    DoFRenumbering::Cuthill_McKee (dof_handler);
+    const types::global_dof_index n_unknowns = dof_handler.n_dofs();
+
+    hanging_node_constraints.clear ();
+    DoFTools::make_hanging_node_constraints (dof_handler, hanging_node_constraints);
+    hanging_node_constraints.close ();
+
+    // Sparsity pattern with the constraints condensed in, then the matrix
+    // built on top of it and the two vectors of matching size.
+    DynamicSparsityPattern dynamic_pattern (n_unknowns, n_unknowns);
+    DoFTools::make_sparsity_pattern (dof_handler, dynamic_pattern);
+    hanging_node_constraints.condense (dynamic_pattern);
+    sparsity_pattern.copy_from (dynamic_pattern);
+    system_matrix.reinit (sparsity_pattern);
+    solution.reinit (n_unknowns);
+    system_rhs.reinit (n_unknowns);
+  }
+
+
+  // @sect4{HelmholtzProblem::assemble_system}
+
+  // Assembling the system of equations for the problem at hand is mostly as
+  // for the example programs before. However, some things have changed
+  // anyway, so we comment on this function fairly extensively.
+  //
+  // At the top of the function you will find the usual assortment of variable
+  // declarations. Compared to previous programs, of importance is only that
+  // we expect to solve problems also with bi-quadratic elements and therefore
+  // have to use sufficiently accurate quadrature formula. In addition, we
+  // need to compute integrals over faces, i.e. <code>dim-1</code> dimensional
+  // objects. The declaration of a face quadrature formula is then
+  // straightforward:
+  template <int dim>
+  void HelmholtzProblem<dim>::assemble_system ()
+  {
+    // Gauss rules with three points per direction, for cells and for faces.
+    const QGauss<dim>   cell_quadrature (3);
+    const QGauss<dim-1> face_quadrature (3);
+    const unsigned int  n_cell_q_points = cell_quadrature.size();
+    const unsigned int  n_face_q_points = face_quadrature.size();
+    const unsigned int  n_local_dofs    = fe->dofs_per_cell;
+
+    // Scratch storage for the contributions of a single cell and for the
+    // global indices of that cell's degrees of freedom.
+    FullMatrix<double>                   local_matrix (n_local_dofs, n_local_dofs);
+    Vector<double>                       local_rhs (n_local_dofs);
+    std::vector<types::global_dof_index> global_indices (n_local_dofs);
+
+    // Next come the objects that evaluate shape function values, gradients
+    // etc. at the quadrature points. A single object for both cell and face
+    // integrals might seem feasible, but there is a subtle difference: the
+    // weights of a cell integral contain the measure of the cell in the
+    // domain, whereas a face integral needs the measure of the face as a
+    // lower-dimensional manifold. Two classes are therefore used; internally
+    // both are rooted in a common base class that does most of the work and
+    // gives cell and face integration the same interface.
+    //
+    // The cell integrals of the bilinear form of Helmholtz's equation
+    // require shape function values and gradients together with the
+    // quadrature weights. The quadrature points on the real cell (as
+    // opposed to the unit cell) are needed as well, because the right hand
+    // side function is evaluated there. All of this is provided by the
+    // FEValues class discussed previously.
+    //
+    // The face integrals get by with shape function values and weights, but
+    // additionally ask for normal vectors and for quadrature points on the
+    // real cell: the Neumann values are derived from the exact solution
+    // object (see below) at these points and with these normals. The class
+    // supplying this information is called FEFaceValues, and it is set up
+    // right after the FEValues object:
+    FEValues<dim>     cell_fe_values (*fe, cell_quadrature,
+                                      update_values | update_gradients |
+                                      update_quadrature_points | update_JxW_values);
+
+    FEFaceValues<dim> face_fe_values (*fe, face_quadrature,
+                                      update_values | update_quadrature_points |
+                                      update_normal_vectors | update_JxW_values);
+
+    // Objects familiar from previous examples follow: one representing the
+    // right hand side function, and a container for its values at the
+    // quadrature points of a cell. (The cell matrix, the cell right hand
+    // side and the indices of the degrees of freedom on a cell have been
+    // declared at the top already.)
+    //
+    // The right hand side object is only ever queried, never modified, so
+    // it can be declared <code>const</code>:
+    const RightHandSide<dim> rhs_function;
+    std::vector<double>      rhs_at_q_points (n_cell_q_points);
+
+    // Last, an object for the exact solution, from which the Neumann values
+    // on the boundary are computed. Normally one would use a separate
+    // object for that purpose, not least because the exact solution is
+    // generally unknown while the Neumann values are prescribed. Here we
+    // are a little bit lazy and reuse the information we already have;
+    // real-life programs would have to go other ways, of course. The same
+    // object provides the boundary values interpolated further below.
+    const Solution<dim> exact_solution;
+
+    // The main loop over all active cells. It is mostly unchanged from
+    // previous examples, so only the new parts are commented on.
+    for (typename DoFHandler<dim>::active_cell_iterator
+         cell = dof_handler.begin_active(),
+         end_cell = dof_handler.end();
+         cell != end_cell; ++cell)
+      {
+        local_matrix = 0;
+        local_rhs    = 0;
+        cell_fe_values.reinit (cell);
+
+        // Right hand side function at all quadrature points of this cell.
+        rhs_function.value_list (cell_fe_values.get_quadrature_points(),
+                                 rhs_at_q_points);
+
+        for (unsigned int q=0; q<n_cell_q_points; ++q)
+          {
+            const double JxW = cell_fe_values.JxW(q);
+            for (unsigned int i=0; i<n_local_dofs; ++i)
+              {
+                const double phi_i = cell_fe_values.shape_value(i,q);
+                // What has changed is the bilinear form: next to the product
+                // of gradients it now contains the additional term from the
+                // Helmholtz equation, the product of shape function values.
+                for (unsigned int j=0; j<n_local_dofs; ++j)
+                  local_matrix(i,j) += ((cell_fe_values.shape_grad(i,q) *
+                                         cell_fe_values.shape_grad(j,q)
+                                         +
+                                         phi_i * cell_fe_values.shape_value(j,q)) *
+                                        JxW);
+                local_rhs(i) += (phi_i * rhs_at_q_points[q] * JxW);
+              }
+          }
+
+        // What remains is the second term on the right hand side, the
+        // contour integral over Gamma2. We first have to find out whether
+        // any face of this cell lies on that part of the boundary. To do
+        // so, all faces are visited and those that are not at the boundary,
+        // or whose boundary indicator differs from <code>1</code>, are
+        // skipped; <code>1</code> is the value assigned to the portions of
+        // the boundary making up Gamma2 in the <code>run()</code> function
+        // further below. (Boundary indicators default to <code>0</code>, so
+        // a face only carries <code>1</code> if we set it explicitly.)
+        for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+          {
+            if (!cell->face(f)->at_boundary() ||
+                (cell->face(f)->boundary_id() != 1))
+              continue;
+            // Getting here means that an external face belonging to Gamma2
+            // has been found. The shape function values and the other
+            // quantities needed for the contour integral are now computed
+            // on this face. This is the job of the <code>reinit</code>
+            // function, which works like the one already known from the
+            // FEValues class:
+            face_fe_values.reinit (cell, f);
+
+            // The integration itself is a loop over all quadrature points
+            // of the face.
+            //
+            // At each quadrature point the value of the normal derivative
+            // is computed first: it is the product of the gradient of the
+            // exact solution and the normal vector to the face, both taken
+            // at the present quadrature point as provided by the
+            // <code>face_fe_values</code> object. With this value, the
+            // contribution of the face to the right hand side is then
+            // added up:
+            for (unsigned int q=0; q<n_face_q_points; ++q)
+              {
+                const double normal_derivative
+                  = (exact_solution.gradient (face_fe_values.quadrature_point(q)) *
+                     face_fe_values.normal_vector(q));
+                const double JxW = face_fe_values.JxW(q);
+
+                for (unsigned int i=0; i<n_local_dofs; ++i)
+                  local_rhs(i) += (normal_derivative *
+                                   face_fe_values.shape_value(i,q) * JxW);
+              }
+          }
+
+        // The contributions of this cell are complete and can be added to
+        // the global matrix and right hand side vector, as in the examples
+        // before:
+        cell->get_dof_indices (global_indices);
+        for (unsigned int i=0; i<n_local_dofs; ++i)
+          {
+            system_rhs(global_indices[i]) += local_rhs(i);
+
+            for (unsigned int j=0; j<n_local_dofs; ++j)
+              system_matrix.add (global_indices[i],
+                                 global_indices[j],
+                                 local_matrix(i,j));
+          }
+      }
+
+    // The remaining steps, condensing the hanging node constraints and
+    // treating the boundary values, have likewise been shown previously.
+    //
+    // Note, however, that the boundary indicator for which boundary values
+    // are interpolated (the second argument of
+    // <code>interpolate_boundary_values</code>) no longer stands for the
+    // whole boundary. It only denotes that portion to which we have not
+    // assigned another indicator (see below). Degrees of freedom on the
+    // boundary that do not belong to Gamma1 are thereby left out of the
+    // interpolation of boundary values, just as we want.
+    hanging_node_constraints.condense (system_matrix);
+    hanging_node_constraints.condense (system_rhs);
+
+    // Values of the exact solution on the part of the boundary that still
+    // carries indicator 0, keyed by global degree of freedom index.
+    std::map<types::global_dof_index,double> gamma1_values;
+    VectorTools::interpolate_boundary_values (dof_handler, 0, exact_solution,
+                                              gamma1_values);
+    MatrixTools::apply_boundary_values (gamma1_values,
+                                        system_matrix,
+                                        solution,
+                                        system_rhs);
+  }
+
+
+  // @sect4{HelmholtzProblem::solve}
+
+  // Solving the system of equations is done in the same way as before:
+  template <int dim>
+  void HelmholtzProblem<dim>::solve ()
+  {
+    // CG iteration, preconditioned by SSOR set up from the system matrix.
+    SolverControl      iteration_control (1000, 1e-12);
+    SolverCG<>         cg_solver (iteration_control);
+    PreconditionSSOR<> ssor;
+    ssor.initialize (system_matrix, 1.2);
+
+    cg_solver.solve (system_matrix, solution, system_rhs, ssor);
+
+    // Finally let the hanging node constraints act on the solution vector.
+    hanging_node_constraints.distribute (solution);
+  }
+
+
+  // @sect4{HelmholtzProblem::refine_grid}
+
+  // Now for the function doing grid refinement. Depending on the refinement
+  // mode passed to the constructor, we do global or adaptive refinement.
+  //
+  // Global refinement is simple, so there is not much to comment on.  In case
+  // of adaptive refinement, we use the same functions and classes as in the
+  // previous example program. Note that one could treat Neumann boundaries
+  // differently than Dirichlet boundaries, and one should in fact do so here
+  // since we have Neumann boundary conditions on part of the boundaries, but
+  // since we don't have a function here that describes the Neumann values (we
+  // only construct these values from the exact solution when assembling the
+  // matrix), we omit this detail even though doing this in a strictly correct
+  // way would not be hard to add.
+  //
+  // At the end of the switch, we have a default case that looks slightly
+  // strange: an <code>Assert</code> statement with a <code>false</code>
+  // condition. Since the <code>Assert</code> macro raises an error whenever
+  // the condition is false, this means that whenever we hit this statement
+  // the program will be aborted. This is intentional: Right now we have only
+  // implemented two refinement strategies (global and adaptive), but someone
+  // might want to add a third strategy (for example adaptivity with a
+  // different refinement criterion) and add a third member to the enumeration
+  // that determines the refinement mode. If it weren't for the default case
+  // of the switch statement, this function would simply run to its end
+  // without doing anything. This is most likely not what was intended. One of
+  // the defensive programming techniques that you will find all over the
+  // deal.II library is therefore to always have default cases that abort, to
+  // make sure that values not considered when listing the cases in the switch
+  // statement are eventually caught, and forcing programmers to add code to
+  // handle them. We will use this same technique in other places further down
+  // as well.
+  template <int dim>
+  void HelmholtzProblem<dim>::refine_grid ()
+  {
+    switch (refinement_mode)
+      {
+      case global_refinement:
+      {
+        // Uniform refinement: a single global refinement step.
+        triangulation.refine_global (1);
+        break;
+      }
+
+      case adaptive_refinement:
+      {
+        // One error indicator per active cell, computed by the Kelly
+        // estimator. The function map handed to it is left empty.
+        const QGauss<dim-1>             face_quadrature (3);
+        typename FunctionMap<dim>::type empty_function_map;
+        Vector<float> error_indicators (triangulation.n_active_cells());
+        KellyErrorEstimator<dim>::estimate (dof_handler, face_quadrature,
+                                            empty_function_map, solution,
+                                            error_indicators);
+
+        // Flag cells from the indicators (0.3 / 0.03: presumably the refine
+        // and coarsen fractions -- TODO confirm), then apply the flags.
+        GridRefinement::refine_and_coarsen_fixed_number (triangulation,
+                                                         error_indicators,
+                                                         0.3, 0.03);
+        triangulation.execute_coarsening_and_refinement ();
+        break;
+      }
+
+      default:
+        Assert (false, ExcNotImplemented());
+      }
+  }
+
+
+  // @sect4{HelmholtzProblem::process_solution}
+
+  // Finally we want to process the solution after it has been computed. For
+  // this, we integrate the error in various (semi-)norms, and we generate tables
+  // that will later be used to display the convergence against the continuous
+  // solution in a nice format.
+  template <int dim>
+  void HelmholtzProblem<dim>::process_solution (const unsigned int cycle)
+  {
+    // Purpose: measure the error of the current numerical solution in the
+    // L2 norm, the H1 semi-norm and the maximum norm, print the size of the
+    // present mesh, and append one row to the convergence table. The
+    // argument <code>cycle</code> is the number of the refinement step; it
+    // labels the screen output and the table row.
+    //
+    // The error is taken against the continuous solution described by the
+    // Solution class. All three norms are evaluated by
+    // VectorTools::integrate_difference, which is handed the DoF handler,
+    // the vector of nodal values of the numerical solution, the continuous
+    // solution as a function object, a vector that receives one value per
+    // active cell, a quadrature rule, and the kind of norm to compute.
+    //
+    // The cell-wise results are stored as <code>float</code> rather than
+    // <code>double</code>: sixteen digits of accuracy are not needed for
+    // these quantities, and the smaller type saves some memory. One vector
+    // serves all three norms, since each global value is extracted before
+    // the next call overwrites the cell-wise data. Likewise, the Gauss
+    // formula with three points in each space direction is shared by the
+    // L2 norm and the H1 semi-norm.
+    const Solution<dim> exact_solution;
+    const QGauss<dim>   gauss_rule (3);
+    Vector<float>       cellwise_error (triangulation.n_active_cells());
+
+    // L2 norm: the global value is the square root of the sum of the
+    // squared cell-wise norms, which is the same as the l2 (lower case
+    // <code>l</code>) norm of the vector of cell contributions.
+    VectorTools::integrate_difference (dof_handler,
+                                       solution,
+                                       exact_solution,
+                                       cellwise_error,
+                                       gauss_rule,
+                                       VectorTools::L2_norm);
+    const double l2_error = cellwise_error.l2_norm();
+
+    // H1 semi-norm: the same procedure with a different norm type. The
+    // global $H^1$ semi-norm error again coincides with the $l_2$ norm of
+    // the cell-wise entries.
+    VectorTools::integrate_difference (dof_handler,
+                                       solution,
+                                       exact_solution,
+                                       cellwise_error,
+                                       gauss_rule,
+                                       VectorTools::H1_seminorm);
+    const double h1_error = cellwise_error.l2_norm();
+
+    // Maximum norm. The true maximum cannot be computed, of course; what we
+    // obtain is only the maximum over the quadrature points. That value
+    // depends quite sensitively on the quadrature rule being used, and we
+    // would like to avoid false results due to super-convergence effects
+    // at some points. We therefore use a special rule, obtained by
+    // iterating the trapezoidal rule five times in each space direction:
+    // the QIterated constructor takes a one-dimensional quadrature rule
+    // and the number of times that rule is to be used per direction.
+    //
+    // With this rule the maximal error on each cell is determined. The
+    // global L-infinity error is then not a sum of squares but the largest
+    // of the cell-wise entries, which is what Vector::linfty_norm()
+    // returns.
+    const QTrapez<1>     trapezoidal_rule;
+    const QIterated<dim> iterated_trapezoidal_rule (trapezoidal_rule, 5);
+    VectorTools::integrate_difference (dof_handler,
+                                       solution,
+                                       exact_solution,
+                                       cellwise_error,
+                                       iterated_trapezoidal_rule,
+                                       VectorTools::Linfty_norm);
+    const double max_error = cellwise_error.linfty_norm();
+
+    // With all errors computed, some output is written. The number of
+    // active cells and the number of degrees of freedom are printed to the
+    // screen. Together with the cycle number and the three errors they are
+    // also stored in the convergence table, each value under the key of
+    // its column. Both counts are held as <code>unsigned int</code>, the
+    // type under which they are printed and added to the table.
+    const unsigned int cell_count = triangulation.n_active_cells();
+    const unsigned int dof_count  = dof_handler.n_dofs();
+
+    std::cout << "Cycle " << cycle << ':'
+              << std::endl;
+    std::cout << "   Number of active cells:       " << cell_count
+              << std::endl;
+    std::cout << "   Number of degrees of freedom: " << dof_count
+              << std::endl;
+
+    // One table row per call. Columns need not be defined beforehand: it
+    // is sufficient to add values, and columns are introduced into the
+    // table in the order in which values are added under a new key for
+    // the first time.
+    convergence_table.add_value("cycle", cycle);
+    convergence_table.add_value("cells", cell_count);
+    convergence_table.add_value("dofs", dof_count);
+    convergence_table.add_value("L2", l2_error);
+    convergence_table.add_value("H1", h1_error);
+    convergence_table.add_value("Linfty", max_error);
+  }
+
+
+  // @sect4{HelmholtzProblem::run}
+
+  // As in previous example programs, the <code>run</code> function controls
+  // the flow of execution. The basic layout is as in previous examples: an
+  // outer loop over successively refined grids, and in this loop first
+  // problem setup, assembling the linear system, solution, and
+  // post-processing.
+  //
+  // The first task in the main loop is creation and refinement of grids. This
+  // is as in previous examples, with the only difference that we want to have
+  // part of the boundary marked as Neumann type, rather than Dirichlet.
+  //
+  // For this, we will use the following convention: Faces belonging to Gamma1
+  // will have the boundary indicator <code>0</code> (which is the default, so
+  // we don't have to set it explicitly), and faces belonging to Gamma2 will
+  // use <code>1</code> as boundary indicator.  To set these values, we loop
+  // over all cells, then over all faces of a given cell, check whether it is
+  // part of the boundary that we want to denote by Gamma2, and if so set its
+  // boundary indicator to <code>1</code>. For the present program, we
+  // consider the left and bottom boundaries as Gamma2. We determine whether a
+  // face is part of that boundary by asking whether the x or y coordinates
+  // (i.e. vector components 0 and 1) of the midpoint of a face equals -1, up
+  // to some small wiggle room that we have to give since it is unstable to
+  // compare floating point numbers that are subject to round off in
+  // intermediate computations.
+  //
+  // It is worth noting that we have to loop over all cells here, not only the
+  // active ones. The reason is that upon refinement, newly created faces
+  // inherit the boundary indicator of their parent face. If we now only set
+  // the boundary indicator for active faces, coarsen some cells and refine
+  // them later on, they will again have the boundary indicator of the parent
+  // cell which we have not modified, instead of the one we
+  // intended. Consequently, we have to change the boundary indicators of
+  // faces of all cells on Gamma2, whether they are active or not.
+  // Alternatively, we could of course have done this job on the coarsest mesh
+  // (i.e. before the first refinement step) and refined the mesh only after
+  // that.
+  template <int dim>
+  void HelmholtzProblem<dim>::run ()
+  {
+    const unsigned int n_cycles = (refinement_mode==global_refinement)?5:9;
+    for (unsigned int cycle=0; cycle<n_cycles; ++cycle)
+      {
+        if (cycle == 0)
+          {
+            GridGenerator::hyper_cube (triangulation, -1, 1);
+            triangulation.refine_global (3);
+
+            typename Triangulation<dim>::cell_iterator
+            cell = triangulation.begin (),
+            endc = triangulation.end();
+            for (; cell!=endc; ++cell)
+              for (unsigned int face_number=0;
+                   face_number<GeometryInfo<dim>::faces_per_cell;
+                   ++face_number)
+                if ((std::fabs(cell->face(face_number)->center()(0) - (-1)) < 1e-12)
+                    ||
+                    (std::fabs(cell->face(face_number)->center()(1) - (-1)) < 1e-12))
+                  cell->face(face_number)->set_boundary_id (1);
+          }
+        else
+          refine_grid ();
+
+
+        // The next steps are already known from previous examples. This is
+        // mostly the basic set-up of every finite element program:
+        setup_system ();
+
+        assemble_system ();
+        solve ();
+
+        // The last step in this chain of function calls is usually the
+        // evaluation of the computed solution for the quantities one is
+        // interested in. This is done in the following function. Since the
+        // function generates output that indicates the number of the present
+        // refinement step, we pass this number as an argument.
+        process_solution (cycle);
+      }
+
+    // @sect5{Output of graphical data}
+
+    // After the last iteration we output the solution on the finest
+    // grid. This is done using the following sequence of statements which we
+    // have already discussed in previous examples. The first step is to
+    // generate a suitable filename (called <code>vtk_filename</code> here,
+    // since we want to output data in VTK format; we add the prefix to
+    // distinguish the filename from that used for other output files further
+    // down below). Here, we augment the name by the mesh refinement
+    // algorithm, and as above we make sure that we abort the program if
+    // another refinement method is added and not handled by the following
+    // switch statement:
+    std::string vtk_filename;
+    switch (refinement_mode)
+      {
+      case global_refinement:
+        vtk_filename = "solution-global";
+        break;
+      case adaptive_refinement:
+        vtk_filename = "solution-adaptive";
+        break;
+      default:
+        Assert (false, ExcNotImplemented());
+      }
+
+    // We augment the filename by a postfix denoting the finite element which
+    // we have used in the computation. To this end, the finite element base
+    // class stores the maximal polynomial degree of shape functions in each
+    // coordinate variable as a variable <code>degree</code>, and we use for
+    // the switch statement (note that the polynomial degree of bilinear shape
+    // functions is really 2, since they contain the term <code>x*y</code>;
+    // however, the polynomial degree in each coordinate variable is still
+    // only 1). We again use the same defensive programming technique to
+    // safeguard against the case that the polynomial degree has an unexpected
+    // value, using the <code>Assert (false, ExcNotImplemented())</code> idiom
+    // in the default branch of the switch statement:
+    switch (fe->degree)
+      {
+      case 1:
+        vtk_filename += "-q1";
+        break;
+      case 2:
+        vtk_filename += "-q2";
+        break;
+
+      default:
+        Assert (false, ExcNotImplemented());
+      }
+
+    // Once we have the base name for the output file, we add an extension
+    // appropriate for VTK output, open a file, and add the solution vector to
+    // the object that will do the actual output:
+    vtk_filename += ".vtk";
+    std::ofstream output (vtk_filename.c_str());
+
+    DataOut<dim> data_out;
+    data_out.attach_dof_handler (dof_handler);
+    data_out.add_data_vector (solution, "solution");
+
+    // Now building the intermediate format as before is the next step. We
+    // introduce one more feature of deal.II here. The background is the
+    // following: in some of the runs of this function, we have used
+    // biquadratic finite elements. However, since almost all output formats
+    // only support bilinear data, the data is written only bilinear, and
+    // information is consequently lost.  Of course, we can't change the
+    // format in which graphic programs accept their inputs, but we can write
+    // the data differently such that we more closely resemble the information
+    // available in the quadratic approximation. We can, for example, write
+    // each cell as four sub-cells with bilinear data each, such that we have
+    // nine data points for each cell in the triangulation. The graphic
+    // programs will, of course, display this data still only bilinear, but at
+    // least we have given some more of the information we have.
+    //
+    // In order to allow writing more than one sub-cell per actual cell, the
+    // <code>build_patches</code> function accepts a parameter (the default is
+    // <code>1</code>, which is why you haven't seen this parameter in
+    // previous examples). This parameter denotes into how many sub-cells per
+    // space direction each cell shall be subdivided for output. For example,
+    // if you give <code>2</code>, this leads to 4 cells in 2D and 8 cells in
+    // 3D. For quadratic elements, two sub-cells per space direction is
+    // obviously the right choice, so this is what we choose. In general, for
+    // elements of polynomial order <code>q</code>, we use <code>q</code>
+    // subdivisions, and the order of the elements is determined in the same
+    // way as above.
+    //
+    // With the intermediate format so generated, we can then actually write
+    // the graphical output:
+    data_out.build_patches (fe->degree);
+    data_out.write_vtk (output);
+
+    // @sect5{Output of convergence tables}
+
+    // After graphical output, we would also like to generate tables from the
+    // error computations we have done in
+    // <code>process_solution</code>. There, we have filled a table object
+    // with the number of cells for each refinement step as well as the errors
+    // in different norms.
+
+    // For a nicer textual output of this data, one may want to set the
+    // precision with which the values will be written upon output. We use 3
+    // digits for this, which is usually sufficient for error norms. By
+    // default, data is written in fixed point notation. However, for columns
+    // one would like to see in scientific notation another function call sets
+    // the <code>scientific_flag</code> to <code>true</code>, leading to
+    // floating point representation of numbers.
+    convergence_table.set_precision("L2", 3);
+    convergence_table.set_precision("H1", 3);
+    convergence_table.set_precision("Linfty", 3);
+
+    convergence_table.set_scientific("L2", true);
+    convergence_table.set_scientific("H1", true);
+    convergence_table.set_scientific("Linfty", true);
+
+    // For the output of a table into a LaTeX file, the default captions of
+    // the columns are the keys given as argument to the
+    // <code>add_value</code> functions. To have TeX captions that differ from
+    // the default ones you can specify them by the following function calls.
+    // Note, that `\\' is reduced to `\' by the compiler such that the real
+    // TeX caption is, e.g., `$L^\infty$-error'.
+    convergence_table.set_tex_caption("cells", "\\# cells");
+    convergence_table.set_tex_caption("dofs", "\\# dofs");
+    convergence_table.set_tex_caption("L2", "$L^2$-error");
+    convergence_table.set_tex_caption("H1", "$H^1$-error");
+    convergence_table.set_tex_caption("Linfty", "$L^\\infty$-error");
+
+    // Finally, the default LaTeX format for each column of the table is `c'
+    // (centered). To specify a different (e.g. `right') one, the following
+    // function may be used:
+    convergence_table.set_tex_format("cells", "r");
+    convergence_table.set_tex_format("dofs", "r");
+
+    // After this, we can finally write the table to the standard output
+    // stream <code>std::cout</code> (after one extra empty line, to make
+    // things look prettier). Note, that the output in text format is quite
+    // simple and that captions may not be printed directly above the specific
+    // columns.
+    std::cout << std::endl;
+    convergence_table.write_text(std::cout);
+
+    // The table can also be written into a LaTeX file.  The (nicely)
+    // formatted table can be viewed after calling `latex filename' and
+    // e.g. `xdvi filename', where filename is the name of the file to which
+    // we will write output now. We construct the file name in the same way as
+    // before, but with a different prefix "error":
+    std::string error_filename = "error";
+    switch (refinement_mode)
+      {
+      case global_refinement:
+        error_filename += "-global";
+        break;
+      case adaptive_refinement:
+        error_filename += "-adaptive";
+        break;
+      default:
+        Assert (false, ExcNotImplemented());
+      }
+
+    switch (fe->degree)
+      {
+      case 1:
+        error_filename += "-q1";
+        break;
+      case 2:
+        error_filename += "-q2";
+        break;
+      default:
+        Assert (false, ExcNotImplemented());
+      }
+
+    error_filename += ".tex";
+    std::ofstream error_table_file(error_filename.c_str());
+
+    convergence_table.write_tex(error_table_file);
+
+
+    // @sect5{Further table manipulations}
+
+    // In case of global refinement, it might be of interest to also output
+    // the convergence rates. This may be done by the functionality the
+    // ConvergenceTable offers over the regular TableHandler. However, we do
+    // it only for global refinement, since for adaptive refinement the
+    // determination of something like an order of convergence is somewhat
+    // more involved. While we are at it, we also show a few other things that
+    // can be done with tables.
+    if (refinement_mode==global_refinement)
+      {
+        // The first thing is that one can group individual columns together
+        // to form so-called super columns. Essentially, the columns remain
+        // the same, but the ones that were grouped together will get a
+        // caption running across all columns in a group. For example, let's
+        // merge the "cycle" and "cells" columns into a super column named "n
+        // cells":
+        convergence_table.add_column_to_supercolumn("cycle", "n cells");
+        convergence_table.add_column_to_supercolumn("cells", "n cells");
+
+        // Next, it isn't necessary to always output all columns, or in the
+        // order in which they were originally added during the run.
+        // Selecting and re-ordering the columns works as follows (note that
+        // this includes super columns):
+        std::vector<std::string> new_order;
+        new_order.push_back("n cells");
+        new_order.push_back("H1");
+        new_order.push_back("L2");
+        convergence_table.set_column_order (new_order);
+
+        // For everything that happened to the ConvergenceTable until this
+        // point, it would have been sufficient to use a simple
+        // TableHandler. Indeed, the ConvergenceTable is derived from the
+        // TableHandler but it offers the additional functionality of
+        // automatically evaluating convergence rates. For example, here is
+        // how we can let the table compute reduction and convergence rates
+        // (convergence rates are the binary logarithm of the reduction rate):
+        convergence_table
+        .evaluate_convergence_rates("L2", ConvergenceTable::reduction_rate);
+        convergence_table
+        .evaluate_convergence_rates("L2", ConvergenceTable::reduction_rate_log2);
+        convergence_table
+        .evaluate_convergence_rates("H1", ConvergenceTable::reduction_rate);
+        convergence_table
+        .evaluate_convergence_rates("H1", ConvergenceTable::reduction_rate_log2);
+        // Each of these function calls produces an additional column that is
+        // merged with the original column (in our example the `L2' and the
+        // `H1' column) to a supercolumn.
+
+        // Finally, we want to write this convergence chart again, first to
+        // the screen and then, in LaTeX format, to disk. The filename is
+        // again constructed as above.
+        std::cout << std::endl;
+        convergence_table.write_text(std::cout);
+
+        std::string conv_filename = "convergence";
+        switch (refinement_mode)
+          {
+          case global_refinement:
+            conv_filename += "-global";
+            break;
+          case adaptive_refinement:
+            conv_filename += "-adaptive";
+            break;
+          default:
+            Assert (false, ExcNotImplemented());
+          }
+        switch (fe->degree)
+          {
+          case 1:
+            conv_filename += "-q1";
+            break;
+          case 2:
+            conv_filename += "-q2";
+            break;
+          default:
+            Assert (false, ExcNotImplemented());
+          }
+        conv_filename += ".tex";
+
+        std::ofstream table_file(conv_filename.c_str());
+        convergence_table.write_tex(table_file);
+      }
+  }
+
+  // The final step before going to <code>main()</code> is then to close the
+  // namespace <code>Step7</code> into which we have put everything we needed
+  // for this program:
+}
+
+// @sect3{Main function}
+
+// The main function is mostly as before. The only difference is that we solve
+// four times: for Q1 and for Q2 elements, each once with adaptive and once
+// with global refinement.
+//
+// Since we instantiate several template classes below for two space
+// dimensions, we make this more generic by declaring a constant at the
+// beginning of the function denoting the number of space dimensions. If you
+// want to run the program in 1d or 2d, you will then only have to change this
+// one instance, rather than all uses below:
+int main ()
+{
+  const unsigned int dim = 2;  // the single place to change the space dimension
+
+  try
+    {
+      using namespace dealii;
+      using namespace Step7;
+
+      // Now for the four calls to the main class. Each call is blocked into
+      // curly braces in order to destroy the respective objects (i.e. the
+      // finite element and the HelmholtzProblem object) at the end of the
+      // block and before we go to the next run. This avoids conflicts with
+      // variable names, and also makes sure that memory is released
+      // immediately after one of the four runs has finished, and not only at
+      // the end of the <code>try</code> block.
+      {
+        std::cout << "Solving with Q1 elements, adaptive refinement" << std::endl
+                  << "=============================================" << std::endl
+                  << std::endl;
+
+        FE_Q<dim> fe(1);
+        HelmholtzProblem<dim>
+        helmholtz_problem_2d (fe, HelmholtzProblem<dim>::adaptive_refinement);
+
+        helmholtz_problem_2d.run ();
+
+        std::cout << std::endl;
+      }
+
+      {
+        std::cout << "Solving with Q1 elements, global refinement" << std::endl
+                  << "===========================================" << std::endl
+                  << std::endl;
+
+        FE_Q<dim> fe(1);
+        HelmholtzProblem<dim>
+        helmholtz_problem_2d (fe, HelmholtzProblem<dim>::global_refinement);
+
+        helmholtz_problem_2d.run ();
+
+        std::cout << std::endl;
+      }
+
+      {
+        std::cout << "Solving with Q2 elements, global refinement" << std::endl
+                  << "===========================================" << std::endl
+                  << std::endl;
+
+        FE_Q<dim> fe(2);
+        HelmholtzProblem<dim>
+        helmholtz_problem_2d (fe, HelmholtzProblem<dim>::global_refinement);
+
+        helmholtz_problem_2d.run ();
+
+        std::cout << std::endl;
+      }
+      {
+        std::cout << "Solving with Q2 elements, adaptive refinement" << std::endl
+                  << "=============================================" << std::endl
+                  << std::endl;
+
+        FE_Q<dim> fe(2);
+        HelmholtzProblem<dim>
+        helmholtz_problem_2d (fe, HelmholtzProblem<dim>::adaptive_refinement);
+
+        helmholtz_problem_2d.run ();
+
+        std::cout << std::endl;
+      }
+    }
+  catch (const std::exception &exc)  // report the message and exit with failure
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    }
+  catch (...)  // anything not derived from std::exception
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    }
+
+  return 0;
+}
+
+
+// What comes here is basically just an annoyance that you can ignore if you
+// are not working on an AIX system: on this system, static member variables
+// are not instantiated automatically when their enclosing class is
+// instantiated. This leads to linker errors if these variables are not
+// explicitly instantiated. As said, this is, strictly C++ standards speaking,
+// not necessary, but it doesn't hurt either on other systems, and since it is
+// necessary to get things running on AIX, why not do it:
+namespace Step7
+{
+  template const double SolutionBase<2>::width;  // explicit instantiation, dim==2 only; presumably must match `dim` in main() -- TODO confirm
+} // namespace Step7
diff --git a/examples/step-8/CMakeLists.txt b/examples/step-8/CMakeLists.txt
new file mode 100644
index 0000000..385d136
--- /dev/null
+++ b/examples/step-8/CMakeLists.txt
@@ -0,0 +1,39 @@
+##
+#  CMake script for the step-8 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-8")
+
+# Declare all source files the target consists of. Here, this is only
+# the one step-X.cc file, but as you expand your project you may wish
+# to add other source files as well. If your project becomes much larger,
+# you may want to either replace the following statement by something like
+#    FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#    FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#    SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc  # expands to step-8.cc given the TARGET set above
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET  # QUIET: the explicit error message below is printed instead
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+
+DEAL_II_INITIALIZE_CACHED_VARIABLES()  # presumably provided by the deal.II package found above; called before PROJECT()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()  # presumably sets up the build of ${TARGET} from TARGET_SRC -- see the deal.II CMake docs
diff --git a/examples/step-8/doc/builds-on b/examples/step-8/doc/builds-on
new file mode 100644
index 0000000..1740273
--- /dev/null
+++ b/examples/step-8/doc/builds-on
@@ -0,0 +1 @@
+step-6
diff --git a/examples/step-8/doc/intro.dox b/examples/step-8/doc/intro.dox
new file mode 100644
index 0000000..ab71b96
--- /dev/null
+++ b/examples/step-8/doc/intro.dox
@@ -0,0 +1,355 @@
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+
+In real life, most partial differential equations are really systems
+of equations. Accordingly, the solutions are usually
+vector-valued. The deal.II library supports such problems (see the
+extensive documentation in the @ref vector_valued module), and we will show
+that that is mostly rather simple. The only more complicated problems
+are in assembling matrix and right hand side, but these are easily
+understood as well.
+
+@dealiiVideoLecture{19}
+
+In this tutorial program we will want to solve the elastic equations. They are
+an extension to Laplace's equation with a vector-valued solution that
+describes the displacement in each space direction of an elastic body
+which is subject to a force. Of course, the force is also
+vector-valued, meaning that in each point it has a direction and an
+absolute value. The elastic equations are the following:
+@f[
+  -
+  \partial_j (c_{ijkl} \partial_k u_l)
+  =
+  f_i,
+  \qquad
+  i=1\ldots d,
+@f]
+where the values $c_{ijkl}$ are the stiffness coefficients and
+will usually depend on the space coordinates. In
+many cases, one knows that the material under consideration is
+isotropic, in which case by introduction of the two coefficients
+$\lambda$ and $\mu$ the coefficient tensor reduces to
+@f[
+  c_{ijkl}
+  =
+  \lambda \delta_{ij} \delta_{kl} +
+  \mu (\delta_{ik} \delta_{jl} + \delta_{il} \delta_{jk}).
+@f]
+
+The elastic equations can then be rewritten in a much simpler form:
+@f[
+   -
+   \nabla \lambda (\nabla\cdot {\mathbf u})
+   -
+   (\nabla \cdot \mu \nabla) {\mathbf u}
+   -
+   \nabla\cdot \mu (\nabla {\mathbf u})^T
+   =
+   {\mathbf f},
+@f]
+and the respective bilinear form is then
+@f[
+  a({\mathbf u}, {\mathbf v}) =
+  \left(
+    \lambda \nabla\cdot {\mathbf u}, \nabla\cdot {\mathbf v}
+  \right)_\Omega
+  +
+  \sum_{k,l}
+  \left(
+    \mu \partial_k u_l, \partial_k v_l
+  \right)_\Omega
+  +
+  \sum_{k,l}
+  \left(
+    \mu \partial_k u_l, \partial_l v_k
+  \right)_\Omega,
+@f]
+or, also writing the first term as a sum over components:
+@f[
+  a({\mathbf u}, {\mathbf v}) =
+  \sum_{k,l}
+  \left(
+    \lambda \partial_l u_l, \partial_k v_k
+  \right)_\Omega
+  +
+  \sum_{k,l}
+  \left(
+    \mu \partial_k u_l, \partial_k v_l
+  \right)_\Omega
+  +
+  \sum_{k,l}
+  \left(
+    \mu \partial_k u_l, \partial_l v_k
+  \right)_\Omega.
+@f]
+
+@note As written, the equations above are generally considered to be the right
+description for the displacement of three-dimensional objects if the
+displacement is small and we can assume that <a
+href="http://en.wikipedia.org/wiki/Hookes_law">Hooke's law</a> is valid. In
+that case, the indices $i,j,k,l$ above all run over the set $\{1,2,3\}$ (or,
+in the C++ source, over $\{0,1,2\}$). However, as is, the program runs in 2d,
+and while the equations above also make mathematical sense in that case, they
+would only describe a truly two-dimensional solid. In particular, they are not
+the appropriate description of an $x-y$ cross-section of a body infinite in
+the $z$ direction; this is in contrast to many other two-dimensional equations
+that can be obtained by assuming that the body has infinite extent in
+$z$-direction and that the solution function does not depend on the $z$
+coordinate. On the other hand, there are equations for two-dimensional models
+of elasticity; see for example the Wikipedia article on <a
+href="http://en.wikipedia.org/wiki/Infinitesimal_strain_theory#Special_cases">plane
+strain</a>, <a
+href="http://en.wikipedia.org/wiki/Antiplane_shear">antiplane shear</a> and <a
+href="http://en.wikipedia.org/wiki/Plane_stress#Plane_stress">plane stress</a>.
+
+But let's get back to the original problem.
+How do we assemble the matrix for such an equation? A very long answer
+with a number of different alternatives is given in the documentation of the
+@ref vector_valued module. Historically, the solution shown below was the only
+one available in the early years of the library. It turns out to also be the
+fastest. On the other hand, if a few per cent of compute time do not matter,
+there are simpler and probably more intuitive ways to assemble the linear
+system than the one discussed below but that weren't available until several
+years after this tutorial program was first written; if you are interested in
+them, take a look at the @ref vector_valued module.
+
+Let us go back to the question of how to assemble the linear system. The first
+thing we need is some knowledge about how the shape functions work in the case
+of vector-valued finite elements. Basically, this comes down to the following:
+let $n$ be the number of shape functions for the scalar finite element of
+which we build the vector element (for example, we will use bilinear functions
+for each component of the vector-valued finite element, so the scalar finite
+element is the <code>FE_Q(1)</code> element which we have used in previous
+examples already, and $n=4$ in two space dimensions). Further, let $N$ be the
+number of shape functions for the vector element; in two space dimensions, we
+need $n$ shape functions for each component of the vector, so $N=2n$. Then,
+the $i$th shape function of the vector element has the form
+@f[
+  \Phi_i({\mathbf x}) = \varphi_{\text{base}(i)}({\mathbf x})\ {\mathbf e}_{\text{comp}(i)},
+@f]
+where $e_l$ is the $l$th unit vector, $\text{comp}(i)$ is the function that tells
+us which component of $\Phi_i$ is the one that is nonzero (for
+each vector shape function, only one component is nonzero, and all others are
+zero). $\varphi_{\text{base}(i)}(x)$ describes the space dependence of the shape
+function, which is taken to be the $\text{base}(i)$-th shape function of the scalar
+element. Of course, while $i$ is in the range $0,\ldots,N-1$, the functions
+$\text{comp}(i)$ and $\text{base}(i)$ have the ranges $0,1$ (in 2D) and $0,\ldots,n-1$,
+respectively.
+
+For example (though this sequence of shape functions is not
+guaranteed, and you should not rely on it),
+the following layout could be used by the library:
+@f{eqnarray*}
+  \Phi_0({\mathbf x}) &=&
+  \left(\begin{array}{c}
+    \varphi_0({\mathbf x}) \\ 0
+  \end{array}\right),
+  \\
+  \Phi_1({\mathbf x}) &=&
+  \left(\begin{array}{c}
+    0 \\ \varphi_0({\mathbf x})
+  \end{array}\right),
+  \\
+  \Phi_2({\mathbf x}) &=&
+  \left(\begin{array}{c}
+    \varphi_1({\mathbf x}) \\ 0
+  \end{array}\right),
+  \\
+  \Phi_3({\mathbf x}) &=&
+  \left(\begin{array}{c}
+    0 \\ \varphi_1({\mathbf x})
+  \end{array}\right),
+  \ldots
+@f}
+where here
+@f[
+  \text{comp}(0)=0, \quad  \text{comp}(1)=1, \quad  \text{comp}(2)=0, \quad  \text{comp}(3)=1, \quad  \ldots
+@f]
+@f[
+  \text{base}(0)=0, \quad  \text{base}(1)=0, \quad  \text{base}(2)=1, \quad  \text{base}(3)=1, \quad  \ldots
+@f]
+
+In all but very rare cases, you will not need to know which shape function
+$\varphi_{\text{base}(i)}$ of the scalar element belongs to a shape function $\Phi_i$
+of the vector element. Let us therefore define
+@f[
+  \phi_i = \varphi_{\text{base}(i)}
+@f]
+by which we can write the vector shape function as
+@f[
+  \Phi_i({\mathbf x}) = \phi_{i}({\mathbf x})\ {\mathbf e}_{\text{comp}(i)}.
+@f]
+You can now safely forget about the function $\text{base}(i)$, at least for the rest
+of this example program.
+
+Now using these vector shape functions, we can write the discrete finite
+element solution as
+@f[
+  {\mathbf u}_h({\mathbf x}) =
+  \sum_i \Phi_i({\mathbf x})\ U_i
+@f]
+with scalar coefficients $U_i$. If we define an analogous function ${\mathbf v}_h$ as
+test function, we can write the discrete problem as follows: Find coefficients
+$U_i$ such that
+@f[
+  a({\mathbf u}_h, {\mathbf v}_h) = ({\mathbf f}, {\mathbf v}_h)
+  \qquad
+  \forall {\mathbf v}_h.
+@f]
+
+If we insert the definition of the bilinear form and the representation of
+${\mathbf u}_h$ and ${\mathbf v}_h$ into this formula:
+@f{eqnarray*}
+  \sum_{i,j}
+    U_i V_j
+  \sum_{k,l}
+  \left\{
+  \left(
+    \lambda \partial_l (\Phi_i)_l, \partial_k (\Phi_j)_k
+  \right)_\Omega
+  +
+  \left(
+    \mu \partial_l (\Phi_i)_k, \partial_l (\Phi_j)_k
+  \right)_\Omega
+  +
+  \left(
+    \mu \partial_l (\Phi_i)_k, \partial_k (\Phi_j)_l
+  \right)_\Omega
+  \right\}
+\\
+=
+  \sum_j V_j
+  \sum_l
+  \left(
+    f_l,
+    (\Phi_j)_l
+  \right)_\Omega.
+@f}
+We note that here and in the following, the indices $k,l$ run over spatial
+directions, i.e. $0\le k,l < d$, and that indices $i,j$ run over degrees
+of freedom.
+
+The local stiffness matrix on cell $K$ therefore has the following entries:
+@f[
+  A^K_{ij}
+  =
+  \sum_{k,l}
+  \left\{
+  \left(
+    \lambda \partial_l (\Phi_i)_l, \partial_k (\Phi_j)_k
+  \right)_K
+  +
+  \left(
+    \mu \partial_l (\Phi_i)_k, \partial_l (\Phi_j)_k
+  \right)_K
+  +
+  \left(
+    \mu \partial_l (\Phi_i)_k, \partial_k (\Phi_j)_l
+  \right)_K
+  \right\},
+@f]
+where $i,j$ now are local degrees of freedom and therefore $0\le i,j < N$.
+In these formulas, we always take some component of the vector shape functions
+$\Phi_i$, which are of course given as follows (see their definition):
+@f[
+  (\Phi_i)_l = \phi_i \delta_{l,\text{comp}(i)},
+@f]
+with the Kronecker symbol $\delta_{nm}$. Due to this, we can delete some of
+the sums over $k$ and $l$:
+@f{eqnarray*}
+  A^K_{ij}
+  &=&
+  \sum_{k,l}
+  \Bigl\{
+  \left(
+    \lambda \partial_l \phi_i\ \delta_{l,\text{comp}(i)},
+            \partial_k \phi_j\ \delta_{k,\text{comp}(j)}
+  \right)_K
+\\
+  &\qquad\qquad& +
+  \left(
+    \mu \partial_l \phi_i\ \delta_{k,\text{comp}(i)},
+        \partial_l \phi_j\ \delta_{k,\text{comp}(j)}
+  \right)_K
+  +
+  \left(
+    \mu \partial_l \phi_i\ \delta_{k,\text{comp}(i)},
+        \partial_k \phi_j\ \delta_{l,\text{comp}(j)}
+  \right)_K
+  \Bigr\}
+\\
+  &=&
+  \left(
+    \lambda \partial_{\text{comp}(i)} \phi_i,
+            \partial_{\text{comp}(j)} \phi_j
+  \right)_K
+  +
+  \sum_l
+  \left(
+    \mu \partial_l \phi_i,
+        \partial_l \phi_j
+  \right)_K
+  \ \delta_{\text{comp}(i),\text{comp}(j)}
+  +
+  \left(
+    \mu \partial_{\text{comp}(j)} \phi_i,
+        \partial_{\text{comp}(i)} \phi_j
+  \right)_K
+\\
+  &=&
+  \left(
+    \lambda \partial_{\text{comp}(i)} \phi_i,
+            \partial_{\text{comp}(j)} \phi_j
+  \right)_K
+  +
+  \left(
+    \mu \nabla \phi_i,
+        \nabla \phi_j
+  \right)_K
+  \ \delta_{\text{comp}(i),\text{comp}(j)}
+  +
+  \left(
+    \mu \partial_{\text{comp}(j)} \phi_i,
+        \partial_{\text{comp}(i)} \phi_j
+  \right)_K.
+@f}
+
+Likewise, the contribution of cell $K$ to the right hand side vector is
+@f{eqnarray*}
+  f^K_j
+  &=&
+  \sum_l
+  \left(
+    f_l,
+    (\Phi_j)_l
+  \right)_K
+\\
+  &=&
+  \sum_l
+  \left(
+    f_l,
+    \phi_j \delta_{l,\text{comp}(j)}
+  \right)_K
+\\
+  &=&
+  \left(
+    f_{\text{comp}(j)},
+    \phi_j
+  \right)_K.
+@f}
+
+This is the form in which we will implement the local stiffness matrix and
+right hand side vectors.
+
+As a final note: in the step-17 example program, we will
+revisit the elastic problem laid out here, and will show how to solve it in
+%parallel on a cluster of computers. The resulting program will thus be able to
+solve this problem to significantly higher accuracy, and more efficiently if
+this is required. In addition, in step-20, @ref step_21
+"step-21", as well as a few other of the later tutorial programs, we will
+revisit some vector-valued problems and show a few techniques that may make it
+simpler to actually go through all the stuff shown above, with
+FiniteElement::system_to_component_index etc.
+
diff --git a/examples/step-8/doc/kind b/examples/step-8/doc/kind
new file mode 100644
index 0000000..c1d9154
--- /dev/null
+++ b/examples/step-8/doc/kind
@@ -0,0 +1 @@
+techniques
diff --git a/examples/step-8/doc/results.dox b/examples/step-8/doc/results.dox
new file mode 100644
index 0000000..1a021ca
--- /dev/null
+++ b/examples/step-8/doc/results.dox
@@ -0,0 +1,58 @@
+<h1>Results</h1>
+
+
+There is not much to be said about the results of this program, other than
+that they look nice. All images were made using Visit from the
+output files that the program wrote to disk. The first two pictures show
+the $x$- and $y$-displacements as scalar components:
+
+<TABLE WIDTH="100%">
+<tr>
+<td>
+<img src="http://www.dealii.org/images/steps/developer/step-8.x.png" alt="">
+</td>
+<td>
+<img src="http://www.dealii.org/images/steps/developer/step-8.y.png" alt="">
+</td>
+</tr>
+</table>
+
+
+You can clearly see the sources of $x$-displacement around $x=0.5$ and
+$x=-0.5$, and of $y$-displacement at the origin. The next image shows the
+final grid after eight steps of refinement:
+
+<img src="http://www.dealii.org/images/steps/developer/step-8.grid.png" alt="">
+
+
+What one frequently would like to do is to show the displacement as a vector
+field, i.e., show vectors that for each point show the direction and magnitude
+of displacement. Unfortunately, that's a bit more involved. To understand why
+this is so, remember that we have just defined our finite element as a
+collection of two  components (in <code>dim=2</code> dimensions). Nowhere have
+we said that this is not just a pressure and a concentration (two scalar
+quantities) but that the two components actually are the parts of a
+vector-valued quantity, namely the displacement. Absent this knowledge, the
+DataOut class assumes that all individual variables we print are separate
+scalars, and Visit then faithfully assumes that this is indeed what it is. In
+other words, once we have written the data as scalars, there is nothing in
+Visit that allows us to paste these two scalar fields back together as a
+vector field. Where we would have to attack this problem is at the root,
+namely in <code>ElasticProblem::output_results</code>. We won't do so here but
+instead refer the reader to the step-22 program where we show how to do this
+for a more general situation. That said, we couldn't help generating the data
+anyway that would show how this would look if implemented as discussed in
+step-22. The vector field then looks like this (Visit randomly selects a few
+hundred vertices from which to draw the vectors; drawing them from each
+individual vertex would make the picture unreadable):
+
+<img src="http://www.dealii.org/images/steps/developer/step-8.vectors.png" alt="">
+
+
+We note that one may have intuitively expected the
+solution to be symmetric about the $x$- and $y$-axes since the $x$- and
+$y$-forces are symmetric with respect to these axes. However, the force
+considered as a vector is not symmetric and consequently neither is
+the solution.
+
+
diff --git a/examples/step-8/doc/tooltip b/examples/step-8/doc/tooltip
new file mode 100644
index 0000000..d8b7084
--- /dev/null
+++ b/examples/step-8/doc/tooltip
@@ -0,0 +1 @@
+Systems of PDE. Elasticity.
diff --git a/examples/step-8/step-8.cc b/examples/step-8/step-8.cc
new file mode 100644
index 0000000..1dc6ed1
--- /dev/null
+++ b/examples/step-8/step-8.cc
@@ -0,0 +1,810 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2000 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Author: Wolfgang Bangerth, University of Heidelberg, 2000
+ */
+
+
+// @sect3{Include files}
+
+// As usual, the first few include files are already known, so we will not
+// comment on them further.
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/precondition.h>
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/grid_refinement.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/matrix_tools.h>
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/numerics/error_estimator.h>
+
+// In this example, we need vector-valued finite elements. The support for
+// these can be found in the following include file:
+#include <deal.II/fe/fe_system.h>
+// We will compose the vector-valued finite elements from regular Q1 elements
+// which can be found here, as usual:
+#include <deal.II/fe/fe_q.h>
+
+// This again is C++:
+#include <fstream>
+#include <iostream>
+
+// The last step is as in previous programs. In particular, just like in
+// step-7, we pack everything that's specific to this program into a namespace
+// of its own.
+namespace Step8
+{
+  using namespace dealii;
+
+  // @sect3{The <code>ElasticProblem</code> class template}
+
+  // The main class is, except for its name, almost unchanged with respect to
+  // the step-6 example.
+  //
+  // The only change is the use of a different class for the <code>fe</code>
+  // variable: Instead of a concrete finite element class such as
+  // <code>FE_Q</code>, we now use a more generic one,
+  // <code>FESystem</code>. In fact, <code>FESystem</code> is not really a
+  // finite element itself in that it does not implement shape functions of
+  // its own.  Rather, it is a class that can be used to stack several other
+  // elements together to form one vector-valued finite element. In our case,
+  // we will compose the vector-valued element of <code>FE_Q(1)</code>
+  // objects, as shown below in the constructor of this class.
+  template <int dim>
+  class ElasticProblem
+  {
+  public:
+    ElasticProblem ();
+    ~ElasticProblem ();
+    void run ();
+
+  private:
+    void setup_system ();
+    void assemble_system ();
+    void solve ();
+    void refine_grid ();
+    void output_results (const unsigned int cycle) const;
+
+    Triangulation<dim>   triangulation;
+    DoFHandler<dim>      dof_handler;
+
+    FESystem<dim>        fe;
+
+    ConstraintMatrix     hanging_node_constraints;
+
+    SparsityPattern      sparsity_pattern;
+    SparseMatrix<double> system_matrix;
+
+    Vector<double>       solution;
+    Vector<double>       system_rhs;
+  };
+
+
+  // @sect3{Right hand side values}
+
+  // Before going over to the implementation of the main class, we declare and
+  // define the class which describes the right hand side. This time, the
+  // right hand side is vector-valued, as is the solution, so we will describe
+  // the changes required for this in some more detail.
+  //
+  // The first thing is that vector-valued functions have to have a
+  // constructor, since they need to tell the base class how many components
+  // the function consists of. The default value in the constructor of the
+  // base class is one (i.e., a scalar function), which is why we did not
+  // need to define a constructor for the scalar function used in previous
+  // programs.
+  template <int dim>
+  class RightHandSide :  public Function<dim>
+  {
+  public:
+    RightHandSide ();
+
+    // The next change is that we want a replacement for the
+    // <code>value</code> function of the previous examples. There, a second
+    // parameter <code>component</code> was given, which denoted which
+    // component was requested. Here, we implement a function that returns the
+    // whole vector of values at the given place at once, in the second
+    // argument of the function. The obvious name for such a replacement
+    // function is <code>vector_value</code>.
+    //
+    // Secondly, in analogy to the <code>value_list</code> function, there is
+    // a function <code>vector_value_list</code>, which returns the values of
+    // the vector-valued function at several points at once:
+    virtual void vector_value (const Point<dim> &p,
+                               Vector<double>   &values) const;
+
+    virtual void vector_value_list (const std::vector<Point<dim> > &points,
+                                    std::vector<Vector<double> >   &value_list) const;
+  };
+
+
+  // This is the constructor of the right hand side class. As said above, it
+  // only passes down to the base class the number of components, which is
+  // <code>dim</code> in the present case (one force component in each of the
+  // <code>dim</code> space directions).
+  //
+  // Some people would have moved the definition of such a short function
+  // right into the class declaration. We do not do that, as a matter of
+  // style: the deal.II style guides require that class declarations contain
+  // only declarations, and that definitions are always to be found
+  // outside. This is, obviously, as much a matter of taste as indentation,
+  // but we try to be consistent in this direction.
+  template <int dim>
+  RightHandSide<dim>::RightHandSide ()
+    :
+    Function<dim> (dim)
+  {}
+
+
+  // Next the function that returns the whole vector of values at the point
+  // <code>p</code> at once.
+  //
+  // To prevent cases where the return vector has not previously been set to
+  // the right size we test for this case and otherwise throw an exception at
+  // the beginning of the function. Note that enforcing that output arguments
+  // already have the correct size is a convention in deal.II, and enforced
+  // almost everywhere. The reason is that we would otherwise have to check at
+  // the beginning of the function and possibly change the size of the output
+  // vector. This is expensive, and would almost always be unnecessary (the
+  // first call to the function would set the vector to the right size, and
+  // subsequent calls would only have to do redundant checks). In addition,
+  // checking and possibly resizing the vector is an operation that can not be
+  // removed if we can't rely on the assumption that the vector already has
+  // the correct size; this is in contrast to the <code>Assert</code> call
+  // that is completely removed if the program is compiled in optimized mode.
+  //
+  // Likewise, if by some accident someone tried to compile and run the
+  // program in only one space dimension (in which the elastic equations do
+  // not make much sense since they reduce to the ordinary Laplace equation),
+  // we terminate the program in the second assertion. The program will work
+  // just fine in 3d, however.
+  template <int dim>
+  inline
+  void RightHandSide<dim>::vector_value (const Point<dim> &p,
+                                         Vector<double>   &values) const
+  {
+    Assert (values.size() == dim,
+            ExcDimensionMismatch (values.size(), dim));
+    Assert (dim >= 2, ExcNotImplemented());
+
+    // The rest of the function implements computing force values. We will use
+    // a constant (unit) force in x-direction located in two little circles
+    // (or spheres, in 3d) around points (0.5,0) and (-0.5,0), and y-force in
+    // an area around the origin; in 3d, the z-component of these centers is
+    // zero as well.
+    //
+    // For this, let us first define two objects that denote the centers of
+    // these areas. Note that upon construction of the <code>Point</code>
+    // objects, all components are set to zero.
+    Point<dim> point_1, point_2;
+    point_1(0) = 0.5;
+    point_2(0) = -0.5;
+
+    // If now the point <code>p</code> is in a circle (sphere) of radius 0.2
+    // around one of these points, then set the force in x-direction to one,
+    // otherwise to zero:
+    if (((p-point_1).norm_square() < 0.2*0.2) ||
+        ((p-point_2).norm_square() < 0.2*0.2))
+      values(0) = 1;
+    else
+      values(0) = 0;
+
+    // Likewise, if <code>p</code> is in the vicinity of the origin, then set
+    // the y-force to 1, otherwise to zero:
+    if (p.norm_square() < 0.2*0.2)
+      values(1) = 1;
+    else
+      values(1) = 0;
+  }
+
+
+
+  // Now, this is the function of the right hand side class that returns the
+  // values at several points at once. The function starts out with checking
+  // that the number of input and output arguments is equal (the sizes of the
+  // individual output vectors will be checked in the function that we call
+  // further down below). Next, we define an abbreviation for the number of
+  // points which we shall work on, to make some things simpler below.
+  template <int dim>
+  void RightHandSide<dim>::vector_value_list (const std::vector<Point<dim> > &points,
+                                              std::vector<Vector<double> >   &value_list) const
+  {
+    Assert (value_list.size() == points.size(),
+            ExcDimensionMismatch (value_list.size(), points.size()));
+
+    const unsigned int n_points = points.size();
+
+    // Finally we treat each of the points. In one of the previous examples,
+    // we have explained why the
+    // <code>value_list</code>/<code>vector_value_list</code> function had
+    // been introduced: to prevent us from calling virtual functions too
+    // frequently. On the other hand, we now need to implement the same
+    // function twice, which can lead to confusion if one function is changed
+    // but the other is not.
+    //
+    // We can prevent this situation by calling
+    // <code>RightHandSide::vector_value</code> on each point in the input
+    // list. Note that by giving the full name of the function, including the
+    // class name, we instruct the compiler to explicitly call this function,
+    // and not to use the virtual function call mechanism that would be used
+    // if we had just called <code>vector_value</code>. This is important,
+    // since the compiler generally can't make any assumptions which function
+    // is called when using virtual functions, and it therefore can't inline
+    // the called function into the site of the call. On the contrary, here we
+    // give the fully qualified name, which bypasses the virtual function
+    // call, and consequently the compiler knows exactly which function is
+    // called and will inline the above function into the present location.
+    // (Note that we have declared the <code>vector_value</code> function
+    // above <code>inline</code>, though modern compilers are also able to
+    // inline functions even if they have not been declared as inline.)
+    //
+    // It is worth noting why we go to such length explaining what we
+    // do. Using this construct, we manage to avoid any inconsistency: if we
+    // want to change the right hand side function, it would be difficult to
+    // always remember that we always have to change two functions in the same
+    // way. Using this forwarding mechanism, we only have to change a single
+    // place (the <code>vector_value</code> function), and the second place
+    // (the <code>vector_value_list</code> function) will always be consistent
+    // with it. At the same time, using virtual function call bypassing, the
+    // code is no less efficient than if we had written it twice in the first
+    // place:
+    for (unsigned int p=0; p<n_points; ++p)
+      RightHandSide<dim>::vector_value (points[p],
+                                        value_list[p]);
+  }
+
+
+
+  // @sect3{The <code>ElasticProblem</code> class implementation}
+
+  // @sect4{ElasticProblem::ElasticProblem}
+
+  // Following is the constructor of the main class. As said before, we would
+  // like to construct a vector-valued finite element that is composed of
+  // several scalar finite elements (i.e., we want to build the vector-valued
+  // element so that each of its vector components consists of the shape
+  // functions of a scalar element). Of course, the number of scalar finite
+  // elements we would like to stack together equals the number of components
+  // the solution function has, which is <code>dim</code> since we consider
+  // displacement in each space direction. The <code>FESystem</code> class can
+  // handle this: we pass it the finite element of which we would like to
+  // compose the system, and how often it shall be repeated:
+
+  template <int dim>
+  ElasticProblem<dim>::ElasticProblem ()
+    :
+    dof_handler (triangulation),
+    fe (FE_Q<dim>(1), dim)
+  {}
+  // In fact, the <code>FESystem</code> class has several more constructors
+  // which can perform more complex operations than just stacking together
+  // several scalar finite elements of the same type into one; we will get to
+  // know these possibilities in later examples.
+
+
+
+  // @sect4{ElasticProblem::~ElasticProblem}
+
+  // The destructor, on the other hand, is exactly as in step-6:
+  template <int dim>
+  ElasticProblem<dim>::~ElasticProblem ()
+  {
+    dof_handler.clear ();
+  }
+
+
+  // @sect4{ElasticProblem::setup_system}
+
+  // Setting up the system of equations is identical to the function used in
+  // the step-6 example. The <code>DoFHandler</code> class and all other
+  // classes used here are fully aware that the finite element we want to use
+  // is vector-valued, and take care of the vector-valuedness of the finite
+  // element themselves. (In fact, they do not, but this does not need to
+  // bother you: since they only need to know how many degrees of freedom
+  // there are per vertex, line and cell, and they do not ask what they
+  // represent, i.e. whether the finite element under consideration is
+  // vector-valued or whether it is, for example, a scalar Hermite element
+  // with several degrees of freedom on each vertex).
+  template <int dim>
+  void ElasticProblem<dim>::setup_system ()
+  {
+    dof_handler.distribute_dofs (fe);
+    hanging_node_constraints.clear ();
+    DoFTools::make_hanging_node_constraints (dof_handler,
+                                             hanging_node_constraints);
+    hanging_node_constraints.close ();
+
+    DynamicSparsityPattern dsp(dof_handler.n_dofs(), dof_handler.n_dofs());
+    DoFTools::make_sparsity_pattern(dof_handler,
+                                    dsp,
+                                    hanging_node_constraints,
+                                    /*keep_constrained_dofs = */ true);
+    sparsity_pattern.copy_from (dsp);
+
+    system_matrix.reinit (sparsity_pattern);
+
+    solution.reinit (dof_handler.n_dofs());
+    system_rhs.reinit (dof_handler.n_dofs());
+  }
+
+
+  // @sect4{ElasticProblem::assemble_system}
+
+  // The big changes in this program are in the creation of matrix and right
+  // hand side, since they are problem-dependent. We will go through that
+  // process step-by-step, since it is a bit more complicated than in previous
+  // examples.
+  //
+  // The first parts of this function are the same as before, however: setting
+  // up a suitable quadrature formula, initializing an <code>FEValues</code>
+  // object for the (vector-valued) finite element we use as well as the
+  // quadrature object, and declaring a number of auxiliary arrays. In
+  // addition, we declare the ever same two abbreviations:
+  // <code>n_q_points</code> and <code>dofs_per_cell</code>. The number of
+  // degrees of freedom per cell we now obviously ask from the composed finite
+  // element rather than from the underlying scalar Q1 element. Here, it is
+  // <code>dim</code> times the number of degrees of freedom per cell of the
+  // Q1 element, though this is not explicit knowledge we need to care about:
+  template <int dim>
+  void ElasticProblem<dim>::assemble_system ()
+  {
+    QGauss<dim>  quadrature_formula(2);
+
+    FEValues<dim> fe_values (fe, quadrature_formula,
+                             update_values   | update_gradients |
+                             update_quadrature_points | update_JxW_values);
+
+    const unsigned int   dofs_per_cell = fe.dofs_per_cell;
+    const unsigned int   n_q_points    = quadrature_formula.size();
+
+    FullMatrix<double>   cell_matrix (dofs_per_cell, dofs_per_cell);
+    Vector<double>       cell_rhs (dofs_per_cell);
+
+    std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+
+    // As was shown in previous examples as well, we need a place where to
+    // store the values of the coefficients at all the quadrature points on a
+    // cell. In the present situation, we have two coefficients, lambda and
+    // mu.
+    std::vector<double>     lambda_values (n_q_points);
+    std::vector<double>     mu_values (n_q_points);
+
+    // Well, we could as well have omitted the above two arrays since we will
+    // use constant coefficients for both lambda and mu, which can be declared
+    // like this. They both represent functions always returning the constant
+    // value 1.0. Although we could omit the respective factors in the
+    // assemblage of the matrix, we use them here for purpose of
+    // demonstration.
+    ConstantFunction<dim> lambda(1.), mu(1.);
+
+    // Then again, we need to have the same for the right hand side. This is
+    // exactly as before in previous examples. However, we now have a
+    // vector-valued right hand side, which is why the data type of the
+    // <code>rhs_values</code> array is changed. We initialize it by
+    // <code>n_q_points</code> elements, each of which is a
+    // <code>Vector@<double@></code> with <code>dim</code> elements.
+    RightHandSide<dim>      right_hand_side;
+    std::vector<Vector<double> > rhs_values (n_q_points,
+                                             Vector<double>(dim));
+
+
+    // Now we can begin with the loop over all cells:
+    typename DoFHandler<dim>::active_cell_iterator cell = dof_handler.begin_active(),
+                                                   endc = dof_handler.end();
+    for (; cell!=endc; ++cell)
+      {
+        cell_matrix = 0;
+        cell_rhs = 0;
+
+        fe_values.reinit (cell);
+
+        // Next we get the values of the coefficients at the quadrature
+        // points. Likewise for the right hand side:
+        lambda.value_list (fe_values.get_quadrature_points(), lambda_values);
+        mu.value_list     (fe_values.get_quadrature_points(), mu_values);
+
+        right_hand_side.vector_value_list (fe_values.get_quadrature_points(),
+                                           rhs_values);
+
+        // Then assemble the entries of the local stiffness matrix and right
+        // hand side vector. This follows almost one-to-one the pattern
+        // described in the introduction of this example.  One of the few
+        // comments in place is that we can compute the number
+        // <code>comp(i)</code>, i.e. the index of the only nonzero vector
+        // component of shape function <code>i</code> using the
+        // <code>fe.system_to_component_index(i).first</code> function call
+        // below.
+        //
+        // (By accessing the <code>first</code> variable of the return value
+        // of the <code>system_to_component_index</code> function, you might
+        // already have guessed that there is more in it. In fact, the
+        // function returns a <code>std::pair@<unsigned int, unsigned
+        // int@></code>, of which the first element is <code>comp(i)</code>
+        // and the second is the value <code>base(i)</code> also noted in the
+        // introduction, i.e., the index of this shape function within all
+        // the shape functions that are nonzero in this component (called
+        // <code>base(i)</code> in the notation of the introduction). This
+        // is not a number that we are usually interested in, however.)
+        //
+        // With this knowledge, we can assemble the local matrix
+        // contributions:
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          {
+            const unsigned int
+            component_i = fe.system_to_component_index(i).first;
+
+            for (unsigned int j=0; j<dofs_per_cell; ++j)
+              {
+                const unsigned int
+                component_j = fe.system_to_component_index(j).first;
+
+                for (unsigned int q_point=0; q_point<n_q_points;
+                     ++q_point)
+                  {
+                    cell_matrix(i,j)
+                    +=
+                      // The first term is (lambda d_i u_i, d_j v_j) + (mu d_i
+                      // u_j, d_j v_i).  Note that
+                      // <code>shape_grad(i,q_point)</code> returns the
+                      // gradient of the only nonzero component of the i-th
+                      // shape function at quadrature point q_point. The
+                      // component <code>comp(i)</code> of the gradient, which
+                      // is the derivative of this only nonzero vector
+                      // component of the i-th shape function with respect to
+                      // the comp(i)th coordinate is accessed by the appended
+                      // brackets.
+                      (
+                        (fe_values.shape_grad(i,q_point)[component_i] *
+                         fe_values.shape_grad(j,q_point)[component_j] *
+                         lambda_values[q_point])
+                        +
+                        (fe_values.shape_grad(i,q_point)[component_j] *
+                         fe_values.shape_grad(j,q_point)[component_i] *
+                         mu_values[q_point])
+                        +
+                        // The second term is (mu nabla u_i, nabla v_j).  We
+                        // need not access a specific component of the
+                        // gradient, since we only have to compute the scalar
+                        // product of the two gradients, of which an
+                        // overloaded version of the operator* takes care, as
+                        // in previous examples.
+                        //
+                        // Note that by using the ?: operator, we only do this
+                        // if comp(i) equals comp(j), otherwise a zero is
+                        // added (which will be optimized away by the
+                        // compiler).
+                        ((component_i == component_j) ?
+                         (fe_values.shape_grad(i,q_point) *
+                          fe_values.shape_grad(j,q_point) *
+                          mu_values[q_point])  :
+                         0)
+                      )
+                      *
+                      fe_values.JxW(q_point);
+                  }
+              }
+          }
+
+        // Assembling the right hand side is also just as discussed in the
+        // introduction:
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          {
+            const unsigned int
+            component_i = fe.system_to_component_index(i).first;
+
+            for (unsigned int q_point=0; q_point<n_q_points; ++q_point)
+              cell_rhs(i) += fe_values.shape_value(i,q_point) *
+                             rhs_values[q_point](component_i) *
+                             fe_values.JxW(q_point);
+          }
+
+        // The transfer from local degrees of freedom into the global matrix
+        // and right hand side vector does not depend on the equation under
+        // consideration, and is thus the same as in all previous
+        // examples. The same holds for the elimination of hanging nodes from
+        // the matrix and right hand side, once we are done with assembling
+        // the entire linear system:
+        cell->get_dof_indices (local_dof_indices);
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          {
+            for (unsigned int j=0; j<dofs_per_cell; ++j)
+              system_matrix.add (local_dof_indices[i],
+                                 local_dof_indices[j],
+                                 cell_matrix(i,j));
+
+            system_rhs(local_dof_indices[i]) += cell_rhs(i);
+          }
+      }
+
+    hanging_node_constraints.condense (system_matrix);
+    hanging_node_constraints.condense (system_rhs);
+
+    // The interpolation of the boundary values needs a small modification:
+    // since the solution function is vector-valued, so need to be the
+    // boundary values. The <code>ZeroFunction</code> constructor accepts a
+    // parameter that tells it that it shall represent a vector valued,
+    // constant zero function with that many components. By default, this
+    // parameter is equal to one, in which case the <code>ZeroFunction</code>
+    // object would represent a scalar function. Since the solution vector has
+    // <code>dim</code> components, we need to pass <code>dim</code> as number
+    // of components to the zero function as well.
+    std::map<types::global_dof_index,double> boundary_values;
+    VectorTools::interpolate_boundary_values (dof_handler,
+                                              0,
+                                              ZeroFunction<dim>(dim),
+                                              boundary_values);
+    MatrixTools::apply_boundary_values (boundary_values,
+                                        system_matrix,
+                                        solution,
+                                        system_rhs);
+  }
+
+
+
+  // @sect4{ElasticProblem::solve}
+
+  // The solver does not care about where the system of equations comes
+  // from, as long as it stays positive definite and symmetric (which are
+  // the requirements for the use of the CG solver), which the system indeed
+  // is. Therefore, we need not change anything.
+  template <int dim>
+  void ElasticProblem<dim>::solve ()
+  {
+    SolverControl           solver_control (1000, 1e-12);
+    SolverCG<>              cg (solver_control);
+
+    PreconditionSSOR<> preconditioner;
+    preconditioner.initialize(system_matrix, 1.2);
+
+    cg.solve (system_matrix, solution, system_rhs,
+              preconditioner);
+
+    hanging_node_constraints.distribute (solution);
+  }
+
+
+  // @sect4{ElasticProblem::refine_grid}
+
+  // The function that does the refinement of the grid is the same as in the
+  // step-6 example. The quadrature formula is adapted to the linear elements
+  // again. Note that the error estimator by default adds up the estimates
+  // obtained from all components of the finite element solution, i.e., it
+  // uses the displacement in all directions with the same weight. If we would
+  // like the grid to be adapted to the x-displacement only, we could pass the
+  // function an additional parameter which tells it to do so and not to
+  // consider the displacements in all other directions for the error
+  // indicators. However, for the current problem, it seems appropriate to
+  // consider all displacement components with equal weight.
+  template <int dim>
+  void ElasticProblem<dim>::refine_grid ()
+  {
+    Vector<float> estimated_error_per_cell (triangulation.n_active_cells());
+
+    KellyErrorEstimator<dim>::estimate (dof_handler,
+                                        QGauss<dim-1>(2),
+                                        typename FunctionMap<dim>::type(),
+                                        solution,
+                                        estimated_error_per_cell);
+
+    GridRefinement::refine_and_coarsen_fixed_number (triangulation,
+                                                     estimated_error_per_cell,
+                                                     0.3, 0.03);
+
+    triangulation.execute_coarsening_and_refinement ();
+  }
+
+
+  // @sect4{ElasticProblem::output_results}
+
+  // The output happens mostly as has been shown in previous examples
+  // already. The only difference is that the solution function is vector
+  // valued. The <code>DataOut</code> class takes care of this automatically,
+  // but we have to give each component of the solution vector a different
+  // name.
+  template <int dim>
+  void ElasticProblem<dim>::output_results (const unsigned int cycle) const
+  {
+    std::string filename = "solution-";
+    filename += ('0' + cycle);
+    Assert (cycle < 10, ExcInternalError());
+
+    filename += ".vtk";
+    std::ofstream output (filename.c_str());
+
+    DataOut<dim> data_out;
+    data_out.attach_dof_handler (dof_handler);
+
+
+
+    // As said above, we need a different name for each component of the
+    // solution function. To pass one name for each component, a vector of
+    // strings is used. Since the number of components is the same as the
+    // number of dimensions we are working in, the following
+    // <code>switch</code> statement is used.
+    //
+    // We note that some graphics programs have restrictions as to what
+    // characters are allowed in the names of variables. The library therefore
+    // supports only the minimal subset of these characters that is supported
+    // by all programs. Basically, these are letters, numbers, underscores,
+    // and some other characters, but in particular no whitespace and
+    // minus/hyphen. The library will throw an exception otherwise, at least
+    // if in debug mode.
+    //
+    // After listing the 1d, 2d, and 3d cases, it is good style to let the
+    // program die if we run into a case which we did not consider. Remember
+    // that the <code>Assert</code> macro generates an exception if the
+    // condition in the first parameter is not satisfied. Of course, the
+    // condition <code>false</code> can never be satisfied, so the program
+    // will always abort whenever it gets to the default statement:
+    std::vector<std::string> solution_names;
+    switch (dim)
+      {
+      case 1:
+        solution_names.push_back ("displacement");
+        break;
+      case 2:
+        solution_names.push_back ("x_displacement");
+        solution_names.push_back ("y_displacement");
+        break;
+      case 3:
+        solution_names.push_back ("x_displacement");
+        solution_names.push_back ("y_displacement");
+        solution_names.push_back ("z_displacement");
+        break;
+      default:
+        Assert (false, ExcNotImplemented());
+      }
+
+    // After setting up the names for the different components of the solution
+    // vector, we can add the solution vector to the list of data vectors
+    // scheduled for output. Note that the following function takes a vector
+    // of strings as second argument, whereas the one which we have used in
+    // all previous examples accepted a string there. In fact, the latter
+    // function is only a shortcut for the function which we call here: it
+    // puts the single string that is passed to it into a vector of strings
+    // with only one element and forwards that to the other function.
+    data_out.add_data_vector (solution, solution_names);
+    data_out.build_patches ();
+    data_out.write_vtk (output);
+  }
+
+
+
+  // @sect4{ElasticProblem::run}
+
+  // The <code>run</code> function does the same things as in step-6, for
+  // example. This time, we use the square [-1,1]^d as domain, and we refine
+  // it twice globally before starting the first iteration.
+  //
+  // The reason for refining twice is a bit accidental: we use the QGauss quadrature
+  // formula with two points in each direction for integration of the right
+  // hand side; that means that there are four quadrature points on each cell
+  // (in 2D). If we only refine the initial grid once globally, then there
+  // will be only four quadrature points in each direction on the
+  // domain. However, the right hand side function was chosen to be rather
+  // localized and in that case, by pure chance, it happens that all quadrature
+  // points lie at points where the right hand side function is zero (in
+  // mathematical terms, the quadrature points happen to be at points outside
+  // the <i>support</i> of the right hand side function). The right hand side
+  // vector computed with quadrature will then contain only zeroes (even though
+  // it would of course be nonzero if we had computed the right hand side vector
+  // exactly using the integral) and the solution of the system of
+  // equations is the zero vector, i.e., a finite element function that is zero
+  // everywhere. In a sense, we
+  // should not be surprised that this is happening since we have chosen
+  // an initial grid that is totally unsuitable for the problem at hand.
+  //
+  // The unfortunate thing is that if the discrete solution is constant, then
+  // the error indicators computed by the <code>KellyErrorEstimator</code>
+  // class are zero for each cell as well, and the call to
+  // <code>refine_and_coarsen_fixed_number</code> on the
+  // <code>triangulation</code> object will not flag any cells for refinement
+  // (why should it if the indicated error is zero for each cell?). The grid
+  // in the next iteration will therefore consist of four cells only as well,
+  // and the same problem occurs again.
+  //
+  // The conclusion needs to be: while of course we will not choose the
+  // initial grid to be well-suited for the accurate solution of the problem,
+  // we must at least choose it such that it has the chance to capture the
+  // important features of the solution. In this case, it needs to be able
+  // to see the right hand side. Thus, we refine twice globally. (Any larger
+  // number of global refinement steps would of course also work.)
+  template <int dim>
+  void ElasticProblem<dim>::run ()
+  {
+    for (unsigned int cycle=0; cycle<8; ++cycle)  // cycle 0 builds the mesh, cycles 1-7 refine it
+      {
+        std::cout << "Cycle " << cycle << ':' << std::endl;
+
+        if (cycle == 0)
+          {
+            GridGenerator::hyper_cube (triangulation, -1, 1);  // domain [-1,1]^dim
+            triangulation.refine_global (2);  // two global refinements of the initial mesh
+          }
+        else
+          refine_grid ();  // all later cycles start from the previous cycle's mesh
+
+        std::cout << "   Number of active cells:       "
+                  << triangulation.n_active_cells()
+                  << std::endl;
+
+        setup_system ();  // called after every mesh change, before the DoF count is printed
+
+        std::cout << "   Number of degrees of freedom: "
+                  << dof_handler.n_dofs()
+                  << std::endl;
+
+        assemble_system ();
+        solve ();
+        output_results (cycle);  // the cycle number is passed on to the output routine
+      }
+  }
+}
+
+// @sect3{The <code>main</code> function}
+
+// After closing the <code>Step8</code> namespace in the last line above, the
+// following is the main function of the program and is again exactly like in
+// step-6 (apart from the changed class names, of course).
+int main ()
+{
+  try
+    {
+      Step8::ElasticProblem<2> elastic_problem_2d;  // instantiate the problem for dim == 2
+      elastic_problem_2d.run ();
+    }
+  catch (std::exception &exc)  // exceptions that carry a message: print what()
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+
+      return 1;  // nonzero exit status reports the failure to the caller
+    }
+  catch (...)  // anything that is not derived from std::exception
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;  // same failure status as above
+    }
+
+  return 0;  // reached only if run() returned without throwing
+}
diff --git a/examples/step-9/CMakeLists.txt b/examples/step-9/CMakeLists.txt
new file mode 100644
index 0000000..31e4e18
--- /dev/null
+++ b/examples/step-9/CMakeLists.txt
@@ -0,0 +1,39 @@
+##
+#  CMake script for the step-9 tutorial program:
+##
+
+# Set the name of the project and target:
+SET(TARGET "step-9")
+
+# Declare all source files the target consists of. Here, this is only
+# the one ${TARGET}.cc file (step-9.cc), but as you expand your project you
+# may wish to add other source files as well. If your project becomes much
+# larger, you may want to either replace the following statement by something like
+#    FILE(GLOB_RECURSE TARGET_SRC  "source/*.cc")
+#    FILE(GLOB_RECURSE TARGET_INC  "include/*.h")
+#    SET(TARGET_SRC ${TARGET_SRC}  ${TARGET_INC})
+# or switch altogether to the large project CMakeLists.txt file discussed
+# in the "CMake in user projects" page accessible from the "User info"
+# page of the documentation.
+SET(TARGET_SRC
+  ${TARGET}.cc
+  )
+
+# Usually, you will not need to modify anything beyond this point...
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+FIND_PACKAGE(deal.II 8.4 QUIET
+  HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR}
+  )
+IF(NOT ${deal.II_FOUND})  # QUIET above silences FIND_PACKAGE, so failure is reported here
+  MESSAGE(FATAL_ERROR "\n"
+    "*** Could not locate a (sufficiently recent) version of deal.II. ***\n\n"
+    "You may want to either pass a flag -DDEAL_II_DIR=/path/to/deal.II to cmake\n"
+    "or set an environment variable \"DEAL_II_DIR\" that contains this path."
+    )
+ENDIF()
+# NOTE(review): the DEAL_II_* macros below are presumably provided by the package found above
+DEAL_II_INITIALIZE_CACHED_VARIABLES()
+PROJECT(${TARGET})
+DEAL_II_INVOKE_AUTOPILOT()
diff --git a/examples/step-9/doc/builds-on b/examples/step-9/doc/builds-on
new file mode 100644
index 0000000..1740273
--- /dev/null
+++ b/examples/step-9/doc/builds-on
@@ -0,0 +1 @@
+step-6
diff --git a/examples/step-9/doc/intro.dox b/examples/step-9/doc/intro.dox
new file mode 100644
index 0000000..5572cc3
--- /dev/null
+++ b/examples/step-9/doc/intro.dox
@@ -0,0 +1,292 @@
+<a name="Intro"></a>
+<h1>Introduction</h1>
+
+
+In this example, our aims are the following:
+<ol>
+  <li> solve the advection equation $\beta \cdot \nabla u = f$;
+  <li> show how we can use multiple threads to get quicker to
+    the desired results if we have a multi-processor machine;
+  <li> develop a simple refinement criterion.
+</ol>
+While the second aim is difficult to describe in general terms without
+reference to the code, we will discuss the other two aims in the
+following. The use of multiple threads will then be detailed at the
+relevant places within the program. We will, however, follow the
+general discussion of the WorkStream approach detailed in the
+@ref threads "Parallel computing with multiple processors accessing shared memory"
+documentation module.
+
+
+<h3>Discretizing the advection equation</h3>
+
+In the present example program, we shall numerically approximate the
+solution of the advection equation
+@f[
+  \beta \cdot \nabla u = f,
+@f]
+where $\beta$ is a vector field that describes advection direction and
+speed (which may be dependent on the space variables), $f$ is a source
+function, and $u$ is the solution. The physical process that this
+equation describes is that of a given flow field $\beta$, with which
+another substance is transported, the density or concentration of
+which is given by $u$. The equation does not contain diffusion of this
+second species within its carrier substance, but there are source
+terms.
+
+It is obvious that at the inflow, the above equation needs to be
+augmented by boundary conditions:
+@f[
+  u = g \qquad\qquad \mathrm{on}\ \partial\Omega_-,
+@f]
+where $\partial\Omega_-$ describes the inflow portion of the boundary and is
+formally defined by
+@f[
+  \partial\Omega_-
+  =
+  \{{\mathbf x}\in \partial\Omega: \beta\cdot{\mathbf n}({\mathbf x}) < 0\},
+@f]
+and ${\mathbf n}({\mathbf x})$ being the outward normal to the domain at point
+${\mathbf x}\in\partial\Omega$. This definition is quite intuitive, since
+as ${\mathbf n}$ points outward, the scalar product with $\beta$ can only
+be negative if the transport direction $\beta$ points inward, i.e. at
+the inflow boundary. The mathematical theory states that we must not
+pose any boundary condition on the outflow part of the boundary.
+
+As it is stated, the transport equation is not stably solvable using
+the standard finite element method, however. The problem is that
+solutions to this equation possess only insufficient regularity
+orthogonal to the transport direction: while they are smooth %parallel
+to $\beta$, they may be discontinuous perpendicular to this
+direction. These discontinuities lead to numerical instabilities that
+make a stable solution by a straight-forward discretization
+impossible. We will thus use the streamline diffusion stabilized
+formulation, in which we test the equation with test functions $v +
+\delta \beta\cdot\nabla v$ instead of $v$, where $\delta$ is a
+parameter that is chosen in the range of the (local) mesh width $h$;
+good results are usually obtained by setting $\delta=0.1h$. Note that
+the modification in the test function vanishes as the mesh size tends
+to zero. We will not discuss reasons, pros, and cons of the streamline
+diffusion method, but rather use it ``as is'', and refer the
+interested reader to the sufficiently available literature; every
+recent good book on finite elements should have a discussion of that
+topic.
+
+Using the test functions as defined above, the weak formulation of
+our stabilized problem reads: find a discrete function $u_h$ such that
+for all discrete test functions $v_h$ there holds
+@f[
+  (\beta \cdot \nabla u_h, v_h + \delta \beta\cdot\nabla v_h)_\Omega
+  -
+  (\beta\cdot {\mathbf n} u_h, v_h)_{\partial\Omega_-}
+  =
+  (f, v_h + \delta \beta\cdot\nabla v_h)_\Omega
+  -
+  (\beta\cdot {\mathbf n} g, v_h)_{\partial\Omega_-}.
+@f]
+Note that we have included the inflow boundary values into the weak
+form, and that the respective terms to the left hand side operator are
+positive definite due to the fact that $\beta\cdot{\mathbf n}<0$ on the
+inflow boundary. One would think that this leads to a system matrix
+to be inverted of the form
+@f[
+  a_{ij} =
+  (\beta \cdot \nabla \varphi_i,
+   \varphi_j + \delta \beta\cdot\nabla \varphi_j)_\Omega
+  -
+  (\beta\cdot {\mathbf n} \varphi_i, \varphi_j)_{\partial\Omega_-},
+@f]
+with basis functions $\varphi_i,\varphi_j$.  However, this is a
+pitfall that happens to every numerical analyst at least once
+(including the author): we have here expanded the solution
+$u_h = u_i \varphi_i$, but if we do so, we will have to solve the
+problem
+@f[
+  {\mathbf u}^T A = {\mathbf f}^T,
+@f]
+where ${\mathbf u}=(u_i)$, i.e., we have to solve the transpose problem of
+what we might have expected naively.
+
+This is a point we made in the introduction of step-3. There, we argued that
+to avoid this very kind of problem, one should get in the habit of always
+multiplying with test functions <i>from the left</i> instead of from the right
+to obtain the correct matrix right away. In order to obtain the form
+of the linear system that we need, it is therefore best to rewrite the weak
+formulation to
+@f[
+  (v_h + \delta \beta\cdot\nabla v_h, \beta \cdot \nabla u_h)_\Omega
+  -
+  (\beta\cdot {\mathbf n} v_h, u_h)_{\partial\Omega_-}
+  =
+  (v_h + \delta \beta\cdot\nabla v_h, f)_\Omega
+  -
+  (\beta\cdot {\mathbf n} v_h, g)_{\partial\Omega_-}
+@f]
+and then to obtain
+@f[
+  a_{ij} =
+  (\varphi_i + \delta \beta \cdot \nabla \varphi_i,
+   \beta\cdot\nabla \varphi_j)_\Omega
+  -
+  (\beta\cdot {\mathbf n} \varphi_i, \varphi_j)_{\partial\Omega_-},
+@f]
+as system matrix. We will assemble this matrix in the program.
+
+There remains the solution of this linear system of equations. As the
+resulting matrix is no longer symmetric positive definite, we can't
+employ the usual CG method any more. Suitable for the solution of
+systems as the one at hand is the BiCGStab (bi-conjugate gradients
+stabilized) method, which is also available in deal.II, so we will use
+it.
+
+
+Regarding the exact form of the problem which we will solve, we use
+the following domain and functions (in $d=2$ space dimensions):
+@f{eqnarray*}
+  \Omega &=& [-1,1]^d \\
+  \beta({\mathbf x})
+  &=&
+  \left(
+    \begin{array}{c}2 \\ 1+\frac 45 \sin(8\pi x)\end{array}
+  \right),
+  \\
+  f({\mathbf x})
+  &=&
+  \left\{
+    \begin{array}{ll}
+        \frac 1{10 s^d} &
+        \mathrm{for}\ |{\mathbf x}-{\mathbf x}_0|<s, \\
+        0 & \mathrm{else},
+    \end{array}
+  \right.
+  \qquad\qquad
+  {\mathbf x}_0
+  =
+  \left(
+    \begin{array}{c} -\frac 34 \\ -\frac 34\end{array}
+  \right),
+  \\
+  g
+  &=&
+  e^{5(1-|{\mathbf x}|^2)} \sin(16\pi|{\mathbf x}|^2).
+@f}
+For $d>2$, we extend $\beta$ and ${\mathbf x}_0$ by repeating their last
+component in each additional dimension. Regarding these functions, we have
+the following comments:
+<ol>
+<li> The advection field $\beta$ transports the solution roughly in
+diagonal direction from lower left to upper right, but with a wiggle
+structure superimposed.
+<li> The right hand side adds to the field generated by the inflow
+boundary conditions a blob in the lower left corner, which is then
+transported along.
+<li> The inflow boundary conditions impose a weighted sinusoidal
+structure that is transported along with the flow field. Since
+$|{\mathbf x}|\ge 1$ on the boundary, the weighting term never gets very large.
+</ol>
+
+
+<h3>A simple refinement criterion</h3>
+
+In all previous examples with adaptive refinement, we have used an
+error estimator first developed by Kelly et al., which assigns to each
+cell $K$ the following indicator:
+@f[
+  \eta_K =
+  \left(
+    \frac {h_K}{12}
+    \int_{\partial K}
+      [\partial_n u_h]^2 \; d\sigma
+  \right)^{1/2},
+@f]
+where $[\partial_n u_h]$ denotes the jump of the normal derivatives
+across a face $\gamma\subset\partial K$ of the cell $K$. It can be
+shown that this error indicator uses a discrete analogue of the second
+derivatives, weighted by a power of the cell size that is adjusted to
+the linear elements assumed to be in use here:
+@f[
+  \eta_K \approx
+  C h \| \nabla^2 u \|_K,
+@f]
+which itself is related to the error size in the energy norm.
+
+The problem with this error indicator in the present case is that it
+assumes that the exact solution possesses second derivatives. This is
+already questionable for solutions to Laplace's problem in some cases,
+although there most problems allow solutions in $H^2$. If solutions
+are only in $H^1$, then the second derivatives would be singular in
+some parts (of lower dimension) of the domain and the error indicators
+would not reduce there under mesh refinement. Thus, the algorithm
+would continuously refine the cells around these parts, i.e. would
+refine into points or lines (in 2d).
+
+However, for the present case, solutions are usually not even in $H^1$
+(and this missing regularity is not the exceptional case as for
+Laplace's equation), so the error indicator described above is not
+really applicable. We will thus develop an indicator that is based on
+a discrete approximation of the gradient. Although the gradient often
+does not exist, this is the only criterion available to us, at least
+as long as we use continuous elements as in the present
+example. To start with, we note that given two cells $K$, $K'$ of
+which the centers are connected by the vector ${\mathbf y}_{KK'}$, we can
+approximate the directional derivative of a function $u$ as follows:
+@f[
+  \frac{{\mathbf y}_{KK'}^T}{|{\mathbf y}_{KK'}|} \nabla u
+  \approx
+  \frac{u(K') - u(K)}{|{\mathbf y}_{KK'}|},
+@f]
+where $u(K)$ and $u(K')$ denote $u$ evaluated at the centers of the
+respective cells. We now multiply the above approximation by
+${\mathbf y}_{KK'}/|{\mathbf y}_{KK'}|$ and sum over all neighbors $K'$ of $K$:
+@f[
+  \underbrace{
+    \left(\sum_{K'} \frac{{\mathbf y}_{KK'} {\mathbf y}_{KK'}^T}
+                         {|{\mathbf y}_{KK'}|^2}\right)}_{=:Y}
+  \nabla u
+  \approx
+  \sum_{K'}
+  \frac{{\mathbf y}_{KK'}}{|{\mathbf y}_{KK'}|}
+  \frac{u(K') - u(K)}{|{\mathbf y}_{KK'}|}.
+@f]
+If the vectors ${\mathbf y}_{KK'}$ connecting $K$ with its neighbors span
+the whole space (i.e. roughly: $K$ has neighbors in all directions),
+then the term in parentheses in the left hand side expression forms a
+regular matrix, which we can invert to obtain an approximation of the
+gradient of $u$ on $K$:
+@f[
+  \nabla u
+  \approx
+  Y^{-1}
+  \left(
+    \sum_{K'}
+    \frac{{\mathbf y}_{KK'}}{|{\mathbf y}_{KK'}|}
+    \frac{u(K') - u(K)}{|{\mathbf y}_{KK'}|}
+  \right).
+@f]
+We will denote the approximation on the right hand side by
+$\nabla_h u(K)$, and we will use the following quantity as refinement
+criterion:
+@f[
+  \eta_K = h^{1+d/2} |\nabla_h u_h(K)|,
+@f]
+which is inspired by the following (not rigorous) argument:
+@f{eqnarray*}
+  \|u-u_h\|^2_{L_2}
+  &\le&
+  C h^2 \|\nabla u\|^2_{L_2}
+\\
+  &\approx&
+  C
+  \sum_K
+  h_K^2 \|\nabla u\|^2_{L_2(K)}
+\\
+  &\le&
+  C
+  \sum_K
+  h_K^2 h_K^d \|\nabla u\|^2_{L_\infty(K)}
+\\
+  &\approx&
+  C
+  \sum_K
+  h_K^{2+d} |\nabla_h u_h(K)|^2
+@f}
diff --git a/examples/step-9/doc/kind b/examples/step-9/doc/kind
new file mode 100644
index 0000000..c1d9154
--- /dev/null
+++ b/examples/step-9/doc/kind
@@ -0,0 +1 @@
+techniques
diff --git a/examples/step-9/doc/results.dox b/examples/step-9/doc/results.dox
new file mode 100644
index 0000000..d0e541a
--- /dev/null
+++ b/examples/step-9/doc/results.dox
@@ -0,0 +1,49 @@
+<h1>Results</h1>
+
+
+The results of this program are not particularly spectacular. They
+consist of the console output, some grid files, and the solution on
+the finest grid. First for the console output:
+@code
+Cycle 0:
+   Number of active cells:       256
+   Number of degrees of freedom: 289
+Cycle 1:
+   Number of active cells:       643
+   Number of degrees of freedom: 793
+Cycle 2:
+   Number of active cells:       1669
+   Number of degrees of freedom: 1950
+Cycle 3:
+   Number of active cells:       4231
+   Number of degrees of freedom: 4923
+Cycle 4:
+   Number of active cells:       10753
+   Number of degrees of freedom: 12175
+Cycle 5:
+   Number of active cells:       27004
+   Number of degrees of freedom: 29810
+@endcode
+As can be seen, quite a number of cells is used on the finest level to
+resolve the features of the solution. The final grid showing this is
+displayed in the following picture:
+
+
+<img src="http://www.dealii.org/images/steps/developer/step-9.grid.png" alt="">
+
+
+
+The structure of the grid will be understandable by looking at the
+solution itself:
+
+
+<img src="http://www.dealii.org/images/steps/developer/step-9.solution.png" alt="">
+
+
+
+Note that the solution is created by that part that is transported
+along the wiggly advection field from the left and lower boundaries
+to the top right, and the part that is created by the source in the
+lower left corner, and the results of which are also transported
+along. The grid shown above is well-adapted to resolve these
+features. 
diff --git a/examples/step-9/doc/tooltip b/examples/step-9/doc/tooltip
new file mode 100644
index 0000000..0d6b649
--- /dev/null
+++ b/examples/step-9/doc/tooltip
@@ -0,0 +1 @@
+Advection equation. Multithreading. Refinement criteria.
diff --git a/examples/step-9/step-9.cc b/examples/step-9/step-9.cc
new file mode 100644
index 0000000..2598e2b
--- /dev/null
+++ b/examples/step-9/step-9.cc
@@ -0,0 +1,1363 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2000 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+
+ *
+ * Author: Wolfgang Bangerth, University of Heidelberg, 2000
+ */
+
+
+// Just as in previous examples, we have to include several files of which the
+// meaning has already been discussed:
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/lac/solver_bicgstab.h>
+#include <deal.II/lac/precondition.h>
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/grid_refinement.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/matrix_tools.h>
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/grid/grid_out.h>
+
+// The following two files provide classes and information for multithreaded
+// programs. In the first one, the classes and functions are declared which we
+// need to do assembly in parallel (i.e. the
+// <code>WorkStream</code> namespace). The
+// second file has a class MultithreadInfo which can be used to query the
+// number of processors in your system, which is often useful when deciding
+// how many threads to start in parallel.
+#include <deal.II/base/work_stream.h>
+#include <deal.II/base/multithread_info.h>
+
+// The next new include file declares a base class <code>TensorFunction</code>
+// not unlike the <code>Function</code> class, but with the difference that
+// the return value is tensor-valued rather than scalar or vector-valued.
+#include <deal.II/base/tensor_function.h>
+
+#include <deal.II/numerics/error_estimator.h>
+
+// This is C++, as we want to write some output to disk:
+#include <fstream>
+#include <iostream>
+
+
+// The last step is as in previous programs:
+namespace Step9
+{
+  using namespace dealii;
+
+  // @sect3{AdvectionProblem class declaration}
+
+  // Following we declare the main class of this program. It is very much
+  // like the main classes of previous examples, so we again only comment on
+  // the differences.
+  template <int dim>
+  class AdvectionProblem
+  {
+  public:
+    AdvectionProblem ();
+    ~AdvectionProblem ();
+    void run ();
+
+  private:
+    void setup_system ();
+
+    // The next set of functions will be used to assemble the
+    // matrix. However, unlike in the previous examples, the
+    // <code>assemble_system()</code> function will not do the work
+    // itself, but rather will delegate the actual assembly to helper
+    // functions <code>local_assemble_system()</code> and
+    // <code>copy_local_to_global()</code>. The rationale is that
+    // matrix assembly can be parallelized quite well, as the
+    // computation of the local contributions on each cell is entirely
+    // independent of other cells, and we only have to synchronize
+    // when we add the contribution of a cell to the global
+    // matrix.
+    //
+    // The strategy for parallelization we choose here is one of the
+    // possibilities mentioned in detail in the @ref threads module in
+    // the documentation. Specifically, we will use the WorkStream
+    // approach discussed there. Since there is so much documentation
+    // in this module, we will not repeat the rationale for the design
+    // choices here (for example, if you read through the module
+    // mentioned above, you will understand what the purpose of the
+    // <code>AssemblyScratchData</code> and
+    // <code>AssemblyCopyData</code> structures is). Rather, we will
+    // only discuss the specific implementation.
+    //
+    // If you read the page mentioned above, you will find that in
+    // order to parallelize assembly, we need two data structures --
+    // one that corresponds to data that we need during local
+    // integration ("scratch data", i.e., things we only need as
+    // temporary storage), and one that carries information from the
+    // local integration to the function that then adds the local
+    // contributions to the corresponding elements of the global
+    // matrix. The former of these typically contains the FEValues and
+    // FEFaceValues objects, whereas the latter has the local matrix,
+    // local right hand side, and information about which degrees of
+    // freedom live on the cell for which we are assembling a local
+    // contribution. With this information, the following should be
+    // relatively self-explanatory:
+    struct AssemblyScratchData
+    {
+      AssemblyScratchData (const FiniteElement<dim> &fe);
+      AssemblyScratchData (const AssemblyScratchData &scratch_data);
+
+      FEValues<dim>     fe_values;
+      FEFaceValues<dim> fe_face_values;
+    };
+
+    struct AssemblyCopyData
+    {
+      FullMatrix<double>                   cell_matrix;
+      Vector<double>                       cell_rhs;
+      std::vector<types::global_dof_index> local_dof_indices;
+    };
+
+    void assemble_system ();
+    void local_assemble_system (const typename DoFHandler<dim>::active_cell_iterator &cell,
+                                AssemblyScratchData                                  &scratch,
+                                AssemblyCopyData                                     &copy_data);
+    void copy_local_to_global (const AssemblyCopyData &copy_data);
+
+
+    // The following functions again are as in previous examples, as are the
+    // subsequent variables.
+    void solve ();
+    void refine_grid ();
+    void output_results (const unsigned int cycle) const;
+
+    Triangulation<dim>   triangulation;
+    DoFHandler<dim>      dof_handler;
+
+    FE_Q<dim>            fe;
+
+    ConstraintMatrix     hanging_node_constraints;
+
+    SparsityPattern      sparsity_pattern;
+    SparseMatrix<double> system_matrix;
+
+    Vector<double>       solution;
+    Vector<double>       system_rhs;
+  };
+
+
+
+  // @sect3{Equation data declaration}
+
+  // Next we declare a class that describes the advection field. This, of
+  // course, is a vector field with as many components as there are space
+  // dimensions. One could now use a class derived from the
+  // <code>Function</code> base class, as we have done for boundary values and
+  // coefficients in previous examples, but there is another possibility in
+  // the library, namely a base class that describes tensor valued
+  // functions. In contrast to the usual <code>Function</code> objects, we
+  // provide the compiler with knowledge on the size of the objects of the
+  // return type. This enables the compiler to generate efficient code, which
+  // is not so simple for usual vector-valued functions where memory has to be
+  // allocated on the heap (thus, the <code>Function::vector_value</code>
+  // function has to be given the address of an object into which the result
+  // is to be written, in order to avoid copying and memory allocation and
+  // deallocation on the heap). In addition to the known size, it is possible
+  // not only to return vectors, but also tensors of higher rank; however,
+  // this is not very often requested by applications, to be honest...
+  //
+  // The interface of the <code>TensorFunction</code> class is relatively
+  // close to that of the <code>Function</code> class, so there is probably no
+  // need to comment in detail the following declaration:
+  template <int dim>
+  class AdvectionField : public TensorFunction<1,dim>
+  {
+  public:
+    AdvectionField () : TensorFunction<1,dim> () {}
+
+    virtual Tensor<1,dim> value (const Point<dim> &p) const;  // field value at a single point
+
+    virtual void value_list (const std::vector<Point<dim> > &points,
+                             std::vector<Tensor<1,dim> >    &values) const;  // one value per point
+
+    // In previous examples, we have used assertions that throw exceptions in
+    // several places. However, we have never seen how such exceptions are
+    // declared. This can be done as follows:
+    DeclException2 (ExcDimensionMismatch,
+                    unsigned int, unsigned int,
+                    << "The vector has size " << arg1 << " but should have "
+                    << arg2 << " elements.");
+    // The syntax may look a little strange, but is reasonable. The format is
+    // basically as follows: use the name of one of the macros
+    // <code>DeclExceptionN</code>, where <code>N</code> denotes the number of
+    // additional parameters which the exception object shall take. In this
+    // case, as we want to throw the exception when the sizes of two vectors
+    // differ, we need two arguments, so we use
+    // <code>DeclException2</code>. The first parameter then describes the
+    // name of the exception, while the following declare the data types of
+    // the parameters. The last argument is a sequence of output directives
+    // that will be piped into the <code>std::cerr</code> object, thus the
+    // strange format with the leading <code>@<@<</code> operator and the
+    // like. Note that we can access the parameters which are passed to the
+    // exception upon construction (i.e. within the <code>Assert</code> call)
+    // by using the names <code>arg1</code> through <code>argN</code>, where
+    // <code>N</code> is the number of arguments as defined by the use of the
+    // respective macro <code>DeclExceptionN</code>.
+    //
+    // To learn how the preprocessor expands this macro into actual code,
+    // please refer to the documentation of the exception classes in the base
+    // library. Suffice it to say that by this macro call, the respective
+    // exception class is declared, which also has error output functions
+    // already implemented.
+  };
+
+
+
+  // The following two functions implement the interface described above. The
+  // first simply implements the function as described in the introduction,
+  // while the second uses the same trick to avoid calling a virtual function
+  // as has already been introduced in the previous example program. Note the
+  // check for the right sizes of the arguments in the second function, which
+  // should always be present in such functions; it is our experience that
+  // many if not most programming errors result from incorrectly initialized
+  // arrays, incompatible parameters to functions and the like; using
+  // assertions as in this case can eliminate many of these problems.
+  template <int dim>
+  Tensor<1,dim>
+  AdvectionField<dim>::value (const Point<dim> &p) const
+  {
+    Point<dim> value;  // built as a Point, returned as Tensor<1,dim>
+    value[0] = 2;  // the first component is constant
+    for (unsigned int i=1; i<dim; ++i)  // all further components share one formula
+      value[i] = 1+0.8*std::sin(8*numbers::PI*p[0]);  // depends only on the first coordinate
+
+    return value;
+  }
+
+
+
+  template <int dim>
+  void
+  AdvectionField<dim>::value_list (const std::vector<Point<dim> > &points,
+                                   std::vector<Tensor<1,dim> >    &values) const
+  {
+    // The output array must provide exactly one slot per evaluation point.
+    const std::size_t n_points = points.size();
+    Assert (values.size() == n_points,
+            ExcDimensionMismatch (values.size(), n_points));
+
+    // Evaluate point by point, naming the class explicitly in the call.
+    for (unsigned int k=0; k<n_points; ++k)
+      values[k] = AdvectionField<dim>::value (points[k]);
+  }
+
+
+
+
+  // Besides the advection field, we need two functions describing the source
+  // terms (<code>right hand side</code>) and the boundary values. First for
+  // the right hand side, which follows the same pattern as in previous
+  // examples. As described in the introduction, the source is a constant
+  // function in the vicinity of a source point, which we denote by the
+  // constant static variable <code>center_point</code>. We set the values of
+  // this center using the same template tricks as we have shown in the step-7
+  // example program. The rest is simple and has been shown previously,
+  // including the way to avoid virtual function calls in the
+  // <code>value_list</code> function.
+  template <int dim>
+  class RightHandSide : public Function<dim>
+  {
+  public:
+    RightHandSide () : Function<dim>() {}
+    // Source term at a single point; only component 0 is valid.
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+    // Source term at a list of points; values must have the size of points.
+    virtual void value_list (const std::vector<Point<dim> > &points,
+                             std::vector<double>            &values,
+                             const unsigned int              component = 0) const;
+
+  private:
+    static const Point<dim> center_point;   // center of the source region, defined per dimension below
+  };
+
+
+  template <>
+  const Point<1> RightHandSide<1>::center_point = Point<1> (-0.75);   // source center in 1d
+
+  template <>
+  const Point<2> RightHandSide<2>::center_point = Point<2> (-0.75, -0.75);   // source center in 2d
+
+  template <>
+  const Point<3> RightHandSide<3>::center_point = Point<3> (-0.75, -0.75, -0.75);   // source center in 3d
+
+
+
+  // The only new thing here is that we check for the value of the
+  // <code>component</code> parameter. As this is a scalar function, it is
+  // obvious that it only makes sense if the desired component has the index
+  // zero, so we assert that this is indeed the
+  // case. <code>ExcIndexRange</code> is a global predefined exception
+  // (probably the one most often used, we therefore made it global instead of
+  // local to some class), that takes three parameters: the index that is
+  // outside the allowed range, the first element of the valid range and the
+  // one past the last (i.e. again the half-open interval so often used in the
+  // C++ standard library):
+  template <int dim>
+  double
+  RightHandSide<dim>::value (const Point<dim>   &p,
+                             const unsigned int  component) const
+  {
+    // This is a scalar function, so zero is the only admissible component.
+    Assert (component == 0, ExcIndexRange (component, 0, 1));
+
+    const double diameter = 0.1;
+
+    // Inside the ball of radius <code>diameter</code> around the center
+    // point the source is constant...
+    if ((p-center_point).norm_square() < diameter*diameter)
+      return .1/std::pow(diameter,dim);
+
+    // ... and it vanishes everywhere else.
+    return 0;
+  }
+
+
+
+  template <int dim>
+  void
+  RightHandSide<dim>::value_list (const std::vector<Point<dim> > &points,
+                                  std::vector<double>            &values,
+                                  const unsigned int              component) const
+  {
+    // The output array must provide exactly one slot per evaluation point.
+    const std::size_t n_points = points.size();
+    Assert (values.size() == n_points,
+            ExcDimensionMismatch (values.size(), n_points));
+
+    // Evaluate point by point, naming the class explicitly in the call.
+    for (unsigned int k=0; k<n_points; ++k)
+      values[k] = RightHandSide<dim>::value (points[k], component);
+  }
+
+
+
+  // Finally for the boundary values, which is just another class derived from
+  // the <code>Function</code> base class:
+  template <int dim>
+  class BoundaryValues : public Function<dim>
+  {
+  public:
+    BoundaryValues () : Function<dim>() {}
+    // Boundary value at a single point; only component 0 is valid.
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+    // Boundary values at a list of points; values must have the size of points.
+    virtual void value_list (const std::vector<Point<dim> > &points,
+                             std::vector<double>            &values,
+                             const unsigned int              component = 0) const;
+  };
+
+
+
+  template <int dim>
+  double
+  BoundaryValues<dim>::value (const Point<dim>   &p,
+                              const unsigned int  component) const
+  {
+    // This is a scalar function, so zero is the only admissible component.
+    Assert (component == 0, ExcIndexRange (component, 0, 1));
+
+    // Both factors depend on the point only through its squared distance
+    // from the origin.
+    const double radius_squared = p.norm_square();
+
+    // An oscillation in the radial direction, damped by an exponential
+    // weight that is normalized to one at squared distance one.
+    return std::sin(16*numbers::PI*std::sqrt(radius_squared)) *
+           (std::exp(-5*radius_squared) / std::exp(-5.));
+  }
+
+
+
+  template <int dim>
+  void
+  BoundaryValues<dim>::value_list (const std::vector<Point<dim> > &points,
+                                   std::vector<double>            &values,
+                                   const unsigned int              component) const
+  {
+    // The output array must provide exactly one slot per evaluation point.
+    const std::size_t n_points = points.size();
+    Assert (values.size() == n_points,
+            ExcDimensionMismatch (values.size(), n_points));
+
+    // Evaluate point by point, naming the class explicitly in the call.
+    for (unsigned int k=0; k<n_points; ++k)
+      values[k] = BoundaryValues<dim>::value (points[k], component);
+  }
+
+
+
+  // @sect3{GradientEstimation class declaration}
+
+  // Now, finally, here comes the class that will compute the difference
+  // approximation of the gradient on each cell and weighs that with a power
+  // of the mesh size, as described in the introduction.  This class is a
+  // simple version of the <code>DerivativeApproximation</code> class in the
+  // library, that uses similar techniques to obtain finite difference
+  // approximations of the gradient of a finite element field, or of higher
+  // derivatives.
+  //
+  // The class has one public static function <code>estimate</code> that is
+  // called to compute a vector of error indicators, and a few private functions
+  // that do the actual work on all active cells. As in other parts of the
+  // library, we follow an informal convention to use vectors of floats for
+  // error indicators rather than the common vectors of doubles, as the
+  // additional accuracy is not necessary for estimated values.
+  //
+  // In addition to these functions, the class declares two exceptions: one
+  // is raised when a cell has no neighbors in each of the space directions
+  // (in which case the matrix described in the introduction would be
+  // singular and can't be inverted), while the other one is used in the
+  // more common case of invalid parameters to a function, namely a vector of
+  // wrong size.
+  //
+  // Two other comments: first, the class has no non-static member functions
+  // or variables, so this is not really a class, but rather serves the
+  // purpose of a <code>namespace</code> in C++. The reason that we chose a
+  // class over a namespace is that this way we can declare functions that are
+  // private. This can be done with namespaces as well, if one declares some
+  // functions in header files in the namespace and implements these and other
+  // functions in the implementation file. The functions not declared in the
+  // header file are still in the namespace but are not callable from
+  // outside. However, as we have only one file here, it is not possible to
+  // hide functions in the present case.
+  //
+  // The second comment is that the dimension template parameter is attached
+  // to the function rather than to the class itself. This way, you don't have
+  // to specify the template parameter yourself as in most other cases, but
+  // the compiler can figure its value out itself from the dimension of the
+  // DoF handler object that one passes as first argument.
+  //
+  // Before jumping into the fray with the implementation, let us also comment
+  // on the parallelization strategy. We have already introduced the necessary
+  // framework for using the WorkStream concept in the declaration of the main
+  // class of this program above. We will use it again here. In the current
+  // context, this means that we have to define (i) classes for scratch and
+  // copy objects, (ii) a function that does the local computation on one
+  // cell, and (iii) a function that copies the local result into a global
+  // object. Given this general framework, we will, however, deviate from it a
+  // bit. In particular, WorkStream was generally invented for cases where
+  // each local computation on a cell <i>adds</i> to a global object -- for
+  // example, when assembling linear systems where we add local contributions
+  // into a global matrix and right hand side. Here, however, the situation is
+  // slightly different: we compute contributions from every cell
+  // individually, but then all we need to do is put them into an element of
+  // an output vector that is unique to each cell. Consequently, there is no
+  // risk that the write operations from two cells might conflict, and the
+  // elaborate machinery of WorkStream to avoid conflicting writes is not
+  // necessary. Consequently, what we will do is this: We still need a scratch
+  // object that holds, for example, the FEValues object.  However, we only
+  // create a fake, empty copy data structure. Likewise, we do need the
+  // function that computes local contributions, but since it can already put
+  // the result into its final location, we do not need a copy-local-to-global
+  // function and will instead give the WorkStream::run function an empty
+  // function object -- the equivalent to a NULL function pointer.
+  //
+  // The second idea to make this approach work is this: If we want to write
+  // the result into its final destination right away, then the local worker
+  // function needs to already know where this destination is. Here, this is
+  // an element of a vector -- but which element is something that the local
+  // worker function (or, if we wanted to use one, a copy-local-to-global
+  // function) can not determine easily just knowing an iterator to a cell it
+  // is supposed to work on. Consequently, in addition to a cell, we need to
+  // pass a second piece of identifying information along: the element of the
+  // output vector to write into. What this means is that the work items are
+  // identified by two iterators: to a cell, and to an output vector
+  // element. Moving from one work item to the next requires incrementing both
+  // iterators. deal.II has a class for this, called SynchronousIterators,
+  // that takes a tuple of iterator types as arguments and stores an iterator
+  // of each type. Whenever the SynchronousIterators object is incremented, it
+  // increments the stored iterators in turn. Thus, this class is exactly what
+  // we need to do our work, and we consequently use it as the first argument
+  // of the worker function. We will further down below show how to create
+  // such an object.
+  class GradientEstimation
+  {
+  public:
+    template <int dim>
+    static void estimate (const DoFHandler<dim> &dof,
+                          const Vector<double>  &solution,
+                          Vector<float>         &error_per_cell);
+    // Exception for a vector of wrong size: arg1 is the actual, arg2 the expected length.
+    DeclException2 (ExcInvalidVectorLength,
+                    int, int,
+                    << "Vector has length " << arg1 << ", but should have "
+                    << arg2);
+    DeclException0 (ExcInsufficientDirections);   // a cell lacks neighbors in each space direction
+
+  private:
+    template <int dim>
+    struct EstimateScratchData
+    {
+      EstimateScratchData (const FiniteElement<dim> &fe,
+                           const Vector<double>     &solution);
+      EstimateScratchData (const EstimateScratchData &data);
+
+      FEValues<dim> fe_midpoint_value;
+      Vector<double> solution;   // held by value, so every scratch object carries its own copy
+    };
+    // Deliberately empty: no data has to be handed to a copy-local-to-global step.
+    struct EstimateCopyData
+    {};
+    // Worker function; the first argument pairs a cell iterator with an iterator into the output vector.
+    template <int dim>
+    static
+    void estimate_cell (const SynchronousIterators<std_cxx11::tuple<typename DoFHandler<dim>::active_cell_iterator,
+                        Vector<float>::iterator> >     &cell,
+                        EstimateScratchData<dim>       &scratch_data,
+                        const EstimateCopyData         &copy_data);
+  };
+
+
+
+  // @sect3{AdvectionProblem class implementation}
+
+
+  // Now for the implementation of the main class. Constructor, destructor and
+  // the function <code>setup_system</code> follow the same pattern that was
+  // used previously, so we need not comment on these three functions:
+  template <int dim>
+  AdvectionProblem<dim>::AdvectionProblem ()
+    : dof_handler (triangulation),   // the DoF handler works on this object's triangulation
+      fe (1)                         // NOTE(review): presumably the polynomial degree -- confirm against the type of fe
+  {
+  }
+
+
+
+  template <int dim>
+  AdvectionProblem<dim>::~AdvectionProblem ()
+  {
+    dof_handler.clear ();   // runs before any member is destroyed; presumably releases the handler's use of fe -- confirm
+  }
+
+
+
+  template <int dim>
+  void AdvectionProblem<dim>::setup_system ()
+  {
+    // Enumerate the degrees of freedom on the current mesh and size the
+    // solution and right hand side vectors accordingly.
+    dof_handler.distribute_dofs (fe);
+    solution.reinit (dof_handler.n_dofs());
+    system_rhs.reinit (dof_handler.n_dofs());
+
+    // Rebuild the hanging node constraints from scratch.
+    hanging_node_constraints.clear ();
+    DoFTools::make_hanging_node_constraints (dof_handler,
+                                             hanging_node_constraints);
+    hanging_node_constraints.close ();
+
+    // Collect the sparsity pattern in a dynamic object first, copy it into
+    // the pattern object of this class, and attach the matrix to it.
+    DynamicSparsityPattern dsp(dof_handler.n_dofs(), dof_handler.n_dofs());
+    DoFTools::make_sparsity_pattern(dof_handler,
+                                    dsp,
+                                    hanging_node_constraints,
+                                    /*keep_constrained_dofs = */ true);
+    sparsity_pattern.copy_from (dsp);
+    system_matrix.reinit (sparsity_pattern);
+  }
+
+
+
+  // In the following function, the matrix and right hand side are
+  // assembled. As stated in the documentation of the main class above, it
+  // does not do this itself, but rather delegates to the function following
+  // next, utilizing the WorkStream concept discussed in @ref threads .
+  //
+  // If you have looked through the @ref threads module, you will have
+  // seen that assembling in parallel does not take an incredible
+  // amount of extra code as long as you diligently describe what the
+  // scratch and copy data objects are, and if you define suitable
+  // functions for the local assembly and the copy operation from local
+  // contributions to global objects. This done, the following will do
+  // all the heavy lifting to get these operations done on multiple
+  // threads on as many cores as you have in your system:
+  template <int dim>
+  void AdvectionProblem<dim>::assemble_system ()
+  {
+    WorkStream::run(dof_handler.begin_active(),                // first cell to work on
+                    dof_handler.end(),                         // one past the last cell
+                    *this,                                     // object on which the two member functions are called
+                    &AdvectionProblem::local_assemble_system,  // computes the contributions of one cell
+                    &AdvectionProblem::copy_local_to_global,   // adds them to the global matrix and vector
+                    AssemblyScratchData(fe),                   // sample scratch object
+                    AssemblyCopyData());                       // sample copy object
+
+
+    // After the matrix has been assembled in parallel, we still have to
+    // eliminate hanging node constraints. This is something that can't be
+    // done on each of the threads separately, so we have to do it now.
+    // Note also, that unlike in previous examples, there are no boundary
+    // conditions to be applied to the system of equations. This, of course,
+    // is due to the fact that we have included them into the weak formulation
+    // of the problem.
+    hanging_node_constraints.condense (system_matrix);
+    hanging_node_constraints.condense (system_rhs);
+  }
+
+
+
+  // As already mentioned above, we need to have scratch objects for
+  // the parallel computation of local contributions. These objects
+  // contain FEValues and FEFaceValues objects, and so we will need to
+  // have constructors and copy constructors that allow us to create
+  // them. In initializing them, note first that we use bilinear
+  // elements, so Gauss formulae with two points in each space
+  // direction are sufficient.  For the cell terms we need the values
+  // and gradients of the shape functions, the quadrature points in
+  // order to determine the source density and the advection field at
+  // a given point, and the weights of the quadrature points times the
+  // determinant of the Jacobian at these points. In contrast, for the
+  // boundary integrals, we don't need the gradients, but rather the
+  // normal vectors to the cells. This determines which update flags
+  // we will have to pass to the constructors of the members of the
+  // class:
+  template <int dim>
+  AdvectionProblem<dim>::AssemblyScratchData::
+  AssemblyScratchData (const FiniteElement<dim> &fe)
+    :
+    fe_values (fe,
+               QGauss<dim>(2),   // two Gauss points per space direction for the cell integrals
+               update_values   | update_gradients |
+               update_quadrature_points | update_JxW_values),
+    fe_face_values (fe,
+                    QGauss<dim-1>(2),   // the same rule, one dimension lower, for the face integrals
+                    update_values     | update_quadrature_points   |
+                    update_JxW_values | update_normal_vectors)   // normal vectors instead of gradients on faces
+  {}
+
+
+
+  template <int dim>
+  AdvectionProblem<dim>::AssemblyScratchData::
+  AssemblyScratchData (const AssemblyScratchData &scratch_data)
+    :
+    fe_values (scratch_data.fe_values.get_fe(),           // rebuilt from the source object's element...
+               scratch_data.fe_values.get_quadrature(),   // ... and quadrature rule, not copied member-wise
+               update_values   | update_gradients |
+               update_quadrature_points | update_JxW_values),   // same flags as in the other constructor
+    fe_face_values (scratch_data.fe_face_values.get_fe(),
+                    scratch_data.fe_face_values.get_quadrature(),
+                    update_values     | update_quadrature_points   |
+                    update_JxW_values | update_normal_vectors)   // same flags as in the other constructor
+  {}
+
+
+
+
+  // Now, this is the function that does the actual work. It is not very
+  // different from the <code>assemble_system</code> functions of previous
+  // example programs, so we will again only comment on the differences. The
+  // mathematical stuff follows closely what we have said in the introduction.
+  //
+  // There are a number of points worth mentioning here, though. The
+  // first one is that we have moved the FEValues and FEFaceValues
+  // objects into the ScratchData object. We have done so because the
+  // alternative would have been to simply create one every time we
+  // get into this function -- i.e., on every cell. It now turns out
+  // that the FEValues classes were written with the explicit goal of
+  // moving everything that remains the same from cell to cell into
+  // the construction of the object, and only do as little work as
+  // possible in FEValues::reinit() whenever we move to a new
+  // cell. What this means is that it would be very expensive to
+  // create a new object of this kind in this function as we would
+  // have to do it for every cell -- exactly the thing we wanted to
+  // avoid with the FEValues class. Instead, what we do is create it
+  // only once (or a small number of times) in the scratch objects and
+  // then re-use it as often as we can.
+  //
+  // This begs the question of whether there are other objects we
+  // create in this function whose creation is expensive compared to
+  // its use. Indeed, at the top of the function, we declare all sorts
+  // of objects. The <code>AdvectionField</code>,
+  // <code>RightHandSide</code> and <code>BoundaryValues</code> do not
+  // cost much to create, so there is no harm here. However,
+  // allocating memory in creating the <code>rhs_values</code> and
+  // similar variables below typically costs a significant amount of
+  // time, compared to just accessing the (temporary) values we store
+  // in them. Consequently, these would be candidates for moving into
+  // the <code>AssemblyScratchData</code> class. We will leave this as
+  // an exercise.
+  template <int dim>
+  void
+  AdvectionProblem<dim>::
+  local_assemble_system (const typename DoFHandler<dim>::active_cell_iterator &cell,
+                         AssemblyScratchData                                  &scratch_data,
+                         AssemblyCopyData                                     &copy_data)
+  {
+    // First of all, we will need some objects that describe boundary values,
+    // right hand side function and the advection field. As we will only
+    // perform actions on these objects that do not change them, we declare
+    // them as constant, which can enable the compiler in some cases to
+    // perform additional optimizations.
+    const AdvectionField<dim> advection_field;
+    const RightHandSide<dim>  right_hand_side;
+    const BoundaryValues<dim> boundary_values;
+
+    // Then we define some abbreviations to avoid unnecessarily long lines:
+    const unsigned int dofs_per_cell   = fe.dofs_per_cell;
+    const unsigned int n_q_points      = scratch_data.fe_values.get_quadrature().size();
+    const unsigned int n_face_q_points = scratch_data.fe_face_values.get_quadrature().size();
+
+    // We size the cell matrix and cell right hand side stored in the copy
+    // data object...
+    copy_data.cell_matrix.reinit (dofs_per_cell, dofs_per_cell);
+    copy_data.cell_rhs.reinit (dofs_per_cell);
+
+    // ... an array to hold the global indices of the degrees of freedom of
+    // the cell on which we are presently working...
+    copy_data.local_dof_indices.resize(dofs_per_cell);
+
+    // ... and arrays in which the values of right hand side, advection
+    // direction, and boundary values will be stored, for cell and face
+    // integrals respectively:
+    std::vector<double>         rhs_values (n_q_points);
+    std::vector<Tensor<1,dim> > advection_directions (n_q_points);
+    std::vector<double>         face_boundary_values (n_face_q_points);
+    std::vector<Tensor<1,dim> > face_advection_directions (n_face_q_points);
+
+
+    // ... then initialize the <code>FEValues</code> object...
+    scratch_data.fe_values.reinit (cell);
+
+    // ... obtain the values of right hand side and advection directions
+    // at the quadrature points...
+    advection_field.value_list (scratch_data.fe_values.get_quadrature_points(),
+                                advection_directions);
+    right_hand_side.value_list (scratch_data.fe_values.get_quadrature_points(),
+                                rhs_values);
+
+    // ... set the value of the streamline diffusion parameter as
+    // described in the introduction...
+    const double delta = 0.1 * cell->diameter ();
+
+    // ... and assemble the local contributions to the system matrix and
+    // right hand side as also discussed above:
+    for (unsigned int q_point=0; q_point<n_q_points; ++q_point)
+      for (unsigned int i=0; i<dofs_per_cell; ++i)
+        {
+          for (unsigned int j=0; j<dofs_per_cell; ++j)
+            copy_data.cell_matrix(i,j) += ((advection_directions[q_point] *
+                                            scratch_data.fe_values.shape_grad(j,q_point)   *
+                                            (scratch_data.fe_values.shape_value(i,q_point) +
+                                             delta *
+                                             (advection_directions[q_point] *
+                                              scratch_data.fe_values.shape_grad(i,q_point)))) *
+                                           scratch_data.fe_values.JxW(q_point));
+
+          copy_data.cell_rhs(i) += ((scratch_data.fe_values.shape_value(i,q_point) +
+                                     delta *
+                                     (advection_directions[q_point] *
+                                      scratch_data.fe_values.shape_grad(i,q_point))        ) *
+                                    rhs_values[q_point] *
+                                    scratch_data.fe_values.JxW (q_point));
+        }
+
+    // Besides the cell terms which we have built up now, the bilinear
+    // form of the present problem also contains terms on the boundary of
+    // the domain. Therefore, we have to check whether any of the faces of
+    // this cell are on the boundary of the domain, and if so assemble the
+    // contributions of this face as well. Of course, the bilinear form
+    // only contains contributions from the <code>inflow</code> part of
+    // the boundary, but to find out whether a certain part of a face of
+    // the present cell is part of the inflow boundary, we have to have
+    // information on the exact location of the quadrature points and on
+    // the direction of flow at this point; we obtain this information
+    // using the FEFaceValues object and only decide within the main loop
+    // whether a quadrature point is on the inflow boundary.
+    for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+      if (cell->face(face)->at_boundary())
+        {
+          // Ok, this face of the present cell is on the boundary of the
+          // domain. Just as for the usual FEValues object which we have
+          // used in previous examples and also above, we have to
+          // reinitialize the FEFaceValues object for the present face:
+          scratch_data.fe_face_values.reinit (cell, face);
+
+          // For the quadrature points at hand, we ask for the values of
+          // the inflow function and for the direction of flow:
+          boundary_values.value_list (scratch_data.fe_face_values.get_quadrature_points(),
+                                      face_boundary_values);
+          advection_field.value_list (scratch_data.fe_face_values.get_quadrature_points(),
+                                      face_advection_directions);
+
+          // Now loop over all quadrature points and see whether it is on
+          // the inflow or outflow part of the boundary. This is
+          // determined by a test whether the advection direction points
+          // inwards or outwards of the domain (note that the normal
+          // vector points outwards of the cell, and since the cell is at
+          // the boundary, the normal vector points outward of the domain,
+          // so if the advection direction points into the domain, its
+          // scalar product with the normal vector must be negative):
+          for (unsigned int q_point=0; q_point<n_face_q_points; ++q_point)
+            if (scratch_data.fe_face_values.normal_vector(q_point) *
+                face_advection_directions[q_point]
+                < 0)
+              // If this point is part of the inflow boundary, then compute
+              // the contributions of this face to the cell matrix and right
+              // hand side, using the values obtained from the
+              // FEFaceValues object and the formulae discussed in the
+              // introduction:
+              for (unsigned int i=0; i<dofs_per_cell; ++i)
+                {
+                  for (unsigned int j=0; j<dofs_per_cell; ++j)
+                    copy_data.cell_matrix(i,j) -= (face_advection_directions[q_point] *
+                                                   scratch_data.fe_face_values.normal_vector(q_point) *
+                                                   scratch_data.fe_face_values.shape_value(i,q_point) *
+                                                   scratch_data.fe_face_values.shape_value(j,q_point) *
+                                                   scratch_data.fe_face_values.JxW(q_point));
+
+                  copy_data.cell_rhs(i) -= (face_advection_directions[q_point] *
+                                            scratch_data.fe_face_values.normal_vector(q_point) *
+                                            face_boundary_values[q_point]         *
+                                            scratch_data.fe_face_values.shape_value(i,q_point) *
+                                            scratch_data.fe_face_values.JxW(q_point));
+                }
+        }
+
+
+    // Finally, record the global indices of the degrees of freedom on this
+    // cell in the copy data object; the copy-local-to-global function uses
+    // them to add the local contributions to the global objects.
+    cell->get_dof_indices (copy_data.local_dof_indices);
+  }
+
+
+
+  // The second function we needed to write was the one that copies
+  // the local contributions the previous function has computed and
+  // put into the copy data object, into the global matrix and right
+  // hand side vector objects. This is essentially what we always had
+  // as the last block of code when assembling something on every
+  // cell. The following should therefore be pretty obvious:
+  template <int dim>
+  void
+  AdvectionProblem<dim>::copy_local_to_global (const AssemblyCopyData &copy_data)
+  {
+    // The number of stored global indices is the local problem size.
+    const std::size_t n_local_dofs = copy_data.local_dof_indices.size();
+
+    for (unsigned int row=0; row<n_local_dofs; ++row)
+      {
+        // Add the right hand side entry of this row...
+        system_rhs(copy_data.local_dof_indices[row]) += copy_data.cell_rhs(row);
+
+        // ... and then all matrix entries of the same row.
+        for (unsigned int col=0; col<n_local_dofs; ++col)
+          system_matrix.add (copy_data.local_dof_indices[row],
+                             copy_data.local_dof_indices[col],
+                             copy_data.cell_matrix(row,col));
+      }
+  }
+
+
+
+
+  // Following is the function that solves the linear system of equations. As
+  // the system is no longer symmetric positive definite as in all the previous
+  // examples, we can't use the Conjugate Gradients method anymore. Rather, we
+  // use a solver that is tailored to nonsymmetric systems like the one at
+  // hand, the BiCGStab method. As preconditioner, we use the Jacobi method.
+  template <int dim>
+  void AdvectionProblem<dim>::solve ()
+  {
+    // Set up the Jacobi preconditioner for the system matrix.
+    // NOTE(review): the 1.0 is presumably the relaxation parameter -- confirm.
+    PreconditionJacobi<> preconditioner;
+    preconditioner.initialize(system_matrix, 1.0);
+
+    // Run BiCGStab under the control of an object constructed with an
+    // iteration limit of 1000 and a tolerance of 1e-12.
+    SolverControl    solver_control (1000, 1e-12);
+    SolverBicgstab<> bicgstab (solver_control);
+    bicgstab.solve (system_matrix, solution, system_rhs, preconditioner);
+
+    // Finally let the constraints object fill in the values of the
+    // constrained (hanging node) degrees of freedom.
+    hanging_node_constraints.distribute (solution);
+  }
+
+
+  // The following function refines the grid according to the quantity
+  // described in the introduction. The respective computations are made in
+  // the class <code>GradientEstimation</code>. The only difference to
+  // previous examples is that we refine a little more aggressively (0.5
+  // instead of 0.3 of the number of cells).
+  template <int dim>
+  void AdvectionProblem<dim>::refine_grid ()
+  {
+    // Fractions of the number of cells that are flagged for refinement and
+    // for coarsening, respectively.
+    const double refine_fraction  = 0.5;
+    const double coarsen_fraction = 0.03;
+
+    // Compute one indicator per active cell...
+    Vector<float> estimated_error_per_cell (triangulation.n_active_cells());
+    GradientEstimation::estimate (dof_handler, solution, estimated_error_per_cell);
+
+    // ... flag cells according to these indicators, and carry out the
+    // changes to the mesh.
+    GridRefinement::refine_and_coarsen_fixed_number (triangulation,
+                                                     estimated_error_per_cell,
+                                                     refine_fraction,
+                                                     coarsen_fraction);
+    triangulation.execute_coarsening_and_refinement ();
+  }
+
+
+
+  // Writing output to disk is done in the same way as in the previous
+  // examples...
+  template <int dim>
+  void AdvectionProblem<dim>::output_results (const unsigned int cycle) const
+  {
+    // The cycle number is encoded as a single decimal digit in the file
+    // name, so check this precondition before doing the character
+    // arithmetic rather than after it.
+    Assert (cycle < 10, ExcInternalError());
+
+    // Build the name "grid-<cycle>.eps". The sum '0' + cycle is an unsigned
+    // integer, so convert it back to a character explicitly instead of
+    // relying on an implicit narrowing conversion.
+    std::string filename = "grid-";
+    filename += static_cast<char>('0' + cycle);
+    filename += ".eps";
+
+    // Write the current mesh in EPS format.
+    std::ofstream output (filename.c_str());
+
+    GridOut grid_out;
+    grid_out.write_eps (triangulation, output);
+  }
+
+
+  // ... as is the main loop (setup -- solve -- refine)
+  template <int dim>
+  void AdvectionProblem<dim>::run ()
+  {
+    for (unsigned int cycle=0; cycle<6; ++cycle)
+      {
+        std::cout << "Cycle " << cycle << ':' << std::endl;
+        // The first cycle creates the initial mesh, all later ones refine the previous mesh.
+        if (cycle == 0)
+          {
+            GridGenerator::hyper_cube (triangulation, -1, 1);
+            triangulation.refine_global (4);
+          }
+        else
+          {
+            refine_grid ();
+          }
+
+
+        std::cout << "   Number of active cells:       "
+                  << triangulation.n_active_cells()
+                  << std::endl;
+
+        setup_system ();
+
+        std::cout << "   Number of degrees of freedom: "
+                  << dof_handler.n_dofs()
+                  << std::endl;
+        // Assemble and solve on this mesh, then write the mesh (not the solution) to disk.
+        assemble_system ();
+        solve ();
+        output_results (cycle);
+      }
+    // The solution itself is written only once, for the mesh of the last cycle.
+    DataOut<dim> data_out;
+    data_out.attach_dof_handler (dof_handler);
+    data_out.add_data_vector (solution, "solution");
+    data_out.build_patches ();
+
+    std::ofstream output ("final-solution.vtk");
+    data_out.write_vtk (output);
+  }
+
+
+
+  // @sect3{GradientEstimation class implementation}
+
+  // Now for the implementation of the <code>GradientEstimation</code> class.
+  // Let us start by defining constructors for the
+  // <code>EstimateScratchData</code> class used by the
+  // <code>estimate_cell()</code> function:
+  template <int dim>
+  GradientEstimation::EstimateScratchData<dim>::
+  EstimateScratchData (const FiniteElement<dim> &fe,
+                       const Vector<double>     &solution)
+    :
+    // Midpoint quadrature; function values and point locations are requested.
+    fe_midpoint_value(fe, QMidpoint<dim> (),
+                      update_quadrature_points | update_values),
+    solution(solution)
+  {}
+
+
+  template <int dim>
+  GradientEstimation::EstimateScratchData<dim>::
+  EstimateScratchData(const EstimateScratchData &scratch_data)
+    :
+    // fe_midpoint_value is not copied; it is rebuilt from the source's
+    // finite element and quadrature with the same update flags.
+    fe_midpoint_value(scratch_data.fe_midpoint_value.get_fe(),
+                      scratch_data.fe_midpoint_value.get_quadrature(),
+                      update_quadrature_points | update_values),
+    solution(scratch_data.solution)
+  {}
+
+
+  // Next for the implementation of the <code>GradientEstimation</code>
+  // class. The first function does not much except for delegating work to the
+  // other function, but there is a bit of setup at the top.
+  //
+  // Before starting with the work, we check that the vector into which the
+  // results are written has the right size. It is a common error that such
+  // parameters have the wrong size, but the resulting damage from not
+  // catching these errors is very subtle, as it usually is corruption of
+  // data somewhere in memory. Often, the problems emerging from this are
+  // not reproducible, and it is well worth the effort to
+  // check for such things.
+  //
+  // The second piece is to set up the iterator that goes in lockstep over the
+  // cells of the domain and the corresponding elements of the output vector
+  // (see above where we introduced the <code>SynchronousIterators</code>
+  // class). We can abbreviate the process slightly by introducing a
+  // <code>typedef</code> that denotes a pair of iterators. This being set up,
+  // we can hand the whole thing off to WorkStream::run, keeping in mind that
+  // we do not need a copy-local-to-global function here but can get away by
+  // simply using a default-constructed function object (the equivalent to a
+  // NULL function pointer).
+  template <int dim>
+  void
+  GradientEstimation::estimate (const DoFHandler<dim> &dof_handler,
+                                const Vector<double>  &solution,
+                                Vector<float>         &error_per_cell)
+  {
+    Assert (error_per_cell.size() == dof_handler.get_triangulation().n_active_cells(),
+            ExcInvalidVectorLength (error_per_cell.size(),
+                                    dof_handler.get_triangulation().n_active_cells()));
+
+    // Pair each active cell with the vector entry that receives its result.
+    typedef typename DoFHandler<dim>::active_cell_iterator CellIterator;
+    typedef std_cxx11::tuple<CellIterator,Vector<float>::iterator> CellAndEntry;
+    typedef SynchronousIterators<CellAndEntry> LockstepIterator;
+
+    LockstepIterator first (CellAndEntry (dof_handler.begin_active(),
+                                          error_per_cell.begin()));
+    LockstepIterator last (CellAndEntry (dof_handler.end(),
+                                         error_per_cell.end()));
+
+    // No copier is needed, hence the default-constructed function object.
+    WorkStream::run (first, last,
+                     &GradientEstimation::template estimate_cell<dim>,
+                     std_cxx11::function<void (const EstimateCopyData &)> (),
+                     EstimateScratchData<dim> (dof_handler.get_fe(), solution),
+                     EstimateCopyData ());
+  }
+
+
+  // Following now the function that actually computes the finite difference
+  // approximation to the gradient. The general outline of the function is to
+  // first compute the list of active neighbors of the present cell and then
+  // compute the quantities described in the introduction for each of the
+  // neighbors. The reason for this order is that it is not a one-liner to
+  // find a given neighbor with locally refined meshes. In principle, an
+  // optimized implementation would find neighbors and the quantities
+  // depending on them in one step, rather than first building a list of
+  // neighbors and in a second step their contributions but we will gladly
+  // leave this as an exercise. As discussed before, the worker function
+  // passed to WorkStream::run works on "scratch" objects that keep all
+  // temporary objects. This way, we do not need to create and initialize
+  // objects that are expensive to initialize within the function that does
+  // the work, every time it is called for a given cell. Such an argument is
+  // passed as the second argument. The third argument would be a "copy-data"
+  // object (see @ref threads for more information) but we do not actually use
+  // any of these here. Because WorkStream::run insists on passing three
+  // arguments, we declare this function with three arguments, but simply
+  // ignore the last one.
+  //
+  // (This is unsatisfactory from an esthetic perspective. It can be avoided,
+  // at the cost of some other trickery. If you allow, let us here show
+  // how. First, assume that we had declared this function to only take two
+  // arguments by omitting the unused last one. Now, WorkStream::run still
+  // wants to call this function with three arguments, so we need to find a
+  // way to "forget" the third argument in the call. Simply passing
+  // WorkStream::run the pointer to the function as we do above will not do
+  // this -- the compiler will complain that a function declared to have two
+  // arguments is called with three arguments.  However, we can do this by
+  // passing the following as the third argument when calling WorkStream::run
+  // above:
+  // @code
+  //    std_cxx11::function<void (const SynchronousIterators<IteratorTuple> &,
+  //                              EstimateScratchData<dim>                  &,
+  //                              EstimateCopyData                          &)>
+  //      (std_cxx11::bind (&GradientEstimation::template estimate_cell<dim>,
+  //                        std_cxx11::_1,
+  //                        std_cxx11::_2))
+  // @endcode
+  // This creates a function object taking three arguments, but when it calls
+  // the underlying function object, it simply only uses the first and second
+  // argument -- we simply "forget" to use the third argument :-)
+  // In the end, this isn't completely obvious either, and so we didn't implement
+  // it, but hey -- it can be done!)
+  //
+  // Now for the details:
+  template <int dim>
+  void
+  GradientEstimation::estimate_cell (const SynchronousIterators<std_cxx11::tuple<typename DoFHandler<dim>::active_cell_iterator,
+                                     Vector<float>::iterator> > &cell,
+                                     EstimateScratchData<dim>                                               &scratch_data,
+                                     const EstimateCopyData &)
+  {
+    // We need space for the tensor <code>Y</code>, which is the sum of
+    // outer products of the y-vectors.
+    Tensor<2,dim> Y;
+
+
+    // Then we allocate a vector to hold iterators to all active neighbors of
+    // a cell. We reserve the maximal number of active neighbors in order to
+    // avoid later reallocations. Note how this maximal number of active
+    // neighbors is computed here.
+    std::vector<typename DoFHandler<dim>::active_cell_iterator> active_neighbors;
+    active_neighbors.reserve (GeometryInfo<dim>::faces_per_cell *
+                              GeometryInfo<dim>::max_children_per_face);
+
+    typename DoFHandler<dim>::active_cell_iterator cell_it(std_cxx11::get<0>(cell.iterators));
+
+    // First initialize the <code>FEValues</code> object, as well as the
+    // <code>Y</code> tensor:
+    scratch_data.fe_midpoint_value.reinit (cell_it);
+
+    // Then allocate the vector that will be the sum over the y-vectors
+    // times the approximate directional derivative:
+    Tensor<1,dim> projected_gradient;
+
+
+    // Now before going on first compute a list of all active neighbors of
+    // the present cell. We do so by first looping over all faces and see
+    // whether the neighbor there is active, which would be the case if it
+    // is on the same level as the present cell or one level coarser (note
+    // that a neighbor can only be once coarser than the present cell, as
+    // we only allow a maximal difference of one refinement over a face in
+    // deal.II). Alternatively, the neighbor could be on the same level
+    // and be further refined; then we have to find which of its children
+    // are next to the present cell and select these (note that a child
+    // of a neighbor of an active cell that is next to this active cell
+    // needs necessarily be active itself, due to the one-refinement rule
+    // cited above).
+    //
+    // Things are slightly different in one space dimension, as there the
+    // one-refinement rule does not exist: neighboring active cells may
+    // differ in as many refinement levels as they like. In this case, the
+    // computation becomes a little more difficult, but we will explain
+    // this below.
+    //
+    // Before starting the loop over all neighbors of the present cell, we
+    // have to clear the array storing the iterators to the active
+    // neighbors, of course.
+    active_neighbors.clear ();
+    for (unsigned int face_no=0; face_no<GeometryInfo<dim>::faces_per_cell; ++face_no)
+      if (! std_cxx11::get<0>(cell.iterators)->at_boundary(face_no))
+        {
+          // First define an abbreviation for the iterator to the face and
+          // the neighbor
+          const typename DoFHandler<dim>::face_iterator
+          face = std_cxx11::get<0>(cell.iterators)->face(face_no);
+          const typename DoFHandler<dim>::cell_iterator
+          neighbor = std_cxx11::get<0>(cell.iterators)->neighbor(face_no);
+
+          // Then check whether the neighbor is active. If it is, then it
+          // is on the same level or one level coarser (if we are not in
+          // 1D), and we are interested in it in any case.
+          if (neighbor->active())
+            active_neighbors.push_back (neighbor);
+          else
+            {
+              // If the neighbor is not active, then check its children.
+              if (dim == 1)
+                {
+                  // To find the child of the neighbor which bounds to the
+                  // present cell, successively go to its right child if
+                  // we are left of the present cell (face_no==0), or go to
+                  // the left child if we are on the right (face_no==1),
+                  // until we find an active cell.
+                  typename DoFHandler<dim>::cell_iterator
+                  neighbor_child = neighbor;
+                  while (neighbor_child->has_children())
+                    neighbor_child = neighbor_child->child (face_no==0 ? 1 : 0);
+
+                  // As this used some non-trivial geometrical intuition,
+                  // we might want to check whether we did it right,
+                  // i.e. check whether the neighbor of the cell we found
+                  // is indeed the cell we are presently working
+                  // on. Checks like this are often useful and have
+                  // frequently uncovered errors both in algorithms like
+                  // the line above (where it is simple to involuntarily
+                  // exchange <code>face_no==1</code> for
+                  // <code>face_no==0</code> or the like) and in the library
+                  // (the assumptions underlying the algorithm above could
+                  // either be wrong, wrongly documented, or are violated
+                  // due to an error in the library). One could in principle
+                  // remove such checks after the program works for some
+                  // time, but it might be a good thing to leave it in
+                  // anyway to check for changes in the library or in the
+                  // algorithm above.
+                  //
+                  // Note that if this check fails, then this is certainly
+                  // an error that is irrecoverable and probably qualifies
+                  // as an internal error. We therefore use a predefined
+                  // exception class to throw here.
+                  Assert (neighbor_child->neighbor(face_no==0 ? 1 : 0)
+                          ==std_cxx11::get<0>(cell.iterators),ExcInternalError());
+
+                  // If the check succeeded, we push the active neighbor
+                  // we just found to the stack we keep:
+                  active_neighbors.push_back (neighbor_child);
+                }
+              else
+                // If we are not in 1d, we collect all neighbor children
+                // `behind' the subfaces of the current face
+                for (unsigned int subface_no=0; subface_no<face->n_children(); ++subface_no)
+                  active_neighbors.push_back (
+                    std_cxx11::get<0>(cell.iterators)->neighbor_child_on_subface(face_no,subface_no));
+            }
+        }
+
+    // OK, now that we have all the neighbors, let's start the computation
+    // on each of them. First we do some preliminaries: find out about the
+    // center of the present cell and the solution at this point. The
+    // latter is obtained as a vector of function values at the quadrature
+    // points, of which there is only one, of course. Likewise, the
+    // position of the center is the position of the first (and only)
+    // quadrature point in real space.
+    const Point<dim> this_center = scratch_data.fe_midpoint_value.quadrature_point(0);
+
+    std::vector<double> this_midpoint_value(1);
+    scratch_data.fe_midpoint_value.get_function_values (scratch_data.solution, this_midpoint_value);
+
+
+    // Now loop over all active neighbors and collect the data we
+    // need. Allocate a vector just like <code>this_midpoint_value</code>
+    // which we will use to store the value of the solution in the
+    // midpoint of the neighbor cell. We allocate it here already, since
+    // that way we don't have to allocate memory repeatedly in each
+    // iteration of this inner loop (memory allocation is a rather
+    // expensive operation):
+    std::vector<double> neighbor_midpoint_value(1);
+    typename std::vector<typename DoFHandler<dim>::active_cell_iterator>::const_iterator
+    neighbor_ptr = active_neighbors.begin();
+    for (; neighbor_ptr!=active_neighbors.end(); ++neighbor_ptr)
+      {
+        // First define an abbreviation for the iterator to the active
+        // neighbor cell:
+        const typename DoFHandler<dim>::active_cell_iterator
+        neighbor = *neighbor_ptr;
+
+        // Then get the center of the neighbor cell and the value of the
+        // finite element function thereon. Note that for this information
+        // we have to reinitialize the <code>FEValues</code> object for
+        // the neighbor cell.
+        scratch_data.fe_midpoint_value.reinit (neighbor);
+        const Point<dim> neighbor_center = scratch_data.fe_midpoint_value.quadrature_point(0);
+
+        scratch_data.fe_midpoint_value.get_function_values (scratch_data.solution,
+                                                            neighbor_midpoint_value);
+
+        // Compute the vector <code>y</code> connecting the centers of the
+        // two cells. Note that as opposed to the introduction, we denote
+        // by <code>y</code> the normalized difference vector, as this is
+        // the quantity used everywhere in the computations.
+        Tensor<1,dim> y        = neighbor_center - this_center;
+        const double  distance = y.norm();
+        y /= distance;
+
+        // Then add up the contribution of this cell to the Y matrix...
+        for (unsigned int i=0; i<dim; ++i)
+          for (unsigned int j=0; j<dim; ++j)
+            Y[i][j] += y[i] * y[j];
+
+        // ... and update the sum of difference quotients:
+        projected_gradient += (neighbor_midpoint_value[0] -
+                               this_midpoint_value[0]) /
+                              distance *
+                              y;
+      }
+
+    // If now, after collecting all the information from the neighbors, we
+    // can determine an approximation of the gradient for the present
+    // cell, then we need to have passed over vectors <code>y</code> which
+    // span the whole space, otherwise we would not have all components of
+    // the gradient. This is indicated by the invertibility of the matrix.
+    //
+    // If the matrix should not be invertible, this means that the present
+    // cell had an insufficient number of active neighbors. In contrast to
+    // all previous cases, where we raised exceptions, this is, however,
+    // not a programming error: it is a runtime error that can happen in
+    // optimized mode even if it ran well in debug mode, so it is
+    // reasonable to try to catch this error also in optimized mode. For
+    // this case, there is the <code>AssertThrow</code> macro: it checks
+    // the condition like the <code>Assert</code> macro, but not only in
+    // debug mode; it then outputs an error message, but instead of
+    // terminating the program as in the case of the <code>Assert</code>
+    // macro, the exception is thrown using the <code>throw</code> command
+    // of C++. This way, one has the possibility to catch this error and
+    // take reasonable counter actions. One such measure would be to
+    // refine the grid globally, as the case of insufficient directions
+    // can not occur if every cell of the initial grid has been refined at
+    // least once.
+    AssertThrow (determinant(Y) != 0,
+                 ExcInsufficientDirections());
+
+    // If, on the other hand the matrix is invertible, then invert it,
+    // multiply the other quantity with it and compute the estimated error
+    // using this quantity and the right powers of the mesh width:
+    const Tensor<2,dim> Y_inverse = invert(Y);
+
+    Tensor<1,dim> gradient = Y_inverse * projected_gradient;
+
+    // The last part of this function is the one where we
+    // write into the element of the output vector what
+    // we have just computed. As above, we need to get
+    // at the second element of the pair of iterators, which requires
+    // slightly awkward syntax but is not otherwise particularly
+    // difficult:
+    *(std_cxx11::get<1>(cell.iterators)) = (std::pow(std_cxx11::get<0>(cell.iterators)->diameter(),
+                                                     1+1.0*dim/2) *
+                                            std::sqrt(gradient.norm_square()));
+
+  }
+}
+
+
+// @sect3{Main function}
+
+// The <code>main</code> function is similar to the previous examples. The main
+// difference is that we use MultithreadInfo to set the maximum
+// number of threads (see @ref threads "Parallel computing with multiple
+// processors accessing shared memory" documentation module for more
+// explanation). The number of threads used is the minimum of the environment
+// variable DEAL_II_NUM_THREADS and the parameter of
+// <code>set_thread_limit</code>. If no value is given to
+// <code>set_thread_limit</code>, the default value from the Intel Threading
+// Building Blocks (TBB) library is used. If the call to
+// <code>set_thread_limit</code> is omitted, the number of threads will be
+// chosen by TBB independently of DEAL_II_NUM_THREADS.
+int main ()
+{
+  // Horizontal rule that frames any error report written to std::cerr.
+  const char *const rule = "----------------------------------------------------";
+
+  try
+    {
+      // No argument is passed, so the default thread limit applies.
+      dealii::MultithreadInfo::set_thread_limit();
+
+      Step9::AdvectionProblem<2> problem;
+      problem.run ();
+    }
+  catch (std::exception &exc)
+    {
+      // Standard exception: print its message and report failure.
+      std::cerr << std::endl << std::endl << rule << std::endl;
+      std::cerr << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << rule << std::endl;
+      return 1;
+    }
+  catch (...)
+    {
+      // Anything else: there is no message to print, but still fail.
+      std::cerr << std::endl << std::endl << rule << std::endl;
+      std::cerr << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << rule << std::endl;
+      return 1;
+    }
+
+  return 0;
+}
diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt
new file mode 100644
index 0000000..436e048
--- /dev/null
+++ b/include/CMakeLists.txt
@@ -0,0 +1,45 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Configure config.h and revision.h
+#
+CONFIGURE_FILE(
+  ${CMAKE_CURRENT_SOURCE_DIR}/deal.II/base/config.h.in
+  ${CMAKE_CURRENT_BINARY_DIR}/deal.II/base/config.h
+  )
+CONFIGURE_FILE(
+  ${CMAKE_CURRENT_SOURCE_DIR}/deal.II/base/revision.h.in
+  ${CMAKE_CURRENT_BINARY_DIR}/deal.II/base/revision.h
+  )
+
+#
+# Add a rule for how to install the header files:
+#
+INSTALL(DIRECTORY deal.II
+  DESTINATION ${DEAL_II_INCLUDE_RELDIR}
+  COMPONENT library
+  FILES_MATCHING PATTERN "*.h"
+  )
+
+#
+# and don't forget to install all generated header files, too:
+#
+INSTALL(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/deal.II
+  DESTINATION ${DEAL_II_INCLUDE_RELDIR}
+  COMPONENT library
+  FILES_MATCHING PATTERN "*.h"
+  )
+
diff --git a/include/deal.II/algorithms/any_data.h b/include/deal.II/algorithms/any_data.h
new file mode 100644
index 0000000..33bd563
--- /dev/null
+++ b/include/deal.II/algorithms/any_data.h
@@ -0,0 +1,480 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__any_data_h
+#define dealii__any_data_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/subscriptor.h>
+
+#include <boost/any.hpp>
+#include <vector>
+#include <algorithm>
+#include <typeinfo>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * Store any amount of any type of data accessible by an identifier string.
+ *
+ * @todo GK: Deprecate access to AnyData by index and change to a map.
+ */
+class AnyData :
+  public Subscriptor
+{
+public:
+  /// Default constructor for empty object
+  AnyData();
+
+  /// Number of stored data objects.
+  unsigned int size() const;
+
+  /// Append a copy of @p entry to this object and store it under @p name.
+  template <typename type>
+  void add(type entry, const std::string &name);
+
+  /**
+   * @brief Merge the data of another AnyData to the end of this object.
+   */
+  void merge(const AnyData &other);
+
+  /**
+   * @brief Access to stored data object by name.
+   *
+   * Find the object with given name, try to convert it to <tt>type</tt> and
+   * return it. This function throws an exception if either the name does not
+   * exist or if the conversion fails. If such an exception is not desired,
+   * use try_read() instead.
+   */
+  template <typename type>
+  type entry (const std::string &name);
+
+  /**
+   * @brief Read-only access to stored data object by name.
+   *
+   * Find the object with given name, try to convert it to <tt>type</tt> and
+   * return it. This function throws an exception if either the name does not
+   * exist or if the conversion fails. If such an exception is not desired,
+   * use try_read() instead.
+   */
+  template <typename type>
+  const type entry (const std::string &name) const;
+
+  /**
+   * @brief Dedicated read only access by name.
+   *
+   * For a constant object, this function equals entry(). For a non-const
+   * object, it forces read only access to the data. In particular, it throws
+   * an exception if the object is not found or cannot be converted to type.
+   * If such an exception is not desired, use try_read() instead.
+   *
+   * @warning Do not use this function for stored objects which are pointers.
+   * Use read_ptr() instead!
+   */
+  template <typename type>
+  const type read (const std::string &name) const;
+
+  /**
+   * @brief Dedicated read only access by name for pointer data.
+   *
+   * If the stored data object is a pointer to a constant object, the logic of
+   * access becomes fairly complicated. Namely, the standard read function may
+   * fail, depending on whether it was a const pointer or a regular pointer.
+   * This function fixes the logic and ascertains that the object does not
+   * become mutable by accident.
+   */
+  template <typename type>
+  const type *read_ptr (const std::string &name) const;
+
+  /**
+   * Perform the same action as read_ptr(), but do not throw an exception if
+   * the pointer does not exist. Return a null pointer instead.
+   */
+  template <typename type>
+  const type *try_read_ptr (const std::string &name) const;
+
+  /**
+   * @brief Dedicated read only access by name without exceptions.
+   *
+   * This function tries to find the name in the list and return a pointer to
+   * the associated object. If either the name is not found or the object
+   * cannot be converted to the return type, a null pointer is returned.
+   */
+  template <typename type>
+  const type *try_read (const std::string &name) const;
+
+  /**
+   * Access to stored data object by index.
+   */
+  template <typename type>
+  type entry (const unsigned int i);
+
+  /// Read-only access to stored data object by index.
+  template <typename type>
+  const type entry (const unsigned int i) const;
+
+  /// Dedicated read only access to stored data object by index.
+  template <typename type>
+  const type read (const unsigned int i) const;
+
+  /// Dedicated read only access to pointer object.
+  template <typename type>
+  const type *read_ptr (const unsigned int i) const;
+
+  /// Dedicated read only access to pointer object without exception.
+  template <typename type>
+  const type *try_read_ptr (const unsigned int i) const;
+
+  /// Dedicated read only access without exception.
+  template <typename type>
+  const type *try_read (const unsigned int i) const;
+
+  /// Name of object at index.
+  const std::string &name(const unsigned int i) const;
+
+  /**
+   * @brief Find index of a named object
+   *
+   * Try to find the object and return its index in the list. Raise an
+   * ExcNameNotFound assertion if the object has not been found.
+   */
+  unsigned int find(const std::string &name) const;
+
+  /**
+   * @brief Try to find index of a named object
+   *
+   * Try to find the object and return its index in the list. Returns
+   * numbers::invalid_unsigned_int if the name was not found.
+   */
+  unsigned int try_find(const std::string &name) const;
+
+  /// Check whether the object at index @p i is stored as exactly <tt>type</tt>.
+  template <typename type>
+  bool is_type(const unsigned int i) const;
+
+  /// Write index, name and stored type name of every entry to @p os, one per line.
+  template <class StreamType>
+  void list (StreamType &os) const;
+
+  /// An entry with this name does not exist in the AnyData object.
+  DeclException1(ExcNameNotFound, std::string,
+                 << "No entry with the name " << arg1 << " exists.");
+
+  /// The requested type and the stored type are different
+  DeclException2(ExcTypeMismatch,
+                 char *, char *,
+                 << "The requested type " << arg1
+                 << " and the stored type " << arg2
+                 << " must coincide");
+
+  /**
+   * Exception indicating that a function expected a vector to have a certain
+   * name, but we store a different name in that position.
+   */
+  DeclException2(ExcNameMismatch, int, std::string,
+                 << "Name at position " << arg1 << " is not equal to " << arg2);
+private:
+  /// The stored data
+  std::vector<boost::any> data;
+  /// The names of the stored data
+  std::vector<std::string> names;
+};
+
+
+// Nothing to do: both member vectors are default-constructed.
+inline
+AnyData::AnyData()
+{}
+
+
+inline unsigned int
+AnyData::size () const
+{
+  // Every entry has one slot in each vector, so the two lengths must agree.
+  AssertDimension(data.size(), names.size());
+  return data.size();
+}
+
+
+// Return a copy of the object stored at position i. Both the index range
+// and the stored type are checked with assertions.
+template <typename type>
+inline
+type
+AnyData::entry (const unsigned int i)
+{
+  AssertIndexRange(i, size());
+  // p is null if the stored object is not of the requested type.
+  type *p = boost::any_cast<type>(&data[i]);
+  Assert(p != 0,
+         ExcTypeMismatch(typeid(type).name(),data[i].type().name()));
+  return *p;
+}
+
+
+// Const overload: return a copy of the object stored at position i. Both
+// the index range and the stored type are checked with assertions.
+template <typename type>
+inline
+const type
+AnyData::entry (const unsigned int i) const
+{
+  AssertIndexRange(i, size());
+  const type *p = boost::any_cast<type>(&data[i]);
+  // If the first cast did not match, retry with the const-qualified type.
+  if (p==0 )
+    p = boost::any_cast<const type>(&data[i]);
+  Assert(p != 0,
+         ExcTypeMismatch(typeid(type).name(),data[i].type().name()));
+  return *p;
+}
+
+
+// Read-only access by index: return a copy of the object stored at
+// position i. Index range and stored type are checked with assertions.
+template <typename type>
+inline
+const type
+AnyData::read(const unsigned int i) const
+{
+  AssertIndexRange(i, size());
+  const type *p = boost::any_cast<type>(&data[i]);
+  // If the first cast did not match, retry with the const-qualified type.
+  if (p==0)
+    p = boost::any_cast<const type>(&data[i]);
+  Assert(p != 0,
+         ExcTypeMismatch(typeid(type).name(),data[i].type().name()));
+  return *p;
+}
+
+
+// Return the pointer stored at position i as a pointer to const. Index
+// range and stored type are checked with assertions.
+template <typename type>
+inline
+const type *
+AnyData::read_ptr(const unsigned int i) const
+{
+  AssertIndexRange(i, size());
+  // The entry may hold a `type *` or a `const type *`; try both.
+  const type *const *p = boost::any_cast<type *>(&data[i]);
+  if (p==0)
+    p = boost::any_cast<const type *>(&data[i]);
+  Assert(p != 0,
+         ExcTypeMismatch(typeid(type *).name(),data[i].type().name()));
+  return *p;
+}
+
+
+// Return the pointer stored at position i as a pointer to const, or a null
+// pointer if the entry holds neither a `type *` nor a `const type *`. Only
+// the index range is checked with an assertion.
+template <typename type>
+inline
+const type *
+AnyData::try_read_ptr(const unsigned int i) const
+{
+  AssertIndexRange(i, size());
+  // The entry may hold a `type *` or a `const type *`; try both.
+  const type *const *p = boost::any_cast<type *>(&data[i]);
+  if (p==0)
+    p = boost::any_cast<const type *>(&data[i]);
+
+  // Neither cast matched: report this with a null pointer instead of
+  // dereferencing p.
+  if (p==0)
+    return 0;
+  return *p;
+}
+
+
+// Return a pointer to the object stored at position i, or a null pointer if
+// it is not of the requested type.
+template <typename type>
+inline
+const type *
+AnyData::try_read(const unsigned int i) const
+{
+  AssertIndexRange(i, size());
+
+  // Prefer the match on `type`; otherwise the result of the cast to
+  // `const type` (possibly null) is the answer.
+  if (const type *exact = boost::any_cast<type>(&data[i]))
+    return exact;
+  return boost::any_cast<const type>(&data[i]);
+}
+
+
+// Return a reference to the name stored at position i; the index range is
+// checked with an assertion.
+inline
+const std::string &
+AnyData::name(const unsigned int i) const
+{
+  AssertIndexRange(i, size());
+  return names[i];
+}
+
+
+// Return the position of the first entry named n, or
+// numbers::invalid_unsigned_int if there is none.
+inline
+unsigned int
+AnyData::try_find(const std::string &n) const
+{
+  for (unsigned int pos=0; pos<names.size(); ++pos)
+    if (names[pos] == n)
+      return pos;
+
+  return numbers::invalid_unsigned_int;
+}
+
+
+// Like try_find(), but a missing name is treated as an error and reported
+// through an ExcNameNotFound assertion.
+inline
+unsigned int
+AnyData::find(const std::string &n) const
+{
+  const unsigned int i = try_find(n);
+  Assert(i != numbers::invalid_unsigned_int, ExcNameNotFound(n));
+
+  return i;
+}
+
+
+// Return whether the object at position i is stored as exactly `type`. The
+// index range is checked with an assertion, as in the other index-based
+// accessors.
+template <typename type>
+inline
+bool
+AnyData::is_type(const unsigned int i) const
+{
+  AssertIndexRange(i, size());
+  return data[i].type() == typeid(type);
+}
+
+
+// Return a copy of the object stored under the name n. The lookup goes
+// through find(); the stored type is checked with an assertion.
+template <typename type>
+inline
+type
+AnyData::entry (const std::string &n)
+{
+  const unsigned int i = find(n);
+  // p is null if the stored object is not of the requested type.
+  type *p = boost::any_cast<type>(&data[i]);
+  Assert(p != 0,
+         ExcTypeMismatch(typeid(type).name(),data[i].type().name()));
+  return *p;
+}
+
+
+// Const overload: return a copy of the object stored under the name n. The
+// lookup goes through find(); the stored type is checked with an assertion.
+template <typename type>
+inline
+const type
+AnyData::entry (const std::string &n) const
+{
+  const unsigned int i = find(n);
+  // p is null if the stored object is not of the requested type.
+  const type *p = boost::any_cast<type>(&data[i]);
+  Assert(p != 0,
+         ExcTypeMismatch(typeid(type).name(),data[i].type().name()));
+  return *p;
+}
+
+
+// Read-only access by name: return a copy of the object stored under the
+// name n. The lookup goes through find(); the stored type is checked with
+// an assertion.
+template <typename type>
+inline
+const type
+AnyData::read(const std::string &n) const
+{
+  const unsigned int i = find(n);
+  // p is null if the stored object is not of the requested type.
+  const type *p = boost::any_cast<type>(&data[i]);
+  Assert(p != 0,
+         ExcTypeMismatch(typeid(type).name(),data[i].type().name()));
+  return *p;
+}
+
+
+// Return the pointer stored under the name n as a pointer to const. The
+// lookup goes through find(); the stored type is checked with an assertion.
+template <typename type>
+inline
+const type *
+AnyData::read_ptr(const std::string &n) const
+{
+  const unsigned int i = find(n);
+  // The entry may hold a `type *` or a `const type *`; try both.
+  const type *const *p = boost::any_cast<type *>(&data[i]);
+  if (p==0)
+    p = boost::any_cast<const type *>(&data[i]);
+  // What was requested is a pointer, so that is the type to report on a
+  // mismatch (as the index-based overload does).
+  Assert(p != 0,
+         ExcTypeMismatch(typeid(type *).name(),data[i].type().name()));
+  return *p;
+}
+
+
+// Return the pointer stored under the name n as a pointer to const. A null
+// pointer is returned if the name is unknown or if the entry holds neither
+// a `type *` nor a `const type *`.
+template <typename type>
+inline
+const type *
+AnyData::try_read_ptr(const std::string &n) const
+{
+  const unsigned int i = try_find(n);
+  if (i == numbers::invalid_unsigned_int)
+    return 0;
+
+  // The entry may hold a `type *` or a `const type *`; try both.
+  const type *const *p = boost::any_cast<type *>(&data[i]);
+  if (p==0)
+    p = boost::any_cast<const type *>(&data[i]);
+
+  // Neither cast matched: report this with a null pointer instead of
+  // dereferencing p.
+  if (p==0)
+    return 0;
+  return *p;
+}
+
+
+// Return a pointer to the object stored under the name n, or a null pointer
+// if the name is unknown or the object is not of the requested type.
+template <typename type>
+inline
+const type *
+AnyData::try_read(const std::string &n) const
+{
+  // Locate the first entry with this name; bail out if there is none.
+  const unsigned int pos = try_find(n);
+  if (pos == numbers::invalid_unsigned_int)
+    return 0;
+
+  // The cast itself yields a null pointer on a type mismatch.
+  return boost::any_cast<type>(&data[pos]);
+}
+
+
+// Append ent (wrapped in a boost::any) and its name n as a new last entry.
+template <typename type>
+inline
+void
+AnyData::add(type ent, const std::string &n)
+{
+  boost::any wrapped (ent);
+
+  // Data first, then the name, so both vectors grow by one element.
+  data.push_back(wrapped);
+  names.push_back(n);
+}
+
+
+// Append all entries of other, in order, to the end of this object.
+inline
+void
+AnyData::merge(const AnyData &other)
+{
+  // Fix the number of entries to copy up front. Besides avoiding a
+  // re-evaluation in every iteration, this makes the loop terminate when an
+  // object is merged into itself (its size grows while we append).
+  const unsigned int n_other = other.size();
+  for (unsigned int i=0; i<n_other; ++i)
+    {
+      names.push_back(other.names[i]);
+      data.push_back(other.data[i]);
+    }
+}
+
+
+// Write one line per entry to os: index, name and stored type name,
+// separated by tabs.
+template <class StreamType>
+inline
+void AnyData::list(StreamType &os) const
+{
+  const unsigned int n_entries = names.size();
+  for (unsigned int index=0; index!=n_entries; ++index)
+    os << index << '\t' << names[index] << '\t'
+       << data[index].type().name() << std::endl;
+}
+
+
+//----------------------------------------------------------------------//
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
+
+
+
+
+
+
+
+
+
+
diff --git a/include/deal.II/algorithms/named_selection.h b/include/deal.II/algorithms/named_selection.h
new file mode 100644
index 0000000..bc8de04
--- /dev/null
+++ b/include/deal.II/algorithms/named_selection.h
@@ -0,0 +1,119 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__named_selection_h
+#define dealii__named_selection_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/algorithms/any_data.h>
+
+#include <string>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * Select data from AnyData corresponding to the attached name.
+ *
+ * Given a list of names to search for (provided by add()), objects of this
+ * class provide an index list of the selected data.
+ *
+ * @author Guido Kanschat, 2009
+ */
+class NamedSelection
+{
+
+public:
+
+  /**
+   * Add a new name to be searched for in @p data supplied in initialize().
+   *
+   * @note Names will be added to the end of the current list.
+   */
+  void add (const std::string &name);
+
+
+  /**
+   * Create the index vector pointing into the AnyData object.
+   */
+  void initialize(const AnyData &data);
+
+
+  /**
+   * The number of names in this object. This function may be used whether
+   * initialize() was called before or not.
+   */
+  unsigned int size() const;
+
+
+  /**
+   * Return the corresponding index in the AnyData object supplied to the last
+   * initialize(). It is an error if initialize() has not been called before.
+   *
+   * Indices are in the same order as the calls to add().
+   */
+  unsigned int operator() (unsigned int i) const;
+
+
+private:
+
+  /**
+   * The selected names.
+   */
+  std::vector<std::string> names;
+
+  /**
+   * The index map generated by initialize() and accessed by operator().
+   */
+  std::vector<unsigned int> indices;
+
+};
+
+
+inline
+unsigned int
+NamedSelection::size() const
+{
+
+  return names.size();
+
+}
+
+
+inline
+void
+NamedSelection::add(const std::string &s)
+{
+
+  names.push_back(s);
+
+}
+
+
+inline
+unsigned int
+NamedSelection::operator() (unsigned int i) const
+{
+
+  Assert (indices.size() == names.size(), ExcNotInitialized());
+
+  AssertIndexRange(i, size());
+
+  return indices[i];
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/algorithms/newton.h b/include/deal.II/algorithms/newton.h
new file mode 100644
index 0000000..f29da51
--- /dev/null
+++ b/include/deal.II/algorithms/newton.h
@@ -0,0 +1,174 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__newton_h
+#define dealii__newton_h
+
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/lac/solver_control.h>
+#include <deal.II/algorithms/operator.h>
+#include <deal.II/algorithms/any_data.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+class ParameterHandler;
+
+namespace Algorithms
+{
+  /**
+   * Operator class performing Newton's iteration with standard step size
+   * control and adaptive matrix generation.
+   *
+   * This class performs a Newton iteration up to convergence determined by
+   * #control. If after an update the norm of the residual has become larger,
+   * then step size control is activated and the update is subsequently
+   * divided by two until the residual actually becomes smaller (or the
+   * minimal scaling factor determined by #n_stepsize_iterations is reached).
+   *
+   * Since assembling matrices, depending on the implementation, tends to be
+   * costly, this method applies an adaptive reassembling strategy. Only if
+   * the reduction factor for the residual is more than #threshold, the event
+   * Algorithms::bad_derivative is submitted to #inverse_derivative. It is up
+   * to this object to implement reassembling accordingly.
+   *
+   * <h3>Contents of the AnyData objects</h3>
+   *
+   * The only value used by the Newton method is the first vector in the
+   * parameter <tt>out</tt> of operator()(). It serves as the start vector of
+   * Newton's method and in the end contains the solution. All other vectors
+   * of <tt>out</tt> are ignored by Newton's method and its inner Operator
+   * objects. All vectors of <tt>in</tt> are forwarded to the inner Operator
+   * objects, with additional information added as follows.
+   *
+   * When calling (*#residual)(), the AnyData <tt>in</tt> given to the Newton
+   * iteration is prepended by a vector <tt>"Newton iterate"</tt>, the current
+   * value of the Newton iterate, which can be used to evaluate the residual
+   * at this point.
+   *
+   * For the call to (*#inverse_derivative), the vector <tt>"Newton
+   * residual"</tt> is inserted before <tt>"Newton iterate"</tt>.
+   *
+   * @author Guido Kanschat, 2006, 2010
+   */
+  template <typename VectorType>
+  class Newton : public OperatorBase
+  {
+  public:
+    /**
+     * Constructor, receiving the applications computing the residual and
+     * solving the linear problem, respectively.
+     */
+    Newton (OperatorBase &residual, OperatorBase &inverse_derivative);
+
+    /**
+     * Declare the parameters applicable to Newton's method.
+     */
+    static void declare_parameters (ParameterHandler &param);
+
+    /**
+     * Read the parameters in the ParameterHandler.
+     */
+    void parse_parameters (ParameterHandler &param);
+
+    /**
+     * Initialize the pointer data_out for debugging.
+     */
+    void initialize (OutputOperator<VectorType> &output);
+
+    /**
+     * The actual Newton iteration. The initial value is in <tt>out(0)</tt>,
+     * which also contains the result after convergence. Values in <tt>in</tt>
+     * are not used by Newton, but will be handed down to the objects
+     * #residual and #inverse_derivative.
+     */
+    virtual void operator() (AnyData &out, const AnyData &in);
+
+    virtual void notify(const Event &);
+
+    /**
+     * Set the maximal residual reduction allowed without triggering
+     * assembling in the next step. Return the previous value.
+     */
+    double threshold(double new_value);
+
+    /**
+     * Control object for the Newton iteration.
+     */
+    ReductionControl control;
+  private:
+    /**
+     * The operator computing the residual.
+     */
+    SmartPointer<OperatorBase, Newton<VectorType> > residual;
+
+    /**
+     * The operator applying the inverse derivative to the residual.
+     */
+    SmartPointer<OperatorBase, Newton<VectorType> > inverse_derivative;
+
+    /**
+     * The operator handling the output in case the debug_vectors is true.
+     * Call the initialize function first.
+     */
+    SmartPointer<OutputOperator<VectorType>, Newton<VectorType> > data_out;
+
+    /**
+     * This flag is set by the function assemble(), indicating that the matrix
+     * must be assembled anew upon start.
+     */
+    bool assemble_now;
+
+    /**
+     * A flag used to decide how many stepsize iterations should be made.
+     * Default is the original value of 21.
+     *
+     * Enter zero here to turn off stepsize control.
+     *
+     * @note Controlled by <tt>Stepsize iterations</tt> in parameter file
+     */
+    unsigned int n_stepsize_iterations;
+
+    /**
+     * Threshold for re-assembling matrix.
+     *
+     * If the quotient of two consecutive residuals is smaller than this
+     * threshold, the system matrix is not assembled in this step.
+     *
+     * @note This parameter should be adjusted to the residual gain of the
+     * inner solver.
+     *
+     * The default value is zero, resulting in reassembling in every Newton
+     * step.
+     */
+    double assemble_threshold;
+
+  public:
+    /**
+     * Print residual, update and updated solution after each step into file
+     * <tt>Newton_NNN</tt>?
+     */
+    bool debug_vectors;
+    /**
+     * Write debug output to @p deallog; the higher the number, the more
+     * output.
+     */
+    unsigned int debug;
+  };
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/algorithms/newton.templates.h b/include/deal.II/algorithms/newton.templates.h
new file mode 100644
index 0000000..9a844e7
--- /dev/null
+++ b/include/deal.II/algorithms/newton.templates.h
@@ -0,0 +1,206 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/algorithms/newton.h>
+
+#include <deal.II/base/parameter_handler.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/lac/vector_memory.h>
+
+#include <iomanip>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace Algorithms
+{
+  template <typename VectorType>
+  Newton<VectorType>::Newton(OperatorBase &residual,
+                             OperatorBase &inverse_derivative)
+    :
+    residual(&residual), inverse_derivative(&inverse_derivative),
+    assemble_now(false),
+    n_stepsize_iterations(21),
+    assemble_threshold(0.),
+    debug_vectors(false),
+    debug(0)
+  {}
+
+
+  template <typename VectorType>
+  void
+  Newton<VectorType>::declare_parameters(ParameterHandler &param)
+  {
+    param.enter_subsection("Newton");
+    ReductionControl::declare_parameters (param);
+    param.declare_entry("Assemble threshold", "0.", Patterns::Double());
+    param.declare_entry("Stepsize iterations", "21", Patterns::Integer());
+    param.declare_entry("Debug level", "0", Patterns::Integer());
+    param.declare_entry("Debug vectors", "false", Patterns::Bool());
+    param.leave_subsection();
+  }
+
+  template <typename VectorType>
+  void
+  Newton<VectorType>::parse_parameters (ParameterHandler &param)
+  {
+    param.enter_subsection("Newton");
+    control.parse_parameters (param);
+    assemble_threshold = param.get_double("Assemble threshold");
+    n_stepsize_iterations = param.get_integer("Stepsize iterations");
+    debug_vectors = param.get_bool("Debug vectors");
+    param.leave_subsection ();
+  }
+
+  template <typename VectorType>
+  void
+  Newton<VectorType>::initialize (OutputOperator<VectorType> &output)
+  {
+    data_out = &output;
+  }
+
+  template <typename VectorType>
+  void
+  Newton<VectorType>::notify(const Event &e)
+  {
+    residual->notify(e);
+    inverse_derivative->notify(e);
+  }
+
+
+  template <typename VectorType>
+  double
+  Newton<VectorType>::threshold(const double thr)
+  {
+    const double t = assemble_threshold;
+    assemble_threshold = thr;
+    return t;
+  }
+
+
+  template <typename VectorType>
+  void
+  Newton<VectorType>::operator() (AnyData &out, const AnyData &in)
+  {
+    Assert (out.size() == 1, ExcNotImplemented());
+    deallog.push ("Newton");
+    // out(0) holds the start vector and is updated in place below.
+    VectorType &u = *out.entry<VectorType *>(0);
+
+    if (debug>2)
+      deallog << "u: " << u.l2_norm() << std::endl;
+
+    GrowingVectorMemory<VectorType> mem;
+    typename VectorMemory<VectorType>::Pointer Du(mem);
+    typename VectorMemory<VectorType>::Pointer res(mem);
+
+    res->reinit(u);
+    AnyData src1;
+    AnyData src2;
+    src1.add<const VectorType *>(&u, "Newton iterate");
+    src1.merge(in);
+    src2.add<const VectorType *>(res, "Newton residual");
+    src2.merge(src1);
+    AnyData out1;
+    out1.add<VectorType *>(res, "Residual");
+    AnyData out2;
+    out2.add<VectorType *>(Du, "Update");
+
+    unsigned int step = 0;
+    // fill res with (f(u), v)
+    (*residual)(out1, src1);
+    double resnorm = res->l2_norm();
+    double old_residual = 0.;
+
+    if (debug_vectors)
+      {
+        AnyData out;
+        VectorType *p = &u;
+        out.add<const VectorType *>(p, "solution");
+        p = Du;
+        out.add<const VectorType *>(p, "update");
+        p = res;
+        out.add<const VectorType *>(p, "residual");
+        *data_out << step;
+        *data_out << out;
+      }
+
+    while (control.check(step++, resnorm) == SolverControl::iterate)
+      {
+        // assemble (Df(u), v)
+        if ((step > 1) && (resnorm/old_residual >= assemble_threshold))
+          inverse_derivative->notify (Events::bad_derivative);
+
+        Du->reinit(u);
+        try
+          {
+            (*inverse_derivative)(out2, src2);
+          }
+        catch (SolverControl::NoConvergence &e)
+          {
+            deallog << "Inner iteration failed after "
+                    << e.last_step << " steps with residual "
+                    << e.last_residual << std::endl;
+          }
+
+        if (debug_vectors)
+          {
+            AnyData out;
+            VectorType *p = &u;
+            out.add<const VectorType *>(p, "solution");
+            p = Du;
+            out.add<const VectorType *>(p, "update");
+            p = res;
+            out.add<const VectorType *>(p, "residual");
+            *data_out << step;
+            *data_out << out;
+          }
+
+        u.add(-1., *Du);
+        old_residual = resnorm;
+        (*residual)(out1, src1);
+        resnorm = res->l2_norm();
+
+        // Step size control: halve the update while the residual does not drop
+        unsigned int step_size = 0;
+        while (resnorm >= old_residual)
+          {
+            ++step_size;
+            if (step_size > n_stepsize_iterations)
+              {
+                deallog << "No smaller stepsize allowed!" << std::endl;
+                break;
+              }
+            if (control.log_history())
+              deallog << "Trying step size: 1/" << (1<<step_size)
+                      << " since residual was " << resnorm << std::endl;
+            u.add(1./(1<<step_size), *Du);
+            (*residual)(out1, src1);
+            resnorm = res->l2_norm();
+          }
+      }
+    deallog.pop();
+
+    // in case of failure: throw exception
+    if (control.last_check() != SolverControl::success)
+      AssertThrow(false, SolverControl::NoConvergence (control.last_step(),
+                                                       control.last_value()));
+    // otherwise exit as normal
+  }
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/include/deal.II/algorithms/operator.h b/include/deal.II/algorithms/operator.h
new file mode 100644
index 0000000..f56bdb1
--- /dev/null
+++ b/include/deal.II/algorithms/operator.h
@@ -0,0 +1,163 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__operator_h
+#define dealii__operator_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/algorithms/any_data.h>
+#include <deal.II/base/event.h>
+
+#include <fstream>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * Namespace containing numerical algorithms in a unified form.
+ *
+ * All algorithmic classes in this namespace are derived from either Operator
+ * or OutputOperator, depending on whether they return a value or not. See the
+ * documentation of those classes for more detailed information on how to use
+ * them.
+ *
+ * @author Guido Kanschat
+ * @date 2012, 2013
+ */
+namespace Algorithms
+{
+  /**
+   * @todo Update this documentation and the one of Operator
+   *
+   * The abstract base class of all algorithms in this library. An operator is
+   * an object with an operator(), which transforms a set of named vectors
+   * into another set of named vectors.
+   *
+   * Furthermore, an operator can be notified of parameter changes by the
+   * calling routine. The outer iteration can notify() the Operator of an
+   * Event, which could be for instance a change of mesh, a different time
+   * step size or too slow convergence of Newton's method, which would then
+   * trigger reassembling of a matrix or similar things.
+   *
+   * <h3>Usage for nested iterations</h3>
+   *
+   * This is probably the most prominent use for Operator, where an outer
+   * iterative method calls an inner solver and so on. Typically, the
+   * innermost method in such a nested system will have to compute a residual
+   * using values from all outer iterations. Since the depth and order of such
+   * a nesting is hardly predictable when designing a general tool, we use
+   * AnyData to access these vectors. Typically, the first vector in
+   * <tt>out</tt> contains the start vector when operator()() is called, and
+   * the solution when the function returns. The object <tt>in</tt> is
+   * providing additional information and forwarded to the inner Operator
+   * objects of the nested iteration.
+   *
+   * @author Guido Kanschat
+   * @date 2014
+   */
+  class OperatorBase : public Subscriptor
+  {
+  public:
+    /**
+     * The virtual destructor.
+     */
+    ~OperatorBase();
+
+    /**
+     * The actual operation, which is implemented in a derived class.
+     */
+    virtual void operator() (AnyData &out, const AnyData &in) = 0;
+
+    /**
+     * Register an event triggered by an outer iteration.
+     */
+    virtual void notify(const Event &);
+    /**
+     * Clear all #notifications.
+     */
+    void clear_events();
+  protected:
+    /**
+     * Accumulate events here. If any of those is set, the function solve() of
+     * a terminal application must take care of reassembling the matrix.
+     */
+    Event notifications;
+
+  };
+
+  /**
+   * A unary operator base class, intended to output the vectors in AnyData
+   * in each step of an iteration.
+   *
+   * @author Guido Kanschat, 2010
+   */
+  template <typename VectorType>
+  class OutputOperator : public Subscriptor
+  {
+    OutputOperator(const OutputOperator<VectorType> &);
+  public:
+    OutputOperator ();
+    /**
+     * Empty virtual destructor.
+     */
+    virtual ~OutputOperator();
+
+    /**
+     * Set the stream @p os to which data is written. If no stream is selected
+     * with this function, data goes to @p deallog.
+     */
+    void initialize_stream(std::ostream &stream);
+    /**
+     * Set the current step.
+     */
+    void set_step(const unsigned int step);
+    /**
+     * Output all the vectors in AnyData.
+     */
+    virtual OutputOperator<VectorType> &operator<< (const AnyData &vectors);
+
+  protected:
+    unsigned int step;
+  private:
+    std::ostream *os;
+  };
+
+  template <typename VectorType>
+  inline
+  void
+  OutputOperator<VectorType>::set_step (const unsigned int s)
+  {
+    step = s;
+  }
+
+
+  /**
+   * Set the step number in OutputOperator by shifting an integer value.
+   *
+   * @relates OutputOperator
+   */
+  template <typename VectorType>
+  inline
+  OutputOperator<VectorType> &
+  operator<< (OutputOperator<VectorType> &out, unsigned int step)
+  {
+    out.set_step(step);
+    return out;
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/algorithms/operator.templates.h b/include/deal.II/algorithms/operator.templates.h
new file mode 100644
index 0000000..937ee64
--- /dev/null
+++ b/include/deal.II/algorithms/operator.templates.h
@@ -0,0 +1,74 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/algorithms/operator.h>
+#include <deal.II/base/logstream.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace Algorithms
+{
+  template <typename VectorType>
+  OutputOperator<VectorType>::~OutputOperator()
+  {}
+
+  template <typename VectorType>
+  OutputOperator<VectorType>::OutputOperator()
+    :
+    step(numbers::invalid_unsigned_int), os(0)
+  {}
+
+  template <typename VectorType>
+  void OutputOperator<VectorType>::initialize_stream(std::ostream &stream)
+  {
+    os =&stream;
+  }
+
+  template <typename VectorType>
+  OutputOperator<VectorType> &
+  OutputOperator<VectorType>::operator<< (const AnyData &vectors)
+  {
+    if (os == 0)
+      {
+        deallog << "Step " << step << std::endl;
+        for (unsigned int i=0; i<vectors.size(); ++i)
+          {
+            const VectorType *v = vectors.try_read_ptr<VectorType>(i);
+            if (v == 0) continue;
+            deallog << vectors.name(i);
+            for (unsigned int j=0; j<v->size(); ++j)
+              deallog << ' ' << (*v)(j);
+            deallog << std::endl;
+          }
+        deallog << std::endl;
+      }
+    else
+      {
+        (*os) << ' ' << step;
+        for (unsigned int i=0; i<vectors.size(); ++i)
+          {
+            const VectorType *v = vectors.try_read_ptr<VectorType>(i);
+            if (v == 0) continue;
+            for (unsigned int j=0; j<v->size(); ++j)
+              (*os) << ' ' << (*v)(j);
+          }
+        (*os) << std::endl;
+      }
+    return *this;
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/include/deal.II/algorithms/theta_timestepping.h b/include/deal.II/algorithms/theta_timestepping.h
new file mode 100644
index 0000000..6bbe3c6
--- /dev/null
+++ b/include/deal.II/algorithms/theta_timestepping.h
@@ -0,0 +1,395 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__theta_timestepping_h
+#define dealii__theta_timestepping_h
+
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/algorithms/operator.h>
+#include <deal.II/algorithms/timestep_control.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+class ParameterHandler;
+
+namespace Algorithms
+{
+  /**
+   * A little structure, gathering the size of a timestep and the current
+   * time. Time stepping schemes can use this to provide time step information
+   * to the classes actually performing a single step.
+   *
+   * The definition of what is considered "current time" depends on the
+   * scheme. For an explicit scheme, this is the time at the beginning of the
+   * step. For an implicit scheme, it is usually the time at the end.
+   */
+  struct TimestepData
+  {
+/// The current time
+    double time;
+/// The current step size times something
+    double step;
+  };
+
+  /**
+   * Application class performing the theta timestepping scheme.
+   *
+   * The theta scheme is an abstraction of implicit and explicit Euler
+   * schemes, the Crank-Nicolson scheme and linear combinations of those. The
+   * choice of the actual scheme is controlled by the parameter #theta as
+   * follows.
+   * <ul>
+   * <li> #theta=0: explicit Euler scheme
+   * <li> #theta=1: implicit Euler scheme
+   * <li> #theta=½: Crank-Nicolson scheme
+   * </ul>
+   *
+   * For fixed #theta, the Crank-Nicolson scheme is the only second order
+   * scheme. Nevertheless, further stability may be achieved by choosing
+   * #theta larger than ½, thereby introducing a first order error term. In
+   * order to avoid a loss of convergence order, the adaptive theta scheme can
+   * be used, where <i>#theta=½+c dt</i>.
+   *
+   * Assume that we want to solve the equation <i>u' + F(u) = 0</i> with a
+   * step size <i>k</i>.  A step of the theta scheme can be written as
+   *
+   * @f[
+   *   M u_{n+1} + \theta k F(u_{n+1})  = M u_n - (1-\theta)k F(u_n).
+   * @f]
+   *
+   * Here, <i>M</i> is the mass matrix. We see, that the right hand side
+   * amounts to an explicit Euler step with modified step size in weak form
+   * (up to inversion of M). The left hand side corresponds to an implicit
+   * Euler step with modified step size (right hand side given). Thus, the
+   * implementation of the theta scheme will use two Operator objects, one for
+   * the explicit, one for the implicit part. Each of these will use its own
+   * TimestepData to account for the modified step sizes (and different times
+   * if the problem is not autonomous). Note that once the explicit part has
+   * been computed, the left hand side actually constitutes a linear or
+   * nonlinear system which has to be solved.
+   *
+   * <h3>Usage of AnyData</h3>
+   *
+   * ThetaTimestepping uses AnyData for communicating vectors and time step
+   * information with outer or inner Operator objects. It does not itself use
+   * the input vectors provided, but forwards them to the explicit and
+   * implicit operators.
+   *
+   * <h4>Vector data</h4>
+   *
+   * The explicit Operator #op_explicit receives in its input in first place
+   * the vector "Previous iterate", which is the solution value after the
+   * previous timestep. It is followed by all vectors provided to
+   * ThetaTimestepping::operator() as input argument. #op_explicit is supposed
+   * to write its result into the first position of its output argument,
+   * labeled "Result".
+   *
+   * The implicit Operator #op_implicit receives the result of #op_explicit in
+   * its first input vector labeled "Previous time". It is followed by all
+   * vectors provided to ThetaTimestepping::operator() as input argument. The
+   * output of #op_implicit is directly written into the output argument given
+   * to ThetaTimestepping.
+   *
+   * <h4>Scalar data</h4>
+   *
+   * Since the introduction of AnyData, ThetaTimestepping is able to
+   * communicate the current time step information through AnyData as well.
+   * Therefore, the AnyData objects handed as input to #op_explicit and
+   * #op_implicit contain two entries of type `const double*` named "Time" and
+   * "Timestep". Note that "Time" refers to the time at the beginning of the
+   * current step for #op_explicit and at the end for #op_implicit,
+   * respectively.
+   *
+   * <h3>Usage of ThetaTimestepping</h3>
+   *
+   * The use of ThetaTimestepping is more complicated than for instance Newton,
+   * since the inner operators will usually need to access the TimestepData.
+   * Thus, we have a circular dependency of information, and we include the
+   * following example for its use. It can be found in
+   * <tt>examples/doxygen/theta_timestepping.cc</tt>
+   *
+   * @dontinclude theta_timestepping.cc
+   *
+   * First, we define the two operators used by ThetaTimestepping and call
+   * them <code>Implicit</code> and <code>Explicit</code>. They both share the
+   * public interface of Operator, and additionally provide storage for the
+   * matrices to be used and a pointer to TimestepData. Note that we do not
+   * use a SmartPointer here, since the TimestepData will be destroyed before
+   * the operator.
+   *
+   * @skip class Explicit @until End of declarations
+   *
+   * These operators will be implemented after the main program. But let us
+   * look first at how they get used. First, let us define a matrix to be used
+   * for our system and also an OutputOperator in order to write the data of
+   * each timestep to a file.
+   *
+   * @skipline main @until out.initialize
+   *
+   * Now we create objects for the implicit and explicit parts of the steps as
+   * well as the ThetaTimestepping itself. We initialize the timestepping with
+   * the output operator in order to be able to see the output in every step.
+   *
+   * @until set_output
+   *
+   * The next step is providing the vectors to be used. <tt>value</tt> is
+   * filled with the initial value and is also the vector where the solution
+   * at each timestep will be. Because the interface of Operator has to be
+   * able to handle several vectors, we need to store it in an AnyData object.
+   * Since our problem has no additional parameters, the input AnyData object
+   * remains empty.
+   *
+   * @until add
+   *
+   * Finally, we are ready to tell the solver, that we are starting at the
+   * initial timestep and run it.
+   *
+   * @until }
+   *
+   * First the constructor, which simply copies the system matrix into the
+   * member pointer for later use.
+   *
+   * @skip Explicit:: @until }
+   *
+   * Now we need to study the application of the implicit and explicit
+   * operator. We assume that the pointer <code>matrix</code> points to the
+   * matrix created in the main program (the constructor did this for us).
+   * Here, we first get the time step size from the AnyData object that was
+   * provided as input. Then, if we are in the first step or if the timestep
+   * has changed, we fill the local matrix $m$, such that with the given
+   * matrix $M$, it becomes \f[ m = I - \Delta t M. \f] After we have worked
+   * off the notifications, we clear them, such that the matrix is only
+   * generated when necessary.
+   *
+   * @skipline void @until clear
+   *
+   * Now we multiply the input vector with the new matrix and store on output.
+   *
+   * @until }
+   * The code for the implicit operator is almost the same, except that we
+   * flip the sign in front of the timestep and use the inverse of the matrix.
+   *
+   * @until vmult @until }
+   * @author Guido Kanschat
+   * @date 2010
+   */
+  template <typename VectorType>
+  class ThetaTimestepping : public OperatorBase
+  {
+  public:
+    /**
+     * Constructor, receiving the two operators stored in #op_explicit and
+     * #op_implicit. For their meaning, see the description of those
+     * variables.
+     */
+    ThetaTimestepping (OperatorBase &op_explicit,
+                       OperatorBase &op_implicit);
+
+    /**
+     * The timestepping scheme.
+     *
+     * @param in is ignored by ThetaTimestepping, but is merged into the
+     * AnyData objects used as input for the operators #op_explicit and
+     * #op_implicit.
+     *
+     * @param out in its first argument must contain a pointer to a VectorType
+     * instance, which contains the initial value when the operator is called.
+     * It contains the final value when the operator returns.
+     */
+    virtual void operator() (AnyData &out, const AnyData &in);
+
+    /**
+     * Register an event triggered by an outer iteration.
+     */
+    virtual void notify(const Event &);
+
+    /**
+     * Define an operator which will output the result in each step. Note that
+     * no output will be generated without this.
+     */
+    void set_output(OutputOperator<VectorType> &output);
+
+    /**
+     * Declare parameters in a parameter handler.
+     */
+    static void declare_parameters (ParameterHandler &param);
+
+    /**
+     * Read the parameters in the ParameterHandler.
+     */
+    void parse_parameters (ParameterHandler &param);
+
+    /**
+     * The current time in the timestepping scheme.
+     */
+    double current_time() const;
+    /**
+     * The current step size.
+     */
+    double step_size() const;
+    /**
+     * The weight between implicit and explicit part.
+     */
+    double theta() const;
+
+    /**
+     * Set a new weight and return the old one.
+     */
+    double theta(double new_theta);
+
+    /**
+     * The data handed to the #op_explicit time stepping operator.
+     *
+     * The time in here is the time at the beginning of the current step, the
+     * time step is (1-#theta) times the actual time step.
+     */
+    const TimestepData &explicit_data() const;
+
+    /**
+     * The data handed to the #op_implicit time stepping operator.
+     *
+     * The time in here is the time at the beginning of the current step, the
+     * time step is #theta times the actual time step.
+     */
+    const TimestepData &implicit_data() const;
+
+    /**
+     * Allow access to the control object.
+     */
+    TimestepControl &timestep_control();
+
+  private:
+    /**
+     * The object controlling the time step size and computing the new time in
+     * each step.
+     */
+    TimestepControl control;
+
+    /**
+     * The control parameter theta in the range <tt>[0,1]</tt>. It defaults to
+     * 0.5.
+     */
+    double vtheta;
+    /**
+     * Use adaptive #theta if <tt>true</tt>. Not yet implemented.
+     */
+    bool adaptive;
+
+    /**
+     * The data for the explicit part of the scheme.
+     */
+    TimestepData d_explicit;
+
+    /**
+     * The data for the implicit part of the scheme.
+     */
+    TimestepData d_implicit;
+
+
+    /**
+     * The operator computing the explicit part of the scheme. This will
+     * receive in its input data the value at the current time with name
+     * "Previous iterate". It should obtain the current time and time
+     * step size from explicit_data().
+     *
+     * Its return value is $ Mu+cF(u) $, where $u$ is the current state
+     * vector, $M$ the mass matrix, $F$ the operator in space and $c$ is the
+     * adjusted time step size $(1-\theta) \Delta t$.
+     */
+    SmartPointer<OperatorBase, ThetaTimestepping<VectorType> > op_explicit;
+
+    /**
+     * The operator solving the implicit part of the scheme. It will receive
+     * in its input data the vector "Previous time". Information on the
+     * timestep should be obtained from implicit_data().
+     *
+     * Its return value is the solution <i>u</i> of <i>Mu-cF(u)=f</i>, where
+     * <i>f</i> is the dual space vector found in the "Previous time" entry of
+     * the input data, <i>M</i> the mass matrix, <i>F</i> the operator in
+     * space and <i>c</i> is the adjusted time step size $ \theta \Delta t$
+     */
+    SmartPointer<OperatorBase, ThetaTimestepping<VectorType> > op_implicit;
+
+    /**
+     * The operator writing the output in each time step
+     */
+    SmartPointer<OutputOperator<VectorType>, ThetaTimestepping<VectorType> > output;
+  };
+
+
+  template <typename VectorType>
+  inline
+  const TimestepData &
+  ThetaTimestepping<VectorType>::explicit_data () const
+  {
+    // read-only view of the time/step pair used by the explicit operator
+    return this->d_explicit;
+  }
+
+
+  template <typename VectorType>
+  inline
+  const TimestepData &
+  ThetaTimestepping<VectorType>::implicit_data () const
+  {
+    // read-only view of the time/step pair used by the implicit operator
+    return this->d_implicit;
+  }
+
+
+  template <typename VectorType>
+  inline
+  TimestepControl &
+  ThetaTimestepping<VectorType>::timestep_control ()
+  {
+    // mutable access, so that callers can configure the control object
+    return this->control;
+  }
+
+  template <typename VectorType>
+  inline
+  void ThetaTimestepping<VectorType>::set_output (OutputOperator<VectorType> &output_operator)
+  {
+    // only the address is stored; the operator itself is not copied
+    this->output = &output_operator;
+  }
+
+
+  template <typename VectorType>
+  inline
+  double ThetaTimestepping<VectorType>::theta () const
+  {
+    // current weight between the implicit and the explicit part
+    return this->vtheta;
+  }
+
+
+  template <typename VectorType>
+  inline
+  double ThetaTimestepping<VectorType>::theta (double new_theta)
+  {
+    // remember the weight used so far, install the new one, and report the
+    // previous value back to the caller
+    const double previous_theta = this->vtheta;
+    this->vtheta = new_theta;
+    return previous_theta;
+  }
+
+
+  template <typename VectorType>
+  inline
+  double ThetaTimestepping<VectorType>::current_time () const
+  {
+    // the control object keeps track of the current time
+    return this->control.now();
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/algorithms/theta_timestepping.templates.h b/include/deal.II/algorithms/theta_timestepping.templates.h
new file mode 100644
index 0000000..40e19a1
--- /dev/null
+++ b/include/deal.II/algorithms/theta_timestepping.templates.h
@@ -0,0 +1,135 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/algorithms/theta_timestepping.h>
+
+#include <deal.II/base/parameter_handler.h>
+#include <deal.II/lac/vector_memory.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace Algorithms
+{
+  // Store the addresses of the two operators; theta starts out at 0.5 and
+  // adaptive theta selection is switched off.
+  template <typename VectorType>
+  ThetaTimestepping<VectorType>::ThetaTimestepping (OperatorBase &explicit_operator,
+                                                    OperatorBase &implicit_operator)
+    :
+    vtheta (0.5),
+    adaptive (false),
+    op_explicit (&explicit_operator),
+    op_implicit (&implicit_operator)
+  {}
+
+
+  template <typename VectorType>
+  void
+  ThetaTimestepping<VectorType>::notify(const Event &event)
+  {
+    // pass the event on to both operators, explicit one first
+    (*op_explicit).notify(event);
+    (*op_implicit).notify(event);
+  }
+
+  template <typename VectorType>
+  void
+  ThetaTimestepping<VectorType>::declare_parameters(ParameterHandler &prm)
+  {
+    // everything lives in a subsection of its own
+    prm.enter_subsection("ThetaTimestepping");
+
+    // first the entries of the control object ...
+    TimestepControl::declare_parameters (prm);
+
+    // ... then the two entries owned by this class
+    prm.declare_entry("Theta", ".5", Patterns::Double());
+    prm.declare_entry("Adaptive", "false", Patterns::Bool());
+
+    prm.leave_subsection();
+  }
+
+  template <typename VectorType>
+  void
+  ThetaTimestepping<VectorType>::parse_parameters (ParameterHandler &prm)
+  {
+    // read back what declare_parameters() has declared, in the same order
+    prm.enter_subsection("ThetaTimestepping");
+
+    this->control.parse_parameters (prm);
+    this->vtheta   = prm.get_double("Theta");
+    this->adaptive = prm.get_bool("Adaptive");
+
+    prm.leave_subsection ();
+  }
+
+
+  // Run the whole time stepping loop: restart the control object and then
+  // alternate between the explicit and the implicit operator until the final
+  // time of the control object is reached. The first entry of 'out' is the
+  // solution vector; it is read as initial value and overwritten in every
+  // step. The entries of 'in' are merged into the input of both operators.
+  template <typename VectorType>
+  void
+  ThetaTimestepping<VectorType>::operator() (AnyData &out, const AnyData &in)
+  {
+    // adaptive choice of theta is not implemented
+    Assert(!adaptive, ExcNotImplemented());
+
+    deallog.push ("Theta");
+
+    // The solution vector is the first entry of 'out'. In addition, get an
+    // auxiliary vector from a memory pool and initialize it from the
+    // solution; it carries the result of the explicit operator over to the
+    // implicit one.
+    VectorType &solution = *out.entry<VectorType *>(0);
+    GrowingVectorMemory<VectorType> mem;
+    typename VectorMemory<VectorType>::Pointer aux(mem);
+    aux->reinit(solution);
+
+    control.restart();
+
+    d_explicit.time = control.now();
+
+    // The input data of the explicit operator: the current solution together
+    // with pointers to time, step size and theta. Since only pointers are
+    // stored, the operators see the values updated inside the loop below.
+    AnyData src1;
+    src1.add<const VectorType *>(&solution, "Previous iterate");
+    src1.add<const double *>(&d_explicit.time, "Time");
+    src1.add<const double *>(&d_explicit.step, "Timestep");
+    src1.add<const double *>(&vtheta, "Theta");
+    src1.merge(in);
+
+    AnyData src2;
+
+    // The explicit operator writes its result into the auxiliary vector
+    AnyData out1;
+    out1.add<VectorType *>(aux, "Solution");
+    // The data provided to the inner solver: the auxiliary vector filled by
+    // the explicit operator is handed over under the name "Previous time"
+    src2.add<const VectorType *>(aux, "Previous time");
+    src2.add<const VectorType *>(&solution, "Previous iterate");
+    src2.add<const double *>(&d_implicit.time, "Time");
+    src2.add<const double *>(&d_implicit.step, "Timestep");
+    src2.add<const double *>(&vtheta, "Theta");
+    src2.merge(in);
+
+    // write the initial value as step zero, if an output operator was set
+    if (output != 0)
+      (*output) << 0U << out;
+
+    for (unsigned int count = 1; d_explicit.time < control.final(); ++count)
+      {
+        // Advance the time. The implicit operator works at the new time,
+        // the explicit one still at the old time. The step is split into
+        // (1-theta)*dt for the explicit and theta*dt for the implicit part.
+        const bool step_change = control.advance();
+        d_implicit.time = control.now();
+        d_explicit.step = (1.-vtheta)*control.step();
+        d_implicit.step = vtheta*control.step();
+        deallog << "Time step:" << d_implicit.time << std::endl;
+
+        // tell both operators about the new time and, if applicable, about
+        // the changed step size
+        op_explicit->notify(Events::new_time);
+        op_implicit->notify(Events::new_time);
+        if (step_change)
+          {
+            op_explicit->notify(Events::new_timestep_size);
+            op_implicit->notify(Events::new_timestep_size);
+          }
+
+        // Compute
+        // (I + (1-theta)dt A) u
+        // into the auxiliary vector, then let the implicit operator compute
+        // the new solution from it
+        (*op_explicit)(out1, src1);
+        (*op_implicit)(out, src2);
+
+        if (output != 0 && control.print())
+          (*output) << count << out;
+
+        // the new time is the old time of the next step
+        d_explicit.time = control.now();
+      }
+    deallog.pop();
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/include/deal.II/algorithms/timestep_control.h b/include/deal.II/algorithms/timestep_control.h
new file mode 100644
index 0000000..925feec
--- /dev/null
+++ b/include/deal.II/algorithms/timestep_control.h
@@ -0,0 +1,297 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__time_step_control_h
+#define dealii__time_step_control_h
+
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/lac/vector_memory.h>
+#include <cstdio>
+#include <cstring>
+
+DEAL_II_NAMESPACE_OPEN
+
+class ParameterHandler;
+
+namespace Algorithms
+{
+  /**
+   * Control class for timestepping schemes. Its main task is determining the
+   * size of the next time step and the corresponding point in the time interval.
+   * Additionally, it controls writing the solution to a file.
+   *
+   * The size of the next time step is determined as follows:
+   * <ol>
+   * <li> According to the strategy, the step size is tentatively added to the
+   * current time.
+   * <li> If the resulting time exceeds the final time of the interval, the
+   * step size is reduced in order to meet this time.
+   * <li> If the resulting time is below the final time by just a fraction of
+   * the step size, the step size is increased in order to meet this time.
+   * <li> The resulting step size is used from the current time.
+   * </ol>
+   *
+   * The variable @p print_step can be used to control the amount of output
+   * generated by the timestepping scheme.
+   */
+  class TimestepControl : public Subscriptor
+  {
+  public:
+    /**
+     * The time stepping strategies. These are controlled by the value of
+     * tolerance() and start_step().
+     */
+    enum Strategy
+    {
+      /**
+       * Choose a uniform time step size. The step size is determined by
+       * start_step(), tolerance() is ignored.
+       */
+      uniform,
+      /**
+       * Start with the time step size given by start_step() and double it in
+       * every step. tolerance() is ignored.
+       *
+       * This strategy is intended for pseudo-timestepping schemes computing a
+       * stationary limit.
+       */
+      doubling
+    };
+
+    /**
+     * Constructor. All arguments have default values, so this constructor
+     * also serves as default constructor.
+     */
+    TimestepControl (double start = 0.,
+                     double final = 1.,
+                     double tolerance = 1.e-2,
+                     double start_step = 1.e-2,
+                     double print_step = -1.,
+                     double max_step = 1.);
+
+    /**
+     * Declare the control parameters for parameter handler.
+     */
+    static void declare_parameters (ParameterHandler &param);
+    /**
+     * Read the control parameters from a parameter handler.
+     */
+    void parse_parameters (ParameterHandler &param);
+
+    /**
+     * The left end of the time interval.
+     */
+    double start () const;
+    /**
+     * The right end of the time interval. The control mechanism ensures that
+     * the final time step ends at this point.
+     */
+    double final () const;
+    /**
+     * The tolerance value controlling the time steps.
+     */
+    double tolerance () const;
+    /**
+     * The size of the current time step.
+     */
+    double step () const;
+
+    /**
+     * The current time.
+     */
+    double now () const;
+
+    /**
+     * Compute the size of the next step and return true if it differs from
+     * the current step size. Advance the current time by the new step size.
+     */
+    bool advance ();
+
+    /**
+     * Set start value.
+     */
+    void start (double);
+    /**
+     * Set final time value.
+     */
+    void final (double);
+    /**
+     * Set tolerance
+     */
+    void tolerance (double);
+    /**
+     * Set strategy.
+     */
+    void strategy (Strategy);
+
+    /**
+     * Set size of the first step. This may be overwritten by the time
+     * stepping strategy.
+     *
+     * @param[in] step The size of the first step, which may be overwritten by
+     * the time stepping strategy.
+     */
+    void start_step (const double step);
+
+    /**
+     * Set the maximum step size.
+     */
+    void max_step (double);
+
+    /**
+     * Set now() equal to start(). Initialize step() and print() to their
+     * initial values.
+     */
+    void restart ();
+    /**
+     * Return true if this timestep should be written to disk.
+     */
+    bool print ();
+    /**
+     * Set the output name template.
+     *
+     * @note The template is stored in a fixed-size buffer of 30 characters
+     * (including the terminating zero), so it has to be shorter than that.
+     */
+    void file_name_format (const char *);
+    /**
+     * Return the output name template set by file_name_format(const char *).
+     */
+    const char *file_name_format ();
+  private:
+
+    /**
+     * The left end of the time interval, see start().
+     */
+    double start_val;
+    /**
+     * The right end of the time interval, see final().
+     */
+    double final_val;
+    /**
+     * The tolerance value, see tolerance().
+     */
+    double tolerance_val;
+    /**
+     * The time stepping strategy, set by strategy().
+     */
+    Strategy strategy_val;
+    /**
+     * The size of the first step, set by start_step(). restart() resets the
+     * step size to this value.
+     */
+    double start_step_val;
+    /**
+     * The maximum step size, set by max_step().
+     */
+    double max_step_val;
+    /**
+     * Not used by any of the inline functions in this file; presumably a
+     * lower bound for the step size (to be confirmed in the implementation
+     * file).
+     */
+    double min_step_val;
+    /**
+     * The size of the current time step. This may differ from @p step_val, if
+     * we aimed at @p final_val.
+     */
+    double current_step_val;
+    /**
+     * The step size chosen by the strategy; restart() resets it to
+     * @p start_step_val.
+     */
+    double step_val;
+
+    /**
+     * The current time, see now().
+     */
+    double now_val;
+    /**
+     * The time interval between two outputs. If it is not positive,
+     * restart() sets @p next_print_val to a time before the current time.
+     */
+    double print_step;
+    /**
+     * The time of the next output. restart() sets it to the current time plus
+     * @p print_step if the latter is positive.
+     */
+    double next_print_val;
+
+    /**
+     * Buffer holding the output name template, see file_name_format().
+     */
+    char format[30];
+  };
+
+
+  inline double
+  TimestepControl::start () const
+  {
+    // left end of the time interval
+    return this->start_val;
+  }
+
+
+  inline double
+  TimestepControl::final () const
+  {
+    // right end of the time interval
+    return this->final_val;
+  }
+
+
+  inline double
+  TimestepControl::step () const
+  {
+    // the step actually taken, not the one proposed by the strategy
+    return this->current_step_val;
+  }
+
+
+  inline double
+  TimestepControl::tolerance () const
+  {
+    // tolerance controlling the time steps
+    return this->tolerance_val;
+  }
+
+
+  inline double
+  TimestepControl::now () const
+  {
+    // the current time
+    return this->now_val;
+  }
+
+
+  inline void
+  TimestepControl::start (double new_start)
+  {
+    // takes effect for now() only with the next call to restart()
+    this->start_val = new_start;
+  }
+
+
+  inline void
+  TimestepControl::final (double new_final)
+  {
+    // new right end of the time interval
+    this->final_val = new_final;
+  }
+
+
+  inline void
+  TimestepControl::tolerance (double new_tolerance)
+  {
+    // new tolerance value
+    this->tolerance_val = new_tolerance;
+  }
+
+
+  inline void
+  TimestepControl::strategy (Strategy new_strategy)
+  {
+    // select the way step sizes are chosen
+    this->strategy_val = new_strategy;
+  }
+
+
+  inline void
+  TimestepControl::start_step (const double new_start_step)
+  {
+    // picked up as the step size by the next call to restart()
+    this->start_step_val = new_start_step;
+  }
+
+
+  inline void
+  TimestepControl::max_step (double new_max_step)
+  {
+    // new upper bound for the step size
+    this->max_step_val = new_max_step;
+  }
+
+
+  inline void
+  TimestepControl::restart ()
+  {
+    // go back to the beginning of the interval and to the initial step size
+    now_val          = start_val;
+    current_step_val = step_val = start_step_val;
+
+    // With a positive print interval, the next output is due one interval
+    // from now. Otherwise, put the next output time before the current time.
+    next_print_val = (print_step > 0.)
+                     ? (now_val + print_step)
+                     : (now_val - 1.);
+  }
+
+
+  /*
+   * Store the output name template. The template is kept in the fixed-size
+   * member buffer <tt>format</tt>; a template that does not fit is truncated
+   * rather than written past the end of the buffer.
+   */
+  inline void
+  TimestepControl::file_name_format (const char *fmt)
+  {
+    // copy at most sizeof(format)-1 characters and always terminate the
+    // string ourselves, since strncpy does not do so when the source is too
+    // long
+    std::strncpy (format, fmt, sizeof(format)-1);
+    format[sizeof(format)-1] = '\0';
+  }
+
+
+  inline const char *
+  TimestepControl::file_name_format ()
+  {
+    // pointer to the first character of the stored name template
+    return &format[0];
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/aligned_vector.h b/include/deal.II/base/aligned_vector.h
new file mode 100644
index 0000000..5f9d441
--- /dev/null
+++ b/include/deal.II/base/aligned_vector.h
@@ -0,0 +1,888 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2011 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__aligned_vector_h
+#define dealii__aligned_vector_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/std_cxx11/type_traits.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/parallel.h>
+#include <boost/serialization/array.hpp>
+#include <boost/serialization/split_member.hpp>
+
+#include <cstring>
+
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/**
+ * This is a replacement class for std::vector to be used in combination with
+ * VectorizedArray and derived data types. It allocates memory aligned to
+ * addresses of a vectorized data type (in order to avoid segmentation faults
+ * when a variable of type VectorizedArray which the compiler assumes to be
+ * aligned to certain memory addresses does not actually follow these rules).
+ * This could also be achieved by providing std::vector with a user-defined
+ * allocator. On the other hand, writing an own small vector class lets us
+ * implement parallel copy and move operations with TBB, insert deal.II-style
+ * assertions, and cut some unnecessary functionality. Note that this vector
+ * is a bit more memory-consuming than std::vector because of alignment, so it
+ * is recommended to only use this vector on long vectors.
+ *
+ * @author Katharina Kormann, Martin Kronbichler, 2011
+ */
+template < class T >
+class AlignedVector
+{
+public:
+  /**
+   * Declare standard types used in all containers. These types parallel those
+   * in the <tt>C++</tt> standard libraries <tt>vector<...></tt> class.
+   */
+  typedef T                   value_type;
+  typedef value_type         *pointer;
+  typedef const value_type   *const_pointer;
+  typedef value_type         *iterator;
+  typedef const value_type   *const_iterator;
+  typedef value_type         &reference;
+  typedef const value_type   &const_reference;
+  typedef std::size_t         size_type;
+
+  /**
+   * Empty constructor. Sets the vector size to zero.
+   */
+  AlignedVector ();
+
+  /**
+   * Sets the vector size to the given size and initializes all elements with
+   * @p init, which defaults to T().
+   *
+   * @dealiiOperationIsMultithreaded
+   */
+  AlignedVector (const size_type size,
+                 const T        &init = T());
+
+  /**
+   * Destructor.
+   */
+  ~AlignedVector ();
+
+  /**
+   * Copy constructor.
+   *
+   * @dealiiOperationIsMultithreaded
+   */
+  AlignedVector (const AlignedVector<T> &vec);
+
+  /**
+   * Assignment to the input vector @p vec.
+   *
+   * @dealiiOperationIsMultithreaded
+   */
+  AlignedVector &
+  operator = (const AlignedVector<T> &vec);
+
+  /**
+   * Change the size of the vector. It keeps old elements previously available.
+   * For trivial types @p T the newly added elements are left uninitialized;
+   * for all other types they are default constructed.
+   *
+   * @note This method can only be invoked for classes @p T that define a
+   * default constructor, @p T(). Otherwise, compilation will fail.
+   */
+  void resize_fast (const size_type size);
+
+  /**
+   * Change the size of the vector. It keeps old elements previously
+   * available, and initializes each element with the specified data. If the
+   * new vector size is shorter than the old one, the memory is not released
+   * unless the new size is zero.
+   *
+   * @dealiiOperationIsMultithreaded
+   */
+  void resize (const size_type size_in,
+               const T        &init = T());
+
+  /**
+   * Reserve memory space for @p size elements. If the argument @p size is set
+   * to zero, all previously allocated memory is released.
+   *
+   * In order to avoid too frequent reallocation (which involves copy of the
+   * data), this function doubles the amount of memory occupied when the given
+   * size is larger than the previously allocated size.
+   */
+  void reserve (const size_type size_alloc);
+
+  /**
+   * Releases all previously allocated memory and leaves the vector in a state
+   * equivalent to the state after the default constructor has been called.
+   */
+  void clear ();
+
+  /**
+   * Inserts an element at the end of the vector, increasing the vector size
+   * by one. Note that the allocated size will double whenever the previous
+   * space is not enough to hold the new element.
+   */
+  void push_back (const T in_data);
+
+  /**
+   * Returns the last element of the vector (read and write access).
+   */
+  reference back ();
+
+  /**
+   * Returns the last element of the vector (read-only access).
+   */
+  const_reference back () const;
+
+  /**
+   * Inserts several elements at the end of the vector given by a range of
+   * elements.
+   */
+  template <typename ForwardIterator>
+  void insert_back (ForwardIterator begin,
+                    ForwardIterator end);
+
+  /**
+   * Fills the vector with size() copies of the given input.
+   *
+   * @note This method can only be invoked for classes that define the copy
+   * assignment operator. Otherwise, compilation will fail.
+   *
+   * @dealiiOperationIsMultithreaded
+   */
+  void fill (const T &element);
+
+  /**
+   * Swaps the given vector with the calling vector.
+   */
+  void swap (AlignedVector<T> &vec);
+
+  /**
+   * Returns whether the vector is empty, i.e., its size is zero.
+   */
+  bool empty () const;
+
+  /**
+   * Returns the size of the vector.
+   */
+  size_type size () const;
+
+  /**
+   * Returns the capacity of the vector, i.e., the size this vector can hold
+   * without reallocation. Note that capacity() >= size().
+   */
+  size_type capacity () const;
+
+  /**
+   * Read-write access to entry @p index in the vector.
+   */
+  reference
+  operator [] (const size_type index);
+
+  /**
+   * Read-only access to entry @p index in the vector.
+   */
+  const_reference operator [] (const size_type index) const;
+
+  /**
+   * Returns a read and write pointer to the beginning of the data array.
+   */
+  iterator begin ();
+
+  /**
+   * Returns a read and write pointer to the end of the data array.
+   */
+  iterator end ();
+
+  /**
+   * Returns a read-only pointer to the beginning of the data array.
+   */
+  const_iterator begin () const;
+
+  /**
+   * Returns a read-only pointer to the end of the data array.
+   */
+  const_iterator end () const;
+
+  /**
+   * Returns the memory consumption of the allocated memory in this class. If
+   * the underlying type @p T allocates memory by itself, this memory is not
+   * counted.
+   */
+  size_type memory_consumption () const;
+
+  /**
+   * Write the data of this object to a stream for the purpose of
+   * serialization.
+   */
+  template <class Archive>
+  void save (Archive &ar, const unsigned int version) const;
+
+  /**
+   * Read the data of this object from a stream for the purpose of
+   * serialization.
+   */
+  template <class Archive>
+  void load (Archive &ar, const unsigned int version);
+
+  BOOST_SERIALIZATION_SPLIT_MEMBER()
+
+private:
+
+  /**
+   * Pointer to actual class data.
+   */
+  T *_data;
+
+  /**
+   * Pointer to the end of valid data fields.
+   */
+  T *_end_data;
+
+  /**
+   * Pointer to the end of the allocated memory.
+   */
+  T *_end_allocated;
+};
+
+
+// ------------------------------- inline functions --------------------------
+
+/**
+ * This namespace defines the copy and set functions used in AlignedVector.
+ * These functions operate in parallel when there are enough elements in the
+ * vector.
+ */
+namespace internal
+{
+  /**
+   * Class that actually issues the move and copy commands in AlignedVector.
+   * This class is based on the specialized for loop base class
+   * ParallelForInteger in parallel.h whose purpose is the following: When
+   * calling a parallel for loop on AlignedVector with apply_to_subranges, it
+   * generates different code for every different argument we might choose (as
+   * it is templated). This gives a lot of code (e.g. it triples the memory
+   * required for compiling the file matrix_free.cc and the final object size
+   * is several times larger) which is completely useless. Therefore, this
+   * class channels all copy commands through one call to apply_to_subrange
+   * for all possible types, which makes the copy operation much cleaner
+   * (thanks to a virtual function, whose cost is negligible in this context).
+   *
+   * @relates AlignedVector
+   */
+  template <typename T>
+  class AlignedVectorMove : private parallel::ParallelForInteger
+  {
+    static const std::size_t minimum_parallel_grain_size = 160000/sizeof(T)+1;
+  public:
+    /**
+     * Constructor. Transfers the elements of the half-open interval between
+     * @p source_begin and @p source_end to the array starting at
+     * @p destination by calling the copy constructor with placement new. The
+     * work is done in parallel if there are sufficiently many elements and in
+     * serial otherwise.
+     *
+     * If @p copy_source is @p true, the source elements are left untouched
+     * (a plain copy). If it is @p false, the destructor is invoked on every
+     * source element after it has been transferred (a move, preparing the
+     * source range for a subsequent call to free).
+     */
+    AlignedVectorMove (T *source_begin,
+                       T *source_end,
+                       T *destination,
+                       const bool copy_source)
+      :
+      source_ (source_begin),
+      destination_ (destination),
+      copy_source_ (copy_source)
+    {
+      Assert (source_end >= source_begin, ExcInternalError());
+      const std::size_t n_elements = source_end - source_begin;
+      if (n_elements >= minimum_parallel_grain_size)
+        apply_parallel (0, n_elements, minimum_parallel_grain_size);
+      else
+        apply_to_subrange (0, n_elements);
+    }
+
+    /**
+     * Transfer the elements with indices in the half-open interval between
+     * @p begin and @p end from the source to the destination given in the
+     * constructor.
+     */
+    virtual void apply_to_subrange (const std::size_t begin,
+                                    const std::size_t end) const
+    {
+      // For trivial classes we can simply use memcpy, and there is no
+      // destructor to run on the source. Cast the destination to (void*) to
+      // silence compiler warnings for virtual classes (they will never
+      // arrive here because they are non-trivial).
+      if (std_cxx11::is_trivial<T>::value == true)
+        {
+          std::memcpy ((void *)(destination_+begin), source_+begin,
+                       (end-begin)*sizeof(T));
+          return;
+        }
+
+      for (std::size_t i=begin; i<end; ++i)
+        {
+          // initialize memory (copy construct by placement new) ...
+          new (&destination_[i]) T(source_[i]);
+
+          // ... and destruct the source if we are asked to move
+          if (copy_source_ == false)
+            source_[i].~T();
+        }
+    }
+
+  private:
+    T *source_;
+    T *destination_;
+    const bool copy_source_;
+  };
+
+  /**
+   * Class that issues the set commands for AlignedVector.
+   *
+   * @tparam initialize_memory Sets whether the set command should
+   * initialize memory (with a call to the copy constructor) or rather use the
+   * copy assignment operator. A template is necessary to select the
+   * appropriate operation since some classes might define only one of those
+   * two operations.
+   *
+   * @relates AlignedVector
+   */
+  template <typename T, bool initialize_memory>
+  class AlignedVectorSet : private parallel::ParallelForInteger
+  {
+    static const std::size_t minimum_parallel_grain_size = 160000/sizeof(T)+1;
+  public:
+    /**
+     * Constructor. Sets @p size elements starting at @p destination to
+     * @p element, in parallel if there are sufficiently many elements and in
+     * serial otherwise. Nothing is done if @p size is zero.
+     */
+    AlignedVectorSet (const std::size_t size,
+                      const T &element,
+                      T *destination)
+      :
+      element_ (element),
+      destination_ (destination),
+      trivial_element (false)
+    {
+      if (size == 0)
+        return;
+
+      // Find out whether the element of a trivial type consists of zero
+      // bytes only, in which case the range can be filled by memset. Do not
+      // use memcmp for long double because on some systems it does not
+      // completely fill its memory and may lead to false positives in
+      // e.g. valgrind
+      if (std_cxx11::is_trivial<T>::value == true &&
+          types_are_equal<T,long double>::value == false)
+        {
+          const unsigned char all_zero_bytes [sizeof(T)] = {};
+          // cast element to (void*) to silence compiler warning for virtual
+          // classes (they will never arrive here because they are
+          // non-trivial).
+          trivial_element =
+            (std::memcmp(all_zero_bytes, (void *)&element, sizeof(T)) == 0);
+        }
+
+      if (size >= minimum_parallel_grain_size)
+        apply_parallel (0, size, minimum_parallel_grain_size);
+      else
+        apply_to_subrange (0, size);
+    }
+
+    /**
+     * Set the elements with indices in the half-open interval between
+     * @p begin and @p end.
+     */
+    virtual void apply_to_subrange (const std::size_t begin,
+                                    const std::size_t end) const
+    {
+      // the general case: copy construct or assign element by element
+      if (std_cxx11::is_trivial<T>::value == false || trivial_element == false)
+        {
+          copy_construct_or_assign(begin, end,
+                                   ::dealii::internal::bool2type<initialize_memory>());
+          return;
+        }
+
+      // Trivial classes that are set to zero can use memset. Cast the
+      // destination to (void*) to silence compiler warnings for virtual
+      // classes (they will never arrive here because they are non-trivial).
+      std::memset ((void *)(destination_+begin), 0, (end-begin)*sizeof(T));
+    }
+
+  private:
+    const T &element_;
+    mutable T *destination_;
+    bool trivial_element;
+
+    // variant using the copy assignment operator (memory holds objects
+    // already)
+    void copy_construct_or_assign(const std::size_t begin,
+                                  const std::size_t end,
+                                  ::dealii::internal::bool2type<false>) const
+    {
+      for (T *entry = destination_+begin; entry < destination_+end; ++entry)
+        *entry = element_;
+    }
+
+    // variant using the copy constructor with placement new (memory
+    // initialization)
+    void copy_construct_or_assign(const std::size_t begin,
+                                  const std::size_t end,
+                                  ::dealii::internal::bool2type<true>) const
+    {
+      for (T *entry = destination_+begin; entry < destination_+end; ++entry)
+        new (entry) T(element_);
+    }
+  };
+
+} // end of namespace internal
+
+
+#ifndef DOXYGEN
+
+
+// An empty vector owns no memory: all three pointers are null.
+template < class T >
+inline
+AlignedVector<T>::AlignedVector ()
+  : _data (0), _end_data (0), _end_allocated (0)
+{}
+
+
+
+// Start from an empty vector and let resize() do the allocation and the
+// initialization of the elements.
+template < class T >
+inline
+AlignedVector<T>::AlignedVector (const size_type size,
+                                 const T &init)
+  : _data (0), _end_data (0), _end_allocated (0)
+{
+  // nothing to allocate for an empty vector
+  if (size == 0)
+    return;
+
+  resize (size, init);
+}
+
+
+
+template < class T >
+inline
+AlignedVector<T>::~AlignedVector ()
+{
+  clear();
+}
+
+
+
+template < class T >
+inline
+AlignedVector<T>::AlignedVector (const AlignedVector<T> &vec)
+  :
+  _data (0),
+  _end_data (0),
+  _end_allocated (0)
+{
+  // copy the data from vec
+  reserve (vec._end_data - vec._data);
+  _end_data = _end_allocated;
+  internal::AlignedVectorMove<T> (vec._data, vec._end_data, _data, true);
+}
+
+
+
+template < class T >
+inline AlignedVector<T> &
+AlignedVector<T>::operator = (const AlignedVector<T> &vec)
+{
+  if (this == &vec) return *this; // self-assignment: resize(0) would wipe the source
+  resize(0);
+  resize_fast (vec._end_data - vec._data);
+  internal::AlignedVectorMove<T> (vec._data, vec._end_data, _data, true);
+  return *this;
+}
+
+
+
+template < class T >
+inline
+void
+AlignedVector<T>::resize_fast (const size_type size_in)
+{
+  const size_type old_size = size();
+  if (std_cxx11::is_trivial<T>::value == false && size_in < old_size)
+    {
+      // call destructor on fields that are released. doing it backward
+      // releases the elements in reverse order as compared to how they were
+      // created
+      while (_end_data != _data+size_in)
+        (--_end_data)->~T();
+    }
+  reserve (size_in);
+  _end_data = _data + size_in;
+
+  // need to still set the values in case the class is non-trivial because
+  // virtual classes etc. need to run their (default) constructor
+  if (std_cxx11::is_trivial<T>::value == false && size_in > old_size)
+    dealii::internal::AlignedVectorSet<T,true> (size_in-old_size, T(), _data+old_size);
+}
+
+
+
+template < class T >
+inline
+void
+AlignedVector<T>::resize (const size_type size_in,
+                          const T        &init)
+{
+  const size_type old_size = size();
+  if (std_cxx11::is_trivial<T>::value == false && size_in < old_size)
+    {
+      // call destructor on fields that are released. doing it backward
+      // releases the elements in reverse order as compared to how they were
+      // created
+      while (_end_data != _data+size_in)
+        (--_end_data)->~T();
+    }
+  reserve (size_in);
+  _end_data = _data + size_in;
+
+  // finally set the desired init values
+  if (size_in > old_size)
+    dealii::internal::AlignedVectorSet<T,true> (size_in-old_size, init, _data+old_size);
+}
+
+
+
+template < class T >
+inline
+void
+AlignedVector<T>::reserve (const size_type size_alloc)
+{
+  const size_type old_size = _end_data - _data;
+  const size_type allocated_size = _end_allocated - _data;
+  if (size_alloc > allocated_size)
+    {
+      // if we continuously increase the size of the vector, we might be
+      // reallocating a lot of times. therefore, try to increase the size more
+      // aggressively
+      size_type new_size = size_alloc;
+      if (size_alloc < (2 * allocated_size))
+        new_size = 2 * allocated_size;
+
+      const size_type size_actual_allocate = new_size * sizeof(T);
+
+      // allocate and align along 64-byte boundaries (this is enough for all
+      // levels of vectorization currently supported by deal.II)
+      T *new_data;
+      Utilities::System::posix_memalign ((void **)&new_data, 64, size_actual_allocate);
+
+      // install the new buffer; after the swap, new_data points to the old
+      // buffer (or is null if nothing had been allocated before)
+      std::swap (_data, new_data);
+      _end_data = _data + old_size;
+      _end_allocated = _data + new_size;
+      if (_end_data != _data)
+        dealii::internal::AlignedVectorMove<T>(new_data, new_data + old_size,
+                                               _data, false);
+
+      // release the old buffer with free(), matching posix_memalign. this must
+      // also happen if it held capacity but no elements, else it would leak
+      if (new_data != 0)
+        free(new_data);
+    }
+  else if (size_alloc == 0)
+    clear();
+}
+
+
+
+template < class T >
+inline
+void
+AlignedVector<T>::clear ()
+{
+  if (_data != 0)
+    {
+      if (std_cxx11::is_trivial<T>::value == false)
+        while (_end_data != _data)
+          (--_end_data)->~T();
+
+      free(_data);
+    }
+  _data = 0;
+  _end_data = 0;
+  _end_allocated = 0;
+}
+
+
+
+template < class T >
+inline
+void
+AlignedVector<T>::push_back (const T in_data)
+{
+  Assert (_end_data <= _end_allocated, ExcInternalError());
+  if (_end_data == _end_allocated)
+    reserve (std::max(2*capacity(),static_cast<size_type>(16)));
+  if (std_cxx11::is_trivial<T>::value == false)
+    new (_end_data) T;
+  *_end_data++ = in_data;
+}
+
+
+
+template < class T >
+inline
+typename AlignedVector<T>::reference
+AlignedVector<T>::back ()
+{
+  AssertIndexRange (0, size());
+  T *field = _end_data - 1;
+  return *field;
+}
+
+
+
+template < class T >
+inline
+typename AlignedVector<T>::const_reference
+AlignedVector<T>::back () const
+{
+  AssertIndexRange (0, size());
+  const T *field = _end_data - 1;
+  return *field;
+}
+
+
+
+template < class T >
+template <typename ForwardIterator>
+inline
+void
+AlignedVector<T>::insert_back (ForwardIterator begin,
+                               ForwardIterator end)
+{
+  const size_type old_size = size(); // size_type: unsigned int could truncate
+  reserve (old_size + (end-begin));
+  for ( ; begin != end; ++begin, ++_end_data)
+    {
+      if (std_cxx11::is_trivial<T>::value == false)
+        new (_end_data) T;
+      *_end_data = *begin;
+    }
+}
+
+
+
+template < class T >
+inline
+void
+AlignedVector<T>::fill (const T &value)
+{
+  dealii::internal::AlignedVectorSet<T,false> (size(), value, _data);
+}
+
+
+
+template < class T >
+inline
+void
+AlignedVector<T>::swap (AlignedVector<T> &vec)
+{
+  std::swap (_data, vec._data);
+  std::swap (_end_data, vec._end_data);
+  std::swap (_end_allocated, vec._end_allocated);
+}
+
+
+
+template < class T >
+inline
+bool
+AlignedVector<T>::empty () const
+{
+  return _end_data == _data;
+}
+
+
+
+template < class T >
+inline
+typename AlignedVector<T>::size_type
+AlignedVector<T>::size () const
+{
+  return _end_data - _data;
+}
+
+
+
+template < class T >
+inline
+typename AlignedVector<T>::size_type
+AlignedVector<T>::capacity () const
+{
+  return _end_allocated - _data;
+}
+
+
+
+template < class T >
+inline
+typename AlignedVector<T>::reference
+AlignedVector<T>::operator [] (const size_type index)
+{
+  AssertIndexRange (index, size());
+  return _data[index];
+}
+
+
+
+template < class T >
+inline
+typename AlignedVector<T>::const_reference
+AlignedVector<T>::operator [] (const size_type index) const
+{
+  AssertIndexRange (index, size());
+  return _data[index];
+}
+
+
+
+template < class T >
+inline
+typename AlignedVector<T>::iterator
+AlignedVector<T>::begin ()
+{
+  return _data;
+}
+
+
+
+template < class T >
+inline
+typename AlignedVector<T>::iterator
+AlignedVector<T>::end ()
+{
+  return _end_data;
+}
+
+
+
+template < class T >
+inline
+typename AlignedVector<T>::const_iterator
+AlignedVector<T>::begin () const
+{
+  return _data;
+}
+
+
+
+template < class T >
+inline
+typename AlignedVector<T>::const_iterator
+AlignedVector<T>::end () const
+{
+  return _end_data;
+}
+
+
+
+template < class T >
+template < class Archive >
+inline
+void
+AlignedVector<T>::save (Archive &ar, const unsigned int) const
+{
+  size_type vec_size (size());
+  ar &vec_size;
+  if (vec_size > 0)
+    ar &boost::serialization::make_array(_data, vec_size);
+}
+
+
+
+template < class T >
+template < class Archive >
+inline
+void
+AlignedVector<T>::load (Archive &ar, const unsigned int)
+{
+  size_type vec_size = 0;
+  ar &vec_size ;
+
+  if (vec_size > 0)
+    {
+      reserve(vec_size);
+      ar &boost::serialization::make_array(_data, vec_size);
+      _end_data = _data + vec_size;
+    }
+}
+
+
+
+template < class T >
+inline
+typename AlignedVector<T>::size_type
+AlignedVector<T>::memory_consumption () const
+{
+  size_type memory = sizeof(*this);
+  for (const T *t = _data ; t != _end_data; ++t)
+    memory += dealii::MemoryConsumption::memory_consumption(*t);
+  memory += sizeof(T) * (_end_allocated-_end_data);
+  return memory;
+}
+
+
+#endif // ifndef DOXYGEN
+
+
+/**
+ * Relational operator == for AlignedVector
+ *
+ * @relates AlignedVector
+ */
+template < class T >
+bool operator == (const AlignedVector<T> &lhs,
+                  const AlignedVector<T> &rhs)
+{
+  if (lhs.size() != rhs.size())
+    return false;
+  for (typename AlignedVector<T>::const_iterator lit = lhs.begin(),
+       rit = rhs.begin(); lit != lhs.end(); ++lit, ++rit)
+    if (*lit != *rit)
+      return false;
+  return true;
+}
+
+
+
+
+/**
+ * Relational operator != for AlignedVector
+ *
+ * @relates AlignedVector
+ */
+template < class T >
+bool operator != (const AlignedVector<T> &lhs,
+                  const AlignedVector<T> &rhs)
+{
+  return !(operator==(lhs, rhs));
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/array_view.h b/include/deal.II/base/array_view.h
new file mode 100644
index 0000000..c9d63d8
--- /dev/null
+++ b/include/deal.II/base/array_view.h
@@ -0,0 +1,441 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__array_view_h
+#define dealii__array_view_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/table.h>
+
+#include <boost/type_traits/remove_cv.hpp>
+
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/**
+ * A class that represents a window of memory locations of type @p ElementType
+ * and presents it as if it was an array that can be accessed via an
+ * <code>operator[]</code>. In essence, this class is nothing more than just a
+ * pointer to the first location and an integer that represents the length of
+ * the array in elements. The memory remains owned by whoever allocated it, as
+ * this class does not take over ownership.
+ *
+ * The advantage of using this class is that you don't have to pass around
+ * pairs of pointers and that <code>operator[]</code> checks for the validity
+ * of the index with which you subscript this array view.
+ *
+ * This class can handle views to both non-constant and constant memory
+ * locations. If you want to represent a view of a constant array, then the
+ * template argument type of this class needs to be @p const as well. The
+ * following code snippet gives an example:
+ * @code
+ *   std::vector<int>       array       = get_data();  // a writable array
+ *
+ *   ArrayView<int> view (&array[5], 5);               // a view of elements 5..9 (inclusive)
+ *   view[2] = 42;                                     // array[7] is set to 42
+ *
+ *   ArrayView<const int> const_view (&array[5], 5);   // same view, but read-only
+ *   int element_7 = const_view[2];                    // returns 42
+ *   const_view[2] = 42;                               // error, can't write into this view
+ * @endcode
+ * In either case, accessing an element of a view does not change the
+ * ArrayView object itself, and consequently ArrayView::operator[] is a @p
+ * const function. This corresponds to the notion that a view simply
+ * represents a, well, "view" of memory that is owned by someone else. Thus,
+ * accessing elements of the view changes the memory managed by some other
+ * object, but not the view itself, allowing us to make ArrayView::operator[]
+ * a @p const member function. This is in contrast to, say, std::vector, which
+ * manages the memory it points to and changing an element of the std::vector
+ * therefore changes the std::vector object itself -- consequently, the
+ * std::vector::operator[] is non-@p const.
+ *
+ * @ingroup data
+ * @author Wolfgang Bangerth, 2015
+ */
+template <typename ElementType>
+class ArrayView
+{
+public:
+  /**
+   * A typedef that denotes the "value_type" of this container-like class,
+   * i.e., the type of the element it "stores" or points to.
+   */
+  typedef ElementType value_type;
+
+  /**
+   * Constructor.
+   *
+   * @param[in] starting_element A pointer to the first element of the array
+   * this object should represent.
+   * @param[in] n_elements The length (in elements) of the chunk of memory
+   * this object should represent.
+   *
+   * @note The object that is constructed from these arguments has no
+   * knowledge how large the object into which it points really is. As a
+   * consequence, whenever you call ArrayView::operator[], the array view can
+   * check that the given index is within the range of the view, but it can't
+   * check that the view is indeed a subset of the valid range of elements of
+   * the underlying object that allocated that range. In other words, you need
+   * to ensure that the range of the view specified by the two arguments to
+   * this constructor is in fact a subset of the elements of the array into
+   * which it points. The appropriate way to do this is to use the
+   * make_array_view() functions.
+   */
+  ArrayView (value_type       *starting_element,
+             const std::size_t n_elements);
+
+  /**
+   * Copy constructor from array views that point to non-@p const elements. If
+   * the current object will point to non-@p const elements, then this is a
+   * straight forward copy constructor. On the other hand, if the current
+   * type's @p ElementType template argument is a @p const qualified type,
+   * then the current constructor is a conversion constructor that converts a
+   * non-@p const view to a @p const view, akin to converting a non-@p const
+   * pointer to a @p const pointer.
+   */
+  ArrayView (const ArrayView<typename boost::remove_cv<value_type>::type> &view);
+
+  /**
+   * Return the size (in elements) of the view of memory this object
+   * represents.
+   */
+  std::size_t size() const;
+
+  /**
+   * Return a reference to the $i$th element of the range represented by the
+   * current object.
+   *
+   * This function is marked as @p const because it does not change the
+   * <em>view object</em>. It may however return a reference to a non-@p const
+   * memory location depending on whether the template type of the class is @p
+   * const or not.
+   */
+  value_type &operator[] (const std::size_t i) const;
+
+private:
+  /**
+   * A pointer to the first element of the range of locations in memory that
+   * this object represents.
+   */
+  value_type  *const starting_element;
+
+  /**
+   * The length of the array this object represents.
+   */
+  const std::size_t  n_elements;
+};
+
+
+
+//---------------------------------------------------------------------------
+
+
+template <typename ElementType>
+inline
+ArrayView<ElementType>::ArrayView(value_type        *starting_element,
+                                  const std::size_t  n_elements)
+  :
+  starting_element (starting_element),
+  n_elements(n_elements)
+{}
+
+
+
+template <typename ElementType>
+inline
+ArrayView<ElementType>::ArrayView(const ArrayView<typename boost::remove_cv<value_type>::type> &view)
+  :
+  starting_element (view.size() > 0 ? &view[0] : 0), // view[0] asserts if empty
+  n_elements(view.size())
+{}
+
+
+
+template <typename ElementType>
+inline
+std::size_t
+ArrayView<ElementType>::size() const
+{
+  return n_elements;
+}
+
+
+template <typename ElementType>
+inline
+typename ArrayView<ElementType>::value_type &
+ArrayView<ElementType>::operator[](const std::size_t i) const
+{
+  Assert (i<n_elements, ExcIndexRange(i, 0, n_elements));
+
+  return *(starting_element + i);
+}
+
+
+/**
+ * Create a view to an entire std::vector object, i.e., an ArrayView built
+ * from a pointer to the first element and the size of the given argument.
+ * For an empty vector the view has size zero and a null starting pointer.
+ *
+ * This function is used for non-@p const references to objects of vector
+ * type. Such objects contain elements that can be written to. Consequently,
+ * the return type of this function is a view to a set of writable objects.
+ *
+ * @param[in] vector The vector for which we want to have an array view
+ * object. The array view corresponds to the <em>entire</em> vector.
+ *
+ * @relates ArrayView
+ */
+template <typename ElementType>
+inline
+ArrayView<ElementType>
+make_array_view (std::vector<ElementType> &vector)
+{
+  return ArrayView<ElementType> (vector.empty() ? 0 : &vector[0], vector.size());
+}
+
+
+
+/**
+ * Create a view to an entire std::vector object, i.e., an ArrayView built
+ * from a pointer to the first element and the size of the given argument.
+ * For an empty vector the view has size zero and a null starting pointer.
+ *
+ * This function is used for @p const references to objects of vector type
+ * because they contain immutable elements. Consequently, the return type of
+ * this function is a view to a set of @p const objects.
+ *
+ * @param[in] vector The vector for which we want to have an array view
+ * object. The array view corresponds to the <em>entire</em> vector.
+ *
+ * @relates ArrayView
+ */
+template <typename ElementType>
+inline
+ArrayView<const ElementType>
+make_array_view (const std::vector<ElementType> &vector)
+{
+  return ArrayView<const ElementType> (vector.empty() ? 0 : &vector[0], vector.size());
+}
+
+
+
+/**
+ * Create a view to a part of a std::vector object. This is equivalent to
+ * initializing the ArrayView object with a pointer to the @p starting_index-
+ * th element and the @p size_of_view as the length of the view.
+ *
+ * This function is used for non-@p const references to objects of vector
+ * type. Such objects contain elements that can be written to. Consequently,
+ * the return type of this function is a view to a set of writable objects.
+ *
+ * @param[in] vector The vector for which we want to have an array view
+ * object.
+ * @param[in] starting_index The index of the first element of the vector that
+ * will be part of this view.
+ * @param[in] size_of_view The number of elements in the new ArrayView.
+ *
+ * @pre <code>starting_index + size_of_view <= vector.size()</code>
+ *
+ * @relates ArrayView
+ */
+template <typename ElementType>
+inline
+ArrayView<ElementType>
+make_array_view (std::vector<ElementType> &vector,
+                 const std::size_t         starting_index,
+                 const std::size_t         size_of_view)
+{
+  Assert (starting_index + size_of_view <= vector.size(),
+          ExcMessage ("The starting index and size of the view you want to "
+                      "create would lead to a view that extends beyond the end "
+                      "of the given vector."));
+  return ArrayView<ElementType> (&vector[starting_index], size_of_view);
+}
+
+
+
+/**
+ * Create a view to a part of a std::vector object. This is equivalent to
+ * initializing the ArrayView object with a pointer to the @p starting_index-
+ * th element and the @p size_of_view as the length of the view.
+ *
+ * This function is used for @p const references to objects of vector type
+ * because they contain immutable elements. Consequently, the return type of
+ * this function is a view to a set of @p const objects.
+ *
+ * @param[in] vector The vector for which we want to have an array view
+ * object.
+ * @param[in] starting_index The index of the first element of the vector that
+ * will be part of this view.
+ * @param[in] size_of_view The number of elements in the new ArrayView.
+ *
+ * @pre <code>starting_index + size_of_view <= vector.size()</code>
+ *
+ * @relates ArrayView
+ */
+template <typename ElementType>
+inline
+ArrayView<const ElementType>
+make_array_view (const std::vector<ElementType> &vector,
+                 const std::size_t         starting_index,
+                 const std::size_t         size_of_view)
+{
+  Assert (starting_index + size_of_view <= vector.size(),
+          ExcMessage ("The starting index and size of the view you want to "
+                      "create would lead to a view that extends beyond the end "
+                      "of the given vector."));
+  return ArrayView<const ElementType> (&vector[starting_index], size_of_view);
+}
+
+
+
+/**
+ * Create a view to an entire row of a Table<2> object. This is equivalent to
+ * initializing an ArrayView object with a pointer to the first element of the
+ * given row, and the length of the row as the length of the view.
+ *
+ * This function is used for non-@p const references to objects of Table type.
+ * Such objects contain elements that can be written to. Consequently, the
+ * return type of this function is a view to a set of writable objects.
+ *
+ * @param[in] table The Table for which we want to have an array view object.
+ * The array view corresponds to an <em>entire</em> row.
+ * @param[in] row The index of the row into the table to which this view
+ * should correspond.
+ *
+ * @relates ArrayView
+ */
+template <typename ElementType>
+inline
+ArrayView<ElementType>
+make_array_view (Table<2,ElementType>                           &table,
+                 const typename Table<2,ElementType>::size_type  row)
+{
+  AssertIndexRange (row, table.size()[0]);
+  return ArrayView<ElementType> (&table[row][0], table.size()[1]);
+}
+
+
+
+/**
+ * Create a view to an entire row of a Table<2> object. This is equivalent to
+ * initializing an ArrayView object with a pointer to the first element of the
+ * given row, and the length of the row as the length of the view.
+ *
+ * This function is used for @p const references to objects of Table type
+ * because they contain immutable elements. Consequently, the return type of
+ * this function is a view to a set of @p const objects.
+ *
+ * @param[in] table The Table for which we want to have an array view object.
+ * The array view corresponds to an <em>entire</em> row.
+ * @param[in] row The index of the row into the table to which this view
+ * should correspond.
+ *
+ * @relates ArrayView
+ */
+template <typename ElementType>
+inline
+ArrayView<const ElementType>
+make_array_view (const Table<2,ElementType>                     &table,
+                 const typename Table<2,ElementType>::size_type  row)
+{
+  AssertIndexRange (row, table.size()[0]);
+  return ArrayView<const ElementType> (&table[row][0], table.size()[1]);
+}
+
+
+
+/**
+ * Create a view to (a part of) a row of a Table<2> object.
+ *
+ * This function is used for non-@p const references to objects of Table type.
+ * Such objects contain elements that can be written to. Consequently, the
+ * return type of this function is a view to a set of writable objects.
+ *
+ * @param[in] table The Table for which we want to have an array view object.
+ * The array view corresponds to (a part of) one row of this table.
+ * @param[in] row The index of the row into the table to which this view
+ * should correspond.
+ * @param[in] starting_column The index of the column into the given row of
+ * the table that corresponds to the first element of this view.
+ * @param[in] size_of_view The number of elements this view should have. This
+ * corresponds to the number of columns in the current row to which the view
+ * should correspond.
+ *
+ * @relates ArrayView
+ */
+template <typename ElementType>
+inline
+ArrayView<ElementType>
+make_array_view (Table<2,ElementType>                           &table,
+                 const typename Table<2,ElementType>::size_type  row,
+                 const typename Table<2,ElementType>::size_type  starting_column,
+                 const std::size_t                               size_of_view)
+{
+  AssertIndexRange (row, table.size()[0]);
+  AssertIndexRange (starting_column, table.size()[1]);
+  Assert (starting_column + size_of_view <= table.size()[1],
+          ExcMessage ("The starting index and size of the view you want to "
+                      "create would lead to a view that extends beyond the end "
+                      "of a column of the given table."));
+  return ArrayView<ElementType> (&table[row][starting_column], size_of_view);
+}
+
+
+
+/**
+ * Create a view to (a part of) a row of a Table<2> object.
+ *
+ * This function is used for @p const references to objects of Table type
+ * because they contain immutable elements. Consequently, the return type of
+ * this function is a view to a set of @p const objects.
+ *
+ * @param[in] table The Table for which we want to have an array view object.
+ * The array view corresponds to (a part of) one row of this table.
+ * @param[in] row The index of the row into the table to which this view
+ * should correspond.
+ * @param[in] starting_column The index of the column into the given row of
+ * the table that corresponds to the first element of this view.
+ * @param[in] size_of_view The number of elements this view should have. This
+ * corresponds to the number of columns in the current row to which the view
+ * should correspond.
+ *
+ * @relates ArrayView
+ */
+template <typename ElementType>
+inline
+ArrayView<const ElementType>
+make_array_view (const Table<2,ElementType>                     &table,
+                 const typename Table<2,ElementType>::size_type  row,
+                 const typename Table<2,ElementType>::size_type  starting_column,
+                 const std::size_t                               size_of_view)
+{
+  AssertIndexRange (row, table.size()[0]);
+  AssertIndexRange (starting_column, table.size()[1]);
+  Assert (starting_column + size_of_view <= table.size()[1],
+          ExcMessage ("The starting index and size of the view you want to "
+                      "create would lead to a view that extends beyond the end "
+                      "of a column of the given table."));
+  return ArrayView<const ElementType> (&table[row][starting_column], size_of_view);
+}
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/auto_derivative_function.h b/include/deal.II/base/auto_derivative_function.h
new file mode 100644
index 0000000..a8c3199
--- /dev/null
+++ b/include/deal.II/base/auto_derivative_function.h
@@ -0,0 +1,233 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__auto_derivative_function_h
+#define dealii__auto_derivative_function_h
+
+
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/function.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * This class automatically computes the gradient of a function by employing
+ * numerical difference quotients. This happens only if the user-defined
+ * function does not provide the gradient function itself.
+ *
+ * The following example of a user-defined function overloads and implements
+ * only the value() function but not the gradient() function. If the
+ * gradient() function is invoked then the gradient function implemented by
+ * the AutoDerivativeFunction is called, where the latter function employs
+ * numerical difference quotients.
+ *
+ * @code
+ * class UserFunction: public AutoDerivativeFunction
+ * {               // access to one component at one point
+ *   double value (const Point<dim> &p, const
+ *                 unsigned int component = 0) const
+ *          { // Implementation ....  };
+ * } user_function;
+ *
+ *            // gradient by employing difference quotients.
+ * Tensor<1,dim> grad=user_function.gradient(some_point);
+ * @endcode
+ *
+ * If the user overloads and implements also the gradient function, then, of
+ * course, the user's gradient function is called.
+ *
+ * Note, that the usage of the value() and gradient() functions explained
+ * above, also applies to the value_list() and gradient_list() functions as
+ * well as to the vector valued versions of these functions, see e.g.
+ * vector_value(), vector_gradient(), vector_value_list() and
+ * vector_gradient_list().
+ *
+ * The gradient() and gradient_list() functions make use of the
+ * Function::value() function. The vector_gradient() and
+ * vector_gradient_list() make use of the Function::vector_value() function.
+ * Make sure that the user defined function implements the value() function
+ * and the vector_value() function, respectively.
+ *
+ * Furthermore note, that an object of this class does <b>not</b> represent
+ * the derivative of a function, like FunctionDerivative, that gives a
+ * directional derivative by calling the value() function. In fact, this class
+ * (the AutoDerivativeFunction class) can substitute the Function class as
+ * base class for user defined classes. This class implements the gradient()
+ * functions for automatic computation of numerical difference quotients and
+ * serves as intermediate class between the base Function class and the user
+ * defined function class.
+ *
+ * @ingroup functions
+ * @author Ralf Hartmann, 2001
+ */
+template <int dim>
+class AutoDerivativeFunction : public Function<dim>
+{
+public:
+
+  /**
+   * Names of difference formulas.
+   */
+  enum DifferenceFormula
+  {
+    /**
+     * The symmetric Euler formula of second order:
+     * @f[
+     * u'(t) \approx
+     * \frac{u(t+h) -
+     * u(t-h)}{2h}.
+     * @f]
+     */
+    Euler,
+    /**
+     * The upwind Euler formula of first order:
+     * @f[
+     * u'(t) \approx
+     * \frac{u(t) -
+     * u(t-h)}{h}.
+     * @f]
+     */
+    UpwindEuler,
+    /**
+     * The fourth order scheme
+     * @f[
+     * u'(t) \approx
+     * \frac{u(t-2h) - 8u(t-h)
+     * +  8u(t+h) - u(t+2h)}{12h}.
+     * @f]
+     */
+    FourthOrder
+  };
+
+  /**
+   * Constructor. Takes the difference step size <tt>h</tt>. It's within the
+   * user's responsibility to choose an appropriate value here. <tt>h</tt>
+   * should be chosen taking into account the absolute value as well as the
+   * amount of local variation of the function. Setting <tt>h=1e-6</tt> might
+   * be a good choice for functions with an absolute value of about 1, that
+   * furthermore does not vary too much.
+   *
+   * <tt>h</tt> can be changed later using the set_h() function.
+   *
+   * Sets DifferenceFormula <tt>formula</tt> to the default <tt>Euler</tt>
+   * formula of the set_formula() function. Change this preset formula by
+   * calling the set_formula() function.
+   */
+  AutoDerivativeFunction (const double h,
+                          const unsigned int n_components = 1,
+                          const double       initial_time = 0.0);
+
+  /**
+   * Virtual destructor; absolutely necessary in this case.
+   */
+  virtual ~AutoDerivativeFunction ();
+
+  /**
+   * Choose the difference formula. See the enum #DifferenceFormula for
+   * available choices.
+   */
+  void set_formula (const DifferenceFormula formula = Euler);
+
+  /**
+   * Takes the difference step size <tt>h</tt>. It's within the user's
+   * responsibility to choose an appropriate value here. <tt>h</tt> should be
+   * chosen taking into account the absolute value of as well as the amount of
+   * local variation of the function. Setting <tt>h=1e-6</tt> might be a good
+   * choice for functions with an absolute value of about 1, that furthermore
+   * does not vary too much.
+   */
+  void set_h (const double h);
+
+  /**
+   * Return the gradient of the specified component of the function at the
+   * given point.
+   *
+   * Computes numerical difference quotients using the preset
+   * #DifferenceFormula.
+   */
+  virtual Tensor<1,dim> gradient (const Point<dim>   &p,
+                                  const unsigned int  component = 0) const;
+
+  /**
+   * Return the gradient of all components of the function at the given point.
+   *
+   * Computes numerical difference quotients using the preset
+   * #DifferenceFormula.
+   */
+  virtual void vector_gradient (const Point<dim>            &p,
+                                std::vector<Tensor<1,dim> > &gradients) const;
+
+  /**
+   * Set <tt>gradients</tt> to the gradients of the specified component of the
+   * function at the <tt>points</tt>.  It is assumed that <tt>gradients</tt>
+   * already has the right size, i.e.  the same size as the <tt>points</tt>
+   * array.
+   *
+   * Computes numerical difference quotients using the preset
+   * #DifferenceFormula.
+   */
+  virtual void gradient_list (const std::vector<Point<dim> > &points,
+                              std::vector<Tensor<1,dim> >    &gradients,
+                              const unsigned int              component = 0) const;
+
+  /**
+   * Set <tt>gradients</tt> to the gradients of the function at the
+   * <tt>points</tt>, for all components. It is assumed that
+   * <tt>gradients</tt> already has the right size, i.e. the same size as the
+   * <tt>points</tt> array.
+   *
+   * The outer loop over <tt>gradients</tt> is over the points in the list,
+   * the inner loop over the different components of the function.
+   *
+   * Computes numerical difference quotients using the preset
+   * #DifferenceFormula.
+   */
+  virtual void vector_gradient_list (const std::vector<Point<dim> > &points,
+                                     std::vector<std::vector<Tensor<1,dim> > > &gradients) const;
+
+  /**
+   * Returns a #DifferenceFormula of the order <tt>ord</tt> at minimum.
+   */
+  static
+  DifferenceFormula
+  get_formula_of_order (const unsigned int ord);
+
+  /**
+   * Exception.
+   */
+  DeclException0(ExcInvalidFormula);
+
+private:
+
+  /**
+   * Step size of the difference formula. Set by the set_h() function.
+   */
+  double h;
+
+  /**
+   * Includes the unit vectors scaled by <tt>h</tt>.
+   */
+  std::vector<Tensor<1,dim> > ht;
+
+  /**
+   * Difference formula. Set by the set_formula() function.
+   */
+  DifferenceFormula formula;
+};
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/complex_overloads.h b/include/deal.II/base/complex_overloads.h
new file mode 100644
index 0000000..934bd25
--- /dev/null
+++ b/include/deal.II/base/complex_overloads.h
@@ -0,0 +1,84 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__complex_overloads_h
+#define dealii__complex_overloads_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/template_constraints.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+// Forward declarations
+template <typename T> struct EnableIfScalar;
+template <typename T, typename U> struct ProductType;
+
+#ifndef DEAL_II_HAVE_COMPLEX_OPERATOR_OVERLOADS
+/**
+ * Multiply two complex numbers whose underlying floating point types differ,
+ * a combination the standard library offers no <tt>operator*</tt> for. Both
+ * operands are converted to the common ProductType before multiplying.
+ *
+ * @relates ProductType
+ */
+template <typename T, typename U>
+inline
+typename ProductType<std::complex<T>, std::complex<U> >::type
+operator*(const std::complex<T> &left, const std::complex<U> &right)
+{
+  typedef typename ProductType<std::complex<T>, std::complex<U> >::type product_type;
+  return static_cast<product_type>(left) * static_cast<product_type>(right);
+}
+
+
+/**
+ * Scale a complex number from the right by a scalar of a different floating
+ * point type, a combination the standard library offers no
+ * <tt>operator*</tt> for. Both operands are converted to their ProductType.
+ *
+ * @relates EnableIfScalar
+ * @relates ProductType
+ */
+template <typename T, typename U>
+inline
+typename ProductType<std::complex<T>, typename EnableIfScalar<U>::type>::type
+operator*(const std::complex<T> &left, const U &right)
+{
+  typedef typename ProductType<std::complex<T>, U>::type product_type;
+  return static_cast<product_type>(left) * static_cast<product_type>(right);
+}
+
+
+/**
+ * Provide an <tt>operator*</tt> for a scalar multiplication of a real
+ * floating point type with a different complex floating point type, which
+ * the standard library lacks. Both operands are converted to the return type.
+ *
+ * @relates EnableIfScalar
+ * @relates ProductType
+ */
+template <typename T, typename U>
+typename ProductType<typename EnableIfScalar<T>::type, std::complex<U> >::type
+inline
+operator*(const T &left, const std::complex<U> &right)
+{
+  typedef typename ProductType<T, std::complex<U> >::type result_type;
+  return static_cast<result_type>(left) * static_cast<result_type>(right);
+}
+#endif /* DEAL_II_HAVE_COMPLEX_OPERATOR_OVERLOADS */
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/conditional_ostream.h b/include/deal.II/base/conditional_ostream.h
new file mode 100644
index 0000000..b854f66
--- /dev/null
+++ b/include/deal.II/base/conditional_ostream.h
@@ -0,0 +1,180 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__conditional_ostream_h
+#define dealii__conditional_ostream_h
+
+#include <deal.II/base/config.h>
+
+#include <ostream>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/**
+ * A class that allows printing to an output stream, e.g. @p std::cout,
+ * depending on the ConditionalOStream object being active (default) or not.
+ * The condition of this object can be changed by set_condition() and in the
+ * constructor. This class is used in the step-17, step-18, step-32, step-33,
+ * and step-35 tutorial programs.
+ *
+ * This class is mostly useful in parallel computations. Ordinarily, you would
+ * use @p std::cout to print messages like what the program is presently
+ * doing, or the number of degrees of freedom in each step. However, in
+ * parallel programs, this means that each of the MPI processes writes to the
+ * screen, which yields many repetitions of the same text. To avoid it, one
+ * would have to have a designated process, say the one with MPI process
+ * number zero, do the output, and guard each write statement with an if-
+ * condition. This becomes cumbersome and clutters up the code. Rather than
+ * doing so, the present class can be used: objects of its type act just like
+ * a standard output stream, but they only print something based on a
+ * condition that can be set to, for example, <tt>mpi_process==0</tt>, so that
+ * only one process has a true condition and in all other processes writes to
+ * this object just disappear in nirvana.
+ *
+ * The usual usage of this class is as follows:
+ *
+ * @code
+ * ConditionalOStream pout(std::cout, this_mpi_process==0);
+ *
+ *                                  // all processes print following
+ *                                  // information to standard output
+ * std::cout << "Reading parameter file on process "
+ *           << this_mpi_process << std::endl;
+ *
+ *                                  // following is printed by
+ *                                  // process 0 only
+ * pout << "Solving ..." << std::endl;
+ * solve();
+ * pout << "done" << std::endl;
+ * @endcode
+ *
+ * Here, `Reading parameter file on process xy' is printed by each process
+ * separately. In contrast to that, `Solving ...' and `done' are printed to
+ * standard output only once, namely by process 0.
+ *
+ * This class is not derived from ostream. Therefore
+ * @code
+ * system_matrix.print_formatted(pout);
+ * @endcode
+ * is <em>not</em> possible. Instead use the is_active() function for a work-
+ * around:
+ *
+ * @code
+ * if (pout.is_active())
+ *   system_matrix.print_formatted(std::cout);
+ * @endcode
+ *
+ * @ingroup textoutput
+ * @author Ralf Hartmann, Wolfgang Bangerth, 2004
+ */
+class ConditionalOStream
+{
+public:
+  /**
+   * Constructor. Set the stream to which we want to write, and the condition
+   * based on which writes are actually forwarded. Per default the condition
+   * of an object is active.
+   */
+  ConditionalOStream (std::ostream &stream,
+                      const bool    active = true);
+
+  /**
+   * Depending on the <tt>active</tt> flag set the condition of this stream to
+   * active (true) or non-active (false). An object of this class prints to
+   * the stream given to the constructor if and only if its condition is active.
+   */
+  void set_condition (const bool active);
+
+  /**
+   * Return the condition of the object.
+   */
+  bool is_active() const;
+
+  /**
+   * Return a reference to the stream currently in use.
+   */
+  std::ostream &get_stream () const;
+
+  /**
+   * Write @p t to the stream if the condition is active. This function must
+   * be @p const so that member objects of this type can also be used from
+   * @p const member functions of the surrounding class.
+   */
+  template <typename T>
+  const ConditionalOStream &
+  operator << (const T &t) const;
+
+  /**
+   * Treat ostream manipulators. This function must be @p const so that member
+   * objects of this type can also be used from @p const member functions of
+   * the surrounding class.
+   *
+   * Note that compilers want to see this treated differently from the general
+   * template above since functions like @p std::endl are actually overloaded
+   * and can't be bound directly to a template type.
+   */
+  const ConditionalOStream &
+  operator<< (std::ostream& (*p) (std::ostream &)) const;
+
+private:
+  /**
+   * Reference to the stream we want to write to.
+   */
+  std::ostream  &output_stream;
+
+  /**
+   * Stores the actual condition the object is in; output is only forwarded
+   * while this flag is true.
+   */
+  bool active_flag;
+};
+
+
+// --------------------------- inline and template functions -----------
+
+// Forward @p t to the wrapped stream only while the condition is active.
+template <typename T>
+inline const ConditionalOStream &
+ConditionalOStream::operator<< (const T &t) const
+{
+  if (!active_flag)
+    return *this;
+  output_stream << t;
+  return *this;
+}
+
+
+// Apply the manipulator @p p (e.g. std::endl) only while active.
+inline const ConditionalOStream &
+ConditionalOStream::operator<< (std::ostream& (*p) (std::ostream &)) const
+{
+  if (!active_flag)
+    return *this;
+  output_stream << p;
+  return *this;
+}
+
+
+// Hand out the wrapped stream itself, regardless of the condition.
+inline std::ostream &
+ConditionalOStream::get_stream () const
+{
+  return output_stream;
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/config.h.in b/include/deal.II/base/config.h.in
new file mode 100644
index 0000000..d6c5f03
--- /dev/null
+++ b/include/deal.II/base/config.h.in
@@ -0,0 +1,334 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2012 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__config_h
+#define dealii__config_h
+
+
+/***********************************************************************
+ * Information about deal.II:
+ */
+
+#define DEAL_II_PACKAGE_NAME "@DEAL_II_PACKAGE_NAME@"
+
+#define DEAL_II_PACKAGE_VERSION "@DEAL_II_PACKAGE_VERSION@"
+
+#define DEAL_II_VERSION_MAJOR @DEAL_II_VERSION_MAJOR@
+#define DEAL_II_VERSION_MINOR @DEAL_II_VERSION_MINOR@
+#define DEAL_II_VERSION_SUBMINOR @DEAL_II_VERSION_SUBMINOR@
+
+
+/***********************************************************************
+ * Configured deal.II features:
+ */
+
+#cmakedefine DEAL_II_WITH_64BIT_INDICES
+#cmakedefine DEAL_II_WITH_ARPACK
+#cmakedefine DEAL_II_WITH_BZIP2
+#cmakedefine DEAL_II_WITH_CXX11
+#cmakedefine DEAL_II_WITH_CXX14
+#cmakedefine DEAL_II_WITH_HDF5
+#cmakedefine DEAL_II_WITH_LAPACK
+#cmakedefine DEAL_II_WITH_METIS
+#cmakedefine DEAL_II_WITH_MPI
+#cmakedefine DEAL_II_WITH_MUPARSER
+#cmakedefine DEAL_II_WITH_NETCDF
+#cmakedefine DEAL_II_WITH_OPENCASCADE
+#cmakedefine DEAL_II_WITH_P4EST
+#cmakedefine DEAL_II_WITH_PETSC
+#cmakedefine DEAL_II_WITH_SLEPC
+#cmakedefine DEAL_II_WITH_THREADS
+#cmakedefine DEAL_II_WITH_TRILINOS
+#cmakedefine DEAL_II_WITH_UMFPACK
+#cmakedefine DEAL_II_WITH_ZLIB
+
+
+/***********************************************************************
+ * Compiler bugs:
+ *
+ * For documentation see cmake/checks/check_03_compiler_bugs.cmake
+ */
+
+#cmakedefine DEAL_II_TEMPL_SPEC_FRIEND_BUG
+#cmakedefine DEAL_II_MEMBER_ARRAY_SPECIALIZATION_BUG
+#cmakedefine DEAL_II_MEMBER_VAR_SPECIALIZATION_BUG
+#cmakedefine DEAL_II_EXPLICIT_CONSTRUCTOR_BUG
+#cmakedefine DEAL_II_CONST_MEMBER_DEDUCTION_BUG
+#cmakedefine DEAL_II_BOOST_BIND_COMPILER_BUG
+#cmakedefine DEAL_II_BIND_NO_CONST_OP_PARENTHESES
+#cmakedefine DEAL_II_CONSTEXPR_BUG
+#cmakedefine DEAL_II_ICC_SFINAE_BUG
+
+
+/***********************************************************************
+ * Compiler features:
+ *
+ * For documentation see cmake/checks/check_01_compiler_features.cmake
+ */
+
+#cmakedefine DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
+#cmakedefine DEAL_II_VECTOR_ITERATOR_IS_POINTER
+#cmakedefine DEAL_II_HAVE_BUILTIN_EXPECT
+#cmakedefine DEAL_II_HAVE_VERBOSE_TERMINATE
+#cmakedefine DEAL_II_HAVE_GLIBC_STACKTRACE
+#cmakedefine DEAL_II_HAVE_LIBSTDCXX_DEMANGLER
+#cmakedefine __PRETTY_FUNCTION__ @__PRETTY_FUNCTION__@
+#cmakedefine DEAL_II_DEPRECATED @DEAL_II_DEPRECATED@
+#cmakedefine DEAL_II_ALWAYS_INLINE @DEAL_II_ALWAYS_INLINE@
+#cmakedefine DEAL_II_COMPILER_HAS_DIAGNOSTIC_PRAGMA
+
+
+/***********************************************************************
+ * CPU features:
+ *
+ * For documentation see cmake/checks/check_01_cpu_features.cmake
+ */
+
+#cmakedefine DEAL_II_WORDS_BIGENDIAN
+#define DEAL_II_COMPILER_VECTORIZATION_LEVEL @DEAL_II_COMPILER_VECTORIZATION_LEVEL@
+#define DEAL_II_OPENMP_SIMD_PRAGMA @DEAL_II_OPENMP_SIMD_PRAGMA@
+
+
+/***********************************************************************
+ * Language features:
+ *
+ * For documentation see cmake/checks/check_01_cxx_features.cmake
+ */
+
+#cmakedefine DEAL_II_HAVE_CXX11_IS_TRIVIALLY_COPYABLE
+#cmakedefine DEAL_II_HAVE_ISNAN
+#cmakedefine DEAL_II_HAVE_STD_ISNAN
+#cmakedefine DEAL_II_HAVE_UNDERSCORE_ISNAN
+#cmakedefine DEAL_II_HAVE_ISFINITE
+#cmakedefine DEAL_II_HAVE_FP_EXCEPTIONS
+#cmakedefine DEAL_II_HAVE_COMPLEX_OPERATOR_OVERLOADS
+
+
+/***********************************************************************
+ * System features:
+ *
+ * For documentation see cmake/checks/check_02_system_features.cmake
+ */
+
+#cmakedefine DEAL_II_HAVE_SYS_RESOURCE_H
+#cmakedefine DEAL_II_HAVE_SYS_TIME_H
+#cmakedefine DEAL_II_HAVE_SYS_TIMES_H
+#cmakedefine DEAL_II_HAVE_SYS_TYPES_H
+#cmakedefine DEAL_II_HAVE_UNISTD_H
+#cmakedefine DEAL_II_HAVE_GETHOSTNAME
+#cmakedefine DEAL_II_HAVE_GETPID
+#cmakedefine DEAL_II_HAVE_TIMES
+#cmakedefine DEAL_II_HAVE_JN
+
+#cmakedefine DEAL_II_MSVC
+
+
+/***********************************************************************
+ * Feature configuration
+ *
+ * For documentation see cmake/configure/configure_*.cmake and
+ * cmake/modules/Find*.cmake
+ */
+
+/* cmake/modules/FindARPACK.cmake */
+#cmakedefine DEAL_II_ARPACK_WITH_PARPACK
+
+/* cmake/configure/configure_1_threads.cmake */
+#cmakedefine DEAL_II_USE_MT_POSIX
+#cmakedefine DEAL_II_USE_MT_POSIX_NO_BARRIERS
+
+/*
+ * DEAL_VOLATILE expands to `volatile' when deal.II is configured with thread
+ * support and to nothing otherwise, so variables can be qualified on demand.
+ * NOTE(review): `volatile' alone gives no atomicity or inter-thread ordering.
+ */
+#ifdef DEAL_II_WITH_THREADS
+#  define DEAL_VOLATILE volatile
+#else
+#  define DEAL_VOLATILE
+#endif
+
+
+/***********************************************************************
+ * Various macros for version number query and comparison:
+ *
+ * These macros are defined to make testing for specific versions within
+ * the deal.II main code as simple as possible.
+ */
+
+/*
+ * deal.II: true if the configured version is at least major.minor.subminor.
+ */
+
+#define DEAL_II_VERSION_GTE(major,minor,subminor) \
+ ((DEAL_II_VERSION_MAJOR * 10000 + \
+    DEAL_II_VERSION_MINOR * 100 + \
+     DEAL_II_VERSION_SUBMINOR) \
+    >=  \
+    (major)*10000 + (minor)*100 + (subminor))
+
+/*
+ * p4est: version numbers substituted by CMake and a >= comparison macro.
+ */
+
+#ifdef DEAL_II_WITH_P4EST
+#  define DEAL_II_P4EST_VERSION_MAJOR @P4EST_VERSION_MAJOR@
+#  define DEAL_II_P4EST_VERSION_MINOR @P4EST_VERSION_MINOR@
+#  define DEAL_II_P4EST_VERSION_SUBMINOR @P4EST_VERSION_SUBMINOR@
+#  define DEAL_II_P4EST_VERSION_PATCH @P4EST_VERSION_PATCH@
+
+#  define DEAL_II_P4EST_VERSION_GTE(major,minor,subminor,patch) \
+ ((DEAL_II_P4EST_VERSION_MAJOR * 1000000 + \
+    DEAL_II_P4EST_VERSION_MINOR * 10000 + \
+     DEAL_II_P4EST_VERSION_SUBMINOR * 100 + \
+      DEAL_II_P4EST_VERSION_PATCH) \
+    >=  \
+    (major)*1000000 + (minor)*10000 + (subminor)*100 + (patch))
+#else
+  // p4est up to 0.3.4.1 didn't define P4EST_VERSION_*. Since we don't
+  // support anything before 0.3.4, we assume 0.3.4. This means that we
+  // can't use the new features in 0.3.4.1.
+#  define DEAL_II_P4EST_VERSION_GTE(major,minor,subminor,patch) \
+  ((0 * 1000000 + \
+    3 * 10000 + \
+    4 * 100 + \
+    0) \
+    >=  \
+    (major)*1000000 + (minor)*10000 + (subminor)*100 + (patch))
+#endif
+
+/*
+ * PETSc: version comparison macros (less-than and greater-or-equal).
+ *
+ * Note: The following definitions will be set in petscconf.h and
+ *       petscversion.h, so we don't repeat them here.
+ *
+ *  PETSC_VERSION_MAJOR
+ *  PETSC_VERSION_MINOR
+ *  PETSC_VERSION_SUBMINOR
+ *  PETSC_VERSION_PATCH
+ *  PETSC_VERSION_RELEASE
+ *  PETSC_USE_COMPLEX
+ */
+
+#define DEAL_II_PETSC_VERSION_LT(major,minor,subminor) \
+  ((PETSC_VERSION_MAJOR * 10000 + \
+    PETSC_VERSION_MINOR * 100 + \
+    PETSC_VERSION_SUBMINOR) \
+    <  \
+    (major)*10000 + (minor)*100 + (subminor))
+
+#define DEAL_II_PETSC_VERSION_GTE(major,minor,subminor) \
+  ((PETSC_VERSION_MAJOR * 10000 + \
+    PETSC_VERSION_MINOR * 100 + \
+    PETSC_VERSION_SUBMINOR) \
+    >=  \
+    (major)*10000 + (minor)*100 + (subminor))
+
+/*
+ * Trilinos: version numbers substituted by CMake and a >= comparison macro.
+ */
+
+#ifdef DEAL_II_WITH_TRILINOS
+#  define DEAL_II_TRILINOS_VERSION_MAJOR @TRILINOS_VERSION_MAJOR@
+#  define DEAL_II_TRILINOS_VERSION_MINOR @TRILINOS_VERSION_MINOR@
+#  define DEAL_II_TRILINOS_VERSION_SUBMINOR @TRILINOS_VERSION_SUBMINOR@
+
+#  define DEAL_II_TRILINOS_VERSION_GTE(major,minor,subminor) \
+ ((DEAL_II_TRILINOS_VERSION_MAJOR * 10000 + \
+    DEAL_II_TRILINOS_VERSION_MINOR * 100 + \
+      DEAL_II_TRILINOS_VERSION_SUBMINOR) \
+    >=  \
+    (major)*10000 + (minor)*100 + (subminor))
+#endif
+
+
+/***********************************************************************
+ * Two macro names that we put at the top and bottom of all deal.II files
+ * and that will be expanded to "namespace dealii {" and "}".
+ */
+
+#define DEAL_II_NAMESPACE_OPEN namespace dealii {
+#define DEAL_II_NAMESPACE_CLOSE }
+
+
+/***********************************************************************
+ * Two macros to guard external header includes: DISABLE pushes the
+ * diagnostic state and ignores the warnings below, ENABLE pops it again.
+ * Selectively disable diagnostics set by "-Wextra" (and similar flags) for
+ * GCC and compilers accepting GCC dialects (such as clang).
+ * "diagnostic push" is supported since gcc-4.6 and clang-3.3.
+ */
+
+#ifdef DEAL_II_COMPILER_HAS_DIAGNOSTIC_PRAGMA
+
+#  define DEAL_II_DISABLE_EXTRA_DIAGNOSTICS                      \
+_Pragma("GCC diagnostic push")                                   \
+_Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"")          \
+_Pragma("GCC diagnostic ignored \"-Wpragmas\"")                  \
+_Pragma("GCC diagnostic ignored \"-Wextra\"")                    \
+_Pragma("GCC diagnostic ignored \"-Woverloaded-virtual\"")       \
+_Pragma("GCC diagnostic ignored \"-Wunused-function\"")          \
+_Pragma("GCC diagnostic ignored \"-Wunused-parameter\"")         \
+_Pragma("GCC diagnostic ignored \"-Wunused-variable\"")          \
+_Pragma("GCC diagnostic ignored \"-Wtype-limits\"")              \
+_Pragma("GCC diagnostic ignored \"-Wtautological-constant-out-of-range-compare\"") \
+_Pragma("GCC diagnostic ignored \"-Wmissing-field-initializers\"") \
+_Pragma("GCC diagnostic ignored \"-Winfinite-recursion\"")       \
+_Pragma("GCC diagnostic ignored \"-Wunused-but-set-parameter\"") \
+_Pragma("GCC diagnostic ignored \"-Wnested-anon-types\"")        \
+_Pragma("GCC diagnostic ignored \"-Wunused-private-field\"")     \
+_Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")  \
+_Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"")  \
+_Pragma("GCC diagnostic warning \"-Wpragmas\"")
+
+#  define DEAL_II_ENABLE_EXTRA_DIAGNOSTICS                       \
+_Pragma("GCC diagnostic pop")
+
+#else
+
+#  define DEAL_II_DISABLE_EXTRA_DIAGNOSTICS
+#  define DEAL_II_ENABLE_EXTRA_DIAGNOSTICS
+
+#endif
+
+
+/**
+ * BOOST can falsely detect cxx11 support and will try to use
+ * variadic templates even when we disable cxx11. This would create
+ * a ton of warnings, so we tell boost to not use them in this case.
+ */
+#ifndef DEAL_II_WITH_CXX11
+#define BOOST_NO_CXX11_VARIADIC_TEMPLATES
+#endif
+
+
+/***********************************************************************
+ * Final inclusions:
+ */
+
+/*
+ * Some systems require including mpi.h before stdio.h which happens in
+ * types.h
+ */
+#if defined(DEAL_II_WITH_MPI) || defined(DEAL_II_WITH_PETSC)
+#  include <mpi.h>
+#endif
+
+#include <deal.II/base/numbers.h>
+#include <deal.II/base/types.h>
+
+#endif
+
diff --git a/include/deal.II/base/convergence_table.h b/include/deal.II/base/convergence_table.h
new file mode 100644
index 0000000..fbb041c
--- /dev/null
+++ b/include/deal.II/base/convergence_table.h
@@ -0,0 +1,225 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__convergence_table_h
+#define dealii__convergence_table_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/table_handler.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/**
+ * The ConvergenceTable class is an application to the TableHandler class and
+ * stores some convergence data, such as residuals of the cg-method, or some
+ * evaluated <i>L<sup>2</sup></i>-errors of discrete solutions, etc, and
+ * evaluates convergence rates or orders.
+ *
+ * The already implemented #RateMode's are #reduction_rate, where the
+ * convergence rate is the quotient of two following rows, and
+ * #reduction_rate_log2, that evaluates the order of convergence. These
+ * standard evaluations are useful for global refinement, for local refinement
+ * this may not be an appropriate method, as the convergence rates should be
+ * set in relation to the number of cells or the number of DoFs. The
+ * implementation of these non-standard methods is left to the user.
+ *
+ * The number of cells and the number of DoFs may be added to the table by
+ * calling e.g.  <tt>add_value("n cells", n_cells)</tt>. The table data is
+ * also added by calling add_value().  Before the output of the table the
+ * functions evaluate_convergence_rates() and evaluate_all_convergence_rates()
+ * may be called.
+ *
+ * There are two possibilities of how to evaluate the convergence rates of
+ * multiple columns in the same RateMode.
+ * <ol>
+ * <li> call evaluate_convergence_rates() for all wanted columns
+ * <li> call omit_column_from_convergence_rate_evaluation() for all columns
+ * for which this evaluation is not desired and then
+ * evaluate_all_convergence_rates() to evaluate the convergence rates of all
+ * columns that have not been flagged for omission.
+ * </ol>
+ *
+ * A detailed discussion of this class can also be found in the step-7 and
+ * step-13 example programs.
+ *
+ * @ingroup textoutput
+ * @author Ralf Hartmann, 1999
+ */
+class ConvergenceTable: public TableHandler
+{
+public:
+  /**
+   * Constructor.
+   */
+  ConvergenceTable();
+
+  /**
+   * Rate in relation to the rows.
+   */
+  enum RateMode
+  {
+    /**
+     * Do not do anything.
+     */
+    none,
+    /**
+     * Quotient of values in the previous row and in this row.
+     */
+    reduction_rate,
+    /**
+     * Logarithm of #reduction_rate to the base 2 representing the order of
+     * convergence when halving the grid size, e.g. from h to h/2.
+     */
+    reduction_rate_log2
+  };
+
+  /**
+   * Evaluates the convergence rates of the data column
+   * <tt>data_column_key</tt> due to the #RateMode in relation to the
+   * reference column <tt>reference_column_key</tt>. Be sure that the value
+   * types of the table entries of the data column and the reference data
+   * column are numbers, i.e. double, float, (unsigned) int, and so on.
+   *
+   * As this class has no information on the space dimension upon which the
+   * reference column vs. the value column is based, it needs to be
+   * passed as last argument to this method. The <i>default dimension for the
+   * reference column</i> is 2, which is appropriate for the number of cells
+   * in 2D. If you work in 3D, set the number to 3. If the reference column is
+   * $1/h$, remember to set the dimension to 1 also when working in 3D to get
+   * correct rates.
+   *
+   * The new rate column and the data column will be merged to a supercolumn.
+   * The tex caption of the supercolumn will be (by default) the same as the
+   * one of the data column. This may be changed by using the
+   * <tt>set_tex_supercaption (...)</tt> function of the base class
+   * TableHandler.
+   *
+   * This method behaves in the following way:
+   *
+   * If RateMode is reduction_rate, then the computed output is $
+   * \frac{e_{n-1}/k_{n-1}}{e_n/k_n}, $ where $k$ is the reference column (no
+   * dimension dependence!).
+   *
+   * If RateMode is reduction_rate_log2, then the computed output is $ dim
+   * \frac{\log |e_{n-1}/e_{n}|}{\log |k_n/k_{n-1}|} $.
+   *
+   * This is useful, for example, if we use as reference key the number of
+   * degrees of freedom or better, the number of cells.  Assuming that the
+   * error is proportional to $ C (1/\sqrt{k})^r $ in 2D, then this method
+   * will produce the rate $r$ as a result. For general dimension, as
+   * described by the last parameter of this function, the formula needs to be
+   * $ C (1/\sqrt[dim]{k})^r $.
+   *
+   * @note Since this function adds columns to the table after several rows
+   * have already been filled, it switches off the auto fill mode of the
+   * TableHandler base class. If you intend to add further data with auto
+   * fill, you will have to re-enable it after calling this function.
+   */
+  void
+  evaluate_convergence_rates (const std::string &data_column_key,
+                              const std::string &reference_column_key,
+                              const RateMode     rate_mode,
+                              const unsigned int dim = 2);
+
+
+  /**
+   * Evaluates the convergence rates of the data column
+   * <tt>data_column_key</tt> due to the #RateMode.  Be sure that the value
+   * types of the table entries of the data column are numbers, i.e. double,
+   * float, (unsigned) int, and so on.
+   *
+   * The new rate column and the data column will be merged to a supercolumn.
+   * The tex caption of the supercolumn will be (by default) the same as the
+   * one of the data column. This may be changed by using the
+   * set_tex_supercaption() function of the base class TableHandler.
+   *
+   * @note Since this function adds columns to the table after several rows
+   * have already been filled, it switches off the auto fill mode of the
+   * TableHandler base class. If you intend to add further data with auto
+   * fill, you will have to re-enable it after calling this function.
+   */
+  void
+  evaluate_convergence_rates (const std::string &data_column_key,
+                              const RateMode     rate_mode);
+
+  /**
+   * Omit this column <tt>key</tt> (not supercolumn!) from the evaluation of
+   * the convergence rates of `all' columns (see the following two functions).
+   *
+   * The Column::flag==1 is reserved for omitting the column from convergence
+   * rate evaluation.
+   */
+  void
+  omit_column_from_convergence_rate_evaluation(const std::string &key);
+
+  /**
+   * Evaluates convergence rates due to the <tt>rate_mode</tt> in relation to
+   * the reference column <tt>reference_column_key</tt>. This function
+   * evaluates the rates of ALL columns except for the columns that are to be
+   * omitted (see previous function) and except for the columns that are
+   * previously evaluated rate columns.  This function allows to evaluate the
+   * convergence rate for almost all columns of a table without calling
+   * evaluate_convergence_rates() for each column separately.
+   *
+   * Example: Columns like <tt>n cells</tt> or <tt>n dofs</tt> columns may be
+   * wanted to be omitted in the evaluation of the convergence rates. Hence
+   * they should be omitted by calling
+   * omit_column_from_convergence_rate_evaluation().
+   */
+  void
+  evaluate_all_convergence_rates(const std::string &reference_column_key,
+                                 const RateMode     rate_mode);
+
+  /**
+   * Evaluates convergence rates due to the <tt>rate_mode</tt>. This function
+   * evaluates the rates of ALL columns except for the columns flagged via
+   * omit_column_from_convergence_rate_evaluation() and except for the columns
+   * that are previously evaluated rate columns.  It allows to evaluate the
+   * convergence rate for almost all columns of a table without calling
+   * evaluate_convergence_rates() for each column separately.
+   *
+   * Example: Columns like <tt>n cells</tt> or <tt>n dofs</tt> columns may be
+   * wanted to be omitted in the evaluation of the convergence rates. Hence
+   * they should be omitted by calling
+   * omit_column_from_convergence_rate_evaluation().
+   */
+  void
+  evaluate_all_convergence_rates(const RateMode rate_mode);
+
+  /**
+   * @addtogroup Exceptions
+   * @{
+   */
+
+  /**
+   * Exception. Presumably signals a non-numeric table entry; confirm in .cc.
+   */
+  DeclException0 (ExcWrongValueType);
+
+  /**
+   * Exception reporting that a rate column of the given name already exists.
+   */
+  DeclException1 (ExcRateColumnAlreadyExists,
+                  std::string,
+                  << "Rate column <" << arg1 << "> does already exist.");
+  //@}
+};
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/data_out_base.h b/include/deal.II/base/data_out_base.h
new file mode 100644
index 0000000..1c1eea7
--- /dev/null
+++ b/include/deal.II/base/data_out_base.h
@@ -0,0 +1,2756 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__data_out_base_h
+#define dealii__data_out_base_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/point.h>
+#include <deal.II/base/table.h>
+#include <deal.II/base/geometry_info.h>
+#include <deal.II/base/std_cxx11/tuple.h>
+
+#include <vector>
+#include <string>
+#include <limits>
+#include <typeinfo>
+
+#include <deal.II/base/mpi.h>
+
+// Only include the Tecplot API header if the appropriate files
+// were detected by configure
+#ifdef DEAL_II_HAVE_TECPLOT
+#  include "TECIO.h"
+#  include <string.h>
+#endif
+
+#include <ostream>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+class ParameterHandler;
+class XDMFEntry;
+
+/**
+ * This is a base class for output of data on meshes of very general form.
+ * Output data is expected as a set of <tt>patches</tt> and written to the
+ * output stream in the format expected by the visualization tool. For a list
+ * of output formats, check the enumeration #OutputFormat. For each format
+ * listed there, this class contains a function <tt>write_format</tt>, writing
+ * the output. Refer to the documentation of those functions for details on a
+ * certain format.
+ *
+ * <h3>Structure of the output data</h3>
+ *
+ * Data is not written with the deal.II mesh structure. Instead, it relies on
+ * a set of <tt>patches</tt> created by a derived class (for example the
+ * DataOut, DataOutStack, DataOutFaces, DataOutRotation, or MatrixOut
+ * classes).  Each Patch describes a single logical cell of a mesh, possibly
+ * subdivided a number of times to represent higher order polynomials defined
+ * on this cell. To this end, a patch consists of a <tt>dim</tt>-dimensional
+ * regular grid with the same number of grid points in each direction. In the
+ * simplest case it may consist of the corner points of a single mesh cell.
+ * For each point of this local grid, the Patch contains an arbitrary number
+ * of data values, though the number of data sets must be the same for each
+ * point on each patch.
+ *
+ * By offering this interface to the different output formats, it is simple to
+ * extend this class to new formats without depending on such things as actual
+ * triangulations and handling of data vectors. These things shall be provided
+ * by derived classes, which then have a user-callable interface.
+ *
+ * Inside each patch, the data is organized in the usual lexicographical
+ * order, <i>x</i> running fastest, then <i>y</i> and <i>z</i>. Nodes are
+ * stored in this order and cells as well. Each cell in 3D is stored such that
+ * the front face is in the <i>xz</i>-plane. In order to enhance
+ * intelligibility of this concept, the following two sections are kept from a
+ * previous version of this documentation.
+ *
+ *
+ * <h4>Patches</h4>
+ *
+ * Grids can be thought of as a collection of cells; if you want to write out
+ * data on such a grid, you can do so by writing them one cell at a time. The
+ * functions in this class therefore take a list of objects describing the
+ * data on one cell each. This data for each cell usually consists of a list
+ * of vertices for this cell, and a list of data values (for example solution
+ * data, error information, etc) at each of these vertices.
+ *
+ * In some cases, this interface to a cell is too restricted, however. For
+ * example, you may have higher order elements and printing the values at the
+ * vertices only is not enough. For this reason, we not only provide writing
+ * the data on the vertices only, but the data is organized as a tensor
+ * product grid on each cell. The parameter <tt>n_subdivisions</tt>, which is
+ * given for each patch separately, denotes how often the cell is to be
+ * divided for output; for example, <tt>n_subdivisions==1</tt> yields no
+ * subdivision of the cell, <tt>n_subdivisions==2</tt> will produce a grid of
+ * 3 times 3 points in two spatial dimensions and 3 times 3 times 3 points in
+ * three dimensions, <tt>n_subdivisions==3</tt> will yield 4 times 4 (times 4)
+ * points, etc. The actual location of these points on the patch will be
+ * computed by a multilinear transformation from the vertices given for this
+ * patch.  For cells at the boundary, a mapping might be used to calculate the
+ * position of the inner points. In that case the coordinates are stored
+ * inside the Patch, as they cannot be easily recovered otherwise.
+ *
+ * Given these comments, the actual data to be printed on this patch of points
+ * consists of several data sets each of which has a value at each of the
+ * patch points. For example with <tt>n_subdivisions==2</tt> in two space
+ * dimensions, each data set has to provide nine values, and since the patch
+ * is to be printed as a tensor product (or its transformation to the real
+ * space cell), its values are to be ordered like <i>(x0,y0) (x0,y1) (x0,y2)
+ * (x1,y0) (x1,y1) (x1,y2) (x2,y0) (x2,y1) (x2,y2)</i>, i.e. the z-coordinate
+ * runs fastest, then the y-coordinate, then x (if there are that many space
+ * directions).
+ *
+ *
+ * <h4>Generalized patches</h4>
+ *
+ * In general, the patches as explained above might be too restricted. For
+ * example, one might want to draw only the outer faces of a domain in a
+ * three-dimensional computation, if one is not interested in what happens
+ * inside. Then, the objects that should be drawn are two-dimensional in a
+ * three-dimensional world. The Patch class and associated output functions
+ * handle these cases. The Patch class therefore takes two template
+ * parameters, the first, named <tt>dim</tt> denoting the dimension of the
+ * object (in the above example, this would be two), while the second, named
+ * <tt>spacedim</tt>, denotes the dimension of the embedding space (this would
+ * be three). The corner points of a patch have the dimension of the space,
+ * while their number is determined by the dimension of the patch. By default,
+ * the second template parameter has the same value as the first, which would
+ * correspond to outputting a cell, rather than a face or something else.
+ *
+ * <h3>DataOutBaseInterface</h3>
+ *
+ * The members of this namespace are not usually called from user code
+ * directly. Rather, classes that use the functions declared here are
+ * typically derived from DataOutInterface.
+ *
+ * The interface of this class basically consists of the declaration of a data
+ * type describing a patch and a bunch of functions taking a list of patches
+ * and writing them in one format or other to the stream. It is in the
+ * responsibility of the derived classes to provide this list of patches. In
+ * addition to the list of patches, a name for each data set may be given.
+ *
+ *
+ * <h3>Querying interface</h3>
+ *
+ * This class also provides a few functions (parse_output_format(),
+ * get_output_format_names(), default_suffix()) that can be used to query
+ * which output formats this class supports. They provide a list of names for
+ * all the formats we can output, parse a string and return an enum indicating
+ * each format, and provide a way to convert a value of this enum into the
+ * usual suffix used for files of that name. Using these functions, one can
+ * entirely free applications from knowledge which formats the library
+ * presently allows to output; several of the example programs show how to do
+ * this.
+ *
+ * <h3>Output parameters</h3>
+ *
+ * All functions take a parameter which is a structure of type
+ * <tt>XFlags</tt>, where <tt>X</tt> is the name of the output format. To find
+ * out what flags are presently supported, read the documentation of the
+ * different structures.
+ *
+ * Note that usually the output formats used for scientific visualization
+ * programs have no or very few parameters (apart from some compatibility
+ * flags) because there the actual appearance of output is determined using
+ * the visualization program and the files produced by this class store more
+ * or less only raw data.
+ *
+ * The direct output formats, like Postscript or Povray need to be given a lot
+ * more parameters, though, since there the output file has to contain all
+ * details of the viewpoint, light source, etc.
+ *
+ * <h3>Writing backends</h3>
+ *
+ * An abstraction layer has been introduced to facilitate coding backends for
+ * additional visualization tools. It is applicable for data formats
+ * separating the information into a field of vertices, a field of connection
+ * information for the grid cells and data fields.
+ *
+ * For each of these fields, output functions are implemented, namely
+ * write_nodes(), write_cells() and write_data(). In order to use these
+ * functions, a format specific output stream must be written, following the
+ * examples of DXStream, GmvStream, VtkStream and so on, implemented in the
+ * .cc file.
+ *
+ * In this framework, the implementation of a new output format is reduced to
+ * writing the section headers and the new output stream class for writing a
+ * single mesh object.
+ *
+ * <h3>Credits</h3>
+ * <ul>
+ *
+ * <li>EPS output based on an earlier implementation by Stefan Nauber for the
+ * old DataOut class
+ *
+ * <li>Povray output by Thomas Richter
+ *
+ * <li>Tecplot output by Benjamin Shelton Kirk
+ *
+ * </ul>
+ *
+ * @ingroup output
+ * @author Wolfgang Bangerth, Guido Kanschat 1999, 2000, 2001, 2002, 2005,
+ * 2006.
+ */
+namespace DataOutBase
+{
+  /**
+   * Data structure describing a patch of data in <tt>dim</tt> space
+   * dimensions.
+   *
+   * A patch consists of the following data:
+   * <ul>
+   * <li>the corner #vertices,
+   * <li> the number #n_subdivisions of cells the Patch has in each space
+   * direction,
+   * <li> the #data attached to each vertex, in the usual lexicographic
+   * ordering,
+   * <li> Information on #neighbors.
+   * </ul>
+   *
+   * See the general documentation of the DataOutBase namespace for more
+   * information on its contents and purposes.  In the case of two dimensions,
+   * the next picture is an example of <tt>n_subdivisions</tt> = 4 because the
+   * number of (sub)cells within each patch is equal to
+   * <tt>2<sup>dim</sup></tt>.
+   *
+   * @ingroup output
+   *
+   * @author Wolfgang Bangerth, Guido Kanschat
+   */
+  template <int dim, int spacedim=dim>
+  struct Patch
+  {
+    /**
+     * Make the <tt>spacedim</tt> template parameter available.
+     */
+    static const unsigned int space_dim=spacedim;
+
+    /**
+     * Corner points of a patch.  Inner points are computed by a multilinear
+     * transform of the unit cell to the cell specified by these corner
+     * points. The order of points is the same as for cells in the
+     * triangulation.
+     */
+    Point<spacedim> vertices[GeometryInfo<dim>::vertices_per_cell];
+
+    /**
+     * Numbers of neighbors of a patch.  OpenDX format requires neighbor
+     * information for advanced output. Here the neighborship relationship of
+     * patches is stored. During output, this must be transformed into
+     * neighborship of sub-grid cells.
+     */
+    unsigned int neighbors[dim > 0
+                           ?
+                           GeometryInfo<dim>::faces_per_cell
+                           :
+                           1];
+
+    /**
+     * Number of this patch. Since we are not sure patches are handled in the
+     * same order, always, we better store this.
+     */
+    unsigned int patch_index;
+
+    /**
+     * Number of subdivisions with which this patch is to be written.
+     * <tt>1</tt> means no subdivision, <tt>2</tt> means bisection, <tt>3</tt>
+     * trisection, etc.
+     */
+    unsigned int n_subdivisions;
+
+    /**
+     * Data vectors. The format is as follows: <tt>data(i,.)</tt> denotes the
+     * data belonging to the <tt>i</tt>th data vector. <tt>data.n()</tt>
+     * therefore equals the number of output points; this number is
+     * <tt>(subdivisions+1)^{dim}</tt>. <tt>data.m()</tt> equals the number of
+     * data vectors.
+     *
+     * Within each column, <tt>data(.,j)</tt> are the data values at the
+     * output point <tt>j</tt>, where <tt>j</tt> denotes the usual
+     * lexicographic ordering in deal.II. This is also the order of points as
+     * provided by the <tt>QIterated</tt> class when used with the
+     * <tt>QTrapez</tt> class as subquadrature.
+     *
+     * Since the number of data vectors is usually the same for all patches to
+     * be printed, <tt>data.size()</tt> should yield the same value for all
+     * patches provided. The exception are patches for which
+     * points_are_available are set, where the actual coordinates of the point
+     * are appended to the 'data' field, see the documentation of the
+     * points_are_available flag.
+     */
+    Table<2,float> data;
+
+    /**
+     * A flag indicating whether the coordinates of the interior patch points
+     * (assuming that the patch is supposed to be subdivided further) are
+     * appended to the @p data table (@p true) or not (@p false). The latter
+     * is the default and in this case the locations of the points interior to
+     * this patch are computed by (bi-, tri-)linear interpolation from the
+     * vertices of the patch.
+     *
+     * This option exists since patch points may be evaluated using a Mapping
+     * (rather than by a linear interpolation) and therefore have to be stored
+     * in the Patch structure.
+     */
+    bool points_are_available;
+
+    /**
+     * Default constructor. Sets #n_subdivisions to one, #points_are_available
+     * to false, and #patch_index to #no_neighbor.
+     */
+    Patch ();
+
+    /**
+     * Compare the present patch for equality with another one. This is used
+     * in a few of the automated tests in our testsuite.
+     */
+    bool operator == (const Patch &patch) const;
+
+    /**
+     * Return an estimate for the memory consumption, in bytes, of this
+     * object. This is not exact (but will usually be close) because
+     * calculating the memory usage of trees (e.g., <tt>std::map</tt>) is
+     * difficult.
+     */
+    std::size_t memory_consumption () const;
+
+    /**
+     * Swap the current object's contents with those of the given argument.
+     */
+    void swap (Patch<dim,spacedim> &other_patch);
+
+    /**
+     * Value to be used if this patch has no neighbor on one side.
+     */
+    static const unsigned int no_neighbor = numbers::invalid_unsigned_int;
+    /**
+     * @addtogroup Exceptions
+     * @{
+     */
+
+    /**
+     * Exception
+     */
+    DeclException2 (ExcInvalidCombinationOfDimensions,
+                    int, int,
+                    << "It is not possible to have a structural dimension of " << arg1
+                    << " to be larger than the space dimension of the surrounding"
+                    << " space " << arg2);
+    //@}
+  };
+
+
+  /**
+   * Base class describing common functionality between different output
+   * flags.
+   *
+   * This is implemented with the "Curiously Recurring Template Pattern";
+   * derived classes use their own type to fill in the typename so that
+   * <tt>memory_consumption</tt> works correctly. See the Wikipedia page on
+   * the pattern for more information.
+   *
+   * @ingroup output
+   */
+  template<typename FlagsType>
+  struct OutputFlagsBase
+  {
+    /**
+     * Declare all flags with name and type as offered by this class, for use
+     * in input files.
+     *
+     * This method does nothing, but child classes may override this method to
+     * add fields to <tt>prm</tt>.
+     */
+    static void declare_parameters (ParameterHandler &prm);
+
+    /**
+     * Read the parameters declared in declare_parameters() and set the flags
+     * for this output format accordingly.
+     *
+     * This method does nothing, but child classes may override this method to
+     * read their flags from <tt>prm</tt>.
+     */
+    void parse_parameters (const ParameterHandler &prm);
+
+    /**
+     * Return an estimate for the memory consumption, in bytes, of this
+     * object. This is not exact (but will usually be close) because
+     * calculating the memory usage of trees (e.g., <tt>std::map</tt>) is
+     * difficult.
+     */
+    std::size_t memory_consumption () const;
+  };
+
+
+  template<typename FlagsType>
+  void OutputFlagsBase<FlagsType>::declare_parameters (ParameterHandler &)
+  {}
+
+
+  template<typename FlagsType>
+  void OutputFlagsBase<FlagsType>::parse_parameters (const ParameterHandler &)
+  {}
+
+
+  template<typename FlagsType>
+  std::size_t OutputFlagsBase<FlagsType>::memory_consumption () const
+  {
+    return sizeof(FlagsType);
+  }
+
+
+  /**
+   * Flags controlling the details of output in OpenDX format.
+   *
+   * @ingroup output
+   */
+  struct DXFlags : public OutputFlagsBase<DXFlags>
+  {
+    /**
+     * Write neighbor information. This information is necessary for instance,
+     * if OpenDX is supposed to compute integral curves (streamlines). If it
+     * is not present, streamlines end at cell boundaries.
+     */
+    bool write_neighbors;
+    /**
+     * Write integer values of the Triangulation in binary format.
+     */
+    bool int_binary;
+    /**
+     * Write coordinate vectors in binary format.
+     */
+    bool coordinates_binary;
+
+    /**
+     * Write data vectors in binary format.
+     */
+    bool data_binary;
+
+    /**
+     * Write binary coordinate vectors as double (64 bit) numbers instead of
+     * float (32 bit).
+     */
+    bool data_double;
+
+    /**
+     * Constructor.
+     */
+    DXFlags (const bool write_neighbors = false,
+             const bool int_binary = false,
+             const bool coordinates_binary = false,
+             const bool data_binary = false);
+
+    /**
+     * Declare all flags with name and type as offered by this class, for use
+     * in input files.
+     */
+    static void declare_parameters (ParameterHandler &prm);
+
+    /**
+     * Read the parameters declared in declare_parameters() and set the flags
+     * for this output format accordingly.
+     *
+     * The flags thus obtained overwrite all previous contents of this object.
+     */
+    void parse_parameters (const ParameterHandler &prm);
+  };
+
+  /**
+   * Flags controlling the details of output in UCD format for AVS.
+   *
+   * @ingroup output
+   */
+  struct UcdFlags : public OutputFlagsBase<UcdFlags>
+  {
+    /**
+     * Write a comment at the beginning of the file stating the date of
+     * creation and some other data.  While this is supported by the UCD
+     * format and AVS, some other programs get confused by this, so the
+     * default is to not write a preamble. However, a preamble can be written
+     * using this flag.
+     *
+     * Default: <code>false</code>.
+     */
+    bool write_preamble;
+
+    /**
+     * Constructor.
+     */
+    UcdFlags (const bool write_preamble = false);
+
+    /**
+     * Declare all flags with name and type as offered by this class, for use
+     * in input files.
+     */
+    static void declare_parameters (ParameterHandler &prm);
+
+    /**
+     * Read the parameters declared in declare_parameters() and set the flags
+     * for this output format accordingly.
+     *
+     * The flags thus obtained overwrite all previous contents of this object.
+     */
+    void parse_parameters (const ParameterHandler &prm);
+  };
+
+  /**
+   * Flags controlling the details of output in Gnuplot format. At present no
+   * flags are implemented.
+   *
+   * @ingroup output
+   */
+  struct GnuplotFlags : public OutputFlagsBase<GnuplotFlags>
+  {};
+
+  /**
+   * Flags controlling the details of output in Povray format. Several flags
+   * are implemented, see their respective documentation.
+   *
+   * @ingroup output
+   */
+  struct PovrayFlags : public OutputFlagsBase<PovrayFlags>
+  {
+    /**
+     * Normal vector interpolation, if set to true
+     *
+     * default = false
+     */
+    bool smooth;
+
+    /**
+     * Use bicubic patches (b-splines) instead of triangles.
+     *
+     * default = false
+     */
+    bool bicubic_patch;
+
+    /**
+     * include external "data.inc" with camera, light and texture definition
+     * for the scene.
+     *
+     * default = false
+     */
+    bool external_data;
+
+    /**
+     * Constructor.
+     */
+    PovrayFlags (const bool smooth = false,
+                 const bool bicubic_patch = false,
+                 const bool external_data = false);
+
+    /**
+     * Declare all flags with name and type as offered by this class, for use
+     * in input files.
+     */
+    static void declare_parameters (ParameterHandler &prm);
+
+    /**
+     * Read the parameters declared in declare_parameters() and set the flags
+     * for this output format accordingly.
+     *
+     * The flags thus obtained overwrite all previous contents of this object.
+     */
+    void parse_parameters (const ParameterHandler &prm);
+  };
+
+
+  /**
+   * Flags controlling the details of output in encapsulated postscript
+   * format.
+   *
+   * @ingroup output
+   */
+  struct EpsFlags : public OutputFlagsBase<EpsFlags>
+  {
+    /**
+     * This denotes the number of the data vector which shall be used for
+     * generating the height information. By default, the first data vector is
+     * taken, i.e. <tt>height_vector==0</tt>, if there is any data vector. If
+     * there is no data vector, no height information is generated.
+     */
+    unsigned int height_vector;
+
+    /**
+     * Number of the vector which is to be taken to colorize cells. The same
+     * applies as for #height_vector.
+     */
+    unsigned int color_vector;
+
+    /**
+     * Enum denoting the possibilities whether the scaling should be done such
+     * that the given <tt>size</tt> equals the width or the height of the
+     * resulting picture.
+     */
+    enum SizeType
+    {
+      /// Scale to given width
+      width,
+      /// Scale to given height
+      height
+    };
+
+    /**
+     * See above. Default is <tt>width</tt>.
+     */
+    SizeType size_type;
+
+    /**
+     * Width or height of the output as given in postscript units. This usually
+     * is given by the strange unit 1/72 inch. Whether this is height or width
+     * is specified by the flag <tt>size_type</tt>.
+     *
+     * Default is 300, which represents a size of roughly 10 cm.
+     */
+    unsigned int size;
+
+    /**
+     * Width of a line in postscript units. Default is 0.5.
+     */
+    double line_width;
+
+    /**
+     * Angle of the line origin-viewer against the z-axis in degrees.
+     *
+     * Default is the Gnuplot-default of 60.
+     */
+    double azimut_angle;
+
+    /**
+     * Angle by which the viewers position projected onto the x-y-plane is
+     * rotated around the z-axis, in positive sense when viewed from above.
+     * The unit is degrees, and zero equals a position above or below the
+     * negative y-axis.
+     *
+     * Default is the Gnuplot-default of 30.  An example of a Gnuplot-default
+     * of 0 is the following:
+     *
+     * @verbatim
+     *
+     *          3________7
+     *          /       /|
+     *         /       / |
+     *       2/______6/  |
+     *       |   |   |   |
+     * O-->  |   0___|___4
+     *       |  /    |  /
+     *       | /     | /
+     *      1|/______5/
+     *
+     * @endverbatim
+     */
+    double turn_angle;
+
+    /**
+     * Factor by which the z-axis is to be stretched as compared to the x- and
+     * y-axes. This is to compensate for the different sizes that coordinate
+     * and solution values may have and to prevent that the plot looks too much
+     * out-of-place (no elevation at all if solution values are much smaller
+     * than coordinate values, or the common "extremely mountainous area" in
+     * the opposite case).
+     *
+     * Default is <tt>1.0</tt>.
+     */
+    double z_scaling;
+
+    /**
+     * Flag that determines whether the lines bounding the cells (or the parts
+     * of each patch) are to be plotted.
+     *
+     * Default: <tt>true</tt>.
+     */
+    bool   draw_mesh;
+
+    /**
+     * Flag whether to fill the regions between the lines bounding the cells
+     * or not. If not, no hidden line removal is performed, which in this
+     * crude implementation is done through writing the cells in a back-to-
+     * front order, thereby hiding the cells in the background by cells in the
+     * foreground.
+     *
+     * If this flag is <tt>false</tt> and #draw_mesh is <tt>false</tt> as
+     * well, nothing will be printed.
+     *
+     * If this flag is <tt>true</tt>, then the cells will be drawn either
+     * colored by one of the data sets (if #shade_cells is <tt>true</tt>), or
+     * pure white (if #shade_cells is false or if there are no data sets).
+     *
+     * Default is <tt>true</tt>.
+     */
+    bool   draw_cells;
+
+    /**
+     * Flag to determine whether the cells shall be colorized by the data set
+     * denoted by #color_vector, or simply be painted in white. This flag only
+     * makes sense if <tt>#draw_cells==true</tt>. Colorization is done through
+     * #color_function.
+     *
+     * Default is <tt>true</tt>.
+     */
+    bool   shade_cells;
+
+    /**
+     * Structure keeping the three color values in the RGB system.
+     */
+    struct RgbValues
+    {
+      float red;
+      float green;
+      float blue;
+
+      /**
+       * Return <tt>true</tt> if the color represented by the three color
+       * values is a grey scale, i.e. all components are equal.
+       */
+      bool is_grey () const;
+    };
+
+    /**
+     * Definition of a function pointer type taking a value and returning a
+     * triple of color values in RGB values.
+     *
+     * Besides the actual value by which the color is to be computed, min and
+     * max values of the data to be colorized are given as well.
+     */
+    typedef RgbValues (*ColorFunction) (const double value,
+                                        const double min_value,
+                                        const double max_value);
+
+    /**
+     * This is a pointer to the function which is used to colorize the cells.
+     * By default, it points to the static function default_color_function()
+     * which is a member of this class.
+     */
+    ColorFunction color_function;
+
+
+    /**
+     * Default colorization function. This one does what one usually wants: It
+     * shifts colors from black (lowest value) through blue, green and red to
+     * white (highest value). For the exact definition of the color scale
+     * refer to the implementation.
+     *
+     * This function was originally written by Stefan Nauber.
+     */
+    static RgbValues
+    default_color_function (const double value,
+                            const double min_value,
+                            const double max_value);
+
+    /**
+     * This is an alternative color function producing a grey scale between
+     * black (lowest values) and white (highest values). You may use it by
+     * setting the #color_function variable to the address of this function.
+     */
+    static RgbValues
+    grey_scale_color_function (const double value,
+                               const double min_value,
+                               const double max_value);
+
+    /**
+     * This is one more alternative color function producing a grey scale
+     * between white (lowest values) and black (highest values), i.e. the
+     * scale is reversed to the previous one. You may use it by setting the
+     * #color_function variable to the address of this function.
+     */
+    static RgbValues
+    reverse_grey_scale_color_function (const double value,
+                                       const double min_value,
+                                       const double max_value);
+
+    /**
+     * Constructor.
+     */
+    EpsFlags (const unsigned int  height_vector = 0,
+              const unsigned int  color_vector  = 0,
+              const SizeType      size_type     = width,
+              const unsigned int  size          = 300,
+              const double        line_width    = 0.5,
+              const double        azimut_angle  = 60,
+              const double        turn_angle    = 30,
+              const double        z_scaling     = 1.0,
+              const bool          draw_mesh     = true,
+              const bool          draw_cells    = true,
+              const bool          shade_cells   = true,
+              const ColorFunction color_function= &default_color_function);
+
+    /**
+     * Declare all flags with name and type as offered by this class, for use
+     * in input files.
+     *
+     * For coloring, only the color functions declared in this class are
+     * offered.
+     */
+    static void declare_parameters (ParameterHandler &prm);
+
+    /**
+     * Read the parameters declared in declare_parameters() and set the flags
+     * for this output format accordingly.
+     *
+     * The flags thus obtained overwrite all previous contents of this object.
+     */
+    void parse_parameters (const ParameterHandler &prm);
+  };
+
+  /**
+   * Flags controlling the details of output in GMV format. At present no
+   * flags are implemented.
+   *
+   * @ingroup output
+   */
+  struct GmvFlags : public OutputFlagsBase<GmvFlags>
+  {};
+
+  /**
+   * Flags controlling the details of output in Tecplot format.
+   *
+   * @ingroup output
+   */
+  struct TecplotFlags : public OutputFlagsBase<TecplotFlags>
+  {
+    /**
+     * This variable is needed to hold the output file name when using the
+     * Tecplot API to write binary files.  If the user doesn't set the file
+     * name with this variable only ASCII Tecplot output will be produced.
+     */
+    const char *tecplot_binary_file_name;
+
+    /**
+     * Tecplot allows to assign names to zones. This variable stores this
+     * name.
+     */
+    const char *zone_name;
+
+    /**
+     * Solution time for each zone in a strand. This value must be non-
+     * negative, otherwise it will not be written to file. Do not assign any
+     * value for this in case of a static zone.
+     */
+    double solution_time;
+
+    /**
+     * Constructor.
+     */
+    TecplotFlags (const char *tecplot_binary_file_name = NULL,
+                  const char *zone_name = NULL,
+                  const double solution_time = -1.0);
+
+    /**
+     * Return an estimate for the memory consumption, in bytes, of this
+     * object.
+     */
+    std::size_t memory_consumption () const;
+  };
+
+  /**
+   * Flags controlling the details of output in VTK format.
+   *
+   * @ingroup output
+   */
+  struct VtkFlags : public OutputFlagsBase<VtkFlags>
+  {
+    /**
+     * The time of the time step if this file is part of a time dependent
+     * simulation.
+     *
+     * The value of this variable is written into the output file according to
+     * the instructions provided in
+     * http://www.visitusers.org/index.php?title=Time_and_Cycle_in_VTK_files
+     * unless it is at its default value of
+     * @verbatim std::numeric_limits<double>::min() @endverbatim.
+     */
+    double time;
+
+    /**
+     * The number of the time step if this file is part of a time dependent
+     * simulation, or the cycle within a nonlinear or other iteration.
+     *
+     * The value of this variable is written into the output file according to
+     * the instructions provided in
+     * http://www.visitusers.org/index.php?title=Time_and_Cycle_in_VTK_files
+     * unless it is at its default value of
+     * @verbatim std::numeric_limits<unsigned int>::min() @endverbatim.
+     */
+    unsigned int cycle;
+
+    /**
+     * Flag to determine whether the current date and time shall be printed as
+     * a comment in the file's second line.
+     *
+     * Default is <tt>true</tt>.
+     */
+    bool print_date_and_time;
+
+    /**
+     * A data type providing the different possible zlib compression levels.
+     */
+    enum ZlibCompressionLevel
+    {
+      no_compression,
+      best_speed,
+      best_compression,
+      default_compression
+    };
+
+    /**
+     * Flag determining the compression level at which zlib, if available, is
+     * run. The default is <tt>best_compression</tt>.
+     */
+    ZlibCompressionLevel compression_level;
+
+    /**
+     * Constructor.
+     */
+    VtkFlags (const double       time   = std::numeric_limits<double>::min(),
+              const unsigned int cycle  = std::numeric_limits<unsigned int>::min(),
+              const bool print_date_and_time = true,
+              const ZlibCompressionLevel compression_level = best_compression);
+  };
+
+
+  /**
+   * Flags for SVG output.
+   *
+   * @ingroup output
+   */
+  struct SvgFlags : public OutputFlagsBase<SvgFlags>
+  {
+    /**
+     * Height of the image in SVG units. Default value is 4000.
+     */
+    unsigned int height;
+
+    /**
+     * Width of the image in SVG units. If left zero, the width is computed
+     * from the height.
+     */
+    unsigned int width;
+
+    /**
+     * This denotes the number of the data vector which shall be used for
+     * generating the height information. By default, the first data vector is
+     * taken, i.e. <tt>#height_vector==0</tt>, if there is any data vector. If
+     * there is no data vector, no height information is generated.
+     */
+    unsigned int height_vector;
+
+    /**
+     * Angles for the perspective view. The constructor defaults are 37 for
+     * the azimuth angle and 45 for the polar angle.
+     */
+    int azimuth_angle, polar_angle;
+
+    /**
+     * Thickness of the lines in the image; the constructor default is 1.
+     * NOTE(review): the unit and which lines are affected are not visible
+     * from this declaration -- confirm against the SVG writer.
+     */
+    unsigned int line_thickness;
+
+    /**
+     * Draw a margin of 5% around the plotted area
+     */
+    bool margin;
+
+    /**
+     * Draw a colorbar encoding the cell coloring
+     */
+    bool draw_colorbar;
+
+    /**
+     * Constructor. Note that #height and #width are not constructor
+     * arguments; they can only be changed by assigning to the members.
+     */
+    SvgFlags(const unsigned int height_vector = 0,
+             const int azimuth_angle = 37,
+             const int polar_angle = 45,
+             const unsigned int line_thickness = 1,
+             const bool margin = true,
+             const bool draw_colorbar = true);
+  };
+
+
+  /**
+   * Flags controlling the details of output in deal.II intermediate format.
+   * At present no flags are implemented; the only member is the static
+   * format version number below.
+   *
+   * @ingroup output
+   */
+  struct Deal_II_IntermediateFlags : public OutputFlagsBase<Deal_II_IntermediateFlags>
+  {
+    /**
+     * An indicator of the current file format version used to write
+     * intermediate format. We do not attempt to be backward compatible, so
+     * this number is used only to verify that the format we are writing is
+     * what the current readers and writers understand.
+     */
+    static const unsigned int format_version = 3;
+  };
+
+  /**
+   * Flags controlling the DataOutFilter.
+   *
+   * Unlike the other flag structures declared next to it, this one does not
+   * derive from OutputFlagsBase and declares its own declare_parameters()
+   * and parse_parameters() functions.
+   *
+   * @ingroup output
+   */
+
+  struct DataOutFilterFlags
+  {
+    /**
+     * Filter duplicate vertices and associated values. This will drastically
+     * reduce the output data size but may affect the correctness of some
+     * calculated values.
+     */
+    bool filter_duplicate_vertices;
+
+    /**
+     * Whether the XDMF output refers to HDF5 files. This affects how output
+     * is structured.
+     */
+    bool xdmf_hdf5_output;
+
+    /**
+     * Constructor. Both flags default to <tt>false</tt>.
+     */
+    DataOutFilterFlags (const bool filter_duplicate_vertices = false,
+                        const bool xdmf_hdf5_output = false);
+
+    /**
+     * Declare all flags with name and type as offered by this class, for use
+     * in input files.
+     */
+    static void declare_parameters (ParameterHandler &prm);
+
+    /**
+     * Read the parameters declared in <tt>declare_parameters</tt> and set the
+     * flags for this output format accordingly.
+     *
+     * The flags thus obtained overwrite all previous contents of this object.
+     */
+    void parse_parameters (const ParameterHandler &prm);
+
+    /**
+     * Determine an estimate for the memory consumption (in bytes) of this
+     * object.
+     */
+    std::size_t memory_consumption () const;
+  };
+
+  /**
+   * DataOutFilter provides a way to remove redundant vertices and values
+   * generated by the deal.II output. By default, DataOutBase and the classes
+   * that build on it output data at each corner of each cell. This means that
+   * data is output multiple times for each vertex of the mesh. The purpose of
+   * this scheme is to support output of discontinuous quantities, either
+   * because the finite element space is discontinuous or because the quantity
+   * that is output is computed from a solution field and is discontinuous
+   * across faces.
+   *
+   * This class is an attempt to rein in the amount of data that is written.
+   * If the fields that are written to files are indeed discontinuous, the
+   * only way to faithfully represent them is indeed to write multiple values
+   * for each vertex (this is typically done by writing multiple node
+   * locations for the same vertex and defining data at these nodes). However,
+   * for fine meshes, one may not necessarily be interested in an exact
+   * representation of output fields that will likely only have small
+   * discontinuities. Rather, it may be sufficient to just output one value
+   * per vertex, which may be chosen arbitrarily from among those that are
+   * defined at this vertex from any of the adjacent cells.
+   */
+  class DataOutFilter
+  {
+  private:
+    /**
+     * Empty class to provide comparison function for Map3DPoint. Points are
+     * ordered lexicographically by their three coordinates, using exact
+     * floating point comparison.
+     */
+    struct Point3Comp
+    {
+      bool operator() (const Point<3> &lhs, const Point<3> &rhs) const
+      {
+        return (lhs(0) < rhs(0) || (!(rhs(0) < lhs(0)) && (lhs(1) < rhs(1) || (!(rhs(1) < lhs(1)) && lhs(2) < rhs(2)))));
+      }
+    };
+
+    typedef std::multimap<Point<3>, unsigned int, Point3Comp> Map3DPoint;
+
+    /// Flags used to specify filtering behavior
+    DataOutBase::DataOutFilterFlags   flags;
+
+    /// Dimensionality of the nodes, used to properly output filtered data.
+    /// Zero until it has been set (presumably when the first point is
+    /// written).
+    int         node_dim;
+
+    /// Number of vertices per cell. Zero until it has been set (presumably
+    /// when the first cell is written).
+    int         n_cell_verts;
+
+    /// Map of points to an internal index
+    Map3DPoint        existing_points;
+
+    /// Map of actual point index to internal point index
+    std::map<unsigned int, unsigned int>  filtered_points;
+
+    /// Map of cells to the filtered points
+    std::map<unsigned int, unsigned int>  filtered_cells;
+
+    /// Data set names
+    std::vector<std::string>    data_set_names;
+
+    /// Data set dimensions
+    std::vector<unsigned int>   data_set_dims;
+
+    /// Data set data
+    std::vector<std::vector<double> > data_sets;
+
+    /**
+     * Record a cell vertex index based on the internal reordering.
+     */
+    void internal_add_cell(const unsigned int &cell_index, const unsigned int &pt_index);
+
+  public:
+    /**
+     * Default constructor. Uses flags with duplicate vertex filtering
+     * disabled and XDMF/HDF5 output enabled. The node dimension and the
+     * number of vertices per cell are initialized to zero so that the
+     * accessors below never read indeterminate values on an empty filter.
+     */
+    DataOutFilter() : flags(false, true), node_dim(0), n_cell_verts(0) {}
+
+    /**
+     * Constructor taking the filter flags to use. The node dimension and the
+     * number of vertices per cell are initialized to zero.
+     */
+    DataOutFilter(const DataOutBase::DataOutFilterFlags &flags) : flags(flags), node_dim(0), n_cell_verts(0) {}
+
+    /**
+     * Write a point with the specified index into the filtered data set. If
+     * the point already exists and we are filtering redundant values, the
+     * provided index will internally refer to another recorded point.
+     */
+    template<int dim>
+    void write_point(const unsigned int &index, const Point<dim> &p);
+
+    /**
+     * Record a deal.II cell in the internal reordered format.
+     */
+    template<int dim>
+    void write_cell(unsigned int index, unsigned int start, unsigned int d1, unsigned int d2, unsigned int d3);
+
+    /**
+     * Filter and record a data set. If there are multiple values at a given
+     * vertex and redundant values are being removed, one is arbitrarily
+     * chosen as the recorded value. In the future this can be expanded to
+     * average/min/max multiple values at a given vertex.
+     */
+    void write_data_set(const std::string &name, const unsigned int &dimension, const unsigned int &set_num, const Table<2,double> &data_vectors);
+
+    /**
+     * Resize and fill a vector with all the filtered node vertex points, for
+     * output to a file.
+     */
+    void fill_node_data(std::vector<double> &node_data) const;
+
+    /**
+     * Resize and fill a vector with all the filtered cell vertex indices, for
+     * output to a file.
+     */
+    void fill_cell_data(const unsigned int &local_node_offset, std::vector<unsigned int> &cell_data) const;
+
+    /**
+     * Get the name of the data set indicated by the set number. The access
+     * is range checked (std::vector::at).
+     */
+    std::string get_data_set_name(const unsigned int &set_num) const
+    {
+      return data_set_names.at(set_num);
+    }
+
+    /**
+     * Get the dimensionality of the data set indicated by the set number.
+     * The access is range checked (std::vector::at).
+     */
+    unsigned int get_data_set_dim(const unsigned int &set_num) const
+    {
+      return data_set_dims.at(set_num);
+    }
+
+    /**
+     * Get the raw double valued data of the data set indicated by the set
+     * number. In contrast to the two accessors above this one is not range
+     * checked: @p set_num must refer to an existing, non-empty data set.
+     */
+    const double *get_data_set(const unsigned int &set_num) const
+    {
+      return &data_sets[set_num][0];
+    }
+
+    /**
+     * Return the number of nodes in this DataOutFilter. This may be smaller
+     * than the original number of nodes if filtering is enabled.
+     */
+    unsigned int n_nodes() const
+    {
+      return existing_points.size();
+    }
+
+    /**
+     * Return the number of filtered cells in this DataOutFilter. Cells are
+     * not filtered so this will be the original number of cells. Returns
+     * zero as long as the number of vertices per cell has not been set,
+     * rather than dividing by zero.
+     */
+    unsigned int n_cells() const
+    {
+      return (n_cell_verts > 0) ? filtered_cells.size()/n_cell_verts : 0;
+    }
+
+    /**
+     * Return the number of filtered data sets in this DataOutFilter. Data
+     * sets are not filtered so this will be the original number of data sets.
+     */
+    unsigned int n_data_sets() const
+    {
+      return data_set_names.size();
+    }
+
+    /**
+     * Empty functions to do base class inheritance.
+     */
+    void flush_points () {}
+
+    /**
+     * Empty functions to do base class inheritance.
+     */
+    void flush_cells () {}
+
+  };
+
+
+  /**
+   * Provide a data type specifying the presently supported output formats.
+   */
+  enum OutputFormat
+  {
+    /**
+     * Use the format already stored in the object.
+     */
+    default_format,
+    /**
+     * Do not write any output.
+     */
+    none,
+    /**
+     * Output for OpenDX.
+     */
+    dx,
+    /**
+     * Output in the UCD format for AVS.
+     */
+    ucd,
+    /**
+     * Output for the Gnuplot tool.
+     */
+    gnuplot,
+    /**
+     * Output for the Povray raytracer.
+     */
+    povray,
+    /**
+     * Output in encapsulated PostScript.
+     */
+    eps,
+    /**
+     * Output for GMV.
+     */
+    gmv,
+    /**
+     * Output for Tecplot in text format.
+     */
+
+    tecplot,
+    /**
+     * Output for Tecplot in binary format. Faster and smaller than text
+     * format.
+     */
+    tecplot_binary,
+
+    /**
+     * Output in VTK format.
+     */
+    vtk,
+
+    /**
+     * Output in VTU format.
+     */
+    vtu,
+
+    /**
+     * Output in SVG format.
+     */
+    svg,
+
+    /**
+     * Output in deal.II intermediate format.
+     */
+    deal_II_intermediate,
+
+    /**
+     * Output in HDF5 format.
+     */
+    hdf5
+  };
+
+
+  /**
+   * Write the given list of patches to the output stream in OpenDX format.
+   *
+   * @param patches The patches to be written.
+   * @param data_names Presumably one name per data set stored in the
+   * patches -- confirm against the implementation.
+   * @param vector_data_ranges Tuples of two indices and a name;
+   * NOTE(review): whether the OpenDX writer makes use of them is not
+   * visible from this declaration.
+   * @param flags Flags controlling the details of the OpenDX output.
+   * @param out The stream to write to.
+   */
+  template <int dim, int spacedim>
+  void write_dx (const std::vector<Patch<dim,spacedim> > &patches,
+                 const std::vector<std::string>          &data_names,
+                 const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &vector_data_ranges,
+                 const DXFlags                           &flags,
+                 std::ostream                            &out);
+
+  /**
+   * Write the given list of patches to the output stream in eps format.
+   *
+   * Output in this format circumvents the use of auxiliary graphic programs
+   * converting some output format into a graphics format. This has the
+   * advantage that output is easy and fast, and the disadvantage that you
+   * have to give a whole bunch of parameters which determine the direction of
+   * sight, the mode of colorization, the scaling of the height axis, etc. (Of
+   * course, all these parameters have reasonable default values, which you
+   * may want to change.)
+   *
+   * This function only supports output for two-dimensional domains (i.e.,
+   * with dim==2), with values in the vertical direction taken from a data
+   * vector.
+   *
+   * Basically, output consists of the mesh and the cells in between them. You
+   * can draw either of these, or both, or none if you are really interested
+   * in an empty picture. If written, the mesh uses black lines. The cells in
+   * between the mesh are either not printed (this will result in a loss of
+   * hidden line removal, i.e.  you can "see through" the cells to lines
+   * behind), printed in white (which does nothing apart from the hidden line
+   * removal), or colorized using one of the data vectors (which need not be
+   * the same as the one used for computing the height information) and a
+   * customizable color function. The default color function chooses the
+   * color between black, blue, green, red and white, with growing values of
+   * the data field chosen for colorization. At present, cells are displayed
+   * with one color per cell only, which is taken from the value of the data
+   * field at the center of the cell; bilinear interpolation of the color on a
+   * cell is not used.
+   *
+   * By default, the viewpoint is chosen like the default viewpoint in
+   * GNUPLOT, i.e.  with an angle of 60 degrees with respect to the positive
+   * z-axis and rotated 30 degrees in positive sense (as seen from above) away
+   * from the negative y-axis.  Of course you can change these settings.
+   *
+   * EPS output is written without a border around the picture, i.e. the
+   * bounding box is close to the output on all four sides. Coordinates are
+   * written using at most five digits, to keep the picture size
+   * reasonable.
+   *
+   * All parameters along with their default values are listed in the
+   * documentation of the <tt>EpsFlags</tt> member class of this class. See
+   * there for more and detailed information.
+   */
+  template <int spacedim>
+  void write_eps (const std::vector<Patch<2,spacedim> > &patches,
+                  const std::vector<std::string>          &data_names,
+                  const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &vector_data_ranges,
+                  const EpsFlags                          &flags,
+                  std::ostream                            &out);
+
+  /**
+   * This is the same function as above except for domains that are not
+   * two-dimensional. This function is not implemented (and will throw an
+   * error if called) but is declared to allow for dimension-independent
+   * programs.
+   */
+  template <int dim, int spacedim>
+  void write_eps (const std::vector<Patch<dim,spacedim> > &patches,
+                  const std::vector<std::string>          &data_names,
+                  const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &vector_data_ranges,
+                  const EpsFlags                          &flags,
+                  std::ostream                            &out);
+
+
+  /**
+   * Write the given list of patches to the output stream in GMV format.
+   *
+   * Data is written in the following format: nodes are considered the points
+   * of the patches. In spatial dimensions less than three, zeroes are
+   * inserted for the missing coordinates. The data vectors are written as
+   * node or cell data, where for the former the data space is interpolated
+   * to (bi-, tri-)linear elements.
+   */
+  template <int dim, int spacedim>
+  void write_gmv (const std::vector<Patch<dim,spacedim> > &patches,
+                  const std::vector<std::string>          &data_names,
+                  const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &vector_data_ranges,
+                  const GmvFlags                          &flags,
+                  std::ostream                            &out);
+
+  /**
+   * Write the given list of patches to the output stream in gnuplot format.
+   * Visualization of two-dimensional data can then be achieved by starting
+   * <tt>gnuplot</tt> and entering the commands
+   *
+   * @verbatim
+   * set data style lines
+   * splot "filename" using 1:2:n
+   * @endverbatim
+   * This example assumes that the number of the data vector displayed is
+   * <b>n-2</b>.
+   *
+   * The GNUPLOT format is not able to handle data on unstructured grids
+   * directly. Directly would mean that you only give the vertices and the
+   * solution values thereon and the program constructs its own grid to
+   * represent the data. This is only possible for a structured tensor product
+   * grid in two dimensions. However, it is possible to give several such
+   * patches within one file, which is exactly what the respective function of
+   * this class does: writing each cell's data as a patch of data, at least if
+   * the patches as passed from derived classes represent cells. Note that the
+   * functions on patches need not be continuous at interfaces between
+   * patches, so this method also works for discontinuous elements. Note also,
+   * that GNUPLOT can do hidden line removal for patched data.
+   *
+   * While this discussion applies to two spatial dimensions, it is more
+   * complicated in 3d. The reason is that we could still use patches, but it
+   * is difficult when trying to visualize them, since if we use a cut through
+   * the data (by, for example, using x- and z-coordinates, a fixed y-value
+   * and plot function values in z-direction), then the patched data is not a
+   * patch in the sense GNUPLOT wants it any more. Therefore, we use another
+   * approach, namely writing the data on the 3d grid as a sequence of lines,
+   * i.e. two points each associated with one or more data sets.  There are
+   * therefore 12 lines for each subcell of a patch.
+   *
+   * Given the lines as described above, a cut through this data in Gnuplot
+   * can then be achieved like this (& stands for the dollar sign in the
+   * following):
+   * @verbatim
+   *   set data style lines
+   *   splot [:][:][0:] "T" using 1:2:(&3==.5 ? &4 : -1)
+   * @endverbatim
+   *
+   * This command plots data in x- and y-direction unbounded, but in
+   * z-direction only those data points which are above the x-y-plane (we
+   * assume here a positive solution, if it has negative values, you might
+   * want to decrease the lower bound). Furthermore, it only takes the data
+   * points with z-values (<tt>&3</tt>) equal to 0.5, i.e. a cut through the
+   * domain at <tt>z=0.5</tt>. For the data points on this plane, the data
+   * values of the first data set (<tt>&4</tt>) are raised in z-direction
+   * above the x-y-plane; all other points are denoted the value <tt>-1</tt>
+   * instead of the value of the data vector and are not plotted due to the
+   * lower bound in z plotting direction, given in the third pair of brackets.
+   *
+   * More complex cuts are possible, including nonlinear ones. Note however,
+   * that only those points which are actually on the cut-surface are plotted.
+   */
+  template <int dim, int spacedim>
+  void write_gnuplot (const std::vector<Patch<dim,spacedim> > &patches,
+                      const std::vector<std::string>          &data_names,
+                      const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &vector_data_ranges,
+                      const GnuplotFlags                      &flags,
+                      std::ostream                            &out);
+
+  /**
+   * Write the given list of patches to the output stream for the Povray
+   * raytracer.
+   *
+   * Output in this format creates a povray source file, including standard
+   * camera and light source definition for rendering with povray 3.1. At
+   * present, this format only supports output for two-dimensional data, with
+   * values in the third direction taken from a data vector.
+   *
+   * The output uses two different povray-objects:
+   *
+   * <ul>
+   * <li> <tt>BICUBIC_PATCH</tt> A <tt>bicubic_patch</tt> is a 3-dimensional
+   * Bezier patch. It consists of 16 Points describing the surface. The 4
+   * corner points are touched by the object, while the other 12 points pull
+   * and stretch the patch into shape. One <tt>bicubic_patch</tt> is generated
+   * on each patch. Therefore the number of subdivisions has to be 3 to
+   * provide the patch with 16 points. A bicubic patch is not exact but
+   * generates very smooth images.
+   *
+   * <li> <tt>MESH</tt> The mesh object is used to store a large number of
+   * triangles. Every square of the patch data is split into one upper-left
+   * and one lower-right triangle. If the number of subdivisions is three, 32
+   * triangles are generated for every patch.
+   * NOTE(review): with three subdivisions a patch has 3x3 squares, which at
+   * two triangles per square would give 18 rather than 32 -- confirm.
+   *
+   * Using the smooth flag povray interpolates the normals on the triangles,
+   * imitating a curved surface
+   * </ul>
+   *
+   * All objects get one texture definition called Tex. This texture has to be
+   * declared somewhere before the object data. This may be in an external
+   * data file or at the beginning of the output file. Setting the
+   * <tt>external_data</tt> flag to false, a standard camera, light and
+   * texture (scaled to fit the scene) is added to the output file. If set to
+   * true, an include file "data.inc" is included. This file is not generated
+   * by deal and has to include camera, light and the texture definition Tex.
+   *
+   * You need povray (>=3.0) to render the scene. The minimum options for
+   * povray are:
+   * @verbatim
+   *   povray +I<inputfile> +W<horiz. size> +H<ver. size> +L<include path>
+   * @endverbatim
+   * If the external file "data.inc" is used, the path to this file has to be
+   * included in the povray options.
+   */
+  template <int dim, int spacedim>
+  void write_povray (const std::vector<Patch<dim,spacedim> > &patches,
+                     const std::vector<std::string>          &data_names,
+                     const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &vector_data_ranges,
+                     const PovrayFlags                       &flags,
+                     std::ostream                            &out);
+
+  /**
+   * Write the given list of patches to the output stream in Tecplot ASCII
+   * format (FEBLOCK).
+   *
+   * For output in Tecplot's binary format see write_tecplot_binary().
+   *
+   * For more information consult the Tecplot Users and Reference manuals.
+   */
+  template <int dim, int spacedim>
+  void write_tecplot (const std::vector<Patch<dim,spacedim> > &patches,
+                      const std::vector<std::string>          &data_names,
+                      const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &vector_data_ranges,
+                      const TecplotFlags                      &flags,
+                      std::ostream                            &out);
+
+  /**
+   * Write the given list of patches to the output stream in Tecplot binary
+   * format.
+   *
+   * For this to work properly <tt>./configure</tt> checks for the Tecplot API
+   * at build time. To write Tecplot binary files directly make sure that the
+   * TECHOME environment variable points to the Tecplot installation
+   * directory, and that the files \$TECHOME/include/TECIO.h and
+   * \$TECHOME/lib/tecio.a are readable. If these files are not available (or
+   * in the case of 1D) this function will simply call write_tecplot() and
+   * thus larger ASCII data files will be produced rather than more efficient
+   * Tecplot binary files.
+   *
+   * NOTE(review): this version of the library is configured with CMake, so
+   * the reference to <tt>./configure</tt> above looks outdated -- confirm
+   * how the Tecplot API is detected now.
+   *
+   * @warning TecplotFlags::tecplot_binary_file_name indicates the name of the
+   * file to be written.  If the file name is not set ASCII output is
+   * produced.
+   *
+   * For more information consult the Tecplot Users and Reference manuals.
+   */
+  template <int dim, int spacedim>
+  void write_tecplot_binary (
+    const std::vector<Patch<dim,spacedim> > &patches,
+    const std::vector<std::string>          &data_names,
+    const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &vector_data_ranges,
+    const TecplotFlags                      &flags,
+    std::ostream                            &out);
+
+  /**
+   * Write the given list of patches to the output stream in UCD format
+   * described in the AVS developer's guide (now AVS). Due to limitations in
+   * the present format, only node based data can be output, which is one
+   * reason why we invented the patch concept. In order to write higher order
+   * elements, you may split them up into several subdivisions of each cell.
+   * These subcells will then, however, also appear as different cells by
+   * programs which understand the UCD format.
+   *
+   * No use is made of the possibility to give model data since these are not
+   * supported by all UCD aware programs. You may give cell data in derived
+   * classes by setting all values of a given data set on a patch to the same
+   * value.
+   */
+  template <int dim, int spacedim>
+  void write_ucd (const std::vector<Patch<dim,spacedim> > &patches,
+                  const std::vector<std::string>          &data_names,
+                  const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &vector_data_ranges,
+                  const UcdFlags                          &flags,
+                  std::ostream                            &out);
+
+  /**
+   * Write the given list of patches to the output stream in VTK format. The
+   * data is written in the traditional VTK format as opposed to the XML-based
+   * format that write_vtu() produces.
+   *
+   * The vector_data_ranges argument denotes ranges of components in the
+   * output that are considered a vector, rather than simply a collection of
+   * scalar fields. The VTK output format has special provisions that allow
+   * these components to be output by a single name rather than having to
+   * group several scalar fields into a vector later on in the visualization
+   * program.
+   *
+   * @note VTK is a legacy format and has largely been supplanted by the VTU
+   * format (an XML-structured version of VTK). In particular, VTU allows for
+   * the compression of data and consequently leads to much smaller file sizes
+   * than equivalent VTK files for large files. Since all visualization
+   * programs that support VTK also support VTU, you should consider using the
+   * latter file format instead, by using the write_vtu() function.
+   */
+  template <int dim, int spacedim>
+  void write_vtk (const std::vector<Patch<dim,spacedim> > &patches,
+                  const std::vector<std::string>          &data_names,
+                  const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &vector_data_ranges,
+                  const VtkFlags                          &flags,
+                  std::ostream                            &out);
+
+
+  /**
+   * Write the given list of patches to the output stream in VTU format. The
+   * data is written in the XML-based VTK format as opposed to the traditional
+   * format that write_vtk() produces.
+   *
+   * The vector_data_ranges argument denotes ranges of components in the
+   * output that are considered a vector, rather than simply a collection of
+   * scalar fields. The VTK output format has special provisions that allow
+   * these components to be output by a single name rather than having to
+   * group several scalar fields into a vector later on in the visualization
+   * program.
+   *
+   * Some visualization programs, such as ParaView, can read several separate
+   * VTU files to parallelize visualization. In that case, you need a
+   * <code>.pvtu</code> file that describes which VTU files form a group. The
+   * DataOutInterface::write_pvtu_record() function can generate such a master
+   * record. Likewise, DataOutInterface::write_visit_record() does the same
+   * for VisIt (although VisIt can also read <code>pvtu</code> records since
+   * version 2.5.1). Finally, for time dependent problems, you may also want
+   * to look at DataOutInterface::write_pvd_record().
+   *
+   * The use of this function is explained in step-40.
+   */
+  template <int dim, int spacedim>
+  void write_vtu (const std::vector<Patch<dim,spacedim> > &patches,
+                  const std::vector<std::string>          &data_names,
+                  const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &vector_data_ranges,
+                  const VtkFlags                          &flags,
+                  std::ostream                            &out);
+
+  /**
+   * This writes the header for the xml based vtu file format. This routine is
+   * used internally together with DataOutInterface::write_vtu_footer() and
+   * DataOutInterface::write_vtu_main() by DataOutBase::write_vtu().
+   *
+   * NOTE(review): write_vtu_footer() and write_vtu_main() are declared right
+   * next to this function, so the <tt>DataOutInterface::</tt> qualification
+   * above may be stale -- confirm.
+   */
+  void write_vtu_header (std::ostream &out,
+                         const VtkFlags &flags);
+
+  /**
+   * This writes the footer for the xml based vtu file format. This routine is
+   * used internally together with DataOutInterface::write_vtu_header() and
+   * DataOutInterface::write_vtu_main() by DataOutBase::write_vtu().
+   *
+   * NOTE(review): write_vtu_header() and write_vtu_main() are declared right
+   * next to this function, so the <tt>DataOutInterface::</tt> qualification
+   * above may be stale -- confirm.
+   */
+  void write_vtu_footer (std::ostream &out);
+
+  /**
+   * This writes the main part for the xml based vtu file format. This routine
+   * is used internally together with DataOutInterface::write_vtu_header() and
+   * DataOutInterface::write_vtu_footer() by DataOutBase::write_vtu().
+   *
+   * NOTE(review): write_vtu_header() and write_vtu_footer() are declared
+   * right next to this function, so the <tt>DataOutInterface::</tt>
+   * qualification above may be stale -- confirm.
+   */
+  template <int dim, int spacedim>
+  void write_vtu_main (const std::vector<Patch<dim,spacedim> > &patches,
+                       const std::vector<std::string>          &data_names,
+                       const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &vector_data_ranges,
+                       const VtkFlags                          &flags,
+                       std::ostream                            &out);
+
+  /**
+   * Write the given list of patches to the output stream in SVG format.
+   *
+   * SVG (Scalable Vector Graphics) is an XML-based vector image format
+   * developed and maintained by the World Wide Web Consortium (W3C). This
+   * function conforms to the latest specification SVG 1.1, released on August
+   * 16, 2011. Controlling the graphic output is possible by setting or
+   * clearing the respective flags (see the SvgFlags struct). At present, this
+   * format only supports output for two-dimensional data, with values in the
+   * third direction taken from a data vector.
+   *
+   * For the output, each patch is subdivided into four triangles which are
+   * then written as polygons and filled with a linear color gradient. The
+   * arising coloring of the patches visualizes the data values at the
+   * vertices taken from the specified data vector. A colorbar can be drawn to
+   * encode the coloring.
+   *
+   * @note This function is so far only implemented for two dimensions with an
+   * additional dimension reserved for data information. Accordingly, the
+   * declaration only accepts patches of type <tt>Patch<2,spacedim></tt>.
+   */
+  template <int spacedim>
+  void write_svg (const std::vector<Patch<2,spacedim> > &patches,
+                  const std::vector<std::string>          &data_names,
+                  const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &vector_data_ranges,
+                  const SvgFlags                          &flags,
+                  std::ostream                            &out);
+
+  /**
+   * Write the given list of patches to the output stream in deal.II
+   * intermediate format. This is not a format understood by any other
+   * graphics program, but is rather a direct dump of the intermediate
+   * internal format used by deal.II. This internal format is generated by the
+   * various classes that can generate output using the DataOutBase class, for
+   * example from a finite element solution, and is then converted in the
+   * present class to the final graphics format.
+   *
+   * Note that the intermediate format is what its name suggests: a direct
+   * representation of internal data. It isn't standardized and will change
+   * whenever we change our internal representation. You can only expect to
+   * process files written in this format using the same version of deal.II
+   * that was used for writing.
+   *
+   * The reason why we offer to write out this intermediate format is that it
+   * can be read back into a deal.II program using the DataOutReader class,
+   * which is helpful in at least two contexts: First, this can be used to
+   * later generate graphical output in any other graphics format presently
+   * understood; this way, it is not necessary to know at run-time which
+   * output format is requested, or if multiple output files in different
+   * formats are needed. Secondly, in contrast to almost all other graphics
+   * formats, it is possible to merge several files that contain intermediate
+   * format data, and generate a single output file from it, which may be
+   * again in intermediate format or any of the final formats. This latter
+   * option is most helpful for parallel programs: as demonstrated in the
+   * step-17 example program, it is possible to let only one processor
+   * generate the graphical output for the entire parallel program, but this
+   * can become vastly inefficient if many processors are involved, because
+   * the load is no longer balanced. The way out is to let each processor
+   * generate intermediate graphical output for its chunk of the domain, and
+   * then later merge the different files into one, which is an operation that
+   * is much cheaper than the generation of the intermediate data.
+   *
+   * Intermediate format deal.II data is usually stored in files with the
+   * ending <tt>.d2</tt>.
+   */
+  template <int dim, int spacedim>
+  void write_deal_II_intermediate (
+    const std::vector<Patch<dim,spacedim> > &patches,
+    const std::vector<std::string>          &data_names,
+    const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &vector_data_ranges,
+    const Deal_II_IntermediateFlags         &flags,
+    std::ostream                            &out);
+
+  /**
+   * Write the data in data_filter to a single HDF5 file containing both the
+   * mesh and solution values.
+   */
+  template <int dim, int spacedim>
+  void write_hdf5_parallel (const std::vector<Patch<dim,spacedim> > &patches,
+                            const DataOutFilter &data_filter,
+                            const std::string &filename,
+                            MPI_Comm comm);
+
+  /**
+   * Write the data in data_filter to HDF5 file(s). If write_mesh_file is
+   * false, the mesh data will not be written and the solution file will
+   * contain only the solution values. If write_mesh_file is true and the
+   * filenames are the same, the resulting file will contain both mesh data
+   * and solution values.
+   */
+  template <int dim, int spacedim>
+  void write_hdf5_parallel (const std::vector<Patch<dim,spacedim> > &patches,
+                            const DataOutFilter &data_filter,
+                            const bool write_mesh_file,
+                            const std::string &mesh_filename,
+                            const std::string &solution_filename,
+                            MPI_Comm comm);
+
+  /**
+   * DataOutFilter is an intermediate data format that reduces the amount of
+   * data that will be written to files. The object filled by this function
+   * can then later be used again to write data in a concrete file format;
+   * see, for example, DataOutBase::write_hdf5_parallel().
+   */
+  template <int dim, int spacedim>
+  void write_filtered_data (const std::vector<Patch<dim,spacedim> > &patches,
+                            const std::vector<std::string>          &data_names,
+                            const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &vector_data_ranges,
+                            DataOutFilter &filtered_data);
+
+  /**
+   * Given an input stream that contains data written by
+   * write_deal_II_intermediate(), determine the <tt>dim</tt> and
+   * <tt>spacedim</tt> template parameters with which that function was
+   * called, and return them as a pair of values.
+   *
+   * Note that this function eats a number of elements at the present position
+   * of the stream, and therefore alters it. In order to read from it using,
+   * for example, the DataOutReader class, you may wish to either reset the
+   * stream to its previous position, or close and reopen it.
+   */
+  std::pair<unsigned int, unsigned int>
+  determine_intermediate_format_dimensions (std::istream &input);
+
+  /**
+   * Return the OutputFormat value corresponding to the given string. If the
+   * string does not match any known format, an exception is thrown.
+   *
+   * The main purpose of this function is to allow a program to use any
+   * implemented output format without the need to extend the program's parser
+   * each time a new format is implemented.
+   *
+   * To get a list of presently available format names, e.g. to give it to the
+   * ParameterHandler class, use the function get_output_format_names().
+   */
+  OutputFormat parse_output_format (const std::string &format_name);
+
+  /**
+   * Return a list of implemented output formats. The different names are
+   * separated by vertical bar signs (<tt>`|'</tt>) as used by the
+   * ParameterHandler classes.
+   */
+  std::string get_output_format_names ();
+
+  /**
+   * Provide a function which tells us which suffix a file with a given output
+   * format usually has. At present the following formats are defined:
+   * <ul>
+   * <li> <tt>dx</tt>: <tt>.dx</tt>
+   * <li> <tt>ucd</tt>: <tt>.inp</tt>
+   * <li> <tt>gnuplot</tt>: <tt>.gnuplot</tt>
+   * <li> <tt>povray</tt>: <tt>.pov</tt>
+   * <li> <tt>eps</tt>: <tt>.eps</tt>
+   * <li> <tt>gmv</tt>: <tt>.gmv</tt>
+   * <li> <tt>tecplot</tt>: <tt>.dat</tt>
+   * <li> <tt>tecplot_binary</tt>: <tt>.plt</tt>
+   * <li> <tt>vtk</tt>: <tt>.vtk</tt>
+   * <li> <tt>vtu</tt>: <tt>.vtu</tt>
+   * <li> <tt>svg</tt>: <tt>.svg</tt>
+   * <li> <tt>deal_II_intermediate</tt>: <tt>.d2</tt>.
+   * </ul>
+   */
+  std::string default_suffix (const OutputFormat output_format);
+
+  /**
+   * @addtogroup Exceptions
+   * @{
+   */
+
+  /**
+   * A data set does not have the expected number of points per direction.
+   */
+  DeclException2 (ExcInvalidDatasetSize,
+                  int, int,
+                  << "The number of points in this data set is " << arg1
+                  << ", but we expected " << arg2 << " in each space direction.");
+  /**
+   * An output function did not receive any patches for writing.
+   */
+  DeclException0 (ExcNoPatches);
+  /**
+   * Exception
+   */
+  DeclException0 (ExcTecplotAPIError);
+  /**
+   * An error occurred while opening a Tecplot file for output.
+   */
+  DeclException1 (ExcErrorOpeningTecplotFile,
+                  char *,
+                  << "There was an error opening Tecplot file " << arg1
+                  << " for output");
+
+  //@}
+}
+
+
+
+
+/**
+ * This class is the interface to the functions in the DataOutBase namespace,
+ * as its name already suggests. It does not offer much functionality apart
+ * from a way to access the implemented formats and a way to dynamically
+ * dispatch which output format to choose.
+ *
+ * This class is intended as a base class for classes actually generating
+ * data for output. It has two abstract virtual functions, get_patches() and
+ * get_dataset_names(), that produce the data which is actually needed. These
+ * are the only functions that need to be overridden by a derived class. In
+ * addition to that, it has a function for each output format supported by
+ * the underlying base class which gets the output data using these two
+ * virtual functions and passes it to the raw output functions.
+ *
+ * The purpose of this class is mainly two-fold: to support storing flags by
+ * which the output in the different output formats is controlled, and means
+ * to work with output in a way where output format, flags and other things
+ * are determined at run time. In addition to that it offers the abstract
+ * interface to derived classes briefly discussed above.
+ *
+ *
+ * <h3>Output flags</h3>
+ *
+ * The way we treat flags in this class is very similar to that used in the
+ * <tt>GridOut</tt> class. For detailed information on the why's and how's, as
+ * well as an example of programming, we refer to the documentation of that
+ * class.
+ *
+ * Basically, this class stores a set of flags for each output format
+ * supported by the underlying <tt>DataOutBase</tt> class. These are used
+ * whenever one of the <tt>write_*</tt> functions is used. By default, the
+ * values of these flags are set to reasonable defaults, but in case you want
+ * to change them, you can create a structure holding the flags for one of the
+ * output formats and set it using the <tt>set_flags</tt> functions of this
+ * class to determine all future output the object might produce by that
+ * output format.
+ *
+ * For information on what parameters are supported by different output
+ * functions, please see the documentation of the <tt>DataOutBase</tt> class
+ * and its member classes.
+ *
+ *
+ * <h3>Run time selection of output parameters</h3>
+ *
+ * In the output flags classes, described above, many flags are defined for
+ * output in the different formats. In order to make them available to the
+ * input file handler class <tt>ParameterHandler</tt>, each of these has a
+ * function declaring these flags to the parameter handler and to read them
+ * back from an actual input file. In order to avoid that in user programs
+ * these functions have to be called for each available output format and the
+ * respective flag class, the present <tt>DataOutInterface</tt> class offers a
+ * function <tt>declare_parameters</tt> which calls the respective function of
+ * all known output format flags classes. The flags of each such format are
+ * packed together in a subsection in the input file. Likewise, there is a
+ * function <tt>parse_parameters</tt> which reads these parameters and stores
+ * them in the flags associated with this object (see above).
+ *
+ * Using these functions, you do not have to track which formats are presently
+ * implemented.
+ *
+ * Usage is as follows:
+ * @code
+ *                               // within function declaring parameters:
+ *   ...
+ *   prm.enter_subsection ("Output format options");
+ *     DataOutInterface<dim>::declare_parameters (prm);
+ *   prm.leave_subsection ();
+ *   ...
+ *
+ *
+ *                               // within function doing the output:
+ *   ...
+ *   DataOut<dim> out;
+ *   prm.enter_subsection ("Output format options");
+ *   out.parse_parameters (prm);
+ *   prm.leave_subsection ();
+ *   ...
+ * @endcode
+ * Note that in the present example, the class <tt>DataOut</tt> was used.
+ * However, any other class derived from <tt>DataOutInterface</tt> would work
+ * alike.
+ *
+ *
+ * <h3>Run time selection of formats</h3>
+ *
+ * This class, much like the <tt>GridOut</tt> class, has a set of functions
+ * providing a list of supported output formats, an <tt>enum</tt> denoting all
+ * these and a function to parse a string and return the respective
+ * <tt>enum</tt> value if it is a valid output format's name (actually, these
+ * functions are inherited from the base class). Finally, there is a function
+ * <tt>write</tt>, which takes a value of this <tt>enum</tt> and dispatches to
+ * one of the actual <tt>write_*</tt> functions depending on the output format
+ * selected by this value.
+ *
+ * The functions offering the different output format names are, respectively,
+ * <tt>default_suffix</tt>, <tt>parse_output_format</tt>, and
+ * <tt>get_output_format_names</tt>. They make the selection of output formats
+ * in parameter files much easier, and especially independent of the formats
+ * presently implemented. User programs need therefore not be changed whenever
+ * a new format is implemented.
+ *
+ * Additionally, objects of this class have a default format, which can be set
+ * by the parameter "Output format" of the parameter file. Within a program,
+ * this can be changed by the member function <tt>set_default_format</tt>.
+ * Using this default format, it is possible to leave the format selection
+ * completely to the parameter file. A suitable suffix for the output file
+ * name can be obtained by <tt>default_suffix</tt> without arguments.
+ *
+ * @ingroup output
+ * @author Wolfgang Bangerth, 1999
+ */
+template <int dim, int spacedim=dim>
+class DataOutInterface
+{
+public:
+  /**
+   * Constructor.
+   */
+  DataOutInterface ();
+
+  /**
+   * Destructor. Does nothing, but is declared virtual since this class has
+   * virtual functions.
+   */
+  virtual ~DataOutInterface ();
+
+  /**
+   * Obtain data through get_patches() and write it to <tt>out</tt> in OpenDX
+   * format. See DataOutBase::write_dx.
+   */
+  void write_dx (std::ostream &out) const;
+
+  /**
+   * Obtain data through get_patches() and write it to <tt>out</tt> in EPS
+   * format. See DataOutBase::write_eps.
+   */
+  void write_eps (std::ostream &out) const;
+
+  /**
+   * Obtain data through get_patches() and write it to <tt>out</tt> in GMV
+   * format. See DataOutBase::write_gmv.
+   */
+  void write_gmv (std::ostream &out) const;
+
+  /**
+   * Obtain data through get_patches() and write it to <tt>out</tt> in GNUPLOT
+   * format. See DataOutBase::write_gnuplot.
+   */
+  void write_gnuplot (std::ostream &out) const;
+
+  /**
+   * Obtain data through get_patches() and write it to <tt>out</tt> in POVRAY
+   * format. See DataOutBase::write_povray.
+   */
+  void write_povray (std::ostream &out) const;
+
+  /**
+   * Obtain data through get_patches() and write it to <tt>out</tt> in Tecplot
+   * format. See DataOutBase::write_tecplot.
+   */
+  void write_tecplot (std::ostream &out) const;
+
+  /**
+   * Obtain data through get_patches() and write it in the Tecplot binary
+   * output format. Note that the name of the output file must be specified
+   * through the TecplotFlags interface.
+   */
+  void write_tecplot_binary (std::ostream &out) const;
+
+  /**
+   * Obtain data through get_patches() and write it to <tt>out</tt> in UCD
+   * format for AVS. See DataOutBase::write_ucd.
+   */
+  void write_ucd (std::ostream &out) const;
+
+  /**
+   * Obtain data through get_patches() and write it to <tt>out</tt> in Vtk
+   * format. See DataOutBase::write_vtk.
+   *
+   * @note VTK is a legacy format and has largely been supplanted by the VTU
+   * format (an XML-structured version of VTK). In particular, VTU allows for
+   * the compression of data and consequently leads to much smaller file sizes
+   * than equivalent VTK files for large files. Since all visualization
+   * programs that support VTK also support VTU, you should consider using the
+   * latter file format instead, by using the write_vtu() function.
+   */
+  void write_vtk (std::ostream &out) const;
+
+  /**
+   * Obtain data through get_patches() and write it to <tt>out</tt> in Vtu
+   * (VTK's XML) format. See DataOutBase::write_vtu.
+   *
+   * Some visualization programs, such as ParaView, can read several separate
+   * VTU files to parallelize visualization. In that case, you need a
+   * <code>.pvtu</code> file that describes which VTU files form a group. The
+   * DataOutInterface::write_pvtu_record() function can generate such a master
+   * record. Likewise, DataOutInterface::write_visit_record() does the same
+   * for older versions of VisIt (although VisIt can also read
+   * <code>pvtu</code> records since version 2.5.1). Finally,
+   * DataOutInterface::write_pvd_record() can be used to group together the
+   * files that jointly make up a time dependent simulation.
+   */
+  void write_vtu (std::ostream &out) const;
+
+  /**
+   * Collective MPI call to write the solution from all participating nodes
+   * (those in the given communicator) to a single compressed .vtu file on a
+   * shared file system.  The communicator can be a sub communicator of the
+   * one used by the computation.  This routine uses MPI I/O to achieve high
+   * performance on parallel filesystems. Also see
+   * DataOutInterface::write_vtu().
+   */
+  void write_vtu_in_parallel (const char *filename, MPI_Comm comm) const;
+
+  /**
+   * Some visualization programs, such as ParaView, can read several separate
+   * VTU files to parallelize visualization. In that case, you need a
+   * <code>.pvtu</code> file that describes which VTU files (written, for
+   * example, through the write_vtu() function) form a group. The current
+   * function can generate such a master record.
+   *
+   * The file so written contains a list of (scalar or vector) fields whose
+   * values are described by the individual files that comprise the set of
+   * parallel VTU files along with the names of these files. This function
+   * gets the names and types of fields through the get_patches() function of
+   * this class like all the other write_xxx() functions. The second argument
+   * to this function specifies the names of the files that form the parallel
+   * set.
+   *
+   * @note See DataOutBase::write_vtu for writing each piece. Also note that
+   * only one parallel process needs to call the current function, listing the
+   * names of the files written by all parallel processes.
+   *
+   * @note The use of this function is explained in step-40.
+   *
+   * @note In order to tell ParaView to group together multiple
+   * <code>pvtu</code> files that each describe one time step of a time
+   * dependent simulation, see the DataOutInterface::write_pvd_record()
+   * function.
+   *
+   * @note Older versions of VisIt (before 2.5.1) cannot read
+   * <code>pvtu</code> records. However, they can read visit records as written
+   * by the write_visit_record() function.
+   */
+  void write_pvtu_record (std::ostream &out,
+                          const std::vector<std::string> &piece_names) const;
+
+  /**
+   * In ParaView it is possible to visualize time-dependent data tagged with
+   * the current integration time of a time dependent simulation. To use this
+   * feature you need a <code>.pvd</code> file that describes which VTU or
+   * PVTU file belongs to which timestep. This function writes a file that
+   * provides this mapping, i.e., it takes a list of pairs each of which
+   * indicates a particular time instant and the corresponding file that
+   * contains the graphical data for this time instant.
+   *
+   * A typical use case, in a program that computes a time dependent solution,
+   * would be the following (<code>time</code> and
+   * <code>timestep_number</code> are member variables of the class with types
+   * <code>double</code> and <code>unsigned int</code>, respectively; the
+   * variable <code>times_and_names</code> is of type
+   * <code>std::vector@<std::pair@<double,std::string@> @></code>):
+   *
+   * @code
+   *  template <int dim>
+   *  void MyEquation<dim>::output_results () const
+   *  {
+   *    DataOut<dim> data_out;
+   *
+   *    data_out.attach_dof_handler (dof_handler);
+   *    data_out.add_data_vector (solution, "U");
+   *    data_out.build_patches ();
+   *
+   *    const std::string filename = "solution-" +
+   *                                 Utilities::int_to_string (timestep_number, 3) +
+   *                                 ".vtu";
+   *    std::ofstream output (filename.c_str());
+   *    data_out.write_vtu (output);
+   *
+   *    times_and_names.push_back (std::pair<double,std::string> (time, filename));
+   *    std::ofstream pvd_output ("solution.pvd");
+   *    data_out.write_pvd_record (pvd_output, times_and_names);
+   *  }
+   * @endcode
+   *
+   * @note See DataOutBase::write_vtu or DataOutInterface::write_pvtu_record
+   * for writing solutions at each timestep.
+   *
+   * @note The second element of each pair, i.e., the file in which the
+   * graphical data for each time is stored, may itself be again a file that
+   * references other files. For example, it could be the name for a
+   * <code>.pvtu</code> file that references multiple parts of a parallel
+   * computation.
+   *
+   * @author Marco Engelhard, 2012
+   */
+  void write_pvd_record (std::ostream &out,
+                         const std::vector<std::pair<double,std::string> >  &times_and_names) const;
+
+  /**
+   * This function is the exact equivalent of the write_pvtu_record() function
+   * but for older versions of the VisIt visualization program and for one
+   * visualization graph (or one time step only). See there for the purpose of
+   * this function.
+   *
+   * This function is documented in the "Creating a master file for parallel"
+   * section (section 5.7) of the "Getting data into VisIt" report that can be
+   * found here:
+   * https://wci.llnl.gov/codes/visit/2.0.0/GettingDataIntoVisIt2.0.0.pdf
+   */
+  void write_visit_record (std::ostream &out,
+                           const std::vector<std::string> &piece_names) const;
+
+  /**
+   * This function is equivalent to the write_visit_record() above but for
+   * multiple time steps. Here is an example of how the function would be
+   * used:
+   * @code
+   *  DataOut<dim> data_out;
+   *
+   *  const unsigned int number_of_time_steps = 3;
+   *  std::vector<std::vector<std::string > > piece_names(number_of_time_steps);
+   *
+   *  piece_names[0].push_back("subdomain_01.time_step_0.vtk");
+   *  piece_names[0].push_back("subdomain_02.time_step_0.vtk");
+   *
+   *  piece_names[1].push_back("subdomain_01.time_step_1.vtk");
+   *  piece_names[1].push_back("subdomain_02.time_step_1.vtk");
+   *
+   *  piece_names[2].push_back("subdomain_01.time_step_2.vtk");
+   *  piece_names[2].push_back("subdomain_02.time_step_2.vtk");
+   *
+   *  std::ofstream visit_output ("master_file.visit");
+   *
+   *  data_out.write_visit_record(visit_output, piece_names);
+   * @endcode
+   *
+   * This function is documented in the "Creating a master file for parallel"
+   * section (section 5.7) of the "Getting data into VisIt" report that can be
+   * found here:
+   * https://wci.llnl.gov/codes/visit/2.0.0/GettingDataIntoVisIt2.0.0.pdf
+   */
+  void write_visit_record (std::ostream &out,
+                           const std::vector<std::vector<std::string> > &piece_names) const;
+
+  /**
+   * Obtain data through get_patches() and write it to <tt>out</tt> in SVG
+   * format. See DataOutBase::write_svg.
+   */
+  void write_svg(std::ostream &out) const;
+
+  /**
+   * Obtain data through get_patches() and write it to <tt>out</tt> in deal.II
+   * intermediate format. See DataOutBase::write_deal_II_intermediate.
+   *
+   * Note that the intermediate format is what its name suggests: a direct
+   * representation of internal data. It isn't standardized and will change
+   * whenever we change our internal representation. You can only expect to
+   * process files written in this format using the same version of deal.II
+   * that was used for writing.
+   */
+  void write_deal_II_intermediate (std::ostream &out) const;
+
+  /**
+   * Create an XDMFEntry based on the data in the data_filter. This assumes
+   * the mesh and solution data were written to a single file. See
+   * write_xdmf_file() for an example of usage.
+   */
+  XDMFEntry create_xdmf_entry (const DataOutBase::DataOutFilter &data_filter,
+                               const std::string &h5_filename,
+                               const double cur_time,
+                               MPI_Comm comm) const;
+
+  /**
+   * Create an XDMFEntry based on the data in the data_filter. This assumes
+   * the mesh and solution data were written to separate files. See
+   * write_xdmf_file() for an example of usage.
+   */
+  XDMFEntry create_xdmf_entry (const DataOutBase::DataOutFilter &data_filter,
+                               const std::string &h5_mesh_filename,
+                               const std::string &h5_solution_filename,
+                               const double cur_time,
+                               MPI_Comm comm) const;
+
+  /**
+   * Write an XDMF file based on the provided vector of XDMFEntry objects.
+   * Below is an example of how to use this function with HDF5 and the
+   * DataOutFilter:
+   *
+   * @code
+   * DataOutBase::DataOutFilter   data_filter(DataOutBase::DataOutFilterFlags(true, true));
+   * std::vector<XDMFEntry>       xdmf_entries;
+   * // Filter the data and store it in data_filter
+   * data_out.write_filtered_data(data_filter);
+   * // Write the filtered data to HDF5
+   * data_out.write_hdf5_parallel(data_filter, "solution.h5", MPI_COMM_WORLD);
+   * // Create an XDMF entry detailing the HDF5 file
+   * XDMFEntry new_xdmf_entry = data_out.create_xdmf_entry(data_filter, "solution.h5", simulation_time, MPI_COMM_WORLD);
+   * // Add the XDMF entry to the list
+   * xdmf_entries.push_back(new_xdmf_entry);
+   * // Create an XDMF file from all stored entries
+   * data_out.write_xdmf_file(xdmf_entries, "solution.xdmf", MPI_COMM_WORLD);
+   * @endcode
+   */
+  void write_xdmf_file (const std::vector<XDMFEntry> &entries,
+                        const std::string &filename,
+                        MPI_Comm comm) const;
+
+  /**
+   * Write the data in data_filter to a single HDF5 file containing both the
+   * mesh and solution values. Below is an example of how to use this function
+   * with the DataOutFilter:
+   *
+   * @code
+   * DataOutBase::DataOutFilter   data_filter(DataOutBase::DataOutFilterFlags(true, true));
+   * // Filter the data and store it in data_filter
+   * data_out.write_filtered_data(data_filter);
+   * // Write the filtered data to HDF5
+   * data_out.write_hdf5_parallel(data_filter, "solution.h5", MPI_COMM_WORLD);
+   * @endcode
+   */
+  void write_hdf5_parallel (const DataOutBase::DataOutFilter &data_filter,
+                            const std::string &filename,
+                            MPI_Comm comm) const;
+
+  /**
+   * Write the data in data_filter to HDF5 file(s). If write_mesh_file is
+   * false, the mesh data will not be written and the solution file will
+   * contain only the solution values. If write_mesh_file is true and the
+   * filenames are the same, the resulting file will contain both mesh data
+   * and solution values.
+   */
+  void write_hdf5_parallel (const DataOutBase::DataOutFilter &data_filter,
+                            const bool write_mesh_file,
+                            const std::string &mesh_filename,
+                            const std::string &solution_filename,
+                            MPI_Comm comm) const;
+
+  /**
+   * DataOutFilter is an intermediate data format that reduces the amount of
+   * data that will be written to files. The object filled by this function
+   * can then later be used again to write data in a concrete file format;
+   * see, for example, DataOutBase::write_hdf5_parallel().
+   */
+  void write_filtered_data (DataOutBase::DataOutFilter &filtered_data) const;
+
+
+  /**
+   * Write data and grid to <tt>out</tt> according to the given data format.
+   * This function simply calls the appropriate <tt>write_*</tt> function. If
+   * no output format is requested, the <tt>default_format</tt> is written.
+   *
+   * An error occurs if no format is provided and the default format is
+   * <tt>default_format</tt>.
+   */
+  void write (std::ostream       &out,
+              const DataOutBase::OutputFormat  output_format = DataOutBase::default_format) const;
+
+  /**
+   * Set the default format. The value set here is used any time output for
+   * format <tt>default_format</tt> is requested.
+   */
+  void set_default_format (const DataOutBase::OutputFormat default_format);
+
+
+  /**
+   * Set the flags to be used for output. This method expects <tt>flags</tt>
+   * to be an object of one of the child classes of <tt>OutputFlagsBase</tt>.
+   */
+  template<typename FlagType>
+  void set_flags (const FlagType &flags);
+
+
+  /**
+   * A function that returns the same string as the respective function in the
+   * base class does; the only exception being that if the parameter is
+   * omitted, then the value for the present default format is returned, i.e.
+   * the correct suffix for the format that was set through
+   * set_default_format() or parse_parameters() before calling this function.
+   */
+  std::string
+  default_suffix (const DataOutBase::OutputFormat output_format = DataOutBase::default_format) const;
+
+  /**
+   * Declare parameters for all output formats by declaring subsections within
+   * the parameter file for each output format and call the respective
+   * <tt>declare_parameters</tt> functions of the flag classes for each output
+   * format.
+   *
+   * Some of the declared subsections may not contain entries, if the
+   * respective format does not export any flags.
+   *
+   * Note that the top-level parameters denoting the number of subdivisions
+   * per patch and the output format are not declared, since they are only
+   * passed to virtual functions and are not stored inside objects of this
+   * type. You have to declare them yourself.
+   */
+  static void declare_parameters (ParameterHandler &prm);
+
+  /**
+   * Read the parameters declared in declare_parameters() and set the flags
+   * for the output formats accordingly.
+   *
+   * The flags thus obtained overwrite all previous contents of the flag
+   * objects as default-constructed or set by the set_flags() function.
+   */
+  void parse_parameters (ParameterHandler &prm);
+
+  /**
+   * Return an estimate for the memory consumption, in bytes, of this object.
+   * This is not exact (but will usually be close) because calculating the
+   * memory usage of trees (e.g., <tt>std::map</tt>) is difficult.
+   */
+  std::size_t memory_consumption () const;
+
+protected:
+  /**
+   * This is the abstract function through which derived classes propagate
+   * preprocessed data in the form of Patch structures (declared in the base
+   * class DataOutBase) to the actual output function. You need to override
+   * this function to allow the output functions to know what they shall
+   * print.
+   */
+  virtual
+  const std::vector<DataOutBase::Patch<dim,spacedim> > &
+  get_patches () const = 0;
+
+  /**
+   * Abstract virtual function through which the names of data sets are
+   * obtained by the output functions of the base class.
+   */
+  virtual
+  std::vector<std::string>
+  get_dataset_names () const = 0;
+
+  /**
+   * This function returns information about how the individual components of
+   * output files that consist of more than one data set are to be
+   * interpreted.
+   *
+   * It returns a list of index pairs and corresponding name indicating which
+   * components of the output are to be considered vector-valued rather than
+   * just a collection of scalar data. The index pairs are inclusive; for
+   * example, if we have a Stokes problem in 2d with components (u,v,p), then
+   * the corresponding vector data range should be (0,1), and the returned
+   * list would consist of only a single element with a tuple such as
+   * (0,1,"velocity").
+   *
+   * Since some of the derived classes do not know about vector data, this
+   * function has a default implementation that simply returns an empty
+   * string, meaning that all data is to be considered a collection of scalar
+   * fields.
+   */
+  virtual
+  std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> >
+  get_vector_data_ranges () const;
+
+  /**
+   * The default number of subdivisions for patches. This is filled by
+   * parse_parameters() and should be obeyed by build_patches() in derived
+   * classes.
+   */
+  unsigned int default_subdivisions;
+
+private:
+  /**
+   * Standard output format.  Use this format, if output format default_format
+   * is requested. It can be changed by the <tt>set_format</tt> function or in
+   * a parameter file.
+   */
+  DataOutBase::OutputFormat default_fmt;
+
+  /**
+   * Flags to be used upon output of OpenDX data. Can be changed by using the
+   * <tt>set_flags</tt> function.
+   */
+  DataOutBase::DXFlags     dx_flags;
+
+  /**
+   * Flags to be used upon output of UCD data. Can be changed by using the
+   * <tt>set_flags</tt> function.
+   */
+  DataOutBase::UcdFlags     ucd_flags;
+
+  /**
+   * Flags to be used upon output of GNUPLOT data. Can be changed by using the
+   * <tt>set_flags</tt> function.
+   */
+  DataOutBase::GnuplotFlags gnuplot_flags;
+
+  /**
+   * Flags to be used upon output of POVRAY data. Can be changed by using the
+   * <tt>set_flags</tt> function.
+   */
+  DataOutBase::PovrayFlags povray_flags;
+
+  /**
+   * Flags to be used upon output of EPS data in one space dimension. Can be
+   * changed by using the <tt>set_flags</tt> function.
+   */
+  DataOutBase::EpsFlags     eps_flags;
+
+  /**
+   * Flags to be used upon output of gmv data in one space dimension. Can be
+   * changed by using the <tt>set_flags</tt> function.
+   */
+  DataOutBase::GmvFlags     gmv_flags;
+
+  /**
+   * Flags to be used upon output of Tecplot data in one space dimension. Can
+   * be changed by using the <tt>set_flags</tt> function.
+   */
+  DataOutBase::TecplotFlags tecplot_flags;
+
+  /**
+   * Flags to be used upon output of vtk data in one space dimension. Can be
+   * changed by using the <tt>set_flags</tt> function.
+   */
+  DataOutBase::VtkFlags     vtk_flags;
+
+  /**
+   * Flags to be used upon output of svg data in one space dimension. Can be
+   * changed by using the <tt>set_flags</tt> function.
+   */
+  DataOutBase::SvgFlags     svg_flags;
+
+  /**
+   * Flags to be used upon output of deal.II intermediate data in one space
+   * dimension. Can be changed by using the <tt>set_flags</tt> function.
+   */
+  DataOutBase::Deal_II_IntermediateFlags     deal_II_intermediate_flags;
+};
+
+
+
+/**
+ * A class that is used to read data written in deal.II intermediate format
+ * back in, so that it can be written out in any of the other supported
+ * graphics formats. This class has two main purposes:
+ *
+ * The first use of this class is so that application programs can defer the
+ * decision of which graphics format to use until after the program has been
+ * run. The data is written in intermediate format into a file, and later on
+ * it can then be converted into any graphics format you wish. This may be
+ * useful, for example, if you want to convert it to gnuplot format to get a
+ * quick glimpse and later on want to convert it to OpenDX format as well to
+ * get a high quality version of the data. The present class allows to read
+ * this intermediate format back into the program, and allows it to be written
+ * in any other supported format using the relevant functions of the base
+ * class.
+ *
+ * The second use is mostly useful in parallel programs: rather than having
+ * one central process generate the graphical output for the entire program,
+ * one can let each process generate the graphical data for the cells it owns,
+ * and write it into a separate file in intermediate format. Later on, all
+ * these intermediate files can then be read back in and merged together, a
+ * process that is fast compared to generating the data in the first place.
+ * The use of the intermediate format is mostly because it allows separate
+ * files to be merged, while this is almost impossible once the data has been
+ * written out in any of the supported established graphics formats.
+ *
+ * This second use scenario is explained in some detail in the step-18 example
+ * program.
+ *
+ * Both these applications are implemented in the step-19 example program.
+ * There, a slight complication is also explained: in order to read data back
+ * into this object, you have to know the template parameters for the space
+ * dimension which were used when writing the data. If this knowledge is
+ * available at compile time, then this is no problem. However, if it is not
+ * (such as in a simple format converter), then it needs to be figured out at
+ * run time, even though the compiler already needs it at compile time. A way
+ * around using the DataOutBase::determine_intermediate_format_dimensions()
+ * function is explained in step-19.
+ *
+ * Note that the intermediate format is what its name suggests: a direct
+ * representation of internal data. It isn't standardized and will change
+ * whenever we change our internal representation. You can only expect to
+ * process files written in this format using the same version of deal.II that
+ * was used for writing.
+ *
+ * @ingroup input output
+ * @author Wolfgang Bangerth, 2005
+ */
+template <int dim, int spacedim=dim>
+class DataOutReader : public DataOutInterface<dim,spacedim>
+{
+public:
+  /**
+   * Read a sequence of patches as written previously by
+   * <tt>DataOutBase::write_deal_II_intermediate</tt> and store them in the
+   * present object. This overwrites any previous content.
+   */
+  void read (std::istream &in);
+
+  /**
+   * This function can be used to merge the patches read by the other object
+   * into the patches that this present object stores. This is sometimes handy
+   * if one has, for example, a domain decomposition algorithm where each
+   * block is represented by a DoFHandler of its own, but one wants to output
+   * the solution on all the blocks at the same time. Alternatively, it may
+   * also be used for parallel programs, where each process only generates
+   * output for its share of the cells, even if all processes can see all
+   * cells.
+   *
+   * For this to work, the input files for the present object and the given
+   * argument need to have the same number of output vectors, and they need to
+   * use the same number of subdivisions per patch. The output will probably
+   * look rather funny if patches in both objects overlap in space.
+   *
+   * If you call read() for this object after merging in patches, the previous
+   * state is overwritten, and the merged-in patches are lost.
+   *
+   * This function will fail if either this or the other object did not yet
+   * set up any patches.
+   *
+   * The use of this function is demonstrated in step-19.
+   */
+  void merge (const DataOutReader<dim,spacedim> &other);
+
+  /**
+   * Exception. Judging by its name, it signals that an object holds no patches (cf. merge()).
+   */
+  DeclException0 (ExcNoPatches);
+  /**
+   * Exception. Judging by its name, it signals that the dataset names of two objects do not match.
+   */
+  DeclException0 (ExcIncompatibleDatasetNames);
+  /**
+   * Exception. Judging by its name, it signals that two lists of patches cannot be combined.
+   */
+  DeclException0 (ExcIncompatiblePatchLists);
+  /**
+   * Exception reporting that either two dimensions or two space dimensions do not match.
+   */
+  DeclException4 (ExcIncompatibleDimensions,
+                  int, int, int, int,
+                  << "Either the dimensions <" << arg1 << "> and <"
+                  << arg2 << "> or the space dimensions <"
+                  << arg3 << "> and <" << arg4
+                  << "> do not match!");
+
+protected:
+  /**
+   * This is the function through which this class propagates preprocessed
+   * data in the form of Patch structures (declared in the base class
+   * DataOutBase) to the actual output function.
+   *
+   * It returns the patches as read the last time a stream was given to the
+   * read() function.
+   */
+  virtual const std::vector<dealii::DataOutBase::Patch<dim,spacedim> > &
+  get_patches () const;
+
+  /**
+   * Virtual function through which the names of data sets are obtained by
+   * the output functions of the base class.
+   *
+   * Return the names of the variables as read the last time we read a file.
+   */
+  virtual std::vector<std::string> get_dataset_names () const;
+
+  /**
+   * This function returns information about how the individual components of
+   * output files that consist of more than one data set are to be
+   * interpreted.
+   *
+   * It returns a list of index pairs and corresponding name indicating which
+   * components of the output are to be considered vector-valued rather than
+   * just a collection of scalar data. The index pairs are inclusive; for
+   * example, if we have a Stokes problem in 2d with components (u,v,p), then
+   * the corresponding vector data range should be (0,1), and the returned
+   * list would consist of only a single element with a tuple such as
+   * (0,1,"velocity").
+   *
+   * NOTE(review): in this class the function presumably returns the ranges
+   * kept in the private member vector_data_ranges, i.e., those read from
+   * the input stream; confirm against the implementation, which is not
+   * shown here.
+   */
+  virtual
+  std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> >
+  get_vector_data_ranges () const;
+
+private:
+  /**
+   * Arrays holding the set of patches as well as the names of output
+   * variables, all of which we read from an input stream.
+   */
+  std::vector<dealii::DataOutBase::Patch<dim,spacedim> > patches;
+  std::vector<std::string> dataset_names;
+
+  /**
+   * Information about whether certain components of the output field are to
+   * be considered vectors. Each entry is a (first index, last index, name) tuple.
+   */
+  std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> >
+  vector_data_ranges;
+};
+
+
+
+
+/**
+ * A class to store relevant data to use when writing the light data XDMF
+ * file. This should only contain valid data on the root node which writes the
+ * files, the rest of the nodes will have valid set to false. The XDMF file in
+ * turn points to heavy data files (such as HDF5) where the actual simulation
+ * data is stored. This allows flexibility in arranging the data, and also
+ * allows the mesh to be separated from the point data.
+ */
+class XDMFEntry
+{
+private:
+  /// Whether this entry is valid and contains data to be written
+  bool                                valid;
+  /// The name of the HDF5 heavy data solution and/or mesh files this entry references
+  std::string                         h5_sol_filename, h5_mesh_filename;
+  /// The simulation time associated with this entry
+  double                              entry_time;
+  /// The number of nodes, cells and dimensionality associated with the data
+  unsigned int                        num_nodes, num_cells, dimension;
+  /// The attributes associated with this entry and their dimension
+  std::map<std::string, unsigned int> attribute_dims;
+
+  /// Small function to create indentation for XML file: two blanks per level
+  std::string indent(const unsigned int indent_level) const
+  {
+    return std::string(2*indent_level, ' ');
+  }
+
+public:
+  /// Create an invalid entry. All scalar members are zeroed so that serialize() never reads indeterminate values.
+  XDMFEntry() : valid(false), entry_time(0.0), num_nodes(0), num_cells(0), dimension(0) {}
+  /// Create a valid entry whose mesh and solution data are both stored in the file @p filename.
+  XDMFEntry(const std::string &filename, const double time, const unsigned int nodes, const unsigned int cells, const unsigned int dim) : valid(true), h5_sol_filename(filename), h5_mesh_filename(filename), entry_time(time), num_nodes(nodes), num_cells(cells), dimension(dim) {}
+  /// Create a valid entry that references separate files for the mesh and for the solution data.
+  XDMFEntry(const std::string &mesh_filename, const std::string &solution_filename, const double time, const unsigned int nodes, const unsigned int cells, const unsigned int dim) : valid(true), h5_sol_filename(solution_filename), h5_mesh_filename(mesh_filename), entry_time(time), num_nodes(nodes), num_cells(cells), dimension(dim) {}
+
+  /**
+   * Record an attribute and associated dimensionality. Recording a name a
+   * second time overwrites the dimension stored for it before.
+   */
+  void add_attribute(const std::string &attr_name, const unsigned int dimension)
+  {
+    attribute_dims[attr_name] = dimension;
+  }
+
+  /// Read or write the data of this object for serialization; every member is passed to @p ar in declaration order.
+  template <class Archive>
+  void serialize(Archive &ar, const unsigned int /*version*/)
+  {
+    ar &valid
+    &h5_sol_filename
+    &h5_mesh_filename
+    &entry_time
+    &num_nodes
+    &num_cells
+    &dimension
+    &attribute_dims;
+  }
+
+  /// Get the XDMF content associated with this entry.
+  /// If the entry is not valid, this returns an empty string.
+  std::string get_xdmf_content(const unsigned int indent_level) const;
+};
+
+
+
+/* -------------------- inline functions ------------------- */
+
+namespace DataOutBase
+{
+  // A color is grey exactly when its three channels coincide.
+  inline bool
+  EpsFlags::RgbValues::is_grey () const
+  {
+    return (green == red) && (blue == red);
+  }
+
+
+  /* -------------------- template functions ------------------- */
+
+  /**
+   * Output operator for an object of type <tt>DataOutBase::Patch</tt>. This
+   * operator dumps the intermediate graphics format represented by the patch
+   * data structure. It may later be converted into regular formats for a
+   * number of graphics programs.
+   *
+   * @author Wolfgang Bangerth, 2005
+   */
+  template <int dim, int spacedim>
+  std::ostream &
+  operator << (std::ostream                           &out,
+               const Patch<dim,spacedim> &patch);
+
+
+
+  /**
+   * Input operator for an object of type <tt>DataOutBase::Patch</tt>. This
+   * operator reads the intermediate graphics format represented by the patch
+   * data structure, using the format in which it was written using the
+   * operator<<.
+   *
+   * @author Wolfgang Bangerth, 2005
+   */
+  template <int dim, int spacedim>
+  std::istream &
+  operator >> (std::istream                     &in,
+               Patch<dim,spacedim> &patch);
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/derivative_form.h b/include/deal.II/base/derivative_form.h
new file mode 100644
index 0000000..5ed26c6
--- /dev/null
+++ b/include/deal.II/base/derivative_form.h
@@ -0,0 +1,484 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__derivative_form_h
+#define dealii__derivative_form_h
+
+#include <deal.II/base/tensor.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * This class represents the (tangential) derivatives of a function $ f:
+ * {\mathbb R}^{\text{dim}} \rightarrow {\mathbb R}^{\text{spacedim}}$. Such
+ * functions are always used to map the reference dim-dimensional cell into
+ * spacedim-dimensional space. For such objects, the first derivative of the
+ * function is a linear map from ${\mathbb R}^{\text{dim}}$ to ${\mathbb
+ * R}^{\text{spacedim}}$, i.e., it can be represented as a matrix in ${\mathbb
+ * R}^{\text{spacedim}\times \text{dim}}$. This makes sense since one would
+ * represent the first derivative, $\nabla f(\mathbf x)$ with $\mathbf x\in
+ * {\mathbb R}^{\text{dim}}$, in such a way that the directional derivative in
+ * direction $\mathbf d\in {\mathbb R}^{\text{dim}}$ is given by
+ * @f{align*}{
+ *   \nabla f(\mathbf x) \mathbf d
+ *   = \lim_{\varepsilon\rightarrow 0}
+ *     \frac{f(\mathbf x + \varepsilon \mathbf d) - f(\mathbf x)}{\varepsilon},
+ * @f}
+ * i.e., one needs to be able to multiply the matrix $\nabla f(\mathbf x)$ by
+ * a vector in ${\mathbb R}^{\text{dim}}$, and the result is a difference of
+ * function values, which are in ${\mathbb R}^{\text{spacedim}}$.
+ * Consequently, the matrix must be of size $\text{spacedim}\times\text{dim}$.
+ *
+ * Similarly, the second derivative is a bilinear map from  ${\mathbb
+ * R}^{\text{dim}} \times  {\mathbb R}^{\text{dim}}$ to ${\mathbb
+ * R}^{\text{spacedim}}$, which one can think of as a rank-3 object of size
+ * $\text{spacedim}\times\text{dim}\times\text{dim}$.
+ *
+ * In deal.II we represent these derivatives using objects of type
+ * DerivativeForm@<1,dim,spacedim,Number@>,
+ * DerivativeForm@<2,dim,spacedim,Number@> and so on.
+ *
+ * @author Sebastian Pauletti, 2011, Luca Heltai, 2015
+ */
+template <int order, int dim, int spacedim, typename Number=double>
+class DerivativeForm
+{
+public:
+  /**
+   * Constructor. Initialize all entries to zero.
+   */
+  DerivativeForm ();
+
+  /**
+   * Constructor from a tensor. Only allowed for forms with dim==spacedim.
+   */
+  DerivativeForm (const Tensor<order+1,dim,Number> &);
+
+  /**
+   * Read-Write access operator. The index @p i must be smaller than spacedim.
+   */
+  Tensor<order,dim,Number> &operator [] (const unsigned int i);
+
+  /**
+   * Read-only access operator. The index @p i must be smaller than spacedim.
+   */
+  const Tensor<order,dim,Number> &operator [] (const unsigned int i) const;
+
+  /**
+   * Assignment operator. Copies the spacedim subelements of the argument.
+   */
+  DerivativeForm   &operator = (const DerivativeForm <order, dim, spacedim, Number> &);
+
+
+  /**
+   * Assignment operator from a tensor. Only allowed when dim==spacedim.
+   */
+  DerivativeForm   &operator = (const Tensor<order+1,dim, Number> &);
+
+  /**
+   * Assignment operator from a rank-1 tensor. Only allowed for spacedim==1 and order==1.
+   */
+  DerivativeForm   &operator = (const Tensor<1,dim, Number> &);
+
+  /**
+   * Converts a DerivativeForm <order,dim,dim> to Tensor<order+1,dim,Number>.
+   * In particular, if order==1 and the derivative is the Jacobian of F, then
+   * Tensor[i] = grad(F^i).
+   */
+  operator Tensor<order+1,dim,Number>() const;
+
+  /**
+   * Converts a DerivativeForm <1, dim, 1> to Tensor<1,dim,Number>. Only allowed for spacedim==1 and order==1.
+   */
+  operator Tensor<1,dim,Number>() const;
+
+  /**
+   * Return the transpose of a rectangular DerivativeForm (order==1 only),
+   * that is to say viewed as a two dimensional matrix.
+   */
+  DerivativeForm<1, spacedim, dim, Number> transpose () const;
+
+
+  /**
+   * Computes the volume element associated with the Jacobian of the
+   * transformation F. That is to say if $DF$ is square, it computes
+   * $\det(DF)$, in case DF is not square returns $\sqrt{\det(DF^{t} * DF)}$.
+   */
+  double determinant () const;
+
+
+  /**
+   * Assuming (*this) stores the Jacobian of the mapping F, it computes its
+   * covariant matrix, namely $DF*G^{-1}$, where $G = DF^{t}*DF$. If $DF$ is
+   * square, covariant_form() gives $DF^{-t}$.
+   */
+  DerivativeForm<1, dim, spacedim, Number> covariant_form() const;
+
+
+
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  static std::size_t memory_consumption ();
+
+  /**
+   * Exception reporting an invalid DerivativeForm index; the offending index is the argument.
+   */
+  DeclException1 (ExcInvalidTensorIndex,
+                  int,
+                  << "Invalid DerivativeForm index " << arg1);
+
+
+private:
+  /**
+   * Auxiliary function that computes (*this) * T^{t}. Only for order==1.
+   */
+  DerivativeForm<1, dim, spacedim, Number> times_T_t (Tensor<2,dim,Number> T) const;
+
+
+private:
+  /**
+   * Array of tensors holding the subelements: one tensor of rank @p order per space dimension.
+   */
+  Tensor<order,dim,Number> tensor[spacedim];
+
+
+};
+
+
+/*--------------------------- Inline functions -----------------------------*/
+
+#ifndef DOXYGEN
+
+
+
+
+template <int order, int dim, int spacedim, typename Number>
+inline
+DerivativeForm<order, dim, spacedim, Number>::DerivativeForm  ()
+{
+// Default constructor. Not specifying an initializer list calls
+// the default constructors of the subobjects, which initialize
+// themselves. Therefore, the tensor array is set to zero this way.
+}
+
+
+
+template <int order, int dim, int spacedim, typename Number>
+inline DerivativeForm<order, dim, spacedim, Number>::DerivativeForm(const Tensor<order+1,dim,Number> &T)
+{
+  // The rows of T can only be taken over one by one if the form is square.
+  Assert( (dim == spacedim),
+          ExcMessage("Only allowed for forms with dim==spacedim."));
+  if (dim == spacedim)
+    for (unsigned int row=0; row<dim; ++row)
+      tensor[row] = T[row];
+}
+
+
+
+template <int order, int dim, int spacedim, typename Number>
+inline DerivativeForm<order, dim, spacedim, Number> &
+DerivativeForm<order, dim, spacedim, Number>::
+operator = (const DerivativeForm<order, dim, spacedim, Number> &ta)
+{
+  // Element-wise copy of the spacedim stored tensors.
+  for (unsigned int row=0; row<spacedim; ++row)
+    tensor[row] = ta.tensor[row];
+  return *this;
+}
+
+
+
+template <int order, int dim, int spacedim, typename Number>
+inline DerivativeForm<order, dim, spacedim, Number> &
+DerivativeForm<order, dim, spacedim, Number>::operator = (const Tensor<order+1,dim,Number> &ta)
+{
+  // Assigning a rank-(order+1) tensor only makes sense for square forms.
+  Assert( (dim == spacedim),
+          ExcMessage("Only allowed when dim==spacedim."));
+  // A non-square form that gets past the check is left untouched.
+  if (dim != spacedim)
+    return *this;
+  for (unsigned int row=0; row<dim; ++row)
+    tensor[row] = ta[row];
+  return *this;
+}
+
+
+
+template <int order, int dim, int spacedim, typename Number>
+inline
+DerivativeForm<order, dim, spacedim, Number> &
+DerivativeForm<order, dim, spacedim, Number>::operator = (const Tensor<1,dim,Number> &T)
+{
+  // A rank-1 tensor can only fill a form that consists of a single
+  // rank-1 subelement.
+  Assert( (1 == spacedim) && (order==1),
+          ExcMessage("Only allowed for spacedim==1 and order==1."));
+
+  tensor[0] = T;
+  return *this;
+}
+
+
+
+template <int order, int dim, int spacedim, typename Number>
+inline
+Tensor<order,dim,Number> &DerivativeForm<order, dim, spacedim, Number>::
+operator[] (const unsigned int i)
+{
+  Assert (i<spacedim, ExcIndexRange(i, 0, spacedim));
+  // i was checked against spacedim above; hand out the i-th stored tensor for writing.
+  return tensor[i];
+}
+
+
+
+template <int order, int dim, int spacedim, typename Number>
+inline
+const Tensor<order,dim,Number> &DerivativeForm<order, dim, spacedim, Number>::
+operator[] (const unsigned int i) const
+{
+  Assert (i<spacedim, ExcIndexRange(i, 0, spacedim));
+  // i was checked against spacedim above; hand out the i-th stored tensor read-only.
+  return tensor[i];
+}
+
+
+
+template <int order, int dim, int spacedim, typename Number>
+inline
+DerivativeForm<order, dim, spacedim, Number>::operator Tensor<1,dim,Number>() const
+{
+  // Conversion to a rank-1 tensor: the form must consist of a single rank-1
+  // subelement. The message names both conditions that are checked.
+  Assert( (1 == spacedim) && (order==1),
+          ExcMessage("Only allowed for spacedim==1 and order==1."));
+  return (*this)[0];
+}
+
+
+
+template <int order, int dim, int spacedim, typename Number>
+inline
+DerivativeForm<order, dim, spacedim, Number>::operator Tensor<order+1,dim,Number>() const
+{
+  // Converting to a rank-(order+1) tensor requires a square form.
+  Assert( (dim == spacedim),
+          ExcMessage("Only allowed when dim==spacedim."));
+  // The result starts out default-constructed and stays that way for a
+  // non-square form that gets past the check above.
+  Tensor<order+1,dim,Number> result;
+  if (dim != spacedim)
+    return result;
+  for (unsigned int row=0; row<dim; ++row)
+    result[row] = tensor[row];
+  return result;
+}
+
+
+
+template <int order, int dim, int spacedim, typename Number>
+inline
+DerivativeForm<1,spacedim,dim,Number>
+DerivativeForm<order,dim,spacedim,Number>::transpose () const
+{
+  Assert(order==1, ExcMessage("Only for rectangular DerivativeForm."));
+
+  // Fill the result row by row: row `col` of the transpose collects the
+  // col-th entry of each of our spacedim rows.
+  DerivativeForm<1,spacedim,dim,Number> transposed;
+  for (unsigned int col=0; col<dim; ++col)
+    for (unsigned int row=0; row<spacedim; ++row)
+      transposed[col][row] = tensor[row][col];
+  return transposed;
+}
+
+
+
+template <int order, int dim, int spacedim, typename Number>
+inline
+DerivativeForm<1, dim, spacedim,Number>
+DerivativeForm<order,dim,spacedim,Number>::times_T_t (Tensor<2,dim,Number> T) const
+{
+  Assert( order==1, ExcMessage("Only for order == 1."));
+  // Entry (row,col) is the product of our row-th tensor with the col-th row of T.
+  DerivativeForm<1,dim, spacedim,Number> product;
+  for (unsigned int row=0; row<spacedim; ++row)
+    for (unsigned int col=0; col<dim; ++col)
+      product[row][col] = tensor[row] * T[col];
+  return product;
+}
+
+
+template <int order, int dim, int spacedim, typename Number>
+inline
+double
+DerivativeForm<order,dim,spacedim,Number>::determinant () const
+{
+  Assert( order==1, ExcMessage("Only for order == 1."));
+  if (dim == spacedim)
+    {
+      // Square case: the volume element is the plain determinant.
+      const Tensor<2,dim,Number> T = static_cast<Tensor<2,dim,Number> >(*this);
+      return dealii::determinant(T);
+    }
+  else
+    {
+      Assert( spacedim>dim, ExcMessage("Only for spacedim>dim."));
+      // Keep the Number type of this object: leaving it out selects the
+      // default (double), which mismatches transpose() for any other Number.
+      const DerivativeForm<1,spacedim,dim,Number> DF_t = this->transpose();
+      Tensor<2,dim,Number> G; //First fundamental form
+      for (unsigned int i=0; i<dim; ++i)
+        for (unsigned int j=0; j<dim; ++j)
+          G[i][j] = DF_t[i] * DF_t[j];
+
+      return ( sqrt(dealii::determinant(G)) );
+    }
+}
+
+
+
+template <int order, int dim, int spacedim, typename Number>
+inline
+DerivativeForm<1,dim,spacedim,Number>
+DerivativeForm<order,dim,spacedim,Number>::covariant_form() const
+{
+  // All temporaries carry this object's Number type. Leaving it out would
+  // select the default (double), which matches neither the declared return
+  // type nor the result of transpose() for any other Number.
+  if (dim == spacedim)
+    {
+      // Square case: the covariant form is the transposed inverse, DF^{-t}.
+      const Tensor<2,dim,Number> DF_t (dealii::transpose(invert(static_cast<Tensor<2,dim,Number> >(*this))));
+      const DerivativeForm<1,dim,spacedim,Number> result = DF_t;
+      return result;
+    }
+  else
+    {
+      // Rectangular case: DF * G^{-1} with G = DF^{t} * DF. G is symmetric,
+      // and so is its inverse, hence times_T_t() yields DF * G^{-1}.
+      const DerivativeForm<1,spacedim,dim,Number> DF_t = this->transpose();
+      Tensor<2,dim,Number> G; //First fundamental form
+      for (unsigned int i=0; i<dim; ++i)
+        for (unsigned int j=0; j<dim; ++j)
+          G[i][j] = DF_t[i] * DF_t[j];
+      return (this->times_T_t(invert(G)));
+    }
+}
+
+
+template <int order, int dim, int spacedim, typename Number>
+inline std::size_t
+DerivativeForm<order, dim, spacedim, Number>::memory_consumption ()
+{
+  // Reports the static size of an object of this type.
+  return sizeof(DerivativeForm);
+}
+
+#endif // DOXYGEN
+
+
+
+
+
+/**
+ * One of the uses of DerivativeForm is to apply it as a transformation. This
+ * is what this function does.  If @p T is DerivativeForm<1,dim,1> it computes
+ * $DF * T$, if @p T is DerivativeForm<1,dim,rank> it computes $T*DF^{t}$.
+ *
+ * @relates DerivativeForm
+ * @author Sebastian Pauletti, 2011
+ */
+template <int spacedim, int dim, typename Number>
+inline Tensor<1,spacedim,Number>
+apply_transformation (const DerivativeForm<1,dim,spacedim,Number> &DF,
+                      const Tensor<1,dim,Number>               &T)
+{
+  // Component `row` of DF*T is the product of the row-th tensor of DF with T.
+  Tensor<1,spacedim,Number> image;
+  for (unsigned int row=0; row<spacedim; ++row)
+    image[row] = DF[row] * T;
+  return image;
+}
+
+
+
+/**
+ * Similar to previous apply_transformation. It computes $T*DF^{t}$.
+ *
+ * @relates DerivativeForm
+ * @author Sebastian Pauletti, 2011
+ */
+//rank=2
+template <int spacedim, int dim, typename Number>
+inline
+DerivativeForm<1, spacedim, dim, Number>
+apply_transformation (const DerivativeForm<1,dim,spacedim,Number> &DF,
+                      const Tensor<2,dim,Number>               &T)
+{
+  // Row i of the result is DF applied to row i of T. The result carries the
+  // Number type of the arguments rather than the default (double).
+  DerivativeForm<1, spacedim, dim, Number> dest;
+  for (unsigned int i=0; i<dim; ++i)
+    dest[i] = apply_transformation(DF, T[i]);
+  return dest;
+}
+
+/**
+ * Similar to previous apply_transformation. It computes $DF2*DF1^{t}$
+ *
+ * @relates DerivativeForm
+ * @author Sebastian Pauletti, 2011
+ */
+template <int spacedim, int dim, typename Number>
+inline
+Tensor<2,spacedim,Number>
+apply_transformation (const DerivativeForm<1,dim,spacedim,Number> &DF1,
+                      const DerivativeForm<1,dim,spacedim,Number> &DF2)
+{
+  // Row `row` of the result is DF1 applied to the row-th tensor of DF2,
+  // which assembles DF2 * DF1^{t} one row at a time.
+  Tensor<2,spacedim,Number> product;
+  for (unsigned int row=0; row<spacedim; ++row)
+    product[row] = apply_transformation(DF1, DF2[row]);
+  return product;
+}
+
+
+/**
+ * Transpose of a rectangular DerivativeForm DF, mostly for compatibility
+ * reasons.
+ *
+ * @relates DerivativeForm
+ * @author Sebastian Pauletti, 2011
+ */
+template <int dim, int spacedim, typename Number>
+inline
+DerivativeForm<1,spacedim,dim,Number>
+transpose (const DerivativeForm<1,dim,spacedim,Number> &DF)
+{
+  // Thin free-function wrapper that forwards to the member function.
+  const DerivativeForm<1,spacedim,dim,Number> transposed = DF.transpose();
+  return transposed;
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/event.h b/include/deal.II/base/event.h
new file mode 100644
index 0000000..7fe4f4e
--- /dev/null
+++ b/include/deal.II/base/event.h
@@ -0,0 +1,300 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__event_h
+#define dealii__event_h
+
+#include <deal.II/base/config.h>
+
+#include <vector>
+#include <string>
+#include <iostream>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace Algorithms
+{
+  /**
+   * Objects of this kind are used to notify interior applications of changes
+   * provoked by an outer loop. They are handed to the application through
+   * Operator::notify() and it is up to the actual application how to handle
+   * them.
+   *
+   * Event is organized as an extensible binary enumerator. Every class can
+   * add its own events using assign(). A typical code example is
+   *
+   * @code
+   * class A
+   * {
+   *   static Event event;
+   * };
+   *
+   * Event A::event = Event::assign("Event for A");
+   * @endcode
+   */
+  class Event
+  {
+  public:
+    /**
+     * This function registers a new event type and assigns a unique
+     * identifier to it. The result of this function should be stored for
+     * later use.
+     */
+    static Event assign (const char *name);
+
+    /**
+     * Retrieve a previously assigned event knowing its name. NOTE: the
+     * declaration below is commented out, so this lookup is not available.
+     */
+//      static Event find(const std::string& name);
+
+    /**
+     * Constructor, generating a clear Event.
+     */
+    Event ();
+
+    /**
+     * Clear all flags.
+     */
+    void clear();
+
+    /**
+     * Set all flags.
+     */
+    void all();
+
+    /**
+     * Add the flags of the other event. If the other event has all flags set, so has this one afterwards.
+     */
+    Event &operator += (const Event &event);
+
+    /**
+     * Clear the flags of the other event. Nothing changes if the other event has no flag set.
+     */
+    Event &operator -= (const Event &event);
+
+    /**
+     * Test whether all the flags set in the other Event are also set in this
+     * one.
+     */
+    bool test (const Event &event) const;
+
+    /**
+     * Return <tt>true</tt> if any event is set.
+     */
+    bool any () const;
+
+    /**
+     * List the names of the flags that are set to a stream.
+     */
+    template <class OS>
+    void print (OS &os) const;
+
+    /**
+     * List all assigned events, one per line, as index and name.
+     */
+    template <class OS>
+    static void print_assigned (OS &os);
+
+  private:
+    /**
+     * Sometimes, actions have to be taken by all means. Therefore, if this
+     * value is true, test() always returns true.
+     */
+    bool all_true;
+
+    /**
+     * The actual list of events: one flag per event index.
+     */
+    std::vector<bool> flags;
+
+    /**
+     * The names of registered events, shared by all Event objects.
+     */
+//TODO: This static field must be guarded by a mutex to be thread-safe!
+    static std::vector<std::string> names;
+  };
+
+  /**
+   * Events used by library operators
+   */
+  namespace Events
+  {
+    /**
+     * The program has just started and everything should be new.
+     */
+    extern const Event initial;
+
+    /**
+     * The mesh has changed.
+     */
+    extern const Event remesh;
+
+    /**
+     * The current derivative leads to slow convergence of Newton's method.
+     */
+    extern const Event bad_derivative;
+
+    /**
+     * The time stepping scheme starts a new time step.
+     */
+    extern const Event new_time;
+
+    /**
+     * The time stepping scheme has changed the time step size.
+     */
+    extern const Event new_timestep_size;
+  }
+
+
+//----------------------------------------------------------------------//
+
+
+  // Report whether the catch-all flag or at least one individual flag
+  // of this event is set.
+  inline bool
+  Event::any () const
+  {
+    bool found = all_true;
+    for (std::vector<bool>::size_type k=0; !found && k<flags.size(); ++k)
+      found = flags[k];
+    return found;
+  }
+
+
+  inline
+  bool
+  Event::test (const Event &event) const
+  {
+    // An event carrying the catch-all flag passes every test.
+    if (all_true)
+      return true;
+
+    // Numbers of flags stored here and in the event tested against.
+    const unsigned int n_own   = flags.size();
+    const unsigned int n_other = event.flags.size();
+
+    if (event.all_true)
+      {
+        // The other event asks for everything. Flags that the other
+        // event knows about but we do not store count as unset ...
+        if (n_other > n_own)
+          return false;
+
+        // ... and each flag we do store has to be set.
+        for (unsigned int k=0; k<n_own; ++k)
+          if (!flags[k])
+            return false;
+
+        return true;
+      }
+
+    // Regular case: every flag set in the other event must be set here
+    // as well. A flag with an index beyond our own storage counts as
+    // unset, so it fails the test as soon as the other event has it set.
+    for (unsigned int k=0; k<n_other; ++k)
+      {
+        if (!event.flags[k])
+          continue;
+
+        const bool set_here = (k < n_own) && flags[k];
+        if (!set_here)
+          return false;
+      }
+
+    return true;
+  }
+
+
+
+  inline Event &Event::operator += (const Event &event)
+  {
+    // Once the catch-all flag is set, individual flags are not merged.
+    all_true = all_true || event.all_true;
+    if (all_true)
+      return *this;
+    if (event.flags.size() > flags.size())
+      flags.resize(event.flags.size());
+    for (unsigned int k=0; k<event.flags.size(); ++k)
+      if (event.flags[k])
+        flags[k] = true;
+    return *this;
+  }
+
+
+  inline Event &Event::operator -= (const Event &event)
+  {
+    // Subtracting an event that has nothing set changes nothing.
+    if (!event.any())
+      return *this;
+
+    // Something is being removed, so the catch-all flag cannot survive.
+    all_true = false;
+    if (event.all_true)
+      {
+        for (unsigned int k=0; k<flags.size(); ++k)
+          flags[k] = false;
+        return *this;
+      }
+    if (event.flags.size() > flags.size())
+      flags.resize(event.flags.size());
+    for (unsigned int k=0; k<event.flags.size(); ++k)
+      if (event.flags[k])
+        flags[k] = false;
+    return *this;
+  }
+
+
+  template <class OS>
+  inline void
+  Event::print (OS &os) const
+  {
+    // The catch-all marker comes first, followed by the name of every
+    // flag that is set, each preceded by a blank.
+    if (all_true)
+      os << " ALL";
+    for (unsigned int k=0; k<flags.size(); ++k)
+      if (flags[k])
+        os << ' ' << names[k];
+  }
+
+
+  template <class OS>
+  inline void
+  Event::print_assigned (OS &os)
+  {
+    // One line per registered event: its index, a tab, and its name.
+    for (unsigned int id=0; id<names.size(); ++id)
+      os << id << '\t' << names[id] << std::endl;
+  }
+
+
+  /**
+   * Output shift operator for events. Calls Event::print() on @p e and
+   * returns the stream @p o so that further output can be chained.
+   * @relates Event
+   */
+  template <class OS>
+  OS &operator << (OS &o, const Event &e)
+  {
+    e.print(o);
+    return o;
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/exceptions.h b/include/deal.II/base/exceptions.h
new file mode 100644
index 0000000..770bd2a
--- /dev/null
+++ b/include/deal.II/base/exceptions.h
@@ -0,0 +1,1103 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__exceptions_h
+#define dealii__exceptions_h
+
+#include <deal.II/base/config.h>
+
+#include <exception>
+#include <string>
+#include <ostream>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/**
+ * This class is the base class for all exception classes. Do not use its
+ * methods and variables directly since the interface and mechanism may be
+ * subject to change. Rather create new exception classes using the
+ * <tt>DeclException</tt> macro family.
+ *
+ * See the
+ * @ref Exceptions
+ * module for more details on this class and what can be done with classes
+ * derived from it.
+ *
+ * @ingroup Exceptions
+ * @author Wolfgang Bangerth, 1997, 1998, Matthias Maier, 2013
+ */
+class ExceptionBase : public std::exception
+{
+public:
+  /**
+   * Default constructor.
+   */
+  ExceptionBase ();
+
+  /**
+   * Copy constructor.
+   */
+  ExceptionBase (const ExceptionBase &exc);
+
+  /**
+   * Destructor.
+   */
+  virtual ~ExceptionBase () throw();
+
+  /**
+   * Set the file name, line and function of where the exception appeared as
+   * well as the violated condition and the name of the exception, all given
+   * as char pointers. This function also populates the stacktrace.
+   */
+  void set_fields (const char *file,
+                   const int   line,
+                   const char *function,
+                   const char *cond,
+                   const char *exc_name);
+
+
+  /**
+   * Override the standard function that returns the description of the error.
+   */
+  virtual const char *what() const throw();
+
+  /**
+   * Get exception name.
+   */
+  const char *get_exc_name() const;
+
+  /**
+   * Print out the general part of the error information.
+   */
+  void print_exc_data (std::ostream &out) const;
+
+  /**
+   * Print more specific information about the exception which occurred.
+   * Overload this function in your own exception classes.
+   */
+  virtual void print_info (std::ostream &out) const;
+
+  /**
+   * Print a stacktrace, if one has been recorded previously, to the given
+   * stream.
+   */
+  void print_stack_trace (std::ostream &out) const;
+
+protected:
+  /**
+   * Name of the file this exception happens in.
+   */
+  const char  *file;
+
+  /**
+   * Line number in this file.
+   */
+  unsigned int line;
+
+  /**
+   * Name of the function, pretty printed.
+   */
+  const char  *function;
+
+  /**
+   * The violated condition, as a string.
+   */
+  const char  *cond;
+
+  /**
+   * Name of the exception and call sequence.
+   */
+  const char  *exc;
+
+  /**
+   * A backtrace to the position where the problem happened, if the system
+   * supports this.
+   */
+  mutable char **stacktrace;
+
+  /**
+   * The number of stacktrace frames that are stored in the previous variable.
+   * Zero if the system does not support stack traces.
+   */
+  int n_stacktrace_frames;
+
+#ifdef DEAL_II_HAVE_GLIBC_STACKTRACE
+  /**
+   * Array of pointers that contains the raw stack trace (at most 25 entries).
+   */
+  void *raw_stacktrace[25];
+#endif
+
+private:
+  /**
+   * Internal function that generates the c_string. Called by what().
+   */
+  void generate_message() const;
+
+  /**
+   * The message string that will be printed by what(). It is populated by
+   * generate_message(); mutable since both of these functions are const.
+   */
+  mutable std::string what_str;
+};
+
+
+
+/**
+ * In this namespace, functions in connection with the Assert and AssertThrow
+ * mechanism are declared.
+ *
+ * @ingroup Exceptions
+ */
+namespace deal_II_exceptions
+{
+
+  /**
+   * Set a string that is printed upon output of the message indicating a
+   * triggered <tt>Assert</tt> statement. This string, which is printed in
+   * addition to the usual output may indicate information that is otherwise
+   * not readily available unless we are using a debugger. For example, with
+   * distributed programs on cluster computers, the output of all processes is
+   * redirected to the same console window. In this case, it is convenient to
+   * set as additional name the name of the host on which the program runs, so
+   * that one can see in which instance of the program the exception occurred.
+   *
+   * The string pointed to by the argument is copied, so doesn't need to be
+   * stored after the call to this function.
+   *
+   * Previously set additional output is replaced by the argument given to
+   * this function.
+   */
+  void set_additional_assert_output (const char *const p);
+
+  /**
+   * Calling this function disables printing a stacktrace along with the other
+   * output printed when an exception occurs. Most of the time, you will want
+   * to see such a stacktrace; suppressing it, however, is useful if one wants
+   * to compare the output of a program across different machines and systems,
+   * since the stacktrace shows memory addresses and library names/paths that
+   * depend on the exact setup of a machine.
+   */
+  void suppress_stacktrace_in_exceptions ();
+
+  /**
+   * Calling this function switches off the use of <tt>std::abort()</tt> when
+   * an exception is created using the Assert() macro. Instead, the Exception
+   * will be thrown using 'throw', so it can be caught if desired. Generally,
+   * you want to abort the execution of a program when Assert() is called, but
+   * it needs to be switched off if you want to log all exceptions created, or
+   * if you want to test if an assertion is working correctly. This is done
+   * for example in regression tests. Please note that some fatal errors will
+   * still call abort(), e.g. when an exception is caught during exception
+   * handling.
+   */
+  void disable_abort_on_exception ();
+
+  /**
+   * The functions in this namespace are in connection with the Assert and
+   * AssertThrow mechanism but are solely for internal purposes and are not
+   * for use outside the exception handling and throwing mechanism.
+   *
+   * @ingroup Exceptions
+   */
+  namespace internals
+  {
+
+    /**
+     * Conditionally abort the program.
+     *
+     * Depending on whether disable_abort_on_exception was called, this
+     * function either aborts the program flow by printing the error message
+     * provided by @p exc and calling <tt>std::abort()</tt>, or throws @p exc
+     * instead (if @p nothrow is set to <tt>false</tt>).
+     *
+     * If the boolean @p nothrow is set to true and disable_abort_on_exception
+     * was called, the exception type is just printed to deallog and program
+     * flow continues. This is useful if throwing an exception is prohibited
+     * (e.g. in a destructor with <tt>noexcept(true)</tt> or
+     * <tt>throw()</tt>).
+     */
+    void abort (const ExceptionBase &exc, bool nothrow = false);
+
+    /**
+     * An enum describing how to treat an exception in issue_error
+     */
+    enum ExceptionHandling
+    {
+      abort_on_exception,         ///< issue_error() calls abort(e)
+      throw_on_exception,         ///< issue_error() throws the exception
+      abort_nothrow_on_exception  ///< issue_error() calls abort(e, true)
+    };
+
+    /**
+     * This routine does the main work for the exception generation mechanism
+     * used in the <tt>Assert</tt> macro.
+     *
+     * It first stores the location (@p file, @p line, @p function), the
+     * violated condition @p cond and the exception name @p exc_name in the
+     * exception object @p e. Depending on @p handling, @p e is then thrown or
+     * passed on to abort(), with or without the <tt>nothrow</tt> flag set.
+     *
+     * @ref ExceptionBase
+     */
+    template <class exc>
+    void issue_error (ExceptionHandling handling,
+                      const char *file,
+                      int         line,
+                      const char *function,
+                      const char *cond,
+                      const char *exc_name,
+                      exc         e)
+    {
+      // Record where and why the error happened inside the exception object
+      e.set_fields (file, line, function, cond, exc_name);
+
+      if (handling == throw_on_exception)
+        throw e;
+      else if (handling == abort_on_exception)
+        dealii::deal_II_exceptions::internals::abort(e);
+      else if (handling == abort_nothrow_on_exception)
+        dealii::deal_II_exceptions::internals::abort(e, /*nothrow =*/ true);
+    }
+
+  } /*namespace internals*/
+
+} /*namespace deal_II_exceptions*/
+
+
+
+/**
+ * Main debug-mode check: if @p cond is false, the exception @p exc is handed
+ * to issue_error(), which aborts the program -- or throws @p exc instead if
+ * deal_II_exceptions::disable_abort_on_exception() was called beforehand.
+ * In non-DEBUG builds the macro expands to an empty block.
+ * See the <tt>ExceptionBase</tt> class for more information.
+ *
+ * @note Active in DEBUG mode only
+ * @ingroup Exceptions
+ * @author Wolfgang Bangerth, 1997, 1998, Matthias Maier, 2013
+ */
+#ifdef DEBUG
+#define Assert(cond, exc)                                                   \
+  {                                                                           \
+    if (!(cond))                                                              \
+      ::dealii::deal_II_exceptions::internals::                               \
+      issue_error(::dealii::deal_II_exceptions::internals::abort_on_exception,\
+                  __FILE__, __LINE__, __PRETTY_FUNCTION__, #cond, #exc, exc); \
+  }
+#else
+#define Assert(cond, exc)                                                   \
+  {}
+#endif
+
+
+
+/**
+ * A variant of the <tt>Assert</tt> macro above that exhibits the same runtime
+ * behaviour as long as disable_abort_on_exception was not called.
+ *
+ * However, if disable_abort_on_exception was called, this macro merely prints
+ * the exception to deallog and continues without throwing; this makes it
+ * usable where throwing an exception is prohibited (e.g. in a destructor).
+ *
+ * See the <tt>ExceptionBase</tt> class for more information.
+ *
+ * @note Active in DEBUG mode only
+ * @ingroup Exceptions
+ * @author Wolfgang Bangerth, 1997, 1998, Matthias Maier, 2013
+ */
+#ifdef DEBUG
+#define AssertNothrow(cond, exc)                                            \
+  {                                                                           \
+    if (!(cond))                                                              \
+      ::dealii::deal_II_exceptions::internals::                               \
+      issue_error(                                                            \
+          ::dealii::deal_II_exceptions::internals::abort_nothrow_on_exception,  \
+          __FILE__, __LINE__, __PRETTY_FUNCTION__, #cond, #exc, exc);           \
+  }
+#else
+#define AssertNothrow(cond, exc)                                            \
+  {}
+#endif
+
+
+
+/**
+ * This is the main routine in the exception mechanism for run-time mode error
+ * checking. It asserts that a certain condition is fulfilled, otherwise it
+ * throws the given exception rather than aborting the program. If available,
+ * __builtin_expect is used to mark the failing branch as unlikely.
+ * See the <tt>ExceptionBase</tt> class for more information.
+ *
+ * @note Active in both DEBUG and RELEASE modes
+ * @ingroup Exceptions
+ * @author Wolfgang Bangerth, 1997, 1998, Matthias Maier, 2013
+ */
+#ifdef DEAL_II_HAVE_BUILTIN_EXPECT
+#define AssertThrow(cond, exc)                                              \
+  {                                                                           \
+    if (__builtin_expect(!(cond), false))                                     \
+      ::dealii::deal_II_exceptions::internals::                               \
+      issue_error(::dealii::deal_II_exceptions::internals::throw_on_exception,\
+                  __FILE__, __LINE__, __PRETTY_FUNCTION__, #cond, #exc, exc); \
+  }
+#else /*ifdef DEAL_II_HAVE_BUILTIN_EXPECT*/
+#define AssertThrow(cond, exc)                                              \
+  {                                                                           \
+    if (!(cond))                                                              \
+      ::dealii::deal_II_exceptions::internals::                               \
+      issue_error(::dealii::deal_II_exceptions::internals::throw_on_exception,\
+                  __FILE__, __LINE__, __PRETTY_FUNCTION__, #cond, #exc, exc); \
+  }
+#endif /*ifdef DEAL_II_HAVE_BUILTIN_EXPECT*/
+
+
+
+#ifndef DOXYGEN
+
+/**
+ * Declare an exception class derived from ExceptionBase without parameters.
+ * The declared class adds no members of its own to ExceptionBase.
+ * @author Wolfgang Bangerth, November 1997
+ * @ingroup Exceptions
+ */
+#define DeclException0(Exception0)                                        \
+  class Exception0 :  public dealii::ExceptionBase {}
+
+
+/**
+ * Declare an exception class derived from ExceptionBase that can take one
+ * runtime argument, but if none is given in the place where you want to throw
+ * the exception, it simply reverts to the default text provided when
+ * declaring the exception class through this macro.
+ * The text is stored in the exception and written out by print_info().
+ * @ingroup Exceptions
+ */
+#define DeclExceptionMsg(Exception, defaulttext)                          \
+  class Exception :  public dealii::ExceptionBase                         \
+  {                                                                       \
+  public:                                                                 \
+    Exception (const std::string &msg = defaulttext) : arg (msg) {}       \
+    virtual ~Exception () throw () {}                                     \
+    virtual void print_info (std::ostream &out) const {                   \
+      out << arg << std::endl;                                            \
+    }                                                                     \
+  private:                                                                \
+    const std::string arg;                                                \
+  }
+
+/**
+ * Declare an exception class derived from ExceptionBase with one additional
+ * parameter. @p outsequence, a chain of stream insertions that may use
+ * <tt>arg1</tt>, is what print_info() writes out (followed by a newline).
+ * @ingroup Exceptions
+ */
+#define DeclException1(Exception1, type1, outsequence)                    \
+  class Exception1 : public dealii::ExceptionBase {                       \
+  public:                                                                 \
+    Exception1 (const type1 a1) : arg1 (a1) {}                            \
+    virtual ~Exception1 () throw () {}                                    \
+    virtual void print_info (std::ostream &out) const {                   \
+      out outsequence << std::endl;                                       \
+    }                                                                     \
+  private:                                                                \
+    const type1 arg1;                                                     \
+  }
+
+
+/**
+ * Declare an exception class derived from ExceptionBase with two additional
+ * parameters. @p outsequence, a chain of stream insertions that may use
+ * <tt>arg1</tt> and <tt>arg2</tt>, is what print_info() writes out.
+ * @ingroup Exceptions
+ */
+#define DeclException2(Exception2, type1, type2, outsequence)             \
+  class Exception2 : public dealii::ExceptionBase {                       \
+  public:                                                                 \
+    Exception2 (const type1 a1, const type2 a2) :                         \
+      arg1 (a1), arg2(a2) {}                                              \
+    virtual ~Exception2 () throw () {}                                    \
+    virtual void print_info (std::ostream &out) const {                   \
+      out outsequence << std::endl;                                       \
+    }                                                                     \
+  private:                                                                \
+    const type1 arg1;                                                     \
+    const type2 arg2;                                                     \
+  }
+
+
+/**
+ * Declare an exception class derived from ExceptionBase with three additional
+ * parameters. @p outsequence, a chain of stream insertions that may use
+ * <tt>arg1</tt> to <tt>arg3</tt>, is what print_info() writes out.
+ * @ingroup Exceptions
+ */
+#define DeclException3(Exception3, type1, type2, type3, outsequence)      \
+  class Exception3 : public dealii::ExceptionBase {                       \
+  public:                                                                 \
+    Exception3 (const type1 a1, const type2 a2, const type3 a3) :         \
+      arg1 (a1), arg2(a2), arg3(a3) {}                                    \
+    virtual ~Exception3 () throw () {}                                    \
+    virtual void print_info (std::ostream &out) const {                   \
+      out outsequence << std::endl;                                       \
+    }                                                                     \
+  private:                                                                \
+    const type1 arg1;                                                     \
+    const type2 arg2;                                                     \
+    const type3 arg3;                                                     \
+  }
+
+
+/**
+ * Declare an exception class derived from ExceptionBase with four additional
+ * parameters. @p outsequence, a chain of stream insertions that may use
+ * <tt>arg1</tt> to <tt>arg4</tt>, is what print_info() writes out.
+ * @ingroup Exceptions
+ */
+#define DeclException4(Exception4, type1, type2, type3, type4, outsequence) \
+  class Exception4 : public dealii::ExceptionBase {                       \
+  public:                                                                 \
+    Exception4 (const type1 a1, const type2 a2,                           \
+                const type3 a3, const type4 a4) :                         \
+      arg1 (a1), arg2(a2), arg3(a3), arg4(a4) {}                          \
+    virtual ~Exception4 () throw () {}                                    \
+    virtual void print_info (std::ostream &out) const {                   \
+      out outsequence << std::endl;                                       \
+    }                                                                     \
+  private:                                                                \
+    const type1 arg1;                                                     \
+    const type2 arg2;                                                     \
+    const type3 arg3;                                                     \
+    const type4 arg4;                                                     \
+  }
+
+
+/**
+ * Declare an exception class derived from ExceptionBase with five additional
+ * parameters. @p outsequence, a chain of stream insertions that may use
+ * <tt>arg1</tt> to <tt>arg5</tt>, is what print_info() writes out.
+ * @ingroup Exceptions
+ */
+#define DeclException5(Exception5, type1, type2, type3, type4, type5, outsequence) \
+  class Exception5 : public dealii::ExceptionBase {                       \
+  public:                                                                 \
+    Exception5 (const type1 a1, const type2 a2, const type3 a3,           \
+                const type4 a4, const type5 a5) :                         \
+      arg1 (a1), arg2(a2), arg3(a3), arg4(a4), arg5(a5) {}                \
+    virtual ~Exception5 () throw () {}                                    \
+    virtual void print_info (std::ostream &out) const {                   \
+      out outsequence << std::endl;                                       \
+    }                                                                     \
+  private:                                                                \
+    const type1 arg1;                                                     \
+    const type2 arg2;                                                     \
+    const type3 arg3;                                                     \
+    const type4 arg4;                                                     \
+    const type5 arg5;                                                     \
+  }
+
+#else /*ifndef DOXYGEN*/
+
+// Dummy definitions for doxygen:
+
+/**
+ * Declare an exception class derived from ExceptionBase without parameters.
+ * The declared class adds no members of its own to ExceptionBase.
+ * @author Wolfgang Bangerth, November 1997
+ * @ingroup Exceptions
+ */
+#define DeclException0(Exception0)                                        \
+  static dealii::ExceptionBase& Exception0 ()
+
+/**
+ * Declare an exception class derived from ExceptionBase that can take one
+ * runtime argument, but if none is given in the place where you want to throw
+ * the exception, it simply reverts to the default text provided when
+ * declaring the exception class through this macro.
+ * The text is stored in the exception and written out by print_info().
+ * @ingroup Exceptions
+ */
+#define DeclExceptionMsg(Exception, defaulttext)                          \
+  static dealii::ExceptionBase& Exception ()
+
+/**
+ * Declare an exception class derived from ExceptionBase with one additional
+ * parameter. @p outsequence, a chain of stream insertions that may use
+ * <tt>arg1</tt>, is what print_info() writes out (followed by a newline).
+ * @ingroup Exceptions
+ */
+#define DeclException1(Exception1, type1, outsequence)                    \
+  static dealii::ExceptionBase& Exception1 (type1 arg1) throw (errortext outsequence)
+
+
+/**
+ * Declare an exception class derived from ExceptionBase with two additional
+ * parameters. @p outsequence, a chain of stream insertions that may use
+ * <tt>arg1</tt> and <tt>arg2</tt>, is what print_info() writes out.
+ * @ingroup Exceptions
+ */
+#define DeclException2(Exception2, type1, type2, outsequence)             \
+  static dealii::ExceptionBase& Exception2 (type1 arg1, type2 arg2) throw (errortext outsequence)
+
+
+/**
+ * Declare an exception class derived from ExceptionBase with three additional
+ * parameters. @p outsequence, a chain of stream insertions that may use
+ * <tt>arg1</tt> to <tt>arg3</tt>, is what print_info() writes out.
+ * @ingroup Exceptions
+ */
+#define DeclException3(Exception3, type1, type2, type3, outsequence)      \
+  static dealii::ExceptionBase& Exception3 (type1 arg1, type2 arg2, type3 arg3) throw (errortext outsequence)
+
+
+/**
+ * Declare an exception class derived from ExceptionBase with four additional
+ * parameters. @p outsequence, a chain of stream insertions that may use
+ * <tt>arg1</tt> to <tt>arg4</tt>, is what print_info() writes out.
+ * @ingroup Exceptions
+ */
+#define DeclException4(Exception4, type1, type2, type3, type4, outsequence) \
+  static dealii::ExceptionBase& Exception4 (type1 arg1, type2 arg2, type3 arg3, type4 arg4) throw (errortext outsequence)
+
+
+/**
+ * Declare an exception class derived from ExceptionBase with five additional
+ * parameters. @p outsequence, a chain of stream insertions that may use
+ * <tt>arg1</tt> to <tt>arg5</tt>, is what print_info() writes out.
+ * @ingroup Exceptions
+ */
+#define DeclException5(Exception5, type1, type2, type3, type4, type5, outsequence) \
+  static dealii::ExceptionBase& Exception5 (type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) throw (errortext outsequence)
+
+#endif /*ifndef DOXYGEN*/
+
+
+/**
+ * Declare some exceptions that occur over and over. This way, you can simply
+ * use these exceptions, instead of having to declare them locally in your
+ * class. The namespace in which these exceptions are declared is later
+ * included into the global namespace by
+ * @code
+ * using namespace StandardExceptions;
+ * @endcode
+ *
+ * @ingroup Exceptions
+ */
+namespace StandardExceptions
+{
+  /**
+   * @addtogroup Exceptions
+   */
+  //@{
+
+  /**
+   * Exception denoting a division by zero (custom message optional).
+   */
+  DeclExceptionMsg (ExcDivideByZero,
+                    "A piece of code is attempting a division by zero. This is "
+                    "likely going to lead to results that make no sense.");
+
+  /**
+   * Exception raised if a number is not finite.
+   *
+   * This exception should be used to catch infinite or not a number results
+   * of arithmetic operations that do not result from a division by zero (use
+   * ExcDivideByZero for those).
+   *
+   * The exception uses std::complex as its argument to ensure that we can use
+   * it for all scalar arguments (real or complex-valued).
+   */
+  DeclException1 (ExcNumberNotFinite,
+                  std::complex<double>,
+                  << "In a significant number of places, deal.II checks that some intermediate "
+                  << "value is a finite number (as opposed to plus or minus infinity, or "
+                  << "NaN/Not a Number). In the current function, we encountered a number "
+                  << "that is not finite (its value is " << arg1 << " and therefore "
+                  << "violates the current assertion).\n\n"
+                  << "This may be due to the fact that some operation in this function "
+                  << "created such a value, or because one of the arguments you passed "
+                  << "to the function already had this value from some previous "
+                  << "operation. In the latter case, this function only triggered the "
+                  << "error but may not actually be responsible for the computation of "
+                  << "the number that is not finite.\n\n"
+                  << "There are two common cases where this situation happens. First, your "
+                  << "code (or something in deal.II) divides by zero in a place where this "
+                  << "should not happen. Or, you are trying to solve a linear system "
+                  << "with an unsuitable solver (such as an indefinite or non-symmetric "
+                  << "linear system using a Conjugate Gradient solver); such attempts "
+                  << "oftentimes yield an operation somewhere that tries to divide "
+                  << "by zero or take the square root of a negative value.\n\n"
+                  << "In any case, when trying to find the source of the error, "
+                  << "recall that the location where you are getting this error is "
+                  << "simply the first place in the program where there is a check "
+                  << "that a number (e.g., an element of a solution vector) is in fact "
+                  << "finite, but that the actual error that computed the number "
+                  << "may have happened far earlier. To find this location, you "
+                  << "may want to add checks for finiteness in places of your "
+                  << "program visited before the place where this error is produced. "
+                  << "One way to check for finiteness is to use the 'AssertIsFinite' "
+                  << "macro.");
+
+  /**
+   * Allocating a new object failed for lack of memory (custom message optional).
+   */
+  DeclExceptionMsg (ExcOutOfMemory,
+                    "Your program tried to allocate some memory but this "
+                    "allocation failed. Typically, this either means that "
+                    "you simply do not have enough memory in your system, "
+                    "or that you are (erroneously) trying to allocate "
+                    "a chunk of memory that is simply beyond all reasonable "
+                    "size, for example because the size of the object has "
+                    "been computed incorrectly.");
+
+  /**
+   * A memory handler is being destroyed although some objects are still
+   * allocated. The <tt>int</tt> argument is the number of these objects.
+   */
+  DeclException1 (ExcMemoryLeak, int,
+                  << "Destroying memory handler while " << arg1
+                  << " objects are still allocated");
+
+  /**
+   * An error occurred reading or writing a file (custom message optional).
+   */
+  DeclExceptionMsg (ExcIO,
+                    "An input/output error has occurred. There are a number of "
+                    "reasons why this may be happening, both for reading and "
+                    "writing operations."
+                    "\n\n"
+                    "If this happens during an operation that tries to read "
+                    "data: First, you may be "
+                    "trying to read from a file that doesn't exist or that is "
+                    "not readable given its file permissions. Second, deal.II "
+                    "uses this error at times if it tries to "
+                    "read information from a file but where the information "
+                    "in the file does not correspond to the expected format. "
+                    "An example would be a truncated file, or a mesh file "
+                    "that contains not only sections that describe the "
+                    "vertices and cells, but also sections for additional "
+                    "data that deal.II does not understand."
+                    "\n\n"
+                    "If this happens during an operation that tries to write "
+                    "data: you may be trying to write to a file to which file "
+                    "or directory permissions do not allow you to write. A "
+                    "typical example is where you specify an output file in "
+                    "a directory that does not exist.");
+
+  /**
+   * An error occurred opening the named file.
+   *
+   * The constructor takes a single argument naming the file. The name is
+   * stored by value, so <tt>char*</tt> and <tt>const char*</tt> both work.
+   */
+  DeclException1 (ExcFileNotOpen,
+                  std::string,
+                  << "Could not open file " << arg1);
+
+  /**
+   * Exception denoting a part of the library or application program that has
+   * not yet been implemented. In many cases, this only indicates that there
+   * wasn't much need for something yet, not that this is difficult to
+   * implement. It is therefore quite worth the effort to take a look at the
+   * corresponding place and see whether it can be implemented without too
+   * much effort. A custom message may be passed to the constructor.
+   */
+  DeclExceptionMsg (ExcNotImplemented,
+                    "You are trying to use functionality in deal.II that is "
+                    "currently not implemented. In many cases, this indicates "
+                    "that there simply didn't appear much of a need for it, or "
+                    "that the author of the original code did not have the "
+                    "time to implement a particular case. If you hit this "
+                    "exception, it is therefore worth the time to look into "
+                    "the code to find out whether you may be able to "
+                    "implement the missing functionality. If you do, please "
+                    "consider providing a patch to the deal.II development "
+                    "sources (see the deal.II website on how to contribute).");
+
+  /**
+   * This exception usually indicates that some condition which the programmer
+   * thinks must be satisfied at a certain point in an algorithm, is not
+   * fulfilled. This might be due to some programming error above, due to
+   * changes to the algorithm that did not preserve this assertion, or due to
+   * assumptions the programmer made that are not valid at all (i.e. the
+   * exception is thrown although there is no error here). Within the library,
+   * this exception is most often used when we write some kind of complicated
+   * algorithm and are not yet sure whether we got it right; we then put in
+   * assertions after each part of the algorithm that check for some
+   * conditions that should hold there, and throw an exception if they do not.
+   *
+   * We usually leave in these assertions even after we are confident that the
+   * implementation is correct, since if someone later changes or extends the
+   * algorithm, these exceptions will indicate to him if he violates
+   * assumptions that are used later in the algorithm. Furthermore, it
+   * sometimes happens that an algorithm does not work in very rare corner
+   * cases. These cases will then be trapped sooner or later by the exception,
+   * so that the algorithm can then be fixed for these cases as well.
+   */
+  DeclExceptionMsg (ExcInternalError,
+                    "This exception -- which is used in many places in the "
+                    "library -- usually indicates that some condition which "
+                    "the author of the code thought must be satisfied at a "
+                    "certain point in an algorithm, is not fulfilled. An "
+                    "example would be that the first part of an algorithm "
+                    "sorts elements of an array in ascending order, and "
+                    "a second part of the algorithm later encounters "
+                    "an element that is not larger than the previous one."
+                    "\n\n"
+                    "There is usually not very much you can do if you "
+                    "encounter such an exception since it indicates an error "
+                    "in deal.II, not in your own program. Try to come up with "
+                    "the smallest possible program that still demonstrates "
+                    "the error and contact the deal.II mailing lists with it "
+                    "to obtain help.");
+
+  /**
+   * This exception is thrown by the dummy implementation of a virtual
+   * function that a base class cannot implement meaningfully but that was
+   * intentionally not declared pure (<tt>=0</tt>); it signals that a derived
+   * class was expected to override the function before it is called.
+   */
+  DeclExceptionMsg (ExcPureFunctionCalled,
+                    "You (or a place in the library) are trying to call a "
+                    "function that is declared as a virtual function in a "
+                    "base class but that has not been overridden in your "
+                    "derived class."
+                    "\n\n"
+                    "This exception happens in cases where the base class "
+                    "cannot provide a useful default implementation for "
+                    "the virtual function, but where we also do not want "
+                    "to mark the function as abstract (i.e., with '=0' at the end) "
+                    "because the function is not essential to the class in many "
+                    "contexts. In cases like this, the base class provides "
+                    "a dummy implementation that makes the compiler happy, but "
+                    "that then throws the current exception."
+                    "\n\n"
+                    "A concrete example would be the 'Function' class. It declares "
+                    "the existence of 'value()' and 'gradient()' member functions, "
+                    "and both are marked as 'virtual'. Derived classes have to "
+                    "override these functions for the values and gradients of a "
+                    "particular function. On the other hand, not every function "
+                    "has a gradient, and even for those that do, not every program "
+                    "actually needs to evaluate it. Consequently, there is no "
+                    "*requirement* that a derived class actually override the "
+                    "'gradient()' function (as there would be had it been marked "
+                    "as abstract). But, since the base class cannot know how to "
+                    "compute the gradient, if a derived class does not override "
+                    "the 'gradient()' function and it is called anyway, then the "
+                    "default implementation in the base class will simply throw "
+                    "an exception."
+                    "\n\n"
+                    "The exception you see is what happens in cases such as the "
+                    "one just illustrated. To fix the problem, you need to "
+                    "investigate whether the function being called should indeed have "
+                    "been called; if the answer is 'yes', then you need to "
+                    "implement the missing override in your class.");
+
+  /**
+   * This exception is used for constructors that are disabled. Examples are
+   * copy constructors and assignment operators of large objects, for which
+   * copying is only allowed if the object is empty.
+   */
+  DeclException0 (ExcInvalidConstructorCall);
+
+  /**
+   * This exception is used if an object is found to be uninitialized.
+   */
+  DeclException0 (ExcNotInitialized);
+
+  /**
+   * The object is in a state that is unsuitable for the requested operation.
+   */
+  DeclException0 (ExcInvalidState);
+
+  /**
+   * This exception is raised if a functionality is not possible in the given
+   * dimension. Mostly used to reject function calls in 1d.
+   *
+   * The constructor takes a single <tt>int</tt>, denoting the dimension.
+   */
+  DeclException1 (ExcImpossibleInDim,
+                  int,
+                  << "You are trying to execute functionality that is "
+                  << "impossible in " << arg1
+                  << "d or simply does not make any sense.");
+
+  /**
+   * A number is zero in a place where a zero does not make sense.
+   */
+  DeclExceptionMsg(ExcZero,
+                   "In a check in the code, deal.II encountered a zero in "
+                   "a place where this does not make sense. See the condition "
+                   "that was being checked and that is printed further up "
+                   "in the error message to get more information on what "
+                   "the erroneous zero corresponds to.");
+
+  /**
+   * The object is empty, but it should have been filled with something before
+   * this member function is called.
+   */
+  DeclExceptionMsg(ExcEmptyObject,
+                   "The object you are trying to access is empty but it makes "
+                   "no sense to attempt the operation you are trying on an "
+                   "empty object.");
+
+  /**
+   * This exception is raised whenever the sizes of two objects were assumed
+   * to be equal, but were not.
+   *
+   * Parameters to the constructor are the first and second size, both of type
+   * <tt>std::size_t</tt>.
+   */
+  DeclException2 (ExcDimensionMismatch,
+                  std::size_t, std::size_t,
+                  << "Dimension " << arg1 << " not equal to " << arg2);
+
+  /**
+   * The first dimension should be equal to either the second or the third,
+   * but it is neither. All three dimensions are of type <tt>int</tt>.
+   */
+  DeclException3 (ExcDimensionMismatch2,
+                  int, int, int,
+                  << "Dimension " << arg1 << " neither equal to " << arg2
+                  << " nor to " << arg3);
+
+  /**
+   * This exception indicates that an index is not within the expected
+   * half-open range, for example because you are trying to access an element
+   * of a vector which does not exist.
+   *
+   * The constructor takes three <tt>int</tt> arguments, namely
+   * <ol>
+   * <li> the violating index
+   * <li> the lower bound
+   * <li> the upper bound plus one
+   * </ol>
+   */
+  DeclException3 (ExcIndexRange,
+                  int, int, int,
+                  << "Index " << arg1 << " is not in the half-open range [" << arg2 << ","
+                  << arg3 << ")."
+                  << (arg2==arg3 ?
+                      " In the current case, this half-open range is in fact empty, "
+                      "suggesting that you are accessing an element of an empty "
+                      "collection such as a vector that has not been set to the "
+                      "correct size."
+                      :
+                      ""));
+
+  /**
+   * This exception indicates that an index is not within the expected
+   * half-open range, for example because you are trying to access an element
+   * of a vector which does not exist.
+   *
+   * The constructor takes three arguments of type <tt>T</tt>, namely
+   * <ol>
+   * <li> the violating index
+   * <li> the lower bound
+   * <li> the upper bound plus one
+   * </ol>
+   *
+   * This generic exception differs from ExcIndexRange by allowing to specify
+   * the type of indices.
+   */
+  template <typename T>
+  DeclException3 (ExcIndexRangeType,
+                  T,T,T,
+                  << "Index " << arg1 << " is not in the half-open range [" << arg2 << ","
+                  << arg3 << ")."
+                  << (arg2==arg3 ?
+                      " In the current case, this half-open range is in fact empty, "
+                      "suggesting that you are accessing an element of an empty "
+                      "collection such as a vector that has not been set to the "
+                      "correct size."
+                      :
+                      ""));
+
+  /**
+   * A number is too small: the first <tt>int</tt> is less than the second.
+   */
+  DeclException2 (ExcLowerRange,
+                  int, int,
+                  << "Number " << arg1 << " must be larger than or equal "
+                  << arg2 << ".");
+
+  /**
+   * Generic variant of ExcLowerRange for numbers of arbitrary type <tt>T</tt>.
+   */
+  template <typename T>
+  DeclException2 (ExcLowerRangeType,
+                  T, T,
+                  << "Number " << arg1 << " must be larger than or equal "
+                  << arg2 << ".");
+
+  /**
+   * This exception indicates that the first <tt>int</tt> argument should be
+   * an integer multiple of the second, but is not.
+   */
+  DeclException2 (ExcNotMultiple,
+                  int, int,
+                  << "Division " << arg1 << " by " << arg2
+                  << " has remainder different from zero");
+
+  /**
+   * This exception is thrown if the iterator you access has corrupted data.
+   * For instance, the container it refers to might not have an entry at the
+   * point the iterator refers to.
+   *
+   * Typically, this will be an internal error of deal.II, because the
+   * increment and decrement operators should never yield an invalid iterator.
+   */
+  DeclExceptionMsg (ExcInvalidIterator,
+                    "You are trying to use an iterator, but the iterator is "
+                    "in an invalid state. This may indicate that the iterator "
+                    "object has not been initialized, or that it has been "
+                    "moved beyond the end of the range of valid elements.");
+
+  /**
+   * This exception is thrown if the iterator you incremented or decremented
+   * was already at its final state.
+   */
+  DeclExceptionMsg (ExcIteratorPastEnd,
+                    "You are trying to use an iterator, but the iterator is "
+                    "pointing past the end of the range of valid elements. "
+                    "It is not valid to dereference the iterator in this "
+                    "case.");
+
+  /**
+   * This exception works around a design flaw in the <tt>DeclException0</tt>
+   * macro: exceptions declared through DeclException0 do not allow one to
+   * specify a message that is displayed when the exception is raised, as
+   * opposed to the other exceptions, which allow showing a text along with the
+   * given parameters.
+   *
+   * When throwing this exception, you can give a message as a
+   * <tt>std::string</tt> as argument to the exception that is then displayed.
+   * The argument can, of course, be constructed at run-time, for example
+   * including the name of a file that cannot be opened, or any other text you
+   * may want to assemble from different pieces.
+   */
+  DeclException1 (ExcMessage,
+                  std::string,
+                  << arg1);
+
+  /**
+   * Parallel vectors with ghost elements are read-only and cannot be written to.
+   */
+  DeclExceptionMsg (ExcGhostsPresent,
+                    "You are trying an operation on a vector that is only "
+                    "allowed if the vector has no ghost elements, but the "
+                    "vector you are operating on does have ghost elements. "
+                    "Specifically, vectors with ghost elements are read-only "
+                    "and cannot appear in operations that write into these "
+                    "vectors."
+                    "\n\n"
+                    "See the glossary entry on 'Ghosted vectors' for more "
+                    "information.");
+
+  /**
+   * Some of our numerical classes allow for setting all entries to zero using
+   * the assignment operator <tt>=</tt>.
+   *
+   * In many cases, this assignment operator makes sense <b>only</b> for the
+   * argument zero. For a nonzero argument, this exception is thrown.
+   */
+  DeclExceptionMsg (ExcScalarAssignmentOnlyForZeroValue,
+                    "You are trying an operation of the form 'vector=s' with "
+                    "a nonzero scalar value 's'. However, such assignments "
+                    "are only allowed if the right hand side is zero.");
+
+  /**
+   * Thrown by functions that require support for the LAPACK library.
+   */
+  DeclException0 (ExcNeedsLAPACK);
+
+  /**
+   * Thrown by functions that require support for the NetCDF library.
+   */
+  DeclException0 (ExcNeedsNetCDF);
+
+  /**
+   * Thrown by functions that require support for the FunctionParser library.
+   */
+  DeclException0 (ExcNeedsFunctionparser);
+
+
+//@}
+} /*namespace StandardExceptions*/
+
+
+/**
+ * Special assertion for dimension mismatch.
+ *
+ * Asserts that <tt>dim1 == dim2</tt> and reports
+ * ExcDimensionMismatch(dim1,dim2) if the check fails. Each argument appears
+ * twice in the expansion, so the arguments should be free of side effects.
+ *
+ * @ingroup Exceptions
+ * @author Guido Kanschat 2007
+ */
+#define AssertDimension(dim1,dim2) Assert((dim1) == (dim2),       \
+                                          dealii::ExcDimensionMismatch((dim1),(dim2)))
+
+
+/**
+ * Special assertion, testing whether <tt>vec</tt> has size <tt>dim1</tt>, and
+ * each entry of the vector has the size <tt>dim2</tt>.
+ *
+ * @ingroup Exceptions
+ * @author Guido Kanschat 2010
+ */
+#define AssertVectorVectorDimension(vec,dim1,dim2) AssertDimension((vec).size(), (dim1)) \
+  for (unsigned int i=0;i<(dim1);++i) { AssertDimension((vec)[i].size(), (dim2)); }
+
+
+/**
+ * Special assertion for index range of nonnegative indices.
+ *
+ * Since this is used very often and always repeats the arguments, we
+ * introduce this special assertion for ExcIndexRange in order to keep the
+ * user codes shorter.
+ *
+ * Called with arguments <tt>index</tt> and <tt>range</tt> it asserts that
+ * <tt>index &lt; range</tt> and throws ExcIndexRange(index,0,range) if it
+ * fails.
+ *
+ * @ingroup Exceptions
+ * @author Guido Kanschat 2007
+ */
+#define AssertIndexRange(index,range) Assert((index) < (range), \
+                                             dealii::ExcIndexRange((index),0,(range)))
+// Variant of AssertIndexRange for indices of type types::global_dof_index.
+#define AssertGlobalIndexRange(index,range) Assert((index) < (range), \
+                                                   dealii::ExcIndexRangeType<dealii::types::global_dof_index>((index),0,(range)))
+
+/**
+ * An assertion that checks whether a number is finite or not. We explicitly
+ * cast the number to std::complex to match the signature of the exception
+ * (see there for an explanation of why we use std::complex at all) and
+ * because std::complex has no implicit conversions.
+ *
+ * @ingroup Exceptions
+ * @author Wolfgang Bangerth, 2015
+ */
+#define AssertIsFinite(number) Assert(dealii::numbers::is_finite(number), \
+                                      dealii::ExcNumberNotFinite(std::complex<double>(number)))
+
+using namespace StandardExceptions;
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/flow_function.h b/include/deal.II/base/flow_function.h
new file mode 100644
index 0000000..fe47d66
--- /dev/null
+++ b/include/deal.II/base/flow_function.h
@@ -0,0 +1,291 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2007 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__flow_function_h
+#define dealii__flow_function_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/point.h>
+#include <deal.II/base/thread_management.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace Functions
+{
+  /**
+   * Base class for analytic solutions to incompressible flow problems.
+   *
+   * In addition to the Function interface, this class provides for an
+   * offset of the pressure: if the pressure of the computed solution has an
+   * integral mean value different from zero, this value can be given to
+   * pressure_adjustment() in order to compute correct pressure errors.
+   *
+   * @note Derived classes should always implement pressures with integral
+   * mean value zero.
+   *
+   * @note Thread safety: Some of the functions make use of internal data to
+   * compute values. Therefore, every thread should obtain its own object of
+   * derived classes.
+   *
+   * @ingroup functions
+   * @author Guido Kanschat, 2007
+   */
+  template <int dim>
+  class FlowFunction : public Function<dim>
+  {
+  public:
+    /**
+     * Constructor, setting up some internal data structures.
+     */
+    FlowFunction();
+
+    /**
+     * Virtual destructor.
+     */
+    virtual ~FlowFunction();
+
+    /**
+     * Store an adjustment for the pressure function, such that its mean value
+     * is <tt>p</tt>.
+     */
+    void pressure_adjustment(double p);
+
+    /**
+     * Values in a structure more suitable for vector valued functions. The
+     * outer vector is indexed by solution component, the inner by quadrature
+     * point.
+     */
+    virtual void vector_values (const std::vector<Point<dim> > &points,
+                                std::vector<std::vector<double> > &values) const = 0;
+    /**
+     * Gradients in a structure more suitable for vector valued functions. The
+     * outer vector is indexed by solution component, the inner by quadrature
+     * point.
+     */
+    virtual void vector_gradients (const std::vector<Point<dim> >            &points,
+                                   std::vector<std::vector<Tensor<1,dim> > > &gradients) const = 0;
+    /**
+     * Force terms in a structure more suitable for vector valued functions.
+     * The outer vector is indexed by solution component, the inner by
+     * quadrature point.
+     *
+     * @warning This is not the true Laplacian, but the force term to be used
+     * as right hand side in Stokes' equations
+     */
+    virtual void vector_laplacians (const std::vector<Point<dim> > &points,
+                                    std::vector<std::vector<double> >   &values) const = 0;
+
+    virtual void vector_value (const Point<dim> &points, Vector<double> &value) const;
+    virtual double value (const Point<dim> &points, const unsigned int component) const;
+    virtual void vector_value_list (const std::vector<Point<dim> > &points,
+                                    std::vector<Vector<double> >   &values) const;
+    virtual void vector_gradient_list (const std::vector<Point<dim> >            &points,
+                                       std::vector<std::vector<Tensor<1,dim> > > &gradients) const;
+    /**
+     * The force term in the momentum equation.
+     */
+    virtual void vector_laplacian_list (const std::vector<Point<dim> > &points,
+                                        std::vector<Vector<double> >   &values) const;
+
+    std::size_t memory_consumption () const;
+
+  protected:
+    /**
+     * Mean value of the pressure to be added by derived classes.
+     */
+    double mean_pressure;
+
+  private:
+
+    /**
+     * A mutex that guards the following scratch arrays.
+     */
+    mutable Threads::Mutex mutex;
+
+    /**
+     * Auxiliary values for the usual Function interface.
+     */
+    mutable std::vector<std::vector<double> > aux_values;
+
+    /**
+     * Auxiliary gradients for the usual Function interface.
+     */
+    mutable std::vector<std::vector<Tensor<1,dim> > > aux_gradients;
+  };
+
+  /**
+   * Laminar pipe flow in two and three dimensions. The channel stretches
+   * along the <i>x</i>-axis and has radius @p radius. The @p Reynolds number
+   * is used to scale the pressure properly for a Navier-Stokes problem.
+   *
+   * @ingroup functions
+   * @author Guido Kanschat, 2007
+   */
+  template <int dim>
+  class PoisseuilleFlow : public FlowFunction<dim>
+  {
+  public:
+    /**
+     * Construct an object for the given channel radius <tt>r</tt> and the
+     * Reynolds number <tt>Re</tt>.
+     */
+    PoisseuilleFlow (const double r,
+                     const double Re);
+    virtual ~PoisseuilleFlow();
+
+    virtual void vector_values (const std::vector<Point<dim> > &points,
+                                std::vector<std::vector<double> > &values) const;
+    virtual void vector_gradients (const std::vector<Point<dim> > &points,
+                                   std::vector<std::vector<Tensor<1,dim> > > &gradients) const;
+    virtual void vector_laplacians (const std::vector<Point<dim> > &points,
+                                    std::vector<std::vector<double> >   &values) const;
+
+  private:
+    const double radius;
+    const double Reynolds;
+  };
+
+
+  /**
+   * Artificial divergence free function with homogeneous boundary conditions
+   * on the cube [-1,1]<sup>dim</sup>.
+   *
+   * The function in 2D is
+   * @f[
+   * \left(\begin{array}{c}u\\v\\p\end{array}\right)
+   * = \left(\begin{array}{c}\cos^2x \sin y\cos y\\-\sin x\cos x\cos^2y\\
+   * \sin x\cos x\sin y\cos y\end{array}\right)
+   * @f]
+   * @ingroup functions
+   * @author Guido Kanschat, 2007
+   */
+  template <int dim>
+  class StokesCosine :
+    public FlowFunction<dim>
+  {
+  public:
+    /**
+     * Constructor taking the viscosity and the reaction parameter; they
+     * default to 1 and 0, respectively.
+     */
+    StokesCosine (const double viscosity = 1., const double reaction = 0.);
+    /**
+     * Change the viscosity and the reaction parameter.
+     */
+    void set_parameters (const double viscosity, const double reaction);
+    virtual ~StokesCosine();
+
+    virtual void vector_values (const std::vector<Point<dim> > &points,
+                                std::vector<std::vector<double> > &values) const;
+    virtual void vector_gradients (const std::vector<Point<dim> > &points,
+                                   std::vector<std::vector<Tensor<1,dim> > > &gradients) const;
+    virtual void vector_laplacians (const std::vector<Point<dim> > &points,
+                                    std::vector<std::vector<double> >   &values) const;
+
+  private:
+    /// The viscosity
+    double viscosity;
+    /// The reaction parameter
+    double reaction;
+  };
+
+
+  /**
+   * The solution to Stokes' equations on an L-shaped domain.
+   *
+   * Taken from Houston, Schötzau, Wihler, proceedings of ENUMATH 2003.
+   *
+   * @ingroup functions
+   * @author Guido Kanschat, 2007
+   */
+  class StokesLSingularity : public FlowFunction<2>
+  {
+  public:
+    /// Constructor setting up some data.
+    StokesLSingularity();
+
+    virtual void vector_values (const std::vector<Point<2> > &points,
+                                std::vector<std::vector<double> > &values) const;
+    virtual void vector_gradients (const std::vector<Point<2> > &points,
+                                   std::vector<std::vector<Tensor<1,2> > > &gradients) const;
+    virtual void vector_laplacians (const std::vector<Point<2> > &points,
+                                    std::vector<std::vector<double> >   &values) const;
+  private:
+    /// The auxiliary function Psi.
+    double Psi(double phi) const;
+    /// The 1st derivative of Psi()
+    double Psi_1(double phi) const;
+    /// The 2nd derivative of Psi()
+    double Psi_2(double phi) const;
+    /// The 3rd derivative of Psi()
+    double Psi_3(double phi) const;
+    /// The 4th derivative of Psi()
+    double Psi_4(double phi) const;
+    /// The angle of the reentrant corner
+    const double omega;
+    /// The exponent of the radius
+    static const double lambda;
+    /// Cosine of lambda times omega
+    const double coslo;
+    /// Auxiliary variable 1+lambda
+    const double lp;
+    /// Auxiliary variable 1-lambda
+    const double lm;
+  };
+
+  /**
+   * Flow solution in 2D by Kovasznay (1947).
+   *
+   * This function is valid on the half plane right of the line <i>x=1/2</i>.
+   *
+   * @ingroup functions
+   * @author Guido Kanschat, 2007
+   */
+  class Kovasznay : public FlowFunction<2>
+  {
+  public:
+    /**
+     * Construct an object for the given Reynolds number <tt>Re</tt>. If the
+     * parameter <tt>Stokes</tt> is true, the right hand side of the momentum
+     * equation returned by vector_laplacians() contains the nonlinearity,
+     * such that the Kovasznay solution can be obtained as the solution to a
+     * Stokes problem.
+     */
+    Kovasznay (const double Re, bool Stokes = false);
+    virtual ~Kovasznay();
+
+    virtual void vector_values (const std::vector<Point<2> > &points,
+                                std::vector<std::vector<double> > &values) const;
+    virtual void vector_gradients (const std::vector<Point<2> > &points,
+                                   std::vector<std::vector<Tensor<1,2> > > &gradients) const;
+    virtual void vector_laplacians (const std::vector<Point<2> > &points,
+                                    std::vector<std::vector<double> >   &values) const;
+
+    /// The value of lambda.
+    double lambda () const;
+  private:
+    const double Reynolds;
+    double lbda;
+    double p_average;
+    const bool stokes;
+  };
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/function.h b/include/deal.II/base/function.h
new file mode 100644
index 0000000..1f1e93a
--- /dev/null
+++ b/include/deal.II/base/function.h
@@ -0,0 +1,866 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__function_h
+#define dealii__function_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/function_time.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/tensor.h>
+#include <deal.II/base/symmetric_tensor.h>
+#include <deal.II/base/point.h>
+#include <deal.II/base/std_cxx11/function.h>
+
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+template <typename number> class Vector;
+template <int rank, int dim, typename Number> class TensorFunction;
+
+/**
+ * This class is a model for a general function that, given a point at which
+ * to evaluate the function, returns a vector of values with one or more
+ * components.
+ *
+ * The class serves the purpose of representing both scalar and vector valued
+ * functions. To this end, we consider scalar functions as a special case of
+ * vector valued functions, namely those whose return vector has only a single
+ * component. Since handling vectors is comparatively expensive, the
+ * interface of this class has functions which only ask for a single component
+ * of the vector-valued results (this is what you will usually need in case
+ * you know that your function is scalar-valued) as well as functions you can
+ * ask for an entire vector of results with as many components as the function
+ * object represents. Access to function objects therefore is through the
+ * following methods:
+ * @code
+ *   // access to one component at one point
+ *   double value        (const Point<dim>   &p,
+ *                        const unsigned int  component = 0) const;
+ *
+ *   // return all components at one point
+ *   void   vector_value (const Point<dim>   &p,
+ *                        Vector<double>     &value) const;
+ * @endcode
+ *
+ * For more efficiency, there are other functions returning one or all
+ * components at a list of points at once:
+ * @code
+ *   // access to one component at several points
+ *   void   value_list (const std::vector<Point<dim> >  &point_list,
+ *                      std::vector<double>             &value_list,
+ *                      const unsigned int  component = 0) const;
+ *
+ *   // return all components at several points
+ *   void   vector_value_list (const std::vector<Point<dim> >    &point_list,
+ *                             std::vector<Vector<double> >      &value_list) const;
+ * @endcode
+ *
+ * Furthermore, there are functions returning the gradient of the function or
+ * even higher derivatives at one or several points.
+ *
+ * You will usually only overload those functions you need; the functions
+ * returning several values at a time (value_list(), vector_value_list(), and
+ * gradient analogs) will call those returning only one value (value(),
+ * vector_value(), and gradient analogs), while those ones will throw an
+ * exception when called but not overloaded.
+ *
+ * Conversely, the functions returning all components of the function at one
+ * or several points (i.e. vector_value(), vector_value_list()), will
+ * <em>not</em> call the function returning one component at one point
+ * repeatedly, once for each point and component. The reason is efficiency:
+ * this would amount to too many virtual function calls. If you have vector-
+ * valued functions, you should therefore also provide overloads of the
+ * virtual functions for all components at a time.
+ *
+ * Also note, that unless only called a very small number of times, you should
+ * overload all sets of functions (returning only one value, as well as those
+ * returning a whole array), since the cost of evaluation of a point value is
+ * often less than the virtual function call itself.
+ *
+ * Support for time dependent functions can be found in the base class
+ * FunctionTime.
+ *
+ *
+ * <h3>Functions that return tensors</h3>
+ *
+ * If the functions you are dealing with have a number of components that are
+ * a priori known (for example, <tt>dim</tt> elements), you might consider
+ * using the TensorFunction class instead. This is, in particular, true if the
+ * objects you return have the properties of a tensor, i.e., they are for
+ * example dim-dimensional vectors or dim-by-dim matrices. On the other hand,
+ * functions like VectorTools::interpolate or
+ * VectorTools::interpolate_boundary_values definitely only want objects of
+ * the current type. You can use the VectorFunctionFromTensorFunction class to
+ * convert the former to the latter.
+ *
+ *
+ * <h3>Functions that return different fields</h3>
+ *
+ * Most of the time, your functions will have the form $f : \Omega \rightarrow
+ * {\mathbb R}^{n_\text{components}}$. However, there are occasions where you
+ * want the function to return vectors (or scalars) over a different number
+ * field, for example functions that return complex numbers or vectors of
+ * complex numbers: $f : \Omega \rightarrow {\mathbb
+ * C}^{n_\text{components}}$. In such cases, you can use the second template
+ * argument of this class: it describes the scalar type to be used for each
+ * component of your return values. It defaults to @p double, but in the
+ * example above, it could be set to <code>std::complex@<double@></code>.
+ *
+ *
+ * @ingroup functions
+ * @author Wolfgang Bangerth, 1998, 1999, Luca Heltai 2014
+ */
+template <int dim, typename Number=double>
+class Function : public FunctionTime<Number>,
+  public Subscriptor
+{
+public:
+  /**
+   * Export the value of the template parameter as a static member constant.
+   * Sometimes useful for some expression template programming.
+   */
+  static const unsigned int dimension = dim;
+
+  /**
+   * Number of vector components.
+   */
+  const unsigned int n_components;
+
+  /**
+   * Constructor. May take an initial value for the number of components
+   * (which defaults to one, i.e. a scalar function), and the time variable,
+   * which defaults to zero.
+   */
+  Function (const unsigned int n_components = 1,
+            const Number       initial_time = 0.0);
+
+  /**
+   * Virtual destructor; absolutely necessary in this case.
+   *
+   * This destructor is declared pure virtual, such that objects of this class
+   * cannot be created. Since all the other virtual functions have a pseudo-
+   * implementation to avoid overhead in derived classes, they can not be
+   * abstract. As a consequence, we could generate an object of this class
+   * because none of this class's functions are abstract.
+   *
+   * We circumvent this problem by making the destructor of this class
+   * abstract virtual. This ensures that at least one member function is
+   * abstract, and consequently, no objects of type Function can be created.
+   * However, there is no need for derived classes to explicitly implement a
+   * destructor: every class has a destructor, either explicitly implemented
+   * or implicitly generated by the compiler, and this resolves the
+   * abstractness of any derived class even if they do not have an explicitly
+   * declared destructor.
+   *
+   * Nonetheless, since derived classes want to call the destructor of a base
+   * class, this destructor is implemented (despite it being pure virtual).
+   */
+  virtual ~Function () = 0;
+
+  /**
+   * Assignment operator. This is here only so that you can have objects of
+   * derived classes in containers, or assign them otherwise. It will raise an
+   * exception if the object from which you assign has a different number of
+   * components than the one being assigned to.
+   */
+  Function &operator= (const Function &f);
+
+  /**
+   * Return the value of the function at the given point. Unless there is only
+   * one component (i.e. the function is scalar), you should state the
+   * component you want to have evaluated; it defaults to zero, i.e. the first
+   * component.
+   */
+  virtual Number value (const Point<dim>   &p,
+                        const unsigned int  component = 0) const;
+
+  /**
+   * Return all components of a vector-valued function at a given point.
+   *
+   * <tt>values</tt> shall have the right size beforehand, i.e. #n_components.
+   *
+   * The default implementation will call value() for each component.
+   */
+  virtual void vector_value (const Point<dim>   &p,
+                             Vector<Number>     &values) const;
+
+  /**
+   * Set <tt>values</tt> to the point values of the specified component of the
+   * function at the <tt>points</tt>.  It is assumed that <tt>values</tt>
+   * already has the right size, i.e.  the same size as the <tt>points</tt>
+   * array.
+   *
+   * By default, this function repeatedly calls value() for each point
+   * separately, to fill the output array.
+   */
+  virtual void value_list (const std::vector<Point<dim> > &points,
+                           std::vector<Number>            &values,
+                           const unsigned int              component = 0) const;
+
+  /**
+   * Set <tt>values</tt> to the point values of the function at the
+   * <tt>points</tt>.  It is assumed that <tt>values</tt> already has the
+   * right size, i.e.  the same size as the <tt>points</tt> array, and that
+   * all elements be vectors with the same number of components as this
+   * function has.
+   *
+   * By default, this function repeatedly calls vector_value() for each point
+   * separately, to fill the output array.
+   */
+  virtual void vector_value_list (const std::vector<Point<dim> > &points,
+                                  std::vector<Vector<Number> >   &values) const;
+
+  /**
+   * For each component of the function, fill a vector of values, one for each
+   * point.
+   *
+   * The default implementation of this function in Function calls
+   * value_list() for each component. Derived classes can reimplement this
+   * function to improve performance.
+   */
+  virtual void vector_values (const std::vector<Point<dim> > &points,
+                              std::vector<std::vector<Number> > &values) const;
+
+  /**
+   * Return the gradient of the specified component of the function at the
+   * given point.
+   */
+  virtual Tensor<1,dim, Number> gradient (const Point<dim>   &p,
+                                          const unsigned int  component = 0) const;
+
+  /**
+   * Return the gradient of all components of the function at the given point.
+   */
+  virtual void vector_gradient (const Point<dim>            &p,
+                                std::vector<Tensor<1,dim, Number> > &gradients) const;
+
+  /**
+   * Set <tt>gradients</tt> to the gradients of the specified component of the
+   * function at the <tt>points</tt>.  It is assumed that <tt>gradients</tt>
+   * already has the right size, i.e.  the same size as the <tt>points</tt>
+   * array.
+   */
+  virtual void gradient_list (const std::vector<Point<dim> > &points,
+                              std::vector<Tensor<1,dim, Number> >    &gradients,
+                              const unsigned int              component = 0) const;
+
+  /**
+   * For each component of the function, fill a vector of gradient values, one
+   * for each point.
+   *
+   * The default implementation of this function in Function calls
+   * value_list() for each component (presumably gradient_list() is meant;
+   * TODO confirm). Derived classes can reimplement it to improve performance.
+   */
+  virtual void vector_gradients (const std::vector<Point<dim> >            &points,
+                                 std::vector<std::vector<Tensor<1,dim, Number> > > &gradients) const;
+
+  /**
+   * Set <tt>gradients</tt> to the gradients of the function at the
+   * <tt>points</tt>, for all components. It is assumed that
+   * <tt>gradients</tt> already has the right size, i.e. the same size as the
+   * <tt>points</tt> array.
+   *
+   * The outer loop over <tt>gradients</tt> is over the points in the list,
+   * the inner loop over the different components of the function.
+   */
+  virtual void vector_gradient_list (const std::vector<Point<dim> >            &points,
+                                     std::vector<std::vector<Tensor<1,dim, Number> > > &gradients) const;
+
+  /**
+   * Compute the Laplacian of a given component at point <tt>p</tt>.
+   */
+  virtual Number laplacian (const Point<dim>   &p,
+                            const unsigned int  component = 0) const;
+
+  /**
+   * Compute the Laplacian of all components at point <tt>p</tt> and store
+   * them in <tt>values</tt>.
+   */
+  virtual void vector_laplacian (const Point<dim>   &p,
+                                 Vector<Number>     &values) const;
+
+  /**
+   * Compute the Laplacian of one component at a set of points.
+   */
+  virtual void laplacian_list (const std::vector<Point<dim> > &points,
+                               std::vector<Number>            &values,
+                               const unsigned int              component = 0) const;
+
+  /**
+   * Compute the Laplacians of all components at a set of points.
+   */
+  virtual void vector_laplacian_list (const std::vector<Point<dim> > &points,
+                                      std::vector<Vector<Number> >   &values) const;
+
+  /**
+   * Compute the Hessian of a given component at point <tt>p</tt>, that is the
+   * gradient of the gradient of the function.
+   */
+  virtual SymmetricTensor<2,dim,Number> hessian (const Point<dim>   &p,
+                                                 const unsigned int          component = 0) const;
+
+  /**
+   * Compute the Hessian of all components at point <tt>p</tt> and store them
+   * in <tt>values</tt>.
+   */
+  virtual void vector_hessian (const Point<dim>                           &p,
+                               std::vector<SymmetricTensor<2,dim,Number> > &values) const;
+
+  /**
+   * Compute the Hessian of one component at a set of points.
+   */
+  virtual void hessian_list (const std::vector<Point<dim> >              &points,
+                             std::vector<SymmetricTensor<2,dim,Number> > &values,
+                             const unsigned int                          component = 0) const;
+
+  /**
+   * Compute the Hessians of all components at a set of points.
+   */
+  virtual void vector_hessian_list (const std::vector<Point<dim> >                            &points,
+                                    std::vector<std::vector<SymmetricTensor<2,dim,Number> > > &values) const;
+
+
+  /**
+   * Return an estimate for the memory consumption, in bytes, of this object.
+   * This is not exact (but will usually be close) because calculating the
+   * memory usage of trees (e.g., <tt>std::map</tt>) is difficult.
+   */
+  std::size_t memory_consumption () const;
+};
+
+
+
+/**
+ * Provide a function which always returns zero. Obviously, also the derivatives
+ * of this function are zero. Also, it returns zero on all components in case
+ * the function is not a scalar one, which can be obtained by passing the
+ * constructor the appropriate number of components.
+ *
+ * This function is of use when you want to implement homogeneous boundary
+ * conditions, or zero initial conditions.
+ *
+ * @ingroup functions
+ * @author Wolfgang Bangerth, 1998, 1999
+ */
+template <int dim, typename Number=double>
+class ZeroFunction : public Function<dim, Number>
+{
+public:
+  /**
+   * Constructor. The number of components defaults to one.
+   */
+  ZeroFunction (const unsigned int n_components = 1);
+
+  /**
+   * Virtual destructor; absolutely necessary in this case.
+   *
+   */
+  virtual ~ZeroFunction ();
+
+  virtual Number value (const Point<dim>   &p,
+                        const unsigned int  component) const;
+
+  virtual void vector_value (const Point<dim> &p,
+                             Vector<Number>   &return_value) const;
+
+  virtual void value_list (const std::vector<Point<dim> > &points,
+                           std::vector<Number>            &values,
+                           const unsigned int              component = 0) const;
+
+  virtual void vector_value_list (const std::vector<Point<dim> > &points,
+                                  std::vector<Vector<Number> >   &values) const;
+
+  virtual Tensor<1,dim, Number> gradient (const Point<dim> &p,
+                                          const unsigned int component = 0) const;
+
+  virtual void vector_gradient (const Point<dim>            &p,
+                                std::vector<Tensor<1,dim, Number> > &gradients) const;
+
+  virtual void gradient_list (const std::vector<Point<dim> > &points,
+                              std::vector<Tensor<1,dim, Number> >    &gradients,
+                              const unsigned int              component = 0) const;
+
+  virtual void vector_gradient_list (const std::vector<Point<dim> >            &points,
+                                     std::vector<std::vector<Tensor<1,dim, Number> > > &gradients) const;
+};
+
+
+
+/**
+ * Provide a function which always returns the constant values handed to the
+ * constructor.
+ *
+ * Obviously, the derivatives of this function are zero, which is why we derive
+ * this class from <tt>ZeroFunction</tt>: we then only have to overload the
+ * value functions, not all the derivatives. In some way, it would be more
+ * obvious to do the derivation in the opposite direction, i.e. let
+ * <tt>ZeroFunction</tt> be a more specialized version of
+ * <tt>ConstantFunction</tt>; however, this would be less efficient, since we
+ * could not make use of the fact that the function value of the
+ * <tt>ZeroFunction</tt> is known at compile time and need not be looked up
+ * somewhere in memory.
+ *
+ * @ingroup functions
+ * @author Wolfgang Bangerth, 1998, 1999, Lei Qiao, 2015
+ */
+template <int dim, typename Number=double>
+class ConstantFunction : public ZeroFunction<dim, Number>
+{
+public:
+  /**
+   * Constructor; set values of all components to the provided one. The
+   * default number of components is one.
+   */
+  ConstantFunction (const Number       value,
+                    const unsigned int n_components = 1);
+
+  /**
+   * Constructor; takes a <tt>std::vector@<Number@></tt> object as an argument.
+   * The number of components is determined by <tt>values.size()</tt>.
+   */
+  ConstantFunction (const std::vector<Number> &values);
+
+  /**
+   * Constructor; takes a <tt>Vector@<Number@></tt> object as an argument. The
+   * number of components is determined by <tt>values.size()</tt>.
+   */
+  ConstantFunction (const Vector<Number> &values);
+
+  /**
+   * Constructor; uses the values stored in [begin_ptr, begin_ptr+n_components)
+   * to initialize a new object.
+   */
+  ConstantFunction (const Number *begin_ptr, const unsigned int n_components);
+
+  /**
+   * Virtual destructor; absolutely necessary in this case.
+   */
+  virtual ~ConstantFunction ();
+
+  virtual Number value (const Point<dim>   &p,
+                        const unsigned int  component) const;
+
+  virtual void vector_value (const Point<dim> &p,
+                             Vector<Number>   &return_value) const;
+
+  virtual void value_list (const std::vector<Point<dim> > &points,
+                           std::vector<Number>            &return_values,
+                           const unsigned int              component = 0) const;
+
+  virtual void vector_value_list (const std::vector<Point<dim> > &points,
+                                  std::vector<Vector<Number> >   &return_values) const;
+
+  std::size_t memory_consumption () const;
+
+protected:
+  /**
+   * Store the constant function value vector.
+   */
+  std::vector<Number> function_value_vector;
+};
+
+
+/**
+ * This is a constant vector-valued function, in which one or more components
+ * of the vector have a constant value and all other components are zero.  It
+ * is especially useful as a weight function for
+ * VectorTools::integrate_difference, where it allows integrating only one or
+ * a few vector components, rather than the entire vector-valued solution. In
+ * other words, it acts as a component mask with a single component selected
+ * (see
+ * @ref GlossComponentMask "the glossary entry on component masks").
+ * See the step-20 tutorial program for a detailed explanation and a use case.
+ *
+ * @ingroup functions
+ * @author Guido Kanschat, 2000, Wolfgang Bangerth 2006
+ */
+template <int dim, typename Number=double>
+class ComponentSelectFunction : public ConstantFunction<dim, Number>
+{
+public:
+  /**
+   * Constructor if only a single component shall be non-zero. Arguments
+   * denote the component selected, the value for that component and the total
+   * number of vector components.
+   */
+  ComponentSelectFunction (const unsigned int selected,
+                           const Number       value,
+                           const unsigned int n_components);
+
+  /**
+   * Constructor. As before, but the value for the selected component is
+   * assumed to be one. In essence, this function then works as a mask.
+   */
+  ComponentSelectFunction (const unsigned int selected,
+                           const unsigned int n_components);
+
+  /**
+   * Constructor if multiple components shall have non-zero, unit values (i.e.
+   * this should be a mask for multiple components). The first argument
+   * denotes a half-open interval of components (for example std::pair(0,dim)
+   * for the first dim components), and the second argument is the total
+   * number of vector components.
+   */
+  ComponentSelectFunction (const std::pair<unsigned int, unsigned int> &selected,
+                           const unsigned int n_components);
+
+
+  /**
+   * Substitute function value with value of a <tt>ConstantFunction@<dim,
+   * Number@></tt> object and keep the current selection pattern.
+   *
+   * This is useful if you want to have different values in different
+   * components since the provided constructors of the
+   * <tt>ComponentSelectFunction@<dim, Number@></tt> class can only set the
+   * same value for all components.
+   *
+   * @note We copy the underlying component value data from @p f starting at
+   * its beginning, so the number of components of @p f cannot be less than
+   * that of the calling object.
+   */
+  virtual void substitute_function_value_with (const ConstantFunction<dim, Number> &f);
+
+  /**
+   * Return the value of the function at the given point for all components.
+   */
+  virtual void vector_value (const Point<dim> &p,
+                             Vector<Number>   &return_value) const;
+
+  /**
+   * Set <tt>values</tt> to the point values of the function at the
+   * <tt>points</tt>, for all components. It is assumed that <tt>values</tt>
+   * already has the right size, i.e. the same size as the <tt>points</tt>
+   * array.
+   */
+  virtual void vector_value_list (const std::vector<Point<dim> > &points,
+                                  std::vector<Vector<Number> >   &values) const;
+
+  /**
+   * Return an estimate for the memory consumption, in bytes, of this object.
+   * This is not exact (but will usually be close) because calculating the
+   * memory usage of trees (e.g., <tt>std::map</tt>) is difficult.
+   */
+  std::size_t memory_consumption () const;
+
+protected:
+  /**
+   * Half-open interval of the indices of selected components.
+   */
+  const std::pair<unsigned int,unsigned int> selected_components;
+};
+
+
+
+/**
+ * This class provides a way to convert a scalar function of the kind
+ * @code
+ *   Number foo (const Point<dim> &);
+ * @endcode
+ * into an object of type Function@<dim@>. Since the argument returns a
+ * scalar, the result is clearly a Function object for which
+ * <code>function.n_components==1</code>. The class works by storing a pointer
+ * to the given function and every time
+ * <code>function.value(p,component)</code> is called, calls
+ * <code>foo(p)</code> and returns the corresponding value. It also makes sure
+ * that <code>component</code> is in fact zero, as needs be for scalar
+ * functions.
+ *
+ * The class provides an easy way to turn a simple global function into
+ * something that has the required Function@<dim@> interface for operations
+ * like VectorTools::interpolate_boundary_values() etc., and thereby allows
+ * for simpler experimenting without having to write all the boiler plate code
+ * of declaring a class that is derived from Function and implementing the
+ * Function::value() function. An example of this is given in the results
+ * section of step-53.
+ *
+ * The class gains additional expressive power because the argument it takes
+ * does not have to be a pointer to an actual function. Rather, it is a
+ * function object, i.e., it can also be the result of a call to std::bind (or
+ * boost::bind) or some other object that can be called with a single
+ * argument. For example, if you need a Function object that returns the norm
+ * of a point, you could write it like so:
+ * @code
+ *   template <int dim, typename Number>
+ *   class Norm : public Function<dim, Number> {
+ *     public:
+ *       virtual Number value (const Point<dim> &p,
+ *                             const unsigned int component) const {
+ *         Assert (component == 0, ExcMessage ("This object is scalar!"));
+ *         return p.norm();
+ *       }
+ *    };
+ *
+ *    Norm<2> my_norm_object;
+ * @endcode
+ * and then pass the <code>my_norm_object</code> around, or you could write it
+ * like so:
+ * @code
+ *   ScalarFunctionFromFunctionObject<dim, Number> my_norm_object (&Point<dim>::norm);
+ * @endcode
+ *
+ * Similarly, to generate an object that computes the distance to a point
+ * <code>q</code>, we could do this:
+ * @code
+ *   template <int dim, typename Number>
+ *   class DistanceTo : public Function<dim, Number> {
+ *     public:
+ *       DistanceTo (const Point<dim> &q) : q(q) {}
+ *       virtual Number value (const Point<dim> &p,
+ *                             const unsigned int component) const {
+ *         Assert (component == 0, ExcMessage ("This object is scalar!"));
+ *         return q.distance(p);
+ *       }
+ *     private:
+ *       const Point<dim> q;
+ *    };
+ *
+ *    Point<2> q (2,3);
+ *    DistanceTo<2> my_distance_object (q);
+ * @endcode
+ * or we could write it like so:
+ * @code
+ *    ScalarFunctionFromFunctionObject<dim, Number>
+ *      my_distance_object (std_cxx11::bind (&Point<dim>::distance,
+ *                                           q,
+ *                                           std_cxx11::_1));
+ * @endcode
+ * The savings in work to write this are apparent.
+ *
+ * @author Wolfgang Bangerth, 2011
+ */
+template <int dim, typename Number=double>
+class ScalarFunctionFromFunctionObject : public Function<dim, Number>
+{
+public:
+  /**
+   * Given a function object that takes a Point and returns a Number value,
+   * convert this into an object that matches the Function@<dim, Number@>
+   * interface.
+   */
+  ScalarFunctionFromFunctionObject (const std_cxx11::function<Number (const Point<dim> &)> &function_object);
+
+  /**
+   * Return the value of the function at the given point. Returns the value
+   * the function given to the constructor produces for this point.
+   */
+  virtual Number value (const Point<dim>   &p,
+                        const unsigned int  component = 0) const;
+
+private:
+  /**
+   * The function object which we call when this class's value() or
+   * value_list() functions are called.
+   */
+  const std_cxx11::function<Number (const Point<dim> &)> function_object;
+};
+
+
+
+/**
+ * This class is similar to the ScalarFunctionFromFunctionObject class in that
+ * it allows for the easy conversion of a function object to something that
+ * satisfies the interface of the Function base class. The difference is that
+ * here, the given function object is still a scalar function (i.e. it has a
+ * single value at each space point) but that the Function object generated is
+ * vector valued. The number of vector components is specified in the
+ * constructor, where one also selects a single one of these vector components
+ * that should be filled by the passed object. The result is a vector Function
+ * object that returns zero in each component except the single selected one
+ * where it returns the value returned by the function object given as the
+ * first argument to the constructor.
+ *
+ * @note In the above discussion, note the difference between the (scalar)
+ * "function object" (i.e., a C++ object <code>x</code> that can be called as
+ * in <code>x(p)</code>) and the capitalized (vector valued) "Function object"
+ * (i.e., an object of a class that is derived from the Function base class).
+ *
+ * To be more concrete, let us consider the following example:
+ * @code
+ *   Number one (const Point<2> &p) { return 1; }
+ *   VectorFunctionFromScalarFunctionObject<2>
+ *      component_mask (&one, 1, 3);
+ * @endcode
+ * Here, <code>component_mask</code> then represents a Function object that
+ * for every point returns the vector $(0, 1, 0)^T$, i.e. a mask function that
+ * could, for example, be passed to VectorTools::integrate_difference(). This
+ * effect can also be achieved using the ComponentSelectFunction class but is
+ * obviously easily extended to functions that are non-constant in their one
+ * component.
+ *
+ * @author Wolfgang Bangerth, 2011
+ */
+template <int dim, typename Number=double>
+class VectorFunctionFromScalarFunctionObject : public Function<dim, Number>
+{
+public:
+  /**
+   * Given a function object that takes a Point and returns a Number value,
+   * convert this into an object that matches the Function@<dim@> interface.
+   *
+   * @param function_object The scalar function that will form one component
+   * of the resulting Function object.
+   * @param selected_component The single component that should be filled by
+   * the first argument.
+   * @param n_components The total number of vector components of the
+   * resulting Function object.
+   */
+  VectorFunctionFromScalarFunctionObject (const std_cxx11::function<Number (const Point<dim> &)> &function_object,
+                                          const unsigned int selected_component,
+                                          const unsigned int n_components);
+
+  /**
+   * Return the value of the function at the given point. Returns the value
+   * the function given to the constructor produces for this point.
+   */
+  virtual Number value (const Point<dim>   &p,
+                        const unsigned int  component = 0) const;
+
+  /**
+   * Return all components of a vector-valued function at a given point.
+   *
+   * <tt>values</tt> shall have the right size beforehand, i.e. #n_components.
+   */
+  virtual void vector_value (const Point<dim>   &p,
+                             Vector<Number>     &values) const;
+
+private:
+  /**
+   * The function object which we call when this class's value() or
+   * value_list() functions are called.
+   */
+  const std_cxx11::function<Number (const Point<dim> &)> function_object;
+
+  /**
+   * The vector component whose value is to be filled by the given scalar
+   * function.
+   */
+  const unsigned int selected_component;
+};
+
+
+/**
+ * This class is built as a means of translating the <code>Tensor<1,dim,
+ * Number> </code> values produced by objects of type TensorFunction and
+ * returning them as a multiple component version of the same thing as a
+ * Vector for use in, for example, the VectorTools::interpolate or the many
+ * other functions taking Function objects. It allows the user to place the
+ * desired components into an <tt>n_components</tt> long vector starting at
+ * the <tt>selected_component</tt> location in that vector and have all other
+ * components be 0.
+ *
+ * For example: Say you created a class called
+ *  @code
+ *    class RightHandSide : public TensorFunction<rank,dim, Number>
+ *  @endcode
+ * which extends the TensorFunction class and you have an object
+ *  @code
+ *    RightHandSide<1,dim, Number> rhs;
+ *  @endcode
+ * of that class which you want to interpolate onto your mesh using the
+ * VectorTools::interpolate function, but the finite element you use for the
+ * DoFHandler object has 3 copies of a finite element with <tt>dim</tt>
+ * components, for a total of 3*dim components. To interpolate onto that
+ * DoFHandler, you need an object of type Function that has 3*dim vector
+ * components. Creating such an object from the existing <code>rhs</code>
+ * object is done using this piece of code:
+ *  @code
+ *      RightHandSide<1,dim, Number> rhs;
+ *      VectorFunctionFromTensorFunction<dim, Number> rhs_vector_function (rhs, 0, 3*dim);
+ *  @endcode
+ * where the <code>dim</code> components of the tensor function are placed
+ * into the first <code>dim</code> components of the function object.
+ *
+ * @author Spencer Patty, 2013
+ */
+template <int dim, typename Number=double>
+class VectorFunctionFromTensorFunction : public Function<dim, Number>
+{
+public:
+  /**
+   * Given a TensorFunction object that takes a <tt>Point</tt> and returns a
+   * <tt>Tensor<1,dim, Number></tt> value, convert this into an object that
+   * matches the Function@<dim@> interface.
+   *
+   * By default, create a Vector object of the same size as
+   * <tt>tensor_function</tt> returns, i.e., with <tt>dim</tt> components.
+   *
+   * @param tensor_function The TensorFunction that will form <tt>dim</tt>
+   * components of the resulting Function object.
+   * @param selected_component The first component that should be filled by
+   * the first argument.  This should be such that the entire tensor_function
+   * fits inside the <tt>n_components</tt> length return vector.
+   * @param n_components The total number of vector components of the
+   * resulting Function object.
+   */
+  VectorFunctionFromTensorFunction (const TensorFunction<1,dim, Number> &tensor_function,
+                                    const unsigned int selected_component=0,
+                                    const unsigned int n_components=dim);
+
+  /**
+   * Destructor. Declared virtual, consistent with the other virtual member
+   * functions of this class.
+   */
+  virtual ~VectorFunctionFromTensorFunction();
+
+  /**
+   * Return a single component of a vector-valued function at a given point.
+   */
+  virtual Number value (const Point<dim> &p,
+                        const unsigned int component = 0) const;
+
+  /**
+   * Return all components of a vector-valued function at a given point.
+   *
+   * <tt>values</tt> shall have the right size beforehand, i.e. #n_components.
+   */
+  virtual void vector_value (const Point<dim> &p,
+                             Vector<Number>   &values) const;
+
+  /**
+   * Return all components of a vector-valued function at a list of points.
+   *
+   * <tt>value_list</tt> shall be the same size as <tt>points</tt> and each
+   * element of the vector will be passed to vector_value() to evaluate the
+   * function.
+   */
+  virtual void vector_value_list (const std::vector<Point<dim> > &points,
+                                  std::vector<Vector<Number> >   &value_list) const;
+
+private:
+  /**
+   * The TensorFunction (held by reference) which we call when this class's
+   * value(), vector_value() or vector_value_list() functions are called.
+   */
+  const TensorFunction<1,dim,Number> &tensor_function;
+
+  /**
+   * The first vector component whose value is to be filled by the given
+   * TensorFunction.  The values will be placed in components
+   * selected_component to selected_component+dim-1 for a
+   * <tt>TensorFunction<1,dim, Number></tt> object.
+   */
+  const unsigned int selected_component;
+};
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/function.templates.h b/include/deal.II/base/function.templates.h
new file mode 100644
index 0000000..65355b0
--- /dev/null
+++ b/include/deal.II/base/function.templates.h
@@ -0,0 +1,815 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__function_templates_h
+#define dealii__function_templates_h
+
+
+#include <deal.II/base/function.h>
+
+#include <deal.II/base/tensor_function.h>
+#include <deal.II/base/point.h>
+#include <deal.II/lac/vector.h>
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+template <int dim, typename Number>
+const unsigned int Function<dim, Number>::dimension; // out-of-class definition of the static member
+
+
+template <int dim, typename Number>
+Function<dim, Number>::Function (const unsigned int n_components,
+                                 const Number       initial_time)
+  :
+  FunctionTime<Number>(initial_time),
+  n_components(n_components)
+{
+  // Reject function objects with zero components: they cannot return any
+  // values, which makes little sense in the first place and only leads to
+  // odd errors later on.
+  Assert (n_components > 0, ExcZero());
+}
+
+
+template <int dim, typename Number>
+Function<dim, Number>::~Function ()
+{} // intentionally empty
+
+
+
+template <int dim, typename Number>
+Function<dim, Number> &Function<dim, Number>::operator= (const Function &f)
+{
+  (void)f; // f is only referenced inside the assertion below
+  AssertDimension (n_components, f.n_components);
+  return *this; // nothing is copied from f
+}
+
+
+template <int dim, typename Number>
+Number Function<dim, Number>::value (const Point<dim> &,
+                                     const unsigned int) const
+{
+  Assert (false, ExcPureFunctionCalled()); // base class provides no implementation
+  return 0; // dummy value to satisfy the return type
+}
+
+
+template <int dim, typename Number>
+void Function<dim, Number>::vector_value (const Point<dim> &p, Vector<Number> &v) const
+{
+  AssertDimension(v.size(), this->n_components);
+  // evaluate the scalar interface once per component
+  for (unsigned int c=0; c<this->n_components; ++c)
+    v(c) = value(p, c);
+}
+
+
+template <int dim, typename Number>
+void Function<dim, Number>::value_list (const std::vector<Point<dim> > &points,
+                                        std::vector<Number>            &values,
+                                        const unsigned int              component) const
+{
+  // Validating the range of 'component' is left to the derived class; here
+  // we only make sure that there is one output slot per point.
+  const std::size_t n_points = points.size();
+  Assert (values.size() == n_points, ExcDimensionMismatch(values.size(), n_points));
+
+  for (unsigned int q=0; q<n_points; ++q)
+    values[q] = this->value (points[q], component);
+}
+
+
+template <int dim, typename Number>
+void Function<dim, Number>::vector_value_list (const std::vector<Point<dim> > &points,
+                                               std::vector<Vector<Number> >   &values) const
+{
+  // Only the number of output vectors is checked here; the size of each
+  // individual vector is left to vector_value().
+  const std::size_t n_points = points.size();
+  Assert (values.size() == n_points, ExcDimensionMismatch(values.size(), n_points));
+
+  for (unsigned int q=0; q<n_points; ++q)
+    this->vector_value (points[q], values[q]);
+}
+
+
+template <int dim, typename Number>
+void Function<dim, Number>::vector_values (
+  const std::vector<Point<dim> > &points,
+  std::vector<std::vector<Number> > &values) const
+{
+  // component-major layout: values[c] is handed to value_list() for component c
+  AssertDimension (values.size(), this->n_components);
+  for (unsigned int c=0; c<this->n_components; ++c)
+    value_list(points, values[c], c);
+}
+
+
+template <int dim, typename Number>
+Tensor<1,dim,Number> Function<dim, Number>::gradient (const Point<dim> &,
+                                                      const unsigned int) const
+{
+  Assert (false, ExcPureFunctionCalled()); // base class provides no implementation
+  return Tensor<1,dim,Number>(); // dummy value to satisfy the return type
+}
+
+
+template <int dim, typename Number>
+void Function<dim, Number>::vector_gradient (
+  const Point<dim> &p, std::vector<Tensor<1,dim,Number> > &v) const
+{
+  const unsigned int n = this->n_components;
+  AssertDimension(v.size(), n);
+  for (unsigned int c=0; c<n; ++c)
+    v[c] = gradient(p, c);
+}
+
+
+template <int dim, typename Number>
+void Function<dim, Number>::gradient_list (
+  const std::vector<Point<dim> >     &points,
+  std::vector<Tensor<1,dim,Number> > &gradients,
+  const unsigned int                  component) const
+{
+  const std::size_t n_points = points.size();
+  Assert (gradients.size() == n_points, ExcDimensionMismatch(gradients.size(), n_points));
+
+  for (unsigned int q=0; q<n_points; ++q)
+    gradients[q] = gradient(points[q], component);
+}
+
+
+template <int dim, typename Number>
+void Function<dim, Number>::vector_gradient_list (
+  const std::vector<Point<dim> >                   &points,
+  std::vector<std::vector<Tensor<1,dim,Number> > > &gradients) const
+{
+  const std::size_t n_points = points.size();
+  Assert (gradients.size() == n_points, ExcDimensionMismatch(gradients.size(), n_points));
+
+  for (unsigned int q=0; q<n_points; ++q)
+    {
+      Assert (gradients[q].size() == n_components,
+              ExcDimensionMismatch(gradients[q].size(), n_components));
+      vector_gradient (points[q], gradients[q]);
+    }
+}
+
+
+template <int dim, typename Number>
+void Function<dim, Number>::vector_gradients (
+  const std::vector<Point<dim> > &points,
+  std::vector<std::vector<Tensor<1,dim,Number> > > &values) const
+{
+  // component-major layout: values[c] is handed to gradient_list() for component c
+  AssertDimension (values.size(), this->n_components);
+  for (unsigned int c=0; c<this->n_components; ++c)
+    gradient_list(points, values[c], c);
+}
+
+
+
+template <int dim, typename Number>
+Number Function<dim, Number>::laplacian (const Point<dim> &,
+                                         const unsigned int) const
+{
+  Assert (false, ExcPureFunctionCalled()); // base class provides no implementation
+  return 0; // dummy value to satisfy the return type
+}
+
+
+template <int dim, typename Number>
+void Function<dim, Number>::vector_laplacian (const Point<dim> &,
+                                              Vector<Number> &) const
+{
+  Assert (false, ExcPureFunctionCalled()); // base class provides no implementation
+}
+
+
+
+template <int dim, typename Number>
+void Function<dim, Number>::laplacian_list (
+  const std::vector<Point<dim> > &points,
+  std::vector<Number>            &laplacians,
+  const unsigned int              component) const
+{
+  // Validating the range of 'component' is left to the derived class; here
+  // we only make sure that there is one output slot per point.
+  const std::size_t n_points = points.size();
+  Assert (laplacians.size() == n_points, ExcDimensionMismatch(laplacians.size(), n_points));
+
+  for (unsigned int q=0; q<n_points; ++q)
+    laplacians[q] = this->laplacian (points[q], component);
+}
+
+
+template <int dim, typename Number>
+void Function<dim, Number>::vector_laplacian_list (
+  const std::vector<Point<dim> > &points,
+  std::vector<Vector<Number> >   &laplacians) const
+{
+  // Only the number of output vectors is checked here; the size of each
+  // individual vector is left to vector_laplacian().
+  const std::size_t n_points = points.size();
+  Assert (laplacians.size() == n_points, ExcDimensionMismatch(laplacians.size(), n_points));
+
+  for (unsigned int q=0; q<n_points; ++q)
+    this->vector_laplacian (points[q], laplacians[q]);
+}
+
+
+template <int dim, typename Number>
+SymmetricTensor<2,dim,Number> Function<dim, Number>::hessian (const Point<dim> &,
+    const unsigned int) const
+{
+  Assert (false, ExcPureFunctionCalled()); // base class provides no implementation
+  return SymmetricTensor<2,dim,Number>(); // dummy value to satisfy the return type
+}
+
+
+template <int dim, typename Number>
+void Function<dim, Number>::vector_hessian (
+  const Point<dim> &p, std::vector<SymmetricTensor<2,dim,Number> > &v) const
+{
+  const unsigned int n = this->n_components;
+  AssertDimension(v.size(), n);
+  for (unsigned int c=0; c<n; ++c)
+    v[c] = hessian(p, c);
+}
+
+
+template <int dim, typename Number>
+void Function<dim, Number>::hessian_list (
+  const std::vector<Point<dim> >     &points,
+  std::vector<SymmetricTensor<2,dim,Number> > &hessians,
+  const unsigned int                  component) const
+{
+  const std::size_t n_points = points.size();
+  Assert (hessians.size() == n_points, ExcDimensionMismatch(hessians.size(), n_points));
+
+  for (unsigned int q=0; q<n_points; ++q)
+    hessians[q] = hessian(points[q], component);
+}
+
+
+template <int dim, typename Number>
+void Function<dim, Number>::vector_hessian_list (
+  const std::vector<Point<dim> >                   &points,
+  std::vector<std::vector<SymmetricTensor<2,dim,Number> > > &hessians) const
+{
+  const std::size_t n_points = points.size();
+  Assert (hessians.size() == n_points, ExcDimensionMismatch(hessians.size(), n_points));
+
+  for (unsigned int q=0; q<n_points; ++q)
+    {
+      Assert (hessians[q].size() == n_components,
+              ExcDimensionMismatch(hessians[q].size(), n_components));
+      vector_hessian (points[q], hessians[q]);
+    }
+}
+
+
+
+template <int dim, typename Number>
+std::size_t
+Function<dim, Number>::memory_consumption () const
+{
+  // the object itself is used as the estimate: only simple data elements here
+  return sizeof (*this);
+}
+
+
+
+//---------------------------------------------------------------------------
+
+
+
+template <int dim, typename Number>
+ZeroFunction<dim, Number>::ZeroFunction (const unsigned int n_components)
+  :
+  Function<dim, Number> (n_components)
+{} // nothing to do beyond initializing the base class
+
+
+template <int dim, typename Number>
+ZeroFunction<dim, Number>::~ZeroFunction ()
+{} // intentionally empty
+
+
+template <int dim, typename Number>
+Number ZeroFunction<dim, Number>::value (const Point<dim> &,
+                                         const unsigned int) const
+{
+  return 0.; // zero for every point and every component
+}
+
+
+template <int dim, typename Number>
+void ZeroFunction<dim, Number>::vector_value (const Point<dim> &,
+                                              Vector<Number>   &return_value) const
+{
+  Assert (return_value.size() == this->n_components,
+          ExcDimensionMismatch (return_value.size(), this->n_components));
+  // the point is irrelevant: every entry of the output is set to zero
+  std::fill (return_value.begin(), return_value.end(), 0.0);
+}
+
+
+template <int dim, typename Number>
+void ZeroFunction<dim, Number>::value_list (
+  const std::vector<Point<dim> > &points,
+  std::vector<Number>            &values,
+  const unsigned int              /*component*/) const
+{
+  (void)points; // only referenced inside the assertion below
+  Assert (values.size() == points.size(),
+          ExcDimensionMismatch(values.size(), points.size()));
+  // neither the points nor the component matter: fill everything with zero
+  std::fill (values.begin(), values.end(), 0.);
+}
+
+
+template <int dim, typename Number>
+void ZeroFunction<dim, Number>::vector_value_list (
+  const std::vector<Point<dim> > &points,
+  std::vector<Vector<Number> >   &values) const
+{
+  const std::size_t n_points = points.size();
+  Assert (values.size() == n_points, ExcDimensionMismatch(values.size(), n_points));
+
+  for (unsigned int q=0; q<n_points; ++q)
+    {
+      Assert (values[q].size() == this->n_components,
+              ExcDimensionMismatch(values[q].size(), this->n_components));
+      std::fill (values[q].begin(), values[q].end(), 0.);
+    }
+}
+
+
+template <int dim, typename Number>
+Tensor<1,dim,Number> ZeroFunction<dim, Number>::gradient (const Point<dim> &,
+                                                          const unsigned int) const
+{
+  return Tensor<1,dim,Number>(); // default-constructed tensor, independent of point and component
+}
+
+
+template <int dim, typename Number>
+void ZeroFunction<dim, Number>::vector_gradient (
+  const Point<dim> &,
+  std::vector<Tensor<1,dim,Number> > &gradients) const
+{
+  const unsigned int n = this->n_components;
+  Assert (gradients.size() == n, ExcDimensionMismatch(gradients.size(), n));
+
+  for (unsigned int comp=0; comp<n; ++comp)
+    gradients[comp].clear ();
+}
+
+
+template <int dim, typename Number>
+void ZeroFunction<dim, Number>::gradient_list (
+  const std::vector<Point<dim> >     &points,
+  std::vector<Tensor<1,dim,Number> > &gradients,
+  const unsigned int                  /*component*/) const
+{
+  const std::size_t n_points = points.size();
+  Assert (gradients.size() == n_points, ExcDimensionMismatch(gradients.size(), n_points));
+
+  for (unsigned int q=0; q<n_points; ++q)
+    gradients[q].clear ();
+}
+
+
+template <int dim, typename Number>
+void ZeroFunction<dim, Number>::vector_gradient_list (
+  const std::vector<Point<dim> >                   &points,
+  std::vector<std::vector<Tensor<1,dim,Number> > > &gradients) const
+{
+  const std::size_t n_points = points.size();
+  Assert (gradients.size() == n_points, ExcDimensionMismatch(gradients.size(), n_points));
+  for (unsigned int q=0; q<n_points; ++q)
+    {
+      Assert (gradients[q].size() == this->n_components,
+              ExcDimensionMismatch(gradients[q].size(), this->n_components));
+      for (unsigned int comp=0; comp<this->n_components; ++comp)
+        gradients[q][comp].clear ();
+    }
+}
+
+//---------------------------------------------------------------------------
+
+template <int dim, typename Number>
+ConstantFunction<dim, Number>::ConstantFunction (const Number value,
+                                                 const unsigned int n_components)
+  :
+  ZeroFunction<dim, Number> (n_components),
+  function_value_vector (n_components, value) // n_components copies of value (assuming a std::vector member)
+{}
+
+template <int dim, typename Number>
+ConstantFunction<dim, Number>::
+ConstantFunction (const std::vector<Number> &values)
+  :
+  ZeroFunction<dim, Number> (values.size()), // one component per entry of values
+  function_value_vector (values)
+{}
+
+
+template <int dim, typename Number>
+ConstantFunction<dim, Number>::
+ConstantFunction (const Vector<Number> &values)
+  :
+  ZeroFunction<dim, Number> (values.size()), // one component per entry of values
+  function_value_vector (values.size()) // sized here, filled by the copy below
+{
+  Assert (values.size() == function_value_vector.size(),
+          ExcDimensionMismatch (values.size(), function_value_vector.size()));
+  std::copy (values.begin(),values.end(),function_value_vector.begin());
+}
+
+
+template <int dim, typename Number>
+ConstantFunction<dim, Number>::
+ConstantFunction (const Number *begin_ptr, const unsigned int n_components)
+  :
+  ZeroFunction<dim, Number> (n_components),
+  function_value_vector (n_components) // filled from [begin_ptr, begin_ptr+n_components) below
+{
+  Assert (begin_ptr != 0, ExcMessage ("Null pointer encountered!"));
+  std::copy (begin_ptr, begin_ptr+n_components, function_value_vector.begin());
+}
+
+
+
+template <int dim, typename Number>
+ConstantFunction<dim, Number>::~ConstantFunction ()
+{
+  function_value_vector.clear(); // drop the stored values explicitly
+}
+
+
+template <int dim, typename Number>
+Number ConstantFunction<dim, Number>::value (const Point<dim> &,
+                                             const unsigned int component) const
+{
+  Assert (component < this->n_components,
+          ExcIndexRange (component, 0, this->n_components));
+  return function_value_vector[component]; // independent of the evaluation point
+}
+
+
+
+template <int dim, typename Number>
+void ConstantFunction<dim, Number>::vector_value (const Point<dim> &,
+                                                  Vector<Number>   &return_value) const
+{
+  Assert (return_value.size() == this->n_components,
+          ExcDimensionMismatch (return_value.size(), this->n_components));
+  // copy the stored per-component values; the point is irrelevant
+  std::copy (function_value_vector.begin(),function_value_vector.end(),
+             return_value.begin());
+}
+
+
+
+template <int dim, typename Number>
+void ConstantFunction<dim, Number>::value_list (
+  const std::vector<Point<dim> > &points,
+  std::vector<Number>            &return_values,
+  const unsigned int              component) const
+{
+  // points is only looked at inside an assertion; avoid an unused-parameter warning
+  (void)points;
+  Assert (component < this->n_components,
+          ExcIndexRange (component, 0, this->n_components));
+  Assert (return_values.size() == points.size(),
+          ExcDimensionMismatch(return_values.size(), points.size()));
+
+  std::fill (return_values.begin(), return_values.end(), function_value_vector[component]);
+}
+
+
+
+template <int dim, typename Number>
+void ConstantFunction<dim, Number>::vector_value_list (
+  const std::vector<Point<dim> > &points,
+  std::vector<Vector<Number> >   &return_values) const
+{
+  const std::size_t n_points = points.size();
+  Assert (return_values.size() == n_points, ExcDimensionMismatch(return_values.size(), n_points));
+
+  for (unsigned int q=0; q<n_points; ++q)
+    {
+      Assert (return_values[q].size() == this->n_components,
+              ExcDimensionMismatch(return_values[q].size(), this->n_components));
+      std::copy (function_value_vector.begin(),function_value_vector.end(),
+                 return_values[q].begin());
+    }
+}
+
+
+
+template <int dim, typename Number>
+std::size_t
+ConstantFunction<dim, Number>::memory_consumption () const
+{
+  // the object itself plus one Number per component; assumes Number is a simple type
+  return (sizeof(*this) + this->n_components*sizeof(Number));
+}
+
+//---------------------------------------------------------------------------
+
+template <int dim, typename Number>
+ComponentSelectFunction<dim, Number>::
+ComponentSelectFunction (const unsigned int selected, const Number value,
+                         const unsigned int n_components)
+  :
+  ConstantFunction<dim, Number> (value, n_components),
+  selected_components(std::make_pair(selected,selected+1))
+{
+  // the selected component must exist: vector_value() copies from/to it
+  Assert (selected < n_components, ExcIndexRange (selected, 0, n_components));
+}
+
+template <int dim, typename Number>
+ComponentSelectFunction<dim, Number>::
+ComponentSelectFunction (const unsigned int selected,
+                         const unsigned int n_components)
+  :
+  ConstantFunction<dim, Number> (1., n_components), // stored value 1 for every component
+  selected_components(std::make_pair(selected,selected+1)) // half-open: [selected, selected+1)
+{
+  Assert (selected < n_components,
+          ExcIndexRange (selected, 0, n_components));
+}
+
+
+
+template <int dim, typename Number>
+ComponentSelectFunction<dim, Number>::
+ComponentSelectFunction (const std::pair<unsigned int,unsigned int> &selected,
+                         const unsigned int n_components)
+  :
+  ConstantFunction<dim, Number> (1., n_components), // stored value 1 for every component
+  selected_components(selected) // half-open interval [first, second), checked below
+{
+  Assert (selected_components.first < selected_components.second,
+          ExcMessage ("The upper bound of the interval must be larger than "
+                      "the lower bound"));
+  Assert (selected_components.second <= n_components,
+          ExcMessage ("The upper bound of the interval must be less than "
+                      "or equal to the total number of vector components"));
+}
+
+
+
+
+template <int dim, typename Number>
+void ComponentSelectFunction<dim, Number>::
+substitute_function_value_with (const ConstantFunction<dim, Number> &f)
+{
+  // ConstantFunction::value() ignores its point argument, so any point will do
+  Point<dim> dummy_point;
+  for (unsigned int c=0; c<this->function_value_vector.size(); ++c)
+    this->function_value_vector[c] = f.value(dummy_point,c);
+}
+
+
+
+
+template <int dim, typename Number>
+void ComponentSelectFunction<dim, Number>::vector_value (
+  const Point<dim> &,
+  Vector<Number>   &return_value) const
+{
+  Assert (return_value.size() == this->n_components,
+          ExcDimensionMismatch (return_value.size(), this->n_components));
+  // zero everything, then copy only the selected range [first, second)
+  return_value = 0;
+  std::copy (this->function_value_vector.begin()+selected_components.first,
+             this->function_value_vector.begin()+selected_components.second,
+             return_value.begin()+selected_components.first);
+}
+
+
+
+template <int dim, typename Number>
+void ComponentSelectFunction<dim, Number>::vector_value_list (
+  const std::vector<Point<dim> > &points,
+  std::vector<Vector<Number> >   &values) const
+{
+  const std::size_t n_points = points.size();
+  Assert (values.size() == n_points, ExcDimensionMismatch(values.size(), n_points));
+
+  // explicitly qualified call: always this class's vector_value()
+  for (unsigned int q=0; q<n_points; ++q)
+    ComponentSelectFunction<dim, Number>::vector_value (points[q], values[q]);
+}
+
+
+
+template <int dim, typename Number>
+std::size_t
+ComponentSelectFunction<dim, Number>::memory_consumption () const
+{
+  // Memory added by this class on top of its base (measured via sizeof),
+  // plus whatever the base class reports for itself.
+  const std::size_t own_part = sizeof(*this) - sizeof(ConstantFunction<dim, Number>);
+  const std::size_t base_part = ConstantFunction<dim, Number>::memory_consumption();
+  return own_part + base_part;
+}
+
+//---------------------------------------------------------------------------
+
+template <int dim, typename Number>
+ScalarFunctionFromFunctionObject<dim, Number>::
+ScalarFunctionFromFunctionObject (const std_cxx11::function<Number (const Point<dim> &)> &function_object)
+  :
+  Function<dim, Number>(1), // scalar function: exactly one component
+  function_object (function_object)
+{}
+
+
+
+template <int dim, typename Number>
+Number
+ScalarFunctionFromFunctionObject<dim, Number>::value (const Point<dim> &p,
+                                                      const unsigned int component) const
+{
+  (void)component; // only referenced inside the assertion below
+  Assert (component == 0,
+          ExcMessage ("This object represents only scalar functions"));
+  return function_object (p); // forward to the wrapped function object
+}
+
+
+
+template <int dim, typename Number>
+VectorFunctionFromScalarFunctionObject<dim, Number>::
+VectorFunctionFromScalarFunctionObject (
+  const std_cxx11::function<Number (const Point<dim> &)> &function_object,
+  const unsigned int selected_component,
+  const unsigned int n_components)
+  :
+  Function<dim, Number>(n_components),
+  function_object (function_object), // stored by value (the member is not a reference)
+  selected_component (selected_component)
+{
+  Assert (selected_component < this->n_components,
+          ExcIndexRange (selected_component, 0, this->n_components));
+}
+
+
+
+template <int dim, typename Number>
+Number
+VectorFunctionFromScalarFunctionObject<dim, Number>::value (
+  const Point<dim> &p,
+  const unsigned int component) const
+{
+  Assert (component < this->n_components,
+          ExcIndexRange (component, 0, this->n_components));
+
+  // every component except the selected one is identically zero
+  if (component != selected_component)
+    return 0;
+  return function_object (p);
+}
+
+
+
+template <int dim, typename Number>
+void
+VectorFunctionFromScalarFunctionObject<dim, Number>::
+vector_value (const Point<dim>   &p,
+              Vector<Number>     &values) const
+{
+  AssertDimension(values.size(), this->n_components);
+
+  // Zero out the whole vector first, then fill in the single component that
+  // is driven by the scalar function object.
+  values = 0;
+  values(selected_component) = function_object (p);
+}
+
+
+
+/**
+ * Constructor. Passes <tt>n_components</tt> on to the Function base class and
+ * keeps a reference to <tt>tensor_function</tt> (no copy is made).
+ */
+template <int dim, typename Number>
+VectorFunctionFromTensorFunction<dim, Number>::VectorFunctionFromTensorFunction (
+  const TensorFunction<1,dim,Number> &tensor_function,
+  const unsigned int selected_component,
+  const unsigned int n_components)
+  :
+  Function<dim, Number> (n_components),
+  tensor_function (tensor_function),
+  selected_component (selected_component)
+{
+  // Verify that the dim entries of the Tensor<1,dim,Number>, placed at
+  // selected_component and following, do not hang over the end of the
+  // vector. Report the last index actually needed, i.e. the one tested.
+  Assert (selected_component + dim - 1 < this->n_components,
+          ExcIndexRange (selected_component + dim - 1, 0, this->n_components));
+}
+
+
+template <int dim, typename Number>
+VectorFunctionFromTensorFunction<dim, Number>::~VectorFunctionFromTensorFunction ()
+{} // intentionally empty
+
+
+template <int dim, typename Number>
+inline
+Number VectorFunctionFromTensorFunction<dim, Number>::value (const Point<dim> &p,
+    const unsigned int component) const
+{
+  Assert (component<this->n_components,
+          ExcIndexRange(component, 0, this->n_components));
+
+  // The tensor occupies components [selected_component,
+  // selected_component+dim); everything outside of that window is zero,
+  // so we can return without evaluating the tensor function at all.
+  const bool inside_window = (component >= selected_component) &&
+                             (component < selected_component+dim);
+  if (!inside_window)
+    return 0;
+
+  // Inside the window: evaluate the full tensor and pick the entry that
+  // corresponds to the requested component.
+  const Tensor<1,dim,Number> tensor_value = tensor_function.value (p);
+  const unsigned int tensor_index = component-selected_component;
+
+  return tensor_value[tensor_index];
+}
+
+
+template <int dim, typename Number>
+inline
+void VectorFunctionFromTensorFunction<dim, Number>::vector_value (
+  const Point<dim> &p,
+  Vector<Number>   &values) const
+{
+  Assert(values.size() == this->n_components,
+         ExcDimensionMismatch(values.size(),this->n_components));
+
+  // Evaluate the tensor function once; its dim entries end up in the
+  // components selected_component ... selected_component + dim - 1 of
+  // the output vector.
+  const Tensor<1,dim,Number> tensor_value = tensor_function.value (p);
+
+  // Start from an all-zero vector ...
+  values = 0;
+
+  // ... and then overwrite the window that belongs to the tensor.
+  for (unsigned int d=0; d<dim; ++d)
+    {
+      values(d+selected_component) = tensor_value[d];
+    }
+}
+
+
+/**
+ * Evaluate the function at every entry of <tt>points</tt> and store the
+ * results in the corresponding entries of <tt>value_list</tt>. No evaluation
+ * logic is duplicated here: each point is simply forwarded to
+ * <tt>vector_value</tt> of this class, which in turn checks the size of the
+ * individual result vector.
+ */
+template <int dim, typename Number>
+void VectorFunctionFromTensorFunction<dim, Number>::vector_value_list (
+  const std::vector<Point<dim> > &points,
+  std::vector<Vector<Number> > &value_list) const
+{
+  Assert (value_list.size() == points.size(),
+          ExcDimensionMismatch (value_list.size(), points.size()));
+
+  const unsigned int n_points = points.size();
+  for (unsigned int q=0; q<n_points; ++q)
+    {
+      VectorFunctionFromTensorFunction<dim, Number>::vector_value(points[q], value_list[q]);
+    }
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif /* dealii__function_templates_h */
diff --git a/include/deal.II/base/function_bessel.h b/include/deal.II/base/function_bessel.h
new file mode 100644
index 0000000..9100f13
--- /dev/null
+++ b/include/deal.II/base/function_bessel.h
@@ -0,0 +1,60 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__function_bessel_h
+#define dealii__function_bessel_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/point.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace Functions
+{
+  /**
+   * The Bessel functions of the first kind and of integer order.
+   * Note that value() declares no default for <tt>component</tt>.
+   * @author Guido Kanschat
+   * @date 2010
+   */
+  template <int dim>
+  class Bessel1 : public Function<dim>
+  {
+  public:
+    Bessel1(const unsigned int order,
+            const double wave_number,
+            const Point<dim> center = Point<dim>());
+    virtual double value (const Point<dim> &points, const unsigned int component) const;
+    virtual void value_list (const std::vector<Point<dim> > &points,
+                             std::vector<double>            &values,
+                             const unsigned int              component = 0) const;
+    virtual Tensor<1,dim> gradient (const Point<dim>   &p,
+                                    const unsigned int  component = 0) const;
+    virtual void gradient_list (const std::vector<Point<dim> > &points,
+                                std::vector<Tensor<1,dim> >    &gradients,
+                                const unsigned int              component = 0) const;
+  private: // NOTE(review): presumably the constructor arguments; definitions are not in this header
+    unsigned int order;
+    double wave_number;
+    Point<dim> center;
+  };
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
+
diff --git a/include/deal.II/base/function_derivative.h b/include/deal.II/base/function_derivative.h
new file mode 100644
index 0000000..7311018
--- /dev/null
+++ b/include/deal.II/base/function_derivative.h
@@ -0,0 +1,138 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__function_derivative_h
+#define dealii__function_derivative_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/auto_derivative_function.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/**
+ * Derivative of a function object.  The value access functions of this class
+ * return the directional derivative of a function with respect to a direction
+ * provided on construction. If <tt>b</tt> is the vector, the derivative <tt>b
+ * . grad f</tt> is computed. This derivative is evaluated directly, not by
+ * computing the gradient of <tt>f</tt> and its scalar product with
+ * <tt>b</tt>.
+ *
+ * The derivative is computed numerically, using one of the provided
+ * difference formulas (see <tt>set_formula</tt> for available schemes).
+ * Experimenting with <tt>h</tt> and the difference scheme may be necessary to
+ * obtain sufficiently accurate results.
+ *
+ * @ingroup functions
+ * @author Guido Kanschat, 2000
+ */
+template <int dim>
+class FunctionDerivative : public AutoDerivativeFunction<dim>
+{
+public:
+  /**
+   * Constructor. Provided are the function to compute derivatives of, the
+   * direction vector of the differentiation and the step size <tt>h</tt> of
+   * the difference formula.
+   */
+  FunctionDerivative (const Function<dim> &f,
+                      const Point<dim>    &direction,
+                      const double         h = 1.e-6);
+
+  /**
+   * Constructor. Provided are the function to compute derivatives of and the
+   * direction vector of the differentiation in each quadrature point and the
+   * difference step size.
+   *
+   * This is the constructor for a variable velocity field. Most probably, a
+   * new object of <tt>FunctionDerivative</tt> has to be constructed for each
+   * set of quadrature points.
+   *
+   * The number of quadrature points must still be the same when values are
+   * accessed.
+   */
+  FunctionDerivative (const Function<dim>            &f,
+                      const std::vector<Point<dim> > &direction,
+                      const double                    h = 1.e-6);
+
+  /**
+   * Choose the difference formula. This is set to the default in the
+   * constructor.
+   *
+   * Formulas implemented right now are first order backward Euler
+   * (<tt>UpwindEuler</tt>), second order symmetric Euler (<tt>Euler</tt>) and
+   * a symmetric fourth order formula (<tt>FourthOrder</tt>).
+   */
+  void set_formula (typename AutoDerivativeFunction<dim>::DifferenceFormula formula
+                    = AutoDerivativeFunction<dim>::Euler);
+  /**
+   * Change the base step size of the difference formula
+   */
+  void set_h (const double h);
+
+  virtual double value (const Point<dim> &p,
+                        const unsigned int component = 0) const;
+
+  virtual void vector_value(const Point<dim> &p,
+                            Vector<double> &value) const;
+
+  virtual void value_list (const std::vector<Point<dim> > &points,
+                           std::vector<double>            &values,
+                           const unsigned int              component = 0) const;
+
+  /**
+   * Return an estimate for the memory consumption, in bytes, of this object.
+   * This is not exact (but will usually be close) because calculating the
+   * memory usage of trees (e.g., <tt>std::map</tt>) is difficult.
+   */
+  std::size_t memory_consumption () const;
+
+  /**
+   * @addtogroup Exceptions
+   * @{
+   */
+
+  /**
+   * Exception.
+   */
+  DeclException0(ExcInvalidFormula);
+  //@}
+private:
+  /**
+   * Function for differentiation.
+   */
+  const Function<dim> &f;
+
+  /**
+   * Step size of the difference formula.
+   */
+  double h;
+
+  /**
+   * Difference formula.
+   */
+  typename AutoDerivativeFunction<dim>::DifferenceFormula formula;
+
+  /**
+   * Helper object. Contains the increment vector for the formula.
+   */
+  std::vector<Tensor<1,dim> > incr;
+};
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/function_lib.h b/include/deal.II/base/function_lib.h
new file mode 100644
index 0000000..67d34f3
--- /dev/null
+++ b/include/deal.II/base/function_lib.h
@@ -0,0 +1,1342 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__function_lib_h
+#define dealii__function_lib_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/point.h>
+#include <deal.II/base/table.h>
+
+#include <deal.II/base/std_cxx11/array.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * Namespace implementing some concrete classes derived from the Function
+ * class that describe actual functions. This is rather a collection of
+ * classes that we have needed for our own programs once and thought might be
+ * useful to others as well at some point.
+ *
+ * @ingroup functions
+ */
+namespace Functions
+{
+
+
+  /**
+   * The distance to the origin squared.
+   *
+   * This function returns the square norm of the radius vector of a point.
+   *
+   * Together with the function, its derivatives and Laplacian are defined.
+   *
+   * @ingroup functions
+   * @author Guido Kanschat, 1999
+   */
+  template<int dim>
+  class SquareFunction : public Function<dim>
+  {
+  public:
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+    virtual void vector_value (const Point<dim>   &p,
+                               Vector<double>     &values) const;
+    virtual void value_list (const std::vector<Point<dim> > &points,
+                             std::vector<double>            &values,
+                             const unsigned int              component = 0) const;
+    virtual Tensor<1,dim> gradient (const Point<dim>   &p,
+                                    const unsigned int  component = 0) const;
+    virtual void vector_gradient (const Point<dim>   &p,
+                                  std::vector<Tensor<1,dim> >    &gradient) const;
+    virtual void gradient_list (const std::vector<Point<dim> > &points,
+                                std::vector<Tensor<1,dim> >    &gradients,
+                                const unsigned int              component = 0) const;
+    virtual double laplacian (const Point<dim>   &p,
+                              const unsigned int  component = 0) const;
+    virtual void laplacian_list (const std::vector<Point<dim> > &points,
+                                 std::vector<double>            &values,
+                                 const unsigned int              component = 0) const;
+  };
+
+
+
+  /**
+   * The function <tt>xy</tt> in 2d and 3d, not implemented in 1d. This
+   * function serves as an example for a vanishing Laplacian.
+   *
+   * @ingroup functions
+   * @author Guido Kanschat, 2000
+   */
+  template<int dim>
+  class Q1WedgeFunction : public Function<dim>
+  {
+  public:
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+
+    virtual void value_list (const std::vector<Point<dim> > &points,
+                             std::vector<double>            &values,
+                             const unsigned int              component = 0) const;
+
+    virtual void vector_value_list (const std::vector<Point<dim> > &points,
+                                    std::vector<Vector<double> > &values) const;
+
+    virtual Tensor<1,dim> gradient (const Point<dim>   &p,
+                                    const unsigned int       component = 0) const;
+
+    virtual void gradient_list (const std::vector<Point<dim> > &points,
+                                std::vector<Tensor<1,dim> >    &gradients,
+                                const unsigned int              component = 0) const;
+
+    virtual void vector_gradient_list (const std::vector<Point<dim> > &,
+                                       std::vector<std::vector<Tensor<1,dim> > > &) const;
+
+    /**
+     * Laplacian of the function at one point.
+     */
+    virtual double laplacian (const Point<dim>   &p,
+                              const unsigned int  component = 0) const;
+
+    /**
+     * Laplacian of the function at multiple points.
+     */
+    virtual void laplacian_list (const std::vector<Point<dim> > &points,
+                                 std::vector<double>            &values,
+                                 const unsigned int              component = 0) const;
+  };
+
+
+
+  /**
+   * d-quadratic pillow on the hypercube $[-1,1]^d$.
+   *
+   * This is a function for testing the implementation. It has zero Dirichlet
+   * boundary values on the domain $(-1,1)^d$. In the inside, it is the
+   * product of $1-x_i^2$ over all space dimensions.
+   *
+   * Providing a non-zero argument to the constructor, the whole function can
+   * be offset by a constant.
+   *
+   * Together with the function, its derivatives and Laplacian are defined.
+   *
+   * @ingroup functions
+   * @author Guido Kanschat, 1999
+   */
+  template<int dim>
+  class PillowFunction : public Function<dim>
+  {
+  public:
+    /**
+     * Constructor. Provide a constant that will be added to each function
+     * value.
+     */
+    PillowFunction (const double offset=0.);
+
+    /**
+     * The value at a single point.
+     */
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+
+    /**
+     * Values at multiple points.
+     */
+    virtual void value_list (const std::vector<Point<dim> > &points,
+                             std::vector<double>            &values,
+                             const unsigned int              component = 0) const;
+
+    /**
+     * Gradient at a single point.
+     */
+    virtual Tensor<1,dim> gradient (const Point<dim>   &p,
+                                    const unsigned int  component = 0) const;
+
+    /**
+     * Gradients at multiple points.
+     */
+    virtual void gradient_list (const std::vector<Point<dim> > &points,
+                                std::vector<Tensor<1,dim> >    &gradients,
+                                const unsigned int              component = 0) const;
+
+    /**
+     * Laplacian at a single point.
+     */
+    virtual double laplacian (const Point<dim>   &p,
+                              const unsigned int  component = 0) const;
+
+    /**
+     * Laplacian at multiple points.
+     */
+    virtual void laplacian_list (const std::vector<Point<dim> > &points,
+                                 std::vector<double>            &values,
+                                 const unsigned int              component = 0) const;
+  private:
+    const double offset;
+  };
+
+
+
+  /**
+   * Cosine-shaped pillow function. This is another function with zero
+   * boundary values on $[-1,1]^d$. In the interior it is the product of
+   * $\cos(\frac{\pi}{2} x_i)$.
+   *
+   * @ingroup functions
+   * @author Guido Kanschat, 1999
+   */
+  template<int dim>
+  class CosineFunction : public Function<dim>
+  {
+  public:
+    /**
+     * Constructor which optionally allows generating a vector valued cosine
+     * function with the same value in each component.
+     */
+    CosineFunction (const unsigned int n_components = 1);
+
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+
+    virtual void value_list (const std::vector<Point<dim> > &points,
+                             std::vector<double>            &values,
+                             const unsigned int              component = 0) const;
+
+    virtual void vector_value_list (const std::vector<Point<dim> > &points,
+                                    std::vector<Vector<double> > &values) const;
+
+    virtual Tensor<1,dim> gradient (const Point<dim>   &p,
+                                    const unsigned int  component = 0) const;
+
+    virtual void gradient_list (const std::vector<Point<dim> > &points,
+                                std::vector<Tensor<1,dim> >    &gradients,
+                                const unsigned int              component = 0) const;
+
+    virtual double laplacian (const Point<dim>   &p,
+                              const unsigned int  component = 0) const;
+
+    virtual void laplacian_list (const std::vector<Point<dim> > &points,
+                                 std::vector<double>            &values,
+                                 const unsigned int              component = 0) const;
+
+    /**
+     * Second derivatives at a single point.
+     */
+    virtual SymmetricTensor<2,dim> hessian (const Point<dim>   &p,
+                                            const unsigned int  component = 0) const;
+
+    /**
+     * Second derivatives at multiple points.
+     */
+    virtual void hessian_list (const std::vector<Point<dim> > &points,
+                               std::vector<SymmetricTensor<2,dim> >    &hessians,
+                               const unsigned int              component = 0) const;
+  };
+
+
+
+  /**
+   * Gradient of the cosine-shaped pillow function.
+   *
+   * This is a vector-valued function with @p dim components, the gradient of
+   * CosineFunction. On the hypercube $[-1,1]^d$, its tangential boundary
+   * values are zero. Thus, it can be used to test implementations of Maxwell
+   * operators without bothering about boundary terms.
+   *
+   * @ingroup functions
+   * @author Guido Kanschat, 2010
+   */
+  template<int dim>
+  class CosineGradFunction : public Function<dim>
+  {
+  public:
+    /**
+     * Constructor, creating a function with @p dim components.
+     */
+    CosineGradFunction ();
+
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component) const;
+    virtual void vector_value (const Point<dim>   &p,
+                               Vector<double>     &values) const;
+    virtual void value_list (const std::vector<Point<dim> > &points,
+                             std::vector<double>            &values,
+                             const unsigned int              component) const;
+
+    virtual void vector_value_list (const std::vector<Point<dim> > &points,
+                                    std::vector<Vector<double> > &values) const;
+
+    virtual Tensor<1,dim> gradient (const Point<dim>   &p,
+                                    const unsigned int  component) const;
+
+    virtual void gradient_list (const std::vector<Point<dim> > &points,
+                                std::vector<Tensor<1,dim> >    &gradients,
+                                const unsigned int              component) const;
+
+    virtual void vector_gradient_list (const std::vector<Point<dim> >            &points,
+                                       std::vector<std::vector<Tensor<1,dim> > > &gradients) const;
+
+    virtual double laplacian (const Point<dim>   &p,
+                              const unsigned int  component) const;
+  };
+
+
+
+  /**
+   * Product of exponential functions in each coordinate direction.
+   *
+   * @ingroup functions
+   * @author Guido Kanschat, 1999
+   */
+  template<int dim>
+  class ExpFunction : public Function<dim>
+  {
+  public:
+    /**
+     * The value at a single point.
+     */
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+
+    /**
+     * Values at multiple points.
+     */
+    virtual void value_list (const std::vector<Point<dim> > &points,
+                             std::vector<double>            &values,
+                             const unsigned int              component = 0) const;
+
+    /**
+     * Gradient at a single point.
+     */
+    virtual Tensor<1,dim> gradient (const Point<dim>   &p,
+                                    const unsigned int  component = 0) const;
+
+    /**
+     * Gradients at multiple points.
+     */
+    virtual void gradient_list (const std::vector<Point<dim> > &points,
+                                std::vector<Tensor<1,dim> >    &gradients,
+                                const unsigned int              component = 0) const;
+
+    /**
+     * Laplacian at a single point.
+     */
+    virtual double laplacian (const Point<dim>   &p,
+                              const unsigned int  component = 0) const;
+
+    /**
+     * Laplacian at multiple points.
+     */
+    virtual void laplacian_list (const std::vector<Point<dim> > &points,
+                                 std::vector<double>            &values,
+                                 const unsigned int              component = 0) const;
+  };
+
+
+
+  /**
+   * Harmonic singularity on the L-shaped domain in 2D.
+   *
+   * @ingroup functions
+   * @author Guido Kanschat
+   * @date 1999
+   */
+  class LSingularityFunction : public Function<2>
+  {
+  public:
+    virtual double value (const Point<2>   &p,
+                          const unsigned int  component = 0) const;
+
+    virtual void value_list (const std::vector<Point<2> > &points,
+                             std::vector<double>            &values,
+                             const unsigned int              component = 0) const;
+
+    virtual void vector_value_list (const std::vector<Point<2> > &points,
+                                    std::vector<Vector<double> > &values) const;
+
+    virtual Tensor<1,2> gradient (const Point<2>     &p,
+                                  const unsigned int  component = 0) const;
+
+    virtual void gradient_list (const std::vector<Point<2> > &points,
+                                std::vector<Tensor<1,2> >    &gradients,
+                                const unsigned int            component = 0) const;
+
+    virtual void vector_gradient_list (const std::vector<Point<2> > &,
+                                       std::vector<std::vector<Tensor<1,2> > > &) const;
+
+    virtual double laplacian (const Point<2>   &p,
+                              const unsigned int  component = 0) const;
+
+    virtual void laplacian_list (const std::vector<Point<2> > &points,
+                                 std::vector<double>          &values,
+                                 const unsigned int            component = 0) const;
+  };
+
+
+
+  /**
+   * Gradient of the harmonic singularity on the L-shaped domain in 2D.
+   *
+   * The gradient of LSingularityFunction, which is a vector valued function
+   * with vanishing curl and divergence.
+   *
+   * @ingroup functions
+   * @author Guido Kanschat, 2010
+   */
+  class LSingularityGradFunction : public Function<2>
+  {
+  public:
+    /**
+     * Default constructor setting the dimension to 2.
+     */
+    LSingularityGradFunction ();
+    virtual double value (const Point<2>   &p,
+                          const unsigned int  component) const;
+
+    virtual void value_list (const std::vector<Point<2> > &points,
+                             std::vector<double>            &values,
+                             const unsigned int              component) const;
+
+    virtual void vector_value_list (const std::vector<Point<2> > &points,
+                                    std::vector<Vector<double> > &values) const;
+
+    virtual Tensor<1,2> gradient (const Point<2>     &p,
+                                  const unsigned int  component) const;
+
+    virtual void gradient_list (const std::vector<Point<2> > &points,
+                                std::vector<Tensor<1,2> >    &gradients,
+                                const unsigned int            component) const;
+
+    virtual void vector_gradient_list (const std::vector<Point<2> > &,
+                                       std::vector<std::vector<Tensor<1,2> > > &) const;
+
+    virtual double laplacian (const Point<2>   &p,
+                              const unsigned int  component) const;
+
+    virtual void laplacian_list (const std::vector<Point<2> > &points,
+                                 std::vector<double>          &values,
+                                 const unsigned int            component) const;
+  };
+
+
+
+  /**
+   * Singularity on the slit domain in 2D and 3D.
+   *
+   * @ingroup functions
+   * @author Guido Kanschat, 1999, 2006
+   */
+  template <int dim>
+  class SlitSingularityFunction : public Function<dim>
+  {
+  public:
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+
+    virtual void value_list (const std::vector<Point<dim> > &points,
+                             std::vector<double>            &values,
+                             const unsigned int              component = 0) const;
+
+    virtual void vector_value_list (const std::vector<Point<dim> > &points,
+                                    std::vector<Vector<double> > &values) const;
+
+    virtual Tensor<1,dim> gradient (const Point<dim>   &p,
+                                    const unsigned int  component = 0) const;
+
+    virtual void gradient_list (const std::vector<Point<dim> > &points,
+                                std::vector<Tensor<1,dim> >    &gradients,
+                                const unsigned int            component = 0) const;
+
+    virtual void vector_gradient_list (const std::vector<Point<dim> > &,
+                                       std::vector<std::vector<Tensor<1,dim> > > &) const;
+
+    virtual double laplacian (const Point<dim>   &p,
+                              const unsigned int  component = 0) const;
+
+    virtual void laplacian_list (const std::vector<Point<dim> > &points,
+                                 std::vector<double>          &values,
+                                 const unsigned int            component = 0) const;
+  };
+
+
+  /**
+   * Singularity on the slit domain with one Neumann boundary in 2D.
+   *
+   * @ingroup functions
+   * @author Guido Kanschat, 2002
+   */
+  class SlitHyperSingularityFunction : public Function<2>
+  {
+  public:
+    virtual double value (const Point<2>   &p,
+                          const unsigned int  component = 0) const;
+
+    virtual void value_list (const std::vector<Point<2> > &points,
+                             std::vector<double>            &values,
+                             const unsigned int              component = 0) const;
+
+    virtual void vector_value_list (const std::vector<Point<2> > &points,
+                                    std::vector<Vector<double> > &values) const;
+
+    virtual Tensor<1,2> gradient (const Point<2>   &p,
+                                  const unsigned int  component = 0) const;
+
+    virtual void gradient_list (const std::vector<Point<2> > &points,
+                                std::vector<Tensor<1,2> >    &gradients,
+                                const unsigned int            component = 0) const;
+
+    virtual void vector_gradient_list (const std::vector<Point<2> > &,
+                                       std::vector<std::vector<Tensor<1,2> > > &) const;
+
+    virtual double laplacian (const Point<2>   &p,
+                              const unsigned int  component = 0) const;
+
+    virtual void laplacian_list (const std::vector<Point<2> > &points,
+                                 std::vector<double>          &values,
+                                 const unsigned int            component = 0) const;
+  };
+
+
+
+  /**
+   * A jump in x-direction transported into some direction.
+   *
+   * If the advection is parallel to the y-axis, the function is
+   * <tt>-atan(sx)</tt>, where <tt>s</tt> is the steepness parameter provided
+   * in the constructor.
+   *
+   * For different advection directions, this function will be rotated in the
+   * parameter space.
+   *
+   * Together with the function, its derivatives and Laplacian are defined.
+   *
+   * @ingroup functions
+   * @author Guido Kanschat, 2000
+   */
+  template<int dim>
+  class JumpFunction : public Function<dim>
+  {
+  public:
+    /**
+     * Constructor. Provide the advection direction here and the steepness of
+     * the slope.
+     */
+    JumpFunction (const Point<dim> &direction,
+                  const double      steepness);
+
+    /**
+     * Function value at one point.
+     */
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+
+    /**
+     * Function values at multiple points.
+     */
+    virtual void value_list (const std::vector<Point<dim> > &points,
+                             std::vector<double>            &values,
+                             const unsigned int              component = 0) const;
+
+    /**
+     * Gradient at one point.
+     */
+    virtual Tensor<1,dim> gradient (const Point<dim>   &p,
+                                    const unsigned int  component = 0) const;
+
+    /**
+     * Gradients at multiple points.
+     */
+    virtual void gradient_list (const std::vector<Point<dim> > &points,
+                                std::vector<Tensor<1,dim> >    &gradients,
+                                const unsigned int              component = 0) const;
+
+    /**
+     * Laplacian of the function at one point.
+     */
+    virtual double laplacian (const Point<dim>   &p,
+                              const unsigned int  component = 0) const;
+
+    /**
+     * Laplacian of the function at multiple points.
+     */
+    virtual void laplacian_list (const std::vector<Point<dim> > &points,
+                                 std::vector<double>            &values,
+                                 const unsigned int              component = 0) const;
+
+    /**
+     * Return an estimate for the memory consumption, in bytes, of this
+     * object. This is not exact (but will usually be close) because
+     * calculating the memory usage of trees (e.g., <tt>std::map</tt>) is
+     * difficult.
+     */
+    std::size_t memory_consumption () const;
+
+  protected:
+    /**
+     * Advection vector.
+     */
+    const Point<dim> direction;
+
+    /**
+     * Steepness (maximal derivative) of the slope.
+     */
+    const double steepness;
+
+    /**
+     * Advection angle.
+     */
+    double angle;
+
+    /**
+     * Sine of <tt>angle</tt>.
+     */
+    double sine;
+
+    /**
+     * Cosine of <tt>angle</tt>.
+     */
+    double cosine;
+  };
+
+
+
+  /**
+   * Given a wavenumber vector generate a cosine function. The wavenumber
+   * coefficient is given as a $d$-dimensional point $k$ in Fourier space, and
+   * the function is then recovered as $f(x) = \cos(\sum_i k_i x_i) =
+   * Re(\exp(i k.x))$.
+   *
+   * The class has its name from the fact that it resembles one component of a
+   * Fourier cosine decomposition.
+   *
+   * @ingroup functions
+   * @author Wolfgang Bangerth, 2001
+   */
+  template <int dim>
+  class FourierCosineFunction : public Function<dim>
+  {
+  public:
+    /**
+     * Constructor. Take the Fourier coefficients in each space direction as
+     * argument.
+     */
+    FourierCosineFunction (const Tensor<1,dim> &fourier_coefficients);
+
+    /**
+     * Return the value of the function at the given point. Unless there is
+     * only one component (i.e. the function is scalar), you should state the
+     * component you want to have evaluated; it defaults to zero, i.e. the
+     * first component.
+     */
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+
+    /**
+     * Return the gradient of the specified component of the function at the
+     * given point.
+     */
+    virtual Tensor<1,dim> gradient (const Point<dim>   &p,
+                                    const unsigned int  component = 0) const;
+
+    /**
+     * Compute the Laplacian of a given component at point <tt>p</tt>.
+     */
+    virtual double laplacian (const Point<dim>   &p,
+                              const unsigned int  component = 0) const;
+  private:
+    /**
+     * Stored Fourier coefficients.
+     */
+    const Tensor<1,dim> fourier_coefficients;
+  };
+
+
+
+  /**
+   * Given a wavenumber vector generate a sine function. The wavenumber
+   * coefficient is given as a $d$-dimensional point $k$ in Fourier space, and
+   * the function is then recovered as $f(x) = \sin(\sum_i k_i x_i) =
+   * Im(\exp(i k.x))$.
+   *
+   * The class has its name from the fact that it resembles one component of a
+   * Fourier sine decomposition.
+   *
+   * @ingroup functions
+   * @author Wolfgang Bangerth, 2001
+   */
+  template <int dim>
+  class FourierSineFunction : public Function<dim>
+  {
+  public:
+    /**
+     * Constructor. Take the Fourier coefficients in each space direction as
+     * argument.
+     */
+    FourierSineFunction (const Tensor<1,dim> &fourier_coefficients);
+
+    /**
+     * Return the value of the function at the given point. Unless there is
+     * only one component (i.e. the function is scalar), you should state the
+     * component you want to have evaluated; it defaults to zero, i.e. the
+     * first component.
+     */
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+
+    /**
+     * Return the gradient of the specified component of the function at the
+     * given point.
+     */
+    virtual Tensor<1,dim> gradient (const Point<dim>   &p,
+                                    const unsigned int  component = 0) const;
+
+    /**
+     * Compute the Laplacian of a given component at point <tt>p</tt>.
+     */
+    virtual double laplacian (const Point<dim>   &p,
+                              const unsigned int  component = 0) const;
+  private:
+    /**
+     * Stored Fourier coefficients.
+     */
+    const Tensor<1,dim> fourier_coefficients;
+  };
+
+
+  /**
+   * Given a sequence of wavenumber vectors and weights generate a sum of sine
+   * functions. Each wavenumber coefficient is given as a $d$-dimensional
+   * point $k_j$ in Fourier space, and the entire function is then recovered as
+   * $f(x) = \sum_j w_j \sin(\sum_i k_{j,i} x_i) = Im(\sum_j w_j \exp(i k_j.x))$.
+   *
+   * @ingroup functions
+   * @author Wolfgang Bangerth, 2001
+   */
+  template <int dim>
+  class FourierSineSum : public Function<dim>
+  {
+  public:
+    /**
+     * Constructor. Take the Fourier coefficients in each space direction and
+     * the weights of the individual sine terms as arguments.
+     */
+    FourierSineSum (const std::vector<Point<dim> > &fourier_coefficients,
+                    const std::vector<double>      &weights);
+
+    /**
+     * Return the value of the function at the given point. Unless there is
+     * only one component (i.e. the function is scalar), you should state the
+     * component you want to have evaluated; it defaults to zero, i.e. the
+     * first component.
+     */
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+
+    /**
+     * Return the gradient of the specified component of the function at the
+     * given point.
+     */
+    virtual Tensor<1,dim> gradient (const Point<dim>   &p,
+                                    const unsigned int  component = 0) const;
+
+    /**
+     * Compute the Laplacian of a given component at point <tt>p</tt>.
+     */
+    virtual double laplacian (const Point<dim>   &p,
+                              const unsigned int  component = 0) const;
+  private:
+    /**
+     * Stored Fourier coefficients and weights.
+     */
+    const std::vector<Point<dim> > fourier_coefficients;
+    const std::vector<double>      weights;
+  };
+
+
+
+  /**
+   * Given a sequence of wavenumber vectors and weights generate a sum of
+   * cosine functions. Each wavenumber coefficient is given as a
+   * $d$-dimensional point $k_j$ in Fourier space, and the entire function is
+   * then recovered as $f(x) = \sum_j w_j \cos(k_j\cdot x) = Re(\sum_j w_j
+   * \exp(i k_j\cdot x))$.
+   *
+   * @ingroup functions
+   * @author Wolfgang Bangerth, 2001
+   */
+  template <int dim>
+  class FourierCosineSum : public Function<dim>
+  {
+  public:
+    /**
+     * Constructor. Take the wavenumber vectors and the corresponding weights
+     * as arguments.
+     */
+    FourierCosineSum (const std::vector<Point<dim> > &fourier_coefficients,
+                      const std::vector<double>      &weights);
+
+    /**
+     * Return the value of the function at the given point. Unless there is
+     * only one component (i.e. the function is scalar), you should state the
+     * component you want to have evaluated; it defaults to zero, i.e. the
+     * first component.
+     */
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+
+    /**
+     * Return the gradient of the specified component of the function at the
+     * given point.
+     */
+    virtual Tensor<1,dim> gradient (const Point<dim>   &p,
+                                    const unsigned int  component = 0) const;
+
+    /**
+     * Compute the Laplacian of a given component at point <tt>p</tt>.
+     */
+    virtual double laplacian (const Point<dim>   &p,
+                              const unsigned int  component = 0) const;
+
+  private:
+    /**
+     * Stored wavenumber vectors (Fourier coefficients) and their weights.
+     */
+    const std::vector<Point<dim> > fourier_coefficients;
+    const std::vector<double>      weights;
+  };
+
+
+  /**
+   * Base class for cut-off functions. This class stores the center and the
+   * radius of the supporting ball of a cut-off function. It also stores the
+   * number of the non-zero component, if the function is vector-valued.
+   *
+   * @ingroup functions
+   * @author Guido Kanschat, 2002
+   */
+  template <int dim>
+  class CutOffFunctionBase : public Function<dim>
+  {
+  public:
+    /**
+     * Value used in the constructor of this and derived classes to denote
+     * that no component is selected.
+     */
+    static const unsigned int no_component = numbers::invalid_unsigned_int;
+
+    /**
+     * Constructor. Arguments are the radius of the ball and its center.
+     *
+     * If <tt>select</tt> is given and differs from the default #no_component,
+     * the cut-off function will be non-zero for this component only.
+     */
+    CutOffFunctionBase (const double radius = 1.,
+                        const Point<dim> = Point<dim>(),
+                        const unsigned int n_components = 1,
+                        const unsigned int select = CutOffFunctionBase<dim>::no_component);
+
+    /**
+     * Move the center of the ball to the new point <tt>p</tt>.
+     */
+    void new_center (const Point<dim> &p);
+
+    /**
+     * Set the radius of the ball to <tt>r</tt>.
+     */
+    void new_radius (const double r);
+
+  protected:
+    /**
+     * Center of the integration ball.
+     */
+    Point<dim> center;
+
+    /**
+     * Radius of the ball.
+     */
+    double radius;
+
+    /**
+     * Component selected. If <tt>no_component</tt>, the function is the same
+     * in all components.
+     */
+    const unsigned int selected;
+  };
+
+
+
+  /**
+   * Cut-off function in L-infinity for an arbitrary ball. This function is
+   * the characteristic function of a ball around <tt>center</tt> with a
+   * specified <tt>radius</tt>, that is, \f[ f = \chi(B_r(c)). \f] If vector
+   * valued, it can be restricted to a single component.
+   *
+   * @ingroup functions
+   * @author Guido Kanschat, 2001, 2002
+   */
+  template<int dim>
+  class CutOffFunctionLinfty : public CutOffFunctionBase<dim>
+  {
+  public:
+    /**
+     * Constructor. Arguments are the radius of the ball and its center.
+     *
+     * If <tt>select</tt> is given and differs from the default no_component,
+     * the cut-off function will be non-zero for this component only.
+     */
+    CutOffFunctionLinfty (const double radius = 1.,
+                          const Point<dim> = Point<dim>(),
+                          const unsigned int n_components = 1,
+                          const unsigned int select = CutOffFunctionBase<dim>::no_component);
+
+    /**
+     * Function value at one point.
+     */
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+
+    /**
+     * Values of one component of the function at multiple points.
+     */
+    virtual void value_list (const std::vector<Point<dim> > &points,
+                             std::vector<double>            &values,
+                             const unsigned int              component = 0) const;
+
+    /**
+     * Vector-valued function values at multiple points.
+     */
+    virtual void vector_value_list (const std::vector<Point<dim> > &points,
+                                    std::vector<Vector<double> >           &values) const;
+  };
+
+
+  /**
+   * Cut-off function for an arbitrary ball. This function is a cone with
+   * support in a ball of certain <tt>radius</tt> around <tt>center</tt>. The
+   * maximum value is 1. If vector valued, it can be restricted to a single
+   * component.
+   *
+   * @ingroup functions
+   * @author Guido Kanschat, 2001, 2002
+   */
+  template<int dim>
+  class CutOffFunctionW1 : public CutOffFunctionBase<dim>
+  {
+  public:
+    /**
+     * Constructor. The first two arguments are the radius of the ball and
+     * its center.
+     *
+     * If an argument <tt>select</tt> is given, the cut-off function will be
+     * non-zero for this component only.
+     */
+    CutOffFunctionW1 (const double radius = 1.,
+                      const Point<dim> = Point<dim>(),
+                      const unsigned int n_components = 1,
+                      const unsigned int select = CutOffFunctionBase<dim>::no_component);
+
+    /**
+     * Function value at one point.
+     */
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+
+    /**
+     * Values of one component of the function at multiple points.
+     */
+    virtual void value_list (const std::vector<Point<dim> > &points,
+                             std::vector<double>            &values,
+                             const unsigned int              component = 0) const;
+
+    /**
+     * Vector-valued function values at multiple points.
+     */
+    virtual void vector_value_list (const std::vector<Point<dim> > &points,
+                                    std::vector<Vector<double> >           &values) const;
+  };
+
+
+  /**
+   * Cut-off function for an arbitrary ball. This is the traditional cut-off
+   * function in C-infinity for a ball of certain <tt>radius</tt> around
+   * <tt>center</tt>, $f(r)=\exp(1-1/(1-r^2/s^2))$, where $r$ is the distance
+   * to the center, and $s$ is the radius of the sphere. If vector valued, it
+   * can be restricted to a single component.
+   *
+   * @ingroup functions
+   * @author Guido Kanschat, 2001, 2002
+   */
+  template<int dim>
+  class CutOffFunctionCinfty : public CutOffFunctionBase<dim>
+  {
+  public:
+    /**
+     * Constructor. The first two arguments are the radius of the ball and
+     * its center.
+     *
+     * If an argument <tt>select</tt> is given, the cut-off function will be
+     * non-zero for this component only.
+     */
+    CutOffFunctionCinfty (const double radius = 1.,
+                          const Point<dim> = Point<dim>(),
+                          const unsigned int n_components = 1,
+                          const unsigned int select = CutOffFunctionBase<dim>::no_component);
+
+    /**
+     * Function value at one point.
+     */
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+
+    /**
+     * Values of one component of the function at multiple points.
+     */
+    virtual void value_list (const std::vector<Point<dim> > &points,
+                             std::vector<double>            &values,
+                             const unsigned int              component = 0) const;
+
+    /**
+     * Vector-valued function values at multiple points.
+     */
+    virtual void vector_value_list (const std::vector<Point<dim> > &points,
+                                    std::vector<Vector<double> >           &values) const;
+
+    /**
+     * Function gradient at one point.
+     */
+    virtual Tensor<1,dim> gradient (const Point<dim>   &p,
+                                    const unsigned int  component = 0) const;
+  };
+
+
+
+  /**
+   * A class that represents a function object for a monomial. Monomials are
+   * polynomials with only a single term, i.e. in 1-d they have the form
+   * $x^\alpha$, in 2-d the form $x_1^{\alpha_1}x_2^{\alpha_2}$, and in 3-d
+   * $x_1^{\alpha_1}x_2^{\alpha_2}x_3^{\alpha_3}$. Monomials are therefore
+   * described by a $dim$-tuple of exponents. Consequently, the class's
+   * constructor takes a Tensor<1,dim> to describe the set of exponents. Most
+   * of the time these exponents will of course be integers, but real
+   * exponents are equally valid. However, exponents must be integers when
+   * the bases (i.e., the coordinates of the point) are negative numbers.
+   *
+   * @author Wolfgang Bangerth, 2006
+   */
+  template <int dim>
+  class Monomial : public Function<dim>
+  {
+  public:
+    /**
+     * Constructor. The first argument is explained in the general description
+     * of the class. The second argument denotes the number of vector
+     * components this object shall represent. All vector components will have
+     * the same value.
+     */
+    Monomial (const Tensor<1,dim> &exponents,
+              const unsigned int n_components = 1);
+
+    /**
+     * Function value at one point.
+     */
+    virtual double value (const Point<dim>   &p,
+                          const unsigned int  component = 0) const;
+
+    /**
+     * Return all components of a vector-valued function at a given point.
+     *
+     * <tt>values</tt> shall have the right size beforehand, i.e.
+     * #n_components.
+     */
+    virtual void vector_value (const Point<dim>   &p,
+                               Vector<double>     &values) const;
+
+    /**
+     * Values of one component of the function at multiple points.
+     */
+    virtual void value_list (const std::vector<Point<dim> > &points,
+                             std::vector<double>            &values,
+                             const unsigned int              component = 0) const;
+
+    /**
+     * Function gradient at one point.
+     */
+    virtual Tensor<1,dim> gradient (const Point<dim>   &p,
+                                    const unsigned int  component = 0) const;
+
+  private:
+    /**
+     * The set of exponents, one per coordinate direction.
+     */
+    const Tensor<1,dim> exponents;
+  };
+
+
+
+  /**
+   * A scalar function that computes its values by (bi-, tri-)linear
+   * interpolation from a set of point data that are arranged on a possibly
+   * non-uniform tensor product mesh. In other words, considering the three-
+   * dimensional case, let there be points $x_0,\ldots, x_{K-1}$,
+   * $y_0,\ldots,y_{L-1}$, $z_0,\ldots,z_{M-1}$, and data $d_{klm}$ defined at
+   * point $(x_k,y_l,z_m)^T$, then evaluating the function at a point $\mathbf
+   * x=(x,y,z)$ will find the box so that $x_k\le x\le x_{k+1}, y_l\le y\le
+   * y_{l+1}, z_m\le z\le z_{m+1}$, and do a trilinear interpolation of the
+   * data on this cell. Similar operations are done in lower dimensions.
+   *
+   * This class is most often used for either evaluating coefficients or right
+   * hand sides that are provided experimentally at a number of points inside
+   * the domain, or for comparing outputs of a solution on a finite element
+   * mesh against previously obtained data defined on a grid.
+   *
+   * @note If the points $x_i$ are actually equally spaced on an interval
+   * $[x_0,x_{K-1}]$ and the same is true for the other data points in higher
+   * dimensions, you should use the InterpolatedUniformGridData class instead.
+   *
+   * If a point is requested outside the box defined by the end points of the
+   * coordinate arrays, then the function is assumed to simply extend by
+   * constant values beyond the last data point in each coordinate direction.
+   * (The class does not throw an error if a point lies outside the box since
+   * it frequently happens that a point lies just outside the box by an amount
+   * on the order of numerical roundoff.)
+   *
+   * @note The use of the related class InterpolatedUniformGridData is
+   * discussed in step-53.
+   *
+   * @author Wolfgang Bangerth, 2013
+   */
+  template <int dim>
+  class InterpolatedTensorProductGridData : public Function<dim>
+  {
+  public:
+    /**
+     * Constructor.
+     * @param coordinate_values An array of dim arrays. Each of the inner
+     * arrays contains the coordinate values $x_0,\ldots, x_{K-1}$ and
+     * similarly for the other coordinate directions. These arrays need not
+     * have the same size. Obviously, we need dim such arrays for a dim-
+     * dimensional function object. The coordinate values within this array
+     * are assumed to be strictly ascending to allow for efficient lookup.
+     * @param data_values A dim-dimensional table of data at each of the mesh
+     * points defined by the coordinate arrays above. Note that the Table
+     * class has a number of conversion constructors that allow converting
+     * other data types into a table where you specify this argument.
+     */
+    InterpolatedTensorProductGridData (const std_cxx11::array<std::vector<double>,dim> &coordinate_values,
+                                       const Table<dim,double>                         &data_values);
+
+    /**
+     * Compute the value of the function by (bi-, tri-)linear interpolation
+     * of the given data set.
+     *
+     * @param p The point at which the function is to be evaluated.
+     * @param component The vector component. Since this function is scalar,
+     * only zero is a valid argument here.
+     * @return The interpolated value at this point. If the point lies outside
+     * the set of coordinates, the function is extended by a constant.
+     */
+    virtual
+    double
+    value (const Point<dim> &p,
+           const unsigned int component = 0) const;
+
+    /**
+     * Compute the gradient of the function defined by (bi-, tri-)linear
+     * interpolation of the given data set.
+     *
+     * @param p The point at which the function gradient is to be evaluated.
+     * @param component The vector component. Since this function is scalar,
+     * only zero is a valid argument here.
+     * @return The value of the gradient of the interpolated function at this
+     * point. If the point lies outside the set of coordinates, the function
+     * is extended by a constant and so its gradient is extended by 0.
+     */
+    virtual
+    Tensor<1, dim>
+    gradient (const Point<dim>    &p,
+              const unsigned int component = 0) const;
+
+  private:
+    /**
+     * The set of coordinate values in each of the coordinate directions.
+     */
+    const std_cxx11::array<std::vector<double>,dim> coordinate_values;
+
+    /**
+     * The data that is to be interpolated.
+     */
+    const Table<dim,double>                     data_values;
+  };
+
+
+  /**
+   * A scalar function that computes its values by (bi-, tri-)linear
+   * interpolation from a set of point data that are arranged on a uniformly
+   * spaced tensor product mesh. In other words, considering the three-
+   * dimensional case, let there be points $x_0,\ldots, x_{K-1}$ that result
+   * from a uniform subdivision of the interval $[x_0,x_{K-1}]$ into $K-1$
+   * sub-intervals of size $\Delta x = (x_{K-1}-x_0)/(K-1)$, and similarly
+   * $y_0,\ldots,y_{L-1}$, $z_0,\ldots,z_{M-1}$. Also consider data $d_{klm}$
+   * defined at point $(x_k,y_l,z_m)^T$, then evaluating the function at a
+   * point $\mathbf x=(x,y,z)$ will find the box so that $x_k\le x\le x_{k+1},
+   * y_l\le y\le y_{l+1}, z_m\le z\le z_{m+1}$, and do a trilinear
+   * interpolation of the data on this cell. Similar operations are done in
+   * lower dimensions.
+   *
+   * This class is most often used for either evaluating coefficients or right
+   * hand sides that are provided experimentally at a number of points inside
+   * the domain, or for comparing outputs of a solution on a finite element
+   * mesh against previously obtained data defined on a grid.
+   *
+   * @note If you have a problem where the points $x_i$ are not equally spaced
+   * (e.g., they result from a computation on a graded mesh that is denser
+   * closer to one boundary), then use the InterpolatedTensorProductGridData
+   * class instead.
+   *
+   * If a point is requested outside the box defined by the end points of the
+   * coordinate arrays, then the function is assumed to simply extend by
+   * constant values beyond the last data point in each coordinate direction.
+   * (The class does not throw an error if a point lies outside the box since
+   * it frequently happens that a point lies just outside the box by an amount
+   * on the order of numerical roundoff.)
+   *
+   * @note The use of this class is discussed in step-53.
+   *
+   * @author Wolfgang Bangerth, 2013
+   */
+  template <int dim>
+  class InterpolatedUniformGridData : public Function<dim>
+  {
+  public:
+    /**
+     * Constructor.
+     * @param interval_endpoints The left and right end points of the
+     * (uniformly subdivided) intervals in each of the coordinate directions.
+     * @param n_subintervals The number of subintervals in each coordinate
+     * direction. A value of one for a coordinate means that the interval is
+     * considered as one subinterval consisting of the entire range. A value
+     * of two means that there are two subintervals each with one half of the
+     * range, etc.
+     * @param data_values A dim-dimensional table of data at each of the mesh
+     * points defined by the coordinate arrays above. Note that the Table
+     * class has a number of conversion constructors that allow converting
+     * other data types into a table where you specify this argument.
+     */
+    InterpolatedUniformGridData (const std_cxx11::array<std::pair<double,double>,dim> &interval_endpoints,
+                                 const std_cxx11::array<unsigned int,dim>             &n_subintervals,
+                                 const Table<dim,double>                              &data_values);
+
+    /**
+     * Compute the value of the function by (bi-, tri-)linear interpolation
+     * of the given data set.
+     *
+     * @param p The point at which the function is to be evaluated.
+     * @param component The vector component. Since this function is scalar,
+     * only zero is a valid argument here.
+     * @return The interpolated value at this point. If the point lies outside
+     * the set of coordinates, the function is extended by a constant.
+     */
+    virtual
+    double
+    value (const Point<dim> &p,
+           const unsigned int component = 0) const;
+
+  private:
+    /**
+     * The set of interval endpoints in each of the coordinate directions.
+     */
+    const std_cxx11::array<std::pair<double,double>,dim> interval_endpoints;
+
+    /**
+     * The number of subintervals in each of the coordinate directions.
+     */
+    const std_cxx11::array<unsigned int,dim>             n_subintervals;
+
+    /**
+     * The data that is to be interpolated.
+     */
+    const Table<dim,double>                     data_values;
+  };
+
+
+  /**
+   * A class that represents a function object for a polynomial. A polynomial
+   * is composed of the sum of multiple monomials. If the polynomial has
+   * n monomials and the dimension is equal to dim, the polynomial can be
+   * written as $\sum_{i=1}^{n} a_{i}(\prod_{d=1}^{dim}
+   * x_{d}^{\alpha_{i,d}})$, where $a_{i}$ are the coefficients of the
+   * monomials and $\alpha_{i,d}$ are their exponents. The class's constructor
+   * takes a Table<2,double> to describe the set of exponents and a
+   * std::vector<double> to describe the set of coefficients.
+   *
+   * @author Ángel Rodríguez, 2015
+   */
+  template <int dim>
+  class Polynomial : public Function<dim>
+  {
+  public:
+    /**
+     * Constructor. The coefficients and the exponents of the polynomial are
+     * passed as arguments. The Table<2, double> exponents has a number of
+     * rows equal to the number of monomials of the polynomial and a number of
+     * columns equal to dim. The i-th row of the exponents table contains the
+     * ${\alpha_{i,d}}$ exponents of the i-th monomial $a_{i}\prod_{d=1}^{dim}
+     * x_{d}^{\alpha_{i,d}}$. The i-th element of the coefficients vector
+     * contains the coefficient $a_{i}$ for the i-th monomial.
+     */
+    Polynomial (const Table<2,double>     &exponents,
+                const std::vector<double> &coefficients);
+
+    /**
+     * Function value at one point.
+     */
+    virtual double value (const Point<dim> &p,
+                          const unsigned int component = 0) const;
+
+
+    /**
+     * Values of one component of the function at multiple points.
+     */
+    virtual void value_list (const std::vector<Point<dim> > &points,
+                             std::vector<double>      &values,
+                             const unsigned int       component = 0) const;
+
+    /**
+     * Function gradient at one point.
+     */
+    virtual Tensor<1,dim> gradient (const Point<dim> &p,
+                                    const unsigned int component = 0) const;
+
+  private:
+
+    /**
+     * The set of exponents, one row per monomial and one column per dimension.
+     */
+    const Table<2,double> exponents;
+
+    /**
+     * The set of coefficients, one per monomial.
+     */
+    const std::vector<double> coefficients;
+  };
+
+
+
+}
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/function_parser.h b/include/deal.II/base/function_parser.h
new file mode 100644
index 0000000..5ae7291
--- /dev/null
+++ b/include/deal.II/base/function_parser.h
@@ -0,0 +1,384 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__function_parser_h
+#define dealii__function_parser_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/tensor.h>
+#include <deal.II/base/point.h>
+#include <deal.II/base/thread_local_storage.h>
+#include <vector>
+#include <map>
+
+namespace mu
+{
+  class Parser;
+}
+
+DEAL_II_NAMESPACE_OPEN
+
+
+template <typename> class Vector;
+
+
+/**
+ * This class implements a function object that gets its value by parsing a
+ * string describing this function. It is a wrapper class for the muparser
+ * library (see http://muparser.beltoforion.de/). This class lets you evaluate
+ * strings such as "sqrt(1-x^2+y^2)" for given values of 'x' and 'y'.  Please
+ * refer to the muparser documentation for more information.  This class is
+ * used in the step-33 and step-36 tutorial programs (the latter being much
+ * simpler to understand).
+ *
+ * The following example shows how to use this class:
+ * @code
+ * // set up problem:
+ * std::string variables = "x,y";
+ * std::string expression = "cos(x)+sqrt(y)";
+ * std::map<std::string,double> constants;
+ *
+ * // FunctionParser with 2 variables and 1 component:
+ * FunctionParser<2> fp(1);
+ * fp.initialize(variables,
+ *               expression,
+ *               constants);
+ *
+ * // Point at which we want to evaluate the function
+ * Point<2> point(0.0, 4.0);
+ *
+ * // evaluate the expression at 'point':
+ * double result = fp.value(point);
+ *
+ * deallog << "Function '" << expression << "'"
+ *         << " @ " << point
+ *         << " is " << result << std::endl;
+ * @endcode
+ * The second example is a bit more complex:
+ * @code
+ * // Define some constants that will be used by the function parser
+ * std::map<std::string,double> constants;
+ * constants["pi"] = numbers::PI;
+ *
+ * // Define the variables that will be used inside the expressions
+ * std::string variables = "x,y,z";
+ *
+ * // Define the expressions of the individual components of a
+ * // vector valued function with two components:
+ * std::vector<std::string> expressions(2);
+ * expressions[0] = "sin(2*pi*x)+sinh(pi*z)";
+ * expressions[1] = "sin(2*pi*y)*exp(x^2)";
+ *
+ * // function parser with 3 variables and 2 components
+ * FunctionParser<3> vector_function(2);
+ *
+ * // And populate it with the newly created objects.
+ * vector_function.initialize(variables,
+ *                            expressions,
+ *                            constants);
+ *
+ * // Point at which we want to evaluate the function
+ * Point<3> point(0.0, 1.0, 1.0);
+ *
+ * // This Vector will store the result
+ * Vector<double> result(2);
+ *
+ * // Fill 'result' by evaluating the function
+ * vector_function.vector_value(point, result);
+ *
+ * // We can also only evaluate the 2nd component:
+ * const double c = vector_function.value(point, 1);
+ *
+ * // Output the evaluated function
+ * deallog << "Function '" << expressions[0] << "," << expressions[1] << "'"
+ *         << " at " << point
+ *         << " is " << result << std::endl;
+ * @endcode
+ *
+ * This class overloads the virtual methods value() and vector_value() of the
+ * Function base class with the byte compiled versions of the expressions
+ * given to the initialize() methods. Note that the class will not work unless
+ * you first call the initialize() method that accepts the text description of
+ * the function as an argument (among other things).
+ *
+ * The syntax to describe a function follows usual programming practice, and
+ * is explained in detail at the homepage of the underlying muparser library
+ * at http://muparser.beltoforion.de/ .
+ *
+ * For a wrapper of the FunctionParser class that supports ParameterHandler,
+ * see ParsedFunction.
+ *
+ * Vector-valued functions can either be declared using strings where the
+ * function components are separated by semicolons, or using a vector of
+ * strings each defining one vector component.
+ *
+ * An example of time dependent scalar function is the following:
+ * @code
+ *    // Empty constants object
+ *    std::map<std::string,double> constants;
+ *
+ *    // Variables that will be used inside the expressions
+ *    std::string variables = "x,y,t";
+ *
+ *    // Define the expression of the scalar time dependent function.
+ *    std::string expression = "exp(y*x)*exp(-t)";
+ *
+ *    // Generate an empty scalar function
+ *    FunctionParser<2> function;
+ *
+ *    // And populate it with the newly created objects.
+ *    function.initialize(variables,
+ *                        expression,
+ *                        constants,
+ *                        true);        // This tells the parser that
+ *                                      // it is a time-dependent function
+ *                                      // and there is another variable
+ *                                      // to be taken into account (t).
+ * @endcode
+ *
+ * The following is another example of how to instantiate a vector valued
+ * function by using a single string:
+ * @code
+ *    // Empty constants object
+ *    std::map<std::string,double> constants;
+ *
+ *    // Variables that will be used inside the expressions
+ *    std::string variables = "x,y";
+ *
+ *    // Define the expression of the vector valued  function.
+ *    std::string expression = "cos(2*pi*x)*y^2; sin(2*pi*x)*exp(y)";
+ *
+ *    // Generate an empty vector valued function
+ *    FunctionParser<2> function(2);
+ *
+ *    // And populate it with the newly created objects.
+ *    function.initialize(variables,
+ *                        expression,
+ *                        constants);
+ * @endcode
+ *
+ *
+ * @ingroup functions
+ * @author Luca Heltai, Timo Heister 2005, 2014
+ */
+template <int dim>
+class FunctionParser : public Function<dim>
+{
+public:
+  /**
+   * Constructor for parsed functions. Its arguments are the same as those of
+   * the base class Function. The only difference is that this object needs to
+   * be initialized with the initialize() method before you can use it. If an
+   * attempt to use this function is made before the initialize() method has
+   * been called, then an exception is thrown.
+   */
+  FunctionParser (const unsigned int n_components = 1,
+                  const double       initial_time = 0.0);
+
+  /**
+   * Destructor. Explicitly delete the FunctionParser objects (there is one
+   * for each component of the function).
+   */
+  ~FunctionParser();
+
+  /**
+   * Type for the constant map. Used by the initialize() method.
+   */
+  typedef std::map<std::string, double> ConstMap;
+
+  /**
+   * Iterator for the constants map. Used by the initialize() method.
+   */
+  typedef ConstMap::iterator ConstMapIterator;
+
+  /**
+   * Initialize the function.  This method accepts the following parameters:
+   *
+   * <b>vars</b>: a string with the variables that will be used by the
+   * expressions to be evaluated. Note that the variables can have any name
+   * (of course different from the function names defined above!), but the
+   * order IS important. The first variable will correspond to the first
+   * component of the point in which the function is evaluated, the second
+   * variable to the second component and so forth. If this function is also
+   * time dependent, then it is necessary to specify it by setting the
+   * <tt>time_dependent</tt> parameter to true.  An exception is thrown if the
+   * number of variables specified here is different from dim (if this
+   * function is not time-dependent) or from dim+1 (if it is time-dependent).
+   *
+   * <b>expressions</b>: a list of strings containing the expressions that
+   * will be byte compiled by the internal parser (FunctionParser). Note that
+   * the size of this vector must match exactly the number of components of
+   * the FunctionParser, as declared in the constructor. If this is not the
+   * case, an exception is thrown.
+   *
+   * <b>constants</b>: a map of constants used to pass any necessary constant
+   * that we want to specify in our expressions (in the example above the
+   * number pi). An expression is valid if and only if it contains only
+   * defined variables and defined constants (other than the functions
+   * specified above). If a constant is given whose name is not valid (eg:
+   * <tt>constants["sin"] = 1.5;</tt>) an exception is thrown.
+   *
+   * <b>time_dependent</b>. If this is a time dependent function, then the
+   * last variable declared in <b>vars</b> is assumed to be the time variable,
+   * and this->get_time() is used to initialize it when evaluating the
+   * function. Naturally the number of variables parsed by the initialize()
+   * method in this case is dim+1. The value of this parameter defaults to
+   * false, i.e. do not consider time.
+   */
+  void initialize (const std::string              &vars,
+                   const std::vector<std::string> &expressions,
+                   const ConstMap                 &constants,
+                   const bool time_dependent = false);
+
+  /**
+   * Initialize the function. Same as above, but accepts a string rather than
+   * a vector of strings. If this is a vector valued function, its components
+   * are expected to be separated by a semicolon. An exception is thrown if
+   * this method is called and the number of components successfully parsed
+   * does not match the number of components of the base function.
+   */
+  void initialize (const std::string &vars,
+                   const std::string &expression,
+                   const ConstMap    &constants,
+                   const bool time_dependent = false);
+
+  /**
+   * A function that returns default names for variables, to be used in the
+   * first argument of the initialize() functions: it returns "x" in 1d, "x,y"
+   * in 2d, and "x,y,z" in 3d.
+   */
+  static
+  std::string
+  default_variable_names ();
+
+  /**
+   * Return the value of the function at the given point. Unless there is only
+   * one component (i.e. the function is scalar), you should state the
+   * component you want to have evaluated; it defaults to zero, i.e. the first
+   * component.
+   */
+  virtual double value (const Point<dim>   &p,
+                        const unsigned int  component = 0) const;
+
+  /**
+   * Return all components of a vector-valued function at the given point @p
+   * p.
+   *
+   * <tt>values</tt> shall have the right size beforehand, i.e. #n_components.
+   */
+  virtual void vector_value (const Point<dim>   &p,
+                             Vector<double>     &values) const;
+
+  /**
+   * @addtogroup Exceptions
+   * @{
+   */
+  DeclException2 (ExcParseError,
+                  int, char *,
+                  << "Parsing Error at Column " << arg1
+                  << ". The parser said: " << arg2);
+
+  DeclException2 (ExcInvalidExpressionSize,
+                  int, int,
+                  << "The number of components (" << arg1
+                  << ") is not equal to the number of expressions ("
+                  << arg2 << ").");
+
+  //@}
+
+private:
+#ifdef DEAL_II_WITH_MUPARSER
+  /**
+   * Place for the variables for each thread
+   */
+  mutable Threads::ThreadLocalStorage<std::vector<double> > vars;
+
+  /**
+   * The muParser objects for each thread (and one for each component)
+   */
+  mutable Threads::ThreadLocalStorage<std::vector<mu::Parser> > fp;
+
+  /**
+   * An array to keep track of all the constants, required to initialize fp in
+   * each thread.
+   */
+  std::map<std::string, double> constants;
+
+  /**
+   * An array for the variable names, required to initialize fp in each
+   * thread.
+   */
+  std::vector<std::string> var_names;
+
+  /**
+   * An array of function expressions (one per component), required to
+   * initialize fp in each thread.
+   */
+  std::vector<std::string> expressions;
+
+  /**
+   * Initialize fp and vars on the current thread. This function may only be
+   * called once per thread. A thread can test whether the function has
+   * already been called by testing whether 'fp.get().size()==0' (not
+   * initialized) or >0 (already initialized).
+   */
+  void init_muparser() const;
+#endif
+
+  /**
+   * State of usability. This variable is checked every time the function is
+   * called for evaluation. It's set to true in the initialize() methods.
+   */
+  bool initialized;
+
+  /**
+   * Number of variables. If this is also a function of time, then the number
+   * of variables is dim+1, otherwise it is dim. In the case that this is a
+   * time dependent function, the time is supposed to be the last variable. If
+   * #n_vars is not identical to the number of the variables parsed by the
+   * initialize() method, then an exception is thrown.
+   */
+  unsigned int n_vars;
+};
+
+
+template <int dim>
+std::string
+FunctionParser<dim>::default_variable_names ()
+{
+  // Comma-separated coordinate names for the supported dimensions.
+  if (dim == 1)
+    return "x";
+  else if (dim == 2)
+    return "x,y";
+  else if (dim == 3)
+    return "x,y,z";
+
+  // Every other dimension is unsupported: raise the assertion and fall
+  // through to an empty list of variable names.
+  Assert (false, ExcNotImplemented());
+  return "";
+}
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
+
+
diff --git a/include/deal.II/base/function_time.h b/include/deal.II/base/function_time.h
new file mode 100644
index 0000000..1c80ddd
--- /dev/null
+++ b/include/deal.II/base/function_time.h
@@ -0,0 +1,124 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__function_time_h
+#define dealii__function_time_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+
+DEAL_II_NAMESPACE_OPEN
+/**
+ * Support for time dependent functions. The library was also designed for
+ * time dependent problems. For this purpose, the function objects also
+ * contain a field which stores the time, as well as functions manipulating
+ * them. Time independent problems should not access or even abuse them for
+ * other purposes, but since one normally does not create thousands of
+ * function objects, the gain in generality outweighs the fact that we would
+ * not need to store the time value for time independent problems. The second
+ * advantage is that the derived standard classes like <tt>ZeroFunction</tt>,
+ * <tt>ConstantFunction</tt> etc also work for time dependent problems.
+ *
+ * Access to the time goes through the following functions:
+ * <ul>
+ *  <li> <tt>get_time</tt>: return the present value of the time variable.
+ *  <li> <tt>set_time</tt>: set the time value to a specific value.
+ *  <li> <tt>advance_time</tt>: increase the time by a certain time step.
+ * </ul>
+ * The latter two functions are virtual, so that derived classes can perform
+ * computations which need only be done once for every new time. For example,
+ * if a time dependent function had a factor <tt>sin(t)</tt>, then it may be a
+ * reasonable choice to calculate this factor in a derived version of
+ * set_time(), store it in a member variable and use that one rather than
+ * computing it every time <tt>value()</tt>, <tt>value_list</tt> or one of the
+ * other functions of class Function is called.
+ *
+ * By default, the advance_time() function calls the set_time() function with
+ * the new time, so it is sufficient in most cases to overload only set_time()
+ * for computations as sketched out above.
+ *
+ * The constructor of this class takes an initial value for the time variable,
+ * which defaults to zero. Because a default value is given, none of the
+ * derived classes needs to take an initial value for the time variable if not
+ * needed.
+ *
+ * @tparam Number The data type in which time values are to be stored. This
+ * will, in almost all cases, simply be the default @p double, but there are
+ * cases where one may want to store the time in a different (and always
+ * scalar) type. An example would be an interval type that can store a value
+ * as well as its uncertainty. Another example would be a type that allows for
+ * Automatic Differentiation (see, for example, the Sacado type used in
+ * step-33) and thereby can generate analytic (temporal) derivatives of a
+ * function.
+ *
+ *
+ * @ingroup functions
+ * @author Wolfgang Bangerth, Guido Kanschat, 1998, 1999
+ */
+template <typename Number=double>
+class FunctionTime
+{
+public:
+  /**
+   * Constructor. May take an initial value for the time variable, which
+   * defaults to zero.
+   */
+  FunctionTime (const Number initial_time = Number(0.0));
+
+  /**
+   * Virtual destructor.
+   */
+  virtual ~FunctionTime();
+
+  /**
+   * Return the value of the time variable.
+   */
+  Number get_time () const;
+
+  /**
+   * Set the time to <tt>new_time</tt>, overwriting the old value.
+   */
+  virtual void set_time (const Number new_time);
+
+  /**
+   * Advance the time by the given time step <tt>delta_t</tt>.
+   */
+  virtual void advance_time (const Number delta_t);
+
+private:
+  /**
+   * Store the present time.
+   */
+  Number time;
+};
+
+
+
+/*------------------------------ Inline functions ------------------------------*/
+
+#ifndef DOXYGEN
+
+template<typename Number>
+inline Number
+FunctionTime<Number>::get_time () const
+{
+  return time; // hand back a copy of the stored time value
+}
+
+#endif
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/function_time.templates.h b/include/deal.II/base/function_time.templates.h
new file mode 100644
index 0000000..5ec72ad
--- /dev/null
+++ b/include/deal.II/base/function_time.templates.h
@@ -0,0 +1,56 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__function_time_templates_h
+#define dealii__function_time_templates_h
+
+#include <deal.II/base/function_time.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+template<typename Number>
+FunctionTime<Number>::FunctionTime(const Number initial_time)
+  :
+  time(initial_time) // the stored time starts at the value passed in
+{}
+
+
+
+template<typename Number>
+FunctionTime<Number>::~FunctionTime()
+{} // intentionally empty
+
+
+
+template<typename Number>
+void
+FunctionTime<Number>::set_time (const Number new_time)
+{
+  time = new_time; // replace the stored time; the old value is discarded
+}
+
+
+template<typename Number>
+void
+FunctionTime<Number>::advance_time (const Number delta_t)
+{
+  set_time (time+delta_t); // go through set_time(), which is virtual, instead of assigning directly
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif /* dealii__function_time_templates_h */
diff --git a/include/deal.II/base/geometry_info.h b/include/deal.II/base/geometry_info.h
new file mode 100644
index 0000000..5b78036
--- /dev/null
+++ b/include/deal.II/base/geometry_info.h
@@ -0,0 +1,2663 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__geometry_info_h
+#define dealii__geometry_info_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/point.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/**
+ * A class that can represent the kinds of objects a triangulation is made up
+ * of: vertices, lines, quads and hexes.
+ *
+ * The class is rather primitive: it only stores a single integer that
+ * represents the dimensionality of the object represented. In other words,
+ * this class is useful primarily as a way to pass around an object whose data
+ * type explains what it does (unlike just passing around an integer), and for
+ * providing symbolic names for these objects such as
+ * GeometryPrimitive::vertex instead of an integer zero.
+ *
+ * Since it is useful to be able to identify such objects with the integral
+ * dimension of the object represented, this class provides conversion
+ * operators to and from unsigned integers.
+ *
+ * @author Wolfgang Bangerth, 2014
+ */
+class GeometryPrimitive
+{
+public:
+  /**
+   * An enumeration providing symbolic names for the objects that can be
+   * represented by this class. The numeric values of these symbolic names
+   * equal the geometric dimensionality of the represented objects to make
+   * conversion from and to integer variables simpler.
+   */
+  enum Object
+  {
+    vertex = 0,
+    line   = 1,
+    quad   = 2,
+    hex    = 3
+  };
+
+  /**
+   * Constructor. Initialize the object with the given argument representing a
+   * vertex, line, etc.
+   */
+  GeometryPrimitive (const Object object);
+
+  /**
+   * Constructor. Initialize the object with an integer that should represent
+   * the dimensionality of the geometric object in question. This will usually
+   * be a number between zero (a vertex) and three (a hexahedron).
+   */
+  GeometryPrimitive (const unsigned int object_dimension);
+
+  /**
+   * Return the integral dimension of the object currently represented, i.e.
+   * zero for a vertex, one for a line, etc.
+   */
+  operator unsigned int () const;
+
+private:
+  /**
+   * The object currently represented.
+   */
+  Object object;
+};
+
+
+
+
+/**
+ * A class that provides possible choices for isotropic and anisotropic
+ * refinement flags in the current space dimension.
+ *
+ * This general template is unused except in some weird template constructs.
+ * Actual use is made, however, of the specializations
+ * <code>RefinementPossibilities@<1@></code>,
+ * <code>RefinementPossibilities@<2@></code>, and
+ * <code>RefinementPossibilities@<3@></code>.
+ *
+ * @ingroup aniso
+ * @author Ralf Hartmann, 2005, Wolfgang Bangerth, 2007
+ */
+template <int dim>
+struct RefinementPossibilities
+{
+  /**
+   * Possible values for refinement cases in the current dimension.
+   *
+   * Note the construction of the values: the lowest bit describes a cut of
+   * the x-axis, the second to lowest bit corresponds to a cut of the y-axis
+   * and the third to lowest bit corresponds to a cut of the z-axis. Thus, the
+   * following relations hold (among others):
+   *
+   * @code
+   * cut_xy  == cut_x  | cut_y
+   * cut_xyz == cut_xy | cut_xz
+   * cut_x   == cut_xy & cut_xz
+   * @endcode
+   *
+   * Only those cuts that are reasonable in a given space dimension are
+   * offered, of course.
+   *
+   * In addition, the tag <code>isotropic_refinement</code> denotes isotropic
+   * refinement in the space dimension selected by the template argument of
+   * this class.
+   */
+  enum Possibilities
+  {
+    no_refinement= 0,
+
+    isotropic_refinement = static_cast<unsigned char>(-1)
+  };
+};
+
+
+
+/**
+ * A class that provides possible choices for isotropic and anisotropic
+ * refinement flags in the current space dimension.
+ *
+ * This specialization is used for <code>dim=1</code>, where it offers
+ * refinement in x-direction.
+ *
+ * @ingroup aniso
+ * @author Ralf Hartmann, 2005, Wolfgang Bangerth, 2007
+ */
+template <>
+struct RefinementPossibilities<1>
+{
+  /**
+   * Possible values for refinement cases in the current dimension.
+   *
+   * Note the construction of the values: the lowest bit describes a cut of
+   * the x-axis, the second to lowest bit corresponds to a cut of the y-axis
+   * and the third to lowest bit corresponds to a cut of the z-axis. Thus, the
+   * following relations hold (among others):
+   *
+   * @code
+   * cut_xy  == cut_x  | cut_y
+   * cut_xyz == cut_xy | cut_xz
+   * cut_x   == cut_xy & cut_xz
+   * @endcode
+   *
+   * Only those cuts that are reasonable in a given space dimension are
+   * offered, of course.
+   *
+   * In addition, the tag <code>isotropic_refinement</code> denotes isotropic
+   * refinement in the space dimension selected by the template argument of
+   * this class.
+   */
+  enum Possibilities
+  {
+    no_refinement= 0,
+    cut_x        = 1,
+
+    isotropic_refinement = cut_x
+  };
+};
+
+
+
+/**
+ * A class that provides possible choices for isotropic and anisotropic
+ * refinement flags in the current space dimension.
+ *
+ * This specialization is used for <code>dim=2</code>, where it offers
+ * refinement in x- and y-direction separately, as well as isotropic
+ * refinement in both directions at the same time.
+ *
+ * @ingroup aniso
+ * @author Ralf Hartmann, 2005, Wolfgang Bangerth, 2007
+ */
+template <>
+struct RefinementPossibilities<2>
+{
+  /**
+   * Possible values for refinement cases in the current dimension.
+   *
+   * Note the construction of the values: the lowest bit describes a cut of
+   * the x-axis, the second to lowest bit corresponds to a cut of the y-axis
+   * and the third to lowest bit corresponds to a cut of the z-axis. Thus, the
+   * following relations hold (among others):
+   *
+   * @code
+   * cut_xy  == cut_x  | cut_y
+   * cut_xyz == cut_xy | cut_xz
+   * cut_x   == cut_xy & cut_xz
+   * @endcode
+   *
+   * Only those cuts that are reasonable in a given space dimension are
+   * offered, of course.
+   *
+   * In addition, the tag <code>isotropic_refinement</code> denotes isotropic
+   * refinement in the space dimension selected by the template argument of
+   * this class.
+   */
+  enum Possibilities
+  {
+    no_refinement= 0,
+    cut_x        = 1,
+    cut_y        = 2,
+    cut_xy       = cut_x | cut_y,
+
+    isotropic_refinement = cut_xy
+  };
+};
+
+
+
+/**
+ * A class that provides possible choices for isotropic and anisotropic
+ * refinement flags in the current space dimension.
+ *
+ * This specialization is used for <code>dim=3</code>, where it offers
+ * refinement in x-, y- and z-direction separately, as well as combinations of
+ * these and isotropic refinement in all directions at the same time.
+ *
+ * @ingroup aniso
+ * @author Ralf Hartmann, 2005, Wolfgang Bangerth, 2007
+ */
+template <>
+struct RefinementPossibilities<3>
+{
+  /**
+   * Possible values for refinement cases in the current dimension.
+   *
+   * Note the construction of the values: the lowest bit describes a cut of
+   * the x-axis, the second to lowest bit corresponds to a cut of the y-axis
+   * and the third to lowest bit corresponds to a cut of the z-axis. Thus, the
+   * following relations hold (among others):
+   *
+   * @code
+   * cut_xy  == cut_x  | cut_y
+   * cut_xyz == cut_xy | cut_xz
+   * cut_x   == cut_xy & cut_xz
+   * @endcode
+   *
+   * Only those cuts that are reasonable in a given space dimension are
+   * offered, of course.
+   *
+   * In addition, the tag <code>isotropic_refinement</code> denotes isotropic
+   * refinement in the space dimension selected by the template argument of
+   * this class.
+   */
+  enum Possibilities
+  {
+    no_refinement= 0,
+    cut_x        = 1,
+    cut_y        = 2,
+    cut_xy       = cut_x | cut_y,
+    cut_z        = 4,
+    cut_xz       = cut_x | cut_z,
+    cut_yz       = cut_y | cut_z,
+    cut_xyz      = cut_x | cut_y | cut_z,
+
+    isotropic_refinement = cut_xyz
+  };
+};
+
+
+
+/**
+ * A class storing the possible anisotropic and isotropic refinement cases of
+ * an object with <code>dim</code> dimensions (for example, for a line
+ * <code>dim=1</code> in whatever space dimension we are, for a quad
+ * <code>dim=2</code>, etc.). Possible values of this class are the ones
+ * listed in the enumeration declared within the class.
+ *
+ * @ingroup aniso
+ * @author Ralf Hartmann, 2005, Wolfgang Bangerth, 2007
+ */
+template <int dim>
+class RefinementCase : public RefinementPossibilities<dim>
+{
+public:
+  /**
+   * Default constructor. Initialize the refinement case with no_refinement.
+   */
+  RefinementCase ();
+
+  /**
+   * Constructor. Take and store a value indicating a particular refinement
+   * from the list of possible refinements specified in the base class.
+   */
+  RefinementCase (const typename RefinementPossibilities<dim>::Possibilities refinement_case);
+
+  /**
+   * Constructor. Take and store a value indicating a particular refinement as
+   * a bit field. To avoid implicit conversions to and from integral values,
+   * this constructor is marked as explicit.
+   */
+  explicit RefinementCase (const unsigned char refinement_case);
+
+  /**
+   * Return the numeric value stored by this class. While the presence of this
+   * operator might seem dangerous, it is useful in cases where one would like
+   * to have code like <tt>switch (refinement_flag)... case
+   * RefinementCase<dim>::cut_x: ... </tt>, which can be written as
+   * <code>switch (static_cast@<unsigned char@>(refinement_flag))</code>.
+   * Another application is to use an object of the current type as an index
+   * into an array; however, this use is deprecated as it assumes a certain
+   * mapping from the symbolic flags defined in the RefinementPossibilities
+   * base class to actual numerical values (the array indices).
+   */
+  operator unsigned char () const;
+
+  /**
+   * Return the union of the refinement flags represented by the current
+   * object and the one given as argument.
+   */
+  RefinementCase operator | (const RefinementCase &r) const;
+
+  /**
+   * Return the intersection of the refinement flags represented by the
+   * current object and the one given as argument.
+   */
+  RefinementCase operator & (const RefinementCase &r) const;
+
+  /**
+   * Return the negation of the refinement flags represented by the current
+   * object. For example, in 2d, if the current object holds the flag
+   * <code>cut_x</code>, then the returned value will be <code>cut_y</code>;
+   * if the current value is <code>isotropic_refinement</code> then the result
+   * will be <code>no_refinement</code>; etc.
+   */
+  RefinementCase operator ~ () const;
+
+
+  /**
+   * Return the flag that corresponds to cutting a cell along the axis given
+   * as argument. For example, if <code>i=0</code> then the returned value is
+   * <tt>RefinementPossibilities<dim>::cut_x</tt>.
+   */
+  static
+  RefinementCase cut_axis (const unsigned int i);
+
+  /**
+   * Return the amount of memory occupied by an object of this type.
+   */
+  static std::size_t memory_consumption ();
+
+  /**
+   * Read or write the data of this object to or from a stream for the purpose
+   * of serialization.
+   */
+  template <class Archive>
+  void serialize(Archive &ar,
+                 const unsigned int version);
+
+  /**
+   * Exception.
+   */
+  DeclException1 (ExcInvalidRefinementCase,
+                  int,
+                  << "The refinement flags given (" << arg1 << ") contain set bits that do not "
+                  << "make sense for the space dimension of the object to which they are applied.");
+
+private:
+  /**
+   * Store the refinement case as a bit field with as many bits as are
+   * necessary in any given dimension.
+   */
+unsigned char value :
+  (dim > 0 ? dim : 1);
+};
+
+
+namespace internal
+{
+
+
+  /**
+   * A class that provides all possible situations a face (in the current
+   * space dimension @p dim) might be subdivided into subfaces. For
+   * <code>dim=1</code> and <code>dim=2</code> they correspond to the cases
+   * given in <code>RefinementPossibilities@<dim-1@></code>. However,
+   * <code>SubfacePossibilities@<3@></code> includes the refinement cases of
+   * <code>RefinementPossibilities@<2@></code>, but additionally some subface
+   * possibilities a face might be subdivided into which occur through
+   * repeated anisotropic refinement steps performed on one of two neighboring
+   * cells.
+   *
+   * This general template is unused except in some weird template constructs.
+   * Actual use is made, however, of the specializations
+   * <code>SubfacePossibilities@<1@></code>,
+   * <code>SubfacePossibilities@<2@></code> and
+   * <code>SubfacePossibilities@<3@></code>.
+   *
+   * @ingroup aniso
+   * @author Tobias Leicht 2007, Ralf Hartmann, 2008
+   */
+  template <int dim>
+  struct SubfacePossibilities
+  {
+    /**
+     * Possible cases of faces being subdivided into subfaces.
+     */
+    enum Possibilities
+    {
+      case_none = 0,
+
+      case_isotropic = static_cast<unsigned char>(-1)
+    };
+  };
+
+
+  /**
+   * A class that provides all possible situations a face (in the current
+   * space dimension @p dim) might be subdivided into subfaces.
+   *
+   * For <code>dim=0</code> we provide a dummy implementation only.
+   *
+   * @ingroup aniso
+   * @author Ralf Hartmann, 2008
+   */
+  template <>
+  struct SubfacePossibilities<0>
+  {
+    /**
+     * Possible cases of faces being subdivided into subfaces.
+     *
+     * Dummy implementation.
+     */
+    enum Possibilities
+    {
+      case_none = 0,
+
+      case_isotropic = case_none
+    };
+  };
+
+
+
+  /**
+   * A class that provides all possible situations a face (in the current
+   * space dimension @p dim) might be subdivided into subfaces.
+   *
+   * For <code>dim=1</code> there are no faces. Thereby, there are no subface
+   * possibilities.
+   *
+   * @ingroup aniso
+   * @author Ralf Hartmann, 2008
+   */
+  template <>
+  struct SubfacePossibilities<1>
+  {
+    /**
+     * Possible cases of faces being subdivided into subfaces.
+     *
+     * In 1d there are no faces, thus no subface possibilities.
+     */
+    enum Possibilities
+    {
+      case_none = 0,
+
+      case_isotropic = case_none
+    };
+  };
+
+
+
+  /**
+   * A class that provides all possible situations a face (in the current
+   * space dimension @p dim) might be subdivided into subfaces.
+   *
+   * This specialization is used for <code>dim=2</code>, where it offers the
+   * following possibilities: a face (line) being refined
+   * (<code>case_x</code>) or not refined (<code>case_none</code>).
+   *
+   * @ingroup aniso
+   * @author Ralf Hartmann, 2008
+   */
+  template <>
+  struct SubfacePossibilities<2>
+  {
+    /**
+     * Possible cases of faces being subdivided into subfaces.
+     *
+     * In 2d there are the following possibilities: a face (line) being refined
+     * (<code>case_x</code>) or not refined (<code>case_none</code>).
+     */
+    enum Possibilities
+    {
+      case_none = 0,
+      case_x    = 1,
+
+      case_isotropic = case_x
+    };
+  };
+
+
+
+  /**
+   * A class that provides all possible situations a face (in the current
+   * space dimension @p dim) might be subdivided into subfaces.
+   *
+   * This specialization is used for dim=3, where it offers the following
+   * possibilities: a face (quad) being refined in x- or y-direction (in the
+   * face-internal coordinate system) separately (<code>case_x</code> or
+   * <code>case_y</code>), and in both directions (<code>case_xy</code>, which
+   * corresponds to <code>case_isotropic</code>). Additionally, it offers the
+   * possibilities a face can have through repeated anisotropic refinement
+   * steps performed on one of the two neighboring cells.  It might be
+   * possible for example, that a face (quad) is refined with
+   * <code>cut_x</code> and afterwards the left child is again refined with
+   * <code>cut_y</code>, so that there are three active subfaces. Note,
+   * however, that only refinement cases are allowed such that each line on a
+   * face between two hexes has not more than one hanging node. Furthermore,
+   * it is not allowed that two neighboring hexes are refined such that one of
+   * the hexes refines the common face with <code>cut_x</code> and the other
+   * hex refines that face with <code>cut_y</code>. In fact,
+   * Triangulation::prepare_coarsening_and_refinement takes care of this
+   * situation and ensures that each face of a refined cell is completely
+   * contained in a single face of neighboring cells.
+   *
+   * The following drawings explain the SubfacePossibilities and give the
+   * corresponding subface numbers:
+   * @code
+   *
+   * *-------*
+   * |       |
+   * |   0   |    case_none
+   * |       |
+   * *-------*
+   *
+   * *---*---*
+   * |   |   |
+   * | 0 | 1 |    case_x
+   * |   |   |
+   * *---*---*
+   *
+   * *---*---*
+   * | 1 |   |
+   * *---* 2 |    case_x1y
+   * | 0 |   |
+   * *---*---*
+   *
+   * *---*---*
+   * |   | 2 |
+   * | 0 *---*    case_x2y
+   * |   | 1 |
+   * *---*---*
+   *
+   * *---*---*
+   * | 1 | 3 |
+   * *---*---*    case_x1y2y   (successive refinement: first cut_x, then cut_y for both children)
+   * | 0 | 2 |
+   * *---*---*
+   *
+   * *-------*
+   * |   1   |
+   * *-------*    case_y
+   * |   0   |
+   * *-------*
+   *
+   * *-------*
+   * |   2   |
+   * *---*---*    case_y1x
+   * | 0 | 1 |
+   * *---*---*
+   *
+   * *---*---*
+   * | 1 | 2 |
+   * *---*---*    case_y2x
+   * |   0   |
+   * *-------*
+   *
+   * *---*---*
+   * | 2 | 3 |
+   * *---*---*    case_y1x2x   (successive refinement: first cut_y, then cut_x for both children)
+   * | 0 | 1 |
+   * *---*---*
+   *
+   * *---*---*
+   * | 2 | 3 |
+   * *---*---*    case_xy      (one isotropic refinement step)
+   * | 0 | 1 |
+   * *---*---*
+   *
+   * @endcode
+   *
+   * @ingroup aniso
+   * @author Tobias Leicht 2007, Ralf Hartmann, 2008
+   */
+  template <>
+  struct SubfacePossibilities<3>
+  {
+    /**
+     * Possible cases of faces being subdivided into subfaces.
+     *
+     * See the documentation of SubfacePossibilities<3> for more details on
+     * the subface possibilities.
+     */
+    enum Possibilities
+    {
+      case_none  = 0,
+      case_x     = 1,
+      case_x1y   = 2,
+      case_x2y   = 3,
+      case_x1y2y = 4,
+      case_y     = 5,
+      case_y1x   = 6,
+      case_y2x   = 7,
+      case_y1x2x = 8,
+      case_xy    = 9,
+
+      case_isotropic = case_xy
+    };
+  };
+
+
+
+
+  /**
+   * A class that provides all possible cases a face (in the current space
+   * dimension @p dim) might be subdivided into subfaces.
+   *
+   * @ingroup aniso
+   * @author Ralf Hartmann, 2008
+   */
+  template <int dim>
+  class SubfaceCase : public SubfacePossibilities<dim>
+  {
+  public:
+    /**
+     * Constructor. Take and store a value indicating a particular subface
+     * possibility in the list of possible situations specified in the base
+     * class.
+     */
+    SubfaceCase (const typename SubfacePossibilities<dim>::Possibilities subface_possibility);
+
+    /**
+     * Return the numeric value stored by this class. While the presence of
+     * this operator might seem dangerous, it is useful in cases where one
+     * would like to have code like <code>switch (subface_case)... case
+     * SubfaceCase::case_x: ... </code>, which can be written as <code>switch
+     * (static_cast@<unsigned char@>(subface_case))</code>. Another application
+     * is to use an object of the current type as an index into an array;
+     * however, this use is deprecated as it assumes a certain mapping from
+     * the symbolic flags defined in the SubfacePossibilities base class to
+     * actual numerical values (the array indices).
+     */
+    operator unsigned char () const;
+
+    /**
+     * Return the amount of memory occupied by an object of this type.
+     */
+    static std::size_t memory_consumption ();
+
+    /**
+     * Exception.
+     */
+    DeclException1 (ExcInvalidSubfaceCase,
+                    int,
+                    << "The subface case given (" << arg1 << ") does not make sense "
+                    << "for the space dimension of the object to which they are applied.");
+
+  private:
+    /**
+     * Store the refinement case as a bit field with as many bits as are
+     * necessary in any given dimension.
+     */
+  unsigned char value :
+    (dim == 3 ? 4 : 1);
+  };
+
+} // namespace internal
+
+
+
+template <int dim> struct GeometryInfo; // declared first so the GeometryInfo<0> specialization can precede the general template
+
+
+
+
+/**
+ * Topological description of zero dimensional cells, i.e. points. This class
+ * might not look too useful but often is if in a certain dimension we would
+ * like to enquire information about objects with dimension one lower than the
+ * present, e.g. about faces.
+ *
+ * This class contains as static members information on vertices and faces of
+ * a @p dim-dimensional grid cell. The interface is the same for all
+ * dimensions. If a value is of no use in a low dimensional cell, it is
+ * (correctly) set to zero, e.g. #max_children_per_face in 1d.
+ *
+ * This information should always replace hard-coded numbers of vertices,
+ * neighbors and so on, since it can be used dimension independently.
+ *
+ * @ingroup grid geomprimitives aniso
+ * @author Wolfgang Bangerth, 1998
+ */
+template <>
+struct GeometryInfo<0>
+{
+
+  /**
+   * Maximum number of children of a cell, i.e. the number of children of an
+   * isotropically refined cell.
+   *
+   * If a cell is refined anisotropically, the actual number of children may
+   * be less than the value given here.
+   */
+  static const unsigned int max_children_per_cell = 1;
+
+  /**
+   * Number of faces a cell has.
+   */
+  static const unsigned int faces_per_cell    = 0;
+
+  /**
+   * Maximum number of children of a refined face, i.e. the number of children
+   * of an isotropically refined face.
+   *
+   * If a cell is refined anisotropically, the actual number of children may
+   * be less than the value given here.
+   */
+  static const unsigned int max_children_per_face = 0;
+
+  /**
+   * Return the number of children of a cell (or face) refined with
+   * @p refinement_case. Since we are concerned here with points, the number
+   * of children is equal to one.
+   */
+  static unsigned int n_children(const RefinementCase<0> &refinement_case);
+
+  /**
+   * Number of vertices a cell has.
+   */
+  static const unsigned int vertices_per_cell = 1;
+
+  /**
+   * Number of vertices each face has. Since this is not useful in zero
+   * dimensions, we provide a useless number (in the hope that a compiler may
+   * warn when it sees constructs like <tt>for (i=0; i<vertices_per_face;
+   * ++i)</tt>, at least if @p i is an <tt>unsigned int</tt>).
+   */
+  static const unsigned int vertices_per_face = 0;
+
+  /**
+   * Number of lines each face has.
+   */
+  static const unsigned int lines_per_face    = 0;
+
+  /**
+   * Number of quads on each face.
+   */
+  static const unsigned int quads_per_face    = 0;
+
+  /**
+   * Number of lines of a cell.
+   */
+  static const unsigned int lines_per_cell    = 0;
+
+  /**
+   * Number of quadrilaterals of a cell.
+   */
+  static const unsigned int quads_per_cell    = 0;
+
+  /**
+   * Number of hexahedra of a cell.
+   */
+  static const unsigned int hexes_per_cell    = 0;
+};
+
+
+
+
+
+/**
+ * This class provides dimension independent information to all topological
+ * structures that make up the unit, or
+ * @ref GlossReferenceCell "reference cell".
+ *
+ * It is the one central point in the library where information about the
+ * numbering of vertices, lines, or faces of the reference cell is collected.
+ * Consequently, the information of this class is used extensively in the
+ * geometric description of Triangulation objects, as well as in various other
+ * parts of the code. In particular, it also serves as the focus of writing
+ * code in a dimension independent way; for example, instead of writing a loop
+ * over vertices 0<=v<4 in 2d, one would write it as
+ * 0<=v<GeometryInfo<dim>::vertices_per_cell, thus allowing the code to work
+ * in 3d as well without changes.
+ *
+ * The most frequently used parts of the class are its static members like
+ * vertices_per_cell, faces_per_cell, etc. However, the class also offers
+ * information about more abstract questions like the orientation of faces,
+ * etc. The following documentation gives a textual description of many of
+ * these concepts.
+ *
+ *
+ * <h3>Implementation conventions for two spatial dimensions</h3>
+ *
+ * From version 5.2 onwards deal.II is based on a numbering scheme that uses a
+ * lexicographic ordering (with x running fastest) wherever possible, hence
+ * trying to adopt a kind of 'canonical' ordering.
+ *
+ * The ordering of vertices and faces (lines) in 2d is defined by
+ *
+ * - Vertices are numbered in lexicographic ordering
+ *
+ * - Faces (lines in 2d): first the two faces with normals in x- and then
+ * y-direction. For each two faces: first the face with normal in negative
+ * coordinate direction, then the one with normal in positive direction, i.e.
+ * the faces are ordered according to their normals pointing in -x, x, -y, y
+ * direction.
+ *
+ * - The direction of a line is represented by the direction of point 0
+ * towards point 1 and is always in one of the coordinate directions
+ *
+ * - Face lines in 3d are ordered, such that the induced 2d local coordinate
+ * system (x,y) implies (right hand rule) a normal in face normal direction,
+ * see N2/.
+ *
+ * The resulting numbering of vertices and faces (lines) in 2d as well as the
+ * directions of lines is shown in the following.
+ * @verbatim
+ *       3
+ *    2-->--3
+ *    |     |
+ *   0^     ^1
+ *    |     |
+ *    0-->--1
+ *        2
+ * @endverbatim
+ *
+ * Note that the orientation of lines has to be correct upon construction of a
+ * grid; however, it is automatically preserved upon refinement.
+ *
+ * Further we define that child lines have the same direction as their parent,
+ * i.e. that <tt>line->child(0)->vertex(0)==line->vertex(0)</tt> and
+ * <tt>line->child(1)->vertex(1)==line->vertex(1)</tt>. This also implies,
+ * that the first sub-line (<tt>line->child(0)</tt>) is the one at vertex(0)
+ * of the old line.
+ *
+ * Similarly we define, that the four children of a quad are adjacent to the
+ * vertex with the same number of the old quad.
+ *
+ * Note that information about several of these conventions can be extracted
+ * at run- or compile-time from the member functions and variables of the
+ * present class.
+ *
+ *
+ * <h4>Coordinate systems</h4>
+ *
+ * When explicit coordinates are required for points in a cell (e.g. for
+ * quadrature formulae or the point of definition of trial functions), we
+ * define the following coordinate system for the unit cell:
+ * @verbatim
+ *  y^   2-----3
+ *   |   |     |
+ *   |   |     |
+ *   |   |     |
+ *   |   0-----1
+ *   *------------>x
+ * @endverbatim
+ *
+ * Here, vertex 0 is the origin of the coordinate system, vertex 1 has
+ * coordinates <tt>(1,0)</tt>, vertex 2 at <tt>(0,1)</tt> and vertex 3 at
+ * <tt>(1,1)</tt>. The GeometryInfo<dim>::unit_cell_vertex() function can be
+ * used to query this information at run-time.
+ *
+ *
+ * <h3>Implementation conventions for three spatial dimensions</h3>
+ *
+ * By convention, we will use the following numbering conventions for
+ * vertices, lines and faces of hexahedra in three space dimensions. Before
+ * giving these conventions we declare the following sketch to be the standard
+ * way of drawing 3d pictures of hexahedra:
+ * @verbatim
+ *                       *-------*        *-------*
+ *                      /|       |       /       /|
+ *                     / |       |      /       / |
+ *  z                 /  |       |     /       /  |
+ *  ^                *   |       |    *-------*   |
+ *  |   ^y           |   *-------*    |       |   *
+ *  |  /             |  /       /     |       |  /
+ *  | /              | /       /      |       | /
+ *  |/               |/       /       |       |/
+ *  *------>x        *-------*        *-------*
+ * @endverbatim
+ * The left part of the picture shows the left, bottom and back face of the
+ * cube, while the right one shall be the top, right and front face. You may
+ * recover the whole cube by moving the two parts together into one.
+ *
+ * Note again that information about several of the following conventions can
+ * be extracted at run- or compile-time from the member functions and
+ * variables of the present class.
+ *
+ * <h4>Vertices</h4>
+ *
+ * The ordering of vertices in 3d is defined by the same rules as in the 2d
+ * case. In particular, the following is still true:
+ *
+ * - Vertices are numbered in lexicographic ordering.
+ *
+ * Hence, the vertices are numbered as follows
+ * @verbatim
+ *       6-------7        6-------7
+ *      /|       |       /       /|
+ *     / |       |      /       / |
+ *    /  |       |     /       /  |
+ *   4   |       |    4-------5   |
+ *   |   2-------3    |       |   3
+ *   |  /       /     |       |  /
+ *   | /       /      |       | /
+ *   |/       /       |       |/
+ *   0-------1        0-------1
+ * @endverbatim
+ *
+ * We note, that first the vertices on the bottom face (z=0) are numbered
+ * exactly the same way as are the vertices on a quadrilateral. Then the
+ * vertices on the top face (z=1) are numbered similarly by moving the bottom
+ * face to the top. Again, the GeometryInfo<dim>::unit_cell_vertex() function
+ * can be used to query this information at run-time.
+ *
+ *
+ * <h4>Lines</h4>
+ *
+ * Here, the same holds as for the vertices:
+ *
+ * - Line ordering in 3d:
+ * <ul>
+ * <li>first the lines of face (z=0) in 2d line ordering,
+ * <li>then the lines of face (z=1) in 2d line ordering,
+ * <li>finally the lines in z direction in lexicographic ordering
+ * </ul>
+ * @verbatim
+ *       *---7---*        *---7---*
+ *      /|       |       /       /|
+ *     4 |       11     4       5 11
+ *    /  10      |     /       /  |
+ *   *   |       |    *---6---*   |
+ *   |   *---3---*    |       |   *
+ *   |  /       /     |       9  /
+ *   8 0       1      8       | 1
+ *   |/       /       |       |/
+ *   *---2---*        *---2---*
+ * @endverbatim
+ * As in 2d lines are directed in coordinate directions.
+ * @verbatim
+ *       *--->---*        *--->---*
+ *      /|       |       /       /|
+ *     ^ |       ^      ^       ^ ^
+ *    /  ^       |     /       /  |
+ *   *   |       |    *--->---*   |
+ *   |   *--->---*    |       |   *
+ *   |  /       /     |       ^  /
+ *   ^ ^       ^      ^       | ^
+ *   |/       /       |       |/
+ *   *--->---*        *--->---*
+ * @endverbatim
+ *
+ * The fact that edges (just as vertices and faces) are entities that are
+ * stored in their own right rather than constructed from cells each time they
+ * are needed, means that adjacent cells actually have pointers to edges that
+ * are thus shared between them. This implies that the convention that sets of
+ * parallel edges have parallel directions is not only a local condition.
+ * Before a list of cells is passed to an object of the Triangulation class
+ * for creation of a triangulation, you therefore have to make sure that cells
+ * are oriented in a compatible fashion, so that edge directions are globally
+ * according to above convention. However, the GridReordering class can do
+ * this for you, by reorienting cells and edges of an arbitrary list of input
+ * cells that need not be already sorted.
+ *
+ * <h4>Faces</h4>
+ *
+ * The numbering of faces in 3d is defined by a rule analogous to 2d:
+ *
+ * - Faces (quads in 3d): first the two faces with normals in x-, then y- and
+ * z-direction. For each two faces: first the face with normal in negative
+ * coordinate direction, then the one with normal in positive direction, i.e.
+ * the faces are ordered according to their normals pointing in -x, x, -y, y,
+ * -z, z direction.
+ *
+ * Therefore, the faces are numbered in the ordering: left, right, front,
+ * back, bottom and top face:
+ * @verbatim
+ *       *-------*        *-------*
+ *      /|       |       /       /|
+ *     / |   3   |      /   5   / |
+ *    /  |       |     /       /  |
+ *   *   |       |    *-------*   |
+ *   | 0 *-------*    |       | 1 *
+ *   |  /       /     |       |  /
+ *   | /   4   /      |   2   | /
+ *   |/       /       |       |/
+ *   *-------*        *-------*
+ * @endverbatim
+ *
+ * The <em>standard</em> direction of the faces is such, that the induced 2d
+ * local coordinate system (x,y) implies (right hand rule) a normal in face
+ * normal direction, see N2a).  In the following we show the local coordinate
+ * system and the numbering of face lines:
+ * <ul>
+ * <li> Faces 0 and 1:
+ *  @verbatim
+ *          Face 0           Face 1
+ *        *-------*        *-------*
+ *       /|       |       /       /|
+ *      3 1       |      /       3 1
+ *    y/  |       |     /      y/  |
+ *    *   |x      |    *-------*   |x
+ *    |   *-------*    |       |   *
+ *    0  /       /     |       0  /
+ *    | 2       /      |       | 2
+ *    |/       /       |       |/
+ *    *-------*        *-------*
+ *  @endverbatim
+ *
+ * <li> Faces 2 and 3:
+ *  @verbatim
+ *        x Face 3           Face 2
+ *        *---1---*        *-------*
+ *       /|       |       /       /|
+ *      / |       3      /       / |
+ *     /  2       |    x/       /  |
+ *    *   |       |    *---1---*   |
+ *    |   *---0---*y   |       |   *
+ *    |  /       /     |       3  /
+ *    | /       /      2       | /
+ *    |/       /       |       |/
+ *    *-------*        *---0---*y
+ *  @endverbatim
+ *
+ * <li> Faces 4 and 5:
+ *  @verbatim
+ *          Face 4         y Face 5
+ *        *-------*        *---3---*
+ *       /|       |       /       /|
+ *      / |       |      0       1 |
+ *     /  |       |     /       /  |
+ *    *   |y      |    *---2---* x |
+ *    |   *---3---*    |       |   *
+ *    |  /       /     |       |  /
+ *    | 0       1      |       | /
+ *    |/       /       |       |/
+ *    *---2---* x      *-------*
+ *  @endverbatim
+ * </ul>
+ *
+ * The face line numbers (0,1,2,3) correspond to following cell line numbers.
+ * <ul>
+ * <li> Face 0: lines 8, 10, 0, 4;
+ * <li> Face 1: lines 9, 11, 1, 5;
+ * <li> Face 2: lines 2, 6, 8, 9;
+ * <li> Face 3: lines 3, 7, 10, 11;
+ * <li> Face 4: lines 0, 1, 2, 3;
+ * <li> Face 5: lines 4, 5, 6, 7;
+ * </ul>
+ * You can get these numbers using the GeometryInfo<3>::face_to_cell_lines()
+ * function.
+ *
+ * The face normals can be deduced from the face orientation by applying the
+ * right hand rule (x,y -> normal).  We note, that in the standard
+ * orientation of faces in 2d, faces 0 and 2 have normals that point into the
+ * cell, and faces 1 and 3 have normals pointing outward. In 3d, faces 0, 2,
+ * and 4 have normals that point into the cell, while the normals of faces 1,
+ * 3, and 5 point outward. This information, again, can be queried from
+ * GeometryInfo<dim>::unit_normal_orientation.
+ *
+ * However, it turns out that a significant number of 3d meshes cannot satisfy
+ * this convention. This is due to the fact that the face convention for one
+ * cell already implies something for the neighbor, since they share a common
+ * face and fixing it for the first cell also fixes the normal vectors of the
+ * opposite faces of both cells. It is easy to construct cases of loops of
+ * cells for which this leads to cases where we cannot find orientations for
+ * all faces that are consistent with this convention.
+ *
+ * For this reason, above convention is only what we call the <em>standard
+ * orientation</em>. deal.II actually allows faces in 3d to have either the
+ * standard direction, or its opposite, in which case the lines that make up a
+ * cell would have reversed order, and the above line equivalences would not
+ * hold any more. You can ask a cell whether a given face has standard
+ * orientation by calling <tt>cell->face_orientation(face_no)</tt>: if the
+ * result is @p true, then the face has standard orientation, otherwise its
+ * normal vector is pointing the other direction. There are not very many
+ * places in application programs where you need this information actually,
+ * but a few places in the library make use of this. Note that in 2d, the
+ * result is always @p true. More information on the topic can be found in
+ * this
+ * @ref GlossFaceOrientation "glossary"
+ * article.
+ *
+ * In order to allow all kinds of meshes in 3d, including
+ * <em>Moebius</em>-loops, a face might even be rotated looking from one cell,
+ * whereas it is according to the standard looking at it from the neighboring
+ * cell sharing that particular face. In order to cope with this, two flags
+ * <tt>face_flip</tt> and <tt>face_rotation</tt> are available, to represent
+ * rotations by 180 and 90 degrees, respectively. Setting both flags amounts to
+ * a rotation of 270 degrees (all counterclockwise). You can ask the cell for
+ * these flags like for the <tt>face_orientation</tt>. In order to enable
+ * rotated faces, even lines can deviate from their standard direction in 3d.
+ * This information is available as the <tt>line_orientation</tt> flag for
+ * cells and faces in 3d. Again, this is something that should be internal to
+ * the library, and application programs will probably never have to bother
+ * about it. For more information on this see also
+ * @ref GlossFaceOrientation "this glossary entry".
+ *
+ *
+ * <h4>Children</h4>
+ *
+ * The eight children of an isotropically refined cell are numbered according
+ * to the vertices they are adjacent to:
+ * @verbatim
+ *       *----*----*        *----*----*
+ *      /| 6  |  7 |       / 6  /  7 /|
+ *     *6|    |    |      *----*----*7|
+ *    /| *----*----*     / 4  /  5 /| *
+ *   * |/|    |    |    *----*----* |/|
+ *   |4* | 2  |  3 |    | 4  |  5 |5*3|
+ *   |/|2*----*----*    |    |    |/| *
+ *   * |/ 2  /  3 /     *----*----* |/
+ *   |0*----*----*      |    |    |1*
+ *   |/0   /  1 /       | 0  |  1 |/
+ *   *----*----*        *----*----*
+ * @endverbatim
+ *
+ * Taking into account the orientation of the faces, the following children
+ * are adjacent to the respective faces:
+ * <ul>
+ * <li> Face 0: children 0, 2, 4, 6;
+ * <li> Face 1: children 1, 3, 5, 7;
+ * <li> Face 2: children 0, 4, 1, 5;
+ * <li> Face 3: children 2, 6, 3, 7;
+ * <li> Face 4: children 0, 1, 2, 3;
+ * <li> Face 5: children 4, 5, 6, 7.
+ * </ul>
+ * You can get these numbers using the GeometryInfo<3>::child_cell_on_face()
+ * function. As each child is adjacent to the vertex with the same number
+ * these numbers are also given by the
+ * GeometryInfo<3>::face_to_cell_vertices() function.
+ *
+ * Note that, again, the above list only holds for faces in their standard
+ * orientation. If a face is not in standard orientation, then the children at
+ * positions 1 and 2 (counting from 0 to 3) would be swapped. In fact, this is
+ * what the child_cell_on_face and the face_to_cell_vertices functions of
+ * GeometryInfo<3> do, when invoked with a <tt>face_orientation=false</tt>
+ * argument.
+ *
+ * The information which child cell is at which position of which face is most
+ * often used when computing jump terms across faces with hanging nodes, using
+ * objects of type FESubfaceValues. Sitting on one cell, you would look at a
+ * face and figure out which child of the neighbor is sitting on a given
+ * subface between the present and the neighboring cell. To avoid having to
+ * query the standard orientation of the faces of the two cells every time in
+ * such cases, you should use a function call like
+ * <tt>cell->neighbor_child_on_subface(face_no,subface_no)</tt>, which returns
+ * the correct result both in 2d (where face orientations are immaterial) and
+ * 3d (where it is necessary to use the face orientation as additional
+ * argument to <tt>GeometryInfo<3>::child_cell_on_face</tt>).
+ *
+ * For anisotropic refinement, the child cells can not be numbered according
+ * to adjacent vertices, thus the following conventions are used:
+ * @verbatim
+ *            RefinementCase<3>::cut_x
+ *
+ *       *----*----*        *----*----*
+ *      /|    |    |       /    /    /|
+ *     / |    |    |      / 0  /  1 / |
+ *    /  | 0  |  1 |     /    /    /  |
+ *   *   |    |    |    *----*----*   |
+ *   | 0 |    |    |    |    |    | 1 |
+ *   |   *----*----*    |    |    |   *
+ *   |  /    /    /     | 0  | 1  |  /
+ *   | / 0  /  1 /      |    |    | /
+ *   |/    /    /       |    |    |/
+ *   *----*----*        *----*----*
+ * @endverbatim
+ *
+ * @verbatim
+ *            RefinementCase<3>::cut_y
+ *
+ *       *---------*        *---------*
+ *      /|         |       /    1    /|
+ *     * |         |      *---------* |
+ *    /| |    1    |     /    0    /| |
+ *   * |1|         |    *---------* |1|
+ *   | | |         |    |         | | |
+ *   |0| *---------*    |         |0| *
+ *   | |/    1    /     |    0    | |/
+ *   | *---------*      |         | *
+ *   |/    0    /       |         |/
+ *   *---------*        *---------*
+ * @endverbatim
+ *
+ * @verbatim
+ *            RefinementCase<3>::cut_z
+ *
+ *       *---------*        *---------*
+ *      /|    1    |       /         /|
+ *     / |         |      /    1    / |
+ *    /  *---------*     /         /  *
+ *   * 1/|         |    *---------* 1/|
+ *   | / |    0    |    |    1    | / |
+ *   |/  *---------*    |         |/  *
+ *   * 0/         /     *---------* 0/
+ *   | /    0    /      |         | /
+ *   |/         /       |    0    |/
+ *   *---------*        *---------*
+ * @endverbatim
+ *
+ * @verbatim
+ *            RefinementCase<3>::cut_xy
+ *
+ *       *----*----*        *----*----*
+ *      /|    |    |       / 2  /  3 /|
+ *     * |    |    |      *----*----* |
+ *    /| | 2  |  3 |     / 0  /  1 /| |
+ *   * |2|    |    |    *----*----* |3|
+ *   | | |    |    |    |    |    | | |
+ *   |0| *----*----*    |    |    |1| *
+ *   | |/ 2  /  3 /     | 0  |  1 | |/
+ *   | *----*----*      |    |    | *
+ *   |/ 0  /  1 /       |    |    |/
+ *   *----*----*        *----*----*
+ * @endverbatim
+ *
+ * @verbatim
+ *            RefinementCase<3>::cut_xz
+ *
+ *       *----*----*        *----*----*
+ *      /| 1  |  3 |       /    /    /|
+ *     / |    |    |      / 1  /  3 / |
+ *    /  *----*----*     /    /    /  *
+ *   * 1/|    |    |    *----*----* 3/|
+ *   | / | 0  |  2 |    | 1  |  3 | / |
+ *   |/  *----*----*    |    |    |/  *
+ *   * 0/    /    /     *----*----* 2/
+ *   | / 0  /  2 /      |    |    | /
+ *   |/    /    /       | 0  |  2 |/
+ *   *----*----*        *----*----*
+ * @endverbatim
+ *
+ * @verbatim
+ *            RefinementCase<3>::cut_yz
+ *
+ *       *---------*        *---------*
+ *      /|    3    |       /    3    /|
+ *     * |         |      *---------* |
+ *    /|3*---------*     /    2    /|3*
+ *   * |/|         |    *---------* |/|
+ *   |2* |    1    |    |    2    |2* |
+ *   |/|1*---------*    |         |/|1*
+ *   * |/    1    /     *---------* |/
+ *   |0*---------*      |         |0*
+ *   |/    0    /       |    0    |/
+ *   *---------*        *---------*
+ * @endverbatim
+ *
+ * This information can also be obtained by the
+ * <tt>GeometryInfo<3>::child_cell_on_face</tt> function.
+ *
+ * <h4>Coordinate systems</h4>
+ *
+ * We define the following coordinate system for the explicit coordinates of
+ * the vertices of the unit cell:
+ * @verbatim
+ *                       6-------7        6-------7
+ *                      /|       |       /       /|
+ *                     / |       |      /       / |
+ *  z                 /  |       |     /       /  |
+ *  ^                4   |       |    4-------5   |
+ *  |   ^y           |   2-------3    |       |   3
+ *  |  /             |  /       /     |       |  /
+ *  | /              | /       /      |       | /
+ *  |/               |/       /       |       |/
+ *  *------>x        0-------1        0-------1
+ * @endverbatim
+ *
+ * By the convention laid down as above, the vertices have the following
+ * coordinates (lexicographic, with x running fastest):
+ * <ul>
+ * <li> Vertex 0: <tt>(0,0,0)</tt>;
+ * <li> Vertex 1: <tt>(1,0,0)</tt>;
+ * <li> Vertex 2: <tt>(0,1,0)</tt>;
+ * <li> Vertex 3: <tt>(1,1,0)</tt>;
+ * <li> Vertex 4: <tt>(0,0,1)</tt>;
+ * <li> Vertex 5: <tt>(1,0,1)</tt>;
+ * <li> Vertex 6: <tt>(0,1,1)</tt>;
+ * <li> Vertex 7: <tt>(1,1,1)</tt>.
+ * </ul>
+ *
+ *
+ *
+ * @note Instantiations for this template are provided for dimensions 1,2,3,4,
+ * and there is a specialization for dim=0 (see the section on
+ * @ref Instantiations
+ * in the manual).
+ *
+ * @ingroup grid geomprimitives aniso
+ * @author Wolfgang Bangerth, 1998, Ralf Hartmann, 2005, Tobias Leicht, 2007
+ */
+template <int dim>
+struct GeometryInfo
+{
+
+  /**
+   * Maximum number of children of a refined cell, i.e. the number of children
+   * of an isotropically refined cell.
+   *
+   * If a cell is refined anisotropically, the actual number of children may
+   * be less than the value given here.
+   */
+  static const unsigned int max_children_per_cell = 1 << dim;
+
+  /**
+   * Number of faces of a cell.
+   */
+  static const unsigned int faces_per_cell = 2 * dim;
+
+  /**
+   * Maximum number of children of a refined face, i.e. the number of children
+   * of an isotropically refined face.
+   *
+   * If a cell is refined anisotropically, the actual number of children may
+   * be less than the value given here.
+   */
+  static const unsigned int max_children_per_face = GeometryInfo<dim-1>::max_children_per_cell;
+
+  /**
+   * Number of vertices of a cell.
+   */
+  static const unsigned int vertices_per_cell = 1 << dim;
+
+  /**
+   * Number of vertices on each face.
+   */
+  static const unsigned int vertices_per_face = GeometryInfo<dim-1>::vertices_per_cell;
+
+  /**
+   * Number of lines on each face.
+   */
+  static const unsigned int lines_per_face
+    = GeometryInfo<dim-1>::lines_per_cell;
+
+  /**
+   * Number of quads on each face.
+   */
+  static const unsigned int quads_per_face
+    = GeometryInfo<dim-1>::quads_per_cell;
+
+  /**
+   * Number of lines of a cell.
+   *
+   * The formula to compute this makes use of the fact that when going from
+   * one dimension to the next, the object of the lower dimension is copied
+   * once (thus twice the old number of lines) and then a new line is inserted
+   * between each vertex of the old object and the corresponding one in the
+   * copy.
+   */
+  static const unsigned int lines_per_cell
+    = (2*GeometryInfo<dim-1>::lines_per_cell +
+       GeometryInfo<dim-1>::vertices_per_cell);
+
+  /**
+   * Number of quadrilaterals of a cell.
+   *
+   * This number is computed recursively just as the previous one, with the
+   * exception that new quads result from connecting an original line and its
+   * copy.
+   */
+  static const unsigned int quads_per_cell
+    = (2*GeometryInfo<dim-1>::quads_per_cell +
+       GeometryInfo<dim-1>::lines_per_cell);
+
+  /**
+   * Number of hexahedra of a cell (recursion over the next lower dimension).
+   */
+  static const unsigned int hexes_per_cell
+    = (2*GeometryInfo<dim-1>::hexes_per_cell +
+       GeometryInfo<dim-1>::quads_per_cell);
+
+  /**
+   * Rearrange vertices for UCD output.  For a cell being written in UCD
+   * format, each entry in this field contains the number of a vertex in
+   * <tt>deal.II</tt> that corresponds to the UCD numbering at this location.
+   *
+   * Typical example: write a cell and arrange the vertices, such that UCD
+   * understands them.
+   *
+   * @code
+   * for (i=0; i< n_vertices; ++i)
+   *   out << cell->vertex(ucd_to_deal[i]);
+   * @endcode
+   *
+   * As the vertex numbering in deal.II versions <= 5.1 happened to coincide
+   * with the UCD numbering, this field can also be used like an
+   * old_to_lexicographic mapping.
+   */
+  static const unsigned int ucd_to_deal[vertices_per_cell];
+
+  /**
+   * Rearrange vertices for OpenDX output.  For a cell being written in OpenDX
+   * format, each entry in this field contains the number of a vertex in
+   * <tt>deal.II</tt> that corresponds to the DX numbering at this location.
+   *
+   * Typical example: write a cell and arrange the vertices, such that OpenDX
+   * understands them.
+   *
+   * @code
+   * for (i=0; i< n_vertices; ++i)
+   *   out << cell->vertex(dx_to_deal[i]);
+   * @endcode
+   */
+  static const unsigned int dx_to_deal[vertices_per_cell];
+
+  /**
+   * This field stores for each vertex to which faces it belongs. In any given
+   * dimension, the number of such faces is equal to the dimension. The first
+   * index in this 2D-array runs over all vertices, the second index over
+   * @p dim faces to which the vertex belongs.
+   *
+   * The order of the faces for each vertex is such that the first listed face
+   * bounds the reference cell in <i>x</i> direction, the second in <i>y</i>
+   * direction, and so on.
+   */
+  static const unsigned int vertex_to_face[vertices_per_cell][dim];
+
+  /**
+   * Return the number of children of a cell (or face) refined with
+   * @p refinement_case.
+   */
+  static
+  unsigned int
+  n_children(const RefinementCase<dim> &refinement_case);
+
+  /**
+   * Return the number of subfaces of a face refined according to
+   * internal::SubfaceCase @p subface_case.
+   */
+  static
+  unsigned int
+  n_subfaces(const internal::SubfaceCase<dim> &subface_case);
+
+  /**
+   * Given a face on the reference element with a
+   * <code>internal::SubfaceCase@<dim@></code> @p subface_case this function
+   * returns the ratio between the area of the @p subface_no th subface and
+   * the area(=1) of the face.
+   *
+   * E.g. for internal::SubfaceCase<3>::case_xy the ratio is 1/4 for each of
+   * the subfaces.
+   */
+  static
+  double
+  subface_ratio(const internal::SubfaceCase<dim> &subface_case,
+                const unsigned int subface_no);
+
+  /**
+   * Given a cell refined with the <code>RefinementCase</code> @p
+   * cell_refinement_case return the <code>RefinementCase</code> of the @p
+   * face_no th face.
+   */
+  static
+  RefinementCase<dim-1>
+  face_refinement_case (const RefinementCase<dim> &cell_refinement_case,
+                        const unsigned int face_no,
+                        const bool face_orientation = true,
+                        const bool face_flip        = false,
+                        const bool face_rotation    = false);
+
+  /**
+   * Given the RefinementCase @p face_refinement_case of the @p face_no th
+   * face, return the smallest RefinementCase of the cell, which corresponds
+   * to that refinement of the face.
+   */
+  static
+  RefinementCase<dim>
+  min_cell_refinement_case_for_face_refinement
+  (const RefinementCase<dim-1> &face_refinement_case,
+   const unsigned int face_no,
+   const bool face_orientation = true,
+   const bool face_flip        = false,
+   const bool face_rotation    = false);
+
+  /**
+   * Given a cell refined with the RefinementCase @p cell_refinement_case
+   * return the RefinementCase of the @p line_no th line.
+   */
+  static
+  RefinementCase<1>
+  line_refinement_case(const RefinementCase<dim> &cell_refinement_case,
+                       const unsigned int line_no);
+
+  /**
+   * Return the minimal / smallest RefinementCase of the cell, which ensures
+   * refinement of line @p line_no.
+   */
+  static
+  RefinementCase<dim>
+  min_cell_refinement_case_for_line_refinement(const unsigned int line_no);
+
+  /**
+   * This function tells which child cell is adjacent to a certain subface of
+   * a face of the mother cell.
+   *
+   * For example, in 2D the layout of a cell is as follows:
+   * @verbatim
+   * .      3
+   * .   2-->--3
+   * .   |     |
+   * . 0 ^     ^ 1
+   * .   |     |
+   * .   0-->--1
+   * .      2
+   * @endverbatim
+   * Vertices and faces are indicated with their numbers, faces also with
+   * their directions.
+   *
+   * Now, when refined, the layout is like this:
+   * @verbatim
+   * *--*--*
+   * | 2|3 |
+   * *--*--*
+   * | 0|1 |
+   * *--*--*
+   * @endverbatim
+   *
+   * Thus, the child cells on face 0 are (ordered in the direction of the
+   * face) 0 and 2, on face 3 they are 2 and 3, etc.
+   *
+   * For three spatial dimensions, the exact order of the children is laid
+   * down in the general documentation of this class.
+   *
+   * Through the <tt>face_orientation</tt>, <tt>face_flip</tt> and
+   * <tt>face_rotation</tt> arguments this function handles faces oriented in
+   * the standard and non-standard orientation. <tt>face_orientation</tt>
+   * defaults to <tt>true</tt>, <tt>face_flip</tt> and <tt>face_rotation</tt>
+   * default to <tt>false</tt> (standard orientation) and have no effect in 2d.
+   * The concept of face orientations is explained in this
+   * @ref GlossFaceOrientation "glossary"
+   * entry.
+   *
+   * In the case of anisotropically refined cells and faces, the @p
+   * RefinementCase of the face, <tt>face_refinement_case</tt>, might have an
+   * influence on which child is behind which given subface, thus this is an
+   * additional argument, defaulting to isotropic refinement of the face.
+   */
+  static
+  unsigned int
+  child_cell_on_face (const RefinementCase<dim> &ref_case,
+                      const unsigned int face,
+                      const unsigned int subface,
+                      const bool face_orientation = true,
+                      const bool face_flip        = false,
+                      const bool face_rotation    = false,
+                      const RefinementCase<dim-1> &face_refinement_case
+                      = RefinementCase<dim-1>::isotropic_refinement);
+
+  /**
+   * Map line vertex number to cell vertex number, i.e. give the cell vertex
+   * number of the <tt>vertex</tt>th vertex of line <tt>line</tt>, e.g.
+   * <tt>GeometryInfo<2>::line_to_cell_vertices(3,0)=2</tt>.
+   *
+   * The order of the lines, as well as their direction (which in turn
+   * determines which is the first and which the second vertex on a line) is
+   * the canonical one in deal.II, as described in the general documentation
+   * of this class.
+   *
+   * For <tt>dim=2</tt> this call is simply passed down to the
+   * face_to_cell_vertices() function.
+   */
+  static
+  unsigned int
+  line_to_cell_vertices (const unsigned int line,
+                         const unsigned int vertex);
+
+  /**
+   * Map face vertex number to cell vertex number, i.e. give the cell vertex
+   * number of the <tt>vertex</tt>th vertex of face <tt>face</tt>, e.g.
+   * <tt>GeometryInfo<2>::face_to_cell_vertices(3,0)=2</tt>, see the image
+   * under point N4 in the 2d section of this class's documentation.
+   *
+   * Through the <tt>face_orientation</tt>, <tt>face_flip</tt> and
+   * <tt>face_rotation</tt> arguments this function handles faces oriented in
+   * the standard and non-standard orientation. <tt>face_orientation</tt>
+   * defaults to <tt>true</tt>, <tt>face_flip</tt> and <tt>face_rotation</tt>
+   * default to <tt>false</tt> (standard orientation). In 2d only
+   * <tt>face_flip</tt> is considered. See this
+   * @ref GlossFaceOrientation "glossary"
+   * article for more information.
+   *
+   * As the children of a cell are ordered according to the vertices of the
+   * cell, this call is passed down to the child_cell_on_face() function.
+   * Hence this function is simply a wrapper of child_cell_on_face() giving it
+   * a suggestive name.
+   */
+  static
+  unsigned int
+  face_to_cell_vertices (const unsigned int face,
+                         const unsigned int vertex,
+                         const bool face_orientation = true,
+                         const bool face_flip        = false,
+                         const bool face_rotation    = false);
+
+  /**
+   * Map face line number to cell line number, i.e. give the cell line number
+   * of the <tt>line</tt>th line of face <tt>face</tt>, e.g.
+   * <tt>GeometryInfo<3>::face_to_cell_lines(5,0)=4</tt>.
+   *
+   * Through the <tt>face_orientation</tt>, <tt>face_flip</tt> and
+   * <tt>face_rotation</tt> arguments this function handles faces oriented in
+   * the standard and non-standard orientation. <tt>face_orientation</tt>
+   * defaults to <tt>true</tt>, <tt>face_flip</tt> and <tt>face_rotation</tt>
+   * default to <tt>false</tt> (standard orientation) and has no effect in 2d.
+   */
+  static
+  unsigned int
+  face_to_cell_lines (const unsigned int face,
+                      const unsigned int line,
+                      const bool face_orientation = true,
+                      const bool face_flip        = false,
+                      const bool face_rotation    = false);
+
+  /**
+   * Map the vertex index @p vertex of a face in standard orientation to one
+   * of a face with arbitrary @p face_orientation, @p face_flip and @p
+   * face_rotation. The values of these three flags default to <tt>true</tt>,
+   * <tt>false</tt> and <tt>false</tt>, respectively. This combination
+   * describes a face in standard orientation.
+   *
+   * This function is only implemented in 3D.
+   */
+  static
+  unsigned int
+  standard_to_real_face_vertex (const unsigned int vertex,
+                                const bool face_orientation = true,
+                                const bool face_flip        = false,
+                                const bool face_rotation    = false);
+
+  /**
+   * Map the vertex index @p vertex of a face with arbitrary @p
+   * face_orientation, @p face_flip and @p face_rotation to a face in standard
+   * orientation. The values of these three flags default to <tt>true</tt>,
+   * <tt>false</tt> and <tt>false</tt>, respectively. This combination
+   * describes a face in standard orientation.
+   *
+   * This function is only implemented in 3D.
+   */
+  static
+  unsigned int
+  real_to_standard_face_vertex (const unsigned int vertex,
+                                const bool face_orientation = true,
+                                const bool face_flip        = false,
+                                const bool face_rotation    = false);
+
+  /**
+   * Map the line index @p line of a face in standard orientation to one of a
+   * face with arbitrary @p face_orientation, @p face_flip and @p
+   * face_rotation. The values of these three flags default to <tt>true</tt>,
+   * <tt>false</tt> and <tt>false</tt>, respectively. This combination
+   * describes a face in standard orientation.
+   *
+   * This function is only implemented in 3D.
+   */
+  static
+  unsigned int
+  standard_to_real_face_line (const unsigned int line,
+                              const bool face_orientation = true,
+                              const bool face_flip        = false,
+                              const bool face_rotation    = false);
+
+  /**
+   * Map the line index @p line of a face with arbitrary @p face_orientation,
+   * @p face_flip and @p face_rotation to a face in standard orientation. The
+   * values of these three flags default to <tt>true</tt>, <tt>false</tt> and
+   * <tt>false</tt>, respectively. This combination describes a face in
+   * standard orientation.
+   *
+   * This function is only implemented in 3D.
+   */
+  static
+  unsigned int
+  real_to_standard_face_line (const unsigned int line,
+                              const bool face_orientation = true,
+                              const bool face_flip        = false,
+                              const bool face_rotation    = false);
+
+  /**
+   * Return the position of the @p ith vertex on the unit cell. The order of
+   * vertices is the canonical one in deal.II, as described in the general
+   * documentation of this class.
+   */
+  static
+  Point<dim>
+  unit_cell_vertex (const unsigned int vertex);
+
+  /**
+   * Given a point @p p in unit coordinates, return the number of the child
+   * cell in which it would lie. If the point lies on the interface of two
+   * children, return any one of their indices. The result is always less than
+   * GeometryInfo<dimension>::max_children_per_cell.
+   *
+   * The order of child cells is described in the general documentation of this
+   * class.
+   */
+  static
+  unsigned int
+  child_cell_from_point (const Point<dim> &p);
+
+  /**
+   * Given coordinates @p p on the unit cell, return the values of the
+   * coordinates of this point in the coordinate system of the given child.
+   * Neither original nor returned coordinates need actually be inside the
+   * cell, we simply perform a scale-and-shift operation with a shift that
+   * depends on the number of the child.
+   */
+  static
+  Point<dim>
+  cell_to_child_coordinates (const Point<dim>          &p,
+                             const unsigned int         child_index,
+                             const RefinementCase<dim>  refine_case
+                             = RefinementCase<dim>::isotropic_refinement);
+
+  /**
+   * The reverse function to the one above: take a point in the coordinate
+   * system of the child, and transform it to the coordinate system of the
+   * mother cell.
+   */
+  static
+  Point<dim>
+  child_to_cell_coordinates (const Point<dim>          &p,
+                             const unsigned int         child_index,
+                             const RefinementCase<dim>  refine_case
+                             = RefinementCase<dim>::isotropic_refinement);
+
+  /**
+   * Return true if the given point is inside the unit cell of the present
+   * space dimension.
+   */
+  static
+  bool
+  is_inside_unit_cell (const Point<dim> &p);
+
+  /**
+   * Return true if the given point is inside the unit cell of the present
+   * space dimension. This function accepts an additional parameter which
+   * specifies by how much the point position may actually be outside the
+   * true unit cell. This is useful because in practice we may often not be
+   * able to compute the coordinates of a point in reference coordinates
+   * exactly, but only up to numerical roundoff.
+   *
+   * The tolerance parameter may be less than zero, indicating that the point
+   * should be safely inside the cell.
+   */
+  static
+  bool
+  is_inside_unit_cell (const Point<dim> &p,
+                       const double eps);
+
+  /**
+   * Projects a given point onto the unit cell, i.e. each coordinate outside
+   * [0..1] is modified to lie within that interval.
+   */
+  static
+  Point<dim>
+  project_to_unit_cell (const Point<dim> &p);
+
+  /**
+   * Returns the infinity norm of the vector between a given point @p p
+   * outside the unit cell to the closest unit cell boundary. For points
+   * inside the cell, this is defined as zero.
+   */
+  static
+  double
+  distance_to_unit_cell (const Point<dim> &p);
+
+  /**
+   * Compute the value of the $i$-th $d$-linear (i.e. (bi-,tri-)linear) shape
+   * function at location $\xi$.
+   */
+  static
+  double
+  d_linear_shape_function (const Point<dim> &xi,
+                           const unsigned int i);
+
+  /**
+   * Compute the gradient of the $i$-th $d$-linear (i.e. (bi-,tri-)linear)
+   * shape function at location $\xi$.
+   */
+  static
+  Tensor<1,dim>
+  d_linear_shape_function_gradient (const Point<dim> &xi,
+                                    const unsigned int i);
+
+  /**
+   * For a (bi-, tri-)linear mapping from the reference cell, face, or edge to
+   * the object specified by the given vertices, compute the alternating form
+   * of the transformed unit vectors vertices. For an object of dimensionality
+   * @p dim, there are @p dim vectors with @p spacedim components each, and
+   * the alternating form is a tensor of rank spacedim-dim that corresponds to
+   * the wedge product of the @p dim unit vectors, and it corresponds to the
+   * volume and normal vectors of the mapping from reference element to the
+   * element described by the vertices.
+   *
+   * For example, if dim==spacedim==2, then the alternating form is a scalar
+   * (because spacedim-dim=0) and its value equals $\mathbf v_1\wedge \mathbf
+   * v_2=\mathbf v_1^\perp \cdot\mathbf v_2$, where $\mathbf v_1^\perp$ is a
+   * vector that is rotated to the right by 90 degrees from $\mathbf v_1$. If
+   * dim==spacedim==3, then the result is again a scalar with value $\mathbf
+   * v_1\wedge \mathbf v_2 \wedge \mathbf v_3 = (\mathbf v_1\times \mathbf
+   * v_2)\cdot \mathbf v_3$, where $\mathbf v_1, \mathbf v_2, \mathbf v_3$ are
+   * the images of the unit vectors at a vertex of the unit dim-dimensional
+   * cell under transformation to the dim-dimensional cell in spacedim-
+   * dimensional space. In both cases, i.e. for dim==2 or 3, the result
+   * happens to equal the determinant of the Jacobian of the mapping from
+   * reference cell to cell in real space. Note that it is the actual
+   * determinant, not its absolute value as often used in transforming
+   * integrals from one coordinate system to another. In particular, if the
+   * object specified by the vertices is a parallelogram (i.e. a linear
+   * transformation of the reference cell) then the computed values are the
+   * same at all vertices and equal the (signed) area of the cell; similarly,
+   * for parallelepipeds, it is the volume of the cell.
+   *
+   * Likewise, if we have dim==spacedim-1 (e.g. we have a quad in 3d space, or
+   * a line in 2d), then the alternating product denotes the normal vector
+   * (i.e. a rank-1 tensor, since spacedim-dim=1) to the object at each
+   * vertex, where the normal vector's magnitude denotes the area element of
+   * the transformation from the reference object to the object given by the
+   * vertices. In particular, if again the mapping from reference object to
+   * the object under consideration here is linear (not bi- or trilinear),
+   * then the returned vectors are all %parallel, perpendicular to the mapped
+   * object described by the vertices, and have a magnitude equal to the
+   * area/volume of the mapped object. If dim=1, spacedim=2, then the returned
+   * value is $\mathbf v_1^\perp$, where $\mathbf v_1$ is the image of the
+   * sole unit vector of a line mapped to the line in 2d given by the
+   * vertices; if dim=2, spacedim=3, then the returned values are $\mathbf v_1
+   * \wedge \mathbf v_2=\mathbf v_1 \times \mathbf v_2$ where $\mathbf
+   * v_1,\mathbf v_2$ are the two three-dimensional vectors that are
+   * tangential to the quad mapped into three-dimensional space.
+   *
+   * This function is used in order to determine how distorted a cell is (see
+   * the entry on
+   * @ref GlossDistorted "distorted cells"
+   * in the glossary).
+   */
+  template <int spacedim>
+  static void
+  alternating_form_at_vertices
+#ifndef DEAL_II_CONSTEXPR_BUG
+  (const Point<spacedim> (&vertices)[vertices_per_cell],
+   Tensor<spacedim-dim,spacedim> (&forms)[vertices_per_cell]);
+#else
+  (const Point<spacedim> *vertices,
+   Tensor<spacedim-dim,spacedim> *forms);
+#endif
+
+  /**
+   * For each face of the reference cell, this field stores the coordinate
+   * direction in which its normal vector points. In <tt>dim</tt> dimension
+   * these are the <tt>2*dim</tt> first entries of <tt>{0,0,1,1,2,2,3,3}</tt>.
+   *
+   * Note that this is only the coordinate number. The actual direction of the
+   * normal vector is obtained by multiplying the unit vector in this
+   * direction with #unit_normal_orientation.
+   */
+  static const unsigned int unit_normal_direction[faces_per_cell];
+
+  /**
+   * Orientation of the unit normal vector of a face of the reference cell. In
+   * <tt>dim</tt> dimension these are the <tt>2*dim</tt> first entries of
+   * <tt>{-1,1,-1,1,-1,1,-1,1}</tt>.
+   *
+   * Each value is either <tt>1</tt> or <tt>-1</tt>, corresponding to a normal
+   * vector pointing in the positive or negative coordinate direction,
+   * respectively.
+   *
+   * Note that this is only the <em>standard orientation</em> of faces. At
+   * least in 3d, actual faces of cells in a triangulation can also have the
+   * opposite orientation, depending on a flag that one can query from the
+   * cell it belongs to. For more information, see the
+   * @ref GlossFaceOrientation "glossary"
+   * entry on face orientation.
+   */
+  static const int unit_normal_orientation[faces_per_cell];
+
+  /**
+   * List of numbers which denotes which face is opposite to a given face. Its
+   * entries are the first <tt>2*dim</tt> entries of <tt>{ 1, 0, 3, 2, 5, 4,
+   * 7, 6}</tt>.
+   */
+  static const unsigned int opposite_face[faces_per_cell];
+
+
+  /**
+   * Exception
+   */
+  DeclException1 (ExcInvalidCoordinate,
+                  double,
+                  << "The coordinates must satisfy 0 <= x_i <= 1, "
+                  << "but here we have x_i=" << arg1);
+
+  /**
+   * Exception
+   */
+  DeclException3 (ExcInvalidSubface,
+                  int, int, int,
+                  << "RefinementCase<dim> " << arg1 << ": face " << arg2
+                  << " has no subface " << arg3);
+};
+
+
+
+
+#ifndef DOXYGEN
+
+
+/* -------------- declaration of explicit specializations ------------- */
+
+#ifndef DEAL_II_MEMBER_ARRAY_SPECIALIZATION_BUG
+template <>
+const unsigned int GeometryInfo<1>::unit_normal_direction[faces_per_cell];
+template <>
+const unsigned int GeometryInfo<2>::unit_normal_direction[faces_per_cell];
+template <>
+const unsigned int GeometryInfo<3>::unit_normal_direction[faces_per_cell];
+template <>
+const unsigned int GeometryInfo<4>::unit_normal_direction[faces_per_cell];
+
+template <>
+const int GeometryInfo<1>::unit_normal_orientation[faces_per_cell];
+template <>
+const int GeometryInfo<2>::unit_normal_orientation[faces_per_cell];
+template <>
+const int GeometryInfo<3>::unit_normal_orientation[faces_per_cell];
+template <>
+const int GeometryInfo<4>::unit_normal_orientation[faces_per_cell];
+
+template <>
+const unsigned int GeometryInfo<1>::opposite_face[faces_per_cell];
+template <>
+const unsigned int GeometryInfo<2>::opposite_face[faces_per_cell];
+template <>
+const unsigned int GeometryInfo<3>::opposite_face[faces_per_cell];
+template <>
+const unsigned int GeometryInfo<4>::opposite_face[faces_per_cell];
+#endif
+
+
+template <>
+Tensor<1,1>
+GeometryInfo<1>::
+d_linear_shape_function_gradient (const Point<1> &xi,
+                                  const unsigned int i);
+template <>
+Tensor<1,2>
+GeometryInfo<2>::
+d_linear_shape_function_gradient (const Point<2> &xi,
+                                  const unsigned int i);
+template <>
+Tensor<1,3>
+GeometryInfo<3>::
+d_linear_shape_function_gradient (const Point<3> &xi,
+                                  const unsigned int i);
+
+
+
+
+/* -------------- inline functions ------------- */
+
+
+inline
+GeometryPrimitive::GeometryPrimitive (const Object object)
+  :
+  object (object)
+{}
+
+
+
+inline
+GeometryPrimitive::GeometryPrimitive (const unsigned int object_dimension)
+  :
+  object (static_cast<Object>(object_dimension))
+{}
+
+
+inline
+GeometryPrimitive::operator unsigned int () const
+{
+  return static_cast<unsigned int>(object);
+}
+
+
+
+namespace internal
+{
+
+  template <int dim>
+  inline
+  SubfaceCase<dim>::SubfaceCase (const typename SubfacePossibilities<dim>::Possibilities subface_possibility)
+    :
+    value (subface_possibility)
+  {}
+
+
+  template <int dim>
+  inline
+  SubfaceCase<dim>::operator unsigned char () const
+  {
+    return value;
+  }
+
+
+} // namespace internal
+
+
+template <int dim>
+inline
+RefinementCase<dim>
+RefinementCase<dim>::cut_axis (const unsigned int)
+{
+  Assert (false, ExcInternalError());
+  return static_cast<unsigned char>(-1);
+}
+
+
+template <>
+inline
+RefinementCase<1>
+RefinementCase<1>::cut_axis (const unsigned int i)
+{
+  const unsigned int dim = 1;
+  Assert (i < dim, ExcIndexRange(i, 0, dim));
+
+  static const RefinementCase options[dim] = { RefinementPossibilities<1>::cut_x };
+  return options[i];
+}
+
+
+
+template <>
+inline
+RefinementCase<2>
+RefinementCase<2>::cut_axis (const unsigned int i)
+{
+  const unsigned int dim = 2;
+  Assert (i < dim, ExcIndexRange(i, 0, dim));
+
+  static const RefinementCase options[dim] = { RefinementPossibilities<2>::cut_x,
+                                               RefinementPossibilities<2>::cut_y
+                                             };
+  return options[i];
+}
+
+
+
+template <>
+inline
+RefinementCase<3>
+RefinementCase<3>::cut_axis (const unsigned int i)
+{
+  const unsigned int dim = 3;
+  Assert (i < dim, ExcIndexRange(i, 0, dim));
+
+  static const RefinementCase options[dim] = { RefinementPossibilities<3>::cut_x,
+                                               RefinementPossibilities<3>::cut_y,
+                                               RefinementPossibilities<3>::cut_z
+                                             };
+  return options[i];
+}
+
+
+
+template <int dim>
+inline
+RefinementCase<dim>::RefinementCase ()
+  :
+  value (RefinementPossibilities<dim>::no_refinement)
+{}
+
+
+
+template <int dim>
+inline
+RefinementCase<dim>::
+RefinementCase (const typename RefinementPossibilities<dim>::Possibilities refinement_case)
+  :
+  value (refinement_case)
+{
+  // check that only those bits of
+  // the given argument are set that
+  // make sense for a given space
+  // dimension
+  Assert ((refinement_case & RefinementPossibilities<dim>::isotropic_refinement) ==
+          refinement_case,
+          ExcInvalidRefinementCase (refinement_case));
+}
+
+
+
+template <int dim>
+inline
+RefinementCase<dim>::RefinementCase (const unsigned char refinement_case)
+  :
+  value (refinement_case)
+{
+  // check that only those bits of
+  // the given argument are set that
+  // make sense for a given space
+  // dimension
+  Assert ((refinement_case & RefinementPossibilities<dim>::isotropic_refinement) ==
+          refinement_case,
+          ExcInvalidRefinementCase (refinement_case));
+}
+
+
+
+template <int dim>
+inline
+RefinementCase<dim>::operator unsigned char () const
+{
+  return value;
+}
+
+
+
+template <int dim>
+inline
+RefinementCase<dim>
+RefinementCase<dim>::operator | (const RefinementCase<dim> &r) const
+{
+  return RefinementCase<dim>(static_cast<unsigned char> (value | r.value));
+}
+
+
+
+template <int dim>
+inline
+RefinementCase<dim>
+RefinementCase<dim>::operator & (const RefinementCase<dim> &r) const
+{
+  return RefinementCase<dim>(static_cast<unsigned char> (value & r.value));
+}
+
+
+
+template <int dim>
+inline
+RefinementCase<dim>
+RefinementCase<dim>::operator ~ () const
+{
+  return RefinementCase<dim>(static_cast<unsigned char> (
+                               (~value) & RefinementPossibilities<dim>::isotropic_refinement));
+}
+
+
+
+
+template <int dim>
+inline
+std::size_t
+RefinementCase<dim>::memory_consumption ()
+{
+  return sizeof(RefinementCase<dim>);
+}
+
+
+
+template <int dim>
+template <class Archive>
+void RefinementCase<dim>::serialize (Archive &ar,
+                                     const unsigned int)
+{
+  // serialization can't deal with bitfields, so copy from/to a full sized
+  // unsigned char
+  unsigned char uchar_value = value;
+  ar &uchar_value;
+  value = uchar_value;
+}
+
+
+
+
+template <>
+inline
+Point<1>
+GeometryInfo<1>::unit_cell_vertex (const unsigned int vertex)
+{
+  Assert (vertex < vertices_per_cell,
+          ExcIndexRange (vertex, 0, vertices_per_cell));
+
+  return Point<1>(static_cast<double>(vertex));
+}
+
+
+
+template <>
+inline
+Point<2>
+GeometryInfo<2>::unit_cell_vertex (const unsigned int vertex)
+{
+  Assert (vertex < vertices_per_cell,
+          ExcIndexRange (vertex, 0, vertices_per_cell));
+
+  return Point<2>(vertex%2, vertex/2);
+}
+
+
+
+template <>
+inline
+Point<3>
+GeometryInfo<3>::unit_cell_vertex (const unsigned int vertex)
+{
+  Assert (vertex < vertices_per_cell,
+          ExcIndexRange (vertex, 0, vertices_per_cell));
+
+  return Point<3>(vertex%2, vertex/2%2, vertex/4);
+}
+
+
+
+template <int dim>
+inline
+Point<dim>
+GeometryInfo<dim>::unit_cell_vertex (const unsigned int)
+{
+  Assert(false, ExcNotImplemented());
+
+  return Point<dim> ();
+}
+
+
+
+template <>
+inline
+unsigned int
+GeometryInfo<1>::child_cell_from_point (const Point<1> &p)
+{
+  Assert ((p[0] >= 0) && (p[0] <= 1), ExcInvalidCoordinate(p[0]));
+
+  return (p[0] <= 0.5 ? 0 : 1);
+}
+
+
+
+template <>
+inline
+unsigned int
+GeometryInfo<2>::child_cell_from_point (const Point<2> &p)
+{
+  Assert ((p[0] >= 0) && (p[0] <= 1), ExcInvalidCoordinate(p[0]));
+  Assert ((p[1] >= 0) && (p[1] <= 1), ExcInvalidCoordinate(p[1]));
+
+  return (p[0] <= 0.5 ?
+          (p[1] <= 0.5 ? 0 : 2) :
+          (p[1] <= 0.5 ? 1 : 3));
+}
+
+
+
+template <>
+inline
+unsigned int
+GeometryInfo<3>::child_cell_from_point (const Point<3> &p)
+{
+  Assert ((p[0] >= 0) && (p[0] <= 1), ExcInvalidCoordinate(p[0]));
+  Assert ((p[1] >= 0) && (p[1] <= 1), ExcInvalidCoordinate(p[1]));
+  Assert ((p[2] >= 0) && (p[2] <= 1), ExcInvalidCoordinate(p[2]));
+
+  return (p[0] <= 0.5 ?
+          (p[1] <= 0.5 ?
+           (p[2] <= 0.5 ? 0 : 4) :
+           (p[2] <= 0.5 ? 2 : 6)) :
+          (p[1] <= 0.5 ?
+           (p[2] <= 0.5 ? 1 : 5) :
+           (p[2] <= 0.5 ? 3 : 7)));
+}
+
+
+template <int dim>
+inline
+unsigned int
+GeometryInfo<dim>::child_cell_from_point (const Point<dim> &)
+{
+  Assert(false, ExcNotImplemented());
+
+  return 0;
+}
+
+
+
+template <>
+inline
+Point<1>
+GeometryInfo<1>::cell_to_child_coordinates (const Point<1>         &p,
+                                            const unsigned int      child_index,
+                                            const RefinementCase<1> refine_case)
+
+{
+  Assert (child_index < 2,
+          ExcIndexRange (child_index, 0, 2));
+  Assert (refine_case==RefinementCase<1>::cut_x,
+          ExcInternalError());
+  (void)refine_case; // removes -Wunused-parameter warning in optimized mode
+
+  return Point<1>(p*2.0-unit_cell_vertex(child_index));
+}
+
+
+
+template <>
+inline
+Point<2>
+GeometryInfo<2>::cell_to_child_coordinates (const Point<2>         &p,
+                                            const unsigned int      child_index,
+                                            const RefinementCase<2> refine_case)
+
+{
+  Assert (child_index < GeometryInfo<2>::n_children(refine_case),
+          ExcIndexRange (child_index, 0, GeometryInfo<2>::n_children(refine_case)));
+
+  Point<2> point=p;
+  switch (refine_case)
+    {
+    case RefinementCase<2>::cut_x:
+      point[0]*=2.0;
+      if (child_index==1)
+        point[0]-=1.0;
+      break;
+    case RefinementCase<2>::cut_y:
+      point[1]*=2.0;
+      if (child_index==1)
+        point[1]-=1.0;
+      break;
+    case RefinementCase<2>::cut_xy:
+      point*=2.0;
+      point-=unit_cell_vertex(child_index);
+      break;
+    default:
+      Assert(false, ExcInternalError());
+    }
+
+  return point;
+}
+
+
+
+template <>
+inline
+Point<3>
+GeometryInfo<3>::cell_to_child_coordinates (const Point<3>         &p,
+                                            const unsigned int      child_index,
+                                            const RefinementCase<3> refine_case)
+
+{
+  Assert (child_index < GeometryInfo<3>::n_children(refine_case),
+          ExcIndexRange (child_index, 0, GeometryInfo<3>::n_children(refine_case)));
+
+  Point<3> point=p;
+  // there might be a cleverer way to do
+  // this, but since this function is called
+  // in very few places for initialization
+  // purposes only, I don't care at the
+  // moment
+  switch (refine_case)
+    {
+    case RefinementCase<3>::cut_x:
+      point[0]*=2.0;
+      if (child_index==1)
+        point[0]-=1.0;
+      break;
+    case RefinementCase<3>::cut_y:
+      point[1]*=2.0;
+      if (child_index==1)
+        point[1]-=1.0;
+      break;
+    case RefinementCase<3>::cut_z:
+      point[2]*=2.0;
+      if (child_index==1)
+        point[2]-=1.0;
+      break;
+    case RefinementCase<3>::cut_xy:
+      point[0]*=2.0;
+      point[1]*=2.0;
+      if (child_index%2==1)
+        point[0]-=1.0;
+      if (child_index/2==1)
+        point[1]-=1.0;
+      break;
+    case RefinementCase<3>::cut_xz:
+      // careful, this is slightly
+      // different from xy and yz due to
+      // different internal numbering of
+      // children!
+      point[0]*=2.0;
+      point[2]*=2.0;
+      if (child_index/2==1)
+        point[0]-=1.0;
+      if (child_index%2==1)
+        point[2]-=1.0;
+      break;
+    case RefinementCase<3>::cut_yz:
+      point[1]*=2.0;
+      point[2]*=2.0;
+      if (child_index%2==1)
+        point[1]-=1.0;
+      if (child_index/2==1)
+        point[2]-=1.0;
+      break;
+    case RefinementCase<3>::cut_xyz:
+      point*=2.0;
+      point-=unit_cell_vertex(child_index);
+      break;
+    default:
+      Assert(false, ExcInternalError());
+    }
+
+  return point;
+}
+
+
+
+template <int dim>
+inline
+Point<dim>
+GeometryInfo<dim>::cell_to_child_coordinates (const Point<dim>         &/*p*/,
+                                              const unsigned int        /*child_index*/,
+                                              const RefinementCase<dim> /*refine_case*/)
+
+{
+  AssertThrow (false, ExcNotImplemented());
+  return Point<dim>();
+}
+
+
+
+template <>
+inline
+Point<1>
+GeometryInfo<1>::child_to_cell_coordinates (const Point<1>         &p,
+                                            const unsigned int      child_index,
+                                            const RefinementCase<1> refine_case)
+
+{
+  Assert (child_index < 2,
+          ExcIndexRange (child_index, 0, 2));
+  Assert (refine_case==RefinementCase<1>::cut_x,
+          ExcInternalError());
+  (void)refine_case; // removes -Wunused-parameter warning in optimized mode
+
+  return (p+unit_cell_vertex(child_index))*0.5;
+}
+
+
+
+template <>
+inline
+Point<3>
+GeometryInfo<3>::child_to_cell_coordinates (const Point<3>         &p,
+                                            const unsigned int      child_index,
+                                            const RefinementCase<3> refine_case)
+
+{
+  Assert (child_index < GeometryInfo<3>::n_children(refine_case),
+          ExcIndexRange (child_index, 0, GeometryInfo<3>::n_children(refine_case)));
+
+  Point<3> point=p;
+  // there might be a cleverer way to do
+  // this, but since this function is called
+  // in very few places for initialization
+  // purposes only, I don't care at the
+  // moment
+  switch (refine_case)
+    {
+    case RefinementCase<3>::cut_x:
+      if (child_index==1)
+        point[0]+=1.0;
+      point[0]*=0.5;
+      break;
+    case RefinementCase<3>::cut_y:
+      if (child_index==1)
+        point[1]+=1.0;
+      point[1]*=0.5;
+      break;
+    case RefinementCase<3>::cut_z:
+      if (child_index==1)
+        point[2]+=1.0;
+      point[2]*=0.5;
+      break;
+    case RefinementCase<3>::cut_xy:
+      if (child_index%2==1)
+        point[0]+=1.0;
+      if (child_index/2==1)
+        point[1]+=1.0;
+      point[0]*=0.5;
+      point[1]*=0.5;
+      break;
+    case RefinementCase<3>::cut_xz:
+      // careful, this is slightly
+      // different from xy and yz due to
+      // different internal numbering of
+      // children!
+      if (child_index/2==1)
+        point[0]+=1.0;
+      if (child_index%2==1)
+        point[2]+=1.0;
+      point[0]*=0.5;
+      point[2]*=0.5;
+      break;
+    case RefinementCase<3>::cut_yz:
+      if (child_index%2==1)
+        point[1]+=1.0;
+      if (child_index/2==1)
+        point[2]+=1.0;
+      point[1]*=0.5;
+      point[2]*=0.5;
+      break;
+    case RefinementCase<3>::cut_xyz:
+      point+=unit_cell_vertex(child_index);
+      point*=0.5;
+      break;
+    default:
+      Assert(false, ExcInternalError());
+    }
+
+  return point;
+}
+
+
+
+template <>
+inline
+Point<2>
+GeometryInfo<2>::child_to_cell_coordinates (const Point<2>         &p,
+                                            const unsigned int      child_index,
+                                            const RefinementCase<2> refine_case)
+{
+  Assert (child_index < GeometryInfo<2>::n_children(refine_case),
+          ExcIndexRange (child_index, 0, GeometryInfo<2>::n_children(refine_case)));
+
+  Point<2> point=p;
+  switch (refine_case)
+    {
+    case RefinementCase<2>::cut_x:
+      if (child_index==1)
+        point[0]+=1.0;
+      point[0]*=0.5;
+      break;
+    case RefinementCase<2>::cut_y:
+      if (child_index==1)
+        point[1]+=1.0;
+      point[1]*=0.5;
+      break;
+    case RefinementCase<2>::cut_xy:
+      point+=unit_cell_vertex(child_index);
+      point*=0.5;
+      break;
+    default:
+      Assert(false, ExcInternalError());
+    }
+
+  return point;
+}
+
+
+
+template <int dim>
+inline
+Point<dim>
+GeometryInfo<dim>::child_to_cell_coordinates (const Point<dim>         &/*p*/,
+                                              const unsigned int        /*child_index*/,
+                                              const RefinementCase<dim> /*refine_case*/)
+{
+  AssertThrow (false, ExcNotImplemented());
+  return Point<dim>();
+}
+
+
+
+template <>
+inline
+bool
+GeometryInfo<1>::is_inside_unit_cell (const Point<1> &p)
+{
+  return (p[0] >= 0.) && (p[0] <= 1.);
+}
+
+
+
+template <>
+inline
+bool
+GeometryInfo<2>::is_inside_unit_cell (const Point<2> &p)
+{
+  return (p[0] >= 0.) && (p[0] <= 1.) &&
+         (p[1] >= 0.) && (p[1] <= 1.);
+}
+
+
+
+template <>
+inline
+bool
+GeometryInfo<3>::is_inside_unit_cell (const Point<3> &p)
+{
+  return (p[0] >= 0.) && (p[0] <= 1.) &&
+         (p[1] >= 0.) && (p[1] <= 1.) &&
+         (p[2] >= 0.) && (p[2] <= 1.);
+}
+
+template <>
+inline
+bool
+GeometryInfo<1>::is_inside_unit_cell (const Point<1> &p,
+                                      const double eps)
+{
+  return (p[0] >= -eps) && (p[0] <= 1.+eps);
+}
+
+
+
+template <>
+inline
+bool
+GeometryInfo<2>::is_inside_unit_cell (const Point<2> &p,
+                                      const double eps)
+{
+  const double l = -eps, u = 1+eps;
+  return (p[0] >= l) && (p[0] <= u) &&
+         (p[1] >= l) && (p[1] <= u);
+}
+
+
+
+template <>
+inline
+bool
+GeometryInfo<3>::is_inside_unit_cell (const Point<3> &p,
+                                      const double eps)
+{
+  const double l = -eps, u = 1.0+eps;
+  return (p[0] >= l) && (p[0] <= u) &&
+         (p[1] >= l) && (p[1] <= u) &&
+         (p[2] >= l) && (p[2] <= u);
+}
+
+
+#endif // DOXYGEN
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/graph_coloring.h b/include/deal.II/base/graph_coloring.h
new file mode 100644
index 0000000..1f341a2
--- /dev/null
+++ b/include/deal.II/base/graph_coloring.h
@@ -0,0 +1,553 @@
+
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__graph_coloring_h
+#define dealii__graph_coloring_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/thread_management.h>
+#include <deal.II/base/std_cxx11/function.h>
+#include <boost/unordered_map.hpp>
+#include <boost/unordered_set.hpp>
+
+#include <set>
+#include <vector>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * A namespace containing functions that can color graphs.
+ */
+namespace GraphColoring
+{
+  namespace internal
+  {
+    /**
+     * Given two sets of indices that are assumed to be sorted, determine
+     * whether they will have a nonempty intersection. The actual intersection
+     * is not computed.
+     * @param indices1 A set of indices, assumed sorted.
+     * @param indices2 A set of indices, assumed sorted.
+     * @return Whether the two sets of indices do have a nonempty
+     * intersection.
+     */
+    inline
+    bool
+    have_nonempty_intersection (const std::vector<types::global_dof_index> &indices1,
+                                const std::vector<types::global_dof_index> &indices2)
+    {
+      // Both inputs are assumed sorted, so a single merge-style sweep
+      // suffices: always advance the cursor that points at the smaller
+      // value. Equal values mean a shared index; running off the end of
+      // either array means there is none.
+      typedef std::vector<types::global_dof_index>::const_iterator cursor;
+      const cursor end1 = indices1.end(),
+                   end2 = indices2.end();
+      for (cursor it1 = indices1.begin(), it2 = indices2.begin();
+           (it1 != end1) && (it2 != end2); )
+        {
+          if (*it1 == *it2)
+            // shared index found
+            return true;
+          else if (*it1 < *it2)
+            ++it1;
+          else
+            ++it2;
+        }
+      // swept through without finding a common index
+      return false;
+    }
+
+
+    /**
+     * Create a partitioning of the given range of iterators using a
+     * simplified version of the Cuthill-McKee algorithm (Breadth First Search
+     * algorithm). The function creates partitions that contain "zones" of
+     * iterators where the first partition contains the first iterator, the
+     * second zone contains all those iterators that have conflicts with the
+     * single element in the first zone, the third zone contains those
+     * iterators that have conflicts with the iterators of the second zone and
+     * have not previously been assigned to a zone, etc. If the iterators
+     * represent cells, then this generates partitions that are like onion
+     * shells around the very first cell. Note that elements in each zone may
+     * conflict with other elements in the same zone.
+     *
+     * The question whether two iterators conflict is determined by a user-
+     * provided function. The meaning of this function is discussed in the
+     * documentation of the GraphColoring::make_graph_coloring() function.
+     *
+     * @param[in] begin The first element of a range of iterators for which a
+     * partitioning is sought.
+     * @param[in] end The element past the end of the range of iterators.
+     * @param[in] get_conflict_indices A user defined function object
+     * returning a set of indicators that are descriptive of what represents a
+     * conflict. See above for a more thorough discussion.
+     * @return A set of sets of iterators (where sets are represented by
+     * std::vector for efficiency). Each element of the outermost set
+     * corresponds to the iterators pointing to objects that are in the same
+     * partition (i.e., the same zone).
+     *
+     * @author Martin Kronbichler, Bruno Turcksin
+     */
+    template <typename Iterator>
+    std::vector<std::vector<Iterator> >
+    create_partitioning(const Iterator &begin,
+                        const typename identity<Iterator>::type &end,
+                        const std_cxx11::function<std::vector<types::global_dof_index> (const Iterator &)> &get_conflict_indices)
+    {
+      // Number of iterators in [begin,end); counted in the loop below.
+      unsigned int n_iterators = 0;
+
+      // Build the reverse lookup: conflict index -> all iterators reporting it
+      boost::unordered_map<types::global_dof_index,std::vector<Iterator> > indices_to_iterators;
+      for (Iterator it=begin; it!=end; ++it)
+        {
+          const std::vector<types::global_dof_index> conflict_indices = get_conflict_indices(it);
+          const unsigned int n_conflict_indices = conflict_indices.size();
+          for (unsigned int i=0; i<n_conflict_indices; ++i)
+            indices_to_iterators[conflict_indices[i]].push_back(it);
+          ++n_iterators;
+        }
+
+      // Seed the first zone with the first iterator alone, then grow zone
+      // after zone until every iterator is assigned. used_it records the
+      // iterators that already belong to some zone.
+      std::vector<std::vector<Iterator> > zones(1,std::vector<Iterator> (1,begin));
+      std::set<Iterator> used_it;
+      used_it.insert(begin); // NOTE(review): begin is used unconditionally; presumes a non-empty range
+      while (used_it.size()!=n_iterators)
+        {
+          // loop over the elements of the previous zone. for each element of
+          // the previous zone, get the conflict indices and from there get
+          // those iterators that are conflicting with the current element
+          typename std::vector<Iterator>::iterator previous_zone_it(zones.back().begin());
+          typename std::vector<Iterator>::iterator previous_zone_end(zones.back().end());
+          std::vector<Iterator> new_zone;
+          for (; previous_zone_it!=previous_zone_end; ++previous_zone_it)
+            {
+              const std::vector<types::global_dof_index>
+              conflict_indices = get_conflict_indices(*previous_zone_it);
+
+              const unsigned int n_conflict_indices(conflict_indices.size());
+              for (unsigned int i=0; i<n_conflict_indices; ++i)
+                {
+                  const std::vector<Iterator> &conflicting_elements
+                    = indices_to_iterators[conflict_indices[i]];
+                  for (unsigned int j=0; j<conflicting_elements.size(); ++j)
+                    {
+                      // if the conflicting iterator has not been assigned to
+                      // any zone yet, put it into the new zone and mark it
+                      // as used
+                      //
+                      // the first test is only a shortcut: the current iterator
+                      // is itself already in used_it
+                      if ((conflicting_elements[j] != *previous_zone_it)
+                          &&
+                          (used_it.count(conflicting_elements[j])==0))
+                        {
+                          new_zone.push_back(conflicting_elements[j]);
+                          used_it.insert(conflicting_elements[j]);
+                        }
+                    }
+                }
+            }
+
+          // If the new zone is not empty, append it to the partitioning.
+          // Otherwise the previous zone has no unassigned neighbors, i.e. the
+          // graph is disconnected: restart the process from the first iterator
+          // of the range that has not been assigned to a zone yet
+          if (new_zone.size()!=0)
+            zones.push_back(new_zone);
+          else
+            for (Iterator it=begin; it!=end; ++it)
+              if (used_it.count(it)==0)
+                {
+                  zones.push_back(std::vector<Iterator> (1,it));
+                  used_it.insert(it);
+                  break;
+                }
+        }
+
+      return zones;
+    }
+
+
+
+    /**
+     * This function uses DSATUR (Degree SATURation) to color the elements of
+     * a set. DSATUR works as follows: -# Arrange the vertices by decreasing
+     * order of degrees. -# Color a vertex of maximal degree with color 1. -#
+     * Choose a vertex with a maximal saturation degree. If there is equality,
+     * choose any vertex of maximal degree in the uncolored subgraph. -# Color
+     * the chosen vertex with the least possible (lowest numbered) color. -#
+     * If all the vertices are colored, stop. Otherwise, return to 3.
+     *
+     * @param[in] partition The set of iterators that should be colored.
+     * @param[in] get_conflict_indices A user defined function object
+     * returning a set of indicators that are descriptive of what represents a
+     * conflict. See above for a more thorough discussion.
+     * @param[out] partition_coloring A set of sets of iterators (where sets
+     * are represented by std::vector for efficiency). Each element of the
+     * outermost set corresponds to the iterators pointing to objects that are
+     * in the same partition (have the same color) and consequently do not
+     * conflict. The elements of different sets may conflict.
+     */
+    template <typename Iterator>
+    void
+    make_dsatur_coloring(std::vector<Iterator> &partition,
+                         const std_cxx11::function<std::vector<types::global_dof_index> (const Iterator &)> &get_conflict_indices,
+                         std::vector<std::vector<Iterator> > &partition_coloring)
+    {
+      partition_coloring.clear ();
+
+      // Number of iterators (graph vertices) in this partition.
+      const unsigned int partition_size(partition.size());
+      std::vector<unsigned int> sorted_vertices(partition_size);
+      std::vector<int> degrees(partition_size);
+      std::vector<std::vector<types::global_dof_index> > conflict_indices(partition_size);
+      std::vector<std::vector<unsigned int> > graph(partition_size);
+
+      // Get the conflict indices associated to each iterator. They are sorted
+      // here because have_nonempty_intersection() requires sorted input
+      for (unsigned int i=0; i<partition_size; ++i)
+        {
+          conflict_indices[i] = get_conflict_indices(partition[i]);
+          std::sort(conflict_indices[i].begin(), conflict_indices[i].end());
+        }
+
+      // Compute the degree of each vertex of the graph using the
+      // intersection of the conflict indices (all pairs i<j are tested).
+      for (unsigned int i=0; i<partition_size; ++i)
+        for (unsigned int j=i+1; j<partition_size; ++j)
+          // If the two iterators share indices then we increase the degree of the
+          // vertices and create an ''edge'' in the graph.
+          if (have_nonempty_intersection (conflict_indices[i], conflict_indices[j]))
+            {
+              ++degrees[i];
+              ++degrees[j];
+              graph[i].push_back(j);
+              graph[j].push_back(i);
+            }
+
+      // Sort the vertices by decreasing degree (repeated search for the maximum).
+      std::vector<int>::iterator degrees_it;
+      for (unsigned int i=0; i<partition_size; ++i)
+        {
+          // Find the largest element.
+          degrees_it = std::max_element(degrees.begin(),degrees.end());
+          sorted_vertices[i] = degrees_it-degrees.begin();
+          // Set the largest element to -1 so it cannot be chosen again.
+          *degrees_it = -1;
+        }
+
+      // Color the graph: first-fit over the vertices in decreasing-degree order.
+      std::vector<boost::unordered_set<unsigned int> > colors_used;
+      for (unsigned int i=0; i<partition_size; ++i)
+        {
+          const unsigned int current_vertex(sorted_vertices[i]);
+          bool new_color(true);
+          // Try to use an existing color, i.e., try to find a color which is not
+          // associated to one of the vertices linked to current_vertex.
+          // Loop over the existing colors.
+          for (unsigned int j=0; j<partition_coloring.size(); ++j)
+            {
+              // Loop on the vertices linked to current_vertex. If one vertex linked
+              // to current_vertex is already using the color j, this color cannot
+              // be used for current_vertex.
+              bool unused_color(true);
+              for (unsigned int k=0; k<graph[current_vertex].size(); ++k)
+                if (colors_used[j].count(graph[current_vertex][k])==1)
+                  {
+                    unused_color = false;
+                    break;
+                  }
+              if (unused_color)
+                {
+                  partition_coloring[j].push_back(partition[current_vertex]);
+                  colors_used[j].insert(current_vertex);
+                  new_color = false;
+                  break;
+                }
+            }
+          // No existing color fits: add a new color holding only current_vertex.
+          if (new_color)
+            {
+              partition_coloring.push_back(std::vector<Iterator> (1,
+                                                                  partition[current_vertex]));
+              boost::unordered_set<unsigned int> tmp;
+              tmp.insert(current_vertex);
+              colors_used.push_back(tmp);
+            }
+        }
+    }
+
+
+
+    /**
+     * Given a partition-coloring graph, i.e., a set of zones (partitions)
+     * each of which is colored, produce a combined coloring for the entire
+     * set of iterators. This is possible because any color on an even (resp.
+     * odd) zone does not conflict with any color of any other even (resp.
+     * odd) zone. Consequently, we can combine colors from all even and all
+     * odd zones. This function tries to create colors of similar number of
+     * elements.
+     */
+    template <typename Iterator>
+    std::vector<std::vector<Iterator> >
+    gather_colors(const std::vector<std::vector<std::vector<Iterator> > > &partition_coloring)
+    {
+      std::vector<std::vector<Iterator> > coloring;
+
+      // colors_counter[i][j] = number of iterators of color j in partition i.
+      const unsigned int partition_size(partition_coloring.size());
+      std::vector<std::vector<unsigned int> > colors_counter(partition_size);
+      for (unsigned int i=0; i<partition_size; ++i)
+        {
+          const unsigned int n_colors(partition_coloring[i].size());
+          colors_counter[i].resize(n_colors);
+          for (unsigned int j=0; j<n_colors; ++j)
+            colors_counter[i][j] = partition_coloring[i][j].size();
+        }
+
+      // Among the even partitions, find the one with the largest number of colors.
+      unsigned int i_color(0);
+      unsigned int max_even_n_colors(0);
+      const unsigned int colors_size(colors_counter.size());
+      for (unsigned int i=0; i<colors_size; i+=2)
+        {
+          if (max_even_n_colors<colors_counter[i].size())
+            {
+              max_even_n_colors = colors_counter[i].size();
+              i_color = i;
+            }
+        }
+      coloring.resize(max_even_n_colors);
+      for (unsigned int j=0; j<colors_counter[i_color].size(); ++j) // seed with that partition's colors
+        coloring[j] = partition_coloring[i_color][j];
+
+      for (unsigned int i=0; i<partition_size; i+=2)
+        {
+          if (i!=i_color)
+            {
+              boost::unordered_set<unsigned int> used_k;
+              for (unsigned int j=0; j<colors_counter[i].size(); ++j)
+                {
+                  // Find the color in the current partition with the largest number of
+                  // iterators.
+                  std::vector<unsigned int>::iterator it;
+                  it = std::max_element(colors_counter[i].begin(),colors_counter[i].end());
+                  unsigned int min_iterators(-1); // -1 converts to the largest unsigned value
+                  unsigned int pos(0);
+                  // Find the color of coloring with the least number of iterators among
+                  // the colors that have not been used yet for this partition.
+                  for (unsigned int k=0; k<max_even_n_colors; ++k)
+                    if (used_k.count(k)==0)
+                      if (colors_counter[i_color][k]<min_iterators)
+                        {
+                          min_iterators = colors_counter[i_color][k];
+                          pos = k;
+                        }
+                  colors_counter[i_color][pos] += *it;
+                  // Concatenate the current color with the existing coloring.
+                  coloring[pos].insert(coloring[pos].end(),
+                                       partition_coloring[i][it-colors_counter[i].begin()].begin(),
+                                       partition_coloring[i][it-colors_counter[i].begin()].end());
+                  used_k.insert(pos);
+                  // Zero the count of the merged color so it is not picked again.
+                  *it = 0;
+                }
+            }
+        }
+
+      // If there is more than one partition, do the same thing that we did for the even partitions
+      // to the odd partitions; their colors are stored after the max_even_n_colors even ones
+      if (partition_size>1)
+        {
+          unsigned int max_odd_n_colors(0);
+          for (unsigned int i=1; i<partition_size; i+=2)
+            {
+              if (max_odd_n_colors<colors_counter[i].size())
+                {
+                  max_odd_n_colors = colors_counter[i].size();
+                  i_color = i;
+                }
+            }
+          coloring.resize(max_even_n_colors+max_odd_n_colors);
+          for (unsigned int j=0; j<colors_counter[i_color].size(); ++j)
+            coloring[max_even_n_colors+j] = partition_coloring[i_color][j];
+
+          for (unsigned int i=1; i<partition_size; i+=2)
+            {
+              if (i!=i_color)
+                {
+                  boost::unordered_set<unsigned int> used_k;
+                  for (unsigned int j=0; j<colors_counter[i].size(); ++j)
+                    {
+                      // Find the color in the current partition with the largest number of
+                      // iterators.
+                      std::vector<unsigned int>::iterator it;
+                      it = std::max_element(colors_counter[i].begin(),colors_counter[i].end());
+                      unsigned int min_iterators(-1); // -1 converts to the largest unsigned value
+                      unsigned int pos(0);
+                      // Find the color of coloring with the least number of iterators among
+                      // the colors that have not been used yet for this partition.
+                      for (unsigned int k=0; k<max_odd_n_colors; ++k)
+                        if (used_k.count(k)==0)
+                          if (colors_counter[i_color][k]<min_iterators)
+                            {
+                              min_iterators = colors_counter[i_color][k];
+                              pos = k;
+                            }
+                      colors_counter[i_color][pos] += *it;
+                      // Concatenate the current color with the existing coloring.
+                      coloring[max_even_n_colors+pos].insert(coloring[max_even_n_colors+pos].end(),
+                                                             partition_coloring[i][it-colors_counter[i].begin()].begin(),
+                                                             partition_coloring[i][it-colors_counter[i].begin()].end());
+                      used_k.insert(pos);
+                      // Zero the count of the merged color so it is not picked again.
+                      *it = 0;
+                    }
+                }
+            }
+        }
+
+      return coloring;
+    }
+  }
+
+
+  /**
+   * Create a partitioning of the given range of iterators so that iterators
+   * that point to conflicting objects will be placed into different
+   * partitions, where the question whether two objects conflict is determined
+   * by a user-provided function.
+   *
+   * This function can also be considered as a graph coloring: each object
+   * pointed to by an iterator is considered to be a node and there is an edge
+   * between each two nodes that conflict. The graph coloring algorithm then
+   * assigns a color to each node in such a way that two nodes connected by an
+   * edge do not have the same color.
+   *
+   * A typical use case for this function is in assembling a matrix in
+   * parallel. There, one would like to assemble local contributions on
+   * different cells at the same time (an operation that is purely local and
+   * so requires no synchronization) but then we need to add these local
+   * contributions to the global matrix. In general, the contributions from
+   * different cells may be to the same matrix entries if the cells share
+   * degrees of freedom and, consequently, can not happen at the same time
+   * unless we want to risk a race condition (see
+   * http://en.wikipedia.org/wiki/Race_condition). Thus, we call these two
+   * cells in conflict, and we can only allow operations in parallel from
+   * cells that do not conflict. In other words, two cells are in conflict if
+   * the set of matrix entries (for example characterized by the rows) have a
+   * nonempty intersection.
+   *
+   * In this generality, computing the graph of conflicts would require
+   * calling a function that determines whether two iterators (or the two
+   * objects they represent) conflict, and calling it for every pair of
+   * iterators, i.e., $\frac 12 N (N-1)$ times. This is too expensive in
+   * general. A better approach is to require a user-defined function that
+   * returns for every iterator it is called for a set of indicators of some
+   * kind that characterize a conflict; two iterators are in conflict if their
+   * conflict indicator sets have a nonempty intersection. In the example of
+   * assembling a matrix, the conflict indicator set would contain the indices
+   * of all degrees of freedom on the cell pointed to (in the case of
+   * continuous Galerkin methods) or the union of indices of degree of freedom
+   * on the current cell and all cells adjacent to the faces of the current
+   * cell (in the case of discontinuous Galerkin methods, because there one
+   * computes face integrals coupling the degrees of freedom connected by a
+   * common face -- see step-12).
+   *
+   * @note The conflict set returned by the user defined function passed as
+   * third argument needs to accurately describe <i>all</i> degrees of freedom
+   * for which anything is written into the matrix or right hand side. In
+   * other words, if the writing happens through a function like
+   * ConstraintMatrix::copy_local_to_global(), then the set of conflict
+   * indices must actually contain not only the degrees of freedom on the
+   * current cell, but also those they are linked to by constraints such as
+   * hanging nodes.
+   *
+   * In other situations, the conflict indicator sets may represent something
+   * different altogether -- it is up to the caller of this function to
+   * describe what it means for two iterators to conflict. Given this,
+   * computing conflict graph edges can be done significantly more cheaply
+   * than with ${\cal O}(N^2)$ operations.
+   *
+   * In any case, the result of the function will be so that iterators whose
+   * conflict indicator sets have overlap will not be assigned to the same
+   * color.
+   *
+   * @note The algorithm used in this function is described in a paper by
+   * Turcksin, Kronbichler and Bangerth, see
+   * @ref workstream_paper.
+   *
+   * @param[in] begin The first element of a range of iterators for which a
+   * coloring is sought.
+   * @param[in] end The element past the end of the range of iterators.
+   * @param[in] get_conflict_indices A user defined function object returning
+   * a set of indicators that are descriptive of what represents a conflict.
+   * See above for a more thorough discussion.
+   * @return A set of sets of iterators (where sets are represented by
+   * std::vector for efficiency). Each element of the outermost set
+   * corresponds to the iterators pointing to objects that are in the same
+   * partition (have the same color) and consequently do not conflict. The
+   * elements of different sets may conflict.
+   *
+   * @author Martin Kronbichler, Bruno Turcksin
+   */
+  template <typename Iterator>
+  std::vector<std::vector<Iterator> >
+  make_graph_coloring(const Iterator &begin,
+                      const typename identity<Iterator>::type &end,
+                      const std_cxx11::function<std::vector<types::global_dof_index> (const typename identity<Iterator>::type &)> &get_conflict_indices)
+  {
+    Assert (begin != end, ExcMessage ("GraphColoring is not prepared to deal with empty ranges!"));
+
+    // Step 1: split the range into zones.
+    std::vector<std::vector<Iterator> > zones
+      = internal::create_partitioning (begin, end, get_conflict_indices);
+
+    // Step 2: color each zone on its own. One task is created per
+    // zone, and each task is handed its own slot of zone_colorings
+    // as output.
+    const unsigned int n_zones = zones.size();
+    std::vector<std::vector<std::vector<Iterator> > > zone_colorings (n_zones);
+
+    Threads::TaskGroup<> coloring_tasks;
+    for (unsigned int zone=0; zone<n_zones; ++zone)
+      {
+        coloring_tasks += Threads::new_task (&internal::make_dsatur_coloring<Iterator>,
+                                             zones[zone],
+                                             get_conflict_indices,
+                                             zone_colorings[zone]);
+      }
+    coloring_tasks.join_all();
+
+    // Step 3: merge the per-zone colorings into one global coloring.
+    return internal::gather_colors(zone_colorings);
+  }
+
+} // End graph_coloring namespace
+
+DEAL_II_NAMESPACE_CLOSE
+
+
+//----------------------------   graph_coloring.h     ---------------------------
+// end of #ifndef dealii__graph_coloring_h
+#endif
+//----------------------------   graph_coloring.h     ---------------------------
diff --git a/include/deal.II/base/index_set.h b/include/deal.II/base/index_set.h
new file mode 100644
index 0000000..99e1917
--- /dev/null
+++ b/include/deal.II/base/index_set.h
@@ -0,0 +1,1610 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__index_set_h
+#define dealii__index_set_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/exceptions.h>
+#include <boost/serialization/vector.hpp>
+#include <vector>
+#include <algorithm>
+
+#ifdef DEAL_II_WITH_TRILINOS
+#  include <Epetra_Map.h>
+#endif
+
+#if defined(DEAL_II_WITH_MPI) || defined(DEAL_II_WITH_PETSC)
+#include <mpi.h>
+#else
+typedef int MPI_Comm;
+#  ifndef MPI_COMM_WORLD
+#    define MPI_COMM_WORLD 0
+#  endif
+#endif
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * A class that represents a subset of indices among a larger set. For
+ * example, it can be used to denote the set of degrees of freedom within the
+ * range $[0,\text{dof\_handler.n\_dofs})$ that belongs to a particular
+ * subdomain, or those among all degrees of freedom that are stored on a
+ * particular processor in a distributed parallel computation.
+ *
+ * This class can represent a collection of half-open ranges of indices as
+ * well as individual elements. For practical purposes it also stores the
+ * overall range these indices can assume. In other words, you need to specify
+ * the size of the index space $[0,\text{size})$ of which objects of this
+ * class are a subset.
+ *
+ * There are two ways to iterate over the IndexSets: First, begin() and end()
+ * allow iteration over individual indices in the set. Second,
+ * begin_intervals() and end_intervals() allow iteration over the half-open
+ * ranges as described above.
+ *
+ * The data structures used in this class along with a rationale can be found
+ * in the
+ * @ref distributed_paper "Distributed Computing paper".
+ *
+ * @author Wolfgang Bangerth, 2009
+ */
+class IndexSet
+{
+public:
+  // forward declarations:
+  class ElementIterator;
+  class IntervalIterator;
+
+  /**
+   * @p size_type is the type used for storing the size and the individual
+   * entries in the IndexSet.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * One can see an IndexSet as a container of size size(), where the elements
+   * of the containers are bool values that are either false or true,
+   * depending on whether a particular index is an element of the IndexSet or
+   * not. In other words, an IndexSet is a bit like a vector in which the
+   * elements we store are booleans. In this view, the correct local typedef
+   * indicating the type of the elements of the vector would then be @p bool.
+   *
+   * On the other hand, @p bool has the disadvantage that it is not a
+   * numerical type that, for example, allows multiplication with a @p double.
+   * In other words, one can not easily use a vector of booleans in a place
+   * where other vectors are allowed. Consequently, we declare the type of the
+   * elements of such a vector as a signed integer. This uses the fact that in
+   * the C++ language, booleans are implicitly convertible to integers. In
+   * other words, declaring the type of the elements of the vector as a signed
+   * integer is only a small lie, but it is a useful one.
+   */
+  typedef signed int value_type;
+
+
+  /**
+   * Default constructor.
+   */
+  IndexSet ();
+
+  /**
+   * Constructor that also sets the overall size of the index range.
+   */
+  explicit IndexSet (const size_type size);
+
+
+#ifdef DEAL_II_WITH_TRILINOS
+  /**
+   * Constructor from a trilinos Epetra_Map.
+   */
+  explicit IndexSet(const Epetra_Map &map);
+#endif
+
+  /**
+   * Remove all indices from this index set. The index set retains its size,
+   * however.
+   */
+  void clear ();
+
+  /**
+   * Set the maximal size of the indices upon which this object operates.
+   *
+   * This function can only be called if the index set does not yet contain
+   * any elements.  This can be achieved by calling clear(), for example.
+   */
+  void set_size (const size_type size);
+
+  /**
+   * Return the size of the index space of which this index set is a
+   * subset.
+   *
+   * Note that the result is not equal to the number of indices within this
+   * set. The latter information is returned by n_elements().
+   */
+  size_type size () const;
+
+  /**
+   * Add the half-open range $[\text{begin},\text{end})$ to the set of indices
+   * represented by this class.
+   * @param[in] begin The first element of the range to be added.
+   * @param[in] end The past-the-end element of the range to be added.
+   */
+  void add_range (const size_type begin,
+                  const size_type end);
+
+  /**
+   * Add an individual index to the set of indices.
+   */
+  void add_index (const size_type index);
+
+  /**
+   * Add a whole set of indices described by dereferencing every element of
+   * the iterator range <code>[begin,end)</code>.
+   *
+   * @param[in] begin Iterator to the first element of range of indices to be
+   * added
+   * @param[in] end The past-the-end iterator for the range of elements to be
+   * added. @pre The condition <code>begin@<=end</code> needs to be satisfied.
+   */
+  template <typename ForwardIterator>
+  void add_indices (const ForwardIterator &begin,
+                    const ForwardIterator &end);
+
+  /**
+   * Add the given IndexSet @p other to the current one, constructing the
+   * union of *this and @p other.
+   *
+   * If the @p offset argument is nonzero, then every index in @p other is
+   * shifted by @p offset before being added to the current index set. This
+   * allows to construct, for example, one index set from several others that
+   * are supposed to represent index sets corresponding to different ranges
+   * (e.g., when constructing the set of nonzero entries of a block vector
+   * from the sets of nonzero elements of the individual blocks of a vector).
+   *
+   * This function will generate an exception if any of the (possibly shifted)
+   * indices of the @p other index set lie outside the range
+   * <code>[0,size())</code> represented by the current object.
+   */
+  void add_indices(const IndexSet &other,
+                   const unsigned int offset = 0);
+
+  /**
+   * Return whether the specified index is an element of the index set.
+   */
+  bool is_element (const size_type index) const;
+
+  /**
+   * Return whether the index set stored by this object defines a contiguous
+   * range. This is true also if no indices are stored at all.
+   */
+  bool is_contiguous () const;
+
+  /**
+   * Return the number of elements stored in this index set.
+   */
+  size_type n_elements () const;
+
+  /**
+   * Return the global index of the local index with number @p local_index
+   * stored in this index set. @p local_index obviously needs to be less than
+   * n_elements().
+   */
+  size_type nth_index_in_set (const unsigned int local_index) const;
+
+  /**
+   * Return the how-manyth element of this set (counted in ascending order) @p
+   * global_index is. @p global_index needs to be less than the size(). This
+   * function throws an exception if the index @p global_index is not actually
+   * a member of this index set, i.e. if is_element(global_index) is false.
+   */
+  size_type index_within_set (const size_type global_index) const;
+
+  /**
+   * Each index set can be represented as the union of a number of contiguous
+   * intervals of indices, where if necessary intervals may only consist of
+   * individual elements to represent isolated members of the index set.
+   *
+   * This function returns the minimal number of such intervals that are
+   * needed to represent the index set under consideration.
+   */
+  unsigned int n_intervals () const;
+
+  /**
+   * This function returns the local index of the beginning of the largest
+   * range.
+   */
+  unsigned int largest_range_starting_index() const;
+
+  /**
+   * Compress the internal representation by merging individual elements with
+   * contiguous ranges, etc. This function does not have any external effect.
+   */
+  void compress () const;
+
+  /**
+   * Comparison for equality of index sets. This operation is only allowed if
+   * the size of the two sets is the same (though of course they do not have
+   * to have the same number of indices).
+   */
+  bool operator == (const IndexSet &is) const;
+
+  /**
+   * Comparison for inequality of index sets. This operation is only allowed
+   * if the size of the two sets is the same (though of course they do not
+   * have to have the same number of indices).
+   */
+  bool operator != (const IndexSet &is) const;
+
+  /**
+   * Return the intersection of the current index set and the argument given,
+   * i.e. a set of indices that are elements of both index sets. The two index
+   * sets must have the same size (though of course they do not have to have
+   * the same number of indices).
+   */
+  IndexSet operator & (const IndexSet &is) const;
+
+  /**
+   * This command takes an interval <tt>[begin, end)</tt> and returns the
+   * intersection of the current index set with the interval, shifted to the
+   * range <tt>[0, end-begin)</tt>.
+   *
+   * In other words, the result of this operation is the intersection of the
+   * set represented by the current object and the interval <tt>[begin,
+   * end)</tt>, as seen <i>within the interval <tt>[begin, end)</tt></i> by
+   * shifting the result of the intersection operation to the left by
+   * <tt>begin</tt>. This corresponds to the notion of a <i>view</i>: The
+   * interval <tt>[begin, end)</tt> is a <i>window</i> through which we see
+   * the set represented by the current object.
+   */
+  IndexSet get_view (const size_type begin,
+                     const size_type end) const;
+
+  /**
+   * Removes all elements contained in @p other from this set. In other words,
+   * if $x$ is the current object and $o$ the argument, then we compute $x
+   * \leftarrow x \backslash o$.
+   */
+  void subtract_set (const IndexSet &other);
+
+  /**
+   * Fills the given vector with all indices contained in this IndexSet.
+   */
+  void fill_index_vector(std::vector<size_type> &indices) const;
+
+  /**
+   * Fill the given vector with either zero or one elements, providing a
+   * binary representation of this index set. The given vector is assumed to
+   * already have the correct size.
+   *
+   * The given argument is filled with integer values zero and one, using
+   * <code>vector.operator[]</code>. Thus, any object that has such an
+   * operator can be used as long as it allows conversion of integers zero and
+   * one to elements of the vector. Specifically, this is the case for classes
+   * Vector, BlockVector, but also std::vector@<bool@>, std::vector@<int@>,
+   * and std::vector@<double@>.
+   */
+  template <typename VectorType>
+  void fill_binary_vector (VectorType &vector) const;
+
+  /**
+   * Outputs a text representation of this IndexSet to the given stream. Used
+   * for testing.
+   */
+  template <class StreamType>
+  void print(StreamType &out) const;
+
+  /**
+   * Writes the IndexSet into a text based file format, that can be read in
+   * again using the read() function.
+   */
+  void write(std::ostream &out) const;
+
+  /**
+   * Constructs the IndexSet from a text based representation given by the
+   * stream @p in written by the write() function.
+   */
+  void read(std::istream &in);
+
+  /**
+   * Writes the IndexSet into a binary, compact representation, that can be
+   * read in again using the block_read() function.
+   */
+  void block_write(std::ostream &out) const;
+
+  /**
+   * Constructs the IndexSet from a binary representation given by the stream
+   * @p in written by the block_write() function.
+   */
+  void block_read(std::istream &in);
+
+#ifdef DEAL_II_WITH_TRILINOS
+  /**
+   * Given an MPI communicator, create a Trilinos map object that represents a
+   * distribution of vector elements or matrix rows in which we will locally
+   * store those elements or rows for which we store the index in the current
+   * index set, and all the other elements/rows elsewhere on one of the other
+   * MPI processes.
+   *
+   * The last argument only plays a role if the communicator is a parallel
+   * one, distributing computations across multiple processors. In that case,
+   * if the last argument is false, then it is assumed that the index sets
+   * this function is called with on all processors are mutually exclusive but
+   * together enumerate each index exactly once. In other words, if you call
+   * this function on two processors, then the index sets this function is
+   * called with must together have all possible indices from zero to
+   * size()-1, and no index must appear in both index sets. This corresponds,
+   * for example, to the case where we want to split the elements of vectors
+   * into unique subsets to be stored on different processors -- no element
+   * should be owned by more than one processor, but each element must be
+   * owned by one.
+   *
+   * On the other hand, if the second argument is true, then the index sets
+   * can be overlapping, and they also do not need to span the whole index
+   * set. This is a useful operation if we want to create vectors that not
+   * only contain the locally owned indices, but for example also the elements
+   * that correspond to degrees of freedom located on ghost cells. Another
+   * application of this method is to select a subset of the elements of a
+   * vector, e.g. for extracting only certain solution components.
+   */
+  Epetra_Map make_trilinos_map (const MPI_Comm &communicator = MPI_COMM_WORLD,
+                                const bool      overlapping  = false) const;
+#endif
+
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  std::size_t memory_consumption () const;
+
+  DeclException1 (ExcIndexNotPresent, size_type,
+                  << "The global index " << arg1
+                  << " is not an element of this set.");
+
+  /**
+   * Write or read the data of this object to or from a stream for the purpose
+   * of serialization
+   */
+  template <class Archive>
+  void serialize (Archive &ar, const unsigned int version);
+
+
+  /**
+   * @name Iterators
+   * @{
+   */
+
+  /**
+   * Dereferencing an IntervalIterator will return a reference to an object of
+   * this type. It allows access to a contiguous interval $[a,b[$ (also called
+   * a range) of the IndexSet being iterated over.
+   */
+  class IntervalAccessor
+  {
+  public:
+    /**
+     * Construct a valid accessor given an IndexSet and the index @p range_idx
+     * of the range to point to. An assertion checks that @p range_idx is
+     * smaller than the number of intervals of @p idxset.
+     */
+    IntervalAccessor(const IndexSet *idxset, const size_type range_idx);
+
+    /**
+     * Construct an invalid accessor for the IndexSet. The stored range index
+     * is set to numbers::invalid_dof_index.
+     */
+    explicit IntervalAccessor(const IndexSet *idxset);
+
+    /**
+     * Number of elements in this interval, i.e. the difference between the
+     * past-the-end index and the first index of the range.
+     */
+    size_type n_elements() const;
+
+    /**
+     * If true, we are pointing at a valid interval in the IndexSet. This
+     * requires a non-NULL IndexSet pointer and a range index smaller than the
+     * number of intervals of that IndexSet.
+     */
+    bool is_valid() const;
+
+    /**
+     * Return an iterator pointing at the first index in this interval.
+     */
+    ElementIterator begin() const;
+
+    /**
+     * Return an iterator pointing directly after the last index in this
+     * interval. This is the first index of the next interval, or the end
+     * iterator of the IndexSet if this is the last interval.
+     */
+    ElementIterator end() const;
+
+    /**
+     * Return the last index contained in this interval, i.e. one less than
+     * the past-the-end index of the range.
+     */
+    size_type last() const;
+
+  private:
+    /**
+     * Private copy constructor. Only this class and its friend
+     * IntervalIterator can copy accessors.
+     */
+    IntervalAccessor(const IntervalAccessor &other);
+    /**
+     * Private copy operator.
+     */
+    IntervalAccessor &operator = (const IntervalAccessor &other);
+
+    /**
+     * Test for equality, used by IntervalIterator. Both accessors need to
+     * refer to the same IndexSet.
+     */
+    bool operator == (const IntervalAccessor &other) const;
+    /**
+     * Smaller-than operator, used by IntervalIterator. Both accessors need to
+     * refer to the same IndexSet.
+     */
+    bool operator < (const IntervalAccessor &other) const;
+    /**
+     * Advance this accessor to point to the next interval in the @p
+     * index_set. If there is no next interval, the accessor becomes invalid.
+     */
+    void advance ();
+    /**
+     * Pointer to the IndexSet this accessor refers to. It is NULL for
+     * accessors that belong to a default-constructed IntervalIterator.
+     */
+    const IndexSet *index_set;
+
+    /**
+     * Index into index_set.ranges[]. Set to numbers::invalid_dof_index if
+     * invalid or the end iterator.
+     */
+    size_type range_idx;
+
+    // IntervalIterator needs access to the private copy, comparison and
+    // advance() functions as well as to the data members above.
+    friend class IntervalIterator;
+  };
+
+  /**
+   * Class that represents an iterator pointing to a contiguous interval
+   * $[a,b[$ as returned by IndexSet::begin_intervals().
+   */
+  class IntervalIterator
+  {
+  public:
+    /**
+     * Construct a valid iterator pointing to the interval with index @p
+     * range_idx.
+     */
+    IntervalIterator(const IndexSet *idxset, const size_type range_idx);
+
+    /**
+     * Construct an invalid iterator (used as end()).
+     */
+    explicit IntervalIterator(const IndexSet *idxset);
+
+    /**
+     * Construct an empty iterator, i.e. an invalid iterator that is not
+     * associated with any IndexSet.
+     */
+    IntervalIterator();
+
+    /**
+     * Copy constructor from @p other iterator.
+     */
+    IntervalIterator(const IntervalIterator &other);
+
+    /**
+     * Assignment of another iterator.
+     */
+    IntervalIterator &operator = (const IntervalIterator &other);
+
+    /**
+     * Prefix increment.
+     */
+    IntervalIterator &operator++ ();
+
+    /**
+     * Postfix increment.
+     */
+    IntervalIterator operator++ (int);
+
+    /**
+     * Dereferencing operator, returns an IntervalAccessor.
+     */
+    const IntervalAccessor &operator* () const;
+
+    /**
+     * Dereferencing operator, returns a pointer to an IntervalAccessor.
+     */
+    const IntervalAccessor *operator-> () const;
+
+    /**
+     * Comparison.
+     */
+    bool operator == (const IntervalIterator &) const;
+
+    /**
+     * Inverse of <tt>==</tt>.
+     */
+    bool operator != (const IntervalIterator &) const;
+
+    /**
+     * Comparison operator.
+     */
+    bool operator < (const IntervalIterator &) const;
+
+    /**
+     * Return the distance between the current iterator and the argument. In
+     * the expression <code>it_left-it_right</code> the distance is given by
+     * how many times one has to apply operator++ to the right operand @p
+     * it_right to get the left operand @p it_left (for a positive return
+     * value), or to @p it_left to get the @p it_right (for a negative return
+     * value). An invalid (end) iterator counts as pointing one past the last
+     * interval. Both iterators need to belong to the same IndexSet.
+     */
+    int operator - (const IntervalIterator &p) const;
+
+  private:
+    /**
+     * Accessor that contains what IndexSet and interval we are pointing at.
+     */
+    IntervalAccessor accessor;
+  };
+
+  /**
+   * Class that represents an iterator pointing to a single element in the
+   * IndexSet as returned by IndexSet::begin().
+   */
+  class ElementIterator
+  {
+  public:
+    /**
+     * Construct an iterator pointing to the global index @p index in the
+     * interval @p range_idx. Assertions check that @p range_idx denotes an
+     * existing interval of @p idxset and that @p index lies within that
+     * interval.
+     */
+    ElementIterator(const IndexSet *idxset, const size_type range_idx, const size_type index);
+
+    /**
+     * Construct an iterator pointing to the end of the IndexSet. Both the
+     * stored range index and the stored global index are set to
+     * numbers::invalid_dof_index.
+     */
+    explicit ElementIterator(const IndexSet *idxset);
+
+    /**
+     * Dereferencing operator. The returned value is the index of the element
+     * inside the IndexSet. The iterator has to be valid.
+     */
+    size_type operator* () const;
+
+    /**
+     * Does this iterator point to an existing element?
+     */
+    bool is_valid () const;
+
+    /**
+     * Prefix increment.
+     */
+    ElementIterator &operator++ ();
+
+    /**
+     * Postfix increment.
+     */
+    ElementIterator operator++ (int);
+
+    /**
+     * Comparison. Both iterators need to belong to the same IndexSet.
+     */
+    bool operator == (const ElementIterator &) const;
+
+    /**
+     * Inverse of <tt>==</tt>.
+     */
+    bool operator != (const ElementIterator &) const;
+
+    /**
+     * Comparison operator.
+     */
+    bool operator < (const ElementIterator &) const;
+
+    /**
+     * Return the distance between the current iterator and the argument. In
+     * the expression <code>it_left-it_right</code> the distance is given by
+     * how many times one has to apply operator++ to the right operand @p
+     * it_right to get the left operand @p it_left (for a positive return
+     * value), or to @p it_left to get the @p it_right (for a negative return
+     * value).
+     */
+    std::ptrdiff_t operator - (const ElementIterator &p) const;
+
+  private:
+    /**
+     * Advance iterator by one.
+     */
+    void advance ();
+
+    /**
+     * The parent IndexSet.
+     */
+    const IndexSet *index_set;
+    /**
+     * Index into index_set.ranges. Set to numbers::invalid_dof_index for the
+     * end iterator.
+     */
+    size_type range_idx;
+    /**
+     * The global index this iterator is pointing at. Set to
+     * numbers::invalid_dof_index for the end iterator.
+     */
+    size_type idx;
+  };
+
+  /**
+   * Return an iterator that points at the first index that is contained in
+   * this IndexSet.
+   */
+  ElementIterator begin() const;
+
+  /**
+   * Return an iterator that points one after the last index that is contained
+   * in this IndexSet.
+   */
+  ElementIterator end() const;
+
+  /**
+   * Return an Iterator that points at the first interval of this IndexSet.
+   */
+  IntervalIterator begin_intervals() const;
+
+  /**
+   * Return an Iterator that points one after the last interval of this
+   * IndexSet.
+   */
+  IntervalIterator end_intervals() const;
+
+  /**
+   * @}
+   */
+
+private:
+  /**
+   * A type that denotes the half open index range <code>[begin,end)</code>.
+   *
+   * The nth_index_in_set denotes the how many-th index within this IndexSet
+   * the first element of the current range is. This information is only
+   * accurate if IndexSet::compress() has been called after the last
+   * insertion.
+   */
+  struct Range
+  {
+    size_type begin;
+    size_type end;
+
+    size_type nth_index_in_set;
+
+    /**
+     * Default constructor. There is no meaningful default interval, so the
+     * object created here merely resembles an invalid range. The constructor
+     * is needed for serialization, where the invalid range is expected to be
+     * overwritten with data read from the archive before it is used.
+     */
+    Range ();
+
+    /**
+     * Constructor. Create a half-open interval with the given indices.
+     *
+     * @param i1 Left end point of the interval.
+     * @param i2 First index greater than the last index of the indicated
+     * range.
+     */
+    Range (const size_type i1,
+           const size_type i2);
+
+    /**
+     * Lexicographic ordering: compare the left end points first and use the
+     * right end points only to break ties.
+     */
+    friend
+    inline bool operator< (const Range &range_1,
+                           const Range &range_2)
+    {
+      if (range_1.begin != range_2.begin)
+        return range_1.begin < range_2.begin;
+
+      return range_1.end < range_2.end;
+    }
+
+    /**
+     * Ordering that only looks at the past-the-end index of the two ranges.
+     */
+    static bool end_compare(const IndexSet::Range &x, const IndexSet::Range &y)
+    {
+      return y.end > x.end;
+    }
+
+    /**
+     * Ordering by nth_index_in_set plus the number of elements of the range.
+     */
+    static bool nth_index_compare (const IndexSet::Range &x,
+                                   const IndexSet::Range &y)
+    {
+      const size_type x_past_the_end = x.nth_index_in_set+(x.end-x.begin);
+      const size_type y_past_the_end = y.nth_index_in_set+(y.end-y.begin);
+
+      return x_past_the_end < y_past_the_end;
+    }
+
+    /**
+     * Two ranges are equal if both end points agree; nth_index_in_set is not
+     * taken into account.
+     */
+    friend
+    inline bool operator== (const Range &range_1,
+                            const Range &range_2)
+    {
+      if (range_1.begin != range_2.begin)
+        return false;
+
+      return range_1.end == range_2.end;
+    }
+
+    /**
+     * Memory used by one Range object, in bytes.
+     */
+    std::size_t memory_consumption () const
+    {
+      return sizeof(*this);
+    }
+
+    /**
+     * Write or read the data of this object to or from a stream for the
+     * purpose of serialization
+     */
+    template <class Archive>
+    void serialize (Archive &ar, const unsigned int version);
+  };
+
+  /**
+   * A set of contiguous ranges of indices that make up (part of) this index
+   * set. This variable is always kept sorted.
+   *
+   * The variable is marked "mutable" so that it can be changed by compress(),
+   * though this of course doesn't change anything about the external
+   * representation of this index set.
+   */
+  mutable std::vector<Range> ranges;
+
+  /**
+   * True if compress() has been called after the last change in the set of
+   * indices.
+   *
+   * The variable is marked "mutable" so that it can be changed by compress(),
+   * though this of course doesn't change anything about the external
+   * representation of this index set.
+   */
+  mutable bool is_compressed;
+
+  /**
+   * The overall size of the index range. Elements of this index set have to
+   * have a smaller number than this value.
+   */
+  size_type index_space_size;
+
+  /**
+   * This integer caches the index of the largest range in @p ranges. This
+   * gives <tt>O(1)</tt> access to the range with most elements, while general
+   * access costs <tt>O(log(n_ranges))</tt>. The largest range is needed for
+   * the methods @p is_element(), @p index_within_set(), @p nth_index_in_set.
+   * In many applications, the largest range contains most elements (the
+   * locally owned range), whereas there are only a few other elements
+   * (ghosts).
+   */
+  mutable size_type largest_range;
+
+  /**
+   * Actually perform the compress() operation.
+   */
+  void do_compress() const;
+};
+
+
+/**
+ * Create and return an index set of size $N$ that contains every single index
+ * within this range. In essence, this function returns an index set created
+ * by
+ * @code
+ *  IndexSet is (N);
+ *  is.add_range(0, N);
+ * @endcode
+ * This function exists so that one can create and initialize index sets that
+ * are complete in one step, or so one can write code like
+ * @code
+ *   if (my_index_set == complete_index_set(my_index_set.size()))
+ *     ...
+ * @endcode
+ *
+ * @param[in] N Size of the index space. The argument has the same type that
+ * IndexSet::size() returns, so that a size obtained from an existing index
+ * set is passed through without narrowing.
+ * @return An index set of size @p N that contains the range $[0,N)$.
+ *
+ * @relates IndexSet
+ */
+inline
+IndexSet complete_index_set (const IndexSet::size_type N)
+{
+  IndexSet is (N);
+  is.add_range(0, N);
+  return is;
+}
+
+/* ------------------ inline functions ------------------ */
+
+
+/* IntervalAccessor */
+
+// Build an accessor that refers to interval number range_idx of *idxset.
+inline
+IndexSet::IntervalAccessor::IntervalAccessor(const IndexSet *idxset, const IndexSet::size_type range_idx)
+  :
+  index_set(idxset),
+  range_idx(range_idx)
+{
+  // The requested interval has to exist in the given set.
+  Assert(idxset->n_intervals() > range_idx, ExcInternalError("Invalid range index"));
+}
+
+// Build an invalid accessor for *idxset: the range index is set to the
+// invalid marker.
+inline
+IndexSet::IntervalAccessor::IntervalAccessor(const IndexSet *idxset)
+  :
+  index_set(idxset),
+  range_idx(numbers::invalid_dof_index)
+{}
+
+// Length of the half-open interval this accessor points to.
+inline
+IndexSet::size_type IndexSet::IntervalAccessor::n_elements() const
+{
+  Assert(is_valid(), ExcMessage("invalid iterator"));
+
+  const IndexSet::Range &current = index_set->ranges[range_idx];
+  return current.end - current.begin;
+}
+
+// An accessor is valid if it has an IndexSet and its range index denotes
+// one of the intervals of that set.
+inline
+bool IndexSet::IntervalAccessor::is_valid() const
+{
+  if (index_set == NULL)
+    return false;
+
+  return range_idx < index_set->n_intervals();
+}
+
+// Element iterator positioned on the first index of the current interval.
+inline
+IndexSet::ElementIterator IndexSet::IntervalAccessor::begin() const
+{
+  Assert(is_valid(), ExcMessage("invalid iterator"));
+
+  const IndexSet::size_type first_index = index_set->ranges[range_idx].begin;
+  return IndexSet::ElementIterator(index_set, range_idx, first_index);
+}
+
+// Element iterator positioned directly behind the current interval.
+inline
+IndexSet::ElementIterator IndexSet::IntervalAccessor::end() const
+{
+  Assert(is_valid(), ExcMessage("invalid iterator"));
+
+  // The last interval has no successor, so the result is the end iterator
+  // of the whole set.
+  if (range_idx >= index_set->ranges.size()-1)
+    return index_set->end();
+
+  // Otherwise the result is the first index of the following interval.
+  return IndexSet::ElementIterator(index_set, range_idx+1, index_set->ranges[range_idx+1].begin);
+}
+
+// Last index that is still part of the current interval.
+inline
+IndexSet::size_type
+IndexSet::IntervalAccessor::last() const
+{
+  Assert(is_valid(), ExcMessage("invalid iterator"));
+
+  // The range stores a past-the-end index, so step back by one.
+  const IndexSet::size_type past_the_end = index_set->ranges[range_idx].end;
+  return past_the_end-1;
+}
+
+// Copy constructor: take over the IndexSet pointer and the range index.
+inline
+IndexSet::IntervalAccessor::IntervalAccessor(const IndexSet::IntervalAccessor &other)
+  :
+  index_set (other.index_set),
+  range_idx(other.range_idx)
+{
+  // The copy must either carry the invalid marker or denote an existing
+  // interval.
+  Assert( range_idx == numbers::invalid_dof_index || is_valid(),  ExcMessage("invalid iterator"));
+}
+
+// Copy assignment: take over the IndexSet pointer and the range index.
+inline
+IndexSet::IntervalAccessor &
+IndexSet::IntervalAccessor::operator = (const IndexSet::IntervalAccessor &other)
+{
+  index_set = other.index_set;
+  range_idx = other.range_idx;
+
+  // After the assignment we must either carry the invalid marker or denote
+  // an existing interval.
+  Assert( range_idx == numbers::invalid_dof_index || is_valid(),  ExcMessage("invalid iterator"));
+
+  return *this;
+}
+
+// Two accessors of the same IndexSet are equal if they store the same
+// range index.
+inline
+bool IndexSet::IntervalAccessor::operator == (const IndexSet::IntervalAccessor &other) const
+{
+  Assert (index_set == other.index_set, ExcMessage("Can not compare accessors pointing to different IndexSets"));
+
+  return other.range_idx == range_idx;
+}
+
+// Accessors of the same IndexSet are ordered by their range index.
+inline
+bool IndexSet::IntervalAccessor::operator < (const IndexSet::IntervalAccessor &other) const
+{
+  Assert (index_set == other.index_set, ExcMessage("Can not compare accessors pointing to different IndexSets"));
+
+  return other.range_idx > range_idx;
+}
+
+// Move on to the next interval; become invalid when there is none.
+inline
+void IndexSet::IntervalAccessor::advance ()
+{
+  Assert(is_valid(), ExcMessage("Impossible to advance an IndexSet::IntervalIterator that is invalid"));
+
+  const IndexSet::size_type next_range = range_idx + 1;
+
+  // Walking off the end turns this accessor into an invalid one.
+  if (next_range < index_set->ranges.size())
+    range_idx = next_range;
+  else
+    range_idx = numbers::invalid_dof_index;
+}
+
+/* IntervalIterator */
+
+// Iterator that points to interval number range_idx of *idxset.
+inline
+IndexSet::IntervalIterator::IntervalIterator(const IndexSet *idxset, const IndexSet::size_type range_idx)
+  :
+  accessor(idxset, range_idx)
+{}
+
+// Empty iterator: its accessor is built without an IndexSet.
+inline
+IndexSet::IntervalIterator::IntervalIterator()
+  :
+  accessor(NULL)
+{}
+
+// Invalid iterator that belongs to *idxset.
+inline
+IndexSet::IntervalIterator::IntervalIterator(const IndexSet *idxset)
+  :
+  accessor(idxset)
+{}
+
+// Copy constructor: duplicate the accessor of the other iterator.
+inline
+IndexSet::IntervalIterator::IntervalIterator(const IndexSet::IntervalIterator &other)
+  :
+  accessor(other.accessor)
+{}
+
+// Copy assignment: take over the accessor of the other iterator.
+inline
+IndexSet::IntervalIterator &
+IndexSet::IntervalIterator::operator = (const IntervalIterator &other)
+{
+  this->accessor = other.accessor;
+
+  return *this;
+}
+
+
+// Prefix increment: step to the next interval and return this iterator.
+inline
+IndexSet::IntervalIterator &
+IndexSet::IntervalIterator::operator++ ()
+{
+  this->accessor.advance ();
+
+  return *this;
+}
+
+// Postfix increment: step to the next interval, but return a copy of the
+// iterator as it was before the step.
+inline
+IndexSet::IntervalIterator
+IndexSet::IntervalIterator::operator++ (int)
+{
+  const IndexSet::IntervalIterator previous (*this);
+
+  accessor.advance ();
+
+  return previous;
+}
+
+// Dereferencing yields the accessor stored inside this iterator.
+inline
+const IndexSet::IntervalAccessor &
+IndexSet::IntervalIterator::operator* () const
+{
+  return this->accessor;
+}
+
+// Member access goes through a pointer to the stored accessor.
+inline
+const IndexSet::IntervalAccessor *
+IndexSet::IntervalIterator::operator-> () const
+{
+  return &(this->accessor);
+}
+
+// Iterators are equal if their accessors compare equal.
+inline
+bool IndexSet::IntervalIterator::operator == (const IndexSet::IntervalIterator &other) const
+{
+  return this->accessor == other.accessor;
+}
+
+// Inequality is defined as the negation of operator==.
+inline
+bool IndexSet::IntervalIterator::operator != (const IndexSet::IntervalIterator &other) const
+{
+  return !(this->operator== (other));
+}
+
+// Iterators are ordered in the same way as their accessors.
+inline
+bool IndexSet::IntervalIterator::operator < (const IndexSet::IntervalIterator &other) const
+{
+  return this->accessor < other.accessor;
+}
+
+// Signed distance between two iterators of the same IndexSet: positive if
+// this iterator is behind @p other, negative (or zero) otherwise.
+inline
+int IndexSet::IntervalIterator::operator - (const IndexSet::IntervalIterator &other) const
+{
+  Assert (accessor.index_set == other.accessor.index_set, ExcMessage("Can not compare iterators belonging to different IndexSets"));
+
+  // An invalid (end) iterator is treated as if it pointed one past the last
+  // interval.
+  size_type lhs = accessor.range_idx;
+  if (lhs == numbers::invalid_dof_index)
+    lhs = accessor.index_set->ranges.size();
+
+  size_type rhs = other.accessor.range_idx;
+  if (rhs == numbers::invalid_dof_index)
+    rhs = accessor.index_set->ranges.size();
+
+  // The positions are unsigned, so always subtract the smaller from the
+  // larger one and attach the sign afterwards.
+  return (lhs > rhs)
+         ?
+         static_cast<int>(lhs - rhs)
+         :
+         -static_cast<int>(rhs - lhs);
+}
+
+
+/* ElementIterator */
+
+// Tell whether this iterator points to an existing element of the set.
+inline
+bool IndexSet::ElementIterator::is_valid() const
+{
+  // Consistency check: the iterator is either the end iterator (both fields
+  // carry the invalid marker) or it denotes an existing range with an index
+  // below the end of that range.
+  Assert(
+    (range_idx == numbers::invalid_dof_index && idx == numbers::invalid_dof_index)
+    ||
+    (range_idx < index_set->ranges.size() && idx<index_set->ranges[range_idx].end)
+    , ExcInternalError("Invalid ElementIterator state."));
+
+  if (range_idx >= index_set->ranges.size())
+    return false;
+
+  return idx < index_set->ranges[range_idx].end;
+}
+
+// Construct an iterator into the IndexSet @p idxset that refers to the index
+// @p index inside the range numbered @p range_idx. The assertions check
+// that the range exists and that the index lies in [begin,end) of that
+// range.
+inline
+IndexSet::ElementIterator::ElementIterator(const IndexSet *idxset, const IndexSet::size_type range_idx, const IndexSet::size_type index)
+  :
+  index_set(idxset),
+  range_idx(range_idx),
+  idx(index)
+{
+  Assert(range_idx < index_set->ranges.size(),
+         ExcMessage("Invalid range index for IndexSet::ElementIterator constructor."));
+
+  Assert(idx >= index_set->ranges[range_idx].begin
+         &&
+         idx < index_set->ranges[range_idx].end,
+         ExcInternalError("Invalid index argument for IndexSet::ElementIterator constructor."));
+}
+
+// Construct an iterator for the IndexSet @p idxset whose range number and
+// index are both set to numbers::invalid_dof_index (the state IndexSet::end()
+// returns).
+inline
+IndexSet::ElementIterator::ElementIterator(const IndexSet *idxset)
+  :
+  index_set(idxset),
+  range_idx(numbers::invalid_dof_index),
+  idx(numbers::invalid_dof_index)
+{}
+
+// Dereference: return the index this iterator currently refers to. The
+// iterator must be valid.
+inline
+IndexSet::size_type
+IndexSet::ElementIterator::operator* () const
+{
+  Assert(is_valid(), ExcMessage("Impossible to dereference an IndexSet::ElementIterator that is invalid"));
+
+  return this->idx;
+}
+
+// Two element iterators of the same IndexSet are equal if both the range
+// number and the index agree.
+inline
+bool IndexSet::ElementIterator::operator == (const IndexSet::ElementIterator &other) const
+{
+  Assert (index_set == other.index_set, ExcMessage("Can not compare iterators belonging to different IndexSets"));
+
+  if (range_idx != other.range_idx)
+    return false;
+
+  return idx==other.idx;
+}
+
+// Move the iterator to the next index of the set: step within the current
+// range, hop to the beginning of the following range when the current one is
+// exhausted, or mark the iterator as invalid after the last range.
+inline
+void IndexSet::ElementIterator::advance ()
+{
+  Assert(is_valid(), ExcMessage("Impossible to advance an IndexSet::ElementIterator that is invalid"));
+
+  // step forward unless we already sit on the end of the current range
+  if (idx < index_set->ranges[range_idx].end)
+    ++idx;
+
+  // not at the end of the current range: nothing more to do
+  if (idx != index_set->ranges[range_idx].end)
+    return;
+
+  if (range_idx < index_set->ranges.size()-1)
+    {
+      // there is a following range; continue with its first element
+      ++range_idx;
+      idx = index_set->ranges[range_idx].begin;
+    }
+  else
+    {
+      // that was the last range, so we fell off the end: set to invalid
+      range_idx = numbers::invalid_dof_index;
+      idx = numbers::invalid_dof_index;
+    }
+}
+
+// Prefix increment: advance() this iterator and return a reference to it.
+inline
+IndexSet::ElementIterator &
+IndexSet::ElementIterator::operator++ ()
+{
+  this->advance ();
+  return *this;
+}
+
+// Postfix increment: advance() this iterator, but return a copy taken
+// before the step.
+inline
+IndexSet::ElementIterator
+IndexSet::ElementIterator::operator++ (int)
+{
+  IndexSet::ElementIterator previous_state (*this);
+  this->advance ();
+  return previous_state;
+}
+
+// Inequality, defined as the negation of operator==.
+inline
+bool IndexSet::ElementIterator::operator != (const IndexSet::ElementIterator &other) const
+{
+  if (*this == other)
+    return false;
+
+  return true;
+}
+
+// Lexicographic ordering of element iterators of the same IndexSet: compare
+// the range numbers first and, if they agree, the indices.
+inline
+bool IndexSet::ElementIterator::operator < (const IndexSet::ElementIterator &other) const
+{
+  Assert (index_set == other.index_set, ExcMessage("Can not compare iterators belonging to different IndexSets"));
+
+  // different ranges: the range number alone decides
+  if (range_idx != other.range_idx)
+    return range_idx < other.range_idx;
+
+  // same range: the index decides
+  return idx<other.idx;
+}
+
+// Signed distance between two element iterators of the same IndexSet,
+// computed from the sizes of the ranges between them rather than by
+// stepping one element at a time. The result is zero for equal iterators,
+// negative if *this < other, and positive otherwise.
+inline
+std::ptrdiff_t IndexSet::ElementIterator::operator - (const IndexSet::ElementIterator &other) const
+{
+  Assert (index_set == other.index_set, ExcMessage("Can not compare iterators belonging to different IndexSets"));
+  if (*this == other)
+    return 0;
+  // if *this is not the smaller of the two iterators, compute the distance
+  // with the roles swapped and flip the sign, so that the code below only
+  // has to handle the case *this < other
+  if (!(*this < other))
+    return -(other-*this);
+
+  // only other can be equal to end() because of the checks above.
+  Assert (is_valid(), ExcInternalError());
+
+  // Note: from here on *this < other. We count in c how many elements lie
+  // between *this and other, and return -c at the end.
+
+  // first finish the current range:
+  std::ptrdiff_t c = index_set->ranges[range_idx].end-idx;
+
+  // now walk in steps of ranges (need to start one behind our current one);
+  // the loop is bounded by ranges.size() as well as by other.range_idx:
+  for (size_type range=range_idx+1; range<index_set->ranges.size() && range<=other.range_idx; ++range)
+    c += index_set->ranges[range].end-index_set->ranges[range].begin;
+
+  Assert(other.range_idx < index_set->ranges.size() || other.range_idx == numbers::invalid_dof_index,
+         ExcMessage("Inconsistent iterator state. Did you invalidate iterators by modifying the IndexSet?"));
+
+  // We might have walked too far because we went until the end of other.range_idx, so walk backwards to other.idx
+  // (nothing to correct if other carries the invalid range index):
+  if (other.range_idx != numbers::invalid_dof_index)
+    c -= index_set->ranges[other.range_idx].end - other.idx;
+
+  return -c;
+}
+
+
+/* Range */
+
+// Default constructor: create a range whose begin, end and nth_index_in_set
+// are all set to numbers::invalid_dof_index. Initializing nth_index_in_set
+// here makes sure no indeterminate value is copied around or written out
+// by serialize() before the member has been assigned.
+inline
+IndexSet::Range::Range ()
+  :
+  begin(numbers::invalid_dof_index),
+  end(numbers::invalid_dof_index),
+  nth_index_in_set(numbers::invalid_dof_index)
+{}
+
+
+// Create the half-open range [i1,i2). nth_index_in_set is not known at this
+// point, so it is set to numbers::invalid_dof_index rather than being left
+// uninitialized; this way copying or serializing the object never reads an
+// indeterminate value.
+inline
+IndexSet::Range::Range (const size_type i1,
+                        const size_type i2)
+  :
+  begin(i1),
+  end(i2),
+  nth_index_in_set(numbers::invalid_dof_index)
+{}
+
+
+/* IndexSet itself */
+
+// Return an iterator to the first index of the first range after
+// compressing the set, or end() if the set has no ranges.
+inline
+IndexSet::ElementIterator IndexSet::begin() const
+{
+  compress();
+
+  if (ranges.empty())
+    return end();
+
+  return IndexSet::ElementIterator(this, 0, ranges[0].begin);
+}
+
+// Compress the set and return the past-the-end element iterator, i.e. an
+// iterator constructed from this set alone.
+inline
+IndexSet::ElementIterator IndexSet::end() const
+{
+  compress();
+
+  const IndexSet::ElementIterator past_the_end (this);
+  return past_the_end;
+}
+
+
+// Return an iterator to the first interval (range number 0) after
+// compressing the set, or end_intervals() if the set has no ranges.
+inline
+IndexSet::IntervalIterator IndexSet::begin_intervals() const
+{
+  compress();
+
+  if (ranges.empty())
+    return end_intervals();
+
+  return IndexSet::IntervalIterator(this, 0);
+}
+
+// Compress the set and return the past-the-end interval iterator, i.e. an
+// iterator constructed from this set alone.
+inline
+IndexSet::IntervalIterator IndexSet::end_intervals() const
+{
+  compress();
+
+  const IndexSet::IntervalIterator past_the_end (this);
+  return past_the_end;
+}
+
+
+
+// Default constructor: an index set over an index space of size zero. The
+// object starts out flagged as compressed, and largest_range is set to
+// numbers::invalid_unsigned_int.
+inline
+IndexSet::IndexSet ()
+  : is_compressed (true),
+    index_space_size (0),
+    largest_range (numbers::invalid_unsigned_int)
+{}
+
+
+
+// Construct an index set over an index space of the given @p size. The
+// object starts out flagged as compressed, and largest_range is set to
+// numbers::invalid_unsigned_int.
+inline
+IndexSet::IndexSet (const size_type size)
+  : is_compressed (true),
+    index_space_size (size),
+    largest_range (numbers::invalid_unsigned_int)
+{}
+
+
+
+// Remove all ranges from the set; index_space_size is not touched. With no
+// ranges left, the set is trivially compressed and there is no largest
+// range, so largest_range is reset to the same marker value
+// (numbers::invalid_unsigned_int) that the constructors use for a set
+// without ranges, instead of to 0, which would name a range that does not
+// exist.
+inline
+void
+IndexSet::clear ()
+{
+  ranges.clear ();
+  is_compressed = true;
+  largest_range = numbers::invalid_unsigned_int;
+}
+
+
+
+// Set the size of the index space to @p sz and flag the object as
+// compressed. May only be called while the set holds no ranges.
+inline
+void
+IndexSet::set_size (const size_type sz)
+{
+  Assert (ranges.empty(),
+          ExcMessage ("This function can only be called if the current "
+                      "object does not yet contain any elements."));
+
+  is_compressed    = true;
+  index_space_size = sz;
+}
+
+
+
+// Return the size of the index space (index_space_size), not the number of
+// indices stored in the set.
+inline
+IndexSet::size_type
+IndexSet::size () const
+{
+  return this->index_space_size;
+}
+
+
+
+// Inline front end of do_compress(): the actual work is only triggered if
+// the object is not already flagged as compressed.
+inline
+void
+IndexSet::compress () const
+{
+  if (is_compressed == false)
+    do_compress();
+}
+
+
+
+// Add the single index @p index to the set. Three cases are distinguished:
+// the index lies beyond the end of the last stored range (append a new
+// one-element range), it equals the end of the last range (grow that range
+// by one), or it belongs somewhere else (insert a one-element range at the
+// position found by Utilities::lower_bound). In every case the set is
+// flagged as not compressed afterwards.
+inline
+void
+IndexSet::add_index (const size_type index)
+{
+  Assert (index < index_space_size,
+          ExcIndexRangeType<size_type> (index, 0, index_space_size));
+
+  const Range single_index (index, index+1);
+
+  const bool append_as_new_range
+    = (ranges.size() == 0 || index > ranges.back().end);
+
+  if (append_as_new_range)
+    {
+      ranges.push_back (single_index);
+    }
+  else if (index == ranges.back().end)
+    {
+      // directly adjacent to the last range: just extend it
+      ++ranges.back().end;
+    }
+  else
+    {
+      ranges.insert (Utilities::lower_bound (ranges.begin(),
+                                             ranges.end(),
+                                             single_index),
+                     single_index);
+    }
+
+  is_compressed = false;
+}
+
+
+
+// Add all indices in the iterator range [begin,end) to the set. Runs of
+// consecutive values in the input are collected first and then added with a
+// single call to add_range() each.
+template <typename ForwardIterator>
+inline
+void
+IndexSet::add_indices (const ForwardIterator &begin,
+                       const ForwardIterator &end)
+{
+  ForwardIterator p = begin;
+  while (p != end)
+    {
+      // start a new run at the current element ...
+      const size_type first_of_run    = *p;
+      size_type       one_past_run    = first_of_run + 1;
+
+      // ... and extend it for as long as the following elements continue it
+      ForwardIterator q = p;
+      for (++q; (q != end) && (*q == one_past_run); ++q)
+        ++one_past_run;
+
+      add_range (first_of_run, one_past_run);
+
+      // continue behind the run just added
+      p = q;
+    }
+}
+
+
+
+// Return whether @p index is contained in the set. An object without
+// ranges contains nothing. Otherwise the set is compressed, the largest
+// range is tested first, and the remaining ranges on the appropriate side of
+// the largest range are searched with std::upper_bound.
+inline
+bool
+IndexSet::is_element (const size_type index) const
+{
+  if (ranges.empty() == false)
+    {
+      compress ();
+
+      // fast check whether the index is in the largest range
+      Assert (largest_range < ranges.size(), ExcInternalError());
+      if (index >= ranges[largest_range].begin &&
+          index < ranges[largest_range].end)
+        return true;
+
+      // get the element after which we would have to insert a range that
+      // consists of all elements from this element to the end of the index
+      // range plus one. after this call we know that if p!=end() then
+      // p->begin<=index unless there is no such range at all
+      //
+      // if the searched for element is an element of this range, then we're
+      // done. otherwise, the element can't be in one of the following ranges
+      // because otherwise p would be a different iterator
+      //
+      // since we already know the position relative to the largest range (we
+      // called compress!), we can perform the binary search on ranges with
+      // lower/higher number compared to the largest range
+      std::vector<Range>::const_iterator
+      p = std::upper_bound (ranges.begin() + (index<ranges[largest_range].begin?
+                                              0 : largest_range+1),
+                            index<ranges[largest_range].begin ?
+                            ranges.begin() + largest_range:
+                            ranges.end(),
+                            Range (index, size()+1));
+
+      // there is no range before the very first one to step back to, so
+      // test the first range directly
+      if (p == ranges.begin())
+        return ((index >= p->begin) && (index < p->end));
+
+      Assert ((p == ranges.end()) || (p->begin > index),
+              ExcInternalError());
+
+      // now move to that previous range
+      --p;
+      Assert (p->begin <= index, ExcInternalError());
+
+      // the previous range starts at or before index; index is an element
+      // exactly if that range also extends beyond it
+      return (p->end > index);
+    }
+
+  // didn't find this index, so it's not in the set
+  return false;
+}
+
+
+
+// After compressing the set, report whether it consists of at most one
+// range.
+inline
+bool
+IndexSet::is_contiguous () const
+{
+  compress ();
+
+  const bool at_most_one_range = (ranges.size() <= 1);
+  return at_most_one_range;
+}
+
+
+
+// Return the number of indices stored in the set. After compression this is
+// read off the last range: its nth_index_in_set plus its own length. When
+// DEBUG is defined, the result is cross-checked against the sum of the
+// lengths of all ranges.
+inline
+IndexSet::size_type
+IndexSet::n_elements () const
+{
+  // compress first so that the ranges do not overlap
+  compress ();
+
+  size_type v = 0;
+  if (ranges.empty() == false)
+    {
+      Range &last_range = ranges.back();
+      v = last_range.nth_index_in_set + last_range.end - last_range.begin;
+    }
+
+#ifdef DEBUG
+  // independent computation of the same number
+  size_type s = 0;
+  std::vector<Range>::iterator range = ranges.begin();
+  while (range != ranges.end())
+    {
+      s += (range->end - range->begin);
+      ++range;
+    }
+  Assert(s==v, ExcInternalError());
+#endif
+
+  return v;
+}
+
+
+
+// After compressing the set, return the number of ranges it is made up of.
+inline
+unsigned int
+IndexSet::n_intervals () const
+{
+  compress ();
+
+  const unsigned int n_ranges = ranges.size();
+  return n_ranges;
+}
+
+
+
+// Return the nth_index_in_set value of the range numbered largest_range,
+// after compressing the set. The set must contain at least one range.
+inline
+unsigned int
+IndexSet::largest_range_starting_index() const
+{
+  Assert(ranges.empty()==false, ExcMessage("IndexSet cannot be empty."));
+
+  compress();
+
+  const std::vector<Range>::const_iterator largest = ranges.begin()+largest_range;
+  return largest->nth_index_in_set;
+}
+
+
+
+// Return the index that is stored at position @p n of the set. The object
+// must already be compressed: compress() is deliberately not called from
+// here so that the function stays thread-safe. The largest range is tested
+// first; otherwise the ranges before or behind it are searched with
+// Utilities::lower_bound using Range::nth_index_compare.
+inline
+IndexSet::size_type
+IndexSet::nth_index_in_set (const unsigned int n) const
+{
+  Assert (is_compressed == true, ExcMessage ("IndexSet must be compressed."));
+  Assert (n < n_elements(), ExcIndexRangeType<size_type> (n, 0, n_elements()));
+
+  // shortcut: is the n-th index part of the largest range?
+  Assert (largest_range < ranges.size(), ExcInternalError());
+  const std::vector<Range>::const_iterator largest = ranges.begin()+largest_range;
+  const size_type first_in_largest = largest->nth_index_in_set;
+  if (n >= first_in_largest &&
+      n < first_in_largest + (largest->end - largest->begin))
+    return largest->begin + (n - first_in_largest);
+
+  // otherwise restrict the binary search to the ranges on the correct side
+  // of the largest range
+  std::vector<Range>::const_iterator search_begin, search_end;
+  if (n >= first_in_largest)
+    {
+      search_begin = largest + 1;
+      search_end   = ranges.end();
+    }
+  else
+    {
+      search_begin = ranges.begin();
+      search_end   = largest;
+    }
+
+  // key for the search: a one-element range whose position in the set is n
+  Range r (n,n+1);
+  r.nth_index_in_set = n;
+
+  const std::vector<Range>::const_iterator
+  p = Utilities::lower_bound(search_begin, search_end, r,
+                             Range::nth_index_compare);
+
+  Assert (p != ranges.end(), ExcInternalError());
+  return p->begin + (n-p->nth_index_in_set);
+}
+
+
+
+// Return the position within the set of the index @p n, which must be an
+// element of the set. The object must already be compressed: compress() is
+// deliberately not called from here so that the function stays thread-safe.
+// The largest range is tested first; otherwise the ranges before or behind
+// it are searched with Utilities::lower_bound using Range::end_compare.
+inline
+IndexSet::size_type
+IndexSet::index_within_set (const size_type n) const
+{
+  Assert (is_compressed == true, ExcMessage ("IndexSet must be compressed."));
+  Assert (is_element(n) == true, ExcIndexNotPresent (n));
+  Assert (n < size(), ExcIndexRangeType<size_type> (n, 0, size()));
+
+  // shortcut: is n part of the largest range?
+  Assert (largest_range < ranges.size(), ExcInternalError());
+  const std::vector<Range>::const_iterator largest = ranges.begin()+largest_range;
+  if (n >= largest->begin && n < largest->end)
+    return (n-largest->begin) + largest->nth_index_in_set;
+
+  // otherwise restrict the binary search to the ranges on the correct side
+  // of the largest range
+  std::vector<Range>::const_iterator search_begin, search_end;
+  if (n >= largest->begin)
+    {
+      search_begin = largest + 1;
+      search_end   = ranges.end();
+    }
+  else
+    {
+      search_begin = ranges.begin();
+      search_end   = largest;
+    }
+
+  // key for the search
+  Range r(n, n);
+
+  std::vector<Range>::const_iterator
+  p = Utilities::lower_bound(search_begin, search_end, r,
+                             Range::end_compare);
+
+  Assert(p!=ranges.end(), ExcInternalError());
+  Assert(p->begin<=n, ExcInternalError());
+  Assert(n<p->end, ExcInternalError());
+  return (n-p->begin) + p->nth_index_in_set;
+}
+
+
+
+// Two index sets over index spaces of the same size are equal if, after
+// compressing both, their lists of ranges compare equal.
+inline
+bool
+IndexSet::operator == (const IndexSet &is) const
+{
+  Assert (size() == is.size(),
+          ExcDimensionMismatch (size(), is.size()));
+
+  // bring both operands into compressed form before comparing
+  compress ();
+  is.compress ();
+
+  const bool same_ranges = (ranges == is.ranges);
+  return same_ranges;
+}
+
+
+
+// Two index sets over index spaces of the same size are unequal if, after
+// compressing both, their lists of ranges differ.
+inline
+bool
+IndexSet::operator != (const IndexSet &is) const
+{
+  Assert (size() == is.size(),
+          ExcDimensionMismatch (size(), is.size()));
+
+  // bring both operands into compressed form before comparing
+  compress ();
+  is.compress ();
+
+  const bool different_ranges = (ranges != is.ranges);
+  return different_ranges;
+}
+
+
+
+// Turn @p vector, whose size must equal size(), into a 0/1 mask of this
+// set: every entry is set to zero first, and then the entries whose indices
+// lie in one of the ranges are set to one.
+template <typename Vector>
+void
+IndexSet::fill_binary_vector (Vector &vector) const
+{
+  Assert (vector.size() == size(),
+          ExcDimensionMismatch (vector.size(), size()));
+
+  compress();
+
+  // start from an all-zero vector
+  std::fill (vector.begin(), vector.end(), 0);
+
+  // mark the members of each range
+  std::vector<Range>::iterator it = ranges.begin();
+  while (it != ranges.end())
+    {
+      for (size_type i=it->begin; i<it->end; ++i)
+        vector[i] = 1;
+      ++it;
+    }
+}
+
+
+
+// Write the set to @p out in the form "{a, [b,c], ...}" followed by
+// std::endl: a range holding a single index is printed as that index, any
+// other range as the closed interval [first,last]. The set is compressed
+// first.
+template <class StreamType>
+inline
+void
+IndexSet::print (StreamType &out) const
+{
+  compress();
+  out << "{";
+  std::vector<Range>::const_iterator p;
+  for (p = ranges.begin(); p != ranges.end(); ++p)
+    {
+      if (p->end-p->begin==1)
+        out << p->begin;
+      else
+        out << "[" << p->begin << "," << p->end-1 << "]";
+
+      // separator after every range except the last one. Compare the
+      // successor of p with end() rather than decrementing the temporary
+      // returned by end(): the latter does not compile if the vector's
+      // iterator type is a plain pointer.
+      if (p+1 != ranges.end())
+        out << ", ";
+    }
+  out << "}" << std::endl;
+}
+
+
+
+// Hand the three data members of a range to the archive @p ar through its
+// operator&, in the order begin, end, nth_index_in_set. The version
+// argument is not used.
+template <class Archive>
+inline
+void
+IndexSet::Range::serialize (Archive &ar, const unsigned int)
+{
+  ar &begin;
+  ar &end;
+  ar &nth_index_in_set;
+}
+
+
+
+// Hand the data members of the index set to the archive @p ar through its
+// operator&, in the order ranges, is_compressed, index_space_size,
+// largest_range. The version argument is not used.
+template <class Archive>
+inline
+void
+IndexSet::serialize (Archive &ar, const unsigned int)
+{
+  ar &ranges;
+  ar &is_compressed;
+  ar &index_space_size;
+  ar &largest_range;
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/iterator_range.h b/include/deal.II/base/iterator_range.h
new file mode 100644
index 0000000..9720236
--- /dev/null
+++ b/include/deal.II/base/iterator_range.h
@@ -0,0 +1,330 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2014 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__iterator_range_h
+#define dealii__iterator_range_h
+
+
+#include <deal.II/base/config.h>
+
+#include <iterator>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/**
+ * A class that is used to denote a collection of iterators that can be
+ * expressed in terms of a range of iterators characterized by a begin and an
+ * end iterator. As is common in C++, these ranges are specified as half open
+ * intervals defined by a begin iterator and a one-past-the-end iterator.
+ *
+ * The purpose of this class is so that classes such as Triangulation and
+ * DoFHandler can return ranges of cell iterators using an object of the
+ * current type from functions such as Triangulation::cells() and that such an
+ * object can then be used in a range-based for loop as supported by C++11,
+ * see also
+ * @ref CPP11 "C++11 standard".
+ *
+ * For example, such a loop could look like this if the goal is to set the
+ * user flag on every active cell:
+ * @code
+ *   Triangulation<dim> triangulation;
+ *   ...
+ *   for (auto cell : triangulation.active_cell_iterators())
+ *     cell->set_user_flag();
+ * @endcode
+ * In other words, the <code>cell</code> objects are iterators, and the range
+ * object returned by Triangulation::active_cell_iterators() and similar
+ * functions are conceptually thought of as <i>collections of iterators</i>.
+ *
+ * Of course, the class may also be used to denote other iterator ranges using
+ * different kinds of iterators into other containers.
+ *
+ *
+ * <h3>Class design: Motivation</h3>
+ *
+ * Informally, the C++11 standard describes <a
+ * href="http://en.wikipedia.org/wiki/C%2B%2B11#Range-based_for_loop">range-based
+ * for loops</a> as follows: A <i>range-based for loop</i> of the
+ * form
+ * @code
+ *   Container c;
+ *   for (auto v : c)
+ *     statement;
+ * @endcode
+ * where <code>c</code> is a container or collection, is equivalent to the
+ * following loop:
+ * @code
+ *   Container c;
+ *   for (auto tmp=c.begin(); tmp!=c.end(); ++tmp)
+ *     {
+ *       auto v = *tmp;
+ *       statement;
+ *     }
+ * @endcode
+ * In other words, the compiler introduces a temporary variable that
+ * <i>iterates</i> over the elements of the container or collection, and the
+ * original variable <code>v</code> that appeared in the range-based for loop
+ * represents the <i>dereferenced</i> state of these iterators -- in other
+ * words, the <i>elements</i> of the collection.
+ *
+ * In the context of loops over cells, we typically want to retain the fact
+ * that the loop variable is an iterator, not a value. This is because in
+ * deal.II, we never actually use the <i>dereferenced state</i> of a cell
+ * iterator: conceptually, it would represent a cell, and technically it is
+ * implemented by classes such as CellAccessor and DoFCellAccessor, but these
+ * classes are never used explicitly. Consequently, what we would like is that
+ * a call such as Triangulation::active_cell_iterators() returns an object
+ * that represents a <i>collection of iterators</i> of the kind <code>{begin,
+ * begin+1, ..., end-1}</code>. This is conveniently expressed as the half
+ * open interval <code>[begin,end)</code>. The loop variable in the
+ * range-based for loop would then take on each of these iterators in turn.
+ *
+ *
+ * <h3>Class design: Implementation</h3>
+ *
+ * To represent the desired semantics as outlined above, this class stores a
+ * half-open range of iterators <code>[b,e)</code> of the given template type.
+ * Secondly, the class needs to provide begin() and end() functions in such a
+ * way that if you <i>dereference</i> the result of IteratorRange::begin(),
+ * you get the <code>b</code> iterator. Furthermore, you must be able to
+ * increment the object returned by IteratorRange::begin() so that
+ * <code>*(++begin()) == b+1</code>. In other words, IteratorRange::begin()
+ * must return an iterator that when dereferenced returns an iterator of the
+ * template type <code>Iterator</code>: It is an iterator over iterators in
+ * the same sense as if you had a pointer into an array of pointers.
+ *
+ * This is implemented in the form of the IteratorRange::IteratorOverIterators
+ * class.
+ *
+ * @ingroup CPP11
+ * @author Wolfgang Bangerth, 2014
+ */
+template <typename Iterator>
+class IteratorRange
+{
+public:
+  /**
+   * A class that implements the semantics of iterators over iterators as
+   * discussed in the design sections of the IteratorRange class.
+   */
+  class IteratorOverIterators : public std::iterator<std::forward_iterator_tag, Iterator,
+    typename Iterator::difference_type>
+  {
+  public:
+    /**
+     * Typedef the elements of the collection to give them a name that is more
+     * distinct.
+     */
+    typedef Iterator BaseIterator;
+
+    /**
+     * Constructor. Initialize this iterator-over-iterator in such a way that
+     * it points to the given argument.
+     *
+     * @param iterator An iterator to which this object is supposed to point.
+     */
+    IteratorOverIterators (const BaseIterator &iterator);
+
+    /**
+     * Dereferencing operator.
+     * @return A copy of the iterator within the collection currently pointed
+     * to.
+     */
+    BaseIterator operator* () const;
+
+    /**
+     * Dereferencing operator.
+     * @return A pointer to the iterator within the collection currently
+     * pointed to.
+     */
+    const BaseIterator *operator-> () const;
+
+    /**
+     * Prefix increment operator. Move the current iterator to the next
+     * element of the collection and return the new value.
+     */
+    IteratorOverIterators &operator ++ ();
+
+    /**
+     * Postfix increment operator. Move the current iterator to the next
+     * element of the collection, but return the previous value of the
+     * iterator.
+     */
+    IteratorOverIterators operator ++ (int);
+
+    /**
+     * Comparison operator. This function does not modify the object and is
+     * therefore declared <code>const</code>, so that it can also be called
+     * on constant iterators.
+     *
+     * @param i_o_i Another iterator over iterators.
+     * @return Returns whether the current iterator points to a different
+     * object than the iterator represented by the argument.
+     */
+    bool operator != (const IteratorOverIterators &i_o_i) const;
+
+  private:
+    /**
+     * The object this iterator currently points to.
+     */
+    BaseIterator element_of_iterator_collection;
+  };
+
+
+  /**
+   * Typedef for the iterator type represented by this class.
+   */
+  typedef Iterator iterator;
+
+  /**
+   * Default constructor. Create a range represented by two default
+   * constructed iterators. This range is likely (depending on the type of the
+   * iterators) empty.
+   */
+  IteratorRange();
+
+  /**
+   * Constructor. Constructs a range given the begin and end iterators.
+   *
+   * @param[in] begin An iterator pointing to the first element of the range
+   * @param[in] end   An iterator pointing past the last element represented
+   * by this range.
+   */
+  IteratorRange (const iterator begin,
+                 const iterator end);
+
+  /**
+   * Return the iterator pointing to the first element of this range. The
+   * function is <code>const</code> so that constant range objects can be
+   * iterated over as well.
+   */
+  IteratorOverIterators begin() const;
+
+  /**
+   * Return the iterator pointing to the element past the last element of this
+   * range. The function is <code>const</code> so that constant range objects
+   * can be iterated over as well.
+   */
+  IteratorOverIterators end() const;
+
+private:
+  /**
+   * Iterators characterizing the begin and end of the range.
+   */
+  const iterator it_begin;
+  const iterator it_end;
+};
+
+
+// ------------------- template member functions
+
+
+template <typename Iterator>
+inline
+IteratorRange<Iterator>::IteratorOverIterators::
+IteratorOverIterators (const BaseIterator &iterator)
+  :
+  element_of_iterator_collection (iterator)
+{}
+
+
+
+template <typename Iterator>
+inline
+typename IteratorRange<Iterator>::IteratorOverIterators::BaseIterator
+IteratorRange<Iterator>::IteratorOverIterators::operator* () const
+{
+  return element_of_iterator_collection;
+}
+
+
+
+template <typename Iterator>
+inline
+const typename IteratorRange<Iterator>::IteratorOverIterators::BaseIterator *
+IteratorRange<Iterator>::IteratorOverIterators::operator-> () const
+{
+  return &element_of_iterator_collection;
+}
+
+
+
+template <typename Iterator>
+inline
+typename IteratorRange<Iterator>::IteratorOverIterators &
+IteratorRange<Iterator>::IteratorOverIterators::operator ++ ()
+{
+  ++element_of_iterator_collection;
+  return *this;
+}
+
+
+
+template <typename Iterator>
+inline
+typename IteratorRange<Iterator>::IteratorOverIterators
+IteratorRange<Iterator>::IteratorOverIterators::operator ++ (int)
+{
+  const IteratorOverIterators old_value = *this;
+  ++element_of_iterator_collection;
+  // return the saved iterator-over-iterators itself; dereferencing it here
+  // would yield a BaseIterator that only gets back to the return type
+  // through the implicit converting constructor
+  return old_value;
+}
+
+
+
+template <typename Iterator>
+inline
+bool
+IteratorRange<Iterator>::IteratorOverIterators::operator != (const IteratorOverIterators &i_o_i) const
+{
+  return element_of_iterator_collection != i_o_i.element_of_iterator_collection;
+}
+
+
+template <typename Iterator>
+inline
+IteratorRange<Iterator>::IteratorRange ()
+  :
+  it_begin(),
+  it_end()
+{}
+
+
+
+template <typename Iterator>
+inline
+IteratorRange<Iterator>::IteratorRange (const iterator b,
+                                        const iterator e)
+  :
+  it_begin(b),
+  it_end(e)
+{}
+
+
+template <typename Iterator>
+inline
+typename IteratorRange<Iterator>::IteratorOverIterators
+IteratorRange<Iterator>::begin() const
+{
+  return IteratorOverIterators(it_begin);
+}
+
+
+template <typename Iterator>
+inline
+typename IteratorRange<Iterator>::IteratorOverIterators
+IteratorRange<Iterator>::end() const
+{
+  return IteratorOverIterators(it_end);
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/job_identifier.h b/include/deal.II/base/job_identifier.h
new file mode 100644
index 0000000..e11855c
--- /dev/null
+++ b/include/deal.II/base/job_identifier.h
@@ -0,0 +1,81 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__job_identifier_h
+#define dealii__job_identifier_h
+
+
+#include <deal.II/base/config.h>
+#include <string>
+
+DEAL_II_NAMESPACE_OPEN
+/**
+ * Identification of a program run. <tt>JobIdentifier</tt> determines the
+ * start time of a program run and stores it as a program identifier. There
+ * exists a library object <tt>dealjobid</tt> of this class. This object can
+ * be accessed by all output functions to provide an id for the current job.
+ *
+ * @ingroup utilities
+ */
+class JobIdentifier
+{
+public:
+  /**
+   * Constructor. Set program identifier to value of <tt>program_id</tt>
+   * concatenated with the present time.
+   */
+  JobIdentifier();
+
+  /**
+   * This function returns an identifier for the running program. Currently,
+   * the library provides a function returning "JobID".
+   *
+   * Users may define a replacement of this function in their own source code
+   * and avoid linking the library version. Unfortunately, this mechanism does
+   * not work with shared libraries.
+   */
+  static const char *program_id();
+
+  /**
+   * Obtain the base name of the file currently being compiled. That is, if
+   * the file is <tt>mypath/file.cc</tt> return just <tt>file</tt>. Typically,
+   * this can be called from a program with the argument <tt>__FILE__</tt> and
+   * is used in the deal.II test suite.
+   *
+   * @param filename The file name to strip, as a C string.
+   */
+  static std::string base_name(const char *filename);
+
+  /**
+   * Return the value of <tt>id</tt>. The string is returned by value.
+   */
+  const std::string operator () () const;
+
+private:
+  /**
+   * String holding the identifier of the presently running program.
+   */
+  std::string id;
+};
+
+
+/*------------------------------ Inline functions ------------------------------*/
+
+
+/**
+ * Global object to identify the presently running program.
+ */
+extern JobIdentifier dealjobid;
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/logstream.h b/include/deal.II/base/logstream.h
new file mode 100644
index 0000000..2f1b9ba
--- /dev/null
+++ b/include/deal.II/base/logstream.h
@@ -0,0 +1,670 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__logstream_h
+#define dealii__logstream_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/base/std_cxx11/shared_ptr.h>
+#include <deal.II/base/thread_local_storage.h>
+
+#include <string>
+#include <stack>
+#include <map>
+#include <cmath>
+#include <sstream>
+
+#ifdef DEAL_II_HAVE_SYS_TIMES_H
+#  include <sys/times.h>
+#else
+struct tms  // stand-in used when DEAL_II_HAVE_SYS_TIMES_H is not defined
+{
+  int tms_utime, tms_stime, tms_cutime, tms_cstime;
+};
+#endif
+
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * A class that simplifies the process of execution logging. It does so by
+ * providing
+ * <ul>
+ * <li> a push and pop mechanism for prefixes, and
+ * <li> the possibility of distributing information to files and the console.
+ * </ul>
+ *
+ * The usual usage of this class is through the pregenerated object
+ * <tt>deallog</tt>. Typical setup steps are:
+ * <ul>
+ * <li> <tt>deallog.depth_console(n)</tt>: restrict output on screen to outer
+ * loops.
+ * <li> <tt>deallog.attach(std::ostream)</tt>: write logging information into
+ * a file.
+ * <li> <tt>deallog.depth_file(n)</tt>: restrict output to file to outer
+ * loops.
+ * </ul>
+ *
+ * Before entering a new phase of your program, e.g. a new loop, a new prefix
+ * can be set via <tt>LogStream::Prefix p("loopname");</tt>. The destructor of
+ * the prefix will pop the prefix text from the stack.
+ *
+ * Writes via the <tt><<</tt> operator, <tt> deallog << "This is a log
+ * notice";</tt> will be buffered thread locally until a <tt>std::flush</tt>
+ * or <tt>std::endl</tt> is encountered, which will trigger a writeout to the
+ * console and, if set up, the log file.
+ *
+ * <h3>LogStream and thread safety</h3>
+ *
+ * In the presence of concurrent threads, LogStream behaves in the following
+ * manner:
+ * <ul>
+ * <li> Every write to a Logstream with operator <tt><<</tt> (or with
+ * one of the special member functions) is buffered in a thread-local storage.
+ * <li> An <tt>std::flush</tt> or <tt>std::endl</tt> will trigger a writeout
+ * to the console and (if attached) to the file stream. This writeout is
+ * serialized so that output from concurrent threads does not interleave.
+ * <li> On a new thread, invoking a writeout, as well as a call to #push or
+ * #pop will copy the current prefix of the "blessed" thread that created the
+ * LogStream instance to a thread-local storage. After that prefixes are
+ * thread-local.
+ * </ul>
+ *
+ * <h3>LogStream and reproducible regression test output</h3>
+ *
+ * Generating reproducible floating point output for regression tests is,
+ * to put it mildly, a nightmare. In order to make life a little easier, LogStream
+ * implements a few features that try to achieve such a goal. These features
+ * are turned on by calling test_mode(), and it is not recommended to use them
+ * in any other environment. Right now, LogStream implements the following:
+ *
+ * <ol>
+ * <li> A double number very close to zero will end up being output in
+ * exponential format, although it has no significant digits. The parameter
+ * #double_threshold determines which numbers are too close to zero to be
+ * considered nonzero.
+ * <li> For float numbers holds the same, but with a typically larger
+ * #float_threshold.
+ * <li> Rounded numbers become unreliable with inexact arithmetic. Therefore,
+ * adding a small number before rounding makes results more reproducible,
+ * assuming that numbers like 0.5 are more likely than 0.49997.
+ * </ol>
+ * It should be pointed out that all of these measures distort the output and
+ * make it less accurate. Therefore, they are only recommended if the output
+ * needs to be reproducible.
+ *
+ * @ingroup textoutput
+ * @author Guido Kanschat, Wolfgang Bangerth, 1999, 2003, 2011
+ */
+class LogStream : public Subscriptor
+{
+public:
+  /**
+   * A subclass allowing for the safe generation and removal of prefixes.
+   *
+   * Somewhere at the beginning of a block, create one of these objects, and
+   * it will appear as a prefix in LogStream output like @p deallog. At the
+   * end of the block, the prefix will automatically be removed, when this
+   * object is destroyed.
+   *
+   * In other words, the scope of the object so created determines the
+   * lifetime of the prefix. The advantage of using such an object is that the
+   * prefix is removed whichever way you exit the scope -- by
+   * <code>continue</code>, <code>break</code>, <code>return</code>,
+   * <code>throw</code>, or by simply reaching the closing brace. In all of
+   * these cases, it is not necessary to remember to pop the prefix manually
+   * using LogStream::pop. In this, it works just like the better known
+   * Threads::Mutex::ScopedLock class.
+   */
+  class Prefix
+  {
+  public:
+    /**
+     * Set a new prefix for @p deallog, which will be removed when the
+     * variable is destroyed.
+     */
+    Prefix(const std::string &text);
+
+    /**
+     * Set a new prefix for the given stream, which will be removed when the
+     * variable is destroyed.
+     */
+    Prefix(const std::string &text,
+           LogStream &stream);
+
+    /**
+     * Remove the prefix associated with this variable.
+     */
+    ~Prefix ();
+
+  private:
+    SmartPointer<LogStream,LogStream::Prefix> stream;
+  };
+
+
+  /**
+   * Standard constructor, since we intend to provide an object
+   * <tt>deallog</tt> in the library. Set the standard output stream to
+   * <tt>std::cerr</tt>.
+   */
+  LogStream ();
+
+
+  /**
+   * Destructor.
+   */
+  ~LogStream();
+
+
+  /**
+   * Enable output to a second stream <tt>o</tt>.
+   *
+   * @p print_job_id presumably selects whether the job id is printed (TODO confirm).
+   */
+  void attach (std::ostream &o,
+               const bool    print_job_id = true);
+
+
+  /**
+   * Disable output to the second stream. You may want to call <tt>close</tt>
+   * on the stream that was previously attached to this object.
+   */
+  void detach ();
+
+
+  /**
+   * Setup the logstream for regression test mode.
+   *
+   * This sets the parameters #double_threshold, #float_threshold, and #offset
+   * to nonzero values. The exact values being used have been determined
+   * experimentally and can be found in the source code.
+   *
+   * Called with an argument <tt>false</tt>, switches off test mode and sets
+   * all involved parameters to zero.
+   */
+  void test_mode (bool on=true);
+
+
+  /**
+   * Gives the default stream (<tt>std_out</tt>).
+   */
+  std::ostream &get_console ();
+
+
+  /**
+   * Gives the file stream.
+   */
+  std::ostream &get_file_stream ();
+
+
+  /**
+   * @return true, if file stream has already been attached.
+   */
+  bool has_file () const;
+
+
+  /**
+   * Reroutes cerr to LogStream. Works as a switch, turning logging of
+   * <tt>cerr</tt> on and off alternatingly with every call.
+   */
+  void log_cerr ();
+
+
+  /**
+   * Return the prefix string.
+   */
+  const std::string &get_prefix () const;
+
+
+  /**
+   * Push another prefix on the stack. Prefixes are automatically separated by
+   * a colon and there is a double colon after the last prefix.
+   *
+   * A simpler way to add a prefix (without the manual need to add the
+   * corresponding pop()) is to use the Prefix class.
+   */
+  void push (const std::string &text);
+
+
+  /**
+   * Remove the last prefix added with push().
+   */
+  void pop ();
+
+
+  /**
+   * Maximum number of levels to be printed on the console. The default is 0,
+   * which will not generate any output. This function allows one to restrict
+   * console output to the highest levels of iterations. Only output with less
+   * than <tt>n</tt> prefixes is printed. By calling this function with
+   * <tt>n=0</tt>, no console output will be written. See step-3 for an
+   * example usage of this method.
+   *
+   * The previous value of this parameter is returned.
+   */
+  unsigned int depth_console (const unsigned int n);
+
+
+  /**
+   * Maximum number of levels to be written to the log file. The functionality
+   * is the same as <tt>depth_console</tt>, nevertheless, this function should
+   * be used with care, since it may spoil the value of a log file.
+   *
+   * The previous value of this parameter is returned.
+   */
+  unsigned int depth_file (const unsigned int n);
+
+
+  /**
+   * Set time printing flag. If this flag is true, each output line will be
+   * prepended by the user time used by the running program so far.
+   *
+   * The previous value of this parameter is returned.
+   */
+  bool log_execution_time (const bool flag);
+
+
+  /**
+   * Output time differences between consecutive logs. If this function is
+   * invoked with <tt>true</tt>, the time difference between the previous log
+   * line and the recent one is printed. If it is invoked with <tt>false</tt>,
+   * the accumulated time since start of the program is printed (default
+   * behavior).
+   *
+   * The measurement of times is not changed by this function, just the
+   * output.
+   *
+   * The previous value of this parameter is returned.
+   */
+  bool log_time_differences (const bool flag);
+
+
+  /**
+   * Write detailed timing information.
+   */
+  void timestamp();
+
+
+  /**
+   * Log the thread id.
+   */
+  bool log_thread_id (const bool flag);
+
+
+  /**
+   * Set a threshold for the minimal absolute value of double values. All
+   * numbers with a smaller absolute value will be printed as zero.
+   *
+   * The default value for this threshold is zero, i.e. numbers are printed
+   * according to their real value.
+   *
+   * This feature is mostly useful for automated tests: there, one would like
+   * to reproduce the exact same solution in each run of a testsuite. However,
+   * subtle difference in processor, operating system, or compiler version can
+   * lead to differences in the last few digits of numbers, due to different
+   * rounding. While one can avoid trouble for most numbers when comparing
+   * with stored results by simply limiting the accuracy of output, this does
+   * not hold for numbers very close to zero, i.e. zero plus accumulated
+   * round-off. For these numbers, already the first digit is tainted by
+   * round-off. Using the present function, it is possible to eliminate this
+   * source of problems, by simply writing zero to the output in this case.
+   */
+  void threshold_double(const double t);
+
+
+  /**
+   * The same as threshold_double(), but for float values.
+   */
+  void threshold_float(const float t);
+
+
+  /**
+   * Set the precision for the underlying stream and return the previous
+   * stream precision. This function mimics
+   * http://www.cplusplus.com/reference/ios/ios_base/precision/
+   */
+  std::streamsize precision (const std::streamsize prec);
+
+
+  /**
+   * Set the width for the underlying stream and return the previous stream
+   * width. This function mimics
+   * http://www.cplusplus.com/reference/ios/ios_base/width/
+   */
+  std::streamsize width (const std::streamsize wide);
+
+
+  /**
+   * Set the flags for the underlying stream and return the previous stream
+   * flags. This function mimics
+   * http://www.cplusplus.com/reference/ios/ios_base/flags/
+   */
+  std::ios::fmtflags flags(const std::ios::fmtflags f);
+
+
+  /**
+   * Output double precision numbers through this stream.
+   *
+   * If they are set, this function applies the methods for making floating
+   * point output reproducible as discussed in the introduction.
+   */
+  LogStream &operator << (const double t);
+
+
+  /**
+   * Output single precision numbers through this stream.
+   *
+   * If they are set, this function applies the methods for making floating
+   * point output reproducible as discussed in the introduction.
+   */
+  LogStream &operator << (const float t);
+
+
+  /**
+   * Treat ostream manipulators. This passes on the whole thing to the
+   * template function with the exception of the <tt>std::endl</tt>
+   * manipulator, for which special action is performed: write the temporary
+   * stream buffer including a header to the file and <tt>std::cout</tt> and
+   * empty the buffer.
+   *
+   * An overload of this function is needed anyway, since the compiler can't
+   * bind manipulators like @p std::endl directly to template arguments @p T
+   * like in the previous general template. This is due to the fact that @p
+   * std::endl is actually an overloaded set of functions for @p std::ostream,
+   * @p std::wostream, and potentially more of this kind. This function is
+   * therefore necessary to pick one element from this overload set.
+   */
+  LogStream &operator<< (std::ostream& (*p) (std::ostream &));
+
+
+  /**
+   * Return an estimate for the memory consumption, in bytes, of this object.
+   * This is not exact (but will usually be close) because calculating the
+   * memory usage of trees (e.g., <tt>std::map</tt>) is difficult.
+   */
+  std::size_t memory_consumption () const;
+
+
+  /**
+   * Exception.
+   */
+  DeclException0(ExcNoFileStreamGiven);
+
+private:
+
+
+  /**
+   * Internal wrapper around thread-local prefixes. This private function will
+   * return the correct internal prefix stack. More importantly, a new thread-
+   * local stack will be copied from the current stack of the "blessed" thread
+   * that created this LogStream instance (usually, in the case of deallog,
+   * the "main" thread).
+   */
+  std::stack<std::string> &get_prefixes() const;
+
+  /**
+   * Stack of strings which are printed at the beginning of each line to allow
+   * identification where the output was generated.
+   */
+  mutable Threads::ThreadLocalStorage<std::stack<std::string> > prefixes;
+
+  /**
+   * Default stream, where the output is to go to. This stream defaults to
+   * <tt>std::cerr</tt>, but can be set to another stream through the
+   * constructor.
+   */
+  std::ostream  *std_out;
+
+  /**
+   * Pointer to a stream, where a copy of the output is to go to. Usually,
+   * this will be a file stream.
+   *
+   * You can set and reset this stream by the <tt>attach</tt> function.
+   */
+  std::ostream  *file;
+
+  /**
+   * Value denoting the number of prefixes to be printed to the standard
+   * output. If more than this number of prefixes is pushed to the stack, then
+   * no output will be generated until the number of prefixes shrinks back
+   * below this number.
+   */
+  unsigned int std_depth;
+
+  /**
+   * Same for the maximum depth of prefixes for output to a file.
+   */
+  unsigned int file_depth;
+
+  /**
+   * Flag for printing execution time.
+   */
+  bool print_utime;
+
+  /**
+   * Flag for printing time differences.
+   */
+  bool diff_utime;
+
+  /**
+   * Time of last output line.
+   */
+  double last_time;
+
+  /**
+   * Threshold for printing double values. Every number with absolute value
+   * less than this is printed as zero.
+   */
+  double double_threshold;
+
+  /**
+   * Threshold for printing float values. Every number with absolute value
+   * less than this is printed as zero.
+   */
+  float float_threshold;
+
+  /**
+   * An offset added to every float or double number upon output. This is done
+   * after the number is compared to #double_threshold or #float_threshold,
+   * but before rounding.
+   *
+   * This functionality was introduced to produce more reproducible floating
+   * point output for regression tests. The rationale is, that an exact output
+   * value is much more likely to be 1/8 than 0.124997. If we round to two
+   * digits though, 1/8 becomes unreliably either .12 or .13 due to machine
+   * accuracy. On the other hand, if we add something above machine accuracy
+   * first, we will always get .13.
+   *
+   * It is safe to leave this value equal to zero. For regression tests, the
+   * function test_mode() sets it to a reasonable value.
+   *
+   * The offset is relative to the magnitude of the number.
+   */
+  double offset;
+
+  /**
+   * Flag for printing thread id.
+   */
+  bool print_thread_id;
+
+  /**
+   * The value times() returned on initialization.
+   */
+  double reference_time_val;
+
+  /**
+   * The tms structure times() filled on initialization.
+   */
+  struct tms reference_tms;
+
+  /**
+   * Original buffer of <tt>std::cerr</tt>. We store the address of that
+   * buffer when #log_cerr is called, and reset it to this value if #log_cerr
+   * is called a second time, or when the destructor of this class is run.
+   */
+  std::streambuf *old_cerr;
+
+  /**
+   * A flag indicating whether output is currently at a new line
+   */
+  bool at_newline;
+
+  /**
+   * Print head of line. This prints optional time information and the
+   * contents of the prefix stack.
+   */
+  void print_line_head ();
+
+  /**
+   * Internal wrapper around "thread local" outstreams. This private function
+   * will return the correct internal ostringstream buffer for operator<<.
+   */
+  std::ostringstream &get_stream();
+
+  /**
+   * We use tbb's thread local storage facility to generate a stringstream for
+   * every thread that sends log messages.
+   */
+  Threads::ThreadLocalStorage<std_cxx11::shared_ptr<std::ostringstream> > outstreams;
+
+  template <typename T> friend LogStream &operator << (LogStream &log, const T &t);
+};
+
+
+/* ----------------------------- Inline functions and templates ---------------- */
+
+
+/**
+ * Output a constant something through LogStream:
+ *
+ * @note We declare this operator as a non-member function so that it is
+ * possible to overload it with more specialized templated versions under
+ * C++11 overload resolution rules
+ */
+template <typename T>
+inline LogStream &operator<< (LogStream &log, const T &t)
+{
+  // route the value into the log's internal stringstream
+  std::ostringstream &buffer = log.get_stream();
+  buffer << t;
+  return log;
+}
+
+
+inline
+std::ostringstream &
+LogStream::get_stream()
+{
+  // Lazily create this thread's buffer on first use and give it the
+  // default formatting flags (why these particular flags were chosen is
+  // lost to history, but several hundred tests depend on the output
+  // they produce).
+  //
+  // No locking is needed here: the slot is a thread-local object, so
+  // there can only be one access to it at a time.
+  std_cxx11::shared_ptr<std::ostringstream> &slot = outstreams.get();
+  if (slot.get() == 0)
+    {
+      slot.reset (new std::ostringstream);
+      slot->setf(std::ios::showpoint | std::ios::left);
+    }
+
+  // the stream is now guaranteed to exist
+  return *slot;
+}
+
+
+
+
+inline
+LogStream &
+LogStream::operator<< (const double t)
+{
+  std::ostringstream &out = get_stream();
+  const bool finite = numbers::is_finite(t);
+
+  // Finite values are printed as '0' if below the threshold and scaled by
+  // (1+offset) otherwise; NaN and +-Inf are passed through untouched.
+  if (finite && std::fabs(t) < double_threshold)
+    out << '0';
+  else if (finite)
+    out << t*(1.+offset);
+  else
+    out << t;
+
+  return *this;
+}
+
+
+
+inline
+LogStream &
+LogStream::operator<< (const float t)
+{
+  std::ostringstream &stream = get_stream();
+
+  // NaN and +-Inf must bypass the threshold test and the offset scaling
+  // and be printed as they are. Use the same numbers::is_finite() check as
+  // the double overload: a plain "!(t<=0) && !(t>=0)" test would only
+  // recognize NaN and let infinities fall through to the scaling branch.
+  if (! numbers::is_finite(t))
+    stream << t;
+  else if (std::fabs(t) < float_threshold)
+    stream << '0';
+  else
+    stream << t*(1.+offset);
+
+  return *this;
+}
+
+
+inline
+LogStream::Prefix::Prefix(const std::string &text, LogStream &s)
+  : stream(&s)
+{
+  // register the prefix with the given stream; ~Prefix() removes it again
+  stream->push(text);
+}
+
+
+inline LogStream::Prefix::~Prefix()
+{
+  // undo the push() performed by the constructor
+  stream->pop();
+}
+
+
+/**
+ * The standard log object of deal.II:
+ *
+ * @author Guido Kanschat, 1999
+ */
+extern LogStream deallog;
+
+
+inline
+LogStream::Prefix::Prefix(const std::string &text)
+  : stream(&deallog)
+{
+  // no stream given: attach the prefix to the global deallog object
+  stream->push(text);
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/memory_consumption.h b/include/deal.II/base/memory_consumption.h
new file mode 100644
index 0000000..58e3aba
--- /dev/null
+++ b/include/deal.II/base/memory_consumption.h
@@ -0,0 +1,389 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__memory_consumption_h
+#define dealii__memory_consumption_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/std_cxx11/shared_ptr.h>
+#include <deal.II/base/std_cxx11/type_traits.h>
+#include <deal.II/base/std_cxx11/unique_ptr.h>
+
+#include <string>
+#include <complex>
+#include <vector>
+#include <cstddef>
+#include <cstring>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// forward declaration
+template <typename T> class VectorizedArray;
+
+
+/**
+ * This namespace provides functions helping to determine the amount of memory
+ * used by objects. The goal is not necessarily to give the amount of memory
+ * used up to the last bit (what is the memory used by a <tt>std::map</tt>
+ * object?), but rather to aid in the search for memory bottlenecks.
+ *
+ * This namespace has a single function name, memory_consumption(), with a lot
+ * of overloads. Depending on the argument type of the function, there
+ * are several modes of operation:
+ *
+ * <ol>
+ * <li> If the argument is a fundamental C++ data type (such as <tt>bool</tt>,
+ * <tt>float</tt>, <tt>double</tt> or any of the integer types), then
+ * memory_consumption() just returns <tt>sizeof</tt> of its argument. The
+ * library also provides an estimate for the amount of memory occupied by a
+ * <tt>std::string</tt>.
+ *
+ * <li> For objects, which are neither standard types, nor vectors,
+ * memory_consumption() will simply call the member function of same name. It
+ * is up to the implementation of the data type to provide a good estimate of
+ * the amount of memory used. Inside this function, the use of
+ * MemoryConsumption::memory_consumption() for compounds of the class helps to
+ * obtain this estimate. Most classes in the deal.II library have such a
+ * member function.
+ *
+ * <li> For vectors and C++ arrays of objects, memory_consumption()
+ * recursively calls itself for all entries and adds the results to the size
+ * of the object itself. Some optimized specializations for standard data
+ * types exist.
+ *
+ * <li> For vectors of regular pointers, memory_consumption(T*) returns the
+ * size of the vector of pointers, ignoring the size of the objects.
+ *
+ * </ol>
+ *
+ * <h3>Extending this namespace</h3>
+ *
+ * The function in this namespace and the functionality provided by it relies
+ * on the assumption that there is either a function
+ * <tt>memory_consumption(T)</tt> in this namespace determining the amount of
+ * memory used by objects of type <tt>T</tt> or that the class <tt>T</tt> has
+ * a member function of that name. While the latter is true for almost all
+ * classes in deal.II, we have only implemented the first kind of functions
+ * for the most common data types, such as fundamental types, strings, C++
+ * vectors, C-style arrays, and C++ pairs. These functions therefore do not
+ * cover, for example, C++ maps, lists, etc. If you need such functions feel
+ * free to implement them and send them to us for inclusion.
+ *
+ * @ingroup memory
+ * @author Wolfgang Bangerth, documentation updated by Guido Kanschat, David
+ * Wells
+ * @date 2000, 2015
+ */
+namespace MemoryConsumption
+{
+  /**
+   * Calculate the memory consumption of a fundamental type. See
+   * EnableIfScalar for a discussion on how this restriction (SFINAE) is
+   * implemented.
+   */
+  template <typename T>
+  inline
+  typename std_cxx11::enable_if<std_cxx11::is_fundamental<T>::value, std::size_t>::type
+  memory_consumption (const T &t);
+
+  /**
+   * Estimate the memory consumption of an object. If no further template
+   * specialization (past this one) is available for the type <tt>T</tt>, then
+   * this function returns the member function
+   * <tt>t.memory_consumption()</tt>'s value.
+   */
+  template <typename T>
+  inline
+  typename std_cxx11::enable_if<!(std_cxx11::is_fundamental<T>::value || std_cxx11::is_pointer<T>::value), std::size_t>::type
+  memory_consumption (const T &t);
+
+  /**
+   * Determine the amount of memory consumed by a C-style string. The returned
+   * value does not include the size of the pointer. This function only
+   * measures up to (and including) the NUL byte; the underlying buffer may be
+   * larger.
+   */
+  inline
+  std::size_t memory_consumption (const char *string);
+
+  /**
+   * Determine the amount of memory in bytes consumed by a
+   * <tt>std::complex</tt> variable.
+   */
+  template <typename T>
+  inline
+  std::size_t memory_consumption (const std::complex<T> &);
+
+  /**
+   * Determine the amount of memory in bytes consumed by a
+   * <tt>VectorizedArray</tt> variable.
+   */
+  template <typename T>
+  inline
+  std::size_t memory_consumption (const VectorizedArray<T> &);
+
+  /**
+   * Determine an estimate of the amount of memory in bytes consumed by a
+   * <tt>std::string</tt> variable.
+   */
+  inline
+  std::size_t memory_consumption (const std::string &s);
+
+  /**
+   * Determine the amount of memory in bytes consumed by a
+   * <tt>std::vector</tt> of elements of type <tt>T</tt> by recursively
+   * calling memory_consumption() for each entry.
+   *
+   * This function loops over all entries of the vector and determines their
+   * sizes using memory_consumption() for each <tt>v[i]</tt>. If the entries
+   * are of constant size, there might be another global function
+   * memory_consumption() for this data type, or a member function of that
+   * name in that class, that returns a constant value; the compiler will
+   * then unroll this loop so that the operation is fast. If the size
+   * of the data elements is variable, for example if they do memory
+   * allocation themselves, then the operation will necessarily be more
+   * expensive.
+   *
+   * Using the algorithm, in particular the loop over all elements, it is
+   * possible to also compute the memory consumption of vectors of vectors,
+   * vectors of strings, etc, where the individual elements may have vastly
+   * different sizes.
+   *
+   * Note that this algorithm also takes into account the size of elements
+   * that are allocated by this vector but not currently used.
+   *
+   * For the most commonly used vectors, there are special functions that
+   * compute the size without a loop. This also applies for the special case
+   * of vectors of bools.
+   */
+  template <typename T>
+  inline
+  std::size_t memory_consumption (const std::vector<T> &v);
+
+  /**
+   * Estimate the amount of memory (in bytes) occupied by a C-style array.
+   * Since in this library we do not usually store simple data elements like
+   * <tt>double</tt>s in such arrays (but rather use <tt>std::vector</tt>s or
+   * deal.II <tt>Vector</tt> objects), we do not provide specializations like
+   * for the <tt>std::vector</tt> arrays, but always use the loop over all
+   * elements.
+   */
+  template <typename T, int N>
+  inline
+  std::size_t memory_consumption (const T (&v)[N]);
+
+  /**
+   * Specialization of the determination of the memory consumption of a
+   * vector, here for a vector of <tt>bool</tt>s.
+   *
+   * This is a special case, as the bools are not stored one-by-one, but as a
+   * bit field.
+   */
+  inline
+  std::size_t memory_consumption (const std::vector<bool> &v);
+
+  /**
+   * Determine an estimate of the amount of memory in bytes consumed by a pair
+   * of values.
+   */
+  template <typename A, typename B>
+  inline
+  std::size_t memory_consumption (const std::pair<A,B> &p);
+
+  /**
+   * Calculate the memory consumption of a pointer.
+   *
+   * @note This function is overloaded for C-style strings; see the
+   * documentation of that function for that case.
+   *
+   * @note This returns the size of the pointer, not the size of the object
+   * pointed to.
+   */
+  template<typename T>
+  inline
+  std::size_t
+  memory_consumption (const T *const);
+
+  /**
+   * Return the amount of memory used by a shared pointer.
+   *
+   * @note This returns the size of the pointer, not of the object pointed to.
+   */
+  template <typename T>
+  inline
+  std::size_t memory_consumption (const std_cxx11::shared_ptr<T> &);
+
+  /**
+   * Return the amount of memory used by a std_cxx11::unique_ptr object.
+   *
+   * @note This returns the size of the pointer, not of the object pointed to.
+   */
+  template <typename T>
+  inline
+  std::size_t memory_consumption (const std_cxx11::unique_ptr<T> &);
+}
+
+
+
+// now comes the implementation of these functions
+
+namespace MemoryConsumption
+{
+  template <typename T>
+  inline
+  typename std_cxx11::enable_if<std_cxx11::is_fundamental<T>::value, std::size_t>::type
+  memory_consumption(const T &)
+  {
+    // a fundamental type owns no storage beyond the object itself
+    return sizeof(T);
+  }
+
+
+  // C-style string: bytes up to and including the terminating NUL.
+  inline
+  std::size_t memory_consumption (const char *string)
+  {
+    // a null pointer refers to no characters at all
+    if (string == NULL)
+      return 0;
+
+    // std::strlen() does not count the terminating NUL, so add it here
+    return sizeof(char)*(std::strlen(string) + 1);
+  }
+
+
+  // std::complex<T>: size of the object itself.
+  template <typename T>
+  inline
+  std::size_t memory_consumption (const std::complex<T> &)
+  {
+    return sizeof(std::complex<T>);
+  }
+
+
+  // VectorizedArray<T>: size of the object itself.
+  template <typename T>
+  inline
+  std::size_t memory_consumption (const VectorizedArray<T> &)
+  {
+    return sizeof(VectorizedArray<T>);
+  }
+
+
+  // std::string: the string object plus one byte per stored character.
+  inline
+  std::size_t memory_consumption (const std::string &s)
+  {
+    return sizeof(s) + s.length();
+  }
+
+
+  // std::vector<T>: the vector object plus everything its elements report.
+  template <typename T>
+  std::size_t memory_consumption (const std::vector<T> &v)
+  {
+    // shortcut for types that do not allocate memory themselves
+    if (std_cxx11::is_fundamental<T>::value || std_cxx11::is_pointer<T>::value)
+      {
+        return v.capacity()*sizeof(T) + sizeof(v);
+      }
+    else
+      {
+        std::size_t mem = sizeof(std::vector<T>);
+        // size_type index: an unsigned int could wrap before reaching v.size()
+        for (typename std::vector<T>::size_type i=0; i<v.size(); ++i)
+          mem += memory_consumption(v[i]);
+        // allocated-but-unused slots are counted at sizeof(T) each
+        mem += (v.capacity() - v.size())*sizeof(T);
+        return mem;
+      }
+  }
+
+
+  // C-style array: sum of what each of the N elements reports.
+  template <typename T, int N>
+  std::size_t memory_consumption (const T (&v)[N])
+  {
+    std::size_t mem = 0;
+    // N is an int, so use an int counter to avoid a mixed-sign comparison
+    for (int i=0; i<N; ++i)
+      mem += memory_consumption(v[i]);
+    return mem;
+  }
+
+
+  // std::vector<bool>: elements are packed as bits, eight per byte.
+  inline
+  std::size_t memory_consumption (const std::vector<bool> &v)
+  {
+    return v.capacity() / 8 + sizeof(v);
+  }
+
+
+  // std::pair: sum of what the two members report.
+  template <typename A, typename B>
+  inline
+  std::size_t memory_consumption (const std::pair<A,B> &p)
+  {
+    return (memory_consumption(p.first) +
+            memory_consumption(p.second));
+  }
+
+
+  // raw pointer: size of the pointer only, not of the pointee.
+  template<typename T>
+  inline
+  std::size_t
+  memory_consumption(const T *const)
+  {
+    return sizeof(T *);
+  }
+
+
+  // shared_ptr: size of the pointer object only, not of the pointee.
+  template <typename T>
+  inline
+  std::size_t
+  memory_consumption (const std_cxx11::shared_ptr<T> &)
+  {
+    return sizeof(std_cxx11::shared_ptr<T>);
+  }
+
+
+  // unique_ptr: size of the pointer object only, not of the pointee.
+  template <typename T>
+  inline
+  std::size_t
+  memory_consumption (const std_cxx11::unique_ptr<T> &)
+  {
+    return sizeof(std_cxx11::unique_ptr<T>);
+  }
+
+
+  // everything else: defer to the object's own member function.
+  template <typename T>
+  inline
+  typename std_cxx11::enable_if<!(std_cxx11::is_fundamental<T>::value || std_cxx11::is_pointer<T>::value), std::size_t>::type
+  memory_consumption (const T &t)
+  {
+    return t.memory_consumption();
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/mg_level_object.h b/include/deal.II/base/mg_level_object.h
new file mode 100644
index 0000000..3d797e3
--- /dev/null
+++ b/include/deal.II/base/mg_level_object.h
@@ -0,0 +1,212 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__mg_level_object_h
+#define dealii__mg_level_object_h
+
+#include <deal.II/base/subscriptor.h>
+#include <vector>
+
+#include <deal.II/base/std_cxx11/shared_ptr.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/**
+ * An array with an object for each level.  The purpose of this class is
+ * mostly to store objects and allow access by level number, even if the lower
+ * levels are not used and therefore have no object at all; this is done by
+ * simply shifting the given index by the minimum level we have stored.
+ *
+ * In most cases, the objects which are stored on each level are either
+ * matrices or vectors.
+ *
+ * @ingroup mg
+ * @ingroup data
+ * @author Wolfgang Bangerth, Guido Kanschat, 1999, 2005, 2010
+ */
+template<class Object>
+class MGLevelObject : public Subscriptor
+{
+public:
+  /**
+   * Constructor allowing to initialize the number of levels. By default, the
+   * object is created empty.
+   */
+  MGLevelObject (const unsigned int minlevel = 0,
+                 const unsigned int maxlevel = 0);
+
+  /**
+   * Access object on level @p level.
+   */
+  Object &operator[] (const unsigned int level);
+
+  /**
+   * Access object on level @p level. Constant version.
+   */
+  const Object &operator[] (const unsigned int level) const;
+
+  /**
+   * Delete all previous contents of this object and reset its size according
+   * to the values of @p new_minlevel and @p new_maxlevel.
+   */
+  void resize (const unsigned int new_minlevel,
+               const unsigned int new_maxlevel);
+
+  /**
+   * Call <tt>operator = (d)</tt> on all objects stored by this object.  This
+   * is particularly useful for e.g. <tt>Object==Vector@<T@></tt>
+   */
+  MGLevelObject<Object> &operator = (const double d);
+
+  /**
+   * Call @p clear on all objects stored by this object. This function is only
+   * implemented for some @p Object classes, e.g. the PreconditionBlockSOR and
+   * similar classes.
+   */
+  void clear();
+
+  /**
+   * Coarsest level for multigrid.
+   */
+  unsigned int min_level () const;
+
+  /**
+   * Finest level for multigrid.
+   */
+  unsigned int max_level () const;
+
+  /**
+   * Memory used by this object.
+   */
+  std::size_t memory_consumption () const;
+
+private:
+  /**
+   * Level of first component.
+   */
+  unsigned int minlevel;
+
+  /**
+   * Array of the objects to be held.
+   */
+  std::vector<std_cxx11::shared_ptr<Object> > objects;
+};
+
+
+/* ------------------------------------------------------------------- */
+
+// Builds the level range [min, max] by handing both bounds to resize().
+template<class Object>
+MGLevelObject<Object>::MGLevelObject(const unsigned int min,
+                                     const unsigned int max)
+  : minlevel(0)
+{
+  // resize() overwrites minlevel and creates the per-level objects
+  this->resize (min, max);
+}
+
+// Mutable access to the object stored for level i (vector slot i-minlevel).
+template<class Object>
+Object &
+MGLevelObject<Object>::operator[] (const unsigned int i)
+{
+  Assert((i>=minlevel) && (i<minlevel+objects.size()),
+         ExcIndexRange (i, minlevel, minlevel+objects.size()));
+  return *objects[i-minlevel];
+}
+
+// Read-only access to the object stored for level i (vector slot i-minlevel).
+template<class Object>
+const Object &
+MGLevelObject<Object>::operator[] (const unsigned int i) const
+{
+  Assert((i>=minlevel) && (i<minlevel+objects.size()),
+         ExcIndexRange (i, minlevel, minlevel+objects.size()));
+  return *objects[i-minlevel];
+}
+
+
+template<class Object>
+void
+MGLevelObject<Object>::resize (const unsigned int new_minlevel,
+                               const unsigned int new_maxlevel)
+{
+  Assert (new_minlevel <= new_maxlevel, ExcInternalError());
+  // Emptying the vector drops every shared_ptr; the shared_ptr class then
+  // deletes the object it points to, so no explicit delete is needed.
+  objects.clear ();
+  minlevel = new_minlevel;
+
+  // allocate one fresh Object for each level in [new_minlevel, new_maxlevel]
+  const unsigned int n_levels = new_maxlevel-new_minlevel+1;
+  for (unsigned int level=0; level<n_levels; ++level)
+    objects.push_back(std_cxx11::shared_ptr<Object> (new Object));
+}
+
+
+template<class Object>
+MGLevelObject<Object> &
+MGLevelObject<Object>::operator = (const double d)
+{
+  // forward the scalar assignment to the object stored on every level
+  for (std::size_t level = 0; level < objects.size(); ++level)
+    *objects[level] = d;
+  return *this;
+}
+
+
+template<class Object>
+void
+MGLevelObject<Object>::clear ()
+{
+  // the vector keeps its entries; clear() is called on each stored object
+  for (std::size_t level = 0; level < objects.size(); ++level)
+    objects[level]->clear();
+}
+
+// Lowest level for which an object is stored.
+template<class Object>
+unsigned int
+MGLevelObject<Object>::min_level () const
+{
+  return this->minlevel;
+}
+
+// Highest stored level: minlevel plus the number of objects, minus one.
+template<class Object>
+unsigned int
+MGLevelObject<Object>::max_level () const
+{
+  return this->minlevel + this->objects.size() - 1;
+}
+
+
+template<class Object>
+std::size_t
+MGLevelObject<Object>::memory_consumption () const
+{
+  // size of this object itself plus whatever each level's object reports;
+  // nothing is added for the heap buffer of the vector itself
+  std::size_t result = sizeof(*this);
+  for (std::size_t level = 0; level < objects.size(); ++level)
+    result += objects[level]->memory_consumption();
+
+  return result;
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/mpi.h b/include/deal.II/base/mpi.h
new file mode 100644
index 0000000..183d141
--- /dev/null
+++ b/include/deal.II/base/mpi.h
@@ -0,0 +1,763 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2011 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__mpi_h
+#define dealii__mpi_h
+
+#include <deal.II/base/config.h>
+#include <vector>
+
+#if !defined(DEAL_II_WITH_MPI) && !defined(DEAL_II_WITH_PETSC)
+// Neither DEAL_II_WITH_MPI nor DEAL_II_WITH_PETSC is defined: declare
+// placeholder MPI types and zero-valued constants so that code which
+// mentions MPI data types still compiles.
+typedef int MPI_Comm;
+const int MPI_COMM_SELF = 0;
+typedef int MPI_Datatype;
+typedef int MPI_Op;
+namespace MPI
+{
+  static const unsigned int UNSIGNED = 0;
+  static const unsigned int LONG_DOUBLE = 0;
+  static const unsigned int LONG_DOUBLE_COMPLEX = 0;
+  static const unsigned int MAX = 0;
+  static const unsigned int MIN = 0;
+  static const unsigned int SUM = 0;
+}
+static const int MPI_MIN = 0;
+static const int MPI_MAX = 0;
+static const int MPI_SUM = 0;
+#endif
+
+DEAL_II_NAMESPACE_OPEN
+
+
+//Forward type declarations to allow MPI sums over tensorial types
+template <int rank, int dim, typename Number> class Tensor;
+template <int rank, int dim, typename Number> class SymmetricTensor;
+
+//Forward type declaration to allow MPI sums over Vector<number> type
+template <typename Number> class Vector;
+
+
+namespace Utilities
+{
+  /**
+   * A namespace for utility functions that abstract certain operations using
+   * the Message Passing Interface (MPI) or provide fallback operations in
+   * case deal.II is configured not to use MPI at all.
+   *
+   * @ingroup utilities
+   */
+  namespace MPI
+  {
+    /**
+     * Return the number of MPI processes there exist in the given
+     * @ref GlossMPICommunicator "communicator"
+     * object. If this is a sequential job, it returns 1.
+     */
+    unsigned int n_mpi_processes (const MPI_Comm &mpi_communicator);
+
+    /**
+     * Return the
+     * @ref GlossMPIRank "rank of the present MPI process"
+     * in the space of processes described by the given
+     * @ref GlossMPICommunicator "communicator".
+     * This will be a unique value for each process between zero and (less
+     * than) the number of all processes (given by n_mpi_processes()).
+     */
+    unsigned int this_mpi_process (const MPI_Comm &mpi_communicator);
+
+    /**
+     * Consider an unstructured communication pattern where every process in
+     * an MPI universe wants to send some data to a subset of the other
+     * processors. To do that, the other processors need to know who to expect
+     * messages from. This function computes this information.
+     *
+     * @param mpi_comm A
+     * @ref GlossMPICommunicator "communicator"
+     * that describes the processors that are going to communicate with each
+     * other.
+     *
+     * @param destinations The list of processors the current process wants to
+     * send information to. This list need not be sorted in any way. If it
+     * contains duplicate entries that means that multiple messages are
+     * intended for a given destination.
+     *
+     * @return A list of processors that have indicated that they want to send
+     * something to the current processor. The resulting list is not sorted.
+     * It may contain duplicate entries if processors enter the same
+     * destination more than once in their destinations list.
+     */
+    std::vector<unsigned int>
+    compute_point_to_point_communication_pattern (const MPI_Comm &mpi_comm,
+                                                  const std::vector<unsigned int> &destinations);
+
+    /**
+     * Given a
+     * @ref GlossMPICommunicator "communicator",
+     * generate a new communicator that contains the same set of processors
+     * but that has a different, unique identifier.
+     *
+     * This functionality can be used to ensure that different objects, such
+     * as distributed matrices, each have unique communicators over which they
+     * can interact without interfering with each other.
+     *
+     * When no longer needed, the communicator created here needs to be
+     * destroyed using <code>MPI_Comm_free</code>.
+     */
+    MPI_Comm duplicate_communicator (const MPI_Comm &mpi_communicator);
+
+    /**
+     * Return the sum over all processors of the value @p t. This function is
+     * collective over all processors given in the
+     * @ref GlossMPICommunicator "communicator".
+     * If deal.II is not configured for use of MPI, this function simply
+     * returns the value of @p t. This function corresponds to the
+     * <code>MPI_Allreduce</code> function, i.e. all processors receive the
+     * result of this operation.
+     *
+     * @note Sometimes, not all processors need a result and in that case one
+     * would call the <code>MPI_Reduce</code> function instead of the
+     * <code>MPI_Allreduce</code> function. The latter is at most twice as
+     * expensive, so if you are concerned about performance, it may be
+     * worthwhile investigating whether your algorithm indeed needs the result
+     * everywhere.
+     *
+     * @note This function is only implemented for certain template arguments
+     * <code>T</code>, namely <code>float, double, int, unsigned int</code>.
+     */
+    template <typename T>
+    T sum (const T &t,
+           const MPI_Comm &mpi_communicator);
+
+    /**
+     * Like the previous function, but take the sums over the elements of an
+     * array of length N. In other words, the i-th element of the results
+     * array is the sum over the i-th entries of the input arrays from each
+     * processor.
+     *
+     * Input and output arrays may be the same.
+     */
+    template <typename T, unsigned int N>
+    inline
+    void sum (const T (&values)[N],
+              const MPI_Comm &mpi_communicator,
+              T (&sums)[N]);
+
+    /**
+     * Like the previous function, but take the sums over the elements of a
+     * std::vector. In other words, the i-th element of the results array is
+     * the sum over the i-th entries of the input arrays from each processor.
+     *
+     * Input and output vectors may be the same.
+     */
+    template <typename T>
+    inline
+    void sum (const std::vector<T> &values,
+              const MPI_Comm &mpi_communicator,
+              std::vector<T> &sums);
+
+    /**
+     * Like the previous function, but take the sums over the elements of a
+     * Vector<T>.
+     *
+     * Input and output vectors may be the same.
+     */
+    template <typename T>
+    inline
+    void sum (const Vector<T> &values,
+              const MPI_Comm &mpi_communicator,
+              Vector<T> &sums);
+
+
+    /**
+     * Perform an MPI sum of the entries of a symmetric tensor.
+     *
+     * @relates SymmetricTensor
+     */
+    template <int rank, int dim, typename Number>
+    inline
+    SymmetricTensor<rank,dim,Number>
+    sum (const SymmetricTensor<rank,dim,Number> &local,
+         const MPI_Comm &mpi_communicator);
+
+    /**
+     * Perform an MPI sum of the entries of a tensor.
+     *
+     * @relates Tensor
+     */
+    template <int rank, int dim, typename Number>
+    inline
+    Tensor<rank,dim,Number>
+    sum (const Tensor<rank,dim,Number> &local,
+         const MPI_Comm &mpi_communicator);
+
+    /**
+     * Return the maximum over all processors of the value @p t. This function
+     * is collective over all processors given in the
+     * @ref GlossMPICommunicator "communicator".
+     * If deal.II is not configured for use of MPI, this function simply
+     * returns the value of @p t. This function corresponds to the
+     * <code>MPI_Allreduce</code> function, i.e. all processors receive the
+     * result of this operation.
+     *
+     * @note Sometimes, not all processors need a result and in that case one
+     * would call the <code>MPI_Reduce</code> function instead of the
+     * <code>MPI_Allreduce</code> function. The latter is at most twice as
+     * expensive, so if you are concerned about performance, it may be
+     * worthwhile investigating whether your algorithm indeed needs the result
+     * everywhere.
+     *
+     * @note This function is only implemented for certain template arguments
+     * <code>T</code>, namely <code>float, double, int, unsigned int</code>.
+     */
+    template <typename T>
+    T max (const T &t,
+           const MPI_Comm &mpi_communicator);
+
+    /**
+     * Like the previous function, but take the maxima over the elements of an
+     * array of length N. In other words, the i-th element of the results
+     * array is the maximum of the i-th entries of the input arrays from each
+     * processor.
+     *
+     * Input and output arrays may be the same.
+     */
+    template <typename T, unsigned int N>
+    inline
+    void max (const T (&values)[N],
+              const MPI_Comm &mpi_communicator,
+              T (&maxima)[N]);
+
+    /**
+     * Like the previous function, but take the maximum over the elements of a
+     * std::vector. In other words, the i-th element of the results array is
+     * the maximum over the i-th entries of the input arrays from each
+     * processor.
+     *
+     * Input and output vectors may be the same.
+     */
+    template <typename T>
+    inline
+    void max (const std::vector<T> &values,
+              const MPI_Comm &mpi_communicator,
+              std::vector<T> &maxima);
+
+    /**
+     * Return the minimum over all processors of the value @p t. This function
+     * is collective over all processors given in the
+     * @ref GlossMPICommunicator "communicator".
+     * If deal.II is not configured for use of MPI, this function simply
+     * returns the value of @p t. This function corresponds to the
+     * <code>MPI_Allreduce</code> function, i.e. all processors receive the
+     * result of this operation.
+     *
+     * @note Sometimes, not all processors need a result and in that case one
+     * would call the <code>MPI_Reduce</code> function instead of the
+     * <code>MPI_Allreduce</code> function. The latter is at most twice as
+     * expensive, so if you are concerned about performance, it may be
+     * worthwhile investigating whether your algorithm indeed needs the result
+     * everywhere.
+     *
+     * @note This function is only implemented for certain template arguments
+     * <code>T</code>, namely <code>float, double, int, unsigned int</code>.
+     */
+    template <typename T>
+    T min (const T &t,
+           const MPI_Comm &mpi_communicator);
+
+    /**
+     * Like the previous function, but take the minima over the elements of an
+     * array of length N. In other words, the i-th element of the results
+     * array is the minimum of the i-th entries of the input arrays from each
+     * processor.
+     *
+     * Input and output arrays may be the same.
+     */
+    template <typename T, unsigned int N>
+    inline
+    void min (const T (&values)[N],
+              const MPI_Comm &mpi_communicator,
+              T (&minima)[N]);
+
+    /**
+     * Like the previous function, but take the minimum over the elements of a
+     * std::vector. In other words, the i-th element of the results array is
+     * the minimum over the i-th entries of the input arrays from each
+     * processor.
+     *
+     * Input and output vectors may be the same.
+     */
+    template <typename T>
+    inline
+    void min (const std::vector<T> &values,
+              const MPI_Comm &mpi_communicator,
+              std::vector<T> &minima);
+
+
+    /**
+     * Data structure to store the result of min_max_avg().
+     */
+    struct MinMaxAvg
+    {
+      // Note: We assume a POD property of this struct in the MPI calls in
+      // min_max_avg
+      double sum;
+      double min;
+      double max;
+      unsigned int min_index;
+      unsigned int max_index;
+      double avg;
+    };
+
+    /**
+     * Returns sum, average, minimum, maximum, processor id of minimum and
+     * maximum as a collective operation on the given MPI
+     * @ref GlossMPICommunicator "communicator"
+     * @p mpi_communicator. Each processor's value is given in @p my_value and
+     * the result will be returned. The result is available on all machines.
+     *
+     * @note Sometimes, not all processors need a result and in that case one
+     * would call the <code>MPI_Reduce</code> function instead of the
+     * <code>MPI_Allreduce</code> function. The latter is at most twice as
+     * expensive, so if you are concerned about performance, it may be
+     * worthwhile investigating whether your algorithm indeed needs the result
+     * everywhere.
+     */
+    MinMaxAvg
+    min_max_avg (const double my_value,
+                 const MPI_Comm &mpi_communicator);
+
+
+
+    /**
+     * A class that is used to initialize the MPI system at the beginning of a
+     * program and to shut it down again at the end. It also allows you to
+     * control the number of threads used in each MPI task.
+     *
+     * If deal.II is configured with PETSc, the library will also be
+     * initialized in the beginning and destroyed at the end automatically
+     * (internally by calling PetscInitialize() and PetscFinalize()).
+     *
+     * If a program uses MPI one would typically just create an object of this
+     * type at the beginning of <code>main()</code>. The constructor of this
+     * class then runs <code>MPI_Init()</code> with the given arguments. At
+     * the end of the program, the compiler will invoke the destructor of this
+     * object which in turn calls <code>MPI_Finalize</code> to shut down the
+     * MPI system.
+     *
+     * This class is used in step-32, for example.
+     */
+    class MPI_InitFinalize
+    {
+    public:
+      /**
+       * Initialize MPI (and, if deal.II was configured to use it, PETSc) and
+       * set the number of threads used by deal.II (via the underlying
+       * Threading Building Blocks library) to the given parameter.
+       *
+       * @param[in,out] argc A reference to the 'argc' argument passed to
+       * main. This argument is used to initialize MPI (and, possibly, PETSc)
+       * as they read arguments from the command line.
+       * @param[in,out] argv A reference to the 'argv' argument passed to
+       * main.
+       * @param[in] max_num_threads The maximal number of threads this MPI
+       * process should utilize. If this argument is set to
+       * numbers::invalid_unsigned_int (the default value), then the number of
+       * threads is determined automatically in the following way: the number
+       * of threads to run on this MPI process is set in such a way that all
+       * of the cores in your node are spoken for. In other words, if you have
+       * started one MPI process per node, setting this argument is equivalent
+       * to setting it to the number of cores present in the node this MPI
+       * process runs on. If you have started as many MPI processes per node
+       * as there are cores on each node, then this is equivalent to passing 1
+       * as the argument. On the other hand, if, for example, you start 4 MPI
+       * processes on each 16-core node, then this option will start 4 worker
+       * threads for each node. If you start 3 processes on an 8 core node,
+       * then they will start 3, 3 and 2 threads, respectively.
+       *
+       * @note This function calls MultithreadInfo::set_thread_limit() with
+       * either @p max_num_threads or, following the discussion above, a
+       * number of threads equal to the number of cores allocated to this MPI
+       * process. However, MultithreadInfo::set_thread_limit() in turn also
+       * evaluates the environment variable DEAL_II_NUM_THREADS. Finally, the
+       * worker threads can only be created on cores to which the current MPI
+       * process has access; some MPI implementations limit the number of
+       * cores each process may use to one or a subset of cores in order
+       * to ensure better cache behavior. Consequently, the number of threads
+       * that will really be created will be the minimum of the argument
+       * passed here, the environment variable (if set), and the number of
+       * cores accessible to the thread.
+       *
+       * @note MultithreadInfo::set_thread_limit() can only work if it is
+       * called before any threads are created. The safest place for a call to
+       * it is therefore at the beginning of <code>main()</code>.
+       * Consequently, this extends to the current class: the best place to
+       * create an object of this type is also at or close to the top of
+       * <code>main()</code>.
+       */
+      MPI_InitFinalize (int    &argc,
+                        char ** &argv,
+                        const unsigned int max_num_threads = numbers::invalid_unsigned_int);
+
+      /**
+       * Destructor. Calls <tt>MPI_Finalize()</tt> in case this class owns the
+       * MPI process.
+       */
+      ~MPI_InitFinalize();
+    };
+
+    /**
+     * Return whether (i) deal.II has been compiled to support MPI (for
+     * example by compiling with <code>CXX=mpiCC</code>) and if so whether
+     * (ii) <code>MPI_Init()</code> has been called (for example using the
+     * Utilities::MPI::MPI_InitFinalize class). In other words, the result
+     * indicates whether the current job is running under MPI.
+     *
+     * @note The function does not take into account whether an MPI job
+     * actually runs on more than one processor or is, in fact, a single-node
+     * job that happens to run under MPI.
+     */
+    bool job_supports_mpi ();
+
+    namespace internal
+    {
+#ifdef DEAL_II_WITH_MPI
+      /**
+       * Map the pointee type of the unnamed pointer argument to its MPI type.
+       */
+      inline MPI_Datatype mpi_type_id (const int *)
+      {
+        return MPI_INT;
+      }
+
+
+      inline MPI_Datatype mpi_type_id (const long int *)
+      {
+        return MPI_LONG;
+      }
+
+
+      inline MPI_Datatype mpi_type_id (const unsigned int *)
+      {
+        return MPI_UNSIGNED;
+      }
+
+
+      inline MPI_Datatype mpi_type_id (const unsigned long int *)
+      {
+        return MPI_UNSIGNED_LONG;
+      }
+
+
+      inline MPI_Datatype mpi_type_id (const unsigned long long int *)
+      {
+        return MPI_UNSIGNED_LONG_LONG;
+      }
+
+
+      inline MPI_Datatype mpi_type_id (const float *)
+      {
+        return MPI_FLOAT;
+      }
+
+
+      inline MPI_Datatype mpi_type_id (const double *)
+      {
+        return MPI_DOUBLE;
+      }
+
+
+      inline MPI_Datatype mpi_type_id (const long double *)
+      {
+        return MPI_LONG_DOUBLE;
+      }
+#endif
+      // Reduce one value with mpi_op over all processes; returns t unchanged when MPI is not in use.
+      template <typename T>
+      inline
+      T all_reduce (const MPI_Op &mpi_op,
+                    const T &t,
+                    const MPI_Comm &mpi_communicator)
+      {
+#ifdef DEAL_II_WITH_MPI
+        if (job_supports_mpi())
+          {
+            T output;
+            MPI_Allreduce (const_cast<void *>(static_cast<const void *>(&t)),
+                           &output, 1, internal::mpi_type_id(&t), mpi_op,
+                           mpi_communicator);
+            return output;
+          }
+        else
+#endif
+          {
+            (void)mpi_op;            // unused on this path
+            (void)mpi_communicator;  // unused on this path
+            return t;
+          }
+      }
+      // Element-wise reduction of a fixed-size array; values and output may be the same array.
+      template <typename T, unsigned int N>
+      inline
+      void all_reduce (const MPI_Op &mpi_op,
+                       const T (&values)[N],
+                       const MPI_Comm &mpi_communicator,
+                       T (&output)[N])
+      {
+#ifdef DEAL_II_WITH_MPI
+        if (job_supports_mpi())
+          {
+            MPI_Allreduce ((&values[0] != &output[0]
+                            ?
+                            const_cast<void *>(static_cast<const void *>(&values[0]))
+                            :
+                            MPI_IN_PLACE),  // same array on both sides: reduce in place
+                           &output[0], N, internal::mpi_type_id(values), mpi_op,
+                           mpi_communicator);
+          }
+        else
+#endif
+          {
+            (void)mpi_op;            // unused on this path
+            (void)mpi_communicator;  // unused on this path
+            for (unsigned int i=0; i<N; ++i)
+              output[i] = values[i];
+          }
+      }
+      // Element-wise reduction of a std::vector with mpi_op; without MPI, output becomes a copy of values.
+      template <typename T>
+      inline
+      void all_reduce (const MPI_Op &mpi_op,
+                       const std::vector<T> &values,
+                       const MPI_Comm       &mpi_communicator,
+                       std::vector<T>       &output)
+      {
+#ifdef DEAL_II_WITH_MPI
+        if (job_supports_mpi())
+          {
+            output.resize (values.size());
+            // &v[0] is undefined for an empty vector: use null buffers then
+            const T *const in = (values.empty() ? 0 : &values[0]);
+            T *const out = (output.empty() ? 0 : &output[0]);
+            MPI_Allreduce ((in != out ?
+                            const_cast<void *>(static_cast<const void *>(in)) : MPI_IN_PLACE),
+                           out, values.size(), internal::mpi_type_id((T *)0), mpi_op,
+                           mpi_communicator);
+          }
+        else
+#endif
+          {
+            (void)mpi_op;            // unused on this path
+            (void)mpi_communicator;  // unused on this path
+            output = values;
+          }
+      }
+      // Element-wise reduction of a Vector<T>; output is reinit()ed unless it shares begin() with values.
+      template <typename T>
+      inline
+      void all_reduce (const MPI_Op    &mpi_op,
+                       const Vector<T> &values,
+                       const MPI_Comm  &mpi_communicator,
+                       Vector<T>  &output)
+      {
+#ifdef DEAL_II_WITH_MPI
+        if (job_supports_mpi())
+          {
+            if (values.begin() != output.begin())
+              output.reinit (values.size());
+
+            MPI_Allreduce ((values.begin() != output.begin()
+                            ?
+                            const_cast<void *>(static_cast<const void *>(values.begin()))
+                            :
+                            MPI_IN_PLACE),  // same storage on both sides: reduce in place
+                           output.begin(), values.size(), internal::mpi_type_id((T *)0), mpi_op,
+                           mpi_communicator);
+          }
+        else
+#endif
+          {
+            (void)mpi_op;            // unused on this path
+            (void)mpi_communicator;  // unused on this path
+            output = values;
+          }
+      }
+
+
+
+    }
+
+    // Scalar sum(): an MPI_SUM reduction through internal::all_reduce().
+    template <typename T>
+    inline
+    T sum (const T &t,
+           const MPI_Comm &mpi_communicator)
+    {
+      return internal::all_reduce(MPI_SUM, t, mpi_communicator);
+    }
+
+    // Array sum(): element-wise MPI_SUM reduction of values, written to sums.
+    template <typename T, unsigned int N>
+    inline
+    void sum (const T (&values)[N],
+              const MPI_Comm &mpi_communicator,
+              T (&sums)[N])
+    {
+      internal::all_reduce(MPI_SUM, values, mpi_communicator, sums);
+    }
+
+    // std::vector sum(): element-wise MPI_SUM reduction of values, written to sums.
+    template <typename T>
+    inline
+    void sum (const std::vector<T> &values,
+              const MPI_Comm       &mpi_communicator,
+              std::vector<T>       &sums)
+    {
+      internal::all_reduce(MPI_SUM, values, mpi_communicator, sums);
+    }
+    // Vector<T> sum(): element-wise MPI_SUM reduction of values, written to sums.
+    template <typename T>
+    inline
+    void sum (const Vector<T> &values,
+              const MPI_Comm &mpi_communicator,
+              Vector<T> &sums)
+    {
+      internal::all_reduce(MPI_SUM, values, mpi_communicator, sums);
+    }
+
+
+    template <int rank, int dim, typename Number>
+    inline
+    Tensor<rank,dim,Number>
+    sum (const Tensor<rank,dim,Number> &local,
+         const MPI_Comm &mpi_communicator)
+    {
+      typedef Tensor<rank,dim,Number> TensorType;
+      const unsigned int n_components = TensorType::n_independent_components;
+      // unroll the local tensor into a flat array of its independent components
+      Number local_flat[ TensorType::n_independent_components ];
+      for (unsigned int c=0; c<n_components; ++c)
+        local_flat[c] = local[ local.unrolled_to_component_indices(c) ];
+      // sum the flat array entry by entry over the communicator
+      Number summed_flat[ TensorType::n_independent_components ];
+      dealii::Utilities::MPI::sum( local_flat, mpi_communicator, summed_flat );
+      // roll the summed entries back into a tensor
+      TensorType result;
+      for (unsigned int c=0; c<n_components; ++c)
+        result[ result.unrolled_to_component_indices(c) ] = summed_flat[c];
+      return result;
+    }
+
+    template <int rank, int dim, typename Number>
+    inline
+    SymmetricTensor<rank,dim,Number>
+    sum (const SymmetricTensor<rank,dim,Number> &local,
+         const MPI_Comm &mpi_communicator)
+    {
+      typedef SymmetricTensor<rank,dim,Number> TensorType;
+      const unsigned int n_components = TensorType::n_independent_components;
+      // unroll the local tensor into a flat array of its independent components
+      Number local_flat[ TensorType::n_independent_components ];
+      for (unsigned int c=0; c<n_components; ++c)
+        local_flat[c] = local[ local.unrolled_to_component_indices(c) ];
+      // sum the flat array entry by entry over the communicator
+      Number summed_flat[ TensorType::n_independent_components ];
+      dealii::Utilities::MPI::sum( local_flat, mpi_communicator, summed_flat );
+      // roll the summed entries back into a symmetric tensor
+      TensorType result;
+      for (unsigned int c=0; c<n_components; ++c)
+        result[ result.unrolled_to_component_indices(c) ] = summed_flat[c];
+      return result;
+    }
+    // Scalar max(): an MPI_MAX reduction through internal::all_reduce().
+    template <typename T>
+    inline
+    T max (const T &t,
+           const MPI_Comm &mpi_communicator)
+    {
+      return internal::all_reduce(MPI_MAX, t, mpi_communicator);
+    }
+
+    // Array max(): element-wise MPI_MAX reduction of values, written to maxima.
+    template <typename T, unsigned int N>
+    inline
+    void max (const T (&values)[N],
+              const MPI_Comm &mpi_communicator,
+              T (&maxima)[N])
+    {
+      internal::all_reduce(MPI_MAX, values, mpi_communicator, maxima);
+    }
+
+    // std::vector max(): element-wise MPI_MAX reduction of values, written to maxima.
+    template <typename T>
+    inline
+    void max (const std::vector<T> &values,
+              const MPI_Comm       &mpi_communicator,
+              std::vector<T>       &maxima)
+    {
+      internal::all_reduce(MPI_MAX, values, mpi_communicator, maxima);
+    }
+
+    // Scalar min(): an MPI_MIN reduction through internal::all_reduce().
+    template <typename T>
+    inline
+    T min (const T &t,
+           const MPI_Comm &mpi_communicator)
+    {
+      return internal::all_reduce(MPI_MIN, t, mpi_communicator);
+    }
+
+    // Array min(): element-wise MPI_MIN reduction of values, written to minima.
+    template <typename T, unsigned int N>
+    inline
+    void min (const T (&values)[N],
+              const MPI_Comm &mpi_communicator,
+              T (&minima)[N])
+    {
+      internal::all_reduce(MPI_MIN, values, mpi_communicator, minima);
+    }
+
+    // std::vector min(): element-wise MPI_MIN reduction of values, written to minima.
+    template <typename T>
+    inline
+    void min (const std::vector<T> &values,
+              const MPI_Comm       &mpi_communicator,
+              std::vector<T>       &minima)
+    {
+      internal::all_reduce(MPI_MIN, values, mpi_communicator, minima);
+    }
+
+
+    inline
+    bool job_supports_mpi ()
+    {
+#ifdef DEAL_II_WITH_MPI
+      // ask the MPI library whether it has been initialized yet
+      int init_flag = 0;
+      MPI_Initialized(&init_flag);
+      return init_flag > 0;
+#else
+      return false;
+#endif
+    }
+  } // end of namespace MPI
+} // end of namespace Utilities
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/multithread_info.h b/include/deal.II/base/multithread_info.h
new file mode 100644
index 0000000..d6d4821
--- /dev/null
+++ b/include/deal.II/base/multithread_info.h
@@ -0,0 +1,136 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__multithread_info_h
+#define dealii__multithread_info_h
+//---------------------------------------------------------------------------
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/types.h>
+#include <deal.II/base/exceptions.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * This class provides information about the system which may be of use in
+ * multithreaded programs.  At the moment this is just the number of CPUs. If
+ * deal.II is compiled with multithreading support, some functions will use
+ * multiple threads for their action. Currently the library supports both
+ * thread-based and task-based parallelism.
+ * @ref threads
+ * describes the different uses of each. The default number of threads used
+ * for task-based parallel methods is selected automatically by the Threading
+ * Building Blocks library. See
+ * @ref threads
+ * for more information on this.  Thread-based parallel methods need to
+ * explicitly create threads and may want to use a number of threads that is
+ * related to the number of CPUs in your system. The recommended number of
+ * threads can be queried using MultithreadInfo::n_threads(), while the number
+ * of cores in the system is returned by MultithreadInfo::n_cores().
+ *
+ * @ingroup threads
+ * @author Thomas Richter, Wolfgang Bangerth, 2000
+ */
+class MultithreadInfo
+{
+public:
+  /**
+   * The number of CPUs in the system. At the moment detection of CPUs is only
+   * implemented on Linux, FreeBSD, and Mac computers.  It is one if detection
+   * failed or is not implemented on your system.
+   *
+   * If it is one, although you are on a multi-processor machine, please refer
+   * to the documentation in <tt>multithread_info.cc</tt> near to the
+   * <tt>error</tt> directive.
+   */
+  static unsigned int n_cores ();
+
+  /**
+   * Return the number of threads to use. This is initially set to the number
+   * of cores the system has (see n_cores()) but can be further restricted by
+   * set_thread_limit() and the environment variable DEAL_II_NUM_THREADS.
+   */
+  static unsigned int n_threads ();
+
+  /**
+   * Return an estimate for the memory consumption, in bytes, of this object.
+   * This is not exact (but will usually be close) because calculating the
+   * memory usage of trees (e.g., <tt>std::map</tt>) is difficult.
+   */
+  static std::size_t memory_consumption ();
+
+  /**
+   * Set the maximum number of threads to be used to the minimum of the
+   * environment variable DEAL_II_NUM_THREADS and the given argument (or its
+   * default value). This affects the initialization of the TBB. If neither is
+   * given, the default from TBB is used (based on the number of cores in the
+   * system).
+   *
+   * This routine is executed automatically with the default argument before
+   * your code in main() is running (using a static constructor). It is also
+   * executed by MPI_InitFinalize. Use the appropriate argument of the
+   * constructor of MPI_InitFinalize if you have an MPI based code.
+   */
+  static void set_thread_limit (const unsigned int max_threads = numbers::invalid_unsigned_int);
+
+  /**
+   * Return whether the TBB is running with a single thread, either because of
+   * thread affinity or because it is set via a call to set_thread_limit. This
+   * is used in the PETScWrappers to avoid using the interface that is not
+   * thread-safe.
+   */
+  static bool is_running_single_threaded ();
+
+  /**
+   * Exception type declared through the DeclException0 macro.
+   */
+  DeclException0(ExcProcNotPresent);
+
+private:
+
+
+  /**
+   * Constructor made private because no instance of this class needs to be
+   * constructed as all members are static.
+   */
+  MultithreadInfo ();
+
+  /**
+   * Private function to determine the number of CPUs; returns one if
+   * detection is unsupported or fails. NOTE(review): this comment used to
+   * list Linux, OSF, SGI and Sun, which disagrees with n_cores() -- confirm.
+   */
+  static unsigned int get_n_cpus ();
+
+  /**
+   * Variable representing the maximum number of threads.
+   */
+  static unsigned int n_max_threads;
+
+  /**
+   * Variable representing the number of cores in the system. This is computed
+   * by get_n_cpus() and is returned by n_cores().
+   */
+  static const unsigned int n_cpus;
+};
+
+
+
+//---------------------------------------------------------------------------
+DEAL_II_NAMESPACE_CLOSE
+// end of #ifndef dealii__multithread_info_h
+#endif
+//---------------------------------------------------------------------------
diff --git a/include/deal.II/base/numbers.h b/include/deal.II/base/numbers.h
new file mode 100644
index 0000000..40fd69c
--- /dev/null
+++ b/include/deal.II/base/numbers.h
@@ -0,0 +1,360 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__numbers_h
+#define dealii__numbers_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/types.h>
+
+#include <cmath>
+#include <complex>
+#include <cstdlib>
+#include <limits>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * Namespace for the declaration of universal constants. Since the
+ * availability in <tt>math.h</tt> is not always guaranteed, we put them here.
+ * Since this file is included by <tt>base/config.h</tt>, they are available
+ * to the whole library.
+ *
+ * The constants defined here are a subset of the <tt>M_XXX</tt> constants
+ * sometimes declared in the system include file <tt>math.h</tt>, but without
+ * the prefix <tt>M_</tt>.
+ *
+ * In addition to that, we declare  <tt>invalid_unsigned_int</tt> to be the
+ * largest unsigned integer representable; this value is widely used in the
+ * library as a marker for an invalid index, an invalid size of an array, and
+ * similar purposes.
+ */
+namespace numbers
+{
+  /**
+   * e
+   */
+  static const double  E       = 2.7182818284590452354;
+
+  /**
+   * log_2 e
+   */
+  static const double  LOG2E   = 1.4426950408889634074;
+
+  /**
+   * log_10 e
+   */
+  static const double  LOG10E  = 0.43429448190325182765;
+
+  /**
+   * log_e 2
+   */
+  static const double  LN2     = 0.69314718055994530942;
+
+  /**
+   * log_e 10
+   */
+  static const double  LN10    = 2.30258509299404568402;
+
+  /**
+   * pi
+   */
+  static const double  PI      = 3.14159265358979323846;
+
+  /**
+   * pi/2
+   */
+  static const double  PI_2    = 1.57079632679489661923;
+
+  /**
+   * pi/4
+   */
+  static const double  PI_4    = 0.78539816339744830962;
+
+  /**
+   * sqrt(2)
+   */
+  static const double  SQRT2   = 1.41421356237309504880;
+
+  /**
+   * 1/sqrt(2)
+   */
+  static const double  SQRT1_2 = 0.70710678118654752440;
+
+  /**
+   * Check whether a value is not a number.
+   *
+   * This function uses either <code>std::isnan</code>, <code>isnan</code>, or
+   * <code>_isnan</code>, whichever is available on the system and returns the
+   * result.
+   *
+   * If none of the functions detecting NaN is available, this function
+   * returns false.
+   */
+  bool is_nan (const double x);
+
+  /**
+   * Return @p true if the given value is a finite floating point number, i.e.
+   * is neither plus or minus infinity nor NaN (not a number).
+   *
+   * Note that the argument type of this function is <code>double</code>. In
+   * other words, if you give a very large number of type <code>long
+   * double</code>, this function may return <code>false</code> even if the
+   * number is finite with respect to type <code>long double</code>.
+   */
+  bool is_finite (const double x);
+
+  /**
+   * Return @p true if real and imaginary parts of the given complex number
+   * are finite.
+   */
+  bool is_finite (const std::complex<double> &x);
+
+  /**
+   * Return @p true if real and imaginary parts of the given complex number
+   * are finite.
+   */
+  bool is_finite (const std::complex<float> &x);
+
+  /**
+   * Return @p true if real and imaginary parts of the given complex number
+   * are finite.
+   *
+   * Again may not work correctly if real or imaginary parts are very large
+   * numbers that are infinite in terms of <code>double</code>, but finite
+   * with respect to <code>long double</code>.
+   */
+  bool is_finite (const std::complex<long double> &x);
+
+  /**
+   * A structure that, together with its partial specializations
+   * NumberTraits<std::complex<number> >, provides traits and member functions
+   * that make it possible to write templates that work on both real number
+   * types and complex number types. This template is mostly used to implement
+   * linear algebra classes such as vectors and matrices that work for both
+   * real and complex numbers.
+   *
+   * @author Wolfgang Bangerth, 2007
+   */
+  template <typename number>
+  struct NumberTraits
+  {
+    /**
+     * A flag that specifies whether the template type given to this class is
+     * complex or real. Since the general template is selected for non-complex
+     * types, the answer is <code>false</code>.
+     */
+    static const bool is_complex = false;
+
+    /**
+     * For this data type, typedef the corresponding real type. Since the
+     * general template is selected for all data types that are not
+     * specializations of std::complex<T>, the underlying type must be
+     * real-valued, so the real_type is equal to the underlying type.
+     */
+    typedef number real_type;
+
+    /**
+     * Return the complex-conjugate of the given number. Since the general
+     * template is selected if number is not a complex data type, this
+     * function simply returns the given number.
+     */
+    static
+    const number &conjugate (const number &x);
+
+    /**
+     * Return the square of the absolute value of the given number. Since the
+     * general template is chosen for types not equal to std::complex, this
+     * function simply returns the square of the given number.
+     */
+    static
+    real_type abs_square (const number &x);
+
+    /**
+     * Return the absolute value of a number, as a value of type real_type.
+     */
+    static
+    real_type abs (const number &x);
+  };
+
+
+  /**
+   * Specialization of the general NumberTraits class that provides the
+   * relevant information if the underlying data type is std::complex<T>.
+   *
+   * @author Wolfgang Bangerth, 2007
+   */
+  template <typename number>
+  struct NumberTraits<std::complex<number> >
+  {
+    /**
+     * A flag that specifies whether the template type given to this class is
+     * complex or real. Since this specialization of the general template is
+     * selected for complex types, the answer is <code>true</code>.
+     */
+    static const bool is_complex = true;
+
+    /**
+     * For this data type, typedef the corresponding real type. Since this
+     * specialization of the template is selected for number types
+     * std::complex<T>, the real type is equal to the type used to store the
+     * two components of the complex number.
+     */
+    typedef number real_type;
+
+    /**
+     * Return the complex-conjugate of the given number, by value.
+     */
+    static
+    std::complex<number> conjugate (const std::complex<number> &x);
+
+    /**
+     * Return the square of the absolute value of the given number. Since this
+     * specialization of the general template is chosen for types equal to
+     * std::complex, this function returns the product of a number and its
+     * complex conjugate.
+     */
+    static
+    real_type abs_square (const std::complex<number> &x);
+
+
+    /**
+     * Return the absolute value of a complex number, as a real_type value.
+     */
+    static
+    real_type abs (const std::complex<number> &x);
+  };
+
+  // --------------- inline and template functions ---------------- //
+
+  inline bool is_nan (const double x)
+  {
+#if defined(DEAL_II_HAVE_STD_ISNAN)
+    return std::isnan (x);   // namespace-std variant
+#elif defined(DEAL_II_HAVE_ISNAN)
+    return isnan (x);        // unqualified variant
+#elif defined(DEAL_II_HAVE_UNDERSCORE_ISNAN)
+    return _isnan (x);       // underscore-prefixed variant
+#else
+    return false;            // no NaN test was found on this system
+#endif
+  }
+
+  inline bool is_finite (const double x)
+  {
+#ifdef DEAL_II_HAVE_ISFINITE
+    return !is_nan(x) && std::isfinite (x);
+#else
+    // Fallback without std::isfinite: a finite double lies in
+    // [-max, max]. Any comparison involving NaN is false, so NaN is
+    // rejected as well. std::numeric_limits requires <limits>.
+    const double largest = std::numeric_limits<double>::max();
+
+    return (x >= -largest) && (x <= largest);
+#endif
+  }
+
+
+
+  inline bool is_finite (const std::complex<double> &x)
+  {
+    // A complex number is finite only if both of its components are;
+    // the imaginary part is examined only when the real part passed.
+    if (!is_finite (x.real()))
+      return false;
+    return is_finite (x.imag());
+  }
+
+
+
+  inline bool is_finite (const std::complex<float> &x)
+  {
+    // A complex number is finite only if both of its components are;
+    // the imaginary part is examined only when the real part passed.
+    if (!is_finite (x.real()))
+      return false;
+    return is_finite (x.imag());
+  }
+
+
+
+  inline bool is_finite (const std::complex<long double> &x)
+  {
+    // Finite only if real and imaginary part both pass the scalar test
+    if (!is_finite (x.real()))
+      return false;
+    return is_finite (x.imag());
+  }
+
+  // Real case: a number is its own conjugate, so the argument is returned
+  // by reference without copying.
+  template <typename number>
+  const number &NumberTraits<number>::conjugate (const number &x)
+  {
+    return x;
+  }
+
+  // Real case: the squared absolute value is the product of x with itself.
+  template <typename number>
+  typename NumberTraits<number>::real_type
+  NumberTraits<number>::abs_square (const number &x)
+  {
+    const real_type squared = x * x;
+    return squared;
+  }
+
+  // Real case: defer to std::abs.
+  template <typename number>
+  typename NumberTraits<number>::real_type
+  NumberTraits<number>::abs (const number &x)
+  {
+    const real_type magnitude = std::abs(x);
+    return magnitude;
+  }
+
+
+  // Complex case: defer to std::conj.
+  template <typename number>
+  std::complex<number>
+  NumberTraits<std::complex<number> >::conjugate (const std::complex<number> &x)
+  {
+    return std::conj(x);
+  }
+
+  // Complex case: defer to std::abs for the modulus.
+  template <typename number>
+  typename NumberTraits<std::complex<number> >::real_type
+  NumberTraits<std::complex<number> >::abs (const std::complex<number> &x)
+  {
+    const real_type modulus = std::abs(x);
+    return modulus;
+  }
+
+  // Complex case: defer to std::norm for the squared modulus.
+  template <typename number>
+  typename NumberTraits<std::complex<number> >::real_type
+  NumberTraits<std::complex<number> >::abs_square (const std::complex<number> &x)
+  {
+    const real_type squared_modulus = std::norm (x);
+    return squared_modulus;
+  }
+
+}
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/parallel.h b/include/deal.II/base/parallel.h
new file mode 100644
index 0000000..164afa8
--- /dev/null
+++ b/include/deal.II/base/parallel.h
@@ -0,0 +1,785 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__parallel_h
+#define dealii__parallel_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/template_constraints.h>
+#include <deal.II/base/synchronous_iterator.h>
+
+#include <deal.II/base/std_cxx11/tuple.h>
+#include <deal.II/base/std_cxx11/bind.h>
+#include <deal.II/base/std_cxx11/function.h>
+
+#include <cstddef>
+
+#ifdef DEAL_II_WITH_THREADS
+#  include <tbb/parallel_for.h>
+#  include <tbb/parallel_reduce.h>
+#  include <tbb/partitioner.h>
+#  include <tbb/blocked_range.h>
+#endif
+
+
+//TODO[WB]: allow calling functions to pass along a tbb::affinity_partitioner object to ensure that subsequent calls use the same cache lines
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace parallel
+{
+  namespace internal
+  {
+    /**
+     * Helper struct to tell us if we can use SIMD instructions for the given
+     * @p Number type. The primary template answers <code>true</code>.
+     */
+    template <typename Number>
+    struct EnableOpenMPSimdFor
+    {
+      static const bool value = true;
+    };
+
+#ifdef __INTEL_COMPILER
+    // Disable long double SIMD instructions on ICC. This is to work around a bug
+    // that generates wrong code at least up to intel 15 (see
+    // tests/lac/vector-vector, tests/lac/intel-15-bug, and the discussion at
+    // https://github.com/dealii/dealii/issues/598).
+    template <>
+    struct EnableOpenMPSimdFor<long double>
+    {
+      static const bool value = false;
+    };
+#endif
+
+
+
+    /**
+     * Convert a function object of type F into an object that can be applied
+     * to all elements of a range of synchronous iterators.
+     */
+    template <typename F>
+    struct Body
+    {
+      /**
+       * Constructor. Take and package the given function object.
+       */
+      Body (const F &f)
+        :
+        f (f)
+      {}
+      // Call the stored function once for every element of @p range.
+      template <typename Range>
+      void
+      operator () (const Range &range) const
+      {
+        for (typename Range::const_iterator p=range.begin();
+             p != range.end(); ++p)
+          apply (f, p.iterators);
+      }
+
+    private:
+      /**
+       * The stored function object.
+       */
+      const F f;
+
+      /**
+       * Apply F to a set of iterators with two elements (input, output).
+       */
+      template <typename I1, typename I2>
+      static
+      void
+      apply (const F &f,
+             const std_cxx11::tuple<I1,I2> &p)
+      {
+        *std_cxx11::get<1>(p) = f (*std_cxx11::get<0>(p));
+      }
+
+      /**
+       * Apply F to a set of iterators with three elements (two inputs, output).
+       */
+      template <typename I1, typename I2, typename I3>
+      static
+      void
+      apply (const F &f,
+             const std_cxx11::tuple<I1,I2,I3> &p)
+      {
+        *std_cxx11::get<2>(p) = f (*std_cxx11::get<0>(p),
+                                   *std_cxx11::get<1>(p));
+      }
+
+      /**
+       * Apply F to a set of iterators with four elements (three inputs, output).
+       */
+      template <typename I1, typename I2,
+                typename I3, typename I4>
+      static
+      void
+      apply (const F &f,
+             const std_cxx11::tuple<I1,I2,I3,I4> &p)
+      {
+        *std_cxx11::get<3>(p) = f (*std_cxx11::get<0>(p),
+                                   *std_cxx11::get<1>(p),
+                                   *std_cxx11::get<2>(p));
+      }
+    };
+
+
+    /**
+     * Wrap the function object @p f into a Body<F>. Going through a function
+     * template lets the compiler deduce F, so callers never have to spell out
+     * the (often extraordinarily complicated) type of their function
+     * object.
+     */
+    template <typename F>
+    Body<F> make_body (const F &f)
+    {
+      return Body<F> (f);
+    }
+  }
+
+  /**
+   * An algorithm that performs the action <code>*out++ =
+   * predicate(*in++)</code> where the <code>in</code> iterator ranges over
+   * the given input range.
+   *
+   * This algorithm does pretty much what std::transform does. The difference
+   * is that the function can run in parallel when deal.II is configured to
+   * use multiple threads.
+   *
+   * If running in parallel, the iterator range is split into several chunks
+   * that are each packaged up as a task and given to the Threading Building
+   * Blocks scheduler to work on as compute resources are available. The
+   * function returns once all chunks have been worked on. The last argument
+   * denotes the minimum number of elements of the iterator range per task;
+   * the number must be large enough to amortize the startup cost of new
+   * tasks, and small enough to ensure that tasks can be reasonably load
+   * balanced.
+   *
+   * For a discussion of the kind of problems to which this function is
+   * applicable, see the
+   * @ref threads "Parallel computing with multiple processors"
+   * module.
+   */
+  template <typename InputIterator,
+            typename OutputIterator,
+            typename Predicate>
+  void transform (const InputIterator &begin_in,
+                  const InputIterator &end_in,
+                  OutputIterator       out,
+                  Predicate           &predicate,
+                  const unsigned int   grainsize)
+  {
+#ifndef DEAL_II_WITH_THREADS
+    // without threads the grain size is irrelevant; the cast only
+    // silences warnings about the unused argument
+    (void) grainsize;
+    // serial fallback: the loop variable must have the input iterator type
+    for (InputIterator in = begin_in; in != end_in;)
+      *out++ = predicate (*in++);
+#else
+    typedef std_cxx11::tuple<InputIterator,OutputIterator> Iterators;
+    typedef SynchronousIterators<Iterators> SyncIterators;
+    Iterators x_begin (begin_in, out);
+    Iterators x_end (end_in, OutputIterator());
+    tbb::parallel_for (tbb::blocked_range<SyncIterators>(x_begin,
+                                                         x_end,
+                                                         grainsize),
+                       internal::make_body (predicate),
+                       tbb::auto_partitioner());
+#endif
+  }
+
+
+
+  /**
+   * An algorithm that performs the action <code>*out++ = predicate(*in1++,
+   * *in2++)</code> where the <code>in1</code> iterator ranges over the given
+   * input range, using the parallel for operator of tbb.
+   *
+   * This algorithm does pretty much what std::transform does. The difference
+   * is that the function can run in parallel when deal.II is configured to
+   * use multiple threads.
+   *
+   * If running in parallel, the iterator range is split into several chunks
+   * that are each packaged up as a task and given to the Threading Building
+   * Blocks scheduler to work on as compute resources are available. The
+   * function returns once all chunks have been worked on. The last argument
+   * denotes the minimum number of elements of the iterator range per task;
+   * the number must be large enough to amortize the startup cost of new
+   * tasks, and small enough to ensure that tasks can be reasonably load
+   * balanced.
+   *
+   * For a discussion of the kind of problems to which this function is
+   * applicable, see the
+   * @ref threads "Parallel computing with multiple processors"
+   * module.
+   */
+  template <typename InputIterator1,
+            typename InputIterator2,
+            typename OutputIterator,
+            typename Predicate>
+  void transform (const InputIterator1 &begin_in1,
+                  const InputIterator1 &end_in1,
+                  InputIterator2        in2,
+                  OutputIterator        out,
+                  Predicate            &predicate,
+                  const unsigned int    grainsize)
+  {
+#ifndef DEAL_II_WITH_THREADS
+    // without threads the grain size is irrelevant; the cast only
+    // silences warnings about the unused argument
+    (void) grainsize;
+    // serial fallback: the loop variable must have the first input's type
+    for (InputIterator1 in1 = begin_in1; in1 != end_in1;)
+      *out++ = predicate (*in1++, *in2++);
+#else
+    typedef
+    std_cxx11::tuple<InputIterator1,InputIterator2,OutputIterator>
+    Iterators;
+    typedef SynchronousIterators<Iterators> SyncIterators;
+    Iterators x_begin (begin_in1, in2, out);
+    Iterators x_end (end_in1, InputIterator2(), OutputIterator());
+    tbb::parallel_for (tbb::blocked_range<SyncIterators>(x_begin,
+                                                         x_end,
+                                                         grainsize),
+                       internal::make_body (predicate),
+                       tbb::auto_partitioner());
+#endif
+  }
+
+
+
+  /**
+   * An algorithm that performs the action <code>*out++ = predicate(*in1++,
+   * *in2++, *in3++)</code> where the <code>in1</code> iterator ranges over
+   * the given input range.
+   *
+   * This algorithm does pretty much what std::transform does. The difference
+   * is that the function can run in parallel when deal.II is configured to
+   * use multiple threads.
+   *
+   * If running in parallel, the iterator range is split into several chunks
+   * that are each packaged up as a task and given to the Threading Building
+   * Blocks scheduler to work on as compute resources are available. The
+   * function returns once all chunks have been worked on. The last argument
+   * denotes the minimum number of elements of the iterator range per task;
+   * the number must be large enough to amortize the startup cost of new
+   * tasks, and small enough to ensure that tasks can be reasonably load
+   * balanced.
+   *
+   * For a discussion of the kind of problems to which this function is
+   * applicable, see the
+   * @ref threads "Parallel computing with multiple processors"
+   * module.
+   */
+  template <typename InputIterator1,
+            typename InputIterator2,
+            typename InputIterator3,
+            typename OutputIterator,
+            typename Predicate>
+  void transform (const InputIterator1 &begin_in1,
+                  const InputIterator1 &end_in1,
+                  InputIterator2        in2,
+                  InputIterator3        in3,
+                  OutputIterator        out,
+                  Predicate            &predicate,
+                  const unsigned int    grainsize)
+  {
+#ifndef DEAL_II_WITH_THREADS
+    // without threads the grain size is irrelevant; the cast only
+    // silences warnings about the unused argument
+    (void) grainsize;
+    // serial fallback: the loop variable must have the first input's type
+    for (InputIterator1 in1 = begin_in1; in1 != end_in1;)
+      *out++ = predicate (*in1++, *in2++, *in3++);
+#else
+    typedef
+    std_cxx11::tuple<InputIterator1,InputIterator2,InputIterator3,OutputIterator>
+    Iterators;
+    typedef SynchronousIterators<Iterators> SyncIterators;
+    Iterators x_begin (begin_in1, in2, in3, out);
+    Iterators x_end (end_in1, InputIterator2(),
+                     InputIterator3(), OutputIterator());
+    tbb::parallel_for (tbb::blocked_range<SyncIterators>(x_begin,
+                                                         x_end,
+                                                         grainsize),
+                       internal::make_body (predicate),
+                       tbb::auto_partitioner());
+#endif
+  }
+
+
+  namespace internal
+  {
+#ifdef DEAL_II_WITH_THREADS
+    /**
+     * Unpack a TBB range: invoke @p f on the two end points of @p range.
+     * Only compiled when threading support is enabled.
+     */
+    template <typename RangeType, typename Function>
+    void apply_to_subranges (const tbb::blocked_range<RangeType> &range,
+                             const Function                      &f)
+    {
+      f (range.begin(), range.end());
+    }
+#endif
+  }
+
+
+  /**
+   * This function applies the given function argument @p f to all elements in
+   * the range <code>[begin,end)</code> and may do so in parallel.
+   *
+   * However, in many cases it is not efficient to call a function on each
+   * element, so this function calls the given function object on sub-ranges.
+   * In other words: if the given range <code>[begin,end)</code> is smaller
+   * than grainsize or if multithreading is not enabled, then we call
+   * <code>f(begin,end)</code>; otherwise, we may execute, possibly in
+   * %parallel, a sequence of calls <code>f(b,e)</code> where
+   * <code>[b,e)</code> are subintervals of <code>[begin,end)</code> and the
+   * collection of calls we do to <code>f(.,.)</code> will happen on disjoint
+   * subintervals that collectively cover the original interval
+   * <code>[begin,end)</code>.
+   *
+   * Oftentimes, the called function will of course have to get additional
+   * information, such as the object to work on for a given value of the
+   * iterator argument. This can be achieved by <i>binding</i> certain
+   * arguments. For example, here is an implementation of a matrix-vector
+   * multiplication $y=Ax$ for a full matrix $A$ and vectors $x,y$:
+   * @code
+   *   void matrix_vector_product (const FullMatrix &A,
+   *                               const Vector     &x,
+   *                               Vector           &y)
+   *   {
+   *     parallel::apply_to_subranges
+   *        (0, A.n_rows(),
+   *         std_cxx11::bind (&mat_vec_on_subranges,
+   *                          std_cxx11::_1, std_cxx11::_2,
+   *                          std_cxx11::cref(A),
+   *                          std_cxx11::cref(x),
+   *                          std_cxx11::ref(y)),
+   *         50);
+   *   }
+   *
+   *   void mat_vec_on_subranges (const unsigned int begin_row,
+   *                              const unsigned int end_row,
+   *                              const FullMatrix &A,
+   *                              const Vector     &x,
+   *                              Vector           &y)
+   *   {
+   *     for (unsigned int row=begin_row; row!=end_row; ++row)
+   *       for (unsigned int col=0; col<x.size(); ++col)
+   *         y(row) += A(row,col) * x(col);
+   *   }
+   * @endcode
+   *
+   * Note how we use the <code>std_cxx11::bind</code> function to convert
+   * <code>mat_vec_on_subranges</code> from a function that takes 5 arguments
+   * to one taking 2 by binding the remaining arguments (the modifiers
+   * <code>std_cxx11::ref</code> and <code>std_cxx11::cref</code> make sure
+   * that the enclosed variables are actually passed by reference and constant
+   * reference, rather than by value). The resulting function object requires
+   * only two arguments, begin_row and end_row, with all other arguments
+   * fixed.
+   *
+   * The code, if in single-thread mode, will call
+   * <code>mat_vec_on_subranges</code> on the entire range
+   * <code>[0,n_rows)</code> exactly once. In multi-threaded mode, however, it
+   * may be called multiple times on subranges of this interval, possibly
+   * allowing more than one CPU core to take care of part of the work.
+   *
+   * The @p grainsize argument (50 in the example above) makes sure that
+   * subranges do not become too small, to avoid spending more time on
+   * scheduling subranges to CPU resources than on doing actual work.
+   *
+   * For a discussion of the kind of problems to which this function is
+   * applicable, see also the
+   * @ref threads "Parallel computing with multiple processors"
+   * module.
+   */
+  template <typename RangeType, typename Function>
+  void apply_to_subranges (const RangeType                          &begin,
+                           const typename identity<RangeType>::type &end,
+                           const Function                           &f,
+                           const unsigned int                        grainsize)
+  {
+#ifdef DEAL_II_WITH_THREADS
+    // hand [begin,end) to TBB with the requested grain size; every piece
+    // is forwarded to f through the internal helper of the same name
+    const tbb::blocked_range<RangeType> whole_range (begin, end, grainsize);
+    tbb::parallel_for (whole_range,
+                       std_cxx11::bind (&internal::apply_to_subranges<RangeType,Function>,
+                                        std_cxx11::_1,
+                                        std_cxx11::cref(f)),
+                       tbb::auto_partitioner());
+#else
+    // serial build: a single call on the whole range; grainsize is unused
+    (void) grainsize;
+#  ifdef DEAL_II_BIND_NO_CONST_OP_PARENTHESES
+    // work around a problem with MS VC++ where there is no const
+    // operator() in 'Function' if 'Function' is the result of std::bind
+    Function callable = f;
+    callable (begin, end);
+#  else
+    f (begin, end);
+#  endif
+#endif
+  }
+
+
+
+  /**
+   * This is a class specialized to for loops with a fixed range given by
+   * unsigned integers. This is an abstract base class that an actual worker
+   * function is derived from. There is a public function apply that issues a
+   * for loop in parallel, subdividing the work onto available processor cores
+   * whenever there is enough work to be done (i.e., the number of elements is
+   * larger than grain_size). Inside the function, a virtual function
+   * apply_to_subrange specifying a range of two integers <tt>[lower,
+   * upper)</tt> is called which needs to be defined in a derived class.
+   *
+   * The parallelization cases covered by this class are a subset of what is
+   * possible with the function apply_to_subranges (which also covers the case
+   * of more general iterators that might not be described by an integer
+   * range). However, for simple integer ranges one might prefer this class,
+   * like when there are many structurally similar loops, e.g., some simple
+   * copy or arithmetic operations on an array of pointers. In that case,
+   * apply_to_subranges will generate a lot of code (or rather, a lot of
+   * symbols) because it passes the long names generated by std::bind to the
+   * templated parallel for functions in TBB. This can considerably increase
+   * compile times and the size of the object code. Similarly, the incorrect
+   * use of std::bind often results in very cryptic error messages, which can
+   * be avoided by this class (only a virtual function needs to be defined in
+   * a derived class). Finally, the additional cost of a virtual function is
+   * negligible in the context of parallel functions: It is much more
+   * expensive to actually issue the work onto a thread, which in turn should
+   * be much less than the actual work done in the for loop.
+   */
+  struct ParallelForInteger
+  {
+    /**
+     * Destructor. Made virtual to ensure that derived classes also have
+     * virtual destructors.
+     */
+    virtual ~ParallelForInteger ();
+
+    /**
+     * This function runs the for loop over the given range
+     * <tt>[begin,end)</tt>, possibly in parallel when end-begin is larger
+     * than the minimum parallel grain size. This function is marked const
+     * because any operation that changes the data of a derived class will
+     * inherently not be thread-safe when several threads work with the same
+     * data simultaneously.
+     */
+    void apply_parallel (const std::size_t begin,
+                         const std::size_t end,
+                         const std::size_t minimum_parallel_grain_size) const;
+
+    /**
+     * Virtual function for working on a subrange, to be defined in a derived
+     * class.  This function is marked const because any operation that
+     * changes the data of a derived class will inherently not be thread-safe
+     * when several threads work with the same data simultaneously.
+     */
+    virtual void apply_to_subrange (const std::size_t,
+                                    const std::size_t) const = 0;
+  };
+
+
+
+  namespace internal
+  {
+#ifdef DEAL_II_WITH_THREADS
+    /**
+     * A class that conforms to the Body requirements of the TBB
+     * parallel_reduce function. The first template argument denotes the type
+     * on which the reduction is to be done. The second denotes the type of
+     * the function object that shall be called for each subrange.
+     */
+    template <typename ResultType,
+              typename Function>
+    struct ReductionOnSubranges
+    {
+      /**
+       * A variable that will hold the result of the reduction.
+       */
+      ResultType result;
+
+      /**
+       * Constructor. Take the function object to call on each sub-range as
+       * well as the neutral element with respect to the reduction operation.
+       *
+       * The second argument denotes a function object that will be used to
+       * reduce the result of two computations into one number. An example if
+       * we want to simply accumulate integer results would be
+       * std::plus<int>().
+       */
+      template <typename Reductor>
+      ReductionOnSubranges (const Function &f,
+                            const Reductor &reductor,
+                            const ResultType neutral_element = ResultType())
+        :
+        result (neutral_element),
+        f (f),
+        neutral_element (neutral_element),
+        reductor (reductor)
+      {}
+
+      /**
+       * Splitting constructor (see the TBB book): result restarts at neutral_element.
+       */
+      ReductionOnSubranges (const ReductionOnSubranges &r,
+                            tbb::split)
+        :
+        result (r.neutral_element),
+        f (r.f),
+        neutral_element (r.neutral_element),
+        reductor (r.reductor)
+      {}
+
+      /**
+       * Join operation: merge the results from computations on different
+       * sub-intervals.
+       */
+      void join (const ReductionOnSubranges &r)
+      {
+        result = reductor(result, r.result);
+      }
+
+      /**
+       * Execute the given function on the specified range and reduce it into result.
+       */
+      template <typename RangeType>
+      void operator () (const tbb::blocked_range<RangeType> &range)
+      {
+        result = reductor(result,
+                          f (range.begin(), range.end()));
+      }
+
+    private:
+      /**
+       * The function object to call on every sub-range.
+       */
+      const Function f;
+
+      /**
+       * The neutral element with respect to the reduction operation. This is
+       * needed when calling the splitting constructor since we have to re-set
+       * the result variable in this case.
+       */
+      const ResultType neutral_element;
+
+      /**
+       * The function object to be used to reduce the result of two calls into
+       * one number.
+       */
+      const std_cxx11::function<ResultType (ResultType, ResultType)> reductor;
+    };
+#endif
+  }
+
+
+  /**
+   * This function works a lot like the apply_to_subranges(), but it allows to
+   * accumulate numerical results computed on each subrange into one number.
+   * The type of this number is given by the ResultType template argument that
+   * needs to be explicitly specified.
+   *
+   * An example of use of this function is to compute the value of the
+   * expression $x^T A x$ for a square matrix $A$ and a vector $x$. The sum
+   * over rows can be parallelized and the whole code might look like this:
+   * @code
+   *   double matrix_norm (const FullMatrix &A,
+   *                       const Vector     &x)
+   *   {
+   *     return
+   *      std::sqrt
+   *       (parallel::accumulate_from_subranges<double>
+   *        (std_cxx11::bind (&mat_norm_sqr_on_subranges,
+   *                          std_cxx11::_1, std_cxx11::_2,
+   *                          std_cxx11::cref(A),
+   *                          std_cxx11::cref(x)),
+   *         0, A.n_rows(),
+   *         50));
+   *   }
+   *
+   *   double
+   *   mat_norm_sqr_on_subranges (const unsigned int begin_row,
+   *                              const unsigned int end_row,
+   *                              const FullMatrix &A,
+   *                              const Vector     &x)
+   *   {
+   *     double norm_sqr = 0;
+   *     for (unsigned int row=begin_row; row!=end_row; ++row)
+   *       for (unsigned int col=0; col<x.size(); ++col)
+   *         norm_sqr += x(row) * A(row,col) * x(col);
+   *     return norm_sqr;
+   *   }
+   * @endcode
+   *
+   * Here, <code>mat_norm_sqr_on_subranges</code> is called on the entire
+   * range <code>[0,A.n_rows())</code> if this range is less than the minimum
+   * grainsize (above chosen as 50) or if deal.II is configured to not use
+   * multithreading. Otherwise, it may be called on subsets of the given
+   * range, with results from the individual subranges accumulated internally.
+   *
+   * @warning If ResultType is a floating point type, then accumulation is not
+   * an associative operation. In other words, if the given function object is
+   * called three times on three subranges, returning values $a,b,c$, then the
+   * returned result of this function is $(a+b)+c$. However, depending on how
+   * the three sub-tasks are distributed on available CPU resources, the
+   * result may also be $(a+c)+b$ or any other permutation; because floating
+   * point addition is not associative (as opposed, of course, to addition of
+   * real %numbers), the result of invoking this function several times may
+   * differ on the order of round-off.
+   *
+   * For a discussion of the kind of problems to which this function is
+   * applicable, see also the
+   * @ref threads "Parallel computing with multiple processors"
+   * module.
+   */
+  template <typename ResultType, typename RangeType, typename Function>
+  ResultType accumulate_from_subranges (const Function &f,
+                                        const RangeType                          &begin,
+                                        const typename identity<RangeType>::type &end,
+                                        const unsigned int grainsize)
+  {
+#ifndef DEAL_II_WITH_THREADS
+    // grainsize only matters for the TBB code path below; reference it
+    // here so that compilers do not warn about an unused argument
+    (void) grainsize;
+
+#  ifndef DEAL_II_BIND_NO_CONST_OP_PARENTHESES
+    return f (begin, end);
+#  else
+    // work around a problem with MS VC++ where 'Function' has no const
+    // operator() if it is the result of std::bind: call a non-const copy
+    Function ff = f;
+    return ff (begin, end);
+#  endif
+#else
+    internal::ReductionOnSubranges<ResultType,Function>
+    reductor (f, std::plus<ResultType>(), 0);
+    tbb::parallel_reduce (tbb::blocked_range<RangeType>(begin, end, grainsize),
+                          reductor,
+                          tbb::auto_partitioner());
+    return reductor.result;
+#endif
+  }
+
+}
+
+
+namespace internal
+{
+  namespace Vector
+  {
+    /**
+     * If we do computations on vectors in parallel (say, we add two vectors
+     * to get a third, and we do the loop over all elements in parallel), then
+     * this variable determines the minimum number of elements for which it is
+     * profitable to split a range of elements any further to distribute to
+     * different threads.
+     *
+     * This variable is available as a global writable variable in order to
+     * allow the testsuite to also test the parallel case. By default, it is
+     * set to several thousand elements, which is a case that the testsuite
+     * would not normally encounter. As a consequence, in the testsuite we set
+     * it to one -- a value that's hugely unprofitable but definitely tests
+     * parallel operations.
+     */
+    extern unsigned int minimum_parallel_grain_size;
+  }
+
+
+  namespace SparseMatrix
+  {
+    /**
+     * Like internal::Vector::minimum_parallel_grain_size, but now denoting
+     * the number of rows of a matrix that should be worked on as a minimum.
+     */
+    extern unsigned int minimum_parallel_grain_size;
+  }
+
+} // end of namespace internal
+
+
+/* --------------------------- inline functions ------------------------- */
+
+namespace parallel
+{
+
+#ifdef DEAL_II_WITH_THREADS
+
+  namespace internal
+  {
+    /**
+     * This is the function object actually called by TBB for the
+     * ParallelForInteger class.
+     */
+    struct ParallelForWrapper
+    {
+      ParallelForWrapper (const parallel::ParallelForInteger &worker)
+        :
+        worker_ (worker)
+      {}
+
+      void operator() (const tbb::blocked_range<std::size_t> &range) const
+      {
+        worker_.apply_to_subrange (range.begin(), range.end());
+      }
+
+      const parallel::ParallelForInteger &worker_;
+    };
+  }
+
+#endif
+
+
+  inline
+  ParallelForInteger::~ParallelForInteger ()
+  {}
+
+
+  inline
+  void
+  ParallelForInteger::apply_parallel (const std::size_t begin,
+                                      const std::size_t end,
+                                      const std::size_t minimum_parallel_grain_size) const
+  {
+#ifndef DEAL_II_WITH_THREADS
+    // the grain size only matters for the TBB code path below; reference
+    // it here so that compilers do not warn about an unused argument
+    (void) minimum_parallel_grain_size;
+
+    apply_to_subrange (begin, end);
+#else
+    internal::ParallelForWrapper worker(*this);
+    tbb::parallel_for (tbb::blocked_range<std::size_t>
+                       (begin, end, minimum_parallel_grain_size),
+                       worker,
+                       tbb::auto_partitioner());
+#endif
+  }
+
+} // end of namespace parallel
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/parameter_handler.h b/include/deal.II/base/parameter_handler.h
new file mode 100644
index 0000000..b7be6c1
--- /dev/null
+++ b/include/deal.II/base/parameter_handler.h
@@ -0,0 +1,2444 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__parameter_handler_h
+#define dealii__parameter_handler_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/std_cxx11/shared_ptr.h>
+#include <deal.II/base/std_cxx11/unique_ptr.h>
+
+#include <boost/property_tree/ptree_fwd.hpp>
+#include <boost/serialization/split_member.hpp>
+
+#include <map>
+#include <vector>
+#include <string>
+
+DEAL_II_NAMESPACE_OPEN
+
+// forward declarations for interfaces and friendship
+class LogStream;
+class MultipleParameterLoop;
+
+
+/**
+ * Namespace for a few classes that act as patterns for the ParameterHandler
+ * class. These classes implement an interface that checks whether a parameter
+ * in an input file matches a certain pattern, such as "being boolean", "an
+ * integer value", etc.
+ *
+ * @ingroup input
+ */
+namespace Patterns
+{
+
+  /**
+   * Base class to declare a common interface. The purpose of this class is
+   * mostly to define the interface of patterns, and to force derived classes
+   * to have a <tt>clone</tt> function. It is thus, in the language of the
+   * "Design Patterns" book (Gamma et al.), a "prototype".
+   */
+  class PatternBase
+  {
+  public:
+    /**
+     * Make destructor of this and all derived classes virtual.
+     */
+    virtual ~PatternBase ();
+
+    /**
+     * Return <tt>true</tt> if the given string matches the pattern.
+     */
+    virtual bool match (const std::string &test_string) const = 0;
+
+    /**
+     * Return a string describing the pattern.
+     */
+    virtual std::string description () const = 0;
+
+    /**
+     * Return a pointer to an exact copy of the object. This is necessary
+     * since we want to store objects of this type in containers, where we
+     * need to copy objects without knowledge of their actual data type (we
+     * only have pointers to the base class).
+     *
+     * Ownership of the objects returned by this function is passed to the
+     * caller of this function.
+     */
+    virtual PatternBase *clone () const = 0;
+
+    /**
+     * Determine an estimate for the memory consumption (in bytes) of this
+     * object. To avoid unnecessary overhead, we do not force derived classes
+     * to provide this function as a virtual overridden one, but rather try to
+     * cast the present object to one of the known derived classes and if that
+     * fails then take the size of this base class instead and add 32 bytes
+     * (this value is arbitrary, it should account for virtual function
+     * tables, and some possible data elements). Since there are usually not
+     * many thousands of objects of this type around, and since the
+     * memory_consumption mechanism is used to find out where memory in the
+     * range of many megabytes is, this seems like a reasonable approximation.
+     *
+     * On the other hand, if you know that your class deviates from this
+     * assumption significantly, you can still override this function.
+     */
+    virtual std::size_t memory_consumption () const;
+  };
+
+  /**
+   * Returns pointer to the correct derived class based on description.
+   */
+  PatternBase *pattern_factory (const std::string &description);
+
+  /**
+   * Test for the string being an integer. If bounds are given to the
+   * constructor, then the integer given also needs to be within the interval
+   * specified by these bounds. Note that unlike common convention in the C++
+   * standard library, both bounds of this interval are inclusive; the reason
+   * is that in practice in most cases, one needs closed intervals, but these
+   * can only be realized with inclusive bounds for non-integer values. We
+   * thus stay consistent by always using closed intervals.
+   *
+   * If the upper bound given to the constructor is smaller than the lower
+   * bound, then the infinite interval is implied, i.e. every integer is
+   * allowed.
+   *
+   * Giving bounds may be useful if for example a value can only be positive
+   * and less than a reasonable upper bound (for example the number of
+   * refinement steps to be performed), or in many other cases.
+   */
+  class Integer : public PatternBase
+  {
+  public:
+    /**
+     * Minimal integer value. If the numeric_limits class is available use
+     * this information to obtain the extremal values, otherwise set it so
+     * that this class understands that all values are allowed.
+     */
+    static const int min_int_value;
+
+    /**
+     * Maximal integer value. If the numeric_limits class is available use
+     * this information to obtain the extremal values, otherwise set it so
+     * that this class understands that all values are allowed.
+     */
+    static const int max_int_value;
+
+    /**
+     * Constructor. Bounds can be specified within which a valid parameter has
+     * to be. If the upper bound is smaller than the lower bound, then the
+     * infinite interval is meant. The default values are chosen such that no
+     * bounds are enforced on parameters.
+     */
+    Integer (const int lower_bound = min_int_value,
+             const int upper_bound = max_int_value);
+
+    /**
+     * Return <tt>true</tt> if the string is an integer and its value is
+     * within the specified range.
+     */
+    virtual bool match (const std::string &test_string) const;
+
+    /**
+     * Return a description of the pattern that valid strings are expected to
+     * match. If bounds were specified to the constructor, then include them
+     * into this description.
+     */
+    virtual std::string description () const;
+
+    /**
+     * Return a copy of the present object, which is newly allocated on the
+     * heap. Ownership of that object is transferred to the caller of this
+     * function.
+     */
+    virtual PatternBase *clone () const;
+
+    /**
+     * Creates new object if the start of description matches
+     * description_init.  Ownership of that object is transferred to the
+     * caller of this function.
+     */
+    static Integer *create (const std::string &description);
+
+  private:
+    /**
+     * Value of the lower bound. A number that satisfies the
+     * @ref match
+     * operation of this class must be equal to this value or larger, if the
+     * bounds of the interval form a valid range.
+     */
+    const int lower_bound;
+
+    /**
+     * Value of the upper bound. A number that satisfies the
+     * @ref match
+     * operation of this class must be equal to this value or less, if the
+     * bounds of the interval form a valid range.
+     */
+    const int upper_bound;
+
+    /**
+     * Initial part of description
+     */
+    static const char *description_init;
+  };
+
+  /**
+   * Test for the string being a <tt>double</tt>. If bounds are given to the
+   * constructor, then the number given also needs to be within the interval
+   * specified by these bounds. Note that unlike common convention in the C++
+   * standard library, both bounds of this interval are inclusive; the reason
+   * is that in practice in most cases, one needs closed intervals, but these
+   * can only be realized with inclusive bounds for non-integer values. We
+   * thus stay consistent by always using closed intervals.
+   *
+   * If the upper bound given to the constructor is smaller than the lower
+   * bound, then the infinite interval is implied, i.e. every number is
+   * allowed.
+   *
+   * Giving bounds may be useful if for example a value can only be positive
+   * and less than a reasonable upper bound (for example damping parameters
+   * are frequently only reasonable if between zero and one), or in many other
+   * cases.
+   */
+  class Double : public PatternBase
+  {
+  public:
+    /**
+     * Minimal double value. If the <tt>std::numeric_limits</tt> class is
+     * available use this information to obtain the extremal values, otherwise
+     * set it so that this class understands that all values are allowed.
+     */
+    static const double min_double_value;
+
+    /**
+     * Maximal double value. If the numeric_limits class is available use this
+     * information to obtain the extremal values, otherwise set it so that
+     * this class understands that all values are allowed.
+     */
+    static const double max_double_value;
+
+    /**
+     * Constructor. Bounds can be specified within which a valid parameter has
+     * to be. If the upper bound is smaller than the lower bound, then the
+     * infinite interval is meant. The default values are chosen such that no
+     * bounds are enforced on parameters.
+     */
+    Double (const double lower_bound = min_double_value,
+            const double upper_bound = max_double_value);
+
+    /**
+     * Return <tt>true</tt> if the string is a number and its value is within
+     * the specified range.
+     */
+    virtual bool match (const std::string &test_string) const;
+
+    /**
+     * Return a description of the pattern that valid strings are expected to
+     * match. If bounds were specified to the constructor, then include them
+     * into this description.
+     */
+    virtual std::string description () const;
+
+    /**
+     * Return a copy of the present object, which is newly allocated on the
+     * heap. Ownership of that object is transferred to the caller of this
+     * function.
+     */
+    virtual PatternBase *clone () const;
+
+    /**
+     * Creates new object if the start of description matches
+     * description_init.  Ownership of that object is transferred to the
+     * caller of this function.
+     */
+    static Double *create (const std::string &description);
+
+  private:
+    /**
+     * Value of the lower bound. A number that satisfies the
+     * @ref match
+     * operation of this class must be equal to this value or larger, if the
+     * bounds of the interval form a valid range.
+     */
+    const double lower_bound;
+
+    /**
+     * Value of the upper bound. A number that satisfies the
+     * @ref match
+     * operation of this class must be equal to this value or less, if the
+     * bounds of the interval form a valid range.
+     */
+    const double upper_bound;
+
+    /**
+     * Initial part of description
+     */
+    static const char *description_init;
+  };
+
+  /**
+   * Test for the string being one of a sequence of values given like a
+   * regular expression. For example, if the string given to the constructor
+   * is <tt>"red|blue|black"</tt>, then the
+   * @ref match
+   * function returns <tt>true</tt> exactly if the string is either "red" or
+   * "blue" or "black". Spaces around the pipe signs do not matter and are
+   * eliminated.
+   */
+  class Selection : public PatternBase
+  {
+  public:
+    /**
+     * Constructor. Take the given parameter as the specification of valid
+     * strings.
+     */
+    Selection (const std::string &seq);
+
+    /**
+     * Return <tt>true</tt> if the string is an element of the description
+     * list passed to the constructor.
+     */
+    virtual bool match (const std::string &test_string) const;
+
+    /**
+     * Return a description of the pattern that valid strings are expected to
+     * match. Here, this is the list of valid strings passed to the
+     * constructor.
+     */
+    virtual std::string description () const;
+
+    /**
+     * Return a copy of the present object, which is newly allocated on the
+     * heap. Ownership of that object is transferred to the caller of this
+     * function.
+     */
+    virtual PatternBase *clone () const;
+
+    /**
+     * Determine an estimate for the memory consumption (in bytes) of this
+     * object. Overrides PatternBase::memory_consumption().
+     */
+    std::size_t memory_consumption () const;
+
+    /**
+     * Creates new object if the start of description matches
+     * description_init.  Ownership of that object is transferred to the
+     * caller of this function.
+     */
+    static Selection *create (const std::string &description);
+
+  private:
+    /**
+     * List of valid strings as passed to the constructor. We don't make this
+     * string constant, as we process it somewhat in the constructor.
+     */
+    std::string sequence;
+
+    /**
+     * Initial part of description
+     */
+    static const char *description_init;
+  };
+
+
+  /**
+   * This pattern matches a list of values separated by commas (or another
+   * string), each of which have to match a pattern given to the constructor.
+   * With two additional parameters, the number of elements this list has to
+   * have can be specified. If none is specified, the list may have zero or
+   * more entries.
+   */
+  class List : public PatternBase
+  {
+  public:
+    /**
+     * Maximal integer value. If the numeric_limits class is available use
+     * this information to obtain the extremal values, otherwise set it so
+     * that this class understands that all values are allowed.
+     */
+    static const unsigned int max_int_value;
+
+    /**
+     * Constructor. Take the given parameter as the specification of valid
+     * elements of the list.
+     *
+     * The three other arguments can be used to denote minimal and maximal
+     * allowable lengths of the list, and the string that is used as a
+     * separator between elements of the list.
+     */
+    List (const PatternBase  &base_pattern,
+          const unsigned int  min_elements = 0,
+          const unsigned int  max_elements = max_int_value,
+          const std::string  &separator = ",");
+
+    /**
+     * Destructor.
+     */
+    virtual ~List ();
+
+    /**
+     * Return <tt>true</tt> if the string is a comma-separated list of strings
+     * each of which match the pattern given to the constructor.
+     */
+    virtual bool match (const std::string &test_string) const;
+
+    /**
+     * Return a description of the pattern that valid strings are expected to
+     * match.
+     */
+    virtual std::string description () const;
+
+    /**
+     * Return a copy of the present object, which is newly allocated on the
+     * heap. Ownership of that object is transferred to the caller of this
+     * function.
+     */
+    virtual PatternBase *clone () const;
+
+    /**
+     * Creates new object if the start of description matches
+     * description_init.  Ownership of that object is transferred to the
+     * caller of this function.
+     */
+    static List *create (const std::string &description);
+
+    /**
+     * Determine an estimate for the memory consumption (in bytes) of this
+     * object.
+     */
+    std::size_t memory_consumption () const;
+
+    /**
+     * @addtogroup Exceptions
+     * @{
+     */
+
+    /**
+     * Exception reporting two values that do not form a valid range.
+     */
+    DeclException2 (ExcInvalidRange,
+                    int, int,
+                    << "The values " << arg1 << " and " << arg2
+                    << " do not form a valid range.");
+    //@}
+  private:
+    /**
+     * Copy of the pattern that each element of the list has to satisfy.
+     */
+    PatternBase *pattern;
+
+    /**
+     * Minimum number of elements the list must have.
+     */
+    const unsigned int min_elements;
+
+    /**
+     * Maximum number of elements the list may have.
+     */
+    const unsigned int max_elements;
+
+    /**
+     * Separator between elements of the list.
+     */
+    const std::string separator;
+
+    /**
+     * Initial part of description
+     */
+    static const char *description_init;
+  };
+
+
+  /**
+   * This pattern matches a list of comma-separated values each of which
+   * denotes a pair of key and value. Both key and value have to match a
+   * pattern given to the constructor. For each entry of the map, parameters
+   * have to be entered in the form <code>key: value</code>. In other words, a
+   * map is described in the form <code>key1: value1, key2: value2, key3:
+   * value3, ...</code>. A constructor argument allows to choose a delimiter
+   * between pairs other than the comma.
+   *
+   * With two additional parameters, the number of elements this list has to
+   * have can be specified. If none is specified, the map may have zero or
+   * more entries.
+   */
+  class Map : public PatternBase
+  {
+  public:
+    /**
+     * Maximal integer value. If the numeric_limits class is available use
+     * this information to obtain the extremal values, otherwise set it so
+     * that this class understands that all values are allowed.
+     */
+    static const unsigned int max_int_value;
+
+    /**
+     * Constructor. Take the given patterns as the specification of valid
+     * keys and values of the map.
+     *
+     * The three other arguments can be used to denote minimal and maximal
+     * allowable lengths of the list as well as the separator used to delimit
+     * pairs of the map.
+     */
+    Map (const PatternBase  &key_pattern,
+         const PatternBase  &value_pattern,
+         const unsigned int  min_elements = 0,
+         const unsigned int  max_elements = max_int_value,
+         const std::string  &separator = ",");
+
+    /**
+     * Destructor.
+     */
+    virtual ~Map ();
+
+    /**
+     * Return <tt>true</tt> if the string is a list of <code>key: value</code>
+     * pairs whose keys and values match the patterns given to the constructor.
+     */
+    virtual bool match (const std::string &test_string) const;
+
+    /**
+     * Return a description of the pattern that valid strings are expected to
+     * match.
+     */
+    virtual std::string description () const;
+
+    /**
+     * Return a copy of the present object, which is newly allocated on the
+     * heap. Ownership of that object is transferred to the caller of this
+     * function.
+     */
+    virtual PatternBase *clone () const;
+
+    /**
+     * Creates new object if the start of description matches
+     * description_init.  Ownership of that object is transferred to the
+     * caller of this function.
+     */
+    static Map *create (const std::string &description);
+
+    /**
+     * Determine an estimate for the memory consumption (in bytes) of this
+     * object.
+     */
+    std::size_t memory_consumption () const;
+
+    /**
+     * @addtogroup Exceptions
+     * @{
+     */
+
+    /**
+     * Exception reporting two values that do not form a valid range.
+     */
+    DeclException2 (ExcInvalidRange,
+                    int, int,
+                    << "The values " << arg1 << " and " << arg2
+                    << " do not form a valid range.");
+    //@}
+  private:
+    /**
+     * Copy of the patterns that each key and each value of the map has to
+     * satisfy.
+     */
+    PatternBase *key_pattern;
+    PatternBase *value_pattern;
+
+    /**
+     * Minimum number of elements the map must have.
+     */
+    const unsigned int min_elements;
+
+    /**
+     * Maximum number of elements the map may have.
+     */
+    const unsigned int max_elements;
+
+    /**
+     * Separator between pairs of the map.
+     */
+    const std::string separator;
+
+    /**
+     * Initial part of description
+     */
+    static const char *description_init;
+  };
+
+
+  /**
+   * This class is much like the Selection class, but it allows the input to
+   * be a comma-separated list of values, each of which has to be among those
+   * given in the constructor argument. The input may be empty, may contain
+   * values more than once, and may have any number of spaces around commas.
+   * Commas are not allowed inside the values given to the constructor.
+   *
+   * For example, if the string to the constructor was <tt>"ucd|gmv|eps"</tt>,
+   * then the following would be legal inputs: "eps", "gmv, eps", or "".
+   */
+  class MultipleSelection : public PatternBase
+  {
+  public:
+    /**
+     * Constructor. @p seq is a list of valid options separated by "|".
+     */
+    MultipleSelection (const std::string &seq);
+
+    /**
+     * Return <tt>true</tt> if each entry of the comma-separated input is one
+     * of the valid options passed to the constructor.
+     */
+    virtual bool match (const std::string &test_string) const;
+
+    /**
+     * Return a description of the pattern that valid strings are expected to
+     * match. Here, this is the list of valid strings passed to the
+     * constructor.
+     */
+    virtual std::string description () const;
+
+    /**
+     * Return a copy of the present object, which is newly allocated on the
+     * heap. Ownership of that object is transferred to the caller of this
+     * function.
+     */
+    virtual PatternBase *clone () const;
+
+    /**
+     * Create a new object if the start of @p description matches
+     * description_init.  Ownership of that object is transferred to the
+     * caller of this function.
+     */
+    static MultipleSelection *create (const std::string &description);
+
+    /**
+     * Determine an estimate for the memory consumption (in bytes) of this
+     * object.
+     */
+    std::size_t memory_consumption () const;
+
+    /**
+     * @addtogroup Exceptions
+     * @{
+     */
+
+    /**
+     * Exception reporting the position of a comma where none is allowed.
+     */
+    DeclException1 (ExcCommasNotAllowed,
+                    int,
+                    << "A comma was found at position " << arg1
+                    << " of your input string, but commas are not allowed here.");
+    //@}
+  private:
+    /**
+     * List of valid strings as passed to the constructor. We don't make this
+     * string constant, as we process it somewhat in the constructor.
+     */
+    std::string sequence;
+
+    /**
+     * Initial part of the description, which create() compares against.
+     */
+    static const char *description_init;
+  };
+
+  /**
+   * Test for the string being either "true" or "false". This is implemented
+   * by deriving from the Selection class.
+   */
+  class Bool : public Selection
+  {
+  public:
+    /**
+     * Constructor.
+     */
+    Bool ();
+
+    /**
+     * Return a description of the pattern that valid strings are expected to
+     * match.
+     */
+    virtual std::string description () const;
+
+    /**
+     * Return a copy of the present object, which is newly allocated on the
+     * heap. Ownership of that object is transferred to the caller of this
+     * function.
+     */
+    virtual PatternBase *clone () const;
+
+    /**
+     * Create a new object if the start of @p description matches
+     * description_init.  Ownership of that object is transferred to the
+     * caller of this function.
+     */
+    static Bool *create (const std::string &description);
+
+  private:
+    /**
+     * Initial part of the description, which create() compares against.
+     */
+    static const char *description_init;
+  };
+
+  /**
+   * A pattern that always returns <tt>true</tt> when testing a string.
+   */
+  class Anything : public PatternBase
+  {
+  public:
+    /**
+     * Constructor. (It is declared so that this class has at least one
+     * non-virtual function, as otherwise sometimes no virtual table is emitted.)
+     */
+    Anything ();
+
+    /**
+     * Return <tt>true</tt> if the string matches its constraints, i.e.
+     * always.
+     */
+    virtual bool match (const std::string &test_string) const;
+
+    /**
+     * Return a description of the pattern that valid strings are expected to
+     * match. Here, this is the string <tt>"[Anything]"</tt>.
+     */
+    virtual std::string description () const;
+
+    /**
+     * Return a copy of the present object, which is newly allocated on the
+     * heap. Ownership of that object is transferred to the caller of this
+     * function.
+     */
+    virtual PatternBase *clone () const;
+
+    /**
+     * Create a new object if the start of @p description matches
+     * description_init.  Ownership of that object is transferred to the
+     * caller of this function.
+     */
+    static Anything *create (const std::string &description);
+
+  private:
+    /**
+     * Initial part of the description, which create() compares against.
+     */
+    static const char *description_init;
+  };
+
+
+  /**
+   * A pattern that can be used to indicate when a parameter is intended to be
+   * the name of a file. By itself, this class does not check whether the
+   * string that is given in a parameter file actually corresponds to an
+   * existing file (it could, for example, be the name of a file to which you
+   * want to write output). Functionally, the class is therefore equivalent to
+   * the Anything class. However, it allows one to specify the <i>intent</i> of
+   * a parameter. The flag given to the constructor also allows one to specify
+   * whether the file is supposed to be an input or output file.
+   *
+   * The reason for the existence of this class is to support graphical user
+   * interfaces for editing parameter files. These may open a file selection
+   * dialog if the filename is supposed to represent an input file.
+   */
+  class FileName : public PatternBase
+  {
+  public:
+    /**
+     * Files can be used for input or output. This can be specified in the
+     * constructor by choosing the flag <tt>type</tt>.
+     */
+    enum FileType {input = 0, output = 1};
+
+    /**
+     * Constructor.  The type of the file can be specified by choosing the
+     * flag; it defaults to an input file.
+     */
+    FileName (const FileType type = input);
+
+    /**
+     * Return <tt>true</tt> if the string matches its constraints, i.e.
+     * always.
+     */
+    virtual bool match (const std::string &test_string) const;
+
+    /**
+     * Return a description of the pattern that valid strings are expected to
+     * match. Here, this is the string <tt>"[Filename]"</tt>.
+     */
+    virtual std::string description () const;
+
+    /**
+     * Return a copy of the present object, which is newly allocated on the
+     * heap. Ownership of that object is transferred to the caller of this
+     * function.
+     */
+    virtual PatternBase *clone () const;
+
+    /**
+     * File type flag: whether the file is meant for input or for output.
+     */
+    FileType  file_type;
+
+    /**
+     * Create a new object if the start of @p description matches
+     * description_init.  Ownership of that object is transferred to the
+     * caller of this function.
+     */
+    static FileName *create (const std::string &description);
+
+  private:
+    /**
+     * Initial part of the description, which create() compares against.
+     */
+    static const char *description_init;
+  };
+
+
+  /**
+   * A pattern that can be used to indicate when a parameter is intended to be
+   * the name of a directory. By itself, this class does not check whether the
+   * string that is given in a parameter file actually corresponds to an
+   * existing directory. Functionally, the class is therefore equivalent to
+   * the Anything class. However, it allows one to specify the <i>intent</i> of
+   * a parameter.
+   *
+   * The reason for the existence of this class is to support graphical user
+   * interfaces for editing parameter files. These may open a file selection
+   * dialog to select or create a directory.
+   */
+  class DirectoryName : public PatternBase
+  {
+  public:
+    /**
+     * Constructor.
+     */
+    DirectoryName ();
+
+    /**
+     * Return <tt>true</tt> if the string matches its constraints, i.e.
+     * always.
+     */
+    virtual bool match (const std::string &test_string) const;
+
+    /**
+     * Return a description of the pattern that valid strings are expected to
+     * match (presumably <tt>"[DirectoryName]"</tt> -- TODO confirm).
+     */
+    virtual std::string description () const;
+
+    /**
+     * Return a copy of the present object, which is newly allocated on the
+     * heap. Ownership of that object is transferred to the caller of this
+     * function.
+     */
+    virtual PatternBase *clone () const;
+
+    /**
+     * Create a new object if the start of @p description matches
+     * description_init.  Ownership of that object is transferred to the
+     * caller of this function.
+     */
+    static DirectoryName *create (const std::string &description);
+
+  private:
+    /**
+     * Initial part of the description, which create() compares against.
+     */
+    static const char *description_init;
+  };
+}
+
+
+/**
+ * The ParameterHandler class provides a standard interface to an input file
+ * which provides, at run-time, program parameters such as time step sizes,
+ * geometries, right hand sides etc. The input for the program is given in
+ * files, streams or strings in memory using text like
+ *   @code
+ *     set Time step size = 0.3
+ *     set Geometry       = [0,1]x[0,3]
+ *   @endcode
+ * Input may be sorted into subsection trees in order to give the input a
+ * logical structure, and input files may include other files.
+ *
+ * The ParameterHandler class is discussed in detail in the
+ * @ref step_19 "step-19"
+ * example program, and is used in more realistic situations in step-29,
+ * step-33 and step-34.
+ *
+ * <h3>Declaring entries</h3>
+ *
+ * In order to use the facilities of a ParameterHandler object, one first has
+ * to make known the different entries the input file may or may not contain.
+ * This is done in the following way:
+ *
+ *   @code
+ *     ...
+ *     ParameterHandler prm;
+ *     prm.declare_entry ("Time step size",
+ *                       "0.2",
+ *                       Patterns::Double(),
+ *                       "Some documentation");
+ *     prm.declare_entry ("Geometry",
+ *                       "[0,1]x[0,1]",
+ *                       Patterns::Anything());
+ *     ...
+ *   @endcode
+ * Each entry is declared using the function declare_entry(). The first
+ * parameter is the name of the entry (in short: the entry). The second is the
+ * default answer to be taken in case the entry is not specified in the input
+ * file. The third parameter is a regular expression which the input (and the
+ * default answer) has to match.  Several such regular expressions are defined
+ * in Patterns. This parameter can be omitted, in which case it will default
+ * to Patterns::Anything, i.e. a pattern that matches every input string. The
+ * fourth parameter can be used to document the intent or expected format of
+ * an entry; its value is printed as a comment when writing all entries of a
+ * ParameterHandler object using the print_parameters() function to allow for
+ * easier understanding of a parameter file. It can be omitted as well, in
+ * which case no such documentation will be printed.
+ *
+ * Entries may be located in subsections which form a kind of input tree. For
+ * example input parameters for linear solver routines should be classified in
+ * a subsection named <tt>Linear solver</tt> or any other suitable name. This
+ * is accomplished in the following way:
+ *   @code
+ *     ...
+ *       LinEq eq;
+ *       eq.declare_parameters (prm);
+ *     ...
+ *
+ *     void LinEq::declare_parameters (ParameterHandler &prm) {
+ *       prm.enter_subsection("Linear solver");
+ *       {
+ *         prm.declare_entry ("Solver",
+ *                            "CG",
+ *                            Patterns::Selection("CG|GMRES|GaussElim"),
+ *                            "Name of a linear solver for the inner iteration");
+ *         prm.declare_entry ("Maximum number of iterations",
+ *                            "20",
+ *                            Patterns::Integer());
+ *         ...
+ *       }
+ *       prm.leave_subsection ();
+ *     }
+ *   @endcode
+ *
+ * Subsections may be nested. For example a nonlinear solver may have a linear
+ * solver as member object. Then the function call tree would be something
+ * like (if the class <tt>NonLinEq</tt> has a member variable <tt>eq</tt> of
+ * type <tt>LinEq</tt>):
+ *   @code
+ *     void NonLinEq::declare_parameters (ParameterHandler &prm) {
+ *       prm.enter_subsection ("Nonlinear solver");
+ *       {
+ *         prm.declare_entry ("Nonlinear method",
+ *                            "Newton-Raphson",
+ *                            Patterns::Anything());
+ *         eq.declare_parameters (prm);
+ *       }
+ *       prm.leave_subsection ();
+ *     }
+ *   @endcode
+ *
+ * For class member functions which declare the different entries we propose
+ * to use the common name <tt>declare_parameters</tt>. In normal cases this
+ * method can be <tt>static</tt> since the entries will not depend on any
+ * previous knowledge. Classes for which entries should logically be grouped
+ * into subsections should declare these subsections themselves. If a class
+ * has two or more member variables of the same type both of which should have
+ * their own parameters, this parent class' method <tt>declare_parameters</tt>
+ * is responsible to group them into different subsections:
+ *   @code
+ *     void NonLinEq::declare_parameters (ParameterHandler &prm) {
+ *       prm.enter_subsection ("Nonlinear solver");
+ *       {
+ *         prm.enter_subsection ("Linear solver 1");
+ *         {
+ *           eq1.declare_parameters (prm);
+ *         }
+ *         prm.leave_subsection ();
+ *
+ *         prm.enter_subsection ("Linear solver 2");
+ *         {
+ *           eq2.declare_parameters (prm);
+ *         }
+ *         prm.leave_subsection ();
+ *       }
+ *       prm.leave_subsection ();
+ *     }
+ *   @endcode
+ *
+ *
+ * <h3>Input files and special characters</h3>
+ *
+ * For the first example above the input file would look like the following:
+ *   @code
+ *     ...
+ *     subsection Nonlinear solver
+ *       set Nonlinear method = Gradient
+ *       # this is a comment
+ *       subsection Linear solver
+ *         set Solver                       = CG
+ *         set Maximum number of iterations = 30
+ *       end
+ *     end
+ *     ...                       # other stuff
+ *   @endcode
+ * The words <tt>subsection</tt>, <tt>set</tt> and <tt>end</tt> may be either
+ * written in lowercase or uppercase letters. Leading and trailing whitespace
+ * is removed, multiple whitespace is condensed into only one. Since the
+ * latter applies also to the name of an entry, an entry name will not be
+ * recognized if in the declaration multiple whitespace is used.
+ *
+ * In entry names and values the following characters are not allowed:
+ * <tt>\#</tt>, <tt>{</tt>, <tt>}</tt>, <tt>|</tt>. Their use is reserved for
+ * the MultipleParameterLoop class.
+ *
+ * Comments starting with \# are skipped.
+ *
+ * Continuation lines are allowed by means of the character <tt>\\</tt>, which
+ * must be the last character (aside from whitespace, which is ignored) of the
+ * line. When a line is a continuation (i.e., the previous line ended in a
+ * <tt>\\</tt>), then, unlike the default behavior of the <tt>C</tt>
+ * preprocessor, all whitespace at the beginning of the line is ignored.
+ *
+ * We propose to use the following scheme to name entries: start the first
+ * word with a capital letter and use lowercase letters further on. The same
+ * applies to the possible entry values to the right of the <tt>=</tt> sign.
+ *
+ *
+ * <h3>Including other input files</h3>
+ *
+ * An input file can include other include files using the syntax
+ *   @code
+ *     ...
+ *     include some_other_file.prm
+ *     ...
+ *   @endcode
+ * The file so referenced is searched for relative to the current directory
+ * (not relative to the directory in which the including parameter file is
+ * located, since this is not known to all three versions of the read_input()
+ * function).
+ *
+ *
+ * <h3>Reading data from input sources</h3>
+ *
+ * In order to read input there are three possibilities: reading from an
+ * <tt>std::istream</tt> object, reading from a file of which the name is
+ * given and reading from a string in memory in which the lines are separated
+ * by <tt>@\n</tt> characters. These possibilities are used as follows:
+ *   @code
+ *     ParameterHandler prm;
+ *     ...
+ *     // declaration of entries
+ *     ...
+ *     prm.read_input (cin);         // read input from standard in,
+ *     // or
+ *     prm.read_input ("simulation.in");
+ *     // or
+ *     const char *in = "set Time step size = 0.3 \n ...";
+ *     prm.read_input_from_string (in);
+ *     ...
+ *   @endcode
+ * You can use several sources of input successively. Entries which are
+ * changed more than once will be overwritten every time they are used.
+ *
+ * You should not try to declare entries using declare_entry() and
+ * enter_subsection() with as yet unknown subsection names after using
+ * read_input(). The results in this case are unspecified.
+ *
+ * If an error occurs upon reading the input, error messages are written to
+ * <tt>std::cerr</tt> and the reader function returns with a return value of
+ * <code>false</code>. This is opposed to almost all other functions in
+ * deal.II, which would normally throw an exception if an error occurs; this
+ * difference in behavior is a relic of the fact that this class predates
+ * deal.II and had previously been written for a different project.
+ *
+ *
+ * <h3>Using the %ParameterHandler Graphical User Interface</h3>
+ *
+ * An alternative to using the hand-written input files shown above is to use
+ * the graphical user interface (GUI) that accompanies this class. For this,
+ * you first need to write a description of all the parameters, their default
+ * values, patterns and documentation strings into a file in a format that the
+ * GUI can understand; this is done using the
+ * ParameterHandler::print_parameters() function with ParameterHandler::XML as
+ * second argument, as discussed in more detail below in the <i>Representation
+ * of Parameters</i> section. This file can then be loaded using the
+ * executable for the GUI, which should be located in
+ * <code>lib/bin/dealii_parameter_gui</code> of your deal.II installation,
+ * assuming that you have a sufficiently recent version of the <a
+ * href="http://qt.nokia.com/">Qt toolkit</a> installed.
+ *
+ * Once loaded, the GUI displays subsections and individual parameters in tree
+ * form (see also the discussion in the <i>Representation of Parameters</i>
+ * section below). Here is a screen shot with some sub-sections expanded and
+ * one parameter selected for editing:
+ *
+ * @image html parameter_gui.png "Parameter GUI"
+ *
+ * Using the GUI, you can edit the values of individual parameters and save
+ * the result in the same format as before. It can then be read in using the
+ * ParameterHandler::read_input_from_xml() function.
+ *
+ *
+ * <h3>Getting entry values out of a %ParameterHandler object</h3>
+ *
+ * Each class gets its data out of a ParameterHandler object by calling the
+ * get()  member functions like this:
+ *   @code
+ *      void NonLinEq::get_parameters (ParameterHandler &prm) {
+ *       prm.enter_subsection ("Nonlinear solver");
+ *       std::string method = prm.get ("Nonlinear method");
+ *       eq.get_parameters (prm);
+ *       prm.leave_subsection ();
+ *     }
+ *   @endcode
+ * get() returns the value of the given entry. If the entry was not specified
+ * in the input source(s), the default value is returned. You have to enter
+ * and leave subsections exactly as you did when declaring subsections. You may
+ * choose the order in which to traverse the subsection tree.
+ *
+ * It is guaranteed that only entries matching the given regular expression
+ * are returned, i.e. an input entry value which does not match the regular
+ * expression is not stored.
+ *
+ * You can use get() to retrieve the parameter in text form, get_integer() to
+ * get an integer or get_double() to get a double. You can also use
+ * get_bool(). It will cause an internal error if the string could not be
+ * converted to an integer, double or a bool. This should, though, not happen
+ * if you correctly specified the regular expression for this entry; you
+ * should not try to get out an integer or a double from an entry for which no
+ * according regular expression was set. The internal error is raised through
+ * the Assert() macro family which only works in debug mode.
+ *
+ * If you want to print out all user selectable features, use the
+ * print_parameters() function. It is generally a good idea to print all
+ * parameters at the beginning of a log file, since this way input and output
+ * are together in one file which makes matching at a later time easier.
+ * Additionally, the function also prints those entries which have not been
+ * modified in the input file and are thus set to default values; since
+ * default values may change in the process of program development, you cannot
+ * know the values of parameters not specified in the input file.
+ *
+ *
+ * <h3>Style guide for data retrieval</h3>
+ *
+ * We propose that every class which gets data out of a ParameterHandler
+ * object provides a function named <tt>get_parameters</tt>. This should be
+ * declared <tt>virtual</tt>. <tt>get_parameters</tt> functions in derived
+ * classes should call the <tt>BaseClass::get_parameters</tt> function.
+ *
+ *
+ * <h3>Experience with large parameter lists</h3>
+ *
+ * Experience has shown that in programs defining larger numbers of parameters
+ * (more than, say, fifty) it is advantageous to define an additional class
+ * holding these parameters. This class is more like a C-style structure,
+ * having a large number of variables, usually public. It then has at least
+ * two functions, which declare and parse the parameters. In the main program,
+ * the main class has an object of this parameter class and delegates
+ * declaration and parsing of parameters to this object.
+ *
+ * The advantage of this approach is that you can keep the technical
+ * details (declaration and parsing) out of the main class and additionally
+ * don't clutter up your main class with dozens or more variables denoting the
+ * parameters.
+ *
+ *
+ *
+ * <h3>Worked Example</h3>
+ *
+ * This is the code:
+ *   @code
+ *     #include <iostream>
+ *     #include "../include/parameter_handler.h"
+ *
+ *     using namespace dealii;
+ *
+ *     class LinEq
+ *     {
+ *     public:
+ *       static void declare_parameters (ParameterHandler &prm);
+ *       void get_parameters (ParameterHandler &prm);
+ *     private:
+ *       std::string Method;
+ *       int    MaxIterations;
+ *     };
+ *
+ *
+ *     class Problem
+ *     {
+ *     private:
+ *       LinEq eq1, eq2;
+ *       std::string Matrix1, Matrix2;
+ *       std::string outfile;
+ *     public:
+ *       static void declare_parameters (ParameterHandler &prm);
+ *       void get_parameters (ParameterHandler &prm);
+ *     };
+ *
+ *
+ *
+ *     void LinEq::declare_parameters (ParameterHandler &prm)
+ *     {
+ *       // declare parameters for the linear solver in a subsection
+ *       prm.enter_subsection ("Linear solver");
+ *       prm.declare_entry ("Solver",
+ *                          "CG",
+ *                          Patterns::Selection("CG|BiCGStab|GMRES"),
+ *                          "Name of a linear solver for the inner iteration");
+ *       prm.declare_entry ("Maximum number of iterations",
+ *                          "20",
+ *                          Patterns::Integer());
+ *       prm.leave_subsection ();
+ *     }
+ *
+ *
+ *     void LinEq::get_parameters (ParameterHandler &prm)
+ *     {
+ *       prm.enter_subsection ("Linear solver");
+ *       Method        = prm.get ("Solver");
+ *       MaxIterations = prm.get_integer ("Maximum number of iterations");
+ *       prm.leave_subsection ();
+ *       std::cout << "  LinEq: Method=" << Method << ", MaxIterations=" << MaxIterations << std::endl;
+ *     }
+ *
+ *
+ *
+ *     void Problem::declare_parameters (ParameterHandler &prm)
+ *     {
+ *       // first some global parameter entries
+ *       prm.declare_entry ("Output file",
+ *                          "out",
+ *                          Patterns::Anything(),
+ *                          "Name of the output file, either relative to the present "
+ *                          "path or absolute");
+ *       prm.declare_entry ("Equation 1",
+ *                          "Laplace",
+ *                          Patterns::Anything(),
+ *                          "String identifying the equation we want to solve");
+ *       prm.declare_entry ("Equation 2",
+ *                          "Elasticity",
+ *                          Patterns::Anything());
+ *
+ *       // declare parameters for the first equation
+ *       prm.enter_subsection ("Equation 1");
+ *       prm.declare_entry ("Matrix type",
+ *                          "Sparse",
+ *                          Patterns::Selection("Full|Sparse|Diagonal"),
+ *                          "Type of the matrix to be used, either full, "
+ *                          "sparse, or diagonal");
+ *       LinEq::declare_parameters (prm);  // for eq1
+ *       prm.leave_subsection ();
+ *
+ *       // declare parameters for the second equation
+ *       prm.enter_subsection ("Equation 2");
+ *       prm.declare_entry ("Matrix type",
+ *                          "Sparse",
+ *                          Patterns::Selection("Full|Sparse|Diagonal"));
+ *       LinEq::declare_parameters (prm);  // for eq2
+ *       prm.leave_subsection ();
+ *     }
+ *
+ *
+ *     void Problem::get_parameters (ParameterHandler &prm)
+ *     {
+ *       // entries of the problem class
+ *       outfile = prm.get ("Output file");
+ *
+ *       std::string equation1 = prm.get ("Equation 1"),
+ *              equation2 = prm.get ("Equation 2");
+ *
+ *       // get parameters for the first equation
+ *       prm.enter_subsection ("Equation 1");
+ *       Matrix1 = prm.get ("Matrix type");
+ *       eq1.get_parameters (prm); // for eq1
+ *       prm.leave_subsection ();
+ *
+ *       // get parameters for the second equation
+ *       prm.enter_subsection ("Equation 2");
+ *       Matrix2 = prm.get ("Matrix type");
+ *       eq2.get_parameters (prm); // for eq2
+ *       prm.leave_subsection ();
+ *
+ *       std::cout << "  Problem: outfile=" << outfile << '\n'
+ *                 << "           eq1="     << equation1 << ", eq2=" << equation2 << '\n'
+ *                 << "           Matrix1=" << Matrix1 << ", Matrix2=" << Matrix2 << std::endl;
+ *     }
+ *
+ *
+ *
+ *
+ *     int main ()
+ *     {
+ *       ParameterHandler prm;
+ *       Problem p;
+ *
+ *       p.declare_parameters (prm);
+ *
+ *       // read input from "prmtest.prm"; giving argv[1] would also be a
+ *       // good idea
+ *       prm.read_input ("prmtest.prm");
+ *
+ *       // print parameters to std::cout as ASCII text
+ *       std::cout << std::endl << std::endl;
+ *       prm.print_parameters (std::cout, ParameterHandler::Text);
+ *
+ *       // get parameters into the program
+ *       std::cout << std::endl << std::endl
+ *                 << "Getting parameters:" << std::endl;
+ *       p.get_parameters (prm);
+ *
+ *       // now run the program with these input parameters
+ *       p.do_something ();
+ *     }
+ *   @endcode
+ *
+ *
+ * This is the input file (named "prmtest.prm"):
+ *   @code
+ *     # first declare the types of equations
+ *     set Equation 1 = Poisson
+ *     set Equation 2 = Navier-Stokes
+ *
+ *     subsection Equation 1
+ *       set Matrix type = Sparse
+ *       subsection Linear solver # parameters for linear solver 1
+ *         set Solver                       = Gauss-Seidel
+ *         set Maximum number of iterations = 40
+ *       end
+ *     end
+ *
+ *     subsection Equation 2
+ *       set Matrix type = Full
+ *       subsection Linear solver
+ *         set Solver                       = CG
+ *         set Maximum number of iterations = 100
+ *       end
+ *     end
+ *   @endcode
+ *
+ * And here is the output of the program:
+ *   @code
+ *     Line 8:
+ *         The entry value
+ *             Gauss-Seidel
+ *         for the entry named
+ *             Solver
+ *         does not match the given regular expression
+ *             CG|BiCGStab|GMRES
+ *
+ *
+ *     Listing of Parameters
+ *     ---------------------
+ *       set Equation 1  = Poisson  # Laplace
+ *       set Equation 2  = Navier-Stokes  # Elasticity
+ *       set Output file = out
+ *       subsection Equation 1
+ *         set Matrix type = Sparse  # Sparse
+ *         subsection Linear solver
+ *           set Maximum number of iterations = 40  # 20
+ *           set Solver                       = CG
+ *         end
+ *       end
+ *       subsection Equation 2
+ *         set Matrix type = Full  # Sparse
+ *         subsection Linear solver
+ *           set Maximum number of iterations = 100  # 20
+ *           set Solver                       = CG   # CG
+ *         end
+ *       end
+ *
+ *
+ *     Getting parameters:
+ *       LinEq: Method=CG, MaxIterations=40
+ *       LinEq: Method=CG, MaxIterations=100
+ *       Problem: outfile=out
+ *                eq1=Poisson, eq2=Navier-Stokes
+ *                Matrix1=Sparse, Matrix2=Full
+ *   @endcode
+ *
+ *
+ *
+ * <h3>Representation of Parameters</h3>
+ *
+ * Here is some more internal information about the representation of
+ * parameters:
+ *
+ * Logically, parameters and the nested sections they are arranged in can be
+ * thought of as a hierarchical directory structure, or a tree. Take, for
+ * example, the following code declaring a set of parameters and sections they
+ * live in:
+ *   @code
+ *     ParameterHandler prm;
+ *
+ *     prm.declare_entry ("Maximal number of iterations",
+ *                        "10",
+ *                        Patterns::Integer (1, 1000),
+ *                        "A parameter that describes the maximal number of "
+ *                        "iterations the CG method is to take before giving "
+ *                        "up on a matrix.");
+ *     prm.enter_subsection ("Preconditioner");
+ *     {
+ *       prm.declare_entry ("Kind",
+ *                          "SSOR",
+ *                          Patterns::Selection ("SSOR|Jacobi"),
+ *                          "A string that describes the kind of preconditioner "
+ *                          "to use.");
+ *       prm.declare_entry ("Relaxation factor",
+ *                          "1.0",
+ *                          Patterns::Double (0, 1),
+ *                          "The numerical value (between zero and one) for the "
+ *                          "relaxation factor to use in the preconditioner.");
+ *     }
+ *     prm.leave_subsection ();
+ *   @endcode
+ *
+ * We can think of the parameters so arranged as a file system in which every
+ * parameter is a directory. The name of this directory is the name of the
+ * parameter, and in this directory lie files that describe the parameter.
+ * These files are:
+ * - <code>value</code>: The content of this file is the current value of
+ *   this parameter; initially, the content of the file equals the default
+ *   value of the parameter.
+ * - <code>default_value</code>: The content of this file is the default
+ *   value of the parameter.
+ * - <code>pattern</code>: A textual representation of the pattern that
+ *   describes the parameter's possible values.
+ * - <code>pattern_index</code>: A number that indexes the
+ *   Patterns::PatternBase object that is used to describe the parameter.
+ * - <code>documentation</code>: The content of this file is the
+ *   documentation given for a parameter as the last argument of the
+ *   ParameterHandler::declare_entry call.
+ *
+ * With the exception of the <code>value</code> file, the contents of files
+ * are never changed after declaration of a parameter.
+ *
+ * Alternatively, a directory in this file system may not have a file called
+ * <code>value</code> in it. In that case, the directory represents a
+ * subsection as declared above, and the directory's name will correspond to
+ * the name of the subsection. It will then have no files in it at all, but it
+ * may have further directories in it: some of these directories will be
+ * parameters (indicated by the presence of files) or further nested
+ * subsections.
+ *
+ * Given this explanation, the code above will lead to a hierarchical
+ * representation of data that looks like this (the content of files is
+ * indicated at the right in a different font):
+ *
+ * @image html parameter_handler.png
+ *
+ * Once parameters have been read in, the contents of the <code>value</code>
+ * "files" may be different while the other files remain untouched.
+ *
+ * Using the ParameterHandler::print_parameters() function with
+ * ParameterHandler::XML as second argument, we can get a complete
+ * representation of this data structure in XML. It will look like this:
+ *   @code
+ *   <?xml version="1.0" encoding="utf-8"?>
+ *   <ParameterHandler>
+ *     <Maximal_20number_20of_20iterations>
+ *       <value>10</value>
+ *       <default_value>10</default_value>
+ *       <documentation>A parameter that describes the maximal number of iterations the CG method is to take before giving up on a matrix.</documentation>
+ *       <pattern>0</pattern>
+ *       <pattern_description>[Integer range 1...1000 (inclusive)]</pattern_description>
+ *     </Maximal_20number_20of_20iterations>
+ *     <Preconditioner>
+ *       <Kind><value>SSOR</value>
+ *         <default_value>SSOR</default_value>
+ *         <documentation>A string that describes the kind of preconditioner to use.</documentation>
+ *         <pattern>1</pattern>
+ *         <pattern_description>SSOR|Jacobi</pattern_description>
+ *       </Kind>
+ *       <Relaxation_20factor>
+ *         <value>1.0</value>
+ *         <default_value>1.0</default_value>
+ *         <documentation>The numerical value (between zero and one) for the relaxation factor to use in the preconditioner.</documentation>
+ *         <pattern>2</pattern>
+ *         <pattern_description>[Floating point range 0...1 (inclusive)]</pattern_description>
+ *       </Relaxation_20factor>
+ *     </Preconditioner>
+ *   </ParameterHandler>
+ *   @endcode
+ * This representation closely resembles the directory/file structure
+ * discussed above. The only difference is that directory and file names are
+ * mangled: since they should only contain letters and numbers, every
+ * character in their names that is not a letter or number is replaced by an
+ * underscore followed by its two-digit hexadecimal representation. In
+ * addition, the special name "value" is mangled when used as the name of a
+ * parameter, given that this name is also used to name special files in the
+ * hierarchy structure. Finally, the entire tree is wrapped into a tag
+ * <code>%ParameterHandler</code> to satisfy the XML requirement that there be
+ * only a single top-level construct in each file.
+ *
+ * The tree structure (and its XML representation) is what the graphical user
+ * interface (see above) uses to represent parameters like a directory/file
+ * collection.
+ *
+ *
+ * @ingroup input
+ * @author Wolfgang Bangerth, October 1997, revised February 1998, 2010, 2011
+ * @author Alberto Sartori, 2015
+ * @author David Wells, 2016
+ */
+class ParameterHandler : public Subscriptor
+{
+private:
+  /**
+   * Inhibit the automatically generated copy constructor by declaring it
+   * private.
+   */
+  ParameterHandler (const ParameterHandler &);
+
+  /**
+   * Inhibit the automatically generated assignment operator by declaring it
+   * private.
+   */
+  ParameterHandler &operator= (const ParameterHandler &);
+
+public:
+  /**
+   * List of possible output formats.
+   *
+   * The formats down the list with prefix <em>Short</em> and bit 6 and 7 set
+   * reproduce the old behavior of not writing comments or original values to
+   * the files.
+   */
+  enum OutputStyle
+  {
+    /**
+     * Write human readable output suitable to be read by ParameterHandler
+     * again.
+     */
+    Text = 1,
+    /**
+     * Write parameters as a LaTeX table.
+     */
+    LaTeX = 2,
+    /**
+     * Write out declared parameters with description and possible values.
+     */
+    Description = 3,
+
+    /**
+     * Write out everything as an <a
+     * href="http://en.wikipedia.org/wiki/XML">XML</a> file.
+     *
+     * See the general documentation of this class for an example of output.
+     */
+    XML = 4,
+
+    /**
+     * Write out everything as a <a
+     * href="http://en.wikipedia.org/wiki/JSON">JSON</a> file.
+     */
+    JSON = 5,
+
+    /**
+     * Write input for ParameterHandler without comments or changed default
+     * values.
+     */
+    ShortText = 193
+  };
+
+
+
+  /**
+   * Constructor.
+   */
+  ParameterHandler ();
+
+  /**
+   * Destructor. Declare this only to have a virtual destructor, which is
+   * safer as we have virtual functions. It actually does nothing
+   * spectacular.
+   */
+  virtual ~ParameterHandler ();
+
+  /**
+   * Read input from a stream until the stream returns the <tt>eof</tt>
+   * condition or error. The second argument can be used to denote the name of
+   * the file (if that's what the input stream represents) we are reading
+   * from; this is only used when creating output for error messages.
+   *
+   * Return whether the read was successful.
+   */
+  virtual bool read_input (std::istream &input,
+                           const std::string &filename = "input file");
+
+  /**
+   * Read input from a file the name of which is given. The PathSearch class
+   * "PARAMETERS" is used to find the file.
+   *
+   * Return whether the read was successful.
+   *
+   * Unless <tt>optional</tt> is <tt>true</tt>, this function will
+   * automatically generate the requested file with default values if the file
+   * did not exist. This file will not contain additional comments if
+   * <tt>write_stripped_file</tt> is <tt>true</tt>.
+   */
+  virtual bool read_input (const std::string &filename,
+                           const bool optional = false,
+                           const bool write_stripped_file = false);
+
+  /**
+   * Read input from a string in memory. The lines in memory have to be
+   * separated by <tt>@\n</tt> characters.
+   *
+   * Return whether the read was successful.
+   */
+  virtual bool read_input_from_string (const char *s);
+
+  /**
+   * Read a parameter file in XML format. This could be from a file originally
+   * written by the print_parameters() function using the XML output style and
+   * then modified by hand as necessary; or from a file written using this
+   * method and then modified by the graphical parameter GUI (see the general
+   * documentation of this class).
+   *
+   * Return whether the read was successful.
+   */
+  virtual bool read_input_from_xml (std::istream &input);
+
+  /**
+   * Clear all contents.
+   */
+  void clear ();
+
+
+  /**
+   * Declare a new entry with name <tt>entry</tt> and default value
+   * <tt>default_value</tt>, for which any input has to match the
+   * <tt>pattern</tt> (default: any pattern).
+   *
+   * The last parameter defaulting to an empty string is used to add a
+   * documenting text to each entry which will be printed as a comment when
+   * this class is asked to write out all declarations to a stream using the
+   * print_parameters() function.
+   *
+   * The function generates an exception of type ExcValueDoesNotMatchPattern
+   * if the default value doesn't match the given pattern, using the C++ throw
+   * mechanism. However, this exception is only generated <i>after</i> the
+   * entry has been created; if you have code where no sensible default value
+   * for a parameter is possible, you can then catch and ignore this
+   * exception.
+   *
+   * @note An entry can be declared more than once without generating an
+   * error, for example to override an earlier default value.
+   */
+  void declare_entry (const std::string           &entry,
+                      const std::string           &default_value,
+                      const Patterns::PatternBase &pattern = Patterns::Anything(),
+                      const std::string           &documentation = std::string());
+
+  /**
+   * Create an alias for an existing entry. This provides a way to refer to a
+   * parameter in the input file using an alternate name. The alias will be in
+   * the current section, and the referenced entry needs to be an existing
+   * entry in the current section.
+   *
+   * The primary purpose of this function is to allow for a backward
+   * compatible way of changing names in input files of applications for which
+   * backward compatibility is important. This can be achieved by changing the
+   * name of the parameter in the call to declare_entry(), and then creating
+   * an alias that maps the old name to the new name. This way, old input
+   * files can continue to refer to parameters under the old name, and they
+   * will automatically be mapped to the new parameter name.
+   *
+   * It is valid to set the same parameter multiple times in an input file.
+   * The value that will ultimately be chosen in such cases is simply the last
+   * value set. This rule also applies to aliases, where the final value of a
+   * parameter is the last value set either through the current name of the
+   * parameter or through any of its possible multiple aliases. For example,
+   * if you have an input file that looks like
+   * @code
+   *   set parm1       = 1
+   *   set parm1_alias = 2
+   * @endcode
+   * where <code>parm1_alias</code> is an alias declared via
+   * @code
+   *   prm.declare_alias ("parm1", "parm1_alias");
+   * @endcode
+   * then the final value for the parameter called <code>parm1</code> will be
+   * 2, not 1.
+   *
+   * @param existing_entry_name The name of an existing parameter in the
+   * current section that the alias should refer to.
+   * @param alias_name An alternate name for the parameter referenced by the
+   * first argument.
+   * @param alias_is_deprecated If true, mark the alias as deprecated. This
+   * will then be listed in the description of the alias if you call
+   * print_parameters(), and you will get a warning on the screen when reading
+   * an input file that contains this deprecated alias. The purpose of this
+   * argument is to be able to allow the use of an old name for a parameter
+   * (see above) but make it clear that this old name will eventually be
+   * removed.
+   */
+  void declare_alias (const std::string &existing_entry_name,
+                      const std::string &alias_name,
+                      const bool         alias_is_deprecated = false);
+
+  /**
+   * Enter a subsection. If it does not yet exist, create it.
+   */
+  void enter_subsection (const std::string &subsection);
+
+  /**
+   * Leave present subsection.
+   */
+  void leave_subsection ();
+
+  /**
+   * Return value of entry <tt>entry_string</tt>.  If the entry was changed,
+   * then the changed value is returned, otherwise the default value. If the
+   * value of an undeclared entry is required, an exception will be thrown.
+   */
+  std::string get (const std::string &entry_string) const;
+
+  /**
+   * Return value of entry <tt>entry_string</tt> as <tt>long int</tt>. (A long
+   * int is chosen so that even very large unsigned values can be returned by
+   * this function).
+   */
+  long int       get_integer (const std::string &entry_string) const;
+
+  /**
+   * Return value of entry <tt>entry_name</tt> as <tt>double</tt>.
+   */
+  double         get_double (const std::string &entry_name) const;
+
+  /**
+   * Return value of entry <tt>entry_name</tt> as <tt>bool</tt>. The entry may
+   * be "true" or "yes" for <tt>true</tt>, "false" or "no" for <tt>false</tt>
+   * respectively.
+   */
+  bool           get_bool (const std::string &entry_name) const;
+
+  /**
+   * Change the value presently stored for <tt>entry_name</tt> to the one
+   * given in the second argument.
+   *
+   * The parameter must already exist in the present subsection.
+   *
+   * The function throws an exception of type ExcValueDoesNotMatchPattern if
+   * the new value does not conform to the pattern for this entry.
+   */
+  void           set (const std::string &entry_name,
+                      const std::string &new_value);
+
+  /**
+   * Same as above, but an overload where the second argument is a character
+   * pointer. This is necessary, since otherwise the call to
+   * <tt>set("abc","def")</tt> will be mapped to the function taking one
+   * string and a bool as arguments, which is certainly not what is most often
+   * intended.
+   *
+   * The function throws an exception of type ExcValueDoesNotMatchPattern if
+   * the new value does not conform to the pattern for this entry.
+   */
+  void           set (const std::string &entry_name,
+                      const char        *new_value);
+
+  /**
+   * Change the value presently stored for <tt>entry_name</tt> to the one
+   * given in the second argument.
+   *
+   * The parameter must already exist in the present subsection.
+   *
+   * The function throws an exception of type ExcValueDoesNotMatchPattern if
+   * the new value does not conform to the pattern for this entry.
+   */
+  void           set (const std::string &entry_name,
+                      const long int    &new_value);
+
+  /**
+   * Change the value presently stored for <tt>entry_name</tt> to the one
+   * given in the second argument.
+   *
+   * The parameter must already exist in the present subsection.
+   *
+   * For internal purposes, the new value needs to be converted to a string.
+   * This is done using 16 digits of accuracy, so the set value and the one
+   * you can get back out using get_double() may differ in the 16th digit.
+   *
+   * The function throws an exception of type ExcValueDoesNotMatchPattern if
+   * the new value does not conform to the pattern for this entry.
+   */
+  void           set (const std::string &entry_name,
+                      const double      &new_value);
+
+  /**
+   * Change the value presently stored for <tt>entry_name</tt> to the one
+   * given in the second argument.
+   *
+   * The parameter must already exist in the present subsection.
+   *
+   * The function throws an exception of type ExcValueDoesNotMatchPattern if
+   * the new value does not conform to the pattern for this entry.
+   */
+  void           set (const std::string &entry_name,
+                      const bool        &new_value);
+
+
+  /**
+   * Print all parameters with the given style to <tt>out</tt>. Presently only
+   * <tt>Text</tt>, <tt>LaTeX</tt> and <tt>XML</tt> are implemented.
+   *
+   * NOTE(review): the OutputStyle enum also lists <tt>Description</tt>,
+   * <tt>JSON</tt> and <tt>ShortText</tt>; confirm against the implementation
+   * whether the sentence above is still up to date.
+   *
+   * In <tt>Text</tt> format, the output is formatted in such a way that it is
+   * possible to use it for later input again. This is most useful to record
+   * the parameters for a specific run, since if you output the parameters
+   * using this function into a log file, you can always recover the results
+   * by simply copying the output to your input file.
+   *
+   * Besides the name and value of each entry, the output also contains the
+   * default value of entries if it is different from the actual value, as
+   * well as the documenting string given to the declare_entry() function if
+   * available.
+   *
+   * In <tt>XML</tt> format, the output starts with one root element
+   * <tt>ParameterHandler</tt> in order to get a valid XML document and all
+   * subsections under it.
+   *
+   * In <tt>LaTeX</tt> format, the output contains the same information but in
+   * a format so that the resulting file can be input into a latex document
+   * such as a manual for the code for which this object handles run-time
+   * parameters. The various sections of parameters are then represented by
+   * latex section and subsection commands as well as by nested enumerations.
+   *
+   * In addition, all parameter names are listed with <code>@\index</code>
+   * statements in two indices called <code>prmindex</code> (where the name of
+   * each parameter is listed in the index) and <code>prmindexfull</code>
+   * where parameter names are listed sorted by the section in which they
+   * exist. By default, the LaTeX program ignores these <code>@\index</code>
+   * commands, but they can be used to generate an index by using the
+   * following commands in the preamble of the latex file:
+   * @code
+   * \usepackage{imakeidx}
+   * \makeindex[name=prmindex, title=Index of run-time parameter entries]
+   * \makeindex[name=prmindexfull, title=Index of run-time parameters with section names]
+   * @endcode
+   * and at the end of the file this:
+   * @code
+   * \printindex[prmindex]
+   * \printindex[prmindexfull]
+   * @endcode
+   */
+  std::ostream &print_parameters (std::ostream      &out,
+                                  const OutputStyle  style);
+
+  /**
+   * Print out the parameters of the present subsection as given by the
+   * <tt>subsection_path</tt> member variable. This variable is controlled by
+   * entering and leaving subsections through the enter_subsection() and
+   * leave_subsection() functions.
+   *
+   * If <tt>include_top_level_elements</tt> is <tt>true</tt>, also the higher
+   * subsection elements are printed. In <tt>XML</tt> format this is required
+   * to get a valid XML document and output starts with one root element
+   * <tt>ParameterHandler</tt>.
+   *
+   * In most cases, you will not want to use this function directly, but have
+   * it called recursively by the previous function.
+   */
+  void print_parameters_section (std::ostream       &out,
+                                 const OutputStyle   style,
+                                 const unsigned int  indent_level,
+                                 const bool          include_top_level_elements = false);
+
+  /**
+   * Print parameters to a logstream. This function allows printing all
+   * parameters into a log file. Sections will be indented in the usual
+   * log-file style.
+   */
+  void log_parameters (LogStream &out);
+
+  /**
+   * Log parameters in the present subsection. The subsection is determined by
+   * the <tt>subsection_path</tt> member variable. This variable is controlled
+   * by entering and leaving subsections through the enter_subsection() and
+   * leave_subsection() functions.
+   *
+   * In most cases, you will not want to use this function directly, but have
+   * it called recursively by the previous function.
+   */
+  void log_parameters_section (LogStream &out);
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  std::size_t memory_consumption () const;
+
+  /**
+   * Write the data of this object to a stream for the purpose of
+   * serialization.
+   */
+  template <class Archive>
+  void save (Archive &ar, const unsigned int version) const;
+
+  /**
+   * Read the data of this object from a stream for the purpose of
+   * serialization.
+   */
+  template <class Archive>
+  void load (Archive &ar, const unsigned int version);
+
+  BOOST_SERIALIZATION_SPLIT_MEMBER()
+
+  /**
+   * Test for equality.
+   */
+  bool operator == (const ParameterHandler &prm2)  const;
+
+  /**
+   * @addtogroup Exceptions
+   * @{
+   */
+
+  /**
+   * Exception indicating that an entry with the given name already exists.
+   */
+  DeclException1 (ExcEntryAlreadyExists,
+                  std::string,
+                  << "The following entry already exists: " << arg1);
+  /**
+   * Exception indicating that a string (first argument) does not match the
+   * pattern (second argument) it is required to conform to.
+   */
+  DeclException2 (ExcValueDoesNotMatchPattern,
+                  std::string, std::string,
+                  << "The string <" << arg1
+                  << "> does not match the given pattern <" << arg2 << ">");
+  /**
+   * Exception. NOTE(review): judging by its name, presumably raised when
+   * trying to leave a subsection while already at the top level -- confirm
+   * against the implementation.
+   */
+  DeclException0 (ExcAlreadyAtTopLevel);
+  /**
+   * Exception indicating that an entry was asked for that has not been
+   * declared.
+   */
+  DeclException1 (ExcEntryUndeclared,
+                  std::string,
+                  << "You can't ask for entry <" << arg1 << "> you have not yet declared");
+  //@}
+private:
+  /**
+   * The separator used when accessing elements of a path into the parameter
+   * tree.
+   */
+  static const char path_separator = '.';
+
+  /**
+   * The complete tree of sections and entries. See the general documentation
+   * of this class for a description how data is stored in this variable.
+   *
+   * The variable is a pointer so that we can use an incomplete type, rather
+   * than having to include all of the property_tree stuff from boost. This
+   * works around a problem with gcc 4.5.
+   */
+  std_cxx11::unique_ptr<boost::property_tree::ptree> entries;
+
+  /**
+   * A list of patterns that are used to describe the parameters of this
+   * object. They are indexed by nodes in the property tree.
+   */
+  std::vector<std_cxx11::shared_ptr<const Patterns::PatternBase> > patterns;
+
+  /**
+   * Mangle a string so that it doesn't contain any special characters or
+   * spaces.
+   */
+  static std::string mangle (const std::string &s);
+
+  /**
+   * Unmangle a string into its original form.
+   */
+  static std::string demangle (const std::string &s);
+
+  /**
+   * Path of presently selected subsections; an empty list means top level.
+   */
+  std::vector<std::string> subsection_path;
+
+  /**
+   * Return the string that identifies the current path into the property
+   * tree. This is only a path, i.e. it is not terminated by the
+   * path_separator character.
+   */
+  std::string get_current_path () const;
+
+  /**
+   * Given the name of an entry as argument, the function computes a full path
+   * into the parameter tree using the current subsection.
+   */
+  std::string get_current_full_path (const std::string &name) const;
+
+  /**
+   * Scan one line of input. <tt>input_filename</tt> and
+   * <tt>current_line_n</tt> are the name of the input file and the current
+   * number of the line presently scanned (for the logs if there are
+   * messages). Return <tt>false</tt> if line contained stuff that could not
+   * be understood, the uppermost subsection was to be left by an <tt>END</tt>
+   * or <tt>end</tt> statement, a value for a non-declared entry was given or
+   * the entry value did not match the regular expression. <tt>true</tt>
+   * otherwise.
+   *
+   * The function modifies its argument, but also takes it by value, so the
+   * caller's variable is not changed.
+   */
+  bool scan_line (std::string         line,
+                  const std::string  &input_filename,
+                  const unsigned int  current_line_n);
+
+  friend class MultipleParameterLoop;
+};
+
+
+
+/**
+ * The class MultipleParameterLoop offers an easy possibility to test several
+ * parameter sets during one run of the program. For this it uses the
+ * ParameterHandler class to read in data in a standardized form, searches for
+ * variant entry values and performs a loop over all combinations of
+ * parameters.
+ *
+ * Variant entry values are given like this:
+ *   @verbatim
+ *     set Time step size = { 0.1 | 0.2 | 0.3 }
+ *   @endverbatim
+ * The loop will then perform three runs of the program, one for each value of
+ * <tt>Time step size</tt>, while all other parameters are as specified or
+ * with their default value. If there are several variant entry values in the
+ * input, a loop is performed for each combination of variant values:
+ *   @verbatim
+ *     set Time step size = { 0.1 | 0.2 }
+ *     set Solver         = { CG  | GMRES }
+ *   @endverbatim
+ * will result in four runs of the program, with time step 0.1 and 0.2 for
+ * each of the two solvers.
+ *
+ * In addition to variant entries, this class also supports <i>array
+ * entries</i> that look like this:
+ *   @verbatim
+ *     set Output file = ofile.{{ 1 | 2 | 3 | 4 }}
+ *   @endverbatim
+ * This indicates that if there are variant entries producing a total of four
+ * different runs, then we will write their results to the files
+ * <tt>ofile.1</tt>, <tt>ofile.2</tt>, <tt>ofile.3</tt> and <tt>ofile.4</tt>,
+ * respectively. Array entries do not generate multiple runs of the main loop
+ * themselves, but if there are variant entries, then in the <i>n</i>th run of
+ * the main loop, also the <i>n</i>th value of an array is returned.
+ *
+ * Since the different variants are constructed in the order of declaration,
+ * not in the order in which the variant entries appear in the input file, it
+ * may be difficult to guess the mapping between the different variants and
+ * the appropriate entry in an array. You will have to check the order of
+ * declaration, or use only one variant entry.
+ *
+ * It is guaranteed that only selections which match the regular expression
+ * (pattern) given upon declaration of an entry are given back to the program.
+ * If a variant value does not match the regular expression, the default value
+ * is stored and an error is issued. Before the first run of the loop, all
+ * possible values are checked for their conformance, so that the error is
+ * issued at the very beginning of the program.
+ *
+ *
+ * <h3>Usage</h3>
+ *
+ * The usage of this class is similar to the ParameterHandler class. First the
+ * entries and subsections have to be declared, then a loop is performed in
+ * which the different parameter sets are set, a new instance of a user class
+ * is created which is then called. Taking the classes of the example for the
+ * ParameterHandler class, the extended program would look like this:
+ *   @code
+ *     class HelperClass : public MultipleParameterLoop::UserClass
+ *     {
+ *     public:
+ *       HelperClass ();
+ *
+ *       virtual void create_new (const unsigned int run_no);
+ *       virtual void run (ParameterHandler &prm);
+ *
+ *       static void declare_parameters (ParameterHandler &prm);
+ *     private:
+ *       std_cxx11::shared_ptr<Problem> p;
+ *     };
+ *
+ *
+ *     HelperClass::HelperClass () : p(0) {}
+ *
+ *
+ *     void HelperClass::create_new (const unsigned int run_no)
+ *     {
+ *       p.reset(new Problem());
+ *     }
+ *
+ *
+ *     void HelperClass::declare_parameters (ParameterHandler &prm)
+ *     {
+ *       Problem::declare_parameters (prm);
+ *     }
+ *
+ *
+ *     void HelperClass::run (ParameterHandler &prm)
+ *     {
+ *       p->get_parameters (prm);
+ *       p->do_useful_work ();
+ *     }
+ *
+ *
+ *
+ *     int main ()
+ *     {
+ *       class MultipleParameterLoop prm;
+ *       HelperClass h;
+ *       HelperClass::declare_parameters (prm);
+ *       prm.read_input ("prmtest.prm");
+ *       prm.loop (h);
+ *       return 0;
+ *     }
+ *   @endcode
+ *
+ * As can be seen, first a new helper class has to be set up. This must
+ * contain a virtual constructor for a problem class. You can also derive your
+ * problem class from MultipleParameterLoop::UserClass and let
+ * <tt>create_new</tt> clear all member variables. If you have access to all
+ * inherited member variables in some way this is the recommended procedure. A
+ * third possibility is to use multiple inheritance and derive a helper class
+ * from both the MultipleParameterLoop::UserClass and the problem class. In
+ * any case, <tt>create_new</tt> has to provide a clean problem object which
+ * is the problem in the second and third possibility.
+ *
+ * The derived class also has to provide member functions which declare
+ * the entries and which run the program. Running the program includes getting
+ * the parameters out of the ParameterHandler object.
+ *
+ * After defining an object of this helper class and an object of the
+ * MultipleParameterLoop class, the entries have to be declared in the same
+ * way as for the ParameterHandler class. Then the input has to be read.
+ * Finally the loop is called. This executes the following steps:
+ *   @code
+ *     for (each combination)
+ *       {
+ *         UserObject.create_new (run_no);
+ *
+ *         // set parameters for this run
+ *
+ *         UserObject.run (*this);
+ *       }
+ *   @endcode
+ * <tt>UserObject</tt> is the parameter to the <tt>loop</tt> function.
+ * <tt>create_new</tt> is given the number of the run (starting from one) to
+ * enable naming output files differently for each run.
+ *
+ *
+ * <h3>Syntax for variant and array entry values</h3>
+ *
+ * Variant values are specified like <tt>prefix{ v1 | v2 | v3 | ...
+ * }postfix</tt>. Whitespace to the right of the opening brace <tt>{</tt> is
+ * ignored as well as to the left of the closing brace <tt>}</tt> while
+ * whitespace on the respectively other side is not ignored. Whitespace around
+ * the mid symbols <tt>|</tt> is also ignored. The empty selection <tt>prefix{
+ * v1 | }postfix</tt> is also allowed and produces the strings
+ * <tt>prefixv1postfix</tt> and <tt>prefixpostfix</tt>.
+ *
+ * The syntax for array values is the same, apart from the double braces:
+ * <tt>prefix{{ v1 | v2 | v3 }}postfix</tt>.
+ *
+ *
+ * <h3>Worked example</h3>
+ *
+ * Given the above extensions to the example program for the ParameterHandler
+ * and the following input file
+ *   @verbatim
+ *     set Equation 1 = Poisson
+ *     set Equation 2 = Navier-Stokes
+ *     set Output file= results.{{ 1 | 2 | 3 | 4 | 5 | 6 }}
+ *
+ *     subsection Equation 1
+ *       set Matrix type = Sparse
+ *       subsection Linear solver
+ *         set Solver                       = CG
+ *         set Maximum number of iterations = { 10 | 20 | 30 }
+ *       end
+ *     end
+ *
+ *     subsection Equation 2
+ *       set Matrix type = Full
+ *       subsection Linear solver
+ *         set Solver                       = { BiCGStab | GMRES }
+ *         set Maximum number of iterations = 100
+ *       end
+ *     end
+ *   @endverbatim
+ * this is the output:
+ *   @verbatim
+ *     LinEq: Method=CG, MaxIterations=10
+ *     LinEq: Method=BiCGStab, MaxIterations=100
+ *     Problem: outfile=results.1
+ *              eq1=Poisson, eq2=Navier-Stokes
+ *              Matrix1=Sparse, Matrix2=Full
+ *     LinEq: Method=CG, MaxIterations=20
+ *     LinEq: Method=BiCGStab, MaxIterations=100
+ *     Problem: outfile=results.2
+ *              eq1=Poisson, eq2=Navier-Stokes
+ *              Matrix1=Sparse, Matrix2=Full
+ *     LinEq: Method=CG, MaxIterations=30
+ *     LinEq: Method=BiCGStab, MaxIterations=100
+ *     Problem: outfile=results.3
+ *              eq1=Poisson, eq2=Navier-Stokes
+ *              Matrix1=Sparse, Matrix2=Full
+ *     LinEq: Method=CG, MaxIterations=10
+ *     LinEq: Method=GMRES, MaxIterations=100
+ *     Problem: outfile=results.4
+ *              eq1=Poisson, eq2=Navier-Stokes
+ *              Matrix1=Sparse, Matrix2=Full
+ *     LinEq: Method=CG, MaxIterations=20
+ *     LinEq: Method=GMRES, MaxIterations=100
+ *     Problem: outfile=results.5
+ *              eq1=Poisson, eq2=Navier-Stokes
+ *              Matrix1=Sparse, Matrix2=Full
+ *     LinEq: Method=CG, MaxIterations=30
+ *     LinEq: Method=GMRES, MaxIterations=100
+ *     Problem: outfile=results.6
+ *              eq1=Poisson, eq2=Navier-Stokes
+ *              Matrix1=Sparse, Matrix2=Full
+ *   @endverbatim
+ * Since <tt>create_new</tt> gets the number of the run it would also be
+ * possible to output the number of the run.
+ *
+ *
+ * @ingroup input
+ * @author Wolfgang Bangerth, October 1997, 2010
+ */
+class MultipleParameterLoop : public ParameterHandler
+{
+public:
+  /**
+   * This is the class the helper class or the problem class has to be derived
+   * from.
+   */
+  class UserClass
+  {
+  public:
+    /**
+     * Destructor. It doesn't actually do anything, but is declared to force
+     * derived classes to have a virtual destructor.
+     */
+    virtual ~UserClass ();
+
+    /**
+     * <tt>create_new</tt> must provide a clean object, either by creating a
+     * new one or by cleaning an old one.
+     */
+    virtual void create_new (const unsigned int run_no) = 0;
+
+    /**
+     * Get the parameters and run any necessary action.
+     */
+    virtual void run (ParameterHandler &prm) = 0;
+  };
+
+  /**
+   * Constructor
+   */
+  MultipleParameterLoop ();
+
+  /**
+   * Destructor. Declare this only to have a virtual destructor, which is
+   * safer as we have virtual functions. It actually does nothing spectacular.
+   */
+  virtual ~MultipleParameterLoop ();
+
+  /**
+   * Read input from a stream until the stream returns the <tt>eof</tt>
+   * condition or error. The second argument can be used to denote the name of
+   * the file (if that's what the input stream represents) we are reading
+   * from; this is only used when creating output for error messages.
+   *
+   * Return whether the read was successful.
+   *
+   * @note Of the three <tt>read_input</tt> functions implemented by
+   * ParameterHandler, this is the only one overridden with new behavior by
+   * this class. This is because the other two <tt>read_input</tt> functions
+   * just reformat their inputs and then call this version.
+   */
+  virtual bool read_input (std::istream &input,
+                           const std::string &filename = "input file");
+
+  /**
+   * Overriding virtual functions which are overloaded (like
+   * ParameterHandler::read_input, which has two different sets of input
+   * argument types) causes the non-overridden functions to be hidden. Get
+   * around this by explicitly using both variants of
+   * ParameterHandler::read_input and then overriding the one we care about.
+   */
+  using ParameterHandler::read_input;
+
+  /**
+   * Run the central loop.
+   */
+  void loop (UserClass &uc);
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  std::size_t memory_consumption () const;
+
+private:
+
+  /**
+   * An object in the list of entries with multiple values.
+   */
+  class Entry
+  {
+  public:
+    /**
+     * Declare what a multiple entry is: a variant entry (in curly braces
+     * <tt>{</tt>, <tt>}</tt>) or an array (in double curly braces
+     * <tt>{{</tt>, <tt>}}</tt>).
+     */
+    enum MultipleEntryType
+    {
+      variant, array
+    };
+
+    /**
+     * Constructor
+     */
+    Entry () : type (array) {}
+
+    /**
+     * Construct an object with given subsection path, name and value. The
+     * splitting up into the different variants is done later by
+     * <tt>split_different_values</tt>.
+     */
+    Entry (const std::vector<std::string> &Path,
+           const std::string              &Name,
+           const std::string              &Value);
+
+    /**
+     * Split the entry value into the different branches.
+     */
+    void split_different_values ();
+
+    /**
+     * Path to variant entry.
+     */
+    std::vector<std::string> subsection_path;
+
+    /**
+     * Name of entry.
+     */
+    std::string         entry_name;
+
+    /**
+     * Original variant value.
+     */
+    std::string         entry_value;
+
+    /**
+     * List of entry values constructed out of what was given in the input
+     * file.
+     */
+    std::vector<std::string> different_values;
+
+    /**
+     * Store whether this entry is a variant entry or an array.
+     */
+    MultipleEntryType      type;
+
+    /**
+     * Determine an estimate for the memory consumption (in bytes) of this
+     * object.
+     */
+    std::size_t memory_consumption () const;
+  };
+
+  /**
+   * List of variant entry values.
+   */
+  std::vector<Entry> multiple_choices;
+
+  /**
+   * Number of branches constructed from the different combinations of the
+   * variants. This obviously equals the number of runs to be performed.
+   */
+  unsigned int n_branches;
+
+  /**
+   * Initialize the different branches, i.e.  construct the combinations.
+   */
+  void init_branches ();
+
+  /**
+   * Traverse the section currently set by
+   * enter_subsection()/leave_subsection() and see which of the entries are
+   * variant or array entries. Then fill the multiple_choices variable using
+   * this information.
+   */
+  void init_branches_current_section ();
+
+  /**
+   * Transfer the entry values for one run to the entry tree.
+   */
+  void fill_entry_values (const unsigned int run_no);
+};
+
+
+template <class Archive>
+inline
+void
+ParameterHandler::save (Archive &ar, const unsigned int) const
+{
+  // Write the Subscriptor base class part first, followed by the
+  // tree of entries.
+  ar &static_cast<const Subscriptor &>(*this);
+  ar & *entries.get();
+
+  // Patterns are archived as their description strings, collected in
+  // the order in which they are stored in 'patterns'.
+  std::vector<std::string> pattern_descriptions;
+  for (unsigned int i=0; i<patterns.size(); ++i)
+    pattern_descriptions.push_back (patterns[i]->description());
+
+  ar &pattern_descriptions;
+}
+
+
+template <class Archive>
+inline
+void
+ParameterHandler::load (Archive &ar, const unsigned int)
+{
+  // Read the Subscriptor base class part first, then the tree of entries.
+  ar &static_cast<Subscriptor &>(*this);
+  ar & *entries.get();
+
+  // The patterns are read as a list of description strings; one pattern
+  // object per string is then recreated through Patterns::pattern_factory.
+  std::vector<std::string> pattern_descriptions;
+  ar &pattern_descriptions;
+
+  patterns.clear ();
+  for (unsigned int i=0; i<pattern_descriptions.size(); ++i)
+    patterns.push_back (std_cxx11::shared_ptr<const Patterns::PatternBase>(Patterns::pattern_factory(pattern_descriptions[i])));
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/parsed_function.h b/include/deal.II/base/parsed_function.h
new file mode 100644
index 0000000..bd78213
--- /dev/null
+++ b/include/deal.II/base/parsed_function.h
@@ -0,0 +1,216 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2007 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__parsed_function_h
+#define dealii__parsed_function_h
+
+#include <deal.II/base/auto_derivative_function.h>
+#include <deal.II/base/function_parser.h>
+#include <deal.II/base/parameter_handler.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace Functions
+{
+  /**
+   * Friendly interface to the FunctionParser class. This class is meant as a
+   * wrapper for the FunctionParser class. It is used in the step-34 tutorial
+   * program.
+   *
+   * It provides two methods to declare and parse a ParameterHandler object
+   * and creates the Function object declared in the parameter file. This
+   * class is derived from the AutoDerivativeFunction class, so you don't need
+   * to specify derivatives. An example of usage of this class is as follows:
+   *
+   *   @code
+   *   // A parameter handler
+   *   ParameterHandler prm;
+   *
+   *   // Declare a section for the function we need
+   *   prm.enter_subsection("My vector function");
+   *   ParsedFunction<dim>::declare_parameters(prm, dim);
+   *   prm.leave_subsection();
+   *
+   *   // Create a ParsedFunction
+   *   ParsedFunction<dim> my_vector_function(dim);
+   *
+   *   // Parse an input file.
+   *   prm.read_input(some_input_file);
+   *
+   *   // Initialize the ParsedFunction object with the given file
+   *   prm.enter_subsection("My vector function");
+   *   my_vector_function.parse_parameters(prm);
+   *   prm.leave_subsection();
+   *
+   *   @endcode
+   *
+   * And here is an example of what the input parameters could look like (see
+   * the documentation of the FunctionParser class for a detailed description
+   * of the syntax of the function definition):
+   *
+   *   @code
+   *
+   *   # A test two dimensional vector function, depending on time
+   *   subsection My vector function
+   *   set Function constants  = kappa=.1, lambda=2.
+   *   set Function expression = if(y>.5, kappa*x*(1-x),0); t^2*cos(lambda*pi*x)
+   *   set Variable names      = x,y,t
+   *   end
+   *
+   *   @endcode
+   *
+   * @ingroup functions
+   * @author Luca Heltai, 2006
+   */
+  template <int dim>
+  class ParsedFunction :  public AutoDerivativeFunction<dim>
+  {
+  public:
+    /**
+     * Construct a vector function. The vector function which is generated has
+     * @p n_components components (defaults to 1). The parameter @p h is used
+     * to initialize the AutoDerivativeFunction class from which this class is
+     * derived.
+     */
+    ParsedFunction (const unsigned int n_components = 1, const double h=1e-8);
+
+    /**
+     * Declare parameters needed by this class. The additional parameter @p
+     * n_components is used to generate the right code according to the number
+     * of components of the function that will parse this ParameterHandler. If
+     * the number of components which is parsed does not match the number of
+     * components of this object, an assertion is thrown and the program is
+     * aborted.  The default behavior for this class is to declare the
+     * following entries:
+     *
+     *  @code
+     *
+     *  set Function constants  =
+     *  set Function expression = 0
+     *  set Variable names      = x,y,t
+     *
+     *  @endcode
+     *
+     */
+    static void declare_parameters(ParameterHandler &prm,
+                                   const unsigned int n_components = 1);
+
+    /**
+     * Parse parameters needed by this class.  If the number of components
+     * which is parsed does not match the number of components of this object,
+     * an assertion is thrown and the program is aborted.  In order for the
+     * class to function properly, we follow the same conventions declared in
+     * the FunctionParser class (look there for a detailed description of the
+     * syntax for function declarations).
+     *
+     * The three variables that can be parsed from a parameter file are the
+     * following:
+     *
+     *  @code
+     *
+     *  set Function constants  =
+     *  set Function expression =
+     *  set Variable names      =
+     *
+     *  @endcode
+     *
+     * Function constants is a collection of pairs in the form name=value,
+     * separated by commas, for example:
+     *
+     *  @code
+     *
+     *  set Function constants = lambda=1., alpha=2., gamma=3.
+     *
+     *  @endcode
+     *
+     * These constants can be used in the declaration of the function
+     * expression, which follows the convention of the FunctionParser class.
+     * In order to specify vector functions, semicolons have to be used to
+     * separate the different components, e.g.:
+     *
+     *  @code
+     *
+     *  set Function expression = cos(pi*x) ; cos(pi*y)
+     *
+     *  @endcode
+     *
+     * The variable names entry can be used to customize the name of the
+     * variables used in the Function. It defaults to
+     *
+     *  @code
+     *
+     *  set Variable names      = x,t
+     *
+     *  @endcode
+     *
+     * for one dimensional problems,
+     *
+     *  @code
+     *
+     *  set Variable names      = x,y,t
+     *
+     *  @endcode
+     *
+     * for two dimensional problems and
+     *
+     *  @code
+     *
+     *  set Variable names      = x,y,z,t
+     *
+     *  @endcode
+     *
+     * for three dimensional problems.
+     *
+     * The time variable can be set according to specifications in the
+     * FunctionTime base class.
+     */
+    void parse_parameters(ParameterHandler &prm);
+
+    /**
+     * Return all components of a vector-valued function at the given point @p
+     * p.
+     */
+    virtual void vector_value (const Point<dim> &p,
+                               Vector<double>   &values) const;
+
+    /**
+     * Return the value of the function at the given point. Unless there is
+     * only one component (i.e. the function is scalar), you should state the
+     * component you want to have evaluated; it defaults to zero, i.e. the
+     * first component.
+     */
+    virtual double value (const Point< dim >     &p,
+                          const unsigned int  component = 0)    const;
+
+    /**
+     * Set the time to a specific value for time-dependent functions.
+     *
+     * We need to override this to set the time also in the accessor
+     * FunctionParser<dim>.
+     */
+    virtual void set_time(const double newtime);
+
+  private:
+    /**
+     * The object with which we do computations.
+     */
+    FunctionParser<dim> function_object;
+  };
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/partitioner.h b/include/deal.II/base/partitioner.h
new file mode 100644
index 0000000..415782d
--- /dev/null
+++ b/include/deal.II/base/partitioner.h
@@ -0,0 +1,552 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2011 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__partitioner_h
+#define dealii__partitioner_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/index_set.h>
+#include <deal.II/base/mpi.h>
+#include <deal.II/base/types.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/memory_consumption.h>
+
+#include <limits>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace Utilities
+{
+  namespace MPI
+  {
+    /**
+     * This class defines a model for the partitioning of a vector (or, in
+     * fact, any linear data structure) among processors using MPI.
+     *
+     * The partitioner stores the global vector size and the locally owned
+     * range as a half-open interval [@p lower, @p upper). Furthermore, it
+     * includes a structure for the point-to-point communication patterns. It
+     * allows the inclusion of ghost indices (i.e. indices that a current
+     * processor needs to have access to, but are owned by another process)
+     * through an IndexSet. In addition, it also stores the other processors'
+     * ghost indices belonging to the current processor, which are the indices
+     * where other processors might require information from. In a sense,
+     * these import indices form the dual of the ghost indices. This
+     * information is gathered once when constructing the partitioner, which
+     * obviates subsequent global communication steps when exchanging data.
+     *
+     * The partitioner includes a mechanism for converting global to local and
+     * local to global indices. Internally, this class stores vector elements
+     * using the convention as follows: The local range is associated with
+     * local indices [0, @p local_size), and ghost indices are stored
+     * consecutively in [@p local_size, @p local_size + @p n_ghost_indices).
+     * The ghost indices are sorted according to their global index.
+     *
+     *
+     * @author Katharina Kormann, Martin Kronbichler, 2010, 2011
+     */
+    class Partitioner
+    {
+    public:
+      /**
+       * Empty Constructor.
+       */
+      Partitioner ();
+
+      /**
+       * Constructor with size argument. Creates an MPI_COMM_SELF structure
+       * where there is no real parallel layout.
+       */
+      Partitioner (const unsigned int size);
+
+      /**
+       * Constructor with index set arguments. This constructor creates a
+       * distributed layout based on a given communicator, an IndexSet
+       * describing the locally owned range and another one for describing
+       * ghost indices that are owned by other processors, but we need to have
+       * read or write access to.
+       */
+      Partitioner (const IndexSet &locally_owned_indices,
+                   const IndexSet &ghost_indices_in,
+                   const MPI_Comm  communicator_in);
+
+      /**
+       * Constructor with one index set argument. This constructor creates a
+       * distributed layout based on a given communicator, and an IndexSet
+       * describing the locally owned range. It allows to set the ghost
+       * indices at a later time. Apart from this, it is similar to the other
+       * constructor with two index sets.
+       */
+      Partitioner (const IndexSet &locally_owned_indices,
+                   const MPI_Comm  communicator_in);
+
+      /**
+       * Sets the locally owned indices. Used in the constructor.
+       */
+      void set_owned_indices (const IndexSet &locally_owned_indices);
+
+      /**
+       * Allows to set the ghost indices after the constructor has been
+       * called.
+       */
+      void set_ghost_indices (const IndexSet &ghost_indices);
+
+      /**
+       * Returns the global size.
+       */
+      types::global_dof_index size() const;
+
+      /**
+       * Returns the local size, i.e. local_range().second minus
+       * local_range().first.
+       */
+      unsigned int local_size() const;
+
+      /**
+       * Returns an IndexSet representation of the local range. This class
+       * only supports contiguous local ranges, so the IndexSet actually only
+       * consists of one single range of data, and is equivalent to the result
+       * of local_range().
+       */
+      const IndexSet &locally_owned_range() const;
+
+      /**
+       * Returns the local range. The returned pair consists of the index of
+       * the first element and the index of the element one past the last
+       * locally owned one.
+       */
+      std::pair<types::global_dof_index,types::global_dof_index>
+      local_range() const;
+
+      /**
+       * Returns true if the given global index is in the local range of this
+       * processor.
+       */
+      bool in_local_range (const types::global_dof_index global_index) const;
+
+      /**
+       * Returns the local index corresponding to the given global index. If
+       * the given global index is neither locally owned nor a ghost, an
+       * exception is thrown.
+       *
+       * Note that the local index for locally owned indices is between 0 and
+       * local_size()-1, and the local index for ghosts is between
+       * local_size() and local_size()+n_ghost_indices()-1.
+       */
+      unsigned int
+      global_to_local (const types::global_dof_index global_index) const;
+
+      /**
+       * Returns the global index corresponding to the given local index.
+       *
+       * Note that the local index for locally owned indices is between 0 and
+       * local_size()-1, and the local index for ghosts is between
+       * local_size() and local_size()+n_ghost_indices()-1.
+       */
+      types::global_dof_index
+      local_to_global (const unsigned int local_index) const;
+
+      /**
+       * Returns whether the given global index is a ghost index on the
+       * present processor. Returns false for indices that are owned locally
+       * and for indices not present at all.
+       */
+      bool is_ghost_entry (const types::global_dof_index global_index) const;
+
+      /**
+       * Returns an IndexSet representation of all ghost indices.
+       */
+      const IndexSet &ghost_indices() const;
+
+      /**
+       * Returns the number of ghost indices. Same as
+       * ghost_indices().n_elements(), but cached for simpler access.
+       */
+      unsigned int n_ghost_indices() const;
+
+      /**
+       * Returns a list of processors (first entry) and the number of degrees
+       * of freedom for the individual processor on the ghost elements present
+       * (second entry).
+       */
+      const std::vector<std::pair<unsigned int, unsigned int> > &
+      ghost_targets() const;
+
+      /**
+       * The set of (local) indices that we are importing during compress(),
+       * i.e., others' ghosts that belong to the local range. Similar
+       * structure as in an IndexSet, but tailored to be iterated over, and
+       * some indices may be duplicates.
+       */
+      const std::vector<std::pair<unsigned int, unsigned int> > &
+      import_indices() const;
+
+      /**
+       * Number of import indices, i.e., indices that are ghosts on other
+       * processors and we will receive data from.
+       */
+      unsigned int n_import_indices() const;
+
+      /**
+       * Returns a list of processors (first entry) and the number of degrees
+       * of freedom for all the processors that data is obtained from (second
+       * entry), i.e., locally owned indices that are ghosts on other
+       * processors.
+       */
+      const std::vector<std::pair<unsigned int, unsigned int> > &
+      import_targets() const;
+
+      /**
+       * Checks whether the given partitioner is compatible with the
+       * partitioner used for this vector. Two partitioners are compatible if
+       * they have the same local size and the same ghost indices. They do not
+       * necessarily need to be the same data field. This is a local operation
+       * only, i.e., if only some processors decide that the partitioning is
+       * not compatible, only these processors will return @p false, whereas
+       * the other processors will return @p true.
+       */
+      bool is_compatible (const Partitioner &part) const;
+
+      /**
+       * Checks whether the given partitioner is compatible with the
+       * partitioner used for this vector. Two partitioners are compatible if
+       * they have the same local size and the same ghost indices. They do not
+       * necessarily need to be the same data field. As opposed to
+       * is_compatible(), this method checks for compatibility among all
+       * processors and the method only returns @p true if the partitioner is
+       * the same on all processors.
+       *
+       * This method performs global communication, so make sure to use it
+       * only in a context where all processors call it the same number of
+       * times.
+       */
+      bool is_globally_compatible (const Partitioner &part) const;
+
+      /**
+       * Returns the MPI ID of the calling processor. Cached to have simple
+       * access.
+       */
+      unsigned int this_mpi_process () const;
+
+      /**
+       * Returns the total number of MPI processes participating in the given
+       * partitioner. Cached to have simple access.
+       */
+      unsigned int n_mpi_processes () const;
+
+      /**
+       * Returns the MPI communicator underlying the partitioner object.
+       */
+      const MPI_Comm &get_communicator() const;
+
+      /**
+       * Returns whether ghost indices have been explicitly added as a @p
+       * ghost_indices argument. Only true if a reinit call or constructor
+       * provided that argument.
+       */
+      bool ghost_indices_initialized() const;
+
+      /**
+       * Computes the memory consumption of this structure.
+       */
+      std::size_t memory_consumption() const;
+
+      /**
+       * Exception
+       */
+      DeclException2 (ExcIndexNotPresent,
+                      types::global_dof_index,
+                      unsigned int,
+                      << "Global index " << arg1
+                      << " neither owned nor ghost on proc " << arg2);
+
+    private:
+      /**
+       * The global size of the vector over all processors
+       */
+      const types::global_dof_index global_size;
+
+      /**
+       * The range of the vector that is stored locally.
+       */
+      IndexSet locally_owned_range_data;
+
+      /**
+       * The range of the vector that is stored locally. Extracted from
+       * locally_owned_range for performance reasons.
+       */
+      std::pair<types::global_dof_index,types::global_dof_index> local_range_data;
+
+      /**
+       * The set of indices to which we need to have read access but that are
+       * not locally owned
+       */
+      IndexSet ghost_indices_data;
+
+      /**
+       * Caches the number of ghost indices. It would be expensive to use @p
+       * ghost_indices.n_elements() to compute this.
+       */
+      unsigned int n_ghost_indices_data;
+
+      /**
+       * Contains information which processors my ghost indices belong to and
+       * how many those indices are
+       */
+      std::vector<std::pair<unsigned int, unsigned int> > ghost_targets_data;
+
+      /**
+       * The set of (local) indices that we are importing during compress(),
+       * i.e., others' ghosts that belong to the local range. Similar
+       * structure as in an IndexSet, but tailored to be iterated over, and
+       * some indices may be duplicates.
+       */
+      std::vector<std::pair<unsigned int, unsigned int> > import_indices_data;
+
+      /**
+       * Caches the number of import indices. It would be expensive to compute
+       * it by iterating over the import indices and accumulate them.
+       */
+      unsigned int n_import_indices_data;
+
+      /**
+       * The set of processors and length of data field which send us their
+       * ghost data
+       */
+      std::vector<std::pair<unsigned int, unsigned int> > import_targets_data;
+
+      /**
+       * The ID of the current processor in the MPI network
+       */
+      unsigned int my_pid;
+
+      /**
+       * The total number of processors active in the problem
+       */
+      unsigned int n_procs;
+
+      /**
+       * The MPI communicator involved in the problem
+       */
+      const MPI_Comm communicator;
+
+      /**
+       * Stores whether the ghost indices have been explicitly set.
+       */
+      bool have_ghost_indices;
+    };
+
+
+
+    /*----------------------- Inline functions ----------------------------------*/
+
+#ifndef DOXYGEN
+
+    // Global size of the vector, as stored in the const member global_size.
+    inline types::global_dof_index Partitioner::size() const
+    {
+      return global_size;
+    }
+
+
+
+    // Read-only access to the IndexSet that stores the locally owned range.
+    inline const IndexSet &Partitioner::locally_owned_range() const
+    {
+      return locally_owned_range_data;
+    }
+
+
+
+    // Copy of the cached pair (first owned index, one past the last owned index).
+    inline std::pair<types::global_dof_index,types::global_dof_index>
+    Partitioner::local_range() const
+    {
+      return local_range_data;
+    }
+
+
+
+    // Number of locally owned indices, i.e. the length of the half-open range
+    // held in local_range_data, narrowed to unsigned int.
+    inline unsigned int
+    Partitioner::local_size () const
+    {
+      const types::global_dof_index size = local_range_data.second - local_range_data.first;
+      Assert(size<=std::numeric_limits<unsigned int>::max(), ExcNotImplemented());
+      return static_cast<unsigned int>(size);
+    }
+
+
+
+    // True iff global_index lies in [local_range_data.first, local_range_data.second).
+    inline bool
+    Partitioner::in_local_range (const types::global_dof_index global_index) const
+    {
+      const bool at_or_above_begin = (global_index >= local_range_data.first);
+      return at_or_above_begin && (global_index < local_range_data.second);
+    }
+
+
+
+    // An index is a ghost if it is not locally owned but is contained in the
+    // ghost index set.
+    inline bool
+    Partitioner::is_ghost_entry (const types::global_dof_index global_index) const
+    {
+      // an index in the local range is trivially not a ghost; thanks to the
+      // short-circuit the ghost index set is only queried for other indices
+      return !in_local_range(global_index) &&
+             ghost_indices_data.is_element(global_index);
+    }
+
+
+
+    // Map a global index to its local position: owned indices first, then ghosts.
+    inline unsigned int
+    Partitioner::global_to_local (const types::global_dof_index global_index) const
+    {
+      Assert(in_local_range(global_index) || is_ghost_entry (global_index),
+             ExcIndexNotPresent(global_index, my_pid));
+      // locally owned indices are numbered by their offset into the local range
+      if (in_local_range(global_index))
+        return static_cast<unsigned int>(global_index - local_range_data.first);
+      // ghosts come after all owned indices, ordered as in the ghost index set
+      if (is_ghost_entry (global_index))
+        return (local_size() +
+                static_cast<unsigned int>(ghost_indices_data.index_within_set (global_index)));
+      // should only end up here in optimized mode, when we use this large
+      // number to trigger a segfault when using this method for array access
+      return numbers::invalid_unsigned_int;
+    }
+
+
+
+    // Map a local index back to a global one: owned indices first, then ghosts.
+    inline types::global_dof_index
+    Partitioner::local_to_global (const unsigned int local_index) const
+    {
+      AssertIndexRange (local_index, local_size() + n_ghost_indices_data);
+      const unsigned int n_owned = local_size();
+      return (local_index < n_owned)
+             ? local_range_data.first + types::global_dof_index(local_index)
+             : ghost_indices_data.nth_index_in_set (local_index-n_owned);
+    }
+
+
+
+    // Read-only access to the IndexSet of ghost indices.
+    inline const IndexSet &Partitioner::ghost_indices() const
+    {
+      return ghost_indices_data;
+    }
+
+
+
+    // Cached number of ghost indices (n_ghost_indices_data).
+    inline unsigned int
+    Partitioner::n_ghost_indices() const
+    {
+      return n_ghost_indices_data;
+    }
+
+
+
+    // Read-only access to the (processor, count) list stored in ghost_targets_data.
+    inline const std::vector<std::pair<unsigned int, unsigned int> > &
+    Partitioner::ghost_targets() const
+    {
+      return ghost_targets_data;
+    }
+
+
+    // Read-only access to the list of import index pairs (import_indices_data).
+    inline const std::vector<std::pair<unsigned int, unsigned int> > &
+    Partitioner::import_indices() const
+    {
+      return import_indices_data;
+    }
+
+
+
+    // Cached number of import indices (n_import_indices_data).
+    inline unsigned int
+    Partitioner::n_import_indices() const
+    {
+      return n_import_indices_data;
+    }
+
+
+
+    // Read-only access to the (processor, count) list stored in import_targets_data.
+    inline const std::vector<std::pair<unsigned int, unsigned int> > &
+    Partitioner::import_targets() const
+    {
+      return import_targets_data;
+    }
+
+
+
+    // ID of this process, as cached in my_pid.
+    inline unsigned int
+    Partitioner::this_mpi_process() const
+    {
+      // use the value stored in this class rather than calling
+      // Utilities::MPI::this_mpi_process(), so that this query also works
+      // when MPI is not initialized.
+      return my_pid;
+    }
+
+
+
+    // Number of processes, as cached in n_procs.
+    inline unsigned int
+    Partitioner::n_mpi_processes() const
+    {
+      // use the value stored in this class rather than calling
+      // Utilities::MPI::n_mpi_processes(), so that this query also works
+      // when MPI is not initialized.
+      return n_procs;
+    }
+
+
+
+    // Reference to the MPI communicator stored in this object.
+    inline const MPI_Comm &
+    Partitioner::get_communicator() const
+    {
+      return communicator;
+    }
+
+
+
+    // Value of the have_ghost_indices flag.
+    inline bool
+    Partitioner::ghost_indices_initialized() const
+    {
+      return have_ghost_indices;
+    }
+
+#endif  // ifndef DOXYGEN
+
+  } // end of namespace MPI
+
+} // end of namespace Utilities
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/path_search.h b/include/deal.II/base/path_search.h
new file mode 100644
index 0000000..94ec30f
--- /dev/null
+++ b/include/deal.II/base/path_search.h
@@ -0,0 +1,280 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__path_search_h
+#define dealii__path_search_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+
+#include <string>
+#include <fstream>
+#include <map>
+#include <vector>
+#include <memory>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * Support for searching files in a list of paths and with a list of suffixes.
+ *
+ * A list of search paths is maintained for each file class supported. A file
+ * class is defined by a unique string. The classes provided are <dl> <dt>
+ * MESH <dd> mesh input files in various formats (see GridIn) <dt> PARAMETER
+ * <dd> Parameter files (<tt>.prm</tt>) </dl>
+ *
+ * Additional file classes can be added easily by using add_class().
+ *
+ * Usage: First, you construct a PathSearch object for a certain file class,
+ * e.g. meshes. Then, you use the find() method to obtain a full path name and
+ * you can open the file.
+ * @code
+ * #include <deal.II/base/path_search.h>
+ *
+ * dealii::PathSearch search("MESH");
+ * std::string full_name = search.find("grid");
+ * std::ifstream in(full_name.c_str());
+ * ...
+ * @endcode
+ *
+ * This piece of code will first traverse all paths in the list set up for
+ * file class <tt>MESH</tt>. If it manages to open a file, it returns the
+ * full path name of that file. If not, it will try to append the first suffix
+ * of the suffix list and do the same. And so on. If no file is found in the
+ * end, an exception is thrown.
+ *
+ * If you want to restrict your search to a certain mesh format, <tt>.inp</tt>
+ * for instance, then either use <tt>"grid.inp"</tt> in the code above or use
+ * the alternative find(const std::string&,const std::string&,const char*)
+ * function
+ * @code
+ * std::string full_name = search.find("grid", ".inp");
+ * @endcode
+ *
+ * Path lists are by default starting with the current directory
+ * (<tt>"./"</tt>), followed optionally by a standard directory of deal.II.
+ * Use show() to find out the path list for a given class. Paths and suffixes
+ * can be added using the functions add_path() and add_suffix(), respectively.
+ *
+ * @note Directories in the path list should always end with a trailing
+ * <tt>"/"</tt>, while suffixes should always start with a dot. These
+ * characters are not added automatically (allowing you to do some real file
+ * name editing).
+ *
+ * @todo Add support for environment variables like in kpathsea.
+ *
+ * @ingroup input
+ * @author Guido Kanschat, Luca Heltai 2005
+ */
+class PathSearch
+{
+public:
+  /**
+   * Position for adding a new item to a list.
+   */
+  enum Position
+  {
+    /// Add new item at end of list
+    back,
+    /// Add new item at front of list
+    front,
+    /// Add in path list after empty element
+    after_none
+  };
+
+  /**
+   * Constructor. The first argument is a string identifying the class of
+   * files to be searched for.
+   *
+   * The debug argument determines the verbosity of this class.
+   */
+  PathSearch (const std::string &cls,
+              const unsigned int debug=0);
+
+  /**
+   * Find a file in the class specified by the constructor and return its
+   * complete path name (including a possible suffix).
+   *
+   * File search works by actually trying to open the file. If @p fopen is
+   * successful with the provided @p open_mode, then the file is found,
+   * otherwise the search continues.
+   *
+   * @warning Be careful with @p open_mode! In particular, use <tt>"w"</tt>
+   * with great care! If the file does not exist, it cannot be found. If it
+   * does exist, the @p fopen function will truncate it to zero length.
+   *
+   * @param filename The base name of the file to be found, without path
+   * components and suffix.
+   * @param open_mode The mode handed over to the @p fopen function.
+   */
+  std::string find (const std::string &filename,
+                    const char *open_mode = "r");
+
+  /**
+   * Find a file in the class specified by the constructor and return its
+   * complete path name. Do not use the standard suffix list, but only try to
+   * apply the suffix given.
+   *
+   * File search works by actually trying to open the file. If @p fopen is
+   * successful with the provided @p open_mode, then the file is found,
+   * otherwise the search continues.
+   *
+   * @warning Be careful with @p open_mode! In particular, use <tt>"w"</tt>
+   * with great care! If the file does not exist, it cannot be found. If it
+   * does exist, the @p fopen function will truncate it to zero length.
+   *
+   * @param filename The base name of the file to be found, without path
+   * components and suffix.
+   * @param suffix The suffix to be used for opening.
+   * @param open_mode The mode handed over to the @p fopen function.
+   */
+  std::string find (const std::string &filename,
+                    const std::string &suffix,
+                    const char *open_mode = "r");
+
+  /**
+   * Show the paths and suffixes used for this object.
+   */
+  template <class StreamType>
+  void show(StreamType &stream) const;
+
+  /**
+   * Add a new class.
+   */
+  static void add_class (const std::string &cls);
+
+  /**
+   * Add a path to the current class. See PathSearch::Position for possible
+   * position arguments.
+   */
+  void add_path (const std::string &path, Position pos = back);
+
+  /**
+   * Add a suffix to the current class. See PathSearch::Position for possible
+   * position arguments.
+   */
+  void add_suffix (const std::string &suffix, Position pos = back);
+
+  /**
+   * This class was not registered in the path search mechanism.
+   * @ingroup Exceptions
+   */
+  DeclException1(ExcNoClass,
+                 std::string,
+                 << "The class "
+                 << arg1
+                 << " must be registered before referring it in PathSearch");
+  /**
+   * The PathSearch class could not find a file with this name in its path
+   * list.
+   * @ingroup Exceptions
+   */
+  DeclException2(ExcFileNotFound,
+                 std::string, std::string,
+                 << "The file \"" << arg1
+                 << "\" was not found in the path for files of class "
+                 << arg2);
+
+private:
+  /**
+   * Type of values in the class maps.
+   */
+  typedef std::map<std::string, std::vector<std::string> >::value_type map_type;
+
+  /**
+   * Initialize the static list objects for further use.
+   */
+  static void initialize_classes();
+
+  /**
+   * Get path list for a certain class. Used to set up #my_path_list in
+   * constructor.
+   */
+  static std::vector<std::string> &get_path_list(const std::string &cls);
+
+  /**
+   * Get suffix list for a certain class. Used to set up #my_suffix_list in
+   * constructor.
+   */
+  static std::vector<std::string> &get_suffix_list(const std::string &cls);
+
+  /**
+   * The file class handled by this object.
+   */
+  const std::string cls;
+
+  /**
+   * All path lists for all classes, such that we can build them only once.
+   */
+  static std::map<std::string, std::vector<std::string> > path_lists;
+
+  /**
+   * List of suffixes for each class.
+   */
+  static std::map<std::string, std::vector<std::string> > suffix_lists;
+
+  /**
+   * Path list for the class this object belongs to.
+   */
+  std::vector<std::string> &my_path_list;
+
+  /**
+   * Suffix list for the class this object belongs to.
+   */
+  std::vector<std::string> &my_suffix_list;
+
+  /**
+   * Debug flag. No output if zero.
+   */
+  const unsigned int debug;
+
+  /**
+   * The empty string.
+   */
+  static std::string empty;
+};
+
+
+/* ----------------------------- inline functions ------------------------- */
+
+
+// Print this object's path list and suffix list to the given stream.
+template <class StreamType>
+inline void
+PathSearch::show(StreamType &out) const
+{
+  out << "DEAL_II_" << cls << "PATH=\"";
+  // paths are separated by ':'; no separator precedes the first one
+  for (std::vector<std::string>::const_iterator path = my_path_list.begin();
+       path != my_path_list.end(); ++path)
+    {
+      if (path != my_path_list.begin())
+        out << ':';
+      out << *path;
+    }
+  out << '"' << std::endl << " Suffixes";
+  // every suffix is printed in double quotes, preceded by a blank
+  for (std::vector<std::string>::const_iterator sfx = my_suffix_list.begin();
+       sfx != my_suffix_list.end(); ++sfx)
+    out << " \"" << *sfx << '"';
+  out << std::endl;
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
+
diff --git a/include/deal.II/base/point.h b/include/deal.II/base/point.h
new file mode 100644
index 0000000..67a0abb
--- /dev/null
+++ b/include/deal.II/base/point.h
@@ -0,0 +1,546 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__point_h
+#define dealii__point_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/tensor.h>
+#include <cmath>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * A class that represents a point in a space with arbitrary dimension
+ * <tt>dim</tt>.
+ *
+ * Objects of this class are used to represent points, i.e., vectors anchored
+ * at the origin of a Cartesian vector space. They are, among other uses,
+ * passed to functions that operate on points in spaces of a priori fixed
+ * dimension: rather than using functions like <tt>double f(double x)</tt> and
+ * <tt>double f(double x, double y)</tt>, you should use <tt>double
+ * f(Point<dim> &p)</tt> instead as it allows writing dimension independent
+ * code.
+ *
+ *
+ * <h3>What's a <code>Point@<dim@></code> and what is a
+ * <code>Tensor@<1,dim@></code>?</h3>
+ *
+ * The Point class is derived from Tensor@<1,dim@> and consequently shares the
+ * latter's member functions and other attributes. In fact, it has relatively
+ * few additional functions itself (the most notable exception being the
+ * distance() function to compute the Euclidean distance between two points in
+ * space), and these two classes can therefore often be used interchangeably.
+ *
+ * Nonetheless, there are semantic differences that make us use these classes
+ * in different and well-defined contexts. Within deal.II, we use the
+ * <tt>Point</tt> class to denote points in space, i.e., for vectors (rank-1
+ * tensors) that are <em>anchored at the origin</em>. On the other hand,
+ * vectors that are anchored elsewhere (and consequently do not represent
+ * <em>points</em> in the common usage of the word) are represented by objects
+ * of type Tensor@<1,dim@>. In particular, this is the case for direction
+ * vectors, normal vectors, gradients, and the differences between two points
+ * (i.e., what you get when you subtract one point from another): all of these
+ * are represented by Tensor@<1,dim@> objects rather than Point@<dim@>.
+ *
+ * Furthermore, the Point class is only used where the coordinates of an
+ * object can be thought to possess the dimension of a length. An object that
+ * represents the weight, height, and cost of an object is neither a point nor
+ * a tensor (because it lacks the transformation properties under rotation of
+ * the coordinate system) and should consequently not be represented by either
+ * of these classes. Use an array of size 3 in this case, or the
+ * <code>std_cxx11::array</code> class. Alternatively, as in the case of
+ * vector-valued functions, you can use objects of type Vector or
+ * <code>std::vector</code>.
+ *
+ *
+ * @tparam dim An integer that denotes the dimension of the space in which a
+ * point lies. This of course equals the number of coordinates that identify a
+ * point.
+ * @tparam Number The data type in which the coordinates values are to be
+ * stored. This will, in almost all cases, simply be the default @p double,
+ * but there are cases where one may want to store coordinates in a different
+ * (and always scalar) type. An example would be an interval type that can
+ * store the value of a coordinate as well as its uncertainty. Another example
+ * would be a type that allows for Automatic Differentiation (see, for
+ * example, the Sacado type used in step-33) and thereby can generate analytic
+ * (spatial) derivatives of a function when passed a Point object whose
+ * coordinates are stored in such a type.
+ *
+ *
+ * @ingroup geomprimitives
+ * @author Wolfgang Bangerth, 1997
+ */
+template <int dim, typename Number = double>
+class Point : public Tensor<1,dim,Number>
+{
+public:
+  /**
+   * Standard constructor. Creates an object that corresponds to the origin,
+   * i.e., all coordinates are set to zero.
+   */
+  Point ();
+
+  /**
+   * Convert a tensor to a point.
+   */
+  explicit Point (const Tensor<1,dim,Number> &);
+
+  /**
+   * Constructor for one dimensional points. This function is only implemented
+   * for <tt>dim==1</tt> since the usage is considered unsafe for points with
+   * <tt>dim!=1</tt> as it would leave some components of the point
+   * coordinates uninitialized.
+   */
+  explicit Point (const Number x);
+
+  /**
+   * Constructor for two dimensional points. This function is only implemented
+   * for <tt>dim==2</tt> since the usage is considered unsafe for points with
+   * <tt>dim!=2</tt> as it would leave some components of the point
+   * coordinates uninitialized (if dim>2) or would not use some arguments (if
+   * dim<2).
+   */
+  Point (const Number x,
+         const Number y);
+
+  /**
+   * Constructor for three dimensional points. This function is only
+   * implemented for <tt>dim==3</tt> since the usage is considered unsafe for
+   * points with <tt>dim!=3</tt> as it would leave some components of the
+   * point coordinates uninitialized (if dim>3) or would not use some
+   * arguments (if dim<3).
+   */
+  Point (const Number x,
+         const Number y,
+         const Number z);
+
+  /**
+   * Return a unit vector in coordinate direction <tt>i</tt>, i.e., a vector
+   * that is zero in all coordinates except for a single 1 in the <tt>i</tt>th
+   * coordinate.
+   */
+  static Point<dim,Number> unit_vector(const unsigned int i);
+
+  /**
+   * Read access to the <tt>index</tt>th coordinate.
+   */
+  Number operator () (const unsigned int index) const;
+
+  /**
+   * Read and write access to the <tt>index</tt>th coordinate.
+   */
+  Number &operator () (const unsigned int index);
+
+  /**
+   * @name Addition and subtraction of points.
+   * @{
+   */
+
+  /**
+   * Add an offset given as Tensor<1,dim,Number> to a point.
+   */
+  Point<dim,Number> operator + (const Tensor<1,dim,Number> &) const;
+
+  /**
+   * Subtract two points, i.e., obtain the vector that connects the two. As
+   * discussed in the documentation of this class, subtracting two points
+   * results in a vector anchored at one of the two points (rather than at the
+   * origin) and, consequently, the result is returned as a Tensor@<1,dim@>
+   * rather than as a Point@<dim@>.
+   */
+  Tensor<1,dim,Number> operator - (const Point<dim,Number> &) const;
+
+  /**
+   * Subtract a difference vector (represented by a Tensor@<1,dim@>) from the
+   * current point. This results in another point and, as discussed in the
+   * documentation of this class, the result is then naturally returned as a
+   * Point@<dim@> object rather than as a Tensor@<1,dim@>.
+   */
+  Point<dim,Number> operator - (const Tensor<1,dim,Number> &) const;
+
+  /**
+   * The opposite vector.
+   */
+  Point<dim,Number> operator - () const;
+
+  /**
+   * @}
+   */
+
+  /**
+   * @name Multiplication and scaling of points. Dot products. Norms.
+   * @{
+   */
+
+  /**
+   * Multiply the current point by a factor.
+   *
+   * @relates EnableIfScalar
+   */
+  template <typename OtherNumber>
+  Point<dim,typename ProductType<Number, typename EnableIfScalar<OtherNumber>::type>::type>
+  operator * (const OtherNumber) const;
+
+  /**
+   * Divide the current point by a factor.
+   */
+  template <typename OtherNumber>
+  Point<dim,typename ProductType<Number, typename EnableIfScalar<OtherNumber>::type>::type>
+  operator / (const OtherNumber) const;
+
+  /**
+   * Return the scalar product of the vectors representing two points.
+   */
+  Number operator * (const Tensor<1,dim,Number> &p) const;
+
+  /**
+   * Return the scalar product of this point vector with itself, i.e. the
+   * square, or the square of the norm. In case of a complex number type it is
+   * equivalent to the contraction of this point vector with a complex
+   * conjugate of itself.
+   *
+   * @note This function is equivalent to
+   * Tensor<rank,dim,Number>::norm_square() which returns the square of the
+   * Frobenius norm.
+   */
+  typename numbers::NumberTraits<Number>::real_type square () const;
+
+  /**
+   * Return the Euclidean distance of <tt>this</tt> point to the point
+   * <tt>p</tt>, i.e. the <tt>l_2</tt> norm of the difference between the
+   * vectors representing the two points.
+   */
+  typename numbers::NumberTraits<Number>::real_type distance (const Point<dim,Number> &p) const;
+
+  /**
+   * @}
+   */
+
+  /**
+   * Read or write the data of this object to or from a stream for the purpose
+   * of serialization
+   */
+  template <class Archive>
+  void serialize(Archive &ar, const unsigned int version);
+};
+
+/*------------------------------- Inline functions: Point ---------------------------*/
+
+#ifndef DOXYGEN
+
+// Default constructor: nothing is done beyond constructing the base class.
+template <int dim, typename Number>
+inline Point<dim,Number>::Point ()
+{}
+
+
+
+// Converting constructor: copy-initializes the Tensor<1,dim,Number> base
+// class from the given tensor.
+template <int dim, typename Number>
+inline Point<dim,Number>::Point (const Tensor<1,dim,Number> &t)
+  : Tensor<1,dim,Number>(t)
+{}
+
+
+
+template <int dim, typename Number>
+inline Point<dim,Number>::Point (const Number x)
+{
+  // valid for dim==1 only; the assertion reports any other use
+  Assert (dim==1, StandardExceptions::ExcInvalidConstructorCall());
+  switch (dim)
+    {
+    case 1:
+      this->values[0] = x;
+      break;
+    }
+}
+
+
+
+template <int dim, typename Number>
+inline Point<dim,Number>::Point (const Number x, const Number y)
+{
+  // valid for dim==2 only; the assertion reports any other use
+  Assert (dim==2, StandardExceptions::ExcInvalidConstructorCall());
+  switch (dim)
+    {
+    case 2:
+      this->values[0] = x;
+      this->values[1] = y;
+      break;
+    }
+}
+
+
+
+template <int dim, typename Number>
+inline Point<dim,Number>::Point (const Number x, const Number y, const Number z)
+{
+  // valid for dim==3 only; the assertion reports any other use
+  Assert (dim==3, StandardExceptions::ExcInvalidConstructorCall());
+  switch (dim)
+    {
+    case 3:
+      this->values[0] = x;
+      this->values[1] = y;
+      this->values[2] = z;
+      break;
+    }
+}
+
+
+// Build a default-constructed point and set its i-th entry to 1.
+template <int dim, typename Number>
+inline Point<dim,Number>
+Point<dim,Number>::unit_vector(unsigned int i)
+{
+  Point<dim,Number> unit;
+  unit[i] = 1.;
+  return unit;
+}
+
+
+// Read-only element access; the index is checked with AssertIndexRange.
+template <int dim, typename Number>
+inline Number
+Point<dim,Number>::operator () (const unsigned int index) const
+{
+  AssertIndexRange(static_cast<int>(index), dim);
+  return this->values[index];
+}
+
+
+
+// Read/write element access; the index is checked with AssertIndexRange.
+template <int dim, typename Number>
+inline Number &
+Point<dim,Number>::operator () (const unsigned int index)
+{
+  AssertIndexRange(static_cast<int>(index), dim);
+  return this->values[index];
+}
+
+
+
+// Copy this point, add the offset to the copy in place, return the copy.
+template <int dim, typename Number>
+inline Point<dim,Number>
+Point<dim,Number>::operator + (const Tensor<1,dim,Number> &p) const
+{
+  Point<dim,Number> shifted (*this);
+  shifted += p;
+  return shifted;
+}
+
+
+
+// Difference of two points, formed on a Tensor copy of this point.
+template <int dim, typename Number>
+inline Tensor<1,dim,Number>
+Point<dim,Number>::operator - (const Point<dim,Number> &p) const
+{
+  return (Tensor<1,dim,Number>(*this) -= p);
+}
+
+
+
+// Copy this point, subtract the offset from the copy, return the copy.
+template <int dim, typename Number>
+inline Point<dim,Number>
+Point<dim,Number>::operator - (const Tensor<1,dim,Number> &p) const
+{
+  Point<dim,Number> shifted (*this);
+  shifted -= p;
+  return shifted;
+}
+
+
+
+// Negate every coordinate into a fresh point.
+template <int dim, typename Number>
+inline Point<dim,Number>
+Point<dim,Number>::operator - () const
+{
+  Point<dim,Number> negated;
+  for (unsigned int d=0; d<dim; ++d)
+    negated.values[d] = -this->values[d];
+  return negated;
+}
+
+
+
+// Multiply each coordinate by the factor, storing into a ProductType point.
+template <int dim, typename Number> template <typename OtherNumber>
+inline
+Point<dim,typename ProductType<Number, typename EnableIfScalar<OtherNumber>::type>::type>
+Point<dim,Number>::operator * (const OtherNumber factor) const
+{
+  Point<dim,typename ProductType<Number, OtherNumber>::type> scaled;
+  for (unsigned int d=0; d<dim; ++d)
+    scaled[d] = (*this)[d] * factor;
+  return scaled;
+}
+
+
+
+// Divide each coordinate by the factor, storing into a ProductType point.
+template <int dim, typename Number> template <typename OtherNumber>
+inline
+Point<dim,typename ProductType<Number, typename EnableIfScalar<OtherNumber>::type>::type>
+Point<dim,Number>::operator / (const OtherNumber factor) const
+{
+  Point<dim,typename ProductType<Number, OtherNumber>::type> quotient;
+  for (unsigned int d=0; d<dim; ++d)
+    quotient[d] = (*this)[d] / factor;
+  return quotient;
+}
+
+
+
+// Accumulate the sum of coordinate-wise products, starting from Number().
+template <int dim, typename Number>
+inline Number
+Point<dim,Number>::operator * (const Tensor<1,dim,Number> &p) const
+{
+  Number dot = Number();
+  for (unsigned int d=0; d<dim; ++d)
+    dot += (*this)[d] * p[d];
+  return dot;
+}
+
+
+// Forwards to norm_square(), a member that is not defined in this class.
+template <int dim, typename Number>
+inline typename numbers::NumberTraits<Number>::real_type
+Point<dim,Number>::square () const
+{
+  return this->norm_square();
+}
+
+
+
+// Sum abs_square of the coordinate differences and take the square root.
+template <int dim, typename Number>
+inline typename numbers::NumberTraits<Number>::real_type
+Point<dim,Number>::distance (const Point<dim,Number> &p) const
+{
+  Number sum_of_squares = Number();
+  for (unsigned int d=0; d<dim; ++d)
+    {
+      const Number delta = this->values[d] - p(d);
+      sum_of_squares += numbers::NumberTraits<Number>::abs_square (delta);
+    }
+
+  return std::sqrt(sum_of_squares);
+}
+
+
+
+template <int dim, typename Number>
+template <class Archive>
+inline void
+Point<dim,Number>::serialize(Archive &ar, const unsigned int)
+{
+  // Point declares no data members of its own, so simply hand the
+  // Tensor<1,dim,Number> base-class view of this object to the archive.
+  // The version argument is unused.
+  ar & static_cast<Tensor<1,dim,Number> &>(*this);
+}
+
+#endif // DOXYGEN
+
+
+/*------------------------------- Global functions: Point ---------------------------*/
+
+
+/**
+ * Scale a point by a scalar written on the left, i.e. <tt>factor * p</tt>.
+ * The work is delegated to the member operator <tt>p * factor</tt>.
+ *
+ * @relates Point
+ * @relates EnableIfScalar
+ */
+template <int dim, typename Number, typename OtherNumber>
+inline
+Point<dim,typename ProductType<Number, typename EnableIfScalar<OtherNumber>::type>::type>
+operator * (const OtherNumber factor, const Point<dim,Number> &p)
+{
+  return p * factor;
+}
+
+
+
+/**
+ * Stream output for points: the coordinates are written in order, with a
+ * single blank between neighboring entries and nothing after the last one.
+ * @relates Point
+ */
+template <int dim, typename Number>
+inline std::ostream &
+operator << (std::ostream &out, const Point<dim,Number> &p)
+{
+  // all coordinates but the last are followed by the separator
+  for (unsigned int d=0; d<dim-1; ++d)
+    out << p[d] << ' ';
+  out << p[dim-1];
+
+  return out;
+}
+
+
+
+/**
+ * Input operator for points. Read the elements consecutively from the
+ * stream, in the order of their index.
+ * @relates Point
+ */
+template <int dim, typename Number>
+inline
+std::istream &operator >> (std::istream      &in,
+                           Point<dim,Number> &p)
+{
+  for (unsigned int i=0; i<dim; ++i)
+    in >> p[i];
+
+  return in;
+}
+
+
+#ifndef DOXYGEN
+
+/**
+ * Output operator for points of dimension 1. This overload exists separately
+ * from the general template in order to avoid a compiler warning that the
+ * loop is empty.
+ */
+template <typename Number>
+inline std::ostream &
+operator << (std::ostream &out, const Point<1,Number> &p)
+{
+  // a single coordinate: no separator is needed
+  out << p[0];
+
+  return out;
+}
+
+#endif // DOXYGEN
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/polynomial.h b/include/deal.II/base/polynomial.h
new file mode 100644
index 0000000..0260768
--- /dev/null
+++ b/include/deal.II/base/polynomial.h
@@ -0,0 +1,677 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__polynomial_h
+#define dealii__polynomial_h
+
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/point.h>
+#include <deal.II/base/std_cxx11/shared_ptr.h>
+
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * @addtogroup Polynomials
+ * @{
+ */
+
+/**
+ * A namespace in which classes relating to the description of 1d polynomial
+ * spaces are declared.
+ */
+namespace Polynomials
+{
+
+  /**
+   * Base class for all 1D polynomials. A polynomial is represented in this
+   * class by its coefficients, which are set through the constructor or by
+   * derived classes. Evaluation of a polynomial happens through the Horner
+   * scheme which provides both numerical stability and a minimal number of
+   * numerical operations.
+   *
+   * @author Ralf Hartmann, Guido Kanschat, 2000, 2006
+   */
+  template <typename number>
+  class Polynomial : public Subscriptor
+  {
+  public:
+    /**
+     * Constructor. The coefficients of the polynomial are passed as
+     * arguments, and denote the polynomial $\sum_i a[i] x^i$, i.e. the first
+     * element of the array denotes the constant term, the second the linear
+     * one, and so on. The degree of the polynomial represented by this object
+     * is thus the number of elements in the <tt>coefficient</tt> array minus
+     * one.
+     */
+    Polynomial (const std::vector<number> &coefficients);
+
+    /**
+     * Constructor creating a zero polynomial of degree @p n.
+     */
+    Polynomial (const unsigned int n);
+
+    /**
+     * Constructor for Lagrange polynomial and its point of evaluation. The
+     * idea is to construct $\prod_{i\neq j} \frac{x-x_i}{x_j-x_i}$, where j
+     * is the evaluation point specified as argument and the support points
+     * contain all points (including x_j, which will internally not be
+     * stored).
+     */
+    Polynomial (const std::vector<Point<1> > &lagrange_support_points,
+                const unsigned int            evaluation_point);
+
+    /**
+     * Default constructor creating an illegal object.
+     */
+    Polynomial ();
+
+    /**
+     * Return the value of this polynomial at the given point.
+     *
+     * This function uses the Horner scheme for numerical stability of the
+     * evaluation.
+     */
+    number value (const number x) const;
+
+    /**
+     * Return the values and the derivatives of the Polynomial at point
+     * <tt>x</tt>.  <tt>values[i], i=0,...,values.size()-1</tt> includes the
+     * <tt>i</tt>th derivative. The number of derivatives to be computed is
+     * thus determined by the size of the array passed.
+     *
+     * This function uses the Horner scheme for numerical stability of the
+     * evaluation.
+     */
+    void value (const number         x,
+                std::vector<number> &values) const;
+
+    /**
+     * Degree of the polynomial. This is the degree reflected by the number of
+     * coefficients provided by the constructor. Leading non-zero coefficients
+     * are not treated separately.
+     */
+    unsigned int degree () const;
+
+    /**
+     * Scale the abscissa of the polynomial.  Given the polynomial <i>p(t)</i>
+     * and the scaling <i>t = ax</i>, then the result of this operation is the
+     * polynomial <i>q</i>, such that <i>q(x) = p(t)</i>.
+     *
+     * The operation is performed in place.
+     */
+    void scale (const number factor);
+
+    /**
+     * Shift the abscissa of the polynomial.  Given the polynomial
+     * <i>p(t)</i> and the shift <i>t = x + a</i>, then the result of this
+     * operation is the polynomial <i>q</i>, such that <i>q(x) = p(t)</i>.
+     *
+     * The template parameter allows to compute the new coefficients with
+     * higher accuracy, since all computations are performed with type
+     * <tt>number2</tt>. This may be necessary, since this operation involves
+     * a big number of additions. On a Sun Sparc Ultra with Solaris 2.8, the
+     * difference between <tt>double</tt> and <tt>long double</tt> was not
+     * significant, though.
+     *
+     * The operation is performed in place, i.e. the coefficients of the
+     * present object are changed.
+     */
+    template <typename number2>
+    void shift (const number2 offset);
+
+    /**
+     * Compute the derivative of a polynomial.
+     */
+    Polynomial<number> derivative () const;
+
+    /**
+     * Compute the primitive of a polynomial. The coefficient of the zero
+     * order term of the polynomial is zero.
+     */
+    Polynomial<number> primitive () const;
+
+    /**
+     * Multiply with a scalar.
+     */
+    Polynomial<number> &operator *= (const double s);
+
+    /**
+     * Multiply with another polynomial.
+     */
+    Polynomial<number> &operator *= (const Polynomial<number> &p);
+
+    /**
+     * Add a second polynomial.
+     */
+    Polynomial<number> &operator += (const Polynomial<number> &p);
+
+    /**
+     * Subtract a second polynomial.
+     */
+    Polynomial<number> &operator -= (const Polynomial<number> &p);
+
+    /**
+     * Test for equality of two polynomials.
+     */
+    bool operator == (const Polynomial<number> &p)  const;
+
+    /**
+     * Print coefficients.
+     */
+    void print(std::ostream &out) const;
+
+    /**
+     * Write or read the data of this object to or from a stream for the
+     * purpose of serialization.
+     */
+    template <class Archive>
+    void serialize (Archive &ar, const unsigned int version);
+
+  protected:
+
+    /**
+     * This function performs the actual scaling.
+     */
+    static void scale (std::vector<number> &coefficients,
+                       const number         factor);
+
+    /**
+     * This function performs the actual shift
+     */
+    template <typename number2>
+    static void shift (std::vector<number> &coefficients,
+                       const number2        shift);
+
+    /**
+     * Multiply polynomial by a factor.
+     */
+    static void multiply (std::vector<number> &coefficients,
+                          const number factor);
+
+    /**
+     * Transforms polynomial form of product of linear factors into standard
+     * form, $\sum_i a_i x^i$. Deletes all data structures related to the
+     * product form.
+     */
+    void transform_into_standard_form ();
+
+    /**
+     * Coefficients of the polynomial $\sum_i a_i x^i$. This vector is filled
+     * by the constructor of this class and may be passed down by derived
+     * classes.
+     *
+     * This vector cannot be constant since we want to allow copying of
+     * polynomials.
+     */
+    std::vector<number> coefficients;
+
+    /**
+     * Stores whether the polynomial is in Lagrange product form, i.e.,
+     * constructed as a product $(x-x_0) (x-x_1) \ldots (x-x_n)/c$, or not.
+     */
+    bool in_lagrange_product_form;
+
+    /**
+     * If the polynomial is in Lagrange product form, i.e., constructed as a
+     * product $(x-x_0) (x-x_1) \ldots (x-x_n)/c$, store the shifts $x_i$.
+     */
+    std::vector<number> lagrange_support_points;
+
+    /**
+     * If the polynomial is in Lagrange product form, i.e., constructed as a
+     * product $(x-x_0) (x-x_1) \ldots (x-x_n)/c$, store the weight c.
+     */
+    number lagrange_weight;
+  };
+
+
+  /**
+   * Class generates Polynomial objects representing a monomial of degree n,
+   * that is, the function $x^n$.
+   *
+   * @author Guido Kanschat, 2004
+   */
+  template <typename number>
+  class Monomial : public Polynomial<number>
+  {
+  public:
+    /**
+     * Constructor, taking the degree <tt>n</tt> of the monomial and an
+     * optional <tt>coefficient</tt> (defaulting to 1) as arguments.
+     */
+    Monomial(const unsigned int n,
+             const double coefficient = 1.);
+
+    /**
+     * Return a vector of polynomials representing the monomials of degree
+     * zero through <tt>degree</tt>, which then spans the full space of
+     * polynomials up to the given degree. This function may be used to
+     * initialize the TensorProductPolynomials and PolynomialSpace classes.
+     */
+    static
+    std::vector<Polynomial<number> >
+    generate_complete_basis (const unsigned int degree);
+
+  private:
+    /**
+     * Helper needed by the constructor; takes the same arguments as it.
+     */
+    static std::vector<number> make_vector(unsigned int n,
+                                           const double coefficient);
+  };
+
+
+  /**
+   * Lagrange polynomials with equidistant interpolation points in [0,1]. The
+   * polynomial of degree <tt>n</tt> has got <tt>n+1</tt> interpolation
+   * points. The interpolation points are sorted in ascending order. This
+   * order gives an index to each interpolation point.  A Lagrangian
+   * polynomial is equal to 1 at its `support point', and 0 at all other
+   * interpolation points. For example, if the degree is 3, and the support
+   * point is 1, then the polynomial represented by this object is cubic and
+   * its value is 1 at the point <tt>x=1/3</tt>, and zero at the point
+   * <tt>x=0</tt>, <tt>x=2/3</tt>, and <tt>x=1</tt>. All the polynomials have
+   * polynomial degree equal to <tt>degree</tt>, but together they span the
+   * entire space of polynomials of degree less than or equal <tt>degree</tt>.
+   *
+   * The Lagrange polynomials are implemented up to degree 10.
+   *
+   * @author Ralf Hartmann, 2000
+   */
+  class LagrangeEquidistant: public Polynomial<double>
+  {
+  public:
+    /**
+     * Constructor. Takes the degree <tt>n</tt> of the Lagrangian polynomial
+     * and the index <tt>support_point</tt> of the support point. Fills the
+     * <tt>coefficients</tt> of the base class Polynomial.
+     */
+    LagrangeEquidistant (const unsigned int n,
+                         const unsigned int support_point);
+
+    /**
+     * Return a vector of polynomial objects of degree <tt>degree</tt>, which
+     * then spans the full space of polynomials up to the given degree. The
+     * polynomials are generated by calling the constructor of this class with
+     * the same degree but support point running from zero to <tt>degree</tt>.
+     * This function may be used to initialize the TensorProductPolynomials
+     * and PolynomialSpace classes.
+     */
+    static
+    std::vector<Polynomial<double> >
+    generate_complete_basis (const unsigned int degree);
+
+  private:
+
+    /**
+     * Computes the <tt>coefficients</tt> of the base class Polynomial. This
+     * function is <tt>static</tt> so that it can be called in the constructor.
+     */
+    static
+    void
+    compute_coefficients (const unsigned int n,
+                          const unsigned int support_point,
+                          std::vector<double> &a);
+  };
+
+
+
+  /**
+   * Given a set of points along the real axis, this function returns all
+   * Lagrange polynomials for interpolation of these points. The number of
+   * polynomials is equal to the number of points and the maximum degree is
+   * one less.
+   */
+  std::vector<Polynomial<double> >
+  generate_complete_Lagrange_basis (const std::vector<Point<1> > &points); // namespace-scope function, not a class member
+
+
+
+  /**
+   * Legendre polynomials of arbitrary degree. Constructing a Legendre
+   * polynomial of degree <tt>p</tt>, the coefficients will be computed by the
+   * three-term recursion formula.
+   *
+   * @note The polynomials defined by this class differ in two aspects from what
+   * is usually referred to as Legendre polynomials: (i) This class defines
+   * them on the reference interval $[0,1]$, rather than the commonly used
+   * interval $[-1,1]$. (ii) The polynomials have been scaled in such a way
+   * that they are orthonormal, not just orthogonal; consequently, the
+   * polynomials do not necessarily have boundary values equal to one.
+   *
+   * @author Guido Kanschat, 2000
+   */
+  class Legendre : public Polynomial<double>
+  {
+  public:
+    /**
+     * Constructor for polynomial of degree <tt>p</tt>.
+     */
+    Legendre (const unsigned int p);
+
+    /**
+     * Return a vector of Legendre polynomial objects of degrees zero through
+     * <tt>degree</tt>, which then spans the full space of polynomials up to
+     * the given degree. This function may be used to initialize the
+     * TensorProductPolynomials and PolynomialSpace classes.
+     */
+    static
+    std::vector<Polynomial<double> >
+    generate_complete_basis (const unsigned int degree);
+
+  private:
+    /**
+     * Coefficients for the interval $[0,1]$.
+     */
+    static std::vector<std_cxx11::shared_ptr<const std::vector<double> > > shifted_coefficients;
+
+    /**
+     * Vector with already computed coefficients. For each degree of the
+     * polynomial, we keep one pointer to the list of coefficients; we do so
+     * rather than keeping a vector of vectors in order to simplify writing
+     * multithread-safe code. In order to avoid memory leaks, we use a
+     * shared_ptr so that the memory of the vectors is correctly freed when
+     * the global destructor is called.
+     */
+    static std::vector<std_cxx11::shared_ptr<const std::vector<double> > > recursive_coefficients;
+
+    /**
+     * Compute coefficients recursively. The coefficients are stored in a
+     * static data vector to be available when needed next time. Since the
+     * recursion is performed for the interval $[-1,1]$, the polynomials are
+     * shifted to $[0,1]$ by the <tt>scale</tt> and <tt>shift</tt> functions
+     * of <tt>Polynomial</tt>, afterwards.
+     */
+    static void compute_coefficients (const unsigned int p);
+
+    /**
+     * Get the coefficients for the constructor. This way, the constructor
+     * can use the non-standard constructor of Polynomial.
+     */
+    static const std::vector<double> &
+    get_coefficients (const unsigned int k);
+  };
+
+  /**
+   * Lobatto polynomials of arbitrary degree on <tt>[0,1]</tt>.
+   *
+   * These polynomials are the integrated Legendre polynomials on [0,1]. The
+   * first two polynomials are the standard linear shape functions given by
+   * $l_0(x) = 1-x$ and $l_1(x) = x$. For $i\geq2$ we use the definition
+   * $l_i(x) = \frac{1}{\Vert L_{i-1}\Vert_2}\int_0^x L_{i-1}(t)\,dt$, where
+   * $L_i$ denotes the $i$-th Legendre polynomial on $[0,1]$. The Lobatto
+   * polynomials $l_0,\ldots,l_k$ form a complete basis of the polynomials
+   * space of degree $k$.
+   *
+   * Calling the constructor with a given index <tt>k</tt> will generate the
+   * polynomial with index <tt>k</tt>. But only for $k\geq 1$ the index equals
+   * the degree of the polynomial. For <tt>k==0</tt> also a polynomial of
+   * degree 1 is generated.
+   *
+   * These polynomials are used for the construction of the shape functions of
+   * Nédélec elements of arbitrary order.
+   *
+   * @author Markus Bürg, 2009
+   */
+  class Lobatto : public Polynomial<double>
+  {
+  public:
+    /**
+     * Constructor for polynomial of degree <tt>p</tt>. There is an exception
+     * for <tt>p==0</tt>, see the general documentation.
+     */
+    Lobatto (const unsigned int p = 0);
+
+    /**
+     * Return the polynomials with index <tt>0</tt> up to <tt>p</tt>.
+     * There is an exception for <tt>p==0</tt>, see the general documentation.
+     */
+    static std::vector<Polynomial<double> >
+    generate_complete_basis (const unsigned int p);
+
+  private:
+    /**
+     * Compute the coefficients for index <tt>p</tt> recursively and return them.
+     */
+    std::vector<double> compute_coefficients (const unsigned int p);
+  };
+
+
+
+  /**
+   * Hierarchical polynomials of arbitrary degree on <tt>[0,1]</tt>.
+   *
+   * When constructing a Hierarchical polynomial of degree <tt>p</tt>, the
+   * coefficients will be computed by a recursion formula.  The coefficients
+   * are stored in a static data vector to be available when needed next time.
+   *
+   * These hierarchical polynomials are based on those of Demkowicz, Oden,
+   * Rachowicz, and Hardy (CMAME 77 (1989) 79-112, Sec. 4). The first two
+   * polynomials are the standard linear shape functions given by $\phi_{0}(x)
+   * = 1 - x$ and $\phi_{1}(x) = x$. For $l \geq 2$ we use the definitions
+   * $\phi_{l}(x) = (2x-1)^l - 1, l = 2,4,6,...$ and $\phi_{l}(x) = (2x-1)^l -
+   * (2x-1), l = 3,5,7,...$. These satisfy the recursion relations
+   * $\phi_{l}(x) = (2x-1)\phi_{l-1}, l=3,5,7,...$ and $\phi_{l}(x) =
+   * (2x-1)\phi_{l-1} + \phi_{2}, l=4,6,8,...$.
+   *
+   * The degrees of freedom are the values at the vertices and the derivatives
+   * at the midpoint. Currently, we do not scale the polynomials in any way,
+   * although better conditioning of the element stiffness matrix could
+   * possibly be achieved with scaling.
+   *
+   * Calling the constructor with a given index <tt>p</tt> will generate the
+   * following: if <tt>p==0</tt>, then the resulting polynomial is the linear
+   * function associated with the left vertex, if <tt>p==1</tt> the one
+   * associated with the right vertex. For higher values of <tt>p</tt>, you
+   * get the polynomial of degree <tt>p</tt> that is orthogonal to all
+   * previous ones. Note that for <tt>p==0</tt> you therefore do <b>not</b>
+   * get a polynomial of degree zero, but one of degree one. This is to allow
+   * generating a complete basis for polynomial spaces, by just iterating over
+   * the indices given to the constructor.
+   *
+   * On the other hand, the function generate_complete_basis() creates a
+   * complete basis of given degree. In order to be consistent with the
+   * concept of a polynomial degree, if the given argument is zero, it does
+   * <b>not</b> return the linear polynomial described above, but rather a
+   * constant polynomial.
+   *
+   * @author Brian Carnes, 2002
+   */
+  class Hierarchical : public Polynomial<double>
+  {
+  public:
+    /**
+     * Constructor for polynomial of degree <tt>p</tt>. There is an exception
+     * for <tt>p==0</tt>, see the general documentation.
+     */
+    Hierarchical (const unsigned int p);
+
+    /**
+     * Return a vector of Hierarchical polynomial objects of degrees zero
+     * through <tt>degree</tt>, which then spans the full space of polynomials
+     * up to the given degree. Note that there is an exception if the given
+     * <tt>degree</tt> equals zero, see the general documentation of this
+     * class.
+     *
+     * This function may be used to initialize the TensorProductPolynomials,
+     * AnisotropicPolynomials, and PolynomialSpace classes.
+     */
+    static
+    std::vector<Polynomial<double> >
+    generate_complete_basis (const unsigned int degree);
+
+  private:
+    /**
+     * Compute coefficients recursively.
+     */
+    static void compute_coefficients (const unsigned int p);
+
+    /**
+     * Get the coefficients for the constructor. This way, the constructor
+     * can use the non-standard constructor of Polynomial.
+     */
+    static const std::vector<double> &
+    get_coefficients (const unsigned int p);
+
+    /**
+     * Vector with already computed coefficients. For each degree of the
+     * polynomial, we keep one pointer to the list of coefficients; we do so
+     * rather than keeping a vector of vectors in order to simplify writing
+     * multithread-safe code. In order to avoid memory leaks, we use a
+     * shared_ptr so that the memory of the vectors is correctly freed when
+     * the global destructor is called.
+     */
+    static std::vector<std_cxx11::shared_ptr<const std::vector<double> > > recursive_coefficients;
+  };
+
+
+  /**
+   * Polynomials for Hermite interpolation condition.
+   *
+   * This is the set of polynomials of degree at least three, such that the
+   * following interpolation conditions are met: the polynomials and their
+   * first derivatives vanish at the values <i>x</i>=0 and <i>x</i>=1, with
+   * the exceptions <i>p</i><sub>0</sub>(0)=1,
+   * <i>p</i><sub><i>1</i></sub>(1)=1, <i>p</i>'<sub>2</sub>(0)=1,
+   * <i>p'</i><sub>3</sub>(1)=1.
+   *
+   * For degree three, we obtain the standard four Hermitian interpolation
+   * polynomials, see for instance <a
+   * href="http://en.wikipedia.org/wiki/Cubic_Hermite_spline">Wikipedia</a>.
+   * For higher degrees, these are augmented first, by the polynomial of
+   * degree four with vanishing values and derivatives at <i>x</i>=0 and
+   * <i>x</i>=1, then by the product of this fourth order polynomial with
+   * Legendre polynomials of increasing order. The implementation is
+   * @f{align*}{
+   * p_0(x) &= 2x^3-3x^2+1 \\
+   * p_1(x) &= -2x^3+3x^2 \\
+   * p_2(x) &= x^3-2x^2+x  \\
+   * p_3(x) &= x^3-x^2 \\
+   * p_4(x) &= 16x^2(x-1)^2 \\
+   * \ldots & \ldots \\
+   * p_k(x) &= x^2(x-1)^2 L_{k-4}(x)
+   * @f}
+   *
+   * @author Guido Kanschat
+   * @date 2012
+   */
+  class HermiteInterpolation : public Polynomial<double>
+  {
+  public:
+    /**
+     * Constructor for polynomial with index <tt>p</tt> (an index into the
+     * sequence defined in the class documentation, not a degree).
+     */
+    HermiteInterpolation (const unsigned int p);
+
+    /**
+     * Return the polynomials with index <tt>0</tt> up to <tt>p+1</tt> in a
+     * space of degree up to <tt>p</tt>. Here, <tt>p</tt> has to be at least
+     * 3.
+     */
+    static std::vector<Polynomial<double> >
+    generate_complete_basis (const unsigned int p);
+  };
+}
+
+
+/** @} */
+
+/* -------------------------- inline functions --------------------- */
+
+namespace Polynomials
+{
+  template <typename number>
+  inline
+  Polynomial<number>::Polynomial () // default constructor; both member vectors stay empty
+    :
+    in_lagrange_product_form (false), // start out in coefficient form
+    lagrange_weight          (1.) // neutral weight for the product form
+  {}
+
+
+
+  template <typename number>
+  inline
+  unsigned int
+  Polynomial<number>::degree () const
+  {
+    if (in_lagrange_product_form == true)
+      {
+        return lagrange_support_points.size(); // one linear factor (x-x_j) per stored support point
+      }
+    else
+      {
+        Assert (coefficients.size()>0, ExcEmptyObject()); // an empty coefficient vector has no degree
+        return coefficients.size() - 1; // coefficients a_0..a_n are stored, hence n = size-1
+      }
+  }
+
+
+
+  template <typename number>
+  inline
+  number
+  Polynomial<number>::value (const number x) const
+  {
+    if (in_lagrange_product_form == false)
+      {
+        Assert (coefficients.size() > 0, ExcEmptyObject());
+
+        // Horner scheme: start with the highest coefficient, fold in the rest
+        const unsigned int m=coefficients.size();
+        number value = coefficients.back();
+        for (int k=m-2; k>=0; --k) // k is signed so that the test k>=0 can become false
+          value = value*x + coefficients[k];
+        return value;
+      }
+    else
+      {
+        // direct evaluation of Lagrange polynomial: product of (x-x_j) times the weight
+        const unsigned int m = lagrange_support_points.size();
+        number value = 1.;
+        for (unsigned int j=0; j<m; ++j)
+          value *= x-lagrange_support_points[j];
+        value *= lagrange_weight; // the stored weight multiplies the product
+        return value;
+      }
+  }
+
+
+
+  template <typename number>
+  template <class Archive>
+  inline
+  void
+  Polynomial<number>::serialize (Archive &ar, const unsigned int)
+  {
+    // forward to the serialization function of the Subscriptor base class first,
+    // then hand each data member to the archive; the version argument is unused.
+    ar &static_cast<Subscriptor &>(*this);
+    ar &coefficients;
+    ar &in_lagrange_product_form;
+    ar &lagrange_support_points;
+    ar &lagrange_weight;
+  }
+
+}
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/polynomial_space.h b/include/deal.II/base/polynomial_space.h
new file mode 100644
index 0000000..728a85f
--- /dev/null
+++ b/include/deal.II/base/polynomial_space.h
@@ -0,0 +1,433 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2002 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__polynomial_space_h
+#define dealii__polynomial_space_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/tensor.h>
+#include <deal.II/base/point.h>
+#include <deal.II/base/polynomial.h>
+#include <deal.II/base/smartpointer.h>
+
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * Representation of the space of polynomials of degree at most n in higher
+ * dimensions.
+ *
+ * Given a vector of <i>n+1</i> one-dimensional polynomials <i>P<sub>0</sub></i>
+ * to <i>P<sub>n</sub></i>, where <i>P<sub>i</sub></i> has degree <i>i</i>,
+ * this class generates all dim-dimensional polynomials of the form <i>
+ * P<sub>ijk</sub>(x,y,z) =
+ * P<sub>i</sub>(x)P<sub>j</sub>(y)P<sub>k</sub>(z)</i>, where the sum of
+ * <i>i</i>, <i>j</i> and <i>k</i> is less than or equal <i>n</i>.
+ *
+ * The output_indices() function prints the ordering of the polynomials, i.e.
+ * for each dim-dimensional polynomial in the polynomial space it gives the
+ * indices i,j,k of the one-dimensional polynomials in x,y and z direction.
+ * The ordering of the dim-dimensional polynomials can be changed by using the
+ * set_numbering() function.
+ *
+ * The standard ordering of polynomials is that indices for the first space
+ * dimension vary fastest and the last space dimension is slowest. In
+ * particular, if we take for simplicity the vector of monomials
+ * <i>x<sup>0</sup>, x<sup>1</sup>, x<sup>2</sup>,..., x<sup>n</sup></i>, we
+ * get
+ *
+ * <dl> <dt> 1D <dd> <i> x<sup>0</sup>, x<sup>1</sup>,...,x<sup>n</sup></i>
+ * <dt> 2D: <dd> <i> x<sup>0</sup>y<sup>0</sup>,
+ * x<sup>1</sup>y<sup>0</sup>,..., x<sup>n</sup>y<sup>0</sup>,
+ * <br>
+ * x<sup>0</sup>y<sup>1</sup>, x<sup>1</sup>y<sup>1</sup>,...,
+ * x<sup>n-1</sup>y<sup>1</sup>,
+ * <br>
+ * x<sup>0</sup>y<sup>2</sup>,... x<sup>n-2</sup>y<sup>2</sup>,
+ * <br>
+ * ...
+ * <br>
+ * x<sup>0</sup>y<sup>n-1</sup>, x<sup>1</sup>y<sup>n-1</sup>,
+ * <br>
+ * x<sup>0</sup>y<sup>n</sup> </i> <dt> 3D: <dd> <i>
+ * x<sup>0</sup>y<sup>0</sup>z<sup>0</sup>,...,
+ * x<sup>n</sup>y<sup>0</sup>z<sup>0</sup>,
+ * <br>
+ * x<sup>0</sup>y<sup>1</sup>z<sup>0</sup>,...,
+ * x<sup>n-1</sup>y<sup>1</sup>z<sup>0</sup>,
+ * <br>
+ * ...
+ * <br>
+ * x<sup>0</sup>y<sup>n</sup>z<sup>0</sup>,
+ * <br>
+ * x<sup>0</sup>y<sup>0</sup>z<sup>1</sup>,...
+ * x<sup>n-1</sup>y<sup>0</sup>z<sup>1</sup>,
+ * <br>
+ * ...
+ * <br>
+ * x<sup>0</sup>y<sup>n-1</sup>z<sup>1</sup>,
+ * <br>
+ * x<sup>0</sup>y<sup>0</sup>z<sup>2</sup>,...
+ * x<sup>n-2</sup>y<sup>0</sup>z<sup>2</sup>,
+ * <br>
+ * ...
+ * <br>
+ * x<sup>0</sup>y<sup>0</sup>z<sup>n</sup> </i> </dl>
+ *
+ * @ingroup Polynomials
+ * @author Guido Kanschat, Wolfgang Bangerth, Ralf Hartmann 2002, 2003, 2004,
+ * 2005
+ */
+template <int dim>
+class PolynomialSpace
+{
+public:
+  /**
+   * Access to the dimension of this object, for checking and automatic
+   * setting of dimension in other classes.
+   */
+  static const unsigned int dimension = dim;
+
+  /**
+   * Constructor. <tt>pols</tt> is a vector of pointers to one-dimensional
+   * polynomials and will be copied into a private member variable. The static
+   * type of the template argument <tt>pols</tt> needs to be convertible to
+   * Polynomials::Polynomial@<double@>, i.e. should usually be a derived class
+   * of Polynomials::Polynomial@<double@>.
+   */
+  template <class Pol>
+  PolynomialSpace (const std::vector<Pol> &pols);
+
+  /**
+   * Prints the list of the indices to <tt>out</tt>.
+   */
+  template <class StreamType>
+  void output_indices(StreamType &out) const;
+
+  /**
+   * Sets the ordering of the polynomials. Requires
+   * <tt>renumber.size()==n()</tt>. Stores a copy of <tt>renumber</tt>.
+   */
+  void set_numbering(const std::vector<unsigned int> &renumber);
+
+  /**
+   * Computes the value and the first through fourth derivatives of each
+   * polynomial at <tt>unit_point</tt>.
+   *
+   * The size of the vectors must either be equal 0 or equal n(). In the first
+   * case, the function will not compute these values, i.e. you indicate what
+   * you want to have computed by resizing those vectors which you want
+   * filled.
+   *
+   * If you need values or derivatives of all polynomials then use this
+   * function, rather than using any of the compute_value(), compute_grad() or
+   * compute_grad_grad() functions, see below, in a loop over all polynomials.
+   */
+  void compute (const Point<dim>            &unit_point,
+                std::vector<double>         &values,
+                std::vector<Tensor<1,dim> > &grads,
+                std::vector<Tensor<2,dim> > &grad_grads,
+                std::vector<Tensor<3,dim> > &third_derivatives,
+                std::vector<Tensor<4,dim> > &fourth_derivatives) const;
+
+  /**
+   * Computes the value of the <tt>i</tt>th polynomial at unit point
+   * <tt>p</tt>.
+   *
+   * Consider using compute() instead.
+   */
+  double compute_value (const unsigned int i,
+                        const Point<dim> &p) const;
+
+  /**
+   * Computes the <tt>order</tt>th derivative of the <tt>i</tt>th polynomial
+   * at unit point <tt>p</tt>.
+   *
+   * Consider using compute() instead.
+   *
+   * @tparam order The order of the derivative.
+   */
+  template <int order>
+  Tensor<order,dim> compute_derivative (const unsigned int i,
+                                        const Point<dim> &p) const;
+
+  /**
+   * Computes the gradient of the <tt>i</tt>th polynomial at unit point
+   * <tt>p</tt>.
+   *
+   * Consider using compute() instead.
+   */
+  Tensor<1,dim> compute_grad (const unsigned int i,
+                              const Point<dim> &p) const;
+
+  /**
+   * Computes the second derivative (grad_grad) of the <tt>i</tt>th polynomial
+   * at unit point <tt>p</tt>.
+   *
+   * Consider using compute() instead.
+   */
+  Tensor<2,dim> compute_grad_grad (const unsigned int i,
+                                   const Point<dim> &p) const;
+
+  /**
+   * Return the number of polynomials spanning the space represented by this
+   * class. Here, if <tt>N</tt> is the number of one-dimensional polynomials
+   * given, then the result of this function is <i>N</i> in 1d,
+   * <i>N(N+1)/2</i> in 2d, and <i>N(N+1)(N+2)/6</i> in 3d.
+   */
+  unsigned int n () const;
+
+  /**
+   * Degree of the space. This is by definition the number of polynomials
+   * given to the constructor, NOT the maximal degree of a polynomial in this
+   * vector. The latter value is never checked and therefore left to the
+   * application.
+   */
+  unsigned int degree () const;
+
+  /**
+   * Static function used in the constructor to compute the number of
+   * polynomials.
+   *
+   * @warning The argument `n` is not the maximal degree, but the number of
+   * one-dimensional polynomials, thus the degree plus one.
+   */
+  static unsigned int compute_n_pols (const unsigned int n);
+
+protected:
+
+  /**
+   * Compute numbers in x, y and z direction. Given an index <tt>n</tt> in the
+   * d-dimensional polynomial space, compute the indices i,j,k such that
+   * <i>p<sub>n</sub>(x,y,z) =
+   * p<sub>i</sub>(x)p<sub>j</sub>(y)p<sub>k</sub>(z)</i>.
+   */
+  void compute_index (const unsigned int n,
+                      unsigned int      (&index)[dim>0?dim:1]) const;
+
+private:
+  /**
+   * Copy of the vector <tt>pols</tt> of polynomials given to the constructor.
+   */
+  const std::vector<Polynomials::Polynomial<double> > polynomials;
+
+  /**
+   * Store the precomputed value which the <tt>n()</tt> function returns.
+   */
+  const unsigned int n_pols;
+
+  /**
+   * Index map for reordering the polynomials.
+   */
+  std::vector<unsigned int> index_map;
+
+  /**
+   * Inverse of <tt>index_map</tt>, judging by its name; starts as the identity.
+   */
+  std::vector<unsigned int> index_map_inverse;
+};
+
+
+/* -------------- declaration of explicit specializations --- */
+
+template <> // dim=1: declaration of an explicit specialization, no body here
+void PolynomialSpace<1>::compute_index(const unsigned int n,
+                                       unsigned int      (&index)[1]) const;
+template <> // dim=2: declaration of an explicit specialization, no body here
+void PolynomialSpace<2>::compute_index(const unsigned int n,
+                                       unsigned int      (&index)[2]) const;
+template <> // dim=3: declaration of an explicit specialization, no body here
+void PolynomialSpace<3>::compute_index(const unsigned int n,
+                                       unsigned int      (&index)[3]) const;
+
+
+
+/* -------------- inline and template functions ------------- */
+
+template <int dim>
+template <class Pol>
+PolynomialSpace<dim>::PolynomialSpace (const std::vector<Pol> &pols)
+  :
+  polynomials (pols.begin(), pols.end()), // copies the range, converting each Pol to Polynomial<double>
+  n_pols (compute_n_pols(polynomials.size())), // reads the member above, which is declared (hence initialized) first
+  index_map(n_pols),
+  index_map_inverse(n_pols)
+{
+  // by default, set both index maps
+  // to the identity. The numbering can
+  // be changed by the user through the
+  // set_numbering function
+  for (unsigned int i=0; i<n_pols; ++i)
+    {
+      index_map[i]=i;
+      index_map_inverse[i]=i;
+    }
+}
+
+
+template<int dim>
+inline
+unsigned int
+PolynomialSpace<dim>::n() const
+{
+  return n_pols; // value precomputed in the constructor via compute_n_pols()
+}
+
+
+
+template<int dim>
+inline
+unsigned int
+PolynomialSpace<dim>::degree() const
+{
+  return polynomials.size(); // number of 1d polynomials stored, not their maximal degree
+}
+
+
+template <int dim>
+template <class StreamType>
+void
+PolynomialSpace<dim>::output_indices(StreamType &out) const
+{
+  unsigned int ix[dim]; // per-direction indices of the current polynomial
+  for (unsigned int i=0; i<n_pols; ++i)
+    {
+      compute_index(i,ix);
+      out << i << "\t"; // one line per polynomial: its number, a tab, ...
+      for (unsigned int d=0; d<dim; ++d)
+        out << ix[d] << " "; // ... then each 1d index followed by a blank
+      out << std::endl;
+    }
+}
+
+template <int dim>
+template <int order>
+Tensor<order,dim>
+PolynomialSpace<dim>::compute_derivative (const unsigned int i,
+                                          const Point<dim> &p) const
+{
+  unsigned int indices[dim]; // index of the 1d polynomial used in each coordinate direction
+  compute_index (i, indices);
+
+  double v [dim][order+1]; // v[d][j]: presumably the j-th derivative of the 1d factor in direction d at p(d)
+  {
+    std::vector<double> tmp (order+1);
+    for (unsigned int d=0; d<dim; ++d)
+      {
+        polynomials[indices[d]].value (p(d), tmp); // fills tmp; this overload is declared outside the part shown here
+        for (unsigned int j=0; j<order+1; ++j)
+          v[d][j] = tmp[j];
+      }
+  }
+
+  Tensor<order,dim> derivative;
+  switch (order) // order is a template argument, so only one case is ever executed
+    {
+    case 1: // the cast below is an identity when order==1; it only keeps the other instantiations compilable
+    {
+      Tensor<1,dim> &derivative_1 = *reinterpret_cast<Tensor<1,dim>*>(&derivative);
+      for (unsigned int d=0; d<dim; ++d)
+        {
+          derivative_1[d] = 1.;
+          for (unsigned int x=0; x<dim; ++x)
+            {
+              unsigned int x_order=0; // how often direction x is differentiated in this entry
+              if (d==x) ++x_order;
+
+              derivative_1[d] *= v[x][x_order];
+            }
+        }
+
+      return derivative;
+    }
+    case 2: // same scheme with two derivative directions d1, d2
+    {
+      Tensor<2,dim> &derivative_2 = *reinterpret_cast<Tensor<2,dim>*>(&derivative);
+      for (unsigned int d1=0; d1<dim; ++d1)
+        for (unsigned int d2=0; d2<dim; ++d2)
+          {
+            derivative_2[d1][d2] = 1.;
+            for (unsigned int x=0; x<dim; ++x)
+              {
+                unsigned int x_order=0;
+                if (d1==x) ++x_order;
+                if (d2==x) ++x_order;
+
+                derivative_2[d1][d2] *= v[x][x_order];
+              }
+          }
+
+      return derivative;
+    }
+    case 3: // same scheme with three derivative directions
+    {
+      Tensor<3,dim> &derivative_3 = *reinterpret_cast<Tensor<3,dim>*>(&derivative);
+      for (unsigned int d1=0; d1<dim; ++d1)
+        for (unsigned int d2=0; d2<dim; ++d2)
+          for (unsigned int d3=0; d3<dim; ++d3)
+            {
+              derivative_3[d1][d2][d3] = 1.;
+              for (unsigned int x=0; x<dim; ++x)
+                {
+                  unsigned int x_order=0;
+                  if (d1==x) ++x_order;
+                  if (d2==x) ++x_order;
+                  if (d3==x) ++x_order;
+
+                  derivative_3[d1][d2][d3] *= v[x][x_order];
+                }
+            }
+
+      return derivative;
+    }
+    case 4: // same scheme with four derivative directions
+    {
+      Tensor<4,dim> &derivative_4 = *reinterpret_cast<Tensor<4,dim>*>(&derivative);
+      for (unsigned int d1=0; d1<dim; ++d1)
+        for (unsigned int d2=0; d2<dim; ++d2)
+          for (unsigned int d3=0; d3<dim; ++d3)
+            for (unsigned int d4=0; d4<dim; ++d4)
+              {
+                derivative_4[d1][d2][d3][d4] = 1.;
+                for (unsigned int x=0; x<dim; ++x)
+                  {
+                    unsigned int x_order=0;
+                    if (d1==x) ++x_order;
+                    if (d2==x) ++x_order;
+                    if (d3==x) ++x_order;
+                    if (d4==x) ++x_order;
+
+                    derivative_4[d1][d2][d3][d4] *= v[x][x_order];
+                  }
+              }
+
+      return derivative;
+    }
+    default: // any other order is not implemented
+    {
+      Assert (false, ExcNotImplemented());
+      return derivative;
+    }
+    }
+
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/polynomials_abf.h b/include/deal.II/base/polynomials_abf.h
new file mode 100644
index 0000000..a6f9188
--- /dev/null
+++ b/include/deal.II/base/polynomials_abf.h
@@ -0,0 +1,188 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__polynomials_abf_h
+#define dealii__polynomials_abf_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/tensor.h>
+#include <deal.II/base/point.h>
+#include <deal.II/base/polynomial.h>
+#include <deal.II/base/polynomial_space.h>
+#include <deal.II/base/tensor_product_polynomials.h>
+#include <deal.II/base/table.h>
+#include <deal.II/base/thread_management.h>
+
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * This class implements the <i>H<sup>div</sup></i>-conforming, vector-valued
+ * Arnold-Boffi-Falk polynomials as described in the article by Arnold-Boffi-
+ * Falk: Quadrilateral H(div) finite elements, SIAM J. Numer. Anal. Vol.42,
+ * No.6, pp.2429-2451
+ *
+ *
+ * The ABF polynomials are constructed such that the divergence is in the
+ * tensor product polynomial space <i>Q<sub>k</sub></i>. Therefore, the
+ * polynomial order of each component must be two orders higher in the
+ * corresponding direction, yielding the polynomial spaces
+ * <i>(Q<sub>k+2,k</sub>, Q<sub>k,k+2</sub>)</i> and <i>(Q<sub>k+2,k,k</sub>,
+ * Q<sub>k,k+2,k</sub>, Q<sub>k,k,k+2</sub>)</i> in 2D and 3D, resp.
+ *
+ * @ingroup Polynomials
+ * @author Oliver Kayser-Herold, based on code from Guido Kanschat
+ * @date 2006
+ */
+template <int dim>
+class PolynomialsABF
+{
+public:
+  /**
+   * Constructor. Creates all basis functions for Arnold-Boffi-Falk (ABF)
+   * polynomials of given degree.
+   *
+   * @arg k: the degree of the ABF space, which is the degree of
+   * the largest tensor product polynomial space <i>Q<sub>k</sub></i>
+   * contained.
+   */
+  PolynomialsABF (const unsigned int k);
+
+  /**
+   * Destructor deleting the polynomials.
+   */
+  ~PolynomialsABF ();
+
+  /**
+   * Computes the value and the first and second derivatives of each ABF
+   * polynomial at @p unit_point.
+   *
+   * The size of the vectors must either be zero or equal <tt>n()</tt>.  In
+   * the first case, the function will not compute these values.
+   *
+   * If you need values or derivatives of all tensor product polynomials then
+   * use this function, rather than using any of the <tt>compute_value</tt>,
+   * <tt>compute_grad</tt> or <tt>compute_grad_grad</tt> functions, see below,
+   * in a loop over all tensor product polynomials.
+   */
+  void compute (const Point<dim>            &unit_point,
+                std::vector<Tensor<1,dim> > &values,
+                std::vector<Tensor<2,dim> > &grads,
+                std::vector<Tensor<3,dim> > &grad_grads,
+                std::vector<Tensor<4,dim> > &third_derivatives,
+                std::vector<Tensor<5,dim> > &fourth_derivatives) const;
+
+  /**
+   * Returns the number of ABF polynomials.
+   */
+  unsigned int n () const;
+
+  /**
+   * Returns the degree of the ABF space, which is two less than the highest
+   * polynomial degree.
+   */
+  unsigned int degree () const;
+
+  /**
+   * Return the name of the space, which is <tt>ABF</tt>.
+   */
+  std::string name () const;
+
+  /**
+   * Return the number of polynomials in the space <tt>ABF(degree)</tt> without
+   * requiring to build an object of PolynomialsABF. This is required by the
+   * FiniteElement classes.
+   */
+  static unsigned int compute_n_pols(unsigned int degree);
+
+private:
+  /**
+   * The degree of this object as given to the constructor.
+   */
+  const unsigned int my_degree;
+
+  /**
+   * An object representing the polynomial space for a single component. We
+   * can re-use it by rotating the coordinates of the evaluation point.
+   */
+  AnisotropicPolynomials<dim> *polynomial_space;
+
+  /**
+   * Number of ABF polynomials.
+   */
+  unsigned int n_pols;
+
+  /**
+   * A mutex that guards the following scratch arrays.
+   */
+  mutable Threads::Mutex mutex;
+
+  /**
+   * Auxiliary memory.
+   */
+  mutable std::vector<double> p_values;
+
+  /**
+   * Auxiliary memory.
+   */
+  mutable std::vector<Tensor<1,dim> > p_grads;
+
+  /**
+   * Auxiliary memory.
+   */
+  mutable std::vector<Tensor<2,dim> > p_grad_grads;
+
+  /**
+   * Auxiliary memory.
+   */
+  mutable std::vector<Tensor<3,dim> > p_third_derivatives;
+
+  /**
+   * Auxiliary memory.
+   */
+  mutable std::vector<Tensor<4,dim> > p_fourth_derivatives;
+};
+
+
+template <int dim>
+inline unsigned int
+PolynomialsABF<dim>::n() const
+{
+  return n_pols;
+}
+
+
+template <int dim>
+inline unsigned int
+PolynomialsABF<dim>::degree() const
+{
+  return my_degree;
+}
+
+
+template <int dim>
+inline std::string
+PolynomialsABF<dim>::name() const
+{
+  return "ABF";
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/polynomials_adini.h b/include/deal.II/base/polynomials_adini.h
new file mode 100644
index 0000000..d02826c
--- /dev/null
+++ b/include/deal.II/base/polynomials_adini.h
@@ -0,0 +1,135 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__polynomials_adini_h
+#define dealii__polynomials_adini_h
+
+#include <deal.II/base/point.h>
+#include <deal.II/base/tensor.h>
+#include <deal.II/base/table.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * The cubic polynomial space for the Adini element
+ *
+ * This space consists of the cubic space <i>P<sub>3</sub></i> augmented by
+ * the functions <i>xy<sup>3</sup></i> and <i>x<sup>3</sup>y</i>.
+ *
+ * The basis of the space is chosen to match the node functionals of the Adini
+ * element.
+ *
+ * @todo This polynomial space is implemented in 2D only.
+ *
+ * @author Bärbel Janssen, 2007
+ */
+
+class PolynomialsAdini
+{
+public:
+  /**
+   * Constructor for the polynomials of the described space
+   */
+  PolynomialsAdini ();
+  /**
+   * Computes the value and the first and second derivatives of each
+   * polynomial at <tt>unit_point</tt>.
+   *
+   * The size of the vectors must either be equal 0 or equal n(). In the first
+   * case, the function will not compute these values, i.e. you indicate what
+   * you want to have computed by resizing those vectors which you want
+   * filled.
+   *
+   * If you need values or derivatives of all polynomials then use this
+   * function, rather than using any of the compute_value(), compute_grad() or
+   * compute_grad_grad() functions, see below, in a loop over all polynomials.
+   */
+
+  void compute (const Point<2> &unit_point,
+                std::vector<double> &values,
+                std::vector<Tensor<1,2> > &grads,
+                std::vector< Tensor<2,2> > &grad_grads) const;
+
+  /**
+   * Computes the value of the <tt>i</tt>th polynomial at the point <tt>p</tt>.
+   *
+   * Consider using compute() instead.
+   */
+
+  double compute_value (const unsigned int i,
+                        const Point<2> &p) const;
+
+  /**
+   * Computes the gradient of the <tt>i</tt>th polynomial at the point
+   * <tt>p</tt>.
+   *
+   * Consider using compute() instead.
+   */
+
+  Tensor<1,2> compute_grad (const unsigned int i,
+                            const Point<2> &p) const;
+  /**
+   * Computes the second derivative (grad_grad) of the <tt>i</tt>th polynomial
+   * at the point <tt>p</tt>.
+   *
+   * Consider using compute() instead.
+   */
+
+  Tensor<2,2> compute_grad_grad (const unsigned int i, const Point<2> &p) const;
+  Tensor<2,2> compute_grad_grad_2 (const unsigned int i, const Point<2> &p) const;
+
+private:
+  /**
+   * Store the coefficients of the polynomials in the order
+   * $1,x,y,x^2,y^2,xy,x^3,y^3,xy^2,x^2y,x^3y,xy^3$
+   */
+  Table<2, double> coef;
+
+  /**
+   * Store the coefficients of the x-derivative of the polynomials in the
+   * order $1,x,y,x^2,y^2,xy,x^3,y^3,xy^2,x^2y,x^3y,xy^3$
+   */
+
+  Table<2, double> dx;
+  /**
+   * Store the coefficients of the y-derivative of the polynomials in the
+   * order $1,x,y,x^2,y^2,xy,x^3,y^3,xy^2,x^2y,x^3y,xy^3$
+   */
+
+  Table<2, double> dy;
+  /**
+   * Store the coefficients of the second x-derivative of the polynomials in
+   * the order $1,x,y,x^2,y^2,xy,x^3,y^3,xy^2,x^2y,x^3y,xy^3$
+   */
+  Table<2, double> dxx;
+  /**
+   * Store the coefficients of the second y-derivative of the polynomials in
+   * the order $1,x,y,x^2,y^2,xy,x^3,y^3,xy^2,x^2y,x^3y,xy^3$
+   */
+  Table<2, double> dyy;
+  /**
+   * Store the coefficients of the second mixed derivative of the polynomials
+   * in the order $1,x,y,x^2,y^2,xy,x^3,y^3,xy^2,x^2y,x^3y,xy^3$
+   */
+  Table<2, double> dxy;
+
+};
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/polynomials_bdm.h b/include/deal.II/base/polynomials_bdm.h
new file mode 100644
index 0000000..9af1b22
--- /dev/null
+++ b/include/deal.II/base/polynomials_bdm.h
@@ -0,0 +1,231 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__polynomials_BDM_h
+#define dealii__polynomials_BDM_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/tensor.h>
+#include <deal.II/base/point.h>
+#include <deal.II/base/polynomial.h>
+#include <deal.II/base/polynomial_space.h>
+#include <deal.II/base/table.h>
+#include <deal.II/base/thread_management.h>
+
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * This class implements the <i>H<sup>div</sup></i>-conforming, vector-valued
+ * Brezzi-Douglas-Marini (<i> BDM </i>) polynomials described in Brezzi and
+ * Fortin's <i>Mixed and Hybrid Finite Element Methods</i> (refer to pages
+ * 119 - 124).
+ *
+ * The <i> BDM </i> polynomial space contains the entire $(P_{k})^{n}$
+ * space (constructed with PolynomialSpace Legendre polynomials) as well as
+ * part of $(P_{k+1})^{n}$
+ * (i.e. $(P_{k})^{n} \subset BDM_{k} \subset (P_{k+1})^{n}$).  Furthermore,
+ * $BDM_{k}$ elements are designed so that
+ * $\nabla \cdot q \in P_{k-1} (K)$ and $q \cdot n |_{e_{i}} \in P_{k}(e_{i})$.
+ * More details
+ * of two and three dimensional $BDM_{k}$ elements are given below.
+ *<dl>
+ *   <dt> In 2D:
+ *   <dd> $ BDM_{k} = \{\mathbf{q} | \mathbf{q} = p_{k} (x,y) +
+ *      r \; \text{curl} (x^{k+1}y) + s \;
+ *      \text{curl} (xy^{k+1}), p_{k} \in (P_{k})^{2} \}$.
+ *
+ *   Note: the curl of a scalar function is given by $\text{curl}(f(x,y)) =
+ *   \begin{pmatrix} f_{y}(x,y) \\ -f_{x}(x,y) \end{pmatrix}$.
+ *
+ *   The basis used to construct the $BDM_{1}$ shape functions is
+ *   @f{align*}{
+ *     \phi_0 = \begin{pmatrix} 1 \\ 0 \end{pmatrix},
+ *     \phi_1 = \begin{pmatrix} -\sqrt{3}+2\sqrt{3}x \\ 0 \end{pmatrix},
+ *     \phi_2 = \begin{pmatrix} -\sqrt{3}+2\sqrt{3}y \\ 0 \end{pmatrix},
+ *     \phi_3 = \begin{pmatrix} 0 \\ 1 \end{pmatrix},
+ *     \phi_4 = \begin{pmatrix} 0 \\ -\sqrt{3}+2\sqrt{3}x \end{pmatrix},
+ *     \phi_5 = \begin{pmatrix} 0 \\ -\sqrt{3}+2\sqrt{3}y \end{pmatrix},
+ *     \phi_6 = \begin{pmatrix} x^2 \\ -2xy \end{pmatrix},
+ *     \phi_7 = \begin{pmatrix} 2xy \\ -y^2 \end{pmatrix}.
+ *   @f}
+ *
+ *   The dimension of the $BDM_{k}$ space is
+ * $(k+1)(k+2)+2$, with $k+1$ unknowns per
+ * edge and $k(k-1)$ interior unknowns.
+ *
+ *   <dt> In 3D:
+ *   <dd> $ BDM_{k} =
+ *        \{\mathbf{q} | \mathbf{q} = p_{k} (x,y,z)
+ *        + \sum_{i=0}^{k} (
+ *        r_{i} \; \text{curl}
+ *        \begin{pmatrix} 0\\0\\xy^{i+1}z^{k-i} \end{pmatrix}
+ *        + s_{i} \; \text{curl}
+ *        \begin{pmatrix} yz^{i+1}x^{k-i}\\0\\0 \end{pmatrix}
+ *        + t_{i} \; \text{curl}
+ *        \begin{pmatrix}0\\zx^{i+1}y^{k-i}\\0\end{pmatrix})
+ *        , p_{k} \in (P_{k})^{3} \}$.
+ *
+ *   Note: the 3D description of $BDM_{k}$ is not unique.  See <i>Mixed and
+ *   Hybrid Finite Element Methods</i> page 122 for an alternative definition.
+ *
+ *   The dimension of the $BDM_{k}$ space is
+ *   $\dfrac{(k+1)(k+2)(k+3)}{2}+3(k+1)$, with $\dfrac{(k+1)(k+2)}{2}$
+ *   unknowns per face and $\dfrac{(k-1)k(k+1)}{2}$ interior unknowns.
+ *
+ *</dl>
+ *
+ *
+ *
+ * @ingroup Polynomials
+ * @author Guido Kanschat
+ * @date 2003, 2005, 2009
+ */
+template <int dim>
+class PolynomialsBDM
+{
+public:
+  /**
+   * Constructor. Creates all basis functions for BDM polynomials of given
+   * degree.
+   *
+   * @arg k: the degree of the BDM-space, which is the degree of the largest
+   * complete polynomial space <i>P<sub>k</sub></i> contained in the BDM-
+   * space.
+   */
+  PolynomialsBDM (const unsigned int k);
+
+  /**
+   * Computes the value and the first and second derivatives of each BDM
+   * polynomial at @p unit_point.
+   *
+   * The size of the vectors must either be zero or equal <tt>n()</tt>.  In
+   * the first case, the function will not compute these values.
+   *
+   * If you need values or derivatives of all tensor product polynomials then
+   * use this function, rather than using any of the <tt>compute_value</tt>,
+   * <tt>compute_grad</tt> or <tt>compute_grad_grad</tt> functions, see below,
+   * in a loop over all tensor product polynomials.
+   */
+  void compute (const Point<dim>            &unit_point,
+                std::vector<Tensor<1,dim> > &values,
+                std::vector<Tensor<2,dim> > &grads,
+                std::vector<Tensor<3,dim> > &grad_grads,
+                std::vector<Tensor<4,dim> > &third_derivatives,
+                std::vector<Tensor<5,dim> > &fourth_derivatives) const;
+
+  /**
+   * Returns the number of BDM polynomials.
+   */
+  unsigned int n () const;
+
+  /**
+   * Returns the degree of the BDM space, which is one less than the highest
+   * polynomial degree.
+   */
+  unsigned int degree () const;
+
+  /**
+   * Return the name of the space, which is <tt>BDM</tt>.
+   */
+  std::string name () const;
+
+  /**
+   * Return the number of polynomials in the space <tt>BDM(degree)</tt>
+   * without requiring to build an object of PolynomialsBDM. This is required
+   * by the FiniteElement classes.
+   */
+  static unsigned int compute_n_pols(unsigned int degree);
+
+private:
+  /**
+   * An object representing the polynomial space used here. The constructor
+   * fills this with the monomial basis.
+   */
+  const PolynomialSpace<dim> polynomial_space;
+
+  /**
+   * Storage for monomials. In 2D, this is just the polynomial of order
+   * <i>k</i>. In 3D, we need all polynomials from degree zero to <i>k</i>.
+   */
+  std::vector<Polynomials::Polynomial<double> > monomials;
+
+  /**
+   * Number of BDM polynomials.
+   */
+  unsigned int n_pols;
+
+  /**
+   * A mutex that guards the following scratch arrays.
+   */
+  mutable Threads::Mutex mutex;
+
+  /**
+   * Auxiliary memory.
+   */
+  mutable std::vector<double> p_values;
+
+  /**
+   * Auxiliary memory.
+   */
+  mutable std::vector<Tensor<1,dim> > p_grads;
+
+  /**
+   * Auxiliary memory.
+   */
+  mutable std::vector<Tensor<2,dim> > p_grad_grads;
+
+  /**
+   * Auxiliary memory.
+   */
+  mutable std::vector<Tensor<3,dim> > p_third_derivatives;
+
+  /**
+   * Auxiliary memory.
+   */
+  mutable std::vector<Tensor<4,dim> > p_fourth_derivatives;
+};
+
+
+template <int dim>
+inline unsigned int
+PolynomialsBDM<dim>::n() const
+{
+  return n_pols;
+}
+
+
+template <int dim>
+inline unsigned int
+PolynomialsBDM<dim>::degree() const
+{
+  return polynomial_space.degree()-1;
+}
+
+
+template <int dim>
+inline std::string
+PolynomialsBDM<dim>::name() const
+{
+  return "BDM";
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/polynomials_bernstein.h b/include/deal.II/base/polynomials_bernstein.h
new file mode 100644
index 0000000..d3a2dbb
--- /dev/null
+++ b/include/deal.II/base/polynomials_bernstein.h
@@ -0,0 +1,64 @@
+#include <deal.II/base/polynomial.h>
+#include <fstream>
+#include <iostream>
+
+#ifndef dealii__polynomials_bernstein_h
+#define dealii__polynomials_bernstein_h
+
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * This class implements Bernstein basis polynomials of desired degree as
+ * described in
+ * http://www.idav.ucdavis.edu/education/CAGDNotes/Bernstein-Polynomials.pdf
+ * in the paragraph "Converting from the Bernstein Basis to the Power Basis".
+ *
+ * They are used to create the Bernstein finite element FE_Bernstein.
+ *
+ * @ingroup Polynomials
+ * @author Luca Heltai, Marco Tezzele
+ * @date 2013, 2015
+ */
+template <typename number>
+class PolynomialsBernstein : public Polynomials::Polynomial<number>
+{
+public:
+  /**
+   * Construct the @p index -th Bernstein Polynomial of degree @p degree.
+   *
+   * @f{align*}{
+   * B_{\text{index}, \text{degree}} (t)
+   *   &= \text{binom}(\text{degree}, \text{index})
+   *      \cdot t^{\text{index}}
+   *      \cdot (1 - t)^{\text{degree} - \text{index}} \\
+   *   &= \sum_{i = \text{index}}^{\text{degree}}
+   *      (-1)^{i - \text{index}}
+   *      \cdot \text{binom}(\text{degree}, i)
+   *      \cdot \text{binom}(i, \text{index})
+   *      \cdot t^i
+   * @f}
+   *
+   * @param index Index of the polynomial within the Bernstein basis.
+   * @param degree Polynomial degree of the Bernstein basis.
+   */
+  PolynomialsBernstein (
+    const unsigned int index,
+    const unsigned int degree);
+};
+
+
+template <typename number>
+std::vector<Polynomials::Polynomial<number> >
+generate_complete_bernstein_basis (
+  const unsigned int degree)
+{
+  std::vector<Polynomials::Polynomial<number> > v;
+  for (unsigned int i = 0; i < degree + 1; ++i)
+    v.push_back(PolynomialsBernstein<number>(i, degree));
+  return v;
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/polynomials_nedelec.h b/include/deal.II/base/polynomials_nedelec.h
new file mode 100644
index 0000000..46e5872
--- /dev/null
+++ b/include/deal.II/base/polynomials_nedelec.h
@@ -0,0 +1,154 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__polynomials_nedelec_h
+#define dealii__polynomials_nedelec_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/tensor.h>
+#include <deal.II/base/point.h>
+#include <deal.II/base/polynomial.h>
+#include <deal.II/base/polynomial_space.h>
+#include <deal.II/base/tensor_product_polynomials.h>
+#include <deal.II/base/table.h>
+
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * This class implements the first family <i>H<sup>curl</sup></i>-conforming,
+ * vector-valued polynomials, proposed by J.-C. Nédélec in 1980 (Numer.
+ * Math. 35).
+ *
+ * The Nédélec polynomials are constructed such that the curl is in the
+ * tensor product polynomial space <i>Q<sub>k</sub></i>. Therefore, the
+ * polynomial order of each component must be one order higher in the
+ * corresponding two directions, yielding the polynomial spaces
+ * <i>(Q<sub>k,k+1</sub>, Q<sub>k+1,k</sub>)</i> and
+ * <i>(Q<sub>k,k+1,k+1</sub>, Q<sub>k+1,k,k+1</sub>,
+ * Q<sub>k+1,k+1,k</sub>)</i> in 2D and 3D, resp.
+ *
+ * @ingroup Polynomials
+ * @author Markus Bürg
+ * @date 2009, 2010
+ */
+template <int dim>
+class PolynomialsNedelec
+{
+public:
+  /**
+   * Constructor. Creates all basis functions for Nédélec polynomials of
+   * given degree.
+   *
+   * @arg k: the degree of the Nédélec space, which is the degree of the
+   * largest tensor product polynomial space <i>Q<sub>k</sub></i> contained.
+   */
+  PolynomialsNedelec (const unsigned int k);
+
+  /**
+   * Computes the value and the first and second derivatives of each Nédélec
+   * polynomial at @p unit_point.
+   *
+   * The size of the vectors must either be zero or equal <tt>n()</tt>.  In
+   * the first case, the function will not compute these values.
+   *
+   * If you need values or derivatives of all tensor product polynomials then
+   * use this function, rather than using any of the <tt>compute_value</tt>,
+   * <tt>compute_grad</tt> or <tt>compute_grad_grad</tt> functions, see below,
+   * in a loop over all tensor product polynomials.
+   */
+  void compute (const Point<dim> &unit_point, std::vector<Tensor<1, dim> > &values,
+                std::vector<Tensor<2, dim> > &grads,
+                std::vector<Tensor<3,dim> > &grad_grads,
+                std::vector<Tensor<4,dim> > &third_derivatives,
+                std::vector<Tensor<5,dim> > &fourth_derivatives) const;
+
+  /**
+   * Returns the number of Nédélec polynomials.
+   */
+  unsigned int n () const;
+
+  /**
+   * Returns the degree of the Nédélec space, which is one less than the
+   * highest polynomial degree.
+   */
+  unsigned int degree () const;
+
+  /**
+   * Return the name of the space, which is <tt>Nedelec</tt>.
+   */
+  std::string name () const;
+
+  /**
+   * Return the number of polynomials in the space <tt>N(degree)</tt> without
+   * requiring to build an object of PolynomialsNedelec. This is required by
+   * the FiniteElement classes.
+   */
+  static unsigned int compute_n_pols (unsigned int degree);
+
+private:
+  /**
+   * The degree of this object as given to the constructor.
+   */
+  const unsigned int my_degree;
+
+  /**
+   * An object representing the polynomial space for a single component. We
+   * can re-use it by rotating the coordinates of the evaluation point.
+   */
+  const AnisotropicPolynomials<dim> polynomial_space;
+
+  /**
+   * Number of Nédélec polynomials.
+   */
+  const unsigned int n_pols;
+
+  /**
+   * A static member function that creates the polynomial space we use to
+   * initialize the #polynomial_space member variable.
+   */
+  static std::vector<std::vector< Polynomials::Polynomial< double > > > create_polynomials (const unsigned int k);
+};
+
+
+template <int dim>
+inline unsigned int PolynomialsNedelec<dim>::n () const
+{
+  return n_pols;
+}
+
+
+template <int dim>
+inline unsigned int PolynomialsNedelec<dim>::degree () const
+{
+  return my_degree;
+}
+
+
+template <int dim>
+inline std::string
+PolynomialsNedelec<dim>::name() const
+{
+  return "Nedelec";
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/polynomials_p.h b/include/deal.II/base/polynomials_p.h
new file mode 100644
index 0000000..d9a945b
--- /dev/null
+++ b/include/deal.II/base/polynomials_p.h
@@ -0,0 +1,112 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__polynomials_P_h
+#define dealii__polynomials_P_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/tensor.h>
+#include <deal.II/base/point.h>
+#include <deal.II/base/polynomial.h>
+#include <deal.II/base/polynomial_space.h>
+#include <deal.II/base/table.h>
+
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+/**
+ * @addtogroup Polynomials
+ * @{
+ */
+
+/**
+ * This class implements the polynomial space of degree <tt>p</tt> based on
+ * the monomials $\{1,x,x^2,...\}$. I.e. in <tt>d</tt> dimensions it constructs
+ * all polynomials of the form $\prod_{i=1}^d x_i^{n_i}$, where $\sum_i
+ * n_i\leq p$. The base polynomials are given a specific ordering, e.g. in 2
+ * dimensions: $\{1,x,y,xy,x^2,y^2,x^2y,xy^2,x^3,y^3,...\}$. The ordering of
+ * the monomials in $P_{k_1}$ matches the ordering of the monomials in
+ * $P_{k_2}$ for $k_2>k_1$.
+ *
+ * @author Ralf Hartmann, 2004
+ */
+template <int dim>
+class PolynomialsP: public PolynomialSpace<dim>
+{
+public:
+  /**
+   * Access to the dimension of this object, for checking and automatic
+   * setting of dimension in other classes.
+   */
+  static const unsigned int dimension = dim;
+
+  /**
+   * Constructor. Creates all basis functions of $P_p$. @arg p: the degree of
+   * the polynomial space
+   */
+  PolynomialsP (const unsigned int p);
+
+  /**
+   * Returns the degree <tt>p</tt> of the polynomial space <tt>P_p</tt>.
+   *
+   * Note, that this number is <tt>PolynomialSpace::degree()-1</tt>, compare
+   * definition in PolynomialSpace.
+   */
+  unsigned int degree() const;
+
+  /**
+   * For the <tt>n</tt>th polynomial $p_n(x,y,z)=x^i y^j z^k$ this function
+   * gives the degrees i,j,k in the x,y,z directions.
+   */
+  void directional_degrees(unsigned int n,
+                           unsigned int (&degrees)[dim]) const;
+
+private:
+
+  /**
+   * Fills the <tt>index_map</tt>.
+   */
+  void create_polynomial_ordering(std::vector<unsigned int> &index_map) const;
+
+  /**
+   * Degree <tt>p</tt> of the polynomial space $P_p$, i.e. the number
+   * <tt>p</tt> which was given to the constructor.
+   */
+  const unsigned int p;
+};
+
+/** @} */
+
+template <int dim>
+inline unsigned int
+PolynomialsP<dim>::degree() const
+{
+  return p;
+}
+
+
+template <int dim>
+inline void
+PolynomialsP<dim>::directional_degrees(unsigned int n,
+                                       unsigned int (&degrees)[dim]) const
+{
+  this->compute_index(n,degrees);
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/polynomials_piecewise.h b/include/deal.II/base/polynomials_piecewise.h
new file mode 100644
index 0000000..3746c1e
--- /dev/null
+++ b/include/deal.II/base/polynomials_piecewise.h
@@ -0,0 +1,227 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__polynomials_piecewise_h
+#define dealii__polynomials_piecewise_h
+
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/polynomial.h>
+#include <deal.II/base/point.h>
+
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * @addtogroup Polynomials
+ * @{
+ */
+
+/**
+ * A namespace in which classes relating to the description of 1d polynomial
+ * spaces are declared.
+ */
+namespace Polynomials
+{
+
+  /**
+   * Definition of piecewise 1D polynomials for the unit interval. This space
+   * allows the description of interpolating polynomials on parts of the unit
+   * interval, similarly to the definition of finite element basis functions
+   * on the subdivided elements. The primary purpose of this class is to
+   * allow constructing FE_Q_iso_Q1 elements that put additional degrees of
+   * freedom into an equivalent of a refined mesh instead of higher order
+   * polynomials, which is useful when using mixed finite elements.
+   *
+   * @author Martin Kronbichler, 2013
+   */
+  template <typename number>
+  class PiecewisePolynomial : public Subscriptor
+  {
+  public:
+    /**
+     * Constructor for a Lagrange polynomial on an interval that is a subset
+     * of the unit interval. It uses a polynomial description that is scaled
+     * to the size of the subinterval compared to the unit interval, the total
+     * number of intervals (subdivisions), the current index of the interval
+     * as well as whether the polynomial spans onto the next interval (e.g.,
+     * if it lives on two neighboring intervals).
+     *
+     * If the number of intervals is one, the piecewise polynomial behaves
+     * exactly like a usual polynomial.
+     */
+    PiecewisePolynomial (const Polynomial<number> &coefficients_on_interval,
+                         const unsigned int        n_intervals,
+                         const unsigned int        interval,
+                         const bool                spans_next_interval);
+
+    /**
+     * Return the value of this polynomial at the given point, evaluating the
+     * underlying polynomial. The polynomial evaluates to zero when outside of
+     * the given interval (and possibly the next one to the right when it
+     * spans over that range).
+     */
+    number value (const number x) const;
+
+    /**
+     * Return the values and the derivatives of the Polynomial at point
+     * <tt>x</tt>.  <tt>values[i], i=0,...,values.size()-1</tt> contains the
+     * <tt>i</tt>th derivative. The number of derivatives to be computed is
+     * thus determined by the size of the array passed.
+     *
+     * Note that all the derivatives evaluate to zero at the border between
+     * intervals (assuming exact arithmetic) in the interior of the unit
+     * interval, as there is no unique gradient value in that case for a
+     * piecewise polynomial. This is not always desired (e.g., when evaluating
+     * jumps of gradients on the element boundary), but it is the user's
+     * responsibility to avoid evaluation at these points when it does not
+     * make sense.
+     */
+    void value (const number         x,
+                std::vector<number> &values) const;
+
+    /**
+     * Degree of the polynomial. This is the degree of the underlying base
+     * polynomial.
+     */
+    unsigned int degree () const;
+
+    /**
+     * Write or read the data of this object to or from a stream for the
+     * purpose of serialization.
+     */
+    template <class Archive>
+    void serialize (Archive &ar, const unsigned int version);
+
+  protected:
+
+    /**
+     * Underlying polynomial object that is scaled to a subinterval and
+     * concatenated accordingly.
+     */
+    Polynomial<number> polynomial;
+
+    /**
+     * Stores the number of intervals that the unit interval is divided into.
+     */
+    unsigned int n_intervals;
+
+    /**
+     * Stores the index of the current polynomial in the range of intervals.
+     */
+    unsigned int interval;
+
+    /**
+     * Stores whether the polynomial spans over two adjacent intervals, i.e.,
+     * the one given by <tt>interval</tt> and the next one.
+     */
+    bool spans_two_intervals;
+  };
+
+
+
+  /**
+   * Generates a complete Lagrange basis on a subdivision of the unit interval
+   * into smaller intervals for a given degree on the subintervals and number
+   * of intervals.
+   */
+  std::vector<PiecewisePolynomial<double> >
+  generate_complete_Lagrange_basis_on_subdivisions (const unsigned int n_subdivisions,
+                                                    const unsigned int base_degree);
+
+}
+
+
+/** @} */
+
+/* -------------------------- inline functions --------------------- */
+
+namespace Polynomials
+{
+  template <typename number>
+  inline
+  unsigned int
+  PiecewisePolynomial<number>::degree () const
+  {
+    return polynomial.degree();  // degree of the underlying base polynomial
+  }
+
+
+
+  template <typename number>
+  inline
+  number
+  PiecewisePolynomial<number>::value (const number x) const
+  {
+    AssertIndexRange (interval, n_intervals);
+
+    // With a single interval this object behaves exactly like the
+    // underlying polynomial, so no shifting is necessary.
+    if (!(n_intervals > 1))
+      return polynomial.value(x);
+
+    // Width of one subinterval and left end of the subinterval on which
+    // this polynomial starts.
+    const number width = 1./n_intervals;
+    const number left  = width * interval;
+
+    // Everything to the left of the support evaluates to zero.
+    if (x<left)
+      return 0;
+
+    // Local coordinate on the first subinterval.
+    number local = x-left;
+    if (spans_two_intervals == true)
+      {
+        // The support covers two subintervals: zero to the right of both,
+        // and on the second one the coordinate is mirrored at its right
+        // end so that the same underlying polynomial can be reused.
+        if (x>left+width+width)
+          return 0;
+        else if (!(x<left+width))
+          local = left+width+width-x;
+      }
+    else if (x>left+width)
+      // The support is a single subinterval: zero to the right of it.
+      return 0;
+
+    return polynomial.value(local);
+  }
+
+
+
+  template <typename number>
+  template <class Archive>
+  inline
+  void
+  PiecewisePolynomial<number>::serialize (Archive &ar, const unsigned int)
+  {
+    // archive the Subscriptor base class part first, then each data member
+    ar &static_cast<Subscriptor &>(*this);
+    ar &polynomial;
+    ar &n_intervals;
+    ar &interval;
+    ar &spans_two_intervals;
+  }
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/polynomials_rannacher_turek.h b/include/deal.II/base/polynomials_rannacher_turek.h
new file mode 100644
index 0000000..16a0cf9
--- /dev/null
+++ b/include/deal.II/base/polynomials_rannacher_turek.h
@@ -0,0 +1,208 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2015 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__polynomials_rannacher_turek_h
+#define dealii__polynomials_rannacher_turek_h
+
+#include <deal.II/base/point.h>
+#include <deal.II/base/tensor.h>
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/**
+ * Basis for the polynomial space on the unit square used for the lowest
+ * order Rannacher-Turek element.
+ *
+ * The i-th basis function is the dual basis element corresponding to the dof
+ * which evaluates the function's mean value across the i-th face. The
+ * numbering can be found in GeometryInfo.
+ *
+ * @ingroup Polynomials
+ * @author Patrick Esser
+ * @date 2015
+ */
+template <int dim>
+class PolynomialsRannacherTurek
+{
+public:
+  /**
+   * Dimension we are working in.
+   */
+  static const unsigned int dimension = dim;
+
+  /**
+   * Constructor, checking that the basis is implemented in this dimension.
+   */
+  PolynomialsRannacherTurek();
+
+  /**
+   * Value of basis function @p i at @p p.
+   */
+  double compute_value(const unsigned int i,
+                       const Point<dim> &p) const;
+
+  /**
+   * <tt>order</tt>-th derivative of basis function @p i at @p p.
+   *
+   * Consider using compute() instead.
+   */
+  template <int order>
+  Tensor<order,dim> compute_derivative (const unsigned int i,
+                                        const Point<dim> &p) const;
+
+  /**
+   * Gradient of basis function @p i at @p p.
+   */
+  Tensor<1, dim> compute_grad(const unsigned int i,
+                              const Point<dim> &p) const;
+
+  /**
+   * Gradient of gradient of basis function @p i at @p p.
+   */
+  Tensor<2, dim> compute_grad_grad(const unsigned int i,
+                                   const Point<dim> &p) const;
+
+  /**
+   * Compute values and derivatives of all basis functions at @p unit_point.
+   *
+   * Size of the vectors must be either equal to the number of polynomials or
+   * zero. A size of zero means that we are not computing the vector entries.
+   */
+  void compute(const Point<dim> &unit_point,
+               std::vector<double> &values,
+               std::vector<Tensor<1, dim> > &grads,
+               std::vector<Tensor<2,dim> > &grad_grads,
+               std::vector<Tensor<3,dim> > &third_derivatives,
+               std::vector<Tensor<4,dim> > &fourth_derivatives) const;
+};
+
+
+namespace internal
+{
+  namespace PolynomialsRannacherTurek
+  {
+    template <int order, int dim>
+    inline
+    Tensor<order,dim>
+    compute_derivative (const unsigned int,
+                        const Point<dim> &)
+    {
+      Assert (dim == 2, ExcNotImplemented());  // only dim==2 is implemented
+      return Tensor<order,dim>();  // default-constructed placeholder result
+    }
+
+
+    /**
+     * Overload for <tt>dim==2</tt>: returns the <tt>order</tt>-th derivative
+     * of basis function @p i at @p p. The gradients below are linear in
+     * @p p, so derivatives of order three and higher are zero.
+     *
+     * For first and second derivatives an index @p i other than 0,...,3 is
+     * reported through Assert with ExcNotImplemented.
+     */
+    template <int order>
+    inline
+    Tensor<order,2>
+    compute_derivative (const unsigned int i,
+                        const Point<2> &p)
+    {
+      const unsigned int dim = 2;
+
+      Tensor<order,dim> derivative;
+      switch (order)
+        {
+        case 1:
+        {
+          // this branch only runs for order==1, so both types coincide
+          Tensor<1,dim> &grad = *reinterpret_cast<Tensor<1,dim>*>(&derivative);
+          if (i == 0)
+            {
+              grad[0] = -2.5 + 3*p(0);
+              grad[1] = 1.5 - 3*p(1);
+            }
+          else if (i == 1)
+            {
+              grad[0] = -0.5 + 3.0*p(0);
+              grad[1] = 1.5 - 3.0*p(1);
+            }
+          else if (i == 2)
+            {
+              grad[0] = 1.5 - 3.0*p(0);
+              grad[1] = -2.5 + 3.0*p(1);
+            }
+          else if (i == 3)
+            {
+              grad[0] = 1.5 - 3.0*p(0);
+              grad[1] = -0.5 + 3.0*p(1);
+            }
+          else
+            {
+              Assert(false, ExcNotImplemented());
+            }
+          return derivative;
+        }
+        case 2:
+        {
+          // this branch only runs for order==2, so both types coincide
+          Tensor<2,dim> &grad_grad = *reinterpret_cast<Tensor<2,dim>*>(&derivative);
+          if (i == 0 || i == 1)
+            {
+              grad_grad[0][0] = 3;
+              grad_grad[0][1] = 0;
+              grad_grad[1][0] = 0;
+              grad_grad[1][1] = -3;
+            }
+          else if (i == 2 || i == 3)
+            {
+              grad_grad[0][0] = -3;
+              grad_grad[0][1] = 0;
+              grad_grad[1][0] = 0;
+              grad_grad[1][1] = 3;
+            }
+          else
+            {
+              Assert(false, ExcNotImplemented());
+            }
+          return derivative;
+        }
+        default:
+        {
+          // higher derivatives are all zero
+          return Tensor<order,dim>();
+        }
+        }
+    }
+  }
+}
+
+
+
+// template functions: forwards to internal::PolynomialsRannacherTurek
+template <int dim>
+template <int order>
+Tensor<order,dim>
+PolynomialsRannacherTurek<dim>::compute_derivative (const unsigned int i,
+                                                    const Point<dim> &p) const
+{
+  return internal::PolynomialsRannacherTurek::compute_derivative<order> (i, p);
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/polynomials_raviart_thomas.h b/include/deal.II/base/polynomials_raviart_thomas.h
new file mode 100644
index 0000000..6c6761e
--- /dev/null
+++ b/include/deal.II/base/polynomials_raviart_thomas.h
@@ -0,0 +1,160 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__polynomials_raviart_thomas_h
+#define dealii__polynomials_raviart_thomas_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/tensor.h>
+#include <deal.II/base/point.h>
+#include <deal.II/base/polynomial.h>
+#include <deal.II/base/polynomial_space.h>
+#include <deal.II/base/tensor_product_polynomials.h>
+#include <deal.II/base/table.h>
+
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * This class implements the <i>H<sup>div</sup></i>-conforming, vector-valued
+ * Raviart-Thomas polynomials as described in the book by Brezzi and Fortin.
+ *
+ * The Raviart-Thomas polynomials are constructed such that the divergence is
+ * in the tensor product polynomial space <i>Q<sub>k</sub></i>. Therefore, the
+ * polynomial order of each component must be one order higher in the
+ * corresponding direction, yielding the polynomial spaces
+ * <i>(Q<sub>k+1,k</sub>, Q<sub>k,k+1</sub>)</i> and <i>(Q<sub>k+1,k,k</sub>,
+ * Q<sub>k,k+1,k</sub>, Q<sub>k,k,k+1</sub>)</i> in 2D and 3D, resp.
+ *
+ * @ingroup Polynomials
+ * @author Guido Kanschat
+ * @date 2005
+ */
+template <int dim>
+class PolynomialsRaviartThomas
+{
+public:
+  /**
+   * Constructor. Creates all basis functions for Raviart-Thomas polynomials
+   * of given degree.
+   *
+   * @param k The degree of the Raviart-Thomas space, which is the degree of
+   * the largest tensor product polynomial space <i>Q<sub>k</sub></i> that it
+   * contains.
+   */
+  PolynomialsRaviartThomas (const unsigned int k);
+
+  /**
+   * Computes the value and the first and second derivatives of each
+   * Raviart-Thomas polynomial at @p unit_point.
+   *
+   * The size of the vectors must either be zero or equal to <tt>n()</tt>. In
+   * the first case, the function will not compute these values.
+   *
+   * If you need values or derivatives of all tensor product polynomials then
+   * use this function, rather than using any of the <tt>compute_value</tt>,
+   * <tt>compute_grad</tt> or <tt>compute_grad_grad</tt> functions, see below,
+   * in a loop over all tensor product polynomials.
+   */
+  void compute (const Point<dim>            &unit_point,
+                std::vector<Tensor<1,dim> > &values,
+                std::vector<Tensor<2,dim> > &grads,
+                std::vector<Tensor<3,dim> > &grad_grads,
+                std::vector<Tensor<4,dim> > &third_derivatives,
+                std::vector<Tensor<5,dim> > &fourth_derivatives) const;
+
+  /**
+   * Returns the number of Raviart-Thomas polynomials.
+   */
+  unsigned int n () const;
+
+  /**
+   * Returns the degree of the Raviart-Thomas space, which is one less than
+   * the highest polynomial degree.
+   */
+  unsigned int degree () const;
+
+  /**
+   * Return the name of the space, which is <tt>RaviartThomas</tt>.
+   */
+  std::string name () const;
+
+  /**
+   * Return the number of polynomials in the space <tt>RT(degree)</tt> without
+   * requiring to build an object of PolynomialsRaviartThomas. This is
+   * required by the FiniteElement classes.
+   */
+  static unsigned int compute_n_pols(unsigned int degree);
+
+private:
+  /**
+   * The degree of this object as given to the constructor.
+   */
+  const unsigned int my_degree;
+
+  /**
+   * An object representing the polynomial space for a single component. We
+   * can re-use it by rotating the coordinates of the evaluation point.
+   */
+  const AnisotropicPolynomials<dim> polynomial_space;
+
+  /**
+   * Number of Raviart-Thomas polynomials.
+   */
+  const unsigned int n_pols;
+
+  /**
+   * A static member function that creates the polynomial space we use to
+   * initialize the #polynomial_space member variable.
+   */
+  static
+  std::vector<std::vector< Polynomials::Polynomial< double > > >
+  create_polynomials (const unsigned int k);
+};
+
+
+
+template <int dim>
+inline unsigned int
+PolynomialsRaviartThomas<dim>::n() const
+{
+  return n_pols;  // const member: number of Raviart-Thomas polynomials
+}
+
+
+
+template <int dim>
+inline unsigned int
+PolynomialsRaviartThomas<dim>::degree() const
+{
+  return my_degree;  // the degree as given to the constructor
+}
+
+
+
+template <int dim>
+inline std::string
+PolynomialsRaviartThomas<dim>::name() const
+{
+  return "RaviartThomas";  // fixed name of this polynomial space
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/qprojector.h b/include/deal.II/base/qprojector.h
new file mode 100644
index 0000000..6a5fb16
--- /dev/null
+++ b/include/deal.II/base/qprojector.h
@@ -0,0 +1,428 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__qprojector_h
+#define dealii__qprojector_h
+
+
+#include <deal.II/base/quadrature.h>
+#include <deal.II/base/geometry_info.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/*!@addtogroup Quadrature */
+/*@{*/
+
+
+/**
+ * This class is a helper class to facilitate the usage of quadrature formulae
+ * on faces or subfaces of cells. It computes the locations of quadrature
+ * points on the unit cell from a quadrature object for a manifold of one
+ * dimension less than that of the cell and the number of the face. For
+ * example, giving the Simpson rule in one dimension and using the
+ * project_to_face() function with face number 1, the returned points will be
+ * (1,0), (1,0.5) and (1,1). Note that faces have an orientation, so when
+ * projecting to face 3, you will get (0,0), (0,0.5) and (0,1), which is in
+ * clockwise sense, while for face 1 the points were in counterclockwise
+ * sense.
+ *
+ * For the projection to subfaces (i.e. to the children of a face of the unit
+ * cell), the same applies as above. Note the order in which the children of a
+ * face are numbered, which in two dimensions coincides with the orientation
+ * of the face.
+ *
+ * The second set of functions generates a quadrature formula by projecting a
+ * given quadrature rule on <b>all</b> faces and subfaces. This is used in the
+ * FEFaceValues and FESubfaceValues classes. Since we now have the quadrature
+ * points of all faces and subfaces in one array, we need to have a way to
+ * find the starting index of the points and weights corresponding to one face
+ * or subface within this array. This is done through the DataSetDescriptor
+ * member class.
+ *
+ * The different functions are grouped into a common class to avoid putting
+ * them into global namespace. However, since they have no local data, all
+ * functions are declared <tt>static</tt> and can be called without creating
+ * an object of this class.
+ *
+ * For the 3d case, you should note that the orientation of faces is even more
+ * intricate than for two dimensions. Quadrature formulae are projected upon
+ * the faces in their standard orientation, not to the inside or outside of
+ * the hexahedron. To make things more complicated, in 3d we allow faces in
+ * two orientations (which can be identified using
+ * <tt>cell->face_orientation(face)</tt>), so we have to project quadrature
+ * formula onto faces and subfaces in two orientations. (Refer to the
+ * documentation of the Triangulation class for a description of the
+ * orientation of the different faces, as well as to
+ * @ref GlossFaceOrientation "the glossary entry on face orientation"
+ * for more information on this.) The DataSetDescriptor member class is used
+ * to identify where each dataset starts.
+ *
+ * @author Wolfgang Bangerth, Guido Kanschat, 1998, 1999, 2003, 2005
+ */
+template <int dim>
+class QProjector
+{
+public:
+  /**
+   * Define a typedef for a quadrature that acts on an object of one dimension
+   * less. For cells, this would then be a face quadrature.
+   */
+  typedef Quadrature<dim-1> SubQuadrature;
+
+  /**
+   * Compute the quadrature points on the cell if the given quadrature formula
+   * is used on face <tt>face_no</tt>. For further details, see the general
+   * doc for this class.
+   */
+  static void project_to_face (const SubQuadrature &quadrature,
+                               const unsigned int      face_no,
+                               std::vector<Point<dim> > &q_points);
+
+  /**
+   * Compute the cell quadrature formula corresponding to using
+   * <tt>quadrature</tt> on face <tt>face_no</tt>. For further details, see
+   * the general doc for this class.
+   */
+  static Quadrature<dim>
+  project_to_face (const SubQuadrature &quadrature,
+                   const unsigned int      face_no);
+
+  /**
+   * Compute the quadrature points on the cell if the given quadrature formula
+   * is used on face <tt>face_no</tt>, subface number <tt>subface_no</tt>
+   * corresponding to RefinementCase<dim-1> <tt>ref_case</tt>. The last
+   * argument is only used in 3D.
+   *
+   * @note Only the points are transformed. The quadrature weights are the
+   * same as those of the original rule.
+   */
+  static void project_to_subface (const SubQuadrature       &quadrature,
+                                  const unsigned int         face_no,
+                                  const unsigned int         subface_no,
+                                  std::vector<Point<dim> >  &q_points,
+                                  const RefinementCase<dim-1> &ref_case=RefinementCase<dim-1>::isotropic_refinement);
+
+  /**
+   * Compute the cell quadrature formula corresponding to using
+   * <tt>quadrature</tt> on subface <tt>subface_no</tt> of face
+   * <tt>face_no</tt> with RefinementCase<dim-1> <tt>ref_case</tt>. The last
+   * argument is only used in 3D.
+   *
+   * @note Only the points are transformed. The quadrature weights are the
+   * same as those of the original rule.
+   */
+  static Quadrature<dim>
+  project_to_subface (const SubQuadrature       &quadrature,
+                      const unsigned int         face_no,
+                      const unsigned int         subface_no,
+                      const RefinementCase<dim-1> &ref_case=RefinementCase<dim-1>::isotropic_refinement);
+
+  /**
+   * Take a face quadrature formula and generate a cell quadrature formula
+   * from it where the quadrature points of the given argument are projected
+   * on all faces.
+   *
+   * The weights of the new rule are replications of the original weights.
+   * Thus, the sum of the weights is not one, but the number of faces, which
+   * is the surface of the reference cell.
+   *
+   * This in particular allows us to extract a subset of points corresponding
+   * to a single face and use it as a quadrature on this face, as is done in
+   * FEFaceValues.
+   *
+   * @note In 3D, this function produces eight sets of quadrature points for
+   * each face, to cope with possibly different orientations of the mesh.
+   */
+  static Quadrature<dim>
+  project_to_all_faces (const SubQuadrature &quadrature);
+
+  /**
+   * Take a face quadrature formula and generate a cell quadrature formula
+   * from it where the quadrature points of the given argument are projected
+   * on all subfaces.
+   *
+   * Like in project_to_all_faces(), the weights of the new rule sum up to the
+   * number of faces (not subfaces), which is the surface of the reference
+   * cell.
+   *
+   * This in particular allows us to extract a subset of points corresponding
+   * to a single subface and use it as a quadrature on this face, as is done
+   * in FESubfaceValues.
+   */
+  static Quadrature<dim>
+  project_to_all_subfaces (const SubQuadrature &quadrature);
+
+  /**
+   * Project a given quadrature formula to a child of a cell. You may want to
+   * use this function in case you want to extend an integral only over the
+   * area which a potential child would occupy. The child numbering is the
+   * same as the children would be numbered upon refinement of the cell.
+   *
+   * As integration using this quadrature formula now only extends over a
+   * fraction of the cell, the weights of the resulting object are divided by
+   * GeometryInfo<dim>::children_per_cell.
+   */
+  static
+  Quadrature<dim>
+  project_to_child (const Quadrature<dim>  &quadrature,
+                    const unsigned int      child_no);
+
+  /**
+   * Project a quadrature rule to all children of a cell. Similarly to
+   * project_to_all_subfaces(), this function replicates the formula generated
+   * by project_to_child() for all children, such that the weights sum up to
+   * one, the volume of the total cell again.
+   *
+   * The child numbering is the same as the children would be numbered upon
+   * refinement of the cell.
+   */
+  static
+  Quadrature<dim>
+  project_to_all_children (const Quadrature<dim>  &quadrature);
+
+  /**
+   * Project the one dimensional rule <tt>quadrature</tt> to the straight line
+   * connecting the points <tt>p1</tt> and <tt>p2</tt>.
+   */
+  static
+  Quadrature<dim>
+  project_to_line(const Quadrature<1> &quadrature,
+                  const Point<dim> &p1,
+                  const Point<dim> &p2);
+
+  /**
+   * Since the project_to_all_faces() and project_to_all_subfaces() functions
+   * chain together the quadrature points and weights of all projections of a
+   * face quadrature formula to the faces or subfaces of a cell, we need a way
+   * to identify where the starting index of the points and weights for a
+   * particular face or subface is. This class provides this: there are static
+   * member functions that generate objects of this type, given face or
+   * subface indices, and you can then use the generated object in place of an
+   * integer that denotes the offset of a given dataset.
+   *
+   * @author Wolfgang Bangerth, 2003
+   */
+  class DataSetDescriptor
+  {
+  public:
+    /**
+     * Default constructor. This doesn't do much except generating an invalid
+     * index, since you didn't give a valid descriptor of the cell, face, or
+     * subface you wanted.
+     */
+    DataSetDescriptor ();
+
+    /**
+     * Static function to generate the offset of a cell. Since we only have
+     * one cell per quadrature object, this offset is of course zero, but we
+     * carry this function around for consistency with the other static
+     * functions.
+     */
+    static DataSetDescriptor cell ();
+
+    /**
+     * Static function to generate an offset object for a given face of a cell
+     * with the given face orientation, flip and rotation. This function of
+     * course is only allowed if <tt>dim>=2</tt>, and the face orientation,
+     * flip and rotation are ignored if the space dimension equals 2.
+     *
+     * The last argument denotes the number of quadrature points the lower-
+     * dimensional face quadrature formula (the one that has been projected
+     * onto the faces) has.
+     */
+    static
+    DataSetDescriptor
+    face (const unsigned int face_no,
+          const bool         face_orientation,
+          const bool         face_flip,
+          const bool         face_rotation,
+          const unsigned int n_quadrature_points);
+
+    /**
+     * Static function to generate an offset object for a given subface of a
+     * cell with the given face orientation, flip and rotation. This function
+     * of course is only allowed if <tt>dim>=2</tt>, and the face orientation,
+     * flip and rotation are ignored if the space dimension equals 2.
+     *
+     * The last but one argument denotes the number of quadrature points the
+     * lower-dimensional face quadrature formula (the one that has been
+     * projected onto the faces) has.
+     *
+     * Through the last argument anisotropic refinement can be respected.
+     */
+    static
+    DataSetDescriptor
+    subface (const unsigned int face_no,
+             const unsigned int subface_no,
+             const bool         face_orientation,
+             const bool         face_flip,
+             const bool         face_rotation,
+             const unsigned int n_quadrature_points,
+             const internal::SubfaceCase<dim> ref_case=internal::SubfaceCase<dim>::case_isotropic);
+
+    /**
+     * Conversion operator to an integer denoting the offset of the first
+     * element of this dataset in the set of quadrature formulas all projected
+     * onto faces and subfaces. This conversion operator allows us to use
+     * offset descriptor objects in place of integer offsets.
+     */
+    operator unsigned int () const;
+
+  private:
+    /**
+     * Store the integer offset for a given cell, face, or subface.
+     */
+    const unsigned int dataset_offset;
+
+    /**
+     * This is the real constructor, but it is private and thus only available
+     * to the static member functions above.
+     */
+    DataSetDescriptor (const unsigned int dataset_offset);
+  };
+
+private:
+  /**
+   * Given a quadrature object in 2d, reflect all quadrature points at the
+   * main diagonal and return them with their original weights.
+   *
+   * This function is necessary for projecting a 2d quadrature rule onto the
+   * faces of a 3d cube, since there we need both orientations.
+   */
+  static Quadrature<2> reflect (const Quadrature<2> &q);
+
+  /**
+   * Given a quadrature object in 2d, rotate all quadrature points by @p
+   * n_times * 90 degrees counterclockwise and return them with their original
+   * weights.
+   *
+   * This function is necessary for projecting a 2d quadrature rule onto the
+   * faces of a 3d cube, since there we need all rotations to account for
+   * face_flip and face_rotation of non-standard faces.
+   */
+  static Quadrature<2> rotate (const Quadrature<2> &q,
+                               const unsigned int n_times);
+};
+
+/*@}*/
+
+
+// -------------------  inline and template functions ----------------
+
+
+
+template <int dim>
+inline
+QProjector<dim>::DataSetDescriptor::
+DataSetDescriptor (const unsigned int dataset_offset)
+  :
+  dataset_offset (dataset_offset)  // member takes the value of the argument
+{}
+
+
+template <int dim>
+inline
+QProjector<dim>::DataSetDescriptor::
+DataSetDescriptor ()
+  :
+  dataset_offset (numbers::invalid_unsigned_int)  // no valid dataset chosen
+{}
+
+
+
+template <int dim>
+typename QProjector<dim>::DataSetDescriptor
+QProjector<dim>::DataSetDescriptor::cell ()
+{
+  return 0;  // one cell per quadrature object, hence offset zero
+}
+
+
+
+template <int dim>
+inline
+QProjector<dim>::DataSetDescriptor::operator unsigned int () const
+{
+  return dataset_offset;  // the offset stored by the constructor
+}
+
+
+/* -------------- declaration of explicit specializations ------------- */
+
+#ifndef DOXYGEN
+
+
+template <>
+void
+QProjector<1>::project_to_face (const Quadrature<0> &,
+                                const unsigned int,
+                                std::vector<Point<1> > &);
+template <>
+void
+QProjector<2>::project_to_face (const Quadrature<1>      &quadrature,
+                                const unsigned int        face_no,
+                                std::vector<Point<2> >   &q_points);
+template <>
+void
+QProjector<3>::project_to_face (const Quadrature<2>    &quadrature,
+                                const unsigned int      face_no,
+                                std::vector<Point<3> > &q_points);
+
+template <>
+Quadrature<1>
+QProjector<1>::project_to_all_faces (const Quadrature<0> &quadrature);
+
+
+template <>
+void
+QProjector<1>::project_to_subface (const Quadrature<0> &,
+                                   const unsigned int,
+                                   const unsigned int,
+                                   std::vector<Point<1> > &,
+                                   const RefinementCase<0> &);
+template <>
+void
+QProjector<2>::project_to_subface (const Quadrature<1>    &quadrature,
+                                   const unsigned int      face_no,
+                                   const unsigned int      subface_no,
+                                   std::vector<Point<2> > &q_points,
+                                   const RefinementCase<1> &);
+template <>
+void
+QProjector<3>::project_to_subface (const Quadrature<2>       &quadrature,
+                                   const unsigned int         face_no,
+                                   const unsigned int         subface_no,
+                                   std::vector<Point<3> >    &q_points,
+                                   const RefinementCase<2> &face_ref_case);
+
+template <>
+Quadrature<1>
+QProjector<1>::project_to_all_subfaces (const Quadrature<0> &quadrature);
+
+
+template <>
+bool
+QIterated<1>::uses_both_endpoints (const Quadrature<1> &base_quadrature);
+
+template <>
+QIterated<1>::QIterated (const Quadrature<1> &base_quadrature,
+                         const unsigned int   n_copies);
+
+
+#endif // DOXYGEN
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/quadrature.h b/include/deal.II/base/quadrature.h
new file mode 100644
index 0000000..227fb4f
--- /dev/null
+++ b/include/deal.II/base/quadrature.h
@@ -0,0 +1,409 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__quadrature_h
+#define dealii__quadrature_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/point.h>
+#include <deal.II/base/subscriptor.h>
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+/*!@addtogroup Quadrature */
+/*@{*/
+
+/**
+ * Base class for quadrature formulae in arbitrary dimensions. This class
+ * stores quadrature points and weights on the unit line [0,1], unit square
+ * [0,1]x[0,1], etc.
+ *
+ * There are a number of derived classes, denoting concrete integration
+ * formulae. Their names are prefixed by <tt>Q</tt>. Refer to the list of
+ * derived classes for more details.
+ *
+ * The schemes for higher dimensions are typically tensor products of the one-
+ * dimensional formulae, but refer to the section on implementation detail
+ * below.
+ *
+ * In order to allow for dimension independent programming, a quadrature
+ * formula of dimension zero exists. Since an integral over zero dimensions is
+ * the evaluation at a single point, any constructor of such a formula
+ * initializes to a single quadrature point with weight one. Access to the
+ * weight is possible, while access to the quadrature point is not permitted,
+ * since a Point of dimension zero contains no information. The main purpose
+ * of these formulae is their use in QProjector, which will create a useful
+ * formula of dimension one out of them.
+ *
+ * <h3>Mathematical background</h3>
+ *
+ * For each quadrature formula we denote by <tt>m</tt>, the maximal degree of
+ * polynomials integrated exactly. This number is given in the documentation
+ * of each formula. The order of the integration error is <tt>m+1</tt>, that
+ * is, the error is the size of the cell to the <tt>m+1</tt> by the Bramble-
+ * Hilbert Lemma. The number <tt>m</tt> is to be found in the documentation of
+ * each concrete formula. For the optimal formulae QGauss we have $m = 2N-1$,
+ * where N is the constructor parameter to QGauss. The tensor product formulae
+ * are exact on tensor product polynomials of degree <tt>m</tt> in each space
+ * direction, but they are still only of <tt>m+1</tt>st order.
+ *
+ * <h3>Implementation details</h3>
+ *
+ * Most integration formulae in more than one space dimension are tensor
+ * products of quadrature formulae in one space dimension, or more generally
+ * the tensor product of a formula in <tt>(dim-1)</tt> dimensions and one in
+ * one dimension. There is a special constructor to generate a quadrature
+ * formula from two others.  For example, the QGauss@<dim@> formulae include
+ * <i>N<sup>dim</sup></i> quadrature points in <tt>dim</tt> dimensions, where
+ * N is the constructor parameter of QGauss.
+ *
+ * @note Instantiations for this template are provided for dimensions 0, 1, 2,
+ * and 3 (see the section on
+ * @ref Instantiations).
+ *
+ * @author Wolfgang Bangerth, Guido Kanschat, 1998, 1999, 2000, 2005, 2009
+ */
+template <int dim>
+class Quadrature : public Subscriptor
+{
+public:
+  /**
+   * Define a typedef for a quadrature that acts on an object of one dimension
+   * less. For cells, this would then be a face quadrature.
+   */
+  typedef Quadrature<dim-1> SubQuadrature;
+
+  /**
+   * Constructor taking the number of quadrature points (zero by default).
+   *
+   * This constructor is marked as explicit to avoid involuntary accidents
+   * like in <code>hp::QCollection@<dim@> q_collection(3)</code> where
+   * <code>hp::QCollection@<dim@> q_collection(QGauss@<dim@>(3))</code> was
+   * meant.
+   */
+  explicit Quadrature (const unsigned int n_quadrature_points = 0);
+
+  /**
+   * Build this quadrature formula as the tensor product of a formula in a
+   * dimension one less than the present and a formula in one dimension.
+   *
+   * <tt>SubQuadrature</tt> is a typedef for <tt>Quadrature@<dim-1@></tt>.
+   */
+  Quadrature (const SubQuadrature &,
+              const Quadrature<1> &);
+
+  /**
+   * Build this quadrature formula as the <tt>dim</tt>-fold tensor product of
+   * a formula in one dimension.
+   *
+   * Assuming that the points in the one-dimensional rule are in ascending
+   * order, the points of the resulting rule are ordered lexicographically
+   * with <i>x</i> running fastest.
+   *
+   * In order to avoid a conflict with the copy constructor in 1d, we let the
+   * argument be a 0d quadrature formula for dim==1, and a 1d quadrature
+   * formula for all other space dimensions.
+   */
+  explicit Quadrature (const Quadrature<dim != 1 ? 1 : 0> &quadrature_1d);
+
+  /**
+   * Copy constructor.
+   */
+  Quadrature (const Quadrature<dim> &q);
+
+  /**
+   * Construct a quadrature formula from given vectors of quadrature points
+   * (which should really be in the unit cell) and the corresponding weights.
+   * You will want to have the weights sum up to one, but this is not checked.
+   */
+  Quadrature (const std::vector<Point<dim> > &points,
+              const std::vector<double>      &weights);
+
+  /**
+   * Construct a dummy quadrature formula from a list of points, with weights
+   * set to infinity. The resulting object is therefore not meant to actually
+   * perform integrations, but rather to be used with FEValues objects in
+   * order to find the position of some points (the quadrature points in this
+   * object) on the transformed cell in real space.
+   */
+  Quadrature (const std::vector<Point<dim> > &points);
+
+  /**
+   * Constructor for a one-point quadrature. Sets the weight of this point to
+   * one.
+   */
+  Quadrature (const Point<dim> &point);
+
+  /**
+   * Virtual destructor.
+   */
+  virtual ~Quadrature ();
+
+  /**
+   * Assignment operator. Copies contents of #weights and #quadrature_points
+   * as well as size.
+   */
+  Quadrature &operator = (const Quadrature<dim> &);
+
+  /**
+   * Test for equality of two quadratures.
+   */
+  bool operator == (const Quadrature<dim> &p) const;
+
+  /**
+   * Set the quadrature points and weights to the values provided in the
+   * arguments.
+   */
+  void initialize(const std::vector<Point<dim> > &points,
+                  const std::vector<double>      &weights);
+
+  /**
+   * Number of quadrature points.
+   */
+  unsigned int size () const;
+
+  /**
+   * Return the <tt>i</tt>th quadrature point.
+   */
+  const Point<dim> &point (const unsigned int i) const;
+
+  /**
+   * Return a reference to the whole array of quadrature points.
+   */
+  const std::vector<Point<dim> > &get_points () const;
+
+  /**
+   * Return the weight of the <tt>i</tt>th quadrature point.
+   */
+  double weight (const unsigned int i) const;
+
+  /**
+   * Return a reference to the whole array of weights.
+   */
+  const std::vector<double> &get_weights () const;
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  std::size_t memory_consumption () const;
+
+  /**
+   * Write or read the data of this object to or from a stream for the purpose
+   * of serialization.
+   */
+  template <class Archive>
+  void serialize (Archive &ar, const unsigned int version);
+
+protected:
+  /**
+   * List of quadrature points. To be filled by the constructors of derived
+   * classes.
+   */
+  std::vector<Point<dim> > quadrature_points;
+
+  /**
+   * List of weights of the quadrature points.  To be filled by the
+   * constructors of derived classes.
+   */
+  std::vector<double>      weights;
+};
+
+
+/**
+ * Quadrature formula implementing anisotropic distributions of quadrature
+ * points on the reference cell. To this end, the tensor product of
+ * <tt>dim</tt> one-dimensional quadrature formulas is generated.
+ *
+ * @note Each constructor can only be used in the dimension matching the
+ * number of arguments.
+ *
+ * @author Guido Kanschat, 2005
+ */
+template <int dim>
+class QAnisotropic : public Quadrature<dim>
+{
+public:
+  /**
+   * Constructor for a one-dimensional formula. This one just copies the given
+   * quadrature rule.
+   */
+  QAnisotropic(const Quadrature<1> &qx);
+
+  /**
+   * Constructor for a two-dimensional formula from the 1d rules @p qx, @p qy.
+   */
+  QAnisotropic(const Quadrature<1> &qx,
+               const Quadrature<1> &qy);
+
+  /**
+   * Constructor for a three-dimensional formula from @p qx, @p qy and @p qz.
+   */
+  QAnisotropic(const Quadrature<1> &qx,
+               const Quadrature<1> &qy,
+               const Quadrature<1> &qz);
+};
+
+
+/**
+ * Quadrature formula constructed by iteration of another quadrature formula
+ * in each direction. In more than one space dimension, the resulting
+ * quadrature formula is constructed in the usual way by building the tensor
+ * product of the respective iterated quadrature formula in one space
+ * dimension.
+ *
+ * In one space dimension, the given base formula is copied and scaled onto a
+ * given number of subintervals of length <tt>1/n_copies</tt>. If the
+ * quadrature formula uses both end points of the unit interval, then in the
+ * interior of the iterated quadrature formula there would be quadrature
+ * points which are used twice; we merge them into one with a weight which is
+ * the sum of the weights of the left- and the rightmost quadrature point.
+ *
+ * Since all dimensions higher than one are built up by tensor products of one
+ * dimensional and <tt>dim-1</tt> dimensional quadrature formulae, the
+ * argument given to the constructor needs to be a quadrature formula in one
+ * space dimension, rather than in <tt>dim</tt> dimensions.
+ *
+ * The aim of this class is to provide a low order formula, where the error
+ * constant can be tuned by increasing the number of quadrature points. This
+ * is useful in integrating non-differentiable functions on cells.
+ *
+ * @author Wolfgang Bangerth 1999
+ */
+template <int dim>
+class QIterated : public Quadrature<dim>
+{
+public:
+  /**
+   * Constructor. Iterate the given quadrature formula @p n_copies times in
+   * each direction; @p base_quadrature is always a one-dimensional formula.
+   */
+  QIterated (const Quadrature<1> &base_quadrature,
+             const unsigned int   n_copies);
+
+  /**
+   * Exception signalling that a formula cannot serve as basis for iteration.
+   */
+  DeclExceptionMsg (ExcInvalidQuadratureFormula,
+                    "The quadrature formula you provided cannot be used "
+                    "as the basis for iteration.");
+private:
+  /**
+   * Check whether the given quadrature formula has quadrature points at the
+   * left and right end points of the interval.
+   */
+  static bool
+  uses_both_endpoints (const Quadrature<1> &base_quadrature);
+};
+
+
+
+/*@}*/
+
+#ifndef DOXYGEN
+
+// -------------------  inline and template functions ----------------
+
+
+template<int dim>
+inline
+unsigned int
+Quadrature<dim>::size () const
+{
+  return weights.size(); // the length of the weights vector is the point count
+}
+
+
+template <int dim>
+inline
+const Point<dim> &
+Quadrature<dim>::point (const unsigned int i) const
+{
+  AssertIndexRange(i, size()); // i is checked against size() before indexing
+  return quadrature_points[i]; // returned by reference, not copied
+}
+
+
+
+template <int dim>
+double // NOTE(review): the only accessor here not marked inline -- intended?
+Quadrature<dim>::weight (const unsigned int i) const
+{
+  AssertIndexRange(i, size()); // i is checked against size() before indexing
+  return weights[i];
+}
+
+
+
+template <int dim>
+inline
+const std::vector<Point<dim> > &
+Quadrature<dim>::get_points () const
+{
+  return quadrature_points; // const reference to the member, no copy is made
+}
+
+
+
+template <int dim>
+inline
+const std::vector<double> &
+Quadrature<dim>::get_weights () const
+{
+  return weights; // const reference to the member, no copy is made
+}
+
+
+
+template <int dim>
+template <class Archive>
+inline
+void
+Quadrature<dim>::serialize (Archive &ar, const unsigned int) // version unused
+{
+  // first pass the Subscriptor base
+  // class part of this object to the archive,
+  ar   &static_cast<Subscriptor &>(*this);
+
+  ar &quadrature_points &weights; // then the points, followed by the weights
+}
+
+
+
+/* -------------- declaration of explicit specializations ------------- */
+
+template <>
+Quadrature<0>::Quadrature (const unsigned int);
+template <>
+Quadrature<0>::Quadrature (const Quadrature<-1> &, // SubQuadrature for dim==0
+                           const Quadrature<1> &);
+template <>
+Quadrature<0>::Quadrature (const Quadrature<1> &); // dim!=1: argument is 1d
+template <>
+Quadrature<0>::~Quadrature ();
+
+template <>
+Quadrature<1>::Quadrature (const Quadrature<0> &, // SubQuadrature for dim==1
+                           const Quadrature<1> &);
+
+template <>
+Quadrature<1>::Quadrature (const Quadrature<0> &); // dim==1: argument is 0d
+
+#endif // DOXYGEN
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/quadrature_lib.h b/include/deal.II/base/quadrature_lib.h
new file mode 100644
index 0000000..1c39bbe
--- /dev/null
+++ b/include/deal.II/base/quadrature_lib.h
@@ -0,0 +1,659 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__quadrature_lib_h
+#define dealii__quadrature_lib_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/quadrature.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+/*!@addtogroup Quadrature */
+/*@{*/
+
+/**
+ * The Gauss-Legendre family of quadrature rules for numerical integration.
+ *
+ * The coefficients of these quadrature rules are computed by the function
+ * described in <a
+ * href="http://en.wikipedia.org/wiki/Numerical_Recipes">Numerical
+ * Recipes</a>.
+ *
+ * @author Guido Kanschat, 2001
+ */
+template <int dim>
+class QGauss : public Quadrature<dim>
+{
+public:
+  /**
+   * Generate a formula with @p n quadrature points (in each space
+   * direction), exact for polynomials of degree <tt>2n-1</tt>.
+   */
+  QGauss (const unsigned int n);
+};
+
+
+/**
+ * The Gauss-Lobatto family of quadrature rules for numerical integration.
+ *
+ * This modification of the Gauss quadrature uses the two interval end points
+ * as well. Being exact for polynomials of degree <i>2n-3</i>, this formula is
+ * suboptimal by two degrees.
+ *
+ * The quadrature points are interval end points plus the roots of the
+ * derivative of the Legendre polynomial <i>P<sub>n-1</sub></i> of degree
+ * <i>n-1</i>. The quadrature weights are
+ * <i>2/(n(n-1)(P<sub>n-1</sub>(x<sub>i</sub>))<sup>2</sup>)</i>.
+ *
+ * @note This implementation has not been optimized concerning numerical
+ * stability and efficiency. It can be easily adapted to the general case of
+ * Gauss-Lobatto-Jacobi-Bouzitat quadrature with arbitrary parameters
+ * $\alpha$, $\beta$, of which the Gauss-Lobatto-Legendre quadrature ($\alpha
+ * = \beta = 0$) is a special case.
+ *
+ * @sa http://en.wikipedia.org/wiki/Handbook_of_Mathematical_Functions @sa
+ * Karniadakis, G.E. and Sherwin, S.J.: Spectral/hp element methods for
+ * computational fluid dynamics. Oxford: Oxford University Press, 2005
+ *
+ * @author Guido Kanschat, 2005, 2006; F. Prill, 2006
+ */
+template <int dim>
+class QGaussLobatto : public Quadrature<dim>
+{
+public:
+  /**
+   * Generate a formula with @p n quadrature points (in each space
+   * direction).
+   */
+  QGaussLobatto(const unsigned int n);
+
+protected:
+  /**
+   * Compute Legendre-Gauss-Lobatto quadrature points in the interval $[-1,
+   * +1]$. They are equal to the roots of the corresponding Jacobi polynomial
+   * (specified by @p alpha, @p beta).  @p q is the number of points.
+   *
+   * @return Vector containing nodes.
+   */
+  std::vector<long double>
+  compute_quadrature_points (const unsigned int q,
+                             const int alpha,
+                             const int beta) const;
+
+  /**
+   * Compute Legendre-Gauss-Lobatto quadrature weights. The quadrature points
+   * and weights are related to the Jacobi polynomial specified by @p alpha,
+   * @p beta. @p x denotes the quadrature points.
+   *
+   * @return Vector containing weights.
+   */
+  std::vector<long double>
+  compute_quadrature_weights (const std::vector<long double> &x,
+                              const int alpha,
+                              const int beta) const;
+
+  /**
+   * Evaluate a Jacobi polynomial $ P^{\alpha, \beta}_n(x) $ specified by the
+   * parameters @p alpha, @p beta, @p n. Note: The Jacobi polynomials are not
+   * orthonormal and are defined on the interval $[-1, +1]$. @p x is the point
+   * of evaluation.
+   */
+  long double JacobiP(const long double x,
+                      const int alpha,
+                      const int beta,
+                      const unsigned int n) const;
+
+  /**
+   * Evaluate the Gamma function $ \Gamma(n) = (n-1)! $.
+   * @param n  point of evaluation (integer).
+   */
+  long double gamma(const unsigned int n) const;
+};
+
+
+
+/**
+ * The midpoint rule for numerical quadrature. This one-point formula is exact
+ * for linear polynomials.
+ */
+template <int dim>
+class QMidpoint : public Quadrature<dim>
+{
+public:
+  QMidpoint (); ///< Constructor of the midpoint rule; takes no arguments.
+};
+
+
+/**
+ * The Simpson rule for numerical quadrature. This formula with 3 quadrature
+ * points is exact for polynomials of degree 3.
+ */
+template <int dim>
+class QSimpson : public Quadrature<dim>
+{
+public:
+  QSimpson (); ///< Constructor of the Simpson rule; takes no arguments.
+};
+
+
+
+/**
+ * The trapezoidal rule for numerical quadrature. This formula with two
+ * quadrature points is exact for linear polynomials.
+ *
+ * The class is poorly named since the proper name of the quadrature formula
+ * is "trapezoidal rule", or sometimes also called the "trapezoid rule". The
+ * misnomer results from the fact that its original authors' poor English
+ * language skills led them to translate the name incorrectly from the German
+ * "Trapezregel".
+ *
+ * @author Wolfgang Bangerth, 1998
+ */
+template <int dim>
+class QTrapez : public Quadrature<dim>
+{
+public:
+  QTrapez (); ///< Constructor of the trapezoidal rule; takes no arguments.
+};
+
+
+
+/**
+ * The Milne rule for numerical quadrature. The Milne rule is a closed
+ * Newton-Cotes formula and is exact for polynomials of degree 5.
+ *
+ * @sa Stoer: Einführung in die Numerische Mathematik I, p. 102
+ */
+template <int dim>
+class QMilne : public Quadrature<dim>
+{
+public:
+  QMilne (); ///< Constructor of the Milne rule; takes no arguments.
+};
+
+
+/**
+ * The Weddle rule for numerical quadrature. The Weddle rule is a closed
+ * Newton-Cotes formula and is exact for polynomials of degree 7.
+ *
+ * @sa Stoer: Einführung in die Numerische Mathematik I, p. 102
+ */
+template <int dim>
+class QWeddle : public Quadrature<dim>
+{
+public:
+  QWeddle (); ///< Constructor of the Weddle rule; takes no arguments.
+};
+
+
+
+/**
+ * A class for Gauss quadrature with logarithmic weighting function. This
+ * formula is used to integrate $\ln|x|\;f(x)$ on the interval $[0,1]$, where
+ * $f$ is a smooth function without singularities. The collection of
+ * quadrature points and weights has been obtained using <tt>Numerical
+ * Recipes</tt>.
+ *
+ * Notice that only the function $f(x)$ should be provided, i.e., $\int_0^1
+ * f(x) \ln|x| dx = \sum_{i=0}^N w_i f(q_i)$. Setting the @p revert flag to
+ * true at construction time switches the weight from $\ln|x|$ to $\ln|1-x|$.
+ *
+ * The weights and functions have been tabulated up to order 12.
+ */
+template <int dim>
+class QGaussLog : public Quadrature<dim>
+{
+public:
+  /**
+   * Generate a formula with @p n points; @p revert selects $\ln|1-x|$.
+   */
+  QGaussLog(const unsigned int n,
+            const bool revert=false);
+
+protected:
+  /**
+   * Returns the points of the quadrature formula for the given @p n.
+   */
+  std::vector<double>
+  set_quadrature_points(const unsigned int n) const;
+
+  /**
+   * Returns the weights of the quadrature formula for the given @p n.
+   */
+  std::vector<double>
+  set_quadrature_weights(const unsigned int n) const;
+
+};
+
+
+
+
+/**
+ * A class for Gauss quadrature with arbitrary logarithmic weighting function.
+ * This formula is used to integrate $\ln(|x-x_0|/\alpha)\;f(x)$ on the
+ * interval $[0,1]$, where $f$ is a smooth function without singularities, and
+ * $x_0$ and $\alpha$ are given at construction time, and are the location of
+ * the singularity $x_0$ and an arbitrary scaling factor in the singularity.
+ *
+ * You have to make sure that the point $x_0$ is not one of the Gauss
+ * quadrature points of order $N$, otherwise an exception is thrown, since the
+ * quadrature weights cannot be computed correctly.
+ *
+ * This quadrature formula is rather expensive, since it uses internally two
+ * Gauss quadrature formulas of order n to integrate the nonsingular part of
+ * the factor, and two GaussLog quadrature formulas to integrate on the
+ * separate segments $[0,x_0]$ and $[x_0,1]$. If the singularity is one of the
+ * extremes and the factor alpha is 1, then this quadrature is the same as
+ * QGaussLog.
+ *
+ * The last argument from the constructor allows you to use this quadrature
+ * rule in one of two possible ways: \f[ \int_0^1 g(x) dx = \int_0^1 f(x)
+ * \ln\left(\frac{|x-x_0|}{\alpha}\right) dx = \sum_{i=0}^N w_i g(q_i) =
+ * \sum_{i=0}^N \bar{w}_i f(q_i) \f]
+ *
+ * Which one of the two sets of weights is provided, can be selected by the @p
+ * factor_out_singular_weight parameter. If it is false (the default), then
+ * the $\bar{w}_i$ weights are computed, and you should provide only the
+ * smooth function $f(x)$, since the singularity is included inside the
+ * quadrature. If the parameter is set to true, then the singularity is
+ * factored out of the quadrature formula, and you should provide a function
+ * $g(x)$, which should at least be similar to $\ln(|x-x_0|/\alpha)$.
+ *
+ * Notice that this quadrature rule is worthless if you try to use it for
+ * regular functions once you factored out the singularity.
+ *
+ * The weights and functions have been tabulated up to order 12.
+ */
+template <int dim>
+class QGaussLogR : public Quadrature<dim>
+{
+public:
+  /**
+   * The constructor takes four arguments: the order of the Gauss formula on
+   * each of the segments $[0,x_0]$ and $[x_0,1]$, the actual location of the
+   * singularity, the scale factor inside the logarithmic function and a flag
+   * that decides whether the singularity is left inside the quadrature
+   * formula or it is factored out, to be included in the integrand.
+   */
+  QGaussLogR(const unsigned int n,
+             const Point<dim> x0 = Point<dim>(),
+             const double alpha = 1,
+             const bool factor_out_singular_weight=false);
+
+protected:
+  /**
+   * This is the length of interval $(0,origin)$, or 1 if either of the two
+   * extremes has been selected.
+   */
+  const double fraction;
+};
+
+
+/**
+ * A class for Gauss quadrature with $1/R$ weighting function. This formula
+ * can be used to integrate $1/R \ f(x)$ on the reference element
+ * $[0,1]^2$, where $f$ is a smooth function without singularities, and $R$ is
+ * the distance from the point $x$ to the vertex $\xi$, given at construction
+ * time by specifying its index. Notice that this distance is evaluated in the
+ * reference element.
+ *
+ * This quadrature formula is obtained from two QGauss quadrature formulas,
+ * upon transforming them into polar coordinate system centered at the
+ * singularity, and then again into another reference element. This allows for
+ * the singularity to be cancelled by part of the Jacobian of the
+ * transformation, which contains $R$. In practice the reference element is
+ * transformed into a triangle by collapsing one of the sides adjacent to the
+ * singularity. The Jacobian of this transformation contains $R$, which is
+ * removed before scaling the original quadrature, and this process is
+ * repeated for the next half element.
+ *
+ * Upon construction it is possible to specify whether we want the singularity
+ * removed, or not. In other words, this quadrature can be used to integrate
+ * $g(x) = 1/R\ f(x)$, or simply $f(x)$, with the $1/R$ factor already
+ * included in the quadrature weights.
+ */
+template <int dim>
+class QGaussOneOverR : public Quadrature<dim>
+{
+public:
+  /**
+   * This constructor takes three arguments: the order of the Gauss formula,
+   * the point of the reference element in which the singularity is located,
+   * and whether we include the weighting singular function inside the
+   * quadrature, or we leave it in the user function to be integrated.
+   *
+   * Traditionally, quadrature formulas include their weighting function, and
+   * the last argument is set to false by default. There are cases, however,
+   * where this is undesirable (for example when you only know that your
+   * singularity has the same order of 1/R, but cannot be written exactly in
+   * this way).
+   *
+   * In other words, you can use this function in either of the following ways,
+   * obtaining the same result:
+   *
+   * @code
+   * QGaussOneOverR singular_quad(order, q_point, false);
+   * // This will produce the integral of f(x)/R
+   * for(unsigned int i=0; i<singular_quad.size(); ++i)
+   *   integral += f(singular_quad.point(i))*singular_quad.weight(i);
+   *
+   * // And the same here
+   * QGaussOneOverR singular_quad_noR(order, q_point, true);
+   *
+   * // This also will produce the integral of f(x)/R, but 1/R has to
+   * // be specified.
+   * for(unsigned int i=0; i<singular_quad_noR.size(); ++i) {
+   *   double R = (singular_quad_noR.point(i)-cell->vertex(vertex_id)).norm();
+   *   integral += f(singular_quad_noR.point(i))*singular_quad_noR.weight(i)/R;
+   * }
+   * @endcode
+   */
+  QGaussOneOverR(const unsigned int n,
+                 const Point<dim> singularity,
+                 const bool factor_out_singular_weight=false);
+  /**
+   * The constructor takes three arguments: the order of the Gauss formula,
+   * the index of the vertex where the singularity is located, and whether we
+   * include the weighting singular function inside the quadrature, or we
+   * leave it in the user function to be integrated. Notice that this is a
+   * specialized version of the previous constructor which works only for the
+   * vertices of the quadrilateral.
+   *
+   * Traditionally, quadrature formulas include their weighting function, and
+   * the last argument is set to false by default. There are cases, however,
+   * where this is undesirable (for example when you only know that your
+   * singularity has the same order of 1/R, but cannot be written exactly in
+   * this way).
+   *
+   * In other words, you can use this function in either of the following ways,
+   * obtaining the same result:
+   *
+   * @code
+   * QGaussOneOverR singular_quad(order, vertex_id, false);
+   * // This will produce the integral of f(x)/R
+   * for(unsigned int i=0; i<singular_quad.size(); ++i)
+   *   integral += f(singular_quad.point(i))*singular_quad.weight(i);
+   *
+   * // And the same here
+   * QGaussOneOverR singular_quad_noR(order, vertex_id, true);
+   *
+   * // This also will produce the integral of f(x)/R, but 1/R has to
+   * // be specified.
+   * for(unsigned int i=0; i<singular_quad_noR.size(); ++i) {
+   *   double R = (singular_quad_noR.point(i)-cell->vertex(vertex_id)).norm();
+   *   integral += f(singular_quad_noR.point(i))*singular_quad_noR.weight(i)/R;
+   * }
+   * @endcode
+   */
+  QGaussOneOverR(const unsigned int n,
+                 const unsigned int vertex_index,
+                 const bool factor_out_singular_weight=false);
+private:
+  /**
+   * Given a quadrature point and a degree n, this function returns the size
+   * of the singular quadrature rule, considering whether the point is inside
+   * the cell, on an edge of the cell, or on a corner of the cell.
+   */
+  static unsigned int quad_size(const Point<dim> singularity,
+                                const unsigned int n);
+};
+
+
+
+/**
+ * Sorted Quadrature. Given an arbitrary quadrature formula, this class
+ * generates a quadrature formula where the quadrature points are ordered
+ * according to the weights, from those with smaller corresponding weight, to
+ * those with higher corresponding weights. This might be necessary, for
+ * example, when integrating high order polynomials, since in these cases you
+ * might sum very big numbers with very small numbers, and summation is not
+ * stable if the numbers to sum are not close to each other.
+ */
+template <int dim>
+class QSorted : public Quadrature<dim>
+{
+public:
+  /**
+   * The constructor takes an arbitrary quadrature formula (by value).
+   */
+  QSorted (const Quadrature<dim>);
+
+  /**
+   * A rule to reorder pairs of weights and points (the weight comes first).
+   */
+  bool operator()(const std::pair<double, Point<dim> > &a,
+                  const std::pair<double, Point<dim> > &b);
+};
+
+/**
+ * Telles quadrature of arbitrary order.
+ *
+ * The coefficients of these quadrature rules are computed using a non linear
+ * change of variables starting from a Gauss-Legendre quadrature formula. This
+ * is done using a cubic polynomial, $n = a x^3 + b x^2 + c x + d$ in order to
+ * integrate a singular integral, with singularity at a given point x_0.
+ *
+ * We start from a Gauss Quadrature Formula with arbitrary function. Then we
+ * apply the cubic variable change. In the paper, J.C.F.Telles:A Self-Adaptive
+ * Co-ordinate Transformation For Efficient Numerical Evaluation of General
+ * Boundary Element Integrals. International Journal for Numerical Methods in
+ * Engineering, vol 24, pages 959–973. year 1987, the author applies the
+ * transformation on the reference cell $[-1, 1]$ getting
+ * @f{align*}{
+ * n(1) &= 1, \\ n(-1) &= -1, \\ \frac{dn}{dx} &= 0 \text{ at }
+ * x = x_0, \\ \frac{d^2n}{dx^2} &= 0 \text{ at  } x = x_0
+ * @f}
+ * We get
+ * @f{align*}{
+ * a &= \frac{1}{q}, \\
+ * b &= -3 \frac{\bar{\Gamma}}{q}, \\
+ * c &= 3 \frac{\bar{\Gamma}^2}{q}, \\
+ * d &= -b,
+ * @f}
+ * with
+ * @f{align*}{
+ * \eta^{*} &= \bar{\eta}^2 - 1, \\
+ * \bar{\Gamma}  &= \sqrt[3]{\bar{\eta} \eta^{*} + |\eta^{*} | }
+ *                  + \sqrt[3]{ \bar{\eta} \eta^{*} - |\eta^{*} | }
+ *                  + \bar{\eta}, \\
+ * q &= 1+3\bar{\Gamma}^2, \\
+ * n(x) &= \frac{(x-\bar{\Gamma})^3 + \bar{\Gamma}(\bar{\Gamma}^2+3)}{q}
+ * @f}
+ * Since the library assumes $[0,1]$ as reference interval, we will map these
+ * values on the proper reference interval in the implementation.
+ *
+ * This variable change can be used to integrate singular integrals. One
+ * example is $f(x)/|x-x_0|$ on the reference interval $[0,1]$, where $x_0$ is
+ * given at construction time, and is the location of the singularity $x_0$,
+ * and $f(x)$ is a smooth non singular function.
+ *
+ * Singular quadrature formula are rather expensive, nevertheless Telles'
+ * quadrature formula are much easier to compute with respect to other
+ * singular integration techniques as Lachat-Watson.
+ *
+ * We have implemented the case for $dim = 1$. For the case $dim > 1$, the
+ * quadrature formula is computed as a tensor product of one-dimensional
+ * Telles' quadrature formulas, taking into account the different components
+ * of the singularity.
+ *
+ * The weights and functions for Gauss Legendre formula have been tabulated up
+ * to order 12.
+ *
+ * @author Nicola Giuliani, Luca Heltai 2015
+ */
+template <int dim>
+class QTelles: public Quadrature<dim>
+{
+public:
+  /**
+   * A constructor that takes a quadrature formula and a singular point as
+   * argument. The quadrature formula will be mapped using Telles' rule. Make
+   * sure that the order of the quadrature rule is appropriate for the
+   * singularity in question.
+   */
+  QTelles (const Quadrature<1> &base_quad, const Point<dim> &singularity);
+  /**
+   * A variant of the above constructor that takes as parameters the order
+   * @p n and the location of a singularity. A Gauss-Legendre quadrature of
+   * order @p n will be used.
+   */
+  QTelles (const unsigned int n, const Point<dim> &singularity);
+
+};
+
+/*@}*/
+
+/**
+ * Gauss-Chebyshev quadrature rules integrate the weighted product
+ * $\int_{-1}^1 f(x) w(x) dx$ with weight given by: $w(x) = 1/\sqrt{1-x^2}$.
+ * The nodes and weights are known analytically, and are exact for monomials
+ * up to the order $2n-1$, where $n$ is the number of quadrature points. Here
+ * we rescale the quadrature formula so that it is defined on the interval
+ * $[0,1]$ instead of $[-1,1]$. So the quadrature formulas integrate exactly
+ * the integral $\int_0^1 f(x) w(x) dx$ with the weight: $w(x) =
+ * 1/\sqrt{x(1-x)}$. For details see: M. Abramowitz & I.A. Stegun: Handbook of
+ * Mathematical Functions, par. 25.4.38
+ *
+ * @author Giuseppe Pitton, Luca Heltai 2015
+ */
+template <int dim>
+class QGaussChebyshev : public Quadrature<dim>
+{
+public:
+  /// Generate a formula with <tt>n</tt> quadrature points
+  QGaussChebyshev(const unsigned int n);
+
+private:
+  /// Computes the points of the quadrature formula.
+  static std::vector<double>
+  get_quadrature_points(const unsigned int n);
+
+  /// Computes the weights of the quadrature formula.
+  static std::vector<double>
+  get_quadrature_weights(const unsigned int n);
+
+};
+
+
+/**
+ * Gauss-Radau-Chebyshev quadrature rules integrate the weighted product
+ * $\int_{-1}^1 f(x) w(x) dx$ with weight given by: $w(x) = 1/\sqrt{1-x^2}$
+ * with the additional constraint that a quadrature point lies at one of the
+ * two extrema of the interval. The nodes and weights are known analytically,
+ * and are exact for monomials up to the order $2n-2$, where $n$ is the number
+ * of quadrature points. Here we rescale the quadrature formula so that it is
+ * defined on the interval $[0,1]$ instead of $[-1,1]$. So the quadrature
+ * formulas integrate exactly the integral $\int_0^1 f(x) w(x) dx$ with the
+ * weight: $w(x) = 1/\sqrt{x(1-x)}$. By default the quadrature is constructed
+ * with the left endpoint as quadrature node, but the quadrature node can be
+ * imposed at the right endpoint through the variable ep that can assume the
+ * values left or right.
+ *
+ * @author Giuseppe Pitton, Luca Heltai 2015
+ */
+template <int dim>
+class QGaussRadauChebyshev : public Quadrature<dim>
+{
+public:
+  /** EndPoint is used to specify which of the two endpoints of the unit
+   * interval is also used as a quadrature point.
+   */
+  enum EndPoint { left,right };
+  /// Generate a formula with <tt>n</tt> quadrature points
+  QGaussRadauChebyshev(const unsigned int n,
+                       EndPoint ep=QGaussRadauChebyshev::left);
+
+private:
+  const EndPoint ep;
+  /// Computes the points of the quadrature formula.
+  static std::vector<double>
+  get_quadrature_points(const unsigned int n, EndPoint ep);
+
+  /// Computes the weights of the quadrature formula.
+  static std::vector<double>
+  get_quadrature_weights(const unsigned int n, EndPoint ep);
+
+};
+
+/**
+ * Gauss-Lobatto-Chebyshev quadrature rules integrate the weighted product
+ * $\int_{-1}^1 f(x) w(x) dx$ with weight given by: $w(x) = 1/\sqrt{1-x^2}$,
+ * with the additional constraint that two of the quadrature points are
+ * located at the endpoints of the quadrature interval. The nodes and weights
+ * are known analytically, and are exact for monomials up to the order $2n-3$,
+ * where $n$ is the number of quadrature points. Here we rescale the
+ * quadrature formula so that it is defined on the interval $[0,1]$ instead of
+ * $[-1,1]$. So the quadrature formulas integrate exactly the integral
+ * $\int_0^1 f(x) w(x) dx$ with the weight: $w(x) = 1/\sqrt{x(1-x)}$. For
+ * details see: M. Abramowitz & I.A. Stegun: Handbook of Mathematical
+ * Functions, par. 25.4.40
+ *
+ * @author Giuseppe Pitton, Luca Heltai 2015
+ */
+template <int dim>
+class QGaussLobattoChebyshev : public Quadrature<dim>
+{
+public:
+  /// Generate a formula with <tt>n</tt> quadrature points
+  QGaussLobattoChebyshev(const unsigned int n);
+
+private:
+  /// Computes the points of the quadrature formula.
+  static std::vector<double>
+  get_quadrature_points(const unsigned int n);
+
+  /// Computes the weights of the quadrature formula.
+  static std::vector<double>
+  get_quadrature_weights(const unsigned int n);
+
+};
+
+/* -------------- declaration of explicit specializations ------------- */
+
+template <> QGauss<1>::QGauss (const unsigned int n);
+template <> QGaussLobatto<1>::QGaussLobatto (const unsigned int n);
+template <>
+std::vector<long double> QGaussLobatto<1>::
+compute_quadrature_points(const unsigned int, const int, const int) const;
+template <>
+std::vector<long double> QGaussLobatto<1>::
+compute_quadrature_weights(const std::vector<long double> &, const int, const int) const;
+template <>
+long double QGaussLobatto<1>::
+JacobiP(const long double, const int, const int, const unsigned int) const;
+template <>
+long double
+QGaussLobatto<1>::gamma(const unsigned int n) const;
+
+template <> std::vector<double> QGaussLog<1>::set_quadrature_points(const unsigned int) const;
+template <> std::vector<double> QGaussLog<1>::set_quadrature_weights(const unsigned int) const;
+
+template <> QMidpoint<1>::QMidpoint ();
+template <> QTrapez<1>::QTrapez ();
+template <> QSimpson<1>::QSimpson ();
+template <> QMilne<1>::QMilne ();
+template <> QWeddle<1>::QWeddle ();
+template <> QGaussLog<1>::QGaussLog (const unsigned int n, const bool revert);
+template <> QGaussLogR<1>::QGaussLogR (const unsigned int n, const Point<1> x0, const double alpha, const bool flag);
+template <> QGaussOneOverR<2>::QGaussOneOverR (const unsigned int n, const unsigned int index, const bool flag);
+template <> QTelles<1>::QTelles(const Quadrature<1> &base_quad, const Point<1> &singularity);
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/quadrature_selector.h b/include/deal.II/base/quadrature_selector.h
new file mode 100644
index 0000000..025dc07
--- /dev/null
+++ b/include/deal.II/base/quadrature_selector.h
@@ -0,0 +1,104 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__quadrature_selector_h
+#define dealii__quadrature_selector_h
+
+
+#include <deal.II/base/quadrature.h>
+#include <deal.II/base/exceptions.h>
+
+#include <string>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * This class implements the quadrature rule passed to its constructor as a
+ * string. Supported quadratures are QGauss (of all orders), QMidpoint,
+ * QMilne, QSimpson, QTrapez and QWeddle.
+ *
+ * This class is useful if you want to use flexible quadrature rules, that are
+ * read from a parameter file (see ParameterHandler for this).
+ *
+ * @ingroup Quadrature
+ * @author Ralf Schulz, 2003
+ */
+template<int dim>
+class QuadratureSelector : public Quadrature<dim>
+{
+public:
+  /**
+   * Constructor. Takes the name of the quadrature rule (one of "gauss",
+   * "milne", "weddle", etc) and, if it is "gauss", the number of quadrature
+   * points in each coordinate direction.
+   */
+  QuadratureSelector (const std::string &s,
+                      const unsigned int order=0);
+
+  /**
+   * This function returns all possible names for quadratures as a list
+   * separated by <tt>|</tt>, so that you can use it for the definition of
+   * parameter files (see ParameterHandler for details).
+   */
+  static std::string get_quadrature_names();
+
+  /**
+   * @addtogroup Exceptions
+   * @{
+   */
+
+
+  /**
+   * Exception
+   */
+  DeclException1 (ExcInvalidQGaussOrder,
+                  int,
+                  << "You tried to generate a QGauss object with an invalid "
+                  << "number " << arg1
+                  << " of quadrature points in each coordinate "
+                  << "direction. This number must be greater than or equal "
+                  << "to 1.");
+  /**
+   * Exception
+   */
+  DeclException2 (ExcInvalidOrder,
+                  std::string,
+                  unsigned int,
+                  << "You tried to generate a " << arg1
+                  << " object; no order is needed for objects of this kind, but "
+                  << arg2 << " was given as argument.");
+  /**
+   * Exception
+   */
+  DeclException1 (ExcInvalidQuadrature,
+                  std::string,
+                  << arg1
+                  << " is not a valid name for a quadrature rule.");
+  //@}
+private:
+  /**
+   * This static function creates a quadrature object according to the name
+   * given as a string, and the appropriate order (if the name is "gauss"). It
+   * is called from the constructor.
+   */
+  static
+  Quadrature<dim>
+  create_quadrature (const std::string &s,
+                     const unsigned int order);
+};
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/revision.h.in b/include/deal.II/base/revision.h.in
new file mode 100644
index 0000000..7e8cc37
--- /dev/null
+++ b/include/deal.II/base/revision.h.in
@@ -0,0 +1,34 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2014 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__revision_h
+#define dealii__revision_h
+
+/**
+ * Name of the local git branch of the source directory.
+ */
+#define DEAL_II_GIT_BRANCH "@DEAL_II_GIT_BRANCH@"
+
+/**
+ * Full sha1 revision of the current git HEAD.
+ */
+#define DEAL_II_GIT_REVISION "@DEAL_II_GIT_REVISION@"
+
+/**
+ * Short sha1 revision of the current git HEAD.
+ */
+#define DEAL_II_GIT_SHORTREV "@DEAL_II_GIT_SHORTREV@"
+
+#endif
diff --git a/include/deal.II/base/sacado_product_type.h b/include/deal.II/base/sacado_product_type.h
new file mode 100644
index 0000000..abd7ad1
--- /dev/null
+++ b/include/deal.II/base/sacado_product_type.h
@@ -0,0 +1,166 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__sacado_product_type_h
+#define dealii__sacado_product_type_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/tensor.h>
+#include <deal.II/base/symmetric_tensor.h>
+#include <deal.II/base/template_constraints.h>
+
+#ifdef DEAL_II_WITH_TRILINOS
+#include "Sacado.hpp"
+
+DEAL_II_NAMESPACE_OPEN
+
+template <typename T>
+struct ProductType<Sacado::Fad::DFad<T>, float>
+{
+  typedef Sacado::Fad::DFad<T> type;
+};
+
+template <typename T>
+struct ProductType<float, Sacado::Fad::DFad<T> >
+{
+  typedef Sacado::Fad::DFad<T> type;
+};
+
+template <typename T>
+struct ProductType<Sacado::Fad::DFad<T>, double>
+{
+  typedef Sacado::Fad::DFad<T> type;
+};
+
+template <typename T>
+struct ProductType<double, Sacado::Fad::DFad<T> >
+{
+  typedef Sacado::Fad::DFad<T> type;
+};
+
+template <typename T>
+struct ProductType<Sacado::Fad::DFad<T>, int>
+{
+  typedef Sacado::Fad::DFad<T> type;
+};
+
+template <typename T>
+struct ProductType<int, Sacado::Fad::DFad<T> >
+{
+  typedef Sacado::Fad::DFad<T> type;
+};
+
+template <typename T, typename U>
+struct ProductType<Sacado::Fad::DFad<T>, Sacado::Fad::DFad<U> >
+{
+  typedef Sacado::Fad::DFad<typename ProductType<T,U>::type > type;
+};
+
+template <typename T>
+struct EnableIfScalar<Sacado::Fad::DFad<T> >
+{
+  typedef Sacado::Fad::DFad<T> type;
+};
+
+
+
+/**
+ * Compute the scalar product $a:b=\sum_{i,j} a_{ij}b_{ij}$ between two
+ * tensors $a,b$ of rank 2. We don't use <code>operator*</code> for this
+ * operation since the product between two tensors is usually assumed to be
+ * the contraction over the last index of the first tensor and the first index
+ * of the second tensor, for example $(a\cdot b)_{ij}=\sum_k a_{ik}b_{kj}$.
+ *
+ * @relates Tensor @relates SymmetricTensor
+ */
+template <int dim, typename Number, typename T>
+inline
+Sacado::Fad::DFad<T>
+scalar_product (const SymmetricTensor<2,dim,Sacado::Fad::DFad<T> > &t1,
+                const Tensor<2,dim,Number> &t2)
+{
+  Sacado::Fad::DFad<T> s = 0;
+  for (unsigned int i=0; i<dim; ++i)
+    for (unsigned int j=0; j<dim; ++j)
+      s += t1[i][j] * t2[i][j];
+  return s;
+}
+
+
+/**
+ * Compute the scalar product $a:b=\sum_{i,j} a_{ij}b_{ij}$ between two
+ * tensors $a,b$ of rank 2. We don't use <code>operator*</code> for this
+ * operation since the product between two tensors is usually assumed to be
+ * the contraction over the last index of the first tensor and the first index
+ * of the second tensor, for example $(a\cdot b)_{ij}=\sum_k a_{ik}b_{kj}$.
+ *
+ * @relates Tensor @relates SymmetricTensor
+ */
+template <int dim, typename Number, typename T >
+inline
+Sacado::Fad::DFad<T>
+scalar_product (const Tensor<2,dim,Number> &t1,
+                const SymmetricTensor<2,dim,Sacado::Fad::DFad<T> > &t2)
+{
+  return scalar_product(t2, t1);
+}
+
+
+/**
+ * Compute the scalar product $a:b=\sum_{i,j} a_{ij}b_{ij}$ between two
+ * tensors $a,b$ of rank 2. We don't use <code>operator*</code> for this
+ * operation since the product between two tensors is usually assumed to be
+ * the contraction over the last index of the first tensor and the first index
+ * of the second tensor, for example $(a\cdot b)_{ij}=\sum_k a_{ik}b_{kj}$.
+ *
+ * @relates Tensor @relates SymmetricTensor
+ */
+template <int dim, typename Number, typename T>
+inline
+Sacado::Fad::DFad<T>
+scalar_product (const SymmetricTensor<2,dim,Number> &t1,
+                const Tensor<2,dim,Sacado::Fad::DFad<T> > &t2)
+{
+  Sacado::Fad::DFad<T> s = 0;
+  for (unsigned int i=0; i<dim; ++i)
+    for (unsigned int j=0; j<dim; ++j)
+      s += t1[i][j] * t2[i][j];
+  return s;
+}
+
+
+/**
+ * Compute the scalar product $a:b=\sum_{i,j} a_{ij}b_{ij}$ between two
+ * tensors $a,b$ of rank 2. We don't use <code>operator*</code> for this
+ * operation since the product between two tensors is usually assumed to be
+ * the contraction over the last index of the first tensor and the first index
+ * of the second tensor, for example $(a\cdot b)_{ij}=\sum_k a_{ik}b_{kj}$.
+ *
+ * @relates Tensor @relates SymmetricTensor
+ */
+template <int dim, typename Number, typename T >
+inline
+Sacado::Fad::DFad<T>
+scalar_product (const Tensor<2,dim,Sacado::Fad::DFad<T> > &t1,
+                const SymmetricTensor<2,dim,Number> &t2)
+{
+  return scalar_product(t2, t1);
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_TRILINOS
+
+#endif
diff --git a/include/deal.II/base/signaling_nan.h b/include/deal.II/base/signaling_nan.h
new file mode 100644
index 0000000..6c9a423
--- /dev/null
+++ b/include/deal.II/base/signaling_nan.h
@@ -0,0 +1,209 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__signaling_nan_h
+#define dealii__signaling_nan_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/tensor.h>
+#include <deal.II/base/symmetric_tensor.h>
+#include <deal.II/base/derivative_form.h>
+
+#include <limits>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace numbers
+{
+
+  namespace internal
+  {
+    /**
+     * A namespace for the implementation of functions that create signaling
+     * NaN objects. This is where the numbers::signaling_nan() function
+     * calls into.
+     */
+    namespace SignalingNaN
+    {
+      /**
+       * A general template for classes that know how to initialize objects of
+       * type @p T with signaling NaNs to denote invalid values.
+       *
+       * The real implementation of this class happens in (partial)
+       * specializations for particular values of the template argument @p T.
+       */
+      template <typename T>
+      struct NaNInitializer;
+
+
+      /**
+       * A specialization of the general NaNInitializer class that provides a
+       * function that returns a @p float value equal to the invalid signaling
+       * NaN.
+       */
+      template <>
+      struct NaNInitializer<float>
+      {
+        static float invalid_element ()
+        {
+          return std::numeric_limits<float>::signaling_NaN();
+        }
+      };
+
+
+      /**
+       * A specialization of the general NaNInitializer class that provides a
+       * function that returns a @p double value equal to the invalid
+       * signaling NaN.
+       */
+      template <>
+      struct NaNInitializer<double>
+      {
+        static double invalid_element ()
+        {
+          return std::numeric_limits<double>::signaling_NaN();
+        }
+      };
+
+
+      /**
+       * A specialization of the general NaNInitializer class that provides a
+       * function that returns a Tensor<1,dim> value whose components are
+       * invalid signaling NaN values.
+       */
+      template <int dim, typename T>
+      struct NaNInitializer<Tensor<1,dim,T> >
+      {
+        static Tensor<1,dim,T> invalid_element ()
+        {
+          Tensor<1,dim,T> nan_tensor;
+
+          for (unsigned int i=0; i<dim; ++i)
+            nan_tensor[i] = NaNInitializer<T>::invalid_element();
+
+          return nan_tensor;
+        }
+      };
+
+
+
+      /**
+       * A specialization of the general NaNInitializer class that provides a
+       * function that returns a Tensor<rank,dim> value whose components are
+       * invalid signaling NaN values.
+       */
+      template <int rank, int dim, typename T>
+      struct NaNInitializer<Tensor<rank,dim,T> >
+      {
+        static Tensor<rank,dim,T> invalid_element ()
+        {
+          Tensor<rank,dim,T> nan_tensor;
+
+          // recursively initialize sub-tensors with invalid elements
+          for (unsigned int i=0; i<dim; ++i)
+            nan_tensor[i] = NaNInitializer<Tensor<rank-1,dim,T> >::invalid_element();
+
+          return nan_tensor;
+        }
+      };
+
+
+
+      /**
+       * A specialization of the general NaNInitializer class that provides a
+       * function that returns a SymmetricTensor<rank,dim> value whose
+       * components are invalid signaling NaN values.
+       */
+      template <int rank, int dim, typename T>
+      struct NaNInitializer<SymmetricTensor<rank,dim,T> >
+      {
+        static SymmetricTensor<rank,dim,T> invalid_element ()
+        {
+          // initialize symmetric tensors via the unrolled list of elements
+          T initializers[SymmetricTensor<rank,dim,T>::n_independent_components];
+          for (unsigned int i=0; i<SymmetricTensor<rank,dim,T>::n_independent_components; ++i)
+            initializers[i] = NaNInitializer<T>::invalid_element();
+
+          return SymmetricTensor<rank,dim,T>(initializers);
+        }
+      };
+
+
+
+      /**
+       * A specialization of the general NaNInitializer class that provides a
+       * function that returns a DerivativeForm<order,dim,spacedim> value
+       * whose components are invalid signaling NaN values.
+       */
+      template <int order, int dim, int spacedim, typename T>
+      struct NaNInitializer<DerivativeForm<order,dim,spacedim,T> >
+      {
+        static DerivativeForm<order,dim,spacedim,T> invalid_element ()
+        {
+          DerivativeForm<order,dim,spacedim,T> form;
+
+          // recursively initialize sub-tensors with invalid elements
+          for (unsigned int i=0; i<spacedim; ++i)
+            form[i] = NaNInitializer<Tensor<order,dim,T> >::invalid_element();
+
+          return form;
+        }
+      };
+    }
+  }
+
+
+
+
+  /**
+   * Provide an object of type @p T filled with a signaling NaN that will
+   * cause an exception when used in a computation. The content of these
+   * objects is a "signaling NaN" ("NaN" stands for "not a number", and
+   * "signaling" implies that at least on platforms where this is supported,
+   * any arithmetic operation using them terminates the program). The purpose
+   * of such objects is to use them as markers for uninitialized objects and
+   * arrays that are required to be filled in other places, and to trigger an
+   * error when this later initialization does not happen before the first
+   * use.
+   *
+   * @tparam T The type of the returned invalid object. This type can either
+   * be a scalar, or of type Tensor, SymmetricTensor, or DerivativeForm. Other
+   * types may be supported if there is a corresponding specialization of the
+   * internal::SignalingNaN::NaNInitializer class for this type.
+   *
+   * @note Because the type @p T is not used as a function argument, the
+   * compiler cannot deduce it from the type of arguments. Consequently, you
+   * have to provide it explicitly. For example, the line
+   *   @code
+   *     Tensor<1,dim> tensor = numbers::signaling_nan<Tensor<1,dim> >();
+   *   @endcode
+   * initializes a tensor with invalid values.
+   */
+  template <class T>
+  T
+  signaling_nan()
+  {
+    // dispatch to the classes in the internal namespace because there
+    // we can do partial specializations, which is not possible for
+    // template functions such as the current one
+    return internal::SignalingNaN::NaNInitializer<T>::invalid_element ();
+  }
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/smartpointer.h b/include/deal.II/base/smartpointer.h
new file mode 100644
index 0000000..3079f9e
--- /dev/null
+++ b/include/deal.II/base/smartpointer.h
@@ -0,0 +1,469 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__smartpointer_h
+#define dealii__smartpointer_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/subscriptor.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * Smart pointers avoid destruction of an object in use. They can be used just
+ * like a pointer (i.e. using the <tt>*</tt> and <tt>-></tt> operators and
+ * through casting) but make sure that the object pointed to is not deleted
+ * while the pointer is in use, by signaling its use to the pointee.
+ *
+ * Objects pointed to, i.e. the class T, should inherit Subscriptor or must
+ * implement the same functionality. Null pointers are an exception from this
+ * rule and are allowed, too.
+ *
+ * The second template argument P only serves a single purpose: if a
+ * constructor without a debug string is used, then the name of P is used as
+ * the debug string.
+ *
+ * SmartPointer does NOT implement any memory handling! Especially, deleting a
+ * SmartPointer does not delete the object. Writing
+ * @code
+ * SmartPointer<T,P> dont_do_this = new T;
+ * @endcode
+ * is a sure way to program a memory leak! The secure version is
+ * @code
+ * T* p = new T;
+ * {
+ *   SmartPointer<T,P> t(p);
+ *   ...
+ * }
+ * delete p;
+ * @endcode
+ *
+ * Note that a smart pointer can handle <tt>const</tt>ness of an object, i.e.
+ * a <tt>SmartPointer<const ABC></tt> really behaves as if it were a pointer
+ * to a constant object (disallowing write access when dereferenced), while
+ * <tt>SmartPointer<ABC></tt> is a mutable pointer.
+ *
+ * @ingroup memory
+ * @author Guido Kanschat, Wolfgang Bangerth, 1998 - 2009
+ */
+template<typename T, typename P = void>
+class SmartPointer
+{
+public:
+  /**
+   * Standard constructor for null pointer. The id of this pointer is set to
+   * the name of the class P.
+   */
+  SmartPointer ();
+
+  /**
+   * Copy constructor for SmartPointer. We do not copy the object subscribed
+   * to from <tt>tt</tt>, but subscribe ourselves to it again.
+   */
+  template <class Q>
+  SmartPointer (const SmartPointer<T,Q> &tt);
+
+  /**
+   * Copy constructor for SmartPointer. We do not copy the object subscribed
+   * to from <tt>tt</tt>, but subscribe ourselves to it again.
+   */
+  SmartPointer (const SmartPointer<T,P> &tt);
+
+  /**
+   * Constructor taking a normal pointer.  If possible, i.e. if the pointer is
+   * not a null pointer, the constructor subscribes to the given object to
+   * lock it, i.e. to prevent its destruction before the end of its use.
+   *
+   * The <tt>id</tt> is used in the call to Subscriptor::subscribe(id) and by
+   * ~SmartPointer() in the call to Subscriptor::unsubscribe().
+   */
+  SmartPointer (T *t, const char *id);
+
+  /**
+   * Constructor taking a normal pointer.  If possible, i.e. if the pointer is
+   * not a null pointer, the constructor subscribes to the given object to
+   * lock it, i.e. to prevent its destruction before the end of its use. The
+   * id of this pointer is set to the name of the class P.
+   */
+  SmartPointer (T *t);
+
+
+  /**
+   * Destructor, removing the subscription.
+   */
+  ~SmartPointer();
+
+  /**
+   * Assignment operator for normal pointers. The pointer subscribes to the
+   * new object automatically and unsubscribes to an old one if it exists. It
+   * will not try to subscribe to a null-pointer, but still delete the old
+   * subscription.
+   */
+  SmartPointer<T,P> &operator= (T *tt);
+
+  /**
+   * Assignment operator for SmartPointer.  The pointer subscribes to the new
+   * object automatically and unsubscribes to an old one if it exists.
+   */
+  template <class Q>
+  SmartPointer<T,P> &operator= (const SmartPointer<T,Q> &tt);
+
+  /**
+   * Assignment operator for SmartPointer.  The pointer subscribes to the new
+   * object automatically and unsubscribes to an old one if it exists.
+   */
+  SmartPointer<T,P> &operator= (const SmartPointer<T,P> &tt);
+
+  /**
+   * Delete the object pointed to and set the pointer to zero.
+   */
+  void clear ();
+
+  /**
+   * Conversion to normal pointer.
+   */
+  operator T *() const;
+
+  /**
+   * Dereferencing operator. This operator throws an ExcNotInitialized if the
+   * pointer is a null pointer.
+   */
+  T &operator * () const;
+
+  /**
+   * Dereferencing operator. This operator throws an ExcNotInitialized if the
+   * pointer is a null pointer.
+   */
+  T *operator -> () const;
+
+  /**
+   * Exchange the pointers of this object and the argument. Since both
+   * objects pointed to are subscribed to before and after, we do not have
+   * to change their subscription counters.
+   *
+   * Note that the two-argument versions of this function, including those
+   * where one of the arguments is a SmartPointer and the other one is a
+   * C-style pointer, are implemented as free functions outside this class.
+   */
+  template <class Q>
+  void swap (SmartPointer<T,Q> &tt);
+
+  /**
+   * Swap pointers between this object and the pointer given. As this releases
+   * the object pointed to presently, we reduce its subscription count by one,
+   * and increase it at the object which we will point to in the future.
+   *
+   * Note that we indeed need a reference of a pointer, as we want to change
+   * the pointer variable which we are given.
+   */
+  void swap (T *&tt);
+
+  /**
+   * Return an estimate of the amount of memory (in bytes) used by this class.
+   * Note in particular, that this only includes the amount of memory used by
+   * <b>this</b> object, not by the object pointed to.
+   */
+  std::size_t memory_consumption () const;
+
+private:
+  /**
+   * Pointer to the object we want to subscribe to. Since it is often
+   * necessary to follow this pointer when debugging, we have deliberately
+   * chosen a short name.
+   */
+  T *t;
+
+  /**
+   * The identification for the subscriptor.
+   */
+  const char *const id;
+};
+
+
+/* --------------------------- inline Template functions ------------------------------*/
+
+
+template <typename T, typename P>
+inline
+SmartPointer<T,P>::SmartPointer ()
+  :
+  t (0), id(typeid(P).name())
+{}
+
+
+
+template <typename T, typename P>
+inline
+SmartPointer<T,P>::SmartPointer (T *t)
+  :
+  t (t), id(typeid(P).name())
+{
+  if (t != 0)
+    t->subscribe(id);
+}
+
+
+
+template <typename T, typename P>
+inline
+SmartPointer<T,P>::SmartPointer (T *t, const char *id)
+  :
+  t (t), id(id)
+{
+  if (t != 0)
+    t->subscribe(id);
+}
+
+
+
+template <typename T, typename P>
+template <class Q>
+inline
+SmartPointer<T,P>::SmartPointer (const SmartPointer<T,Q> &tt)
+  :
+  t (tt.t), id(tt.id)
+{
+  if (t != 0)
+    t->subscribe(id);
+}
+
+
+
+template <typename T, typename P>
+inline
+SmartPointer<T,P>::SmartPointer (const SmartPointer<T,P> &tt)
+  :
+  t (tt.t), id(tt.id)
+{
+  if (t != 0)
+    t->subscribe(id);
+}
+
+
+
+template <typename T, typename P>
+inline
+SmartPointer<T,P>::~SmartPointer ()
+{
+  if (t != 0)
+    t->unsubscribe(id);
+}
+
+
+
+template <typename T, typename P>
+inline
+void
+SmartPointer<T,P>::clear ()
+{
+  if (t != 0)
+    {
+      t->unsubscribe(id);
+      delete t;
+      t = 0;
+    }
+}
+
+
+
+template <typename T, typename P>
+inline
+SmartPointer<T,P> &SmartPointer<T,P>::operator = (T *tt)
+{
+  // optimize if no real action is
+  // requested
+  if (t == tt)
+    return *this;
+
+  if (t != 0)
+    t->unsubscribe(id);
+  t = tt;
+  if (tt != 0)
+    tt->subscribe(id);
+  return *this;
+}
+
+
+
+template <typename T, typename P>
+template <class Q>
+inline
+SmartPointer<T,P> &
+SmartPointer<T,P>::operator = (const SmartPointer<T,Q> &tt)
+{
+  // if objects on the left and right
+  // hand side of the operator= are
+  // the same, then this is a no-op
+  if (&tt == this)
+    return *this;
+
+  if (t != 0)
+    t->unsubscribe(id);
+  t = static_cast<T *>(tt);
+  if (tt != 0)
+    tt->subscribe(id);
+  return *this;
+}
+
+
+
+template <typename T, typename P>
+inline
+SmartPointer<T,P> &
+SmartPointer<T,P>::operator = (const SmartPointer<T,P> &tt)
+{
+  // if objects on the left and right
+  // hand side of the operator= are
+  // the same, then this is a no-op
+  if (&tt == this)
+    return *this;
+
+  if (t != 0)
+    t->unsubscribe(id);
+  t = static_cast<T *>(tt);
+  if (tt != 0)
+    tt->subscribe(id);
+  return *this;
+}
+
+
+
+template <typename T, typename P>
+inline
+SmartPointer<T,P>::operator T *() const
+{
+  return t;
+}
+
+
+
+template <typename T, typename P>
+inline
+T &SmartPointer<T,P>::operator * () const
+{
+  Assert(t != 0, ExcNotInitialized());
+  return *t;
+}
+
+
+
+template <typename T, typename P>
+inline
+T *SmartPointer<T,P>::operator -> () const
+{
+  Assert(t != 0, ExcNotInitialized());
+  return t;
+}
+
+
+
+template <typename T, typename P>
+template <class Q>
+inline
+void SmartPointer<T,P>::swap (SmartPointer<T,Q> &tt)
+{
+#ifdef DEBUG
+  SmartPointer<T,P> aux(t,id);
+  *this = tt;
+  tt = aux;
+#else
+  std::swap (t, tt.t);
+#endif
+}
+
+
+
+template <typename T, typename P>
+inline
+void SmartPointer<T,P>::swap (T *&tt)
+{
+  if (t != 0)
+    t->unsubscribe (id);
+
+  std::swap (t, tt);
+
+  if (t != 0)
+    t->subscribe (id);
+}
+
+
+
+template <typename T, typename P>
+inline
+std::size_t
+SmartPointer<T,P>::memory_consumption () const
+{
+  return sizeof(SmartPointer<T,P>);
+}
+
+
+
+// The following function is not strictly necessary but is an optimization
+// for places where you call swap(p1,p2) with SmartPointer objects p1, p2.
+// Unfortunately, MS Visual Studio (at least up to the 2013 edition) trips
+// over it when calling std::swap(v1,v2) where v1,v2 are std::vectors of
+// SmartPointer objects: it can't determine whether it should call std::swap
+// or dealii::swap on the individual elements (see bug #184 on our Google Code
+// site). Consequently, just take this function out of the competition for this
+// compiler.
+#ifndef _MSC_VER
+/**
+ * Global function to swap the contents of two smart pointers. As both objects
+ * to which the pointers point remain subscribed to, we do not have to
+ * change their subscription count.
+ */
+template <typename T, typename P, class Q>
+inline
+void swap (SmartPointer<T,P> &t1, SmartPointer<T,Q> &t2)
+{
+  t1.swap (t2);
+}
+#endif
+
+
+/**
+ * Global function to swap the contents of a smart pointer and a C-style
+ * pointer.
+ *
+ * Note that we indeed need a reference of a pointer, as we want to change the
+ * pointer variable which we are given.
+ */
+template <typename T, typename P>
+inline
+void swap (SmartPointer<T,P> &t1, T *&t2)
+{
+  t1.swap (t2);
+}
+
+
+
+/**
+ * Global function to swap the contents of a C-style pointer and a smart
+ * pointer.
+ *
+ * Note that we indeed need a reference of a pointer, as we want to change the
+ * pointer variable which we are given.
+ */
+template <typename T, typename P>
+inline
+void swap (T *&t1, SmartPointer<T,P> &t2)
+{
+  t2.swap (t1);
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/std_cxx11/array.h b/include/deal.II/base/std_cxx11/array.h
new file mode 100644
index 0000000..4590660
--- /dev/null
+++ b/include/deal.II/base/std_cxx11/array.h
@@ -0,0 +1,49 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__std_cxx11_array_h
+#define dealii__std_cxx11_array_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_CXX11
+
+#  include <array>
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx11
+{
+  using std::array;
+}
+DEAL_II_NAMESPACE_CLOSE
+
+#else
+
+#include <boost/array.hpp>
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx11
+{
+  using boost::array;
+}
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
+
+// then allow using the old namespace name instead of the new one
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx1x = std_cxx11;
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/std_cxx11/bind.h b/include/deal.II/base/std_cxx11/bind.h
new file mode 100644
index 0000000..b6e5470
--- /dev/null
+++ b/include/deal.II/base/std_cxx11/bind.h
@@ -0,0 +1,92 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__std_cxx11_bind_h
+#define dealii__std_cxx11_bind_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_CXX11
+
+#  include <functional>
+
+DEAL_II_NAMESPACE_OPEN
+// In boost, the placeholders _1, _2, ... are in the global namespace. In
+// C++11, they are in namespace std::placeholders, which makes them awkward to
+// use. Import them into the dealii::std_cxx11 namespace instead and do the
+// same below if we use boost instead. Namespace 'placeholders' is also defined
+// in the dealii::std_cxx11 namespace to make code C++ standard compatible.
+// That is to say, if std::something works with the C++11 standard,
+// then dealii::std_cxx11::something should also work.
+namespace std_cxx11
+{
+  using namespace std::placeholders;
+  using std::bind;
+  using std::ref;
+  using std::cref;
+  using std::reference_wrapper;
+
+  namespace placeholders = std::placeholders;
+}
+DEAL_II_NAMESPACE_CLOSE
+
+#else
+
+#include <boost/bind.hpp>
+
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx11
+{
+  using boost::bind;
+  using boost::ref;
+  using boost::cref;
+  using boost::reference_wrapper;
+
+  // now also import the _1, _2 placeholders from the global namespace
+  // into the current one as suggested above
+  using ::_1;
+  using ::_2;
+  using ::_3;
+  using ::_4;
+  using ::_5;
+  using ::_6;
+  using ::_7;
+  using ::_8;
+  using ::_9;
+
+  namespace placeholders
+  {
+    using ::_1;
+    using ::_2;
+    using ::_3;
+    using ::_4;
+    using ::_5;
+    using ::_6;
+    using ::_7;
+    using ::_8;
+    using ::_9;
+  }
+}
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
+
+// then allow using the old namespace name instead of the new one
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx1x = std_cxx11;
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/std_cxx11/condition_variable.h b/include/deal.II/base/std_cxx11/condition_variable.h
new file mode 100644
index 0000000..1c1f4be
--- /dev/null
+++ b/include/deal.II/base/std_cxx11/condition_variable.h
@@ -0,0 +1,53 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__std_cxx11_condition_variable_h
+#define dealii__std_cxx11_condition_variable_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_CXX11
+
+#  include <condition_variable>
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx11
+{
+  using std::condition_variable;
+  using std::unique_lock;
+  using std::adopt_lock;
+}
+DEAL_II_NAMESPACE_CLOSE
+
+#else
+
+#  include <boost/thread/condition_variable.hpp>
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx11
+{
+  using boost::condition_variable;
+  using boost::unique_lock;
+  using boost::adopt_lock;
+}
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
+
+// then allow using the old namespace name instead of the new one
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx1x = std_cxx11;
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/std_cxx11/function.h b/include/deal.II/base/std_cxx11/function.h
new file mode 100644
index 0000000..ec61d05
--- /dev/null
+++ b/include/deal.II/base/std_cxx11/function.h
@@ -0,0 +1,49 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__std_cxx11_function_h
+#define dealii__std_cxx11_function_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_CXX11
+
+#  include <functional>
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx11
+{
+  using std::function;
+}
+DEAL_II_NAMESPACE_CLOSE
+
+#else
+
+#include <boost/function.hpp>
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx11
+{
+  using boost::function;
+}
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
+
+// then allow using the old namespace name instead of the new one
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx1x = std_cxx11;
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/std_cxx11/iterator.h b/include/deal.II/base/std_cxx11/iterator.h
new file mode 100644
index 0000000..66576e3
--- /dev/null
+++ b/include/deal.II/base/std_cxx11/iterator.h
@@ -0,0 +1,44 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2012 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__std_cxx11_iterator_h
+#define dealii__std_cxx11_iterator_h
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_CXX11
+
+#  include <iterator>
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx11
+{
+  using std::begin;
+  using std::end;
+}
+DEAL_II_NAMESPACE_CLOSE
+
+#else
+
+#include <boost/range.hpp>
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx11
+{
+  using boost::begin;
+  using boost::end;
+}
+DEAL_II_NAMESPACE_CLOSE
+#endif
+
+#endif
diff --git a/include/deal.II/base/std_cxx11/mutex.h b/include/deal.II/base/std_cxx11/mutex.h
new file mode 100644
index 0000000..a8f8142
--- /dev/null
+++ b/include/deal.II/base/std_cxx11/mutex.h
@@ -0,0 +1,49 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__std_cxx11_mutex_h
+#define dealii__std_cxx11_mutex_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_CXX11
+
+#  include <mutex>
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx11
+{
+  using std::mutex;
+}
+DEAL_II_NAMESPACE_CLOSE
+
+#else
+
+#  include <boost/thread/mutex.hpp>
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx11
+{
+  using boost::mutex;
+}
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
+
+// then allow using the old namespace name instead of the new one
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx1x = std_cxx11;
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/std_cxx11/shared_ptr.h b/include/deal.II/base/std_cxx11/shared_ptr.h
new file mode 100644
index 0000000..4c90c75
--- /dev/null
+++ b/include/deal.II/base/std_cxx11/shared_ptr.h
@@ -0,0 +1,52 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__std_cxx11_shared_ptr_h
+#define dealii__std_cxx11_shared_ptr_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_CXX11
+
+#  include <memory>
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx11
+{
+  using std::shared_ptr;
+  using std::enable_shared_from_this;
+}
+DEAL_II_NAMESPACE_CLOSE
+
+#else
+
+#include <boost/shared_ptr.hpp>
+#include <boost/enable_shared_from_this.hpp>
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx11
+{
+  using boost::shared_ptr;
+  using boost::enable_shared_from_this;
+}
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
+
+// then allow using the old namespace name instead of the new one
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx1x = std_cxx11;
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/std_cxx11/thread.h b/include/deal.II/base/std_cxx11/thread.h
new file mode 100644
index 0000000..9a754c4
--- /dev/null
+++ b/include/deal.II/base/std_cxx11/thread.h
@@ -0,0 +1,52 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__std_cxx11_thread_h
+#define dealii__std_cxx11_thread_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_CXX11
+
+#  include <thread>
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx11
+{
+  using std::thread;
+}
+DEAL_II_NAMESPACE_CLOSE
+
+#else
+
+DEAL_II_DISABLE_EXTRA_DIAGNOSTICS
+#  include <boost/thread.hpp>
+DEAL_II_ENABLE_EXTRA_DIAGNOSTICS
+
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx11
+{
+  using boost::thread;
+}
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
+
+// then allow using the old namespace name instead of the new one
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx1x = std_cxx11;
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/std_cxx11/tuple.h b/include/deal.II/base/std_cxx11/tuple.h
new file mode 100644
index 0000000..89831a7
--- /dev/null
+++ b/include/deal.II/base/std_cxx11/tuple.h
@@ -0,0 +1,72 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__std_cxx11_tuple_h
+#define dealii__std_cxx11_tuple_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_CXX11
+
+#  include <tuple>
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx11
+{
+  using std::tuple;
+  using std::make_tuple;
+  using std::get;
+  using std::tuple_size;
+  using std::tuple_element;
+}
+DEAL_II_NAMESPACE_CLOSE
+
+#else
+
+#include <boost/tuple/tuple.hpp>
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx11 // fallback without C++11: boost-based tuple names
+{
+  using boost::tuple;
+  using boost::make_tuple;
+  using boost::get;
+
+  // boost::tuples::length has been renamed by the standard to
+  // std::tuple_size; wrap it so that tuple_size<T>::value works here too
+  template <typename T>
+  struct tuple_size
+  {
+    static const std::size_t value = boost::tuples::length<T>::value;
+  };
+
+  // similarly, boost::tuples::element has been renamed by the standard to
+  // std::tuple_element; expose its nested 'type' under the standard name.
+  // NOTE(review): the index is 'int' here, std::tuple_element uses std::size_t
+  template <int N, typename T>
+  struct tuple_element
+  {
+    typedef typename boost::tuples::element<N,T>::type type;
+  };
+}
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
+
+// then allow using the old namespace name instead of the new one
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx1x = std_cxx11;
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/std_cxx11/type_traits.h b/include/deal.II/base/std_cxx11/type_traits.h
new file mode 100644
index 0000000..d9692f7
--- /dev/null
+++ b/include/deal.II/base/std_cxx11/type_traits.h
@@ -0,0 +1,91 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2012 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__std_cxx11_type_traits_h
+#define dealii__std_cxx11_type_traits_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_CXX11
+
+#  include <type_traits>
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx11
+{
+  // TODO: could fill up with more types from
+  // C++11 type traits
+  using std::is_fundamental;
+  using std::is_pod;
+  using std::is_pointer;
+  using std::is_standard_layout;
+  using std::is_trivial;
+  using std::enable_if;
+}
+DEAL_II_NAMESPACE_CLOSE
+
+#else
+
+DEAL_II_DISABLE_EXTRA_DIAGNOSTICS
+#include <boost/type_traits.hpp>
+#include <boost/version.hpp>
+#if BOOST_VERSION<105600
+#include <boost/utility/enable_if.hpp>
+#else
+#include <boost/core/enable_if.hpp>
+#endif
+DEAL_II_ENABLE_EXTRA_DIAGNOSTICS
+
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx11 // fallback without C++11: boost-based type traits
+{
+  using boost::is_fundamental;
+  using boost::is_pod;
+  using boost::is_pointer;
+
+  // boost::enable_if_c, *not* boost::enable_if, is equivalent to std::enable_if.
+  template <bool B, class T = void>
+  struct enable_if : public boost::enable_if_c<B, T>
+  {};
+
+  // boost has neither is_standard_layout nor is_trivial. Every POD type
+  // is both standard-layout and trivial (not the other way around), so
+  // is_pod is a conservative stand-in for is_standard_layout
+  template <typename T>
+  struct is_standard_layout
+  {
+    static const bool value = boost::is_pod<T>::value;
+  };
+  // is_trivial is approximated by requiring all four boost has_trivial_* traits
+  template <typename T>
+  struct is_trivial
+  {
+    static const bool value = boost::has_trivial_copy<T>::value &&
+                              boost::has_trivial_assign<T>::value &&
+                              boost::has_trivial_constructor<T>::value &&
+                              boost::has_trivial_destructor<T>::value;
+  };
+}
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
+
+
+// then allow using the old namespace name instead of the new one
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx1x = std_cxx11;
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/std_cxx11/unique_ptr.h b/include/deal.II/base/std_cxx11/unique_ptr.h
new file mode 100644
index 0000000..5476ec6
--- /dev/null
+++ b/include/deal.II/base/std_cxx11/unique_ptr.h
@@ -0,0 +1,79 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__std_cxx11_unique_ptr_h
+#define dealii__std_cxx11_unique_ptr_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_CXX11
+
+#  include <memory>
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx11
+{
+  using std::unique_ptr;
+}
+DEAL_II_NAMESPACE_CLOSE
+
+#else
+
+#include <boost/scoped_ptr.hpp>
+
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx11
+{
+  /**
+   * Implementation of a basic replacement for C++11's std::unique_ptr class.
+   *
+   * BOOST does not have a replacement for std::unique_ptr (because unique_ptr
+   * requires move semantics that aren't available unless you have a C++11
+   * compiler -- in which case you also have std::unique_ptr; see for example
+   * http://stackoverflow.com/questions/2953530/unique-ptr-boost-equivalent)
+   *
+   * Consequently, we emulate the class by just wrapping a boost::scoped_ptr
+   * in the cheapest possible way -- by just deriving from it and repeating
+   * the basic constructors. Everything else is inherited from the scoped_ptr
+   * class.
+   *
+   * There is no overhead to this approach: scoped_ptr cannot be copied or
+   * moved. Instances of unique_ptr cannot be copied, and if you do not have a
+   * C++11 compiler, then you cannot move anything anyway.
+   */
+  template <typename T>
+  class unique_ptr : public boost::scoped_ptr<T>
+  {
+  public:
+    // default constructor: default-constructs the scoped_ptr base
+    unique_ptr () {}
+    // construct from a raw pointer by handing it to the scoped_ptr base
+    template<class Y>
+    explicit unique_ptr (Y *p)
+      :
+      boost::scoped_ptr<T>(p)
+    {}
+  };
+}
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
+
+// then allow using the old namespace name instead of the new one
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx1x = std_cxx11;
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/std_cxx1x/array.h b/include/deal.II/base/std_cxx1x/array.h
new file mode 100644
index 0000000..c8d60e6
--- /dev/null
+++ b/include/deal.II/base/std_cxx1x/array.h
@@ -0,0 +1,22 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// this file is deprecated. simply include the one we use now
+#include "../std_cxx11/array.h"
+
+// then allow using the old namespace name instead of the new one
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx1x = std_cxx11;
+DEAL_II_NAMESPACE_CLOSE
diff --git a/include/deal.II/base/std_cxx1x/bind.h b/include/deal.II/base/std_cxx1x/bind.h
new file mode 100644
index 0000000..c434b64
--- /dev/null
+++ b/include/deal.II/base/std_cxx1x/bind.h
@@ -0,0 +1,22 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// this file is deprecated. simply include the one we use now
+#include "../std_cxx11/bind.h"
+
+// then allow using the old namespace name instead of the new one
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx1x = std_cxx11;
+DEAL_II_NAMESPACE_CLOSE
diff --git a/include/deal.II/base/std_cxx1x/condition_variable.h b/include/deal.II/base/std_cxx1x/condition_variable.h
new file mode 100644
index 0000000..293676b
--- /dev/null
+++ b/include/deal.II/base/std_cxx1x/condition_variable.h
@@ -0,0 +1,22 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// this file is deprecated. simply include the one we use now
+#include "../std_cxx11/condition_variable.h"
+
+// then allow using the old namespace name instead of the new one
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx1x = std_cxx11;
+DEAL_II_NAMESPACE_CLOSE
diff --git a/include/deal.II/base/std_cxx1x/function.h b/include/deal.II/base/std_cxx1x/function.h
new file mode 100644
index 0000000..ad4104b
--- /dev/null
+++ b/include/deal.II/base/std_cxx1x/function.h
@@ -0,0 +1,22 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// this file is deprecated. simply include the one we use now
+#include "../std_cxx11/function.h"
+
+// then allow using the old namespace name instead of the new one
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx1x = std_cxx11;
+DEAL_II_NAMESPACE_CLOSE
diff --git a/include/deal.II/base/std_cxx1x/mutex.h b/include/deal.II/base/std_cxx1x/mutex.h
new file mode 100644
index 0000000..d5f05b2
--- /dev/null
+++ b/include/deal.II/base/std_cxx1x/mutex.h
@@ -0,0 +1,22 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// this file is deprecated. simply include the one we use now
+#include "../std_cxx11/mutex.h"
+
+// then allow using the old namespace name instead of the new one
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx1x = std_cxx11;
+DEAL_II_NAMESPACE_CLOSE
diff --git a/include/deal.II/base/std_cxx1x/shared_ptr.h b/include/deal.II/base/std_cxx1x/shared_ptr.h
new file mode 100644
index 0000000..7eecd04
--- /dev/null
+++ b/include/deal.II/base/std_cxx1x/shared_ptr.h
@@ -0,0 +1,22 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// this file is deprecated. simply include the one we use now
+#include "../std_cxx11/shared_ptr.h"
+
+// then allow using the old namespace name instead of the new one
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx1x = std_cxx11;
+DEAL_II_NAMESPACE_CLOSE
diff --git a/include/deal.II/base/std_cxx1x/thread.h b/include/deal.II/base/std_cxx1x/thread.h
new file mode 100644
index 0000000..fa40151
--- /dev/null
+++ b/include/deal.II/base/std_cxx1x/thread.h
@@ -0,0 +1,22 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// this file is deprecated. simply include the one we use now
+#include "../std_cxx11/thread.h"
+
+// then allow using the old namespace name instead of the new one
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx1x = std_cxx11;
+DEAL_II_NAMESPACE_CLOSE
diff --git a/include/deal.II/base/std_cxx1x/tuple.h b/include/deal.II/base/std_cxx1x/tuple.h
new file mode 100644
index 0000000..d19478c
--- /dev/null
+++ b/include/deal.II/base/std_cxx1x/tuple.h
@@ -0,0 +1,22 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// this file is deprecated. simply include the one we use now
+#include "../std_cxx11/tuple.h"
+
+// then allow using the old namespace name instead of the new one
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx1x = std_cxx11;
+DEAL_II_NAMESPACE_CLOSE
diff --git a/include/deal.II/base/std_cxx1x/type_traits.h b/include/deal.II/base/std_cxx1x/type_traits.h
new file mode 100644
index 0000000..de7ec25
--- /dev/null
+++ b/include/deal.II/base/std_cxx1x/type_traits.h
@@ -0,0 +1,22 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// this file is deprecated. simply include the one we use now
+#include "../std_cxx11/type_traits.h"
+
+// then allow using the old namespace name instead of the new one
+DEAL_II_NAMESPACE_OPEN
+namespace std_cxx1x = std_cxx11;
+DEAL_II_NAMESPACE_CLOSE
diff --git a/include/deal.II/base/subscriptor.h b/include/deal.II/base/subscriptor.h
new file mode 100644
index 0000000..17023d8
--- /dev/null
+++ b/include/deal.II/base/subscriptor.h
@@ -0,0 +1,215 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__subscriptor_h
+#define dealii__subscriptor_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+
+#include <typeinfo>
+#include <map>
+#include <string>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * Handling of subscriptions.
+ *
+ * This class, used as a base class, keeps track of other objects that are
+ * using a specific object. It is meant for the case where an object is
+ * passed to a constructor by reference and stored: the original object must
+ * then not be deleted before the dependent object is deleted. You can assert
+ * this constraint by deriving the class of the passed object from this class
+ * and letting the user subscribe() to that object. The destructor inherited
+ * from the Subscriptor class will then lead to an error when destruction is
+ * attempted while there are still subscriptions.
+ *
+ * The utility of this class is enhanced further by passing identifying
+ * strings to the functions subscribe() and unsubscribe(). In case of a
+ * dangling subscription during destruction, this string will be listed in
+ * the exception's message. For reasons of efficiency, these strings are
+ * handled as <tt>const char*</tt>. Therefore, the pointers provided to
+ * subscribe() and to unsubscribe() must be the same; strings with equal
+ * contents will not be recognized as being the same. The handling in
+ * SmartPointer will take care of this.
+ *
+ * @note Due to a problem with <tt>volatile</tt> declarations, this additional
+ * feature is switched off if multithreading is used.
+ *
+ * @ingroup memory
+ * @author Guido Kanschat, 1998 - 2005
+ */
+class Subscriptor
+{
+public:
+  /**
+   * Constructor setting the counter to zero.
+   */
+  Subscriptor();
+
+  /**
+   * Copy-constructor.
+   *
+   * The counter of the copy is zero, since references point to the original
+   * object.
+   */
+  Subscriptor(const Subscriptor &);
+
+  /**
+   * Destructor, asserting that the counter is zero.
+   */
+  virtual ~Subscriptor();
+
+  /**
+   * Assignment operator.
+   *
+   * This has to be handled with care, too, because the counter has to remain
+   * the same. It therefore does nothing more than returning <tt>*this</tt>.
+   */
+  Subscriptor &operator = (const Subscriptor &);
+
+  /**
+   * Subscribe a user of the object. The subscriber may be identified by text
+   * supplied as <tt>identifier</tt>.
+   */
+  void subscribe (const char *identifier = 0) const;
+
+  /**
+   * Unsubscribe a user from the object.
+   *
+   * @note The <tt>identifier</tt> must be the <b>same pointer</b> as the one
+   * supplied to subscribe(), not just the same text.
+   */
+  void unsubscribe (const char *identifier = 0) const;
+
+  /**
+   * Return the present number of subscriptions to this object. This makes it
+   * possible to use this class for reference counted lifetime determination
+   * where the last one to unsubscribe also deletes the object.
+   */
+  unsigned int n_subscriptions () const;
+
+  /**
+   * List the subscribers to @p deallog.
+   */
+  void list_subscribers () const;
+
+  /**
+   * @addtogroup Exceptions
+   * @{
+   */
+
+  /**
+   * Exception: Object may not be deleted, since it is used.
+   */
+  DeclException3(ExcInUse,
+                 int, char *, std::string &,
+                 << "Object of class " << arg2
+                 << " is still used by " << arg1 << " other objects."
+                 << "\n\n"
+                 << "(Additional information: " << arg3 << ")\n\n"
+                 << "See the entry in the Frequently Asked Questions of "
+                 << "deal.II (linked to from http://www.dealii.org/) for "
+                 << "a lot more information on what this error means and "
+                 << "how to fix programs in which it happens.");
+
+  /**
+   * Exception: a subscriber with the identification string given to
+   * Subscriptor::unsubscribe() did not subscribe to the object.
+   */
+  DeclException2(ExcNoSubscriber, char *, char *,
+                 << "No subscriber with identifier <" << arg2
+                 << "> subscribes to this object of class " << arg1
+                 << ". Consequently, it cannot be unsubscribed.");
+  //@}
+
+  /**
+   * Read or write the data of this object to or from a stream for the purpose
+   * of serialization.
+   *
+   * This function does not actually serialize any of the member variables of
+   * this class. The reason is that what this class stores is only who
+   * subscribes to this object, but who does so at the time of storing the
+   * contents of this object does not necessarily have anything to do with who
+   * subscribes to the object when it is restored. Consequently, we do not
+   * want to overwrite the subscribers at the time of restoring, and then
+   * there is no reason to write the subscribers out in the first place.
+   */
+  template <class Archive>
+  void serialize(Archive &ar, const unsigned int version);
+
+private:
+  /**
+   * The data type used in #counter_map.
+   */
+  typedef std::map<const char *, unsigned int>::value_type
+  map_value_type;
+
+  /**
+   * The iterator type used in #counter_map.
+   */
+  typedef std::map<const char *, unsigned int>::iterator
+  map_iterator;
+
+  /**
+   * Store the number of objects which subscribed to this object. Initially,
+   * this number is zero, and upon destruction it shall be zero again (i.e.
+   * all objects which subscribed should have unsubscribed again).
+   *
+   * The creator (and owner) of an object is counted in the map below if it
+   * manages to supply identification.
+   *
+   * We use the <tt>mutable</tt> keyword in order to allow subscription to
+   * constant objects also.
+   *
+   * In multithreaded mode, this counter may be modified by different threads.
+   * We thus have to mark it <tt>volatile</tt>. However, this is counter-
+   * productive in non-MT mode since it may pessimize code. So use the macro
+   * defined in <tt>deal.II/base/config.h</tt> to selectively add volatility.
+   */
+  mutable DEAL_VOLATILE unsigned int counter;
+
+  /**
+   * In this map, we count subscriptions for each different identification
+   * string supplied to subscribe().
+   */
+  mutable std::map<const char *, unsigned int> counter_map;
+
+  /**
+   * Pointer to the typeinfo object of this object, from which we can later
+   * deduce the class name. Since this information on the derived class is
+   * neither available in the destructor, nor in the constructor, we obtain it
+   * in between and store it here.
+   */
+  mutable const std::type_info *object_info;
+};
+
+//---------------------------------------------------------------------------
+
+// No-op by design: none of the member variables of Subscriptor is written
+// to or read from the archive, as explained in the documentation of the
+// declaration of this function inside the class.
+template <class Archive>
+inline void
+Subscriptor::serialize(Archive & /*ar*/,
+                       const unsigned int /*version*/)
+{
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/symmetric_tensor.h b/include/deal.II/base/symmetric_tensor.h
new file mode 100644
index 0000000..4743fd2
--- /dev/null
+++ b/include/deal.II/base/symmetric_tensor.h
@@ -0,0 +1,3156 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__symmetric_tensor_h
+#define dealii__symmetric_tensor_h
+
+
+#include <deal.II/base/tensor.h>
+#include <deal.II/base/table_indices.h>
+#include <deal.II/base/template_constraints.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int rank, int dim, typename Number=double> class SymmetricTensor;
+
+template <int dim, typename Number> SymmetricTensor<2,dim,Number>
+unit_symmetric_tensor ();
+template <int dim, typename Number> SymmetricTensor<4,dim,Number>
+deviator_tensor ();
+template <int dim, typename Number> SymmetricTensor<4,dim,Number>
+identity_tensor ();
+template <int dim, typename Number> SymmetricTensor<4,dim,Number>
+invert (const SymmetricTensor<4,dim,Number> &);
+template <int dim2, typename Number> Number
+trace (const SymmetricTensor<2,dim2,Number> &);
+
+template <int dim, typename Number> SymmetricTensor<2,dim,Number>
+deviator (const SymmetricTensor<2,dim,Number> &);
+template <int dim, typename Number> Number
+determinant (const SymmetricTensor<2,dim,Number> &);
+
+
+
+namespace internal
+{
+  /**
+   * A namespace for classes that are internal to how the SymmetricTensor
+   * class works.
+   */
+  namespace SymmetricTensorAccessors
+  {
+    /**
+     * Return a TableIndices<2> object in which @p new_index is stored at
+     * index @p position, while all entries in front of it are copied from
+     * @p previous_indices. The remaining indices remain in invalid state.
+     * The Assert checks that @p position is less than 2.
+     */
+    inline
+    TableIndices<2> merge (const TableIndices<2> &previous_indices,
+                           const unsigned int     new_index,
+                           const unsigned int     position)
+    {
+      Assert (position < 2, ExcIndexRange (position, 0, 2));
+
+      // only the second slot has a predecessor that needs to be carried over
+      return (position == 0
+              ? TableIndices<2>(new_index)
+              : TableIndices<2>(previous_indices[0], new_index));
+    }
+
+
+
+    /**
+     * Return a TableIndices<4> object in which @p new_index is stored at
+     * index @p position, while all entries in front of it are copied from
+     * @p previous_indices. The remaining indices remain in invalid state.
+     * The first Assert checks that @p position is less than 4.
+     */
+    inline
+    TableIndices<4> merge (const TableIndices<4> &previous_indices,
+                           const unsigned int     new_index,
+                           const unsigned int     position)
+    {
+      Assert (position < 4, ExcIndexRange (position, 0, 4));
+
+      // one branch per admissible position; each one carries over exactly
+      // the entries of previous_indices that precede the new index
+      if (position == 0)
+        return TableIndices<4>(new_index);
+
+      if (position == 1)
+        return TableIndices<4>(previous_indices[0], new_index);
+
+      if (position == 2)
+        return TableIndices<4>(previous_indices[0], previous_indices[1],
+                               new_index);
+
+      if (position == 3)
+        return TableIndices<4>(previous_indices[0], previous_indices[1],
+                               previous_indices[2], new_index);
+
+      // not reached for any valid value of position
+      Assert (false, ExcInternalError());
+      return TableIndices<4>();
+    }
+
+
+    /**
+     * Typedef template magic denoting the result of a double contraction
+     * between two tensors of ranks rank1 and rank2. In general, this is a
+     * tensor of rank <tt>rank1+rank2-4</tt>, but if this is zero it is a
+     * single scalar Number. For that case, we have a specialization.
+     *
+     * @author Wolfgang Bangerth, 2005
+     */
+    template <int rank1, int rank2, int dim, typename Number>
+    struct double_contraction_result
+    {
+      typedef ::dealii::SymmetricTensor<rank1+rank2-4,dim,Number> type;
+    };
+
+
+    /**
+     * Specialization of the double contraction result type for the case of
+     * two tensors of rank 2. The general template would yield a tensor of
+     * rank <tt>rank1+rank2-4</tt>, which is zero here; the result type is
+     * therefore the single scalar Number.
+     *
+     * @author Wolfgang Bangerth, 2005
+     */
+    template <int dim, typename Number>
+    struct double_contraction_result<2,2,dim,Number>
+    {
+      typedef Number type;
+    };
+
+
+
+    /**
+     * Declaration of typedefs for the type of data structures which are used
+     * to store symmetric tensors. For example, for rank-2 symmetric tensors,
+     * we use a flat vector to store all the elements. On the other hand,
+     * symmetric rank-4 tensors are mappings from symmetric rank-2 tensors
+     * into symmetric rank-2 tensors, so they can be represented as matrices.
+     * The general template is only declared; see the specializations.
+     *
+     * This information is probably of little interest to all except the
+     * accessor classes that need it. In particular, you shouldn't make any
+     * assumptions about the storage format in your application programs.
+     */
+    template <int rank, int dim, typename Number>
+    struct StorageType;
+
+    /**
+     * Specialization of StorageType for rank-2 tensors.
+     */
+    template <int dim, typename Number>
+    struct StorageType<2,dim,Number>
+    {
+      /**
+       * Number of independent components of a symmetric tensor of rank 2. We
+       * store only the upper right half of it, including the diagonal.
+       */
+      static const unsigned int
+      n_independent_components = (dim*dim + dim)/2;
+
+      /**
+       * Storage type: a flat vector of n_independent_components entries.
+       */
+      typedef Tensor<1,n_independent_components,Number> base_tensor_type;
+    };
+
+
+
+    /**
+     * Specialization of StorageType for rank-4 tensors.
+     */
+    template <int dim, typename Number>
+    struct StorageType<4,dim,Number>
+    {
+      /**
+       * Number of independent components of a symmetric tensor of rank 2.
+       * Since rank-4 tensors are mappings between such objects, we need this
+       * information.
+       */
+      static const unsigned int
+      n_rank2_components = (dim*dim + dim)/2;
+
+      /**
+       * Independent components of a rank-4 tensor: n_rank2_components squared.
+       */
+      static const unsigned int
+      n_independent_components = (n_rank2_components *
+                                  StorageType<2,dim,Number>::n_independent_components);
+
+      /**
+       * Declare the type in which we actually store the data. Symmetric
+       * rank-4 tensors are mappings between symmetric rank-2 tensors, so we
+       * can represent the data as a square matrix of size n_rank2_components
+       * if we represent the rank-2 tensors as vectors.
+       */
+      typedef Tensor<2,n_rank2_components,Number> base_tensor_type;
+    };
+
+
+
+    /**
+     * Switch type to select a tensor of rank <tt>rank</tt> and dimension
+     * <tt>dim</tt>, switching on whether the tensor should be constant or not.
+     */
+    template <int rank, int dim, bool constness, typename Number>
+    struct AccessorTypes;
+
+    /**
+     * Switch type to select a tensor of rank <tt>rank</tt> and dimension
+     * <tt>dim</tt>, switching on whether the tensor should be constant or not.
+     *
+     * Specialization for constant tensors: elements are returned by value.
+     */
+    template <int rank, int dim, typename Number>
+    struct AccessorTypes<rank,dim,true,Number>
+    {
+      typedef const ::dealii::SymmetricTensor<rank,dim,Number> tensor_type;
+
+      typedef Number reference;
+    };
+
+    /**
+     * Switch type to select a tensor of rank <tt>rank</tt> and dimension
+     * <tt>dim</tt>, switching on whether the tensor should be constant or not.
+     *
+     * Specialization for non-constant tensors: elements are returned by
+     * reference.
+     */
+    template <int rank, int dim, typename Number>
+    struct AccessorTypes<rank,dim,false,Number>
+    {
+      typedef ::dealii::SymmetricTensor<rank,dim,Number> tensor_type;
+      typedef Number &reference;
+    };
+
+
+    /**
+     * @internal
+     *
+     * Class that acts as accessor to elements of type SymmetricTensor. The
+     * template parameter <tt>constness</tt> may be either true or false, and
+     * indicates whether the objects worked on are constant or not (i.e. write
+     * access is only allowed if the value is false).
+     *
+     * Since with <tt>N</tt> indices, the effect of applying
+     * <tt>operator[]</tt> is getting access to something with <tt>N-1</tt>
+     * indices, we have to implement these accessor classes recursively, with
+     * stopping when we have only one index left. For the latter case, a
+     * specialization of this class is declared below, where calling
+     * <tt>operator[]</tt> gives you access to the objects actually stored by
+     * the tensor; the tensor class also makes sure that only those elements
+     * are actually accessed which we actually store, i.e. it reorders indices
+     * if necessary. The template parameter <tt>P</tt> indicates how many
+     * remaining indices there are. For a rank-2 tensor, <tt>P</tt> may be
+     * two, and when using <tt>operator[]</tt>, an object with <tt>P=1</tt>
+     * emerges.
+     *
+     * As stated for the entire namespace, you will not usually have to do
+     * with these classes directly, and should not try to use their interface
+     * directly as it may change without notice. In fact, since the
+     * constructors are made private, you will not even be able to generate
+     * objects of this class, as they are only meant as temporaries for
+     * access to elements of the tensor class, not for passing them around as
+     * arguments of functions, etc.
+     *
+     * This class is an adaptation of a similar class used for the Table
+     * class.
+     *
+     * @author Wolfgang Bangerth, 2002, 2005
+     */
+    template <int rank, int dim, bool constness, int P, typename Number>
+    class Accessor
+    {
+    public:
+      /**
+       * Import two typedefs from the switch class above.
+       */
+      typedef typename AccessorTypes<rank,dim,constness,Number>::reference reference;
+      typedef typename AccessorTypes<rank,dim,constness,Number>::tensor_type tensor_type;
+
+    private:
+      /**
+       * Constructor. Take a reference to the tensor object which we will
+       * access.
+       *
+       * The second argument denotes the values of previous indices into the
+       * tensor. For example, for a rank-4 tensor, if P=2, then we will
+       * already have had two successive element selections (e.g. through
+       * <tt>tensor[1][2]</tt>), and the two index values have to be stored
+       * somewhere. This class therefore only makes use of the first rank-P
+       * elements of this array, but passes it on to the next level with P-1
+       * which fills the next entry, and so on.
+       *
+       * The constructor is made private in order to prevent you having such
+       * objects around. Only the friends declared below, i.e. SymmetricTensor
+       * and the other accessor classes, can create them, as temporary objects.
+       * This guarantees that the accessor objects go out of scope earlier
+       * than the mother object, avoiding problems with data consistency.
+       */
+      Accessor (tensor_type              &tensor,
+                const TableIndices<rank> &previous_indices);
+
+      /**
+       * Default constructor. Not needed, and invisible, so private.
+       */
+      Accessor ();
+
+      /**
+       * Copy constructor. Not needed, and invisible, so private.
+       */
+      Accessor (const Accessor &a);
+
+    public:
+
+      /**
+       * Index operator; returns the accessor for the remaining P-1 indices.
+       */
+      Accessor<rank,dim,constness,P-1,Number> operator [] (const unsigned int i);
+
+    private:
+      /**
+       * Store the data given to the constructor.
+       */
+      tensor_type             &tensor;
+      const TableIndices<rank> previous_indices;
+
+      // declare some other classes
+      // as friends. make sure to
+      // work around bugs in some
+      // compilers
+      template <int,int,typename> friend class dealii::SymmetricTensor;
+      template <int,int,bool,int,typename>
+      friend class Accessor;
+#  ifndef DEAL_II_TEMPL_SPEC_FRIEND_BUG
+      friend class ::dealii::SymmetricTensor<rank,dim,Number>;
+      friend class Accessor<rank,dim,constness,P+1,Number>;
+#  endif
+    };
+
+
+
+    /**
+     * @internal Accessor class for SymmetricTensor. This is the
+     * specialization for the last index, which actually allows access to the
+     * elements of the tensor, rather than recursively returning access objects
+     * for further subsets. The same holds for this specialization as for the
+     * general template; see there for more information.
+     *
+     * @author Wolfgang Bangerth, 2002, 2005
+     */
+    template <int rank, int dim, bool constness, typename Number>
+    class Accessor<rank,dim,constness,1,Number>
+    {
+    public:
+      /**
+       * Import two typedefs from the switch class above.
+       */
+      typedef typename AccessorTypes<rank,dim,constness,Number>::reference reference;
+      typedef typename AccessorTypes<rank,dim,constness,Number>::tensor_type tensor_type;
+
+    private:
+      /**
+       * Constructor. Take a reference to the tensor object which we will
+       * access.
+       *
+       * The second argument denotes the values of previous indices into the
+       * tensor. For example, for a rank-4 tensor, if P=2, then we will
+       * already have had two successive element selections (e.g. through
+       * <tt>tensor[1][2]</tt>), and the two index values have to be stored
+       * somewhere. This class therefore only makes use of the first rank-P
+       * elements of this array, but passes it on to the next level with P-1
+       * which fills the next entry, and so on.
+       *
+       * For this particular specialization, i.e. for P==1, all but the last
+       * index are already filled.
+       *
+       * The constructor is made private in order to prevent you having such
+       * objects around. Only the friends declared below, i.e. SymmetricTensor
+       * and the other accessor classes, can create them, as temporary objects.
+       * This guarantees that the accessor objects go out of scope earlier
+       * than the mother object, avoiding problems with data consistency.
+       */
+      Accessor (tensor_type              &tensor,
+                const TableIndices<rank> &previous_indices);
+
+      /**
+       * Default constructor. Not needed, and invisible, so private.
+       */
+      Accessor ();
+
+      /**
+       * Copy constructor. Not needed, and invisible, so private.
+       */
+      Accessor (const Accessor &a);
+
+    public:
+
+      /**
+       * Index operator for the last index; the result has type reference.
+       */
+      reference operator [] (const unsigned int);
+
+    private:
+      /**
+       * Store the data given to the constructor.
+       */
+      tensor_type             &tensor;
+      const TableIndices<rank> previous_indices;
+
+      // declare some other classes
+      // as friends. make sure to
+      // work around bugs in some
+      // compilers
+      template <int,int,typename> friend class dealii::SymmetricTensor;
+      template <int,int,bool,int,typename>
+      friend class SymmetricTensorAccessors::Accessor;
+#  ifndef DEAL_II_TEMPL_SPEC_FRIEND_BUG
+      friend class ::dealii::SymmetricTensor<rank,dim,Number>;
+      friend class SymmetricTensorAccessors::Accessor<rank,dim,constness,2,Number>;
+#  endif
+    };
+  }
+}
+
+
+
+/**
+ * Provide a class that stores symmetric tensors of rank 2,4,... efficiently,
+ * i.e. only store those off-diagonal elements of the full tensor that are not
+ * redundant. For example, for symmetric 2x2 tensors, this would be the
+ * elements 11, 22, and 12, while the element 21 is equal to the 12 element.
+ *
+ * Using this class for symmetric tensors of rank 2 has advantages over
+ * matrices in many cases since the dimension is known to the compiler as well
+ * as the location of the data. It is therefore possible to produce far more
+ * efficient code than for matrices with runtime-dependent dimension. It is
+ * also more efficient than using the more general <tt>Tensor</tt> class,
+ * since fewer elements are stored, and the class automatically makes sure that
+ * the tensor represents a symmetric object.
+ *
+ * For tensors of higher rank, the savings in storage are even higher. For
+ * example for the 3x3x3x3 tensors of rank 4, only 36 instead of the full 81
+ * entries have to be stored.
+ *
+ * While the definition of a symmetric rank-2 tensor is obvious, tensors of
+ * rank 4 are considered symmetric if they are operators mapping symmetric
+ * rank-2 tensors onto symmetric rank-2 tensors. This entails certain symmetry
+ * properties on the elements in their 4-dimensional index space, in
+ * particular that
+ * <tt>C<sub>ijkl</sub>=C<sub>jikl</sub>=C<sub>ijlk</sub></tt>. However, it
+ * does not imply the relation <tt>C<sub>ijkl</sub>=C<sub>klij</sub></tt>.
+ * Consequently, symmetric tensors of rank 4 as understood here are only
+ * tensors that map symmetric tensors onto symmetric tensors, but they do not
+ * necessarily induce a symmetric scalar product <tt>a:C:b=b:C:a</tt> or even
+ * a positive (semi-)definite form <tt>a:C:a</tt>, where <tt>a,b</tt> are
+ * symmetric rank-2 tensors and the colon indicates the common double-index
+ * contraction that acts as a product for symmetric tensors.
+ *
+ * Symmetric tensors are most often used in structural and fluid mechanics,
+ * where strains and stresses are usually symmetric tensors, and the stress-
+ * strain relationship is given by a symmetric rank-4 tensor.
+ *
+ * Note that symmetric tensors only exist with even numbers of indices. In
+ * other words, the only objects that you can use are
+ * <tt>SymmetricTensor<2,dim></tt>, <tt>SymmetricTensor<4,dim></tt>, etc, but
+ * <tt>SymmetricTensor<1,dim></tt> and <tt>SymmetricTensor<3,dim></tt> do not
+ * exist and their use will most likely lead to compiler errors.
+ *
+ *
+ * <h3>Accessing elements</h3>
+ *
+ * The elements of a tensor <tt>t</tt> can be accessed using the bracket
+ * operator, i.e. for a tensor of rank 4, <tt>t[0][1][0][1]</tt> accesses the
+ * element <tt>t<sub>0101</sub></tt>. This access can be used for both reading
+ * and writing (if the tensor is non-constant at least). You may also perform
+ * other operations on it, although that may lead to confusing situations
+ * because several elements of the tensor are stored at the same location. For
+ * example, for a rank-2 tensor that is assumed to be zero at the beginning,
+ * writing <tt>t[0][1]+=1; t[1][0]+=1;</tt> will lead to the same element
+ * being increased by one <em>twice</em>, because even though the accesses use
+ * different indices, the elements that are accessed are symmetric and
+ * therefore stored at the same location. It may therefore be useful in
+ * application programs to restrict operations on individual elements to
+ * simple reads or writes.
+ *
+ * @ingroup geomprimitives
+ * @author Wolfgang Bangerth, 2005
+ */
+template <int rank, int dim, typename Number>
+class SymmetricTensor
+{
+public:
+  /**
+   * Provide a way to get the dimension of an object without explicit
+   * knowledge of its data type. Implementation is this way instead of
+   * providing a function <tt>dimension()</tt> because now it is possible to
+   * get the dimension at compile time without the expansion and preevaluation
+   * of an inlined function; the compiler may therefore produce more efficient
+   * code and you may use this value to declare other data types.
+   */
+  static const unsigned int dimension = dim;
+
+  /**
+   * An integer denoting the number of independent components that fully
+   * describe a symmetric tensor. In $d$ space dimensions, this number equals
+   * $\frac 12 (d^2+d)$ for symmetric tensors of rank 2.
+   */
+  static const unsigned int n_independent_components
+    = internal::SymmetricTensorAccessors::StorageType<rank,dim,Number>::
+      n_independent_components;
+
+  /**
+   * Default constructor. Creates a tensor with all entries equal to zero.
+   */
+  SymmetricTensor ();
+
+  /**
+   * Constructor. Generate a symmetric tensor from a general one. Assumes that
+   * @p t is already symmetric, and in debug mode this is in fact checked.
+   * Note that no provision is made to assure that the tensor is symmetric
+   * only up to round-off error: if the incoming tensor is not exactly
+   * symmetric, then an exception is thrown. If you know that incoming tensor
+   * is symmetric only up to round-off, then you may want to call the
+   * <tt>symmetrize</tt> function first. If you aren't sure, it is good
+   * practice to check before calling <tt>symmetrize</tt>.
+   */
+  SymmetricTensor (const Tensor<2,dim,Number> &t);
+
+  /**
+   * A constructor that creates a symmetric tensor from an array holding its
+   * independent elements. Using this constructor assumes that the caller
+   * knows the order in which elements are stored in symmetric tensors; its
+   * use is therefore discouraged, but if you think you want to use it anyway
+   * you can query the order of elements using the unrolled_index() function.
+   *
+   * This constructor is currently only implemented for symmetric tensors of
+   * rank 2.
+   *
+   * The size of the array passed is equal to
+   * SymmetricTensor<rank,dim>::n_independent_components; the reason for using
+   * the object from the internal namespace is to work around bugs in some
+   * older compilers.
+   */
+  SymmetricTensor (const Number (&array) [n_independent_components]);
+
+  /**
+   * Copy constructor from tensors with different underlying scalar type. This
+   * obviously requires that the @p OtherNumber type is convertible to @p
+   * Number.
+   */
+  template <typename OtherNumber>
+  explicit
+  SymmetricTensor (const SymmetricTensor<rank,dim,OtherNumber> &initializer);
+
+  /**
+   * Assignment operator.
+   */
+  SymmetricTensor &operator = (const SymmetricTensor &);
+
+  /**
+   * This operator assigns a scalar to a tensor. To avoid confusion with what
+   * exactly it means to assign a scalar value to a tensor, zero is the only
+   * value allowed for <tt>d</tt>, allowing the intuitive notation
+   * <tt>t=0</tt> to reset all elements of the tensor to zero.
+   */
+  SymmetricTensor &operator = (const Number d);
+
+  /**
+   * Convert the present symmetric tensor into a full tensor with the same
+   * elements, but using the different storage scheme of full tensors.
+   */
+  operator Tensor<rank,dim,Number> () const;
+
+  /**
+   * Test for equality of two tensors.
+   */
+  bool operator == (const SymmetricTensor &) const;
+
+  /**
+   * Test for inequality of two tensors.
+   */
+  bool operator != (const SymmetricTensor &) const;
+
+  /**
+   * Add another tensor.
+   */
+  SymmetricTensor &operator += (const SymmetricTensor &);
+
+  /**
+   * Subtract another tensor.
+   */
+  SymmetricTensor &operator -= (const SymmetricTensor &);
+
+  /**
+   * Scale the tensor by <tt>factor</tt>, i.e. multiply all components by
+   * <tt>factor</tt>.
+   */
+  SymmetricTensor &operator *= (const Number factor);
+
+  /**
+   * Scale the tensor by <tt>1/factor</tt>.
+   */
+  SymmetricTensor &operator /= (const Number factor);
+
+  /**
+   * Add two tensors. If possible, you should use <tt>operator +=</tt> instead
+   * since this does not need the creation of a temporary.
+   */
+  SymmetricTensor   operator + (const SymmetricTensor &s) const;
+
+  /**
+   * Subtract two tensors. If possible, you should use <tt>operator -=</tt>
+   * instead since this does not need the creation of a temporary.
+   */
+  SymmetricTensor   operator - (const SymmetricTensor &s) const;
+
+  /**
+   * Unary minus operator. Negate all entries of a tensor.
+   */
+  SymmetricTensor   operator - () const;
+
+  /**
+   * Product between the present symmetric tensor and a tensor of rank 2. For
+   * example, if the present object is also a rank-2 tensor, then this is the
+   * scalar-product double contraction <tt>a<sub>ij</sub>b<sub>ij</sub></tt>
+   * over all indices <tt>i,j</tt>. In this case, the return value evaluates
+   * to a single scalar. While it is possible to define other scalar product
+   * (and associated induced norms), this one seems to be the most appropriate
+   * one.
+   *
+   * If the present object is a rank-4 tensor, then the result is a rank-2
+   * tensor, i.e., the operation contracts over the last two indices of the
+   * present object and the indices of the argument, and the result is a
+   * tensor of rank 2.
+   *
+   * Note that the multiplication operator for symmetric tensors is defined to
+   * be a double contraction over two indices, while it is defined as a single
+   * contraction over only one index for regular <tt>Tensor</tt> objects. For
+   * symmetric tensors it therefore acts in a way that is commonly denoted by
+   * a "colon multiplication" in the mathematical literature.
+   *
+   * There are global functions <tt>double_contract</tt> that do the same work
+   * as this operator, but rather than returning the result as a return value,
+   * they write it into the first argument to the function.
+   */
+  typename internal::SymmetricTensorAccessors::double_contraction_result<rank,2,dim,Number>::type
+  operator * (const SymmetricTensor<2,dim,Number> &s) const;
+
+  /**
+   * Contraction over two indices of the present object with the rank-4
+   * symmetric tensor given as argument.
+   */
+  typename internal::SymmetricTensorAccessors::double_contraction_result<rank,4,dim,Number>::type
+  operator * (const SymmetricTensor<4,dim,Number> &s) const;
+
+  /**
+   * Return a read-write reference to the indicated element.
+   */
+  Number &operator() (const TableIndices<rank> &indices);
+
+  /**
+   * Return the value of the indicated element. This is the read-only
+   * counterpart of the non-const <tt>operator()</tt> declared above.
+   *
+   * Note that the element is returned by value, i.e. as a copy of type
+   * <tt>Number</tt>, and not as a reference into the tensor.
+   */
+  Number operator() (const TableIndices<rank> &indices) const;
+
+  /**
+   * Access the elements of a row of this symmetric tensor. This function is
+   * called for constant tensors.
+   */
+  internal::SymmetricTensorAccessors::Accessor<rank,dim,true,rank-1,Number>
+  operator [] (const unsigned int row) const;
+
+  /**
+   * Access the elements of a row of this symmetric tensor. This function is
+   * called for non-constant tensors.
+   */
+  internal::SymmetricTensorAccessors::Accessor<rank,dim,false,rank-1,Number>
+  operator [] (const unsigned int row);
+
+  /**
+   * Access to an element where you specify the entire set of indices. This
+   * version is used for constant tensors and returns the element by value.
+   */
+  Number
+  operator [] (const TableIndices<rank> &indices) const;
+
+  /**
+   * Access to an element where you specify the entire set of indices. This
+   * version is used for non-constant tensors and returns a read-write
+   * reference to the element.
+   */
+  Number &
+  operator [] (const TableIndices<rank> &indices);
+
+  /**
+   * Access to an element according to unrolled index. The function
+   * <tt>s.access_raw_entry(i)</tt> does the same as
+   * <tt>s[s.unrolled_to_component_indices(i)]</tt>, but more efficiently.
+   */
+  Number
+  access_raw_entry (const unsigned int unrolled_index) const;
+
+  /**
+   * Access to an element according to unrolled index. The function
+   * <tt>s.access_raw_entry(i)</tt> does the same as
+   * <tt>s[s.unrolled_to_component_indices(i)]</tt>, but more efficiently.
+   */
+  Number &
+  access_raw_entry (const unsigned int unrolled_index);
+
+  /**
+   * Return the Frobenius-norm of a tensor, i.e. the square root of the sum of
+   * squares of all entries. This norm is induced by the scalar product
+   * defined above for two symmetric tensors. Note that it includes <i>all</i>
+   * entries of the tensor, counting symmetry, not only the unique ones (for
+   * example, for rank-2 tensors, this norm includes adding up the squares of
+   * upper right as well as lower left entries, not just one of them, although
+   * they are equal for symmetric tensors).
+   */
+  Number norm () const;
+
+  /**
+   * Tensors can be unrolled by simply pasting all elements into one long
+   * vector, but for this an order of elements has to be defined. For
+   * symmetric tensors, this function returns which index within the range
+   * <code>[0,n_independent_components)</code> the given entry in a symmetric
+   * tensor has.
+   */
+  static
+  unsigned int
+  component_to_unrolled_index (const TableIndices<rank> &indices);
+
+  /**
+   * The opposite of the previous function: given an index $i$ in the unrolled
+   * form of the tensor, return what set of indices $(k,l)$ (for rank-2
+   * tensors) or $(k,l,m,n)$ (for rank-4 tensors) corresponds to it.
+   */
+  static
+  TableIndices<rank>
+  unrolled_to_component_indices (const unsigned int i);
+
+  /**
+   * Reset all values to zero.
+   *
+   * Note that this is partly inconsistent with the semantics of the @p
+   * clear() member functions of the standard library containers and of
+   * several other classes within deal.II, which not only reset the values of
+   * stored elements to zero, but release all memory and return the object
+   * into a virginal state. However, since the size of objects of the present
+   * type is determined by its template parameters, resizing is not an option,
+   * and indeed the state where all elements have a zero value is the state
+   * right after construction of such an object.
+   */
+  void clear ();
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  static std::size_t memory_consumption ();
+
+  /**
+   * Read or write the data of this object to or from a stream for the purpose
+   * of serialization.
+   */
+  template <class Archive>
+  void serialize(Archive &ar, const unsigned int version);
+
+private:
+  /**
+   * A structure that describes properties of the base tensor.
+   */
+  typedef
+  internal::SymmetricTensorAccessors::StorageType<rank,dim,Number>
+  base_tensor_descriptor;
+
+  /**
+   * Data storage type for a symmetric tensor.
+   */
+  typedef typename base_tensor_descriptor::base_tensor_type base_tensor_type;
+
+  /**
+   * The place where we store the data of the tensor.
+   */
+  base_tensor_type data;
+
+  /**
+   * Make all other symmetric tensors friends.
+   */
+  template <int, int, typename> friend class SymmetricTensor;
+
+  /**
+   * Make a few more functions friends.
+   */
+  template <int dim2, typename Number2>
+  friend Number2 trace (const SymmetricTensor<2,dim2,Number2> &d);
+
+  template <int dim2, typename Number2>
+  friend Number2 determinant (const SymmetricTensor<2,dim2,Number2> &t);
+
+  template <int dim2, typename Number2>
+  friend SymmetricTensor<2,dim2,Number2>
+  deviator (const SymmetricTensor<2,dim2,Number2> &t);
+
+  template <int dim2, typename Number2>
+  friend SymmetricTensor<2,dim2,Number2> unit_symmetric_tensor ();
+
+  template <int dim2, typename Number2>
+  friend SymmetricTensor<4,dim2,Number2> deviator_tensor ();
+
+  template <int dim2, typename Number2>
+  friend SymmetricTensor<4,dim2,Number2> identity_tensor ();
+
+  template <int dim2, typename Number2>
+  friend SymmetricTensor<4,dim2,Number2> invert (const SymmetricTensor<4,dim2,Number2> &);
+};
+
+
+
+// ------------------------- inline functions ------------------------
+
+#ifndef DOXYGEN
+
+namespace internal
+{
+  namespace SymmetricTensorAccessors
+  {
+    // ------ Accessor<rank,dim,constness,P,Number>: general template ------
+
+    // Default constructor. It is not meant to be executed: the tensor
+    // member is initialized from a dereferenced null pointer merely to have
+    // something in the initializer list, and the body unconditionally
+    // triggers Assert(false, ...).
+    template <int rank, int dim, bool constness, int P, typename Number>
+    Accessor<rank,dim,constness,P,Number>::Accessor ()
+      :
+      tensor (*static_cast<tensor_type *>(0)),
+      previous_indices ()
+    {
+      Assert (false, ExcMessage ("You can't call the default constructor of this class."));
+    }
+
+
+
+    // Store the tensor to be accessed together with the indices that have
+    // been collected so far.
+    template <int rank, int dim, bool constness, int P, typename Number>
+    Accessor<rank,dim,constness,P,Number>::Accessor (tensor_type              &tensor,
+                                                     const TableIndices<rank> &previous_indices)
+      :
+      tensor (tensor),
+      previous_indices (previous_indices)
+    {}
+
+
+
+    // Copy constructor: take over both members of the other accessor.
+    template <int rank, int dim, bool constness, int P, typename Number>
+    Accessor<rank,dim,constness,P,Number>::Accessor (const Accessor &a)
+      :
+      tensor (a.tensor),
+      previous_indices (a.previous_indices)
+    {}
+
+
+
+    // Fix one more index: merge i into the stored indices at position
+    // rank-P and hand the result on to an accessor with P-1 indices left.
+    template <int rank, int dim, bool constness, int P, typename Number>
+    Accessor<rank,dim,constness,P-1,Number>
+    Accessor<rank,dim,constness,P,Number>::operator[] (const unsigned int i)
+    {
+      const TableIndices<rank> extended_indices = merge (previous_indices, i, rank-P);
+      return Accessor<rank,dim,constness,P-1,Number> (tensor, extended_indices);
+    }
+
+
+
+    // ------ Accessor<rank,dim,constness,1,Number>: last index ------
+
+    // Default constructor. As for the general template, it is not meant to
+    // be executed and unconditionally triggers Assert(false, ...).
+    template <int rank, int dim, bool constness, typename Number>
+    Accessor<rank,dim,constness,1,Number>::Accessor ()
+      :
+      tensor (*static_cast<tensor_type *>(0)),
+      previous_indices ()
+    {
+      Assert (false, ExcMessage ("You can't call the default constructor of this class."));
+    }
+
+
+
+    // Store the tensor to be accessed together with the indices that have
+    // been collected so far.
+    template <int rank, int dim, bool constness, typename Number>
+    Accessor<rank,dim,constness,1,Number>::Accessor (tensor_type              &tensor,
+                                                     const TableIndices<rank> &previous_indices)
+      :
+      tensor (tensor),
+      previous_indices (previous_indices)
+    {}
+
+
+
+    // Copy constructor: take over both members of the other accessor.
+    template <int rank, int dim, bool constness, typename Number>
+    Accessor<rank,dim,constness,1,Number>::Accessor (const Accessor &a)
+      :
+      tensor (a.tensor),
+      previous_indices (a.previous_indices)
+    {}
+
+
+
+    // Supply the last missing index: merge i into the stored indices at
+    // position rank-1 and return whatever the tensor's operator() yields
+    // for the now complete index set.
+    template <int rank, int dim, bool constness, typename Number>
+    typename Accessor<rank,dim,constness,1,Number>::reference
+    Accessor<rank,dim,constness,1,Number>::operator[] (const unsigned int i)
+    {
+      const TableIndices<rank> complete_indices = merge (previous_indices, i, rank-1);
+      return tensor(complete_indices);
+    }
+
+
+  }
+}
+
+
+
+// Default constructor. Nothing is done here explicitly; the data member is
+// set up by its own default constructor.
+template <int rank, int dim, typename Number>
+inline
+SymmetricTensor<rank,dim,Number>::SymmetricTensor ()
+{
+}
+
+
+
+// Construct a rank-2 symmetric tensor from a general rank-2 tensor. The
+// Assert statements verify that the argument is in fact symmetric. The
+// entries are stored with the diagonal first, followed by the entries above
+// the diagonal, row by row.
+template <int rank, int dim, typename Number>
+inline
+SymmetricTensor<rank,dim,Number>::SymmetricTensor (const Tensor<2,dim,Number> &t)
+{
+  Assert (rank == 2, ExcNotImplemented());
+
+  if (dim == 2)
+    {
+      Assert (t[0][1] == t[1][0], ExcInternalError());
+
+      // two diagonal entries, one off-diagonal entry
+      data[0] = t[0][0];
+      data[1] = t[1][1];
+      data[2] = t[0][1];
+    }
+  else if (dim == 3)
+    {
+      Assert (t[0][1] == t[1][0], ExcInternalError());
+      Assert (t[0][2] == t[2][0], ExcInternalError());
+      Assert (t[1][2] == t[2][1], ExcInternalError());
+
+      // three diagonal entries, three off-diagonal entries
+      data[0] = t[0][0];
+      data[1] = t[1][1];
+      data[2] = t[2][2];
+      data[3] = t[0][1];
+      data[4] = t[0][2];
+      data[5] = t[1][2];
+    }
+  else
+    {
+      // generic case: check symmetry of every pair below/above the diagonal
+      for (unsigned int d=0; d<dim; ++d)
+        for (unsigned int e=0; e<d; ++e)
+          Assert(t[d][e] == t[e][d], ExcInternalError());
+
+      // diagonal entries occupy the first dim slots
+      for (unsigned int i=0; i<dim; ++i)
+        data[i] = t[i][i];
+
+      // the strict upper triangle follows, traversed row by row
+      unsigned int slot = dim;
+      for (unsigned int row=0; row<dim; ++row)
+        for (unsigned int col=row+1; col<dim; ++col)
+          {
+            data[slot] = t[row][col];
+            ++slot;
+          }
+    }
+}
+
+
+
+// Converting constructor: copy the independent components of a symmetric
+// tensor with a different scalar type one by one, relying on the assignment
+// from OtherNumber to Number.
+template <int rank, int dim, typename Number>
+template <typename OtherNumber>
+inline
+SymmetricTensor<rank,dim,Number>::
+SymmetricTensor (const SymmetricTensor<rank,dim,OtherNumber> &initializer)
+{
+  for (unsigned int component=0; component<n_independent_components; ++component)
+    {
+      data[component] = initializer.data[component];
+    }
+}
+
+
+
+
+// Construct the tensor from a plain array holding the independent
+// components. The array is reinterpreted as the array type of the underlying
+// base tensor and handed to the constructor of the data member.
+template <int rank, int dim, typename Number>
+inline
+SymmetricTensor<rank,dim,Number>::SymmetricTensor (const Number (&array) [n_independent_components])
+  :
+  data (*reinterpret_cast<const typename base_tensor_type::array_type *>(array))
+{
+  // the reinterpret_cast in the initializer list is only legitimate if both
+  // array types occupy the same number of bytes; verify this here
+  Assert (sizeof(typename base_tensor_type::array_type)
+          == sizeof(array),
+          ExcInternalError());
+}
+
+
+
+// Copy assignment: take over the stored data of the other tensor.
+template <int rank, int dim, typename Number>
+inline
+SymmetricTensor<rank,dim,Number> &
+SymmetricTensor<rank,dim,Number>::operator = (const SymmetricTensor<rank,dim,Number> &other)
+{
+  data = other.data;
+
+  return *this;
+}
+
+
+
+// Assignment of a scalar. Only the value zero is accepted (checked by the
+// Assert); the effect is that all stored entries are reset to zero.
+template <int rank, int dim, typename Number>
+inline
+SymmetricTensor<rank,dim,Number> &
+SymmetricTensor<rank,dim,Number>::operator = (const Number d)
+{
+  Assert (d==0, ExcMessage ("Only assignment with zero is allowed"));
+
+  // d is otherwise unused; the cast silences warnings about that
+  (void) d;
+
+  data = 0;
+  return *this;
+}
+
+
+
+// Conversion to a full tensor (implemented for rank 2 only). The full
+// dim x dim array is filled from the compressed storage: the diagonal comes
+// first, then the entries above the diagonal row by row, each of which is
+// mirrored to the position below the diagonal.
+template <int rank, int dim, typename Number>
+inline
+SymmetricTensor<rank,dim,Number>::
+operator Tensor<rank,dim,Number> () const
+{
+  Assert (rank == 2, ExcNotImplemented());
+
+  Number full[dim][dim];
+
+  // position of the next off-diagonal entry within data
+  unsigned int slot = dim;
+  for (unsigned int row=0; row<dim; ++row)
+    {
+      full[row][row] = data[row];
+
+      for (unsigned int col=row+1; col<dim; ++col)
+        {
+          full[row][col] = data[slot];
+          full[col][row] = data[slot];
+          ++slot;
+        }
+    }
+
+  return Tensor<2,dim,Number>(full);
+}
+
+
+
+// Two symmetric tensors are equal if their stored data compares equal.
+template <int rank, int dim, typename Number>
+inline
+bool
+SymmetricTensor<rank,dim,Number>::operator ==
+(const SymmetricTensor<rank,dim,Number> &other) const
+{
+  return (data == other.data);
+}
+
+
+
+// Two symmetric tensors differ if their stored data compares unequal.
+template <int rank, int dim, typename Number>
+inline
+bool
+SymmetricTensor<rank,dim,Number>::operator !=
+(const SymmetricTensor<rank,dim,Number> &other) const
+{
+  return (data != other.data);
+}
+
+
+
+// Add the stored data of another tensor to the present one.
+template <int rank, int dim, typename Number>
+inline
+SymmetricTensor<rank,dim,Number> &
+SymmetricTensor<rank,dim,Number>::operator +=
+(const SymmetricTensor<rank,dim,Number> &other)
+{
+  data += other.data;
+
+  return *this;
+}
+
+
+
+// Subtract the stored data of another tensor from the present one.
+template <int rank, int dim, typename Number>
+inline
+SymmetricTensor<rank,dim,Number> &
+SymmetricTensor<rank,dim,Number>::operator -=
+(const SymmetricTensor<rank,dim,Number> &other)
+{
+  data -= other.data;
+
+  return *this;
+}
+
+
+
+// Multiply the stored data by the given scalar.
+template <int rank, int dim, typename Number>
+inline
+SymmetricTensor<rank,dim,Number> &
+SymmetricTensor<rank,dim,Number>::operator *= (const Number factor)
+{
+  data *= factor;
+
+  return *this;
+}
+
+
+
+// Divide the stored data by the given scalar.
+template <int rank, int dim, typename Number>
+inline
+SymmetricTensor<rank,dim,Number> &
+SymmetricTensor<rank,dim,Number>::operator /= (const Number factor)
+{
+  data /= factor;
+
+  return *this;
+}
+
+
+
+// Sum of two symmetric tensors: copy the present object and add the
+// argument's data to the copy.
+template <int rank, int dim, typename Number>
+inline
+SymmetricTensor<rank,dim,Number>
+SymmetricTensor<rank,dim,Number>::operator + (const SymmetricTensor &t) const
+{
+  SymmetricTensor sum = *this;
+  sum.data += t.data;
+
+  return sum;
+}
+
+
+
+// Difference of two symmetric tensors: copy the present object and subtract
+// the argument's data from the copy.
+template <int rank, int dim, typename Number>
+inline
+SymmetricTensor<rank,dim,Number>
+SymmetricTensor<rank,dim,Number>::operator - (const SymmetricTensor &t) const
+{
+  SymmetricTensor difference = *this;
+  difference.data -= t.data;
+
+  return difference;
+}
+
+
+
+// Unary minus: return a copy of the present object whose data has been
+// negated.
+template <int rank, int dim, typename Number>
+inline
+SymmetricTensor<rank,dim,Number>
+SymmetricTensor<rank,dim,Number>::operator - () const
+{
+  SymmetricTensor negated = *this;
+  negated.data = -negated.data;
+
+  return negated;
+}
+
+
+
+// Reset the tensor by forwarding to the clear() function of the data member.
+template <int rank, int dim, typename Number>
+inline
+void
+SymmetricTensor<rank,dim,Number>::clear ()
+{
+  data.clear ();
+}
+
+
+
+// The memory estimate is delegated to the structure that describes the
+// storage of a tensor with these template arguments.
+template <int rank, int dim, typename Number>
+inline
+std::size_t
+SymmetricTensor<rank,dim,Number>::memory_consumption ()
+{
+  typedef
+  internal::SymmetricTensorAccessors::StorageType<rank,dim,Number>
+  storage_descriptor;
+
+  return storage_descriptor::memory_consumption ();
+}
+
+
+
+namespace internal
+{
+
+  // Double contraction of two rank-2 symmetric tensors, both given by their
+  // compressed storage. The first dim entries of the storage enter the sum
+  // once, all remaining entries enter it twice.
+  template <int dim, typename Number>
+  inline
+  typename SymmetricTensorAccessors::double_contraction_result<2,2,dim,Number>::type
+  perform_double_contraction (const typename SymmetricTensorAccessors::StorageType<2,dim,Number>::base_tensor_type &data,
+                              const typename SymmetricTensorAccessors::StorageType<2,dim,Number>::base_tensor_type &sdata)
+  {
+    if (dim == 1)
+      return data[0] * sdata[0];
+
+    if (dim == 2)
+      return (data[0] * sdata[0] +
+              data[1] * sdata[1] +
+              2*data[2] * sdata[2]);
+
+    // General case. Accumulate the entries stored behind the first dim
+    // ones first so that a single multiplication by 2 suffices for all of
+    // them, then add the first dim entries.
+    Number result = data[dim] * sdata[dim];
+    for (unsigned int k=dim+1; k<(dim*(dim+1)/2); ++k)
+      result += data[k] * sdata[k];
+    result *= 2.;
+
+    for (unsigned int k=0; k<dim; ++k)
+      result += data[k] * sdata[k];
+
+    return result;
+  }
+
+
+
+  // Double contraction of a rank-4 with a rank-2 symmetric tensor: every row
+  // of the rank-4 storage is contracted with the rank-2 storage, and the
+  // resulting scalars form the independent components of the result.
+  template <int dim, typename Number>
+  inline
+  typename SymmetricTensorAccessors::double_contraction_result<4,2,dim,Number>::type
+  perform_double_contraction (const typename SymmetricTensorAccessors::StorageType<4,dim,Number>::base_tensor_type &data,
+                              const typename SymmetricTensorAccessors::StorageType<2,dim,Number>::base_tensor_type &sdata)
+  {
+    const unsigned int n_rows =
+      SymmetricTensorAccessors::StorageType<2,dim,Number>::n_independent_components;
+
+    Number components [n_rows];
+    for (unsigned int row=0; row<n_rows; ++row)
+      {
+        components[row] = perform_double_contraction<dim,Number>(data[row], sdata);
+      }
+
+    return dealii::SymmetricTensor<2,dim,Number>(components);
+  }
+
+
+
+  // Double contraction of a rank-2 with a rank-4 symmetric tensor, operating
+  // on the compressed storage. For each entry of the result, the first dim
+  // entries of data contribute once and the remaining ones twice.
+  template <int dim, typename Number>
+  inline
+  typename SymmetricTensorAccessors::StorageType<2,dim,Number>::base_tensor_type
+  perform_double_contraction (const typename SymmetricTensorAccessors::StorageType<2,dim,Number>::base_tensor_type &data,
+                              const typename SymmetricTensorAccessors::StorageType<4,dim,Number>::base_tensor_type &sdata)
+  {
+    typename SymmetricTensorAccessors::StorageType<2,dim,Number>::base_tensor_type result;
+
+    for (unsigned int col=0; col<result.dimension; ++col)
+      {
+        // entries with weight one
+        for (unsigned int k=0; k<dim; ++k)
+          result[col] += data[k] * sdata[k][col];
+
+        // entries with weight two
+        for (unsigned int k=dim; k<(dim*(dim+1)/2); ++k)
+          result[col] += 2 * data[k] * sdata[k][col];
+      }
+
+    return result;
+  }
+
+
+
+  // Double contraction of two rank-4 symmetric tensors, operating on the
+  // compressed storage. This amounts to a product of the two storage
+  // matrices in which the first dim summands enter once and the remaining
+  // ones twice.
+  template <int dim, typename Number>
+  inline
+  typename SymmetricTensorAccessors::StorageType<4,dim,Number>::base_tensor_type
+  perform_double_contraction (const typename SymmetricTensorAccessors::StorageType<4,dim,Number>::base_tensor_type &data,
+                              const typename SymmetricTensorAccessors::StorageType<4,dim,Number>::base_tensor_type &sdata)
+  {
+    const unsigned int n_entries =
+      SymmetricTensorAccessors::StorageType<2,dim,Number>::n_independent_components;
+
+    typename SymmetricTensorAccessors::StorageType<4,dim,Number>::base_tensor_type result;
+
+    for (unsigned int row=0; row<n_entries; ++row)
+      for (unsigned int col=0; col<n_entries; ++col)
+        {
+          // summands with weight one
+          for (unsigned int k=0; k<dim; ++k)
+            result[row][col] += data[row][k] * sdata[k][col];
+
+          // summands with weight two
+          for (unsigned int k=dim; k<(dim*(dim+1)/2); ++k)
+            result[row][col] += 2 * data[row][k] * sdata[k][col];
+        }
+
+    return result;
+  }
+
+} // end of namespace internal
+
+
+
+// Double contraction with a rank-2 symmetric tensor.
+template <int rank, int dim, typename Number>
+inline
+typename internal::SymmetricTensorAccessors::double_contraction_result<rank,2,dim,Number>::type
+SymmetricTensor<rank,dim,Number>::operator * (const SymmetricTensor<2,dim,Number> &s) const
+{
+  // The result is a scalar if the present object has rank 2 and a rank-2
+  // tensor if it has rank 4. These are different data types, which is why
+  // the work is delegated to overloaded internal functions that are
+  // selected by the type of the data member.
+  return internal::perform_double_contraction<dim,Number> (data, s.data);
+}
+
+
+
+// Double contraction with a rank-4 symmetric tensor. The internal function
+// computes the compressed storage of the result, which is then placed into
+// a tensor of the appropriate result type.
+template <int rank, int dim, typename Number>
+inline
+typename internal::SymmetricTensorAccessors::double_contraction_result<rank,4,dim,Number>::type
+SymmetricTensor<rank,dim,Number>::operator * (const SymmetricTensor<4,dim,Number> &s) const
+{
+  typedef
+  typename internal::SymmetricTensorAccessors::
+  double_contraction_result<rank,4,dim,Number>::type
+  result_type;
+
+  result_type result;
+  result.data = internal::perform_double_contraction<dim,Number> (data,s.data);
+
+  return result;
+}
+
+
+
+// internal namespace to switch between the
+// access of different tensors. There used to
+// be explicit instantiations before for
+// different ranks and dimensions, but since
+// we now allow for templates on the data
+// type, and since we cannot partially
+// specialize the implementation, this got
+// into a separate namespace
+namespace internal
+{
+  // Return a read-write reference to the entry of a rank-2 symmetric tensor
+  // that is addressed by the given pair of indices, looking it up in the
+  // compressed storage passed as second argument.
+  template <int dim, typename Number>
+  inline
+  Number &
+  symmetric_tensor_access (const TableIndices<2> &indices,
+                           typename SymmetricTensorAccessors::StorageType<2,dim,Number>::base_tensor_type &data)
+  {
+    // in 1d there is only a single entry
+    if (dim == 1)
+      return data[0];
+
+    // diagonal entries are stored consecutively at the beginning
+    if (indices[0] == indices[1])
+      return data[indices[0]];
+
+    // in 2d the only remaining entry is the single off-diagonal one
+    if (dim == 2)
+      {
+        Assert (((indices[0]==1) && (indices[1]==0)) ||
+                ((indices[0]==0) && (indices[1]==1)),
+                ExcInternalError());
+        return data[2];
+      }
+
+    // all other dimensions: bring the indices into ascending order and
+    // walk over the entries above the diagonal in the order in which they
+    // are stored until the requested one is found
+    TableIndices<2> sorted_indices (indices);
+    sorted_indices.sort ();
+
+    unsigned int slot = dim;
+    for (unsigned int row=0; row<dim; ++row)
+      for (unsigned int col=row+1; col<dim; ++col)
+        {
+          if ((sorted_indices[0]==row) && (sorted_indices[1]==col))
+            return data[slot];
+          ++slot;
+        }
+    Assert (false, ExcInternalError());
+
+    // we should never get here, but a reference to something has to be
+    // returned
+    static Number dummy_but_referenceable = Number();
+    return dummy_but_referenceable;
+  }
+
+
+
+  // Return, by value, the entry of a rank-2 symmetric tensor that is
+  // addressed by the given pair of indices, looking it up in the compressed
+  // storage passed as second argument.
+  template <int dim, typename Number>
+  inline
+  Number
+  symmetric_tensor_access (const TableIndices<2> &indices,
+                           const typename SymmetricTensorAccessors::StorageType<2,dim,Number>::base_tensor_type &data)
+  {
+    // 1d is very simple and done first
+    if (dim == 1)
+      return data[0];
+
+    // first treat the main diagonal elements, which are stored consecutively
+    // at the beginning
+    if (indices[0] == indices[1])
+      return data[indices[0]];
+
+    // the rest is messier and requires a few switches.
+    switch (dim)
+      {
+      case 2:
+        // at least for the 2x2 case it is reasonably simple
+        Assert (((indices[0]==1) && (indices[1]==0)) ||
+                ((indices[0]==0) && (indices[1]==1)),
+                ExcInternalError());
+        return data[2];
+
+      default:
+        // to do the rest, sort our indices before comparing
+      {
+        TableIndices<2> sorted_indices (indices);
+        sorted_indices.sort ();
+
+        // walk over the entries above the diagonal in storage order
+        for (unsigned int d=0, c=0; d<dim; ++d)
+          for (unsigned int e=d+1; e<dim; ++e, ++c)
+            if ((sorted_indices[0]==d) && (sorted_indices[1]==e))
+              return data[dim+c];
+        Assert (false, ExcInternalError());
+      }
+      }
+
+    // We should never get here. Since this function returns by value, no
+    // referenceable static object is needed for the fallback; a
+    // value-initialized temporary is sufficient.
+    return Number();
+  }
+
+
+
+  // Return a read-write reference to the entry of a rank-4 symmetric tensor
+  // that is addressed by the given four indices, looking it up in the
+  // storage passed as second argument.
+  //
+  // Each entry of the tensor can be thought of as an entry in a matrix that
+  // maps rolled-out rank-2 tensors into rolled-out rank-2 tensors; this is
+  // the format in which rank-4 tensors are stored. The first two indices
+  // determine the row and the last two indices the column of that matrix.
+  // Both index pairs are translated in exactly the same way, so this is done
+  // in a loop over the two pairs. Only dim = 1, 2, 3 are implemented.
+  template <int dim, typename Number>
+  inline
+  Number &
+  symmetric_tensor_access (const TableIndices<4> &indices,
+                           typename SymmetricTensorAccessors::StorageType<4,dim,Number>::base_tensor_type &data)
+  {
+    switch (dim)
+      {
+      case 1:
+        return data[0][0];
+
+      case 2:
+      {
+        unsigned int base_index[2];
+        for (unsigned int p=0; p<2; ++p)
+          {
+            const unsigned int i = indices[2*p];
+            const unsigned int j = indices[2*p+1];
+
+            if ((i == 0) && (j == 0))
+              base_index[p] = 0;
+            else if ((i == 1) && (j == 1))
+              base_index[p] = 1;
+            else
+              // the single off-diagonal entry
+              base_index[p] = 2;
+          }
+
+        return data[base_index[0]][base_index[1]];
+      }
+
+      case 3:
+      {
+        unsigned int base_index[2];
+        for (unsigned int p=0; p<2; ++p)
+          {
+            const unsigned int i = indices[2*p];
+            const unsigned int j = indices[2*p+1];
+
+            if ((i == 0) && (j == 0))
+              base_index[p] = 0;
+            else if ((i == 1) && (j == 1))
+              base_index[p] = 1;
+            else if ((i == 2) && (j == 2))
+              base_index[p] = 2;
+            else if (((i == 0) && (j == 1)) ||
+                     ((i == 1) && (j == 0)))
+              base_index[p] = 3;
+            else if (((i == 0) && (j == 2)) ||
+                     ((i == 2) && (j == 0)))
+              base_index[p] = 4;
+            else
+              {
+                Assert (((i == 1) && (j == 2)) ||
+                        ((i == 2) && (j == 1)),
+                        ExcInternalError());
+                base_index[p] = 5;
+              }
+          }
+
+        return data[base_index[0]][base_index[1]];
+      }
+
+      default:
+        Assert (false, ExcNotImplemented());
+      }
+
+    // only reached for dimensions that are not implemented; a reference to
+    // something has to be returned nevertheless
+    static Number dummy;
+    return dummy;
+  }
+
+
+  // Return, by value, the entry of a rank-4 symmetric tensor that is
+  // addressed by the given four indices, looking it up in the storage passed
+  // as second argument.
+  //
+  // Each entry of the tensor can be thought of as an entry in a matrix that
+  // maps rolled-out rank-2 tensors into rolled-out rank-2 tensors; this is
+  // the format in which rank-4 tensors are stored. The first two indices
+  // determine the row and the last two indices the column of that matrix.
+  // Both index pairs are translated in exactly the same way, so this is done
+  // in a loop over the two pairs. Only dim = 1, 2, 3 are implemented.
+  template <int dim, typename Number>
+  inline
+  Number
+  symmetric_tensor_access (const TableIndices<4> &indices,
+                           const typename SymmetricTensorAccessors::StorageType<4,dim,Number>::base_tensor_type &data)
+  {
+    switch (dim)
+      {
+      case 1:
+        return data[0][0];
+
+      case 2:
+      {
+        unsigned int base_index[2];
+        for (unsigned int p=0; p<2; ++p)
+          {
+            const unsigned int i = indices[2*p];
+            const unsigned int j = indices[2*p+1];
+
+            if ((i == 0) && (j == 0))
+              base_index[p] = 0;
+            else if ((i == 1) && (j == 1))
+              base_index[p] = 1;
+            else
+              // the single off-diagonal entry
+              base_index[p] = 2;
+          }
+
+        return data[base_index[0]][base_index[1]];
+      }
+
+      case 3:
+      {
+        unsigned int base_index[2];
+        for (unsigned int p=0; p<2; ++p)
+          {
+            const unsigned int i = indices[2*p];
+            const unsigned int j = indices[2*p+1];
+
+            if ((i == 0) && (j == 0))
+              base_index[p] = 0;
+            else if ((i == 1) && (j == 1))
+              base_index[p] = 1;
+            else if ((i == 2) && (j == 2))
+              base_index[p] = 2;
+            else if (((i == 0) && (j == 1)) ||
+                     ((i == 1) && (j == 0)))
+              base_index[p] = 3;
+            else if (((i == 0) && (j == 2)) ||
+                     ((i == 2) && (j == 0)))
+              base_index[p] = 4;
+            else
+              {
+                Assert (((i == 1) && (j == 2)) ||
+                        ((i == 2) && (j == 1)),
+                        ExcInternalError());
+                base_index[p] = 5;
+              }
+          }
+
+        return data[base_index[0]][base_index[1]];
+      }
+
+      default:
+        Assert (false, ExcNotImplemented());
+      }
+
+    // Only reached for dimensions that are not implemented. Since this
+    // function returns by value, no static object is needed for the
+    // fallback; a value-initialized temporary is sufficient.
+    return Number();
+  }
+
+} // end of namespace internal
+
+
+
+// Read-write access to a single entry. Every index is checked against the
+// dimension by an Assert, then the lookup in the compressed storage is
+// delegated to the internal access function.
+template <int rank, int dim, typename Number>
+inline
+Number &
+SymmetricTensor<rank,dim,Number>::operator () (const TableIndices<rank> &indices)
+{
+  for (unsigned int r=0; r<rank; ++r)
+    {
+      Assert (indices[r] < dimension, ExcIndexRange (indices[r], 0, dimension));
+    }
+
+  Number &entry = internal::symmetric_tensor_access<dim,Number> (indices, data);
+  return entry;
+}
+
+
+
+// Read-only access to a single entry, returned by value. Every index is
+// checked against the dimension by an Assert, then the lookup in the
+// compressed storage is delegated to the internal access function.
+template <int rank, int dim, typename Number>
+inline
+Number
+SymmetricTensor<rank,dim,Number>::operator ()
+(const TableIndices<rank> &indices) const
+{
+  for (unsigned int r=0; r<rank; ++r)
+    {
+      Assert (indices[r] < dimension, ExcIndexRange (indices[r], 0, dimension));
+    }
+
+  return internal::symmetric_tensor_access<dim,Number> (indices, data);
+}
+
+
+
+// Row access for constant tensors: return an accessor object that refers to
+// this tensor and records the given row as the first index.
+template <int rank, int dim, typename Number>
+internal::SymmetricTensorAccessors::Accessor<rank,dim,true,rank-1,Number>
+SymmetricTensor<rank,dim,Number>::operator [] (const unsigned int row) const
+{
+  typedef
+  internal::SymmetricTensorAccessors::Accessor<rank,dim,true,rank-1,Number>
+  accessor_type;
+
+  return accessor_type (*this, TableIndices<rank> (row));
+}
+
+
+
+// Row access for non-constant tensors: return an accessor object that
+// refers to this tensor and records the given row as the first index.
+template <int rank, int dim, typename Number>
+internal::SymmetricTensorAccessors::Accessor<rank,dim,false,rank-1,Number>
+SymmetricTensor<rank,dim,Number>::operator [] (const unsigned int row)
+{
+  typedef
+  internal::SymmetricTensorAccessors::Accessor<rank,dim,false,rank-1,Number>
+  accessor_type;
+
+  return accessor_type (*this, TableIndices<rank> (row));
+}
+
+
+
+// Access by a complete set of indices (constant version): translate the
+// indices into the position within the unrolled storage and return the
+// entry found there by value.
+template <int rank, int dim, typename Number>
+inline
+Number
+SymmetricTensor<rank,dim,Number>::operator [] (const TableIndices<rank> &indices) const
+{
+  const unsigned int unrolled_index = component_to_unrolled_index(indices);
+  return data[unrolled_index];
+}
+
+
+
+// Access by a complete set of indices (non-constant version): translate the
+// indices into the position within the unrolled storage and return a
+// reference to the entry found there.
+template <int rank, int dim, typename Number>
+inline
+Number &
+SymmetricTensor<rank,dim,Number>::operator [] (const TableIndices<rank> &indices)
+{
+  const unsigned int unrolled_index = component_to_unrolled_index(indices);
+  return data[unrolled_index];
+}
+
+
+
+// Return the entry at the given position of the unrolled storage by value.
+// The position is checked against the size of the storage.
+template <int rank, int dim, typename Number>
+inline
+Number
+SymmetricTensor<rank,dim,Number>::access_raw_entry (const unsigned int unrolled_index) const
+{
+  AssertIndexRange (unrolled_index, data.dimension);
+
+  return data[unrolled_index];
+}
+
+
+
+template <int rank, int dim, typename Number>
+inline
+Number &
+SymmetricTensor<rank,dim,Number>::access_raw_entry (const unsigned int index)
+{
+  // 'index' addresses the storage array directly, without any
+  // translation from tensor indices; it must be less than data.dimension
+  AssertIndexRange (index, data.dimension);
+  return data[index];
+}
+
+
+
+namespace internal
+{
+  // Norm of a symmetric rank-2 tensor, computed from its unrolled
+  // storage: the square root of the sum of squares of all tensor
+  // entries. The first dim stored values are counted once, the
+  // remaining ones twice since each of them represents two entries of
+  // the full tensor.
+  template <int dim, typename Number>
+  inline
+  Number
+  compute_norm (const typename SymmetricTensorAccessors::StorageType<2,dim,Number>::base_tensor_type &data)
+  {
+    // explicit formulas for the low-dimensional cases
+    if (dim == 1)
+      return std::fabs(data[0]);
+
+    if (dim == 2)
+      return std::sqrt(data[0]*data[0] + data[1]*data[1] +
+                       2*data[2]*data[2]);
+
+    if (dim == 3)
+      return std::sqrt(data[0]*data[0] + data[1]*data[1] +
+                       data[2]*data[2] + 2*data[3]*data[3] +
+                       2*data[4]*data[4] + 2*data[5]*data[5]);
+
+    // everything else: accumulate the weighted squares in a loop
+    Number sum_of_squares = Number();
+    for (unsigned int k=0; k<dim; ++k)
+      sum_of_squares += data[k] * data[k];
+    for (unsigned int k=dim; k<(dim*dim+dim)/2; ++k)
+      sum_of_squares += 2 * data[k] * data[k];
+    return std::sqrt(sum_of_squares);
+  }
+
+
+
+  // Norm of a symmetric rank-4 tensor, computed from its storage as a
+  // square matrix. Matrix entries whose row (column) index is dim or
+  // larger get an additional weight of 2 for that index, so the four
+  // index-range combinations carry the weights 1, 2, 2 and 4.
+  template <int dim, typename Number>
+  inline
+  Number
+  compute_norm (const typename SymmetricTensorAccessors::StorageType<4,dim,Number>::base_tensor_type &data)
+  {
+    if (dim == 1)
+      return std::fabs (data[0][0]);
+
+    const unsigned int n_components = data.dimension;
+    Number sum_of_squares = Number();
+
+    // weight 1: both indices below dim
+    for (unsigned int r=0; r<dim; ++r)
+      for (unsigned int c=0; c<dim; ++c)
+        sum_of_squares += data[r][c] * data[r][c];
+
+    // weight 2: only the column index is dim or larger
+    for (unsigned int r=0; r<dim; ++r)
+      for (unsigned int c=dim; c<n_components; ++c)
+        sum_of_squares += 2 * data[r][c] * data[r][c];
+
+    // weight 2: only the row index is dim or larger
+    for (unsigned int r=dim; r<n_components; ++r)
+      for (unsigned int c=0; c<dim; ++c)
+        sum_of_squares += 2 * data[r][c] * data[r][c];
+
+    // weight 4: both indices are dim or larger
+    for (unsigned int r=dim; r<n_components; ++r)
+      for (unsigned int c=dim; c<n_components; ++c)
+        sum_of_squares += 4 * data[r][c] * data[r][c];
+
+    return std::sqrt(sum_of_squares);
+  }
+
+} // end of namespace internal
+
+
+
+template <int rank, int dim, typename Number>
+inline
+Number
+SymmetricTensor<rank,dim,Number>::norm () const
+{
+  // the actual computation depends on the storage layout and is done
+  // by the compute_norm() overload matching the type of 'data'
+  return internal::compute_norm<dim,Number> (data);
+}
+
+
+
+namespace internal
+{
+  namespace SymmetricTensor
+  {
+    namespace
+    {
+      // map a pair of tensor indices to the position of the
+      // corresponding element in the array that stores the independent
+      // components of a symmetric tensor
+      //
+      // rank-2 version: diagonal entry (d,d) is at position d, the
+      // off-diagonal entries follow after the dim diagonal ones
+      template <int dim>
+      inline
+      unsigned int
+      component_to_unrolled_index
+      (const TableIndices<2> &indices)
+      {
+        Assert (indices[0] < dim, ExcIndexRange(indices[0], 0, dim));
+        Assert (indices[1] < dim, ExcIndexRange(indices[1], 0, dim));
+
+        // for small dimensions, use precomputed lookup tables
+        if (dim == 1)
+          return 0;
+
+        if (dim == 2)
+          {
+            static const unsigned int lookup[2][2] = {{0, 2},
+              {2, 1}
+            };
+            return lookup[indices[0]][indices[1]];
+          }
+
+        if (dim == 3)
+          {
+            static const unsigned int lookup[3][3] = {{0, 3, 4},
+              {3, 1, 5},
+              {4, 5, 2}
+            };
+            return lookup[indices[0]][indices[1]];
+          }
+
+        if (dim == 4)
+          {
+            static const unsigned int lookup[4][4] = {{0, 4, 5, 6},
+              {4, 1, 7, 8},
+              {5, 7, 2, 9},
+              {6, 8, 9, 3}
+            };
+            return lookup[indices[0]][indices[1]];
+          }
+
+        // for all other dimensions, figure out the numbering by hand.
+        // diagonal entries come first
+        if (indices[0] == indices[1])
+          return indices[0];
+
+        // for off-diagonal entries, bring the index pair into sorted
+        // order and count the pairs (d,e), e>d, until we find ours
+        TableIndices<2> ordered (indices);
+        ordered.sort ();
+
+        unsigned int offset = 0;
+        for (unsigned int d=0; d<dim; ++d)
+          for (unsigned int e=d+1; e<dim; ++e)
+            {
+              if ((ordered[0]==d) && (ordered[1]==e))
+                return dim+offset;
+              ++offset;
+            }
+
+        // should never get here:
+        AssertThrow(false, ExcInternalError());
+        return 0;
+      }
+
+      // map a set of tensor indices to the position of the
+      // corresponding element in the storage array
+      //
+      // catch-all version for ranks without a dedicated implementation
+      // above: not implemented, returns an invalid index
+      template <int dim, int rank>
+      inline
+      unsigned int
+      component_to_unrolled_index
+      (const TableIndices<rank> &indices)
+      {
+        (void)indices;
+        Assert (false, ExcNotImplemented());
+        return numbers::invalid_unsigned_int;
+      }
+    }
+  }
+}
+
+
+template <int rank, int dim, typename Number>
+inline
+unsigned int
+SymmetricTensor<rank,dim,Number>::component_to_unrolled_index
+(const TableIndices<rank> &indices)
+{
+  // forward to the free function; overload resolution on the type of
+  // 'indices' selects the implementation for this rank
+  return internal::SymmetricTensor::component_to_unrolled_index<dim> (indices);
+}
+
+
+
+namespace internal
+{
+  namespace SymmetricTensor
+  {
+    namespace
+    {
+      // a function to do the inverse of the unrolling from a set of
+      // indices to a scalar index into the array in which we store
+      // the elements of a symmetric tensor. in other words, it goes
+      // from the scalar index into the array to a set of indices of
+      // the tensor
+      //
+      // this function is for rank-2 tensors. the numbering has to be
+      // the exact inverse of component_to_unrolled_index(): the dim
+      // diagonal entries come first, followed by the off-diagonal
+      // entries (d,e), e>d, in lexicographic order
+      template <int dim>
+      inline
+      TableIndices<2>
+      unrolled_to_component_indices
+      (const unsigned int i,
+       const int2type<2> &)
+      {
+        Assert ((i < dealii::SymmetricTensor<2,dim,double>::n_independent_components),
+                ExcIndexRange(i, 0, dealii::SymmetricTensor<2,dim,double>::n_independent_components));
+        switch (dim)
+          {
+          case 1:
+          {
+            return TableIndices<2>(0,0);
+          }
+
+          case 2:
+          {
+            const TableIndices<2> table[3] =
+            {
+              TableIndices<2> (0,0),
+              TableIndices<2> (1,1),
+              TableIndices<2> (0,1)
+            };
+            return table[i];
+          }
+
+          case 3:
+          {
+            const TableIndices<2> table[6] =
+            {
+              TableIndices<2> (0,0),
+              TableIndices<2> (1,1),
+              TableIndices<2> (2,2),
+              TableIndices<2> (0,1),
+              TableIndices<2> (0,2),
+              TableIndices<2> (1,2)
+            };
+            return table[i];
+          }
+
+          default:
+            // the first dim entries are the diagonal ones
+            if (i<dim)
+              return TableIndices<2> (i,i);
+
+            // the off-diagonal entries are stored behind the dim
+            // diagonal ones, so the running counter has to start at
+            // dim in order to be comparable to the unrolled index i
+            for (unsigned int d=0, c=dim; d<dim; ++d)
+              for (unsigned int e=d+1; e<dim; ++e, ++c)
+                if (c==i)
+                  return TableIndices<2>(d,e);
+
+            // should never get here:
+            AssertThrow(false, ExcInternalError());
+            return TableIndices<2>(0, 0);
+          }
+      }
+
+      // a function to do the inverse of the unrolling from a set of
+      // indices to a scalar index into the array in which we store
+      // the elements of a symmetric tensor. in other words, it goes
+      // from the scalar index into the array to a set of indices of
+      // the tensor
+      //
+      // this function is for tensors of a rank not already handled
+      // above. it is not implemented and returns a default-constructed
+      // set of indices
+      template <int dim, int rank>
+      inline
+      TableIndices<rank>
+      unrolled_to_component_indices
+      (const unsigned int i,
+       const int2type<rank> &)
+      {
+        (void)i;
+        Assert ((i < dealii::SymmetricTensor<rank,dim,double>::n_independent_components),
+                ExcIndexRange(i, 0, dealii::SymmetricTensor<rank,dim,double>::n_independent_components));
+        Assert (false, ExcNotImplemented());
+        return TableIndices<rank>();
+      }
+
+    }
+  }
+}
+
+template <int rank, int dim, typename Number>
+inline
+TableIndices<rank>
+SymmetricTensor<rank,dim,Number>::unrolled_to_component_indices
+(const unsigned int i)
+{
+  // forward to the free function; the int2type<rank> tag argument
+  // selects the implementation for this rank
+  return
+    internal::SymmetricTensor::unrolled_to_component_indices<dim> (i,
+        internal::int2type<rank>());
+}
+
+
+
+template <int rank, int dim, typename Number>
+template <class Archive>
+inline
+void
+SymmetricTensor<rank,dim,Number>::serialize(Archive &ar, const unsigned int)
+{
+  // the storage array is the only member passed to the archive; the
+  // (unnamed) version argument is not used
+  ar &data;
+}
+
+
+#endif // DOXYGEN
+
+/* ----------------- Non-member functions operating on tensors. ------------ */
+
+
+/**
+ * Add a general Tensor to a SymmetricTensor of equal rank. The symmetric
+ * operand is first converted to a general Tensor; the result is a general
+ * Tensor as well.
+ *
+ * @relates SymmetricTensor
+ */
+template <int rank, int dim, typename Number, typename OtherNumber>
+inline
+Tensor<rank, dim, typename ProductType<Number, OtherNumber>::type>
+operator+(const SymmetricTensor<rank, dim, Number> &left,
+          const Tensor<rank, dim, OtherNumber> &right)
+{
+  const Tensor<rank, dim, Number> left_as_tensor (left);
+  return left_as_tensor + right;
+}
+
+
+/**
+ * Add a SymmetricTensor to a general Tensor of equal rank. The symmetric
+ * operand is first converted to a general Tensor; the result is a general
+ * Tensor as well.
+ *
+ * @relates SymmetricTensor
+ */
+template <int rank, int dim, typename Number, typename OtherNumber>
+inline
+Tensor<rank, dim, typename ProductType<Number, OtherNumber>::type>
+operator+(const Tensor<rank, dim, Number> &left,
+          const SymmetricTensor<rank, dim, OtherNumber> &right)
+{
+  const Tensor<rank, dim, OtherNumber> right_as_tensor (right);
+  return left + right_as_tensor;
+}
+
+
+/**
+ * Subtract a general Tensor from a SymmetricTensor of equal rank. The
+ * symmetric operand is first converted to a general Tensor; the result is
+ * a general Tensor as well.
+ *
+ * @relates SymmetricTensor
+ */
+template <int rank, int dim, typename Number, typename OtherNumber>
+inline
+Tensor<rank, dim, typename ProductType<Number, OtherNumber>::type>
+operator-(const SymmetricTensor<rank, dim, Number> &left,
+          const Tensor<rank, dim, OtherNumber> &right)
+{
+  const Tensor<rank, dim, Number> left_as_tensor (left);
+  return left_as_tensor - right;
+}
+
+
+/**
+ * Subtract a SymmetricTensor from a general Tensor of equal rank. The
+ * symmetric operand is first converted to a general Tensor; the result is
+ * a general Tensor as well.
+ *
+ * @relates SymmetricTensor
+ */
+template <int rank, int dim, typename Number, typename OtherNumber>
+inline
+Tensor<rank, dim, typename ProductType<Number, OtherNumber>::type>
+operator-(const Tensor<rank, dim, Number> &left,
+          const SymmetricTensor<rank, dim, OtherNumber> &right)
+{
+  const Tensor<rank, dim, OtherNumber> right_as_tensor (right);
+  return left - right_as_tensor;
+}
+
+
+
+/**
+ * Compute the determinant of a symmetric tensor of rank 2. The determinant
+ * is also commonly referred to as the third invariant of rank-2 tensors.
+ *
+ * For a one-dimensional tensor, the determinant equals the only element and
+ * is therefore equivalent to the trace.
+ *
+ * Only the cases <tt>dim=1,2,3</tt> are implemented.
+ *
+ * For greater notational simplicity, there is also a <tt>third_invariant</tt>
+ * function that returns the determinant of a tensor.
+ *
+ * @relates SymmetricTensor
+ * @author Wolfgang Bangerth, 2005
+ */
+template <int dim, typename Number>
+inline
+Number determinant (const SymmetricTensor<2,dim,Number> &t)
+{
+  if (dim == 1)
+    return t.data[0];
+
+  if (dim == 2)
+    return (t.data[0] * t.data[1] - t.data[2]*t.data[2]);
+
+  if (dim == 3)
+    // same formula as for general tensors, simplified by making use
+    // of the symmetry of the off-diagonal entries
+    return ( t.data[0]*t.data[1]*t.data[2]
+             -t.data[0]*t.data[5]*t.data[5]
+             -t.data[1]*t.data[4]*t.data[4]
+             -t.data[2]*t.data[3]*t.data[3]
+             +2*t.data[3]*t.data[4]*t.data[5] );
+
+  // no other dimension is implemented
+  Assert (false, ExcNotImplemented());
+  return 0;
+}
+
+
+
+/**
+ * Compute the determinant of a symmetric tensor of rank 2. This function
+ * therefore computes the same value as the <tt>determinant()</tt> functions
+ * and is only provided for greater notational simplicity (since there are
+ * also functions <tt>first_invariant</tt> and <tt>second_invariant</tt>).
+ *
+ * The result has the same type <tt>Number</tt> as the elements of the
+ * tensor, exactly as for <tt>determinant()</tt>.
+ *
+ * @relates SymmetricTensor
+ * @author Wolfgang Bangerth, 2005
+ */
+template <int dim, typename Number>
+inline
+Number third_invariant (const SymmetricTensor<2,dim,Number> &t)
+{
+  return determinant (t);
+}
+
+
+
+/**
+ * Compute and return the trace of a symmetric tensor of rank 2, i.e. the
+ * sum of its diagonal entries. The trace is the first invariant of a rank-2
+ * tensor.
+ *
+ * @relates SymmetricTensor
+ * @author Wolfgang Bangerth, 2005
+ */
+template <int dim, typename Number>
+Number trace (const SymmetricTensor<2,dim,Number> &d)
+{
+  // the diagonal entries are the first dim elements of the storage
+  // array; start with the first one and add up the rest
+  Number sum = d.data[0];
+  unsigned int k = 1;
+  while (k < dim)
+    {
+      sum += d.data[k];
+      ++k;
+    }
+  return sum;
+}
+
+
+/**
+ * Compute the trace of a symmetric tensor of rank 2. This function
+ * therefore computes the same value as the <tt>trace()</tt> functions and is
+ * only provided for greater notational simplicity (since there are also
+ * functions <tt>second_invariant</tt> and <tt>third_invariant</tt>).
+ *
+ * @relates SymmetricTensor
+ * @author Wolfgang Bangerth, 2005
+ */
+template <int dim, typename Number>
+inline
+Number first_invariant (const SymmetricTensor<2,dim,Number> &t)
+{
+  return trace (t);
+}
+
+
+/**
+ * Compute the second invariant of a tensor of rank 2. The second invariant is
+ * defined as <tt>I2 = 1/2[ (trace sigma)^2 - trace (sigma^2) ]</tt>.
+ *
+ * This is the version for <tt>dim=1</tt>, where the tensor has a single
+ * entry and the two terms of the definition cancel; the function therefore
+ * always returns zero and does not look at its argument.
+ *
+ * @relates SymmetricTensor
+ * @author Wolfgang Bangerth, 2005, 2010
+ */
+template <typename Number>
+inline
+Number second_invariant (const SymmetricTensor<2,1,Number> &)
+{
+  return 0;
+}
+
+
+
+/**
+ * Compute the second invariant of a tensor of rank 2. The second invariant is
+ * defined as <tt>I2 = 1/2[ (trace sigma)^2 - trace (sigma^2) ]</tt>.
+ *
+ * This is the version for <tt>dim=2</tt>.
+ *
+ * @relates SymmetricTensor
+ * @author Wolfgang Bangerth, 2005, 2010
+ */
+template <typename Number>
+inline
+Number second_invariant (const SymmetricTensor<2,2,Number> &t)
+{
+  // the three independent entries of the tensor
+  const Number t00 = t[0][0];
+  const Number t11 = t[1][1];
+  const Number t01 = t[0][1];
+
+  return t00*t11 - t01*t01;
+}
+
+
+
+/**
+ * Compute the second invariant of a tensor of rank 2. The second invariant is
+ * defined as <tt>I2 = 1/2[ (trace sigma)^2 - trace (sigma^2) ]</tt>.
+ *
+ * This is the version for <tt>dim=3</tt>.
+ *
+ * @relates SymmetricTensor
+ * @author Wolfgang Bangerth, 2005, 2010
+ */
+template <typename Number>
+inline
+Number second_invariant (const SymmetricTensor<2,3,Number> &t)
+{
+  // diagonal entries
+  const Number t00 = t[0][0];
+  const Number t11 = t[1][1];
+  const Number t22 = t[2][2];
+
+  // off-diagonal entries
+  const Number t01 = t[0][1];
+  const Number t02 = t[0][2];
+  const Number t12 = t[1][2];
+
+  return (t00*t11 + t11*t22 + t22*t00
+          - t01*t01 - t02*t02 - t12*t12);
+}
+
+
+
+
+/**
+ * Return the transpose of the given symmetric tensor. Since we are working
+ * with symmetric objects, the transpose is of course the same as the original
+ * tensor, so this function simply returns a copy of its argument. It mainly
+ * exists for compatibility with the Tensor class.
+ *
+ * @relates SymmetricTensor
+ * @author Wolfgang Bangerth, 2005
+ */
+template <int rank, int dim, typename Number>
+inline
+SymmetricTensor<rank,dim,Number>
+transpose (const SymmetricTensor<rank,dim,Number> &t)
+{
+  return t;
+}
+
+
+
+/**
+ * Compute the deviator of a symmetric tensor, which is defined as <tt>dev[s]
+ * = s - 1/dim*tr[s]*I</tt>, where <tt>I</tt> is the identity operator. This
+ * quantity equals the original tensor minus its contractive or dilative
+ * component and refers to the shear in, for example, elasticity.
+ *
+ * @relates SymmetricTensor
+ * @author Wolfgang Bangerth, 2005
+ */
+template <int dim, typename Number>
+inline
+SymmetricTensor<2,dim,Number>
+deviator (const SymmetricTensor<2,dim,Number> &t)
+{
+  // the amount to take off each diagonal entry: the trace divided by
+  // the space dimension
+  const Number scaled_trace = trace(t) / dim;
+
+  // work on a copy of the input; only the diagonal entries, i.e. the
+  // first dim stored values, are modified
+  SymmetricTensor<2,dim,Number> result (t);
+  for (unsigned int d=0; d<dim; ++d)
+    result.data[d] -= scaled_trace;
+
+  return result;
+}
+
+
+
+/**
+ * Return a unit symmetric tensor of rank 2, i.e., the dim-by-dim identity
+ * matrix.
+ *
+ * @relates SymmetricTensor
+ * @author Wolfgang Bangerth, 2005
+ */
+template <int dim, typename Number>
+inline
+SymmetricTensor<2,dim,Number>
+unit_symmetric_tensor ()
+{
+  // start from a default constructed tensor filled with zeros
+  SymmetricTensor<2,dim,Number> identity;
+
+  // the diagonal entries are the first dim stored values; set each of
+  // them to one
+  for (unsigned int d=0; d<dim; ++d)
+    identity.data[d] = 1;
+
+  return identity;
+}
+
+
+
+/**
+ * Return a unit symmetric tensor of rank 2, i.e., the dim-by-dim identity
+ * matrix. This version of the function uses <code>double</code> as the
+ * data type for the elements and simply forwards to the version that takes
+ * the number type as a second template argument.
+ *
+ * @relates SymmetricTensor
+ * @author Wolfgang Bangerth, 2005
+ */
+template <int dim>
+inline
+SymmetricTensor<2,dim>
+unit_symmetric_tensor ()
+{
+  return unit_symmetric_tensor<dim,double>();
+}
+
+
+
+/**
+ * Return the tensor of rank 4 that, when multiplied by a symmetric rank 2
+ * tensor <tt>t</tt> returns the deviator $\textrm{dev}\ t$. It is the
+ * operator representation of the linear deviator operator.
+ *
+ * For every tensor <tt>t</tt>, there holds the identity
+ * <tt>deviator(t)==deviator_tensor<dim>()*t</tt>, up to numerical
+ * round-off. The reason this operator representation is provided is that one
+ * sometimes needs to invert operators like <tt>identity_tensor<dim>() +
+ * delta_t*deviator_tensor<dim>()</tt> or similar.
+ *
+ * @relates SymmetricTensor
+ * @author Wolfgang Bangerth, 2005
+ */
+template <int dim, typename Number>
+inline
+SymmetricTensor<4,dim,Number>
+deviator_tensor ()
+{
+  SymmetricTensor<4,dim,Number> result;
+
+  // the leading dim-by-dim block of the storage matrix acts on the
+  // diagonal elements of the rank-2 tensor: identity minus 1/dim in
+  // every entry
+  for (unsigned int row=0; row<dim; ++row)
+    for (unsigned int col=0; col<dim; ++col)
+      {
+        if (row == col)
+          result.data[row][col] = 1 - 1./dim;
+        else
+          result.data[row][col] = 0 - 1./dim;
+      }
+
+  // the remaining diagonal entries copy over the off-diagonal
+  // elements of the rank-2 tensor. since the double contraction
+  // visits each off-diagonal element twice, a plain copy requires a
+  // weight of 1/2
+  const unsigned int n_components
+    = internal::SymmetricTensorAccessors::StorageType<4,dim,Number>::n_rank2_components;
+  for (unsigned int k=dim; k<n_components; ++k)
+    result.data[k][k] = 0.5;
+
+  return result;
+}
+
+
+
+/**
+ * Return the tensor of rank 4 that, when multiplied by a symmetric rank 2
+ * tensor <tt>t</tt> returns the deviator <tt>dev t</tt>. It is the operator
+ * representation of the linear deviator operator. This version of the
+ * function uses <code>double</code> as the data type for the elements.
+ *
+ * For every tensor <tt>t</tt>, there holds the identity
+ * <tt>deviator(t)==deviator_tensor<dim>()*t</tt>, up to numerical
+ * round-off. The reason this operator representation is provided is that one
+ * sometimes needs to invert operators like <tt>identity_tensor<dim>() +
+ * delta_t*deviator_tensor<dim>()</tt> or similar.
+ *
+ * @relates SymmetricTensor
+ * @author Wolfgang Bangerth, 2005
+ */
+template <int dim>
+inline
+SymmetricTensor<4,dim>
+deviator_tensor ()
+{
+  return deviator_tensor<dim,double>();
+}
+
+
+
+/**
+ * Return the fourth-order symmetric identity tensor which maps symmetric
+ * second-order tensors to themselves.
+ *
+ * Note that this tensor, even though it is the identity, has a somewhat funny
+ * form, and in particular does not only consist of zeros and ones. For
+ * example, for <tt>dim=2</tt>, the identity tensor has all zero entries
+ * except for <tt>id[0][0][0][0]=id[1][1][1][1]=1</tt> and
+ * <tt>id[0][1][0][1]=id[0][1][1][0]=id[1][0][0][1]=id[1][0][1][0]=1/2</tt>.
+ * To see why this factor of 1/2 is necessary, consider computing <tt>A=Id :
+ * B</tt>. For the element <tt>a_01</tt> we have <tt>a_01=id_0100 b_00 +
+ * id_0111 b_11 + id_0101 b_01 + id_0110 b_10</tt>. On the other hand, we need
+ * to have <tt>a_01=b_01</tt>, and symmetry implies <tt>b_01=b_10</tt>,
+ * leading to <tt>a_01=(id_0101+id_0110) b_01</tt>, or, again by symmetry,
+ * <tt>id_0101=id_0110=1/2</tt>. Similar considerations hold for the three-
+ * dimensional case.
+ *
+ * This issue is also explained in the introduction to step-44.
+ *
+ * @relates SymmetricTensor
+ * @author Wolfgang Bangerth, 2005
+ */
+template <int dim, typename Number>
+inline
+SymmetricTensor<4,dim,Number>
+identity_tensor ()
+{
+  SymmetricTensor<4,dim,Number> result;
+
+  // the first dim diagonal entries of the storage matrix act on the
+  // diagonal elements of the rank-2 tensor and get a weight of one
+  for (unsigned int k=0; k<dim; ++k)
+    result.data[k][k] = 1;
+
+  // the remaining diagonal entries copy over the off-diagonal
+  // elements of the rank-2 tensor. since the double contraction
+  // visits each off-diagonal element twice, a plain copy requires a
+  // weight of 1/2
+  const unsigned int n_components
+    = internal::SymmetricTensorAccessors::StorageType<4,dim,Number>::n_rank2_components;
+  for (unsigned int k=dim; k<n_components; ++k)
+    result.data[k][k] = 0.5;
+
+  return result;
+}
+
+
+
+/**
+ * Return the fourth-order symmetric identity tensor which maps symmetric
+ * second-order tensors to themselves. This version of the function uses
+ * <code>double</code> as the data type for the elements.
+ *
+ * Note that this tensor, even though it is the identity, has a somewhat funny
+ * form, and in particular does not only consist of zeros and ones. For
+ * example, for <tt>dim=2</tt>, the identity tensor has all zero entries
+ * except for <tt>id[0][0][0][0]=id[1][1][1][1]=1</tt> and
+ * <tt>id[0][1][0][1]=id[0][1][1][0]=id[1][0][0][1]=id[1][0][1][0]=1/2</tt>.
+ * To see why this factor of 1/2 is necessary, consider computing <tt>A=Id :
+ * B</tt>. For the element <tt>a_01</tt> we have <tt>a_01=id_0100 b_00 +
+ * id_0111 b_11 + id_0101 b_01 + id_0110 b_10</tt>. On the other hand, we need
+ * to have <tt>a_01=b_01</tt>, and symmetry implies <tt>b_01=b_10</tt>,
+ * leading to <tt>a_01=(id_0101+id_0110) b_01</tt>, or, again by symmetry,
+ * <tt>id_0101=id_0110=1/2</tt>. Similar considerations hold for the three-
+ * dimensional case.
+ *
+ * @relates SymmetricTensor
+ * @author Wolfgang Bangerth, 2005
+ */
+template <int dim>
+inline
+SymmetricTensor<4,dim>
+identity_tensor ()
+{
+  return identity_tensor<dim,double>();
+}
+
+
+
+/**
+ * Invert a symmetric rank-4 tensor. Since symmetric rank-4 tensors are
+ * mappings from and to symmetric rank-2 tensors, they can have an inverse.
+ * This function computes it, if it exists, for the case that the dimension
+ * equals either 1 or 2. For any other dimension this generic version is not
+ * implemented.
+ *
+ * If a tensor is not invertible, then the result is unspecified, but will
+ * likely contain the results of a division by zero or a very small number at
+ * the very least.
+ *
+ * @relates SymmetricTensor
+ * @author Wolfgang Bangerth, 2005
+ */
+template <int dim, typename Number>
+inline
+SymmetricTensor<4,dim,Number>
+invert (const SymmetricTensor<4,dim,Number> &t)
+{
+  SymmetricTensor<4,dim,Number> tmp;
+  switch (dim)
+    {
+    case 1:
+      // a single entry: the inverse is its reciprocal
+      tmp.data[0][0] = 1./t.data[0][0];
+      break;
+    case 2:
+
+      // inverting this tensor is a little more
+      // complicated than necessary, since we
+      // store the data of 't' as a 3x3 matrix
+      // t.data, but the product between a rank-4
+      // and a rank-2 tensor is really not the
+      // product between this matrix and the
+      // 3-vector of a rhs, but rather
+      //
+      // B.vec = t.data * mult * A.vec
+      //
+      // where mult is a 3x3 matrix with
+      // entries [[1,0,0],[0,1,0],[0,0,2]] to
+      // capture the fact that we need to add up
+      // both the c_ij12*a_12 and the c_ij21*a_21
+      // terms
+      //
+      // in addition, in this scheme, the
+      // identity tensor has the matrix
+      // representation mult^-1.
+      //
+      // the inverse of 't' therefore has the
+      // matrix representation
+      //
+      // inv.data = mult^-1 * t.data^-1 * mult^-1
+      //
+      // in order to compute it, let's first
+      // compute the inverse of t.data and put it
+      // into tmp.data; at the end of the
+      // function we then scale the last row and
+      // column of the inverse by 1/2,
+      // corresponding to the left and right
+      // multiplication with mult^-1
+    {
+      // t4...t04 are products of entries of t.data that are used more
+      // than once below; t07 is the reciprocal of the determinant of
+      // the 3x3 matrix t.data
+      const Number t4 = t.data[0][0]*t.data[1][1],
+                   t6 = t.data[0][0]*t.data[1][2],
+                   t8 = t.data[0][1]*t.data[1][0],
+                   t00 = t.data[0][2]*t.data[1][0],
+                   t01 = t.data[0][1]*t.data[2][0],
+                   t04 = t.data[0][2]*t.data[2][0],
+                   t07 = 1.0/(t4*t.data[2][2]-t6*t.data[2][1]-
+                              t8*t.data[2][2]+t00*t.data[2][1]+
+                              t01*t.data[1][2]-t04*t.data[1][1]);
+      // entries of the inverse of the 3x3 matrix t.data
+      tmp.data[0][0] = (t.data[1][1]*t.data[2][2]-t.data[1][2]*t.data[2][1])*t07;
+      tmp.data[0][1] = -(t.data[0][1]*t.data[2][2]-t.data[0][2]*t.data[2][1])*t07;
+      tmp.data[0][2] = -(-t.data[0][1]*t.data[1][2]+t.data[0][2]*t.data[1][1])*t07;
+      tmp.data[1][0] = -(t.data[1][0]*t.data[2][2]-t.data[1][2]*t.data[2][0])*t07;
+      tmp.data[1][1] = (t.data[0][0]*t.data[2][2]-t04)*t07;
+      tmp.data[1][2] = -(t6-t00)*t07;
+      tmp.data[2][0] = -(-t.data[1][0]*t.data[2][1]+t.data[1][1]*t.data[2][0])*t07;
+      tmp.data[2][1] = -(t.data[0][0]*t.data[2][1]-t01)*t07;
+      tmp.data[2][2] = (t4-t8)*t07;
+
+      // scale last row and column as mentioned
+      // above; the entry in the corner belongs to
+      // both and is therefore scaled twice
+      tmp.data[2][0] /= 2;
+      tmp.data[2][1] /= 2;
+      tmp.data[0][2] /= 2;
+      tmp.data[1][2] /= 2;
+      tmp.data[2][2] /= 4;
+    }
+    break;
+    default:
+      // no other dimension is implemented in this generic version
+      Assert (false, ExcNotImplemented());
+    }
+  return tmp;
+}
+
+
+
+/**
+ * Invert a symmetric rank-4 tensor. Since symmetric rank-4 tensors are
+ * mappings from and to symmetric rank-2 tensors, they can have an inverse.
+ * This function computes it, if it exists, for the case that the dimension
+ * equals 3 and the elements are of type <code>double</code>.
+ *
+ * If a tensor is not invertible, then the result is unspecified, but will
+ * likely contain the results of a division by zero or a very small number at
+ * the very least.
+ *
+ * @relates SymmetricTensor
+ * @author Wolfgang Bangerth, 2005
+ */
+template <>
+SymmetricTensor<4,3,double>
+invert (const SymmetricTensor<4,3,double> &t);
+// this specialization is only declared here; it is implemented in the
+// .cc file for double data types
+
+
+/**
+ * Return the tensor of rank 4 that is the outer product of the two tensors
+ * given as arguments, i.e. the result $T=t1 \otimes t2$ satisfies <tt>T phi =
+ * t1 (t2 : phi)</tt> for all symmetric tensors <tt>phi</tt>.
+ *
+ * For example, the deviator tensor can be computed as
+ * <tt>identity_tensor<dim,Number>() -
+ * 1/d*outer_product(unit_symmetric_tensor<dim,Number>(),
+ * unit_symmetric_tensor<dim,Number>())</tt>, since the (double) contraction
+ * with the unit tensor yields the trace of a symmetric tensor.
+ *
+ * @relates SymmetricTensor
+ * @author Wolfgang Bangerth, 2005
+ */
+template <int dim, typename Number>
+inline
+SymmetricTensor<4,dim,Number>
+outer_product (const SymmetricTensor<2,dim,Number> &t1,
+               const SymmetricTensor<2,dim,Number> &t2)
+{
+  SymmetricTensor<4,dim,Number> product;
+
+  // because of the symmetries, it is enough to visit index pairs
+  // (i,j) with j>=i and (k,l) with l>=k
+  for (unsigned int i=0; i<dim; ++i)
+    for (unsigned int j=i; j<dim; ++j)
+      {
+        // the factor contributed by t1 is the same for all (k,l)
+        const Number left_entry = t1[i][j];
+
+        for (unsigned int k=0; k<dim; ++k)
+          for (unsigned int l=k; l<dim; ++l)
+            product[i][j][k][l] = left_entry * t2[k][l];
+      }
+
+  return product;
+}
+
+
+
+/**
+ * Return the symmetrized version of a full rank-2 tensor, i.e.
+ * (t+transpose(t))/2, as a symmetric rank-2 tensor. This is the version for
+ * general dimensions.
+ *
+ * @relates SymmetricTensor
+ * @author Wolfgang Bangerth, 2005
+ */
+template <int dim,typename Number>
+inline
+SymmetricTensor<2,dim,Number>
+symmetrize (const Tensor<2,dim,Number> &t)
+{
+  // collect the independent components in the order in which the
+  // symmetric tensor stores them
+  Number entries[(dim*dim+dim)/2];
+
+  // first the dim diagonal elements, which are simply copied
+  for (unsigned int d=0; d<dim; ++d)
+    entries[d] = t[d][d];
+
+  // then the off-diagonal ones (d,e), e>d, each as the mean of the
+  // two entries on either side of the diagonal
+  unsigned int next = dim;
+  for (unsigned int d=0; d<dim; ++d)
+    for (unsigned int e=d+1; e<dim; ++e)
+      {
+        entries[next] = (t[d][e]+t[e][d])*0.5;
+        ++next;
+      }
+
+  return SymmetricTensor<2,dim,Number>(entries);
+}
+
+
+
+/**
+ * Multiplication of a symmetric tensor of general rank with a scalar from the
+ * right. This version of the operator is used if the scalar has the same data
+ * type as is used to store the elements of the symmetric tensor.
+ *
+ * @relates SymmetricTensor
+ */
+template <int rank, int dim, typename Number>
+inline
+SymmetricTensor<rank,dim,Number>
+operator * (const SymmetricTensor<rank,dim,Number> &t,
+            const Number                            factor)
+{
+  // scale a copy of the tensor in place and return it
+  SymmetricTensor<rank,dim,Number> scaled (t);
+  scaled *= factor;
+  return scaled;
+}
+
+
+
+/**
+ * Multiplication of a symmetric tensor of general rank with a scalar from the
+ * left. This version of the operator is used if the scalar has the same data
+ * type as is used to store the elements of the symmetric tensor.
+ *
+ * @relates SymmetricTensor
+ */
+template <int rank, int dim, typename Number>
+inline
+SymmetricTensor<rank,dim,Number>
+operator * (const Number                            factor,
+            const SymmetricTensor<rank,dim,Number> &t)
+{
+  // simply forward to the operator that takes the scalar as its
+  // second argument
+  return t*factor;
+}
+
+
+#ifndef DEAL_II_WITH_CXX11
+
+// Specializations of ProductType for products that involve a symmetric
+// tensor; they are only compiled if DEAL_II_WITH_CXX11 is not defined.
+//
+// Product of a type T with a symmetric tensor that stores elements of
+// type U: a symmetric tensor of the same rank and dimension whose
+// element type is the product type of T and U.
+template <typename T, typename U, int rank, int dim>
+struct ProductType<T,SymmetricTensor<rank,dim,U> >
+{
+  typedef SymmetricTensor<rank,dim,typename ProductType<T,U>::type> type;
+};
+
+// Same as above, with the symmetric tensor (storing elements of type T)
+// as the first and the type U as the second factor.
+template <typename T, typename U, int rank, int dim>
+struct ProductType<SymmetricTensor<rank,dim,T>,U>
+{
+  typedef SymmetricTensor<rank,dim,typename ProductType<T,U>::type> type;
+};
+
+#endif
+
+
+
+/**
+ * Multiplication of a symmetric tensor with a scalar number from the right.
+ *
+ * The purpose of this operator is to enable only multiplication of a tensor
+ * by a scalar number (i.e., a floating point number, a complex floating point
+ * number, etc.). The function is written in a way that only allows the
+ * compiler to consider the function if the second argument is indeed a scalar
+ * number -- in other words, @p OtherNumber will not match, for example
+ * <code>std::vector@<double@></code> as the product of a tensor and a vector
+ * clearly would make no sense. The mechanism by which the compiler is
+ * prohibited from considering this operator for multiplication with
+ * non-scalar types is explained in the documentation of the EnableIfScalar
+ * class.
+ *
+ * The return type of the function is chosen so that it matches the types of
+ * both the tensor and the scalar argument. For example, if you multiply a
+ * <code>SymmetricTensor@<2,dim,double@></code> by
+ * <code>std::complex@<double@></code>, then the result will be a
+ * <code>SymmetricTensor@<2,dim,std::complex@<double@>@></code>. In other
+ * words, the type with which the returned tensor stores its components equals
+ * the type you would get if you multiplied an individual component of the
+ * input tensor by the scalar factor.
+ *
+ * @relates SymmetricTensor
+ * @relates EnableIfScalar
+ */
+template <int rank, int dim, typename Number, typename OtherNumber>
+inline
+SymmetricTensor<rank,dim,typename ProductType<Number,typename EnableIfScalar<OtherNumber>::type>::type>
+operator * (const SymmetricTensor<rank,dim,Number> &t,
+            const OtherNumber                    factor)
+{
+  // the type in which the result stores its elements
+  typedef typename ProductType<Number,OtherNumber>::type result_number;
+
+  // both factors are converted into the result type explicitly before
+  // multiplying because, for awkward reasons, the C++ standard
+  // committee saw it fit to not define an
+  //   operator*(float,std::complex<double>)
+  // (as well as with switched arguments and double<->float).
+  SymmetricTensor<rank,dim,result_number> scaled (t);
+  const result_number converted_factor = result_number(factor);
+  scaled *= converted_factor;
+  return scaled;
+}
+
+
+
+/**
+ * Multiplication of a symmetric tensor with a scalar number from the left.
+ * See the discussion with the operator with switched arguments for more
+ * information about template arguments and the return type.
+ *
+ * Here, @p Number is the type of the scalar factor and @p OtherNumber the
+ * type in which the tensor stores its elements. It is therefore @p Number
+ * that is passed through EnableIfScalar, so that the compiler only considers
+ * this operator if the left operand is indeed a scalar number. The return
+ * type is the same as that of the operator with switched arguments, to
+ * which this function forwards.
+ *
+ * @relates SymmetricTensor
+ * @relates EnableIfScalar
+ */
+template <int rank, int dim, typename Number, typename OtherNumber>
+inline
+SymmetricTensor<rank,dim,typename ProductType<OtherNumber,typename EnableIfScalar<Number>::type>::type>
+operator * (const Number                     factor,
+            const SymmetricTensor<rank,dim,OtherNumber> &t)
+{
+  // simply forward to the other operator with switched arguments
+  return (t*factor);
+}
+
+
+
+/**
+ * Division of a symmetric tensor of general rank by a scalar.
+ *
+ * @relates SymmetricTensor
+ */
+template <int rank, int dim, typename Number>
+inline
+SymmetricTensor<rank,dim,Number>
+operator / (const SymmetricTensor<rank,dim,Number> &t,
+            const Number                            factor)
+{
+  SymmetricTensor<rank,dim,Number> quotient (t); // work on a copy of t
+  quotient /= factor;
+  return quotient;
+}
+
+
+
+/**
+ * Multiplication of a symmetric tensor of general rank with a scalar from the
+ * right.
+ *
+ * @relates SymmetricTensor
+ */
+template <int rank, int dim>
+inline
+SymmetricTensor<rank,dim>
+operator * (const SymmetricTensor<rank,dim> &t,
+            const double                     factor)
+{
+  SymmetricTensor<rank,dim> scaled (t); // copy, then scale in place
+  scaled *= factor;
+  return scaled;
+}
+
+
+
+/**
+ * Multiplication of a symmetric tensor of general rank with a scalar from the
+ * left.
+ *
+ * @relates SymmetricTensor
+ */
+template <int rank, int dim>
+inline
+SymmetricTensor<rank,dim>
+operator * (const double                     factor,
+            const SymmetricTensor<rank,dim> &t)
+{
+  SymmetricTensor<rank,dim> scaled (t); // copy, then scale in place
+  scaled *= factor;
+  return scaled;
+}
+
+
+
+/**
+ * Division of a symmetric tensor of general rank by a scalar.
+ *
+ * @relates SymmetricTensor
+ */
+template <int rank, int dim>
+inline
+SymmetricTensor<rank,dim>
+operator / (const SymmetricTensor<rank,dim> &t,
+            const double                     factor)
+{
+  SymmetricTensor<rank,dim> quotient (t); // work on a copy of t
+  quotient /= factor;
+  return quotient;
+}
+
+/**
+ * Compute the scalar product $a:b=\sum_{i,j} a_{ij}b_{ij}$ between two
+ * tensors $a,b$ of rank 2. In the current case where both arguments are
+ * symmetric tensors, this is equivalent to calling the expression
+ * <code>t1*t2</code> which uses the overloaded <code>operator*</code> between
+ * two symmetric tensors of rank 2.
+ *
+ * @relates SymmetricTensor
+ */
+template <int dim, typename Number>
+inline
+Number
+scalar_product (const SymmetricTensor<2,dim,Number> &t1,
+                const SymmetricTensor<2,dim,Number> &t2)
+{
+  return t1 * t2; // operator* between two symmetric rank-2 tensors
+}
+
+
+/**
+ * Compute the scalar product $a:b=\sum_{i,j} a_{ij}b_{ij}$ between two
+ * tensors $a,b$ of rank 2. We don't use <code>operator*</code> for this
+ * operation since the product between two tensors is usually assumed to be
+ * the contraction over the last index of the first tensor and the first index
+ * of the second tensor, for example $(a\cdot b)_{ij}=\sum_k a_{ik}b_{kj}$.
+ *
+ * @relates Tensor @relates SymmetricTensor
+ */
+template <int dim, typename Number>
+inline
+Number
+scalar_product (const SymmetricTensor<2,dim,Number> &t1,
+                const Tensor<2,dim,Number> &t2)
+{
+  Number sum = 0;
+  for (unsigned int row=0; row<dim; ++row)
+    for (unsigned int col=0; col<dim; ++col)
+      sum += t1[row][col] * t2[row][col];
+  return sum;
+}
+
+
+/**
+ * Compute the scalar product $a:b=\sum_{i,j} a_{ij}b_{ij}$ between two
+ * tensors $a,b$ of rank 2. We don't use <code>operator*</code> for this
+ * operation since the product between two tensors is usually assumed to be
+ * the contraction over the last index of the first tensor and the first index
+ * of the second tensor, for example $(a\cdot b)_{ij}=\sum_k a_{ik}b_{kj}$.
+ *
+ * @relates Tensor @relates SymmetricTensor
+ */
+template <int dim, typename Number>
+inline Number
+scalar_product (const Tensor<2,dim,Number> &t1,
+                const SymmetricTensor<2,dim,Number> &t2)
+{
+  // delegate to the overload that takes the symmetric tensor first
+  return scalar_product(t2, t1);
+}
+
+
+/**
+ * Double contraction between a rank-4 and a rank-2 symmetric tensor,
+ * resulting in the symmetric tensor of rank 2 that is given as first argument
+ * to this function. This operation is the symmetric tensor analogon of a
+ * matrix-vector multiplication.
+ *
+ * This function does the same as the member operator* of the SymmetricTensor
+ * class. It should not be used, however, since the member operator has
+ * knowledge of the actual data storage format and is at least 2 orders of
+ * magnitude faster. This function mostly exists for compatibility purposes
+ * with the general tensor class.
+ *
+ * @related SymmetricTensor
+ * @author Wolfgang Bangerth, 2005
+ */
+template <typename Number>
+inline void
+double_contract (SymmetricTensor<2,1,Number> &tmp,
+                 const SymmetricTensor<4,1,Number> &t,
+                 const SymmetricTensor<2,1,Number> &s)
+{
+  // for dim==1 each tensor has exactly one component
+  tmp[0][0] = t[0][0][0][0] * s[0][0];
+}
+
+
+
+/**
+ * Double contraction between a rank-4 and a rank-2 symmetric tensor,
+ * resulting in the symmetric tensor of rank 2 that is given as first argument
+ * to this function. This operation is the symmetric tensor analogon of a
+ * matrix-vector multiplication.
+ *
+ * This function does the same as the member operator* of the SymmetricTensor
+ * class. It should not be used, however, since the member operator has
+ * knowledge of the actual data storage format and is at least 2 orders of
+ * magnitude faster. This function mostly exists for compatibility purposes
+ * with the general tensor class.
+ *
+ * @related SymmetricTensor
+ * @author Wolfgang Bangerth, 2005
+ */
+template <typename Number>
+inline void
+double_contract (SymmetricTensor<2,1,Number> &tmp,
+                 const SymmetricTensor<2,1,Number> &s,
+                 const SymmetricTensor<4,1,Number> &t)
+{
+  // for dim==1 each tensor has exactly one component
+  tmp[0][0] = t[0][0][0][0] * s[0][0];
+}
+
+
+
+/**
+ * Double contraction between a rank-4 and a rank-2 symmetric tensor,
+ * resulting in the symmetric tensor of rank 2 that is given as first argument
+ * to this function. This operation is the symmetric tensor analogon of a
+ * matrix-vector multiplication.
+ *
+ * This function does the same as the member operator* of the SymmetricTensor
+ * class. It should not be used, however, since the member operator has
+ * knowledge of the actual data storage format and is at least 2 orders of
+ * magnitude faster. This function mostly exists for compatibility purposes
+ * with the general tensor class.
+ *
+ * @related SymmetricTensor @author Wolfgang Bangerth, 2005
+ */
+template <typename Number>
+inline
+void
+double_contract (SymmetricTensor<2,2,Number> &tmp,
+                 const SymmetricTensor<4,2,Number> &t,
+                 const SymmetricTensor<2,2,Number> &s)
+{
+  const unsigned int dim = 2;
+  // visit the upper triangle only; the off-diagonal entry of s counts twice
+  for (unsigned int row=0; row<dim; ++row)
+    for (unsigned int col=row; col<dim; ++col)
+      tmp[row][col] = t[row][col][0][0] * s[0][0] +
+                      t[row][col][1][1] * s[1][1] +
+                      2 * t[row][col][0][1] * s[0][1];
+}
+
+
+
+/**
+ * Double contraction between a rank-4 and a rank-2 symmetric tensor,
+ * resulting in the symmetric tensor of rank 2 that is given as first argument
+ * to this function. This operation is the symmetric tensor analogon of a
+ * matrix-vector multiplication.
+ *
+ * This function does the same as the member operator* of the SymmetricTensor
+ * class. It should not be used, however, since the member operator has
+ * knowledge of the actual data storage format and is at least 2 orders of
+ * magnitude faster. This function mostly exists for compatibility purposes
+ * with the general tensor class.
+ *
+ * @related SymmetricTensor
+ * @author Wolfgang Bangerth, 2005
+ */
+template <typename Number>
+inline
+void
+double_contract (SymmetricTensor<2,2,Number> &tmp,
+                 const SymmetricTensor<2,2,Number> &s,
+                 const SymmetricTensor<4,2,Number> &t)
+{
+  const unsigned int dim = 2;
+  // tmp_ij = sum_kl s_kl t_klij; the off-diagonal entry of s counts twice
+  for (unsigned int i=0; i<dim; ++i)
+    for (unsigned int j=i; j<dim; ++j)
+      tmp[i][j] = s[0][0] * t[0][0][i][j] +
+                  s[1][1] * t[1][1][i][j] +
+                  2 * s[0][1] * t[0][1][i][j];
+}
+
+
+
+/**
+ * Double contraction between a rank-4 and a rank-2 symmetric tensor,
+ * resulting in the symmetric tensor of rank 2 that is given as first argument
+ * to this function. This operation is the symmetric tensor analogon of a
+ * matrix-vector multiplication.
+ *
+ * This function does the same as the member operator* of the SymmetricTensor
+ * class. It should not be used, however, since the member operator has
+ * knowledge of the actual data storage format and is at least 2 orders of
+ * magnitude faster. This function mostly exists for compatibility purposes
+ * with the general tensor class.
+ *
+ * @related SymmetricTensor
+ * @author Wolfgang Bangerth, 2005
+ */
+template <typename Number>
+inline
+void
+double_contract (SymmetricTensor<2,3,Number> &tmp,
+                 const SymmetricTensor<4,3,Number> &t,
+                 const SymmetricTensor<2,3,Number> &s)
+{
+  const unsigned int dim = 3;
+  // visit the upper triangle only; off-diagonal entries of s count twice
+  for (unsigned int row=0; row<dim; ++row)
+    for (unsigned int col=row; col<dim; ++col)
+      tmp[row][col] = t[row][col][0][0] * s[0][0] +
+                      t[row][col][1][1] * s[1][1] +
+                      t[row][col][2][2] * s[2][2] +
+                      2 * t[row][col][0][1] * s[0][1] +
+                      2 * t[row][col][0][2] * s[0][2] +
+                      2 * t[row][col][1][2] * s[1][2];
+}
+
+
+
+/**
+ * Double contraction between a rank-4 and a rank-2 symmetric tensor,
+ * resulting in the symmetric tensor of rank 2 that is given as first argument
+ * to this function. This operation is the symmetric tensor analogon of a
+ * matrix-vector multiplication.
+ *
+ * This function does the same as the member operator* of the SymmetricTensor
+ * class. It should not be used, however, since the member operator has
+ * knowledge of the actual data storage format and is at least 2 orders of
+ * magnitude faster. This function mostly exists for compatibility purposes
+ * with the general tensor class.
+ *
+ * @related SymmetricTensor
+ * @author Wolfgang Bangerth, 2005
+ */
+template <typename Number>
+inline
+void
+double_contract (SymmetricTensor<2,3,Number> &tmp,
+                 const SymmetricTensor<2,3,Number> &s,
+                 const SymmetricTensor<4,3,Number> &t)
+{
+  const unsigned int dim = 3;
+  // visit the upper triangle only; off-diagonal entries of s count twice
+  for (unsigned int row=0; row<dim; ++row)
+    for (unsigned int col=row; col<dim; ++col)
+      tmp[row][col] = s[0][0] * t[0][0][row][col] +
+                      s[1][1] * t[1][1][row][col] +
+                      s[2][2] * t[2][2][row][col] +
+                      2 * s[0][1] * t[0][1][row][col] +
+                      2 * s[0][2] * t[0][2][row][col] +
+                      2 * s[1][2] * t[1][2][row][col];
+}
+
+
+
+/**
+ * Multiplication operator performing a contraction of the last index of the
+ * first argument and the first index of the second argument. This function
+ * therefore does the same as the corresponding <tt>contract</tt> function,
+ * but returns the result as a return value, rather than writing it into the
+ * reference given as the first argument to the <tt>contract</tt> function.
+ *
+ * Note that for the <tt>Tensor</tt> class, the multiplication operator only
+ * performs a contraction over a single pair of indices. This is in contrast
+ * to the multiplication operator for symmetric tensors, which does the double
+ * contraction.
+ *
+ * @relates SymmetricTensor
+ * @author Wolfgang Bangerth, 2005
+ */
+template <int dim, typename Number>
+Tensor<1,dim,Number>
+operator * (const SymmetricTensor<2,dim,Number> &src1,
+            const Tensor<1,dim,Number> &src2)
+{
+  Tensor<1,dim,Number> product;
+  for (unsigned int row=0; row<dim; ++row)
+    for (unsigned int col=0; col<dim; ++col)
+      product[row] += src1[row][col] * src2[col];
+  return product;
+}
+
+
+/**
+ * Output operator for symmetric tensors of rank 2. Print the elements
+ * consecutively, with a space in between, two spaces between rank 1
+ * subtensors, three between rank 2 and so on. No special effort is made to
+ * represent the symmetry in the output, for example by outputting only the
+ * unique entries.
+ *
+ * @relates SymmetricTensor
+ */
+template <int dim, typename Number>
+inline
+std::ostream &operator << (std::ostream &out,
+                           const SymmetricTensor<2,dim,Number> &t)
+{
+  // make our lives a bit simpler by copying all entries into a
+  // general Tensor object and printing that one through the
+  // output operator of the general Tensor class
+  Tensor<2,dim,Number> full_tensor;
+
+  for (unsigned int row=0; row<dim; ++row)
+    for (unsigned int col=0; col<dim; ++col)
+      full_tensor[row][col] = t[row][col];
+
+  return out << full_tensor;
+}
+
+
+
+/**
+ * Output operator for symmetric tensors of rank 4. Print the elements
+ * consecutively, with a space in between, two spaces between rank 1
+ * subtensors, three between rank 2 and so on. No special effort is made to
+ * represent the symmetry in the output, for example by outputting only the
+ * unique entries.
+ *
+ * @relates SymmetricTensor
+ */
+template <int dim, typename Number>
+inline
+std::ostream &operator << (std::ostream &out,
+                           const SymmetricTensor<4,dim,Number> &t)
+{
+  // make our lives a bit simpler by copying all entries into a
+  // general Tensor object and printing that one through the
+  // output operator of the general Tensor class
+  Tensor<4,dim,Number> full_tensor;
+
+  for (unsigned int a=0; a<dim; ++a)
+    for (unsigned int b=0; b<dim; ++b)
+      for (unsigned int c=0; c<dim; ++c)
+        for (unsigned int d=0; d<dim; ++d)
+          full_tensor[a][b][c][d] = t[a][b][c][d];
+
+  return out << full_tensor;
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/synchronous_iterator.h b/include/deal.II/base/synchronous_iterator.h
new file mode 100644
index 0000000..05ec5a2
--- /dev/null
+++ b/include/deal.II/base/synchronous_iterator.h
@@ -0,0 +1,272 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__synchronous_iterator_h
+#define dealii__synchronous_iterator_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+
+#include <deal.II/base/std_cxx11/tuple.h>
+
+#include <iterator>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * A class that represents a set of iterators each of which is incremented by
+ * one at the same time. This is typically used in calls like
+ * <code>std::transform(a.begin(), a.end(), b.begin(), functor);</code> where
+ * we have synchronous iterators marching through the containers
+ * <code>a,b</code>. If an object of this type represents the end of a range,
+ * only the first element is considered (we only have <code>a.end()</code>,
+ * not <code>b.end()</code>)
+ *
+ * The template argument of the current class shall be of type
+ * <code>std_cxx11::tuple</code> with arguments equal to the iterator types.
+ *
+ * The individual iterators can be accessed using
+ * <code>std_cxx11::get<X>(synchronous_iterator.iterators)</code> where X is
+ * the number corresponding to the desired iterator.
+ *
+ * This type, and the helper functions associated with it, are used as the
+ * Value concept for the blocked_range type of the Threading Building Blocks.
+ *
+ * @author Wolfgang Bangerth, 2008
+ */
+template <typename Iterators>
+struct SynchronousIterators
+{
+  /**
+   * Constructor. Stores a copy of the given set of iterators.
+   */
+  SynchronousIterators (const Iterators &i);
+
+  /**
+   * Copy constructor. Copies the set of iterators stored by @p i.
+   */
+  SynchronousIterators (const SynchronousIterators &i);
+
+  /**
+   * Storage for the iterators represented by the current class.
+   */
+  Iterators iterators;
+};
+
+
+
+template <typename Iterators>
+inline
+SynchronousIterators<Iterators>::
+SynchronousIterators (const Iterators &i)
+  : iterators (i) // keep a copy of the given set of iterators
+{
+}
+
+
+template <typename Iterators>
+inline
+SynchronousIterators<Iterators>::
+SynchronousIterators (const SynchronousIterators &i)
+  : iterators (i.iterators) // copy the iterators of the other object
+{
+}
+
+
+
+/**
+ * Return whether the first element of the first argument is less than the
+ * first element of the second argument. Since the objects compared march
+ * forward all elements at the same time, comparing the first element is
+ * sufficient.
+ *
+ * @relates SynchronousIterators
+ */
+template <typename Iterators>
+inline bool
+operator< (const SynchronousIterators<Iterators> &a,
+           const SynchronousIterators<Iterators> &b)
+{
+  // only the first iterator of each set takes part in the comparison
+  return std_cxx11::get<0>(a.iterators) < std_cxx11::get<0>(b.iterators);
+}
+
+
+
+/**
+ * Return the distance between the first and the second argument. Since the
+ * objects compared march forward all elements at the same time, differencing
+ * the first element is sufficient.
+ *
+ * @relates SynchronousIterators
+ */
+template <typename Iterators>
+inline std::size_t
+operator- (const SynchronousIterators<Iterators> &a,
+           const SynchronousIterators<Iterators> &b)
+{
+  // the result is unsigned, so b must not come after a
+  Assert (std::distance (std_cxx11::get<0>(b.iterators),
+                         std_cxx11::get<0>(a.iterators)) >= 0,
+          ExcInternalError());
+  return std::distance (std_cxx11::get<0>(b.iterators),
+                        std_cxx11::get<0>(a.iterators));
+}
+
+
+/**
+ * Advance a tuple of iterators by $n$.
+ *
+ * @relates SynchronousIterators
+ */
+template <typename I1, typename I2>
+inline void advance (std_cxx11::tuple<I1,I2> &t,
+                     const unsigned int       n)
+{
+  // move both iterators forward by the same number of steps
+  std::advance (std_cxx11::get<0>(t), n);
+  std::advance (std_cxx11::get<1>(t), n);
+}
+
+/**
+ * Advance a tuple of iterators by $n$.
+ *
+ * @relates SynchronousIterators
+ */
+template <typename I1, typename I2, typename I3>
+inline void advance (std_cxx11::tuple<I1,I2,I3> &t,
+                     const unsigned int          n)
+{
+  // move all three iterators forward by the same number of steps
+  std::advance (std_cxx11::get<0>(t), n);
+  std::advance (std_cxx11::get<1>(t), n);
+  std::advance (std_cxx11::get<2>(t), n);
+}
+
+/**
+ * Advance a tuple of iterators by $n$.
+ *
+ * @relates SynchronousIterators
+ */
+template <typename I1, typename I2,
+          typename I3, typename I4>
+inline void advance (std_cxx11::tuple<I1,I2,I3, I4> &t,
+                     const unsigned int              n)
+{
+  // move all four iterators forward by the same number of steps
+  std::advance (std_cxx11::get<0>(t), n);
+  std::advance (std_cxx11::get<1>(t), n);
+  std::advance (std_cxx11::get<2>(t), n);
+  std::advance (std_cxx11::get<3>(t), n);
+}
+
+
+
+/**
+ * Advance a tuple of iterators by 1.
+ *
+ * @relates SynchronousIterators
+ */
+template <typename I1, typename I2>
+inline void advance_by_one (std_cxx11::tuple<I1,I2> &t)
+{
+  // pre-increment each iterator of the tuple in turn
+  ++std_cxx11::get<0>(t);
+  ++std_cxx11::get<1>(t);
+}
+
+/**
+ * Advance a tuple of iterators by 1.
+ *
+ * @relates SynchronousIterators
+ */
+template <typename I1, typename I2, typename I3>
+inline void advance_by_one (std_cxx11::tuple<I1,I2,I3> &t)
+{
+  // pre-increment each iterator of the tuple in turn
+  ++std_cxx11::get<0>(t);
+  ++std_cxx11::get<1>(t);
+  ++std_cxx11::get<2>(t);
+}
+
+/**
+ * Advance a tuple of iterators by 1.
+ *
+ * @relates SynchronousIterators
+ */
+template <typename I1, typename I2,
+          typename I3, typename I4>
+inline void advance_by_one (std_cxx11::tuple<I1,I2,I3,I4> &t)
+{
+  // pre-increment each iterator of the tuple in turn
+  ++std_cxx11::get<0>(t);
+  ++std_cxx11::get<1>(t);
+  ++std_cxx11::get<2>(t);
+  ++std_cxx11::get<3>(t);
+}
+
+
+
+/**
+ * Advance the elements of this iterator by $n$.
+ *
+ * @relates SynchronousIterators
+ */
+template <typename Iterators>
+inline
+SynchronousIterators<Iterators>
+operator + (const SynchronousIterators<Iterators> &a,
+            const std::size_t                      n)
+{
+  SynchronousIterators<Iterators> shifted (a); // leave the argument untouched
+  dealii::advance (shifted.iterators, n);
+  return shifted;
+}
+
+/**
+ * Advance the elements of this iterator by 1.
+ *
+ * @relates SynchronousIterators
+ */
+template <typename Iterators>
+inline SynchronousIterators<Iterators>
+operator ++ (SynchronousIterators<Iterators> &a)
+{
+  // step every iterator of the set forward, then hand back a copy of a
+  dealii::advance_by_one (a.iterators);
+  return a;
+}
+
+
+/**
+ * Compare synch iterators for inequality. Since they march in synch,
+ * comparing only the first element is sufficient.
+ *
+ * @relates SynchronousIterators
+ */
+template <typename Iterators>
+inline bool
+operator != (const SynchronousIterators<Iterators> &a,
+             const SynchronousIterators<Iterators> &b)
+{
+  // only the first iterator of each set takes part in the comparison
+  return (std_cxx11::get<0>(a.iterators)
+          != std_cxx11::get<0>(b.iterators));
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/table.h b/include/deal.II/base/table.h
new file mode 100644
index 0000000..f7ea7c3
--- /dev/null
+++ b/include/deal.II/base/table.h
@@ -0,0 +1,3187 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2002 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__table_h
+#define dealii__table_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/table_indices.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/base/aligned_vector.h>
+
+#include <cstddef>
+#include <algorithm>
+
+DEAL_II_NAMESPACE_OPEN
+
+// forward declaration
+template <int N, typename T> class TableBase;
+template <int N, typename T> class Table;
+template <typename T> class Table<1,T>;
+template <typename T> class Table<2,T>;
+template <typename T> class Table<3,T>;
+template <typename T> class Table<4,T>;
+template <typename T> class Table<5,T>;
+template <typename T> class Table<6,T>;
+
+
+
+namespace internal
+{
+
+  /**
+   * @internal Have a namespace in which we declare some classes that are used
+   * to access the elements of tables using the <tt>operator[]</tt>. These are
+   * quite technical, since they have to do their work recursively (due to the
+   * fact that the number of indices is not known, we have to return an
+   * iterator into the next lower dimension object if we access one object,
+   * until we are on the lowest level and can actually return a reference to
+   * the stored data type itself).  This is so technical that you will not
+   * usually want to look at these classes at all, except possibly for
+   * educational reasons.  None of the classes herein has an interface that you
+   * should use explicitly in your programs (except, of course, through access
+   * to the elements of tables with <tt>operator[]</tt>, which generates
+   * temporary objects of the types of this namespace).
+   *
+   * @author Wolfgang Bangerth, 2002
+   */
+  namespace TableBaseAccessors
+  {
+    /**
+     * @internal Have a class which declares some nested typedefs, depending
+     * on its template parameters. The general template declares nothing, but
+     * there are more useful specializations regarding the last parameter
+     * indicating constness of the table for which accessor objects are to be
+     * generated in this namespace.
+     */
+    template <int N, typename T, bool Constness>
+    struct Types
+    {}; // intentionally empty: only the specializations declare typedefs
+
+    /**
+     * @internal Have a class which declares some nested typedefs, depending
+     * on its template parameters. Specialization for accessors to constant
+     * objects.
+     */
+    template <int N, typename T> struct Types<N,T,true>
+    {
+      typedef const T value_type;
+      typedef const TableBase<N,T> TableType;
+      // for constant tables, 'iterator' is the same type as 'const_iterator'
+      typedef typename AlignedVector<T>::const_iterator iterator;
+      typedef typename AlignedVector<T>::const_iterator const_iterator;
+      // likewise, 'reference' is the same type as 'const_reference'
+      typedef typename AlignedVector<T>::const_reference reference;
+      typedef typename AlignedVector<T>::const_reference const_reference;
+    };
+
+    /**
+     * @internal Have a class which declares some nested typedefs, depending
+     * on its template parameters. Specialization for accessors to non-
+     * constant objects.
+     */
+    template <int N, typename T> struct Types<N,T,false>
+    {
+      typedef T value_type;
+      typedef TableBase<N,T> TableType;
+      // for non-constant tables, 'iterator' and 'const_iterator' differ
+      typedef typename AlignedVector<T>::iterator iterator;
+      typedef typename AlignedVector<T>::const_iterator const_iterator;
+      // likewise, 'reference' and 'const_reference' differ
+      typedef typename AlignedVector<T>::reference reference;
+      typedef typename AlignedVector<T>::const_reference const_reference;
+    };
+
+
+    /**
+     * @internal Class that acts as accessor to subobjects of tables of type
+     * <tt>Table<N,T></tt>. The template parameter <tt>C</tt> may be either
+     * true or false, and indicates whether the objects worked on are constant
+     * or not (i.e. write access is only allowed if the value is false).
+     *
+     * Since with <tt>N</tt> indices, the effect of applying
+     * <tt>operator[]</tt> is getting access to something with <tt>N-1</tt>
+     * indices, we have to implement these accessor classes recursively,
+     * stopping when we have only one index left. For the latter case, a
+     * specialization of this class is declared below, where calling
+     * <tt>operator[]</tt> gives you access to the objects actually stored by
+     * the table. The value given to the index operator needs to be checked
+     * whether it is inside its bounds, for which we need to know which index
+     * of the table we are actually accessing presently. This is done through
+     * the template parameter <tt>P</tt>: it indicates, how many remaining
+     * indices there are. For a vector, <tt>P</tt> may only be one (and then
+     * the specialization below is used). For a table this value may be two,
+     * and when using <tt>operator[]</tt>, an object with <tt>P=1</tt>
+     * emerges.
+     *
+     * The value of <tt>P</tt> is also used to determine the stride: this
+     * object stores a pointer indicating the beginning of the range of
+     * objects that it may access. When we apply <tt>operator[]</tt> on this
+     * object, the resulting new accessor may only access a subset of these
+     * elements, and to know which subset we need to know the dimensions of
+     * the table and the present index, which is indicated by <tt>P</tt>.
+     *
+     * As stated for the entire namespace, you will not usually have to do
+     * with these classes directly, and should not try to use their interface
+     * directly as it may change without notice. In fact, since the
+     * constructors are made private, you will not even be able to generate
+     * objects of this class, as they are only thought as temporaries for
+     * access to elements of the table class, not for passing them around as
+     * arguments of functions, etc.
+     *
+     * @author Wolfgang Bangerth, 2002
+     */
+    template <int N, typename T, bool C, unsigned int P>
+    class Accessor
+    {
+    public:
+      typedef typename Types<N,T,C>::TableType TableType;
+
+      typedef typename Types<N,T,C>::iterator iterator;
+      typedef typename Types<N,T,C>::const_iterator const_iterator;
+
+      typedef size_t size_type;
+      typedef ptrdiff_t difference_type;
+    private:
+      /**
+       * Constructor. Take a reference to the table object to know about the
+       * sizes of the various dimensions, and an iterator to the subset of
+       * data we may access.
+       */
+      Accessor (const TableType &table,
+                const iterator    data);
+
+      /**
+       * Default constructor. Not needed, and invisible, so private.
+       */
+      Accessor ();
+
+    public:
+
+      /**
+       * Copy constructor. This constructor is public so that one can pass
+       * sub-tables to functions as arguments, as in <code>f(table[i])</code>.
+       *
+       * Using this constructor is risky if accessors are stored longer than
+       * the table it points to. Don't do this.
+       */
+      Accessor (const Accessor &a);
+
+      /**
+       * Index operator. Performs a range check.
+       */
+      Accessor<N,T,C,P-1> operator [] (const unsigned int i) const;
+
+      /**
+       * Exception for range check. Do not use global exception since this way
+       * we can output which index is the wrong one.
+       */
+      DeclException3 (ExcIndexRange, int, int, int,
+                      << "Index " << N-P+1 << " has a value of "
+                      << arg1 << " but needs to be in the range ["
+                      << arg2 << "," << arg3 << "[.");
+    private:
+      /**
+       * Store the data given to the constructor. There are no non-const
+       * member functions of this class, so there is no reason not to make
+       * these elements constant.
+       */
+      const TableType &table;
+      const iterator   data;
+
+      // declare some other classes
+      // as friends. make sure to
+      // work around bugs in some
+      // compilers
+      template <int N1, typename T1> friend class dealii::Table;
+      template <int N1, typename T1, bool C1, unsigned int P1>
+      friend class Accessor;
+#  ifndef DEAL_II_TEMPL_SPEC_FRIEND_BUG
+      friend class dealii::Table<N,T>;
+      friend class Accessor<N,T,C,P+1>;
+#  endif
+    };
+
+
+
+    /**
+     * @internal Accessor class for tables. This is the specialization for the
+     * last index, which actually allows access to the elements of the table,
+     * rather than recursively returning access objects for further subsets.
+     * The same holds for this specialization as for the general template; see
+     * there for more information.
+     *
+     * @author Wolfgang Bangerth, 2002
+     */
+    template <int N, typename T, bool C>
+    class Accessor<N,T,C,1>
+    {
+    public:
+      /**
+       * Typedef constant and non-constant iterator types to the elements of
+       * this row, as well as all the other types usually required for the
+       * standard library algorithms.
+       */
+      typedef typename Types<N,T,C>::value_type value_type;
+
+      typedef typename Types<N,T,C>::iterator iterator;
+      typedef typename Types<N,T,C>::const_iterator const_iterator;
+
+      typedef typename Types<N,T,C>::reference reference;
+      typedef typename Types<N,T,C>::const_reference const_reference;
+
+      typedef size_t size_type;
+      typedef ptrdiff_t difference_type;
+
+      /**
+       * Import a typedef from the switch class above.
+       */
+      typedef typename Types<N,T,C>::TableType    TableType;
+
+    private:
+
+      /**
+       * Constructor. Take a reference to the table object to know about the
+       * sizes of the various dimensions, and an iterator to the subset of
+       * data we may access (which in this particular case is only one row).
+       *
+       * The constructor is made private in order to prevent you having such
+       * objects around. The only way to create such objects is via the
+       * <tt>Table</tt> class, which only generates them as temporary objects.
+       * This guarantees that the accessor objects go out of scope earlier
+       * than the mother object, avoiding problems with data consistency.
+       */
+      Accessor (const TableType &table,
+                const iterator    data);
+
+      /**
+       * Default constructor. Not needed, so private.
+       */
+      Accessor ();
+
+    public:
+      /**
+       * Copy constructor. This constructor is public so that one can pass
+       * sub-tables to functions as arguments, as in <code>f(table[i])</code>.
+       *
+       * Using this constructor is risky if accessors are stored longer than
+       * the table they point to. Don't do this.
+       */
+      Accessor (const Accessor &a);
+
+
+      /**
+       * Index operator. Performs a range check.
+       */
+      reference operator [] (const unsigned int) const;
+
+      /**
+       * Return the length of one row, i.e. the number of elements
+       * corresponding to the last index of the table object.
+       */
+      unsigned int size () const;
+
+      /**
+       * Return an iterator to the first element of this row.
+       */
+      iterator begin () const;
+
+      /**
+       * Return an iterator to the element past the end of this row.
+       */
+      iterator end () const;
+
+    private:
+      /**
+       * Store the data given to the constructor. There are no non-const
+       * member functions of this class, so there is no reason not to make
+       * these elements constant.
+       */
+      const TableType &table;
+      const iterator   data;
+
+      // declare some other classes
+      // as friends. make sure to
+      // work around bugs in some
+      // compilers
+      template <int N1, typename T1> friend class dealii::Table;
+      template <int N1, typename T1, bool C1, unsigned int P1>
+      friend class Accessor;
+#  ifndef DEAL_II_TEMPL_SPEC_FRIEND_BUG
+      friend class dealii::Table<2,T>;
+      friend class Accessor<N,T,C,2>;
+#  endif
+    };
+  }
+
+} // namespace internal
+
+
+
+
+/**
+ * General class holding an array of objects of templated type in multiple
+ * dimensions. If the template parameter indicating the number of dimensions
+ * is one, then this is more or less a vector, if it is two then it is a
+ * matrix, and so on.
+ *
+ * Previously, this data type was emulated in this library by constructs like
+ * <tt>std::vector<std::vector<T>></tt>, or even higher nested constructs.
+ * However, this has the disadvantage that it is hard to initialize, and most
+ * importantly that it is very inefficient if all rows have the same size
+ * (which is the usual case), since then the memory for each row is allocated
+ * independently, both wasting time and memory. This can be made more
+ * efficient by allocating only one chunk of memory for the entire object.
+ *
+ * Therefore, this data type was invented. Its implementation is rather
+ * straightforward, with two exceptions. The first thing to think about is how
+ * to pass the size in each of the coordinate directions to the object; this
+ * is done using the TableIndices class. Second, how to access the individual
+ * elements. The basic problem here is that we would like to make the number
+ * of arguments to be passed to the constructor as well as the access
+ * functions dependent on the template parameter <tt>N</tt> indicating the
+ * number of dimensions. Of course, this is not possible.
+ *
+ * The way out of the first problem (and partly the second one as well) is to
+ * have a common base class TableBase and a derived class for each value of
+ * <tt>N</tt>.  This derived class has a constructor with the correct number
+ * of arguments, namely <tt>N</tt>. These then transform their arguments into
+ * the data type the base class (this class in fact) uses in the constructor
+ * as well as in element access through operator() functions.
+ *
+ * The second problem is that we would like to allow access through a sequence
+ * of <tt>operator[]</tt> calls. This mostly because, as said, this class is a
+ * replacement for previous use of nested <tt>std::vector</tt> objects, where
+ * we had to use the <tt>operator[]</tt> access function recursively until we
+ * were at the innermost object. Emulating this behavior without losing the
+ * ability to do index checks, and in particular without losing performance is
+ * possible but nontrivial, and done in the TableBaseAccessors namespace.
+ *
+ *
+ * <h3>Comparison with the Tensor class</h3>
+ *
+ * In some way, this class is similar to the Tensor class, in that it
+ * templatizes on the number of dimensions. However, there are two major
+ * differences. The first is that the Tensor class stores only numeric values
+ * (as <tt>double</tt>s), while the Table class stores arbitrary objects. The
+ * second is that the Tensor class has fixed dimensions, also given as a
+ * template argument, while this class can handle arbitrary dimensions, which
+ * may also be different between different indices.
+ *
+ * This has two consequences. First, since the size is not known at compile
+ * time, it has to do explicit memory allocation. Second, the layout of
+ * individual elements is not known at compile time, so access is slower than
+ * for the Tensor class where the number of elements and their location are
+ * known at compile time and the compiler can optimize with this knowledge
+ * (for example when unrolling loops). On the other hand, this class is of
+ * course more flexible, for example when you want a two-dimensional table
+ * with the number of rows equal to the number of degrees of freedom on a
+ * cell, and the number of columns equal to the number of quadrature points.
+ * Both numbers may only be known at run-time, so a flexible table is needed
+ * here. Furthermore, you may want to store, say, the gradients of shape
+ * functions, so the data type is not a single scalar value, but a tensor
+ * itself.
+ *
+ * @ingroup data
+ * @author Wolfgang Bangerth, 2002.
+ */
+template <int N, typename T>
+class TableBase : public Subscriptor
+{
+public:
+  typedef T value_type;
+
+  /**
+   * Integer type used to count the number of elements in this container.
+   */
+  typedef typename AlignedVector<T>::size_type size_type;
+
+
+  /**
+   * Default constructor. Set all dimensions to zero.
+   */
+  TableBase ();
+
+  /**
+   * Constructor. Initialize the array with the given dimensions in each index
+   * component.
+   */
+  TableBase (const TableIndices<N> &sizes);
+
+  /**
+   * Constructor. Initialize the array with the given dimensions in each index
+   * component, and then initialize the elements of the table using the second
+   * and third argument by calling fill(entries,C_style_indexing).
+   */
+  template <typename InputIterator>
+  TableBase (const TableIndices<N> &sizes,
+             InputIterator entries,
+             const bool      C_style_indexing = true);
+
+  /**
+   * Copy constructor. Performs a deep copy.
+   */
+  TableBase (const TableBase<N,T> &src);
+
+  /**
+   * Copy constructor. Performs a deep copy from a table object storing some
+   * other data type.
+   */
+  template <typename T2>
+  TableBase (const TableBase<N,T2> &src);
+
+  /**
+   * Destructor. Free allocated memory.
+   */
+  ~TableBase ();
+
+  /**
+   * Assignment operator. Copy all elements of <tt>src</tt> into the matrix.
+   * The size is adjusted if needed.
+   *
+   * We can't use the other, templatized version since if we don't declare
+   * this one, the compiler will happily generate a predefined copy operator
+   * which is not what we want.
+   */
+  TableBase<N,T> &operator = (const TableBase<N,T> &src);
+
+  /**
+   * Copy operator. Copy all elements of <tt>src</tt> into the array. The size
+   * is adjusted if needed.
+   *
+   * This function requires that the type <tt>T2</tt> is convertible to
+   * <tt>T</tt>.
+   */
+  template<typename T2>
+  TableBase<N,T> &operator = (const TableBase<N,T2> &src);
+
+  /**
+   * Test for equality of two tables.
+   */
+  bool operator == (const TableBase<N,T> &T2)  const;
+
+  /**
+   * Set all entries to their default value (i.e. copy them over with default
+   * constructed objects). Do not change the size of the table, though.
+   */
+  void reset_values ();
+
+  /**
+   * Set the dimensions of this object to the sizes given in the argument, and
+   * newly allocate the required memory. If
+   * <tt>omit_default_initialization</tt> is set to <tt>false</tt>, all
+   * elements of the table are set to a default constructed object for the
+   * element type. Otherwise the memory is left in an uninitialized or
+   * otherwise undefined state.
+   */
+  void reinit (const TableIndices<N> &new_size,
+               const bool             omit_default_initialization = false);
+
+  /**
+   * Size of the table in direction <tt>i</tt>.
+   */
+  unsigned int size (const unsigned int i) const;
+
+  /**
+   * Return the sizes of this object in each direction.
+   */
+  const TableIndices<N> &size () const;
+
+  /**
+   * Return the number of elements stored in this object, which is the product
+   * of the extensions in each dimension.
+   */
+  size_type n_elements () const;
+
+  /**
+   * Return whether the object is empty, i.e. one of the directions is zero.
+   * This is equivalent to <tt>n_elements()==0</tt>.
+   */
+  bool empty () const;
+
+  /**
+   * Fill this table (which is assumed to already have the correct size) from
+   * a source given by dereferencing the given forward iterator (which could,
+   * for example, be a pointer to the first element of an array, or an
+   * inserting std::istream_iterator). The second argument denotes whether the
+   * elements pointed to are arranged in a way that corresponds to the last
+   * index running fastest or slowest. The default is to use C-style indexing
+   * where the last index runs fastest (as opposed to Fortran-style where the
+   * first index runs fastest when traversing multidimensional arrays). For
+   * example, if you try to fill an object of type Table<2,T>, then calling
+   * this function with the default value for the second argument will result
+   * in the equivalent of doing
+   * @code
+   *   Table<2,T> t;
+   *   for (unsigned int i=0; i<t.size(0); ++i)
+   *     for (unsigned int j=0; j<t.size(1); ++j)
+   *       t[i][j] = *entries++;
+   * @endcode
+   * On the other hand, if the second argument to this function is false, then
+   * this would result in code of the following form:
+   * @code
+   *   Table<2,T> t;
+   *   for (unsigned int j=0; j<t.size(1); ++j)
+   *     for (unsigned int i=0; i<t.size(0); ++i)
+   *       t[i][j] = *entries++;
+   * @endcode
+   * Note the switched order in which we fill the table elements by traversing
+   * the given set of iterators.
+   *
+   * @param entries An iterator to a set of elements from which to initialize
+   * this table. It is assumed that iterator can be incremented and
+   * dereferenced a sufficient number of times to fill this table.
+   * @param C_style_indexing If true, run over elements of the table with the
+   * last index changing fastest as we dereference subsequent elements of the
+   * input range. If false, change the first index fastest.
+   */
+  template <typename InputIterator>
+  void fill (InputIterator entries,
+             const bool      C_style_indexing = true);
+
+  /**
+   * Fill all table entries with the same value.
+   */
+  void fill (const T &value);
+
+  /**
+   * Return a read-write reference to the indicated element.
+   */
+  typename AlignedVector<T>::reference
+  operator () (const TableIndices<N> &indices);
+
+  /**
+   * Return the value of the indicated element as a read-only reference.
+   *
+   * We return the requested value as a constant reference rather than by
+   * value since this object may hold data types that may be large, and we
+   * don't know here whether copying is expensive or not.
+   */
+  typename AlignedVector<T>::const_reference
+  operator () (const TableIndices<N> &indices) const;
+
+  /**
+   * Swap the contents of this table and the other table @p v. One could do
+   * this operation with a temporary variable and copying over the data
+   * elements, but this function is significantly more efficient since it only
+   * swaps the pointers to the data of the two vectors and therefore does not
+   * need to allocate temporary storage and move data around.
+   *
+   * This function is analogous to the @p swap function of all C++ standard
+   * containers. Also, there is a global function <tt>swap(u,v)</tt> that
+   * simply calls <tt>u.swap(v)</tt>, again in analogy to standard functions.
+   */
+  void swap (TableBase<N,T> &v);
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  std::size_t memory_consumption () const;
+
+  /**
+   * Write or read the data of this object to or from a stream for the purpose
+   * of serialization.
+   */
+  template <class Archive>
+  void serialize (Archive &ar, const unsigned int version);
+
+protected:
+  /**
+   * Return the position of the indicated element within the array of elements
+   * stored one after the other. This function does no index checking.
+   */
+  size_type position (const TableIndices<N> &indices) const;
+
+  /**
+   * Return a read-write reference to the indicated element.
+   *
+   * This function does no bounds checking and is only to be used internally
+   * and in functions already checked.
+   */
+  typename AlignedVector<T>::reference el (const TableIndices<N> &indices);
+
+  /**
+   * Return the value of the indicated element as a read-only reference.
+   *
+   * This function does no bounds checking and is only to be used internally
+   * and in functions already checked.
+   *
+   * We return the requested value as a constant reference rather than by
+   * value since this object may hold data types that may be large, and we
+   * don't know here whether copying is expensive or not.
+   */
+  typename AlignedVector<T>::const_reference el (const TableIndices<N> &indices) const;
+
+protected:
+  /**
+   * Component-array.
+   */
+  AlignedVector<T> values;
+
+  /**
+   * Size in each direction of the table.
+   */
+  TableIndices<N> table_size;
+
+  /**
+   * Make all other table classes friends.
+   */
+  template <int, typename> friend class TableBase;
+};
+
+
+/**
+ * A class representing a table with an arbitrary but fixed number of indices.
+ * This general template implements some additional functions over those
+ * provided by the TableBase class, such as indexing functions taking the
+ * correct number of arguments, etc.
+ *
+ * Rather than this general template, these functions are implemented in
+ * partial specializations of this class, with fixed numbers of dimensions.
+ * See there, and in the documentation of the base class for more information.
+ *
+ * @ingroup data
+ * @author Wolfgang Bangerth, 2002
+ */
+template <int N,typename T>
+class Table : public TableBase<N,T>
+{
+};
+
+
+/**
+ * A class representing a one-dimensional table, i.e. a vector-like class.
+ * Since the C++ library has a vector class, there is probably not much need
+ * for this particular class, but since it is so simple to implement on top of
+ * the template base class, we provide it anyway.
+ *
+ * For the rationale of this class, and a description of the interface, see
+ * the base class.
+ *
+ * @ingroup data
+ * @author Wolfgang Bangerth, 2002
+ */
+template <typename T>
+class Table<1,T> : public TableBase<1,T>
+{
+public:
+  /**
+   * Integer type used to count the number of elements in this container.
+   */
+  typedef typename TableBase<1,T>::size_type size_type;
+
+  /**
+   * Default constructor. Set all dimensions to zero.
+   */
+  Table ();
+
+  /**
+   * Constructor. Pass down the given dimension to the base class.
+   */
+  Table (const unsigned int size);
+
+  /**
+   * Constructor. Create a table with a given size and initialize it from a
+   * set of iterators.
+   *
+   * This function is entirely equivalent to creating a table <code>t</code>
+   * of the given size and then calling
+   * @code
+   *   t.fill (entries, C_style_indexing);
+   * @endcode
+   * on it, using the TableBase::fill() function where the arguments are
+   * explained in more detail. The point, however, is that that is only
+   * possible if the table can be changed after running the constructor,
+   * whereas calling the current constructor allows sizing and initializing an
+   * object right away so that it can be marked const.
+   *
+   * Using this constructor, you can do things like this:
+   * @code
+   *   const double values[] = { 1, 2, 3 };
+   *   const Table<1,double> t(3, values, true);
+   * @endcode
+   * You can also initialize a table right from a file, using input iterators:
+   * @code
+   *   std::ifstream input ("myfile");
+   *   const Table<1,double> t(3,
+   *                           std::istream_iterator<double>(input),
+   *                           true);
+   * @endcode
+   *
+   *
+   * @param size The size of this one-dimensional table.
+   * @param entries An iterator to a set of elements from which to initialize
+   * this table. It is assumed that iterator can be incremented and
+   * dereferenced a sufficient number of times to fill this table.
+   * @param C_style_indexing If true, run over elements of the table with the
+   * last index changing fastest as we dereference subsequent elements of the
+   * input range. If false, change the first index fastest.
+   */
+  template <typename InputIterator>
+  Table (const unsigned int size,
+         InputIterator entries,
+         const bool      C_style_indexing = true);
+
+  /**
+   * Access operator. Since this is a one-dimensional object, this simply
+   * accesses the requested data element. Returns a read-only reference.
+   */
+  typename AlignedVector<T>::const_reference
+  operator [] (const unsigned int i) const;
+
+  /**
+   * Access operator. Since this is a one-dimensional object, this simply
+   * accesses the requested data element. Returns a read-write reference.
+   */
+  typename AlignedVector<T>::reference
+  operator [] (const unsigned int i);
+
+  /**
+   * Access operator. Since this is a one-dimensional object, this simply
+   * accesses the requested data element. Returns a read-only reference.
+   */
+  typename AlignedVector<T>::const_reference
+  operator () (const unsigned int i) const;
+
+  /**
+   * Access operator. Since this is a one-dimensional object, this simply
+   * accesses the requested data element. Returns a read-write reference.
+   */
+  typename AlignedVector<T>::reference
+  operator () (const unsigned int i);
+
+  /**
+   * Make the corresponding operator () from the TableBase base class
+   * available also in this class.
+   */
+  typename AlignedVector<T>::reference
+  operator () (const TableIndices<1> &indices);
+
+  /**
+   * Make the corresponding operator () from the TableBase base class
+   * available also in this class.
+   */
+  typename AlignedVector<T>::const_reference
+  operator () (const TableIndices<1> &indices) const;
+};
+
+
+
+/**
+ * A class representing a two-dimensional table, i.e. a matrix of objects (not
+ * necessarily only numbers).
+ *
+ * For the rationale of this class, and a description of the interface, see
+ * the base class. Since this serves as the base class of the full matrix
+ * classes in this library, and to keep a minimal compatibility with a
+ * predecessor class (<tt>vector2d</tt>), some additional functions are
+ * provided.
+ *
+ * @ingroup data
+ * @author Wolfgang Bangerth, 2002
+ */
+template <typename T>
+class Table<2,T> : public TableBase<2,T>
+{
+public:
+  /**
+   * Integer type used to count the number of elements in this container.
+   */
+  typedef typename TableBase<2,T>::size_type size_type;
+
+  /**
+   * Default constructor. Set all dimensions to zero.
+   */
+  Table ();
+
+  /**
+   * Constructor. Pass down the given dimensions to the base class.
+   */
+  Table (const unsigned int size1,
+         const unsigned int size2);
+
+  /**
+   * Constructor. Create a table with a given size and initialize it from a
+   * set of iterators.
+   *
+   * This function is entirely equivalent to creating a table <code>t</code>
+   * of the given size and then calling
+   * @code
+   *   t.fill (entries, C_style_indexing);
+   * @endcode
+   * on it, using the TableBase::fill() function where the arguments are
+   * explained in more detail. The point, however, is that that is only
+   * possible if the table can be changed after running the constructor,
+   * whereas calling the current constructor allows sizing and initializing an
+   * object right away so that it can be marked const.
+   *
+   * Using this constructor, you can do things like this:
+   * @code
+   *   const double values[] = { 1, 2, 3, 4, 5, 6 };
+   *   const Table<2,double> t(2, 3, values, true);
+   * @endcode
+   * You can also initialize a table right from a file, using input iterators:
+   * @code
+   *   std::ifstream input ("myfile");
+   *   const Table<2,double> t(2, 3,
+   *                           std::istream_iterator<double>(input),
+   *                           true);
+   * @endcode
+   *
+   *
+   * @param size1 The size of this table in the first dimension.
+   * @param size2 The size of this table in the second dimension.
+   * @param entries An iterator to a set of elements from which to initialize
+   * this table. It is assumed that iterator can be incremented and
+   * dereferenced a sufficient number of times to fill this table.
+   * @param C_style_indexing If true, run over elements of the table with the
+   * last index changing fastest as we dereference subsequent elements of the
+   * input range. If false, change the first index fastest.
+   */
+  template <typename InputIterator>
+  Table (const unsigned int size1,
+         const unsigned int size2,
+         InputIterator entries,
+         const bool      C_style_indexing = true);
+
+  /**
+   * Reinitialize the object. This function is mostly here for compatibility
+   * with the earlier <tt>vector2d</tt> class. Passes down to the base class
+   * by converting the arguments to the data type requested by the base class.
+   */
+  void reinit (const unsigned int size1,
+               const unsigned int size2,
+               const bool         omit_default_initialization = false);
+
+  using TableBase<2,T>::reinit;
+
+  /**
+   * Access operator. Generate an object that accesses the requested row of
+   * this two-dimensional table. Range checks are performed.
+   *
+   * This version of the function only allows read access.
+   */
+  dealii::internal::TableBaseAccessors::Accessor<2,T,true,1>
+  operator [] (const unsigned int i) const;
+
+  /**
+   * Access operator. Generate an object that accesses the requested row of
+   * this two-dimensional table. Range checks are performed.
+   *
+   * This version of the function allows read-write access.
+   */
+  dealii::internal::TableBaseAccessors::Accessor<2,T,false,1>
+  operator [] (const unsigned int i);
+
+  /**
+   * Direct access to one element of the table by specifying all indices at
+   * the same time. Range checks are performed.
+   *
+   * This version of the function only allows read access.
+   */
+  typename AlignedVector<T>::const_reference
+  operator () (const unsigned int i,
+               const unsigned int j) const;
+
+
+  /**
+   * Direct access to one element of the table by specifying all indices at
+   * the same time. Range checks are performed.
+   *
+   * This version of the function allows read-write access.
+   */
+  typename AlignedVector<T>::reference
+  operator () (const unsigned int i,
+               const unsigned int j);
+
+  /**
+   * Make the corresponding operator () from the TableBase base class
+   * available also in this class.
+   */
+  typename AlignedVector<T>::reference
+  operator () (const TableIndices<2> &indices);
+
+  /**
+   * Make the corresponding operator () from the TableBase base class
+   * available also in this class.
+   */
+  typename AlignedVector<T>::const_reference
+  operator () (const TableIndices<2> &indices) const;
+
+
+  /**
+   * Number of rows. This function really only makes sense since we have a
+   * two-dimensional object here.
+   */
+  unsigned int n_rows () const;
+
+  /**
+   * Number of columns. This function really only makes sense since we have a
+   * two-dimensional object here.
+   */
+  unsigned int n_cols () const;
+
+protected:
+  /**
+   * Return a read-write reference to the element <tt>(i,j)</tt>.
+   *
+   * This function does no bounds checking and is only to be used internally
+   * and in functions already checked.
+   *
+   * These functions are mainly here for compatibility with a former
+   * implementation of these table classes for 2d arrays, then called
+   * <tt>vector2d</tt>.
+   */
+  typename AlignedVector<T>::reference el (const unsigned int i,
+                                           const unsigned int j);
+
+  /**
+   * Return the value of the element <tt>(i,j)</tt> as a read-only reference.
+   *
+   * This function does no bounds checking and is only to be used internally
+   * and in functions already checked.
+   *
+   * We return the requested value as a constant reference rather than by
+   * value since this object may hold data types that may be large, and we
+   * don't know here whether copying is expensive or not.
+   *
+   * These functions are mainly here for compatibility with a former
+   * implementation of these table classes for 2d arrays, then called
+   * <tt>vector2d</tt>.
+   */
+  typename AlignedVector<T>::const_reference el (const unsigned int i,
+                                                 const unsigned int j) const;
+};
+
+
+
+/**
+ * A class representing a three-dimensional table of objects (not necessarily
+ * only numbers).
+ *
+ * For the rationale of this class, and a description of the interface, see
+ * the base class.
+ *
+ * @ingroup data
+ * @author Wolfgang Bangerth, 2002
+ */
+template <typename T>
+class Table<3,T> : public TableBase<3,T>
+{
+public:
+  /**
+   * Integer type used to count the number of elements in this container.
+   */
+  typedef typename TableBase<3,T>::size_type size_type;
+
+  /**
+   * Default constructor. Set all dimensions to zero.
+   */
+  Table ();
+
+  /**
+   * Constructor. Pass down the given dimensions to the base class.
+   */
+  Table (const unsigned int size1,
+         const unsigned int size2,
+         const unsigned int size3);
+
+  /**
+   * Constructor. Create a table with a given size and initialize it from a
+   * set of iterators.
+   *
+   * This function is entirely equivalent to creating a table <code>t</code>
+   * of the given size and then calling
+   * @code
+   *   t.fill (entries, C_style_indexing);
+   * @endcode
+   * on it, using the TableBase::fill() function where the arguments are
+   * explained in more detail. The point, however, is that that is only
+   * possible if the table can be changed after running the constructor,
+   * whereas calling the current constructor allows sizing and initializing an
+   * object right away so that it can be marked const.
+   *
+   * Using this constructor, you can do things like this (shown here for a
+   * two-dimensional table, but the same works for the current class):
+   * @code
+   *   const double values[] = { 1, 2, 3, 4, 5, 6 };
+   *   const Table<2,double> t(2, 3, values, true);
+   * @endcode
+   * You can also initialize a table right from a file, using input iterators:
+   * @code
+   *   std::ifstream input ("myfile");
+   *   const Table<2,double> t(2, 3,
+   *                           std::istream_iterator<double>(input),
+   *                           true);
+   * @endcode
+   *
+   *
+   * @param size1 The size of this table in the first dimension.
+   * @param size2 The size of this table in the second dimension.
+   * @param size3 The size of this table in the third dimension.
+   * @param entries An iterator to a set of elements from which to initialize
+   * this table. It is assumed that iterator can be incremented and
+   * dereferenced a sufficient number of times to fill this table.
+   * @param C_style_indexing If true, run over elements of the table with the
+   * last index changing fastest as we dereference subsequent elements of the
+   * input range. If false, change the first index fastest.
+   */
+  template <typename InputIterator>
+  Table (const unsigned int size1,
+         const unsigned int size2,
+         const unsigned int size3,
+         InputIterator entries,
+         const bool      C_style_indexing = true);
+
+  /**
+   * Access operator. Generate an object that accesses the requested
+   * two-dimensional subobject of this three-dimensional table. Range checks
+   * are performed.
+   *
+   * This version of the function only allows read access.
+   */
+  dealii::internal::TableBaseAccessors::Accessor<3,T,true,2>
+  operator [] (const unsigned int i) const;
+
+  /**
+   * Access operator. Generate an object that accesses the requested
+   * two-dimensional subobject of this three-dimensional table. Range checks
+   * are performed.
+   *
+   * This version of the function allows read-write access.
+   */
+  dealii::internal::TableBaseAccessors::Accessor<3,T,false,2>
+  operator [] (const unsigned int i);
+
+  /**
+   * Direct access to one element of the table by specifying all indices at
+   * the same time. Range checks are performed.
+   *
+   * This version of the function only allows read access.
+   */
+  typename AlignedVector<T>::const_reference operator () (const unsigned int i,
+                                                          const unsigned int j,
+                                                          const unsigned int k) const;
+
+
+  /**
+   * Direct access to one element of the table by specifying all indices at
+   * the same time. Range checks are performed.
+   *
+   * This version of the function allows read-write access.
+   */
+  typename AlignedVector<T>::reference operator () (const unsigned int i,
+                                                    const unsigned int j,
+                                                    const unsigned int k);
+
+  /**
+   * Make the corresponding operator () from the TableBase base class
+   * available also in this class.
+   */
+  typename AlignedVector<T>::reference operator () (const TableIndices<3> &indices);
+
+  /**
+   * Make the corresponding operator () from the TableBase base class
+   * available also in this class.
+   */
+  typename AlignedVector<T>::const_reference operator () (const TableIndices<3> &indices) const;
+};
+
+
+
+/**
+ * A class representing a four-dimensional table of objects (not necessarily
+ * only numbers).
+ *
+ * For the rationale of this class, and a description of the interface, see
+ * the base class.
+ *
+ * @ingroup data
+ * @author Wolfgang Bangerth, Ralf Hartmann, 2002
+ */
+template <typename T>
+class Table<4,T> : public TableBase<4,T>
+{
+public:
+  /**
+   * Integer type used to count the number of elements in this container.
+   */
+  typedef typename TableBase<4,T>::size_type size_type;
+
+  /**
+   * Default constructor. Set all dimensions to zero.
+   */
+  Table ();
+
+  /**
+   * Constructor. Pass down the given dimensions to the base class.
+   */
+  Table (const unsigned int size1,
+         const unsigned int size2,
+         const unsigned int size3,
+         const unsigned int size4);
+
+  /**
+   * Access operator. Generate an object that accesses the requested
+   * three-dimensional subobject of this four-dimensional table. Range checks
+   * are performed.
+   *
+   * This version of the function only allows read access.
+   */
+  dealii::internal::TableBaseAccessors::Accessor<4,T,true,3>
+  operator [] (const unsigned int i) const;
+
+  /**
+   * Access operator. Generate an object that accesses the requested
+   * three-dimensional subobject of this four-dimensional table. Range checks
+   * are performed.
+   *
+   * This version of the function allows read-write access.
+   */
+  dealii::internal::TableBaseAccessors::Accessor<4,T,false,3>
+  operator [] (const unsigned int i);
+
+  /**
+   * Direct access to one element of the table by specifying all indices at
+   * the same time. Range checks are performed.
+   *
+   * This version of the function only allows read access.
+   */
+  typename AlignedVector<T>::const_reference operator () (const unsigned int i,
+                                                          const unsigned int j,
+                                                          const unsigned int k,
+                                                          const unsigned int l) const;
+
+
+  /**
+   * Direct access to one element of the table by specifying all indices at
+   * the same time. Range checks are performed.
+   *
+   * This version of the function allows read-write access.
+   */
+  typename AlignedVector<T>::reference operator () (const unsigned int i,
+                                                    const unsigned int j,
+                                                    const unsigned int k,
+                                                    const unsigned int l);
+
+  /**
+   * Make the corresponding operator () from the TableBase base class
+   * available also in this class.
+   */
+  typename AlignedVector<T>::reference
+  operator () (const TableIndices<4> &indices);
+
+  /**
+   * Make the corresponding operator () from the TableBase base class
+   * available also in this class.
+   */
+  typename AlignedVector<T>::const_reference
+  operator () (const TableIndices<4> &indices) const;
+};
+
+
+
+/**
+ * Specialization of the Table class for five-dimensional tables of objects
+ * (which need not be numbers).
+ *
+ * The rationale of this class and the description of its interface are
+ * given in the base class.
+ *
+ * @ingroup data
+ * @author Wolfgang Bangerth, Ralf Hartmann 2002
+ */
+template <typename T>
+class Table<5,T> : public TableBase<5,T>
+{
+public:
+  /**
+   * Integer type for counting the elements stored in this container, taken
+   * over from the base class.
+   */
+  typedef typename TableBase<5,T>::size_type size_type;
+
+  /**
+   * Default constructor. All dimensions are set to zero.
+   */
+  Table ();
+
+  /**
+   * Constructor. The five given extents are passed down to the base class.
+   */
+  Table (const unsigned int size1,
+         const unsigned int size2,
+         const unsigned int size3,
+         const unsigned int size4,
+         const unsigned int size5);
+
+  /**
+   * Read-only access operator. Returns an accessor object for the
+   * four-dimensional subobject of this five-dimensional table that is
+   * selected by @p i. Range checks are performed.
+   */
+  dealii::internal::TableBaseAccessors::Accessor<5,T,true,4>
+  operator [] (const unsigned int i) const;
+
+  /**
+   * Read-write access operator. Returns an accessor object for the
+   * four-dimensional subobject of this five-dimensional table that is
+   * selected by @p i. Range checks are performed.
+   */
+  dealii::internal::TableBaseAccessors::Accessor<5,T,false,4>
+  operator [] (const unsigned int i);
+
+  /**
+   * Read-only access to a single element, identified by giving all five
+   * indices at once. Range checks are performed.
+   */
+  typename AlignedVector<T>::const_reference
+  operator () (const unsigned int i,
+               const unsigned int j,
+               const unsigned int k,
+               const unsigned int l,
+               const unsigned int m) const;
+
+  /**
+   * Read-write access to a single element, identified by giving all five
+   * indices at once. Range checks are performed.
+   */
+  typename AlignedVector<T>::reference
+  operator () (const unsigned int i,
+               const unsigned int j,
+               const unsigned int k,
+               const unsigned int l,
+               const unsigned int m);
+
+  /**
+   * Read-write element access through a TableIndices object. Declared here
+   * so that the corresponding operator () of the TableBase base class is
+   * also available in this class.
+   */
+  typename AlignedVector<T>::reference
+  operator () (const TableIndices<5> &indices);
+
+  /**
+   * Read-only element access through a TableIndices object. Declared here
+   * so that the corresponding operator () of the TableBase base class is
+   * also available in this class.
+   */
+  typename AlignedVector<T>::const_reference
+  operator () (const TableIndices<5> &indices) const;
+};
+
+
+
+/**
+ * Specialization of the Table class for six-dimensional tables of objects
+ * (which need not be numbers).
+ *
+ * The rationale of this class and the description of its interface are
+ * given in the base class.
+ *
+ * @ingroup data
+ * @author Wolfgang Bangerth, Ralf Hartmann 2002
+ */
+template <typename T>
+class Table<6,T> : public TableBase<6,T>
+{
+public:
+  /**
+   * Integer type for counting the elements stored in this container, taken
+   * over from the base class.
+   */
+  typedef typename TableBase<6,T>::size_type size_type;
+
+  /**
+   * Default constructor. All dimensions are set to zero.
+   */
+  Table ();
+
+  /**
+   * Constructor. The six given extents are passed down to the base class.
+   */
+  Table (const unsigned int size1,
+         const unsigned int size2,
+         const unsigned int size3,
+         const unsigned int size4,
+         const unsigned int size5,
+         const unsigned int size6);
+
+  /**
+   * Read-only access operator. Returns an accessor object for the
+   * five-dimensional subobject of this six-dimensional table that is
+   * selected by @p i. Range checks are performed.
+   */
+  dealii::internal::TableBaseAccessors::Accessor<6,T,true,5>
+  operator [] (const unsigned int i) const;
+
+  /**
+   * Read-write access operator. Returns an accessor object for the
+   * five-dimensional subobject of this six-dimensional table that is
+   * selected by @p i. Range checks are performed.
+   */
+  dealii::internal::TableBaseAccessors::Accessor<6,T,false,5>
+  operator [] (const unsigned int i);
+
+  /**
+   * Read-only access to a single element, identified by giving all six
+   * indices at once. Range checks are performed.
+   */
+  typename AlignedVector<T>::const_reference
+  operator () (const unsigned int i,
+               const unsigned int j,
+               const unsigned int k,
+               const unsigned int l,
+               const unsigned int m,
+               const unsigned int n) const;
+
+  /**
+   * Read-write access to a single element, identified by giving all six
+   * indices at once. Range checks are performed.
+   */
+  typename AlignedVector<T>::reference
+  operator () (const unsigned int i,
+               const unsigned int j,
+               const unsigned int k,
+               const unsigned int l,
+               const unsigned int m,
+               const unsigned int n);
+
+  /**
+   * Read-write element access through a TableIndices object. Declared here
+   * so that the corresponding operator () of the TableBase base class is
+   * also available in this class.
+   */
+  typename AlignedVector<T>::reference
+  operator () (const TableIndices<6> &indices);
+
+  /**
+   * Read-only element access through a TableIndices object. Declared here
+   * so that the corresponding operator () of the TableBase base class is
+   * also available in this class.
+   */
+  typename AlignedVector<T>::const_reference
+  operator () (const TableIndices<6> &indices) const;
+};
+
+
+/**
+ * Specialization of the Table class for seven-dimensional tables of objects
+ * (which need not be numbers).
+ *
+ * The rationale of this class and the description of its interface are
+ * given in the base class.
+ *
+ * @ingroup data
+ * @author Wolfgang Bangerth, 2002, Ralf Hartmann 2004
+ */
+template <typename T>
+class Table<7,T> : public TableBase<7,T>
+{
+public:
+  /**
+   * Integer type for counting the elements stored in this container, taken
+   * over from the base class.
+   */
+  typedef typename TableBase<7,T>::size_type size_type;
+
+  /**
+   * Default constructor. All dimensions are set to zero.
+   */
+  Table ();
+
+  /**
+   * Constructor. The seven given extents are passed down to the base class.
+   */
+  Table (const unsigned int size1,
+         const unsigned int size2,
+         const unsigned int size3,
+         const unsigned int size4,
+         const unsigned int size5,
+         const unsigned int size6,
+         const unsigned int size7);
+
+  /**
+   * Read-only access operator. Returns an accessor object for the
+   * six-dimensional subobject of this seven-dimensional table that is
+   * selected by @p i. Range checks are performed.
+   */
+  dealii::internal::TableBaseAccessors::Accessor<7,T,true,6>
+  operator [] (const unsigned int i) const;
+
+  /**
+   * Read-write access operator. Returns an accessor object for the
+   * six-dimensional subobject of this seven-dimensional table that is
+   * selected by @p i. Range checks are performed.
+   */
+  dealii::internal::TableBaseAccessors::Accessor<7,T,false,6>
+  operator [] (const unsigned int i);
+
+  /**
+   * Read-only access to a single element, identified by giving all seven
+   * indices at once. Range checks are performed.
+   */
+  typename AlignedVector<T>::const_reference
+  operator () (const unsigned int i,
+               const unsigned int j,
+               const unsigned int k,
+               const unsigned int l,
+               const unsigned int m,
+               const unsigned int n,
+               const unsigned int o) const;
+
+  /**
+   * Read-write access to a single element, identified by giving all seven
+   * indices at once. Range checks are performed.
+   */
+  typename AlignedVector<T>::reference
+  operator () (const unsigned int i,
+               const unsigned int j,
+               const unsigned int k,
+               const unsigned int l,
+               const unsigned int m,
+               const unsigned int n,
+               const unsigned int o);
+
+  /**
+   * Read-write element access through a TableIndices object. Declared here
+   * so that the corresponding operator () of the TableBase base class is
+   * also available in this class.
+   */
+  typename AlignedVector<T>::reference
+  operator () (const TableIndices<7> &indices);
+
+  /**
+   * Read-only element access through a TableIndices object. Declared here
+   * so that the corresponding operator () of the TableBase base class is
+   * also available in this class.
+   */
+  typename AlignedVector<T>::const_reference
+  operator () (const TableIndices<7> &indices) const;
+};
+
+
+
+/**
+ * A transposed two-dimensional table, i.e. a matrix of objects (which need
+ * not be numbers) that uses column-first numbering (the FORTRAN
+ * convention). The only real difference to Table<2,T> is therefore the
+ * storage format.
+ *
+ * This class replicates the functions of Table<2,T>, but element access
+ * and dimensions refer to the transposed ordering of the data field in
+ * TableBase.
+ *
+ * @ingroup data
+ * @author Guido Kanschat, 2005
+ */
+template <typename T>
+class TransposeTable : public TableBase<2,T>
+{
+public:
+  /**
+   * Integer type for counting the elements stored in this container, taken
+   * over from the base class.
+   */
+  typedef typename TableBase<2,T>::size_type size_type;
+
+  /**
+   * Default constructor. All dimensions are set to zero.
+   */
+  TransposeTable ();
+
+  /**
+   * Constructor. The given dimensions are passed down to the base class.
+   */
+  TransposeTable (const unsigned int size1,
+                  const unsigned int size2);
+
+  /**
+   * Reinitialize the object. This function exists mostly for compatibility
+   * with the earlier <tt>vector2d</tt> class. It passes down to the base
+   * class after converting the arguments to the data type the base class
+   * requests.
+   */
+  void
+  reinit (const unsigned int size1,
+          const unsigned int size2,
+          const bool         omit_default_initialization = false);
+
+  /**
+   * Read-only access to a single element, identified by giving both
+   * indices at once. Range checks are performed.
+   */
+  typename AlignedVector<T>::const_reference
+  operator () (const unsigned int i,
+               const unsigned int j) const;
+
+  /**
+   * Read-write access to a single element, identified by giving both
+   * indices at once. Range checks are performed.
+   */
+  typename AlignedVector<T>::reference
+  operator () (const unsigned int i,
+               const unsigned int j);
+
+  /**
+   * Number of rows. This function only makes sense because the object is
+   * two-dimensional.
+   */
+  unsigned int
+  n_rows () const;
+
+  /**
+   * Number of columns. This function only makes sense because the object is
+   * two-dimensional.
+   */
+  unsigned int
+  n_cols () const;
+
+protected:
+  /**
+   * Read-write reference to the element <tt>(i,j)</tt>.
+   *
+   * No bounds checking is done; the function is meant for internal use and
+   * for callers that have already checked the indices.
+   *
+   * It is mainly kept for compatibility with a former implementation of
+   * these table classes for 2d arrays, then called <tt>vector2d</tt>.
+   */
+  typename AlignedVector<T>::reference
+  el (const unsigned int i,
+      const unsigned int j);
+
+  /**
+   * Read-only reference to the element <tt>(i,j)</tt>.
+   *
+   * No bounds checking is done; the function is meant for internal use and
+   * for callers that have already checked the indices.
+   *
+   * The value is returned as a constant reference rather than by value
+   * because the stored data type may be large, and it is not known here
+   * whether copying it is expensive.
+   *
+   * It is mainly kept for compatibility with a former implementation of
+   * these table classes for 2d arrays, then called <tt>vector2d</tt>.
+   */
+  typename AlignedVector<T>::const_reference
+  el (const unsigned int i,
+      const unsigned int j) const;
+};
+
+
+
+
+/* --------------------- Template and inline functions ---------------- */
+
+#ifndef DOXYGEN
+
+// Default constructor. Nothing is initialized explicitly; all members are
+// default-constructed.
+template <int N, typename T>
+TableBase<N,T>::TableBase ()
+{
+}
+
+
+
+// Construct a table with the given extents by handing them to reinit().
+template <int N, typename T>
+TableBase<N,T>::TableBase (const TableIndices<N> &sizes)
+{
+  this->reinit (sizes);
+}
+
+
+
+// Construct a table with the given extents and fill it from the sequence
+// that starts at 'entries'. 'C_style_indexing' is passed on unchanged to
+// fill(), which selects the order in which the entries are stored.
+template <int N, typename T>
+template <typename InputIterator>
+TableBase<N,T>::TableBase (const TableIndices<N> &sizes,
+                           InputIterator          entries,
+                           const bool             C_style_indexing)
+{
+  // size the table first, then copy the entries into it
+  this->reinit (sizes);
+  this->fill (entries, C_style_indexing);
+}
+
+
+
+
+// Copy constructor. The Subscriptor base is default-constructed rather than
+// copied; extents and values are taken from 'src'.
+template <int N, typename T>
+TableBase<N,T>::TableBase (const TableBase<N,T> &src)
+  : Subscriptor ()
+{
+  // adopt the extents of src; default initialization of the entries is
+  // skipped because the values are assigned from src right afterwards
+  const bool omit_default_initialization = true;
+  reinit (src.table_size, omit_default_initialization);
+
+  values = src.values;
+}
+
+
+
+// Converting copy constructor from a table with the same number of
+// dimensions but element type T2. The elements are copied one by one with
+// std::copy, i.e. each one is converted from T2 to T by assignment.
+template <int N, typename T>
+template <typename T2>
+TableBase<N,T>::TableBase (const TableBase<N,T2> &src)
+{
+  this->reinit (src.table_size);
+
+  // nothing to copy for an empty source table
+  const bool src_has_entries = (src.n_elements() != 0);
+  if (src_has_entries)
+    std::copy (src.values.begin(),
+               src.values.end(),
+               values.begin());
+}
+
+
+
+// Read or write the data of this object from or to the given archive: first
+// the Subscriptor base class part, then the stored values, then the extents
+// of the table. The version argument is unused.
+template <int N, typename T>
+template <class Archive>
+inline
+void
+TableBase<N,T>::serialize (Archive &ar, const unsigned int)
+{
+  // base class part
+  Subscriptor &base = *this;
+  ar &base;
+
+  // own members, in the order values, table_size
+  ar &values;
+  ar &table_size;
+}
+
+
+
+namespace internal
+{
+  namespace TableBaseAccessors
+  {
+    // Constructor. Initializes the 'table' and 'data' members from the
+    // arguments of the same name.
+    template <int N, typename T, bool C, unsigned int P>
+    inline
+    Accessor<N,T,C,P>::Accessor (const TableType &table,
+                                 const iterator    data)
+      : table (table),
+        data (data)
+    {
+    }
+
+
+
+    // Copy constructor. The new accessor refers to the same table and the
+    // same data position as 'a'.
+    template <int N, typename T, bool C, unsigned int P>
+    inline
+    Accessor<N,T,C,P>::Accessor (const Accessor &a)
+      : table (a.table),
+        data (a.data)
+    {
+    }
+
+
+
+    // Default constructor. It is not meant to be called: the members are
+    // initialized from null placeholders, and the assertion in the body
+    // always fails.
+    template <int N, typename T, bool C, unsigned int P>
+    inline
+    Accessor<N,T,C,P>::Accessor ()
+      : table (*static_cast<const TableType *>(0)),
+        data (0)
+    {
+      // accessors are temporary objects that are always created from a
+      // table and a data position, so getting here is an internal error
+      Assert (false, ExcInternalError());
+    }
+
+
+
+    // Descend one level: return the accessor for the i-th subobject, which
+    // has one dimension fewer (P-1) than the object this accessor refers
+    // to. The index is checked against the extent of dimension N-P.
+    template <int N, typename T, bool C, unsigned int P>
+    inline
+    Accessor<N,T,C,P-1>
+    Accessor<N,T,C,P>::operator [] (const unsigned int i) const
+    {
+      Assert (i < table.size()[N-P],
+              ExcIndexRange (i, 0, table.size()[N-P]));
+
+      // shortcut for i==0: the first subobject starts where this object
+      // starts, so no offset needs to be computed
+      if (i == 0)
+        return Accessor<N,T,C,P-1> (table, data);
+
+      // number of elements of one subobject, i.e. the product of the
+      // extents of the last P-1 dimensions. note that P>1 here, since
+      // otherwise the specialization would have been taken
+      size_type stride = table.size()[N-1];
+      int p = P-1;
+      while (p > 1)
+        {
+          stride *= table.size()[N-p];
+          --p;
+        }
+
+      return Accessor<N,T,C,P-1> (table, data + i*stride);
+    }
+
+
+
+    // Constructor of the specialization for one remaining dimension.
+    // Initializes the 'table' and 'data' members from the arguments of the
+    // same name.
+    template <int N, typename T, bool C>
+    inline
+    Accessor<N,T,C,1>::Accessor (const TableType &table,
+                                 const iterator    data)
+      : table (table),
+        data (data)
+    {
+    }
+
+
+
+    // Default constructor of the specialization for one remaining
+    // dimension. It is not meant to be called: the members are initialized
+    // from null placeholders, and the assertion in the body always fails.
+    template <int N, typename T, bool C>
+    inline
+    Accessor<N,T,C,1>::Accessor ()
+      : table (*static_cast<const TableType *>(0)),
+        data (0)
+    {
+      // accessors are temporary objects that are always created from a
+      // table and a data position, so getting here is an internal error
+      Assert (false, ExcInternalError());
+    }
+
+
+
+    // Copy constructor of the specialization for one remaining dimension.
+    // The new accessor refers to the same table and data position as 'a'.
+    template <int N, typename T, bool C>
+    inline
+    Accessor<N,T,C,1>::Accessor (const Accessor &a)
+      : table (a.table),
+        data (a.data)
+    {
+    }
+
+
+
+    // Return a reference to the i-th element of the row this accessor
+    // refers to. The index is checked against the extent of the last
+    // dimension of the table.
+    template <int N, typename T, bool C>
+    inline
+    typename Accessor<N,T,C,1>::reference
+    Accessor<N,T,C,1>::operator [] (const unsigned int i) const
+    {
+      Assert (i < table.size()[N-1],
+              ExcIndexRange (i, 0, table.size()[N-1]));
+
+      const iterator entry = data + i;
+      return *entry;
+    }
+
+
+
+    // Return the length of the row this accessor refers to, which is the
+    // extent of the last dimension of the table.
+    template <int N, typename T, bool C>
+    inline
+    unsigned int
+    Accessor<N,T,C,1>::size () const
+    {
+      const unsigned int last_dimension = N-1;
+      return table.size()[last_dimension];
+    }
+
+
+
+    // Return an iterator to the first element of the row this accessor
+    // refers to; this is simply the stored data position.
+    template <int N, typename T, bool C>
+    inline
+    typename Accessor<N,T,C,1>::iterator
+    Accessor<N,T,C,1>::begin () const
+    {
+      const iterator first = data;
+      return first;
+    }
+
+
+
+    // Return an iterator one past the last element of the row this accessor
+    // refers to, i.e. the stored data position advanced by the extent of the
+    // last dimension of the table.
+    template <int N, typename T, bool C>
+    inline
+    typename Accessor<N,T,C,1>::iterator
+    Accessor<N,T,C,1>::end () const
+    {
+      const iterator past_the_end = data + table.size()[N-1];
+      return past_the_end;
+    }
+  }
+}
+
+
+
+// Destructor. Nothing is done explicitly; the members release their
+// resources in their own destructors.
+template <int N, typename T>
+inline
+TableBase<N,T>::~TableBase ()
+{
+}
+
+
+
+// Copy assignment. After the call this table has the extents and the
+// values of 'm'. Returns a reference to this object.
+template <int N, typename T>
+inline
+TableBase<N,T> &
+TableBase<N,T>::operator = (const TableBase<N,T> &m)
+{
+  // self-assignment leaves the object unchanged. returning early avoids
+  // assigning 'values' to itself, so that correctness does not depend on
+  // the assignment operator of AlignedVector being self-assignment safe
+  if (this == &m)
+    return *this;
+
+  // copy the values only if there are any; for an empty source table the
+  // reinit() call below releases the storage and zeroes the extents
+  if (!m.empty())
+    values = m.values;
+
+  // adopt the extents of m. default initialization is omitted so that the
+  // values copied above are kept
+  reinit (m.size(), true);
+
+  return *this;
+}
+
+
+
+// Converting assignment from a table with the same number of dimensions but
+// element type T2. This table takes over the extents of 'm'; the elements
+// are copied one by one with std::copy. Returns a reference to this object.
+template <int N, typename T>
+template <typename T2>
+inline
+TableBase<N,T> &
+TableBase<N,T>::operator = (const TableBase<N,T2> &m)
+{
+  // resize without default initialization, since all entries are
+  // overwritten below
+  const bool omit_default_initialization = true;
+  reinit (m.size(), omit_default_initialization);
+
+  if (!empty())
+    {
+      const size_type n_entries = n_elements();
+      std::copy (m.values.begin(),
+                 m.values.begin() + n_entries,
+                 values.begin());
+    }
+
+  return *this;
+}
+
+
+// Test two tables for equality. Two tables are equal if they have the same
+// extent in every dimension and store equal values.
+//
+// The extents have to be compared in addition to the values: tables of
+// different shape can hold the same sequence of elements (e.g. a 2x3 and a
+// 3x2 table), and comparing only 'values' would report them as equal.
+template <int N, typename T>
+inline
+bool
+TableBase<N,T>::operator == (const TableBase<N,T> &T2)  const
+{
+  for (unsigned int n=0; n<N; ++n)
+    if (table_size[n] != T2.table_size[n])
+      return false;
+
+  return (values == T2.values);
+}
+
+
+
+// Set every element of the table to a default-constructed T. Nothing is
+// done for an empty table.
+template <int N, typename T>
+inline
+void
+TableBase<N,T>::reset_values ()
+{
+  if (n_elements() == 0)
+    return;
+
+  // let the container set all of its entries
+  values.fill (T());
+}
+
+
+
+// Set every element of the table to 'value'. Nothing is done for an empty
+// table.
+template <int N, typename T>
+inline
+void
+TableBase<N,T>::fill (const T &value)
+{
+  if (n_elements() == 0)
+    return;
+
+  values.fill (value);
+}
+
+
+
+
+// Resize the table to the extents given in 'new_sizes'.
+//
+// If any of the extents is zero, the storage is resized to zero and all
+// extents are set to zero. Otherwise the storage is resized to the product
+// of the extents and, unless 'omit_default_initialization' is true, every
+// element is set to a default-constructed T.
+template <int N, typename T>
+inline
+void
+TableBase<N,T>::reinit (const TableIndices<N> &new_sizes,
+                        const bool             omit_default_initialization)
+{
+  table_size = new_sizes;
+
+  const size_type n_new_elements = n_elements();
+
+  // a zero size was requested: give up all entries, and set all extents to
+  // zero even if only some of them were zero. this simplifies some
+  // assertions
+  if (n_new_elements == 0)
+    {
+      values.resize (0);
+      table_size = TableIndices<N>();
+      return;
+    }
+
+  // the caller does not want the entries initialized: just adjust the size
+  if (omit_default_initialization)
+    {
+      values.resize_fast (n_new_elements);
+      return;
+    }
+
+  // entries have to be initialized. if the values field was empty, resize()
+  // can set all entries directly. otherwise use the fast resize and fill in
+  // all elements by hand, as required by the design of this class. (using a
+  // separate code path for the empty case makes sure the memory is touched
+  // only once for non-trivial classes that need to initialize the memory
+  // also in resize_fast.)
+  if (values.empty())
+    values.resize (n_new_elements, T());
+  else
+    {
+      values.resize_fast (n_new_elements);
+      values.fill (T());
+    }
+}
+
+
+
+// Return the extents of the table in all N dimensions.
+template <int N, typename T>
+inline
+const TableIndices<N> &
+TableBase<N,T>::size () const
+{
+  return this->table_size;
+}
+
+
+
+// Return the extent of the table in dimension 'i'. The dimension index is
+// checked to be less than N.
+template <int N, typename T>
+inline
+unsigned int
+TableBase<N,T>::size (const unsigned int i) const
+{
+  Assert (i<N,
+          ExcIndexRange(i,0,N));
+
+  return this->table_size[i];
+}
+
+
+
+// Return the total number of elements of the table, computed as the product
+// of the extents in all N dimensions.
+template <int N, typename T>
+inline
+typename TableBase<N,T>::size_type
+TableBase<N,T>::n_elements () const
+{
+  size_type product = 1;
+  unsigned int d = 0;
+  while (d < N)
+    {
+      product *= table_size[d];
+      ++d;
+    }
+  return product;
+}
+
+
+
+// Return whether the table has no elements, i.e. whether the product of its
+// extents is zero.
+template <int N, typename T>
+inline
+bool
+TableBase<N,T>::empty () const
+{
+  const size_type n_entries = n_elements();
+  return (n_entries == 0);
+}
+
+
+
+namespace internal
+{
+  namespace Table
+  {
+    // Fill a one-dimensional table from the sequence starting at 'entries',
+    // consuming one entry per table element.
+    template <typename InputIterator, typename T>
+    void fill_Fortran_style (InputIterator  entries,
+                             TableBase<1,T>  &table)
+    {
+      unsigned int i = 0;
+      while (i < table.size()[0])
+        {
+          table(TableIndices<1>(i)) = *entries++;
+          ++i;
+        }
+    }
+
+
+    // Fill a two-dimensional table from the sequence starting at 'entries'.
+    // The first index runs fastest: consecutive entries go to (0,0), (1,0),
+    // ..., then (0,1), (1,1), and so on.
+    template <typename InputIterator, typename T>
+    void fill_Fortran_style (InputIterator  entries,
+                             TableBase<2,T>  &table)
+    {
+      // outer loop over the second index, inner loop over the first
+      for (unsigned int col=0; col<table.size()[1]; ++col)
+        {
+          for (unsigned int row=0; row<table.size()[0]; ++row)
+            {
+              table(TableIndices<2>(row,col)) = *entries++;
+            }
+        }
+    }
+
+
+    // Fill a three-dimensional table from the sequence starting at
+    // 'entries'. The first index runs fastest, the last index slowest.
+    template <typename InputIterator, typename T>
+    void fill_Fortran_style (InputIterator  entries,
+                             TableBase<3,T>  &table)
+    {
+      // loops are nested from the last index (outermost) to the first index
+      // (innermost)
+      for (unsigned int i2=0; i2<table.size()[2]; ++i2)
+        {
+          for (unsigned int i1=0; i1<table.size()[1]; ++i1)
+            {
+              for (unsigned int i0=0; i0<table.size()[0]; ++i0)
+                {
+                  table(TableIndices<3>(i0,i1,i2)) = *entries++;
+                }
+            }
+        }
+    }
+
+
+    // Fallback for all table dimensions that have no dedicated overload.
+    // Filling in this order is not implemented for them: the function
+    // ignores both arguments and only raises the assertion below.
+    template <typename InputIterator, typename T, int N>
+    void fill_Fortran_style (InputIterator,
+                             TableBase<N,T> &)
+    {
+      Assert (false,
+              ExcNotImplemented());
+    }
+  }
+}
+
+
+// Fill the table from the sequence starting at 'entries'. The table must
+// not be empty.
+//
+// If 'C_style_indexing' is true, the entries are copied into the elements
+// in the order in which they are stored in 'values'. Otherwise the work is
+// delegated to internal::Table::fill_Fortran_style().
+template <int N, typename T>
+template <typename InputIterator>
+inline
+void
+TableBase<N,T>::fill (InputIterator entries,
+                      const bool C_style_indexing)
+{
+  Assert (n_elements() != 0,
+          ExcMessage("Trying to fill an empty matrix."));
+
+  if (!C_style_indexing)
+    {
+      internal::Table::fill_Fortran_style (entries, *this);
+      return;
+    }
+
+  // C-style indexing: walk through the storage front to back
+  typename AlignedVector<T>::iterator dst = values.begin();
+  while (dst != values.end())
+    {
+      *dst = *entries++;
+      ++dst;
+    }
+}
+
+
+
+// Exchange the contents of this table and 'v': both the extents and the
+// stored values are swapped.
+template <int N, typename T>
+inline
+void
+TableBase<N,T>::swap (TableBase<N,T> &v)
+{
+  // the two swaps are independent of each other
+  std::swap (table_size, v.table_size);
+  values.swap (v.values);
+}
+
+
+
+// Return the sum of the size of this object itself and of what
+// MemoryConsumption::memory_consumption() reports for the stored values.
+template <int N, typename T>
+inline
+std::size_t
+TableBase<N,T>::memory_consumption () const
+{
+  const std::size_t size_of_object = sizeof(*this);
+  const std::size_t size_of_values = MemoryConsumption::memory_consumption(values);
+
+  return size_of_object + size_of_values;
+}
+
+
+
+// Return the position in the linear 'values' storage of the element with
+// the given indices. The position is
+//   (...((i0*s1 + i1)*s2 + i2)...)*s_{N-1} + i_{N-1},
+// where i_n are the indices and s_n the extents of the table, i.e. the last
+// index runs fastest.
+template <int N, typename T>
+inline
+typename TableBase<N,T>::size_type
+TableBase<N,T>::position (const TableIndices<N> &indices) const
+{
+  // the small numbers of dimensions are written out explicitly to make the
+  // job somewhat easier for the compiler; the general formula follows below
+  if (N == 1)
+    return indices[0];
+
+  if (N == 2)
+    return size_type(indices[0])*table_size[1] + indices[1];
+
+  if (N == 3)
+    return ((size_type(indices[0])*table_size[1] + indices[1])*table_size[2]
+            + indices[2]);
+
+  // general case
+  size_type pos = indices[0];
+  for (unsigned int d=1; d<N; ++d)
+    pos = pos*table_size[d] + indices[d];
+  return pos;
+}
+
+
+
+// Read-only access to the element with the given indices. Every index is
+// checked against the extent of the table in its dimension.
+template <int N, typename T>
+inline
+typename AlignedVector<T>::const_reference
+TableBase<N,T>::operator () (const TableIndices<N> &indices) const
+{
+  for (unsigned int d=0; d<N; ++d)
+    {
+      Assert (indices[d] < table_size[d],
+              ExcIndexRange (indices[d], 0, table_size[d]));
+    }
+
+  return this->el (indices);
+}
+
+
+
+// Read-write access to the element with the given indices. Every index is
+// checked against the extent of the table in its dimension.
+template <int N, typename T>
+inline
+typename AlignedVector<T>::reference
+TableBase<N,T>::operator () (const TableIndices<N> &indices)
+{
+  for (unsigned int d=0; d<N; ++d)
+    {
+      Assert (indices[d] < table_size[d],
+              ExcIndexRange (indices[d], 0, table_size[d]));
+    }
+
+  return this->el (indices);
+}
+
+
+
+// Read-only access to the element with the given indices. The indices are
+// not checked here; they are only translated into a position in the
+// 'values' storage.
+template <int N, typename T>
+inline
+typename AlignedVector<T>::const_reference
+TableBase<N,T>::el (const TableIndices<N> &indices) const
+{
+  const size_type offset = position(indices);
+  return values[offset];
+}
+
+
+
+// Read-write access to the element with the given indices. The position
+// computed from the indices is checked against the size of the 'values'
+// storage.
+template <int N, typename T>
+inline
+typename AlignedVector<T>::reference
+TableBase<N,T>::el (const TableIndices<N> &indices)
+{
+  const size_type offset = position(indices);
+  Assert (offset < values.size(),
+          ExcIndexRange (offset, 0, values.size()));
+
+  return values[offset];
+}
+
+
+
+// Default constructor. The base class is default-constructed.
+template <typename T>
+inline
+Table<1,T>::Table ()
+{
+}
+
+
+
+// Constructor. The given size is wrapped into a TableIndices<1> object and
+// passed to the base class.
+template <typename T>
+inline
+Table<1,T>::Table (const unsigned int size)
+  : TableBase<1,T> (TableIndices<1> (size))
+{
+}
+
+
+
+// Constructor. The given size (wrapped into a TableIndices<1> object), the
+// start of the entries and the indexing flag are all passed to the
+// corresponding constructor of the base class.
+template <typename T>
+template <typename InputIterator>
+inline
+Table<1,T>::Table (const unsigned int size,
+                   InputIterator      entries,
+                   const bool         C_style_indexing)
+  : TableBase<1,T> (TableIndices<1> (size), entries, C_style_indexing)
+{
+}
+
+
+
+// Read-only access to the i-th element. The index is checked against the
+// size of the table.
+template <typename T>
+inline
+typename AlignedVector<T>::const_reference
+Table<1,T>::operator [] (const unsigned int i) const
+{
+  Assert (i < this->table_size[0], ExcIndexRange (i, 0, this->table_size[0]));
+
+  // in one dimension the index is also the position in the storage
+  return this->values[i];
+}
+
+
+
+// Read-write access to the i-th element. The index is checked against the
+// size of the table.
+template <typename T>
+inline
+typename AlignedVector<T>::reference
+Table<1,T>::operator [] (const unsigned int i)
+{
+  Assert (i < this->table_size[0], ExcIndexRange (i, 0, this->table_size[0]));
+
+  // in one dimension the index is also the position in the storage
+  return this->values[i];
+}
+
+
+
+// Read-only access to the i-th element with function call syntax. The index
+// is checked against the size of the table.
+template <typename T>
+inline
+typename AlignedVector<T>::const_reference
+Table<1,T>::operator () (const unsigned int i) const
+{
+  Assert (i < this->table_size[0], ExcIndexRange (i, 0, this->table_size[0]));
+
+  // in one dimension the index is also the position in the storage
+  return this->values[i];
+}
+
+
+
+// Read-write access to the i-th element with function call syntax. The
+// index is checked against the size of the table.
+template <typename T>
+inline
+typename AlignedVector<T>::reference
+Table<1,T>::operator () (const unsigned int i)
+{
+  Assert (i < this->table_size[0], ExcIndexRange (i, 0, this->table_size[0]));
+
+  // in one dimension the index is also the position in the storage
+  return this->values[i];
+}
+
+
+
+// Read-only element access through a TableIndices object; forwards to the
+// operator of the base class.
+template <typename T>
+inline
+typename AlignedVector<T>::const_reference
+Table<1,T>::operator () (const TableIndices<1> &indices) const
+{
+  return this->TableBase<1,T>::operator () (indices);
+}
+
+
+
+// Read-write element access through a TableIndices object; forwards to the
+// operator of the base class.
+template <typename T>
+inline
+typename AlignedVector<T>::reference
+Table<1,T>::operator () (const TableIndices<1> &indices)
+{
+  return this->TableBase<1,T>::operator () (indices);
+}
+
+
+//---------------------------------------------------------------------------
+
+// Default constructor. The base class is default-constructed.
+template <typename T>
+inline
+Table<2,T>::Table ()
+{
+}
+
+
+
+// Constructor. The two sizes are wrapped, in the given order, into a
+// TableIndices<2> object and passed to the base class.
+template <typename T>
+inline
+Table<2,T>::Table (const unsigned int size1,
+                   const unsigned int size2)
+  : TableBase<2,T> (TableIndices<2> (size1, size2))
+{
+}
+
+
+
+// Constructor. The two sizes (wrapped, in the given order, into a
+// TableIndices<2> object), the start of the entries and the indexing flag
+// are all passed to the corresponding constructor of the base class.
+template <typename T>
+template <typename InputIterator>
+inline
+Table<2,T>::Table (const unsigned int size1,
+                   const unsigned int size2,
+                   InputIterator      entries,
+                   const bool         C_style_indexing)
+  : TableBase<2,T> (TableIndices<2> (size1, size2), entries, C_style_indexing)
+{
+}
+
+
+
+// Resize the table to size1 x size2. The two sizes are wrapped into a
+// TableIndices<2> object and handed, together with the initialization flag,
+// to reinit() of the base class.
+template <typename T>
+inline
+void
+Table<2,T>::reinit (const unsigned int size1,
+                    const unsigned int size2,
+                    const bool         omit_default_initialization)
+{
+  const TableIndices<2> new_sizes (size1, size2);
+  TableBase<2,T>::reinit (new_sizes, omit_default_initialization);
+}
+
+
+
+// Read-only access to row i. Returns an accessor that refers to this table
+// and to the position of the first element of the row, which is i times the
+// number of columns past the start of the storage. The row index is checked
+// against the first extent of the table.
+template <typename T>
+inline
+dealii::internal::TableBaseAccessors::Accessor<2,T,true,1>
+Table<2,T>::operator [] (const unsigned int i) const
+{
+  typedef dealii::internal::TableBaseAccessors::Accessor<2,T,true,1> RowAccessor;
+
+  Assert (i < this->table_size[0], ExcIndexRange (i, 0, this->table_size[0]));
+
+  return RowAccessor (*this,
+                      this->values.begin() + size_type(i)*n_cols());
+}
+
+
+
+// Read-write access to row i. Returns an accessor that refers to this table
+// and to the position of the first element of the row, which is i times the
+// number of columns past the start of the storage. The row index is checked
+// against the first extent of the table.
+template <typename T>
+inline
+dealii::internal::TableBaseAccessors::Accessor<2,T,false,1>
+Table<2,T>::operator [] (const unsigned int i)
+{
+  typedef dealii::internal::TableBaseAccessors::Accessor<2,T,false,1> RowAccessor;
+
+  Assert (i < this->table_size[0], ExcIndexRange (i, 0, this->table_size[0]));
+
+  return RowAccessor (*this,
+                      this->values.begin() + size_type(i)*n_cols());
+}
+
+
+
+// Read-only access to element (i,j). Both indices are checked against the
+// corresponding extent of the table.
+template <typename T>
+inline
+typename AlignedVector<T>::const_reference
+Table<2,T>::operator () (const unsigned int i,
+                         const unsigned int j) const
+{
+  Assert (i < this->table_size[0], ExcIndexRange (i, 0, this->table_size[0]));
+  Assert (j < this->table_size[1], ExcIndexRange (j, 0, this->table_size[1]));
+
+  // row i starts at i times the second extent; j is the offset in the row
+  const size_type offset = size_type(i)*this->table_size[1] + j;
+  return this->values[offset];
+}
+
+
+
+// Read-write access to element (i,j). Both indices are checked against the
+// corresponding extent of the table.
+template <typename T>
+inline
+typename AlignedVector<T>::reference
+Table<2,T>::operator () (const unsigned int i,
+                         const unsigned int j)
+{
+  Assert (i < this->table_size[0], ExcIndexRange (i, 0, this->table_size[0]));
+  Assert (j < this->table_size[1], ExcIndexRange (j, 0, this->table_size[1]));
+
+  // row i starts at i times the second extent; j is the offset in the row
+  const size_type offset = size_type(i)*this->table_size[1] + j;
+  return this->values[offset];
+}
+
+
+
+// Read-only element access through a TableIndices object; forwards to the
+// operator of the base class.
+template <typename T>
+inline
+typename AlignedVector<T>::const_reference
+Table<2,T>::operator () (const TableIndices<2> &indices) const
+{
+  return this->TableBase<2,T>::operator () (indices);
+}
+
+
+
+// Read-write element access through a TableIndices object; forwards to the
+// operator of the base class.
+template <typename T>
+inline
+typename AlignedVector<T>::reference
+Table<2,T>::operator () (const TableIndices<2> &indices)
+{
+  return this->TableBase<2,T>::operator () (indices);
+}
+
+
+
+// Read-only access to element (i,j). The indices are not checked in this
+// function.
+template <typename T>
+inline
+typename AlignedVector<T>::const_reference
+Table<2,T>::el (const unsigned int i,
+                const unsigned int j) const
+{
+  // row i starts at i times the second extent; j is the offset in the row
+  const size_type offset = size_type(i)*this->table_size[1] + j;
+  return this->values[offset];
+}
+
+
+
+template <typename T>
+inline typename AlignedVector<T>::reference
+Table<2,T>::el (const unsigned int i, const unsigned int j)
+{
+  // Non-const variant; again no Assert checks are made on i and j.
+  const size_type offset = size_type(i)*this->table_size[1] + j;
+  return this->values[offset];
+}
+
+
+
+template <typename T>
+inline unsigned int
+Table<2,T>::n_rows () const
+{
+  const unsigned int row_count = this->table_size[0];   // first extent
+  return row_count;
+}
+
+
+
+template <typename T>
+inline unsigned int
+Table<2,T>::n_cols () const
+{
+  const unsigned int column_count = this->table_size[1];   // second extent
+  return column_count;
+}
+
+
+
+//---------------------------------------------------------------------------
+
+// Default constructor: empty body, the base class is default-constructed.
+template <typename T>
+inline TransposeTable<T>::TransposeTable ()
+{}
+
+
+
+// The two extents are handed to the TableBase constructor in swapped order.
+template <typename T>
+inline
+TransposeTable<T>::TransposeTable (const unsigned int size1,
+                                   const unsigned int size2)
+  : TableBase<2,T> (TableIndices<2> (size2, size1))
+{}
+
+
+
+template <typename T>
+inline void
+TransposeTable<T>::reinit (const unsigned int size1,
+                           const unsigned int size2,
+                           const bool         omit_default_initialization)
+{
+  const TableIndices<2> swapped_sizes (size2, size1);   // note the swapped order
+  this->TableBase<2,T>::reinit (swapped_sizes, omit_default_initialization);
+}
+
+
+
+template <typename T>
+inline
+typename AlignedVector<T>::const_reference
+TransposeTable<T>::operator () (const unsigned int i,
+                                const unsigned int j) const
+{
+  // Transposed storage: (i,j) lives at offset j*table_size[1] + i.
+  Assert (i < this->table_size[1], ExcIndexRange (i, 0, this->table_size[1]));
+  Assert (j < this->table_size[0], ExcIndexRange (j, 0, this->table_size[0]));
+  const size_type offset = size_type(j)*this->table_size[1] + i;
+  return this->values[offset];
+}
+
+
+
+template <typename T>
+inline
+typename AlignedVector<T>::reference
+TransposeTable<T>::operator () (const unsigned int i,
+                                const unsigned int j)
+{
+  // Non-const variant; i is bounded by table_size[1], j by table_size[0].
+  Assert (i < this->table_size[1], ExcIndexRange (i, 0, this->table_size[1]));
+  Assert (j < this->table_size[0], ExcIndexRange (j, 0, this->table_size[0]));
+  const size_type offset = size_type(j)*this->table_size[1] + i;
+  return this->values[offset];
+}
+
+
+
+template <typename T>
+inline typename AlignedVector<T>::const_reference
+TransposeTable<T>::el (const unsigned int i, const unsigned int j) const
+{
+  // Same offset as operator()(i,j), but without the Assert checks on i and j.
+  const size_type offset = size_type(j)*this->table_size[1] + i;
+  return this->values[offset];
+}
+
+
+
+template <typename T>
+inline typename AlignedVector<T>::reference
+TransposeTable<T>::el (const unsigned int i, const unsigned int j)
+{
+  // Non-const variant; again no Assert checks are made on i and j.
+  const size_type offset = size_type(j)*this->table_size[1] + i;
+  return this->values[offset];
+}
+
+
+
+template <typename T>
+inline unsigned int
+TransposeTable<T>::n_rows () const
+{
+  const unsigned int row_count = this->table_size[1];   // second stored extent
+  return row_count;
+}
+
+
+
+template <typename T>
+inline unsigned int
+TransposeTable<T>::n_cols () const
+{
+  const unsigned int column_count = this->table_size[0];   // first stored extent
+  return column_count;
+}
+
+
+
+//---------------------------------------------------------------------------
+
+
+// Default constructor: empty body, the base class is default-constructed.
+template <typename T>
+inline Table<3,T>::Table ()
+{}
+
+
+
+// Forwards the three extents, in order, to the TableBase<3,T> constructor.
+template <typename T>
+inline
+Table<3,T>::Table (const unsigned int size1,
+                   const unsigned int size2,
+                   const unsigned int size3)
+  : TableBase<3,T> (TableIndices<3> (size1, size2, size3))
+{}
+
+
+
+// Like the size-only constructor, but the iterator 'entries' and the flag
+// 'C_style_indexing' are passed along to the TableBase constructor as well.
+// The constructor body itself is empty.
+template <typename T>
+template <typename InputIterator>
+inline
+Table<3,T>::Table (const unsigned int size1,
+                   const unsigned int size2,
+                   const unsigned int size3,
+                   InputIterator entries,
+                   const bool C_style_indexing)
+  : TableBase<3,T> (TableIndices<3> (size1, size2, size3), entries, C_style_indexing)
+{}
+
+
+
+template <typename T>
+inline
+dealii::internal::TableBaseAccessors::Accessor<3,T,true,2>
+Table<3,T>::operator [] (const unsigned int i) const
+{
+  typedef dealii::internal::TableBaseAccessors::Accessor<3,T,true,2> SliceAccessor;
+  Assert (i < this->table_size[0], ExcIndexRange (i, 0, this->table_size[0]));
+  // All entries with the same first index form one contiguous slice; its
+  // length is the product of the remaining extents.
+  size_type slice_length = this->table_size[1];
+  slice_length *= this->table_size[2];
+  return SliceAccessor (*this, this->values.begin() + i*slice_length);
+}
+
+
+
+template <typename T>
+inline
+dealii::internal::TableBaseAccessors::Accessor<3,T,false,2>
+Table<3,T>::operator [] (const unsigned int i)
+{
+  typedef dealii::internal::TableBaseAccessors::Accessor<3,T,false,2> SliceAccessor;
+  Assert (i < this->table_size[0], ExcIndexRange (i, 0, this->table_size[0]));
+  // Non-const overload. The slice belonging to first index i begins
+  // i*slice_length entries after the start of the stored data.
+  size_type slice_length = this->table_size[1];
+  slice_length *= this->table_size[2];
+  return SliceAccessor (*this, this->values.begin() + i*slice_length);
+}
+
+
+
+template <typename T>
+inline
+typename AlignedVector<T>::const_reference
+Table<3,T>::operator () (const unsigned int i,
+                         const unsigned int j,
+                         const unsigned int k) const
+{
+  // Range-check the indices, then flatten them (the last one runs fastest).
+  Assert (i < this->table_size[0], ExcIndexRange (i, 0, this->table_size[0]));
+  Assert (j < this->table_size[1], ExcIndexRange (j, 0, this->table_size[1]));
+  Assert (k < this->table_size[2], ExcIndexRange (k, 0, this->table_size[2]));
+  size_type offset = i;
+  offset = offset*this->table_size[1] + j;
+  offset = offset*this->table_size[2] + k;
+  return this->values[offset];
+}
+
+
+
+template <typename T>
+inline
+typename AlignedVector<T>::reference
+Table<3,T>::operator () (const unsigned int i,
+                         const unsigned int j,
+                         const unsigned int k)
+{
+  // Non-const variant: same range checks and offset as the const overload.
+  Assert (i < this->table_size[0], ExcIndexRange (i, 0, this->table_size[0]));
+  Assert (j < this->table_size[1], ExcIndexRange (j, 0, this->table_size[1]));
+  Assert (k < this->table_size[2], ExcIndexRange (k, 0, this->table_size[2]));
+  size_type offset = i;
+  offset = offset*this->table_size[1] + j;
+  offset = offset*this->table_size[2] + k;
+  return this->values[offset];
+}
+
+
+
+// Const access by index tuple: handled entirely by TableBase<3,T>.
+template <typename T>
+inline typename AlignedVector<T>::const_reference
+Table<3,T>::operator () (const TableIndices<3> &indices) const
+{
+  return this->TableBase<3,T>::operator () (indices);
+}
+
+
+
+// Non-const access by index tuple: handled entirely by TableBase<3,T>.
+template <typename T>
+inline typename AlignedVector<T>::reference
+Table<3,T>::operator () (const TableIndices<3> &indices)
+{
+  return this->TableBase<3,T>::operator () (indices);
+}
+
+
+
+// Default constructor: empty body, the base class is default-constructed.
+template <typename T>
+inline Table<4,T>::Table ()
+{}
+
+
+
+// Forwards the four extents, in order, to the TableBase<4,T> constructor.
+template <typename T>
+inline
+Table<4,T>::Table (const unsigned int size1,
+                   const unsigned int size2,
+                   const unsigned int size3,
+                   const unsigned int size4)
+  : TableBase<4,T> (TableIndices<4> (size1, size2, size3, size4))
+{}
+
+
+
+template <typename T>
+inline
+dealii::internal::TableBaseAccessors::Accessor<4,T,true,3>
+Table<4,T>::operator [] (const unsigned int i) const
+{
+  typedef dealii::internal::TableBaseAccessors::Accessor<4,T,true,3> SliceAccessor;
+  Assert (i < this->table_size[0], ExcIndexRange (i, 0, this->table_size[0]));
+  // All entries with the same first index form one contiguous slice; its
+  // length is the product of the remaining extents.
+  size_type slice_length = this->table_size[1];
+  slice_length *= this->table_size[2];
+  slice_length *= this->table_size[3];
+  return SliceAccessor (*this, this->values.begin() + i*slice_length);
+}
+
+
+
+template <typename T>
+inline
+dealii::internal::TableBaseAccessors::Accessor<4,T,false,3>
+Table<4,T>::operator [] (const unsigned int i)
+{
+  typedef dealii::internal::TableBaseAccessors::Accessor<4,T,false,3> SliceAccessor;
+  Assert (i < this->table_size[0], ExcIndexRange (i, 0, this->table_size[0]));
+  // Non-const overload. The slice belonging to first index i begins
+  // i*slice_length entries after the start of the stored data.
+  size_type slice_length = this->table_size[1];
+  slice_length *= this->table_size[2];
+  slice_length *= this->table_size[3];
+  return SliceAccessor (*this, this->values.begin() + i*slice_length);
+}
+
+
+
+template <typename T>
+inline
+typename AlignedVector<T>::const_reference
+Table<4,T>::operator () (const unsigned int i,
+                         const unsigned int j,
+                         const unsigned int k,
+                         const unsigned int l) const
+{
+  // Every index must be smaller than the extent of its own dimension.
+  Assert (i < this->table_size[0], ExcIndexRange (i, 0, this->table_size[0]));
+  Assert (j < this->table_size[1], ExcIndexRange (j, 0, this->table_size[1]));
+  Assert (k < this->table_size[2], ExcIndexRange (k, 0, this->table_size[2]));
+  Assert (l < this->table_size[3], ExcIndexRange (l, 0, this->table_size[3]));
+  // Horner-style flattening of the indices; the last index runs fastest.
+  size_type offset = i;
+  offset = offset*this->table_size[1] + j;
+  offset = offset*this->table_size[2] + k;
+  offset = offset*this->table_size[3] + l;
+  return this->values[offset];
+}
+
+
+
+template <typename T>
+inline
+typename AlignedVector<T>::reference
+Table<4,T>::operator () (const unsigned int i,
+                         const unsigned int j,
+                         const unsigned int k,
+                         const unsigned int l)
+{
+  // Non-const variant: same range checks as the const overload.
+  Assert (i < this->table_size[0], ExcIndexRange (i, 0, this->table_size[0]));
+  Assert (j < this->table_size[1], ExcIndexRange (j, 0, this->table_size[1]));
+  Assert (k < this->table_size[2], ExcIndexRange (k, 0, this->table_size[2]));
+  Assert (l < this->table_size[3], ExcIndexRange (l, 0, this->table_size[3]));
+  // Horner-style flattening of the indices; the last index runs fastest.
+  size_type offset = i;
+  offset = offset*this->table_size[1] + j;
+  offset = offset*this->table_size[2] + k;
+  offset = offset*this->table_size[3] + l;
+  return this->values[offset];
+}
+
+
+
+// Const access by index tuple: handled entirely by TableBase<4,T>.
+template <typename T>
+inline typename AlignedVector<T>::const_reference
+Table<4,T>::operator () (const TableIndices<4> &indices) const
+{
+  return this->TableBase<4,T>::operator () (indices);
+}
+
+
+
+// Non-const access by index tuple: handled entirely by TableBase<4,T>.
+template <typename T>
+inline typename AlignedVector<T>::reference
+Table<4,T>::operator () (const TableIndices<4> &indices)
+{
+  return this->TableBase<4,T>::operator () (indices);
+}
+
+
+
+// Default constructor: empty body, the base class is default-constructed.
+template <typename T>
+inline Table<5,T>::Table ()
+{}
+
+
+
+// Forwards the five extents, in order, to the TableBase<5,T> constructor.
+template <typename T>
+inline
+Table<5,T>::Table (const unsigned int size1,
+                   const unsigned int size2,
+                   const unsigned int size3,
+                   const unsigned int size4,
+                   const unsigned int size5)
+  : TableBase<5,T> (TableIndices<5> (size1, size2, size3, size4, size5))
+{}
+
+
+
+template <typename T>
+inline
+dealii::internal::TableBaseAccessors::Accessor<5,T,true,4>
+Table<5,T>::operator [] (const unsigned int i) const
+{
+  typedef dealii::internal::TableBaseAccessors::Accessor<5,T,true,4> SliceAccessor;
+  Assert (i < this->table_size[0], ExcIndexRange (i, 0, this->table_size[0]));
+  // All entries with the same first index form one contiguous slice; its
+  // length is the product of the remaining extents.
+  size_type slice_length = this->table_size[1];
+  slice_length *= this->table_size[2];
+  slice_length *= this->table_size[3];
+  slice_length *= this->table_size[4];
+  return SliceAccessor (*this, this->values.begin() + i*slice_length);
+}
+
+
+
+template <typename T>
+inline
+dealii::internal::TableBaseAccessors::Accessor<5,T,false,4>
+Table<5,T>::operator [] (const unsigned int i)
+{
+  typedef dealii::internal::TableBaseAccessors::Accessor<5,T,false,4> SliceAccessor;
+  Assert (i < this->table_size[0], ExcIndexRange (i, 0, this->table_size[0]));
+  // Non-const overload. The slice belonging to first index i begins
+  // i*slice_length entries after the start of the stored data.
+  size_type slice_length = this->table_size[1];
+  slice_length *= this->table_size[2];
+  slice_length *= this->table_size[3];
+  slice_length *= this->table_size[4];
+  return SliceAccessor (*this, this->values.begin() + i*slice_length);
+}
+
+
+
+template <typename T>
+inline
+typename AlignedVector<T>::const_reference
+Table<5,T>::operator () (const unsigned int i,
+                         const unsigned int j,
+                         const unsigned int k,
+                         const unsigned int l,
+                         const unsigned int m) const
+{
+  // Every index must be smaller than the extent of its own dimension.
+  Assert (i < this->table_size[0], ExcIndexRange (i, 0, this->table_size[0]));
+  Assert (j < this->table_size[1], ExcIndexRange (j, 0, this->table_size[1]));
+  Assert (k < this->table_size[2], ExcIndexRange (k, 0, this->table_size[2]));
+  Assert (l < this->table_size[3], ExcIndexRange (l, 0, this->table_size[3]));
+  Assert (m < this->table_size[4], ExcIndexRange (m, 0, this->table_size[4]));
+
+  // Horner-style flattening of the indices; the last index runs fastest.
+  size_type offset = i;
+  offset = offset*this->table_size[1] + j;
+  offset = offset*this->table_size[2] + k;
+  offset = offset*this->table_size[3] + l;
+  offset = offset*this->table_size[4] + m;
+  return this->values[offset];
+}
+
+
+
+template <typename T>
+inline
+typename AlignedVector<T>::reference
+Table<5,T>::operator () (const unsigned int i,
+                         const unsigned int j,
+                         const unsigned int k,
+                         const unsigned int l,
+                         const unsigned int m)
+{
+  // Non-const variant: same range checks as the const overload.
+  Assert (i < this->table_size[0], ExcIndexRange (i, 0, this->table_size[0]));
+  Assert (j < this->table_size[1], ExcIndexRange (j, 0, this->table_size[1]));
+  Assert (k < this->table_size[2], ExcIndexRange (k, 0, this->table_size[2]));
+  Assert (l < this->table_size[3], ExcIndexRange (l, 0, this->table_size[3]));
+  Assert (m < this->table_size[4], ExcIndexRange (m, 0, this->table_size[4]));
+
+  // Horner-style flattening of the indices; the last index runs fastest.
+  size_type offset = i;
+  offset = offset*this->table_size[1] + j;
+  offset = offset*this->table_size[2] + k;
+  offset = offset*this->table_size[3] + l;
+  offset = offset*this->table_size[4] + m;
+  return this->values[offset];
+}
+
+
+
+// Const access by index tuple: handled entirely by TableBase<5,T>.
+template <typename T>
+inline typename AlignedVector<T>::const_reference
+Table<5,T>::operator () (const TableIndices<5> &indices) const
+{
+  return this->TableBase<5,T>::operator () (indices);
+}
+
+
+
+// Non-const access by index tuple: handled entirely by TableBase<5,T>.
+template <typename T>
+inline typename AlignedVector<T>::reference
+Table<5,T>::operator () (const TableIndices<5> &indices)
+{
+  return this->TableBase<5,T>::operator () (indices);
+}
+
+
+
+// Default constructor: empty body, the base class is default-constructed.
+template <typename T>
+inline Table<6,T>::Table ()
+{}
+
+
+
+// Forwards the six extents, in order, to the TableBase<6,T> constructor.
+template <typename T>
+inline
+Table<6,T>::Table (const unsigned int size1,
+                   const unsigned int size2,
+                   const unsigned int size3,
+                   const unsigned int size4,
+                   const unsigned int size5,
+                   const unsigned int size6)
+  : TableBase<6,T> (TableIndices<6> (size1, size2, size3, size4, size5, size6))
+{}
+
+
+
+template <typename T>
+inline
+dealii::internal::TableBaseAccessors::Accessor<6,T,true,5>
+Table<6,T>::operator [] (const unsigned int i) const
+{
+  typedef dealii::internal::TableBaseAccessors::Accessor<6,T,true,5> SliceAccessor;
+  Assert (i < this->table_size[0], ExcIndexRange (i, 0, this->table_size[0]));
+  // All entries with the same first index form one contiguous slice; its
+  // length is the product of the remaining extents.
+  size_type slice_length = this->table_size[1];
+  slice_length *= this->table_size[2];
+  slice_length *= this->table_size[3];
+  slice_length *= this->table_size[4];
+  slice_length *= this->table_size[5];
+  return SliceAccessor (*this, this->values.begin() + i*slice_length);
+}
+
+
+
+template <typename T>
+inline
+dealii::internal::TableBaseAccessors::Accessor<6,T,false,5>
+Table<6,T>::operator [] (const unsigned int i)
+{
+  typedef dealii::internal::TableBaseAccessors::Accessor<6,T,false,5> SliceAccessor;
+  Assert (i < this->table_size[0], ExcIndexRange (i, 0, this->table_size[0]));
+  // Non-const overload. The slice belonging to first index i begins
+  // i*slice_length entries after the start of the stored data.
+  size_type slice_length = this->table_size[1];
+  slice_length *= this->table_size[2];
+  slice_length *= this->table_size[3];
+  slice_length *= this->table_size[4];
+  slice_length *= this->table_size[5];
+  return SliceAccessor (*this, this->values.begin() + i*slice_length);
+}
+
+
+
+template <typename T>
+inline
+typename AlignedVector<T>::const_reference
+Table<6,T>::operator () (const unsigned int i,
+                         const unsigned int j,
+                         const unsigned int k,
+                         const unsigned int l,
+                         const unsigned int m,
+                         const unsigned int n) const
+{
+  // Every index must be smaller than the extent of its own dimension.
+  Assert (i < this->table_size[0], ExcIndexRange (i, 0, this->table_size[0]));
+  Assert (j < this->table_size[1], ExcIndexRange (j, 0, this->table_size[1]));
+  Assert (k < this->table_size[2], ExcIndexRange (k, 0, this->table_size[2]));
+  Assert (l < this->table_size[3], ExcIndexRange (l, 0, this->table_size[3]));
+  Assert (m < this->table_size[4], ExcIndexRange (m, 0, this->table_size[4]));
+  Assert (n < this->table_size[5], ExcIndexRange (n, 0, this->table_size[5]));
+
+  // Horner-style flattening of the six indices into one offset into
+  // 'values'; the last index runs fastest.
+  size_type offset = i;
+  offset = offset*this->table_size[1] + j;
+  offset = offset*this->table_size[2] + k;
+  offset = offset*this->table_size[3] + l;
+  offset = offset*this->table_size[4] + m;
+  offset = offset*this->table_size[5] + n;
+  return this->values[offset];
+}
+
+
+
+template <typename T>
+inline
+typename AlignedVector<T>::reference
+Table<6,T>::operator () (const unsigned int i,
+                         const unsigned int j,
+                         const unsigned int k,
+                         const unsigned int l,
+                         const unsigned int m,
+                         const unsigned int n)
+{
+  // Non-const variant: same range checks as the const overload.
+  Assert (i < this->table_size[0], ExcIndexRange (i, 0, this->table_size[0]));
+  Assert (j < this->table_size[1], ExcIndexRange (j, 0, this->table_size[1]));
+  Assert (k < this->table_size[2], ExcIndexRange (k, 0, this->table_size[2]));
+  Assert (l < this->table_size[3], ExcIndexRange (l, 0, this->table_size[3]));
+  Assert (m < this->table_size[4], ExcIndexRange (m, 0, this->table_size[4]));
+  Assert (n < this->table_size[5], ExcIndexRange (n, 0, this->table_size[5]));
+
+  // Horner-style flattening of the six indices into one offset into
+  // 'values'; the last index runs fastest.
+  size_type offset = i;
+  offset = offset*this->table_size[1] + j;
+  offset = offset*this->table_size[2] + k;
+  offset = offset*this->table_size[3] + l;
+  offset = offset*this->table_size[4] + m;
+  offset = offset*this->table_size[5] + n;
+  return this->values[offset];
+}
+
+
+
+// Const access by index tuple: handled entirely by TableBase<6,T>.
+template <typename T>
+inline typename AlignedVector<T>::const_reference
+Table<6,T>::operator () (const TableIndices<6> &indices) const
+{
+  return this->TableBase<6,T>::operator () (indices);
+}
+
+
+
+// Non-const access by index tuple: handled entirely by TableBase<6,T>.
+template <typename T>
+inline typename AlignedVector<T>::reference
+Table<6,T>::operator () (const TableIndices<6> &indices)
+{
+  return this->TableBase<6,T>::operator () (indices);
+}
+
+
+
+// Default constructor: empty body, the base class is default-constructed.
+template <typename T>
+inline Table<7,T>::Table ()
+{}
+
+
+
+// Forwards the seven extents, in order, to the TableBase<7,T> constructor.
+template <typename T>
+inline
+Table<7,T>::Table (const unsigned int size1,
+                   const unsigned int size2,
+                   const unsigned int size3,
+                   const unsigned int size4,
+                   const unsigned int size5,
+                   const unsigned int size6,
+                   const unsigned int size7)
+  : TableBase<7,T> (TableIndices<7> (size1, size2, size3, size4, size5, size6, size7))
+{}
+
+
+
+template <typename T>
+inline
+dealii::internal::TableBaseAccessors::Accessor<7,T,true,6>
+Table<7,T>::operator [] (const unsigned int i) const
+{
+  typedef dealii::internal::TableBaseAccessors::Accessor<7,T,true,6> SliceAccessor;
+  Assert (i < this->table_size[0], ExcIndexRange (i, 0, this->table_size[0]));
+  // All entries with the same first index form one contiguous slice; its
+  // length is the product of the remaining extents.
+  size_type slice_length = this->table_size[1];
+  slice_length *= this->table_size[2];
+  slice_length *= this->table_size[3];
+  slice_length *= this->table_size[4];
+  slice_length *= this->table_size[5];
+  slice_length *= this->table_size[6];
+  return SliceAccessor (*this, this->values.begin() + i*slice_length);
+}
+
+
+
+template <typename T>
+inline
+dealii::internal::TableBaseAccessors::Accessor<7,T,false,6>
+Table<7,T>::operator [] (const unsigned int i)
+{
+  typedef dealii::internal::TableBaseAccessors::Accessor<7,T,false,6> SliceAccessor;
+  Assert (i < this->table_size[0], ExcIndexRange (i, 0, this->table_size[0]));
+  // Non-const overload. The slice belonging to first index i begins
+  // i*slice_length entries after the start of the stored data.
+  size_type slice_length = this->table_size[1];
+  slice_length *= this->table_size[2];
+  slice_length *= this->table_size[3];
+  slice_length *= this->table_size[4];
+  slice_length *= this->table_size[5];
+  slice_length *= this->table_size[6];
+  return SliceAccessor (*this, this->values.begin() + i*slice_length);
+}
+
+
+
+template <typename T>
+inline
+typename AlignedVector<T>::const_reference
+Table<7,T>::operator () (const unsigned int i,
+                         const unsigned int j,
+                         const unsigned int k,
+                         const unsigned int l,
+                         const unsigned int m,
+                         const unsigned int n,
+                         const unsigned int o) const
+{
+  // Const element access for a table with seven indices. Every index must
+  // be smaller than the extent of its own dimension.
+  Assert (i < this->table_size[0], ExcIndexRange (i, 0, this->table_size[0]));
+  Assert (j < this->table_size[1], ExcIndexRange (j, 0, this->table_size[1]));
+  Assert (k < this->table_size[2], ExcIndexRange (k, 0, this->table_size[2]));
+  Assert (l < this->table_size[3], ExcIndexRange (l, 0, this->table_size[3]));
+  Assert (m < this->table_size[4], ExcIndexRange (m, 0, this->table_size[4]));
+  Assert (n < this->table_size[5], ExcIndexRange (n, 0, this->table_size[5]));
+  Assert (o < this->table_size[6], ExcIndexRange (o, 0, this->table_size[6]));
+
+  // Horner-style flattening of the seven indices into one offset into
+  // 'values'; the last index runs fastest.
+  size_type offset = i;
+  offset = offset*this->table_size[1] + j;
+  offset = offset*this->table_size[2] + k;
+  offset = offset*this->table_size[3] + l;
+  offset = offset*this->table_size[4] + m;
+  offset = offset*this->table_size[5] + n;
+  offset = offset*this->table_size[6] + o;
+  return this->values[offset];
+}
+
+
+
+template <typename T>
+inline
+typename AlignedVector<T>::reference
+Table<7,T>::operator () (const unsigned int i,
+                         const unsigned int j,
+                         const unsigned int k,
+                         const unsigned int l,
+                         const unsigned int m,
+                         const unsigned int n,
+                         const unsigned int o)
+{
+  // Non-const element access for a table with seven indices. Each index
+  // has to be smaller than the extent of its own dimension; in particular
+  // the last index o is checked against table_size[6], the same bound the
+  // const overload of this operator uses.
+  Assert (i < this->table_size[0], ExcIndexRange (i, 0, this->table_size[0]));
+  Assert (j < this->table_size[1], ExcIndexRange (j, 0, this->table_size[1]));
+  Assert (k < this->table_size[2], ExcIndexRange (k, 0, this->table_size[2]));
+  Assert (l < this->table_size[3], ExcIndexRange (l, 0, this->table_size[3]));
+  Assert (m < this->table_size[4], ExcIndexRange (m, 0, this->table_size[4]));
+  Assert (n < this->table_size[5], ExcIndexRange (n, 0, this->table_size[5]));
+  Assert (o < this->table_size[6], ExcIndexRange (o, 0, this->table_size[6]));
+
+  // Flatten the indices into a single offset into 'values'. The nesting
+  // makes the last index o the fastest-running one.
+  return this->values[(((((size_type(i)*this->table_size[1]+j)
+                          *this->table_size[2] + k)
+                         *this->table_size[3] + l)
+                        *this->table_size[4] + m)
+                       *this->table_size[5] + n)
+                      *this->table_size[6] + o];
+}
+
+
+
+// Const access by index tuple: handled entirely by TableBase<7,T>.
+template <typename T>
+inline typename AlignedVector<T>::const_reference
+Table<7,T>::operator () (const TableIndices<7> &indices) const
+{
+  return this->TableBase<7,T>::operator () (indices);
+}
+
+
+
+// Non-const access by index tuple: handled entirely by TableBase<7,T>.
+template <typename T>
+inline typename AlignedVector<T>::reference
+Table<7,T>::operator () (const TableIndices<7> &indices)
+{
+  return this->TableBase<7,T>::operator () (indices);
+}
+
+
+#endif // DOXYGEN
+
+
+
+/**
+ * Free-standing @p swap for tables. It takes the place of the generic
+ * implementation of the C++ standard library, which goes through a
+ * temporary object, and simply calls the member function
+ * <code>u.swap(v)</code> to exchange the data of the two tables.
+ *
+ * @author Martin Kronbichler, 2013
+ */
+template <int N, typename T>
+inline void swap (TableBase<N,T> &u, TableBase<N,T> &v)
+{
+  u.swap (v);
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/table_handler.h b/include/deal.II/base/table_handler.h
new file mode 100644
index 0000000..a994609
--- /dev/null
+++ b/include/deal.II/base/table_handler.h
@@ -0,0 +1,883 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__table_handler_h
+#define dealii__table_handler_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+
+#include <map>
+#include <vector>
+#include <string>
+#include <fstream>
+
+#include <ostream>
+
+#include <boost/variant.hpp>
+#include <boost/serialization/map.hpp>
+#include <boost/serialization/string.hpp>
+#include <boost/serialization/vector.hpp>
+#include <boost/serialization/split_member.hpp>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+class TableHandler;
+
+namespace internal
+{
+  /**
+   * A <tt>TableEntry</tt> stores the value of a table entry. The value is one
+   * of int, unsigned int, unsigned long long int, double or std::string. In
+   * essence, this structure is the same as the <code>boost::variant</code>
+   * over these types (see the <code>value_type</code> typedef below) but we
+   * wrap this object in a structure for which we can write a function that
+   * can serialize it. This is also why the value is not of type boost::any.
+   */
+  struct TableEntry
+  {
+  public:
+    /**
+     * Default constructor.
+     */
+    TableEntry ();
+
+    /**
+     * Constructor. Initialize this table element with the value
+     * <code>t</code>.
+     */
+    template <typename T>
+    TableEntry (const T &t);
+
+    /**
+     * Return the value stored by this object. The template type T must be one
+     * of <code>int,unsigned int,double,std::string</code> and it must match
+     * the data type of the object originally stored in this TableEntry
+     * object.
+     */
+    template <typename T>
+    T get () const;
+
+    /**
+     * Return the numeric value of this object if data has been stored in it
+     * either as an integer, an unsigned integer, or a double.
+     *
+     * @return double
+     */
+    double get_numeric_value () const;
+
+    /**
+     * Cache the contained value as a string with the given formatting (this
+     * function returns nothing; use get_cached_string() to read the result).
+     * The given parameters from the column definition are used for the
+     * formatting, and the string is stored internally in cached_value. This
+     * routine must be called again if the formatting of the column changes.
+     */
+    void cache_string(bool scientific, unsigned int precision) const;
+
+    /**
+     * Return the value cached using cache_string(). This is just a wrapper
+     * around cached_value.
+     */
+    const std::string &get_cached_string() const;
+
+
+    /**
+     * Return a TableEntry object that has the same data type as the stored
+     * value but with a value that is default constructed for this data type.
+     * This is used to pad columns below previously set ones.
+     */
+    TableEntry get_default_constructed_copy() const;
+
+    /**
+     * Write the data of this object to a stream for the purpose of
+     * serialization.
+     */
+    template <class Archive>
+    void save (Archive &ar, const unsigned int version) const;
+
+    /**
+     * Read the data of this object from a stream for the purpose of
+     * serialization.
+     */
+    template <class Archive>
+    void load (Archive &ar, const unsigned int version);
+
+    BOOST_SERIALIZATION_SPLIT_MEMBER()
+
+  private:
+    /**
+     * Abbreviation for the data type stored by this object.
+     */
+    typedef boost::variant<int,unsigned int,unsigned long long int,double,std::string> value_type;
+
+    /**
+     * Stored value.
+     */
+    value_type value;
+
+    /**
+     * String cache of the value; mutable so the const cache_string() can set it.
+     */
+    mutable std::string cached_value;
+
+    friend class dealii::TableHandler;
+  };
+}
+
+
+/**
+ * The TableHandler stores TableEntries of arbitrary value type and writes the
+ * table as text or in tex format to an output stream. The value type actually
+ * may vary from column to column and from row to row.
+ *
+ * <h3>Usage</h3>
+ *
+ * The most important function is the templatized function
+ * <code>add_value(const std::string &key, const T value)</code> that adds a
+ * column with the name <tt>key</tt> to the table if this column does not yet
+ * exist and adds the given value of type <tt>T</tt> (which must be one of
+ * <tt>int</tt>, <tt>unsigned int</tt>, <tt>double</tt>, <tt>std::string</tt>)
+ * to this column.  After the table is complete there are different
+ * possibilities of output, e.g., into a latex file with write_tex() or as
+ * text with write_text().
+ *
+ * Two (or more) columns may be merged into a "supercolumn" by calling
+ * add_column_to_supercolumn() twice (or multiple times); see there.
+ * Additionally, there is a function to set for each column the precision of
+ * the output of numbers, and there are several functions to prescribe the
+ * format and the captions the columns are written with in tex mode.
+ *
+ * A detailed explanation of this class is also given in the step-13 tutorial
+ * program.
+ *
+ *
+ * <h3>Example</h3>
+ *
+ * This is a simple example demonstrating the usage of this class. The first
+ * column contains the numbers <tt>i=1..n</tt>, the second $1^2,\ldots,n^2$,
+ * the third $\sqrt{1},\ldots,\sqrt{n}$, where the second and third columns
+ * are merged into one supercolumn with the superkey <tt>squares and
+ * roots</tt>. Additionally, the first column is aligned to the right (the
+ * default is <tt>centered</tt>) and the precision of the square roots is set
+ * to 6 (instead of the default of 4).
+ *
+ * @code
+ * TableHandler table;
+ *
+ * for (unsigned int i=1; i<=n; ++i)
+ *   {
+ *     table.add_value("numbers", i);
+ *     table.add_value("squares", i*i);
+ *     table.add_value("square roots", sqrt(i));
+ *   }
+ *                                  // merge the second and third column
+ * table.add_column_to_supercolumn("squares", "squares and roots");
+ * table.add_column_to_supercolumn("square roots", "squares and roots");
+ *
+ *                                  // additional settings
+ * table.set_tex_format("numbers", "r");
+ * table.set_precision("square roots", 6);
+ *
+ *                                  // output
+ * std::ofstream out_file("number_table.tex");
+ * table.write_tex(out_file);
+ * out_file.close();
+ * @endcode
+ *
+ *
+ * <h3>Dealing with sparse data: auto-fill mode</h3>
+ *
+ * When generating output, TableHandler expects that all columns have the
+ * exact same number of elements in it so that the result is in fact a table.
+ * This assumes that in each of the iterations (time steps, nonlinear
+ * iterations, etc) you fill every single column. On the other hand, this may
+ * not always be what you want to do. For example, it could be that the
+ * function that computes the nonlinear residual is only called every few time
+ * steps; or, a function computing statistics of the mesh is only called
+ * whenever the mesh is in fact refined. In these cases, the add_value()
+ * function will be called less often for some columns and the column would
+ * therefore have fewer elements; furthermore, these elements would not be
+ * aligned with the rows that contain the other data elements that were
+ * produced during this iteration. An entirely different scenario is that the
+ * table is filled and at a later time we use the data in there to compute the
+ * elements of other rows; the ConvergenceTable class does something like
+ * this.
+ *
+ * To support both scenarios, the TableHandler class has a property called
+ * <i>auto-fill mode</i>. By default, auto-fill mode is off, but it can be
+ * enabled by calling set_auto_fill_mode(). If auto-fill mode is enabled we
+ * use the following algorithm:
+ * - When calling <code>add_value(key, value)</code>, we count the number of
+ *   elements in the column corresponding to <code>key</code>; call it $m$.
+ * - We also determine the maximal number $n$ of elements in the other columns.
+ * - If $m < n-1$ then we add $n-m-1$ copies of the object <code>T()</code> to
+ *   this column, where <code>T</code> is the data type of the given
+ *   <code>value</code>. For example, if <code>T</code> is a numeric type, then
+ *   <code>T()</code> is the number zero; if <code>T</code> is
+ *   <code>std::string</code>, then it is the empty string <code>""</code>.
+ * - Add the given value to this column.
+ *
+ * Padding the column with default elements makes sure that after the addition
+ * the column has as many entries as the longest other column. In other words,
+ * if we have skipped previous invocations of add_value() for a given key,
+ * then the padding will enter default values into this column.
+ *
+ * The algorithm as described will fail if you try to skip adding values for a
+ * key if adding an element for this key is the first thing you want to do for
+ * a given iteration or time step, since we would then pad to the length of
+ * the longest column of the <i>previous</i> iteration or time step. You may
+ * have to re-order adding to this column to a different spot in your program,
+ * after adding to a column that will always be added to; or, you may want to
+ * start every iteration by adding the number of the iteration to the table,
+ * for example in column 1.
+ *
+ * In the case above, we have always padded columns <b>above</b> the element
+ * that is being added to a column. However, there is also a case where we
+ * have to pad <b>below</b>. Namely, if a previous row has been completely
+ * filled using TableHandler::add_value(), subsequent rows have been filled
+ * partially, and we then ask for output via write_text() or write_tex(). In
+ * that case, the last few rows that have been filled only partially need to
+ * be padded below the last element that has been added to them. As before, we
+ * do that by using default constructed objects of the same type as the last
+ * element of that column.
+ *
+ * @ingroup textoutput
+ * @author Ralf Hartmann, 1999; Wolfgang Bangerth, 2011
+ */
+class TableHandler
+{
+public:
+  /**
+   * Set of options how a table should be formatted when output with the
+   * write_text() function. The following possibilities exist:
+   *
+   * - <code>table_with_headers</code>: The table is formatted in such a way
+   * that the contents are aligned under the key of each column, i.e. the key
+   * sits atop each column. This is suitable for tables with few columns where
+   * the entire table can be displayed on the screen. Output looks like this:
+   *   @code
+   *     key1 key2 key3
+   *     0    0    ""
+   *     1    0    ""
+   *     2    13   a
+   *     1    0    ""
+   *   @endcode
+   * - <code>table_with_separate_column_description</code>: This is a better
+   * format when there are many columns and the table as a whole can not be
+   * displayed on the screen. Here, the column keys are first listed one-by-
+   * one on lines of their own, and are numbered for better readability. In
+   * addition, each of these description lines are prefixed by '#' to mark
+   * these lines as comments for programs that want to read the following
+   * table as data and should ignore these descriptive lines. GNUPLOT is one
+   * such program that will automatically ignore lines so prefixed. Output
+   * with this option looks like this:
+   *   @code
+   *     # 1: key1
+   *     # 2: key2
+   *     # 3: key3
+   *     0 0  ""
+   *     1 0  ""
+   *     2 13 a
+   *     1 0  ""
+   *   @endcode
+   * - <code>simple_table_with_separate_column_description</code>: This format
+   * is very similar to <code>table_with_separate_column_description</code>,
+   * but it skips aligning the columns with additional white space. This
+   * increases the performance of write_text() for large tables. Example
+   * output:
+   *   @code
+   *     # 1: key1
+   *     # 2: key2
+   *     # 3: key3
+   *     0 0 ""
+   *     1 0 ""
+   *     2 13 a
+   *     1 0 ""
+   *   @endcode
+   * - <code>org_mode_table</code>: Outputs to org-mode (http://orgmode.org/)
+   * table format. It is easy to convert org-mode tables to HTML/LaTeX/csv.
+   * Example output:
+   *   @code
+   *   | key1 | key2 | key3 |
+   *   | 0    | 0    | ""   |
+   *   | 1    | 0    | ""   |
+   *   | 2    | 13   | a    |
+   *   | 1    | 0    | ""   |
+   *   @endcode
+   */
+  enum TextOutputFormat
+  {
+    table_with_headers,
+    table_with_separate_column_description,
+    simple_table_with_separate_column_description,
+    org_mode_table
+  };
+
+  /**
+   * Constructor.
+   */
+  TableHandler ();
+
+  /**
+   * Adds a column (if not yet existent) with the key <tt>key</tt> and adds
+   * the value of type <tt>T</tt> to the column. Values of type <tt>T</tt>
+   * must be convertible to one of <code>int, unsigned int, double,
+   * std::string</code> or a compiler error will result.
+   */
+  template <typename T>
+  void add_value (const std::string &key,
+                  const T            value);
+
+  /**
+   * Switch auto-fill mode on or off. See the general documentation of this
+   * class for a description of what auto-fill mode does.
+   */
+  void set_auto_fill_mode (const bool state);
+
+  /**
+   * Creates a supercolumn (if not yet existent) and includes column to it.
+   * The keys of the column and the supercolumn are <tt>key</tt> and
+   * <tt>superkey</tt>, respectively.  To merge two columns <tt>c1</tt> and
+   * <tt>c2</tt> to a supercolumn <tt>sc</tt> hence call
+   * <tt>add_column_to_supercolumn(c1,sc)</tt> and
+   * <tt>add_column_to_supercolumn(c2,sc)</tt>.
+   *
+   * Concerning the order of the columns, the supercolumn replaces the first
+   * column that is added to the supercolumn. Within the supercolumn the order
+   * of output follows the order the columns are added to the supercolumn.
+   */
+  void add_column_to_supercolumn (const std::string &key,
+                                  const std::string &superkey);
+
+  /**
+   * Change the order of columns and supercolumns in the table.
+   *
+   * <tt>new_order</tt> includes the keys and superkeys of the columns and
+   * supercolumns in the order the user would like them to be output. If a
+   * superkey is included the keys of the subcolumns need not be explicitly
+   * mentioned in this vector.  The order of subcolumns within a supercolumn
+   * is not changeable and remains in the order in which the columns are added
+   * to the supercolumn.
+   *
+   * This function may also be used to break big tables with too many columns
+   * into smaller ones. For example, you can call this function with the first
+   * five columns and then call one of the <tt>write_*</tt> functions, then
+   * call this function with the next five columns and again <tt>write_*</tt>,
+   * and so on.
+   */
+  void set_column_order (const std::vector<std::string> &new_order);
+
+  /**
+   * Sets the <tt>precision</tt> e.g. double or float variables are written
+   * with. <tt>precision</tt> is the same as in calling
+   * <tt>out<<setprecision(precision)</tt>.
+   */
+  void set_precision (const std::string &key,
+                      const unsigned int precision);
+
+  /**
+   * Sets the <tt>scientific_flag</tt>. True means scientific, false means
+   * fixed point notation.
+   */
+  void set_scientific (const std::string &key,
+                       const bool         scientific);
+
+  /**
+   * Sets the caption of the column <tt>key</tt> for tex output. You may want
+   * to choose this different from <tt>key</tt>, if it contains formulas or
+   * similar constructs.
+   */
+  void set_tex_caption (const std::string &key,
+                        const std::string &tex_caption);
+
+  /**
+   * Sets the tex caption of the entire <tt>table</tt> for tex output.
+   */
+  void set_tex_table_caption (const std::string &table_caption);
+
+  /**
+   * Sets the label of this <tt>table</tt> for tex output.
+   */
+  void set_tex_table_label (const std::string &table_label);
+
+  /**
+   * Sets the caption of the supercolumn <tt>superkey</tt> for tex output.
+   * You may want to choose this different from <tt>superkey</tt>, if it
+   * contains formulas or similar constructs.
+   */
+  void set_tex_supercaption (const std::string &superkey,
+                             const std::string &tex_supercaption);
+
+  /**
+   * Sets the tex output format of a column, e.g. <tt>c</tt>, <tt>r</tt>,
+   * <tt>l</tt>, or <tt>p{3cm}</tt>. The default is <tt>c</tt>. Also if this
+   * function is not called for a column, the default is preset to be
+   * <tt>c</tt>.
+   */
+  void set_tex_format (const std::string &key,
+                       const std::string &format="c");
+
+  /**
+   * Write table as formatted text to the given stream. The text is formatted
+   * in such a way that it represents data as formatted columns of text. To
+   * avoid problems when reading these tables automatically, for example for
+   * postprocessing, if an entry in a cell of this table is empty (i.e. it has
+   * been created by calling the add_value() function with an empty string),
+   * then the entry of the table is printed as <code>""</code>.
+   *
+   * The second argument indicates how column keys are to be displayed. See
+   * the description of TextOutputFormat for more information.
+   */
+  void write_text (std::ostream &out,
+                   const TextOutputFormat format = table_with_headers) const;
+
+  /**
+   * Write table as a tex file. If with_header is set to false (it is true by
+   * default), then no "\documentclass{...}", "\begin{document}" and
+   * "\end{document}" are used. In this way the file can be included into an
+   * existing tex file using a command like "\input{table_file}".
+   */
+  void write_tex (std::ostream &file, const bool with_header=true) const;
+
+  /**
+   * Clears the rows of the table, i.e. calls clear() on all the underlying
+   * storage data structures.
+   */
+  void clear ();
+
+  /**
+   * Remove all values added at the current row. This is useful when, for
+   * example, a time-step is rejected and all data recorded about it needs to
+   * be discarded.
+   */
+  void clear_current_row ();
+
+  /**
+   * Read or write the data of this object to or from a stream for the purpose
+   * of serialization.
+   */
+  template <class Archive>
+  void serialize(Archive &ar, const unsigned int version);
+
+  /**
+   * @addtogroup Exceptions
+   * @{
+   */
+
+  /**
+   * Exception
+   */
+  DeclException1 (ExcColumnNotExistent,
+                  std::string,
+                  << "Column <" << arg1 << "> does not exist.");
+
+  /**
+   * Exception
+   */
+  DeclException1 (ExcSuperColumnNotExistent,
+                  std::string,
+                  << "Supercolumn <" << arg1 << "> does not exist.");
+
+  /**
+   * Exception
+   */
+  DeclException1 (ExcColumnOrSuperColumnNotExistent,
+                  std::string,
+                  << "Column or supercolumn <" << arg1 << "> does not exist.");
+
+  /**
+   * Exception
+   */
+  DeclException4 (ExcWrongNumberOfDataEntries,
+                  std::string, int, std::string, int,
+                  << "Column <" << arg1 << "> has got " << arg2
+                  << " rows, but Column <" << arg3 << "> has got " << arg4 << ".");
+
+  /**
+   * Exception
+   */
+  DeclException1 (ExcUndefinedTexFormat,
+                  std::string,
+                  << "<" << arg1 << "> is not a tex column format. Use "
+                  << "'l', 'c', or 'r' to indicate left, centered, or "
+                  << "right aligned text.");
+  //@}
+protected:
+
+  /**
+   * Structure encapsulating all the data that is needed to describe one
+   * column of a table.
+   */
+  struct Column
+  {
+    /**
+     * Constructor needed by <tt>std::map</tt>.
+     */
+    Column ();
+
+    /**
+     * Constructor.
+     */
+    Column (const std::string &tex_caption);
+
+    /**
+     * Pad this column with default constructed elements to the number of rows
+     * given by the argument.
+     */
+    void pad_column_below (const unsigned int length);
+
+    /**
+     * Read or write the data of this object to or from a stream for the
+     * purpose of serialization.
+     */
+    template <class Archive>
+    void save(Archive &ar, const unsigned int version) const;
+    template<class Archive>
+    void load(Archive &ar, const unsigned int version);
+    BOOST_SERIALIZATION_SPLIT_MEMBER()
+
+
+    /**
+     * Invalidates the string cache of all the entries and recomputes the
+     * maximum length max_length.
+     */
+    void invalidate_cache();
+
+    /**
+     * List of entries within this column. Values are always immediately
+     * converted to strings to provide a uniform method of lookup.
+     */
+    std::vector<internal::TableEntry> entries;
+
+    /**
+     * The caption of the column in tex output.  By default, this is the key
+     * string that is given to the <tt>TableHandler</tt> by
+     * <tt>TableHandler::add_value(...)</tt>. This may be changed by calling
+     * <tt>TableHandler::set_tex_caption(...)</tt>.
+     */
+    std::string tex_caption;
+
+    /**
+     * The column format in tex output.  By default, this is <tt>"c"</tt>,
+     * meaning `centered'. This may be changed by calling
+     * <tt>TableHandler::set_tex_format(...)</tt> with <tt>"c", "r", "l"</tt>
+     * for centered, right or left.
+     */
+
+    std::string tex_format;
+
+    /**
+     * Double or float entries are written with this precision (set by the
+     * user).  The default is 4.
+     */
+    unsigned int precision;
+
+    /**
+     * <tt>scientific</tt>=false means fixed point notation.
+     */
+    bool scientific;
+
+    /**
+     * Flag that may be used by derived classes for arbitrary purposes.
+     *
+     * In particular, the ConvergenceTable class uses the flag to denote
+     * columns for which convergence information has already been computed, or
+     * should not be computed at all.
+     */
+    unsigned int flag;
+
+    /**
+     * This entry caches the maximum length in characters for all entries in
+     * this column.
+     */
+    unsigned int max_length;
+  };
+
+  /**
+   * Help function that gives a vector of the keys of all columns that are
+   * mentioned in <tt>column_order</tt>, where each supercolumn key is
+   * replaced by its subcolumn keys.
+   *
+   * This function implicitly checks the consistency of the data. The result
+   * is returned in <tt>sel_columns</tt>.
+   */
+  void get_selected_columns (std::vector<std::string> &sel_columns) const;
+
+  /**
+   * Builtin function, that gives the number of rows in the table and that
+   * checks if the number of rows is equal in every column. This function is
+   * e.g. called before writing output.
+   */
+  unsigned int n_rows() const;
+
+  /**
+   * Stores the column and supercolumn keys in the order desired by the user.
+   * By default this is the order of adding the columns. This order may be
+   * changed by <tt>set_column_order(...)</tt>.
+   */
+  std::vector<std::string> column_order;
+
+  /**
+   * Maps the column keys to the columns (not supercolumns).
+   *
+   * The field is declared mutable so that the write_text() and write_tex()
+   * functions can be const, even though they may pad columns below if
+   * auto_fill_mode is on.
+   */
+  mutable std::map<std::string,Column> columns;
+
+  /**
+   * Maps each supercolumn key to the keys of its subcolumns in the right
+   * order.  It is allowed that a supercolumn has got the same key as a
+   * column.
+   *
+   * Note that we do not use a <tt>multimap</tt> here since the order of
+   * column keys for each supercolumn key is relevant.
+   */
+  std::map<std::string, std::vector<std::string> > supercolumns;
+
+  /**
+   * Maps the supercolumn keys to the captions of the supercolumns that are
+   * used in tex output.
+   *
+   * By default these are just the supercolumn keys but they may be changed by
+   * <tt>set_tex_supercaption(...)</tt>.
+   */
+  std::map<std::string, std::string> tex_supercaptions;
+
+  /**
+   * The caption of the table itself.
+   */
+  std::string tex_table_caption;
+  /**
+   * The label of the table.
+   */
+  std::string tex_table_label;
+
+  /**
+   * Flag indicating whether auto-fill mode should be used.
+   */
+  bool auto_fill_mode;
+};
+
+
+namespace internal
+{
+  template <typename T>
+  TableEntry::TableEntry (const T &t)
+    :
+    value (t)
+  {}
+
+
+  template <typename T>
+  T TableEntry::get () const
+  {
+    // we don't quite know the data type in 'value', but
+    // it must be one of the ones in the type list of the
+    // boost::variant. so if T is not in the list, or if
+    // the data stored in the TableEntry is not of type
+    // T, then we will get an exception that we can
+    // catch and produce an error message
+    try
+      {
+        return boost::get<T>(value);
+      }
+    catch (...)
+      {
+        Assert(false, ExcMessage ("This TableEntry object does not store a datum of type T"));
+        throw;
+      }
+  }
+
+
+
+  template <class Archive>
+  void TableEntry::save (Archive &ar,
+                         const unsigned int) const
+  {
+    // write first an identifier for the kind
+    // of data stored and then the actual
+    // data, in its correct data type
+    if (const int *p = boost::get<int>(&value))
+      {
+        char c = 'i';
+        ar &c & *p;
+      }
+    else if (const unsigned int *p = boost::get<unsigned int>(&value))
+      {
+        char c = 'u';
+        ar &c & *p;
+      }
+    else if (const double *p = boost::get<double>(&value))
+      {
+        char c = 'd';
+        ar &c & *p;
+      }
+    else if (const std::string *p = boost::get<std::string>(&value))
+      {
+        char c = 's';
+        ar &c & *p;
+      }
+    else if (const unsigned long long int *p = boost::get<unsigned long long int>(&value))
+      {
+        char c = 'l';
+        ar &c & *p;
+      }
+    else
+      Assert (false, ExcInternalError());
+  }
+
+
+
+  template <class Archive>
+  void TableEntry::load (Archive &ar,
+                         const unsigned int)
+  {
+    // following what we do in the save()
+    // function, first read in the data type
+    // as a one-character id, and then read
+    // the data
+    char c;
+    ar &c;
+
+    switch (c)
+      {
+      case 'i':
+      {
+        int val;
+        ar &val;
+        value = val;
+        break;
+      }
+
+      case 'u':
+      {
+        unsigned int val;
+        ar &val;
+        value = val;
+        break;
+      }
+
+      case 'd':
+      {
+        double val;
+        ar &val;
+        value = val;
+        break;
+      }
+
+      case 's':
+      {
+        std::string val;
+        ar &val;
+        value = val;
+        break;
+      }
+
+      case 'l':
+      {
+        unsigned long long int val;
+        ar &val;
+        value = val;
+        break;
+      }
+
+      default:
+        Assert (false, ExcInternalError());
+      }
+  }
+}
+
+template <typename T>
+void TableHandler::add_value (const std::string &key,
+                              const T            value)
+{
+  // see if the column already exists
+  if (columns.find(key) == columns.end())
+    {
+      std::pair<std::string, Column> new_column(key, Column(key));
+      columns.insert(new_column);
+      column_order.push_back(key);
+    }
+
+  if (auto_fill_mode == true)
+    {
+      // follow the algorithm given in the introduction to this class
+      // of padding columns as necessary
+      unsigned int n = 0;
+      for (std::map< std::string, Column >::iterator p = columns.begin(); p != columns.end(); ++p)
+        n = (n >= p->second.entries.size() ? n : p->second.entries.size());
+
+      while (columns[key].entries.size()+1 < n)
+        {
+          columns[key].entries.push_back (internal::TableEntry(T()));
+          internal::TableEntry &entry = columns[key].entries.back();
+          entry.cache_string(columns[key].scientific, columns[key].precision);
+          columns[key].max_length = std::max(columns[key].max_length, static_cast<unsigned int>(entry.get_cached_string().length()));
+        }
+    }
+
+  // now push the value given to this function
+  columns[key].entries.push_back (internal::TableEntry(value));
+  internal::TableEntry &entry = columns[key].entries.back();
+  entry.cache_string(columns[key].scientific, columns[key].precision);
+  columns[key].max_length = std::max(columns[key].max_length, static_cast<unsigned int>(entry.get_cached_string().length()));
+}
+
+
+template <class Archive>
+void
+TableHandler::Column::save(Archive &ar, const unsigned int /*version*/) const
+{
+  ar &entries &tex_caption
+  & tex_format &precision
+  & scientific
+  & flag
+  & max_length;
+}
+
+template<class Archive>
+void
+TableHandler::Column::load(Archive &ar, const unsigned int /*version*/)
+{
+  ar &entries &tex_caption
+  & tex_format &precision
+  & scientific
+  & flag
+  & max_length;
+  invalidate_cache();
+}
+
+
+template <class Archive>
+void
+TableHandler::serialize(Archive &ar,
+                        const unsigned int)
+{
+  ar &column_order &columns
+  & supercolumns &tex_supercaptions
+  & tex_table_caption
+  & tex_table_label
+  & auto_fill_mode;
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/table_indices.h b/include/deal.II/base/table_indices.h
new file mode 100644
index 0000000..d9cab5a
--- /dev/null
+++ b/include/deal.II/base/table_indices.h
@@ -0,0 +1,281 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__table_indices_h
+#define dealii__table_indices_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/std_cxx11/iterator.h>
+
+#include <algorithm>
+#include <ostream>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/**
+ * A class representing a fixed size array of indices.
+ *
+ * It is used in tensorial objects like the TableBase and SymmetricTensor
+ * classes to represent a nested choice of indices.
+ *
+ * @ingroup data
+ * @author Wolfgang Bangerth, Matthias Maier, 2002, 2015
+ */
+template <int N>
+class TableIndices
+{
+public:
+
+  /**
+   * Default constructor. It sets all indices to zero.
+   */
+  TableIndices();
+
+  /**
+   * Convenience constructor that takes up to 9 arguments. It can be used to
+   * populate a TableIndices object upon creation, either completely, or
+   * partially.
+   *
+   * Index entries that are not set by these arguments (either because they
+   * are omitted, or because $N > 9$) are set to
+   * numbers::invalid_unsigned_int.
+   *
+   * Note that only the first <tt>N</tt> arguments are actually used.
+   *
+   * @tparam N The number of indices stored in each object.
+   */
+  TableIndices (const unsigned int index0,
+                const unsigned int index1 = numbers::invalid_unsigned_int,
+                const unsigned int index2 = numbers::invalid_unsigned_int,
+                const unsigned int index3 = numbers::invalid_unsigned_int,
+                const unsigned int index4 = numbers::invalid_unsigned_int,
+                const unsigned int index5 = numbers::invalid_unsigned_int,
+                const unsigned int index6 = numbers::invalid_unsigned_int,
+                const unsigned int index7 = numbers::invalid_unsigned_int,
+                const unsigned int index8 = numbers::invalid_unsigned_int);
+
+  /**
+   * Read-only access to the value of the <tt>i</tt>th index.
+   */
+  unsigned int operator[] (const unsigned int i) const;
+
+  /**
+   * Write access to the value of the <tt>i</tt>th index.
+   */
+  unsigned int &operator[] (const unsigned int i);
+
+  /**
+   * Compare two index fields for equality.
+   */
+  bool operator == (const TableIndices<N> &other) const;
+
+  /**
+   * Compare two index fields for inequality.
+   */
+  bool operator != (const TableIndices<N> &other) const;
+
+  /**
+   * Sort the indices in ascending order. While this operation is not very
+   * useful for Table objects, it is used for the SymmetricTensor class.
+   */
+  void sort ();
+
+  /**
+   * Write or read the data of this object to or from a stream for the purpose
+   * of serialization.
+   */
+  template <class Archive>
+  void serialize (Archive &ar, const unsigned int version);
+
+protected:
+  /**
+   * Store the indices in an array.
+   */
+  unsigned int indices[N];
+};
+
+
+
+/* --------------------- Template and inline functions ---------------- */
+
+
+template <int N>
+TableIndices<N>::TableIndices()
+{
+  Assert (N > 0, ExcMessage("Cannot create a TableIndices object of size 0"));
+
+  for (unsigned int i=0; i<N; ++i)
+    indices[i] = 0;
+}
+
+
+template <int N>
+TableIndices<N>::TableIndices(const unsigned int index0,
+                              const unsigned int index1,
+                              const unsigned int index2,
+                              const unsigned int index3,
+                              const unsigned int index4,
+                              const unsigned int index5,
+                              const unsigned int index6,
+                              const unsigned int index7,
+                              const unsigned int index8)
+{
+  Assert (N > 0, ExcMessage("Cannot create a TableIndices object of size 0"));
+
+  switch (N)
+    {
+    case 1: // fallthrough
+      Assert (index1 == numbers::invalid_unsigned_int, ExcMessage("more than N index values provided"));
+    case 2: // fallthrough
+      Assert (index2 == numbers::invalid_unsigned_int, ExcMessage("more than N index values provided"));
+    case 3: // fallthrough
+      Assert (index3 == numbers::invalid_unsigned_int, ExcMessage("more than N index values provided"));
+    case 4: // fallthrough
+      Assert (index4 == numbers::invalid_unsigned_int, ExcMessage("more than N index values provided"));
+    case 5: // fallthrough
+      Assert (index5 == numbers::invalid_unsigned_int, ExcMessage("more than N index values provided"));
+    case 6: // fallthrough
+      Assert (index6 == numbers::invalid_unsigned_int, ExcMessage("more than N index values provided"));
+    case 7: // fallthrough
+      Assert (index7 == numbers::invalid_unsigned_int, ExcMessage("more than N index values provided"));
+    case 8: // fallthrough
+      Assert (index8 == numbers::invalid_unsigned_int, ExcMessage("more than N index values provided"));
+    default:
+      ;
+    }
+
+  // Always access "indices" with indices modulo N to avoid bogus compiler
+  // warnings (although such access is always in dead code)...
+  switch (N)
+    {
+    default:
+      // For TableIndices of size 10 or larger also default initialize the
+      // remaining indices to numbers::invalid_unsigned_int:
+      for (unsigned int i=0; i<N; ++i)
+        indices[i] = numbers::invalid_unsigned_int;
+    case 9: // fallthrough
+      indices[8 % N] = index8;
+    case 8: // fallthrough
+      indices[7 % N] = index7;
+    case 7: // fallthrough
+      indices[6 % N] = index6;
+    case 6: // fallthrough
+      indices[5 % N] = index5;
+    case 5: // fallthrough
+      indices[4 % N] = index4;
+    case 4: // fallthrough
+      indices[3 % N] = index3;
+    case 3: // fallthrough
+      indices[2 % N] = index2;
+    case 2: // fallthrough
+      indices[1 % N] = index1;
+    case 1: // fallthrough
+      indices[0 % N] = index0;
+    }
+
+}
+
+
+template <int N>
+inline
+unsigned int
+TableIndices<N>::operator [] (const unsigned int i) const
+{
+  Assert (i < N, ExcIndexRange (i, 0, N));
+  return indices[i];
+}
+
+
+template <int N>
+inline
+unsigned int &
+TableIndices<N>::operator [] (const unsigned int i)
+{
+  Assert (i < N, ExcIndexRange (i, 0, N));
+  return indices[i];
+}
+
+
+template <int N>
+inline
+bool
+TableIndices<N>::operator == (const TableIndices<N> &other) const
+{
+  for (unsigned int i=0; i<N; ++i)
+    if (indices[i] != other.indices[i])
+      return false;
+  return true;
+}
+
+
+template <int N>
+inline
+bool
+TableIndices<N>::operator != (const TableIndices<N> &other) const
+{
+  return !(*this == other);
+}
+
+
+template <int N>
+inline
+void
+TableIndices<N>::sort ()
+{
+  std::sort(std_cxx11::begin(indices), std_cxx11::end(indices));
+}
+
+
+template <int N>
+template <class Archive>
+inline
+void
+TableIndices<N>::serialize (Archive &ar, const unsigned int)
+{
+  ar &indices;
+}
+
+
+/**
+ * Output operator for TableIndices objects; reports them in a list like this:
+ * <code>[i1,i2,...]</code>.
+ *
+ * @relates TableIndices
+ */
+template <int N>
+std::ostream &
+operator << (std::ostream &out,
+             const TableIndices<N> &indices)
+{
+  out << '[';
+  for (unsigned int i=0; i<N; ++i)
+    {
+      out << indices[i];
+      if (i+1 != N)
+        out << ',';
+    }
+  out << ']';
+
+  return out;
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/template_constraints.h b/include/deal.II/base/template_constraints.h
new file mode 100644
index 0000000..bc44e71
--- /dev/null
+++ b/include/deal.II/base/template_constraints.h
@@ -0,0 +1,626 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__template_constraints_h
+#define dealii__template_constraints_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/complex_overloads.h>
+
+#include <complex>
+#include <utility>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <bool, typename> struct constraint_and_return_value;
+
+
+/**
+ * This specialization of the general template for the case of a <tt>true</tt>
+ * first template argument declares a local typedef <tt>type</tt> to the
+ * second template argument. It is used in order to construct constraints on
+ * template arguments in template (and member template) functions. The
+ * negative specialization is missing.
+ *
+ * Here's how the trick, called SFINAE (substitution failure is not an
+ * error), works: The C++ standard prescribes that a template function is only
+ * considered in a call, if all parts of its signature can be instantiated
+ * with the template parameter replaced by the respective types/values in this
+ * particular call. Example:
+ * @code
+ *   template <typename T>
+ *   typename T::type  foo(T) {...};
+ *   ...
+ *   foo(1);
+ * @endcode
+ * The compiler should detect that in this call, the template parameter T must
+ * be identified with the type "int". However, the return type T::type does
+ * not exist. The trick now is that this is not considered an error: this
+ * template is simply not considered, the compiler keeps on looking for
+ * another possible function foo.
+ *
+ * The idea is then to make the return type un-instantiatable if certain
+ * constraints on the template types are not satisfied:
+ * @code
+ *   template <bool, typename> struct constraint_and_return_value;
+ *   template <typename T> struct constraint_and_return_value<true,T> {
+ *     typedef T type;
+ *   };
+ * @endcode
+ * constraint_and_return_value<false,T> is not defined. Given something like
+ * @code
+ *   template <typename>
+ *   struct int_or_double         { static const bool value = false;};
+ *   template <>
+ *   struct int_or_double<int>    { static const bool value = true; };
+ *   template <>
+ *   struct int_or_double<double> { static const bool value = true; };
+ * @endcode
+ * we can write a template
+ * @code
+ *   template <typename T>
+ *   typename constraint_and_return_value<int_or_double<T>::value,void>::type
+ *   f (T);
+ * @endcode
+ * which can only be instantiated if T=int or T=double. A call to f('c') will
+ * just fail with a compiler error: "no instance of f(char) found". On the
+ * other hand, if the predicate in the first argument to the
+ * constraint_and_return_value template is true, then the return type is just
+ * the second type in the template.
+ *
+ * @author Wolfgang Bangerth, 2003
+ */
+template <typename T> struct constraint_and_return_value<true,T>
+{
+  typedef T type; // exists only for <true,T>; the <false,T> case is left undefined
+};
+
+
+
+/**
+ * A template class that simply exports its template argument as a local
+ * typedef. This class, while at first appearing useless, makes sense in the
+ * following context: if you have a function template as follows:
+ * @code
+ *   template <typename T> void f(T, T);
+ * @endcode
+ * then it can't be called in an expression like <code>f(1, 3.141)</code>
+ * because the type <code>T</code> of the template can not be deduced in a
+ * unique way from the types of the arguments. However, if the template is
+ * written as
+ * @code
+ *   template <typename T> void f(T, typename identity<T>::type);
+ * @endcode
+ * then the call becomes valid: the type <code>T</code> is not deducible from
+ * the second argument to the function, so only the first argument
+ * participates in template type resolution.
+ *
+ * The context for this feature is as follows: consider
+ * @code
+ * template <typename RT, typename A>
+ * void forward_call(RT (*p) (A), A a)  { p(a); }
+ *
+ * void h (double);
+ *
+ * void g()
+ * {
+ *   forward_call(&h, 1);
+ * }
+ * @endcode
+ * This code fails to compile because the compiler can't decide whether the
+ * template type <code>A</code> should be <code>double</code> (from the
+ * signature of the function given as first argument to
+ * <code>forward_call</code>), or <code>int</code> because the expression
+ * <code>1</code> has that type. Of course, what we would like the compiler to
+ * do is simply cast the <code>1</code> to <code>double</code>. We can achieve
+ * this by writing the code as follows:
+ * @code
+ * template <typename RT, typename A>
+ * void forward_call(RT (*p) (A), typename identity<A>::type a)  { p(a); }
+ *
+ * void h (double);
+ *
+ * void g()
+ * {
+ *   forward_call(&h, 1);
+ * }
+ * @endcode
+ *
+ * @author Wolfgang Bangerth, 2008
+ */
+template <typename T>
+struct identity
+{
+  typedef T type; // naming T as identity<T>::type keeps it from being deduced there
+};
+
+
+
+/**
+ * A class to perform comparisons of arbitrary pointers for equality. In some
+ * circumstances, one would like to make sure that two arguments to a function
+ * are not the same object. One would, in this case, make sure that their
+ * addresses are not the same. However, sometimes the types of these two
+ * arguments may be template types, and they may be the same type or not. In
+ * this case, a simple comparison as in <tt>&object1 != &object2</tt> only
+ * works if the types of the two objects are equal, but the compiler will
+ * reject it otherwise. However, in the latter case, since the types of the
+ * two objects are different, we can be sure that the two objects cannot be
+ * the same.
+ *
+ * This class implements a comparison function that always returns @p false if
+ * the types of its two arguments are different, and returns <tt>p1 == p2</tt>
+ * otherwise.
+ *
+ * @author Wolfgang Bangerth, 2004
+ */
+struct PointerComparison
+{
+  /**
+   * Comparison function for pointers of the same type. Returns @p true if the
+   * two pointers are equal.
+   *
+   * If both arguments have the same pointee type, both overloads of this
+   * function are viable; this one is then chosen because it is the more
+   * specialized function template.
+   */
+  template <typename T>
+  static bool equal (const T *p1, const T *p2);
+
+  /**
+   * Comparison function for pointers of different types. The C++ language
+   * does not allow comparing these pointers using <tt>operator==</tt>.
+   * However, since the two pointers have different types, we know that they
+   * can't be the same, so we always return @p false.
+   */
+  template <typename T, typename U>
+  static bool equal (const T *, const U *);
+};
+
+
+
+namespace internal
+{
+  /**
+   * A type that is sometimes used for template tricks. For example, in some
+   * situations one would like to do this:
+   *
+   * @code
+   *   template <int dim>
+   *   class X {
+   *     // do something on subdim-dimensional sub-objects of the big
+   *     // dim-dimensional thing (for example on vertices/lines/quads of
+   *     // cells):
+   *     template <int subdim> void f();
+   *   };
+   *
+   *   template <int dim>
+   *   template <>
+   *   void X<dim>::f<0> () { ...operate on the vertices of a cell... }
+   *
+   *   template <int dim, int subdim> void g(X<dim> &x) {
+   *     x.f<subdim> ();
+   *   }
+   * @endcode
+   *
+   * The problem is: the language doesn't allow us to specialize
+   * <code>X::f()</code> without specializing the outer class first. One of
+   * the common tricks is therefore to use something like this:
+   *
+   * @code
+   *   template <int N> struct int2type {};
+   *
+   *   template <int dim>
+   *   class X {
+   *     // do something on subdim-dimensional sub-objects of the big
+   *     // dim-dimensional thing (for example on vertices/lines/quads of
+   *     // cells):
+   *     void f(int2type<0>);
+   *     void f(int2type<1>);
+   *     void f(int2type<2>);
+   *     void f(int2type<3>);
+   *   };
+   *
+   *   template <int dim>
+   *   void X<dim>::f (int2type<0>) { ...operate on the vertices of a cell... }
+   *
+   *   template <int dim>
+   *   void X<dim>::f (int2type<1>) { ...operate on the lines of a cell... }
+   *
+   *   template <int dim, int subdim> void g(X<dim> &x) {
+   *     x.f (int2type<subdim>());
+   *   }
+   * @endcode
+   *
+   * Note that we have replaced specialization of <code>X::f()</code> by
+   * overloading, but that from inside the function <code>g()</code>, we can
+   * still select which of the different <code>X::f()</code> we want based on
+   * the <code>subdim</code> template argument.
+   *
+   * @author Wolfgang Bangerth, 2006
+   */
+  template <int N>
+  struct int2type
+  {}; // intentionally empty: only the type itself is used, to select an overload
+
+
+  /**
+   * The equivalent of the int2type class for boolean arguments.
+   *
+   * @author Wolfgang Bangerth, 2009
+   */
+  template <bool B>
+  struct bool2type
+  {}; // intentionally empty, see int2type
+} // namespace internal
+
+
+
+/**
+ * A type that can be used to determine whether two types are equal. It allows
+ * one to write code like
+ * @code
+ *   template <typename T>
+ *   void Vector<T>::some_operation () {
+ *     if (types_are_equal<T,double>::value == true)
+ *       call_some_blas_function_for_doubles;
+ *     else
+ *       do_it_by_hand;
+ *   }
+ * @endcode
+ *
+ * This construct is made possible through the existence of a partial
+ * specialization of the class for template arguments that are equal.
+ */
+template <typename T, typename U>
+struct types_are_equal
+{
+  static const bool value = false; // general case: T and U are different types
+};
+
+
+/**
+ * Partial specialization of the general template for the case that both
+ * template arguments are equal. See the documentation of the general template
+ * for more information.
+ */
+template <typename T>
+struct types_are_equal<T,T>
+{
+  static const bool value = true; // selected whenever both arguments name the same type
+};
+
+
+
+/**
+ * A class with a local typedef that represents the type that results from the
+ * product of two variables of type @p T and @p U. In other words, we would
+ * like to infer the type of the <code>product</code> variable in code like
+ * this:
+ * @code
+ *   T t;
+ *   U u;
+ *   auto product = t*u;
+ * @endcode
+ * The local typedef of this structure represents the type the variable
+ * <code>product</code> would have.
+ *
+ *
+ * <h3>Where is this useful</h3>
+ *
+ * The purpose of this class is principally to represent the type one needs to
+ * use to represent the values or gradients of finite element fields at
+ * quadrature points. For example, assume you are storing the values $U_j$ of
+ * unknowns in a Vector<float>, then evaluating $u_h(x_q) = \sum_j U_j
+ * \varphi_j(x_q)$ at quadrature points results in values $u_h(x_q)$ that need
+ * to be stored as @p double variables because the $U_j$ are @p float values
+ * and the $\varphi_j(x_q)$ are computed as @p double values, so that the
+ * products are @p double values. On the other hand, if you store your unknowns
+ * $U_j$ as <code>std::complex@<double@></code> values and you try to evaluate
+ * $\nabla u_h(x_q) = \sum_j U_j \nabla\varphi_j(x_q)$ at quadrature points,
+ * then the gradients $\nabla u_h(x_q)$ need to be stored as objects of type
+ * <code>Tensor@<1,dim,std::complex@<double@>@></code> because that's what you
+ * get when you multiply a complex number by a <code>Tensor@<1,dim@></code>
+ * (the type used to represent the gradient of shape functions of scalar
+ * finite elements).
+ *
+ * Likewise, if you are using a vector valued element (with dim components)
+ * and the $U_j$ are stored as @p double variables, then $u_h(x_q) = \sum_j
+ * U_j \varphi_j(x_q)$ needs to have type <code>Tensor@<1,dim@></code>
+ * (because the shape functions have type <code>Tensor@<1,dim@></code>).
+ * Finally, if you store the $U_j$ as objects of type
+ * <code>std::complex@<double@></code> and you have a vector valued element,
+ * then the gradients $\nabla u_h(x_q) = \sum_j U_j \nabla\varphi_j(x_q)$ will
+ * result in objects of type <code>Tensor@<2,dim,std::complex@<double@>
+ * @></code>.
+ *
+ * In all of these cases, this type is used to identify which type needs to be
+ * used for the result of computing the product of unknowns and the values,
+ * gradients, or other properties of shape functions.
+ *
+ * @author Wolfgang Bangerth, 2015
+ */
+template <typename T, typename U>
+struct ProductType
+{
+#ifdef DEAL_II_WITH_CXX11
+  typedef decltype(std::declval<T>() * std::declval<U>()) type; // type of the expression t*u
+#endif // DEAL_II_WITH_CXX11
+};
+
+#ifndef DEAL_II_WITH_CXX11
+
+// Without C++11 there is no decltype, so the result types of the supported
+// products are listed by hand in the specializations below.
+
+// Product of two values of the same type: the result keeps that type.
+template <typename T>
+struct ProductType<T,T>
+{
+  typedef T type;
+};
+
+// Multiplying by a bool leaves the type of the other factor unchanged.
+template <typename T>
+struct ProductType<T,bool>
+{
+  typedef T type;
+};
+
+template <typename T>
+struct ProductType<bool, T>
+{
+  typedef T type;
+};
+
+// ProductType<bool,bool> matches ProductType<T,T>, ProductType<T,bool> and
+// ProductType<bool,T> equally well, and none of these partial specializations
+// is more specialized than the others, so the compiler would reject it as
+// ambiguous. Resolve this explicitly: both operands of bool*bool undergo
+// integral promotion, so the product has type int (which is also what the
+// decltype based definition used with C++11 yields).
+template <>
+struct ProductType<bool,bool>
+{
+  typedef int type;
+};
+
+template <>
+struct ProductType<bool,double>
+{
+  typedef double type;
+};
+
+template <>
+struct ProductType<double,bool>
+{
+  typedef double type;
+};
+
+// Mixed floating point products: the result has the wider of the two types.
+template <>
+struct ProductType<double,float>
+{
+  typedef double type;
+};
+
+template <>
+struct ProductType<float,double>
+{
+  typedef double type;
+};
+
+template <>
+struct ProductType<double,long double>
+{
+  typedef long double type;
+};
+
+template <>
+struct ProductType<long double,double>
+{
+  typedef long double type;
+};
+
+// Products of a floating point value and an integer: the result has the
+// floating point type.
+template <>
+struct ProductType<double,int>
+{
+  typedef double type;
+};
+
+template <>
+struct ProductType<int,double>
+{
+  typedef double type;
+};
+
+template <>
+struct ProductType<float,int>
+{
+  typedef float type;
+};
+
+template <>
+struct ProductType<int,float>
+{
+  typedef float type;
+};
+
+template <>
+struct ProductType<double, unsigned int>
+{
+  typedef double type;
+};
+
+template <>
+struct ProductType<unsigned int, double>
+{
+  typedef double type;
+};
+
+template <>
+struct ProductType<float,unsigned int>
+{
+  typedef float type;
+};
+
+template <>
+struct ProductType<unsigned int,float>
+{
+  typedef float type;
+};
+
+#endif // !DEAL_II_WITH_CXX11
+
+// Annoyingly, there is no std::complex<T>::operator*(U) for scalars U
+// other than T (not even in C++11, or C++14). We provide our own overloads
+// in base/complex_overloads.h, but in order for them to work, we have to
+// manually specify all products we want to allow:
+
+template <typename T>
+struct ProductType<std::complex<T>,std::complex<T> >
+{
+  typedef std::complex<T> type; // same scalar type on both sides: result keeps it
+};
+
+template <typename T, typename U>
+struct ProductType<std::complex<T>,std::complex<U> >
+{
+  typedef std::complex<typename ProductType<T,U>::type> type; // scalar type of the result is that of T*U
+};
+
+template <typename U>
+struct ProductType<double,std::complex<U> >
+{
+  typedef std::complex<typename ProductType<double,U>::type> type;
+};
+
+template <typename T>
+struct ProductType<std::complex<T>,double>
+{
+  typedef std::complex<typename ProductType<T,double>::type> type;
+};
+
+
+template <typename U>
+struct ProductType<float,std::complex<U> >
+{
+  typedef std::complex<typename ProductType<float,U>::type> type;
+};
+
+template <typename T>
+struct ProductType<std::complex<T>,float>
+{
+  typedef std::complex<typename ProductType<T,float>::type> type;
+};
+
+
+
+/**
+ * This class provides a local typedef @p type that is equal to the template
+ * argument but only if the template argument corresponds to a scalar type
+ * (i.e., one of the floating point types, signed or unsigned integer, or a
+ * complex number). If the template type @p T is not a scalar, then no class
+ * <code>EnableIfScalar@<T@></code> is declared and, consequently, no local
+ * typedef is available.
+ *
+ * The purpose of the class is to disable certain template functions if one of
+ * the arguments is not a scalar number. By way of (nonsensical) example,
+ * consider the following function:
+ * @code
+ *   template <typename T>
+ *   T multiply (const T t1, const T t2) { return t1*t2; }
+ * @endcode
+ * This function can be called with any two arguments of the same type @p T.
+ * This includes arguments for which this clearly makes no sense.
+ * Consequently, one may want to restrict the function to only scalars, and
+ * this can be written as
+ * @code
+ *   template <typename T>
+ *   typename EnableIfScalar<T>::type
+ *   multiply (const T t1, const T t2) { return t1*t2; }
+ * @endcode
+ * At a place where you call the function, the compiler will deduce the type
+ * @p T from the arguments. For example, in
+ * @code
+ *   multiply(1.234, 2.345);
+ * @endcode
+ * it will deduce @p T to be @p double, and because
+ * <code>EnableIfScalar@<double@>::type</code> equals @p double, the compiler
+ * will instantiate a function <code>double multiply(const double, const
+ * double)</code> from the template above. On the other hand, in a context
+ * like
+ * @code
+ *   std::vector<char> v1, v2;
+ *   multiply(v1, v2);
+ * @endcode
+ * the compiler will deduce @p T to be <code>std::vector@<char@></code> but
+ * because <code>EnableIfScalar@<std::vector@<char@>@>::type</code> does not
+ * exist the compiler does not consider the template for instantiation. This
+ * technique is called "Substitution Failure is not an Error (SFINAE)". It
+ * makes sure that the template function can not even be called, rather than
+ * leading to a later error about the fact that the operation
+ * <code>t1*t2</code> is not defined (or may lead to some nonsensical result).
+ * It also allows the declaration of overloads of a function such as @p
+ * multiply for different types of arguments, without resulting in ambiguous
+ * call errors by the compiler.
+ *
+ * @author Wolfgang Bangerth, 2015
+ */
+template <typename T>
+struct EnableIfScalar; // declared only; defined solely through the specializations below
+
+
+template <> struct EnableIfScalar<double>
+{
+  typedef double type;
+};
+
+
+template <> struct EnableIfScalar<float>
+{
+  typedef float type;
+};
+
+
+template <> struct EnableIfScalar<long double>
+{
+  typedef long double type;
+};
+
+
+template <> struct EnableIfScalar<int>
+{
+  typedef int type;
+};
+
+
+template <> struct EnableIfScalar<unsigned int>
+{
+  typedef unsigned int type;
+};
+
+
+
+template <typename T> struct EnableIfScalar<std::complex<T> >
+{
+  typedef std::complex<T> type; // enabled for std::complex<T> with any T
+};
+
+
+// --------------- inline functions -----------------
+
+
+// Overload taking pointers with two separate pointee types: the result is
+// always false, the pointer values are never looked at.
+template <typename T, typename U>
+inline bool
+PointerComparison::equal (const T * /*p1*/, const U * /*p2*/)
+{
+  return false;
+}
+
+
+
+// Overload taking two pointers of one and the same type: compare addresses.
+template <typename T>
+inline bool
+PointerComparison::equal (const T *p1, const T *p2)
+{
+  const bool same_address = (p1 == p2);
+  return same_address;
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/tensor.h b/include/deal.II/base/tensor.h
new file mode 100644
index 0000000..a2168cb
--- /dev/null
+++ b/include/deal.II/base/tensor.h
@@ -0,0 +1,1983 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__tensor_h
+#define dealii__tensor_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/table_indices.h>
+#include <deal.II/base/tensor_accessors.h>
+#include <deal.II/base/template_constraints.h>
+#include <deal.II/base/utilities.h>
+
+#include <cmath>
+#include <ostream>
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+// Forward declarations:
+
+template <int dim, typename Number> class Point;
+template <int rank_, int dim, typename Number = double> class Tensor;
+
+#ifndef DOXYGEN
+// Specialize (as empty classes) invalid tensor types of negative rank that come
+// up during overload resolution of operator* and related contraction variants.
+template <int dim, typename Number>
+class Tensor<-2, dim, Number>
+{
+};
+
+template <int dim, typename Number>
+class Tensor<-1, dim, Number>
+{
+};
+#endif /* DOXYGEN */
+
+
+/**
+ * This class is a specialized version of the <tt>Tensor<rank,dim,Number></tt>
+ * class. It handles tensors of rank zero, i.e. scalars. The second template
+ * argument @p dim is ignored.
+ *
+ * This class exists because in some cases we want to construct objects of
+ * type Tensor@<spacedim-dim,dim,Number@>, which should expand to scalars,
+ * vectors, matrices, etc, depending on the values of the template arguments
+ * @p dim and @p spacedim. We therefore need a class that acts as a scalar
+ * (i.e. @p Number) for all purposes but is part of the Tensor template
+ * family.
+ *
+ * @tparam dim An integer that denotes the dimension of the space in which
+ * this tensor operates. This of course equals the number of coordinates that
+ * identify a point and rank-1 tensor. Since the current object is a rank-0
+ * tensor (a scalar), this template argument has no meaning for this class.
+ *
+ * @tparam Number The data type in which the tensor elements are to be stored.
+ * This will, in almost all cases, simply be the default @p double, but there
+ * are cases where one may want to store elements in a different (and always
+ * scalar) type. It can be used to base tensors on @p float or @p complex
+ * numbers or any other data type that implements basic arithmetic operations.
+ * Another example would be a type that allows for Automatic Differentiation
+ * (see, for example, the Sacado type used in step-33) and thereby can
+ * generate analytic (spatial) derivatives of a function that takes a tensor
+ * as argument.
+ *
+ * @ingroup geomprimitives
+ * @author Wolfgang Bangerth, 2009, Matthias Maier, 2015
+ */
+template <int dim, typename Number>
+class Tensor<0,dim,Number>
+{
+public:
+  /**
+   * Provide a way to get the dimension of an object without explicit
+   * knowledge of its data type. Implementation is this way instead of
+   * providing a function <tt>dimension()</tt> because now it is possible to
+   * get the dimension at compile time without the expansion and preevaluation
+   * of an inlined function; the compiler may therefore produce more efficient
+   * code and you may use this value to declare other data types.
+   */
+  static const unsigned int dimension = dim;
+
+  /**
+   * Publish the rank of this tensor to the outside world.
+   */
+  static const unsigned int rank = 0;
+
+  /**
+   * Number of independent components of a tensor of rank 0.
+   */
+  static const unsigned int n_independent_components = 1;
+
+  /**
+   * Declare a type that holds real-valued numbers with the same precision
+   * as the template argument to this class. For std::complex<number>, this
+   * corresponds to type number, and it is equal to Number for all other
+   * cases. See also the respective field in Vector<Number>.
+   *
+   * This typedef is used to represent the return type of norms.
+   */
+  typedef typename numbers::NumberTraits<Number>::real_type real_type;
+
+  /**
+   * Type of objects encapsulated by this container and returned by
+   * operator[](). This is a scalar number type for a rank 0 tensor.
+   */
+  typedef Number value_type;
+
+  /**
+   * Declare an array type which can be used to initialize an object of this
+   * type statically. In case of a tensor of rank 0 this is just the scalar
+   * number type Number.
+   */
+  typedef Number array_type;
+
+  /**
+   * Constructor. Set to zero.
+   */
+  Tensor ();
+
+  /**
+   * Copy constructor.
+   */
+  Tensor (const Tensor<0,dim,Number> &initializer);
+
+  /**
+   * Constructor from tensors with different underlying scalar type. This
+   * obviously requires that the @p OtherNumber type is convertible to @p
+   * Number.
+   */
+  template <typename OtherNumber>
+  Tensor (const Tensor<0,dim,OtherNumber> &initializer);
+
+  /**
+   * Constructor, where the data is copied from a single scalar value.
+   */
+  template <typename OtherNumber>
+  Tensor (const OtherNumber initializer);
+
+  /**
+   * Return a reference to the encapsulated Number object. Since rank-0
+   * tensors are scalars, this is a natural operation.
+   *
+   * This is the non-const conversion operator that returns a writable
+   * reference.
+   */
+  operator Number &();
+
+  /**
+   * Return a reference to the encapsulated Number object. Since rank-0
+   * tensors are scalars, this is a natural operation.
+   *
+   * This is the const conversion operator that returns a read-only reference.
+   */
+  operator const Number &() const;
+
+  /**
+   * Copy assignment operator.
+   */
+  Tensor<0,dim,Number> &operator = (const Tensor<0,dim,Number> &rhs);
+
+  /**
+   * Assignment from tensors with different underlying scalar type. This
+   * obviously requires that the @p OtherNumber type is convertible to @p
+   * Number.
+   */
+  template <typename OtherNumber>
+  Tensor<0,dim,Number> &operator = (const Tensor<0,dim,OtherNumber> &rhs);
+
+  /**
+   * Test for equality of two tensors.
+   */
+  template<typename OtherNumber>
+  bool operator == (const Tensor<0,dim,OtherNumber> &rhs) const;
+
+  /**
+   * Test for inequality of two tensors.
+   */
+  template<typename OtherNumber>
+  bool operator != (const Tensor<0,dim,OtherNumber> &rhs) const;
+
+  /**
+   * Add another scalar
+   */
+  template<typename OtherNumber>
+  Tensor<0,dim,Number> &operator += (const Tensor<0,dim,OtherNumber> &rhs);
+
+  /**
+   * Subtract another scalar.
+   */
+  template<typename OtherNumber>
+  Tensor<0,dim,Number> &operator -= (const Tensor<0,dim,OtherNumber> &rhs);
+
+  /**
+   * Multiply the scalar with a <tt>factor</tt>.
+   */
+  template<typename OtherNumber>
+  Tensor<0,dim,Number> &operator *= (const OtherNumber factor);
+
+  /**
+   * Divide the scalar by <tt>factor</tt>.
+   */
+  template<typename OtherNumber>
+  Tensor<0,dim,Number> &operator /= (const OtherNumber factor);
+
+  /**
+   * Tensor with inverted entries.
+   */
+  Tensor<0,dim,Number>   operator - () const;
+
+  /**
+   * Reset all values to zero.
+   *
+   * Note that this is partly inconsistent with the semantics of the @p
+   * clear() member functions of the standard library containers and of
+   * several other classes within deal.II, which not only reset the values of
+   * stored elements to zero, but release all memory and return the object
+   * into a virginal state. However, since the size of objects of the present
+   * type is determined by its template parameters, resizing is not an option,
+   * and indeed the state where all elements have a zero value is the state
+   * right after construction of such an object.
+   */
+  void clear ();
+
+  /**
+   * Return the Frobenius-norm of a tensor, i.e. the square root of the sum of
+   * the absolute squares of all entries. For the present case of rank-0
+   * tensors, this equals the absolute value of the stored scalar.
+   */
+  real_type norm () const;
+
+  /**
+   * Return the square of the Frobenius-norm of a tensor, i.e. the sum of the
+   * absolute squares of all entries.
+   */
+  real_type norm_square () const;
+
+  /**
+   * Read or write the data of this object to or from a stream for the purpose
+   * of serialization
+   */
+  template <class Archive>
+  void serialize(Archive &ar, const unsigned int version);
+
+  /**
+   * Internal type declaration that is used to specialize the return type of
+   * operator[]() for Tensor<1,dim,Number>
+   */
+  typedef Number tensor_type;
+
+private:
+  /**
+   * The value of this scalar object.
+   */
+  Number value;
+
+  /**
+   * Internal helper function for unroll.
+   */
+  template <typename OtherNumber>
+  void unroll_recursion(Vector<OtherNumber> &result,
+                        unsigned int        &start_index) const;
+
+  /**
+   * Allow an arbitrary Tensor to access the underlying values.
+   */
+  template <int, int, typename> friend class Tensor;
+};
+
+
+
+/**
+ * A general tensor class with an arbitrary rank, i.e. with an arbitrary
+ * number of indices. The Tensor class provides an indexing operator and a bit
+ * of infrastructure, but most functionality is recursively handed down to
+ * tensors of rank 1 or put into external templated functions, e.g. the
+ * <tt>contract</tt> family.
+ *
+ * Using this tensor class for objects of rank 2 has advantages over matrices
+ * in many cases since the dimension is known to the compiler as well as the
+ * location of the data. It is therefore possible to produce far more
+ * efficient code than for matrices with runtime-dependent dimension. It also
+ * makes the code easier to read because of the semantic difference between a
+ * tensor (an object that relates to a coordinate system and has
+ * transformation properties with regard to coordinate rotations and
+ * transforms) and matrices (which we consider as operators on arbitrary
+ * vector spaces related to linear algebra things).
+ *
+ * @tparam rank_ An integer that denotes the rank of this tensor. A rank-0
+ * tensor is a scalar, a rank-1 tensor is a vector with @p dim components, a
+ * rank-2 tensor is a matrix with dim-by-dim components, etc. There are
+ * specializations of this class for rank-0 and rank-1 tensors. There is also
+ * a related class SymmetricTensor for tensors of even rank whose elements are
+ * symmetric.
+ *
+ * @tparam dim An integer that denotes the dimension of the space in which
+ * this tensor operates. This of course equals the number of coordinates that
+ * identify a point and rank-1 tensor.
+ *
+ * @tparam Number The data type in which the tensor elements are to be stored.
+ * This will, in almost all cases, simply be the default @p double, but there
+ * are cases where one may want to store elements in a different (and always
+ * scalar) type. It can be used to base tensors on @p float or @p complex
+ * numbers or any other data type that implements basic arithmetic operations.
+ * Another example would be a type that allows for Automatic Differentiation
+ * (see, for example, the Sacado type used in step-33) and thereby can
+ * generate analytic (spatial) derivatives of a function that takes a tensor
+ * as argument.
+ *
+ * @ingroup geomprimitives
+ * @author Wolfgang Bangerth, 1998-2005, Matthias Maier, 2015
+ */
+template <int rank_, int dim, typename Number>
+class Tensor
+{
+public:
+  /**
+   * Provide a way to get the dimension of an object without explicit
+   * knowledge of its data type. Implementation is this way instead of
+   * providing a function <tt>dimension()</tt> because now it is possible to
+   * get the dimension at compile time without the expansion and preevaluation
+   * of an inlined function; the compiler may therefore produce more efficient
+   * code and you may use this value to declare other data types.
+   */
+  static const unsigned int dimension = dim;
+
+  /**
+   * Publish the rank of this tensor to the outside world.
+   */
+  static const unsigned int rank = rank_;
+
+  /**
+   * Number of independent components of a tensor of current rank. This is dim
+   * times the number of independent components of each sub-tensor.
+   */
+  static const unsigned int
+  n_independent_components = Tensor<rank_-1,dim>::n_independent_components *dim;
+
+  /**
+   * Type of objects encapsulated by this container and returned by
+   * operator[](). This is a tensor of lower rank for a general tensor, and a
+   * scalar number type for Tensor<1,dim,Number>.
+   */
+  typedef typename Tensor<rank_-1,dim,Number>::tensor_type value_type;
+
+  /**
+   * Declare an array type which can be used to initialize an object of this
+   * type statically.
+   */
+  typedef typename Tensor<rank_-1,dim,Number>::array_type
+  array_type[(dim != 0) ? dim : 1];
+  // ... avoid a compiler warning in case of dim == 0 and ensure that the
+  // array always has positive size.
+
+  /**
+   * Constructor. Initialize all entries to zero.
+   */
+  Tensor ();
+
+  /**
+   * Copy constructor.
+   */
+  Tensor (const Tensor<rank_,dim,Number> &initializer);
+
+  /**
+   * Constructor, where the data is copied from a C-style array.
+   */
+  Tensor (const array_type &initializer);
+
+  /**
+   * Constructor from tensors with different underlying scalar type. This
+   * obviously requires that the @p OtherNumber type is convertible to @p
+   * Number.
+   */
+  template <typename OtherNumber>
+  Tensor (const Tensor<rank_,dim,OtherNumber> &initializer);
+
+  /**
+   * Constructor that converts from a "tensor of tensors".
+   */
+  template <typename OtherNumber>
+  Tensor (const Tensor<1,dim,Tensor<rank_-1,dim,OtherNumber> > &initializer);
+
+  /**
+   * Conversion operator to tensor of tensors.
+   */
+  template <typename OtherNumber>
+  operator Tensor<1,dim,Tensor<rank_-1,dim,OtherNumber> > () const;
+
+  /**
+   * Read-Write access operator.
+   */
+  value_type &operator [] (const unsigned int i);
+
+  /**
+   * Read-only access operator.
+   */
+  const value_type &operator[](const unsigned int i) const;
+
+  /**
+   * Read access using TableIndices <tt>indices</tt>
+   */
+  const Number &operator [] (const TableIndices<rank_> &indices) const;
+
+  /**
+   * Read and write access using TableIndices <tt>indices</tt>
+   */
+  Number &operator [] (const TableIndices<rank_> &indices);
+
+  /**
+   * Copy assignment operator.
+   */
+  Tensor &operator = (const Tensor<rank_,dim,Number> &rhs);
+
+  /**
+   * Assignment operator from tensors with different underlying scalar type.
+   * This obviously requires that the @p OtherNumber type is convertible to @p
+   * Number.
+   */
+  template <typename OtherNumber>
+  Tensor &operator = (const Tensor<rank_,dim,OtherNumber> &rhs);
+
+  /**
+   * This operator assigns a scalar to a tensor. To avoid confusion with what
+   * exactly it means to assign a scalar value to a tensor, zero is the only
+   * value allowed for <tt>d</tt>, allowing the intuitive notation
+   * <tt>t=0</tt> to reset all elements of the tensor to zero.
+   */
+  Tensor<rank_,dim,Number> &operator = (const Number d);
+
+  /**
+   * Test for equality of two tensors.
+   */
+  template <typename OtherNumber>
+  bool operator == (const Tensor<rank_,dim,OtherNumber> &) const;
+
+  /**
+   * Test for inequality of two tensors.
+   */
+  template <typename OtherNumber>
+  bool operator != (const Tensor<rank_,dim,OtherNumber> &) const;
+
+  /**
+   * Add another tensor.
+   */
+  template <typename OtherNumber>
+  Tensor<rank_,dim,Number> &operator += (const Tensor<rank_,dim,OtherNumber> &);
+
+  /**
+   * Subtract another tensor.
+   */
+  template <typename OtherNumber>
+  Tensor<rank_,dim,Number> &operator -= (const Tensor<rank_,dim,OtherNumber> &);
+
+  /**
+   * Scale the tensor by <tt>factor</tt>, i.e. multiply all components by
+   * <tt>factor</tt>.
+   */
+  template <typename OtherNumber>
+  Tensor<rank_,dim,Number> &operator *= (const OtherNumber factor);
+
+  /**
+   * Scale the vector by <tt>1/factor</tt>.
+   */
+  template <typename OtherNumber>
+  Tensor<rank_,dim,Number> &operator /= (const OtherNumber factor);
+
+  /**
+   * Unary minus operator. Negate all entries of a tensor.
+   */
+  Tensor<rank_,dim,Number>   operator - () const;
+
+  /**
+   * Reset all values to zero.
+   *
+   * Note that this is partly inconsistent with the semantics of the @p
+   * clear() member functions of the standard library containers and of
+   * several other classes within deal.II, which not only reset the values of
+   * stored elements to zero, but release all memory and return the object
+   * into a virginial state. However, since the size of objects of the present
+   * type is determined by its template parameters, resizing is not an option,
+   * and indeed the state where all elements have a zero value is the state
+   * right after construction of such an object.
+   */
+  void clear ();
+
+  /**
+   * Return the Frobenius-norm of a tensor, i.e. the square root of the sum of
+   * the absolute squares of all entries. For the present case of rank-1
+   * tensors, this equals the usual <tt>l<sub>2</sub></tt> norm of the vector.
+   */
+
+  typename numbers::NumberTraits<Number>::real_type norm() const;
+
+  /**
+   * Return the square of the Frobenius-norm of a tensor, i.e. the sum of the
+   * absolute squares of all entries.
+   */
+  typename numbers::NumberTraits<Number>::real_type norm_square() const;
+
+  /**
+   * Fill a vector with all tensor elements.
+   *
+   * This function unrolls all tensor entries into a single, linearly numbered
+   * vector. As usual in C++, the rightmost index of the tensor marches
+   * fastest.
+   */
+  template <typename OtherNumber>
+  void unroll (Vector<OtherNumber> &result) const;
+
+  /**
+   * Returns an unrolled index in the range [0,dim^rank-1] for the element of
+   * the tensor indexed by the argument to the function.
+   */
+  static
+  unsigned int
+  component_to_unrolled_index(const TableIndices<rank_> &indices);
+
+  /**
+   * Opposite of  component_to_unrolled_index: For an index in the range
+   * [0,dim^rank-1], return which set of indices it would correspond to.
+   */
+  static
+  TableIndices<rank_> unrolled_to_component_indices(const unsigned int i);
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  static std::size_t memory_consumption ();
+
+  /**
+   * Read or write the data of this object to or from a stream for the purpose
+   * of serialization
+   */
+  template <class Archive>
+  void serialize(Archive &ar, const unsigned int version);
+
+  /**
+   * Internal type declaration that is used to specialize the return type of
+   * operator[]() for Tensor<1,dim,Number>
+   */
+  typedef Tensor<rank_, dim, Number> tensor_type;
+
+private:
+  /**
+   * Array of tensors holding the subelements.
+   */
+  Tensor<rank_-1, dim, Number> values[(dim != 0) ? dim : 1];
+  // ... avoid a compiler warning in case of dim == 0 and ensure that the
+  // array always has positive size.
+
+  /**
+   * Internal helper function for unroll.
+   */
+  template <typename OtherNumber>
+  void unroll_recursion(Vector<OtherNumber> &result,
+                        unsigned int        &start_index) const;
+
+  /**
+   * Allow an arbitrary Tensor to access the underlying values.
+   */
+  template <int, int, typename> friend class Tensor;
+
+  /**
+   * Point is allowed access to the coordinates. This is supposed to improve
+   * speed.
+   */
+  friend class Point<dim,Number>;
+};
+
+
+/*---------------------- Inline functions: Tensor<0,dim> ---------------------*/
+
+
+// Default constructor of the rank-0 specialization: value-initializes the
+// single stored scalar.
+template <int dim, typename Number>
+inline Tensor<0,dim,Number>::Tensor ()
+  :
+  value()
+{}
+
+
+// Copy constructor of the rank-0 specialization. The scalar is
+// copy-constructed in the member initializer list (as the default
+// constructor does) rather than default-initialized and then assigned.
+template <int dim, typename Number>
+inline
+Tensor<0,dim,Number>::Tensor (const Tensor<0,dim,Number> &p)
+  : value(p.value)
+{
+}
+
+
+// Construct a rank-0 tensor from a plain scalar of possibly different type.
+// The scalar is stored by assignment, so OtherNumber has to be assignable to
+// Number.
+template <int dim, typename Number>
+template <typename OtherNumber>
+inline Tensor<0,dim,Number>::Tensor (const OtherNumber initializer)
+{
+  this->value = initializer;
+}
+
+
+// Construct a rank-0 tensor from a rank-0 tensor with a different scalar
+// type by assigning the other tensor's stored scalar to ours.
+template <int dim, typename Number>
+template <typename OtherNumber>
+inline Tensor<0,dim,Number>::Tensor (const Tensor<0,dim,OtherNumber> &p)
+{
+  this->value = p.value;
+}
+
+
+// Conversion to a writable reference to the stored scalar. Asserts that dim
+// is nonzero before handing out the reference.
+template <int dim, typename Number>
+inline Tensor<0,dim,Number>::operator Number &()
+{
+  Assert(dim != 0, ExcMessage("Cannot access an object of type Tensor<0,0,Number>"));
+
+  Number &stored = value;
+  return stored;
+}
+
+
+// Conversion to a read-only reference to the stored scalar. Asserts that dim
+// is nonzero before handing out the reference.
+template <int dim, typename Number>
+inline Tensor<0,dim,Number>::operator const Number &() const
+{
+  Assert(dim != 0, ExcMessage("Cannot access an object of type Tensor<0,0,Number>"));
+
+  const Number &stored = value;
+  return stored;
+}
+
+
+// Copy assignment: take over the scalar stored in p.
+template <int dim, typename Number>
+inline Tensor<0,dim,Number> &
+Tensor<0,dim,Number>::operator = (const Tensor<0,dim,Number> &p)
+{
+  this->value = p.value;
+  return *this;
+}
+
+
+// Assignment from a rank-0 tensor with a different scalar type: the other
+// tensor's scalar is assigned to ours.
+template <int dim, typename Number>
+template <typename OtherNumber>
+inline Tensor<0,dim,Number> &
+Tensor<0,dim,Number>::operator = (const Tensor<0,dim,OtherNumber> &p)
+{
+  this->value = p.value;
+  return *this;
+}
+
+
+// Two rank-0 tensors are equal if their stored scalars compare equal.
+template <int dim, typename Number>
+template <typename OtherNumber>
+inline bool
+Tensor<0,dim,Number>::operator == (const Tensor<0,dim,OtherNumber> &p) const
+{
+  const bool same = (value == p.value);
+  return same;
+}
+
+
+// Inequality is defined as the negation of operator==.
+template <int dim, typename Number>
+template <typename OtherNumber>
+inline bool
+Tensor<0,dim,Number>::operator != (const Tensor<0,dim,OtherNumber> &p) const
+{
+  const bool equal = ((*this) == p);
+  return !equal;
+}
+
+
+// Add the scalar stored in p to the scalar stored in this object.
+template <int dim, typename Number>
+template <typename OtherNumber>
+inline Tensor<0,dim,Number> &
+Tensor<0,dim,Number>::operator += (const Tensor<0,dim,OtherNumber> &p)
+{
+  this->value += p.value;
+  return *this;
+}
+
+
+// Subtract the scalar stored in p from the scalar stored in this object.
+template <int dim, typename Number>
+template <typename OtherNumber>
+inline Tensor<0,dim,Number> &
+Tensor<0,dim,Number>::operator -= (const Tensor<0,dim,OtherNumber> &p)
+{
+  this->value -= p.value;
+  return *this;
+}
+
+
+// Multiply the stored scalar by the factor s.
+template <int dim, typename Number>
+template <typename OtherNumber>
+inline Tensor<0,dim,Number> &
+Tensor<0,dim,Number>::operator *= (const OtherNumber s)
+{
+  this->value *= s;
+  return *this;
+}
+
+
+// Divide the stored scalar by s.
+template <int dim, typename Number>
+template <typename OtherNumber>
+inline Tensor<0,dim,Number> &
+Tensor<0,dim,Number>::operator /= (const OtherNumber s)
+{
+  this->value /= s;
+  return *this;
+}
+
+
+// Unary minus: return a rank-0 tensor built from the negated scalar.
+template <int dim, typename Number>
+inline Tensor<0,dim,Number>
+Tensor<0,dim,Number>::operator - () const
+{
+  const Tensor<0,dim,Number> negated = -value;
+  return negated;
+}
+
+
+// Norm of a rank-0 tensor: NumberTraits<Number>::abs applied to the stored
+// scalar. Asserts that dim is nonzero.
+template <int dim, typename Number>
+inline typename Tensor<0,dim,Number>::real_type
+Tensor<0,dim,Number>::norm () const
+{
+  Assert(dim != 0, ExcMessage("Cannot access an object of type Tensor<0,0,Number>"));
+
+  typedef numbers::NumberTraits<Number> traits;
+  return traits::abs (value);
+}
+
+
+// Squared norm of a rank-0 tensor: NumberTraits<Number>::abs_square applied
+// to the stored scalar. Asserts that dim is nonzero.
+template <int dim, typename Number>
+inline typename Tensor<0,dim,Number>::real_type
+Tensor<0,dim,Number>::norm_square () const
+{
+  Assert(dim != 0, ExcMessage("Cannot access an object of type Tensor<0,0,Number>"));
+
+  typedef numbers::NumberTraits<Number> traits;
+  return traits::abs_square (value);
+}
+
+
+// End of the unroll recursion: store the scalar at position index of result
+// and advance index by one so the caller continues with the next slot.
+template <int dim, typename Number>
+template <typename OtherNumber>
+inline void
+Tensor<0, dim, Number>::unroll_recursion (Vector<OtherNumber> &result,
+                                          unsigned int        &index) const
+{
+  Assert(dim != 0, ExcMessage("Cannot unroll an object of type Tensor<0,0,Number>"));
+
+  result[index] = value;
+  index += 1;
+}
+
+
+// Reset the stored scalar to a value-initialized value_type.
+template <int dim, typename Number>
+inline void
+Tensor<0,dim,Number>::clear ()
+{
+  this->value = value_type();
+}
+
+
+// Hand the stored scalar to the archive through its operator&. The version
+// argument is unused.
+template <int dim, typename Number>
+template <class Archive>
+inline void
+Tensor<0,dim,Number>::serialize(Archive &ar, const unsigned int /*version*/)
+{
+  ar & value;
+}
+
+
+/*-------------------- Inline functions: Tensor<rank,dim> --------------------*/
+
+
+// Default constructor. Nothing to do in the body: the elements of the
+// C-style array `values` are default-initialized, which sets all entries to
+// zero recursively.
+template <int rank_, int dim, typename Number>
+inline Tensor<rank_,dim,Number>::Tensor ()
+{}
+
+
+// Copy constructor: copy the dim sub-objects of initializer element by
+// element. Nothing is copied for dim == 0.
+template <int rank_, int dim, typename Number>
+inline Tensor<rank_,dim,Number>::Tensor (const Tensor<rank_,dim,Number> &initializer)
+{
+  if (dim > 0)
+    {
+      const value_type *const first = &initializer[0];
+      std::copy (first, first+dim, &values[0]);
+    }
+}
+
+
+// Construct from a C-style array by assigning each of its dim entries to the
+// corresponding sub-object.
+template <int rank_, int dim, typename Number>
+inline Tensor<rank_,dim,Number>::Tensor (const array_type &initializer)
+{
+  for (unsigned int d=0; d<dim; ++d)
+    {
+      values[d] = initializer[d];
+    }
+}
+
+
+// Construct from a tensor of the same rank and dimension but different
+// scalar type, converting sub-object by sub-object.
+template <int rank_, int dim, typename Number>
+template <typename OtherNumber>
+inline Tensor<rank_,dim,Number>::Tensor (const Tensor<rank_,dim,OtherNumber> &initializer)
+{
+  for (unsigned int d=0; d!=dim; ++d)
+    {
+      values[d] = initializer[d];
+    }
+}
+
+
+// Construct from a "tensor of tensors": each entry of the rank-1 outer
+// tensor is itself a tensor of rank rank_-1 and is assigned to the matching
+// sub-object.
+template <int rank_, int dim, typename Number>
+template <typename OtherNumber>
+inline Tensor<rank_,dim,Number>::Tensor
+(const Tensor<1,dim,Tensor<rank_-1,dim,OtherNumber> > &initializer)
+{
+  for (unsigned int d=0; d<dim; ++d)
+    {
+      values[d] = initializer[d];
+    }
+}
+
+
+// Conversion to a "tensor of tensors". A rank-1 tensor whose entries are
+// tensors of rank rank_-1 over Number is built from the values array and
+// returned; any conversion to OtherNumber happens on return.
+template <int rank_, int dim, typename Number>
+template <typename OtherNumber>
+inline
+Tensor<rank_,dim,Number>::
+operator Tensor<1,dim,Tensor<rank_-1,dim,OtherNumber> > () const
+{
+  typedef Tensor<1,dim,Tensor<rank_-1,dim,Number> > nested_type;
+  return nested_type (values);
+}
+
+
+
+namespace internal
+{
+  namespace TensorSubscriptor
+  {
+    // General case: return a reference to entry i of the array that starts at
+    // values, after asserting that i is less than dim. The unnamed third
+    // argument only selects this overload.
+    template <typename ArrayElementType, int dim>
+    inline DEAL_II_ALWAYS_INLINE
+    ArrayElementType &
+    subscript (ArrayElementType *values,
+               const unsigned int i,
+               dealii::internal::int2type<dim>)
+    {
+      Assert (i<dim, ExcIndexRange(i, 0, dim));
+      return *(values + i);
+    }
+
+
+    // Overload selected for dim == 0: there are no elements to access, so
+    // this always triggers an assertion. A function-local static object is
+    // returned only so that the function has something to return.
+    template <typename ArrayElementType>
+    ArrayElementType &
+    subscript (ArrayElementType *,
+               const unsigned int,
+               dealii::internal::int2type<0>)
+    {
+      Assert(false, ExcMessage("Cannot access elements of an object of type Tensor<rank,0,Number>."));
+      static ArrayElementType dummy;
+      return dummy;
+    }
+  }
+}
+
+
+// Read-write element access. The work is delegated to
+// internal::TensorSubscriptor::subscript, with the overload chosen by a tag
+// type built from dim.
+template <int rank_, int dim, typename Number>
+inline DEAL_II_ALWAYS_INLINE
+typename Tensor<rank_,dim,Number>::value_type &
+Tensor<rank_,dim,Number>::operator[] (const unsigned int i)
+{
+  typedef dealii::internal::int2type<dim> dim_tag;
+  return dealii::internal::TensorSubscriptor::subscript(values, i, dim_tag());
+}
+
+
+// Read-only element access. The work is delegated to
+// internal::TensorSubscriptor::subscript, with the overload chosen by a tag
+// type built from dim.
+template <int rank_, int dim, typename Number>
+inline DEAL_II_ALWAYS_INLINE
+const typename Tensor<rank_,dim,Number>::value_type &
+Tensor<rank_,dim,Number>::operator[] (const unsigned int i) const
+{
+  typedef dealii::internal::int2type<dim> dim_tag;
+  return dealii::internal::TensorSubscriptor::subscript(values, i, dim_tag());
+}
+
+
+// Read access to a single scalar entry addressed by a full set of rank_
+// indices; forwards to TensorAccessors::extract. Asserts that dim is nonzero.
+template <int rank_, int dim, typename Number>
+inline const Number &
+Tensor<rank_,dim,Number>::operator[] (const TableIndices<rank_> &indices) const
+{
+  Assert(dim != 0, ExcMessage("Cannot access an object of type Tensor<rank_,0,Number>"));
+
+  const Number &entry = TensorAccessors::extract<rank_>(*this, indices);
+  return entry;
+}
+
+
+// Read-write access to a single scalar entry addressed by a full set of
+// rank_ indices; forwards to TensorAccessors::extract. Asserts that dim is
+// nonzero.
+template <int rank_, int dim, typename Number>
+inline Number &
+Tensor<rank_,dim,Number>::operator[] (const TableIndices<rank_> &indices)
+{
+  Assert(dim != 0, ExcMessage("Cannot access an object of type Tensor<rank_,0,Number>"));
+
+  Number &entry = TensorAccessors::extract<rank_>(*this, indices);
+  return entry;
+}
+
+
+// Copy assignment: copy the dim sub-tensors of t into this object. Nothing
+// is copied for dim == 0.
+template <int rank_, int dim, typename Number>
+inline Tensor<rank_,dim,Number> &
+Tensor<rank_,dim,Number>::operator = (const Tensor<rank_,dim,Number> &t)
+{
+  if (dim > 0)
+    {
+      const Tensor<rank_-1,dim,Number> *const source = &t.values[0];
+      std::copy (source, source+dim, &values[0]);
+    }
+
+  return *this;
+}
+
+
+// Assignment from a tensor with a different scalar type: each of the dim
+// sub-tensors of t is assigned (and thereby converted) to the matching
+// sub-tensor of this object. Nothing is copied for dim == 0.
+template <int rank_, int dim, typename Number>
+template <typename OtherNumber>
+inline Tensor<rank_,dim,Number> &
+Tensor<rank_,dim,Number>::operator = (const Tensor<rank_,dim,OtherNumber> &t)
+{
+  if (dim > 0)
+    {
+      const Tensor<rank_-1,dim,OtherNumber> *const source = &t.values[0];
+      std::copy (source, source+dim, &values[0]);
+    }
+
+  return *this;
+}
+
+
+// Assignment of a scalar. Only a value equal to Number() is accepted
+// (checked by Assert); every sub-object is then reset to Number().
+template <int rank_, int dim, typename Number>
+inline Tensor<rank_,dim,Number> &
+Tensor<rank_,dim,Number>::operator = (const Number d)
+{
+  Assert (d == Number(), ExcMessage ("Only assignment with zero is allowed"));
+  // d is only read inside the Assert above; the cast marks it as used
+  // otherwise.
+  (void) d;
+
+  for (unsigned int k=0; k<dim; ++k)
+    {
+      values[k] = Number();
+    }
+
+  return *this;
+}
+
+
+// Two tensors are equal if all of their dim sub-objects compare equal. The
+// comparison stops at the first sub-object that differs.
+template <int rank_, int dim, typename Number>
+template <typename OtherNumber>
+inline bool
+Tensor<rank_,dim,Number>::operator == (const Tensor<rank_,dim,OtherNumber> &p) const
+{
+  for (unsigned int d=0; d<dim; ++d)
+    {
+      const bool differs = (values[d] != p.values[d]);
+      if (differs)
+        return false;
+    }
+
+  return true;
+}
+
+
+// At some places in the library, we have Point<0> for formal reasons
+// (e.g., we sometimes have Quadrature<dim-1> for faces, so we have
+// Quadrature<0> for dim=1, and then we have Point<0>). To avoid warnings
+// in the above function that the loop end check always fails, we
+// implement this function here
+// Explicit specialization for Tensor<1,0,double>: such objects have no
+// entries, so any two of them compare equal.
+template <>
+template <>
+inline bool
+Tensor<1,0,double>::operator == (const Tensor<1,0,double> &) const
+{
+  return true;
+}
+
+
+// Inequality is defined as the negation of operator==.
+template <int rank_, int dim, typename Number>
+template <typename OtherNumber>
+inline bool
+Tensor<rank_,dim,Number>::operator != (const Tensor<rank_,dim,OtherNumber> &p) const
+{
+  const bool equal = ((*this) == p);
+  return !equal;
+}
+
+
+// Add p to this tensor, sub-object by sub-object.
+template <int rank_, int dim, typename Number>
+template <typename OtherNumber>
+inline Tensor<rank_,dim,Number> &
+Tensor<rank_,dim,Number>::operator += (const Tensor<rank_,dim,OtherNumber> &p)
+{
+  for (unsigned int d=0; d<dim; ++d)
+    {
+      values[d] += p.values[d];
+    }
+
+  return *this;
+}
+
+
+// Subtract p from this tensor, sub-object by sub-object.
+template <int rank_, int dim, typename Number>
+template <typename OtherNumber>
+inline Tensor<rank_,dim,Number> &
+Tensor<rank_,dim,Number>::operator -= (const Tensor<rank_,dim,OtherNumber> &p)
+{
+  for (unsigned int d=0; d<dim; ++d)
+    {
+      values[d] -= p.values[d];
+    }
+
+  return *this;
+}
+
+
+// Multiply every sub-object by the factor s.
+template <int rank_, int dim, typename Number>
+template <typename OtherNumber>
+inline Tensor<rank_,dim,Number> &
+Tensor<rank_,dim,Number>::operator *= (const OtherNumber s)
+{
+  for (unsigned int d=0; d<dim; ++d)
+    {
+      values[d] *= s;
+    }
+
+  return *this;
+}
+
+
+// Divide every sub-object by s.
+template <int rank_, int dim, typename Number>
+template <typename OtherNumber>
+inline Tensor<rank_,dim,Number> &
+Tensor<rank_,dim,Number>::operator /= (const OtherNumber s)
+{
+  for (unsigned int d=0; d<dim; ++d)
+    {
+      values[d] /= s;
+    }
+
+  return *this;
+}
+
+
+// Unary minus: return a new tensor whose sub-objects are the negated
+// sub-objects of this one.
+template <int rank_, int dim, typename Number>
+inline Tensor<rank_,dim,Number>
+Tensor<rank_,dim,Number>::operator - () const
+{
+  Tensor<rank_,dim,Number> negated;
+  for (unsigned int d=0; d<dim; ++d)
+    {
+      negated.values[d] = -values[d];
+    }
+
+  return negated;
+}
+
+
+// Frobenius norm: the square root of norm_square().
+template <int rank_, int dim, typename Number>
+inline typename numbers::NumberTraits<Number>::real_type
+Tensor<rank_,dim,Number>::norm () const
+{
+  const typename numbers::NumberTraits<Number>::real_type squared = norm_square();
+  return std::sqrt (squared);
+}
+
+
+// Squared Frobenius norm: the sum of norm_square() over all dim sub-objects,
+// accumulated in a value-initialized real_type.
+template <int rank_, int dim, typename Number>
+inline typename numbers::NumberTraits<Number>::real_type
+Tensor<rank_,dim,Number>::norm_square () const
+{
+  typedef typename numbers::NumberTraits<Number>::real_type real_type;
+
+  real_type sum = real_type();
+  for (unsigned int d=0; d<dim; ++d)
+    {
+      sum += values[d].norm_square();
+    }
+
+  return sum;
+}
+
+
+// Write all entries of this tensor into result. The size of result is
+// checked against dim^rank_; the actual work is done by unroll_recursion,
+// starting at position zero.
+template <int rank_, int dim, typename Number>
+template <typename OtherNumber>
+inline void
+Tensor<rank_, dim, Number>::unroll (Vector<OtherNumber> &result) const
+{
+  AssertDimension (result.size(),(Utilities::fixed_power<rank_, unsigned int>(dim)));
+
+  unsigned int next_position = 0;
+  unroll_recursion (result, next_position);
+}
+
+
+// Recursive helper of unroll(): let each sub-object in turn write its
+// entries into result. index is passed by reference and is advanced by the
+// calls further down the recursion.
+template <int rank_, int dim, typename Number>
+template <typename OtherNumber>
+inline void
+Tensor<rank_, dim, Number>::unroll_recursion (Vector<OtherNumber> &result,
+                                              unsigned int        &index) const
+{
+  for (unsigned int d=0; d<dim; ++d)
+    {
+      values[d].unroll_recursion(result, index);
+    }
+}
+
+
+// Map a set of rank_ indices to the position of the corresponding entry in
+// the unrolled (linear) numbering, in which the rightmost index runs
+// fastest: the result is the number with digits indices[0],...,
+// indices[rank_-1] in base dim.
+//
+// Each index is checked against dim in debug mode. Without this check an
+// out-of-range index would silently yield the position of a different entry
+// (or a position past the end). The inverse function,
+// unrolled_to_component_indices(), validates its argument in the same way.
+template <int rank_, int dim, typename Number>
+inline
+unsigned int
+Tensor<rank_, dim, Number>::component_to_unrolled_index(const TableIndices<rank_> &indices)
+{
+  unsigned int index = 0;
+  for (int r = 0; r < rank_; ++r)
+    {
+      Assert (indices[r] < dim, ExcIndexRange (indices[r], 0, dim));
+      index = index * dim + indices[r];
+    }
+
+  return index;
+}
+
+
+// Inverse of component_to_unrolled_index(): split the linear position i into
+// its rank_ indices. The indices are the digits of i in base dim and are
+// extracted from the last (fastest running) index to the first.
+template <int rank_, int dim, typename Number>
+inline TableIndices<rank_>
+Tensor<rank_, dim, Number>::unrolled_to_component_indices(const unsigned int i)
+{
+  Assert (i < n_independent_components,
+          ExcIndexRange (i, 0, n_independent_components));
+
+  TableIndices<rank_> indices;
+
+  unsigned int rest = i;
+  int r = rank_;
+  while (r-- > 0)
+    {
+      indices[r] = (rest % dim);
+      rest = rest / dim;
+    }
+
+  // All digits have been consumed, so nothing may be left over.
+  Assert (rest == 0, ExcInternalError());
+
+  return indices;
+}
+
+
+// Reset every sub-object to a value-initialized value_type.
+template <int rank_, int dim, typename Number>
+inline void
+Tensor<rank_,dim,Number>::clear ()
+{
+  for (unsigned int d=0; d<dim; ++d)
+    {
+      values[d] = value_type();
+    }
+}
+
+
+// Memory used by an object of this type, reported as sizeof of the type.
+template <int rank_, int dim, typename Number>
+inline std::size_t
+Tensor<rank_,dim,Number>::memory_consumption ()
+{
+  typedef Tensor<rank_,dim,Number> self_type;
+  return sizeof(self_type);
+}
+
+
+// Hand the array of sub-objects to the archive through its operator&. The
+// version argument is unused.
+template <int rank_, int dim, typename Number>
+template <class Archive>
+inline void
+Tensor<rank_,dim,Number>::serialize(Archive &ar, const unsigned int /*version*/)
+{
+  ar & values;
+}
+
+
+/* ----------------- Non-member functions operating on tensors. ------------ */
+
+/**
+ * @name Output functions for Tensor objects
+ */
+//@{
+
+/**
+ * Output operator for tensors. Print the elements consecutively, with a space
+ * in between, two spaces between rank 1 subtensors, three between rank 2 and
+ * so on.
+ *
+ * @relates Tensor
+ */
+// Print the dim sub-objects of p one after the other. Consecutive
+// sub-objects are separated by rank_ spaces, so that entries of a rank-1
+// tensor are separated by one space, rank-1 subtensors of a rank-2 tensor
+// by two spaces, rank-2 subtensors of a rank-3 tensor by three, and so on.
+// No separator is printed after the last sub-object. (Printing a single
+// space at every level made the nesting of higher-rank tensors
+// indistinguishable in the output.)
+template <int rank_, int dim, typename Number>
+inline
+std::ostream &operator << (std::ostream &out, const Tensor<rank_,dim,Number> &p)
+{
+  for (unsigned int i = 0; i < dim; ++i)
+    {
+      out << p[i];
+      if (i != dim - 1)
+        for (unsigned int j = 0; j < rank_; ++j)
+          out << ' ';
+    }
+
+  return out;
+}
+
+
+/**
+ * Output operator for tensors of rank 0. Since such tensors are scalars, we
+ * simply print this one value.
+ *
+ * @relates Tensor<0,dim,Number>
+ */
+// Print a rank-0 tensor by converting it to a reference to its scalar and
+// writing that scalar to the stream.
+template <int dim, typename Number>
+inline std::ostream &
+operator << (std::ostream &out, const Tensor<0,dim,Number> &p)
+{
+  const Number &scalar = static_cast<const Number &>(p);
+  out << scalar;
+  return out;
+}
+
+
+//@}
+/**
+ * @name Vector space operations on Tensor objects:
+ */
+//@{
+
+
+#ifndef DEAL_II_WITH_CXX11
+// The two partial specializations below are only compiled when
+// DEAL_II_WITH_CXX11 is not defined.
+
+// Product of an object of type T with a Tensor<rank,dim,U>: a tensor of the
+// same rank and dimension whose scalar type is ProductType<T,U>::type.
+template <typename T, typename U, int rank, int dim>
+struct ProductType<T,Tensor<rank,dim,U> >
+{
+  typedef Tensor<rank,dim,typename ProductType<T,U>::type> type;
+};
+
+// Product of a Tensor<rank,dim,T> with an object of type U: a tensor of the
+// same rank and dimension whose scalar type is ProductType<T,U>::type.
+template <typename T, typename U, int rank, int dim>
+struct ProductType<Tensor<rank,dim,T>,U>
+{
+  typedef Tensor<rank,dim,typename ProductType<T,U>::type> type;
+};
+#endif
+
+
+
+/**
+ * Scalar multiplication of a tensor of rank 0 with an object from the left.
+ *
+ * This function unwraps the underlying @p Number stored in the Tensor and
+ * multiplies @p object with it.
+ *
+ * @relates Tensor<0,dim,Number>
+ */
+// object * t for a rank-0 tensor t: unwrap the scalar stored in t and
+// multiply object with it from the left.
+template <int dim, typename Number, typename Other>
+inline typename ProductType<Other, Number>::type
+operator * (const Other                 object,
+            const Tensor<0,dim,Number> &t)
+{
+  const Number &scalar = static_cast<const Number &>(t);
+  return object * scalar;
+}
+
+
+/**
+ * Scalar multiplication of a tensor of rank 0 with an object from the right.
+ *
+ * This function unwraps the underlying @p Number stored in the Tensor and
+ * multiplies @p object with it.
+ *
+ * @relates Tensor<0,dim,Number>
+ */
+// t * object for a rank-0 tensor t: unwrap the scalar stored in t and
+// multiply it with object from the right.
+template <int dim, typename Number, typename Other>
+inline typename ProductType<Number, Other>::type
+operator * (const Tensor<0,dim,Number> &t,
+            const Other                 object)
+{
+  const Number &scalar = static_cast<const Number &>(t);
+  return scalar * object;
+}
+
+
+/**
+ * Scalar multiplication of two tensors of rank 0.
+ *
+ * This function unwraps the underlying objects of type @p Number and @p
+ * OtherNumber that are stored within the Tensor and multiplies them. It
+ * returns an unwrapped number of product type.
+ *
+ * @relates Tensor<0,dim,Number>
+ */
+// Product of two rank-0 tensors: unwrap both stored scalars and return
+// their product as a plain (unwrapped) number.
+template <int dim, typename Number, typename OtherNumber>
+inline typename ProductType<Number, OtherNumber>::type
+operator * (const Tensor<0, dim, Number>      &src1,
+            const Tensor<0, dim, OtherNumber> &src2)
+{
+  const Number      &left  = static_cast<const Number &>(src1);
+  const OtherNumber &right = static_cast<const OtherNumber &>(src2);
+  return left * right;
+}
+
+
+/**
+ * Division of a tensor of rank 0 by a scalar number.
+ *
+ * @relates Tensor<0,dim,Number>
+ */
+// Divide a rank-0 tensor by a scalar. The stored scalar is accessed through
+// a const reference, as in the other rank-0 operators, so that no temporary
+// copy of a potentially expensive Number object is made before dividing.
+template <int dim, typename Number, typename OtherNumber>
+inline
+Tensor<0,dim,typename ProductType<Number, typename EnableIfScalar<OtherNumber>::type>::type>
+operator / (const Tensor<0,dim,Number> &t,
+            const OtherNumber           factor)
+{
+  return static_cast<const Number &>(t) / factor;
+}
+
+
+/**
+ * Add two tensors of rank 0.
+ *
+ * @relates Tensor<0,dim,Number>
+ */
+// Sum of two rank-0 tensors: unwrap both stored scalars and add them; the
+// result is wrapped into a rank-0 tensor of the product type again.
+template <int dim, typename Number, typename OtherNumber>
+inline Tensor<0, dim, typename ProductType<Number, OtherNumber>::type>
+operator+ (const Tensor<0,dim,Number> &p, const Tensor<0,dim,OtherNumber> &q)
+{
+  const Number      &left  = static_cast<const Number &>(p);
+  const OtherNumber &right = static_cast<const OtherNumber &>(q);
+  return left + right;
+}
+
+
+/**
+ * Subtract two tensors of rank 0.
+ *
+ * @relates Tensor<0,dim,Number>
+ */
+// Difference of two rank-0 tensors: unwrap both stored scalars and subtract
+// them; the result is wrapped into a rank-0 tensor of the product type again.
+template <int dim, typename Number, typename OtherNumber>
+inline Tensor<0, dim, typename ProductType<Number, OtherNumber>::type>
+operator- (const Tensor<0,dim,Number> &p, const Tensor<0,dim,OtherNumber> &q)
+{
+  const Number      &left  = static_cast<const Number &>(p);
+  const OtherNumber &right = static_cast<const OtherNumber &>(q);
+  return left - right;
+}
+
+
+/**
+ * Multiplication of a tensor of general rank with a scalar number from the
+ * right.
+ *
+ * Only multiplication with a scalar number type (i.e., a floating point
+ * number, a complex floating point number, etc.) is allowed, see the
+ * documentation of EnableIfScalar for details.
+ *
+ * @relates Tensor
+ */
+// Tensor times scalar: multiply each of the dim sub-objects of t by factor.
+// The multiplication of the sub-objects resolves to the operators for
+// tensors of lower rank.
+template <int rank, int dim,
+          typename Number,
+          typename OtherNumber>
+inline
+Tensor<rank,dim,typename ProductType<Number, typename EnableIfScalar<OtherNumber>::type>::type>
+operator * (const Tensor<rank,dim,Number> &t,
+            const OtherNumber              factor)
+{
+  typedef Tensor<rank,dim,typename ProductType<Number,OtherNumber>::type> result_type;
+
+  result_type product;
+  for (unsigned int k=0; k<dim; ++k)
+    {
+      product[k] = t[k] * factor;
+    }
+
+  return product;
+}
+
+
+/**
+ * Multiplication of a tensor of general rank with a scalar number from the
+ * left.
+ *
+ * Only multiplication with a scalar number type (i.e., a floating point
+ * number, a complex floating point number, etc.) is allowed, see the
+ * documentation of EnableIfScalar for details.
+ *
+ * @relates Tensor
+ */
+// Scalar times tensor: implemented in terms of "tensor times scalar" by
+// swapping the operands.
+template <int rank, int dim,
+          typename Number,
+          typename OtherNumber>
+inline
+Tensor<rank,dim,typename ProductType<typename EnableIfScalar<Number>::type, OtherNumber>::type>
+operator * (const Number                        factor,
+            const Tensor<rank,dim,OtherNumber> &t)
+{
+  return (t * factor);
+}
+
+
+/**
+ * Division of a tensor of general rank with a scalar number. See the
+ * discussion on operator*() above for more information about template
+ * arguments and the return type.
+ *
+ * @relates Tensor
+ */
+// Tensor divided by scalar: divide each of the dim sub-objects of t by
+// factor. The division of the sub-objects resolves to the operators for
+// tensors of lower rank.
+template <int rank, int dim,
+          typename Number,
+          typename OtherNumber>
+inline
+Tensor<rank,dim,typename ProductType<Number, typename EnableIfScalar<OtherNumber>::type>::type>
+operator / (const Tensor<rank,dim,Number> &t,
+            const OtherNumber              factor)
+{
+  typedef Tensor<rank,dim,typename ProductType<Number,OtherNumber>::type> result_type;
+
+  result_type quotient;
+  for (unsigned int k=0; k<dim; ++k)
+    {
+      quotient[k] = t[k] / factor;
+    }
+
+  return quotient;
+}
+
+
+/**
+ * Addition of two tensors of general rank.
+ *
+ * @tparam rank The rank of both tensors.
+ *
+ * @relates Tensor
+ */
+// Sum of two tensors of equal rank: start from a copy of p converted to the
+// product type and add the sub-objects of q to it one by one.
+template <int rank, int dim, typename Number, typename OtherNumber>
+inline Tensor<rank, dim, typename ProductType<Number, OtherNumber>::type>
+operator+ (const Tensor<rank,dim,Number> &p, const Tensor<rank,dim,OtherNumber> &q)
+{
+  typedef Tensor<rank, dim, typename ProductType<Number, OtherNumber>::type> result_type;
+
+  result_type sum (p);
+  for (unsigned int k=0; k<dim; ++k)
+    {
+      sum[k] += q[k];
+    }
+
+  return sum;
+}
+
+
+/**
+ * Subtraction of two tensors of general rank.
+ *
+ * @tparam rank The rank of both tensors.
+ *
+ * @relates Tensor
+ */
+// Difference of two tensors of equal rank: start from a copy of p converted
+// to the product type and subtract the sub-objects of q from it one by one.
+template <int rank, int dim, typename Number, typename OtherNumber>
+inline Tensor<rank, dim, typename ProductType<Number, OtherNumber>::type>
+operator- (const Tensor<rank,dim,Number> &p, const Tensor<rank,dim,OtherNumber> &q)
+{
+  typedef Tensor<rank, dim, typename ProductType<Number, OtherNumber>::type> result_type;
+
+  result_type difference (p);
+  for (unsigned int k=0; k<dim; ++k)
+    {
+      difference[k] -= q[k];
+    }
+
+  return difference;
+}
+
+
+//@}
+/**
+ * @name Contraction operations and the outer product for tensor objects
+ */
+//@{
+
+
+/**
+ * The dot product (single contraction) for tensors: Return a tensor of rank
+ * $(\text{rank}_1 + \text{rank}_2 - 2)$ that is the contraction of the last
+ * index of a tensor @p src1 of rank @p rank_1 with the first index of a
+ * tensor @p src2 of rank @p rank_2:
+ * @f[
+ *   \text{result}_{i_1,..,i_{r1},j_1,..,j_{r2}}
+ *   = \sum_{k}
+ *     \text{left}_{i_1,..,i_{r1}, k}
+ *     \text{right}_{k, j_1,..,j_{r2}}
+ * @f]
+ *
+ * @note For the Tensor class, the multiplication operator only performs a
+ * contraction over a single pair of indices. This is in contrast to the
+ * multiplication operator for SymmetricTensor, which does the double
+ * contraction.
+ *
+ * @note In case the contraction yields a tensor of rank 0 the scalar number
+ * is returned as an unwrapped number type.
+ *
+ * @relates Tensor
+ * @author Matthias Maier, 2015
+ */
+// Single contraction of two tensors. src2 is wrapped in a reordered index
+// view (position 0 of rank_2) and TensorAccessors::contract then contracts
+// one index pair of src1 and that view, writing the outcome into `result`.
+template <int rank_1, int rank_2, int dim,
+          typename Number, typename OtherNumber>
+inline DEAL_II_ALWAYS_INLINE
+typename Tensor<rank_1 + rank_2 - 2, dim, typename ProductType<Number, OtherNumber>::type>::tensor_type
+operator * (const Tensor<rank_1, dim, Number> &src1,
+            const Tensor<rank_2, dim, OtherNumber> &src2)
+{
+  typedef typename Tensor<rank_1 + rank_2 - 2, dim, typename ProductType<Number, OtherNumber>::type>::tensor_type
+  result_type;
+  typedef TensorAccessors::internal::ReorderedIndexView<0, rank_2, const Tensor<rank_2, dim, OtherNumber> >
+  view_type;
+
+  result_type result;
+
+  view_type reordered = TensorAccessors::reordered_index_view<0, rank_2>(src2);
+  TensorAccessors::contract<1, rank_1, rank_2, dim>(result, src1, reordered);
+
+  return result;
+}
+
+
+/**
+ * Generic contraction of a pair of indices of two tensors of arbitrary rank:
+ * Return a tensor of rank $(\text{rank}_1 + \text{rank}_2 - 2)$ that is the
+ * contraction of index @p index_1 of a tensor @p src1 of rank @p rank_1 with
+ * the index @p index_2 of a tensor @p src2 of rank @p rank_2:
+ * @f[
+ *   \text{result}_{i_1,..,i_{r1},j_1,..,j_{r2}}
+ *   = \sum_{k}
+ *     \text{left}_{i_1,..,k,..,i_{r1}}
+ *     \text{right}_{j_1,..,k,..,j_{r2}}
+ * @f]
+ *
+ * If for example the first index (<code>index_1==0</code>) of a tensor
+ * <code>t1</code> shall be contracted with the third index
+ * (<code>index_2==2</code>) of a tensor <code>t2</code>, the invocation of
+ * this function is
+ * @code
+ *   contract<0, 2>(t1, t2);
+ * @endcode
+ *
+ * @note The position of the index is counted from 0, i.e.,
+ * $0\le\text{index}_i<\text{rank}_i$.
+ *
+ * @note In case the contraction yields a tensor of rank 0 the scalar number
+ * is returned as an unwrapped number type.
+ *
+ * @relates Tensor
+ * @author Matthias Maier, 2015
+ */
+// Contract index index_1 of src1 with index index_2 of src2. Both tensors
+// are wrapped in reordered index views for their respective index, and
+// TensorAccessors::contract then performs a single contraction on the two
+// views.
+template <int index_1, int index_2,
+          int rank_1, int rank_2, int dim,
+          typename Number, typename OtherNumber>
+inline
+typename Tensor<rank_1 + rank_2 - 2, dim, typename ProductType<Number, OtherNumber>::type>::tensor_type
+contract (const Tensor<rank_1, dim, Number> &src1,
+          const Tensor<rank_2, dim, OtherNumber> &src2)
+{
+  Assert(0 <= index_1 && index_1 < rank_1,
+         ExcMessage("The specified index_1 must lie within the range [0,rank_1)"));
+  Assert(0 <= index_2 && index_2 < rank_2,
+         ExcMessage("The specified index_2 must lie within the range [0,rank_2)"));
+
+  using namespace TensorAccessors;
+  using namespace TensorAccessors::internal;
+
+  typedef ReorderedIndexView<index_1, rank_1, const Tensor<rank_1, dim, Number> >
+  first_view_type;
+  typedef ReorderedIndexView<index_2, rank_2, const Tensor<rank_2, dim, OtherNumber> >
+  second_view_type;
+  typedef typename Tensor<rank_1 + rank_2 - 2, dim, typename ProductType<Number, OtherNumber>::type>::tensor_type
+  result_type;
+
+  // View of src1 with index_1 reordered to the end:
+  first_view_type  reord_01 = reordered_index_view<index_1, rank_1>(src1);
+
+  // View of src2 with index_2 reordered to the end:
+  second_view_type reord_02 = reordered_index_view<index_2, rank_2>(src2);
+
+  result_type result;
+  TensorAccessors::contract<1, rank_1, rank_2, dim>(result, reord_01, reord_02);
+  return result;
+}
+
+
+/**
+ * Generic contraction of two pairs of indices of two tensors of arbitrary
+ * rank: Return a tensor of rank $(\text{rank}_1 + \text{rank}_2 - 4)$ that is
+ * the contraction of index @p index_1 with index @p index_2, and index @p
+ * index_3 with index @p index_4 of a tensor @p src1 of rank @p rank_1 and a
+ * tensor @p src2 of rank @p rank_2:
+ * @f[
+ *   \text{result}_{i_1,..,i_{r1},j_1,..,j_{r2}}
+ *   = \sum_{k, l}
+ *     \text{left}_{i_1,..,k,..,l,..,i_{r1}}
+ *     \text{right}_{j_1,..,k,..,l..,j_{r2}}
+ * @f]
+ *
+ * If for example the first index (<code>index_1==0</code>) shall be
+ * contracted with the third index (<code>index_2==2</code>), and the second
+ * index (<code>index_3==1</code>) with the first index
+ * (<code>index_4==0</code>) the invocation of this function
+ * is
+ * @code
+ *   contract<0, 2, 1, 0>(t1, t2);
+ * @endcode
+ *
+ * @note The position of the index is counted from 0, i.e.,
+ * $0\le\text{index}_i<\text{range}_i$.
+ *
+ * @note In case the contraction yields a tensor of rank 0 the scalar number
+ * is returned as an unwrapped number type.
+ *
+ * @relates Tensor
+ * @author Matthias Maier, 2015
+ */
+template <int index_1, int index_2, int index_3, int index_4,
+          int rank_1, int rank_2, int dim,
+          typename Number, typename OtherNumber>
+inline
+typename Tensor<rank_1 + rank_2 - 4, dim, typename ProductType<Number, OtherNumber>::type>::tensor_type
+double_contract (const Tensor<rank_1, dim, Number> &src1,
+                 const Tensor<rank_2, dim, OtherNumber> &src2)
+{
+  // index_1 and index_3 address src1, index_2 and index_4 address src2; the
+  // two positions taken from the same tensor have to be distinct.
+  Assert(0 <= index_1 && index_1 < rank_1,
+         ExcMessage("The specified index_1 must lie within the range [0,rank_1)"));
+  Assert(0 <= index_3 && index_3 < rank_1,
+         ExcMessage("The specified index_3 must lie within the range [0,rank_1)"));
+  Assert(index_1 != index_3,
+         ExcMessage("index_1 and index_3 must not be the same"));
+  Assert(0 <= index_2 && index_2 < rank_2,
+         ExcMessage("The specified index_2 must lie within the range [0,rank_2)"));
+  Assert(0 <= index_4 && index_4 < rank_2,
+         ExcMessage("The specified index_4 must lie within the range [0,rank_2)"));
+  Assert(index_2 != index_4,
+         ExcMessage("index_2 and index_4 must not be the same"));
+
+  // Once the first contracted index of a tensor has been shifted to the end,
+  // every index that originally followed it sits one position earlier.
+  const int shifted_index_3 = (index_3 < index_1 ? index_3 : index_3 - 1);
+  const int shifted_index_4 = (index_4 < index_2 ? index_4 : index_4 - 1);
+
+  typedef TensorAccessors::internal::ReorderedIndexView<index_1, rank_1, const Tensor<rank_1, dim, Number> >
+  first_left_view_type;
+  typedef TensorAccessors::internal::ReorderedIndexView<index_2, rank_2, const Tensor<rank_2, dim, OtherNumber> >
+  first_right_view_type;
+  typedef TensorAccessors::internal::ReorderedIndexView<shifted_index_3, rank_1, first_left_view_type>
+  second_left_view_type;
+  typedef TensorAccessors::internal::ReorderedIndexView<shifted_index_4, rank_2, first_right_view_type>
+  second_right_view_type;
+
+  // First pass: shift index_1 (src1) and index_2 (src2) to the end. These two
+  // views stay non-const so that the second pass deduces the plain view type.
+  first_left_view_type left_once = TensorAccessors::reordered_index_view<index_1, rank_1>(src1);
+  first_right_view_type right_once = TensorAccessors::reordered_index_view<index_2, rank_2>(src2);
+  // Second pass: shift the remaining contracted index of each view to the end.
+  const second_left_view_type left_twice = TensorAccessors::reordered_index_view<shifted_index_3, rank_1>(left_once);
+  const second_right_view_type right_twice = TensorAccessors::reordered_index_view<shifted_index_4, rank_2>(right_once);
+
+  typename Tensor<rank_1 + rank_2 - 4, dim, typename ProductType<Number, OtherNumber>::type>::tensor_type result;
+  TensorAccessors::contract<2, rank_1, rank_2, dim>(result, left_twice, right_twice);
+  return result;
+}
+
+
+/**
+ * The scalar product, or (generalized) Frobenius inner product of two tensors
+ * of equal rank: Return a scalar number that is the result of a full
+ * contraction of a tensor @p left and @p right:
+ * @f[
+ *   \sum_{i_1,..,i_r}
+ *   \text{left}_{i_1,..,i_r}
+ *   \text{right}_{i_1,..,i_r}
+ * @f]
+ *
+ * @relates Tensor
+ * @author Matthias Maier, 2015
+ */
+template <int rank, int dim, typename Number, typename OtherNumber>
+inline typename ProductType<Number, OtherNumber>::type
+scalar_product (const Tensor<rank, dim, Number> &left,
+                const Tensor<rank, dim, OtherNumber> &right)
+{
+  // Contracting all rank index pairs at once leaves a single number.
+  typename ProductType<Number, OtherNumber>::type sum;
+  TensorAccessors::contract<rank, rank, rank, dim>(sum, left, right);
+  return sum;
+}
+
+
+/**
+ * Full contraction of three tensors: Return a scalar number that is the
+ * result of a full contraction of a tensor @p left of rank @p rank_1, a
+ * tensor @p middle of rank $(\text{rank}_1+\text{rank}_2)$ and a tensor @p
+ * right of rank @p rank_2:
+ * @f[
+ *   \sum_{i_1,..,i_{r1},j_1,..,j_{r2}}
+ *   \text{left}_{i_1,..,i_{r1}}
+ *   \text{middle}_{i_1,..,i_{r1},j_1,..,j_{r2}}
+ *   \text{right}_{j_1,..,j_{r2}}
+ * @f]
+ *
+ * @relates Tensor
+ * @author Matthias Maier, 2015
+ */
+template <int rank_1, int rank_2, int dim,
+          typename T1, typename T2, typename T3>
+typename ProductType<T1, typename ProductType<T2, T3>::type>::type
+contract3 (const Tensor<rank_1, dim, T1> &left,
+           const Tensor<rank_1 + rank_2, dim, T2> &middle,
+           const Tensor<rank_2, dim, T3> &right)
+{
+  // The scalar type of the triple product; it is handed on explicitly as the
+  // return type of the generic implementation.
+  typedef typename ProductType<T1, typename ProductType<T2, T3>::type>::type scalar_type;
+  return TensorAccessors::contract3<rank_1, rank_2, dim, scalar_type>(left, middle, right);
+}
+
+
+/**
+ * The outer product of two tensors of @p rank_1 and @p rank_2: Returns a
+ * tensor of rank $(\text{rank}_1 + \text{rank}_2)$:
+ * @f[
+ *   \text{result}_{i_1,..,i_{r1},j_1,..,j_{r2}}
+ *   = \text{left}_{i_1,..,i_{r1}}\,\text{right}_{j_1,..,j_{r2}.}
+ * @f]
+ *
+ * @relates Tensor
+ * @author Matthias Maier, 2015
+ */
+template <int rank_1, int rank_2, int dim,
+          typename Number, typename OtherNumber>
+inline Tensor<rank_1 + rank_2, dim, typename ProductType<Number, OtherNumber>::type>
+outer_product(const Tensor<rank_1, dim, Number> &src1,
+              const Tensor<rank_2, dim, OtherNumber> &src2)
+{
+  // A contraction over zero index pairs: no index is summed over.
+  typename Tensor<rank_1 + rank_2, dim, typename ProductType<Number, OtherNumber>::type>::tensor_type product;
+  TensorAccessors::contract<0, rank_1, rank_2, dim>(product, src1, src2);
+  return product;
+}
+
+
+//@}
+/**
+ * @name Special operations on tensors of rank 1
+ */
+//@{
+
+
+/**
+ * Returns the cross product in 2d. This is just a rotation by 90 degrees
+ * clockwise to compute the outer normal from a tangential vector. This
+ * function is defined for all space dimensions to allow for dimension
+ * independent programming (e.g. within switches over the space dimension),
+ * but may only be called if the actual dimension of the arguments is two
+ * (e.g. from the <tt>dim==2</tt> case in the switch).
+ *
+ * @relates Tensor
+ * @author Guido Kanschat, 2001
+ */
+template <int dim, typename Number>
+inline
+Tensor<1,dim,Number>
+cross_product_2d (const Tensor<1,dim,Number> &src)
+{
+  // Instantiable for every dim, but only to be called in two dimensions.
+  Assert (dim==2, ExcInternalError());
+
+  // (x, y) is mapped to (y, -x).
+  Tensor<1, dim, Number> rotated;
+  rotated[0] = src[1];
+  rotated[1] = -src[0];
+  return rotated;
+}
+
+
+/**
+ * Returns the cross product of 2 vectors in 3d. This function is defined for
+ * all space dimensions to allow for dimension independent programming (e.g.
+ * within switches over the space dimension), but may only be called if the
+ * actual dimension of the arguments is three (e.g. from the <tt>dim==3</tt>
+ * case in the switch).
+ *
+ * @relates Tensor
+ * @author Guido Kanschat, 2001
+ */
+template <int dim, typename Number>
+inline
+Tensor<1,dim,Number>
+cross_product_3d (const Tensor<1,dim,Number> &src1,
+                  const Tensor<1,dim,Number> &src2)
+{
+  // Instantiable for every dim, but only to be called in three dimensions.
+  Assert (dim==3, ExcInternalError());
+
+  // Component i is built from the two components that cyclically follow i:
+  // i -> (i+1, i+2) modulo 3.
+  Tensor<1, dim, Number> product;
+  for (unsigned int i=0; i<3; ++i)
+    product[i] = src1[(i+1)%3]*src2[(i+2)%3] - src1[(i+2)%3]*src2[(i+1)%3];
+  return product;
+}
+
+
+//@}
+/**
+ * @name Special operations on tensors of rank 2
+ */
+//@{
+
+
+/**
+ * Compute the determinant of a tensor of rank 2.
+ *
+ * @relates Tensor
+ * @author Wolfgang Bangerth, 2009
+ */
+template <int dim, typename Number>
+inline
+Number determinant (const Tensor<2,dim,Number> &t)
+{
+  // Laplace expansion along the last row: each entry t[dim-1][col] is
+  // multiplied by the determinant of t with that row and column removed.
+  Number sum = Number();
+  for (unsigned int col=0; col<dim; ++col)
+    {
+      // Copy the first dim-1 rows, skipping column col.
+      Tensor<2,dim-1,Number> sub;
+      for (unsigned int r=0; r<dim-1; ++r)
+        for (unsigned int c=0; c<dim-1; ++c)
+          sub[r][c] = t[r][c<col ? c : c+1];
+
+      const Number signed_minor = ((col % 2 == 0) ? -1. : 1.) * determinant(sub);
+      sum += t[dim-1][col] * signed_minor;
+    }
+
+  // Together with the factor above this yields the sign (-1)^(dim-1+col).
+  return ((dim % 2 == 0) ? 1. : -1.) * sum;
+}
+
+/**
+ * Specialization for dim==1.
+ *
+ * @relates Tensor
+ */
+template <typename Number>
+inline Number
+determinant (const Tensor<2,1,Number> &t)
+{
+  return t[0][0]; // the only entry of a 1x1 tensor
+}
+
+
+/**
+ * Compute and return the trace of a tensor of rank 2, i.e. the sum of its
+ * diagonal entries.
+ *
+ * @relates Tensor
+ * @author Wolfgang Bangerth, 2001
+ */
+template <int dim, typename Number>
+Number trace (const Tensor<2,dim,Number> &d)
+{
+  Number diagonal_sum = d[0][0]; // seed with the first diagonal entry
+  for (unsigned int k=1; k<dim; ++k)
+    diagonal_sum += d[k][k];
+  return diagonal_sum;
+}
+
+
+/**
+ * Compute and return the inverse of the given tensor. Since the compiler can
+ * perform the return value optimization, and since the size of the return
+ * object is known, it is acceptable to return the result by value, rather
+ * than by reference as a parameter.
+ *
+ * @relates Tensor
+ * @author Wolfgang Bangerth, 2000
+ */
+template <int dim, typename Number>
+inline
+Tensor<2,dim,Number>
+invert (const Tensor<2,dim,Number> &t)
+{
+  Number inverse [dim][dim];
+  switch (dim)
+    {
+    case 1:
+      inverse[0][0] = 1.0/t[0][0];
+      break;
+
+    case 2:
+      // closed form: swap the diagonal entries, negate the
+      // off-diagonal ones, scale by the reciprocal determinant
+    {
+      const Number det = t[0][0]*t[1][1]-t[1][0]*t[0][1];
+      const Number inv_det = 1.0/det;
+      inverse[0][0] = t[1][1]*inv_det;
+      inverse[0][1] = -t[0][1]*inv_det;
+      inverse[1][0] = -t[1][0]*inv_det;
+      inverse[1][1] = t[0][0]*inv_det;
+      break;
+    }
+
+    case 3:
+    {
+      const Number p00_11 = t[0][0]*t[1][1],
+                   p00_12 = t[0][0]*t[1][2],
+                   p01_10 = t[0][1]*t[1][0],
+                   p02_10 = t[0][2]*t[1][0],
+                   p01_20 = t[0][1]*t[2][0],
+                   p02_20 = t[0][2]*t[2][0],
+                   det = (p00_11*t[2][2]-p00_12*t[2][1]-p01_10*t[2][2]+
+                          p02_10*t[2][1]+p01_20*t[1][2]-p02_20*t[1][1]),
+                   inv_det = 1.0/det;
+      inverse[0][0] = (t[1][1]*t[2][2]-t[1][2]*t[2][1])*inv_det;
+      inverse[0][1] = (t[0][2]*t[2][1]-t[0][1]*t[2][2])*inv_det;
+      inverse[0][2] = (t[0][1]*t[1][2]-t[0][2]*t[1][1])*inv_det;
+      inverse[1][0] = (t[1][2]*t[2][0]-t[1][0]*t[2][2])*inv_det;
+      inverse[1][1] = (t[0][0]*t[2][2]-p02_20)*inv_det;
+      inverse[1][2] = (p02_10-p00_12)*inv_det;
+      inverse[2][0] = (t[1][0]*t[2][1]-t[1][1]*t[2][0])*inv_det;
+      inverse[2][1] = (p01_20-t[0][0]*t[2][1])*inv_det;
+      inverse[2][2] = (p00_11-p01_10)*inv_det;
+
+      break;
+    }
+
+    // dimensions other than 1, 2 and 3 are not
+    // implemented; if desired, a 4x4 inversion
+    // could be taken over from the FullMatrix
+    default:
+      AssertThrow (false, ExcNotImplemented());
+    }
+  return Tensor<2,dim,Number>(inverse);
+}
+
+
+/**
+ * Return the transpose of the given tensor.
+ *
+ * @relates Tensor
+ * @author Wolfgang Bangerth, 2002
+ */
+template <int dim, typename Number>
+inline Tensor<2,dim,Number>
+transpose (const Tensor<2,dim,Number> &t)
+{
+  // Copy the diagonal; fill each mirrored pair of off-diagonal entries once.
+  Tensor<2, dim, Number> flipped;
+  for (unsigned int row=0; row<dim; ++row)
+    {
+      flipped[row][row] = t[row][row];
+      for (unsigned int col=row+1; col<dim; ++col)
+        {
+          flipped[row][col] = t[col][row];
+          flipped[col][row] = t[row][col];
+        }
+    }
+  return flipped;
+}
+
+
+/**
+ * Return the $l_1$ norm of the given rank-2 tensor, where $||t||_1 = \max_j
+ * \sum_i |t_{ij}|$ (maximum of the sums over columns).
+ *
+ * @relates Tensor
+ * @author Wolfgang Bangerth, 2012
+ */
+template <int dim, typename Number>
+inline
+double
+l1_norm (const Tensor<2,dim,Number> &t)
+{
+  double largest_column_sum = 0;
+  for (unsigned int col=0; col<dim; ++col)
+    {
+      // absolute sum of the entries of column col
+      double column_sum = 0;
+      for (unsigned int row=0; row<dim; ++row)
+        column_sum += std::fabs(t[row][col]);
+
+      if (column_sum > largest_column_sum)
+        largest_column_sum = column_sum;
+    }
+  return largest_column_sum;
+}
+
+
+/**
+ * Return the $l_\infty$ norm of the given rank-2 tensor, where $||t||_\infty
+ * = \max_i \sum_j |t_{ij}|$ (maximum of the sums over rows).
+ *
+ * @relates Tensor
+ * @author Wolfgang Bangerth, 2012
+ */
+template <int dim, typename Number>
+inline
+double
+linfty_norm (const Tensor<2,dim,Number> &t)
+{
+  double largest_row_sum = 0;
+  for (unsigned int row=0; row<dim; ++row)
+    {
+      // absolute sum of the entries of this row
+      double row_sum = 0;
+      for (unsigned int col=0; col<dim; ++col)
+        row_sum += std::fabs(t[row][col]);
+
+      if (row_sum > largest_row_sum)
+        largest_row_sum = row_sum;
+    }
+  return largest_row_sum;
+}
+
+//@}
+
+DEAL_II_NAMESPACE_CLOSE
+
+// include deprecated non-member functions operating on Tensor
+#include <deal.II/base/tensor_deprecated.h>
+
+#endif
+
diff --git a/include/deal.II/base/tensor_accessors.h b/include/deal.II/base/tensor_accessors.h
new file mode 100644
index 0000000..f366cd4
--- /dev/null
+++ b/include/deal.II/base/tensor_accessors.h
@@ -0,0 +1,772 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__tensor_accessors_h
+#define dealii__tensor_accessors_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/template_constraints.h>
+#include <deal.II/base/table_indices.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * This namespace is a collection of algorithms working on generic tensorial
+ * objects (of arbitrary rank).
+ *
+ * The rationale to implement such functionality in a generic fashion in a
+ * separate namespace is
+ *  - to ease code reusability and therefore avoid code duplication.
+ *  - to have a well-defined interface that allows to exchange the low
+ * level implementation.
+ *
+ *
+ * A tensorial object has the notion of a rank and allows a rank-times
+ * recursive application of the index operator, e.g., if <code>t</code> is a
+ * tensorial object of rank 4, the following access is valid:
+ * @code
+ *   t[1][2][1][4]
+ * @endcode
+ *
+ * deal.II has its own implementation for tensorial objects such as
+ * dealii::Tensor<rank, dim, Number> and dealii::SymmetricTensor<rank, dim,
+ * Number>
+ *
+ * The methods and algorithms implemented in this namespace, however, are
+ * fully generic. More precisely, it can operate on nested c-style arrays, or
+ * on class types <code>T</code> with a minimal interface that provides a
+ * local typedef <code>value_type</code> and an index operator
+ * <code>operator[](unsigned int)</code> that returns a (const or non-const)
+ * reference of <code>value_type</code>:
+ * @code
+ *   template<...>
+ *   class T
+ *   {
+ *     typedef ... value_type;
+ *     value_type & operator[](unsigned int);
+ *     const value_type & operator[](unsigned int) const;
+ *   };
+ * @endcode
+ *
+ * This namespace provides primitives for access, reordering and contraction
+ * of such objects.
+ *
+ * @ingroup geomprimitives
+ *
+ * @author Matthias Maier, 2015
+ */
+namespace TensorAccessors
+{
+  // forward declarations
+  namespace internal
+  {
+    template <int index, int rank, typename T> class ReorderedIndexView;
+    template <int position, int rank> struct ExtractHelper;
+    template <int no_contr, int rank_1, int rank_2, int dim> class Contract;
+    template <int rank_1, int rank_2, int dim> class Contract3;
+  }
+
+
+  /**
+   * This class provides a local typedef @p value_type denoting the resulting
+   * type of an access with operator[](unsigned int). More precisely, @p
+   * value_type will be
+   *  - <code>T::value_type</code> if T is a tensorial class providing a
+   * typedef <code>value_type</code> and does not have a const qualifier.
+   *  - <code>const T::value_type</code> if T is a tensorial class
+   * providing a typedef <code>value_type</code> and does have a const
+   * qualifier.
+   *  - <code>A</code> if T is of array type <code>A[...]</code> and does
+   * not have a const qualifier, i.e., subscripting an object of type
+   * <code>A[N]</code> yields an <code>A</code>.
+   *  - <code>const A</code> if T is of array type <code>A[...]</code> and
+   * does have a const qualifier, i.e., subscripting an object of type
+   * <code>const A[N]</code> yields a <code>const A</code>.
+   */
+  template <typename TensorType>
+  struct ValueType
+  {
+    typedef typename TensorType::value_type value_type; // class type: its own typedef
+  };
+
+  template <typename TensorType>
+  struct ValueType<const TensorType>
+  {
+    typedef const typename TensorType::value_type value_type; // const class type: const added
+  };
+
+  template <typename ElementType, std::size_t size>
+  struct ValueType<ElementType[size]>
+  {
+    typedef ElementType value_type; // built-in array: its element type
+  };
+
+  template <typename ElementType, std::size_t size>
+  struct ValueType<const ElementType[size]>
+  {
+    typedef const ElementType value_type; // const built-in array: const element type
+  };
+
+
+  /**
+   * This class provides a local typedef @p value_type that is equal to the
+   * typedef <code>value_type</code> after @p deref_steps recursive
+   * dereferences via <code>operator[](unsigned int)</code>. Further, constness
+   * is preserved via the ValueType type trait, i.e., if T is const,
+   * ReturnType<deref_steps, T>::value_type will also be const.
+   */
+  template <int deref_steps, typename TensorType>
+  struct ReturnType // general case: peel off one subscript and recurse with one step less
+  {
+    typedef typename ReturnType<deref_steps - 1, typename ValueType<TensorType>::value_type>::value_type value_type;
+  };
+
+  template <typename TensorType>
+  struct ReturnType<0, TensorType> // no step left: the type itself
+  {
+    typedef TensorType value_type;
+  };
+
+
+  /**
+   * Provide a "tensorial view" to a reference @p t of a tensor object of rank
+   * @p rank in which the index @p index is shifted to the end. As an example
+   * consider a tensor of 5th order in dim=5 space dimensions that can be
+   * accessed through 5 recursive <code>operator[]()</code> invocations:
+   * @code
+   *   Tensor<5, dim> tensor;
+   *   tensor[0][1][2][3][4] = 42.;
+   * @endcode
+   * Index 1 (the 2nd index, count starts at 0) can now be shifted to the end
+   * via
+   * @code
+   *   auto tensor_view = reordered_index_view<1, 5>(tensor);
+   *   tensor_view[0][2][3][4][1] == 42.; // is true
+   * @endcode
+   * The usage of the dealii::Tensor type was solely for the sake of an
+   * example. The mechanism implemented by this function is available for
+   * fairly general tensorial types @p T.
+   *
+   * The purpose of this reordering facility is to be able to contract over an
+   * arbitrary index of two (or more) tensors:
+   *  - reorder the indices in mind to the end of the tensors
+   *  - use the contract function below that contracts the _last_ elements of
+   * tensors.
+   *
+   * @note This function returns an internal class object consisting of an
+   * array subscript operator <code>operator[](unsigned int)</code> and a
+   * typedef <code>value_type</code> describing its return value.
+   *
+   * @tparam index The index to be shifted to the end. Indices are counted
+   * from 0, thus the valid range is $0\le\text{index}<\text{rank}$.
+   * @tparam rank Rank of the tensorial object @p t
+   * @tparam T A tensorial object of rank @p rank. @p T must provide a local
+   * typedef <code>value_type</code> and an index operator
+   * <code>operator[]()</code> that returns a (const or non-const) reference
+   * of <code>value_type</code>.
+   *
+   * @author Matthias Maier, 2015
+   */
+  template <int index, int rank, typename T>
+  inline DEAL_II_ALWAYS_INLINE
+  internal::ReorderedIndexView<index, rank, T>
+  reordered_index_view(T &t)
+  {
+#ifdef DEAL_II_WITH_CXX11
+    static_assert(0 <= index && index < rank,
+                  "The specified index must lie within the range [0,rank)");
+#endif
+    typedef internal::ReorderedIndexView<index, rank, T> view_type; // wraps t, see namespace internal
+    return view_type(t);
+  }
+
+
+  /**
+   * Return a reference (const or non-const) to a subobject of a tensorial
+   * object @p t of type @p T, as described by an array type @p ArrayType
+   * object @p indices. For example: @code
+   *   Tensor<5, dim> tensor;
+   *   TableIndices<5> indices (0, 1, 2, 3, 4);
+   *   TensorAccessors::extract(tensor, indices) = 42;
+   * @endcode
+   * This is equivalent to <code>tensor[0][1][2][3][4] = 42.</code>.
+   *
+   * @tparam T A tensorial object of rank @p rank. @p T must provide a local
+   * typedef <code>value_type</code> and an index operator
+   * <code>operator[]()</code> that returns a (const or non-const) reference
+   * of <code>value_type</code>. Further, its tensorial rank must be equal or
+   * greater than @p rank.
+   *
+   * @tparam ArrayType An array like object, such as std::array, or
+   * dealii::TableIndices  that stores at least @p rank indices that can be
+   * accessed via operator[]().
+   *
+   * @author Matthias Maier, 2015
+   */
+  template<int rank, typename T, typename ArrayType>
+  typename ReturnType<rank, T>::value_type &extract(T &t, const ArrayType &indices)
+  {
+    typedef internal::ExtractHelper<0, rank> first_step; // recursion starts at position 0
+    return first_step::template extract<T, ArrayType>(t, indices);
+  }
+
+
+  /**
+   * This function contracts two tensorial objects @p left and @p right and
+   * stores the result in @p result. The contraction is done over the _last_
+   * @p no_contr indices of both tensorial objects:
+   *
+   * @f[
+   *   \text{result}_{i_1,..,i_{r1},j_1,..,j_{r2}}
+   *   = \sum_{k_1,..,k_{\text{no\_contr}}}
+   *     \text{left}_{i_1,..,i_{r1},k_1,..,k_{\text{no\_contr}}}
+   *     \text{right}_{j_1,..,j_{r2},k_1,..,k_{\text{no\_contr}}}
+   * @f]
+   *
+   * Calling this function is equivalent of writing the following low level
+   * code:
+   * @code
+   *   for(unsigned int i_0 = 0; i_0 < dim; ++i_0)
+   *     ...
+   *       for(unsigned int i_a = 0; i_a < dim; ++i_a)
+   *         for(unsigned int j_0 = 0; j_0 < dim; ++j_0)
+   *           ...
+   *             for(unsigned int j_b = 0; j_b < dim; ++j_b)
+   *               {
+   *                 result[i_0]..[i_a][j_0]..[j_b] = 0.;
+   *                 for(unsigned int k_0 = 0; k_0 < dim; ++k_0)
+   *                   ...
+   *                     for(unsigned int k_c = 0; k_c < dim; ++k_c)
+   *                       result[i_0]..[i_a][j_0]..[j_b] += left[i_0]..[i_a][k_0]..[k_c] * right[j_0]..[j_b][k_0]..[k_c];
+   *               }
+   * @endcode
+   * with a = rank_1 - no_contr - 1, b = rank_2 - no_contr - 1, and c =
+   * no_contr - 1.
+   *
+   * @note The Types @p T1, @p T2, and @p T3 must have rank rank_1 + rank_2 -
+   * 2 * no_contr, rank_1, or rank_2, respectively. Obviously, no_contr must
+   * be less or equal than rank_1 and rank_2.
+   *
+   * @author Matthias Maier, 2015
+   */
+  template <int no_contr, int rank_1, int rank_2, int dim, typename T1, typename T2, typename T3>
+  inline DEAL_II_ALWAYS_INLINE
+  void contract(T1 &result, const T2 &left, const T3 &right)
+  {
+    // Compile-time rank checks, present only when C++11 is enabled.
+#ifdef DEAL_II_WITH_CXX11
+    static_assert(rank_1 >= no_contr, "The rank of the left tensor must be "
+                  "equal or greater than the number of "
+                  "contractions");
+    static_assert(rank_2 >= no_contr, "The rank of the right tensor must be "
+                  "equal or greater than the number of "
+                  "contractions");
+#endif
+    typedef internal::Contract<no_contr, rank_1, rank_2, dim> implementation;
+    implementation::template contract<T1, T2, T3>(result, left, right);
+  }
+
+
+  /**
+   * Full contraction of three tensorial objects:
+   *
+   * @f[
+   *   \sum_{i_1,..,i_{r1},j_1,..,j_{r2}}
+   *   \text{left}_{i_1,..,i_{r1}}
+   *   \text{middle}_{i_1,..,i_{r1},j_1,..,j_{r2}}
+   *   \text{right}_{j_1,..,j_{r2}}
+   * @f]
+   *
+   * Calling this function is equivalent of writing the following low level
+   * code:
+   * @code
+   *   T1 result = T1();
+   *   for(unsigned int i_0 = 0; i_0 < dim; ++i_0)
+   *     ...
+   *       for(unsigned int i_ = 0; i_ < dim; ++i_)
+   *         for(unsigned int j_0 = 0; j_0 < dim; ++j_0)
+   *           ...
+   *             for(unsigned int j_ = 0; j_ < dim; ++j_)
+   *               result += left[i_0]..[i_] * middle[i_0]..[i_][j_0]..[j_] * right[j_0]..[j_];
+   * @endcode
+   *
+   * @note The Types @p T2, @p T3, and @p T4 must have rank rank_1, rank_1 +
+   * rank_2, and rank_2, respectively. @p T1 must be a scalar type.
+   *
+   * @author Matthias Maier, 2015
+   */
+  template <int rank_1, int rank_2, int dim, typename T1, typename T2, typename T3, typename T4>
+  T1 contract3(const T2 &left, const T3 &middle, const T4 &right)
+  {
+    typedef internal::Contract3<rank_1, rank_2, dim> implementation; // internal helper class
+    return implementation::template contract3<T1, T2, T3, T4>(left, middle, right);
+  }
+
+
+  namespace internal
+  {
+    // -------------------------------------------------------------------------
+    // Forward declarations and type traits
+    // -------------------------------------------------------------------------
+
+    template <int rank, typename S> class StoreIndex;
+    template <typename T> class Identity;
+    template <int no_contr, int dim> class Contract2;
+
+    /**
+     * An internally used type trait to allow nested application of the
+     * function reordered_index_view(T &t).
+     *
+     * The problem is that when working with the actual tensorial types, we
+     * have to return subtensors by reference - but sometimes, especially for
+     * StoreIndex and ReorderedIndexView that return rvalues, we have to
+     * return by value.
+     */
+    template<typename Object>
+    struct ReferenceType
+    {
+      typedef Object &type; // default: refer to the object by reference
+    };
+
+    template <int rank, typename Stored>
+    struct ReferenceType<StoreIndex<rank, Stored> >
+    {
+      typedef StoreIndex<rank, Stored> type; // helper object: passed and held by value
+    };
+
+    template <int index, int rank, typename Viewed>
+    struct ReferenceType<ReorderedIndexView<index, rank, Viewed> >
+    {
+      typedef ReorderedIndexView<index, rank, Viewed> type; // helper object: passed and held by value
+    };
+
+
+    // TODO: Is there a possibility to just have the following block of
+    // explanation on an internal page in doxygen? If, yes. Doxygen
+    // wizards, your call!
+
+    // -------------------------------------------------------------------------
+    // Implementation of helper classes for reordered_index_view
+    // -------------------------------------------------------------------------
+
+    // OK. This is utterly brutal template magic. Therefore, we will not
+    // comment on the individual internal helper classes, because this is
+    // of not much value, but explain the general recursion procedure.
+    //
+    // (In order of appearance)
+    //
+    // Our task is to reorder access to a tensor object where a specified
+    // index is moved to the end. Thus we want to construct an object
+    // <code>reordered</code> out of a <code>tensor</code> where the
+    // following access patterns are equivalent:
+    // @code
+    //   tensor    [i_0]...[i_index-1][i_index][i_index+1]...[i_n]
+    //   reordered [i_0]...[i_index-1][i_index+1]...[i_n][i_index]
+    // @endcode
+    //
+    // The first task is to get rid of the application of
+    // [i_0]...[i_index-1]. This is a classical recursion pattern - relay
+    // the task from <index, rank> to <index-1, rank-1> by accessing the
+    // subtensor object:
+
+    template <int index, int rank, typename T>
+    class ReorderedIndexView
+    {
+    public:
+      ReorderedIndexView(typename ReferenceType<T>::type t) : t_(t) {}
+      // One subscript gives a view onto a subobject; index and rank both drop by one:
+      typedef ReorderedIndexView<index - 1, rank - 1, typename ValueType<T>::value_type>
+      value_type;
+
+      // Recurse by applying index j directly (general case, see the index == 0 specializations):
+      inline DEAL_II_ALWAYS_INLINE
+      value_type operator[](unsigned int j) const
+      {
+        return value_type(t_[j]);
+      }
+
+    private:
+      typename ReferenceType<T>::type t_; // the viewed object, held as ReferenceType<T>::type
+    };
+
+    // At some point we hit the condition index == 0 and rank > 1, i.e.,
+    // the first index should be reordered to the end.
+    //
+    // At this point we cannot be lazy any more and have to start storing
+    // indices because we get them in the wrong order. The user supplies
+    //   [i_0][i_1]...[i_{rank - 1}]
+    // but we have to call the subtensor object with
+    //   [i_{rank - 1}][i_0][i_1]...[i_{rank-2}]
+    //
+    // So give up and relay the task to the StoreIndex class:
+
+    template <int rank, typename T>
+    class ReorderedIndexView<0, rank, T>
+    {
+    public:
+      ReorderedIndexView(typename ReferenceType<T>::type t) : t_(t) {}
+
+      typedef StoreIndex<rank - 1, internal::Identity<T> > value_type;
+      // t_ is not subscripted here: j is only recorded, on top of an Identity wrapping t_.
+      inline DEAL_II_ALWAYS_INLINE
+      value_type operator[](unsigned int j) const
+      {
+        return value_type(Identity<T>(t_), j);
+      }
+
+    private:
+      typename ReferenceType<T>::type t_; // the viewed object, held as ReferenceType<T>::type
+    };
+
+    // Sometimes, we're lucky and don't have to do anything. In this case
+    // just return the original tensor.
+
+    template <typename T>
+    class ReorderedIndexView<0, 1, T>
+    {
+    public:
+      ReorderedIndexView(typename ReferenceType<T>::type t) : t_(t) {}
+
+      typedef typename ReferenceType<typename ValueType<T>::value_type>::type value_type;
+      // Rank 1 with index 0: there is nothing to reorder, so t_ is subscripted directly.
+      inline DEAL_II_ALWAYS_INLINE
+      value_type operator[](unsigned int j) const
+      {
+        return t_[j];
+      }
+
+    private:
+      typename ReferenceType<T>::type t_; // the viewed object, held as ReferenceType<T>::type
+    };
+
+    // Here, Identity is a helper class to ground the recursion in
+    // StoreIndex. Its implementation is easy - we haven't stored any
+    // indices yet. So, we just provide a function apply that returns the
+    // application of an index j to the stored tensor t_:
+
+    template <typename T>
+    class Identity
+    {
+    public:
+      Identity(typename ReferenceType<T>::type t) : t_(t) {}
+
+      typedef typename ValueType<T>::value_type return_type;
+      // Apply index j to the wrapped object; this class itself stores no index.
+      inline DEAL_II_ALWAYS_INLINE
+      typename ReferenceType<return_type>::type apply(unsigned int j) const
+      {
+        return t_[j];
+      }
+
+    private:
+      typename ReferenceType<T>::type t_; // the wrapped object, held as ReferenceType<T>::type
+    };
+
+    // StoreIndex is a class that stores an index recursively with every
+    // invocation of operator[](unsigned int j): We do this by recursively
+    // creating a new StoreIndex class of lower rank that stores the
+    // supplied index j and holds a copy of the current class (with all
+    // other stored indices). Again, we provide an apply member function
+    // that knows how to apply an index on the highest rank and all
+    // subsequently stored indices:
+
+    template <int rank, typename S>
+    class StoreIndex
+    {
+    public:
+      StoreIndex(S s, int i) : s_(s), i_(i) {}
+
+      typedef StoreIndex<rank - 1, StoreIndex<rank, S> > value_type;
+      // Record j in a new StoreIndex of rank - 1 that keeps a copy of *this.
+      inline DEAL_II_ALWAYS_INLINE
+      value_type operator[](unsigned int j) const
+      {
+        return value_type(*this, j);
+      }
+
+      typedef typename ValueType<typename S::return_type>::value_type return_type;
+      // Apply j through the previous stage s_ first, then the index kept here.
+      inline
+      typename ReferenceType<return_type>::type apply(unsigned int j) const
+      {
+        return s_.apply(j)[i_];
+      }
+
+    private:
+      const S s_; // previous stage of the chain (copied in by the constructor)
+      const int i_; // the index recorded by this stage
+    };
+
+    // We have to store indices until we hit rank == 1. Then, upon the next
+    // invocation of operator[](unsigned int j) we have all necessary
+    // information available to return the actual object.
+
+    template <typename S>
+    class StoreIndex<1, S>
+    {
+    public:
+      StoreIndex(S s, int i) : s_(s), i_(i) {}
+
+      typedef typename ValueType<typename S::return_type>::value_type return_type;
+      typedef return_type value_type;
+      // Final subscript: j is handed to s_.apply(), then the index kept here is applied.
+      inline DEAL_II_ALWAYS_INLINE
+      return_type &operator[](unsigned int j) const
+      {
+        return s_.apply(j)[i_];
+      }
+
+    private:
+      const S s_; // previous stage of the chain (copied in by the constructor)
+      const int i_; // the index recorded by this stage
+    };
+
+
+    // -------------------------------------------------------------------------
+    // Implementation of helper classes for extract
+    // -------------------------------------------------------------------------
+
+    // Straightforward recursion implemented by specializing ExtractHelper
+    // for position == rank. We use the type trait ReturnType<rank, T> to
+    // have an idea what the final type will be.
+    template<int position, int rank>
+    struct ExtractHelper
+    {
+      template<typename T, typename ArrayType>
+      inline
+      static
+      typename ReturnType<rank - position, T>::value_type &
+      extract(T &t,
+              const ArrayType &indices)
+      {
+        return ExtractHelper<position + 1, rank>::
+               template extract<typename ValueType<T>::value_type, ArrayType>
+        (t[indices[position]], indices);
+      }
+    };
+
+    // For position == rank there is nothing to extract, just return the
+    // object.
+    template<int rank>
+    struct ExtractHelper<rank, rank>
+    {
+      template<typename T, typename ArrayType>
+      inline
+      static
+      T &extract(T &t,
+                 const ArrayType &)
+      {
+        return t;
+      }
+    };
+
+
+    // -------------------------------------------------------------------------
+    // Implementation of helper classes for contract
+    // -------------------------------------------------------------------------
+
+    // Straightforward recursive pattern:
+    //
+    // As long as rank_1 > no_contr, assign indices from the left tensor to
+    // result. This builds up the first part of the nested outer loops:
+    //
+    // for(unsigned int i_0 = 0; i_0 < dim; ++i_0)
+    //   ...
+    //     for(unsigned int i_ = 0; i_ < dim; ++i_)
+    //       [...]
+    //         result[i_0]..[i_] ... left[i_0]..[i_] ...
+
+    template <int no_contr, int rank_1, int rank_2, int dim>
+    class Contract
+    {
+    public:
+      template<typename T1, typename T2, typename T3>
+      inline DEAL_II_ALWAYS_INLINE static
+      void contract(T1 &result, const T2 &left, const T3 &right)
+      {
+        for (unsigned int i = 0; i < dim; ++i)
+          Contract<no_contr, rank_1 - 1, rank_2, dim>::
+          contract(result[i], left[i], right);
+      }
+    };
+
+    // If rank_1 == no_contr leave out the remaining no_contr indices for
+    // the contraction and assign indices from the right tensor to the
+    // result. This builds up the second part of the nested loops:
+    //
+    //  for(unsigned int i_0 = 0; i_0 < dim; ++i_0)
+    //    ...
+    //      for(unsigned int i_ = 0; i_ < dim; ++i_)
+    //        for(unsigned int j_0 = 0; j_0 < dim; ++j_0)
+    //          ...
+    //            for(unsigned int j_ = 0; j_ < dim; ++j_)
+    //             [...]
+    //               result[i_0]..[i_][j_0]..[j_] ... left[i_0]..[i_] ... right[j_0]..[j_]
+    //
+
+    template <int no_contr, int rank_2, int dim>
+    class Contract<no_contr, no_contr, rank_2, dim>
+    {
+    public:
+      template<typename T1, typename T2, typename T3>
+      inline DEAL_II_ALWAYS_INLINE static
+      void contract(T1 &result, const T2 &left, const T3 &right)
+      {
+        for (unsigned int i = 0; i < dim; ++i)
+          Contract<no_contr, no_contr, rank_2 - 1, dim>::
+          contract(result[i], left, right[i]);
+      }
+    };
+
+    // If rank_1 == rank_2 == no_contr we have built up all of the outer
+    // loop. Now, it is time to do the actual contraction:
+    //
+    // [...]
+    //   {
+    //     result[i_0]..[i_][j_0]..[j_] = 0.;
+    //     for(unsigned int k_0 = 0; k_0 < dim; ++k_0)
+    //       ...
+    //         for(unsigned int k_ = 0; k_ < dim; ++k_)
+    //           result[i_0]..[i_][j_0]..[j_] += left[i_0]..[i_][k_0]..[k_] * right[j_0]..[j_][k_0]..[k_];
+    //   }
+    //
+    //  Relay this summation to another helper class.
+
+    template <int no_contr, int dim>
+    class Contract<no_contr, no_contr, no_contr, dim>
+    {
+    public:
+      template<typename T1, typename T2, typename T3>
+      inline DEAL_II_ALWAYS_INLINE static
+      void contract(T1 &result, const T2 &left, const T3 &right)
+      {
+        result = Contract2<no_contr, dim>::template contract2<T1>(left, right);
+      }
+    };
+
+    // Straightforward recursion:
+    //
+    // Contract leftmost index and recurse one down.
+
+    template <int no_contr, int dim>
+    class Contract2
+    {
+    public:
+      template<typename T1, typename T2, typename T3>
+      inline DEAL_II_ALWAYS_INLINE static
+      T1 contract2(const T2 &left, const T3 &right)
+      {
+        T1 result = T1();
+        for (unsigned int i = 0; i < dim; ++i)
+          result += Contract2<no_contr - 1, dim>::template contract2<T1>(left[i], right[i]);
+        return result;
+      }
+    };
+
+    // A contraction of two objects of order 0 is just a scalar
+    // multiplication:
+
+    template <int dim>
+    class Contract2<0, dim>
+    {
+    public:
+      template<typename T1, typename T2, typename T3>
+      inline DEAL_II_ALWAYS_INLINE static
+      T1 contract2(const T2 &left, const T3 &right)
+      {
+        return left * right;
+      }
+    };
+
+
+    // -------------------------------------------------------------------------
+    // Implementation of helper classes for contract3
+    // -------------------------------------------------------------------------
+
+    // Fully contract three tensorial objects
+    //
+    // As long as rank_1 > 0, recurse over left and middle:
+    //
+    // for(unsigned int i_0 = 0; i_0 < dim; ++i_0)
+    //   ...
+    //     for(unsigned int i_ = 0; i_ < dim; ++i_)
+    //       [...]
+    //         left[i_0]..[i_] ... middle[i_0]..[i_] ... right
+
+    template <int rank_1, int rank_2, int dim>
+    class Contract3
+    {
+    public:
+      template<typename T1, typename T2, typename T3, typename T4>
+      static inline
+      T1 contract3(const T2 &left, const T3 &middle, const T4 &right)
+      {
+        T1 result = T1();
+        for (unsigned int i = 0; i < dim; ++i)
+          result += Contract3<rank_1 - 1, rank_2, dim>::template contract3<T1>(left[i], middle[i], right);
+        return result;
+      }
+    };
+
+    // If rank_1 == 0, continue to recurse over middle and right:
+    //
+    // for(unsigned int i_0 = 0; i_0 < dim; ++i_0)
+    //   ...
+    //     for(unsigned int i_ = 0; i_ < dim; ++i_)
+    //       for(unsigned int j_0 = 0; j_0 < dim; ++j_0)
+    //         ...
+    //           for(unsigned int j_ = 0; j_ < dim; ++j_)
+    //             [...]
+    //               left[i_0]..[i_] ... middle[i_0]..[i_][j_0]..[j_] ... right[j_0]..[j_]
+
+    template <int rank_2, int dim>
+    class Contract3<0, rank_2, dim>
+    {
+    public:
+      template<typename T1, typename T2, typename T3, typename T4>
+      static inline
+      T1 contract3(const T2 &left, const T3 &middle, const T4 &right)
+      {
+        T1 result = T1();
+        for (unsigned int i = 0; i < dim; ++i)
+          result += Contract3<0, rank_2 - 1, dim>::template contract3<T1>(left, middle[i], right[i]);
+        return result;
+      }
+    };
+
+    // Contraction of three tensorial objects of rank 0 is just a scalar
+    // multiplication.
+
+    template <int dim>
+    class Contract3<0, 0, dim>
+    {
+    public:
+      template<typename T1, typename T2, typename T3, typename T4>
+      static inline
+      T1 contract3(const T2 &left, const T3 &middle, const T4 &right)
+      {
+        return left * middle * right;
+      }
+    };
+
+    // -------------------------------------------------------------------------
+
+  } /* namespace internal */
+} /* namespace TensorAccessors */
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif /* dealii__tensor_accessors_h */
diff --git a/include/deal.II/base/tensor_base.h b/include/deal.II/base/tensor_base.h
new file mode 100644
index 0000000..eb38831
--- /dev/null
+++ b/include/deal.II/base/tensor_base.h
@@ -0,0 +1,6 @@
+#ifndef dealii__tensor_base_h
+#define dealii__tensor_base_h
+#warning This file is deprecated. Use <deal.II/base/tensor.h> instead.
+#endif
+
+#  include <deal.II/base/tensor.h>
diff --git a/include/deal.II/base/tensor_deprecated.h b/include/deal.II/base/tensor_deprecated.h
new file mode 100644
index 0000000..6f3ac96
--- /dev/null
+++ b/include/deal.II/base/tensor_deprecated.h
@@ -0,0 +1,522 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__tensor_deprecated_h
+#define dealii__tensor_deprecated_h
+
+#include <deal.II/base/tensor.h>
+
+
+/* --------- Deprecated non-member functions operating on tensors. ---------- */
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * @name Deprecated Tensor operations
+ */
+//@{
+
+/**
+ * Exception.
+ *
+ * @deprecated
+ */
+DeclException1 (ExcInvalidTensorContractionIndex,
+                int,
+                << "You have requested contraction of tensors over index "
+                << arg1
+                << ", but this is not possible for tensors of the current type.");
+
+
+/**
+ * Double contract two tensors of rank 2, thus computing the Frobenius inner
+ * product <tt>sum<sub>i,j</sub> src1[i][j]*src2[i][j]</tt>.
+ *
+ * @deprecated Use the double_contract() function that takes indices as
+ * template arguments and returns its result instead.
+ * @relates Tensor
+ */
+template <int dim, typename Number>
+inline
+Number double_contract (const Tensor<2, dim, Number> &src1,
+                        const Tensor<2, dim, Number> &src2) DEAL_II_DEPRECATED;
+
+
+/**
+ * Contract the last two indices of <tt>src1</tt> with the two indices of
+ * <tt>src2</tt>, creating a rank-2 tensor. This is the matrix-vector product
+ * analog operation between tensors of rank 4 and rank 2.
+ *
+ * @deprecated Use the double_contract() function that takes indices as
+ * template arguments and returns its result instead.
+ * @relates Tensor
+ */
+template <int dim, typename Number>
+inline
+void double_contract (Tensor<2,dim,Number>       &dest,
+                      const Tensor<4,dim,Number> &src1,
+                      const Tensor<2,dim,Number> &src2) DEAL_II_DEPRECATED;
+
+/**
+ * Contract a tensor of rank 2 with a tensor of rank 2. The contraction is
+ * performed over index <tt>index1</tt> of the first tensor, and
+ * <tt>index2</tt> of the second tensor. Note that the number of the index is
+ * counted from 1 on, not from zero as usual.
+ *
+ * @deprecated Use the contract() function that takes indices as template
+ * arguments and returns its result instead.
+ * @relates Tensor
+ */
+template <int dim, typename Number>
+inline
+void contract (Tensor<2,dim,Number>       &dest,
+               const Tensor<2,dim,Number> &src1,
+               const unsigned int          index1,
+               const Tensor<2,dim,Number> &src2,
+               const unsigned int          index3) DEAL_II_DEPRECATED;
+
+/**
+ * Contract a tensor of rank 3 with a tensor of rank 1. The contraction is
+ * performed over index <tt>index1</tt> of the first tensor. Note that the
+ * number of the index is counted from 1 on, not from zero as usual.
+ *
+ * @deprecated Use the contract() function that takes indices as template
+ * arguments and returns its result instead.
+ * @relates Tensor
+ */
+template <int dim, typename Number>
+inline
+void contract (Tensor<2,dim,Number>       &dest,
+               const Tensor<3,dim,Number> &src1,
+               const unsigned int          index1,
+               const Tensor<1,dim,Number> &src2) DEAL_II_DEPRECATED;
+
+/**
+ * Contract a tensor of rank 3 with a tensor of rank 2. The contraction is
+ * performed over index <tt>index1</tt> of the first tensor, and
+ * <tt>index2</tt> of the second tensor. Note that the number of the index is
+ * counted from 1 on, not from zero as usual.
+ *
+ * @deprecated Use the contract() function that takes indices as template
+ * arguments and returns its result instead.
+ * @relates Tensor
+ */
+template <int dim, typename Number>
+inline
+void contract (Tensor<3,dim,Number>       &dest,
+               const Tensor<3,dim,Number> &src1,
+               const unsigned int          index1,
+               const Tensor<2,dim,Number> &src2,
+               const unsigned int          index2) DEAL_II_DEPRECATED;
+
+/**
+ * Single contraction for tensors: contract the last index of a tensor @p src1
+ * of rank @p rank_1 with the first index of a tensor @p src2 of rank @p
+ * rank_2.
+ *
+ * @deprecated Use operator* instead. It denotes a single contraction.
+ * @relates Tensor
+ */
+template <int rank_1, int rank_2, int dim, typename Number>
+inline
+void contract (Tensor<rank_1 + rank_2 - 2, dim, Number> &dest,
+               const Tensor<rank_1 ,dim, Number>        &src1,
+               const Tensor<rank_2 ,dim, Number>        &src2) DEAL_II_DEPRECATED;
+
+/**
+ * Contract a tensor of rank 1 with a tensor of rank 1 and return the result.
+ *
+ * @deprecated Use operator* instead. It denotes a single contraction.
+ * @relates Tensor
+ */
+template <int dim, typename Number, typename OtherNumber>
+inline
+typename ProductType<Number,OtherNumber>::type
+contract (const Tensor<1,dim,Number> &src1,
+          const Tensor<1,dim,OtherNumber> &src2) DEAL_II_DEPRECATED;
+
+
+/**
+ * The cross product of one vector in 2d. This is just a rotation by 90
+ * degrees.
+ *
+ * @deprecated Use the function cross_product_2d() that returns the value.
+ * @relates Tensor
+ */
+template <int dim, typename Number>
+inline
+void
+cross_product (Tensor<1,dim,Number>       &dst,
+               const Tensor<1,dim,Number> &src) DEAL_II_DEPRECATED;
+
+/**
+ * The cross product of 2 vectors in 3d.
+ *
+ * @deprecated Use the function cross_product_3d() that returns the value.
+ * @relates Tensor
+ */
+template <int dim, typename Number>
+inline
+void
+cross_product (Tensor<1,dim,Number>       &dst,
+               const Tensor<1,dim,Number> &src1,
+               const Tensor<1,dim,Number> &src2) DEAL_II_DEPRECATED;
+
+/**
+ * Form the outer product of two tensors.
+ *
+ * @deprecated Use the generic version that returns its result instead.
+ * @relates Tensor
+ */
+template <int rank_1, int rank_2, int dim, typename Number>
+inline
+void outer_product(Tensor<rank_1 + rank_2, dim, Number> &dst,
+                   const Tensor<rank_1, dim, Number>    &src1,
+                   const Tensor<rank_2, dim, Number>    &src2) DEAL_II_DEPRECATED;
+
+/**
+ * Multiply a Tensor<1,dim,Number> with a Number.
+ *
+ * @deprecated Use operator* instead.
+ * @relates Tensor
+ */
+template <int dim, typename Number>
+inline
+void outer_product (Tensor<1,dim,Number>       &dst,
+                    const Number                src1,
+                    const Tensor<1,dim,Number> &src2) DEAL_II_DEPRECATED;
+
+/**
+ * Multiply a Tensor<1,dim,Number> with a Number.
+ *
+ * @deprecated Use operator* instead.
+ * @relates Tensor
+ */
+template <int dim, typename Number>
+inline
+void outer_product (Tensor<1,dim,Number>       &dst,
+                    const Tensor<1,dim,Number>  src1,
+                    const Number                src2) DEAL_II_DEPRECATED;
+
+/**
+ * @deprecated Do not use this function, evaluate the value manually.
+ * @relates Tensor
+ */
+template <int rank, typename Number>
+inline
+Number determinant (const Tensor<rank,1,Number> &t) DEAL_II_DEPRECATED;
+
+
+/**
+ * @deprecated Do not use this function, evaluate the value manually.
+ * @relates Tensor
+ */
+template <typename Number>
+inline
+Number determinant (const Tensor<1,1,Number> &t) DEAL_II_DEPRECATED;
+
+//@}
+
+/* ----------------------------- Definitions: ------------------------------- */
+
+template <int dim, typename Number>
+inline
+Number double_contract (const Tensor<2, dim, Number> &src1,
+                        const Tensor<2, dim, Number> &src2)
+{
+  Number res = 0.;
+  for (unsigned int i=0; i<dim; ++i)
+    res += src1[i] * src2[i];
+
+  return res;
+}
+
+template <int dim, typename Number>
+inline
+void double_contract (Tensor<2,dim,Number>       &dest,
+                      const Tensor<4,dim,Number> &src1,
+                      const Tensor<2,dim,Number> &src2)
+{
+  dest.clear ();
+  for (unsigned int i=0; i<dim; ++i)
+    for (unsigned int j=0; j<dim; ++j)
+      for (unsigned int k=0; k<dim; ++k)
+        for (unsigned int l=0; l<dim; ++l)
+          dest[i][j] += src1[i][j][k][l] * src2[k][l];
+}
+
+template <int dim, typename Number>
+inline
+void contract (Tensor<2,dim,Number>       &dest,
+               const Tensor<2,dim,Number> &src1,   const unsigned int index1,
+               const Tensor<2,dim,Number> &src2,   const unsigned int index2)
+{
+  dest.clear ();
+
+  switch (index1)
+    {
+    case 1:
+      switch (index2)
+        {
+        case 1:
+          for (unsigned int i=0; i<dim; ++i)
+            for (unsigned int j=0; j<dim; ++j)
+              for (unsigned int k=0; k<dim; ++k)
+                dest[i][j] += src1[k][i] * src2[k][j];
+          break;
+        case 2:
+          for (unsigned int i=0; i<dim; ++i)
+            for (unsigned int j=0; j<dim; ++j)
+              for (unsigned int k=0; k<dim; ++k)
+                dest[i][j] += src1[k][i] * src2[j][k];
+          break;
+
+        default:
+          Assert (false, (ExcInvalidTensorContractionIndex (index2)));
+        };
+      break;
+    case 2:
+      switch (index2)
+        {
+        case 1:
+          for (unsigned int i=0; i<dim; ++i)
+            for (unsigned int j=0; j<dim; ++j)
+              for (unsigned int k=0; k<dim; ++k)
+                dest[i][j] += src1[i][k] * src2[k][j];
+          break;
+        case 2:
+          for (unsigned int i=0; i<dim; ++i)
+            for (unsigned int j=0; j<dim; ++j)
+              for (unsigned int k=0; k<dim; ++k)
+                dest[i][j] += src1[i][k] * src2[j][k];
+          break;
+
+        default:
+          Assert (false, (ExcInvalidTensorContractionIndex (index2)));
+        };
+      break;
+
+    default:
+      Assert (false, (ExcInvalidTensorContractionIndex (index1)));
+    };
+}
+
+template <int dim, typename Number>
+inline
+void contract (Tensor<2,dim,Number>       &dest,
+               const Tensor<3,dim,Number> &src1,   const unsigned int index1,
+               const Tensor<1,dim,Number> &src2)
+{
+  dest.clear ();
+
+  switch (index1)
+    {
+    case 1:
+      for (unsigned int i=0; i<dim; ++i)
+        for (unsigned int j=0; j<dim; ++j)
+          for (unsigned int k=0; k<dim; ++k)
+            dest[i][j] += src1[k][i][j] * src2[k];
+      break;
+
+    case 2:
+      for (unsigned int i=0; i<dim; ++i)
+        for (unsigned int j=0; j<dim; ++j)
+          for (unsigned int k=0; k<dim; ++k)
+            dest[i][j] += src1[i][k][j] * src2[k];
+      break;
+
+    case 3:
+      for (unsigned int i=0; i<dim; ++i)
+        for (unsigned int j=0; j<dim; ++j)
+          for (unsigned int k=0; k<dim; ++k)
+            dest[i][j] += src1[i][j][k] * src2[k];
+      break;
+
+    default:
+      Assert (false, (ExcInvalidTensorContractionIndex (index1)));
+    };
+}
+
+template <int dim, typename Number>
+inline
+void contract (Tensor<3,dim,Number>       &dest,
+               const Tensor<3,dim,Number> &src1, const unsigned int index1,
+               const Tensor<2,dim,Number> &src2, const unsigned int index2)
+{
+  dest.clear ();
+
+  switch (index1)
+    {
+    case 1:
+      switch (index2)
+        {
+        case 1:
+          for (unsigned int i=0; i<dim; ++i)
+            for (unsigned int j=0; j<dim; ++j)
+              for (unsigned int k=0; k<dim; ++k)
+                for (unsigned int l=0; l<dim; ++l)
+                  dest[i][j][k] += src1[l][i][j] * src2[l][k];
+          break;
+        case 2:
+          for (unsigned int i=0; i<dim; ++i)
+            for (unsigned int j=0; j<dim; ++j)
+              for (unsigned int k=0; k<dim; ++k)
+                for (unsigned int l=0; l<dim; ++l)
+                  dest[i][j][k] += src1[l][i][j] * src2[k][l];
+          break;
+        default:
+          Assert (false, (ExcInvalidTensorContractionIndex (index2)));
+        }
+
+      break;
+    case 2:
+      switch (index2)
+        {
+        case 1:
+          for (unsigned int i=0; i<dim; ++i)
+            for (unsigned int j=0; j<dim; ++j)
+              for (unsigned int k=0; k<dim; ++k)
+                for (unsigned int l=0; l<dim; ++l)
+                  dest[i][j][k] += src1[i][l][j] * src2[l][k];
+          break;
+        case 2:
+          for (unsigned int i=0; i<dim; ++i)
+            for (unsigned int j=0; j<dim; ++j)
+              for (unsigned int k=0; k<dim; ++k)
+                for (unsigned int l=0; l<dim; ++l)
+                  dest[i][j][k] += src1[i][l][j] * src2[k][l];
+          break;
+        default:
+          Assert (false, (ExcInvalidTensorContractionIndex (index2)));
+        }
+
+      break;
+    case 3:
+      switch (index2)
+        {
+        case 1:
+          for (unsigned int i=0; i<dim; ++i)
+            for (unsigned int j=0; j<dim; ++j)
+              for (unsigned int k=0; k<dim; ++k)
+                for (unsigned int l=0; l<dim; ++l)
+                  dest[i][j][k] += src1[i][j][l] * src2[l][k];
+          break;
+        case 2:
+          for (unsigned int i=0; i<dim; ++i)
+            for (unsigned int j=0; j<dim; ++j)
+              for (unsigned int k=0; k<dim; ++k)
+                for (unsigned int l=0; l<dim; ++l)
+                  dest[i][j][k] += src1[i][j][l] * src2[k][l];
+          break;
+        default:
+          Assert (false, (ExcInvalidTensorContractionIndex (index2)));
+        }
+
+      break;
+    default:
+      Assert (false, (ExcInvalidTensorContractionIndex (index1)));
+    }
+}
+
+template <int rank_1, int rank_2, int dim, typename Number>
+inline
+void contract (Tensor<rank_1 + rank_2 - 2, dim, Number> &dest,
+               const Tensor<rank_1 ,dim, Number>        &src1,
+               const Tensor<rank_2 ,dim, Number>        &src2)
+{
+  TensorAccessors::internal::ReorderedIndexView<0, rank_2, const Tensor<rank_2, dim, Number> >
+  reordered = TensorAccessors::reordered_index_view<0, rank_2>(src2);
+  TensorAccessors::contract<1, rank_1, rank_2, dim>(dest, src1, reordered);
+}
+
+template <int dim, typename Number, typename OtherNumber>
+inline
+typename ProductType<Number,OtherNumber>::type
+contract (const Tensor<1,dim,Number> &src1,
+          const Tensor<1,dim,OtherNumber> &src2)
+{
+  typename ProductType<Number,OtherNumber>::type res
+    = typename ProductType<Number,OtherNumber>::type();
+  for (unsigned int i=0; i<dim; ++i)
+    res += src1[i] * src2[i];
+
+  return res;
+}
+
+template <int dim, typename Number>
+inline
+void
+cross_product (Tensor<1,dim,Number>       &dst,
+               const Tensor<1,dim,Number> &src)
+{
+  dst = cross_product_2d(src);
+}
+
+template <int dim, typename Number>
+inline
+void
+cross_product (Tensor<1,dim,Number>       &dst,
+               const Tensor<1,dim,Number> &src1,
+               const Tensor<1,dim,Number> &src2)
+{
+  dst = cross_product_3d(src1, src2);
+}
+
+template <int rank_1, int rank_2, int dim, typename Number>
+inline
+void outer_product(Tensor<rank_1 + rank_2, dim, Number> &dst,
+                   const Tensor<rank_1, dim, Number>    &src1,
+                   const Tensor<rank_2, dim, Number>    &src2)
+{
+  TensorAccessors::contract<0, rank_1, rank_2, dim>(dst, src1, src2);
+}
+
+template <int dim, typename Number>
+inline
+void outer_product (Tensor<1,dim,Number>       &dst,
+                    const Number                src1,
+                    const Tensor<1,dim,Number> &src2)
+{
+  for (unsigned int i=0; i<dim; ++i)
+    dst[i] = src1 * src2[i];
+}
+
+template <int dim, typename Number>
+inline
+void outer_product (Tensor<1,dim,Number>       &dst,
+                    const Tensor<1,dim,Number>  src1,
+                    const Number         src2)
+{
+  for (unsigned int i=0; i<dim; ++i)
+    dst[i] = src1[i] * src2;
+}
+
+template <int rank, typename Number>
+inline
+Number determinant (const Tensor<rank,1,Number> &t)
+{
+  return determinant(t[0]);
+}
+
+template <typename Number>
+inline
+Number determinant (const Tensor<1,1,Number> &t)
+{
+  return t[0];
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/tensor_function.h b/include/deal.II/base/tensor_function.h
new file mode 100644
index 0000000..1b926f5
--- /dev/null
+++ b/include/deal.II/base/tensor_function.h
@@ -0,0 +1,171 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__tensor_function_h
+#define dealii__tensor_function_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/point.h>
+#include <deal.II/base/function_time.h>
+
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * This class is a model for a tensor valued function. The interface of the
+ * class is mostly the same as that for the Function class, with the exception
+ * that it does not support vector-valued functions with several components,
+ * but that the return type is always tensor-valued. The returned values of
+ * the evaluation of objects of this type are always whole tensors, while for
+ * the <tt>Function</tt> class, one can ask for a specific component only, or
+ * use the <tt>vector_value</tt> function, which however does not return the
+ * value, but rather writes it into the address provided by its second
+ * argument. The reason for the different behaviour of the classes is that in
+ * the case of tensor valued functions, the size of the argument is known to
+ * the compiler a priori, such that the correct amount of memory can be
+ * allocated on the stack for the return value; on the other hand, for the
+ * vector valued functions, the size is not known to the compiler, so memory
+ * has to be allocated on the heap, resulting in relatively expensive copy
+ * operations. One can therefore consider this class a specialization of the
+ * <tt>Function</tt> class for which the size is known. An additional benefit
+ * is that tensors of arbitrary rank can be returned, not only vectors, since
+ * their size can be determined just as easily.
+ *
+ * @ingroup functions
+ * @author Guido Kanschat, 1999
+ */
+template <int rank, int dim, typename Number=double>
+class TensorFunction : public FunctionTime<Number>,
+  public Subscriptor
+{
+public:
+  /**
+   * Define typedefs for the return types of the <tt>value</tt> functions.
+   */
+  typedef Tensor<rank,dim,Number> value_type;
+
+  typedef Tensor<rank+1,dim,Number> gradient_type;
+
+  /**
+   * Constructor. May take an initial value for the time variable, which
+   * defaults to zero.
+   */
+  TensorFunction (const Number initial_time = Number(0.0));
+
+  /**
+   * Virtual destructor; absolutely necessary in this case, as classes are
+   * usually not used by their true type, but rather through pointers to this
+   * base class.
+   */
+  virtual ~TensorFunction ();
+
+  /**
+   * Return the value of the function at the given point.
+   */
+  virtual value_type value (const Point<dim> &p) const;
+
+  /**
+   * Set <tt>values</tt> to the point values of the function at the
+   * <tt>points</tt>.  It is assumed that <tt>values</tt> already has the
+   * right size, i.e.  the same size as the <tt>points</tt> array.
+   */
+  virtual void value_list (const std::vector<Point<dim> > &points,
+                           std::vector<value_type> &values) const;
+
+  /**
+   * Return the gradient of the function at the given point.
+   */
+  virtual gradient_type gradient (const Point<dim> &p) const;
+
+  /**
+   * Set <tt>gradients</tt> to the gradients of the function at the
+   * <tt>points</tt>.  It is assumed that <tt>gradients</tt> already has the
+   * right size, i.e.  the same size as the <tt>points</tt> array.
+   */
+  virtual void gradient_list (const std::vector<Point<dim> >   &points,
+                              std::vector<gradient_type> &gradients) const;
+};
+
+
+
+/**
+ * Provide a tensor valued function which always returns a constant tensor
+ * value. Obviously, all derivatives of this function are zero.
+ *
+ * @ingroup functions
+ * @author Matthias Maier, 2013
+ */
+template <int rank, int dim, typename Number=double>
+class ConstantTensorFunction : public TensorFunction<rank, dim, Number>
+{
+public:
+  /**
+   * Constructor; takes the constant tensor value as an argument. The
+   * reference value is copied internally.
+   *
+   * An initial value for the time variable may be specified, otherwise it
+   * defaults to zero.
+   */
+  ConstantTensorFunction (const dealii::Tensor<rank, dim, Number> &value,
+                          const Number initial_time = 0.0);
+
+  virtual ~ConstantTensorFunction ();
+
+  virtual typename dealii::TensorFunction<rank, dim, Number>::value_type value (const Point<dim> &p) const;
+
+  virtual void value_list (const std::vector<Point<dim> > &points,
+                           std::vector<typename dealii::TensorFunction<rank, dim, Number>::value_type> &values) const;
+
+  virtual typename dealii::TensorFunction<rank, dim, Number>::gradient_type gradient (const Point<dim> &p) const;
+
+  virtual void gradient_list (const std::vector<Point<dim> > &points,
+                              std::vector<typename dealii::TensorFunction<rank, dim, Number>::gradient_type> &gradients) const;
+
+private:
+  const dealii::Tensor<rank, dim, Number> _value;
+};
+
+
+
+/**
+ * Provide a tensor valued function which always returns zero. Obviously, all
+ * derivatives of this function are zero.
+ *
+ * @ingroup functions
+ * @author Matthias Maier, 2013
+ */
+template <int rank, int dim, typename Number=double>
+class ZeroTensorFunction : public ConstantTensorFunction<rank, dim, Number>
+{
+public:
+  /**
+   * Constructor.
+   *
+   * An initial value for the time variable may be specified, otherwise it
+   * defaults to zero.
+   */
+  ZeroTensorFunction (const Number initial_time = 0.0);
+};
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/tensor_function.templates.h b/include/deal.II/base/tensor_function.templates.h
new file mode 100644
index 0000000..2b89d87
--- /dev/null
+++ b/include/deal.II/base/tensor_function.templates.h
@@ -0,0 +1,164 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__tensor_function_templates_h
+#define dealii__tensor_function_templates_h
+
+#include <deal.II/base/tensor_function.h>
+#include <deal.II/base/tensor.h>
+#include <deal.II/lac/vector.h>
+
+#include <vector>
+#include <cmath>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+template <int rank, int dim, typename Number>
+TensorFunction<rank, dim, Number>::TensorFunction (const Number initial_time)
+  :
+  FunctionTime<Number> (initial_time)
+{}
+
+
+template <int rank, int dim, typename Number>
+TensorFunction<rank, dim, Number>::~TensorFunction ()
+{}
+
+
+template <int rank, int dim, typename Number>
+typename TensorFunction<rank, dim, Number>::value_type
+TensorFunction<rank, dim, Number>::value (const Point<dim> &) const
+{
+  Assert (false, ExcPureFunctionCalled());
+  return Tensor<rank,dim, Number>();
+}
+
+
+template <int rank, int dim, typename Number>
+void
+TensorFunction<rank, dim, Number>::value_list (
+  const std::vector<Point<dim> > &points,
+  std::vector<value_type>        &values) const
+{
+  Assert (values.size() == points.size(),
+          ExcDimensionMismatch(values.size(), points.size()));
+
+  for (unsigned int i=0; i<points.size(); ++i)
+    values[i]  = this->value (points[i]);
+}
+
+
+template <int rank, int dim, typename Number>
+typename TensorFunction<rank, dim, Number>::gradient_type
+TensorFunction<rank, dim, Number>::gradient (const Point<dim> &) const
+{
+  Assert (false, ExcPureFunctionCalled());
+  return Tensor<rank+1,dim, Number>();
+}
+
+
+template <int rank, int dim, typename Number>
+void
+TensorFunction<rank, dim, Number>::gradient_list (
+  const std::vector<Point<dim> >   &points,
+  std::vector<gradient_type> &gradients) const
+{
+  Assert (gradients.size() == points.size(),
+          ExcDimensionMismatch(gradients.size(), points.size()));
+
+  for (unsigned int i=0; i<points.size(); ++i)
+    gradients[i] = gradient(points[i]);
+}
+
+
+
+template <int rank, int dim, typename Number>
+ConstantTensorFunction<rank, dim, Number>::ConstantTensorFunction (
+  const Tensor<rank, dim, Number> &value,
+  const Number initial_time)
+  :
+  TensorFunction<rank, dim, Number> (initial_time),
+  _value(value)
+{}
+
+
+template <int rank, int dim, typename Number>
+ConstantTensorFunction<rank, dim, Number>::~ConstantTensorFunction ()
+{}
+
+
+template <int rank, int dim, typename Number>
+typename TensorFunction<rank, dim, Number>::value_type
+ConstantTensorFunction<rank, dim, Number>::value (
+  const Point<dim> &/*point*/) const
+{
+  return _value;
+}
+
+
+template <int rank, int dim, typename Number>
+void
+ConstantTensorFunction<rank, dim, Number>::value_list (
+  const std::vector<Point<dim> > &points,
+  std::vector<typename TensorFunction<rank, dim, Number>::value_type> &values) const
+{
+  (void)points;
+  Assert (values.size() == points.size(),
+          ExcDimensionMismatch(values.size(), points.size()));
+
+  for (unsigned int i=0; i<values.size(); ++i)
+    values[i]  = _value;
+}
+
+
+template <int rank, int dim, typename Number>
+typename TensorFunction<rank, dim, Number>::gradient_type
+ConstantTensorFunction<rank, dim, Number>::gradient (const Point<dim> &) const
+{
+  static const Tensor<rank+1, dim, Number> zero;
+
+  return zero;
+}
+
+
+template <int rank, int dim, typename Number>
+void
+ConstantTensorFunction<rank, dim, Number>::gradient_list (
+  const std::vector<Point<dim> >   &points,
+  std::vector<typename TensorFunction<rank, dim, Number>::gradient_type> &gradients) const
+{
+  (void)points;
+  Assert (gradients.size() == points.size(),
+          ExcDimensionMismatch(gradients.size(), points.size()));
+
+  static const Tensor<rank+1, dim, Number> zero;
+
+  for (unsigned int i=0; i<gradients.size(); ++i)
+    gradients[i] = zero;
+}
+
+
+
+template <int rank, int dim, typename Number>
+ZeroTensorFunction<rank, dim, Number>::ZeroTensorFunction (const Number initial_time)
+  :
+  ConstantTensorFunction<rank, dim, Number> (dealii::Tensor<rank, dim, Number>(), initial_time)
+{}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif /* dealii__tensor_function_templates_h */
diff --git a/include/deal.II/base/tensor_product_polynomials.h b/include/deal.II/base/tensor_product_polynomials.h
new file mode 100644
index 0000000..794d1da
--- /dev/null
+++ b/include/deal.II/base/tensor_product_polynomials.h
@@ -0,0 +1,659 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__tensor_product_polynomials_h
+#define dealii__tensor_product_polynomials_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/tensor.h>
+#include <deal.II/base/point.h>
+#include <deal.II/base/polynomial.h>
+#include <deal.II/base/utilities.h>
+
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * @addtogroup Polynomials
+ * @{
+ */
+
+/**
+ * Tensor product of given polynomials.
+ *
+ * Given a vector of <i>n</i> one-dimensional polynomials <i>P<sub>1</sub></i>
+ * to <i>P<sub>n</sub></i>, this class generates <i>n<sup>dim</sup></i>
+ * polynomials of the form <i>Q<sub>ijk</sub>(x,y,z) =
+ * P<sub>i</sub>(x)P<sub>j</sub>(y)P<sub>k</sub>(z)</i>. If the base
+ * polynomials are mutually orthogonal on the interval [-1,1] or [0,1], then
+ * the tensor product polynomials are orthogonal on [-1,1]<sup>dim</sup> or
+ * [0,1]<sup>dim</sup>, respectively.
+ *
+ * Indexing is as follows: the order of dim-dimensional polynomials is
+ * x-coordinates running fastest, then y-coordinate, etc. The first few
+ * polynomials are thus <i>P<sub>1</sub>(x)P<sub>1</sub>(y),
+ * P<sub>2</sub>(x)P<sub>1</sub>(y), P<sub>3</sub>(x)P<sub>1</sub>(y), ...,
+ * P<sub>1</sub>(x)P<sub>2</sub>(y), P<sub>2</sub>(x)P<sub>2</sub>(y),
+ * P<sub>3</sub>(x)P<sub>2</sub>(y), ...</i> and likewise in 3d.
+ *
+ * The output_indices() function prints the ordering of the dim-dimensional
+ * polynomials, i.e. for each polynomial in the polynomial space it gives the
+ * indices i,j,k of the one-dimensional polynomials in x,y and z direction.
+ * The ordering of the dim-dimensional polynomials can be changed by using the
+ * set_numbering() function.
+ *
+ * @author Ralf Hartmann, 2000, 2004, Guido Kanschat, 2000, Wolfgang Bangerth
+ * 2003
+ */
+template <int dim, typename PolynomialType=Polynomials::Polynomial<double> >
+class TensorProductPolynomials
+{
+public:
+  /**
+   * Access to the dimension of this object, for checking and automatic
+   * setting of dimension in other classes.
+   */
+  static const unsigned int dimension = dim;
+
+  /**
+   * Constructor. <tt>pols</tt> is a vector of objects that should be derived
+   * or otherwise convertible to one-dimensional polynomial objects of type @p
+   * PolynomialType (template argument of class). It will be copied element by
+   * element into a private variable.
+   */
+  template <class Pol>
+  TensorProductPolynomials (const std::vector<Pol> &pols);
+
+  /**
+   * Prints the list of the indices to <tt>out</tt>.
+   */
+  void output_indices(std::ostream &out) const;
+
+  /**
+   * Sets the ordering of the polynomials. Requires
+   * <tt>renumber.size()==n()</tt>.  Stores a copy of <tt>renumber</tt>.
+   */
+  void set_numbering(const std::vector<unsigned int> &renumber);
+
+  /**
+   * Gives read access to the renumber vector.
+   */
+  const std::vector<unsigned int> &get_numbering() const;
+
+  /**
+   * Gives read access to the inverse renumber vector.
+   */
+  const std::vector<unsigned int> &get_numbering_inverse() const;
+
+  /**
+   * Computes the value and the first through fourth derivatives of each
+   * tensor product polynomial at <tt>unit_point</tt>.
+   *
+   * The size of each vector must be equal to either 0 or n(). In the first
+   * case, the function will not compute these values.
+   *
+   * If you need values or derivatives of all tensor product polynomials then
+   * use this function, rather than using any of the compute_value(),
+   * compute_grad() or compute_grad_grad() functions, see below, in a loop
+   * over all tensor product polynomials.
+   */
+  void compute (const Point<dim>            &unit_point,
+                std::vector<double>         &values,
+                std::vector<Tensor<1,dim> > &grads,
+                std::vector<Tensor<2,dim> > &grad_grads,
+                std::vector<Tensor<3,dim> > &third_derivatives,
+                std::vector<Tensor<4,dim> > &fourth_derivatives) const;
+
+  /**
+   * Computes the value of the <tt>i</tt>th tensor product polynomial at
+   * <tt>unit_point</tt>. Here <tt>i</tt> is given in tensor product
+   * numbering.
+   *
+   * Note, that using this function within a loop over all tensor product
+   * polynomials is not efficient, because then each point value of the
+   * underlying (one-dimensional) polynomials is (unnecessarily) computed
+   * several times.  Instead use the compute() function with
+   * <tt>values.size()==</tt>n() to get the point values of all tensor
+   * polynomials all at once and in a much more efficient way.
+   */
+  double compute_value (const unsigned int i,
+                        const Point<dim> &p) const;
+
+  /**
+   * Computes the <tt>order</tt>th derivative of the <tt>i</tt>th tensor
+   * product polynomial at <tt>unit_point</tt>. Here <tt>i</tt> is given in
+   * tensor product numbering.
+   *
+   * Note, that using this function within a loop over all tensor product
+   * polynomials is not efficient, because then each derivative value of the
+   * underlying (one-dimensional) polynomials is (unnecessarily) computed
+   * several times.  Instead use the compute() function, see above, with the
+   * size of the appropriate parameter set to n() to get the point value of
+   * all tensor polynomials all at once and in a much more efficient way.
+   *
+   * @tparam order The derivative order.
+   */
+  template <int order>
+  Tensor<order,dim> compute_derivative (const unsigned int i,
+                                        const Point<dim> &p) const;
+
+  /**
+   * Computes the grad of the <tt>i</tt>th tensor product polynomial at
+   * <tt>unit_point</tt>. Here <tt>i</tt> is given in tensor product
+   * numbering.
+   *
+   * Note, that using this function within a loop over all tensor product
+   * polynomials is not efficient, because then each derivative value of the
+   * underlying (one-dimensional) polynomials is (unnecessarily) computed
+   * several times.  Instead use the compute() function, see above, with
+   * <tt>grads.size()==</tt>n() to get the point value of all tensor
+   * polynomials all at once and in a much more efficient way.
+   */
+  Tensor<1,dim> compute_grad (const unsigned int i,
+                              const Point<dim> &p) const;
+
+  /**
+   * Computes the second derivative (grad_grad) of the <tt>i</tt>th tensor
+   * product polynomial at <tt>unit_point</tt>. Here <tt>i</tt> is given in
+   * tensor product numbering.
+   *
+   * Note, that using this function within a loop over all tensor product
+   * polynomials is not efficient, because then each derivative value of the
+   * underlying (one-dimensional) polynomials is (unnecessarily) computed
+   * several times.  Instead use the compute() function, see above, with
+   * <tt>grad_grads.size()==</tt>n() to get the point value of all tensor
+   * polynomials all at once and in a much more efficient way.
+   */
+  Tensor<2,dim> compute_grad_grad (const unsigned int i,
+                                   const Point<dim> &p) const;
+
+  /**
+   * Returns the number of tensor product polynomials. For <i>n</i> 1d
+   * polynomials this is <i>n<sup>dim</sup></i>.
+   */
+  unsigned int n () const;
+
+
+protected:
+  /**
+   * Copy of the vector <tt>pols</tt> of polynomials given to the constructor.
+   */
+  std::vector<PolynomialType> polynomials;
+
+  /**
+   * Number of tensor product polynomials. See n().
+   */
+  unsigned int n_tensor_pols;
+
+  /**
+   * Index map for reordering the polynomials.
+   */
+  std::vector<unsigned int> index_map;
+
+  /**
+   * Inverse of the index map used for reordering the polynomials.
+   */
+  std::vector<unsigned int> index_map_inverse;
+
+  /**
+   * Each tensor product polynomial <i>i</i> is a product of one-dimensional
+   * polynomials in each space direction. Compute the indices of these one-
+   * dimensional polynomials for each space direction, given the index
+   * <i>i</i>.
+   */
+  // fix to avoid compiler warnings about zero length arrays
+  void compute_index (const unsigned int i,
+                      unsigned int       (&indices)[(dim>0?dim:1)]) const;
+};
+
+
+
+/**
+ * Anisotropic tensor product of given polynomials.
+ *
+ * Given one-dimensional polynomials <tt>Px1</tt>, <tt>Px2</tt>, ... in
+ * x-direction, <tt>Py1</tt>, <tt>Py2</tt>, ... in y-direction, and so on,
+ * this class generates polynomials of the form  <i>Q<sub>ijk</sub>(x,y,z) =
+ * Pxi(x)Pyj(y)Pzk(z)</i>. If the base polynomials are mutually orthogonal on
+ * the interval $[-1,1]$ or $[0,1]$, then the tensor product polynomials are
+ * orthogonal on $[-1,1]^d$ or $[0,1]^d$, respectively.
+ *
+ * Indexing is as follows: the order of dim-dimensional polynomials is
+ * x-coordinates running fastest, then y-coordinate, etc. The first few
+ * polynomials are thus <tt>Px1(x)Py1(y)</tt>, <tt>Px2(x)Py1(y)</tt>,
+ * <tt>Px3(x)Py1(y)</tt>, ..., <tt>Px1(x)Py2(y)</tt>, <tt>Px2(x)Py2(y)</tt>,
+ * <tt>Px3(x)Py2(y)</tt>, ..., and likewise in 3d.
+ *
+ * @author Wolfgang Bangerth 2003
+ */
+template <int dim>
+class AnisotropicPolynomials
+{
+public:
+  /**
+   * Constructor. <tt>pols</tt> is a table of one-dimensional polynomials. The
+   * number of rows in this table should be equal to the space dimension, with
+   * the elements of each row giving the polynomials that shall be used in
+   * this particular coordinate direction. These polynomials may vary between
+   * coordinates, as well as their number.
+   */
+  AnisotropicPolynomials (const std::vector<std::vector<Polynomials::Polynomial<double> > > &pols);
+
+  /**
+   * Computes the value and the first through fourth derivatives of each
+   * tensor product polynomial at <tt>unit_point</tt>.
+   *
+   * The size of each vector must be equal to either <tt>0</tt> or
+   * <tt>n_tensor_pols</tt>.  In the first case, the function will not compute
+   * these values.
+   *
+   * If you need values or derivatives of all tensor product polynomials then
+   * use this function, rather than using any of the <tt>compute_value</tt>,
+   * <tt>compute_grad</tt> or <tt>compute_grad_grad</tt> functions, see below,
+   * in a loop over all tensor product polynomials.
+   */
+  void compute (const Point<dim>            &unit_point,
+                std::vector<double>         &values,
+                std::vector<Tensor<1,dim> > &grads,
+                std::vector<Tensor<2,dim> > &grad_grads,
+                std::vector<Tensor<3,dim> > &third_derivatives,
+                std::vector<Tensor<4,dim> > &fourth_derivatives) const;
+
+  /**
+   * Computes the value of the <tt>i</tt>th tensor product polynomial at
+   * <tt>unit_point</tt>. Here <tt>i</tt> is given in tensor product
+   * numbering.
+   *
+   * Note, that using this function within a loop over all tensor product
+   * polynomials is not efficient, because then each point value of the
+   * underlying (one-dimensional) polynomials is (unnecessarily) computed
+   * several times.  Instead use the <tt>compute</tt> function, see above,
+   * with <tt>values.size()==n_tensor_pols</tt> to get the point values of all
+   * tensor polynomials all at once and in a much more efficient way.
+   */
+  double compute_value (const unsigned int i,
+                        const Point<dim> &p) const;
+
+  /**
+   * Computes the <tt>order</tt>th derivative of the <tt>i</tt>th tensor
+   * product polynomial at <tt>unit_point</tt>. Here <tt>i</tt> is given in
+   * tensor product numbering.
+   *
+   * Note, that using this function within a loop over all tensor product
+   * polynomials is not efficient, because then each derivative value of the
+   * underlying (one-dimensional) polynomials is (unnecessarily) computed
+   * several times.  Instead use the compute() function, see above, with the
+   * size of the appropriate parameter set to n() to get the point value of
+   * all tensor polynomials all at once and in a much more efficient way.
+   *
+   * @tparam order The derivative order.
+   */
+  template <int order>
+  Tensor<order,dim> compute_derivative (const unsigned int i,
+                                        const Point<dim> &p) const;
+
+  /**
+   * Computes the grad of the <tt>i</tt>th tensor product polynomial at
+   * <tt>unit_point</tt>. Here <tt>i</tt> is given in tensor product
+   * numbering.
+   *
+   * Note, that using this function within a loop over all tensor product
+   * polynomials is not efficient, because then each derivative value of the
+   * underlying (one-dimensional) polynomials is (unnecessarily) computed
+   * several times.  Instead use the <tt>compute</tt> function, see above,
+   * with <tt>grads.size()==n_tensor_pols</tt> to get the point value of all
+   * tensor polynomials all at once and in a much more efficient way.
+   */
+  Tensor<1,dim> compute_grad (const unsigned int i,
+                              const Point<dim> &p) const;
+
+  /**
+   * Computes the second derivative (grad_grad) of the <tt>i</tt>th tensor
+   * product polynomial at <tt>unit_point</tt>. Here <tt>i</tt> is given in
+   * tensor product numbering.
+   *
+   * Note, that using this function within a loop over all tensor product
+   * polynomials is not efficient, because then each derivative value of the
+   * underlying (one-dimensional) polynomials is (unnecessarily) computed
+   * several times.  Instead use the <tt>compute</tt> function, see above,
+   * with <tt>grad_grads.size()==n_tensor_pols</tt> to get the point value of
+   * all tensor polynomials all at once and in a much more efficient way.
+   */
+  Tensor<2,dim> compute_grad_grad (const unsigned int i,
+                                   const Point<dim> &p) const;
+
+  /**
+   * Returns the number of tensor product polynomials. It is the product of
+   * the number of polynomials in each coordinate direction.
+   */
+  unsigned int n () const;
+
+private:
+  /**
+   * Copy of the vector <tt>pols</tt> of polynomials given to the constructor.
+   */
+  std::vector<std::vector<Polynomials::Polynomial<double> > > polynomials;
+
+  /**
+   * Number of tensor product polynomials. This is <tt>Nx*Ny*Nz</tt>, or with
+   * terms dropped if the number of space dimensions is less than 3.
+   */
+  unsigned int n_tensor_pols;
+
+  /**
+   * Each tensor product polynomial <tt>i</tt> is a product of one-dimensional
+   * polynomials in each space direction. Compute the indices of these one-
+   * dimensional polynomials for each space direction, given the index
+   * <tt>i</tt>.
+   */
+  void compute_index (const unsigned int i,
+                      unsigned int       (&indices)[dim]) const;
+
+  /**
+   * Given the input to the constructor, compute <tt>n_tensor_pols</tt>.
+   */
+  static
+  unsigned int
+  get_n_tensor_pols (const std::vector<std::vector<Polynomials::Polynomial<double> > > &pols);
+};
+
+/** @} */
+
+#ifndef DOXYGEN
+
+
+/* ---------------- template and inline functions ---------- */
+
+
+template <int dim, typename PolynomialType>
+template <class Pol>
+inline
+TensorProductPolynomials<dim,PolynomialType>::
+TensorProductPolynomials(const std::vector<Pol> &pols)
+  :
+  polynomials (pols.begin(), pols.end()),
+  n_tensor_pols(Utilities::fixed_power<dim>(pols.size())),
+  index_map(n_tensor_pols),
+  index_map_inverse(n_tensor_pols)
+{
+  // per default set this index map to identity. This map can be changed by
+  // the user through the set_numbering() function
+  for (unsigned int i=0; i<n_tensor_pols; ++i)
+    {
+      index_map[i]=i;
+      index_map_inverse[i]=i;
+    }
+}
+
+
+
+template <int dim, typename PolynomialType>
+inline
+unsigned int
+TensorProductPolynomials<dim,PolynomialType>::n() const
+{
+  if (dim == 0)
+    return numbers::invalid_unsigned_int;
+  else
+    return n_tensor_pols;
+}
+
+
+
+template <int dim, typename PolynomialType>
+inline
+const std::vector<unsigned int> &
+TensorProductPolynomials<dim,PolynomialType>::get_numbering() const
+{
+  return index_map;
+}
+
+
+template <int dim, typename PolynomialType>
+inline
+const std::vector<unsigned int> &
+TensorProductPolynomials<dim,PolynomialType>::get_numbering_inverse() const
+{
+  return index_map_inverse;
+}
+
+template <int dim, typename PolynomialType>
+template <int order>
+Tensor<order,dim>
+TensorProductPolynomials<dim,PolynomialType>::compute_derivative
+(const unsigned int  i,
+ const Point<dim>   &p) const
+{
+  unsigned int indices[dim];
+  compute_index (i, indices);
+
+  double v [dim][5];
+  {
+    std::vector<double> tmp (5);
+    for (unsigned int d=0; d<dim; ++d)
+      {
+        polynomials[indices[d]].value (p(d), tmp);
+        v[d][0] = tmp[0];
+        v[d][1] = tmp[1];
+        v[d][2] = tmp[2];
+        v[d][3] = tmp[3];
+        v[d][4] = tmp[4];
+      }
+  }
+
+  Tensor<order,dim> derivative;
+  switch (order)
+    {
+    case 1:
+    {
+      Tensor<1,dim> &derivative_1 = *reinterpret_cast<Tensor<1,dim>*>(&derivative);
+      for (unsigned int d=0; d<dim; ++d)
+        {
+          derivative_1[d] = 1.;
+          for (unsigned int x=0; x<dim; ++x)
+            {
+              unsigned int x_order=0;
+              if (d==x) ++x_order;
+
+              derivative_1[d] *= v[x][x_order];
+            }
+        }
+
+      return derivative;
+    }
+    case 2:
+    {
+      Tensor<2,dim> &derivative_2 = *reinterpret_cast<Tensor<2,dim>*>(&derivative);
+      for (unsigned int d1=0; d1<dim; ++d1)
+        for (unsigned int d2=0; d2<dim; ++d2)
+          {
+            derivative_2[d1][d2] = 1.;
+            for (unsigned int x=0; x<dim; ++x)
+              {
+                unsigned int x_order=0;
+                if (d1==x) ++x_order;
+                if (d2==x) ++x_order;
+
+                derivative_2[d1][d2] *= v[x][x_order];
+              }
+          }
+
+      return derivative;
+    }
+    case 3:
+    {
+      Tensor<3,dim> &derivative_3 = *reinterpret_cast<Tensor<3,dim>*>(&derivative);
+      for (unsigned int d1=0; d1<dim; ++d1)
+        for (unsigned int d2=0; d2<dim; ++d2)
+          for (unsigned int d3=0; d3<dim; ++d3)
+            {
+              derivative_3[d1][d2][d3] = 1.;
+              for (unsigned int x=0; x<dim; ++x)
+                {
+                  unsigned int x_order=0;
+                  if (d1==x) ++x_order;
+                  if (d2==x) ++x_order;
+                  if (d3==x) ++x_order;
+
+                  derivative_3[d1][d2][d3] *= v[x][x_order];
+                }
+            }
+
+      return derivative;
+    }
+    case 4:
+    {
+      Tensor<4,dim> &derivative_4 = *reinterpret_cast<Tensor<4,dim>*>(&derivative);
+      for (unsigned int d1=0; d1<dim; ++d1)
+        for (unsigned int d2=0; d2<dim; ++d2)
+          for (unsigned int d3=0; d3<dim; ++d3)
+            for (unsigned int d4=0; d4<dim; ++d4)
+              {
+                derivative_4[d1][d2][d3][d4] = 1.;
+                for (unsigned int x=0; x<dim; ++x)
+                  {
+                    unsigned int x_order=0;
+                    if (d1==x) ++x_order;
+                    if (d2==x) ++x_order;
+                    if (d3==x) ++x_order;
+                    if (d4==x) ++x_order;
+
+                    derivative_4[d1][d2][d3][d4] *= v[x][x_order];
+                  }
+              }
+
+      return derivative;
+    }
+    default:
+    {
+      Assert (false, ExcNotImplemented());
+      return derivative;
+    }
+    }
+}
+
+template <int dim>
+template <int order>
+Tensor<order,dim>
+AnisotropicPolynomials<dim>::compute_derivative (const unsigned int i,
+                                                 const Point<dim> &p) const
+{
+  unsigned int indices[dim];
+  compute_index (i, indices);
+
+  std::vector<std::vector<double> > v(dim, std::vector<double> (order+1));
+  for (unsigned int d=0; d<dim; ++d)
+    polynomials[d][indices[d]].value(p(d), v[d]);
+
+  Tensor<order,dim> derivative;
+  switch (order)
+    {
+    case 1:
+    {
+      Tensor<1,dim> &derivative_1 = *reinterpret_cast<Tensor<1,dim>*>(&derivative);
+      for (unsigned int d=0; d<dim; ++d)
+        {
+          derivative_1[d] = 1.;
+          for (unsigned int x=0; x<dim; ++x)
+            {
+              unsigned int x_order=0;
+              if (d==x) ++x_order;
+
+              derivative_1[d] *= v[x][x_order];
+            }
+        }
+
+      return derivative;
+    }
+    case 2:
+    {
+      Tensor<2,dim> &derivative_2 = *reinterpret_cast<Tensor<2,dim>*>(&derivative);
+      for (unsigned int d1=0; d1<dim; ++d1)
+        for (unsigned int d2=0; d2<dim; ++d2)
+          {
+            derivative_2[d1][d2] = 1.;
+            for (unsigned int x=0; x<dim; ++x)
+              {
+                unsigned int x_order=0;
+                if (d1==x) ++x_order;
+                if (d2==x) ++x_order;
+
+                derivative_2[d1][d2] *= v[x][x_order];
+              }
+          }
+
+      return derivative;
+    }
+    case 3:
+    {
+      Tensor<3,dim> &derivative_3 = *reinterpret_cast<Tensor<3,dim>*>(&derivative);
+      for (unsigned int d1=0; d1<dim; ++d1)
+        for (unsigned int d2=0; d2<dim; ++d2)
+          for (unsigned int d3=0; d3<dim; ++d3)
+            {
+              derivative_3[d1][d2][d3] = 1.;
+              for (unsigned int x=0; x<dim; ++x)
+                {
+                  unsigned int x_order=0;
+                  if (d1==x) ++x_order;
+                  if (d2==x) ++x_order;
+                  if (d3==x) ++x_order;
+
+                  derivative_3[d1][d2][d3] *= v[x][x_order];
+                }
+            }
+
+      return derivative;
+    }
+    case 4:
+    {
+      Tensor<4,dim> &derivative_4 = *reinterpret_cast<Tensor<4,dim>*>(&derivative);
+      for (unsigned int d1=0; d1<dim; ++d1)
+        for (unsigned int d2=0; d2<dim; ++d2)
+          for (unsigned int d3=0; d3<dim; ++d3)
+            for (unsigned int d4=0; d4<dim; ++d4)
+              {
+                derivative_4[d1][d2][d3][d4] = 1.;
+                for (unsigned int x=0; x<dim; ++x)
+                  {
+                    unsigned int x_order=0;
+                    if (d1==x) ++x_order;
+                    if (d2==x) ++x_order;
+                    if (d3==x) ++x_order;
+                    if (d4==x) ++x_order;
+
+                    derivative_4[d1][d2][d3][d4] *= v[x][x_order];
+                  }
+              }
+
+      return derivative;
+    }
+    default:
+    {
+      Assert (false, ExcNotImplemented());
+      return derivative;
+    }
+    }
+}
+
+
+
+#endif // DOXYGEN
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/tensor_product_polynomials_bubbles.h b/include/deal.II/base/tensor_product_polynomials_bubbles.h
new file mode 100644
index 0000000..0540d2a
--- /dev/null
+++ b/include/deal.II/base/tensor_product_polynomials_bubbles.h
@@ -0,0 +1,350 @@
+// ---------------------------------------------------------------------
+// $Id$
+//
+// Copyright (C) 2012 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__tensor_product_polynomials_bubbles_h
+#define dealii__tensor_product_polynomials_bubbles_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/tensor.h>
+#include <deal.II/base/point.h>
+#include <deal.II/base/polynomial.h>
+#include <deal.II/base/tensor_product_polynomials.h>
+#include <deal.II/base/utilities.h>
+
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/**
+ * @addtogroup Polynomials
+ * @{
+ */
+
+/**
+ * Tensor product of given polynomials and bubble functions of form
+ * $(2*x_j-1)^{degree-1}\prod_{i=0}^{dim-1}(x_i(1-x_i))$. This class inherits
+ * most of its functionality from TensorProductPolynomials. The bubble
+ * enrichments are added for the last indices.
+ *
+ * @author Daniel Arndt, 2015
+ */
+template <int dim>
+class TensorProductPolynomialsBubbles : public TensorProductPolynomials<dim>
+{
+public:
+  /**
+   * Access to the dimension of this object, for checking and automatic
+   * setting of dimension in other classes.
+   */
+  static const unsigned int dimension = dim;
+
+  /**
+   * Constructor. <tt>pols</tt> is a vector of objects that should be derived
+   * or otherwise convertible to one-dimensional polynomial objects. It will
+   * be copied element by element into a private variable.
+   */
+  template <class Pol>
+  TensorProductPolynomialsBubbles (const std::vector<Pol> &pols);
+
+  /**
+   * Computes the value and the derivatives (one output vector per order, up
+   * to the fourth) of each tensor product polynomial at <tt>unit_point</tt>.
+   *
+   * The size of the vectors must either be equal 0 or equal n(). In the first
+   * case, the function will not compute these values.
+   *
+   * If you need values or derivatives of all tensor product polynomials then
+   * use this function, rather than using any of the compute_value(),
+   * compute_grad() or compute_grad_grad() functions, see below, in a loop
+   * over all tensor product polynomials.
+   */
+  void compute (const Point<dim>            &unit_point,
+                std::vector<double>         &values,
+                std::vector<Tensor<1,dim> > &grads,
+                std::vector<Tensor<2,dim> > &grad_grads,
+                std::vector<Tensor<3,dim> > &third_derivatives,
+                std::vector<Tensor<4,dim> > &fourth_derivatives) const;
+
+  /**
+   * Computes the value of the <tt>i</tt>th tensor product polynomial at
+   * the point <tt>p</tt>. Here <tt>i</tt> is given in tensor product
+   * numbering.
+   *
+   * Note that using this function within a loop over all tensor product
+   * polynomials is not efficient, because then each point value of the
+   * underlying (one-dimensional) polynomials is (unnecessarily) computed
+   * several times.  Instead use the compute() function with
+   * <tt>values.size()==</tt>n() to get the point values of all tensor
+   * polynomials all at once and in a much more efficient way.
+   */
+  double compute_value (const unsigned int i,
+                        const Point<dim> &p) const;
+
+  /**
+   * Computes the order @p order derivative of the <tt>i</tt>th tensor product
+   * polynomial at the point <tt>p</tt>. Here <tt>i</tt> is given in tensor
+   * product numbering. For the bubble functions only the orders 1 and 2 are
+   * implemented.
+   *
+   * Note that calling this function in a loop over all polynomials is not
+   * efficient, because each derivative value of the underlying
+   * (one-dimensional) polynomials is then computed several times. Instead use
+   * the compute() function, see above, with the size of the appropriate
+   * parameter set to n() to get all values at once and more efficiently.
+   */
+  template <int order>
+  Tensor<order,dim> compute_derivative (const unsigned int i,
+                                        const Point<dim> &p) const;
+
+  /**
+   * Computes the grad of the <tt>i</tt>th tensor product polynomial at
+   * the point <tt>p</tt>. Here <tt>i</tt> is given in tensor product
+   * numbering.
+   *
+   * Note that using this function within a loop over all tensor product
+   * polynomials is not efficient, because then each derivative value of the
+   * underlying (one-dimensional) polynomials is (unnecessarily) computed
+   * several times.  Instead use the compute() function, see above, with
+   * <tt>grads.size()==</tt>n() to get the point value of all tensor
+   * polynomials all at once and in a much more efficient way.
+   */
+  Tensor<1,dim> compute_grad (const unsigned int i,
+                              const Point<dim> &p) const;
+
+  /**
+   * Computes the second derivative (grad_grad) of the <tt>i</tt>th tensor
+   * product polynomial at the point <tt>p</tt>. Here <tt>i</tt> is given in
+   * tensor product numbering.
+   *
+   * Note that using this function within a loop over all tensor product
+   * polynomials is not efficient, because then each derivative value of the
+   * underlying (one-dimensional) polynomials is (unnecessarily) computed
+   * several times.  Instead use the compute() function, see above, with
+   * <tt>grad_grads.size()==</tt>n() to get the point value of all tensor
+   * polynomials all at once and in a much more efficient way.
+   */
+  Tensor<2,dim> compute_grad_grad (const unsigned int i,
+                                   const Point<dim> &p) const;
+
+  /**
+   * Returns the number of tensor product polynomials plus the bubble
+   * enrichments. For <i>n</i> 1d polynomials this is <i>n<sup>dim</sup>+1</i>
+   * if the maximum degree of the polynomials is one and
+   * <i>n<sup>dim</sup>+dim</i> otherwise.
+   */
+  unsigned int n () const;
+};
+
+/** @} */
+
+
+/* ---------------- template and inline functions ---------- */
+
+#ifndef DOXYGEN
+
+template <int dim>
+template <class Pol>
+inline
+TensorProductPolynomialsBubbles<dim>::
+TensorProductPolynomialsBubbles(const std::vector<Pol> &pols)
+  :
+  TensorProductPolynomials<dim>(pols)
+{
+  // one bubble function for degree <= 1, one per coordinate direction otherwise
+  const unsigned int degree = this->polynomials.size()-1;
+  const unsigned int end_index = this->n_tensor_pols + ((degree>1) ? dim : 1);
+  for (unsigned int index=this->n_tensor_pols; index<end_index; ++index)
+    {
+      this->index_map.push_back(index);
+      this->index_map_inverse.push_back(index);
+    }
+}
+
+
+template <int dim>
+inline
+unsigned int
+TensorProductPolynomialsBubbles<dim>::n() const
+{
+  // one bubble for q_degree<=1, dim bubbles otherwise -- as in the constructor
+  return this->n_tensor_pols + ((this->polynomials.size()-1<=1) ? 1 : dim);
+}
+
+
+// Specialization for dim==0: reports numbers::invalid_unsigned_int.
+template <>
+inline
+unsigned int
+TensorProductPolynomialsBubbles<0>::n() const
+{
+  return numbers::invalid_unsigned_int;
+}
+
+template <int dim>
+template <int order>
+Tensor<order,dim>
+TensorProductPolynomialsBubbles<dim>::compute_derivative (const unsigned int i,
+                                                          const Point<dim> &p) const
+{
+  const unsigned int q_degree = this->polynomials.size()-1;
+  const unsigned int max_q_indices = this->n_tensor_pols;
+  const unsigned int n_bubbles = ((q_degree<=1)?1:dim);
+  (void)n_bubbles; // only referenced inside the Assert below
+  Assert (i<max_q_indices+n_bubbles, ExcInternalError());
+
+  // regular basis functions are delegated to the base class
+  if (i<max_q_indices)
+    return this->TensorProductPolynomials<dim>::template compute_derivative<order>(i,p);
+
+  const unsigned int comp = i - this->n_tensor_pols; // number of the bubble function
+
+  Tensor<order,dim> derivative;
+  switch (order)
+    {
+    case 1:
+    {
+      Tensor<1,dim> &derivative_1 = *reinterpret_cast<Tensor<1,dim>*>(&derivative);
+
+      for (unsigned int d=0; d<dim ; ++d)
+        {
+          derivative_1[d] = 1.;
+          // d-th component of grad(\prod_{j} 4*x_j*(1-x_j))(p)
+          for (unsigned j=0; j<dim; ++j)
+            derivative_1[d] *= (d==j ? 4*(1-2*p(j)) : 4*p(j)*(1-p(j)));
+          // times (2*x_comp-1)^{q_degree-1} (the loop index shadows argument i)
+          for (unsigned int i=0; i<q_degree-1; ++i)
+            derivative_1[d]*=2*p(comp)-1;
+        }
+
+      if (q_degree>=2)
+        {
+          // value of \prod_{j} 4*x_j*(1-x_j) at p
+          double value=1.;
+          for (unsigned int j=0; j < dim; ++j)
+            value*=4*p(j)*(1-p(j));
+          // times d/dx_comp of (2*x_comp-1)^{q_degree-1}; added to entry comp
+          double tmp=value*2*(q_degree-1);
+          for (unsigned int i=0; i<q_degree-2; ++i)
+            tmp*=2*p(comp)-1;
+          derivative_1[comp]+=tmp;
+        }
+
+      return derivative;
+    }
+    case 2:
+    {
+      Tensor<2,dim> &derivative_2 = *reinterpret_cast<Tensor<2,dim>*>(&derivative);
+
+      double v [dim+1][3]; // v[c][k]: k-th derivative of the c-th 1d factor,
+      {                    // c<dim: 4*x_c*(1-x_c), c==dim: (2*x_comp-1)^{q_degree-1}
+        for (unsigned int c=0; c<dim; ++c)
+          {
+            v[c][0] = 4*p(c)*(1-p(c));
+            v[c][1] = 4*(1-2*p(c));
+            v[c][2] = -8;
+          }
+
+        double tmp=1.;
+        for (unsigned int i=0; i<q_degree-1; ++i)
+          tmp *= 2*p(comp)-1;
+        v[dim][0] = tmp;
+
+        if (q_degree>=2)
+          {
+            double tmp = 2*(q_degree-1);
+            for (unsigned int i=0; i<q_degree-2; ++i)
+              tmp *= 2*p(comp)-1;
+            v[dim][1] = tmp;
+          }
+        else
+          v[dim][1] = 0.;
+
+        if (q_degree>=3)
+          {
+            double tmp=4*(q_degree-2)*(q_degree-1);
+            for (unsigned int i=0; i<q_degree-3; ++i)
+              tmp *= 2*p(comp)-1;
+            v[dim][2] = tmp;
+          }
+        else
+          v[dim][2] = 0.;
+      }
+
+      // (\partial_j \partial_k \psi) * monomial, \psi = \prod_c 4*x_c*(1-x_c)
+      Tensor<2,dim> grad_grad_1;
+      for (unsigned int d1=0; d1<dim; ++d1)
+        for (unsigned int d2=0; d2<dim; ++d2)
+          {
+            grad_grad_1[d1][d2] = v[dim][0];
+            for (unsigned int x=0; x<dim; ++x)
+              {
+                unsigned int derivative=0;
+                if (d1==x || d2==x)
+                  {
+                    if (d1==d2)
+                      derivative=2;
+                    else
+                      derivative=1;
+                  }
+                grad_grad_1[d1][d2] *= v[x][derivative];
+              }
+          }
+
+      // (\partial_j \psi) * (\partial_k monomial)
+      // and (\partial_k \psi) * (\partial_j monomial); only row/column comp is set
+      Tensor<2,dim> grad_grad_2;
+      Tensor<2,dim> grad_grad_3;
+      for (unsigned int d=0; d<dim; ++d)
+        {
+          grad_grad_2[d][comp] = v[dim][1];
+          grad_grad_3[comp][d] = v[dim][1];
+          for (unsigned int x=0; x<dim; ++x)
+            {
+              grad_grad_2[d][comp] *= v[x][d==x];
+              grad_grad_3[comp][d] *= v[x][d==x];
+            }
+        }
+
+      // \psi * (\partial_j \partial_k monomial), entry [comp][comp] only, and sum
+      double psi_value = 1.;
+      for (unsigned int x=0; x<dim; ++x)
+        psi_value *= v[x][0];
+
+      for (unsigned int d1=0; d1<dim; ++d1)
+        for (unsigned int d2=0; d2<dim; ++d2)
+          derivative_2[d1][d2] = grad_grad_1[d1][d2]
+                                 +grad_grad_2[d1][d2]
+                                 +grad_grad_3[d1][d2];
+      derivative_2[comp][comp]+=psi_value*v[dim][2];
+
+      return derivative;
+    }
+    default:
+    {
+      Assert (false, ExcNotImplemented());
+      return derivative;
+    }
+    }
+}
+
+
+#endif // DOXYGEN
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/tensor_product_polynomials_const.h b/include/deal.II/base/tensor_product_polynomials_const.h
new file mode 100644
index 0000000..44039c6
--- /dev/null
+++ b/include/deal.II/base/tensor_product_polynomials_const.h
@@ -0,0 +1,214 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2012 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__tensor_product_polynomials_const_h
+#define dealii__tensor_product_polynomials_const_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/tensor.h>
+#include <deal.II/base/point.h>
+#include <deal.II/base/polynomial.h>
+#include <deal.II/base/tensor_product_polynomials.h>
+#include <deal.II/base/utilities.h>
+
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/**
+ * @addtogroup Polynomials
+ * @{
+ */
+
+/**
+ * Tensor product of given polynomials and a locally constant function. This
+ * class inherits most of its functionality from TensorProductPolynomials. It
+ * works similarly to that class but adds a constant function for the last
+ * index.
+ *
+ * @author Timo Heister, 2012
+ */
+template <int dim>
+class TensorProductPolynomialsConst : public TensorProductPolynomials<dim>
+{
+public:
+  /**
+   * Access to the dimension of this object, for checking and automatic
+   * setting of dimension in other classes.
+   */
+  static const unsigned int dimension = dim;
+
+  /**
+   * Constructor. <tt>pols</tt> is a vector of objects that should be derived
+   * or otherwise convertible to one-dimensional polynomial objects. It will
+   * be copied element by element into a private variable.
+   */
+  template <class Pol>
+  TensorProductPolynomialsConst (const std::vector<Pol> &pols);
+
+  /**
+   * Computes the value and the derivatives (one output vector per order, up
+   * to the fourth) of each tensor product polynomial at <tt>unit_point</tt>.
+   *
+   * The size of the vectors must either be equal 0 or equal n(). In the first
+   * case, the function will not compute these values.
+   *
+   * If you need values or derivatives of all tensor product polynomials then
+   * use this function, rather than using any of the compute_value(),
+   * compute_grad() or compute_grad_grad() functions, see below, in a loop
+   * over all tensor product polynomials.
+   */
+  void compute (const Point<dim>            &unit_point,
+                std::vector<double>         &values,
+                std::vector<Tensor<1,dim> > &grads,
+                std::vector<Tensor<2,dim> > &grad_grads,
+                std::vector<Tensor<3,dim> > &third_derivatives,
+                std::vector<Tensor<4,dim> > &fourth_derivatives) const;
+
+  /**
+   * Computes the value of the <tt>i</tt>th tensor product polynomial at
+   * the point <tt>p</tt>. Here <tt>i</tt> is given in tensor product
+   * numbering.
+   *
+   * Note that using this function within a loop over all tensor product
+   * polynomials is not efficient, because then each point value of the
+   * underlying (one-dimensional) polynomials is (unnecessarily) computed
+   * several times.  Instead use the compute() function with
+   * <tt>values.size()==</tt>n() to get the point values of all tensor
+   * polynomials all at once and in a much more efficient way.
+   */
+  double compute_value (const unsigned int i,
+                        const Point<dim> &p) const;
+
+  /**
+   * Computes the <tt>order</tt>th derivative of the <tt>i</tt>th tensor
+   * product polynomial at the point <tt>p</tt>. Here <tt>i</tt> is given in
+   * tensor product numbering.
+   *
+   * Note that using this function within a loop over all tensor product
+   * polynomials is not efficient, because then each derivative value of the
+   * underlying (one-dimensional) polynomials is (unnecessarily) computed
+   * several times.  Instead use the compute() function, see above, with the
+   * size of the appropriate parameter set to n() to get the point value of
+   * all tensor polynomials all at once and in a much more efficient way.
+   *
+   * @tparam order The derivative order.
+   */
+  template <int order>
+  Tensor<order,dim> compute_derivative (const unsigned int i,
+                                        const Point<dim> &p) const;
+
+  /**
+   * Computes the grad of the <tt>i</tt>th tensor product polynomial at
+   * the point <tt>p</tt>. Here <tt>i</tt> is given in tensor product
+   * numbering.
+   *
+   * Note that using this function within a loop over all tensor product
+   * polynomials is not efficient, because then each derivative value of the
+   * underlying (one-dimensional) polynomials is (unnecessarily) computed
+   * several times.  Instead use the compute() function, see above, with
+   * <tt>grads.size()==</tt>n() to get the point value of all tensor
+   * polynomials all at once and in a much more efficient way.
+   */
+  Tensor<1,dim> compute_grad (const unsigned int i,
+                              const Point<dim> &p) const;
+
+  /**
+   * Computes the second derivative (grad_grad) of the <tt>i</tt>th tensor
+   * product polynomial at the point <tt>p</tt>. Here <tt>i</tt> is given in
+   * tensor product numbering.
+   *
+   * Note that using this function within a loop over all tensor product
+   * polynomials is not efficient, because then each derivative value of the
+   * underlying (one-dimensional) polynomials is (unnecessarily) computed
+   * several times.  Instead use the compute() function, see above, with
+   * <tt>grad_grads.size()==</tt>n() to get the point value of all tensor
+   * polynomials all at once and in a much more efficient way.
+   */
+  Tensor<2,dim> compute_grad_grad (const unsigned int i,
+                                   const Point<dim> &p) const;
+
+  /**
+   * Returns the number of tensor product polynomials plus the constant
+   * function. For <i>n</i> 1d polynomials this is <i>n<sup>dim</sup>+1</i>.
+   */
+  unsigned int n () const;
+};
+
+/** @} */
+
+
+/* ---------------- template and inline functions ---------- */
+
+#ifndef DOXYGEN
+
+template <int dim>
+template <class Pol>
+inline
+TensorProductPolynomialsConst<dim>::
+TensorProductPolynomialsConst(const std::vector<Pol> &pols)
+  : TensorProductPolynomials<dim>(pols)
+{
+  // append the index of the constant function to both renumbering maps
+  const unsigned int constant_index = this->n_tensor_pols;
+  this->index_map_inverse.push_back(constant_index);
+  this->index_map.push_back(constant_index);
+}
+
+
+// Count of the tensor product polynomials plus one for the constant function.
+template <int dim>
+inline
+unsigned int
+TensorProductPolynomialsConst<dim>::n() const
+{
+  return this->n_tensor_pols+1;
+}
+
+
+// Specialization for dim==0: reports numbers::invalid_unsigned_int.
+template <>
+inline
+unsigned int
+TensorProductPolynomialsConst<0>::n() const
+{
+  return numbers::invalid_unsigned_int;
+}
+
+template <int dim>
+template <int order>
+Tensor<order,dim>
+TensorProductPolynomialsConst<dim>::compute_derivative (const unsigned int i,
+                                                        const Point<dim> &p) const
+{
+  const unsigned int constant_index = this->n_tensor_pols;
+  Assert (i<=constant_index, ExcInternalError());
+
+  // the constant function: hand back a default-constructed tensor
+  if (i>=constant_index)
+    return Tensor<order,dim>();
+
+  // every other index belongs to a regular tensor product polynomial
+  return this->TensorProductPolynomials<dim>::template compute_derivative<order>(i,p);
+}
+
+
+#endif // DOXYGEN
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/thread_local_storage.h b/include/deal.II/base/thread_local_storage.h
new file mode 100644
index 0000000..8dc290d
--- /dev/null
+++ b/include/deal.II/base/thread_local_storage.h
@@ -0,0 +1,283 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2011 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__thread_local_storage_h
+#define dealii__thread_local_storage_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_THREADS
+#  include <tbb/enumerable_thread_specific.h>
+#endif
+
+
+
+DEAL_II_NAMESPACE_OPEN
+
+/*!@addtogroup threads */
+/*@{*/
+
+
+namespace Threads
+{
+  /**
+   * @brief A class that provides a separate storage location on each thread
+   * that accesses the object.
+   *
+   * This class offers ways so that every thread that accesses it has its own
+   * copy of an object of type T. In essence, accessing this object can never
+   * result in race conditions in multithreaded programs since no other thread
+   * than the current one can ever access it.
+   *
+   * The class builds on the Threading Building Blocks's
+   * tbb::enumerable_thread_specific class but wraps it in such a way that
+   * this class can also be used when deal.II is configured not to use threads
+   * at all -- in that case, this class simply stores a single copy of an
+   * object of type T.
+   *
+   * <h3>Construction and destruction</h3>
+   *
+   * Objects of this class can either be default constructed or by providing
+   * an "exemplar", i.e. an object of type T so that every time we need to
+   * create a T on a thread that doesn't already have such an object, it is
+   * copied from the exemplar.
+   *
+   * Upon destruction of objects of this class, all T objects that correspond
+   * to threads that have accessed this object are destroyed. Note that this
+   * may be before the time when a thread is terminated.
+   *
+   * <h3>Access</h3>
+   *
+   * The T object stored by this object can be accessed using the get()
+   * function. It provides a reference to a unique object when accessed from
+   * different threads. Objects of type T are created lazily, i.e. they are
+   * only created whenever a thread actually calls get().
+   */
+  template <typename T>
+  class ThreadLocalStorage
+  {
+  public:
+    /**
+     * Default constructor. Initialize each thread local object using its
+     * default constructor.
+     */
+    ThreadLocalStorage ();
+
+    /**
+     * A kind of copy constructor. Initialize each thread local object by
+     * copying the given object.
+     */
+    explicit ThreadLocalStorage (const T &t);
+
+    /**
+     * Copy constructor. Initialize each thread local object with the
+     * corresponding object of the given object.
+     */
+    ThreadLocalStorage (const ThreadLocalStorage<T> &t);
+
+    /**
+     * Return a reference to the data stored by this object for the current
+     * thread this function is called on.
+     *
+     * Note that there is no member function get() that is const and returns a
+     * const reference as one would expect. The reason is that if such a
+     * member function were called on a thread for which no thread-local
+     * object has been created yet, then one has to create such an object
+     * first which would certainly be a non-constant operation. If you need to
+     * call the get() function for a member variable of a class from a const
+     * member function, then you need to declare the member variable
+     * <code>mutable</code> to allow such access.
+     */
+    T &get ();
+
+    /**
+     * Same as above, except that @p exists is set to true if an element was
+     * already present for the current thread; false otherwise.
+     */
+    T &get (bool &exists);
+
+    /**
+     * Conversion operator that simply converts the thread-local object to the
+     * data type that it stores. This function is equivalent to calling the
+     * get() member function; its purpose is to make the TLS object look more
+     * like the object it is storing.
+     */
+    operator T &();
+
+    /**
+     * Copy the given argument into the storage space used to represent the
+     * current thread. Calling this function as <code>tls_data = object</code>
+     * is equivalent to calling <code>tls_data.get() = object</code>. The
+     * intent of this operator is to make the ThreadLocalStorage object look
+     * more like the object it represents on the current thread.
+     *
+     * @param t The object to be copied into the storage space used for the
+     * current thread.
+     *
+     * @return The current object, after the changes have been made
+     */
+    ThreadLocalStorage<T> &operator = (const T &t);
+
+    /**
+     * Remove the thread-local objects stored for all threads that have
+     * created one with this object (i.e., that have called get() at least
+     * once). This includes the current thread. If you call get()
+     * subsequently on this or any other thread, new objects will again be
+     * created.
+     *
+     * If deal.II has been configured to not use multithreading, then this
+     * function does not do anything at all. Note that this of course has
+     * different semantics: in the multithreaded context the objects are
+     * deleted and created again (possibly by copying from a sample object, if
+     * the appropriate constructor of this class was called), whereas in the
+     * non-multithreaded context the object is simply not touched at all. At
+     * the same time, the purpose of this function is to release memory other
+     * threads may have allocated for their own thread local objects after
+     * which every use of this object will require some kind of
+     * initialization. This is necessary both in the multithreaded and the
+     * non-multithreaded case.
+     */
+    void clear ();
+
+    /**
+     * Returns a reference to the internal Threading Building Blocks
+     * implementation. This function is really only useful if deal.II has been
+     * configured with multithreading and has no useful purpose otherwise.
+     */
+#ifdef DEAL_II_WITH_THREADS
+    tbb::enumerable_thread_specific<T> &
+#else
+    T &
+#endif
+    get_implementation();
+
+  private:
+#ifdef DEAL_II_WITH_THREADS
+    /**
+     * The data element we store. If we support threads, then this object will
+     * be of a type that provides a separate object for each thread.
+     * Otherwise, it is simply a single object of type T.
+     */
+    tbb::enumerable_thread_specific<T> data;
+#else
+    T data;
+#endif
+  };
+
+// ----------------- inline and template functions ----------------------------
+
+  template <typename T>
+  inline
+  ThreadLocalStorage<T>::ThreadLocalStorage()
+  {} // no mem-initializer: data is default-initialized
+
+  // Exemplar constructor: data is initialized from the given object t.
+  template <typename T>
+  inline
+  ThreadLocalStorage<T>::ThreadLocalStorage(const T &t)
+    :
+    data (t)
+  {}
+
+  // Copy t's underlying storage (t.data), not the wrapper object itself.
+  template <typename T>
+  inline
+  ThreadLocalStorage<T>::ThreadLocalStorage(const ThreadLocalStorage<T> &t)
+    :
+    data (t.data)
+  {}
+
+  // With threads, data.local() provides the object; otherwise data itself.
+  template <typename T>
+  inline
+  T &
+  ThreadLocalStorage<T>::get ()
+  {
+#ifndef DEAL_II_WITH_THREADS
+    return data;
+#else
+    return data.local();
+#endif
+  }
+
+  // Like get(); without threads, exists is unconditionally set to true.
+  template <typename T>
+  inline
+  T &
+  ThreadLocalStorage<T>::get (bool &exists)
+  {
+#ifndef DEAL_II_WITH_THREADS
+    exists = true;
+    return data;
+#else
+    return data.local(exists);
+#endif
+  }
+
+  // Conversion to T&: simply forwards to get().
+  template <typename T>
+  inline
+  ThreadLocalStorage<T>::operator T &()
+  {
+    return get();
+  }
+
+  template <typename T>
+  inline
+  ThreadLocalStorage<T> &
+  ThreadLocalStorage<T>::operator = (const T &t)
+  {
+    T &local_object = get();   // the object get() hands out on this thread
+    local_object = t;
+    return *this;
+  }
+
+  // Expose the data member itself; its type depends on DEAL_II_WITH_THREADS.
+  template <typename T>
+  inline
+#ifndef DEAL_II_WITH_THREADS
+  T &
+#else
+  tbb::enumerable_thread_specific<T> &
+#endif
+  ThreadLocalStorage<T>::get_implementation()
+  {
+    return data;
+  }
+
+
+  // Only the threaded build clears data; otherwise this function does nothing.
+  template <typename T>
+  inline
+  void
+  ThreadLocalStorage<T>::clear ()
+  {
+#ifdef DEAL_II_WITH_THREADS
+    data.clear ();
+#endif
+  }
+}   // end of implementation of namespace Threads
+
+/**
+ * @}
+ */
+
+
+//---------------------------------------------------------------------------
+DEAL_II_NAMESPACE_CLOSE
+// end of #ifndef dealii__thread_local_storage_h
+#endif
+//---------------------------------------------------------------------------
diff --git a/include/deal.II/base/thread_management.h b/include/deal.II/base/thread_management.h
new file mode 100644
index 0000000..66060f3
--- /dev/null
+++ b/include/deal.II/base/thread_management.h
@@ -0,0 +1,4030 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__thread_management_h
+#define dealii__thread_management_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/template_constraints.h>
+#include <deal.II/base/std_cxx11/tuple.h>
+#include <deal.II/base/std_cxx11/function.h>
+#include <deal.II/base/std_cxx11/shared_ptr.h>
+#include <deal.II/base/std_cxx11/bind.h>
+
+#ifdef DEAL_II_WITH_THREADS
+#  include <deal.II/base/std_cxx11/thread.h>
+#  include <deal.II/base/std_cxx11/mutex.h>
+#  include <deal.II/base/std_cxx11/condition_variable.h>
+#endif
+
+#include <iterator>
+#include <vector>
+#include <list>
+#include <utility>
+
+
+#ifdef DEAL_II_WITH_THREADS
+#  ifdef DEAL_II_USE_MT_POSIX
+#    include <pthread.h>
+#  endif
+#  include <tbb/task.h>
+#endif
+
+
+
+DEAL_II_NAMESPACE_OPEN
+
+/*!@addtogroup threads */
+/*@{*/
+
+
+/**
+ * A namespace for the implementation of thread management in deal.II. Most of
+ * the content of this namespace is discussed in detail in one of the reports
+ * linked to from the documentation page of deal.II.
+ *
+ * @ingroup threads
+ */
+namespace Threads
+{
+  /**
+   * This class is used instead of a true lock class when not using
+   * multithreading. It allows to write programs such that they start new
+   * threads and/or lock objects in multithreading mode, and use dummy thread
+   * management and synchronization classes instead when running in single-
+   * thread mode. Specifically, the new_thread() functions only call the
+   * function but wait for it to return instead of running it on another
+   * thread, and the mutexes do nothing really. The only reason to provide
+   * such a class is that the program can be compiled both in MT and non-MT
+   * mode without difference.
+   *
+   * @author Wolfgang Bangerth, 2000, 2003
+   */
+  class DummyThreadMutex
+  {
+  public:
+    /**
+     * Scoped lock class. When you declare an object of this type, you have to
+     * pass it a mutex, which is locked in the constructor of this class and
+     * unlocked in the destructor. The lock is thus held during the entire
+     * lifetime of this object, i.e. until the end of the present scope, which
+     * explains where the name comes from. This pattern of using locks with
+     * mutexes follows the resource-acquisition-is-initialization pattern, and
+     * was used first for mutexes by Doug Schmidt. It has the advantage that
+     * locking a mutex this way is exception-safe, i.e. when an exception is
+     * thrown between the locking and unlocking point, the destructor makes
+     * sure that the mutex is unlocked; this would not automatically be the
+     * case when you lock and unlock the mutex "by hand", i.e. using
+     * Mutex::acquire() and Mutex::release().
+     */
+    class ScopedLock
+    {
+    public:
+      /**
+       * Constructor. Lock the mutex. Since this is a dummy mutex class, this
+       * of course does nothing.
+       */
+      ScopedLock (DummyThreadMutex &) {}
+
+      /**
+       * Destructor. Unlock the mutex. Since this is a dummy mutex class, this
+       * of course does nothing.
+       */
+      ~ScopedLock () {}
+    };
+
+    /**
+     * Simulate acquisition of the mutex. As this class does nothing really,
+     * this function does nothing as well.
+     */
+    inline void acquire () const {}
+
+    /**
+     * Simulate release of the mutex. As this class does nothing really, this
+     * function does nothing as well.
+     */
+    inline void release () const {}
+  };
+
+
+
+  /**
+   * This class is used in single threaded mode instead of a class
+   * implementing real condition variable semantics. It allows to write
+   * programs such that they start new threads and/or lock objects in
+   * multithreading mode, and use dummy thread management and synchronization
+   * classes instead when running in single-thread mode. Specifically, the
+   * new_thread() functions only call the function but wait for it to return
+   * instead of running it on another thread, and the mutexes do nothing
+   * really. The only reason to provide such a class is that the program
+   * can be compiled both in MT and non-MT mode without difference.
+   *
+   * In this particular case, just as with mutexes, the functions do nothing,
+   * and by this provide the same semantics of condition variables as in
+   * multi-threaded mode.
+   *
+   * @author Wolfgang Bangerth, 2003
+   */
+  class DummyThreadCondition
+  {
+  public:
+    /**
+     * Signal to a single listener that a condition has been met, i.e. that
+     * some data will now be available. In single threaded mode, this
+     * function of course does nothing.
+     */
+    inline void signal () const {}
+
+    /**
+     * Signal to multiple listeners that a condition has been met, i.e. that
+     * some data will now be available. In single threaded mode, this
+     * function of course does nothing.
+     */
+    inline void broadcast () const {}
+
+    /**
+     * Wait for the condition to be signalled. Signal variables need to be
+     * guarded by a mutex which needs to be given to this function as an
+     * argument, see the man page of <code>pthread_cond_wait</code> for a
+     * description of the mechanisms. In single threaded mode, this
+     * function of course does nothing, but returns immediately.
+     */
+    inline void wait (DummyThreadMutex &) const {}
+  };
+
+
+
+  /**
+   * This class is used instead of a true barrier class when not using
+   * multithreading. It allows to write programs such that they use the same
+   * class names in multithreading and non-MT mode and thus may be compiled
+   * with or without thread-support without the need to use conditional
+   * compilation. Since a barrier class only makes sense in non-multithread
+   * mode if only one thread is to be synchronised (otherwise, the barrier
+   * could not be left, since the one thread is waiting for some other part of
+   * the program to reach a certain point of execution), the constructor of
+   * this class throws an exception if the <code>count</code> argument
+   * denoting the number of threads that need to be synchronized is not equal
+   * to one.
+   *
+   * @author Wolfgang Bangerth, 2001
+   */
+  class DummyBarrier
+  {
+  public:
+    /**
+     * Constructor. Since barriers are only useful in single-threaded mode if
+     * the number of threads to be synchronised is one, this constructor
+     * raises an exception if the <code>count</code> argument is not one.
+     */
+    DummyBarrier (const unsigned int  count,
+                  const char         *name = 0,
+                  void               *arg  = 0);
+
+    /**
+     * Wait for all threads to reach this point. Since there may only be one
+     * thread, return immediately, i.e. this function is a no-op.
+     */
+    int wait () const
+    {
+      return 0;
+    }
+
+    /**
+     * Dump the state of this object. Here: do nothing.
+     */
+    void dump () const {}
+
+    /**
+     * @addtogroup Exceptions
+     * @{
+     */
+
+    /**
+     * Exception.
+     */
+    DeclException1 (ExcBarrierSizeNotUseful,
+                    int,
+                    << "In single-thread mode, barrier sizes other than 1 are not "
+                    << "useful. You gave " << arg1 << ".");
+
+    //@}
+  };
+
+
+#ifdef DEAL_II_WITH_THREADS
+
+  /**
+   * Class implementing a Mutex. Mutexes are used to lock data structures to
+   * ensure that only a single thread of execution can access them at the same
+   * time.
+   *
+   * <h3>Copy semantics</h3>
+   *
+   * When copied, the receiving object does not receive any state from the
+   * object being copied, i.e. an entirely new mutex is created. This is
+   * consistent with expectations if a mutex is used as a member variable to
+   * lock the other member variables of a class: in that case, the mutex of
+   * the copied-to object should only guard the members of the copied-to
+   * object, not the members of both the copied-to and copied-from object.
+   *
+   * @author Wolfgang Bangerth, 2002, 2003, 2009
+   */
+  class Mutex
+  {
+  public:
+    /**
+     * Scoped lock class. When you declare an object of this type, you have to
+     * pass it a mutex, which is locked in the constructor of this class and
+     * unlocked in the destructor. The lock is thus held during the entire
+     * lifetime of this object, i.e. until the end of the present scope, which
+     * explains where the name comes from. This pattern of using locks with
+     * mutexes follows the resource-acquisition-is-initialization pattern, and
+     * was used first for mutexes by Doug Schmidt. It has the advantage that
+     * locking a mutex this way is exception-safe, i.e. when an exception is
+     * thrown between the locking and unlocking point, the destructor makes
+     * sure that the mutex is unlocked; this would not automatically be the
+     * case when you lock and unlock the mutex "by hand", i.e. using
+     * Mutex::acquire() and Mutex::release().
+     */
+    class ScopedLock
+    {
+    public:
+      /**
+       * Constructor. Lock the mutex.
+       */
+      ScopedLock (Mutex &m) : mutex(m)
+      {
+        mutex.acquire();
+      }
+
+      /**
+       * Destructor. Unlock the mutex that was locked in the constructor, by
+       * calling Mutex::release().
+       */
+      ~ScopedLock ()
+      {
+        mutex.release ();
+      }
+
+    private:
+      /**
+       * Store a reference to the mutex object.
+       */
+      Mutex &mutex;
+    };
+
+    /**
+     * Default constructor.
+     */
+    Mutex ()
+    {}
+
+    /**
+     * Copy constructor. As discussed in this class's documentation, no state
+     * is copied from the object given as argument.
+     */
+    Mutex (const Mutex &)
+      :
+      mutex()
+    {}
+
+
+    /**
+     * Acquire a mutex.
+     */
+    inline void acquire ()
+    {
+      mutex.lock();
+    }
+
+    /**
+     * Release the mutex again.
+     */
+    inline void release ()
+    {
+      mutex.unlock();
+    }
+
+  private:
+    /**
+     * Data object storing the mutex data
+     */
+    std_cxx11::mutex mutex;
+
+    /**
+     * Make the class implementing condition variables a friend, since it
+     * needs to access the mutex.
+     */
+    friend class ConditionVariable;
+  };
+
+
+  /**
+   * Class implementing a condition variable. The semantics of this class and
+   * its member functions are the same as those of the POSIX functions.
+   *
+   * @author Wolfgang Bangerth, 2003
+   */
+  class ConditionVariable
+  {
+  public:
+    /**
+     * Signal to a single listener that a condition has been met, i.e. that
+     * some data will now be available.
+     */
+    inline void signal ()
+    {
+      condition_variable.notify_one();
+    }
+
+    /**
+     * Signal to multiple listeners that a condition has been met, i.e. that
+     * some data will now be available.
+     */
+    inline void broadcast ()
+    {
+      condition_variable.notify_all();
+    }
+
+    /**
+     * Wait for the condition to be signalled. Signal variables need to be
+     * guarded by a mutex which needs to be given to this function as an
+     * argument, see the man page of <code>pthread_cond_wait</code> for a
+     * description of the mechanisms.
+     *
+     * The mutex must be held on entry. Unlike <code>pthread_cond_wait</code>,
+     * it is released again on exit (the adopting lock unlocks it).
+     */
+    inline void wait (Mutex &mutex)
+    {
+      std_cxx11::unique_lock<std_cxx11::mutex> lock(mutex.mutex,
+                                                    std_cxx11::adopt_lock);
+      condition_variable.wait (lock);
+    }
+
+  private:
+    /**
+     * Data object storing the necessary data.
+     */
+    std_cxx11::condition_variable condition_variable;
+  };
+
+
+  /**
+   * Implementation of a thread barrier class, based on the POSIX thread
+   * functions. POSIX barriers are a relatively new feature and are not
+   * supported on all systems.
+   *
+   * If the configuration detected the absence of these functions, then
+   * barriers will not be available, and creating objects of this class will
+   * result in an exception being thrown unless the count given for the parties
+   * waiting for the barrier is equal to one (as in this case waiting for the
+   * barrier is a no-operation, and we can dispense with the POSIX functions
+   * at all). The rest of the threading functionality will be available in its
+   * full extent, though, even if POSIX barriers are not available.
+   *
+   * @author Wolfgang Bangerth, 2002
+   */
+  class PosixThreadBarrier
+  {
+  public:
+    /**
+     * Constructor. Initialize the underlying POSIX barrier data structure.
+     */
+    PosixThreadBarrier (const unsigned int  count,
+                        const char         *name = 0,
+                        void               *arg  = 0);
+
+    /**
+     * Destructor. Release all resources.
+     */
+    ~PosixThreadBarrier ();
+
+    /**
+     * Wait for all threads to reach this point. The return value is zero for
+     * all participating threads except for one, for which the return value is
+     * some non-zero value. The operating system picks the special thread by
+     * some not further known method.
+     */
+    int wait ();
+
+  private:
+    /**
+     * Data object storing the POSIX data which we need to call the POSIX
+     * functions.
+     */
+#ifndef DEAL_II_USE_MT_POSIX_NO_BARRIERS
+    pthread_barrier_t barrier;
+#else
+    unsigned int count;
+#endif
+  };
+
+
+  /**
+   * If using POSIX functions, then alias the POSIX wrapper classes to the
+   * names we use throughout the library.
+   */
+  typedef PosixThreadBarrier Barrier;
+
+#else
+  /**
+   * In non-multithread mode, the mutex and thread management classes are
+   * aliased to dummy classes that actually do nothing, in particular not lock
+   * objects. Likewise for the barrier class.
+   */
+  typedef DummyThreadMutex     Mutex;
+
+  /**
+   * In non-multithread mode, the mutex and thread management classes are
+   * aliased to dummy classes that actually do nothing, in particular not lock
+   * objects. Likewise for the barrier class.
+   */
+  typedef DummyThreadCondition ConditionVariable;
+
+  /**
+   * In non-multithread mode, the mutex and thread management classes are
+   * aliased to dummy classes that actually do nothing, in particular not lock
+   * objects. Likewise for the barrier class.
+   */
+  typedef DummyBarrier         Barrier;
+#endif
+
+}
+
+
+namespace Threads
+{
+
+  /**
+   * Return the number of presently existing threads. This function may be
+   * useful in a situation where a large number of threads are running
+   * concurrently, and you want to decide whether creating another thread is
+   * reasonable or whether running the respective operation sequentially is
+   * more useful since already many more threads than processors are running.
+   *
+   * Note that the function returns the total number of threads, not those
+   * actually running. Some of the threads may be waiting for locks and
+   * mutexes, or may be sleeping until they are delivered with data to work
+   * on.
+   *
+   * Upon program start, this number is one. It is increased each time a
+   * thread is created using the Threads::new_thread function. It is decreased
+   * once a thread terminates by returning from the function that was spawned.
+   *
+   * Note that this means that only threads created and terminated through the
+   * interfaces of this namespace are taken care of. If threads are created by
+   * directly calling the respective functions of the operating system (e.g.
+   * <code>pthread_create</code> for the POSIX thread interface), or if they
+   * are killed (e.g. either through <code>pthread_exit</code> from the
+   * spawned thread, or <code>pthread_kill</code> from another thread), then
+   * these events are not registered and counted for the result of this
+   * function. Likewise, threads that the Threading Building Blocks library
+   * may have created to work on tasks created using the Threads::new_task
+   * functions are not counted.
+   *
+   * @ingroup threads
+   */
+  unsigned int n_existing_threads ();
+
+  /**
+   * Return a number used as id of this thread. This number is generated using
+   * the system call <code>getpid</code>, or, if it exists
+   * <code>gettid</code>. The result of either is converted to an integer and
+   * returned by this function.
+   *
+   * @todo As of now, none of our systems seems to support
+   * <code>gettid</code>, so that part of the code is untested yet.
+   *
+   * @ingroup threads
+   */
+  unsigned int this_thread_id ();
+
+  /**
+   * Split the range <code>[begin,end)</code> into <code>n_intervals</code>
+   * subintervals of (almost) equal size. If the number of elements in the
+   * whole range is not exactly divisible by <code>n_intervals</code>, the
+   * first few intervals are one element larger than the remaining ones.
+   * The type of the iterators has to fulfill the requirements of a forward
+   * iterator, i.e. <code>operator++</code> must be available, and of course
+   * it must be assignable.
+   *
+   * A list of subintervals is returned as a vector of pairs of iterators,
+   * where each pair denotes the range <code>[begin[i],end[i])</code>.
+   *
+   * @ingroup threads
+   */
+  template <typename ForwardIterator>
+  std::vector<std::pair<ForwardIterator,ForwardIterator> >
+  split_range (const ForwardIterator &begin,
+               const ForwardIterator &end,
+               const unsigned int n_intervals);
+
+  /**
+   * Split the interval <code>[begin,end)</code> into subintervals of (almost)
+   * equal size. This function works mostly as the one before, with the
+   * difference that instead of iterators, now values are taken that define
+   * the whole interval.
+   *
+   * @ingroup threads
+   */
+  std::vector<std::pair<unsigned int,unsigned int> >
+  split_interval (const unsigned int begin,
+                  const unsigned int end,
+                  const unsigned int n_intervals);
+
+  /**
+   * @cond internal
+   */
+
+  /**
+   * A namespace in which helper functions and the like for the threading
+   * subsystem are implemented. The members of this namespace are not meant
+   * for public use.
+   *
+   * @author Wolfgang Bangerth, 2003
+   */
+  namespace internal
+  {
+    /**
+     * @internal
+     *
+     * If in a sub-thread an exception is thrown, it is not propagated to the
+     * main thread. Therefore, the exception handler that is provided by the
+     * applications main function or some of its other parts will not be able
+     * to catch these exceptions. Therefore, we have to provide an exception
+     * handler in the top function of each sub-thread that at least catches
+     * the exception and prints some information, rather than letting the
+     * operating system to just kill the program without a message. In each of
+     * the functions we use as entry points to new threads, we therefore
+     * install a try-catch block, and if an exception of type
+     * <code>std::exception</code> is caught, it passes over control to this
+     * function, which will then provide some output.
+     */
+    void handle_std_exception (const std::exception &exc);
+
+    /**
+     * @internal
+     *
+     * Same as above, but the type of the exception is not derived from
+     * <code>std::exception</code>, so there is little way to provide
+     * something more useful.
+     */
+    void handle_unknown_exception ();
+
+    /**
+     * @internal
+     *
+     * The following function is used for internal bookkeeping of the number
+     * of existing threads. It is not thought for use in application programs,
+     * but only for use in the template functions below.
+     */
+    void register_thread ();
+
+    /**
+     * @internal
+     *
+     * The following function is used for internal bookkeeping of the number
+     * of existing threads. It is not thought for use in application programs,
+     * but only for use in the template functions below.
+     */
+    void deregister_thread ();
+  }
+
+  /**
+   * @endcond
+   */
+
+}   // end declarations of namespace Threads
+
+/* ----------- implementation of functions in namespace Threads ---------- */
+#ifndef DOXYGEN
+namespace Threads
+{
+  template <typename ForwardIterator>
+  std::vector<std::pair<ForwardIterator,ForwardIterator> >
+  split_range (const ForwardIterator &begin,
+               const ForwardIterator &end,
+               const unsigned int     n_intervals)
+  {
+    typedef std::pair<ForwardIterator,ForwardIterator> IteratorPair;
+
+    // requesting zero intervals yields an empty list; the general code
+    // below would otherwise divide by zero and index an empty vector
+    if (n_intervals==0)
+      return std::vector<IteratorPair>();
+    // shortcut for n_intervals==1, the common case in non-multithreaded mode
+    if (n_intervals==1)
+      return std::vector<IteratorPair> (1, IteratorPair(begin, end));
+
+    // if more than one interval requested, do the full work
+    const unsigned int n_elements              = std::distance (begin, end);
+    const unsigned int n_elements_per_interval = n_elements / n_intervals;
+    const unsigned int residual                = n_elements % n_intervals;
+
+    std::vector<IteratorPair> return_values (n_intervals);
+
+    return_values[0].first = begin;
+    for (unsigned int i=0; i<n_intervals; ++i)
+      {
+        if (i != n_intervals-1)
+          {
+            return_values[i].second = return_values[i].first;
+            // note: the cast is performed to avoid a warning of gcc
+            // that in the library `dist>=0' is checked (dist has a
+            // template type, which here is unsigned if no cast is
+            // performed)
+            std::advance (return_values[i].second,
+                          static_cast<signed int>(n_elements_per_interval));
+            // distribute residual in division equally among the first
+            // few subintervals
+            if (i < residual)
+              ++return_values[i].second;
+
+            return_values[i+1].first = return_values[i].second;
+          }
+        else
+          return_values[i].second = end;
+      }
+    return return_values;
+  }
+}
+
+#endif // DOXYGEN
+
+namespace Threads
+{
+  namespace internal
+  {
+    /**
+     * @internal
+     *
+     * Given an arbitrary type RT, store an element of it and grant access to
+     * it through functions get() and set(). There are specializations for
+     * reference types (which cannot be set), and for type void.
+     */
+    template <typename RT>
+    struct return_value
+    {
+      // Start out with a value-initialized object of type RT.
+      inline return_value () : value() {}
+
+      // Hand back a copy of the object currently stored.
+      inline RT get () const { return value; }
+
+      // Replace the stored object by the given one; the argument is taken
+      // by value and then assigned to the stored object.
+      inline void set (RT v) { value = v; }
+
+    private:
+      // The object handed to set() most recently (or the initial one).
+      RT value;
+    };
+
+
+    /**
+     * @internal
+     *
+     * Given an arbitrary type RT, store an element of it and grant access to
+     * it through functions get() and set(). This is the specialization for
+     * reference types: since they cannot be set after construction time, we
+     * store a pointer instead, that holds the address of the object being
+     * referenced.
+     */
+    template <typename RT> struct return_value<RT &>
+    {
+    private:
+      RT *value;
+    public:
+      inline return_value () : value(0) {}
+
+      inline RT &get () const
+      {
+        return *value;
+      }
+      inline void set (RT &v)
+      {
+        value = &v;
+      }
+    };
+
+
+    /**
+     * @internal
+     *
+     * Given an arbitrary type RT, store an element of it and grant access to
+     * it through functions get() and set(). This is the specialization for
+     * type void: there is obviously nothing to store, so no function set(),
+     * and a function get() that returns void.
+     */
+    template <> struct return_value<void>
+    {
+      static inline void get () {}
+    };
+  }
+
+
+
+  namespace internal
+  {
+    template <typename RT>
+    inline void call (const std_cxx11::function<RT ()> &function,
+                      internal::return_value<RT> &ret_val)
+    {
+      ret_val.set (function());
+    }
+
+
+    inline void call (const std_cxx11::function<void ()> &function,
+                      internal::return_value<void> &)
+    {
+      function();
+    }
+  }
+
+
+
+  namespace internal
+  {
+    /**
+     * @internal
+     *
+     * Construct a pointer to non-member function based on the template
+     * arguments: the return type RT and the list of argument types ArgList
+     * (a tuple type). The third template argument is the number of elements
+     * of ArgList; there are specializations of this class for each number
+     * of arguments from 0 to 10.
+     */
+    template <typename RT, typename ArgList,
+              int length = std_cxx11::tuple_size<ArgList>::value>
+    struct fun_ptr_helper;
+
+
+    /**
+     * @internal
+     *
+     * Construct a pointer to non-member function based on the template
+     * arguments. This is the specialization for 0 arguments.
+     */
+    template <typename RT, typename ArgList>
+    struct fun_ptr_helper<RT, ArgList, 0>
+    {
+      typedef RT (type) ();
+    };
+
+
+    /**
+     * @internal
+     *
+     * Construct a pointer to non-member function based on the template
+     * arguments. This is the specialization for 1 argument.
+     */
+    template <typename RT, typename ArgList>
+    struct fun_ptr_helper<RT, ArgList, 1>
+    {
+      typedef RT (type) (typename std_cxx11::tuple_element<0,ArgList>::type);
+    };
+
+
+    /**
+     * @internal
+     *
+     * Construct a pointer to non-member function based on the template
+     * arguments. This is the specialization for 2 arguments.
+     */
+    template <typename RT, typename ArgList>
+    struct fun_ptr_helper<RT, ArgList, 2>
+    {
+      typedef RT (type) (typename std_cxx11::tuple_element<0,ArgList>::type,
+                         typename std_cxx11::tuple_element<1,ArgList>::type);
+    };
+
+
+    /**
+     * @internal
+     *
+     * Construct a pointer to non-member function based on the template
+     * arguments. This is the specialization for 3 arguments.
+     */
+    template <typename RT, typename ArgList>
+    struct fun_ptr_helper<RT, ArgList, 3>
+    {
+      typedef RT (type) (typename std_cxx11::tuple_element<0,ArgList>::type,
+                         typename std_cxx11::tuple_element<1,ArgList>::type,
+                         typename std_cxx11::tuple_element<2,ArgList>::type);
+    };
+
+
+    /**
+     * @internal
+     *
+     * Construct a pointer to non-member function based on the template
+     * arguments. This is the specialization for 4 arguments.
+     */
+    template <typename RT, typename ArgList>
+    struct fun_ptr_helper<RT, ArgList, 4>
+    {
+      typedef RT (type) (typename std_cxx11::tuple_element<0,ArgList>::type,
+                         typename std_cxx11::tuple_element<1,ArgList>::type,
+                         typename std_cxx11::tuple_element<2,ArgList>::type,
+                         typename std_cxx11::tuple_element<3,ArgList>::type);
+    };
+
+
+    /**
+     * @internal
+     *
+     * Construct a pointer to non-member function based on the template
+     * arguments. This is the specialization for 5 arguments.
+     */
+    template <typename RT, typename ArgList>
+    struct fun_ptr_helper<RT, ArgList, 5>
+    {
+      typedef RT (type) (typename std_cxx11::tuple_element<0,ArgList>::type,
+                         typename std_cxx11::tuple_element<1,ArgList>::type,
+                         typename std_cxx11::tuple_element<2,ArgList>::type,
+                         typename std_cxx11::tuple_element<3,ArgList>::type,
+                         typename std_cxx11::tuple_element<4,ArgList>::type);
+    };
+
+
+    /**
+     * @internal
+     *
+     * Construct a pointer to non-member function based on the template
+     * arguments. This is the specialization for 6 arguments.
+     */
+    template <typename RT, typename ArgList>
+    struct fun_ptr_helper<RT, ArgList, 6>
+    {
+      typedef RT (type) (typename std_cxx11::tuple_element<0,ArgList>::type,
+                         typename std_cxx11::tuple_element<1,ArgList>::type,
+                         typename std_cxx11::tuple_element<2,ArgList>::type,
+                         typename std_cxx11::tuple_element<3,ArgList>::type,
+                         typename std_cxx11::tuple_element<4,ArgList>::type,
+                         typename std_cxx11::tuple_element<5,ArgList>::type);
+    };
+
+
+    /**
+     * @internal
+     *
+     * Construct a pointer to non-member function based on the template
+     * arguments. This is the specialization for 7 arguments.
+     */
+    template <typename RT, typename ArgList>
+    struct fun_ptr_helper<RT, ArgList, 7>
+    {
+      typedef RT (type) (typename std_cxx11::tuple_element<0,ArgList>::type,
+                         typename std_cxx11::tuple_element<1,ArgList>::type,
+                         typename std_cxx11::tuple_element<2,ArgList>::type,
+                         typename std_cxx11::tuple_element<3,ArgList>::type,
+                         typename std_cxx11::tuple_element<4,ArgList>::type,
+                         typename std_cxx11::tuple_element<5,ArgList>::type,
+                         typename std_cxx11::tuple_element<6,ArgList>::type);
+    };
+
+
+    /**
+     * @internal
+     *
+     * Construct a pointer to non-member function based on the template
+     * arguments. This is the specialization for 8 arguments.
+     */
+    template <typename RT, typename ArgList>
+    struct fun_ptr_helper<RT, ArgList, 8>
+    {
+      typedef RT (type) (typename std_cxx11::tuple_element<0,ArgList>::type,
+                         typename std_cxx11::tuple_element<1,ArgList>::type,
+                         typename std_cxx11::tuple_element<2,ArgList>::type,
+                         typename std_cxx11::tuple_element<3,ArgList>::type,
+                         typename std_cxx11::tuple_element<4,ArgList>::type,
+                         typename std_cxx11::tuple_element<5,ArgList>::type,
+                         typename std_cxx11::tuple_element<6,ArgList>::type,
+                         typename std_cxx11::tuple_element<7,ArgList>::type);
+    };
+
+
+    /**
+     * @internal
+     *
+     * Construct a pointer to non-member function based on the template
+     * arguments. This is the specialization for 9 arguments.
+     */
+    template <typename RT, typename ArgList>
+    struct fun_ptr_helper<RT, ArgList, 9>
+    {
+    private:
+      // Shorthands for the nine argument types listed in ArgList.
+      typedef typename std_cxx11::tuple_element<0,ArgList>::type A0;
+      typedef typename std_cxx11::tuple_element<1,ArgList>::type A1;
+      typedef typename std_cxx11::tuple_element<2,ArgList>::type A2;
+      typedef typename std_cxx11::tuple_element<3,ArgList>::type A3;
+      typedef typename std_cxx11::tuple_element<4,ArgList>::type A4;
+      typedef typename std_cxx11::tuple_element<5,ArgList>::type A5;
+      typedef typename std_cxx11::tuple_element<6,ArgList>::type A6;
+      typedef typename std_cxx11::tuple_element<7,ArgList>::type A7;
+      typedef typename std_cxx11::tuple_element<8,ArgList>::type A8;
+
+    public:
+      // Type of a function that returns RT and takes these nine arguments.
+      typedef RT (type) (A0, A1, A2, A3, A4, A5, A6, A7, A8);
+    };
+
+
+
+    /**
+     * @internal
+     *
+     * Construct a pointer to non-member function based on the template
+     * arguments. This is the specialization for 10 arguments.
+     */
+    template <typename RT, typename ArgList>
+    struct fun_ptr_helper<RT, ArgList, 10>
+    {
+    private:
+      // Shorthands for the ten argument types listed in ArgList.
+      typedef typename std_cxx11::tuple_element<0,ArgList>::type A0;
+      typedef typename std_cxx11::tuple_element<1,ArgList>::type A1;
+      typedef typename std_cxx11::tuple_element<2,ArgList>::type A2;
+      typedef typename std_cxx11::tuple_element<3,ArgList>::type A3;
+      typedef typename std_cxx11::tuple_element<4,ArgList>::type A4;
+      typedef typename std_cxx11::tuple_element<5,ArgList>::type A5;
+      typedef typename std_cxx11::tuple_element<6,ArgList>::type A6;
+      typedef typename std_cxx11::tuple_element<7,ArgList>::type A7;
+      typedef typename std_cxx11::tuple_element<8,ArgList>::type A8;
+      typedef typename std_cxx11::tuple_element<9,ArgList>::type A9;
+
+    public:
+      // Type of a function that returns RT and takes these ten arguments.
+      typedef RT (type) (A0, A1, A2, A3, A4, A5, A6, A7, A8, A9);
+    };
+
+
+
+    /**
+     * @internal
+     *
+     * Construct a pointer to non-member function based on the template
+     * arguments. We do this by dispatching to the fun_ptr_helper classes that
+     * are overloaded on the number of elements.
+     *
+     * Note that the last template argument for the fun_ptr_helper class is
+     * automatically computed in the default argument to the general template.
+     */
+    template <typename RT, typename ArgList>
+    struct fun_ptr
+    {
+    private:
+      // The helper specialization selected for this argument list; its
+      // last template argument is left to the helper's default.
+      typedef fun_ptr_helper<RT,ArgList> dispatcher;
+
+    public:
+      // The function type computed by the selected helper.
+      typedef typename dispatcher::type type;
+    };
+  }
+
+
+
+  namespace internal
+  {
+#ifdef DEAL_II_WITH_THREADS
+
+    /**
+     * A class that represents threads. For each thread, we create exactly one
+     * of these objects -- exactly one because it carries the returned value
+     * of the function called on the thread.
+     *
+     * While we have only one of these objects per thread, several
+     * Threads::Thread objects may refer to this descriptor. If all Thread
+     * objects go out of scope the ThreadDescriptor will detach from the
+     * thread before being destructed.
+     */
+    template <typename RT>
+    struct ThreadDescriptor
+    {
+      /**
+       * An object that represents the thread started.
+       */
+      std_cxx11::thread thread;
+
+      /**
+       * An object that will hold the value returned by the function called on
+       * the thread.
+       *
+       * The return value is stored in a shared_ptr because we might abandon
+       * the ThreadDescriptor.  This makes sure the object stays alive
+       * until the thread exits.
+       */
+      std_cxx11::shared_ptr<return_value<RT> > ret_val;
+
+      /**
+       * A bool variable that is initially false, is set to true when a new
+       * thread is started, and is set back to false once join() has been
+       * called.
+       *
+       * We use this variable to make sure we can call join() twice on the
+       * same thread. For some reason, the C++ standard library throws a
+       * std::system_error exception if one tries to call std::thread::join
+       * twice (and in fact, before the second call, std::thread::joinable
+       * returns false) but this is a somewhat desirable thing to do because
+       * one doesn't have to keep track whether join() has been called before.
+       * Using this variable, whenever we have called join() before, the
+       * variable is set to false and we can skip over calling
+       * std::thread::join() a second time. Access to this variable in join()
+       * is guarded by the following mutex.
+       *
+       * @note Historically, we did not need the mutex for this variable:
+       * threads can only be joined from the thread that created it
+       * originally. Consequently, everything that happens in a function that
+       * does not create threads (such as the join() function below) looks
+       * atomic to the outside world. Since we clear and test thread_is_active
+       * in the same function as we call std::thread::join, these actions are
+       * atomic and need no mutex. Of course, two threads may call join() on
+       * the same thread object at the same time, but this action is undefined
+       * anyway since they can not both join the same thread. That said, more
+       * recent C++ standards do not appear to have the requirement any more
+       * that the only thread that can call join() is the one that created the
+       * thread. Neither does pthread_join appear to have this requirement any
+       * more.  Consequently, we can in fact join from different threads and
+       * we test this in base/thread_validity_07. Because join() may thus run
+       * on several threads at once, it reads and writes this variable only
+       * while holding the mutex.
+       */
+      bool thread_is_active;
+
+      /**
+       * Mutex guarding access to the previous variable.
+       */
+      Mutex thread_is_active_mutex;
+
+      /**
+       * Default constructor. No thread is associated with the object until
+       * start() is called.
+       */
+      ThreadDescriptor ()
+        :
+        thread_is_active (false)
+      {}
+
+      /**
+       * Destructor. If a thread has been started but never joined, detach
+       * from it before the thread member is destroyed.
+       */
+      ~ThreadDescriptor ()
+      {
+        if (thread_is_active)
+          {
+            thread.detach();
+            thread_is_active = false;
+          }
+      }
+
+      /**
+       * Start the thread and let it put its return value into the ret_val
+       * object.
+       */
+      void start (const std_cxx11::function<RT ()> &function)
+      {
+        ret_val.reset(new return_value<RT>());
+        thread = std_cxx11::thread (thread_entry_point, function, ret_val);
+
+        // mark the thread as active only once it actually exists: if
+        // one of the two statements above throws, the destructor must
+        // not try to detach from a thread that was never started
+        thread_is_active = true;
+      }
+
+
+      /**
+       * Wait for the thread to end. Calling this function again after the
+       * thread has been joined is a no-op.
+       */
+      void join ()
+      {
+        // always acquire the mutex before looking at thread_is_active:
+        // join() may be called from several threads at once, and
+        // reading the plain bool outside the lock while another thread
+        // writes it below would be a data race
+        Mutex::ScopedLock lock (thread_is_active_mutex);
+        if (thread_is_active == true)
+          {
+            Assert (thread.joinable(), ExcInternalError());
+            thread.join ();
+            thread_is_active = false;
+          }
+      }
+
+    private:
+
+      /**
+       * The function that runs on the thread. It calls @p function and
+       * stores the result in the object pointed to by @p ret_val.
+       */
+      static
+      void thread_entry_point (const std_cxx11::function<RT ()> function,
+                               std_cxx11::shared_ptr<return_value<RT> > ret_val)
+      {
+        // call the function in question. since an exception that is
+        // thrown from one of the called functions will not propagate
+        // to the main thread, it will kill the program if not treated
+        // here before we return to the operating system's thread
+        // library
+        internal::register_thread ();
+        try
+          {
+            call (function, *ret_val);
+          }
+        catch (const std::exception &exc)
+          {
+            internal::handle_std_exception (exc);
+          }
+        catch (...)
+          {
+            internal::handle_unknown_exception ();
+          }
+        internal::deregister_thread ();
+      }
+    };
+
+#else
+    /**
+     * A class that represents threads. For each thread, we create exactly one
+     * of these objects -- exactly one because it carries the returned value
+     * of the function called on the thread.
+     *
+     * While we have only one of these objects per thread, several
+     * Threads::Thread objects may refer to this descriptor.
+     */
+    template <typename RT>
+    struct ThreadDescriptor
+    {
+      /**
+       * Storage for the value returned by the function passed to start().
+       */
+      std_cxx11::shared_ptr<return_value<RT> > ret_val;
+
+      /**
+       * Without thread support there is nothing to spawn: allocate a fresh
+       * return value object and run @p function right away on the calling
+       * thread, storing its result in ret_val.
+       */
+      void start (const std_cxx11::function<RT ()> &function)
+      {
+        ret_val = std_cxx11::shared_ptr<return_value<RT> >(new return_value<RT>());
+        call (function, *ret_val);
+      }
+
+      /**
+       * Nothing to wait for: the function has already finished by the time
+       * start() returns.
+       */
+      void join ()
+      {}
+    };
+
+#endif
+  }
+
+
+  /**
+   * An object that represents a spawned thread. This object can be freely
+   * copied around in user space, and all instances will represent the same
+   * thread and can require to wait for its termination and access its return
+   * value.
+   *
+   * Threads can be abandoned, i.e. if you just call Threads::new_thread but
+   * don't care about the returned object, or if you assign the return
+   * Threads::Thread object to an object that subsequently goes out of scope,
+   * then the thread previously created will still continue to do work. You
+   * will simply not be able to access its return value any more, and it may
+   * also happen that your program terminates before the thread has finished
+   * its work.
+   *
+   * The default value of the template argument is <code>void</code>, so if
+   * the function you are calling on a new thread has no return value, you can
+   * omit the template argument.
+   *
+   * @author Wolfgang Bangerth, 2003, 2009
+   * @ingroup threads
+   */
+  template <typename RT = void>
+  class Thread
+  {
+  public:
+    /**
+     * Construct a thread object with a function object. This creates the
+     * shared thread descriptor and starts @p function through it.
+     */
+    Thread (const std_cxx11::function<RT ()> &function)
+      :
+      thread_descriptor (new internal::ThreadDescriptor<RT>())
+    {
+      // in a second step, start the thread.
+      thread_descriptor->start (function);
+    }
+
+    /**
+     * Default constructor. You can't do much with a thread object constructed
+     * this way, except for assigning it a thread object that holds data
+     * created by the new_thread() functions.
+     */
+    Thread () {}
+
+    /**
+     * Copy constructor. The new object refers to the same thread descriptor
+     * as @p t.
+     */
+    Thread (const Thread<RT> &t)
+      :
+      thread_descriptor (t.thread_descriptor)
+    {}
+
+    /**
+     * Join the thread represented by this object, i.e. wait for it to finish.
+     * If you have used the default constructor of this class and have not
+     * assigned a thread object to it, then this function is a no-op.
+     */
+    void join () const
+    {
+      if (thread_descriptor)
+        thread_descriptor->join ();
+    }
+
+    /**
+     * Get the return value of the function of the thread. Since this is only
+     * available once the thread finishes, this implicitly also calls join().
+     *
+     * This function may only be called on objects for which valid() returns
+     * true; there is no return value to hand out otherwise.
+     */
+    RT return_value ()
+    {
+      // a default-constructed object has no descriptor; catch this
+      // misuse here rather than dereferencing a null pointer below
+      Assert (valid(), ExcInternalError());
+
+      join ();
+      return thread_descriptor->ret_val->get();
+    }
+
+    /**
+     * Return true if this object has had a thread associated with it, either
+     * by using the non-default constructor or by assignment.
+     */
+    bool valid () const
+    {
+      return static_cast<bool>(thread_descriptor);
+    }
+
+
+    /**
+     * Check for equality of thread objects. Since objects of this class store
+     * an implicit pointer to an object that exists exactly once for each
+     * thread, the check is simply to compare these pointers. The comparison
+     * does not modify either object and can therefore also be used on
+     * constant objects.
+     */
+    bool operator == (const Thread &t) const
+    {
+      return thread_descriptor == t.thread_descriptor;
+    }
+
+  private:
+    /**
+     * Shared pointer to the object representing the thread, and abstracting
+     * operating system functions to work on it. The shared pointer makes
+     * sure that that object lives as long as there is at least one
+     * subscriber to it.
+     */
+    std_cxx11::shared_ptr<internal::ThreadDescriptor<RT> > thread_descriptor;
+  };
+
+
+  namespace internal
+  {
+    /**
+     * A general template that returns std_cxx11::ref(t) if t is of reference
+     * type, and t otherwise.
+     *
+     * The case that t is of reference type is handled in a partial
+     * specialization declared below.
+     */
+    template <typename T>
+    struct maybe_make_ref
+    {
+      // General case (T is not matched by the reference specialization):
+      // return the argument by value, i.e. as a copy of t.
+      static T act (T &t)
+      {
+        return t;
+      }
+    };
+
+
+
+    /**
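+     * Partial specialization of the maybe_make_ref template for the case
+     * that the template argument is of reference type. In this case, act()
+     * returns std_cxx11::ref(t) rather than a copy of t.
+     *
+     * The case that t is not of reference type is handled by the general
+     * template declared above.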
+     */
+    template <typename T>
+    struct maybe_make_ref<T &>
+    {
+      // Reference case: wrap the argument in a reference_wrapper instead of
+      // returning a copy of it.
+      static std_cxx11::reference_wrapper<T> act (T &t)
+      {
+        return std_cxx11::ref(t);
+      }
+    };
+  }
+
+
+
+  namespace internal
+  {
+    /**
+     * @internal
+     *
+     * General template declaration of a class that is used to encapsulate
+     * arguments to global and static member functions, make sure a new thread
+     * is created and that function being run on that thread.
+     *
+     * Although this general template is not implemented at all, the default
+     * template argument makes sure that whenever using the name of this
+     * class, the last template argument will be computed correctly from the
+     * previous arguments, and the correct specialization for this last
+     * template argument be used, even though we need to specify it.
+     */
+    // NOTE(review): this declaration itself gives no default for 'length';
+    // presumably an earlier declaration of this template supplies the
+    // default argument the documentation refers to -- confirm.
+    template <typename RT, typename ArgList, int length>
+    class fun_encapsulator;
+
+
+// ----------- encapsulators for function objects
+
+    /**
+     * @internal
+     *
+     * Encapsulator class for functions with no arguments.
+     */
+    template <typename RT, typename ArgList>
+    class fun_encapsulator<RT, ArgList, 0>
+    {
+    private:
+      // Shorthands for the wrapped function type and for the function
+      // object that stores it.
+      typedef typename internal::fun_ptr<RT,ArgList>::type function_type;
+      typedef std_cxx11::function<function_type> function_object;
+
+    public:
+      // Wrap a plain function given through a pointer to it.
+      fun_encapsulator (function_type *f)
+        : function (*f)
+      {}
+
+      // Wrap a copy of an existing function object.
+      fun_encapsulator (const function_object &f)
+        : function (f)
+      {}
+
+      // Return a Thread object constructed from the stored function.
+      inline
+      Thread<RT>
+      operator() ()
+      {
+        return Thread<RT> (function);
+      }
+
+    private:
+      // The function object handed to the new thread.
+      function_object function;
+    };
+
+    /**
+     * @internal
+     *
+     * Encapsulator class for functions with 1 argument.
+     */
+    template <typename RT, typename ArgList>
+    class fun_encapsulator<RT, ArgList, 1>
+    {
+    private:
+      // Shorthands for the wrapped function type, the function object that
+      // stores it, and the argument type listed in ArgList.
+      typedef typename internal::fun_ptr<RT,ArgList>::type function_type;
+      typedef std_cxx11::function<function_type> function_object;
+      typedef typename std_cxx11::tuple_element<0,ArgList>::type Arg1;
+
+    public:
+      // Wrap a plain function given through a pointer to it.
+      fun_encapsulator (function_type *f)
+        : function (*f)
+      {}
+
+      // Wrap a copy of an existing function object.
+      fun_encapsulator (const function_object &f)
+        : function (f)
+      {}
+
+      // Bind the argument to the stored function (passing it through
+      // maybe_make_ref) and return a Thread constructed from the result.
+      inline
+      Thread<RT>
+      operator() (Arg1 arg1)
+      {
+        return
+          Thread<RT>
+          (std_cxx11::bind (function,
+                            internal::maybe_make_ref<Arg1>::act(arg1)));
+      }
+
+    private:
+      // The function object handed to the new thread.
+      function_object function;
+    };
+
+    /**
+     * @internal
+     *
+     * Encapsulator class for functions with 2 arguments.
+     */
+    template <typename RT, typename ArgList>
+    class fun_encapsulator<RT, ArgList, 2>
+    {
+    private:
+      // Shorthands for the wrapped function type, the function object that
+      // stores it, and the argument types listed in ArgList.
+      typedef typename internal::fun_ptr<RT,ArgList>::type function_type;
+      typedef std_cxx11::function<function_type> function_object;
+      typedef typename std_cxx11::tuple_element<0,ArgList>::type Arg1;
+      typedef typename std_cxx11::tuple_element<1,ArgList>::type Arg2;
+
+    public:
+      // Wrap a plain function given through a pointer to it.
+      fun_encapsulator (function_type *f)
+        : function (*f)
+      {}
+
+      // Wrap a copy of an existing function object.
+      fun_encapsulator (const function_object &f)
+        : function (f)
+      {}
+
+      // Bind the arguments to the stored function (passing each through
+      // maybe_make_ref) and return a Thread constructed from the result.
+      inline
+      Thread<RT>
+      operator() (Arg1 arg1,
+                  Arg2 arg2)
+      {
+        return
+          Thread<RT>
+          (std_cxx11::bind (function,
+                            internal::maybe_make_ref<Arg1>::act(arg1),
+                            internal::maybe_make_ref<Arg2>::act(arg2)));
+      }
+
+    private:
+      // The function object handed to the new thread.
+      function_object function;
+    };
+
+    /**
+     * @internal
+     *
+     * Encapsulator class for functions with 3 arguments.
+     */
+    template <typename RT, typename ArgList>
+    class fun_encapsulator<RT, ArgList, 3>
+    {
+    private:
+      // Shorthands for the wrapped function type, the function object that
+      // stores it, and the argument types listed in ArgList.
+      typedef typename internal::fun_ptr<RT,ArgList>::type function_type;
+      typedef std_cxx11::function<function_type> function_object;
+      typedef typename std_cxx11::tuple_element<0,ArgList>::type Arg1;
+      typedef typename std_cxx11::tuple_element<1,ArgList>::type Arg2;
+      typedef typename std_cxx11::tuple_element<2,ArgList>::type Arg3;
+
+    public:
+      // Wrap a plain function given through a pointer to it.
+      fun_encapsulator (function_type *f)
+        : function (*f)
+      {}
+
+      // Wrap a copy of an existing function object.
+      fun_encapsulator (const function_object &f)
+        : function (f)
+      {}
+
+      // Bind the arguments to the stored function (passing each through
+      // maybe_make_ref) and return a Thread constructed from the result.
+      inline
+      Thread<RT>
+      operator() (Arg1 arg1,
+                  Arg2 arg2,
+                  Arg3 arg3)
+      {
+        return
+          Thread<RT>
+          (std_cxx11::bind (function,
+                            internal::maybe_make_ref<Arg1>::act(arg1),
+                            internal::maybe_make_ref<Arg2>::act(arg2),
+                            internal::maybe_make_ref<Arg3>::act(arg3)));
+      }
+
+    private:
+      // The function object handed to the new thread.
+      function_object function;
+    };
+
+    /**
+     * @internal
+     *
+     * Encapsulator class for functions with 4 arguments.
+     */
+    template <typename RT, typename ArgList>
+    class fun_encapsulator<RT, ArgList, 4>
+    {
+    private:
+      // Shorthands for the wrapped function type, the function object that
+      // stores it, and the argument types listed in ArgList.
+      typedef typename internal::fun_ptr<RT,ArgList>::type function_type;
+      typedef std_cxx11::function<function_type> function_object;
+      typedef typename std_cxx11::tuple_element<0,ArgList>::type Arg1;
+      typedef typename std_cxx11::tuple_element<1,ArgList>::type Arg2;
+      typedef typename std_cxx11::tuple_element<2,ArgList>::type Arg3;
+      typedef typename std_cxx11::tuple_element<3,ArgList>::type Arg4;
+
+    public:
+      // Wrap a plain function given through a pointer to it.
+      fun_encapsulator (function_type *f)
+        : function (*f)
+      {}
+
+      // Wrap a copy of an existing function object.
+      fun_encapsulator (const function_object &f)
+        : function (f)
+      {}
+
+      // Bind the arguments to the stored function (passing each through
+      // maybe_make_ref) and return a Thread constructed from the result.
+      inline
+      Thread<RT>
+      operator() (Arg1 arg1,
+                  Arg2 arg2,
+                  Arg3 arg3,
+                  Arg4 arg4)
+      {
+        return
+          Thread<RT>
+          (std_cxx11::bind (function,
+                            internal::maybe_make_ref<Arg1>::act(arg1),
+                            internal::maybe_make_ref<Arg2>::act(arg2),
+                            internal::maybe_make_ref<Arg3>::act(arg3),
+                            internal::maybe_make_ref<Arg4>::act(arg4)));
+      }
+
+    private:
+      // The function object handed to the new thread.
+      function_object function;
+    };
+
+    /**
+     * @internal
+     *
+     * Encapsulator class for functions with 5 arguments.
+     */
+    template <typename RT, typename ArgList>
+    class fun_encapsulator<RT, ArgList, 5>
+    {
+    private:
+      // Shorthands for the wrapped function type, the function object that
+      // stores it, and the argument types listed in ArgList.
+      typedef typename internal::fun_ptr<RT,ArgList>::type function_type;
+      typedef std_cxx11::function<function_type> function_object;
+      typedef typename std_cxx11::tuple_element<0,ArgList>::type Arg1;
+      typedef typename std_cxx11::tuple_element<1,ArgList>::type Arg2;
+      typedef typename std_cxx11::tuple_element<2,ArgList>::type Arg3;
+      typedef typename std_cxx11::tuple_element<3,ArgList>::type Arg4;
+      typedef typename std_cxx11::tuple_element<4,ArgList>::type Arg5;
+
+    public:
+      // Wrap a plain function given through a pointer to it.
+      fun_encapsulator (function_type *f)
+        : function (*f)
+      {}
+
+      // Wrap a copy of an existing function object.
+      fun_encapsulator (const function_object &f)
+        : function (f)
+      {}
+
+      // Bind the arguments to the stored function (passing each through
+      // maybe_make_ref) and return a Thread constructed from the result.
+      inline
+      Thread<RT>
+      operator() (Arg1 arg1,
+                  Arg2 arg2,
+                  Arg3 arg3,
+                  Arg4 arg4,
+                  Arg5 arg5)
+      {
+        return
+          Thread<RT>
+          (std_cxx11::bind (function,
+                            internal::maybe_make_ref<Arg1>::act(arg1),
+                            internal::maybe_make_ref<Arg2>::act(arg2),
+                            internal::maybe_make_ref<Arg3>::act(arg3),
+                            internal::maybe_make_ref<Arg4>::act(arg4),
+                            internal::maybe_make_ref<Arg5>::act(arg5)));
+      }
+
+    private:
+      // The function object handed to the new thread.
+      function_object function;
+    };
+
+    /**
+     * @internal
+     *
+     * Encapsulator class for functions with 6 arguments.
+     */
+    template <typename RT, typename ArgList>
+    class fun_encapsulator<RT, ArgList, 6>
+    {
+    private:
+      // Shorthands for the wrapped function type, the function object that
+      // stores it, and the argument types listed in ArgList.
+      typedef typename internal::fun_ptr<RT,ArgList>::type function_type;
+      typedef std_cxx11::function<function_type> function_object;
+      typedef typename std_cxx11::tuple_element<0,ArgList>::type Arg1;
+      typedef typename std_cxx11::tuple_element<1,ArgList>::type Arg2;
+      typedef typename std_cxx11::tuple_element<2,ArgList>::type Arg3;
+      typedef typename std_cxx11::tuple_element<3,ArgList>::type Arg4;
+      typedef typename std_cxx11::tuple_element<4,ArgList>::type Arg5;
+      typedef typename std_cxx11::tuple_element<5,ArgList>::type Arg6;
+
+    public:
+      // Wrap a plain function given through a pointer to it.
+      fun_encapsulator (function_type *f)
+        : function (*f)
+      {}
+
+      // Wrap a copy of an existing function object.
+      fun_encapsulator (const function_object &f)
+        : function (f)
+      {}
+
+      // Bind the arguments to the stored function (passing each through
+      // maybe_make_ref) and return a Thread constructed from the result.
+      inline
+      Thread<RT>
+      operator() (Arg1 arg1,
+                  Arg2 arg2,
+                  Arg3 arg3,
+                  Arg4 arg4,
+                  Arg5 arg5,
+                  Arg6 arg6)
+      {
+        return
+          Thread<RT>
+          (std_cxx11::bind (function,
+                            internal::maybe_make_ref<Arg1>::act(arg1),
+                            internal::maybe_make_ref<Arg2>::act(arg2),
+                            internal::maybe_make_ref<Arg3>::act(arg3),
+                            internal::maybe_make_ref<Arg4>::act(arg4),
+                            internal::maybe_make_ref<Arg5>::act(arg5),
+                            internal::maybe_make_ref<Arg6>::act(arg6)));
+      }
+
+    private:
+      // The function object handed to the new thread.
+      function_object function;
+    };
+
+    /**
+     * @internal
+     *
+     * Encapsulator class for functions with 7 arguments.
+     */
+    template <typename RT, typename ArgList>
+    class fun_encapsulator<RT, ArgList, 7>
+    {
+    private:
+      // Shorthands for the wrapped function type, the function object that
+      // stores it, and the argument types listed in ArgList.
+      typedef typename internal::fun_ptr<RT,ArgList>::type function_type;
+      typedef std_cxx11::function<function_type> function_object;
+      typedef typename std_cxx11::tuple_element<0,ArgList>::type Arg1;
+      typedef typename std_cxx11::tuple_element<1,ArgList>::type Arg2;
+      typedef typename std_cxx11::tuple_element<2,ArgList>::type Arg3;
+      typedef typename std_cxx11::tuple_element<3,ArgList>::type Arg4;
+      typedef typename std_cxx11::tuple_element<4,ArgList>::type Arg5;
+      typedef typename std_cxx11::tuple_element<5,ArgList>::type Arg6;
+      typedef typename std_cxx11::tuple_element<6,ArgList>::type Arg7;
+
+    public:
+      // Wrap a plain function given through a pointer to it.
+      fun_encapsulator (function_type *f)
+        : function (*f)
+      {}
+
+      // Wrap a copy of an existing function object.
+      fun_encapsulator (const function_object &f)
+        : function (f)
+      {}
+
+      // Bind the arguments to the stored function (passing each through
+      // maybe_make_ref) and return a Thread constructed from the result.
+      inline
+      Thread<RT>
+      operator() (Arg1 arg1,
+                  Arg2 arg2,
+                  Arg3 arg3,
+                  Arg4 arg4,
+                  Arg5 arg5,
+                  Arg6 arg6,
+                  Arg7 arg7)
+      {
+        return
+          Thread<RT>
+          (std_cxx11::bind (function,
+                            internal::maybe_make_ref<Arg1>::act(arg1),
+                            internal::maybe_make_ref<Arg2>::act(arg2),
+                            internal::maybe_make_ref<Arg3>::act(arg3),
+                            internal::maybe_make_ref<Arg4>::act(arg4),
+                            internal::maybe_make_ref<Arg5>::act(arg5),
+                            internal::maybe_make_ref<Arg6>::act(arg6),
+                            internal::maybe_make_ref<Arg7>::act(arg7)));
+      }
+
+    private:
+      // The function object handed to the new thread.
+      function_object function;
+    };
+
+    /**
+     * @internal
+     *
+     * Encapsulator class for functions with 8 arguments.
+     */
+    template <typename RT, typename ArgList>
+    class fun_encapsulator<RT, ArgList, 8>
+    {
+    private:
+      // Shorthands for the wrapped function type, the function object that
+      // stores it, and the argument types listed in ArgList.
+      typedef typename internal::fun_ptr<RT,ArgList>::type function_type;
+      typedef std_cxx11::function<function_type> function_object;
+      typedef typename std_cxx11::tuple_element<0,ArgList>::type Arg1;
+      typedef typename std_cxx11::tuple_element<1,ArgList>::type Arg2;
+      typedef typename std_cxx11::tuple_element<2,ArgList>::type Arg3;
+      typedef typename std_cxx11::tuple_element<3,ArgList>::type Arg4;
+      typedef typename std_cxx11::tuple_element<4,ArgList>::type Arg5;
+      typedef typename std_cxx11::tuple_element<5,ArgList>::type Arg6;
+      typedef typename std_cxx11::tuple_element<6,ArgList>::type Arg7;
+      typedef typename std_cxx11::tuple_element<7,ArgList>::type Arg8;
+
+    public:
+      // Wrap a plain function given through a pointer to it.
+      fun_encapsulator (function_type *f)
+        : function (*f)
+      {}
+
+      // Wrap a copy of an existing function object.
+      fun_encapsulator (const function_object &f)
+        : function (f)
+      {}
+
+      // Bind the arguments to the stored function (passing each through
+      // maybe_make_ref) and return a Thread constructed from the result.
+      inline
+      Thread<RT>
+      operator() (Arg1 arg1,
+                  Arg2 arg2,
+                  Arg3 arg3,
+                  Arg4 arg4,
+                  Arg5 arg5,
+                  Arg6 arg6,
+                  Arg7 arg7,
+                  Arg8 arg8)
+      {
+        return
+          Thread<RT>
+          (std_cxx11::bind (function,
+                            internal::maybe_make_ref<Arg1>::act(arg1),
+                            internal::maybe_make_ref<Arg2>::act(arg2),
+                            internal::maybe_make_ref<Arg3>::act(arg3),
+                            internal::maybe_make_ref<Arg4>::act(arg4),
+                            internal::maybe_make_ref<Arg5>::act(arg5),
+                            internal::maybe_make_ref<Arg6>::act(arg6),
+                            internal::maybe_make_ref<Arg7>::act(arg7),
+                            internal::maybe_make_ref<Arg8>::act(arg8)));
+      }
+
+    private:
+      // The function object handed to the new thread.
+      function_object function;
+    };
+
+    /**
+     * @internal
+     *
+     * Encapsulator class for functions with 9 arguments.
+     */
+    template <typename RT, typename ArgList>
+    class fun_encapsulator<RT, ArgList, 9>
+    {
+    private:
+      // Shorthands for the wrapped function type, the function object that
+      // stores it, and the argument types listed in ArgList.
+      typedef typename internal::fun_ptr<RT,ArgList>::type function_type;
+      typedef std_cxx11::function<function_type> function_object;
+      typedef typename std_cxx11::tuple_element<0,ArgList>::type Arg1;
+      typedef typename std_cxx11::tuple_element<1,ArgList>::type Arg2;
+      typedef typename std_cxx11::tuple_element<2,ArgList>::type Arg3;
+      typedef typename std_cxx11::tuple_element<3,ArgList>::type Arg4;
+      typedef typename std_cxx11::tuple_element<4,ArgList>::type Arg5;
+      typedef typename std_cxx11::tuple_element<5,ArgList>::type Arg6;
+      typedef typename std_cxx11::tuple_element<6,ArgList>::type Arg7;
+      typedef typename std_cxx11::tuple_element<7,ArgList>::type Arg8;
+      typedef typename std_cxx11::tuple_element<8,ArgList>::type Arg9;
+
+    public:
+      // Wrap a plain function given through a pointer to it.
+      fun_encapsulator (function_type *f)
+        : function (*f)
+      {}
+
+      // Wrap a copy of an existing function object.
+      fun_encapsulator (const function_object &f)
+        : function (f)
+      {}
+
+      // Bind the arguments to the stored function (passing each through
+      // maybe_make_ref) and return a Thread constructed from the result.
+      inline
+      Thread<RT>
+      operator() (Arg1 arg1,
+                  Arg2 arg2,
+                  Arg3 arg3,
+                  Arg4 arg4,
+                  Arg5 arg5,
+                  Arg6 arg6,
+                  Arg7 arg7,
+                  Arg8 arg8,
+                  Arg9 arg9)
+      {
+        return
+          Thread<RT>
+          (std_cxx11::bind (function,
+                            internal::maybe_make_ref<Arg1>::act(arg1),
+                            internal::maybe_make_ref<Arg2>::act(arg2),
+                            internal::maybe_make_ref<Arg3>::act(arg3),
+                            internal::maybe_make_ref<Arg4>::act(arg4),
+                            internal::maybe_make_ref<Arg5>::act(arg5),
+                            internal::maybe_make_ref<Arg6>::act(arg6),
+                            internal::maybe_make_ref<Arg7>::act(arg7),
+                            internal::maybe_make_ref<Arg8>::act(arg8),
+                            internal::maybe_make_ref<Arg9>::act(arg9)));
+      }
+
+    private:
+      // The function object handed to the new thread.
+      function_object function;
+    };
+  }
+
+
+
+// ----------- thread starters for functions not taking any parameters
+
+  /**
+   * Overload of the new_thread function for objects that can be converted to
+   * std_cxx11::function<RT ()>, i.e. anything that can be called like a
+   * function object without arguments and returning an object of type RT (or
+   * void).
+   *
+   * @ingroup threads
+   */
+  template <typename RT>
+  inline
+  Thread<RT>
+  new_thread (const std_cxx11::function<RT ()> &function)
+  {
+    // constructing the Thread object starts the function; hand the
+    // resulting object back to the caller
+    Thread<RT> spawned_thread (function);
+    return spawned_thread;
+  }
+
+  /**
+   * Overload of the new_thread function for non-member or static member
+   * functions with no arguments.
+   *
+   * @ingroup threads
+   */
+  template <typename RT>
+  inline
+  Thread<RT>
+  new_thread (RT (*fun_ptr)())
+  {
+    // wrap the function pointer into the function object type that the
+    // Thread constructor expects
+    const std_cxx11::function<RT ()> task = fun_ptr;
+    return Thread<RT> (task);
+  }
+
+
+  /**
+   * Overload of the non-const new_thread function for member functions with
+   * no arguments.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C>
+  inline
+  Thread<RT>
+  new_thread (RT (C::*fun_ptr)(),
+              typename identity<C>::type &c)
+  {
+    // bind the member function to the object, which is passed on by
+    // reference, so that the result can be called without arguments
+    const std_cxx11::function<RT ()> task
+      = std_cxx11::bind(fun_ptr, std_cxx11::ref(c));
+    return Thread<RT> (task);
+  }
+
+
+#ifndef DEAL_II_CONST_MEMBER_DEDUCTION_BUG
+  /**
+   * Overload of the new_thread function for const member functions with no
+   * arguments.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C>
+  inline
+  Thread<RT>
+  new_thread (RT (C::*fun_ptr)() const,
+              const typename identity<C>::type &c)
+  {
+    // bind the const member function to the object, which is passed on
+    // by constant reference, so that the result can be called without
+    // arguments
+    const std_cxx11::function<RT ()> task
+      = std_cxx11::bind(fun_ptr, std_cxx11::cref(c));
+    return Thread<RT> (task);
+  }
+#endif
+
+
+
+// ----------- thread starters for unary functions
+
+  /**
+   * Overload of the new_thread function for non-member or static member
+   * functions with 1 argument.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename Arg1>
+  inline
+  Thread<RT>
+  new_thread (RT (*fun_ptr)(Arg1),
+              typename identity<Arg1>::type arg1)
+  {
+    // bind the argument to the function; maybe_make_ref decides whether
+    // it is passed on as a copy or as a reference wrapper
+    const std_cxx11::function<RT ()> task
+      = std_cxx11::bind(fun_ptr,
+                        internal::maybe_make_ref<Arg1>::act(arg1));
+    return Thread<RT> (task);
+  }
+
+
+
+  /**
+   * Overload of the non-const new_thread function for member functions with 1
+   * argument.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C, typename Arg1>
+  inline
+  Thread<RT>
+  new_thread (RT (C::*fun_ptr)(Arg1),
+              typename identity<C>::type &c,
+              typename identity<Arg1>::type arg1)
+  {
+    // bind the object (by reference) and the argument to the member
+    // function; maybe_make_ref decides whether the argument is passed on
+    // as a copy or as a reference wrapper
+    const std_cxx11::function<RT ()> task
+      = std_cxx11::bind(fun_ptr, std_cxx11::ref(c),
+                        internal::maybe_make_ref<Arg1>::act(arg1));
+    return Thread<RT> (task);
+  }
+
+#ifndef DEAL_II_CONST_MEMBER_DEDUCTION_BUG
+  /**
+   * new_thread() overload for const member functions with 1 argument.
+   * The object @p c is bound through a const reference wrapper, not by copy.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C, typename Arg1>
+  inline Thread<RT>
+  new_thread (RT (C::*fun_ptr)(Arg1) const,
+              typename identity<const C>::type &c,
+              typename identity<Arg1>::type arg1)
+  {
+    // cref(c) makes bind refer to c itself instead of storing a copy of it
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr, std_cxx11::cref(c),
+                        internal::maybe_make_ref<Arg1>::act(arg1));
+    return Thread<RT>(bound_call);
+  }
+#endif
+
+// ----------- thread starters for binary functions
+
+  /**
+   * new_thread() overload for free functions and static member functions
+   * with 2 arguments; returns a Thread<RT> built from the bound call.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename Arg1, typename Arg2>
+  inline Thread<RT>
+  new_thread (RT (*fun_ptr)(Arg1,Arg2),
+              typename identity<Arg1>::type arg1,
+              typename identity<Arg2>::type arg2)
+  {
+    // bind function and arguments into one nullary callable for Thread<RT>
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr,
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2));
+    return Thread<RT>(bound_call);
+  }
+
+
+
+  /**
+   * new_thread() overload for non-const member functions with 2 arguments.
+   * The object @p c is bound through a reference wrapper, not by copy.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C, typename Arg1, typename Arg2>
+  inline Thread<RT>
+  new_thread (RT (C::*fun_ptr)(Arg1,Arg2),
+              typename identity<C>::type &c,
+              typename identity<Arg1>::type arg1,
+              typename identity<Arg2>::type arg2)
+  {
+    // ref(c) makes bind refer to c itself instead of storing a copy of it
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr, std_cxx11::ref(c),
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2));
+    return Thread<RT>(bound_call);
+  }
+
+#ifndef DEAL_II_CONST_MEMBER_DEDUCTION_BUG
+  /**
+   * new_thread() overload for const member functions with 2 arguments.
+   * The object @p c is bound through a const reference wrapper, not by copy.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C, typename Arg1, typename Arg2>
+  inline Thread<RT>
+  new_thread (RT (C::*fun_ptr)(Arg1,Arg2) const,
+              typename identity<const C>::type &c,
+              typename identity<Arg1>::type arg1,
+              typename identity<Arg2>::type arg2)
+  {
+    // cref(c) makes bind refer to c itself instead of storing a copy of it
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr, std_cxx11::cref(c),
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2));
+    return Thread<RT>(bound_call);
+  }
+#endif
+
+// ----------- thread starters for ternary functions
+
+  /**
+   * new_thread() overload for free functions and static member functions
+   * with 3 arguments; returns a Thread<RT> built from the bound call.
+   *
+   * @ingroup threads
+   */
+  template <typename RT,
+            typename Arg1, typename Arg2, typename Arg3>
+  inline Thread<RT>
+  new_thread (RT (*fun_ptr)(Arg1,Arg2,Arg3),
+              typename identity<Arg1>::type arg1,
+              typename identity<Arg2>::type arg2,
+              typename identity<Arg3>::type arg3)
+  {
+    // bind function and arguments into one nullary callable for Thread<RT>
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr,
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2),
+                        internal::maybe_make_ref<Arg3>::act(arg3));
+    return Thread<RT>(bound_call);
+  }
+
+
+
+  /**
+   * new_thread() overload for non-const member functions with 3 arguments.
+   * The object @p c is bound through a reference wrapper, not by copy.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C,
+            typename Arg1, typename Arg2, typename Arg3>
+  inline Thread<RT>
+  new_thread (RT (C::*fun_ptr)(Arg1,Arg2,Arg3),
+              typename identity<C>::type &c,
+              typename identity<Arg1>::type arg1,
+              typename identity<Arg2>::type arg2,
+              typename identity<Arg3>::type arg3)
+  {
+    // ref(c) makes bind refer to c itself instead of storing a copy of it
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr, std_cxx11::ref(c),
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2),
+                        internal::maybe_make_ref<Arg3>::act(arg3));
+    return Thread<RT>(bound_call);
+  }
+
+#ifndef DEAL_II_CONST_MEMBER_DEDUCTION_BUG
+  /**
+   * new_thread() overload for const member functions with 3 arguments.
+   * The object @p c is bound through a const reference wrapper, not by copy.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C,
+            typename Arg1, typename Arg2, typename Arg3>
+  inline Thread<RT>
+  new_thread (RT (C::*fun_ptr)(Arg1,Arg2,Arg3) const,
+              typename identity<const C>::type &c,
+              typename identity<Arg1>::type arg1,
+              typename identity<Arg2>::type arg2,
+              typename identity<Arg3>::type arg3)
+  {
+    // cref(c) makes bind refer to c itself instead of storing a copy of it
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr, std_cxx11::cref(c),
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2),
+                        internal::maybe_make_ref<Arg3>::act(arg3));
+    return Thread<RT>(bound_call);
+  }
+#endif
+
+
+// ----------- thread starters for functions with 4 arguments
+
+  /**
+   * new_thread() overload for free functions and static member functions
+   * with 4 arguments; returns a Thread<RT> built from the bound call.
+   *
+   * @ingroup threads
+   */
+  template <typename RT,
+            typename Arg1, typename Arg2, typename Arg3, typename Arg4>
+  inline Thread<RT>
+  new_thread (RT (*fun_ptr)(Arg1,Arg2,Arg3,Arg4),
+              typename identity<Arg1>::type arg1,
+              typename identity<Arg2>::type arg2,
+              typename identity<Arg3>::type arg3,
+              typename identity<Arg4>::type arg4)
+  {
+    // bind function and arguments into one nullary callable for Thread<RT>
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr,
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2),
+                        internal::maybe_make_ref<Arg3>::act(arg3),
+                        internal::maybe_make_ref<Arg4>::act(arg4));
+    return Thread<RT>(bound_call);
+  }
+
+
+
+  /**
+   * new_thread() overload for non-const member functions with 4 arguments.
+   * The object @p c is bound through a reference wrapper, not by copy.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C,
+            typename Arg1, typename Arg2, typename Arg3, typename Arg4>
+  inline Thread<RT>
+  new_thread (RT (C::*fun_ptr)(Arg1,Arg2,Arg3,Arg4),
+              typename identity<C>::type &c,
+              typename identity<Arg1>::type arg1,
+              typename identity<Arg2>::type arg2,
+              typename identity<Arg3>::type arg3,
+              typename identity<Arg4>::type arg4)
+  {
+    // ref(c) makes bind refer to c itself instead of storing a copy of it
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr, std_cxx11::ref(c),
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2),
+                        internal::maybe_make_ref<Arg3>::act(arg3),
+                        internal::maybe_make_ref<Arg4>::act(arg4));
+    return Thread<RT>(bound_call);
+  }
+
+#ifndef DEAL_II_CONST_MEMBER_DEDUCTION_BUG
+  /**
+   * new_thread() overload for const member functions with 4 arguments.
+   * The object @p c is bound through a const reference wrapper, not by copy.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C,
+            typename Arg1, typename Arg2, typename Arg3, typename Arg4>
+  inline Thread<RT>
+  new_thread (RT (C::*fun_ptr)(Arg1,Arg2,Arg3,Arg4) const,
+              typename identity<const C>::type &c,
+              typename identity<Arg1>::type arg1,
+              typename identity<Arg2>::type arg2,
+              typename identity<Arg3>::type arg3,
+              typename identity<Arg4>::type arg4)
+  {
+    // cref(c) makes bind refer to c itself instead of storing a copy of it
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr, std_cxx11::cref(c),
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2),
+                        internal::maybe_make_ref<Arg3>::act(arg3),
+                        internal::maybe_make_ref<Arg4>::act(arg4));
+    return Thread<RT>(bound_call);
+  }
+#endif
+
+// ----------- thread starters for functions with 5 arguments
+
+  /**
+   * new_thread() overload for free functions and static member functions
+   * with 5 arguments; returns a Thread<RT> built from the bound call.
+   *
+   * @ingroup threads
+   */
+  template <typename RT,
+            typename Arg1, typename Arg2, typename Arg3,
+            typename Arg4, typename Arg5>
+  inline Thread<RT>
+  new_thread (RT (*fun_ptr)(Arg1,Arg2,Arg3,Arg4,Arg5),
+              typename identity<Arg1>::type arg1,
+              typename identity<Arg2>::type arg2,
+              typename identity<Arg3>::type arg3,
+              typename identity<Arg4>::type arg4,
+              typename identity<Arg5>::type arg5)
+  {
+    // bind function and arguments into one nullary callable for Thread<RT>
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr,
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2),
+                        internal::maybe_make_ref<Arg3>::act(arg3),
+                        internal::maybe_make_ref<Arg4>::act(arg4),
+                        internal::maybe_make_ref<Arg5>::act(arg5));
+    return Thread<RT>(bound_call);
+  }
+
+
+
+  /**
+   * new_thread() overload for non-const member functions with 5 arguments.
+   * The object @p c is bound through a reference wrapper, not by copy.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C,
+            typename Arg1, typename Arg2, typename Arg3,
+            typename Arg4, typename Arg5>
+  inline Thread<RT>
+  new_thread (RT (C::*fun_ptr)(Arg1,Arg2,Arg3,Arg4,Arg5),
+              typename identity<C>::type &c,
+              typename identity<Arg1>::type arg1,
+              typename identity<Arg2>::type arg2,
+              typename identity<Arg3>::type arg3,
+              typename identity<Arg4>::type arg4,
+              typename identity<Arg5>::type arg5)
+  {
+    // ref(c) makes bind refer to c itself instead of storing a copy of it
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr, std_cxx11::ref(c),
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2),
+                        internal::maybe_make_ref<Arg3>::act(arg3),
+                        internal::maybe_make_ref<Arg4>::act(arg4),
+                        internal::maybe_make_ref<Arg5>::act(arg5));
+    return Thread<RT>(bound_call);
+  }
+
+#ifndef DEAL_II_CONST_MEMBER_DEDUCTION_BUG
+  /**
+   * new_thread() overload for const member functions with 5 arguments.
+   * The object @p c is bound through a const reference wrapper, not by copy.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C,
+            typename Arg1, typename Arg2, typename Arg3,
+            typename Arg4, typename Arg5>
+  inline Thread<RT>
+  new_thread (RT (C::*fun_ptr)(Arg1,Arg2,Arg3,Arg4,Arg5) const,
+              typename identity<const C>::type &c,
+              typename identity<Arg1>::type arg1,
+              typename identity<Arg2>::type arg2,
+              typename identity<Arg3>::type arg3,
+              typename identity<Arg4>::type arg4,
+              typename identity<Arg5>::type arg5)
+  {
+    // cref(c) makes bind refer to c itself instead of storing a copy of it
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr, std_cxx11::cref(c),
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2),
+                        internal::maybe_make_ref<Arg3>::act(arg3),
+                        internal::maybe_make_ref<Arg4>::act(arg4),
+                        internal::maybe_make_ref<Arg5>::act(arg5));
+    return Thread<RT>(bound_call);
+  }
+#endif
+
+// ----------- thread starters for functions with 6 arguments
+
+  /**
+   * new_thread() overload for free functions and static member functions
+   * with 6 arguments; returns a Thread<RT> built from the bound call.
+   *
+   * @ingroup threads
+   */
+  template <typename RT,
+            typename Arg1, typename Arg2, typename Arg3,
+            typename Arg4, typename Arg5, typename Arg6>
+  inline Thread<RT>
+  new_thread (RT (*fun_ptr)(Arg1,Arg2,Arg3,Arg4,Arg5,Arg6),
+              typename identity<Arg1>::type arg1,
+              typename identity<Arg2>::type arg2,
+              typename identity<Arg3>::type arg3,
+              typename identity<Arg4>::type arg4,
+              typename identity<Arg5>::type arg5,
+              typename identity<Arg6>::type arg6)
+  {
+    // bind function and arguments into one nullary callable for Thread<RT>
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr,
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2),
+                        internal::maybe_make_ref<Arg3>::act(arg3),
+                        internal::maybe_make_ref<Arg4>::act(arg4),
+                        internal::maybe_make_ref<Arg5>::act(arg5),
+                        internal::maybe_make_ref<Arg6>::act(arg6));
+    return Thread<RT>(bound_call);
+  }
+
+
+
+  /**
+   * new_thread() overload for non-const member functions with 6 arguments.
+   * The object @p c is bound through a reference wrapper, not by copy.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C,
+            typename Arg1, typename Arg2, typename Arg3,
+            typename Arg4, typename Arg5, typename Arg6>
+  inline Thread<RT>
+  new_thread (RT (C::*fun_ptr)(Arg1,Arg2,Arg3,Arg4,Arg5,Arg6),
+              typename identity<C>::type &c,
+              typename identity<Arg1>::type arg1,
+              typename identity<Arg2>::type arg2,
+              typename identity<Arg3>::type arg3,
+              typename identity<Arg4>::type arg4,
+              typename identity<Arg5>::type arg5,
+              typename identity<Arg6>::type arg6)
+  {
+    // ref(c) makes bind refer to c itself instead of storing a copy of it
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr, std_cxx11::ref(c),
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2),
+                        internal::maybe_make_ref<Arg3>::act(arg3),
+                        internal::maybe_make_ref<Arg4>::act(arg4),
+                        internal::maybe_make_ref<Arg5>::act(arg5),
+                        internal::maybe_make_ref<Arg6>::act(arg6));
+    return Thread<RT>(bound_call);
+  }
+
+#ifndef DEAL_II_CONST_MEMBER_DEDUCTION_BUG
+  /**
+   * new_thread() overload for const member functions with 6 arguments.
+   * The object @p c is bound through a const reference wrapper, not by copy.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C,
+            typename Arg1, typename Arg2, typename Arg3,
+            typename Arg4, typename Arg5, typename Arg6>
+  inline Thread<RT>
+  new_thread (RT (C::*fun_ptr)(Arg1,Arg2,Arg3,Arg4,Arg5,Arg6) const,
+              typename identity<const C>::type &c,
+              typename identity<Arg1>::type arg1,
+              typename identity<Arg2>::type arg2,
+              typename identity<Arg3>::type arg3,
+              typename identity<Arg4>::type arg4,
+              typename identity<Arg5>::type arg5,
+              typename identity<Arg6>::type arg6)
+  {
+    // cref(c) makes bind refer to c itself instead of storing a copy of it
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr, std_cxx11::cref(c),
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2),
+                        internal::maybe_make_ref<Arg3>::act(arg3),
+                        internal::maybe_make_ref<Arg4>::act(arg4),
+                        internal::maybe_make_ref<Arg5>::act(arg5),
+                        internal::maybe_make_ref<Arg6>::act(arg6));
+    return Thread<RT>(bound_call);
+  }
+#endif
+
+// ----------- thread starters for functions with 7 arguments
+
+  /**
+   * new_thread() overload for free functions and static member functions
+   * with 7 arguments; returns a Thread<RT> built from the bound call.
+   *
+   * @ingroup threads
+   */
+  template <typename RT,
+            typename Arg1, typename Arg2, typename Arg3,
+            typename Arg4, typename Arg5, typename Arg6,
+            typename Arg7>
+  inline Thread<RT>
+  new_thread (RT (*fun_ptr)(Arg1,Arg2,Arg3,Arg4,Arg5,Arg6,Arg7),
+              typename identity<Arg1>::type arg1,
+              typename identity<Arg2>::type arg2,
+              typename identity<Arg3>::type arg3,
+              typename identity<Arg4>::type arg4,
+              typename identity<Arg5>::type arg5,
+              typename identity<Arg6>::type arg6,
+              typename identity<Arg7>::type arg7)
+  {
+    // bind function and arguments into one nullary callable for Thread<RT>
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr,
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2),
+                        internal::maybe_make_ref<Arg3>::act(arg3),
+                        internal::maybe_make_ref<Arg4>::act(arg4),
+                        internal::maybe_make_ref<Arg5>::act(arg5),
+                        internal::maybe_make_ref<Arg6>::act(arg6),
+                        internal::maybe_make_ref<Arg7>::act(arg7));
+    return Thread<RT>(bound_call);
+  }
+
+
+
+  /**
+   * new_thread() overload for non-const member functions with 7 arguments.
+   * The object @p c is bound through a reference wrapper, not by copy.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C,
+            typename Arg1, typename Arg2, typename Arg3,
+            typename Arg4, typename Arg5, typename Arg6,
+            typename Arg7>
+  inline Thread<RT>
+  new_thread (RT (C::*fun_ptr)(Arg1,Arg2,Arg3,Arg4,Arg5,Arg6,Arg7),
+              typename identity<C>::type &c,
+              typename identity<Arg1>::type arg1,
+              typename identity<Arg2>::type arg2,
+              typename identity<Arg3>::type arg3,
+              typename identity<Arg4>::type arg4,
+              typename identity<Arg5>::type arg5,
+              typename identity<Arg6>::type arg6,
+              typename identity<Arg7>::type arg7)
+  {
+    // ref(c) makes bind refer to c itself instead of storing a copy of it
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr, std_cxx11::ref(c),
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2),
+                        internal::maybe_make_ref<Arg3>::act(arg3),
+                        internal::maybe_make_ref<Arg4>::act(arg4),
+                        internal::maybe_make_ref<Arg5>::act(arg5),
+                        internal::maybe_make_ref<Arg6>::act(arg6),
+                        internal::maybe_make_ref<Arg7>::act(arg7));
+    return Thread<RT>(bound_call);
+  }
+
+#ifndef DEAL_II_CONST_MEMBER_DEDUCTION_BUG
+  /**
+   * new_thread() overload for const member functions with 7 arguments.
+   * The object @p c is bound through a const reference wrapper, not by copy.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C,
+            typename Arg1, typename Arg2, typename Arg3,
+            typename Arg4, typename Arg5, typename Arg6,
+            typename Arg7>
+  inline Thread<RT>
+  new_thread (RT (C::*fun_ptr)(Arg1,Arg2,Arg3,Arg4,Arg5,Arg6,Arg7) const,
+              typename identity<const C>::type &c,
+              typename identity<Arg1>::type arg1,
+              typename identity<Arg2>::type arg2,
+              typename identity<Arg3>::type arg3,
+              typename identity<Arg4>::type arg4,
+              typename identity<Arg5>::type arg5,
+              typename identity<Arg6>::type arg6,
+              typename identity<Arg7>::type arg7)
+  {
+    // cref(c) makes bind refer to c itself instead of storing a copy of it
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr, std_cxx11::cref(c),
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2),
+                        internal::maybe_make_ref<Arg3>::act(arg3),
+                        internal::maybe_make_ref<Arg4>::act(arg4),
+                        internal::maybe_make_ref<Arg5>::act(arg5),
+                        internal::maybe_make_ref<Arg6>::act(arg6),
+                        internal::maybe_make_ref<Arg7>::act(arg7));
+    return Thread<RT>(bound_call);
+  }
+#endif
+
+// ----------- thread starters for functions with 8 arguments
+
+  /**
+   * new_thread() overload for free functions and static member functions
+   * with 8 arguments; returns a Thread<RT> built from the bound call.
+   *
+   * @ingroup threads
+   */
+  template <typename RT,
+            typename Arg1, typename Arg2, typename Arg3,
+            typename Arg4, typename Arg5, typename Arg6,
+            typename Arg7, typename Arg8>
+  inline Thread<RT>
+  new_thread (RT (*fun_ptr)(Arg1,Arg2,Arg3,Arg4,Arg5,
+                            Arg6,Arg7,Arg8),
+              typename identity<Arg1>::type arg1,
+              typename identity<Arg2>::type arg2,
+              typename identity<Arg3>::type arg3,
+              typename identity<Arg4>::type arg4,
+              typename identity<Arg5>::type arg5,
+              typename identity<Arg6>::type arg6,
+              typename identity<Arg7>::type arg7,
+              typename identity<Arg8>::type arg8)
+  {
+    // bind function and arguments into one nullary callable for Thread<RT>
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr,
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2),
+                        internal::maybe_make_ref<Arg3>::act(arg3),
+                        internal::maybe_make_ref<Arg4>::act(arg4),
+                        internal::maybe_make_ref<Arg5>::act(arg5),
+                        internal::maybe_make_ref<Arg6>::act(arg6),
+                        internal::maybe_make_ref<Arg7>::act(arg7),
+                        internal::maybe_make_ref<Arg8>::act(arg8));
+    return Thread<RT>(bound_call);
+  }
+
+
+
+  /**
+   * new_thread() overload for non-const member functions with 8 arguments.
+   * The object @p c is bound through a reference wrapper, not by copy.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C,
+            typename Arg1, typename Arg2, typename Arg3,
+            typename Arg4, typename Arg5, typename Arg6,
+            typename Arg7, typename Arg8>
+  inline Thread<RT>
+  new_thread (RT (C::*fun_ptr)(Arg1,Arg2,Arg3,Arg4,Arg5,
+                               Arg6,Arg7,Arg8),
+              typename identity<C>::type &c,
+              typename identity<Arg1>::type arg1,
+              typename identity<Arg2>::type arg2,
+              typename identity<Arg3>::type arg3,
+              typename identity<Arg4>::type arg4,
+              typename identity<Arg5>::type arg5,
+              typename identity<Arg6>::type arg6,
+              typename identity<Arg7>::type arg7,
+              typename identity<Arg8>::type arg8)
+  {
+    // ref(c) makes bind refer to c itself instead of storing a copy of it
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr, std_cxx11::ref(c),
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2),
+                        internal::maybe_make_ref<Arg3>::act(arg3),
+                        internal::maybe_make_ref<Arg4>::act(arg4),
+                        internal::maybe_make_ref<Arg5>::act(arg5),
+                        internal::maybe_make_ref<Arg6>::act(arg6),
+                        internal::maybe_make_ref<Arg7>::act(arg7),
+                        internal::maybe_make_ref<Arg8>::act(arg8));
+    return Thread<RT>(bound_call);
+  }
+
+#ifndef DEAL_II_CONST_MEMBER_DEDUCTION_BUG
+  /**
+   * new_thread() overload for const member functions with 8 arguments.
+   * The object @p c is bound through a const reference wrapper, not by copy.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C,
+            typename Arg1, typename Arg2, typename Arg3,
+            typename Arg4, typename Arg5, typename Arg6,
+            typename Arg7, typename Arg8>
+  inline Thread<RT>
+  new_thread (RT (C::*fun_ptr)(Arg1,Arg2,Arg3,Arg4,Arg5,
+                               Arg6,Arg7,Arg8) const,
+              typename identity<const C>::type &c,
+              typename identity<Arg1>::type arg1,
+              typename identity<Arg2>::type arg2,
+              typename identity<Arg3>::type arg3,
+              typename identity<Arg4>::type arg4,
+              typename identity<Arg5>::type arg5,
+              typename identity<Arg6>::type arg6,
+              typename identity<Arg7>::type arg7,
+              typename identity<Arg8>::type arg8)
+  {
+    // cref(c) makes bind refer to c itself instead of storing a copy of it
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr, std_cxx11::cref(c),
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2),
+                        internal::maybe_make_ref<Arg3>::act(arg3),
+                        internal::maybe_make_ref<Arg4>::act(arg4),
+                        internal::maybe_make_ref<Arg5>::act(arg5),
+                        internal::maybe_make_ref<Arg6>::act(arg6),
+                        internal::maybe_make_ref<Arg7>::act(arg7),
+                        internal::maybe_make_ref<Arg8>::act(arg8));
+    return Thread<RT>(bound_call);
+  }
+#endif
+
+// ----------- thread starters for functions with 9 arguments
+
+  /**
+   * new_thread() overload for free functions and static member functions
+   * with 9 arguments; returns a Thread<RT> built from the bound call.
+   *
+   * @ingroup threads
+   */
+  template <typename RT,
+            typename Arg1, typename Arg2, typename Arg3,
+            typename Arg4, typename Arg5, typename Arg6,
+            typename Arg7, typename Arg8, typename Arg9>
+  inline Thread<RT>
+  new_thread (RT (*fun_ptr)(Arg1,Arg2,Arg3,Arg4,Arg5,
+                            Arg6,Arg7,Arg8,Arg9),
+              typename identity<Arg1>::type arg1,
+              typename identity<Arg2>::type arg2,
+              typename identity<Arg3>::type arg3,
+              typename identity<Arg4>::type arg4,
+              typename identity<Arg5>::type arg5,
+              typename identity<Arg6>::type arg6,
+              typename identity<Arg7>::type arg7,
+              typename identity<Arg8>::type arg8,
+              typename identity<Arg9>::type arg9)
+  {
+    // bind function and arguments into one nullary callable for Thread<RT>
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr,
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2),
+                        internal::maybe_make_ref<Arg3>::act(arg3),
+                        internal::maybe_make_ref<Arg4>::act(arg4),
+                        internal::maybe_make_ref<Arg5>::act(arg5),
+                        internal::maybe_make_ref<Arg6>::act(arg6),
+                        internal::maybe_make_ref<Arg7>::act(arg7),
+                        internal::maybe_make_ref<Arg8>::act(arg8),
+                        internal::maybe_make_ref<Arg9>::act(arg9));
+    return Thread<RT>(bound_call);
+  }
+
+
+
+  /**
+   * new_thread() overload for non-const member functions with 9 arguments.
+   * The object @p c is bound through a reference wrapper, not by copy.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C,
+            typename Arg1, typename Arg2, typename Arg3,
+            typename Arg4, typename Arg5, typename Arg6,
+            typename Arg7, typename Arg8, typename Arg9>
+  inline Thread<RT>
+  new_thread (RT (C::*fun_ptr)(Arg1,Arg2,Arg3,Arg4,Arg5,
+                               Arg6,Arg7,Arg8,Arg9),
+              typename identity<C>::type &c,
+              typename identity<Arg1>::type arg1,
+              typename identity<Arg2>::type arg2,
+              typename identity<Arg3>::type arg3,
+              typename identity<Arg4>::type arg4,
+              typename identity<Arg5>::type arg5,
+              typename identity<Arg6>::type arg6,
+              typename identity<Arg7>::type arg7,
+              typename identity<Arg8>::type arg8,
+              typename identity<Arg9>::type arg9)
+  {
+    // ref(c) makes bind refer to c itself instead of storing a copy of it
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr, std_cxx11::ref(c),
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2),
+                        internal::maybe_make_ref<Arg3>::act(arg3),
+                        internal::maybe_make_ref<Arg4>::act(arg4),
+                        internal::maybe_make_ref<Arg5>::act(arg5),
+                        internal::maybe_make_ref<Arg6>::act(arg6),
+                        internal::maybe_make_ref<Arg7>::act(arg7),
+                        internal::maybe_make_ref<Arg8>::act(arg8),
+                        internal::maybe_make_ref<Arg9>::act(arg9));
+    return Thread<RT>(bound_call);
+  }
+
+#ifndef DEAL_II_CONST_MEMBER_DEDUCTION_BUG
+  /**
+   * new_thread() overload for const member functions with 9 arguments.
+   * The object @p c is bound through a const reference wrapper, not by copy.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C,
+            typename Arg1, typename Arg2, typename Arg3,
+            typename Arg4, typename Arg5, typename Arg6,
+            typename Arg7, typename Arg8, typename Arg9>
+  inline Thread<RT>
+  new_thread (RT (C::*fun_ptr)(Arg1,Arg2,Arg3,Arg4,Arg5,
+                               Arg6,Arg7,Arg8,Arg9) const,
+              typename identity<const C>::type &c,
+              typename identity<Arg1>::type arg1,
+              typename identity<Arg2>::type arg2,
+              typename identity<Arg3>::type arg3,
+              typename identity<Arg4>::type arg4,
+              typename identity<Arg5>::type arg5,
+              typename identity<Arg6>::type arg6,
+              typename identity<Arg7>::type arg7,
+              typename identity<Arg8>::type arg8,
+              typename identity<Arg9>::type arg9)
+  {
+    // cref(c) makes bind refer to c itself instead of storing a copy of it
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr, std_cxx11::cref(c),
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2),
+                        internal::maybe_make_ref<Arg3>::act(arg3),
+                        internal::maybe_make_ref<Arg4>::act(arg4),
+                        internal::maybe_make_ref<Arg5>::act(arg5),
+                        internal::maybe_make_ref<Arg6>::act(arg6),
+                        internal::maybe_make_ref<Arg7>::act(arg7),
+                        internal::maybe_make_ref<Arg8>::act(arg8),
+                        internal::maybe_make_ref<Arg9>::act(arg9));
+    return Thread<RT>(bound_call);
+  }
+#endif
+
+// ------------------------ ThreadGroup -------------------------------------
+
+  /**
+   * A collection of Thread<RT> objects that can be joined with one call.
+   * Since all elements are stored as Thread<RT>, the functions run by the
+   * collected threads must all have the same return type.
+   *
+   * @author Wolfgang Bangerth, 2003
+   * @ingroup threads
+   */
+  template <typename RT = void>
+  class ThreadGroup
+  {
+  public:
+    /**
+     * Append a copy of the thread object @p t to this group.
+     */
+    ThreadGroup &operator += (const Thread<RT> &t)
+    {
+      threads.insert (threads.end(), t);
+      return *this;
+    }
+
+    /**
+     * Call join() on every thread object in this group, in the order in
+     * which they were added. Threads that have been waited for before do
+     * no harm, so this function may be called repeatedly, and further
+     * thread objects may be added between two such calls.
+     */
+    void join_all () const
+    {
+      typedef typename std::list<Thread<RT> >::const_iterator thread_iterator;
+      for (thread_iterator t = threads.begin(); t != threads.end(); ++t)
+        t->join ();
+    }
+
+  private:
+    /**
+     * The thread objects collected so far, in insertion order.
+     */
+    std::list<Thread<RT> > threads;
+  };
+
+
+  template <typename> class Task;
+
+
+  namespace internal
+  {
+#ifdef DEAL_II_WITH_THREADS
+
+    template <typename> struct TaskDescriptor;
+
+    /**
+     * The tbb::task subclass that runs the function stored in a TaskDescriptor.
+     */
+    template <typename RT>
+    struct TaskEntryPoint : public tbb::task
+    {
+      TaskEntryPoint (TaskDescriptor<RT> &task_descriptor)
+        :
+        task_descriptor (task_descriptor)
+      {}
+
+      virtual tbb::task *execute ()
+      {
+        // run the stored function object, depositing its result in ret_val;
+        // any exception is handed to the internal::handle_*_exception hooks
+        try
+          {
+            call (task_descriptor.function, task_descriptor.ret_val);
+          }
+        catch (const std::exception &exc)
+          {
+            internal::handle_std_exception (exc);
+          }
+        catch (...)
+          {
+            internal::handle_unknown_exception ();
+          }
+        return 0;  // null pointer: no successor task is returned to TBB
+      }
+
+      /**
+       * The descriptor this task works on; held by reference, not owned.
+       */
+      TaskDescriptor<RT> &task_descriptor;
+    };
+
+    /**
+     * @internal
+     *
+     * Base class describing a task. This is the basic class abstracting the
+     * Threading Building Blocks implementation of tasks.  It provides a
+     * mechanism to start a new task, as well as for joining it.
+     *
+     * Internally, the way things are implemented is that all Task<> objects
+     * keep a shared pointer to the task descriptor. When the last Task<>
+     * object goes out of scope, the destructor of the descriptor is called.
+     * Since tasks can not be abandoned, the destructor makes sure that the
+     * task is finished before it can continue to destroy the object.
+     *
+     * Note that unlike threads, tasks are not always started right away, and
+     * so the starting thread can't rely on the fact that the started task can
+     * copy things off the spawning thread's stack frame. As a consequence,
+     * the task description needs to include a way to store the function and
+     * its arguments that shall be run on the task.
+     *
+     * @author Wolfgang Bangerth, 2009
+     */
+    template <typename RT>
+    struct TaskDescriptor
+    {
+    private:
+      /**
+       * The function and its arguments that are to be run on the task.
+       */
+      std_cxx11::function<RT ()> function;
+
+      /**
+       * Pointer to the (empty) TBB root task that the worker task is a child
+       * of. Set by the queue_task() function. The destructor of this class
+       * waits for the task via join() and then destroys the object behind
+       * this pointer explicitly.
+       */
+      tbb::task *task;
+
+      /**
+       * A place where the task will deposit its return value.
+       */
+      return_value<RT> ret_val;
+
+      /**
+       * A flag that is set by the first call to join(), immediately before
+       * waiting for the task. Later calls to join() see the flag and return
+       * without waiting again.
+       */
+      bool task_is_done;
+
+    public:
+
+      /**
+       * Constructor. Take the function to be run on this task as argument.
+       * The task is not started here; call queue_task() for that.
+       */
+      TaskDescriptor (const std_cxx11::function<RT ()> &function);
+
+      /**
+       * Default constructor. Must not be called: it triggers an
+       * internal-error assertion since we want to queue a task immediately
+       * upon construction of these objects to make sure that each
+       * TaskDescriptor object corresponds to exactly one task.
+       */
+      TaskDescriptor ();
+
+      /**
+       * Copy constructor. Must not be called: it triggers an internal-error
+       * assertion since we want to make sure that each TaskDescriptor object
+       * corresponds to exactly one task.
+       */
+      TaskDescriptor (const TaskDescriptor &);
+
+      /**
+       * Destructor. Waits for the task to finish and then destroys the TBB
+       * root task.
+       */
+      ~TaskDescriptor ();
+
+      /**
+       * Queue up the task to the scheduler. We need to do this in a separate
+       * function since the new task needs to access objects from the current
+       * object and that can only reliably happen if the current object is
+       * completely constructed already.
+       */
+      void queue_task ();
+
+      /**
+       * Join a task, i.e. wait for it to finish. This function can be called
+       * more than once; only the first call waits. NOTE(review): the
+       * implementation uses an unsynchronized flag and its own comments state
+       * that it does not work when called from a thread other than the one
+       * that queued the task, so do not rely on concurrent calls from
+       * different threads being safe.
+       */
+      void join ();
+
+
+      template <typename> friend struct TaskEntryPoint;
+      friend class dealii::Threads::Task<RT>;
+    };
+
+
+
+    /**
+     * Constructor. Store a copy of the function that is to be run on the
+     * task. The task itself is only created by queue_task(); until then the
+     * pointer to the TBB root task is explicitly null rather than
+     * indeterminate, so that the checks in the destructor operate on a
+     * well-defined value.
+     */
+    template <typename RT>
+    inline
+    TaskDescriptor<RT>::TaskDescriptor (const std_cxx11::function<RT ()> &function)
+      :
+      function (function),
+      task (0),
+      task_is_done (false)
+    {}
+
+
+    /**
+     * Create the TBB tasks for this descriptor and hand the worker to the
+     * scheduler. The root task is an empty task that is remembered in the
+     * @p task member; the worker is a TaskEntryPoint child that refers back
+     * to this descriptor.
+     */
+    template <typename RT>
+    inline
+    void
+    TaskDescriptor<RT>::queue_task ()
+    {
+      // this follows the pattern described in the TBB book on pages
+      // 230/231 ("Start a large task in parallel with the main
+      // program"): an empty root task with a reference count of 2 ...
+      task = new (tbb::task::allocate_root()) tbb::empty_task;
+      tbb::task &root = *task;
+      root.set_ref_count (2);
+
+      // ... and a single child that does the actual work
+      tbb::task &worker = *new (root.allocate_child()) TaskEntryPoint<RT>(*this);
+      root.spawn (worker);
+    }
+
+
+
+    /**
+     * Default constructor. Descriptors must always be created with a
+     * function to run, so getting here is an internal error. The members
+     * that the destructor reads are nevertheless given well-defined values
+     * so that nothing indeterminate is read should execution continue past
+     * the assertion.
+     */
+    template <typename RT>
+    TaskDescriptor<RT>::TaskDescriptor ()
+      :
+      task (0),
+      task_is_done (false)
+    {
+      // we shouldn't be getting here -- a descriptor without a
+      // function does not correspond to any task
+      Assert (false, ExcInternalError());
+    }
+
+
+
+    /**
+     * Copy constructor. Each descriptor corresponds to exactly one task, so
+     * copying one is an internal error. The members that the destructor
+     * reads are nevertheless given well-defined values so that nothing
+     * indeterminate is read should execution continue past the assertion.
+     */
+    template <typename RT>
+    TaskDescriptor<RT>::TaskDescriptor (const TaskDescriptor &)
+      :
+      task (0),
+      task_is_done (false)
+    {
+      // we shouldn't be getting here -- task descriptors can't be
+      // copied
+      Assert (false, ExcInternalError());
+    }
+
+
+
+    template <typename RT>
+    inline
+    TaskDescriptor<RT>::~TaskDescriptor ()
+    {
+      // block until the task has finished (returns at once if already joined)
+      join ();
+
+      // now destroy the empty root task ourselves. the TBB book
+      // recommends spawning it as well and letting the scheduler
+      // destroy it when done, but the scheduler may not get around to
+      // finishing that task before it goes out of scope (at the end
+      // of the program, or when the thread it ran on is done), and
+      // we would then get a hard-to-decipher warning about unfinished
+      // tasks when the scheduler "goes out of the arena". so destroy
+      // the empty task object explicitly instead. before doing so,
+      // check that the task has really been shut down, which is
+      // expressed by a reference count of zero
+      Assert (task != 0, ExcInternalError());
+      Assert (task->ref_count()==0, ExcInternalError());
+      task->destroy (*task);
+    }
+
+
+    /**
+     * Wait for the task to finish. Only the first call waits; it records
+     * that fact in @p task_is_done so that every later call returns
+     * immediately.
+     */
+    template <typename RT>
+    inline
+    void
+    TaskDescriptor<RT>::join ()
+    {
+      // TBB requires that tbb::task::wait_for_all() is called exactly
+      // once, so we only wait if nobody has done so before. querying
+      // the reference count of the task would be an alternative to
+      // the flag, but that is usually slower. note that this scheme
+      // does not work when the thread calling this function is not
+      // the same as the one that initialized the task.
+      //
+      // TODO: can we assert that no other thread tries to end the
+      // task?
+      if (!task_is_done)
+        {
+          // mark the task as joined, then let TBB wait for it to
+          // complete
+          task_is_done = true;
+          task->wait_for_all();
+        }
+    }
+
+
+
+#else        // no threading enabled
+
+    /**
+     * Task descriptor for the case that threading support is disabled. The
+     * function is run synchronously by the constructor, which is why
+     * queueing and joining have nothing left to do.
+     */
+    template <typename RT>
+    struct TaskDescriptor
+    {
+      /**
+       * Slot that receives the return value of the function.
+       */
+      return_value<RT> ret_val;
+
+      /**
+       * Constructor. Immediately call @p fn and store what it returns in
+       * the @p ret_val slot.
+       */
+      TaskDescriptor (const std_cxx11::function<RT ()> &fn)
+      {
+        call (fn, ret_val);
+      }
+
+      /**
+       * Run the task. Nothing to do in non-MT mode: the constructor has
+       * already run the function.
+       */
+      static void queue_task () {}
+
+      /**
+       * Wait for the task to return. Nothing to do in non-MT mode: the
+       * function has finished by the time the constructor returns.
+       */
+      static void join () {}
+    };
+
+#endif
+
+  }
+
+
+
+  /**
+   * Describes one task object based on the Threading Building Blocks' Task.
+   * Note that the call to join() must be executed on the same thread as the
+   * call to the constructor. Otherwise, there might be a deadlock. In other
+   * words, a Task object should never be passed on to another task for
+   * calling the join() method.
+   *
+   * Objects of this class are cheap handles: copies share a single task
+   * descriptor through a shared pointer, and the descriptor is destroyed
+   * when the last handle referring to it goes away.
+   *
+   * @author Wolfgang Bangerth, 2009
+   * @ingroup threads
+   */
+  template <typename RT = void>
+  class Task
+  {
+  public:
+    /**
+     * Construct a task object given a function object to execute on the task,
+     * and then schedule this function for execution.
+     *
+     * @post Using this constructor automatically makes the task object
+     * joinable().
+     */
+    Task (const std_cxx11::function<RT ()> &function_object)
+    {
+      // create a task descriptor and tell it to queue itself up with
+      // the scheduling system. queueing is a separate step because the
+      // descriptor has to be fully constructed before the task may
+      // refer to it
+      task_descriptor.reset (new internal::TaskDescriptor<RT>(function_object));
+      task_descriptor->queue_task ();
+    }
+
+
+    /**
+     * Copy constructor. The new object refers to the same task as @p t.
+     *
+     * @post The new object is joinable() if and only if @p t is.
+     */
+    Task (const Task<RT> &t)
+      :
+      task_descriptor (t.task_descriptor)
+    {}
+
+
+    /**
+     * Default constructor. You can't do much with a task object constructed
+     * this way, except for assigning it a task object that holds data created
+     * by the Threads::new_task() functions.
+     *
+     * @post Using this constructor leaves the object in an unjoinable state,
+     * i.e., joinable() will return false.
+     */
+    Task () {}
+
+    /**
+     * Join the task represented by this object, i.e. wait for it to finish.
+     *
+     * A task can be joined multiple times (while the first join() operation
+     * may block until the task has completed running, all successive attempts
+     * to join will return immediately).
+     *
+     * @pre You can't call this function if you have used the default
+     * constructor of this class and have not assigned a task object to it. In
+     * other words, the function joinable() must return true. Otherwise an
+     * ExcNoTask exception is thrown.
+     */
+    void join () const
+    {
+      AssertThrow (joinable(), ExcNoTask());
+      task_descriptor->join ();
+    }
+
+    /**
+     * Return whether the current object can be joined. You can join a task
+     * object once a task (typically created with Threads::new_task()) has
+     * actually been assigned to it. On the other hand, the function returns
+     * false if the object has been default constructed.
+     *
+     * A task can be joined multiple times (while the first join() operation
+     * may block until the task has completed running, all successive attempts
+     * to join will return immediately). Consequently, if this function
+     * returns true, it will continue to return true until the task object it
+     * reports on is assigned to from another object.
+     */
+    bool joinable () const
+    {
+      return (task_descriptor !=
+              std_cxx11::shared_ptr<internal::TaskDescriptor<RT> >());
+    }
+
+
+    /**
+     * Get the return value of the function of the task. Since this is only
+     * available once the task finishes, this implicitly also calls join().
+     * You can call this function multiple times as long as the object refers
+     * to the same task, and expect to get the same return value every time.
+     *
+     * @pre You can't call this function if you have used the default
+     * constructor of this class and have not assigned a task object to it. In
+     * other words, the function joinable() must return true.
+     */
+    RT return_value ()
+    {
+      join ();
+      return task_descriptor->ret_val.get();
+    }
+
+
+    /**
+     * Check for equality of task objects. Since objects of this class store
+     * an implicit pointer to an object that exists exactly once for each
+     * task, the check is simply to compare these pointers.
+     *
+     * The comparison does not modify either object and can therefore also
+     * be used on const task objects.
+     *
+     * @pre The current object must be joinable(); otherwise an ExcNoTask
+     * exception is thrown.
+     */
+    bool operator == (const Task &t) const
+    {
+      AssertThrow (joinable(), ExcNoTask());
+      return task_descriptor == t.task_descriptor;
+    }
+
+    /**
+     * @addtogroup Exceptions
+     * @{
+     */
+
+    /**
+     * Exception thrown when an operation that requires an associated task is
+     * attempted on an object for which joinable() returns false.
+     */
+    DeclExceptionMsg (ExcNoTask,
+                      "The current object is not associated with a task that "
+                      "can be joined. It may have been detached, or you "
+                      "may have already joined it in the past.");
+    //@}
+  private:
+    /**
+     * Shared pointer to the object representing the task. The shared
+     * pointer makes sure that that object lives as long as there is at
+     * least one subscriber to it.
+     */
+    std_cxx11::shared_ptr<internal::TaskDescriptor<RT> > task_descriptor;
+  };
+
+
+
+  /**
+   * Overload of the new_task function for objects that can be converted to
+   * std_cxx11::function<RT ()>, i.e. anything that can be called like a
+   * function object without arguments and returning an object of type RT (or
+   * void). Constructing the returned Task object is what schedules
+   * @p function for execution.
+   *
+   * @ingroup threads
+   */
+  template <typename RT>
+  inline
+  Task<RT>
+  new_task (const std_cxx11::function<RT ()> &function)
+  {
+    Task<RT> queued_task (function);
+    return queued_task;
+  }
+
+  /**
+   * Overload of the new_task function for non-member or static member
+   * functions with no arguments. The function pointer is wrapped into a
+   * function object and handed to the function-object overload.
+   *
+   * @ingroup threads
+   */
+  template <typename RT>
+  inline
+  Task<RT>
+  new_task (RT (*fun_ptr)())
+  {
+    const std_cxx11::function<RT ()> wrapped_call (fun_ptr);
+    return new_task<RT>(wrapped_call);
+  }
+
+
+  /**
+   * Overload of the new_task function for non-const member functions with
+   * no arguments. The object @p c is bound by reference (via
+   * std_cxx11::ref), not copied.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C>
+  inline
+  Task<RT>
+  new_task (RT (C::*fun_ptr)(),
+            typename identity<C>::type &c)
+  {
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr, std_cxx11::ref(c));
+    return new_task<RT>(bound_call);
+  }
+
+#ifndef DEAL_II_CONST_MEMBER_DEDUCTION_BUG
+  /**
+   * Overload of the new_task function for const member functions with no
+   * arguments. The object @p c is bound by const reference (via
+   * std_cxx11::cref), not copied.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C>
+  inline
+  Task<RT>
+  new_task (RT (C::*fun_ptr)() const,
+            const typename identity<C>::type &c)
+  {
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr, std_cxx11::cref(c));
+    return new_task<RT>(bound_call);
+  }
+#endif
+
+
+
+// ----------- task starters for unary functions
+
+  /**
+   * Overload of the new_task function for non-member or static member
+   * functions with 1 argument. The argument is passed through
+   * internal::maybe_make_ref and bound to @p fun_ptr; the resulting
+   * argument-free function object is what gets scheduled.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename Arg1>
+  inline
+  Task<RT>
+  new_task (RT (*fun_ptr)(Arg1),
+            typename identity<Arg1>::type arg1)
+  {
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr,
+                        internal::maybe_make_ref<Arg1>::act(arg1));
+    return new_task<RT>(bound_call);
+  }
+
+
+
+  /**
+   * Overload of the new_task function for non-const member functions with
+   * 1 argument. The object @p c is bound by reference (via std_cxx11::ref);
+   * the argument is passed through internal::maybe_make_ref before being
+   * bound.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C, typename Arg1>
+  inline
+  Task<RT>
+  new_task (RT (C::*fun_ptr)(Arg1),
+            typename identity<C>::type &c,
+            typename identity<Arg1>::type arg1)
+  {
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr, std_cxx11::ref(c),
+                        internal::maybe_make_ref<Arg1>::act(arg1));
+    return new_task<RT>(bound_call);
+  }
+
+#ifndef DEAL_II_CONST_MEMBER_DEDUCTION_BUG
+  /**
+   * Overload of the new_task function for const member functions with 1
+   * argument. The object @p c is bound by const reference (via
+   * std_cxx11::cref); the argument is passed through
+   * internal::maybe_make_ref before being bound.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C, typename Arg1>
+  inline
+  Task<RT>
+  new_task (RT (C::*fun_ptr)(Arg1) const,
+            typename identity<const C>::type &c,
+            typename identity<Arg1>::type arg1)
+  {
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr, std_cxx11::cref(c),
+                        internal::maybe_make_ref<Arg1>::act(arg1));
+    return new_task<RT>(bound_call);
+  }
+#endif
+
+// ----------- task starters for binary functions
+
+  /**
+   * Overload of the new_task function for non-member or static member
+   * functions with 2 arguments. Each argument is passed through
+   * internal::maybe_make_ref and bound to @p fun_ptr; the resulting
+   * argument-free function object is what gets scheduled.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename Arg1, typename Arg2>
+  inline
+  Task<RT>
+  new_task (RT (*fun_ptr)(Arg1,Arg2),
+            typename identity<Arg1>::type arg1,
+            typename identity<Arg2>::type arg2)
+  {
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr,
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2));
+    return new_task<RT>(bound_call);
+  }
+
+
+
+  /**
+   * Overload of the new_task function for non-const member functions with
+   * 2 arguments. The object @p c is bound by reference (via
+   * std_cxx11::ref); each argument is passed through
+   * internal::maybe_make_ref before being bound.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C, typename Arg1, typename Arg2>
+  inline
+  Task<RT>
+  new_task (RT (C::*fun_ptr)(Arg1,Arg2),
+            typename identity<C>::type &c,
+            typename identity<Arg1>::type arg1,
+            typename identity<Arg2>::type arg2)
+  {
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr, std_cxx11::ref(c),
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2));
+    return new_task<RT>(bound_call);
+  }
+
+#ifndef DEAL_II_CONST_MEMBER_DEDUCTION_BUG
+  /**
+   * Overload of the new_task function for const member functions with 2
+   * arguments. The object @p c is bound by const reference (via
+   * std_cxx11::cref); each argument is passed through
+   * internal::maybe_make_ref before being bound.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C, typename Arg1, typename Arg2>
+  inline
+  Task<RT>
+  new_task (RT (C::*fun_ptr)(Arg1,Arg2) const,
+            typename identity<const C>::type &c,
+            typename identity<Arg1>::type arg1,
+            typename identity<Arg2>::type arg2)
+  {
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr, std_cxx11::cref(c),
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2));
+    return new_task<RT>(bound_call);
+  }
+#endif
+
+// ----------- task starters for ternary functions
+
+  /**
+   * Overload of the new_task function for non-member or static member
+   * functions with 3 arguments. Each argument is passed through
+   * internal::maybe_make_ref and bound to @p fun_ptr; the resulting
+   * argument-free function object is what gets scheduled.
+   *
+   * @ingroup threads
+   */
+  template <typename RT,
+            typename Arg1, typename Arg2, typename Arg3>
+  inline
+  Task<RT>
+  new_task (RT (*fun_ptr)(Arg1,Arg2,Arg3),
+            typename identity<Arg1>::type arg1,
+            typename identity<Arg2>::type arg2,
+            typename identity<Arg3>::type arg3)
+  {
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr,
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2),
+                        internal::maybe_make_ref<Arg3>::act(arg3));
+    return new_task<RT>(bound_call);
+  }
+
+
+
+  /**
+   * Overload of the new_task function for non-const member functions with
+   * 3 arguments. The object @p c is bound by reference (via
+   * std_cxx11::ref); each argument is passed through
+   * internal::maybe_make_ref before being bound.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C,
+            typename Arg1, typename Arg2, typename Arg3>
+  inline
+  Task<RT>
+  new_task (RT (C::*fun_ptr)(Arg1,Arg2,Arg3),
+            typename identity<C>::type &c,
+            typename identity<Arg1>::type arg1,
+            typename identity<Arg2>::type arg2,
+            typename identity<Arg3>::type arg3)
+  {
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr, std_cxx11::ref(c),
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2),
+                        internal::maybe_make_ref<Arg3>::act(arg3));
+    return new_task<RT>(bound_call);
+  }
+
+#ifndef DEAL_II_CONST_MEMBER_DEDUCTION_BUG
+  /**
+   * Overload of the new_task function for const member functions with 3
+   * arguments. The object @p c is bound by const reference (via
+   * std_cxx11::cref); each argument is passed through
+   * internal::maybe_make_ref before being bound.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C,
+            typename Arg1, typename Arg2, typename Arg3>
+  inline
+  Task<RT>
+  new_task (RT (C::*fun_ptr)(Arg1,Arg2,Arg3) const,
+            typename identity<const C>::type &c,
+            typename identity<Arg1>::type arg1,
+            typename identity<Arg2>::type arg2,
+            typename identity<Arg3>::type arg3)
+  {
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr, std_cxx11::cref(c),
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2),
+                        internal::maybe_make_ref<Arg3>::act(arg3));
+    return new_task<RT>(bound_call);
+  }
+#endif
+
+
+// ----------- task starters for functions with 4 arguments
+
+  /**
+   * Overload of the new_task function for non-member or static member
+   * functions with 4 arguments. Each argument is passed through
+   * internal::maybe_make_ref and bound to @p fun_ptr; the resulting
+   * argument-free function object is what gets scheduled.
+   *
+   * @ingroup threads
+   */
+  template <typename RT,
+            typename Arg1, typename Arg2, typename Arg3, typename Arg4>
+  inline
+  Task<RT>
+  new_task (RT (*fun_ptr)(Arg1,Arg2,Arg3,Arg4),
+            typename identity<Arg1>::type arg1,
+            typename identity<Arg2>::type arg2,
+            typename identity<Arg3>::type arg3,
+            typename identity<Arg4>::type arg4)
+  {
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr,
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2),
+                        internal::maybe_make_ref<Arg3>::act(arg3),
+                        internal::maybe_make_ref<Arg4>::act(arg4));
+    return new_task<RT>(bound_call);
+  }
+
+
+
+  /**
+   * Overload of the new_task function for non-const member functions with
+   * 4 arguments. The object @p c is bound by reference (via
+   * std_cxx11::ref); each argument is passed through
+   * internal::maybe_make_ref before being bound.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C,
+            typename Arg1, typename Arg2, typename Arg3, typename Arg4>
+  inline
+  Task<RT>
+  new_task (RT (C::*fun_ptr)(Arg1,Arg2,Arg3,Arg4),
+            typename identity<C>::type &c,
+            typename identity<Arg1>::type arg1,
+            typename identity<Arg2>::type arg2,
+            typename identity<Arg3>::type arg3,
+            typename identity<Arg4>::type arg4)
+  {
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr, std_cxx11::ref(c),
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2),
+                        internal::maybe_make_ref<Arg3>::act(arg3),
+                        internal::maybe_make_ref<Arg4>::act(arg4));
+    return new_task<RT>(bound_call);
+  }
+
+#ifndef DEAL_II_CONST_MEMBER_DEDUCTION_BUG
+  /**
+   * Overload of the new_task function for const member functions with 4
+   * arguments. The object @p c is bound by const reference (via
+   * std_cxx11::cref); each argument is passed through
+   * internal::maybe_make_ref before being bound.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C,
+            typename Arg1, typename Arg2, typename Arg3, typename Arg4>
+  inline
+  Task<RT>
+  new_task (RT (C::*fun_ptr)(Arg1,Arg2,Arg3,Arg4) const,
+            typename identity<const C>::type &c,
+            typename identity<Arg1>::type arg1,
+            typename identity<Arg2>::type arg2,
+            typename identity<Arg3>::type arg3,
+            typename identity<Arg4>::type arg4)
+  {
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr, std_cxx11::cref(c),
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2),
+                        internal::maybe_make_ref<Arg3>::act(arg3),
+                        internal::maybe_make_ref<Arg4>::act(arg4));
+    return new_task<RT>(bound_call);
+  }
+#endif
+
+// ----------- task starters for functions with 5 arguments
+
+  /**
+   * Overload of the new_task function for non-member or static member
+   * functions with 5 arguments. Each argument is passed through
+   * internal::maybe_make_ref and bound to @p fun_ptr; the resulting
+   * argument-free function object is what gets scheduled.
+   *
+   * @ingroup threads
+   */
+  template <typename RT,
+            typename Arg1, typename Arg2, typename Arg3,
+            typename Arg4, typename Arg5>
+  inline
+  Task<RT>
+  new_task (RT (*fun_ptr)(Arg1,Arg2,Arg3,Arg4,Arg5),
+            typename identity<Arg1>::type arg1,
+            typename identity<Arg2>::type arg2,
+            typename identity<Arg3>::type arg3,
+            typename identity<Arg4>::type arg4,
+            typename identity<Arg5>::type arg5)
+  {
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr,
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2),
+                        internal::maybe_make_ref<Arg3>::act(arg3),
+                        internal::maybe_make_ref<Arg4>::act(arg4),
+                        internal::maybe_make_ref<Arg5>::act(arg5));
+    return new_task<RT>(bound_call);
+  }
+
+
+
+  /**
+   * Overload of the new_task function for non-const member functions with
+   * 5 arguments. The object @p c is bound by reference (via
+   * std_cxx11::ref); each argument is passed through
+   * internal::maybe_make_ref before being bound.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C,
+            typename Arg1, typename Arg2, typename Arg3,
+            typename Arg4, typename Arg5>
+  inline
+  Task<RT>
+  new_task (RT (C::*fun_ptr)(Arg1,Arg2,Arg3,Arg4,Arg5),
+            typename identity<C>::type &c,
+            typename identity<Arg1>::type arg1,
+            typename identity<Arg2>::type arg2,
+            typename identity<Arg3>::type arg3,
+            typename identity<Arg4>::type arg4,
+            typename identity<Arg5>::type arg5)
+  {
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr, std_cxx11::ref(c),
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2),
+                        internal::maybe_make_ref<Arg3>::act(arg3),
+                        internal::maybe_make_ref<Arg4>::act(arg4),
+                        internal::maybe_make_ref<Arg5>::act(arg5));
+    return new_task<RT>(bound_call);
+  }
+
+#ifndef DEAL_II_CONST_MEMBER_DEDUCTION_BUG
+  /**
+   * Overload of the new_task function for const member functions with 5
+   * arguments. The object @p c is bound by const reference (via
+   * std_cxx11::cref); each argument is passed through
+   * internal::maybe_make_ref before being bound.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C,
+            typename Arg1, typename Arg2, typename Arg3,
+            typename Arg4, typename Arg5>
+  inline
+  Task<RT>
+  new_task (RT (C::*fun_ptr)(Arg1,Arg2,Arg3,Arg4,Arg5) const,
+            typename identity<const C>::type &c,
+            typename identity<Arg1>::type arg1,
+            typename identity<Arg2>::type arg2,
+            typename identity<Arg3>::type arg3,
+            typename identity<Arg4>::type arg4,
+            typename identity<Arg5>::type arg5)
+  {
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr, std_cxx11::cref(c),
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2),
+                        internal::maybe_make_ref<Arg3>::act(arg3),
+                        internal::maybe_make_ref<Arg4>::act(arg4),
+                        internal::maybe_make_ref<Arg5>::act(arg5));
+    return new_task<RT>(bound_call);
+  }
+#endif
+
+// ----------- task starters for functions with 6 arguments
+
+  /**
+   * Overload of the new_task function for non-member or static member
+   * functions with 6 arguments. Each argument is passed through
+   * internal::maybe_make_ref and bound to @p fun_ptr; the resulting
+   * argument-free function object is what gets scheduled.
+   *
+   * @ingroup threads
+   */
+  template <typename RT,
+            typename Arg1, typename Arg2, typename Arg3,
+            typename Arg4, typename Arg5, typename Arg6>
+  inline
+  Task<RT>
+  new_task (RT (*fun_ptr)(Arg1,Arg2,Arg3,Arg4,Arg5,Arg6),
+            typename identity<Arg1>::type arg1,
+            typename identity<Arg2>::type arg2,
+            typename identity<Arg3>::type arg3,
+            typename identity<Arg4>::type arg4,
+            typename identity<Arg5>::type arg5,
+            typename identity<Arg6>::type arg6)
+  {
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr,
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2),
+                        internal::maybe_make_ref<Arg3>::act(arg3),
+                        internal::maybe_make_ref<Arg4>::act(arg4),
+                        internal::maybe_make_ref<Arg5>::act(arg5),
+                        internal::maybe_make_ref<Arg6>::act(arg6));
+    return new_task<RT>(bound_call);
+  }
+
+
+
+  /**
+   * Overload of the new_task function for non-const member functions with
+   * 6 arguments. The object @p c is bound by reference (via
+   * std_cxx11::ref); each argument is passed through
+   * internal::maybe_make_ref before being bound.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C,
+            typename Arg1, typename Arg2, typename Arg3,
+            typename Arg4, typename Arg5, typename Arg6>
+  inline
+  Task<RT>
+  new_task (RT (C::*fun_ptr)(Arg1,Arg2,Arg3,Arg4,Arg5,Arg6),
+            typename identity<C>::type &c,
+            typename identity<Arg1>::type arg1,
+            typename identity<Arg2>::type arg2,
+            typename identity<Arg3>::type arg3,
+            typename identity<Arg4>::type arg4,
+            typename identity<Arg5>::type arg5,
+            typename identity<Arg6>::type arg6)
+  {
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr, std_cxx11::ref(c),
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2),
+                        internal::maybe_make_ref<Arg3>::act(arg3),
+                        internal::maybe_make_ref<Arg4>::act(arg4),
+                        internal::maybe_make_ref<Arg5>::act(arg5),
+                        internal::maybe_make_ref<Arg6>::act(arg6));
+    return new_task<RT>(bound_call);
+  }
+
+#ifndef DEAL_II_CONST_MEMBER_DEDUCTION_BUG
+  /**
+   * Overload of the new_task function for const member functions with 6
+   * arguments. The object @p c is bound by const reference (via
+   * std_cxx11::cref); each argument is passed through
+   * internal::maybe_make_ref before being bound.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C,
+            typename Arg1, typename Arg2, typename Arg3,
+            typename Arg4, typename Arg5, typename Arg6>
+  inline
+  Task<RT>
+  new_task (RT (C::*fun_ptr)(Arg1,Arg2,Arg3,Arg4,Arg5,Arg6) const,
+            typename identity<const C>::type &c,
+            typename identity<Arg1>::type arg1,
+            typename identity<Arg2>::type arg2,
+            typename identity<Arg3>::type arg3,
+            typename identity<Arg4>::type arg4,
+            typename identity<Arg5>::type arg5,
+            typename identity<Arg6>::type arg6)
+  {
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr, std_cxx11::cref(c),
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2),
+                        internal::maybe_make_ref<Arg3>::act(arg3),
+                        internal::maybe_make_ref<Arg4>::act(arg4),
+                        internal::maybe_make_ref<Arg5>::act(arg5),
+                        internal::maybe_make_ref<Arg6>::act(arg6));
+    return new_task<RT>(bound_call);
+  }
+#endif
+
+// ----------- task starters for functions with 7 arguments
+
+  /**
+   * Overload of the new_task function for non-member or static member
+   * functions with 7 arguments. Each argument is passed through
+   * internal::maybe_make_ref and bound to @p fun_ptr; the resulting
+   * argument-free function object is what gets scheduled.
+   *
+   * @ingroup threads
+   */
+  template <typename RT,
+            typename Arg1, typename Arg2, typename Arg3,
+            typename Arg4, typename Arg5, typename Arg6,
+            typename Arg7>
+  inline
+  Task<RT>
+  new_task (RT (*fun_ptr)(Arg1,Arg2,Arg3,Arg4,Arg5,Arg6,Arg7),
+            typename identity<Arg1>::type arg1,
+            typename identity<Arg2>::type arg2,
+            typename identity<Arg3>::type arg3,
+            typename identity<Arg4>::type arg4,
+            typename identity<Arg5>::type arg5,
+            typename identity<Arg6>::type arg6,
+            typename identity<Arg7>::type arg7)
+  {
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr,
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2),
+                        internal::maybe_make_ref<Arg3>::act(arg3),
+                        internal::maybe_make_ref<Arg4>::act(arg4),
+                        internal::maybe_make_ref<Arg5>::act(arg5),
+                        internal::maybe_make_ref<Arg6>::act(arg6),
+                        internal::maybe_make_ref<Arg7>::act(arg7));
+    return new_task<RT>(bound_call);
+  }
+
+
+
+  /**
+   * Overload of the new_task function for non-const member functions with
+   * 7 arguments. The object @p c is bound by reference (via
+   * std_cxx11::ref); each argument is passed through
+   * internal::maybe_make_ref before being bound.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C,
+            typename Arg1, typename Arg2, typename Arg3,
+            typename Arg4, typename Arg5, typename Arg6,
+            typename Arg7>
+  inline
+  Task<RT>
+  new_task (RT (C::*fun_ptr)(Arg1,Arg2,Arg3,Arg4,Arg5,Arg6,Arg7),
+            typename identity<C>::type &c,
+            typename identity<Arg1>::type arg1,
+            typename identity<Arg2>::type arg2,
+            typename identity<Arg3>::type arg3,
+            typename identity<Arg4>::type arg4,
+            typename identity<Arg5>::type arg5,
+            typename identity<Arg6>::type arg6,
+            typename identity<Arg7>::type arg7)
+  {
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr, std_cxx11::ref(c),
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2),
+                        internal::maybe_make_ref<Arg3>::act(arg3),
+                        internal::maybe_make_ref<Arg4>::act(arg4),
+                        internal::maybe_make_ref<Arg5>::act(arg5),
+                        internal::maybe_make_ref<Arg6>::act(arg6),
+                        internal::maybe_make_ref<Arg7>::act(arg7));
+    return new_task<RT>(bound_call);
+  }
+
+#ifndef DEAL_II_CONST_MEMBER_DEDUCTION_BUG
+  /**
+   * Overload of the new_task function for const member functions with 7
+   * arguments. The object @p c is bound by const reference (via
+   * std_cxx11::cref); each argument is passed through
+   * internal::maybe_make_ref before being bound.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C,
+            typename Arg1, typename Arg2, typename Arg3,
+            typename Arg4, typename Arg5, typename Arg6,
+            typename Arg7>
+  inline
+  Task<RT>
+  new_task (RT (C::*fun_ptr)(Arg1,Arg2,Arg3,Arg4,Arg5,Arg6,Arg7) const,
+            typename identity<const C>::type &c,
+            typename identity<Arg1>::type arg1,
+            typename identity<Arg2>::type arg2,
+            typename identity<Arg3>::type arg3,
+            typename identity<Arg4>::type arg4,
+            typename identity<Arg5>::type arg5,
+            typename identity<Arg6>::type arg6,
+            typename identity<Arg7>::type arg7)
+  {
+    const std_cxx11::function<RT ()> bound_call
+      = std_cxx11::bind(fun_ptr, std_cxx11::cref(c),
+                        internal::maybe_make_ref<Arg1>::act(arg1),
+                        internal::maybe_make_ref<Arg2>::act(arg2),
+                        internal::maybe_make_ref<Arg3>::act(arg3),
+                        internal::maybe_make_ref<Arg4>::act(arg4),
+                        internal::maybe_make_ref<Arg5>::act(arg5),
+                        internal::maybe_make_ref<Arg6>::act(arg6),
+                        internal::maybe_make_ref<Arg7>::act(arg7));
+    return new_task<RT>(bound_call);
+  }
+#endif
+
+// ----------- thread starters for functions with 8 arguments
+
+  /**
+   * new_task overload for non-member or static member functions taking 8
+   * arguments; the bound call is passed on to new_task<RT>.
+   *
+   * @ingroup threads
+   */
+  template <typename RT,
+            typename Arg1, typename Arg2, typename Arg3,
+            typename Arg4, typename Arg5, typename Arg6,
+            typename Arg7, typename Arg8>
+  inline
+  Task<RT>
+  new_task (RT (*fun_ptr)(Arg1,Arg2,Arg3,Arg4,Arg5,
+                          Arg6,Arg7,Arg8),
+            typename identity<Arg1>::type arg1,
+            typename identity<Arg2>::type arg2,
+            typename identity<Arg3>::type arg3,
+            typename identity<Arg4>::type arg4,
+            typename identity<Arg5>::type arg5,
+            typename identity<Arg6>::type arg6,
+            typename identity<Arg7>::type arg7,
+            typename identity<Arg8>::type arg8)
+  {
+    // Bind fun_ptr to its eight arguments (each routed through
+    // internal::maybe_make_ref) and pass the bound call to new_task<RT>.
+    return new_task<RT> (std_cxx11::bind(fun_ptr,
+                                         internal::maybe_make_ref<Arg1>::act(arg1),
+                                         internal::maybe_make_ref<Arg2>::act(arg2),
+                                         internal::maybe_make_ref<Arg3>::act(arg3),
+                                         internal::maybe_make_ref<Arg4>::act(arg4),
+                                         internal::maybe_make_ref<Arg5>::act(arg5),
+                                         internal::maybe_make_ref<Arg6>::act(arg6),
+                                         internal::maybe_make_ref<Arg7>::act(arg7),
+                                         internal::maybe_make_ref<Arg8>::act(arg8)));
+  }
+
+
+
+  /**
+   * new_task overload for non-const member functions taking 8 arguments; the
+   * object is bound by reference and the call goes to new_task<RT>.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C,
+            typename Arg1, typename Arg2, typename Arg3,
+            typename Arg4, typename Arg5, typename Arg6,
+            typename Arg7, typename Arg8>
+  inline
+  Task<RT>
+  new_task (RT (C::*fun_ptr)(Arg1,Arg2,Arg3,Arg4,Arg5,
+                             Arg6,Arg7,Arg8),
+            typename identity<C>::type &c,
+            typename identity<Arg1>::type arg1,
+            typename identity<Arg2>::type arg2,
+            typename identity<Arg3>::type arg3,
+            typename identity<Arg4>::type arg4,
+            typename identity<Arg5>::type arg5,
+            typename identity<Arg6>::type arg6,
+            typename identity<Arg7>::type arg7,
+            typename identity<Arg8>::type arg8)
+  {
+    // Bind fun_ptr to the object c (wrapped in std_cxx11::ref) and to the
+    // eight arguments, then pass the bound call on to new_task<RT>.
+    return new_task<RT> (std_cxx11::bind(fun_ptr, std_cxx11::ref(c),
+                                         internal::maybe_make_ref<Arg1>::act(arg1),
+                                         internal::maybe_make_ref<Arg2>::act(arg2),
+                                         internal::maybe_make_ref<Arg3>::act(arg3),
+                                         internal::maybe_make_ref<Arg4>::act(arg4),
+                                         internal::maybe_make_ref<Arg5>::act(arg5),
+                                         internal::maybe_make_ref<Arg6>::act(arg6),
+                                         internal::maybe_make_ref<Arg7>::act(arg7),
+                                         internal::maybe_make_ref<Arg8>::act(arg8)));
+  }
+
+#ifndef DEAL_II_CONST_MEMBER_DEDUCTION_BUG
+  /**
+   * new_task overload for const member functions taking 8 arguments; the
+   * object is bound by const reference and the call goes to new_task<RT>.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C,
+            typename Arg1, typename Arg2, typename Arg3,
+            typename Arg4, typename Arg5, typename Arg6,
+            typename Arg7, typename Arg8>
+  inline
+  Task<RT>
+  new_task (RT (C::*fun_ptr)(Arg1,Arg2,Arg3,Arg4,Arg5,
+                             Arg6,Arg7,Arg8) const,
+            typename identity<const C>::type &c,
+            typename identity<Arg1>::type arg1,
+            typename identity<Arg2>::type arg2,
+            typename identity<Arg3>::type arg3,
+            typename identity<Arg4>::type arg4,
+            typename identity<Arg5>::type arg5,
+            typename identity<Arg6>::type arg6,
+            typename identity<Arg7>::type arg7,
+            typename identity<Arg8>::type arg8)
+  {
+    // Bind fun_ptr to the object c (wrapped in std_cxx11::cref) and to the
+    // eight arguments, then pass the bound call on to new_task<RT>.
+    return new_task<RT> (std_cxx11::bind(fun_ptr, std_cxx11::cref(c),
+                                         internal::maybe_make_ref<Arg1>::act(arg1),
+                                         internal::maybe_make_ref<Arg2>::act(arg2),
+                                         internal::maybe_make_ref<Arg3>::act(arg3),
+                                         internal::maybe_make_ref<Arg4>::act(arg4),
+                                         internal::maybe_make_ref<Arg5>::act(arg5),
+                                         internal::maybe_make_ref<Arg6>::act(arg6),
+                                         internal::maybe_make_ref<Arg7>::act(arg7),
+                                         internal::maybe_make_ref<Arg8>::act(arg8)));
+  }
+#endif
+
+// ----------- thread starters for functions with 9 arguments
+
+  /**
+   * new_task overload for non-member or static member functions taking 9
+   * arguments; the bound call is passed on to new_task<RT>.
+   *
+   * @ingroup threads
+   */
+  template <typename RT,
+            typename Arg1, typename Arg2, typename Arg3,
+            typename Arg4, typename Arg5, typename Arg6,
+            typename Arg7, typename Arg8, typename Arg9>
+  inline
+  Task<RT>
+  new_task (RT (*fun_ptr)(Arg1,Arg2,Arg3,Arg4,Arg5,
+                          Arg6,Arg7,Arg8,Arg9),
+            typename identity<Arg1>::type arg1,
+            typename identity<Arg2>::type arg2,
+            typename identity<Arg3>::type arg3,
+            typename identity<Arg4>::type arg4,
+            typename identity<Arg5>::type arg5,
+            typename identity<Arg6>::type arg6,
+            typename identity<Arg7>::type arg7,
+            typename identity<Arg8>::type arg8,
+            typename identity<Arg9>::type arg9)
+  {
+    // Bind fun_ptr to its nine arguments (each routed through
+    // internal::maybe_make_ref) and pass the bound call to new_task<RT>.
+    return new_task<RT> (std_cxx11::bind(fun_ptr,
+                                         internal::maybe_make_ref<Arg1>::act(arg1),
+                                         internal::maybe_make_ref<Arg2>::act(arg2),
+                                         internal::maybe_make_ref<Arg3>::act(arg3),
+                                         internal::maybe_make_ref<Arg4>::act(arg4),
+                                         internal::maybe_make_ref<Arg5>::act(arg5),
+                                         internal::maybe_make_ref<Arg6>::act(arg6),
+                                         internal::maybe_make_ref<Arg7>::act(arg7),
+                                         internal::maybe_make_ref<Arg8>::act(arg8),
+                                         internal::maybe_make_ref<Arg9>::act(arg9)));
+  }
+
+
+
+  /**
+   * new_task overload for non-const member functions taking 9 arguments; the
+   * object is bound by reference and the call goes to new_task<RT>.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C,
+            typename Arg1, typename Arg2, typename Arg3,
+            typename Arg4, typename Arg5, typename Arg6,
+            typename Arg7, typename Arg8, typename Arg9>
+  inline
+  Task<RT>
+  new_task (RT (C::*fun_ptr)(Arg1,Arg2,Arg3,Arg4,Arg5,
+                             Arg6,Arg7,Arg8,Arg9),
+            typename identity<C>::type &c,
+            typename identity<Arg1>::type arg1,
+            typename identity<Arg2>::type arg2,
+            typename identity<Arg3>::type arg3,
+            typename identity<Arg4>::type arg4,
+            typename identity<Arg5>::type arg5,
+            typename identity<Arg6>::type arg6,
+            typename identity<Arg7>::type arg7,
+            typename identity<Arg8>::type arg8,
+            typename identity<Arg9>::type arg9)
+  {
+    // Bind fun_ptr to the object c (wrapped in std_cxx11::ref) and to the
+    // nine arguments, then pass the bound call on to new_task<RT>.
+    return new_task<RT> (std_cxx11::bind(fun_ptr, std_cxx11::ref(c),
+                                         internal::maybe_make_ref<Arg1>::act(arg1),
+                                         internal::maybe_make_ref<Arg2>::act(arg2),
+                                         internal::maybe_make_ref<Arg3>::act(arg3),
+                                         internal::maybe_make_ref<Arg4>::act(arg4),
+                                         internal::maybe_make_ref<Arg5>::act(arg5),
+                                         internal::maybe_make_ref<Arg6>::act(arg6),
+                                         internal::maybe_make_ref<Arg7>::act(arg7),
+                                         internal::maybe_make_ref<Arg8>::act(arg8),
+                                         internal::maybe_make_ref<Arg9>::act(arg9)));
+  }
+
+#ifndef DEAL_II_CONST_MEMBER_DEDUCTION_BUG
+  /**
+   * new_task overload for const member functions taking 9 arguments; the
+   * object is bound by const reference and the call goes to new_task<RT>.
+   *
+   * @ingroup threads
+   */
+  template <typename RT, typename C,
+            typename Arg1, typename Arg2, typename Arg3,
+            typename Arg4, typename Arg5, typename Arg6,
+            typename Arg7, typename Arg8, typename Arg9>
+  inline
+  Task<RT>
+  new_task (RT (C::*fun_ptr)(Arg1,Arg2,Arg3,Arg4,Arg5,
+                             Arg6,Arg7,Arg8,Arg9) const,
+            typename identity<const C>::type &c,
+            typename identity<Arg1>::type arg1,
+            typename identity<Arg2>::type arg2,
+            typename identity<Arg3>::type arg3,
+            typename identity<Arg4>::type arg4,
+            typename identity<Arg5>::type arg5,
+            typename identity<Arg6>::type arg6,
+            typename identity<Arg7>::type arg7,
+            typename identity<Arg8>::type arg8,
+            typename identity<Arg9>::type arg9)
+  {
+    // Bind fun_ptr to the object c (wrapped in std_cxx11::cref) and to the
+    // nine arguments, then pass the bound call on to new_task<RT>.
+    return new_task<RT> (std_cxx11::bind(fun_ptr, std_cxx11::cref(c),
+                                         internal::maybe_make_ref<Arg1>::act(arg1),
+                                         internal::maybe_make_ref<Arg2>::act(arg2),
+                                         internal::maybe_make_ref<Arg3>::act(arg3),
+                                         internal::maybe_make_ref<Arg4>::act(arg4),
+                                         internal::maybe_make_ref<Arg5>::act(arg5),
+                                         internal::maybe_make_ref<Arg6>::act(arg6),
+                                         internal::maybe_make_ref<Arg7>::act(arg7),
+                                         internal::maybe_make_ref<Arg8>::act(arg8),
+                                         internal::maybe_make_ref<Arg9>::act(arg9)));
+  }
+#endif
+
+
+// ------------------------ TaskGroup -------------------------------------
+
+  /**
+   * A container for task objects. It lets you add new task objects and wait
+   * for all of them together. The task objects need to have the same return
+   * value for the called function.
+   *
+   * Note that the call to join_all() must be executed on the same thread as
+   * the calls that add subtasks. Otherwise, there might be a deadlock. In
+   * other words, a Task object should never be passed on to another task for
+   * calling the join() method.
+   *
+   * @author Wolfgang Bangerth, 2003
+   * @ingroup tasks
+   */
+  template <typename RT = void>
+  class TaskGroup
+  {
+  public:
+    /**
+     * Append the task @p t to the end of the collection; returns this group.
+     */
+    TaskGroup &operator += (const Task<RT> &t)
+    {
+      tasks.insert (tasks.end(), t);
+      return *this;
+    }
+
+    /**
+     * Call join() on every task in the collection, in the order the tasks
+     * were added. Tasks that have already been waited for are not a problem:
+     * this function may be called more than once, and new task objects may
+     * be added between subsequent calls.
+     */
+    void join_all () const
+    {
+      typename std::list<Task<RT> >::const_iterator task = tasks.begin();
+      for (; task != tasks.end(); ++task)
+        task->join ();
+    }
+
+  private:
+    /**
+     * The task objects added so far, in insertion order.
+     */
+    std::list<Task<RT> > tasks;
+  };
+
+}   // end of implementation of namespace Threads
+
+/**
+ * @}
+ */
+
+
+//---------------------------------------------------------------------------
+DEAL_II_NAMESPACE_CLOSE
+// end of #ifndef dealii__thread_management_h
+#endif
+//---------------------------------------------------------------------------
diff --git a/include/deal.II/base/time_stepping.h b/include/deal.II/base/time_stepping.h
new file mode 100644
index 0000000..19c2923
--- /dev/null
+++ b/include/deal.II/base/time_stepping.h
@@ -0,0 +1,591 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2014 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__time_stepping_h
+#define dealii__time_stepping_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/std_cxx11/function.h>
+
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * Namespace containing the time stepping methods.
+ *
+ * @author Bruno Turcksin
+ * @date 2014
+ */
+
+namespace TimeStepping
+{
+  /**
+   * The following Runge-Kutta methods are available:
+   * - Explicit methods (see ExplicitRungeKutta::initialize):
+   *   - FORWARD_EULER (first order)
+   *   - RK_THIRD_ORDER (third order Runge-Kutta)
+   *   - RK_CLASSIC_FOURTH_ORDER (classical fourth order Runge-Kutta)
+   * - Implicit methods (see ImplicitRungeKutta::initialize):
+   *   - BACKWARD_EULER (first order)
+   *   - IMPLICIT_MIDPOINT (second order)
+   *   - CRANK_NICOLSON (second order)
+   *   - SDIRK_TWO_STAGES (second order)
+   * - Embedded explicit methods (see EmbeddedExplicitRungeKutta::initialize):
+   *   - HEUN_EULER (second order)
+   *   - BOGACKI_SHAMPINE (third order)
+   *   - DOPRI: Dormand-Prince (fifth order, method used by ode45 in
+   *     MATLAB)
+   *   - FEHLBERG (fifth order)
+   *   - CASH_KARP (fifth order)
+   */
+  enum runge_kutta_method { FORWARD_EULER, RK_THIRD_ORDER, RK_CLASSIC_FOURTH_ORDER,
+                            BACKWARD_EULER, IMPLICIT_MIDPOINT, CRANK_NICOLSON,
+                            SDIRK_TWO_STAGES, HEUN_EULER, BOGACKI_SHAMPINE, DOPRI,
+                            FEHLBERG, CASH_KARP
+                          };
+
+
+
+  /**
+   * Reason for exiting evolve_one_time_step when using an embedded method:
+   * - DELTA_T: the time step is in the valid range.
+   * - MIN_DELTA_T: the time step was increased to the minimum acceptable one.
+   * - MAX_DELTA_T: the time step was reduced to the maximum acceptable one.
+   */
+  enum embedded_runge_kutta_time_step { DELTA_T, MIN_DELTA_T, MAX_DELTA_T };
+
+
+
+  /**
+   * Abstract class for time stepping methods. These methods assume that the
+   * equation has the form: $ \frac{\partial y}{\partial t} = f(t,y) $.
+   */
+  template <typename VectorType>
+  class TimeStepping
+  {
+  public:
+    /**
+     * Virtual destructor.
+     */
+    virtual ~TimeStepping() {}
+
+    /**
+     * Purely virtual function. This function is used to advance from time @p
+     * t to t+ @p delta_t. @p F is a vector of functions $ f(t,y) $ that
+     * should be integrated; their input parameters are the time t and the
+     * vector y, and their output is the value of f at this point. @p J_inverse
+     * is a vector of functions that compute the inverse of the Jacobians
+     * associated to the implicit problems. Their input parameters are the
+     * time, $ \tau $, and a vector; their output is the value of the function
+     * at this point. This function returns the time at the end of the step.
+     */
+    virtual double evolve_one_time_step
+    (std::vector<std_cxx11::function<VectorType (const double, const VectorType &)> >               &F,
+     std::vector<std_cxx11::function<VectorType (const double, const double, const VectorType &)> > &J_inverse,
+     double                                                                                         t,
+     double                                                                                         delta_t,
+     VectorType                                                                                     &y) = 0;
+
+    /**
+     * Empty base structure that derived classes extend to store information.
+     */
+    struct Status {};
+
+    /**
+     * Purely virtual function that returns the Status of the object.
+     */
+    virtual const Status &get_status() const = 0;
+  };
+
+
+
+  /**
+   * Base class for the Runge-Kutta methods.
+   *
+   * @author Damien Lebrun-Grandie, Bruno Turcksin
+   * @date 2014
+   */
+  template <typename VectorType>
+  class RungeKutta : public TimeStepping<VectorType>
+  {
+  public:
+    /**
+     * Virtual destructor.
+     */
+    virtual ~RungeKutta() {}
+
+    /**
+     * Purely virtual method used to initialize the Runge-Kutta method.
+     */
+    virtual void initialize(runge_kutta_method method) = 0;
+    /**
+     * This function is used to advance from time @p t to t+ @p delta_t. @p F
+     * is a vector of functions $ f(t,y) $ that should be integrated; the
+     * input parameters are the time t and the vector y and the output is the
+     * value of f at this point. @p J_inverse is a vector of functions that
+     * compute the inverse of the Jacobians associated to the implicit
+     * problems. The input parameters are the time, $ \tau $, and a vector.
+     * The output is the value of the function at this point. This function
+     * returns the time at the end of the time step. When using Runge-Kutta
+     * methods, @p F and @p J_inverse can only contain one element.
+     */
+    double evolve_one_time_step
+    (std::vector<std_cxx11::function<VectorType (const double, const VectorType &)> >               &F,
+     std::vector<std_cxx11::function<VectorType (const double, const double, const VectorType &)> > &J_inverse,
+     double                                                                                         t,
+     double                                                                                         delta_t,
+     VectorType &y);
+
+    /**
+     * Purely virtual function. This function is used to advance from time @p
+     * t to t+ @p delta_t. @p f  is the function $ f(t,y) $ that should be
+     * integrated; the input parameters are the time t and the vector y and
+     * the output is the value of f at this point. @p id_minus_tau_J_inverse
+     * is a function that computes $ (I-\tau J)^{-1} $ where $ I $ is the
+     * identity matrix, $ \tau $ is given, and $ J $ is the Jacobian $
+     * \frac{\partial f}{\partial y} $. The input parameters are the time, $
+     * \tau $, and a vector. The output is the value of the function at this
+     * point. evolve_one_time_step returns the time at the end of the step.
+     */
+    virtual double evolve_one_time_step
+    (std_cxx11::function<VectorType (const double, const VectorType &)>               f,
+     std_cxx11::function<VectorType (const double, const double, const VectorType &)> id_minus_tau_J_inverse,
+     double                                                                           t,
+     double                                                                           delta_t,
+     VectorType                                                                       &y) = 0;
+
+  protected:
+    /**
+     * Number of stages of the Runge-Kutta method.
+     */
+    unsigned int n_stages;
+
+    /**
+     * Butcher tableau coefficients (named b).
+     */
+    std::vector<double> b;
+
+    /**
+     * Butcher tableau coefficients (named c).
+     */
+    std::vector<double> c;
+
+    /**
+     * Butcher tableau coefficients (named a, stored as nested vectors).
+     */
+    std::vector<std::vector<double> > a;
+  };
+
+
+
+  /**
+   * ExplicitRungeKutta is derived from RungeKutta and implements the explicit
+   * methods.
+   */
+  template <typename VectorType>
+  class ExplicitRungeKutta : public RungeKutta<VectorType>
+  {
+  public:
+    using RungeKutta<VectorType>::evolve_one_time_step;
+
+    /**
+     * Default constructor. initialize(runge_kutta_method) needs to be called
+     * before the object can be used.
+     */
+    ExplicitRungeKutta() {}
+
+    /**
+     * Constructor. This function calls initialize(runge_kutta_method).
+     */
+    ExplicitRungeKutta(runge_kutta_method method);
+
+    /**
+     * Initialize the explicit Runge-Kutta method.
+     */
+    void initialize(runge_kutta_method method);
+
+    /**
+     * This function is used to advance from time @p t to t+ @p delta_t. @p f
+     * is the function $ f(t,y) $ that should be integrated; the input
+     * parameters are the time t and the vector y and the output is the value
+     * of f at this point. @p id_minus_tau_J_inverse is a function that
+     * computes $ (I-\tau J)^{-1} $ where $ I $ is the identity matrix, $ \tau
+     * $ is given, and $ J $ is the Jacobian $ \frac{\partial f}{\partial y}
+     * $. The input parameters are the time, $ \tau $, and a vector. The
+     * output is the value of the function at this point.
+     * evolve_one_time_step returns the time at the end of the time step.
+     */
+    double evolve_one_time_step
+    (std_cxx11::function<VectorType (const double, const VectorType &)>               f,
+     std_cxx11::function<VectorType (const double, const double, const VectorType &)> id_minus_tau_J_inverse,
+     double                                                                           t,
+     double                                                                           delta_t,
+     VectorType &y);
+
+    /**
+     * This function is used to advance from time @p t to t+ @p delta_t. This
+     * function is similar to the one derived from RungeKutta, but does not
+     * require id_minus_tau_J_inverse because it is not used for explicit
+     * methods. evolve_one_time_step returns the time at the end of the time
+     * step.
+     */
+    double evolve_one_time_step
+    (std_cxx11::function<VectorType (const double, const VectorType &)> f,
+     double                                                             t,
+     double                                                             delta_t,
+     VectorType                                                         &y);
+
+    /**
+     * This structure stores the name of the method used.
+     */
+    struct Status : public TimeStepping<VectorType>::Status
+    {
+      runge_kutta_method method;
+    };
+
+    /**
+     * Return the status of the current object.
+     */
+    const Status &get_status() const;
+
+  private:
+    /**
+     * Compute the different stages needed; results go into @p f_stages.
+     */
+    void compute_stages
+    (std_cxx11::function<VectorType (const double, const VectorType &)> f,
+     const double                                                       t,
+     const double                                                       delta_t,
+     const VectorType                                                   &y,
+     std::vector<VectorType>                                            &f_stages) const;
+
+    /**
+     * Status structure of the object.
+     */
+    Status status;
+  };
+
+
+
+  /**
+   * This class is derived from RungeKutta and implements the implicit methods.
+   * This class works only for Diagonal Implicit Runge-Kutta (DIRK) methods.
+   */
+  template <typename VectorType>
+  class ImplicitRungeKutta : public RungeKutta<VectorType>
+  {
+  public:
+    using RungeKutta<VectorType>::evolve_one_time_step;
+
+    /**
+     * Default constructor. initialize(runge_kutta_method) and
+     * set_newton_solver_parameters(unsigned int,double) need to be called
+     * before the object can be used.
+     */
+    ImplicitRungeKutta() {}
+
+    /**
+     * Constructor. This function calls initialize(runge_kutta_method) and
+     * initializes the maximum number of iterations and the tolerance of the
+     * Newton solver.
+     */
+    ImplicitRungeKutta(runge_kutta_method method, unsigned int max_it=100, double tolerance=1e-6);
+
+    /**
+     * Initialize the implicit Runge-Kutta method.
+     */
+    void initialize(runge_kutta_method method);
+
+    /**
+     * This function is used to advance from time @p t to t+ @p delta_t. @p f
+     * is the function $ f(t,y) $ that should be integrated; the input
+     * parameters are the time t and the vector y and the output is the value
+     * of f at this point. @p id_minus_tau_J_inverse is a function that
+     * computes $ (I-\tau J)^{-1}$ where $ I $ is the identity matrix, $ \tau $
+     * is given, and $ J $ is the Jacobian $ \frac{\partial f}{\partial y} $.
+     * The input parameters this function receives are the time, $ \tau $, and
+     * a vector. The output is the value of the function at this point.
+     * evolve_one_time_step returns the time at the end of the time step.
+     */
+    double evolve_one_time_step
+    (std_cxx11::function<VectorType (const double, const VectorType &)>               f,
+     std_cxx11::function<VectorType (const double, const double, const VectorType &)> id_minus_tau_J_inverse,
+     double                                                                           t,
+     double                                                                           delta_t,
+     VectorType                                                                       &y);
+
+    /**
+     * Set the maximum number of iterations and the tolerance used by the
+     * Newton solver.
+     */
+    void set_newton_solver_parameters(unsigned int max_it, double tolerance);
+
+    /**
+     * Structure that stores the name of the method, the number of Newton
+     * iterations and the norm of the residual when exiting the Newton solver.
+     */
+    struct Status : public TimeStepping<VectorType>::Status
+    {
+      runge_kutta_method method;
+      unsigned int       n_iterations;
+      double             norm_residual;
+    };
+
+    /**
+     * Return the status of the current object.
+     */
+    const Status &get_status() const;
+
+  private:
+    /**
+     * Compute the different stages needed; results go into @p f_stages.
+     */
+    void compute_stages
+    (std_cxx11::function<VectorType (const double, const VectorType &)>               f,
+     std_cxx11::function<VectorType (const double, const double, const VectorType &)> id_minus_tau_J_inverse,
+     double                                                                           t,
+     double                                                                           delta_t,
+     VectorType                                                                       &y,
+     std::vector<VectorType> &f_stages);
+
+    /**
+     * Newton solver used for the implicit stages.
+     */
+    void newton_solve(std_cxx11::function<void (const VectorType &,VectorType &)> get_residual,
+                      std_cxx11::function<VectorType (const VectorType &)>        id_minus_tau_J_inverse,
+                      VectorType                                                  &y);
+
+    /**
+     * Compute the residual needed by the Newton solver.
+     */
+    void compute_residual(std_cxx11::function<VectorType (const double, const VectorType &)> f,
+                          double                                                             t,
+                          double                                                             delta_t,
+                          const VectorType                                                   &old_y,
+                          const VectorType                                                   &y,
+                          VectorType                                                         &tendency,
+                          VectorType                                                         &residual) const;
+
+    /**
+     * When using SDIRK, there is no need to compute the linear combination of
+     * the stages. Thus, when this flag is true, the linear combination is
+     * skipped.
+     */
+    bool skip_linear_combi;
+
+    /**
+     * Maximum number of iterations of the Newton solver.
+     */
+    unsigned int max_it;
+
+    /**
+     * Tolerance of the Newton solver.
+     */
+    double tolerance;
+
+    /**
+     * Status structure of the object.
+     */
+    Status status;
+  };
+
+
+
+  /**
+   * This class is derived from RungeKutta and implements embedded explicit
+   * methods.
+   */
+  template <typename VectorType>
+  class EmbeddedExplicitRungeKutta : public RungeKutta<VectorType>
+  {
+  public:
+    using RungeKutta<VectorType>::evolve_one_time_step;
+
+    /**
+     * Default constructor; starts with no saved last stage (null pointer), so
+     * the destructor is safe. initialize() and set_time_adaptation_parameters()
+     * need to be called before the object can be used.
+     */
+    EmbeddedExplicitRungeKutta() : last_same_as_first(false), last_stage(0) {}
+
+    /**
+     * Constructor. This function calls initialize(runge_kutta_method) and
+     * initializes the parameters needed for time adaptation.
+     */
+    EmbeddedExplicitRungeKutta(runge_kutta_method method,
+                               double coarsen_param = 1.2,
+                               double refine_param = 0.8,
+                               double min_delta = 1e-14,
+                               double max_delta = 1e100,
+                               double refine_tol = 1e-8,
+                               double coarsen_tol = 1e-12);
+
+    /**
+     * Destructor. Calls free_memory().
+     */
+    ~EmbeddedExplicitRungeKutta()
+    {
+      free_memory();
+    }
+
+    /**
+     * If necessary, deallocate memory allocated by the object.
+     */
+    void free_memory();
+
+    /**
+     * Initialize the embedded explicit Runge-Kutta method.
+     */
+    void initialize(runge_kutta_method method);
+
+    /**
+     * This function is used to advance from time @p t to t+ @p delta_t. @p f
+     * is the function $ f(t,y) $ that should be integrated; the input
+     * parameters are the time t and the vector y and the output is the value
+     * of f at this point. @p id_minus_tau_J_inverse is a function that
+     * computes $ (I-\tau J)^{-1} $ where $ I $ is the identity matrix, $ \tau
+     * $ is given, and $ J $ is the Jacobian $ \frac{\partial f}{\partial y}
+     * $. The input parameters are the time, $ \tau $, and a vector. The
+     * output is the value of the function at this point.
+     * evolve_one_time_step returns the time at the end of the time step.
+     */
+    double evolve_one_time_step
+    (std_cxx11::function<VectorType (const double, const VectorType &)>               f,
+     std_cxx11::function<VectorType (const double, const double, const VectorType &)> id_minus_tau_J_inverse,
+     double                                                                           t,
+     double                                                                           delta_t,
+     VectorType &y);
+
+    /**
+     * This function is used to advance from time @p t to t+ @p delta_t. This
+     * function is similar to the one derived from TimeStepping, but does not
+     * require id_minus_tau_J_inverse because it is not used for explicit
+     * methods. evolve_one_time_step returns the time at the end of the time
+     * step.
+     */
+    double evolve_one_time_step
+    (std_cxx11::function<VectorType (const double, const VectorType &)> f,
+     double                                                             t,
+     double                                                             delta_t,
+     VectorType &y);
+
+    /**
+     * Set the parameters necessary for the time adaptation.
+     */
+    void set_time_adaptation_parameters(double coarsen_param,
+                                        double refine_param,
+                                        double min_delta,
+                                        double max_delta,
+                                        double refine_tol,
+                                        double coarsen_tol);
+
+    /**
+     * Structure that stores the name of the method, the reason to exit
+     * evolve_one_time_step, the number of iterations (n_iterations), a
+     * guess of what the next time step should be, and an estimate of the norm
+     * of the error.
+     */
+    struct Status : public TimeStepping<VectorType>::Status
+    {
+      runge_kutta_method method;
+      embedded_runge_kutta_time_step exit_delta_t;
+      unsigned int n_iterations;
+      double delta_t_guess;
+      double error_norm;
+    };
+
+    /**
+     * Return the status of the current object.
+     */
+    const Status &get_status() const;
+
+  private:
+    /**
+     * Compute the different stages needed; results go into @p f_stages.
+     */
+    void compute_stages(std_cxx11::function<VectorType (const double, const VectorType &)> f,
+                        const double                                                       t,
+                        const double                                                       delta_t,
+                        const VectorType                                                   &y,
+                        std::vector<VectorType>                                            &f_stages);
+
+    /**
+     * This parameter is the factor (>1) by which the time step is multiplied
+     * when the time stepping can be coarsened.
+     */
+    double coarsen_param;
+
+    /**
+     * This parameter is the factor (<1) by which the time step is multiplied
+     * when the time stepping must be refined.
+     */
+    double refine_param;
+
+    /**
+     * Smallest time step allowed.
+     */
+    double min_delta_t;
+
+    /**
+     * Largest time step allowed.
+     */
+    double max_delta_t;
+
+    /**
+     * Refinement tolerance: if the error estimate is larger than refine_tol,
+     * the time step is refined.
+     */
+    double refine_tol;
+
+    /**
+     * Coarsening tolerance: if the error estimate is smaller than coarsen_tol,
+     * the time step is coarsened.
+     */
+    double coarsen_tol;
+
+    /**
+     * If the flag is true, the last stage is the same as the first stage and
+     * one evaluation of f can be saved.
+     */
+    bool last_same_as_first;
+
+    /**
+     * Butcher tableau coefficients (named b1).
+     */
+    std::vector<double> b1;
+
+    /**
+     * Butcher tableau coefficients (named b2).
+     */
+    std::vector<double> b2;
+
+    /**
+     * If the last_same_as_first flag is set to true, the last stage is saved
+     * and reused as the first stage of the next time step.
+     */
+    VectorType *last_stage;
+
+    /**
+     * Status structure of the object.
+     */
+    Status status;
+  };
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/time_stepping.templates.h b/include/deal.II/base/time_stepping.templates.h
new file mode 100644
index 0000000..d36af60
--- /dev/null
+++ b/include/deal.II/base/time_stepping.templates.h
@@ -0,0 +1,837 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2014 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__time_stepping_templates_h
+#define dealii__time_stepping_templates_h
+
+#include <deal.II/base/std_cxx11/bind.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/time_stepping.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace TimeStepping
+{
+  // ----------------------------------------------------------------------
+  // RungeKutta
+  // ----------------------------------------------------------------------
+
+  /**
+   * Overload of evolve_one_time_step() that takes vectors of functions.
+   * Runge-Kutta methods integrate a single right-hand side, so @p F and
+   * @p J_inverse must each hold exactly one function; the call is forwarded
+   * to the single-function overload with F[0] and J_inverse[0].
+   *
+   * @param F Vector holding the one function to integrate.
+   * @param J_inverse Vector holding the one function passed on as second
+   * argument of the single-function overload.
+   * @param t Time at the beginning of the step.
+   * @param delta_t Size of the time step.
+   * @param y Solution vector, updated in place.
+   * @return Whatever the single-function overload returns.
+   */
+  template <typename VectorType>
+  double RungeKutta<VectorType>::evolve_one_time_step(
+    std::vector<std_cxx11::function<VectorType (const double, const VectorType &)> > &F,
+    std::vector<std_cxx11::function<VectorType (const double, const double, const VectorType &)> > &J_inverse,
+
+    double t,
+    double delta_t,
+    VectorType &y)
+  {
+    // AssertThrow raises the exception when its condition is false. The
+    // element [0] of both vectors is read below, so each vector has to
+    // contain exactly one entry (an empty vector must be rejected too).
+    AssertThrow(F.size()==1,
+                ExcMessage("RungeKutta methods cannot handle more that one function to integate."));
+    AssertThrow(J_inverse.size()==1,
+                ExcMessage("RungeKutta methods cannot handle more that one function to integate."));
+
+    return evolve_one_time_step(F[0],J_inverse[0],t,delta_t,y);
+  }
+
+
+
+  // ----------------------------------------------------------------------
+  // ExplicitRungeKutta
+  // ----------------------------------------------------------------------
+
+  /**
+   * Constructor. All the set-up work for @p method is delegated to
+   * initialize().
+   */
+  template <typename VectorType>
+  ExplicitRungeKutta<VectorType>::ExplicitRungeKutta(runge_kutta_method method)
+  {
+    this->initialize(method);
+  }
+
+
+
+  /**
+   * Record @p method in the status object and fill in the number of stages
+   * and the Butcher tableau coefficients (a, b, c) of that method.
+   *
+   * Coefficients left over from an earlier call are discarded first, so the
+   * function may be called more than once on the same object.
+   *
+   * @param method FORWARD_EULER, RK_THIRD_ORDER or RK_CLASSIC_FOURTH_ORDER.
+   * Any other value makes AssertThrow raise an exception.
+   */
+  template <typename VectorType>
+  void ExplicitRungeKutta<VectorType>::initialize(runge_kutta_method method)
+  {
+    status.method = method;
+
+    // The cases below only append to the coefficient vectors. Without
+    // emptying them first, a second call would leave the coefficients of the
+    // previously selected method in front of the new ones.
+    this->a.clear();
+    this->b.clear();
+    this->c.clear();
+
+    switch (method)
+      {
+      case (FORWARD_EULER) :
+      {
+        // Single stage: no row of a is needed because compute_stages() only
+        // reads a[i][j] for j<i.
+        this->n_stages = 1;
+        this->b.push_back(1.0);
+        this->c.push_back(0.0);
+
+        break;
+      }
+      case (RK_THIRD_ORDER) :
+      {
+        this->n_stages = 3;
+        this->b.reserve(this->n_stages);
+        this->c.reserve(this->n_stages);
+        this->b.push_back(1.0/6.0);
+        this->b.push_back(2.0/3.0);
+        this->b.push_back(1.0/6.0);
+        this->c.push_back(0.0);
+        this->c.push_back(0.5);
+        this->c.push_back(1.0);
+        // Rows of the strictly lower triangular matrix a; row i has i
+        // entries.
+        std::vector<double> tmp;
+        this->a.push_back(tmp);
+        tmp.resize(1);
+        tmp[0] = 0.5;
+        this->a.push_back(tmp);
+        tmp.resize(2);
+        tmp[0] = -1.0;
+        tmp[1] = 2.0;
+        this->a.push_back(tmp);
+
+        break;
+      }
+      case (RK_CLASSIC_FOURTH_ORDER) :
+      {
+        this->n_stages = 4;
+        this->b.reserve(this->n_stages);
+        this->c.reserve(this->n_stages);
+        std::vector<double> tmp;
+        this->a.push_back(tmp);
+        tmp.resize(1);
+        tmp[0] = 0.5;
+        this->a.push_back(tmp);
+        tmp.resize(2);
+        tmp[0] = 0.0;
+        tmp[1] = 0.5;
+        this->a.push_back(tmp);
+        tmp.resize(3);
+        // Set every entry of the row explicitly instead of relying on the
+        // value that tmp[0] still holds from the previous row.
+        tmp[0] = 0.0;
+        tmp[1] = 0.0;
+        tmp[2] = 1.0;
+        this->a.push_back(tmp);
+        this->b.push_back(1.0/6.0);
+        this->b.push_back(1.0/3.0);
+        this->b.push_back(1.0/3.0);
+        this->b.push_back(1.0/6.0);
+        this->c.push_back(0.0);
+        this->c.push_back(0.5);
+        this->c.push_back(0.5);
+        this->c.push_back(1.0);
+
+        break;
+      }
+      default :
+      {
+        AssertThrow(false,ExcMessage("Unimplemented explicit Runge-Kutta method."));
+      }
+      }
+  }
+
+
+
+  /**
+   * Overload with the signature shared with the implicit methods. The second
+   * function argument is not used by an explicit method; the call is simply
+   * forwarded to the overload without it.
+   *
+   * @return The value returned by the forwarded call.
+   */
+  template <typename VectorType>
+  double ExplicitRungeKutta<VectorType>::evolve_one_time_step
+  (std_cxx11::function<VectorType (const double, const VectorType &)> f,
+   std_cxx11::function<VectorType (const double, const double, const VectorType &)> /*id_minus_tau_J_inverse*/,
+   double                                                             t,
+   double                                                             delta_t,
+   VectorType                                                         &y)
+  {
+    const double end_time = this->evolve_one_time_step(f,t,delta_t,y);
+    return end_time;
+  }
+
+
+
+  /**
+   * Perform one explicit Runge-Kutta step of size @p delta_t starting at time
+   * @p t. All stages are evaluated first; afterwards each stage is combined
+   * into @p y by a call y.sadd(1., delta_t*b[i], stage_i).
+   *
+   * @param f Function to integrate, called as f(time, vector).
+   * @param y Solution vector, updated in place.
+   * @return The time at the end of the step, t+delta_t.
+   */
+  template <typename VectorType>
+  double ExplicitRungeKutta<VectorType>::evolve_one_time_step
+  (std_cxx11::function<VectorType (const double, const VectorType &)> f,
+   double                                                             t,
+   double                                                             delta_t,
+   VectorType                                                         &y)
+  {
+    // One slot per stage. Every slot starts out as a copy of y and is
+    // overwritten by compute_stages().
+    std::vector<VectorType> stage_values(this->n_stages,y);
+    compute_stages(f,t,delta_t,y,stage_values);
+
+    // Combine the stages into the solution, one weight at a time.
+    for (unsigned int stage=0; stage<this->n_stages; ++stage)
+      {
+        const double weight = delta_t *this->b[stage];
+        y.sadd(1.,weight,stage_values[stage]);
+      }
+
+    const double end_time = t+delta_t;
+    return end_time;
+  }
+
+
+
+  /**
+   * Read-only access to the status object of this integrator.
+   */
+  template <typename VectorType>
+  const typename ExplicitRungeKutta<VectorType>::Status &ExplicitRungeKutta<VectorType>::get_status() const
+  {
+    return this->status;
+  }
+
+
+
+  /**
+   * Evaluate all stages of the method. For stage i a vector is built from a
+   * copy of @p y and the already computed stages j<i (combined through sadd
+   * with the factors delta_t*a[i][j]); @p f is then evaluated on that vector
+   * at time t+c[i]*delta_t and the result is stored in f_stages[i].
+   *
+   * @param f_stages Output; must already hold n_stages entries.
+   */
+  template <typename VectorType>
+  void ExplicitRungeKutta<VectorType>::compute_stages
+  (std_cxx11::function<VectorType (const double, const VectorType &)> f,
+   const double                                                       t,
+   const double                                                       delta_t,
+   const VectorType                                                   &y,
+   std::vector<VectorType>                                            &f_stages) const
+  {
+    for (unsigned int stage=0; stage<this->n_stages; ++stage)
+      {
+        // Point at which f is evaluated for this stage.
+        VectorType stage_point(y);
+        for (unsigned int prev=0; prev<stage; ++prev)
+          {
+            const double factor = delta_t *this->a[stage][prev];
+            stage_point.sadd(1.,factor,f_stages[prev]);
+          }
+
+        const double stage_time = t+this->c[stage]*delta_t;
+        f_stages[stage] = f(stage_time,stage_point);
+      }
+  }
+
+
+
+  // ----------------------------------------------------------------------
+  // ImplicitRungeKutta
+  // ----------------------------------------------------------------------
+
+  /**
+   * Constructor. Stores the Newton solver parameters @p max_it and
+   * @p tolerance, switches skip_linear_combi off and lets initialize() set
+   * up the coefficients of @p method.
+   */
+  template <typename VectorType>
+  ImplicitRungeKutta<VectorType>::ImplicitRungeKutta(runge_kutta_method method,
+                                                     unsigned int max_it,
+                                                     double tolerance)
+    :
+    RungeKutta<VectorType> (),
+    skip_linear_combi(false),
+    max_it(max_it),
+    tolerance(tolerance)
+  {
+    this->initialize(method);
+  }
+
+
+
+  /**
+   * Record @p method in the status object and fill in the number of stages
+   * and the Butcher tableau coefficients (a, b, c) of that method. Row i of a
+   * has i+1 entries; the last one, a[i][i], is read by compute_stages().
+   *
+   * Coefficients left over from an earlier call are discarded first, so the
+   * function may be called more than once on the same object.
+   *
+   * @param method BACKWARD_EULER, IMPLICIT_MIDPOINT, CRANK_NICOLSON or
+   * SDIRK_TWO_STAGES. Any other value makes AssertThrow raise an exception.
+   */
+  template <typename VectorType>
+  void ImplicitRungeKutta<VectorType>::initialize(runge_kutta_method method)
+  {
+    status.method = method;
+
+    // The cases below only append to the coefficient vectors. Without
+    // emptying them first, a second call would leave the coefficients of the
+    // previously selected method in front of the new ones.
+    this->a.clear();
+    this->b.clear();
+    this->c.clear();
+
+    switch (method)
+      {
+      case (BACKWARD_EULER) :
+      {
+        this->n_stages = 1;
+        this->a.push_back(std::vector<double>(1, 1.0));
+        this->b.push_back(1.0);
+        this->c.push_back(1.0);
+
+        break;
+      }
+      case (IMPLICIT_MIDPOINT) :
+      {
+        this->a.push_back(std::vector<double>(1, 0.5));
+        this->b.push_back(1.0);
+        this->c.push_back(0.5);
+        this->n_stages = 1;
+
+        break;
+      }
+      case (CRANK_NICOLSON) :
+      {
+        this->n_stages = 2;
+        this->b.reserve(this->n_stages);
+        this->c.reserve(this->n_stages);
+        // First row (0), second row (0.5, 0.5).
+        this->a.push_back(std::vector<double>(1, 0.0));
+        this->a.push_back(std::vector<double>(2, 0.5));
+        this->b.push_back(0.5);
+        this->b.push_back(0.5);
+        this->c.push_back(0.0);
+        this->c.push_back(1.0);
+
+        break;
+      }
+      case (SDIRK_TWO_STAGES) :
+      {
+        this->n_stages = 2;
+        this->b.reserve(this->n_stages);
+        this->c.reserve(this->n_stages);
+        double const gamma = 1.0 - 1.0 / std::sqrt(2.0);
+        this->b.push_back(1.0 - gamma);
+        this->b.push_back(gamma);
+        this->a.push_back(std::vector<double>(1, gamma));
+        // The second row of a is identical to b.
+        this->a.push_back(this->b);
+        this->c.push_back(gamma);
+        this->c.push_back(1.0);
+
+        break;
+      }
+      default :
+      {
+        AssertThrow(false,ExcMessage("Unimplemented implicit Runge-Kutta method."));
+      }
+      }
+  }
+
+
+
+  /**
+   * Perform one implicit Runge-Kutta step of size @p delta_t starting at time
+   * @p t. The stages are computed by compute_stages(), which also modifies
+   * @p y. Unless skip_linear_combi is set, @p y is then reset to its value at
+   * the beginning of the step and the stages are combined into it through
+   * y.sadd(1., delta_t*b[i], stage_i).
+   *
+   * @return The time at the end of the step, t+delta_t.
+   */
+  template <typename VectorType>
+  double ImplicitRungeKutta<VectorType>::evolve_one_time_step
+  (std_cxx11::function<VectorType (const double, const VectorType &)> f,
+   std_cxx11::function<VectorType (const double, const double, const VectorType &)> id_minus_tau_J_inverse,
+   double                                                             t,
+   double                                                             delta_t,
+   VectorType                                                         &y)
+  {
+    // Copy of the solution at the start of the step.
+    VectorType y_start(y);
+
+    // One slot per stage, each initialized as a copy of y.
+    std::vector<VectorType> stage_values(this->n_stages,y);
+    compute_stages(f,id_minus_tau_J_inverse,t,delta_t,y,stage_values);
+
+    // Combine the stages only if this has not been switched off.
+    if (!skip_linear_combi)
+      {
+        y = y_start;
+        for (unsigned int stage=0; stage<this->n_stages; ++stage)
+          {
+            const double weight = delta_t *this->b[stage];
+            y.sadd(1.,weight,stage_values[stage]);
+          }
+      }
+
+    const double end_time = t+delta_t;
+    return end_time;
+  }
+
+
+
+  /**
+   * Replace the parameters used by newton_solve(): the bound on the number
+   * of loop iterations and the tolerance the residual norm is compared to.
+   */
+  template <typename VectorType>
+  void ImplicitRungeKutta<VectorType>::set_newton_solver_parameters(unsigned int max_it_, double tolerance_)
+  {
+    this->max_it    = max_it_;
+    this->tolerance = tolerance_;
+  }
+
+
+
+  /**
+   * Read-only access to the status object of this integrator.
+   */
+  template <typename VectorType>
+  const typename ImplicitRungeKutta<VectorType>::Status &ImplicitRungeKutta<VectorType>::get_status() const
+  {
+    return this->status;
+  }
+
+
+
+  /**
+   * Compute all stages of the implicit method. For stage i a vector is built
+   * from the solution at the beginning of the step and the already computed
+   * stages j<i (factors delta_t*a[i][j]); newton_solve() is then called with
+   * compute_residual() bound to that vector, to the stage time t+c[i]*delta_t
+   * and to the step a[i][i]*delta_t. compute_residual() writes the value of
+   * @p f into f_stages[i] every time it is called.
+   *
+   * @param y Passed to newton_solve() and updated by it.
+   * @param f_stages Output; must already hold n_stages entries.
+   */
+  template <typename VectorType>
+  void ImplicitRungeKutta<VectorType>::compute_stages(
+    std_cxx11::function<VectorType (const double, const VectorType &)> f,
+    std_cxx11::function<VectorType (const double, const double, const VectorType &)> id_minus_tau_J_inverse,
+    double t,
+    double delta_t,
+    VectorType &y,
+    std::vector<VectorType> &f_stages)
+  {
+    // y is changed by every Newton solve, so keep the incoming value.
+    VectorType y_start(y);
+    for (unsigned int stage=0; stage<this->n_stages; ++stage)
+      {
+        // Contribution of the start value and of the earlier stages.
+        VectorType known_part(y_start);
+        for (unsigned int prev=0; prev<stage; ++prev)
+          known_part.sadd(1.,delta_t *this->a[stage][prev],f_stages[prev]);
+
+        // Time and step size handed to the residual and to
+        // id_minus_tau_J_inverse for this stage.
+        const double stage_time = t+this->c[stage]*delta_t;
+        const double stage_tau = this->a[stage][stage]*delta_t;
+
+        // Placeholder _1 is the vector the callable is applied to, _2 the
+        // residual that compute_residual() fills in.
+        newton_solve(std_cxx11::bind(&ImplicitRungeKutta<VectorType>::compute_residual,this,f,stage_time,stage_tau,
+                                     std_cxx11::cref(known_part),std_cxx11::_1,std_cxx11::ref(f_stages[stage]),std_cxx11::_2),
+                     std_cxx11::bind(id_minus_tau_J_inverse,stage_time,stage_tau,std_cxx11::_1),y);
+      }
+  }
+
+
+
+  /**
+   * Iteration used to solve for one stage. Starting from @p y, the result of
+   * id_minus_tau_J_inverse applied to the current residual is combined into
+   * @p y through y.sadd(1.0, -1.0, ...), and the residual is recomputed. The
+   * loop stops as soon as the l2 norm of the residual is below the tolerance
+   * or after max_it passes. The iteration count and the last residual norm
+   * are stored in the status object.
+   *
+   * @param get_residual Called as get_residual(y, residual).
+   * @param y Initial guess on entry, last iterate on exit.
+   */
+  template <typename VectorType>
+  void ImplicitRungeKutta<VectorType>::newton_solve(
+    std_cxx11::function<void (const VectorType &,VectorType &)> get_residual,
+    std_cxx11::function<VectorType (const VectorType &)> id_minus_tau_J_inverse,
+    VectorType &y)
+  {
+    VectorType residual(y);
+    get_residual(y,residual);
+
+    // Norm of the residual of the initial guess; it is only reported if
+    // max_it is zero, otherwise it is overwritten in the loop.
+    double norm_residual = residual.l2_norm();
+
+    unsigned int iteration = 0;
+    for (; iteration<max_it; ++iteration)
+      {
+        y.sadd(1.0,-1.0,id_minus_tau_J_inverse(residual));
+        get_residual(y,residual);
+        norm_residual = residual.l2_norm();
+        // Leaving through break skips the increment of the counter.
+        if (norm_residual < tolerance)
+          break;
+      }
+
+    // NOTE(review): if the loop runs out without reaching the tolerance,
+    // the value stored here is max_it+1 although only max_it updates were
+    // done - confirm whether callers rely on that.
+    status.n_iterations = iteration+1;
+    status.norm_residual = norm_residual;
+  }
+
+
+
+  /**
+   * Evaluate @p f at (@p t, @p y), store the result in @p tendency and build
+   * @p residual from it, @p old_y and @p y with two sadd() calls.
+   *
+   * @param f Function to integrate, called as f(time, vector).
+   * @param t Time at which @p f is evaluated.
+   * @param delta_t Factor used in the first sadd() call.
+   * @param old_y Vector combined into the residual by the first sadd() call.
+   * @param y Vector at which @p f is evaluated; combined into the residual
+   * by the second sadd() call.
+   * @param tendency Output: the value f(t,y).
+   * @param residual Output: the residual.
+   */
+  template <typename VectorType>
+  void ImplicitRungeKutta<VectorType>::compute_residual
+  (std_cxx11::function<VectorType (const double, const VectorType &)> f,
+   double                                                             t,
+   double                                                             delta_t,
+   const VectorType                                                   &old_y,
+   const VectorType                                                   &y,
+   VectorType                                                         &tendency,
+   VectorType                                                         &residual) const
+  {
+    // The tendency is stored to save one evaluation of f.
+    tendency = f(t,y);
+    residual = tendency;
+    // Assuming sadd(s,a,V) computes s*(*this)+a*V (TODO confirm against the
+    // vector class in use), this gives delta_t*f(t,y)+old_y ...
+    residual.sadd(delta_t,1.0,old_y);
+    // ... and this turns it into y-old_y-delta_t*f(t,y).
+    residual.sadd(-1.0,1.,y);
+  }
+
+
+
+  // ----------------------------------------------------------------------
+  // EmbeddedExplicitRungeKutta
+  // ----------------------------------------------------------------------
+
+  /**
+   * Constructor. Stores the time adaptation parameters, starts with
+   * last_same_as_first switched off and without a saved last stage, and lets
+   * initialize() set up the coefficients of @p method.
+   *
+   * @param coarsen_param Factor applied to the time step when coarsening.
+   * @param refine_param Factor applied to the time step when refining.
+   * @param min_delta Smallest time step allowed.
+   * @param max_delta Largest time step allowed.
+   * @param refine_tol Error norm at or above which the step is refined.
+   * @param coarsen_tol Error norm below which the step may be coarsened.
+   */
+  template <typename VectorType>
+  EmbeddedExplicitRungeKutta<VectorType>::EmbeddedExplicitRungeKutta
+  (runge_kutta_method method,
+   double             coarsen_param,
+   double             refine_param,
+   double             min_delta,
+   double             max_delta,
+   double             refine_tol,
+   double             coarsen_tol)
+    :
+    coarsen_param(coarsen_param),
+    refine_param(refine_param),
+    min_delta_t(min_delta),
+    max_delta_t(max_delta),
+    refine_tol(refine_tol),
+    coarsen_tol(coarsen_tol),
+    last_same_as_first(false),
+    last_stage(NULL)
+  {
+    this->initialize(method);
+  }
+
+
+
+  /**
+   * Record @p method in the status object and fill in the number of stages,
+   * the coefficients a and c, and the two sets of weights b1 and b2 of that
+   * method. For BOGACKI_SHAMPINE and DOPRI the flag last_same_as_first is
+   * switched on; for all other methods it is switched off.
+   *
+   * Coefficients left over from an earlier call are discarded first, so the
+   * function may be called more than once on the same object. A stage saved
+   * in last_stage is not touched here; use free_memory() to release it.
+   *
+   * @param method HEUN_EULER, BOGACKI_SHAMPINE, DOPRI, FEHLBERG or CASH_KARP.
+   * Any other value makes AssertThrow raise an exception.
+   */
+  template <typename VectorType>
+  void EmbeddedExplicitRungeKutta<VectorType>::initialize(runge_kutta_method method)
+  {
+    status.method = method;
+
+    // The cases below only append to the coefficient vectors and only ever
+    // switch last_same_as_first on. Reset both so that the state left by a
+    // previously selected method cannot leak into the new one.
+    this->a.clear();
+    this->c.clear();
+    b1.clear();
+    b2.clear();
+    last_same_as_first = false;
+
+    switch (method)
+      {
+      case (HEUN_EULER) :
+      {
+        this->n_stages = 2;
+        this->a.push_back(std::vector<double>());
+        this->a.push_back(std::vector<double>(1, 1.0));
+        this->c.push_back(0.0);
+        this->c.push_back(1.0);
+        b1.push_back(0.5);
+        b1.push_back(0.5);
+        b2.push_back(1.0);
+        b2.push_back(0.0);
+
+        break;
+      }
+      case (BOGACKI_SHAMPINE) :
+      {
+        last_same_as_first = true;
+        this->n_stages = 4;
+        this->c.reserve(this->n_stages);
+        this->b1.reserve(this->n_stages);
+        this->b2.reserve(this->n_stages);
+        // Rows of the strictly lower triangular matrix a; row i has i
+        // entries.
+        std::vector<double> tmp;
+        this->a.push_back(tmp);
+        tmp.resize(1);
+        tmp[0] = 0.5;
+        this->a.push_back(tmp);
+        tmp.resize(2);
+        tmp[0] = 0.0;
+        tmp[1] = 0.75;
+        this->a.push_back(tmp);
+        tmp.resize(3);
+        tmp[0] = 2.0/9.0;
+        tmp[1] = 1.0/3.0;
+        tmp[2] = 4.0/9.0;
+        this->a.push_back(tmp);
+        this->c.push_back(0.0);
+        this->c.push_back(0.5);
+        this->c.push_back(0.75);
+        this->c.push_back(1.0);
+        this->b1.push_back(2.0/9.0);
+        this->b1.push_back(1.0/3.0);
+        this->b1.push_back(4.0/9.0);
+        this->b1.push_back(0.0);
+        this->b2.push_back(7.0/24.0);
+        this->b2.push_back(0.25);
+        this->b2.push_back(1.0/3.0);
+        this->b2.push_back(0.125);
+
+        break;
+      }
+      case (DOPRI) :
+      {
+        last_same_as_first = true;
+        this->n_stages = 7;
+        this->c.reserve(this->n_stages);
+        this->b1.reserve(this->n_stages);
+        this->b2.reserve(this->n_stages);
+        std::vector<double> tmp;
+        this->a.push_back(tmp);
+        tmp.resize(1);
+        tmp[0] = 1./5.;
+        this->a.push_back(tmp);
+        tmp.resize(2);
+        tmp[0] = 3./40.;
+        tmp[1] = 9./40.;
+        this->a.push_back(tmp);
+        tmp.resize(3);
+        tmp[0] = 44./45.;
+        tmp[1] = -56./15.;
+        tmp[2] = 32./9.;
+        this->a.push_back(tmp);
+        tmp.resize(4);
+        tmp[0] = 19372./6561.;
+        tmp[1] = -25360./2187.;
+        tmp[2] = 64448./6561.;
+        tmp[3] = -212./729.;
+        this->a.push_back(tmp);
+        tmp.resize(5);
+        tmp[0] = 9017./3168.;
+        tmp[1] = -355./33.;
+        tmp[2] = 46732./5247.;
+        tmp[3] = 49./176.;
+        tmp[4] = -5103./18656;
+        this->a.push_back(tmp);
+        tmp.resize(6);
+        tmp[0] = 35./384.;
+        tmp[1] = 0.;
+        tmp[2] = 500./1113.;
+        tmp[3] = 125./192.;
+        tmp[4] = -2187./6784.;
+        tmp[5] = 11./84.;
+        this->a.push_back(tmp);
+        this->c.push_back(0.);
+        this->c.push_back(1./5.);
+        this->c.push_back(3./10.);
+        this->c.push_back(4./5.);
+        this->c.push_back(8./9.);
+        this->c.push_back(1.);
+        this->c.push_back(1.);
+        this->b1.push_back(35./384.);
+        this->b1.push_back(0.);
+        this->b1.push_back(500./1113.);
+        this->b1.push_back(125./192.);
+        this->b1.push_back(-2187./6784.);
+        this->b1.push_back(11./84.);
+        this->b1.push_back(0.);
+        this->b2.push_back(5179./57600.);
+        this->b2.push_back(0.);
+        this->b2.push_back(7571./16695.);
+        this->b2.push_back(393./640.);
+        this->b2.push_back(-92097./339200.);
+        this->b2.push_back(187./2100.);
+        this->b2.push_back(1./40.);
+
+        break;
+      }
+      case (FEHLBERG) :
+      {
+        this->n_stages = 6;
+        this->c.reserve(this->n_stages);
+        this->b1.reserve(this->n_stages);
+        this->b2.reserve(this->n_stages);
+        std::vector<double> tmp;
+        this->a.push_back(tmp);
+        tmp.resize(1);
+        tmp[0] = 0.25;
+        this->a.push_back(tmp);
+        tmp.resize(2);
+        tmp[0] = 0.09375;
+        tmp[1] = 0.28125;
+        this->a.push_back(tmp);
+        tmp.resize(3);
+        tmp[0] = 1932.0/2197.0;
+        tmp[1] = -7200.0/2197.0;
+        tmp[2] = 7296.0/2197.0;
+        this->a.push_back(tmp);
+        tmp.resize(4);
+        tmp[0] = 439.0/216.0;
+        tmp[1] = -8.0;
+        tmp[2] = 3680.0/513.0;
+        tmp[3] = -845.0/4104.0;
+        this->a.push_back(tmp);
+        tmp.resize(5);
+        tmp[0] = -8.0/27.0;
+        tmp[1] = 2.0;
+        tmp[2] = -3544.0/2565.0;
+        tmp[3] = 1859.0/4104.0;
+        tmp[4] = -0.275;
+        this->a.push_back(tmp);
+        this->c.push_back(0.0);
+        this->c.push_back(0.25);
+        this->c.push_back(0.375);
+        this->c.push_back(12.0/13.0);
+        this->c.push_back(1.0);
+        this->c.push_back(0.5);
+        this->b1.push_back(16.0/135.0);
+        this->b1.push_back(0.0);
+        this->b1.push_back(6656.0/12825.0);
+        this->b1.push_back(28561.0/56430.0);
+        this->b1.push_back(-0.18);
+        this->b1.push_back(2.0/55.0);
+        this->b2.push_back(25.0/216.0);
+        this->b2.push_back(0.0);
+        this->b2.push_back(1408.0/2565.0);
+        this->b2.push_back(2197.0/4104.0);
+        this->b2.push_back(-0.2);
+        this->b2.push_back(0.0);
+
+        break;
+      }
+      case (CASH_KARP) :
+      {
+        this->n_stages = 6;
+        this->c.reserve(this->n_stages);
+        this->b1.reserve(this->n_stages);
+        this->b2.reserve(this->n_stages);
+        std::vector<double> tmp;
+        this->a.push_back(tmp);
+        tmp.resize(1);
+        tmp[0] = 0.2;
+        this->a.push_back(tmp);
+        tmp.resize(2);
+        tmp[0] = 0.075;
+        tmp[1] = 0.225;
+        this->a.push_back(tmp);
+        tmp.resize(3);
+        tmp[0] = 0.3;
+        tmp[1] = -0.9;
+        tmp[2] = 1.2;
+        this->a.push_back(tmp);
+        tmp.resize(4);
+        tmp[0] = -11.0/54.0;
+        tmp[1] = 2.5;
+        tmp[2] = -70.0/27.0;
+        tmp[3] = 35.0/27.0;
+        this->a.push_back(tmp);
+        tmp.resize(5);
+        tmp[0] = 1631.0/55296.0;
+        tmp[1] = 175.0/512.0;
+        tmp[2] = 575.0/13824.0;
+        tmp[3] = 44275.0/110592.0;
+        tmp[4] = 253.0/4096.0;
+        this->a.push_back(tmp);
+        this->c.push_back(0.0);
+        this->c.push_back(0.2);
+        this->c.push_back(0.3);
+        this->c.push_back(0.6);
+        this->c.push_back(1.0);
+        this->c.push_back(0.875);
+        this->b1.push_back(37.0/378.0);
+        this->b1.push_back(0.0);
+        this->b1.push_back(250.0/621.0);
+        this->b1.push_back(125.0/594.0);
+        this->b1.push_back(0.0);
+        this->b1.push_back(512.0/1771.0);
+        this->b2.push_back(2825.0/27648.0);
+        this->b2.push_back(0.0);
+        this->b2.push_back(18575.0/48384.0);
+        this->b2.push_back(13525.0/55296.0);
+        this->b2.push_back(277.0/14336.0);
+        this->b2.push_back(0.25);
+
+        break;
+      }
+      default :
+      {
+        AssertThrow(false,ExcMessage("Unimplemented Embedded Runge-Kutta method."));
+      }
+      }
+  }
+
+
+
+  /**
+   * Release the vector saved in last_stage, if any, and reset the pointer.
+   */
+  template <typename VectorType>
+  void EmbeddedExplicitRungeKutta<VectorType>::free_memory()
+  {
+    // Deleting a null pointer has no effect, so no test is needed.
+    delete last_stage;
+    last_stage = NULL;
+  }
+
+
+
+  /**
+   * Overload with the signature shared with the implicit methods. The second
+   * function argument is not used by an explicit method; the call is simply
+   * forwarded to the overload without it.
+   *
+   * @return The value returned by the forwarded call.
+   */
+  template <typename VectorType>
+  double EmbeddedExplicitRungeKutta<VectorType>::evolve_one_time_step(
+    std_cxx11::function<VectorType (const double, const VectorType &)> f,
+    std_cxx11::function<VectorType (const double, const double, const VectorType &)> /*id_minus_tau_J_inverse*/,
+    double t,
+    double delta_t,
+    VectorType &y)
+  {
+    const double end_time = this->evolve_one_time_step(f,t,delta_t,y);
+    return end_time;
+  }
+
+
+
+  /**
+   * Perform one step of the embedded method starting at time @p t, with
+   * @p delta_t as first candidate for the step size.
+   *
+   * The step is repeated from the same start value with a step size reduced
+   * by refine_param (but never below min_delta_t) until the l2 norm of the
+   * error estimate is below refine_tol, or until the step has been done with
+   * min_delta_t. The solution is advanced with the weights b1; the error
+   * estimate uses the weights b2-b1.
+   *
+   * On exit the status object holds the number of attempts, the last error
+   * norm, the reason for leaving the loop and a guess for the next time
+   * step: the accepted step size multiplied by coarsen_param (capped at
+   * max_delta_t) if the error norm is below coarsen_tol, the accepted step
+   * size otherwise.
+   *
+   * @param f Function to integrate, called as f(time, vector).
+   * @param y Solution vector, updated in place.
+   * @return t plus the step size that was finally used.
+   */
+  template <typename VectorType>
+  double EmbeddedExplicitRungeKutta<VectorType>::evolve_one_time_step(
+    std_cxx11::function<VectorType (const double, const VectorType &)> f,
+    double t, double delta_t, VectorType &y)
+  {
+    bool done = false;
+    unsigned int count = 0;
+    double error_norm = 0.;
+    VectorType old_y(y);
+    VectorType error(y);
+    std::vector<VectorType> f_stages(this->n_stages,y);
+
+    while (!done)
+      {
+        // Every attempt starts again from the solution at time t.
+        error = 0.;
+        y = old_y;
+        // Compute the different stages needed.
+        compute_stages(f,t,delta_t,y,f_stages);
+
+        for (unsigned int i=0; i<this->n_stages; ++i)
+          {
+            y.sadd(1.,delta_t *this->b1[i],f_stages[i]);
+            error.sadd(1.,delta_t *(b2[i]-b1[i]),f_stages[i]);
+          }
+
+        error_norm = error.l2_norm();
+        // Check if the norm of error is less than the coarsening tolerance
+        if (error_norm<coarsen_tol)
+          {
+            done = true;
+            // Increase the guessed time step
+            double new_delta_t = delta_t *coarsen_param;
+            // Check that the guessed time step is smaller than the maximum time
+            // step allowed.
+            if (new_delta_t>max_delta_t)
+              {
+                status.exit_delta_t = MAX_DELTA_T;
+                status.delta_t_guess =  max_delta_t;
+              }
+            else
+              {
+                status.exit_delta_t = DELTA_T;
+                // Propose the enlarged step. Reporting delta_t here would
+                // mean that the step is never coarsened.
+                status.delta_t_guess = new_delta_t;
+              }
+          }
+        // Check if the norm of error is less than the refining tolerance
+        else if (error_norm<refine_tol)
+          {
+            done = true;
+            status.exit_delta_t = DELTA_T;
+            status.delta_t_guess = delta_t;
+          }
+        else
+          {
+            // If the time step is already the smallest acceptable, exit.
+            // The exact comparison works because delta_t is set to
+            // min_delta_t below whenever it would fall under it.
+            if (delta_t==min_delta_t)
+              {
+                done = true;
+                status.exit_delta_t = MIN_DELTA_T;
+                status.delta_t_guess = delta_t;
+              }
+            // Reduce the time step.
+            else
+              {
+                delta_t *= refine_param;
+                if (delta_t<min_delta_t)
+                  delta_t = min_delta_t;
+              }
+          }
+
+        ++count;
+      }
+
+    // Save the last stage if necessary
+    if (last_same_as_first==true)
+      {
+        if (last_stage==NULL)
+          last_stage = new VectorType(f_stages.back());
+        else
+          *last_stage = f_stages.back();
+      }
+
+    status.n_iterations = count;
+    status.error_norm = error_norm;
+
+    return (t+delta_t);
+  }
+
+
+
+  /**
+   * Replace all six parameters that control the adaptation of the time
+   * step: the coarsening and refinement factors, the smallest and largest
+   * time step allowed, and the refinement and coarsening tolerances.
+   */
+  template <typename VectorType>
+  void EmbeddedExplicitRungeKutta<VectorType>::set_time_adaptation_parameters(double coarsen_param_,
+      double refine_param_,
+      double min_delta_,
+      double max_delta_,
+      double refine_tol_,
+      double coarsen_tol_)
+  {
+    // Factors applied to the time step.
+    this->coarsen_param = coarsen_param_;
+    this->refine_param  = refine_param_;
+
+    // Bounds for the time step.
+    this->min_delta_t = min_delta_;
+    this->max_delta_t = max_delta_;
+
+    // Tolerances the error norm is compared to.
+    this->refine_tol  = refine_tol_;
+    this->coarsen_tol = coarsen_tol_;
+  }
+
+
+
+  /**
+   * Read-only access to the status object of this integrator.
+   */
+  template <typename VectorType>
+  const typename EmbeddedExplicitRungeKutta<VectorType>::Status &EmbeddedExplicitRungeKutta<VectorType>::get_status() const
+  {
+    return this->status;
+  }
+
+
+  /**
+   * Evaluate the stages of the method. If last_same_as_first is set and a
+   * stage has been saved in last_stage, that vector is used as first stage
+   * and @p f is not evaluated for it. For every other stage i a vector is
+   * built from @p y and the stages j<i (factors delta_t*a[i][j]), and @p f is
+   * evaluated on it at time t+c[i]*delta_t.
+   *
+   * @param f_stages Output; must already hold n_stages entries.
+   */
+  template <typename VectorType>
+  void EmbeddedExplicitRungeKutta<VectorType>::compute_stages(
+    std_cxx11::function<VectorType (const double, const VectorType &)> f,
+    const double t,
+    const double delta_t,
+    const VectorType &y,
+    std::vector<VectorType> &f_stages)
+  {
+    VectorType stage_point(y);
+
+    // Index of the first stage that has to be computed.
+    unsigned int first_stage = 0;
+    if (last_same_as_first && last_stage!=NULL)
+      {
+        f_stages[0] = *last_stage;
+        first_stage = 1;
+      }
+
+    for (unsigned int stage=first_stage; stage<this->n_stages; ++stage)
+      {
+        stage_point = y;
+        for (unsigned int prev=0; prev<stage; ++prev)
+          stage_point.sadd(1.0,delta_t *this->a[stage][prev],f_stages[prev]);
+
+        const double stage_time = t+this->c[stage]*delta_t;
+        f_stages[stage] = f(stage_time,stage_point);
+      }
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/timer.h b/include/deal.II/base/timer.h
new file mode 100644
index 0000000..a652929
--- /dev/null
+++ b/include/deal.II/base/timer.h
@@ -0,0 +1,749 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__timer_h
+#define dealii__timer_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/conditional_ostream.h>
+#include <deal.II/base/thread_management.h>
+#include <deal.II/base/utilities.h>
+
+#ifdef DEAL_II_WITH_MPI
+#  include <mpi.h>
+#endif
+
+#include <string>
+#include <list>
+#include <map>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * This is a very simple class which provides information about both the CPU
+ * time and the wallclock time elapsed since the timer was started last time.
+ * Information is retrieved from the system on the basis of clock cycles since
+ * last time the computer was booted for the CPU time. The wall time is based
+ * on the system clock accessed by @p gettimeofday, with a typical accuracy of
+ * 0.01 ms on linux systems.
+ *
+ *
+ * <h3>Usage</h3>
+ *
+ * Use of this class is as you might expect by looking at the member
+ * functions:
+ * @code
+ *   Timer timer;
+ *   timer.start ();
+ *
+ *   // do some complicated computations here
+ *   ...
+ *
+ *   timer.stop ();
+ *
+ *   std::cout << "Elapsed CPU time: " << timer() << " seconds.";
+ *   std::cout << "Elapsed wall time: " << timer.wall_time() << " seconds.";
+ *
+ *   // reset timer for the next thing it shall do
+ *   timer.reset();
+ * @endcode
+ *
+ * Alternatively, you can also restart the timer instead of resetting it. The
+ * times between successive calls to start() / stop() will then be
+ * accumulated. The usage of this class is also explained in the step-28,
+ * step-29 and step-30 tutorial programs.
+ *
+ * @note Implementation of this class is system dependent. In case
+ * multithreaded routines (matrix-vector products, error estimators, etc.) are
+ * used, the CPU time is accumulated from all the children.
+ *
+ * @ingroup utilities
+ * @author G. Kanschat, W. Bangerth, M. Kronbichler
+ */
+class Timer
+{
+public:
+  /**
+   * Constructor. Starts the timer at 0 sec.
+   */
+  Timer ();
+
+#ifdef DEAL_II_WITH_MPI
+  /**
+   * Constructor that takes an MPI communicator as input. A timer constructed
+   * this way will sum up the CPU times over all processors in the MPI network
+   * when requested by the operator ().
+   *
+   * Starts the timer at 0 sec.
+   *
+   * If @p sync_wall_time is true, the wall time is synchronized between all
+   * CPUs using a MPI_Barrier() and a collective operation. Note that this
+   * only works if you stop() the timer before querying for the wall time. The
+   * time for the MPI operations are not included in the timing but may slow
+   * down your program.
+   *
+   * This constructor is only available if the deal.II compiler is an MPI
+   * compiler.
+   */
+  Timer (MPI_Comm mpi_communicator,
+         const bool sync_wall_time = false);
+
+  /**
+   * Returns a reference to the data structure with global timing information.
+   * Filled after calling stop().
+   */
+  const Utilities::MPI::MinMaxAvg &get_data() const;
+
+  /**
+   * Prints the data to the given stream.
+   */
+  template <class StreamType>
+  void print_data(StreamType &stream) const;
+
+
+#endif
+
+  /**
+   * Re-start the timer at the point where it was stopped. This way a
+   * cumulative measurement of time is possible.
+   */
+  void start ();
+
+  /**
+   * Sets the current time as next starting time and return the elapsed time
+   * in seconds.
+   */
+  double stop ();
+
+  /**
+   * Stop the timer if necessary and reset the elapsed time to zero.
+   */
+  void reset ();
+
+  /**
+   * Resets the elapsed time to zero and starts the timer. This corresponds to
+   * calling @p reset() and @p start() on the Timer object.
+   */
+  void restart();
+
+  /**
+   * Access to the current CPU time without disturbing time measurement. The
+   * elapsed time is returned in units of seconds.
+   */
+  double operator() () const;
+
+  /**
+   * Access to the current wall time without disturbing time measurement. The
+   * elapsed time is returned in units of seconds.
+   */
+  double wall_time () const;
+
+  /**
+   * Returns the last lap time; the time taken between the last start()/stop()
+   * call.
+   */
+  double get_lap_time () const;
+
+private:
+
+  /**
+   * Value of the user time when start() was called the last time or when the
+   * object was created and no stop() was issued in between.
+   */
+  double              start_time;
+
+
+  /**
+   * Similar to #start_time, but needed for children threads in multithread
+   * mode. Value of the user time when start() was called the last time or
+   * when the object was created and no stop() was issued in between.
+   *
+   * For some reason (error in operating system?) the function call
+   * <tt>getrusage(RUSAGE_CHILDREN,.)</tt> gives always 0 (at least on
+   * Solaris7). Hence the Timer class still does not yet work for
+   * multithreading mode.
+   */
+  double              start_time_children;
+
+  /**
+   * Value of the wall time when start() was called the last time or when the
+   * object was created and no stop() was issued in between.
+   */
+  double              start_wall_time;
+
+  /**
+   * Accumulated time for all previous start()/stop() cycles. The time for the
+   * present cycle is not included.
+   */
+  double              cumulative_time;
+
+  /**
+   * Accumulated wall time for all previous start()/stop() cycles. The wall
+   * time for the present cycle is not included.
+   */
+  double              cumulative_wall_time;
+
+  /**
+   * Stores the last lap time; the time between the last start()/stop() cycle.
+   */
+  double              last_lap_time;
+
+  /**
+   * Store whether the timer is presently running.
+   */
+  bool                running;
+
+  /**
+   * The MPI communicator used by this timer (e.g. in print_data()).
+   */
+  MPI_Comm            mpi_communicator;
+
+#ifdef DEAL_II_WITH_MPI
+  /**
+   * Store whether the wall time is synchronized between machines.
+   */
+  bool sync_wall_time;
+
+  /**
+   * A structure for parallel wall time measurement that includes the minimum
+   * time recorded among all processes, the maximum time as well as the
+   * average time defined as the sum of all individual times divided by the
+   * number of MPI processes in the MPI_Comm.
+   */
+  Utilities::MPI::MinMaxAvg mpi_data;
+#endif
+};
+
+
+
+//TODO: The following class is not thread-safe
+/**
+ * This class can be used to generate formatted output from time measurements
+ * of different subsections in a program. It is possible to create several
+ * sections that perform certain aspects of the program. A section can be
+ * entered several times. By changing the options in OutputFrequency and
+ * OutputType, the user can choose whether output should be generated every
+ * time a section is left or just at the end of the program. Moreover, it is
+ * possible to show CPU times, wall times or both.
+ *
+ * <h3>Usage</h3>
+ *
+ * Use of this class could be as follows:
+ * @code
+ *   TimerOutput timer (std::cout, TimerOutput::summary,
+ *                      TimerOutput::wall_times);
+ *
+ *   timer.enter_subsection ("Setup dof system");
+ *   setup_dofs();
+ *   timer.leave_subsection();
+ *
+ *   timer.enter_subsection ("Assemble");
+ *   assemble_system_1();
+ *   timer.leave_subsection();
+ *
+ *   timer.enter_subsection ("Solve");
+ *   solve_system_1();
+ *   timer.leave_subsection();
+ *
+ *   timer.enter_subsection ("Assemble");
+ *   assemble_system_2();
+ *   timer.leave_subsection();
+ *
+ *   timer.enter_subsection ("Solve");
+ *   solve_system_2();
+ *   timer.leave_subsection();
+ *
+ *   // do something else...
+ * @endcode
+ * When run, this program will return an output like this:
+ * @code
+ * +---------------------------------------------+------------+------------+
+ * | Total wallclock time elapsed since start    |      88.8s |            |
+ * |                                             |            |            |
+ * | Section                         | no. calls |  wall time | % of total |
+ * +---------------------------------+-----------+------------+------------+
+ * | Assemble                        |         2 |      19.7s |        22% |
+ * | Solve                           |         2 |      3.03s |       3.4% |
+ * | Setup dof system                |         1 |      3.97s |       4.5% |
+ * +---------------------------------+-----------+------------+------------+
+ * @endcode
+ * The output shows that we entered the assembly and solve sections twice,
+ * and reports how much time we spent there. Moreover, the class measures the
+ * total time spent from start to termination of the TimerOutput object. In
+ * this case, we did a lot of other stuff, so that the time proportions of the
+ * functions we measured are far away from 100 percent.
+ *
+ *
+ * <h3>Using scoped timers</h3>
+ *
+ * The scheme above where you have to have calls to
+ * TimerOutput::enter_subsection() and TimerOutput::leave_subsection() is
+ * awkward if the sections in between these calls contain <code>return</code>
+ * statements or may throw exceptions. In that case, it is easy to forget that
+ * one nevertheless needs to leave the section somehow, somewhere. An easier
+ * approach is to use "scoped" sections. This is a variable that when you
+ * create it enters a section, and leaves the section when you destroy it. If
+ * this is a variable local to a particular scope (a code block between curly
+ * braces) and you leave this scope due to a <code>return</code> statement or
+ * an exception, then the variable is destroyed and the timed section is left
+ * automatically. Consequently, we could have written the code piece above as
+ * follows, with exactly the same result but now exception-safe:
+ * @code
+ *   TimerOutput timer (std::cout, TimerOutput::summary,
+ *                      TimerOutput::wall_times);
+ *
+ *   {
+ *     TimerOutput::Scope timer_section(timer, "Setup dof system");
+ *     setup_dofs();
+ *   }
+ *
+ *   {
+ *     TimerOutput::Scope timer_section(timer, "Assemble");
+ *     assemble_system_1();
+ *   }
+ *
+ *   {
+ *     TimerOutput::Scope timer_section(timer, "Solve");
+ *     solve_system_1();
+ *   }
+ *
+ *   {
+ *     TimerOutput::Scope timer_section(timer, "Assemble");
+ *     assemble_system_2();
+ *   }
+ *
+ *   {
+ *     TimerOutput::Scope timer_section(timer, "Solve");
+ *     solve_system_2();
+ *   }
+ *
+ *   // do something else...
+ * @endcode
+ *
+ *
+ * <h3>Usage in parallel programs using MPI</h3>
+ *
+ * In a parallel program built on MPI, using the class in a way such as the
+ * one shown above would result in a situation where each process times the
+ * corresponding sections and then outputs the resulting timing information at
+ * the end. This is annoying since you'd get a lot of output -- one set of
+ * timing information from each processor.
+ *
+ * This can be avoided by only letting one processor generate screen output,
+ * typically by using an object of type ConditionalOStream instead of
+ * <code>std::cout</code> to write to screen (see, for example, step-17,
+ * step-18, step-32 and step-40, all of which use this method).
+ *
+ * This way, only a single processor outputs timing information, typically the
+ * first process in the MPI universe. However, if you take the above code
+ * snippet as an example, imagine what would happen if
+ * <code>setup_dofs()</code> is fast on processor zero and slow on at least
+ * one of the other processors; and if the first thing
+ * <code>assemble_system_1()</code> does is something that requires all
+ * processors to communicate. In this case, the timing section with name
+ * <code>"Setup dof system"</code> will yield a short run time on processor
+ * zero, whereas the section <code>"Assemble"</code> will take a long time:
+ * not because <code>assemble_system_1()</code> takes a particularly long
+ * time, but because the processor on which we time (or, rather, the one on
+ * which we generate output) happens to have to wait for a long time till
+ * the other processor is finally done with
+ * <code>setup_dofs()</code> and starts to participate in
+ * <code>assemble_system_1()</code>. In other words, the timing that is
+ * reported is unreliable because it reflects run times from other processors.
+ * Furthermore, the run time of this section on processor zero has nothing to
+ * do with the run time of the section on other processors but instead with
+ * the run time of <i>the previous section</i> on another processor.
+ *
+ * The usual way to avoid this is to introduce a barrier into the parallel
+ * code just before we start and stop timing sections. This ensures that all
+ * processes are at the same place and the timing information then reflects
+ * the maximal run time across all processors. To achieve this, you need to
+ * initialize the TimerOutput object with an MPI communicator object, for
+ * example as in the following code:
+ * @code
+ *   TimerOutput timer (MPI_COMM_WORLD,
+ *                      pcout,
+ *                      TimerOutput::summary,
+ *                      TimerOutput::wall_times);
+ * @endcode
+ * Here, <code>pcout</code> is an object of type ConditionalOStream that makes
+ * sure that we only generate output on a single processor. See the step-32
+ * and step-40 tutorial programs for this kind of usage of this class.
+ *
+ * @ingroup utilities
+ * @author M. Kronbichler, 2009.
+ */
+class TimerOutput
+{
+public:
+  /**
+   * Helper class to enter/exit sections in TimerOutput by constructing a
+   * simple scope-based object. The purpose of this class is explained in the
+   * documentation of TimerOutput.
+   */
+  class Scope
+  {
+  public:
+    /**
+     * Enter the given section in the timer. The section is left again when
+     * stop() is called or, at the latest, when the destructor runs.
+     */
+    Scope(dealii::TimerOutput &timer_, const std::string &section_name);
+
+    /**
+     * Destructor. Calls stop().
+     */
+    ~Scope();
+
+    /**
+     * In case you want to exit the scope before the destructor is executed,
+     * call this function.
+     */
+    void stop();
+
+  private:
+    /**
+     * Reference to the TimerOutput object
+     */
+    dealii::TimerOutput &timer;
+    /**
+     * Do we still need to exit the section we are in?
+     */
+    bool in;
+  };
+
+  /**
+   * An enumeration data type that describes whether to generate output every
+   * time we exit a section, just at the end, both, or never.
+   */
+  enum OutputFrequency
+  {
+    every_call,
+    summary,
+    every_call_and_summary,
+    never
+  };
+
+  /**
+   * An enumeration data type that describes whether to show CPU times, wall
+   * times, or both CPU and wall times whenever we generate output.
+   */
+  enum OutputType
+  {
+    cpu_times,
+    wall_times,
+    cpu_and_wall_times
+  };
+
+  /**
+   * Constructor.
+   *
+   * @param stream The stream (of type std::ostream) to which output is
+   * written.
+   * @param output_frequency A variable indicating when output is to be
+   * written to the given stream.
+   * @param output_type A variable indicating what kind of timing the output
+   * should represent (CPU or wall time).
+   */
+  TimerOutput (std::ostream              &stream,
+               const enum OutputFrequency output_frequency,
+               const enum OutputType      output_type);
+
+  /**
+   * Constructor.
+   *
+   * @param stream The stream (of type ConditionalOStream) to which output is
+   * written.
+   * @param output_frequency A variable indicating when output is to be
+   * written to the given stream.
+   * @param output_type A variable indicating what kind of timing the output
+   * should represent (CPU or wall time).
+   */
+  TimerOutput (ConditionalOStream        &stream,
+               const enum OutputFrequency output_frequency,
+               const enum OutputType      output_type);
+
+#ifdef DEAL_II_WITH_MPI
+  /**
+   * Constructor that takes an MPI communicator as input. A timer constructed
+   * this way will sum up the CPU times over all processors in the MPI network
+   * for calculating the CPU time, or take the maximum over all processors,
+   * depending on the value of @p output_type . See the documentation of this
+   * class for the rationale for this constructor and an example.
+   *
+   * @param mpi_comm An MPI communicator across which we should accumulate or
+   * otherwise synchronize the timing information we produce on every MPI
+   * process.
+   * @param stream The stream (of type std::ostream) to which output is
+   * written.
+   * @param output_frequency A variable indicating when output is to be
+   * written to the given stream.
+   * @param output_type A variable indicating what kind of timing the output
+   * should represent (CPU or wall time). In this parallel context, when this
+   * argument selects CPU time, then times are accumulated over all processes
+   * participating in the MPI communicator. If this argument selects wall
+   * time, then reported times are the maximum over all processors' run times
+   * for this section. (The latter is computed by placing an
+   * <code>MPI_Barrier</code> call before starting and stopping the timer for
+   * each section.)
+   */
+  TimerOutput (MPI_Comm                   mpi_comm,
+               std::ostream              &stream,
+               const enum OutputFrequency output_frequency,
+               const enum OutputType      output_type);
+
+  /**
+   * Constructor that takes an MPI communicator as input. A timer constructed
+   * this way will sum up the CPU times over all processors in the MPI network
+   * for calculating the CPU time, or take the maximum over all processors,
+   * depending on the value of @p output_type . See the documentation of this
+   * class for the rationale for this constructor and an example.
+   *
+   * @param mpi_comm An MPI communicator across which we should accumulate or
+   * otherwise synchronize the timing information we produce on every MPI
+   * process.
+   * @param stream The stream (of type ConditionalOStream) to which output is
+   * written.
+   * @param output_frequency A variable indicating when output is to be
+   * written to the given stream.
+   * @param output_type A variable indicating what kind of timing the output
+   * should represent (CPU or wall time). In this parallel context, when this
+   * argument selects CPU time, then times are accumulated over all processes
+   * participating in the MPI communicator. If this argument selects wall
+   * time, then reported times are the maximum over all processors' run times
+   * for this section. (The latter is computed by placing an
+   * <code>MPI_Barrier</code> call before starting and stopping the timer for
+   * each section.)
+   */
+  TimerOutput (MPI_Comm                   mpi_comm,
+               ConditionalOStream        &stream,
+               const enum OutputFrequency output_frequency,
+               const enum OutputType      output_type);
+
+
+
+
+#endif
+
+  /**
+   * Destructor. Calls print_summary() in case the option for writing the
+   * summary output is set.
+   */
+  ~TimerOutput();
+
+  /**
+   * Open a section by giving a string name for it. In case the name already
+   * exists, that section is entered once again and times are accumulated.
+   */
+  void enter_subsection (const std::string &section_name);
+
+  /**
+   * Same as @p enter_subsection.
+   */
+  void enter_section (const std::string &section_name);
+
+  //TODO: make some of these functions DEPRECATED (I would keep enter/exit_section)
+
+  /**
+   * Leave a section. If no name is given, the last section that was entered
+   * is left.
+   */
+  void leave_subsection (const std::string &section_name = std::string());
+
+  /**
+   * Same as @p leave_subsection.
+   */
+  void exit_section (const std::string &section_name = std::string());
+
+  /**
+   * Print a formatted table that summarizes the time consumed in the various
+   * sections.
+   */
+  void print_summary () const;
+
+  /**
+   * By calling this function, all output can be disabled. This function
+   * together with enable_output() can be useful if one wants to control the
+   * output in a flexible way without putting a lot of <tt>if</tt> clauses in
+   * the program.
+   */
+  void disable_output ();
+
+  /**
+   * This function re-enables output of this class if it was previously
+   * disabled with disable_output(). This function together with
+   * disable_output() can be useful if one wants to control the output in a
+   * flexible way without putting a lot of <tt>if</tt> clauses in the program.
+   */
+  void enable_output ();
+
+  /**
+   * Resets the recorded timing information.
+   */
+  void reset ();
+
+private:
+  /**
+   * When to output information to the output stream.
+   */
+  OutputFrequency output_frequency;
+
+  /**
+   * Whether to show CPU times, wall times, or both CPU and wall times.
+   */
+  OutputType output_type;
+
+
+  /**
+   * A timer object for the overall run time. If we are using MPI, this timer
+   * also accumulates over all MPI processes.
+   */
+  Timer              timer_all;
+
+  /**
+   * A structure that groups all information that we collect about each of the
+   * sections.
+   */
+  struct Section
+  {
+    Timer  timer;
+    double total_cpu_time;
+    double total_wall_time;
+    unsigned int n_calls;
+  };
+
+  /**
+   * A list of all the sections and their information.
+   */
+  std::map<std::string, Section> sections;
+
+  /**
+   * The stream object to which we are to output.
+   */
+  ConditionalOStream out_stream;
+
+  /**
+   * A boolean variable that sets whether output of this class is currently on
+   * or off.
+   */
+  bool output_is_enabled;
+
+  /**
+   * A list of the sections that have been entered and not exited. The list is
+   * kept in the order in which sections have been entered, but elements may
+   * be removed in the middle if an argument is given to the exit_section()
+   * function.
+   */
+  std::list<std::string> active_sections;
+
+  /**
+   * The MPI communicator this object works with.
+   */
+  MPI_Comm            mpi_communicator;
+
+  /**
+   * A lock that makes sure that this class gives reasonable results even when
+   * used with several threads.
+   */
+  Threads::Mutex mutex;
+};
+
+
+
+/* ---------------- inline functions ----------------- */
+
+
+inline void Timer::restart ()
+{
+  // Drop whatever has been accumulated so far, then start a new measurement.
+  this->reset();
+  this->start();
+}
+
+
+
+#ifdef DEAL_II_WITH_MPI
+
+inline const Utilities::MPI::MinMaxAvg &
+Timer::get_data() const
+{
+  // Hand out the stored min/max/avg record by reference.
+  return this->mpi_data;
+}
+
+
+
+template <class StreamType>
+inline void
+Timer::print_data(StreamType &stream) const
+{
+  // Only the process with rank 0 in mpi_communicator writes the summary line.
+  if (dealii::Utilities::MPI::this_mpi_process(mpi_communicator) != 0)
+    return;
+
+  stream << mpi_data.max << " wall,"
+         << " max @" << mpi_data.max_index
+         << ", min=" << mpi_data.min << " @" << mpi_data.min_index
+         << ", avg=" << mpi_data.avg << std::endl;
+}
+
+#endif
+
+
+
+inline void
+TimerOutput::enter_section (const std::string &section_name)
+{
+  // Pure alias: all the work happens in enter_subsection().
+  this->enter_subsection(section_name);
+}
+
+
+
+inline void
+TimerOutput::exit_section (const std::string &section_name)
+{
+  // Pure alias: all the work happens in leave_subsection().
+  this->leave_subsection(section_name);
+}
+
+// Remember the timer, flag the section as open, and enter it right away.
+inline TimerOutput::Scope::Scope(dealii::TimerOutput &timer_, const std::string &section_name)
+  : timer(timer_),
+    in(true)
+{
+  timer.enter_section(section_name);
+}
+
+inline TimerOutput::Scope::~Scope()
+{
+  // stop() does nothing if the section was already left by hand.
+  this->stop();
+}
+
+inline void TimerOutput::Scope::stop()
+{
+  // Leave the section at most once; repeated calls find the flag cleared.
+  if (in)
+    {
+      in = false;
+      timer.exit_section();
+    }
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/types.h b/include/deal.II/base/types.h
new file mode 100644
index 0000000..7928db2
--- /dev/null
+++ b/include/deal.II/base/types.h
@@ -0,0 +1,261 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__types_h
+#define dealii__types_h
+
+
+#include <deal.II/base/config.h>
+#include <cstddef>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * A namespace in which we declare typedefs for types used in deal.II, as well
+ * as special values for these types.
+ */
+namespace types
+{
+  /**
+   * The type used to denote subdomain_ids of cells.
+   *
+   * See the
+   * @ref GlossSubdomainId "glossary"
+   * for more information.
+   *
+   * There is a special value, numbers::invalid_subdomain_id that is used to
+   * indicate an invalid value of this type.
+   */
+  typedef unsigned int subdomain_id;
+
+  /**
+   * The type used for global indices of vertices.
+   */
+  typedef unsigned long long int global_vertex_index;
+
+  /**
+   * Macro naming the MPI data type that matches types::global_vertex_index.
+   * As a preprocessor macro it is not confined to this namespace.
+   */
+#  define DEAL_II_VERTEX_INDEX_MPI_TYPE MPI_UNSIGNED_LONG_LONG
+
+#ifdef DEAL_II_WITH_64BIT_INDICES
+  /**
+   * The type used for global indices of degrees of freedom. While in
+   * sequential computations the 4 billion indices of 32-bit unsigned integers
+   * is plenty, parallel computations using the
+   * parallel::distributed::Triangulation class can overflow this number and
+   * we need a bigger index space.
+   *
+   * The data type always indicates an unsigned integer type.
+   *
+   * See the
+   * @ref GlobalDoFIndex
+   * page for guidance on when this type should or should not be used.
+   */
+  // TODO: we should check that unsigned long long int
+  // has the same size as uint64_t
+  typedef unsigned long long int global_dof_index;
+
+  /**
+   * Macro naming the MPI data type that matches types::global_dof_index
+   * when DEAL_II_WITH_64BIT_INDICES is defined.
+   */
+#  define DEAL_II_DOF_INDEX_MPI_TYPE MPI_UNSIGNED_LONG_LONG
+#else
+  /**
+   * The type used for global indices of degrees of freedom. This is the
+   * variant used when DEAL_II_WITH_64BIT_INDICES is not defined: a plain
+   * unsigned int, which is plenty for sequential computations but can
+   * overflow in parallel computations using the
+   * parallel::distributed::Triangulation class.
+   *
+   * The data type always indicates an unsigned integer type.
+   */
+  typedef unsigned int global_dof_index;
+
+  /**
+   * Macro naming the MPI data type that matches types::global_dof_index
+   * when DEAL_II_WITH_64BIT_INDICES is not defined.
+   */
+#  define DEAL_II_DOF_INDEX_MPI_TYPE MPI_UNSIGNED
+#endif
+
+  /**
+   * The type used to denote boundary indicators associated with every piece
+   * of the boundary and, in the case of meshes that describe manifolds in
+   * higher dimensions, associated with every cell.
+   *
+   * There is a special value, numbers::internal_face_boundary_id that is used
+   * to indicate an invalid value of this type and that is used as the
+   * boundary indicator for faces that are in the interior of the domain and
+   * therefore not part of any addressable boundary component.
+   *
+   * @see
+   * @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+   */
+  typedef unsigned char boundary_id;
+
+  /**
+   * The type used to denote manifold indicators associated with every object
+   * of the mesh.
+   *
+   * There is a special value, numbers::flat_manifold_id that is used to
+   * indicate the standard Cartesian manifold.
+   *
+   * @see
+   * @ref GlossManifoldIndicator "Glossary entry on manifold indicators"
+   */
+  typedef unsigned int manifold_id;
+
+  /**
+   * The type used to denote material indicators associated with every cell.
+   *
+   * There is a special value, numbers::invalid_material_id that is used to
+   * indicate an invalid value of this type.
+   */
+  typedef unsigned char material_id;
+}
+
+namespace TrilinosWrappers
+{
+  namespace types
+  {
+#ifdef DEAL_II_WITH_64BIT_INDICES
+    /**
+     * Integer type used in Trilinos' Epetra package (64-bit index build).
+     */
+    typedef long long int_type;
+#else
+    /**
+     * Integer type used in Trilinos' Epetra package (no 64-bit indices).
+     */
+    typedef int int_type;
+#endif
+  }
+}
+
+
+// this part of the namespace numbers was moved to the bottom of the types.h
+// file, because otherwise we get a circular inclusion of config.h, types.h,
+// and numbers.h
+namespace numbers
+{
+  /**
+   * Representation of the largest number that can be put into an unsigned
+   * integer. This value is widely used throughout the library as a marker for
+   * an invalid unsigned integer value, such as an invalid array index, an
+   * invalid array size, and the like.
+   */
+  static const unsigned int
+  invalid_unsigned_int = static_cast<unsigned int> (-1);
+
+  /**
+   * Representation of the largest number that can be put into a size_type.
+   * This value is used throughout the library as a marker for an invalid
+   * size_type value, such as an invalid array index, an invalid array size,
+   * and the like. It has the same type and value as invalid_dof_index.
+   */
+  const types::global_dof_index
+  invalid_size_type = static_cast<types::global_dof_index> (-1);
+
+  /**
+   * An invalid value for indices of degrees of freedom.
+   */
+  const types::global_dof_index invalid_dof_index = static_cast<types::global_dof_index>(-1);
+
+  /**
+   * Invalid material_id which we need in several places as a default value.
+   * We assume that all valid material_ids lie in the range [0,
+   * invalid_material_id).
+   */
+  const types::material_id invalid_material_id = static_cast<types::material_id>(-1);
+
+  /**
+   * Invalid boundary_id which we need in several places as a default value.
+   * We assume that all valid boundary_ids lie in the range [0,
+   * invalid_boundary_id).
+   *
+   * @see
+   * @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+   */
+  const types::boundary_id invalid_boundary_id = static_cast<types::boundary_id>(-1);
+
+  /**
+   * A boundary indicator number that we reserve for internal faces; it has
+   * the same value as invalid_boundary_id. We assume that all valid
+   * boundary_ids lie in the range [0, internal_face_boundary_id).
+   *
+   * This is an indicator that is used internally (by the library) to
+   * differentiate between faces that lie at the boundary of the domain and
+   * faces that lie in the interior of the domain. You should never try to
+   * assign this boundary indicator to anything in user code.
+   *
+   * @see
+   * @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+   */
+  const types::boundary_id internal_face_boundary_id = static_cast<types::boundary_id>(-1);
+
+  /**
+   * Invalid manifold_id which we need in several places as a default value.
+   * We assume that all valid manifold_ids lie in the range [0,
+   * invalid_manifold_id).
+   *
+   * @see
+   * @ref GlossManifoldIndicator "Glossary entry on manifold indicators"
+   */
+  const types::manifold_id invalid_manifold_id = static_cast<types::manifold_id>(-1);
+
+  /**
+   * A manifold_id we reserve for the default flat Cartesian manifold.
+   *
+   * @see
+   * @ref GlossManifoldIndicator "Glossary entry on manifold indicators"
+   */
+  const types::manifold_id flat_manifold_id = static_cast<types::manifold_id>(-1);
+
+  /**
+   * A special id for an invalid subdomain id. This value may not be used as a
+   * valid id but is used, for example, for default arguments to indicate a
+   * subdomain id that is not to be used.
+   *
+   * See the
+   * @ref GlossSubdomainId "glossary"
+   * for more information.
+   */
+  const types::subdomain_id invalid_subdomain_id = static_cast<types::subdomain_id>(-1);
+
+  /**
+   * The subdomain id assigned to a cell whose true subdomain id we don't
+   * know, for example because it resides on a different processor on a mesh
+   * that is kept distributed on many processors. Such cells are called
+   * "artificial".
+   *
+   * See the glossary entries on
+   * @ref GlossSubdomainId "subdomain ids"
+   * and
+   * @ref GlossArtificialCell "artificial cells"
+   * as well as the
+   * @ref distributed
+   * module for more information.
+   */
+  const types::subdomain_id artificial_subdomain_id = static_cast<types::subdomain_id>(-2);
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/utilities.h b/include/deal.II/base/utilities.h
new file mode 100644
index 0000000..4223e81
--- /dev/null
+++ b/include/deal.II/base/utilities.h
@@ -0,0 +1,710 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__utilities_h
+#define dealii__utilities_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/mpi.h>
+
+#include <vector>
+#include <utility>
+#include <functional>
+#include <string>
+
+#ifdef DEAL_II_WITH_TRILINOS
+#  include <Epetra_Comm.h>
+#  include <Epetra_Map.h>
+#  ifdef DEAL_II_WITH_MPI
+#    include <Epetra_MpiComm.h>
+#  else
+#    include <Epetra_SerialComm.h>
+#  endif
+#endif
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/**
+ * A namespace for utility functions that are not particularly specific to
+ * finite element computing or numerical programs, but nevertheless are needed
+ * in various contexts when writing applications.
+ *
+ * @ingroup utilities
+ * @author Wolfgang Bangerth, 2005
+ */
+namespace Utilities
+{
+
+  /**
+   * Convert a number @p value to a string, with as many digits as given to
+   * fill with leading zeros.
+   *
+   * If the second parameter is left at its default value, the number is not
+   * padded with leading zeros. The result is then the same as if the standard
+   * C function <code>itoa()</code> had been called.
+   *
+   * When calling this function signed integers are implicitly converted to
+   * unsigned integers and long integers might experience an overflow.
+   *
+   * @note The use of this function is discouraged and users should use
+   * <code>Utilities::to_string()</code> instead. In its current
+   * implementation the function simply calls <code>to_string@<unsigned
+   * int@>()</code>.
+   */
+  std::string
+  int_to_string (const unsigned int value,
+                 const unsigned int digits = numbers::invalid_unsigned_int);
+
+  /**
+   * Convert a number @p value to a string, with @p digits characters. The
+   * string is padded with leading zeros, after a possible minus sign.
+   * Therefore the total number of padding zeros is @p digits minus any signs,
+   * decimal points and digits of @p value.
+   *
+   * If the second parameter is left at its default value, the number is not
+   * padded with leading zeros. The result is then the same as if the boost
+   * function <code>lexical_cast@<std::string@>()</code> had been called.
+   */
+  template <typename number>
+  std::string
+  to_string (const number value,
+             const unsigned int digits = numbers::invalid_unsigned_int);
+
+  /**
+   * Determine how many digits are needed to represent numbers at most as
+   * large as the given number.
+   */
+  unsigned int
+  needed_digits (const unsigned int max_number);
+
+  /**
+   * Given a string, convert it to an integer. Throw an assertion if that is
+   * not possible.
+   */
+  int
+  string_to_int (const std::string &s);
+
+  /**
+   * Return a string describing the dimensions of the object. Often, functions
+   * in the deal.II library as well as in user codes need to define a string
+   * containing the template dimensions of some objects defined using two
+   * template parameters: dim (the topological dimension of the object) and
+   * spacedim (the dimension of the embedding Euclidean space).  Since in all
+   * deal.II classes, by default spacedim is equal to dimension, the above
+   * string is usually contracted to "<dim>", instead of "<dim,spacedim>".
+   * This function returns a string containing "dim" if dim is equal to
+   * spacedim, otherwise it returns "dim,spacedim".
+   */
+  std::string dim_string(const int dim, const int spacedim);
+
+  /**
+   * Given a list of strings, convert it to a list of integers. Throw an
+   * assertion if that is not possible.
+   */
+  std::vector<int>
+  string_to_int (const std::vector<std::string> &s);
+
+  /**
+   * Given a string, convert it to a double. Throw an assertion if that is
+   * not possible.
+   */
+  double
+  string_to_double (const std::string &s);
+
+
+  /**
+   * Given a list of strings, convert it to a list of doubles. Throw an
+   * assertion if that is not possible.
+   */
+  std::vector<double>
+  string_to_double (const std::vector<std::string> &s);
+
+  /**
+   * Given a string that contains text separated by a @p delimiter, split it
+   * into its components; for each component, remove leading and trailing
+   * spaces. The default value of the delimiter is a comma, so that the
+   * function splits comma separated lists of strings.
+   *
+   * To make data input from tables simpler, if the input string ends in a
+   * delimiter (possibly followed by an arbitrary amount of whitespace), then
+   * this last delimiter is ignored. For example,
+   * @code
+   *   Utilities::split_string_list("abc; def; ghi; ", ';');
+   * @endcode
+   * yields the same 3-element list of output <code>{"abc","def","ghi"}</code>
+   * as you would get if the input had been
+   * @code
+   *   Utilities::split_string_list("abc; def; ghi", ';');
+   * @endcode
+   * or
+   * @code
+   *   Utilities::split_string_list("abc; def; ghi;", ';');
+   * @endcode
+   * As a consequence of this rule, a call like
+   * @code
+   *   Utilities::split_string_list(" ; ", ';');
+   * @endcode
+   * yields a one-element list. Because of the trimming of whitespace, the
+   * single element is the empty string.
+   *
+   * This function can digest the case that the delimiter is a space. In this
+   * case, it returns all words in the string. Combined with the rules above,
+   * this implies that
+   * @code
+   *   Utilities::split_string_list("abc def ghi ", ' ');
+   * @endcode
+   * yields again the 3-element list of output
+   * <code>{"abc","def","ghi"}</code> from above despite the presence of space
+   * at the end of the string. Furthermore,
+   * @code
+   *   Utilities::split_string_list("      ", ' ');
+   * @endcode
+   * yields an empty list regardless of the number of spaces in the string.
+   */
+  std::vector<std::string>
+  split_string_list (const std::string &s,
+                     const char         delimiter = ',');
+
+  /**
+   * Take a text, usually a documentation or something, and try to break it
+   * into individual lines of text at most @p width characters wide, by
+   * breaking at positions marked by @p delimiter in the text. If this is not
+   * possible, return the shortest lines that are longer than @p width.  The
+   * default value of the delimiter is a space character. If original_text
+   * contains newline characters (\n), the string is split at these locations,
+   * too.
+   */
+  std::vector<std::string>
+  break_text_into_lines (const std::string &original_text,
+                         const unsigned int width,
+                         const char delimiter = ' ');
+
+  /**
+   * Return true if the given pattern string appears in the first position of
+   * the string.
+   */
+  bool
+  match_at_string_start (const std::string &name,
+                         const std::string &pattern);
+
+  /**
+   * Read a (signed) integer starting at the position in @p name indicated by
+   * the second argument, and return this integer as a pair together with how
+   * many characters it takes up in the string.
+   *
+   * If no integer can be read at the indicated position, return
+   * (-1,numbers::invalid_unsigned_int).
+   */
+  std::pair<int, unsigned int>
+  get_integer_at_position (const std::string &name,
+                           const unsigned int position);
+
+  /**
+   * Return a string with all occurrences of @p from in @p input replaced by
+   * @p to.
+   */
+  std::string replace_in_string(const std::string &input,
+                                const std::string &from,
+                                const std::string &to);
+
+  /**
+   * Return a string with all standard whitespace characters (including
+   * '<tt>\\t</tt>', '<tt>\\n</tt>', and '<tt>\\r</tt>') at the beginning and
+   * end of @p input removed.
+   */
+  std::string
+  trim(const std::string &input);
+
+  /**
+   * Generate a random number from a normalized Gaussian probability
+   * distribution centered around @p a and with standard deviation @p sigma.
+   *
+   * This function is reentrant, i.e., it can safely be called from multiple
+   * threads at the same time. In addition, each thread will get the same
+   * sequence of numbers every time. On the other hand, if you run
+   * Threads::Task objects via the Threading Building Blocks, then tasks will
+   * be assigned to mostly random threads, and may get a different sequence of
+   * random numbers in different runs of the program, since a previous task
+   * may already have consumed the first few random numbers generated for the
+   * thread you're on. If this is a problem, you need to create your own
+   * random number generator objects every time you want to start from a
+   * defined point.
+   *
+   * @note Like the system function rand(), this function produces the same
+   * sequence of random numbers every time a program is started. This is an
+   * important property for debugging codes, but it makes it impossible to
+   * really verify statistics properties of a code. For rand(), you can call
+   * srand() to "seed" the random number generator to get different sequences
+   * of random numbers every time a program is called. However, this function
+   * does not allow seeding the random number generator. If you need this, as
+   * above, use one of the C++ or BOOST facilities.
+   */
+  double
+  generate_normal_random_number (const double a,
+                                 const double sigma);
+
+
+  /**
+   * Calculate a fixed power, provided as a template argument, of a number.
+   *
+   * This function provides an efficient way to calculate things like
+   * <code>t^N</code> where <code>N</code> is a known number at compile time.
+   *
+   * Use this function as in <code>fixed_power@<dim@> (n)</code>.
+   */
+  template <int N, typename T>
+  T
+  fixed_power (const T t);
+
+  /**
+   * Calculate a fixed power of an integer number by a template expression
+   * where both the number <code>a</code> and the power <code>N</code> are
+   * compile-time constants. This computes the result of the power operation
+   * at compile time, enabling its use e.g. in other templates.
+   *
+   * Use this class as in <code>fixed_int_power@<5,2@>::%value</code> to
+   * compute 5<sup>2</sup>.
+   */
+  template <int a, int N>
+  struct fixed_int_power
+  {
+    static const int value = a *fixed_int_power<a,N-1>::value;
+  };
+
+  /**
+   * Base case for the power operation with <code>N=0</code>, which gives the
+   * result 1.
+   */
+  template <int a>
+  struct fixed_int_power<a,0>
+  {
+    static const int value = 1;
+  };
+
+  /**
+   * Optimized replacement for <tt>std::lower_bound</tt> for searching within
+   * the range of column indices. Slashes execution time by approximately one
+   * half for the present application, partly because the binary
+   * search is replaced by a linear search for small loop lengths.
+   *
+   * Another reason for this function is rather obscure: when using the GCC
+   * libstdc++ function std::lower_bound, complexity is O(log(N)) as required.
+   * However, when using the debug version of the GCC libstdc++ as we do when
+   * running the testsuite, then std::lower_bound tests whether the sequence
+   * is in fact partitioned with respect to the pivot 'value' (i.e. in essence
+   * that the sequence is sorted as required for binary search to work).
+   * However, verifying this means that the complexity of std::lower_bound
+   * jumps to O(N); we call this function O(N) times below, making the overall
+   * complexity O(N**2). The consequence is that a few tests with big meshes
+   * completely run off the wall time limit for tests and fail with the
+   * libstdc++ debug mode.
+   *
+   * This function simply makes the assumption that the sequence is sorted,
+   * and we simply don't do the additional check.
+   */
+  template<typename Iterator, typename T>
+  Iterator
+  lower_bound (Iterator  first,
+               Iterator  last,
+               const T  &val);
+
+
+  /**
+   * The same function as above, but taking an argument that is used to
+   * compare individual elements of the sequence of objects pointed to by the
+   * iterators.
+   */
+  template<typename Iterator, typename T, typename Comp>
+  Iterator
+  lower_bound (Iterator   first,
+               Iterator   last,
+               const T   &val,
+               const Comp comp);
+
+  /**
+   * Given a permutation vector (i.e. a vector $p_0\ldots p_{N-1}$ where each
+   * $p_i\in [0,N)$ and $p_i\neq p_j$ for $i\neq j$), produce the reverse
+   * permutation $q_i=N-1-p_i$.
+   */
+  std::vector<unsigned int>
+  reverse_permutation (const std::vector<unsigned int> &permutation);
+
+  /**
+   * Given a permutation vector (i.e. a vector $p_0\ldots p_{N-1}$ where each
+   * $p_i\in [0,N)$ and $p_i\neq p_j$ for $i\neq j$), produce the inverse
+   * permutation $q_0\ldots q_{N-1}$ so that $q_{p_i}=p_{q_i}=i$.
+   */
+  std::vector<unsigned int>
+  invert_permutation (const std::vector<unsigned int> &permutation);
+
+  /**
+   * Given a permutation vector (i.e. a vector $p_0\ldots p_{N-1}$ where each
+   * $p_i\in [0,N)$ and $p_i\neq p_j$ for $i\neq j$), produce the reverse
+   * permutation $q_i=N-1-p_i$.
+   */
+  std::vector<unsigned long long int>
+  reverse_permutation (const std::vector<unsigned long long int> &permutation);
+
+  /**
+   * Given a permutation vector (i.e. a vector $p_0\ldots p_{N-1}$ where each
+   * $p_i\in [0,N)$ and $p_i\neq p_j$ for $i\neq j$), produce the inverse
+   * permutation $q_0\ldots q_{N-1}$ so that $q_{p_i}=p_{q_i}=i$.
+   */
+  std::vector<unsigned long long int>
+  invert_permutation (const std::vector<unsigned long long int> &permutation);
+
+  /**
+   * A namespace for utility functions that probe system properties.
+   *
+   * @ingroup utilities
+   */
+  namespace System
+  {
+
+    /**
+     * Return the CPU load as returned by "uptime". Note that the
+     * interpretation of this number depends on the actual number of
+     * processors in the machine. This is presently only implemented on Linux,
+     * using the /proc/loadavg pseudo-file, on other systems we simply return
+     * zero.
+     */
+    double get_cpu_load ();
+
+    /**
+     * Structure that holds information about memory usage in kB. Used by
+     * get_memory_stats(). See the /status entry in man 5 proc for details.
+     */
+    struct MemoryStats
+    {
+      unsigned long int VmPeak; /**< peak virtual memory size in kB */
+      unsigned long int VmSize; /**< current virtual memory size in kB */
+      unsigned long int VmHWM; /**< peak resident memory size in kB */
+      unsigned long int VmRSS; /**< current resident memory size in kB */
+    };
+
+
+    /**
+     * Fills the @p stats structure with information about the memory
+     * consumption of this process. This is only implemented on Linux.
+     */
+    void get_memory_stats (MemoryStats &stats);
+
+
+    /**
+     * Return the name of the host this process runs on.
+     */
+    std::string get_hostname ();
+
+
+    /**
+     * Return the present time as HH:MM:SS.
+     */
+    std::string get_time ();
+
+    /**
+     * Return the present date as YYYY/MM/DD. MM and DD may be either one or
+     * two digits.
+     */
+    std::string get_date ();
+
+    /**
+     * Call the system function posix_memalign, or a replacement function if
+     * not available, to allocate memory with a certain minimal alignment. The
+     * first argument will then return a pointer to this memory block that can
+     * be released later on through a standard <code>free</code> call.
+     *
+     * @param memptr The address of a pointer variable that will after this
+     * call point to the allocated memory.
+     * @param alignment The minimal alignment of the memory block, in bytes.
+     * @param size The size of the memory block to be allocated, in bytes.
+     *
+     * @note This function checks internally for error codes, rather than
+     * leaving this task to the calling site.
+     */
+    void posix_memalign (void **memptr, size_t alignment, size_t size);
+
+    /**
+     * @deprecated Use Utilities::MPI::job_supports_mpi() instead.
+     */
+    bool job_supports_mpi () DEAL_II_DEPRECATED;
+  }
+
+
+#ifdef DEAL_II_WITH_TRILINOS
+  /**
+   * This namespace provides some of the basic structures used in the
+   * initialization of the Trilinos objects (e.g., matrices, vectors, and
+   * preconditioners).
+   */
+  namespace Trilinos
+  {
+    /**
+     * Returns a Trilinos Epetra_Comm object needed for creation of
+     * Epetra_Maps.
+     *
+     * If deal.II has been configured to use a compiler that does not support
+     * MPI then the resulting communicator will be a serial one. Otherwise,
+     * the communicator will correspond to MPI_COMM_WORLD, i.e. a communicator
+     * that encompasses all processes within this MPI universe.
+     */
+    const Epetra_Comm &comm_world();
+
+    /**
+     * Returns a Trilinos Epetra_Comm object needed for creation of
+     * Epetra_Maps.
+     *
+     * If deal.II has been configured to use a compiler that does not support
+     * MPI then the resulting communicator will be a serial one. Otherwise,
+     * the communicator will correspond to MPI_COMM_SELF, i.e. a communicator
+     * that comprises only this one processor.
+     */
+    const Epetra_Comm &comm_self();
+
+    /**
+     * Given a communicator, duplicate it. If the given communicator is
+     * serial, that means to just return a copy of itself. On the other hand,
+     * if it is %parallel, we duplicate the underlying MPI_Comm object: we
+     * create a separate MPI communicator that contains the same processors
+     * and in the same order but has a separate identifier distinct from the
+     * given communicator. The function returns a pointer to a new object of a
+     * class derived from Epetra_Comm. The caller of this function needs to
+     * assume ownership of the returned object. The returned object should be
+     * destroyed using the destroy_communicator() function.
+     *
+     * This facility is used to separate streams of communication. For
+     * example, a program could simply use MPI_Comm_World for everything. But
+     * it is easy to come up with scenarios where sometimes not all processors
+     * participate in a communication that is intended to be global -- for
+     * example if we assemble a matrix on a coarse mesh with fewer cells than
+     * there are processors, some processors may not sync their matrices with
+     * the rest because they haven't written into it because they own no
+     * cells. That's clearly a bug. However, if these processors just continue
+     * their work, and the next %parallel operation happens to be a sync on a
+     * different matrix, then the sync could succeed -- by accident, since
+     * different processors are talking about different matrices.
+     *
+     * This kind of situation can be avoided if we use different communicators
+     * for different matrices which reduces the likelihood that communications
+     * meant to be separate aren't recognized as such just because they happen
+     * on the same communicator. In addition, it is conceivable that some MPI
+     * operations can be parallelized using multiple threads because their
+     * communicators identify the communication in question, not their
+     * relative timing as is the case in a sequential program that just uses a
+     * single communicator.
+     */
+    Epetra_Comm *
+    duplicate_communicator (const Epetra_Comm &communicator);
+
+    /**
+     * Given an Epetra communicator that was created by the
+     * duplicate_communicator() function, destroy the underlying MPI
+     * communicator object and reset the Epetra_Comm object to the result of
+     * comm_self().
+     *
+     * It is necessary to call this function at the time when the result of
+     * duplicate_communicator() is no longer needed. The reason is that in
+     * that function, we first create a new MPI_Comm object and then create an
+     * Epetra_Comm around it. While we can take care of destroying the latter,
+     * it doesn't destroy the communicator since it can only assume that it
+     * may also be still used by other objects in the program. Consequently,
+     * we have to take care of destroying it ourselves, explicitly.
+     *
+     * This function does exactly that. Because this has to happen while the
+     * Epetra_Comm object is still around, it first resets the latter and then
+     * destroys the communicator object.
+     *
+     * @note If you call this function on an Epetra_Comm object that is not
+     * created by duplicate_communicator(), you are likely doing something
+     * quite wrong. Don't do this.
+     */
+    void
+    destroy_communicator (Epetra_Comm &communicator);
+
+    /**
+     * Return the number of MPI processes that exist in the given
+     * communicator object. If this is a sequential job, it returns 1.
+     */
+    unsigned int get_n_mpi_processes (const Epetra_Comm &mpi_communicator);
+
+    /**
+     * Return the number of the present MPI process in the space of processes
+     * described by the given communicator. This will be a unique value for
+     * each process between zero and (less than) the number of all processes
+     * (given by get_n_mpi_processes()).
+     */
+    unsigned int get_this_mpi_process (const Epetra_Comm &mpi_communicator);
+
+    /**
+     * Given a Trilinos Epetra map, create a new map that has the same
+     * subdivision of elements to processors but uses the given communicator
+     * object instead of the one stored in the first argument. In essence,
+     * this means that we create a map that communicates among the same
+     * processors in the same way, but using a separate channel.
+     *
+     * This function is typically used with a communicator that has been
+     * obtained by the duplicate_communicator() function.
+     */
+    Epetra_Map
+    duplicate_map (const Epetra_BlockMap  &map,
+                   const Epetra_Comm &comm);
+  }
+
+#endif
+
+
+}
+
+
+// --------------------- inline functions
+
+namespace Utilities
+{
+  template <int N, typename T>
+  inline
+  T fixed_power (const T n)
+  {
+    Assert (N>0, ExcNotImplemented());
+    switch (N)
+      {
+      case 1:
+        return n;
+      case 2:
+        return n*n;
+      case 3:
+        return n*n*n;
+      case 4:
+        return n*n*n*n;
+      default:
+        T result = n;
+        for (int d=1; d<N; ++d)
+          result *= n;
+        return result;
+      }
+  }
+
+
+
+  template<typename Iterator, typename T>
+  inline
+  Iterator
+  lower_bound (Iterator  first,
+               Iterator  last,
+               const T  &val)
+  {
+    return Utilities::lower_bound (first, last, val,
+                                   std::less<T>());
+  }
+
+
+
+  template<typename Iterator, typename T, typename Comp>
+  inline
+  Iterator
+  lower_bound (Iterator    first,
+               Iterator    last,
+               const T    &val,
+               const Comp  comp)
+  {
+    // verify that the two iterators are properly ordered. since
+    // we need operator- for the iterator type anyway, do the
+    // test as follows, rather than via 'last >= first'
+    Assert (last - first >= 0,
+            ExcMessage ("The given iterators do not satisfy the proper ordering."));
+    // NOTE(review): the cast below narrows the iterator distance to unsigned int
+    unsigned int len = static_cast<unsigned int>(last-first);
+
+    if (len==0)
+      return first;
+
+    while (true)
+      {
+        // if length is less than 8
+        // (i.e. 7 or fewer), do an
+        // unrolled linear search. use
+        // a switch without breaks, so
+        // each case falls through
+        if (len < 8)
+          {
+            switch (len)
+              {
+              case 7:
+                if (!comp(*first, val))
+                  return first;
+                ++first;
+              case 6:
+                if (!comp(*first, val))
+                  return first;
+                ++first;
+              case 5:
+                if (!comp(*first, val))
+                  return first;
+                ++first;
+              case 4:
+                if (!comp(*first, val))
+                  return first;
+                ++first;
+              case 3:
+                if (!comp(*first, val))
+                  return first;
+                ++first;
+              case 2:
+                if (!comp(*first, val))
+                  return first;
+                ++first;
+              case 1:
+                if (!comp(*first, val))
+                  return first;
+                return first+1;
+              default:
+                // indices seem
+                // to not be
+                // sorted
+                // correctly!? or
+                // did len
+                // become==0
+                // somehow? that
+                // shouldn't have
+                // happened
+                Assert (false, ExcInternalError());
+              }
+          }
+
+
+
+        const unsigned int half   = len >> 1;
+        const Iterator     middle = first + half;
+
+        // if the value is larger than
+        // that pointed to by the
+        // middle pointer, then the
+        // insertion point must be
+        // right of it
+        if (comp(*middle, val))
+          {
+            first = middle + 1;
+            len  -= half + 1;
+          }
+        else
+          len = half;
+      }
+  }
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/vector_slice.h b/include/deal.II/base/vector_slice.h
new file mode 100644
index 0000000..5031952
--- /dev/null
+++ b/include/deal.II/base/vector_slice.h
@@ -0,0 +1,279 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__vector_slice_h
+#define dealii__vector_slice_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/array_view.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/**
+ * Filter a range out of any object having a random access <tt>operator[]
+ * (unsigned int)</tt> and a function <tt>size() const</tt>.
+ *
+ * The use of this object is straightforward. It duplicates the random access
+ * operator of the <tt>VectorType</tt> and adds an offset to every index.
+ *
+ * Some precautions have to be taken if it is used for a constant vector: the
+ * VectorSlice object has to be constant, too. The appropriate initialization
+ * sequence is like this:
+ *
+ * @code
+ *   void f(const std::vector<int>& v)
+ *   {
+ *     const VectorSlice<const std::vector<int> > slice(v,...);
+ *     ...
+ *   }
+ * @endcode
+ *
+ * @ingroup data
+ * @author Guido Kanschat, 2004
+ */
+template <typename VectorType>
+class VectorSlice
+{
+public:
+  /**
+   * Construct a vector slice containing the whole vector. Comes in handy if you
+   * did not want to have a slice at all, but the function you call wants it:
+   * just put in the vector itself as argument and let this constructor make a
+   * slice for you.
+   */
+  VectorSlice(VectorType &v);
+  /**
+   * The real constructor for a vector slice, allowing you to specify the
+   * start index and the length of the slice.
+   */
+  VectorSlice(VectorType   &v,
+              unsigned int start,
+              unsigned int length);
+
+  /**
+   * Conversion operator to an ArrayView object that represents an array of
+   * non-const elements pointing to the same location as the current object.
+   */
+  operator ArrayView<typename VectorType::value_type *> ();
+
+  /**
+   * Conversion operator to an ArrayView object that represents an array of
+   * const elements pointing to the same location as the current object.
+   */
+  operator ArrayView<const typename VectorType::value_type *> () const;
+
+  /**
+   * Return the length of the slice using the same interface as
+   * <tt>std::vector</tt>.
+   */
+  unsigned int size() const;
+
+  /**
+   * Return a reference to the $i$th element of the range represented by the
+   * current object.
+   */
+  typename VectorType::reference operator[] (unsigned int i);
+
+  /**
+   * Return a @p const reference to the $i$th element of the range represented
+   * by the current object.
+   */
+  typename VectorType::const_reference operator[] (unsigned int i) const;
+
+  /**
+   * Standard-conforming iterator function.
+   */
+  typename VectorType::iterator begin();
+
+  /**
+   * Standard-conforming iterator function.
+   */
+  typename VectorType::const_iterator begin() const;
+
+  /**
+   * Standard-conforming iterator function.
+   */
+  typename VectorType::iterator end();
+
+  /**
+   * Standard-conforming iterator function.
+   */
+  typename VectorType::const_iterator end() const;
+
+private:
+  /**
+   * The vector we extract from.
+   */
+  VectorType &v;
+  /**
+   * The start index of the slice.
+   */
+  const unsigned int start;
+  /**
+   * The length of the slice.
+   */
+  const unsigned int length;
+};
+
+
+/**
+ * Helper function for creating temporary objects without typing template
+ * arguments.
+ *
+ * @relates VectorSlice
+ * @author Guido Kanschat, 2004
+ */
+template <typename VectorType>
+inline
+const VectorSlice<const VectorType>
+make_slice (VectorType &v)
+{
+  const VectorSlice<const VectorType> r(v);
+  return r;
+}
+
+
+
+/**
+ * Helper function for creating temporary objects without typing template
+ * arguments.
+ *
+ * @relates VectorSlice
+ * @author Guido Kanschat, 2004
+ */
+template <typename VectorType>
+inline
+const VectorSlice<const VectorType>
+make_slice (VectorType         &v,
+            const unsigned int start,
+            const unsigned int length)
+{
+  const VectorSlice<const VectorType> r(v, start, length);
+  return r;
+}
+
+
+
+
+//---------------------------------------------------------------------------
+
+template <typename VectorType>
+inline
+VectorSlice<VectorType>::VectorSlice(VectorType &v)
+  :
+  v(v), start(0), length(v.size())
+{}
+
+
+template <typename VectorType>
+inline
+VectorSlice<VectorType>::VectorSlice(VectorType   &v,
+                                     unsigned int start,
+                                     unsigned int length)
+  :
+  v(v), start(start), length(length)
+{
+  Assert((start+length<=v.size()),
+         ExcIndexRange(length, 0, v.size()-start+1));
+}
+
+
+template <typename VectorType>
+inline
+unsigned int
+VectorSlice<VectorType>::size() const
+{
+  return length;
+}
+
+
+template <typename VectorType>
+VectorSlice<VectorType>::
+operator ArrayView<typename VectorType::value_type *> ()
+{
+  return ArrayView<typename VectorType::value_type *> (&v[start], length);
+}
+
+
+template <typename VectorType>
+VectorSlice<VectorType>::
+operator ArrayView<const typename VectorType::value_type *> () const
+{
+  return ArrayView<const typename VectorType::value_type *> (&v[start], length);
+}
+
+
+template <typename VectorType>
+inline
+typename VectorType::reference
+VectorSlice<VectorType>::operator[](unsigned int i)
+{
+  Assert ((i<length), ExcIndexRange(i, 0, length));
+
+  return v[start+i];
+}
+
+
+template <typename VectorType>
+inline
+typename VectorType::const_reference
+VectorSlice<VectorType>::operator[](unsigned int i) const
+{
+  Assert ((i<length), ExcIndexRange(i, 0, length));
+
+  return v[start+i];
+}
+
+
+template <typename VectorType>
+inline
+typename VectorType::const_iterator
+VectorSlice<VectorType>::begin() const
+{
+  return v.begin()+start;
+}
+
+
+template <typename VectorType>
+inline
+typename VectorType::iterator
+VectorSlice<VectorType>::begin()
+{
+  return v.begin()+start;
+}
+
+
+template <typename VectorType>
+inline
+typename VectorType::const_iterator
+VectorSlice<VectorType>::end() const
+{
+  return v.begin()+start+length;
+}
+
+
+template <typename VectorType>
+inline
+typename VectorType::iterator
+VectorSlice<VectorType>::end()
+{
+  return v.begin()+start+length;
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/base/vectorization.h b/include/deal.II/base/vectorization.h
new file mode 100644
index 0000000..9b404c9
--- /dev/null
+++ b/include/deal.II/base/vectorization.h
@@ -0,0 +1,2670 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2011 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__vectorization_h
+#define dealii__vectorization_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+
+#include <cmath>
+
+// Note:
+// The flag DEAL_II_COMPILER_VECTORIZATION_LEVEL is essentially constructed
+// according to the following scheme
+// #ifdef __AVX512F__
+// #define DEAL_II_COMPILER_VECTORIZATION_LEVEL 3
+// #elif defined (__AVX__)
+// #define DEAL_II_COMPILER_VECTORIZATION_LEVEL 2
+// #elif defined (__SSE2__)
+// #define DEAL_II_COMPILER_VECTORIZATION_LEVEL 1
+// #else
+// #define DEAL_II_COMPILER_VECTORIZATION_LEVEL 0
+// #endif
+// In addition to checking the flags __AVX__ and __SSE2__, a CMake test,
+// 'check_01_cpu_features.cmake', ensures that these features are not only
+// present in the compilation unit but also working properly.
+
+#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 2 // AVX, AVX-512
+#include <immintrin.h>
+#elif DEAL_II_COMPILER_VECTORIZATION_LEVEL == 1 // SSE2
+#include <emmintrin.h>
+#endif
+
+
+// forward declarations
+DEAL_II_NAMESPACE_OPEN
+template <typename Number> class VectorizedArray;
+template <typename T> struct EnableIfScalar;
+DEAL_II_NAMESPACE_CLOSE
+
+
+namespace std
+{ // declared ahead of VectorizedArray so the class can name these as friends
+  template <typename Number> ::dealii::VectorizedArray<Number>
+  sqrt(const ::dealii::VectorizedArray<Number> &);
+  template <typename Number> ::dealii::VectorizedArray<Number>
+  abs(const ::dealii::VectorizedArray<Number> &);
+  template <typename Number> ::dealii::VectorizedArray<Number>
+  max(const ::dealii::VectorizedArray<Number> &, const ::dealii::VectorizedArray<Number> &);
+  template <typename Number> ::dealii::VectorizedArray<Number>
+  min (const ::dealii::VectorizedArray<Number> &, const ::dealii::VectorizedArray<Number> &);
+}
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// Enable the EnableIfScalar type trait for VectorizedArray<Number> such
+// that it can be used as a Number type in Tensor<rank,dim,Number>, etc.
+
+template<typename Number>
+struct EnableIfScalar<VectorizedArray<Number> >
+{ // `type` is well-formed only when EnableIfScalar<Number>::type itself exists
+  typedef VectorizedArray<typename EnableIfScalar<Number>::type> type;
+};
+
+
+/**
+ * This generic class defines a unified interface to a vectorized data type.
+ * For general template arguments, this class simply corresponds to the
+ * template argument. For example, VectorizedArray<long double> is nothing
+ * else but a wrapper around <tt>long double</tt> with exactly one data field
+ * of type <tt>long double</tt> and overloaded arithmetic operations. This
+ * means that <tt>VectorizedArray<ComplicatedType></tt> has a similar layout
+ * as ComplicatedType, provided that ComplicatedType defines basic arithmetic
+ * operations. For floats and doubles, an array of numbers is packed
+ * together, though. The number of elements packed together depends on the
+ * computer system and compiler flags that are used for compilation of
+ * deal.II. The fundamental idea of these packed data types is to use one
+ * single CPU instruction to perform arithmetic operations on the whole array
+ * using the processor's vector units. Most computer systems by 2010 standards
+ * will use an array of two doubles and four floats, respectively (this
+ * corresponds to the SSE/SSE2 data sets) when compiling deal.II on 64-bit
+ * operating systems. On Intel Sandy Bridge processors and newer or AMD
+ * Bulldozer processors and newer, four doubles and eight floats are used when
+ * deal.II is configured e.g. using gcc with --with-cpu=native or
+ * --with-cpu=corei7-avx. On compilations with AVX-512 support, eight
+ * doubles and sixteen floats are used.
+ *
+ * The behavior of this class is made similar to the basic data types double
+ * and float. The definition of a vectorized array does not initialize the
+ * data field but rather leaves it undefined, as is the case for double and
+ * float. However, when calling something like VectorizedArray<double> a =
+ * VectorizedArray<double>(), it sets all numbers in this field to zero. In
+ * other words, this class is a plain old data (POD) type which has an
+ * equivalent C representation and can e.g. be safely copied with std::memcpy.
+ * This POD layout is also necessary for ensuring correct alignment of data
+ * with address boundaries when collected in a vector (i.e., when the first
+ * element in a vector is properly aligned, all subsequent elements will be
+ * correctly aligned, too).
+ *
+ * Note that for proper functioning of this class, certain data alignment
+ * rules must be respected. This is because the computer expects the starting
+ * address of a VectorizedArray<double> field at specific addresses in memory
+ * (usually, the address of the vectorized array should be a multiple of the
+ * length of the array in bytes). Otherwise, a segmentation fault or a severe
+ * loss of performance might occur. When creating a single data field on the
+ * stack like <tt>VectorizedArray<double> a = VectorizedArray<double>()</tt>,
+ * the compiler will take care of data alignment automatically. However, when
+ * allocating a long vector of VectorizedArray<double> data, one needs to
+ * respect these rules. Use the class AlignedVector or data containers based
+ * on AlignedVector (such as Table) for this purpose. It is a class very
+ * similar to std::vector otherwise but always makes sure that data is
+ * correctly aligned.
+ *
+ * @author Katharina Kormann, Martin Kronbichler, 2010, 2011
+ */
+template <typename Number>
+class VectorizedArray
+{
+public:
+  /**
+   * Number of scalars stored in one object; this generic version holds one.
+   */
+  static const unsigned int n_array_elements = 1;
+
+  // POD means that there should be no user-defined constructors, destructors
+  // and copy functions (the standard is somewhat relaxed in C++2011, though).
+
+  /**
+   * Assigns the given scalar to the data field and returns this object.
+   */
+  VectorizedArray &
+  operator = (const Number scalar)
+  {
+    data = scalar;
+    return *this;
+  }
+
+  /**
+   * Access operator (only valid with component 0)
+   */
+  Number &
+  operator [] (const unsigned int comp)
+  {
+    (void)comp; // presumably silences an unused-parameter warning -- confirm
+    AssertIndexRange (comp, 1);
+    return data;
+  }
+
+  /**
+   * Constant access operator (only valid with component 0)
+   */
+  const Number &
+  operator [] (const unsigned int comp) const
+  {
+    (void)comp; // presumably silences an unused-parameter warning -- confirm
+    AssertIndexRange (comp, 1);
+    return data;
+  }
+
+  /**
+   * In-place addition of @p vec; returns this object.
+   */
+  VectorizedArray &
+  operator += (const VectorizedArray<Number> &vec)
+  {
+    data+=vec.data;
+    return *this;
+  }
+
+  /**
+   * In-place subtraction of @p vec; returns this object.
+   */
+  VectorizedArray &
+  operator -= (const VectorizedArray<Number> &vec)
+  {
+    data-=vec.data;
+    return *this;
+  }
+
+  /**
+   * In-place multiplication by @p vec; returns this object.
+   */
+  VectorizedArray &
+  operator *= (const VectorizedArray<Number> &vec)
+  {
+    data*=vec.data;
+    return *this;
+  }
+
+  /**
+   * In-place division by @p vec; returns this object.
+   */
+  VectorizedArray &
+  operator /= (const VectorizedArray<Number> &vec)
+  {
+    data/=vec.data;
+    return *this;
+  }
+
+  /**
+   * Loads @p n_array_elements from memory into the calling class, starting at
+   * the given address. The memory need not be aligned by the amount of bytes
+   * in the vectorized array, as opposed to casting a double address to
+   * VectorizedArray<double>*.
+   */
+  void load (const Number *ptr)
+  {
+    data = *ptr;
+  }
+
+  /**
+   * Writes the content of the calling class into memory in form of @p
+   * n_array_elements to the given address. The memory need not be aligned by
+   * the amount of bytes in the vectorized array, as opposed to casting a
+   * double address to VectorizedArray<double>*.
+   */
+  void store (Number *ptr) const
+  {
+    *ptr = data;
+  }
+
+  /**
+   * Actual data field. Since this class represents a POD data type, it is
+   * declared public.
+   */
+  Number data;
+
+private:
+  /**
+   * Returns the square root of this field. Not for use in user code. Use
+   * sqrt(x) instead.
+   */
+  VectorizedArray
+  get_sqrt () const
+  {
+    VectorizedArray res;
+    res.data = std::sqrt(data);
+    return res;
+  }
+
+  /**
+   * Returns the absolute value of this field. Not for use in user code. Use
+   * abs(x) instead.
+   */
+  VectorizedArray
+  get_abs () const
+  {
+    VectorizedArray res;
+    res.data = std::fabs(data);
+    return res;
+  }
+
+  /**
+   * Returns the component-wise maximum of this field and another one. Not for
+   * use in user code. Use max(x,y) instead.
+   */
+  VectorizedArray
+  get_max (const VectorizedArray &other) const
+  {
+    VectorizedArray res;
+    res.data = std::max (data, other.data);
+    return res;
+  }
+
+  /**
+   * Returns the component-wise minimum of this field and another one. Not for
+   * use in user code. Use min(x,y) instead.
+   */
+  VectorizedArray
+  get_min (const VectorizedArray &other) const
+  {
+    VectorizedArray res;
+    res.data = std::min (data, other.data);
+    return res;
+  }
+
+  /**
+   * Befriend the std:: overloads, presumably so they can call get_*() above.
+   */
+  template <typename Number2> friend VectorizedArray<Number2>
+  std::sqrt (const VectorizedArray<Number2> &);
+  template <typename Number2> friend VectorizedArray<Number2>
+  std::abs  (const VectorizedArray<Number2> &);
+  template <typename Number2> friend VectorizedArray<Number2>
+  std::max  (const VectorizedArray<Number2> &, const VectorizedArray<Number2> &);
+  template <typename Number2> friend VectorizedArray<Number2>
+  std::min  (const VectorizedArray<Number2> &, const VectorizedArray<Number2> &);
+};
+
+
+
+/**
+ * Create a vectorized array that sets all entries in the array to the given
+ * scalar.
+ *
+ * @relates VectorizedArray
+ */
+template <typename Number>
+inline
+VectorizedArray<Number>
+make_vectorized_array (const Number &u)
+{
+  VectorizedArray<Number> result;
+  result = u; // scalar assignment via VectorizedArray::operator=
+  return result;
+}
+
+
+
+/**
+ * This method loads VectorizedArray::n_array_elements data streams from the
+ * given array @p in. The offsets to the input array are given by the array @p
+ * offsets. From each stream, n_entries are read. The data is then transposed
+ * and stored into an array of VectorizedArray type. The output array @p
+ * out is expected to be an array of size @p n_entries. This method operates
+ * on plain arrays, so no checks for valid data access are made. It is the
+ * user's responsibility to ensure that the given arrays are valid according
+ * to the access layout below.
+ *
+ * This operation corresponds to a transformation of an array-of-struct
+ * (input) into a struct-of-array (output) according to the following formula:
+ *
+ * @code
+ * for (unsigned int i=0; i<n_entries; ++i)
+ *   for (unsigned int v=0; v<VectorizedArray<Number>::n_array_elements; ++v)
+ *     out[i][v] = in[offsets[v]+i];
+ * @endcode
+ *
+ * A more optimized version of this code will be used for supported types.
+ *
+ * This is the inverse operation to vectorized_transpose_and_store().
+ *
+ * @relates VectorizedArray
+ */
+template <typename Number>
+inline
+void
+vectorized_load_and_transpose(const unsigned int       n_entries,
+                              const Number            *in,
+                              const unsigned int      *offsets,
+                              VectorizedArray<Number> *out)
+{
+  for (unsigned int i=0; i<n_entries; ++i) // i: position within each stream
+    for (unsigned int v=0; v<VectorizedArray<Number>::n_array_elements; ++v)
+      out[i][v] = in[offsets[v]+i]; // stream v begins at in[offsets[v]]
+}
+
+
+
+/**
+ * This method stores the vectorized arrays in transposed form into the given
+ * output array @p out with the given offsets @p offsets. This operation
+ * corresponds to a transformation of a struct-of-array (input) into an
+ * array-of-struct (output). This method operates on plain arrays, so no
+ * checks for valid data access are made. It is the user's responsibility to
+ * ensure that the given arrays are valid according to the access layout below.
+ *
+ * This method assumes that the specified offsets do not overlap. Otherwise,
+ * the behavior is undefined in the vectorized case. It is the user's
+ * responsibility to make sure that the access does not overlap and avoid
+ * undefined behavior.
+ *
+ * The argument @p add_into selects whether the entries should only be written
+ * into the output arrays or the result should be added into the existing
+ * entries in the output. For <code>add_into == false</code>, the following
+ * code is assumed:
+ *
+ * @code
+ * for (unsigned int i=0; i<n_entries; ++i)
+ *   for (unsigned int v=0; v<VectorizedArray<Number>::n_array_elements; ++v)
+ *     out[offsets[v]+i] = in[i][v];
+ * @endcode
+ *
+ * For <code>add_into == true</code>, the code implements the following
+ * action:
+ * @code
+ * for (unsigned int i=0; i<n_entries; ++i)
+ *   for (unsigned int v=0; v<VectorizedArray<Number>::n_array_elements; ++v)
+ *     out[offsets[v]+i] += in[i][v];
+ * @endcode
+ *
+ * A more optimized version of this code will be used for supported types.
+ *
+ * This is the inverse operation to vectorized_load_and_transpose().
+ *
+ * @relates VectorizedArray
+ */
+template <typename Number>
+inline
+void
+vectorized_transpose_and_store(const bool                     add_into,
+                               const unsigned int             n_entries,
+                               const VectorizedArray<Number> *in,
+                               const unsigned int            *offsets,
+                               Number                        *out)
+{
+  if (add_into) // accumulate into the existing entries of out
+    for (unsigned int i=0; i<n_entries; ++i)
+      for (unsigned int v=0; v<VectorizedArray<Number>::n_array_elements; ++v)
+        out[offsets[v]+i] += in[i][v];
+  else // overwrite the entries of out
+    for (unsigned int i=0; i<n_entries; ++i)
+      for (unsigned int v=0; v<VectorizedArray<Number>::n_array_elements; ++v)
+        out[offsets[v]+i] = in[i][v];
+}
+
+
+
+// for safety, also check that __AVX512F__ is defined in case the user manually
+// set some conflicting compile flags which prevent compilation
+
+#if DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 3  && defined(__AVX512F__)
+
+/**
+ * Specialization of VectorizedArray class for double and AVX-512.
+ */
+template <>
+class VectorizedArray<double>
+{
+public:
+  /**
+   * Number of doubles held in one object (the lanes of the __m512d field).
+   */
+  static const unsigned int n_array_elements = 8;
+
+  /**
+   * This function can be used to set all data fields to a given scalar.
+   */
+  VectorizedArray &
+  operator = (const double x)
+  {
+    data = _mm512_set1_pd(x);
+    return *this;
+  }
+
+  /**
+   * Access operator: lane @p comp of the data field viewed as doubles.
+   */
+  double &
+  operator [] (const unsigned int comp)
+  {
+    AssertIndexRange (comp, 8);
+    return *(reinterpret_cast<double *>(&data)+comp);
+  }
+
+  /**
+   * Constant access operator: lane @p comp of the data field, read-only.
+   */
+  const double &
+  operator [] (const unsigned int comp) const
+  {
+    AssertIndexRange (comp, 8);
+    return *(reinterpret_cast<const double *>(&data)+comp);
+  }
+
+  /**
+   * Addition.
+   */
+  VectorizedArray &
+  operator += (const VectorizedArray &vec)
+  {
+    // if the compiler supports vector arithmetics, we can simply use +=
+    // operator on the given data type. this allows the compiler to combine
+    // additions with multiplication (fused multiply-add) if those
+    // instructions are available. Otherwise, we need to use the built-in
+    // intrinsic command for __m512d
+#ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
+    data += vec.data;
+#else
+    data = _mm512_add_pd(data,vec.data);
+#endif
+    return *this;
+  }
+
+  /**
+   * Subtraction.
+   */
+  VectorizedArray &
+  operator -= (const VectorizedArray &vec)
+  {
+#ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
+    data -= vec.data;
+#else
+    data = _mm512_sub_pd(data,vec.data);
+#endif
+    return *this;
+  }
+  /**
+   * Multiplication.
+   */
+  VectorizedArray &
+  operator *= (const VectorizedArray &vec)
+  {
+#ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
+    data *= vec.data;
+#else
+    data = _mm512_mul_pd(data,vec.data);
+#endif
+    return *this;
+  }
+
+  /**
+   * Division.
+   */
+  VectorizedArray &
+  operator /= (const VectorizedArray &vec)
+  {
+#ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
+    data /= vec.data;
+#else
+    data = _mm512_div_pd(data,vec.data);
+#endif
+    return *this;
+  }
+
+  /**
+   * Loads @p n_array_elements from memory into the calling class, starting at
+   * the given address. The memory need not be aligned by 64 bytes, as opposed
+   * to casting a double address to VectorizedArray<double>*.
+   */
+  void load (const double *ptr)
+  {
+    data = _mm512_loadu_pd (ptr);
+  }
+
+  /**
+   * Writes the content of the calling class into memory in form of @p
+   * n_array_elements to the given address. The memory need not be aligned by
+   * 64 bytes, as opposed to casting a double address to
+   * VectorizedArray<double>*.
+   */
+  void store (double *ptr) const
+  {
+    _mm512_storeu_pd (ptr, data);
+  }
+
+  /**
+   * Actual data field. Since this class represents a POD data type, it
+   * remains public.
+   */
+  __m512d data;
+
+private:
+  /**
+   * Returns the square root of this field. Not for use in user code. Use
+   * sqrt(x) instead.
+   */
+  VectorizedArray
+  get_sqrt () const
+  {
+    VectorizedArray res;
+    res.data = _mm512_sqrt_pd(data);
+    return res;
+  }
+
+  /**
+   * Returns the absolute value of this field. Not for use in user code. Use
+   * abs(x) instead.
+   */
+  VectorizedArray
+  get_abs () const
+  {
+    // to compute the absolute value, perform bitwise andnot with -0. This
+    // leaves all value and exponent bits unchanged but forces the sign to +.
+    // Since there is no andnot for AVX512, the data is reinterpreted as 64 bit
+    // integers for the andnot (a bitwise operation, so the type is irrelevant).
+    // NOTE(review): the C-style vector casts look compiler-specific -- confirm.
+    __m512d mask = _mm512_set1_pd (-0.);
+    VectorizedArray res;
+    res.data = (__m512d)_mm512_andnot_epi64 ((__m512i)mask, (__m512i)data);
+    return res;
+  }
+
+  /**
+   * Returns the component-wise maximum of this field and another one. Not for
+   * use in user code. Use max(x,y) instead.
+   */
+  VectorizedArray
+  get_max (const VectorizedArray &other) const
+  {
+    VectorizedArray res;
+    res.data = _mm512_max_pd (data, other.data);
+    return res;
+  }
+
+  /**
+   * Returns the component-wise minimum of this field and another one. Not for
+   * use in user code. Use min(x,y) instead.
+   */
+  VectorizedArray
+  get_min (const VectorizedArray &other) const
+  {
+    VectorizedArray res;
+    res.data = _mm512_min_pd (data, other.data);
+    return res;
+  }
+
+  /**
+   * Befriend the std:: overloads, presumably so they can call get_*() above.
+   */
+  template <typename Number2> friend VectorizedArray<Number2>
+  std::sqrt (const VectorizedArray<Number2> &);
+  template <typename Number2> friend VectorizedArray<Number2>
+  std::abs  (const VectorizedArray<Number2> &);
+  template <typename Number2> friend VectorizedArray<Number2>
+  std::max  (const VectorizedArray<Number2> &, const VectorizedArray<Number2> &);
+  template <typename Number2> friend VectorizedArray<Number2>
+  std::min  (const VectorizedArray<Number2> &, const VectorizedArray<Number2> &);
+};
+
+
+
+/**
+ * Specialization for float and AVX-512.
+ */
+template<>
+class VectorizedArray<float>
+{
+public:
+  /**
+   * Number of floats held in one object (the lanes of the __m512 field).
+   */
+  static const unsigned int n_array_elements = 16;
+
+  /**
+   * This function can be used to set all data fields to a given scalar.
+   */
+  VectorizedArray &
+  operator = (const float x)
+  {
+    data = _mm512_set1_ps(x);
+    return *this;
+  }
+
+  /**
+   * Access operator: lane @p comp of the data field viewed as floats.
+   */
+  float &
+  operator [] (const unsigned int comp)
+  {
+    AssertIndexRange (comp, 16);
+    return *(reinterpret_cast<float *>(&data)+comp);
+  }
+
+  /**
+   * Constant access operator: lane @p comp of the data field, read-only.
+   */
+  const float &
+  operator [] (const unsigned int comp) const
+  {
+    AssertIndexRange (comp, 16);
+    return *(reinterpret_cast<const float *>(&data)+comp);
+  }
+
+  /**
+   * Addition.
+   */
+  VectorizedArray &
+  operator += (const VectorizedArray &vec)
+  {
+    // if the compiler supports vector arithmetics, we can simply use +=
+    // operator on the given data type. this allows the compiler to combine
+    // additions with multiplication (fused multiply-add) if those
+    // instructions are available. Otherwise, we need to use the built-in
+    // intrinsic command for __m512
+#ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
+    data += vec.data;
+#else
+    data = _mm512_add_ps(data,vec.data);
+#endif
+    return *this;
+  }
+
+  /**
+   * Subtraction.
+   */
+  VectorizedArray &
+  operator -= (const VectorizedArray &vec)
+  {
+#ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
+    data -= vec.data;
+#else
+    data = _mm512_sub_ps(data,vec.data);
+#endif
+    return *this;
+  }
+  /**
+   * Multiplication.
+   */
+  VectorizedArray &
+  operator *= (const VectorizedArray &vec)
+  {
+#ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
+    data *= vec.data;
+#else
+    data = _mm512_mul_ps(data,vec.data);
+#endif
+    return *this;
+  }
+
+  /**
+   * Division.
+   */
+  VectorizedArray &
+  operator /= (const VectorizedArray &vec)
+  {
+#ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
+    data /= vec.data;
+#else
+    data = _mm512_div_ps(data,vec.data);
+#endif
+    return *this;
+  }
+
+  /**
+   * Loads @p n_array_elements from memory into the calling class, starting at
+   * the given address. The memory need not be aligned by 64 bytes, as opposed
+   * to casting a float address to VectorizedArray<float>*.
+   */
+  void load (const float *ptr)
+  {
+    data = _mm512_loadu_ps (ptr);
+  }
+
+  /**
+   * Writes the content of the calling class into memory in form of @p
+   * n_array_elements to the given address. The memory need not be aligned by
+   * 64 bytes, as opposed to casting a float address to
+   * VectorizedArray<float>*.
+   */
+  void store (float *ptr) const
+  {
+    _mm512_storeu_ps (ptr, data);
+  }
+
+  /**
+   * Actual data field. Since this class represents a POD data type, it
+   * remains public.
+   */
+  __m512 data;
+
+private:
+
+  /**
+   * Returns the square root of this field. Not for use in user code. Use
+   * sqrt(x) instead.
+   */
+  VectorizedArray
+  get_sqrt () const
+  {
+    VectorizedArray res;
+    res.data = _mm512_sqrt_ps(data);
+    return res;
+  }
+
+  /**
+   * Returns the absolute value of this field. Not for use in user code. Use
+   * abs(x) instead.
+   */
+  VectorizedArray
+  get_abs () const
+  {
+    // to compute the absolute value, perform bitwise andnot with -0. This
+    // leaves all value and exponent bits unchanged but forces the sign to +.
+    // Since there is no andnot for AVX512, the data is reinterpreted as 32 bit
+    // integers for the andnot (a bitwise operation, so the type is irrelevant).
+    // NOTE(review): the C-style vector casts look compiler-specific -- confirm.
+    __m512 mask = _mm512_set1_ps (-0.f);
+    VectorizedArray res;
+    res.data = (__m512)_mm512_andnot_epi32 ((__m512i)mask, (__m512i)data);
+    return res;
+  }
+
+  /**
+   * Returns the component-wise maximum of this field and another one. Not for
+   * use in user code. Use max(x,y) instead.
+   */
+  VectorizedArray
+  get_max (const VectorizedArray &other) const
+  {
+    VectorizedArray res;
+    res.data = _mm512_max_ps (data, other.data);
+    return res;
+  }
+
+  /**
+   * Returns the component-wise minimum of this field and another one. Not for
+   * use in user code. Use min(x,y) instead.
+   */
+  VectorizedArray
+  get_min (const VectorizedArray &other) const
+  {
+    VectorizedArray res;
+    res.data = _mm512_min_ps (data, other.data);
+    return res;
+  }
+
+  /**
+   * Befriend the std:: overloads, presumably so they can call get_*() above.
+   */
+  template <typename Number2> friend VectorizedArray<Number2>
+  std::sqrt (const VectorizedArray<Number2> &);
+  template <typename Number2> friend VectorizedArray<Number2>
+  std::abs  (const VectorizedArray<Number2> &);
+  template <typename Number2> friend VectorizedArray<Number2>
+  std::max  (const VectorizedArray<Number2> &, const VectorizedArray<Number2> &);
+  template <typename Number2> friend VectorizedArray<Number2>
+  std::min  (const VectorizedArray<Number2> &, const VectorizedArray<Number2> &);
+};
+
+
+
+#elif DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 2  && defined(__AVX__)
+
+/**
+ * Specialization of VectorizedArray class for double and AVX.
+ */
+template <>
+class VectorizedArray<double>
+{
+public:
+  /**
+   * Number of doubles held in one object (the lanes of the __m256d field).
+   */
+  static const unsigned int n_array_elements = 4;
+
+  /**
+   * This function can be used to set all data fields to a given scalar.
+   */
+  VectorizedArray &
+  operator = (const double x)
+  {
+    data = _mm256_set1_pd(x);
+    return *this;
+  }
+
+  /**
+   * Access operator: lane @p comp of the data field viewed as doubles.
+   */
+  double &
+  operator [] (const unsigned int comp)
+  {
+    AssertIndexRange (comp, 4);
+    return *(reinterpret_cast<double *>(&data)+comp);
+  }
+
+  /**
+   * Constant access operator: lane @p comp of the data field, read-only.
+   */
+  const double &
+  operator [] (const unsigned int comp) const
+  {
+    AssertIndexRange (comp, 4);
+    return *(reinterpret_cast<const double *>(&data)+comp);
+  }
+
+  /**
+   * Addition.
+   */
+  VectorizedArray &
+  operator += (const VectorizedArray &vec)
+  {
+    // if the compiler supports vector arithmetics, we can simply use +=
+    // operator on the given data type. this allows the compiler to combine
+    // additions with multiplication (fused multiply-add) if those
+    // instructions are available. Otherwise, we need to use the built-in
+    // intrinsic command for __m256d
+#ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
+    data += vec.data;
+#else
+    data = _mm256_add_pd(data,vec.data);
+#endif
+    return *this;
+  }
+
+  /**
+   * Subtraction.
+   */
+  VectorizedArray &
+  operator -= (const VectorizedArray &vec)
+  {
+#ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
+    data -= vec.data;
+#else
+    data = _mm256_sub_pd(data,vec.data);
+#endif
+    return *this;
+  }
+  /**
+   * Multiplication.
+   */
+  VectorizedArray &
+  operator *= (const VectorizedArray &vec)
+  {
+#ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
+    data *= vec.data;
+#else
+    data = _mm256_mul_pd(data,vec.data);
+#endif
+    return *this;
+  }
+
+  /**
+   * Division.
+   */
+  VectorizedArray &
+  operator /= (const VectorizedArray &vec)
+  {
+#ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
+    data /= vec.data;
+#else
+    data = _mm256_div_pd(data,vec.data);
+#endif
+    return *this;
+  }
+
+  /**
+   * Loads @p n_array_elements from memory into the calling class, starting at
+   * the given address. The memory need not be aligned by 32 bytes, as opposed
+   * to casting a double address to VectorizedArray<double>*.
+   */
+  void load (const double *ptr)
+  {
+    data = _mm256_loadu_pd (ptr);
+  }
+
+  /**
+   * Writes the content of the calling class into memory in form of @p
+   * n_array_elements to the given address. The memory need not be aligned by
+   * 32 bytes, as opposed to casting a double address to
+   * VectorizedArray<double>*.
+   */
+  void store (double *ptr) const
+  {
+    _mm256_storeu_pd (ptr, data);
+  }
+
+  /**
+   * Actual data field. Since this class represents a POD data type, it
+   * remains public.
+   */
+  __m256d data;
+
+private:
+  /**
+   * Returns the square root of this field. Not for use in user code. Use
+   * sqrt(x) instead.
+   */
+  VectorizedArray
+  get_sqrt () const
+  {
+    VectorizedArray res;
+    res.data = _mm256_sqrt_pd(data);
+    return res;
+  }
+
+  /**
+   * Returns the absolute value of this field. Not for use in user code. Use
+   * abs(x) instead.
+   */
+  VectorizedArray
+  get_abs () const
+  {
+    // to compute the absolute value, perform bitwise andnot with -0. This
+    // will leave all value and exponent bits unchanged but force the sign
+    // value to +.
+    __m256d mask = _mm256_set1_pd (-0.);
+    VectorizedArray res;
+    res.data = _mm256_andnot_pd(mask, data);
+    return res;
+  }
+
+  /**
+   * Returns the component-wise maximum of this field and another one. Not for
+   * use in user code. Use max(x,y) instead.
+   */
+  VectorizedArray
+  get_max (const VectorizedArray &other) const
+  {
+    VectorizedArray res;
+    res.data = _mm256_max_pd (data, other.data);
+    return res;
+  }
+
+  /**
+   * Returns the component-wise minimum of this field and another one. Not for
+   * use in user code. Use min(x,y) instead.
+   */
+  VectorizedArray
+  get_min (const VectorizedArray &other) const
+  {
+    VectorizedArray res;
+    res.data = _mm256_min_pd (data, other.data);
+    return res;
+  }
+
+  /**
+   * Befriend the std:: overloads, presumably so they can call get_*() above.
+   */
+  template <typename Number2> friend VectorizedArray<Number2>
+  std::sqrt (const VectorizedArray<Number2> &);
+  template <typename Number2> friend VectorizedArray<Number2>
+  std::abs  (const VectorizedArray<Number2> &);
+  template <typename Number2> friend VectorizedArray<Number2>
+  std::max  (const VectorizedArray<Number2> &, const VectorizedArray<Number2> &);
+  template <typename Number2> friend VectorizedArray<Number2>
+  std::min  (const VectorizedArray<Number2> &, const VectorizedArray<Number2> &);
+};
+
+
+
+/**
+ * Specialization for double and AVX.
+ */
+template <>
+inline
+void
+vectorized_load_and_transpose(const unsigned int       n_entries,
+                              const double            *in,
+                              const unsigned int      *offsets,
+                              VectorizedArray<double> *out)
+{
+  const unsigned int n_chunks = n_entries/4, remainder = n_entries%4;
+  for (unsigned int i=0; i<n_chunks; ++i) // full 4x4 blocks
+    {
+      __m256d u0 = _mm256_loadu_pd(in+4*i+offsets[0]);
+      __m256d u1 = _mm256_loadu_pd(in+4*i+offsets[1]);
+      __m256d u2 = _mm256_loadu_pd(in+4*i+offsets[2]);
+      __m256d u3 = _mm256_loadu_pd(in+4*i+offsets[3]);
+      __m256d t0 = _mm256_permute2f128_pd (u0, u2, 0x20); // lower halves of u0, u2
+      __m256d t1 = _mm256_permute2f128_pd (u1, u3, 0x20);
+      __m256d t2 = _mm256_permute2f128_pd (u0, u2, 0x31); // upper halves of u0, u2
+      __m256d t3 = _mm256_permute2f128_pd (u1, u3, 0x31);
+      out[4*i+0].data = _mm256_unpacklo_pd (t0, t1);
+      out[4*i+1].data = _mm256_unpackhi_pd (t0, t1);
+      out[4*i+2].data = _mm256_unpacklo_pd (t2, t3);
+      out[4*i+3].data = _mm256_unpackhi_pd (t2, t3);
+    }
+  if (remainder > 0 && n_chunks > 0)
+    {
+      // simply redo the last four entries, overlapping the previous chunk
+      const unsigned int final_pos = n_chunks*4-4+remainder;
+      Assert(final_pos+4 == n_entries, ExcInternalError());
+      __m256d u0 = _mm256_loadu_pd(in+final_pos+offsets[0]);
+      __m256d u1 = _mm256_loadu_pd(in+final_pos+offsets[1]);
+      __m256d u2 = _mm256_loadu_pd(in+final_pos+offsets[2]);
+      __m256d u3 = _mm256_loadu_pd(in+final_pos+offsets[3]);
+      __m256d t0 = _mm256_permute2f128_pd (u0, u2, 0x20);
+      __m256d t1 = _mm256_permute2f128_pd (u1, u3, 0x20);
+      __m256d t2 = _mm256_permute2f128_pd (u0, u2, 0x31);
+      __m256d t3 = _mm256_permute2f128_pd (u1, u3, 0x31);
+      out[final_pos+0].data = _mm256_unpacklo_pd (t0, t1);
+      out[final_pos+1].data = _mm256_unpackhi_pd (t0, t1);
+      out[final_pos+2].data = _mm256_unpacklo_pd (t2, t3);
+      out[final_pos+3].data = _mm256_unpackhi_pd (t2, t3);
+    }
+  else if (remainder > 0) // fewer than four entries in total: scalar fallback
+    for (unsigned int i=0; i<n_entries; ++i)
+      for (unsigned int v=0; v<4; ++v)
+        out[i][v] = in[offsets[v]+i];
+}
+
+
+
+/**
+ * Specialization of vectorized_transpose_and_store() for double and AVX.
+ *
+ * For every <tt>i</tt> in <tt>[0,n_entries)</tt> and every lane
+ * <tt>v=0,...,3</tt>, the value <tt>in[i][v]</tt> is written to
+ * <tt>out[offsets[v]+i]</tt>. If @p add_into is true the value is added to
+ * what is already stored in @p out, otherwise it replaces it.
+ */
+template <>
+inline
+void
+vectorized_transpose_and_store(const bool                     add_into,
+                               const unsigned int             n_entries,
+                               const VectorizedArray<double> *in,
+                               const unsigned int            *offsets,
+                               double                        *out)
+{
+  // number of complete groups of four entries that are transposed in
+  // registers
+  const unsigned int n_blocks = n_entries/4;
+  for (unsigned int b=0; b<n_blocks; ++b)
+    {
+      const VectorizedArray<double> *src = in+4*b;
+      double                        *dst = out+4*b;
+
+      const __m256d a0 = src[0].data;
+      const __m256d a1 = src[1].data;
+      const __m256d a2 = src[2].data;
+      const __m256d a3 = src[3].data;
+
+      // first exchange the 128-bit halves between the registers, then
+      // interleave the two doubles inside each half
+      const __m256d lo02 = _mm256_permute2f128_pd (a0, a2, 0x20);
+      const __m256d lo13 = _mm256_permute2f128_pd (a1, a3, 0x20);
+      const __m256d hi02 = _mm256_permute2f128_pd (a0, a2, 0x31);
+      const __m256d hi13 = _mm256_permute2f128_pd (a1, a3, 0x31);
+      const __m256d row0 = _mm256_unpacklo_pd (lo02, lo13);
+      const __m256d row1 = _mm256_unpackhi_pd (lo02, lo13);
+      const __m256d row2 = _mm256_unpacklo_pd (hi02, hi13);
+      const __m256d row3 = _mm256_unpackhi_pd (hi02, hi13);
+
+      // The two cases are spelled out separately rather than sharing the
+      // store instructions because the compiler cannot know that the
+      // pointers do not alias. In the additive case each destination is
+      // read, updated and written before the next one is touched.
+      if (add_into)
+        {
+          _mm256_storeu_pd(dst+offsets[0],
+                           _mm256_add_pd(_mm256_loadu_pd(dst+offsets[0]), row0));
+          _mm256_storeu_pd(dst+offsets[1],
+                           _mm256_add_pd(_mm256_loadu_pd(dst+offsets[1]), row1));
+          _mm256_storeu_pd(dst+offsets[2],
+                           _mm256_add_pd(_mm256_loadu_pd(dst+offsets[2]), row2));
+          _mm256_storeu_pd(dst+offsets[3],
+                           _mm256_add_pd(_mm256_loadu_pd(dst+offsets[3]), row3));
+        }
+      else
+        {
+          _mm256_storeu_pd(dst+offsets[0], row0);
+          _mm256_storeu_pd(dst+offsets[1], row1);
+          _mm256_storeu_pd(dst+offsets[2], row2);
+          _mm256_storeu_pd(dst+offsets[3], row3);
+        }
+    }
+
+  // entries beyond the last complete group of four are handled one scalar
+  // at a time
+  for (unsigned int i=4*n_blocks; i<n_entries; ++i)
+    for (unsigned int v=0; v<4; ++v)
+      {
+        double &target = out[offsets[v]+i];
+        if (add_into)
+          target += in[i][v];
+        else
+          target = in[i][v];
+      }
+}
+
+
+
+/**
+ * Specialization of VectorizedArray for float and AVX. Eight single
+ * precision numbers are stored in one variable of type __m256.
+ */
+template<>
+class VectorizedArray<float>
+{
+public:
+  /**
+   * Number of floats held by one object of this class.
+   */
+  static const unsigned int n_array_elements = 8;
+
+  /**
+   * Broadcast assignment: all entries are set to the scalar @p x.
+   */
+  VectorizedArray &
+  operator = (const float x)
+  {
+    data = _mm256_set1_ps(x);
+    return *this;
+  }
+
+  /**
+   * Read-write access to the entry with index @p comp. The index is checked
+   * against the range [0,8) by AssertIndexRange.
+   */
+  float &
+  operator [] (const unsigned int comp)
+  {
+    AssertIndexRange (comp, 8);
+    float *entries = reinterpret_cast<float *>(&data);
+    return entries[comp];
+  }
+
+  /**
+   * Read-only access to the entry with index @p comp. The index is checked
+   * against the range [0,8) by AssertIndexRange.
+   */
+  const float &
+  operator [] (const unsigned int comp) const
+  {
+    AssertIndexRange (comp, 8);
+    const float *entries = reinterpret_cast<const float *>(&data);
+    return entries[comp];
+  }
+
+  /**
+   * Adds @p vec to this object, entry by entry.
+   */
+  VectorizedArray &
+  operator += (const VectorizedArray &vec)
+  {
+    // Without compiler support for arithmetic on the vector type we have to
+    // call the intrinsic for __m256. With such support the plain +=
+    // operator is used instead, which allows the compiler to combine
+    // additions with multiplications (fused multiply-add) if those
+    // instructions are available.
+#ifndef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
+    data = _mm256_add_ps(data,vec.data);
+#else
+    data += vec.data;
+#endif
+    return *this;
+  }
+
+  /**
+   * Subtracts @p vec from this object, entry by entry.
+   */
+  VectorizedArray &
+  operator -= (const VectorizedArray &vec)
+  {
+#ifndef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
+    data = _mm256_sub_ps(data,vec.data);
+#else
+    data -= vec.data;
+#endif
+    return *this;
+  }
+
+  /**
+   * Multiplies this object by @p vec, entry by entry.
+   */
+  VectorizedArray &
+  operator *= (const VectorizedArray &vec)
+  {
+#ifndef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
+    data = _mm256_mul_ps(data,vec.data);
+#else
+    data *= vec.data;
+#endif
+    return *this;
+  }
+
+  /**
+   * Divides this object by @p vec, entry by entry.
+   */
+  VectorizedArray &
+  operator /= (const VectorizedArray &vec)
+  {
+#ifndef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
+    data = _mm256_div_ps(data,vec.data);
+#else
+    data /= vec.data;
+#endif
+    return *this;
+  }
+
+  /**
+   * Fills this object with @p n_array_elements floats read from memory
+   * starting at @p ptr. An unaligned load is used, so @p ptr need not be
+   * aligned by 32 bytes, as opposed to casting a float address to
+   * VectorizedArray<float>*.
+   */
+  void load (const float *ptr)
+  {
+    data = _mm256_loadu_ps (ptr);
+  }
+
+  /**
+   * Writes the @p n_array_elements floats of this object to memory starting
+   * at @p ptr. An unaligned store is used, so @p ptr need not be aligned by
+   * 32 bytes, as opposed to casting a float address to
+   * VectorizedArray<float>*.
+   */
+  void store (float *ptr) const
+  {
+    _mm256_storeu_ps (ptr, data);
+  }
+
+  /**
+   * The actual data. It is public because this class represents a POD data
+   * type.
+   */
+  __m256 data;
+
+private:
+
+  /**
+   * Entry-wise square root. Not for use in user code, use sqrt(x) instead.
+   */
+  VectorizedArray
+  get_sqrt () const
+  {
+    VectorizedArray result;
+    result.data = _mm256_sqrt_ps(data);
+    return result;
+  }
+
+  /**
+   * Entry-wise absolute value. Not for use in user code, use abs(x)
+   * instead.
+   */
+  VectorizedArray
+  get_abs () const
+  {
+    // -0.f has only the sign bit set. A bitwise and-not with it clears the
+    // sign of every entry and leaves exponent and mantissa bits untouched.
+    const __m256 sign_mask = _mm256_set1_ps (-0.f);
+    VectorizedArray result;
+    result.data = _mm256_andnot_ps(sign_mask, data);
+    return result;
+  }
+
+  /**
+   * Entry-wise maximum of this object and @p other. Not for use in user
+   * code, use max(x,y) instead.
+   */
+  VectorizedArray
+  get_max (const VectorizedArray &other) const
+  {
+    VectorizedArray result;
+    result.data = _mm256_max_ps (data, other.data);
+    return result;
+  }
+
+  /**
+   * Entry-wise minimum of this object and @p other. Not for use in user
+   * code, use min(x,y) instead.
+   */
+  VectorizedArray
+  get_min (const VectorizedArray &other) const
+  {
+    VectorizedArray result;
+    result.data = _mm256_min_ps (data, other.data);
+    return result;
+  }
+
+  /**
+   * The functions in namespace std that forward to the private members
+   * above need access to them.
+   */
+  template <typename Number2> friend VectorizedArray<Number2>
+  std::sqrt (const VectorizedArray<Number2> &);
+  template <typename Number2> friend VectorizedArray<Number2>
+  std::abs  (const VectorizedArray<Number2> &);
+  template <typename Number2> friend VectorizedArray<Number2>
+  std::max  (const VectorizedArray<Number2> &, const VectorizedArray<Number2> &);
+  template <typename Number2> friend VectorizedArray<Number2>
+  std::min  (const VectorizedArray<Number2> &, const VectorizedArray<Number2> &);
+};
+
+
+
+/**
+ * Specialization for float and AVX.
+ *
+ * Gathers data from the eight input streams starting at
+ * <tt>in+offsets[v]</tt>, <tt>v=0,...,7</tt>, and stores it transposed such
+ * that <tt>out[i][v] = in[offsets[v]+i]</tt> for all <tt>i</tt> in
+ * <tt>[0,n_entries)</tt>. Four consecutive entries of all eight streams are
+ * processed at once.
+ */
+template <>
+inline
+void
+vectorized_load_and_transpose(const unsigned int      n_entries,
+                              const float            *in,
+                              const unsigned int     *offsets,
+                              VectorizedArray<float> *out)
+{
+  // n_chunks: number of complete groups of four entries; remainder: number
+  // of entries left over after these groups
+  const unsigned int n_chunks = n_entries/4, remainder = n_entries%4;
+  for (unsigned int i=0; i<n_chunks; ++i)
+    {
+      // four consecutive floats (unaligned) from each of the eight streams
+      __m128 u0 = _mm_loadu_ps(in+4*i+offsets[0]);
+      __m128 u1 = _mm_loadu_ps(in+4*i+offsets[1]);
+      __m128 u2 = _mm_loadu_ps(in+4*i+offsets[2]);
+      __m128 u3 = _mm_loadu_ps(in+4*i+offsets[3]);
+      __m128 u4 = _mm_loadu_ps(in+4*i+offsets[4]);
+      __m128 u5 = _mm_loadu_ps(in+4*i+offsets[5]);
+      __m128 u6 = _mm_loadu_ps(in+4*i+offsets[6]);
+      __m128 u7 = _mm_loadu_ps(in+4*i+offsets[7]);
+      // To avoid warnings about uninitialized variables, need to initialize
+      // one variable with zero before using it.
+      __m256 t0, t1, t2, t3 = _mm256_set1_ps(0.F);
+      // Put stream v into the lower and stream v+4 into the upper 128-bit
+      // half of t_v. t3 serves as the zero-initialized seed for t0, t1, t2
+      // and must therefore be overwritten last.
+      t0 = _mm256_insertf128_ps (t3, u0, 0);
+      t0 = _mm256_insertf128_ps (t0, u4, 1);
+      t1 = _mm256_insertf128_ps (t3, u1, 0);
+      t1 = _mm256_insertf128_ps (t1, u5, 1);
+      t2 = _mm256_insertf128_ps (t3, u2, 0);
+      t2 = _mm256_insertf128_ps (t2, u6, 1);
+      t3 = _mm256_insertf128_ps (t3, u3, 0);
+      t3 = _mm256_insertf128_ps (t3, u7, 1);
+      // two rounds of shuffles transpose the 4x4 block of floats inside
+      // each 128-bit half
+      __m256 v0 = _mm256_shuffle_ps (t0, t1, 0x44);
+      __m256 v1 = _mm256_shuffle_ps (t0, t1, 0xee);
+      __m256 v2 = _mm256_shuffle_ps (t2, t3, 0x44);
+      __m256 v3 = _mm256_shuffle_ps (t2, t3, 0xee);
+      out[4*i+0].data = _mm256_shuffle_ps (v0, v2, 0x88);
+      out[4*i+1].data = _mm256_shuffle_ps (v0, v2, 0xdd);
+      out[4*i+2].data = _mm256_shuffle_ps (v1, v3, 0x88);
+      out[4*i+3].data = _mm256_shuffle_ps (v1, v3, 0xdd);
+    }
+  if (remainder > 0 && n_chunks > 0)
+    {
+      // n_entries is not a multiple of four but at least four: run the
+      // same transpose once more on the last four entries. This block
+      // overlaps with entries already written by the loop above, which
+      // simply receive the same values again.
+      const unsigned int final_pos = n_chunks*4-4+remainder;
+      Assert(final_pos+4 == n_entries, ExcInternalError());
+      __m128 u0 = _mm_loadu_ps(in+final_pos+offsets[0]);
+      __m128 u1 = _mm_loadu_ps(in+final_pos+offsets[1]);
+      __m128 u2 = _mm_loadu_ps(in+final_pos+offsets[2]);
+      __m128 u3 = _mm_loadu_ps(in+final_pos+offsets[3]);
+      __m128 u4 = _mm_loadu_ps(in+final_pos+offsets[4]);
+      __m128 u5 = _mm_loadu_ps(in+final_pos+offsets[5]);
+      __m128 u6 = _mm_loadu_ps(in+final_pos+offsets[6]);
+      __m128 u7 = _mm_loadu_ps(in+final_pos+offsets[7]);
+      // as above, t3 is the zero seed and is filled last
+      __m256 t0, t1, t2, t3 = _mm256_set1_ps(0.F);
+      t0 = _mm256_insertf128_ps (t3, u0, 0);
+      t0 = _mm256_insertf128_ps (t0, u4, 1);
+      t1 = _mm256_insertf128_ps (t3, u1, 0);
+      t1 = _mm256_insertf128_ps (t1, u5, 1);
+      t2 = _mm256_insertf128_ps (t3, u2, 0);
+      t2 = _mm256_insertf128_ps (t2, u6, 1);
+      t3 = _mm256_insertf128_ps (t3, u3, 0);
+      t3 = _mm256_insertf128_ps (t3, u7, 1);
+      __m256 v0 = _mm256_shuffle_ps (t0, t1, 0x44);
+      __m256 v1 = _mm256_shuffle_ps (t0, t1, 0xee);
+      __m256 v2 = _mm256_shuffle_ps (t2, t3, 0x44);
+      __m256 v3 = _mm256_shuffle_ps (t2, t3, 0xee);
+      out[final_pos+0].data = _mm256_shuffle_ps (v0, v2, 0x88);
+      out[final_pos+1].data = _mm256_shuffle_ps (v0, v2, 0xdd);
+      out[final_pos+2].data = _mm256_shuffle_ps (v1, v3, 0x88);
+      out[final_pos+3].data = _mm256_shuffle_ps (v1, v3, 0xdd);
+    }
+  else if (remainder > 0)
+    // fewer than four entries in total: copy them one scalar at a time
+    for (unsigned int i=0; i<n_entries; ++i)
+      for (unsigned int v=0; v<8; ++v)
+        out[i][v] = in[offsets[v]+i];
+}
+
+
+
+/**
+ * Specialization of vectorized_transpose_and_store() for float and AVX.
+ *
+ * For every <tt>i</tt> in <tt>[0,n_entries)</tt> and every lane
+ * <tt>v=0,...,7</tt>, the value <tt>in[i][v]</tt> is written to
+ * <tt>out[offsets[v]+i]</tt>. If @p add_into is true the value is added to
+ * what is already stored in @p out, otherwise it replaces it.
+ */
+template <>
+inline
+void
+vectorized_transpose_and_store(const bool                    add_into,
+                               const unsigned int            n_entries,
+                               const VectorizedArray<float> *in,
+                               const unsigned int           *offsets,
+                               float                        *out)
+{
+  // number of complete groups of four entries that are transposed in
+  // registers
+  const unsigned int n_blocks = n_entries/4;
+  for (unsigned int b=0; b<n_blocks; ++b)
+    {
+      const VectorizedArray<float> *src = in+4*b;
+      float                        *dst = out+4*b;
+
+      const __m256 a0 = src[0].data;
+      const __m256 a1 = src[1].data;
+      const __m256 a2 = src[2].data;
+      const __m256 a3 = src[3].data;
+
+      // two rounds of shuffles transpose the 4x4 block of floats inside
+      // each 128-bit half of the registers
+      const __m256 lo01 = _mm256_shuffle_ps (a0, a1, 0x44);
+      const __m256 hi01 = _mm256_shuffle_ps (a0, a1, 0xee);
+      const __m256 lo23 = _mm256_shuffle_ps (a2, a3, 0x44);
+      const __m256 hi23 = _mm256_shuffle_ps (a2, a3, 0xee);
+      const __m256 q0 = _mm256_shuffle_ps (lo01, lo23, 0x88);
+      const __m256 q1 = _mm256_shuffle_ps (lo01, lo23, 0xdd);
+      const __m256 q2 = _mm256_shuffle_ps (hi01, hi23, 0x88);
+      const __m256 q3 = _mm256_shuffle_ps (hi01, hi23, 0xdd);
+
+      // the lower half of q_k belongs to lane k, the upper half to lane k+4
+      const __m128 row0 = _mm256_extractf128_ps (q0, 0);
+      const __m128 row4 = _mm256_extractf128_ps (q0, 1);
+      const __m128 row1 = _mm256_extractf128_ps (q1, 0);
+      const __m128 row5 = _mm256_extractf128_ps (q1, 1);
+      const __m128 row2 = _mm256_extractf128_ps (q2, 0);
+      const __m128 row6 = _mm256_extractf128_ps (q2, 1);
+      const __m128 row3 = _mm256_extractf128_ps (q3, 0);
+      const __m128 row7 = _mm256_extractf128_ps (q3, 1);
+
+      // The two cases are spelled out separately rather than sharing the
+      // store instructions because the compiler cannot know that the
+      // pointers do not alias. In the additive case each destination is
+      // read, updated and written before the next one is touched.
+      if (add_into)
+        {
+          _mm_storeu_ps(dst+offsets[0],
+                        _mm_add_ps(_mm_loadu_ps(dst+offsets[0]), row0));
+          _mm_storeu_ps(dst+offsets[1],
+                        _mm_add_ps(_mm_loadu_ps(dst+offsets[1]), row1));
+          _mm_storeu_ps(dst+offsets[2],
+                        _mm_add_ps(_mm_loadu_ps(dst+offsets[2]), row2));
+          _mm_storeu_ps(dst+offsets[3],
+                        _mm_add_ps(_mm_loadu_ps(dst+offsets[3]), row3));
+          _mm_storeu_ps(dst+offsets[4],
+                        _mm_add_ps(_mm_loadu_ps(dst+offsets[4]), row4));
+          _mm_storeu_ps(dst+offsets[5],
+                        _mm_add_ps(_mm_loadu_ps(dst+offsets[5]), row5));
+          _mm_storeu_ps(dst+offsets[6],
+                        _mm_add_ps(_mm_loadu_ps(dst+offsets[6]), row6));
+          _mm_storeu_ps(dst+offsets[7],
+                        _mm_add_ps(_mm_loadu_ps(dst+offsets[7]), row7));
+        }
+      else
+        {
+          _mm_storeu_ps(dst+offsets[0], row0);
+          _mm_storeu_ps(dst+offsets[1], row1);
+          _mm_storeu_ps(dst+offsets[2], row2);
+          _mm_storeu_ps(dst+offsets[3], row3);
+          _mm_storeu_ps(dst+offsets[4], row4);
+          _mm_storeu_ps(dst+offsets[5], row5);
+          _mm_storeu_ps(dst+offsets[6], row6);
+          _mm_storeu_ps(dst+offsets[7], row7);
+        }
+    }
+
+  // entries beyond the last complete group of four are handled one scalar
+  // at a time
+  for (unsigned int i=4*n_blocks; i<n_entries; ++i)
+    for (unsigned int v=0; v<8; ++v)
+      {
+        float &target = out[offsets[v]+i];
+        if (add_into)
+          target += in[i][v];
+        else
+          target = in[i][v];
+      }
+}
+
+
+
+// for safety, also check that __SSE2__ is defined in case the user manually
+// set some conflicting compile flags which prevent compilation
+
+#elif DEAL_II_COMPILER_VECTORIZATION_LEVEL >= 1 && defined(__SSE2__)
+
+/**
+ * Specialization of VectorizedArray for double and SSE2. Two double
+ * precision numbers are stored in one variable of type __m128d.
+ */
+template <>
+class VectorizedArray<double>
+{
+public:
+  /**
+   * Number of doubles held by one object of this class.
+   */
+  static const unsigned int n_array_elements = 2;
+
+  /**
+   * Broadcast assignment: all entries are set to the scalar @p x.
+   */
+  VectorizedArray &
+  operator = (const double x)
+  {
+    data = _mm_set1_pd(x);
+    return *this;
+  }
+
+  /**
+   * Read-write access to the entry with index @p comp. The index is checked
+   * against the range [0,2) by AssertIndexRange.
+   */
+  double &
+  operator [] (const unsigned int comp)
+  {
+    AssertIndexRange (comp, 2);
+    double *entries = reinterpret_cast<double *>(&data);
+    return entries[comp];
+  }
+
+  /**
+   * Read-only access to the entry with index @p comp. The index is checked
+   * against the range [0,2) by AssertIndexRange.
+   */
+  const double &
+  operator [] (const unsigned int comp) const
+  {
+    AssertIndexRange (comp, 2);
+    const double *entries = reinterpret_cast<const double *>(&data);
+    return entries[comp];
+  }
+
+  /**
+   * Adds @p vec to this object, entry by entry.
+   */
+  VectorizedArray &
+  operator += (const VectorizedArray &vec)
+  {
+    // use the SSE2 intrinsic unless the compiler supports arithmetic
+    // directly on the vector type
+#ifndef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
+    data = _mm_add_pd(data,vec.data);
+#else
+    data += vec.data;
+#endif
+    return *this;
+  }
+
+  /**
+   * Subtracts @p vec from this object, entry by entry.
+   */
+  VectorizedArray &
+  operator -= (const VectorizedArray &vec)
+  {
+#ifndef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
+    data = _mm_sub_pd(data,vec.data);
+#else
+    data -= vec.data;
+#endif
+    return *this;
+  }
+
+  /**
+   * Multiplies this object by @p vec, entry by entry.
+   */
+  VectorizedArray &
+  operator *= (const VectorizedArray &vec)
+  {
+#ifndef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
+    data = _mm_mul_pd(data,vec.data);
+#else
+    data *= vec.data;
+#endif
+    return *this;
+  }
+
+  /**
+   * Divides this object by @p vec, entry by entry.
+   */
+  VectorizedArray &
+  operator /= (const VectorizedArray &vec)
+  {
+#ifndef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
+    data = _mm_div_pd(data,vec.data);
+#else
+    data /= vec.data;
+#endif
+    return *this;
+  }
+
+  /**
+   * Fills this object with @p n_array_elements doubles read from memory
+   * starting at @p ptr. An unaligned load is used, so @p ptr need not be
+   * aligned by 16 bytes, as opposed to casting a double address to
+   * VectorizedArray<double>*.
+   */
+  void load (const double *ptr)
+  {
+    data = _mm_loadu_pd (ptr);
+  }
+
+  /**
+   * Writes the @p n_array_elements doubles of this object to memory
+   * starting at @p ptr. An unaligned store is used, so @p ptr need not be
+   * aligned by 16 bytes, as opposed to casting a double address to
+   * VectorizedArray<double>*.
+   */
+  void store (double *ptr) const
+  {
+    _mm_storeu_pd (ptr, data);
+  }
+
+  /**
+   * The actual data. It is public because this class represents a POD data
+   * type.
+   */
+  __m128d data;
+
+private:
+  /**
+   * Entry-wise square root. Not for use in user code, use sqrt(x) instead.
+   */
+  VectorizedArray
+  get_sqrt () const
+  {
+    VectorizedArray result;
+    result.data = _mm_sqrt_pd(data);
+    return result;
+  }
+
+  /**
+   * Entry-wise absolute value. Not for use in user code, use abs(x)
+   * instead.
+   */
+  VectorizedArray
+  get_abs () const
+  {
+    // -0. has only the sign bit set. A bitwise and-not with it clears the
+    // sign of every entry and leaves exponent and mantissa bits untouched.
+    const __m128d sign_mask = _mm_set1_pd (-0.);
+    VectorizedArray result;
+    result.data = _mm_andnot_pd(sign_mask, data);
+    return result;
+  }
+
+  /**
+   * Entry-wise maximum of this object and @p other. Not for use in user
+   * code, use max(x,y) instead.
+   */
+  VectorizedArray
+  get_max (const VectorizedArray &other) const
+  {
+    VectorizedArray result;
+    result.data = _mm_max_pd (data, other.data);
+    return result;
+  }
+
+  /**
+   * Entry-wise minimum of this object and @p other. Not for use in user
+   * code, use min(x,y) instead.
+   */
+  VectorizedArray
+  get_min (const VectorizedArray &other) const
+  {
+    VectorizedArray result;
+    result.data = _mm_min_pd (data, other.data);
+    return result;
+  }
+
+  /**
+   * The functions in namespace std that forward to the private members
+   * above need access to them.
+   */
+  template <typename Number2> friend VectorizedArray<Number2>
+  std::sqrt (const VectorizedArray<Number2> &);
+  template <typename Number2> friend VectorizedArray<Number2>
+  std::abs  (const VectorizedArray<Number2> &);
+  template <typename Number2> friend VectorizedArray<Number2>
+  std::max  (const VectorizedArray<Number2> &, const VectorizedArray<Number2> &);
+  template <typename Number2> friend VectorizedArray<Number2>
+  std::min  (const VectorizedArray<Number2> &, const VectorizedArray<Number2> &);
+};
+
+
+
+/**
+ * Specialization for double and SSE2.
+ *
+ * Gathers data from the two input streams starting at
+ * <tt>in+offsets[v]</tt>, <tt>v=0,1</tt>, and stores it transposed such that
+ * <tt>out[i][v] = in[offsets[v]+i]</tt> for all <tt>i</tt> in
+ * <tt>[0,n_entries)</tt>.
+ */
+template <>
+inline
+void vectorized_load_and_transpose(const unsigned int      n_entries,
+                                   const double            *in,
+                                   const unsigned int      *offsets,
+                                   VectorizedArray<double> *out)
+{
+  // complete pairs of entries are transposed in registers
+  const unsigned int n_chunks = n_entries/2;
+  for (unsigned int i=0; i<n_chunks; ++i)
+    {
+      __m128d u0 = _mm_loadu_pd(in+2*i+offsets[0]);
+      __m128d u1 = _mm_loadu_pd(in+2*i+offsets[1]);
+      out[2*i+0].data = _mm_unpacklo_pd (u0, u1);
+      out[2*i+1].data = _mm_unpackhi_pd (u0, u1);
+    }
+  // If n_entries is odd, only the last entry has not been written yet.
+  // Copy just that one rather than redoing all entries from the start; for
+  // even n_entries this loop does not execute.
+  for (unsigned int i=2*n_chunks; i<n_entries; ++i)
+    for (unsigned int v=0; v<2; ++v)
+      out[i][v] = in[offsets[v]+i];
+}
+
+
+
+/**
+ * Specialization of vectorized_transpose_and_store() for double and SSE2.
+ *
+ * For every <tt>i</tt> in <tt>[0,n_entries)</tt> and every lane
+ * <tt>v=0,1</tt>, the value <tt>in[i][v]</tt> is written to
+ * <tt>out[offsets[v]+i]</tt>. If @p add_into is true the value is added to
+ * what is already stored in @p out, otherwise it replaces it.
+ */
+template <>
+inline
+void
+vectorized_transpose_and_store(const bool                     add_into,
+                               const unsigned int             n_entries,
+                               const VectorizedArray<double> *in,
+                               const unsigned int            *offsets,
+                               double                        *out)
+{
+  // complete pairs of entries are transposed in registers, a possible
+  // single leftover entry is treated by scalar code
+  const unsigned int n_pairs        = n_entries/2;
+  const unsigned int first_leftover = 2*n_pairs;
+  if (!add_into)
+    {
+      for (unsigned int p=0; p<n_pairs; ++p)
+        {
+          const __m128d a0 = in[2*p+0].data;
+          const __m128d a1 = in[2*p+1].data;
+          const __m128d row0 = _mm_unpacklo_pd (a0, a1);
+          const __m128d row1 = _mm_unpackhi_pd (a0, a1);
+          double *dst = out+2*p;
+          _mm_storeu_pd(dst+offsets[0], row0);
+          _mm_storeu_pd(dst+offsets[1], row1);
+        }
+      for (unsigned int i=first_leftover; i<n_entries; ++i)
+        for (unsigned int v=0; v<2; ++v)
+          out[offsets[v]+i] = in[i][v];
+    }
+  else
+    {
+      for (unsigned int p=0; p<n_pairs; ++p)
+        {
+          const __m128d a0 = in[2*p+0].data;
+          const __m128d a1 = in[2*p+1].data;
+          const __m128d row0 = _mm_unpacklo_pd (a0, a1);
+          const __m128d row1 = _mm_unpackhi_pd (a0, a1);
+          double *dst = out+2*p;
+          // each destination is read, updated and written before the next
+          // one is touched
+          _mm_storeu_pd(dst+offsets[0],
+                        _mm_add_pd(_mm_loadu_pd(dst+offsets[0]), row0));
+          _mm_storeu_pd(dst+offsets[1],
+                        _mm_add_pd(_mm_loadu_pd(dst+offsets[1]), row1));
+        }
+      for (unsigned int i=first_leftover; i<n_entries; ++i)
+        for (unsigned int v=0; v<2; ++v)
+          out[offsets[v]+i] += in[i][v];
+    }
+}
+
+
+
+/**
+ * Specialization of VectorizedArray for float and SSE2. Four single
+ * precision numbers are stored in one variable of type __m128.
+ */
+template <>
+class VectorizedArray<float>
+{
+public:
+  /**
+   * Number of floats held by one object of this class.
+   */
+  static const unsigned int n_array_elements = 4;
+
+  /**
+   * Broadcast assignment: all entries are set to the scalar @p x.
+   */
+  VectorizedArray &
+  operator = (const float x)
+  {
+    data = _mm_set1_ps(x);
+    return *this;
+  }
+
+  /**
+   * Read-write access to the entry with index @p comp. The index is checked
+   * against the range [0,4) by AssertIndexRange.
+   */
+  float &
+  operator [] (const unsigned int comp)
+  {
+    AssertIndexRange (comp, 4);
+    float *entries = reinterpret_cast<float *>(&data);
+    return entries[comp];
+  }
+
+  /**
+   * Read-only access to the entry with index @p comp. The index is checked
+   * against the range [0,4) by AssertIndexRange.
+   */
+  const float &
+  operator [] (const unsigned int comp) const
+  {
+    AssertIndexRange (comp, 4);
+    const float *entries = reinterpret_cast<const float *>(&data);
+    return entries[comp];
+  }
+
+  /**
+   * Adds @p vec to this object, entry by entry.
+   */
+  VectorizedArray &
+  operator += (const VectorizedArray &vec)
+  {
+    // use the SSE intrinsic unless the compiler supports arithmetic
+    // directly on the vector type
+#ifndef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
+    data = _mm_add_ps(data,vec.data);
+#else
+    data += vec.data;
+#endif
+    return *this;
+  }
+
+  /**
+   * Subtracts @p vec from this object, entry by entry.
+   */
+  VectorizedArray &
+  operator -= (const VectorizedArray &vec)
+  {
+#ifndef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
+    data = _mm_sub_ps(data,vec.data);
+#else
+    data -= vec.data;
+#endif
+    return *this;
+  }
+
+  /**
+   * Multiplies this object by @p vec, entry by entry.
+   */
+  VectorizedArray &
+  operator *= (const VectorizedArray &vec)
+  {
+#ifndef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
+    data = _mm_mul_ps(data,vec.data);
+#else
+    data *= vec.data;
+#endif
+    return *this;
+  }
+
+  /**
+   * Divides this object by @p vec, entry by entry.
+   */
+  VectorizedArray &
+  operator /= (const VectorizedArray &vec)
+  {
+#ifndef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
+    data = _mm_div_ps(data,vec.data);
+#else
+    data /= vec.data;
+#endif
+    return *this;
+  }
+
+  /**
+   * Fills this object with @p n_array_elements floats read from memory
+   * starting at @p ptr. An unaligned load is used, so @p ptr need not be
+   * aligned by 16 bytes, as opposed to casting a float address to
+   * VectorizedArray<float>*.
+   */
+  void load (const float *ptr)
+  {
+    data = _mm_loadu_ps (ptr);
+  }
+
+  /**
+   * Writes the @p n_array_elements floats of this object to memory starting
+   * at @p ptr. An unaligned store is used, so @p ptr need not be aligned by
+   * 16 bytes, as opposed to casting a float address to
+   * VectorizedArray<float>*.
+   */
+  void store (float *ptr) const
+  {
+    _mm_storeu_ps (ptr, data);
+  }
+
+  /**
+   * The actual data. It is public because this class represents a POD data
+   * type.
+   */
+  __m128 data;
+
+private:
+  /**
+   * Entry-wise square root. Not for use in user code, use sqrt(x) instead.
+   */
+  VectorizedArray
+  get_sqrt () const
+  {
+    VectorizedArray result;
+    result.data = _mm_sqrt_ps(data);
+    return result;
+  }
+
+  /**
+   * Entry-wise absolute value. Not for use in user code, use abs(x)
+   * instead.
+   */
+  VectorizedArray
+  get_abs () const
+  {
+    // -0.f has only the sign bit set. A bitwise and-not with it clears the
+    // sign of every entry and leaves exponent and mantissa bits untouched.
+    const __m128 sign_mask = _mm_set1_ps (-0.f);
+    VectorizedArray result;
+    result.data = _mm_andnot_ps(sign_mask, data);
+    return result;
+  }
+
+  /**
+   * Entry-wise maximum of this object and @p other. Not for use in user
+   * code, use max(x,y) instead.
+   */
+  VectorizedArray
+  get_max (const VectorizedArray &other) const
+  {
+    VectorizedArray result;
+    result.data = _mm_max_ps (data, other.data);
+    return result;
+  }
+
+  /**
+   * Entry-wise minimum of this object and @p other. Not for use in user
+   * code, use min(x,y) instead.
+   */
+  VectorizedArray
+  get_min (const VectorizedArray &other) const
+  {
+    VectorizedArray result;
+    result.data = _mm_min_ps (data, other.data);
+    return result;
+  }
+
+  /**
+   * The functions in namespace std that forward to the private members
+   * above need access to them.
+   */
+  template <typename Number2> friend VectorizedArray<Number2>
+  std::sqrt (const VectorizedArray<Number2> &);
+  template <typename Number2> friend VectorizedArray<Number2>
+  std::abs  (const VectorizedArray<Number2> &);
+  template <typename Number2> friend VectorizedArray<Number2>
+  std::max  (const VectorizedArray<Number2> &, const VectorizedArray<Number2> &);
+  template <typename Number2> friend VectorizedArray<Number2>
+  std::min  (const VectorizedArray<Number2> &, const VectorizedArray<Number2> &);
+};
+
+
+
+/**
+ * Specialization of vectorized_load_and_transpose() for float and SSE2.
+ *
+ * Gathers data from the four input streams starting at
+ * <tt>in+offsets[v]</tt>, <tt>v=0,...,3</tt>, and stores it transposed such
+ * that <tt>out[i][v] = in[offsets[v]+i]</tt> for all <tt>i</tt> in
+ * <tt>[0,n_entries)</tt>.
+ */
+template <>
+inline
+void vectorized_load_and_transpose(const unsigned int      n_entries,
+                                   const float            *in,
+                                   const unsigned int     *offsets,
+                                   VectorizedArray<float> *out)
+{
+  const unsigned int n_blocks = n_entries/4;
+  const unsigned int n_left   = n_entries%4;
+
+  // complete groups of four entries: 4x4 transpose in registers
+  for (unsigned int b=0; b<n_blocks; ++b)
+    {
+      const float            *src = in+4*b;
+      VectorizedArray<float> *dst = out+4*b;
+
+      const __m128 r0 = _mm_loadu_ps(src+offsets[0]);
+      const __m128 r1 = _mm_loadu_ps(src+offsets[1]);
+      const __m128 r2 = _mm_loadu_ps(src+offsets[2]);
+      const __m128 r3 = _mm_loadu_ps(src+offsets[3]);
+      const __m128 lo01 = _mm_shuffle_ps (r0, r1, 0x44);
+      const __m128 hi01 = _mm_shuffle_ps (r0, r1, 0xee);
+      const __m128 lo23 = _mm_shuffle_ps (r2, r3, 0x44);
+      const __m128 hi23 = _mm_shuffle_ps (r2, r3, 0xee);
+      dst[0].data = _mm_shuffle_ps (lo01, lo23, 0x88);
+      dst[1].data = _mm_shuffle_ps (lo01, lo23, 0xdd);
+      dst[2].data = _mm_shuffle_ps (hi01, hi23, 0x88);
+      dst[3].data = _mm_shuffle_ps (hi01, hi23, 0xdd);
+    }
+
+  // nothing left to do if n_entries is a multiple of four
+  if (n_left == 0)
+    return;
+
+  if (n_blocks == 0)
+    {
+      // fewer than four entries in total: copy them one scalar at a time
+      for (unsigned int i=0; i<n_entries; ++i)
+        for (unsigned int v=0; v<4; ++v)
+          out[i][v] = in[offsets[v]+i];
+      return;
+    }
+
+  // Otherwise run the same transpose once more on the last four entries.
+  // This block overlaps with entries already written by the loop above,
+  // which simply receive the same values again.
+  const unsigned int final_pos = 4*(n_blocks-1)+n_left;
+  Assert(final_pos+4 == n_entries, ExcInternalError());
+  const float            *src = in+final_pos;
+  VectorizedArray<float> *dst = out+final_pos;
+
+  const __m128 r0 = _mm_loadu_ps(src+offsets[0]);
+  const __m128 r1 = _mm_loadu_ps(src+offsets[1]);
+  const __m128 r2 = _mm_loadu_ps(src+offsets[2]);
+  const __m128 r3 = _mm_loadu_ps(src+offsets[3]);
+  const __m128 lo01 = _mm_shuffle_ps (r0, r1, 0x44);
+  const __m128 hi01 = _mm_shuffle_ps (r0, r1, 0xee);
+  const __m128 lo23 = _mm_shuffle_ps (r2, r3, 0x44);
+  const __m128 hi23 = _mm_shuffle_ps (r2, r3, 0xee);
+  dst[0].data = _mm_shuffle_ps (lo01, lo23, 0x88);
+  dst[1].data = _mm_shuffle_ps (lo01, lo23, 0xdd);
+  dst[2].data = _mm_shuffle_ps (hi01, hi23, 0x88);
+  dst[3].data = _mm_shuffle_ps (hi01, hi23, 0xdd);
+}
+
+
+
+/**
+ * Specialization of vectorized_transpose_and_store() for float and SSE2.
+ *
+ * For every <tt>i</tt> in <tt>[0,n_entries)</tt> and every lane
+ * <tt>v=0,...,3</tt>, the value <tt>in[i][v]</tt> is written to
+ * <tt>out[offsets[v]+i]</tt>. If @p add_into is true the value is added to
+ * what is already stored in @p out, otherwise it replaces it.
+ */
+template <>
+inline
+void
+vectorized_transpose_and_store(const bool                    add_into,
+                               const unsigned int            n_entries,
+                               const VectorizedArray<float> *in,
+                               const unsigned int           *offsets,
+                               float                        *out)
+{
+  // number of complete groups of four entries that are transposed in
+  // registers
+  const unsigned int n_blocks = n_entries/4;
+  for (unsigned int b=0; b<n_blocks; ++b)
+    {
+      const VectorizedArray<float> *src = in+4*b;
+      float                        *dst = out+4*b;
+
+      const __m128 a0 = src[0].data;
+      const __m128 a1 = src[1].data;
+      const __m128 a2 = src[2].data;
+      const __m128 a3 = src[3].data;
+
+      // two rounds of shuffles perform the 4x4 transpose
+      const __m128 lo01 = _mm_shuffle_ps (a0, a1, 0x44);
+      const __m128 hi01 = _mm_shuffle_ps (a0, a1, 0xee);
+      const __m128 lo23 = _mm_shuffle_ps (a2, a3, 0x44);
+      const __m128 hi23 = _mm_shuffle_ps (a2, a3, 0xee);
+      const __m128 row0 = _mm_shuffle_ps (lo01, lo23, 0x88);
+      const __m128 row1 = _mm_shuffle_ps (lo01, lo23, 0xdd);
+      const __m128 row2 = _mm_shuffle_ps (hi01, hi23, 0x88);
+      const __m128 row3 = _mm_shuffle_ps (hi01, hi23, 0xdd);
+
+      // The two cases are spelled out separately rather than sharing the
+      // store instructions because the compiler cannot know that the
+      // pointers do not alias. In the additive case each destination is
+      // read, updated and written before the next one is touched.
+      if (add_into)
+        {
+          _mm_storeu_ps(dst+offsets[0],
+                        _mm_add_ps(_mm_loadu_ps(dst+offsets[0]), row0));
+          _mm_storeu_ps(dst+offsets[1],
+                        _mm_add_ps(_mm_loadu_ps(dst+offsets[1]), row1));
+          _mm_storeu_ps(dst+offsets[2],
+                        _mm_add_ps(_mm_loadu_ps(dst+offsets[2]), row2));
+          _mm_storeu_ps(dst+offsets[3],
+                        _mm_add_ps(_mm_loadu_ps(dst+offsets[3]), row3));
+        }
+      else
+        {
+          _mm_storeu_ps(dst+offsets[0], row0);
+          _mm_storeu_ps(dst+offsets[1], row1);
+          _mm_storeu_ps(dst+offsets[2], row2);
+          _mm_storeu_ps(dst+offsets[3], row3);
+        }
+    }
+
+  // entries beyond the last complete group of four are handled one scalar
+  // at a time
+  for (unsigned int i=4*n_blocks; i<n_entries; ++i)
+    for (unsigned int v=0; v<4; ++v)
+      {
+        float &target = out[offsets[v]+i];
+        if (add_into)
+          target += in[i][v];
+        else
+          target = in[i][v];
+      }
+}
+
+
+
+#endif // if DEAL_II_COMPILER_VECTORIZATION_LEVEL > 0
+
+
+/**
+ * Sum of two vectorized arrays, computed by applying operator += with @p v
+ * to a copy of @p u.
+ *
+ * @relates VectorizedArray
+ */
+template <typename Number>
+inline
+VectorizedArray<Number>
+operator + (const VectorizedArray<Number> &u,
+            const VectorizedArray<Number> &v)
+{
+  VectorizedArray<Number> sum (u);
+  sum += v;
+  return sum;
+}
+
+/**
+ * Difference of two vectorized arrays, computed by applying operator -=
+ * with @p v to a copy of @p u.
+ *
+ * @relates VectorizedArray
+ */
+template <typename Number>
+inline
+VectorizedArray<Number>
+operator - (const VectorizedArray<Number> &u,
+            const VectorizedArray<Number> &v)
+{
+  VectorizedArray<Number> difference (u);
+  difference -= v;
+  return difference;
+}
+
+/**
+ * Product of two vectorized arrays, computed by applying operator *= with
+ * @p v to a copy of @p u.
+ *
+ * @relates VectorizedArray
+ */
+template <typename Number>
+inline
+VectorizedArray<Number>
+operator * (const VectorizedArray<Number> &u,
+            const VectorizedArray<Number> &v)
+{
+  VectorizedArray<Number> product (u);
+  product *= v;
+  return product;
+}
+
+/**
+ * Quotient of two vectorized arrays, computed by applying operator /= with
+ * @p v to a copy of @p u.
+ *
+ * @relates VectorizedArray
+ */
+template <typename Number>
+inline
+VectorizedArray<Number>
+operator / (const VectorizedArray<Number> &u,
+            const VectorizedArray<Number> &v)
+{
+  VectorizedArray<Number> quotient (u);
+  quotient /= v;
+  return quotient;
+}
+
+/**
+ * Sum of a scalar @p u (expanded to equal entries) and the array @p v.
+ *
+ * @relates VectorizedArray
+ */
+template <typename Number>
+inline
+VectorizedArray<Number>
+operator + (const Number                  &u,
+            const VectorizedArray<Number> &v)
+{
+  VectorizedArray<Number> sum;
+  sum  = u;
+  sum += v;
+  return sum;
+}
+
+/**
+ * Addition of a double scalar (narrowed to float and expanded to @p
+ * n_array_elements equal entries) and a float vectorized array, so that
+ * code written with double constants works for VectorizedArray<float>.
+ *
+ * @relates VectorizedArray
+ */
+inline
+VectorizedArray<float>
+operator + (const double                 &u,
+            const VectorizedArray<float> &v)
+{
+  VectorizedArray<float> tmp;
+  // narrow explicitly, as the double overloads of -, * and / do
+  tmp = float(u);
+  return tmp+=v;
+}
+
+/**
+ * Sum of @p v and a scalar @p u; forwards to the scalar-first operator +.
+ *
+ * @relates VectorizedArray
+ */
+template <typename Number>
+inline
+VectorizedArray<Number>
+operator + (const VectorizedArray<Number> &v,
+            const Number                  &u)
+{
+  VectorizedArray<Number> sum = u + v;
+  return sum;
+}
+
+/**
+ * Sum of a float vectorized array @p v and a double scalar @p u, so that
+ * code written with double constants works for VectorizedArray<float>.
+ * Forwards to the operator + overload that takes the double first.
+ *
+ * @relates VectorizedArray
+ */
+inline
+VectorizedArray<float>
+operator + (const VectorizedArray<float> &v,
+            const double                 &u)
+{
+  VectorizedArray<float> sum = u + v;
+  return sum;
+}
+
+/**
+ * Difference of a scalar @p u (expanded to equal entries) and array @p v.
+ *
+ * @relates VectorizedArray
+ */
+template <typename Number>
+inline
+VectorizedArray<Number>
+operator - (const Number                  &u,
+            const VectorizedArray<Number> &v)
+{
+  VectorizedArray<Number> difference;
+  difference  = u;
+  difference -= v;
+  return difference;
+}
+
+/**
+ * Difference of a double scalar @p u and a float vectorized array @p v.
+ * The scalar is cast to float and expanded to a vectorized array with @p
+ * n_array_elements equal entries; the overload lets code written with
+ * double constants work for VectorizedArray<float>.
+ *
+ * @relates VectorizedArray
+ */
+inline
+VectorizedArray<float>
+operator - (const double                 &u,
+            const VectorizedArray<float> &v)
+{
+  VectorizedArray<float> difference;
+  difference = static_cast<float>(u);
+  return difference -= v;
+}
+
+/**
+ * Difference of array @p v and a scalar @p u (expanded to equal entries).
+ *
+ * @relates VectorizedArray
+ */
+template <typename Number>
+inline
+VectorizedArray<Number>
+operator - (const VectorizedArray<Number> &v,
+            const Number                  &u)
+{
+  VectorizedArray<Number> subtrahend;
+  subtrahend = u;
+  VectorizedArray<Number> difference (v);
+  return difference -= subtrahend;
+}
+
+/**
+ * Difference of a float vectorized array @p v and a double scalar @p u.
+ * The scalar is cast to float and expanded to equal entries; the overload
+ * lets code written with double constants work for VectorizedArray<float>.
+ *
+ * @relates VectorizedArray
+ */
+inline
+VectorizedArray<float>
+operator - (const VectorizedArray<float> &v,
+            const double                 &u)
+{
+  VectorizedArray<float> subtrahend;
+  subtrahend = static_cast<float>(u);
+  VectorizedArray<float> difference (v);
+  return difference -= subtrahend;
+}
+
+/**
+ * Product of a scalar @p u (expanded to equal entries) and the array @p v.
+ *
+ * @relates VectorizedArray
+ */
+template <typename Number>
+inline
+VectorizedArray<Number>
+operator * (const Number                  &u,
+            const VectorizedArray<Number> &v)
+{
+  VectorizedArray<Number> product;
+  product  = u;
+  product *= v;
+  return product;
+}
+
+/**
+ * Product of a double scalar @p u and a float vectorized array @p v. The
+ * scalar is cast to float and expanded to a vectorized array with @p
+ * n_array_elements equal entries; the overload lets code written with
+ * double constants work for VectorizedArray<float>.
+ *
+ * @relates VectorizedArray
+ */
+inline
+VectorizedArray<float>
+operator * (const double                 &u,
+            const VectorizedArray<float> &v)
+{
+  VectorizedArray<float> product;
+  product = static_cast<float>(u);
+  return product *= v;
+}
+
+/**
+ * Product of @p v and a scalar @p u; forwards to the scalar-first operator *.
+ *
+ * @relates VectorizedArray
+ */
+template <typename Number>
+inline
+VectorizedArray<Number>
+operator * (const VectorizedArray<Number> &v,
+            const Number                  &u)
+{
+  VectorizedArray<Number> product = u * v;
+  return product;
+}
+
+/**
+ * Product of a float vectorized array @p v and a double scalar @p u, so
+ * that code written with double constants works for VectorizedArray<float>.
+ * Forwards to the operator * overload that takes the double first.
+ *
+ * @relates VectorizedArray
+ */
+inline
+VectorizedArray<float>
+operator * (const VectorizedArray<float> &v,
+            const double                 &u)
+{
+  VectorizedArray<float> product = u * v;
+  return product;
+}
+
+/**
+ * Quotient of a scalar @p u (expanded to equal entries) and the array @p v.
+ *
+ * @relates VectorizedArray
+ */
+template <typename Number>
+inline
+VectorizedArray<Number>
+operator / (const Number                  &u,
+            const VectorizedArray<Number> &v)
+{
+  VectorizedArray<Number> quotient;
+  quotient  = u;
+  quotient /= v;
+  return quotient;
+}
+
+/**
+ * Quotient of a double scalar @p u and a float vectorized array @p v. The
+ * scalar is cast to float and expanded to a vectorized array with @p
+ * n_array_elements equal entries; the overload lets code written with
+ * double constants work for VectorizedArray<float>.
+ *
+ * @relates VectorizedArray
+ */
+inline
+VectorizedArray<float>
+operator / (const double                 &u,
+            const VectorizedArray<float> &v)
+{
+  VectorizedArray<float> quotient;
+  quotient = static_cast<float>(u);
+  return quotient /= v;
+}
+
+/**
+ * Quotient of array @p v and a scalar @p u (expanded to equal entries).
+ *
+ * @relates VectorizedArray
+ */
+template <typename Number>
+inline
+VectorizedArray<Number>
+operator / (const VectorizedArray<Number> &v,
+            const Number                  &u)
+{
+  VectorizedArray<Number> divisor;
+  divisor = u;
+  VectorizedArray<Number> quotient (v);
+  return quotient /= divisor;
+}
+
+/**
+ * Quotient of a float vectorized array @p v and a double scalar @p u. The
+ * scalar is cast to float and expanded to equal entries; the overload
+ * lets code written with double constants work for VectorizedArray<float>.
+ *
+ * @relates VectorizedArray
+ */
+inline
+VectorizedArray<float>
+operator / (const VectorizedArray<float> &v,
+            const double                 &u)
+{
+  VectorizedArray<float> divisor;
+  divisor = static_cast<float>(u);
+  VectorizedArray<float> quotient (v);
+  return quotient /= divisor;
+}
+
+/**
+ * Unary plus: returns a copy of the given vectorized array.
+ *
+ * @relates VectorizedArray
+ */
+template <typename Number>
+inline
+VectorizedArray<Number>
+operator + (const VectorizedArray<Number> &u)
+{
+  return VectorizedArray<Number> (u);
+}
+
+/**
+ * Unary minus: returns zero minus the given vectorized array.
+ *
+ * @relates VectorizedArray
+ */
+template <typename Number>
+inline
+VectorizedArray<Number>
+operator - (const VectorizedArray<Number> &u)
+{
+  // the negative is (zero array) - u; multiplying by -1 would also work
+  VectorizedArray<Number> negated = VectorizedArray<Number>();
+  return negated -= u;
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+
+/**
+ * Implementation of functions from cmath on VectorizedArray. These functions
+ * do not reside in the dealii namespace in order to ensure a similar
+ * interface as for the respective functions in cmath. Instead, call them
+ * using std::sin.
+ */
+namespace std
+{
+  /**
+   * Sine of a vectorized data field, evaluated entry by entry. The returned
+   * vectorized array has the form <tt>{sin(x[0]), sin(x[1]), ...,
+   * sin(x[n_array_elements-1])}</tt>.
+   *
+   * @relates VectorizedArray
+   */
+  template <typename Number>
+  inline
+  ::dealii::VectorizedArray<Number>
+  sin (const ::dealii::VectorizedArray<Number> &x)
+  {
+    // collect the results in a plain array and transfer them with one
+    // unaligned load(): saves instructions compared to setting entries one
+    // by one and circumvents an optimization bug in gcc-4.6 with SSE2 (see
+    // deal.II developers list, April 2014, "matrix_free/step-48 Test").
+    typedef ::dealii::VectorizedArray<Number> ArrayType;
+    Number sines[ArrayType::n_array_elements];
+    for (unsigned int lane=0; lane<ArrayType::n_array_elements; ++lane)
+      sines[lane] = std::sin(x[lane]);
+    ArrayType result;
+    result.load(sines);
+    return result;
+  }
+
+
+
+  /**
+   * Cosine of a vectorized data field, evaluated entry by entry:
+   * <tt>{cos(x[0]), cos(x[1]), ..., cos(x[n_array_elements-1])}</tt>.
+   *
+   * @relates VectorizedArray
+   */
+  template <typename Number>
+  inline
+  ::dealii::VectorizedArray<Number>
+  cos (const ::dealii::VectorizedArray<Number> &x)
+  {
+    typedef ::dealii::VectorizedArray<Number> ArrayType;
+    Number cosines[ArrayType::n_array_elements];
+    for (unsigned int lane=0; lane<ArrayType::n_array_elements; ++lane)
+      cosines[lane] = std::cos(x[lane]);
+    ArrayType result;
+    result.load(cosines);
+    return result;
+  }
+
+
+
+  /**
+   * Tangent of a vectorized data field, evaluated entry by entry:
+   * <tt>{tan(x[0]), tan(x[1]), ..., tan(x[n_array_elements-1])}</tt>.
+   *
+   * @relates VectorizedArray
+   */
+  template <typename Number>
+  inline
+  ::dealii::VectorizedArray<Number>
+  tan (const ::dealii::VectorizedArray<Number> &x)
+  {
+    typedef ::dealii::VectorizedArray<Number> ArrayType;
+    Number tangents[ArrayType::n_array_elements];
+    for (unsigned int lane=0; lane<ArrayType::n_array_elements; ++lane)
+      tangents[lane] = std::tan(x[lane]);
+    ArrayType result;
+    result.load(tangents);
+    return result;
+  }
+
+
+
+  /**
+   * Exponential of a vectorized data field, evaluated entry by entry:
+   * <tt>{exp(x[0]), exp(x[1]), ..., exp(x[n_array_elements-1])}</tt>.
+   *
+   * @relates VectorizedArray
+   */
+  template <typename Number>
+  inline
+  ::dealii::VectorizedArray<Number>
+  exp (const ::dealii::VectorizedArray<Number> &x)
+  {
+    typedef ::dealii::VectorizedArray<Number> ArrayType;
+    Number exponentials[ArrayType::n_array_elements];
+    for (unsigned int lane=0; lane<ArrayType::n_array_elements; ++lane)
+      exponentials[lane] = std::exp(x[lane]);
+    ArrayType result;
+    result.load(exponentials);
+    return result;
+  }
+
+
+
+  /**
+   * Natural logarithm of a vectorized data field, evaluated entry by entry:
+   * <tt>{log(x[0]), log(x[1]), ..., log(x[n_array_elements-1])}</tt>.
+   *
+   * @relates VectorizedArray
+   */
+  template <typename Number>
+  inline
+  ::dealii::VectorizedArray<Number>
+  log (const ::dealii::VectorizedArray<Number> &x)
+  {
+    typedef ::dealii::VectorizedArray<Number> ArrayType;
+    Number logarithms[ArrayType::n_array_elements];
+    for (unsigned int lane=0; lane<ArrayType::n_array_elements; ++lane)
+      logarithms[lane] = std::log(x[lane]);
+    ArrayType result;
+    result.load(logarithms);
+    return result;
+  }
+
+
+
+  /**
+   * Square root of a vectorized data field, <tt>{sqrt(x[0]), sqrt(x[1]),
+   * ..., sqrt(x[n_array_elements-1])}</tt>, as given by x.get_sqrt().
+   *
+   * @relates VectorizedArray
+   */
+  template <typename Number>
+  inline
+  ::dealii::VectorizedArray<Number>
+  sqrt (const ::dealii::VectorizedArray<Number> &x)
+  {
+    ::dealii::VectorizedArray<Number> root = x.get_sqrt();
+    return root;
+  }
+
+
+
+  /**
+   * Raise each entry of the vectorized data field @p x to the power @p p:
+   * <tt>{pow(x[0],p), pow(x[1],p), ..., pow(x[n_array_elements-1],p)}</tt>.
+   *
+   * @relates VectorizedArray
+   */
+  template <typename Number>
+  inline
+  ::dealii::VectorizedArray<Number>
+  pow (const ::dealii::VectorizedArray<Number> &x,
+       const Number p)
+  {
+    typedef ::dealii::VectorizedArray<Number> ArrayType;
+    Number powers[ArrayType::n_array_elements];
+    for (unsigned int lane=0; lane<ArrayType::n_array_elements; ++lane)
+      powers[lane] = std::pow(x[lane], p);
+    ArrayType result;
+    result.load(powers);
+    return result;
+  }
+
+
+
+  /**
+   * Absolute value (modulus) of a vectorized data field, <tt>{abs(x[0]),
+   * abs(x[1]), ..., abs(x[n_array_elements-1])}</tt>, from x.get_abs().
+   *
+   * @relates VectorizedArray
+   */
+  template <typename Number>
+  inline
+  ::dealii::VectorizedArray<Number>
+  abs (const ::dealii::VectorizedArray<Number> &x)
+  {
+    ::dealii::VectorizedArray<Number> modulus = x.get_abs();
+    return modulus;
+  }
+
+
+
+  /**
+   * Componentwise maximum of two vectorized data fields, <tt>{max(x[0],y[0]),
+   * max(x[1],y[1]), ...}</tt>, as given by x.get_max(y).
+   *
+   * @relates VectorizedArray
+   */
+  template <typename Number>
+  inline
+  ::dealii::VectorizedArray<Number>
+  max (const ::dealii::VectorizedArray<Number> &x,
+       const ::dealii::VectorizedArray<Number> &y)
+  {
+    ::dealii::VectorizedArray<Number> larger = x.get_max(y);
+    return larger;
+  }
+
+
+
+  /**
+   * Componentwise minimum of two vectorized data fields, <tt>{min(x[0],y[0]),
+   * min(x[1],y[1]), ...}</tt>, as given by x.get_min(y).
+   *
+   * @relates VectorizedArray
+   */
+  template <typename Number>
+  inline
+  ::dealii::VectorizedArray<Number>
+  min (const ::dealii::VectorizedArray<Number> &x,
+       const ::dealii::VectorizedArray<Number> &y)
+  {
+    ::dealii::VectorizedArray<Number> smaller = x.get_min(y);
+    return smaller;
+  }
+
+}
+
+#endif
diff --git a/include/deal.II/base/work_stream.h b/include/deal.II/base/work_stream.h
new file mode 100644
index 0000000..a924961
--- /dev/null
+++ b/include/deal.II/base/work_stream.h
@@ -0,0 +1,1269 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__work_stream_h
+#define dealii__work_stream_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/graph_coloring.h>
+#include <deal.II/base/multithread_info.h>
+#include <deal.II/base/thread_management.h>
+#include <deal.II/base/template_constraints.h>
+#include <deal.II/base/std_cxx11/function.h>
+#include <deal.II/base/std_cxx11/bind.h>
+#include <deal.II/base/thread_local_storage.h>
+#include <deal.II/base/parallel.h>
+
+#ifdef DEAL_II_WITH_THREADS
+#  include <deal.II/base/thread_management.h>
+#  include <tbb/pipeline.h>
+#endif
+
+#include <vector>
+#include <utility>
+#include <memory>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+/**
+ * A namespace whose main template function supports running multiple threads
+ * each of which operates on a subset of the given range of objects. The class
+ * uses the Intel Threading Building Blocks (TBB) to load balance the
+ * individual subranges onto the available threads. For a lengthy discussion
+ * of the rationale of this class, see the
+ * @ref threads "Parallel computing with multiple processors"
+ * module. It is used in the tutorial first in step-9, and again in step-13,
+ * step-14, step-32 and others.
+ *
+ * The class is built on the following premise: One frequently has some work
+ * that needs to be done on a sequence of objects; a prototypical example is
+ * assembling cell contributions to a system matrix or right hand side. In
+ * many such examples, part of the work can be done entirely independently and
+ * in parallel, possibly using several processor cores on a machine with
+ * shared memory. However, some other part of this work may need to be
+ * synchronised and be done in order. In the example of assembling a matrix,
+ * the computation of local contributions can be done entirely in parallel,
+ * but copying the local contributions into the global matrix requires some
+ * care: First, several threads can't write at the same time, but need to
+ * synchronise writing using a mutex; secondly, we want the order in which
+ * local contributions are added to the global matrix to be always the same
+ * because floating point addition is not associative and adding local
+ * contributions to the global matrix in different orders leads to subtly
+ * different results that can affect the number of iterations for iterative
+ * solvers as well as the round-off error in the solution in random ways.
+ * Consequently, we want to ensure that only one thread at a time writes into
+ * the global matrix, and that results are copied in a stable and reproducible
+ * order.
+ *
+ * This class implements a framework for this work model. It works with a
+ * stream of objects given by an iterator range, runs a worker function in
+ * parallel on all of these objects and then passes each object to a
+ * postprocessor function that runs sequentially and gets objects in exactly
+ * the order in which they appear in the input iterator range. None of the
+ * synchronisation work is exposed to the user of this class.
+ *
+ * Internally, the range given to the run() function of this class is split
+ * into a sequence of "items", which are then distributed according to some
+ * %internal algorithm onto the number of available threads. An item is an
+ * element of the range of iterators on which we are to operate; for example,
+ * for the purpose of assembling matrices or evaluating error indicators, an
+ * item could be a cell. The TBB library determines how many threads are
+ * created (typically as many as there are processor cores), but the number of
+ * items that may be active at any given time is specified by the argument to
+ * the constructor. It should be bigger or equal to the number of processor
+ * cores - the default is four times the number of cores on the current
+ * system.
+ *
+ * Items are created upon request by the TBB whenever one of the worker
+ * threads is idle or is expected to become idle. It is then handed off to a
+ * worker function, typically a member function of a main class. These worker
+ * functions are run in parallel on a number of threads, and there is no
+ * guarantee that they are asked to work on items in any particular order, in
+ * particular not necessarily in the order in which items are generated from
+ * the iterator range.
+ *
+ * Typically, worker functions need additional data, for example FEValues
+ * objects, input data vectors, etc, some of which can not be shared among
+ * threads. To this end, the run() function takes another template argument,
+ * ScratchData, which designates a type objects of which are stored with each
+ * item and which threads can use as private data without having to share them
+ * with other threads. The run() function takes an additional argument with an
+ * object of type ScratchData that is going to be copied for the arguments
+ * passed to each of the worker functions.
+ *
+ * In addition, worker functions store their results in objects of template
+ * type CopyData. These are then handed off to a separate function, called
+ * copier, that may use the stored results to transfer them into permanent
+ * storage. For example, it may copy the results of local contributions to a
+ * matrix computed by a worker function into the global matrix. In contrast to
+ * the worker function, however, only one instance of the copier is run at any
+ * given time; it can therefore safely copy local contributions into the
+ * global matrix without the need to lock the global object using a mutex or
+ * similar means. Furthermore, it is guaranteed that the copier is run with
+ * CopyData objects in the same order in which their associated items were
+ * created; consequently, even if worker threads may compute results in
+ * unspecified order, the copier always receives the results in exactly the
+ * same order as the items were created.
+ *
+ * Once an item is processed by the copier, it is deleted and the ScratchData
+ * and CopyData objects that were used in its computation are considered
+ * unused and may be re-used for the next invocation of the worker function,
+ * on this or another thread.
+ *
+ * The functions in this namespace only really work in parallel when
+ * multithread mode was selected during deal.II configuration. Otherwise they
+ * simply work on each item sequentially.
+ *
+ * @ingroup threads
+ * @author Wolfgang Bangerth, 2007, 2008, 2009, 2013. Bruno Turcksin, 2013.
+ */
+namespace WorkStream
+{
+
+#ifdef DEAL_II_WITH_THREADS
+
+  namespace internal
+  {
+
+//TODO: The following classes all use std_cxx11::shared_ptr, but the
+//  correct pointer class would actually be std::unique_ptr. make this
+//  replacement whenever we have a class that provides these semantics
+//  and that is also available as a fall-back via boost or similar
+
+    /**
+     * A namespace for the implementation of details of the WorkStream pattern
+     * and function. This namespace holds classes that deal with the second
+     * implementation described in the paper by Turcksin, Kronbichler and
+     * Bangerth (see
+     * @ref workstream_paper).
+     *
+     * Even though this implementation is slower than the third implementation
+     * discussed in that paper, we need to keep it around for two reasons: (i)
+     * a user may not give us a graph coloring, (ii) we want to use this
+     * implementation for colors that are just too small.
+     */
+    namespace Implementation2
+    {
+      /**
+       * A class that creates a sequence of items from a range of iterators.
+       */
+      template <typename Iterator,
+                typename ScratchData,
+                typename CopyData>
+      class IteratorRangeToItemStream : public tbb::filter
+      {
+      public:
+        /**
+         * A data type that we use to identify items to be worked on. This is
+         * the structure that is passed around between the different parts of
+         * the WorkStream implementation to identify what needs to be done by
+         * the various stages of the pipeline.
+         */
+        struct ItemType
+        {
+          /**
+           * A structure that contains a pointer to a scratch data object
+           * along with a flag that indicates whether this object is currently
+           * in use.
+           */
+          struct ScratchDataObject
+          {
+            std_cxx11::shared_ptr<ScratchData> scratch_data;
+            bool                               currently_in_use;
+
+            /**
+             * Default constructor: pointer default-constructed, flag false.
+             */
+            ScratchDataObject ()
+              :
+              currently_in_use (false)
+            {}
+
+            // Store @p p in the shared_ptr member and set the flag to @p in_use.
+            ScratchDataObject (ScratchData *p,
+                               const bool in_use)
+              :
+              scratch_data (p),
+              currently_in_use (in_use)
+            {}
+
+//TODO: when we push back an object to the list of scratch objects, in
+//  Worker::operator(), we first create an object and then copy it to the
+//  end of this list. this means two objects of the current type point to
+//  the same scratch data, each with its own currently_in_use flag. there
+//  is probably little harm in this because the original one goes out of
+//  scope right away again, but it's awkward. one way to avoid this would
+//  be unique_ptr, but we'd need a way to use it in non-C++11 mode
+            ScratchDataObject (const ScratchDataObject &o)
+              :
+              scratch_data (o.scratch_data),
+              currently_in_use (o.currently_in_use)
+            {}
+          };
+
+
+          /**
+           * Typedef to a list of scratch data objects. The rationale for this
+           * list is provided in the variables that use these lists.
+           */
+          typedef std::list<ScratchDataObject> ScratchDataList;
+
+          /**
+           * The iterators that need to be worked on. Only the first n_items
+           * entries are relevant.
+           */
+          std::vector<Iterator> work_items;
+
+          /**
+           * The CopyData objects that the Worker part of the pipeline fills
+           * for each work item. Again, only the first n_items elements are
+           * what we care about.
+           */
+          std::vector<CopyData> copy_datas;
+
+          /**
+           * Number of items identified by the work_items array that the
+           * Worker and Copier pipeline stage need to work on. The maximum
+           * value of this variable will be chunk_size.
+           */
+          unsigned int          n_items;
+
+          /**
+           * Pointer to a thread local variable identifying the scratch data
+           * objects this thread will use. The initial implementation of this
+           * class using thread local variables provided only a single scratch
+           * object per thread. This doesn't work, because the worker
+           * functions may start tasks themselves and then call
+           * Threads::TaskGroup::join_all() or a similar function, which the
+           * TBB scheduler may use to run something else on the current thread
+           * -- for example another instance of the worker function.
+           * Consequently, there would be two instances of the worker function
+           * that use the same scratch object if we only provided a single
+           * scratch object per thread. The solution is to provide a list of
+           * scratch objects for each thread, together with a flag indicating
+           * whether this scratch object is currently used. If a thread needs
+           * a scratch object, it walks this list until it finds an unused
+           * object, or, if there is none, creates one itself. Note that we
+           * need not use synchronization primitives for this process since
+           * the lists are thread-local and we are guaranteed that only a
+           * single thread accesses them as long as we have no yield point in
+           * between the accesses to the list.
+           *
+           * The pointers to scratch objects stored in each of these lists
+           * must be so that they are deleted on all threads when the thread
+           * local object is destroyed. This is achieved by using shared_ptr.
+           *
+           * Note that when a worker needs to create a scratch object, it
+           * allocates it using sample_scratch_data to copy from. This has the
+           * advantage of a first-touch initialization, i.e., the memory for
+           * the scratch data object is allocated and initialized by the same
+           * thread that will later use it.
+           */
+          Threads::ThreadLocalStorage<ScratchDataList> *scratch_data;
+
+          /**
+           * Pointer to a sample scratch data object, to be used to initialize
+           * the scratch data objects created for each individual thread.
+           */
+          const ScratchData *sample_scratch_data;
+
+          /**
+           * Flag that is true while this item (buffer) is in use and false
+           * while it is available to be handed out again.
+           */
+          bool currently_in_use;
+
+
+          /**
+           * Default constructor. Initialize everything that doesn't have a
+           * default constructor itself.
+           */
+          ItemType ()
+            :
+            n_items (0),
+            scratch_data (0),
+            sample_scratch_data (0),
+            currently_in_use (false)
+          {}
+        };
+
+
+        /**
+         * Constructor. Stores the iterator range [begin,end) and the sample
+         * scratch object, and sets up @p buffer_size items, each with room
+         * for @p chunk_size iterators and copies of @p sample_copy_data.
+         */
+        IteratorRangeToItemStream (const Iterator       &begin,
+                                   const Iterator       &end,
+                                   const unsigned int    buffer_size,
+                                   const unsigned int    chunk_size,
+                                   const ScratchData    &sample_scratch_data,
+                                   const CopyData       &sample_copy_data)
+          :
+          tbb::filter (/*is_serial=*/true),
+          remaining_iterator_range (begin, end),
+          item_buffer (buffer_size),
+          sample_scratch_data (sample_scratch_data),
+          chunk_size (chunk_size)
+        {
+          // set up every element of the item buffer
+          typedef typename std::vector<ItemType>::iterator ItemIterator;
+          for (ItemIterator item=item_buffer.begin(); item!=item_buffer.end(); ++item)
+            {
+              Assert (item->n_items == 0, ExcInternalError());
+
+              // size the per-item arrays to chunk_size entries
+              item->work_items.resize (chunk_size,
+                                       remaining_iterator_range.second);
+              item->copy_datas.resize (chunk_size, sample_copy_data);
+              item->scratch_data        = &thread_local_scratch;
+              item->sample_scratch_data = &sample_scratch_data;
+              item->currently_in_use    = false;
+            }
+        }
+
+
+        /**
+         * Create the next item: claim an unused element of the item buffer,
+         * fill it with up to chunk_size iterators from the remaining range,
+         * and return a pointer to it. Returns a null pointer once the range
+         * is exhausted, which terminates the pipeline.
+         */
+        virtual void *operator () (void *)
+        {
+          // Claim the first item that is not in use. We know that there
+          // must be one: the maximal number of tokens in flight has been
+          // set and the ring buffer has exactly this size, so whenever
+          // this function is called, fewer than the maximal number of
+          // items are currently in flight.
+          //
+          // Access to the buffer need not be locked: this stage is run
+          // sequentially, so the following block can be entered only once
+          // at any given time and there is no race between checking that a
+          // flag is false and setting it to true. (Another thread may
+          // release items, i.e. clear their in-use flags, at the same
+          // time, but that does not produce any problems either.)
+          ItemType *current_item = 0;
+          typedef typename std::vector<ItemType>::iterator ItemIterator;
+          for (ItemIterator slot=item_buffer.begin(); slot!=item_buffer.end(); ++slot)
+            if (!slot->currently_in_use)
+              {
+                slot->currently_in_use = true;
+                current_item = &*slot;
+                break;
+              }
+          Assert (current_item != 0, ExcMessage ("This can't be. There must be a free item!"));
+
+          // move at most chunk_size iterators from the front of the
+          // remaining range into the item
+          unsigned int &n_filled = current_item->n_items;
+          n_filled = 0;
+          while ((remaining_iterator_range.first != remaining_iterator_range.second)
+                 &&
+                 (n_filled < chunk_size))
+            {
+              current_item->work_items[n_filled]
+                = remaining_iterator_range.first;
+              ++remaining_iterator_range.first;
+              ++n_filled;
+            }
+
+          // an item without entries means there were no iterators left;
+          // returning a null pointer terminates the pipeline
+          if (n_filled != 0)
+            return current_item;
+
+          return 0;
+        }
+
+      private:
+        /**
+         * The interval of iterators still to be worked on. This range will
+         * shrink over time.
+         */
+        std::pair<Iterator,Iterator> remaining_iterator_range;
+
+        /**
+         * A buffer that will store items.
+         */
+        std::vector<ItemType>        item_buffer;
+
+        /**
+         * Pointer to a thread local variable identifying the scratch data
+         * objects this thread will use. The initial implementation of this
+         * class using thread local variables provided only a single scratch
+         * object per thread. This doesn't work, because the worker functions
+         * may start tasks themselves and then call Threads::TaskGroup::join_all()
+         * or a similar function, which the TBB scheduler may use to run
+         * something else on the current thread -- for example another
+         * instance of the worker function. Consequently, there would be two
+         * instances of the worker function that use the same scratch object
+         * if we only provided a single scratch object per thread. The
+         * solution is to provide a list of scratch objects for each thread,
+         * together with a flag indicating whether this scratch object is
+         * currently used. If a thread needs a scratch object, it walks this
+         * list until it finds an unused object, or, if there is none, creates
+         * one itself. Note that we need not use synchronization primitives
+         * for this process since the lists are thread-local and we are
+         * guaranteed that only a single thread accesses them as long as we
+         * have no yield point in between the accesses to the list.
+         *
+         * The pointers to scratch objects stored in each of these lists must
+         * be so that they are deleted on all threads when the thread local
+         * object is destroyed. This is achieved by using shared_ptr.
+         *
+         * Note that when a worker needs to create a scratch object, it
+         * allocates it using sample_scratch_data to copy from. This has the
+         * advantage of a first-touch initialization, i.e., the memory for the
+         * scratch data object is allocated and initialized by the same thread
+         * that will later use it.
+         */
+        Threads::ThreadLocalStorage<typename ItemType::ScratchDataList> thread_local_scratch;
+
+        /**
+         * A reference to a sample scratch data that will be used to
+         * initialize the thread-local pointers to a scratch data object each
+         * of the worker tasks uses.
+         */
+        const ScratchData &sample_scratch_data;
+
+        /**
+         * Number of elements of the iterator range that each thread should
+         * work on sequentially; a large number makes sure that each thread
+         * gets a significant amount of work before the next task switch
+         * happens, whereas a small number is better for load balancing.
+         */
+        const unsigned int           chunk_size;
+
+        /**
+         * Initialize the pointers and vector elements in the specified entry
+         * of the item_buffer.
+         */
+        void init_buffer_elements (const unsigned int element,
+                                   const CopyData    &sample_copy_data)
+        {
+          // every statement below addresses the same buffer entry, so
+          // give it a name once instead of indexing repeatedly
+          ItemType &slot = item_buffer[element];
+
+          // an entry must not hold any work when it is set up
+          Assert (slot.n_items == 0, ExcInternalError());
+
+          // room for chunk_size iterators, each initially equal to the
+          // end of the remaining range
+          slot.work_items.resize (chunk_size,
+                                  remaining_iterator_range.second);
+
+          // let the entry find the thread-local scratch lists and the
+          // sample object from which new scratch objects are copied
+          slot.scratch_data        = &thread_local_scratch;
+          slot.sample_scratch_data = &sample_scratch_data;
+
+          // one copy of the sample copy data per element of a chunk
+          slot.copy_datas.resize (chunk_size, sample_copy_data);
+        }
+      };
+
+
+
+      /**
+       * A class that manages calling the worker function on a number of
+       * parallel threads. Note that it is, in the TBB notation, a filter that
+       * can run in parallel.
+       */
+      template <typename Iterator,
+                typename ScratchData,
+                typename CopyData>
+      class Worker : public tbb::filter
+      {
+      public:
+        /**
+         * Constructor. Stores a copy of the @p worker function object that
+         * does the assembly on a single iterator. The flag @p copier_exist
+         * tells this stage whether a copier stage follows it; if not,
+         * operator() marks each item as free again itself.
+         */
+        Worker (const std_cxx11::function<void (const Iterator &,
+                                                ScratchData &,
+                                                CopyData &)> &worker,
+                bool copier_exist=true)
+          :
+          tbb::filter (/* is_serial= */ false),
+          worker (worker),
+          copier_exist (copier_exist)
+        {}
+
+
+        /**
+         * Work on an item: call the worker function on each of the
+         * <tt>n_items</tt> iterators stored in the item, using a scratch
+         * object taken from (or added to) the current thread's list.
+         * Returns the pointer that was passed in.
+         */
+        void *operator () (void *item)
+        {
+          // first unpack the current item
+          typedef
+          typename IteratorRangeToItemStream<Iterator,ScratchData,CopyData>::ItemType
+          ItemType;
+
+          ItemType *current_item = static_cast<ItemType *> (item);
+
+          // we need to find an unused scratch data object in the list that
+          // corresponds to the current thread and then mark it as used. if
+          // we can't find one, create one
+          //
+          // as explained in the documentation of the thread-local scratch
+          // list owned by IteratorRangeToItemStream (reached here through
+          // the item's scratch_data pointer), there is no
+          // need to synchronize access to this variable using a mutex
+          // as long as we have no yield-point in between. this means that
+          // we can't take an iterator into the list now and expect it to
+          // still be valid after calling the worker, but we at least do
+          // not have to lock the following section
+          ScratchData *scratch_data = 0;
+          {
+            typename ItemType::ScratchDataList &
+            scratch_data_list = current_item->scratch_data->get();
+
+            // see if there is an unused object. if so, grab it and mark
+            // it as used
+            for (typename ItemType::ScratchDataList::iterator
+                 p = scratch_data_list.begin();
+                 p != scratch_data_list.end(); ++p)
+              if (p->currently_in_use == false)
+                {
+                  scratch_data = p->scratch_data.get();
+                  p->currently_in_use = true;
+                  break;
+                }
+
+            // if no object was found, create one and mark it as used.
+            // the new object is a copy of the sample scratch object and
+            // is created by the thread that is going to use it
+            if (scratch_data == 0)
+              {
+                scratch_data = new ScratchData(*current_item->sample_scratch_data);
+
+                typename ItemType::ScratchDataList::value_type
+                new_scratch_object (scratch_data, true);
+                scratch_data_list.push_back (new_scratch_object);
+              }
+          }
+
+          // then call the worker function on each element of the chunk we were
+          // given. since these worker functions are called on separate threads,
+          // nothing good can happen if they throw an exception and we are best
+          // off catching it and showing an error message. an empty worker
+          // function is silently skipped
+          for (unsigned int i=0; i<current_item->n_items; ++i)
+            {
+              try
+                {
+                  if (worker)
+                    worker (current_item->work_items[i],
+                            *scratch_data,
+                            current_item->copy_datas[i]);
+                }
+              catch (const std::exception &exc)
+                {
+                  Threads::internal::handle_std_exception (exc);
+                }
+              catch (...)
+                {
+                  Threads::internal::handle_unknown_exception ();
+                }
+            }
+
+          // finally mark the scratch object as unused again. as above, there
+          // is no need to lock anything here since the object we work on
+          // is thread-local. the list is looked up afresh and searched by
+          // pointer value because no iterator was kept across the calls
+          // to the worker; the loop walks the whole list and does not
+          // stop at the first match
+          {
+            typename ItemType::ScratchDataList &
+            scratch_data_list = current_item->scratch_data->get();
+
+            for (typename ItemType::ScratchDataList::iterator p =
+                   scratch_data_list.begin(); p != scratch_data_list.end();
+                 ++p)
+              if (p->scratch_data.get() == scratch_data)
+                {
+                  Assert(p->currently_in_use == true, ExcInternalError());
+                  p->currently_in_use = false;
+                }
+          }
+
+          // if there is no copier, mark current item as usable again
+          if (copier_exist==false)
+            current_item->currently_in_use = false;
+
+
+          // then return the original pointer
+          // to the now modified object
+          return item;
+        }
+
+
+      private:
+        /**
+         * Pointer to the function that does the assembling on the sequence of
+         * cells.
+         */
+        const std_cxx11::function<void (const Iterator &,
+                                        ScratchData &,
+                                        CopyData &)> worker;
+
+        /**
+         * This flag is true if the copier stage exists. If it does not, the
+         * worker has to free the buffer. Otherwise the copier will do it.
+         */
+        bool copier_exist;
+      };
+
+
+
+      /**
+       * A class that manages calling the copier function. Note that it is, in
+       * the TBB notation, a filter that runs sequentially, ensuring that all
+       * items are copied in the same order in which they are created.
+       */
+      template <typename Iterator,
+                typename ScratchData,
+                typename CopyData>
+      class Copier : public tbb::filter
+      {
+      public:
+        /**
+         * Constructor. Keeps a copy of the @p copier function object, i.e.,
+         * of the function that transfers the content of one CopyData object
+         * to the global matrix or a similar object. The filter is set up
+         * as a serial one.
+         */
+        Copier (const std_cxx11::function<void (const CopyData &)> &copier)
+          :
+          tbb::filter (/*is_serial=*/true),
+          copier (copier)
+        {}
+
+
+        /**
+         * Run the copier on every element of the chunk pointed to by
+         * @p item, then hand the chunk back to the pool of free items.
+         * Always returns 0 because this is the last stage of the pipeline.
+         */
+        void *operator () (void *item)
+        {
+          typedef
+          typename IteratorRangeToItemStream<Iterator,ScratchData,CopyData>::ItemType
+          ItemType;
+
+          // the pipeline passes the chunk around as an untyped pointer
+          ItemType *const chunk = static_cast<ItemType *> (item);
+
+          // the stored function object is const, so whether it is empty
+          // cannot change while we walk over the chunk: decide once.
+          // with an empty copier there is nothing to do per element
+          if (copier)
+            {
+              unsigned int k = 0;
+              while (k < chunk->n_items)
+                {
+                  // as in the worker stage, do not let exceptions escape
+                  // into the scheduler; report them instead
+                  try
+                    {
+                      copier (chunk->copy_datas[k]);
+                    }
+                  catch (const std::exception &exc)
+                    {
+                      Threads::internal::handle_std_exception (exc);
+                    }
+                  catch (...)
+                    {
+                      Threads::internal::handle_unknown_exception ();
+                    }
+
+                  ++k;
+                }
+            }
+
+          // the chunk may now be reused by the first stage
+          chunk->currently_in_use = false;
+
+          // nothing is passed on: we are at the end of the pipeline
+          return 0;
+        }
+
+
+      private:
+        /**
+         * The function object that copies one CopyData object to its
+         * destination.
+         */
+        const std_cxx11::function<void (const CopyData &)> copier;
+      };
+
+    }
+
+
+    /**
+     * A namespace for the implementation of details of the WorkStream pattern
+     * and function. This namespace holds classes that deal with the third
+     * implementation described in the paper by Turcksin, Kronbichler and
+     * Bangerth (see
+     * @ref workstream_paper).
+     */
+    namespace Implementation3
+    {
+      /**
+       * A structure that contains a pointer to scratch and copy data objects
+       * along with a flag that indicates whether this object is currently in
+       * use.
+       */
+      template <typename Iterator,
+                typename ScratchData,
+                typename CopyData>
+      struct ScratchAndCopyDataObjects
+      {
+        // shared-ownership pointers to one scratch object and one copy
+        // object that are always handed out together
+        std_cxx11::shared_ptr<ScratchData> scratch_data;
+        std_cxx11::shared_ptr<CopyData>    copy_data;
+        // true while some caller is working with the two objects above
+        bool                               currently_in_use;
+
+        /**
+         * Default constructor. Leaves both pointers empty and marks the
+         * object as not in use.
+         */
+        ScratchAndCopyDataObjects ()
+          :
+          currently_in_use (false)
+        {}
+
+        /**
+         * Constructor. Hands the raw pointers @p p and @p q to the two
+         * shared pointers and sets the in-use flag to @p in_use.
+         */
+        ScratchAndCopyDataObjects (ScratchData *p,
+                                   CopyData *q,
+                                   const bool in_use)
+          :
+          scratch_data (p),
+          copy_data (q),
+          currently_in_use (in_use)
+        {}
+
+//TODO: when we push back an object to the list of scratch objects, in
+//      WorkerAndCopier::operator(), we first create an object and then copy
+//      it to the end of this list. this involves having two objects
+//      of the current type having pointers to it, each with their own
+//      currently_in_use flag. there is probably little harm in this because
+//      the original one goes out of scope right away again, but it's
+//      certainly awkward. one way to avoid this would be to use unique_ptr
+//      but we'd need to figure out a way to use it in non-C++11 mode
+        /**
+         * Copy constructor. Copies both shared pointers (so that source and
+         * copy refer to the same scratch and copy objects) as well as the
+         * in-use flag.
+         */
+        ScratchAndCopyDataObjects (const ScratchAndCopyDataObjects &o)
+          :
+          scratch_data (o.scratch_data),
+          copy_data (o.copy_data),
+          currently_in_use (o.currently_in_use)
+        {}
+      };
+
+
+
+
+
+      /**
+       * A class that manages calling the worker and copier functions. Unlike
+       * the other implementations, parallel_for is used instead of a
+       * pipeline.
+       */
+      template <typename Iterator,
+                typename ScratchData,
+                typename CopyData>
+      class WorkerAndCopier
+      {
+      public:
+        /**
+         * Constructor. Stores copies of the @p worker and @p copier
+         * function objects and references to the two sample objects from
+         * which per-thread scratch and copy objects are created on demand.
+         * The sample objects must therefore outlive this object.
+         */
+        WorkerAndCopier (const std_cxx11::function<void (const Iterator &,
+                                                         ScratchData &,
+                                                         CopyData &)> &worker,
+                         const std_cxx11::function<void (const CopyData &)> &copier,
+                         const ScratchData    &sample_scratch_data,
+                         const CopyData       &sample_copy_data)
+          :
+          worker (worker),
+          copier (copier),
+          sample_scratch_data (sample_scratch_data),
+          sample_copy_data (sample_copy_data)
+        {}
+
+
+        /**
+         * The function that calls the worker and the copier functions on a
+         * range of items denoted by the argument. For every element of
+         * @p range the worker is called first and the copier immediately
+         * afterwards, both with the same copy data object.
+         */
+        void operator() (const tbb::blocked_range<typename std::vector<Iterator>::const_iterator> &range)
+        {
+          // we need to find an unused scratch and corresponding copy
+          // data object in the list that corresponds to the current
+          // thread and then mark it as used. If we can't find one,
+          // create one. The list is thread-local (see the 'data' member),
+          // so there is no need to synchronize access to it
+          // using a mutex as long as we have no yield-point in between.
+          // This means that we can't take an iterator into the list
+          // now and expect it to still be valid after calling the worker,
+          // but we at least do not have to lock the following section.
+          ScratchData *scratch_data = 0;
+          CopyData    *copy_data    = 0;
+          {
+            ScratchAndCopyDataList &scratch_and_copy_data_list = data.get();
+
+            // see if there is an unused object. if so, grab it and mark
+            // it as used
+            for (typename ScratchAndCopyDataList::iterator
+                 p = scratch_and_copy_data_list.begin();
+                 p != scratch_and_copy_data_list.end(); ++p)
+              if (p->currently_in_use == false)
+                {
+                  scratch_data = p->scratch_data.get();
+                  copy_data    = p->copy_data.get();
+                  p->currently_in_use = true;
+                  break;
+                }
+
+            // if no element in the list was found, create one and mark it as used
+            if (scratch_data == 0)
+              {
+                Assert (copy_data==0, ExcInternalError());
+
+                // put each freshly allocated object under the control of
+                // its shared pointer right away. this way the scratch
+                // object is released again if copying the sample copy
+                // data throws, instead of being leaked through a raw
+                // pointer
+                typename ScratchAndCopyDataList::value_type new_scratch_object;
+                new_scratch_object.scratch_data.reset (new ScratchData(sample_scratch_data));
+                new_scratch_object.copy_data.reset (new CopyData(sample_copy_data));
+                new_scratch_object.currently_in_use = true;
+
+                // the list entry shares ownership with the local object,
+                // which goes out of scope at the end of this block
+                scratch_and_copy_data_list.push_back (new_scratch_object);
+
+                scratch_data = new_scratch_object.scratch_data.get();
+                copy_data    = new_scratch_object.copy_data.get();
+              }
+          }
+
+          // then call the worker and copier functions on each
+          // element of the chunk we were given. empty function objects
+          // are skipped; exceptions are reported rather than propagated
+          for (typename std::vector<Iterator>::const_iterator p=range.begin();
+               p != range.end(); ++p)
+            {
+              try
+                {
+                  if (worker)
+                    worker (*p,
+                            *scratch_data,
+                            *copy_data);
+                  if (copier)
+                    copier (*copy_data);
+                }
+              catch (const std::exception &exc)
+                {
+                  Threads::internal::handle_std_exception (exc);
+                }
+              catch (...)
+                {
+                  Threads::internal::handle_unknown_exception ();
+                }
+            }
+
+          // finally mark the scratch object as unused again. as above, there
+          // is no need to lock anything here since the object we work on
+          // is thread-local. every list entry owns its own scratch object,
+          // so at most one entry can match and we can stop there
+          {
+            ScratchAndCopyDataList &scratch_and_copy_data_list = data.get();
+
+            for (typename ScratchAndCopyDataList::iterator p =
+                   scratch_and_copy_data_list.begin(); p != scratch_and_copy_data_list.end();
+                 ++p)
+              if (p->scratch_data.get() == scratch_data)
+                {
+                  Assert(p->currently_in_use == true, ExcInternalError());
+                  p->currently_in_use = false;
+                  break;
+                }
+          }
+
+        }
+
+      private:
+        typedef
+        typename Implementation3::ScratchAndCopyDataObjects<Iterator,ScratchData,CopyData>
+        ScratchAndCopyDataObjects;
+
+        /**
+         * Typedef to a list of scratch data objects. The rationale for this
+         * list is provided in the variables that use these lists.
+         */
+        typedef std::list<ScratchAndCopyDataObjects> ScratchAndCopyDataList;
+
+        /**
+         * For each thread, a list of scratch/copy object pairs together
+         * with a flag that states whether a pair is currently in use.
+         * operator() takes an unused pair from the list of the current
+         * thread, or appends a new one if there is none.
+         */
+        Threads::ThreadLocalStorage<ScratchAndCopyDataList> data;
+
+        /**
+         * Pointer to the function that does the assembling on the sequence of
+         * cells.
+         */
+        const std_cxx11::function<void (const Iterator &,
+                                        ScratchData &,
+                                        CopyData &)> worker;
+
+        /**
+         * Pointer to the function that does the copying from local
+         * contribution to global object.
+         */
+        const std_cxx11::function<void (const CopyData &)> copier;
+
+        /**
+         * References to sample scratch and copy data for when we need them.
+         */
+        const ScratchData    &sample_scratch_data;
+        const CopyData       &sample_copy_data;
+      };
+    }
+
+  }
+
+
+#endif // DEAL_II_WITH_THREADS
+
+
+  /**
+   * This is one of two main functions of the WorkStream concept, doing work
+   * as described in the introduction to this namespace. It corresponds to
+   * implementation 3 of the paper by Turcksin, Kronbichler and Bangerth, see
+   * @ref workstream_paper.
+   * As such, it takes not a range of iterators described by a begin and end
+   * iterator, but a "colored" graph of iterators where each color represents
+   * cells for which writing the cell contributions into the global object
+   * does not conflict (in other words, these cells are not neighbors). Each
+   * "color" is represented by a std::vector of cells. The first argument to
+   * this function, a set of sets of cells (which are represented as a vector
+   * of vectors, for efficiency), is typically constructed by calling
+   * GraphColoring::make_graph_coloring(). See there for more information.
+   *
+   * This function can be used for worker and copier objects that are
+   * either pointers to non-member functions or objects that can be
+   * called with an operator(), for example objects created by std::bind.
+   *
+   * The two data types <tt>ScratchData</tt> and <tt>CopyData</tt> need to
+   * have a working copy constructor. <tt>ScratchData</tt> is only used in the
+   * <tt>worker</tt> function, while <tt>CopyData</tt> is the object passed
+   * from the <tt>worker</tt> to the <tt>copier</tt>.
+   *
+   * The @p queue_length argument indicates the number of items that can be
+   * live at any given time. Each item consists of @p chunk_size elements of
+   * the input stream that will be worked on by the worker and copier
+   * functions one after the other on the same thread.
+   *
+   * @note If your data objects are large, or their constructors are
+   * expensive, it is helpful to keep in mind that <tt>queue_length</tt>
+   * copies of the <tt>ScratchData</tt> object and
+   * <tt>queue_length*chunk_size</tt> copies of the <tt>CopyData</tt> object
+   * are generated.
+   *
+   * @note In the definition of this function further down in this file,
+   * @p queue_length is only checked to be positive, and @p chunk_size is
+   * used as the grain size of the parallel loop over each color.
+   */
+  template <typename Worker,
+            typename Copier,
+            typename Iterator,
+            typename ScratchData,
+            typename CopyData>
+  void
+  run (const std::vector<std::vector<Iterator> > &colored_iterators,
+       Worker                                     worker,
+       Copier                                     copier,
+       const ScratchData                         &sample_scratch_data,
+       const CopyData                            &sample_copy_data,
+       const unsigned int queue_length = 2*MultithreadInfo::n_threads(),
+       const unsigned int                         chunk_size = 8);
+
+
+  /**
+   * This is one of two main functions of the WorkStream concept, doing work
+   * as described in the introduction to this namespace. It corresponds to
+   * implementation 2 of the paper by Turcksin, Kronbichler and Bangerth (see
+   * @ref workstream_paper).
+   *
+   * This function can be used for worker and copier objects that are
+   * either pointers to non-member functions or objects that can be
+   * called with an operator(), for example objects created by std::bind. If
+   * the copier is an empty function, it is ignored in the pipeline.
+   *
+   * The argument passed as @p end must be convertible to the same type as @p
+   * begin, but doesn't have to be of the same type itself. This allows
+   * writing code like <code>WorkStream().run(dof_handler.begin_active(),
+   * dof_handler.end(), ...</code> where the first is of type
+   * DoFHandler::active_cell_iterator whereas the second is of type
+   * DoFHandler::raw_cell_iterator.
+   *
+   * The two data types <tt>ScratchData</tt> and <tt>CopyData</tt> need to
+   * have a working copy constructor. <tt>ScratchData</tt> is only used in the
+   * <tt>worker</tt> function, while <tt>CopyData</tt> is the object passed
+   * from the <tt>worker</tt> to the <tt>copier</tt>.
+   *
+   * The @p queue_length argument indicates the number of items that can be
+   * live at any given time. Each item consists of @p chunk_size elements of
+   * the input stream that will be worked on by the worker and copier
+   * functions one after the other on the same thread.
+   *
+   * If the range is empty, the function returns without calling either
+   * function.
+   *
+   * @note If your data objects are large, or their constructors are
+   * expensive, it is helpful to keep in mind that <tt>queue_length</tt>
+   * copies of the <tt>ScratchData</tt> object and
+   * <tt>queue_length*chunk_size</tt> copies of the <tt>CopyData</tt> object
+   * are generated.
+   */
+  template <typename Worker,
+            typename Copier,
+            typename Iterator,
+            typename ScratchData,
+            typename CopyData>
+  void
+  run (const Iterator                          &begin,
+       const typename identity<Iterator>::type &end,
+       Worker                                   worker,
+       Copier                                   copier,
+       const ScratchData                       &sample_scratch_data,
+       const CopyData                          &sample_copy_data,
+       const unsigned int queue_length = 2*MultithreadInfo::n_threads(),
+       const unsigned int                       chunk_size = 8)
+  {
+    Assert (queue_length > 0,
+            ExcMessage ("The queue length must be at least one, and preferably "
+                        "larger than the number of processors on this system."));
+    (void)queue_length; // removes -Wunused-parameter warning in optimized mode
+    Assert (chunk_size > 0,
+            ExcMessage ("The chunk_size must be at least one."));
+    (void)chunk_size; // removes -Wunused-parameter warning in optimized mode
+
+    // if no work then skip. (only use operator!= for iterators since we may
+    // not have an equality comparison operator)
+    if (!(begin != end))
+      return;
+
+    // need to check if the functions are not the zero function. To
+    // check zero-ness, create a C++ function out of each and check that.
+    // this conversion may build a temporary function object, so do it
+    // once here rather than once per element of the range
+    const bool have_worker
+      = (static_cast<const std_cxx11::function<void (const Iterator &,
+                                                     ScratchData &,
+                                                     CopyData &)>& >(worker)
+         ? true : false);
+    const bool have_copier
+      = (static_cast<const std_cxx11::function<void (const CopyData &)>& >(copier)
+         ? true : false);
+
+    // we want to use TBB if we have support and if it is not disabled at
+    // runtime:
+#ifdef DEAL_II_WITH_THREADS
+    if (MultithreadInfo::n_threads()==1)
+#endif
+      {
+        // need to copy the sample since it is marked const
+        ScratchData scratch_data = sample_scratch_data;
+        CopyData    copy_data    = sample_copy_data;
+
+        // work on the elements one after the other, each time calling
+        // the worker first and the copier right afterwards
+        for (Iterator i=begin; i!=end; ++i)
+          {
+            if (have_worker)
+              worker (i, scratch_data, copy_data);
+            if (have_copier)
+              copier (copy_data);
+          }
+      }
+#ifdef DEAL_II_WITH_THREADS
+    else // have TBB and use more than one thread
+      {
+        // Check that the copier exists
+        if (have_copier)
+          {
+            // create the three stages of the pipeline
+            internal::Implementation2::IteratorRangeToItemStream<Iterator,ScratchData,CopyData>
+            iterator_range_to_item_stream (begin, end,
+                                           queue_length,
+                                           chunk_size,
+                                           sample_scratch_data,
+                                           sample_copy_data);
+
+            internal::Implementation2::Worker<Iterator, ScratchData, CopyData> worker_filter (worker);
+            internal::Implementation2::Copier<Iterator, ScratchData, CopyData> copier_filter (copier);
+
+            // now create a pipeline from these stages
+            tbb::pipeline assembly_line;
+            assembly_line.add_filter (iterator_range_to_item_stream);
+            assembly_line.add_filter (worker_filter);
+            assembly_line.add_filter (copier_filter);
+
+            // and run it
+            assembly_line.run (queue_length);
+
+            assembly_line.clear ();
+          }
+        else
+          {
+            // there is no copier function. in this case, we have an
+            // embarrassingly parallel problem where we can
+            // essentially apply parallel_for. because parallel_for
+            // requires subdividing the range for which operator- is
+            // necessary between iterators, it is often inefficient to
+            // apply it directly to cell ranges and similar iterator
+            // types for which operator- is expensive or, in fact,
+            // nonexistent. rather, in that case, we simply copy the
+            // iterators into a large array and use operator- on
+            // iterators to this array of iterators.
+            //
+            // instead of duplicating code, this is essentially the
+            // same situation we have in Implementation3 below, so we
+            // just defer to that place
+            std::vector<std::vector<Iterator> > all_iterators (1);
+            for (Iterator p=begin; p!=end; ++p)
+              all_iterators[0].push_back (p);
+
+            run (all_iterators,
+                 worker, copier,
+                 sample_scratch_data,
+                 sample_copy_data,
+                 queue_length,
+                 chunk_size);
+          }
+      }
+#endif
+  }
+
+
+  // Implementation 3: definition of the run() overload that takes a
+  // "colored" set of iterators. The colors are processed one after the
+  // other; within a color, the elements are either worked on
+  // sequentially (one thread, or no thread support) or distributed with
+  // tbb::parallel_for using chunk_size as grain size. queue_length is
+  // only checked to be positive.
+  template <typename Worker,
+            typename Copier,
+            typename Iterator,
+            typename ScratchData,
+            typename CopyData>
+  void
+  run (const std::vector<std::vector<Iterator> > &colored_iterators,
+       Worker                                     worker,
+       Copier                                     copier,
+       const ScratchData                         &sample_scratch_data,
+       const CopyData                            &sample_copy_data,
+       const unsigned int                         queue_length,
+       const unsigned int                         chunk_size)
+  {
+    Assert (queue_length > 0,
+            ExcMessage ("The queue length must be at least one, and preferably "
+                        "larger than the number of processors on this system."));
+    (void)queue_length; // removes -Wunused-parameter warning in optimized mode
+    Assert (chunk_size > 0,
+            ExcMessage ("The chunk_size must be at least one."));
+    (void)chunk_size; // removes -Wunused-parameter warning in optimized mode
+
+    // we want to use TBB if we have support and if it is not disabled at
+    // runtime:
+#ifdef DEAL_II_WITH_THREADS
+    if (MultithreadInfo::n_threads()==1)
+#endif
+      {
+        // need to copy the sample since it is marked const
+        ScratchData scratch_data = sample_scratch_data;
+        CopyData    copy_data    = sample_copy_data;
+
+        // need to check if the functions are not the zero function. To
+        // check zero-ness, create a C++ function out of each and check
+        // that. this conversion may build a temporary function object,
+        // so do it once here rather than once per element
+        const bool have_worker
+          = (static_cast<const std_cxx11::function<void (const Iterator &,
+                                                         ScratchData &,
+                                                         CopyData &)>& >(worker)
+             ? true : false);
+        const bool have_copier
+          = (static_cast<const std_cxx11::function<void (const CopyData &)>& >(copier)
+             ? true : false);
+
+        for (unsigned int color=0; color<colored_iterators.size(); ++color)
+          for (typename std::vector<Iterator>::const_iterator p = colored_iterators[color].begin();
+               p != colored_iterators[color].end(); ++p)
+            {
+              if (have_worker)
+                worker (*p, scratch_data, copy_data);
+              if (have_copier)
+                copier (copy_data);
+            }
+      }
+#ifdef DEAL_II_WITH_THREADS
+    else // have TBB and use more than one thread
+      {
+        // loop over the various colors of what we're given. empty colors
+        // are skipped
+        for (unsigned int color=0; color<colored_iterators.size(); ++color)
+          if (colored_iterators[color].size() > 0)
+            {
+              typedef
+              internal::Implementation3::WorkerAndCopier<Iterator,ScratchData,CopyData>
+              WorkerAndCopier;
+
+              typedef
+              typename std::vector<Iterator>::const_iterator
+              RangeType;
+
+              WorkerAndCopier worker_and_copier (worker,
+                                                 copier,
+                                                 sample_scratch_data,
+                                                 sample_copy_data);
+
+              // the body is bound by reference so that all sub-ranges
+              // of this color operate on the one object created above
+              tbb::parallel_for (tbb::blocked_range<RangeType>
+                                 (colored_iterators[color].begin(),
+                                  colored_iterators[color].end(),
+                                  /*grain_size=*/chunk_size),
+                                 std_cxx11::bind (&WorkerAndCopier::operator(),
+                                                  std_cxx11::ref(worker_and_copier),
+                                                  std_cxx11::_1),
+                                 tbb::auto_partitioner());
+            }
+      }
+#endif
+  }
+
+
+
+
+
+  /**
+   * This is a variant of one of the two main functions of the WorkStream
+   * concept, doing work as described in the introduction to this namespace.
+   * It corresponds to implementation 2 of the paper by Turcksin, Kronbichler
+   * and Bangerth (see
+   * @ref workstream_paper).
+   *
+   * This is the function that can be used for worker and copier functions
+   * that are member functions of a class. If the copier is an empty function,
+   * it is ignored in the pipeline.
+   *
+   * The argument passed as @p end must be convertible to the same type as @p
+   * begin, but doesn't have to be of the same type itself. This allows one to
+   * write code like <code>WorkStream().run(dof_handler.begin_active(),
+   * dof_handler.end(), ...</code> where the first is of type
+   * DoFHandler::active_cell_iterator whereas the second is of type
+   * DoFHandler::raw_cell_iterator.
+   *
+   * The @p queue_length argument indicates the number of items that can be
+   * live at any given time. Each item consists of @p chunk_size elements of
+   * the input stream that will be worked on by the worker and copier
+   * functions one after the other on the same thread.
+   *
+   * @note If your data objects are large, or their constructors are
+   * expensive, it is helpful to keep in mind that <tt>queue_length</tt>
+   * copies of the <tt>ScratchData</tt> object and
+   * <tt>queue_length*chunk_size</tt> copies of the <tt>CopyData</tt> object
+   * are generated.
+   */
+  template <typename MainClass,
+            typename Iterator,
+            typename ScratchData,
+            typename CopyData>
+  void
+  run (const Iterator                          &begin,
+       const typename identity<Iterator>::type &end,
+       MainClass                               &main_object,
+       void (MainClass::*worker) (const Iterator &,
+                                  ScratchData &,
+                                  CopyData &),
+       void (MainClass::*copier) (const CopyData &),
+       const ScratchData                       &sample_scratch_data,
+       const CopyData                          &sample_copy_data,
+       const unsigned int queue_length =        2*MultithreadInfo::n_threads(),
+       const unsigned int chunk_size =          8)
+  {
+    // bind worker and copier to main_object and forward to the other function
+    run (begin, end,
+         std_cxx11::bind (worker,
+                          std_cxx11::ref (main_object),
+                          std_cxx11::_1, std_cxx11::_2, std_cxx11::_3),
+         std_cxx11::bind (copier,
+                          std_cxx11::ref (main_object),
+                          std_cxx11::_1),
+         sample_scratch_data,
+         sample_copy_data,
+         queue_length,
+         chunk_size);
+  }
+
+}
+
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+
+
+
+//----------------------------   work_stream.h     ---------------------------
+// end of #ifndef dealii__work_stream_h
+#endif
+//----------------------------   work_stream.h     ---------------------------
diff --git a/include/deal.II/distributed/grid_refinement.h b/include/deal.II/distributed/grid_refinement.h
new file mode 100644
index 0000000..ff774b1
--- /dev/null
+++ b/include/deal.II/distributed/grid_refinement.h
@@ -0,0 +1,111 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__distribute_grid_refinement_h
+#define dealii__distribute_grid_refinement_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/distributed/tria.h>
+
+#include <vector>
+#include <limits>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace parallel
+{
+  namespace distributed
+  {
+    // forward declarations
+    template <int dim, int spacedim> class Triangulation;
+
+
+    /**
+     * Collection of functions controlling refinement and coarsening of
+     * parallel::distributed::Triangulation objects. This namespace provides
+     * similar functionality to the dealii::GridRefinement namespace, except
+     * that it works for meshes that are parallel and distributed.
+     *
+     * @ingroup grid
+     * @author Wolfgang Bangerth, 2009
+     */
+    namespace GridRefinement
+    {
+      /**
+       * Like dealii::GridRefinement::refine_and_coarsen_fixed_number, but for
+       * parallel distributed triangulation.
+       *
+       * The vector of criteria needs to be a vector of refinement criteria
+       * for all cells active on the current triangulation, i.e.
+       * <code>tria.n_active_cells()</code> (and not
+       * <code>tria.n_locally_owned_active_cells()</code>). However, the
+       * function will only look at the indicators that correspond to those
+       * cells that are actually locally owned, and ignore the indicators for
+       * all other cells. The function will then coordinate among all
+       * processors that store part of the triangulation so that at the end @p
+       * top_fraction_of_cells are refined, where the fraction is enforced as
+       * a fraction of Triangulation::n_global_active_cells, not
+       * Triangulation::n_locally_active_cells on each processor individually.
+       * In other words, it may be that on some processors, no cells are
+       * refined at all.
+       *
+       * The same is true for the fraction of cells that is coarsened.
+       */
+      template <int dim, class VectorType, int spacedim>
+      void
+      refine_and_coarsen_fixed_number
+      (parallel::distributed::Triangulation<dim,spacedim> &tria,
+       const VectorType                                   &criteria,
+       const double                                       top_fraction_of_cells,
+       const double                                       bottom_fraction_of_cells,
+       const unsigned int                                 max_n_cells = std::numeric_limits<unsigned int>::max());
+
+      /**
+       * Like dealii::GridRefinement::refine_and_coarsen_fixed_fraction, but
+       * for parallel distributed triangulation.
+       *
+       * The vector of criteria needs to be a vector of refinement criteria
+       * for all cells active on the current triangulation, i.e.
+       * <code>tria.n_active_cells()</code> (and not
+       * <code>tria.n_locally_owned_active_cells()</code>). However, the
+       * function will only look at the indicators that correspond to those
+       * cells that are actually locally owned, and ignore the indicators for
+       * all other cells. The function will then coordinate among all
+       * processors that store part of the triangulation so that at the end
+       * the smallest fraction of Triangulation::n_global_active_cells (not
+       * Triangulation::n_locally_active_cells on each processor individually)
+       * is refined that together make up a total of @p top_fraction_of_error
+       * of the total error. In other words, it may be that on some
+       * processors, no cells are refined at all.
+       *
+       * The same is true for the fraction of cells that is coarsened.
+       */
+      template <int dim, class VectorType, int spacedim>
+      void
+      refine_and_coarsen_fixed_fraction
+      (parallel::distributed::Triangulation<dim,spacedim> &tria,
+       const VectorType                                   &criteria,
+       const double                                       top_fraction_of_error,
+       const double                                       bottom_fraction_of_error);
+    }
+  }
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // dealii__distribute_grid_refinement_h
diff --git a/include/deal.II/distributed/shared_tria.h b/include/deal.II/distributed/shared_tria.h
new file mode 100644
index 0000000..3d3d669
--- /dev/null
+++ b/include/deal.II/distributed/shared_tria.h
@@ -0,0 +1,206 @@
+// ---------------------------------------------------------------------
+// $Id: tria.h 32739 2014-04-08 16:39:47Z denis.davydov $
+//
+// Copyright (C) 2008 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__distributed__shared_tria_h
+#define dealii__distributed__shared_tria_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/base/template_constraints.h>
+#include <deal.II/grid/tria.h>
+
+#include <deal.II/distributed/tria_base.h>
+
+#include <deal.II/base/std_cxx1x/function.h>
+#include <deal.II/base/std_cxx1x/tuple.h>
+
+#include <set>
+#include <vector>
+#include <list>
+#include <utility>
+
+#ifdef DEAL_II_WITH_MPI
+#  include <mpi.h>
+#endif
+
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int, int> class Triangulation;
+
+
+namespace parallel
+{
+
+#ifdef DEAL_II_WITH_MPI
+
+
+  namespace shared
+  {
+
+    /**
+     * This is an extension of the dealii::Triangulation class that automatically
+     * partitions the triangulation when run with MPI. Different from the
+     * parallel::distributed::Triangulation, the entire mesh is stored on each
+     * processor. However, cells are labeled according to the id of the
+     * processor which "owns" them. The partitioning is done automatically
+     * inside the DoFHandler by calling Metis. This enables distributing DoFs
+     * among processors and therefore splitting matrices and vectors across
+     * processors. The usage of this class is demonstrated in Step-18.
+     *
+     * @author Denis Davydov, 2015
+     * @ingroup distributed
+     *
+     */
+    template <int dim, int spacedim = dim>
+    class Triangulation : public dealii::parallel::Triangulation<dim,spacedim>
+    {
+    public:
+      typedef typename dealii::Triangulation<dim,spacedim>::active_cell_iterator active_cell_iterator;
+      typedef typename dealii::Triangulation<dim,spacedim>::cell_iterator        cell_iterator;
+
+      /**
+       * Constructor.
+       *
+       * If @p allow_artificial_cells is true, this class will behave similar
+       * to parallel::distributed::Triangulation in that there will be locally
+       * owned, ghost and artificial cells.
+       *
+       * Otherwise all non-locally owned cells are considered ghost.
+       */
+      Triangulation (MPI_Comm mpi_communicator,
+                     const typename dealii::Triangulation<dim,spacedim>::MeshSmoothing =
+                       (dealii::Triangulation<dim,spacedim>::none),
+                     const bool allow_artificial_cells = false);
+
+      /**
+       * Destructor.
+       */
+      virtual ~Triangulation ();
+
+      /**
+       * Coarsen and refine the mesh according to refinement and coarsening
+       * flags set.
+       *
+       * This step is equivalent to the dealii::Triangulation class with an
+       * addition of calling dealii::GridTools::partition_triangulation() at
+       * the end.
+       */
+      virtual void execute_coarsening_and_refinement ();
+
+      /**
+       * Create a triangulation.
+       *
+       * This function also partitions triangulation based on the MPI
+       * communicator provided to constructor.
+       */
+      virtual void create_triangulation (const std::vector< Point< spacedim > > &vertices,
+                                         const std::vector< CellData< dim > > &cells,
+                                         const SubCellData &subcelldata);
+
+      /**
+       * Return a vector of length Triangulation::n_active_cells() where each
+       * element stores the subdomain id of the owner of this cell. The
+       * elements of the vector are obviously the same as the subdomain ids
+       * for locally owned and ghost cells, but are also correct for
+       * artificial cells that do not store who the owner of the cell is in
+       * their subdomain_id field.
+       */
+      const std::vector<types::subdomain_id> &get_true_subdomain_ids_of_cells() const;
+
+      /**
+       * Return allow_artificial_cells, namely true if artificial cells are
+       * allowed.
+       */
+      bool with_artificial_cells() const;
+
+    private:
+      /**
+       * A flag to decide whether or not artificial cells are allowed.
+       */
+      const bool allow_artificial_cells;
+
+      /**
+       * This function calls GridTools::partition_triangulation () and if
+       * requested in the constructor of the class marks artificial cells.
+       */
+      void partition();
+
+      /**
+       * A vector containing subdomain IDs of cells obtained by partitioning
+       * using METIS. In case allow_artificial_cells is false, this vector is
+       * consistent with IDs stored in cell->subdomain_id() of the
+       * triangulation class. When allow_artificial_cells is true, cells which
+       * are artificial will have cell->subdomain_id() == numbers::artificial;
+       *
+       * The original partition information is stored to allow using sequential
+       * DoF distribution and partitioning functions with semi-artificial
+       * cells.
+       */
+      std::vector<types::subdomain_id> true_subdomain_ids_of_cells;
+    };
+  }
+#else
+
+  namespace shared
+  {
+
+    /**
+     * Dummy class the compiler chooses for parallel shared triangulations if
+     * we didn't actually configure deal.II with the MPI library. The
+     * existence of this class allows us to refer to
+     * parallel::shared::Triangulation objects throughout the library even if
+     * it is disabled.
+     *
+     * Since the constructor of this class is private, no such objects can
+     * actually be created if MPI is not available.
+     */
+    template <int dim, int spacedim = dim>
+    class Triangulation : public dealii::parallel::Triangulation<dim,spacedim>
+    {
+    public:
+
+      /**
+       * A dummy function to return empty vector.
+       */
+      const std::vector<types::subdomain_id> &get_true_subdomain_ids_of_cells() const;
+
+      /**
+       * A dummy function which always returns true.
+       */
+      bool with_artificial_cells() const;
+    private:
+      /**
+       * Constructor.
+       */
+      Triangulation ();
+
+      /**
+       * A dummy vector.
+       */
+      std::vector<types::subdomain_id> true_subdomain_ids_of_cells;
+    };
+  }
+
+
+#endif
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/distributed/solution_transfer.h b/include/deal.II/distributed/solution_transfer.h
new file mode 100644
index 0000000..6717495
--- /dev/null
+++ b/include/deal.II/distributed/solution_transfer.h
@@ -0,0 +1,252 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__distributed_solution_transfer_h
+#define dealii__distributed_solution_transfer_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/distributed/tria.h>
+#include <deal.II/dofs/dof_handler.h>
+
+#include <vector>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace parallel
+{
+
+  namespace distributed
+  {
+    /**
+     * Transfers a discrete FE function (like a solution vector) by
+     * interpolation while refining and/or coarsening a distributed grid and
+     * handles the necessary communication.
+     *
+     * @note If you use more than one SolutionTransfer object at the same
+     * time, the calls to prepare_*() and interpolate()/deserialize() need to
+     * be in the same order.
+     *
+     * <h3>Note on ghost elements</h3> In a parallel computation, PETSc or
+     * Trilinos vectors may or may not contain ghost elements. For reading in
+     * information with prepare_for_coarsening_and_refinement() or
+     * prepare_serialization() you need to supply vectors with ghost elements,
+     * so that all locally_active elements can be read. On the other hand,
+     * ghosted vectors are generally not writable, so for calls to
+     * interpolate() or deserialize() you need to supply distributed vectors
+     * without ghost elements.
+     *
+     * <h3>Transferring a solution</h3> Here VectorType is your favorite
+     * vector type, e.g. PETScWrappers::MPI::Vector,
+     * TrilinosWrappers::MPI::Vector, or corresponding blockvectors.
+     * @code
+     * SolutionTransfer<dim, VectorType> soltrans(dof_handler);
+     *                                   // flag some cells for refinement
+     *                                   // and coarsening, e.g.
+     * GridRefinement::refine_and_coarsen_fixed_fraction(
+     * tria, error_indicators, 0.3, 0.05);
+     *                                   // prepare the triangulation,
+     * tria.prepare_coarsening_and_refinement();
+     *                                   // prepare the SolutionTransfer object
+     *                                   // for coarsening and refinement and give
+     *                                   // the solution vector that we intend to
+     *                                   // interpolate later,
+     * soltrans.prepare_for_coarsening_and_refinement(solution);
+     *                                   // actually execute the refinement,
+     * tria.execute_coarsening_and_refinement ();
+     *                                   // redistribute dofs,
+     * dof_handler.distribute_dofs (fe);
+     *                                   // and interpolate the solution
+     * VectorType interpolated_solution;
+     * //create VectorType in the right size here
+     * soltrans.interpolate(interpolated_solution);
+     * @endcode
+     *
+     * <h3>Use for Serialization</h3>
+     *
+     * This class can be used to serialize and later deserialize a distributed
+     * mesh with solution vectors to a file. If you use more than one
+     * DoFHandler and therefore more than one SolutionTransfer object, they
+     * need to be serialized and deserialized in the same order.
+     *
+     * If vector has the locally relevant DoFs, serialization works as
+     * follows:
+     * @code
+     *
+     * parallel::distributed::SolutionTransfer<dim,VectorType> sol_trans(dof_handler);
+     * sol_trans.prepare_serialization (vector);
+     *
+     * triangulation.save(filename);
+     * @endcode
+     * For deserialization the vector needs to be a distributed vector
+     * (without ghost elements):
+     * @code
+     * //[create coarse mesh...]
+     * triangulation.load(filename);
+     *
+     * parallel::distributed::SolutionTransfer<dim,VectorType> sol_trans(dof_handler);
+     * sol_trans.deserialize (distributed_vector);
+     * @endcode
+     *
+     *
+     * <h3>Interaction with hanging nodes</h3>
+     *
+     * In essence, this class implements the same steps as does
+     * dealii::SolutionTransfer (though the implementation is entirely
+     * separate). Consequently, the same issue with hanging nodes and
+     * coarsening can happen with this class as happens with
+     * dealii::SolutionTransfer. See there for an extended discussion.
+     *
+     * @ingroup distributed
+     * @author Timo Heister, 2009-2011
+     */
+    template<int dim, typename VectorType, typename DoFHandlerType=DoFHandler<dim> >
+    class SolutionTransfer
+    {
+    public:
+      /**
+       * Constructor, takes the current DoFHandler as argument.
+       */
+      SolutionTransfer(const DoFHandlerType &dof);
+      /**
+       * Destructor.
+       */
+      ~SolutionTransfer();
+
+      /**
+       * Prepares the @p SolutionTransfer for coarsening and refinement. It
+       * stores the dof indices of each cell and stores the dof values of the
+       * vectors in @p all_in in each cell that'll be coarsened. @p all_in
+       * includes all vectors that are to be interpolated onto the new
+       * (refined and/or coarsened) grid.
+       */
+      void prepare_for_coarsening_and_refinement (const std::vector<const VectorType *> &all_in);
+
+      /**
+       * Same as previous function but for only one discrete function to be
+       * interpolated.
+       */
+      void prepare_for_coarsening_and_refinement (const VectorType &in);
+
+      /**
+       * Interpolate the data previously stored in this object before the mesh
+       * was refined or coarsened onto the current set of cells. Do so for
+       * each of the vectors provided to
+       * prepare_for_coarsening_and_refinement() and write the result into the
+       * given set of vectors.
+       */
+      void interpolate (std::vector<VectorType *> &all_out);
+
+      /**
+       * Same as the previous function. It interpolates only one function. It
+       * assumes the vectors having the right sizes (i.e.
+       * <tt>in.size()==n_dofs_old</tt>, <tt>out.size()==n_dofs_refined</tt>)
+       *
+       * Calling this function multiple times is NOT allowed. Interpolating
+       * several functions can be performed in one step by using
+       * <tt>interpolate (all_out)</tt>
+       */
+      void interpolate (VectorType &out);
+
+
+      /**
+       * Return the size in bytes that need to be stored per cell.
+       */
+      unsigned int get_data_size() const;
+
+
+      /**
+       * Prepare the serialization of the given vector. The serialization is
+       * done by Triangulation::save(). The given vector needs all information
+       * on the locally active DoFs (it must be ghosted). See documentation of
+       * this class for more information.
+       */
+      void prepare_serialization(const VectorType &in);
+
+
+      /**
+       * Same as the function above, only for a list of vectors.
+       */
+      void prepare_serialization(const std::vector<const VectorType *> &all_in);
+
+
+      /**
+       * Execute the deserialization of the given vector. This needs to be
+       * done after calling Triangulation::load(). The given vector must be a
+       * fully distributed vector without ghost elements. See documentation of
+       * this class for more information.
+       */
+      void deserialize(VectorType &in);
+
+
+      /**
+       * Same as the function above, only for a list of vectors.
+       */
+      void deserialize(std::vector<VectorType *> &all_in);
+
+    private:
+      /**
+       * Pointer to the degree of freedom handler to work with.
+       */
+      SmartPointer<const DoFHandlerType,SolutionTransfer<dim,VectorType,DoFHandlerType> > dof_handler;
+
+      /**
+       * A vector that stores pointers to all the vectors we are supposed to
+       * copy over from the old to the new mesh.
+       */
+      std::vector<const VectorType *> input_vectors;
+
+      /**
+       * The offset that the Triangulation has assigned to this object
+       * starting at which we are allowed to write.
+       */
+      unsigned int offset;
+
+      /**
+       * A callback function used to pack the data on the current mesh into
+       * objects that can later be retrieved after refinement, coarsening and
+       * repartitioning.
+       */
+      void pack_callback(const typename Triangulation<dim,DoFHandlerType::space_dimension>::cell_iterator &cell,
+                         const typename Triangulation<dim,DoFHandlerType::space_dimension>::CellStatus status,
+                         void *data);
+
+      /**
+       * A callback function used to unpack the data on the current mesh that
+       * has been packed up previously on the mesh before refinement,
+       * coarsening and repartitioning.
+       */
+      void unpack_callback(const typename Triangulation<dim,DoFHandlerType::space_dimension>::cell_iterator &cell,
+                           const typename Triangulation<dim,DoFHandlerType::space_dimension>::CellStatus status,
+                           const void *data,
+                           std::vector<VectorType *> &all_out);
+
+
+      /**
+       *
+       */
+      void register_data_attach(const std::size_t size);
+
+    };
+
+
+  }
+}
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/distributed/tria.h b/include/deal.II/distributed/tria.h
new file mode 100644
index 0000000..1c347c6
--- /dev/null
+++ b/include/deal.II/distributed/tria.h
@@ -0,0 +1,1132 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__distributed_tria_h
+#define dealii__distributed_tria_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/base/template_constraints.h>
+#include <deal.II/grid/tria.h>
+
+#include <deal.II/base/std_cxx11/function.h>
+#include <deal.II/base/std_cxx11/tuple.h>
+
+#include <deal.II/distributed/tria_base.h>
+
+#include <set>
+#include <vector>
+#include <list>
+#include <utility>
+
+#ifdef DEAL_II_WITH_MPI
+#  include <mpi.h>
+#endif
+
+#ifdef DEAL_II_WITH_P4EST
+#include <p4est_connectivity.h>
+#include <p4est.h>
+#include <p4est_ghost.h>
+
+#include <p8est_connectivity.h>
+#include <p8est.h>
+#include <p8est_ghost.h>
+#endif
+
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int, int> class Triangulation;
+
+#ifdef DEAL_II_WITH_P4EST
+
+namespace internal
+{
+  namespace DoFHandler
+  {
+    namespace Policy
+    {
+      template <int, int> class ParallelDistributed;
+    }
+  }
+}
+
+
+namespace internal
+{
+  namespace p4est
+  {
+    /**
+     * A structure whose explicit specializations contain typedefs to the
+     * relevant p4est_* and p8est_* types. Using this structure, for example
+     * by saying <tt>types<dim>::connectivity</tt> we can write code in a
+     * dimension independent way, either referring to p4est_connectivity_t or
+     * p8est_connectivity_t, depending on template argument.
+     */
+    template <int> struct types;
+
+    template <>
+    struct types<2>
+    {
+      typedef p4est_connectivity_t connectivity;
+      typedef p4est_t              forest;
+      typedef p4est_tree_t         tree;
+      typedef p4est_quadrant_t     quadrant;
+      typedef p4est_topidx_t       topidx;
+      typedef p4est_locidx_t       locidx;
+#if DEAL_II_P4EST_VERSION_GTE(0,3,4,3)
+      typedef p4est_connect_type_t balance_type;
+#else
+      typedef p4est_balance_type_t balance_type;
+#endif
+      typedef p4est_ghost_t        ghost;
+    };
+
+    template <>
+    struct types<3>
+    {
+      typedef p8est_connectivity_t connectivity;
+      typedef p8est_t              forest;
+      typedef p8est_tree_t         tree;
+      typedef p8est_quadrant_t     quadrant;
+      typedef p4est_topidx_t       topidx;
+      typedef p4est_locidx_t       locidx;
+#if DEAL_II_P4EST_VERSION_GTE(0,3,4,3)
+      typedef p8est_connect_type_t balance_type;
+#else
+      typedef p8est_balance_type_t balance_type;
+#endif
+      typedef p8est_ghost_t        ghost;
+    };
+
+
+    /**
+     * Initialize the GeometryInfo<dim>::max_children_per_cell children of the
+     * cell p4est_cell.
+     */
+    template <int dim>
+    void
+    init_quadrant_children
+    (const typename types<dim>::quadrant &p4est_cell,
+     typename types<dim>::quadrant (&p4est_children)[GeometryInfo<dim>::max_children_per_cell]);
+
+
+    /**
+     * Initialize quadrant to represent a coarse cell.
+     */
+    template <int dim>
+    void
+    init_coarse_quadrant(typename types<dim>::quadrant &quad);
+
+
+
+    /**
+     * Returns whether q1 and q2 are equal
+     */
+    template <int dim>
+    bool
+    quadrant_is_equal (const typename types<dim>::quadrant &q1,
+                       const typename types<dim>::quadrant &q2);
+
+    //TODO: remove these functions from
+    //public interface somehow? [TH]
+
+    /**
+     * returns whether q1 is an ancestor of q2
+     */
+    template <int dim>
+    bool
+    quadrant_is_ancestor (const typename types<dim>::quadrant &q1,
+                          const typename types<dim>::quadrant &q2);
+  }
+}
+
+//forward declaration of the data type for periodic face pairs
+namespace GridTools
+{
+  template <typename CellIterator> struct PeriodicFacePair;
+}
+
+namespace parallel
+{
+  namespace distributed
+  {
+
+
+    /**
+     * This class acts like the dealii::Triangulation class, but it
+     * distributes the mesh across a number of different processors when using
+     * MPI. The class's interface does not add a lot to the
+     * dealii::Triangulation class but there are a number of difficult
+     * algorithms under the hood that ensure we always have a load-balanced,
+     * fully distributed mesh. Use of this class is explained in step-40,
+     * step-32, the
+     * @ref distributed
+     * documentation module, as well as the
+     * @ref distributed_paper.
+     * See there for more information. This class satisfies the
+     * @ref ConceptMeshType "MeshType concept".
+     *
+     * @note This class does not support anisotropic refinement, because it
+     * relies on the p4est library that does not support this. Attempts to
+     * refine cells anisotropically will result in errors.
+     *
+     * @note There is currently no support for distributing 1d triangulations.
+     *
+     *
+     * <h3> Interaction with boundary description </h3>
+     *
+     * Refining and coarsening a distributed triangulation is a complicated
+     * process because cells may have to be migrated from one processor to
+     * another. On a single processor, materializing that part of the global
+     * mesh that we want to store here from what we have stored before
+     * therefore may involve several cycles of refining and coarsening the
+     * locally stored set of cells until we have finally gotten from the
+     * previous to the next triangulation. This process is described in more
+     * detail in the
+     * @ref distributed_paper.
+     * Unfortunately, in this process, some information can get lost relating
+     * to flags that are set by user code and that are inherited from mother
+     * to child cell but that are not moved along with a cell if that cell is
+     * migrated from one processor to another.
+     *
+     * An example are boundary indicators. Assume, for example, that you start
+     * with a single cell that is refined once globally, yielding four
+     * children. If you have four processors, each one owns one cell. Assume
+     * now that processor 1 sets the boundary indicators of the external
+     * boundaries of the cell it owns to 42. Since processor 0 does not own
+     * this cell, it doesn't set the boundary indicators of its ghost cell
+     * copy of this cell. Now, assume we do several mesh refinement cycles and
+     * end up with a configuration where this processor suddenly finds itself
+     * as the owner of this cell. If boundary indicator 42 means that we need
+     * to integrate Neumann boundary conditions along this boundary, then
+     * processor 0 will forget to do so because it has never set the boundary
+     * indicator along this cell's boundary to 42.
+     *
+     * The way to avoid this dilemma is to make sure that things like setting
+     * boundary indicators or material ids is done immediately every time a
+     * parallel triangulation is refined. This is not necessary for sequential
+     * triangulations because, there, these flags are inherited from mother to
+     * child cell and remain with a cell even if it is refined and the
+     * children are later coarsened again, but this does not hold for
+     * distributed triangulations. It is made even more difficult by the fact
+     * that in the process of refining a parallel distributed triangulation,
+     * the triangulation may call
+     * dealii::Triangulation::execute_coarsening_and_refinement multiple times
+     * and this function needs to know about boundaries. In other words, it is
+     * <i>not</i> enough to just set boundary indicators on newly created
+     * faces only <i>after</i> calling
+     * <tt>parallel::distributed::Triangulation::execute_coarsening_and_refinement</tt>:
+     * it actually has to happen while that function is still running.
+     *
+     * The way to do this is by writing a function that sets boundary
+     * indicators and that will be called by the dealii::Triangulation class.
+     * The triangulation does not provide a pointer to itself to the function
+     * being called, nor any other information, so the trick is to get this
+     * information into the function. C++ provides a nice mechanism for this
+     * that is best explained using an example:
+     * @code
+     *     #include <deal.II/base/std_cxx11/bind.h>
+     *
+     *     template <int dim>
+     *     void set_boundary_ids (parallel::distributed::Triangulation<dim> &triangulation)
+     *     {
+     *       ... set boundary indicators on the triangulation object ...
+     *     }
+     *
+     *     template <int dim>
+     *     void
+     *     MyClass<dim>::
+     *     create_coarse_mesh (parallel::distributed::Triangulation<dim> &coarse_grid) const
+     *     {
+     *       ... create the coarse mesh ...
+     *
+     *       coarse_grid.signals.post_refinement.connect
+     *         (std_cxx11::bind (&set_boundary_ids<dim>,
+     *                           std_cxx11::ref(coarse_grid)));
+     *
+     *     }
+     * @endcode
+     *
+     * What the call to <code>std_cxx11::bind</code> does is to produce an
+     * object that can be called like a function with no arguments. It does so
+     * by taking the address of a function that does, in fact, take an
+     * argument but permanently fix this one argument to a reference to the
+     * coarse grid triangulation. After each refinement step, the
+     * triangulation will then call the object so created which will in turn
+     * call <code>set_boundary_ids<dim></code> with the reference to the
+     * coarse grid as argument.
+     *
+     * This approach can be generalized. In the example above, we have used a
+     * global function that will be called. However, sometimes it is necessary
+     * that this function is in fact a member function of the class that
+     * generates the mesh, for example because it needs to access run-time
+     * parameters. This can be achieved as follows: assuming the
+     * <code>set_boundary_ids()</code> function has been declared as a (non-
+     * static, but possibly private) member function of the
+     * <code>MyClass</code> class, then the following will work:
+     * @code
+     *     #include <deal.II/base/std_cxx11/bind.h>
+     *
+     *     template <int dim>
+     *     void
+     *     MyClass<dim>::
+     *     set_boundary_ids (parallel::distributed::Triangulation<dim> &triangulation) const
+     *     {
+     *       ... set boundary indicators on the triangulation object ...
+     *     }
+     *
+     *     template <int dim>
+     *     void
+     *     MyClass<dim>::
+     *     create_coarse_mesh (parallel::distributed::Triangulation<dim> &coarse_grid) const
+     *     {
+     *       ... create the coarse mesh ...
+     *
+     *       coarse_grid.signals.post_refinement.connect
+     *         (std_cxx11::bind (&MyClass<dim>::set_boundary_ids,
+     *                           std_cxx11::cref(*this),
+     *                           std_cxx11::ref(coarse_grid)));
+     *     }
+     * @endcode
+     * Here, like any other member function, <code>set_boundary_ids</code>
+     * implicitly takes a pointer or reference to the object it belongs to as
+     * first argument. <code>std_cxx11::bind</code> again creates an object that can
+     * be called like a global function with no arguments, and this object in
+     * turn calls <code>set_boundary_ids</code> with a pointer to the current
+     * object and a reference to the triangulation to work on. Note that
+     * because the <code>create_coarse_mesh</code> function is declared as
+     * <code>const</code>, it is necessary that the
+     * <code>set_boundary_ids</code> function is also declared
+     * <code>const</code>.
+     *
+     * <b>Note:</b> For reasons that have to do with the way the
+     * parallel::distributed::Triangulation is implemented, functions that
+     * have been attached to the post-refinement signal of the triangulation
+     * are called more than once, sometimes several times, every time the
+     * triangulation is actually refined.
+     *
+     *
+     * @author Wolfgang Bangerth, Timo Heister 2008, 2009, 2010, 2011
+     * @ingroup distributed
+     */
+    template <int dim, int spacedim = dim>
+    class Triangulation : public dealii::parallel::Triangulation<dim,spacedim>
+    {
+    public:
+      /**
+       * A typedef that is used to identify cell iterators. The concept of
+       * iterators is discussed at length in the
+       * @ref Iterators "iterators documentation module".
+       *
+       * The current typedef identifies cells in a triangulation. You can find
+       * the exact type it refers to in the base class's own typedef, but it
+       * should be TriaIterator<CellAccessor<dim,spacedim> >. The TriaIterator
+       * class works like a pointer that when you dereference it yields an
+       * object of type CellAccessor. CellAccessor is a class that identifies
+       * properties that are specific to cells in a triangulation, but it is
+       * derived (and consequently inherits) from TriaAccessor that describes
+       * what you can ask of more general objects (lines, faces, as well as
+       * cells) in a triangulation.
+       *
+       * @ingroup Iterators
+       */
+      typedef typename dealii::Triangulation<dim,spacedim>::cell_iterator        cell_iterator;
+
+      /**
+       * A typedef that is used to identify
+       * @ref GlossActive "active cell iterators".
+       * The concept of iterators is discussed at length in the
+       * @ref Iterators "iterators documentation module".
+       *
+       * The current typedef identifies active cells in a triangulation. You
+       * can find the exact type it refers to in the base class's own typedef,
+       * but it should be TriaActiveIterator<CellAccessor<dim,spacedim> >. The
+       * TriaActiveIterator class works like a pointer to active objects that
+       * when you dereference it yields an object of type CellAccessor.
+       * CellAccessor is a class that identifies properties that are specific
+       * to cells in a triangulation, but it is derived (and consequently
+       * inherits) from TriaAccessor that describes what you can ask of more
+       * general objects (lines, faces, as well as cells) in a triangulation.
+       *
+       * @ingroup Iterators
+       */
+      typedef typename dealii::Triangulation<dim,spacedim>::active_cell_iterator active_cell_iterator;
+
+      typedef typename dealii::Triangulation<dim,spacedim>::CellStatus CellStatus;
+
+      /**
+       * Configuration flags for distributed Triangulations to be set in the
+       * constructor. Settings can be combined using bitwise OR.
+       */
+      enum Settings
+      {
+        /**
+         * Default settings, other options are disabled.
+         */
+        default_setting = 0x0,
+        /**
+         * If set, the deal.II mesh will be reconstructed from the coarse mesh
+         * every time a repartitioning in p4est happens. This can be a bit more
+         * expensive, but guarantees the same memory layout and therefore cell
+         * ordering in the deal.II mesh. As assembly is done in the deal.II
+         * cell ordering, this flag is required to get reproducible behaviour
+         * after snapshot/resume.
+         */
+        mesh_reconstruction_after_repartitioning = 0x1,
+        /**
+         * This flag needs to be set to use the geometric multigrid
+         * functionality. This option requires additional computation and
+         * communication. Note: geometric multigrid is still a work in
+         * progress.
+         */
+        construct_multigrid_hierarchy = 0x2,
+        /**
+         * Setting this flag will disable automatic repartitioning of the cells
+         * after a refinement cycle. Repartitioning can then be executed manually
+         * by calling repartition().
+         */
+        no_automatic_repartitioning = 0x4
+      };
+
+
+
+      /**
+       * Constructor.
+       *
+       * @param mpi_communicator denotes the MPI communicator to be used for
+       * the triangulation.
+       *
+       * @param smooth_grid Degree and kind of mesh smoothing to be applied to
+       * the mesh. See the dealii::Triangulation class for a description of
+       * the kinds of smoothing operations that can be applied.
+       *
+       * @param settings See the description of the Settings enumerator.
+       *
+       * @note This class does not currently support the
+       * <code>check_for_distorted_cells</code> argument provided by the base
+       * class.
+       *
+       * @note While it is possible to pass all of the mesh smoothing flags
+       * listed in the base class to objects of this type, it is not always
+       * possible to honor all of these smoothing options if they would
+       * require knowledge of refinement/coarsening flags on cells not locally
+       * owned by this processor. As a consequence, for some of these flags,
+       * the ultimate number of cells of the parallel triangulation may depend
+       * on the number of processors into which it is partitioned. On the
+       * other hand, if no smoothing flags are passed, if you always mark the
+       * same cells of the mesh, you will always get the exact same refined
+       * mesh independent of the number of processors into which the
+       * triangulation is partitioned.
+       */
+      Triangulation (MPI_Comm mpi_communicator,
+                     const typename dealii::Triangulation<dim,spacedim>::MeshSmoothing
+                     smooth_grid = (dealii::Triangulation<dim,spacedim>::none),
+                     const Settings settings = default_setting);
+
+      /**
+       * Destructor.
+       */
+      virtual ~Triangulation ();
+
+      /**
+       * Reset this triangulation into a virgin state by deleting all data.
+       *
+       * Note that this operation is only allowed if no subscriptions to this
+       * object exist any more, such as DoFHandler objects using it.
+       */
+      virtual void clear ();
+
+      /**
+       * Implementation of the same function as in the base class.
+       */
+      virtual void copy_triangulation (const dealii::Triangulation<dim, spacedim> &old_tria);
+
+      /**
+       * Create a triangulation as documented in the base class.
+       *
+       * This function also sets up the various data structures necessary to
+       * distribute a mesh across a number of processors. This will be
+       * necessary once the mesh is being refined, though we will always keep
+       * the entire coarse mesh that is generated by this function on all
+       * processors.
+       */
+      virtual void create_triangulation (const std::vector<Point<spacedim> >    &vertices,
+                                         const std::vector<CellData<dim> > &cells,
+                                         const SubCellData                 &subcelldata);
+
+      /**
+       * Coarsen and refine the mesh according to refinement and coarsening
+       * flags set.
+       *
+       * Since the current processor only has control over those cells it owns
+       * (i.e. the ones for which <code>cell-@>subdomain_id() ==
+       * this-@>locally_owned_subdomain()</code>), refinement and coarsening
+       * flags are only respected for those locally owned cells. Flags may be
+       * set on other cells as well (and may often, in fact, if you call
+       * dealii::Triangulation::prepare_coarsening_and_refinement()) but will
+       * be largely ignored: the decision to refine the global mesh will only
+       * be affected by flags set on locally owned cells.
+       *
+       * @note This function by default partitions the mesh in such a way that
+       * the number of cells on all processors is roughly equal. If you want
+       * to set weights for partitioning, e.g. because some cells are more
+       * expensive to compute than others, you can use the signal cell_weight
+       * as documented in the dealii::Triangulation class. This function will
+       * check whether a function is connected to the signal and if so use it.
+       * If you prefer to repartition the mesh yourself at user-defined
+       * intervals only, you can create your triangulation object by passing
+       * the parallel::distributed::Triangulation::no_automatic_repartitioning
+       * flag to the constructor, which ensures that calling the current
+       * function only refines and coarsens the triangulation, but doesn't
+       * partition it. You can then call the repartition() function manually.
+       * The usage of the cell_weight signal is identical in both cases: if a
+       * function is connected to the signal it will be used to balance the
+       * calculated weights, otherwise the number of cells is balanced.
+       */
+      virtual void execute_coarsening_and_refinement ();
+
+      /**
+       * Override the implementation of prepare_coarsening_and_refinement from
+       * the base class. This is necessary if periodic boundaries are enabled
+       * and the level difference over vertices over the periodic boundary
+       * must be not more than 2:1.
+       */
+      virtual bool prepare_coarsening_and_refinement ();
+
+      /**
+       * Manually repartition the active cells between processors. Normally
+       * this repartitioning will happen automatically when calling
+       * execute_coarsening_and_refinement() (or refine_global()) unless the
+       * @p no_automatic_repartitioning is set in the constructor. Setting the
+       * flag and then calling repartition() gives the same result.
+       *
+       * If you want to transfer data (using SolutionTransfer or manually with
+       * register_data_attach() and notify_ready_to_unpack()), you need to set
+       * it up twice: once when calling execute_coarsening_and_refinement(),
+       * which will handle coarsening and refinement but obviously won't ship
+       * any data between processors, and a second time when calling
+       * repartition().  Here, no coarsening and refinement will be done but
+       * information will be packed and shipped to different processors. In
+       * other words, you probably want to treat a call to repartition() in
+       * the same way as execute_coarsening_and_refinement() with respect to
+       * dealing with data movement (SolutionTransfer, etc.).
+       *
+       * @note If no function is connected to the cell_weight signal described
+       * in the dealii::Triangulation class, this function will balance the
+       * number of cells on each processor. If one or more functions are
+       * connected, it will calculate the sum of the weights and balance the
+       * weights across processors. The only requirement on the weights is
+       * that every cell's weight is positive and that the sum over all
+       * weights on all processors can be formed using a 64-bit integer.
+       * Beyond that, it is your choice how you want to interpret the weights.
+       * A common approach is to consider the weights proportional to the cost
+       * of doing computations on a cell, e.g., by summing the time for
+       * assembly and solving. In practice, determining this cost is of course
+       * not trivial since we don't solve on isolated cells, but on the entire
+       * mesh. In such cases, one could, for example, choose the weight equal
+       * to the number of unknowns per cell (in the context of hp finite
+       * element methods), or using a heuristic that estimates the cost on
+       * each cell depending on whether, for example, one has to run some
+       * expensive algorithm on some cells but not others (such as forming
+       * boundary integrals during the assembly only on cells that are
+       * actually at the boundary, or computing expensive nonlinear terms only
+       * on some cells but not others, e.g., in the elasto-plastic problem in
+       * step-42).
+       */
+      void repartition ();
+
+      /**
+       * When vertices have been moved locally, for example using code like
+       * @code
+       *   cell->vertex(0) = new_location;
+       * @endcode
+       * then this function can be used to update the location of vertices
+       * between MPI processes.
+       *
+       * All the vertices that have been moved and might be in the ghost layer
+       * of a process have to be reported in the @p vertex_locally_moved
+       * argument. This ensures that the part of the information that has to
+       * be sent between processes is actually sent. Additionally, it is quite
+       * important that vertices on the boundary between processes are
+       * reported on exactly one process (e.g. the one with the highest id).
+       * Otherwise we could expect undesirable results if multiple processes
+       * move a vertex differently. A typical strategy is to let processor $i$
+       * move those vertices that are adjacent to cells whose owners include
+       * processor $i$ but no other processor $j$ with $j<i$; in other words,
+       * for vertices at the boundary of a subdomain, the processor with the
+       * lowest subdomain id "owns" a vertex.
+       *
+       * @note It only makes sense to move vertices that are either located on
+       * locally owned cells or on cells in the ghost layer. This is because
+       * you can be sure that these vertices indeed exist on the finest mesh
+       * aggregated over all processors, whereas vertices on artificial cells
+       * but not at least in the ghost layer may or may not exist on the
+       * globally finest mesh. Consequently, the @p vertex_locally_moved
+       * argument may not contain vertices that aren't at least on ghost
+       * cells.
+       *
+       * @note This function moves vertices in such a way that on every
+       * processor, the vertices of every locally owned and ghost cell are
+       * consistent with the corresponding location of these cells on other
+       * processors. On the other hand, the locations of artificial cells will
+       * in general be wrong since artificial cells may or may not exist on
+       * other processors and consequently it is not possible to determine
+       * their location in any way. This is not usually a problem since one
+       * never does anything on artificial cells. However, it may lead to
+       * problems if the mesh with moved vertices is refined in a later step.
+       * If that's what you want to do, the right way to do it is to save the
+       * offset applied to every vertex, call this function, and before
+       * refining or coarsening the mesh apply the opposite offset and call
+       * this function again.
+       *
+       * @param vertex_locally_moved A bitmap indicating which vertices have
+       * been moved. The size of this array must be equal to
+       * Triangulation::n_vertices() and the vertices it flags must be a subset
+       * of those flagged by GridTools::get_locally_owned_vertices().
+       *
+       * @see This function is used, for example, in
+       * GridTools::distort_random().
+       */
+      void
+      communicate_locally_moved_vertices (const std::vector<bool> &vertex_locally_moved);
+
+
+      /**
+       * Returns true if the triangulation has hanging nodes.
+       *
+       * In the context of parallel distributed triangulations, every
+       * processor stores only that part of the triangulation it locally owns.
+       * However, it also stores the entire coarse mesh, and to guarantee the
+       * 2:1 relationship between cells, this may mean that there are hanging
+       * nodes between cells that are not locally owned or ghost cells (i.e.,
+       * between ghost cells and artificial cells, or between artificial and
+       * artificial cells; see
+       * @ref GlossArtificialCell "the glossary").
+       * One is not typically interested in this case, so the function returns
+       * whether there are hanging nodes between any two cells of the "global"
+       * mesh, i.e., the union of locally owned cells on all processors.
+       */
+      virtual
+      bool has_hanging_nodes() const;
+
+      /**
+       * Return the local memory consumption in bytes.
+       */
+      virtual std::size_t memory_consumption () const;
+
+      /**
+       * Return the local memory consumption contained in the p4est data
+       * structures alone. This is already contained in memory_consumption()
+       * but made available separately for debugging purposes.
+       */
+      virtual std::size_t memory_consumption_p4est () const;
+
+      /**
+       * A collective operation that produces a sequence of output files with
+       * the given file base name that contain the mesh in VTK format.
+       *
+       * More than anything else, this function is useful for debugging the
+       * interface between deal.II and p4est.
+       */
+      void write_mesh_vtk (const char *file_basename) const;
+
+      /**
+       * Produce a check sum of the triangulation.  This is a collective
+       * operation and is mostly useful for debugging purposes.
+       */
+      unsigned int get_checksum () const;
+
+      /**
+       * Save the refinement information from the coarse mesh into the given
+       * file. This file needs to be reachable from all nodes in the
+       * computation on a shared network file system. See the SolutionTransfer
+       * class on how to store solution vectors into this file. Additional
+       * cell-based data can be saved using register_data_attach().
+       */
+      void save(const char *filename) const;
+
+      /**
+       * Load the refinement information saved with save() back in. The mesh
+       * must contain the same coarse mesh that was used in save() before
+       * calling this function.
+       *
+       * You do not need to load with the same number of MPI processes that
+       * you saved with. Rather, if a mesh is loaded with a different number
+       * of MPI processes than used at the time of saving, the mesh is
+       * repartitioned appropriately. Cell-based data that was saved with
+       * register_data_attach() can be read in with notify_ready_to_unpack()
+       * after calling load().
+       *
+       * If you use p4est version > 0.3.4.2 the @p autopartition flag tells
+       * p4est to ignore the partitioning that the triangulation had when it
+       * was saved and make it uniform upon loading. If @p autopartition is
+       * set to false, the triangulation is only repartitioned if needed (i.e.
+       * if a different number of MPI processes is encountered).
+       */
+      void load(const char *filename,
+                const bool autopartition = true);
+
+      /**
+       * Register a function with the current Triangulation object that will
+       * be used to attach data to active cells before
+       * execute_coarsening_and_refinement(). In
+       * execute_coarsening_and_refinement() the Triangulation will call the
+       * given function pointer and provide @p size bytes to store data. If
+       * necessary, this data will be transferred to the new owner of that
+       * cell during repartitioning the tree. See notify_ready_to_unpack() on
+       * how to retrieve the data.
+       *
+       * Callers need to store the return value.  It specifies an offset of
+       * the position at which data can later be retrieved during a call to
+       * notify_ready_to_unpack().
+       *
+       * The CellStatus argument in the callback function will tell you if the
+       * given cell will be coarsened, refined, or will persist as is (this
+       * can be different than the coarsen and refine flags set by you). If it is
+       *
+       * - CELL_PERSIST: the cell won't be refined/coarsened, but might be
+       *   moved to a different processor
+       * - CELL_REFINE: this cell will be refined into 4/8 cells, you can not
+       *   access the children (because they don't exist yet)
+       * - CELL_COARSEN: the children of this cell will be coarsened into the
+       *   given cell (you can access the active children!)
+       *
+       * When unpacking the data with notify_ready_to_unpack() you can access
+       * the children of the cell if the status is CELL_REFINE but not for
+       * CELL_COARSEN. As a consequence you need to handle coarsening while
+       * packing and refinement during unpacking.
+       *
+       * @note The two functions can also be used for serialization of data
+       * using save() and load() in the same way. Then the status will always
+       * be CELL_PERSIST.
+       */
+      unsigned int
+      register_data_attach (const std::size_t size,
+                            const std_cxx11::function<void (const cell_iterator &,
+                                                            const CellStatus,
+                                                            void *)> &pack_callback);
+
+      /**
+       * The supplied callback function is called for each newly locally owned
+       * cell and corresponding data saved with register_data_attach().  This
+       * function needs to be called after execute_coarsening_and_refinement()
+       * with the offset returned by register_data_attach().
+       *
+       * The CellStatus will indicate if the cell was refined, coarsened, or
+       * persisted unchanged. The cell_iterator will either be an active,
+       * locally owned cell (if the cell was not refined), or the immediate
+       * parent if it was refined during execute_coarsening_and_refinement().
+       * Therefore, contrary to during register_data_attach(), you can now
+       * access the children if the status is CELL_REFINE but no longer for
+       * callbacks with status CELL_COARSEN.
+       */
+      void
+      notify_ready_to_unpack (const unsigned int offset,
+                              const std_cxx11::function<void (const cell_iterator &,
+                                                              const CellStatus,
+                                                              const void *)> &unpack_callback);
+
+      /**
+       * Return a permutation vector for the order the coarse cells are handed
+       * off to p4est. For example the value of the $i$th element in this
+       * vector is the index of the deal.II coarse cell (counting from
+       * begin(0)) that corresponds to the $i$th tree managed by p4est.
+       */
+      const std::vector<types::global_dof_index> &
+      get_p4est_tree_to_coarse_cell_permutation() const;
+
+      /**
+       * Return a permutation vector for the mapping from the coarse deal.II
+       * cells to the p4est trees. This is the inverse of
+       * get_p4est_tree_to_coarse_cell_permutation.
+       */
+      const std::vector<types::global_dof_index> &
+      get_coarse_cell_to_p4est_tree_permutation() const;
+
+      /**
+       * Join faces in the p4est forest for periodic boundary conditions. As a
+       * result, each pair of faces will differ by at most one refinement
+       * level and ghost neighbors will be available across these faces.
+       *
+       * The vector can be filled by the function
+       * GridTools::collect_periodic_faces.
+       *
+       * For more information on periodic boundary conditions see
+       * GridTools::collect_periodic_faces,
+       * DoFTools::make_periodicity_constraints and step-45.
+       *
+       * @note Before this function can be used the Triangulation has to be
+       * initialized and must not be refined. Calling this function more than
+       * once is possible, but not recommended: The function destroys and
+       * rebuilds the p4est forest each time it is called.
+       */
+      void
+      add_periodicity
+      (const std::vector<GridTools::PeriodicFacePair<cell_iterator> > &);
+
+
+    private:
+
+      /**
+       * Override the function to update the number cache so we can fill data
+       * like @p level_ghost_owners.
+       *
+       */
+      virtual void update_number_cache ();
+
+      /**
+       * store the Settings.
+       */
+      Settings settings;
+
+      /**
+       * A flag that indicates whether the triangulation has actual content.
+       */
+      bool triangulation_has_content;
+
+      /**
+       * A data structure that holds the connectivity between trees. Since
+       * each tree is rooted in a coarse grid cell, this data structure holds
+       * the connectivity between the cells of the coarse grid.
+       */
+      typename dealii::internal::p4est::types<dim>::connectivity *connectivity;
+
+      /**
+       * A data structure that holds the local part of the global
+       * triangulation.
+       */
+      typename dealii::internal::p4est::types<dim>::forest *parallel_forest;
+      /**
+       * A data structure that holds some information about the ghost cells of
+       * the triangulation.
+       */
+      typename dealii::internal::p4est::types<dim>::ghost  *parallel_ghost;
+
+      /**
+       * A flag that indicates whether refinement of a triangulation is
+       * currently in progress. This flag is used to disambiguate whether a
+       * call to execute_coarsening_and_refinement() came from the outside or
+       * through a recursive call. While the first time we want to take over
+       * work to copy things from a refined p4est, the other times we don't
+       * want to get in the way as these latter calls to
+       * Triangulation::execute_coarsening_and_refinement() are simply there
+       * in order to re-create a triangulation that matches the p4est.
+       */
+      bool refinement_in_progress;
+
+
+      /**
+       * Number of bytes that get attached to the Triangulation through
+       * register_data_attach(), for example by SolutionTransfer.
+       */
+      unsigned int attached_data_size;
+
+      /**
+       * Number of functions that get attached to the Triangulation through
+       * register_data_attach(), for example by SolutionTransfer.
+       */
+      unsigned int n_attached_datas;
+
+      /**
+       * number of functions that need to unpack their data after a call from
+       * load()
+       */
+      unsigned int n_attached_deserialize;
+
+      typedef  std_cxx11::function<
+      void(typename Triangulation<dim,spacedim>::cell_iterator, CellStatus, void *)
+      > pack_callback_t;
+
+      typedef std::pair<unsigned int, pack_callback_t> callback_pair_t;
+
+      typedef std::list<callback_pair_t> callback_list_t;
+
+      /**
+       * List of callback functions registered by register_data_attach() that
+       * are going to be called for packing data.
+       */
+      callback_list_t attached_data_pack_callbacks;
+
+
+      /**
+       * Two arrays that store which p4est tree corresponds to which coarse
+       * grid cell and vice versa. We need these arrays because p4est goes
+       * with the original order of coarse cells when it sets up its forest,
+       * and then applies the Morton ordering within each tree. But if coarse
+       * grid cells are badly ordered this may mean that individual parts of
+       * the forest stored on a local machine may be split across coarse grid
+       * cells that are not geometrically close. Consequently, we apply a
+       * hierarchical preordering according to
+       * SparsityTools::reorder_hierarchical() to ensure that the part of the
+       * forest stored by p4est is located on geometrically close coarse grid
+       * cells.
+       */
+      std::vector<types::global_dof_index> coarse_cell_to_p4est_tree_permutation;
+      std::vector<types::global_dof_index> p4est_tree_to_coarse_cell_permutation;
+
+      /**
+       * If add_periodicity() is called, this variable stores the given
+       * periodic face pairs on level 0 for later access during the
+       * identification of ghost cells for the multigrid hierarchy.
+       */
+      std::vector<GridTools::PeriodicFacePair<cell_iterator> > periodic_face_pairs_level_0;
+
+      /**
+       * Return a pointer to the p4est tree that belongs to the given
+       * @p dealii_coarse_cell_index.
+       */
+      typename dealii::internal::p4est::types<dim>::tree *
+      init_tree(const int dealii_coarse_cell_index) const;
+
+      /**
+       * The function that computes the permutation between the two data
+       * storage schemes.
+       */
+      void setup_coarse_cell_to_p4est_tree_permutation ();
+
+      /**
+       * Take the contents of a newly created triangulation we are attached to
+       * and copy it to p4est data structures.
+       *
+       * This function exists in 2d and 3d variants.
+       */
+      void copy_new_triangulation_to_p4est (dealii::internal::int2type<2>);
+      void copy_new_triangulation_to_p4est (dealii::internal::int2type<3>);
+
+      /**
+       * Copy the local part of the refined forest from p4est into the
+       * attached triangulation.
+       */
+      void copy_local_forest_to_triangulation ();
+
+      /**
+       * Internal function notifying all registered classes to attach their
+       * data before repartitioning occurs. Called from
+       * execute_coarsening_and_refinement().
+       */
+      void attach_mesh_data();
+
+      /**
+       * Internal function notifying all registered slots to provide their
+       * weights before repartitioning occurs. Called from
+       * execute_coarsening_and_refinement() and repartition().
+       *
+       * @return A vector of unsigned integers representing the weight or
+       * computational load of every cell after the refinement/coarsening/
+       * repartition cycle. Note that the number of entries does not need to
+       * be equal to either n_active_cells or n_locally_owned_active_cells,
+       * because the triangulation is not updated yet. The weights are sorted
+       * in the order that p4est will encounter them while iterating over
+       * them.
+       */
+      std::vector<unsigned int>
+      get_cell_weights();
+
+      /**
+       * Fills a map that, for each vertex, lists all the processors whose
+       * subdomains are adjacent to that vertex. Used by
+       * DoFHandler::Policy::ParallelDistributed.
+       */
+      void
+      fill_vertices_with_ghost_neighbors
+      (std::map<unsigned int, std::set<dealii::types::subdomain_id> >
+       &vertices_with_ghost_neighbors);
+
+      /**
+       * Fills a map that, for each vertex, lists all the processors whose
+       * subdomains are adjacent to that vertex on the given level for the
+       * multigrid hierarchy. Used by DoFHandler::Policy::ParallelDistributed.
+       */
+      void
+      fill_level_vertices_with_ghost_neighbors
+      (const unsigned int level,
+       std::map<unsigned int, std::set<dealii::types::subdomain_id> >
+       &vertices_with_ghost_neighbors);
+
+      /**
+       * This method returns a bit vector of length tria.n_vertices()
+       * indicating the locally active vertices on a level, i.e., the vertices
+       * touched by the locally owned level cells for use in geometric
+       * multigrid (possibly including the vertices due to periodic boundary
+       * conditions) are marked by true.
+       *
+       * Used by DoFHandler::Policy::ParallelDistributed.
+       */
+      std::vector<bool>
+      mark_locally_active_vertices_on_level(const unsigned int level) const;
+
+      template <int, int> friend class dealii::internal::DoFHandler::Policy::ParallelDistributed;
+    };
+
+
+    /**
+     * Specialization of the general template for the 1d case. There is
+     * currently no support for distributing 1d triangulations. Consequently,
+     * all this class does is throw an exception.
+     */
+    template <int spacedim>
+    class Triangulation<1,spacedim> : public dealii::parallel::Triangulation<1,spacedim>
+    {
+    public:
+      /**
+       * Constructor. The argument denotes the MPI communicator to be used for
+       * the triangulation.
+       */
+      Triangulation (MPI_Comm mpi_communicator);
+
+      /**
+       * Destructor.
+       */
+      virtual ~Triangulation ();
+
+      /**
+       * Return a permutation vector for the order the coarse cells are
+       * handed off to p4est. For example the first element i in this vector
+       * denotes that the first cell in hierarchical ordering is the ith deal
+       * cell starting from begin(0).
+       */
+      const std::vector<types::global_dof_index> &
+      get_p4est_tree_to_coarse_cell_permutation() const;
+
+      /**
+       * When vertices have been moved locally, for example using code like
+       * @code
+       *   cell->vertex(0) = new_location;
+       * @endcode
+       * then this function can be used to update the location of vertices
+       * between MPI processes.
+       *
+       * All the vertices that have been moved and might be in the ghost layer
+       * of a process have to be reported in the @p vertex_locally_moved
+       * argument. This ensures that the part of the information that has to
+       * be sent between processes is actually sent. Additionally, it is quite
+       * important that vertices on the boundary between processes are
+       * reported on exactly one process (e.g. the one with the highest id).
+       * Otherwise we could expect undesirable results if multiple processes
+       * move a vertex differently. A typical strategy is to let processor $i$
+       * move those vertices that are adjacent to cells whose owners include
+       * processor $i$ but no other processor $j$ with $j<i$; in other words,
+       * for vertices at the boundary of a subdomain, the processor with the
+       * lowest subdomain id "owns" a vertex.
+       *
+       * @note It only makes sense to move vertices that are either located on
+       * locally owned cells or on cells in the ghost layer. This is because
+       * you can be sure that these vertices indeed exist on the finest mesh
+       * aggregated over all processors, whereas vertices on artificial cells
+       * but not at least in the ghost layer may or may not exist on the
+       * globally finest mesh. Consequently, the @p vertex_locally_moved
+       * argument may not contain vertices that aren't at least on ghost
+       * cells.
+       *
+       * @see This function is used, for example, in
+       * GridTools::distort_random().
+       */
+      void
+      communicate_locally_moved_vertices (const std::vector<bool> &vertex_locally_moved);
+
+      /**
+       * Dummy arrays. This class isn't usable but the compiler wants to see
+       * these variables in a couple of places anyway.
+       */
+      std::vector<types::global_dof_index> coarse_cell_to_p4est_tree_permutation;
+      std::vector<types::global_dof_index> p4est_tree_to_coarse_cell_permutation;
+
+      /**
+       * Dummy settings.
+       */
+      enum Settings
+      {
+        default_setting = 0x0,
+        mesh_reconstruction_after_repartitioning = 0x1,
+        construct_multigrid_hierarchy = 0x2
+      };
+
+
+//TODO: The following variable should really be private, but it is used in dof_handler_policy.cc ...
+      /**
+       * Dummy settings object.
+       */
+      Settings settings;
+
+      /**
+       * Like above, this method, which is only implemented for dim = 2 or 3,
+       * needs a stub because it is used in dof_handler_policy.cc
+       */
+      void
+      fill_vertices_with_ghost_neighbors
+      (std::map<unsigned int, std::set<dealii::types::subdomain_id> >
+       &vertices_with_ghost_neighbors);
+
+      /**
+       * Like above, this method, which is only implemented for dim = 2 or 3,
+       * needs a stub because it is used in dof_handler_policy.cc
+       */
+      void
+      fill_level_vertices_with_ghost_neighbors
+      (const unsigned int level,
+       std::map<unsigned int, std::set<dealii::types::subdomain_id> >
+       &vertices_with_ghost_neighbors);
+
+      /**
+       * Like above, this method, which is only implemented for dim = 2 or 3,
+       * needs a stub because it is used in dof_handler_policy.cc
+       */
+      std::vector<bool>
+      mark_locally_active_vertices_on_level(const unsigned int level) const;
+
+    };
+  }
+}
+
+
+#else // DEAL_II_WITH_P4EST
+
+namespace parallel
+{
+  namespace distributed
+  {
+    /**
+     * Dummy class the compiler chooses for parallel distributed
+     * triangulations if we didn't actually configure deal.II with the p4est
+     * library. The existence of this class allows us to refer to
+     * parallel::distributed::Triangulation objects throughout the library
+     * even if it is disabled.
+     *
+     * Since the constructor of this class is private, no such objects can
+     * actually be created if we don't have p4est available.
+     */
+    template <int dim, int spacedim = dim>
+    class Triangulation : public dealii::parallel::Triangulation<dim,spacedim>
+    {
+    private:
+      /**
+       * Constructor. Declared private so that no objects can be created.
+       */
+      Triangulation ();
+
+    };
+  }
+}
+
+
+#endif
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/distributed/tria_base.h b/include/deal.II/distributed/tria_base.h
new file mode 100644
index 0000000..6dd25d0
--- /dev/null
+++ b/include/deal.II/distributed/tria_base.h
@@ -0,0 +1,225 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__distributed__tria_base_h
+#define dealii__distributed__tria_base_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/base/template_constraints.h>
+#include <deal.II/base/mpi.h>
+#include <deal.II/grid/tria.h>
+
+#include <deal.II/base/std_cxx1x/function.h>
+#include <deal.II/base/std_cxx1x/tuple.h>
+
+#include <set>
+#include <vector>
+#include <list>
+#include <utility>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int, int> class Triangulation;
+
+
+namespace parallel
+{
+  /**
+   * This class describes the interface for all triangulation classes that
+   * work in parallel, namely parallel::distributed::Triangulation and
+   * parallel::shared::Triangulation.
+   */
+  template <int dim, int spacedim = dim>
+  class Triangulation : public dealii::Triangulation<dim,spacedim>
+  {
+  public:
+
+    /**
+     * Constructor.
+     */
+    Triangulation (MPI_Comm mpi_communicator,
+                   const typename dealii::Triangulation<dim,spacedim>::MeshSmoothing smooth_grid = (dealii::Triangulation<dim,spacedim>::none),
+                   const bool check_for_distorted_cells = false);
+
+    /**
+     * Destructor.
+     */
+    virtual ~Triangulation ();
+
+    /**
+     * Return MPI communicator used by this triangulation.
+     */
+    virtual MPI_Comm get_communicator () const;
+
+    /**
+     * Implementation of the same function as in the base class.
+     */
+    virtual void copy_triangulation (const dealii::Triangulation<dim, spacedim> &old_tria);
+
+    /**
+     * Return the number of active cells owned by each of the MPI processes
+     * that contribute to this triangulation. The element of this vector
+     * indexed by locally_owned_subdomain() equals the result of
+     * n_locally_owned_active_cells().
+     */
+    const std::vector<unsigned int> &
+    n_locally_owned_active_cells_per_processor () const;
+
+
+    /**
+     * Return the number of active cells in the triangulation that are locally
+     * owned, i.e. that have a subdomain_id equal to
+     * locally_owned_subdomain(). Note that there may be more active cells in
+     * the triangulation stored on the present processor, such as for example
+     * ghost cells, or cells further away from the locally owned block of
+     * cells but that are needed to ensure that the triangulation that stores
+     * this processor's set of active cells still remains balanced with
+     * respect to the 2:1 size ratio of adjacent cells.
+     *
+     * As a consequence of the remark above, the result of this function is
+     * always smaller than or equal to the result of the function with the
+     * same name in the ::Triangulation base class, which includes the active
+     * ghost and artificial cells (see also
+     * @ref GlossArtificialCell
+     * and
+     * @ref GlossGhostCell).
+     */
+    unsigned int n_locally_owned_active_cells () const;
+
+    /**
+     * Return the sum over all processors of the number of active cells owned
+     * by each processor. This equals the overall number of active cells in
+     * the triangulation.
+     */
+    virtual types::global_dof_index n_global_active_cells () const;
+
+    /**
+     * Return the local memory consumption in bytes.
+     */
+    virtual std::size_t memory_consumption () const;
+
+
+    /**
+     * Return the global maximum level. This may be bigger than the number
+     * dealii::Triangulation::n_levels() (a function in this class's base
+     * class) returns if the current processor only stores cells in parts of
+     * the domain that are not very refined, but if other processors store
+     * cells in more deeply refined parts of the domain.
+     */
+    virtual unsigned int n_global_levels () const;
+
+    /**
+     * Return the subdomain id of those cells that are owned by the current
+     * processor. All cells in the triangulation that do not have this
+     * subdomain id are either owned by another processor or have children
+     * that only exist on other processors.
+     */
+    types::subdomain_id locally_owned_subdomain () const;
+
+    /**
+     * Return a set of MPI ranks of the processors that have at least one
+     * ghost cell adjacent to the cells of the local processor. In other
+     * words, this is the set of subdomain_id() for all ghost cells.
+     *
+     * @note If @p i is contained in the list of processor @p j, then @p j
+     * will also be contained in the list of processor @p i.
+     */
+    const std::set<unsigned int> &ghost_owners () const;
+
+    /**
+     * Return a set of MPI ranks of the processors that have at least one
+     * level ghost cell adjacent to our cells used in geometric multigrid. In
+     * other words, this is the set of level_subdomain_id() for all level
+     * ghost cells.
+     *
+     * @note If @p i is contained in the list of processor @p j, then @p j
+     * will also be contained in the list of processor @p i.
+     */
+    const std::set<unsigned int> &level_ghost_owners () const;
+
+  protected:
+    /**
+     * MPI communicator to be used for the triangulation. We create a unique
+     * communicator for this class, which is a duplicate of the one passed to
+     * the constructor.
+     */
+    MPI_Comm mpi_communicator;
+
+    /**
+     * The subdomain id to be used for the current processor. This is the MPI
+     * rank.
+     */
+    types::subdomain_id my_subdomain;
+
+    /**
+     * The total number of subdomains (or the size of the MPI communicator).
+     */
+    types::subdomain_id n_subdomains;
+
+    /**
+     * A structure that contains information about the distributed
+     * triangulation.
+     */
+    struct NumberCache
+    {
+      /**
+       * This vector stores the number of locally owned active cells per MPI
+       * rank.
+       */
+      std::vector<unsigned int> n_locally_owned_active_cells;
+      /**
+       * The total number of active cells (sum of @p
+       * n_locally_owned_active_cells).
+       */
+      types::global_dof_index   n_global_active_cells;
+      /**
+       * The global number of levels computed as the maximum number of levels
+       * taken over all MPI ranks, so <tt>n_levels()<=n_global_levels =
+       * max(n_levels() on proc i)</tt>.
+       */
+      unsigned int              n_global_levels;
+      /**
+       * A set containing the subdomain_id (MPI rank) of the owners of the
+       * ghost cells on this processor.
+       */
+      std::set<unsigned int> ghost_owners;
+      /**
+       * A set containing the MPI ranks of the owners of the level ghost cells
+       * on this processor (for all levels).
+       */
+      std::set<unsigned int> level_ghost_owners;
+
+      NumberCache();
+    };
+
+    NumberCache number_cache;
+
+    /**
+     * Update the number_cache variable after mesh creation or refinement.
+     */
+    virtual void update_number_cache ();
+
+
+  };
+
+} // namespace parallel
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/dofs/block_info.h b/include/deal.II/dofs/block_info.h
new file mode 100644
index 0000000..c079c4a
--- /dev/null
+++ b/include/deal.II/dofs/block_info.h
@@ -0,0 +1,316 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__block_info_h
+#define dealii__block_info_h
+
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/lac/block_indices.h>
+
+#include <iomanip>
+
+DEAL_II_NAMESPACE_OPEN
+
+// Forward declarations
+
+template <int dim, int spacedim> class DoFHandler;
+namespace hp
+{
+  template <int dim, int spacedim> class DoFHandler;
+}
+
+
+/**
+ * @brief A small class collecting the different BlockIndices involved in
+ * global, multilevel and local computations.
+ *
+ * Once a DoFHandler has been initialized with an FESystem, a data object of
+ * type BlockInfo (accessed by DoFHandler::block_info() ) is filled, which
+ * reflects the block structure of the degrees of freedom.
+ *
+ * BlockInfo consists of several BlockIndices objects. The member global()
+ * reflects the block structure of the system on the active cell level,
+ * usually referred to as the global system. As soon as
+ * DoFHandler::distribute_dofs() has been called, the function
+ * BlockIndices::block_size() in global() will return the correct sizes of
+ * each block. After DoFRenumbering::block_wise(), BlockIndices::block_start()
+ * will return the start index for each of the blocks.
+ *
+ * When a DoFHandler with levels is used, the same structure is automatically
+ * generated for each level. The level blocks can be accessed through level().
+ *
+ * Finally, there are local() BlockIndices, which describe the block structure
+ * on a single cell. This is used for instance by
+ * MeshWorker::Assembler::MatrixLocalBlocksToGlobalBlocks. The local indices
+ * are not filled automatically, since they change the behavior of the
+ * MeshWorker::Assembler classes relying on BlockInfo. They must be
+ * initialized by hand through initialize_local().
+ *
+ * <h3>Usage</h3>
+ *
+ * The most common usage for this object is initializing vectors as in the
+ * following code:
+ *
+ * @code
+ * DoFHandler<dim> dof_handler(triangulation);
+ * dof_handler.distribute_dofs(fe_system);
+ * dof_handler.distribute_mg_dofs(fe_system);
+ * DoFRenumbering::block_wise(dof_handler);
+ *
+ * BlockVector<double> solution(dof_handler.block_info().global());
+ *
+ * MGLevelObject<BlockVector<double> > mg_vector(0, triangulation.n_levels()-1);
+ * for (unsigned int i = 0; i < triangulation.n_levels(); ++i)
+ *   {
+ *     mg_vector[i].reinit(dof_handler.block_info().level(i));
+ *   }
+ * @endcode
+ *
+ * In this example, <tt>solution</tt> obtains the block structure needed to
+ * represent a finite element function on the DoFHandler. Similarly, all
+ * levels of <tt>mg_vector</tt> will have the block structure needed on that
+ * level.
+ *
+ * @todo Extend the functions local() and renumber() to the concept to
+ * hpDoFHandler.
+ *
+ * @ingroup dofs
+ * @author Guido Kanschat, 2009
+ */
+class BlockInfo : public Subscriptor
+{
+public:
+  /**
+   * @brief Fill the object with values describing block structure of the
+   * DoFHandler.
+   *
+   * By default, this function will attempt to initialize whatever is
+   * possible. If active dofs have been assigned in the DoFHandler argument,
+   * the BlockIndices for those will be generated. The same for level dofs.
+   *
+   * This default behavior can be overridden by the two parameters, which can
+   * switch off active dofs or level dofs.
+   *
+   * This function will also clear the local() indices.
+   */
+  template <int dim, int spacedim>
+  void initialize(const DoFHandler<dim, spacedim> &, bool levels_only = false, bool active_only = false);
+
+  /**
+   * @brief Initialize block structure on cells and compute renumbering
+   * between cell dofs and block cell dofs.
+   */
+  template <int dim, int spacedim>
+  void initialize_local(const DoFHandler<dim, spacedim> &);
+
+  /**
+   * Access the BlockIndices structure of the global system.
+   */
+  const BlockIndices &global() const;
+
+  /**
+   * Access BlockIndices for the local system on a cell.
+   */
+  const BlockIndices &local() const;
+
+  /**
+   * Access the BlockIndices structure of a level in the multilevel hierarchy.
+   */
+  const BlockIndices &level(unsigned int level) const;
+
+  /**
+   * Return the index after local renumbering.
+   *
+   * The input of this function is an index between zero and the number of
+   * dofs per cell, numbered in local block ordering, that is first all
+   * indices of the first system block, then all of the second block and so
+   * forth. The function then outputs the index in the standard local
+   * numbering of DoFAccessor.
+   */
+  types::global_dof_index renumber (const unsigned int i) const;
+
+  /**
+   * The number of base elements.
+   */
+  unsigned int n_base_elements() const;
+
+  /**
+   * Return the base element of this index.
+   */
+  unsigned int base_element (const unsigned int i) const;
+
+  /**
+   * Write a summary of the block structure to the stream.
+   */
+  template <class OS>
+  void
+  print(OS &stream) const;
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  std::size_t memory_consumption () const;
+
+  /**
+   * Read or write the data of this object to or from a stream for the purpose
+   * of serialization.
+   */
+  template <class Archive>
+  void serialize (Archive &ar,
+                  const unsigned int /*version*/);
+
+private:
+  /**
+   * @brief The block structure of the global system.
+   */
+  BlockIndices bi_global;
+  /**
+   * @brief The multilevel block structure.
+   */
+  std::vector<BlockIndices> levels;
+
+  /**
+   * @brief The block structure of the cell systems.
+   */
+  BlockIndices bi_local;
+
+  /**
+   * The base element associated with each block.
+   */
+  std::vector<unsigned int> base_elements;
+
+  /**
+   * A vector containing the renumbering from the standard order of degrees of
+   * freedom on a cell to a component wise ordering. Filled by initialize().
+   */
+  std::vector<types::global_dof_index> local_renumbering;
+};
+
+
+
+//----------------------------------------------------------------------//
+
+// Read-only access to the block structure of the global system.
+inline const BlockIndices &
+BlockInfo::global() const
+{
+  return bi_global;
+}
+
+
+// Read-only access to the block structure of the system on a single cell.
+inline const BlockIndices &
+BlockInfo::local() const
+{
+  return bi_local;
+}
+
+
+// Block structure of level l; l is range-checked against levels.size().
+inline const BlockIndices &
+BlockInfo::level (const unsigned int l) const
+{
+  AssertIndexRange(l, levels.size());
+  return levels[l];
+}
+
+
+// Entry i of local_renumbering; i is range-checked against the table size.
+inline types::global_dof_index BlockInfo::renumber (const unsigned int i) const
+{
+  AssertIndexRange(i, static_cast<unsigned int>(local_renumbering.size()));
+  return local_renumbering[i];
+}
+
+
+// Base element stored for index i; i is range-checked against the number
+// of stored base elements.
+inline unsigned int
+BlockInfo::base_element (const unsigned int i) const
+{
+  AssertIndexRange(i, base_elements.size());
+  return base_elements[i];
+}
+
+
+// Number of entries in base_elements, narrowed explicitly to unsigned int.
+inline unsigned int
+BlockInfo::n_base_elements() const
+{
+  return static_cast<unsigned int>(base_elements.size());
+}
+
+
+
+template <class OS>
+inline void
+BlockInfo::print (OS &os) const
+{
+  // Global system: total number of dofs, then the size of each block.
+  os << "global   dofs " << std::setw(5) << global().total_size() << " blocks";
+  for (unsigned int b=0; b<global().size(); ++b)
+    os << ' ' << std::setw(5) << global().block_size(b);
+  os << std::endl;
+
+  if (local().size() != 0)
+    {
+      os << "local    dofs " << std::setw(5) << local().total_size() << " blocks";
+      for (unsigned int b=0; b<local().size(); ++b)
+        os << ' '  << std::setw(5) << local().block_size(b);
+      os << std::endl;
+    }
+  else
+    os << "local dofs not initialized" << std::endl;
+
+  // One line per entry of the multilevel structure, in the same format.
+  for (unsigned int l=0; l<levels.size(); ++l)
+    {
+      const BlockIndices &level_blocks = level(l);
+      os << "level " << std::setw(2) << l << " dofs " << std::setw(5) << level_blocks.total_size() << " blocks";
+      for (unsigned int b=0; b<level_blocks.size(); ++b)
+        os << ' '  << std::setw(5) << level_blocks.block_size(b);
+      os << std::endl;
+    }
+}
+
+
+// Estimate covering all five data members, including local_renumbering.
+inline std::size_t BlockInfo::memory_consumption () const
+{
+  return (MemoryConsumption::memory_consumption (bi_global) +
+          MemoryConsumption::memory_consumption (levels) +
+          MemoryConsumption::memory_consumption (bi_local) +
+          MemoryConsumption::memory_consumption (base_elements) +
+          MemoryConsumption::memory_consumption (local_renumbering));
+}
+
+
+// Archive every data member, in the order in which the class declares them;
+// the same statement sequence serves both reading and writing.
+template <class Archive>
+void BlockInfo::serialize (Archive &ar,
+                           const unsigned int /*version*/)
+{
+  ar &bi_global &levels &bi_local;
+  ar &base_elements
+  &local_renumbering;
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/dofs/dof_accessor.h b/include/deal.II/dofs/dof_accessor.h
new file mode 100644
index 0000000..1f3f7fa
--- /dev/null
+++ b/include/deal.II/dofs/dof_accessor.h
@@ -0,0 +1,1708 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__dof_accessor_h
+#define dealii__dof_accessor_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/hp/dof_handler.h>
+
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <typename number> class FullMatrix;
+template <typename number> class SparseMatrix;
+template <typename number> class Vector;
+class ConstraintMatrix;
+
+template <typename Accessor> class TriaRawIterator;
+
+template <int, int> class FiniteElement;
+
+
+namespace internal
+{
+  namespace DoFCellAccessor
+  {
+    struct Implementation;
+  }
+
+  namespace DoFHandler
+  {
+    struct Implementation;
+    namespace Policy
+    {
+      struct Implementation;
+    }
+  }
+
+  namespace hp
+  {
+    namespace DoFHandler
+    {
+      struct Implementation;
+    }
+  }
+}
+
+// note: the file dof_accessor.templates.h is included at the end of
+// this file.  this includes a lot of templates and thus makes
+// compilation slower, but at the same time allows for more aggressive
+// inlining and thus faster code.
+
+
+namespace internal
+{
+  namespace DoFAccessor
+  {
+    /**
+     * This is a switch class which only declares a @p typedef. It is meant to
+     * determine which class a DoFAccessor class is to be derived from. By
+     * default, <tt>DoFAccessor@<structdim,dim,spacedim@></tt> derives from
+     * the typedef in the general
+     * <tt>Inheritance@<structdim,dim,spacedim@></tt> class, which is
+     * <tt>TriaAccessor@<structdim,dim,spacedim@></tt>, but if
+     * <tt>structdim==dim</tt>, then the specialization
+     * <tt>Inheritance@<dim,dim,spacedim@></tt> is used which declares its
+     * local type to be <tt>CellAccessor@<dim,spacedim@></tt>. Therefore, the
+     * inheritance is automatically chosen to be from CellAccessor if the
+     * object under consideration has full dimension, i.e. constitutes a cell.
+     *
+     * @ingroup dofs
+     * @ingroup Accessors
+     * @author Wolfgang Bangerth, 1999
+     */
+    template <int structdim, int dim, int spacedim>
+    struct Inheritance
+    {
+      /**
+       * Declaration of the @p typedef.  See the full documentation for more
+       * information.
+       */
+      typedef dealii::TriaAccessor<structdim,dim,spacedim> BaseClass;
+    };
+
+
+    /**
+     * This is the specialization of the general template used for the case
+     * where an object has full dimension, i.e. is a cell. See the general
+     * template for more details.
+     */
+    template <int dim, int spacedim>
+    struct Inheritance<dim,dim,spacedim>
+    {
+      /**
+       * Declaration of the @p typedef.  See the full documentation for more
+       * information.
+       */
+      typedef dealii::CellAccessor<dim,spacedim> BaseClass;
+    };
+  }
+}
+
+
+/* -------------------------------------------------------------------------- */
+
+
+
+/**
+ * A class that gives access to the degrees of freedom stored in a DoFHandler
+ * or hp::DoFHandler object. Accessors are used to access the data that
+ * pertains to edges, faces, and cells of a triangulation. The concept is
+ * explained in more detail in connection to
+ * @ref Iterators.
+ *
+ * This class follows mainly the route laid out by the accessor library
+ * declared in the triangulation library (TriaAccessor). It enables the user
+ * to access the degrees of freedom on lines, quads, or hexes. The first
+ * template argument of this class determines the dimensionality of the object
+ * under consideration: 1 for lines, 2 for quads, and 3 for hexes. The second
+ * argument denotes the type of DoF handler we should work on. It can either
+ * be ::DoFHandler or hp::DoFHandler.  From the second template argument we
+ * also deduce the dimension of the Triangulation this object refers to as
+ * well as the dimension of the space into which it is embedded. Finally, the
+ * template argument <code>level_dof_access</code> governs the behavior of the
+ * function get_active_or_mg_dof_indices(). See the section on Generic loops
+ * below.
+ *
+ * <h3>Typedefs</h3>
+ *
+ * This class is best used through the typedefs to the various kinds of
+ * iterators provided by the DoFHandler and hp::DoFHandler classes, since they
+ * are more robust against changes in the class naming and template interface
+ * and are also easier to type (much less complicated names!).
+ *
+ * <h3>Generic loops and the third template argument</h3>
+ *
+ * Many loops look very similar, whether they operate on the active dofs of
+ * the active cells of the Triangulation or on the level dofs of a single
+ * level or the whole grid hierarchy. In order to use polymorphism in such
+ * loops, they access degrees of freedom through the function
+ * get_active_or_mg_dof_indices(), which changes behavior according to the
+ * third template argument.  If the argument is false, then the active dofs of
+ * active cells are accessed. If it is true, the level dofs are used.
+ * DoFHandler has functions, for instance begin() and begin_mg(), which return
+ * one type or the other. Additionally, they can be cast into each other,
+ * in case this is needed, since they access the same data.
+ *
+ * It is highly recommended to use the function get_active_or_mg_dof_indices()
+ * in generic loops in lieu of get_dof_indices() or get_mg_dof_indices().
+ *
+ * <h3>Inheritance</h3>
+ *
+ * If the structural dimension given by the first template argument equals the
+ * dimension of the DoFHandler (given as the second template argument), then
+ * we are obviously dealing with cells, rather than lower-dimensional objects.
+ * In that case, inheritance is from CellAccessor, to provide access to all
+ * the cell specific information afforded by that class. Otherwise, i.e. for
+ * lower-dimensional objects, inheritance is from TriaAccessor.
+ *
+ * There is a DoFCellAccessor class that provides the equivalent to the
+ * CellAccessor class.
+ *
+ * @ingroup dofs
+ * @ingroup Accessors
+ * @author Wolfgang Bangerth, 1998, 2006, 2008, Timo Heister, Guido Kanschat,
+ * 2012, 2013
+ */
+template <int structdim, typename DoFHandlerType, bool level_dof_access>
+class DoFAccessor : public dealii::internal::DoFAccessor::Inheritance<structdim, DoFHandlerType::dimension, DoFHandlerType::space_dimension>::BaseClass
+{
+public:
+
+  /**
+   * A static variable that allows users of this class to discover the
+   * dimension of the DoFHandlerType given as second template argument.
+   */
+  static const unsigned int dimension=DoFHandlerType::dimension;
+
+  /**
+   * A static variable that allows users of this class to discover the space
+   * dimension of the DoFHandlerType given as second template argument.
+   */
+  static const unsigned int space_dimension=DoFHandlerType::space_dimension;
+
+  /**
+   * Declare a typedef to the base class to make accessing some of the
+   * exception classes simpler.
+   */
+  typedef
+  typename dealii::internal::DoFAccessor::Inheritance<structdim, dimension, space_dimension>::BaseClass
+  BaseClass;
+
+  /**
+   * Data type passed by the iterator class.
+   */
+  typedef DoFHandlerType AccessorData;
+
+  /**
+   * @name Constructors
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * Default constructor. Provides an accessor that can't be used.
+   */
+  DoFAccessor ();
+
+  /**
+   * Constructor
+   */
+  DoFAccessor (const Triangulation<DoFHandlerType::dimension,DoFHandlerType::space_dimension> *tria,
+               const int             level,
+               const int             index,
+               const DoFHandlerType *local_data);
+
+  /**
+   * Conversion constructor. This constructor exists to make certain
+   * constructs simpler to write in dimension independent code. For example,
+   * it allows assigning a face iterator to a line iterator, an operation that
+   * is useful in 2d but doesn't make any sense in 3d. The constructor here
+   * exists for the purpose of making the code conform to C++ but it will
+   * unconditionally abort; in other words, assigning a face iterator to a
+   * line iterator is better put into an if-statement that checks that the
+   * dimension is two, and assign to a quad iterator in 3d (an operator that,
+   * without this constructor would be illegal if we happen to compile for
+   * 2d).
+   */
+  template <int structdim2, int dim2, int spacedim2>
+  DoFAccessor (const InvalidAccessor<structdim2,dim2,spacedim2> &);
+
+  /**
+   * Another conversion operator between objects that don't make sense, just
+   * like the previous one.
+   */
+  template <int dim2, class DoFHandlerType2, bool level_dof_access2>
+  DoFAccessor (const DoFAccessor<dim2, DoFHandlerType2, level_dof_access2> &);
+
+  /**
+   * Copy constructor allowing to switch level access and active access.
+   */
+  template <bool level_dof_access2>
+  DoFAccessor(const DoFAccessor<structdim, DoFHandlerType, level_dof_access2> &);
+  /**
+   * @}
+   */
+
+  /**
+   * Return a handle on the DoFHandler object which we are using.
+   */
+  const DoFHandlerType &
+  get_dof_handler () const;
+
+  /**
+   * Implement the copy operator needed for the iterator classes.
+   */
+  template <bool level_dof_access2>
+  void copy_from (const DoFAccessor<structdim, DoFHandlerType, level_dof_access2> &a);
+
+  /**
+   * Copy operator used by the iterator class. Keeps the previously set dof
+   * handler, but sets the object coordinates of the TriaAccessor.
+   */
+  void copy_from (const TriaAccessorBase<structdim, DoFHandlerType::dimension, DoFHandlerType::space_dimension> &da);
+
+  /**
+   * Tell the caller whether get_active_or_mg_dof_indices() accesses active or
+   * level dofs.
+   */
+  static bool is_level_cell();
+
+  /**
+   * @name Accessing sub-objects
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * Return an iterator pointing to the @p c-th child.
+   */
+  TriaIterator<DoFAccessor<structdim,DoFHandlerType, level_dof_access> >
+  child (const unsigned int c) const;
+
+  /**
+   * Pointer to the @p ith line bounding this object. If the current object is
+   * a line itself, then the only valid index is @p i equals to zero, and the
+   * function returns an iterator to itself.
+   */
+  typename dealii::internal::DoFHandler::Iterators<DoFHandlerType, level_dof_access>::line_iterator
+  line (const unsigned int i) const;
+
+  /**
+   * Pointer to the @p ith quad bounding this object. If the current object is
+   * a quad itself, then the only valid index is @p i equals to zero, and the
+   * function returns an iterator to itself.
+   */
+  typename dealii::internal::DoFHandler::Iterators<DoFHandlerType, level_dof_access>::quad_iterator
+  quad (const unsigned int i) const;
+
+  /**
+   * @}
+   */
+
+  /**
+   * @name Accessing the DoF indices of this object
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * Return the <i>global</i> indices of the degrees of freedom located on
+   * this object in the standard ordering defined by the finite element (i.e.,
+   * dofs on vertex 0, dofs on vertex 1, etc, dofs on line 0, dofs on line 1,
+   * etc, dofs on quad 0, etc.) This function is only available on
+   * <i>active</i> objects (see
+   * @ref GlossActive "this glossary entry").
+   *
+   * The cell needs to be an active cell (and not artificial in a parallel
+   * distributed computation).
+   *
+   * The vector has to have the right size before being passed to this
+   * function.
+   *
+   * The last argument denotes the finite element index. For the standard
+   * ::DoFHandler class, this value must be equal to its default value since
+   * that class only supports the same finite element on all cells anyway.
+   *
+   * However, for hp objects (i.e. the hp::DoFHandler class), different finite
+   * element objects may be used on different cells. On faces between two
+   * cells, as well as vertices, there may therefore be two sets of degrees of
+   * freedom, one for each of the finite elements used on the adjacent cells.
+   * In order to specify which set of degrees of freedom to work on, the last
+   * argument is used to disambiguate. Finally, if this function is called for
+   * a cell object, there can only be a single set of degrees of freedom, and
+   * fe_index has to match the result of active_fe_index().
+   *
+   * For cells, there is only a single possible finite element index (namely
+   * the one for that cell, returned by <code>cell-@>active_fe_index</code>).
+   * Consequently, the derived DoFCellAccessor class has an overloaded version
+   * of this function that calls the present function with
+   * <code>cell-@>active_fe_index</code> as last argument.
+   *
+   */
+  void get_dof_indices (std::vector<types::global_dof_index> &dof_indices,
+                        const unsigned int fe_index = DoFHandlerType::default_fe_index) const;
+
+  /**
+   * Return the global multilevel indices of the degrees of freedom that live
+   * on the current object with respect to the given level within the
+   * multigrid hierarchy. The indices refer to the local numbering for the
+   * level this line lives on.
+   */
+  void get_mg_dof_indices (const int level,
+                           std::vector<types::global_dof_index> &dof_indices,
+                           const unsigned int fe_index = DoFHandlerType::default_fe_index) const;
+
+  /**
+   * Sets the level DoF indices that are returned by get_mg_dof_indices.
+   */
+  void set_mg_dof_indices (const int level,
+                           const std::vector<types::global_dof_index> &dof_indices,
+                           const unsigned int fe_index = DoFHandlerType::default_fe_index);
+
+  /**
+   * Global DoF index of the <i>i</i>th degree of freedom associated with the
+   * @p vertexth vertex of the present cell.
+   *
+   * The last argument denotes the finite element index. For the standard
+   * ::DoFHandler class, this value must be equal to its default value since
+   * that class only supports the same finite element on all cells anyway.
+   *
+   * However, for hp objects (i.e. the hp::DoFHandler class), different finite
+   * element objects may be used on different cells. On faces between two
+   * cells, as well as vertices, there may therefore be two sets of degrees of
+   * freedom, one for each of the finite elements used on the adjacent cells.
+   * In order to specify which set of degrees of freedom to work on, the last
+   * argument is used to disambiguate. Finally, if this function is called for
+   * a cell object, there can only be a single set of degrees of freedom, and
+   * fe_index has to match the result of active_fe_index().
+   */
+  types::global_dof_index vertex_dof_index
+  (const unsigned int vertex,
+   const unsigned int i,
+   const unsigned int fe_index = DoFHandlerType::default_fe_index) const;
+
+  /**
+   * Returns the global DoF index of the <code>i</code>th degree of freedom
+   * associated with the <code>vertex</code>th vertex on level @p level. Also
+   * see vertex_dof_index().
+   */
+  types::global_dof_index mg_vertex_dof_index
+  (const int level,
+   const unsigned int vertex,
+   const unsigned int i,
+   const unsigned int fe_index = DoFHandlerType::default_fe_index) const;
+
+  /**
+   * Index of the <i>i</i>th degree of freedom of this object.
+   *
+   * The last argument denotes the finite element index. For the standard
+   * ::DoFHandler class, this value must be equal to its default value since
+   * that class only supports the same finite element on all cells anyway.
+   *
+   * However, for hp objects (i.e. the hp::DoFHandler class), different finite
+   * element objects may be used on different cells. On faces between two
+   * cells, as well as vertices, there may therefore be two sets of degrees of
+   * freedom, one for each of the finite elements used on the adjacent cells.
+   * In order to specify which set of degrees of freedom to work on, the last
+   * argument is used to disambiguate. Finally, if this function is called for
+   * a cell object, there can only be a single set of degrees of freedom, and
+   * fe_index has to match the result of active_fe_index().
+   *
+   * @note While the get_dof_indices() function returns an array that contains
+   * the indices of all degrees of freedom that somehow live on this object
+   * (i.e. on the vertices, edges or interior of this object), the current
+   * dof_index() function only considers the DoFs that really belong to this
+   * particular object's interior. In other words, as an example, if the
+   * current object refers to a quad (a cell in 2d, a face in 3d) and the
+   * finite element associated with it is a bilinear one, then the
+   * get_dof_indices() will return an array of size 4 while dof_index() will
+   * produce an exception because no degrees are defined in the interior of
+   * the face.
+   */
+  types::global_dof_index dof_index
+  (const unsigned int i,
+   const unsigned int fe_index = DoFHandlerType::default_fe_index) const;
+
+  /**
+   * Returns the dof_index on the given level. Also see dof_index.
+   */
+  types::global_dof_index mg_dof_index (const int level, const unsigned int i) const;
+
+  /**
+   * @}
+   */
+
+  /**
+   * @name Accessing the finite element associated with this object
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * Return the number of finite elements that are active on a given object.
+   *
+   * For non-hp DoFHandler objects, the answer is of course always one.
+   * However, for hp::DoFHandler objects, this isn't the case: If this is a
+   * cell, the answer is of course one. If it is a face, the answer may be one
+   * or two, depending on whether the two adjacent cells use the same finite
+   * element or not. If it is an edge in 3d, the possible return value may be
+   * one or any other value larger than that.
+   */
+  unsigned int
+  n_active_fe_indices () const;
+
+  /**
+   * Return the @p n-th active fe index on this object. For cells and all non-
+   * hp objects, there is only a single active fe index, so the argument must
+   * be equal to zero. For lower-dimensional hp objects, there are
+   * n_active_fe_indices() active finite elements, and this function can be
+   * queried for their indices.
+   */
+  unsigned int
+  nth_active_fe_index (const unsigned int n) const;
+
+  /**
+   * Return true if the finite element with given index is active on the
+   * present object. For non-hp DoF accessors, this is of course the case only
+   * if @p fe_index equals zero. For cells, it is the case if @p fe_index
+   * equals active_fe_index() of this cell. For faces and other lower-
+   * dimensional objects, there may be more than one @p fe_index that are
+   * active on any given object (see n_active_fe_indices()).
+   */
+  bool
+  fe_index_is_active (const unsigned int fe_index) const;
+
+  /**
+   * Return a reference to the finite element used on this object with the
+   * given @p fe_index. @p fe_index must be used on this object, i.e.
+   * <code>fe_index_is_active(fe_index)</code> must return true.
+   */
+  const FiniteElement<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &
+  get_fe (const unsigned int fe_index) const;
+
+  /**
+   * @}
+   */
+
+  /**
+   * Exceptions for child classes
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcInvalidObject);
+  /**
+   * Exception
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcVectorNotEmpty);
+  /**
+   * Exception
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcVectorDoesNotMatch);
+  /**
+   * Exception
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcMatrixDoesNotMatch);
+  /**
+   * A function has been called for a cell which should be
+   * @ref GlossActive "active",
+   * but is refined.
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcNotActive);
+  /**
+   * Exception
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcCantCompareIterators);
+
+protected:
+
+  /**
+   * Store the address of the DoFHandler object to be accessed.
+   */
+  DoFHandlerType *dof_handler;
+public:
+  /**
+   * Compare for equality. Return <tt>true</tt> if the two accessors refer to
+   * the same object.
+   *
+   * The template parameters of this function allow for a comparison of very
+   * different objects. Therefore, some of them are disabled. Namely, if the
+   * dimension, or the dof handler of the two objects differ, an exception is
+   * generated. It can be expected that this is an unwanted comparison.
+   *
+   * The template parameter <tt>level_dof_access2</tt> is ignored, such that
+   * an iterator with level access can be equal to one with access to the
+   * active degrees of freedom.
+   */
+  template <int dim2, class DoFHandlerType2, bool level_dof_access2>
+  bool operator == (const DoFAccessor<dim2,DoFHandlerType2,level_dof_access2> &) const;
+
+  /**
+   * Compare for inequality. The boolean not of operator==().
+   */
+  template <int dim2, class DoFHandlerType2, bool level_dof_access2>
+  bool operator != (const DoFAccessor<dim2,DoFHandlerType2,level_dof_access2> &) const;
+protected:
+  /**
+   * Reset the DoF handler pointer.
+   */
+  void set_dof_handler (DoFHandlerType *dh);
+
+  /**
+   * Set the index of the <i>i</i>th degree of freedom of this object to @p
+   * index.
+   *
+   * The last argument denotes the finite element index. For the standard
+   * ::DoFHandler class, this value must be equal to its default value since
+   * that class only supports the same finite element on all cells anyway.
+   *
+   * However, for hp objects (i.e. the hp::DoFHandler class), different finite
+   * element objects may be used on different cells. On faces between two
+   * cells, as well as vertices, there may therefore be two sets of degrees of
+   * freedom, one for each of the finite elements used on the adjacent cells.
+   * In order to specify which set of degrees of freedom to work on, the last
+   * argument is used to disambiguate. Finally, if this function is called for
+   * a cell object, there can only be a single set of degrees of freedom, and
+   * fe_index has to match the result of active_fe_index().
+   */
+  void set_dof_index
+  (const unsigned int i,
+   const types::global_dof_index index,
+   const unsigned int fe_index = DoFHandlerType::default_fe_index) const;
+
+  void set_mg_dof_index (const int level, const unsigned int i, const types::global_dof_index index) const;
+
+  /**
+   * Set the global index of the <i>i</i>th degree of freedom on the
+   * @p vertex-th vertex of the present cell to @p index.
+   *
+   * The last argument denotes the finite element index. For the standard
+   * ::DoFHandler class, this value must be equal to its default value since
+   * that class only supports the same finite element on all cells anyway.
+   *
+   * However, for hp objects (i.e. the hp::DoFHandler class), different finite
+   * element objects may be used on different cells. On faces between two
+   * cells, as well as vertices, there may therefore be two sets of degrees of
+   * freedom, one for each of the finite elements used on the adjacent cells.
+   * In order to specify which set of degrees of freedom to work on, the last
+   * argument is used to disambiguate. Finally, if this function is called for
+   * a cell object, there can only be a single set of degrees of freedom, and
+   * fe_index has to match the result of active_fe_index().
+   */
+  void set_vertex_dof_index
+  (const unsigned int            vertex,
+   const unsigned int            i,
+   const types::global_dof_index index,
+   const unsigned int            fe_index = DoFHandlerType::default_fe_index) const;
+
+  void set_mg_vertex_dof_index
+  (const int level,
+   const unsigned int vertex,
+   const unsigned int i,
+   const types::global_dof_index index,
+   const unsigned int fe_index = DoFHandlerType::default_fe_index) const;
+
+  /**
+   * Iterator classes need to be friends because they need to access
+   * operator== and operator!=.
+   */
+  template <typename> friend class TriaRawIterator;
+  template <int, class, bool> friend class DoFAccessor;
+
+private:
+  /**
+   * Copy operator. This is normally used in a context like <tt>iterator a,b;
+   * *a=*b;</tt>. Presumably, the intent here is to copy the object pointed to
+   * by @p b to the object pointed to by @p a. However, the result of
+   * dereferencing an iterator is not an object but an accessor; consequently,
+   * this operation is not useful for iterators on triangulations. We declare
+   * this function here private, thus it may not be used from outside.
+   * Furthermore it is not implemented and will give a linker error if used
+   * anyway.
+   */
+  DoFAccessor<structdim,DoFHandlerType, level_dof_access> &
+  operator = (const DoFAccessor<structdim,DoFHandlerType, level_dof_access> &da);
+
+  /**
+   * Make the DoFHandler class a friend so that it can call the set_xxx()
+   * functions.
+   */
+  template <int dim, int spacedim> friend class DoFHandler;
+  template <int dim, int spacedim> friend class hp::DoFHandler;
+
+  friend struct dealii::internal::DoFHandler::Policy::Implementation;
+  friend struct dealii::internal::DoFHandler::Implementation;
+  friend struct dealii::internal::hp::DoFHandler::Implementation;
+  friend struct dealii::internal::DoFCellAccessor::Implementation;
+  friend struct dealii::internal::DoFAccessor::Implementation;
+};
+
+
+
+/**
+ * Specialization of the general DoFAccessor class template for the case of
+ * zero-dimensional objects (a vertex) that are the face of a one-dimensional
+ * cell in spacedim space dimensions. Since vertices function differently than
+ * general faces, this class does a few things differently than the general
+ * template, but the interface should look the same.
+ *
+ * @author Wolfgang Bangerth, 2010
+ */
+template <template <int, int> class DoFHandlerType, int spacedim, bool level_dof_access>
+class DoFAccessor<0,DoFHandlerType<1,spacedim>, level_dof_access> : public TriaAccessor<0,1,spacedim>
+{
+public:
+
+  /**
+   * A static variable that allows users of this class to discover the
+   * dimension of the DoFHandlerType this accessor works on (always 1 here).
+   */
+  static const unsigned int dimension=1;
+
+  /**
+   * A static variable that allows users of this class to discover the space
+   * dimension of the DoFHandlerType this accessor works on.
+   */
+  static const unsigned int space_dimension=spacedim;
+
+  /**
+   * Declare a typedef to the base class to make accessing some of the
+   * exception classes simpler.
+   */
+  typedef TriaAccessor<0,1,spacedim> BaseClass;
+
+  /**
+   * Data type passed by the iterator class.
+   */
+  typedef DoFHandlerType<1,spacedim> AccessorData;
+
+  /**
+   * @name Constructors
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * Default constructor. Provides an accessor that can't be used.
+   */
+  DoFAccessor ();
+
+  /**
+   * Constructor to be used if the object here refers to a vertex of a one-
+   * dimensional triangulation, i.e. a face of the triangulation.
+   *
+   * Since there is no mapping from vertices to cells, an accessor object for
+   * a point has no way to figure out whether it is at the boundary of the
+   * domain or not. Consequently, the second argument must be passed by the
+   * object that generates this accessor -- e.g. a 1d cell that can figure out
+   * whether its left or right vertex are at the boundary.
+   *
+   * The third argument is the global index of the vertex we point to.
+   *
+   * The fourth argument is a pointer to the DoFHandler object.
+   *
+   * This constructor can only be called for one-dimensional triangulations.
+   */
+  DoFAccessor (const Triangulation<1,spacedim>                       *tria,
+               const typename TriaAccessor<0,1,spacedim>::VertexKind  vertex_kind,
+               const unsigned int                                     vertex_index,
+               const DoFHandlerType<1,spacedim>                      *dof_handler);
+
+  /**
+   * Constructor. This constructor exists in order to maintain interface
+   * compatibility with the other accessor classes. However, it doesn't do
+   * anything useful here and so may not actually be called.
+   */
+  DoFAccessor (const Triangulation<1,spacedim> *,
+               const                             int         = 0,
+               const                             int         = 0,
+               const DoFHandlerType<1,spacedim> *dof_handler = 0);
+
+  /**
+   * Conversion constructor. This constructor exists to make certain
+   * constructs simpler to write in dimension independent code. For example,
+   * it allows assigning a face iterator to a line iterator, an operation that
+   * is useful in 2d but doesn't make any sense in 3d. The constructor here
+   * exists for the purpose of making the code conform to C++ but it will
+   * unconditionally abort; in other words, assigning a face iterator to a
+   * line iterator is better put into an if-statement that checks that the
+   * dimension is two, and assign to a quad iterator in 3d (an operator that,
+   * without this constructor would be illegal if we happen to compile for
+   * 2d).
+   */
+  template <int structdim2, int dim2, int spacedim2>
+  DoFAccessor (const InvalidAccessor<structdim2,dim2,spacedim2> &);
+
+  /**
+   * Another conversion operator between objects that don't make sense, just
+   * like the previous one.
+   */
+  template <int dim2, class DoFHandlerType2, bool level_dof_access2>
+  DoFAccessor (const DoFAccessor<dim2, DoFHandlerType2, level_dof_access2> &);
+
+  /**
+   * @}
+   */
+
+  /**
+   * Return a handle on the DoFHandler object which we are using.
+   */
+  const DoFHandlerType<1,spacedim> &
+  get_dof_handler () const;
+
+  /**
+   * Copy operator.
+   */
+  DoFAccessor<0,DoFHandlerType<1,spacedim>, level_dof_access> &
+  operator = (const DoFAccessor<0,DoFHandlerType<1,spacedim>, level_dof_access> &da);
+
+  /**
+   * Implement the copy operator needed for the iterator classes.
+   */
+  template <bool level_dof_access2>
+  void copy_from (const DoFAccessor<0, DoFHandlerType<1,spacedim>, level_dof_access2> &a);
+
+  /**
+   * Copy operator used by the iterator class. Keeps the previously set dof
+   * handler, but sets the object coordinates of the TriaAccessor.
+   */
+  void copy_from (const TriaAccessorBase<0, 1, spacedim> &da);
+
+  /**
+   * @name Accessing sub-objects
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * Return an invalid iterator of a type that represents pointing to a child
+   * of the current object. The object is invalid because points (as
+   * represented by the current class) do not have children.
+   */
+  TriaIterator<DoFAccessor<0,DoFHandlerType<1,spacedim>, level_dof_access > >
+  child (const unsigned int c) const;
+
+  /**
+   * Pointer to the @p ith line bounding this object. If the current object is
+   * a line itself, then the only valid index is @p i equals to zero, and the
+   * function returns an iterator to itself.
+   */
+  typename dealii::internal::DoFHandler::Iterators<DoFHandlerType<1,spacedim>, level_dof_access>::line_iterator
+  line (const unsigned int i) const;
+
+  /**
+   * Pointer to the @p ith quad bounding this object. If the current object is
+   * a quad itself, then the only valid index is @p i equals to zero, and the
+   * function returns an iterator to itself.
+   */
+  typename dealii::internal::DoFHandler::Iterators<DoFHandlerType<1,spacedim>, level_dof_access>::quad_iterator
+  quad (const unsigned int i) const;
+
+  /**
+   * @}
+   */
+
+  /**
+   * @name Accessing the DoF indices of this object
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * Return the <i>global</i> indices of the degrees of freedom located on
+   * this object in the standard ordering defined by the finite element (i.e.,
+   * dofs on vertex 0, dofs on vertex 1, etc, dofs on line 0, dofs on line 1,
+   * etc, dofs on quad 0, etc.) This function is only available on
+   * <i>active</i> objects (see
+   * @ref GlossActive "this glossary entry").
+   *
+   * The cell needs to be an active cell (and not artificial in a parallel
+   * distributed computation).
+   *
+   * The vector has to have the right size before being passed to this
+   * function.
+   *
+   * The last argument denotes the finite element index. For the standard
+   * ::DoFHandler class, this value must be equal to its default value since
+   * that class only supports the same finite element on all cells anyway.
+   *
+   * However, for hp objects (i.e. the hp::DoFHandler class), different finite
+   * element objects may be used on different cells. On faces between two
+   * cells, as well as vertices, there may therefore be two sets of degrees of
+   * freedom, one for each of the finite elements used on the adjacent cells.
+   * In order to specify which set of degrees of freedom to work on, the last
+   * argument is used to disambiguate. Finally, if this function is called for
+   * a cell object, there can only be a single set of degrees of freedom, and
+   * fe_index has to match the result of active_fe_index().
+   *
+   * For cells, there is only a single possible finite element index (namely
+   * the one for that cell, returned by <code>cell-@>active_fe_index</code>).
+   * Consequently, the derived DoFCellAccessor class has an overloaded version
+   * of this function that calls the present function with
+   * <code>cell-@>active_fe_index</code> as last argument.
+   */
+  void get_dof_indices (std::vector<types::global_dof_index> &dof_indices,
+                        const unsigned int fe_index = AccessorData::default_fe_index) const;
+
+  /**
+   * Global DoF index of the <i>i</i>th degree of freedom associated with the
+   * @p vertex-th vertex of the present cell.
+   *
+   * The last argument denotes the finite element index. For the standard
+   * ::DoFHandler class, this value must be equal to its default value since
+   * that class only supports the same finite element on all cells anyway.
+   *
+   * However, for hp objects (i.e. the hp::DoFHandler class), different finite
+   * element objects may be used on different cells. On faces between two
+   * cells, as well as vertices, there may therefore be two sets of degrees of
+   * freedom, one for each of the finite elements used on the adjacent cells.
+   * In order to specify which set of degrees of freedom to work on, the last
+   * argument is used to disambiguate. Finally, if this function is called for
+   * a cell object, there can only be a single set of degrees of freedom, and
+   * fe_index has to match the result of active_fe_index().
+   */
+  types::global_dof_index vertex_dof_index (const unsigned int vertex,
+                                            const unsigned int i,
+                                            const unsigned int fe_index = AccessorData::default_fe_index) const;
+
+  /**
+   * Index of the <i>i</i>th degree of freedom of this object.
+   *
+   * The last argument denotes the finite element index. For the standard
+   * ::DoFHandler class, this value must be equal to its default value since
+   * that class only supports the same finite element on all cells anyway.
+   *
+   * However, for hp objects (i.e. the hp::DoFHandler class), different finite
+   * element objects may be used on different cells. On faces between two
+   * cells, as well as vertices, there may therefore be two sets of degrees of
+   * freedom, one for each of the finite elements used on the adjacent cells.
+   * In order to specify which set of degrees of freedom to work on, the last
+   * argument is used to disambiguate. Finally, if this function is called for
+   * a cell object, there can only be a single set of degrees of freedom, and
+   * fe_index has to match the result of active_fe_index().
+   *
+   * @note While the get_dof_indices() function returns an array that contains
+   * the indices of all degrees of freedom that somehow live on this object
+   * (i.e. on the vertices, edges or interior of this object), the current
+   * dof_index() function only considers the DoFs that really belong to this
+   * particular object's interior. In other words, as an example, if the
+   * current object refers to a quad (a cell in 2d, a face in 3d) and the
+   * finite element associated with it is a bilinear one, then the
+   * get_dof_indices() will return an array of size 4 while dof_index() will
+   * produce an exception because no degrees are defined in the interior of
+   * the face.
+   */
+  types::global_dof_index dof_index (const unsigned int i,
+                                     const unsigned int fe_index = AccessorData::default_fe_index) const;
+
+  /**
+   * @}
+   */
+
+  /**
+   * @name Accessing the finite element associated with this object
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * Return the number of finite elements that are active on a given object.
+   *
+   * For non-hp DoFHandler objects, the answer is of course always one.
+   * However, for hp::DoFHandler objects, this isn't the case: If this is a
+   * cell, the answer is of course one. If it is a face, the answer may be one
+   * or two, depending on whether the two adjacent cells use the same finite
+   * element or not. If it is an edge in 3d, the possible return value may be
+   * one or any other value larger than that.
+   */
+  unsigned int
+  n_active_fe_indices () const;
+
+  /**
+   * Return the @p n-th active fe index on this object. For cells and all non-
+   * hp objects, there is only a single active fe index, so the argument must
+   * be equal to zero. For lower-dimensional hp objects, there are
+   * n_active_fe_indices() active finite elements, and this function can be
+   * queried for their indices.
+   */
+  unsigned int
+  nth_active_fe_index (const unsigned int n) const;
+
+  /**
+   * Return true if the finite element with given index is active on the
+   * present object. For non-hp DoF accessors, this is of course the case only
+   * if @p fe_index equals zero. For cells, it is the case if @p fe_index
+   * equals active_fe_index() of this cell. For faces and other lower-
+   * dimensional objects, there may be more than one @p fe_index that are
+   * active on any given object (see n_active_fe_indices()).
+   */
+  bool
+  fe_index_is_active (const unsigned int fe_index) const;
+
+  /**
+   * Return a reference to the finite element used on this object with the
+   * given @p fe_index. @p fe_index must be used on this object, i.e.
+   * <code>fe_index_is_active(fe_index)</code> must return true.
+   */
+  const FiniteElement<DoFHandlerType<1,spacedim>::dimension,DoFHandlerType<1,spacedim>::space_dimension> &
+  get_fe (const unsigned int fe_index) const;
+
+  /**
+   * @}
+   */
+
+  /**
+   * Exceptions for child classes
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcInvalidObject);
+  /**
+   * Exception
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcVectorNotEmpty);
+  /**
+   * Exception
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcVectorDoesNotMatch);
+  /**
+   * Exception
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcMatrixDoesNotMatch);
+  /**
+   * A function has been called for a cell which should be
+   * @ref GlossActive "active",
+   * but is refined.
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcNotActive);
+  /**
+   * Exception
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcCantCompareIterators);
+
+protected:
+
+  /**
+   * Store the address of the DoFHandler object to be accessed.
+   */
+  DoFHandlerType<1,spacedim> *dof_handler;
+
+  /**
+   * Compare for equality.
+   */
+  template <int dim2, class DoFHandlerType2, bool level_dof_access2>
+  bool operator == (const DoFAccessor<dim2,DoFHandlerType2,level_dof_access2> &) const;
+
+  /**
+   * Compare for inequality.
+   */
+  template <int dim2, class DoFHandlerType2, bool level_dof_access2>
+  bool operator != (const DoFAccessor<dim2,DoFHandlerType2,level_dof_access2> &) const;
+
+  /**
+   * Reset the DoF handler pointer.
+   */
+  void set_dof_handler (DoFHandlerType<1,spacedim> *dh);
+
+  /**
+   * Set the index of the <i>i</i>th degree of freedom of this object to @p
+   * index.
+   *
+   * The last argument denotes the finite element index. For the standard
+   * ::DoFHandler class, this value must be equal to its default value since
+   * that class only supports the same finite element on all cells anyway.
+   *
+   * However, for hp objects (i.e. the hp::DoFHandler class), different finite
+   * element objects may be used on different cells. On faces between two
+   * cells, as well as vertices, there may therefore be two sets of degrees of
+   * freedom, one for each of the finite elements used on the adjacent cells.
+   * In order to specify which set of degrees of freedom to work on, the last
+   * argument is used to disambiguate. Finally, if this function is called for
+   * a cell object, there can only be a single set of degrees of freedom, and
+   * fe_index has to match the result of active_fe_index().
+   */
+  void set_dof_index (const unsigned int i,
+                      const types::global_dof_index index,
+                      const unsigned int fe_index = AccessorData::default_fe_index) const;
+
+  /**
+   * Set the global index of the <i>i</i>th degree of freedom on the
+   * @p vertex-th vertex of the present cell to @p index.
+   *
+   * The last argument denotes the finite element index. For the standard
+   * ::DoFHandler class, this value must be equal to its default value since
+   * that class only supports the same finite element on all cells anyway.
+   *
+   * However, for hp objects (i.e. the hp::DoFHandler class), different finite
+   * element objects may be used on different cells. On faces between two
+   * cells, as well as vertices, there may therefore be two sets of degrees of
+   * freedom, one for each of the finite elements used on the adjacent cells.
+   * In order to specify which set of degrees of freedom to work on, the last
+   * argument is used to disambiguate. Finally, if this function is called for
+   * a cell object, there can only be a single set of degrees of freedom, and
+   * fe_index has to match the result of active_fe_index().
+   */
+  void set_vertex_dof_index (const unsigned int vertex,
+                             const unsigned int i,
+                             const types::global_dof_index index,
+                             const unsigned int fe_index = AccessorData::default_fe_index) const;
+
+  /**
+   * Iterator classes need to be friends because they need to access
+   * operator== and operator!=.
+   */
+  template <typename> friend class TriaRawIterator;
+
+
+  /**
+   * Make the DoFHandler class a friend so that it can call the set_xxx()
+   * functions.
+   */
+  template <int, int> friend class DoFHandler;
+  template <int, int> friend class hp::DoFHandler;
+
+  friend struct dealii::internal::DoFHandler::Policy::Implementation;
+  friend struct dealii::internal::DoFHandler::Implementation;
+  friend struct dealii::internal::hp::DoFHandler::Implementation;
+  friend struct dealii::internal::DoFCellAccessor::Implementation;
+};
+
+
+/* -------------------------------------------------------------------------- */
+
+
+/**
+ * Grant access to the degrees of freedom on a cell.
+ *
+ * Note that since for the class we derive from, i.e.
+ * <tt>DoFAccessor<dim></tt>, the two template parameters are equal, the base
+ * class is actually derived from CellAccessor, which makes the functions of
+ * this class available to the DoFCellAccessor class as well.
+ *
+ * @ingroup dofs
+ * @ingroup Accessors
+ * @author Wolfgang Bangerth, 1998, Timo Heister, Guido Kanschat, 2012
+ */
+template <typename DoFHandlerType, bool level_dof_access>
+class DoFCellAccessor :  public DoFAccessor<DoFHandlerType::dimension,DoFHandlerType, level_dof_access>
+{
+public:
+  /**
+   * Extract dimension from DoFHandlerType.
+   */
+  static const unsigned int dim = DoFHandlerType::dimension;
+
+  /**
+   * Extract space dimension from DoFHandlerType.
+   */
+  static const unsigned int spacedim = DoFHandlerType::space_dimension;
+
+
+  /**
+   * Data type passed by the iterator class.
+   */
+  typedef DoFHandlerType AccessorData;
+
+  /**
+   * Declare a typedef to the base class to make accessing some of the
+   * exception classes simpler.
+   */
+  typedef DoFAccessor<DoFHandlerType::dimension,DoFHandlerType, level_dof_access> BaseClass;
+
+  /**
+   * Define the type of the container this is part of.
+   */
+  typedef DoFHandlerType Container;
+
+  /**
+   * A type for an iterator over the faces of a cell. This is what the face()
+   * function returns.
+   */
+  typedef
+  TriaIterator<DoFAccessor<DoFHandlerType::dimension-1, DoFHandlerType, level_dof_access> >
+  face_iterator;
+
+  /**
+   * @name Constructors and initialization
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * Constructor
+   */
+  DoFCellAccessor (const Triangulation<DoFHandlerType::dimension,DoFHandlerType::space_dimension> *tria,
+                   const int           level,
+                   const int           index,
+                   const AccessorData *local_data);
+
+  /**
+   * Conversion constructor. This constructor exists to make certain
+   * constructs simpler to write in dimension independent code. For example,
+   * it allows assigning a face iterator to a line iterator, an operation that
+   * is useful in 2d but doesn't make any sense in 3d. The constructor here
+   * exists for the purpose of making the code conform to C++ but it will
+   * unconditionally abort; in other words, assigning a face iterator to a
+   * line iterator is better put into an if-statement that checks that the
+   * dimension is two, and assign to a quad iterator in 3d (an operation that,
+   * without this constructor, would be illegal if we happen to compile for
+   * 2d).
+   */
+  template <int structdim2, int dim2, int spacedim2>
+  DoFCellAccessor (const InvalidAccessor<structdim2,dim2,spacedim2> &);
+
+  /**
+   * Another conversion operator between objects that don't make sense, just
+   * like the previous one.
+   */
+  template <int dim2, class DoFHandlerType2, bool level_dof_access2>
+  explicit
+  DoFCellAccessor (const DoFAccessor<dim2, DoFHandlerType2, level_dof_access2> &);
+
+  /**
+   * @}
+   */
+
+  /**
+   * Return the parent of this cell as a DoF cell iterator. If the parent does
+   * not exist (i.e., if the object is at the coarsest level of the mesh
+   * hierarchy), an exception is generated.
+   *
+   * This function is needed since the parent function of the base class
+   * CellAccessor returns a triangulation cell accessor without access to the
+   * DoF data.
+   */
+  TriaIterator<DoFCellAccessor<DoFHandlerType, level_dof_access> >
+  parent () const;
+
+  /**
+   * @name Accessing sub-objects and neighbors
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * Return the @p ith neighbor as a DoF cell iterator. This function is
+   * needed since the neighbor function of the base class returns a cell
+   * accessor without access to the DoF data.
+   */
+  TriaIterator<DoFCellAccessor<DoFHandlerType, level_dof_access> >
+  neighbor (const unsigned int) const;
+
+  /**
+   * Return the @p ith child as a DoF cell iterator. This function is needed
+   * since the child function of the base class returns a cell accessor
+   * without access to the DoF data.
+   */
+  TriaIterator<DoFCellAccessor<DoFHandlerType, level_dof_access> >
+  child (const unsigned int) const;
+
+  /**
+   * Return an iterator to the @p ith face of this cell.
+   *
+   * This function is not implemented in 1D, and returns DoFAccessor::line in
+   * 2D and DoFAccessor::quad in 3d.
+   */
+  face_iterator
+  face (const unsigned int i) const;
+
+  /**
+   * Return the result of the @p neighbor_child_on_subface function of the
+   * base class, but convert it so that one can also access the DoF data (the
+   * function in the base class only returns an iterator with access to the
+   * triangulation data).
+   */
+  TriaIterator<DoFCellAccessor<DoFHandlerType, level_dof_access> >
+  neighbor_child_on_subface (const unsigned int face_no,
+                             const unsigned int subface_no) const;
+
+  /**
+   * @}
+   */
+
+  /**
+   * @name Extracting values from global vectors
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * Return the values of the given vector restricted to the dofs of this cell
+   * in the standard ordering: dofs on vertex 0, dofs on vertex 1, etc, dofs
+   * on line 0, dofs on line 1, etc, dofs on quad 0, etc.
+   *
+   * The vector has to have the right size before being passed to this
+   * function. This function is only callable for active cells.
+   *
+   * The input vector may be either a <tt>Vector<float></tt>, Vector<double>,
+   * or a BlockVector<double>, or a PETSc or Trilinos vector if deal.II is
+   * compiled to support these libraries. It is in the responsibility of the
+   * caller to assure that the types of the numbers stored in input and output
+   * vectors are compatible and with similar accuracy.
+   */
+  template <class InputVector, typename number>
+  void get_dof_values (const InputVector &values,
+                       Vector<number>    &local_values) const;
+
+  /**
+   * Return the values of the given vector restricted to the dofs of this cell
+   * in the standard ordering: dofs on vertex 0, dofs on vertex 1, etc, dofs
+   * on line 0, dofs on line 1, etc, dofs on quad 0, etc.
+   *
+   * The vector has to have the right size before being passed to this
+   * function. This function is only callable for active cells.
+   *
+   * The input vector may be either a <tt>Vector<float></tt>, Vector<double>,
+   * or a BlockVector<double>, or a PETSc or Trilinos vector if deal.II is
+   * compiled to support these libraries. It is in the responsibility of the
+   * caller to assure that the types of the numbers stored in input and output
+   * vectors are compatible and with similar accuracy.
+   */
+  template <class InputVector, typename ForwardIterator>
+  void get_dof_values (const InputVector &values,
+                       ForwardIterator    local_values_begin,
+                       ForwardIterator    local_values_end) const;
+
+  /**
+   * Return the values of the given vector restricted to the dofs of this cell
+   * in the standard ordering: dofs on vertex 0, dofs on vertex 1, etc, dofs
+   * on line 0, dofs on line 1, etc, dofs on quad 0, etc.
+   *
+   * The vector has to have the right size before being passed to this
+   * function. This function is only callable for active cells.
+   *
+   * The input vector may be either a <tt>Vector<float></tt>, Vector<double>,
+   * or a BlockVector<double>, or a PETSc or Trilinos vector if deal.II is
+   * compiled to support these libraries. It is in the responsibility of the
+   * caller to assure that the types of the numbers stored in input and output
+   * vectors are compatible and with similar accuracy. The ConstraintMatrix
+   * passed as an argument to this function makes sure that constraints are
+   * correctly distributed when the dof values are calculated.
+   */
+  template <class InputVector, typename ForwardIterator>
+  void get_dof_values (const ConstraintMatrix &constraints,
+                       const InputVector      &values,
+                       ForwardIterator         local_values_begin,
+                       ForwardIterator         local_values_end) const;
+
+  /**
+   * This function is the counterpart to get_dof_values(): it takes a vector
+   * of values for the degrees of freedom of the cell pointed to by this
+   * iterator and writes these values into the global data vector @p values.
+   * This function is only callable for active cells.
+   *
+   * Note that for continuous finite elements, calling this function affects
+   * the dof values on neighboring cells as well. It may also violate
+   * continuity requirements for hanging nodes, if neighboring cells are less
+   * refined than the present one. These requirements are not taken care of
+   * and must be enforced by the user afterwards.
+   *
+   * The vector has to have the right size before being passed to this
+   * function.
+   *
+   * The output vector may be either a Vector<float>, Vector<double>, or a
+   * BlockVector<double>, or a PETSc vector if deal.II is compiled to support
+   * these libraries. It is in the responsibility of the caller to assure that
+   * the types of the numbers stored in input and output vectors are
+   * compatible and with similar accuracy.
+   */
+  template <class OutputVector, typename number>
+  void set_dof_values (const Vector<number> &local_values,
+                       OutputVector         &values) const;
+
+  /**
+   * Return the interpolation of the given finite element function to the
+   * present cell. In the simplest case, the cell is a terminal one, i.e., it
+   * has no children; then, the returned value is the vector of nodal values
+   * on that cell. You could as well get the desired values through the @p
+   * get_dof_values function. In the other case, when the cell has children,
+   * we use the restriction matrices provided by the finite element class to
+   * compute the interpolation from the children to the present cell.
+   *
+   * If the cell is part of a hp::DoFHandler object, cells only have an
+   * associated finite element space if they are active. However, this
+   * function is supposed to also provide information on inactive cells with
+   * children. Consequently, it carries a third argument that can be used in
+   * the hp context that denotes the finite element space we are supposed to
+   * interpolate onto. If the cell is active, this function then obtains the
+   * finite element function from the <code>values</code> vector on this cell
+   * and interpolates it onto the space described by the
+   * <code>fe_index</code>th element of the hp::FECollection associated with
+   * the hp::DoFHandler of which this cell is a part. If the cell is not
+   * active, then we first perform this interpolation on all of its terminal
+   * children and then interpolate this function down to the cell requested
+   * keeping the function space the same.
+   *
+   * It is assumed that both input vectors already have the right size
+   * beforehand.
+   *
+   * @note Unlike the get_dof_values() function, this function is only
+   * available on cells, rather than on lines, quads, and hexes, since
+   * interpolation is presently only provided for cells by the finite element
+   * classes.
+   */
+  template <class InputVector, typename number>
+  void get_interpolated_dof_values (const InputVector &values,
+                                    Vector<number>    &interpolated_values,
+                                    const unsigned int fe_index
+                                    = DoFHandlerType::default_fe_index) const;
+
+  /**
+   * This function is the counterpart to get_interpolated_dof_values(): you
+   * specify the dof values on a cell and these are interpolated to the
+   * children of the present cell and set on the terminal cells.
+   *
+   * In principle, it works as follows: if the cell pointed to by this object
+   * is terminal (i.e., has no children), then the dof values are set in the
+   * global data vector by calling the set_dof_values() function; otherwise,
+   * the values are prolonged to each of the children and this function is
+   * called for each of them.
+   *
+   * Using the get_interpolated_dof_values() and this function, you can
+   * compute the interpolation of a finite element function to a coarser grid
+   * by first getting the interpolated solution on a cell of the coarse grid
+   * and afterwards redistributing it using this function.
+   *
+   * Note that for continuous finite elements, calling this function affects
+   * the dof values on neighboring cells as well. It may also violate
+   * continuity requirements for hanging nodes, if neighboring cells are less
+   * refined than the present one, or if their children are less refined than
+   * the children of this cell. These requirements are not taken care of and
+   * must be enforced by the user afterward.
+   *
+   * If the cell is part of a hp::DoFHandler object, cells only have an
+   * associated finite element space if they are active. However, this
+   * function is supposed to also work on inactive cells with children.
+   * Consequently, it carries a third argument that can be used in the hp
+   * context that denotes the finite element space we are supposed to
+   * interpret the input vector of this function in. If the cell is active,
+   * this function then interpolates the input vector interpreted as an
+   * element of the space described by the <code>fe_index</code>th element of
+   * the hp::FECollection associated with the hp::DoFHandler of which this
+   * cell is a part, and interpolates it into the space that is associated
+   * with this cell. On the other hand, if the cell is not active, then we
+   * first perform this interpolation from this cell to its children using the
+   * given <code>fe_index</code> until we end up on an active cell, at which
+   * point we follow the procedure outlined at the beginning of the paragraph.
+   *
+   * It is assumed that both vectors already have the right size beforehand.
+   * This function relies on the existence of a natural interpolation property
+   * of finite element spaces of a cell to its children, denoted by the
+   * prolongation matrices of finite element classes. For some elements, the
+   * spaces on coarse and fine grids are not nested, in which case the
+   * interpolation to a child is not the identity; refer to the documentation
+   * of the respective finite element class for a description of what the
+   * prolongation matrices represent in this case.
+   *
+   * @note Unlike the get_dof_values() function, this function is only
+   * available on cells, rather than on lines, quads, and hexes, since
+   * interpolation is presently only provided for cells by the finite element
+   * classes.
+   */
+  template <class OutputVector, typename number>
+  void set_dof_values_by_interpolation (const Vector<number> &local_values,
+                                        OutputVector         &values,
+                                        const unsigned int    fe_index
+                                        = DoFHandlerType::default_fe_index) const;
+
+  /**
+   * Distribute a local (cell based) vector to a global one by mapping the
+   * local numbering of the degrees of freedom to the global one and entering
+   * the local values into the global vector.
+   *
+   * The elements are <em>added</em> up to the elements in the global vector,
+   * rather than just set, since this is usually what one wants.
+   */
+  template <typename number, typename OutputVector>
+  void
+  distribute_local_to_global (const Vector<number> &local_source,
+                              OutputVector         &global_destination) const;
+
+  /**
+   * Distribute a local (cell based) vector in iterator format to a global one
+   * by mapping the local numbering of the degrees of freedom to the global
+   * one and entering the local values into the global vector.
+   *
+   * The elements are <em>added</em> up to the elements in the global vector,
+   * rather than just set, since this is usually what one wants.
+   */
+  template <typename ForwardIterator, typename OutputVector>
+  void
+  distribute_local_to_global (ForwardIterator   local_source_begin,
+                              ForwardIterator   local_source_end,
+                              OutputVector     &global_destination) const;
+
+  /**
+   * Distribute a local (cell based) vector in iterator format to a global one
+   * by mapping the local numbering of the degrees of freedom to the global
+   * one and entering the local values into the global vector.
+   *
+   * The elements are <em>added</em> up to the elements in the global vector,
+   * rather than just set, since this is usually what one wants. Moreover, the
+   * ConstraintMatrix passed to this function makes sure that also constraints
+   * are eliminated in this process.
+   */
+  template <typename ForwardIterator, typename OutputVector>
+  void
+  distribute_local_to_global (const ConstraintMatrix &constraints,
+                              ForwardIterator         local_source_begin,
+                              ForwardIterator         local_source_end,
+                              OutputVector           &global_destination) const;
+
+  /**
+   * This function does much the same as the
+   * <tt>distribute_local_to_global(Vector,Vector)</tt> function, but operates
+   * on matrices instead of vectors. If the matrix type is a sparse matrix
+   * then it is supposed to have non-zero entry slots where required.
+   */
+  template <typename number, typename OutputMatrix>
+  void
+  distribute_local_to_global (const FullMatrix<number> &local_source,
+                              OutputMatrix             &global_destination) const;
+
+  /**
+   * This function does what the two <tt>distribute_local_to_global</tt>
+   * functions with vector and matrix argument do, but all at once.
+   */
+  template <typename number, typename OutputMatrix, typename OutputVector>
+  void
+  distribute_local_to_global (const FullMatrix<number> &local_matrix,
+                              const Vector<number>     &local_vector,
+                              OutputMatrix             &global_matrix,
+                              OutputVector             &global_vector) const;
+
+  /**
+   * @}
+   */
+
+  /**
+   * @name Accessing the DoF indices of this object
+   */
+
+  /**
+   * @{
+   */
+
+  /**
+   * Obtain the global indices of the local degrees of freedom on this cell.
+   *
+   * If this object accesses a level cell (indicated by the
+   * <code>level_dof_access</code> template argument or #is_level_cell), then
+   * return the result of get_mg_dof_indices(), else return get_dof_indices().
+   *
+   * You will get a level_cell_iterator when calling begin_mg() and a normal
+   * one otherwise.
+   *
+   * Examples for this use are in the implementation of DoFRenumbering.
+   */
+  void get_active_or_mg_dof_indices (std::vector<types::global_dof_index> &dof_indices) const;
+
+  /**
+   * Return the <i>global</i> indices of the degrees of freedom located on
+   * this object in the standard ordering defined by the finite element (i.e.,
+   * dofs on vertex 0, dofs on vertex 1, etc, dofs on line 0, dofs on line 1,
+   * etc, dofs on quad 0, etc.) This function is only available on
+   * <i>active</i> objects (see
+   * @ref GlossActive "this glossary entry").
+   *
+   * @param[out] dof_indices The vector into which the indices will be
+   * written. It has to have the right size (namely,
+   * <code>fe.dofs_per_cell</code>, <code>fe.dofs_per_face</code>, or
+   * <code>fe.dofs_per_line</code>, depending on which kind of object this
+   * function is called) before being passed to this function.
+   *
+   * This function reimplements the same function in the base class. In
+   * contrast to the function in the base class, we do not need the
+   * <code>fe_index</code> here because there is always a unique finite
+   * element index on cells.
+   *
+   * This is a function which requires that the cell is active.
+   *
+   * Also see get_active_or_mg_dof_indices().
+   *
+   * @note In many places in the tutorial and elsewhere in the library, the
+   * argument to this function is called <code>local_dof_indices</code> by
+   * convention. The name is not meant to indicate the <i>local</i> numbers of
+   * degrees of freedom (which are always between zero and
+   * <code>fe.dofs_per_cell</code>) but instead that the returned values are
+   * the <i>global</i> indices of those degrees of freedom that are located
+   * locally on the current cell.
+   *
+   * @deprecated Currently, this function can also be called for non-active
+   * cells, if all degrees of freedom of the FiniteElement are located in
+   * vertices. This functionality will vanish in a future release.
+   */
+  void get_dof_indices (std::vector<types::global_dof_index> &dof_indices) const;
+
+  /**
+   * @deprecated Use get_active_or_mg_dof_indices() with level_cell_iterator
+   * returned from begin_mg().
+   *
+   * Retrieve the global indices of the degrees of freedom on this cell in the
+   * level vector associated to the level of the cell.
+   */
+  void get_mg_dof_indices (std::vector<types::global_dof_index> &dof_indices) const;
+
+  /**
+   * @}
+   */
+
+  /**
+   * @name Accessing the finite element associated with this object
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * Return the finite element that is used on the cell pointed to by this
+   * iterator. For non-hp DoF handlers, this is of course always the same
+   * element, independent of the cell we are presently on, but for hp DoF
+   * handlers, this may change from cell to cell.
+   *
+   * @note Since degrees of freedom only exist on active cells for
+   * hp::DoFHandler (i.e., there is currently no implementation of multilevel
+   * hp::DoFHandler objects), it does not make sense to query the finite
+   * element on non-active cells since they do not have finite element spaces
+   * associated with them without having any degrees of freedom. Consequently,
+   * this function will produce an exception when called on non-active cells.
+   */
+  const FiniteElement<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &
+  get_fe () const;
+
+  /**
+   * Returns the index inside the hp::FECollection of the FiniteElement used
+   * for this cell. This function is only useful if the DoF handler object
+   * associated with the current cell is an hp::DoFHandler.
+   *
+   * @note Since degrees of freedom only exist on active cells for
+   * hp::DoFHandler (i.e., there is currently no implementation of multilevel
+   * hp::DoFHandler objects), it does not make sense to query active FE
+   * indices on non-active cells since they do not have finite element spaces
+   * associated with them without having any degrees of freedom. Consequently,
+   * this function will produce an exception when called on non-active cells.
+   */
+  unsigned int active_fe_index () const;
+
+  /**
+   * Sets the index of the FiniteElement used for this cell. This determines
+   * which element in an hp::FECollection to use. This function is only useful
+   * if the DoF handler object associated with the current cell is an
+   * hp::DoFHandler.
+   *
+   * @note Since degrees of freedom only exist on active cells for
+   * hp::DoFHandler (i.e., there is currently no implementation of multilevel
+   * hp::DoFHandler objects), it does not make sense to assign active FE
+   * indices to non-active cells since they do not have finite element spaces
+   * associated with them without having any degrees of freedom. Consequently,
+   * this function will produce an exception when called on non-active cells.
+   */
+  void set_active_fe_index (const unsigned int i);
+  /**
+   * @}
+   */
+
+  /**
+   * Set the DoF indices of this cell to the given values. This function
+   * bypasses the DoF cache, if one exists for the given DoF handler class.
+   */
+  void set_dof_indices (const std::vector<types::global_dof_index> &dof_indices);
+
+  /**
+   * Set the Level DoF indices of this cell to the given values.
+   */
+  void set_mg_dof_indices (const std::vector<types::global_dof_index> &dof_indices);
+
+  /**
+   * Update the cache in which we store the dof indices of this cell.
+   */
+  void update_cell_dof_indices_cache () const;
+
+private:
+  /**
+   * Copy operator. This is normally used in a context like <tt>iterator a,b;
+   * *a=*b;</tt>. Presumably, the intent here is to copy the object pointed to
+   * by @p b to the object pointed to by @p a. However, the result of
+   * dereferencing an iterator is not an object but an accessor; consequently,
+   * this operation is not useful for iterators on triangulations. We declare
+   * this function here private, thus it may not be used from outside.
+   * Furthermore it is not implemented and will give a linker error if used
+   * anyway.
+   */
+  DoFCellAccessor<DoFHandlerType, level_dof_access> &
+  operator = (const DoFCellAccessor<DoFHandlerType, level_dof_access> &da);
+
+  /**
+   * Make the DoFHandler class a friend so that it can call the
+   * update_cell_dof_indices_cache() function
+   */
+  template <int dim, int spacedim> friend class DoFHandler;
+  friend struct dealii::internal::DoFCellAccessor::Implementation;
+};
+
+
+template <int sd, typename DoFHandlerType, bool level_dof_access>
+inline
+bool
+DoFAccessor<sd, DoFHandlerType, level_dof_access>::is_level_cell() // takes no arguments and reads no object state
+{
+  return level_dof_access; // the boolean template argument this accessor type was instantiated with
+}
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+// include more templates
+#include "dof_accessor.templates.h"
+
+
+#endif
diff --git a/include/deal.II/dofs/dof_accessor.templates.h b/include/deal.II/dofs/dof_accessor.templates.h
new file mode 100644
index 0000000..f2597af
--- /dev/null
+++ b/include/deal.II/dofs/dof_accessor.templates.h
@@ -0,0 +1,3584 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__dof_accessor_templates_h
+#define dealii__dof_accessor_templates_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/types.h>
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_levels.h>
+#include <deal.II/dofs/dof_faces.h>
+#include <deal.II/hp/dof_level.h>
+#include <deal.II/hp/dof_faces.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/tria_iterator.templates.h>
+
+#include <vector>
+#include <limits>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/*------------------------- Functions: DoFAccessor ---------------------------*/
+
+
+template <int structdim, typename DoFHandlerType, bool level_dof_access>
+inline
+DoFAccessor<structdim,DoFHandlerType,level_dof_access>::DoFAccessor () // default constructor; it has no member initializers
+{
+  Assert (false, ExcInvalidObject()); // condition is literally false: reaching this constructor is reported as ExcInvalidObject
+}
+
+
+
+template <int structdim, typename DoFHandlerType, bool level_dof_access>
+inline
+DoFAccessor<structdim,DoFHandlerType,level_dof_access>::DoFAccessor ( // builds an accessor from its four constituent values
+  const Triangulation<DoFHandlerType::dimension,DoFHandlerType::space_dimension> *tria,
+  const int             level,
+  const int             index,
+  const DoFHandlerType *dof_handler)
+  :
+  dealii::internal::DoFAccessor::Inheritance<structdim,DoFHandlerType::dimension,
+  DoFHandlerType::space_dimension>::BaseClass (tria, // tria, level and index are handed unchanged to the base class
+                                               level,
+                                               index),
+  dof_handler(const_cast<DoFHandlerType *>(dof_handler)) // const is cast away so the member can hold a non-const pointer
+{}
+
+
+
+template <int structdim, typename DoFHandlerType, bool level_dof_access>
+template <int structdim2, int dim2, int spacedim2>
+DoFAccessor<structdim,DoFHandlerType,level_dof_access>::DoFAccessor
+(const InvalidAccessor<structdim2,dim2,spacedim2> &) // the argument is unnamed and never read
+{
+  Assert (false, ExcInvalidObject()); // always fails: constructing from an InvalidAccessor is reported as ExcInvalidObject
+}
+
+
+
+template <int structdim, typename DoFHandlerType, bool level_dof_access>
+template <int dim2, class DoFHandlerType2, bool level_dof_access2>
+inline
+DoFAccessor<structdim,DoFHandlerType,level_dof_access>::DoFAccessor
+(const DoFAccessor<dim2, DoFHandlerType2, level_dof_access2> &other) // a DoFAccessor with other template arguments
+  : BaseClass(other),
+    dof_handler(0) // no DoF handler is attached to the object built here
+{ // the condition below is literally false, so this conversion is always reported as an error
+  Assert (false, ExcMessage("You are trying to assign iterators that are incompatible. "
+                            "Reasons for incompatibility are that they point to different "
+                            "types of DoFHandlers (e.g., dealii::DoFHandler and "
+                            "dealii::hp::DoFHandler) or that they refer to objects of "
+                            "different dimensionality (e.g., assigning a line iterator "
+                            "to a quad iterator)."));
+}
+
+
+
+template <int structdim, typename DoFHandlerType, bool level_dof_access>
+template <bool level_dof_access2>
+inline
+DoFAccessor<structdim,DoFHandlerType,level_dof_access>::DoFAccessor
+(const DoFAccessor<structdim, DoFHandlerType, level_dof_access2> &other) // same structdim and DoFHandlerType, any level_dof_access2
+  : BaseClass(other), // the base-class part is built from 'other'
+    dof_handler(const_cast<DoFHandlerType *>(other.dof_handler)) // take over other's DoF handler pointer
+{
+}
+
+
+
+template <int structdim, typename DoFHandlerType, bool level_dof_access>
+inline
+void
+DoFAccessor<structdim,DoFHandlerType,level_dof_access>::set_dof_handler (DoFHandlerType *dh) // dh: the DoF handler to store
+{
+  Assert (dh != 0, ExcInvalidObject()); // a null handler is reported as ExcInvalidObject
+  this->dof_handler = dh; // only the pointer is stored; nothing else is updated here
+}
+
+
+
+template <int structdim, typename DoFHandlerType, bool level_dof_access>
+inline
+const DoFHandlerType & // handed out as a const reference
+DoFAccessor<structdim,DoFHandlerType,level_dof_access>::get_dof_handler () const
+{
+  return *this->dof_handler; // dereferences the stored pointer; this function does not test it for null
+}
+
+
+
+template <int structdim, typename DoFHandlerType, bool level_dof_access>
+inline
+void
+DoFAccessor<structdim,DoFHandlerType,level_dof_access>::copy_from
+(const TriaAccessorBase<structdim, DoFHandlerType::dimension, DoFHandlerType::space_dimension> &da) // a plain triangulation accessor
+{
+  Assert (this->dof_handler != 0, ExcInvalidObject()); // this object must already have a DoF handler
+  BaseClass::copy_from(da); // only the base-class part is copied; dof_handler is not assigned in this function
+}
+
+
+
+template <int structdim, typename DoFHandlerType, bool level_dof_access>
+template <bool level_dof_access2>
+inline
+void
+DoFAccessor<structdim,DoFHandlerType,level_dof_access>::copy_from
+(const DoFAccessor<structdim,DoFHandlerType,level_dof_access2> &a) // same structdim and DoFHandlerType, any level_dof_access2
+{
+  BaseClass::copy_from (a); // first copy the base-class part
+  set_dof_handler (a.dof_handler); // then take over a's DoF handler via the setter
+}
+
+
+
+template <int structdim, typename DoFHandlerType, bool level_dof_access>
+template <int dim2, class DoFHandlerType2, bool level_dof_access2>
+inline
+bool
+DoFAccessor<structdim,DoFHandlerType,level_dof_access>::operator ==
+(const DoFAccessor<dim2,DoFHandlerType2,level_dof_access2> &a) const // 'a' may be any DoFAccessor instantiation
+{
+  Assert (structdim == dim2, ExcCantCompareIterators()); // both accessors must have the same structural dimension
+  Assert (this->dof_handler == a.dof_handler, ExcCantCompareIterators()); // and must refer to the same DoF handler
+  return (BaseClass::operator == (a)); // the comparison itself is done by the base class
+}
+
+
+
+template <int structdim, typename DoFHandlerType, bool level_dof_access>
+template <int dim2, class DoFHandlerType2, bool level_dof_access2>
+inline
+bool
+DoFAccessor<structdim,DoFHandlerType,level_dof_access>::operator !=
+(const DoFAccessor<dim2,DoFHandlerType2,level_dof_access2> &a) const // 'a' may be any DoFAccessor instantiation
+{
+  Assert (structdim == dim2, ExcCantCompareIterators()); // both accessors must have the same structural dimension
+  Assert (this->dof_handler == a.dof_handler, ExcCantCompareIterators()); // and must refer to the same DoF handler
+  return (BaseClass::operator != (a)); // the comparison itself is done by the base class
+}
+
+
+
+template <int structdim, typename DoFHandlerType, bool level_dof_access>
+inline
+TriaIterator<DoFAccessor<structdim,DoFHandlerType,level_dof_access> >
+DoFAccessor<structdim,DoFHandlerType,level_dof_access>::child (const unsigned int i) const // i: number of the requested child
+{
+  Assert (static_cast<unsigned int>(this->level()) < this->dof_handler->levels.size(),
+          ExcMessage ("DoFHandler not initialized")); // the handler must have a 'levels' entry for this object's level
+  // look up the i-th child through the plain triangulation accessor first
+  TriaIterator<TriaAccessor<structdim,DoFHandlerType::dimension,DoFHandlerType::space_dimension> >
+  t = TriaAccessor<structdim,DoFHandlerType::dimension,DoFHandlerType::space_dimension>::child(i);
+  // then build a DoF iterator from that child and the DoF handler of this object
+  TriaIterator<DoFAccessor<structdim,DoFHandlerType,level_dof_access> > q (*t, this->dof_handler);
+  return q;
+}
+
+
+namespace internal
+{
+  namespace DoFAccessor
+  {
+    /**
+     * A class like the one with same name in tria.cc. See there for more
+     * information.
+     */
+    struct Implementation
+    {
+      /**
+       * Implementations of the get_dof_index/set_dof_index functions.
+       */
+      template <int spacedim> // read one DoF index; overload for DoFHandler<1,spacedim> with tag int2type<1>
+      static
+      types::global_dof_index
+      get_dof_index (const dealii::DoFHandler<1,spacedim>   &dof_handler,
+                     const unsigned int                      obj_level,
+                     const unsigned int                      obj_index,
+                     const unsigned int                      fe_index,
+                     const unsigned int                      local_index,
+                     dealii::internal::int2type<1>) // unnamed tag parameter, used only to select this overload
+      {
+        return dof_handler.levels[obj_level]->dof_object. // the storage object is picked per level via obj_level
+               get_dof_index (dof_handler, // the remaining arguments are forwarded unchanged
+                              obj_index,
+                              fe_index,
+                              local_index);
+      }
+
+
+      template <int spacedim> // store one DoF index; overload for DoFHandler<1,spacedim> with tag int2type<1>
+      static
+      void
+      set_dof_index (const dealii::DoFHandler<1,spacedim>   &dof_handler,
+                     const unsigned int              obj_level,
+                     const unsigned int              obj_index,
+                     const unsigned int              fe_index,
+                     const unsigned int              local_index,
+                     dealii::internal::int2type<1>, // unnamed tag parameter, used only to select this overload
+                     const types::global_dof_index              global_index) // the value to store
+      {
+        dof_handler.levels[obj_level]->dof_object. // the write goes through '->' although dof_handler is a const reference
+        set_dof_index (dof_handler, // the remaining arguments are forwarded unchanged
+                       obj_index,
+                       fe_index,
+                       local_index,
+                       global_index);
+      }
+
+
+      template <int spacedim> // read one DoF index of a line; overload for DoFHandler<2,spacedim> with tag int2type<1>
+      static
+      types::global_dof_index
+      get_dof_index (const dealii::DoFHandler<2,spacedim>   &dof_handler,
+                     const unsigned int              obj_level,
+                     const unsigned int              obj_index,
+                     const unsigned int              fe_index,
+                     const unsigned int              local_index,
+                     dealii::internal::int2type<1>) // unnamed tag parameter, used only to select this overload
+      {
+        (void)obj_level; // keeps obj_level referenced -- presumably for builds in which Assert expands to nothing
+        // faces have no levels
+        Assert (obj_level == 0, ExcInternalError()); // hence only level 0 is accepted
+        return dof_handler.faces->lines. // lines are read from dof_handler.faces rather than from a per-level object
+               get_dof_index (dof_handler,
+                              obj_index,
+                              fe_index,
+                              local_index);
+      }
+
+
+      template <int spacedim> // store one DoF index of a line; overload for DoFHandler<2,spacedim> with tag int2type<1>
+      static
+      void
+      set_dof_index (const dealii::DoFHandler<2,spacedim>   &dof_handler,
+                     const unsigned int              obj_level,
+                     const unsigned int              obj_index,
+                     const unsigned int              fe_index,
+                     const unsigned int              local_index,
+                     dealii::internal::int2type<1>, // unnamed tag parameter, used only to select this overload
+                     const types::global_dof_index              global_index) // the value to store
+      {
+        (void)obj_level; // keeps obj_level referenced -- presumably for builds in which Assert expands to nothing
+        // faces have no levels
+        Assert (obj_level == 0, ExcInternalError()); // hence only level 0 is accepted
+        dof_handler.faces->lines. // the write goes through '->' although dof_handler is a const reference
+        set_dof_index (dof_handler,
+                       obj_index,
+                       fe_index,
+                       local_index,
+                       global_index);
+      }
+
+
+      template <int spacedim> // read one DoF index; overload for DoFHandler<2,spacedim> with tag int2type<2>
+      static
+      types::global_dof_index
+      get_dof_index (const dealii::DoFHandler<2,spacedim>   &dof_handler,
+                     const unsigned int              obj_level,
+                     const unsigned int              obj_index,
+                     const unsigned int              fe_index,
+                     const unsigned int              local_index,
+                     dealii::internal::int2type<2>) // unnamed tag parameter, used only to select this overload
+      {
+        return dof_handler.levels[obj_level]->dof_object. // the storage object is picked per level via obj_level
+               get_dof_index (dof_handler, // the remaining arguments are forwarded unchanged
+                              obj_index,
+                              fe_index,
+                              local_index);
+      }
+
+
+      template <int spacedim> // store one DoF index; overload for DoFHandler<2,spacedim> with tag int2type<2>
+      static
+      void
+      set_dof_index (const dealii::DoFHandler<2,spacedim>   &dof_handler,
+                     const unsigned int              obj_level,
+                     const unsigned int              obj_index,
+                     const unsigned int              fe_index,
+                     const unsigned int              local_index,
+                     dealii::internal::int2type<2>, // unnamed tag parameter, used only to select this overload
+                     const types::global_dof_index              global_index) // the value to store
+      {
+        dof_handler.levels[obj_level]->dof_object. // the write goes through '->' although dof_handler is a const reference
+        set_dof_index (dof_handler, // the remaining arguments are forwarded unchanged
+                       obj_index,
+                       fe_index,
+                       local_index,
+                       global_index);
+      }
+
+
+      template <int spacedim> // read one DoF index of a line; overload for DoFHandler<3,spacedim> with tag int2type<1>
+      static
+      types::global_dof_index
+      get_dof_index (const dealii::DoFHandler<3,spacedim>   &dof_handler,
+                     const unsigned int              obj_level,
+                     const unsigned int              obj_index,
+                     const unsigned int              fe_index,
+                     const unsigned int              local_index,
+                     dealii::internal::int2type<1>) // unnamed tag parameter, used only to select this overload
+      {
+        (void)obj_level; // keeps obj_level referenced -- presumably for builds in which Assert expands to nothing
+        // faces have no levels
+        Assert (obj_level == 0, ExcInternalError()); // hence only level 0 is accepted
+        return dof_handler.faces->lines. // lines are read from dof_handler.faces rather than from a per-level object
+               get_dof_index (dof_handler,
+                              obj_index,
+                              fe_index,
+                              local_index);
+      }
+
+
+      template <int spacedim> // store one DoF index of a line; overload for DoFHandler<3,spacedim> with tag int2type<1>
+      static
+      void
+      set_dof_index (const dealii::DoFHandler<3,spacedim>   &dof_handler,
+                     const unsigned int              obj_level,
+                     const unsigned int              obj_index,
+                     const unsigned int              fe_index,
+                     const unsigned int              local_index,
+                     dealii::internal::int2type<1>, // unnamed tag parameter, used only to select this overload
+                     const types::global_dof_index              global_index) // the value to store
+      {
+        (void)obj_level; // keeps obj_level referenced -- presumably for builds in which Assert expands to nothing
+        // faces have no levels
+        Assert (obj_level == 0, ExcInternalError()); // hence only level 0 is accepted
+        dof_handler.faces->lines. // the write goes through '->' although dof_handler is a const reference
+        set_dof_index (dof_handler,
+                       obj_index,
+                       fe_index,
+                       local_index,
+                       global_index);
+      }
+
+
+
+      template <int spacedim> // read one DoF index of a quad; overload for DoFHandler<3,spacedim> with tag int2type<2>
+      static
+      types::global_dof_index
+      get_dof_index (const dealii::DoFHandler<3,spacedim>   &dof_handler,
+                     const unsigned int              obj_level,
+                     const unsigned int              obj_index,
+                     const unsigned int              fe_index,
+                     const unsigned int              local_index,
+                     dealii::internal::int2type<2>) // unnamed tag parameter, used only to select this overload
+      {
+        (void)obj_level; // keeps obj_level referenced -- presumably for builds in which Assert expands to nothing
+        // faces have no levels
+        Assert (obj_level == 0, ExcInternalError()); // hence only level 0 is accepted
+        return dof_handler.faces->quads. // quads are read from dof_handler.faces rather than from a per-level object
+               get_dof_index (dof_handler,
+                              obj_index,
+                              fe_index,
+                              local_index);
+      }
+
+
+      template <int spacedim> // store one DoF index of a quad; overload for DoFHandler<3,spacedim> with tag int2type<2>
+      static
+      void
+      set_dof_index (const dealii::DoFHandler<3,spacedim>   &dof_handler,
+                     const unsigned int              obj_level,
+                     const unsigned int              obj_index,
+                     const unsigned int              fe_index,
+                     const unsigned int              local_index,
+                     dealii::internal::int2type<2>, // unnamed tag parameter, used only to select this overload
+                     const types::global_dof_index              global_index) // the value to store
+      {
+        (void)obj_level; // keeps obj_level referenced -- presumably for builds in which Assert expands to nothing
+        // faces have no levels
+        Assert (obj_level == 0, ExcInternalError()); // hence only level 0 is accepted
+        dof_handler.faces->quads. // the write goes through '->' although dof_handler is a const reference
+        set_dof_index (dof_handler,
+                       obj_index,
+                       fe_index,
+                       local_index,
+                       global_index);
+      }
+
+
+
+      template <int spacedim> // read one DoF index; overload for DoFHandler<3,spacedim> with tag int2type<3>
+      static
+      types::global_dof_index
+      get_dof_index (const dealii::DoFHandler<3,spacedim>   &dof_handler,
+                     const unsigned int              obj_level,
+                     const unsigned int              obj_index,
+                     const unsigned int              fe_index,
+                     const unsigned int              local_index,
+                     dealii::internal::int2type<3>) // unnamed tag parameter, used only to select this overload
+      {
+        return dof_handler.levels[obj_level]->dof_object. // the storage object is picked per level via obj_level
+               get_dof_index (dof_handler, // the remaining arguments are forwarded unchanged
+                              obj_index,
+                              fe_index,
+                              local_index);
+      }
+
+
+      template <int spacedim> // store one DoF index; overload for DoFHandler<3,spacedim> with tag int2type<3>
+      static
+      void
+      set_dof_index (const dealii::DoFHandler<3,spacedim>   &dof_handler,
+                     const unsigned int              obj_level,
+                     const unsigned int              obj_index,
+                     const unsigned int              fe_index,
+                     const unsigned int              local_index,
+                     dealii::internal::int2type<3>, // unnamed tag parameter, used only to select this overload
+                     const types::global_dof_index              global_index) // the value to store
+      {
+        dof_handler.levels[obj_level]->dof_object. // the write goes through '->' although dof_handler is a const reference
+        set_dof_index (dof_handler, // the remaining arguments are forwarded unchanged
+                       obj_index,
+                       fe_index,
+                       local_index,
+                       global_index);
+      }
+
+
+      template <int spacedim> // read one DoF index; overload for hp::DoFHandler<1,spacedim> with tag int2type<1>
+      static
+      types::global_dof_index
+      get_dof_index (const dealii::hp::DoFHandler<1,spacedim> &dof_handler,
+                     const unsigned int       obj_level,
+                     const unsigned int       obj_index,
+                     const unsigned int       fe_index,
+                     const unsigned int       local_index,
+                     const dealii::internal::int2type<1> &) // unnamed tag parameter, used only to select this overload
+      {
+        return dof_handler.levels[obj_level]-> // the level object itself is queried; dof_handler is not passed on
+               get_dof_index (obj_index,
+                              fe_index,
+                              local_index);
+      }
+
+
+      template <int spacedim> // store one DoF index; overload for hp::DoFHandler<1,spacedim> with tag int2type<1>
+      static
+      void
+      set_dof_index (const dealii::hp::DoFHandler<1,spacedim> &dof_handler,
+                     const unsigned int       obj_level,
+                     const unsigned int       obj_index,
+                     const unsigned int       fe_index,
+                     const unsigned int       local_index,
+                     const dealii::internal::int2type<1> &, // unnamed tag parameter, used only to select this overload
+                     const types::global_dof_index       global_index) // the value to store
+      {
+        dof_handler.levels[obj_level]-> // the write goes through '->' although dof_handler is a const reference
+        set_dof_index (obj_index, // dof_handler itself is not passed on
+                       fe_index,
+                       local_index,
+                       global_index);
+      }
+
+
+      template <int spacedim> // read one DoF index of a line; overload for hp::DoFHandler<2,spacedim> with tag int2type<1>
+      static
+      types::global_dof_index
+      get_dof_index (const dealii::hp::DoFHandler<2,spacedim> &dof_handler,
+                     const unsigned int       obj_level,
+                     const unsigned int       obj_index,
+                     const unsigned int       fe_index,
+                     const unsigned int       local_index,
+                     const dealii::internal::int2type<1> &) // unnamed tag parameter, used only to select this overload
+      {
+        return dof_handler.faces->lines. // lines are read from dof_handler.faces
+               get_dof_index (dof_handler,
+                              obj_index,
+                              fe_index,
+                              local_index,
+                              obj_level); // obj_level is forwarded as the last argument; it is not asserted to be 0 here
+      }
+
+
+      template <int spacedim> // store one DoF index of a line; overload for hp::DoFHandler<2,spacedim> with tag int2type<1>
+      static
+      void
+      set_dof_index (const dealii::hp::DoFHandler<2,spacedim> &dof_handler,
+                     const unsigned int       obj_level,
+                     const unsigned int       obj_index,
+                     const unsigned int       fe_index,
+                     const unsigned int       local_index,
+                     const dealii::internal::int2type<1> &, // unnamed tag parameter, used only to select this overload
+                     const types::global_dof_index       global_index) // the value to store
+      {
+        dof_handler.faces->lines. // the write goes through '->' although dof_handler is a const reference
+        set_dof_index (dof_handler,
+                       obj_index,
+                       fe_index,
+                       local_index,
+                       global_index,
+                       obj_level); // obj_level is forwarded as the last argument, after global_index
+      }
+
+
+      template <int spacedim> // read one DoF index; overload for hp::DoFHandler<2,spacedim> with tag int2type<2>
+      static
+      types::global_dof_index
+      get_dof_index (const dealii::hp::DoFHandler<2,spacedim> &dof_handler,
+                     const unsigned int       obj_level,
+                     const unsigned int       obj_index,
+                     const unsigned int       fe_index,
+                     const unsigned int       local_index,
+                     const dealii::internal::int2type<2> &) // unnamed tag parameter, used only to select this overload
+      {
+        return dof_handler.levels[obj_level]-> // the level object itself is queried; dof_handler is not passed on
+               get_dof_index (obj_index,
+                              fe_index,
+                              local_index);
+      }
+
+
+      template <int spacedim> // store one DoF index; overload for hp::DoFHandler<2,spacedim> with tag int2type<2>
+      static
+      void
+      set_dof_index (const dealii::hp::DoFHandler<2,spacedim> &dof_handler,
+                     const unsigned int       obj_level,
+                     const unsigned int       obj_index,
+                     const unsigned int       fe_index,
+                     const unsigned int       local_index,
+                     const dealii::internal::int2type<2> &, // unnamed tag parameter, used only to select this overload
+                     const types::global_dof_index       global_index) // the value to store
+      {
+        dof_handler.levels[obj_level]-> // the write goes through '->' although dof_handler is a const reference
+        set_dof_index (obj_index, // dof_handler itself is not passed on
+                       fe_index,
+                       local_index,
+                       global_index);
+      }
+
+
+      template <int spacedim> // read one DoF index of a line; overload for hp::DoFHandler<3,spacedim> with tag int2type<1>
+      static
+      types::global_dof_index
+      get_dof_index (const dealii::hp::DoFHandler<3,spacedim> &dof_handler,
+                     const unsigned int       obj_level,
+                     const unsigned int       obj_index,
+                     const unsigned int       fe_index,
+                     const unsigned int       local_index,
+                     const dealii::internal::int2type<1> &) // unnamed tag parameter, used only to select this overload
+      {
+        return dof_handler.faces->lines. // lines are read from dof_handler.faces
+               get_dof_index (dof_handler,
+                              obj_index,
+                              fe_index,
+                              local_index,
+                              obj_level); // obj_level is forwarded as the last argument; it is not asserted to be 0 here
+      }
+
+
+      template <int spacedim> // store one DoF index of a line; overload for hp::DoFHandler<3,spacedim> with tag int2type<1>
+      static
+      void
+      set_dof_index (const dealii::hp::DoFHandler<3,spacedim> &dof_handler,
+                     const unsigned int       obj_level,
+                     const unsigned int       obj_index,
+                     const unsigned int       fe_index,
+                     const unsigned int       local_index,
+                     const dealii::internal::int2type<1> &, // unnamed tag parameter, used only to select this overload
+                     const types::global_dof_index       global_index) // the value to store
+      {
+        dof_handler.faces->lines. // the write goes through '->' although dof_handler is a const reference
+        set_dof_index (dof_handler,
+                       obj_index,
+                       fe_index,
+                       local_index,
+                       global_index,
+                       obj_level); // obj_level is forwarded as the last argument, after global_index
+      }
+
+
+      template <int spacedim> // read one DoF index of a quad; overload for hp::DoFHandler<3,spacedim> with tag int2type<2>
+      static
+      types::global_dof_index
+      get_dof_index (const dealii::hp::DoFHandler<3,spacedim> &dof_handler,
+                     const unsigned int       obj_level,
+                     const unsigned int       obj_index,
+                     const unsigned int       fe_index,
+                     const unsigned int       local_index,
+                     const dealii::internal::int2type<2> &) // unnamed tag parameter, used only to select this overload
+      {
+        return dof_handler.faces->quads. // quads are read from dof_handler.faces
+               get_dof_index (dof_handler,
+                              obj_index,
+                              fe_index,
+                              local_index,
+                              obj_level); // obj_level is forwarded as the last argument; it is not asserted to be 0 here
+      }
+
+
+      template <int spacedim> // store one DoF index of a quad; overload for hp::DoFHandler<3,spacedim> with tag int2type<2>
+      static
+      void
+      set_dof_index (const dealii::hp::DoFHandler<3,spacedim> &dof_handler,
+                     const unsigned int       obj_level,
+                     const unsigned int       obj_index,
+                     const unsigned int       fe_index,
+                     const unsigned int       local_index,
+                     const dealii::internal::int2type<2> &, // unnamed tag parameter, used only to select this overload
+                     const types::global_dof_index       global_index) // the value to store
+      {
+        dof_handler.faces->quads. // the write goes through '->' although dof_handler is a const reference
+        set_dof_index (dof_handler,
+                       obj_index,
+                       fe_index,
+                       local_index,
+                       global_index,
+                       obj_level); // obj_level is forwarded as the last argument, after global_index
+      }
+
+
+      template <int spacedim> // read one DoF index; overload for hp::DoFHandler<3,spacedim> with tag int2type<3>
+      static
+      types::global_dof_index
+      get_dof_index (const dealii::hp::DoFHandler<3,spacedim> &dof_handler,
+                     const unsigned int       obj_level,
+                     const unsigned int       obj_index,
+                     const unsigned int       fe_index,
+                     const unsigned int       local_index,
+                     const dealii::internal::int2type<3> &) // unnamed tag parameter, used only to select this overload
+      {
+        return dof_handler.levels[obj_level]-> // the level object itself is queried; dof_handler is not passed on
+               get_dof_index (obj_index,
+                              fe_index,
+                              local_index);
+      }
+
+
+      template <int spacedim> // store one DoF index; overload for hp::DoFHandler<3,spacedim> with tag int2type<3>
+      static
+      void
+      set_dof_index (const dealii::hp::DoFHandler<3,spacedim> &dof_handler,
+                     const unsigned int       obj_level,
+                     const unsigned int       obj_index,
+                     const unsigned int       fe_index,
+                     const unsigned int       local_index,
+                     const dealii::internal::int2type<3> &, // unnamed tag parameter, used only to select this overload
+                     const types::global_dof_index       global_index) // the value to store
+      {
+        dof_handler.levels[obj_level]-> // the write goes through '->' although dof_handler is a const reference
+        set_dof_index (obj_index, // dof_handler itself is not passed on
+                       fe_index,
+                       local_index,
+                       global_index);
+      }
+
+
+      template <int structdim, int dim, int spacedim> // overload for the non-hp dealii::DoFHandler, any structdim
+      static
+      bool
+      fe_index_is_active (const dealii::DoFHandler<dim,spacedim> &, // unnamed, not used
+                          const unsigned int, // unnamed, not used
+                          const unsigned int, // unnamed, not used
+                          const unsigned int fe_index,
+                          const dealii::internal::int2type<structdim> &) // unnamed tag, only selects the overload
+      {
+        return (fe_index == 0); // the answer depends on fe_index alone: true exactly for index 0
+      }
+
+
+
+      template <int structdim, int dim, int spacedim> // overload for the non-hp dealii::DoFHandler, any structdim
+      static
+      unsigned int
+      n_active_fe_indices (const dealii::DoFHandler<dim,spacedim> &dof_handler,
+                           const unsigned int obj_level,
+                           const unsigned int obj_index,
+                           const dealii::internal::int2type<structdim> &) // unnamed tag, only selects the overload
+      {
+        (void)dof_handler;
+        (void)obj_level;
+        (void)obj_index;
+        // sanity check only: the value returned below is always 1. because
+        // this function is templatized on the dimensionality of the object,
+        // the object may be a cell, a face, or a line, and there is no
+        // single generic way to look it up. so only two cases are checked:
+        //  - structdim==dim: build a raw cell iterator and require used()
+        //  - structdim==1:   build a raw line iterator and require used()
+        // any other case evaluates to 'true' and is not checked at all;
+        // the only case this leaves out is faces in 3d -- let's hope that
+        // this never is a problem.
+        //
+        // NOTE(review): the condition tests used(), whereas the message
+        // speaks of the cell being "not active" -- confirm which one is
+        // intended. the (void) casts above keep the parameters referenced
+        // (presumably for builds in which Assert expands to nothing).
+        Assert ((dim==structdim
+                 ?
+                 typename
+                 internal::Triangulation::Iterators<dim,spacedim>::
+                 raw_cell_iterator (&dof_handler.get_triangulation(),
+                                    obj_level,
+                                    obj_index)->used()
+                 :
+                 (structdim==1
+                  ?
+                  typename
+                  internal::Triangulation::Iterators<dim,spacedim>::
+                  raw_line_iterator (&dof_handler.get_triangulation(),
+                                     obj_level,
+                                     obj_index)->used()
+                  :
+                  true))
+                == true,
+                ExcMessage ("This cell is not active and therefore can't be "
+                            "queried for its active FE indices"));
+        return 1; // constant result for this DoFHandler overload
+      }
+
+
+
+      /**
+       * Overload for the non-hp DoFHandler: returns
+       * DoFHandler<dim,spacedim>::default_fe_index.
+       *
+       * Inside the assertions the function checks that the addressed
+       * object is used() (for cells and lines only) and that @p n is zero.
+       */
+      template <int structdim, int dim, int spacedim>
+      static
+      unsigned int
+      nth_active_fe_index (const dealii::DoFHandler<dim,spacedim> &dof_handler,
+                           const unsigned int obj_level,
+                           const unsigned int obj_index,
+                           const unsigned int n,
+                           const dealii::internal::int2type<structdim> &)
+      {
+        // the parameters appear only inside the Asserts; the casts
+        // presumably silence unused-parameter warnings in builds where
+        // Assert expands to nothing -- TODO confirm
+        (void)dof_handler;
+        (void)obj_level;
+        (void)obj_index;
+        (void)n;
+        // check that the object we look
+        // at is in fact active. the
+        // problem is that we have
+        // templatized on the
+        // dimensionality of the object,
+        // so it may be a cell, a face,
+        // or a line. we have a bit of
+        // trouble doing this all in the
+        // generic case, so only check if
+        // it is either a cell or a
+        // line. the only case this
+        // leaves out is faces in 3d --
+        // let's hope that this never is
+        // a problem
+        //
+        // NOTE(review): the condition tests used(), whereas the message
+        // text speaks of the cell being "active"
+        Assert ((dim==structdim
+                 ?
+                 typename
+                 internal::Triangulation::Iterators<dim,spacedim>::
+                 raw_cell_iterator (&dof_handler.get_triangulation(),
+                                    obj_level,
+                                    obj_index)->used()
+                 :
+                 (structdim==1
+                  ?
+                  typename
+                  internal::Triangulation::Iterators<dim,spacedim>::
+                  raw_line_iterator (&dof_handler.get_triangulation(),
+                                     obj_level,
+                                     obj_index)->used()
+                  :
+                  true))
+                == true,
+                ExcMessage ("This cell is not active and therefore can't be "
+                            "queried for its active FE indices"));
+        // only index zero is accepted
+        Assert (n == 0, ExcIndexRange (n, 0, 1));
+
+        return dealii::DoFHandler<dim,spacedim>::default_fe_index;
+      }
+
+
+      /**
+       * Overload for hp::DoFHandler<1,spacedim> and the int2type<1> tag.
+       * Forwards the query to the fe_index_is_active() function of the
+       * object stored at dof_handler.levels[obj_level].
+       */
+      template <int spacedim>
+      static
+      bool
+      fe_index_is_active (const dealii::hp::DoFHandler<1,spacedim> &dof_handler,
+                          const unsigned int obj_level,
+                          const unsigned int obj_index,
+                          const unsigned int fe_index,
+                          const dealii::internal::int2type<1> &)
+      {
+        return dof_handler.levels[obj_level]->fe_index_is_active(obj_index,
+                                                                 fe_index);
+      }
+
+
+      /**
+       * Overload for hp::DoFHandler<1,spacedim> and the int2type<1> tag.
+       * None of the arguments is inspected; the result is always 1.
+       */
+      template <int spacedim>
+      static
+      unsigned int
+      n_active_fe_indices (const dealii::hp::DoFHandler<1,spacedim> &,
+                           const unsigned int /*obj_level*/,
+                           const unsigned int /*obj_index*/,
+                           const dealii::internal::int2type<1> &)
+      {
+        // on a cell, the number of active elements is one
+        return 1;
+      }
+
+
+
+      /**
+       * Overload for hp::DoFHandler<1,spacedim> and the int2type<1> tag.
+       * Asserts that @p n is zero and returns what active_fe_index()
+       * of the object at dof_handler.levels[obj_level] reports for
+       * @p obj_index.
+       */
+      template <int spacedim>
+      static
+      unsigned int
+      nth_active_fe_index (const dealii::hp::DoFHandler<1,spacedim> &dof_handler,
+                           const unsigned int obj_level,
+                           const unsigned int obj_index,
+                           const unsigned int n,
+                           const dealii::internal::int2type<1> &)
+      {
+        // n is looked at by the assertion only
+        (void)n;
+        Assert (n==0, ExcMessage("On cells, there can only be one active FE index"));
+
+        const unsigned int fe_index_on_cell
+          = dof_handler.levels[obj_level]->active_fe_index (obj_index);
+        return fe_index_on_cell;
+      }
+
+
+      /**
+       * Overload for hp::DoFHandler<2,spacedim> and the int2type<1> tag
+       * (lines). Forwards to dof_handler.faces->lines.fe_index_is_active().
+       * Note the callee's argument order: @p obj_level is passed last.
+       */
+      template <int spacedim>
+      static
+      bool
+      fe_index_is_active (const dealii::hp::DoFHandler<2,spacedim> &dof_handler,
+                          const unsigned int obj_level,
+                          const unsigned int obj_index,
+                          const unsigned int fe_index,
+                          const dealii::internal::int2type<1> &)
+      {
+        return dof_handler.faces->lines.fe_index_is_active(dof_handler,
+                                                           obj_index,
+                                                           fe_index,
+                                                           obj_level);
+      }
+
+
+      /**
+       * Overload for hp::DoFHandler<2,spacedim> and the int2type<1> tag
+       * (lines). Forwards to dof_handler.faces->lines.n_active_fe_indices()
+       * with @p obj_index; the first unsigned int parameter is unnamed and
+       * unused.
+       */
+      template <int spacedim>
+      static
+      unsigned int
+      n_active_fe_indices (const dealii::hp::DoFHandler<2,spacedim> &dof_handler,
+                           const unsigned int ,
+                           const unsigned int obj_index,
+                           const dealii::internal::int2type<1> &)
+      {
+        return dof_handler.faces->lines.n_active_fe_indices (dof_handler,
+                                                             obj_index);
+      }
+
+
+      /**
+       * Overload for hp::DoFHandler<2,spacedim> and the int2type<1> tag
+       * (lines). Forwards @p obj_level, @p obj_index and @p n to
+       * dof_handler.faces->lines.nth_active_fe_index().
+       */
+      template <int spacedim>
+      static
+      unsigned int
+      nth_active_fe_index (const dealii::hp::DoFHandler<2,spacedim> &dof_handler,
+                           const unsigned int obj_level,
+                           const unsigned int obj_index,
+                           const unsigned int n,
+                           const dealii::internal::int2type<1> &)
+      {
+        return dof_handler.faces->lines.nth_active_fe_index (dof_handler,
+                                                             obj_level,
+                                                             obj_index,
+                                                             n);
+      }
+
+
+
+      /**
+       * Overload for hp::DoFHandler<2,spacedim> and the int2type<2> tag.
+       * Forwards the query to the fe_index_is_active() function of the
+       * object stored at dof_handler.levels[obj_level].
+       */
+      template <int spacedim>
+      static
+      bool
+      fe_index_is_active (const dealii::hp::DoFHandler<2,spacedim> &dof_handler,
+                          const unsigned int obj_level,
+                          const unsigned int obj_index,
+                          const unsigned int fe_index,
+                          const dealii::internal::int2type<2> &)
+      {
+        return dof_handler.levels[obj_level]->fe_index_is_active(obj_index,
+                                                                 fe_index);
+      }
+
+
+      /**
+       * Overload for hp::DoFHandler<2,spacedim> and the int2type<2> tag.
+       * None of the arguments is inspected; the result is always 1.
+       */
+      template <int spacedim>
+      static
+      unsigned int
+      n_active_fe_indices (const dealii::hp::DoFHandler<2,spacedim> &,
+                           const unsigned int /*obj_level*/,
+                           const unsigned int /*obj_index*/,
+                           const dealii::internal::int2type<2> &)
+      {
+        // on a cell, the number of active elements is one
+        return 1;
+      }
+
+
+
+      /**
+       * Overload for hp::DoFHandler<2,spacedim> and the int2type<2> tag.
+       * Asserts that @p n is zero and returns what active_fe_index()
+       * of the object at dof_handler.levels[obj_level] reports for
+       * @p obj_index.
+       */
+      template <int spacedim>
+      static
+      unsigned int
+      nth_active_fe_index (const dealii::hp::DoFHandler<2,spacedim> &dof_handler,
+                           const unsigned int obj_level,
+                           const unsigned int obj_index,
+                           const unsigned int n,
+                           const dealii::internal::int2type<2> &)
+      {
+        // n is looked at by the assertion only
+        (void)n;
+        Assert (n==0, ExcMessage("On cells, there can only be one active FE index"));
+
+        const unsigned int fe_index_on_cell
+          = dof_handler.levels[obj_level]->active_fe_index (obj_index);
+        return fe_index_on_cell;
+      }
+
+
+
+      /**
+       * Overload for hp::DoFHandler<3,spacedim> and the int2type<1> tag
+       * (lines). Forwards to dof_handler.faces->lines.fe_index_is_active().
+       * Note the callee's argument order: @p obj_level is passed last.
+       */
+      template <int spacedim>
+      static
+      bool
+      fe_index_is_active (const dealii::hp::DoFHandler<3,spacedim> &dof_handler,
+                          const unsigned int obj_level,
+                          const unsigned int obj_index,
+                          const unsigned int fe_index,
+                          const dealii::internal::int2type<1> &)
+      {
+        return dof_handler.faces->lines.fe_index_is_active(dof_handler,
+                                                           obj_index,
+                                                           fe_index,
+                                                           obj_level);
+      }
+
+
+      /**
+       * Overload for hp::DoFHandler<3,spacedim> and the int2type<1> tag
+       * (lines). Forwards to dof_handler.faces->lines.n_active_fe_indices()
+       * with @p obj_index; the first unsigned int parameter is unnamed and
+       * unused.
+       */
+      template <int spacedim>
+      static
+      unsigned int
+      n_active_fe_indices (const dealii::hp::DoFHandler<3,spacedim> &dof_handler,
+                           const unsigned int,
+                           const unsigned int obj_index,
+                           const dealii::internal::int2type<1> &)
+      {
+        return dof_handler.faces->lines.n_active_fe_indices (dof_handler,
+                                                             obj_index);
+      }
+
+
+
+      /**
+       * Overload for hp::DoFHandler<3,spacedim> and the int2type<1> tag
+       * (lines). Forwards @p obj_level, @p obj_index and @p n to
+       * dof_handler.faces->lines.nth_active_fe_index().
+       */
+      template <int spacedim>
+      static
+      unsigned int
+      nth_active_fe_index (const dealii::hp::DoFHandler<3,spacedim> &dof_handler,
+                           const unsigned int obj_level,
+                           const unsigned int obj_index,
+                           const unsigned int n,
+                           const dealii::internal::int2type<1> &)
+      {
+        return dof_handler.faces->lines.nth_active_fe_index (dof_handler,
+                                                             obj_level,
+                                                             obj_index,
+                                                             n);
+      }
+
+
+
+      /**
+       * Overload for hp::DoFHandler<3,spacedim> and the int2type<2> tag
+       * (quads). Forwards to dof_handler.faces->quads.fe_index_is_active().
+       * Note the callee's argument order: @p obj_level is passed last.
+       */
+      template <int spacedim>
+      static
+      bool
+      fe_index_is_active (const dealii::hp::DoFHandler<3,spacedim> &dof_handler,
+                          const unsigned int obj_level,
+                          const unsigned int obj_index,
+                          const unsigned int fe_index,
+                          const dealii::internal::int2type<2> &)
+      {
+        return dof_handler.faces->quads.fe_index_is_active(dof_handler,
+                                                           obj_index,
+                                                           fe_index,
+                                                           obj_level);
+      }
+
+      /**
+       * Overload for hp::DoFHandler<3,spacedim> and the int2type<3> tag.
+       * Forwards the query to the fe_index_is_active() function of the
+       * object stored at dof_handler.levels[obj_level].
+       */
+      template <int spacedim>
+      static
+      bool
+      fe_index_is_active (const dealii::hp::DoFHandler<3,spacedim> &dof_handler,
+                          const unsigned int obj_level,
+                          const unsigned int obj_index,
+                          const unsigned int fe_index,
+                          const dealii::internal::int2type<3> &)
+      {
+        return dof_handler.levels[obj_level]->fe_index_is_active(obj_index,
+                                                                 fe_index);
+      }
+
+
+      /**
+       * Overload for hp::DoFHandler<3,spacedim> and the int2type<2> tag
+       * (quads). Forwards to dof_handler.faces->quads.n_active_fe_indices()
+       * with @p obj_index; the first unsigned int parameter is unnamed and
+       * unused.
+       */
+      template <int spacedim>
+      static
+      unsigned int
+      n_active_fe_indices (const dealii::hp::DoFHandler<3,spacedim> &dof_handler,
+                           const unsigned int ,
+                           const unsigned int obj_index,
+                           const dealii::internal::int2type<2> &)
+      {
+        return dof_handler.faces->quads.n_active_fe_indices (dof_handler,
+                                                             obj_index);
+      }
+
+
+
+      /**
+       * Overload for hp::DoFHandler<3,spacedim> and the int2type<2> tag
+       * (quads). Forwards @p obj_level, @p obj_index and @p n to
+       * dof_handler.faces->quads.nth_active_fe_index().
+       */
+      template <int spacedim>
+      static
+      unsigned int
+      nth_active_fe_index (const dealii::hp::DoFHandler<3,spacedim> &dof_handler,
+                           const unsigned int obj_level,
+                           const unsigned int obj_index,
+                           const unsigned int n,
+                           const dealii::internal::int2type<2> &)
+      {
+        return dof_handler.faces->quads.nth_active_fe_index (dof_handler,
+                                                             obj_level,
+                                                             obj_index,
+                                                             n);
+      }
+
+
+
+      /**
+       * Overload for hp::DoFHandler<3,spacedim> and the int2type<3> tag.
+       * None of the arguments is inspected; the result is always 1.
+       */
+      template <int spacedim>
+      static
+      unsigned int
+      n_active_fe_indices (const dealii::hp::DoFHandler<3,spacedim> &,
+                           const unsigned int /*obj_level*/,
+                           const unsigned int /*obj_index*/,
+                           const dealii::internal::int2type<3> &)
+      {
+        // on a cell, the number of active elements is one
+        return 1;
+      }
+
+
+
+      /**
+       * Overload for hp::DoFHandler<3,spacedim> and the int2type<3> tag.
+       * Asserts that @p n is zero and returns what active_fe_index()
+       * of the object at dof_handler.levels[obj_level] reports for
+       * @p obj_index.
+       */
+      template <int spacedim>
+      static
+      unsigned int
+      nth_active_fe_index (const dealii::hp::DoFHandler<3,spacedim> &dof_handler,
+                           const unsigned int obj_level,
+                           const unsigned int obj_index,
+                           const unsigned int n,
+                           const dealii::internal::int2type<3> &)
+      {
+        // n is looked at by the assertion only
+        (void)n;
+        Assert (n==0, ExcMessage("On cells, there can only be one active FE index"));
+
+        const unsigned int fe_index_on_cell
+          = dof_handler.levels[obj_level]->active_fe_index (obj_index);
+        return fe_index_on_cell;
+      }
+
+      /**
+       * Non-hp variant: store @p global_index as the @p local_index-th
+       * degree of freedom of the vertex with global number
+       * @p vertex_index. Only the default FE index is accepted for
+       * @p fe_index, and @p local_index must be smaller than the number
+       * of degrees of freedom per vertex of the selected element.
+       */
+      template <int dim, int spacedim>
+      static
+      void
+      set_vertex_dof_index (dealii::DoFHandler<dim,spacedim> &dof_handler,
+                            const unsigned int vertex_index,
+                            const unsigned int fe_index,
+                            const unsigned int local_index,
+                            const types::global_dof_index global_index)
+      {
+        // fe_index is looked at by the first assertion only
+        (void)fe_index;
+        Assert ((fe_index == dealii::DoFHandler<dim,spacedim>::default_fe_index),
+                ExcMessage ("Only the default FE index is allowed for non-hp DoFHandler objects"));
+        Assert (dof_handler.selected_fe != 0,
+                ExcMessage ("No finite element collection is associated with "
+                            "this DoFHandler"));
+        Assert (local_index < dof_handler.selected_fe->dofs_per_vertex,
+                ExcIndexRange(local_index, 0,
+                              dof_handler.selected_fe->dofs_per_vertex));
+
+        // each vertex owns dofs_per_vertex consecutive entries of
+        // vertex_dofs; pick the one addressed by local_index
+        types::global_dof_index &target_entry
+          = dof_handler.vertex_dofs[vertex_index *
+                                    dof_handler.selected_fe->dofs_per_vertex
+                                    + local_index];
+        target_entry = global_index;
+      }
+
+
+      /**
+       * hp variant: store @p global_index as the @p local_index-th degree
+       * of freedom that the finite element @p fe_index has on the vertex
+       * with global number @p vertex_index.
+       *
+       * The data of one vertex is a sequence of index sets inside
+       * dof_handler.vertex_dofs, beginning at
+       * dof_handler.vertex_dofs_offsets[vertex_index]. Each set consists
+       * of an fe index followed by dofs_per_vertex entries of that
+       * element. The function walks from set to set until it finds the
+       * one belonging to @p fe_index; if there is none, one of the
+       * internal assertions in the loop is triggered.
+       */
+      template <int dim, int spacedim>
+      static
+      void
+      set_vertex_dof_index (dealii::hp::DoFHandler<dim,spacedim> &dof_handler,
+                            const unsigned int vertex_index,
+                            const unsigned int fe_index,
+                            const unsigned int local_index,
+                            const types::global_dof_index global_index)
+      {
+        Assert ( (fe_index != dealii::hp::DoFHandler<dim,spacedim>::default_fe_index),
+                 ExcMessage ("You need to specify a FE index when working "
+                             "with hp DoFHandlers"));
+        Assert (dof_handler.finite_elements != 0,
+                ExcMessage ("No finite element collection is associated with "
+                            "this DoFHandler"));
+        // validate fe_index before it is used to index the collection in
+        // the check of local_index below
+        Assert (fe_index < dof_handler.finite_elements->size(),
+                ExcInternalError());
+        Assert (local_index < (*dof_handler.finite_elements)[fe_index].dofs_per_vertex,
+                ExcIndexRange(local_index, 0,
+                              (*dof_handler.finite_elements)[fe_index].dofs_per_vertex));
+        // same range check the hp version of get_vertex_dof_index performs
+        Assert (vertex_index < dof_handler.vertex_dofs_offsets.size(),
+                ExcIndexRange (vertex_index, 0,
+                               dof_handler.vertex_dofs_offsets.size()));
+        Assert (dof_handler.vertex_dofs_offsets[vertex_index] !=
+                numbers::invalid_dof_index,
+                ExcMessage ("This vertex is unused and has no DoFs associated with it"));
+
+        // hop along the list of index sets until we find the one with
+        // the correct fe_index, and then poke into that part. trigger
+        // an exception if we can't find a set for this particular
+        // fe_index
+        const types::global_dof_index starting_offset = dof_handler.vertex_dofs_offsets[vertex_index];
+        types::global_dof_index *pointer = &dof_handler.vertex_dofs[starting_offset];
+        while (true)
+          {
+            Assert (pointer <= &dof_handler.vertex_dofs.back(), ExcInternalError());
+
+            // a fe index is always small
+            Assert((*pointer)<std::numeric_limits<unsigned int>::max(), ExcInternalError());
+            const types::global_dof_index this_fe_index = *pointer;
+
+            Assert (this_fe_index != numbers::invalid_dof_index,
+                    ExcInternalError());
+            Assert (this_fe_index < dof_handler.finite_elements->size(),
+                    ExcInternalError());
+
+            if (this_fe_index == fe_index)
+              {
+                // the entries of a set follow directly after its fe index
+                *(pointer + 1 + local_index) = global_index;
+                return;
+              }
+            else
+              // skip the fe index plus all entries of this set
+              pointer += static_cast<types::global_dof_index>(
+                           (*dof_handler.finite_elements)[this_fe_index].dofs_per_vertex + 1);
+          }
+      }
+
+
+      /**
+       * Non-hp variant: return the global index of the @p local_index-th
+       * degree of freedom of the vertex with global number
+       * @p vertex_index. Only the default FE index is accepted for
+       * @p fe_index, and @p local_index must be smaller than the number
+       * of degrees of freedom per vertex of the selected element.
+       */
+
+      template <int dim, int spacedim>
+      static
+      types::global_dof_index
+      get_vertex_dof_index (const dealii::DoFHandler<dim,spacedim> &dof_handler,
+                            const unsigned int vertex_index,
+                            const unsigned int fe_index,
+                            const unsigned int local_index)
+      {
+        // fe_index is looked at by the first assertion only
+        (void)fe_index;
+        Assert ((fe_index == dealii::DoFHandler<dim,spacedim>::default_fe_index),
+                ExcMessage ("Only the default FE index is allowed for non-hp DoFHandler objects"));
+        Assert (dof_handler.selected_fe != 0,
+                ExcMessage ("No finite element collection is associated with "
+                            "this DoFHandler"));
+        Assert (local_index < dof_handler.selected_fe->dofs_per_vertex,
+                ExcIndexRange(local_index, 0,
+                              dof_handler.selected_fe->dofs_per_vertex));
+
+        // each vertex owns dofs_per_vertex consecutive entries of
+        // vertex_dofs; read the one addressed by local_index
+        const types::global_dof_index stored_index
+          = dof_handler.vertex_dofs[vertex_index *
+                                    dof_handler.selected_fe->dofs_per_vertex
+                                    + local_index];
+        return stored_index;
+      }
+
+
+      /**
+       * hp variant: return the global index of the @p local_index-th
+       * degree of freedom that the finite element @p fe_index has on the
+       * vertex with global number @p vertex_index.
+       *
+       * The data of one vertex is a sequence of index sets inside
+       * dof_handler.vertex_dofs, beginning at
+       * dof_handler.vertex_dofs_offsets[vertex_index]. Each set consists
+       * of an fe index followed by dofs_per_vertex entries of that
+       * element. The function walks from set to set until it finds the
+       * one belonging to @p fe_index; if there is none, one of the
+       * internal assertions in the loop is triggered.
+       */
+      template<int dim, int spacedim>
+      static
+      types::global_dof_index
+      get_vertex_dof_index (const dealii::hp::DoFHandler<dim,spacedim> &dof_handler,
+                            const unsigned int vertex_index,
+                            const unsigned int fe_index,
+                            const unsigned int local_index)
+      {
+        Assert ( (fe_index != dealii::hp::DoFHandler<dim,spacedim>::default_fe_index),
+                 ExcMessage ("You need to specify a FE index when working "
+                             "with hp DoFHandlers"));
+        Assert (dof_handler.finite_elements != 0,
+                ExcMessage ("No finite element collection is associated with "
+                            "this DoFHandler"));
+        // validate fe_index before it is used to index the collection in
+        // the check of local_index below (the hp version of
+        // set_vertex_dof_index performs the same test)
+        Assert (fe_index < dof_handler.finite_elements->size(),
+                ExcInternalError());
+        Assert (local_index < (*dof_handler.finite_elements)[fe_index].dofs_per_vertex,
+                ExcIndexRange(local_index, 0,
+                              (*dof_handler.finite_elements)[fe_index].dofs_per_vertex));
+        Assert (vertex_index < dof_handler.vertex_dofs_offsets.size(),
+                ExcIndexRange (vertex_index, 0,
+                               dof_handler.vertex_dofs_offsets.size()));
+        Assert (dof_handler.vertex_dofs_offsets[vertex_index] !=
+                numbers::invalid_dof_index,
+                ExcMessage ("This vertex is unused and has no DoFs associated with it"));
+
+        // hop along the list of index sets until we find the one with
+        // the correct fe_index, and then poke into that part. trigger
+        // an exception if we can't find a set for this particular
+        // fe_index
+        const types::global_dof_index starting_offset = dof_handler.vertex_dofs_offsets[vertex_index];
+        const types::global_dof_index *pointer = &dof_handler.vertex_dofs[starting_offset];
+        while (true)
+          {
+            Assert (pointer <= &dof_handler.vertex_dofs.back(), ExcInternalError());
+
+            Assert((*pointer)<std::numeric_limits<types::global_dof_index>::max(), ExcInternalError());
+            const types::global_dof_index this_fe_index = *pointer;
+
+            Assert (this_fe_index != numbers::invalid_dof_index,
+                    ExcInternalError());
+            Assert (this_fe_index < dof_handler.finite_elements->size(),
+                    ExcInternalError());
+
+            if (this_fe_index == fe_index)
+              // the entries of a set follow directly after its fe index
+              return *(pointer + 1 + local_index);
+            else
+              // skip the fe index plus all entries of this set
+              pointer += static_cast<types::global_dof_index>(
+                           (*dof_handler.finite_elements)[this_fe_index].dofs_per_vertex + 1);
+          }
+      }
+
+
+      /**
+       * Count the finite elements that are active on the vertex with
+       * global number @p vertex_index. A vertex whose offset entry equals
+       * numbers::invalid_dof_index is unused and yields zero.
+       */
+      template<int dim, int spacedim>
+      static
+      unsigned int
+      n_active_vertex_fe_indices (const dealii::hp::DoFHandler<dim,spacedim> &dof_handler,
+                                  const unsigned int vertex_index)
+      {
+        Assert (dof_handler.finite_elements != 0,
+                ExcMessage ("No finite element collection is associated with "
+                            "this DoFHandler"));
+
+        // unused vertices carry no index sets at all
+        const types::global_dof_index first_entry = dof_handler.vertex_dofs_offsets[vertex_index];
+        if (first_entry == numbers::invalid_dof_index)
+          return 0;
+
+        const types::global_dof_index *cursor = &dof_handler.vertex_dofs[first_entry];
+
+        Assert (*cursor != numbers::invalid_dof_index,
+                ExcInternalError());
+
+        // step from one index set to the next and count the steps; the
+        // sequence ends at an entry equal to invalid_dof_index
+        unsigned int n_sets = 0;
+        for (;; ++n_sets)
+          {
+            Assert (cursor <= &dof_handler.vertex_dofs.back(), ExcInternalError());
+
+            const types::global_dof_index leading_fe_index = *cursor;
+            if (leading_fe_index == numbers::invalid_dof_index)
+              break;
+
+            // skip the fe index plus all entries of this set
+            cursor += static_cast<types::global_dof_index>(
+                        (*dof_handler.finite_elements)[leading_fe_index].dofs_per_vertex + 1);
+          }
+
+        return n_sets;
+      }
+
+
+
+      /**
+       * Return the fe index that leads the @p n-th index set stored for
+       * the vertex with global number @p vertex_index, i.e. the n-th
+       * finite element active on that vertex. @p n must be smaller than
+       * n_active_vertex_fe_indices() for the same vertex.
+       */
+      template<int dim, int spacedim>
+      static
+      unsigned int
+      nth_active_vertex_fe_index (const dealii::hp::DoFHandler<dim,spacedim> &dof_handler,
+                                  const unsigned int vertex_index,
+                                  const unsigned int n)
+      {
+        Assert (dof_handler.finite_elements != 0,
+                ExcMessage ("No finite element collection is associated with "
+                            "this DoFHandler"));
+        Assert (n < n_active_vertex_fe_indices(dof_handler, vertex_index),
+                ExcIndexRange (n, 0, n_active_vertex_fe_indices(dof_handler,
+                                                                vertex_index)));
+        // unused vertices must not be queried
+        Assert (dof_handler.vertex_dofs_offsets[vertex_index] !=
+                numbers::invalid_dof_index,
+                ExcInternalError());
+
+        const types::global_dof_index first_entry = dof_handler.vertex_dofs_offsets[vertex_index];
+        const types::global_dof_index *cursor = &dof_handler.vertex_dofs[first_entry];
+
+        Assert (*cursor != numbers::invalid_dof_index,
+                ExcInternalError());
+
+        // step from one index set to the next until n steps have been
+        // taken; the fe index found there is the answer
+        for (unsigned int n_hops = 0; ; ++n_hops)
+          {
+            Assert (cursor <= &dof_handler.vertex_dofs.back(), ExcInternalError());
+
+            Assert((*cursor)<std::numeric_limits<unsigned int>::max(), ExcInternalError());
+            const types::global_dof_index leading_fe_index = *cursor;
+
+            Assert (leading_fe_index < dof_handler.finite_elements->size(),
+                    ExcInternalError());
+
+            if (n_hops == n)
+              return leading_fe_index;
+
+            Assert (leading_fe_index != numbers::invalid_dof_index,
+                    ExcInternalError());
+
+            // skip the fe index plus all entries of this set
+            cursor += static_cast<types::global_dof_index>(
+                        (*dof_handler.finite_elements)[leading_fe_index].dofs_per_vertex + 1);
+          }
+      }
+
+
+
+      /**
+       * Return whether the finite element with index @p fe_index is
+       * active on the vertex with global number @p vertex_index.
+       *
+       * The index sets stored for the vertex are visited one after the
+       * other. The function returns true as soon as a set led by
+       * @p fe_index is found, and false when the entry
+       * numbers::invalid_dof_index that ends the sequence is reached
+       * first. The vertex itself must be in use.
+       */
+      template<int dim, int spacedim>
+      static
+      bool
+      fe_is_active_on_vertex (const dealii::hp::DoFHandler<dim,spacedim> &dof_handler,
+                              const unsigned int vertex_index,
+                              const unsigned int fe_index)
+      {
+        Assert ( (fe_index != dealii::hp::DoFHandler<dim,spacedim>::default_fe_index),
+                 ExcMessage ("You need to specify a FE index when working "
+                             "with hp DoFHandlers"));
+        Assert (dof_handler.finite_elements != 0,
+                ExcMessage ("No finite element collection is associated with "
+                            "this DoFHandler"));
+        Assert (fe_index < dof_handler.finite_elements->size(),
+                ExcInternalError());
+
+        // make sure we don't ask on unused vertices
+        Assert (dof_handler.vertex_dofs_offsets[vertex_index] !=
+                numbers::invalid_dof_index,
+                ExcInternalError());
+
+        // hop along the list of index sets and see whether we find the
+        // given index
+        const types::global_dof_index starting_offset = dof_handler.vertex_dofs_offsets[vertex_index];
+        const types::global_dof_index *pointer = &dof_handler.vertex_dofs[starting_offset];
+
+        Assert (*pointer != numbers::invalid_dof_index,
+                ExcInternalError());
+
+        while (true)
+          {
+            Assert (pointer <= &dof_handler.vertex_dofs.back(), ExcInternalError());
+
+            const types::global_dof_index this_fe_index = *pointer;
+
+            // the end marker has to be recognized before any range
+            // assertion on the value: otherwise the assertion fires on
+            // the marker itself and "not active" can never be reported
+            // while assertions are enabled
+            if (this_fe_index == numbers::invalid_dof_index)
+              return false;
+
+            // everything else must be a valid index into the collection
+            Assert (this_fe_index < dof_handler.finite_elements->size(),
+                    ExcInternalError());
+
+            if (this_fe_index == fe_index)
+              return true;
+
+            // skip the fe index plus all entries of this set
+            pointer += (*dof_handler.finite_elements)[this_fe_index].dofs_per_vertex + 1;
+          }
+      }
+
+      /**
+       * Overload for DoFAccessor<1,...>. Not implemented: every call
+       * throws ExcNotImplemented via AssertThrow, and none of the
+       * arguments is used.
+       */
+      template<typename DoFHandlerType, bool level_dof_access>
+      static
+      void set_mg_dof_indices (const dealii::DoFAccessor<1,DoFHandlerType,level_dof_access> &,
+                               const int,
+                               const std::vector<types::global_dof_index> &,
+                               const unsigned int)
+      {
+        AssertThrow (false, ExcNotImplemented ()); //TODO[TH]: implement
+      }
+
+
+
+      /**
+       * Overload for DoFAccessor<2,...>. Distributes the entries of
+       * @p dof_indices, in order, to the level-@p level degrees of freedom
+       * of the object @p accessor points to: first those on its vertices,
+       * then those on its lines, finally those on the quad itself. The
+       * numbers of entries per vertex, line and quad are taken from the
+       * finite element with index @p fe_index. An assertion at the end
+       * checks that every entry of @p dof_indices has been consumed.
+       */
+      template<typename DoFHandlerType, bool level_dof_access>
+      static
+      void set_mg_dof_indices (dealii::DoFAccessor<2, DoFHandlerType,level_dof_access> &accessor,
+                               const int level,
+                               const std::vector<types::global_dof_index> &dof_indices,
+                               const unsigned int fe_index)
+      {
+        const FiniteElement<DoFHandlerType::dimension, DoFHandlerType::space_dimension> &fe
+          = accessor.get_dof_handler ().get_fe ()[fe_index];
+
+        // read position inside dof_indices; advanced after every entry
+        std::vector<types::global_dof_index>::const_iterator cursor = dof_indices.begin ();
+
+        // degrees of freedom on the vertices
+        for (unsigned int v = 0; v < GeometryInfo<2>::vertices_per_cell; ++v)
+          for (unsigned int d = 0; d < fe.dofs_per_vertex; ++d)
+            {
+              accessor.set_mg_vertex_dof_index(level, v, d, *cursor, fe_index);
+              ++cursor;
+            }
+
+        // degrees of freedom on the lines
+        for (unsigned int l = 0; l < GeometryInfo<2>::lines_per_cell; ++l)
+          for (unsigned int d = 0; d < fe.dofs_per_line; ++d)
+            {
+              accessor.line(l)->set_mg_dof_index(level, d, *cursor);
+              ++cursor;
+            }
+
+        // degrees of freedom on the quad itself
+        for (unsigned int d = 0; d < fe.dofs_per_quad; ++d)
+          {
+            accessor.set_mg_dof_index(level, d, *cursor);
+            ++cursor;
+          }
+
+        Assert (cursor == dof_indices.end (), ExcInternalError ());
+      }
+
+
+
+      /**
+       * Overload for DoFAccessor<3,...>. Distributes the entries of
+       * @p dof_indices, in order, to the level-@p level degrees of freedom
+       * of the object @p accessor points to: first those on its vertices,
+       * then those on its lines, then those on its quads, finally those
+       * on the hex itself. The numbers of entries per vertex, line, quad
+       * and hex are taken from the finite element with index
+       * @p fe_index. An assertion at the end checks that every entry of
+       * @p dof_indices has been consumed.
+       */
+      template<typename DoFHandlerType, bool level_dof_access>
+      static
+      void set_mg_dof_indices
+      (const dealii::DoFAccessor<3, DoFHandlerType,level_dof_access>      &accessor,
+       const int                                   level,
+       const std::vector<types::global_dof_index> &dof_indices,
+       const unsigned int                          fe_index)
+      {
+        const FiniteElement<DoFHandlerType::dimension, DoFHandlerType::space_dimension> &fe
+          = accessor.get_dof_handler ().get_fe ()[fe_index];
+
+        // read position inside dof_indices; advanced after every entry
+        std::vector<types::global_dof_index>::const_iterator cursor = dof_indices.begin ();
+
+        // degrees of freedom on the vertices
+        for (unsigned int v = 0; v < GeometryInfo<3>::vertices_per_cell; ++v)
+          for (unsigned int d = 0; d < fe.dofs_per_vertex; ++d)
+            {
+              accessor.set_mg_vertex_dof_index(level, v, d, *cursor, fe_index);
+              ++cursor;
+            }
+
+        // degrees of freedom on the lines
+        for (unsigned int l = 0; l < GeometryInfo<3>::lines_per_cell; ++l)
+          for (unsigned int d = 0; d < fe.dofs_per_line; ++d)
+            {
+              accessor.line(l)->set_mg_dof_index(level, d, *cursor);
+              ++cursor;
+            }
+
+        // degrees of freedom on the quads
+        for (unsigned int q = 0; q < GeometryInfo<3>::quads_per_cell; ++q)
+          for (unsigned int d = 0; d < fe.dofs_per_quad; ++d)
+            {
+              accessor.quad(q)->set_mg_dof_index(level, d, *cursor);
+              ++cursor;
+            }
+
+        // degrees of freedom on the hex itself
+        for (unsigned int d = 0; d < fe.dofs_per_hex; ++d)
+          {
+            accessor.set_mg_dof_index(level, d, *cursor);
+            ++cursor;
+          }
+
+        Assert (cursor == dof_indices.end (), ExcInternalError ());
+      }
+
+    };
+  }
+}
+
+
+
+// Return the global index of the i-th degree of freedom of this object
+// for the finite element fe_index. The work is delegated to
+// internal::DoFAccessor::Implementation::get_dof_index(), whose overload
+// is selected by the dof handler type and the int2type<dim> tag.
+template <int dim, typename DoFHandlerType, bool level_dof_access>
+inline
+types::global_dof_index
+DoFAccessor<dim,DoFHandlerType,level_dof_access>::dof_index (const unsigned int i,
+    const unsigned int fe_index) const
+{
+  // access the respective DoF
+  return dealii::internal::DoFAccessor::Implementation::get_dof_index (*this->dof_handler,
+         this->level(),
+         this->present_index,
+         fe_index,
+         i,
+         dealii::internal::int2type<dim>());
+}
+
+
+// Return the i-th degree of freedom index of this object on the given
+// level by asking the dof handler's get_dof_index<structdim>() template.
+// The literal 0 passed as third argument is presumably the fe index --
+// TODO confirm against the declaration of get_dof_index().
+template<int structdim, typename DoFHandlerType, bool level_dof_access>
+inline
+types::global_dof_index
+DoFAccessor<structdim, DoFHandlerType,level_dof_access>::mg_dof_index (const int level,
+    const unsigned int i) const
+{
+  return this->dof_handler->template get_dof_index<structdim> (level, this->present_index, 0, i);
+}
+
+
+// Record 'index' as the global number of the i-th degree of freedom on
+// this object for the finite element selected by fe_index. The write is
+// delegated to the Implementation helper, dispatched on the structural
+// dimension of this accessor.
+template <int dim, typename DoFHandlerType, bool level_dof_access>
+inline
+void
+DoFAccessor<dim,DoFHandlerType,level_dof_access>::set_dof_index
+(const unsigned int            i,
+ const types::global_dof_index index,
+ const unsigned int            fe_index) const
+{
+  typedef dealii::internal::DoFAccessor::Implementation Impl;
+
+  Impl::set_dof_index (*this->dof_handler,
+                       this->level(),
+                       this->present_index,
+                       fe_index,
+                       i,
+                       dealii::internal::int2type<dim>(),
+                       index);
+}
+
+
+
+// Return the number of active finite element indices on this object, as
+// reported by the Implementation helper for this structural dimension.
+template <int dim, typename DoFHandlerType, bool level_dof_access>
+inline
+unsigned int
+DoFAccessor<dim,DoFHandlerType,level_dof_access>::n_active_fe_indices () const
+{
+  typedef dealii::internal::DoFAccessor::Implementation Impl;
+
+  return Impl::n_active_fe_indices (*this->dof_handler,
+                                    this->level(),
+                                    this->present_index,
+                                    dealii::internal::int2type<dim>());
+}
+
+
+
+// Return the n-th active finite element index of this object, as reported
+// by the Implementation helper for this structural dimension.
+template <int dim, typename DoFHandlerType, bool level_dof_access>
+inline
+unsigned int
+DoFAccessor<dim,DoFHandlerType,level_dof_access>::nth_active_fe_index
+(const unsigned int n) const
+{
+  typedef dealii::internal::DoFAccessor::Implementation Impl;
+
+  return Impl::nth_active_fe_index (*this->dof_handler,
+                                    this->level(),
+                                    this->present_index,
+                                    n,
+                                    dealii::internal::int2type<dim>());
+}
+
+
+
+// Tell whether the given finite element index is active on this object,
+// as reported by the Implementation helper for this structural dimension.
+template <int dim, typename DoFHandlerType, bool level_dof_access>
+inline
+bool
+DoFAccessor<dim,DoFHandlerType,level_dof_access>::fe_index_is_active
+(const unsigned int fe_index) const
+{
+  typedef dealii::internal::DoFAccessor::Implementation Impl;
+
+  return Impl::fe_index_is_active (*this->dof_handler,
+                                   this->level(),
+                                   this->present_index,
+                                   fe_index,
+                                   dealii::internal::int2type<dim>());
+}
+
+
+
+// Return the global index of the i-th degree of freedom located on the
+// given vertex of this object, for the finite element selected by
+// fe_index. The object-local vertex number is first mapped through
+// vertex_index() before the Implementation helper is queried.
+template <int structdim, typename DoFHandlerType, bool level_dof_access>
+inline
+types::global_dof_index
+DoFAccessor<structdim, DoFHandlerType,level_dof_access>::vertex_dof_index
+(const unsigned int vertex,
+ const unsigned int i,
+ const unsigned int fe_index) const
+{
+  typedef dealii::internal::DoFAccessor::Implementation Impl;
+
+  return Impl::get_vertex_dof_index (*this->dof_handler,
+                                     this->vertex_index(vertex),
+                                     fe_index,
+                                     i);
+}
+
+
+// Return the i-th multigrid degree of freedom index stored for the given
+// vertex of this object on the given level. The value is read from the
+// DoFHandler's mg_vertex_dofs table; fe_index only takes part in the
+// range check on i.
+template<int structdim, typename DoFHandlerType, bool level_dof_access>
+inline
+types::global_dof_index
+DoFAccessor<structdim, DoFHandlerType,level_dof_access>::mg_vertex_dof_index
+(const int          level,
+ const unsigned int vertex,
+ const unsigned int i,
+ const unsigned int fe_index) const
+{
+  // fe_index is referenced only inside Assert below; the cast presumably
+  // silences unused-parameter warnings when assertions are disabled
+  (void)fe_index;
+
+  Assert (this->dof_handler != 0, ExcInvalidObject ());
+  Assert (vertex < GeometryInfo<structdim>::vertices_per_cell, ExcIndexRange (vertex, 0, GeometryInfo<structdim>::vertices_per_cell));
+  Assert (i < this->dof_handler->get_fe ()[fe_index].dofs_per_vertex, ExcIndexRange (i, 0, this->dof_handler->get_fe ()[fe_index].dofs_per_vertex));
+
+  return this->dof_handler->mg_vertex_dofs[this->vertex_index (vertex)].get_index (level, i);
+}
+
+
+// Record 'index' as the global number of the i-th degree of freedom on
+// the given vertex of this object, for the finite element selected by
+// fe_index. The object-local vertex number is mapped through
+// vertex_index() before the Implementation helper performs the write.
+template <int structdim, typename DoFHandlerType, bool level_dof_access>
+inline
+void
+DoFAccessor<structdim, DoFHandlerType,level_dof_access>::set_vertex_dof_index
+(const unsigned int            vertex,
+ const unsigned int            i,
+ const types::global_dof_index index,
+ const unsigned int            fe_index) const
+{
+  typedef dealii::internal::DoFAccessor::Implementation Impl;
+
+  Impl::set_vertex_dof_index (*this->dof_handler,
+                              this->vertex_index(vertex),
+                              fe_index,
+                              i,
+                              index);
+}
+
+
+// Store 'index' as the i-th multigrid degree of freedom of the given
+// vertex of this object on the given level. The value is written into the
+// DoFHandler's mg_vertex_dofs table; fe_index only takes part in the
+// range check on i.
+template<int structdim, typename DoFHandlerType, bool level_dof_access>
+inline
+void
+DoFAccessor<structdim, DoFHandlerType,level_dof_access>::set_mg_vertex_dof_index
+(const int                     level,
+ const unsigned int            vertex,
+ const unsigned int            i,
+ const types::global_dof_index index,
+ const unsigned int            fe_index) const
+{
+  // fe_index is referenced only inside Assert below; the cast presumably
+  // silences unused-parameter warnings when assertions are disabled
+  (void)fe_index;
+
+  Assert (this->dof_handler != 0, ExcInvalidObject ());
+  Assert (vertex < GeometryInfo<structdim>::vertices_per_cell, ExcIndexRange (vertex, 0, GeometryInfo<structdim>::vertices_per_cell));
+  Assert (i < this->dof_handler->get_fe ()[fe_index].dofs_per_vertex, ExcIndexRange (i, 0, this->dof_handler->get_fe ()[fe_index].dofs_per_vertex));
+
+  this->dof_handler->mg_vertex_dofs[this->vertex_index (vertex)].set_index (level, i, index);
+}
+
+
+// Store 'index' as the i-th multigrid degree of freedom of this object on
+// the given level by writing through the DoFHandler.
+template<int structdim, typename DoFHandlerType, bool level_dof_access>
+inline
+void
+DoFAccessor<structdim, DoFHandlerType,level_dof_access>::set_mg_dof_index
+(const int                     level,
+ const unsigned int            i,
+ const types::global_dof_index index) const
+{
+  // NOTE(review): the literal 0 presumably fills the fe_index slot of
+  // DoFHandler::set_dof_index -- confirm against its declaration
+  this->dof_handler->template set_dof_index<structdim> (level,
+                                                        this->present_index,
+                                                        0,
+                                                        i,
+                                                        index);
+}
+
+
+namespace internal
+{
+  namespace DoFAccessor
+  {
+    // Overload for a single finite element: the index argument is ignored
+    // and the element itself is handed back.
+    template <int dim, int spacedim>
+    inline
+    const FiniteElement<dim,spacedim> &
+    get_fe (const FiniteElement<dim,spacedim> &fe,
+            const unsigned int /*index*/)
+    {
+      return fe;
+    }
+
+
+
+    // Overload for a collection of finite elements: return the element
+    // stored at position 'index' of the collection.
+    template <int dim, int spacedim>
+    inline
+    const FiniteElement<dim,spacedim> &
+    get_fe (const dealii::hp::FECollection<dim,spacedim> &fe,
+            const unsigned int                            index)
+    {
+      const FiniteElement<dim,spacedim> &selected = fe[index];
+      return selected;
+    }
+  }
+}
+
+
+// Return the finite element that belongs to the given fe_index. In debug
+// checks the index must be active on this object. The actual selection is
+// left to the internal get_fe() overloads, which accept either a single
+// element or a collection.
+template <int dim, typename DoFHandlerType, bool level_dof_access>
+inline
+const FiniteElement<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &
+DoFAccessor<dim,DoFHandlerType,level_dof_access>::get_fe
+(const unsigned int fe_index) const
+{
+  Assert (fe_index_is_active (fe_index) == true,
+          ExcMessage ("This function can only be called for active fe indices"));
+
+  return dealii::internal::DoFAccessor::get_fe (this->dof_handler->get_fe(),
+                                                fe_index);
+}
+
+
+
+namespace internal
+{
+  namespace DoFAccessor
+  {
+    // Fill dof_indices with the global DoF indices of a line: first the
+    // DoFs on its two vertices, then the DoFs located on the line itself.
+    // The caller is responsible for sizing dof_indices correctly.
+    template <typename DoFHandlerType, bool level_dof_access>
+    void get_dof_indices (const dealii::DoFAccessor<1,DoFHandlerType,level_dof_access> &accessor,
+                          std::vector<types::global_dof_index>                         &dof_indices,
+                          const unsigned int                                            fe_index)
+    {
+      const FiniteElement<DoFHandlerType::dimension, DoFHandlerType::space_dimension> &fe
+        = accessor.get_fe(fe_index);
+      std::vector<types::global_dof_index>::iterator out = dof_indices.begin();
+
+      // vertex DoFs
+      for (unsigned int v=0; v<2; ++v)
+        for (unsigned int k=0; k<fe.dofs_per_vertex; ++k)
+          {
+            *out = accessor.vertex_dof_index(v,k,fe_index);
+            ++out;
+          }
+
+      // DoFs on the line itself
+      for (unsigned int k=0; k<fe.dofs_per_line; ++k)
+        {
+          *out = accessor.dof_index(k,fe_index);
+          ++out;
+        }
+    }
+
+
+
+    // Fill dof_indices with the global DoF indices of a quad: vertex DoFs
+    // first, then the DoFs of its four lines, then the DoFs located on
+    // the quad itself. The caller is responsible for sizing dof_indices
+    // correctly.
+    template <typename DoFHandlerType, bool level_dof_access>
+    void get_dof_indices (const dealii::DoFAccessor<2,DoFHandlerType,level_dof_access> &accessor,
+                          std::vector<types::global_dof_index>                         &dof_indices,
+                          const unsigned int                                            fe_index)
+    {
+      const FiniteElement<DoFHandlerType::dimension, DoFHandlerType::space_dimension> &fe
+        = accessor.get_fe(fe_index);
+      std::vector<types::global_dof_index>::iterator out = dof_indices.begin();
+
+      // vertex DoFs
+      for (unsigned int v=0; v<4; ++v)
+        for (unsigned int k=0; k<fe.dofs_per_vertex; ++k)
+          {
+            *out = accessor.vertex_dof_index(v,k,fe_index);
+            ++out;
+          }
+
+      // line DoFs. a line may be stored with the opposite orientation
+      // (which might occur in 3d). the vertices are already handled
+      // correctly, but the DoF numbering along such a line is seen
+      // flipped, so each line-local index is first translated by the
+      // finite element into the (face-local) ordering expected here.
+      for (unsigned int l=0; l<4; ++l)
+        for (unsigned int k=0; k<fe.dofs_per_line; ++k)
+          {
+            const unsigned int adjusted_k
+              = fe.adjust_line_dof_index_for_line_orientation(k,
+                                                              accessor.line_orientation(l));
+            *out = accessor.line(l)->dof_index(adjusted_k, fe_index);
+            ++out;
+          }
+
+      // DoFs on the quad itself
+      for (unsigned int k=0; k<fe.dofs_per_quad; ++k)
+        {
+          *out = accessor.dof_index(k,fe_index);
+          ++out;
+        }
+    }
+
+
+
+    // Fill dof_indices with the global DoF indices of a hex: vertex DoFs
+    // first, then the DoFs of its twelve lines, then those of its six
+    // quads, and finally the DoFs located on the hex itself. The caller
+    // is responsible for sizing dof_indices correctly.
+    template <typename DoFHandlerType, bool level_dof_access>
+    void get_dof_indices (const dealii::DoFAccessor<3,DoFHandlerType,level_dof_access> &accessor,
+                          std::vector<types::global_dof_index>                         &dof_indices,
+                          const unsigned int                                            fe_index)
+    {
+      const FiniteElement<DoFHandlerType::dimension, DoFHandlerType::space_dimension> &fe
+        = accessor.get_fe(fe_index);
+      std::vector<types::global_dof_index>::iterator out = dof_indices.begin();
+
+      // vertex DoFs
+      for (unsigned int v=0; v<8; ++v)
+        for (unsigned int k=0; k<fe.dofs_per_vertex; ++k)
+          {
+            *out = accessor.vertex_dof_index(v,k,fe_index);
+            ++out;
+          }
+
+      // line DoFs. for a line stored with the opposite orientation the
+      // vertices are already picked correctly, but the DoF numbering
+      // along the line is seen flipped; the finite element translates
+      // each line-local index into the (cell-local) ordering.
+      for (unsigned int l=0; l<12; ++l)
+        for (unsigned int k=0; k<fe.dofs_per_line; ++k)
+          {
+            const unsigned int adjusted_k
+              = fe.adjust_line_dof_index_for_line_orientation(k,
+                                                              accessor.line_orientation(l));
+            *out = accessor.line(l)->dof_index(adjusted_k, fe_index);
+            ++out;
+          }
+
+      // quad DoFs. the same reasoning applies to faces whose
+      // orientation, flip or rotation is non-standard: lines and
+      // vertices are already picked correctly, but the face-local index
+      // has to be translated into the (cell-local) ordering.
+      for (unsigned int q=0; q<6; ++q)
+        for (unsigned int k=0; k<fe.dofs_per_quad; ++k)
+          {
+            const unsigned int adjusted_k
+              = fe.adjust_quad_dof_index_for_face_orientation(k,
+                                                              accessor.face_orientation(q),
+                                                              accessor.face_flip(q),
+                                                              accessor.face_rotation(q));
+            *out = accessor.quad(q)->dof_index(adjusted_k, fe_index);
+            ++out;
+          }
+
+      // DoFs on the hex itself
+      for (unsigned int k=0; k<fe.dofs_per_hex; ++k)
+        {
+          *out = accessor.dof_index(k,fe_index);
+          ++out;
+        }
+    }
+
+
+
+    // Fill dof_indices with the multigrid DoF indices of a line on the
+    // given level: vertex DoFs first, then the DoFs on the line itself.
+    // In debug checks the output vector must be consumed exactly.
+    template<typename DoFHandlerType, bool level_dof_access>
+    void get_mg_dof_indices
+    (const dealii::DoFAccessor<1, DoFHandlerType,level_dof_access> &accessor,
+     const int                                                      level,
+     std::vector<types::global_dof_index>                          &dof_indices,
+     const unsigned int                                             fe_index)
+    {
+      const DoFHandlerType &handler = accessor.get_dof_handler();
+      Assert(handler.n_dofs(level) != numbers::invalid_dof_index,
+             ExcNotInitialized());
+
+      const FiniteElement<DoFHandlerType::dimension, DoFHandlerType::space_dimension> &fe
+        = handler.get_fe ()[fe_index];
+      std::vector<types::global_dof_index>::iterator out = dof_indices.begin ();
+
+      // vertex DoFs
+      for (unsigned int v = 0; v < GeometryInfo<1>::vertices_per_cell; ++v)
+        for (unsigned int k = 0; k < fe.dofs_per_vertex; ++k)
+          {
+            *out = accessor.mg_vertex_dof_index (level, v, k);
+            ++out;
+          }
+
+      // DoFs on the line itself
+      for (unsigned int k = 0; k < fe.dofs_per_line; ++k)
+        {
+          *out = accessor.mg_dof_index (level, k);
+          ++out;
+        }
+
+      // 'next' aliases the write cursor so that the check below keeps
+      // its original wording
+      const std::vector<types::global_dof_index>::iterator next = out;
+      (void)next;
+      Assert (next == dof_indices.end (), ExcInternalError ());
+    }
+
+
+
+    // Fill dof_indices with the multigrid DoF indices of a quad on the
+    // given level: vertex DoFs, then line DoFs, then the DoFs on the quad
+    // itself. In debug checks the output vector must be consumed exactly.
+    template<typename DoFHandlerType, bool level_dof_access>
+    void get_mg_dof_indices (const dealii::DoFAccessor<2, DoFHandlerType,level_dof_access> &accessor,
+                             const int level,
+                             std::vector<types::global_dof_index> &dof_indices,
+                             const unsigned int fe_index)
+    {
+      const DoFHandlerType &handler = accessor.get_dof_handler();
+      Assert(handler.n_dofs(level) != numbers::invalid_dof_index,
+             ExcNotInitialized());
+
+      const FiniteElement<DoFHandlerType::dimension, DoFHandlerType::space_dimension> &fe
+        = handler.get_fe ()[fe_index];
+      std::vector<types::global_dof_index>::iterator out = dof_indices.begin ();
+
+      // vertex DoFs
+      for (unsigned int v = 0; v < GeometryInfo<2>::vertices_per_cell; ++v)
+        for (unsigned int k = 0; k < fe.dofs_per_vertex; ++k)
+          {
+            *out = accessor.mg_vertex_dof_index (level, v, k);
+            ++out;
+          }
+
+      // line DoFs
+      for (unsigned int l = 0; l < GeometryInfo<2>::lines_per_cell; ++l)
+        for (unsigned int k = 0; k < fe.dofs_per_line; ++k)
+          {
+            *out = accessor.line (l)->mg_dof_index (level, k);
+            ++out;
+          }
+
+      // DoFs on the quad itself
+      for (unsigned int k = 0; k < fe.dofs_per_quad; ++k)
+        {
+          *out = accessor.mg_dof_index (level, k);
+          ++out;
+        }
+
+      // 'next' aliases the write cursor so that the check below keeps
+      // its original wording
+      const std::vector<types::global_dof_index>::iterator next = out;
+      (void)next;
+      Assert (next == dof_indices.end (), ExcInternalError ());
+    }
+
+
+
+    // Fill dof_indices with the multigrid DoF indices of a hex on the
+    // given level: vertex DoFs, line DoFs, quad DoFs, then the DoFs on
+    // the hex itself. In debug checks the output vector must be consumed
+    // exactly.
+    template<typename DoFHandlerType, bool level_dof_access>
+    void get_mg_dof_indices
+    (const dealii::DoFAccessor<3, DoFHandlerType,level_dof_access> &accessor,
+     const int                                                      level,
+     std::vector<types::global_dof_index>                          &dof_indices,
+     const unsigned int                                             fe_index)
+    {
+      const DoFHandlerType &handler = accessor.get_dof_handler();
+      Assert(handler.n_dofs(level) != numbers::invalid_dof_index,
+             ExcNotInitialized());
+
+      const FiniteElement<DoFHandlerType::dimension, DoFHandlerType::space_dimension> &fe
+        = handler.get_fe ()[fe_index];
+      std::vector<types::global_dof_index>::iterator out = dof_indices.begin ();
+
+      // vertex DoFs
+      for (unsigned int v = 0; v < GeometryInfo<3>::vertices_per_cell; ++v)
+        for (unsigned int k = 0; k < fe.dofs_per_vertex; ++k)
+          {
+            *out = accessor.mg_vertex_dof_index (level, v, k);
+            ++out;
+          }
+
+      // line DoFs
+      for (unsigned int l = 0; l < GeometryInfo<3>::lines_per_cell; ++l)
+        for (unsigned int k = 0; k < fe.dofs_per_line; ++k)
+          {
+            *out = accessor.line (l)->mg_dof_index (level, k);
+            ++out;
+          }
+
+      // quad DoFs
+      for (unsigned int q = 0; q < GeometryInfo<3>::quads_per_cell; ++q)
+        for (unsigned int k = 0; k < fe.dofs_per_quad; ++k)
+          {
+            *out = accessor.quad (q)->mg_dof_index (level, k);
+            ++out;
+          }
+
+      // DoFs on the hex itself
+      for (unsigned int k = 0; k < fe.dofs_per_hex; ++k)
+        {
+          *out = accessor.mg_dof_index (level, k);
+          ++out;
+        }
+
+      // 'next' aliases the write cursor so that the check below keeps
+      // its original wording
+      const std::vector<types::global_dof_index>::iterator next = out;
+      (void)next;
+      Assert (next == dof_indices.end (), ExcInternalError ());
+    }
+
+
+  }
+}
+
+
+// Write the global DoF indices of this object into dof_indices, for the
+// finite element selected by fe_index. The vector is not resized here; in
+// debug checks it must already hold exactly as many entries as the
+// element has DoFs on an object of this structural dimension. The actual
+// copying is done by the dimension-specific internal helper.
+template <int structdim, typename DoFHandlerType, bool level_dof_access>
+inline
+void
+DoFAccessor<structdim,DoFHandlerType,level_dof_access>::get_dof_indices
+(std::vector<types::global_dof_index> &dof_indices,
+ const unsigned int                    fe_index) const
+{
+  Assert (this->dof_handler != 0, ExcNotInitialized());
+  Assert (static_cast<unsigned int>(this->level()) < this->dof_handler->levels.size(),
+          ExcMessage ("DoFHandler not initialized"));
+
+  // verify the size of the output vector against the number of DoFs on
+  // vertices, lines, quads and hexes that make up this object
+  if (structdim == 1)
+    {
+      Assert (dof_indices.size() ==
+              (2*this->dof_handler->get_fe()[fe_index].dofs_per_vertex +
+               this->dof_handler->get_fe()[fe_index].dofs_per_line),
+              ExcVectorDoesNotMatch());
+    }
+  else if (structdim == 2)
+    {
+      Assert (dof_indices.size() ==
+              (4*this->dof_handler->get_fe()[fe_index].dofs_per_vertex +
+               4*this->dof_handler->get_fe()[fe_index].dofs_per_line +
+               this->dof_handler->get_fe()[fe_index].dofs_per_quad),
+              ExcVectorDoesNotMatch());
+    }
+  else if (structdim == 3)
+    {
+      Assert (dof_indices.size() ==
+              (8*this->dof_handler->get_fe()[fe_index].dofs_per_vertex +
+               12*this->dof_handler->get_fe()[fe_index].dofs_per_line +
+               6*this->dof_handler->get_fe()[fe_index].dofs_per_quad +
+               this->dof_handler->get_fe()[fe_index].dofs_per_hex),
+              ExcVectorDoesNotMatch());
+    }
+  else
+    {
+      Assert (false, ExcNotImplemented());
+    }
+
+  // the call is only meaningful if either (a) degrees of freedom are
+  // defined on the present object, or (b) the object is not active but
+  // all degrees of freedom live on vertices. otherwise there would be
+  // degrees of freedom on sub-objects that are not allocated for such a
+  // non-active object.
+  Assert (this->fe_index_is_active (fe_index)
+          ||
+          (this->dof_handler->get_fe()[fe_index].dofs_per_cell ==
+           GeometryInfo<structdim>::vertices_per_cell *
+           this->dof_handler->get_fe()[fe_index].dofs_per_vertex),
+          ExcInternalError());
+
+  // hand over to the overload matching this structural dimension
+  dealii::internal::DoFAccessor::get_dof_indices (*this, dof_indices, fe_index);
+}
+
+
+
+// Write the multigrid DoF indices of this object on the given level into
+// dof_indices, for the finite element selected by fe_index. The vector is
+// not resized here; in debug checks it must already have the size that
+// matches the element and the structural dimension. The copying is done
+// by the dimension-specific internal helper.
+template<int structdim, typename DoFHandlerType, bool level_dof_access>
+inline
+void DoFAccessor<structdim, DoFHandlerType,level_dof_access>::get_mg_dof_indices
+(const int                             level,
+ std::vector<types::global_dof_index> &dof_indices,
+ const unsigned int                    fe_index) const
+{
+  Assert (this->dof_handler != 0, ExcInvalidObject ());
+
+  // size check, depending on the kind of object we are
+  if (structdim == 1)
+    {
+      Assert (dof_indices.size () ==
+              2 * this->dof_handler->get_fe ()[fe_index].dofs_per_vertex +
+              this->dof_handler->get_fe ()[fe_index].dofs_per_line,
+              ExcVectorDoesNotMatch ());
+    }
+  else if (structdim == 2)
+    {
+      Assert (dof_indices.size () ==
+              4 * (this->dof_handler->get_fe ()[fe_index].dofs_per_vertex +
+                   this->dof_handler->get_fe ()[fe_index].dofs_per_line) +
+              this->dof_handler->get_fe ()[fe_index].dofs_per_quad,
+              ExcVectorDoesNotMatch ());
+    }
+  else if (structdim == 3)
+    {
+      Assert (dof_indices.size () ==
+              8 * this->dof_handler->get_fe ()[fe_index].dofs_per_vertex +
+              12 * this->dof_handler->get_fe ()[fe_index].dofs_per_line +
+              6 * this->dof_handler->get_fe ()[fe_index].dofs_per_quad +
+              this->dof_handler->get_fe ()[fe_index].dofs_per_hex,
+              ExcVectorDoesNotMatch ());
+    }
+  else
+    {
+      Assert (false, ExcNotImplemented ());
+    }
+
+  // hand over to the overload matching this structural dimension
+  internal::DoFAccessor::get_mg_dof_indices (*this,
+                                             level,
+                                             dof_indices,
+                                             fe_index);
+}
+
+
+// Store the given multigrid DoF indices for this object on the given
+// level, for the finite element selected by fe_index. In debug checks the
+// vector must have the size that matches the element and the structural
+// dimension. The writes are done by the Implementation helper.
+template<int structdim, typename DoFHandlerType, bool level_dof_access>
+inline
+void DoFAccessor<structdim, DoFHandlerType,level_dof_access>::set_mg_dof_indices
+(const int                                   level,
+ const std::vector<types::global_dof_index> &dof_indices,
+ const unsigned int                          fe_index)
+{
+  Assert (this->dof_handler != 0, ExcInvalidObject ());
+
+  // size check, depending on the kind of object we are
+  if (structdim == 1)
+    {
+      Assert (dof_indices.size () ==
+              2 * this->dof_handler->get_fe ()[fe_index].dofs_per_vertex +
+              this->dof_handler->get_fe ()[fe_index].dofs_per_line,
+              ExcVectorDoesNotMatch ());
+    }
+  else if (structdim == 2)
+    {
+      Assert (dof_indices.size () ==
+              4 * (this->dof_handler->get_fe ()[fe_index].dofs_per_vertex +
+                   this->dof_handler->get_fe ()[fe_index].dofs_per_line) +
+              this->dof_handler->get_fe ()[fe_index].dofs_per_quad,
+              ExcVectorDoesNotMatch ());
+    }
+  else if (structdim == 3)
+    {
+      Assert (dof_indices.size () ==
+              8 * this->dof_handler->get_fe ()[fe_index].dofs_per_vertex +
+              12 * this->dof_handler->get_fe ()[fe_index].dofs_per_line +
+              6 * this->dof_handler->get_fe ()[fe_index].dofs_per_quad +
+              this->dof_handler->get_fe ()[fe_index].dofs_per_hex,
+              ExcVectorDoesNotMatch ());
+    }
+  else
+    {
+      Assert (false, ExcNotImplemented ());
+    }
+
+  // hand over to the overload matching this structural dimension
+  internal::DoFAccessor::Implementation::set_mg_dof_indices (*this,
+                                                             level,
+                                                             dof_indices,
+                                                             fe_index);
+}
+
+
+namespace internal
+{
+  namespace DoFAccessor
+  {
+    // Generic fallback for obtaining a quad iterator from a triangulation,
+    // an index and a DoF handler. No generic implementation exists: the
+    // function reports ExcNotImplemented and hands back a
+    // default-constructed iterator, in line with the overloads for
+    // specific handler types. All three arguments are ignored.
+    template <bool level_dof_access, typename DoFHandlerType>
+    inline
+    typename dealii::internal::DoFHandler::Iterators<DoFHandlerType, level_dof_access>::quad_iterator
+    get_quad(const dealii::Triangulation<DoFHandlerType::dimension, DoFHandlerType::space_dimension> *,
+             unsigned int /*index*/,
+             DoFHandlerType *)
+    {
+      Assert(false, ExcNotImplemented());
+      // a non-void function must not run off its end, so always return
+      // a value, even when the assertion above is compiled out
+      return typename dealii::internal::DoFHandler::Iterators<DoFHandlerType, level_dof_access>::quad_iterator();
+    }
+
+
+    // Overload for DoFHandler<2,2>: not implemented. All arguments are
+    // ignored.
+    // NOTE(review): the value returned is a default-constructed
+    // line_iterator although the declared return type is quad_iterator;
+    // this relies on an implicit conversion -- confirm this is intended.
+    template<bool level_dof_access>
+    inline
+    typename dealii::internal::DoFHandler::Iterators<dealii::DoFHandler<2,2>, level_dof_access>::quad_iterator
+    get_quad(const dealii::Triangulation<2,2> *,
+             unsigned int,
+             dealii::DoFHandler<2,2> *)
+    {
+      typedef dealii::internal::DoFHandler::Iterators<dealii::DoFHandler<2,2>, level_dof_access> IteratorTypes;
+
+      Assert(false, ExcNotImplemented());
+      return typename IteratorTypes::line_iterator();
+    }
+
+    // Overload for DoFHandler<2,3>: not implemented. All arguments are
+    // ignored.
+    // NOTE(review): the value returned is a default-constructed
+    // line_iterator although the declared return type is quad_iterator;
+    // this relies on an implicit conversion -- confirm this is intended.
+    template<bool level_dof_access>
+    inline
+    typename dealii::internal::DoFHandler::Iterators<dealii::DoFHandler<2,3>, level_dof_access>::quad_iterator
+    get_quad(const dealii::Triangulation<2,3> *,
+             unsigned int,
+             dealii::DoFHandler<2,3> *)
+    {
+      typedef dealii::internal::DoFHandler::Iterators<dealii::DoFHandler<2,3>, level_dof_access> IteratorTypes;
+
+      Assert(false, ExcNotImplemented());
+      return typename IteratorTypes::line_iterator();
+    }
+
+    // Overload for hp::DoFHandler<2,2>: not implemented. All arguments
+    // are ignored.
+    // NOTE(review): the value returned is a default-constructed
+    // line_iterator although the declared return type is quad_iterator;
+    // this relies on an implicit conversion -- confirm this is intended.
+    template<bool level_dof_access>
+    inline
+    typename dealii::internal::DoFHandler::Iterators<dealii::hp::DoFHandler<2,2>, level_dof_access>::quad_iterator
+    get_quad(const dealii::Triangulation<2,2> *,
+             unsigned int,
+             dealii::hp::DoFHandler<2,2> *)
+    {
+      typedef dealii::internal::DoFHandler::Iterators<dealii::hp::DoFHandler<2,2>, level_dof_access> IteratorTypes;
+
+      Assert(false, ExcNotImplemented());
+      return typename IteratorTypes::line_iterator();
+    }
+
+    // Overload for hp::DoFHandler<2,3>: not implemented. All arguments
+    // are ignored.
+    // NOTE(review): the value returned is a default-constructed
+    // line_iterator although the declared return type is quad_iterator;
+    // this relies on an implicit conversion -- confirm this is intended.
+    template<bool level_dof_access>
+    inline
+    typename dealii::internal::DoFHandler::Iterators<dealii::hp::DoFHandler<2,3>, level_dof_access>::quad_iterator
+    get_quad(const dealii::Triangulation<2,3> *,
+             unsigned int,
+             dealii::hp::DoFHandler<2,3> *)
+    {
+      typedef dealii::internal::DoFHandler::Iterators<dealii::hp::DoFHandler<2,3>, level_dof_access> IteratorTypes;
+
+      Assert(false, ExcNotImplemented());
+      return typename IteratorTypes::line_iterator();
+    }
+  }
+}
+
+
+// Return an iterator to the i-th line bounding this object. If this
+// object is itself a line, only i==0 is accepted and the iterator
+// returned refers to this very object.
+template <int structdim, typename DoFHandlerType, bool level_dof_access>
+inline
+typename dealii::internal::DoFHandler::Iterators<DoFHandlerType,level_dof_access>::line_iterator
+DoFAccessor<structdim,DoFHandlerType,level_dof_access>::line (const unsigned int i) const
+{
+  typedef dealii::internal::DoFHandler::Iterators<DoFHandlerType,level_dof_access> IteratorTypes;
+
+  // a line asked for "its" line: hand back an iterator built from our
+  // own level and index
+  if (structdim == 1)
+    {
+      Assert (i==0, ExcMessage ("You can only ask for line zero if the "
+                                "current object is a line itself."));
+      return typename IteratorTypes::cell_iterator (&this->get_triangulation(),
+                                                    this->level(),
+                                                    this->index(),
+                                                    &this->get_dof_handler());
+    }
+
+  // from here on the object must be at least two-dimensional
+  Assert (structdim > 1, ExcImpossibleInDim(structdim));
+  Assert (DoFHandlerType::dimension > 1, ExcImpossibleInDim(DoFHandlerType::dimension));
+
+  // the range of 'i' is checked inside line_index(i). sub-objects carry
+  // no level, hence the level argument is zero
+  return typename IteratorTypes::line_iterator (this->tria,
+                                                0,
+                                                this->line_index(i),
+                                                this->dof_handler);
+}
+
+
+// Return an iterator to the i-th quad bounding this object. If this
+// object is itself a quad, only i==0 is accepted and the iterator
+// returned refers to this very object. Both return paths use the
+// iterator family that carries this accessor's level_dof_access flag,
+// in the same way line() does.
+template <int structdim, typename DoFHandlerType, bool level_dof_access>
+inline
+typename dealii::internal::DoFHandler::Iterators<DoFHandlerType,level_dof_access>::quad_iterator
+DoFAccessor<structdim,DoFHandlerType,level_dof_access>::quad (const unsigned int i) const
+{
+  typedef dealii::internal::DoFHandler::Iterators<DoFHandlerType,level_dof_access> IteratorTypes;
+
+  // a quad asked for "its" quad: hand back an iterator built from our
+  // own level and index
+  if (structdim == 2)
+    {
+      Assert (i==0, ExcMessage ("You can only ask for quad zero if the "
+                                "current object is a quad itself."));
+      return typename IteratorTypes::cell_iterator (&this->get_triangulation(),
+                                                    this->level(),
+                                                    this->index(),
+                                                    &this->get_dof_handler());
+    }
+
+  // from here on the object must be three-dimensional
+  Assert (structdim > 2, ExcImpossibleInDim(structdim));
+  Assert (DoFHandlerType::dimension > 2, ExcImpossibleInDim(DoFHandlerType::dimension));
+
+  // the range of 'i' is checked inside quad_index(i). sub-objects carry
+  // no level, hence the level argument is zero
+  return typename IteratorTypes::quad_iterator (this->tria,
+                                                0,
+                                                this->quad_index(i),
+                                                this->dof_handler);
+}
+
+
+/*------------------------- Functions: DoFAccessor<0,1,spacedim> ---------------------------*/
+
+
+// Default constructor of the vertex accessor in 1d. It is not meant to be
+// used: the only thing it does is raise ExcInvalidObject through Assert.
+template <template <int, int> class DoFHandlerType, int spacedim, bool level_dof_access>
+inline
+DoFAccessor<0,DoFHandlerType<1,spacedim>, level_dof_access>::
+DoFAccessor ()
+{
+  Assert (false, ExcInvalidObject());
+}
+
+
+
+// Construct a vertex accessor from a triangulation, the kind and index of
+// the vertex, and the DoF handler it belongs to. The triangulation and
+// vertex data go to the base class; the handler pointer is stored with
+// its constness cast away.
+template <template <int, int> class DoFHandlerType, int spacedim, bool level_dof_access>
+inline
+DoFAccessor<0,DoFHandlerType<1,spacedim>, level_dof_access>::
+DoFAccessor (const Triangulation<1,spacedim>                       *tria,
+             const typename TriaAccessor<0,1,spacedim>::VertexKind  vertex_kind,
+             const unsigned int                                     vertex_index,
+             const DoFHandlerType<1,spacedim>                      *dof_handler)
+  :
+  BaseClass (tria, vertex_kind, vertex_index),
+  dof_handler (const_cast<DoFHandlerType<1,spacedim>*>(dof_handler))
+{}
+
+
+
+// Constructor with the (triangulation, level, index, handler) signature.
+// It must not be called for face iterators in 1d: all arguments are
+// ignored, the handler pointer is zeroed, and an error is raised through
+// Assert.
+template <template <int, int> class DoFHandlerType, int spacedim, bool level_dof_access>
+inline
+DoFAccessor<0,DoFHandlerType<1,spacedim>, level_dof_access>::
+DoFAccessor (const Triangulation<1,spacedim>  *,
+             const int,
+             const int,
+             const DoFHandlerType<1,spacedim> *)
+  :
+  dof_handler (0)
+{
+  Assert (false,
+          ExcMessage ("This constructor can not be called for face iterators in 1d."));
+}
+
+
+
+// Conversion from an InvalidAccessor. The argument is ignored; the only
+// thing this constructor does is raise ExcInvalidObject through Assert.
+template <template <int, int> class DoFHandlerType, int spacedim, bool level_dof_access>
+template <int structdim2, int dim2, int spacedim2>
+DoFAccessor<0,DoFHandlerType<1,spacedim>, level_dof_access>::
+DoFAccessor (const InvalidAccessor<structdim2,dim2,spacedim2> &)
+{
+  Assert (false, ExcInvalidObject());
+}
+
+
+
+// Conversion from another DoFAccessor instantiation. The argument is
+// ignored; the only thing this constructor does is raise
+// ExcInvalidObject through Assert.
+template <template <int, int> class DoFHandlerType, int spacedim, bool level_dof_access>
+template <int dim2, class DoFHandlerType2, bool level_dof_access2>
+inline
+DoFAccessor<0,DoFHandlerType<1,spacedim>, level_dof_access>::
+DoFAccessor (const DoFAccessor<dim2, DoFHandlerType2, level_dof_access2> &)
+{
+  Assert (false, ExcInvalidObject());
+}
+
+
+
+// Attach this accessor to the given DoF handler. In debug checks the
+// pointer must not be null.
+template <template <int, int> class DoFHandlerType, int spacedim, bool level_dof_access>
+inline
+void
+DoFAccessor<0,DoFHandlerType<1,spacedim>, level_dof_access>::
+set_dof_handler (DoFHandlerType<1,spacedim> *dh)
+{
+  Assert (dh != 0, ExcInvalidObject());
+
+  this->dof_handler = dh;
+}
+
+
+
+// Return a reference to the DoF handler this accessor points to. The
+// stored pointer is dereferenced without a null check.
+template <template <int, int> class DoFHandlerType, int spacedim, bool level_dof_access>
+inline
+const DoFHandlerType<1,spacedim> &
+DoFAccessor<0,DoFHandlerType<1,spacedim>, level_dof_access>::
+get_dof_handler () const
+{
+  return *this->dof_handler;
+}
+
+
+
+// Fill every entry of dof_indices with the corresponding degree of
+// freedom index of this vertex, for the finite element selected by
+// fe_index. The number of indices fetched is taken from the size of the
+// vector as passed in; no size check is made here.
+template <template <int, int> class DoFHandlerType, int spacedim, bool level_dof_access>
+inline
+void
+DoFAccessor<0,DoFHandlerType<1,spacedim>, level_dof_access>::get_dof_indices
+(std::vector<types::global_dof_index> &dof_indices,
+ const unsigned int                    fe_index) const
+{
+  typedef dealii::internal::DoFAccessor::Implementation Impl;
+
+  for (unsigned int d=0; d<dof_indices.size(); ++d)
+    dof_indices[d] = Impl::get_vertex_dof_index (*dof_handler,
+                                                 this->global_vertex_index,
+                                                 fe_index,
+                                                 d);
+}
+
+
+
+// Return the i-th degree of freedom index of this vertex for the finite
+// element selected by fe_index. Since the object is itself a vertex, the
+// only vertex number accepted in debug checks is zero.
+template <template <int, int> class DoFHandlerType, int spacedim, bool level_dof_access>
+inline
+types::global_dof_index
+DoFAccessor<0,DoFHandlerType<1,spacedim>, level_dof_access>::vertex_dof_index
+(const unsigned int vertex,
+ const unsigned int i,
+ const unsigned int fe_index) const
+{
+  typedef dealii::internal::DoFAccessor::Implementation Impl;
+
+  // 'vertex' is referenced only inside Assert below; the cast presumably
+  // silences unused-parameter warnings when assertions are disabled
+  (void)vertex;
+  Assert (vertex == 0, ExcIndexRange (vertex, 0, 1));
+
+  return Impl::get_vertex_dof_index (*dof_handler,
+                                     this->global_vertex_index,
+                                     fe_index,
+                                     i);
+}
+
+
+
+// Copy the triangulation-related state from the given accessor base
+// object. The DoF handler pointer of this object is left untouched; in
+// debug checks it must already be set.
+template <template <int, int> class DoFHandlerType, int spacedim, bool level_dof_access>
+inline
+void
+DoFAccessor<0,DoFHandlerType<1,spacedim>, level_dof_access>::
+copy_from (const TriaAccessorBase<0,1,spacedim> &da)
+{
+  Assert (this->dof_handler != 0, ExcInvalidObject());
+
+  BaseClass::copy_from(da);
+}
+
+
+
+// Copy everything from another vertex DoF accessor, which may have a
+// different level_dof_access flag: first the base class state, then the
+// DoF handler pointer (through set_dof_handler(), which checks it in
+// debug mode).
+template <template <int, int> class DoFHandlerType, int spacedim, bool level_dof_access>
+template <bool level_dof_access2>
+inline
+void
+DoFAccessor<0,DoFHandlerType<1,spacedim>, level_dof_access>::
+copy_from (const DoFAccessor<0,DoFHandlerType<1,spacedim>, level_dof_access2> &a)
+{
+  BaseClass::copy_from (a);
+  set_dof_handler (a.dof_handler);
+}
+
+
+
+// Compare for equality with another DoF accessor. In debug checks the
+// other accessor must also be zero-dimensional and refer to the same DoF
+// handler; the comparison itself is the one of the base class.
+template <template <int, int> class DoFHandlerType, int spacedim, bool level_dof_access>
+template <int dim2, class DoFHandlerType2, bool level_dof_access2>
+inline
+bool
+DoFAccessor<0,DoFHandlerType<1,spacedim>, level_dof_access>::
+operator == (const DoFAccessor<dim2,DoFHandlerType2,level_dof_access2> &a) const
+{
+  Assert (dim2 == 0, ExcCantCompareIterators());
+  Assert (this->dof_handler == a.dof_handler, ExcCantCompareIterators());
+
+  return BaseClass::operator == (a);
+}
+
+
+
+template <template <int, int> class DoFHandlerType, int spacedim, bool level_dof_access>
+template <int dim2, class DoFHandlerType2, bool level_dof_access2>
+inline
+bool
+DoFAccessor<0,DoFHandlerType<1,spacedim>, level_dof_access>::operator !=
+(const DoFAccessor<dim2,DoFHandlerType2,level_dof_access2> &a) const
+{
+  // Only accessors to zero-dimensional objects that belong to the
+  // same DoFHandler may be compared with this one.
+  Assert (dim2 == 0,
+          ExcCantCompareIterators());
+  Assert (this->dof_handler == a.dof_handler,
+          ExcCantCompareIterators());
+
+  // The actual comparison is the one of the base class.
+  const bool base_parts_differ = BaseClass::operator != (a);
+  return base_parts_differ;
+}
+
+
+
+template <template <int, int> class DoFHandlerType, int spacedim, bool level_dof_access>
+inline
+TriaIterator<DoFAccessor<0,DoFHandlerType<1,spacedim>, level_dof_access > >
+DoFAccessor<0,DoFHandlerType<1,spacedim>, level_dof_access>::child (const unsigned int /*i*/) const
+{
+  // The child number is ignored: the result is always a
+  // default-constructed iterator.
+  typedef
+  TriaIterator<DoFAccessor<0,DoFHandlerType<1,spacedim>, level_dof_access > >
+  ChildIterator;
+
+  return ChildIterator();
+}
+
+
+
+
+
+/*------------------------- Functions: DoFCellAccessor -----------------------*/
+
+
+namespace internal
+{
+  namespace DoFCellAccessor
+  {
+    // make sure we refer to class
+    // dealii::DoFCellAccessor, not
+    // namespace
+    // dealii::internal::DoFCellAccessor
+    using dealii::DoFCellAccessor;
+    using dealii::DoFHandler;
+
+    /**
+     * A class with the same purpose as the similarly named class of the
+     * Triangulation class. See there for more information.
+     */
+    struct Implementation
+    {
+      /**
+       * Implement the updating of the cache. There is one overload per
+       * dimension for ::DoFHandler and one for hp::DoFHandler objects.
+       */
+      template <int spacedim, bool level_dof_access>
+      static
+      void
+      update_cell_dof_indices_cache (const DoFCellAccessor<DoFHandler<1,spacedim>, level_dof_access> &accessor)
+      {
+        const unsigned int dofs_per_vertex = accessor.get_fe().dofs_per_vertex,
+                           dofs_per_line   = accessor.get_fe().dofs_per_line,
+                           dofs_per_cell   = accessor.get_fe().dofs_per_cell;
+
+        // the cache is filled for cells without children and, beyond
+        // that, only if the element has no dofs other than those on
+        // vertices. in every other case leave the cache untouched;
+        // get_dof_indices is expected to stay away from such entries
+        const bool only_vertex_dofs
+          = (dofs_per_cell ==
+             dofs_per_vertex * GeometryInfo<1>::vertices_per_cell);
+        if (accessor.has_children() && !only_vertex_dofs)
+          return;
+
+        // the entries of this cell start at
+        // present_index*dofs_per_cell and must not reach beyond the
+        // end of the cache
+        Assert (accessor.present_index * dofs_per_cell + dofs_per_cell
+                <=
+                accessor.dof_handler->levels[accessor.present_level]
+                ->cell_dof_indices_cache.size(),
+                ExcInternalError());
+
+        std::vector<types::global_dof_index> &cache
+          = accessor.dof_handler->levels[accessor.present_level]
+            ->cell_dof_indices_cache;
+        unsigned int position = accessor.present_index * dofs_per_cell;
+
+        // dofs on the two vertices first, then those on the line itself
+        for (unsigned int vertex=0; vertex < GeometryInfo<1>::vertices_per_cell; ++vertex)
+          for (unsigned int d=0; d<dofs_per_vertex; ++d, ++position)
+            cache[position] = accessor.vertex_dof_index(vertex,d);
+        for (unsigned int d=0; d<dofs_per_line; ++d, ++position)
+          cache[position] = accessor.dof_index(d);
+      }
+
+
+
+      template <int spacedim, bool level_dof_access>
+      static
+      void
+      update_cell_dof_indices_cache (const DoFCellAccessor<DoFHandler<2,spacedim>, level_dof_access> &accessor)
+      {
+        const unsigned int dofs_per_vertex = accessor.get_fe().dofs_per_vertex,
+                           dofs_per_line   = accessor.get_fe().dofs_per_line,
+                           dofs_per_quad   = accessor.get_fe().dofs_per_quad,
+                           dofs_per_cell   = accessor.get_fe().dofs_per_cell;
+
+        // the cache is filled for cells without children and, beyond
+        // that, only if the element has no dofs other than those on
+        // vertices. in every other case leave the cache untouched;
+        // get_dof_indices is expected to stay away from such entries
+        const bool only_vertex_dofs
+          = (dofs_per_cell ==
+             dofs_per_vertex * GeometryInfo<2>::vertices_per_cell);
+        if (accessor.has_children() && !only_vertex_dofs)
+          return;
+
+        // the entries of this cell start at
+        // present_index*dofs_per_cell and must not reach beyond the
+        // end of the cache
+        Assert (accessor.present_index * dofs_per_cell + dofs_per_cell
+                <=
+                accessor.dof_handler->levels[accessor.present_level]
+                ->cell_dof_indices_cache.size(),
+                ExcInternalError());
+
+        std::vector<types::global_dof_index> &cache
+          = accessor.dof_handler->levels[accessor.present_level]
+            ->cell_dof_indices_cache;
+        unsigned int position = accessor.present_index * dofs_per_cell;
+
+        // dofs on the four vertices ...
+        for (unsigned int vertex=0; vertex < GeometryInfo<2>::vertices_per_cell; ++vertex)
+          for (unsigned int d=0; d<dofs_per_vertex; ++d, ++position)
+            cache[position] = accessor.vertex_dof_index(vertex,d);
+        // ... followed by those on the four lines ...
+        for (unsigned int line=0; line<4; ++line)
+          for (unsigned int d=0; d<dofs_per_line; ++d, ++position)
+            cache[position] = accessor.line(line)->dof_index(d);
+        // ... and finally those on the quad itself
+        for (unsigned int d=0; d<dofs_per_quad; ++d, ++position)
+          cache[position] = accessor.dof_index(d);
+      }
+
+
+      template <int spacedim, bool level_dof_access>
+      static
+      void
+      update_cell_dof_indices_cache (const DoFCellAccessor<DoFHandler<3,spacedim>, level_dof_access> &accessor)
+      {
+        // fill this cell's slice of the level's
+        // dof index cache. do so only if the cell
+        // has no children or if all dofs are
+        // located on vertices; otherwise leave
+        // the cache untouched. get_dof_indices
+        // is then expected to make sure the
+        // stale entries are never read
+        if (accessor.has_children()
+            &&
+            (accessor.get_fe().dofs_per_cell !=
+             accessor.get_fe().dofs_per_vertex * GeometryInfo<3>::vertices_per_cell))
+          return;
+
+        const unsigned int dofs_per_vertex = accessor.get_fe().dofs_per_vertex,
+                           dofs_per_line   = accessor.get_fe().dofs_per_line,
+                           dofs_per_quad   = accessor.get_fe().dofs_per_quad,
+                           dofs_per_hex    = accessor.get_fe().dofs_per_hex,
+                           dofs_per_cell   = accessor.get_fe().dofs_per_cell;
+
+        // the slice for this cell covers
+        // dofs_per_cell entries starting at
+        // present_index*dofs_per_cell; it
+        // has to fit into the cache
+        Assert (accessor.present_index * dofs_per_cell + dofs_per_cell
+                <=
+                accessor.dof_handler->levels[accessor.present_level]
+                ->cell_dof_indices_cache.size(),
+                ExcInternalError());
+
+        std::vector<types::global_dof_index>::iterator next
+          = (accessor.dof_handler->levels[accessor.present_level]
+             ->cell_dof_indices_cache.begin() + accessor.present_index * dofs_per_cell);
+
+        for (unsigned int vertex=0; vertex<8; ++vertex)
+          for (unsigned int d=0; d<dofs_per_vertex; ++d)
+            *next++ = accessor.vertex_dof_index(vertex,d);
+        // next come the dofs on the 12 lines. a
+        // line with non-standard orientation is
+        // seen by this cell in flipped direction.
+        // its vertices are already taken care of
+        // (the vertex() function picks the
+        // correct ones automatically), but the
+        // dofs in the line's interior have to be
+        // renumbered: translate the cell-local
+        // index d into the index on the line via
+        // adjust_line_dof_index_for_line_orientation
+        // before looking up the global number.
+        for (unsigned int line=0; line<12; ++line)
+          for (unsigned int d=0; d<dofs_per_line; ++d)
+            *next++ = accessor.line(line)->dof_index(accessor.dof_handler->get_fe().
+                                                     adjust_line_dof_index_for_line_orientation(d,
+                                                         accessor.line_orientation(line)));
+        // then the dofs on the 6 faces. as with
+        // lines, the lines and vertices of a
+        // face are already correct (the line()
+        // and vertex() functions take care of
+        // that automatically). the dofs in the
+        // interior of the face, however, are
+        // stored in the face's own ordering,
+        // which differs from the cell-local one
+        // whenever face_orientation, face_flip
+        // or face_rotation is non-standard.
+        // adjust_quad_dof_index_for_face_orientation
+        // maps the cell-local index d to the
+        // index to be used on the face
+        for (unsigned int quad=0; quad<6; ++quad)
+          for (unsigned int d=0; d<dofs_per_quad; ++d)
+            *next++ = accessor.quad(quad)->dof_index(accessor.dof_handler->get_fe().
+                                                     adjust_quad_dof_index_for_face_orientation(d,
+                                                         accessor.face_orientation(quad),
+                                                         accessor.face_flip(quad),
+                                                         accessor.face_rotation(quad)));
+        for (unsigned int d=0; d<dofs_per_hex; ++d)
+          *next++ = accessor.dof_index(d);
+      }
+
+
+      // implementation for the case of
+      // hp::DoFHandler objects. a single
+      // version serves all dimensions and
+      // only caches cells without children
+      template <int dim, int spacedim, bool level_dof_access>
+      static
+      void
+      update_cell_dof_indices_cache (const DoFCellAccessor<dealii::hp::DoFHandler<dim,spacedim>, level_dof_access> &accessor)
+      {
+        // only cells without children get a cache entry
+        if (accessor.has_children())
+          return;
+
+        const unsigned int dofs_per_cell   = accessor.get_fe().dofs_per_cell;
+
+        // this cell must have an entry in the table of offsets, and
+        // the offset recorded there must not point past the end of
+        // the cache
+        Assert (static_cast<unsigned int>(accessor.present_index)
+                <
+                accessor.dof_handler->levels[accessor.present_level]
+                ->cell_cache_offsets.size(),
+                ExcInternalError());
+        Assert (accessor.dof_handler->levels[accessor.present_level]
+                ->cell_cache_offsets[accessor.present_index]
+                <=
+                accessor.dof_handler->levels[accessor.present_level]
+                ->cell_dof_indices_cache.size(),
+                ExcInternalError());
+
+        // obtain the indices through the get_dof_indices function of
+        // the DoFAccessor base class, using the active fe of this cell
+        typedef
+        dealii::DoFAccessor<dim,dealii::hp::DoFHandler<dim,spacedim>,level_dof_access>
+        BaseAccessor;
+        const BaseAccessor &base_accessor = accessor;
+
+        std::vector<types::global_dof_index> dof_indices (dofs_per_cell);
+        base_accessor.get_dof_indices (dof_indices, accessor.active_fe_index());
+
+        // and copy them to where this cell's part of the cache begins
+        types::global_dof_index *cache_begin
+          = &accessor.dof_handler->levels[accessor.present_level]
+            ->cell_dof_indices_cache[accessor.dof_handler->levels[accessor.present_level]
+                                     ->cell_cache_offsets[accessor.present_index]];
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          cache_begin[i] = dof_indices[i];
+      }
+
+
+
+      /**
+       * Implement setting dof indices on a cell. Currently not implemented
+       * for hp::DoFHandler objects.
+       */
+      template <int spacedim, bool level_dof_access>
+      static
+      void
+      set_dof_indices (DoFCellAccessor<DoFHandler<1,spacedim>, level_dof_access> &accessor,
+                       const std::vector<types::global_dof_index>          &local_dof_indices)
+      {
+        // indices can only be set on cells without children
+        Assert (accessor.has_children() == false,
+                ExcInternalError());
+
+        const unsigned int dofs_per_vertex = accessor.get_fe().dofs_per_vertex,
+                           dofs_per_line   = accessor.get_fe().dofs_per_line,
+                           dofs_per_cell   = accessor.get_fe().dofs_per_cell;
+        // dofs_per_cell is only looked at inside Assert statements
+        (void)dofs_per_cell;
+
+        Assert (local_dof_indices.size() == dofs_per_cell,
+                ExcInternalError());
+
+        // the input is consumed front to back: first the dofs of the
+        // two vertices, then those located on the line itself
+        unsigned int n_consumed = 0;
+
+        for (unsigned int vertex=0; vertex<2; ++vertex)
+          for (unsigned int d=0; d<dofs_per_vertex; ++d)
+            {
+              accessor.set_vertex_dof_index(vertex, d,
+                                            local_dof_indices[n_consumed]);
+              ++n_consumed;
+            }
+
+        for (unsigned int d=0; d<dofs_per_line; ++d)
+          {
+            accessor.set_dof_index(d, local_dof_indices[n_consumed]);
+            ++n_consumed;
+          }
+
+        // every entry of the input must have been used exactly once
+        Assert (n_consumed == dofs_per_cell,
+                ExcInternalError());
+      }
+
+
+
+      template <int spacedim, bool level_dof_access>
+      static
+      void
+      set_dof_indices (DoFCellAccessor<DoFHandler<2,spacedim>, level_dof_access> &accessor,
+                       const std::vector<types::global_dof_index>          &local_dof_indices)
+      {
+        // indices can only be set on cells without children
+        Assert (accessor.has_children() == false,
+                ExcInternalError());
+
+        const unsigned int dofs_per_vertex = accessor.get_fe().dofs_per_vertex,
+                           dofs_per_line   = accessor.get_fe().dofs_per_line,
+                           dofs_per_quad   = accessor.get_fe().dofs_per_quad,
+                           dofs_per_cell   = accessor.get_fe().dofs_per_cell;
+        // dofs_per_cell is only looked at inside Assert statements
+        (void)dofs_per_cell;
+
+        Assert (local_dof_indices.size() == dofs_per_cell,
+                ExcInternalError());
+
+        // the input is consumed front to back: first the dofs of the
+        // four vertices, then those of the four lines, and finally
+        // those located on the quad itself
+        unsigned int n_consumed = 0;
+
+        for (unsigned int vertex=0; vertex<4; ++vertex)
+          for (unsigned int d=0; d<dofs_per_vertex; ++d)
+            {
+              accessor.set_vertex_dof_index(vertex, d,
+                                            local_dof_indices[n_consumed]);
+              ++n_consumed;
+            }
+
+        for (unsigned int line=0; line<4; ++line)
+          for (unsigned int d=0; d<dofs_per_line; ++d)
+            {
+              accessor.line(line)->set_dof_index(d, local_dof_indices[n_consumed]);
+              ++n_consumed;
+            }
+
+        for (unsigned int d=0; d<dofs_per_quad; ++d)
+          {
+            accessor.set_dof_index(d, local_dof_indices[n_consumed]);
+            ++n_consumed;
+          }
+
+        // every entry of the input must have been used exactly once
+        Assert (n_consumed == dofs_per_cell,
+                ExcInternalError());
+      }
+
+
+
+      template <int spacedim, bool level_dof_access>
+      static
+      void
+      set_dof_indices (DoFCellAccessor<DoFHandler<3,spacedim>, level_dof_access> &accessor,
+                       const std::vector<types::global_dof_index>          &local_dof_indices)
+      {
+        Assert (accessor.has_children() == false,
+                ExcInternalError());
+
+        const unsigned int dofs_per_vertex = accessor.get_fe().dofs_per_vertex,
+                           dofs_per_line   = accessor.get_fe().dofs_per_line,
+                           dofs_per_quad   = accessor.get_fe().dofs_per_quad,
+                           dofs_per_hex    = accessor.get_fe().dofs_per_hex,
+                           dofs_per_cell   = accessor.get_fe().dofs_per_cell;
+        (void)dofs_per_cell;
+
+        Assert (local_dof_indices.size() == dofs_per_cell,
+                ExcInternalError());
+
+        unsigned int index = 0;
+
+        for (unsigned int vertex=0; vertex<8; ++vertex)
+          for (unsigned int d=0; d<dofs_per_vertex; ++d, ++index)
+            accessor.set_vertex_dof_index(vertex,d,
+                                          local_dof_indices[index]);
+        // next come the dofs on the 12 lines. a
+        // line with non-standard orientation is
+        // seen by this cell in flipped direction.
+        // its vertices are already taken care of
+        // (the vertex() function picks the
+        // correct ones automatically), but the
+        // dofs in the line's interior have to be
+        // renumbered: translate the cell-local
+        // index d into the index on the line via
+        // adjust_line_dof_index_for_line_orientation
+        // before storing the global number.
+        for (unsigned int line=0; line<12; ++line)
+          for (unsigned int d=0; d<dofs_per_line; ++d, ++index)
+            accessor.line(line)->set_dof_index(accessor.dof_handler->get_fe().
+                                               adjust_line_dof_index_for_line_orientation(d,
+                                                   accessor.line_orientation(line)),
+                                               local_dof_indices[index]);
+        // then the dofs on the 6 faces. as with
+        // lines, the lines and vertices of a
+        // face are already correct (the line()
+        // and vertex() functions take care of
+        // that automatically). the dofs in the
+        // interior of the face, however, are
+        // stored in the face's own ordering,
+        // which differs from the cell-local one
+        // whenever face_orientation, face_flip
+        // or face_rotation is non-standard.
+        // adjust_quad_dof_index_for_face_orientation
+        // maps the cell-local index d to the
+        // index to be used on the face
+        for (unsigned int quad=0; quad<6; ++quad)
+          for (unsigned int d=0; d<dofs_per_quad; ++d, ++index)
+            accessor.quad(quad)->set_dof_index(accessor.dof_handler->get_fe().
+                                               adjust_quad_dof_index_for_face_orientation(d,
+                                                   accessor.face_orientation(quad),
+                                                   accessor.face_flip(quad),
+                                                   accessor.face_rotation(quad)),
+                                               local_dof_indices[index]);
+        for (unsigned int d=0; d<dofs_per_hex; ++d, ++index)
+          accessor.set_dof_index(d, local_dof_indices[index]);
+
+        Assert (index == dofs_per_cell,
+                ExcInternalError());
+      }
+
+
+      // implementation for the case of
+      // hp::DoFHandler objects. it's
+      // not implemented there, for no
+      // space dimension
+      template <int dim, int spacedim, bool level_dof_access>
+      static
+      void
+      set_dof_indices (const DoFCellAccessor<dealii::hp::DoFHandler<dim,spacedim>, level_dof_access> &,
+                       const std::vector<types::global_dof_index> &)
+      {
+        // there is no hp version of this operation: both arguments are
+        // ignored and any call is flagged as an error
+        Assert (false,
+                ExcNotImplemented());
+      }
+
+
+
+      /**
+       * Do what the active_fe_index function in the parent class is supposed
+       * to do.
+       */
+      template <int dim, int spacedim, bool level_dof_access>
+      static
+      unsigned int
+      active_fe_index (const DoFCellAccessor<DoFHandler<dim,spacedim>, level_dof_access> &)
+      {
+        // a ::DoFHandler knows only one finite element, and that one
+        // has index zero on every cell; the accessor itself therefore
+        // does not need to be looked at
+        const unsigned int the_only_fe_index = 0;
+        return the_only_fe_index;
+      }
+
+
+
+      template <int dim, int spacedim, bool level_dof_access>
+      static
+      unsigned int
+      active_fe_index (const DoFCellAccessor<dealii::hp::DoFHandler<dim,spacedim>, level_dof_access> &accessor)
+      {
+        // the level of this cell must exist in the DoFHandler's data
+        Assert (static_cast<unsigned int>(accessor.level()) < accessor.dof_handler->levels.size(),
+                ExcMessage ("DoFHandler not initialized"));
+
+        // the index is stored per cell in the data of the cell's level
+        const unsigned int fe_index
+          = accessor.dof_handler->levels[accessor.level()]
+            ->active_fe_index(accessor.present_index);
+        return fe_index;
+      }
+
+
+
+      /**
+       * Do what the set_active_fe_index function in the parent class is
+       * supposed to do.
+       */
+      template <int dim, int spacedim, bool level_dof_access>
+      static
+      void
+      set_active_fe_index (const DoFCellAccessor<DoFHandler<dim,spacedim>, level_dof_access> &,
+                           const unsigned int                                i)
+      {
+        typedef dealii::DoFAccessor<dim,DoFHandler<dim,spacedim>, level_dof_access> BaseClass;
+
+        // a ::DoFHandler knows only one finite element with index
+        // zero, so nothing is stored here; it is merely checked that
+        // zero is what the caller asks for. the cast to void keeps the
+        // parameter "used" independently of the Assert
+        (void)i;
+        Assert (i == 0,
+                typename BaseClass::ExcInvalidObject());
+      }
+
+
+
+      template <int dim, int spacedim, bool level_dof_access>
+      static
+      void
+      set_active_fe_index (DoFCellAccessor<dealii::hp::DoFHandler<dim,spacedim>, level_dof_access> &accessor,
+                           const unsigned int                                      i)
+      {
+        typedef dealii::DoFAccessor<dim,DoFHandler<dim,spacedim>, level_dof_access> BaseClass;
+
+        // the accessor must be attached to a DoFHandler whose data
+        // covers the level of this cell
+        Assert (accessor.dof_handler != 0,
+                typename BaseClass::ExcInvalidObject());
+        Assert (static_cast<unsigned int>(accessor.level()) <
+                accessor.dof_handler->levels.size(),
+                ExcMessage ("DoFHandler not initialized"));
+
+        // record the new index for this cell in the data of its level
+        accessor.dof_handler->levels[accessor.level()]
+        ->set_active_fe_index (accessor.present_index,
+                               i);
+      }
+
+
+
+      template <int dim, int spacedim, bool level_dof_access, typename ForwardIterator, class OutputVector>
+      static
+      void
+      distribute_local_to_global (const DoFCellAccessor<dealii::DoFHandler<dim,spacedim>, level_dof_access> &accessor,
+                                  ForwardIterator local_source_begin,
+                                  ForwardIterator local_source_end,
+                                  OutputVector   &global_destination)
+      {
+        typedef dealii::DoFAccessor<dim,DoFHandler<dim,spacedim>, level_dof_access> BaseClass;
+
+        // the accessor must be attached to a DoFHandler, the local
+        // range must hold one value per cell dof, and the global
+        // vector one entry per dof of the DoFHandler
+        Assert (accessor.dof_handler != 0,
+                typename BaseClass::ExcInvalidObject());
+        Assert (static_cast<unsigned int>(local_source_end-local_source_begin)
+                ==
+                accessor.get_fe().dofs_per_cell,
+                typename BaseClass::ExcVectorDoesNotMatch());
+        Assert (accessor.dof_handler->n_dofs() == global_destination.size(),
+                typename BaseClass::ExcVectorDoesNotMatch());
+
+        Assert (!accessor.has_children(),
+                ExcMessage ("Cell must be active"));
+
+        const unsigned int n_local_values = local_source_end - local_source_begin;
+
+        // the global indices of this cell's dofs are read directly
+        // from the cache, where they start at
+        // present_index*n_local_values
+        types::global_dof_index *cached_indices
+          = &accessor.dof_handler->levels[accessor.level()]
+            ->cell_dof_indices_cache[accessor.present_index * n_local_values];
+
+        // add the local values to the global vector
+        global_destination.add(n_local_values, cached_indices, local_source_begin);
+      }
+
+
+
+      template <int dim, int spacedim, bool level_dof_access, typename ForwardIterator, class OutputVector>
+      static
+      void
+      distribute_local_to_global (const DoFCellAccessor<dealii::hp::DoFHandler<dim,spacedim>, level_dof_access> &accessor,
+                                  ForwardIterator local_source_begin,
+                                  ForwardIterator local_source_end,
+                                  OutputVector   &global_destination)
+      {
+        typedef dealii::DoFAccessor<dim,DoFHandler<dim,spacedim>, level_dof_access> BaseClass;
+
+        // the accessor must be attached to a DoFHandler, the local
+        // range must hold one value per cell dof, and the global
+        // vector one entry per dof of the DoFHandler
+        Assert (accessor.dof_handler != 0,
+                typename BaseClass::ExcInvalidObject());
+        Assert (local_source_end-local_source_begin == accessor.get_fe().dofs_per_cell,
+                typename BaseClass::ExcVectorDoesNotMatch());
+        Assert (accessor.dof_handler->n_dofs() == global_destination.size(),
+                typename BaseClass::ExcVectorDoesNotMatch());
+
+        const unsigned int n_local_values = local_source_end - local_source_begin;
+
+//TODO[WB/MK]: This function could be made more efficient: it allocates memory on every call, which could be avoided by passing in a scratch array as an additional argument. Another option would be to let the surrounding class have a (static, mutable) scratch array that is thread-local
+
+        // ask the accessor for the global indices of this cell's dofs
+        std::vector<types::global_dof_index> cell_indices (n_local_values);
+        accessor.get_dof_indices (cell_indices);
+
+        // add the local values to the global vector
+        global_destination.add (n_local_values, cell_indices.begin(), local_source_begin);
+      }
+
+
+
+      template <int dim, int spacedim, bool level_dof_access, typename ForwardIterator, class OutputVector>
+      static
+      void
+      distribute_local_to_global (const DoFCellAccessor<dealii::DoFHandler<dim,spacedim>, level_dof_access> &accessor,
+                                  const ConstraintMatrix &constraints,
+                                  ForwardIterator         local_source_begin,
+                                  ForwardIterator         local_source_end,
+                                  OutputVector           &global_destination)
+      {
+        typedef dealii::DoFAccessor<dim,DoFHandler<dim,spacedim>, level_dof_access> BaseClass;
+
+        // the accessor must be attached to a DoFHandler, the local
+        // range must hold one value per cell dof, and the global
+        // vector one entry per dof of the DoFHandler
+        Assert (accessor.dof_handler != 0,
+                typename BaseClass::ExcInvalidObject());
+        Assert (local_source_end-local_source_begin == accessor.get_fe().dofs_per_cell,
+                typename BaseClass::ExcVectorDoesNotMatch());
+        Assert (accessor.dof_handler->n_dofs() == global_destination.size(),
+                typename BaseClass::ExcVectorDoesNotMatch());
+
+        Assert (!accessor.has_children(),
+                ExcMessage ("Cell must be active."));
+
+        const unsigned int n_local_values = local_source_end - local_source_begin;
+
+        // the global indices of this cell's dofs are read directly
+        // from the cache, where they start at
+        // present_index*n_local_values
+        types::global_dof_index *cached_indices
+          = &accessor.dof_handler->levels[accessor.level()]
+            ->cell_dof_indices_cache[accessor.present_index * n_local_values];
+
+        // hand the actual work over to the constraints object
+        constraints.distribute_local_to_global (local_source_begin,
+                                                local_source_end,
+                                                cached_indices,
+                                                global_destination);
+      }
+
+
+
+      template <int dim, int spacedim, bool level_dof_access, typename ForwardIterator, class OutputVector>
+      static
+      void
+      distribute_local_to_global (const DoFCellAccessor<dealii::hp::DoFHandler<dim,spacedim>, level_dof_access> &accessor,
+                                  const ConstraintMatrix &constraints,
+                                  ForwardIterator         local_source_begin,
+                                  ForwardIterator         local_source_end,
+                                  OutputVector           &global_destination)
+      {
+        typedef dealii::DoFAccessor<dim,DoFHandler<dim,spacedim>, level_dof_access> BaseClass;
+
+        // the accessor must be attached to a DoFHandler, the local
+        // range must hold one value per cell dof, and the global
+        // vector one entry per dof of the DoFHandler
+        Assert (accessor.dof_handler != 0,
+                typename BaseClass::ExcInvalidObject());
+        Assert (local_source_end-local_source_begin == accessor.get_fe().dofs_per_cell,
+                typename BaseClass::ExcVectorDoesNotMatch());
+        Assert (accessor.dof_handler->n_dofs() == global_destination.size(),
+                typename BaseClass::ExcVectorDoesNotMatch());
+
+        const unsigned int n_local_values = local_source_end - local_source_begin;
+
+//TODO[WB/MK]: This function could be made more efficient: it allocates memory on every call, which could be avoided by passing in a scratch array as an additional argument. This should be fixed eventually
+
+        // ask the accessor for the global indices of this cell's dofs
+        std::vector<types::global_dof_index> cell_indices (n_local_values);
+        accessor.get_dof_indices (cell_indices);
+
+        // hand the actual work over to the constraints object
+        constraints.distribute_local_to_global (local_source_begin,
+                                                local_source_end,
+                                                cell_indices.begin(),
+                                                global_destination);
+      }
+
+
+
+      template <int dim, int spacedim, bool level_dof_access, typename number, class OutputMatrix>
+      static
+      void
+      distribute_local_to_global (const DoFCellAccessor<dealii::DoFHandler<dim,spacedim>, level_dof_access> &accessor,
+                                  const dealii::FullMatrix<number> &local_source,
+                                  OutputMatrix                     &global_destination)
+      {
+        typedef dealii::DoFAccessor<dim,DoFHandler<dim,spacedim>, level_dof_access> BaseClass;
+
+        // the accessor must be attached to a DoFHandler; the local
+        // matrix must have one row and column per cell dof, the global
+        // matrix one row and column per dof of the DoFHandler
+        Assert (accessor.dof_handler != 0,
+                typename BaseClass::ExcInvalidObject());
+        Assert (local_source.m() == accessor.get_fe().dofs_per_cell,
+                typename BaseClass::ExcMatrixDoesNotMatch());
+        Assert (local_source.n() == accessor.get_fe().dofs_per_cell,
+                typename BaseClass::ExcMatrixDoesNotMatch());
+        Assert (accessor.dof_handler->n_dofs() == global_destination.m(),
+                typename BaseClass::ExcMatrixDoesNotMatch());
+        Assert (accessor.dof_handler->n_dofs() == global_destination.n(),
+                typename BaseClass::ExcMatrixDoesNotMatch());
+
+        Assert (!accessor.has_children(),
+                ExcMessage ("Cell must be active."));
+
+        const unsigned int n_cell_dofs = local_source.m();
+
+        // the global indices of this cell's dofs are read directly
+        // from the cache, where they start at present_index*n_cell_dofs
+        types::global_dof_index *cached_indices
+          = &accessor.dof_handler->levels[accessor.level()]
+            ->cell_dof_indices_cache[accessor.present_index * n_cell_dofs];
+
+        // add the local matrix to the global one, a complete row at a
+        // time
+        for (unsigned int row=0; row<n_cell_dofs; ++row)
+          global_destination.add(cached_indices[row],
+                                 n_cell_dofs,
+                                 cached_indices,
+                                 &local_source(row,0));
+      }
+
+
+
+      template <int dim, int spacedim, bool level_dof_access, typename number, class OutputMatrix>
+      static
+      void
+      distribute_local_to_global (const DoFCellAccessor<dealii::hp::DoFHandler<dim,spacedim>, level_dof_access> &accessor,
+                                  const dealii::FullMatrix<number> &local_source,
+                                  OutputMatrix                     &global_destination)
+      {
+        typedef dealii::DoFAccessor<dim,DoFHandler<dim,spacedim>, level_dof_access> BaseClass;
+        // the accessor must be attached to a DoFHandler; the local
+        // matrix must have one row and column per cell dof, the global
+        // matrix one row and column per dof of the DoFHandler
+        Assert (accessor.dof_handler != 0,
+                typename BaseClass::ExcInvalidObject());
+        Assert (local_source.m() == accessor.get_fe().dofs_per_cell,
+                typename BaseClass::ExcMatrixDoesNotMatch());
+        Assert (local_source.n() == accessor.get_fe().dofs_per_cell,
+                typename BaseClass::ExcMatrixDoesNotMatch());
+        Assert (accessor.dof_handler->n_dofs() == global_destination.m(),
+                typename BaseClass::ExcMatrixDoesNotMatch());
+        Assert (accessor.dof_handler->n_dofs() == global_destination.n(),
+                typename BaseClass::ExcMatrixDoesNotMatch());
+
+        // the number of cell dofs equals the number of rows of the local matrix (see the checks above); use m() as the non-hp overload does, since size() without arguments does not yield a plain integer for a FullMatrix
+        const unsigned int n_dofs = local_source.m();
+
+//TODO[WB/MK]: This function could be made more efficient: it allocates memory on every call, which could be avoided by passing in a scratch array as an additional argument.
+
+        // get indices of dofs
+        std::vector<types::global_dof_index> dofs (n_dofs);
+        accessor.get_dof_indices (dofs);
+
+        // distribute cell matrix
+        global_destination.add(dofs,local_source);
+      }
+
+
+
+      template <int dim, int spacedim, bool level_dof_access, typename number,
+                class OutputMatrix, typename OutputVector>
+      static
+      void
+      distribute_local_to_global (const DoFCellAccessor<dealii::DoFHandler<dim,spacedim>, level_dof_access> &accessor,
+                                  const dealii::FullMatrix<number> &local_matrix,
+                                  const dealii::Vector<number>     &local_vector,
+                                  OutputMatrix                     &global_matrix,
+                                  OutputVector                     &global_vector)
+      {
+        typedef dealii::DoFAccessor<dim,DoFHandler<dim,spacedim>, level_dof_access> BaseClass;
+
+        // the accessor must be attached to a DoFHandler; local matrix
+        // and vector must be sized by the number of cell dofs, global
+        // matrix and vector by the number of dofs of the DoFHandler
+        Assert (accessor.dof_handler != 0,
+                typename BaseClass::ExcInvalidObject());
+        Assert (local_matrix.m() == accessor.get_fe().dofs_per_cell,
+                typename BaseClass::ExcMatrixDoesNotMatch());
+        Assert (local_matrix.n() == accessor.get_fe().dofs_per_cell,
+                typename BaseClass::ExcVectorDoesNotMatch());
+        Assert (accessor.dof_handler->n_dofs() == global_matrix.m(),
+                typename BaseClass::ExcMatrixDoesNotMatch());
+        Assert (accessor.dof_handler->n_dofs() == global_matrix.n(),
+                typename BaseClass::ExcMatrixDoesNotMatch());
+        Assert (local_vector.size() == accessor.get_fe().dofs_per_cell,
+                typename BaseClass::ExcVectorDoesNotMatch());
+        Assert (accessor.dof_handler->n_dofs() == global_vector.size(),
+                typename BaseClass::ExcVectorDoesNotMatch());
+
+        Assert (!accessor.has_children(),
+                ExcMessage ("Cell must be active."));
+
+        const unsigned int n_cell_dofs = accessor.get_fe().dofs_per_cell;
+
+        // the global indices of this cell's dofs are read directly
+        // from the cache, where they start at present_index*n_cell_dofs
+        types::global_dof_index *cached_indices
+          = &accessor.dof_handler->levels[accessor.level()]
+            ->cell_dof_indices_cache[accessor.present_index * n_cell_dofs];
+
+        // for each cell dof, add the corresponding row of the local
+        // matrix and the corresponding entry of the local vector to
+        // their global counterparts
+        for (unsigned int row=0; row<n_cell_dofs; ++row)
+          {
+            global_matrix.add(cached_indices[row],
+                              n_cell_dofs,
+                              cached_indices,
+                              &local_matrix(row,0));
+            global_vector(cached_indices[row]) += local_vector(row);
+          }
+      }
+
+      // hp variant: copies the cell's DoF indices into a temporary vector via get_dof_indices() and
+      // lets the global matrix and the global vector each add the whole cell contribution in one call.
+      template <int dim, int spacedim, bool level_dof_access, typename number,
+                class OutputMatrix, typename OutputVector>
+      static
+      void
+      distribute_local_to_global (const DoFCellAccessor<dealii::hp::DoFHandler<dim,spacedim>, level_dof_access> &accessor,
+                                  const dealii::FullMatrix<number> &local_matrix,
+                                  const dealii::Vector<number>     &local_vector,
+                                  OutputMatrix                     &global_matrix,
+                                  OutputVector                     &global_vector)
+      {
+        typedef dealii::DoFAccessor<dim,DoFHandler<dim,spacedim>, level_dof_access> BaseClass;
+        Assert (accessor.dof_handler != 0,
+                typename BaseClass::ExcInvalidObject());
+        Assert (local_matrix.m() == accessor.get_fe().dofs_per_cell,
+                typename BaseClass::ExcMatrixDoesNotMatch());
+        Assert (local_matrix.n() == accessor.get_fe().dofs_per_cell,
+                typename BaseClass::ExcMatrixDoesNotMatch());
+        Assert (accessor.dof_handler->n_dofs() == global_matrix.m(),
+                typename BaseClass::ExcMatrixDoesNotMatch());
+        Assert (accessor.dof_handler->n_dofs() == global_matrix.n(),
+                typename BaseClass::ExcMatrixDoesNotMatch());
+        Assert (local_vector.size() == accessor.get_fe().dofs_per_cell,
+                typename BaseClass::ExcVectorDoesNotMatch());
+        Assert (accessor.dof_handler->n_dofs() == global_vector.size(),
+                typename BaseClass::ExcVectorDoesNotMatch());
+
+        const unsigned int n_dofs = local_matrix.m();
+        // (row count of the cell matrix, the same quantity the size assertions above compare)
+//TODO[WB/MK]: This function could be made more efficient because it
+//allocates memory, which could be avoided by passing in another
+//argument as a scratch array. Comment(GK) Do not bother and leave this
+//to ConstraintMatrix or MeshWorker::Assembler
+
+        // get indices of dofs
+        std::vector<types::global_dof_index> dofs (n_dofs);
+        accessor.get_dof_indices (dofs);
+
+        // distribute cell matrix and vector
+        global_matrix.add(dofs,local_matrix);
+        global_vector.add(dofs,local_vector);
+      }
+    };
+  }
+}
+
+// Construct an accessor for cell (level, index) of @p tria; every argument is passed on unchanged to the DoFAccessor base class.
+template <typename DoFHandlerType, bool level_dof_access>
+inline
+DoFCellAccessor<DoFHandlerType,level_dof_access>::DoFCellAccessor
+(const Triangulation<DoFHandlerType::dimension,DoFHandlerType::space_dimension> *tria,
+ const int           level,
+ const int           index,
+ const AccessorData *local_data)
+  :
+  DoFAccessor<DoFHandlerType::dimension,DoFHandlerType,level_dof_access> (tria,level,index, local_data)
+{}
+
+// Conversion from an InvalidAccessor: the body does nothing but raise the ExcInvalidObject assertion.
+template <typename DoFHandlerType, bool level_dof_access>
+template <int structdim2, int dim2, int spacedim2>
+inline
+DoFCellAccessor<DoFHandlerType,level_dof_access>::DoFCellAccessor
+(const InvalidAccessor<structdim2,dim2,spacedim2> &)
+{
+  Assert (false, typename BaseClass::ExcInvalidObject());
+}
+
+// Conversion from a DoFAccessor whose dimension, handler type or level-access flag may differ:
+// the source object is handed straight to the base-class constructor.
+template <typename DoFHandlerType, bool level_dof_access>
+template <int dim2, class DoFHandlerType2, bool level_dof_access2>
+inline
+DoFCellAccessor<DoFHandlerType,level_dof_access>::DoFCellAccessor
+(const DoFAccessor<dim2,DoFHandlerType2,level_dof_access2> &other)
+  :
+  BaseClass(other)
+{}
+
+// Return an iterator to neighbor @p i, built from that neighbor's level and index and this cell's DoF handler.
+template <typename DoFHandlerType, bool level_dof_access>
+inline
+TriaIterator<DoFCellAccessor<DoFHandlerType,level_dof_access> >
+DoFCellAccessor<DoFHandlerType,level_dof_access>::neighbor (const unsigned int i) const
+{
+  typedef TriaIterator<DoFCellAccessor<DoFHandlerType,level_dof_access> > iterator_type;
+  iterator_type q (this->tria, this->neighbor_level (i),
+                   this->neighbor_index (i), this->dof_handler);
+
+#ifdef DEBUG
+  // compiled in DEBUG builds only: an iterator that is not past-the-end
+  // has to refer to a cell that is actually in use
+  if (q.state() != IteratorState::past_the_end)
+    Assert (q->used(), TriaAccessorExceptions::ExcUnusedCellAsNeighbor());
+#endif
+  return q;
+}
+
+// Return an iterator to child @p i, located one level below this cell at index child_index(i).
+template <typename DoFHandlerType, bool level_dof_access>
+inline
+TriaIterator<DoFCellAccessor<DoFHandlerType,level_dof_access> >
+DoFCellAccessor<DoFHandlerType,level_dof_access>::child (const unsigned int i) const
+{
+  typedef TriaIterator<DoFCellAccessor<DoFHandlerType,level_dof_access> > iterator_type;
+  iterator_type q (this->tria, this->level()+1,
+                   this->child_index (i), this->dof_handler);
+
+#ifdef DEBUG
+  // compiled in DEBUG builds only: an iterator that is not past-the-end
+  // has to refer to a cell that is actually in use
+  if (q.state() != IteratorState::past_the_end)
+    Assert (q->used(), TriaAccessorExceptions::ExcUnusedCellAsChild());
+#endif
+  return q;
+}
+
+// Return an iterator to the parent cell: same triangulation and DoF handler, one level up, at parent_index().
+template <typename DoFHandlerType, bool level_dof_access>
+inline
+TriaIterator<DoFCellAccessor<DoFHandlerType,level_dof_access> >
+DoFCellAccessor<DoFHandlerType,level_dof_access>::parent () const
+{
+  typedef TriaIterator<DoFCellAccessor<DoFHandlerType,level_dof_access> > iterator_type;
+  // unlike neighbor() and child(), no used() check is performed on the
+  // iterator before it is returned
+  return iterator_type (this->tria,
+                        this->level() - 1,
+                        this->parent_index (),
+                        this->dof_handler);
+}
+
+// Helpers for DoFCellAccessor::face(): one get_face() overload per dimension, selected by an int2type<dim> tag argument.
+namespace internal
+{
+  namespace DoFCellAccessor
+  {
+    template <typename DoFHandlerType, bool level_dof_access>
+    inline
+    TriaIterator<dealii::DoFAccessor<DoFHandlerType::dimension-1,DoFHandlerType,level_dof_access> >
+    get_face (const dealii::DoFCellAccessor<DoFHandlerType,level_dof_access> &cell,
+              const unsigned int i,
+              const dealii::internal::int2type<1>)   // dim==1: a face is a vertex, so a DoFAccessor<0,...> is built by hand
+    {
+      dealii::DoFAccessor<0, DoFHandlerType,level_dof_access>
+      a (&cell.get_triangulation(),
+         ((i == 0) && cell.at_boundary(0)   // face 0 lying on the boundary
+          ?
+          dealii::TriaAccessor<0, 1, DoFHandlerType::space_dimension>::left_vertex
+          :
+          ((i == 1) && cell.at_boundary(1)   // face 1 lying on the boundary
+           ?
+           dealii::TriaAccessor<0, 1, DoFHandlerType::space_dimension>::right_vertex
+           :
+           dealii::TriaAccessor<0, 1, DoFHandlerType::space_dimension>::interior_vertex)),   // every other case
+         cell.vertex_index(i),
+         &cell.get_dof_handler());
+      return dealii::TriaIterator<dealii::DoFAccessor<0,DoFHandlerType,level_dof_access> > (a);
+    }
+
+    // dim==2: face i of a cell is its i-th line
+    template <typename DoFHandlerType, bool level_dof_access>
+    inline
+    TriaIterator<dealii::DoFAccessor<DoFHandlerType::dimension-1,DoFHandlerType,level_dof_access> >
+    get_face (const dealii::DoFCellAccessor<DoFHandlerType,level_dof_access> &cell,
+              const unsigned int i,
+              const dealii::internal::int2type<2>)
+    {
+      return cell.line(i);
+    }
+
+    // dim==3: face i of a cell is its i-th quad
+    template <typename DoFHandlerType, bool level_dof_access>
+    inline
+    TriaIterator<dealii::DoFAccessor<DoFHandlerType::dimension-1,DoFHandlerType,level_dof_access> >
+    get_face (const dealii::DoFCellAccessor<DoFHandlerType,level_dof_access> &cell,
+              const unsigned int i,
+              const dealii::internal::int2type<3>)
+    {
+      return cell.quad(i);
+    }
+  }
+}
+
+// Return an iterator to face @p i by dispatching on the handler's dimension to the internal get_face() overloads.
+template <typename DoFHandlerType, bool level_dof_access>
+inline
+typename DoFCellAccessor<DoFHandlerType,level_dof_access>::face_iterator
+DoFCellAccessor<DoFHandlerType,level_dof_access>::face (const unsigned int i) const
+{
+  Assert (i<GeometryInfo<dim>::faces_per_cell, ExcIndexRange (i, 0, GeometryInfo<dim>::faces_per_cell));
+
+  typedef dealii::internal::int2type<DoFHandlerType::dimension> dimension_tag;
+  return dealii::internal::DoFCellAccessor::get_face (*this, i, dimension_tag());
+}
+
+// Copy the global DoF indices of this (active, non-artificial) cell from the level's index cache
+// into @p dof_indices, which must already have dofs_per_cell entries.
+template <typename DoFHandlerType, bool level_dof_access>
+inline
+void
+DoFCellAccessor<DoFHandlerType,level_dof_access>::get_dof_indices
+(std::vector<types::global_dof_index> &dof_indices) const
+{
+  Assert (this->active(), ExcMessage ("get_dof_indices() only works on active cells."));
+  Assert (this->is_artificial() == false,
+          ExcMessage ("Can't ask for DoF indices on artificial cells."));
+  AssertDimension (dof_indices.size(), this->get_fe().dofs_per_cell);
+  const unsigned int n_cell_dofs = this->get_fe().dofs_per_cell;
+  const types::global_dof_index *const first_index
+    = this->dof_handler->levels[this->present_level]
+      ->get_cell_cache_start (this->present_index, n_cell_dofs);
+  for (unsigned int k=0; k<n_cell_dofs; ++k)
+    dof_indices[k] = first_index[k];
+}
+
+// Multigrid variant of get_dof_indices(): forwards to the DoFAccessor base-class function,
+// supplying this cell's own level() as the level argument.
+template<typename DoFHandlerType, bool level_dof_access>
+inline
+void DoFCellAccessor<DoFHandlerType,level_dof_access>::get_mg_dof_indices
+(std::vector<types::global_dof_index> &dof_indices) const
+{
+  DoFAccessor<dim, DoFHandlerType,level_dof_access>::get_mg_dof_indices (this->level (), dof_indices);
+}
+
+// Setter counterpart of get_mg_dof_indices(): forwards @p dof_indices to the DoFAccessor
+// base-class function, supplying this cell's own level() as the level argument.
+template<typename DoFHandlerType, bool level_dof_access>
+inline
+void DoFCellAccessor<DoFHandlerType,level_dof_access>::set_mg_dof_indices
+(const std::vector<types::global_dof_index> &dof_indices)
+{
+  DoFAccessor<dim, DoFHandlerType,level_dof_access>::set_mg_dof_indices (this->level (), dof_indices);
+}
+
+// Fill @p dof_indices with either the active-cell or the multigrid DoF indices,
+// depending on the compile-time flag level_dof_access.
+template<typename DoFHandlerType, bool level_dof_access>
+inline
+void DoFCellAccessor<DoFHandlerType,level_dof_access>::get_active_or_mg_dof_indices
+(std::vector<types::global_dof_index> &dof_indices) const
+{
+  if (!level_dof_access)
+    get_dof_indices (dof_indices);
+  else
+    get_mg_dof_indices (dof_indices);
+}
+
+// Convenience overload for a Vector<number> destination: forwards to the iterator-range
+// overload using the begin()/end() range of @p local_values.
+template <typename DoFHandlerType, bool level_dof_access>
+template <class InputVector, typename number>
+inline
+void
+DoFCellAccessor<DoFHandlerType,level_dof_access>::get_dof_values
+(const InputVector &values,
+ Vector<number>    &local_values) const
+{
+  get_dof_values (values, local_values.begin(), local_values.end());
+}
+
+// Extract the entries of @p values that belong to this cell's DoFs into the range starting at
+// @p local_values_begin; the range is asserted to hold exactly dofs_per_cell entries.
+template <typename DoFHandlerType, bool level_dof_access>
+template <class InputVector, typename ForwardIterator>
+inline
+void
+DoFCellAccessor<DoFHandlerType,level_dof_access>::get_dof_values
+(const InputVector &values,
+ ForwardIterator    local_values_begin,
+ ForwardIterator    local_values_end) const
+{
+  (void)local_values_end;
+  Assert (this->is_artificial() == false,
+          ExcMessage ("Can't ask for DoF indices on artificial cells."));
+  Assert (!this->has_children(),
+          ExcMessage ("Cell must be active."));
+
+  Assert (static_cast<unsigned int>(local_values_end-local_values_begin)
+          == this->get_fe().dofs_per_cell,
+          typename DoFCellAccessor::ExcVectorDoesNotMatch());
+  Assert (values.size() == this->get_dof_handler().n_dofs(),
+          typename DoFCellAccessor::ExcVectorDoesNotMatch());
+
+  const unsigned int n_cell_dofs = this->get_fe().dofs_per_cell;
+  const types::global_dof_index *const first_index
+    = this->dof_handler->levels[this->present_level]
+      ->get_cell_cache_start (this->present_index, n_cell_dofs);
+
+  // the cached index range [first_index, first_index + n_cell_dofs) selects the entries to extract
+  values.extract_subvector_to (first_index, first_index + n_cell_dofs, local_values_begin);
+}
+
+// Same checks as the iterator-range get_dof_values() above, but the values are obtained through
+// ConstraintMatrix::get_dof_values() instead of being extracted from @p values directly.
+template <typename DoFHandlerType, bool level_dof_access>
+template <class InputVector, typename ForwardIterator>
+inline
+void
+DoFCellAccessor<DoFHandlerType,level_dof_access>::get_dof_values
+(const ConstraintMatrix &constraints,
+ const InputVector      &values,
+ ForwardIterator         local_values_begin,
+ ForwardIterator         local_values_end) const
+{
+  Assert (this->is_artificial() == false,
+          ExcMessage ("Can't ask for DoF indices on artificial cells."));
+  Assert (!this->has_children(),
+          ExcMessage ("Cell must be active."));
+
+  Assert (static_cast<unsigned int>(local_values_end-local_values_begin)
+          == this->get_fe().dofs_per_cell,
+          typename DoFCellAccessor::ExcVectorDoesNotMatch());
+  Assert (values.size() == this->get_dof_handler().n_dofs(),
+          typename DoFCellAccessor::ExcVectorDoesNotMatch());
+
+  // start of this cell's entries in the level's cached DoF index array
+  const types::global_dof_index *cache
+    = this->dof_handler->levels[this->present_level]
+      ->get_cell_cache_start (this->present_index, this->get_fe().dofs_per_cell);
+  // NOTE(review): the second argument is *cache (the first cached index), not the pointer itself - confirm this is what get_dof_values() expects
+  constraints.get_dof_values(values, *cache, local_values_begin,
+                             local_values_end);
+}
+
+// Write the dofs_per_cell entries of @p local_values into @p values at the positions given by
+// this cell's cached global DoF indices (plain assignment, nothing is accumulated).
+template <typename DoFHandlerType, bool level_dof_access>
+template <class OutputVector, typename number>
+inline
+void
+DoFCellAccessor<DoFHandlerType,level_dof_access>::set_dof_values
+(const Vector<number> &local_values,
+ OutputVector         &values) const
+{
+  Assert (this->is_artificial() == false,
+          ExcMessage ("Can't ask for DoF indices on artificial cells."));
+  Assert (!this->has_children(),
+          ExcMessage ("Cell must be active."));
+
+  Assert (static_cast<unsigned int>(local_values.size())
+          == this->get_fe().dofs_per_cell,
+          typename DoFCellAccessor::ExcVectorDoesNotMatch());
+  Assert (values.size() == this->get_dof_handler().n_dofs(),
+          typename DoFCellAccessor::ExcVectorDoesNotMatch());
+
+  // local entry k goes to the global entry named by the k-th cached index
+  const unsigned int n_cell_dofs = this->get_fe().dofs_per_cell;
+  const types::global_dof_index *const first_index
+    = this->dof_handler->levels[this->present_level]
+      ->get_cell_cache_start (this->present_index, n_cell_dofs);
+  for (unsigned int k=0; k<n_cell_dofs; ++k)
+    values(first_index[k]) = local_values(k);
+}
+
+// Return the finite element that active_fe_index() selects from the DoF handler's get_fe().
+// The assertion rejects cells with children unless the handler is a dealii::DoFHandler.
+template <typename DoFHandlerType, bool level_dof_access>
+inline
+const FiniteElement<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &
+DoFCellAccessor<DoFHandlerType,level_dof_access>::get_fe () const
+{
+  Assert ((dynamic_cast<const dealii::DoFHandler<DoFHandlerType::dimension,DoFHandlerType::space_dimension>*>
+           (this->dof_handler) != 0)
+          ||
+          (this->has_children() == false),
+          ExcMessage ("In hp::DoFHandler objects, finite elements are only associated "
+                      "with active cells. Consequently, you can not ask for the "
+                      "active finite element on cells with children."));
+  return dealii::internal::DoFAccessor::get_fe (this->dof_handler->get_fe(), active_fe_index());
+}
+
+// Return this cell's active FE index as reported by the internal Implementation helper.
+// The assertion rejects cells with children unless the handler is a dealii::DoFHandler.
+template <typename DoFHandlerType, bool level_dof_access>
+inline
+unsigned int
+DoFCellAccessor<DoFHandlerType,level_dof_access>::active_fe_index () const
+{
+  Assert ((dynamic_cast<const dealii::DoFHandler<DoFHandlerType::dimension,DoFHandlerType::space_dimension>*>
+           (this->dof_handler) != 0)
+          ||
+          (this->has_children() == false),
+          ExcMessage ("You can not ask for the active_fe_index on a cell that has "
+                      "children because no degrees of freedom are assigned "
+                      "to this cell and, consequently, no finite element "
+                      "is associated with it."));
+  return dealii::internal::DoFCellAccessor::Implementation::active_fe_index (*this);
+}
+
+// Set this cell's active FE index to @p i through the internal Implementation helper.
+// The assertion rejects cells with children unless the handler is a dealii::DoFHandler.
+template <typename DoFHandlerType, bool level_dof_access>
+inline
+void
+DoFCellAccessor<DoFHandlerType,level_dof_access>::set_active_fe_index (const unsigned int i)
+{
+  Assert ((dynamic_cast<const dealii::DoFHandler<DoFHandlerType::dimension,DoFHandlerType::space_dimension>*>
+           (this->dof_handler) != 0)
+          ||
+          (this->has_children() == false),
+          ExcMessage ("You can not set the active_fe_index on a cell that has "
+                      "children because no degrees of freedom will be assigned "
+                      "to this cell."));
+  dealii::internal::DoFCellAccessor::Implementation::set_active_fe_index (*this, i);
+}
+
+
+// Vector overload: passes the begin()/end() range of @p local_source, together with
+// @p global_destination, to the internal Implementation helper of the same name.
+template <typename DoFHandlerType, bool level_dof_access>
+template <typename number, typename OutputVector>
+inline
+void
+DoFCellAccessor<DoFHandlerType,level_dof_access>::distribute_local_to_global
+(const Vector<number> &local_source,
+ OutputVector         &global_destination) const
+{
+  dealii::internal::DoFCellAccessor::Implementation::
+  distribute_local_to_global (*this, local_source.begin(),
+                              local_source.end(), global_destination);
+}
+
+// Iterator-range overload: hands the range and @p global_destination unchanged to the
+// internal Implementation helper of the same name.
+template <typename DoFHandlerType, bool level_dof_access>
+template <typename ForwardIterator, typename OutputVector>
+inline
+void
+DoFCellAccessor<DoFHandlerType,level_dof_access>::distribute_local_to_global
+(ForwardIterator  local_source_begin,
+ ForwardIterator  local_source_end,
+ OutputVector    &global_destination) const
+{
+  dealii::internal::DoFCellAccessor::Implementation::
+  distribute_local_to_global (*this, local_source_begin,
+                              local_source_end, global_destination);
+}
+
+// Iterator-range overload that additionally passes @p constraints along; all arguments are
+// handed unchanged to the internal Implementation helper of the same name.
+template <typename DoFHandlerType, bool level_dof_access>
+template <typename ForwardIterator, typename OutputVector>
+inline
+void
+DoFCellAccessor<DoFHandlerType,level_dof_access>::distribute_local_to_global
+(const ConstraintMatrix &constraints,
+ ForwardIterator         local_source_begin,
+ ForwardIterator         local_source_end,
+ OutputVector           &global_destination) const
+{
+  dealii::internal::DoFCellAccessor::Implementation::
+  distribute_local_to_global (*this, constraints, local_source_begin,
+                              local_source_end, global_destination);
+}
+
+// Matrix overload: hands @p local_source and @p global_destination unchanged to the
+// internal Implementation helper of the same name.
+template <typename DoFHandlerType, bool level_dof_access>
+template <typename number, typename OutputMatrix>
+inline
+void
+DoFCellAccessor<DoFHandlerType,level_dof_access>::distribute_local_to_global
+(const FullMatrix<number> &local_source,
+ OutputMatrix             &global_destination) const
+{
+  dealii::internal::DoFCellAccessor::Implementation::
+  distribute_local_to_global (*this,local_source,global_destination);
+}
+
+// Combined matrix-and-vector overload: hands all four arguments unchanged to the
+// internal Implementation helper of the same name.
+template <typename DoFHandlerType, bool level_dof_access>
+template <typename number, typename OutputMatrix, typename OutputVector>
+inline
+void
+DoFCellAccessor<DoFHandlerType,level_dof_access>::distribute_local_to_global
+(const FullMatrix<number> &local_matrix,
+ const Vector<number>     &local_vector,
+ OutputMatrix             &global_matrix,
+ OutputVector             &global_vector) const
+{
+  dealii::internal::DoFCellAccessor::Implementation::
+  distribute_local_to_global (*this,local_matrix,local_vector,
+                              global_matrix,global_vector);
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/dofs/dof_faces.h b/include/deal.II/dofs/dof_faces.h
new file mode 100644
index 0000000..f4ea6b4
--- /dev/null
+++ b/include/deal.II/dofs/dof_faces.h
@@ -0,0 +1,196 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__dof_faces_h
+#define dealii__dof_faces_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/dofs/dof_objects.h>
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace internal
+{
+  /**
+   * A namespace for internal data structures of the DoFHandler group of
+   * classes.
+   *
+   * @ingroup dofs
+   */
+  namespace DoFHandler
+  {
+
+    /**
+     * Per-dimension storage of the degree-of-freedom indices located on faces.
+     * <h4>DoFFaces</h4>
+     *
+     * These classes are similar to the DoFLevel classes. We here store
+     * information that is associated with faces, rather than cells, as this
+     * information is independent of the hierarchical structure of cells,
+     * which are organized in levels. In 2D we store information on degrees of
+     * freedom located on lines whereas in 3D we store information on degrees
+     * of freedom located on quads and lines. In 1D we do nothing, as the
+     * faces of lines are vertices which are treated separately.
+     *
+     * Apart from the DoFObjects object containing the data to store (degree
+     * of freedom indices) we do not store any data or provide any
+     * functionality. However, we do implement a function to determine an
+     * estimate of the memory consumption of the contained DoFObjects
+     * object(s).
+     *
+     * The data contained isn't usually directly accessed. Rather, except for
+     * some access from the DoFHandler class, access is usually through the
+     * DoFAccessor::set_dof_index() and DoFAccessor::dof_index() functions or
+     * similar functions of derived classes that in turn access the member
+     * variables using the DoFHandler::get_dof_index() and corresponding
+     * setter functions. Knowledge of the actual data format is therefore
+     * encapsulated in the present hierarchy of classes as well as the
+     * dealii::DoFHandler class.
+     *
+     * @author Tobias Leicht, 2006
+     */
+    template<int dim>
+    class DoFFaces
+    {
+      /**
+       * The constructor is private so that the general template cannot be
+       * used; only the explicit specializations below are meant to be used.
+       */
+    private:
+      DoFFaces();
+    };
+
+    /**
+     * Store the indices of degrees of freedom on faces in 1D. As these would
+     * be vertices, which are treated separately, don't do anything.
+     *
+     * @author Tobias Leicht, 2006
+     */
+    template<>
+    class DoFFaces<1>
+    {
+    public:
+      /**
+       * Determine an estimate for the memory consumption (in bytes) of this
+       * object.
+       */
+      std::size_t memory_consumption () const;
+
+      /**
+       * Read or write the data of this object to or from a stream for the
+       * purpose of serialization. This specialization has no data members, so the definition further down is empty.
+       */
+      template <class Archive>
+      void serialize(Archive &ar,
+                     const unsigned int version);
+    };
+
+    /**
+     * Store the indices of degrees of freedom on faces in 2D, which are
+     * lines.
+     *
+     * @author Tobias Leicht, 2006
+     */
+    template<>
+    class DoFFaces<2>
+    {
+    public:
+      /**
+       * The object containing the data of DoFs on lines.
+       */
+      internal::DoFHandler::DoFObjects<1> lines;
+
+      /**
+       * Determine an estimate for the memory consumption (in bytes) of this
+       * object.
+       */
+      std::size_t memory_consumption () const;
+
+      /**
+       * Read or write the data of this object to or from a stream for the
+       * purpose of serialization. Only the @p lines member is archived; @p version is not used by the definition.
+       */
+      template <class Archive>
+      void serialize(Archive &ar,
+                     const unsigned int version);
+    };
+
+    /**
+     * Store the indices of degrees of freedom on faces in 3D, which are
+     * quads, additionally also on lines.
+     *
+     * @author Tobias Leicht, 2006
+     */
+    template<>
+    class DoFFaces<3>
+    {
+    public:
+      /**
+       * The object containing the data of DoFs on lines.
+       */
+      internal::DoFHandler::DoFObjects<1> lines;
+
+      /**
+       * The object containing the data of DoFs on quads.
+       */
+      internal::DoFHandler::DoFObjects<2> quads;
+
+      /**
+       * Determine an estimate for the memory consumption (in bytes) of this
+       * object.
+       */
+      std::size_t memory_consumption () const;
+
+      /**
+       * Read or write the data of this object to or from a stream for the
+       * purpose of serialization. @p lines and then @p quads are archived; @p version is not used by the definition.
+       */
+      template <class Archive>
+      void serialize(Archive &ar,
+                     const unsigned int version);
+    };
+
+    // Serialization for the 1D case: the class has no data members, so nothing is
+    // archived and both parameters are left unnamed.
+    template <class Archive>
+    void DoFFaces<1>::serialize (Archive &,
+                                 const unsigned int)
+    {}
+
+    // Serialization for the 2D case: the only data member, lines, is handed to the archive; the version number is ignored.
+    template <class Archive>
+    void DoFFaces<2>::serialize (Archive &ar,
+                                 const unsigned int)
+    {
+      ar &lines;
+    }
+
+    // Serialization for the 3D case: lines and then quads are handed to the archive; the version number is ignored.
+    template <class Archive>
+    void DoFFaces<3>::serialize (Archive &ar,
+                                 const unsigned int)
+    {
+      ar &lines &quads;
+    }
+
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/dofs/dof_handler.h b/include/deal.II/dofs/dof_handler.h
new file mode 100644
index 0000000..7273724
--- /dev/null
+++ b/include/deal.II/dofs/dof_handler.h
@@ -0,0 +1,1344 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__dof_handler_h
+#define dealii__dof_handler_h
+
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/base/index_set.h>
+#include <deal.II/base/iterator_range.h>
+#include <deal.II/base/std_cxx11/shared_ptr.h>
+#include <deal.II/dofs/block_info.h>
+#include <deal.II/dofs/dof_iterator_selector.h>
+#include <deal.II/dofs/number_cache.h>
+#include <deal.II/dofs/dof_faces.h>
+#include <deal.II/dofs/dof_levels.h>
+#include <deal.II/dofs/function_map.h>
+
+#include <boost/serialization/split_member.hpp>
+
+#include <vector>
+#include <map>
+#include <set>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace internal // forward declarations only; nothing is defined here
+{
+  namespace DoFHandler
+  {
+    struct Implementation; // internal::DoFHandler::Implementation
+
+    namespace Policy
+    {
+      template <int dim, int spacedim> class PolicyBase;
+      struct Implementation; // internal::DoFHandler::Policy::Implementation
+    }
+  }
+
+  namespace DoFAccessor
+  {
+    struct Implementation; // internal::DoFAccessor::Implementation
+  }
+
+  namespace DoFCellAccessor
+  {
+    struct Implementation; // internal::DoFCellAccessor::Implementation
+  }
+}
+
+
+/**
+ * Manage the distribution and numbering of the degrees of freedom for
+ * non-multigrid algorithms. This class satisfies the
+ * @ref ConceptMeshType "MeshType concept"
+ * requirements.
+ *
+ * It is first used in the step-2 tutorial program.
+ *
+ * For each vertex, line, quad, etc, this class stores a list of the indices
+ * of degrees of freedom living on this object. These indices refer to the
+ * unconstrained degrees of freedom, i.e. constrained degrees of freedom are
+ * numbered in the same way as unconstrained ones, and are only later
+ * eliminated.  This leads to the fact that indices in global vectors and
+ * matrices also refer to all degrees of freedom and some kind of condensation
+ * is needed to restrict the systems of equations to the unconstrained degrees
+ * of freedom only. The actual layout of storage of the indices is described
+ * in the dealii::internal::DoFHandler::DoFLevel class documentation.
+ *
+ * The class offers iterators to traverse all cells, in much the same way as
+ * the Triangulation class does. Using the begin() and end() functions (and
+ * companions, like begin_active()), one can obtain iterators to walk over
+ * cells, and query the degree of freedom structures as well as the
+ * triangulation data. These iterators are built on top of those of the
+ * Triangulation class, but offer the additional information on degrees of
+ * freedom functionality compared to pure triangulation iterators. The order
+ * in which dof iterators are presented by the <tt>++</tt> and <tt>--</tt>
+ * operators is the same as that for the corresponding iterators traversing
+ * the triangulation on which this DoFHandler is constructed.
+ *
+ * The <tt>spacedim</tt> parameter has to be used if one wants to solve
+ * problems on surfaces. If not specified, this parameter takes the default
+ * value <tt>=dim</tt> implying that we want to solve problems in a domain
+ * whose dimension equals the dimension of the space in which it is embedded.
+ *
+ *
+ * <h3>Distribution of indices for degrees of freedom</h3>
+ *
+ * The degrees of freedom (`dofs') are distributed on the given triangulation
+ * by the function distribute_dofs(). It gets passed a finite element object
+ * describing how many degrees of freedom are located on vertices, lines, etc.
+ * It traverses the triangulation cell by cell and numbers the dofs of that
+ * cell if not yet numbered. For non-multigrid algorithms, only active cells
+ * are considered. Active cells are defined to be those cells which have no
+ * children, i.e. they are the most refined ones.
+ *
+ * Since the triangulation is traversed starting with the cells of the
+ * coarsest active level and going to more refined levels, the lowest numbers
+ * for dofs are given to the largest cells as well as their bounding lines and
+ * vertices, with the dofs of more refined cells getting higher numbers.
+ *
+ * This numbering implies very large bandwidths of the resulting matrices and
+ * is thus vastly suboptimal for some solution algorithms. For this reason,
+ * the DoFRenumbering class offers several algorithms to reorder the dof
+ * numbering accordingly. See there for a discussion of the implemented
+ * algorithms.
+ *
+ *
+ * <h3>Interaction with distributed meshes</h3>
+ *
+ * Upon construction, this class takes a reference to a triangulation object.
+ * In most cases, this will be a reference to an object of type Triangulation,
+ * i.e. the class that represents triangulations that entirely reside on a
+ * single processor. However, it can also be of type
+ * parallel::distributed::Triangulation (see, for example, step-32, step-40
+ * and in particular the
+ * @ref distributed
+ * module) in which case the DoFHandler object will proceed to only manage
+ * degrees of freedom on locally owned and ghost cells. This process is
+ * entirely transparent to the user.
+ *
+ *
+ * <h3>User defined renumbering schemes</h3>
+ *
+ * The DoFRenumbering class offers a number of renumbering schemes like the
+ * Cuthill-McKee scheme. Basically, the function sets up an array in which for
+ * each degree of freedom we store the new index this DoF should have after
+ * renumbering. Using this array, the renumber_dofs() function of the present
+ * class is called, which actually performs the change from old DoF indices to
+ * the ones given in the array. In some cases, however, a user may want to
+ * compute her own renumbering order; in this case, one can allocate an array
+ * with one element per degree of freedom and fill it with the number that the
+ * respective degree of freedom shall be assigned. This number may, for
+ * example, be obtained by sorting the support points of the degrees of
+ * freedom in downwind direction.  Then call the
+ * <tt>renumber_dofs(vector<types::global_dof_index>)</tt> function with the
+ * array, which converts old into new degree of freedom indices.
+ *
+ *
+ * <h3>Serializing (loading or storing) DoFHandler objects</h3>
+ *
+ * Like many other classes in deal.II, the DoFHandler class can stream its
+ * contents to an archive using BOOST's serialization facilities. The data so
+ * stored can later be retrieved again from the archive to restore the
+ * contents of this object. This facility is frequently used to save the state
+ * of a program to disk for possible later resurrection, often in the context
+ * of checkpoint/restart strategies for long running computations or on
+ * computers that aren't very reliable (e.g. on very large clusters where
+ * individual nodes occasionally fail and then bring down an entire MPI job).
+ *
+ * The model for doing so is similar for the DoFHandler class as it is for the
+ * Triangulation class (see the section in the general documentation of that
+ * class). In particular, the load() function does not exactly restore the
+ * same state as was stored previously using the save() function. Rather, the
+ * function assumes that you load data into a DoFHandler object that is
+ * already associated with a triangulation that has a content that matches the
+ * one that was used when the data was saved. Likewise, the load() function
+ * assumes that the current object is already associated with a finite element
+ * object that matches the one that was associated with it when data was
+ * saved; the latter can be achieved by calling DoFHandler::distribute_dofs()
+ * using the same kind of finite element before re-loading data from the
+ * serialization archive.
+ *
+ * @ingroup dofs
+ * @author Wolfgang Bangerth, Markus Buerg, Timo Heister, Guido Kanschat
+ * @date 1998, 1999, 2000, 2012
+ */
+template <int dim, int spacedim=dim>
+class DoFHandler  :  public Subscriptor
+{
+  typedef dealii::internal::DoFHandler::Iterators<DoFHandler<dim,spacedim>, false> ActiveSelector;
+  typedef dealii::internal::DoFHandler::Iterators<DoFHandler<dim,spacedim>, true> LevelSelector;
+public:
+  typedef typename ActiveSelector::CellAccessor         cell_accessor;
+  typedef typename ActiveSelector::FaceAccessor         face_accessor;
+
+  typedef typename ActiveSelector::line_iterator        line_iterator;
+  typedef typename ActiveSelector::active_line_iterator active_line_iterator;
+
+  typedef typename ActiveSelector::quad_iterator        quad_iterator;
+  typedef typename ActiveSelector::active_quad_iterator active_quad_iterator;
+
+  typedef typename ActiveSelector::hex_iterator         hex_iterator;
+  typedef typename ActiveSelector::active_hex_iterator  active_hex_iterator;
+
+  /**
+   * A typedef that is used to identify
+   * @ref GlossActive "active cell iterators".
+   * The concept of iterators is discussed at length in the
+   * @ref Iterators "iterators documentation module".
+   *
+   * The current typedef identifies active cells in a DoFHandler object. While
+   * the actual data type of the typedef is hidden behind a few layers of
+   * (unfortunately necessary) indirections, it is in essence
+   * TriaActiveIterator<DoFCellAccessor>. The TriaActiveIterator class works
+   * like a pointer to active objects that when you dereference it yields an
+   * object of type DoFCellAccessor. DoFCellAccessor is a class that
+   * identifies properties that are specific to cells in a DoFHandler, but it
+   * is derived (and consequently inherits) from both DoFAccessor,
+   * TriaCellAccessor and TriaAccessor that describe what you can ask of more
+   * general objects (lines, faces, as well as cells) in a triangulation and
+   * DoFHandler objects.
+   *
+   * @ingroup Iterators
+   */
+  typedef typename ActiveSelector::active_cell_iterator active_cell_iterator;
+
+  /**
+   * A typedef that is used to identify cell iterators. The concept of
+   * iterators is discussed at length in the
+   * @ref Iterators "iterators documentation module".
+   *
+   * The current typedef identifies cells in a DoFHandler object. Some of
+   * these cells may in fact be active (see
+   * @ref GlossActive "active cell iterators")
+   * in which case they can in fact be asked for the degrees of freedom that
+   * live on them. On the other hand, if the cell is not active, any such
+   * query will result in an error. Note that this is what distinguishes this
+   * typedef from the level_cell_iterator typedef.
+   *
+   * While the actual data type of the typedef is hidden behind a few layers
+   * of (unfortunately necessary) indirections, it is in essence
+   * TriaIterator<DoFCellAccessor>. The TriaIterator class works like a
+   * pointer to objects that when you dereference it yields an object of type
+   * DoFCellAccessor. DoFCellAccessor is a class that identifies properties
+   * that are specific to cells in a DoFHandler, but it is derived (and
+   * consequently inherits) from both DoFAccessor, TriaCellAccessor and
+   * TriaAccessor that describe what you can ask of more general objects
+   * (lines, faces, as well as cells) in a triangulation and DoFHandler
+   * objects.
+   *
+   * @ingroup Iterators
+   */
+  typedef typename ActiveSelector::cell_iterator        cell_iterator;
+
+  typedef typename ActiveSelector::face_iterator        face_iterator;
+  typedef typename ActiveSelector::active_face_iterator active_face_iterator;
+
+  typedef typename LevelSelector::CellAccessor          level_cell_accessor;
+  typedef typename LevelSelector::FaceAccessor          level_face_accessor;
+
+  typedef typename LevelSelector::cell_iterator         level_cell_iterator;
+  typedef typename LevelSelector::face_iterator         level_face_iterator;
+
+
+  /**
+   * Alias the @p FunctionMap type declared elsewhere.
+   */
+  typedef typename dealii::FunctionMap<spacedim>::type FunctionMap;
+
+  /**
+   * Make the dimension available in function templates.
+   */
+  static const unsigned int dimension = dim;
+
+  /**
+   * Make the space dimension available in function templates.
+   */
+  static const unsigned int space_dimension = spacedim;
+
+  /**
+   * When the arrays holding the DoF indices are set up, but before they are
+   * filled with actual values, they are set to an invalid value, in order to
+   * monitor possible problems. This invalid value is the constant defined
+   * here.
+   *
+   * Please note that you should not rely on it having a certain value, but
+   * rather take its symbolic name.
+   */
+  static const types::global_dof_index invalid_dof_index = numbers::invalid_dof_index;
+
+  /**
+   * The default index of the finite element to be used on a given cell. Since
+   * the present class only supports the same finite element to be used on all
+   * cells, the index of the finite element needs to be the same on all cells
+   * anyway, and by convention we pick zero for this value. The situation is
+   * different for hp objects (i.e. the hp::DoFHandler class) where different
+   * finite element indices may be used on different cells, and the default
+   * index there corresponds to an invalid value.
+   */
+  static const unsigned int default_fe_index = 0;
+
+  /**
+   * Standard constructor, not initializing any data. After constructing an
+   * object with this constructor, use initialize() to make a valid
+   * DoFHandler.
+   */
+  DoFHandler ();
+
+  /**
+   * Constructor. Take @p tria as the triangulation to work on.
+   */
+  DoFHandler ( const Triangulation<dim,spacedim> &tria);
+
+  /**
+   * Destructor.
+   */
+  virtual ~DoFHandler ();
+
+  /**
+   * Assign a Triangulation and a FiniteElement to the DoFHandler and compute
+   * the distribution of degrees of freedom over the mesh.
+   */
+  void initialize(const Triangulation<dim,spacedim> &tria,
+                  const FiniteElement<dim,spacedim> &fe);
+
+  /**
+   * Go through the triangulation and "distribute" the degrees of freedom
+   * needed for the given finite element. "Distributing" degrees of freedom
+   * involves allocating memory to store the information that describes it
+   * (e.g., whether it is located on a vertex, edge, face, etc) and to
+   * sequentially enumerate all degrees of freedom. In other words, while the
+   * mesh and the finite element object by themselves simply define a finite
+   * element space $V_h$, the process of distributing degrees of freedom makes
+   * sure that there is a basis for this space and that the shape functions of
+   * this basis are enumerated in an indexable, predictable way.
+   *
+   * The purpose of this function is first discussed in the introduction to
+   * the step-2 tutorial program.
+   *
+   * @note A pointer to the finite element given as argument is stored.
+   * Therefore, the lifetime of the finite element object shall be longer than
+   * that of this object. If you don't want this behavior, you may want to
+   * call the @p clear member function which also releases the lock of this
+   * object to the finite element.
+   */
+  virtual void distribute_dofs (const FiniteElement<dim,spacedim> &fe);
+
+  /**
+   * Distribute level degrees of freedom on each level for geometric
+   * multigrid. The active DoFs need to be distributed using distribute_dofs()
+   * before calling this function and the @p fe needs to be identical to the
+   * finite element passed to distribute_dofs().
+   */
+  virtual void distribute_mg_dofs (const FiniteElement<dim, spacedim> &fe);
+
+  /**
+   * This function returns whether this DoFHandler has DoFs distributed on
+   * each multigrid level or in other words if distribute_mg_dofs() has been
+   * called.
+   */
+  bool has_level_dofs() const;
+
+  /**
+   * This function returns whether this DoFHandler has active DoFs. This is
+   * equivalent to asking whether (i) distribute_dofs() has been called and
+   * (ii) the finite element for which degrees of freedom have been
+   * distributed actually has degrees of freedom (which is not the case for
+   * FE_Nothing, for example).
+   *
+   * If this object is based on a parallel::distributed::Triangulation, then
+   * the current function returns true if <i>any</i> partition of the parallel
+   * DoFHandler object has any degrees of freedom. In other words, the
+   * function returns true even if the Triangulation does not own any active
+   * cells on the current MPI process, but at least one process owns cells and
+   * at least this one process has any degrees of freedom associated with it.
+   */
+  bool has_active_dofs() const;
+
+  /**
+   * After distribute_dofs() with an FESystem element, the block structure of
+   * global and level vectors is stored in a BlockInfo object accessible with
+   * block_info(). This function initializes the local block structure on each
+   * cell in the same object.
+   */
+  void initialize_local_block_info();
+
+  /**
+   * Clear all data of this object and especially delete the lock this object
+   * has to the finite element used the last time when @p distribute_dofs was
+   * called.
+   */
+  virtual void clear ();
+
+  /**
+   * Renumber degrees of freedom based on a list of new dof numbers for all
+   * the dofs.
+   *
+   * This function is called, for example, by the functions in the
+   * DoFRenumbering namespace after they have computed a new ordering of
+   * the degrees of freedom, but it can of course also be called from user
+   * code.
+   *
+   * @param new_numbers This array must have a size equal to the number of
+   * degrees of freedom owned by the current processor, i.e. the size must be
+   * equal to what n_locally_owned_dofs() returns. If only one processor
+   * participates in storing the current mesh, then this equals the total
+   * number of degrees of freedom, i.e. the result of n_dofs(). The contents
+   * of this array are the new global indices for each DoF listed in the
+   * IndexSet returned by locally_owned_dofs(). In the case of a sequential
+   * mesh this means that the array is a list of new indices for each of the
+   * degrees of freedom on the current mesh. In the case that we have a
+   * parallel::distributed::Triangulation underlying this DoFHandler object,
+   * the array is a list of new indices for all the locally owned degrees of
+   * freedom, enumerated in the same order as the currently locally owned
+   * DoFs. In other words, assume that degree of freedom <code>i</code> is
+   * currently locally owned, then
+   * <code>new_numbers[locally_owned_dofs().index_within_set(i)]</code>
+   * returns the new global DoF index of <code>i</code>. Since the IndexSet of
+   * locally_owned_dofs() is complete in the sequential case, the latter
+   * convention for the content of the array reduces to the former in the case
+   * that only one processor participates in the mesh.
+   */
+  void renumber_dofs (const std::vector<types::global_dof_index> &new_numbers);
+
+  /**
+   * The same function as above, but renumber the degrees of freedom of a
+   * single level of a multigrid hierarchy.
+   */
+  void renumber_dofs (const unsigned int level,
+                      const std::vector<types::global_dof_index> &new_numbers);
+
+  /**
+   * Return the maximum number of degrees of freedom a degree of freedom in
+   * the given triangulation with the given finite element may couple with.
+   * This is the maximum number of entries per line in the system matrix; this
+   * information can therefore be used upon construction of the
+   * SparsityPattern object.
+   *
+   * The returned number is not really the maximum number but an estimate
+   * based on the finite element and the maximum number of cells meeting at a
+   * vertex. The number holds for the constrained matrix as well.
+   *
+   * The determination of the number of couplings can be done by simple
+   * picture drawing. An example can be found in the implementation of this
+   * function.
+   *
+   * @note This function is most often used to determine the maximal row
+   * length for sparsity patterns. Unfortunately, while the estimates returned
+   * by this function are rather accurate in 1d and 2d, they are often
+   * significantly too high in 3d, leading the SparsityPattern class to
+   * allocate much too much memory in some cases. Unless someone comes around
+   * to improving the present function for 3d, there is not very much one can
+   * do about these cases. The typical way to work around this problem is to
+   * use an intermediate compressed sparsity pattern that only allocates
+   * memory on demand. Refer to the step-2 and step-11 example programs on how
+   * to do this. The problem is also discussed in the documentation of the
+   * module on
+   * @ref Sparsity.
+   */
+  unsigned int max_couplings_between_dofs () const;
+
+  /**
+   * Return the number of degrees of freedom located on the boundary another
+   * dof on the boundary can couple with.
+   *
+   * The number is the same as for max_couplings_between_dofs() in one
+   * dimension less.
+   *
+   * @note The same applies to this function as to
+   * max_couplings_between_dofs() as regards the performance of this
+   * function. Think about one of the dynamic sparsity pattern classes
+   * instead (see @ref Sparsity).
+   */
+  unsigned int max_couplings_between_boundary_dofs () const;
+
+  /*--------------------------------------*/
+
+  /**
+   * @name Cell iterator functions
+   */
+
+  /*
+   * @{
+   */
+
+  /**
+   * Iterator to the first used cell on level @p level.
+   */
+  cell_iterator        begin       (const unsigned int level = 0) const;
+
+  /**
+   * Iterator to the first active cell on level @p level. If the given level
+   * does not contain any active cells (i.e., all cells on this level are
+   * further refined), then this function returns
+   * <code>end_active(level)</code> so that loops of the kind
+   *  @code
+   *    for (cell=dof_handler.begin_active(level); cell!=dof_handler.end_active(level); ++cell)
+   *      ...
+   *  @endcode
+   * have zero iterations, as may be expected if there are no active cells on
+   * this level.
+   */
+  active_cell_iterator begin_active(const unsigned int level = 0) const;
+
+  /**
+   * Iterator past the end; this iterator serves for comparisons of iterators
+   * with past-the-end or before-the-beginning states.
+   */
+  cell_iterator        end () const;
+
+  /**
+   * Return an iterator which is the first iterator not on the given level. If
+   * @p level is the last level, then this returns <tt>end()</tt>.
+   */
+  cell_iterator end (const unsigned int level) const;
+
+  /**
+   * Return an active iterator which is the first active iterator not on the
+   * given level. If @p level is the last level, then this returns
+   * <tt>end()</tt>.
+   */
+  active_cell_iterator end_active (const unsigned int level) const;
+
+
+  /**
+   * Iterator to the first used cell on level @p level. This returns a
+   * level_cell_iterator that returns level dofs when dof_indices() is called.
+   */
+  level_cell_iterator begin_mg (const unsigned int level = 0) const;
+
+  /**
+   * Iterator past the last cell on level @p level. This returns a
+   * level_cell_iterator that returns level dofs when dof_indices() is called.
+   */
+  level_cell_iterator end_mg (const unsigned int level) const;
+
+  /**
+   * Iterator past the end; this iterator serves for comparisons of iterators
+   * with past-the-end or before-the-beginning states.
+   */
+  level_cell_iterator end_mg () const;
+
+  /**
+   * @name Cell iterator functions returning ranges of iterators
+   */
+
+  /**
+   * Return an iterator range that contains all cells (active or not) that
+   * make up this DoFHandler. Such a range is useful to initialize range-based
+   * for loops as supported by C++11. See the example in the documentation of
+   * active_cell_iterators().
+   *
+   * @return The half open range <code>[this->begin(), this->end())</code>
+   *
+   * @ingroup CPP11
+   */
+  IteratorRange<cell_iterator>        cell_iterators () const;
+
+  /**
+   * Return an iterator range that contains all active cells that make up this
+   * DoFHandler. Such a range is useful to initialize range-based for loops as
+   * supported by C++11, see also
+   * @ref CPP11 "C++11 standard".
+   *
+   * Range-based for loops are useful in that they require much less code than
+   * traditional loops (see <a
+   * href="http://en.wikipedia.org/wiki/C%2B%2B11#Range-based_for_loop">here</a>
+   * for a discussion of how they work). An example is that without range-based
+   * for loops, one often writes code such as the following:
+   * @code
+   *   DoFHandler<dim> dof_handler;
+   *   ...
+   *   typename DoFHandler<dim>::active_cell_iterator
+   *     cell = dof_handler.begin_active(),
+   *     endc = dof_handler.end();
+   *   for (; cell!=endc; ++cell)
+   *     {
+   *       fe_values.reinit (cell);
+   *       ...do the local integration on 'cell'...;
+   *     }
+   * @endcode
+   * Using C++11's range-based for loops, this is now entirely equivalent to
+   * the following:
+   * @code
+   *   DoFHandler<dim> dof_handler;
+   *   ...
+   *   for (auto cell : dof_handler.active_cell_iterators())
+   *     {
+   *       fe_values.reinit (cell);
+   *       ...do the local integration on 'cell'...;
+   *     }
+   * @endcode
+   * To use this feature, you need a compiler that supports C++11.
+   *
+   * @return The half open range <code>[this->begin_active(),
+   * this->end())</code>
+   *
+   * @ingroup CPP11
+   */
+  IteratorRange<active_cell_iterator> active_cell_iterators () const;
+
+  /**
+   * Return an iterator range that contains all cells (active or not) that
+   * make up this DoFHandler in their level-cell form. Such a range is useful
+   * to initialize range-based for loops as supported by C++11. See the
+   * example in the documentation of active_cell_iterators().
+   *
+   * @return The half open range <code>[this->begin_mg(),
+   * this->end_mg())</code>
+   *
+   * @ingroup CPP11
+   */
+  IteratorRange<level_cell_iterator>  mg_cell_iterators () const;
+
+  /**
+   * Return an iterator range that contains all cells (active or not) that
+   * make up the given level of this DoFHandler. Such a range is useful to
+   * initialize range-based for loops as supported by C++11. See the example
+   * in the documentation of active_cell_iterators().
+   *
+   * @param[in] level A given level in the refinement hierarchy of this
+   * triangulation.
+   * @return The half open range <code>[this->begin(level),
+   * this->end(level))</code>
+   *
+   * @pre level must be less than this->n_levels().
+   *
+   * @ingroup CPP11
+   */
+  IteratorRange<cell_iterator>        cell_iterators_on_level (const unsigned int level) const;
+
+  /**
+   * Return an iterator range that contains all active cells that make up the
+   * given level of this DoFHandler. Such a range is useful to initialize
+   * range-based for loops as supported by C++11. See the example in the
+   * documentation of active_cell_iterators().
+   *
+   * @param[in] level A given level in the refinement hierarchy of this
+   * triangulation.
+   * @return The half open range <code>[this->begin_active(level),
+   * this->end(level))</code>
+   *
+   * @pre level must be less than this->n_levels().
+   *
+   * @ingroup CPP11
+   */
+  IteratorRange<active_cell_iterator> active_cell_iterators_on_level (const unsigned int level) const;
+
+  /**
+   * Return an iterator range that contains all cells (active or not) that
+   * make up the given level of this DoFHandler in their level-cell form. Such
+   * a range is useful to initialize range-based for loops as supported by
+   * C++11. See the example in the documentation of active_cell_iterators().
+   *
+   * @param[in] level A given level in the refinement hierarchy of this
+   * triangulation.
+   * @return The half open range <code>[this->begin_mg(level),
+   * this->end_mg(level))</code>
+   *
+   * @pre level must be less than this->n_levels().
+   *
+   * @ingroup CPP11
+   *
+   */
+  IteratorRange<level_cell_iterator> mg_cell_iterators_on_level (const unsigned int level) const;
+
+  /*
+   * @}
+   */
+
+
+  /*---------------------------------------*/
+
+
+  /**
+   * Return the global number of degrees of freedom. If the current object
+   * handles all degrees of freedom itself (even if you may intend to solve
+   * your linear system in parallel, such as in step-17 or step-18), then this
+   * number equals the number of locally owned degrees of freedom since this
+   * object doesn't know anything about what you want to do with it and
+   * believes that it owns every degree of freedom it knows about.
+   *
+   * On the other hand, if this object operates on a
+   * parallel::distributed::Triangulation object, then this function returns
+   * the global number of degrees of freedom, accumulated over all processors.
+   *
+   * In either case, included in the returned number are those DoFs which are
+   * constrained by hanging nodes, see
+   * @ref constraints.
+   */
+  types::global_dof_index n_dofs () const;
+
+  /**
+   * The (global) number of multilevel degrees of freedom on a given level.
+   *
+   * If no level degrees of freedom have been assigned to this level, returns
+   * numbers::invalid_dof_index. Else returns the number of degrees of freedom
+   * on this level.
+   */
+  types::global_dof_index n_dofs (const unsigned int level) const;
+
+  /**
+   * Return the number of degrees of freedom located on the boundary.
+   */
+  types::global_dof_index n_boundary_dofs () const;
+
+  /**
+   * Return the number of degrees of freedom located on those parts of the
+   * boundary which have a boundary indicator listed in the given set. The
+   * reason that a @p map rather than a @p set is used is the same as
+   * described in the section on the @p make_boundary_sparsity_pattern
+   * function.
+   */
+  types::global_dof_index
+  n_boundary_dofs (const FunctionMap &boundary_ids) const;
+
+  /**
+   * Same function, but with different data type of the argument, which is
+   * here simply a list of the boundary indicators under consideration.
+   */
+  types::global_dof_index
+  n_boundary_dofs (const std::set<types::boundary_id> &boundary_ids) const;
+
+  /**
+   * Access to an object informing of the block structure of the dof handler.
+   *
+   * If an FESystem is used in distribute_dofs(), degrees of freedom naturally
+   * split into several
+   * @ref GlossBlock "blocks".
+   * For each base element as many blocks appear as its multiplicity.
+   *
+   * At the end of distribute_dofs(), the number of degrees of freedom in each
+   * block is counted, and stored in a BlockInfo object, which can be accessed
+   * here. If you have previously called distribute_mg_dofs(), the same is
+   * done on each level of the multigrid hierarchy. Additionally, the block
+   * structure on each cell can be generated in this object by calling
+   * initialize_local_block_info().
+   */
+  const BlockInfo &block_info() const;
+
+
+  /**
+   * Return the number of degrees of freedom that belong to this process.
+   *
+   * If this is a sequential job, then the result equals that produced by
+   * n_dofs(). On the other hand, if we are operating on a
+   * parallel::distributed::Triangulation, then it includes only the degrees
+   * of freedom that the current processor owns. Note that in this case this
+   * does not include all degrees of freedom that have been distributed on the
+   * current processor's image of the mesh: in particular, some of the degrees
+   * of freedom on the interface between the cells owned by this processor and
+   * cells owned by other processors may be theirs, and degrees of freedom on
+   * ghost cells are also not necessarily included.
+   */
+  unsigned int n_locally_owned_dofs() const;
+
+  /**
+   * Return an IndexSet describing the set of locally owned DoFs as a subset
+   * of 0..n_dofs(). The number of elements of this set equals
+   * n_locally_owned_dofs().
+   */
+  const IndexSet &locally_owned_dofs() const;
+
+  /**
+   * Returns an IndexSet describing the set of locally owned DoFs used for the
+   * given multigrid level as a subset of 0..n_dofs(level).
+   */
+  const IndexSet &locally_owned_mg_dofs(const unsigned int level) const;
+
+
+  /**
+   * Returns a vector that stores the locally owned DoFs of each processor. If
+   * you are only interested in the number of elements each processor owns
+   * then n_locally_owned_dofs_per_processor() is a better choice.
+   *
+   * If this is a sequential job, then the vector has a single element that
+   * equals the IndexSet representing the entire range [0,n_dofs()).
+   */
+  const std::vector<IndexSet> &
+  locally_owned_dofs_per_processor () const;
+
+  const std::vector<IndexSet> &
+  locally_owned_mg_dofs_per_processor (const unsigned int level) const;
+
+  /**
+   * Return a vector that stores the number of degrees of freedom each
+   * processor that participates in this triangulation owns locally. The sum
+   * of all these numbers equals the number of degrees of freedom that exist
+   * globally, i.e. what n_dofs() returns.
+   *
+   * Each entry of the returned vector equals the number of elements of the
+   * corresponding set returned by locally_owned_dofs_per_processor().
+   *
+   * If this is a sequential job, then the vector has a single element equal
+   * to n_dofs().
+   */
+  const std::vector<types::global_dof_index> &
+  n_locally_owned_dofs_per_processor () const;
+
+  /**
+   * Return a constant reference to the selected finite element object.
+   */
+  const FiniteElement<dim,spacedim> &get_fe () const;
+
+  /**
+   * Return a constant reference to the triangulation underlying this object.
+   *
+   * @deprecated Use get_triangulation() instead.
+   */
+  const Triangulation<dim,spacedim> &get_tria () const DEAL_II_DEPRECATED;
+
+  /**
+   * Return a constant reference to the triangulation underlying this object.
+   */
+  const Triangulation<dim,spacedim> &get_triangulation () const;
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   *
+   * This function is made virtual, since a dof handler object might be
+   * accessed through a pointer to this base class, although the actual
+   * object might be a derived class.
+   */
+  virtual std::size_t memory_consumption () const;
+
+  /**
+   * Write the data of this object to a stream for the purpose of
+   * serialization.
+   */
+  template <class Archive>
+  void save (Archive &ar, const unsigned int version) const;
+
+  /**
+   * Read the data of this object from a stream for the purpose of
+   * serialization.
+   */
+  template <class Archive>
+  void load (Archive &ar, const unsigned int version);
+
+  BOOST_SERIALIZATION_SPLIT_MEMBER()
+
+  /**
+   * We are trying to renumber the degrees of freedom, but somehow did not
+   * count correctly.
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcRenumberingIncomplete);
+  /**
+   * Exception
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcGridsDoNotMatch);
+  /**
+   * Exception
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcInvalidBoundaryIndicator);
+  /**
+   * Exception
+   * @ingroup Exceptions
+   */
+  DeclException1 (ExcNewNumbersNotConsecutive,
+                  types::global_dof_index,
+                  << "The given list of new dof indices is not consecutive: "
+                  << "the index " << arg1 << " does not exist.");
+  /**
+   * Exception
+   * @ingroup Exceptions
+   */
+  DeclException1 (ExcInvalidLevel,
+                  int,
+                  << "The given level " << arg1
+                  << " is not in the valid range!");
+  /**
+   * Exception
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcFacesHaveNoLevel);
+  /**
+   * The triangulation level you accessed is empty.
+   * @ingroup Exceptions
+   */
+  DeclException1 (ExcEmptyLevel,
+                  int,
+                  << "You tried to do something on level " << arg1
+                  << ", but this level is empty.");
+
+
+private:
+  /**
+   * Copy constructor. I can see no reason why someone might want to use it,
+   * so I don't provide it. Since this class has pointer members, making it
+   * private prevents the compiler from providing its own, incorrect one if
+   * anyone chose to copy such an object.
+   */
+  DoFHandler (const DoFHandler &);
+
+  /**
+   * Copy operator. I can see no reason why someone might want to use it, so I
+   * don't provide it. Since this class has pointer members, making it private
+   * prevents the compiler from providing its own, incorrect one if anyone
+   * chose to copy such an object.
+   */
+  DoFHandler &operator = (const DoFHandler &);
+
+
+  /**
+   * An object containing information on the block structure.
+   */
+  BlockInfo block_info_object;
+
+  /**
+   * Address of the triangulation to work on.
+   */
+  SmartPointer<const Triangulation<dim,spacedim>,DoFHandler<dim,spacedim> >
+  tria;
+
+  /**
+   * Store a pointer to the finite element given latest for the distribution
+   * of dofs. In order to avoid destruction of the object before the lifetime
+   * of the DoF handler, we subscribe to the finite element object. To unlock
+   * the FE before the end of the lifetime of this DoF handler, use the
+   * <tt>clear()</tt> function (this clears all data of this object as well,
+   * though).
+   */
+  SmartPointer<const FiniteElement<dim,spacedim>,DoFHandler<dim,spacedim> >
+  selected_fe;
+
+  /**
+   * An object that describes how degrees of freedom should be distributed and
+   * renumbered.
+   */
+  std_cxx11::shared_ptr<dealii::internal::DoFHandler::Policy::PolicyBase<dim,spacedim> > policy;
+
+  /**
+   * A structure that contains all sorts of numbers that characterize the
+   * degrees of freedom this object works on.
+   *
+   * For most members of this structure, there is an accessor function in this
+   * class that returns its value.
+   */
+  dealii::internal::DoFHandler::NumberCache number_cache;
+
+  /**
+   * Data structure like number_cache, but for each multigrid level.
+   */
+  std::vector<dealii::internal::DoFHandler::NumberCache> mg_number_cache;
+
+  /**
+   * Stores the DoF indices of one vertex for every multigrid level the vertex
+   * lives on (unlike cells, vertices live on several levels). NOTE(review):
+   * holds raw pointers and declares a destructor but no copy operations;
+   * confirm that copies (e.g. inside std::vector<MGVertexDoFs>) are safe.
+   */
+  class MGVertexDoFs
+  {
+  public:
+    /**
+     * Default constructor; only declared here, defined out of line.
+     */
+    MGVertexDoFs ();
+
+    /**
+     * Destructor; only declared here, defined out of line.
+     */
+    ~MGVertexDoFs ();
+
+    /**
+     * A function that is called to allocate the necessary amount of memory to
+     * store the indices of the DoFs that live on this vertex for the given
+     * (inclusive) range of levels.
+     */
+    void init (const unsigned int coarsest_level,
+               const unsigned int finest_level,
+               const unsigned int dofs_per_vertex);
+
+    /**
+     * Return the coarsest level for which this structure stores data.
+     */
+    unsigned int get_coarsest_level () const;
+
+    /**
+     * Return the finest level for which this structure stores data.
+     */
+    unsigned int get_finest_level () const;
+
+    /**
+     * Return the index of DoF number <code>dof_number</code> stored for this
+     * vertex on @p level; only @p level is range-checked (via Assert).
+     */
+    types::global_dof_index
+    get_index (const unsigned int level,
+               const unsigned int dof_number) const;
+
+    /**
+     * Set the index of DoF number <code>dof_number</code> stored for this
+     * vertex on @p level to @p index; only @p level is checked (via Assert).
+     */
+    void set_index (const unsigned int level,
+                    const unsigned int dof_number,
+                    const types::global_dof_index index);
+
+    /**
+     * Exception. NOTE(review): presumably signals a failed allocation; confirm.
+     */
+    DeclException0 (ExcNoMemory);
+
+  private:
+    /**
+     * Coarsest level for which this object stores DoF indices.
+     */
+    unsigned int coarsest_level;
+
+    /**
+     * Finest level for which this object stores DoF indices.
+     */
+    unsigned int finest_level;
+
+    /**
+     * Array holding the indices of the DoFs that live on the various levels
+     * this vertex exists on; indexed through <code>indices_offset</code>.
+     */
+    types::global_dof_index *indices;
+
+    /**
+     * For each level, counted from coarsest_level (entry 0), the offset in the
+     * <code>indices</code> array at which the DoF indices of that level start;
+     * get_index() and set_index() add <code>dof_number</code> to this offset.
+     */
+    types::global_dof_index *indices_offset;
+  };
+
+  void clear_mg_space ();
+
+  /**
+   * Free all used memory.
+   */
+  void clear_space ();
+
+  void reserve_space ();
+
+  template <int structdim>
+  types::global_dof_index get_dof_index (const unsigned int obj_level,
+                                         const unsigned int obj_index,
+                                         const unsigned int fe_index,
+                                         const unsigned int local_index) const;
+
+  template<int structdim>
+  void set_dof_index (const unsigned int obj_level,
+                      const unsigned int obj_index,
+                      const unsigned int fe_index,
+                      const unsigned int local_index,
+                      const types::global_dof_index global_index) const;
+
+  /**
+   * Array to store the indices for degrees of freedom located at vertices.
+   */
+  std::vector<types::global_dof_index> vertex_dofs;
+
+  /**
+   * An array to store the indices for level degrees of freedom located at
+   * vertices.
+   */
+  std::vector<MGVertexDoFs> mg_vertex_dofs;
+
+  /**
+   * Space to store the DoF numbers for the different levels. Analogous to the
+   * <tt>levels[]</tt> tree of the Triangulation objects.
+   */
+  std::vector<dealii::internal::DoFHandler::DoFLevel<dim>*> levels;
+
+  std::vector<dealii::internal::DoFHandler::DoFLevel<dim>*> mg_levels;
+
+  /**
+   * Space to store DoF numbers of faces. They are not stored in
+   * <tt>levels</tt> since faces are not organized hierarchically, but in a
+   * flat array.
+   */
+  dealii::internal::DoFHandler::DoFFaces<dim> *faces;
+
+  dealii::internal::DoFHandler::DoFFaces<dim> *mg_faces;
+
+  /**
+   * Make accessor objects friends.
+   */
+  template <int, class, bool> friend class DoFAccessor;
+  template <class, bool> friend class DoFCellAccessor;
+  friend struct dealii::internal::DoFAccessor::Implementation;
+  friend struct dealii::internal::DoFCellAccessor::Implementation;
+
+  friend struct dealii::internal::DoFHandler::Implementation;
+  friend struct dealii::internal::DoFHandler::Policy::Implementation;
+};
+
+
+
+
+/* -------------- declaration of explicit specializations ------------- */
+
+#ifndef DOXYGEN
+
+template <> types::global_dof_index DoFHandler<1>::n_boundary_dofs () const;
+template <> types::global_dof_index DoFHandler<1>::n_boundary_dofs (const FunctionMap &) const;
+template <> types::global_dof_index DoFHandler<1>::n_boundary_dofs (const std::set<types::boundary_id> &) const;
+
+template <> void DoFHandler<1>::renumber_dofs(unsigned int,const std::vector<types::global_dof_index>  &new_numbers);
+template <> void DoFHandler<2>::renumber_dofs(unsigned int,const std::vector<types::global_dof_index>  &new_numbers);
+template <> void DoFHandler<3>::renumber_dofs(unsigned int,const std::vector<types::global_dof_index>  &new_numbers);
+
+
+/* ----------------------- Inline functions ---------------------------------- */
+
+
+// Report whether mg_number_cache holds at least one per-level entry.
+template <int dim, int spacedim>
+inline bool
+DoFHandler<dim,spacedim>::has_level_dofs() const
+{
+  return !mg_number_cache.empty();
+}
+
+// Report whether number_cache records a positive number of global DoFs.
+template <int dim, int spacedim>
+inline bool
+DoFHandler<dim,spacedim>::has_active_dofs() const
+{
+  return 0 < number_cache.n_global_dofs;
+}
+
+// Accessor for the global DoF count kept in number_cache.
+template <int dim, int spacedim>
+inline types::global_dof_index
+DoFHandler<dim,spacedim>::n_dofs () const
+{
+  return number_cache.n_global_dofs;
+}
+
+// Global DoF count of one multigrid level, as recorded in mg_number_cache.
+template<int dim, int spacedim>
+inline types::global_dof_index DoFHandler<dim, spacedim>::n_dofs (const unsigned int level) const
+{
+  Assert(has_level_dofs(), ExcMessage("n_dofs(level) can only be called after distribute_mg_dofs()"));
+  Assert (level < mg_number_cache.size (), ExcInvalidLevel (level));
+  return mg_number_cache[level].n_global_dofs;
+}
+
+
+// Number of DoFs belonging to this process, as cached in number_cache.
+template <int dim, int spacedim>
+inline unsigned int DoFHandler<dim, spacedim>::n_locally_owned_dofs() const
+{
+  return number_cache.n_locally_owned_dofs;
+}
+
+
+// Index set of the locally owned DoFs, as cached in number_cache.
+template <int dim, int spacedim>
+inline const IndexSet &DoFHandler<dim, spacedim>::locally_owned_dofs() const
+{
+  return number_cache.locally_owned_dofs;
+}
+
+template <int dim, int spacedim>
+inline const IndexSet &DoFHandler<dim, spacedim>::locally_owned_mg_dofs(const unsigned int level) const
+{
+  Assert(level < this->get_triangulation().n_global_levels(), ExcMessage("invalid level in locally_owned_mg_dofs"));
+  Assert(level < mg_number_cache.size(), ExcInvalidLevel(level)); // guards the lookup below, as in n_dofs(level)
+  return mg_number_cache[level].locally_owned_dofs;
+}
+
+template <int dim, int spacedim>
+inline const std::vector<types::global_dof_index> &
+DoFHandler<dim, spacedim>::n_locally_owned_dofs_per_processor() const
+{
+  return number_cache.n_locally_owned_dofs_per_processor; // per-process counts cached in number_cache
+}
+
+
+template <int dim, int spacedim>
+inline const std::vector<IndexSet> &
+DoFHandler<dim, spacedim>::locally_owned_dofs_per_processor () const
+{
+  return number_cache.locally_owned_dofs_per_processor; // per-process index sets cached in number_cache
+}
+
+template <int dim, int spacedim>
+inline const std::vector<IndexSet> &DoFHandler<dim, spacedim>::locally_owned_mg_dofs_per_processor (const unsigned int level) const
+{
+  Assert(level < this->get_triangulation().n_global_levels(), ExcMessage("invalid level in locally_owned_mg_dofs_per_processor"));
+  Assert(level < mg_number_cache.size(), ExcInvalidLevel(level)); // guards the lookup below, as in n_dofs(level)
+  return mg_number_cache[level].locally_owned_dofs_per_processor;
+}
+
+
+// Access the finite element this handler points to; asserts that one is set.
+template <int dim, int spacedim>
+inline const FiniteElement<dim,spacedim> &
+DoFHandler<dim,spacedim>::get_fe () const
+{
+  Assert(selected_fe!=0, ExcMessage("You are trying to access the DoFHandler's FiniteElement object before it has been initialized."));
+  return *selected_fe;
+}
+
+
+
+// Deprecated spelling of get_triangulation(); asserts that tria is set.
+template <int dim, int spacedim>
+inline const Triangulation<dim,spacedim> &
+DoFHandler<dim,spacedim>::get_tria () const
+{
+  Assert(tria != 0, ExcNotInitialized());
+  return *tria;
+}
+
+
+
+// Access the triangulation this handler points to; asserts that tria is set.
+template <int dim, int spacedim>
+inline const Triangulation<dim,spacedim> &
+DoFHandler<dim,spacedim>::get_triangulation () const
+{
+  Assert(tria != 0, ExcNotInitialized());
+  return *tria;
+}
+
+
+
+// Read-only access to the BlockInfo member of this handler.
+template <int dim, int spacedim>
+inline const BlockInfo &
+DoFHandler<dim,spacedim>::block_info () const
+{
+  return block_info_object;
+}
+
+
+namespace internal
+{
+  /**
+   * Return a string representing the dynamic type of the given argument.
+   * This is basically the same as what typeid(...).name() does, but it turns
+   * out that the latter is broken on Intel 13+.
+   *
+   * Defined in dof_handler.cc.
+   */
+  template<int dim, int spacedim>
+  std::string policy_to_string(const dealii::internal::DoFHandler::Policy::PolicyBase<dim,spacedim> &policy);
+
+}
+
+
+// Write this handler's DoF data to ar, followed by three tags (cell count,
+// FE name, policy name) that load() compares against its own objects.
+template <int dim, int spacedim>
+template <class Archive>
+void
+DoFHandler<dim,spacedim>::save (Archive &ar, const unsigned int /*version*/) const
+{
+  // payload, in the same order in which load() reads it back
+  ar &block_info_object &vertex_dofs &number_cache;
+  ar &levels &faces;
+
+  // tags identifying the mesh, the element and the policy currently in use
+  unsigned int mesh_cell_count = tria->n_cells();
+  std::string  element_name    = selected_fe->get_name();
+  std::string  policy_label    = internal::policy_to_string(*policy);
+
+  ar &mesh_cell_count;
+  ar &element_name;
+  ar &policy_label;
+}
+
+
+// Read back what save() wrote, then check that the stored cell count, FE name
+// and policy name agree with the objects this handler is attached to.
+template <int dim, int spacedim>
+template <class Archive>
+void
+DoFHandler<dim,spacedim>::load (Archive &ar, const unsigned int /*version*/)
+{
+  ar &block_info_object &vertex_dofs &number_cache;
+
+  // boost::serialization restores pointers, but whatever a pointer referred
+  // to before is not destroyed in the process and would be leaked. hence
+  // release the old level and face storage before reading the new one
+  typedef typename std::vector<dealii::internal::DoFHandler::DoFLevel<dim>*>::iterator level_iterator;
+  for (level_iterator old_level=levels.begin(); old_level!=levels.end(); ++old_level)
+    delete *old_level;
+  levels.clear ();
+  delete faces;
+  faces = 0;
+
+  ar &levels;
+  ar &faces;
+
+  // counterparts of the three tags written at the end of save(); they are
+  // compared against the current triangulation, FE and policy below
+  unsigned int n_cells;
+  std::string  fe_name;
+  std::string  policy_name;
+
+  ar &n_cells &fe_name &policy_name;
+
+  AssertThrow (n_cells == tria->n_cells(),
+               ExcMessage ("The object being loaded into does not match the triangulation "
+                           "that has been stored previously."));
+  AssertThrow (fe_name == selected_fe->get_name(),
+               ExcMessage ("The finite element associated with this DoFHandler does not match "
+                           "the one that was associated with the DoFHandler previously stored."));
+  AssertThrow (policy_name == internal::policy_to_string(*policy),
+               ExcMessage (std::string ("The policy currently associated with this DoFHandler (")
+                           + internal::policy_to_string(*policy)
+                           +std::string(") does not match the one that was associated with the "
+                                        "DoFHandler previously stored (")
+                           + policy_name
+                           + ")."));
+}
+
+
+// Fetch the dof_number-th DoF index stored for this vertex on the given level.
+template<int dim, int spacedim>
+inline types::global_dof_index
+DoFHandler<dim, spacedim>::MGVertexDoFs::get_index (const unsigned int level, const unsigned int dof_number) const
+{
+  Assert ((level >= coarsest_level) && (level <= finest_level), ExcInvalidLevel (level));
+  const types::global_dof_index level_start = indices_offset[level - coarsest_level];
+  return indices[level_start + dof_number];
+}
+
+
+// Store index as the dof_number-th DoF index of this vertex on the given level.
+template<int dim, int spacedim>
+inline void
+DoFHandler<dim, spacedim>::MGVertexDoFs::set_index (const unsigned int level, const unsigned int dof_number,
+                                                    const types::global_dof_index index)
+{
+  Assert ((level >= coarsest_level) && (level <= finest_level), ExcInvalidLevel (level));
+  const types::global_dof_index level_start = indices_offset[level - coarsest_level];
+  indices[level_start + dof_number] = index;
+}
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/dofs/dof_handler_policy.h b/include/deal.II/dofs/dof_handler_policy.h
new file mode 100644
index 0000000..91f2126
--- /dev/null
+++ b/include/deal.II/dofs/dof_handler_policy.h
@@ -0,0 +1,228 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__dof_handler_policy_h
+#define dealii__dof_handler_policy_h
+
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/template_constraints.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/dofs/dof_renumbering.h>
+
+#include <vector>
+#include <map>
+#include <set>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int, int> class FiniteElement;
+template <int, int> class DoFHandler;
+
+
+namespace internal
+{
+  namespace DoFHandler
+  {
+    struct NumberCache;
+
+    /**
+     * A namespace in which we define classes that describe how to distribute
+     * and renumber degrees of freedom.
+     */
+    namespace Policy
+    {
+      struct Implementation;
+
+      /**
+       * Abstract interface for the policies that describe how the
+       * DoFHandler::distribute_dofs and DoFHandler::renumber_dofs functions
+       * should work. Every operation is pure virtual.
+       */
+      template <int dim, int spacedim>
+      class PolicyBase
+      {
+      public:
+        /**
+         * Destructor. Virtual, since policies are meant to be used through
+         * this base class.
+         */
+        virtual ~PolicyBase ();
+
+        /**
+         * Distribute degrees of freedom on @p dof_handler.
+         *
+         * @param dof_handler  The object on which DoFs are distributed.
+         * @param number_cache The NumberCache of @p dof_handler. It may be
+         * modified to make DoFHandler related functions work properly when
+         * called within the policy classes; the updated cache is written here.
+         */
+        virtual void
+        distribute_dofs (dealii::DoFHandler<dim,spacedim> &dof_handler,
+                         NumberCache &number_cache) const = 0;
+
+        /**
+         * Distribute the multigrid dofs of @p dof_handler on each level. The
+         * number caches are handed in through @p number_caches.
+         */
+        virtual void
+        distribute_mg_dofs (dealii::DoFHandler<dim,spacedim> &dof_handler,
+                            std::vector<NumberCache> &number_caches) const = 0;
+
+        /**
+         * Renumber degrees of freedom.
+         *
+         * @param new_numbers  Specifies the renumbering.
+         * @param dof_handler  The object whose DoFs are renumbered.
+         * @param number_cache The NumberCache of @p dof_handler. It may be
+         * modified to make DoFHandler related functions work properly when
+         * called within the policy classes; the updated cache is written here.
+         */
+        virtual void
+        renumber_dofs (const std::vector<types::global_dof_index> &new_numbers,
+                       dealii::DoFHandler<dim,spacedim> &dof_handler,
+                       NumberCache &number_cache) const = 0;
+      };
+
+
+      /**
+       * Default policy for sequential operations, i.e. for the case where all
+       * cells get degrees of freedom. Overrides all three PolicyBase
+       * operations.
+       */
+      template <int dim, int spacedim>
+      class Sequential : public PolicyBase<dim,spacedim>
+      {
+      public:
+        /**
+         * Distribute degrees of freedom on @p dof_handler, the object given
+         * as first argument.
+         */
+        virtual void
+        distribute_dofs (dealii::DoFHandler<dim,spacedim> &dof_handler,
+                         NumberCache &number_cache) const;
+
+        /**
+         * Distribute multigrid DoFs on @p dof_handler.
+         */
+        virtual void
+        distribute_mg_dofs (dealii::DoFHandler<dim,spacedim> &dof_handler,
+                            std::vector<NumberCache> &number_caches) const;
+
+        /**
+         * Renumber the degrees of freedom of @p dof_handler as specified by
+         * @p new_numbers, the first argument.
+         */
+        virtual void
+        renumber_dofs (const std::vector<types::global_dof_index>  &new_numbers,
+                       dealii::DoFHandler<dim,spacedim> &dof_handler,
+                       NumberCache &number_cache) const;
+      };
+
+      /**
+       * Policy for operations when we use a parallel::shared::Triangulation
+       * object. Derives from Sequential and overrides all three of its
+       * operations.
+       */
+      template <int dim, int spacedim>
+      class ParallelShared : public Sequential<dim,spacedim>
+      {
+      public:
+
+        /**
+         * Distribute degrees of freedom on @p dof_handler, the object given
+         * as first argument.
+         *
+         * On distribution, DoFs are renumbered subdomain-wise and
+         * number_cache.n_locally_owned_dofs_per_processor[i] and
+         * number_cache.locally_owned_dofs are updated consistently.
+         */
+        virtual void
+        distribute_dofs (dealii::DoFHandler<dim,spacedim> &dof_handler,
+                         NumberCache &number_cache) const;
+
+        /**
+         * Distribute multigrid DoFs. This function is not yet implemented for
+         * this policy.
+         */
+        virtual void
+        distribute_mg_dofs (dealii::DoFHandler<dim,spacedim> &dof_handler,
+                            std::vector<NumberCache> &number_caches) const;
+
+        /**
+         * Renumber the degrees of freedom of @p dof_handler as specified by
+         * @p new_numbers, the first argument.
+         *
+         * @param new_numbers  May either have as many entries as there are
+         * global degrees of freedom (i.e. dof_handler.n_dofs() ) or
+         * dof_handler.locally_owned_dofs().n_elements(). Therefore it can be
+         * utilised with renumbering functions implemented for the
+         * parallel::distributed case.
+         * @param dof_handler  The object whose DoFs are renumbered.
+         * @param number_cache The number cache passed along with @p dof_handler.
+         */
+        virtual void
+        renumber_dofs (const std::vector<types::global_dof_index>  &new_numbers,
+                       dealii::DoFHandler<dim,spacedim> &dof_handler,
+                       NumberCache &number_cache) const;
+      };
+
+
+      /**
+       * Policy for operations when we use a
+       * parallel::distributed::Triangulation object. Overrides all three
+       * PolicyBase operations.
+       */
+      template <int dim, int spacedim>
+      class ParallelDistributed : public PolicyBase<dim,spacedim>
+      {
+      public:
+        /**
+         * Distribute degrees of freedom on @p dof_handler, the object given
+         * as first argument.
+         */
+        virtual void
+        distribute_dofs (dealii::DoFHandler<dim,spacedim> &dof_handler,
+                         NumberCache &number_cache) const;
+
+        /**
+         * Distribute multigrid DoFs on @p dof_handler.
+         */
+        virtual void
+        distribute_mg_dofs (dealii::DoFHandler<dim,spacedim> &dof_handler,
+                            std::vector<NumberCache> &number_caches) const;
+
+        /**
+         * Renumber the degrees of freedom of @p dof_handler as specified by
+         * @p new_numbers, the first argument.
+         */
+        virtual void
+        renumber_dofs (const std::vector<types::global_dof_index>  &new_numbers,
+                       dealii::DoFHandler<dim,spacedim> &dof_handler,
+                       NumberCache &number_cache) const;
+      };
+    }
+  }
+}
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+/*----------------------------   dof_handler_policy.h     ---------------------------*/
+#endif
+/*----------------------------   dof_handler_policy.h     ---------------------------*/
diff --git a/include/deal.II/dofs/dof_iterator_selector.h b/include/deal.II/dofs/dof_iterator_selector.h
new file mode 100644
index 0000000..f3a7ec3
--- /dev/null
+++ b/include/deal.II/dofs/dof_iterator_selector.h
@@ -0,0 +1,175 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__dof_iterators_h
+#define dealii__dof_iterators_h
+
+#include <deal.II/base/config.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int, int, int> class InvalidAccessor;
+
+template <int structdim, typename DoFHandlerType, bool lda> class DoFAccessor;
+template <typename DoFHandlerType, bool lda> class DoFCellAccessor;
+
+template <int dim, int spacedim> class FiniteElement;
+template <typename Accessor> class TriaRawIterator;
+template <typename Accessor> class TriaIterator;
+template <typename Accessor> class TriaActiveIterator;
+template <int dim, int spacedim> class Triangulation;
+template <int dim, int spacedim> class DoFHandler;
+
+
+namespace internal
+{
+  namespace DoFHandler
+  {
+    template <typename DoFHandlerType, bool lda=false>
+    struct Iterators;
+
+
+    /**
+     * Iterator and accessor types for DoF handling in one dimension.
+     *
+     * The types mean the same as those declared in
+     * internal::Triangulation::Iterators<1,spacedim>; only the treatment of
+     * templates is a little more complicated. See the
+     * @ref Iterators
+     * module for more information.
+     *
+     * @author Wolfgang Bangerth, Oliver Kayser-Herold, Guido Kanschat, 1998,
+     * 2003, 2008, 2010
+     */
+    template <template <int, int> class DoFHandlerType, int spacedim, bool lda>
+    struct Iterators<DoFHandlerType<1, spacedim>, lda>
+    {
+      typedef DoFHandlerType<1,spacedim>                    DoFHandler_type;
+      typedef dealii::DoFCellAccessor<DoFHandler_type, lda> CellAccessor;
+      typedef dealii::DoFAccessor<0,DoFHandler_type, lda>   FaceAccessor;
+      // cells are visited through the cell accessor ...
+      typedef TriaRawIterator   <CellAccessor> raw_cell_iterator;
+      typedef TriaIterator      <CellAccessor> cell_iterator;
+      typedef TriaActiveIterator<CellAccessor> active_cell_iterator;
+      // ... and in 1d the line iterators are those very same types
+      typedef raw_cell_iterator    raw_line_iterator;
+      typedef cell_iterator        line_iterator;
+      typedef active_cell_iterator active_line_iterator;
+      // faces are visited through the DoFAccessor<0,...> declared above
+      typedef TriaRawIterator   <FaceAccessor> raw_face_iterator;
+      typedef TriaIterator      <FaceAccessor> face_iterator;
+      typedef TriaActiveIterator<FaceAccessor> active_face_iterator;
+      // quad and hex iterators only get InvalidAccessor placeholders in 1d
+      typedef TriaRawIterator   <InvalidAccessor<2,1,spacedim> > raw_quad_iterator;
+      typedef TriaIterator      <InvalidAccessor<2,1,spacedim> > quad_iterator;
+      typedef TriaActiveIterator<InvalidAccessor<2,1,spacedim> > active_quad_iterator;
+
+      typedef TriaRawIterator   <InvalidAccessor<3,1,spacedim> > raw_hex_iterator;
+      typedef TriaIterator      <InvalidAccessor<3,1,spacedim> > hex_iterator;
+      typedef TriaActiveIterator<InvalidAccessor<3,1,spacedim> > active_hex_iterator;
+    };
+
+
+
+
+    /**
+     * Iterator and accessor types for DoF handling in two dimensions.
+     *
+     * The types mean the same as those declared in
+     * internal::Triangulation::Iterators<2,spacedim>; only the treatment of
+     * templates is a little more complicated. See the
+     * @ref Iterators
+     * module for more information.
+     *
+     * @author Wolfgang Bangerth, Oliver Kayser-Herold, Guido Kanschat, 1998,
+     * 2003, 2008, 2010
+     */
+    template <template <int, int> class DoFHandlerType, int spacedim, bool lda>
+    struct Iterators<DoFHandlerType<2, spacedim>, lda>
+    {
+      typedef DoFHandlerType<2,spacedim>                    DoFHandler_type;
+      typedef dealii::DoFCellAccessor<DoFHandler_type, lda> CellAccessor;
+      typedef dealii::DoFAccessor<1, DoFHandler_type, lda>  FaceAccessor;
+      // cells are visited through the cell accessor ...
+      typedef TriaRawIterator   <CellAccessor> raw_cell_iterator;
+      typedef TriaIterator      <CellAccessor> cell_iterator;
+      typedef TriaActiveIterator<CellAccessor> active_cell_iterator;
+      // ... and in 2d the quad iterators are those very same types
+      typedef raw_cell_iterator    raw_quad_iterator;
+      typedef cell_iterator        quad_iterator;
+      typedef active_cell_iterator active_quad_iterator;
+      // faces are visited through the DoFAccessor<1,...> declared above ...
+      typedef TriaRawIterator   <FaceAccessor> raw_face_iterator;
+      typedef TriaIterator      <FaceAccessor> face_iterator;
+      typedef TriaActiveIterator<FaceAccessor> active_face_iterator;
+      // ... and in 2d the line iterators are those very same types
+      typedef raw_face_iterator    raw_line_iterator;
+      typedef face_iterator        line_iterator;
+      typedef active_face_iterator active_line_iterator;
+      // hex iterators only get InvalidAccessor placeholders in 2d
+      typedef TriaRawIterator   <InvalidAccessor<3,2,spacedim> > raw_hex_iterator;
+      typedef TriaIterator      <InvalidAccessor<3,2,spacedim> > hex_iterator;
+      typedef TriaActiveIterator<InvalidAccessor<3,2,spacedim> > active_hex_iterator;
+    };
+
+
+
+
+    /**
+     * Iterator and accessor types for DoF handling in three dimensions.
+     *
+     * The types mean the same as those declared in
+     * internal::Triangulation::Iterators<3,spacedim>; only the treatment of
+     * templates is a little more complicated. See the
+     * @ref Iterators
+     * module for more information.
+     *
+     * @author Wolfgang Bangerth, Oliver Kayser-Herold, Guido Kanschat, 1998,
+     * 2003, 2008, 2010
+     */
+    template <template <int, int> class DoFHandlerType, int spacedim, bool lda>
+    struct Iterators<DoFHandlerType<3, spacedim>, lda>
+    {
+      typedef DoFHandlerType<3, spacedim>                   DoFHandler_type;
+      typedef dealii::DoFCellAccessor<DoFHandler_type, lda> CellAccessor;
+      typedef dealii::DoFAccessor<2, DoFHandler_type, lda>  FaceAccessor;
+      // cells are visited through the cell accessor ...
+      typedef TriaRawIterator   <CellAccessor> raw_cell_iterator;
+      typedef TriaIterator      <CellAccessor> cell_iterator;
+      typedef TriaActiveIterator<CellAccessor> active_cell_iterator;
+      // ... and in 3d the hex iterators are those very same types
+      typedef raw_cell_iterator    raw_hex_iterator;
+      typedef cell_iterator        hex_iterator;
+      typedef active_cell_iterator active_hex_iterator;
+      // faces are visited through the DoFAccessor<2,...> declared above ...
+      typedef TriaRawIterator   <FaceAccessor> raw_face_iterator;
+      typedef TriaIterator      <FaceAccessor> face_iterator;
+      typedef TriaActiveIterator<FaceAccessor> active_face_iterator;
+      // ... and in 3d the quad iterators are those very same types
+      typedef raw_face_iterator    raw_quad_iterator;
+      typedef face_iterator        quad_iterator;
+      typedef active_face_iterator active_quad_iterator;
+      // lines are neither cells nor faces in 3d; they use DoFAccessor<1,...>
+      typedef TriaRawIterator   <dealii::DoFAccessor<1, DoFHandler_type, lda> > raw_line_iterator;
+      typedef TriaIterator      <dealii::DoFAccessor<1, DoFHandler_type, lda> > line_iterator;
+      typedef TriaActiveIterator<dealii::DoFAccessor<1, DoFHandler_type, lda> > active_line_iterator;
+    };
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // dealii__dof_iterators_h
diff --git a/include/deal.II/dofs/dof_levels.h b/include/deal.II/dofs/dof_levels.h
new file mode 100644
index 0000000..d2fbaf8
--- /dev/null
+++ b/include/deal.II/dofs/dof_levels.h
@@ -0,0 +1,155 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__dof_levels_h
+#define dealii__dof_levels_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/dofs/dof_objects.h>
+#include <vector>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace internal
+{
+  namespace DoFHandler
+  {
+
+
+    /**
+     * Structure for storing degree of freedom information for cells,
+     * organized by levels.
+     *
+     * We store cached values for the DoF indices on each cell in
+     * #cell_dof_indices_cache, since this is a frequently requested
+     * operation. The values are set by
+     * DoFCellAccessor::update_cell_dof_indices_cache and are used by
+     * DoFCellAccessor::get_dof_indices.
+     *
+     * Note that vertices are separate from, and in fact have nothing to do
+     * with cells. The indices of degrees of freedom located on vertices
+     * therefore are not stored here, but rather in member variables of the
+     * dealii::DoFHandler class.
+     *
+     * The indices of degrees of freedom located on lower dimensional objects,
+     * i.e. on lines for 2D and on quads and lines for 3D, are treated
+     * similarly to those on cells. However, these geometrical objects, which
+     * are called faces as a generalisation, are not organised in a
+     * hierarchical structure of levels. Therefore, the degrees of freedom
+     * located on these objects are stored in separate classes, namely the
+     * <tt>DoFFaces</tt> classes.
+     *
+     * Access to this object is usually through the
+     * DoFAccessor::set_dof_index() and DoFAccessor::dof_index() functions or
+     * similar functions of derived classes that in turn access the member
+     * variables using the DoFHandler::get_dof_index() and corresponding
+     * setter functions. Knowledge of the actual data format is therefore
+     * encapsulated to the present hierarchy of classes as well as the
+     * dealii::DoFHandler class.
+     *
+     * @author Wolfgang Bangerth, 1998, 2006, Guido Kanschat, 2012
+     */
+    template <int dim>
+    class DoFLevel
+    {
+    public:
+      /**
+       * Cache for the DoF indices on cells. The size of this array equals the
+       * number of cells on a given level times selected_fe.dofs_per_cell.
+       */
+      std::vector<types::global_dof_index> cell_dof_indices_cache;
+
+      /**
+       * The object storing the DoF indices of cells and related access functions.
+       */
+      DoFObjects<dim> dof_object;
+
+      /**
+       * Return a pointer to the beginning of the DoF indices cache for a
+       * given cell.
+       *
+       * @param obj_index The number of the cell we are looking at.
+       * @param dofs_per_cell The number of DoFs per cell for this cell.
+       * @return A pointer to the first DoF index for the current cell. The
+       * next dofs_per_cell indices are for the current cell.
+       */
+      const types::global_dof_index *
+      get_cell_cache_start (const unsigned int obj_index,
+                            const unsigned int dofs_per_cell) const;
+
+      /**
+       * Determine an estimate for the memory consumption (in bytes) of this
+       * object.
+       */
+      std::size_t memory_consumption () const;
+
+      /**
+       * Read or write the data of this object to or from a stream for the
+       * purpose of serialization. The version argument is currently unused.
+       */
+      template <class Archive>
+      void serialize(Archive &ar,
+                     const unsigned int version);
+    };
+
+
+
+    template <int dim>
+    inline
+    const types::global_dof_index *
+    DoFLevel<dim>::get_cell_cache_start (const unsigned int obj_index,
+                                         const unsigned int dofs_per_cell) const
+    {
+      // each cell occupies dofs_per_cell consecutive entries of the cache
+      const unsigned int first_entry = obj_index*dofs_per_cell;
+      Assert (first_entry+dofs_per_cell <= cell_dof_indices_cache.size(),
+              ExcInternalError());
+
+      return &cell_dof_indices_cache[first_entry];
+    }
+
+
+
+    template <int dim>
+    inline std::size_t
+    DoFLevel<dim>::memory_consumption () const
+    {
+      // add up the estimates for the two data members of this class
+      const std::size_t cache_bytes = MemoryConsumption::memory_consumption (cell_dof_indices_cache);
+      return cache_bytes + MemoryConsumption::memory_consumption (dof_object);
+    }
+
+
+    template <int dim>
+    template <class Archive>
+    inline
+    void
+    DoFLevel<dim>::serialize (Archive &ar,
+                              const unsigned int)   // version, unused
+    {
+      ar &cell_dof_indices_cache;  // first: the per-cell index cache
+      ar &dof_object;              // second: the DoFObjects<dim> member
+    }
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/dofs/dof_objects.h b/include/deal.II/dofs/dof_objects.h
new file mode 100644
index 0000000..8975902
--- /dev/null
+++ b/include/deal.II/dofs/dof_objects.h
@@ -0,0 +1,222 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__dof_objects_h
+#define dealii__dof_objects_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int, int> class DoFHandler;
+
+namespace internal
+{
+  namespace DoFHandler
+  {
+    template <int> class DoFLevel;
+    template <int> class DoFFaces;
+
+
+    /**
+     * Store the indices of the degrees of freedom which are located on
+     * objects of dimension @p dim.
+     *
+     * <h3>Information for all DoFObjects classes</h3>
+     *
+     * The DoFObjects classes store the global indices of the degrees of
+     * freedom for each cell on a certain level. The global index or number of
+     * a degree of freedom is the zero-based index of the according value in
+     * the solution vector and the row and column index in the global matrix
+     * or the multigrid matrix for this level. These indices refer to the
+     * unconstrained vectors and matrices, where we have not taken account of
+     * the constraints introduced by hanging nodes.
+     *
+     * Since vertices are not associated with a particular level, the indices
+     * associated with vertices are not stored in the DoFObjects classes but
+     * rather in the DoFHandler::vertex_dofs array.
+     *
+     * The DoFObjects classes are not used directly, but objects of these
+     * classes are included in the DoFLevel and DoFFaces classes.
+     *
+     * @ingroup dofs
+     * @author Tobias Leicht, 2006
+     */
+    template <int dim>
+    class DoFObjects
+    {
+    public:
+      /**
+       * Store the global indices of the degrees of freedom.
+       */
+      std::vector<types::global_dof_index> dofs;
+
+    public:
+      /**
+       * Set the global index of the @p local_index-th degree of freedom
+       * located on the object with number @p obj_index to the value given by
+       * the last argument. The @p dof_handler argument is used to access the
+       * finite element that is to be used to compute the location where this
+       * data is stored.
+       *
+       * The third argument, @p fe_index, must equal zero. It is otherwise
+       * unused, but we retain the argument so that we can use the same
+       * interface for non-hp and hp finite element methods, in effect making
+       * it possible to share the DoFAccessor class hierarchy between hp and
+       * non-hp classes.
+       */
+      template <int dh_dim, int spacedim>
+      void
+      set_dof_index (const dealii::DoFHandler<dh_dim,spacedim> &dof_handler,
+                     const unsigned int       obj_index,
+                     const unsigned int       fe_index,
+                     const unsigned int       local_index,
+                     const types::global_dof_index       global_index);
+
+      /**
+       * Return the global index of the @p local_index-th degree of freedom
+       * located on the object with number @p obj_index. The @p dof_handler
+       * argument is used to access the finite element that is to be used to
+       * compute the location where this data is stored.
+       *
+       * The third argument, @p fe_index, must equal zero. It is otherwise
+       * unused, but we retain the argument so that we can use the same
+       * interface for non-hp and hp finite element methods, in effect making
+       * it possible to share the DoFAccessor class hierarchy between hp and
+       * non-hp classes.
+       */
+      template <int dh_dim, int spacedim>
+      types::global_dof_index
+      get_dof_index (const dealii::DoFHandler<dh_dim,spacedim> &dof_handler,
+                     const unsigned int       obj_index,
+                     const unsigned int       fe_index,
+                     const unsigned int       local_index) const;
+
+      /**
+       * Return the value 1. The meaning of this function becomes clear by
+       * looking at what the corresponding functions in the classes
+       * internal::hp::DoFObjects do.
+       */
+      template <int dh_dim, int spacedim>
+      unsigned int
+      n_active_fe_indices (const dealii::DoFHandler<dh_dim,spacedim> &dof_handler,
+                           const types::global_dof_index       index) const;
+
+      /**
+       * Similar to the function above. Assert that @p fe_index is zero, and
+       * then return true.
+       */
+      template <int dh_dim, int spacedim>
+      bool
+      fe_index_is_active (const dealii::DoFHandler<dh_dim,spacedim> &dof_handler,
+                          const types::global_dof_index       index,
+                          const unsigned int       fe_index) const;
+
+      /**
+       * Determine an estimate for the memory consumption (in bytes) of this
+       * object.
+       */
+      std::size_t memory_consumption () const;
+
+      /**
+       * Read or write the data of this object to or from a stream for the
+       * purpose of serialization. The version argument is currently unused.
+       */
+      template <class Archive>
+      void serialize(Archive &ar,
+                     const unsigned int version);
+
+      /**
+       * Declare the classes that store levels and faces of DoFs friends so
+       * that they can resize arrays.
+       */
+      template <int> friend class DoFLevel;
+      template <int> friend class DoFFaces;
+    };
+
+
+// --------------------- template and inline functions ------------------
+
+    template <int dim>
+    template <int dh_dim, int spacedim>
+    inline unsigned int
+    DoFObjects<dim>::n_active_fe_indices (const dealii::DoFHandler<dh_dim,spacedim> &,
+                                          const types::global_dof_index) const
+    {
+      // both arguments are ignored: the answer is always one
+      return 1;
+    }
+
+
+
+    template <int dim>
+    template <int dh_dim, int spacedim>
+    inline bool
+    DoFObjects<dim>::fe_index_is_active (const dealii::DoFHandler<dh_dim,spacedim> &,
+                                         const types::global_dof_index,
+                                         const unsigned int fe_index) const
+    {
+      Assert (fe_index == 0,
+              ExcMessage ("Only zero fe_index values are allowed for "
+                          "non-hp DoFHandlers."));
+      // mark fe_index as used for builds where the Assert above is disabled
+      (void)fe_index;
+      return true;
+    }
+
+
+
+    template <int dim>
+    template <int dh_dim, int spacedim>
+    inline
+    types::global_dof_index
+    DoFObjects<dim>::
+    get_dof_index (const dealii::DoFHandler<dh_dim,spacedim> &dof_handler,
+                   const unsigned int       obj_index,
+                   const unsigned int       fe_index,
+                   const unsigned int       local_index) const
+    {
+      // each object owns n_per_object consecutive entries of the dofs array
+      const unsigned int n_per_object
+        = dof_handler.get_fe().template n_dofs_per_object<dim>();
+      const unsigned int position = obj_index * n_per_object + local_index;
+      (void)fe_index;
+      Assert ((fe_index == dealii::DoFHandler<dh_dim,spacedim>::default_fe_index),
+              ExcMessage ("Only the default FE index is allowed for non-hp DoFHandler objects"));
+      Assert (local_index<n_per_object,
+              ExcIndexRange (local_index, 0, n_per_object));
+      Assert (position<dofs.size(), ExcInternalError());
+
+      return dofs[position];
+    }
+
+
+    template <int dim>
+    template <class Archive>
+    void DoFObjects<dim>::serialize(Archive &ar,
+                                    const unsigned int)   // version, unused
+    {
+      ar &dofs;   // the index vector is the only state that is archived
+    }
+
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/dofs/dof_renumbering.h b/include/deal.II/dofs/dof_renumbering.h
new file mode 100644
index 0000000..9a849e0
--- /dev/null
+++ b/include/deal.II/dofs/dof_renumbering.h
@@ -0,0 +1,1130 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__dof_renumbering_h
+#define dealii__dof_renumbering_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/point.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/hp/dof_handler.h>
+
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * Implementation of a number of renumbering algorithms for the degrees of
+ * freedom on a triangulation.
+ *
+ * <h3>Cuthill-McKee like algorithms</h3>
+ *
+ * Within this class, the Cuthill-McKee algorithm is implemented. It starts at
+ * a degree of freedom, searches the other DoFs for those which are coupled
+ * with the one we started with and numbers these in a certain way. It then
+ * finds the second level of DoFs, namely those that couple with those of the
+ * previous level (which were those that coupled with the initial DoF) and
+ * numbers these. And so on. For the details of the algorithm, especially the
+ * numbering within each level, please see H. R. Schwarz: "Methode der finiten
+ * Elemente". The reverse Cuthill-McKee algorithm does the same job, but
+ * numbers all elements in the reverse order.
+ *
+ * These algorithms have one major drawback: they require a good starting
+ * point, i.e. the degree of freedom index that will get a new index of zero.
+ * The renumbering functions therefore allow the caller to specify such an
+ * initial DoF, e.g. by exploiting knowledge of the actual topology of the
+ * domain. It is also possible to give several starting indices, which may be
+ * used to simulate a simple upstream numbering (by giving the inflow dofs as
+ * starting values) or to make preconditioning faster (by letting the
+ * Dirichlet boundary indices be starting points).
+ *
+ * If no starting index is given, one is chosen automatically, namely one with
+ * the smallest coordination number (the coordination number is the number of
+ * other dofs this dof couples with). This dof is usually located on the
+ * boundary of the domain. There is, however, large ambiguity in this when
+ * using the hierarchical meshes used in this library, since in most cases the
+ * computational domain is not approximated by tilting and deforming elements
+ * and by plugging together variable numbers of elements at vertices, but
+ * rather by hierarchical refinement. There is therefore a large number of
+ * dofs with equal coordination numbers. The renumbering algorithms will
+ * therefore not give optimal results.
+ *
+ * In the book of Schwarz (H.R.Schwarz: Methode der finiten Elemente), it is
+ * advised to test many starting points, if possible all with the smallest
+ * coordination number and also those with slightly higher numbers. However,
+ * this seems only possible for meshes with at most several dozen or a few
+ * hundred elements found in small engineering problems of the early 1980s
+ * (the second edition was published in 1984), but certainly not with those
+ * used in this library, featuring several 10,000 to a few 100,000 elements.
+ *
+ *
+ * <h4>Implementation of renumbering schemes</h4>
+ *
+ * The renumbering algorithms need quite a lot of memory, since they have to
+ * store for each dof with which other dofs it couples. This is done using a
+ * SparsityPattern object used to store the sparsity pattern of matrices. It
+ * is not useful for the user to do anything between distributing the dofs and
+ * renumbering, i.e. the calls to DoFHandler::distribute_dofs and
+ * DoFHandler::renumber_dofs should follow each other immediately. If you try
+ * to create a sparsity pattern or anything else in between, these will be
+ * invalid afterwards.
+ *
+ * The renumbering may take care of dof-to-dof couplings only induced by
+ * eliminating constraints. In addition to the memory consumption mentioned
+ * above, this also takes quite some computational time, but it may be
+ * switched off upon calling the @p renumber_dofs function. This will then
+ * give inferior results, since knots in the graph (representing dofs) are not
+ * found to be neighbors even if they would be after condensation.
+ *
+ * The renumbering algorithms work on a purely algebraic basis, due to the
+ * isomorphism between the graph theoretical groundwork underlying the
+ * algorithms and binary matrices (matrices of which the entries are binary
+ * values) represented by the sparsity patterns. In particular, the algorithms do
+ * not try to exploit topological knowledge (e.g. corner detection) to find
+ * appropriate starting points. This way, however, they work in arbitrary
+ * space dimension.
+ *
+ * If you want to give starting points, you may give a list of dof indices
+ * which will form the first step of the renumbering. The dofs of the list
+ * will be consecutively numbered starting with zero, i.e. this list is not
+ * renumbered according to the coordination number of the nodes. Indices not
+ * in the allowed range are deleted. If no index is allowed, the algorithm
+ * will search for its own starting point.
+ *
+ *
+ * <h4>Results of renumbering</h4>
+ *
+ * The renumbering schemes mentioned above do not lead to optimal results.
+ * However, after all there is no algorithm that accomplishes this within
+ * reasonable time. There are situations where the lack of optimality even
+ * leads to worse results than with the original, crude, levelwise numbering
+ * scheme; one of these examples is a mesh of four cells of which always those
+ * cells are refined which are neighbors to the center (you may call this mesh
+ * a `zoom in' mesh). In one such example the bandwidth was increased by about
+ * 50 per cent.
+ *
+ * In most other cases, the bandwidth is reduced significantly. The reduction
+ * is the better the less structured the grid is. With one grid where the
+ * cells were refined according to a random driven algorithm, the bandwidth
+ * was reduced by a factor of six.
+ *
+ * Using the constraint information usually leads to reductions in bandwidth
+ * of 10 or 20 per cent, but may for some very unstructured grids also lead to
+ * an increase. You have to weigh the decrease in your case with the time
+ * spent to use the constraint information, which usually is several times
+ * longer than the `pure' renumbering algorithm.
+ *
+ * In almost all cases, the renumbering scheme finds a corner to start with.
+ * Since there is more than one corner in most grids and since even an
+ * interior degree of freedom may be a better starting point, giving the
+ * starting point by the user may be a viable way if you have a simple scheme
+ * to derive a suitable point (e.g. by successively taking the third child of
+ * the cell top left of the coarsest level, taking its third vertex and the
+ * dof index thereof, if you want the top left corner vertex). If you do not
+ * know beforehand what your grid will look like (e.g. when using adaptive
+ * algorithms), searching a best starting point may be difficult, however, and
+ * in many cases will not justify the effort.
+ *
+ *
+ * <h3>Component-wise and block-wise numberings</h3>
+ *
+ * For finite elements composed of several base elements using the FESystem
+ * class, or for elements which provide several components themselves, it may
+ * be of interest to sort the DoF indices by component. This will then bring
+ * out the block matrix structure, since otherwise the degrees of freedom are
+ * numbered cell-wise without taking into account that they may belong to
+ * different components. For example, one may want to sort degree of freedom
+ * for a Stokes discretization so that we first get all velocities and then
+ * all the pressures so that the resulting matrix naturally decomposes into a
+ * $2\times 2$ system.
+ *
+ * This kind of numbering may be obtained by calling the component_wise()
+ * function of this class. Since it does not touch the order of indices within
+ * each component, it may be worthwhile to first renumber using the Cuthill-
+ * McKee or a similar algorithm and afterwards renumbering component-wise.
+ * This will bring out the matrix structure and additionally have a good
+ * numbering within each block.
+ *
+ * The component_wise() function allows not only to honor enumeration based on
+ * vector components, but also allows to group together vector components into
+ * "blocks" using a defaulted argument to the various
+ * DoFRenumbering::component_wise() functions (see
+ * @ref GlossComponent
+ * vs
+ * @ref GlossBlock
+ * for a description of the difference). The blocks designated through this
+ * argument may, but do not have to be, equal to the blocks that the finite
+ * element reports. For example, a typical Stokes element would be
+ * @code
+ *   FESystem<dim> stokes_fe (FE_Q<dim>(2), dim,   // dim velocities
+ *                            FE_Q<dim>(1), 1);    // one pressure
+ * @endcode
+ * This element has <code>dim+1</code> vector components and equally many
+ * blocks. However, one may want to consider the velocities as one logical
+ * block so that all velocity degrees of freedom are enumerated the same way,
+ * independent of whether they are $x$- or $y$-velocities. This is done, for
+ * example, in step-20 and step-22 as well as several other tutorial programs.
+ *
+ * On the other hand, if you really want to use the block structure reported
+ * by the finite element itself (which is often the case if you have finite
+ * elements that have multiple vector components, e.g. the FE_RaviartThomas or
+ * FE_Nedelec elements) then you can use the DoFRenumbering::block_wise
+ * instead of the DoFRenumbering::component_wise functions.
+ *
+ *
+ * <h3>Cell-wise numbering</h3>
+ *
+ * Given an ordered vector of cells, the function cell_wise() sorts the
+ * degrees of freedom such that degrees on earlier cells of this vector will
+ * occur before degrees on later cells.
+ *
+ * This rule produces a well-defined ordering for discontinuous Galerkin
+ * methods (FE_DGP, FE_DGQ). For continuous methods, we use the additional
+ * rule that each degree of freedom is ordered according to the first cell in
+ * the ordered vector it belongs to.
+ *
+ * Applications of this scheme are downstream() and clock_wise_dg(). The first
+ * orders the cells according to a downstream direction and then applies
+ * cell_wise().
+ *
+ * @note For DG elements, the internal numbering in each cell remains
+ * unaffected. This cannot be guaranteed for continuous elements anymore,
+ * since degrees of freedom shared with an earlier cell will be accounted for
+ * by the other cell.
+ *
+ *
+ * <h3>Random renumbering</h3>
+ *
+ * The random() function renumbers degrees of freedom randomly. This function
+ * is probably seldom of use, except to check the dependence of solvers
+ * (iterative or direct ones) on the numbering of the degrees of freedom.
+ *
+ *
+ * <h3>A comparison of reordering strategies</h3>
+ *
+ * As a benchmark for comparison, let us consider the different sparsity
+ * patterns produced by the various algorithms when using the $Q_2^d\times
+ * Q_1$ element combination typically employed in the discretization of Stokes
+ * equations, when used on the mesh obtained in step-22 after one adaptive
+ * mesh refinement in 3d. The space dimension together with the coupled finite
+ * element leads to a rather dense system matrix with, on average, around 180
+ * nonzero entries per row. After applying each of the reordering strategies
+ * shown below, the degrees of freedom are also sorted using
+ * DoFRenumbering::component_wise into velocity and pressure groups; this
+ * produces the $2\times 2$ block structure seen below with the large
+ * velocity-velocity block at top left, small pressure-pressure block at
+ * bottom right, and coupling blocks at top right and bottom left.
+ *
+ * The goal of reordering strategies is to improve the preconditioner. In
+ * step-22 we use a SparseILU as a preconditioner for the velocity-velocity
+ * block at the top left. The quality of the preconditioner can then be
+ * measured by the number of CG iterations required to solve a linear system
+ * with this block. For some of the reordering strategies below we record this
+ * number for adaptive refinement cycle 3, with 93176 degrees of freedom;
+ * because we solve several linear systems with the same matrix in the Schur
+ * complement, the average number of iterations is reported. The lower the
+ * number the better the preconditioner and consequently the better the
+ * renumbering of degrees of freedom is suited for this task. We also state
+ * the run-time of the program, in part determined by the number of iterations
+ * needed, for the first 4 cycles on one of our machines. Note that the
+ * reported times correspond to the run time of the entire program, not just
+ * the affected solver; if a program runs twice as fast with one particular
+ * ordering than with another one, then this means that the actual solver is
+ * actually several times faster.
+ *
+ * <table> <tr> <td>
+ * @image html "reorder_sparsity_step_31_original.png"
+ * </td> <td>
+ * @image html "reorder_sparsity_step_31_random.png"
+ * </td> <td>
+ * @image html "reorder_sparsity_step_31_deal_cmk.png"
+ * </td> </tr> <tr> <td> Enumeration as produced by deal.II's
+ * DoFHandler::distribute_dofs function and no further reordering apart from
+ * the component-wise one.
+ *
+ * With this renumbering, we needed an average of 92.2 iterations for the
+ * testcase outlined above, and a runtime of 7min53s. </td> <td> Random
+ * enumeration as produced by applying DoFRenumbering::random after calling
+ * DoFHandler::distribute_dofs. This enumeration produces nonzero entries in
+ * matrices pretty much everywhere, appearing here as an entirely unstructured
+ * matrix.
+ *
+ * With this renumbering, we needed an average of 71 iterations for the
+ * testcase outlined above, and a runtime of 10min55s. The longer runtime
+ * despite fewer iterations compared to the default ordering may be due to the
+ * fact that computing and applying the ILU requires us to jump back and forth
+ * all through memory due to the lack of localization of matrix entries around
+ * the diagonal; this then leads to many cache misses and consequently bad
+ * timings. </td> <td> Cuthill-McKee enumeration as produced by calling the
+ * deal.II implementation of the algorithm provided by
+ * DoFRenumbering::Cuthill_McKee after DoFHandler::distribute_dofs.
+ *
+ * With this renumbering, we needed an average of 57.3 iterations for the
+ * testcase outlined above, and a runtime of 6min10s. </td> </tr>
+ *
+ * <tr> <td>
+ * @image html "reorder_sparsity_step_31_boost_cmk.png"
+ * </td> <td>
+ * @image html "reorder_sparsity_step_31_boost_king.png"
+ * </td> <td>
+ * @image html "reorder_sparsity_step_31_boost_md.png"
+ * </td> </tr> <tr> <td> Cuthill-McKee enumeration as produced by calling the
+ * BOOST implementation of the algorithm provided by
+ * DoFRenumbering::boost::Cuthill_McKee after DoFHandler::distribute_dofs.
+ *
+ * With this renumbering, we needed an average of 51.7 iterations for the
+ * testcase outlined above, and a runtime of 5min52s. </td> <td> King
+ * enumeration as produced by calling the BOOST implementation of the
+ * algorithm provided by DoFRenumbering::boost::king_ordering after
+ * DoFHandler::distribute_dofs. The sparsity pattern appears denser than with
+ * BOOST's Cuthill-McKee algorithm; however, this is only an illusion: the
+ * number of nonzero entries is the same, they are simply not as well
+ * clustered.
+ *
+ * With this renumbering, we needed an average of 51.0 iterations for the
+ * testcase outlined above, and a runtime of 5min03s. Although the number of
+ * iterations is only slightly less than with BOOST's Cuthill-McKee
+ * implementation, runtime is significantly less. This, again, may be due to
+ * cache effects. As a consequence, this is the algorithm best suited to the
+ * testcase, and is in fact used in step-22. </td> <td> Minimum degree
+ * enumeration as produced by calling the BOOST implementation of the
+ * algorithm provided by DoFRenumbering::boost::minimum_degree after
+ * DoFHandler::distribute_dofs. The minimum degree algorithm does not attempt
+ * to minimize the bandwidth of a matrix but to minimize the amount of fill-in
+ * a LU decomposition would produce, i.e. the number of places in the matrix
+ * that would be occupied by elements of an LU decomposition that are not
+ * already occupied by elements of the original matrix. The resulting sparsity
+ * pattern obviously has an entirely different structure than the ones
+ * produced by algorithms trying to minimize the bandwidth.
+ *
+ * With this renumbering, we needed an average of 58.9 iterations for the
+ * testcase outlined above, and a runtime of 6min11s. </td> </tr>
+ *
+ * <tr> <td>
+ * @image html "reorder_sparsity_step_31_downstream.png"
+ * </td> <td> </td> <td> </td> </tr> <tr> <td> Downstream enumeration using
+ * DoFRenumbering::downstream using a direction that points diagonally through
+ * the domain.
+ *
+ * With this renumbering, we needed an average of 90.5 iterations for the
+ * testcase outlined above, and a runtime of 7min05s. </td> <td> </td> <td>
+ * </td> </tr> </table>
+ *
+ *
+ * <h3>Multigrid DoF numbering</h3>
+ *
+ * Most of the algorithms listed above also work on multigrid degree of
+ * freedom numberings. Refer to the actual function declarations to get more
+ * information on this.
+ *
+ * @ingroup dofs
+ * @author Wolfgang Bangerth, Guido Kanschat, 1998, 1999, 2000, 2004, 2007,
+ * 2008
+ */
+namespace DoFRenumbering
+{
+  /**
+   * Comparator for cell iterators that orders cells along a fixed flow
+   * direction. The call operator yields @p true when the center of the second
+   * cell lies strictly downstream of the center of the first cell, measured
+   * along the direction handed to the constructor.
+   */
+  template <class Iterator, int dim>
+  struct CompareDownstream
+  {
+    /**
+     * Constructor. Stores a copy of the flow direction @p flow_direction.
+     */
+    CompareDownstream (const Tensor<1,dim> &flow_direction)
+      :
+      dir(flow_direction)
+    {}
+
+    /**
+     * Return @p true if @p c1 is to be sorted before @p c2, i.e. if the
+     * vector from the center of @p c1 to the center of @p c2 has a strictly
+     * positive component along the flow direction.
+     */
+    bool operator () (const Iterator &c1, const Iterator &c2) const
+    {
+      const Tensor<1,dim> center_offset = c2->center() - c1->center();
+      const bool second_is_downstream = (center_offset*dir > 0);
+      return second_is_downstream;
+    }
+
+  private:
+    /**
+     * Flow direction along which cells are ordered.
+     */
+    const Tensor<1,dim> dir;
+  };
+
+
+  /**
+   * Comparator for (point, DoF index) pairs that orders them along a fixed
+   * flow direction. The call operator yields @p true when the second point
+   * lies strictly downstream of the first one, measured along the direction
+   * handed to the constructor. If both points have exactly the same position
+   * along that direction, the pair with the lower DoF index is considered
+   * smaller.
+   */
+  template <int dim>
+  struct ComparePointwiseDownstream
+  {
+    /**
+     * Constructor. Stores a copy of the flow direction @p flow_direction.
+     */
+    ComparePointwiseDownstream (const Tensor<1,dim> &flow_direction)
+      :
+      dir(flow_direction)
+    {}
+
+    /**
+     * Return @p true if @p c1 is to be sorted before @p c2.
+     */
+    bool operator () (const std::pair<Point<dim>,types::global_dof_index> &c1,
+                      const std::pair<Point<dim>,types::global_dof_index> &c2) const
+    {
+      const Tensor<1,dim> offset = c2.first-c1.first;
+
+      // signed distance from the first to the second point along the flow
+      const double projection = offset*dir;
+
+      if (projection > 0)
+        return true;
+
+      // tie along the flow direction: fall back to the DoF index
+      return (projection==0 && c1.second<c2.second);
+    }
+
+  private:
+    /**
+     * Flow direction along which points are ordered.
+     */
+    const Tensor<1,dim> dir;
+  };
+
+
+
+  /**
+   * A namespace for the implementation of some renumbering algorithms based
+   * on algorithms implemented in the Boost Graph Library (BGL) by Jeremy Siek
+   * and others.
+   *
+   * While often slightly slower to compute, the algorithms using BOOST often
+   * lead to matrices with smaller bandwidths and sparse ILUs based on this
+   * numbering are therefore more efficient.
+   *
+   * For a comparison of these algorithms with the ones defined in
+   * DoFRenumbering, see the comparison section in the documentation of the
+   * DoFRenumbering namespace.
+   */
+  namespace boost
+  {
+    /**
+     * Renumber the degrees of freedom according to the Cuthill-McKee method,
+     * optionally using the reverse numbering scheme.
+     *
+     * See the general documentation of the parent namespace for details on the
+     * different methods.
+     *
+     * As an example of the results of this algorithm, take a look at the
+     * comparison of various algorithms in the documentation of the
+     * DoFRenumbering namespace.
+     */
+    template <typename DoFHandlerType>
+    void
+    Cuthill_McKee (DoFHandlerType &dof_handler,
+                   const bool      reversed_numbering = false,
+                   const bool      use_constraints    = false);
+
+    /**
+     * Computes the renumbering vector needed by the Cuthill_McKee() function.
+     * Does not perform the renumbering on the DoFHandler dofs but returns the
+     * renumbering vector.
+     */
+    template <typename DoFHandlerType>
+    void
+    compute_Cuthill_McKee (std::vector<types::global_dof_index> &new_dof_indices,
+                           const DoFHandlerType &,
+                           const bool                            reversed_numbering = false,
+                           const bool                            use_constraints    = false);
+
+    /**
+     * Renumber the degrees of freedom based on the BOOST implementation of
+     * the King algorithm. This often results in slightly larger (by a few
+     * percent) bandwidths than the Cuthill-McKee algorithm, but sparse ILUs
+     * are often slightly (also by a few percent) better preconditioners.
+     *
+     * As an example of the results of this algorithm, take a look at the
+     * comparison of various algorithms in the documentation of the
+     * DoFRenumbering namespace.
+     *
+     * This algorithm is used in step-22.
+     */
+    template <typename DoFHandlerType>
+    void
+    king_ordering (DoFHandlerType &dof_handler,
+                   const bool      reversed_numbering = false,
+                   const bool      use_constraints    = false);
+
+    /**
+     * Compute the renumbering for the King algorithm but do not actually
+     * renumber the degrees of freedom in the DoF handler argument.
+     */
+    template <typename DoFHandlerType>
+    void
+    compute_king_ordering (std::vector<types::global_dof_index> &new_dof_indices,
+                           const DoFHandlerType &,
+                           const bool                            reversed_numbering = false,
+                           const bool                            use_constraints    = false);
+
+    /**
+     * Renumber the degrees of freedom based on the BOOST implementation of
+     * the minimum degree algorithm. Unlike the Cuthill-McKee algorithm, this
+     * algorithm does not attempt to minimize the bandwidth of a matrix but to
+     * minimize the amount of fill-in when doing an LU decomposition. It may
+     * sometimes yield better ILUs because of this property.
+     *
+     * As an example of the results of this algorithm, take a look at the
+     * comparison of various algorithms in the documentation of the
+     * DoFRenumbering namespace.
+     */
+    template <typename DoFHandlerType>
+    void
+    minimum_degree (DoFHandlerType &dof_handler,
+                    const bool      reversed_numbering = false,
+                    const bool      use_constraints    = false);
+
+    /**
+     * Compute the renumbering for the minimum degree algorithm but do not
+     * actually renumber the degrees of freedom in the DoF handler argument.
+     */
+    template <typename DoFHandlerType>
+    void
+    compute_minimum_degree (std::vector<types::global_dof_index> &new_dof_indices,
+                            const DoFHandlerType &,
+                            const bool                            reversed_numbering = false,
+                            const bool                            use_constraints    = false);
+  }
+
+  /**
+   * Renumber the degrees of freedom according to the Cuthill-McKee method,
+   * possibly using the reverse numbering scheme.
+   *
+   * See the general documentation of this namespace for details on the
+   * different methods.
+   *
+   * As an example of the results of this algorithm, take a look at the
+   * comparison of various algorithms in the documentation of the
+   * DoFRenumbering namespace.
+   *
+   * If the given DoFHandler uses a distributed triangulation (i.e., if
+   * dof_handler.locally_owned_dofs() is not the complete index set), the
+   * renumbering is performed on each processor's degrees of freedom
+   * individually, without any communication between processors.
+   *
+   * @param dof_handler The DoFHandler or hp::DoFHandler object to work on.
+   * @param reversed_numbering Whether to use the original Cuthill-McKee
+   * algorithm, or to reverse the ordering.
+   * @param use_constraints Whether or not to use hanging node constraints in
+   * determining the reordering of degrees of freedom.
+   * @param starting_indices A set of degrees of freedom that form the first
+   * level of renumbered degrees of freedom. If the set is empty, then a
+   * single starting entry is chosen automatically among those that have the
+   * smallest number of others that couple with it. If the DoFHandler is built
+   * on a parallel triangulation, then on every processor, these starting
+   * indices need to be a (possibly empty) subset of the
+   * @ref GlossLocallyOwnedDof "locally owned degrees of freedom".
+   * These will then be used as starting indices for the local renumbering on
+   * the current processor. (In other words, this argument will in fact be
+   * different on every processor unless you pass an empty list as is the
+   * default.)
+   */
+  template <typename DoFHandlerType>
+  void
+  Cuthill_McKee (DoFHandlerType                             &dof_handler,
+                 const bool                                  reversed_numbering = false,
+                 const bool                                  use_constraints    = false,
+                 const std::vector<types::global_dof_index> &starting_indices
+                 = std::vector<types::global_dof_index>());
+
+  /**
+   * Computes the renumbering vector needed by the Cuthill_McKee() function.
+   * Does not perform the renumbering on the DoFHandler dofs but returns the
+   * renumbering vector.
+   */
+  template <typename DoFHandlerType>
+  void
+  compute_Cuthill_McKee (std::vector<types::global_dof_index>       &new_dof_indices,
+                         const DoFHandlerType &,
+                         const bool                                  reversed_numbering = false,
+                         const bool                                  use_constraints    = false,
+                         const std::vector<types::global_dof_index> &starting_indices
+                         = std::vector<types::global_dof_index>());
+
+  /**
+   * Renumber the degrees of freedom according to the Cuthill-McKee method,
+   * optionally using the reverse numbering scheme, in this case for a
+   * multigrid numbering of degrees of freedom.
+   *
+   * You can give a triangulation level to which this function is to be
+   * applied.  Since with a level-wise numbering there are no hanging nodes,
+   * no constraints can be used, so the respective parameter of the previous
+   * function is omitted.
+   *
+   * See the general documentation of this namespace for details on the
+   * different methods.
+   */
+  template <typename DoFHandlerType>
+  void
+  Cuthill_McKee (DoFHandlerType                             &dof_handler,
+                 const unsigned int                          level,
+                 const bool                                  reversed_numbering = false,
+                 const std::vector<types::global_dof_index> &starting_indices
+                 = std::vector<types::global_dof_index> ());
+
+  /**
+   * @name Component-wise numberings
+   * @{
+   */
+
+  /**
+   * Sort the degrees of freedom by vector component. The numbering within
+   * each component is not touched, so a degree of freedom with index $i$,
+   * belonging to some component, and another degree of freedom with index $j$
+   * belonging to the same component will be assigned new indices $n(i)$ and
+   * $n(j)$ with $n(i)<n(j)$ if $i<j$ and $n(i)>n(j)$ if $i>j$.
+   *
+   * You can specify that the components are ordered in a different way than
+   * suggested by the FESystem object you use. To this end, set up the vector
+   * @p target_component such that the entry at index @p i denotes the number
+   * of the target component for dofs with component @p i in the FESystem.
+   * Naming the same target component more than once is possible and results
+   * in a blocking of several components into one. This is discussed in
+   * step-22. If you omit this argument, the same order as given by the finite
+   * element is used.
+   *
+   * If one of the base finite elements from which the global finite element
+   * under consideration here is built is a non-primitive one, i.e. its shape
+   * functions have more than one non-zero component, then it is not possible
+   * to associate these degrees of freedom with a single vector component. In
+   * this case, they are associated with the first vector component to which
+   * they belong.
+   *
+   * For finite elements with only one component, or a single non-primitive
+   * base element, this function is the identity operation.
+   */
+  template <int dim, int spacedim>
+  void
+  component_wise (DoFHandler<dim,spacedim>        &dof_handler,
+                  const std::vector<unsigned int> &target_component
+                  = std::vector<unsigned int>());
+
+
+  /**
+   * Sort the degrees of freedom by component. It does the same thing as the
+   * above function.
+   */
+  template <int dim>
+  void
+  component_wise (hp::DoFHandler<dim>             &dof_handler,
+                  const std::vector<unsigned int> &target_component = std::vector<unsigned int> ());
+
+  /**
+   * Sort the degrees of freedom by component. It does the same thing as the
+   * above function, only that it does this for one single level of a
+   * multilevel discretization. The non-multigrid part of the DoFHandler
+   * is not touched.
+   */
+  template <typename DoFHandlerType>
+  void
+  component_wise (DoFHandlerType                  &dof_handler,
+                  const unsigned int               level,
+                  const std::vector<unsigned int> &target_component
+                  = std::vector<unsigned int>());
+
+  /**
+   * Computes the renumbering vector needed by the component_wise() functions.
+   * Does not perform the renumbering on the DoFHandler dofs but returns the
+   * renumbering vector.
+   */
+  template <int dim, int spacedim, class ITERATOR, class ENDITERATOR>
+  types::global_dof_index
+  compute_component_wise (std::vector<types::global_dof_index> &new_dof_indices,
+                          const ITERATOR &start,
+                          const ENDITERATOR &end,
+                          const std::vector<unsigned int> &target_component,
+                          bool is_level_operation);
+
+  /**
+   * @}
+   */
+
+  /**
+   * @name Block-wise numberings
+   * @{
+   */
+
+  /**
+   * Sort the degrees of freedom by vector block. The numbering within each
+   * block is not touched, so a degree of freedom with index $i$, belonging to
+   * some block, and another degree of freedom with index $j$ belonging to the
+   * same block will be assigned new indices $n(i)$ and $n(j)$ with
+   * $n(i)<n(j)$ if $i<j$ and $n(i)>n(j)$ if $i>j$.
+   */
+  template <int dim, int spacedim>
+  void
+  block_wise (DoFHandler<dim,spacedim> &dof_handler);
+
+  /**
+   * Sort the degrees of freedom by vector block. It does the same thing as
+   * the above function, only that it does this for one single level of a
+   * multilevel discretization. The non-multigrid part of the DoFHandler
+   * is not touched.
+   */
+  template <int dim, int spacedim>
+  void
+  block_wise (DoFHandler<dim,spacedim> &dof_handler, const unsigned int level);
+
+  /**
+   * Sort the degrees of freedom by block. It does the same thing as the above
+   * function.
+   *
+   * This function only succeeds if each of the elements in the
+   * hp::FECollection attached to the hp::DoFHandler argument has exactly the
+   * same number of blocks (see
+   * @ref GlossBlock "the glossary"
+   * for more information). Note that this is not always given: while the
+   * hp::FECollection class ensures that all of its elements have the same
+   * number of vector components, they need not have the same number of
+   * blocks. At the same time, this function here needs to match individual
+   * blocks across elements and therefore requires that elements have the same
+   * number of blocks and that subsequent blocks in one element have the same
+   * meaning as in another element.
+   */
+  template <int dim, int spacedim>
+  void
+  block_wise (hp::DoFHandler<dim,spacedim> &dof_handler);
+
+  /**
+   * Computes the renumbering vector needed by the block_wise() functions.
+   * Does not perform the renumbering on the DoFHandler dofs but returns the
+   * renumbering vector.
+   */
+  template <int dim, int spacedim, class ITERATOR, class ENDITERATOR>
+  types::global_dof_index
+  compute_block_wise (std::vector<types::global_dof_index> &new_dof_indices,
+                      const ITERATOR &start,
+                      const ENDITERATOR &end,
+                      bool is_level_operation);
+
+  /**
+   * @}
+   */
+
+  /**
+   * @name Various cell-wise numberings
+   * @{
+   */
+
+  /**
+   * Renumber the degrees of freedom cell by cell in hierarchical order (also
+   * known as z-order). The main usage is that this guarantees the same
+   * ordering independent of the number of processors involved in a parallel
+   * distributed computation.
+   */
+  template <int dim>
+  void
+  hierarchical (DoFHandler<dim> &dof_handler);
+
+  /**
+   * Renumber degrees of freedom by cell. The function takes a vector of cell
+   * iterators (which needs to list <i>all</i> active cells of the DoF handler
+   * objects) and will give degrees of freedom new indices based on where in
+   * the given list of cells the cell is on which the degree of freedom is
+   * located. Degrees of freedom that exist at the interface between two or
+   * more cells will be numbered when they are encountered first.
+   *
+   * Degrees of freedom that are encountered first on the same cell retain
+   * their original ordering before the renumbering step.
+   *
+   * @param[in,out] dof_handler The DoFHandler whose degrees of freedom are to
+   * be renumbered.
+   * @param[in] cell_order A vector that contains the order of the cells that
+   * defines the order in which degrees of freedom should be renumbered.
+   *
+   * @pre @p cell_order must have size
+   * <code>dof_handler.get_triangulation().n_active_cells()</code>. Every
+   * active cell iterator of that triangulation needs to be present in @p
+   * cell_order exactly once.
+   */
+  template <typename DoFHandlerType>
+  void
+  cell_wise (DoFHandlerType &dof_handler,
+             const std::vector<typename DoFHandlerType::active_cell_iterator> &cell_order);
+
+  /**
+   * Compute a renumbering of degrees of freedom by cell. The function takes a
+   * vector of cell iterators (which needs to list <i>all</i> active cells of
+   * the DoF handler objects) and will give degrees of freedom new indices
+   * based on where in the given list of cells the cell is on which the degree
+   * of freedom is located. Degrees of freedom that exist at the interface
+   * between two or more cells will be numbered when they are encountered
+   * first.
+   *
+   * Degrees of freedom that are encountered first on the same cell retain
+   * their original ordering before the renumbering step.
+   *
+   * @param[out] renumbering A vector of length
+   * <code>dof_handler.n_dofs()</code> that contains for each degree of
+   * freedom (in their current numbering) their future DoF index. This vector
+   * therefore presents a (very particular) <i>permutation</i> of the current
+   * DoF indices.
+   * @param[out] inverse_renumbering The reverse of the permutation returned
+   * in the previous argument.
+   * @param[in] dof_handler The DoFHandler whose degrees of freedom are to be
+   * renumbered.
+   * @param[in] cell_order A vector that contains the order of the cells that
+   * defines the order in which degrees of freedom should be renumbered.
+   *
+   * @pre @p cell_order must have size
+   * <code>dof_handler.get_triangulation().n_active_cells()</code>. Every
+   * active cell iterator of that triangulation needs to be present in @p
+   * cell_order exactly once. @post For each @p i between zero and
+   * <code>dof_handler.n_dofs()</code>, the condition
+   * <code>renumbering[inverse_renumbering[i]] == i</code> will hold.
+   */
+  template <typename DoFHandlerType>
+  void
+  compute_cell_wise
+  (std::vector<types::global_dof_index>                             &renumbering,
+   std::vector<types::global_dof_index>                             &inverse_renumbering,
+   const DoFHandlerType                                             &dof_handler,
+   const std::vector<typename DoFHandlerType::active_cell_iterator> &cell_order);
+
+  /**
+   * Like the other cell_wise() function, but for one level of a multilevel
+   * enumeration of degrees of freedom.
+   */
+  template <typename DoFHandlerType>
+  void
+  cell_wise (DoFHandlerType                                                  &dof_handler,
+             const unsigned int                                               level,
+             const std::vector<typename DoFHandlerType::level_cell_iterator> &cell_order);
+
+  /**
+   * Like the other compute_cell_wise() function, but for one level of a
+   * multilevel enumeration of degrees of freedom.
+   */
+  template <typename DoFHandlerType>
+  void
+  compute_cell_wise
+  (std::vector<types::global_dof_index>                            &renumbering,
+   std::vector<types::global_dof_index>                            &inverse_renumbering,
+   const DoFHandlerType                                            &dof_handler,
+   const unsigned int                                               level,
+   const std::vector<typename DoFHandlerType::level_cell_iterator> &cell_order);
+
+  /**
+   * @}
+   */
+
+  /**
+   * @name Directional numberings
+   * @{
+   */
+
+  /**
+   * Downstream numbering with respect to a constant flow direction. If the
+   * additional argument @p dof_wise_renumbering is set to @p false, the
+   * numbering is performed cell-wise, otherwise it is performed based on the
+   * location of the support points.
+   *
+   * The cells are sorted such that the centers of higher numbers are further
+   * downstream with respect to the constant vector @p direction than the
+   * centers of lower numbers. Even if this yields a downstream numbering with
+   * respect to the flux on the edges for fairly general grids, this might not
+   * be guaranteed for all meshes.
+   *
+   * If the @p dof_wise_renumbering argument is set to @p false, this function
+   * produces a downstream ordering of the mesh cells and calls cell_wise().
+   * Therefore, the output only makes sense for Discontinuous Galerkin Finite
+   * Elements in that case (all degrees of freedom have to be associated with
+   * the interior of the cell).
+   *
+   * If @p dof_wise_renumbering is set to @p true, the degrees of freedom are
+   * renumbered based on the support point location of the individual degrees
+   * of freedom (obviously, the finite element needs to define support points
+   * for this to work). The numbering of points with the same position in
+   * downstream location (e.g. those parallel to the flow direction, or
+   * several dofs within a FESystem) will be unaffected.
+   */
+  template <typename DoFHandlerType>
+  void
+  downstream (DoFHandlerType                               &dof_handler,
+              const Point<DoFHandlerType::space_dimension> &direction,
+              const bool                                    dof_wise_renumbering = false);
+
+
+  /**
+   * Cell-wise downstream numbering with respect to a constant flow direction
+   * on one level. See the other function with the same name.
+   */
+  template <typename DoFHandlerType>
+  void
+  downstream (DoFHandlerType                               &dof_handler,
+              const unsigned int                            level,
+              const Point<DoFHandlerType::space_dimension> &direction,
+              const bool                                    dof_wise_renumbering = false);
+
+  /**
+   * Computes the renumbering vector needed by the downstream() function. Does
+   * not perform the renumbering on the DoFHandler dofs but returns the
+   * renumbering vector.
+   */
+  template <typename DoFHandlerType>
+  void
+  compute_downstream (std::vector<types::global_dof_index>         &new_dof_indices,
+                      std::vector<types::global_dof_index>         &reverse,
+                      const DoFHandlerType                         &dof_handler,
+                      const Point<DoFHandlerType::space_dimension> &direction,
+                      const bool                                    dof_wise_renumbering);
+
+  /**
+   * Computes the renumbering vector needed by the level-wise downstream()
+   * function. Does not perform the renumbering on the DoFHandler dofs but
+   * returns the renumbering vector.
+   */
+  template <typename DoFHandlerType>
+  void
+  compute_downstream (std::vector<types::global_dof_index>         &new_dof_indices,
+                      std::vector<types::global_dof_index>         &reverse,
+                      const DoFHandlerType                         &dof_handler,
+                      const unsigned int                            level,
+                      const Point<DoFHandlerType::space_dimension> &direction,
+                      const bool                                    dof_wise_renumbering);
+
+  /**
+   * Cell-wise clockwise numbering.
+   *
+   * This function produces a (counter)clockwise ordering of the mesh cells
+   * with respect to the hub @p center and calls cell_wise().  Therefore, it
+   * only works with Discontinuous Galerkin Finite Elements, i.e. all degrees
+   * of freedom have to be associated with the interior of the cell.
+   */
+  template <typename DoFHandlerType>
+  void
+  clockwise_dg (DoFHandlerType                               &dof_handler,
+                const Point<DoFHandlerType::space_dimension> &center,
+                const bool                                    counter = false);
+
+  /**
+   * Cell-wise clockwise numbering on one level. See the other function with
+   * the same name.
+   */
+  template <typename DoFHandlerType>
+  void
+  clockwise_dg (DoFHandlerType                               &dof_handler,
+                const unsigned int                            level,
+                const Point<DoFHandlerType::space_dimension> &center,
+                const bool                                    counter = false);
+
+  /**
+   * Computes the renumbering vector needed by the clockwise_dg() functions.
+   * Does not perform the renumbering on the DoFHandler dofs but returns the
+   * renumbering vector.
+   */
+  template <typename DoFHandlerType>
+  void
+  compute_clockwise_dg (std::vector<types::global_dof_index>         &new_dof_indices,
+                        const DoFHandlerType                         &dof_handler,
+                        const Point<DoFHandlerType::space_dimension> &center,
+                        const bool                                    counter);
+
+  /**
+   * @}
+   */
+
+  /**
+   * @name Selective and random numberings
+   * @{
+   */
+
+  /**
+   * Sort those degrees of freedom which are tagged with @p true in the @p
+   * selected_dofs array to the back of the DoF numbers. The sorting is
+   * stable, i.e. the relative order within the tagged degrees of freedom is
+   * preserved, as is the relative order within the untagged ones.
+   *
+   * @pre The @p selected_dofs array must have as many elements as the @p
+   * dof_handler has degrees of freedom.
+   */
+  template <typename DoFHandlerType>
+  void
+  sort_selected_dofs_back (DoFHandlerType          &dof_handler,
+                           const std::vector<bool> &selected_dofs);
+
+  /**
+   * Sort those degrees of freedom which are tagged with @p true in the @p
+   * selected_dofs array on the level @p level to the back of the DoF numbers.
+   * The sorting is stable, i.e. the relative order within the tagged degrees
+   * of freedom is preserved, as is the relative order within the untagged
+   * ones.
+   *
+   * @pre The @p selected_dofs array must have as many elements as the @p
+   * dof_handler has degrees of freedom on the given level.
+   */
+  template <typename DoFHandlerType>
+  void
+  sort_selected_dofs_back (DoFHandlerType          &dof_handler,
+                           const std::vector<bool> &selected_dofs,
+                           const unsigned int       level);
+
+  /**
+   * Computes the renumbering vector needed by the sort_selected_dofs_back()
+   * function. Does not perform the renumbering on the DoFHandler dofs but
+   * returns the renumbering vector.
+   *
+   * @pre The @p selected_dofs array must have as many elements as the @p
+   * dof_handler has degrees of freedom.
+   */
+  template <typename DoFHandlerType>
+  void
+  compute_sort_selected_dofs_back (std::vector<types::global_dof_index> &new_dof_indices,
+                                   const DoFHandlerType                 &dof_handler,
+                                   const std::vector<bool>              &selected_dofs);
+
+  /**
+   * This function computes the renumbering vector on each level needed by the
+   * sort_selected_dofs_back() function. Does not perform the renumbering on
+   * the DoFHandler dofs but only computes the renumbering and returns the
+   * renumbering vector.
+   *
+   * @pre The @p selected_dofs array must have as many elements as the @p
+   * dof_handler has degrees of freedom on the given level.
+   */
+  template <typename DoFHandlerType>
+  void
+  compute_sort_selected_dofs_back (std::vector<types::global_dof_index> &new_dof_indices,
+                                   const DoFHandlerType                 &dof_handler,
+                                   const std::vector<bool>              &selected_dofs,
+                                   const unsigned int                    level);
+
+  /**
+   * Renumber the degrees of freedom in a random way. The result of this
+   * function is repeatable in that two runs of the same program will yield
+   * the same result. This is achieved by creating a new random number
+   * generator with a fixed seed every time this function is entered. In
+   * particular, the function therefore does not rely on an external random
+   * number generator for which it would matter how often it has been called
+   * before this function (or, for that matter, whether other threads running
+   * concurrently to this function also draw random numbers).
+   */
+  template <typename DoFHandlerType>
+  void
+  random (DoFHandlerType &dof_handler);
+
+  /**
+   * Computes the renumbering vector needed by the random() function. See
+   * there for more information on the computed random renumbering.
+   *
+   * This function does not perform the renumbering on the DoFHandler dofs but
+   * returns the renumbering vector.
+   */
+  template <typename DoFHandlerType>
+  void
+  compute_random (std::vector<types::global_dof_index> &new_dof_indices,
+                  const DoFHandlerType &dof_handler);
+
+  /**
+   * @}
+   */
+
+  /**
+   * @name Numberings based on cell attributes
+   * @{
+   */
+
+  /**
+   * Renumber the degrees of freedom such that they are associated with the
+   * subdomain id of the cells they are living on, i.e. first all degrees of
+   * freedom that belong to cells with subdomain zero, then all with subdomain
+   * one, etc. This is useful when doing parallel computations after assigning
+   * subdomain ids using a partitioner (see the
+   * GridTools::partition_triangulation function for this).
+   *
+   * Note that degrees of freedom associated with faces, edges, and vertices
+   * may be associated with multiple subdomains if they are sitting on
+   * partition boundaries. It would therefore be undefined with which
+   * subdomain they have to be associated. For this, we use what we get from
+   * the DoFTools::get_subdomain_association function.
+   *
+   * The algorithm is stable, i.e. if two dofs i,j have <tt>i<j</tt> and
+   * belong to the same subdomain, then they will be in this order also after
+   * reordering.
+   */
+  template <typename DoFHandlerType>
+  void
+  subdomain_wise (DoFHandlerType &dof_handler);
+
+  /**
+   * Computes the renumbering vector needed by the subdomain_wise() function.
+   * Does not perform the renumbering on the @p DoFHandler dofs but returns
+   * the renumbering vector.
+   */
+  template <typename DoFHandlerType>
+  void
+  compute_subdomain_wise (std::vector<types::global_dof_index> &new_dof_indices,
+                          const DoFHandlerType                 &dof_handler);
+
+  /**
+   * @}
+   */
+
+  /**
+   * Exception
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcRenumberingIncomplete);
+  /**
+   * Exception
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcInvalidComponentOrder);
+  /**
+   * The function is only implemented for Discontinuous Galerkin Finite
+   * elements.
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcNotDGFEM);
+}
+
+/* ------------------------- inline functions -------------- */
+
+#ifndef DOXYGEN
+namespace DoFRenumbering
+{
+  // Renumber the degrees of freedom of @p dof in downstream order with
+  // respect to @p direction: obtain the permutation from
+  // compute_downstream() and hand it to DoFHandlerType::renumber_dofs().
+  template <typename DoFHandlerType>
+  inline
+  void
+  downstream (DoFHandlerType                               &dof,
+              const Point<DoFHandlerType::space_dimension> &direction,
+              const bool                                    dof_wise_renumbering)
+  {
+    // compute_downstream() takes two index vectors; only the first one is
+    // used afterwards, the second is needed merely to satisfy its signature.
+    std::vector<types::global_dof_index> new_indices(dof.n_dofs());
+    std::vector<types::global_dof_index> inverse_indices(dof.n_dofs());
+
+    compute_downstream(new_indices,
+                       inverse_indices,
+                       dof,
+                       direction,
+                       dof_wise_renumbering);
+
+    dof.renumber_dofs(new_indices);
+  }
+}
+#endif
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/dofs/dof_tools.h b/include/deal.II/dofs/dof_tools.h
new file mode 100644
index 0000000..b1eef9f
--- /dev/null
+++ b/include/deal.II/dofs/dof_tools.h
@@ -0,0 +1,2450 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__dof_tools_h
+#define dealii__dof_tools_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/table.h>
+#include <deal.II/base/index_set.h>
+#include <deal.II/base/point.h>
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/lac/sparsity_pattern.h>
+#include <deal.II/dofs/function_map.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/component_mask.h>
+#include <deal.II/hp/mapping_collection.h>
+
+#include <vector>
+#include <set>
+#include <map>
+
+DEAL_II_NAMESPACE_OPEN
+
+template<int dim, class T> class Table;
+class SparsityPattern;
+template <typename number> class Vector;
+template <int dim, typename Number> class Function;
+template <int dim, int spacedim> class FiniteElement;
+template <int dim, int spacedim> class DoFHandler;
+namespace hp
+{
+  template <int dim, int spacedim> class DoFHandler;
+  template <int dim, int spacedim> class MappingCollection;
+}
+class ConstraintMatrix;
+template <class MeshType> class InterGridMap;
+template <int dim, int spacedim> class Mapping;
+
+namespace GridTools
+{
+  template <typename CellIterator> struct PeriodicFacePair;
+}
+
+//TODO: map_support_points_to_dofs should generate a multimap, rather than just a map, since several dofs may be located at the same support point
+
+/**
+ * This is a collection of functions operating on, and manipulating the
+ * numbers of degrees of freedom. The documentation of the member functions
+ * will provide more information, but for functions that exist in multiple
+ * versions, there are sections in this global documentation stating some
+ * commonalities.
+ *
+ * <h3>Setting up sparsity patterns</h3>
+ *
+ * When assembling system matrices, the entries are usually of the form
+ * $a_{ij} = a(\phi_i, \phi_j)$, where $a$ is a bilinear functional, often an
+ * integral. When using sparse matrices, we therefore only need to reserve
+ * space for those $a_{ij}$ that are nonzero, which is the same as to
+ * say that the basis functions $\phi_i$ and $\phi_j$ have a nonempty
+ * intersection of their support. Since the support of basis functions is
+ * bound only on cells on which they are located or to which they are
+ * adjacent, to determine the sparsity pattern it is sufficient to loop over
+ * all cells and connect all basis functions on each cell with all other basis
+ * functions on that cell.  There may be finite elements for which not all
+ * basis functions on a cell connect with each other, but no use of this case
+ * is made since no examples where this occurs are known to the author.
+ *
+ *
+ * <h3>DoF numberings on boundaries</h3>
+ *
+ * When projecting the traces of functions to the boundary or parts thereof,
+ * one needs to build matrices and vectors that act only on those degrees of
+ * freedom that are located on the boundary, rather than on all degrees of
+ * freedom. One could do that by simply building matrices in which the entries
+ * for all interior DoFs are zero, but such matrices are always very rank
+ * deficient and not very practical to work with.
+ *
+ * What is needed instead in this case is a numbering of the boundary degrees
+ * of freedom, i.e. we should enumerate all the degrees of freedom that are
+ * sitting on the boundary, and exclude all other (interior) degrees of
+ * freedom. The map_dof_to_boundary_indices() function does exactly this: it
+ * provides a vector with as many entries as there are degrees of freedom on
+ * the whole domain, with each entry being the number in the numbering of the
+ * boundary or DoFHandler::invalid_dof_index if the dof is not on the
+ * boundary.
+ *
+ * With this vector, one can get, for any given degree of freedom, a unique
+ * number among those DoFs that sit on the boundary; or, if your DoF was
+ * interior to the domain, the result would be DoFHandler::invalid_dof_index.
+ * We need this mapping, for example, to build the mass matrix on the boundary
+ * (for this, see make_boundary_sparsity_pattern() function, the corresponding
+ * section below, as well as the MatrixCreator namespace documentation).
+ *
+ * Actually, there are two map_dof_to_boundary_indices() functions, one
+ * producing a numbering for all boundary degrees of freedom and one producing
+ * a numbering for only parts of the boundary, namely those parts for which
+ * the boundary indicator is listed in a set of indicators given to the
+ * function. The latter case is needed if, for example, we would only want to
+ * project the boundary values for the Dirichlet part of the boundary. You
+ * then give the function a list of boundary indicators referring to Dirichlet
+ * parts on which the projection is to be performed. The parts of the boundary
+ * on which you want to project need not be contiguous; however, it is not
+ * guaranteed that the indices of each of the boundary parts are contiguous,
+ * i.e. the indices of degrees of freedom on different parts may be
+ * intermixed.
+ *
+ * Degrees of freedom on the boundary but not on one of the specified boundary
+ * parts are given the index DoFHandler::invalid_dof_index, as if they were in
+ * the interior. If no boundary indicator was given or if no face of a cell
+ * has a boundary indicator contained in the given list, the vector of new
+ * indices consists solely of DoFHandler::invalid_dof_index.
+ *
+ * (As a side note, for corner cases: The question what a degree of freedom on
+ * the boundary is, is not so easy.  It should really be a degree of freedom
+ * of which the respective basis function has nonzero values on the boundary.
+ * At least for Lagrange elements this definition is equal to the statement
+ * that the off-point, or what deal.II calls support_point, of the shape
+ * function, i.e. the point where the function assumes its nominal value (for
+ * Lagrange elements this is the point where it has the function value 1), is
+ * located on the boundary. We do not check this directly, the criterion is
+ * rather defined through the information the finite element class gives: the
+ * FiniteElement class defines the numbers of basis functions per vertex, per
+ * line, and so on and the basis functions are numbered after this
+ * information; a basis function is to be considered to be on the face of a
+ * cell (and thus on the boundary if the cell is at the boundary) according to
+ * it belonging to a vertex, line, etc but not to the interior of the cell.
+ * The finite element uses the same cell-wise numbering so that we can say
+ * that if a degree of freedom was numbered as one of the dofs on lines, we
+ * assume that it is located on the line. Where the off-point actually is, is
+ * a secret of the finite element (well, you can ask it, but we don't do it
+ * here) and not relevant in this context.)
+ *
+ *
+ * <h3>Setting up sparsity patterns for boundary matrices</h3>
+ *
+ * In some cases, one wants to only work with DoFs that sit on the boundary.
+ * One application is, for example, if rather than interpolating
+ * non-homogeneous boundary values, one would like to project them. For this, we
+ * need two things: a way to identify nodes that are located on (parts of) the
+ * boundary, and a way to build matrices out of only degrees of freedom that
+ * are on the boundary (i.e. much smaller matrices, in which we do not even
+ * build the large zero block that stems from the fact that most degrees of
+ * freedom have no support on the boundary of the domain). The first of these
+ * tasks is done by the map_dof_to_boundary_indices() function (described
+ * above).
+ *
+ * The second part requires us first to build a sparsity pattern for the
+ * couplings between boundary nodes, and then to actually build the components
+ * of this matrix. While actually computing the entries of these small
+ * boundary matrices is discussed in the MatrixCreator namespace, the creation
+ * of the sparsity pattern is done by the make_boundary_sparsity_pattern()
+ * function. For its work, it needs to have a numbering of all those degrees
+ * of freedom that are on those parts of the boundary that we are interested
+ * in. You can get this from the map_dof_to_boundary_indices() function. It
+ * then builds the sparsity pattern corresponding to integrals like
+ * $\int_\Gamma \varphi_{b2d(i)} \varphi_{b2d(j)} dx$, where $i$ and $j$ are
+ * indices into the matrix, and $b2d(i)$ is the global DoF number of a degree
+ * of freedom sitting on a boundary (i.e., $b2d$ is the inverse of the mapping
+ * returned by map_dof_to_boundary_indices() function).
+ *
+ *
+ * @ingroup dofs
+ * @author Wolfgang Bangerth, Guido Kanschat and others
+ */
+namespace DoFTools
+{
+  /**
+   * The flags used in tables by certain <tt>make_*_pattern</tt> functions to
+   * describe whether two components of the solution couple in the bilinear
+   * forms corresponding to cell or face terms. An example of using these
+   * flags is shown in the introduction of step-46.
+   *
+   * In the descriptions of the individual elements below, remember that these
+   * flags are used as elements of tables of size FiniteElement::n_components
+   * times FiniteElement::n_components where each element indicates whether
+   * two components do or do not couple. Such tables are handed to
+   * make_sparsity_pattern() and make_flux_sparsity_pattern() as arguments of
+   * type <tt>Table<2,Coupling></tt>.
+   */
+  enum Coupling
+  {
+    /**
+     * Two components do not couple.
+     */
+    none,
+    /**
+     * Two components do couple.
+     */
+    always,
+    /**
+     * Two components couple only if their shape functions are both nonzero on
+     * a given face. This flag is only used when computing integrals over
+     * faces of cells, e.g., in DoFTools::make_flux_sparsity_pattern().
+     */
+    nonzero
+  };
+
+  /**
+   * @name Functions to support code that generically uses both DoFHandler and
+   * hp::DoFHandler
+   * @{
+   */
+  /**
+   * Maximal number of degrees of freedom on a cell.
+   *
+   * This function exists for both non-hp and hp DoFHandlers, to allow for a
+   * uniform interface to query this property.
+   *
+   * @param[in] dh The DoFHandler object to query.
+   * @return The maximal number of degrees of freedom on a cell.
+   *
+   * @relates DoFHandler
+   */
+  template <int dim, int spacedim>
+  unsigned int
+  max_dofs_per_cell (const DoFHandler<dim,spacedim> &dh);
+
+  /**
+   * Maximal number of degrees of freedom on a cell.
+   *
+   * This function exists for both non-hp and hp DoFHandlers, to allow for a
+   * uniform interface to query this property.
+   *
+   * @param[in] dh The hp::DoFHandler object to query.
+   * @return The maximal number of degrees of freedom on a cell.
+   *
+   * @relates hp::DoFHandler
+   */
+  template <int dim, int spacedim>
+  unsigned int
+  max_dofs_per_cell (const hp::DoFHandler<dim,spacedim> &dh);
+
+
+  /**
+   * Maximal number of degrees of freedom on a face.
+   *
+   * This function exists for both non-hp and hp DoFHandlers, to allow for a
+   * uniform interface to query this property.
+   *
+   * @param[in] dh The DoFHandler object to query.
+   * @return The maximal number of degrees of freedom on a face.
+   *
+   * @relates DoFHandler
+   */
+  template <int dim, int spacedim>
+  unsigned int
+  max_dofs_per_face (const DoFHandler<dim,spacedim> &dh);
+
+  /**
+   * Maximal number of degrees of freedom on a face.
+   *
+   * This function exists for both non-hp and hp DoFHandlers, to allow for a
+   * uniform interface to query this property.
+   *
+   * @param[in] dh The hp::DoFHandler object to query.
+   * @return The maximal number of degrees of freedom on a face.
+   *
+   * @relates hp::DoFHandler
+   */
+  template <int dim, int spacedim>
+  unsigned int
+  max_dofs_per_face (const hp::DoFHandler<dim,spacedim> &dh);
+
+  /**
+   * Maximal number of degrees of freedom on a vertex.
+   *
+   * This function exists for both non-hp and hp DoFHandlers, to allow for a
+   * uniform interface to query this property.
+   *
+   * @param[in] dh The DoFHandler object to query.
+   * @return The maximal number of degrees of freedom on a vertex.
+   *
+   * @relates DoFHandler
+   */
+  template <int dim, int spacedim>
+  unsigned int
+  max_dofs_per_vertex (const DoFHandler<dim,spacedim> &dh);
+
+  /**
+   * Maximal number of degrees of freedom on a vertex.
+   *
+   * This function exists for both non-hp and hp DoFHandlers, to allow for a
+   * uniform interface to query this property.
+   *
+   * @param[in] dh The hp::DoFHandler object to query.
+   * @return The maximal number of degrees of freedom on a vertex.
+   *
+   * @relates hp::DoFHandler
+   */
+  template <int dim, int spacedim>
+  unsigned int
+  max_dofs_per_vertex (const hp::DoFHandler<dim,spacedim> &dh);
+
+  /**
+   * Number of vector components in the finite element object used by this
+   * DoFHandler.
+   *
+   * This function exists for both non-hp and hp DoFHandlers, to allow for a
+   * uniform interface to query this property.
+   *
+   * @param[in] dh The DoFHandler object to query.
+   * @return The number of vector components.
+   *
+   * @relates DoFHandler
+   */
+  template <int dim, int spacedim>
+  unsigned int
+  n_components (const DoFHandler<dim,spacedim> &dh);
+
+  /**
+   * Number of vector components in the finite element object used by this
+   * DoFHandler.
+   *
+   * This function exists for both non-hp and hp DoFHandlers, to allow for a
+   * uniform interface to query this property.
+   *
+   * @param[in] dh The hp::DoFHandler object to query.
+   * @return The number of vector components.
+   *
+   * @relates hp::DoFHandler
+   */
+  template <int dim, int spacedim>
+  unsigned int
+  n_components (const hp::DoFHandler<dim,spacedim> &dh);
+
+  /**
+   * Find out whether the FiniteElement used by this DoFHandler is primitive
+   * or not.
+   *
+   * This function exists for both non-hp and hp DoFHandlers, to allow for a
+   * uniform interface to query this property.
+   *
+   * @param[in] dh The DoFHandler object to query.
+   * @return @p true if the finite element is primitive, @p false otherwise.
+   *
+   * @relates DoFHandler
+   */
+  template <int dim, int spacedim>
+  bool
+  fe_is_primitive (const DoFHandler<dim,spacedim> &dh);
+
+  /**
+   * Find out whether the FiniteElement used by this DoFHandler is primitive
+   * or not.
+   *
+   * This function exists for both non-hp and hp DoFHandlers, to allow for a
+   * uniform interface to query this property.
+   *
+   * @param[in] dh The hp::DoFHandler object to query.
+   * @return @p true if the finite element is primitive, @p false otherwise.
+   *
+   * @relates hp::DoFHandler
+   */
+  template <int dim, int spacedim>
+  bool
+  fe_is_primitive (const hp::DoFHandler<dim,spacedim> &dh);
+
+  /**
+   * @}
+   */
+
+  /**
+   * @name Sparsity pattern generation
+   * @{
+   */
+
+  /**
+   * Compute which entries of a matrix built on the given @p dof_handler may
+   * possibly be nonzero, and create a sparsity pattern object that represents
+   * these nonzero locations.
+   *
+   * This function computes the possible positions of non-zero entries in the
+   * global system matrix by <i>simulating</i> which entries one would write
+   * to during the actual assembly of a matrix. For this, the function assumes
+   * that each finite element basis function is non-zero on a cell only if its
+   * degree of freedom is associated with the interior, a face, an edge or a
+   * vertex of this cell.  As a result, a matrix entry $A_{ij}$ that is
+   * computed from two basis functions $\varphi_i$ and $\varphi_j$ with
+   * (global) indices $i$ and $j$ (for example, using a bilinear form
+   * $A_{ij}=a(\varphi_i,\varphi_j)$) can be non-zero only if these shape
+   * functions correspond to degrees of freedom that are defined on at least
+   * one common cell. Therefore, this function just loops over all cells,
+   * figures out the global indices of all degrees of freedom, and presumes
+   * that all matrix entries that couple any of these indices will result in a
+   * nonzero matrix entry. These will then be added to the sparsity pattern.
+   * As this process of generating the sparsity pattern does not take into
+   * account the equation to be solved later on, the resulting sparsity
+   * pattern is symmetric.
+   *
+   * This algorithm makes no distinction between shape functions on each cell,
+   * i.e., it simply couples all degrees of freedom on a cell with all other
+   * degrees of freedom on a cell. This is often the case, and always a safe
+   * assumption. However, if you know something about the structure of your
+   * operator and that it does not couple certain shape functions with certain
+   * test functions, then you can get a sparser sparsity pattern by calling a
+   * variant of the current function described below that allows you to
+   * specify which vector components couple with which other vector
+   * components.
+   *
+   * The method described above lives on the assumption that coupling between
+   * degrees of freedom only happens if shape functions overlap on at least
+   * one cell. This is the case with most usual finite element formulations
+   * involving conforming elements. However, for formulations such as the
+   * Discontinuous Galerkin finite element method, the bilinear form contains
+   * terms on interfaces between cells that couple shape functions that live
+   * on one cell with shape functions that live on a neighboring cell. The
+   * current function would not see these couplings, and would consequently
+   * not allocate entries in the sparsity pattern. You would then get into
+   * trouble during matrix assembly because you try to write into matrix
+   * entries for which no space has been allocated in the sparsity pattern.
+   * This can be avoided by calling the DoFTools::make_flux_sparsity_pattern()
+   * function instead, which takes into account coupling between degrees of
+   * freedom on neighboring cells.
+   *
+   * There are other situations where bilinear forms contain non-local terms,
+   * for example in treating integral equations. These require different
+   * methods for building the sparsity patterns that depend on the exact
+   * formulation of the problem. You will have to do this yourself then.
+   *
+   * @param[in] dof_handler The DoFHandler or hp::DoFHandler object that
+   * describes which degrees of freedom live on which cells.
+   *
+   * @param[out] sparsity_pattern The sparsity pattern to be filled with
+   * entries.
+   *
+   * @param[in] constraints The process for generating entries described above
+   * is purely local to each cell. Consequently, the sparsity pattern does not
+   * provide for matrix entries that will only be written into during the
+   * elimination of hanging nodes or other constraints. They have to be taken
+   * care of by a subsequent call to ConstraintMatrix::condense().
+   * Alternatively, the constraints on degrees of freedom can already be taken
+   * into account at the time of creating the sparsity pattern. For this, pass
+   * the ConstraintMatrix object as the third argument to the current
+   * function. No call to ConstraintMatrix::condense() is then necessary. This
+   * process is explained in step-6, step-27, and other tutorial programs.
+   *
+   * @param[in] keep_constrained_dofs In case the constraints are already
+   * taken care of in this function by passing in a ConstraintMatrix object,
+   * it is possible to abandon some off-diagonal entries in the sparsity
+   * pattern if these entries will also not be written into during the actual
+   * assembly of the matrix this sparsity pattern later serves. Specifically,
+   * when using an assembly method that uses
+   * ConstraintMatrix::distribute_local_to_global(), no entries will ever be
+   * written into those matrix rows or columns that correspond to constrained
+   * degrees of freedom. In such cases, you can set the argument @p
+   * keep_constrained_dofs to @p false to avoid allocating these entries in
+   * the sparsity pattern.
+   *
+   * @param[in] subdomain_id If specified, the sparsity pattern is built only
+   * on cells that have a subdomain_id equal to the given argument. This is
+   * useful in parallel contexts where the matrix and sparsity pattern (for
+   * example a TrilinosWrappers::SparsityPattern) may be distributed and not
+   * every MPI process needs to build the entire sparsity pattern; in that
+   * case, it is sufficient if every process only builds that part of the
+   * sparsity pattern that corresponds to the subdomain_id for which it is
+   * responsible. This feature is used in step-32. (This argument is not
+   * usually needed for objects of type parallel::distributed::Triangulation
+   * because the current function only loops over locally owned cells anyway;
+   * thus, this argument typically only makes sense if you want to use the
+   * subdomain_id for anything other than indicating which processor owns a
+   * cell, for example which geometric component of the domain a cell belongs
+   * to.)
+   *
+   * @note The actual type of the sparsity pattern may be SparsityPattern,
+   * DynamicSparsityPattern, BlockSparsityPattern,
+   * BlockDynamicSparsityPattern, or any other class that satisfies similar
+   * requirements. It is assumed that the size of the sparsity pattern matches
+   * the number of degrees of freedom and that enough unused nonzero entries
+   * are left to fill the sparsity pattern if the sparsity pattern is of
+   * "static" kind (see
+   * @ref Sparsity
+   * for more information on what this means). The nonzero entries generated
+   * by this function are added to possible previous content of the object,
+   * i.e., previously added entries are not removed.
+   *
+   * @note If the sparsity pattern is represented by an object of type
+   * SparsityPattern (as opposed to, for example, DynamicSparsityPattern), you
+   * need to remember to call SparsityPattern::compress() after generating the
+   * pattern.
+   *
+   * @ingroup constraints
+   */
+  template <typename DoFHandlerType, typename SparsityPatternType>
+  void
+  make_sparsity_pattern (const DoFHandlerType      &dof_handler,
+                         SparsityPatternType       &sparsity_pattern,
+                         const ConstraintMatrix    &constraints           = ConstraintMatrix(),
+                         const bool                 keep_constrained_dofs = true,
+                         const types::subdomain_id  subdomain_id          = numbers::invalid_subdomain_id);
+
+  /**
+   * Compute which entries of a matrix built on the given @p dof_handler may
+   * possibly be nonzero, and create a sparsity pattern object that represents
+   * these nonzero locations.
+   *
+   * This function is a simple variation on the previous
+   * make_sparsity_pattern() function (see there for a description of all of
+   * the common arguments), but it provides functionality for vector finite
+   * elements that allows you to be more specific about which variables couple
+   * in which equation.
+   *
+   * For example, if you wanted to solve the Stokes equations,
+   *
+   * @f{align*}{
+   * -\Delta \mathbf u + \nabla p &= 0,\\ \text{div}\ u &= 0
+   * @f}
+   *
+   * in two space dimensions, using stable Q2/Q1 mixed elements (using the
+   * FESystem class), then you don't want all degrees of freedom to couple in
+   * each equation. More specifically, in the first equation, only $u_x$ and
+   * $p$ appear; in the second equation, only $u_y$ and $p$ appear; and in the
+   * third equation, only $u_x$ and $u_y$ appear. (Note that this discussion
+   * only talks about vector components of the solution variable and the
+   * different equations, and has nothing to do with degrees of freedom, or in
+   * fact with any kind of discretization.) We can describe this by the
+   * following pattern of "couplings":
+   *
+   * @f[
+   * \left[
+   * \begin{array}{ccc}
+   *   1 & 0 & 1 \\
+   *   0 & 1 & 1 \\
+   *   1 & 1 & 0
+   * \end{array}
+   * \right]
+   * @f]
+   *
+   * where "1" indicates that two variables (i.e., vector components of the
+   * FESystem) couple in the respective equation, and a "0" means no coupling.
+   * These zeros imply that upon discretization via a standard finite element
+   * formulation, we will not write entries into the matrix that, for example,
+   * couple pressure test functions with pressure shape functions (and similar
+   * for the other zeros above). It is then a waste to allocate memory for
+   * these entries in the matrix and the sparsity pattern, and you can avoid
+   * this by creating a mask such as the one above that describes this to the
+   * (current) function that computes the sparsity pattern. As stated above,
+   * the mask shown above refers to components of the composed FESystem,
+   * rather than to degrees of freedom or shape functions.
+   *
+   * This function is designed to accept a coupling pattern, like the one
+   * shown above, through the @p coupling parameter, which contains values of
+   * type #Coupling. It builds the matrix structure just like the previous
+   * function, but does not create matrix elements if not specified by the
+   * coupling pattern. If the couplings are symmetric, then so will be the
+   * resulting sparsity pattern.
+   *
+   * There is a complication if some or all of the shape functions of the
+   * finite element in use are non-zero in more than one component (in deal.II
+   * speak: they are
+   * @ref GlossPrimitive "non-primitive finite elements").
+   * In this case, the coupling element corresponding to the first non-zero
+   * component is taken and additional ones for this component are ignored.
+   *
+   * @ingroup constraints
+   */
+  template <typename DoFHandlerType, typename SparsityPatternType>
+  void
+  make_sparsity_pattern (const DoFHandlerType     &dof_handler,
+                         const Table<2, Coupling> &coupling,
+                         SparsityPatternType      &sparsity_pattern,
+                         const ConstraintMatrix   &constraints           = ConstraintMatrix(),
+                         const bool                keep_constrained_dofs = true,
+                         const types::subdomain_id subdomain_id         = numbers::invalid_subdomain_id);
+
+  /**
+   * Construct a sparsity pattern that allows coupling degrees of freedom on
+   * two different but related meshes.
+   *
+   * The idea is that if the two given DoFHandler objects correspond to two
+   * different meshes (and potentially to different finite elements used on
+   * these cells), but that if the two triangulations they are based on are
+   * derived from the same coarse mesh through hierarchical refinement, then
+   * one may set up a problem where one would like to test shape functions
+   * from one mesh against the shape functions from another mesh. In
+   * particular, this means that shape functions from a cell on the first mesh
+   * are tested against those on the second cell that are located on the
+   * corresponding cell; this correspondence is something that the
+   * InterGridMap class can determine.
+   *
+   * This function then constructs a sparsity pattern for which the degrees of
+   * freedom that represent the rows come from the first given DoFHandler,
+   * whereas the ones that correspond to columns come from the second
+   * DoFHandler.
+   *
+   * @param[in] dof_row The DoFHandler whose degrees of freedom represent the
+   * rows of the pattern.
+   * @param[in] dof_col The DoFHandler whose degrees of freedom represent the
+   * columns of the pattern.
+   * @param[out] sparsity The sparsity pattern object that is constructed.
+   */
+  template <typename DoFHandlerType, typename SparsityPatternType>
+  void
+  make_sparsity_pattern (const DoFHandlerType &dof_row,
+                         const DoFHandlerType &dof_col,
+                         SparsityPatternType  &sparsity);
+
+  /**
+   * Compute which entries of a matrix built on the given @p dof_handler may
+   * possibly be nonzero, and create a sparsity pattern object that represents
+   * these nonzero locations. This function is a variation of the
+   * make_sparsity_pattern() functions above in that it assumes that the
+   * bilinear form you want to use to generate the matrix also contains terms
+   * that integrate over the <i>faces</i> between cells (i.e., it contains
+   * "fluxes" between cells, explaining the name of the function).
+   *
+   * This function is useful for Discontinuous Galerkin methods where the
+   * standard make_sparsity_pattern() function would only create nonzero
+   * entries for all degrees of freedom on one cell coupling to all other
+   * degrees of freedom on the same cell; however, in DG methods, all or some
+   * degrees of freedom on each cell also couple to the degrees of freedom on
+   * other cells connected to the current one by a common face. The current
+   * function also creates the nonzero entries in the matrix resulting from
+   * these additional couplings. In other words, this function computes a
+   * strict super-set of nonzero entries compared to the work done by
+   * make_sparsity_pattern().
+   *
+   * @param[in] dof_handler The DoFHandler or hp::DoFHandler object that
+   * describes which degrees of freedom live on which cells.
+   *
+   * @param[out] sparsity_pattern The sparsity pattern to be filled with
+   * entries.
+   *
+   * @note The actual type of the sparsity pattern may be SparsityPattern,
+   * DynamicSparsityPattern, BlockSparsityPattern,
+   * BlockDynamicSparsityPattern, or any other class that satisfies similar
+   * requirements. It is assumed that the size of the sparsity pattern matches
+   * the number of degrees of freedom and that enough unused nonzero entries
+   * are left to fill the sparsity pattern if the sparsity pattern is of
+   * "static" kind (see
+   * @ref Sparsity
+   * for more information on what this means). The nonzero entries generated
+   * by this function are added to possible previous content of the object,
+   * i.e., previously added entries are not removed.
+   *
+   * @note If the sparsity pattern is represented by an object of type
+   * SparsityPattern (as opposed to, for example, DynamicSparsityPattern), you
+   * need to remember to call SparsityPattern::compress() after generating the
+   * pattern.
+   *
+   * @ingroup constraints
+   */
+  template<typename DoFHandlerType, typename SparsityPatternType>
+  void
+  make_flux_sparsity_pattern (const DoFHandlerType &dof_handler,
+                              SparsityPatternType  &sparsity_pattern);
+
+  /**
+   * This function does essentially the same as the other
+   * make_flux_sparsity_pattern() function but allows the specification of a
+   * number of additional arguments. These carry the same meaning as discussed
+   * in the first make_sparsity_pattern() function above.
+   *
+   * @param[in] dof_handler The DoFHandler or hp::DoFHandler object that
+   * describes which degrees of freedom live on which cells.
+   *
+   * @param[out] sparsity_pattern The sparsity pattern to be filled with
+   * entries.
+   *
+   * @param[in] constraints See the description of the argument of the same
+   * name of the first make_sparsity_pattern() function.
+   *
+   * @param[in] keep_constrained_dofs See the description of the argument of
+   * the same name of the first make_sparsity_pattern() function.
+   *
+   * @param[in] subdomain_id See the description of the argument of the same
+   * name of the first make_sparsity_pattern() function. The default is
+   * numbers::invalid_subdomain_id, the same default the
+   * make_sparsity_pattern() functions use.
+   *
+   * @ingroup constraints
+   */
+  template<typename DoFHandlerType, typename SparsityPatternType>
+  void
+  make_flux_sparsity_pattern (const DoFHandlerType      &dof_handler,
+                              SparsityPatternType       &sparsity_pattern,
+                              const ConstraintMatrix    &constraints,
+                              const bool                 keep_constrained_dofs = true,
+                              const types::subdomain_id  subdomain_id          = numbers::invalid_subdomain_id);
+
+  /**
+   * This function does essentially the same as the other
+   * make_flux_sparsity_pattern() function but allows the specification of
+   * coupling matrices that state which components of the solution variable
+   * couple in each of the equations you are discretizing. This works in
+   * complete analogy as discussed in the second make_sparsity_pattern()
+   * function above.
+   *
+   * In fact, this function takes two such masks, one describing which
+   * variables couple with each other in the cell integrals that make up your
+   * bilinear form, and one describing which variables couple with each other
+   * in the face integrals. If you passed masks consisting of only 1s to both
+   * of these, then you would get the same sparsity pattern as if you had
+   * called the first of the make_sparsity_pattern() functions above. By
+   * setting some of the entries of these masks to zeros, you can get a
+   * sparser sparsity pattern.
+   *
+   * @ingroup constraints
+   */
+  template <typename DoFHandlerType, typename SparsityPatternType>
+  void
+  make_flux_sparsity_pattern (const DoFHandlerType    &dof,
+                              SparsityPatternType     &sparsity,
+                              const Table<2,Coupling> &cell_integrals_mask,
+                              const Table<2,Coupling> &face_integrals_mask);
+
+  /**
+   * Create the sparsity pattern for boundary matrices. See the general
+   * documentation of this namespace for more information.
+   *
+   * The function does essentially what the other make_sparsity_pattern()
+   * functions do, but assumes that the bilinear form that is used to build
+   * the matrix does not consist of domain integrals, but only of integrals
+   * over the boundary of the domain.
+   *
+   * @param[in] dof The DoF handler object to work on.
+   * @param[in] dof_to_boundary_mapping A numbering of the degrees of freedom
+   * on the boundary; the general documentation of this namespace names
+   * map_dof_to_boundary_indices() as the function to obtain it from.
+   * @param[out] sparsity_pattern The sparsity pattern to be filled.
+   */
+  template <typename DoFHandlerType, typename SparsityPatternType>
+  void
+  make_boundary_sparsity_pattern (const DoFHandlerType                       &dof,
+                                  const std::vector<types::global_dof_index> &dof_to_boundary_mapping,
+                                  SparsityPatternType                        &sparsity_pattern);
+
+  /**
+   * This function is a variation of the previous
+   * make_boundary_sparsity_pattern() function in which we assume that the
+   * boundary integrals that will give rise to the matrix extend only over
+   * those parts of the boundary whose boundary indicators are listed in the
+   * @p boundary_ids argument to this function.
+   *
+   * This function could have been written by passing a @p set of boundary_id
+   * numbers. However, most of the functions throughout deal.II dealing with
+   * boundary indicators take a mapping of boundary indicators and the
+   * corresponding boundary function, i.e., a FunctionMap argument.
+   * Correspondingly, this function does the same, though the actual boundary
+   * function is ignored here. (Consequently, if you don't have any such
+   * boundary functions, just create a map with the boundary indicators you
+   * want and set the function pointers to null pointers).
+   */
+  template <typename DoFHandlerType, typename SparsityPatternType>
+  void
+  make_boundary_sparsity_pattern
+  (const DoFHandlerType                                              &dof,
+   const typename FunctionMap<DoFHandlerType::space_dimension>::type &boundary_ids,
+   const std::vector<types::global_dof_index>                        &dof_to_boundary_mapping,
+   SparsityPatternType                                               &sparsity);
+
+  /**
+   * @}
+   */
+
+  /**
+   * @name Hanging nodes and other constraints
+   * @{
+   */
+
+  /**
+   * Compute the constraints resulting from the presence of hanging nodes.
+   * Hanging nodes are best explained using a small picture:
+   *
+   * @image html hanging_nodes.png
+   *
+   * In order to make a finite element function globally continuous, we have
+   * to make sure that the dark red nodes have values that are compatible with
+   * the adjacent yellow nodes, so that the function has no jump when coming
+   * from the small cells to the large one at the top right. We therefore have
+   * to add conditions that constrain those "hanging nodes".
+   *
+   * The object into which these are inserted is later used to condense the
+   * global system matrix and right hand side, and to extend the solution
+   * vectors from the true degrees of freedom also to the constraint nodes.
+   * This function is explained in detail in the
+   * @ref step_6 "step-6"
+   * tutorial program and is used in almost all following programs as well.
+   *
+   * This function does not clear the constraint matrix object before use, in
+   * order to allow adding constraints from different sources to the same
+   * object. You therefore need to make sure it contains only constraints you
+   * still want; otherwise call the ConstraintMatrix::clear() function.
+   * Likewise, this function does not close the object since you may want to
+   * enter other constraints later on yourself.
+   *
+   * In the hp-case, i.e. when the argument is of type hp::DoFHandler, we
+   * consider constraints due to different finite elements used on two sides
+   * of a face between cells as hanging nodes as well. In other words, for hp
+   * finite elements, this function computes all constraints due to differing
+   * mesh sizes (h) or polynomial degrees (p) between adjacent cells.
+   *
+   * The template argument (and by consequence the type of the first argument
+   * to this function) can be either ::DoFHandler or hp::DoFHandler.
+   *
+   * @ingroup constraints
+   */
+  template <typename DoFHandlerType>
+  void
+  make_hanging_node_constraints (const DoFHandlerType &dof_handler,
+                                 ConstraintMatrix     &constraints);
+
+  /**
+   * This function is used when different variables in a problem are
+   * discretized on different grids, where one grid is strictly coarser than
+   * the other. An example are optimization problems where the control
+   * variable is often discretized on a coarser mesh than the state variable.
+   *
+   * The function's result can be stated as follows mathematically: Let ${\cal
+   * T}_0$ and ${\cal T}_1$ be two meshes where ${\cal T}_1$ results from
+   * ${\cal T}_0$ strictly by refining or leaving alone the cells of ${\cal
+   * T}_0$. Using the same finite element on both, there are function spaces
+   * ${\cal V}_0$ and ${\cal V}_1$ associated with these meshes. Then every
+   * function $v_0 \in {\cal V}_0$ can of course also be represented exactly
+   * in ${\cal V}_1$ since by construction ${\cal V}_0 \subset {\cal V}_1$.
+   * However, not every function in ${\cal V}_1$ can be expressed as a linear
+   * combination of the shape functions of ${\cal V}_0$. The functions that
+   * can be represented lie in a homogeneous subspace of ${\cal V}_1$ (namely,
+   * ${\cal V}_0$, of course) and this subspace can be represented by a linear
+   * constraint of the form $CV=0$ where $V$ is the vector of nodal values of
+   * functions $v\in {\cal V}_1$. In other words, every function $v_h=\sum_j
+   * V_j \varphi_j^{(1)} \in {\cal V}_1$ that also satisfies $v_h\in {\cal
+   * V}_0$ automatically satisfies $CV=0$. This function computes the matrix
+   * $C$ in the form of a ConstraintMatrix object.
+   *
+   * The construction of these constraints is done as follows: for each of the
+   * degrees of freedom (i.e. shape functions) on the coarse grid, we compute
+   * its representation on the fine grid, i.e. what the linear combination of
+   * shape functions on the fine grid looks like that resembles the shape
+   * function on the coarse grid. From this information, we can then compute
+   * the constraints which have to hold if a solution of a linear equation on
+   * the fine grid shall be representable on the coarse grid. The exact
+   * algorithm by which these constraints can be computed is rather
+   * complicated and is best understood by reading the source code, which
+   * contains many comments.
+   *
+   * The use of this function is as follows: it accepts as parameters two DoF
+   * Handlers, the first of which refers to the coarse grid and the second of
+   * which is the fine grid. On both, a finite element is represented by the
+   * DoF handler objects, which will usually have several vector components,
+   * which may belong to different base elements. The second and fourth
+   * parameter of this function therefore state which vector component on the
+   * coarse grid shall be used to restrict the stated component on the fine
+   * grid. The finite element used for the respective components on the two
+   * grids needs to be the same. An example may clarify this: consider an
+   * optimization problem with controls $q$ discretized on a coarse mesh and a
+   * state variable $u$ (and corresponding Lagrange multiplier $\lambda$)
+   * discretized on the fine mesh. These are discretized using piecewise
+   * constant discontinuous, continuous linear, and continuous linear
+   * elements, respectively. Only the parameter $q$ is represented on the
+   * coarse grid, thus the DoFHandler object on the coarse grid represents
+   * only one variable, discretized using piecewise constant discontinuous
+   * elements. Then, the parameter denoting the vector component on the coarse
+   * grid would be zero (the only possible choice, since the variable on the
+   * coarse grid is scalar). If the ordering of variables in the fine mesh
+   * FESystem is $u, q, \lambda$, then the fourth argument of the function
+   * corresponding to the vector component would be one (corresponding to the
+   * variable $q$; zero would be $u$, two would be $\lambda$).
+   *
+   * The function also requires an object of type InterGridMap representing
+   * how to get from the coarse mesh cells to the corresponding cells on the
+   * fine mesh. This could in principle be generated by the function itself
+   * from the two DoFHandler objects, but since it is probably available
+   * anyway in programs that use different meshes, the function simply takes
+   * it as an argument.
+   *
+   * The computed constraints are entered into a variable of type
+   * ConstraintMatrix; previous contents are not deleted.
+   */
+  template <int dim, int spacedim>
+  void
+  compute_intergrid_constraints (const DoFHandler<dim,spacedim>                &coarse_grid,
+                                 const unsigned int                             coarse_component,
+                                 const DoFHandler<dim,spacedim>                &fine_grid,
+                                 const unsigned int                             fine_component,
+                                 const InterGridMap<DoFHandler<dim,spacedim> > &coarse_to_fine_grid_map,
+                                 ConstraintMatrix                              &constraints);
+
+
+  /**
+   * This function generates a matrix such that when a vector of data with as
+   * many elements as there are degrees of freedom of this component on the
+   * coarse grid is multiplied to this matrix, we obtain a vector with as many
+   * elements as there are global degrees of freedom on the fine grid. All the
+   * elements of the other vector components of the finite element fields on
+   * the fine grid are not touched.
+   *
+   * The triangulation of the fine grid can be distributed. When called in
+   * parallel, each process has to have a copy of the coarse grid. In this
+   * case, the function returns the transfer representation for a set of
+   * locally owned cells.
+   *
+   * The output of this function is a compressed format that can be used to
+   * construct the corresponding sparse transfer matrix.
+   */
+  template <int dim, int spacedim>
+  void
+  compute_intergrid_transfer_representation (const DoFHandler<dim,spacedim>                         &coarse_grid,
+                                             const unsigned int                                      coarse_component,
+                                             const DoFHandler<dim,spacedim>                         &fine_grid,
+                                             const unsigned int                                      fine_component,
+                                             const InterGridMap<DoFHandler<dim,spacedim> >          &coarse_to_fine_grid_map,
+                                             std::vector<std::map<types::global_dof_index, float> > &transfer_representation);
+
+  /**
+   * @}
+   */
+
+
+  /**
+   * @name Periodic boundary conditions
+   * @{
+   */
+
+  /**
+   * Insert the (algebraic) constraints due to periodic boundary conditions
+   * into a ConstraintMatrix @p constraint_matrix.
+   *
+   * Given a pair of not necessarily active boundary faces @p face_1 and @p
+   * face_2, this function constrains all DoFs associated with the boundary
+   * described by @p face_1 to the respective DoFs of the boundary described
+   * by @p face_2. More precisely:
+   *
+   * If @p face_1 and @p face_2 are both active faces it adds the DoFs of @p
+   * face_1 to the list of constrained DoFs in @p constraint_matrix and adds
+   * entries to constrain them to the corresponding values of the DoFs on @p
+   * face_2. This happens on a purely algebraic level, meaning, the global DoF
+   * with (local face) index <tt>i</tt> on @p face_1 gets constrained to the
+   * DoF with (local face) index <tt>i</tt> on @p face_2 (possibly corrected
+   * for orientation, see below).
+   *
+   * Otherwise, if @p face_1 and @p face_2 are not active faces, this function
+   * loops recursively over the children of @p face_1 and @p face_2. If only
+   * one of the two faces is active, then we recursively iterate over the
+   * children of the non-active ones and make sure that the solution function
+   * on the refined side equals that on the non-refined face in much the same
+   * way as we enforce hanging node constraints at places where differently
+   * refined cells come together. (However, unlike hanging nodes, we do not
+   * enforce the requirement that there be only a difference of one refinement
+   * level between the two sides of the domain you would like to be periodic).
+   *
+   * This routine only constrains DoFs that are not already constrained. If
+   * this routine encounters a DoF that already is constrained (for instance
+   * by Dirichlet boundary conditions), the old setting of the constraint
+   * (dofs the entry is constrained to, inhomogeneities) is kept and nothing
+   * happens.
+   *
+   * The flags in the @p component_mask (see
+   * @ref GlossComponentMask)
+   * denote which components of the finite element space shall be constrained
+   * with periodic boundary conditions. If it is left as specified by the
+   * default value all components are constrained. If it is different from the
+   * default value, it is assumed that the number of entries equals the number
+   * of components of the finite element. This can be used to enforce
+   * periodicity in only one variable in a system of equations.
+   *
+   * @p face_orientation, @p face_flip and @p face_rotation describe an
+   * orientation that should be applied to @p face_1 prior to matching and
+   * constraining DoFs. This has nothing to do with the actual orientation of
+   * the given faces in their respective cells (which for boundary faces is
+   * always the default) but instead how you want to see periodicity to be
+   * enforced. For example, by using these flags, you can enforce a condition
+   * of the kind $u(0,y)=u(1,1-y)$ (i.e., a Moebius band) or in 3d a twisted
+   * torus. More precisely, these flags match local face DoF indices in the
+   * following manner:
+   *
+   * In 2d: <tt>face_orientation</tt> must always be <tt>true</tt>,
+   * <tt>face_rotation</tt> is always <tt>false</tt>, and face_flip has the
+   * meaning of <tt>line_flip</tt>; this implies e.g. for <tt>Q1</tt>:
+   *
+   * @code
+   *
+   * face_orientation = true, face_flip = false, face_rotation = false:
+   *
+   *     face1:           face2:
+   *
+   *     1                1
+   *     |        <-->    |
+   *     0                0
+   *
+   *     Resulting constraints: 0 <-> 0, 1 <-> 1
+   *
+   *     (Numbers denote local face DoF indices.)
+   *
+   *
+   * face_orientation = true, face_flip = true, face_rotation = false:
+   *
+   *     face1:           face2:
+   *
+   *     0                1
+   *     |        <-->    |
+   *     1                0
+   *
+   *     Resulting constraints: 1 <-> 0, 0 <-> 1
+   * @endcode
+   *
+   * And similarly for the case of Q1 in 3d:
+   *
+   * @code
+   *
+   * face_orientation = true, face_flip = false, face_rotation = false:
+   *
+   *     face1:           face2:
+   *
+   *     2 - 3            2 - 3
+   *     |   |    <-->    |   |
+   *     0 - 1            0 - 1
+   *
+   *     Resulting constraints: 0 <-> 0, 1 <-> 1, 2 <-> 2, 3 <-> 3
+   *
+   *     (Numbers denote local face DoF indices.)
+   *
+   *
+   * face_orientation = false, face_flip = false, face_rotation = false:
+   *
+   *     face1:           face2:
+   *
+   *     1 - 3            2 - 3
+   *     |   |    <-->    |   |
+   *     0 - 2            0 - 1
+   *
+   *     Resulting constraints: 0 <-> 0, 2 <-> 1, 1 <-> 2, 3 <-> 3
+   *
+   *
+   * face_orientation = true, face_flip = true, face_rotation = false:
+   *
+   *     face1:           face2:
+   *
+   *     1 - 0            2 - 3
+   *     |   |    <-->    |   |
+   *     3 - 2            0 - 1
+   *
+   *     Resulting constraints: 3 <-> 0, 2 <-> 1, 1 <-> 2, 0 <-> 3
+   *
+   *
+   * face_orientation = true, face_flip = false, face_rotation = true:
+   *
+   *     face1:           face2:
+   *
+   *     0 - 2            2 - 3
+   *     |   |    <-->    |   |
+   *     1 - 3            0 - 1
+   *
+   *     Resulting constraints: 1 <-> 0, 3 <-> 1, 0 <-> 2, 2 <-> 3
+   *
+   * and any combination of that...
+   * @endcode
+   *
+   * Optionally a matrix @p matrix along with an std::vector @p
+   * first_vector_components can be specified that describes how DoFs on @p
+   * face_1 should be modified prior to constraining to the DoFs of @p face_2.
+   * Here, two declarations are possible: If the std::vector @p
+   * first_vector_components is non empty the matrix is interpreted as a @p
+   * dim $\times$ @p dim rotation matrix that is applied to all vector valued
+   * blocks listed in @p first_vector_components of the FESystem. If @p
+   * first_vector_components is empty the matrix is interpreted as an
+   * interpolation matrix with size no_face_dofs $\times$ no_face_dofs.
+   *
+   * Detailed information can be found in the
+   * @ref GlossPeriodicConstraints "Glossary entry on periodic boundary conditions".
+   *
+   * @author Matthias Maier, 2012 - 2015
+   */
+  template<typename FaceIterator>
+  void
+  make_periodicity_constraints
+  (const FaceIterator                          &face_1,
+   const typename identity<FaceIterator>::type &face_2,
+   dealii::ConstraintMatrix                    &constraint_matrix,
+   const ComponentMask                         &component_mask = ComponentMask(),
+   const bool                                  face_orientation = true,
+   const bool                                  face_flip = false,
+   const bool                                  face_rotation = false,
+   const FullMatrix<double>                    &matrix = FullMatrix<double>(),
+   const std::vector<unsigned int>             &first_vector_components = std::vector<unsigned int>());
+
+
+
+  /**
+   * Insert the (algebraic) constraints due to periodic boundary conditions
+   * into a ConstraintMatrix @p constraint_matrix.
+   *
+   * This is the main high level interface for the above low level variant of
+   * make_periodicity_constraints(). It takes a std::vector @p periodic_faces
+   * as argument and applies the above make_periodicity_constraints() on each
+   * entry. @p periodic_faces can be created by
+   * GridTools::collect_periodic_faces.
+   *
+   * @note For DoFHandler objects that are built on a
+   * parallel::distributed::Triangulation object
+   * parallel::distributed::Triangulation::add_periodicity has to be called
+   * before calling this function.
+   *
+   * @see
+   * @ref GlossPeriodicConstraints "Glossary entry on periodic boundary conditions"
+   * and step-45 for further information.
+   *
+   * @author Daniel Arndt, Matthias Maier, 2013 - 2015
+   */
+  template<typename DoFHandlerType>
+  void
+  make_periodicity_constraints
+  (const std::vector<GridTools::PeriodicFacePair<typename DoFHandlerType::cell_iterator> >
+   &periodic_faces,
+   dealii::ConstraintMatrix        &constraint_matrix,
+   const ComponentMask             &component_mask = ComponentMask(),
+   const std::vector<unsigned int> &first_vector_components = std::vector<unsigned int>());
+
+
+
+  /**
+   * Insert the (algebraic) constraints due to periodic boundary conditions
+   * into a ConstraintMatrix @p constraint_matrix.
+   *
+   * This function serves as a high level interface for the
+   * make_periodicity_constraints() function.
+   *
+   * Define a 'first' boundary as all boundary faces having boundary_id @p
+   * b_id1 and a 'second' boundary consisting of all faces belonging to @p
+   * b_id2.
+   *
+   * This function tries to match all faces belonging to the first boundary
+   * with faces belonging to the second boundary with the help of
+   * orthogonal_equality().
+   *
+   * If this matching is successful it constrains all DoFs associated with the
+   * 'first' boundary to the respective DoFs of the 'second' boundary
+   * respecting the relative orientation of the two faces.
+   *
+   * @note This function is a convenience wrapper. It internally calls
+   * GridTools::collect_periodic_faces() with the supplied parameters and
+   * feeds the output to above make_periodicity_constraints() variant. If you
+   * need more functionality use GridTools::collect_periodic_faces() directly.
+   *
+   * @see
+   * @ref GlossPeriodicConstraints "Glossary entry on periodic boundary conditions"
+   * for further information.
+   *
+   * @author Matthias Maier, 2012
+   */
+  template<typename DoFHandlerType>
+  void
+  make_periodicity_constraints
+  (const DoFHandlerType     &dof_handler,
+   const types::boundary_id  b_id1,
+   const types::boundary_id  b_id2,
+   const int                 direction,
+   dealii::ConstraintMatrix &constraint_matrix,
+   const ComponentMask      &component_mask = ComponentMask());
+
+
+
+  /**
+   * This compatibility version of make_periodicity_constraints only works on
+   * grids with cells in
+   * @ref GlossFaceOrientation "standard orientation".
+   *
+   * Instead of defining a 'first' and 'second' boundary with the help of two
+   * boundary_ids this function defines a 'left' boundary as all faces with
+   * local face index <code>2*direction</code> and boundary indicator @p b_id
+   * and, similarly, a 'right' boundary consisting of all faces with local face
+   * index <code>2*direction+1</code> and boundary indicator @p b_id.
+   *
+   * @note This version of make_periodicity_constraints will not work on
+   * meshes with cells not in
+   * @ref GlossFaceOrientation "standard orientation".
+   *
+   * @note This function is a convenience wrapper. It internally calls
+   * GridTools::collect_periodic_faces() with the supplied parameters and
+   * feeds the output to above make_periodicity_constraints() variant. If you
+   * need more functionality use GridTools::collect_periodic_faces() directly.
+   *
+   * @see
+   * @ref GlossPeriodicConstraints "Glossary entry on periodic boundary conditions"
+   * for further information.
+   */
+  template<typename DoFHandlerType>
+  void
+  make_periodicity_constraints
+  (const DoFHandlerType     &dof_handler,
+   const types::boundary_id  b_id,
+   const int                 direction,
+   dealii::ConstraintMatrix &constraint_matrix,
+   const ComponentMask      &component_mask = ComponentMask());
+
+  /**
+   * Take a vector of values which live on cells (e.g. an error per cell) and
+   * distribute it to the dofs in such a way that a finite element field
+   * results, which can then be further processed, e.g. for output. You should
+   * note that the resulting field will not be continuous at hanging nodes.
+   * This can, however, easily be arranged by calling the appropriate @p
+   * distribute function of a ConstraintMatrix object created for this
+   * DoFHandler object, after the vector has been fully assembled.
+   *
+   * It is assumed that the number of elements in @p cell_data equals the
+   * number of active cells and that the number of elements in @p dof_data
+   * equals <tt>dof_handler.n_dofs()</tt>.
+   *
+   * Note that the input vector may be a vector of any data type as long as it
+   * is convertible to @p double.  The output vector, being a data vector on a
+   * DoF handler, always consists of elements of type @p double.
+   *
+   * In case the finite element used by this DoFHandler consists of more than
+   * one component, you need to specify which component in the output vector
+   * should be used to store the finite element field in; the default is zero
+   * (no other value is allowed if the finite element consists only of one
+   * component). All other components of the vector remain untouched, i.e.
+   * their contents are not changed.
+   *
+   * This function cannot be used if the finite element in use has shape
+   * functions that are non-zero in more than one vector component (in deal.II
+   * speak: they are non-primitive).
+   */
+  template <typename DoFHandlerType, typename Number>
+  void
+  distribute_cell_to_dof_vector (const DoFHandlerType  &dof_handler,
+                                 const Vector<Number>  &cell_data,
+                                 Vector<double>        &dof_data,
+                                 const unsigned int     component = 0);
+
+  /**
+   * @}
+   */
+
+  /**
+   * @name Identifying subsets of degrees of freedom with particular
+   * properties
+   * @{
+   */
+
+  /**
+   * Extract the indices of the degrees of freedom belonging to certain vector
+   * components of a vector-valued finite element. The @p component_mask
+   * defines which components or blocks of an FESystem are to be extracted
+   * from the DoFHandler @p dof_handler. The entries in the output array @p
+   * selected_dofs corresponding to degrees of freedom belonging to these
+   * components are then flagged @p true, while all others are set to @p
+   * false.
+   *
+   * The size of @p component_mask must be compatible with the number of
+   * components in the FiniteElement used by @p dof_handler. The size of @p
+   * selected_dofs must equal DoFHandler::n_dofs(). Previous contents of this
+   * array are overwritten.
+   *
+   * If the finite element under consideration is not primitive, i.e., some or
+   * all of its shape functions are non-zero in more than one vector component
+   * (which holds, for example, for FE_Nedelec or FE_RaviartThomas elements),
+   * then shape functions cannot be associated with a single vector component.
+   * In this case, if <em>one</em> shape vector component of this element is
+   * flagged in @p component_mask (see
+   * @ref GlossComponentMask),
+   * then this is equivalent to selecting <em>all</em> vector components
+   * corresponding to this non-primitive base element.
+   */
+  template <int dim, int spacedim>
+  void
+  extract_dofs (const DoFHandler<dim,spacedim> &dof_handler,
+                const ComponentMask            &component_mask,
+                std::vector<bool>              &selected_dofs);
+
+  /**
+   * The same function as above, but for a hp::DoFHandler.
+   */
+  template <int dim, int spacedim>
+  void
+  extract_dofs (const hp::DoFHandler<dim,spacedim> &dof_handler,
+                const ComponentMask                &component_mask,
+                std::vector<bool>                  &selected_dofs);
+
+  /**
+   * This function is the equivalent to the DoFTools::extract_dofs() functions
+   * above except that the selection of which degrees of freedom to extract is
+   * not done based on components (see
+   * @ref GlossComponent)
+   * but instead based on whether they are part of a particular block (see
+   * @ref GlossBlock).
+   * Consequently, the second argument is not a ComponentMask but a BlockMask
+   * object.
+   *
+   * @param dof_handler The DoFHandler object from which to extract degrees of
+   * freedom
+   * @param block_mask The block mask that describes which blocks to consider
+   * (see
+   * @ref GlossBlockMask)
+   * @param selected_dofs A vector of length DoFHandler::n_dofs() in which
+   * those entries are true that correspond to the selected blocks.
+   */
+  template <int dim, int spacedim>
+  void
+  extract_dofs (const DoFHandler<dim,spacedim> &dof_handler,
+                const BlockMask                &block_mask,
+                std::vector<bool>              &selected_dofs);
+
+  /**
+   * The same function as above, but for a hp::DoFHandler.
+   */
+  template <int dim, int spacedim>
+  void
+  extract_dofs (const hp::DoFHandler<dim,spacedim> &dof_handler,
+                const BlockMask                    &block_mask,
+                std::vector<bool>                  &selected_dofs);
+
+  /**
+   * Do the same thing as the corresponding extract_dofs() function for one
+   * level of a multi-grid DoF numbering.
+   */
+  template <typename DoFHandlerType>
+  void
+  extract_level_dofs (const unsigned int    level,
+                      const DoFHandlerType &dof,
+                      const ComponentMask  &component_mask,
+                      std::vector<bool>    &selected_dofs);
+
+  /**
+   * Do the same thing as the corresponding extract_dofs() function for one
+   * level of a multi-grid DoF numbering.
+   */
+  template <typename DoFHandlerType>
+  void
+  extract_level_dofs (const unsigned int    level,
+                      const DoFHandlerType &dof,
+                      const BlockMask      &component_mask,
+                      std::vector<bool>    &selected_dofs);
+
+  /**
+   * Extract all degrees of freedom which are at the boundary and belong to
+   * specified components of the solution. The function returns its results in
+   * the last non-default-valued parameter which contains @p true if a degree
+   * of freedom is at the boundary and belongs to one of the selected
+   * components, and @p false otherwise. The function is used in step-15.
+   *
+   * By specifying the @p boundary_ids variable, you can select which boundary
+   * indicators the faces have to have on which the degrees of freedom are
+   * located that shall be extracted. If it is an empty list, then all
+   * boundary indicators are accepted.
+   *
+   * The size of @p component_mask (see
+   * @ref GlossComponentMask)
+   * shall equal the number of components in the finite element used by @p
+   * dof_handler. The size of @p selected_dofs shall equal
+   * <tt>dof_handler.n_dofs()</tt>. Previous contents of this array are
+   * overwritten.
+   *
+   * Using the usual convention, if a shape function is non-zero in more than
+   * one component (i.e. it is non-primitive), then the element in the
+   * component mask is used that corresponds to the first non-zero component.
+   * Elements in the mask corresponding to later components are ignored.
+   *
+   * @note This function will not work for DoFHandler objects that are built
+   * on a parallel::distributed::Triangulation object. The reason is that the
+   * output argument @p selected_dofs has to have a length equal to <i>all</i>
+   * global degrees of freedom. Consequently, this does not scale to very
+   * large problems. If you need the functionality of this function for
+   * parallel triangulations, then you need to use the other
+   * DoFTools::extract_boundary_dofs function.
+   *
+   * @param dof_handler The object that describes which degrees of freedom
+   * live on which cell
+   * @param component_mask A mask denoting the vector components of the finite
+   * element that should be considered (see also
+   * @ref GlossComponentMask).
+   * @param selected_dofs A vector of length <tt>dof_handler.n_dofs()</tt>
+   * whose entries are set to @p true for degrees of freedom that are located
+   * on the boundary (and correspond to the selected vector components and
+   * boundary indicators, depending on the values of the @p component_mask and
+   * @p boundary_ids arguments), and to @p false otherwise.
+   * @param boundary_ids If empty, this function extracts the indices of the
+   * degrees of freedom for all parts of the boundary. If it is a non-empty
+   * list, then the function only considers boundary faces with the boundary
+   * indicators listed in this argument.
+   *
+   * @see
+   * @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+   */
+  template <typename DoFHandlerType>
+  void
+  extract_boundary_dofs (const DoFHandlerType       &dof_handler,
+                         const ComponentMask        &component_mask,
+                         std::vector<bool>          &selected_dofs,
+                         const std::set<types::boundary_id> &boundary_ids = std::set<types::boundary_id>());
+
+  /**
+   * This function does the same as the previous one but it returns its result
+   * as an IndexSet rather than a std::vector@<bool@>. Thus, it can also be
+   * called for DoFHandler objects that are defined on
+   * parallel::distributed::Triangulation objects.
+   *
+   * @note If the DoFHandler object is indeed defined on a
+   * parallel::distributed::Triangulation, then the @p selected_dofs index set
+   * will contain only those degrees of freedom on the boundary that belong to
+   * the locally relevant set (see
+   * @ref GlossLocallyRelevantDof "locally relevant DoFs").
+   *
+   * @param dof_handler The object that describes which degrees of freedom
+   * live on which cell
+   * @param component_mask A mask denoting the vector components of the finite
+   * element that should be considered (see also
+   * @ref GlossComponentMask).
+   * @param selected_dofs The IndexSet object that is returned and that will
+   * contain the indices of degrees of freedom that are located on the
+   * boundary (and correspond to the selected vector components and boundary
+   * indicators, depending on the values of the @p component_mask and @p
+   * boundary_ids arguments).
+   * @param boundary_ids If empty, this function extracts the indices of the
+   * degrees of freedom for all parts of the boundary. If it is a non-empty
+   * list, then the function only considers boundary faces with the boundary
+   * indicators listed in this argument.
+   *
+   * @see
+   * @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+   */
+  template <typename DoFHandlerType>
+  void
+  extract_boundary_dofs (const DoFHandlerType       &dof_handler,
+                         const ComponentMask        &component_mask,
+                         IndexSet                   &selected_dofs,
+                         const std::set<types::boundary_id> &boundary_ids = std::set<types::boundary_id>());
+
+  /**
+   * This function is similar to the extract_boundary_dofs() function but it
+   * extracts those degrees of freedom whose shape functions are nonzero on at
+   * least part of the selected boundary. For continuous elements, this is
+   * exactly the set of shape functions whose degrees of freedom are defined
+   * on boundary faces. On the other hand, if the finite element in use is a
+   * discontinuous element, all degrees of freedom are defined in the inside
+   * of cells and consequently none would be boundary degrees of freedom.
+   * Several of those would have shape functions that are nonzero on the
+   * boundary, however. This function therefore extracts all those for which
+   * the FiniteElement::has_support_on_face function says that it is nonzero
+   * on any face on one of the selected boundary parts.
+   *
+   * @see
+   * @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+   */
+  template <typename DoFHandlerType>
+  void
+  extract_dofs_with_support_on_boundary (const DoFHandlerType   &dof_handler,
+                                         const ComponentMask    &component_mask,
+                                         std::vector<bool>      &selected_dofs,
+                                         const std::set<types::boundary_id> &boundary_ids = std::set<types::boundary_id>());
+
+  /**
+   * Extract a vector that represents the constant modes of the DoFHandler for
+   * the components chosen by <tt>component_mask</tt> (see
+   * @ref GlossComponentMask).
+   * The constant modes on a discretization are the null space of a Laplace
+   * operator on the selected components with Neumann boundary conditions
+   * applied. The null space is a necessary ingredient for obtaining a good
+   * AMG preconditioner when using the class
+   * TrilinosWrappers::PreconditionAMG.  Since the ML AMG package only works
+   * on algebraic properties of the respective matrix, it has no chance to
+   * detect whether the matrix comes from a scalar or a vector valued problem.
+   * However, a near null space supplies exactly the needed information about
+   * the placement of vector components within the matrix. The null
+   * space (or rather, the constant modes) is provided by the finite element
+   * underlying the given DoFHandler and for most elements, the null space
+   * will consist of as many vectors as there are true arguments in
+   * <tt>component_mask</tt> (see
+   * @ref GlossComponentMask),
+   * each of which will be one in one vector component and zero in all others.
+   * However, the representation of the constant function for e.g. FE_DGP is
+   * different (the first component on each element one, all other components
+   * zero), and some scalar elements may even have two constant modes
+   * (FE_Q_DG0). Therefore, we store this object in a vector of vectors, where
+   * the outer vector contains the collection of the actual constant modes on
+   * the DoFHandler. Each inner vector has as many components as there are
+   * (locally owned) degrees of freedom in the selected components. Note that
+   * any matrix associated with this null space must have been constructed
+   * using the same <tt>component_mask</tt> argument, since the numbering of
+   * DoFs is done relative to the selected dofs, not to all dofs.
+   *
+   * The main reason for this function is the use of the null space with the
+   * AMG preconditioner.
+   */
+  template <typename DoFHandlerType>
+  void
+  extract_constant_modes (const DoFHandlerType            &dof_handler,
+                          const ComponentMask             &component_mask,
+                          std::vector<std::vector<bool> > &constant_modes);
+
+  /**
+   * @}
+   */
+  /**
+   * @name Hanging nodes
+   * @{
+   */
+
+  /**
+   * Select all dofs that will be constrained by interface constraints, i.e.
+   * all hanging nodes.
+   *
+   * The size of @p selected_dofs shall equal <tt>dof_handler.n_dofs()</tt>.
+   * Previous contents of this array are overwritten.
+   */
+  template <int dim, int spacedim>
+  void
+  extract_hanging_node_dofs (const DoFHandler<dim,spacedim> &dof_handler,
+                             std::vector<bool>              &selected_dofs);
+  //@}
+
+  /**
+   * @name Parallelization and domain decomposition
+   * @{
+   */
+  /**
+   * Flag all those degrees of freedom which are on cells with the given
+   * subdomain id. Note that DoFs on faces can belong to cells with differing
+   * subdomain ids, so the sets of flagged degrees of freedom are not mutually
+   * exclusive for different subdomain ids.
+   *
+   * If you want to get a unique association of degree of freedom with
+   * subdomains, use the @p get_subdomain_association function.
+   */
+  template <typename DoFHandlerType>
+  void
+  extract_subdomain_dofs (const DoFHandlerType      &dof_handler,
+                          const types::subdomain_id  subdomain_id,
+                          std::vector<bool>         &selected_dofs);
+
+
+  /**
+   * Extract the set of global DoF indices that are owned by the current
+   * processor. For regular DoFHandler objects, this set is the complete set
+   * with all DoF indices. In either case, it equals what
+   * DoFHandler::locally_owned_dofs() returns.
+   */
+  template <typename DoFHandlerType>
+  void
+  extract_locally_owned_dofs (const DoFHandlerType &dof_handler,
+                              IndexSet             &dof_set);
+
+
+  /**
+   * Extract the set of global DoF indices that are active on the current
+   * DoFHandler. For regular DoFHandlers, these are all DoF indices, but for
+   * DoFHandler objects built on parallel::distributed::Triangulation this set
+   * is a superset of DoFHandler::locally_owned_dofs() and contains all DoF
+   * indices that live on all locally owned cells (including on the interface
+   * to ghost cells). However, it does not contain the DoF indices that are
+   * exclusively defined on ghost or artificial cells (see
+   * @ref GlossArtificialCell "the glossary").
+   *
+   * The degrees of freedom identified by this function equal those obtained
+   * from the dof_indices_with_subdomain_association() function when called
+   * with the locally owned subdomain id.
+   */
+  template <typename DoFHandlerType>
+  void
+  extract_locally_active_dofs (const DoFHandlerType &dof_handler,
+                               IndexSet             &dof_set);
+
+  /**
+   * Extract the set of global DoF indices that are active on the current
+   * DoFHandler. For regular DoFHandlers, these are all DoF indices, but for
+   * DoFHandler objects built on parallel::distributed::Triangulation this set
+   * is the union of DoFHandler::locally_owned_dofs() and the DoF indices on
+   * all ghost cells. In essence, it is the DoF indices on all cells that are
+   * not artificial (see
+   * @ref GlossArtificialCell "the glossary").
+   */
+  template <typename DoFHandlerType>
+  void
+  extract_locally_relevant_dofs (const DoFHandlerType &dof_handler,
+                                 IndexSet             &dof_set);
+
+  /**
+   *
+   * For each processor, determine the set of locally owned degrees of freedom
+   * as an IndexSet. This function then returns a vector of index sets, where
+   * the vector has size equal to the number of MPI processes that participate
+   * in the DoF handler object.
+   *
+   * The function can be used for objects of type dealii::Triangulation or
+   * parallel::shared::Triangulation. It will not work for objects of type
+   * parallel::distributed::Triangulation since for such triangulations we do
+   * not have information about all cells of the triangulation available
+   * locally, and consequently can not say anything definitive about the
+   * degrees of freedom active on other processors' locally owned cells.
+   *
+   * @author Denis Davydov, 2015
+   */
+  template <typename DoFHandlerType>
+  std::vector<IndexSet>
+  locally_owned_dofs_per_subdomain (const DoFHandlerType &dof_handler);
+
+  /**
+   *
+   * For each processor, determine the set of locally relevant degrees of
+   * freedom as an IndexSet. This function then returns a vector of index
+   * sets, where the vector has size equal to the number of MPI processes that
+   * participate in the DoF handler object.
+   *
+   * The function can be used for objects of type dealii::Triangulation or
+   * parallel::shared::Triangulation. It will not work for objects of type
+   * parallel::distributed::Triangulation since for such triangulations we do
+   * not have information about all cells of the triangulation available
+   * locally, and consequently can not say anything definitive about the
+   * degrees of freedom active on other processors' locally owned cells.
+   *
+   * @author Jean-Paul Pelteret, 2015
+   */
+  template <typename DoFHandlerType>
+  std::vector<IndexSet>
+  locally_relevant_dofs_per_subdomain (const DoFHandlerType &dof_handler);
+
+
+  /**
+   * Same as extract_locally_relevant_dofs() but for multigrid DoFs for the
+   * given @p level.
+   */
+  template <typename DoFHandlerType>
+  void
+  extract_locally_relevant_level_dofs (const DoFHandlerType &dof_handler,
+                                       const unsigned int    level,
+                                       IndexSet             &dof_set);
+
+
+  /**
+   * For each degree of freedom, return in the output array to which subdomain
+   * (as given by the <tt>cell->subdomain_id()</tt> function) it belongs. The
+   * output array is supposed to have the right size already when calling this
+   * function.
+   *
+   * Note that degrees of freedom associated with faces, edges, and vertices
+   * may be associated with multiple subdomains if they are sitting on
+   * partition boundaries. In these cases, we put them into one of the
+   * associated partitions in an undefined way. This may sometimes lead to
+   * different numbers of degrees of freedom in partitions, even if the number
+   * of cells is perfectly equidistributed. While this is regrettable, it is
+   * not a problem in practice since the number of degrees of freedom on
+   * partition boundaries is asymptotically vanishing as we refine the mesh as
+   * long as the number of partitions is kept constant.
+   *
+   * This function returns the association of each DoF with one subdomain. If
+   * you are looking for the association of each @em cell with a subdomain,
+   * either query the <tt>cell->subdomain_id()</tt> function, or use the
+   * <tt>GridTools::get_subdomain_association</tt> function.
+   *
+   * Note that this function is of questionable use for DoFHandler objects
+   * built on parallel::distributed::Triangulation since in that case
+   * ownership of individual degrees of freedom by MPI processes is controlled
+   * by the DoF handler object, not based on some geometric algorithm in
+   * conjunction with subdomain id. In particular, the degrees of freedom
+   * identified by the functions in this namespace as associated with a
+   * subdomain are not the same as those the DoFHandler class identifies as
+   * the ones it owns.
+   */
+  template <typename DoFHandlerType>
+  void
+  get_subdomain_association (const DoFHandlerType             &dof_handler,
+                             std::vector<types::subdomain_id> &subdomain);
+
+  /**
+   * Count how many degrees of freedom are uniquely associated with the given
+   * @p subdomain index.
+   *
+   * Note that there may be rare cases where cells with the given @p subdomain
+   * index exist, but none of its degrees of freedom are actually associated
+   * with it. In that case, the returned value will be zero.
+   *
+   * This function will generate an exception if there are no cells with the
+   * given @p subdomain index.
+   *
+   * This function returns the number of DoFs associated with one subdomain.
+   * If you are looking for the association of @em cells with this subdomain,
+   * use the <tt>GridTools::count_cells_with_subdomain_association</tt>
+   * function.
+   *
+   * Note that this function is of questionable use for DoFHandler objects
+   * built on parallel::distributed::Triangulation since in that case
+   * ownership of individual degrees of freedom by MPI processes is controlled
+   * by the DoF handler object, not based on some geometric algorithm in
+   * conjunction with subdomain id. In particular, the degrees of freedom
+   * identified by the functions in this namespace as associated with a
+   * subdomain are not the same as those the DoFHandler class identifies as
+   * the ones it owns.
+   */
+  template <typename DoFHandlerType>
+  unsigned int
+  count_dofs_with_subdomain_association (const DoFHandlerType      &dof_handler,
+                                         const types::subdomain_id  subdomain);
+
+  /**
+   * Count how many degrees of freedom are uniquely associated with the given
+   * @p subdomain index.
+   *
+   * This function does what the previous one does except that it splits the
+   * result among the vector components of the finite element in use by the
+   * DoFHandler object. The last argument (which must have a length equal to
+   * the number of vector components) will therefore store how many degrees of
+   * freedom of each vector component are associated with the given subdomain.
+   *
+   * Note that this function is of questionable use for DoFHandler objects
+   * built on parallel::distributed::Triangulation since in that case
+   * ownership of individual degrees of freedom by MPI processes is controlled
+   * by the DoF handler object, not based on some geometric algorithm in
+   * conjunction with subdomain id. In particular, the degrees of freedom
+   * identified by the functions in this namespace as associated with a
+   * subdomain are not the same as those the DoFHandler class identifies as
+   * the ones it owns.
+   */
+  template <typename DoFHandlerType>
+  void
+  count_dofs_with_subdomain_association (const DoFHandlerType      &dof_handler,
+                                         const types::subdomain_id  subdomain,
+                                         std::vector<unsigned int> &n_dofs_on_subdomain);
+
+  /**
+   * Return a set of indices that denotes the degrees of freedom that live on
+   * the given subdomain, i.e. that are on cells owned by the current
+   * processor. Note that this includes the ones that this subdomain "owns"
+   * (i.e. the ones for which get_subdomain_association() returns a value
+   * equal to the subdomain given here and that are selected by the
+   * extract_locally_owned_dofs() function) but also all of those that sit on
+   * the boundary between the given subdomain and other subdomains. In
+   * essence, degrees of freedom that sit on boundaries between subdomains
+   * will be in the index sets returned by this function for more than one
+   * subdomain.
+   *
+   * Note that this function is of questionable use for DoFHandler objects
+   * built on parallel::distributed::Triangulation since in that case
+   * ownership of individual degrees of freedom by MPI processes is controlled
+   * by the DoF handler object, not based on some geometric algorithm in
+   * conjunction with subdomain id. In particular, the degrees of freedom
+   * identified by the functions in this namespace as associated with a
+   * subdomain are not the same as those the DoFHandler class identifies as
+   * the ones it owns.
+   */
+  template <typename DoFHandlerType>
+  IndexSet
+  dof_indices_with_subdomain_association (const DoFHandlerType &dof_handler,
+                                          const types::subdomain_id subdomain);
+  // @}
+  /**
+   * @name DoF indices on patches of cells
+   *
+   * Create structures containing a large set of degrees of freedom for small
+   * patches of cells. The resulting objects can be used in RelaxationBlockSOR
+   * and related classes to implement Schwarz preconditioners and smoothers,
+   * where the subdomains consist of small numbers of cells only.
+   */
+  //@{
+  /**
+   * Create an incidence matrix that for every cell on a given level of a
+   * multilevel DoFHandler flags which degrees of freedom are associated with
+   * the corresponding cell. This data structure is a matrix with as many rows
+   * as there are cells on a given level, as many columns as there are degrees
+   * of freedom on this level, and entries that are either true or false. This
+   * data structure is conveniently represented by a SparsityPattern object.
+   *
+   * @note The ordering of rows (cells) follows the ordering of the standard
+   * cell iterators.
+   */
+  template <typename DoFHandlerType, class Sparsity>
+  void make_cell_patches(Sparsity                &block_list,
+                         const DoFHandlerType    &dof_handler,
+                         const unsigned int       level,
+                         const std::vector<bool> &selected_dofs = std::vector<bool>(),
+                         types::global_dof_index  offset        = 0);
+
+  /**
+   * Create an incidence matrix that for every vertex on a given level of a
+   * multilevel DoFHandler flags which degrees of freedom are associated with
+   * the adjacent cells. This data structure is a matrix with as many rows as
+   * there are vertices on a given level, as many columns as there are degrees
+   * of freedom on this level, and entries that are either true or false. This
+   * data structure is conveniently represented by a SparsityPattern object.
+   * The sparsity pattern may be empty when entering this function and will be
+   * reinitialized to the correct size.
+   *
+   * The function has some boolean arguments (listed below) controlling
+   * details of the generated patches. The default settings are those for
+   * Arnold-Falk-Winther type smoothers for divergence and curl conforming
+   * finite elements with essential boundary conditions. Other applications
+   * are possible, in particular changing <tt>boundary_patches</tt> for
+   * non-essential boundary conditions.
+   *
+   * @arg <tt>block_list</tt>: the SparsityPattern into which the patches will
+   * be stored.
+   *
+   * @arg <tt>dof_handler</tt>: The multilevel dof handler providing the
+   * topology operated on.
+   *
+   * @arg <tt>interior_dofs_only</tt>: for each patch of cells around a
+   * vertex, collect only the interior degrees of freedom of the patch and
+   * disregard those on the boundary of the patch. This is for instance the
+   * setting for smoothers of Arnold-Falk-Winther type.
+   *
+   * @arg <tt>boundary_patches</tt>: include patches around vertices at the
+   * boundary of the domain. If not, only patches around interior vertices
+   * will be generated.
+   *
+   * @arg <tt>level_boundary_patches</tt>: same for refinement edges towards
+   * coarser cells.
+   *
+   * @arg <tt>single_cell_patches</tt>: if not true, patches containing a
+   * single cell are eliminated.
+   */
+  template <typename DoFHandlerType>
+  void make_vertex_patches(SparsityPattern      &block_list,
+                           const DoFHandlerType &dof_handler,
+                           const unsigned int    level,
+                           const bool            interior_dofs_only,
+                           const bool            boundary_patches       = false,
+                           const bool            level_boundary_patches = false,
+                           const bool            single_cell_patches    = false);
+
+  /**
+   * Create an incidence matrix that for every cell on a given level of a
+   * multilevel DoFHandler flags which degrees of freedom are associated with
+   * children of this cell. This data structure is conveniently represented by
+   * a SparsityPattern object.
+   *
+   * The function thus creates a sparsity pattern which in each row (with rows
+   * corresponding to the cells on this level) lists the degrees of freedom
+   * associated to the cells that are the children of this cell. The DoF
+   * indices used here are level dof indices of a multilevel hierarchy, i.e.,
+   * they may be associated with children that are not themselves active. The
+   * sparsity pattern may be empty when entering this function and will be
+   * reinitialized to the correct size.
+   *
+   * The function has some boolean arguments (listed below) controlling
+   * details of the generated patches. The default settings are those for
+   * Arnold-Falk-Winther type smoothers for divergence and curl conforming
+   * finite elements with essential boundary conditions. Other applications
+   * are possible, in particular changing <tt>boundary_dofs</tt> for
+   * non-essential boundary conditions.
+   *
+   * @arg <tt>block_list</tt>: the SparsityPattern into which the patches will
+   * be stored.
+   *
+   * @arg <tt>dof_handler</tt>: The multilevel dof handler providing the
+   * topology operated on.
+   *
+   * @arg <tt>interior_dofs_only</tt>: for each patch of cells around a
+   * vertex, collect only the interior degrees of freedom of the patch and
+   * disregard those on the boundary of the patch. This is for instance the
+   * setting for smoothers of Arnold-Falk-Winther type.
+   *
+   * @arg <tt>boundary_dofs</tt>: include degrees of freedom that would have
+   * been excluded by <tt>interior_dofs_only</tt>, but lie on the boundary of
+   * the domain, and thus need smoothing. This parameter has no effect if
+   * <tt>interior_dofs_only</tt> is false.
+   */
+  template <typename DoFHandlerType>
+  void make_child_patches(SparsityPattern      &block_list,
+                          const DoFHandlerType &dof_handler,
+                          const unsigned int    level,
+                          const bool            interior_dofs_only,
+                          const bool            boundary_dofs = false);
+
+  /**
+   * Create a block list with only a single patch, which in turn contains all
+   * degrees of freedom on the given level.
+   *
+   * This function is mostly a closure on level 0 for functions like
+   * make_child_patches() and make_vertex_patches(), which may produce an
+   * empty patch list.
+   *
+   * @arg <tt>block_list</tt>: the SparsityPattern into which the patches will
+   * be stored.
+   *
+   * @arg <tt>dof_handler</tt>: The multilevel dof handler providing the
+   * topology operated on.
+   *
+   * @arg <tt>level</tt>: The grid level used for building the list.
+   *
+   * @arg <tt>interior_dofs_only</tt>: if true, exclude degrees of freedom on
+   * the boundary of the domain.
+   */
+  template <typename DoFHandlerType>
+  void make_single_patch(SparsityPattern      &block_list,
+                         const DoFHandlerType &dof_handler,
+                         const unsigned int    level,
+                         const bool            interior_dofs_only = false);
+
+  /**
+   * @}
+   */
+  /**
+   * @name Counting degrees of freedom and related functions
+   * @{
+   */
+
+  /**
+   * Count how many degrees of freedom out of the total number belong to each
+   * component. If the number of components the finite element has is one
+   * (i.e. you only have one scalar variable), then the number in this
+   * component obviously equals the total number of degrees of freedom.
+   * Otherwise, the sum of the DoFs in all the components needs to equal the
+   * total number.
+   *
+   * However, the last statement does not hold true if the finite element is
+   * not primitive, i.e. some or all of its shape functions are non-zero in
+   * more than one vector component. This applies, for example, to the Nedelec
+   * or Raviart-Thomas elements. In this case, a degree of freedom is counted
+   * in each component in which it is non-zero, so that the sum mentioned
+   * above is greater than the total number of degrees of freedom.
+   *
+   * This behavior can be switched off by the optional parameter
+   * <tt>vector_valued_once</tt>. If this is <tt>true</tt>, the number of
+   * components of a nonprimitive vector valued element is collected only in
+   * the first component. All other components will have a count of zero.
+   *
+   * The additional optional argument @p target_component allows for a
+   * re-sorting and grouping of components. To this end, it contains for each
+   * component the component number it shall be counted as. Having the same
+   * number entered several times sums up several components as the same. One
+   * of the applications of this argument is when you want to form block
+   * matrices and vectors, but want to pack several components into the same
+   * block (for example, when you have @p dim velocities and one pressure, to
+   * put all velocities into one block, and the pressure into another).
+   *
+   * The result is returned in @p dofs_per_component. Note that the size of @p
+   * dofs_per_component needs to be enough to hold all the indices specified
+   * in @p target_component. If this is not the case, an assertion is thrown.
+   * The indices not targeted by @p target_component are left untouched.
+   */
+  template <typename DoFHandlerType>
+  void
+  count_dofs_per_component (const DoFHandlerType                 &dof_handler,
+                            std::vector<types::global_dof_index> &dofs_per_component,
+                            const bool                            vector_valued_once = false,
+                            std::vector<unsigned int> target_component
+                            = std::vector<unsigned int>());
+
+  /**
+   * Count the degrees of freedom in each block. This function is similar to
+   * count_dofs_per_component(), with the difference that the counting is done
+   * by blocks. See
+   * @ref GlossBlock "blocks"
+   * in the glossary for details. Again the vectors are assumed to have the
+   * correct size before calling this function. If this is not the case, an
+   * assertion is thrown.
+   *
+   * This function is used in the step-22, step-31, and step-32 tutorial
+   * programs.
+   *
+   * @pre The @p dofs_per_block variable has as many components as the finite
+   * element used by the @p dof argument has blocks, or alternatively as many
+   * blocks as are enumerated in the @p target_block argument if given.
+   */
+  template <typename DoFHandlerType>
+  void
+  count_dofs_per_block (const DoFHandlerType                 &dof,
+                        std::vector<types::global_dof_index> &dofs_per_block,
+                        const std::vector<unsigned int>  &target_block
+                        = std::vector<unsigned int>());
+
+  /**
+   * For each active cell of a DoFHandler or hp::DoFHandler, extract the
+   * active finite element index and fill the vector given as second argument.
+   * This vector is assumed to have as many entries as there are active cells.
+   *
+   * For non-hp DoFHandler objects given as first argument, the returned
+   * vector will consist of only zeros, indicating that all cells use the same
+   * finite element. For a hp::DoFHandler, the values may be different,
+   * though.
+   */
+  template <typename DoFHandlerType>
+  void
+  get_active_fe_indices (const DoFHandlerType      &dof_handler,
+                         std::vector<unsigned int> &active_fe_indices);
+
+  /**
+   * Count how many degrees of freedom live on a set of cells (i.e., a patch)
+   * described by the argument.
+   *
+   * Patches are often used in defining error estimators that require the
+   * solution of a local problem on the patch surrounding each of the cells of
+   * the mesh. You can get a list of cells that form the patch around a given
+   * cell using GridTools::get_patch_around_cell(). This function is then
+   * useful in setting up the size of the linear system used to solve the
+   * local problem on the patch around a cell. The function
+   * DoFTools::get_dofs_on_patch() will then help to make the connection
+   * between global degrees of freedom and the local ones.
+   *
+   * @tparam DoFHandlerType A type that is either DoFHandler or
+   * hp::DoFHandler. In C++, the compiler can not determine the type of
+   * <code>DoFHandlerType</code> from the function call. You need to specify
+   * it as an explicit template argument following the function name.
+   *
+   * @param patch A collection of cells within an object of type
+   * DoFHandlerType
+   *
+   * @return The number of degrees of freedom associated with the cells of
+   * this patch.
+   *
+   * @note In the context of a parallel distributed computation, it only makes
+   * sense to call this function on patches around locally owned cells. This
+   * is because the neighbors of locally owned cells are either locally owned
+   * themselves, or ghost cells. For both, we know that these are in fact the
+   * real cells of the complete, parallel triangulation. We can also query the
+   * degrees of freedom on these. In other words, this function can only work
+   * if all cells in the patch are either locally owned or ghost cells.
+   *
+   * @author Arezou Ghesmati, Wolfgang Bangerth, 2014
+   */
+  template <typename DoFHandlerType>
+  unsigned int
+  count_dofs_on_patch (const std::vector<typename DoFHandlerType::active_cell_iterator> &patch);
+
+  /**
+   * Return the set of degrees of freedom that live on a set of cells (i.e., a
+   * patch) described by the argument.
+   *
+   * Patches are often used in defining error estimators that require the
+   * solution of a local problem on the patch surrounding each of the cells of
+   * the mesh. You can get a list of cells that form the patch around a given
+   * cell using GridTools::get_patch_around_cell(). While
+   * DoFTools::count_dofs_on_patch() can be used to determine the size of
+   * these local problems, so that one can assemble the local system and then
+   * solve it, it is still necessary to provide a mapping between the global
+   * indices of the degrees of freedom that live on the patch and a local
+   * enumeration. This function provides such a local enumeration by returning
+   * the set of degrees of freedom that live on the patch.
+   *
+   * Since this set is returned in the form of a std::vector, one can also
+   * think of it as a mapping
+   * @code
+   *   i -> global_dof_index
+   * @endcode
+   * where <code>i</code> is an index into the returned vector (i.e., the
+   * <i>local</i> index of a degree of freedom on the patch) and
+   * <code>global_dof_index</code> is the global index of a degree of freedom
+   * located on the patch. The array returned has size equal to
+   * DoFTools::count_dofs_on_patch().
+   *
+   * @note The array returned is sorted by global DoF index. Consequently, if
+   * one considers the index into this array a local DoF index, then the local
+   * system that results retains the block structure of the global system.
+   *
+   * @tparam DoFHandlerType A type that is either DoFHandler or
+   * hp::DoFHandler. In C++, the compiler can not determine the type of
+   * <code>DoFHandlerType</code> from the function call. You need to specify
+   * it as an explicit template argument following the function name.
+   *
+   * @param patch A collection of cells within an object of type
+   * DoFHandlerType
+   *
+   * @return A list of those global degrees of freedom located on the patch,
+   * as defined above.
+   *
+   * @note In the context of a parallel distributed computation, it only makes
+   * sense to call this function on patches around locally owned cells. This
+   * is because the neighbors of locally owned cells are either locally owned
+   * themselves, or ghost cells. For both, we know that these are in fact the
+   * real cells of the complete, parallel triangulation. We can also query the
+   * degrees of freedom on these. In other words, this function can only work
+   * if all cells in the patch are either locally owned or ghost cells.
+   *
+   * @author Arezou Ghesmati, Wolfgang Bangerth, 2014
+   */
+  template <typename DoFHandlerType>
+  std::vector<types::global_dof_index>
+  get_dofs_on_patch (const std::vector<typename DoFHandlerType::active_cell_iterator> &patch);
+
+  /**
+   * @}
+   */
+
+  /**
+   * Create a mapping from degree of freedom indices to the index of that
+   * degree of freedom on the boundary. After this operation,
+   * <tt>mapping[dof]</tt> gives the index of the degree of freedom with
+   * global number @p dof in the list of degrees of freedom on the boundary.
+   * If the degree of freedom requested is not on the boundary, the value of
+   * <tt>mapping[dof]</tt> is @p invalid_dof_index. This function is mainly
+   * used when setting up matrices and vectors on the boundary from the trial
+   * functions, which have global numbers, while the matrices and vectors use
+   * numbers of the trial functions local to the boundary.
+   *
+   * Prior content of @p mapping is deleted.
+   */
+  template <typename DoFHandlerType>
+  void
+  map_dof_to_boundary_indices (const DoFHandlerType                 &dof_handler,
+                               std::vector<types::global_dof_index> &mapping);
+
+  /**
+   * Same as the previous function, except that only those parts of the
+   * boundary are considered for which the boundary indicator is listed in the
+   * second argument.
+   *
+   * See the general doc of this namespace for more information.
+   *
+   * @see
+   * @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+   */
+  template <typename DoFHandlerType>
+  void
+  map_dof_to_boundary_indices (const DoFHandlerType                 &dof_handler,
+                               const std::set<types::boundary_id>   &boundary_ids,
+                               std::vector<types::global_dof_index> &mapping);
+
+  /**
+   * Return a list of support points (see this
+   * @ref GlossSupport "glossary entry")
+   * for all the degrees of freedom handled by this DoF handler object. This
+   * function, of course, only works if the finite element object used by the
+   * DoF handler object actually provides support points, i.e. no edge
+   * elements or the like. Otherwise, an exception is thrown.
+   *
+   * @pre The given array must have a length of as many elements as there are
+   * degrees of freedom.
+   *
+   * @note The precondition to this function that the output argument needs to
+   * have size equal to the total number of degrees of freedom makes this
+   * function unsuitable for the case that the given DoFHandler object derives
+   * from a parallel::distributed::Triangulation object.  Consequently, this
+   * function will produce an error if called with such a DoFHandler.
+   */
+  template <int dim, int spacedim>
+  void
+  map_dofs_to_support_points (const Mapping<dim,spacedim>       &mapping,
+                              const DoFHandler<dim,spacedim>    &dof_handler,
+                              std::vector<Point<spacedim> >     &support_points);
+
+  /**
+   * Same as the previous function but for the hp case, i.e., taking an
+   * hp::MappingCollection and an hp::DoFHandler.
+   */
+  template <int dim, int spacedim>
+  void
+  map_dofs_to_support_points (const dealii::hp::MappingCollection<dim,spacedim>   &mapping,
+                              const hp::DoFHandler<dim,spacedim>    &dof_handler,
+                              std::vector<Point<spacedim> > &support_points);
+
+  /**
+   * This function is a version of the above map_dofs_to_support_points
+   * function that doesn't simply return a vector of support points (see this
+   * @ref GlossSupport "glossary entry")
+   * with one entry for each global degree of freedom, but instead a map that
+   * maps from each DoF's index to its location. The point of this function is
+   * that it is also usable in cases where the DoFHandler is based on a
+   * parallel::distributed::Triangulation object. In such cases, each
+   * processor will not be able to determine the support point location of all
+   * DoFs, and worse no processor may be able to hold a vector that would
+   * contain the locations of all DoFs even if they were known. As a
+   * consequence, this function constructs a map from those DoFs for which we
+   * can know the locations (namely, those DoFs that are locally relevant, see
+   * @ref GlossLocallyRelevantDof "locally relevant DoFs")
+   * to their locations.
+   *
+   * For non-distributed triangulations, the map returned as @p support_points
+   * is of course dense, i.e., every DoF is to be found in it.
+   *
+   * @param mapping The mapping from the reference cell to the real cell on
+   * which DoFs are defined.
+   * @param dof_handler The object that describes which DoF indices live on
+   * which cell of the triangulation.
+   * @param support_points A map that for every locally relevant DoF index
+   * contains the corresponding location in real space coordinates. Previous
+   * content of this object is deleted in this function.
+   */
+  template <int dim, int spacedim>
+  void
+  map_dofs_to_support_points (const Mapping<dim,spacedim>       &mapping,
+                              const DoFHandler<dim,spacedim>    &dof_handler,
+                              std::map<types::global_dof_index, Point<spacedim> >     &support_points);
+
+  /**
+   * Same as the previous function but for the hp case.
+   */
+  template <int dim, int spacedim>
+  void
+  map_dofs_to_support_points (const dealii::hp::MappingCollection<dim,spacedim>   &mapping,
+                              const hp::DoFHandler<dim,spacedim>    &dof_handler,
+                              std::map<types::global_dof_index, Point<spacedim> > &support_points);
+
+
+  /**
+   * This is the opposite function to the one above. It generates a map where
+   * the keys are the support points of the degrees of freedom, while the
+   * values are the DoF indices. For a definition of support points, see this
+   * @ref GlossSupport "glossary entry".
+   *
+   * Since there is no natural order in the space of points (except for the 1d
+   * case), you have to provide a map with an explicitly specified comparator
+   * object. This function is therefore templatized on the comparator object.
+   * Previous content of the map object is deleted in this function.
+   *
+   * Just as with the function above, it is assumed that the finite element in
+   * use here actually supports the notion of support points of all its
+   * components.
+   */
+  template <typename DoFHandlerType, class Comp>
+  void
+  map_support_points_to_dofs
+  (const Mapping<DoFHandlerType::dimension, DoFHandlerType::space_dimension>       &mapping,
+   const DoFHandlerType                                                            &dof_handler,
+   std::map<Point<DoFHandlerType::space_dimension>, types::global_dof_index, Comp> &point_to_index_map);
+
+  /**
+   * Map a coupling table from the user friendly organization by components to
+   * the organization by blocks. Specializations of this function for
+   * DoFHandler and hp::DoFHandler are required due to the different results
+   * of their finite element access.
+   *
+   * The return vector will be initialized to the correct length inside this
+   * function.
+   */
+  template <int dim, int spacedim>
+  void
+  convert_couplings_to_blocks (const hp::DoFHandler<dim,spacedim> &dof_handler,
+                               const Table<2, Coupling> &table_by_component,
+                               std::vector<Table<2,Coupling> > &tables_by_block);
+
+  /**
+   * Make a constraint matrix for the constraints that result from zero
+   * boundary values on the given boundary indicator.
+   *
+   * This function constrains all degrees of freedom on the given part of the
+   * boundary.
+   *
+   * A variant of this function with different arguments is used in step-36.
+   *
+   * @param dof The DoFHandler to work on.
+   * @param boundary_id The indicator of that part of the boundary for which
+   * constraints should be computed. If this number equals
+   * numbers::invalid_boundary_id then all boundaries of the domain will be
+   * treated.
+   * @param zero_boundary_constraints The constraint object into which the
+   * constraints will be written. The new constraints due to zero boundary
+   * values will simply be added, preserving any other constraints previously
+   * present. However, this will only work if the previous content of that
+   * object consists of constraints on degrees of freedom that are not located
+   * on the boundary treated here. If there are previously existing
+   * constraints for degrees of freedom located on the boundary, then this
+   * would constitute a conflict. See the
+   * @ref constraints
+   * module for handling the case where there are conflicting constraints on
+   * individual degrees of freedom.
+   * @param component_mask An optional component mask that restricts the
+   * functionality of this function to a subset of an FESystem. For non-
+   * @ref GlossPrimitive "primitive"
+   * shape functions, any degree of freedom is affected that belongs to a
+   * shape function where at least one of its nonzero components is affected
+   * by the component mask (see
+   * @ref GlossComponentMask).
+   * If this argument is omitted, all components of the finite element with
+   * degrees of freedom at the boundary will be considered.
+   *
+   * @ingroup constraints
+   *
+   * @see
+   * @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+   */
+  template <int dim, int spacedim, template <int, int> class DoFHandlerType>
+  void
+  make_zero_boundary_constraints (const DoFHandlerType<dim,spacedim> &dof,
+                                  const types::boundary_id            boundary_id,
+                                  ConstraintMatrix                   &zero_boundary_constraints,
+                                  const ComponentMask                &component_mask = ComponentMask());
+
+  /**
+   * Do the same as the previous function, except do it for all parts of the
+   * boundary, not just those with a particular boundary indicator. This
+   * function is then equivalent to calling the previous one with
+   * numbers::invalid_boundary_id as second argument.
+   *
+   * This function is used in step-36, for example.
+   *
+   * @ingroup constraints
+   */
+  template <int dim, int spacedim, template <int, int> class DoFHandlerType>
+  void
+  make_zero_boundary_constraints (const DoFHandlerType<dim,spacedim> &dof,
+                                  ConstraintMatrix                   &zero_boundary_constraints,
+                                  const ComponentMask                &component_mask = ComponentMask());
+
+
+  /**
+   * Map a coupling table from the user friendly organization by components to
+   * the organization by blocks. Specializations of this function for
+   * DoFHandler and hp::DoFHandler are required due to the different results
+   * of their finite element access.
+   *
+   * The return vector will be initialized to the correct length inside this
+   * function.
+   */
+  template <int dim, int spacedim>
+  void
+  convert_couplings_to_blocks (const DoFHandler<dim,spacedim> &dof_handler,
+                               const Table<2, Coupling> &table_by_component,
+                               std::vector<Table<2,Coupling> > &tables_by_block);
+
+  /**
+   * Given a finite element and a table how the vector components of it couple
+   * with each other, compute and return a table that describes how the
+   * individual shape functions couple with each other.
+   */
+  template <int dim, int spacedim>
+  Table<2,Coupling>
+  dof_couplings_from_component_couplings (const FiniteElement<dim,spacedim> &fe,
+                                          const Table<2,Coupling> &component_couplings);
+
+  /**
+   * Same function as above for a collection of finite elements, returning a
+   * collection of tables.
+   *
+   * The function currently treats DoFTools::Coupling::nonzero the same as
+   * DoFTools::Coupling::always .
+   */
+  template <int dim, int spacedim>
+  std::vector<Table<2,Coupling> >
+  dof_couplings_from_component_couplings (const hp::FECollection<dim,spacedim> &fe,
+                                          const Table<2,Coupling> &component_couplings);
+  /**
+   * @todo Write description
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcFiniteElementsDontMatch);
+  /**
+   * @todo Write description
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcGridNotCoarser);
+  /**
+   * @todo Write description
+   *
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcGridsDontMatch);
+  /**
+   * The ::DoFHandler or hp::DoFHandler was not initialized with a finite
+   * element. Please call DoFHandler::distribute_dofs() etc. first.
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcNoFESelected);
+  /**
+   * @todo Write description
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcInvalidBoundaryIndicator);
+}
+
+
+
+/* ------------------------- inline functions -------------- */
+
+#ifndef DOXYGEN
+
+namespace DoFTools
+{
+  /**
+   * Raise @p c1 to the stronger of @p c1 and @p c2 and return the result.
+   *
+   * @relates DoFTools
+   */
+  inline
+  Coupling operator |= (Coupling &c1,
+                        const Coupling c2)
+  {
+    // an 'always' on the left can't be strengthened; otherwise only
+    // 'always' or 'nonzero' on the right replace what is stored in c1
+    if (c1 != always && (c2 == always || c2 == nonzero))
+      c1 = c2;
+    return c1;
+  }
+
+
+  /**
+   * Return the stronger of two couplings, ranked always > nonzero > none.
+   *
+   * @relates DoFTools
+   */
+  inline
+  Coupling operator | (const Coupling c1,
+                       const Coupling c2)
+  {
+    const bool have_always  = (c1 == always)  || (c2 == always);
+    const bool have_nonzero = (c1 == nonzero) || (c2 == nonzero);
+
+    // precedence: always first, then nonzero, and none if neither occurs
+    return have_always ? always : (have_nonzero ? nonzero : none);
+  }
+
+
+// ---------------------- inline and template functions --------------------
+  // Non-hp case: return the dofs_per_cell member of what dh.get_fe() returns.
+  template <int dim, int spacedim>
+  inline
+  unsigned int
+  max_dofs_per_cell (const DoFHandler<dim,spacedim> &dh)
+  {
+    return dh.get_fe().dofs_per_cell;
+  }
+
+  // Non-hp case: return the dofs_per_face member of what dh.get_fe() returns.
+  template <int dim, int spacedim>
+  inline
+  unsigned int
+  max_dofs_per_face (const DoFHandler<dim,spacedim> &dh)
+  {
+    return dh.get_fe().dofs_per_face;
+  }
+
+  // Non-hp case: return the dofs_per_vertex member of what dh.get_fe() returns.
+  template <int dim, int spacedim>
+  inline
+  unsigned int
+  max_dofs_per_vertex (const DoFHandler<dim,spacedim> &dh)
+  {
+    return dh.get_fe().dofs_per_vertex;
+  }
+
+  // Non-hp case: forward to n_components() of the object dh.get_fe() returns.
+  template <int dim, int spacedim>
+  inline
+  unsigned int
+  n_components (const DoFHandler<dim,spacedim> &dh)
+  {
+    return dh.get_fe().n_components();
+  }
+
+
+  // Non-hp case: forward to is_primitive() of the object dh.get_fe() returns.
+  template <int dim, int spacedim>
+  inline
+  bool
+  fe_is_primitive (const DoFHandler<dim,spacedim> &dh)
+  {
+    return dh.get_fe().is_primitive();
+  }
+
+  // hp case: forward to max_dofs_per_cell() of the object dh.get_fe() returns.
+  template <int dim, int spacedim>
+  inline
+  unsigned int
+  max_dofs_per_cell (const hp::DoFHandler<dim,spacedim> &dh)
+  {
+    return dh.get_fe().max_dofs_per_cell ();
+  }
+
+  // hp case: forward to max_dofs_per_face() of the object dh.get_fe() returns.
+  template <int dim, int spacedim>
+  inline
+  unsigned int
+  max_dofs_per_face (const hp::DoFHandler<dim,spacedim> &dh)
+  {
+    return dh.get_fe().max_dofs_per_face ();
+  }
+
+  // hp case: forward to max_dofs_per_vertex() of the object dh.get_fe() returns.
+  template <int dim, int spacedim>
+  inline
+  unsigned int
+  max_dofs_per_vertex (const hp::DoFHandler<dim,spacedim> &dh)
+  {
+    return dh.get_fe().max_dofs_per_vertex ();
+  }
+
+  // hp case: only entry 0 of dh.get_fe() is asked; presumably all entries agree -- confirm.
+  template <int dim, int spacedim>
+  inline
+  unsigned int
+  n_components (const hp::DoFHandler<dim,spacedim> &dh)
+  {
+    return dh.get_fe()[0].n_components();
+  }
+
+  // hp case: only entry 0 of dh.get_fe() is checked. NOTE(review): other entries are not looked at.
+  template <int dim, int spacedim>
+  inline
+  bool
+  fe_is_primitive (const hp::DoFHandler<dim,spacedim> &dh)
+  {
+    return dh.get_fe()[0].is_primitive();
+  }
+
+
+  template <typename DoFHandlerType, class Comp>
+  void
+  map_support_points_to_dofs
+  (
+    const Mapping<DoFHandlerType::dimension,DoFHandlerType::space_dimension>        &mapping,
+    const DoFHandlerType                                                            &dof_handler,
+    std::map<Point<DoFHandlerType::space_dimension>, types::global_dof_index, Comp> &point_to_index_map)
+  {
+    // argument checking is left to the vector-based function called here,
+    // which is handed one slot per degree of freedom
+    std::vector<Point<DoFHandlerType::space_dimension> > dof_locations (dof_handler.n_dofs());
+    map_dofs_to_support_points (mapping, dof_handler, dof_locations);
+
+    // invert index -> point into the (emptied) output map; where several
+    // DoFs map to an equivalent key, the highest index is the one kept
+    point_to_index_map.clear ();
+    types::global_dof_index dof = 0;
+    for (; dof < dof_handler.n_dofs(); ++dof)
+      point_to_index_map[dof_locations[dof]] = dof;
+  }
+}
+
+#endif
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/dofs/function_map.h b/include/deal.II/dofs/function_map.h
new file mode 100644
index 0000000..5e8f240
--- /dev/null
+++ b/include/deal.II/dofs/function_map.h
@@ -0,0 +1,86 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__function_map_h
+#define dealii__function_map_h
+
+#include <deal.II/base/config.h>
+#include <map>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int spacedim, typename Number> class Function;
+
+
+
+/**
+ * This class declares a local typedef that denotes a mapping between a
+ * boundary indicator (see
+ * @ref GlossBoundaryIndicator)
+ * that is used to describe what kind of boundary condition holds on a
+ * particular piece of the boundary, and a pointer to the function object
+ * that provides the boundary values on this part of the boundary.
+ * This type is required in many functions in the library where, for example,
+ * we need to know about the functions $h_i(\mathbf x)$ used in boundary
+ * conditions
+ * @f{align*}{
+ * \mathbf n \cdot \nabla u = h_i \qquad \qquad
+ * \text{on}\ \Gamma_i\subset\partial\Omega.
+ * @f}
+ * An example is the function KellyErrorEstimator::estimate() that allows us
+ * to provide a set of functions $h_i$ for all those boundary indicators $i$
+ * for which the boundary condition is supposed to be of Neumann type. Of
+ * course, the same kind of principle can be applied to cases where we care
+ * about Dirichlet values, where one needs to provide a map from boundary
+ * indicator $i$ to Dirichlet function $h_i$ if the boundary conditions are
+ * given as
+ * @f{align*}{
+ * u = h_i \qquad \qquad \text{on}\ \Gamma_i\subset\partial\Omega.
+ * @f}
+ * This is, for example, the case for the VectorTools::interpolate()
+ * functions.
+ *
+ * Tutorial programs step-6, step-7 and step-8 show examples of how to use
+ * function arguments of this type in situations where we actually have an
+ * empty map (i.e., we want to describe that <i>no</i> part of the boundary is
+ * a Neumann boundary). step-16 actually uses it in a case where one of the
+ * parts of the boundary uses a boundary indicator for which we want to use a
+ * function object.
+ *
+ * It seems odd at first to declare this typedef inside a class, rather than
+ * declaring a typedef at global scope. The reason is that C++98 does not
+ * allow templated typedefs, whereas here we in fact want a typedef that
+ * depends on the space dimension. (Defining templated typedefs is something
+ * that is possible starting with the C++11 standard, but that wasn't possible
+ * within the C++98 standard in place when this programming pattern was
+ * conceived.)
+ *
+ * @ingroup functions
+ * @author Wolfgang Bangerth, Ralf Hartmann, 2001
+ */
+template<int dim,typename Number=double>
+struct FunctionMap
+{
+  /**
+   * The type discussed above: a map from boundary ids to pointers to const
+   * Function objects. Since it can't be named FunctionMap (that would clash
+   * with a possible constructor), it is named like standard container typedefs.
+   */
+  typedef std::map<types::boundary_id, const Function<dim,Number>*> type;
+};
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/dofs/number_cache.h b/include/deal.II/dofs/number_cache.h
new file mode 100644
index 0000000..c101dfe
--- /dev/null
+++ b/include/deal.II/dofs/number_cache.h
@@ -0,0 +1,113 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__number_cache_h
+#define dealii__number_cache_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/index_set.h>
+
+#include <vector>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace internal
+{
+  namespace DoFHandler
+  {
+    /**
+     * A structure used by the DoFHandler classes to store information about
+     * the degrees of freedom they deal with.
+     */
+    struct NumberCache
+    {
+      /**
+       * Default constructor.
+       */
+      NumberCache ();
+
+      /**
+       * Determine an estimate for the memory consumption (in bytes) of this
+       * object.
+       */
+      std::size_t memory_consumption () const;
+
+      /**
+       * This function resets all the stored information.
+       */
+      void clear ();
+
+      /**
+       * Total number of dofs, accumulated over all processors that may
+       * participate on this mesh.
+       */
+      types::global_dof_index n_global_dofs;
+
+      /**
+       * Number of dofs owned by this MPI process. If this is a sequential
+       * computation, then this equals n_global_dofs.
+       */
+      types::global_dof_index n_locally_owned_dofs;
+
+      /**
+       * An index set denoting the set of locally owned dofs. If this is a
+       * sequential computation, then it contains the entire range
+       * [0,n_global_dofs).
+       */
+      IndexSet locally_owned_dofs;
+
+      /**
+       * The number of dofs owned by each of the various MPI processes. If
+       * this is a sequential job, then the vector contains a single element
+       * equal to n_global_dofs.
+       */
+      std::vector<types::global_dof_index> n_locally_owned_dofs_per_processor;
+
+      /**
+       * The dofs owned by each of the various MPI processes. If this is a
+       * sequential job, then the vector has a single element equal to
+       * locally_owned_dofs.
+       */
+      std::vector<IndexSet> locally_owned_dofs_per_processor;
+
+      /**
+       * Read or write the data members of this object to or from the archive
+       * @p ar for the purpose of serialization; @p version is not used.
+       */
+      template <class Archive>
+      void serialize (Archive &ar,
+                      const unsigned int version);
+    };
+
+
+    template <class Archive>
+    void
+    NumberCache::serialize (Archive &ar,
+                            const unsigned int /*version*/)
+    {
+      // the archive sees the members in declaration order; the version
+      // argument is ignored
+      ar &n_global_dofs &n_locally_owned_dofs &locally_owned_dofs
+      &n_locally_owned_dofs_per_processor &locally_owned_dofs_per_processor;
+    }
+
+  }
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // dealii__number_cache_h
diff --git a/include/deal.II/fe/block_mask.h b/include/deal.II/fe/block_mask.h
new file mode 100644
index 0000000..33b7972
--- /dev/null
+++ b/include/deal.II/fe/block_mask.h
@@ -0,0 +1,413 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__fe_block_mask_h
+#define dealii__fe_block_mask_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/memory_consumption.h>
+
+#include <vector>
+#include <iosfwd>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+/**
+ * This class represents a mask that can be used to select individual vector
+ * blocks of a finite element (see also
+ * @ref GlossBlockMask "this glossary entry").
+ * It will typically have as many elements as the finite element has blocks,
+ * and one can use <code>operator[]</code> to query whether a particular block
+ * has been selected.
+ *
+ * The semantics of this class are the same as the related ComponentMask
+ * class, i.e., a default constructed mask represents all possible blocks. See
+ * there for more information about these semantics.
+ *
+ * Objects of this kind are used in many places where one wants to restrict
+ * operations to a certain subset of blocks, e.g. in DoFTools::extract_dofs.
+ * These objects can either be created by hand, or, simpler, by asking the
+ * finite element to generate a block mask from certain selected blocks using
+ * code such as this where we create a mask that only denotes the pressure
+ * block of a Stokes element (see
+ * @ref vector_valued):
+ * @code
+ *   FESystem<dim> stokes_fe (FESystem<dim>(FE_Q<dim>(2), dim), 1,    // Q2 element for the velocities
+ *                            FE_Q<dim>(1),                     1);     // Q1 element for the pressure
+ *   FEValuesExtractors::Scalar pressure(dim);
+ *   BlockMask pressure_mask = stokes_fe.block_mask (pressure);
+ * @endcode
+ * Note that by wrapping the velocity elements into a single FESystem object
+ * we make sure that the overall element has only 2 blocks. The result is a
+ * block mask that, in both 2d and 3d, would have values <code>[false,
+ * true]</code>. (Compare this to the corresponding component mask discussed
+ * in the ComponentMask documentation.) Similarly, using
+ * @code
+ *   FEValuesExtractors::Vector velocities(0);
+ *   BlockMask velocity_mask = stokes_fe.block_mask (velocities);
+ * @endcode
+ * would result in a mask <code>[true, false]</code> in both 2d and 3d.
+ *
+ * @ingroup fe
+ * @author Wolfgang Bangerth
+ * @date 2012
+ * @ingroup vector_valued
+ */
+class BlockMask
+{
+public:
+  /**
+   * Initialize a block mask. The default is that a block mask represents a
+   * set of blocks that are <i>all</i> selected, i.e., calling this
+   * constructor results in a block mask that always returns <code>true</code>
+   * whenever asked whether a block is selected.
+   */
+  BlockMask ();
+
+  /**
+   * Initialize an object of this type with a set of selected blocks specified
+   * by the argument.
+   *
+   * @param block_mask A vector of <code>true/false</code> entries that
+   * determine which blocks of a finite element are selected. If the length of
+   * the given vector is zero, then this is interpreted as the case where
+   * <i>every</i> block is selected.
+   */
+  BlockMask (const std::vector<bool> &block_mask);
+
+  /**
+   * Initialize the block mask with a number of elements that are either all
+   * true or false.
+   *
+   * @param n_blocks The number of elements of this mask
+   * @param initializer The value each of these elements is supposed to have:
+   * either true or false.
+   */
+  BlockMask (const unsigned int n_blocks,
+             const bool         initializer);
+
+  /**
+   * If this block mask has been initialized with a mask of size greater than
+   * zero, then return the size of the mask represented by this object. On the
+   * other hand, if this mask has been initialized as an empty object that
+   * represents a mask that is true for every element (i.e., if this object
+   * would return true when calling represents_the_all_selected_mask()) then
+   * return zero since no definite size is known.
+   */
+  unsigned int size () const;
+
+  /**
+   * Return whether a particular block is selected by this mask. If this mask
+   * represents the case of an object that selects <i>all blocks</i> (e.g. if
+   * it is created using the default constructor or is converted from an empty
+   * vector of type bool) then this function returns true regardless of the
+   * given argument.
+   *
+   * @param block_index The index for which the function should return whether
+   * the block is selected. If this object represents a mask in which all
+   * blocks are always selected then any index is allowed here. Otherwise, the
+   * given index needs to be between zero and the number of blocks that this
+   * mask represents.
+   */
+  bool operator[] (const unsigned int block_index) const;
+
+  /**
+   * Return whether this block mask represents a mask with exactly
+   * <code>n</code> blocks. This is true if either it was initialized with a
+   * vector with exactly <code>n</code> entries of type <code>bool</code> (in
+   * this case, @p n must equal the result of size()) or if it was initialized
+   * with an empty vector (or using the default constructor) in which case it
+   * can represent a mask with an arbitrary number of blocks and will always
+   * say that a block is selected.
+   */
+  bool
+  represents_n_blocks (const unsigned int n) const;
+
+  /**
+   * Return the number of blocks that are selected by this mask.
+   *
+   * Since empty block masks represent a block mask that would return
+   * <code>true</code> for every block, this function may not know the true
+   * size of the block mask and it therefore requires an argument that denotes
+   * the overall number of blocks.
+   *
+   * If the object has been initialized with a non-empty mask (i.e., if the
+   * size() function returns something greater than zero, or equivalently if
+   * represents_the_all_selected_mask() returns false) then the argument can
+   * be omitted and the result of size() is taken.
+   */
+  unsigned int
+  n_selected_blocks (const unsigned int overall_number_of_blocks = numbers::invalid_unsigned_int) const;
+
+  /**
+   * Return the index of the first selected block. The argument is there for
+   * the same reason it exists with the n_selected_blocks() function.
+   *
+   * The function throws an exception if no block is selected at all.
+   */
+  unsigned int
+  first_selected_block (const unsigned int overall_number_of_blocks = numbers::invalid_unsigned_int) const;
+
+  /**
+   * Return true if this mask represents a default constructed mask that
+   * corresponds to one in which all blocks are selected. If true, then the
+   * size() function will return zero.
+   */
+  bool
+  represents_the_all_selected_mask () const;
+
+  /**
+   * Return a block mask that contains the union of the blocks selected by the
+   * current object and the one passed as an argument.
+   */
+  BlockMask operator | (const BlockMask &mask) const;
+
+  /**
+   * Return a block mask that has only those elements set that are set both in
+   * the current object as well as the one passed as an argument.
+   */
+  BlockMask operator & (const BlockMask &mask) const;
+
+  /**
+   * Return whether this object and the argument are identical.
+   */
+  bool operator== (const BlockMask &mask) const;
+
+  /**
+   * Return whether this object and the argument are not identical.
+   */
+  bool operator!= (const BlockMask &mask) const;
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  std::size_t
+  memory_consumption () const;
+
+private:
+  /**
+   * The actual block mask.
+   */
+  std::vector<bool> block_mask;
+
+  // make the output operator a friend so it can access
+  // the block_mask array
+  friend
+  std::ostream &operator << (std::ostream &out,
+                             const BlockMask &mask);
+};
+
+
+/**
+ * Write a block mask to an output stream. If the block mask represents one
+ * where all blocks are selected without specifying a particular size of the
+ * mask, then it writes the string <code>[all blocks selected]</code> to the
+ * stream. Otherwise, it prints the block mask in a form like
+ * <code>[true,true,true,false]</code>.
+ *
+ * @param out The stream to write to.
+ * @param mask The mask to write. @return A reference to the first argument.
+ */
+std::ostream &operator << (std::ostream &out,
+                           const BlockMask &mask);
+
+
+// -------------------- inline functions ---------------------
+
+// An empty mask vector stands for "all blocks selected"
+inline
+BlockMask::BlockMask()
+  : block_mask ()
+{}
+
+// Copy the caller's flags; an empty argument again means "all selected"
+inline
+BlockMask::BlockMask(const std::vector<bool> &block_mask)
+  : block_mask (block_mask)
+{}
+
+// Store n_blocks entries, each set to the value of initializer
+inline
+BlockMask::BlockMask(const unsigned int n_blocks,
+                     const bool         initializer)
+  : block_mask (n_blocks, initializer)
+{}
+
+
+inline unsigned int
+BlockMask::size () const
+{
+  // zero for the "all blocks selected" mask, which has no stored entries
+  return static_cast<unsigned int>(block_mask.size());
+}
+
+
+inline
+bool
+BlockMask::operator [](const unsigned int block_index) const
+{
+  // An empty mask is the "all blocks" mask: every index is selected,
+  // so no range check applies.
+  if (block_mask.empty())
+    return true;
+
+  // A sized mask only answers for indices it actually stores.
+  Assert (block_index < block_mask.size(),
+          ExcIndexRange (block_index, 0, block_mask.size()));
+
+  // Look up the flag stored for this block.
+  const bool is_selected = block_mask[block_index];
+  return is_selected;
+}
+
+
+inline
+bool
+BlockMask::represents_n_blocks(const unsigned int n) const
+{
+  // the unsized "all blocks" mask fits any n; a sized one must match exactly
+  const std::vector<bool>::size_type stored = block_mask.size();
+  return (stored == 0 || stored == n);
+}
+
+
+inline
+unsigned int
+BlockMask::n_selected_blocks(const unsigned int n) const
+{
+  // a caller-supplied total must agree with the size of a sized mask
+  const bool n_given = (n != numbers::invalid_unsigned_int);
+  if (n_given && (size() > 0))
+    AssertDimension (n, size());
+
+  // without an explicit total, fall back to the stored size
+  const unsigned int real_n = (n_given ? n : size());
+
+  // the unsized "all blocks" mask selects every one of the real_n blocks
+  if (block_mask.size() == 0)
+    return real_n;
+
+  // otherwise count the entries that are switched on
+  AssertDimension (real_n, block_mask.size());
+  unsigned int n_selected = 0;
+  for (unsigned int b=0; b<block_mask.size(); ++b)
+    if (block_mask[b])
+      ++n_selected;
+  return n_selected;
+}
+
+
+inline unsigned int
+BlockMask::first_selected_block(const unsigned int n) const
+{
+  // a caller-supplied total must agree with the size of a sized mask
+  if ((size() > 0) && (n != numbers::invalid_unsigned_int))
+    AssertDimension (n, size());
+
+  // in the unsized "all blocks" mask, block zero is already selected
+  if (block_mask.size() == 0)
+    return 0;
+
+  for (unsigned int b=0; b<block_mask.size(); ++b)
+    if (block_mask[b])
+      return b;
+
+  // a sized mask with every entry switched off: report the failure
+  Assert (false, ExcMessage ("No block is selected at all!"));
+  return numbers::invalid_unsigned_int;
+}
+
+
+
+inline bool
+BlockMask::represents_the_all_selected_mask () const
+{
+  // "all blocks selected" is encoded as a mask without stored entries
+  return block_mask.empty();
+}
+
+
+
+// Union of two masks. An empty mask stands for "all blocks selected", so a
+// union involving one must select every block as well.
+inline BlockMask
+BlockMask::operator | (const BlockMask &mask) const
+{
+  // return an all-true mask that keeps the size of the other operand, if
+  // it has one (if both are empty, the result is the empty mask again)
+  if (block_mask.size() == 0)
+    return BlockMask (mask.size(), true);
+  else if (mask.block_mask.size() == 0)
+    return BlockMask (size(), true);
+  else
+    {
+      // both masks store individual entries: a block belongs to the
+      // union if either operand selects it
+      AssertDimension(block_mask.size(), mask.block_mask.size());
+      std::vector<bool> new_mask (block_mask.size());
+      for (unsigned int i=0; i<block_mask.size(); ++i)
+        new_mask[i] = (block_mask[i] || mask.block_mask[i]);
+      return new_mask;
+    }
+}
+
+
+// Intersection of two masks: only blocks selected by both operands
+// remain selected.
+inline
+BlockMask
+BlockMask::operator & (const BlockMask &mask) const
+{
+  // intersecting with the unsized "all blocks" mask changes nothing,
+  // so the result is simply the other operand
+  if (block_mask.size() == 0)
+    return mask;
+  if (mask.block_mask.size() == 0)
+    return *this;
+
+  // both masks store individual entries: a block survives only if
+  // it is selected in each of them
+  AssertDimension(block_mask.size(), mask.block_mask.size());
+  std::vector<bool> both_selected (block_mask.size());
+  for (unsigned int b=0; b<block_mask.size(); ++b)
+    both_selected[b] = (block_mask[b] && mask.block_mask[b]);
+
+  return both_selected;
+}
+
+
+inline bool
+BlockMask::operator== (const BlockMask &mask) const
+{
+  // entry-wise comparison; an unsized mask only equals another unsized one
+  return (mask.block_mask == block_mask);
+}
+
+
+inline bool
+BlockMask::operator!= (const BlockMask &mask) const
+{
+  // true exactly when the stored flag vectors are not equal
+  return !(block_mask == mask.block_mask);
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/fe/component_mask.h b/include/deal.II/fe/component_mask.h
new file mode 100644
index 0000000..018990f
--- /dev/null
+++ b/include/deal.II/fe/component_mask.h
@@ -0,0 +1,430 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__fe_component_mask_h
+#define dealii__fe_component_mask_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/memory_consumption.h>
+
+#include <vector>
+#include <iosfwd>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+/**
+ * This class represents a mask that can be used to select individual vector
+ * components of a finite element (see also
+ * @ref GlossComponentMask "this glossary entry").
+ * It will typically have as many elements as the finite element has vector
+ * components, and one can use <code>operator[]</code> to query whether a
+ * particular component has been selected.
+ *
+ * Objects of this kind are used in many places where one wants to restrict
+ * operations to a certain subset of components, e.g. in
+ * DoFTools::make_zero_boundary_values or
+ * VectorTools::interpolate_boundary_values. These objects can either be
+ * created by hand, or, simpler, by asking the finite element to generate a
+ * component mask from certain selected components using code such as this
+ * where we create a mask that only denotes the pressure component of a
+ * Stokes element (see
+ * @ref vector_valued):
+ * @code
+ *   FESystem<dim> stokes_fe (FE_Q<dim>(2), dim,    // Q2 element for the velocities
+ *                            FE_Q<dim>(1), 1);     // Q1 element for the pressure
+ *   FEValuesExtractors::Scalar pressure(dim);
+ *   ComponentMask pressure_mask = stokes_fe.component_mask (pressure);
+ * @endcode
+ * The result is a component mask that, in 2d, would have values <code>[false,
+ * false, true]</code>. Similarly, using
+ * @code
+ *   FEValuesExtractors::Vector velocities(0);
+ *   ComponentMask velocity_mask = stokes_fe.component_mask (velocities);
+ * @endcode
+ * would result in a mask <code>[true, true, false]</code> in 2d. Of course,
+ * in 3d, the result would be <code>[true, true, true, false]</code>.
+ *
+ * @ingroup fe
+ * @author Wolfgang Bangerth
+ * @date 2012
+ * @ingroup vector_valued
+ */
+class ComponentMask
+{
+public:
+  /**
+   * Initialize a component mask. The default is that a component mask
+   * represents a set of components that are <i>all</i> selected, i.e.,
+   * calling this constructor results in a component mask that always returns
+   * <code>true</code> whenever asked whether a component is selected.
+   */
+  ComponentMask ();
+
+  /**
+   * Initialize an object of this type with a set of selected components
+   * specified by the argument.
+   *
+   * @param component_mask A vector of <code>true/false</code> entries that
+   * determine which components of a finite element are selected. If the
+   * length of the given vector is zero, then this is interpreted as the case
+   * where <i>every</i> component is selected.
+   */
+  ComponentMask (const std::vector<bool> &component_mask);
+
+  /**
+   * Initialize the component mask with a number of elements that are either
+   * all true or false.
+   *
+   * @param n_components The number of elements of this mask
+   * @param initializer The value each of these elements is supposed to have:
+   * either true or false.
+   */
+  ComponentMask (const unsigned int n_components,
+                 const bool         initializer);
+
+  /**
+   * Set a particular entry in the mask to a value.
+   */
+  void set (const unsigned int index, const bool value);
+
+  /**
+   * If this component mask has been initialized with a mask of size greater
+   * than zero, then return the size of the mask represented by this object.
+   * On the other hand, if this mask has been initialized as an empty object
+   * that represents a mask that is true for every element (i.e., if this
+   * object would return true when calling represents_the_all_selected_mask())
+   * then return zero since no definite size is known.
+   */
+  unsigned int size () const;
+
+  /**
+   * Return whether a particular component is selected by this mask. If this
+   * mask represents the case of an object that selects <i>all components</i>
+   * (e.g. if it is created using the default constructor or is converted from
+   * an empty vector of type bool) then this function returns true regardless
+   * of the given argument.
+   *
+   * @param component_index The index for which the function should return
+   * whether the component is selected. If this object represents a mask in
+   * which all components are always selected then any index is allowed here.
+   * Otherwise, the given index needs to be between zero and the number of
+   * components that this mask represents.
+   */
+  bool operator[] (const unsigned int component_index) const;
+
+  /**
+   * Return whether this component mask represents a mask with exactly
+   * <code>n</code> components. This is true if either it was initialized with
+   * a vector with exactly <code>n</code> entries of type <code>bool</code>
+   * (in this case, @p n must equal the result of size()) or if it was
+   * initialized with an empty vector (or using the default constructor) in
+   * which case it can represent a mask with an arbitrary number of components
+   * and will always say that a component is selected.
+   */
+  bool
+  represents_n_components (const unsigned int n) const;
+
+  /**
+   * Return the number of components that are selected by this mask.
+   *
+   * Since empty component masks represent a component mask that would return
+   * <code>true</code> for every component, this function may not know the
+   * true size of the component mask and it therefore requires an argument
+   * that denotes the overall number of components.
+   *
+   * If the object has been initialized with a non-empty mask (i.e., if the
+   * size() function returns something greater than zero, or equivalently if
+   * represents_the_all_selected_mask() returns false) then the argument can
+   * be omitted and the result of size() is taken.
+   */
+  unsigned int
+  n_selected_components (const unsigned int overall_number_of_components = numbers::invalid_unsigned_int) const;
+
+  /**
+   * Return the index of the first selected component. The argument is there
+   * for the same reason it exists with the n_selected_components() function.
+   *
+   * The function throws an exception if no component is selected at all.
+   */
+  unsigned int
+  first_selected_component (const unsigned int overall_number_of_components = numbers::invalid_unsigned_int) const;
+
+  /**
+   * Return true if this mask represents a default constructed mask that
+   * corresponds to one in which all components are selected. If true, then
+   * the size() function will return zero.
+   */
+  bool
+  represents_the_all_selected_mask () const;
+
+  /**
+   * Return a component mask that contains the union of the components
+   * selected by the current object and the one passed as an argument.
+   */
+  ComponentMask operator | (const ComponentMask &mask) const;
+
+  /**
+   * Return a component mask that has only those elements set that are set
+   * both in the current object as well as the one passed as an argument.
+   */
+  ComponentMask operator & (const ComponentMask &mask) const;
+
+  /**
+   * Return whether this object and the argument are identical.
+   */
+  bool operator== (const ComponentMask &mask) const;
+
+  /**
+   * Return whether this object and the argument are not identical.
+   */
+  bool operator!= (const ComponentMask &mask) const;
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  std::size_t
+  memory_consumption () const;
+
+  /**
+   * Exception
+   */
+  DeclExceptionMsg (ExcNoComponentSelected,
+                    "The number of selected components in a mask "
+                    "must be greater than zero.");
+
+private:
+  /**
+   * The actual component mask.
+   */
+  std::vector<bool> component_mask;
+
+  // make the output operator a friend so it can access
+  // the component_mask array
+  friend
+  std::ostream &operator << (std::ostream &out,
+                             const ComponentMask &mask);
+};
+
+
+/**
+ * Write a component mask to an output stream. If the component mask
+ * represents one where all components are selected without specifying a
+ * particular size of the mask, then it writes the string <code>[all
+ * components selected]</code> to the stream. Otherwise, it prints the
+ * component mask in a form like <code>[true,true,true,false]</code>.
+ *
+ * @param out The stream to write to.
+ * @param mask The mask to write. @return A reference to the first argument.
+ */
+std::ostream &operator << (std::ostream &out,
+                           const ComponentMask &mask);
+
+
+// -------------------- inline functions ---------------------
+
+// An empty mask vector stands for "all components selected"
+inline
+ComponentMask::ComponentMask()
+  : component_mask ()
+{}
+
+// Copy the caller's flags; an empty argument again means "all selected"
+inline
+ComponentMask::ComponentMask(const std::vector<bool> &component_mask)
+  : component_mask (component_mask)
+{}
+
+// Store n_components entries, each set to the value of initializer
+inline
+ComponentMask::ComponentMask(const unsigned int n_components,
+                             const bool         initializer)
+  : component_mask (n_components, initializer)
+{}
+
+
+inline unsigned int
+ComponentMask::size () const
+{
+  // zero for the "all components selected" mask, which stores no entries
+  return static_cast<unsigned int>(component_mask.size());
+}
+
+
+inline void
+ComponentMask::set(const unsigned int index, const bool value)
+{
+  // only stored entries can be overwritten, so index must address one
+  AssertIndexRange(index, component_mask.size());
+  component_mask[index] = value;
+}
+
+
+inline
+bool
+ComponentMask::operator [](const unsigned int component_index) const
+{
+  // An empty mask is the "all components" mask: every index is selected,
+  // so no range check applies.
+  if (component_mask.empty())
+    return true;
+
+  // A sized mask only answers for indices it actually stores.
+  AssertIndexRange (component_index, component_mask.size());
+
+  // Look up the flag stored for this component.
+  const bool is_selected = component_mask[component_index];
+  return is_selected;
+}
+
+
+inline
+bool
+ComponentMask::represents_n_components(const unsigned int n) const
+{
+  // the unsized "all components" mask fits any n; a sized one must match
+  const std::vector<bool>::size_type stored = component_mask.size();
+  return (stored == 0 || stored == n);
+}
+
+
+inline
+unsigned int
+ComponentMask::n_selected_components(const unsigned int n) const
+{
+  // a caller-supplied total must agree with the size of a sized mask
+  const bool n_given = (n != numbers::invalid_unsigned_int);
+  if (n_given && (size() > 0))
+    AssertDimension (n, size());
+
+  // without an explicit total, fall back to the stored size
+  const unsigned int real_n = (n_given ? n : size());
+
+  // the unsized "all components" mask selects all real_n components
+  if (component_mask.size() == 0)
+    return real_n;
+
+  // otherwise count the entries that are switched on
+  AssertDimension (real_n, component_mask.size());
+  unsigned int n_selected = 0;
+  for (unsigned int k=0; k<component_mask.size(); ++k)
+    if (component_mask[k])
+      ++n_selected;
+  return n_selected;
+}
+
+
+inline unsigned int
+ComponentMask::first_selected_component(const unsigned int n) const
+{
+  // a caller-supplied total must agree with the size of a sized mask
+  if ((size() > 0) && (n != numbers::invalid_unsigned_int))
+    AssertDimension (n, size());
+
+  // in the unsized "all components" mask, component zero is selected
+  if (component_mask.size() == 0)
+    return 0;
+
+  for (unsigned int k=0; k<component_mask.size(); ++k)
+    if (component_mask[k])
+      return k;
+
+  // a sized mask with every entry switched off: report the failure
+  Assert (false, ExcMessage ("No component is selected at all!"));
+  return numbers::invalid_unsigned_int;
+}
+
+
+
+inline bool
+ComponentMask::represents_the_all_selected_mask () const
+{
+  // "all components selected" is encoded as a mask without stored entries
+  return component_mask.empty();
+}
+
+
+
+// Union of two masks. An empty mask stands for "all components selected",
+// so a union involving one must select every component as well.
+inline ComponentMask
+ComponentMask::operator | (const ComponentMask &mask) const
+{
+  // return an all-true mask that keeps the size of the other operand, if
+  // it has one (if both are empty, the result is the empty mask again)
+  if (component_mask.size() == 0)
+    return ComponentMask (mask.size(), true);
+  else if (mask.component_mask.size() == 0)
+    return ComponentMask (size(), true);
+  else
+    {
+      // both masks store individual entries: a component belongs to the
+      // union if either operand selects it
+      AssertDimension(component_mask.size(), mask.component_mask.size());
+      std::vector<bool> new_mask (component_mask.size());
+      for (unsigned int i=0; i<component_mask.size(); ++i)
+        new_mask[i] = (component_mask[i] || mask.component_mask[i]);
+      return new_mask;
+    }
+}
+
+
+// Intersection of two masks: only components selected by both operands
+// remain selected.
+inline
+ComponentMask
+ComponentMask::operator & (const ComponentMask &mask) const
+{
+  // intersecting with the unsized "all components" mask changes nothing,
+  // so the result is simply the other operand
+  if (component_mask.size() == 0)
+    return mask;
+  if (mask.component_mask.size() == 0)
+    return *this;
+
+  // both masks store individual entries: a component survives only if
+  // it is selected in each of them
+  AssertDimension(component_mask.size(), mask.component_mask.size());
+  std::vector<bool> both_selected (component_mask.size());
+  for (unsigned int k=0; k<component_mask.size(); ++k)
+    both_selected[k] = (component_mask[k] && mask.component_mask[k]);
+
+  return both_selected;
+}
+
+
+inline bool
+ComponentMask::operator== (const ComponentMask &mask) const
+{
+  // entry-wise comparison; an unsized mask only equals another unsized one
+  return (mask.component_mask == component_mask);
+}
+
+
+inline bool
+ComponentMask::operator!= (const ComponentMask &mask) const
+{
+  // true exactly when the stored flag vectors are not equal
+  return !(component_mask == mask.component_mask);
+}
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/fe/fe.h b/include/deal.II/fe/fe.h
new file mode 100644
index 0000000..1ae29d7
--- /dev/null
+++ b/include/deal.II/fe/fe.h
@@ -0,0 +1,2983 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__fe_h
+#define dealii__fe_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/geometry_info.h>
+#include <deal.II/fe/fe_base.h>
+#include <deal.II/fe/fe_values_extractors.h>
+#include <deal.II/fe/fe_update_flags.h>
+#include <deal.II/fe/component_mask.h>
+#include <deal.II/fe/block_mask.h>
+#include <deal.II/fe/mapping.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int dim, int spacedim> class FEValuesBase;
+template <int dim, int spacedim> class FEValues;
+template <int dim, int spacedim> class FEFaceValues;
+template <int dim, int spacedim> class FESubfaceValues;
+template <int dim, int spacedim> class FESystem;
+namespace hp
+{
+  template <int dim, int spacedim> class FECollection;
+}
+
+
+/**
+ * This is the base class for finite elements in arbitrary dimensions. It
+ * declares the interface both in terms of member variables and public member
+ * functions through which properties of a concrete implementation of a finite
+ * element can be accessed. This interface generally consists of a number of
+ * groups of variables and functions that can roughly be delineated as
+ * follows:
+ * - Basic information about the finite element, such as the number of
+ * degrees of freedom per vertex, edge, or cell. This kind of data is stored
+ * in the FiniteElementData base class. (Though the FiniteElement::get_name()
+ * member function also falls into this category.)
+ * - A description of the shape functions and their derivatives on the
+ * reference cell $[0,1]^d$, if an element is indeed defined by mapping shape
+ * functions from the reference cell to an actual cell.
+ * - Matrices (and functions that access them) that describe how an
+ * element's shape functions relate to those on parent or child cells
+ * (restriction or prolongation) or neighboring cells (for hanging node
+ * constraints), as well as to other finite element spaces defined on the same
+ * cell (e.g., when doing $p$ refinement).
+ * - %Functions that describe the properties of individual shape functions,
+ * for example which
+ * @ref GlossComponent "vector components"
+ * of a
+ * @ref vector_valued "vector-valued finite element's"
+ * shape function are nonzero, or whether an element is
+ * @ref GlossPrimitive "primitive".
+ * - For elements that are interpolatory, such as the common $Q_p$
+ * Lagrange elements, data that describes where their
+ * @ref GlossSupport "support points"
+ * are located.
+ * - %Functions that define the interface to the FEValues class that is
+ * almost always used to access finite element shape functions from user code.
+ *
+ * The following sections discuss many of these concepts in more detail, and
+ * outline strategies by which concrete implementations of a finite element
+ * can provide the details necessary for a complete description of a finite
+ * element space.
+ *
+ * As a general rule, there are three ways by which derived classes provide
+ * this information:
+ * - A number of fields that are generally easy to compute and that
+ * are initialized by the constructor of this class (or the constructor of the
+ * FiniteElementData base class) and derived classes therefore have to compute
+ * in the process of calling this class's constructor. This is, specifically,
+ * the case for the basic information and parts of the descriptive information
+ * about shape functions mentioned above.
+ * - Some common matrices that are widely used in the library and for
+ * which this class provides protected member variables that the constructors
+ * of derived classes need to fill. The purpose of providing these matrices in
+ * this class is that (i) they are frequently used, and (ii) they are
+ * expensive to compute. Consequently, it makes sense to only compute them
+ * once, rather than every time they are used. In most cases, the constructor
+ * of the current class already sets them to their correct size, and derived
+ * classes therefore only have to fill them. Examples of this include the
+ * matrices that relate the shape functions on one cell to the shape functions
+ * on neighbors, children, and parents.
+ * - Uncommon information, or information that depends on specific input
+ * arguments, and that needs to be implemented by derived classes. For these,
+ * this base class only declares abstract virtual member functions and derived
+ * classes then have to implement them. Examples of this category would
+ * include the functions that compute values and derivatives of shape
+ * functions on the reference cell for which it is not possible to tabulate
+ * values because there are infinitely many points at which one may want to
+ * evaluate them. In some cases, derived classes may choose to simply not
+ * implement <i>all</i> possible interfaces (or may not <i>yet</i> have a
+ * complete implementation); for uncommon functions, there is then often a
+ * member function derived classes can overload that describes whether a
+ * particular feature is implemented. An example is whether an element
+ * implements the information necessary to use it in the $hp$ finite element
+ * context (see
+ * @ref hp "hp finite element support").
+ *
+ *
+ * <h3>Nomenclature</h3>
+ *
+ * Finite element classes have to define a large number of different
+ * properties describing  a finite element space. The following subsections
+ * describe some nomenclature that will be used in the documentation below.
+ *
+ * <h4>Components and blocks</h4>
+ *
+ * @ref vector_valued "Vector-valued finite elements"
+ * are elements used for systems of partial differential equations.
+ * Oftentimes, they are composed via the FESystem class (which is itself
+ * derived from the current class), but there are also non-composed elements
+ * that have multiple components (for example the FE_Nedelec and
+ * FE_RaviartThomas classes, among others). For any of these vector valued
+ * elements, individual shape functions may be nonzero in one or several
+ * @ref GlossComponent "components"
+ * of the vector valued function. If the element is
+ * @ref GlossPrimitive "primitive",
+ * there is indeed a single component with a nonzero entry for each shape
+ * function. This component can be determined using the
+ * FiniteElement::system_to_component_index() function.
+ *
+ * On the other hand, if there is at least one shape function that is nonzero
+ * in more than one vector component, then we call the entire element
+ * "non-primitive". FiniteElement::get_nonzero_components() can then be used to
+ * determine which vector components of a shape function are nonzero. The
+ * number of nonzero components of a shape function is returned by
+ * FiniteElement::n_nonzero_components(). Whether a shape function is non-primitive
+ * can be queried by FiniteElement::is_primitive().
+ *
+ * Oftentimes, one may want to split a linear system into blocks so that they
+ * reflect the structure of the underlying operator. This is typically not
+ * done based on vector components, but based on the use of
+ * @ref GlossBlock "blocks",
+ * and the result is then used to substructure objects of type BlockVector,
+ * BlockSparseMatrix, BlockMatrixArray, and so on. If you use non-primitive
+ * elements, you cannot determine the block number by
+ * FiniteElement::system_to_component_index(). Instead, you can use
+ * FiniteElement::system_to_block_index(). The number of blocks of a finite
+ * element can be determined by FiniteElement::n_blocks().
+ *
+ *
+ * <h4>Support points</h4>
+ *
+ * Finite elements are frequently defined by defining a polynomial space and a
+ * set of dual functionals. If these functionals involve point evaluations,
+ * then the element is "interpolatory" and it is possible to interpolate an
+ * arbitrary (but sufficiently smooth) function onto the finite element space
+ * by evaluating it at these points. We call these points "support points".
+ *
+ * Most finite elements are defined by mapping from the reference cell to a
+ * concrete cell. Consequently, the support points are then defined on the
+ * reference ("unit") cell, see
+ * @ref GlossSupport "this glossary entry".
+ * The support points on a concrete cell can then be computed by mapping the
+ * unit support points, using the Mapping class interface and derived classes,
+ * typically via the FEValues class.
+ *
+ * A typical code snippet to do so would look as follows:
+ * @code
+ * Quadrature<dim> dummy_quadrature (fe.get_unit_support_points());
+ * FEValues<dim>   fe_values (mapping, fe, dummy_quadrature,
+ *                            update_quadrature_points);
+ * fe_values.reinit (cell);
+ * Point<dim> mapped_point = fe_values.quadrature_point (i);
+ * @endcode
+ *
+ * Alternatively, the points can be transformed one-by-one:
+ * @code
+ * const std::vector<Point<dim> > &unit_points =
+ *    fe.get_unit_support_points();
+ *
+ * Point<dim> mapped_point =
+ *    mapping.transform_unit_to_real_cell (cell, unit_points[i]);
+ * @endcode
+ *
+ * @note Finite elements' implementation of the get_unit_support_points()
+ * function returns these points in the same order as shape functions. As a
+ * consequence, the quadrature points accessed above are also ordered in this
+ * way. The order of shape functions is typically documented in the class
+ * documentation of the various finite element classes.
+ *
+ *
+ * <h3>Implementing finite element spaces in derived classes</h3>
+ *
+ * The following sections provide some more guidance for implementing concrete
+ * finite element spaces in derived classes. This includes information that
+ * depends on the dimension for which you want to provide something, followed
+ * by a list of tools helping to generate information in concrete cases.
+ *
+ * It is important to note that there is a number of intermediate classes that
+ * can do a lot of what is necessary for a complete description of finite
+ * element spaces. For example, the FE_Poly, FE_PolyTensor, and FE_PolyFace
+ * classes in essence build a complete finite element space if you only
+ * provide them with an abstract description of the polynomial space upon
+ * which you want to build an element. Using these intermediate classes
+ * typically makes implementing finite element descriptions vastly simpler.
+ *
+ * As a general rule, if you want to implement an element, you will likely
+ * want to look at the implementation of other, similar elements first. Since
+ * many of the more complicated pieces of a finite element interface have to
+ * do with how they interact with mappings, quadrature, and the FEValues
+ * class, you will also want to read through the
+ * @ref FE_vs_Mapping_vs_FEValues
+ * documentation module.
+ *
+ *
+ * <h4>Interpolation matrices in one dimension</h4>
+ *
+ * In one space dimension (i.e., for <code>dim==1</code> and any value of
+ * <code>spacedim</code>), finite element classes implementing the interface
+ * of the current base class need only set the #restriction and #prolongation
+ * matrices that describe the interpolation of the finite element space on one
+ * cell to that of its parent cell, and to that on its children, respectively.
+ * The constructor of the current class in one dimension presets the
+ * #interface_constraints matrix (used to describe hanging node constraints at
+ * the interface between cells of different refinement levels) to have size
+ * zero because there are no hanging nodes in 1d.
+ *
+ * <h4>Interpolation matrices in two dimensions</h4>
+ *
+ * In addition to the fields discussed above for 1D, a constraint matrix is
+ * needed to describe hanging node constraints if the finite element has
+ * degrees of freedom located on edges or vertices. These constraints are
+ * represented by an $m\times n$-matrix #interface_constraints, where <i>m</i>
+ * is the number of degrees of freedom on the refined side without the corner
+ * vertices (those dofs on the middle vertex plus those on the two lines), and
+ * <i>n</i> is that of the unrefined side (those dofs on the two vertices plus
+ * those on the line). The matrix is thus a rectangular one. The $m\times n$
+ * size of the #interface_constraints matrix can also be accessed through the
+ * interface_constraints_size() function.
+ *
+ * The mapping of the dofs onto the indices of the matrix on the unrefined
+ * side is as follows: let $d_v$ be the number of dofs on a vertex, $d_l$ that
+ * on a line, then $n=0...d_v-1$ refers to the dofs on vertex zero of the
+ * unrefined line, $n=d_v...2d_v-1$ to those on vertex one,
+ * $n=2d_v...2d_v+d_l-1$ to those on the line.
+ *
+ * Similarly, $m=0...d_v-1$ refers to the dofs on the middle vertex of the
+ * refined side (vertex one of child line zero, vertex zero of child line
+ * one), $m=d_v...d_v+d_l-1$ refers to the dofs on child line zero,
+ * $m=d_v+d_l...d_v+2d_l-1$ refers to the dofs on child line one.  Please note
+ * that we do not need to reserve space for the dofs on the end vertices of
+ * the refined lines, since these must be mapped one-to-one to the appropriate
+ * dofs of the vertices of the unrefined line.
+ *
+ * Through this construction, the degrees of freedom on the child faces are
+ * constrained to the degrees of freedom on the parent face. The information
+ * so provided is typically consumed by the
+ * DoFTools::make_hanging_node_constraints() function.
+ *
+ * @note The hanging node constraints described by these matrices are only
+ * relevant to the case where the same finite element space is used on
+ * neighboring (but differently refined) cells. The case that the finite
+ * element spaces on different sides of a face are different, i.e., the $hp$
+ * case (see
+ * @ref hp "hp finite element support")
+ * is handled by separate functions. See the
+ * FiniteElement::get_face_interpolation_matrix() and
+ * FiniteElement::get_subface_interpolation_matrix() functions.
+ *
+ *
+ * <h4>Interpolation matrices in three dimensions</h4>
+ *
+ * For the interface constraints, the 3d case is similar to the 2d case. The
+ * numbering for the indices $n$ on the mother face is obvious and keeps to
+ * the usual numbering of degrees of freedom on quadrilaterals.
+ *
+ * The numbering of the degrees of freedom on the interior of the refined
+ * faces for the index $m$ is as follows: let $d_v$ and $d_l$ be as above, and
+ * $d_q$ be the number of degrees of freedom per quadrilateral (and therefore
+ * per face), then $m=0...d_v-1$ denote the dofs on the vertex at the center,
+ * $m=d_v...5d_v-1$ for the dofs on the vertices at the center of the bounding
+ * lines of the quadrilateral, $m=5d_v...5d_v+4*d_l-1$ are for the degrees of
+ * freedom on the four lines connecting the center vertex to the outer
+ * boundary of the mother face, $m=5d_v+4*d_l...5d_v+4*d_l+8*d_l-1$ for the
+ * degrees of freedom on the small lines surrounding the quad, and
+ * $m=5d_v+12*d_l...5d_v+12*d_l+4*d_q-1$ for the dofs on the four child faces.
+ * Note the direction of the lines at the boundary of the quads, as shown
+ * below.
+ *
+ * The order of the twelve lines and the four child faces can be extracted
+ * from the following sketch, where the overall order of the different dof
+ * groups is depicted:
+ * @verbatim
+ *    *--15--4--16--*
+ *    |      |      |
+ *    10 19  6  20  12
+ *    |      |      |
+ *    1--7---0--8---2
+ *    |      |      |
+ *    9  17  5  18  11
+ *    |      |      |
+ *    *--13--3--14--*
+ * @endverbatim
+ * The numbering of vertices and lines, as well as the numbering of children
+ * within a line is consistent with the one described in Triangulation.
+ * Therefore, this numbering is seen from the outside and inside,
+ * respectively, depending on the face.
+ *
+ * The three-dimensional case has a few pitfalls available for derived classes
+ * that want to implement constraint matrices. Consider the following case:
+ * @verbatim
+ *          *-------*
+ *         /       /|
+ *        /       / |
+ *       /       /  |
+ *      *-------*   |
+ *      |       |   *-------*
+ *      |       |  /       /|
+ *      |   1   | /       / |
+ *      |       |/       /  |
+ *      *-------*-------*   |
+ *      |       |       |   *
+ *      |       |       |  /
+ *      |   2   |   3   | /
+ *      |       |       |/
+ *      *-------*-------*
+ * @endverbatim
+ * Now assume that we want to refine cell 2. We will end up with two faces
+ * with hanging nodes, namely the faces between cells 1 and 2, as well as
+ * between cells 2 and 3. Constraints have to be applied to the degrees of
+ * freedom on both these faces. The problem is that there is now an edge (the
+ * top right one of cell 2) which is part of both faces. The hanging node(s)
+ * on this edge are therefore constrained twice, once from both faces. To be
+ * meaningful, these constraints of course have to be consistent: both faces
+ * have to constrain the hanging nodes on the edge to the same nodes on the
+ * coarse edge (and only on the edge, as there can then be no constraints to
+ * nodes on the rest of the face), and they have to do so with the same
+ * weights. This is sometimes tricky since the nodes on the edge may have
+ * different local numbers.
+ *
+ * For the constraint matrix this means the following: if a degree of freedom
+ * on one edge of a face is constrained by some other nodes on the same edge
+ * with some weights, then the weights have to be exactly the same as those
+ * for constrained nodes on the three other edges with respect to the
+ * corresponding nodes on these edges. If this isn't the case, you will get
+ * into trouble with the ConstraintMatrix class that is the primary consumer
+ * of the constraint information: while that class is able to handle
+ * constraints that are entered more than once (as is necessary for the case
+ * above), it insists that the weights are exactly the same.
+ *
+ * Using this scheme, child face degrees of freedom are constrained against
+ * parent face degrees of freedom that contain those on the edges of the
+ * parent face; it is possible that some of them are in turn constrained
+ * themselves, leading to longer chains of constraints that the
+ * ConstraintMatrix class will eventually have to sort out. (The constraints
+ * described above are used by the DoFTools::make_hanging_node_constraints()
+ * function that constructs a ConstraintMatrix object.) However, this is of no
+ * concern for the FiniteElement and derived classes since they only act
+ * locally on one cell and its immediate neighbor, and do not see the bigger
+ * picture. The
+ * @ref hp_paper
+ * details how such chains are handled in practice.
+ *
+ *
+ * <h4>Helper functions</h4>
+ *
+ * Construction of a finite element and computation of the matrices described
+ * above is often a tedious task, in particular if it has to be performed for
+ * several dimensions. Most of this work can be avoided by using the
+ * intermediate classes already mentioned above (e.g., FE_Poly, FE_PolyTensor,
+ * etc). Other tasks can be automated by some of the functions in namespace
+ * FETools.
+ *
+ * <h5>Computing the correct basis from a set of linearly independent
+ * functions</h5>
+ *
+ * First, it may already be difficult to compute the basis of shape functions
+ * for arbitrary order and dimension. On the other hand, if the
+ * @ref GlossNodes "node values"
+ * are given, then the duality relation between node functionals and basis
+ * functions defines the basis. As a result, the shape function space may be
+ * defined from a set of linearly independent functions, such that the actual
+ * finite element basis is computed from linear combinations of them. The
+ * coefficients of these combinations are determined by the duality of node
+ * values and form a matrix.
+ *
+ * Using this matrix allows the construction of the basis of shape functions
+ * in two steps.
+ * <ol>
+ *
+ * <li>Define the space of shape functions using an arbitrary basis
+ * <i>w<sub>j</sub></i> and compute the matrix <i>M</i> of node functionals
+ * <i>N<sub>i</sub></i> applied to these basis functions, such that its
+ * entries are <i>m<sub>ij</sub> = N<sub>i</sub>(w<sub>j</sub>)</i>.
+ *
+ * <li>Compute the basis <i>v<sub>j</sub></i> of the finite element shape
+ * function space by applying <i>M<sup>-1</sup></i> to the basis
+ * <i>w<sub>j</sub></i>.
+ * </ol>
+ *
+ * The matrix <i>M</i> may be computed using FETools::compute_node_matrix().
+ * This function relies on the existence of #generalized_support_points and an
+ * implementation of the FiniteElement::interpolate() function with
+ * VectorSlice argument. (See the
+ * @ref GlossGeneralizedSupport "glossary entry on generalized support points"
+ * for more information.) With this, one can then use the following piece of
+ * code in the constructor of a class derived from FiniteElement to compute the
+ * $M$ matrix:
+ * @code
+ * FullMatrix<double> M(this->dofs_per_cell, this->dofs_per_cell);
+ * FETools::compute_node_matrix(M, *this);
+ * this->inverse_node_matrix.reinit(this->dofs_per_cell, this->dofs_per_cell);
+ * this->inverse_node_matrix.invert(M);
+ * @endcode
+ * Don't forget to make sure that #unit_support_points or
+ * #generalized_support_points are initialized before this!
+ *
+ * <h5>Computing prolongation matrices</h5>
+ *
+ * Once you have shape functions, you can define matrices that transfer data
+ * from one cell to its children or the other way around. This is a common
+ * operation in multigrid, of course, but is also used when interpolating the
+ * solution from one mesh to another after mesh refinement, as well as in the
+ * definition of some error estimators.
+ *
+ * To define the prolongation matrices, i.e., those matrices that describe the
+ * transfer of a finite element field from one cell to its children,
+ * implementations of finite elements can either fill the #prolongation array
+ * by hand, or can call FETools::compute_embedding_matrices().
+ *
+ * In the latter case, all that is required is the following piece of code:
+ * @code
+ * for (unsigned int c=0; c<GeometryInfo<dim>::max_children_per_cell; ++c)
+ *   this->prolongation[c].reinit (this->dofs_per_cell,
+ *                                 this->dofs_per_cell);
+ * FETools::compute_embedding_matrices (*this, this->prolongation);
+ * @endcode
+ * As in this example, prolongation is almost always implemented via
+ * embedding, i.e., the nodal values of the function on the children may be
+ * different from the nodal values of the function on the parent cell, but as
+ * a function of $\mathbf x\in{\mathbb R}^\text{spacedim}$, the finite element
+ * field on the child is the same as on the parent.
+ *
+ *
+ * <h5>Computing restriction matrices</h5>
+ *
+ * The opposite operation, restricting a finite element function defined on
+ * the children to the parent cell is typically implemented by interpolating
+ * the finite element function on the children to the nodal values of the
+ * parent cell. In deal.II, the restriction operation is implemented as a loop
+ * over the children of a cell that each apply a matrix to the vector of
+ * unknowns on that child cell (these matrices are stored in #restriction and
+ * are accessed by get_restriction_matrix()). The operation that then needs to
+ * be implemented turns out to be surprisingly difficult to describe, but is
+ * instructive to describe because it also defines the meaning of the
+ * #restriction_is_additive_flags array (accessed via the
+ * restriction_is_additive() function).
+ *
+ * To give a concrete example, assume we use a $Q_1$ element in 1d, and that
+ * on each of the parent and child cells degrees of freedom are (locally and
+ * globally) numbered as follows:
+ * @code
+ * meshes:             *-------*                        *---*---*
+ * local DoF numbers:  0       1                        0  1|0  1
+ * global DoF numbers: 0       1                        0   1   2
+ * @endcode
+ * Then we want the restriction operation to take the value of the zeroth DoF
+ * on child 0 as the value of the zeroth DoF on the parent, and take the value
+ * of the first DoF on child 1 as the value of the first DoF on the parent.
+ * Ideally, we would like to write this as follows
+ * @f[
+ *   U^\text{coarse}|_\text{parent}
+ *   = \sum_{\text{child}=0}^1 R_\text{child} U^\text{fine}|_\text{child}
+ * @f]
+ * where $U^\text{fine}|_\text{child=0}=(U^\text{fine}_0,U^\text{fine}_1)^T$
+ * and $U^\text{fine}|_\text{child=1}=(U^\text{fine}_1,U^\text{fine}_2)^T$.
+ * Writing the requested operation like this would here be possible by
+ * choosing
+ * @f[
+ *   R_0 = \left(\begin{matrix}1 & 0 \\ 0 & 0\end{matrix}\right),
+ *   \qquad\qquad
+ *   R_1 = \left(\begin{matrix}0 & 0 \\ 0 & 1\end{matrix}\right).
+ * @f]
+ * However, this approach already fails if we go to a $Q_2$ element with the
+ * following degrees of freedom:
+ * @code
+ * meshes:             *-------*                        *----*----*
+ * local DoF numbers:  0   2   1                        0 2 1|0 2 1
+ * global DoF numbers: 0   2   1                        0 2  1  4 3
+ * @endcode
+ * Writing things as the sum over matrix operations as above would not easily
+ * work because we have to add nonzero values to $U^\text{coarse}_2$ twice,
+ * once for each child.
+ *
+ * Consequently, restriction is typically implemented as a
+ * <i>concatenation</i> operation. I.e., we first compute the individual
+ * restrictions from each child,
+ * @f[
+ *   \tilde U^\text{coarse}_\text{child}
+ *   = R_\text{child} U^\text{fine}|_\text{child},
+ * @f]
+ * and then compute the values of $U^\text{coarse}|_\text{parent}$ with the
+ * following code:
+ * @code
+ * for (unsigned int child=0; child<cell->n_children(); ++child)
+ *   for (unsigned int i=0; i<dofs_per_cell; ++i)
+ *     if (U_tilde_coarse[child][i] != 0)
+ *       U_coarse_on_parent[i] = U_tilde_coarse[child][i];
+ * @endcode
+ * In other words, each nonzero element of $\tilde
+ * U^\text{coarse}_\text{child}$ <i>overwrites</i>, rather than adds to the
+ * corresponding element of $U^\text{coarse}|_\text{parent}$. This typically
+ * also implies that the restriction matrices from two different cells should
+ * agree on a value for coarse degrees of freedom that they both want to touch
+ * (otherwise the result would depend on the order in which we loop over
+ * children, which would be unreasonable because the order of children is an
+ * otherwise arbitrary convention). For example, in the example above, the
+ * restriction matrices will be
+ * @f[
+ *   R_0 = \left(\begin{matrix}1 & 0 & 0 \\ 0 & 0 & 0 \\ 0 & 1 & 0 \end{matrix}\right),
+ *   \qquad\qquad
+ *   R_1 = \left(\begin{matrix}0 & 0 & 0 \\ 0 & 1 & 0 \\ 1 & 0 & 0 \end{matrix}\right),
+ * @f]
+ * and the compatibility condition is that $R_{0,21}=R_{1,20}$ because they
+ * both indicate that $U^\text{coarse}|_\text{parent,2}$ should be set to one
+ * times $U^\text{fine}|_\text{child=0,1}$ and
+ * $U^\text{fine}|_\text{child=1,0}$.
+ *
+ * Unfortunately, not all finite elements allow writing the restriction
+ * operation in this way. For example, for the piecewise constant FE_DGQ(0)
+ * element, the value of the finite element field on the parent cell can not
+ * be determined by interpolation from the children. Rather, the only
+ * reasonable choice is to take it as the <i>average</i> value between the
+ * children -- so we are back to the sum operation, rather than the
+ * concatenation. Further thought shows that whether restriction should be
+ * additive or not is a property of the individual shape function, not of the
+ * finite element as a whole. Consequently, the
+ * FiniteElement::restriction_is_additive() function returns whether a
+ * particular shape function should act via concatenation (a return value of
+ * @p false) or via addition (return value of @p true), and the correct code
+ * for the overall operation is then as follows (and as, in fact, implemented
+ * in DoFAccessor::get_interpolated_dof_values()):
+ * @code
+ * for (unsigned int child=0; child<cell->n_children(); ++child)
+ *   for (unsigned int i=0; i<dofs_per_cell; ++i)
+ *     if (fe.restriction_is_additive(i) == true)
+ *       U_coarse_on_parent[i] += U_tilde_coarse[child][i];
+ *     else
+ *       if (U_tilde_coarse[child][i] != 0)
+ *         U_coarse_on_parent[i] = U_tilde_coarse[child][i];
+ * @endcode
+ *
+ *
+ * <h5>Computing #interface_constraints</h5>
+ *
+ * Constraint matrices can be computed semi-automatically using
+ * FETools::compute_face_embedding_matrices(). This function computes the
+ * representation of the coarse mesh functions by fine mesh functions for each
+ * child of a face separately. These matrices must be combined into a single
+ * rectangular constraint matrix, eliminating degrees of freedom on common
+ * vertices and edges as well as on the coarse grid vertices. See the
+ * discussion above for details of this numbering.
+ *
+ * @ingroup febase fe
+ *
+ * @author Wolfgang Bangerth, Guido Kanschat, Ralf Hartmann, 1998, 2000, 2001,
+ * 2005, 2015
+ */
+template <int dim, int spacedim=dim>
+class FiniteElement : public Subscriptor,
+  public FiniteElementData<dim>
+{
+public:
+  /**
+   * The dimension of the image space, corresponding to Triangulation.
+   */
+  static const unsigned int space_dimension = spacedim;
+
+  /**
+   * A base class for internal data that derived finite element classes may
+   * wish to store.
+   *
+   * The class is used as follows: Whenever an FEValues (or FEFaceValues or
+   * FESubfaceValues) object is initialized, it requests that the finite
+   * element it is associated with creates an object of a class derived from
+   * the current one here. This is done via each derived class's
+   * FiniteElement::get_data() function. This object is then passed to the
+   * FiniteElement::fill_fe_values(), FiniteElement::fill_fe_face_values(),
+   * and FiniteElement::fill_fe_subface_values() functions as a constant
+   * object. The intent of these objects is so that finite element classes can
+   * pre-compute information once at the beginning (in the call to
+   * FiniteElement::get_data()) that can then be used on each cell that
+   * is subsequently visited. An example for this is the values of shape
+   * functions at the quadrature point of the reference cell, which remain the
+   * same no matter the cell visited, and that can therefore be computed once
+   * at the beginning and reused later on.
+   *
+   * Because only derived classes can know what they can pre-compute, each
+   * derived class that wants to store information computed once at the
+   * beginning, needs to derive its own InternalData class from this class,
+   * and return an object of the derived type through its get_data() function.
+   *
+   * @author Guido Kanschat, 2001; Wolfgang Bangerth, 2015.
+   */
+  class InternalDataBase
+  {
+  private:
+    /**
+     * Copy constructor. Declared private so that objects of this class
+     * cannot be copied by code outside the class.
+     */
+    InternalDataBase (const InternalDataBase &);
+
+  public:
+    /**
+     * Default constructor. The only data member declared in this class is
+     * #update_each; presumably it is initialized to @p update_default here
+     * (TODO confirm against the definition of this constructor).
+     */
+    InternalDataBase ();
+
+    /**
+     * Destructor. Made virtual to allow polymorphism.
+     */
+    virtual ~InternalDataBase ();
+
+    /**
+     * A set of update flags specifying the kind of information that an
+     * implementation of the FiniteElement interface needs to compute on each
+     * cell or face, i.e., in FiniteElement::fill_fe_values() and friends.
+     *
+     * This set of flags is stored here by implementations of
+     * FiniteElement::get_data(), FiniteElement::get_face_data(), or
+     * FiniteElement::get_subface_data(), and is that subset of the update
+     * flags passed to those functions that require re-computation on every
+     * cell. (The subset of the flags corresponding to information that can be
+     * computed once and for all already at the time of the call to
+     * FiniteElement::get_data() -- or an implementation of that interface --
+     * need not be stored here because it has already been taken care of.)
+     */
+    UpdateFlags          update_each;
+
+    /**
+     * Return an estimate (in bytes) of the memory consumption of this object.
+     */
+    virtual std::size_t memory_consumption () const;
+  };
+
+public:
+  /**
+   * Constructor: initialize the fields of this base class of all finite
+   * elements.
+   *
+   * @param[in] fe_data An object that stores identifying (typically integral)
+   * information about the element to be constructed. In particular, this
+   * object will contain data such as the number of degrees of freedom per
+   * cell (and per vertex, line, etc), the number of vector components, etc.
+   * This argument is used to initialize the base class of the current object
+   * under construction.
+   * @param[in] restriction_is_additive_flags A vector of size
+   * <code>dofs_per_cell</code> (or of size one, see below) that for each
+   * shape function states whether the shape function is additive or not. The
+   * meaning of these flags is described in the section on restriction
+   * matrices in the general documentation of this class.
+   * @param[in] nonzero_components A vector of size <code>dofs_per_cell</code>
+   * (or of size one, see below) that for each shape function provides a
+   * ComponentMask (of size <code>fe_data.n_components()</code>) that
+   * indicates in which vector components this shape function is nonzero
+   * (after mapping the shape function to the real cell). For "primitive"
+   * shape functions, this component mask will have a single entry (see
+   * @ref GlossPrimitive
+   * for more information about primitive elements). On the other hand, for
+   * elements such as the Raviart-Thomas or Nedelec elements, shape functions
+   * are nonzero in more than one vector component (after mapping to the real
+   * cell) and the given component mask will contain more than one entry. (For
+   * these two elements, all entries will in fact be set, but this would not
+   * be the case if you couple a FE_RaviartThomas and a FE_Nedelec together
+   * into a FESystem.)
+   *
+   * @pre <code>restriction_is_additive_flags.size() == dofs_per_cell</code>,
+   * or <code>restriction_is_additive_flags.size() == 1</code>. In the latter
+   * case, the array is simply interpreted as having size
+   * <code>dofs_per_cell</code> where each element has the same value as the
+   * single element given.
+   *
+   * @pre <code>nonzero_components.size() == dofs_per_cell</code>, or
+   * <code>nonzero_components.size() == 1</code>. In the latter case, the
+   * array is simply interpreted as having size <code>dofs_per_cell</code>
+   * where each element equals the component mask provided in the single
+   * element given.
+   */
+  FiniteElement (const FiniteElementData<dim>     &fe_data,
+                 const std::vector<bool>          &restriction_is_additive_flags,
+                 const std::vector<ComponentMask> &nonzero_components);
+
+  /**
+   * Virtual destructor. Makes sure that pointers to this class are deleted
+   * properly.
+   */
+  virtual ~FiniteElement ();
+
+  /**
+   * A sort of virtual copy constructor. Some places in the library, for
+   * example the constructors of FESystem as well as the hp::FECollection
+   * class, need to make copies of finite elements without knowing their exact
+   * type. They do so through this function.
+   */
+  virtual FiniteElement<dim,spacedim> *clone() const = 0;
+
+  /**
+   * Return a string that uniquely identifies a finite element. The general
+   * convention is that this is the class name, followed by the dimension in
+   * angle brackets, and the polynomial degree and whatever else is necessary
+   * in parentheses. For example, <tt>FE_Q<2>(3)</tt> is the value returned
+   * for a cubic element in 2d.
+   *
+   * Systems of elements have their own naming convention, see the FESystem
+   * class.
+   */
+  virtual std::string get_name () const = 0;
+
+  /**
+   * This operator returns a reference to the present object if the argument
+   * given equals zero. While this does not seem particularly useful, it is
+   * helpful in writing code that works with both ::DoFHandler and the hp
+   * version hp::DoFHandler, since one can then write code like this:
+   * @code
+   *   dofs_per_cell
+   *     = dof_handler->get_fe()[cell->active_fe_index()].dofs_per_cell;
+   * @endcode
+   *
+   * This code doesn't work in both situations without the present operator
+   * because DoFHandler::get_fe() returns a finite element, whereas
+   * hp::DoFHandler::get_fe() returns a collection of finite elements that
+   * doesn't offer a <code>dofs_per_cell</code> member variable: one first has
+   * to select which finite element to work on, which is done using the
+   * operator[]. Fortunately, <code>cell-@>active_fe_index()</code> also works
+   * for non-hp classes and simply returns zero in that case. The present
+   * operator[] accepts this zero argument, by returning the finite element
+   * with index zero within its collection (that, of course, consists only of
+   * the present finite element anyway).
+   */
+  const FiniteElement<dim,spacedim> &operator[] (const unsigned int fe_index) const;
+
+  /**
+   * @name Shape function access
+   * @{
+   */
+
+  /**
+   * Return the value of the @p ith shape function at the point @p p. @p p is
+   * a point on the reference element. If the finite element is vector-valued,
+   * then return the value of the only non-zero component of the vector value
+   * of this shape function. If the shape function has more than one non-zero
+   * component (which we refer to with the term non-primitive), then derived
+   * classes implementing this function should throw an exception of type
+   * ExcShapeFunctionNotPrimitive. In that case, use the
+   * shape_value_component() function.
+   *
+   * Implementations of this function should throw an exception of type
+   * ExcUnitShapeValuesDoNotExist if the shape functions of the FiniteElement
+   * under consideration depend on the shape of the cell in real space, i.e.,
+   * if the shape functions are not defined by mapping from the reference
+   * cell. Some non-conforming elements are defined this way, as is the
+   * FE_DGPNonparametric class, to name just one example.
+   *
+   * The default implementation of this virtual function does exactly this,
+   * i.e., it simply throws an exception of type ExcUnitShapeValuesDoNotExist.
+   */
+  virtual double shape_value (const unsigned int  i,
+                              const Point<dim>   &p) const;
+
+  /**
+   * Just like for shape_value(), but this function will be called when the
+   * shape function has more than one non-zero vector component. In that case,
+   * this function should return the value of the @p component-th vector
+   * component of the @p ith shape function at point @p p.
+   */
+  virtual double shape_value_component (const unsigned int i,
+                                        const Point<dim>   &p,
+                                        const unsigned int component) const;
+
+  /**
+   * Return the gradient of the @p ith shape function at the point @p p. @p p
+   * is a point on the reference element, and likewise the gradient is the
+   * gradient on the unit cell with respect to unit cell coordinates. If the
+   * finite element is vector-valued, then return the value of the only
+   * non-zero component of the vector value of this shape function. If the shape
+   * function has more than one non-zero component (which we refer to with the
+   * term non-primitive), then derived classes implementing this function
+   * should throw an exception of type ExcShapeFunctionNotPrimitive. In that
+   * case, use the shape_grad_component() function.
+   *
+   * Implementations of this function should throw an exception of type
+   * ExcUnitShapeValuesDoNotExist if the shape functions of the FiniteElement
+   * under consideration depend on the shape of the cell in real space, i.e.,
+   * if the shape functions are not defined by mapping from the reference
+   * cell. Some non-conforming elements are defined this way, as is the
+   * FE_DGPNonparametric class, to name just one example.
+   *
+   * The default implementation of this virtual function does exactly this,
+   * i.e., it simply throws an exception of type ExcUnitShapeValuesDoNotExist.
+   */
+  virtual Tensor<1,dim> shape_grad (const unsigned int  i,
+                                    const Point<dim>   &p) const;
+
+  /**
+   * Just like for shape_grad(), but this function will be called when the
+   * shape function has more than one non-zero vector component. In that case,
+   * this function should return the gradient of the @p component-th vector
+   * component of the @p ith shape function at point @p p.
+   */
+  virtual Tensor<1,dim> shape_grad_component (const unsigned int i,
+                                              const Point<dim>   &p,
+                                              const unsigned int component) const;
+
+  /**
+   * Return the tensor of second derivatives of the @p ith shape function at
+   * point @p p on the unit cell. The derivatives are derivatives on the unit
+   * cell with respect to unit cell coordinates. If the finite element is
+   * vector-valued, then return the value of the only non-zero component of
+   * the vector value of this shape function. If the shape function has more
+   * than one non-zero component (which we refer to with the term
+   * non-primitive), then derived classes implementing this function should
+   * throw an exception of type ExcShapeFunctionNotPrimitive. In that case, use
+   * the shape_grad_grad_component() function.
+   *
+   * Implementations of this function should throw an exception of type
+   * ExcUnitShapeValuesDoNotExist if the shape functions of the FiniteElement
+   * under consideration depend on the shape of the cell in real space, i.e.,
+   * if the shape functions are not defined by mapping from the reference
+   * cell. Some non-conforming elements are defined this way, as is the
+   * FE_DGPNonparametric class, to name just one example.
+   *
+   * The default implementation of this virtual function does exactly this,
+   * i.e., it simply throws an exception of type ExcUnitShapeValuesDoNotExist.
+   */
+  virtual Tensor<2,dim> shape_grad_grad (const unsigned int  i,
+                                         const Point<dim>   &p) const;
+
+  /**
+   * Just like for shape_grad_grad(), but this function will be called when
+   * the shape function has more than one non-zero vector component. In that
+   * case, this function should return the second derivatives of the
+   * @p component-th vector component of the @p ith shape function at point
+   * @p p.
+   */
+  virtual Tensor<2,dim> shape_grad_grad_component (const unsigned int i,
+                                                   const Point<dim>   &p,
+                                                   const unsigned int component) const;
+
+  /**
+   * Return the tensor of third derivatives of the @p ith shape function at
+   * point @p p on the unit cell. The derivatives are derivatives on the unit
+   * cell with respect to unit cell coordinates. If the finite element is
+   * vector-valued, then return the value of the only non-zero component of
+   * the vector value of this shape function. If the shape function has more
+   * than one non-zero component (which we refer to with the term
+   * non-primitive), then derived classes implementing this function should
+   * throw an exception of type ExcShapeFunctionNotPrimitive. In that case, use
+   * the shape_3rd_derivative_component() function.
+   *
+   * Implementations of this function should throw an exception of type
+   * ExcUnitShapeValuesDoNotExist if the shape functions of the FiniteElement
+   * under consideration depend on the shape of the cell in real space, i.e.,
+   * if the shape functions are not defined by mapping from the reference
+   * cell. Some non-conforming elements are defined this way, as is the
+   * FE_DGPNonparametric class, to name just one example.
+   *
+   * The default implementation of this virtual function does exactly this,
+   * i.e., it simply throws an exception of type ExcUnitShapeValuesDoNotExist.
+   */
+  virtual Tensor<3,dim> shape_3rd_derivative (const unsigned int  i,
+                                              const Point<dim>   &p) const;
+
+  /**
+   * Just like for shape_3rd_derivative(), but this function will be called
+   * when the shape function has more than one non-zero vector component. In
+   * that case, this function should return the third derivatives of the
+   * @p component-th vector component of the @p ith shape function at point
+   * @p p.
+   */
+  virtual Tensor<3,dim> shape_3rd_derivative_component (const unsigned int i,
+                                                        const Point<dim>   &p,
+                                                        const unsigned int component) const;
+
+  /**
+   * Return the tensor of fourth derivatives of the @p ith shape function at
+   * point @p p on the unit cell. The derivatives are derivatives on the unit
+   * cell with respect to unit cell coordinates. If the finite element is
+   * vector-valued, then return the value of the only non-zero component of
+   * the vector value of this shape function. If the shape function has more
+   * than one non-zero component (which we refer to with the term
+   * non-primitive), then derived classes implementing this function should
+   * throw an exception of type ExcShapeFunctionNotPrimitive. In that case, use
+   * the shape_4th_derivative_component() function.
+   *
+   * Implementations of this function should throw an exception of type
+   * ExcUnitShapeValuesDoNotExist if the shape functions of the FiniteElement
+   * under consideration depend on the shape of the cell in real space, i.e.,
+   * if the shape functions are not defined by mapping from the reference
+   * cell. Some non-conforming elements are defined this way, as is the
+   * FE_DGPNonparametric class, to name just one example.
+   *
+   * The default implementation of this virtual function does exactly this,
+   * i.e., it simply throws an exception of type ExcUnitShapeValuesDoNotExist.
+   */
+  virtual Tensor<4,dim> shape_4th_derivative (const unsigned int  i,
+                                              const Point<dim>   &p) const;
+
+  /**
+   * Just like for shape_4th_derivative(), but this function will be called
+   * when the shape function has more than one non-zero vector component. In
+   * that case, this function should return the fourth derivatives of the
+   * @p component-th vector component of the @p ith shape function at point
+   * @p p.
+   */
+  virtual Tensor<4,dim> shape_4th_derivative_component (const unsigned int i,
+                                                        const Point<dim>   &p,
+                                                        const unsigned int component) const;
+  /**
+   * This function returns @p true, if the shape function @p shape_index has
+   * non-zero function values somewhere on the face @p face_index. The
+   * function is typically used to determine whether some matrix elements
+   * resulting from face integrals can be assumed to be zero and may therefore
+   * be omitted from integration.
+   *
+   * A default implementation is provided in this base class which always
+   * returns @p true. This is the safe way to go.
+   */
+  virtual bool has_support_on_face (const unsigned int shape_index,
+                                    const unsigned int face_index) const;
+
+  //@}
+  /**
+   * @name Transfer and constraint matrices
+   * @{
+   */
+
+  /**
+   * Return the matrix that describes restricting a finite element field from
+   * the given @p child (as obtained by the given @p refinement_case) to the
+   * parent cell. The interpretation of the returned matrix depends on what
+   * restriction_is_additive() returns for each shape function.
+   *
+   * Row and column indices are related to coarse grid and fine grid spaces,
+   * respectively, consistent with the definition of the associated operator.
+   *
+   * If projection matrices are not implemented in the derived finite element
+   * class, this function aborts with an exception of type
+   * FiniteElement::ExcProjectionVoid. You can check whether this would happen
+   * by first calling the restriction_is_implemented() or the
+   * isotropic_restriction_is_implemented() function.
+   */
+  virtual const FullMatrix<double> &
+  get_restriction_matrix (const unsigned int child,
+                          const RefinementCase<dim> &refinement_case=RefinementCase<dim>::isotropic_refinement) const;
+
+  /**
+   * Prolongation/embedding matrix between grids.
+   *
+   * The identity operator from a coarse grid space into a fine grid space
+   * (where both spaces are identified as functions defined on the parent and
+   * child cells) is associated with a matrix @p P that maps the corresponding
+   * representations of these functions in terms of their nodal values. The
+   * restriction of this matrix @p P_i to a single child cell is returned
+   * here.
+   *
+   * The matrix @p P is the concatenation, not the sum of the cell matrices @p
+   * P_i. That is, if the same non-zero entry <tt>j,k</tt> exists in two
+   * different child matrices @p P_i, the value should be the same in both
+   * matrices and it is copied into the matrix @p P only once.
+   *
+   * Row and column indices are related to fine grid and coarse grid spaces,
+   * respectively, consistent with the definition of the associated operator.
+   *
+   * These matrices are used by routines assembling the prolongation matrix
+   * for multi-level methods.  Upon assembling the transfer matrix between
+   * cells using this matrix array, zero elements in the prolongation matrix
+   * are discarded and will not fill up the transfer matrix.
+   *
+   * If prolongation matrices are not implemented in the derived finite
+   * element class, this function aborts with an exception of type
+   * FiniteElement::ExcEmbeddingVoid. You can check whether this would happen
+   * by first calling the prolongation_is_implemented() or the
+   * isotropic_prolongation_is_implemented() function.
+   */
+  virtual const FullMatrix<double> &
+  get_prolongation_matrix (const unsigned int child,
+                           const RefinementCase<dim> &refinement_case=RefinementCase<dim>::isotropic_refinement) const;
+
+  /**
+   * Return whether this element implements its prolongation matrices. The
+   * return value also indicates whether a call to the
+   * get_prolongation_matrix() function will generate an error or not.
+   *
+   * Note, that this function returns <code>true</code> only if the
+   * prolongation matrices of the isotropic and all anisotropic refinement
+   * cases are implemented. If you are interested in the prolongation matrices
+   * for isotropic refinement only, use the
+   * isotropic_prolongation_is_implemented() function instead.
+   *
+   * This function is mostly here in order to allow us to write more efficient
+   * test programs which we run on all kinds of weird elements, and for which
+   * we simply need to exclude certain tests in case something is not
+   * implemented. It will in general probably not be a great help in
+   * applications, since there is not much one can do if one needs these
+   * features and they are not implemented. This function could be used to
+   * check whether a call to <tt>get_prolongation_matrix()</tt> will succeed;
+   * however, one then still needs to cope with the lack of information this
+   * just expresses.
+   */
+  bool prolongation_is_implemented () const;
+
+  /**
+   * Return whether this element implements its prolongation matrices for
+   * isotropic children. The return value also indicates whether a call to the
+   * get_prolongation_matrix() function will generate an error or not.
+   *
+   * This function is mostly here in order to allow us to write more efficient
+   * test programs which we run on all kinds of weird elements, and for which
+   * we simply need to exclude certain tests in case something is not
+   * implemented. It will in general probably not be a great help in
+   * applications, since there is not much one can do if one needs these
+   * features and they are not implemented. This function could be used to
+   * check whether a call to <tt>get_prolongation_matrix()</tt> will succeed;
+   * however, one then still needs to cope with the lack of information this
+   * just expresses.
+   */
+  bool isotropic_prolongation_is_implemented () const;
+
+  /**
+   * Return whether this element implements its restriction matrices. The
+   * return value also indicates whether a call to the
+   * get_restriction_matrix() function will generate an error or not.
+   *
+   * Note, that this function returns <code>true</code> only if the
+   * restriction matrices of the isotropic and all anisotropic refinement
+   * cases are implemented. If you are interested in the restriction matrices
+   * for isotropic refinement only, use the
+   * isotropic_restriction_is_implemented() function instead.
+   *
+   * This function is mostly here in order to allow us to write more efficient
+   * test programs which we run on all kinds of weird elements, and for which
+   * we simply need to exclude certain tests in case something is not
+   * implemented. It will in general probably not be a great help in
+   * applications, since there is not much one can do if one needs these
+   * features and they are not implemented. This function could be used to
+   * check whether a call to <tt>get_restriction_matrix()</tt> will succeed;
+   * however, one then still needs to cope with the lack of information this
+   * just expresses.
+   */
+  bool restriction_is_implemented () const;
+
+  /**
+   * Return whether this element implements its restriction matrices for
+   * isotropic children. The return value also indicates whether a call to the
+   * get_restriction_matrix() function will generate an error or not.
+   *
+   * This function is mostly here in order to allow us to write more efficient
+   * test programs which we run on all kinds of weird elements, and for which
+   * we simply need to exclude certain tests in case something is not
+   * implemented. It will in general probably not be a great help in
+   * applications, since there is not much one can do if one needs these
+   * features and they are not implemented. This function could be used to
+   * check whether a call to <tt>get_restriction_matrix()</tt> will succeed;
+   * however, one then still needs to cope with the lack of information this
+   * just expresses.
+   */
+  bool isotropic_restriction_is_implemented () const;
+
+
+  /**
+   * Access the #restriction_is_additive_flags field. See the discussion about
+   * restriction matrices in the general class documentation for more
+   * information.
+   *
+   * The index must be between zero and the number of shape functions of this
+   * element.
+   */
+  bool restriction_is_additive (const unsigned int index) const;
+
+  /**
+   * Return a read only reference to the matrix that describes the constraints
+   * at the interface between a refined and an unrefined cell.
+   *
+   * Some finite elements do not (yet) implement hanging node constraints. If
+   * this is the case, then this function will generate an exception, since no
+   * useful return value can be generated. If you should have a way to live
+   * with this, then you might want to use the constraints_are_implemented()
+   * function to check up front whether this function will succeed or generate
+   * the exception.
+   */
+  const FullMatrix<double> &constraints (const dealii::internal::SubfaceCase<dim> &subface_case=dealii::internal::SubfaceCase<dim>::case_isotropic) const;
+
+  /**
+   * Return whether this element implements its hanging node constraints. The
+   * return value also indicates whether a call to the constraints() function
+   * will generate an error or not.
+   *
+   * This function is mostly here in order to allow us to write more efficient
+   * test programs which we run on all kinds of weird elements, and for which
+   * we simply need to exclude certain tests in case hanging node constraints
+   * are not implemented. It will in general probably not be a great help in
+   * applications, since there is not much one can do if one needs hanging
+   * node constraints and they are not implemented. This function could be
+   * used to check whether a call to <tt>constraints()</tt> will succeed;
+   * however, one then still needs to cope with the lack of information this
+   * just expresses.
+   */
+  bool constraints_are_implemented (const dealii::internal::SubfaceCase<dim> &subface_case=dealii::internal::SubfaceCase<dim>::case_isotropic) const;
+
+
+  /**
+   * Return whether this element implements its hanging node constraints in
+   * the new way, which has to be used to make elements "hp compatible".  That
+   * means, the element properly implements the get_face_interpolation_matrix
+   * and get_subface_interpolation_matrix methods. Therefore the return value
+   * also indicates whether a call to the get_face_interpolation_matrix()
+   * method and the get_subface_interpolation_matrix() method will generate an
+   * error or not.
+   *
+   * Currently the main purpose of this function is to allow the
+   * make_hanging_node_constraints method to decide whether the new
+   * procedures, which are supposed to work in the hp framework can be used,
+   * or if the old well verified but not hp capable functions should be used.
+   * Once the transition to the new scheme for computing the interface
+   * constraints is complete, this function will be superfluous and will
+   * probably go away.
+   *
+   * Derived classes should implement this function accordingly. The default
+   * assumption is that a finite element does not provide hp capable face
+   * interpolation, and the default implementation therefore returns @p false.
+   */
+  virtual bool hp_constraints_are_implemented () const;
+
+
+  /**
+   * Return the matrix interpolating from the given finite element to the
+   * present one. The size of the matrix is then #dofs_per_cell times
+   * <tt>source.#dofs_per_cell</tt>.
+   *
+   * Derived elements will have to implement this function. They may only
+   * provide interpolation matrices for certain source finite elements, for
+   * example those from the same family. If they don't implement interpolation
+   * from a given element, then they must throw an exception of type
+   * ExcInterpolationNotImplemented.
+   */
+  virtual void
+  get_interpolation_matrix (const FiniteElement<dim,spacedim> &source,
+                            FullMatrix<double>       &matrix) const;
+  //@}
+
+  /**
+   * @name Functions to support hp
+   * @{
+   */
+
+
+  /**
+   * Return the matrix interpolating from a face of one element to the face
+   * of the neighboring element.  The size of the matrix is then
+   * <tt>source.#dofs_per_face</tt> times <tt>this->#dofs_per_face</tt>.
+   *
+   * Derived elements will have to implement this function. They may only
+   * provide interpolation matrices for certain source finite elements, for
+   * example those from the same family. If they don't implement interpolation
+   * from a given element, then they must throw an exception of type
+   * ExcInterpolationNotImplemented.
+   */
+  virtual void
+  get_face_interpolation_matrix (const FiniteElement<dim,spacedim> &source,
+                                 FullMatrix<double>       &matrix) const;
+
+
+  /**
+   * Return the matrix interpolating from a face of one element to the
+   * subface of the neighboring element.  The size of the matrix is then
+   * <tt>source.#dofs_per_face</tt> times <tt>this->#dofs_per_face</tt>.
+   *
+   * Derived elements will have to implement this function. They may only
+   * provide interpolation matrices for certain source finite elements, for
+   * example those from the same family. If they don't implement interpolation
+   * from a given element, then they must throw an exception of type
+   * ExcInterpolationNotImplemented.
+   */
+  virtual void
+  get_subface_interpolation_matrix (const FiniteElement<dim,spacedim> &source,
+                                    const unsigned int        subface,
+                                    FullMatrix<double>       &matrix) const;
+  //@}
+
+
+  /**
+   * @name Functions to support hp
+   * @{
+   */
+
+  /**
+   * If, on a vertex, several finite elements are active, the hp code first
+   * assigns the degrees of freedom of each of these FEs different global
+   * indices. It then calls this function to find out which of them should get
+   * identical values, and consequently can receive the same global DoF index.
+   * This function therefore returns a list of identities between DoFs of the
+   * present finite element object with the DoFs of @p fe_other, which is a
+   * reference to a finite element object representing one of the other finite
+   * elements active on this particular vertex. The function computes which of
+   * the degrees of freedom of the two finite element objects are equivalent,
+   * both numbered between zero and the corresponding value of dofs_per_vertex
+   * of the two finite elements. The first index of each pair denotes one of
+   * the vertex dofs of the present element, whereas the second is the
+   * corresponding index of the other finite element.
+   */
+  virtual
+  std::vector<std::pair<unsigned int, unsigned int> >
+  hp_vertex_dof_identities (const FiniteElement<dim,spacedim> &fe_other) const;
+
+  /**
+   * Same as hp_vertex_dof_identities(), except that the function treats
+   * degrees of freedom on lines.
+   */
+  virtual
+  std::vector<std::pair<unsigned int, unsigned int> >
+  hp_line_dof_identities (const FiniteElement<dim,spacedim> &fe_other) const;
+
+  /**
+   * Same as hp_vertex_dof_identities(), except that the function treats
+   * degrees of freedom on quads.
+   */
+  virtual
+  std::vector<std::pair<unsigned int, unsigned int> >
+  hp_quad_dof_identities (const FiniteElement<dim,spacedim> &fe_other) const;
+
+  /**
+   * Return whether this element dominates the one given as argument when they
+   * meet at a common face, whether it is the other way around, whether
+   * neither dominates, or if either could dominate.
+   *
+   * For a definition of domination, see FiniteElementDomination::Domination
+   * and in particular the
+   * @ref hp_paper "hp paper".
+   */
+  virtual
+  FiniteElementDomination::Domination
+  compare_for_face_domination (const FiniteElement<dim,spacedim> &fe_other) const;
+
+  //@}
+
+  /**
+   * Comparison operator. We also check for equality of the constraint matrix,
+   * which is quite an expensive operation.  Do therefore use this function
+   * with care, if possible only for debugging purposes.
+   *
+   * Since this function is not that important, we avoid an implementational
+   * question about comparing arrays and do not compare the matrix arrays
+   * #restriction and #prolongation.
+   */
+  bool operator == (const FiniteElement<dim,spacedim> &) const;
+
+  /**
+   * @name Index computations
+   * @{
+   */
+  /**
+   * Compute vector component and index of this shape function within the
+   * shape functions corresponding to this component from the index of a shape
+   * function within this finite element.
+   *
+   * If the element is scalar, then the component is always zero, and the
+   * index within this component is equal to the overall index.
+   *
+   * If the shape function referenced has more than one non-zero component,
+   * then it cannot be associated with one vector component, and an exception
+   * of type ExcShapeFunctionNotPrimitive will be raised.
+   *
+   * Note that if the element is composed of other (base) elements, and a base
+   * element has more than one component but all its shape functions are
+   * primitive (i.e. are non-zero in only one component), then this mapping
+   * contains valid information. However, the index of a shape function of
+   * this element within one component (i.e. the second number of the
+   * respective entry of this array) does not indicate the index of the
+   * respective shape function within the base element (since that has more
+   * than one vector-component). For this information, refer to the
+   * #system_to_base_table field and the system_to_base_index() function.
+   *
+   * The use of this function is explained extensively in the step-8 and
+   * @ref step_20 "step-20"
+   * tutorial programs as well as in the
+   * @ref vector_valued
+   * module.
+   *
+   * @param index The index of a shape function within this finite element.
+   * @return A pair whose first entry is the vector component and whose
+   * second entry is the index of the shape function within that component.
+   */
+  std::pair<unsigned int, unsigned int>
+  system_to_component_index (const unsigned int index) const;
+
+  /**
+   * Compute the shape function for the given vector component and index.
+   *
+   * If the element is scalar, then the component must be zero, and the index
+   * within this component is equal to the overall index.
+   *
+   * This is the opposite operation from the system_to_component_index()
+   * function.
+   *
+   * @param component The vector component.
+   * @param index The index of the shape function within @p component.
+   * @return The index of the corresponding shape function within this
+   * finite element.
+   */
+  unsigned int component_to_system_index(const unsigned int component,
+                                         const unsigned int index) const;
+
+  /**
+   * Same as system_to_component_index(), but do it for shape functions and
+   * their indices on a face. The range of allowed indices is therefore
+   * 0..#dofs_per_face.
+   *
+   * You will rarely need this function in application programs, since almost
+   * all application codes only need to deal with cell indices, not face
+   * indices. The function is mainly there for use inside the library.
+   *
+   * @param index The index of a shape function on a face.
+   * @return A pair of vector component and index within that component, as
+   * for system_to_component_index().
+   */
+  std::pair<unsigned int, unsigned int>
+  face_system_to_component_index (const unsigned int index) const;
+
+  /**
+   * For faces with non-standard face_orientation in 3D, the dofs on faces
+   * (quads) have to be permuted in order to be combined with the correct
+   * shape functions. Given a local dof @p index on a quad, return the local
+   * index, if the face has non-standard face_orientation, face_flip or
+   * face_rotation. In 2D and 1D there is no need for permutation and
+   * consequently an exception is thrown.
+   *
+   * @param index The local index of a degree of freedom on a quad.
+   * @param face_orientation One part of the description of the orientation
+   * of the face.
+   * @param face_flip One part of the description of the orientation of the
+   * face.
+   * @param face_rotation One part of the description of the orientation of
+   * the face.
+   * @return The permuted local index on the quad.
+   */
+  unsigned int adjust_quad_dof_index_for_face_orientation (const unsigned int index,
+                                                           const bool face_orientation,
+                                                           const bool face_flip,
+                                                           const bool face_rotation) const;
+
+  /**
+   * Given an index in the natural ordering of indices on a face, return the
+   * index of the same degree of freedom on the cell.
+   *
+   * To explain the concept, consider the case where we would like to know
+   * whether a degree of freedom on a face, for example as part of an FESystem
+   * element, is primitive. Unfortunately, the is_primitive() function in the
+   * FiniteElement class takes a cell index, so we would need to find the cell
+   * index of the shape function that corresponds to the present face index.
+   * This function does that.
+   *
+   * Code implementing this would then look like this:
+   * @code
+   * for (i=0; i<dofs_per_face; ++i)
+   *  if (fe.is_primitive(fe.face_to_cell_index(i, some_face_no)))
+   *   ... do whatever
+   * @endcode
+   * The function takes additional arguments that account for the fact that
+   * actual faces can be in their standard ordering with respect to the cell
+   * under consideration, or can be flipped, oriented, etc.
+   *
+   * @param face_dof_index The index of the degree of freedom on a face. This
+   * index must be between zero and dofs_per_face.
+   * @param face The number of the face this degree of freedom lives on. This
+   * number must be between zero and GeometryInfo::faces_per_cell.
+   * @param face_orientation One part of the description of the orientation of
+   * the face. See
+   * @ref GlossFaceOrientation.
+   * @param face_flip One part of the description of the orientation of the
+   * face. See
+   * @ref GlossFaceOrientation.
+   * @param face_rotation One part of the description of the orientation of
+   * the face. See
+   * @ref GlossFaceOrientation.
+   * @return The index of this degree of freedom within the set of degrees of
+   * freedom on the entire cell. The returned value will be between zero and
+   * dofs_per_cell.
+   *
+   * @note This function exists in this class because that is where it was
+   * first implemented. However, it can't really work in the most general case
+   * without knowing what element we have. The reason is that when a face is
+   * flipped or rotated, we also need to know whether we need to swap the
+   * degrees of freedom on this face, or whether they are immune from this.
+   * For this, consider the situation of a $Q_3$ element in 2d. If face_flip
+   * is true, then we need to consider the two degrees of freedom on the edge
+   * in reverse order. On the other hand, if the element were a $Q_1^2$, then
+   * because the two degrees of freedom on this edge belong to different
+   * vector components, they should not be considered in reverse order. What
+   * all of this shows is that the function can't work if there is more than
+   * one degree of freedom per line or quad, and that in these cases the
+   * function will throw an exception pointing out that this functionality
+   * will need to be provided by a derived class that knows what degrees of
+   * freedom actually represent.
+   */
+  virtual
+  unsigned int face_to_cell_index (const unsigned int face_dof_index,
+                                   const unsigned int face,
+                                   const bool face_orientation = true,
+                                   const bool face_flip        = false,
+                                   const bool face_rotation    = false) const;
+
+  /**
+   * For lines with non-standard line_orientation in 3D, the dofs on lines
+   * have to be permuted in order to be combined with the correct shape
+   * functions. Given a local dof @p index on a line, return the local index,
+   * if the line has non-standard line_orientation. In 2D and 1D there is no
+   * need for permutation, so the given index is simply returned.
+   *
+   * @param index The local index of a degree of freedom on a line.
+   * @param line_orientation The orientation of the line.
+   * @return The permuted local index on the line.
+   */
+  unsigned int adjust_line_dof_index_for_line_orientation (const unsigned int index,
+                                                           const bool line_orientation) const;
+
+  /**
+   * Return in which of the vector components of this finite element the @p
+   * ith shape function is non-zero. The length of the returned array is equal
+   * to the number of vector components of this element.
+   *
+   * For most finite element spaces, the result of this function will be a
+   * vector with exactly one element being @p true, since for most spaces the
+   * individual vector components are independent. In that case, the index of
+   * the single @p true entry is also the first element of what
+   * system_to_component_index() returns.
+   *
+   * Only for those spaces that couple the components, for example to make a
+   * shape function divergence free, will there be more than one @p true
+   * entry.  Elements for which this is true are called non-primitive (see
+   * @ref GlossPrimitive).
+   *
+   * @param i The index of the shape function.
+   */
+  const ComponentMask &
+  get_nonzero_components (const unsigned int i) const;
+
+  /**
+   * Return in how many vector components the @p ith shape function is
+   * non-zero. This value equals the number of entries equal to @p true in the
+   * result of the get_nonzero_components() function.
+   *
+   * For most finite element spaces, the result will be equal to one. It is
+   * not equal to one only for those ansatz spaces for which vector-valued
+   * shape functions couple the individual components, for example in order to
+   * make them divergence-free.
+   *
+   * @param i The index of the shape function.
+   */
+  unsigned int
+  n_nonzero_components (const unsigned int i) const;
+
+  /**
+   * Return whether the @p ith shape function is primitive in the sense that
+   * the shape function is non-zero in only one vector component.
+   * Non-primitive shape functions would then, for example, be those of
+   * divergence free ansatz spaces, in which the individual vector components
+   * are coupled.
+   *
+   * The result of the function is @p true if and only if the result of
+   * <tt>n_nonzero_components(i)</tt> is equal to one.
+   *
+   * @param i The index of the shape function.
+   */
+  bool
+  is_primitive (const unsigned int i) const;
+
+  /**
+   * Import the is_primitive() function of the base class. It is overloaded
+   * by the function declared above and would otherwise be hidden by it.
+   */
+  using FiniteElementData<dim>::is_primitive;
+
+  /**
+   * Return the number of base elements in a mixed discretization.
+   *
+   * Note that even for vector valued finite elements, the number of
+   * components need not coincide with the number of base elements, since
+   * they may be reused. For example, if you create a FESystem with three
+   * identical finite element classes by using the constructor that takes one
+   * finite element and a multiplicity, then the number of base elements is
+   * still one, although the number of components of the finite element is
+   * equal to the multiplicity.
+   */
+  unsigned int n_base_elements () const;
+
+  /**
+   * Access to base element objects. If the element is atomic, then
+   * <code>base_element(0)</code> is @p this.
+   *
+   * @param index The number of the base element to return.
+   */
+  virtual
+  const FiniteElement<dim,spacedim> &
+  base_element (const unsigned int index) const;
+
+  /**
+   * Return how often the base element @p index is used in a composed
+   * element. If the element is atomic, then the result is always equal to
+   * one. See the documentation for the n_base_elements() function for more
+   * details.
+   *
+   * @param index The number of the base element.
+   */
+  unsigned int
+  element_multiplicity (const unsigned int index) const;
+
+  /**
+   * Return for shape function @p index the base element it belongs to, the
+   * number of the copy of this base element (which is between zero and the
+   * multiplicity of this element), and the index of this shape function
+   * within this base element.
+   *
+   * If the element is not composed of others, then base and instance are
+   * always zero, and the index is equal to the number of the shape function.
+   * If the element is composed of single instances of other elements (i.e.
+   * all with multiplicity one) all of which are scalar, then base values and
+   * dof indices within this element are equal to the
+   * #system_to_component_table. It differs only in case the element is
+   * composed of other elements and at least one of them is vector-valued
+   * itself.
+   *
+   * This function returns valid values also in the case of vector-valued
+   * (i.e. non-primitive) shape functions, in contrast to the
+   * system_to_component_index() function.
+   *
+   * @param index The index of a shape function within this finite element.
+   * @return A nested pair: the inner pair holds the base element and the
+   * number of its copy, the outer second entry holds the index of the shape
+   * function within the base element.
+   */
+  std::pair<std::pair<unsigned int, unsigned int>, unsigned int>
+  system_to_base_index (const unsigned int index) const;
+
+  /**
+   * Same as system_to_base_index(), but for degrees of freedom located on a
+   * face. The range of allowed indices is therefore 0..#dofs_per_face.
+   *
+   * You will rarely need this function in application programs, since almost
+   * all application codes only need to deal with cell indices, not face
+   * indices. The function is mainly there for use inside the library.
+   *
+   * @param index The index of a degree of freedom on a face.
+   */
+  std::pair<std::pair<unsigned int, unsigned int>, unsigned int>
+  face_system_to_base_index (const unsigned int index) const;
+
+  /**
+   * Given a base element number, return the first block of a BlockVector it
+   * would generate.
+   *
+   * @param b The number of the base element.
+   */
+  types::global_dof_index first_block_of_base (const unsigned int b) const;
+
+  /**
+   * For each vector component, return which base element implements this
+   * component and which vector component in this base element this is. This
+   * information is only of interest for vector-valued finite elements which
+   * are composed of several sub-elements. In that case, one may want to
+   * obtain information about the element implementing a certain vector
+   * component, which can be done using this function and the
+   * FESystem::base_element() function.
+   *
+   * If this is a scalar finite element, then the return value is always equal
+   * to a pair of zeros.
+   *
+   * @param component The vector component of this finite element.
+   * @return A pair of the base element number and the vector component
+   * within that base element.
+   */
+  std::pair<unsigned int, unsigned int>
+  component_to_base_index (const unsigned int component) const;
+
+
+  /**
+   * Return the base element for this block and the number of the copy of the
+   * base element.
+   *
+   * @param block The number of the block.
+   * @return A pair of the base element number and the number of the copy of
+   * that base element.
+   */
+  std::pair<unsigned int,unsigned int>
+  block_to_base_index (const unsigned int block) const;
+
+  /**
+   * Return the vector block and the index inside the block for this shape
+   * function.
+   *
+   * @note The parameter is named @p component in this declaration, although
+   * the description refers to a shape function. Presumably the argument is
+   * the index of a shape function -- TODO confirm against the
+   * implementation.
+   */
+  std::pair<unsigned int,types::global_dof_index>
+  system_to_block_index (const unsigned int component) const;
+
+  /**
+   * Return the vector block for this component.
+   *
+   * @param component The vector component of this finite element.
+   */
+  unsigned int
+  component_to_block_index (const unsigned int component) const;
+
+  //@}
+
+  /**
+   * @name Component and block masks
+   * @{
+   */
+
+  /**
+   * Return a component mask with as many elements as this object has vector
+   * components and of which exactly the one component is true that
+   * corresponds to the given argument. See
+   * @ref GlossComponentMask "the glossary"
+   * for more information.
+   *
+   * @param scalar An object that represents a single scalar vector component
+   * of this finite element.
+   * @return A component mask that is false in all components except for the
+   * one that corresponds to the argument.
+   */
+  ComponentMask
+  component_mask (const FEValuesExtractors::Scalar &scalar) const;
+
+  /**
+   * Return a component mask with as many elements as this object has vector
+   * components and of which exactly the <code>dim</code> components are true
+   * that correspond to the given argument. See
+   * @ref GlossComponentMask "the glossary"
+   * for more information.
+   *
+   * @param vector An object that represents dim vector components of this
+   * finite element.
+   * @return A component mask that is false in all components except for the
+   * ones that correspond to the argument.
+   */
+  ComponentMask
+  component_mask (const FEValuesExtractors::Vector &vector) const;
+
+  /**
+   * Return a component mask with as many elements as this object has vector
+   * components and of which exactly the <code>dim*(dim+1)/2</code> components
+   * are true that correspond to the given argument. See
+   * @ref GlossComponentMask "the glossary"
+   * for more information.
+   *
+   * @param sym_tensor An object that represents dim*(dim+1)/2 components of
+   * this finite element that are jointly to be interpreted as forming a
+   * symmetric tensor.
+   * @return A component mask that is false in all components except for the
+   * ones that correspond to the argument.
+   */
+  ComponentMask
+  component_mask (const FEValuesExtractors::SymmetricTensor<2> &sym_tensor) const;
+
+  /**
+   * Given a block mask (see
+   * @ref GlossBlockMask "this glossary entry"),
+   * produce a component mask (see
+   * @ref GlossComponentMask "this glossary entry")
+   * that represents the components that correspond to the blocks selected in
+   * the input argument. This is essentially a conversion operator from
+   * BlockMask to ComponentMask.
+   *
+   * @param block_mask The mask that selects individual blocks of the finite
+   * element.
+   * @return A mask that selects those components corresponding to the
+   * selected blocks of the input argument.
+   */
+  ComponentMask
+  component_mask (const BlockMask &block_mask) const;
+
+  /**
+   * Return a block mask with as many elements as this object has blocks and
+   * of which exactly the one block is true that corresponds to the given
+   * argument. See
+   * @ref GlossBlockMask "the glossary"
+   * for more information.
+   *
+   * @note This function will only succeed if the scalar referenced by the
+   * argument encompasses a complete block. In other words, if, for example,
+   * you pass an extractor for the single $x$ velocity and this object
+   * represents an FE_RaviartThomas object, then the single scalar object you
+   * selected is part of a larger block and consequently there is no block
+   * mask that would represent it. The function will then produce an
+   * exception.
+   *
+   * @param scalar An object that represents a single scalar vector component
+   * of this finite element.
+   * @return A block mask that is false in all blocks except for the one that
+   * corresponds to the argument.
+   */
+  BlockMask
+  block_mask (const FEValuesExtractors::Scalar &scalar) const;
+
+  /**
+   * Return a block mask with as many elements as this object has blocks and
+   * of which exactly those blocks are true that correspond to the
+   * <code>dim</code> vector components represented by the given argument.
+   * See
+   * @ref GlossBlockMask "the glossary"
+   * for more information.
+   *
+   * @note The same caveat applies as to the version of the function above:
+   * The extractor object passed as argument must be so that it corresponds to
+   * full blocks and does not split blocks of this element.
+   *
+   * @param vector An object that represents dim vector components of this
+   * finite element.
+   * @return A block mask that is false in all blocks except for the ones
+   * that correspond to the argument.
+   */
+  BlockMask
+  block_mask (const FEValuesExtractors::Vector &vector) const;
+
+  /**
+   * Return a block mask with as many elements as this object has blocks and
+   * of which exactly those blocks are true that correspond to the
+   * <code>dim*(dim+1)/2</code> vector components represented by the given
+   * argument. See
+   * @ref GlossBlockMask "the glossary"
+   * for more information.
+   *
+   * @note The same caveat applies as to the version of the function above:
+   * The extractor object passed as argument must be so that it corresponds to
+   * full blocks and does not split blocks of this element.
+   *
+   * @param sym_tensor An object that represents dim*(dim+1)/2 components of
+   * this finite element that are jointly to be interpreted as forming a
+   * symmetric tensor.
+   * @return A block mask that is false in all blocks except for the ones
+   * that correspond to the argument.
+   */
+  BlockMask
+  block_mask (const FEValuesExtractors::SymmetricTensor<2> &sym_tensor) const;
+
+  /**
+   * Given a component mask (see
+   * @ref GlossComponentMask "this glossary entry"),
+   * produce a block mask (see
+   * @ref GlossBlockMask "this glossary entry")
+   * that represents the blocks that correspond to the components selected in
+   * the input argument. This is essentially a conversion operator from
+   * ComponentMask to BlockMask.
+   *
+   * @note This function will only succeed if the components referenced by the
+   * argument encompass complete blocks. In other words, if, for example,
+   * you pass a component mask for the single $x$ velocity and this object
+   * represents an FE_RaviartThomas object, then the single component you
+   * selected is part of a larger block and consequently there is no block
+   * mask that would represent it. The function will then produce an
+   * exception.
+   *
+   * @param component_mask The mask that selects individual components of the
+   * finite element.
+   * @return A mask that selects those blocks corresponding to the selected
+   * components of the input argument.
+   */
+  BlockMask
+  block_mask (const ComponentMask &component_mask) const;
+
+  /**
+   * Return a list of constant modes of the element. The number of rows in
+   * the resulting table depends on the elements in use. For standard
+   * elements, the table has as many rows as there are components in the
+   * element and dofs_per_cell columns. To each component of the finite
+   * element, the row in the returned table contains a basis representation of
+   * the constant function 1 on the element. However, there are some scalar
+   * elements where there is more than one constant mode, e.g. the element
+   * FE_Q_DG0.
+   *
+   * In order to match the constant modes to the actual components in the
+   * element, the returned data structure also returns a vector with as many
+   * components as there are constant modes on the element that contains the
+   * component number.
+   *
+   * @return A pair whose first entry is the table of constant modes and
+   * whose second entry is the vector of component numbers described above.
+   */
+  virtual std::pair<Table<2,bool>,std::vector<unsigned int> >
+  get_constant_modes () const;
+
+  //@}
+
+  /**
+   * @name Support points and interpolation
+   * @{
+   */
+
+  /**
+   * Return the support points of the trial functions on the unit cell, if the
+   * derived finite element defines them.  Finite elements that allow some
+   * kind of interpolation operation usually have support points. On the other
+   * hand, elements that define their degrees of freedom by, for example,
+   * moments on faces, or as derivatives, don't have support points. In that
+   * case, the returned field is empty.
+   *
+   * If the finite element defines support points, then their number equals
+   * the number of degrees of freedom of the element.  The order of points in
+   * the array matches that returned by the <tt>cell->get_dof_indices</tt>
+   * function.
+   *
+   * See the class documentation for details on support points.
+   *
+   * @note Finite elements' implementations of this function return these
+   * points in the same order as shape functions. The order of shape functions
+   * is typically documented in the class documentation of the various finite
+   * element classes. In particular, shape functions (and consequently the
+   * mapped quadrature points discussed in the class documentation of this
+   * class) will then traverse first those shape functions located on
+   * vertices, then on lines, then on quads, etc.
+   *
+   * @note If this element implements support points, then it will return one
+   * such point per shape function. Since multiple shape functions may be
+   * defined at the same location, the support points returned here may be
+   * duplicated. An example would be an element of the kind
+   * <code>FESystem(FE_Q(1),3)</code> for which each support point would
+   * appear three times in the returned array.
+   */
+  const std::vector<Point<dim> > &
+  get_unit_support_points () const;
+
+  /**
+   * Return whether a finite element has defined support points. If the result
+   * is true, then a call to the get_unit_support_points() function yields a
+   * non-empty array.
+   *
+   * The result may be false if an element is not defined by interpolating
+   * shape functions, for example by P-elements on quadrilaterals. It will
+   * usually only be true if the element constructs its shape functions by the
+   * requirement that they be one at a certain point and zero at all the
+   * points associated with the other shape functions.
+   *
+   * In composed elements (i.e. for the FESystem class), the result will be
+   * true if all the base elements have defined support points. FE_Nothing
+   * is a special case in FESystems, because it has 0 support points and
+   * has_support_points() is false, but an FESystem containing an FE_Nothing
+   * among other elements will return true.
+   */
+  bool has_support_points () const;
+
+  /**
+   * Return the position of the support point of the @p indexth shape
+   * function. If it does not exist, raise an exception.
+   *
+   * The default implementation simply returns the respective element from the
+   * array you get from get_unit_support_points(), but derived elements may
+   * override this function. In particular, note that the FESystem class
+   * overrides it so that it can return the support points of individual base
+   * elements, if not all the base elements define support points. In this
+   * way, you can still ask for certain support points, even if
+   * get_unit_support_points() only returns an empty array.
+   *
+   * @param index The index of the shape function.
+   */
+  virtual
+  Point<dim>
+  unit_support_point (const unsigned int index) const;
+
+  /**
+   * Return the support points of the trial functions on the unit face, if the
+   * derived finite element defines some.  Finite elements that allow some
+   * kind of interpolation operation usually have support points. On the other
+   * hand, elements that define their degrees of freedom by, for example,
+   * moments on faces, or as derivatives, don't have support points. In that
+   * case, the returned field is empty.
+   *
+   * Note that elements that have support points need not necessarily have
+   * some on the faces, even if the interpolation points are located
+   * physically on a face. For example, the discontinuous elements have
+   * interpolation points on the vertices, and for higher degree elements also
+   * on the faces, but they are not defined to be on faces since in that case
+   * degrees of freedom from both sides of a face (or from all adjacent
+   * elements to a vertex) would be identified with each other, which is not
+   * what we would like to have. Logically, these degrees of freedom are
+   * therefore defined to belong to the cell, rather than the face or vertex.
+   * In that case, the returned array would therefore have length zero.
+   *
+   * If the finite element defines support points, then their number equals
+   * the number of degrees of freedom on the face (#dofs_per_face). The order
+   * of points in the array matches that returned by the
+   * <tt>cell->get_dof_indices</tt> function.
+   *
+   * See the class documentation for details on support points.
+   */
+  const std::vector<Point<dim-1> > &
+  get_unit_face_support_points () const;
+
+  /**
+   * Return whether a finite element has defined support points on faces. If
+   * the result is true, then a call to the get_unit_face_support_points()
+   * function yields a non-empty array.
+   *
+   * For more information, see the documentation for the has_support_points()
+   * function.
+   */
+  bool has_face_support_points () const;
+
+  /**
+   * The function corresponding to the unit_support_point() function, but for
+   * faces. See there for more information.
+   *
+   * @param index The index of the shape function on the face.
+   */
+  virtual
+  Point<dim-1>
+  unit_face_support_point (const unsigned int index) const;
+
+  /**
+   * Return the vector of support points used for generalized interpolation.
+   *
+   * See the
+   * @ref GlossGeneralizedSupport "glossary entry on generalized points"
+   * for more information.
+   */
+  const std::vector<Point<dim> > &
+  get_generalized_support_points () const;
+
+  /**
+   * Return <tt>true</tt> if the class provides nonempty vectors either from
+   * get_unit_support_points() or get_generalized_support_points().
+   *
+   * See the
+   * @ref GlossGeneralizedSupport "glossary entry on generalized support points"
+   * for more information.
+   */
+  bool has_generalized_support_points () const;
+
+  /**
+   * Return the generalized support points on a face, i.e. the counterpart of
+   * get_generalized_support_points() for faces.
+   *
+   * Use has_generalized_face_support_points() to find out whether the
+   * returned array is non-empty.
+   */
+  const std::vector<Point<dim-1> > &
+  get_generalized_face_support_points () const;
+
+  /**
+   * Return whether a finite element has defined generalized support points on
+   * faces. If the result is true, then a call to the
+   * get_generalized_face_support_points() function yields a non-empty array.
+   *
+   * For more information, see the documentation for the has_support_points()
+   * function.
+   */
+  bool
+  has_generalized_face_support_points () const;
+
+  /**
+   * For a given degree of freedom, return whether it is logically associated
+   * with a vertex, line, quad or hex.
+   *
+   * For instance, for continuous finite elements this coincides with the
+   * lowest dimensional object the support point of the degree of freedom lies
+   * on. To give an example, for $Q_1$ elements in 3d, every degree of freedom
+   * is defined by a shape function that we get by interpolating using support
+   * points that lie on the vertices of the cell. The support of these shape
+   * functions of course extends to all edges connected to this vertex, as
+   * well as the adjacent faces and the cell interior, but we say that
+   * logically the degree of freedom is associated with the vertex as this is
+   * the lowest-dimensional object it is associated with. Likewise, for $Q_2$
+   * elements in 3d, the degrees of freedom with support points at edge
+   * midpoints would yield a value of GeometryPrimitive::line from this
+   * function, whereas those on the centers of faces in 3d would return
+   * GeometryPrimitive::quad.
+   *
+   * To make this more formal, the kind of object returned by this function
+   * represents the object so that the support of the shape function
+   * corresponding to the degree of freedom, (i.e., that part of the domain
+   * where the function "lives") is the union of all of the cells sharing this
+   * object. To return to the example above, for $Q_2$ in 3d, the shape
+   * function with support point at an edge midpoint has support on all cells
+   * that share the edge and not only the cells that share the adjacent faces,
+   * and consequently the function will return GeometryPrimitive::line.
+   *
+   * On the other hand, for discontinuous elements of type $DGQ_2$, a degree
+   * of freedom may be associated with an interpolation polynomial that has
+   * its support point physically located at a line bounding a cell, but that
+   * is nonzero only on one cell. Consequently, it is logically associated
+   * with the interior of that cell (i.e., with a GeometryPrimitive::quad in
+   * 2d and a GeometryPrimitive::hex in 3d).
+   *
+   * @param[in] cell_dof_index The index of a shape function or degree of
+   * freedom. This index must be in the range <code>[0,dofs_per_cell)</code>.
+   *
+   * @note The integer value of the object returned by this function equals
+   * the dimensionality of the object it describes, and can consequently be
+   * used in generic programming paradigms. For example, if a degree of
+   * freedom is associated with a vertex, then this function returns
+   * GeometryPrimitive::vertex, which has a numeric value of zero (the
+   * dimensionality of a vertex).
+   */
+  GeometryPrimitive
+  get_associated_geometry_primitive (const unsigned int cell_dof_index) const;
+
+  /**
+   * Interpolate a set of scalar values, computed in the generalized support
+   * points.
+   *
+   * @note This function is implemented in FiniteElement for the case that the
+   * element has support points. In this case, the resulting coefficients are
+   * just the values in the support points. All other elements must
+   * reimplement it.
+   *
+   * @param local_dofs The resulting coefficients.
+   * @param values The scalar values in the generalized support points.
+   */
+  virtual
+  void
+  interpolate(std::vector<double>       &local_dofs,
+              const std::vector<double> &values) const;
+
+  /**
+   * Interpolate a set of vector values, computed in the generalized support
+   * points.
+   *
+   * Since a finite element often only interpolates part of a vector,
+   * <tt>offset</tt> is used to determine the first component of the vector to
+   * be interpolated. Maybe consider changing your data structures to use the
+   * next function.
+   *
+   * @param local_dofs The resulting coefficients.
+   * @param values The vector values in the generalized support points.
+   * @param offset The first component of the vector to be interpolated.
+   */
+  virtual
+  void
+  interpolate(std::vector<double>                &local_dofs,
+              const std::vector<Vector<double> > &values,
+              unsigned int offset = 0) const;
+
+  /**
+   * Interpolate a set of vector values, computed in the generalized support
+   * points.
+   */
+  virtual
+  void
+  interpolate(std::vector<double> &local_dofs,
+              const VectorSlice<const std::vector<std::vector<double> > > &values) const;
+
+  //@}
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   *
+   * This function is made virtual, since finite element objects are usually
+   * accessed through pointers to their base class, rather than the class
+   * itself.
+   */
+  virtual std::size_t memory_consumption () const;
+
+  /**
+   * Exception
+   *
+   * @ingroup Exceptions
+   */
+  DeclException1 (ExcShapeFunctionNotPrimitive,
+                  int,
+                  << "The shape function with index " << arg1
+                  << " is not primitive, i.e. it is vector-valued and "
+                  << "has more than one non-zero vector component. This "
+                  << "function cannot be called for these shape functions. "
+                  << "Maybe you want to use the same function with the "
+                  << "_component suffix?");
+  /**
+   * Exception
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcFENotPrimitive);
+  /**
+   * Exception
+   *
+   * @ingroup Exceptions
+   */
+  DeclExceptionMsg (ExcUnitShapeValuesDoNotExist,
+                    "You are trying to access the values or derivatives of shape functions "
+                    "on the reference cell of an element that does not define its shape "
+                    "functions through mapping from the reference cell. Consequently, "
+                    "you cannot ask for shape function values or derivatives on the "
+                    "reference cell.");
+
+  /**
+   * Attempt to access support points of a finite element that is not
+   * Lagrangian.
+   *
+   * @ingroup Exceptions
+   */
+  DeclExceptionMsg (ExcFEHasNoSupportPoints,
+                    "You are trying to access the support points of a finite "
+                    "element that either has no support points at all, or for "
+                    "which the corresponding tables have not been implemented.");
+
+  /**
+   * Attempt to access embedding matrices of a finite element that did not
+   * implement these matrices.
+   *
+   * @ingroup Exceptions
+   */
+  DeclExceptionMsg (ExcEmbeddingVoid,
+                    "You are trying to access the matrices that describe how "
+                    "to embed a finite element function on one cell into the "
+                    "finite element space on one of its children (i.e., the "
+                    "'embedding' or 'prolongation' matrices). However, the "
+                    "current finite element can either not define this sort of "
+                    "operation, or it has not yet been implemented.");
+
+  /**
+   * Attempt to access restriction matrices of a finite element that did not
+   * implement these matrices.
+   *
+   * Exception
+   * @ingroup Exceptions
+   */
+  DeclExceptionMsg (ExcProjectionVoid,
+                    "You are trying to access the matrices that describe how "
+                    "to restrict a finite element function from the children "
+                    "of one cell to the finite element space defined on their "
+                    "parent (i.e., the 'restriction' or 'projection' matrices). "
+                    "However, the current finite element can either not define "
+                    "this sort of operation, or it has not yet been "
+                    "implemented.");
+
+  /**
+   * Exception
+   * @ingroup Exceptions
+   */
+  DeclException2 (ExcWrongInterfaceMatrixSize,
+                  int, int,
+                  << "The interface matrix has a size of " << arg1
+                  << "x" << arg2
+                  << ", which is not reasonable for the current element "
+                  "in the present dimension.");
+  /**
+   * Exception
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcInterpolationNotImplemented);
+
+protected:
+
+  /**
+   * Reinit the vectors of restriction and prolongation matrices to the right
+   * sizes: For every refinement case, except for
+   * RefinementCase::no_refinement, and for every child of that refinement
+   * case the space of one restriction and prolongation matrix is allocated,
+   * see the documentation of the restriction and prolongation vectors for
+   * more detail on the actual vector sizes.
+   *
+   * @param isotropic_restriction_only only the restriction matrices required
+   * for isotropic refinement are reinited to the right size.
+   * @param isotropic_prolongation_only only the prolongation matrices
+   * required for isotropic refinement are reinited to the right size.
+   */
+  void reinit_restriction_and_prolongation_matrices(const bool isotropic_restriction_only=false,
+                                                    const bool isotropic_prolongation_only=false);
+
+  /**
+   * Vector of projection matrices. See get_restriction_matrix() above. The
+   * constructor initializes these matrices to zero dimensions, which can be
+   * changed by derived classes implementing them.
+   *
+   * Note that <code>restriction[refinement_case-1][child]</code> includes
+   * the restriction matrix of child <code>child</code> for the RefinementCase
+   * <code>refinement_case</code>. Here, we use <code>refinement_case-1</code>
+   * instead of <code>refinement_case</code> as for
+   * RefinementCase::no_refinement(=0) there are no restriction matrices
+   * available.
+   */
+  std::vector<std::vector<FullMatrix<double> > > restriction;
+
+  /**
+   * Vector of embedding matrices. See <tt>get_prolongation_matrix()</tt>
+   * above. The constructor initializes these matrices to zero dimensions,
+   * which can be changed by derived classes implementing them.
+   *
+   * Note that <code>prolongation[refinement_case-1][child]</code> includes
+   * the prolongation matrix of child <code>child</code> for the
+   * RefinementCase <code>refinement_case</code>. Here, we use
+   * <code>refinement_case-1</code> instead of <code>refinement_case</code> as
+   * for RefinementCase::no_refinement(=0) there are no prolongation matrices
+   * available.
+   */
+  std::vector<std::vector<FullMatrix<double> > > prolongation;
+
+  /**
+   * Specify the constraints to which the dofs on the two sides of a cell
+   * interface are subject if the line connects two cells of which one is
+   * refined once.
+   *
+   * For further details see the general description of the derived class.
+   *
+   * This field is obviously useless in one dimension, where it has zero
+   * size.
+   */
+  FullMatrix<double> interface_constraints;
+
+  /**
+   * List of support points on the unit cell, in case the finite element has
+   * any. The constructor leaves this field empty, derived classes may write
+   * in some contents.
+   *
+   * Finite elements that allow some kind of interpolation operation usually
+   * have support points. On the other hand, elements that define their
+   * degrees of freedom by, for example, moments on faces, or as derivatives,
+   * don't have support points. In that case, this field remains empty.
+   */
+  std::vector<Point<dim> > unit_support_points;
+
+  /**
+   * Same for the faces. See the description of the
+   * get_unit_face_support_points() function for a discussion of what
+   * constitutes a face support point.
+   */
+  std::vector<Point<dim-1> > unit_face_support_points;
+
+  /**
+   * Support points used for interpolation functions of non-Lagrangian
+   * elements.
+   */
+  std::vector<Point<dim> > generalized_support_points;
+
+  /**
+   * Face support points used for interpolation functions of non-Lagrangian
+   * elements.
+   */
+  std::vector<Point<dim-1> > generalized_face_support_points;
+
+  /**
+   * For faces with non-standard face_orientation in 3D, the dofs on faces
+   * (quads) have to be permuted in order to be combined with the correct
+   * shape functions. Given a local dof @p index on a quad, return the shift
+   * in the local index, if the face has non-standard face_orientation, i.e.
+   * <code>old_index + shift = new_index</code>. In 2D and 1D there is no need
+   * for permutation so the vector is empty. In 3D it has the size of <code>
+   * #dofs_per_quad * 8 </code>, where 8 is the number of orientations, a face
+   * can be in (all combinations of the three bool flags face_orientation,
+   * face_flip and face_rotation).
+   *
+   * The standard implementation fills this with zeros, i.e. no permutation at
+   * all. Derived finite element classes have to fill this Table with the
+   * correct values.
+   */
+  Table<2,int> adjust_quad_dof_index_for_face_orientation_table;
+
+  /**
+   * For lines with non-standard line_orientation in 3D, the dofs on lines
+   * have to be permuted in order to be combined with the correct shape
+   * functions. Given a local dof @p index on a line, return the shift in the
+   * local index, if the line has non-standard line_orientation, i.e.
+   * <code>old_index + shift = new_index</code>. In 2D and 1D there is no need
+   * for permutation so the vector is empty. In 3D it has the size of
+   * #dofs_per_line.
+   *
+   * The standard implementation fills this with zeros, i.e. no permutation at
+   * all. Derived finite element classes have to fill this vector with the
+   * correct values.
+   */
+  std::vector<int> adjust_line_dof_index_for_line_orientation_table;
+
+  /**
+   * Store what system_to_component_index() will return.
+   */
+  std::vector<std::pair<unsigned int, unsigned int> > system_to_component_table;
+
+  /**
+   * Map between linear dofs and component dofs on face. This is filled with
+   * default values in the constructor, but derived classes will have to
+   * overwrite the information if necessary.
+   *
+   * By component, we mean the vector component, not the base element. The
+   * information thus only makes sense if a shape function is non-zero in only
+   * one component.
+   */
+  std::vector<std::pair<unsigned int, unsigned int> > face_system_to_component_table;
+
+  /**
+   * For each shape function, store to which base element and which instance
+   * of this base element (in case its multiplicity is greater than one) it
+   * belongs, and its index within this base element. If the element is not
+   * composed of others, then base and instance are always zero, and the index
+   * is equal to the number of the shape function. If the element is composed
+   * of single instances of other elements (i.e. all with multiplicity one)
+   * all of which are scalar, then base values and dof indices within this
+   * element are equal to the #system_to_component_table. It differs only in
+   * case the element is composed of other elements and at least one of them
+   * is vector-valued itself.
+   *
+   * This array has valid values also in the case of vector-valued (i.e. non-
+   * primitive) shape functions, in contrast to the
+   * #system_to_component_table.
+   */
+  std::vector<std::pair<std::pair<unsigned int,unsigned int>,unsigned int> >
+  system_to_base_table;
+
+  /**
+   * Likewise for the indices on faces.
+   */
+  std::vector<std::pair<std::pair<unsigned int,unsigned int>,unsigned int> >
+  face_system_to_base_table;
+
+  /**
+   * For each base element, store the number of blocks generated by the base
+   * and the first block in a block vector it will generate.
+   */
+  BlockIndices base_to_block_indices;
+
+  /**
+   * The base element establishing a component.
+   *
+   * For each component number <tt>c</tt>, the entries have the following
+   * meaning: <dl> <dt><tt>table[c].first.first</tt></dt> <dd>Number of the
+   * base element for <tt>c</tt>.</dd> <dt><tt>table[c].first.second</tt></dt>
+   * <dd>Component in the base element for <tt>c</tt>.</dd>
+   * <dt><tt>table[c].second</tt></dt> <dd>Multiple of the base element for
+   * <tt>c</tt>.</dd> </dl>
+   *
+   * This variable is set to the correct size by the constructor of this
+   * class, but needs to be initialized by derived classes, unless its size is
+   * one and the only entry is a zero, which is the case for scalar elements.
+   * In that case, the initialization by the base class is sufficient.
+   */
+  std::vector<std::pair<std::pair<unsigned int, unsigned int>, unsigned int> >
+  component_to_base_table;
+
+  /**
+   * A flag determining whether restriction matrices are to be concatenated or
+   * summed up. See the discussion about restriction matrices in the general
+   * class documentation for more information.
+   */
+  const std::vector<bool> restriction_is_additive_flags;
+
+  /**
+   * For each shape function, give a vector of bools (with size equal to the
+   * number of vector components which this finite element has) indicating in
+   * which component each of these shape functions is non-zero.
+   *
+   * For primitive elements, there is only one non-zero component.
+   */
+  const std::vector<ComponentMask> nonzero_components;
+
+  /**
+   * This array holds how many values in the respective entry of the
+   * #nonzero_components element are non-zero. The array is thus a short-cut
+   * to allow faster access to this information than if we had to count the
+   * non-zero entries upon each request for this information. The field is
+   * initialized in the constructor of this class.
+   */
+  const std::vector<unsigned int> n_nonzero_components_table;
+
+  /**
+   * Return the size of interface constraint matrices. Since this is needed in
+   * every derived finite element class when initializing their size, it is
+   * placed into this function, to avoid having to recompute the dimension-
+   * dependent size of these matrices each time.
+   *
+   * Note that some elements do not implement the interface constraints for
+   * certain polynomial degrees. In this case, this function still returns the
+   * size these matrices should have when implemented, but the actual matrices
+   * are empty.
+   */
+  TableIndices<2>
+  interface_constraints_size () const;
+
+  /**
+   * Given the pattern of nonzero components for each shape function, compute
+   * for each entry how many components are non-zero for each shape function.
+   * This function is used in the constructor of this class.
+   */
+  static
+  std::vector<unsigned int>
+  compute_n_nonzero_components (const std::vector<ComponentMask> &nonzero_components);
+
+  /**
+   * Given a set of update flags, compute which other quantities <i>also</i>
+   * need to be computed in order to satisfy the request by the given flags.
+   * Then return the combination of the original set of flags and those just
+   * computed.
+   *
+   * As an example, if @p update_flags contains update_gradients, a finite
+   * element class will typically require the computation of the inverse of
+   * the Jacobian matrix in order to rotate the gradient of shape functions on
+   * the reference cell to the real cell. It would then return not just
+   * update_gradients, but also update_covariant_transformation, the flag that
+   * makes the mapping class produce the inverse of the Jacobian matrix.
+   *
+   * An extensive discussion of the interaction between this function and
+   * FEValues can be found in the
+   * @ref FE_vs_Mapping_vs_FEValues
+   * documentation module.
+   *
+   * @see UpdateFlags
+   */
+  virtual
+  UpdateFlags
+  requires_update_flags (const UpdateFlags update_flags) const = 0;
+
+  /**
+   * Create an internal data object and return a pointer to it of which the
+   * caller of this function then assumes ownership. This object will then be
+   * passed to the FiniteElement::fill_fe_values() every time the finite
+   * element shape functions and their derivatives are evaluated on a concrete
+   * cell. The object created here is therefore used by derived classes as a
+   * place for scratch objects that are used in evaluating shape functions, as
+   * well as to store information that can be pre-computed once and re-used on
+   * every cell (e.g., for evaluating the values and gradients of shape
+   * functions on the reference cell, for later re-use when transforming these
+   * values to a concrete cell).
+   *
+   * This function is the first one called in the process of initializing a
+   * FEValues object for a given mapping and finite element object. The
+   * returned object will later be passed to FiniteElement::fill_fe_values()
+   * for a concrete cell, which will itself place its output into an object of
+   * type internal::FEValues::FiniteElementRelatedData. Since there may be
+   * data that can already be computed in its <i>final</i> form on the
+   * reference cell, this function also receives a reference to the
+   * internal::FEValues::FiniteElementRelatedData object as its last argument.
+   * This output argument is guaranteed to always be the same one when used
+   * with the InternalDataBase object returned by this function. In other
+   * words, the subdivision of scratch data and final data in the returned
+   * object and the @p output_data object is as follows: If data can be pre-
+   * computed on the reference cell in the exact form in which it will later
+   * be needed on a concrete cell, then this function should already emplace
+   * it in the @p output_data object. An example are the values of shape
+   * functions at quadrature points for the usual Lagrange elements which on a
+   * concrete cell are identical to the ones on the reference cell. On the
+   * other hand, if some data can be pre-computed to make computations on a
+   * concrete cell <i>cheaper</i>, then it should be put into the returned
+   * object for later re-use in a derived class's implementation of
+   * FiniteElement::fill_fe_values(). An example are the gradients of shape
+   * functions on the reference cell for Lagrange elements: to compute the
+   * gradients of the shape functions on a concrete cell, one has to multiply
+   * the gradients on the reference cell by the inverse of the Jacobian of the
+   * mapping; consequently, we cannot already compute the gradients on a
+   * concrete cell at the time the current function is called, but we can at
+   * least pre-compute the gradients on the reference cell, and store it in
+   * the object returned.
+   *
+   * An extensive discussion of the interaction between this function and
+   * FEValues can be found in the
+   * @ref FE_vs_Mapping_vs_FEValues
+   * documentation module. See also the documentation of the InternalDataBase
+   * class.
+   *
+   * @param[in] update_flags A set of UpdateFlags values that describe what
+   * kind of information the FEValues object requests the finite element to
+   * compute. This set of flags may also include information that the finite
+   * element can not compute, e.g., flags that pertain to data produced by the
+   * mapping. An implementation of this function needs to set up all data
+   * fields in the returned object that are necessary to produce the finite-
+   * element related data specified by these flags, and may already pre-
+   * compute part of this information as discussed above. Elements may want to
+   * store these update flags (or a subset of these flags) in
+   * InternalDataBase::update_each so they know at the time when
+   * FiniteElement::fill_fe_values() is called what they are supposed to
+   * compute.
+   * @param[in] mapping A reference to the mapping used for computing values
+   * and derivatives of shape functions.
+   * @param[in] quadrature A reference to the object that describes where the
+   * shape functions should be evaluated.
+   * @param[out] output_data A reference to the object that FEValues will use
+   * in conjunction with the object returned here and where an implementation
+   * of FiniteElement::fill_fe_values() will place the requested information.
+   * This allows the current function to already pre-compute pieces of
+   * information that can be computed on the reference cell, as discussed
+   * above. FEValues guarantees that this output object and the object
+   * returned by the current function will always be used together.
+   * @return A pointer to an object of a type derived from InternalDataBase
+   * and that derived classes can use to store scratch data that can be pre-
+   * computed, or for scratch arrays that then only need to be allocated once.
+   * The calling site assumes ownership of this object and will delete it when
+   * it is no longer necessary.
+   */
+  virtual
+  InternalDataBase *
+  get_data (const UpdateFlags                                                    update_flags,
+            const Mapping<dim,spacedim>                                         &mapping,
+            const Quadrature<dim>                                               &quadrature,
+            dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const = 0;
+
+  /**
+   * Like get_data(), but return an object that will later be used for
+   * evaluating shape function information at quadrature points on faces of
+   * cells. The object will then be used in calls to implementations of
+   * FiniteElement::fill_fe_face_values(). See the documentation of get_data()
+   * for more information.
+   *
+   * The default implementation of this function converts the face quadrature
+   * into a cell quadrature with appropriate quadrature point locations, and
+   * with that calls the get_data() function above that has to be implemented
+   * in derived classes.
+   *
+   * @param[in] update_flags A set of UpdateFlags values that describe what
+   * kind of information the FEValues object requests the finite element to
+   * compute. This set of flags may also include information that the finite
+   * element can not compute, e.g., flags that pertain to data produced by the
+   * mapping. An implementation of this function needs to set up all data
+   * fields in the returned object that are necessary to produce the finite-
+   * element related data specified by these flags, and may already pre-
+   * compute part of this information as discussed above. Elements may want to
+   * store these update flags (or a subset of these flags) in
+   * InternalDataBase::update_each so they know at the time when
+   * FiniteElement::fill_fe_face_values() is called what they are supposed to
+   * compute.
+   * @param[in] mapping A reference to the mapping used for computing values
+   * and derivatives of shape functions.
+   * @param[in] quadrature A reference to the object that describes where the
+   * shape functions should be evaluated.
+   * @param[out] output_data A reference to the object that FEValues will use
+   * in conjunction with the object returned here and where an implementation
+   * of FiniteElement::fill_fe_face_values() will place the requested
+   * information. This allows the current function to already pre-compute
+   * pieces of information that can be computed on the reference cell, as
+   * discussed above. FEValues guarantees that this output object and the
+   * object returned by the current function will always be used together.
+   * @return A pointer to an object of a type derived from InternalDataBase
+   * and that derived classes can use to store scratch data that can be pre-
+   * computed, or for scratch arrays that then only need to be allocated once.
+   * The calling site assumes ownership of this object and will delete it when
+   * it is no longer necessary.
+   */
+  virtual
+  InternalDataBase *
+  get_face_data (const UpdateFlags                                                    update_flags,
+                 const Mapping<dim,spacedim>                                         &mapping,
+                 const Quadrature<dim-1>                                             &quadrature,
+                 dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const;
+
+  /**
+   * Like get_data(), but return an object that will later be used for
+   * evaluating shape function information at quadrature points on children of
+   * faces of cells. The object will then be used in calls to implementations
+   * of FiniteElement::fill_fe_subface_values(). See the documentation of
+   * get_data() for more information.
+   *
+   * The default implementation of this function converts the face quadrature
+   * into a cell quadrature with appropriate quadrature point locations, and
+   * with that calls the get_data() function above that has to be implemented
+   * in derived classes.
+   *
+   * @param[in] update_flags A set of UpdateFlags values that describe what
+   * kind of information the FEValues object requests the finite element to
+   * compute. This set of flags may also include information that the finite
+   * element can not compute, e.g., flags that pertain to data produced by the
+   * mapping. An implementation of this function needs to set up all data
+   * fields in the returned object that are necessary to produce the finite-
+   * element related data specified by these flags, and may already pre-
+   * compute part of this information as discussed above. Elements may want to
+   * store these update flags (or a subset of these flags) in
+   * InternalDataBase::update_each so they know at the time when
+   * FiniteElement::fill_fe_subface_values() is called what they are supposed
+   * to compute.
+   * @param[in] mapping A reference to the mapping used for computing values
+   * and derivatives of shape functions.
+   * @param[in] quadrature A reference to the object that describes where the
+   * shape functions should be evaluated.
+   * @param[out] output_data A reference to the object that FEValues will use
+   * in conjunction with the object returned here and where an implementation
+   * of FiniteElement::fill_fe_subface_values() will place the requested
+   * information. This allows the current function to already pre-compute
+   * pieces of information that can be computed on the reference cell, as
+   * discussed above. FEValues guarantees that this output object and the
+   * object returned by the current function will always be used together.
+   * @return A pointer to an object of a type derived from InternalDataBase
+   * and that derived classes can use to store scratch data that can be pre-
+   * computed, or for scratch arrays that then only need to be allocated once.
+   * The calling site assumes ownership of this object and will delete it when
+   * it is no longer necessary.
+   */
+  virtual
+  InternalDataBase *
+  get_subface_data (const UpdateFlags                                                    update_flags,
+                    const Mapping<dim,spacedim>                                         &mapping,
+                    const Quadrature<dim-1>                                             &quadrature,
+                    dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const;
+
+  /**
+   * Compute information about the shape functions on the cell denoted by the
+   * first argument. Derived classes will have to implement this function
+   * based on the kind of element they represent. It is called by
+   * FEValues::reinit().
+   *
+   * Conceptually, this function evaluates shape functions and their
+   * derivatives at the quadrature points represented by the mapped locations
+   * of those described by the quadrature argument to this function. In many
+   * cases, computing derivatives of shape functions (and in some cases also
+   * computing values of shape functions) requires making use of the mapping
+   * from the reference to the real cell; this information can either be taken
+   * from the @p mapping_data object that has been filled for the current cell
+   * before this function is called, or by calling the member functions of a
+   * Mapping object with the @p mapping_internal object that also corresponds
+   * to the current cell.
+   *
+   * The information computed by this function is used to fill the various
+   * member variables of the output argument of this function. Which of the
+   * member variables of that structure should be filled is determined by the
+   * update flags stored in the FiniteElement::InternalDataBase::update_each
+   * field of the object passed to this function. These flags are typically
+   * set by FiniteElement::get_data(), FiniteElement::get_face_data() and
+   * FiniteElement::get_subface_data() (or, more specifically, implementations
+   * of these functions in derived classes).
+   *
+   * An extensive discussion of the interaction between this function and
+   * FEValues can be found in the
+   * @ref FE_vs_Mapping_vs_FEValues
+   * documentation module.
+   *
+   * @param[in] cell The cell of the triangulation for which this function is
+   * to compute a mapping from the reference cell to.
+   * @param[in] cell_similarity Whether or not the cell given as first
+   * argument is simply a translation, rotation, etc of the cell for which
+   * this function was called the most recent time. This information is
+   * computed simply by matching the vertices (as stored by the Triangulation)
+   * between the previous and the current cell. The value is passed by const
+   * copy and this function returns void, so implementations can inspect it
+   * but have no way of handing a modified value back to the caller.
+   * @param[in] quadrature A reference to the quadrature formula in use for
+   * the current evaluation. This quadrature object is the same as the one
+   * used when creating the @p fe_internal object. The current object is
+   * then responsible for evaluating shape functions at the mapped locations
+   * of the quadrature points represented by this object.
+   * @param[in] mapping A reference to the mapping object used to map from the
+   * reference cell to the current cell. This object was used to compute the
+   * information in the @p mapping_data object before the current function was
+   * called. It is also the mapping object that created the @p
+   * mapping_internal object via Mapping::get_data(). You will need the
+   * reference to this mapping object most often to call Mapping::transform()
+   * to transform gradients and higher derivatives from the reference to the
+   * current cell.
+   * @param[in] mapping_internal An object specific to the mapping object.
+   * What the mapping chooses to store in there is of no relevance to the
+   * current function, but you may have to pass a reference to this object to
+   * certain functions of the Mapping class (e.g., Mapping::transform()) if
+   * you need to call them from the current function.
+   * @param[in] mapping_data The output object into which the
+   * Mapping::fill_fe_values() function wrote the mapping information
+   * corresponding to the current cell. This includes, for example, Jacobians
+   * of the mapping that may be of relevance to the current function, as well
+   * as other information that FEValues::reinit() requested from the mapping.
+   * @param[in] fe_internal A reference to an object previously created by
+   * get_data() and that may be used to store information the finite element
+   * can compute once on the reference cell. See the documentation of the
+   * FiniteElement::InternalDataBase class for an extensive description of the
+   * purpose of these objects.
+   * @param[out] output_data A reference to an object whose member variables
+   * should be computed. Not all of the members of this argument need to be
+   * filled; which ones need to be filled is determined by the update flags
+   * stored inside the @p fe_internal object.
+   *
+   * @note FEValues ensures that this function is always called with the same
+   * pair of @p fe_internal and @p output_data objects. In other words, if an
+   * implementation of this function knows that it has written a piece of data
+   * into the output argument in a previous call, then there is no need to
+   * copy it there again in a later call if the implementation knows that this
+   * is the same value.
+   */
+  virtual
+  void
+  fill_fe_values (const typename Triangulation<dim,spacedim>::cell_iterator           &cell,
+                  const CellSimilarity::Similarity                                     cell_similarity,
+                  const Quadrature<dim>                                               &quadrature,
+                  const Mapping<dim,spacedim>                                         &mapping,
+                  const typename Mapping<dim,spacedim>::InternalDataBase              &mapping_internal,
+                  const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &mapping_data,
+                  const InternalDataBase                                              &fe_internal,
+                  dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const = 0;
+
+  /**
+   * This function is the equivalent to FiniteElement::fill_fe_values(), but
+   * for faces of cells. See there for an extensive discussion of its purpose.
+   * It is called by FEFaceValues::reinit().
+   *
+   * @param[in] cell The cell of the triangulation on which the data in @p
+   * output_data is to be computed.
+   * @param[in] face_no The number of the face we are currently considering,
+   * indexed among the faces of the cell specified by the previous argument.
+   * @param[in] quadrature A reference to the quadrature formula in use for
+   * the current evaluation. This quadrature object is the same as the one
+   * used when creating the @p fe_internal object. The current object is
+   * then responsible for evaluating shape functions at the mapped locations
+   * of the quadrature points represented by this object.
+   * @param[in] mapping A reference to the mapping object used to map from the
+   * reference cell to the current cell. This object was used to compute the
+   * information in the @p mapping_data object before the current function was
+   * called. It is also the mapping object that created the @p
+   * mapping_internal object via Mapping::get_data(). You will need the
+   * reference to this mapping object most often to call Mapping::transform()
+   * to transform gradients and higher derivatives from the reference to the
+   * current cell.
+   * @param[in] mapping_internal An object specific to the mapping object.
+   * What the mapping chooses to store in there is of no relevance to the
+   * current function, but you may have to pass a reference to this object to
+   * certain functions of the Mapping class (e.g., Mapping::transform()) if
+   * you need to call them from the current function.
+   * @param[in] mapping_data The output object into which the
+   * Mapping::fill_fe_values() function wrote the mapping information
+   * corresponding to the current cell. This includes, for example, Jacobians
+   * of the mapping that may be of relevance to the current function, as well
+   * as other information that FEValues::reinit() requested from the mapping.
+   * @param[in] fe_internal A reference to an object previously created by
+   * get_data() and that may be used to store information the finite element
+   * can compute once on the reference cell. See the documentation of the
+   * FiniteElement::InternalDataBase class for an extensive description of
+   * the purpose of these objects.
+   * @param[out] output_data A reference to an object whose member variables
+   * should be computed. Not all of the members of this argument need to be
+   * filled; which ones need to be filled is determined by the update flags
+   * stored inside the @p fe_internal object.
+   */
+  virtual
+  void
+  fill_fe_face_values (const typename Triangulation<dim,spacedim>::cell_iterator           &cell,
+                       const unsigned int                                                   face_no,
+                       const Quadrature<dim-1>                                             &quadrature,
+                       const Mapping<dim,spacedim>                                         &mapping,
+                       const typename Mapping<dim,spacedim>::InternalDataBase              &mapping_internal,
+                       const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &mapping_data,
+                       const InternalDataBase                                              &fe_internal,
+                       dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const = 0;
+
+  /**
+   * This function is the equivalent to FiniteElement::fill_fe_values(), but
+   * for the children of faces of cells. See there for an extensive discussion
+   * of its purpose. It is called by FESubfaceValues::reinit().
+   *
+   * @param[in] cell The cell of the triangulation on which the data in @p
+   * output_data is to be computed.
+   * @param[in] face_no The number of the face we are currently considering,
+   * indexed among the faces of the cell specified by the previous argument.
+   * @param[in] sub_no The number of the subface, i.e., the number of the
+   * child of a face, that we are currently considering, indexed among the
+   * children of the face specified by the previous argument.
+   * @param[in] quadrature A reference to the quadrature formula in use for
+   * the current evaluation. This quadrature object is the same as the one
+   * used when creating the @p fe_internal object. The current object is
+   * then responsible for evaluating shape functions at the mapped locations
+   * of the quadrature points represented by this object.
+   * @param[in] mapping A reference to the mapping object used to map from the
+   * reference cell to the current cell. This object was used to compute the
+   * information in the @p mapping_data object before the current function was
+   * called. It is also the mapping object that created the @p
+   * mapping_internal object via Mapping::get_data(). You will need the
+   * reference to this mapping object most often to call Mapping::transform()
+   * to transform gradients and higher derivatives from the reference to the
+   * current cell.
+   * @param[in] mapping_internal An object specific to the mapping object.
+   * What the mapping chooses to store in there is of no relevance to the
+   * current function, but you may have to pass a reference to this object to
+   * certain functions of the Mapping class (e.g., Mapping::transform()) if
+   * you need to call them from the current function.
+   * @param[in] mapping_data The output object into which the
+   * Mapping::fill_fe_values() function wrote the mapping information
+   * corresponding to the current cell. This includes, for example, Jacobians
+   * of the mapping that may be of relevance to the current function, as well
+   * as other information that FEValues::reinit() requested from the mapping.
+   * @param[in] fe_internal A reference to an object previously created by
+   * get_data() and that may be used to store information the finite element
+   * can compute once on the reference cell. See the documentation of the
+   * FiniteElement::InternalDataBase class for an extensive description of
+   * the purpose of these objects.
+   * @param[out] output_data A reference to an object whose member variables
+   * should be computed. Not all of the members of this argument need to be
+   * filled; which ones need to be filled is determined by the update flags
+   * stored inside the @p fe_internal object.
+   */
+  virtual
+  void
+  fill_fe_subface_values (const typename Triangulation<dim,spacedim>::cell_iterator           &cell,
+                          const unsigned int                                                   face_no,
+                          const unsigned int                                                   sub_no,
+                          const Quadrature<dim-1>                                             &quadrature,
+                          const Mapping<dim,spacedim>                                         &mapping,
+                          const typename Mapping<dim,spacedim>::InternalDataBase              &mapping_internal,
+                          const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &mapping_data,
+                          const InternalDataBase                                              &fe_internal,
+                          dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const = 0;
+
+  friend class InternalDataBase;
+  friend class FEValuesBase<dim,spacedim>;
+  friend class FEValues<dim,spacedim>;
+  friend class FEFaceValues<dim,spacedim>;
+  friend class FESubfaceValues<dim,spacedim>;
+  friend class FESystem<dim,spacedim>;
+};
+
+
+//----------------------------------------------------------------------//
+
+
+template <int dim, int spacedim>
+inline
+const FiniteElement<dim,spacedim> &
+FiniteElement<dim,spacedim>::operator[] (const unsigned int fe_index) const
+{
+  (void)fe_index; // keeps fe_index "used"; presumably needed when Assert compiles to nothing
+  Assert (fe_index == 0,
+          ExcMessage ("A fe_index of zero is the only index allowed here"));
+  return *this;   // the only element reachable through this operator is the object itself
+}
+
+
+
+template <int dim, int spacedim>
+inline
+std::pair<unsigned int,unsigned int>
+FiniteElement<dim,spacedim>::system_to_component_index (const unsigned int index) const
+{
+  // (component, index within component) is only meaningful for primitive shape functions
+  AssertIndexRange (index, system_to_component_table.size());
+  Assert (is_primitive (index),
+          ( typename FiniteElement<dim,spacedim>::ExcShapeFunctionNotPrimitive(index)) );
+  return system_to_component_table[index];
+}
+
+
+
+template <int dim, int spacedim>
+inline unsigned int
+FiniteElement<dim,spacedim>::n_base_elements () const
+{
+  // the base elements are counted by the number of entries of base_to_block_indices
+  return base_to_block_indices.size();
+}
+
+
+
+template <int dim, int spacedim>
+inline unsigned int
+FiniteElement<dim,spacedim>::element_multiplicity (const unsigned int index) const
+{
+  // the multiplicity is read off as the size of block 'index' of base_to_block_indices
+  return static_cast<unsigned int>(base_to_block_indices.block_size(index));
+}
+
+
+
+template <int dim, int spacedim>
+inline unsigned int
+FiniteElement<dim,spacedim>::component_to_system_index (const unsigned int component,
+                                                        const unsigned int index) const
+{
+  AssertIndexRange(component, this->n_components());
+  // inverse lookup in system_to_component_table; std::find() yields end() on failure
+  const std::vector<std::pair<unsigned int, unsigned int> >::const_iterator
+  it = std::find(system_to_component_table.begin(), system_to_component_table.end(),
+                 std::pair<unsigned int, unsigned int>(component, index));
+  Assert(it != system_to_component_table.end(),
+         ExcMessage ("You are asking for the number of the shape function "
+                     "within a system element that corresponds to vector "
+                     "component " + Utilities::int_to_string(component) + " and within this to "
+                     "index " + Utilities::int_to_string(index) + ". But no such "
+                     "shape function exists."));
+  // std::distance() returns a signed difference type; narrow it explicitly
+  return static_cast<unsigned int>(std::distance(system_to_component_table.begin(), it));
+}
+
+
+
+template <int dim, int spacedim>
+inline
+std::pair<unsigned int,unsigned int>
+FiniteElement<dim,spacedim>::face_system_to_component_index (const unsigned int index) const
+{
+  AssertIndexRange (index, face_system_to_component_table.size());
+
+  // in debug mode, check whether the function is primitive, since
+  // otherwise the result may have no meaning
+  //
+  // since the primitivity tables are all geared towards cell dof
+  // indices, rather than face dof indices, we have to work a little
+  // bit: the face dof index is first translated into a cell dof index
+  // via face_to_cell_index(), here always with respect to face 0
+  //
+  // NOTE(review): the original comment only argued that in 1d the face
+  // index is equal to the cell index; presumably primitivity does not
+  // depend on the face chosen in higher dimensions -- TODO confirm
+  //
+  // the exception below reports the face index passed in by the caller,
+  // not the translated cell index
+  Assert (is_primitive(this->face_to_cell_index(index, 0)),
+          (typename FiniteElement<dim,spacedim>::ExcShapeFunctionNotPrimitive(index)) );
+
+  return face_system_to_component_table[index];
+}
+
+
+
+template <int dim, int spacedim>
+inline
+std::pair<std::pair<unsigned int,unsigned int>,unsigned int>
+FiniteElement<dim,spacedim>::system_to_base_index (const unsigned int index) const
+{
+  // plain table lookup; unlike system_to_component_index(), no primitivity check is made
+  AssertIndexRange (index, system_to_base_table.size());
+  return system_to_base_table[index];
+}
+
+
+
+
+template <int dim, int spacedim>
+inline
+std::pair<std::pair<unsigned int,unsigned int>,unsigned int>
+FiniteElement<dim,spacedim>::face_system_to_base_index (const unsigned int index) const
+{
+  // plain table lookup for face dofs; index must lie within face_system_to_base_table
+  AssertIndexRange (index, face_system_to_base_table.size());
+  return face_system_to_base_table[index];
+}
+
+
+
+template <int dim, int spacedim>
+inline types::global_dof_index
+FiniteElement<dim,spacedim>::first_block_of_base (const unsigned int index) const
+{
+  // forwards to base_to_block_indices: the start recorded for block 'index'
+  return base_to_block_indices.block_start(index);
+}
+
+
+
+template <int dim, int spacedim>
+inline
+std::pair<unsigned int,unsigned int>
+FiniteElement<dim,spacedim>::component_to_base_index (const unsigned int index) const
+{
+  AssertIndexRange (index, component_to_base_table.size());
+
+  // only the .first member of the table entry is handed back to the caller
+  return component_to_base_table[index].first;
+}
+
+
+
+template <int dim, int spacedim>
+inline std::pair<unsigned int,unsigned int>
+FiniteElement<dim,spacedim>::block_to_base_index (const unsigned int index) const
+{
+  // forwards to base_to_block_indices; presumably (base element, block within that base)
+  return base_to_block_indices.global_to_local(index);
+}
+
+
+
+template <int dim, int spacedim>
+inline
+std::pair<unsigned int,types::global_dof_index>
+FiniteElement<dim,spacedim>::system_to_block_index (const unsigned int index) const
+{
+  AssertIndexRange (index, this->dofs_per_cell);
+  // look the entry up once: ((base, block within that base), index within the block)
+  const std::pair<std::pair<unsigned int,unsigned int>,unsigned int>
+  base_index = system_to_base_table[index];
+  // The block is computed simply as first block of this base plus
+  // the index within the base blocks
+  return std::pair<unsigned int, types::global_dof_index>(
+           first_block_of_base(base_index.first.first) + base_index.first.second,
+           base_index.second);
+}
+
+
+
+template <int dim, int spacedim>
+inline
+bool
+FiniteElement<dim,spacedim>::restriction_is_additive (const unsigned int index) const
+{
+  // the flags are indexed by cell shape function, hence the bound dofs_per_cell
+  AssertIndexRange (index, this->dofs_per_cell);
+  return restriction_is_additive_flags[index];
+}
+
+
+
+template <int dim, int spacedim>
+inline const ComponentMask &
+FiniteElement<dim,spacedim>::get_nonzero_components (const unsigned int i) const
+{
+  // returns a reference into nonzero_components, i.e. no mask is copied
+  AssertIndexRange (i, this->dofs_per_cell);
+  return nonzero_components[i];
+}
+
+
+
+template <int dim, int spacedim>
+inline unsigned int
+FiniteElement<dim,spacedim>::n_nonzero_components (const unsigned int i) const
+{
+  // table lookup: the count is read from n_nonzero_components_table
+  AssertIndexRange (i, this->dofs_per_cell);
+  return n_nonzero_components_table[i];
+}
+
+
+
+template <int dim, int spacedim>
+inline
+bool
+FiniteElement<dim,spacedim>::is_primitive (const unsigned int i) const
+{
+  AssertIndexRange (i, this->dofs_per_cell);
+
+  // for good measure, short circuit the test if the entire FE is
+  // primitive: in that case every valid index is reported as primitive
+  if (is_primitive())
+    return true;
+
+  // otherwise, return primitivity of this shape function by checking
+  // whether it has more than one non-zero component or not. we could
+  // cache this value in an array of bools, but accessing a bit-vector
+  // (as std::vector<bool> is) is probably more expensive than just
+  // comparing against 1
+  const bool has_exactly_one_nonzero_component
+    = (n_nonzero_components_table[i] == 1);
+
+  return has_exactly_one_nonzero_component;
+}
+
+
+
+template <int dim, int spacedim>
+inline
+GeometryPrimitive
+FiniteElement<dim,spacedim>::get_associated_geometry_primitive (const unsigned int cell_dof_index) const
+{
+  AssertIndexRange (cell_dof_index, this->dofs_per_cell);
+
+  // just go through the usual cases, taking into account how DoFs
+  // are enumerated on the reference cell: the comparisons below treat
+  // vertex dofs as coming first, then line, quad, and finally hex dofs
+  if (cell_dof_index < this->first_line_index)
+    return GeometryPrimitive::vertex;
+  if (cell_dof_index < this->first_quad_index)
+    return GeometryPrimitive::line;
+  if (cell_dof_index < this->first_hex_index)
+    return GeometryPrimitive::quad;
+  // everything at or beyond first_hex_index
+  return GeometryPrimitive::hex;
+}
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/fe/fe_abf.h b/include/deal.II/fe/fe_abf.h
new file mode 100644
index 0000000..3df9ceb
--- /dev/null
+++ b/include/deal.II/fe/fe_abf.h
@@ -0,0 +1,255 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__fe_abf_h
+#define dealii__fe_abf_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/table.h>
+#include <deal.II/base/polynomials_abf.h>
+#include <deal.II/base/polynomial.h>
+#include <deal.II/base/tensor_product_polynomials.h>
+#include <deal.II/base/geometry_info.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/fe_poly_tensor.h>
+
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int dim, int spacedim> class MappingQ;
+
+
+/*!@addtogroup fe */
+/*@{*/
+
+/**
+ * Implementation of Arnold-Boffi-Falk (ABF) elements, conforming with the
+ * space H<sup>div</sup>. These elements generate vector fields with normal
+ * components continuous between mesh cells.
+ *
+ * These elements are based on an article from Arnold, Boffi and Falk:
+ * Quadrilateral H(div) finite elements, SIAM J. Numer. Anal. Vol.42, No.6,
+ * pp.2429-2451
+ *
+ * In this article, the authors demonstrate that the usual RT elements and
+ * also BDM and other proposed finite dimensional subspaces of H(div) do not
+ * work properly on arbitrary FE grids. I.e. the convergence rates deteriorate
+ * on these meshes. As a solution the authors propose the ABF elements, which
+ * are implemented in this module.
+ *
+ * This class is not implemented for the codimension one case (<tt>spacedim !=
+ * dim</tt>).
+ *
+ * @todo Even if this element is implemented for two and three space
+ * dimensions, the definition of the node values relies on consistently
+ * oriented faces in 3D. Therefore, care should be taken on complicated
+ * meshes.
+ *
+ * <h3>Interpolation</h3>
+ *
+ * The
+ * @ref GlossInterpolation "interpolation"
+ * operators associated with the RT element are constructed such that
+ * interpolation and computing the divergence are commuting operations. We
+ * require this from interpolating arbitrary functions as well as the
+ * #restriction matrices.  It can be achieved by two interpolation schemes,
+ * the simplified one in FE_RaviartThomasNodal and the original one here:
+ *
+ * <h4>Node values on edges/faces</h4>
+ *
+ * On edges or faces, the
+ * @ref GlossNodes "node values"
+ * are the moments of the normal component of the interpolated function with
+ * respect to the traces of the RT polynomials. Since the normal trace of the
+ * RT space of degree <i>k</i> on an edge/face is the space
+ * <i>Q<sub>k</sub></i>, the moments are taken with respect to this space.
+ *
+ * <h4>Interior node values</h4>
+ *
+ * Higher order RT spaces have interior nodes. These are moments taken with
+ * respect to the gradient of functions in <i>Q<sub>k</sub></i> on the cell
+ * (this space is the matching space for RT<sub>k</sub> in a mixed
+ * formulation).
+ *
+ * <h4>Generalized support points</h4>
+ *
+ * The node values above rely on integrals, which will be computed by
+ * quadrature rules themselves. The generalized support points are a set of
+ * points such that this quadrature can be performed with sufficient accuracy.
+ * The points needed are those of QGauss<sub>k+1</sub> on each face as well as
+ * QGauss<sub>k</sub> in the interior of the cell (or none for
+ * RT<sub>0</sub>). See the
+ * @ref GlossGeneralizedSupport "glossary entry on generalized support points"
+ * for more information.
+ *
+ *
+ * @author Oliver Kayser-Herold, 2006, based on previous work by Guido
+ * Kanschat and Wolfgang Bangerth
+ */
+template <int dim>
+class FE_ABF : public FE_PolyTensor<PolynomialsABF<dim>, dim>
+{
+public:
+  /**
+   * Constructor for the ABF element of degree @p p.
+   */
+  FE_ABF (const unsigned int p);
+
+  /**
+   * Return a string that uniquely identifies a finite element. This class
+   * returns <tt>FE_ABF<dim>(degree)</tt>, with @p dim and @p degree replaced
+   * by appropriate values.
+   */
+  virtual std::string get_name () const;
+
+  /**
+   * This function returns @p true, if the shape function @p shape_index has
+   * non-zero function values somewhere on the face @p face_index.
+   *
+   * Right now, this is only implemented for RT0 in 1D. Otherwise, this
+   * function always returns @p true.
+   */
+  virtual bool has_support_on_face (const unsigned int shape_index,
+                                    const unsigned int face_index) const;
+
+  // interpolate() overloads: each one fills @p local_dofs from @p values
+  // (presumably values at the generalized support points -- TODO confirm).
+  virtual void interpolate(std::vector<double>                &local_dofs,
+                           const std::vector<double> &values) const;
+  virtual void interpolate(std::vector<double>                &local_dofs,
+                           const std::vector<Vector<double> > &values,
+                           unsigned int offset = 0) const;
+  virtual void interpolate(
+    std::vector<double> &local_dofs,
+    const VectorSlice<const std::vector<std::vector<double> > > &values) const;
+  virtual std::size_t memory_consumption () const;
+  virtual FiniteElement<dim> *clone() const;
+
+private:
+  /**
+   * The order of the ABF element. The lowest order elements are usually
+   * referred to as RT0, even though their shape functions are piecewise
+   * quadratics.
+   */
+  const unsigned int rt_order;
+
+  /**
+   * Only for internal use. The name is short for
+   * <tt>get_dofs_per_object_vector</tt>: this function creates the @p
+   * dofs_per_object vector that the constructor needs in order to pass it
+   * on to the constructor of @p FiniteElementData.
+   */
+  static std::vector<unsigned int>
+  get_dpo_vector (const unsigned int degree);
+
+  /**
+   * Initialize the @p generalized_support_points field of the FiniteElement
+   * class and fill the tables with interpolation weights (#boundary_weights
+   * and #interior_weights). Called from the constructor.
+   *
+   * See the
+   * @ref GlossGeneralizedSupport "glossary entry on generalized support points"
+   * for more information.
+   */
+  void initialize_support_points (const unsigned int rt_degree);
+
+  /**
+   * Initialize the interpolation from functions on refined mesh cells onto
+   * the father cell. According to the philosophy of the Raviart-Thomas
+   * element, this restriction operator preserves the divergence of a function
+   * weakly.
+   */
+  void initialize_restriction ();
+
+  /**
+   * Fields of cell-independent data.
+   *
+   * For information about the general purpose of this class, see the
+   * documentation of the base class.
+   */
+  class InternalData : public FiniteElement<dim>::InternalDataBase
+  {
+  public:
+    /**
+     * Array with shape function values in quadrature points. There is one row
+     * for each shape function, containing values for each quadrature point.
+     * Since the shape functions are vector-valued (with as many components as
+     * there are space dimensions), the value is a tensor.
+     *
+     * In this array, we store the values of the shape function in the
+     * quadrature points on the unit cell. The transformation to the real
+     * space cell is then simply done by multiplication with the Jacobian of
+     * the mapping.
+     */
+    std::vector<std::vector<Tensor<1,dim> > > shape_values;
+
+    /**
+     * Array with shape function gradients in quadrature points. There is one
+     * row for each shape function, containing values for each quadrature
+     * point.
+     *
+     * We store the gradients in the quadrature points on the unit cell. We
+     * then only have to apply the transformation (which is a matrix-vector
+     * multiplication) when visiting an actual cell.
+     */
+    std::vector<std::vector<Tensor<2,dim> > > shape_gradients;
+  };
+
+  /**
+   * These are the factors multiplied to a function in the
+   * #generalized_face_support_points when computing the integration. They are
+   * organized such that there is one row for each generalized face support
+   * point and one column for each degree of freedom on the face.
+   */
+  Table<2, double> boundary_weights;
+  /**
+   * Precomputed factors for interpolation of interior degrees of freedom. The
+   * rationale for this Table is the same as for #boundary_weights. Only, this
+   * table has a third coordinate for the space direction of the component
+   * evaluated.
+   */
+  Table<3, double> interior_weights;
+
+  /**
+   * These are the factors multiplied to a function in the
+   * #generalized_face_support_points when computing the integration. They are
+   * organized such that there is one row for each generalized face support
+   * point and one column for each degree of freedom on the face.
+   */
+  Table<2, double> boundary_weights_abf;
+  /**
+   * Precomputed factors for interpolation of interior degrees of freedom. The
+   * rationale for this Table is the same as for #boundary_weights. Only, this
+   * table has a third coordinate for the space direction of the component
+   * evaluated.
+   */
+  Table<3, double> interior_weights_abf;
+
+
+  /**
+   * Allow access from other dimensions.
+   */
+  template <int dim1> friend class FE_ABF;
+};
+
+
+
+/*@}*/
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/fe/fe_base.h b/include/deal.II/fe/fe_base.h
new file mode 100644
index 0000000..fe4f0e9
--- /dev/null
+++ b/include/deal.II/fe/fe_base.h
@@ -0,0 +1,683 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__fe_base_h
+#define dealii__fe_base_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/point.h>
+#include <deal.II/base/tensor.h>
+#include <deal.II/base/table.h>
+#include <deal.II/base/vector_slice.h>
+#include <deal.II/base/geometry_info.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/block_indices.h>
+#include <deal.II/fe/fe_update_flags.h>
+
+#include <string>
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+template<int dim, int spacedim> class FESystem;
+
+
+/**
+ * A namespace solely for the purpose of defining the Domination enum as well
+ * as associated operators.
+ */
+namespace FiniteElementDomination
+{
+  /**
+   * An enum that describes the outcome of comparing two elements for mutual
+   * domination. If one element dominates another, then the restriction of the
+   * space described by the dominated element to a face of the cell is
+   * strictly larger than that of the dominating element. For example, in 2-d
+   * Q(2) elements dominate Q(4) elements, because the traces of Q(4) elements
+   * are quartic polynomials which is a space strictly larger than the
+   * quadratic polynomials (the restriction of the Q(2) element). In general,
+   * Q(k) dominates Q(k') if $k\le k'$.
+   *
+   * This enum is used in the FiniteElement::compare_for_face_domination()
+   * function that is used in the context of hp finite element methods when
+   * determining what to do at faces where two different finite elements meet
+   * (see the
+   * @ref hp_paper "hp paper"
+   * for a more detailed description of the following). In that case, the
+   * degrees of freedom of one side need to be constrained to those on the
+   * other side. The determination which side is which is based on the outcome
+   * of a comparison for mutual domination: the dominated side is constrained
+   * to the dominating one.
+   *
+   * A similar situation happens in 3d, where we have to consider different
+   * elements meeting at only an edge, not an entire face. Such comparisons
+   * are then implemented in the FiniteElement::compare_for_line_domination()
+   * function.
+   *
+   * Note that there are situations where neither side dominates. The
+   * @ref hp_paper "hp paper"
+   * lists two cases, with the simpler one being that a $Q_2\times Q_1$ vector-
+   * valued element (i.e. a <code>FESystem(FE_Q(2),1,FE_Q(1),1)</code>) meets
+   * a $Q_1\times Q_2$ element: here, for each of the two vector-components,
+   * we can define a domination relationship, but it is different for the two
+   * components.
+   *
+   * It is clear that the concept of domination doesn't matter for
+   * discontinuous elements. However, discontinuous elements may be part of
+   * vector-valued elements and may therefore be compared against each other
+   * for domination. They should return
+   * <code>either_element_can_dominate</code> in that case. Likewise, when
+   * comparing two identical finite elements, they should return this code;
+   * the reason is that we can not decide which element will dominate at the
+   * time we look at the first component of, for example, two $Q_2\times Q_1$
+   * and $Q_2\times Q_2$ elements, and have to keep our options open until we
+   * get to the second base element.
+   *
+   * Finally, the code no_requirements exists for cases where elements impose
+   * no continuity requirements. The case is primarily meant for FE_Nothing
+   * which is an element that has no degrees of freedom in a subdomain. It
+   * could also be used by discontinuous elements, for example.
+   *
+   * More details on domination can be found in the
+   * @ref hp_paper "hp paper".
+   */
+  enum Domination
+  {
+    this_element_dominates,
+    other_element_dominates,
+    neither_element_dominates,
+    either_element_can_dominate,
+    no_requirements
+  };
+
+
+  /**
+   * A generalization of the binary <code>and</code> operator to a comparison
+   * relationship. The way this works is pretty much as when you would want to
+   * define a comparison relationship for vectors: either all elements of the
+   * first vector are smaller, equal, or larger than those of the second
+   * vector, or some are and some are not.
+   *
+   * This operator is pretty much the same: if both arguments are
+   * <code>this_element_dominates</code> or both are
+   * <code>other_element_dominates</code>, then the returned value is that
+   * value. If one of the values is <code>either_element_can_dominate</code>
+   * or <code>no_requirements</code>, then the returned value is that of the
+   * other argument (<code>either_element_can_dominate</code> if these two
+   * meet). If either argument is <code>neither_element_dominates</code>, or
+   * if the two arguments are <code>this_element_dominates</code> and
+   * <code>other_element_dominates</code>, then the returned value is
+   * <code>neither_element_dominates</code>.
+   */
+  inline Domination operator & (const Domination d1,
+                                const Domination d2);
+}
+
+
+/**
+ * A class that declares a number of scalar constant variables that describe
+ * basic properties of a finite element implementation. This includes, for
+ * example, the number of degrees of freedom per vertex, line, or cell; the
+ * number of vector components; etc.
+ *
+ * The kind of information stored here is computed during initialization of a
+ * finite element object and is passed down to this class via its constructor.
+ * The data stored by this class is part of the public interface of the
+ * FiniteElement class (which derives from the current class). See there for
+ * more information.
+ *
+ * @ingroup febase
+ * @author Wolfgang Bangerth, Guido Kanschat, 1998, 1999, 2000, 2001, 2003,
+ * 2005
+ */
+template <int dim>
+class FiniteElementData
+{
+public:
+  /**
+   * Enumerator for the different types of continuity a finite element may
+   * have. Continuity is measured by the Sobolev space containing the
+   * constructed finite element space and is also called this way.
+   *
+   * Note that certain continuities may imply others. For instance, a function
+   * in <i>H<sup>1</sup></i> is in <i>H<sup>curl</sup></i> and
+   * <i>H<sup>div</sup></i> as well.
+   *
+   * If you are interested in continuity in the classical sense, then the
+   * following relations hold:
+   *
+   * <ol>
+   *
+   * <li> <i>H<sup>1</sup></i> implies that the function is continuous over
+   * cell boundaries.
+   *
+   * <li> <i>H<sup>2</sup></i> implies that the function is continuously
+   * differentiable over cell boundaries.
+   *
+   * <li> <i>L<sup>2</sup></i> indicates that the element is discontinuous.
+   * Since discontinuous elements have no topological couplings between grid
+   * cells and code may actually depend on this property, <i>L<sup>2</sup></i>
+   * conformity is handled in a special way in the sense that it is <b>not</b>
+   * implied by any higher conformity.
+   * </ol>
+   *
+   * In order to test if a finite element conforms to a certain space, use
+   * FiniteElementData<dim>::conforms().
+   */
+  enum Conformity
+  {
+    /**
+     * Indicates incompatible continuities of a system.
+     */
+    unknown = 0x00,
+
+    /**
+     * Discontinuous elements. See above!
+     */
+    L2 = 0x01,
+
+    /**
+     * Conformity with the space <i>H<sup>curl</sup></i> (continuous
+     * tangential component of a vector field)
+     */
+    Hcurl = 0x02,
+
+    /**
+     * Conformity with the space <i>H<sup>div</sup></i> (continuous normal
+     * component of a vector field)
+     */
+    Hdiv = 0x04,
+
+    /**
+     * Conformity with the space <i>H<sup>1</sup></i> (continuous)
+     */
+    H1 = Hcurl | Hdiv,
+
+    /**
+     * Conformity with the space <i>H<sup>2</sup></i> (continuously
+     * differentiable)
+     */
+    H2 = 0x0e
+  };
+
+  /**
+   * The dimension of the finite element, which is the template parameter
+   * <tt>dim</tt>
+   */
+  static const unsigned int dimension = dim;
+
+  /**
+   * Number of degrees of freedom on a vertex.
+   */
+  const unsigned int dofs_per_vertex;
+
+  /**
+   * Number of degrees of freedom in a line; not including the degrees of
+   * freedom on the vertices of the line.
+   */
+  const unsigned int dofs_per_line;
+
+  /**
+   * Number of degrees of freedom in a quadrilateral; not including the
+   * degrees of freedom on the lines and vertices of the quadrilateral.
+   */
+  const unsigned int dofs_per_quad;
+
+  /**
+   * Number of degrees of freedom in a hexahedron; not including the degrees
+   * of freedom on the quadrilaterals, lines and vertices of the hexahedron.
+   */
+  const unsigned int dofs_per_hex;
+
+  /**
+   * First index of dof on a line.
+   */
+  const unsigned int first_line_index;
+
+  /**
+   * First index of dof on a quad.
+   */
+  const unsigned int first_quad_index;
+
+  /**
+   * First index of dof on a hexahedron.
+   */
+  const unsigned int first_hex_index;
+
+  /**
+   * First index of dof on a line for face data.
+   */
+  const unsigned int first_face_line_index;
+
+  /**
+   * First index of dof on a quad for face data.
+   */
+  const unsigned int first_face_quad_index;
+
+  /**
+   * Number of degrees of freedom on a face. This is the accumulated number of
+   * degrees of freedom on all the objects of dimension up to <tt>dim-1</tt>
+   * constituting a face.
+   */
+  const unsigned int dofs_per_face;
+
+  /**
+   * Total number of degrees of freedom on a cell. This is the accumulated
+   * number of degrees of freedom on all the objects of dimension up to
+   * <tt>dim</tt> constituting a cell.
+   */
+  const unsigned int dofs_per_cell;
+
+  /**
+   * Number of vector components of this finite element, and dimension of the
+   * image space. For vector-valued finite elements (i.e. when this number is
+   * greater than one), the number of vector components is in many cases equal
+   * to the number of base elements glued together with the help of the
+   * FESystem class. However, for elements like the Nedelec element, the
+   * number is greater than one even though we only have one base element.
+   */
+  const unsigned int components;
+
+  /**
+   * Maximal polynomial degree of a shape function in a single coordinate
+   * direction.
+   */
+  const unsigned int degree;
+
+  /**
+   * Indicate the space this element conforms to.
+   */
+  const Conformity conforming_space;
+
+  /**
+   * Storage for an object describing the sizes of each block of a compound
+   * element. For an element which is not an FESystem, this contains only a
+   * single block with length #dofs_per_cell.
+   */
+  const BlockIndices block_indices_data;
+
+  /**
+   * Constructor, computing all necessary values from the distribution of dofs
+   * to geometrical objects.
+   *
+   * @param[in] dofs_per_object A vector that describes the number of degrees
+   * of freedom on geometrical objects for each dimension. This vector must
+   * have size dim+1, and entry 0 describes the number of degrees of freedom
+   * per vertex, entry 1 the number of degrees of freedom per line, etc. As an
+   * example, for the common $Q_1$ Lagrange element in 2d, this vector would
+   * have elements <code>(1,0,0)</code>. On the other hand, for a $Q_3$
+   * element in 3d, it would have entries <code>(1,2,4,8)</code>.
+   *
+   * @param[in] n_components Number of vector components of the element.
+   *
+   * @param[in] degree The maximal polynomial degree of any of the shape
+   * functions of this element in any variable on the reference element. For
+   * example, for the $Q_1$ element (in any space dimension), this would be
+   * one; this is so despite the fact that the element has a shape function of
+   * the form $\hat x\hat y$ (in 2d) and $\hat x\hat y\hat z$ (in 3d), which,
+   * although quadratic and cubic polynomials, are still only linear in each
+   * reference variable separately. The information provided by this variable
+   * is typically used in determining what an appropriate quadrature formula
+   * is.
+   *
+   * @param[in] conformity A variable describing which Sobolev space this
+   * element conforms to. For example, the $Q_p$ Lagrange elements
+   * (implemented by the FE_Q class) are $H^1$ conforming, whereas the
+   * Raviart-Thomas element (implemented by the FE_RaviartThomas class) is
+   * $H_\text{div}$ conforming; finally, completely discontinuous elements
+   * (implemented by the FE_DGQ class) are only $L_2$ conforming.
+   *
+   * @param[in] block_indices An argument that describes how the base elements
+   * of a finite element are grouped. The default value constructs a single
+   * block that consists of all @p dofs_per_cell degrees of freedom. This is
+   * appropriate for all "atomic" elements (including non-primitive ones) and
+   * these can therefore omit this argument. On the other hand, composed
+   * elements such as FESystem will want to pass a different value here.
+   */
+  FiniteElementData (const std::vector<unsigned int> &dofs_per_object,
+                     const unsigned int               n_components,
+                     const unsigned int               degree,
+                     const Conformity                 conformity = unknown,
+                     const BlockIndices              &block_indices = BlockIndices());
+
+  /**
+   * Number of dofs per vertex.
+   */
+  unsigned int n_dofs_per_vertex () const;
+
+  /**
+   * Number of dofs per line. Not including dofs on lower dimensional objects.
+   */
+  unsigned int n_dofs_per_line () const;
+
+  /**
+   * Number of dofs per quad. Not including dofs on lower dimensional objects.
+   */
+  unsigned int n_dofs_per_quad () const;
+
+  /**
+   * Number of dofs per hex. Not including dofs on lower dimensional objects.
+   */
+  unsigned int n_dofs_per_hex () const;
+
+  /**
+   * Number of dofs per face, accumulating degrees of freedom of all lower
+   * dimensional objects.
+   */
+  unsigned int n_dofs_per_face () const;
+
+  /**
+   * Number of dofs per cell, accumulating degrees of freedom of all lower
+   * dimensional objects.
+   */
+  unsigned int n_dofs_per_cell () const;
+
+  /**
+   * Return the number of dofs per structdim-dimensional object. For
+   * structdim==0, the function therefore returns dofs_per_vertex, for
+   * structdim==1 dofs_per_line, etc. This function is mostly used to allow
+   * some template trickery for functions that should work on all sorts of
+   * objects without wanting to use the different names (vertex, line, ...)
+   * associated with these objects.
+   */
+  template <int structdim>
+  unsigned int n_dofs_per_object () const;
+
+  /**
+   * Number of components. See
+   * @ref GlossComponent "the glossary"
+   * for more information.
+   */
+  unsigned int n_components () const;
+
+  /**
+   * Number of blocks. See
+   * @ref GlossBlock "the glossary"
+   * for more information.
+   */
+  unsigned int n_blocks () const;
+
+  /**
+   * Detailed information on block sizes.
+   */
+  const BlockIndices &block_indices() const;
+
+  /**
+   * Return whether the entire finite element is primitive, in the sense that
+   * all its shape functions are primitive. If the finite element is scalar,
+   * then this is always the case.
+   *
+   * Since this is an extremely common operation, the result is cached in the
+   * #cached_primitivity variable which is computed in the constructor.
+   */
+  bool is_primitive () const;
+
+  /**
+   * Maximal polynomial degree of a shape function in a single coordinate
+   * direction.
+   *
+   * This function can be used to determine the optimal quadrature rule.
+   */
+  unsigned int tensor_degree () const;
+
+  /**
+   * Test whether a finite element space conforms to a certain Sobolev space.
+   *
+   * @note This function will return a true value even if the finite element
+   * space has higher regularity than asked for.
+   */
+  bool conforms (const Conformity) const;
+
+  /**
+   * Comparison operator.
+   */
+  bool operator == (const FiniteElementData &) const;
+
+protected:
+
+  /**
+   * Set the primitivity of the element. This is usually done by the
+   * constructor of a derived class.  See
+   * @ref GlossPrimitive "primitive"
+   * for details.
+   */
+  void set_primitivity(const bool value);
+
+private:
+  /**
+   * Store whether all shape functions are primitive. Since finding this out
+   * is a very common operation, we cache the result, i.e. compute the value
+   * in the constructor for simpler access.
+   */
+  bool cached_primitivity;
+};
+
+
+
+// --------- inline and template functions ---------------
+
+
+#ifndef DOXYGEN
+
+namespace FiniteElementDomination
+{
+  inline
+  Domination operator & (const Domination d1,
+                         const Domination d2)
+  {
+    // go through the entire list of possibilities. note that if we were into
+    // speed, obfuscation and cared enough, we could implement this operator
+    // by doing a bitwise & (and) if we gave these values to the enum values:
+    // neither_element_dominates=0, this_element_dominates=1,
+    // other_element_dominates=2, either_element_can_dominate=3
+    // =this_element_dominates|other_element_dominates
+    switch (d1)
+      {
+      case this_element_dominates:
+        if ((d2 == this_element_dominates) ||
+            (d2 == either_element_can_dominate) ||
+            (d2 == no_requirements))
+          return this_element_dominates;
+        else
+          return neither_element_dominates;
+
+      case other_element_dominates:
+        if ((d2 == other_element_dominates) ||
+            (d2 == either_element_can_dominate) ||
+            (d2 == no_requirements))
+          return other_element_dominates;
+        else
+          return neither_element_dominates;
+
+      case neither_element_dominates:
+        return neither_element_dominates;
+
+      case either_element_can_dominate:
+        if (d2 == no_requirements)
+          return either_element_can_dominate;
+        else
+          return d2;
+
+      case no_requirements:
+        return d2;
+
+      default:
+        // shouldn't get here
+        Assert (false, ExcInternalError());
+      }
+
+    return neither_element_dominates;
+  }
+}
+
+
+template <int dim>
+inline
+unsigned int
+FiniteElementData<dim>::n_dofs_per_vertex () const
+{
+  return dofs_per_vertex;
+}
+
+
+
+template <int dim>
+inline
+unsigned int
+FiniteElementData<dim>::n_dofs_per_line () const
+{
+  return dofs_per_line;
+}
+
+
+
+template <int dim>
+inline
+unsigned int
+FiniteElementData<dim>::n_dofs_per_quad () const
+{
+  return dofs_per_quad;
+}
+
+
+
+template <int dim>
+inline
+unsigned int
+FiniteElementData<dim>::n_dofs_per_hex () const
+{
+  return dofs_per_hex;
+}
+
+
+
+template <int dim>
+inline
+unsigned int
+FiniteElementData<dim>::n_dofs_per_face () const
+{
+  return dofs_per_face;
+}
+
+
+
+template <int dim>
+inline
+unsigned int
+FiniteElementData<dim>::n_dofs_per_cell () const
+{
+  return dofs_per_cell;
+}
+
+
+
+template <int dim>
+template <int structdim>
+inline
+unsigned int
+FiniteElementData<dim>::n_dofs_per_object () const
+{
+  switch (structdim)
+    {
+    case 0:
+      return dofs_per_vertex;
+    case 1:
+      return dofs_per_line;
+    case 2:
+      return dofs_per_quad;
+    case 3:
+      return dofs_per_hex;
+    default:
+      Assert (false, ExcInternalError());
+    }
+  return numbers::invalid_unsigned_int;
+}
+
+
+
+template <int dim>
+inline
+unsigned int
+FiniteElementData<dim>::n_components () const
+{
+  return components;
+}
+
+
+
+template <int dim>
+inline
+bool
+FiniteElementData<dim>::is_primitive () const
+{
+  return cached_primitivity;
+}
+
+
+template <int dim>
+inline
+void
+FiniteElementData<dim>::set_primitivity (const bool value)
+{
+  cached_primitivity = value;
+}
+
+
+template <int dim>
+inline
+const BlockIndices &
+FiniteElementData<dim>::block_indices () const
+{
+  return block_indices_data;
+}
+
+
+
+template <int dim>
+inline
+unsigned int
+FiniteElementData<dim>::n_blocks () const
+{
+  return block_indices_data.size();
+}
+
+
+
+template <int dim>
+inline
+unsigned int
+FiniteElementData<dim>::tensor_degree () const
+{
+  return degree;
+}
+
+
+template <int dim>
+inline
+bool
+FiniteElementData<dim>::conforms (const Conformity space) const
+{
+  return ((space & conforming_space) == space);
+}
+
+
+#endif // DOXYGEN
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/fe/fe_bdm.h b/include/deal.II/fe/fe_bdm.h
new file mode 100644
index 0000000..c49e225
--- /dev/null
+++ b/include/deal.II/fe/fe_bdm.h
@@ -0,0 +1,123 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__fe_bdm_h
+#define dealii__fe_bdm_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/table.h>
+#include <deal.II/base/polynomials_bdm.h>
+#include <deal.II/base/polynomial.h>
+#include <deal.II/base/tensor_product_polynomials.h>
+#include <deal.II/base/geometry_info.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/fe_poly_tensor.h>
+
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * The Brezzi-Douglas-Marini element.
+ *
+ * <h3>Degrees of freedom</h3>
+ *
+ * @todo The 3D version exhibits some numerical instabilities, in particular
+ * for higher order
+ *
+ * @todo Restriction matrices are missing.
+ *
+ * The matching pressure space for FE_BDM of order <i>k</i> is the element
+ * FE_DGP of order <i>k-1</i>.
+ *
+ * The BDM element of order @p p has <i>p+1</i> degrees of freedom on each
+ * face. These are implemented as the function values in the <i>p+1</i> Gauss
+ * points on each face.
+ *
+ * Additionally, for order greater than or equal to 2, we have additional
+ * <i>p(p-1)</i>, the number of vector valued polynomials in
+ * <i>P<sub>p</sub></i>, interior degrees of freedom. These are the vector
+ * function values in the first <i>p(p-1)/2</i> of the <i>p<sup>2</sup></i>
+ * Gauss points in the cell.
+ */
+template <int dim>
+class FE_BDM
+  :
+  public FE_PolyTensor<PolynomialsBDM<dim>, dim>
+{
+public:
+  /**
+   * Constructor for the BDM element of degree @p p.
+   */
+  FE_BDM (const unsigned int p);
+
+  /**
+   * Return a string that uniquely identifies a finite element. This class
+   * returns <tt>FE_BDM<dim>(degree)</tt>, with @p dim and @p degree replaced
+   * by appropriate values.
+   */
+  virtual std::string get_name () const;
+
+  virtual FiniteElement<dim> *clone () const;
+
+  virtual void interpolate(std::vector<double>                &local_dofs,
+                           const std::vector<double> &values) const;
+  virtual void interpolate(std::vector<double>                &local_dofs,
+                           const std::vector<Vector<double> > &values,
+                           unsigned int offset = 0) const;
+  virtual void interpolate(
+    std::vector<double> &local_dofs,
+    const VectorSlice<const std::vector<std::vector<double> > > &values) const;
+private:
+  /**
+   * Only for internal use. Its full name is @p get_dofs_per_object_vector
+   * function and it creates the @p dofs_per_object vector that is needed
+   * within the constructor to be passed to the constructor of @p
+   * FiniteElementData.
+   */
+  static std::vector<unsigned int>
+  get_dpo_vector (const unsigned int degree);
+
+  /**
+   * Compute the vector used for the @p restriction_is_additive field passed
+   * to the base class's constructor.
+   */
+  static std::vector<bool>
+  get_ria_vector (const unsigned int degree);
+  /**
+   * Initialize the FiniteElement<dim>::generalized_support_points and
+   * FiniteElement<dim>::generalized_face_support_points fields. Called from
+   * the constructor. See the
+   * @ref GlossGeneralizedSupport "glossary entry on generalized support points"
+   * for more information.
+   */
+  void initialize_support_points (const unsigned int bdm_degree);
+  /**
+   * The values in the face support points of the polynomials needed as test
+   * functions. The outer vector is indexed by quadrature points, the inner by
+   * the test function. The test function space is PolynomialsP<dim-1>.
+   */
+  std::vector<std::vector<double> > test_values_face;
+  /**
+   * The values in the interior support points of the polynomials needed as
+   * test functions. The outer vector is indexed by quadrature points, the
+   * inner by the test function. The test function space is PolynomialsP<dim>.
+   */
+  std::vector<std::vector<double> > test_values_cell;
+};
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/fe/fe_bernstein.h b/include/deal.II/fe/fe_bernstein.h
new file mode 100644
index 0000000..f55fd70
--- /dev/null
+++ b/include/deal.II/fe/fe_bernstein.h
@@ -0,0 +1,192 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__fe_bernstein_h
+#define dealii__fe_bernstein_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/tensor_product_polynomials.h>
+#include <deal.II/fe/fe_q_base.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/*!@addtogroup fe */
+/*@{*/
+
+/**
+ * Implementation of a scalar Bernstein finite element that we call
+ * FE_Bernstein in analogy with FE_Q that yields the finite element space of
+ * continuous, piecewise Bernstein polynomials of degree @p p in each
+ * coordinate direction. This class is realized using tensor product
+ * polynomials of Bernstein basis polynomials.
+ *
+ *
+ * The standard constructor of this class takes the degree @p p of this finite
+ * element.
+ *
+ * For more information about the <tt>spacedim</tt> template parameter check
+ * the documentation of FiniteElement or the one of Triangulation.
+ *
+ * <h3>Implementation</h3>
+ *
+ * The constructor creates a TensorProductPolynomials object that includes the
+ * tensor product of @p Bernstein polynomials of degree @p p. This @p
+ * TensorProductPolynomials object provides all values and derivatives of the
+ * shape functions.
+ *
+ * <h3>Numbering of the degrees of freedom (DoFs)</h3>
+ *
+ * The original ordering of the shape functions represented by the
+ * TensorProductPolynomials is a tensor product numbering. However, the shape
+ * functions on a cell are renumbered beginning with the shape functions whose
+ * support points are at the vertices, then on the line, on the quads, and
+ * finally (for 3d) on the hexes. See the documentation of FE_Q for more
+ * details.
+ *
+ *
+ * @author Marco Tezzele, Luca Heltai
+ * @date 2013, 2015
+ */
+
+template <int dim, int spacedim=dim>
+class FE_Bernstein : public FE_Q_Base<TensorProductPolynomials<dim>,dim,spacedim>
+{
+public:
+  /**
+   * Constructor for tensor product polynomials of degree @p p.
+   */
+  FE_Bernstein (const unsigned int p);
+
+  /**
+   * Return the matrix interpolating from a face of one element to the face of
+   * the neighboring element.  The size of the matrix is then
+   * <tt>source.dofs_per_face</tt> times <tt>this->dofs_per_face</tt>. The
+   * FE_Bernstein element family only provides interpolation matrices for
+   * elements of the same type and FE_Nothing. For all other elements, an
+   * exception of type
+   * FiniteElement<dim,spacedim>::ExcInterpolationNotImplemented is thrown.
+   */
+  virtual void
+  get_face_interpolation_matrix (const FiniteElement<dim,spacedim> &source,
+                                 FullMatrix<double>       &matrix) const;
+
+  /**
+   * Return the matrix interpolating from a face of one element to a subface
+   * of the neighboring element.  The size of the matrix is then
+   * <tt>source.dofs_per_face</tt> times <tt>this->dofs_per_face</tt>. The
+   * FE_Bernstein element family only provides interpolation matrices for
+   * elements of the same type and FE_Nothing. For all other elements, an
+   * exception of type
+   * FiniteElement<dim,spacedim>::ExcInterpolationNotImplemented is thrown.
+   */
+  virtual void
+  get_subface_interpolation_matrix (const FiniteElement<dim,spacedim> &source,
+                                    const unsigned int        subface,
+                                    FullMatrix<double>       &matrix) const;
+
+  /**
+   * Return whether this element implements its hanging node constraints in
+   * the new way, which has to be used to make elements "hp compatible".
+   */
+  virtual bool hp_constraints_are_implemented () const;
+
+  /**
+   * If, on a vertex, several finite elements are active, the hp code first
+   * assigns the degrees of freedom of each of these FEs different global
+   * indices. It then calls this function to find out which of them should get
+   * identical values, and consequently can receive the same global DoF index.
+   * This function therefore returns a list of identities between DoFs of the
+   * present finite element object with the DoFs of @p fe_other, which is a
+   * reference to a finite element object representing one of the other finite
+   * elements active on this particular vertex. The function computes which of
+   * the degrees of freedom of the two finite element objects are equivalent,
+   * both numbered between zero and the corresponding value of dofs_per_vertex
+   * of the two finite elements. The first index of each pair denotes one of
+   * the vertex dofs of the present element, whereas the second is the
+   * corresponding index of the other finite element.
+   */
+  virtual
+  std::vector<std::pair<unsigned int, unsigned int> >
+  hp_vertex_dof_identities (const FiniteElement<dim,spacedim> &fe_other) const;
+
+  /**
+   * Same as hp_vertex_dof_identities(), except that the function treats
+   * degrees of freedom on lines.
+   */
+  virtual
+  std::vector<std::pair<unsigned int, unsigned int> >
+  hp_line_dof_identities (const FiniteElement<dim,spacedim> &fe_other) const;
+
+  /**
+   * Same as hp_vertex_dof_identities(), except that the function treats
+   * degrees of freedom on quads.
+   */
+  virtual
+  std::vector<std::pair<unsigned int, unsigned int> >
+  hp_quad_dof_identities (const FiniteElement<dim,spacedim> &fe_other) const;
+
+  /**
+   * Return whether this element dominates the one given as argument when they
+   * meet at a common face, whether it is the other way around, whether
+   * neither dominates, or if either could dominate.
+   *
+   * For a definition of domination, see
+   * FiniteElementDomination::Domination and in particular the
+   * @ref hp_paper "hp paper".
+   */
+  virtual
+  FiniteElementDomination::Domination
+  compare_for_face_domination (const FiniteElement<dim,spacedim> &fe_other) const;
+
+
+  /**
+   * Return a string that uniquely identifies a finite element. This class
+   * returns <tt>FE_Bernstein<dim>(degree)</tt>, with @p dim and @p degree
+   * replaced by appropriate values.
+   */
+  virtual std::string get_name () const;
+
+protected:
+
+  /**
+   * @p clone function instead of a copy constructor.
+   *
+   * This function is needed by the constructors of @p FESystem.
+   */
+  virtual FiniteElement<dim,spacedim> *clone() const;
+
+  /**
+   * Only for internal use. Its full name is @p get_dofs_per_object_vector
+   * function and it creates the @p dofs_per_object vector that is needed
+   * within the constructor to be passed to the constructor of @p
+   * FiniteElementData.
+   */
+  static std::vector<unsigned int> get_dpo_vector(const unsigned int degree);
+
+  /**
+   * This function renumbers Bernstein basis functions from hierarchic to
+   * lexicographic numbering.
+   */
+  TensorProductPolynomials<dim> renumber_bases(const unsigned int degree);
+};
+
+
+
+/*@}*/
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/fe/fe_dg_vector.h b/include/deal.II/fe/fe_dg_vector.h
new file mode 100644
index 0000000..0735e94
--- /dev/null
+++ b/include/deal.II/fe/fe_dg_vector.h
@@ -0,0 +1,244 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__fe_dg_vector_h
+#define dealii__fe_dg_vector_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/table.h>
+#include <deal.II/base/polynomials_raviart_thomas.h>
+#include <deal.II/base/polynomials_nedelec.h>
+#include <deal.II/base/polynomials_bdm.h>
+#include <deal.II/base/polynomial.h>
+#include <deal.II/base/tensor_product_polynomials.h>
+#include <deal.II/base/geometry_info.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/fe_poly_tensor.h>
+
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int dim, int spacedim> class MappingQ;
+
+
+/**
+ * DG elements based on vector valued polynomials.
+ *
+ * These elements use vector valued polynomial spaces as they have been
+ * introduced for H<sup>div</sup> and H<sup>curl</sup> conforming finite
+ * elements, but do not use the usual continuity of these elements. Thus, they
+ * are suitable for DG and hybrid formulations involving these function
+ * spaces.
+ *
+ * The template argument <tt>PolynomialType</tt> refers to a vector valued
+ * polynomial space like PolynomialsRaviartThomas or PolynomialsNedelec. Note
+ * that the dimension of the polynomial space and the argument <tt>dim</tt>
+ * must coincide.
+ *
+ * @ingroup febase
+ * @author Guido Kanschat
+ * @date 2010
+ */
+template <class PolynomialType, int dim, int spacedim=dim>
+class FE_DGVector
+  :
+  public FE_PolyTensor<PolynomialType, dim, spacedim>
+{
+public:
+  /**
+   * Constructor for the vector element of degree @p p with mapping type @p m.
+   */
+  FE_DGVector (const unsigned int p, MappingType m);
+public:
+  /** Return a pointer to a newly allocated copy of this object. */
+  FiniteElement<dim, spacedim> *clone() const;
+
+  /**
+   * Return a string that uniquely identifies a finite element. This class
+   * returns <tt>FE_DGVector_NAME<dim>(degree)</tt>, where NAME is the name of
+   * the polynomial space and @p dim and @p degree are the appropriate values.
+   */
+  virtual std::string get_name () const;
+
+
+  /**
+   * This function returns @p true, if the shape function @p shape_index has
+   * non-zero function values somewhere on the face @p face_index.
+   *
+   * For this element, we always return @p true.
+   */
+  virtual bool has_support_on_face (const unsigned int shape_index,
+                                    const unsigned int face_index) const;
+  // None of the interpolate() overloads below is implemented for this element.
+  virtual void interpolate(std::vector<double>                &local_dofs,
+                           const std::vector<double> &values) const;
+  virtual void interpolate(std::vector<double>                &local_dofs,
+                           const std::vector<Vector<double> > &values,
+                           unsigned int offset = 0) const;
+  virtual void interpolate(
+    std::vector<double> &local_dofs,
+    const VectorSlice<const std::vector<std::vector<double> > > &values) const;
+  virtual std::size_t memory_consumption () const;
+
+private:
+  /**
+   * Only for internal use. Its full name is @p get_dofs_per_object_vector
+   * function and it creates the @p dofs_per_object vector that is needed
+   * within the constructor to be passed to the constructor of @p
+   * FiniteElementData.
+   */
+  static std::vector<unsigned int>
+  get_dpo_vector (const unsigned int degree);
+
+  /**
+   * Initialize the @p generalized_support_points field of the FiniteElement
+   * class and fill the tables with @p interior_weights. Called from the
+   * constructor.
+   *
+   * See the
+   * @ref GlossGeneralizedSupport "glossary entry on generalized support points"
+   * for more information.
+   */
+  void initialize_support_points (const unsigned int degree);
+
+  /**
+   * Initialize the interpolation from functions on refined mesh cells onto
+   * the father cell. According to the philosophy of the Raviart-Thomas
+   * element, this restriction operator preserves the divergence of a function
+   * weakly.
+   */
+  void initialize_restriction ();
+
+  /**
+   * Fields of cell-independent data.
+   *
+   * For information about the general purpose of this class, see the
+   * documentation of the base class.
+   */
+  class InternalData : public FiniteElement<dim>::InternalDataBase
+  {
+  public:
+    /**
+     * Array with shape function values in quadrature points. There is one row
+     * for each shape function, containing values for each quadrature point.
+     * Since the shape functions are vector-valued (with as many components as
+     * there are space dimensions), the value is a tensor.
+     *
+     * In this array, we store the values of the shape function in the
+     * quadrature points on the unit cell. The transformation to the real
+     * space cell is then simply done by multiplication with the Jacobian of
+     * the mapping.
+     */
+    std::vector<std::vector<Tensor<1,dim> > > shape_values;
+
+    /**
+     * Array with shape function gradients in quadrature points. There is one
+     * row for each shape function, containing values for each quadrature
+     * point.
+     *
+     * We store the gradients in the quadrature points on the unit cell. We
+     * then only have to apply the transformation (which is a matrix-vector
+     * multiplication) when visiting an actual cell.
+     */
+    std::vector<std::vector<Tensor<2,dim> > > shape_gradients;
+  };
+  Table<3, double> interior_weights;
+};
+
+
+
+/**
+ * A vector-valued DG element based on the polynomial space of FE_Nedelec.
+ *
+ * @ingroup fe
+ * @author Guido Kanschat
+ * @date 2011
+ */
+template <int dim, int spacedim=dim>
+class FE_DGNedelec : public FE_DGVector<PolynomialsNedelec<dim>, dim, spacedim>
+{
+public:
+  /**
+   * Constructor for the discontinuous Nédélec element of degree
+   * @p p.
+   */
+  FE_DGNedelec (const unsigned int p);
+
+  /**
+   * Return a string that uniquely identifies a finite element. This class
+   * returns <tt>FE_DGNedelec<dim>(degree)</tt>, with @p dim and @p degree
+   * replaced by appropriate values.
+   */
+  virtual std::string get_name () const;
+};
+
+
+
+/**
+ * A vector-valued DG element based on the polynomial space of
+ * FE_RaviartThomas.
+ *
+ * @ingroup fe
+ * @author Guido Kanschat
+ * @date 2011
+ */
+template <int dim, int spacedim=dim>
+class FE_DGRaviartThomas : public FE_DGVector<PolynomialsRaviartThomas<dim>, dim, spacedim>
+{
+public:
+  /**
+   * Constructor for the discontinuous Raviart-Thomas element of degree @p p.
+   */
+  FE_DGRaviartThomas (const unsigned int p);
+
+  /**
+   * Return a string that uniquely identifies a finite element. This class
+   * returns <tt>FE_DGRaviartThomas<dim>(degree)</tt>, with @p dim and @p
+   * degree replaced by appropriate values.
+   */
+  virtual std::string get_name () const;
+};
+
+
+
+/**
+ * A vector-valued DG element based on the polynomial space of FE_BDM.
+ *
+ * @ingroup fe
+ * @author Guido Kanschat
+ * @date 2011
+ */
+template <int dim, int spacedim=dim>
+class FE_DGBDM : public FE_DGVector<PolynomialsBDM<dim>, dim, spacedim>
+{
+public:
+  /**
+   * Constructor for the discontinuous BDM element of degree @p p.
+   */
+  FE_DGBDM (const unsigned int p);
+
+  /**
+   * Return a string that uniquely identifies a finite element. This class
+   * returns <tt>FE_DGBDM<dim>(degree)</tt>, with @p dim and @p degree
+   * replaced by appropriate values.
+   */
+  virtual std::string get_name () const;
+};
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/fe/fe_dg_vector.templates.h b/include/deal.II/fe/fe_dg_vector.templates.h
new file mode 100644
index 0000000..8ed1f57
--- /dev/null
+++ b/include/deal.II/fe/fe_dg_vector.templates.h
@@ -0,0 +1,128 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/fe/fe_dg_vector.h>
+#include <deal.II/fe/fe_tools.h>
+#include <deal.II/base/quadrature_lib.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+//TODO:[GK] deg+1 is wrong here and should be fixed after FiniteElementData was cleaned up
+
+template <class PolynomialType, int dim, int spacedim>
+FE_DGVector<PolynomialType,dim,spacedim>::FE_DGVector (
+  const unsigned int deg, MappingType map)
+  :
+  FE_PolyTensor<PolynomialType, dim, spacedim>(
+    deg,
+    FiniteElementData<dim>(
+      get_dpo_vector(deg), dim, deg+1, FiniteElementData<dim>::L2),
+    std::vector<bool>(PolynomialType::compute_n_pols(deg), true),
+    std::vector<ComponentMask>(PolynomialType::compute_n_pols(deg),
+                               ComponentMask(dim,true)))
+{
+  this->mapping_type = map;
+  const unsigned int polynomial_degree = this->tensor_degree();
+  // the points of this Gauss formula serve as generalized support points
+  QGauss<dim> quadrature(polynomial_degree+1);
+  this->generalized_support_points = quadrature.get_points();
+  // restriction receives FETools' projection matrices, prolongation its embedding matrices
+  this->reinit_restriction_and_prolongation_matrices(true, true);
+  FETools::compute_projection_matrices (*this, this->restriction, true);
+  FETools::compute_embedding_matrices (*this, this->prolongation, true);
+}
+
+
+template <class PolynomialType, int dim, int spacedim>
+FiniteElement<dim, spacedim> *
+FE_DGVector<PolynomialType,dim,spacedim>::clone() const
+{
+  return new FE_DGVector(*this);  // copy-construct a new object of this class
+}
+
+
+template <class PolynomialType, int dim, int spacedim>
+std::string
+FE_DGVector<PolynomialType,dim,spacedim>::get_name() const
+{
+  std::ostringstream label;
+  label << "FE_DGVector_" << this->poly_space.name();     // prefix + space name
+  label << "<" << dim << ">(" << this->degree-1 << ")";   // "<dim>(degree-1)"
+  return label.str();
+}
+
+
+template <class PolynomialType, int dim, int spacedim>
+std::vector<unsigned int>
+FE_DGVector<PolynomialType,dim,spacedim>::get_dpo_vector (const unsigned int deg)
+{
+  // only the last entry (index dim) is set; all other entries stay zero
+  std::vector<unsigned int> dofs_per_object(dim+1);
+  dofs_per_object.back() = PolynomialType::compute_n_pols(deg);
+  return dofs_per_object;
+}
+
+
+template <class PolynomialType, int dim, int spacedim>
+bool
+FE_DGVector<PolynomialType,dim,spacedim>::has_support_on_face
+(const unsigned int /*shape_index*/,
+ const unsigned int /*face_index*/) const
+{
+  return true;  // support is reported for every shape function/face pair
+}
+
+
+template <class PolynomialType, int dim, int spacedim>
+void
+FE_DGVector<PolynomialType,dim,spacedim>::interpolate
+(std::vector<double> & /*local_dofs*/,
+ const std::vector<double> & /*values*/) const
+{
+  Assert(false, ExcNotImplemented());
+}
+
+
+template <class PolynomialType, int dim, int spacedim>
+void
+FE_DGVector<PolynomialType,dim,spacedim>::interpolate
+(std::vector<double> & /*local_dofs*/,
+ const std::vector<Vector<double> > & /*values*/,
+ unsigned int /*offset*/) const
+{
+  Assert(false, ExcNotImplemented());
+}
+
+template <class PolynomialType, int dim, int spacedim>
+void
+FE_DGVector<PolynomialType,dim,spacedim>::interpolate
+(std::vector<double> & /*local_dofs*/,
+ const VectorSlice<const std::vector<std::vector<double> > > & /*values*/) const
+{
+  Assert(false, ExcNotImplemented());
+}
+
+
+template <class PolynomialType, int dim, int spacedim>
+std::size_t
+FE_DGVector<PolynomialType,dim,spacedim>::memory_consumption() const
+{
+  Assert(false, ExcNotImplemented());  // no memory estimate is implemented
+  return 0;                            // placeholder result
+}
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/include/deal.II/fe/fe_dgp.h b/include/deal.II/fe/fe_dgp.h
new file mode 100644
index 0000000..aa73c24
--- /dev/null
+++ b/include/deal.II/fe/fe_dgp.h
@@ -0,0 +1,541 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2002 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__fe_dgp_h
+#define dealii__fe_dgp_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/polynomial_space.h>
+#include <deal.II/fe/fe_poly.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int dim, int spacedim> class MappingQ;
+
+/*!@addtogroup fe */
+/*@{*/
+
+/**
+ * Discontinuous finite elements based on Legendre polynomials.
+ *
+ * This finite element implements complete polynomial spaces, that is,
+ * dim-dimensional polynomials of degree p. For example, in 2d the element
+ * FE_DGP(1) would represent the span of the functions $\{1,\hat x,\hat y\}$,
+ * which is in contrast to the element FE_DGQ(1) that is formed by the span of
+ * $\{1,\hat x,\hat y,\hat x\hat y\}$. Since the DGP space has only three
+ * unknowns for each quadrilateral, it is immediately clear that this element
+ * can not be continuous.
+ *
+ * The basis functions used in this element for the space described above are
+ * chosen to form a Legendre basis on the unit square, i.e., in particular
+ * they are $L_2$-orthogonal and normalized on the reference cell (but not
+ * necessarily on the real cell). As a consequence, the first basis function
+ * of this element is always the function that is constant and equal to one,
+ * regardless of the polynomial degree of the element. In addition, as a
+ * result of the orthogonality of the basis functions, the mass matrix is
+ * diagonal if the grid cells are parallelograms. Note that this is in
+ * contrast to the FE_DGPMonomial class that actually uses the monomial basis
+ * listed above as basis functions, without transformation from reference to
+ * real cell.
+ *
+ * The shape functions are defined in the class PolynomialSpace. The
+ * polynomials used inside PolynomialSpace are Polynomials::Legendre up to
+ * degree <tt>p</tt> given in FE_DGP. For the ordering of the basis functions,
+ * refer to PolynomialSpace, remembering that the Legendre polynomials are
+ * ordered by ascending degree.
+ *
+ * @note This element is not defined by finding shape functions within the
+ * given function space that interpolate a particular set of points.
+ * Consequently, there are no support points to which a given function could
+ * be interpolated; finding a finite element function that approximates a
+ * given function is therefore only possible through projection, rather than
+ * interpolation. Secondly, the shape functions of this element do not jointly
+ * add up to one. As a consequence of this, adding or subtracting a constant
+ * value -- such as one would do to make a function have mean value zero --
+ * can not be done by simply subtracting the constant value from each degree
+ * of freedom. Rather, one needs to use the fact that the first basis function
+ * is constant equal to one and simply subtract the constant from the value of
+ * the degree of freedom corresponding to this first shape function on each
+ * cell.
+ *
+ *
+ * @note This class is only partially implemented for the codimension one case
+ * (<tt>spacedim != dim </tt>), since no passage of information between meshes
+ * of different refinement level is possible because the embedding and
+ * projection matrices are not computed in the class constructor.
+ *
+ * <h3>Transformation properties</h3>
+ *
+ * It is worth noting that under a (bi-, tri-)linear mapping, the space
+ * described by this element does not contain $P(k)$, even if we use a basis
+ * of polynomials of degree $k$. Consequently, for example, on meshes with
+ * non-affine cells, a linear function can not be exactly represented by
+ * elements of type FE_DGP(1) or FE_DGPMonomial(1).
+ *
+ * This can be understood by the following 2-d example: consider the cell with
+ * vertices at $(0,0),(1,0),(0,1),(s,s)$:
+ * @image html dgp_doesnt_contain_p.png
+ *
+ * For this cell, a bilinear transformation $F$ produces the relations $x=\hat
+ * x+\hat x\hat y$ and $y=\hat y+\hat x\hat y$ that correlate reference
+ * coordinates $\hat x,\hat y$ and coordinates in real space $x,y$. Under this
+ * mapping, the constant function is clearly mapped onto itself, but the two
+ * other shape functions of the $P_1$ space, namely $\phi_1(\hat x,\hat
+ * y)=\hat x$ and $\phi_2(\hat x,\hat y)=\hat y$ are mapped onto
+ * $\phi_1(x,y)=\frac{x-t}{t(s-1)},\phi_2(x,y)=t$ where
+ * $t=\frac{y}{s-x+sx+y-sy}$.
+ *
+ * For the simple case that $s=1$, i.e. if the real cell is the unit square,
+ * the expressions can be simplified to $t=y$ and
+ * $\phi_1(x,y)=x,\phi_2(x,y)=y$. However, for all other cases, the functions
+ * $\phi_1(x,y),\phi_2(x,y)$ are not linear any more, and neither is any
+ * linear combination of them. Consequently, the linear functions are not
+ * within the range of the mapped $P_1$ polynomials.
+ *
+ * <h3>Visualization of shape functions</h3> In 2d, the shape functions of
+ * this element look as follows.
+ *
+ * <h4>$P_0$ element</h4>
+ *
+ * <table> <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGP/P1/P1_DGP_shape0000.png
+ * </td>
+ *
+ * <td align="center"> </td> </tr> <tr> <td align="center"> $P_0$ element,
+ * shape function 0 </td>
+ *
+ * <td align="center"></td> </tr> </table>
+ *
+ * <h4>$P_1$ element</h4>
+ *
+ * <table> <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGP/P1/P1_DGP_shape0000.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGP/P1/P1_DGP_shape0001.png
+ * </td> </tr> <tr> <td align="center"> $P_1$ element, shape function 0 </td>
+ *
+ * <td align="center"> $P_1$ element, shape function 1 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGP/P1/P1_DGP_shape0002.png
+ * </td>
+ *
+ * <td align="center"> </td> </tr> <tr> <td align="center"> $P_1$ element,
+ * shape function 2 </td>
+ *
+ * <td align="center"></td> </tr> </table>
+ *
+ *
+ * <h4>$P_2$ element</h4>
+ *
+ * <table> <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGP/P2/P2_DGP_shape0000.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGP/P2/P2_DGP_shape0001.png
+ * </td> </tr> <tr> <td align="center"> $P_2$ element, shape function 0 </td>
+ *
+ * <td align="center"> $P_2$ element, shape function 1 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGP/P2/P2_DGP_shape0002.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGP/P2/P2_DGP_shape0003.png
+ * </td> </tr> <tr> <td align="center"> $P_2$ element, shape function 2 </td>
+ *
+ * <td align="center"> $P_2$ element, shape function 3 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGP/P2/P2_DGP_shape0004.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGP/P2/P2_DGP_shape0005.png
+ * </td> </tr> <tr> <td align="center"> $P_2$ element, shape function 4 </td>
+ *
+ * <td align="center"> $P_2$ element, shape function 5 </td> </tr> </table>
+ *
+ *
+ * <h4>$P_3$ element</h4>
+ *
+ * <table> <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGP/P3/P3_DGP_shape0000.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGP/P3/P3_DGP_shape0001.png
+ * </td> </tr> <tr> <td align="center"> $P_3$ element, shape function 0 </td>
+ *
+ * <td align="center"> $P_3$ element, shape function 1 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGP/P3/P3_DGP_shape0002.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGP/P3/P3_DGP_shape0003.png
+ * </td> </tr> <tr> <td align="center"> $P_3$ element, shape function 2 </td>
+ *
+ * <td align="center"> $P_3$ element, shape function 3 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGP/P3/P3_DGP_shape0004.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGP/P3/P3_DGP_shape0005.png
+ * </td> </tr> <tr> <td align="center"> $P_3$ element, shape function 4 </td>
+ *
+ * <td align="center"> $P_3$ element, shape function 5 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGP/P3/P3_DGP_shape0006.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGP/P3/P3_DGP_shape0007.png
+ * </td> </tr> <tr> <td align="center"> $P_3$ element, shape function 6 </td>
+ *
+ * <td align="center"> $P_3$ element, shape function 7 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGP/P3/P3_DGP_shape0008.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGP/P3/P3_DGP_shape0009.png
+ * </td> </tr> <tr> <td align="center"> $P_3$ element, shape function 8 </td>
+ *
+ * <td align="center"> $P_3$ element, shape function 9 </td> </tr> </table>
+ *
+ *
+ * <h4>$P_4$ element</h4> <table> <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGP/P4/P4_DGP_shape0000.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGP/P4/P4_DGP_shape0001.png
+ * </td> </tr> <tr> <td align="center"> $P_4$ element, shape function 0 </td>
+ *
+ * <td align="center"> $P_4$ element, shape function 1 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGP/P4/P4_DGP_shape0002.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGP/P4/P4_DGP_shape0003.png
+ * </td> </tr> <tr> <td align="center"> $P_4$ element, shape function 2 </td>
+ *
+ * <td align="center"> $P_4$ element, shape function 3 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGP/P4/P4_DGP_shape0004.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGP/P4/P4_DGP_shape0005.png
+ * </td> </tr> <tr> <td align="center"> $P_4$ element, shape function 4 </td>
+ *
+ * <td align="center"> $P_4$ element, shape function 5 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGP/P4/P4_DGP_shape0006.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGP/P4/P4_DGP_shape0007.png
+ * </td> </tr> <tr> <td align="center"> $P_4$ element, shape function 6 </td>
+ *
+ * <td align="center"> $P_4$ element, shape function 7 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGP/P4/P4_DGP_shape0008.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGP/P4/P4_DGP_shape0009.png
+ * </td> </tr> <tr> <td align="center"> $P_4$ element, shape function 8 </td>
+ *
+ * <td align="center"> $P_4$ element, shape function 9 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGP/P4/P4_DGP_shape0010.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGP/P4/P4_DGP_shape0011.png
+ * </td> </tr> <tr> <td align="center"> $P_4$ element, shape function 10 </td>
+ *
+ * <td align="center"> $P_4$ element, shape function 11 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGP/P4/P4_DGP_shape0012.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGP/P4/P4_DGP_shape0013.png
+ * </td> </tr> <tr> <td align="center"> $P_4$ element, shape function 12 </td>
+ *
+ * <td align="center"> $P_4$ element, shape function 13 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGP/P4/P4_DGP_shape0014.png
+ * </td>
+ *
+ * <td align="center"> </td> </tr> <tr> <td align="center"> $P_4$ element,
+ * shape function 14 </td>
+ *
+ * <td align="center"></td> </tr> </table>
+ *
+ * @author Guido Kanschat, 2001, 2002, Ralf Hartmann 2004
+ */
+template <int dim, int spacedim=dim>
+class FE_DGP : public FE_Poly<PolynomialSpace<dim>,dim,spacedim>
+{
+public:
+  /**
+   * Constructor for complete polynomials of degree @p p.
+   */
+  FE_DGP (const unsigned int p);
+
+  /**
+   * Return a string that uniquely identifies a finite element. This class
+   * returns <tt>FE_DGP<dim>(degree)</tt>, with @p dim and @p degree replaced
+   * by appropriate values.
+   */
+  virtual std::string get_name () const;
+
+  /**
+   * @name Functions to support hp
+   * @{
+   */
+
+  /**
+   * If, on a vertex, several finite elements are active, the hp code first
+   * assigns the degrees of freedom of each of these FEs different global
+   * indices. It then calls this function to find out which of them should get
+   * identical values, and consequently can receive the same global DoF index.
+   * This function therefore returns a list of identities between DoFs of the
+   * present finite element object with the DoFs of @p fe_other, which is a
+   * reference to a finite element object representing one of the other finite
+   * elements active on this particular vertex. The function computes which of
+   * the degrees of freedom of the two finite element objects are equivalent,
+   * both numbered between zero and the corresponding value of dofs_per_vertex
+   * of the two finite elements. The first index of each pair denotes one of
+   * the vertex dofs of the present element, whereas the second is the
+   * corresponding index of the other finite element.
+   *
+   * This being a discontinuous element, the set of such constraints is of
+   * course empty.
+   */
+  virtual
+  std::vector<std::pair<unsigned int, unsigned int> >
+  hp_vertex_dof_identities (const FiniteElement<dim,spacedim> &fe_other) const;
+
+  /**
+   * Same as hp_vertex_dof_identities(), except that the function treats degrees
+   * of freedom on lines.
+   *
+   * This being a discontinuous element, the set of such constraints is of
+   * course empty.
+   */
+  virtual
+  std::vector<std::pair<unsigned int, unsigned int> >
+  hp_line_dof_identities (const FiniteElement<dim,spacedim> &fe_other) const;
+
+  /**
+   * Same as hp_vertex_dof_identities(), except that the function treats degrees
+   * of freedom on quads.
+   *
+   * This being a discontinuous element, the set of such constraints is of
+   * course empty.
+   */
+  virtual
+  std::vector<std::pair<unsigned int, unsigned int> >
+  hp_quad_dof_identities (const FiniteElement<dim,spacedim> &fe_other) const;
+
+  /**
+   * Return whether this element implements its hanging node constraints in
+   * the new way, which has to be used to make elements "hp compatible".
+   *
+   * For the FE_DGP class the result is always true (independent of the degree
+   * of the element), as it has no hanging nodes (being a discontinuous
+   * element).
+   */
+  virtual bool hp_constraints_are_implemented () const;
+
+  /**
+   * Return whether this element dominates the one given as argument when they
+   * meet at a common face, whether it is the other way around, whether
+   * neither dominates, or if either could dominate.
+   *
+   * For a definition of domination, see FiniteElementDomination::Domination
+   * and in particular the
+   * @ref hp_paper "hp paper".
+   */
+  virtual
+  FiniteElementDomination::Domination
+  compare_for_face_domination (const FiniteElement<dim,spacedim> &fe_other) const;
+
+  /**
+   * @}
+   */
+
+  /**
+   * Return the matrix interpolating from a face of one element to the face
+   * of the neighboring element. The size of the matrix is then
+   * <tt>source.dofs_per_face</tt> times <tt>this->dofs_per_face</tt>.
+   *
+   * Derived elements will have to implement this function. They may only
+   * provide interpolation matrices for certain source finite elements, for
+   * example those from the same family. If they don't implement interpolation
+   * from a given element, then they must throw an exception of type
+   * FiniteElement<dim,spacedim>::ExcInterpolationNotImplemented.
+   */
+  virtual void
+  get_face_interpolation_matrix (const FiniteElement<dim,spacedim> &source,
+                                 FullMatrix<double>       &matrix) const;
+
+  /**
+   * Return the matrix interpolating from a face of one element to the face
+   * of the neighboring element. The size of the matrix is then
+   * <tt>source.dofs_per_face</tt> times <tt>this->dofs_per_face</tt>.
+   *
+   * Derived elements will have to implement this function. They may only
+   * provide interpolation matrices for certain source finite elements, for
+   * example those from the same family. If they don't implement interpolation
+   * from a given element, then they must throw an exception of type
+   * FiniteElement<dim,spacedim>::ExcInterpolationNotImplemented.
+   */
+  virtual void
+  get_subface_interpolation_matrix (const FiniteElement<dim,spacedim> &source,
+                                    const unsigned int        subface,
+                                    FullMatrix<double>       &matrix) const;
+
+  /**
+   * This function returns @p true, if the shape function @p shape_index has
+   * non-zero function values somewhere on the face @p face_index.
+   */
+  virtual bool has_support_on_face (const unsigned int shape_index,
+                                    const unsigned int face_index) const;
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   *
+   * This function is made virtual, since finite element objects are usually
+   * accessed through pointers to their base class, rather than the class
+   * itself.
+   */
+  virtual std::size_t memory_consumption () const;
+
+
+  /**
+   * Declare a nested class which will hold static definitions of various
+   * matrices such as constraint and embedding matrices. The definition of the
+   * various static fields are in the files <tt>fe_dgp_[123]d.cc</tt> in the
+   * source directory.
+   */
+  struct Matrices
+  {
+    /**
+     * As @p embedding but for projection matrices.
+     */
+    static const double *const projection_matrices[][GeometryInfo<dim>::max_children_per_cell];
+
+    /**
+     * As @p n_embedding_matrices but for projection matrices.
+     */
+    static const unsigned int n_projection_matrices;
+  };
+
+  /**
+   * Return a list of constant modes of the element. For this element, the
+   * first entry is true, all others are false.
+   */
+  virtual std::pair<Table<2,bool>, std::vector<unsigned int> >
+  get_constant_modes () const;
+
+protected:
+
+  /**
+   * @p clone function instead of a copy constructor.
+   *
+   * This function is needed by the constructors of @p FESystem.
+   */
+  virtual FiniteElement<dim,spacedim> *clone() const;
+
+private:
+
+  /**
+   * Only for internal use. Its full name is @p get_dofs_per_object_vector
+   * function and it creates the @p dofs_per_object vector that is needed
+   * within the constructor to be passed to the constructor of @p
+   * FiniteElementData.
+   */
+  static std::vector<unsigned int> get_dpo_vector (const unsigned int degree);
+};
+
+/* @} */
+#ifndef DOXYGEN
+
+
+// declaration of explicit specializations of member variables, if the
+// compiler allows us to do that (the standard says we must)
+#ifndef DEAL_II_MEMBER_VAR_SPECIALIZATION_BUG
+template <>
+const double *const FE_DGP<1>::Matrices::projection_matrices[][GeometryInfo<1>::max_children_per_cell];
+
+template <>
+const unsigned int FE_DGP<1>::Matrices::n_projection_matrices;
+
+template <>
+const double *const FE_DGP<2>::Matrices::projection_matrices[][GeometryInfo<2>::max_children_per_cell];
+
+template <>
+const unsigned int FE_DGP<2>::Matrices::n_projection_matrices;
+
+template <>
+const double *const FE_DGP<3>::Matrices::projection_matrices[][GeometryInfo<3>::max_children_per_cell];
+
+template <>
+const unsigned int FE_DGP<3>::Matrices::n_projection_matrices;
+
+//codimension 1
+template <>
+const double *const FE_DGP<1,2>::Matrices::projection_matrices[][GeometryInfo<1>::max_children_per_cell];
+
+template <>
+const unsigned int FE_DGP<1,2>::Matrices::n_projection_matrices;
+
+template <>
+const double *const FE_DGP<2,3>::Matrices::projection_matrices[][GeometryInfo<2>::max_children_per_cell];
+
+template <>
+const unsigned int FE_DGP<2,3>::Matrices::n_projection_matrices;
+
+#endif
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/fe/fe_dgp_monomial.h b/include/deal.II/fe/fe_dgp_monomial.h
new file mode 100644
index 0000000..ccf7c51
--- /dev/null
+++ b/include/deal.II/fe/fe_dgp_monomial.h
@@ -0,0 +1,473 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__fe_dgp_monomial_h
+#define dealii__fe_dgp_monomial_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/polynomials_p.h>
+#include <deal.II/fe/fe_poly.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int dim, int spacedim> class MappingQ;
+
+
+/*!@addtogroup fe */
+/*@{*/
+
+/**
+ * Discontinuous finite elements based on monomials.
+ *
+ * This finite element implements complete polynomial spaces, that is, dim-
+ * dimensional polynomials of degree p. For example, in 2d the element
+ * FE_DGP(1) would represent the span of the functions $\{1,\hat x,\hat y\}$,
+ * which is in contrast to the element FE_DGQ(1) that is formed by the span of
+ * $\{1,\hat x,\hat y,\hat x\hat y\}$. Since the DGP space has only three
+ * unknowns for each quadrilateral, it is immediately clear that this element
+ * can not be continuous.
+ *
+ * The basis functions for this element are chosen to be the monomials listed
+ * above. Note that this is the main difference to the FE_DGP class that uses
+ * a set of polynomials of complete degree <code>p</code> that form a Legendre
+ * basis on the unit square. Thus, there, the mass matrix is diagonal, if the
+ * grid cells are parallelograms. The basis here does not have this property;
+ * however, it is simpler to compute. On the other hand, this element has the
+ * additional disadvantage that the local cell matrices usually have a worse
+ * condition number than the ones originating from the FE_DGP element.
+ *
+ * This class is not implemented for the codimension one case (<tt>spacedim !=
+ * dim</tt>).
+ *
+ * <h3>Transformation properties</h3>
+ *
+ * It is worth noting that under a (bi-, tri-)linear mapping, the space
+ * described by this element does not contain $P(k)$, even if we use a basis
+ * of polynomials of degree $k$. Consequently, for example, on meshes with
+ * non-affine cells, a linear function can not be exactly represented by
+ * elements of type FE_DGP(1) or FE_DGPMonomial(1).
+ *
+ * This can be understood by the following 2-d example: consider the cell with
+ * vertices at $(0,0),(1,0),(0,1),(s,s)$:
+ * @image html dgp_doesnt_contain_p.png
+ *
+ * For this cell, a bilinear transformation $F$ produces the relations $x=\hat
+ * x+(s-1)\hat x\hat y$ and $y=\hat y+(s-1)\hat x\hat y$ that correlate reference
+ * coordinates $\hat x,\hat y$ and coordinates in real space $x,y$. Under this
+ * mapping, the constant function is clearly mapped onto itself, but the two
+ * other shape functions of the $P_1$ space, namely $\phi_1(\hat x,\hat
+ * y)=\hat x$ and $\phi_2(\hat x,\hat y)=\hat y$ are mapped onto
+ * $\phi_1(x,y)=\frac{x-t}{t(s-1)},\phi_2(x,y)=t$ where
+ * $t=\frac{y}{s-x+sx+y-sy}$.
+ *
+ * For the simple case that $s=1$, i.e. if the real cell is the unit square,
+ * the expressions can be simplified to $t=y$ and
+ * $\phi_1(x,y)=x,\phi_2(x,y)=y$. However, for all other cases, the functions
+ * $\phi_1(x,y),\phi_2(x,y)$ are not linear any more, and neither is any
+ * linear combination of them. Consequently, the linear functions are not
+ * within the range of the mapped $P_1$ polynomials.
+ *
+ *
+ * <h3>Visualization of shape functions</h3> In 2d, the shape functions of
+ * this element look as follows.
+ *
+ * <h4>$P_0$ element</h4>
+ *
+ * <table> <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPMonomial/P1/P1_DGPMonomial_shape0000.png
+ * </td>
+ *
+ * <td align="center"> </td> </tr> <tr> <td align="center"> $P_0$ element,
+ * shape function 0 </td>
+ *
+ * <td align="center"></td> </tr> </table>
+ *
+ * <h4>$P_1$ element</h4>
+ *
+ * <table> <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPMonomial/P1/P1_DGPMonomial_shape0000.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPMonomial/P1/P1_DGPMonomial_shape0001.png
+ * </td> </tr> <tr> <td align="center"> $P_1$ element, shape function 0 </td>
+ *
+ * <td align="center"> $P_1$ element, shape function 1 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPMonomial/P1/P1_DGPMonomial_shape0002.png
+ * </td>
+ *
+ * <td align="center"> </td> </tr> <tr> <td align="center"> $P_1$ element,
+ * shape function 2 </td>
+ *
+ * <td align="center"></td> </tr> </table>
+ *
+ *
+ * <h4>$P_2$ element</h4>
+ *
+ * <table> <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPMonomial/P2/P2_DGPMonomial_shape0000.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPMonomial/P2/P2_DGPMonomial_shape0001.png
+ * </td> </tr> <tr> <td align="center"> $P_2$ element, shape function 0 </td>
+ *
+ * <td align="center"> $P_2$ element, shape function 1 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPMonomial/P2/P2_DGPMonomial_shape0002.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPMonomial/P2/P2_DGPMonomial_shape0003.png
+ * </td> </tr> <tr> <td align="center"> $P_2$ element, shape function 2 </td>
+ *
+ * <td align="center"> $P_2$ element, shape function 3 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPMonomial/P2/P2_DGPMonomial_shape0004.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPMonomial/P2/P2_DGPMonomial_shape0005.png
+ * </td> </tr> <tr> <td align="center"> $P_2$ element, shape function 4 </td>
+ *
+ * <td align="center"> $P_2$ element, shape function 5 </td> </tr> </table>
+ *
+ *
+ * <h4>$P_3$ element</h4>
+ *
+ * <table> <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPMonomial/P3/P3_DGPMonomial_shape0000.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPMonomial/P3/P3_DGPMonomial_shape0001.png
+ * </td> </tr> <tr> <td align="center"> $P_3$ element, shape function 0 </td>
+ *
+ * <td align="center"> $P_3$ element, shape function 1 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPMonomial/P3/P3_DGPMonomial_shape0002.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPMonomial/P3/P3_DGPMonomial_shape0003.png
+ * </td> </tr> <tr> <td align="center"> $P_3$ element, shape function 2 </td>
+ *
+ * <td align="center"> $P_3$ element, shape function 3 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPMonomial/P3/P3_DGPMonomial_shape0004.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPMonomial/P3/P3_DGPMonomial_shape0005.png
+ * </td> </tr> <tr> <td align="center"> $P_3$ element, shape function 4 </td>
+ *
+ * <td align="center"> $P_3$ element, shape function 5 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPMonomial/P3/P3_DGPMonomial_shape0006.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPMonomial/P3/P3_DGPMonomial_shape0007.png
+ * </td> </tr> <tr> <td align="center"> $P_3$ element, shape function 6 </td>
+ *
+ * <td align="center"> $P_3$ element, shape function 7 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPMonomial/P3/P3_DGPMonomial_shape0008.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPMonomial/P3/P3_DGPMonomial_shape0009.png
+ * </td> </tr> <tr> <td align="center"> $P_3$ element, shape function 8 </td>
+ *
+ * <td align="center"> $P_3$ element, shape function 9 </td> </tr> </table>
+ *
+ *
+ * <h4>$P_4$ element</h4> <table> <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPMonomial/P4/P4_DGPMonomial_shape0000.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPMonomial/P4/P4_DGPMonomial_shape0001.png
+ * </td> </tr> <tr> <td align="center"> $P_4$ element, shape function 0 </td>
+ *
+ * <td align="center"> $P_4$ element, shape function 1 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPMonomial/P4/P4_DGPMonomial_shape0002.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPMonomial/P4/P4_DGPMonomial_shape0003.png
+ * </td> </tr> <tr> <td align="center"> $P_4$ element, shape function 2 </td>
+ *
+ * <td align="center"> $P_4$ element, shape function 3 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPMonomial/P4/P4_DGPMonomial_shape0004.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPMonomial/P4/P4_DGPMonomial_shape0005.png
+ * </td> </tr> <tr> <td align="center"> $P_4$ element, shape function 4 </td>
+ *
+ * <td align="center"> $P_4$ element, shape function 5 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPMonomial/P4/P4_DGPMonomial_shape0006.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPMonomial/P4/P4_DGPMonomial_shape0007.png
+ * </td> </tr> <tr> <td align="center"> $P_4$ element, shape function 6 </td>
+ *
+ * <td align="center"> $P_4$ element, shape function 7 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPMonomial/P4/P4_DGPMonomial_shape0008.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPMonomial/P4/P4_DGPMonomial_shape0009.png
+ * </td> </tr> <tr> <td align="center"> $P_4$ element, shape function 8 </td>
+ *
+ * <td align="center"> $P_4$ element, shape function 9 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPMonomial/P4/P4_DGPMonomial_shape0010.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPMonomial/P4/P4_DGPMonomial_shape0011.png
+ * </td> </tr> <tr> <td align="center"> $P_4$ element, shape function 10 </td>
+ *
+ * <td align="center"> $P_4$ element, shape function 11 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPMonomial/P4/P4_DGPMonomial_shape0012.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPMonomial/P4/P4_DGPMonomial_shape0013.png
+ * </td> </tr> <tr> <td align="center"> $P_4$ element, shape function 12 </td>
+ *
+ * <td align="center"> $P_4$ element, shape function 13 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPMonomial/P4/P4_DGPMonomial_shape0014.png
+ * </td>
+ *
+ * <td align="center"> </td> </tr> <tr> <td align="center"> $P_4$ element,
+ * shape function 14 </td>
+ *
+ * <td align="center"></td> </tr> </table>
+ *
+ * @author Ralf Hartmann, 2004
+ */
+template <int dim>
+class FE_DGPMonomial : public FE_Poly<PolynomialsP<dim>,dim>
+{
+public:
+  /**
+   * Constructor for the polynomial space of degree <tt>p</tt>.
+   */
+  FE_DGPMonomial (const unsigned int p);
+
+  /**
+   * Return a string that uniquely identifies a finite element. This class
+   * returns <tt>FE_DGPMonomial<dim>(degree)</tt>, with <tt>dim</tt> and
+   * <tt>degree</tt> replaced by appropriate values.
+   */
+  virtual std::string get_name () const;
+
+  /**
+   * @name Functions to support hp
+   * @{
+   */
+
+  /**
+   * If, on a vertex, several finite elements are active, the hp code first
+   * assigns the degrees of freedom of each of these FEs different global
+   * indices. It then calls this function to find out which of them should get
+   * identical values, and consequently can receive the same global DoF index.
+   * This function therefore returns a list of identities between DoFs of the
+   * present finite element object with the DoFs of @p fe_other, which is a
+   * reference to a finite element object representing one of the other finite
+   * elements active on this particular vertex. The function computes which of
+   * the degrees of freedom of the two finite element objects are equivalent,
+   * both numbered between zero and the corresponding value of dofs_per_vertex
+   * of the two finite elements. The first index of each pair denotes one of
+   * the vertex dofs of the present element, whereas the second is the
+   * corresponding index of the other finite element.
+   *
+   * This being a discontinuous element, the set of such constraints is of
+   * course empty.
+   */
+  virtual
+  std::vector<std::pair<unsigned int, unsigned int> >
+  hp_vertex_dof_identities (const FiniteElement<dim> &fe_other) const;
+
+  /**
+   * Same as hp_vertex_dof_identities(), except that the function treats
+   * degrees of freedom on lines.
+   *
+   * This being a discontinuous element, the set of such constraints is of
+   * course empty.
+   */
+  virtual
+  std::vector<std::pair<unsigned int, unsigned int> >
+  hp_line_dof_identities (const FiniteElement<dim> &fe_other) const;
+
+  /**
+   * Same as hp_vertex_dof_identities(), except that the function treats
+   * degrees of freedom on quads.
+   *
+   * This being a discontinuous element, the set of such constraints is of
+   * course empty.
+   */
+  virtual
+  std::vector<std::pair<unsigned int, unsigned int> >
+  hp_quad_dof_identities (const FiniteElement<dim> &fe_other) const;
+
+  /**
+   * Return whether this element implements its hanging node constraints in
+   * the new way, which has to be used to make elements "hp compatible".
+   *
+   * For the FE_DGPMonomial class the result is always true (independent of
+   * the degree of the element), as it has no hanging nodes (being a
+   * discontinuous element).
+   */
+  virtual bool hp_constraints_are_implemented () const;
+
+  /**
+   * Return whether this element dominates the one given as argument when they
+   * meet at a common face, whether it is the other way around, whether
+   * neither dominates, or if either could dominate.
+   *
+   * For a definition of domination, see FiniteElementDomination::Domination
+   * and in particular the
+   * @ref hp_paper "hp paper".
+   */
+  virtual
+  FiniteElementDomination::Domination
+  compare_for_face_domination (const FiniteElement<dim> &fe_other) const;
+
+  /**
+   * @}
+   */
+
+  /**
+   * Return the matrix interpolating from the given finite element to the
+   * present one. The size of the matrix is then @p dofs_per_cell times
+   * <tt>source.dofs_per_cell</tt>.
+   *
+   * These matrices are only available if the source element is also a @p FE_Q
+   * element. Otherwise, an exception of type
+   * FiniteElement<dim>::ExcInterpolationNotImplemented is thrown.
+   */
+  virtual void
+  get_interpolation_matrix (const FiniteElement<dim> &source,
+                            FullMatrix<double>           &matrix) const;
+
+  /**
+   * Return the matrix interpolating from a face of one element to the face
+   * of the neighboring element. The size of the matrix is then @p
+   * dofs_per_face times <tt>source.dofs_per_face</tt>.
+   *
+   * Derived elements will have to implement this function. They may only
+   * provide interpolation matrices for certain source finite elements, for
+   * example those from the same family. If they don't implement interpolation
+   * from a given element, then they must throw an exception of type
+   * FiniteElement<dim>::ExcInterpolationNotImplemented.
+   */
+  virtual void
+  get_face_interpolation_matrix (const FiniteElement<dim> &source,
+                                 FullMatrix<double>       &matrix) const;
+
+  /**
+   * Return the matrix interpolating from a face of one element to the face
+   * of the neighboring element. The size of the matrix is then @p
+   * dofs_per_face times <tt>source.dofs_per_face</tt>.
+   *
+   * Derived elements will have to implement this function. They may only
+   * provide interpolation matrices for certain source finite elements, for
+   * example those from the same family. If they don't implement interpolation
+   * from a given element, then they must throw an exception of type
+   * FiniteElement<dim>::ExcInterpolationNotImplemented.
+   */
+  virtual void
+  get_subface_interpolation_matrix (const FiniteElement<dim> &source,
+                                    const unsigned int        subface,
+                                    FullMatrix<double>       &matrix) const;
+
+  /**
+   * This function returns @p true, if the shape function @p shape_index has
+   * non-zero function values somewhere on the face @p face_index.
+   */
+  virtual bool has_support_on_face (const unsigned int shape_index,
+                                    const unsigned int face_index) const;
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   *
+   * This function is made virtual, since finite element objects are usually
+   * accessed through pointers to their base class, rather than the class
+   * itself.
+   */
+  virtual std::size_t memory_consumption () const;
+
+protected:
+
+  /**
+   * @p clone function instead of a copy constructor.
+   *
+   * This function is needed by the constructors of @p FESystem.
+   */
+  virtual FiniteElement<dim> *clone() const;
+
+private:
+
+  /**
+   * Only for internal use. Its full name is @p get_dofs_per_object_vector
+   * function and it creates the @p dofs_per_object vector that is needed
+   * within the constructor to be passed to the constructor of @p
+   * FiniteElementData.
+   */
+  static std::vector<unsigned int> get_dpo_vector (const unsigned int degree);
+
+  /**
+   * Initialize the embedding matrices. Called from the constructor.
+   */
+  void initialize_embedding ();
+
+  /**
+   * Initialize the restriction matrices. Called from the constructor.
+   */
+  void initialize_restriction ();
+};
+
+/*@}*/
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/fe/fe_dgp_nonparametric.h b/include/deal.II/fe/fe_dgp_nonparametric.h
new file mode 100644
index 0000000..a3e0caf
--- /dev/null
+++ b/include/deal.II/fe/fe_dgp_nonparametric.h
@@ -0,0 +1,667 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2002 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__fe_dgp_nonparametric_h
+#define dealii__fe_dgp_nonparametric_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/polynomial.h>
+#include <deal.II/base/polynomial_space.h>
+#include <deal.II/fe/fe.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int dim> class PolynomialSpace;
+template <int dim, int spacedim> class MappingQ;
+
+
+/*!@addtogroup fe */
+/*@{*/
+
+/**
+ * Discontinuous finite elements evaluated at the mapped quadrature points.
+ *
+ * Warning: this class does not work properly, yet. Don't use it!
+ *
+ * This finite element implements complete polynomial spaces, that is,
+ * $d$-dimensional polynomials of order $k$.
+ *
+ * The polynomials are not mapped. Therefore, they are constant, linear,
+ * quadratic, etc. on any grid cell.
+ *
+ * Since the polynomials are evaluated at the quadrature points of the actual
+ * grid cell, no grid transfer and interpolation matrices are available.
+ *
+ * The purpose of this class is experimental, therefore the implementation
+ * will remain incomplete.
+ *
+ * Besides, this class is not implemented for the codimension one case
+ * (<tt>spacedim != dim</tt>).
+ *
+ *
+ * <h3>Visualization of shape functions</h3> In 2d, the shape functions of
+ * this element look as follows.
+ *
+ * <h4>$P_0$ element</h4>
+ *
+ * <table> <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPNonparametric/P1/P1_DGPNonparametric_shape0000.png
+ * </td>
+ *
+ * <td align="center"> </td> </tr> <tr> <td align="center"> $P_0$ element,
+ * shape function 0 </td>
+ *
+ * <td align="center"></td> </tr> </table>
+ *
+ * <h4>$P_1$ element</h4>
+ *
+ * <table> <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPNonparametric/P1/P1_DGPNonparametric_shape0000.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPNonparametric/P1/P1_DGPNonparametric_shape0001.png
+ * </td> </tr> <tr> <td align="center"> $P_1$ element, shape function 0 </td>
+ *
+ * <td align="center"> $P_1$ element, shape function 1 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPNonparametric/P1/P1_DGPNonparametric_shape0002.png
+ * </td>
+ *
+ * <td align="center"> </td> </tr> <tr> <td align="center"> $P_1$ element,
+ * shape function 2 </td>
+ *
+ * <td align="center"></td> </tr> </table>
+ *
+ *
+ * <h4>$P_2$ element</h4>
+ *
+ * <table> <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPNonparametric/P2/P2_DGPNonparametric_shape0000.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPNonparametric/P2/P2_DGPNonparametric_shape0001.png
+ * </td> </tr> <tr> <td align="center"> $P_2$ element, shape function 0 </td>
+ *
+ * <td align="center"> $P_2$ element, shape function 1 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPNonparametric/P2/P2_DGPNonparametric_shape0002.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPNonparametric/P2/P2_DGPNonparametric_shape0003.png
+ * </td> </tr> <tr> <td align="center"> $P_2$ element, shape function 2 </td>
+ *
+ * <td align="center"> $P_2$ element, shape function 3 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPNonparametric/P2/P2_DGPNonparametric_shape0004.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPNonparametric/P2/P2_DGPNonparametric_shape0005.png
+ * </td> </tr> <tr> <td align="center"> $P_2$ element, shape function 4 </td>
+ *
+ * <td align="center"> $P_2$ element, shape function 5 </td> </tr> </table>
+ *
+ *
+ * <h4>$P_3$ element</h4>
+ *
+ * <table> <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPNonparametric/P3/P3_DGPNonparametric_shape0000.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPNonparametric/P3/P3_DGPNonparametric_shape0001.png
+ * </td> </tr> <tr> <td align="center"> $P_3$ element, shape function 0 </td>
+ *
+ * <td align="center"> $P_3$ element, shape function 1 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPNonparametric/P3/P3_DGPNonparametric_shape0002.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPNonparametric/P3/P3_DGPNonparametric_shape0003.png
+ * </td> </tr> <tr> <td align="center"> $P_3$ element, shape function 2 </td>
+ *
+ * <td align="center"> $P_3$ element, shape function 3 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPNonparametric/P3/P3_DGPNonparametric_shape0004.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPNonparametric/P3/P3_DGPNonparametric_shape0005.png
+ * </td> </tr> <tr> <td align="center"> $P_3$ element, shape function 4 </td>
+ *
+ * <td align="center"> $P_3$ element, shape function 5 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPNonparametric/P3/P3_DGPNonparametric_shape0006.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPNonparametric/P3/P3_DGPNonparametric_shape0007.png
+ * </td> </tr> <tr> <td align="center"> $P_3$ element, shape function 6 </td>
+ *
+ * <td align="center"> $P_3$ element, shape function 7 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPNonparametric/P3/P3_DGPNonparametric_shape0008.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPNonparametric/P3/P3_DGPNonparametric_shape0009.png
+ * </td> </tr> <tr> <td align="center"> $P_3$ element, shape function 8 </td>
+ *
+ * <td align="center"> $P_3$ element, shape function 9 </td> </tr> </table>
+ *
+ *
+ * <h4>$P_4$ element</h4> <table> <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPNonparametric/P4/P4_DGPNonparametric_shape0000.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPNonparametric/P4/P4_DGPNonparametric_shape0001.png
+ * </td> </tr> <tr> <td align="center"> $P_4$ element, shape function 0 </td>
+ *
+ * <td align="center"> $P_4$ element, shape function 1 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPNonparametric/P4/P4_DGPNonparametric_shape0002.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPNonparametric/P4/P4_DGPNonparametric_shape0003.png
+ * </td> </tr> <tr> <td align="center"> $P_4$ element, shape function 2 </td>
+ *
+ * <td align="center"> $P_4$ element, shape function 3 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPNonparametric/P4/P4_DGPNonparametric_shape0004.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPNonparametric/P4/P4_DGPNonparametric_shape0005.png
+ * </td> </tr> <tr> <td align="center"> $P_4$ element, shape function 4 </td>
+ *
+ * <td align="center"> $P_4$ element, shape function 5 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPNonparametric/P4/P4_DGPNonparametric_shape0006.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPNonparametric/P4/P4_DGPNonparametric_shape0007.png
+ * </td> </tr> <tr> <td align="center"> $P_4$ element, shape function 6 </td>
+ *
+ * <td align="center"> $P_4$ element, shape function 7 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPNonparametric/P4/P4_DGPNonparametric_shape0008.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPNonparametric/P4/P4_DGPNonparametric_shape0009.png
+ * </td> </tr> <tr> <td align="center"> $P_4$ element, shape function 8 </td>
+ *
+ * <td align="center"> $P_4$ element, shape function 9 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPNonparametric/P4/P4_DGPNonparametric_shape0010.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPNonparametric/P4/P4_DGPNonparametric_shape0011.png
+ * </td> </tr> <tr> <td align="center"> $P_4$ element, shape function 10 </td>
+ *
+ * <td align="center"> $P_4$ element, shape function 11 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPNonparametric/P4/P4_DGPNonparametric_shape0012.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPNonparametric/P4/P4_DGPNonparametric_shape0013.png
+ * </td> </tr> <tr> <td align="center"> $P_4$ element, shape function 12 </td>
+ *
+ * <td align="center"> $P_4$ element, shape function 13 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/DGPNonparametric/P4/P4_DGPNonparametric_shape0014.png
+ * </td>
+ *
+ * <td align="center"> </td> </tr> <tr> <td align="center"> $P_4$ element,
+ * shape function 14 </td>
+ *
+ * <td align="center"></td> </tr> </table>
+ *
+ *
+ * <h3> Implementation details </h3>
+ *
+ * This element does not have an InternalData class, unlike all other
+ * elements, because the InternalData classes are used to store things that
+ * can be computed once and reused multiple times (such as the values of shape
+ * functions at quadrature points on the reference cell). However, because the
+ * element is not mapped, this element has nothing that could be computed on
+ * the reference cell -- everything needs to be computed on the real cell --
+ * and consequently there is nothing we'd like to store in such an object. We
+ * can thus simply use the members already provided by
+ * FiniteElement::InternalDataBase without adding anything in a derived class
+ * in this class.
+ *
+ * @author Guido Kanschat, 2002
+ */
+template <int dim, int spacedim=dim>
+class FE_DGPNonparametric : public FiniteElement<dim,spacedim>
+{
+public:
+  /**
+   * Constructor for the polynomial space of degree @p k.
+   */
+  FE_DGPNonparametric (const unsigned int k);
+
+  /**
+   * Return a string that uniquely identifies a finite element. This class
+   * returns <tt>FE_DGPNonparametric<dim>(degree)</tt>, with @p dim and @p
+   * degree replaced by appropriate values.
+   */
+  virtual std::string get_name () const;
+
+  // for documentation, see the FiniteElement base class
+  virtual
+  UpdateFlags
+  requires_update_flags (const UpdateFlags update_flags) const;
+
+  /**
+   * This function is intended to return the value of a shape function at a
+   * point on the reference cell. However, since the current element does not
+   * implement shape functions by mapping from a reference cell, no shape
+   * functions exist on the reference cell.
+   *
+   * Consequently, as discussed in the corresponding function in the base
+   * class, FiniteElement::shape_value(), this function throws an exception of
+   * type FiniteElement::ExcUnitShapeValuesDoNotExist.
+   */
+  virtual double shape_value (const unsigned int i,
+                              const Point<dim> &p) const;
+
+  /**
+   * This function is intended to return the value of a shape function at a
+   * point on the reference cell. However, since the current element does not
+   * implement shape functions by mapping from a reference cell, no shape
+   * functions exist on the reference cell.
+   *
+   * Consequently, as discussed in the corresponding function in the base
+   * class, FiniteElement::shape_value_component(), this function throws an
+   * exception of type FiniteElement::ExcUnitShapeValuesDoNotExist.
+   */
+  virtual double shape_value_component (const unsigned int i,
+                                        const Point<dim> &p,
+                                        const unsigned int component) const;
+
+  /**
+   * This function is intended to return the gradient of a shape function at a
+   * point on the reference cell. However, since the current element does not
+   * implement shape functions by mapping from a reference cell, no shape
+   * functions exist on the reference cell.
+   *
+   * Consequently, as discussed in the corresponding function in the base
+   * class, FiniteElement::shape_grad(), this function throws an exception of
+   * type FiniteElement::ExcUnitShapeValuesDoNotExist.
+   */
+  virtual Tensor<1,dim> shape_grad (const unsigned int  i,
+                                    const Point<dim>   &p) const;
+
+  /**
+   * This function is intended to return the gradient of a shape function at a
+   * point on the reference cell. However, since the current element does not
+   * implement shape functions by mapping from a reference cell, no shape
+   * functions exist on the reference cell.
+   *
+   * Consequently, as discussed in the corresponding function in the base
+   * class, FiniteElement::shape_grad_component(), this function throws an
+   * exception of type FiniteElement::ExcUnitShapeValuesDoNotExist.
+   */
+  virtual Tensor<1,dim> shape_grad_component (const unsigned int i,
+                                              const Point<dim> &p,
+                                              const unsigned int component) const;
+
+  /**
+   * This function is intended to return the Hessian of a shape function at a
+   * point on the reference cell. However, since the current element does not
+   * implement shape functions by mapping from a reference cell, no shape
+   * functions exist on the reference cell.
+   *
+   * Consequently, as discussed in the corresponding function in the base
+   * class, FiniteElement::shape_grad_grad(), this function throws an
+   * exception of type FiniteElement::ExcUnitShapeValuesDoNotExist.
+   */
+  virtual Tensor<2,dim> shape_grad_grad (const unsigned int  i,
+                                         const Point<dim> &p) const;
+
+  /**
+   * This function is intended to return the Hessian of a shape function at a
+   * point on the reference cell. However, since the current element does not
+   * implement shape functions by mapping from a reference cell, no shape
+   * functions exist on the reference cell.
+   *
+   * Consequently, as discussed in the corresponding function in the base
+   * class, FiniteElement::shape_grad_grad_component(), this function throws
+   * an exception of type FiniteElement::ExcUnitShapeValuesDoNotExist.
+   */
+  virtual Tensor<2,dim> shape_grad_grad_component (const unsigned int i,
+                                                   const Point<dim> &p,
+                                                   const unsigned int component) const;
+
+  /**
+   * Return the polynomial degree of this finite element, i.e. the value
+   * passed to the constructor.
+   */
+  unsigned int get_degree () const;
+
+  /**
+   * Return the matrix interpolating from a face of one element to the face
+   * of the neighboring element. The size of the matrix is then
+   * <tt>source.dofs_per_face</tt> times <tt>this->dofs_per_face</tt>.
+   *
+   * Derived elements will have to implement this function. They may only
+   * provide interpolation matrices for certain source finite elements, for
+   * example those from the same family. If they don't implement interpolation
+   * from a given element, then they must throw an exception of type
+   * FiniteElement<dim,spacedim>::ExcInterpolationNotImplemented.
+   */
+  virtual void
+  get_face_interpolation_matrix (const FiniteElement<dim,spacedim> &source,
+                                 FullMatrix<double>       &matrix) const;
+
+  /**
+   * Return the matrix interpolating from a face of one element to the face
+   * of the neighboring element. The size of the matrix is then
+   * <tt>source.dofs_per_face</tt> times <tt>this->dofs_per_face</tt>.
+   *
+   * Derived elements will have to implement this function. They may only
+   * provide interpolation matrices for certain source finite elements, for
+   * example those from the same family. If they don't implement interpolation
+   * from a given element, then they must throw an exception of type
+   * FiniteElement<dim,spacedim>::ExcInterpolationNotImplemented.
+   */
+  virtual void
+  get_subface_interpolation_matrix (const FiniteElement<dim,spacedim> &source,
+                                    const unsigned int        subface,
+                                    FullMatrix<double>       &matrix) const;
+
+  /**
+   * @name Functions to support hp
+   * @{
+   */
+
+  /**
+   * If, on a vertex, several finite elements are active, the hp code first
+   * assigns the degrees of freedom of each of these FEs different global
+   * indices. It then calls this function to find out which of them should get
+   * identical values, and consequently can receive the same global DoF index.
+   * This function therefore returns a list of identities between DoFs of the
+   * present finite element object with the DoFs of @p fe_other, which is a
+   * reference to a finite element object representing one of the other finite
+   * elements active on this particular vertex. The function computes which of
+   * the degrees of freedom of the two finite element objects are equivalent,
+   * both numbered between zero and the corresponding value of dofs_per_vertex
+   * of the two finite elements. The first index of each pair denotes one of
+   * the vertex dofs of the present element, whereas the second is the
+   * corresponding index of the other finite element.
+   *
+   * This being a discontinuous element, the set of such constraints is of
+   * course empty.
+   */
+  virtual
+  std::vector<std::pair<unsigned int, unsigned int> >
+  hp_vertex_dof_identities (const FiniteElement<dim,spacedim> &fe_other) const;
+
+  /**
+   * Same as hp_vertex_dof_identities(), except that the function treats
+   * degrees of freedom on lines.
+   *
+   * This being a discontinuous element, the set of such constraints is of
+   * course empty.
+   */
+  virtual
+  std::vector<std::pair<unsigned int, unsigned int> >
+  hp_line_dof_identities (const FiniteElement<dim,spacedim> &fe_other) const;
+
+  /**
+   * Same as hp_vertex_dof_identities(), except that the function treats
+   * degrees of freedom on quads.
+   *
+   * This being a discontinuous element, the set of such constraints is of
+   * course empty.
+   */
+  virtual
+  std::vector<std::pair<unsigned int, unsigned int> >
+  hp_quad_dof_identities (const FiniteElement<dim,spacedim> &fe_other) const;
+
+  /**
+   * Return whether this element implements its hanging node constraints in
+   * the new way, which has to be used to make elements "hp compatible".
+   *
+   * For the FE_DGPNonparametric class the result is always true (independent
+   * of the degree of the element), as it has no hanging nodes (being a
+   * discontinuous element).
+   */
+  virtual bool hp_constraints_are_implemented () const;
+
+  /**
+   * Return whether this element dominates the one given as argument when they
+   * meet at a common face, whether it is the other way around, whether
+   * neither dominates, or if either could dominate.
+   *
+   * For a definition of domination, see FiniteElementDomination::Domination
+   * and in particular the
+   * @ref hp_paper "hp paper".
+   */
+  virtual
+  FiniteElementDomination::Domination
+  compare_for_face_domination (const FiniteElement<dim,spacedim> &fe_other) const;
+
+  /**
+   * @}
+   */
+
+  /**
+   * This function returns @p true, if the shape function @p shape_index has
+   * non-zero function values somewhere on the face @p face_index.
+   */
+  virtual bool has_support_on_face (const unsigned int shape_index,
+                                    const unsigned int face_index) const;
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   *
+   * This function is made virtual, since finite element objects are usually
+   * accessed through pointers to their base class, rather than the class
+   * itself.
+   */
+  virtual std::size_t memory_consumption () const;
+
+
+private:
+  /**
+   * Declare a nested class which will hold static definitions of various
+   * matrices such as constraint and embedding matrices. The definition of the
+   * various static fields are in the files <tt>fe_dgp_[123]d.cc</tt> in the
+   * source directory.
+   */
+  struct Matrices
+  {
+    /**
+     * Pointers to the embedding matrices, one for each polynomial degree
+     * starting from constant elements.
+     */
+    static const double *const embedding[][GeometryInfo<dim>::max_children_per_cell];
+
+    /**
+     * Number of elements (first index) the above field has. Equals the
+     * highest polynomial degree plus one for which the embedding matrices
+     * have been computed.
+     */
+    static const unsigned int n_embedding_matrices;
+
+    /**
+     * As @p embedding but for projection matrices.
+     */
+    static const double *const projection_matrices[][GeometryInfo<dim>::max_children_per_cell];
+
+    /**
+     * As @p n_embedding_matrices but for projection matrices.
+     */
+    static const unsigned int n_projection_matrices;
+  };
+
+protected:
+
+  /**
+   * @p clone function instead of a copy constructor.
+   *
+   * This function is needed by the constructors of @p FESystem.
+   */
+  virtual FiniteElement<dim,spacedim> *clone() const;
+
+  /**
+   * Prepare internal data structures and fill in values independent of the
+   * cell.
+   */
+  virtual
+  typename FiniteElement<dim,spacedim>::InternalDataBase *
+  get_data (const UpdateFlags                                                    update_flags,
+            const Mapping<dim,spacedim>                                         &mapping,
+            const Quadrature<dim>                                               &quadrature,
+            dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const;
+
+  virtual
+  void
+  fill_fe_values (const typename Triangulation<dim,spacedim>::cell_iterator           &cell,
+                  const CellSimilarity::Similarity                                     cell_similarity,
+                  const Quadrature<dim>                                               &quadrature,
+                  const Mapping<dim,spacedim>                                         &mapping,
+                  const typename Mapping<dim,spacedim>::InternalDataBase              &mapping_internal,
+                  const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &mapping_data,
+                  const typename FiniteElement<dim,spacedim>::InternalDataBase        &fe_internal,
+                  dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const;
+
+  virtual
+  void
+  fill_fe_face_values (const typename Triangulation<dim,spacedim>::cell_iterator           &cell,
+                       const unsigned int                                                   face_no,
+                       const Quadrature<dim-1>                                             &quadrature,
+                       const Mapping<dim,spacedim>                                         &mapping,
+                       const typename Mapping<dim,spacedim>::InternalDataBase              &mapping_internal,
+                       const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &mapping_data,
+                       const typename FiniteElement<dim,spacedim>::InternalDataBase        &fe_internal,
+                       dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const;
+
+  virtual
+  void
+  fill_fe_subface_values (const typename Triangulation<dim,spacedim>::cell_iterator           &cell,
+                          const unsigned int                                                   face_no,
+                          const unsigned int                                                   sub_no,
+                          const Quadrature<dim-1>                                             &quadrature,
+                          const Mapping<dim,spacedim>                                         &mapping,
+                          const typename Mapping<dim,spacedim>::InternalDataBase              &mapping_internal,
+                          const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &mapping_data,
+                          const typename FiniteElement<dim,spacedim>::InternalDataBase        &fe_internal,
+                          dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const;
+
+private:
+
+  /**
+   * Only for internal use. Its full name is @p get_dofs_per_object_vector
+   * function and it creates the @p dofs_per_object vector that is needed
+   * within the constructor to be passed to the constructor of @p
+   * FiniteElementData.
+   */
+  static
+  std::vector<unsigned int>
+  get_dpo_vector (const unsigned int degree);
+
+  /**
+   * Degree of the polynomials.
+   */
+  const unsigned int degree;
+
+  /**
+   * Object representing the polynomial space used here.
+   */
+  const PolynomialSpace<dim> polynomial_space;
+
+  /**
+   * Allow access from other dimensions.
+   */
+  template <int, int> friend class FE_DGPNonparametric;
+};
+
+/*@}*/
+
+#ifndef DOXYGEN
+
+// declaration of explicit specializations of the static members of Matrices
+// for <1,1>, <2,2> and <3,3>, if the compiler allows it (the standard says we must)
+#ifndef DEAL_II_MEMBER_VAR_SPECIALIZATION_BUG
+template <>
+const double *const FE_DGPNonparametric<1,1>::Matrices::embedding[][GeometryInfo<1>::max_children_per_cell];
+
+template <>
+const unsigned int FE_DGPNonparametric<1,1>::Matrices::n_embedding_matrices;
+
+template <>
+const double *const FE_DGPNonparametric<1,1>::Matrices::projection_matrices[][GeometryInfo<1>::max_children_per_cell];
+
+template <>
+const unsigned int FE_DGPNonparametric<1,1>::Matrices::n_projection_matrices;
+
+template <>
+const double *const FE_DGPNonparametric<2,2>::Matrices::embedding[][GeometryInfo<2>::max_children_per_cell];
+
+template <>
+const unsigned int FE_DGPNonparametric<2,2>::Matrices::n_embedding_matrices;
+
+template <>
+const double *const FE_DGPNonparametric<2,2>::Matrices::projection_matrices[][GeometryInfo<2>::max_children_per_cell];
+
+template <>
+const unsigned int FE_DGPNonparametric<2,2>::Matrices::n_projection_matrices;
+
+template <>
+const double *const FE_DGPNonparametric<3,3>::Matrices::embedding[][GeometryInfo<3>::max_children_per_cell];
+
+template <>
+const unsigned int FE_DGPNonparametric<3,3>::Matrices::n_embedding_matrices;
+
+template <>
+const double *const FE_DGPNonparametric<3,3>::Matrices::projection_matrices[][GeometryInfo<3>::max_children_per_cell];
+
+template <>
+const unsigned int FE_DGPNonparametric<3,3>::Matrices::n_projection_matrices;
+#endif
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/fe/fe_dgq.h b/include/deal.II/fe/fe_dgq.h
new file mode 100644
index 0000000..2c44352
--- /dev/null
+++ b/include/deal.II/fe/fe_dgq.h
@@ -0,0 +1,402 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__fe_dgq_h
+#define dealii__fe_dgq_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/tensor_product_polynomials.h>
+#include <deal.II/base/thread_management.h>
+#include <deal.II/fe/fe_poly.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int dim, int spacedim> class MappingQ;
+template <int dim> class Quadrature;
+
+/*!@addtogroup fe */
+/*@{*/
+
+/**
+ * Implementation of scalar, discontinuous tensor product elements based on
+ * equidistant support points.
+ *
+ * This is a discontinuous finite element based on tensor products of
+ * Lagrangian polynomials. The shape functions are Lagrangian interpolants of
+ * an equidistant grid of points on the unit cell. The points are numbered in
+ * lexicographical order, with <i>x</i> running fastest, then <i>y</i>, then
+ * <i>z</i> (if these coordinates are present for a given space dimension at
+ * all). For example, these are the node orderings for <tt>FE_DGQ(1)</tt> in
+ * 3d:
+ *  @verbatim
+ *         6-------7        6-------7
+ *        /|       |       /       /|
+ *       / |       |      /       / |
+ *      /  |       |     /       /  |
+ *     4   |       |    4-------5   |
+ *     |   2-------3    |       |   3
+ *     |  /       /     |       |  /
+ *     | /       /      |       | /
+ *     |/       /       |       |/
+ *     0-------1        0-------1
+ *  @endverbatim
+ * and <tt>FE_DGQ(2)</tt>:
+ *  @verbatim
+ *         24--25--26       24--25--26
+ *        /|       |       /       /|
+ *      21 |       |     21  22  23 |
+ *      /  15  16  17    /       /  17
+ *    18   |       |   18--19--20   |
+ *     |12 6---7---8    |       |14 8
+ *     9  /       /     9  10  11  /
+ *     | 3   4   5      |       | 5
+ *     |/       /       |       |/
+ *     0---1---2        0---1---2
+ *  @endverbatim
+ * with node 13 being placed in the interior of the hex.
+ *
+ * Note, however, that these are just the Lagrange interpolation points of the
+ * shape functions. Even though they may physically be on the boundary of the
+ * cell, they are logically in the interior since there are no continuity
+ * requirements for these shape functions across cell boundaries. While
+ * discontinuous, when restricted to a single cell the shape functions of this
+ * element are exactly the same as those of the FE_Q element where they are
+ * shown visually.
+ *
+ * @author Ralf Hartmann, Guido Kanschat 2001, 2004
+ */
+template <int dim, int spacedim=dim>
+class FE_DGQ : public FE_Poly<TensorProductPolynomials<dim>, dim, spacedim>
+{
+public:
+  /**
+   * Constructor for tensor product polynomials of degree <tt>p</tt>. The
+   * shape functions created using this constructor correspond to Lagrange
+   * interpolation polynomials for equidistantly spaced support points in each
+   * coordinate direction.
+   */
+  FE_DGQ (const unsigned int p);
+
+  /**
+   * Return a string that uniquely identifies a finite element. This class
+   * returns <tt>FE_DGQ<dim>(degree)</tt>, with <tt>dim</tt> and
+   * <tt>degree</tt> replaced by appropriate values.
+   */
+  virtual std::string get_name () const;
+
+  /**
+   * Return the matrix interpolating from the given finite element to the
+   * present one. The size of the matrix is then @p dofs_per_cell times
+   * <tt>source.dofs_per_cell</tt>.
+   *
+   * These matrices are only available if the source element is also a @p
+   * FE_DGQ element. Otherwise, an exception of type
+   * FiniteElement<dim,spacedim>::ExcInterpolationNotImplemented is thrown.
+   */
+  virtual void
+  get_interpolation_matrix (const FiniteElement<dim, spacedim> &source,
+                            FullMatrix<double>           &matrix) const;
+
+  /**
+   * Return the matrix interpolating from a face of one element to the face
+   * of the neighboring element. The size of the matrix is then
+   * <tt>source.dofs_per_face</tt> times <tt>this->dofs_per_face</tt>.
+   *
+   * Derived elements will have to implement this function. They may only
+   * provide interpolation matrices for certain source finite elements, for
+   * example those from the same family. If they don't implement interpolation
+   * from a given element, then they must throw an exception of type
+   * FiniteElement<dim,spacedim>::ExcInterpolationNotImplemented.
+   */
+  virtual void
+  get_face_interpolation_matrix (const FiniteElement<dim, spacedim> &source,
+                                 FullMatrix<double>       &matrix) const;
+
+  /**
+   * Return the matrix interpolating from a face of one element to the face
+   * of the neighboring element. The size of the matrix is then
+   * <tt>source.dofs_per_face</tt> times <tt>this->dofs_per_face</tt>.
+   *
+   * Derived elements will have to implement this function. They may only
+   * provide interpolation matrices for certain source finite elements, for
+   * example those from the same family. If they don't implement interpolation
+   * from a given element, then they must throw an exception of type
+   * FiniteElement<dim,spacedim>::ExcInterpolationNotImplemented.
+   */
+  virtual void
+  get_subface_interpolation_matrix (const FiniteElement<dim, spacedim> &source,
+                                    const unsigned int        subface,
+                                    FullMatrix<double>       &matrix) const;
+
+  /**
+   * Projection from a fine grid space onto a coarse grid space. Overrides the
+   * respective method in FiniteElement, implementing lazy evaluation
+   * (initialize when requested).
+   *
+   * If this projection operator is associated with a matrix @p P, then the
+   * restriction of this matrix @p P_i to a single child cell is returned
+   * here.
+   *
+   * The matrix @p P is the concatenation or the sum of the cell matrices @p
+   * P_i, depending on the #restriction_is_additive_flags. This distinguishes
+   * interpolation (concatenation) and projection with respect to scalar
+   * products (summation).
+   *
+   * Row and column indices are related to coarse grid and fine grid spaces,
+   * respectively, consistent with the definition of the associated operator.
+   */
+  virtual const FullMatrix<double> &
+  get_restriction_matrix (const unsigned int child,
+                          const RefinementCase<dim> &refinement_case=RefinementCase<dim>::isotropic_refinement) const;
+
+  /**
+   * Embedding matrix between grids. Overrides the respective method in
+   * FiniteElement, implementing lazy evaluation (initialize when queried).
+   *
+   * The identity operator from a coarse grid space into a fine grid space is
+   * associated with a matrix @p P. The restriction of this matrix @p P_i to a
+   * single child cell is returned here.
+   *
+   * The matrix @p P is the concatenation, not the sum of the cell matrices @p
+   * P_i. That is, if the same non-zero entry <tt>j,k</tt> exists in two
+   * different child matrices @p P_i, the value should be the same in both
+   * matrices and it is copied into the matrix @p P only once.
+   *
+   * Row and column indices are related to fine grid and coarse grid spaces,
+   * respectively, consistent with the definition of the associated operator.
+   *
+   * These matrices are used by routines assembling the prolongation matrix
+   * for multi-level methods.  Upon assembling the transfer matrix between
+   * cells using this matrix array, zero elements in the prolongation matrix
+   * are discarded and will not fill up the transfer matrix.
+   */
+  virtual const FullMatrix<double> &
+  get_prolongation_matrix (const unsigned int child,
+                           const RefinementCase<dim> &refinement_case=RefinementCase<dim>::isotropic_refinement) const;
+
+  /**
+   * @name Functions to support hp
+   * @{
+   */
+
+  /**
+   * If, on a vertex, several finite elements are active, the hp code first
+   * assigns the degrees of freedom of each of these FEs different global
+   * indices. It then calls this function to find out which of them should get
+   * identical values, and consequently can receive the same global DoF index.
+   * This function therefore returns a list of identities between DoFs of the
+   * present finite element object with the DoFs of @p fe_other, which is a
+   * reference to a finite element object representing one of the other finite
+   * elements active on this particular vertex. The function computes which of
+   * the degrees of freedom of the two finite element objects are equivalent,
+   * both numbered between zero and the corresponding value of dofs_per_vertex
+   * of the two finite elements. The first index of each pair denotes one of
+   * the vertex dofs of the present element, whereas the second is the
+   * corresponding index of the other finite element.
+   *
+   * This being a discontinuous element, the set of such constraints is of
+   * course empty.
+   */
+  virtual
+  std::vector<std::pair<unsigned int, unsigned int> >
+  hp_vertex_dof_identities (const FiniteElement<dim, spacedim> &fe_other) const;
+
+  /**
+   * Same as hp_vertex_dof_identities(), except that the function treats
+   * degrees of freedom on lines.
+   *
+   * This being a discontinuous element, the set of such constraints is of
+   * course empty.
+   */
+  virtual
+  std::vector<std::pair<unsigned int, unsigned int> >
+  hp_line_dof_identities (const FiniteElement<dim, spacedim> &fe_other) const;
+
+  /**
+   * Same as hp_vertex_dof_identities(), except that the function treats
+   * degrees of freedom on quads.
+   *
+   * This being a discontinuous element, the set of such constraints is of
+   * course empty.
+   */
+  virtual
+  std::vector<std::pair<unsigned int, unsigned int> >
+  hp_quad_dof_identities (const FiniteElement<dim, spacedim> &fe_other) const;
+
+  /**
+   * Return whether this element implements its hanging node constraints in
+   * the new way, which has to be used to make elements "hp compatible".
+   *
+   * For the FE_DGQ class the result is always true (independent of the degree
+   * of the element), as it has no hanging nodes (being a discontinuous
+   * element).
+   */
+  virtual bool hp_constraints_are_implemented () const;
+
+  /**
+   * Return whether this element dominates the one given as argument when they
+   * meet at a common face, whether it is the other way around, whether
+   * neither dominates, or if either could dominate.
+   *
+   * For a definition of domination, see FiniteElementDomination::Domination
+   * and in particular the
+   * @ref hp_paper "hp paper".
+   */
+  virtual
+  FiniteElementDomination::Domination
+  compare_for_face_domination (const FiniteElement<dim, spacedim> &fe_other) const;
+
+  /**
+   * @}
+   */
+
+  /**
+   * This function returns @p true, if the shape function @p shape_index has
+   * non-zero function values somewhere on the face @p face_index.
+   */
+  virtual bool has_support_on_face (const unsigned int shape_index,
+                                    const unsigned int face_index) const;
+
+  /**
+   * Return a list of constant modes of the element. For this element, it
+   * simply returns one row with all entries set to true.
+   */
+  virtual std::pair<Table<2,bool>, std::vector<unsigned int> >
+  get_constant_modes () const;
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   *
+   * This function is made virtual, since finite element objects are usually
+   * accessed through pointers to their base class, rather than the class
+   * itself.
+   */
+  virtual std::size_t memory_consumption () const;
+
+
+protected:
+  /**
+   * Constructor for tensor product polynomials based on Polynomials::Lagrange
+   * interpolation of the support points in the quadrature rule
+   * <tt>points</tt>. The degree of these polynomials is
+   * <tt>points.size()-1</tt>.
+   *
+   * Note: The FE_DGQ::clone function does not work properly for FE with
+   * arbitrary nodes!
+   */
+  FE_DGQ (const Quadrature<1> &points);
+
+  /**
+   * @p clone function instead of a copy constructor.
+   *
+   * This function is needed by the constructors of @p FESystem.
+   */
+  virtual FiniteElement<dim, spacedim> *clone() const;
+
+private:
+  /**
+   * Only for internal use. Its full name is @p get_dofs_per_object_vector
+   * function and it creates the @p dofs_per_object vector that is needed
+   * within the constructor to be passed to the constructor of @p
+   * FiniteElementData.
+   */
+  static std::vector<unsigned int> get_dpo_vector (const unsigned int degree);
+
+  /**
+   * Compute renumbering for rotation of degrees of freedom.
+   *
+   * Rotates a tensor product numbering of degrees of freedom by 90 degrees.
+   * It is used to compute the transfer matrices of the children by using only
+   * the matrix for the first child.
+   *
+   * The direction parameter determines the type of rotation. It is one
+   * character of @p xXyYzZ. The character determines the axis of rotation,
+   * case determines the direction. Lower case is counter-clockwise seen in
+   * direction of the axis.
+   *
+   * Since rotation around the y-axis is not used, it is not implemented
+   * either.
+   */
+  void rotate_indices (std::vector<unsigned int> &indices,
+                       const char                 direction) const;
+
+  /**
+   * Mutex for protecting initialization of restriction and embedding matrix.
+   */
+  mutable Threads::Mutex mutex;
+
+  /**
+   * Allow access from other dimensions.
+   */
+  template <int dim1, int spacedim1> friend class FE_DGQ;
+
+  /**
+   * Allow the @p MappingQ class to access the build_renumbering function.
+   */
+  template <int dim1, int spacedim1> friend class MappingQ;
+};
+
+
+
+/**
+ * Implementation of scalar, discontinuous tensor product elements based on
+ * Lagrange polynomials with arbitrary nodes. The primary purpose of this
+ * class is to provide an element for which the mass matrix can be made
+ * diagonal by choosing basis functions that are not either zero or one at the
+ * vertices of the cell, but instead are zero or one at a given set of
+ * quadrature points. If this set of quadrature points is then also used in
+ * integrating the mass matrix, then it will be diagonal. The number of
+ * quadrature points automatically determines the polynomial degree chosen for
+ * this element.
+ *
+ * See the base class documentation in FE_DGQ for details.
+ *
+ * @author F. Prill 2006
+ */
+template <int dim,int spacedim=dim>
+class FE_DGQArbitraryNodes : public FE_DGQ<dim,spacedim>
+{
+public:
+  /**
+   * Constructor for tensor product polynomials based on Polynomials::Lagrange
+   * interpolation of the support points in the quadrature rule
+   * <tt>points</tt>. The degree of these polynomials is
+   * <tt>points.size()-1</tt>.
+   */
+  FE_DGQArbitraryNodes (const Quadrature<1> &points);
+
+  /**
+   * Return a string that uniquely identifies a finite element. This class
+   * returns <tt>FE_DGQArbitraryNodes<dim>(degree)</tt>, with <tt>dim</tt> and
+   * <tt>degree</tt> replaced by appropriate values.
+   */
+  virtual std::string get_name () const;
+
+protected:
+  /**
+   * @p clone function instead of a copy constructor. It overrides
+   * FE_DGQ::clone(), whose use for arbitrary nodes the base class warns about.
+   * This function is needed by the constructors of @p FESystem.
+   */
+  virtual FiniteElement<dim,spacedim> *clone() const;
+};
+
+
+/*@}*/
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/fe/fe_face.h b/include/deal.II/fe/fe_face.h
new file mode 100644
index 0000000..2e51edd
--- /dev/null
+++ b/include/deal.II/fe/fe_face.h
@@ -0,0 +1,463 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__fe_face_h
+#define dealii__fe_face_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/tensor_product_polynomials.h>
+#include <deal.II/base/polynomial_space.h>
+#include <deal.II/fe/fe_poly_face.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/**
+ * A finite element, which is a tensor product polynomial on each face and
+ * undefined in the interior of the cells. The basis functions on the faces
+ * are Lagrange polynomials based on the support points of the
+ * (dim-1)-dimensional Gauss--Lobatto quadrature rule. For element degree one
+ * and two, the polynomials hence correspond to the usual Lagrange polynomials
+ * on equidistant points.
+ *
+ * Although the name does not give it away, the element is discontinuous at
+ * locations where faces of cells meet. In particular, this finite element is
+ * the trace space of FE_RaviartThomas on the faces and serves in hybridized
+ * methods, e.g. in combination with the FE_DGQ element. Its use is
+ * demonstrated in the step-51 tutorial program.
+ *
+ * @note Since this element is defined only on faces, only FEFaceValues and
+ * FESubfaceValues will be able to extract reasonable values from any face
+ * polynomial. In order to make the use of FESystem simpler, using a (cell)
+ * FEValues object will not fail using this finite element space, but all
+ * shape function values extracted will be equal to zero.
+ *
+ * @ingroup fe
+ * @author Guido Kanschat, Martin Kronbichler
+ * @date 2009, 2011, 2013
+ */
+template <int dim, int spacedim=dim>
+class FE_FaceQ : public FE_PolyFace<TensorProductPolynomials<dim-1>, dim, spacedim>
+{
+public:
+  /**
+   * Constructor for tensor product polynomials of degree <tt>p</tt>. The
+   * shape functions created using this constructor correspond to Lagrange
+   * polynomials in each coordinate direction.
+   */
+  FE_FaceQ (const unsigned int p);
+
+  virtual FiniteElement<dim,spacedim> *clone() const;
+
+  /**
+   * Return a string that uniquely identifies a finite element. This class
+   * returns <tt>FE_FaceQ<dim>(degree)</tt>, with <tt>dim</tt> and
+   * <tt>degree</tt> replaced by appropriate values.
+   */
+  virtual std::string get_name () const;
+
+  /**
+   * Return the matrix interpolating from a face of one element to the face
+   * of the neighboring element.  The size of the matrix is then
+   * <tt>source.dofs_per_face</tt> times <tt>this->dofs_per_face</tt>. This
+   * element only provides interpolation matrices for elements of the same
+   * type and FE_Nothing. For all other elements, an exception of type
+   * FiniteElement<dim,spacedim>::ExcInterpolationNotImplemented is thrown.
+   */
+  virtual void
+  get_face_interpolation_matrix (const FiniteElement<dim,spacedim> &source,
+                                 FullMatrix<double>       &matrix) const;
+
+  /**
+   * Return the matrix interpolating from a face of one element to the subface
+   * of the neighboring element.  The size of the matrix is then
+   * <tt>source.dofs_per_face</tt> times <tt>this->dofs_per_face</tt>. This
+   * element only provides interpolation matrices for elements of the same
+   * type and FE_Nothing. For all other elements, an exception of type
+   * FiniteElement<dim,spacedim>::ExcInterpolationNotImplemented is thrown.
+   */
+  virtual void
+  get_subface_interpolation_matrix (const FiniteElement<dim,spacedim> &source,
+                                    const unsigned int        subface,
+                                    FullMatrix<double>       &matrix) const;
+
+  /**
+   * This function returns @p true, if the shape function @p shape_index has
+   * non-zero function values somewhere on the face @p face_index.
+   */
+  virtual bool has_support_on_face (const unsigned int shape_index,
+                                    const unsigned int face_index) const;
+
+  /**
+   * Return whether this element implements its hanging node constraints in
+   * the new way, which has to be used to make elements "hp compatible".
+   */
+  virtual bool hp_constraints_are_implemented () const;
+
+  /**
+   * Return whether this element dominates the one given as argument when they
+   * meet at a common face, whether it is the other way around, whether
+   * neither dominates, or if either could dominate.
+   *
+   * For a definition of domination, see FiniteElementDomination::Domination
+   * and in particular the
+   * @ref hp_paper "hp paper".
+   */
+  virtual
+  FiniteElementDomination::Domination
+  compare_for_face_domination (const FiniteElement<dim,spacedim> &fe_other) const;
+
+  /**
+   * Returns a list of constant modes of the element. For this element, it
+   * simply returns one row with all entries set to true.
+   */
+  virtual std::pair<Table<2,bool>, std::vector<unsigned int> >
+  get_constant_modes () const;
+
+private:
+  /**
+   * Return vector with dofs per vertex, line, quad, hex.
+   */
+  static std::vector<unsigned int> get_dpo_vector (const unsigned int deg);
+};
+
+
+
+/**
+ * Specialization of FE_FaceQ for 1D. In that case, the finite element only
+ * consists of one degree of freedom in each of the two faces (= vertices) of
+ * a cell, irrespective of the degree. However, this element still accepts a
+ * degree in its constructor and also returns that degree. This way,
+ * dimension-independent programming with trace elements is also possible in
+ * 1D (even though there is no computational benefit at all from it in 1D).
+ *
+ * @ingroup fe
+ * @author Guido Kanschat, Martin Kronbichler
+ * @date 2014
+ */
+template <int spacedim>
+class FE_FaceQ<1,spacedim> : public FiniteElement<1,spacedim>
+{
+public:
+  /**
+   * Constructor.
+   */
+  FE_FaceQ (const unsigned int p);
+
+  /**
+   * Clone method.
+   */
+  virtual FiniteElement<1,spacedim> *clone() const;
+
+  /**
+   * Return a string that uniquely identifies a finite element. This class
+   * returns <tt>FE_FaceQ<dim>(degree)</tt>, with <tt>dim</tt> and
+   * <tt>degree</tt> replaced by appropriate values.
+   */
+  virtual std::string get_name () const;
+
+  // for documentation, see the FiniteElement base class
+  virtual
+  UpdateFlags
+  requires_update_flags (const UpdateFlags update_flags) const;
+
+  /**
+   * Return the matrix interpolating from a face of one element to the face
+   * of the neighboring element.  The size of the matrix is then
+   * <tt>source.dofs_per_face</tt> times <tt>this->dofs_per_face</tt>. This
+   * element only provides interpolation matrices for elements of the same
+   * type and FE_Nothing. For all other elements, an exception of type
+   * FiniteElement<dim,spacedim>::ExcInterpolationNotImplemented is thrown.
+   */
+  virtual void
+  get_face_interpolation_matrix (const FiniteElement<1,spacedim> &source,
+                                 FullMatrix<double>       &matrix) const;
+
+  /**
+   * Return the matrix interpolating from a face of one element to the subface
+   * of the neighboring element.  The size of the matrix is then
+   * <tt>source.dofs_per_face</tt> times <tt>this->dofs_per_face</tt>. This
+   * element only provides interpolation matrices for elements of the same
+   * type and FE_Nothing. For all other elements, an exception of type
+   * FiniteElement<dim,spacedim>::ExcInterpolationNotImplemented is thrown.
+   */
+  virtual void
+  get_subface_interpolation_matrix (const FiniteElement<1,spacedim> &source,
+                                    const unsigned int        subface,
+                                    FullMatrix<double>       &matrix) const;
+
+  /**
+   * This function returns @p true, if the shape function @p shape_index has
+   * non-zero function values somewhere on the face @p face_index.
+   */
+  virtual bool has_support_on_face (const unsigned int shape_index,
+                                    const unsigned int face_index) const;
+
+  /**
+   * Return whether this element implements its hanging node constraints in
+   * the new way, which has to be used to make elements "hp compatible".
+   */
+  virtual bool hp_constraints_are_implemented () const;
+
+  /**
+   * Return whether this element dominates the one given as argument when they
+   * meet at a common face, whether it is the other way around, whether
+   * neither dominates, or if either could dominate.
+   *
+   * For a definition of domination, see FiniteElementDomination::Domination
+   * and in particular the
+   * @ref hp_paper "hp paper".
+   */
+  virtual
+  FiniteElementDomination::Domination
+  compare_for_face_domination (const FiniteElement<1,spacedim> &fe_other) const;
+
+  /**
+   * Returns a list of constant modes of the element. For this element, it
+   * simply returns one row with all entries set to true.
+   */
+  virtual std::pair<Table<2,bool>, std::vector<unsigned int> >
+  get_constant_modes () const;
+
+protected:
+  /*
+   * NOTE: The following functions have their definitions inlined into the class declaration
+   * because we otherwise run into a compiler error with MS Visual Studio.
+   */
+
+
+  virtual
+  typename FiniteElement<1,spacedim>::InternalDataBase *
+  get_data (const UpdateFlags                                                  /*update_flags*/,
+            const Mapping<1,spacedim>                                         &/*mapping*/,
+            const Quadrature<1>                                               &/*quadrature*/,
+            dealii::internal::FEValues::FiniteElementRelatedData<1, spacedim> &/*output_data*/) const
+  {
+    return new typename FiniteElement<1, spacedim>::InternalDataBase;
+  }
+
+  typename FiniteElement<1,spacedim>::InternalDataBase *
+  get_face_data(const UpdateFlags update_flags,
+                const Mapping<1,spacedim> &/*mapping*/,
+                const Quadrature<0> &quadrature,
+                dealii::internal::FEValues::FiniteElementRelatedData<1, spacedim> &/*output_data*/) const
+  {
+    // generate a new data object and initialize some fields
+    typename FiniteElement<1,spacedim>::InternalDataBase *data =
+      new typename FiniteElement<1,spacedim>::InternalDataBase;
+    data->update_each = requires_update_flags(update_flags);
+
+    const unsigned int n_q_points = quadrature.size();
+    AssertDimension(n_q_points, 1);
+    (void)n_q_points;
+
+    // No derivatives of this element are implemented.
+    if (data->update_each & update_gradients || data->update_each & update_hessians)
+      {
+        Assert(false, ExcNotImplemented());
+      }
+
+    return data;
+  }
+
+  typename FiniteElement<1,spacedim>::InternalDataBase *
+  get_subface_data(const UpdateFlags                                                  update_flags,
+                   const Mapping<1,spacedim>                                         &mapping,
+                   const Quadrature<0>                                               &quadrature,
+                   dealii::internal::FEValues::FiniteElementRelatedData<1, spacedim> &output_data) const
+  {
+    return get_face_data(update_flags, mapping, quadrature, output_data);
+  }
+
+  virtual
+  void
+  fill_fe_values (const typename Triangulation<1,spacedim>::cell_iterator           &cell,
+                  const CellSimilarity::Similarity                                   cell_similarity,
+                  const Quadrature<1>                                               &quadrature,
+                  const Mapping<1,spacedim>                                         &mapping,
+                  const typename Mapping<1,spacedim>::InternalDataBase              &mapping_internal,
+                  const dealii::internal::FEValues::MappingRelatedData<1, spacedim> &mapping_data,
+                  const typename FiniteElement<1,spacedim>::InternalDataBase        &fe_internal,
+                  dealii::internal::FEValues::FiniteElementRelatedData<1, spacedim> &output_data) const;
+
+  virtual
+  void
+  fill_fe_face_values (const typename Triangulation<1,spacedim>::cell_iterator           &cell,
+                       const unsigned int                                                 face_no,
+                       const Quadrature<0>                                               &quadrature,
+                       const Mapping<1,spacedim>                                         &mapping,
+                       const typename Mapping<1,spacedim>::InternalDataBase              &mapping_internal,
+                       const dealii::internal::FEValues::MappingRelatedData<1, spacedim> &mapping_data,
+                       const typename FiniteElement<1,spacedim>::InternalDataBase        &fe_internal,
+                       dealii::internal::FEValues::FiniteElementRelatedData<1, spacedim> &output_data) const;
+
+  virtual
+  void
+  fill_fe_subface_values (const typename Triangulation<1,spacedim>::cell_iterator           &cell,
+                          const unsigned int                                                 face_no,
+                          const unsigned int                                                 sub_no,
+                          const Quadrature<0>                                               &quadrature,
+                          const Mapping<1,spacedim>                                         &mapping,
+                          const typename Mapping<1,spacedim>::InternalDataBase              &mapping_internal,
+                          const dealii::internal::FEValues::MappingRelatedData<1, spacedim> &mapping_data,
+                          const typename FiniteElement<1,spacedim>::InternalDataBase        &fe_internal,
+                          dealii::internal::FEValues::FiniteElementRelatedData<1, spacedim> &output_data) const;
+
+private:
+  /**
+   * Return vector with dofs per vertex, line, quad, hex.
+   */
+  static
+  std::vector<unsigned int>
+  get_dpo_vector (const unsigned int deg);
+};
+
+
+
+/**
+ * A finite element, which is a Legendre element of complete polynomials on
+ * each face (i.e., it is the face equivalent of what FE_DGP is on cells) and
+ * undefined in the interior of the cells. The basis functions on the faces
+ * are from Polynomials::Legendre.
+ *
+ * Although the name does not give it away, the element is discontinuous at
+ * locations where faces of cells meet. The element serves in hybridized
+ * methods, e.g. in combination with the FE_DGP element. An example of
+ * hybridized methods can be found in the step-51 tutorial program.
+ *
+ * @note Since this element is defined only on faces, only FEFaceValues and
+ * FESubfaceValues will be able to extract reasonable values from any face
+ * polynomial. In order to make the use of FESystem simpler, using a (cell)
+ * FEValues object will not fail using this finite element space, but all
+ * shape function values extracted will be equal to zero.
+ *
+ * @ingroup fe
+ * @author Martin Kronbichler
+ * @date 2013
+ */
+template <int dim, int spacedim=dim>
+class FE_FaceP : public FE_PolyFace<PolynomialSpace<dim-1>, dim, spacedim>
+{
+public:
+  /**
+   * Constructor for complete basis of polynomials of degree <tt>p</tt>. The
+   * shape functions created using this constructor correspond to Legendre
+   * polynomials in each coordinate direction.
+   */
+  FE_FaceP(unsigned int p);
+
+  /**
+   * Clone method.
+   */
+  virtual FiniteElement<dim,spacedim> *clone() const;
+
+  /**
+   * Return a string that uniquely identifies a finite element. This class
+   * returns <tt>FE_FaceP<dim>(degree)</tt> , with <tt>dim</tt> and
+   * <tt>degree</tt> replaced by appropriate values.
+   */
+  virtual std::string get_name () const;
+
+  /**
+   * Return the matrix interpolating from a face of one element to the face
+   * of the neighboring element.  The size of the matrix is then
+   * <tt>source.dofs_per_face</tt> times <tt>this->dofs_per_face</tt>. This
+   * element only provides interpolation matrices for elements of the same
+   * type and FE_Nothing. For all other elements, an exception of type
+   * FiniteElement<dim,spacedim>::ExcInterpolationNotImplemented is thrown.
+   */
+  virtual void
+  get_face_interpolation_matrix (const FiniteElement<dim,spacedim> &source,
+                                 FullMatrix<double>       &matrix) const;
+
+  /**
+   * Return the matrix interpolating from a face of one element to the subface
+   * of the neighboring element.  The size of the matrix is then
+   * <tt>source.dofs_per_face</tt> times <tt>this->dofs_per_face</tt>. This
+   * element only provides interpolation matrices for elements of the same
+   * type and FE_Nothing. For all other elements, an exception of type
+   * FiniteElement<dim,spacedim>::ExcInterpolationNotImplemented is thrown.
+   */
+  virtual void
+  get_subface_interpolation_matrix (const FiniteElement<dim,spacedim> &source,
+                                    const unsigned int        subface,
+                                    FullMatrix<double>       &matrix) const;
+
+  /**
+   * This function returns @p true, if the shape function @p shape_index has
+   * non-zero function values somewhere on the face @p face_index.
+   */
+  virtual bool has_support_on_face (const unsigned int shape_index,
+                                    const unsigned int face_index) const;
+
+  /**
+   * Return whether this element implements its hanging node constraints in
+   * the new way, which has to be used to make elements "hp compatible".
+   */
+  virtual bool hp_constraints_are_implemented () const;
+
+  /**
+   * Return whether this element dominates the one given as argument when they
+   * meet at a common face, whether it is the other way around, whether
+   * neither dominates, or if either could dominate.
+   *
+   * For a definition of domination, see FiniteElementDomination::Domination
+   * and in particular the
+   * @ref hp_paper "hp paper".
+   */
+  virtual
+  FiniteElementDomination::Domination
+  compare_for_face_domination (const FiniteElement<dim,spacedim> &fe_other) const;
+
+  /**
+   * Returns a list of constant modes of the element. For this element, the
+   * first entry on each face is true, all other are false (as the constant
+   * function is represented by the first base function of Legendre
+   * polynomials).
+   */
+  virtual std::pair<Table<2,bool>, std::vector<unsigned int> >
+  get_constant_modes () const;
+
+private:
+  /**
+   * Return vector with dofs per vertex, line, quad, hex.
+   */
+  static std::vector<unsigned int> get_dpo_vector (const unsigned int deg);
+};
+
+
+
+/**
+ * FE_FaceP in 1D, i.e., with degrees of freedom on the element vertices.
+ */
+template <int spacedim>
+class FE_FaceP<1,spacedim> : public FE_FaceQ<1,spacedim>
+{
+public:
+  /**
+   * Constructor.
+   */
+  FE_FaceP (const unsigned int p);
+
+  /**
+   * Returns the name of the element
+   */
+  std::string get_name() const;
+};
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/fe/fe_nedelec.h b/include/deal.II/fe/fe_nedelec.h
new file mode 100644
index 0000000..4bee4f1
--- /dev/null
+++ b/include/deal.II/fe/fe_nedelec.h
@@ -0,0 +1,340 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2002 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__fe_nedelec_h
+#define dealii__fe_nedelec_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/table.h>
+#include <deal.II/base/tensor.h>
+#include <deal.II/base/tensor.h>
+#include <deal.II/base/polynomials_nedelec.h>
+#include <deal.II/base/polynomial.h>
+#include <deal.II/base/tensor_product_polynomials.h>
+#include <deal.II/base/geometry_info.h>
+#include <deal.II/base/thread_management.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/fe_poly_tensor.h>
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int dim, int spacedim> class MappingQ;
+
+
+/*!@addtogroup fe */
+/*@{*/
+
+/**
+ * @warning Several aspects of the implementation are experimental. For the
+ * moment, it is safe to use the element on globally refined meshes with
+ * consistent orientation of faces. See the todo entries below for more
+ * detailed caveats.
+ *
+ * Implementation of Nédélec elements, conforming with the space
+ * H<sup>curl</sup>. These elements generate vector fields with tangential
+ * components continuous between mesh cells.
+ *
+ * We follow the convention that the degree of Nédélec elements
+ * denotes the polynomial degree of the largest complete polynomial subspace
+ * contained in the Nédélec space. This leads to the
+ * consistently numbered sequence of spaces
+ * @f[
+ *   Q_{k+1}
+ *   \stackrel{\text{grad}}{\rightarrow}
+ *   \text{Nedelec}_k
+ *   \stackrel{\text{curl}}{\rightarrow}
+ *   \text{RaviartThomas}_k
+ *   \stackrel{\text{div}}{\rightarrow}
+ *   DGQ_{k}
+ * @f]
+ * Consequently, the approximation order of the Nédélec space equals the value
+ * <i>degree</i> given to the constructor. In this scheme, the lowest order
+ * element would be created by the call FE_Nedelec<dim>(0). Note that this
+ * follows the convention of Brezzi and Raviart, though not the one used in
+ * the original paper by Nédélec.
+ *
+ * This class is not implemented for the codimension one case (<tt>spacedim !=
+ * dim</tt>).
+ *
+ * @todo Even if this element is implemented for two and three space
+ * dimensions, the definition of the node values relies on consistently
+ * oriented faces in 3D. Therefore, care should be taken on complicated
+ * meshes.
+ *
+ *
+ * <h3>Interpolation</h3>
+ *
+ * The
+ * @ref GlossInterpolation "interpolation"
+ * operators associated with the Nédélec element are constructed
+ * such that interpolation and computing the curl are commuting operations on
+ * rectangular mesh cells. We require this from interpolating arbitrary
+ * functions as well as the #restriction matrices.
+ *
+ * <h4>Node values</h4>
+ *
+ * The
+ * @ref GlossNodes "node values"
+ * for an element of degree <i>k</i> on the reference cell are:
+ * <ol>
+ * <li> On edges: the moments of the tangential component with respect to
+ * polynomials of degree <i>k</i>.
+ * <li> On faces: the moments of the tangential components with respect to
+ * <tt>dim</tt>-1 dimensional FE_Nedelec polynomials of degree <i>k</i>-1.
+ * <li> In cells: the moments with respect to gradients of polynomials in FE_Q
+ * of degree <i>k</i>.
+ * </ol>
+ *
+ * <h4>Generalized support points</h4>
+ *
+ * The node values above rely on integrals, which will be computed by
+ * quadrature rules themselves. The generalized support points are a set of
+ * points such that this quadrature can be performed with sufficient accuracy.
+ * The points needed are those of QGauss<sub>k+1</sub> on each edge and
+ * QGauss<sub>k+2</sub> on each face and in the interior of the cell (or none
+ * for N<sub>1</sub>).
+ *
+ * @author Markus Bürg
+ * @date 2009, 2010, 2011
+ */
+template <int dim>
+class FE_Nedelec : public FE_PolyTensor<PolynomialsNedelec<dim>, dim>
+{
+public:
+  /**
+   * Constructor for the Nédélec element of degree @p p.
+   */
+  FE_Nedelec (const unsigned int p);
+
+  /**
+   * Return a string that uniquely identifies a finite element. This class
+   * returns <tt>FE_Nedelec<dim>(degree)</tt>, with @p dim and @p degree
+   * replaced by appropriate values.
+   */
+  virtual std::string get_name () const;
+
+
+  /**
+   * This function returns @p true, if the shape function @p shape_index has
+   * non-zero function values somewhere on the face @p face_index.
+   */
+  virtual bool has_support_on_face (const unsigned int shape_index,
+                                    const unsigned int face_index) const;
+
+  /**
+   * Return whether this element implements its hanging node constraints in
+   * the new way, which has to be used to make elements "hp compatible".
+   *
+   * For the <tt>FE_Nedelec</tt> class the result is always true (independent
+   * of the degree of the element), as it implements the complete set of
+   * functions necessary for hp capability.
+   */
+  virtual bool hp_constraints_are_implemented () const;
+
+  /**
+   * Return whether this element dominates the one, which is given as
+   * argument.
+   */
+  virtual FiniteElementDomination::Domination
+  compare_for_face_domination (const FiniteElement<dim> &fe_other) const;
+
+  /**
+   * If, on a vertex, several finite elements are active, the hp code first
+   * assigns the degrees of freedom of each of these FEs different global
+   * indices. It then calls this function to find out which of them should get
+   * identical values, and consequently can receive the same global DoF index.
+   * This function therefore returns a list of identities between DoFs of the
+   * present finite element object with the DoFs of @p fe_other, which is a
+   * reference to a finite element object representing one of the other finite
+   * elements active on this particular vertex. The function computes which of
+   * the degrees of freedom of the two finite element objects are equivalent,
+   * both numbered between zero and the corresponding value of dofs_per_vertex
+   * of the two finite elements. The first index of each pair denotes one of
+   * the vertex dofs of the present element, whereas the second is the
+   * corresponding index of the other finite element.
+   */
+  virtual std::vector<std::pair<unsigned int, unsigned int> >
+  hp_vertex_dof_identities (const FiniteElement<dim> &fe_other) const;
+
+  /**
+   * Same as hp_vertex_dof_identities(), except that the function treats
+   * degrees of freedom on lines.
+   */
+  virtual std::vector<std::pair<unsigned int, unsigned int> >
+  hp_line_dof_identities (const FiniteElement<dim> &fe_other) const;
+
+  /**
+   * Same as hp_vertex_dof_identities(), except that the function treats
+   * degrees of freedom on quads.
+   */
+  virtual std::vector<std::pair<unsigned int, unsigned int> >
+  hp_quad_dof_identities (const FiniteElement<dim> &fe_other) const;
+
+  /**
+   * Return the matrix interpolating from a face of one element to the face of
+   * the neighboring element. The size of the matrix is then
+   * <tt>source.dofs_per_face</tt> times <tt>this->dofs_per_face</tt>.
+   *
+   * Derived elements will have to implement this function. They may only
+   * provide interpolation matrices for certain source finite elements, for
+   * example those from the same family. If they don't implement interpolation
+   * from a given element, then they must throw an exception of type
+   * <tt>FiniteElement<dim>::ExcInterpolationNotImplemented</tt>.
+   */
+  virtual void
+  get_face_interpolation_matrix (const FiniteElement<dim> &source,
+                                 FullMatrix<double> &matrix) const;
+
+  /**
+   * Return the matrix interpolating from a face of one element to the subface
+   * of the neighboring element. The size of the matrix is then
+   * <tt>source.dofs_per_face</tt> times <tt>this->dofs_per_face</tt>.
+   *
+   * Derived elements will have to implement this function. They may only
+   * provide interpolation matrices for certain source finite elements, for
+   * example those from the same family. If they don't implement interpolation
+   * from a given element, then they must throw an exception of type
+   * <tt>ExcInterpolationNotImplemented</tt>.
+   */
+  virtual void
+  get_subface_interpolation_matrix (const FiniteElement<dim> &source,
+                                    const unsigned int subface,
+                                    FullMatrix<double> &matrix) const;
+  /**
+   * Projection from a fine grid space onto a coarse grid space. If this
+   * projection operator is associated with a matrix @p P, then the
+   * restriction of this matrix @p P_i to a single child cell is returned
+   * here.
+   *
+   * The matrix @p P is the concatenation or the sum of the cell matrices @p
+   * P_i, depending on the #restriction_is_additive_flags. This distinguishes
+   * interpolation (concatenation) and projection with respect to scalar
+   * products (summation).
+   *
+   * Row and column indices are related to coarse grid and fine grid spaces,
+   * respectively, consistent with the definition of the associated operator.
+   */
+  virtual const FullMatrix<double> &
+  get_restriction_matrix (const unsigned int child,
+                          const RefinementCase<dim> &refinement_case=RefinementCase<dim>::isotropic_refinement) const;
+
+  /**
+   * Embedding matrix between grids.
+   *
+   * The identity operator from a coarse grid space into a fine grid space is
+   * associated with a matrix @p P. The restriction of this matrix @p P_i to a
+   * single child cell is returned here.
+   *
+   * The matrix @p P is the concatenation, not the sum of the cell matrices @p
+   * P_i. That is, if the same non-zero entry <tt>j,k</tt> exists in two
+   * different child matrices @p P_i, the value should be the same in both
+   * matrices and it is copied into the matrix @p P only once.
+   *
+   * Row and column indices are related to fine grid and coarse grid spaces,
+   * respectively, consistent with the definition of the associated operator.
+   *
+   * These matrices are used by routines assembling the prolongation matrix
+   * for multi-level methods.  Upon assembling the transfer matrix between
+   * cells using this matrix array, zero elements in the prolongation matrix
+   * are discarded and will not fill up the transfer matrix.
+   */
+  virtual const FullMatrix<double> &
+  get_prolongation_matrix (const unsigned int child,
+                           const RefinementCase<dim> &refinement_case=RefinementCase<dim>::isotropic_refinement) const;
+
+  virtual void interpolate (std::vector<double> &local_dofs,
+                            const std::vector<double> &values) const;
+
+  virtual void interpolate (std::vector<double> &local_dofs,
+                            const std::vector<Vector<double> > &values,
+                            unsigned int offset = 0) const;
+  virtual void interpolate (std::vector<double> &local_dofs,
+                            const VectorSlice<const std::vector<std::vector<double> > > &values)
+  const;
+
+  /**
+   * Returns a list of constant modes of the element.
+   */
+  virtual std::pair<Table<2,bool>, std::vector<unsigned int> >
+  get_constant_modes () const;
+
+  virtual std::size_t memory_consumption () const;
+  virtual FiniteElement<dim> *clone() const;
+
+private:
+  /**
+   * Only for internal use. Its full name is @p get_dofs_per_object_vector
+   * function and it creates the @p dofs_per_object vector that is needed
+   * within the constructor to be passed to the constructor of @p
+   * FiniteElementData.
+   *
+   * If the optional argument <tt>dg</tt> is true, the vector returned will
+   * have all degrees of freedom assigned to the cell, none on the faces and
+   * edges.
+   */
+  static std::vector<unsigned int>
+  get_dpo_vector (const unsigned int degree, bool dg=false);
+
+  /**
+   * Initialize the @p generalized_support_points field of the FiniteElement
+   * class and fill the tables with interpolation weights (#boundary_weights
+   * and interior_weights). Called from the constructor.
+   */
+  void initialize_support_points (const unsigned int degree);
+
+  /**
+   * Initialize the interpolation from functions on refined mesh cells onto
+   * the father cell. According to the philosophy of the Nédélec element,
+   * this restriction operator preserves the curl of a function weakly.
+   */
+  void initialize_restriction ();
+
+  /**
+   * These are the factors multiplied to a function in the
+   * #generalized_face_support_points when computing the integration.
+   *
+   * See the
+   * @ref GlossGeneralizedSupport "glossary entry on generalized support points"
+   * for more information.
+   */
+  Table<2, double> boundary_weights;
+
+  /*
+   * Mutex for protecting initialization of restriction and embedding matrix.
+   */
+  mutable Threads::Mutex mutex;
+
+  /**
+   * Allow access from other dimensions.
+   */
+  template <int dim1> friend class FE_Nedelec;
+};
+
+/* -------------- declaration of explicit specializations ------------- */
+
+#ifndef DOXYGEN
+
+template <>
+void
+FE_Nedelec<1>::initialize_restriction();
+
+#endif // DOXYGEN
+
+/*@}*/
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/fe/fe_nothing.h b/include/deal.II/fe/fe_nothing.h
new file mode 100644
index 0000000..bdf1281
--- /dev/null
+++ b/include/deal.II/fe/fe_nothing.h
@@ -0,0 +1,268 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__fe_nothing_h
+#define dealii__fe_nothing_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/fe/fe.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/*!@addtogroup fe */
+/*@{*/
+
+/**
+ * Definition of a finite element with zero degrees of freedom.  This class is
+ * useful (in the context of an hp method) to represent empty cells in the
+ * triangulation on which no degrees of freedom should be allocated, or to
+ * describe a field that is extended by zero to a part of the domain where we
+ * don't need it.  Thus a triangulation may be divided into two regions: an
+ * active region where normal elements are used, and an inactive region where
+ * FE_Nothing elements are used.  The hp::DoFHandler will therefore assign no
+ * degrees of freedom to the FE_Nothing cells, and this subregion is therefore
+ * implicitly deleted from the computation. step-46 shows a use case for this
+ * element. An interesting application for this element is also presented in
+ * the paper A. Cangiani, J. Chapman, E. Georgoulis, M. Jensen:
+ * <b>Implementation of the Continuous-Discontinuous Galerkin Finite Element
+ * Method</b>, arXiv:1201.2878v1 [math.NA], 2012 (see
+ * http://arxiv.org/abs/1201.2878).
+ *
+ * Note that some care must be taken that the resulting mesh topology
+ * continues to make sense when FE_Nothing elements are introduced. This is
+ * particularly true when dealing with hanging node constraints, because the
+ * library makes some basic assumptions about the nature of those constraints.
+ * The following geometries are acceptable:
+ * @code
+ * +---------+----+----+
+ * |         | 0  |    |
+ * |    1    +----+----+
+ * |         | 0  |    |
+ * +---------+----+----+
+ * @endcode
+ * @code
+ * +---------+----+----+
+ * |         | 1  |    |
+ * |    0    +----+----+
+ * |         | 1  |    |
+ * +---------+----+----+
+ * @endcode
+ * Here, 0 denotes an FE_Nothing cell, and 1 denotes some other element type.
+ * The library has no difficulty computing the necessary hanging node
+ * constraints in these cases (i.e. no constraint). However, the following
+ * geometry is NOT acceptable (at least in the current implementation):
+ * @code
+ * +---------+----+----+
+ * |         | 0  |    |
+ * |    1    +----+----+
+ * |         | 1  |    |
+ * +---------+----+----+
+ * @endcode
+ * The distinction lies in the mixed nature of the child faces, a case we have
+ * not implemented as of yet.
+ *
+ * @author Joshua White, Wolfgang Bangerth
+ */
+template <int dim, int spacedim=dim>
+class FE_Nothing : public FiniteElement<dim,spacedim>
+{
+public:
+
+  /**
+   * Constructor. First argument denotes the number of components to give this
+   * finite element (default = 1).
+   *
+   * Second argument decides whether FE_Nothing will dominate any other FE in
+   * compare_for_face_domination() (default = false). If it dominates, then at
+   * interfaces where, for example, a Q1 meets an FE_Nothing, we will force the
+   * traces of the two functions to be the same. Because the FE_Nothing encodes
+   * a space that is zero everywhere, this means that the Q1 field will be
+   * forced to become zero at this interface.
+   */
+  FE_Nothing (const unsigned int n_components = 1,
+              const bool dominate = false);
+
+  /**
+   * A sort of virtual copy constructor. Some places in the library, for
+   * example the constructors of FESystem as well as the hp::FECollection
+   * class, need to make copies of finite elements without knowing their exact
+   * type. They do so through this function.
+   */
+  virtual
+  FiniteElement<dim,spacedim> *
+  clone() const;
+
+  /**
+   * Return a string that uniquely identifies a finite element. In this case
+   * it is <code>FE_Nothing@<dim@></code>.
+   */
+  virtual
+  std::string
+  get_name() const;
+
+  // for documentation, see the FiniteElement base class
+  virtual
+  UpdateFlags
+  requires_update_flags (const UpdateFlags update_flags) const;
+
+  /**
+   * Return the value of the @p ith shape function at the point @p p. @p p is
+   * a point on the reference element. Because the current element has no
+   * degrees of freedom, this function should obviously not be called in
+   * practice.  All this function really does, therefore, is trigger an
+   * exception.
+   */
+  virtual
+  double
+  shape_value (const unsigned int i, const Point<dim> &p) const;
+
+  virtual
+  void
+  fill_fe_values (const typename Triangulation<dim,spacedim>::cell_iterator           &cell,
+                  const CellSimilarity::Similarity                                     cell_similarity,
+                  const Quadrature<dim>                                               &quadrature,
+                  const Mapping<dim,spacedim>                                         &mapping,
+                  const typename Mapping<dim,spacedim>::InternalDataBase              &mapping_internal,
+                  const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &mapping_data,
+                  const typename FiniteElement<dim,spacedim>::InternalDataBase        &fe_internal,
+                  dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const;
+
+  virtual
+  void
+  fill_fe_face_values (const typename Triangulation<dim,spacedim>::cell_iterator           &cell,
+                       const unsigned int                                                   face_no,
+                       const Quadrature<dim-1>                                             &quadrature,
+                       const Mapping<dim,spacedim>                                         &mapping,
+                       const typename Mapping<dim,spacedim>::InternalDataBase              &mapping_internal,
+                       const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &mapping_data,
+                       const typename FiniteElement<dim,spacedim>::InternalDataBase        &fe_internal,
+                       dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const;
+
+  virtual
+  void
+  fill_fe_subface_values (const typename Triangulation<dim,spacedim>::cell_iterator           &cell,
+                          const unsigned int                                                   face_no,
+                          const unsigned int                                                   sub_no,
+                          const Quadrature<dim-1>                                             &quadrature,
+                          const Mapping<dim,spacedim>                                         &mapping,
+                          const typename Mapping<dim,spacedim>::InternalDataBase              &mapping_internal,
+                          const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &mapping_data,
+                          const typename FiniteElement<dim,spacedim>::InternalDataBase        &fe_internal,
+                          dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const;
+
+  /**
+   * Prepare internal data structures and fill in values independent of the
+   * cell. Returns a pointer to an object of which the caller of this function
+   * then has to assume ownership (which includes destruction when it is no
+   * more needed).
+   *
+   * In the current case, this function just returns a default pointer, since
+   * no meaningful data exists for this element.
+   */
+  virtual
+  typename FiniteElement<dim,spacedim>::InternalDataBase *
+  get_data (const UpdateFlags                                                    update_flags,
+            const Mapping<dim,spacedim>                                         &mapping,
+            const Quadrature<dim>                                               &quadrature,
+            dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const;
+
+  /**
+   * Return whether this element dominates the one given as argument when they
+   * meet at a common face, whether it is the other way around, whether
+   * neither dominates, or if either could dominate.
+   *
+   * For a definition of domination, see FiniteElementDomination::Domination
+   * and in particular the
+   * @ref hp_paper "hp paper".
+   *
+   * In the current case, this element is assumed to dominate if the second
+   * argument in the constructor @p dominate is true. When this argument is
+   * false and @p fe_other is also of type FE_Nothing(), either element can
+   * dominate. Otherwise there are no_requirements.
+   */
+  virtual
+  FiniteElementDomination::Domination
+  compare_for_face_domination (const FiniteElement<dim,spacedim> &fe_other) const;
+
+
+
+  virtual
+  std::vector<std::pair<unsigned int, unsigned int> >
+  hp_vertex_dof_identities (const FiniteElement<dim,spacedim> &fe_other) const;
+
+  virtual
+  std::vector<std::pair<unsigned int, unsigned int> >
+  hp_line_dof_identities (const FiniteElement<dim,spacedim> &fe_other) const;
+
+  virtual
+  std::vector<std::pair<unsigned int, unsigned int> >
+  hp_quad_dof_identities (const FiniteElement<dim,spacedim> &fe_other) const;
+
+  virtual
+  bool
+  hp_constraints_are_implemented () const;
+
+  /**
+   * Return the matrix interpolating from a face of one element to the face
+   * of the neighboring element. The size of the matrix is then
+   * <tt>source.#dofs_per_face</tt> times <tt>this->#dofs_per_face</tt>.
+   *
+   * Since the current finite element has no degrees of freedom, the
+   * interpolation matrix is necessarily empty.
+   */
+
+  virtual
+  void
+  get_face_interpolation_matrix (const FiniteElement<dim,spacedim> &source_fe,
+                                 FullMatrix<double>       &interpolation_matrix) const;
+
+
+  /**
+   * Return the matrix interpolating from a face of one element to the
+   * subface of the neighboring element. The size of the matrix is then
+   * <tt>source.#dofs_per_face</tt> times <tt>this->#dofs_per_face</tt>.
+   *
+   * Since the current finite element has no degrees of freedom, the
+   * interpolation matrix is necessarily empty.
+   */
+
+  virtual
+  void
+  get_subface_interpolation_matrix (const FiniteElement<dim,spacedim> &source_fe,
+                                    const unsigned int index,
+                                    FullMatrix<double>  &interpolation_matrix) const;
+
+  /**
+   * @return true if the FE dominates any other.
+   */
+  bool is_dominating() const;
+
+private:
+
+  /**
+   * If true, this element will dominate any other apart from itself in
+   * compare_for_face_domination().
+   */
+  const bool dominate;
+};
+
+
+/*@}*/
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
+
diff --git a/include/deal.II/fe/fe_poly.h b/include/deal.II/fe/fe_poly.h
new file mode 100644
index 0000000..a20f52a
--- /dev/null
+++ b/include/deal.II/fe/fe_poly.h
@@ -0,0 +1,451 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__fe_poly_h
+#define dealii__fe_poly_h
+
+
+#include <deal.II/fe/fe.h>
+#include <deal.II/base/quadrature.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+/*!@addtogroup febase */
+/*@{*/
+
+/**
+ * This class gives a unified framework for the implementation of
+ * FiniteElement classes based on polynomial spaces like the
+ * TensorProductPolynomials or PolynomialSpace classes.
+ *
+ * Every class conforming to the following interface can be used as template
+ * parameter PolynomialType.
+ *
+ * @code
+ * static const unsigned int dimension;
+ *
+ *  void compute (const Point<dim>            &unit_point,
+ *                std::vector<double>         &values,
+ *                std::vector<Tensor<1,dim> > &grads,
+ *                std::vector<Tensor<2,dim> > &grad_grads,
+ *                std::vector<Tensor<3,dim> > &third_derivatives,
+ *                std::vector<Tensor<4,dim> > &fourth_derivatives) const;
+ *
+ * double compute_value (const unsigned int i,
+ *                       const Point<dim> &p) const;
+ *
+ *  template <int order>
+ *  Tensor<order,dim> compute_derivative (const unsigned int i,
+ *                                        const Point<dim> &p) const;
+ * @endcode
+ * Example classes are TensorProductPolynomials, PolynomialSpace or
+ * PolynomialsP.
+ *
+ * This class is not a fully implemented FiniteElement class. Instead there
+ * are several pure virtual functions declared in the FiniteElement class
+ * which cannot be implemented by this class but are left for
+ * implementation in derived classes.
+ *
+ * @todo Since nearly all functions for spacedim != dim are specialized, this
+ * class needs cleaning up.
+ *
+ * @author Ralf Hartmann 2004, Guido Kanschat, 2009
+ */
+
+template <class PolynomialType, int dim=PolynomialType::dimension, int spacedim=dim>
+class FE_Poly : public FiniteElement<dim,spacedim>
+{
+public:
+  /**
+   * Constructor. Copies @p poly_space; the rest is passed to FiniteElement.
+   */
+  FE_Poly (const PolynomialType &poly_space,
+           const FiniteElementData<dim> &fe_data,
+           const std::vector<bool> &restriction_is_additive_flags,
+           const std::vector<ComponentMask> &nonzero_components);
+
+  /**
+   * Return the polynomial degree of this finite element, i.e. the value
+   * passed to the constructor.
+   */
+  unsigned int get_degree () const;
+
+  // for documentation, see the FiniteElement base class
+  virtual
+  UpdateFlags
+  requires_update_flags (const UpdateFlags update_flags) const;
+
+  /**
+   * Return the numbering of the underlying polynomial space compared to
+   * lexicographic ordering of the basis functions. Returns
+   * PolynomialType::get_numbering().
+   */
+  std::vector<unsigned int> get_poly_space_numbering() const;
+
+  /**
+   * Return the inverse numbering of the underlying polynomial space. Returns
+   * PolynomialType::get_numbering_inverse().
+   */
+  std::vector<unsigned int> get_poly_space_numbering_inverse() const;
+
+  /**
+   * Return the value of the <tt>i</tt>th shape function at the point
+   * <tt>p</tt>. See the FiniteElement base class for more information about
+   * the semantics of this function.
+   */
+  virtual double shape_value (const unsigned int i,
+                              const Point<dim> &p) const;
+
+  /**
+   * Return the value of the <tt>component</tt>th vector component of the
+   * <tt>i</tt>th shape function at the point <tt>p</tt>. See the
+   * FiniteElement base class for more information about the semantics of this
+   * function.
+   *
+   * Since this element is scalar, the returned value is the same as if the
+   * function without the <tt>_component</tt> suffix were called, provided
+   * that the specified component is zero.
+   */
+  virtual double shape_value_component (const unsigned int i,
+                                        const Point<dim> &p,
+                                        const unsigned int component) const;
+
+  /**
+   * Return the gradient of the <tt>i</tt>th shape function at the point
+   * <tt>p</tt>. See the FiniteElement base class for more information about
+   * the semantics of this function.
+   */
+  virtual Tensor<1,dim> shape_grad (const unsigned int  i,
+                                    const Point<dim>   &p) const;
+
+  /**
+   * Return the gradient of the <tt>component</tt>th vector component of the
+   * <tt>i</tt>th shape function at the point <tt>p</tt>. See the
+   * FiniteElement base class for more information about the semantics of this
+   * function.
+   *
+   * Since this element is scalar, the returned value is the same as if the
+   * function without the <tt>_component</tt> suffix were called, provided
+   * that the specified component is zero.
+   */
+  virtual Tensor<1,dim> shape_grad_component (const unsigned int i,
+                                              const Point<dim> &p,
+                                              const unsigned int component) const;
+
+  /**
+   * Return the tensor of second derivatives of the <tt>i</tt>th shape
+   * function at point <tt>p</tt> on the unit cell. See the FiniteElement base
+   * class for more information about the semantics of this function.
+   */
+  virtual Tensor<2,dim> shape_grad_grad (const unsigned int  i,
+                                         const Point<dim> &p) const;
+
+  /**
+   * Return the second derivative of the <tt>component</tt>th vector component
+   * of the <tt>i</tt>th shape function at the point <tt>p</tt>. See the
+   * FiniteElement base class for more information about the semantics of this
+   * function.
+   *
+   * Since this element is scalar, the returned value is the same as if the
+   * function without the <tt>_component</tt> suffix were called, provided
+   * that the specified component is zero.
+   */
+  virtual Tensor<2,dim> shape_grad_grad_component (const unsigned int i,
+                                                   const Point<dim> &p,
+                                                   const unsigned int component) const;
+
+  /**
+   * Return the tensor of third derivatives of the <tt>i</tt>th shape function
+   * at point <tt>p</tt> on the unit cell. See the FiniteElement base class
+   * for more information about the semantics of this function.
+   */
+  virtual Tensor<3,dim> shape_3rd_derivative (const unsigned int  i,
+                                              const Point<dim>   &p) const;
+
+  /**
+   * Return the third derivative of the <tt>component</tt>th vector component
+   * of the <tt>i</tt>th shape function at the point <tt>p</tt>. See the
+   * FiniteElement base class for more information about the semantics of this
+   * function.
+   *
+   * Since this element is scalar, the returned value is the same as if the
+   * function without the <tt>_component</tt> suffix were called, provided
+   * that the specified component is zero.
+   */
+  virtual Tensor<3,dim> shape_3rd_derivative_component (const unsigned int i,
+                                                        const Point<dim>   &p,
+                                                        const unsigned int component) const;
+
+  /**
+   * Return the tensor of fourth derivatives of the <tt>i</tt>th shape
+   * function at point <tt>p</tt> on the unit cell. See the FiniteElement base
+   * class for more information about the semantics of this function.
+   */
+  virtual Tensor<4,dim> shape_4th_derivative (const unsigned int  i,
+                                              const Point<dim>   &p) const;
+
+  /**
+   * Return the fourth derivative of the <tt>component</tt>th vector component
+   * of the <tt>i</tt>th shape function at the point <tt>p</tt>. See the
+   * FiniteElement base class for more information about the semantics of this
+   * function.
+   *
+   * Since this element is scalar, the returned value is the same as if the
+   * function without the <tt>_component</tt> suffix were called, provided
+   * that the specified component is zero.
+   */
+  virtual Tensor<4,dim> shape_4th_derivative_component (const unsigned int i,
+                                                        const Point<dim>   &p,
+                                                        const unsigned int component) const;
+
+protected:
+  /*
+   * NOTE: The following function has its definition inlined into the class declaration
+   * because we otherwise run into a compiler error with MS Visual Studio.
+   */
+
+
+  virtual
+  typename FiniteElement<dim,spacedim>::InternalDataBase *
+  get_data(const UpdateFlags                                                    update_flags,
+           const Mapping<dim,spacedim>                                         &/*mapping*/,
+           const Quadrature<dim>                                               &quadrature,
+           dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const
+  {
+    // generate a new data object and
+    // initialize some fields
+    InternalData *data = new InternalData;
+    data->update_each = requires_update_flags(update_flags);
+
+    const unsigned int n_q_points = quadrature.size();
+
+    // initialize some scratch arrays. the underlying polynomial space
+    // writes the values and derivatives of shape functions into them;
+    // each is sized according to what the user requested
+    std::vector<double> values(update_flags & update_values ?
+                               this->dofs_per_cell : 0);
+    std::vector<Tensor<1,dim> > grads(update_flags & update_gradients ?
+                                      this->dofs_per_cell : 0);
+    std::vector<Tensor<2,dim> > grad_grads(update_flags & update_hessians ?
+                                           this->dofs_per_cell : 0);
+    std::vector<Tensor<3,dim> > third_derivatives(update_flags & update_3rd_derivatives ?
+                                                  this->dofs_per_cell : 0);
+    std::vector<Tensor<4,dim> > fourth_derivatives;   // won't be needed, so leave empty
+
+    // now also initialize the fields of this class's own
+    // temporary storage, depending on what we need for the given
+    // update flags.
+    //
+    // there is one exception from the rule: if we are dealing with
+    // cells (i.e., if this function is not called via
+    // get_(sub)face_data()), then we can already store things in the
+    // final location where FEValues::reinit() later wants to see
+    // things. we then don't need the intermediate space. we determine
+    // whether we are on a cell by asking whether the number of
+    // elements in the output array equals the number of quadrature
+    // points (yes, it's a cell) or not (because in that case the
+    // number of quadrature points we use here equals the number of
+    // quadrature points summed over *all* faces or subfaces, whereas
+    // the number of output slots equals the number of quadrature
+    // points on only *one* face)
+    if ((update_flags & update_values)
+        &&
+        !((output_data.shape_values.n_rows() > 0)
+          &&
+          (output_data.shape_values.n_cols() == n_q_points)))
+      data->shape_values.reinit (this->dofs_per_cell, n_q_points);
+
+    if (update_flags & update_gradients)
+      data->shape_gradients.reinit (this->dofs_per_cell, n_q_points);
+
+    if (update_flags & update_hessians)
+      data->shape_hessians.reinit (this->dofs_per_cell, n_q_points);
+
+    if (update_flags & update_3rd_derivatives)
+      data->shape_3rd_derivatives.reinit (this->dofs_per_cell, n_q_points);
+
+    // next already fill those fields of which we have information by
+    // now. note that the shape gradients are only those on the unit
+    // cell, and need to be transformed when visiting an actual cell
+    if (update_flags & (update_values | update_gradients
+                        | update_hessians | update_3rd_derivatives) )
+      for (unsigned int i=0; i<n_q_points; ++i)
+        {
+          poly_space.compute(quadrature.point(i),
+                             values, grads, grad_grads,
+                             third_derivatives,
+                             fourth_derivatives);
+
+          // the values of shape functions at quadrature points don't change.
+          // consequently, write these values right into the output array if
+          // we can, i.e., if the output array has the correct size. this is
+          // the case on cells. on faces, we already precompute data on *all*
+          // faces and subfaces, but we later on copy only a portion of it
+          // into the output object; in that case, copy the data from all
+          // faces into the scratch object
+          if (update_flags & update_values)
+            if (output_data.shape_values.n_rows() > 0)  // NOTE(review): nothing is stored if this table has no rows -- confirm intended
+              {
+                if (output_data.shape_values.n_cols() == n_q_points)
+                  for (unsigned int k=0; k<this->dofs_per_cell; ++k)
+                    output_data.shape_values[k][i] = values[k];
+                else
+                  for (unsigned int k=0; k<this->dofs_per_cell; ++k)
+                    data->shape_values[k][i] = values[k];
+              }
+
+          // for everything else, derivatives need to be transformed,
+          // so we write them into our scratch space and only later
+          // copy stuff into where FEValues wants it
+          if (update_flags & update_gradients)
+            for (unsigned int k=0; k<this->dofs_per_cell; ++k)
+              data->shape_gradients[k][i] = grads[k];
+
+          if (update_flags & update_hessians)
+            for (unsigned int k=0; k<this->dofs_per_cell; ++k)
+              data->shape_hessians[k][i] = grad_grads[k];
+
+          if (update_flags & update_3rd_derivatives)
+            for (unsigned int k=0; k<this->dofs_per_cell; ++k)
+              data->shape_3rd_derivatives[k][i] = third_derivatives[k];
+        }
+    return data;
+  }
+
+  virtual
+  void
+  fill_fe_values (const typename Triangulation<dim,spacedim>::cell_iterator           &cell,
+                  const CellSimilarity::Similarity                                     cell_similarity,
+                  const Quadrature<dim>                                               &quadrature,
+                  const Mapping<dim,spacedim>                                         &mapping,
+                  const typename Mapping<dim,spacedim>::InternalDataBase              &mapping_internal,
+                  const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &mapping_data,
+                  const typename FiniteElement<dim,spacedim>::InternalDataBase        &fe_internal,
+                  dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const;
+
+  virtual
+  void
+  fill_fe_face_values (const typename Triangulation<dim,spacedim>::cell_iterator           &cell,
+                       const unsigned int                                                   face_no,
+                       const Quadrature<dim-1>                                             &quadrature,
+                       const Mapping<dim,spacedim>                                         &mapping,
+                       const typename Mapping<dim,spacedim>::InternalDataBase              &mapping_internal,
+                       const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &mapping_data,
+                       const typename FiniteElement<dim,spacedim>::InternalDataBase        &fe_internal,
+                       dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const;
+
+  virtual
+  void
+  fill_fe_subface_values (const typename Triangulation<dim,spacedim>::cell_iterator           &cell,
+                          const unsigned int                                                   face_no,
+                          const unsigned int                                                   sub_no,
+                          const Quadrature<dim-1>                                             &quadrature,
+                          const Mapping<dim,spacedim>                                         &mapping,
+                          const typename Mapping<dim,spacedim>::InternalDataBase              &mapping_internal,
+                          const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &mapping_data,
+                          const typename FiniteElement<dim,spacedim>::InternalDataBase        &fe_internal,
+                          dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const;
+
+  /**
+   * Fields of cell-independent data.
+   *
+   * For information about the general purpose of this class, see the
+   * documentation of the base class.
+   */
+  class InternalData : public FiniteElement<dim,spacedim>::InternalDataBase
+  {
+  public:
+    /**
+     * Array with shape function values in quadrature points. There is one row
+     * for each shape function, containing values for each quadrature point.
+     *
+     * In this array, we store the values of the shape function in the
+     * quadrature points on the unit cell. Since these values do not change
+     * under transformation to the real cell, we only need to copy them over
+     * when visiting a concrete cell.
+     */
+    Table<2,double> shape_values;
+
+    /**
+     * Array with shape function gradients in quadrature points. There is one
+     * row for each shape function, containing values for each quadrature
+     * point.
+     *
+     * We store the gradients in the quadrature points on the unit cell. We
+     * then only have to apply the transformation (which is a matrix-vector
+     * multiplication) when visiting an actual cell.
+     */
+    Table<2,Tensor<1,dim> > shape_gradients;
+
+    /**
+     * Array with shape function hessians in quadrature points. There is one
+     * row for each shape function, containing values for each quadrature
+     * point.
+     *
+     * We store the hessians in the quadrature points on the unit cell. We
+     * then only have to apply the transformation when visiting an actual
+     * cell.
+     */
+    Table<2,Tensor<2,dim> > shape_hessians;
+
+    /**
+     * Array with shape function third derivatives in quadrature points. There
+     * is one row for each shape function, containing values for each
+     * quadrature point.
+     *
+     * We store the third derivatives in the quadrature points on the unit
+     * cell. We then only have to apply the transformation when visiting an
+     * actual cell.
+     */
+    Table<2,Tensor<3,dim> > shape_3rd_derivatives;
+  };
+
+  /**
+   * Correct the shape third derivatives by subtracting the terms
+   * corresponding to the Jacobian pushed forward gradient and second
+   * derivative.
+   *
+   * Before the correction, the third derivatives would be given by
+   * @f[
+   * D_{ijkl} = \frac{d^3\phi_i}{d \hat x_J d \hat x_K d \hat x_L} (J_{jJ})^{-1} (J_{kK})^{-1} (J_{lL})^{-1},
+   * @f]
+   * where $J_{iI}=\frac{d x_i}{d \hat x_I}$. After the correction, the
+   * correct third derivative would be given by
+   * @f[
+   * \frac{d^3\phi_i}{d x_j d x_k d x_l} = D_{ijkl} - H_{mjl} \frac{d^2 \phi_i}{d x_k d x_m}
+   * - H_{mkl} \frac{d^2 \phi_i}{d x_j d x_m} - H_{mjk} \frac{d^2 \phi_i}{d x_l d x_m}
+   * - K_{mjkl} \frac{d \phi_i}{d x_m},
+   * @f]
+   * where $H_{ijk}$ is the Jacobian pushed-forward derivative and $K_{ijkl}$
+   * is the Jacobian pushed-forward second derivative.
+   */
+  void
+  correct_third_derivatives (internal::FEValues::FiniteElementRelatedData<dim,spacedim>       &output_data,
+                             const internal::FEValues::MappingRelatedData<dim,spacedim>       &mapping_data,
+                             const unsigned int                                                n_q_points,
+                             const unsigned int                                                dof) const;
+
+  /**
+   * The polynomial space. Its type is given by the template parameter
+   * PolynomialType.
+   */
+  PolynomialType poly_space;
+};
+
+/*@}*/
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/fe/fe_poly.templates.h b/include/deal.II/fe/fe_poly.templates.h
new file mode 100644
index 0000000..3caeaa7
--- /dev/null
+++ b/include/deal.II/fe/fe_poly.templates.h
@@ -0,0 +1,536 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/qprojector.h>
+#include <deal.II/base/polynomial_space.h>
+#include <deal.II/base/tensor_product_polynomials.h>
+#include <deal.II/base/tensor_product_polynomials_const.h>
+#include <deal.II/base/tensor_product_polynomials_bubbles.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/fe_poly.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+// Forward the element description to the FiniteElement base class and keep
+// a copy of the polynomial space the shape functions are evaluated from.
+template <class PolynomialType, int dim, int spacedim>
+FE_Poly<PolynomialType,dim,spacedim>::FE_Poly (
+  const PolynomialType             &poly_space,
+  const FiniteElementData<dim>     &fe_data,
+  const std::vector<bool>          &restriction_is_additive_flags,
+  const std::vector<ComponentMask> &nonzero_components)
+  : FiniteElement<dim,spacedim> (fe_data, restriction_is_additive_flags, nonzero_components)
+  , poly_space(poly_space)
+{
+  AssertDimension(dim, PolynomialType::dimension); // space and element dimensions must agree
+}
+
+
+// Report the polynomial degree held in the degree member of this element.
+template <class PolynomialType, int dim, int spacedim>
+unsigned int FE_Poly<PolynomialType,dim,spacedim>::get_degree () const
+{
+  return this->degree;
+}
+
+
+// Value of the i-th shape function at the point p, taken from poly_space.
+template <class PolynomialType, int dim, int spacedim>
+double FE_Poly<PolynomialType,dim,spacedim>::shape_value
+(const unsigned int i, const Point<dim> &p) const
+{
+  Assert (i<this->dofs_per_cell, ExcIndexRange(i,0,this->dofs_per_cell));
+  return poly_space.compute_value(i, p);
+}
+
+
+// Component-wise variant of shape_value(); only component 0 is accepted.
+template <class PolynomialType, int dim, int spacedim>
+double FE_Poly<PolynomialType,dim,spacedim>::shape_value_component
+(const unsigned int i, const Point<dim> &p, const unsigned int component) const
+{
+  // presumably keeps 'component' from being reported as unused when the
+  // assertion below is compiled out
+  (void)component;
+  Assert (i<this->dofs_per_cell, ExcIndexRange(i,0,this->dofs_per_cell));
+  Assert (component == 0, ExcIndexRange (component, 0, 1));
+  return poly_space.compute_value(i, p);
+}
+
+
+
+// Gradient of the i-th shape function at p (first derivative of poly_space).
+template <class PolynomialType, int dim, int spacedim>
+Tensor<1,dim> FE_Poly<PolynomialType,dim,spacedim>::shape_grad
+(const unsigned int i, const Point<dim> &p) const
+{
+  Assert (i<this->dofs_per_cell, ExcIndexRange(i,0,this->dofs_per_cell));
+  return poly_space.template compute_derivative<1>(i, p);
+}
+
+
+
+// Component-wise variant of shape_grad(); only component 0 is accepted.
+template <class PolynomialType, int dim, int spacedim>
+Tensor<1,dim> FE_Poly<PolynomialType,dim,spacedim>::shape_grad_component
+(const unsigned int i, const Point<dim> &p, const unsigned int component) const
+{
+  // presumably avoids an unused-parameter warning when Assert is compiled out
+  (void)component;
+  Assert (i<this->dofs_per_cell, ExcIndexRange(i,0,this->dofs_per_cell));
+  Assert (component == 0, ExcIndexRange (component, 0, 1));
+  return poly_space.template compute_derivative<1>(i, p);
+}
+
+
+
+// Second derivatives of the i-th shape function at p, taken from poly_space.
+template <class PolynomialType, int dim, int spacedim>
+Tensor<2,dim> FE_Poly<PolynomialType,dim,spacedim>::shape_grad_grad
+(const unsigned int i, const Point<dim> &p) const
+{
+  Assert (i<this->dofs_per_cell, ExcIndexRange(i,0,this->dofs_per_cell));
+  return poly_space.template compute_derivative<2>(i, p);
+}
+
+
+
+// Component-wise variant of shape_grad_grad(); only component 0 is accepted.
+template <class PolynomialType, int dim, int spacedim>
+Tensor<2,dim> FE_Poly<PolynomialType,dim,spacedim>::shape_grad_grad_component
+(const unsigned int i, const Point<dim> &p, const unsigned int component) const
+{
+  // presumably keeps 'component' from being reported as unused when the
+  // assertion below is compiled out
+  (void)component;
+  Assert (i<this->dofs_per_cell, ExcIndexRange(i,0,this->dofs_per_cell));
+  Assert (component == 0, ExcIndexRange (component, 0, 1));
+  return poly_space.template compute_derivative<2>(i, p);
+}
+
+
+
+// Third derivatives of the i-th shape function at p, taken from poly_space.
+template <class PolynomialType, int dim, int spacedim>
+Tensor<3,dim> FE_Poly<PolynomialType,dim,spacedim>::shape_3rd_derivative
+(const unsigned int i, const Point<dim> &p) const
+{
+  Assert (i<this->dofs_per_cell, ExcIndexRange(i,0,this->dofs_per_cell));
+  return poly_space.template compute_derivative<3>(i, p);
+}
+
+
+
+// Component-wise variant of shape_3rd_derivative(); only component 0 is accepted.
+template <class PolynomialType, int dim, int spacedim>
+Tensor<3,dim> FE_Poly<PolynomialType,dim,spacedim>::shape_3rd_derivative_component
+(const unsigned int i, const Point<dim> &p, const unsigned int component) const
+{
+  // presumably keeps 'component' from being reported as unused when the
+  // assertion below is compiled out
+  (void)component;
+  Assert (i<this->dofs_per_cell, ExcIndexRange(i,0,this->dofs_per_cell));
+  Assert (component == 0, ExcIndexRange (component, 0, 1));
+  return poly_space.template compute_derivative<3>(i, p);
+}
+
+
+
+// Fourth derivatives of the i-th shape function at p, taken from poly_space.
+template <class PolynomialType, int dim, int spacedim>
+Tensor<4,dim> FE_Poly<PolynomialType,dim,spacedim>::shape_4th_derivative
+(const unsigned int i, const Point<dim> &p) const
+{
+  Assert (i<this->dofs_per_cell, ExcIndexRange(i,0,this->dofs_per_cell));
+  return poly_space.template compute_derivative<4>(i, p);
+}
+
+
+
+// Component-wise variant of shape_4th_derivative(); only component 0 is accepted.
+template <class PolynomialType, int dim, int spacedim>
+Tensor<4,dim> FE_Poly<PolynomialType,dim,spacedim>::shape_4th_derivative_component
+(const unsigned int i, const Point<dim> &p, const unsigned int component) const
+{
+  // presumably keeps 'component' from being reported as unused when the
+  // assertion below is compiled out
+  (void)component;
+  Assert (i<this->dofs_per_cell, ExcIndexRange(i,0,this->dofs_per_cell));
+  Assert (component == 0, ExcIndexRange (component, 0, 1));
+  return poly_space.template compute_derivative<4>(i, p);
+}
+
+
+
+//---------------------------------------------------------------------------
+// Auxiliary functions
+//---------------------------------------------------------------------------
+
+
+// Translate the quantities requested in flags into everything this element
+// needs, including the mapping data used when transforming derivatives.
+template <class PolynomialType, int dim, int spacedim>
+UpdateFlags FE_Poly<PolynomialType,dim,spacedim>::requires_update_flags (const UpdateFlags flags) const
+{
+  UpdateFlags required = update_default;
+  if (flags & update_values)
+    required |= update_values;
+  // gradients additionally request the covariant transformation
+  if (flags & update_gradients)
+    required |= update_gradients | update_covariant_transformation;
+  // each higher derivative also requests all lower-order derivatives
+  if (flags & update_hessians)
+    required |= update_hessians | update_gradients | update_covariant_transformation
+                | update_jacobian_pushed_forward_grads;
+  if (flags & update_3rd_derivatives)
+    required |= update_3rd_derivatives | update_hessians | update_gradients
+                | update_covariant_transformation | update_jacobian_pushed_forward_grads
+                | update_jacobian_pushed_forward_2nd_derivatives;
+  if (flags & update_cell_normal_vectors)
+    required |= update_cell_normal_vectors | update_JxW_values;
+  return required;
+}
+
+
+
+//---------------------------------------------------------------------------
+// Fill data of FEValues
+//---------------------------------------------------------------------------
+
+
+template <class PolynomialType, int dim, int spacedim>
+void
+FE_Poly<PolynomialType,dim,spacedim>::
+fill_fe_values (const typename Triangulation<dim,spacedim>::cell_iterator &,
+                const CellSimilarity::Similarity                                     cell_similarity,
+                const Quadrature<dim>                                               &quadrature,
+                const Mapping<dim,spacedim>                                         &mapping,
+                const typename Mapping<dim,spacedim>::InternalDataBase              &mapping_internal,
+                const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &mapping_data,
+                const typename FiniteElement<dim,spacedim>::InternalDataBase        &fe_internal,
+                dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const
+{
+  // convert the generic data object to the InternalData of this class.
+  // the Assert uses dynamic_cast to check that the object really has
+  // that type (ExcInternalError otherwise); the conversion itself is an
+  // unchecked static_cast
+  Assert (dynamic_cast<const InternalData *> (&fe_internal) != 0, ExcInternalError());
+  const InternalData &fe_data = static_cast<const InternalData &> (fe_internal);
+
+  const UpdateFlags flags(fe_data.update_each);
+  // transform gradients and higher derivatives; every step below is skipped
+  // when cell_similarity is CellSimilarity::translation. there is nothing to
+  // do for values since we already emplaced them into output_data when
+  // we were in get_data()
+  if (flags & update_gradients && cell_similarity != CellSimilarity::translation)
+    for (unsigned int k=0; k<this->dofs_per_cell; ++k)
+      mapping.transform (make_array_view(fe_data.shape_gradients, k),
+                         mapping_covariant,
+                         mapping_internal,
+                         make_array_view(output_data.shape_gradients, k));
+
+  if (flags & update_hessians && cell_similarity != CellSimilarity::translation)
+    {
+      for (unsigned int k=0; k<this->dofs_per_cell; ++k)
+        mapping.transform (make_array_view(fe_data.shape_hessians, k),
+                           mapping_covariant_gradient,
+                           mapping_internal,
+                           make_array_view(output_data.shape_hessians, k));
+      // subtract the pushed-forward Jacobian gradient contracted (over j) with the transformed gradient
+      for (unsigned int k=0; k<this->dofs_per_cell; ++k)
+        for (unsigned int i=0; i<quadrature.size(); ++i)
+          for (unsigned int j=0; j<spacedim; ++j)
+            output_data.shape_hessians[k][i] -=
+              mapping_data.jacobian_pushed_forward_grads[i][j]
+              * output_data.shape_gradients[k][i][j];
+    }
+
+  if (flags & update_3rd_derivatives && cell_similarity != CellSimilarity::translation)
+    {
+      for (unsigned int k=0; k<this->dofs_per_cell; ++k)
+        mapping.transform (make_array_view(fe_data.shape_3rd_derivatives, k),
+                           mapping_covariant_hessian,
+                           mapping_internal,
+                           make_array_view(output_data.shape_3rd_derivatives, k));
+      // apply the remaining correction terms, one shape function at a time
+      for (unsigned int k=0; k<this->dofs_per_cell; ++k)
+        correct_third_derivatives(output_data, mapping_data, quadrature.size(), k);
+    }
+}
+
+
+
+template <class PolynomialType, int dim, int spacedim>
+void
+FE_Poly<PolynomialType,dim,spacedim>::
+fill_fe_face_values (const typename Triangulation<dim,spacedim>::cell_iterator           &cell,
+                     const unsigned int                                                   face_no,
+                     const Quadrature<dim-1>                                             &quadrature,
+                     const Mapping<dim,spacedim>                                         &mapping,
+                     const typename Mapping<dim,spacedim>::InternalDataBase              &mapping_internal,
+                     const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &mapping_data,
+                     const typename FiniteElement<dim,spacedim>::InternalDataBase        &fe_internal,
+                     dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const
+{
+  // convert the generic data object to the InternalData of this class.
+  // the Assert uses dynamic_cast to check that the object really has
+  // that type (ExcInternalError otherwise); the conversion itself is an
+  // unchecked static_cast
+  Assert (dynamic_cast<const InternalData *> (&fe_internal) != 0, ExcInternalError());
+  const InternalData &fe_data = static_cast<const InternalData &> (fe_internal);
+
+  // offset determines which data set
+  // to take (all data sets for all
+  // faces are stored contiguously)
+
+  const typename QProjector<dim>::DataSetDescriptor offset
+    = QProjector<dim>::DataSetDescriptor::face (face_no,
+                                                cell->face_orientation(face_no),
+                                                cell->face_flip(face_no),
+                                                cell->face_rotation(face_no),
+                                                quadrature.size());
+
+  const UpdateFlags flags(fe_data.update_each);
+
+  // transform gradients and higher derivatives. we also have to copy
+  // the values (unlike in the case of fill_fe_values()) since
+  // we need to take into account the offsets
+  if (flags & update_values)
+    for (unsigned int k=0; k<this->dofs_per_cell; ++k)
+      for (unsigned int i=0; i<quadrature.size(); ++i)
+        output_data.shape_values(k,i) = fe_data.shape_values[k][i+offset];
+
+  if (flags & update_gradients)
+    for (unsigned int k=0; k<this->dofs_per_cell; ++k)
+      mapping.transform (make_array_view(fe_data.shape_gradients, k, offset, quadrature.size()),
+                         mapping_covariant,
+                         mapping_internal,
+                         make_array_view(output_data.shape_gradients, k));
+
+  if (flags & update_hessians)
+    {
+      for (unsigned int k=0; k<this->dofs_per_cell; ++k)
+        mapping.transform (make_array_view(fe_data.shape_hessians, k, offset, quadrature.size()),
+                           mapping_covariant_gradient,
+                           mapping_internal,
+                           make_array_view(output_data.shape_hessians, k));
+      // subtract the pushed-forward Jacobian gradient contracted (over j) with the transformed gradient
+      for (unsigned int k=0; k<this->dofs_per_cell; ++k)
+        for (unsigned int i=0; i<quadrature.size(); ++i)
+          for (unsigned int j=0; j<spacedim; ++j)
+            output_data.shape_hessians[k][i] -=
+              mapping_data.jacobian_pushed_forward_grads[i][j]
+              * output_data.shape_gradients[k][i][j];
+    }
+
+  if (flags & update_3rd_derivatives)
+    {
+      for (unsigned int k=0; k<this->dofs_per_cell; ++k)
+        mapping.transform (make_array_view(fe_data.shape_3rd_derivatives, k, offset, quadrature.size()),
+                           mapping_covariant_hessian,
+                           mapping_internal,
+                           make_array_view(output_data.shape_3rd_derivatives, k));
+      // apply the remaining correction terms, one shape function at a time
+      for (unsigned int k=0; k<this->dofs_per_cell; ++k)
+        correct_third_derivatives(output_data, mapping_data, quadrature.size(), k);
+    }
+}
+
+
+
+template <class PolynomialType, int dim, int spacedim>
+void
+FE_Poly<PolynomialType,dim,spacedim>::
+fill_fe_subface_values (const typename Triangulation<dim,spacedim>::cell_iterator           &cell,
+                        const unsigned int                                                   face_no,
+                        const unsigned int                                                   sub_no,
+                        const Quadrature<dim-1>                                             &quadrature,
+                        const Mapping<dim,spacedim>                                         &mapping,
+                        const typename Mapping<dim,spacedim>::InternalDataBase              &mapping_internal,
+                        const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &mapping_data,
+                        const typename FiniteElement<dim,spacedim>::InternalDataBase        &fe_internal,
+                        dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const
+{
+  // convert the generic data object to the InternalData of this class.
+  // the Assert uses dynamic_cast to check that the object really has
+  // that type (ExcInternalError otherwise); the conversion itself is an
+  // unchecked static_cast
+  Assert (dynamic_cast<const InternalData *> (&fe_internal) != 0, ExcInternalError());
+  const InternalData &fe_data = static_cast<const InternalData &> (fe_internal);
+
+  // offset determines which data set
+  // to take (all data sets for all
+  // sub-faces are stored contiguously)
+
+  const typename QProjector<dim>::DataSetDescriptor offset
+    = QProjector<dim>::DataSetDescriptor::subface (face_no, sub_no,
+                                                   cell->face_orientation(face_no),
+                                                   cell->face_flip(face_no),
+                                                   cell->face_rotation(face_no),
+                                                   quadrature.size(),
+                                                   cell->subface_case(face_no));
+
+  const UpdateFlags flags(fe_data.update_each);
+
+  // transform gradients and higher derivatives. we also have to copy
+  // the values (unlike in the case of fill_fe_values()) since
+  // we need to take into account the offsets
+  if (flags & update_values)
+    for (unsigned int k=0; k<this->dofs_per_cell; ++k)
+      for (unsigned int i=0; i<quadrature.size(); ++i)
+        output_data.shape_values(k,i) = fe_data.shape_values[k][i+offset];
+
+  if (flags & update_gradients)
+    for (unsigned int k=0; k<this->dofs_per_cell; ++k)
+      mapping.transform (make_array_view(fe_data.shape_gradients, k, offset, quadrature.size()),
+                         mapping_covariant,
+                         mapping_internal,
+                         make_array_view(output_data.shape_gradients, k));
+
+  if (flags & update_hessians)
+    {
+      for (unsigned int k=0; k<this->dofs_per_cell; ++k)
+        mapping.transform (make_array_view(fe_data.shape_hessians, k, offset, quadrature.size()),
+                           mapping_covariant_gradient,
+                           mapping_internal,
+                           make_array_view(output_data.shape_hessians, k));
+      // subtract the pushed-forward Jacobian gradient contracted (over j) with the transformed gradient
+      for (unsigned int k=0; k<this->dofs_per_cell; ++k)
+        for (unsigned int i=0; i<quadrature.size(); ++i)
+          for (unsigned int j=0; j<spacedim; ++j)
+            output_data.shape_hessians[k][i] -=
+              mapping_data.jacobian_pushed_forward_grads[i][j]
+              * output_data.shape_gradients[k][i][j];
+    }
+
+  if (flags & update_3rd_derivatives)
+    {
+      for (unsigned int k=0; k<this->dofs_per_cell; ++k)
+        mapping.transform (make_array_view(fe_data.shape_3rd_derivatives, k, offset, quadrature.size()),
+                           mapping_covariant_hessian,
+                           mapping_internal,
+                           make_array_view(output_data.shape_3rd_derivatives, k));
+      // apply the remaining correction terms, one shape function at a time
+      for (unsigned int k=0; k<this->dofs_per_cell; ++k)
+        correct_third_derivatives(output_data, mapping_data, quadrature.size(), k);
+    }
+}
+
+
+
+// Subtract from the transformed third derivatives of shape function dof the
+// terms involving the pushed-forward first and second Jacobian derivatives.
+template <class PolynomialType, int dim, int spacedim>
+inline void FE_Poly<PolynomialType,dim,spacedim>::correct_third_derivatives
+(internal::FEValues::FiniteElementRelatedData<dim,spacedim> &output_data,
+ const internal::FEValues::MappingRelatedData<dim,spacedim> &mapping_data,
+ const unsigned int                                          n_q_points,
+ const unsigned int                                          dof) const
+{
+  // q runs over quadrature points, (j,k,l) over the derivative indices and
+  // m over the index that is contracted with the lower-order derivatives
+  for (unsigned int q=0; q<n_q_points; ++q)
+    for (unsigned int j=0; j<spacedim; ++j)
+      for (unsigned int k=0; k<spacedim; ++k)
+        for (unsigned int l=0; l<spacedim; ++l)
+          for (unsigned int m=0; m<spacedim; ++m)
+            output_data.shape_3rd_derivatives[dof][q][j][k][l] -=
+              (mapping_data.jacobian_pushed_forward_grads[q][m][j][l]
+               * output_data.shape_hessians[dof][q][k][m])
+              + (mapping_data.jacobian_pushed_forward_grads[q][m][k][l]
+                 * output_data.shape_hessians[dof][q][j][m])
+              + (mapping_data.jacobian_pushed_forward_grads[q][m][j][k]
+                 * output_data.shape_hessians[dof][q][l][m])
+              + (mapping_data.jacobian_pushed_forward_2nd_derivatives[q][m][j][k][l]
+                 * output_data.shape_gradients[dof][q][m]);
+}
+
+namespace internal
+{
+  // Generic fallback: no numbering is available, so this is not implemented.
+  template <class PolynomialType>
+  inline std::vector<unsigned int>
+  get_poly_space_numbering (const PolynomialType &)
+  {
+    Assert (false, ExcNotImplemented());
+    return std::vector<unsigned int>();
+  }
+
+  // Generic fallback for the inverse numbering: not implemented either.
+  template <class PolynomialType>
+  inline std::vector<unsigned int>
+  get_poly_space_numbering_inverse (const PolynomialType &)
+  {
+    Assert (false, ExcNotImplemented());
+    return std::vector<unsigned int>();
+  }
+
+  // TensorProductPolynomials: hand back the numbering stored in the space.
+  template <int dim, typename PolynomialType>
+  inline std::vector<unsigned int>
+  get_poly_space_numbering (const TensorProductPolynomials<dim,PolynomialType> &poly)
+  {
+    return poly.get_numbering();
+  }
+
+  // TensorProductPolynomials: hand back the inverse numbering of the space.
+  template <int dim, typename PolynomialType>
+  inline std::vector<unsigned int>
+  get_poly_space_numbering_inverse (const TensorProductPolynomials<dim,PolynomialType> &poly)
+  {
+    return poly.get_numbering_inverse();
+  }
+
+  // TensorProductPolynomialsConst: hand back the numbering of the space.
+  template <int dim>
+  inline std::vector<unsigned int>
+  get_poly_space_numbering (const TensorProductPolynomialsConst<dim> &poly)
+  {
+    return poly.get_numbering();
+  }
+
+  // TensorProductPolynomialsConst: hand back the inverse numbering.
+  template <int dim>
+  inline std::vector<unsigned int>
+  get_poly_space_numbering_inverse (const TensorProductPolynomialsConst<dim> &poly)
+  {
+    return poly.get_numbering_inverse();
+  }
+}
+
+
+
+// Ask the overload set in namespace internal for the numbering of poly_space.
+template <class PolynomialType, int dim, int spacedim>
+std::vector<unsigned int> FE_Poly<PolynomialType,dim,spacedim>::get_poly_space_numbering () const
+{
+  return internal::get_poly_space_numbering (poly_space);
+}
+
+
+
+
+// Ask the overload set in namespace internal for the inverse numbering of poly_space.
+template <class PolynomialType, int dim, int spacedim>
+std::vector<unsigned int> FE_Poly<PolynomialType,dim,spacedim>::get_poly_space_numbering_inverse () const
+{
+  return internal::get_poly_space_numbering_inverse (poly_space);
+}
+
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/include/deal.II/fe/fe_poly_face.h b/include/deal.II/fe/fe_poly_face.h
new file mode 100644
index 0000000..2605841
--- /dev/null
+++ b/include/deal.II/fe/fe_poly_face.h
@@ -0,0 +1,224 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__fe_poly_face_h
+#define dealii__fe_poly_face_h
+
+
+#include <deal.II/base/qprojector.h>
+#include <deal.II/fe/fe.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+/*!@addtogroup febase */
+/*@{*/
+
+/**
+ * @warning This class is not sufficiently tested yet!
+ *
+ * This class gives a unified framework for the implementation of
+ * FiniteElement classes that are located only on faces of the mesh. They are
+ * based on polynomial spaces such as the TensorProductPolynomials or
+ * PolynomialSpace classes.
+ *
+ * Every class that implements the following functions can be used as template
+ * parameter PolynomialType.
+ *
+ * @code
+ * double compute_value (const unsigned int i,
+ *                       const Point<dim> &p) const;
+ * @endcode
+ * Example classes are TensorProductPolynomials, PolynomialSpace or
+ * PolynomialsP.
+ *
+ * This class is not a fully implemented FiniteElement class. Instead there
+ * are several pure virtual functions declared in the FiniteElement class
+ * which cannot be implemented by this class but are left for implementation
+ * in derived classes.
+ *
+ * @author Guido Kanschat, 2009
+ */
+template <class PolynomialType, int dim=PolynomialType::dimension+1, int spacedim=dim>
+class FE_PolyFace : public FiniteElement<dim,spacedim>
+{
+public:
+  /**
+   * Constructor.
+   */
+  FE_PolyFace (const PolynomialType &poly_space,
+               const FiniteElementData<dim> &fe_data,
+               const std::vector<bool> &restriction_is_additive_flags);
+
+  /**
+   * Return the polynomial degree of this finite element, i.e. the value
+   * passed to the constructor.
+   */
+  unsigned int get_degree () const;
+
+  // for documentation, see the FiniteElement base class
+  virtual
+  UpdateFlags
+  requires_update_flags (const UpdateFlags update_flags) const;
+
+protected:
+  /*
+   * NOTE: The following functions have their definitions inlined into the class declaration
+   * because we otherwise run into a compiler error with MS Visual Studio.
+   */
+
+  // Create an InternalData object without precomputing anything in it.
+  virtual
+  typename FiniteElement<dim,spacedim>::InternalDataBase *
+  get_data (const UpdateFlags                                                    /*update_flags*/,
+            const Mapping<dim,spacedim>                                         &/*mapping*/,
+            const Quadrature<dim>                                               &/*quadrature*/,
+            dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &/*output_data*/) const
+  {
+    InternalData *data = new InternalData;
+    return data;
+  }
+
+  typename FiniteElement<dim,spacedim>::InternalDataBase *
+  get_face_data(const UpdateFlags                                                    update_flags,
+                const Mapping<dim,spacedim>                                         &/*mapping*/,
+                const Quadrature<dim-1>                                             &quadrature,
+                dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &/*output_data*/) const
+  {
+    // generate a new data object and
+    // initialize some fields
+    InternalData *data = new InternalData;
+    data->update_each = requires_update_flags(update_flags);
+
+    const unsigned int n_q_points = quadrature.size();
+
+    // scratch arrays for poly_space.compute(); only 'values' is ever resized
+    std::vector<double> values(0);
+    std::vector<Tensor<1,dim-1> > grads(0);
+    std::vector<Tensor<2,dim-1> > grad_grads(0);
+    std::vector<Tensor<3,dim-1> > empty_vector_of_3rd_order_tensors;
+    std::vector<Tensor<4,dim-1> > empty_vector_of_4th_order_tensors;
+
+    // initialize fields only if really
+    // necessary. otherwise, don't
+    // allocate memory
+    if (data->update_each & update_values)
+      {
+        values.resize (poly_space.n());
+        data->shape_values.resize (poly_space.n(),
+                                   std::vector<double> (n_q_points));
+        for (unsigned int i=0; i<n_q_points; ++i)
+          {
+            poly_space.compute(quadrature.point(i),
+                               values, grads, grad_grads,
+                               empty_vector_of_3rd_order_tensors,
+                               empty_vector_of_4th_order_tensors);
+
+            for (unsigned int k=0; k<poly_space.n(); ++k)
+              data->shape_values[k][i] = values[k];
+          }
+      }
+    // No derivatives of this element are implemented: requesting
+    // gradients or hessians triggers ExcNotImplemented.
+    if (data->update_each & update_gradients || data->update_each & update_hessians)
+      {
+        Assert(false, ExcNotImplemented());
+      }
+
+    return data;
+  }
+  // Same as get_face_data(), called with the quadrature projected to all children.
+  typename FiniteElement<dim,spacedim>::InternalDataBase *
+  get_subface_data(const UpdateFlags                                                    update_flags,
+                   const Mapping<dim,spacedim>                                         &mapping,
+                   const Quadrature<dim-1>                                             &quadrature,
+                   dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const
+  {
+    return get_face_data(update_flags, mapping,
+                         QProjector<dim - 1>::project_to_all_children(quadrature),
+                         output_data);
+  }
+
+  virtual
+  void
+  fill_fe_values (const typename Triangulation<dim,spacedim>::cell_iterator           &cell,
+                  const CellSimilarity::Similarity                                     cell_similarity,
+                  const Quadrature<dim>                                               &quadrature,
+                  const Mapping<dim,spacedim>                                         &mapping,
+                  const typename Mapping<dim,spacedim>::InternalDataBase              &mapping_internal,
+                  const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &mapping_data,
+                  const typename FiniteElement<dim,spacedim>::InternalDataBase        &fe_internal,
+                  dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const;
+
+  virtual
+  void
+  fill_fe_face_values (const typename Triangulation<dim,spacedim>::cell_iterator           &cell,
+                       const unsigned int                                                   face_no,
+                       const Quadrature<dim-1>                                             &quadrature,
+                       const Mapping<dim,spacedim>                                         &mapping,
+                       const typename Mapping<dim,spacedim>::InternalDataBase              &mapping_internal,
+                       const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &mapping_data,
+                       const typename FiniteElement<dim,spacedim>::InternalDataBase        &fe_internal,
+                       dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const;
+
+  virtual
+  void
+  fill_fe_subface_values (const typename Triangulation<dim,spacedim>::cell_iterator           &cell,
+                          const unsigned int                                                   face_no,
+                          const unsigned int                                                   sub_no,
+                          const Quadrature<dim-1>                                             &quadrature,
+                          const Mapping<dim,spacedim>                                         &mapping,
+                          const typename Mapping<dim,spacedim>::InternalDataBase              &mapping_internal,
+                          const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &mapping_data,
+                          const typename FiniteElement<dim,spacedim>::InternalDataBase        &fe_internal,
+                          dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const;
+
+  /**
+   * Fields of cell-independent data.
+   *
+   * For information about the general purpose of this class, see the
+   * documentation of the base class.
+   */
+  class InternalData : public FiniteElement<dim,spacedim>::InternalDataBase
+  {
+  public:
+    /**
+     * Array with shape function values in quadrature points on one face.
+     * There is one row for each shape function, containing values for each
+     * quadrature point.
+     *
+     * In this array, we store the values of the shape function in the
+     * quadrature points on one face of the unit cell. Since these values do
+     * not change under transformation to the real cell, we only need to copy
+     * them over when visiting a concrete cell.
+     *
+     * In particular, we can simply copy the same set of values to each of the
+     * faces.
+     */
+    std::vector<std::vector<double> > shape_values;
+  };
+
+  /**
+   * The polynomial space. Its type is given by the template parameter
+   * PolynomialType.
+   */
+  PolynomialType poly_space;
+};
+
+/*@}*/
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/fe/fe_poly_face.templates.h b/include/deal.II/fe/fe_poly_face.templates.h
new file mode 100644
index 0000000..6b9a74e
--- /dev/null
+++ b/include/deal.II/fe/fe_poly_face.templates.h
@@ -0,0 +1,196 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/qprojector.h>
+#include <deal.II/base/polynomial_space.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/fe_poly_face.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+template <class PolynomialType, int dim, int spacedim>
+FE_PolyFace<PolynomialType,dim,spacedim>::
+FE_PolyFace (const PolynomialType         &poly_space,
+             const FiniteElementData<dim> &fe_data,
+             const std::vector<bool>      &restriction_is_additive_flags)
+  :
+  FiniteElement<dim,spacedim> (fe_data, restriction_is_additive_flags,
+                               std::vector<ComponentMask> (1, ComponentMask(1,true))),
+  poly_space(poly_space)
+{
+  AssertDimension(dim, PolynomialType::dimension+1); // polynomials live in dim-1 dimensions
+}
+
+
+template <class PolynomialType, int dim, int spacedim>
+unsigned int FE_PolyFace<PolynomialType,dim,spacedim>::get_degree () const
+{
+  const unsigned int element_degree = this->degree; // as stored with this element
+  return element_degree;
+}
+
+
+//---------------------------------------------------------------------------
+// Auxiliary functions
+//---------------------------------------------------------------------------
+
+
+template <class PolynomialType, int dim, int spacedim>
+UpdateFlags
+FE_PolyFace<PolynomialType,dim,spacedim>::requires_update_flags (const UpdateFlags flags) const
+{
+  // keep the value flag if requested; every other request also pulls in what it depends on
+  UpdateFlags required = flags & update_values;
+  if (flags & update_gradients)
+    required |= update_gradients | update_covariant_transformation;
+  if (flags & update_hessians)
+    required |= update_hessians | update_covariant_transformation;
+  if (flags & update_cell_normal_vectors)
+    required |= update_cell_normal_vectors | update_JxW_values;
+  return required;
+}
+
+
+//---------------------------------------------------------------------------
+// Fill data of FEValues
+//---------------------------------------------------------------------------
+template <class PolynomialType, int dim, int spacedim>
+void
+FE_PolyFace<PolynomialType,dim,spacedim>::
+fill_fe_values (const typename Triangulation<dim,spacedim>::cell_iterator           &/*cell*/,
+                const CellSimilarity::Similarity                                     /*cell_similarity*/,
+                const Quadrature<dim>                                               &/*quadrature*/,
+                const Mapping<dim,spacedim>                                         &/*mapping*/,
+                const typename Mapping<dim,spacedim>::InternalDataBase              &/*mapping_internal*/,
+                const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &/*mapping_data*/,
+                const typename FiniteElement<dim,spacedim>::InternalDataBase        &/*fe_internal*/,
+                dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &/*output_data*/) const
+{
+  // Intentionally empty: this element has no
+  // shape function values in the cell interior
+}
+
+
+
+template <class PolynomialType, int dim, int spacedim>
+void
+FE_PolyFace<PolynomialType,dim,spacedim>::
+fill_fe_face_values (const typename Triangulation<dim,spacedim>::cell_iterator &,
+                     const unsigned int                                                   face_no,
+                     const Quadrature<dim-1>                                             &quadrature,
+                     const Mapping<dim,spacedim> &,
+                     const typename Mapping<dim,spacedim>::InternalDataBase &,
+                     const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &,
+                     const typename FiniteElement<dim,spacedim>::InternalDataBase        &fe_internal,
+                     dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const
+{
+  // Copy the tabulated face values of fe_internal into the rows of
+  // output_data that belong to face face_no; all other rows are zeroed.
+  // fe_internal must be this class's InternalData: the Assert below fails
+  // with an exception if that is not the case.
+  Assert (dynamic_cast<const InternalData *> (&fe_internal) != 0, ExcInternalError());
+  const InternalData &fe_data = static_cast<const InternalData &> (fe_internal);
+
+  if (fe_data.update_each & update_values)
+    for (unsigned int i=0; i<quadrature.size(); ++i)
+      {
+        for (unsigned int k=0; k<this->dofs_per_cell; ++k)
+          output_data.shape_values(k,i) = 0.;
+        switch (dim)
+          {
+          case 3:
+          {
+            // Fill data for quad shape functions (reached only for dim == 3)
+            if (this->dofs_per_quad !=0)
+              {
+                const unsigned int foffset = this->first_quad_index + this->dofs_per_quad * face_no;
+                for (unsigned int k=0; k<this->dofs_per_quad; ++k)
+                  output_data.shape_values(foffset+k,i) = fe_data.shape_values[k+this->first_face_quad_index][i];
+              }
+
+            // fall through to also fill the line and vertex shape functions
+          }
+
+          case 2:
+          {
+            // Fill data for line shape functions, one block of dofs_per_line rows per line of the face
+            if (this->dofs_per_line != 0)
+              {
+                const unsigned int foffset = this->first_line_index;
+                for (unsigned int line=0; line<GeometryInfo<dim>::lines_per_face; ++line)
+                  {
+                    for (unsigned int k=0; k<this->dofs_per_line; ++k)
+                      output_data.shape_values(foffset+GeometryInfo<dim>::face_to_cell_lines(face_no, line)*this->dofs_per_line+k,i)
+                        = fe_data.shape_values[k+(line*this->dofs_per_line)+this->first_face_line_index][i];
+                  }
+              }
+
+            // fall through to also fill the vertex shape functions
+          }
+
+          case 1:
+          {
+            // Fill data for vertex shape functions. NOTE(review): rows are indexed by the vertex number alone, which presumes one dof per vertex -- confirm
+            if (this->dofs_per_vertex != 0)
+              for (unsigned int lvertex=0; lvertex<GeometryInfo<dim>::vertices_per_face; ++lvertex)
+                output_data.shape_values(GeometryInfo<dim>::face_to_cell_vertices(face_no, lvertex),i)
+                  = fe_data.shape_values[lvertex][i];
+            break;
+          }
+          }
+      }
+}
+
+
+template <class PolynomialType, int dim, int spacedim>
+void
+FE_PolyFace<PolynomialType,dim,spacedim>::
+fill_fe_subface_values (const typename Triangulation<dim,spacedim>::cell_iterator &,
+                        const unsigned int                                                   face_no,
+                        const unsigned int                                                   sub_no,
+                        const Quadrature<dim-1>                                             &quadrature,
+                        const Mapping<dim,spacedim> &,
+                        const typename Mapping<dim,spacedim>::InternalDataBase &,
+                        const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &,
+                        const typename FiniteElement<dim,spacedim>::InternalDataBase        &fe_internal,
+                        dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const
+{
+  // fe_internal has to be the InternalData object of this class; check before downcasting
+  Assert (dynamic_cast<const InternalData *> (&fe_internal) != 0, ExcInternalError());
+  const InternalData &fe_data = static_cast<const InternalData &> (fe_internal);
+
+  // first output row used for this face, first tabulated column used for this subface
+  const unsigned int n_q_points   = quadrature.size();
+  const unsigned int first_row    = fe_data.shape_values.size() * face_no;
+  const unsigned int first_column = sub_no*n_q_points;
+
+  if (fe_data.update_each & update_values)
+    {
+      for (unsigned int row=0; row<this->dofs_per_cell; ++row)
+        for (unsigned int q=0; q<n_q_points; ++q)
+          output_data.shape_values(row,q) = 0.;
+      for (unsigned int fdof=0; fdof<fe_data.shape_values.size(); ++fdof)
+        for (unsigned int q=0; q<n_q_points; ++q)
+          output_data.shape_values(first_row+fdof,q) = fe_data.shape_values[fdof][q+first_column];
+    }
+
+  // gradients and Hessians are not implemented for subfaces
+  Assert (!(fe_data.update_each & update_gradients), ExcNotImplemented());
+  Assert (!(fe_data.update_each & update_hessians), ExcNotImplemented());
+}
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/include/deal.II/fe/fe_poly_tensor.h b/include/deal.II/fe/fe_poly_tensor.h
new file mode 100644
index 0000000..a754a59
--- /dev/null
+++ b/include/deal.II/fe/fe_poly_tensor.h
@@ -0,0 +1,437 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__fe_poly_tensor_h
+#define dealii__fe_poly_tensor_h
+
+
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/base/derivative_form.h>
+#include <deal.II/base/quadrature.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * This class gives a unified framework for the implementation of
+ * FiniteElement classes based on Tensor valued polynomial spaces like
+ * PolynomialsBDM and PolynomialsRaviartThomas.
+ *
+ * Every class implementing the following function can be used as PolynomialType:
+ * @code
+ * void compute (const Point<dim>            &unit_point,
+ *               std::vector<Tensor<1,dim> > &values,
+ *               std::vector<Tensor<2,dim> > &grads,
+ *               std::vector<Tensor<3,dim> > &grad_grads,
+ *               std::vector<Tensor<4,dim> > &third_derivatives,
+ *               std::vector<Tensor<5,dim> > &fourth_derivatives) const;
+ * @endcode
+ *
+ * In many cases, the node functionals depend on the shape of the mesh cell,
+ * since they evaluate normal or tangential components on the faces. In order
+ * to allow for a set of transformations, the variable #mapping_type has been
+ * introduced. It should also be set in the constructor of a derived class.
+ *
+ * This class is not a fully implemented FiniteElement class, but implements
+ * some common features of vector valued elements based on vector valued
+ * polynomial classes. What's missing here in particular is information on the
+ * topological location of the node values.
+ *
+ * For more information on the template parameter <tt>spacedim</tt>, see the
+ * documentation for the class Triangulation.
+ *
+ * <h3>Deriving classes</h3>
+ *
+ * Any derived class must decide on the polynomial space to use.  This
+ * polynomial space should be implemented simply as a set of vector valued
+ * polynomials like PolynomialsBDM and PolynomialsRaviartThomas.  In order to
+ * facilitate this implementation, the basis of this space may be arbitrary.
+ *
+ * <h4>Determining the correct basis</h4>
+ *
+ * In most cases, the set of desired node values $N_i$ and the basis functions
+ * $v_j$ will not fulfill the interpolation condition $N_i(v_j) =
+ * \delta_{ij}$.
+ *
+ * The use of the member data #inverse_node_matrix allows to compute the basis
+ * $v_j$ automatically, after the node values for each original basis function
+ * of the polynomial space have been computed.
+ *
+ * Therefore, the constructor of a derived class should have a structure like
+ * this (example for interpolation in support points):
+ *
+ * @code
+ *  fill_support_points();
+ *
+ *  const unsigned int n_dofs = this->dofs_per_cell;
+ *  FullMatrix<double> N(n_dofs, n_dofs);
+ *
+ *  for (unsigned int i=0;i<n_dofs;++i)
+ *    {
+ *      const Point<dim>& p = this->unit_support_point[i];
+ *
+ *      for (unsigned int j=0;j<n_dofs;++j)
+ *        for (unsigned int d=0;d<dim;++d)
+ *          N(i,j) += node_vector[i][d]
+ *                  * this->shape_value_component(j, p, d);
+ *    }
+ *
+ *  this->inverse_node_matrix.reinit(n_dofs, n_dofs);
+ *  this->inverse_node_matrix.invert(N);
+ * @endcode
+ *
+ * @note The matrix #inverse_node_matrix should have dimensions zero before
+ * this piece of code is executed. Only then, shape_value_component() will
+ * return the raw polynomial <i>j</i> as defined in the polynomial space
+ * PolynomialType.
+ *
+ * <h4>Setting the transformation</h4>
+ *
+ * In most cases, vector valued basis functions must be transformed when
+ * mapped from the reference cell to the actual grid cell. These
+ * transformations can be selected from the set MappingType and stored in
+ * #mapping_type. Therefore, each constructor should contain a line like:
+ * @code
+ * this->mapping_type = this->mapping_none;
+ * @endcode
+ *
+ * @see PolynomialsBDM, PolynomialsRaviartThomas
+ * @ingroup febase
+ * @author Guido Kanschat
+ * @date 2005
+ */
+template <class PolynomialType, int dim, int spacedim=dim>
+class FE_PolyTensor : public FiniteElement<dim,spacedim>
+{
+public:
+  /**
+   * Constructor.
+   *
+   * @arg @c degree: constructor argument for poly. May be different from @p
+   * fe_data.degree.
+   */
+  FE_PolyTensor (const unsigned int degree,
+                 const FiniteElementData<dim> &fe_data,
+                 const std::vector<bool> &restriction_is_additive_flags,
+                 const std::vector<ComponentMask> &nonzero_components);
+
+  // for documentation, see the FiniteElement base class
+  virtual
+  UpdateFlags
+  requires_update_flags (const UpdateFlags update_flags) const;
+
+  /**
+   * Compute the (scalar) value of shape function @p i at the given quadrature
+   * point @p p. Since the elements represented by this class are vector
+   * valued, there is no such scalar value and the function therefore throws
+   * an exception.
+   */
+  virtual double shape_value (const unsigned int i,
+                              const Point<dim> &p) const;
+
+  // documentation inherited from the base class
+  virtual double shape_value_component (const unsigned int i,
+                                        const Point<dim> &p,
+                                        const unsigned int component) const;
+
+  /**
+   * Compute the gradient of (scalar) shape function @p i at the given
+   * quadrature point @p p. Since the elements represented by this class are
+   * vector valued, there is no such scalar value and the function therefore
+   * throws an exception.
+   */
+  virtual Tensor<1,dim> shape_grad (const unsigned int  i,
+                                    const Point<dim>   &p) const;
+
+  // documentation inherited from the base class
+  virtual Tensor<1,dim> shape_grad_component (const unsigned int i,
+                                              const Point<dim> &p,
+                                              const unsigned int component) const;
+
+  /**
+   * Compute the Hessian of (scalar) shape function @p i at the given
+   * quadrature point @p p. Since the elements represented by this class are
+   * vector valued, there is no such scalar value and the function therefore
+   * throws an exception.
+   */
+  virtual Tensor<2,dim> shape_grad_grad (const unsigned int  i,
+                                         const Point<dim> &p) const;
+
+  // documentation inherited from the base class
+  virtual Tensor<2,dim> shape_grad_grad_component (const unsigned int i,
+                                                   const Point<dim> &p,
+                                                   const unsigned int component) const;
+
+protected:
+  /**
+   * The mapping type to be used to map shape functions from the reference
+   * cell to the mesh cell.
+   */
+  MappingType mapping_type;
+
+
+  /* NOTE: The following function has its definition inlined into the class declaration
+     because we otherwise run into a compiler error with MS Visual Studio. */
+  virtual
+  typename FiniteElement<dim,spacedim>::InternalDataBase *
+  get_data(const UpdateFlags                                                    update_flags,
+           const Mapping<dim,spacedim>                                         &/*mapping*/,
+           const Quadrature<dim>                                               &quadrature,
+           dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &/*output_data*/) const
+  {
+    // Tabulate what update_flags selects at all points of quadrature and return
+    // it in a newly allocated InternalData object that the caller takes over
+    InternalData *data = new InternalData;
+    data->update_each = requires_update_flags(update_flags);
+
+    const unsigned int n_q_points = quadrature.size();
+
+    // scratch arrays for poly_space.compute(); only those needed are sized below
+    std::vector<Tensor<1,dim> > values(0);
+    std::vector<Tensor<2,dim> > grads(0);
+    std::vector<Tensor<3,dim> > grad_grads(0);
+    std::vector<Tensor<4,dim> > third_derivatives(0);
+    std::vector<Tensor<5,dim> > fourth_derivatives(0);
+
+    if (update_flags & (update_values | update_gradients | update_hessians) )
+      data->sign_change.resize (this->dofs_per_cell);
+
+    // initialize fields only if really
+    // necessary. otherwise, don't
+    // allocate memory
+    if (update_flags & update_values)
+      {
+        values.resize (this->dofs_per_cell);
+        data->shape_values.reinit (this->dofs_per_cell, n_q_points);
+        if (mapping_type != mapping_none)
+          data->transformed_shape_values.resize (n_q_points);
+      }
+
+    if (update_flags & update_gradients)
+      {
+        grads.resize (this->dofs_per_cell);
+        data->shape_grads.reinit (this->dofs_per_cell, n_q_points);
+        data->transformed_shape_grads.resize (n_q_points);
+
+        if ( (mapping_type == mapping_raviart_thomas)
+             ||
+             (mapping_type == mapping_piola)
+             ||
+             (mapping_type == mapping_nedelec)
+             ||
+             (mapping_type == mapping_contravariant))
+          data->untransformed_shape_grads.resize(n_q_points);
+      }
+
+    if (update_flags & update_hessians)
+      {
+        grad_grads.resize (this->dofs_per_cell);
+        data->shape_grad_grads.reinit (this->dofs_per_cell, n_q_points);
+        data->transformed_shape_hessians.resize (n_q_points);
+        if ( mapping_type != mapping_none )
+          data->untransformed_shape_hessian_tensors.resize(n_q_points);
+      }
+
+    // Compute shape function values, gradients and Hessians on the
+    // reference cell. If inverse_node_matrix has been set, the raw
+    // polynomials are combined with its entries so that the node values
+    // satisfy N_i(v_j)=\delta_ij for all basis functions v_j.
+    // The loop must also run when only Hessians are requested, because
+    // shape_grad_grads is filled in here as well; hence update_hessians
+    // is part of the condition.
+    if (update_flags & (update_values | update_gradients | update_hessians))
+      for (unsigned int k=0; k<n_q_points; ++k)
+        {
+          poly_space.compute(quadrature.point(k),
+                             values, grads, grad_grads,
+                             third_derivatives,
+                             fourth_derivatives);
+
+          if (update_flags & update_values)
+            {
+              if (inverse_node_matrix.n_cols() == 0)
+                for (unsigned int i=0; i<this->dofs_per_cell; ++i)
+                  data->shape_values[i][k] = values[i];
+              else
+                for (unsigned int i=0; i<this->dofs_per_cell; ++i)
+                  {
+                    Tensor<1,dim> add_values;
+                    for (unsigned int j=0; j<this->dofs_per_cell; ++j)
+                      add_values += inverse_node_matrix(j,i) * values[j];
+                    data->shape_values[i][k] = add_values;
+                  }
+            }
+
+          if (update_flags & update_gradients)
+            {
+              if (inverse_node_matrix.n_cols() == 0)
+                for (unsigned int i=0; i<this->dofs_per_cell; ++i)
+                  data->shape_grads[i][k] = grads[i];
+              else
+                for (unsigned int i=0; i<this->dofs_per_cell; ++i)
+                  {
+                    Tensor<2,dim> add_grads;
+                    for (unsigned int j=0; j<this->dofs_per_cell; ++j)
+                      add_grads += inverse_node_matrix(j,i) * grads[j];
+                    data->shape_grads[i][k] = add_grads;
+                  }
+            }
+
+          if (update_flags & update_hessians)
+            {
+              if (inverse_node_matrix.n_cols() == 0)
+                for (unsigned int i=0; i<this->dofs_per_cell; ++i)
+                  data->shape_grad_grads[i][k] = grad_grads[i];
+              else
+                for (unsigned int i=0; i<this->dofs_per_cell; ++i)
+                  {
+                    Tensor<3,dim> add_grad_grads;
+                    for (unsigned int j=0; j<this->dofs_per_cell; ++j)
+                      add_grad_grads += inverse_node_matrix(j,i) * grad_grads[j];
+                    data->shape_grad_grads[i][k] = add_grad_grads;
+                  }
+            }
+
+        }
+    return data;
+  }
+
+  virtual
+  void
+  fill_fe_values (const typename Triangulation<dim,spacedim>::cell_iterator           &cell,
+                  const CellSimilarity::Similarity                                     cell_similarity,
+                  const Quadrature<dim>                                               &quadrature,
+                  const Mapping<dim,spacedim>                                         &mapping,
+                  const typename Mapping<dim,spacedim>::InternalDataBase              &mapping_internal,
+                  const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &mapping_data,
+                  const typename FiniteElement<dim,spacedim>::InternalDataBase        &fe_internal,
+                  dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const;
+
+  virtual
+  void
+  fill_fe_face_values (const typename Triangulation<dim,spacedim>::cell_iterator           &cell,
+                       const unsigned int                                                   face_no,
+                       const Quadrature<dim-1>                                             &quadrature,
+                       const Mapping<dim,spacedim>                                         &mapping,
+                       const typename Mapping<dim,spacedim>::InternalDataBase              &mapping_internal,
+                       const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &mapping_data,
+                       const typename FiniteElement<dim,spacedim>::InternalDataBase        &fe_internal,
+                       dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const;
+
+  virtual
+  void
+  fill_fe_subface_values (const typename Triangulation<dim,spacedim>::cell_iterator           &cell,
+                          const unsigned int                                                   face_no,
+                          const unsigned int                                                   sub_no,
+                          const Quadrature<dim-1>                                             &quadrature,
+                          const Mapping<dim,spacedim>                                         &mapping,
+                          const typename Mapping<dim,spacedim>::InternalDataBase              &mapping_internal,
+                          const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &mapping_data,
+                          const typename FiniteElement<dim,spacedim>::InternalDataBase        &fe_internal,
+                          dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const;
+
+  /**
+   * Fields of cell-independent data for FE_PolyTensor. Stores the values of
+   * the shape functions and their derivatives on the reference cell for later
+   * use.
+   *
+   * All tables are organized such that the value for shape function
+   * <i>i</i> at quadrature point <i>k</i> is accessed by indices
+   * <i>(i,k)</i>.
+   */
+  class InternalData : public FiniteElement<dim,spacedim>::InternalDataBase
+  {
+  public:
+    /**
+     * Array with shape function values in quadrature points. There is one row
+     * for each shape function, containing values for each quadrature point.
+     */
+    Table<2,Tensor<1,dim> > shape_values;
+
+    /**
+     * Array with shape function gradients in quadrature points. There is one
+     * row for each shape function, containing values for each quadrature
+     * point.
+     */
+    Table<2,DerivativeForm<1, dim, spacedim> > shape_grads;
+
+    /**
+     * Array with shape function Hessians in quadrature points. There is one
+     * row for each shape function, containing values for each quadrature
+     * point.
+     */
+    Table<2,DerivativeForm<2, dim, spacedim> > shape_grad_grads;
+
+    /**
+     * Scratch arrays for intermediate computations; get_data() sizes them on demand
+     */
+    mutable std::vector<double>                sign_change;
+    mutable std::vector<Tensor<1, spacedim> >  transformed_shape_values;
+    // for shape gradient computations (one entry per quadrature point)
+    mutable std::vector<Tensor<2, spacedim > > transformed_shape_grads;
+    mutable std::vector<Tensor<2, dim > >      untransformed_shape_grads;
+    // for shape Hessian computations (one entry per quadrature point)
+    mutable std::vector<Tensor<3, spacedim > > transformed_shape_hessians;
+    mutable std::vector<Tensor<3, dim > >      untransformed_shape_hessian_tensors;
+  };
+
+
+
+  /**
+   * The polynomial space. Its type is given by the template parameter
+   * PolynomialType.
+   */
+  PolynomialType poly_space;
+
+  /**
+   * The inverse of the matrix <i>a<sub>ij</sub></i> of node values
+   * <i>N<sub>i</sub></i> applied to polynomial <i>p<sub>j</sub></i>. This
+   * matrix is used to convert polynomials in the "raw" basis provided in
+   * #poly_space to the basis dual to the node functionals on the reference
+   * cell.
+   *
+   * This object is not filled by FE_PolyTensor, but is a chance for a derived
+   * class to allow for reorganization of the basis functions. If it is left
+   * empty, the basis in #poly_space is used.
+   */
+  FullMatrix<double> inverse_node_matrix;
+
+  /**
+   * If a shape function is computed at a single point, we must compute all of
+   * them to apply #inverse_node_matrix. In order to avoid too much overhead,
+   * we cache the point and the function values for the next evaluation.
+   */
+  mutable Point<dim> cached_point;
+
+  /**
+   * Cached shape function values after call to shape_value_component().
+   */
+  mutable std::vector<Tensor<1,dim> > cached_values;
+
+  /**
+   * Cached shape function gradients after call to shape_grad_component().
+   */
+  mutable std::vector<Tensor<2,dim> > cached_grads;
+
+  /**
+   * Cached second derivatives of shape functions after call to
+   * shape_grad_grad_component().
+   */
+  mutable std::vector<Tensor<3,dim> > cached_grad_grads;
+};
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/fe/fe_q.h b/include/deal.II/fe/fe_q.h
new file mode 100644
index 0000000..f47cba2
--- /dev/null
+++ b/include/deal.II/fe/fe_q.h
@@ -0,0 +1,573 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__fe_q_h
+#define dealii__fe_q_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/tensor_product_polynomials.h>
+#include <deal.II/fe/fe_q_base.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/*!@addtogroup fe */
+/*@{*/
+
+/**
+ * Implementation of a scalar Lagrange finite element @p Qp that yields the
+ * finite element space of continuous, piecewise polynomials of degree @p p in
+ * each coordinate direction. This class is realized using tensor product
+ * polynomials based on equidistant or given support points.
+ *
+ * The standard constructor of this class takes the degree @p p of this finite
+ * element. Alternatively, it can take a quadrature formula @p points defining
+ * the support points of the Lagrange interpolation in one coordinate
+ * direction.
+ *
+ * For more information about the <tt>spacedim</tt> template parameter check
+ * the documentation of FiniteElement or the one of Triangulation.
+ *
+ * <h3>Implementation</h3>
+ *
+ * The constructor creates a TensorProductPolynomials object that includes the
+ * tensor product of @p LagrangeEquidistant polynomials of degree @p p. This
+ * @p TensorProductPolynomials object provides all values and derivatives of
+ * the shape functions.  In case a quadrature rule is given, the constructor
+ * creates a TensorProductPolynomials object that includes the tensor product
+ * of @p Lagrange polynomials with the support points from @p points.
+ *
+ * Furthermore the constructor fills the @p interface_constraints, the @p
+ * prolongation (embedding) and the @p restriction matrices. These are
+ * implemented only up to a certain degree and may not be available for very
+ * high polynomial degree.
+ *
+ *
+ * <h3>Numbering of the degrees of freedom (DoFs)</h3>
+ *
+ * The original ordering of the shape functions represented by the
+ * TensorProductPolynomials is a tensor product numbering. However, the shape
+ * functions on a cell are renumbered beginning with the shape functions whose
+ * support points are at the vertices, then on the line, on the quads, and
+ * finally (for 3d) on the hexes. To be explicit, these numberings are listed
+ * in the following:
+ *
+ * <h4>Q1 elements</h4>
+ * <ul>
+ * <li> 1D case:
+ *   @verbatim
+ *      0-------1
+ *   @endverbatim
+ *
+ * <li> 2D case:
+ *   @verbatim
+ *      2-------3
+ *      |       |
+ *      |       |
+ *      |       |
+ *      0-------1
+ *   @endverbatim
+ *
+ * <li> 3D case:
+ *   @verbatim
+ *         6-------7        6-------7
+ *        /|       |       /       /|
+ *       / |       |      /       / |
+ *      /  |       |     /       /  |
+ *     4   |       |    4-------5   |
+ *     |   2-------3    |       |   3
+ *     |  /       /     |       |  /
+ *     | /       /      |       | /
+ *     |/       /       |       |/
+ *     0-------1        0-------1
+ *   @endverbatim
+ *
+ * The respective coordinate values of the support points of the shape
+ * functions are as follows:
+ * <ul>
+ * <li> Shape function 0: <tt>[0, 0, 0]</tt>;
+ * <li> Shape function 1: <tt>[1, 0, 0]</tt>;
+ * <li> Shape function 2: <tt>[0, 1, 0]</tt>;
+ * <li> Shape function 3: <tt>[1, 1, 0]</tt>;
+ * <li> Shape function 4: <tt>[0, 0, 1]</tt>;
+ * <li> Shape function 5: <tt>[1, 0, 1]</tt>;
+ * <li> Shape function 6: <tt>[0, 1, 1]</tt>;
+ * <li> Shape function 7: <tt>[1, 1, 1]</tt>;
+ * </ul>
+ * </ul>
+ *
+ * In 2d, these shape functions look as follows: <table> <tr> <td
+ * align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q1/Q1_shape0000.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q1/Q1_shape0001.png
+ * </td> </tr> <tr> <td align="center"> $Q_1$ element, shape function 0 </td>
+ *
+ * <td align="center"> $Q_1$ element, shape function 1 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q1/Q1_shape0002.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q1/Q1_shape0003.png
+ * </td> </tr> <tr> <td align="center"> $Q_1$ element, shape function 2 </td>
+ *
+ * <td align="center"> $Q_1$ element, shape function 3 </td> </tr> </table>
+ *
+ *
+ * <h4>Q2 elements</h4>
+ * <ul>
+ * <li> 1D case:
+ *   @verbatim
+ *      0---2---1
+ *   @endverbatim
+ *
+ * <li> 2D case:
+ *   @verbatim
+ *      2---7---3
+ *      |       |
+ *      4   8   5
+ *      |       |
+ *      0---6---1
+ *   @endverbatim
+ *
+ * <li> 3D case:
+ *   @verbatim
+ *         6--15---7        6--15---7
+ *        /|       |       /       /|
+ *      12 |       19     12      1319
+ *      /  18      |     /       /  |
+ *     4   |       |    4---14--5   |
+ *     |   2---11--3    |       |   3
+ *     |  /       /     |      17  /
+ *    16 8       9     16       | 9
+ *     |/       /       |       |/
+ *     0---10--1        0---10--1
+ *
+ *         *-------*        *-------*
+ *        /|       |       /       /|
+ *       / |  23   |      /  25   / |
+ *      /  |       |     /       /  |
+ *     *   |       |    *-------*   |
+ *     |20 *-------*    |       |21 *
+ *     |  /       /     |   22  |  /
+ *     | /  24   /      |       | /
+ *     |/       /       |       |/
+ *     *-------*        *-------*
+ *   @endverbatim
+ * The center vertex has number 26.
+ *
+ * The respective coordinate values of the support points of the shape
+ * functions are as follows:
+ * <ul>
+ * <li> Shape function 0: <tt>[0, 0, 0]</tt>;
+ * <li> Shape function 1: <tt>[1, 0, 0]</tt>;
+ * <li> Shape function 2: <tt>[0, 1, 0]</tt>;
+ * <li> Shape function 3: <tt>[1, 1, 0]</tt>;
+ * <li> Shape function 4: <tt>[0, 0, 1]</tt>;
+ * <li> Shape function 5: <tt>[1, 0, 1]</tt>;
+ * <li> Shape function 6: <tt>[0, 1, 1]</tt>;
+ * <li> Shape function 7: <tt>[1, 1, 1]</tt>;
+ * <li> Shape function 8: <tt>[0, 1/2, 0]</tt>;
+ * <li> Shape function 9: <tt>[1, 1/2, 0]</tt>;
+ * <li> Shape function 10: <tt>[1/2, 0, 0]</tt>;
+ * <li> Shape function 11: <tt>[1/2, 1, 0]</tt>;
+ * <li> Shape function 12: <tt>[0, 1/2, 1]</tt>;
+ * <li> Shape function 13: <tt>[1, 1/2, 1]</tt>;
+ * <li> Shape function 14: <tt>[1/2, 0, 1]</tt>;
+ * <li> Shape function 15: <tt>[1/2, 1, 1]</tt>;
+ * <li> Shape function 16: <tt>[0, 0, 1/2]</tt>;
+ * <li> Shape function 17: <tt>[1, 0, 1/2]</tt>;
+ * <li> Shape function 18: <tt>[0, 1, 1/2]</tt>;
+ * <li> Shape function 19: <tt>[1, 1, 1/2]</tt>;
+ * <li> Shape function 20: <tt>[0, 1/2, 1/2]</tt>;
+ * <li> Shape function 21: <tt>[1, 1/2, 1/2]</tt>;
+ * <li> Shape function 22: <tt>[1/2, 0, 1/2]</tt>;
+ * <li> Shape function 23: <tt>[1/2, 1, 1/2]</tt>;
+ * <li> Shape function 24: <tt>[1/2, 1/2, 0]</tt>;
+ * <li> Shape function 25: <tt>[1/2, 1/2, 1]</tt>;
+ * <li> Shape function 26: <tt>[1/2, 1/2, 1/2]</tt>;
+ * </ul>
+ * </ul>
+ *
+ *
+ * In 2d, these shape functions look as follows (the black plane corresponds
+ * to zero; negative shape function values may not be visible): <table> <tr>
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q2/Q2_shape0000.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q2/Q2_shape0001.png
+ * </td> </tr> <tr> <td align="center"> $Q_2$ element, shape function 0 </td>
+ *
+ * <td align="center"> $Q_2$ element, shape function 1 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q2/Q2_shape0002.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q2/Q2_shape0003.png
+ * </td> </tr> <tr> <td align="center"> $Q_2$ element, shape function 2 </td>
+ *
+ * <td align="center"> $Q_2$ element, shape function 3 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q2/Q2_shape0004.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q2/Q2_shape0005.png
+ * </td> </tr> <tr> <td align="center"> $Q_2$ element, shape function 4 </td>
+ *
+ * <td align="center"> $Q_2$ element, shape function 5 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q2/Q2_shape0006.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q2/Q2_shape0007.png
+ * </td> </tr> <tr> <td align="center"> $Q_2$ element, shape function 6 </td>
+ *
+ * <td align="center"> $Q_2$ element, shape function 7 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q2/Q2_shape0008.png
+ * </td>
+ *
+ * <td align="center"> </td> </tr> <tr> <td align="center"> $Q_2$ element,
+ * shape function 8 </td>
+ *
+ * <td align="center"> </td> </tr> </table>
+ *
+ *
+ * <h4>Q3 elements</h4>
+ * <ul>
+ * <li> 1D case:
+ *   @verbatim
+ *      0--2--3--1
+ *   @endverbatim
+ *
+ * <li> 2D case:
+ *   @verbatim
+ *      2--10-11-3
+ *      |        |
+ *      5  14 15 7
+ *      |        |
+ *      4  12 13 6
+ *      |        |
+ *      0--8--9--1
+ *   @endverbatim
+ * </ul>
+ *
+ * In 2d, these shape functions look as follows (the black plane corresponds
+ * to zero; negative shape function values may not be visible): <table> <tr>
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q3/Q3_shape0000.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q3/Q3_shape0001.png
+ * </td> </tr> <tr> <td align="center"> $Q_3$ element, shape function 0 </td>
+ *
+ * <td align="center"> $Q_3$ element, shape function 1 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q3/Q3_shape0002.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q3/Q3_shape0003.png
+ * </td> </tr> <tr> <td align="center"> $Q_3$ element, shape function 2 </td>
+ *
+ * <td align="center"> $Q_3$ element, shape function 3 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q3/Q3_shape0004.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q3/Q3_shape0005.png
+ * </td> </tr> <tr> <td align="center"> $Q_3$ element, shape function 4 </td>
+ *
+ * <td align="center"> $Q_3$ element, shape function 5 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q3/Q3_shape0006.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q3/Q3_shape0007.png
+ * </td> </tr> <tr> <td align="center"> $Q_3$ element, shape function 6 </td>
+ *
+ * <td align="center"> $Q_3$ element, shape function 7 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q3/Q3_shape0008.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q3/Q3_shape0009.png
+ * </td> </tr> <tr> <td align="center"> $Q_3$ element, shape function 8 </td>
+ *
+ * <td align="center"> $Q_3$ element, shape function 9 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q3/Q3_shape0010.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q3/Q3_shape0011.png
+ * </td> </tr> <tr> <td align="center"> $Q_3$ element, shape function 10 </td>
+ *
+ * <td align="center"> $Q_3$ element, shape function 11 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q3/Q3_shape0012.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q3/Q3_shape0013.png
+ * </td> </tr> <tr> <td align="center"> $Q_3$ element, shape function 12 </td>
+ *
+ * <td align="center"> $Q_3$ element, shape function 13 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q3/Q3_shape0014.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q3/Q3_shape0015.png
+ * </td> </tr> <tr> <td align="center"> $Q_3$ element, shape function 14 </td>
+ *
+ * <td align="center"> $Q_3$ element, shape function 15 </td> </tr> </table>
+ *
+ *
+ * <h4>Q4 elements</h4>
+ * <ul>
+ * <li> 1D case:
+ *   @verbatim
+ *      0--2--3--4--1
+ *   @endverbatim
+ *
+ * <li> 2D case:
+ *   @verbatim
+ *      2--13-14-15-3
+ *      |           |
+ *      6  22 23 24 9
+ *      |           |
+ *      5  19 20 21 8
+ *      |           |
+ *      4  16 17 18 7
+ *      |           |
+ *      0--10-11-12-1
+ *   @endverbatim
+ * </ul>
+ *
+ * In 2d, these shape functions look as follows (the black plane corresponds
+ * to zero; negative shape function values may not be visible): <table> <tr>
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q4/Q4_shape0000.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q4/Q4_shape0001.png
+ * </td> </tr> <tr> <td align="center"> $Q_4$ element, shape function 0 </td>
+ *
+ * <td align="center"> $Q_4$ element, shape function 1 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q4/Q4_shape0002.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q4/Q4_shape0003.png
+ * </td> </tr> <tr> <td align="center"> $Q_4$ element, shape function 2 </td>
+ *
+ * <td align="center"> $Q_4$ element, shape function 3 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q4/Q4_shape0004.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q4/Q4_shape0005.png
+ * </td> </tr> <tr> <td align="center"> $Q_4$ element, shape function 4 </td>
+ *
+ * <td align="center"> $Q_4$ element, shape function 5 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q4/Q4_shape0006.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q4/Q4_shape0007.png
+ * </td> </tr> <tr> <td align="center"> $Q_4$ element, shape function 6 </td>
+ *
+ * <td align="center"> $Q_4$ element, shape function 7 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q4/Q4_shape0008.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q4/Q4_shape0009.png
+ * </td> </tr> <tr> <td align="center"> $Q_4$ element, shape function 8 </td>
+ *
+ * <td align="center"> $Q_4$ element, shape function 9 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q4/Q4_shape0010.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q4/Q4_shape0011.png
+ * </td> </tr> <tr> <td align="center"> $Q_4$ element, shape function 10 </td>
+ *
+ * <td align="center"> $Q_4$ element, shape function 11 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q4/Q4_shape0012.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q4/Q4_shape0013.png
+ * </td> </tr> <tr> <td align="center"> $Q_4$ element, shape function 12 </td>
+ *
+ * <td align="center"> $Q_4$ element, shape function 13 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q4/Q4_shape0014.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q4/Q4_shape0015.png
+ * </td> </tr> <tr> <td align="center"> $Q_4$ element, shape function 14 </td>
+ *
+ * <td align="center"> $Q_4$ element, shape function 15 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q4/Q4_shape0016.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q4/Q4_shape0017.png
+ * </td> </tr> <tr> <td align="center"> $Q_4$ element, shape function 16 </td>
+ *
+ * <td align="center"> $Q_4$ element, shape function 17 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q4/Q4_shape0018.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q4/Q4_shape0019.png
+ * </td> </tr> <tr> <td align="center"> $Q_4$ element, shape function 18 </td>
+ *
+ * <td align="center"> $Q_4$ element, shape function 19 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q4/Q4_shape0020.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q4/Q4_shape0021.png
+ * </td> </tr> <tr> <td align="center"> $Q_4$ element, shape function 20 </td>
+ *
+ * <td align="center"> $Q_4$ element, shape function 21 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q4/Q4_shape0022.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q4/Q4_shape0023.png
+ * </td> </tr> <tr> <td align="center"> $Q_4$ element, shape function 22 </td>
+ *
+ * <td align="center"> $Q_4$ element, shape function 23 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/lagrange/Q4/Q4_shape0024.png
+ * </td>
+ *
+ * <td align="center"> </td> </tr> <tr> <td align="center"> $Q_4$ element,
+ * shape function 24 </td>
+ *
+ * <td align="center"> </td> </tr> </table>
+ *
+ *
+ *
+ * @author Wolfgang Bangerth, 1998, 2003; Guido Kanschat, 2001; Ralf Hartmann,
+ * 2001, 2004, 2005; Oliver Kayser-Herold, 2004; Katharina Kormann, 2008;
+ * Martin Kronbichler, 2008
+ */
+template <int dim, int spacedim=dim>
+class FE_Q : public FE_Q_Base<TensorProductPolynomials<dim>,dim,spacedim>
+{
+public:
+  /**
+   * Constructor for tensor product polynomials of degree @p p.
+   */
+  FE_Q (const unsigned int p);
+
+  /**
+   * Constructor for tensor product polynomials with support points @p points
+   * based on a one-dimensional quadrature formula. The degree of the finite
+   * element is <tt>points.size()-1</tt>.  Note that the first point has to be
+   * 0 and the last one 1. If
+   * <tt>FE_Q<dim>(QGaussLobatto<1>(fe_degree+1))</tt> is specified, so-called
+   * Gauss-Lobatto elements are obtained which can give a diagonal mass matrix
+   * if combined with Gauss-Lobatto quadrature on the same points. Their use
+   * is shown in step-48.
+   */
+  FE_Q (const Quadrature<1> &points);
+
+  /**
+   * Construct an FE_Q_isoQ1 element. That element shares large parts of code
+   * with FE_Q so most of the construction work is done in this routine,
+   * whereas the public constructor is in the class FE_Q_isoQ1.
+   */
+  FE_Q(const unsigned int subdivisions_per_dimension,
+       const unsigned int base_degree);
+
+  /**
+   * Return a string that uniquely identifies a finite element. This class
+   * returns <tt>FE_Q<dim>(degree)</tt>, with @p dim and @p degree replaced by
+   * appropriate values.
+   */
+  virtual std::string get_name () const;
+
+protected:
+
+  /**
+   * @p clone function instead of a copy constructor.
+   *
+   * This function is needed by the constructors of @p FESystem.
+   */
+  virtual FiniteElement<dim,spacedim> *clone() const;
+};
+
+
+
+/*@}*/
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/fe/fe_q_base.h b/include/deal.II/fe/fe_q_base.h
new file mode 100644
index 0000000..601189d
--- /dev/null
+++ b/include/deal.II/fe/fe_q_base.h
@@ -0,0 +1,342 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__fe_q_base_h
+#define dealii__fe_q_base_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/fe/fe_poly.h>
+#include <deal.II/base/thread_management.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/*!@addtogroup fe */
+/*@{*/
+
+/**
+ * This class collects the basic methods used in FE_Q, FE_Q_DG0 and
+ * FE_Q_Bubbles. It is not functional as a stand-alone class and is only
+ * meant to be used as a base class. The completion of definitions is left
+ * to the derived classes.
+ *
+ * @author Wolfgang Bangerth, 1998, 2003; Guido Kanschat, 2001; Ralf Hartmann,
+ * 2001, 2004, 2005; Oliver Kayser-Herold, 2004; Katharina Kormann, 2008;
+ * Martin Kronbichler, 2008, 2013
+ */
+template <class PolynomialType, int dim=PolynomialType::dimension, int spacedim=dim>
+class FE_Q_Base : public FE_Poly<PolynomialType,dim,spacedim>
+{
+public:
+  /**
+   * Constructor.
+   */
+  FE_Q_Base (const PolynomialType &poly_space,
+             const FiniteElementData<dim> &fe_data,
+             const std::vector<bool> &restriction_is_additive_flags);
+
+  /**
+   * Return the matrix interpolating from the given finite element to the
+   * present one. The size of the matrix is then @p dofs_per_cell times
+   * <tt>source.dofs_per_cell</tt>.
+   *
+   * These matrices are only available if the source element is also a @p FE_Q
+   * element. Otherwise, an exception of type
+   * FiniteElement<dim,spacedim>::ExcInterpolationNotImplemented is thrown.
+   */
+  virtual void
+  get_interpolation_matrix (const FiniteElement<dim,spacedim> &source,
+                            FullMatrix<double>       &matrix) const;
+
+
+  /**
+   * Return the matrix interpolating from a face of one element to the face
+   * of the neighboring element.  The size of the matrix is then
+   * <tt>source.dofs_per_face</tt> times <tt>this->dofs_per_face</tt>. The
+   * FE_Q element family only provides interpolation matrices for elements of
+   * the same type and FE_Nothing. For all other elements, an exception of
+   * type FiniteElement<dim,spacedim>::ExcInterpolationNotImplemented is
+   * thrown.
+   */
+  virtual void
+  get_face_interpolation_matrix (const FiniteElement<dim,spacedim> &source,
+                                 FullMatrix<double>       &matrix) const;
+
+  /**
+   * Return the matrix interpolating from a face of one element to the
+   * subface of the neighboring element.  The size of the matrix is then
+   * <tt>source.dofs_per_face</tt> times <tt>this->dofs_per_face</tt>. The
+   * FE_Q element family only provides interpolation matrices for elements of
+   * the same type and FE_Nothing. For all other elements, an exception of
+   * type FiniteElement<dim,spacedim>::ExcInterpolationNotImplemented is
+   * thrown.
+   */
+  virtual void
+  get_subface_interpolation_matrix (const FiniteElement<dim,spacedim> &source,
+                                    const unsigned int        subface,
+                                    FullMatrix<double>       &matrix) const;
+
+  /**
+   * This function returns @p true, if the shape function @p shape_index has
+   * non-zero function values somewhere on the face @p face_index.
+   */
+  virtual bool has_support_on_face (const unsigned int shape_index,
+                                    const unsigned int face_index) const;
+
+  /**
+   * Projection from a fine grid space onto a coarse grid space. Overrides the
+   * respective method in FiniteElement, implementing lazy evaluation
+   * (initialize when requested).
+   *
+   * If this projection operator is associated with a matrix @p P, then the
+   * restriction of this matrix @p P_i to a single child cell is returned
+   * here.
+   *
+   * The matrix @p P is the concatenation or the sum of the cell matrices @p
+   * P_i, depending on the #restriction_is_additive_flags. This distinguishes
+   * interpolation (concatenation) and projection with respect to scalar
+   * products (summation).
+   *
+   * Row and column indices are related to coarse grid and fine grid spaces,
+   * respectively, consistent with the definition of the associated operator.
+   *
+   * If projection matrices are not implemented in the derived finite element
+   * class, this function aborts with ExcProjectionVoid. You can check whether
+   * this is the case by calling the restriction_is_implemented() or the
+   * isotropic_restriction_is_implemented() function.
+   */
+  virtual const FullMatrix<double> &
+  get_restriction_matrix (const unsigned int child,
+                          const RefinementCase<dim> &refinement_case=RefinementCase<dim>::isotropic_refinement) const;
+
+  /**
+   * Embedding matrix between grids. Overrides the respective method in
+   * FiniteElement, implementing lazy evaluation (initialize when queried).
+   *
+   * The identity operator from a coarse grid space into a fine grid space is
+   * associated with a matrix @p P. The restriction of this matrix @p P_i to a
+   * single child cell is returned here.
+   *
+   * The matrix @p P is the concatenation, not the sum of the cell matrices @p
+   * P_i. That is, if the same non-zero entry <tt>j,k</tt> exists in two
+   * different child matrices @p P_i, the value should be the same in both
+   * matrices and it is copied into the matrix @p P only once.
+   *
+   * Row and column indices are related to fine grid and coarse grid spaces,
+   * respectively, consistent with the definition of the associated operator.
+   *
+   * These matrices are used by routines assembling the prolongation matrix
+   * for multi-level methods.  Upon assembling the transfer matrix between
+   * cells using this matrix array, zero elements in the prolongation matrix
+   * are discarded and will not fill up the transfer matrix.
+   *
+   * If prolongation matrices are not implemented in the derived finite element
+   * class, this function aborts with ExcEmbeddingVoid. You can check whether
+   * this is the case by calling the prolongation_is_implemented() or the
+   * isotropic_prolongation_is_implemented() function.
+   */
+  virtual const FullMatrix<double> &
+  get_prolongation_matrix (const unsigned int child,
+                           const RefinementCase<dim> &refinement_case=RefinementCase<dim>::isotropic_refinement) const;
+
+  /**
+   * Given an index in the natural ordering of indices on a face, return the
+   * index of the same degree of freedom on the cell.
+   *
+   * To explain the concept, consider the case where we would like to know
+   * whether a degree of freedom on a face, for example as part of an FESystem
+   * element, is primitive. Unfortunately, the is_primitive() function in the
+   * FiniteElement class takes a cell index, so we would need to find the cell
+   * index of the shape function that corresponds to the present face index.
+   * This function does that.
+   *
+   * Code implementing this would then look like this:
+   * @code
+   * for (i=0; i<dofs_per_face; ++i)
+   *  if (fe.is_primitive(fe.face_to_cell_index(i, some_face_no)))
+   *   ... do whatever
+   * @endcode
+   * The function takes additional arguments that account for the fact that
+   * actual faces can be in their standard ordering with respect to the cell
+   * under consideration, or can be flipped, oriented, etc.
+   *
+   * @param face_dof_index The index of the degree of freedom on a face. This
+   * index must be between zero and dofs_per_face.
+   * @param face The number of the face this degree of freedom lives on. This
+   * number must be between zero and GeometryInfo::faces_per_cell.
+   * @param face_orientation One part of the description of the orientation of
+   * the face. See
+   * @ref GlossFaceOrientation.
+   * @param face_flip One part of the description of the orientation of the
+   * face. See
+   * @ref GlossFaceOrientation.
+   * @param face_rotation One part of the description of the orientation of
+   * the face. See
+   * @ref GlossFaceOrientation.
+   * @return The index of this degree of freedom within the set of degrees of
+   * freedom on the entire cell. The returned value will be between zero and
+   * dofs_per_cell.
+   */
+  virtual
+  unsigned int face_to_cell_index (const unsigned int face_dof_index,
+                                   const unsigned int face,
+                                   const bool face_orientation = true,
+                                   const bool face_flip        = false,
+                                   const bool face_rotation    = false) const;
+
+  /**
+   * Return a list of constant modes of the element. For this element, the
+   * list consists of true arguments for all components.
+   */
+  virtual std::pair<Table<2,bool>, std::vector<unsigned int> >
+  get_constant_modes () const;
+
+  /**
+   * @name Functions to support hp
+   * @{
+   */
+
+  /**
+   * Return whether this element implements its hanging node constraints in
+   * the new way, which has to be used to make elements "hp compatible".
+   *
+   * For the FE_Q class the result is always true (independent of the degree
+   * of the element), as it implements the complete set of functions necessary
+   * for hp capability.
+   */
+  virtual bool hp_constraints_are_implemented () const;
+
+  /**
+   * If, on a vertex, several finite elements are active, the hp code first
+   * assigns the degrees of freedom of each of these FEs different global
+   * indices. It then calls this function to find out which of them should get
+   * identical values, and consequently can receive the same global DoF index.
+   * This function therefore returns a list of identities between DoFs of the
+   * present finite element object with the DoFs of @p fe_other, which is a
+   * reference to a finite element object representing one of the other finite
+   * elements active on this particular vertex. The function computes which of
+   * the degrees of freedom of the two finite element objects are equivalent,
+   * both numbered between zero and the corresponding value of dofs_per_vertex
+   * of the two finite elements. The first index of each pair denotes one of
+   * the vertex dofs of the present element, whereas the second is the
+   * corresponding index of the other finite element.
+   */
+  virtual
+  std::vector<std::pair<unsigned int, unsigned int> >
+  hp_vertex_dof_identities (const FiniteElement<dim,spacedim> &fe_other) const;
+
+  /**
+   * Same as hp_vertex_dof_identities(), except that the function treats
+   * degrees of freedom on lines.
+   */
+  virtual
+  std::vector<std::pair<unsigned int, unsigned int> >
+  hp_line_dof_identities (const FiniteElement<dim,spacedim> &fe_other) const;
+
+  /**
+   * Same as hp_vertex_dof_identities(), except that the function treats
+   * degrees of freedom on quads.
+   */
+  virtual
+  std::vector<std::pair<unsigned int, unsigned int> >
+  hp_quad_dof_identities (const FiniteElement<dim,spacedim> &fe_other) const;
+
+  /**
+   * Return whether this element dominates the one given as argument when they
+   * meet at a common face, whether it is the other way around, whether
+   * neither dominates, or if either could dominate.
+   *
+   * For a definition of domination, see FiniteElementDomination::Domination
+   * and in particular the
+   * @ref hp_paper "hp paper".
+   */
+  virtual
+  FiniteElementDomination::Domination
+  compare_for_face_domination (const FiniteElement<dim,spacedim> &fe_other) const;
+  //@}
+
+protected:
+  /**
+   * Only for internal use. Its full name is @p get_dofs_per_object_vector
+   * function and it creates the @p dofs_per_object vector that is needed
+   * within the constructor to be passed to the constructor of @p
+   * FiniteElementData.
+   */
+  static std::vector<unsigned int> get_dpo_vector(const unsigned int degree);
+
+  /**
+   * Performs the initialization of the element based on 1D support points,
+   * i.e., sets renumbering, initializes unit support points, initializes
+   * constraints as well as restriction and prolongation matrices.
+   */
+  void initialize (const std::vector<Point<1> > &support_points_1d);
+
+  /**
+   * Initialize the hanging node constraints matrices. Called from
+   * initialize().
+   */
+  void initialize_constraints (const std::vector<Point<1> > &points);
+
+  /**
+   * Initialize the @p unit_support_points field of the FiniteElement class.
+   * Called from initialize().
+   */
+  void initialize_unit_support_points (const std::vector<Point<1> > &points);
+
+  /**
+   * Initialize the @p unit_face_support_points field of the FiniteElement
+   * class. Called from initialize().
+   */
+  void initialize_unit_face_support_points (const std::vector<Point<1> > &points);
+
+  /**
+   * Initialize the @p adjust_quad_dof_index_for_face_orientation_table field
+   * of the FiniteElement class. Called from initialize().
+   */
+  void initialize_quad_dof_index_permutation ();
+
+  /**
+   * Forward declaration of a class into which we put significant parts of the
+   * implementation.
+   *
+   * See the .cc file for more information.
+   */
+  struct Implementation;
+
+  /**
+   * Declare implementation friend.
+   */
+  friend struct FE_Q_Base<PolynomialType,dim,spacedim>::Implementation;
+
+private:
+  /**
+   * Mutex for protecting initialization of restriction and embedding matrix.
+   */
+  mutable Threads::Mutex mutex;
+
+  /**
+   * The highest polynomial degree of the underlying tensor product space
+   * without any enrichment. For FE_Q*(p) this is p. Note that enrichments
+   * may lead to a difference to degree.
+   */
+  const unsigned int q_degree;
+};
+
+
+/*@}*/
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/fe/fe_q_bubbles.h b/include/deal.II/fe/fe_q_bubbles.h
new file mode 100644
index 0000000..b7ecfb9
--- /dev/null
+++ b/include/deal.II/fe/fe_q_bubbles.h
@@ -0,0 +1,205 @@
+// ---------------------------------------------------------------------
+// $Id$
+//
+// Copyright (C) 2012 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__fe_q_bubbles_h
+#define dealii__fe_q_bubbles_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/tensor_product_polynomials_bubbles.h>
+#include <deal.II/fe/fe_q_base.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/*!@addtogroup fe */
+/*@{*/
+
+/**
+ * Implementation of a scalar Lagrange finite element @p Q_p^+ that yields the
+ * finite element space of continuous, piecewise polynomials of degree @p p in
+ * each coordinate direction plus some bubble enrichment space spanned by
+ * $(2x_j-1)^{p-1}\prod_{i=0}^{dim-1}(x_i(1-x_i))$. Therefore the highest
+ * polynomial degree is $p+1$. This class is realized using tensor product
+ * polynomials based on equidistant or given support points.
+ *
+ * The standard constructor of this class takes the degree @p p of this finite
+ * element. Alternatively, it can take a quadrature formula @p points defining
+ * the support points of the Lagrange interpolation in one coordinate
+ * direction.
+ *
+ * For more information about the <tt>spacedim</tt> template parameter check
+ * the documentation of FiniteElement or the one of Triangulation.
+ *
+ * Due to the fact that the enrichments are small almost everywhere for large
+ * p, the condition numbers of the mass and stiffness matrices quickly
+ * increase with increasing p. Below you see a comparison with
+ * FE_Q(QGaussLobatto(p+1)) for dim=1.
+ *
+ * <p ALIGN="center">
+ * @image html fe_q_bubbles_conditioning.png
+ * </p>
+ *
+ * Therefore, this element should be used with care for $p>3$.
+ *
+ * <h3>Implementation</h3>
+ *
+ * The constructor creates a TensorProductPolynomials object that includes the
+ * tensor product of @p LagrangeEquidistant polynomials of degree @p p plus
+ * the bubble enrichments. This @p TensorProductPolynomialsBubbles object
+ * provides all values and derivatives of the shape functions. In case a
+ * quadrature rule is given, the constructor creates a
+ * TensorProductPolynomialsBubbles object that includes the tensor product of
+ * @p Lagrange polynomials with the support points from @p points and the
+ * bubble enrichments as defined above.
+ *
+ * Furthermore the constructor fills the @p interface_constraints, the @p
+ * prolongation (embedding) and the @p restriction matrices.
+ *
+ * <h3>Numbering of the degrees of freedom (DoFs)</h3>
+ *
+ * The original ordering of the shape functions represented by the
+ * TensorProductPolynomialsBubbles is a tensor product numbering. However, the
+ * shape functions on a cell are renumbered beginning with the shape functions
+ * whose support points are at the vertices, then on the line, on the quads,
+ * and finally (for 3d) on the hexes. Finally, there are support points for
+ * the bubble enrichments in the middle of the cell.
+ *
+ */
+template <int dim, int spacedim=dim>
+class FE_Q_Bubbles : public FE_Q_Base<TensorProductPolynomialsBubbles<dim>,dim,spacedim>
+{
+public:
+  /**
+   * Constructor for tensor product polynomials of degree @p p plus bubble
+   * enrichments. As stated in the class documentation, the enrichment
+   * raises the highest polynomial degree of the space to <tt>p+1</tt>.
+   */
+  FE_Q_Bubbles (const unsigned int p);
+
+  /**
+   * Constructor for tensor product polynomials with support points @p points
+   * plus bubble enrichments based on a one-dimensional quadrature formula.
+   * The degree of the finite element is <tt>points.size()</tt>. Note that the
+   * first point has to be 0 and the last one 1.
+   */
+  FE_Q_Bubbles (const Quadrature<1> &points);
+
+  /**
+   * Return a string that uniquely identifies a finite element. This class
+   * returns <tt>FE_Q_Bubbles<dim>(degree)</tt>, with @p dim and @p degree
+   * replaced by appropriate values.
+   */
+  virtual std::string get_name () const;
+
+  /**
+   * Interpolate a set of scalar values, computed in the generalized support
+   * points.
+   */
+  virtual void interpolate(std::vector<double>       &local_dofs,
+                           const std::vector<double> &values) const;
+
+  /**
+   * Interpolate a set of vector values, computed in the generalized support
+   * points.
+   *
+   * Since a finite element often only interpolates part of a vector,
+   * <tt>offset</tt> is used to determine the first component of the vector to
+   * be interpolated. Maybe consider changing your data structures to use the
+   * next function.
+   */
+  virtual void interpolate(std::vector<double>                &local_dofs,
+                           const std::vector<Vector<double> > &values,
+                           unsigned int offset = 0) const;
+
+  /**
+   * Interpolate a set of vector values, computed in the generalized support
+   * points.
+   */
+  virtual void interpolate(
+    std::vector<double>          &local_dofs,
+    const VectorSlice<const std::vector<std::vector<double> > > &values) const;
+
+  /**
+   * Return the matrix interpolating from the given finite element to the
+   * present one.  The size of the matrix is then @p dofs_per_cell times
+   * <tt>source.dofs_per_cell</tt>.
+   *
+   * These matrices are only available if the source element is also a @p
+   * FE_Q_Bubbles element. Otherwise, an exception of type
+   * FiniteElement<dim,spacedim>::ExcInterpolationNotImplemented is thrown.
+   */
+  virtual void
+  get_interpolation_matrix (const FiniteElement<dim,spacedim> &source,
+                            FullMatrix<double>       &matrix) const;
+
+  virtual const FullMatrix<double> &
+  get_prolongation_matrix  (const unsigned int child,
+                            const RefinementCase<dim> &refinement_case) const;
+
+  virtual const FullMatrix<double> &
+  get_restriction_matrix  (const unsigned int child,
+                           const RefinementCase<dim> &refinement_case) const;
+
+  /**
+   * Check for non-zero values on a face.
+   *
+   * This function returns @p true, if the shape function @p shape_index has
+   * non-zero values on the face @p face_index.
+   *
+   * Implementation of the interface in FiniteElement.
+   */
+  virtual bool has_support_on_face (const unsigned int shape_index,
+                                    const unsigned int face_index) const;
+
+protected:
+  /**
+   * @p clone function instead of a copy constructor.
+   *
+   * This function is needed by the constructors of @p FESystem.
+   */
+  virtual FiniteElement<dim,spacedim> *clone() const;
+
+private:
+
+  /**
+   * Return the restriction_is_additive flags. Only the last components,
+   * which belong to the bubble enrichments, are true.
+   */
+  static std::vector<bool> get_riaf_vector(const unsigned int degree);
+
+  /**
+   * Only for internal use. Its full name is @p get_dofs_per_object_vector
+   * function and it creates the @p dofs_per_object vector that is needed
+   * within the constructor to be passed to the constructor of @p
+   * FiniteElementData.
+   */
+  static std::vector<unsigned int> get_dpo_vector(const unsigned int degree);
+
+  /**
+   * Number of additional bubble functions.
+   */
+  const unsigned int n_bubbles;
+};
+
+
+
+/*@}*/
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/fe/fe_q_dg0.h b/include/deal.II/fe/fe_q_dg0.h
new file mode 100644
index 0000000..605217a
--- /dev/null
+++ b/include/deal.II/fe/fe_q_dg0.h
@@ -0,0 +1,351 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2012 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__fe_q_dg0_h
+#define dealii__fe_q_dg0_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/tensor_product_polynomials_const.h>
+#include <deal.II/fe/fe_q_base.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/*!@addtogroup fe */
+/*@{*/
+
+/**
+ * Implementation of a scalar Lagrange finite element @p Qp+DG0 that yields
+ * the finite element space of continuous, piecewise polynomials of degree @p
+ * p in each coordinate direction plus the space of locally constant
+ * functions. This class is realized using tensor product polynomials based on
+ * equidistant or given support points.
+ *
+ * The standard constructor of this class takes the degree @p p of this finite
+ * element. Alternatively, it can take a quadrature formula @p points defining
+ * the support points of the Lagrange interpolation in one coordinate
+ * direction.
+ *
+ * For more information about the <tt>spacedim</tt> template parameter check
+ * the documentation of FiniteElement or the one of Triangulation.
+ *
+ * For more information regarding this element see: Boffi, D., et al. "Local
+ * Mass Conservation of Stokes Finite Elements." Journal of Scientific
+ * Computing (2012): 1-18.
+ *
+ * <h3>Implementation</h3>
+ *
+ * The constructor creates a TensorProductPolynomials object that includes the
+ * tensor product of @p LagrangeEquidistant polynomials of degree @p p plus
+ * the locally constant function. This @p TensorProductPolynomialsConst object
+ * provides all values and derivatives of the shape functions. In case a
+ * quadrature rule is given, the constructor creates a
+ * TensorProductPolynomialsConst object that includes the tensor product of @p
+ * Lagrange polynomials with the support points from @p points and a locally
+ * constant function.
+ *
+ * Furthermore the constructor fills the @p interface_constraints, the @p
+ * prolongation (embedding) and the @p restriction matrices.
+ *
+ * <h3>Numbering of the degrees of freedom (DoFs)</h3>
+ *
+ * The original ordering of the shape functions represented by the
+ * TensorProductPolynomialsConst is a tensor product numbering. However, the
+ * shape functions on a cell are renumbered beginning with the shape functions
+ * whose support points are at the vertices, then on the line, on the quads,
+ * and finally (for 3d) on the hexes. Finally there is a support point for the
+ * discontinuous shape function in the middle of the cell. To be explicit,
+ * these numberings are listed in the following:
+ *
+ * <h4>Q1 elements</h4>
+ * <ul>
+ * <li> 1D case:
+ *   @verbatim
+ *      0---2---1
+ *   @endverbatim
+ *
+ * <li> 2D case:
+ *   @verbatim
+ *      2-------3
+ *      |       |
+ *      |   4   |
+ *      |       |
+ *      0-------1
+ *   @endverbatim
+ *
+ * <li> 3D case:
+ *   @verbatim
+ *         6-------7        6-------7
+ *        /|       |       /       /|
+ *       / |       |      /       / |
+ *      /  |       |     /       /  |
+ *     4   |  8    |    4-------5   |
+ *     |   2-------3    |       |   3
+ *     |  /       /     |       |  /
+ *     | /       /      |       | /
+ *     |/       /       |       |/
+ *     0-------1        0-------1
+ *   @endverbatim
+ *
+ * The respective coordinate values of the support points of the degrees of
+ * freedom are as follows:
+ * <ul>
+ * <li> Index 0: <tt>[ 0,  0, 0]</tt>;
+ * <li> Index 1: <tt>[ 1,  0, 0]</tt>;
+ * <li> Index 2: <tt>[ 0,  1, 0]</tt>;
+ * <li> Index 3: <tt>[ 1,  1, 0]</tt>;
+ * <li> Index 4: <tt>[ 0,  0, 1]</tt>;
+ * <li> Index 5: <tt>[ 1,  0, 1]</tt>;
+ * <li> Index 6: <tt>[ 0,  1, 1]</tt>;
+ * <li> Index 7: <tt>[ 1,  1, 1]</tt>;
+ * <li> Index 8: <tt>[1/2, 1/2, 1/2]</tt>;
+ * </ul>
+ * </ul>
+ * <h4>Q2 elements</h4>
+ * <ul>
+ * <li> 1D case:
+ *   @verbatim
+ *      0---2---1
+ *   @endverbatim
+ * Index 3 has the same coordinates as index 2
+ *
+ * <li> 2D case:
+ *   @verbatim
+ *      2---7---3
+ *      |       |
+ *      4   8   5
+ *      |       |
+ *      0---6---1
+ *   @endverbatim
+ * Index 9 has the same coordinates as index 8
+ *
+ * <li> 3D case:
+ *   @verbatim
+ *         6--15---7        6--15---7
+ *        /|       |       /       /|
+ *      12 |       19     12      1319
+ *      /  18      |     /       /  |
+ *     4   |       |    4---14--5   |
+ *     |   2---11--3    |       |   3
+ *     |  /       /     |      17  /
+ *    16 8       9     16       | 9
+ *     |/       /       |       |/
+ *     0---10--1        0---8---1
+ *
+ *         *-------*        *-------*
+ *        /|       |       /       /|
+ *       / |  23   |      /  25   / |
+ *      /  |       |     /       /  |
+ *     *   |       |    *-------*   |
+ *     |20 *-------*    |       |21 *
+ *     |  /       /     |   22  |  /
+ *     | /  24   /      |       | /
+ *     |/       /       |       |/
+ *     *-------*        *-------*
+ *   @endverbatim
+ * The center vertices have number 26 and 27.
+ *
+ * The respective coordinate values of the support points of the degrees of
+ * freedom are as follows:
+ * <ul>
+ * <li> Index 0: <tt>[0, 0, 0]</tt>;
+ * <li> Index 1: <tt>[1, 0, 0]</tt>;
+ * <li> Index 2: <tt>[0, 1, 0]</tt>;
+ * <li> Index 3: <tt>[1, 1, 0]</tt>;
+ * <li> Index 4: <tt>[0, 0, 1]</tt>;
+ * <li> Index 5: <tt>[1, 0, 1]</tt>;
+ * <li> Index 6: <tt>[0, 1, 1]</tt>;
+ * <li> Index 7: <tt>[1, 1, 1]</tt>;
+ * <li> Index 8: <tt>[0, 1/2, 0]</tt>;
+ * <li> Index 9: <tt>[1, 1/2, 0]</tt>;
+ * <li> Index 10: <tt>[1/2, 0, 0]</tt>;
+ * <li> Index 11: <tt>[1/2, 1, 0]</tt>;
+ * <li> Index 12: <tt>[0, 1/2, 1]</tt>;
+ * <li> Index 13: <tt>[1, 1/2, 1]</tt>;
+ * <li> Index 14: <tt>[1/2, 0, 1]</tt>;
+ * <li> Index 15: <tt>[1/2, 1, 1]</tt>;
+ * <li> Index 16: <tt>[0, 0, 1/2]</tt>;
+ * <li> Index 17: <tt>[1, 0, 1/2]</tt>;
+ * <li> Index 18: <tt>[0, 1, 1/2]</tt>;
+ * <li> Index 19: <tt>[1, 1, 1/2]</tt>;
+ * <li> Index 20: <tt>[0, 1/2, 1/2]</tt>;
+ * <li> Index 21: <tt>[1, 1/2, 1/2]</tt>;
+ * <li> Index 22: <tt>[1/2, 0, 1/2]</tt>;
+ * <li> Index 23: <tt>[1/2, 1, 1/2]</tt>;
+ * <li> Index 24: <tt>[1/2, 1/2, 0]</tt>;
+ * <li> Index 25: <tt>[1/2, 1/2, 1]</tt>;
+ * <li> Index 26: <tt>[1/2, 1/2, 1/2]</tt>;
+ * <li> Index 27: <tt>[1/2, 1/2, 1/2]</tt>;
+ * </ul>
+ * </ul>
+ * <h4>Q3 elements</h4>
+ * <ul>
+ * <li> 1D case:
+ *   @verbatim
+ *      0--2-4-3--1
+ *   @endverbatim
+ *
+ * <li> 2D case:
+ *   @verbatim
+ *      2--10-11-3
+ *      |        |
+ *      5  14 15 7
+ *      |    16  |
+ *      4  12 13 6
+ *      |        |
+ *      0--8--9--1
+ *   @endverbatim
+ * </ul>
+ * <h4>Q4 elements</h4>
+ * <ul>
+ * <li> 1D case:
+ *   @verbatim
+ *      0--2--3--4--1
+ *   @endverbatim
+ * Index 5 has the same coordinates as index 3
+ *
+ * <li> 2D case:
+ *   @verbatim
+ *      2--13-14-15-3
+ *      |           |
+ *      6  22 23 24 9
+ *      |           |
+ *      5  19 20 21 8
+ *      |           |
+ *      4  16 17 18 7
+ *      |           |
+ *      0--10-11-12-1
+ *   @endverbatim
+ * Index 25 has the same coordinates as index 20
+ * </ul>
+ *
+ */
+template <int dim, int spacedim=dim>
+class FE_Q_DG0 : public FE_Q_Base<TensorProductPolynomialsConst<dim>,dim,spacedim>
+{
+public:
+  /**
+   * Constructor for tensor product polynomials of degree @p p plus locally
+   * constant functions.
+   */
+  FE_Q_DG0 (const unsigned int p);
+
+  /**
+   * Constructor for tensor product polynomials with support points @p points
+   * plus locally constant functions based on a one-dimensional quadrature
+   * formula. The degree of the finite element is <tt>points.size()-1</tt>.
+   * Note that the first point has to be 0 and the last one 1.
+   */
+  FE_Q_DG0 (const Quadrature<1> &points);
+
+  /**
+   * Return a string that uniquely identifies a finite element. This class
+   * returns <tt>FE_Q_DG0<dim>(degree)</tt>, with @p dim and @p degree
+   * replaced by appropriate values.
+   */
+  virtual std::string get_name () const;
+
+  /**
+   * Interpolate a set of scalar values, computed in the generalized support
+   * points.
+   */
+  virtual void interpolate(std::vector<double>       &local_dofs,
+                           const std::vector<double> &values) const;
+
+  /**
+   * Interpolate a set of vector values, computed in the generalized support
+   * points.
+   *
+   * Since a finite element often only interpolates part of a vector,
+   * <tt>offset</tt> is used to determine the first component of the vector to
+   * be interpolated. Maybe consider changing your data structures to use the
+   * next function.
+   */
+  virtual void interpolate(std::vector<double>                &local_dofs,
+                           const std::vector<Vector<double> > &values,
+                           unsigned int offset = 0) const;
+
+  /**
+   * Interpolate a set of vector values, computed in the generalized support
+   * points.
+   */
+  virtual void interpolate(
+    std::vector<double>          &local_dofs,
+    const VectorSlice<const std::vector<std::vector<double> > > &values) const;
+
+  /**
+   * Return the matrix interpolating from the given finite element to the
+   * present one.  The size of the matrix is then @p dofs_per_cell times
+   * <tt>source.dofs_per_cell</tt>.
+   *
+   * These matrices are only available if the source element is also a @p
+   * FE_Q_DG0 element. Otherwise, an exception of type
+   * FiniteElement<dim,spacedim>::ExcInterpolationNotImplemented is thrown.
+   */
+  virtual void
+  get_interpolation_matrix (const FiniteElement<dim,spacedim> &source,
+                            FullMatrix<double>       &matrix) const;
+
+
+  /**
+   * This function returns @p true, if the shape function @p shape_index has
+   * non-zero function values somewhere on the face @p face_index.
+   */
+  virtual bool has_support_on_face (const unsigned int shape_index,
+                                    const unsigned int face_index) const;
+
+  /**
+   * Return a list of constant modes of the element. For this element, there
+   * are two constant modes even though the element is scalar: the first
+   * constant mode is all ones for the usual FE_Q basis, and the second one
+   * uses only the discontinuous part.
+   */
+  virtual std::pair<Table<2,bool>, std::vector<unsigned int> >
+  get_constant_modes () const;
+
+protected:
+  /**
+   * @p clone function instead of a copy constructor.
+   *
+   * This function is needed by the constructors of @p FESystem.
+   */
+  virtual FiniteElement<dim,spacedim> *clone() const;
+
+private:
+
+  /**
+   * Return the restriction_is_additive flags. Only the last component is
+   * true.
+   */
+  static std::vector<bool> get_riaf_vector(const unsigned int degree);
+
+  /**
+   * Only for internal use. Its full name is @p get_dofs_per_object_vector
+   * function and it creates the @p dofs_per_object vector that is needed
+   * within the constructor to be passed to the constructor of @p
+   * FiniteElementData.
+   */
+  static std::vector<unsigned int> get_dpo_vector(const unsigned int degree);
+};
+
+
+
+/*@}*/
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/fe/fe_q_hierarchical.h b/include/deal.II/fe/fe_q_hierarchical.h
new file mode 100644
index 0000000..f3a78cc
--- /dev/null
+++ b/include/deal.II/fe/fe_q_hierarchical.h
@@ -0,0 +1,838 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2002 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__fe_q_hierarchical_h
+#define dealii__fe_q_hierarchical_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/tensor_product_polynomials.h>
+#include <deal.II/fe/fe_poly.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int dim, int spacedim> class MappingQ;
+
+
+/*!@addtogroup fe */
+/*@{*/
+
+/**
+ * Implementation of hierarchical @p Qp shape functions that yield the finite
+ * element space of continuous, piecewise polynomials of degree @p p. This
+ * class is realized using tensor product polynomials based on a hierarchical
+ * basis Polynomials::Hierarchical on the interval <tt>[0,1]</tt> which is
+ * suitable for building an @p hp tensor product finite element if we assume
+ * that each element has a single degree.
+ *
+ * The constructor of this class takes the degree @p p of this finite element.
+ *
+ * This class is not implemented for the codimension one case (<tt>spacedim !=
+ * dim</tt>).
+ *
+ * <h3>Implementation</h3>
+ *
+ * The constructor creates a TensorProductPolynomials object that includes the
+ * tensor product of @p Hierarchical polynomials of degree @p p. This @p
+ * TensorProductPolynomials object provides all values and derivatives of the
+ * shape functions.
+ *
+ * <h3>Numbering of the degrees of freedom (DoFs)</h3>
+ *
+ * The original ordering of the shape functions represented by the
+ * TensorProductPolynomials is a tensor product numbering. However, the shape
+ * functions on a cell are renumbered beginning with the shape functions whose
+ * support points are at the vertices, then on the line, on the quads, and
+ * finally (for 3d) on the hexes. To be explicit, these numberings are listed
+ * in the following:
+ *
+ * <h4>Q1 elements</h4>
+ *
+ * The $Q_1^H$ element is of polynomial degree one and, consequently, is
+ * exactly the same as the $Q_1$ element in class FE_Q. In particular, the
+ * shape functions are defined in the exact same way:
+ *
+ * <ul>
+ * <li> 1D case:
+ *   @verbatim
+ *      0-------1
+ *   @endverbatim
+ *
+ * <li> 2D case:
+ *   @verbatim
+ *      2-------3
+ *      |       |
+ *      |       |
+ *      |       |
+ *      0-------1
+ *   @endverbatim
+ *
+ * <li> 3D case:
+ *   @verbatim
+ *         6-------7        6-------7
+ *        /|       |       /       /|
+ *       / |       |      /       / |
+ *      /  |       |     /       /  |
+ *     4   |       |    4-------5   |
+ *     |   2-------3    |       |   3
+ *     |  /       /     |       |  /
+ *     | /       /      |       | /
+ *     |/       /       |       |/
+ *     0-------1        0-------1
+ *   @endverbatim
+ *
+ * The respective coordinate values of the support points of the degrees of
+ * freedom are as follows:
+ * <ul>
+ * <li> Shape function 0: <tt>[0, 0, 0]</tt>;
+ * <li> Shape function 1: <tt>[1, 0, 0]</tt>;
+ * <li> Shape function 2: <tt>[0, 1, 0]</tt>;
+ * <li> Shape function 3: <tt>[1, 1, 0]</tt>;
+ * <li> Shape function 4: <tt>[0, 0, 1]</tt>;
+ * <li> Shape function 5: <tt>[1, 0, 1]</tt>;
+ * <li> Shape function 6: <tt>[0, 1, 1]</tt>;
+ * <li> Shape function 7: <tt>[1, 1, 1]</tt>;
+ * </ul>
+ * </ul>
+ *
+ * In 2d, these shape functions look as follows: <table> <tr> <td
+ * align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q1/Q1H_shape0000.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q1/Q1H_shape0001.png
+ * </td> </tr> <tr> <td align="center"> $Q_1^H$ element, shape function 0
+ * </td>
+ *
+ * <td align="center"> $Q_1^H$ element, shape function 1 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q1/Q1H_shape0002.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q1/Q1H_shape0003.png
+ * </td> </tr> <tr> <td align="center"> $Q_1^H$ element, shape function 2
+ * </td>
+ *
+ * <td align="center"> $Q_1^H$ element, shape function 3 </td> </tr> </table>
+ *
+ *
+ * <h4>Q2 elements</h4>
+ * <ul>
+ * <li> 1D case:
+ *   @verbatim
+ *      0---2---1
+ *   @endverbatim
+ *
+ * <li> 2D case:
+ *   @verbatim
+ *      2---7---3
+ *      |       |
+ *      4   8   5
+ *      |       |
+ *      0---6---1
+ *   @endverbatim
+ *
+ * <li> 3D case:
+ *   @verbatim
+ *         6--15---7        6--15---7
+ *        /|       |       /       /|
+ *      12 |       19     12      1319
+ *      /  18      |     /       /  |
+ *     4   |       |    4---14--5   |
+ *     |   2---11--3    |       |   3
+ *     |  /       /     |      17  /
+ *    16 8       9     16       | 9
+ *     |/       /       |       |/
+ *     0---10--1        0---8---1
+ *
+ *         *-------*        *-------*
+ *        /|       |       /       /|
+ *       / |  23   |      /  25   / |
+ *      /  |       |     /       /  |
+ *     *   |       |    *-------*   |
+ *     |20 *-------*    |       |21 *
+ *     |  /       /     |   22  |  /
+ *     | /  24   /      |       | /
+ *     |/       /       |       |/
+ *     *-------*        *-------*
+ *   @endverbatim
+ * The center vertex has number 26.
+ *
+ * The respective coordinate values of the support points of the degrees of
+ * freedom are as follows:
+ * <ul>
+ * <li> Shape function 0: <tt>[0, 0, 0]</tt>;
+ * <li> Shape function 1: <tt>[1, 0, 0]</tt>;
+ * <li> Shape function 2: <tt>[0, 1, 0]</tt>;
+ * <li> Shape function 3: <tt>[1, 1, 0]</tt>;
+ * <li> Shape function 4: <tt>[0, 0, 1]</tt>;
+ * <li> Shape function 5: <tt>[1, 0, 1]</tt>;
+ * <li> Shape function 6: <tt>[0, 1, 1]</tt>;
+ * <li> Shape function 7: <tt>[1, 1, 1]</tt>;
+ * <li> Shape function 8: <tt>[0, 1/2, 0]</tt>;
+ * <li> Shape function 9: <tt>[1, 1/2, 0]</tt>;
+ * <li> Shape function 10: <tt>[1/2, 0, 0]</tt>;
+ * <li> Shape function 11: <tt>[1/2, 1, 0]</tt>;
+ * <li> Shape function 12: <tt>[0, 1/2, 1]</tt>;
+ * <li> Shape function 13: <tt>[1, 1/2, 1]</tt>;
+ * <li> Shape function 14: <tt>[1/2, 0, 1]</tt>;
+ * <li> Shape function 15: <tt>[1/2, 1, 1]</tt>;
+ * <li> Shape function 16: <tt>[0, 0, 1/2]</tt>;
+ * <li> Shape function 17: <tt>[1, 0, 1/2]</tt>;
+ * <li> Shape function 18: <tt>[0, 1, 1/2]</tt>;
+ * <li> Shape function 19: <tt>[1, 1, 1/2]</tt>;
+ * <li> Shape function 20: <tt>[0, 1/2, 1/2]</tt>;
+ * <li> Shape function 21: <tt>[1, 1/2, 1/2]</tt>;
+ * <li> Shape function 22: <tt>[1/2, 0, 1/2]</tt>;
+ * <li> Shape function 23: <tt>[1/2, 1, 1/2]</tt>;
+ * <li> Shape function 24: <tt>[1/2, 1/2, 0]</tt>;
+ * <li> Shape function 25: <tt>[1/2, 1/2, 1]</tt>;
+ * <li> Shape function 26: <tt>[1/2, 1/2, 1/2]</tt>;
+ * </ul>
+ * </ul>
+ *
+ *
+ * In 2d, these shape functions look as follows (the black plane corresponds
+ * to zero; negative shape function values may not be visible): <table> <tr>
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q2/Q2H_shape0000.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q2/Q2H_shape0001.png
+ * </td> </tr> <tr> <td align="center"> $Q_2^H$ element, shape function 0
+ * </td>
+ *
+ * <td align="center"> $Q_2^H$ element, shape function 1 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q2/Q2H_shape0002.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q2/Q2H_shape0003.png
+ * </td> </tr> <tr> <td align="center"> $Q_2^H$ element, shape function 2
+ * </td>
+ *
+ * <td align="center"> $Q_2^H$ element, shape function 3 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q2/Q2H_shape0004.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q2/Q2H_shape0005.png
+ * </td> </tr> <tr> <td align="center"> $Q_2^H$ element, shape function 4
+ * </td>
+ *
+ * <td align="center"> $Q_2^H$ element, shape function 5 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q2/Q2H_shape0006.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q2/Q2H_shape0007.png
+ * </td> </tr> <tr> <td align="center"> $Q_2^H$ element, shape function 6
+ * </td>
+ *
+ * <td align="center"> $Q_2^H$ element, shape function 7 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q2/Q2H_shape0008.png
+ * </td>
+ *
+ * <td align="center"> </td> </tr> <tr> <td align="center"> $Q_2^H$ element,
+ * shape function 8 </td>
+ *
+ * <td align="center"> </td> </tr> </table>
+ *
+ *
+ * <h4>Q3 elements</h4>
+ * <ul>
+ * <li> 1D case:
+ *   @verbatim
+ *      0--2--3--1
+ *   @endverbatim
+ *
+ * <li> 2D case:
+ *   @verbatim
+ *      2--10-11-3
+ *      |        |
+ *      5  14 15 7
+ *      |        |
+ *      4  12 13 6
+ *      |        |
+ *      0--8--9--1
+ *   @endverbatim
+ * </ul>
+ *
+ * In 2d, these shape functions look as follows (the black plane corresponds
+ * to zero; negative shape function values may not be visible): <table> <tr>
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q3/Q3H_shape0000.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q3/Q3H_shape0001.png
+ * </td> </tr> <tr> <td align="center"> $Q_3^H$ element, shape function 0
+ * </td>
+ *
+ * <td align="center"> $Q_3^H$ element, shape function 1 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q3/Q3H_shape0002.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q3/Q3H_shape0003.png
+ * </td> </tr> <tr> <td align="center"> $Q_3^H$ element, shape function 2
+ * </td>
+ *
+ * <td align="center"> $Q_3^H$ element, shape function 3 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q3/Q3H_shape0004.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q3/Q3H_shape0005.png
+ * </td> </tr> <tr> <td align="center"> $Q_3^H$ element, shape function 4
+ * </td>
+ *
+ * <td align="center"> $Q_3^H$ element, shape function 5 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q3/Q3H_shape0006.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q3/Q3H_shape0007.png
+ * </td> </tr> <tr> <td align="center"> $Q_3^H$ element, shape function 6
+ * </td>
+ *
+ * <td align="center"> $Q_3^H$ element, shape function 7 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q3/Q3H_shape0008.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q3/Q3H_shape0009.png
+ * </td> </tr> <tr> <td align="center"> $Q_3^H$ element, shape function 8
+ * </td>
+ *
+ * <td align="center"> $Q_3^H$ element, shape function 9 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q3/Q3H_shape0010.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q3/Q3H_shape0011.png
+ * </td> </tr> <tr> <td align="center"> $Q_3^H$ element, shape function 10
+ * </td>
+ *
+ * <td align="center"> $Q_3^H$ element, shape function 11 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q3/Q3H_shape0012.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q3/Q3H_shape0013.png
+ * </td> </tr> <tr> <td align="center"> $Q_3^H$ element, shape function 12
+ * </td>
+ *
+ * <td align="center"> $Q_3^H$ element, shape function 13 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q3/Q3H_shape0014.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q3/Q3H_shape0015.png
+ * </td> </tr> <tr> <td align="center"> $Q_3^H$ element, shape function 14
+ * </td>
+ *
+ * <td align="center"> $Q_3^H$ element, shape function 15 </td> </tr> </table>
+ *
+ *
+ * <h4>Q4 elements</h4>
+ * <ul>
+ * <li> 1D case:
+ *   @verbatim
+ *      0--2--3--4--1
+ *   @endverbatim
+ *
+ * <li> 2D case:
+ *   @verbatim
+ *      2--13-14-15-3
+ *      |           |
+ *      6  22 23 24 9
+ *      |           |
+ *      5  19 20 21 8
+ *      |           |
+ *      4  16 17 18 7
+ *      |           |
+ *      0--10-11-12-1
+ *   @endverbatim
+ * </ul>
+ *
+ * In 2d, these shape functions look as follows (the black plane corresponds
+ * to zero; negative shape function values may not be visible): <table> <tr>
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q4/Q4H_shape0000.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q4/Q4H_shape0001.png
+ * </td> </tr> <tr> <td align="center"> $Q_4^H$ element, shape function 0
+ * </td>
+ *
+ * <td align="center"> $Q_4^H$ element, shape function 1 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q4/Q4H_shape0002.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q4/Q4H_shape0003.png
+ * </td> </tr> <tr> <td align="center"> $Q_4^H$ element, shape function 2
+ * </td>
+ *
+ * <td align="center"> $Q_4^H$ element, shape function 3 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q4/Q4H_shape0004.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q4/Q4H_shape0005.png
+ * </td> </tr> <tr> <td align="center"> $Q_4^H$ element, shape function 4
+ * </td>
+ *
+ * <td align="center"> $Q_4^H$ element, shape function 5 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q4/Q4H_shape0006.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q4/Q4H_shape0007.png
+ * </td> </tr> <tr> <td align="center"> $Q_4^H$ element, shape function 6
+ * </td>
+ *
+ * <td align="center"> $Q_4^H$ element, shape function 7 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q4/Q4H_shape0008.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q4/Q4H_shape0009.png
+ * </td> </tr> <tr> <td align="center"> $Q_4^H$ element, shape function 8
+ * </td>
+ *
+ * <td align="center"> $Q_4^H$ element, shape function 9 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q4/Q4H_shape0010.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q4/Q4H_shape0011.png
+ * </td> </tr> <tr> <td align="center"> $Q_4^H$ element, shape function 10
+ * </td>
+ *
+ * <td align="center"> $Q_4^H$ element, shape function 11 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q4/Q4H_shape0012.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q4/Q4H_shape0013.png
+ * </td> </tr> <tr> <td align="center"> $Q_4^H$ element, shape function 12
+ * </td>
+ *
+ * <td align="center"> $Q_4^H$ element, shape function 13 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q4/Q4H_shape0014.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q4/Q4H_shape0015.png
+ * </td> </tr> <tr> <td align="center"> $Q_4^H$ element, shape function 14
+ * </td>
+ *
+ * <td align="center"> $Q_4^H$ element, shape function 15 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q4/Q4H_shape0016.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q4/Q4H_shape0017.png
+ * </td> </tr> <tr> <td align="center"> $Q_4^H$ element, shape function 16
+ * </td>
+ *
+ * <td align="center"> $Q_4^H$ element, shape function 17 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q4/Q4H_shape0018.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q4/Q4H_shape0019.png
+ * </td> </tr> <tr> <td align="center"> $Q_4^H$ element, shape function 18
+ * </td>
+ *
+ * <td align="center"> $Q_4^H$ element, shape function 19 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q4/Q4H_shape0020.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q4/Q4H_shape0021.png
+ * </td> </tr> <tr> <td align="center"> $Q_4^H$ element, shape function 20
+ * </td>
+ *
+ * <td align="center"> $Q_4^H$ element, shape function 21 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q4/Q4H_shape0022.png
+ * </td>
+ *
+ * <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q4/Q4H_shape0023.png
+ * </td> </tr> <tr> <td align="center"> $Q_4^H$ element, shape function 22
+ * </td>
+ *
+ * <td align="center"> $Q_4^H$ element, shape function 23 </td> </tr>
+ *
+ * <tr> <td align="center">
+ * @image html http://www.dealii.org/images/shape-functions/hierarchical/Q4/Q4H_shape0024.png
+ * </td>
+ *
+ * <td align="center"> </td> </tr> <tr> <td align="center"> $Q_4^H$ element,
+ * shape function 24 </td>
+ *
+ * <td align="center"> </td> </tr> </table>
+ *
+ *
+ *
+ * @author Brian Carnes, 2002, Ralf Hartmann 2004, 2005, Denis Davydov, 2015
+ */
+template <int dim>
+class FE_Q_Hierarchical : public FE_Poly<TensorProductPolynomials<dim>,dim>
+{
+public:
+  /**
+   * Constructor for tensor product polynomials of degree @p p.
+   */
+  FE_Q_Hierarchical (const unsigned int p);
+
+  /**
+   * Return a string that uniquely identifies a finite element. This class
+   * returns <tt>FE_Q_Hierarchical<dim>(degree)</tt>, with @p dim and @p
+   * degree replaced by appropriate values.
+   */
+  virtual std::string get_name () const;
+
+  /**
+   * This function returns @p true, if the shape function @p shape_index has
+   * non-zero function values somewhere on the face @p face_index.
+   */
+  virtual bool has_support_on_face (const unsigned int shape_index,
+                                    const unsigned int face_index) const;
+
+  /**
+   * @name Functions to support hp
+   * @{
+   */
+
+  /**
+   * Return whether this element implements its hanging node constraints in
+   * the new way, which has to be used to make elements "hp compatible".
+   *
+   * For the FE_Q_Hierarchical class the result is always true (independent of
+   * the degree of the element), as it implements the complete set of
+   * functions necessary for hp capability.
+   */
+  virtual bool hp_constraints_are_implemented () const;
+
+  /**
+   * Return the matrix interpolating from the given finite element to the
+   * present one. Interpolation only between FE_Q_Hierarchical is supported.
+   */
+  virtual void get_interpolation_matrix(const FiniteElement< dim> &source,
+                                        FullMatrix< double > &matrix) const;
+
+  /**
+   * Embedding matrix between grids. Only isotropic refinement is supported.
+   */
+  virtual const
+  FullMatrix<double> &get_prolongation_matrix  (const unsigned int child,
+                                                const RefinementCase<dim> &refinement_case = RefinementCase< dim >::isotropic_refinement) const;
+
+  /**
+   * If, on a vertex, several finite elements are active, the hp code first
+   * assigns the degrees of freedom of each of these FEs different global
+   * indices. It then calls this function to find out which of them should get
+   * identical values, and consequently can receive the same global DoF index.
+   * This function therefore returns a list of identities between DoFs of the
+   * present finite element object with the DoFs of @p fe_other, which is a
+   * reference to a finite element object representing one of the other finite
+   * elements active on this particular vertex. The function computes which of
+   * the degrees of freedom of the two finite element objects are equivalent,
+   * both numbered between zero and the corresponding value of dofs_per_vertex
+   * of the two finite elements. The first index of each pair denotes one of
+   * the vertex dofs of the present element, whereas the second is the
+   * corresponding index of the other finite element.
+   */
+  virtual
+  std::vector<std::pair<unsigned int, unsigned int> >
+  hp_vertex_dof_identities (const FiniteElement<dim> &fe_other) const;
+
+  /**
+   * Same as above but for lines.
+   */
+  virtual
+  std::vector<std::pair<unsigned int, unsigned int> >
+  hp_line_dof_identities (const FiniteElement<dim> &fe_other) const;
+
+  /**
+   * Same as above but for faces.
+   */
+  virtual
+  std::vector<std::pair<unsigned int, unsigned int> >
+  hp_quad_dof_identities (const FiniteElement<dim> &fe_other) const;
+
+  /*@}*/
+
+  /**
+   * Return the matrix interpolating from a face of one element to the face of
+   * the neighboring element. The size of the matrix is then
+   * <tt>source.dofs_per_face</tt> times <tt>this->dofs_per_face</tt>.
+   *
+   * Derived elements will have to implement this function. They may only
+   * provide interpolation matrices for certain source finite elements, for
+   * example those from the same family. If they don't implement interpolation
+   * from a given element, then they must throw an exception of type
+   * <tt>FiniteElement<dim>::ExcInterpolationNotImplemented</tt>.
+   */
+  virtual void get_face_interpolation_matrix (const FiniteElement<dim> &source, FullMatrix<double> &matrix) const;
+
+  /**
+   * Return the matrix interpolating from a face of one element to the subface
+   * of the neighboring element. The size of the matrix is then
+   * <tt>source.dofs_per_face</tt> times <tt>this->dofs_per_face</tt>.
+   *
+   * Derived elements will have to implement this function. They may only
+   * provide interpolation matrices for certain source finite elements, for
+   * example those from the same family. If they don't implement interpolation
+   * from a given element, then they must throw an exception of type
+   * <tt>ExcInterpolationNotImplemented</tt>.
+   */
+  virtual void get_subface_interpolation_matrix (const FiniteElement<dim> &source, const unsigned int subface, FullMatrix<double> &matrix) const;
+
+  /**
+   * Return whether this element dominates the one given as argument when they
+   * meet at a common face, whether it is the other way around, whether
+   * neither dominates, or if either could dominate.
+   *
+   * For a definition of domination, see FiniteElementDomination::Domination
+   * and in particular the
+   * @ref hp_paper "hp paper".
+   */
+  virtual
+  FiniteElementDomination::Domination
+  compare_for_face_domination (const FiniteElement<dim> &fe_other) const;
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   *
+   * This function is made virtual, since finite element objects are usually
+   * accessed through pointers to their base class, rather than the class
+   * itself.
+   */
+  virtual std::size_t memory_consumption () const;
+
+  /**
+   * For a finite element of degree @p sub_degree < @p degree, we return a
+   * vector which maps the numbering on an FE of degree @p sub_degree into the
+   * numbering on this element.
+   */
+  std::vector<unsigned int> get_embedding_dofs (const unsigned int sub_degree) const;
+
+  /**
+   * Returns a list of constant modes of the element. For this element, the
+   * list consists of true arguments for the first vertex shape functions and
+   * false for the remaining ones.
+   */
+  virtual std::pair<Table<2,bool>, std::vector<unsigned int> >
+  get_constant_modes () const;
+
+  /**
+   * This function is not implemented and throws an exception if called.
+   */
+  virtual
+  void interpolate(std::vector<double>       &local_dofs,
+                   const std::vector<double> &values) const;
+
+  /**
+   * This function is not implemented and throws an exception if called.
+   */
+  virtual
+  void
+  interpolate(std::vector<double>                &local_dofs,
+              const std::vector<Vector<double> > &values,
+              unsigned int offset = 0) const;
+
+  /**
+   * This function is not implemented and throws an exception if called.
+   */
+  virtual
+  void
+  interpolate(std::vector<double> &local_dofs,
+              const VectorSlice<const std::vector<std::vector<double> > > &values) const;
+
+
+protected:
+  /**
+   * @p clone function instead of a copy constructor.
+   *
+   * This function is needed by the constructors of @p FESystem.
+   */
+  virtual FiniteElement<dim> *clone() const;
+
+private:
+
+  /**
+   * Only for internal use. Its full name is @p get_dofs_per_object_vector
+   * function and it creates the @p dofs_per_object vector that is needed
+   * within the constructor to be passed to the constructor of @p
+   * FiniteElementData.
+   */
+  static std::vector<unsigned int> get_dpo_vector(const unsigned int degree);
+
+  /**
+   * The numbering of the degrees of freedom in continuous finite elements is
+   * hierarchic, i.e. in such a way that we first number the vertex dofs, in
+   * the order of the vertices as defined by the triangulation, then the line
+   * dofs in the order and respecting the direction of the lines, then the
+   * dofs on quads, etc.
+   *
+   * The dofs associated with 1d hierarchical polynomials are ordered with the
+   * vertices first ($\phi_0(x)=1-x$ and $\phi_1(x)=x$) and then the line dofs
+   * (the higher degree polynomials).  The 2d and 3d hierarchical polynomials
+   * originate from the 1d hierarchical polynomials by tensor product. In the
+   * following, the resulting numbering of dofs will be denoted by
+   * `fe_q_hierarchical numbering`.
+   *
+   * This function constructs a table giving, for each degree of freedom in
+   * the hierarchic numbering, its index in the fe_q_hierarchical numbering.
+   *
+   * This function is analogous to the
+   * FETools::hierarchical_to_lexicographic_numbering() function. However, in
+   * contrast to the fe_q_hierarchical numbering defined above, the
+   * lexicographic numbering originates from the tensor products of
+   * consecutive numbered dofs (like for LagrangeEquidistant).
+   *
+   * The returned vector has one entry per degree of freedom of the finite
+   * element described by @p fe; there is no output argument that the caller
+   * would have to size beforehand.
+   */
+  static
+  std::vector<unsigned int> hierarchic_to_fe_q_hierarchical_numbering (
+    const FiniteElementData<dim> &fe);
+
+  /**
+   * This is the analog of the previous function, but working on faces.
+   */
+  static
+  std::vector<unsigned int>
+  face_fe_q_hierarchical_to_hierarchic_numbering (const unsigned int degree);
+
+  /**
+   * Initialize two auxiliary fields that will be used in setting up the
+   * various matrices in the constructor.
+   */
+  void build_dofs_cell (std::vector<FullMatrix<double> > &dofs_cell,
+                        std::vector<FullMatrix<double> > &dofs_subcell) const;
+
+  /**
+   * Initialize the hanging node constraints matrices. Called from the
+   * constructor.
+   */
+  void initialize_constraints (const std::vector<FullMatrix<double> > &dofs_subcell);
+
+  /**
+   * Initialize the embedding matrices. Called from the constructor.
+   */
+  void initialize_embedding_and_restriction (const std::vector<FullMatrix<double> > &dofs_cell,
+                                             const std::vector<FullMatrix<double> > &dofs_subcell);
+
+  /**
+   * Initialize the @p unit_support_points field of the FiniteElement class.
+   * Called from the constructor.
+   */
+  void initialize_unit_support_points ();
+
+  /**
+   * Initialize the @p unit_face_support_points field of the FiniteElement
+   * class. Called from the constructor.
+   */
+  void initialize_unit_face_support_points ();
+
+  /**
+   * Mapping from lexicographic to shape function numbering on first face.
+   */
+  const std::vector<unsigned int> face_renumber;
+
+  /**
+   * Allow access from other dimensions. We need this since we want to call
+   * the functions @p get_dpo_vector and @p
+   * hierarchic_to_fe_q_hierarchical_numbering for the faces of the finite
+   * element of dimension dim+1.
+   */
+  template <int dim1> friend class FE_Q_Hierarchical;
+};
+
+/*@}*/
+
+/* ------- declaration of explicit specializations for dim == 1 -------- */
+
+template <>
+void FE_Q_Hierarchical<1>::initialize_unit_face_support_points ();
+
+template <>
+bool
+FE_Q_Hierarchical<1>::has_support_on_face (const unsigned int,
+                                           const unsigned int) const;
+
+template <>
+std::vector<unsigned int>
+FE_Q_Hierarchical<1>::face_fe_q_hierarchical_to_hierarchic_numbering (const unsigned int);
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/fe/fe_q_iso_q1.h b/include/deal.II/fe/fe_q_iso_q1.h
new file mode 100644
index 0000000..333f25e
--- /dev/null
+++ b/include/deal.II/fe/fe_q_iso_q1.h
@@ -0,0 +1,159 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__fe_q_iso_q1_h
+#define dealii__fe_q_iso_q1_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/tensor_product_polynomials.h>
+#include <deal.II/base/polynomials_piecewise.h>
+#include <deal.II/fe/fe_q_base.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/*!@addtogroup fe */
+/*@{*/
+
+/**
+ * Implementation of a scalar Lagrange finite element @p Qp-iso-Q1 that
+ * defines the finite element space of continuous, piecewise linear elements
+ * with @p p subdivisions in each coordinate direction. It yields an element
+ * with the same number of degrees of freedom as the @p Qp elements but using
+ * linear interpolation instead of a higher order one. This type of element is
+ * also called macro element in the literature as it really consists of
+ * several smaller elements, namely <i>p</i><tt><sup>dim</sup></tt>.
+ *
+ * The numbering of degrees of freedom is done in exactly the same way as in
+ * FE_Q of degree @p p. See there for a detailed description on how degrees of
+ * freedom are numbered within one element.
+ *
+ * This element represents a Q-linear finite element space on a reduced mesh
+ * size <i>h/p</i>. Its effect is equivalent to using FE_Q of degree one on a
+ * finer mesh by a factor @p p if an equivalent quadrature is used. However,
+ * this element reduces the flexibility in the choice of (adaptive) mesh size
+ * by exactly this factor @p p, which typically reduces efficiency. On the
+ * other hand, comparing this element with @p p subdivisions to the FE_Q
+ * element of degree @p p on the same mesh shows that the convergence is
+ * typically much worse for smooth problems. In particular, @p Qp elements
+ * achieve interpolation orders of <i>h<sup>p+1</sup></i> in the L2 norm,
+ * whereas these elements reach only <i>(h/p)<sup>2</sup></i>. For these two
+ * reasons, this element is usually not very useful as a standalone. In
+ * addition, any evaluation of face terms on the boundaries within the
+ * elements becomes impossible with this element.
+ *
+ * Nonetheless, there are a few use cases where this element actually is
+ * useful:
+ * <ol>
+ *
+ * <li> Systems of PDEs where certain variables demand higher resolutions
+ * than the others and the additional degrees of freedom should be spent on
+ * increasing the resolution of linears instead of higher order polynomials,
+ * and you do not want to use two different meshes for the different
+ * components. This can be the case when irregularities (shocks) appear in the
+ * solution and stabilization techniques are used that work for linears but
+ * not higher order elements. </li>
+ *
+ * <li> Stokes/Navier Stokes systems as the one discussed in step-22 could be
+ * solved with Q2-iso-Q1 elements for velocities instead of Q2 elements.
+ * Combined with Q1 pressures they give a stable mixed element pair. However,
+ * they perform worse than the standard approach in most situations.  </li>
+ *
+ * <li> Preconditioning systems of FE_Q systems of higher order @p p with a
+ * preconditioner based on @p Qp-iso-Q1 elements: Some preconditioners like
+ * algebraic multigrid perform much better with linear elements than with
+ * higher order elements because they often implicitly assume a sparse
+ * connectivity between entries. Then, creating a preconditioner matrix based
+ * on these elements yields the same number of degrees of freedom (and a
+ * spectrally equivalent linear system), which can be combined with a (high
+ * order) system matrix in an iterative solver like CG.  </li>
+ * </ol>
+ *
+ * <h3>Appropriate integration</h3>
+ *
+ * Due to the nature of these elements as a concatenation of linears, care
+ * must be taken when selecting quadrature formulas for this element. The
+ * standard choice for an element of @p p subelements is a formula
+ * <tt>QIterated<dim>(QGauss<1>(2), p)</tt>, which corresponds to the formula
+ * that would be used for integrating functions on a finer mesh. This is in
+ * contrast with FE_Q(p) where QGauss<dim>(p+1) is the default choice. In
+ * particular, care must be taken to not use a quadrature formula that
+ * evaluates the basis functions (and their derivatives) on sub-element
+ * boundaries as the gradients of piecewise functions on internal boundaries
+ * are set to zero. No checks are performed internally to ensure that this is
+ * not the case - it is the user's responsibility to avoid these situations.
+ *
+ * Also note that the usual deal.II routines for setting up sparsity patterns
+ * and assembling matrices do not make use of the increased sparsity in this
+ * element compared to FE_Q. This is because DoFTools::make_sparsity_pattern
+ * assumes coupling between all degrees of freedom within the element, whereas
+ * FE_Q_iso_Q1 with more than one subdivision does have less coupling.
+ *
+ * @author Martin Kronbichler, 2013
+ */
+template <int dim, int spacedim=dim>
+class FE_Q_iso_Q1 : public FE_Q_Base<TensorProductPolynomials<dim, Polynomials::PiecewisePolynomial<double> >,dim,spacedim>
+{
+public:
+  /**
+   * Constructs a FE_Q_iso_Q1 element with a given number of subdivisions. The
+   * number of subdivisions is similar to the degree in FE_Q in the sense that
+   * both elements produce the same number of degrees of freedom.
+   */
+  FE_Q_iso_Q1(const unsigned int n_subdivisions);
+
+  /**
+   * Return a string that uniquely identifies a finite element. This class
+   * returns <tt>FE_Q_iso_Q1<dim>(equivalent_degree)</tt>, with @p dim and @p
+   * equivalent_degree replaced by appropriate values.
+   */
+  virtual std::string get_name () const;
+
+  /**
+   * @name Functions to support hp
+   * @{
+   */
+
+  /**
+   * Return whether this element dominates the one given as argument when they
+   * meet at a common face, whether it is the other way around, whether
+   * neither dominates, or if either could dominate.
+   *
+   * For a definition of domination, see FiniteElementDomination::Domination
+   * and in particular the
+   * @ref hp_paper "hp paper".
+   */
+  virtual
+  FiniteElementDomination::Domination
+  compare_for_face_domination (const FiniteElement<dim,spacedim> &fe_other) const;
+  //@}
+
+protected:
+
+  /**
+   * @p clone function instead of a copy constructor.
+   *
+   * This function is needed by the constructors of @p FESystem.
+   */
+  virtual FiniteElement<dim,spacedim> *clone() const;
+};
+
+
+
+/*@}*/
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/fe/fe_rannacher_turek.h b/include/deal.II/fe/fe_rannacher_turek.h
new file mode 100644
index 0000000..17dbd0f
--- /dev/null
+++ b/include/deal.II/fe/fe_rannacher_turek.h
@@ -0,0 +1,109 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2015 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__fe_rannacher_turek_h
+#define dealii__fe_rannacher_turek_h
+
+#include <deal.II/base/polynomials_rannacher_turek.h>
+#include <deal.II/fe/fe_poly.h>
+#include <deal.II/fe/fe_base.h>
+#include <string>
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/**
+ * Implementation of Rannacher-Turek elements. Functions generated by this
+ * element will be discontinuous, but their jump along faces is mean value
+ * free.
+ *
+ * Implemented only in dimension 2, lowest order, without hanging nodes and
+ * restriction/prolongation.
+ *
+ * <h3>Interpolation</h3>
+ *
+ * <h4>Node values</h4> The
+ * @ref GlossNodes "node values"
+ * are moments on faces.
+ *
+ * <h4>Generalized support points</h4> To calculate the node values, we are
+ * using a QGauss rule on each face. By default, we are using a two point rule
+ * to integrate Rannacher-Turek functions exactly. But in order to be able to
+ * interpolate other functions with sufficient accuracy, the number of
+ * quadrature points used on a face can be adjusted in the constructor.
+ *
+ * @ingroup fe
+ * @author Patrick Esser
+ * @date 2015
+ */
+template <int dim>
+class FE_RannacherTurek : public FE_Poly<PolynomialsRannacherTurek<dim>, dim>
+{
+public:
+  /**
+   * Constructor for Rannacher-Turek element of degree @p degree, using @p
+   * n_face_support_points quadrature points on each face for interpolation.
+   * Notice that the element of degree 0 contains polynomials of degree 2.
+   *
+   * Only implemented for degree 0 in 2D.
+   */
+  FE_RannacherTurek(const unsigned int degree = 0,
+                    const unsigned int n_face_support_points = 2);
+  /** Identifying name string, and @p clone as a copy-constructor substitute. */
+  virtual std::string get_name() const;
+  virtual FiniteElement<dim> *clone() const;
+  /** Fill @p local_dofs from @p values; see "Interpolation" in the class docs. */
+  virtual void interpolate(
+    std::vector<double> &local_dofs,
+    const std::vector<double> &values) const;
+  virtual void interpolate(
+    std::vector<double> &local_dofs,
+    const std::vector<Vector<double> > &values,
+    unsigned int offset) const;
+  virtual void interpolate(
+    std::vector<double> &local_dofs,
+    const VectorSlice<const std::vector<std::vector<double> > > &values) const;
+private:
+  /**
+   * Degree of this element.
+   */
+  const unsigned int degree;
+  /**
+   * The number of quadrature points used on each face to evaluate node
+   * functionals during interpolation.
+   */
+  const unsigned int n_face_support_points;
+  /**
+   * The weights used on the faces to evaluate node functionals.
+   */
+  std::vector<double> weights;
+
+  /**
+   * Compute generalized support points and their weights.
+   */
+  void initialize_support_points();
+  /**
+   * Return information about degrees of freedom per object as needed during
+   * construction.
+   */
+  std::vector<unsigned int> get_dpo_vector();
+};
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/fe/fe_raviart_thomas.h b/include/deal.II/fe/fe_raviart_thomas.h
new file mode 100644
index 0000000..0ab401d
--- /dev/null
+++ b/include/deal.II/fe/fe_raviart_thomas.h
@@ -0,0 +1,348 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__fe_raviart_thomas_h
+#define dealii__fe_raviart_thomas_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/table.h>
+#include <deal.II/base/polynomials_raviart_thomas.h>
+#include <deal.II/base/polynomial.h>
+#include <deal.II/base/tensor_product_polynomials.h>
+#include <deal.II/base/geometry_info.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/fe_poly_tensor.h>
+
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int dim, int spacedim> class MappingQ;
+
+
+/*!@addtogroup fe */
+/*@{*/
+
+/**
+ * Implementation of Raviart-Thomas (RT) elements, conforming with the space
+ * H<sup>div</sup>. These elements generate vector fields with normal
+ * components continuous between mesh cells.
+ *
+ * We follow the usual definition of the degree of RT elements, which denotes
+ * the polynomial degree of the largest complete polynomial subspace contained
+ * in the RT space. Then, the approximation order of the function itself is
+ * <i>degree+1</i>, as with usual polynomial spaces. The numbering so chosen
+ * implies the sequence
+ * @f[
+ *   Q_{k+1}
+ *   \stackrel{\text{grad}}{\rightarrow}
+ *   \text{Nedelec}_k
+ *   \stackrel{\text{curl}}{\rightarrow}
+ *   \text{RaviartThomas}_k
+ *   \stackrel{\text{div}}{\rightarrow}
+ *   DGQ_{k}
+ * @f]
+ * The lowest order element is consequently FE_RaviartThomas(0).
+ *
+ * This class is not implemented for the codimension one case (<tt>spacedim !=
+ * dim</tt>).
+ *
+ * @todo Even if this element is implemented for two and three space
+ * dimensions, the definition of the node values relies on consistently
+ * oriented faces in 3D. Therefore, care should be taken on complicated
+ * meshes.
+ *
+ * <h3>Interpolation</h3>
+ *
+ * The
+ * @ref GlossInterpolation "interpolation"
+ * operators associated with the RT element are constructed such that
+ * interpolation and computing the divergence are commuting operations. We
+ * require this from interpolating arbitrary functions as well as the
+ * #restriction matrices.  It can be achieved by two interpolation schemes,
+ * the simplified one in FE_RaviartThomasNodal and the original one here:
+ *
+ * <h4>Node values on edges/faces</h4>
+ *
+ * On edges or faces, the
+ * @ref GlossNodes "node values"
+ * are the moments of the normal component of the interpolated function with
+ * respect to the traces of the RT polynomials. Since the normal trace of the
+ * RT space of degree <i>k</i> on an edge/face is the space
+ * <i>Q<sub>k</sub></i>, the moments are taken with respect to this space.
+ *
+ * <h4>Interior node values</h4>
+ *
+ * Higher order RT spaces have interior nodes. These are moments taken with
+ * respect to the gradient of functions in <i>Q<sub>k</sub></i> on the cell
+ * (this space is the matching space for RT<sub>k</sub> in a mixed
+ * formulation).
+ *
+ * <h4>Generalized support points</h4>
+ *
+ * The node values above rely on integrals, which will be computed by
+ * quadrature rules themselves. The generalized support points are a set of
+ * points such that this quadrature can be performed with sufficient accuracy.
+ * The points needed are those of QGauss<sub>k+1</sub> on each face as well as
+ * QGauss<sub>k</sub> in the interior of the cell (or none for
+ * RT<sub>0</sub>).
+ *
+ *
+ * @author Guido Kanschat, 2005, based on previous work by Wolfgang Bangerth
+ */
+template <int dim>
+class FE_RaviartThomas
+  :
+  public FE_PolyTensor<PolynomialsRaviartThomas<dim>, dim>
+{
+public:
+  /**
+   * Constructor for the Raviart-Thomas element of degree @p p.
+   */
+  FE_RaviartThomas (const unsigned int p);
+
+  /**
+   * Return a string that uniquely identifies a finite element. This class
+   * returns <tt>FE_RaviartThomas<dim>(degree)</tt>, with @p dim and @p degree
+   * replaced by appropriate values.
+   */
+  virtual std::string get_name () const;
+
+
+  /**
+   * Return @p true if the shape function @p shape_index has non-zero
+   * function values somewhere on the face @p face_index.
+   *
+   * Right now, this is only implemented for RT0 in 1D. Otherwise, this
+   * function always returns @p true.
+   */
+  virtual bool has_support_on_face (const unsigned int shape_index,
+                                    const unsigned int face_index) const;
+
+  virtual void interpolate(std::vector<double>                &local_dofs,
+                           const std::vector<double> &values) const;
+  virtual void interpolate(std::vector<double>                &local_dofs,
+                           const std::vector<Vector<double> > &values,
+                           unsigned int offset = 0) const;
+  virtual void interpolate(
+    std::vector<double> &local_dofs,
+    const VectorSlice<const std::vector<std::vector<double> > > &values) const;
+
+  /**
+   * Return a list of constant modes of the element. This method is currently
+   * not correctly implemented because it returns ones for all components.
+   */
+  virtual std::pair<Table<2,bool>, std::vector<unsigned int> >
+  get_constant_modes () const;
+
+  virtual std::size_t memory_consumption () const;
+  virtual FiniteElement<dim> *clone() const;
+
+private:
+  /**
+   * Only for internal use. The name is short for
+   * @p get_dofs_per_object_vector: the function creates the
+   * @p dofs_per_object vector that the constructor needs in order to pass it
+   * on to the constructor of @p FiniteElementData.
+   */
+  static std::vector<unsigned int>
+  get_dpo_vector (const unsigned int degree);
+
+  /**
+   * Initialize the @p generalized_support_points field of the FiniteElement
+   * class and fill the tables with interpolation weights (#boundary_weights
+   * and #interior_weights). Called from the constructor.
+   */
+  void initialize_support_points (const unsigned int rt_degree);
+
+  /**
+   * Initialize the interpolation from functions on refined mesh cells onto
+   * the father cell. According to the philosophy of the Raviart-Thomas
+   * element, this restriction operator preserves the divergence of a function
+   * weakly.
+   */
+  void initialize_restriction ();
+
+  /**
+   * These are the factors multiplied to a function in the
+   * #generalized_face_support_points when computing the integration. They are
+   * organized such that there is one row for each generalized face support
+   * point and one column for each degree of freedom on the face.
+   *
+   * See the
+   * @ref GlossGeneralizedSupport "glossary entry on generalized support points"
+   * for more information.
+   */
+  Table<2, double> boundary_weights;
+
+  /**
+   * Precomputed factors for interpolation of interior degrees of freedom. The
+   * rationale for this Table is the same as for #boundary_weights. Only, this
+   * table has a third coordinate for the space direction of the component
+   * evaluated.
+   */
+  Table<3, double> interior_weights;
+
+  /**
+   * Allow access from other dimensions.
+   */
+  template <int dim1> friend class FE_RaviartThomas;
+};
+
+
+
+/**
+ * The Raviart-Thomas elements with node functionals defined as point values
+ * in Gauss points.
+ *
+ * <h3>Description of node values</h3>
+ *
+ * For this Raviart-Thomas element, the node values are not cell and face
+ * moments with respect to certain polynomials, but the values in quadrature
+ * points. Following the general scheme for numbering degrees of freedom, the
+ * node values on edges are first, edge by edge, according to the natural
+ * ordering of the edges of a cell. The interior degrees of freedom are last.
+ *
+ * For an RT-element of degree <i>k</i>, we choose <i>(k+1)<sup>d-1</sup></i>
+ * Gauss points on each face. These points are ordered lexicographically with
+ * respect to the orientation of the face. This way, the normal component
+ * which is in <i>Q<sub>k</sub></i> is uniquely determined. Furthermore, since
+ * this Gauss-formula is exact on <i>Q<sub>2k+1</sub></i>, these node values
+ * correspond to the exact integration of the moments of the RT-space.
+ *
+ * In the interior of the cells, the moments are with respect to an
+ * anisotropic <i>Q<sub>k</sub></i> space, where the test functions are one
+ * degree lower in the direction corresponding to the vector component under
+ * consideration. This is emulated by using an anisotropic Gauss formula for
+ * integration.
+ *
+ * @todo The current implementation is for Cartesian meshes only. You must use
+ * MappingCartesian.
+ *
+ * @todo Even if this element is implemented for two and three space
+ * dimensions, the definition of the node values relies on consistently
+ * oriented faces in 3D. Therefore, care should be taken on complicated
+ * meshes.
+ *
+ * @note The degree stored in the member variable
+ * FiniteElementData<dim>::degree is higher by one than the constructor
+ * argument!
+ *
+ * @author Guido Kanschat, 2005, Zhu Liang, 2008
+ */
+template <int dim>
+class FE_RaviartThomasNodal
+  :
+  public FE_PolyTensor<PolynomialsRaviartThomas<dim>, dim>
+{
+public:
+  /**
+   * Constructor for the Raviart-Thomas element of degree @p p.
+   */
+  FE_RaviartThomasNodal (const unsigned int p);
+
+  /**
+   * Return a string that uniquely identifies a finite element. This class
+   * returns <tt>FE_RaviartThomasNodal<dim>(degree)</tt>, with @p dim and @p
+   * degree replaced by appropriate values.
+   */
+  virtual std::string get_name () const;
+
+  virtual FiniteElement<dim> *clone () const;
+
+  virtual void interpolate(std::vector<double>                &local_dofs,
+                           const std::vector<double> &values) const;
+  virtual void interpolate(std::vector<double>                &local_dofs,
+                           const std::vector<Vector<double> > &values,
+                           unsigned int offset = 0) const;
+  virtual void interpolate(
+    std::vector<double> &local_dofs,
+    const VectorSlice<const std::vector<std::vector<double> > > &values) const;
+
+
+  virtual void get_face_interpolation_matrix (const FiniteElement<dim> &source,
+                                              FullMatrix<double>       &matrix) const;
+
+  virtual void get_subface_interpolation_matrix (const FiniteElement<dim> &source,
+                                                 const unsigned int        subface,
+                                                 FullMatrix<double>       &matrix) const;
+  virtual bool hp_constraints_are_implemented () const;
+
+  virtual std::vector<std::pair<unsigned int, unsigned int> >
+  hp_vertex_dof_identities (const FiniteElement<dim> &fe_other) const;
+
+  virtual std::vector<std::pair<unsigned int, unsigned int> >
+  hp_line_dof_identities (const FiniteElement<dim> &fe_other) const;
+
+  virtual std::vector<std::pair<unsigned int, unsigned int> >
+  hp_quad_dof_identities (const FiniteElement<dim> &fe_other) const;
+
+  virtual FiniteElementDomination::Domination
+  compare_for_face_domination (const FiniteElement<dim> &fe_other) const;
+
+private:
+  /**
+   * Only for internal use. The name is short for
+   * @p get_dofs_per_object_vector: the function creates the
+   * @p dofs_per_object vector that the constructor needs in order to pass it
+   * on to the constructor of @p FiniteElementData.
+   */
+  static std::vector<unsigned int>
+  get_dpo_vector (const unsigned int degree);
+
+  /**
+   * Compute the vector used for the @p restriction_is_additive field passed
+   * to the base class's constructor.
+   */
+  static std::vector<bool>
+  get_ria_vector (const unsigned int degree);
+
+  /**
+   * Return @p true if the shape function @p shape_index has non-zero
+   * function values somewhere on the face @p face_index.
+   *
+   * Right now, this is only implemented for RT0 in 1D. Otherwise, this
+   * function always returns @p true.
+   */
+  virtual bool has_support_on_face (const unsigned int shape_index,
+                                    const unsigned int face_index) const;
+  /**
+   * Initialize the FiniteElement<dim>::generalized_support_points and
+   * FiniteElement<dim>::generalized_face_support_points fields. Called from
+   * the constructor.
+   *
+   * See the
+   * @ref GlossGeneralizedSupport "glossary entry on generalized support points"
+   * for more information.
+   */
+  void initialize_support_points (const unsigned int rt_degree);
+};
+
+
+/*@}*/
+
+/* -------------- declaration of explicit specializations ------------- */
+
+#ifndef DOXYGEN
+
+template <>
+void
+FE_RaviartThomas<1>::initialize_restriction();
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/fe/fe_system.h b/include/deal.II/fe/fe_system.h
new file mode 100644
index 0000000..8c936a7
--- /dev/null
+++ b/include/deal.II/fe/fe_system.h
@@ -0,0 +1,1088 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__fe_system_h
+#define dealii__fe_system_h
+
+
+/*----------------------------   fe_system.h     ---------------------------*/
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/thread_management.h>
+#include <deal.II/fe/fe.h>
+#include <vector>
+#include <utility>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/**
+ * This class provides an interface to group several elements together into
+ * one. To the outside world, the resulting object looks just like a usual
+ * finite element object, which is composed of several other finite elements
+ * that are possibly of different type. The result is then a vector-valued
+ * finite element. %Vector valued elements are discussed in a number of
+ * tutorial programs, for example step-8, step-20, step-21, and in particular
+ * in the
+ * @ref vector_valued
+ * module.
+ *
+ * @dealiiVideoLecture{19,20}
+ *
+ * <h3>FESystem, components and blocks</h3>
+ *
+ * An FESystem, except in the most trivial case, produces a vector-valued
+ * finite element with several components. The number of components
+ * n_components() corresponds to the dimension of the solution function in the
+ * PDE system, and correspondingly also to the number of equations your PDE
+ * system has. For example, the mixed Laplace system covered in step-20 has
+ * $d+1$ components in $d$ space dimensions: the scalar pressure and the $d$
+ * components of the velocity vector. Similarly, the elasticity equation
+ * covered in step-8 has $d$ components in $d$ space dimensions. In general,
+ * the number of components of a FESystem element is the accumulated number of
+ * components of all base elements times their multiplicities. A bit more on
+ * components is also given in the
+ * @ref GlossComponent "glossary entry on components".
+ *
+ * While the concept of components is important from the viewpoint of a
+ * partial differential equation, the finite element side looks a bit
+ * different, since not only FESystem, but also vector-valued elements like
+ * FE_RaviartThomas, have several components. The concept needed here is a
+ * @ref GlossBlock "block".
+ * Each block encompasses the set of degrees of freedom associated with a
+ * single base element of an FESystem, where base elements with multiplicities
+ * count multiple times. These blocks are usually addressed using the
+ * information in DoFHandler::block_info(). The number of blocks of a FESystem
+ * object is simply the sum of all multiplicities of base elements and is
+ * given by n_blocks().
+ *
+ * For example, the FESystem for the Taylor-Hood element for the three-
+ * dimensional Stokes problem can be built using the code
+ *
+ * @code
+ * FE_Q<3> u(2);
+ * FE_Q<3> p(1);
+ * FESystem<3> sys1(u,3, p,1);
+ * @endcode
+ *
+ * This example creates an FESystem @p sys1 with four components, three for
+ * the velocity components and one for the pressure, and also four blocks with
+ * the degrees of freedom of each of the velocity components and the pressure
+ * in a separate block each. The number of blocks is four since the first base
+ * element is repeated three times.
+ *
+ * On the other hand, a Taylor-Hood element can also be constructed using
+ *
+ * @code
+ * FESystem<3> U(u,3);
+ * FESystem<3> sys2(U,1, p,1);
+ * @endcode
+ *
+ * The FESystem @p sys2 created here has the same four components, but the
+ * degrees of freedom are distributed into only two blocks. The first block
+ * has all velocity degrees of freedom from @p U, while the second block
+ * contains the pressure degrees of freedom. Note that while @p U itself has 3
+ * blocks, the FESystem @p sys2 does not attempt to split @p U into its base
+ * elements but considers it a block of its own. By blocking all velocities
+ * into one system first as in @p sys2, we achieve the same block structure
+ * that would be generated if instead of using a $Q_2^3$ element for the
+ * velocities we had used vector-valued base elements, for instance like using
+ * a mixed discretization of Darcy's law using
+ *
+ * @code
+ * FE_RaviartThomas<3> u(1);
+ * FE_DGQ<3> p(1);
+ * FESystem<3> sys3(u,1, p,1);
+ * @endcode
+ *
+ * This example also produces a system with four components, but only two
+ * blocks.
+ *
+ * In most cases, the composed element behaves as if it were a usual element.
+ * It just has more degrees of freedom than most of the "common" elements.
+ * However the underlying structure is visible in the restriction,
+ * prolongation and interface constraint matrices, which do not couple the
+ * degrees of freedom of the base elements. E.g. the continuity requirement is
+ * imposed for the shape functions of the subobjects separately; no
+ * requirement exists between shape functions of different subobjects, i.e. in
+ * the above example: on a hanging node, the respective value of the @p u
+ * velocity is only coupled to @p u at the vertices and the line on the larger
+ * cell next to this vertex, but there is no interaction with @p v and @p w of
+ * this or the other cell.
+ *
+ *
+ * <h3>Internal information on numbering of degrees of freedom</h3>
+ *
+ * The overall numbering of degrees of freedom is as follows: for each
+ * subobject (vertex, line, quad, or hex), the degrees of freedom are numbered
+ * such that we run over all subelements first, before turning for the next
+ * dof on this subobject or for the next subobject. For example, for an
+ * element of three components in one space dimension, the first two
+ * components being cubic Lagrange elements and the third being a quadratic
+ * Lagrange element, the ordering for the system <tt>s=(u,v,p)</tt> is:
+ *
+ * <ul>
+ * <li> First vertex: <tt>u0, v0, p0 = s0, s1, s2</tt>
+ * <li> Second vertex: <tt>u1, v1, p1 = s3, s4, s5</tt>
+ * <li> First component on the line: <tt>u2, u3 = s6, s7</tt>
+ * <li> Second component on the line: <tt>v2, v3 = s8, s9</tt>.
+ * <li> Third component on the line: <tt>p2 = s10</tt>.
+ * </ul>
+ * That said, you should not rely on this numbering in your application as
+ * these %internals might change in future. Rather use the functions
+ * system_to_component_index() and component_to_system_index().
+ *
+ * For more information on the template parameter <tt>spacedim</tt> see the
+ * documentation of Triangulation.
+ *
+ * @ingroup febase fe vector_valued
+ *
+ * @author Wolfgang Bangerth, Guido Kanschat, 1999, 2002, 2003, 2006, Ralf
+ * Hartmann 2001.
+ */
+template <int dim, int spacedim=dim>
+class FESystem : public FiniteElement<dim,spacedim>
+{
+public:
+
+  /**
+   * Constructor. Take a finite element and the number of elements you want to
+   * group together using this class.
+   *
+   * The object @p fe is not actually used for anything other than creating a
+   * copy that will then be owned by the current object. In other words, it is
+   * completely fine to call this constructor with a temporary object for the
+   * finite element, as in this code snippet:
+   * @code
+   *   FESystem<dim> fe (FE_Q<dim>(2), 2);
+   * @endcode
+   * Here, <code>FE_Q@<dim@>(2)</code> constructs an unnamed, temporary object
+   * that is passed to the FESystem constructor to create a finite element
+   * that consists of two components, both of which are quadratic FE_Q
+   * elements. The temporary is destroyed again at the end of the code that
+   * corresponds to this line, but this does not matter because FESystem
+   * creates its own copy of the FE_Q object.
+   *
+   * This constructor (or its variants below) is used in essentially all
+   * tutorial programs that deal with vector valued problems. See step-8,
+   * step-20, step-22 and others for use cases. Also see the module on
+   * @ref vector_valued "Handling vector valued problems".
+   *
+   * @dealiiVideoLecture{19,20}
+   *
+   * @param[in] fe The finite element that will be used to represent the
+   * components of this composed element.
+   * @param[in] n_elements An integer denoting how many copies of @p fe this
+   * element should consist of.
+   */
+  FESystem (const FiniteElement<dim,spacedim> &fe,
+            const unsigned int n_elements);
+
+  /**
+   * Constructor for mixed discretizations with two base elements.
+   *
+   * See the other constructor above for an explanation of the general idea of
+   * composing elements.
+   */
+  FESystem (const FiniteElement<dim,spacedim> &fe1, const unsigned int n1,
+            const FiniteElement<dim,spacedim> &fe2, const unsigned int n2);
+
+  /**
+   * Constructor for mixed discretizations with three base elements.
+   *
+   * See the other constructor above for an explanation of the general idea of
+   * composing elements.
+   */
+  FESystem (const FiniteElement<dim,spacedim> &fe1, const unsigned int n1,
+            const FiniteElement<dim,spacedim> &fe2, const unsigned int n2,
+            const FiniteElement<dim,spacedim> &fe3, const unsigned int n3);
+
+  /**
+   * Constructor for mixed discretizations with four base elements.
+   *
+   * See the first of the other constructors above for an explanation of the
+   * general idea of composing elements.
+   */
+  FESystem (const FiniteElement<dim,spacedim> &fe1, const unsigned int n1,
+            const FiniteElement<dim,spacedim> &fe2, const unsigned int n2,
+            const FiniteElement<dim,spacedim> &fe3, const unsigned int n3,
+            const FiniteElement<dim,spacedim> &fe4, const unsigned int n4);
+
+  /**
+   * Constructor for mixed discretizations with five base elements.
+   *
+   * See the first of the other constructors above for an explanation of the
+   * general idea of composing elements.
+   */
+  FESystem (const FiniteElement<dim,spacedim> &fe1, const unsigned int n1,
+            const FiniteElement<dim,spacedim> &fe2, const unsigned int n2,
+            const FiniteElement<dim,spacedim> &fe3, const unsigned int n3,
+            const FiniteElement<dim,spacedim> &fe4, const unsigned int n4,
+            const FiniteElement<dim,spacedim> &fe5, const unsigned int n5);
+
+  /**
+   * Same as above but for any number of base elements. Pointers to the base
+   * elements and their multiplicities are passed as vectors to this
+   * constructor. The lengths of these vectors are assumed to be equal.
+   *
+   * As above, the finite element objects pointed to by the first argument are
+   * not actually used other than to create copies internally. Consequently,
+   * you can delete these pointers immediately again after calling this
+   * constructor.
+   *
+   * <h4>How to use this constructor</h4>
+   *
+   * Using this constructor is a bit awkward at times because you need to pass
+   * two vectors in a place where it may not be straightforward to construct
+   * such a vector -- for example, in the member initializer list of a class
+   * with an FESystem member variable. For example, if your main class looks
+   * like this:
+   * @code
+   *   template <int dim>
+   *   class MySimulator {
+   *   public:
+   *     MySimulator (const unsigned int polynomial_degree);
+   *   private:
+   *     FESystem<dim> fe;
+   *   };
+   *
+   *   template <int dim>
+   *   MySimulator<dim>::MySimulator (const unsigned int polynomial_degree)
+   *     :
+   *     fe (...)  // what to pass here???
+   *   {}
+   * @endcode
+   *
+   * If your compiler supports the C++11 language standard (or later) and
+   * deal.II has been configured to use it, then you could do something like
+   * this to create an element with four base elements and multiplicities 1,
+   * 2, 3 and 4:
+   * @code
+   *   template <int dim>
+   *   MySimulator<dim>::MySimulator (const unsigned int polynomial_degree)
+   *     :
+   *     fe (std::vector<const FiniteElement<dim>*> { new FE_Q<dim>(1),
+   *                                                  new FE_Q<dim>(2),
+   *                                                  new FE_Q<dim>(3),
+   *                                                  new FE_Q<dim>(4) },
+   *         std::vector<unsigned int> { 1, 2, 3, 4 })
+   *   {}
+   * @endcode
+   * This creates two vectors in place and initializes them using the
+   * initializer list enclosed in braces <code>{ ... }</code>.
+   *
+   * This code has a problem: it creates four memory leaks because the first
+   * vector above is created with pointers to elements that are allocated with
+   * <code>new</code> but never destroyed. Without C++11, you have another
+   * problem: brace-initializers don't exist in earlier C++ standards.
+   *
+   * The solution to the second of these problems is to create two static
+   * member functions that can create vectors. Here is an example:
+   * @code
+   *   template <int dim>
+   *   class MySimulator {
+   *   public:
+   *     MySimulator (const unsigned int polynomial_degree);
+   *
+   *   private:
+   *     FESystem<dim> fe;
+   *
+   *     static std::vector<const FiniteElement<dim>*>
+   *     create_fe_list (const unsigned int polynomial_degree);
+   *
+   *     static std::vector<unsigned int>
+   *     create_fe_multiplicities ();
+   *   };
+   *
+   *   template <int dim>
+   *   std::vector<const FiniteElement<dim>*>
+   *   MySimulator<dim>::create_fe_list (const unsigned int polynomial_degree)
+   *   {
+   *     std::vector<const FiniteElement<dim>*> fe_list;
+   *     fe_list.push_back (new FE_Q<dim>(1));
+   *     fe_list.push_back (new FE_Q<dim>(2));
+   *     fe_list.push_back (new FE_Q<dim>(3));
+   *     fe_list.push_back (new FE_Q<dim>(4));
+   *     return fe_list;
+   *   }
+   *
+   *   template <int dim>
+   *   std::vector<unsigned int>
+   *   MySimulator<dim>::create_fe_multiplicities ()
+   *   {
+   *     std::vector<unsigned int> multiplicities;
+   *     multiplicities.push_back (1);
+   *     multiplicities.push_back (2);
+   *     multiplicities.push_back (3);
+   *     multiplicities.push_back (4);
+   *     return multiplicities;
+   *   }
+   *
+   *   template <int dim>
+   *   MySimulator<dim>::MySimulator (const unsigned int polynomial_degree)
+   *     :
+   *     fe (create_fe_list (polynomial_degree),
+   *         create_fe_multiplicities ())
+   *   {}
+   * @endcode
+   *
+   * The way this works is that we have two static member functions that
+   * create the necessary vectors to pass to the constructor of the member
+   * variable <code>fe</code>. They need to be static because they are called
+   * during the constructor of <code>MySimulator</code> at a time when the
+   * <code>*this</code> object isn't fully constructed and, consequently,
+   * regular member functions cannot be called yet.
+   *
+   * The code above does not solve the problem with the memory leak yet,
+   * though: the <code>create_fe_list()</code> function creates a vector of
+   * pointers, but nothing destroys these. This is the solution:
+   * @code
+   *   template <int dim>
+   *   class MySimulator {
+   *   public:
+   *     MySimulator (const unsigned int polynomial_degree);
+   *
+   *   private:
+   *     FESystem<dim> fe;
+   *
+   *     struct VectorElementDestroyer {
+   *       const std::vector<const FiniteElement<dim>*> data;
+   *       VectorElementDestroyer (const std::vector<const FiniteElement<dim>*> &pointers);
+   *       ~VectorElementDestroyer (); // destructor to delete the pointers
+   *       const std::vector<const FiniteElement<dim>*> & get_data () const;
+   *     };
+   *
+   *     static std::vector<const FiniteElement<dim>*>
+   *     create_fe_list (const unsigned int polynomial_degree);
+   *
+   *     static std::vector<unsigned int>
+   *     create_fe_multiplicities ();
+   *   };
+   *
+   *   template <int dim>
+   *   MySimulator<dim>::VectorElementDestroyer::
+   *   VectorElementDestroyer (const std::vector<const FiniteElement<dim>*> &pointers)
+   *     : data(pointers)
+   *   {}
+   *
+   *   template <int dim>
+   *   MySimulator<dim>::VectorElementDestroyer::
+   *   ~VectorElementDestroyer ()
+   *   {
+   *     for (unsigned int i=0; i<data.size(); ++i)
+   *       delete data[i];
+   *   }
+   *
+   *   template <int dim>
+   *   const std::vector<const FiniteElement<dim>*> &
+   *   MySimulator<dim>::VectorElementDestroyer::
+   *   get_data () const
+   *   {
+   *     return data;
+   *   }
+   *
+   *
+   *   template <int dim>
+   *   MySimulator<dim>::MySimulator (const unsigned int polynomial_degree)
+   *     :
+   *     fe (VectorElementDestroyer(create_fe_list (polynomial_degree)).get_data(),
+   *         create_fe_multiplicities ())
+   *   {}
+   * @endcode
+   *
+   * In other words, the vector we receive from the
+   * <code>create_fe_list()</code> is packed into a temporary object of type
+   * <code>VectorElementDestroyer</code>; we then get the vector from this
+   * temporary object immediately to pass it to the constructor of
+   * <code>fe</code>; and finally, the <code>VectorElementDestroyer</code>
+   * destructor is called at the end of the entire expression (after the
+   * constructor of <code>fe</code> has finished) and destroys the elements of
+   * the temporary vector. Voila: neither short nor elegant, but it works!
+   */
+  FESystem (const std::vector<const FiniteElement<dim,spacedim>*> &fes,
+            const std::vector<unsigned int>                   &multiplicities);
+
+  /**
+   * Destructor.
+   */
+  virtual ~FESystem ();
+
+  /**
+   * Return a string that uniquely identifies a finite element. This element
+   * returns a string that is composed of the strings @p name1...@p nameN
+   * returned by the basis elements. From these, we create a sequence
+   * <tt>FESystem<dim>[name1^m1-name2^m2-...-nameN^mN]</tt>, where @p mi are
+   * the multiplicities of the basis elements. If a multiplicity is equal to
+   * one, then the superscript is omitted.
+   */
+  virtual std::string get_name () const;
+
+  // for documentation, see the FiniteElement base class
+  virtual
+  UpdateFlags
+  requires_update_flags (const UpdateFlags update_flags) const;
+
+  /**
+   * Return the value of the @p ith shape function at the point @p p.  @p p is
+   * a point on the reference element. Since this finite element is always
+   * vector-valued, we return the value of the only non-zero component of the
+   * vector value of this shape function. If the shape function has more than
+   * one non-zero component (which we refer to with the term non-primitive),
+   * then throw an exception of type @p ExcShapeFunctionNotPrimitive.
+   *
+   * An @p ExcUnitShapeValuesDoNotExist is thrown if the shape values of the
+   * @p FiniteElement (corresponding to the @p ith shape function) depend on
+   * the shape of the cell in real space.
+   */
+  virtual double shape_value (const unsigned int i,
+                              const Point<dim> &p) const;
+
+  /**
+   * Return the value of the @p componentth vector component of the @p ith
+   * shape function at the point @p p. See the FiniteElement base class for
+   * more information about the semantics of this function.
+   *
+   * Since this element is vector valued in general, it relays the computation
+   * of these values to the base elements.
+   */
+  virtual double shape_value_component (const unsigned int i,
+                                        const Point<dim> &p,
+                                        const unsigned int component) const;
+
+  /**
+   * Return the gradient of the @p ith shape function at the point @p p. @p p
+   * is a point on the reference element, and likewise the gradient is the
+   * gradient on the unit cell with respect to unit cell coordinates. Since
+   * this finite element is always vector-valued, we return the value of the
+   * only non-zero component of the vector value of this shape function. If
+   * the shape function has more than one non-zero component (which we refer
+   * to with the term non-primitive), then throw an exception of type @p
+   * ExcShapeFunctionNotPrimitive.
+   *
+   * An @p ExcUnitShapeValuesDoNotExist is thrown if the shape values of the
+   * @p FiniteElement (corresponding to the @p ith shape function) depend on
+   * the shape of the cell in real space.
+   */
+  virtual Tensor<1,dim> shape_grad (const unsigned int  i,
+                                    const Point<dim>   &p) const;
+
+  /**
+   * Return the gradient of the @p componentth vector component of the @p ith
+   * shape function at the point @p p. See the FiniteElement base class for
+   * more information about the semantics of this function.
+   *
+   * Since this element is vector valued in general, it relays the computation
+   * of these values to the base elements.
+   */
+  virtual Tensor<1,dim> shape_grad_component (const unsigned int i,
+                                              const Point<dim> &p,
+                                              const unsigned int component) const;
+
+  /**
+   * Return the tensor of second derivatives of the @p ith shape function at
+   * point @p p on the unit cell. The derivatives are derivatives on the unit
+   * cell with respect to unit cell coordinates. Since this finite element is
+   * always vector-valued, we return the value of the only non-zero component
+   * of the vector value of this shape function. If the shape function has
+   * more than one non-zero component (which we refer to with the term non-
+   * primitive), then throw an exception of type @p
+   * ExcShapeFunctionNotPrimitive.
+   *
+   * An @p ExcUnitShapeValuesDoNotExist is thrown if the shape values of the
+   * @p FiniteElement (corresponding to the @p ith shape function) depend on
+   * the shape of the cell in real space.
+   */
+  virtual Tensor<2,dim> shape_grad_grad (const unsigned int  i,
+                                         const Point<dim> &p) const;
+
+  /**
+   * Return the second derivatives of the @p componentth vector component of
+   * the @p ith shape function at the point @p p. See the FiniteElement base
+   * class for more information about the semantics of this function.
+   *
+   * Since this element is vector valued in general, it relays the computation
+   * of these values to the base elements.
+   */
+  virtual
+  Tensor<2,dim>
+  shape_grad_grad_component (const unsigned int i,
+                             const Point<dim> &p,
+                             const unsigned int component) const;
+
+  /**
+   * Return the tensor of third derivatives of the @p ith shape function at
+   * point @p p on the unit cell. The derivatives are derivatives on the unit
+   * cell with respect to unit cell coordinates. Since this finite element is
+   * always vector-valued, we return the value of the only non-zero component
+   * of the vector value of this shape function. If the shape function has
+   * more than one non-zero component (which we refer to with the term non-
+   * primitive), then throw an exception of type @p
+   * ExcShapeFunctionNotPrimitive.
+   *
+   * An @p ExcUnitShapeValuesDoNotExist is thrown if the shape values of the
+   * @p FiniteElement (corresponding to the @p ith shape function) depend on
+   * the shape of the cell in real space.
+   */
+  virtual Tensor<3,dim> shape_3rd_derivative (const unsigned int  i,
+                                              const Point<dim>   &p) const;
+
+  /**
+   * Return the third derivatives of the @p componentth vector component of
+   * the @p ith shape function at the point @p p. See the FiniteElement base
+   * class for more information about the semantics of this function.
+   *
+   * Since this element is vector valued in general, it relays the computation
+   * of these values to the base elements.
+   */
+  virtual Tensor<3,dim> shape_3rd_derivative_component (const unsigned int i,
+                                                        const Point<dim>   &p,
+                                                        const unsigned int component) const;
+
+  /**
+   * Return the tensor of fourth derivatives of the @p ith shape function at
+   * point @p p on the unit cell. The derivatives are derivatives on the unit
+   * cell with respect to unit cell coordinates. Since this finite element is
+   * always vector-valued, we return the value of the only non-zero component
+   * of the vector value of this shape function. If the shape function has
+   * more than one non-zero component (which we refer to with the term non-
+   * primitive), then throw an exception of type @p
+   * ExcShapeFunctionNotPrimitive.
+   *
+   * An @p ExcUnitShapeValuesDoNotExist is thrown if the shape values of the
+   * @p FiniteElement (corresponding to the @p ith shape function) depend on
+   * the shape of the cell in real space.
+   */
+  virtual Tensor<4,dim> shape_4th_derivative (const unsigned int  i,
+                                              const Point<dim>   &p) const;
+
+  /**
+   * Return the fourth derivatives of the @p componentth vector component of
+   * the @p ith shape function at the point @p p. See the FiniteElement base
+   * class for more information about the semantics of this function.
+   *
+   * Since this element is vector valued in general, it relays the computation
+   * of these values to the base elements.
+   */
+  virtual Tensor<4,dim> shape_4th_derivative_component (const unsigned int i,
+                                                        const Point<dim>   &p,
+                                                        const unsigned int component) const;
+
+  /**
+   * Return the matrix interpolating from the given finite element to the
+   * present one. The size of the matrix is then @p dofs_per_cell times
+   * <tt>source.dofs_per_cell</tt>.
+   *
+   * These matrices are available if source and destination element are both
+   * @p FESystem elements, have the same number of base elements with same
+   * element multiplicity, and if these base elements also implement their @p
+   * get_interpolation_matrix functions. Otherwise, an exception of type
+   * FiniteElement<dim,spacedim>::ExcInterpolationNotImplemented is thrown.
+   */
+  virtual void
+  get_interpolation_matrix (const FiniteElement<dim,spacedim> &source,
+                            FullMatrix<double>           &matrix) const;
+
+  /**
+   * Access to a composing element. The index needs to be smaller than the
+   * number of base elements. Note that the number of base elements may in
+   * turn be smaller than the number of components of the system element, if
+   * the multiplicities are greater than one.
+   */
+  virtual const FiniteElement<dim,spacedim> &
+  base_element (const unsigned int index) const;
+
+  /**
+   * This function returns @p true, if the shape function @p shape_index has
+   * non-zero function values somewhere on the face @p face_index.
+   */
+  virtual bool has_support_on_face (const unsigned int shape_index,
+                                    const unsigned int face_index) const;
+
+  /**
+   * Projection from a fine grid space onto a coarse grid space. Overrides the
+   * respective method in FiniteElement, implementing lazy evaluation
+   * (initialize when requested).
+   *
+   * If this projection operator is associated with a matrix @p P, then the
+   * restriction of this matrix @p P_i to a single child cell is returned
+   * here.
+   *
+   * The matrix @p P is the concatenation or the sum of the cell matrices @p
+   * P_i, depending on the #restriction_is_additive_flags. This distinguishes
+   * interpolation (concatenation) and projection with respect to scalar
+   * products (summation).
+   *
+   * Row and column indices are related to coarse grid and fine grid spaces,
+   * respectively, consistent with the definition of the associated operator.
+   *
+   * If projection matrices are not implemented in the derived finite element
+   * class, this function aborts with an exception of type
+   * FiniteElement::ExcProjectionVoid. You can check whether this would happen
+   * by first calling the restriction_is_implemented() or the
+   * isotropic_restriction_is_implemented() function.
+   */
+  virtual const FullMatrix<double> &
+  get_restriction_matrix (const unsigned int child,
+                          const RefinementCase<dim> &refinement_case=RefinementCase<dim>::isotropic_refinement) const;
+
+  /**
+   * Embedding matrix between grids. Overrides the respective method in
+   * FiniteElement, implementing lazy evaluation (initialize when queried).
+   *
+   * The identity operator from a coarse grid space into a fine grid space is
+   * associated with a matrix @p P. The restriction of this matrix @p P_i to a
+   * single child cell is returned here.
+   *
+   * The matrix @p P is the concatenation, not the sum of the cell matrices @p
+   * P_i. That is, if the same non-zero entry <tt>j,k</tt> exists in two
+   * different child matrices @p P_i, the value should be the same in both
+   * matrices and it is copied into the matrix @p P only once.
+   *
+   * Row and column indices are related to fine grid and coarse grid spaces,
+   * respectively, consistent with the definition of the associated operator.
+   *
+   * These matrices are used by routines assembling the prolongation matrix
+   * for multi-level methods.  Upon assembling the transfer matrix between
+   * cells using this matrix array, zero elements in the prolongation matrix
+   * are discarded and will not fill up the transfer matrix.
+   *
+   * If prolongation matrices are not implemented in one of the base finite
+   * element classes, this function aborts with an exception of type
+   * FiniteElement::ExcEmbeddingVoid. You can check whether this would happen
+   * by first calling the prolongation_is_implemented() or the
+   * isotropic_prolongation_is_implemented() function.
+   */
+  virtual const FullMatrix<double> &
+  get_prolongation_matrix (const unsigned int child,
+                           const RefinementCase<dim> &refinement_case=RefinementCase<dim>::isotropic_refinement) const;
+
+  /**
+   * Given an index in the natural ordering of indices on a face, return the
+   * index of the same degree of freedom on the cell.
+   *
+   * To explain the concept, consider the case where we would like to know
+   * whether a degree of freedom on a face, for example as part of an FESystem
+   * element, is primitive. Unfortunately, the is_primitive() function in the
+   * FiniteElement class takes a cell index, so we would need to find the cell
+   * index of the shape function that corresponds to the present face index.
+   * This function does that.
+   *
+   * Code implementing this would then look like this:
+   * @code
+   * for (i=0; i<dofs_per_face; ++i)
+   *  if (fe.is_primitive(fe.face_to_cell_index(i, some_face_no)))
+   *   ... do whatever
+   * @endcode
+   * The function takes additional arguments that account for the fact that
+   * actual faces can be in their standard ordering with respect to the cell
+   * under consideration, or can be flipped, oriented, etc.
+   *
+   * @param face_dof_index The index of the degree of freedom on a face. This
+   * index must be between zero and dofs_per_face.
+   * @param face The number of the face this degree of freedom lives on. This
+   * number must be between zero and GeometryInfo::faces_per_cell.
+   * @param face_orientation One part of the description of the orientation of
+   * the face. See
+   * @ref GlossFaceOrientation.
+   * @param face_flip One part of the description of the orientation of the
+   * face. See
+   * @ref GlossFaceOrientation.
+   * @param face_rotation One part of the description of the orientation of
+   * the face. See
+   * @ref GlossFaceOrientation.
+   * @return The index of this degree of freedom within the set of degrees of
+   * freedom on the entire cell. The returned value will be between zero and
+   * dofs_per_cell.
+   */
+  virtual
+  unsigned int face_to_cell_index (const unsigned int face_dof_index,
+                                   const unsigned int face,
+                                   const bool face_orientation = true,
+                                   const bool face_flip        = false,
+                                   const bool face_rotation    = false) const;
+
+  /**
+   * Implementation of the respective function in the base class.
+   */
+  virtual
+  Point<dim>
+  unit_support_point (const unsigned int index) const;
+
+  /**
+   * Implementation of the respective function in the base class.
+   */
+  virtual
+  Point<dim-1>
+  unit_face_support_point (const unsigned int index) const;
+
+  /**
+   * Returns a list of constant modes of the element. The returned table has as
+   * many rows as there are components in the element and dofs_per_cell
+   * columns. To each component of the finite element, the row in the returned
+   * table contains a basis representation of the constant function 1 on the
+   * element. Concatenates the constant modes of each base element.
+   */
+  virtual std::pair<Table<2,bool>, std::vector<unsigned int> >
+  get_constant_modes () const;
+
+  /**
+   * @name Functions to support hp
+   * @{
+   */
+
+  /**
+   * Return whether this element implements its hanging node constraints in
+   * the new way, which has to be used to make elements "hp compatible".
+   *
+   * This function returns @p true iff all its base elements return @p true
+   * for this function.
+   */
+  virtual bool hp_constraints_are_implemented () const;
+
+  /**
+   * Return the matrix interpolating from a face of one element to the face
+   * of the neighboring element.  The size of the matrix is then
+   * <tt>source.dofs_per_face</tt> times <tt>this->dofs_per_face</tt>.
+   *
+   * Base elements of this element will have to implement this function. They
+   * may only provide interpolation matrices for certain source finite
+   * elements, for example those from the same family. If they don't implement
+   * interpolation from a given element, then they must throw an exception of
+   * type FiniteElement<dim,spacedim>::ExcInterpolationNotImplemented, which
+   * will get propagated out from this element.
+   */
+  virtual void
+  get_face_interpolation_matrix (const FiniteElement<dim,spacedim> &source,
+                                 FullMatrix<double>       &matrix) const;
+
+
+  /**
+   * Return the matrix interpolating from a face of one element to the
+   * subface of the neighboring element.  The size of the matrix is then
+   * <tt>source.dofs_per_face</tt> times <tt>this->dofs_per_face</tt>.
+   *
+   * Base elements of this element will have to implement this function. They
+   * may only provide interpolation matrices for certain source finite
+   * elements, for example those from the same family. If they don't implement
+   * interpolation from a given element, then they must throw an exception of
+   * type FiniteElement<dim,spacedim>::ExcInterpolationNotImplemented, which
+   * will get propagated out from this element.
+   */
+  virtual void
+  get_subface_interpolation_matrix (const FiniteElement<dim,spacedim> &source,
+                                    const unsigned int        subface,
+                                    FullMatrix<double>       &matrix) const;
+
+  /**
+   * If, on a vertex, several finite elements are active, the hp code first
+   * assigns the degrees of freedom of each of these FEs different global
+   * indices. It then calls this function to find out which of them should get
+   * identical values, and consequently can receive the same global DoF index.
+   * This function therefore returns a list of identities between DoFs of the
+   * present finite element object with the DoFs of @p fe_other, which is a
+   * reference to a finite element object representing one of the other finite
+   * elements active on this particular vertex. The function computes which of
+   * the degrees of freedom of the two finite element objects are equivalent,
+   * both numbered between zero and the corresponding value of dofs_per_vertex
+   * of the two finite elements. The first index of each pair denotes one of
+   * the vertex dofs of the present element, whereas the second is the
+   * corresponding index of the other finite element.
+   */
+  virtual
+  std::vector<std::pair<unsigned int, unsigned int> >
+  hp_vertex_dof_identities (const FiniteElement<dim,spacedim> &fe_other) const;
+
+  /**
+   * Same as hp_vertex_dof_identities(), except that the function treats
+   * degrees of freedom on lines.
+   */
+  virtual
+  std::vector<std::pair<unsigned int, unsigned int> >
+  hp_line_dof_identities (const FiniteElement<dim,spacedim> &fe_other) const;
+
+  /**
+   * Same as hp_vertex_dof_identities(), except that the function treats
+   * degrees of freedom on quads.
+   */
+  virtual
+  std::vector<std::pair<unsigned int, unsigned int> >
+  hp_quad_dof_identities (const FiniteElement<dim,spacedim> &fe_other) const;
+
+  /**
+   * Return whether this element dominates the one given as argument when they
+   * meet at a common face, whether it is the other way around, whether
+   * neither dominates, or if either could dominate.
+   *
+   * For a definition of domination, see
+   * FiniteElementDomination::Domination and in particular the
+   * @ref hp_paper "hp paper".
+   */
+  virtual
+  FiniteElementDomination::Domination
+  compare_for_face_domination (const FiniteElement<dim,spacedim> &fe_other) const;
+  //@}
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   *
+   * This function is made virtual, since finite element objects are usually
+   * accessed through pointers to their base class, rather than the class
+   * itself.
+   */
+  virtual std::size_t memory_consumption () const;
+
+protected:
+
+  /**
+   * @p clone function instead of a copy constructor.
+   *
+   * This function is needed by the constructors of @p FESystem.
+   */
+  virtual FiniteElement<dim,spacedim> *clone() const;
+
+
+  virtual typename FiniteElement<dim,spacedim>::InternalDataBase *
+  get_data (const UpdateFlags                                                    update_flags,
+            const Mapping<dim,spacedim>                                         &mapping,
+            const Quadrature<dim>                                               &quadrature,
+            dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const;
+
+  virtual
+  typename FiniteElement<dim,spacedim>::InternalDataBase *
+  get_face_data (const UpdateFlags                                                    update_flags,
+                 const Mapping<dim,spacedim>                                         &mapping,
+                 const Quadrature<dim-1>                                             &quadrature,
+                 dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const;
+
+  virtual
+  typename FiniteElement<dim,spacedim>::InternalDataBase *
+  get_subface_data (const UpdateFlags                                                    update_flags,
+                    const Mapping<dim,spacedim>                                         &mapping,
+                    const Quadrature<dim-1>                                             &quadrature,
+                    dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const;
+
+  virtual
+  void
+  fill_fe_values (const typename Triangulation<dim,spacedim>::cell_iterator           &cell,
+                  const CellSimilarity::Similarity                                     cell_similarity,
+                  const Quadrature<dim>                                               &quadrature,
+                  const Mapping<dim,spacedim>                                         &mapping,
+                  const typename Mapping<dim,spacedim>::InternalDataBase              &mapping_internal,
+                  const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &mapping_data,
+                  const typename FiniteElement<dim,spacedim>::InternalDataBase        &fe_internal,
+                  dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const;
+
+  virtual
+  void
+  fill_fe_face_values (const typename Triangulation<dim,spacedim>::cell_iterator           &cell,
+                       const unsigned int                                                   face_no,
+                       const Quadrature<dim-1>                                             &quadrature,
+                       const Mapping<dim,spacedim>                                         &mapping,
+                       const typename Mapping<dim,spacedim>::InternalDataBase              &mapping_internal,
+                       const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &mapping_data,
+                       const typename FiniteElement<dim,spacedim>::InternalDataBase        &fe_internal,
+                       dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const;
+
+  virtual
+  void
+  fill_fe_subface_values (const typename Triangulation<dim,spacedim>::cell_iterator           &cell,
+                          const unsigned int                                                   face_no,
+                          const unsigned int                                                   sub_no,
+                          const Quadrature<dim-1>                                             &quadrature,
+                          const Mapping<dim,spacedim>                                         &mapping,
+                          const typename Mapping<dim,spacedim>::InternalDataBase              &mapping_internal,
+                          const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &mapping_data,
+                          const typename FiniteElement<dim,spacedim>::InternalDataBase        &fe_internal,
+                          dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const;
+
+  /**
+   * Do the work for the three <tt>fill_fe*_values</tt> functions.
+   *
+   * Calls (among other things) <tt>fill_fe_([sub]face)_values</tt> of the
+   * base elements. Calls @p fill_fe_values if both @p face_no and @p sub_no
+   * equal <tt>invalid_face_number</tt>; calls @p fill_fe_face_values if only
+   * @p sub_no equals <tt>invalid_face_number</tt>; and calls
+   * @p fill_fe_subface_values if neither @p face_no nor @p sub_no equals
+   * <tt>invalid_face_number</tt>.
+   */
+  template <int dim_1>
+  void compute_fill (const Mapping<dim,spacedim>                      &mapping,
+                     const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                     const unsigned int                                face_no,
+                     const unsigned int                                sub_no,
+                     const Quadrature<dim_1>                          &quadrature,
+                     const CellSimilarity::Similarity                   cell_similarity,
+                     const typename Mapping<dim,spacedim>::InternalDataBase &mapping_internal,
+                     const typename FiniteElement<dim,spacedim>::InternalDataBase &fe_data,
+                     const internal::FEValues::MappingRelatedData<dim,spacedim> &mapping_data,
+                     internal::FEValues::FiniteElementRelatedData<dim,spacedim> &output_data) const;
+
+private:
+
+  /**
+   * Value to indicate that a given face or subface number is invalid.
+   */
+  static const unsigned int invalid_face_number = numbers::invalid_unsigned_int;
+
+  /**
+   * Pointers to underlying finite element objects.
+   *
+   * This object contains a pointer to each contributing element of a mixed
+   * discretization and its multiplicity. It is created by the constructor and
+   * constant afterwards.
+   *
+   * The pointers are managed as shared pointers. This ensures that we can use
+   * the copy constructor of this class without having to manage cloning the
+   * elements themselves. Since finite element objects do not contain any
+   * state, this also allows multiple copies of an FESystem object to share
+   * pointers to the underlying base finite elements. The last one of these
+   * copies around will then delete the pointer to the base elements.
+   */
+  std::vector<std::pair<std_cxx11::shared_ptr<const FiniteElement<dim,spacedim> >,
+      unsigned int> >
+      base_elements;
+
+
+  /**
+   * Initialize the @p unit_support_points field of the FiniteElement class.
+   * Called from the constructor.
+   */
+  void initialize_unit_support_points ();
+
+  /**
+   * Initialize the @p unit_face_support_points field of the FiniteElement
+   * class. Called from the constructor.
+   */
+  void initialize_unit_face_support_points ();
+
+  /**
+   * Initialize the @p adjust_quad_dof_index_for_face_orientation_table field
+   * of the FiniteElement class. Called from the constructor.
+   */
+  void initialize_quad_dof_index_permutation ();
+  /**
+   * This function is simply singled out of the constructors since there are
+   * several of them. It sets up the index table for the system as well as @p
+   * restriction and @p prolongation matrices.
+   */
+  void initialize (const std::vector<const FiniteElement<dim,spacedim>*> &fes,
+                   const std::vector<unsigned int> &multiplicities);
+
+  /**
+   * Used by @p initialize.
+   */
+  void build_cell_tables();
+
+  /**
+   * Used by @p initialize.
+   */
+  void build_face_tables();
+
+  /**
+   * Used by @p initialize.
+   */
+  void build_interface_constraints ();
+
+  /**
+   * A function that computes the hp_vertex_dof_identities(),
+   * hp_line_dof_identities(), or hp_quad_dof_identities(), depending on the
+   * value of the template parameter.
+   */
+  template <int structdim>
+  std::vector<std::pair<unsigned int, unsigned int> >
+  hp_object_dof_identities (const FiniteElement<dim,spacedim> &fe_other) const;
+
+  /**
+   * Usually: Fields of cell-independent data.
+   *
+   * However, here, this class does not itself store the data but only
+   * pointers to @p InternalData objects for each of the base elements.
+   */
+  class InternalData : public FiniteElement<dim,spacedim>::InternalDataBase
+  {
+  public:
+    /**
+     * Constructor. Is called by the @p get_data function. Sets the size of
+     * the @p base_fe_datas vector to @p n_base_elements.
+     */
+    InternalData (const unsigned int n_base_elements);
+
+    /**
+     * Destructor. Deletes all @p InternalData objects whose pointers are
+     * stored in the @p base_fe_datas vector.
+     */
+    ~InternalData();
+
+    /**
+     * Store the pointer to the @p InternalData object that belongs to the
+     * base element with index @p base_no.
+     */
+    void set_fe_data(const unsigned int                        base_no,
+                     typename FiniteElement<dim,spacedim>::InternalDataBase *);
+
+    /**
+     * Return a reference to the @p InternalData object that belongs to the
+     * base element with index @p base_no.
+     */
+    typename FiniteElement<dim,spacedim>::InternalDataBase &
+    get_fe_data (const unsigned int base_no) const;
+
+    /**
+     * Return a reference to the object into which the base element with
+     * index @p base_no will write its output when calling
+     * FiniteElement::fill_fe_values() and similar functions.
+     */
+    internal::FEValues::FiniteElementRelatedData<dim,spacedim> &
+    get_fe_output_object (const unsigned int base_no) const;
+
+  private:
+
+    /**
+     * Pointers to @p InternalData objects for each of the base elements. They
+     * are accessed through the @p set_fe_data and @p get_fe_data functions.
+     *
+     * The size of this vector is set to @p n_base_elements by the
+     * InternalData constructor. It is filled by the @p get_data function.
+     * Note that since the data for each instance of a base element is
+     * necessarily the same, we only need as many of these objects as there
+     * are base elements, irrespective of their multiplicity.
+     */
+    typename std::vector<typename FiniteElement<dim,spacedim>::InternalDataBase *> base_fe_datas;
+
+    /**
+     * A collection of objects into which the base elements will write their
+     * output when we call FiniteElement::fill_fe_values() and related
+     * functions on them. Declared @p mutable so that it can be handed out
+     * for writing by the @p const function get_fe_output_object().
+     * The size of this vector is set to @p n_base_elements by the
+     * InternalData constructor.
+     */
+    mutable std::vector<internal::FEValues::FiniteElementRelatedData<dim,spacedim> > base_fe_output_objects;
+  };
+
+  /**
+   * Mutex for protecting initialization of restriction and embedding matrix.
+   */
+  mutable Threads::Mutex mutex;
+};
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+/*----------------------------  fe_system.h  ---------------------------*/
+#endif
+/*----------------------------  fe_system.h  ---------------------------*/
diff --git a/include/deal.II/fe/fe_tools.h b/include/deal.II/fe/fe_tools.h
new file mode 100644
index 0000000..b61a792
--- /dev/null
+++ b/include/deal.II/fe/fe_tools.h
@@ -0,0 +1,1061 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__fe_tools_H
+#define dealii__fe_tools_H
+
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/geometry_info.h>
+#include <deal.II/base/tensor.h>
+#include <deal.II/base/symmetric_tensor.h>
+
+#include <vector>
+#include <string>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+template <typename number> class FullMatrix;
+template <typename number> class Vector;
+template <int dim> class Quadrature;
+template <int dim, int spacedim> class FiniteElement;
+template <int dim, int spacedim> class DoFHandler;
+namespace hp
+{
+  template <int dim, int spacedim> class DoFHandler;
+}
+template <int dim> class FiniteElementData;
+class ConstraintMatrix;
+
+
+
+/*!@addtogroup feall */
+/*@{*/
+
+
+/**
+ * This namespace offers interpolations and extrapolations of discrete
+ * functions of one @p FiniteElement @p fe1 to another @p FiniteElement @p
+ * fe2.
+ *
+ * It also provides the local interpolation matrices that interpolate on each
+ * cell. Furthermore it provides the difference matrix $id-I_h$ that is needed
+ * for evaluating $(id-I_h)z$ for e.g. the dual solution $z$.
+ *
+ * For more information about the <tt>spacedim</tt> template parameter check
+ * the documentation of FiniteElement or the one of Triangulation.
+ *
+ * @author Wolfgang Bangerth, Ralf Hartmann, Guido Kanschat; 2000, 2003, 2004,
+ * 2005, 2006
+ */
+namespace FETools
+{
+  /**
+   * Abstract base class for factory objects creating finite elements of a
+   * given degree. Both get() overloads are pure virtual; derived classes
+   * implement them to provide a transparent way to create a finite element.
+   *
+   * This class is used in the FETools::get_fe_from_name() and
+   * FETools::add_fe_name() functions.
+   *
+   * @author Guido Kanschat, 2006
+   */
+  template <int dim, int spacedim=dim>
+  class FEFactoryBase : public Subscriptor
+  {
+  public:
+    /**
+     * Create a FiniteElement for the given @p degree; return a pointer to it.
+     */
+    virtual FiniteElement<dim,spacedim> *
+    get (const unsigned int degree) const = 0;
+
+    /**
+     * Create a FiniteElement from the 1d quadrature @p quad (currently only
+     * implemented for FE_Q) and return a pointer to it.
+     */
+
+    virtual FiniteElement<dim,spacedim> *
+    get (const Quadrature<1> &quad) const = 0;
+    /**
+     * Virtual destructor: derived factories may be deleted via a base pointer.
+     */
+    virtual ~FEFactoryBase();
+  };
+
+  /**
+   * A concrete factory creating finite elements of type @p FE. The base
+   * class's dim and spacedim are FE::dimension and FE::space_dimension.
+   *
+   * The class's get() function generates a finite element object of the type
+   * given as template argument, and with the degree (however the finite
+   * element class wishes to interpret this number) given as argument to
+   * get().
+   *
+   * @author Guido Kanschat, 2006
+   */
+  template <class FE>
+  class FEFactory : public FEFactoryBase<FE::dimension,FE::space_dimension>
+  {
+  public:
+    /**
+     * Create an object of type @p FE from @p degree; return a pointer to it.
+     */
+    virtual FiniteElement<FE::dimension,FE::space_dimension> *
+    get (const unsigned int degree) const;
+
+    /**
+     * Create a FiniteElement from the 1d quadrature @p quad (currently only
+     * implemented for FE_Q) and return a pointer to it.
+     */
+    virtual FiniteElement<FE::dimension,FE::space_dimension> *
+    get (const Quadrature<1> &quad) const;
+  };
+
+  /**
+   * @warning In most cases, you will probably want to use
+   * compute_base_renumbering().
+   *
+   * Compute the vector required to renumber the dofs of a cell by component.
+   * Furthermore, compute the vector storing the start indices of each
+   * component in the local block vector.
+   *
+   * The second vector is organized such that there is a vector for each base
+   * element containing the start index for each component served by this base
+   * element.
+   *
+   * While the first vector is checked to have the correct size, the second
+   * one is reinitialized for convenience.
+   */
+  template<int dim, int spacedim>
+  void compute_component_wise(
+    const FiniteElement<dim,spacedim>                &fe,
+    std::vector<unsigned int>               &renumbering,
+    std::vector<std::vector<unsigned int> > &start_indices);
+
+  /**
+   * Compute the vector required to renumber the dofs of a cell by block.
+   * Furthermore, compute the vector storing either the start indices or the
+   * size of each local block vector.
+   *
+   * If the @p bool parameter is true, @p block_data is filled with the start
+   * indices of each local block. If it is false, then the block sizes are
+   * returned.
+   *
+   * The vector <tt>renumbering</tt> will be indexed by the standard numbering
+   * of local degrees of freedom, namely first the first vertex, then second
+   * vertex, after vertices lines, quads, and hexes. For each index, the entry
+   * indicates the index which this degree of freedom receives in a numbering
+   * scheme, where the first block is numbered completely before the second.
+   */
+  template<int dim, int spacedim>
+  void compute_block_renumbering (
+    const FiniteElement<dim,spacedim>  &fe,
+    std::vector<types::global_dof_index> &renumbering,
+    std::vector<types::global_dof_index> &block_data,
+    bool return_start_indices = true);
+
+  /**
+   * @name Generation of local matrices
+   * @{
+   */
+  /**
+   * Gives the interpolation matrix that interpolates a @p fe1- function to a
+   * @p fe2-function on each cell. The interpolation_matrix needs to be of
+   * size <tt>(fe2.dofs_per_cell, fe1.dofs_per_cell)</tt>.
+   *
+   * Note, that if the finite element space @p fe1 is a subset of the finite
+   * element space @p fe2 then the @p interpolation_matrix is an embedding
+   * matrix.
+   */
+  template <int dim, typename number, int spacedim>
+  void
+  get_interpolation_matrix(const FiniteElement<dim,spacedim> &fe1,
+                           const FiniteElement<dim,spacedim> &fe2,
+                           FullMatrix<number> &interpolation_matrix);
+
+  /**
+   * Gives the interpolation matrix that interpolates a @p fe1- function to a
+   * @p fe2-function, and interpolates this to a second @p fe1-function on
+   * each cell. The interpolation_matrix needs to be of size
+   * <tt>(fe1.dofs_per_cell, fe1.dofs_per_cell)</tt>.
+   *
+   * Note, that this function only makes sense if the finite element space due
+   * to @p fe1 is not a subset of the finite element space due to @p fe2, as
+   * if it were a subset then the @p interpolation_matrix would be only the
+   * unit matrix.
+   */
+  template <int dim, typename number, int spacedim>
+  void
+  get_back_interpolation_matrix(const FiniteElement<dim,spacedim> &fe1,
+                                const FiniteElement<dim,spacedim> &fe2,
+                                FullMatrix<number> &interpolation_matrix);
+
+  /**
+   * Gives the unit matrix minus the back interpolation matrix.  The @p
+   * difference_matrix needs to be of size <tt>(fe1.dofs_per_cell,
+   * fe1.dofs_per_cell)</tt>.
+   *
+   * This function gives the matrix that transforms a @p fe1 function $z$ to
+   * $z-I_hz$ where $I_h$ denotes the interpolation operator from the @p fe1
+   * space to the @p fe2 space. This matrix hence is useful to evaluate error-
+   * representations where $z$ denotes the dual solution.
+   */
+  template <int dim, typename number, int spacedim>
+  void
+  get_interpolation_difference_matrix(const FiniteElement<dim,spacedim> &fe1,
+                                      const FiniteElement<dim,spacedim> &fe2,
+                                      FullMatrix<number> &difference_matrix);
+
+  /**
+   * Compute the local $L^2$-projection matrix from fe1 to fe2.
+   */
+  template <int dim, typename number, int spacedim>
+  void get_projection_matrix(const FiniteElement<dim,spacedim> &fe1,
+                             const FiniteElement<dim,spacedim> &fe2,
+                             FullMatrix<number> &matrix);
+
+  /**
+   * Compute the matrix of nodal values of a finite element applied to all its
+   * shape functions.
+   *
+   * This function is supposed to help building finite elements from
+   * polynomial spaces and should be called inside the constructor of an
+   * element. Applied to a completely initialized finite element, the result
+   * should be the unit matrix by definition of the node values.
+   *
+   * Using this matrix allows the construction of the basis of shape functions
+   * in two steps.
+   *
+   * <ol>
+   *
+   * <li>Define the space of shape functions using an arbitrary basis
+   * <i>w<sub>j</sub></i> and compute the matrix <i>M</i> of node functionals
+   * <i>N<sub>i</sub></i> applied to these basis functions.
+   *
+   * <li>Compute the basis <i>v<sub>j</sub></i> of the finite element shape
+   * function space by applying <i>M<sup>-1</sup></i> to the basis
+   * <i>w<sub>j</sub></i>.
+   *
+   * </ol>
+   *
+   * @note The FiniteElement must provide generalized support points and
+   * interpolation functions.
+   */
+  template <int dim, int spacedim>
+  void compute_node_matrix(FullMatrix<double> &M,
+                           const FiniteElement<dim,spacedim> &fe);
+
+  /**
+   * For all possible (isotropic and anisotropic) refinement cases compute the
+   * embedding matrices from a coarse cell to the child cells. Each column of
+   * the resulting matrices contains the representation of a coarse grid basis
+   * function by the fine grid basis; the matrices are split such that there
+   * is one matrix for every child.
+   *
+   * This function computes the coarse grid function in a sufficiently large
+   * number of quadrature points and fits the fine grid functions using least
+   * squares approximation. Therefore, the use of this function is restricted
+   * to the case that the finite element spaces are actually nested.
+   *
+   * Note, that <code>matrices[refinement_case-1][child]</code> includes the
+   * embedding (or prolongation) matrix of child <code>child</code> for the
+   * RefinementCase <code>refinement_case</code>. Here, we use
+   * <code>refinement_case-1</code> instead of <code>refinement_case</code> as
+   * for RefinementCase::no_refinement(=0) there are no prolongation matrices
+   * available.
+   *
+   * Typically this function is called by the various implementations of
+   * FiniteElement classes in order to fill the respective
+   * FiniteElement::prolongation matrices.
+   *
+   * @param fe The finite element class for which we compute the embedding
+   * matrices.
+   *
+   * @param matrices A reference to RefinementCase<dim>::isotropic_refinement
+   * vectors of FullMatrix objects. Each vector corresponds to one
+   * RefinementCase @p refinement_case and is of the vector size
+   * GeometryInfo<dim>::n_children(refinement_case). This is the format used
+   * in FiniteElement, where we want to use this function mostly.
+   *
+   * @param isotropic_only Set to <code>true</code> if you only want to
+   * compute matrices for isotropic refinement.
+   *
+   * @param threshold is the gap allowed in the least squares algorithm
+   * computing the embedding.
+   */
+  template <int dim, typename number, int spacedim>
+  void compute_embedding_matrices(const FiniteElement<dim,spacedim> &fe,
+                                  std::vector<std::vector<FullMatrix<number> > > &matrices,
+                                  const bool isotropic_only = false,
+                                  const double threshold = 1.e-12);
+
+  /**
+   * Compute the embedding matrices on faces needed for constraint matrices.
+   *
+   * @param fe The finite element for which to compute these matrices.
+   *
+   * @param matrices An array of <i>GeometryInfo<dim>::subfaces_per_face =
+   * 2<sup>dim-1</sup></i> FullMatrix objects, holding the embedding matrix for
+   * each subface.
+   *
+   * @param face_coarse The number of the face on the coarse side of the face
+   * for which this is computed.
+   *
+   * @param face_fine The number of the face on the refined side of the face
+   * for which this is computed.
+   *
+   * @param threshold is the gap allowed in the least squares algorithm
+   * computing the embedding.
+   *
+   * @warning This function will be used in computing constraint matrices. It
+   * is not sufficiently tested yet.
+   */
+  template <int dim, typename number, int spacedim>
+  void
+  compute_face_embedding_matrices(const FiniteElement<dim,spacedim> &fe,
+                                  FullMatrix<number> (&matrices)[GeometryInfo<dim>::max_children_per_face],
+                                  const unsigned int face_coarse,
+                                  const unsigned int face_fine,
+                                  const double threshold = 1.e-12);
+
+  /**
+   * For all possible (isotropic and anisotropic) refinement cases compute the
+   * <i>L<sup>2</sup></i>-projection matrices from the children to a coarse
+   * cell.
+   *
+   * Note, that <code>matrices[refinement_case-1][child]</code> includes the
+   * projection (or restriction) matrix of child <code>child</code> for the
+   * RefinementCase <code>refinement_case</code>. Here, we use
+   * <code>refinement_case-1</code> instead of <code>refinement_case</code> as
+   * for RefinementCase::no_refinement(=0) there are no projection matrices
+   * available.
+   *
+   * Typically this function is called by the various implementations of
+   * FiniteElement classes in order to fill the respective
+   * FiniteElement::restriction matrices.
+   *
+   * @param fe The finite element class for which we compute the projection
+   * matrices.
+   * @param matrices A reference to
+   * <tt>RefinementCase<dim>::isotropic_refinement</tt> vectors of FullMatrix
+   * objects. Each vector corresponds to one RefinementCase @p refinement_case
+   * and has size <tt>GeometryInfo<dim>::n_children(refinement_case)</tt>. This
+   * is the format used in FiniteElement, where we want to use this function
+   * mostly.
+   * @param isotropic_only Set to <code>true</code> if you only want to compute
+   * matrices for isotropic refinement.
+   */
+  template <int dim, typename number, int spacedim>
+  void compute_projection_matrices(
+    const FiniteElement<dim,spacedim> &fe,
+    std::vector<std::vector<FullMatrix<number> > > &matrices,
+    const bool isotropic_only = false);
+
+  /**
+   * Projects scalar data defined in quadrature points to a finite element
+   * space on a single cell.
+   *
+   * What this function does is the following: assume that there is scalar
+   * data <tt>u<sub>q</sub>, 0 <= q < Q:=quadrature.size()</tt> defined at the
+   * quadrature points of a cell, with the points defined by the given
+   * <tt>rhs_quadrature</tt> object. We may then want to ask for that finite
+   * element function (on a single cell) <tt>v<sub>h</sub></tt> in the finite-
+   * dimensional space defined by the given FE object that is the projection
+   * of <tt>u</tt> in the following sense:
+   *
+   * Usually, the projection <tt>v<sub>h</sub></tt> is that function that
+   * satisfies <tt>(v<sub>h</sub>,w)=(u,w)</tt> for all discrete test
+   * functions <tt>w</tt>. In the present case, we can't evaluate the right
+   * hand side, since <tt>u</tt> is only defined in the quadrature points
+   * given by <tt>rhs_quadrature</tt>, so we replace it by a quadrature
+   * approximation. Likewise, the left hand side is approximated using the
+   * <tt>lhs_quadrature</tt> object; if this quadrature object is chosen
+   * appropriately, then the integration of the left hand side can be done
+   * exactly, without any approximation. The use of different quadrature
+   * objects is necessary if the quadrature object for the right hand side has
+   * too few quadrature points -- for example, if data <tt>u</tt> is only
+   * defined at the cell center, then the corresponding one-point quadrature
+   * formula is obviously insufficient to approximate the scalar product on
+   * the left hand side by a definite form.
+   *
+   * After these quadrature approximations, we end up with a nodal
+   * representation <tt>V<sub>h</sub></tt> of <tt>v<sub>h</sub></tt> that
+   * satisfies the following system of linear equations: <tt>M V<sub>h</sub> =
+   * Q U</tt>, where <tt>M<sub>ij</sub>=(phi_i,phi_j)</tt> is the mass matrix
+   * approximated by <tt>lhs_quadrature</tt>, and <tt>Q</tt> is the matrix
+   * <tt>Q<sub>iq</sub>=phi<sub>i</sub>(x<sub>q</sub>) w<sub>q</sub></tt>
+   * where <tt>w<sub>q</sub></tt> are quadrature weights; <tt>U</tt> is the
+   * vector of quadrature point data <tt>u<sub>q</sub></tt>.
+   *
+   * In order to then get the nodal representation <tt>V<sub>h</sub></tt> of
+   * the projection of <tt>U</tt>, one computes <tt>V<sub>h</sub> = X U,
+   * X=M<sup>-1</sup> Q</tt>. The purpose of this function is to compute the
+   * matrix <tt>X</tt> and return it through the last argument of this
+   * function.
+   *
+   * Note that this function presently only supports scalar data. An extension
+   * of the mass matrix is of course trivial, but one has to define the order
+   * of data in the vector <tt>U</tt> if it contains vector valued data in all
+   * quadrature points.
+   *
+   * A use for this function is described in the introduction to the step-18
+   * example program.
+   *
+   * The opposite of this function, interpolation of a finite element function
+   * onto quadrature points is essentially what the
+   * <tt>FEValues::get_function_values</tt> functions do; to make things a
+   * little simpler, the
+   * <tt>FETools::compute_interpolation_to_quadrature_points_matrix</tt>
+   * provides the matrix form of this.
+   *
+   * Note that this function works on a single cell, rather than an entire
+   * triangulation. In effect, it therefore doesn't matter if you use a
+   * continuous or discontinuous version of the finite element.
+   *
+   * It is worth noting that there are a few confusing cases of this function.
+   * The first one is that it really only makes sense to project onto a finite
+   * element that has at most as many degrees of freedom per cell as there are
+   * quadrature points; the projection of N quadrature point data into a space
+   * with M>N unknowns is well-defined, but often yields funny and non-
+   * intuitive results. Secondly, one would think that if the quadrature point
+   * data is defined in the support points of the finite element, i.e. the
+   * quadrature points of <tt>rhs_quadrature</tt> equal
+   * <tt>fe.get_unit_support_points()</tt>, then the projection should be the
+   * identity, i.e. each degree of freedom of the finite element equals the
+   * value of the given data in the support point of the corresponding shape
+   * function. However, this is not generally the case: while the matrix
+   * <tt>Q</tt> in that case is the identity matrix, the mass matrix
+   * <tt>M</tt> is not equal to the identity matrix, except for the special
+   * case that the quadrature formula <tt>lhs_quadrature</tt> also has its
+   * quadrature points in the support points of the finite element.
+   *
+   * Finally, this function only defines a cell wise projection, while one
+   * frequently wants to apply it to all cells in a triangulation. However, if
+   * it is applied to one cell after the other, the results from later cells
+   * may overwrite nodal values computed already from previous cells if
+   * degrees of freedom live on the interfaces between cells. The function is
+   * therefore most useful for discontinuous elements.
+   */
+  template <int dim, int spacedim>
+  void
+  compute_projection_from_quadrature_points_matrix (const FiniteElement<dim,spacedim> &fe,
+                                                    const Quadrature<dim>    &lhs_quadrature,
+                                                    const Quadrature<dim>    &rhs_quadrature,
+                                                    FullMatrix<double>       &X);
+
+  /**
+   * Given a (scalar) local finite element function, compute the matrix that
+   * maps the vector of nodal values onto the vector of values of this
+   * function at quadrature points as given by the second argument. In a
+   * sense, this function does the opposite of the
+   * FETools::compute_projection_from_quadrature_points_matrix function.
+   */
+  template <int dim, int spacedim>
+  void
+  compute_interpolation_to_quadrature_points_matrix (const FiniteElement<dim,spacedim> &fe,
+                                                     const Quadrature<dim>    &quadrature,
+                                                     FullMatrix<double>       &I_q);
+
+  /**
+   * Computes the projection of tensorial (first-order tensor) data stored at
+   * the quadrature points @p vector_of_tensors_at_qp to data @p
+   * vector_of_tensors_at_nodes at the support points of the cell.  The data
+   * in @p vector_of_tensors_at_qp is ordered sequentially following the
+   * quadrature point numbering.  The size of @p vector_of_tensors_at_qp must
+   * correspond to the number of columns of @p projection_matrix.  The size of
+   * @p vector_of_tensors_at_nodes must correspond to the number of rows of @p
+   * projection_matrix.  The projection matrix @p projection_matrix
+   * describes the projection of scalar data from the quadrature points and
+   * can be obtained from the
+   * FETools::compute_projection_from_quadrature_points_matrix function.
+   */
+  template <int dim>
+  void
+  compute_projection_from_quadrature_points(
+    const FullMatrix<double>    &projection_matrix,
+    const std::vector< Tensor<1, dim > >    &vector_of_tensors_at_qp,
+    std::vector< Tensor<1, dim > >          &vector_of_tensors_at_nodes);
+
+
+
+  /**
+   * Same as the previous function, but for a @p SymmetricTensor.
+   */
+  template <int dim>
+  void
+  compute_projection_from_quadrature_points(
+    const FullMatrix<double>    &projection_matrix,
+    const std::vector< SymmetricTensor<2, dim > >   &vector_of_tensors_at_qp,
+    std::vector< SymmetricTensor<2, dim > >         &vector_of_tensors_at_nodes);
+
+
+
+
+  /**
+   * This method implements the
+   * FETools::compute_projection_from_quadrature_points_matrix method for
+   * faces of a mesh.  The matrix that it returns, X, is face specific and its
+   * size is fe.dofs_per_cell by rhs_quadrature.size().  The dimension dim
+   * must be larger than 1 for this function, since Quadrature<dim-1> objects
+   * are required. See the documentation on the Quadrature class for more
+   * information.
+   */
+  template <int dim, int spacedim>
+  void
+  compute_projection_from_face_quadrature_points_matrix (const FiniteElement<dim, spacedim> &fe,
+                                                         const Quadrature<dim-1>    &lhs_quadrature,
+                                                         const Quadrature<dim-1>    &rhs_quadrature,
+                                                         const typename DoFHandler<dim, spacedim>::active_cell_iterator &cell,
+                                                         const unsigned int          face,
+                                                         FullMatrix<double>         &X);
+
+
+
+  //@}
+  /**
+   * @name Functions which should be in DoFTools
+   */
+  //@{
+  /**
+   * Gives the interpolation of the @p dof1-function @p u1 to a @p
+   * dof2-function @p u2. @p dof1 and @p dof2 need to be DoFHandlers based on
+   * the same triangulation.
+   *
+   * If the elements @p fe1 and @p fe2 are either both continuous or both
+   * discontinuous then this interpolation is the usual point interpolation.
+   * The same is true if @p fe1 is a continuous and @p fe2 is a discontinuous
+   * finite element. For the case that @p fe1 is a discontinuous and @p fe2 is
+   * a continuous finite element there is no point interpolation defined at
+   * the discontinuities.  Therefore the mean value is taken at the DoF values
+   * on the discontinuities.
+   *
+   * Note that for continuous elements on grids with hanging nodes (i.e.
+   * locally refined grids) this function does not give the expected output.
+   * Indeed, the resulting output vector does not necessarily respect
+   * continuity requirements at hanging nodes: if, for example, you are
+   * interpolating a Q2 field to a Q1 field, then at hanging nodes the output
+   * field will have the function value of the input field, which however is
+   * not usually the mean value of the two adjacent nodes. It is thus not part
+   * of the Q1 function space on the whole triangulation, although it is of
+   * course Q1 on each cell.
+   *
+   * For this case (continuous elements on grids with hanging nodes), please
+   * use the @p interpolate() function with an additional ConstraintMatrix
+   * argument, see below, or make the field conforming yourself by calling the
+   * @p distribute function of your hanging node constraints object.
+   */
+  template <int dim, int spacedim,
+            template <int, int> class DoFHandlerType1,
+            template <int, int> class DoFHandlerType2,
+            class InVector, class OutVector>
+  void
+  interpolate (const DoFHandlerType1<dim,spacedim> &dof1,
+               const InVector                      &u1,
+               const DoFHandlerType2<dim,spacedim> &dof2,
+               OutVector                           &u2);
+
+  /**
+   * Gives the interpolation of the @p dof1-function @p u1 to a @p
+   * dof2-function @p u2. @p dof1 and @p dof2 need to be DoFHandlers (or
+   * hp::DoFHandlers) based on the same triangulation.  @p constraints is a
+   * hanging node constraints object corresponding to @p dof2. This object is
+   * particularly important when interpolating onto continuous elements on
+   * grids with hanging nodes (locally refined grids).
+   *
+   * If the elements @p fe1 and @p fe2 are either both continuous or both
+   * discontinuous then this interpolation is the usual point interpolation.
+   * The same is true if @p fe1 is a continuous and @p fe2 is a discontinuous
+   * finite element. For the case that @p fe1 is a discontinuous and @p fe2 is
+   * a continuous finite element there is no point interpolation defined at
+   * the discontinuities.  Therefore the mean value is taken at the DoF values
+   * at the discontinuities.
+   */
+  template <int dim, int spacedim,
+            template <int, int> class DoFHandlerType1,
+            template <int, int> class DoFHandlerType2,
+            class InVector, class OutVector>
+  void interpolate (const DoFHandlerType1<dim,spacedim> &dof1,
+                    const InVector                      &u1,
+                    const DoFHandlerType2<dim,spacedim> &dof2,
+                    const ConstraintMatrix              &constraints,
+                    OutVector                           &u2);
+
+  /**
+   * Gives the interpolation of the @p fe1-function @p u1 to a @p
+   * fe2-function, and interpolates this to a second @p fe1-function named @p
+   * u1_interpolated.
+   *
+   * Note, that this function does not work on continuous elements at hanging
+   * nodes. For that case use the @p back_interpolate function, below, that
+   * takes an additional @p ConstraintMatrix object.
+   *
+   * Furthermore note, that for the specific case when the finite element
+   * space corresponding to @p fe1 is a subset of the finite element space
+   * corresponding to @p fe2, this function is simply an identity mapping.
+   */
+  template <int dim, class InVector, class OutVector, int spacedim>
+  void back_interpolate (const DoFHandler<dim,spacedim>    &dof1,
+                         const InVector                    &u1,
+                         const FiniteElement<dim,spacedim> &fe2,
+                         OutVector                         &u1_interpolated);
+
+  /**
+   * Same as last function, except that the dof handler objects might be of
+   * type @p hp::DoFHandler.
+   */
+  template <int dim,
+            template <int> class DoFHandlerType,
+            class InVector, class OutVector, int spacedim>
+  void back_interpolate (const DoFHandlerType<dim>         &dof1,
+                         const InVector                    &u1,
+                         const FiniteElement<dim,spacedim> &fe2,
+                         OutVector                         &u1_interpolated);
+
+  /**
+   * Gives the interpolation of the @p dof1-function @p u1 to a @p
+   * dof2-function, and interpolates this to a second @p dof1-function named
+   * @p u1_interpolated.  @p constraints1 and @p constraints2 are the hanging
+   * node constraints corresponding to @p dof1 and @p dof2, respectively.
+   * These objects are particularly important when continuous elements on
+   * grids with hanging nodes (locally refined grids) are involved.
+   *
+   * Furthermore note, that for the specific case when the finite element
+   * space corresponding to @p dof1 is a subset of the finite element space
+   * corresponding to @p dof2, this function is simply an identity mapping.
+   */
+  template <int dim, class InVector, class OutVector, int spacedim>
+  void back_interpolate (const DoFHandler<dim,spacedim> &dof1,
+                         const ConstraintMatrix         &constraints1,
+                         const InVector                 &u1,
+                         const DoFHandler<dim,spacedim> &dof2,
+                         const ConstraintMatrix         &constraints2,
+                         OutVector                      &u1_interpolated);
+
+  /**
+   * Gives $(Id-I_h)z_1$ for a given @p dof1-function $z_1$, where $I_h$ is
+   * the interpolation from @p fe1 to @p fe2. The result $(Id-I_h)z_1$ is
+   * written into @p z1_difference.
+   *
+   * Note, that this function does not work for continuous elements at hanging
+   * nodes. For that case use the @p interpolation_difference function, below,
+   * that takes an additional @p ConstraintMatrix object.
+   */
+  template <int dim, class InVector, class OutVector, int spacedim>
+  void interpolation_difference(const DoFHandler<dim,spacedim>    &dof1,
+                                const InVector                    &z1,
+                                const FiniteElement<dim,spacedim> &fe2,
+                                OutVector                         &z1_difference);
+
+  /**
+   * Gives $(Id-I_h)z_1$ for a given @p dof1-function $z_1$, where $I_h$ is
+   * the interpolation from @p fe1 to @p fe2. The result $(Id-I_h)z_1$ is
+   * written into @p z1_difference.  @p constraints1 and @p constraints2 are
+   * the hanging node constraints corresponding to @p dof1 and @p dof2,
+   * respectively. These objects are particularly important when continuous
+   * elements on grids with hanging nodes (locally refined grids) are
+   * involved.
+   *
+   * For parallel computations with PETSc, supply @p z1 with ghost elements
+   * and @p z1_difference without ghost elements.
+   */
+  template <int dim, class InVector, class OutVector, int spacedim>
+  void interpolation_difference(const DoFHandler<dim,spacedim> &dof1,
+                                const ConstraintMatrix         &constraints1,
+                                const InVector                 &z1,
+                                const DoFHandler<dim,spacedim> &dof2,
+                                const ConstraintMatrix         &constraints2,
+                                OutVector                      &z1_difference);
+
+
+
+  /**
+   * $L^2$ projection for discontinuous elements. Operates the same direction
+   * as interpolate.
+   *
+   * The global projection can be computed by local matrices if the finite
+   * element spaces are discontinuous. With continuous elements, this is
+   * impossible, since a global mass matrix must be inverted.
+   */
+  template <int dim, class InVector, class OutVector, int spacedim>
+  void project_dg (const DoFHandler<dim,spacedim> &dof1,
+                   const InVector                 &u1,
+                   const DoFHandler<dim,spacedim> &dof2,
+                   OutVector                      &u2);
+
+  /**
+   * Gives the patchwise extrapolation of a @p dof1 function @p z1 to a @p
+   * dof2 function @p z2.  @p dof1 and @p dof2 need to be DoFHandler objects
+   * based on the same triangulation. This function is used, for example, for
+   * extrapolating patchwise a piecewise linear solution to a piecewise
+   * quadratic solution.
+   *
+   * The function's name is historical and probably not particularly well
+   * chosen. The function performs the following operations, one after the
+   * other:
+   *
+   * - It interpolates directly from every cell of @p dof1 to the
+   * corresponding cell of `dof2` using the interpolation matrix of the finite
+   * element spaces used on these cells and provided by the finite element
+   * objects involved. This step is done using the FETools::interpolate()
+   * function. - It then performs a loop over all non-active cells of `dof2`.
+   * If such a non-active cell has at least one active child, then we call the
+   * children of this cell a "patch". We then interpolate from the children of
+   * this patch to the patch, using the finite element space associated with
+   * `dof2` and immediately interpolate back to the children. In essence, this
+   * operation throws away all information in the solution vector that lives
+   * on a scale smaller than the patch cell. - Since we traverse non-active
+   * cells from the coarsest to the finest levels, we may find patches that
+   * correspond to child cells of previously treated patches if the mesh had
+   * been refined adaptively (this cannot happen if the mesh has been refined
+   * globally because there the children of a patch are all active). We also
+   * perform the operation described above on these patches, but it is easy to
+   * see that on patches that are children of previously treated patches, the
+   * operation is now the identity operation (since it interpolates from the
+   * children of the current patch a function that had previously been
+   * interpolated to these children from an even coarser patch). Consequently,
+   * this does not alter the solution vector any more.
+   *
+   * The name of the function originates from the fact that it can be used to
+   * construct a representation of a function of higher polynomial degree on a
+   * once coarser mesh. For example, if you imagine that you start with a
+   * $Q_1$ function on globally refined mesh, and that @p dof2 is associated
+   * with a $Q_2$ element, then this function computes the equivalent of the
+   * operator $I_{2h}^{(2)}$ interpolating the original piecewise linear
+   * function onto a quadratic function on a once coarser mesh with mesh size
+   * $2h$ (but representing this function on the original mesh with size $h$).
+   * If the exact solution is sufficiently smooth, then
+   * $u^\ast=I_{2h}^{(2)}u_h$ is typically a better approximation to the exact
+   * solution $u$ of the PDE than $u_h$ is. In other words, this function
+   * provides a postprocessing step that improves the solution in a similar
+   * way one often obtains by extrapolating a sequence of solutions,
+   * explaining the origin of the function's name.
+   *
+   * @note The resulting field does not satisfy continuity requirements of the
+   * given finite elements if the algorithm outlined above is used. When you
+   * use continuous elements on grids with hanging nodes, please use the @p
+   * extrapolate function with an additional ConstraintMatrix argument, see
+   * below.
+   *
+   * @note Since this function operates on patches of cells, it requires that
+   * the underlying grid is refined at least once for every coarse grid cell.
+   * If this is not the case, an exception will be raised.
+   */
+  template <int dim, class InVector, class OutVector, int spacedim>
+  void extrapolate (const DoFHandler<dim,spacedim> &dof1,
+                    const InVector                 &z1,
+                    const DoFHandler<dim,spacedim> &dof2,
+                    OutVector                      &z2);
+
+  /**
+   * Gives the patchwise extrapolation of a @p dof1 function @p z1 to a @p
+   * dof2 function @p z2.  @p dof1 and @p dof2 need to be DoFHandler objects
+   * based on the same triangulation.  @p constraints is a hanging node
+   * constraints object corresponding to @p dof2. This object is necessary
+   * when interpolating onto continuous elements on grids with hanging nodes
+   * (locally refined grids).
+   *
+   * Otherwise, the function does the same as the other @p extrapolate
+   * function above (for which the documentation provides an extensive
+   * description of its operation).
+   */
+  template <int dim, class InVector, class OutVector, int spacedim>
+  void extrapolate (const DoFHandler<dim,spacedim> &dof1,
+                    const InVector                 &z1,
+                    const DoFHandler<dim,spacedim> &dof2,
+                    const ConstraintMatrix         &constraints,
+                    OutVector                      &z2);
+  //@}
+  /**
+   * The numbering of the degrees of freedom in continuous finite elements is
+   * hierarchic, i.e. in such a way that we first number the vertex dofs, in
+   * the order of the vertices as defined by the triangulation, then the line
+   * dofs in the order and respecting the direction of the lines, then the
+   * dofs on quads, etc. However, we could have, as well, numbered them in a
+   * lexicographic way, i.e. with indices first running in x-direction, then
+   * in y-direction and finally in z-direction. Discontinuous elements of
+   * class FE_DGQ() are numbered in this way, for example.
+   *
+   * This function constructs a table of the lexicographic index each degree
+   * of freedom in the hierarchic numbering would have. It operates on the
+   * continuous finite element given as first argument, and outputs the
+   * lexicographic indices in the second.
+   *
+   * Note that since this function uses specifics of the continuous finite
+   * elements, it can only operate on FiniteElementData<dim> objects inherent
+   * in FE_Q(). However, this function does not take a FE_Q object as it is
+   * also invoked by the FE_Q() constructor.
+   *
+   * It is assumed that the size of the output argument already matches the
+   * correct size, which is equal to the number of degrees of freedom in the
+   * finite element.
+   */
+
+  template <int dim>
+  void
+  hierarchic_to_lexicographic_numbering (unsigned int degree,
+                                         std::vector<unsigned int> &h2l);
+
+  template <int dim>
+  void
+  hierarchic_to_lexicographic_numbering (const FiniteElementData<dim> &fe_data,
+                                         std::vector<unsigned int>    &h2l);
+
+  /**
+   * Like the previous function but instead of returning its result through
+   * the last argument return it as a value.
+   */
+  template <int dim>
+  std::vector<unsigned int>
+  hierarchic_to_lexicographic_numbering (const FiniteElementData<dim> &fe_data);
+
+  /**
+   * This is the reverse function to the above one, generating the map from
+   * the lexicographic to the hierarchical numbering. All the remarks made
+   * about the above function are also valid here.
+   */
+  template <int dim>
+  void
+  lexicographic_to_hierarchic_numbering (const FiniteElementData<dim> &fe_data,
+                                         std::vector<unsigned int>    &l2h);
+
+  /**
+   * Like the previous function but instead of returning its result through
+   * the last argument return it as a value.
+   */
+  template <int dim>
+  std::vector<unsigned int>
+  lexicographic_to_hierarchic_numbering (const FiniteElementData<dim> &fe_data);
+
+  /**
+   * Parse the name of a finite element and generate a finite element object
+   * accordingly. The parser ignores space characters between words (things
+   * matching the regular expression [A-Za-z0-9_]).
+   *
+   * The name must be in the form which is returned by the
+   * FiniteElement::get_name function, where dimension template parameters
+   * <2> etc. can be omitted. Alternatively, the explicit number can be
+   * replaced by <tt>dim</tt> or <tt>d</tt>. If a number is given, it
+   * <b>must</b> match the template parameter of this function.
+   *
+   * The names of FESystem elements follow the pattern
+   * <code>FESystem[FE_Base1^p1-FE_Base2^p2]</code> The powers <code>p1</code>
+   * etc. may either be numbers or can be replaced by <tt>dim</tt> or
+   * <tt>d</tt>.
+   *
+   *
+   * If no finite element can be reconstructed from this string, an exception
+   * of type @p FETools::ExcInvalidFEName is thrown.
+   *
+   * The function returns a pointer to a newly created finite element. It is
+   * the caller's responsibility to destroy the object pointed to at an
+   * appropriate later time.
+   *
+   * Since the value of the template argument can't be deduced from the
+   * (string) argument given to this function, you have to explicitly specify
+   * it when you call this function.
+   *
+   * This function knows about all the standard elements defined in the
+   * library. However, it doesn't by default know about elements that you may
+   * have defined in your program. To make your own elements known to this
+   * function, use the add_fe_name() function.  This function does not work if
+   * one wants to get a codimension 1 finite element.
+   */
+  template <int dim, int spacedim>
+  FiniteElement<dim, spacedim> *
+  get_fe_by_name (const std::string &name);
+
+
+  /**
+   * @deprecated Use get_fe_by_name() with two template parameters instead
+   */
+  template <int dim>
+  FiniteElement<dim,dim> *
+  get_fe_from_name (const std::string &name);
+
+
+  /**
+   * Extend the list of finite elements that can be generated by
+   * get_fe_from_name() by the one given as @p name. If get_fe_from_name() is
+   * later called with this name, it will use the object given as second
+   * argument to create a finite element object.
+   *
+   * The format of the @p name parameter should include the name of a finite
+   * element. However, it is safe to use either the class name alone or to use
+   * the result of FiniteElement::get_name (which includes the space dimension
+   * as well as the polynomial degree), since everything after the first non-
+   * name character will be ignored.
+   *
+   * The FEFactory object should be an object newly created with <tt>new</tt>.
+   * FETools will take ownership of this object and delete it once it is not
+   * used anymore.
+   *
+   * In most cases, if you want objects of type <code>MyFE</code> be created
+   * whenever the name <code>my_fe</code> is given to get_fe_from_name, you
+   * will want the second argument to this function be of type
+   * FEFactory@<MyFE@>, but you can of course create your custom finite
+   * element factory class.
+   *
+   * This function takes over ownership of the object given as second
+   * argument, i.e. you should never attempt to destroy it later on. The
+   * object will be deleted at the end of the program's lifetime.
+   *
+   * If the name of the element is already in use, an exception is thrown.
+   * Thus, functionality of get_fe_from_name() can only be added, not changed.
+   *
+   * @note This function manipulates a global table (one table for each space
+   * dimension). It is thread safe in the sense that every access to this
+   * table is secured by a lock. Nevertheless, since each name can be added
+   * only once, user code has to make sure that only one thread adds a new
+   * element.
+   *
+   * Note also that this table exists once for each space dimension. If you
+   * have a program that works with finite elements in different space
+   * dimensions (for example,
+   * @ref step_4 "step-4"
+   * does something like this), then you should call this function for each
+   * space dimension for which you want your finite element added to the map.
+   */
+  template <int dim, int spacedim>
+  void add_fe_name (const std::string &name,
+                    const FEFactoryBase<dim,spacedim> *factory);
+
+  /**
+   * The string used for get_fe_from_name() cannot be translated to a finite
+   * element.
+   *
+   * Either the string is badly formatted or you are using a custom element
+   * that must be added using add_fe_name() first.
+   *
+   * @ingroup Exceptions
+   */
+  DeclException1 (ExcInvalidFEName,
+                  std::string,
+                  << "Can't re-generate a finite element from the string '"
+                  << arg1 << "'.");
+
+  /**
+   * The string used for get_fe_from_name() cannot be translated to a finite
+   * element.
+   *
+   * Dimension arguments in finite element names should be avoided. If they
+   * are there, the dimension should be <tt>dim</tt> or <tt>d</tt>. Here, you
+   * gave a numeric dimension argument, which does not match the template
+   * dimension of the finite element class.
+   *
+   * @ingroup Exceptions
+   */
+  DeclException2 (ExcInvalidFEDimension,
+                  char, int,
+                  << "The dimension " << arg1
+                  << " in the finite element string must match "
+                  << "the space dimension "
+                  << arg2 << ".");
+
+  /**
+   * Exception
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcInvalidFE);
+
+  /**
+   * The finite element must be
+   * @ref GlossPrimitive "primitive".
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcFENotPrimitive);
+  /**
+   * Exception
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcTriangulationMismatch);
+
+  /**
+   * A continuous element is used on a mesh with hanging nodes, but the
+   * constraint matrices are missing.
+   *
+   * @ingroup Exceptions
+   */
+  DeclException1 (ExcHangingNodesNotAllowed,
+                  int,
+                  << "You are using continuous elements on a grid with "
+                  << "hanging nodes but without providing hanging node "
+                  << "constraints. Use the respective function with "
+                  << "additional ConstraintMatrix argument(s), instead."
+                  << (arg1?"":""));
+  /**
+   * You need at least two grid levels.
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcGridNotRefinedAtLeastOnce);
+  /**
+   * The dimensions of the matrix used did not match the expected dimensions.
+   *
+   * @ingroup Exceptions
+   */
+  DeclException4 (ExcMatrixDimensionMismatch,
+                  int, int, int, int,
+                  << "This is a " << arg1 << "x" << arg2 << " matrix, "
+                  << "but should be a " << arg3 << "x" << arg4 << " matrix.");
+
+  /**
+   * Exception thrown if an embedding matrix was computed inaccurately.
+   *
+   * @ingroup Exceptions
+   */
+  DeclException1(ExcLeastSquaresError, double,
+                 << "Least squares fit leaves a gap of " << arg1);
+
+  /**
+   * Exception thrown if one variable must be greater than another but is not.
+   *
+   * @ingroup Exceptions
+   */
+  DeclException2 (ExcNotGreaterThan,
+                  int,  int,
+                  << arg1 << " must be greater than " << arg2);
+}
+
+
+#ifndef DOXYGEN
+
+namespace FETools
+{
+  template <class FE>
+  FiniteElement<FE::dimension, FE::space_dimension> *
+  FEFactory<FE>::get (const unsigned int degree) const
+  {
+    return new FE(degree); // allocated with new and not freed here; presumably the caller owns it -- TODO confirm
+  }
+}
+
+#endif
+
+/*@}*/
+
+DEAL_II_NAMESPACE_CLOSE
+
+/*----------------------------   fe_tools.h     ---------------------------*/
+/* end of #ifndef dealii__fe_tools_H */
+#endif
+/*----------------------------   fe_tools.h     ---------------------------*/
diff --git a/include/deal.II/fe/fe_trace.h b/include/deal.II/fe/fe_trace.h
new file mode 100644
index 0000000..3998f75
--- /dev/null
+++ b/include/deal.II/fe/fe_trace.h
@@ -0,0 +1,163 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__fe_trace_h
+#define dealii__fe_trace_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/tensor_product_polynomials.h>
+#include <deal.II/fe/fe_poly_face.h>
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_face.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * A finite element, which is the trace of FE_Q elements, that is a tensor
+ * product of polynomials on the faces, undefined in the interior of the cells
+ * and continuous. The basis functions on the faces are from
+ * Polynomials::LagrangeEquidistant
+ *
+ * This finite element is the trace space of FE_Q on the faces.
+ *
+ * @note Since these are only finite elements on faces, only FEFaceValues and
+ * FESubfaceValues will be able to extract reasonable values from any face
+ * polynomial. In order to make the use of FESystem simpler, FEValues objects
+ * will not fail using this finite element space, but all shape function
+ * values extracted will be equal to zero.
+ *
+ * @todo Polynomials::LagrangeEquidistant should be and will be replaced by
+ * Polynomials::LagrangeGaussLobatto as soon as such a polynomial set exists.
+ */
+
+template <int dim, int spacedim=dim>
+class FE_TraceQ : public FE_PolyFace<TensorProductPolynomials<dim-1>, dim, spacedim>
+{
+public:
+  /**
+   * Constructor for tensor product polynomials of degree <tt>p</tt>.
+   * NOTE(review): this comment used to name Legendre polynomials, whereas the
+   * class documentation names Polynomials::LagrangeEquidistant; confirm.
+   */
+  FE_TraceQ(unsigned int p);
+
+  /**
+   * @p clone function instead of a copy constructor.
+   *
+   * This function is needed by the constructors of @p FESystem.
+   */
+  virtual FiniteElement<dim,spacedim> *clone() const;
+
+  /**
+   * Return a string that uniquely identifies a finite element. Presumably the
+   * name is of the form <tt>FE_TraceQ<dim>(degree)</tt> (this comment used to
+   * say <tt>FE_DGQ</tt>, likely a copy-paste leftover) -- TODO confirm.
+   */
+  virtual std::string get_name () const;
+
+  /**
+   * This function returns @p true, if the shape function @p shape_index has
+   * non-zero function values somewhere on the face @p face_index.
+   */
+  virtual bool has_support_on_face (const unsigned int shape_index,
+                                    const unsigned int face_index) const;
+
+  /**
+   * Returns a list of constant modes of the element. For this element, it
+   * simply returns one row with all entries set to true.
+   */
+  virtual std::pair<Table<2,bool>, std::vector<unsigned int> >
+  get_constant_modes () const;
+
+  /**
+   * Return whether this element implements its hanging node constraints in
+   * the new way, which has to be used to make elements "hp compatible".
+   */
+  virtual bool hp_constraints_are_implemented () const;
+
+  /**
+   * Return the matrix interpolating from a face of one element to the face
+   * of the neighboring element.  The size of the matrix is then
+   * <tt>source.dofs_per_face</tt> times <tt>this->dofs_per_face</tt>. This
+   * element only provides interpolation matrices for elements of the same
+   * type and FE_Nothing. For all other elements, an exception of type
+   * FiniteElement<dim,spacedim>::ExcInterpolationNotImplemented is thrown.
+   */
+  virtual void
+  get_face_interpolation_matrix (const FiniteElement<dim,spacedim> &source,
+                                 FullMatrix<double>       &matrix) const;
+
+  /**
+   * Return the matrix interpolating from a face of one element to the face
+   * of the neighboring element.  The size of the matrix is then
+   * <tt>source.dofs_per_face</tt> times <tt>this->dofs_per_face</tt>. This
+   * element only provides interpolation matrices for elements of the same
+   * type and FE_Nothing. For all other elements, an exception of type
+   * FiniteElement<dim,spacedim>::ExcInterpolationNotImplemented is thrown.
+   */
+  virtual void
+  get_subface_interpolation_matrix (const FiniteElement<dim,spacedim> &source,
+                                    const unsigned int        subface,
+                                    FullMatrix<double>       &matrix) const;
+
+  /**
+   * Return whether this element dominates the one given as argument when they
+   * meet at a common face, whether it is the other way around, whether
+   * neither dominates, or if either could dominate.
+   *
+   * For a definition of domination, see FiniteElementDomination::Domination
+   * and in particular the
+   * @ref hp_paper "hp paper".
+   */
+  virtual
+  FiniteElementDomination::Domination
+  compare_for_face_domination (const FiniteElement<dim,spacedim> &fe_other) const;
+
+private:
+  /**
+   * Store a copy of FE_Q for delegating the hp-constraints functionality.
+   */
+  FE_Q<dim, spacedim> fe_q;
+
+  /**
+   * Return vector with dofs per vertex, line, quad, hex.
+   */
+  static std::vector<unsigned int> get_dpo_vector (const unsigned int deg);
+};
+
+
+
+/**
+ * FE_TraceQ in 1D, i.e., with degrees of freedom on the element vertices.
+ */
+template <int spacedim>
+class FE_TraceQ<1,spacedim> : public FE_FaceQ<1,spacedim>
+{
+public:
+  /**
+   * Constructor. @p p is presumably the polynomial degree -- TODO confirm.
+   */
+  FE_TraceQ (const unsigned int p);
+
+  /**
+   * Return the name of the element as a string.
+   */
+  std::string get_name() const;
+};
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/fe/fe_update_flags.h b/include/deal.II/fe/fe_update_flags.h
new file mode 100644
index 0000000..9ae9590
--- /dev/null
+++ b/include/deal.II/fe/fe_update_flags.h
@@ -0,0 +1,584 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__fe_update_flags_h
+#define dealii__fe_update_flags_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/table.h>
+#include <deal.II/base/derivative_form.h>
+#include <deal.II/base/point.h>
+#include <deal.II/base/tensor.h>
+
+#include <vector>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int,int> class FiniteElement;
+
+
+/*!@addtogroup feaccess */
+/*@{*/
+
+/**
+ * The enum type given to the constructors of FEValues, FEFaceValues and
+ * FESubfaceValues, telling those objects which data will be needed on each
+ * mesh cell.
+ *
+ * Selecting these flags in a restrictive way is crucial for the efficiency of
+ * FEValues::reinit(), FEFaceValues::reinit() and FESubfaceValues::reinit().
+ * Therefore, only the flags actually needed should be selected. It is the
+ * responsibility of the involved Mapping and FiniteElement to add additional
+ * flags according to their own requirements. For instance, most finite
+ * elements will add #update_covariant_transformation if #update_gradients is
+ * selected.  By default, all flags are off, i.e. no reinitialization will be
+ * done.
+ *
+ * You can select more than one flag by concatenation using the bitwise or
+ * operator|(UpdateFlags,UpdateFlags).
+ *
+ * <h3>Use of these flags</h3>
+ *
+ * More information on the use of this type both in user code as well as
+ * internally can be found in the documentation modules on
+ * @ref UpdateFlags "The interplay of UpdateFlags, Mapping, and FiniteElement in FEValues"
+ * and
+ * @ref FE_vs_Mapping_vs_FEValues "How Mapping, FiniteElement, and FEValues work together".
+ */
+enum UpdateFlags
+{
+  //! No update
+  update_default = 0,
+  //! Shape function values
+  /**
+   * Compute the values of the shape functions at the quadrature points on the
+   * real space cell. For the usual Lagrange elements, these values are equal
+   * to the values of the shape functions at the quadrature points on the unit
+   * cell, but they are different for more complicated elements, such as
+   * FE_RaviartThomas elements.
+   */
+  update_values = 0x0001,
+  //! Shape function gradients
+  /**
+   * Compute the gradients of the shape functions in coordinates of the real
+   * cell.
+   */
+  update_gradients = 0x0002,
+  //! Second derivatives of shape functions
+  /**
+   * Compute the second derivatives of the shape functions in coordinates of
+   * the real cell.
+   */
+  update_hessians = 0x0004,
+  //! Third derivatives of shape functions
+  /**
+   * Compute the third derivatives of the shape functions in coordinates of
+   * the real cell.
+   */
+  update_3rd_derivatives = 0x0008,
+  //! Outer normal vector, not normalized
+  /**
+   * Vector product of tangential vectors, yielding a normal vector with a
+   * length corresponding to the surface element; may be more efficient than
+   * computing both.
+   */
+  update_boundary_forms = 0x0010,
+  //! Transformed quadrature points
+  /**
+   * Compute the quadrature points transformed into real cell coordinates.
+   */
+  update_quadrature_points = 0x0020,
+  //! Transformed quadrature weights
+  /**
+   * Compute the quadrature weights on the real cell, i.e. the weights of the
+   * quadrature rule multiplied with the determinant of the Jacobian of the
+   * transformation from reference to real cell.
+   */
+  update_JxW_values = 0x0040,
+  //! Normal vectors
+  /**
+   * Compute the normal vectors, either for a face or for a cell of
+   * codimension one. Setting this flag for any other object will raise an
+   * error.
+   */
+  update_normal_vectors = 0x0080,
+  /**
+   * @deprecated Use #update_normal_vectors instead.
+   */
+  update_face_normal_vectors = update_normal_vectors,
+  /**
+   * @deprecated Use #update_normal_vectors instead.
+   */
+  update_cell_normal_vectors = update_normal_vectors,
+  //! Jacobian of the transformation
+  /**
+   * Compute the Jacobian of the transformation from the reference cell to the
+   * real cell.
+   */
+  update_jacobians = 0x0100,
+  //! Derivatives of the Jacobian
+  /**
+   * Compute the derivatives of the Jacobian of the transformation.
+   */
+  update_jacobian_grads = 0x0200,
+  //! Inverse Jacobian of the transformation
+  /**
+   * Compute the inverse Jacobian of the transformation from the reference
+   * cell to the real cell.
+   */
+  update_inverse_jacobians = 0x0400,
+  //! Covariant transformation
+  /**
+   * Compute all values the Mapping needs to perform a covariant
+   * transformation of vectors. For special mappings like MappingCartesian
+   * this may be simpler than #update_inverse_jacobians.
+   */
+  update_covariant_transformation = 0x0800,
+  //! Contravariant transformation
+  /**
+   * Compute all values the Mapping needs to perform a contravariant
+   * transformation of vectors. For special mappings like MappingCartesian
+   * this may be simpler than #update_jacobians.
+   */
+  update_contravariant_transformation = 0x1000,
+  //! Shape function values of transformation
+  /**
+   * Compute the shape function values of the transformation defined by the
+   * Mapping.
+   */
+  update_transformation_values = 0x2000,
+  //! Shape function gradients of transformation
+  /**
+   * Compute the shape function gradients of the transformation defined by the
+   * Mapping.
+   */
+  update_transformation_gradients = 0x4000,
+  //! Determinant of the Jacobian
+  /**
+   * Compute the volume element in each quadrature point.
+   */
+  update_volume_elements = 0x10000,
+  /**
+   * Compute the derivatives of the Jacobian of the transformation pushed
+   * forward to the real cell coordinates.
+   */
+  update_jacobian_pushed_forward_grads = 0x100000,
+  /**
+   * Compute the second derivatives of the Jacobian of the transformation.
+   */
+  update_jacobian_2nd_derivatives = 0x200000,
+  /**
+   * Compute the second derivatives of the Jacobian of the transformation
+   * pushed forward to the real cell coordinates.
+   */
+  update_jacobian_pushed_forward_2nd_derivatives = 0x400000,
+  /**
+   * Compute the third derivatives of the Jacobian of the transformation.
+   */
+  update_jacobian_3rd_derivatives = 0x800000,
+  /**
+   * Compute the third derivatives of the Jacobian of the transformation
+   * pushed forward to the real cell coordinates.
+   */
+  update_jacobian_pushed_forward_3rd_derivatives = 0x1000000,
+  /**
+   * @deprecated Use #update_quadrature_points instead.
+   */
+  update_q_points = update_quadrature_points,
+  /**
+   * @deprecated Use #update_hessians instead.
+   */
+  update_second_derivatives = update_hessians,
+  //! Values needed for Piola transform
+  /**
+   * Combination of the flags needed for Piola transform of Hdiv elements.
+   */
+  update_piola = update_volume_elements | update_contravariant_transformation
+};
+
+
+/**
+ * Output operator which outputs update flags as a set of or'd text values.
+ *
+ * @ref UpdateFlags
+ */
+template <class StreamType>
+inline
+StreamType &operator << (StreamType &s, UpdateFlags u)
+{
+  s << " UpdateFlags|";
+  if (u & update_values)                                  s << "values|";
+  if (u & update_gradients)                               s << "gradients|";
+  if (u & update_hessians)                                s << "hessians|";
+  if (u & update_3rd_derivatives)                         s << "3rd_derivatives|";
+  if (u & update_quadrature_points)                       s << "quadrature_points|";
+  if (u & update_JxW_values)                              s << "JxW_values|";
+  if (u & update_normal_vectors)                          s << "normal_vectors|";
+  if (u & update_jacobians)                               s << "jacobians|";
+  if (u & update_inverse_jacobians)                       s << "inverse_jacobians|";
+  if (u & update_jacobian_grads)                          s << "jacobian_grads|";
+  if (u & update_covariant_transformation)                s << "covariant_transformation|";
+  if (u & update_contravariant_transformation)            s << "contravariant_transformation|";
+  if (u & update_transformation_values)                   s << "transformation_values|";
+  if (u & update_transformation_gradients)                s << "transformation_gradients|";
+  if (u & update_jacobian_pushed_forward_grads)           s << "jacobian_pushed_forward_grads|";
+  if (u & update_jacobian_2nd_derivatives)                s << "jacobian_2nd_derivatives|";
+  if (u & update_jacobian_pushed_forward_2nd_derivatives) s << "jacobian_pushed_forward_2nd_derivatives|";
+  if (u &update_jacobian_3rd_derivatives)                 s << "jacobian_3rd_derivatives|";
+  if (u & update_jacobian_pushed_forward_3rd_derivatives) s << "jacobian_pushed_forward_3rd_derivatives|";
+  // Not printed by the checks above: update_boundary_forms, update_volume_elements.
+//TODO: check that 'u' really only has the flags set that are handled above
+  return s;
+}
+
+
+/**
+ * Global operator which returns an object in which all bits are set which are
+ * set in either the first or the second argument (both are combined as
+ * <tt>unsigned int</tt>). This operator exists since without it the result of
+ * the built-in <tt>operator |</tt> would be an integer, which would in turn
+ * trigger a compiler warning when we tried to assign it to an UpdateFlags.
+ *
+ * @ref UpdateFlags
+ */
+inline
+UpdateFlags
+operator | (UpdateFlags f1, UpdateFlags f2)
+{
+  return static_cast<UpdateFlags> (
+           static_cast<unsigned int> (f1) |
+           static_cast<unsigned int> (f2));
+}
+
+
+
+
+/**
+ * Global operator which sets the bits from the second argument also in the
+ * first one, and returns a reference to the (modified) first argument.
+ *
+ * @ref UpdateFlags
+ */
+inline
+UpdateFlags &
+operator |= (UpdateFlags &f1, UpdateFlags f2)
+{
+  f1 = f1 | f2;
+  return f1;
+}
+
+
+/**
+ * Global operator which returns an object in which all bits are set which are
+ * set in the first as well as the second argument (both are combined as
+ * <tt>unsigned int</tt>). This operator exists since without it the result of
+ * the built-in <tt>operator &</tt> would be an integer, which would in turn
+ * trigger a compiler warning when we tried to assign it to an UpdateFlags.
+ *
+ * @ref UpdateFlags
+ */
+inline
+UpdateFlags
+operator & (UpdateFlags f1, UpdateFlags f2)
+{
+  return static_cast<UpdateFlags> (
+           static_cast<unsigned int> (f1) &
+           static_cast<unsigned int> (f2));
+}
+
+
+/**
+ * Global operator which clears all the bits in the first argument if they are
+ * not also set in the second argument, and returns a reference to the first.
+ *
+ * @ref UpdateFlags
+ */
+inline
+UpdateFlags &
+operator &= (UpdateFlags &f1, UpdateFlags f2)
+{
+  f1 = f1 & f2;
+  return f1;
+}
+
+
+
+/**
+ * The enum in this namespace is used for storing similarities of the current
+ * cell to the previously visited cell. This information is used for reusing
+ * data when calling the method FEValues::reinit() (like derivatives, which do
+ * not change if one cell is just a translation of the previous). Currently,
+ * only a translation and an inverted translation (if dim<spacedim) are
+ * recognized. However, this concept makes it easy to add additional states
+ * to be detected in FEValues/FEFaceValues for making use of these
+ * similarities as well.
+ */
+namespace CellSimilarity
+{
+  enum Similarity
+  {
+    none,
+    translation,
+    inverted_translation,
+    invalid_next_cell
+  };
+}
+
+
+namespace internal
+{
+  namespace FEValues
+  {
+    /**
+     * A class that stores all of the mapping related data used in
+     * dealii::FEValues, dealii::FEFaceValues, and dealii::FESubfaceValues
+     * objects. Objects of this kind will be given as <i>output</i> argument
+     * when dealii::FEValues::reinit() calls Mapping::fill_fe_values() for a
+     * given cell, face, or subface.
+     *
+     * The data herein will then be provided as <i>input</i> argument in the
+     * following call to FiniteElement::fill_fe_values().
+     *
+     * @ingroup feaccess
+     */
+    template <int dim, int spacedim=dim>
+    class MappingRelatedData
+    {
+    public:
+      /**
+       * Initialize all vectors to correct size.
+       */
+      void initialize (const unsigned int n_quadrature_points,
+                       const UpdateFlags  flags);
+
+      /**
+       * Compute and return an estimate for the memory consumption (in bytes)
+       * of this object.
+       */
+      std::size_t memory_consumption () const;
+
+      /**
+       * Store an array of weights times the Jacobi determinant at the
+       * quadrature points. This array is reset each time reinit() is
+       * called. The Jacobi determinant is actually the reciprocal value of
+       * the Jacobi matrices stored in this class, see the general
+       * documentation of this class for more information.
+       *
+       * However, if this object refers to an FEFaceValues or FESubfaceValues
+       * object, then the JxW_values correspond to the Jacobian of the
+       * transformation of the face, not the cell, i.e. the dimensionality is
+       * that of a surface measure, not of a volume measure. In this case, it
+       * is computed from the boundary forms, rather than the Jacobian matrix.
+       */
+      std::vector<double>       JxW_values;
+
+      /**
+       * Array of the Jacobian matrices at the quadrature points.
+       */
+      std::vector< DerivativeForm<1,dim,spacedim> > jacobians;
+
+      /**
+       * Array of the derivatives of the Jacobian matrices at the quadrature
+       * points.
+       */
+      std::vector<DerivativeForm<2,dim,spacedim> >  jacobian_grads;
+
+      /**
+       * Array of the inverse Jacobian matrices at the quadrature points.
+       */
+      std::vector<DerivativeForm<1,spacedim,dim> > inverse_jacobians;
+
+      /**
+       * Array of the derivatives of the Jacobian matrices at the quadrature
+       * points, pushed forward to the real cell coordinates.
+       */
+      std::vector<Tensor<3,spacedim> > jacobian_pushed_forward_grads;
+
+      /**
+       * Array of the second derivatives of the Jacobian matrices at the
+       * quadrature points.
+       */
+      std::vector<DerivativeForm<3,dim,spacedim> > jacobian_2nd_derivatives;
+
+      /**
+       * Array of the second derivatives of the Jacobian matrices at the
+       * quadrature points, pushed forward to the real cell coordinates.
+       */
+      std::vector<Tensor<4,spacedim> > jacobian_pushed_forward_2nd_derivatives;
+
+      /**
+       * Array of the third derivatives of the Jacobian matrices at the
+       * quadrature points.
+       */
+      std::vector<DerivativeForm<4,dim,spacedim> > jacobian_3rd_derivatives;
+
+      /**
+       * Array of the third derivatives of the Jacobian matrices at the
+       * quadrature points, pushed forward to the real cell coordinates.
+       */
+      std::vector<Tensor<5,spacedim> > jacobian_pushed_forward_3rd_derivatives;
+
+      /**
+       * Array of quadrature points. This array is set up upon calling
+       * reinit() and contains the quadrature points on the real element,
+       * rather than on the reference element.
+       */
+      std::vector<Point<spacedim> >  quadrature_points;
+
+      /**
+       * List of outward normal vectors at the quadrature points.
+       */
+      std::vector<Tensor<1,spacedim> >  normal_vectors;
+
+      /**
+       * List of boundary forms at the quadrature points.
+       */
+      std::vector<Tensor<1,spacedim> >  boundary_forms;
+    };
+
+
+    /**
+     * A class that stores all of the shape function related data used in
+     * dealii::FEValues, dealii::FEFaceValues, and dealii::FESubfaceValues
+     * objects. Objects of this kind will be given as <i>output</i> argument
+     * when dealii::FEValues::reinit() calls FiniteElement::fill_fe_values().
+     *
+     * @ingroup feaccess
+     */
+    template <int dim, int spacedim=dim>
+    class FiniteElementRelatedData
+    {
+    public:
+      /**
+       * Initialize all vectors to correct size.
+       */
+      void initialize (const unsigned int        n_quadrature_points,
+                       const FiniteElement<dim,spacedim> &fe,
+                       const UpdateFlags         flags);
+
+      /**
+       * Compute and return an estimate for the memory consumption (in bytes)
+       * of this object.
+       */
+      std::size_t memory_consumption () const;
+
+      /**
+       * Storage type for shape values. Each row in the matrix denotes the
+       * values of a single shape function at the different points, columns
+       * are for a single point with the different shape functions.
+       *
+       * If a shape function has more than one non-zero component (in deal.II
+       * diction: it is non-primitive), then we allocate one row per non-zero
+       * component, and shift subsequent rows backward.  Lookup of the correct
+       * row for a shape function is thus simple in case the entire finite
+       * element is primitive (i.e. all shape functions are primitive), since
+       * then the shape function number equals the row number. Otherwise, use
+       * the #shape_function_to_row_table array to get at the first row that
+       * belongs to this particular shape function, and navigate among all the
+       * rows for this shape function using the
+       * FiniteElement::get_nonzero_components() function which tells us which
+       * components are non-zero and thus have a row in the array presently
+       * under discussion.
+       */
+      typedef dealii::Table<2,double> ShapeVector;
+
+      /**
+       * Storage type for gradients. The layout of data is the same as for the
+       * #ShapeVector data type.
+       */
+      typedef dealii::Table<2,Tensor<1,spacedim> > GradientVector;
+
+      /**
+       * Likewise for second order derivatives.
+       */
+      typedef dealii::Table<2,Tensor<2,spacedim> > HessianVector;
+
+      /**
+       * And the same also applies to the third order derivatives.
+       */
+      typedef dealii::Table<2,Tensor<3,spacedim> > ThirdDerivativeVector;
+
+      /**
+       * Store the values of the shape functions at the quadrature points. See
+       * the description of the data type for the layout of the data in this
+       * field.
+       */
+      ShapeVector shape_values;
+
+      /**
+       * Store the gradients of the shape functions at the quadrature points.
+       * See the description of the data type for the layout of the data in
+       * this field.
+       */
+      GradientVector shape_gradients;
+
+      /**
+       * Store the 2nd derivatives of the shape functions at the quadrature
+       * points.  See the description of the data type for the layout of the
+       * data in this field.
+       */
+      HessianVector shape_hessians;
+
+      /**
+       * Store the 3rd derivatives of the shape functions at the quadrature
+       * points.  See the description of the data type for the layout of the
+       * data in this field.
+       */
+      ThirdDerivativeVector shape_3rd_derivatives;
+
+      /**
+       * When asked for the value (or gradient, or Hessian) of shape function
+       * i's c-th vector component, we need to look it up in the
+       * #shape_values, #shape_gradients and #shape_hessians arrays.  The
+       * question is where in this array does the data for shape function i,
+       * component c reside. This is what this table answers.
+       *
+       * The format of the table is as follows:
+       * - It has dofs_per_cell times n_components entries.
+       * - The entry that corresponds to shape function i, component c is
+       *   <code>i * n_components + c</code>.
+       * - The value stored there indicates the row in #shape_values and the
+       *   other tables where the datum is stored for all quadrature points.
+       *
+       * In the general, vector-valued context, the number of components is
+       * larger than one, but for a given shape function, not all vector
+       * components may be nonzero (e.g., if a shape function is primitive,
+       * then exactly one vector component is non-zero, while the others are
+       * all zero). For such zero components, #shape_values and friends do not
+       * have a row. Consequently, for vector components for which shape
+       * function i is zero, the entry in the current table is
+       * numbers::invalid_unsigned_int.
+       *
+       * On the other hand, the table is guaranteed to have at least one valid
+       * index for each shape function. In particular, for a primitive finite
+       * element, each shape function has exactly one nonzero component and so
+       * for each i, there is exactly one valid index within the range
+       * <code>[i*n_components, (i+1)*n_components)</code>.
+       */
+      std::vector<unsigned int> shape_function_to_row_table;
+    };
+  }
+}
+
+
+/*@}*/
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/fe/fe_values.h b/include/deal.II/fe/fe_values.h
new file mode 100644
index 0000000..f01219e
--- /dev/null
+++ b/include/deal.II/fe/fe_values.h
@@ -0,0 +1,4704 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__fe_values_h
+#define dealii__fe_values_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/point.h>
+#include <deal.II/base/derivative_form.h>
+#include <deal.II/base/symmetric_tensor.h>
+#include <deal.II/base/vector_slice.h>
+#include <deal.II/base/quadrature.h>
+#include <deal.II/base/table.h>
+#include <deal.II/base/std_cxx11/unique_ptr.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/hp/dof_handler.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/fe_update_flags.h>
+#include <deal.II/fe/fe_values_extractors.h>
+#include <deal.II/fe/mapping.h>
+
+#include <algorithm>
+
+// dummy include in order to have the
+// definition of PetscScalar available
+// without including other PETSc stuff
+#ifdef DEAL_II_WITH_PETSC
+#  include <petsc.h>
+#endif
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int dim>   class Quadrature;
+template <int dim, int spacedim=dim> class FEValuesBase;
+
+template <typename Number> class Vector;
+template <typename Number> class BlockVector;
+
+
+namespace internal
+{
+  /**
+   * A class whose specialization is used to define what type the curl of a
+   * vector valued function corresponds to.
+   */
+  template <int dim>
+  struct CurlType;
+
+  /**
+   * A class whose specialization is used to define what type the curl of a
+   * vector valued function corresponds to.
+   *
+   * In 1d, the curl is a scalar, stored as a Tensor<1,1>.
+   */
+  template <>
+  struct CurlType<1>
+  {
+    typedef Tensor<1,1>     type;
+  };
+
+  /**
+   * A class whose specialization is used to define what type the curl of a
+   * vector valued function corresponds to.
+   *
+   * In 2d, the curl is a scalar, stored as a Tensor<1,1>.
+   */
+  template <>
+  struct CurlType<2>
+  {
+    typedef Tensor<1,1>     type;
+  };
+
+  /**
+   * A class whose specialization is used to define what type the curl of a
+   * vector valued function corresponds to.
+   *
+   * In 3d, the curl is a vector, stored as a Tensor<1,3>.
+   */
+  template <>
+  struct CurlType<3>
+  {
+    typedef Tensor<1,3>     type;
+  };
+}
+
+
+
+
+/**
+ * A namespace for "views" on a FEValues, FEFaceValues, or FESubfaceValues
+ * object. A view represents only a certain part of the whole: whereas the
+ * FEValues object represents <i>all</i> values, gradients, or second
+ * derivatives of all components of a vector-valued element, views restrict
+ * the attention to only a single component or a subset of components. You
+ * typically get objects of classes defined in this namespace by applying
+ * FEValuesExtractors objects to a FEValues, FEFaceValues or FESubfaceValues
+ * objects using the square bracket operator.
+ *
+ * There are classes that present views for single scalar components, vector
+ * components consisting of <code>dim</code> elements, and symmetric second
+ * order tensor components consisting of <code>(dim*dim + dim)/2</code>
+ * elements.
+ *
+ * See the description of the
+ * @ref vector_valued
+ * module for examples how to use the features of this namespace.
+ *
+ * @ingroup feaccess vector_valued
+ */
+namespace FEValuesViews
+{
+  /**
+   * A class representing a view to a single scalar component of a possibly
+   * vector-valued finite element. Views are discussed in the
+   * @ref vector_valued
+   * module.
+   *
+   * You get an object of this type if you apply a FEValuesExtractors::Scalar
+   * to an FEValues, FEFaceValues or FESubfaceValues object.
+   *
+   * @ingroup feaccess vector_valued
+   */
+  template <int dim, int spacedim=dim>
+  class Scalar
+  {
+  public:
+    /**
+     * A typedef for the data type of values of the view this class
+     * represents. Since we deal with a single component, the value type is a
+     * scalar double.
+     */
+    typedef double        value_type;
+
+    /**
+     * A typedef for the type of gradients of the view this class represents.
+     * Here, for a scalar component of the finite element, the gradient is a
+     * <code>Tensor@<1,spacedim@></code>.
+     */
+    typedef dealii::Tensor<1,spacedim> gradient_type;
+
+    /**
+     * A typedef for the type of second derivatives of the view this class
+     * represents. Here, for a scalar component of the finite element, the
+     * Hessian is a <code>Tensor@<2,spacedim@></code>.
+     */
+    typedef dealii::Tensor<2,spacedim> hessian_type;
+
+    /**
+     * A typedef for the type of third derivatives of the view this class
+     * represents. Here, for a scalar component of the finite element, the
+     * third derivative is a <code>Tensor@<3,spacedim@></code>.
+     */
+    typedef dealii::Tensor<3,spacedim> third_derivative_type;
+
+    /**
+     * A structure where for each shape function we pre-compute a bunch of
+     * data that will make later accesses much cheaper.
+     */
+    struct ShapeFunctionData
+    {
+      /**
+       * For each shape function, store whether the selected vector component
+       * may be nonzero. For primitive shape functions we know for sure
+       * whether a certain scalar component of a given shape function is
+       * nonzero, whereas for non-primitive shape functions this may not be
+       * entirely clear (e.g. for RT elements it depends on the shape of a
+       * cell).
+       */
+      bool is_nonzero_shape_function_component;
+
+      /**
+       * For each shape function, store the row index within the shape_values,
+       * shape_gradients, and shape_hessians tables (the column index is the
+       * quadrature point index). If the shape function is primitive, then we
+       * can get this information from the shape_function_to_row_table of the
+       * FEValues object; otherwise, we have to work a bit harder to compute
+       * this information.
+       */
+      unsigned int row_index;
+    };
+
+    /**
+     * Default constructor. Creates an invalid object.
+     */
+    Scalar ();
+
+    /**
+     * Constructor for an object that represents a single scalar component of
+     * a FEValuesBase object (or of one of the classes derived from
+     * FEValuesBase).
+     */
+    Scalar (const FEValuesBase<dim,spacedim> &fe_values_base,
+            const unsigned int       component);
+
+    /**
+     * Copy operator. This is not a lightweight object so we don't allow
+     * copying and generate an exception if this function is called.
+     */
+    Scalar &operator= (const Scalar<dim,spacedim> &);
+
+    /**
+     * Return the value of the vector component selected by this view, for the
+     * shape function and quadrature point selected by the arguments.
+     *
+     * @param shape_function Number of the shape function to be evaluated.
+     * Note that this number runs from zero to dofs_per_cell, even in the case
+     * of an FEFaceValues or FESubfaceValues object.
+     *
+     * @param q_point Number of the quadrature point at which function is to
+     * be evaluated.
+     *
+     * @dealiiRequiresUpdateFlags{update_values}
+     */
+    value_type
+    value (const unsigned int shape_function,
+           const unsigned int q_point) const;
+
+    /**
+     * Return the gradient (a tensor of rank 1) of the vector component
+     * selected by this view, for the shape function and quadrature point
+     * selected by the arguments.
+     *
+     * @note The meaning of the arguments is as documented for the value()
+     * function.
+     *
+     * @dealiiRequiresUpdateFlags{update_gradients}
+     */
+    gradient_type
+    gradient (const unsigned int shape_function,
+              const unsigned int q_point) const;
+
+    /**
+     * Return the Hessian (the tensor of rank 2 of all second derivatives) of
+     * the vector component selected by this view, for the shape function and
+     * quadrature point selected by the arguments.
+     *
+     * @note The meaning of the arguments is as documented for the value()
+     * function.
+     *
+     * @dealiiRequiresUpdateFlags{update_hessians}
+     */
+    hessian_type
+    hessian (const unsigned int shape_function,
+             const unsigned int q_point) const;
+
+    /**
+     * Return the tensor of rank 3 of all third derivatives of the vector
+     * component selected by this view, for the shape function and quadrature
+     * point selected by the arguments.
+     *
+     * @note The meaning of the arguments is as documented for the value()
+     * function.
+     *
+     * @dealiiRequiresUpdateFlags{update_3rd_derivatives}
+     */
+    third_derivative_type
+    third_derivative (const unsigned int shape_function,
+                      const unsigned int q_point) const;
+
+    /**
+     * Return the values of the selected scalar component of the finite
+     * element function characterized by <tt>fe_function</tt> at the
+     * quadrature points of the cell, face or subface selected the last time
+     * the <tt>reinit</tt> function of the FEValues object was called.
+     *
+     * This function is the equivalent of the
+     * FEValuesBase::get_function_values function but it only works on the
+     * selected scalar component.
+     *
+     * The data type stored by the output vector must be what you get when you
+     * multiply the values of shape functions (i.e., @p value_type) times the
+     * type used to store the values of the unknowns $U_j$ of your finite
+     * element vector $U$ (represented by the @p fe_function argument).
+     *
+     * @dealiiRequiresUpdateFlags{update_values}
+     */
+    template <class InputVector>
+    void get_function_values (const InputVector &fe_function,
+                              std::vector<typename ProductType<value_type,typename InputVector::value_type>::type> &values) const;
+
+    /**
+     * Return the gradients of the selected scalar component of the finite
+     * element function characterized by <tt>fe_function</tt> at the
+     * quadrature points of the cell, face or subface selected the last time
+     * the <tt>reinit</tt> function of the FEValues object was called.
+     *
+     * This function is the equivalent of the
+     * FEValuesBase::get_function_gradients function but it only works on the
+     * selected scalar component.
+     *
+     * The data type stored by the output vector must be what you get when you
+     * multiply the gradients of shape functions (i.e., @p gradient_type)
+     * times the type used to store the values of the unknowns $U_j$ of your
+     * finite element vector $U$ (represented by the @p fe_function argument).
+     *
+     * @dealiiRequiresUpdateFlags{update_gradients}
+     */
+    template <class InputVector>
+    void get_function_gradients (const InputVector &fe_function,
+                                 std::vector<typename ProductType<gradient_type,typename InputVector::value_type>::type> &gradients) const;
+
+    /**
+     * Return the Hessians of the selected scalar component of the finite
+     * element function characterized by <tt>fe_function</tt> at the
+     * quadrature points of the cell, face or subface selected the last time
+     * the <tt>reinit</tt> function of the FEValues object was called.
+     *
+     * This function is the equivalent of the
+     * FEValuesBase::get_function_hessians function but it only works on the
+     * selected scalar component.
+     *
+     * The data type stored by the output vector must be what you get when you
+     * multiply the Hessians of shape functions (i.e., @p hessian_type) times
+     * the type used to store the values of the unknowns $U_j$ of your finite
+     * element vector $U$ (represented by the @p fe_function argument).
+     *
+     * @dealiiRequiresUpdateFlags{update_hessians}
+     */
+    template <class InputVector>
+    void get_function_hessians (const InputVector &fe_function,
+                                std::vector<typename ProductType<hessian_type,typename InputVector::value_type>::type> &hessians) const;
+
+    /**
+     * Return the Laplacians of the selected scalar component of the finite
+     * element function characterized by <tt>fe_function</tt> at the
+     * quadrature points of the cell, face or subface selected the last time
+     * the <tt>reinit</tt> function of the FEValues object was called. The
+     * Laplacians are the trace of the Hessians.
+     *
+     * This function is the equivalent of the
+     * FEValuesBase::get_function_laplacians function but it only works on the
+     * selected scalar component.
+     *
+     * The data type stored by the output vector must be what you get when you
+     * multiply the Laplacians of shape functions (i.e., @p value_type) times
+     * the type used to store the values of the unknowns $U_j$ of your finite
+     * element vector $U$ (represented by the @p fe_function argument).
+     *
+     * @dealiiRequiresUpdateFlags{update_hessians}
+     */
+    template <class InputVector>
+    void get_function_laplacians (const InputVector &fe_function,
+                                  std::vector<typename ProductType<value_type,typename InputVector::value_type>::type> &laplacians) const;
+
+    /**
+     * Return the third derivatives of the selected scalar component of the
+     * finite element function characterized by <tt>fe_function</tt> at the
+     * quadrature points of the cell, face or subface selected the last time
+     * the <tt>reinit</tt> function of the FEValues object was called.
+     *
+     * This function is the equivalent of the
+     * FEValuesBase::get_function_third_derivatives function but it only works
+     * on the selected scalar component.
+     *
+     * The data type stored by the output vector must be what you get when you
+     * multiply the third derivatives of shape functions (i.e., @p
+     * third_derivative_type) times the type used to store the values of the
+     * unknowns $U_j$ of your finite element vector $U$ (represented by the @p
+     * fe_function argument).
+     *
+     * @dealiiRequiresUpdateFlags{update_3rd_derivatives}
+     */
+    template <class InputVector>
+    void get_function_third_derivatives (const InputVector &fe_function,
+                                         std::vector<typename ProductType<third_derivative_type,
+                                         typename InputVector::value_type>::type> &third_derivatives) const;
+
+  private:
+    /**
+     * A reference to the FEValuesBase object we operate on.
+     */
+    const FEValuesBase<dim,spacedim> &fe_values;
+
+    /**
+     * The single scalar component this view represents of the FEValuesBase
+     * object.
+     */
+    const unsigned int component;
+
+    /**
+     * Store the data about shape functions.
+     */
+    std::vector<ShapeFunctionData> shape_function_data;
+  };
+
+
+
+  /**
+   * A class representing a view to a set of <code>spacedim</code> components
+   * forming a vector part of a vector-valued finite element. Views are
+   * discussed in the
+   * @ref vector_valued
+   * module.
+   *
+   * Note that in the current context, a vector is meant in the sense physics
+   * uses it: it has <code>spacedim</code> components that behave in specific
+   * ways under coordinate system transformations. Examples include velocity
+   * or displacement fields. This is opposed to how mathematics uses the word
+   * "vector" (and how we use this word in other contexts in the library, for
+   * example in the Vector class), where it really stands for a collection of
+   * numbers. An example of this latter use of the word could be the set of
+   * concentrations of chemical species in a flame; however, these are really
+   * just a collection of scalar variables, since they do not change if the
+   * coordinate system is rotated, unlike the components of a velocity vector,
+   * and consequently, this class should not be used for this context.
+   *
+   * This class allows to query the value, gradient and divergence of
+   * (components of) shape functions and solutions representing vectors. The
+   * gradient of a vector $d_{k}, 0\le k<\text{dim}$ is defined as $S_{ij} =
+   * \frac{\partial d_{i}}{\partial x_j}, 0\le i,j<\text{dim}$.
+   *
+   * You get an object of this type if you apply a FEValuesExtractors::Vector
+   * to an FEValues, FEFaceValues or FESubfaceValues object.
+   *
+   * @ingroup feaccess vector_valued
+   */
+  template <int dim, int spacedim=dim>
+  class Vector
+  {
+  public:
+    /**
+     * A typedef for the data type of values of the view this class
+     * represents. Since we deal with a set of <code>dim</code> components,
+     * the value type is a Tensor<1,spacedim>.
+     */
+    typedef dealii::Tensor<1,spacedim>          value_type;
+
+    /**
+     * A typedef for the type of gradients of the view this class represents.
+     * Here, for a set of <code>dim</code> components of the finite element,
+     * the gradient is a <code>Tensor@<2,spacedim@></code>.
+     *
+     * See the general documentation of this class for how exactly the
+     * gradient of a vector is defined.
+     */
+    typedef dealii::Tensor<2,spacedim>          gradient_type;
+
+    /**
+     * A typedef for the type of symmetrized gradients of the view this class
+     * represents. Here, for a set of <code>dim</code> components of the
+     * finite element, the symmetrized gradient is a
+     * <code>SymmetricTensor@<2,spacedim@></code>.
+     *
+     * The symmetric gradient of a vector field $\mathbf v$ is defined as
+     * $\varepsilon(\mathbf v)=\frac 12 (\nabla \mathbf v + \nabla \mathbf
+     * v^T)$.
+     */
+    typedef dealii::SymmetricTensor<2,spacedim> symmetric_gradient_type;
+
+    /**
+     * A typedef for the type of the divergence of the view this class
+     * represents. Here, for a set of <code>dim</code> components of the
+     * finite element, the divergence of course is a scalar.
+     */
+    typedef double                 divergence_type;
+
+    /**
+     * A typedef for the type of the curl of the view this class represents.
+     * Here, for a set of <code>spacedim=2</code> components of the finite
+     * element, the curl is a <code>Tensor@<1, 1@></code>. For
+     * <code>spacedim=3</code> it is a <code>Tensor@<1, dim@></code>.
+     */
+    typedef typename dealii::internal::CurlType<spacedim>::type   curl_type;
+
+    /**
+     * A typedef for the type of second derivatives of the view this class
+     * represents. Here, for a set of <code>dim</code> components of the
+     * finite element, the Hessian is a <code>Tensor@<3,dim@></code>.
+     */
+    typedef dealii::Tensor<3,spacedim>          hessian_type;
+
+    /**
+     * A typedef for the type of third derivatives of the view this class
+     * represents. Here, for a set of <code>dim</code> components of the
+     * finite element, the third derivative is a <code>Tensor@<4,dim@></code>.
+     */
+    typedef dealii::Tensor<4,spacedim>          third_derivative_type;
+
+    /**
+     * A structure where for each shape function we pre-compute a bunch of
+     * data that will make later accesses much cheaper.
+     */
+    struct ShapeFunctionData
+    {
+      /**
+       * For each pair (shape function,component within vector), store whether
+       * the selected vector component may be nonzero. For primitive shape
+       * functions we know for sure whether a certain scalar component of a
+       * given shape function is nonzero, whereas for non-primitive shape
+       * functions this may not be entirely clear (e.g. for RT elements it
+       * depends on the shape of a cell).
+       */
+      bool is_nonzero_shape_function_component[spacedim];
+
+      /**
+       * For each pair (shape function, component within vector), store the
+       * row index within the shape_values, shape_gradients, and
+       * shape_hessians tables (the column index is the quadrature point
+       * index). If the shape function is primitive, then we can get this
+       * information from the shape_function_to_row_table of the FEValues
+       * object; otherwise, we have to work a bit harder to compute this
+       * information.
+       */
+      unsigned int row_index[spacedim];
+
+      /**
+       * For each shape function say the following: if only a single entry in
+       * is_nonzero_shape_function_component for this shape function is
+       * nonzero, then single_nonzero_component stores the corresponding value
+       * of row_index, and single_nonzero_component_index is the index into
+       * the arrays above (between 0 and spacedim) at which it is attained. If
+       * multiple components are nonzero, store -1. If none are, store -2.
+       */
+      int          single_nonzero_component;
+      unsigned int single_nonzero_component_index;
+    };
+
+    /**
+     * Default constructor. Creates an invalid object.
+     */
+    Vector ();
+
+    /**
+     * Constructor for an object that represents dim components of a
+     * FEValuesBase object (or of one of the classes derived from
+     * FEValuesBase), representing a vector-valued variable.
+     *
+     * The second argument denotes the index of the first component of the
+     * selected vector.
+     */
+    Vector (const FEValuesBase<dim,spacedim> &fe_values_base,
+            const unsigned int first_vector_component);
+
+    /**
+     * Copy operator. This is not a lightweight object so we don't allow
+     * copying and generate an exception if this function is called.
+     */
+    Vector &operator= (const Vector<dim,spacedim> &);
+
+    /**
+     * Return the value of the vector components selected by this view, for
+     * the shape function and quadrature point selected by the arguments.
+     * Here, since the view represents a vector-valued part of the FEValues
+     * object with <code>dim</code> components, the return type is a tensor of
+     * rank 1 with <code>dim</code> components.
+     *
+     * @param shape_function Number of the shape function to be evaluated.
+     * Note that this number runs from zero to dofs_per_cell, even in the case
+     * of an FEFaceValues or FESubfaceValues object.
+     *
+     * @param q_point Number of the quadrature point at which function is to
+     * be evaluated.
+     *
+     * @dealiiRequiresUpdateFlags{update_values}
+     */
+    value_type
+    value (const unsigned int shape_function,
+           const unsigned int q_point) const;
+
+    /**
+     * Return the gradient (a tensor of rank 2) of the vector component
+     * selected by this view, for the shape function and quadrature point
+     * selected by the arguments.
+     *
+     * See the general documentation of this class for how exactly the
+     * gradient of a vector is defined.
+     *
+     * @note The meaning of the arguments is as documented for the value()
+     * function.
+     *
+     * @dealiiRequiresUpdateFlags{update_gradients}
+     */
+    gradient_type
+    gradient (const unsigned int shape_function,
+              const unsigned int q_point) const;
+
+    /**
+     * Return the symmetric gradient (a symmetric tensor of rank 2) of the
+     * vector component selected by this view, for the shape function and
+     * quadrature point selected by the arguments.
+     *
+     * The symmetric gradient is defined as $\frac 12 [(\nabla \phi_i(x_q)) +
+     * (\nabla \phi_i(x_q))^T]$, where $\phi_i$ represents the
+     * <code>dim</code> components selected from the FEValuesBase object, and
+     * $x_q$ is the location of the $q$-th quadrature point.
+     *
+     * @note The meaning of the arguments is as documented for the value()
+     * function.
+     *
+     * @dealiiRequiresUpdateFlags{update_gradients}
+     */
+    symmetric_gradient_type
+    symmetric_gradient (const unsigned int shape_function,
+                        const unsigned int q_point) const;
+
+    /**
+     * Return the scalar divergence of the vector components selected by this
+     * view, for the shape function and quadrature point selected by the
+     * arguments.
+     *
+     * @note The meaning of the arguments is as documented for the value()
+     * function.
+     *
+     * @dealiiRequiresUpdateFlags{update_gradients}
+     */
+    divergence_type
+    divergence (const unsigned int shape_function,
+                const unsigned int q_point) const;
+
+    /**
+     * Return the vector curl of the vector components selected by this view,
+     * for the shape function and quadrature point selected by the arguments.
+     * For 1d this function does not make any sense. Thus it is not
+     * implemented for <code>spacedim=1</code>.  In 2d the curl is defined as
+     * @f{equation*}{
+     * \operatorname{curl}(u):=\frac{du_2}{dx} -\frac{du_1}{dy},
+     * @f}
+     * whereas in 3d it is given by
+     * @f{equation*}{
+     * \operatorname{curl}(u):=\left( \begin{array}{c}
+     * \frac{du_3}{dy}-\frac{du_2}{dz}\\ \frac{du_1}{dz}-\frac{du_3}{dx}\\
+     * \frac{du_2}{dx}-\frac{du_1}{dy} \end{array} \right).
+     * @f}
+     *
+     * @note The meaning of the arguments is as documented for the value()
+     * function.
+     *
+     * @dealiiRequiresUpdateFlags{update_gradients}
+     */
+    curl_type
+    curl (const unsigned int shape_function,
+          const unsigned int q_point) const;
+
+    /**
+     * Return the Hessian (the tensor of rank 3 of all second derivatives) of
+     * the vector components selected by this view, for the shape function and
+     * quadrature point selected by the arguments.
+     *
+     * @note The meaning of the arguments is as documented for the value()
+     * function.
+     *
+     * @dealiiRequiresUpdateFlags{update_hessians}
+     */
+    hessian_type
+    hessian (const unsigned int shape_function,
+             const unsigned int q_point) const;
+
+    /**
+     * Return the tensor of rank 4 of all third derivatives of the vector
+     * components selected by this view, for the shape function and quadrature
+     * point selected by the arguments.
+     *
+     * @note The meaning of the arguments is as documented for the value()
+     * function.
+     *
+     * @dealiiRequiresUpdateFlags{update_3rd_derivatives}
+     */
+    third_derivative_type
+    third_derivative (const unsigned int shape_function,
+                      const unsigned int q_point) const;
+
+    /**
+     * Return the values of the selected vector components of the finite
+     * element function characterized by <tt>fe_function</tt> at the
+     * quadrature points of the cell, face or subface selected the last time
+     * the <tt>reinit</tt> function of the FEValues object was called.
+     *
+     * This function is the equivalent of the
+     * FEValuesBase::get_function_values function but it only works on the
+     * selected vector components.
+     *
+     * The data type stored by the output vector must be what you get when you
+     * multiply the values of shape functions (i.e., @p value_type) times the
+     * type used to store the values of the unknowns $U_j$ of your finite
+     * element vector $U$ (represented by the @p fe_function argument).
+     *
+     * @dealiiRequiresUpdateFlags{update_values}
+     */
+    template <class InputVector>
+    void get_function_values (const InputVector &fe_function,
+                              std::vector<typename ProductType<value_type,typename InputVector::value_type>::type> &values) const;
+
+    /**
+     * Return the gradients of the selected vector components of the finite
+     * element function characterized by <tt>fe_function</tt> at the
+     * quadrature points of the cell, face or subface selected the last time
+     * the <tt>reinit</tt> function of the FEValues object was called.
+     *
+     * This function is the equivalent of the
+     * FEValuesBase::get_function_gradients function but it only works on the
+     * selected vector components.
+     *
+     * The data type stored by the output vector must be what you get when you
+     * multiply the gradients of shape functions (i.e., @p gradient_type)
+     * times the type used to store the values of the unknowns $U_j$ of your
+     * finite element vector $U$ (represented by the @p fe_function argument).
+     *
+     * @dealiiRequiresUpdateFlags{update_gradients}
+     */
+    template <class InputVector>
+    void get_function_gradients (const InputVector &fe_function,
+                                 std::vector<typename ProductType<gradient_type,typename InputVector::value_type>::type> &gradients) const;
+
+    /**
+     * Return the symmetrized gradients of the selected vector components of
+     * the finite element function characterized by <tt>fe_function</tt> at
+     * the quadrature points of the cell, face or subface selected the last
+     * time the <tt>reinit</tt> function of the FEValues object was called.
+     *
+     * The symmetric gradient of a vector field $\mathbf v$ is defined as
+     * $\varepsilon(\mathbf v)=\frac 12 (\nabla \mathbf v + \nabla \mathbf
+     * v^T)$.
+     *
+     * @note There is no equivalent function such as
+     * FEValuesBase::get_function_symmetric_gradients in the FEValues classes
+     * but the information can be obtained from
+     * FEValuesBase::get_function_gradients, of course.
+     *
+     * The data type stored by the output vector must be what you get when you
+     * multiply the symmetric gradients of shape functions (i.e., @p
+     * symmetric_gradient_type) times the type used to store the values of the
+     * unknowns $U_j$ of your finite element vector $U$ (represented by the @p
+     * fe_function argument).
+     *
+     * @dealiiRequiresUpdateFlags{update_gradients}
+     */
+    template <class InputVector>
+    void
+    get_function_symmetric_gradients (const InputVector &fe_function,
+                                      std::vector<typename ProductType<symmetric_gradient_type,typename InputVector::value_type>::type> &symmetric_gradients) const;
+
+    /**
+     * Return the divergence of the selected vector components of the finite
+     * element function characterized by <tt>fe_function</tt> at the
+     * quadrature points of the cell, face or subface selected the last time
+     * the <tt>reinit</tt> function of the FEValues object was called.
+     *
+     * There is no equivalent function such as
+     * FEValuesBase::get_function_divergences in the FEValues classes but the
+     * information can be obtained from FEValuesBase::get_function_gradients,
+     * of course.
+     *
+     * The data type stored by the output vector must be what you get when you
+     * multiply the divergences of shape functions (i.e., @p divergence_type)
+     * times the type used to store the values of the unknowns $U_j$ of your
+     * finite element vector $U$ (represented by the @p fe_function argument).
+     *
+     * @dealiiRequiresUpdateFlags{update_gradients}
+     */
+    template <class InputVector>
+    void get_function_divergences (const InputVector &fe_function,
+                                   std::vector<typename ProductType<divergence_type,typename InputVector::value_type>::type> &divergences) const;
+
+    /**
+     * Return the curl of the selected vector components of the finite element
+     * function characterized by <tt>fe_function</tt> at the quadrature points
+     * of the cell, face or subface selected the last time the <tt>reinit</tt>
+     * function of the FEValues object was called.
+     *
+     * There is no equivalent function such as
+     * FEValuesBase::get_function_curls in the FEValues classes but the
+     * information can be obtained from FEValuesBase::get_function_gradients,
+     * of course.
+     *
+     * The data type stored by the output vector must be what you get when you
+     * multiply the curls of shape functions (i.e., @p curl_type) times the
+     * type used to store the values of the unknowns $U_j$ of your finite
+     * element vector $U$ (represented by the @p fe_function argument).
+     *
+     * @dealiiRequiresUpdateFlags{update_gradients}
+     */
+    template <class InputVector>
+    void get_function_curls (const InputVector &fe_function,
+                             std::vector<typename ProductType<curl_type,typename InputVector::value_type>::type> &curls) const;
+
+    /**
+     * Return the Hessians of the selected vector components of the finite
+     * element function characterized by <tt>fe_function</tt> at the
+     * quadrature points of the cell, face or subface selected the last time
+     * the <tt>reinit</tt> function of the FEValues object was called.
+     *
+     * This function is the equivalent of the
+     * FEValuesBase::get_function_hessians function but it only works on the
+     * selected vector components.
+     *
+     * The data type stored by the output vector must be what you get when you
+     * multiply the Hessians of shape functions (i.e., @p hessian_type) times
+     * the type used to store the values of the unknowns $U_j$ of your finite
+     * element vector $U$ (represented by the @p fe_function argument).
+     *
+     * @dealiiRequiresUpdateFlags{update_hessians}
+     */
+    template <class InputVector>
+    void get_function_hessians (const InputVector &fe_function,
+                                std::vector<typename ProductType<hessian_type,typename InputVector::value_type>::type> &hessians) const;
+
+    /**
+     * Return the Laplacians of the selected vector components of the finite
+     * element function characterized by <tt>fe_function</tt> at the
+     * quadrature points of the cell, face or subface selected the last time
+     * the <tt>reinit</tt> function of the FEValues object was called. The
+     * Laplacians are the trace of the Hessians.
+     *
+     * This function is the equivalent of the
+     * FEValuesBase::get_function_laplacians function but it only works on the
+     * selected vector components.
+     *
+     * The data type stored by the output vector must be what you get when you
+     * multiply the Laplacians of shape functions (i.e., @p value_type)
+     * times the type used to store the values of the unknowns $U_j$ of your
+     * finite element vector $U$ (represented by the @p fe_function argument).
+     *
+     * @dealiiRequiresUpdateFlags{update_hessians}
+     */
+    template <class InputVector>
+    void get_function_laplacians (const InputVector &fe_function,
+                                  std::vector<typename ProductType<value_type,typename InputVector::value_type>::type> &laplacians) const;
+
+    /**
+     * Return the third derivatives of the selected vector components of the
+     * finite element function characterized by <tt>fe_function</tt> at the
+     * quadrature points of the cell, face or subface selected the last time
+     * the <tt>reinit</tt> function of the FEValues object was called.
+     *
+     * This function is the equivalent of the
+     * FEValuesBase::get_function_third_derivatives function but it only works
+     * on the selected vector components.
+     *
+     * The data type stored by the output vector must be what you get when you
+     * multiply the third derivatives of shape functions (i.e., @p
+     * third_derivative_type) times the type used to store the values of the
+     * unknowns $U_j$ of your finite element vector $U$ (represented by the @p
+     * fe_function argument).
+     *
+     * @dealiiRequiresUpdateFlags{update_3rd_derivatives}
+     */
+    template <class InputVector>
+    void get_function_third_derivatives (const InputVector &fe_function,
+                                         std::vector<typename ProductType<third_derivative_type,
+                                         typename InputVector::value_type>::type> &third_derivatives) const;
+
+  private:
+    /**
+     * A reference to the FEValuesBase object we operate on.
+     */
+    const FEValuesBase<dim,spacedim> &fe_values;
+
+    /**
+     * The first component of the vector this view represents of the
+     * FEValuesBase object.
+     */
+    const unsigned int first_vector_component;
+
+    /**
+     * Store the data about shape functions.
+     */
+    std::vector<ShapeFunctionData> shape_function_data;
+  };
+
+
+  template <int rank, int dim, int spacedim = dim>
+  class SymmetricTensor;
+
+  /**
+   * A class representing a view to a set of <code>(dim*dim + dim)/2</code>
+   * components forming a symmetric second-order tensor from a vector-valued
+   * finite element. Views are discussed in the
+   * @ref vector_valued
+   * module.
+   *
+   * This class allows to query the value and divergence of (components of)
+   * shape functions and solutions representing symmetric tensors. The
+   * divergence of a symmetric tensor $S_{ij}, 0\le i,j<\text{dim}$ is defined
+   * as $d_i = \sum_j \frac{\partial S_{ij}}{\partial x_j}, 0\le
+   * i<\text{dim}$, which due to the symmetry of the tensor is also $d_i =
+   * \sum_j \frac{\partial S_{ji}}{\partial x_j}$.  In other words, due to
+   * the symmetry of $S$ it does not matter whether we apply the nabla
+   * operator by row or by column to get the divergence.
+   *
+   * You get an object of this type if you apply a
+   * FEValuesExtractors::SymmetricTensor to an FEValues, FEFaceValues or
+   * FESubfaceValues object.
+   *
+   * @ingroup feaccess vector_valued
+   *
+   * @author Andrew McBride, 2009
+   */
+  template <int dim, int spacedim>
+  class SymmetricTensor<2,dim,spacedim>
+  {
+  public:
+    /**
+     * A typedef for the data type of values of the view this class
+     * represents. Since we deal with a set of <code>(dim*dim + dim)/2</code>
+     * components (i.e. the unique components of a symmetric second-order
+     * tensor), the value type is a <code>SymmetricTensor@<2,spacedim@></code>.
+     */
+    typedef dealii::SymmetricTensor<2, spacedim> value_type;
+
+    /**
+     * A typedef for the type of the divergence of the view this class
+     * represents. Here, for a set of <code>(dim*dim + dim)/2</code> unique
+     * components of the finite element representing a symmetric second-order
+     * tensor, the divergence of course is a <code>Tensor@<1,spacedim@></code>.
+     *
+     * See the general discussion of this class for a definition of the
+     * divergence.
+     */
+    typedef dealii::Tensor<1, spacedim> divergence_type;
+
+    /**
+     * A structure where for each shape function we pre-compute a bunch of
+     * data that will make later accesses much cheaper.
+     */
+    struct ShapeFunctionData
+    {
+      /**
+       * For each pair (shape function,component within vector), store whether
+       * the selected vector component may be nonzero. For primitive shape
+       * functions we know for sure whether a certain scalar component of a
+       * given shape function is nonzero, whereas for non-primitive shape
+       * functions this may not be entirely clear (e.g. for RT elements it
+       * depends on the shape of a cell).
+       */
+      bool is_nonzero_shape_function_component[value_type::n_independent_components];
+
+      /**
+       * For each pair (shape function, component within vector), store the
+       * row index within the shape_values, shape_gradients, and
+       * shape_hessians tables (the column index is the quadrature point
+       * index). If the shape function is primitive, then we can get this
+       * information from the shape_function_to_row_table of the FEValues
+       * object; otherwise, we have to work a bit harder to compute this
+       * information.
+       */
+      unsigned int row_index[value_type::n_independent_components];
+
+      /**
+       * For each shape function say the following: if only a single entry in
+       * is_nonzero_shape_function_component for this shape function is
+       * nonzero, then single_nonzero_component stores the corresponding value
+       * of row_index, and single_nonzero_component_index is the index between
+       * 0 and (dim^2 + dim)/2 at which it is attained. If multiple components
+       * are nonzero, then store -1. If no components are nonzero then store -2.
+       */
+      int single_nonzero_component;
+      unsigned int single_nonzero_component_index;
+    };
+
+    /**
+     * Default constructor. Creates an invalid object.
+     */
+    SymmetricTensor();
+
+    /**
+     * Constructor for an object that represents <code>(dim*dim +
+     * dim)/2</code> components of a FEValuesBase object (or of one of the
+     * classes derived from FEValuesBase), representing the unique components
+     * comprising a symmetric second-order tensor valued variable.
+     *
+     * The second argument denotes the index of the first component of the
+     * selected symmetric second order tensor.
+     */
+    SymmetricTensor(const FEValuesBase<dim, spacedim> &fe_values_base,
+                    const unsigned int first_tensor_component);
+
+    /**
+     * Copy operator. This is not a lightweight object so we don't allow
+     * copying and generate an exception if this function is called.
+     */
+    SymmetricTensor &operator=(const SymmetricTensor<2, dim, spacedim> &);
+
+    /**
+     * Return the value of the vector components selected by this view, for
+     * the shape function and quadrature point selected by the arguments.
+     * Here, since the view represents a vector-valued part of the FEValues
+     * object with <code>(dim*dim + dim)/2</code> components (the unique
+     * components of a symmetric second-order tensor), the return type is a
+     * symmetric tensor of rank 2.
+     *
+     * @param shape_function Number of the shape function to be evaluated.
+     * Note that this number runs from zero to dofs_per_cell, even in the case
+     * of an FEFaceValues or FESubfaceValues object.
+     *
+     * @param q_point Number of the quadrature point at which function is to
+     * be evaluated.
+     *
+     * @dealiiRequiresUpdateFlags{update_values}
+     */
+    value_type
+    value (const unsigned int shape_function,
+           const unsigned int q_point) const;
+
+
+    /**
+     * Return the vector divergence of the vector components selected by this
+     * view, for the shape function and quadrature point selected by the
+     * arguments.
+     *
+     * See the general discussion of this class for a definition of the
+     * divergence.
+     *
+     * @note The meaning of the arguments is as documented for the value()
+     * function.
+     *
+     * @dealiiRequiresUpdateFlags{update_gradients}
+     */
+    divergence_type
+    divergence (const unsigned int shape_function,
+                const unsigned int q_point) const;
+
+    /**
+     * Return the values of the selected vector components of the finite
+     * element function characterized by <tt>fe_function</tt> at the
+     * quadrature points of the cell, face or subface selected the last time
+     * the <tt>reinit</tt> function of the FEValues object was called.
+     *
+     * This function is the equivalent of the
+     * FEValuesBase::get_function_values function but it only works on the
+     * selected vector components.
+     *
+     * The data type stored by the output vector must be what you get when you
+     * multiply the values of shape functions (i.e., @p value_type) times the
+     * type used to store the values of the unknowns $U_j$ of your finite
+     * element vector $U$ (represented by the @p fe_function argument).
+     *
+     * @dealiiRequiresUpdateFlags{update_values}
+     */
+    template <class InputVector>
+    void get_function_values (const InputVector &fe_function,
+                              std::vector<typename ProductType<value_type,typename InputVector::value_type>::type> &values) const;
+
+    /**
+     * Return the divergence of the selected vector components of the finite
+     * element function characterized by <tt>fe_function</tt> at the
+     * quadrature points of the cell, face or subface selected the last time
+     * the <tt>reinit</tt> function of the FEValues object was called.
+     *
+     * There is no equivalent function such as
+     * FEValuesBase::get_function_divergences in the FEValues classes but the
+     * information can be obtained from FEValuesBase::get_function_gradients,
+     * of course.
+     *
+     * See the general discussion of this class for a definition of the
+     * divergence.
+     *
+     * The data type stored by the output vector must be what you get when you
+     * multiply the divergences of shape functions (i.e., @p divergence_type)
+     * times the type used to store the values of the unknowns $U_j$ of your
+     * finite element vector $U$ (represented by the @p fe_function argument).
+     *
+     * @dealiiRequiresUpdateFlags{update_gradients}
+     */
+    template <class InputVector>
+    void get_function_divergences (const InputVector &fe_function,
+                                   std::vector<typename ProductType<divergence_type,typename InputVector::value_type>::type> &divergences) const;
+
+  private:
+    /**
+     * A reference to the FEValuesBase object we operate on.
+     */
+    const FEValuesBase<dim, spacedim> &fe_values;
+
+    /**
+     * The first component of the vector this view represents of the
+     * FEValuesBase object.
+     */
+    const unsigned int first_tensor_component;
+
+    /**
+     * Store the data about shape functions.
+     */
+    std::vector<ShapeFunctionData> shape_function_data;
+  };
+
+
+  template <int rank, int dim, int spacedim = dim>
+  class Tensor;
+
+  /**
+   * A class representing a view to a set of <code>dim*dim</code> components
+   * forming a second-order tensor from a vector-valued finite element. Views
+   * are discussed in the
+   * @ref vector_valued
+   * module.
+   *
+   * This class allows to query the value and divergence of (components of)
+   * shape functions and solutions representing tensors. The divergence of a
+   * tensor $T_{ij}, 0\le i,j<\text{dim}$ is defined as $d_i = \sum_j
+   * \frac{\partial T_{ji}}{\partial x_j}, 0\le i<\text{dim}$.
+   *
+   * You get an object of this type if you apply a FEValuesExtractors::Tensor
+   * to an FEValues, FEFaceValues or FESubfaceValues object.
+   *
+   * @ingroup feaccess vector_valued
+   *
+   * @author Denis Davydov, 2013
+   */
+  template <int dim, int spacedim>
+  class Tensor<2,dim,spacedim>
+  {
+  public:
+
+    /**
+     * Data type for what you get when you apply an extractor of this kind to
+     * a vector-valued finite element.
+     */
+    typedef dealii::Tensor<2, spacedim> value_type;
+
+    /**
+     * Data type for taking the divergence of a tensor: a vector.
+     */
+    typedef dealii::Tensor<1, spacedim> divergence_type;
+
+    /**
+     * A structure where for each shape function we pre-compute a bunch of
+     * data that will make later accesses much cheaper.
+     */
+    struct ShapeFunctionData
+    {
+      /**
+       * For each pair (shape function,component within vector), store whether
+       * the selected vector component may be nonzero. For primitive shape
+       * functions we know for sure whether a certain scalar component of a
+       * given shape function is nonzero, whereas for non-primitive shape
+       * functions this may not be entirely clear (e.g. for RT elements it
+       * depends on the shape of a cell).
+       */
+      bool is_nonzero_shape_function_component[value_type::n_independent_components];
+
+      /**
+       * For each pair (shape function, component within vector), store the
+       * row index within the shape_values, shape_gradients, and
+       * shape_hessians tables (the column index is the quadrature point
+       * index). If the shape function is primitive, then we can get this
+       * information from the shape_function_to_row_table of the FEValues
+       * object; otherwise, we have to work a bit harder to compute this
+       * information.
+       */
+      unsigned int row_index[value_type::n_independent_components];
+
+      /**
+       * For each shape function say the following: if only a single entry in
+       * is_nonzero_shape_function_component for this shape function is
+       * nonzero, then store the corresponding value of row_index and
+       * single_nonzero_component_index represents the index between 0 and
+       * (dim^2) for which it is attained. If multiple components are nonzero,
+       * then store -1. If no components are nonzero then store -2.
+       */
+      int single_nonzero_component;
+      unsigned int single_nonzero_component_index;
+    };
+
+    /**
+     * Default constructor. Creates an invalid object.
+     */
+    Tensor();
+
+
+    /**
+     * Constructor for an object that represents <code>(dim*dim)</code>
+     * components of a FEValuesBase object (or of one of the classes derived
+     * from FEValuesBase), representing the unique components comprising a
+     * second-order tensor valued variable.
+     *
+     * The second argument denotes the index of the first component of the
+     * selected second order tensor.
+     */
+    Tensor(const FEValuesBase<dim, spacedim> &fe_values_base,
+           const unsigned int first_tensor_component);
+
+
+    /**
+     * Copy operator. This is not a lightweight object so we don't allow
+     * copying and generate an exception if this function is called.
+     */
+    Tensor &operator=(const Tensor<2, dim, spacedim> &);
+
+    /**
+     * Return the value of the vector components selected by this view, for
+     * the shape function and quadrature point selected by the arguments.
+     * Here, since the view represents a vector-valued part of the FEValues
+     * object with <code>(dim*dim)</code> components (the unique components of
+     * a second-order tensor), the return type is a tensor of rank 2.
+     *
+     * @param shape_function Number of the shape function to be evaluated.
+     * Note that this number runs from zero to dofs_per_cell, even in the case
+     * of an FEFaceValues or FESubfaceValues object.
+     *
+     * @param q_point Number of the quadrature point at which function is to
+     * be evaluated.
+     *
+     * @dealiiRequiresUpdateFlags{update_values}
+     */
+    value_type
+    value (const unsigned int shape_function,
+           const unsigned int q_point) const;
+
+    /**
+     * Return the vector divergence of the vector components selected by this
+     * view, for the shape function and quadrature point selected by the
+     * arguments.
+     *
+     * See the general discussion of this class for a definition of the
+     * divergence.
+     *
+     * @note The meaning of the arguments is as documented for the value()
+     * function.
+     *
+     * @dealiiRequiresUpdateFlags{update_gradients}
+     */
+    divergence_type
+    divergence (const unsigned int shape_function,
+                const unsigned int q_point) const;
+
+    /**
+     * Return the values of the selected vector components of the finite
+     * element function characterized by <tt>fe_function</tt> at the
+     * quadrature points of the cell, face or subface selected the last time
+     * the <tt>reinit</tt> function of the FEValues object was called.
+     *
+     * This function is the equivalent of the
+     * FEValuesBase::get_function_values function but it only works on the
+     * selected vector components.
+     *
+     * The data type stored by the output vector must be what you get when you
+     * multiply the values of shape functions (i.e., @p value_type) times the
+     * type used to store the values of the unknowns $U_j$ of your finite
+     * element vector $U$ (represented by the @p fe_function argument).
+     *
+     * @dealiiRequiresUpdateFlags{update_values}
+     */
+    template <class InputVector>
+    void get_function_values (const InputVector &fe_function,
+                              std::vector<typename ProductType<value_type,typename InputVector::value_type>::type> &values) const;
+
+
+    /**
+     * Return the divergence of the selected vector components of the finite
+     * element function characterized by <tt>fe_function</tt> at the
+     * quadrature points of the cell, face or subface selected the last time
+     * the <tt>reinit</tt> function of the FEValues object was called.
+     *
+     * There is no equivalent function such as
+     * FEValuesBase::get_function_divergences in the FEValues classes but the
+     * information can be obtained from FEValuesBase::get_function_gradients,
+     * of course.
+     *
+     * See the general discussion of this class for a definition of the
+     * divergence.
+     *
+     * The data type stored by the output vector must be what you get when you
+     * multiply the divergences of shape functions (i.e., @p divergence_type)
+     * times the type used to store the values of the unknowns $U_j$ of your
+     * finite element vector $U$ (represented by the @p fe_function argument).
+     *
+     * @dealiiRequiresUpdateFlags{update_gradients}
+     */
+    template <class InputVector>
+    void get_function_divergences (const InputVector &fe_function,
+                                   std::vector<typename ProductType<divergence_type,typename InputVector::value_type>::type> &divergences) const;
+
+  private:
+    /**
+     * A reference to the FEValuesBase object we operate on.
+     */
+    const FEValuesBase<dim, spacedim> &fe_values;
+
+    /**
+     * The first component of the tensor that this view represents within the
+     * FEValuesBase object.
+     */
+    const unsigned int first_tensor_component;
+
+    /**
+     * Store the data about shape functions.
+     */
+    std::vector<ShapeFunctionData> shape_function_data;
+  };
+
+}
+
+
+namespace internal
+{
+  namespace FEValuesViews
+  {
+    /**
+     * A class whose objects store a collection of FEValuesViews::Scalar,
+     * FEValuesViews::Vector, FEValuesViews::SymmetricTensor and
+     * FEValuesViews::Tensor objects. The FEValuesBase class uses it to
+     * generate all possible Views objects at construction time; we do this
+     * at construction time since the Views classes cache some information and
+     * are therefore relatively expensive to create.
+     */
+    template <int dim, int spacedim>
+    struct Cache
+    {
+      /**
+       * Caches for scalar, vector, symmetric second-order tensor, and
+       * general second-order tensor valued views. There is one std::vector
+       * per kind of view.
+       */
+      std::vector<dealii::FEValuesViews::Scalar<dim,spacedim> > scalars;
+      std::vector<dealii::FEValuesViews::Vector<dim,spacedim> > vectors;
+      std::vector<dealii::FEValuesViews::SymmetricTensor<2,dim,spacedim> >
+      symmetric_second_order_tensors;
+      std::vector<dealii::FEValuesViews::Tensor<2,dim,spacedim> >
+      second_order_tensors;
+
+      /**
+       * Constructor.
+       *
+       * @param fe_values The FEValuesBase object handed in by the caller;
+       * presumably the cached views are created for this object (the
+       * definition is not visible here -- to be confirmed against the
+       * implementation).
+       */
+      Cache (const FEValuesBase<dim,spacedim> &fe_values);
+    };
+  }
+}
+
+
+
+/**
+ * FEValues, FEFaceValues and FESubfaceValues objects are interfaces to finite
+ * element and mapping classes on the one hand, and to cells and quadrature
+ * rules on the other. They allow evaluating values or derivatives of
+ * shape functions at the quadrature points of a quadrature formula when
+ * projected by a mapping from the unit cell onto a cell in real space. The
+ * reason for this abstraction is possible optimization: Depending on the type
+ * of finite element and mapping, some values can be computed once on the unit
+ * cell. Others must be computed on each cell, but maybe computation of
+ * several values at the same time offers ways for optimization. Since this
+ * interplay may be complex and depends on the actual finite element, it
+ * cannot be left to the applications programmer.
+ *
+ * FEValues, FEFaceValues and FESubfaceValues provide only data handling:
+ * computations are left to objects of type Mapping and FiniteElement. These
+ * provide functions <tt>get_*_data</tt> and <tt>fill_*_values</tt> which are
+ * called by the constructor and <tt>reinit</tt> functions of
+ * <tt>FEValues*</tt>, respectively.
+ *
+ * <h3>General usage</h3>
+ *
+ * Usually, an object of <tt>FEValues*</tt> is used in integration loops over
+ * all cells of a triangulation (or faces of cells). To take full advantage of
+ * the optimization features, it should be constructed before the loop so that
+ * information that does not depend on the location and shape of cells can be
+ * computed once and for all (this includes, for example, the values of shape
+ * functions at quadrature points for the most common elements: we can
+ * evaluate them on the unit cell and they will be the same when mapped to the
+ * real cell). Then, in the loop over all cells, it must be re-initialized for
+ * each grid cell to compute that part of the information that changes
+ * depending on the actual cell (for example, the gradient of shape functions
+ * equals the gradient on the unit cell -- which can be computed once and for
+ * all -- times the Jacobian matrix of the mapping between unit and real cell,
+ * which needs to be recomputed for each cell).
+ *
+ * A typical piece of code, adding up local contributions to the mass
+ * matrix, looks like this:
+ *
+ * @code
+ * FEValues fe_values (mapping, finite_element, quadrature, flags);
+ * for (cell = dof_handler.begin_active();
+ *      cell != dof_handler.end();
+ *      ++cell)
+ *   {
+ *     fe_values.reinit(cell);
+ *     for (unsigned int q=0; q<quadrature.size(); ++q)
+ *       for (unsigned int i=0; i<finite_element.dofs_per_cell; ++i)
+ *         for (unsigned int j=0; j<finite_element.dofs_per_cell; ++j)
+ *           A(i,j) += fe_values.shape_value(i,q) *
+ *                     fe_values.shape_value(j,q) *
+ *                     fe_values.JxW(q);
+ *     ...
+ *   }
+ * @endcode
+ *
+ * The individual functions used here are described below. Note that by
+ * design, the order of quadrature points used inside the FEValues object is
+ * the same as defined by the quadrature formula passed to the constructor of
+ * the FEValues object above.
+ *
+ * <h3>Member functions</h3>
+ *
+ * The functions of this class fall into different categories:
+ * <ul>
+ * <li> shape_value(), shape_grad(), etc: return one of the values of this
+ * object at a time. These functions are inlined, so this is the suggested
+ * access to all finite element values. There should be no loss in performance
+ * with an optimizing compiler. If the finite element is vector valued, then
+ * these functions return the only non-zero component of the requested shape
+ * function. However, some finite elements have shape functions that have more
+ * than one non-zero component (we call them non-"primitive"), and in this
+ * case this set of functions will throw an exception since they cannot
+ * generate a useful result. Rather, use the next set of functions.
+ *
+ * <li> shape_value_component(), shape_grad_component(), etc: This is the same
+ * set of functions as above, except that for vector valued finite elements
+ * they return only one vector component. This is useful for elements of which
+ * shape functions have more than one non-zero component, since then the above
+ * functions cannot be used, and you have to walk over all (or only the non-
+ * zero) components of the shape function using this set of functions.
+ *
+ * <li> get_function_values(), get_function_gradients(), etc.: Compute a
+ * finite element function or its derivative in quadrature points.
+ *
+ * <li> reinit: initialize the FEValues object for a certain cell. This
+ * function is not in the present class but only in the derived classes and
+ * has a variable call syntax. See the docs for the derived classes for more
+ * information.
+ * </ul>
+ *
+ *
+ * <h3>Internals about the implementation</h3>
+ *
+ * The mechanisms by which this class works are discussed on the page on
+ * @ref UpdateFlags "Update flags"
+ * and on the page on
+ * @ref FE_vs_Mapping_vs_FEValues "How Mapping, FiniteElement, and FEValues work together".
+ *
+ *
+ * @ingroup feaccess
+ * @author Wolfgang Bangerth, 1998, 2003, Guido Kanschat, 2001
+ */
+template <int dim, int spacedim>
+class FEValuesBase :
+  public Subscriptor
+{
+public:
+  /**
+   * Dimension in which this object operates.
+   */
+  static const unsigned int dimension = dim;
+
+  /**
+   * Dimension of the space in which this object operates.
+   */
+  static const unsigned int space_dimension = spacedim;
+
+  /**
+   * Number of quadrature points.
+   */
+  const unsigned int n_quadrature_points;
+
+  /**
+   * Number of shape functions per cell. If we use this base class to evaluate
+   * a finite element on faces of cells, this is still the number of degrees
+   * of freedom per cell, not per face.
+   */
+  const unsigned int dofs_per_cell;
+
+
+  /**
+   * Constructor. Set up the array sizes with <tt>n_q_points</tt> quadrature
+   * points, <tt>dofs_per_cell</tt> trial functions per cell and with the
+   * given pattern to update the fields when the <tt>reinit</tt> function of
+   * the derived classes is called. The fields themselves are not set up, this
+   * must happen in the constructor of the derived class.
+   */
+  FEValuesBase (const unsigned int n_q_points,
+                const unsigned int dofs_per_cell,
+                const UpdateFlags update_flags,
+                const Mapping<dim,spacedim> &mapping,
+                const FiniteElement<dim,spacedim> &fe);
+
+
+  /**
+   * Destructor.
+   */
+  ~FEValuesBase ();
+
+
+  /// @name ShapeAccess Access to shape function values. These fields are filled by the finite element.
+  //@{
+
+  /**
+   * Value of a shape function at a quadrature point on the cell, face or
+   * subface selected the last time the <tt>reinit</tt> function of the
+   * derived class was called.
+   *
+   * If the shape function is vector-valued, then this returns the only non-
+   * zero component. If the shape function has more than one non-zero
+   * component (i.e. it is not primitive), then throw an exception of type
+   * ExcShapeFunctionNotPrimitive. In that case, use the
+   * shape_value_component() function.
+   *
+   * @param function_no Number of the shape function to be evaluated. Note
+   * that this number runs from zero to dofs_per_cell, even in the case of an
+   * FEFaceValues or FESubfaceValues object.
+   *
+   * @param point_no Number of the quadrature point at which function is to be
+   * evaluated.
+   *
+   * @dealiiRequiresUpdateFlags{update_values}
+   */
+  const double &shape_value (const unsigned int function_no,
+                             const unsigned int point_no) const;
+
+  /**
+   * Compute one vector component of the value of a shape function at a
+   * quadrature point. If the finite element is scalar, then only component
+   * zero is allowed and the return value equals that of the shape_value()
+   * function. If the finite element is vector valued but all shape functions
+   * are primitive (i.e. they are non-zero in only one component), then the
+   * value returned by shape_value() equals that of this function for exactly
+   * one component. This function is therefore only of greater interest if the
+   * shape function is not primitive, but then it is necessary since the other
+   * function cannot be used.
+   *
+   * @param function_no Number of the shape function to be evaluated.
+   *
+   * @param point_no Number of the quadrature point at which function is to be
+   * evaluated.
+   *
+   * @param component vector component to be evaluated.
+   *
+   * @dealiiRequiresUpdateFlags{update_values}
+   */
+  double shape_value_component (const unsigned int function_no,
+                                const unsigned int point_no,
+                                const unsigned int component) const;
+
+  /**
+   * Compute the gradient of the <tt>function_no</tt>th shape function at the
+   * <tt>quadrature_point</tt>th quadrature point with respect to real cell
+   * coordinates.  If you want to get the derivative in one of the coordinate
+   * directions, use the appropriate function of the Tensor class to extract
+   * one component of the Tensor returned by this function. Since only a
+   * reference to the gradient's value is returned, there should be no major
+   * performance drawback.
+   *
+   * If the shape function is vector-valued, then this returns the only non-
+   * zero component. If the shape function has more than one non-zero
+   * component (i.e. it is not primitive), then it will throw an exception of
+   * type ExcShapeFunctionNotPrimitive. In that case, use the
+   * shape_grad_component() function.
+   *
+   * The same holds for the arguments of this function as for the
+   * shape_value() function.
+   *
+   * @param function_no Number of the shape function to be evaluated.
+   *
+   * @param quadrature_point Number of the quadrature point at which function
+   * is to be evaluated.
+   *
+   * @dealiiRequiresUpdateFlags{update_gradients}
+   */
+  const Tensor<1,spacedim> &
+  shape_grad (const unsigned int function_no,
+              const unsigned int quadrature_point) const;
+
+  /**
+   * Return one vector component of the gradient of a shape function at a
+   * quadrature point. If the finite element is scalar, then only component
+   * zero is allowed and the return value equals that of the shape_grad()
+   * function. If the finite element is vector valued but all shape functions
+   * are primitive (i.e. they are non-zero in only one component), then the
+   * value returned by shape_grad() equals that of this function for exactly
+   * one component. This function is therefore only of greater interest if the
+   * shape function is not primitive, but then it is necessary since the other
+   * function cannot be used.
+   *
+   * The same holds for the arguments of this function as for the
+   * shape_value_component() function.
+   *
+   * @dealiiRequiresUpdateFlags{update_gradients}
+   */
+  Tensor<1,spacedim>
+  shape_grad_component (const unsigned int function_no,
+                        const unsigned int point_no,
+                        const unsigned int component) const;
+
+  /**
+   * Second derivatives of the <tt>function_no</tt>th shape function at the
+   * <tt>point_no</tt>th quadrature point with respect to real cell
+   * coordinates. If you want to get the derivatives in one of the coordinate
+   * directions, use the appropriate function of the Tensor class to extract
+   * one component. Since only a reference to the hessian values is returned,
+   * there should be no major performance drawback.
+   *
+   * If the shape function is vector-valued, then this returns the only non-
+   * zero component. If the shape function has more than one non-zero
+   * component (i.e. it is not primitive), then throw an exception of type
+   * ExcShapeFunctionNotPrimitive. In that case, use the
+   * shape_hessian_component() function.
+   *
+   * The same holds for the arguments of this function as for the
+   * shape_value() function.
+   *
+   * @dealiiRequiresUpdateFlags{update_hessians}
+   */
+  const Tensor<2,spacedim> &
+  shape_hessian (const unsigned int function_no,
+                 const unsigned int point_no) const;
+
+  /**
+   * Return one vector component of the Hessian of a shape function at a
+   * quadrature point. If the finite element is scalar, then only component
+   * zero is allowed and the return value equals that of the shape_hessian()
+   * function. If the finite element is vector valued but all shape functions
+   * are primitive (i.e. they are non-zero in only one component), then the
+   * value returned by shape_hessian() equals that of this function for
+   * exactly one component. This function is therefore only of greater
+   * interest if the shape function is not primitive, but then it is necessary
+   * since the other function cannot be used.
+   *
+   * The same holds for the arguments of this function as for the
+   * shape_value_component() function.
+   *
+   * @dealiiRequiresUpdateFlags{update_hessians}
+   */
+  Tensor<2,spacedim>
+  shape_hessian_component (const unsigned int function_no,
+                           const unsigned int point_no,
+                           const unsigned int component) const;
+
+  /**
+   * Third derivatives of the <tt>function_no</tt>th shape function at the
+   * <tt>point_no</tt>th quadrature point with respect to real cell
+   * coordinates. If you want to get the 3rd derivatives in one of the
+   * coordinate directions, use the appropriate function of the Tensor class
+   * to extract one component. Since only a reference to the 3rd derivative
+   * values is returned, there should be no major performance drawback.
+   *
+   * If the shape function is vector-valued, then this returns the only non-
+   * zero component. If the shape function has more than one non-zero
+   * component (i.e. it is not primitive), then throw an exception of type
+   * ExcShapeFunctionNotPrimitive. In that case, use the
+   * shape_3rd_derivative_component() function.
+   *
+   * The same holds for the arguments of this function as for the
+   * shape_value() function.
+   *
+   * @dealiiRequiresUpdateFlags{update_3rd_derivatives}
+   */
+  const Tensor<3,spacedim> &
+  shape_3rd_derivative (const unsigned int function_no,
+                        const unsigned int point_no) const;
+
+  /**
+   * Return one vector component of the third derivative of a shape function
+   * at a quadrature point. If the finite element is scalar, then only
+   * component zero is allowed and the return value equals that of the
+   * shape_3rd_derivative() function. If the finite element is vector valued
+   * but all shape functions are primitive (i.e. they are non-zero in only one
+   * component), then the value returned by shape_3rd_derivative() equals that
+   * of this function for exactly one component. This function is therefore
+   * only of greater interest if the shape function is not primitive, but then
+   * it is necessary since the other function cannot be used.
+   *
+   * The same holds for the arguments of this function as for the
+   * shape_value_component() function.
+   *
+   * @dealiiRequiresUpdateFlags{update_3rd_derivatives}
+   */
+  Tensor<3,spacedim>
+  shape_3rd_derivative_component (const unsigned int function_no,
+                                  const unsigned int point_no,
+                                  const unsigned int component) const;
+
+  //@}
+  /// @name Access to values of global finite element fields
+  //@{
+
+  /**
+   * Returns the values of a finite element function restricted to the current
+   * cell, face or subface selected the last time the <tt>reinit</tt> function
+   * of the derived class was called, at the quadrature points.
+   *
+   * If the present cell is not active then values are interpolated to the
+   * current cell and point values are computed from that.
+   *
+   * This function may only be used if the finite element in use is a scalar
+   * one, i.e. has only one vector component.  To get values of multi-
+   * component elements, there is another get_function_values() below,
+   * returning a vector of vectors of results.
+   *
+   * @param[in] fe_function A vector of values that describes (globally) the
+   * finite element function that this function should evaluate at the
+   * quadrature points of the current cell.
+   *
+   * @param[out] values The values of the function specified by fe_function at
+   * the quadrature points of the current cell.  The object is assumed to
+   * already have the correct size. The data type stored by this output vector
+   * must be what you get when you multiply the values of shape function times
+   * the type used to store the values of the unknowns $U_j$ of your finite
+   * element vector $U$ (represented by the @p fe_function argument). This
+   * happens to be equal to the type of the elements of the solution vector.
+   *
+   * @post <code>values[q]</code> will contain the value of the field
+   * described by fe_function at the $q$th quadrature point.
+   *
+   * @note The actual data type of the input vector may be either a
+   * Vector<T>, BlockVector<T>, or one of the sequential PETSc or
+   * Trilinos vector wrapper classes. It represents a global vector of DoF
+   * values associated with the DoFHandler object with which this FEValues
+   * object was last initialized. Alternatively, if the vector argument is of
+   * type IndexSet, then the function is represented as one that is either
+   * zero or one, depending on whether a DoF index is in the set or not.
+   *
+   * @dealiiRequiresUpdateFlags{update_values}
+   */
+  template <class InputVector>
+  void get_function_values (const InputVector &fe_function,
+                            std::vector<typename InputVector::value_type> &values) const;
+
+  /**
+   * This function does the same as the other get_function_values(), but
+   * applied to multi-component (vector-valued) elements. The meaning of the
+   * arguments is as explained there.
+   *
+   * @post <code>values[q]</code> is a vector of values of the field described
+   * by fe_function at the $q$th quadrature point. The size of the vector
+   * accessed by <code>values[q]</code> equals the number of components of the
+   * finite element, i.e. <code>values[q](c)</code> returns the value of the
+   * $c$th vector component at the $q$th quadrature point.
+   *
+   * @dealiiRequiresUpdateFlags{update_values}
+   */
+  template <class InputVector>
+  void get_function_values (const InputVector       &fe_function,
+                            std::vector<Vector<typename InputVector::value_type> > &values) const;
+
+  /**
+   * Generate function values from an arbitrary vector.
+   *
+   * This function offers the possibility to extract function values in
+   * quadrature points from vectors not corresponding to a whole
+   * discretization.
+   *
+   * The vector <tt>indices</tt> corresponds to the degrees of freedom on a
+   * single cell. Its length may even be a multiple of the number of dofs per
+   * cell. Then, the vectors in <tt>values</tt> should allow for the same
+   * multiple of the components of the finite element.
+   *
+   * You may want to use this function, if you want to access just a single
+   * block from a BlockVector, if you have a multi-level vector or if you
+   * already have a local representation of your finite element data.
+   *
+   * @dealiiRequiresUpdateFlags{update_values}
+   */
+  template <class InputVector>
+  void get_function_values (const InputVector &fe_function,
+                            const VectorSlice<const std::vector<types::global_dof_index> > &indices,
+                            std::vector<typename InputVector::value_type> &values) const;
+
+  /**
+   * Generate vector function values from an arbitrary vector.
+   *
+   * This function offers the possibility to extract function values in
+   * quadrature points from vectors not corresponding to a whole
+   * discretization.
+   *
+   * The vector <tt>indices</tt> corresponds to the degrees of freedom on a
+   * single cell. Its length may even be a multiple of the number of dofs per
+   * cell. Then, the vectors in <tt>values</tt> should allow for the same
+   * multiple of the components of the finite element.
+   *
+   * You may want to use this function, if you want to access just a single
+   * block from a BlockVector, if you have a multi-level vector or if you
+   * already have a local representation of your finite element data.
+   *
+   * Since this function allows for fairly general combinations of argument
+   * sizes, be aware that the checks on the arguments may not detect errors.
+   *
+   * @dealiiRequiresUpdateFlags{update_values}
+   */
+  template <class InputVector>
+  void get_function_values (const InputVector &fe_function,
+                            const VectorSlice<const std::vector<types::global_dof_index> > &indices,
+                            std::vector<Vector<typename InputVector::value_type> > &values) const;
+
+
+  /**
+   * Generate vector function values from an arbitrary vector.
+   *
+   * This function offers the possibility to extract function values in
+   * quadrature points from vectors not corresponding to a whole
+   * discretization.
+   *
+   * The vector <tt>indices</tt> corresponds to the degrees of freedom on a
+   * single cell. Its length may even be a multiple of the number of dofs per
+   * cell. Then, the vectors in <tt>values</tt> should allow for the same
+   * multiple of the components of the finite element.
+   *
+   * Depending on the value of the last argument, the outer vector of
+   * <tt>values</tt> has either the length of the quadrature rule
+   * (<tt>quadrature_points_fastest == false</tt>) or the length of components
+   * to be filled (<tt>quadrature_points_fastest == true</tt>). If <tt>p</tt> is
+   * the current quadrature point number and <tt>i</tt> is the vector
+   * component of the solution desired, the access to <tt>values</tt> is
+   * <tt>values[p][i]</tt> if <tt>quadrature_points_fastest == false</tt>, and
+   * <tt>values[i][p]</tt> otherwise.
+   *
+   * You may want to use this function, if you want to access just a single
+   * block from a BlockVector, if you have a multi-level vector or if you
+   * already have a local representation of your finite element data.
+   *
+   * Since this function allows for fairly general combinations of argument
+   * sizes, be aware that the checks on the arguments may not detect errors.
+   *
+   * @dealiiRequiresUpdateFlags{update_values}
+   */
+  template <class InputVector>
+  void get_function_values (const InputVector &fe_function,
+                            const VectorSlice<const std::vector<types::global_dof_index> > &indices,
+                            VectorSlice<std::vector<std::vector<typename InputVector::value_type> > > values,
+                            const bool quadrature_points_fastest) const;
+
+  //@}
+  /// @name Access to derivatives of global finite element fields
+  //@{
+
+  /**
+   * Compute the gradients of a finite element function at the quadrature
+   * points of a cell. This function is the equivalent of the corresponding
+   * get_function_values() function (see there for more information) but
+   * evaluates the finite element field's gradient instead of its value.
+   *
+   * This function may only be used if the finite element in use is a scalar
+   * one, i.e. has only one vector component. There is a corresponding
+   * function of the same name for vector-valued finite elements.
+   *
+   * @param[in] fe_function A vector of values that describes (globally) the
+   * finite element function that this function should evaluate at the
+   * quadrature points of the current cell.
+   *
+   * @param[out] gradients The gradients of the function specified by
+   * fe_function at the quadrature points of the current cell.  The gradients
+   * are computed in real space (as opposed to on the unit cell).  The object
+   * is assumed to already have the correct size. The data type stored by this
+   * output vector must be what you get when you multiply the gradients of
+   * shape function times the type used to store the values of the unknowns
+   * $U_j$ of your finite element vector $U$ (represented by the @p
+   * fe_function argument).
+   *
+   * @post <code>gradients[q]</code> will contain the gradient of the field
+   * described by fe_function at the $q$th quadrature point.
+   * <code>gradients[q][d]</code> represents the derivative in coordinate
+   * direction $d$ at quadrature point $q$.
+   *
+   * @note The actual data type of the input vector may be either a
+   * Vector<T>, BlockVector<T>, or one of the sequential PETSc or
+   * Trilinos vector wrapper classes. It represents a global vector of DoF
+   * values associated with the DoFHandler object with which this FEValues
+   * object was last initialized. Alternatively, if the vector argument is of
+   * type IndexSet, then the function is represented as one that is either
+   * zero or one, depending on whether a DoF index is in the set or not.
+   *
+   * @dealiiRequiresUpdateFlags{update_gradients}
+   */
+  template <class InputVector>
+  void get_function_gradients (const InputVector      &fe_function,
+                               std::vector<Tensor<1,spacedim,typename InputVector::value_type> > &gradients) const;
+
+  /**
+   * This function does the same as the other get_function_gradients(), but
+   * applied to multi-component (vector-valued) elements. The meaning of the
+   * arguments is as explained there.
+   *
+   * @post <code>gradients[q]</code> is a vector of gradients of the field
+   * described by fe_function at the $q$th quadrature point. The size of the
+   * vector accessed by <code>gradients[q]</code> equals the number of
+   * components of the finite element, i.e. <code>gradients[q][c]</code>
+   * returns the gradient of the $c$th vector component at the $q$th
+   * quadrature point. Consequently, <code>gradients[q][c][d]</code> is the
+   * derivative in coordinate direction $d$ of the $c$th vector component of
+   * the vector field at quadrature point $q$ of the current cell.
+   *
+   * @dealiiRequiresUpdateFlags{update_gradients}
+   */
+  template <class InputVector>
+  void get_function_gradients (const InputVector               &fe_function,
+                               std::vector<std::vector<Tensor<1,spacedim,typename InputVector::value_type> > > &gradients) const;
+
+  /**
+   * Function gradient access with more flexibility. See get_function_values()
+   * with corresponding arguments.
+   *
+   * @dealiiRequiresUpdateFlags{update_gradients}
+   */
+  template <class InputVector>
+  void get_function_gradients (const InputVector &fe_function,
+                               const VectorSlice<const std::vector<types::global_dof_index> > &indices,
+                               std::vector<Tensor<1,spacedim,typename InputVector::value_type> > &gradients) const;
+
+  /**
+   * Function gradient access with more flexibility. See get_function_values()
+   * with corresponding arguments.
+   *
+   * @dealiiRequiresUpdateFlags{update_gradients}
+   */
+  template <class InputVector>
+  void get_function_gradients (const InputVector &fe_function,
+                               const VectorSlice<const std::vector<types::global_dof_index> > &indices,
+                               VectorSlice<std::vector<std::vector<Tensor<1,spacedim,typename InputVector::value_type> > > > gradients,
+                               bool quadrature_points_fastest = false) const;
+
+  //@}
+  /// @name Access to second derivatives (Hessian matrices and Laplacians) of global finite element fields
+  //@{
+
+  /**
+   * Compute the tensor of second derivatives of a finite element function at
+   * the quadrature points of a cell. This function is the equivalent of the
+   * corresponding get_function_values() function (see there for more
+   * information) but evaluates the finite element field's second derivatives
+   * instead of its value.
+   *
+   * This function may only be used if the finite element in use is a scalar
+   * one, i.e. has only one vector component. There is a corresponding
+   * function of the same name for vector-valued finite elements.
+   *
+   * @param[in] fe_function A vector of values that describes (globally) the
+   * finite element function that this function should evaluate at the
+   * quadrature points of the current cell.
+   *
+   * @param[out] hessians The Hessians of the function specified by
+   * fe_function at the quadrature points of the current cell.  The Hessians
+   * are computed in real space (as opposed to on the unit cell).  The object
+   * is assumed to already have the correct size. The data type stored by this
+   * output vector must be what you get when you multiply the Hessians of
+   * shape function times the type used to store the values of the unknowns
+   * $U_j$ of your finite element vector $U$ (represented by the @p
+   * fe_function argument).
+   *
+   * @post <code>hessians[q]</code> will contain the Hessian of the field
+   * described by fe_function at the $q$th quadrature point.
+   * <code>hessians[q][i][j]</code> represents the $(i,j)$th component of the
+   * matrix of second derivatives at quadrature point $q$.
+   *
+   * @note The actual data type of the input vector may be either a
+   * Vector<T>, BlockVector<T>, or one of the sequential PETSc or
+   * Trilinos vector wrapper classes. It represents a global vector of DoF
+   * values associated with the DofHandler object with which this FEValues
+   * object was last initialized. Alternatively, if the vector argument is of
+   * type IndexSet, then the function is represented as one that is either
+   * zero or one, depending on whether a DoF index is in the set or not.
+   *
+   * @dealiiRequiresUpdateFlags{update_hessians}
+   */
+  template <class InputVector>
+  void
+  get_function_hessians (const InputVector &fe_function,
+                         std::vector<Tensor<2,spacedim,typename InputVector::value_type> > &hessians) const;
+
+  /**
+   * This function does the same as the other get_function_hessians(), but
+   * applied to multi-component (vector-valued) elements. The meaning of the
+   * arguments is as explained there.
+   *
+   * @post <code>hessians[q]</code> is a vector of Hessians of the field
+   * described by fe_function at the $q$th quadrature point. The size of the
+   * vector accessed by <code>hessians[q]</code> equals the number of
+   * components of the finite element, i.e. <code>hessians[q][c]</code>
+   * returns the Hessian of the $c$th vector component at the $q$th quadrature
+   * point. Consequently, <code>hessians[q][c][i][j]</code> is the $(i,j)$th
+   * component of the matrix of second derivatives of the $c$th vector
+   * component of the vector field at quadrature point $q$ of the current
+   * cell.
+   *
+   * @dealiiRequiresUpdateFlags{update_hessians}
+   */
+  template <class InputVector>
+  void
+  get_function_hessians (const InputVector      &fe_function,
+                         std::vector<std::vector<Tensor<2,spacedim,typename InputVector::value_type> > > &hessians,
+                         bool quadrature_points_fastest = false) const;
+
+  /**
+   * Access to the second derivatives of a function with more flexibility. See
+   * get_function_values() with corresponding arguments.
+   *
+   * @dealiiRequiresUpdateFlags{update_hessians}
+   */
+  template <class InputVector>
+  void get_function_hessians (
+    const InputVector &fe_function,
+    const VectorSlice<const std::vector<types::global_dof_index> > &indices,
+    std::vector<Tensor<2,spacedim,typename InputVector::value_type> > &hessians) const;
+
+  /**
+   * Access to the second derivatives of a function with more flexibility. See
+   * get_function_values() with corresponding arguments.
+   *
+   * @dealiiRequiresUpdateFlags{update_hessians}
+   */
+  template <class InputVector>
+  void get_function_hessians (
+    const InputVector &fe_function,
+    const VectorSlice<const std::vector<types::global_dof_index> > &indices,
+    VectorSlice<std::vector<std::vector<Tensor<2,spacedim,typename InputVector::value_type> > > > hessians,
+    bool quadrature_points_fastest = false) const;
+
+  /**
+   * Compute the (scalar) Laplacian (i.e. the trace of the tensor of second
+   * derivatives) of a finite element at the quadrature points of a cell. This
+   * function is the equivalent of the corresponding get_function_values()
+   * function (see there for more information) but evaluates the finite
+   * element field's second derivatives instead of its value.
+   *
+   * This function may only be used if the finite element in use is a scalar
+   * one, i.e. has only one vector component. There is a corresponding
+   * function of the same name for vector-valued finite elements.
+   *
+   * @param[in] fe_function A vector of values that describes (globally) the
+   * finite element function that this function should evaluate at the
+   * quadrature points of the current cell.
+   *
+   * @param[out] laplacians The Laplacians of the function specified by
+   * fe_function at the quadrature points of the current cell.  The Laplacians
+   * are computed in real space (as opposed to on the unit cell).  The object
+   * is assumed to already have the correct size. The data type stored by this
+   * output vector must be what you get when you multiply the Laplacians of
+   * shape function times the type used to store the values of the unknowns
+   * $U_j$ of your finite element vector $U$ (represented by the @p
+   * fe_function argument). This happens to be equal to the type of the
+   * elements of the input vector.
+   *
+   * @post <code>laplacians[q]</code> will contain the Laplacian of the field
+   * described by fe_function at the $q$th quadrature point. Unlike the
+   * Hessian, this is a single scalar per quadrature point, so
+   * <code>laplacians[q]</code> carries no further tensor indices.
+   *
+   * @post For each component of the output vector, there holds
+   * <code>laplacians[q]=trace(hessians[q])</code>, where <tt>hessians</tt>
+   * would be the output of the get_function_hessians() function.
+   *
+   * @note The actual data type of the input vector may be either a
+   * Vector<T>, BlockVector<T>, or one of the sequential PETSc or
+   * Trilinos vector wrapper classes. It represents a global vector of DoF
+   * values associated with the DofHandler object with which this FEValues
+   * object was last initialized. Alternatively, if the vector argument is of
+   * type IndexSet, then the function is represented as one that is either
+   * zero or one, depending on whether a DoF index is in the set or not.
+   *
+   * @dealiiRequiresUpdateFlags{update_hessians}
+   */
+  template <class InputVector>
+  void
+  get_function_laplacians (const InputVector &fe_function,
+                           std::vector<typename InputVector::value_type> &laplacians) const;
+
+  /**
+   * This function does the same as the other get_function_laplacians(), but
+   * applied to multi-component (vector-valued) elements. The meaning of the
+   * arguments is as explained there.
+   *
+   * @post <code>laplacians[q]</code> is a vector of Laplacians of the field
+   * described by fe_function at the $q$th quadrature point. The size of the
+   * vector accessed by <code>laplacians[q]</code> equals the number of
+   * components of the finite element, i.e. <code>laplacians[q][c]</code>
+   * returns the Laplacian of the $c$th vector component at the $q$th
+   * quadrature point.
+   *
+   * @post For each component of the output vector, there holds
+   * <code>laplacians[q][c]=trace(hessians[q][c])</code>, where
+   * <tt>hessians</tt> would be the output of the get_function_hessians()
+   * function.
+   *
+   * @dealiiRequiresUpdateFlags{update_hessians}
+   */
+  template <class InputVector>
+  void
+  get_function_laplacians (const InputVector      &fe_function,
+                           std::vector<Vector<typename InputVector::value_type> > &laplacians) const;
+
+  /**
+   * Access to the second derivatives of a function with more flexibility. See
+   * get_function_values() with corresponding arguments.
+   *
+   * @dealiiRequiresUpdateFlags{update_hessians}
+   */
+  template <class InputVector>
+  void get_function_laplacians (
+    const InputVector &fe_function,
+    const VectorSlice<const std::vector<types::global_dof_index> > &indices,
+    std::vector<typename InputVector::value_type> &laplacians) const;
+
+  /**
+   * Access to the second derivatives of a function with more flexibility. See
+   * get_function_values() with corresponding arguments.
+   *
+   * @dealiiRequiresUpdateFlags{update_hessians}
+   */
+  template <class InputVector>
+  void get_function_laplacians (
+    const InputVector &fe_function,
+    const VectorSlice<const std::vector<types::global_dof_index> > &indices,
+    std::vector<Vector<typename InputVector::value_type> > &laplacians) const;
+
+  /**
+   * Access to the second derivatives of a function with more flexibility. See
+   * get_function_values() with corresponding arguments.
+   *
+   * @dealiiRequiresUpdateFlags{update_hessians}
+   */
+  template <class InputVector>
+  void get_function_laplacians (
+    const InputVector &fe_function,
+    const VectorSlice<const std::vector<types::global_dof_index> > &indices,
+    std::vector<std::vector<typename InputVector::value_type> > &laplacians,
+    bool quadrature_points_fastest = false) const;
+
+  //@}
+  /// @name Access to third derivatives of global finite element fields
+  //@{
+
+  /**
+   * Compute the tensor of third derivatives of a finite element at the
+   * quadrature points of a cell. This function is the equivalent of the
+   * corresponding get_function_values() function (see there for more
+   * information) but evaluates the finite element field's third derivatives
+   * instead of its value.
+   *
+   * This function may only be used if the finite element in use is a scalar
+   * one, i.e. has only one vector component. There is a corresponding
+   * function of the same name for vector-valued finite elements.
+   *
+   * @param[in] fe_function A vector of values that describes (globally) the
+   * finite element function that this function should evaluate at the
+   * quadrature points of the current cell.
+   *
+   * @param[out] third_derivatives The third derivatives of the function
+   * specified by fe_function at the quadrature points of the current cell.
+   * The third derivatives are computed in real space (as opposed to on the
+   * unit cell).  The object is assumed to already have the correct size. The
+   * data type stored by this output vector must be what you get when you
+   * multiply the third derivatives of shape function times the type used to
+   * store the values of the unknowns $U_j$ of your finite element vector $U$
+   * (represented by the @p fe_function argument).
+   *
+   * @post <code>third_derivatives[q]</code> will contain the third
+   * derivatives of the field described by fe_function at the $q$th quadrature
+   * point. <code>third_derivatives[q][i][j][k]</code> represents the
+   * $(i,j,k)$th component of the 3rd order tensor of third derivatives at
+   * quadrature point $q$.
+   *
+   * @note The actual data type of the input vector may be either a
+   * Vector<T>, BlockVector<T>, or one of the sequential PETSc or
+   * Trilinos vector wrapper classes. It represents a global vector of DoF
+   * values associated with the DofHandler object with which this FEValues
+   * object was last initialized. Alternatively, if the vector argument is of
+   * type IndexSet, then the function is represented as one that is either
+   * zero or one, depending on whether a DoF index is in the set or not.
+   *
+   * @dealiiRequiresUpdateFlags{update_3rd_derivatives}
+   */
+  template <class InputVector>
+  void
+  get_function_third_derivatives (const InputVector &fe_function,
+                                  std::vector<Tensor<3,spacedim,typename InputVector::value_type> > &third_derivatives) const;
+
+  /**
+   * This function does the same as the other
+   * get_function_third_derivatives(), but applied to multi-component
+   * (vector-valued) elements. The meaning of the arguments is as explained
+   * there.
+   *
+   * @post <code>third_derivatives[q]</code> is a vector of third derivatives
+   * of the field described by fe_function at the $q$th quadrature point. The
+   * size of the vector accessed by <code>third_derivatives[q]</code> equals
+   * the number of components of the finite element, i.e.
+   * <code>third_derivatives[q][c]</code> returns the third derivative of the
+   * $c$th vector component at the $q$th quadrature point. Consequently,
+   * <code>third_derivatives[q][c][i][j][k]</code> is the $(i,j,k)$th
+   * component of the tensor of third derivatives of the $c$th vector
+   * component of the vector field at quadrature point $q$ of the current
+   * cell.
+   *
+   * @dealiiRequiresUpdateFlags{update_3rd_derivatives}
+   */
+  template <class InputVector>
+  void
+  get_function_third_derivatives (const InputVector      &fe_function,
+                                  std::vector<std::vector<Tensor<3,spacedim,typename InputVector::value_type> > > &third_derivatives,
+                                  bool quadrature_points_fastest = false) const;
+
+  /**
+   * Access to the third derivatives of a function with more flexibility. See
+   * get_function_values() with corresponding arguments.
+   *
+   * @dealiiRequiresUpdateFlags{update_3rd_derivatives}
+   */
+  template <class InputVector>
+  void get_function_third_derivatives (
+    const InputVector &fe_function,
+    const VectorSlice<const std::vector<types::global_dof_index> > &indices,
+    std::vector<Tensor<3,spacedim,typename InputVector::value_type> > &third_derivatives) const;
+
+  /**
+   * Access to the third derivatives of a function with more flexibility. See
+   * get_function_values() with corresponding arguments.
+   *
+   * @dealiiRequiresUpdateFlags{update_3rd_derivatives}
+   */
+  template <class InputVector>
+  void get_function_third_derivatives (
+    const InputVector &fe_function,
+    const VectorSlice<const std::vector<types::global_dof_index> > &indices,
+    VectorSlice<std::vector<std::vector<Tensor<3,spacedim,typename InputVector::value_type> > > > third_derivatives,
+    bool quadrature_points_fastest = false) const;
+  //@}
+
+  /// @name Geometry of the cell
+  //@{
+
+  /**
+   * Position of the <tt>q</tt>th quadrature point in real space.
+   *
+   * @dealiiRequiresUpdateFlags{update_quadrature_points}
+   */
+  const Point<spacedim> &
+  quadrature_point (const unsigned int q) const;
+
+  /**
+   * Return a reference to the vector of quadrature points in real space.
+   *
+   * @dealiiRequiresUpdateFlags{update_quadrature_points}
+   */
+  const std::vector<Point<spacedim> > &get_quadrature_points () const;
+
+  /**
+   * Mapped quadrature weight. If this object refers to a volume evaluation
+   * (i.e. the derived class is of type FEValues), then this is the Jacobi
+   * determinant times the weight of the unit quadrature point with index
+   * @p quadrature_point.
+   *
+   * For surface evaluations (i.e. classes FEFaceValues or FESubfaceValues),
+   * it is the mapped surface element times the weight of the quadrature
+   * point.
+   *
+   * You can think of the quantity returned by this function as the volume or
+   * surface element $dx, ds$ in the integral that we implement here by
+   * quadrature.
+   *
+   * @dealiiRequiresUpdateFlags{update_JxW_values}
+   */
+  double JxW (const unsigned int quadrature_point) const;
+
+  /**
+   * Return a reference to the array holding the values returned by JxW().
+   *
+   * @dealiiRequiresUpdateFlags{update_JxW_values}
+   */
+  const std::vector<double> &get_JxW_values () const;
+
+  /**
+   * Return the Jacobian of the transformation at the specified quadrature
+   * point, i.e.  $J_{ij}=dx_i/d\hat x_j$
+   *
+   * @dealiiRequiresUpdateFlags{update_jacobians}
+   */
+  const DerivativeForm<1,dim,spacedim> &jacobian (const unsigned int quadrature_point) const;
+
+  /**
+   * Return a reference to the array holding the values returned by
+   * jacobian().
+   *
+   * @dealiiRequiresUpdateFlags{update_jacobians}
+   */
+  const std::vector<DerivativeForm<1,dim,spacedim> > &get_jacobians () const;
+
+  /**
+   * Return the second derivative of the transformation from unit to real
+   * cell, i.e. the first derivative of the Jacobian, at the specified
+   * quadrature point, i.e. $G_{ijk}=dJ_{jk}/d\hat x_i$.
+   *
+   * @dealiiRequiresUpdateFlags{update_jacobian_grads}
+   */
+  const DerivativeForm<2,dim,spacedim> &jacobian_grad (const unsigned int quadrature_point) const;
+
+  /**
+   * Return a reference to the array holding the values returned by
+   * jacobian_grad().
+   *
+   * @dealiiRequiresUpdateFlags{update_jacobian_grads}
+   */
+  const std::vector<DerivativeForm<2,dim,spacedim> > &get_jacobian_grads () const;
+
+  /**
+   * Return the second derivative of the transformation from unit to real
+   * cell, i.e. the first derivative of the Jacobian, at the specified
+   * quadrature point, pushed forward to the real cell coordinates, i.e.
+   * $G_{ijk}=dJ_{iJ}/d\hat x_K (J_{jJ})^{-1} (J_{kK})^{-1}$.
+   *
+   * @dealiiRequiresUpdateFlags{update_jacobian_pushed_forward_grads}
+   */
+  const Tensor<3,spacedim> &jacobian_pushed_forward_grad (const unsigned int quadrature_point) const;
+
+  /**
+   * Return a reference to the array holding the values returned by
+   * jacobian_pushed_forward_grad().
+   *
+   * @dealiiRequiresUpdateFlags{update_jacobian_pushed_forward_grads}
+   */
+  const std::vector<Tensor<3,spacedim> > &get_jacobian_pushed_forward_grads () const;
+
+  /**
+   * Return the third derivative of the transformation from unit to real cell,
+   * i.e. the second derivative of the Jacobian, at the specified quadrature
+   * point, i.e. $G_{ijkl}=\frac{d^2J_{ij}}{d\hat x_k d\hat x_l}$.
+   *
+   * @dealiiRequiresUpdateFlags{update_jacobian_2nd_derivatives}
+   */
+  const DerivativeForm<3,dim,spacedim> &jacobian_2nd_derivative (const unsigned int quadrature_point) const;
+
+  /**
+   * Return a reference to the array holding the values returned by
+   * jacobian_2nd_derivative().
+   *
+   * @dealiiRequiresUpdateFlags{update_jacobian_2nd_derivatives}
+   */
+  const std::vector<DerivativeForm<3,dim,spacedim> > &get_jacobian_2nd_derivatives () const;
+
+  /**
+   * Return the third derivative of the transformation from unit to real cell,
+   * i.e. the second derivative of the Jacobian, at the specified quadrature
+   * point, pushed forward to the real cell coordinates, i.e.
+   * $G_{ijkl}=\frac{d^2J_{iJ}}{d\hat x_K d\hat x_L} (J_{jJ})^{-1}
+   * (J_{kK})^{-1}(J_{lL})^{-1}$.
+   *
+   * @dealiiRequiresUpdateFlags{update_jacobian_pushed_forward_2nd_derivatives}
+   */
+  const Tensor<4,spacedim> &jacobian_pushed_forward_2nd_derivative (const unsigned int quadrature_point) const;
+
+  /**
+   * Return a reference to the array holding the values returned by
+   * jacobian_pushed_forward_2nd_derivative().
+   *
+   * @dealiiRequiresUpdateFlags{update_jacobian_pushed_forward_2nd_derivatives}
+   */
+  const std::vector<Tensor<4,spacedim> > &get_jacobian_pushed_forward_2nd_derivatives () const;
+
+  /**
+   * Return the fourth derivative of the transformation from unit to real
+   * cell, i.e. the third derivative of the Jacobian, at the specified
+   * quadrature point, i.e. $G_{ijklm}=\frac{d^3J_{ij}}{d\hat x_k d\hat x_l
+   * d\hat x_m}$.
+   *
+   * @dealiiRequiresUpdateFlags{update_jacobian_3rd_derivatives}
+   */
+  const DerivativeForm<4,dim,spacedim> &jacobian_3rd_derivative (const unsigned int quadrature_point) const;
+
+  /**
+   * Return a reference to the array holding the values returned by
+   * jacobian_3rd_derivative().
+   *
+   * @dealiiRequiresUpdateFlags{update_jacobian_3rd_derivatives}
+   */
+  const std::vector<DerivativeForm<4,dim,spacedim> > &get_jacobian_3rd_derivatives () const;
+
+  /**
+   * Return the fourth derivative of the transformation from unit to real
+   * cell, i.e. the third derivative of the Jacobian, at the specified
+   * quadrature point, pushed forward to the real cell coordinates, i.e.
+   * $G_{ijklm}=\frac{d^3J_{iJ}}{d\hat x_K d\hat x_L d\hat x_M} (J_{jJ})^{-1}
+   * (J_{kK})^{-1} (J_{lL})^{-1} (J_{mM})^{-1}$.
+   *
+   * @dealiiRequiresUpdateFlags{update_jacobian_pushed_forward_3rd_derivatives}
+   */
+  const Tensor<5,spacedim> &jacobian_pushed_forward_3rd_derivative (const unsigned int quadrature_point) const;
+
+  /**
+   * Return a reference to the array holding the values returned by
+   * jacobian_pushed_forward_3rd_derivative().
+   *
+   * @dealiiRequiresUpdateFlags{update_jacobian_pushed_forward_3rd_derivatives}
+   */
+  const std::vector<Tensor<5,spacedim> > &get_jacobian_pushed_forward_3rd_derivatives () const;
+
+  /**
+   * Return the inverse Jacobian of the transformation at the specified
+   * quadrature point, i.e.  $J^{-1}_{ij}=d\hat x_i/dx_j$
+   *
+   * @dealiiRequiresUpdateFlags{update_inverse_jacobians}
+   */
+  const DerivativeForm<1,spacedim,dim> &inverse_jacobian (const unsigned int quadrature_point) const;
+
+  /**
+   * Return a reference to the array holding the values returned by
+   * inverse_jacobian().
+   *
+   * @dealiiRequiresUpdateFlags{update_inverse_jacobians}
+   */
+  const std::vector<DerivativeForm<1,spacedim,dim> > &get_inverse_jacobians () const;
+
+  /**
+   * For a face, return the outward normal vector to the cell at the
+   * <tt>i</tt>th quadrature point.
+   *
+   * For a cell of codimension one, return the normal vector. There are of
+   * course two normal directions to a manifold in that case, and this
+   * function returns the "up" direction as induced by the numbering of the
+   * vertices.
+   *
+   * The length of the vector is normalized to one.
+   *
+   * @dealiiRequiresUpdateFlags{update_normal_vectors}
+   */
+  const Tensor<1,spacedim> &normal_vector (const unsigned int i) const;
+
+  /**
+   * Return the normal vectors at the quadrature points. For a face, these are
+   * the outward normal vectors to the cell. For a cell of codimension one,
+   * the orientation is given by the numbering of vertices.
+   *
+   * @dealiiRequiresUpdateFlags{update_normal_vectors}
+   *
+   * @note This function should really be named get_normal_vectors(), but this
+   * function already exists with a different return type that returns a
+   * vector of Point objects, rather than a vector of Tensor objects. This is
+   * a historical accident, but can not be fixed in a backward compatible
+   * style. That said, the get_normal_vectors() function is now deprecated,
+   * will be removed in the next version, and the current function will then
+   * be renamed.
+   */
+  const std::vector<Tensor<1,spacedim> > &get_all_normal_vectors () const;
+
+  /**
+   * Return the normal vectors at the quadrature points as a vector of Point
+   * objects. This function is deprecated because normal vectors are correctly
+   * represented by rank-1 Tensor objects, not Point objects. Use
+   * get_all_normal_vectors() instead.
+   *
+   * @dealiiRequiresUpdateFlags{update_normal_vectors}
+   *
+   * @deprecated
+   */
+  std::vector<Point<spacedim> > get_normal_vectors () const DEAL_II_DEPRECATED;
+
+  /**
+   * Transform a set of vectors, one for each quadrature point. The
+   * <tt>mapping</tt> can be any of the ones defined in MappingType.
+   *
+   * @deprecated Use the various Mapping::transform() functions instead.
+   */
+  void transform (std::vector<Tensor<1,spacedim> > &transformed,
+                  const std::vector<Tensor<1,dim> > &original,
+                  MappingType mapping) const DEAL_II_DEPRECATED;
+
+  //@}
+
+  /// @name Extractors Methods to extract individual components
+  //@{
+
+  /**
+   * Create a view of the current FEValues object that represents a particular
+   * scalar component of the possibly vector-valued finite element. The
+   * concept of views is explained in the documentation of the namespace
+   * FEValuesViews and in particular in the
+   * @ref vector_valued
+   * module.
+   */
+  const FEValuesViews::Scalar<dim,spacedim> &
+  operator[] (const FEValuesExtractors::Scalar &scalar) const;
+
+  /**
+   * Create a view of the current FEValues object that represents a set of
+   * <code>dim</code> scalar components (i.e. a vector) of the vector-valued
+   * finite element. The concept of views is explained in the documentation of
+   * the namespace FEValuesViews and in particular in the
+   * @ref vector_valued
+   * module.
+   */
+  const FEValuesViews::Vector<dim,spacedim> &
+  operator[] (const FEValuesExtractors::Vector &vector) const;
+
+  /**
+   * Create a view of the current FEValues object that represents a set of
+   * <code>(dim*dim + dim)/2</code> scalar components (i.e. a symmetric 2nd
+   * order tensor) of the vector-valued finite element. The concept of views
+   * is explained in the documentation of the namespace FEValuesViews and in
+   * particular in the
+   * @ref vector_valued
+   * module.
+   */
+  const FEValuesViews::SymmetricTensor<2,dim,spacedim> &
+  operator[] (const FEValuesExtractors::SymmetricTensor<2> &tensor) const;
+
+
+  /**
+   * Create a view of the current FEValues object that represents a set of
+   * <code>(dim*dim)</code> scalar components (i.e. a 2nd order tensor) of the
+   * vector-valued finite element. The concept of views is explained in the
+   * documentation of the namespace FEValuesViews and in particular in the
+   * @ref vector_valued
+   * module.
+   */
+  const FEValuesViews::Tensor<2,dim,spacedim> &
+  operator[] (const FEValuesExtractors::Tensor<2> &tensor) const;
+
+  //@}
+
+  /// @name Access to the raw data
+  //@{
+
+  /**
+   * Constant reference to the selected mapping object.
+   */
+  const Mapping<dim,spacedim> &get_mapping () const;
+
+  /**
+   * Constant reference to the selected finite element object.
+   */
+  const FiniteElement<dim,spacedim> &get_fe () const;
+
+  /**
+   * Return the update flags set for this object.
+   */
+  UpdateFlags get_update_flags () const;
+
+  /**
+   * Return a triangulation iterator to the current cell.
+   */
+  const typename Triangulation<dim,spacedim>::cell_iterator get_cell () const;
+
+  /**
+   * Return the relation of the current cell to the previous cell. This allows
+   * re-use of some cell data (like local matrices for equations with constant
+   * coefficients) if the result is <tt>CellSimilarity::translation</tt>.
+   */
+  CellSimilarity::Similarity get_cell_similarity () const;
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  std::size_t memory_consumption () const;
+  //@}
+
+
+  /**
+   * This exception is thrown if FEValuesBase is asked to return the value of
+   * a field which was not required by the UpdateFlags for this FEValuesBase.
+   *
+   * @ingroup Exceptions
+   */
+  DeclException1 (ExcAccessToUninitializedField,
+                  char *,
+                  << ("You are requesting information from an FEValues/FEFaceValues/FESubfaceValues "
+                      "object for which this kind of information has not been computed. What "
+                      "information these objects compute is determined by the update_* flags you "
+                      "pass to the constructor. Here, the operation you are attempting requires "
+                      "the <")
+                  << arg1
+                  << "> flag to be set, but it was apparently not specified upon construction.");
+  /**
+   * @todo Document this
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcCannotInitializeField);
+  /**
+   * @todo Document this
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcInvalidUpdateFlag);
+  /**
+   * @todo Document this
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcFEDontMatch);
+  /**
+   * Exception reporting that a shape function is not primitive, i.e. that it
+   * is vector-valued and has more than one non-zero vector component, so that
+   * the function in question cannot be called for it. The message suggests
+   * using the function of the same name with the <tt>_component</tt> suffix
+   * instead. The exception's argument is the index of the offending shape
+   * function.
+   *
+   * @ingroup Exceptions
+   */
+  DeclException1 (ExcShapeFunctionNotPrimitive,
+                  int,
+                  << "The shape function with index " << arg1
+                  << " is not primitive, i.e. it is vector-valued and "
+                  << "has more than one non-zero vector component. This "
+                  << "function cannot be called for these shape functions. "
+                  << "Maybe you want to use the same function with the "
+                  << "_component suffix?");
+  /**
+   * @todo Document this
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcFENotPrimitive);
+
+protected:
+  /**
+   * Objects of the FEValues class need to store a pointer (i.e. an iterator)
+   * to the present cell in order to be able to extract the values of the
+   * degrees of freedom on this cell in the get_function_values() and assorted
+   * functions. On the other hand, this class should also work for different
+   * iterators, as long as they have the same interface to extract the DoF
+   * values (i.e., for example, they need to have a @p
+   * get_interpolated_dof_values function).
+   *
+   * This calls for a common base class of iterator classes, and making the
+   * functions we need here @p virtual. On the other hand, this is the only
+   * place in the library where we need this, and introducing a base class of
+   * iterators and making a function virtual penalizes <em>all</em> users of
+   * the iterators, which are basically intended as very fast accessor
+   * functions. So we do not want to do this. Rather, what we do here is
+   * making the functions we need virtual only for use with <em>this
+   * class</em>. The idea is the following: have a common base class which
+   * declares some pure virtual functions, and for each possible iterator
+   * type, we have a derived class which stores the iterator to the cell and
+   * implements these functions. Since the iterator classes have the same
+   * interface, we can make the derived classes a template, templatized on the
+   * iterator type.
+   *
+   * This way, the use of virtual functions is restricted to only this class,
+   * and other users of iterators do not have to bear the negative effects.
+   *
+   * @author Wolfgang Bangerth, 2003
+   */
+  class CellIteratorBase;
+
+  /**
+   * Forward declaration of classes derived from CellIteratorBase. Their
+   * definition and implementation is given in the .cc file.
+   */
+  template <typename CI> class CellIterator;
+  class TriaCellIterator;
+
+  /**
+   * Store the cell selected last time the reinit() function was called.  This
+   * is necessary for the <tt>get_function_*</tt> functions as well as the
+   * functions of same name in the extractor classes.
+   */
+  std_cxx11::unique_ptr<const CellIteratorBase> present_cell;
+
+  /**
+   * A signal connection we use to ensure we get informed whenever the
+   * triangulation changes. We need to know about that because it invalidates
+   * all cell iterators and, as part of that, the 'present_cell' iterator we
+   * keep around between subsequent calls to reinit() in order to compute the
+   * cell similarity.
+   */
+  boost::signals2::connection tria_listener;
+
+  /**
+   * A function that is connected to the triangulation in order to reset the
+   * stored 'present_cell' iterator to an invalid one whenever the
+   * triangulation is changed and the iterator consequently becomes invalid.
+   */
+  void invalidate_present_cell ();
+
+  /**
+   * This function is called by the various reinit() functions in derived
+   * classes. Given the cell indicated by the argument, test whether we have
+   * to throw away the previously stored present_cell argument because it
+   * would require us to compare cells from different triangulations. In
+   * checking all this, also make sure that we have tria_listener connected to
+   * the triangulation to which we will set present_cell right after calling
+   * this function.
+   */
+  void
+  maybe_invalidate_previous_present_cell (const typename Triangulation<dim,spacedim>::cell_iterator &cell);
+
+  /**
+   * A pointer to the mapping object associated with this FEValues object.
+   */
+  const SmartPointer<const Mapping<dim,spacedim>,FEValuesBase<dim,spacedim> > mapping;
+
+  /**
+   * A pointer to the internal data object of mapping, obtained from
+   * Mapping::get_data(), Mapping::get_face_data(), or
+   * Mapping::get_subface_data().
+   */
+  std_cxx11::unique_ptr<typename Mapping<dim,spacedim>::InternalDataBase> mapping_data;
+
+  /**
+   * An object into which the Mapping::fill_fe_values() and similar functions
+   * place their output.
+   */
+  dealii::internal::FEValues::MappingRelatedData<dim, spacedim> mapping_output;
+
+
+  /**
+   * A pointer to the finite element object associated with this FEValues
+   * object.
+   */
+  const SmartPointer<const FiniteElement<dim,spacedim>,FEValuesBase<dim,spacedim> > fe;
+
+  /**
+   * A pointer to the internal data object of the finite element, obtained
+   * from FiniteElement::get_data(), FiniteElement::get_face_data(), or
+   * FiniteElement::get_subface_data().
+   */
+  std_cxx11::unique_ptr<typename FiniteElement<dim,spacedim>::InternalDataBase> fe_data;
+
+  /**
+   * An object into which the FiniteElement::fill_fe_values() and similar
+   * functions place their output.
+   */
+  dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> finite_element_output;
+
+
+  /**
+   * Original update flags handed to the constructor of FEValues.
+   */
+  UpdateFlags          update_flags;
+
+  /**
+   * Initialize some update flags. Called from the @p initialize functions of
+   * derived classes, which are in turn called from their constructors.
+   *
+   * Basically, this function finds out using the finite element and mapping
+   * object already stored which flags need to be set to compute everything
+   * the user wants, as expressed through the flags passed as argument.
+   */
+  UpdateFlags compute_update_flags (const UpdateFlags update_flags) const;
+
+  /**
+   * An enum variable that can store different states of the current cell in
+   * comparison to the previously visited cell. If wanted, additional states
+   * can be checked here and used in one of the methods used during reinit.
+   */
+  CellSimilarity::Similarity cell_similarity;
+
+  /**
+   * A function that checks whether the new cell is similar to the one
+   * previously used. Then, a significant amount of the data can be reused,
+   * e.g. the derivatives of the basis functions in real space, shape_grad.
+   */
+  void
+  check_cell_similarity (const typename Triangulation<dim,spacedim>::cell_iterator &cell);
+
+private:
+  /**
+   * Copy constructor. Since objects of this class are not copyable, we make
+   * it private, and also do not implement it.
+   */
+  FEValuesBase (const FEValuesBase &);
+
+  /**
+   * Copy operator. Since objects of this class are not copyable, we make it
+   * private, and also do not implement it.
+   */
+  FEValuesBase &operator= (const FEValuesBase &);
+
+  /**
+   * A cache for all possible FEValuesViews objects.
+   */
+  dealii::internal::FEValuesViews::Cache<dim,spacedim> fe_values_views_cache;
+
+  /**
+   * Make the view classes friends of this class, since they access internal
+   * data.
+   */
+  template <int, int> friend class FEValuesViews::Scalar;
+  template <int, int> friend class FEValuesViews::Vector;
+  template <int, int, int> friend class FEValuesViews::SymmetricTensor;
+  template <int, int, int> friend class FEValuesViews::Tensor;
+};
+
+
+
+/**
+ * Finite element evaluated in quadrature points of a cell.
+ *
+ * This class implements the initialization routines for FEValuesBase, if
+ * values in quadrature points of a cell are needed. For further documentation
+ * see the FEValuesBase base class.
+ *
+ * @ingroup feaccess
+ * @author Wolfgang Bangerth, 1998, Guido Kanschat, 2001
+ */
+template <int dim, int spacedim=dim>
+class FEValues : public FEValuesBase<dim,spacedim>
+{
+public:
+  /**
+   * Dimension of the object over which we integrate. For the present class,
+   * this is equal to <code>dim</code>.
+   */
+  static const unsigned int integral_dimension = dim;
+
+  /**
+   * Constructor. Gets cell independent data from mapping and finite element
+   * objects, matching the quadrature rule and update flags.
+   */
+  FEValues (const Mapping<dim,spacedim>       &mapping,
+            const FiniteElement<dim,spacedim> &fe,
+            const Quadrature<dim>             &quadrature,
+            const UpdateFlags                  update_flags);
+
+  /**
+   * Constructor. This constructor is equivalent to the other one except that
+   * it makes the object use a $Q_1$ mapping (i.e., an object of type
+   * MappingQGeneric(1)) implicitly.
+   */
+  FEValues (const FiniteElement<dim,spacedim> &fe,
+            const Quadrature<dim>             &quadrature,
+            const UpdateFlags                  update_flags);
+
+  /**
+   * Reinitialize the gradients, Jacobi determinants, etc for the given cell
+   * of type "iterator into a DoFHandler object", and the finite element
+   * associated with this object. It is assumed that the finite element used
+   * by the given cell is also the one used by this FEValues object.
+   */
+  template <template <int, int> class DoFHandlerType, bool level_dof_access>
+  void reinit (const TriaIterator<DoFCellAccessor<DoFHandlerType<dim,spacedim>,level_dof_access> > &cell);
+
+  /**
+   * Reinitialize the gradients, Jacobi determinants, etc for the given cell
+   * of type "iterator into a Triangulation object", and the given finite
+   * element. Since iterators into triangulation alone only convey information
+   * about the geometry of a cell, but not about degrees of freedom possibly
+   * associated with this cell, you will not be able to call some functions of
+   * this class if they need information about degrees of freedom. These
+   * functions are, above all, the
+   * <tt>get_function_value/gradients/hessians/laplacians/third_derivatives</tt>
+   * functions. If you want to call these functions, you have to call the @p
+   * reinit variants that take iterators into DoFHandler or other DoF handler
+   * type objects.
+   */
+  void reinit (const typename Triangulation<dim,spacedim>::cell_iterator &cell);
+
+  /**
+   * Return a reference to the copy of the quadrature formula stored by this
+   * object.
+   */
+  const Quadrature<dim> &get_quadrature () const;
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  std::size_t memory_consumption () const;
+
+  /**
+   * Return a reference to this very object.
+   *
+   * Though it seems that it is not very useful, this function is there to
+   * provide capability to the hpFEValues class, in which case it provides the
+   * FEValues object for the present cell (remember that for hp finite
+   * elements, the actual FE object used may change from cell to cell, so we
+   * also need different FEValues objects for different cells; once you
+   * reinitialize the hpFEValues object for a specific cell, it retrieves the
+   * FEValues object for the FE on that cell and returns it through a function
+   * of the same name as this one; this function here therefore only provides
+   * the same interface so that one can templatize on FEValues/hpFEValues).
+   */
+  const FEValues<dim,spacedim> &get_present_fe_values () const;
+
+private:
+  /**
+   * Store a copy of the quadrature formula here.
+   */
+  const Quadrature<dim> quadrature;
+
+  /**
+   * Do work common to the two constructors.
+   */
+  void initialize (const UpdateFlags update_flags);
+
+  /**
+   * The reinit() functions do only that part of the work that requires
+   * knowledge of the type of iterator. After setting the present_cell member
+   * of the base class, they pass on to this function, which does the real
+   * work, and which is independent of the actual type of the cell iterator.
+   */
+  void do_reinit ();
+};
+
+
+/**
+ * Extend the interface of FEValuesBase to values that only make sense when
+ * evaluating something on the surface of a cell. All the data that is
+ * available in the interior of cells is also available here.
+ *
+ * See FEValuesBase
+ *
+ * @ingroup feaccess
+ * @author Wolfgang Bangerth, 1998, Guido Kanschat, 2000, 2001
+ */
+template <int dim, int spacedim=dim>
+class FEFaceValuesBase : public FEValuesBase<dim,spacedim>
+{
+public:
+  /**
+   * Dimension of the object over which we integrate. For the present class,
+   * this is equal to <code>dim-1</code>.
+   */
+  static const unsigned int integral_dimension = dim-1;
+
+  /**
+   * Constructor. Call the constructor of the base class and set up the arrays
+   * of this class with the right sizes.  Actually filling these arrays is a
+   * duty of the derived class's constructors.
+   *
+   * @p n_faces_or_subfaces is the number of faces or subfaces that this
+   * object is to store. The actual number depends on the derived class, for
+   * FEFaceValues it is <tt>2*dim</tt>, while for the FESubfaceValues class it
+   * is <tt>2*dim*(1<<(dim-1))</tt>, i.e. the number of faces times the number
+   * of subfaces per face.
+   *
+   * @todo The constructor declared below takes no argument called
+   * n_faces_or_subfaces, so the preceding paragraph looks outdated and should
+   * be checked against the implementation.
+   */
+  FEFaceValuesBase (const unsigned int                 n_q_points,
+                    const unsigned int                 dofs_per_cell,
+                    const UpdateFlags                  update_flags,
+                    const Mapping<dim,spacedim>       &mapping,
+                    const FiniteElement<dim,spacedim> &fe,
+                    const Quadrature<dim-1>&           quadrature);
+
+  /**
+   * Boundary form of the transformation of the cell at the <tt>i</tt>th
+   * quadrature point.  See
+   * @ref GlossBoundaryForm.
+   *
+   * @dealiiRequiresUpdateFlags{update_boundary_forms}
+   */
+  const Tensor<1,spacedim> &boundary_form (const unsigned int i) const;
+
+  /**
+   * Return the list of outward normal vectors times the Jacobian of the
+   * surface mapping.
+   *
+   * @dealiiRequiresUpdateFlags{update_boundary_forms}
+   */
+  const std::vector<Tensor<1,spacedim> > &get_boundary_forms () const;
+
+  /**
+   * Return the index of the face selected the last time the reinit() function
+   * was called.
+   */
+  unsigned int get_face_index() const;
+
+  /**
+   * Return a reference to the copy of the quadrature formula stored by this
+   * object.
+   */
+  const Quadrature<dim-1> & get_quadrature () const;
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  std::size_t memory_consumption () const;
+
+protected:
+
+  /**
+   * Index of the face selected the last time the reinit() function was
+   * called.
+   */
+  unsigned int present_face_index;
+
+  /**
+   * Store a copy of the quadrature formula here.
+   */
+  const Quadrature<dim-1> quadrature;
+};
+
+
+
+/**
+ * Finite element evaluated in quadrature points on a face.
+ *
+ * This class adds the functionality of FEFaceValuesBase to FEValues; see
+ * there for more documentation.
+ *
+ * Since finite element functions and their derivatives may be discontinuous
+ * at cell boundaries, there is no restriction of this function to a mesh
+ * face. But, there are limits of these values approaching the face from
+ * either of the neighboring cells.
+ *
+ * @ingroup feaccess
+ * @author Wolfgang Bangerth, 1998, Guido Kanschat, 2000, 2001
+ */
+template <int dim, int spacedim=dim>
+class FEFaceValues : public FEFaceValuesBase<dim,spacedim>
+{
+public:
+  /**
+   * Dimension in which this object operates.
+   */
+  static const unsigned int dimension = dim;
+
+  /**
+   * Dimension of the space in which this object operates.
+   */
+  static const unsigned int space_dimension = spacedim;
+
+  /**
+   * Dimension of the object over which we integrate. For the present class,
+   * this is equal to <code>dim-1</code>.
+   */
+  static const unsigned int integral_dimension = dim-1;
+
+  /**
+   * Constructor. Gets cell independent data from mapping and finite element
+   * objects, matching the quadrature rule and update flags.
+   */
+  FEFaceValues (const Mapping<dim,spacedim>       &mapping,
+                const FiniteElement<dim,spacedim> &fe,
+                const Quadrature<dim-1>           &quadrature,
+                const UpdateFlags                  update_flags);
+
+  /**
+   * Constructor. This constructor is equivalent to the other one except that
+   * it makes the object use a $Q_1$ mapping (i.e., an object of type
+   * MappingQGeneric(1)) implicitly.
+   */
+  FEFaceValues (const FiniteElement<dim,spacedim> &fe,
+                const Quadrature<dim-1>           &quadrature,
+                const UpdateFlags                  update_flags);
+
+  /**
+   * Reinitialize the gradients, Jacobi determinants, etc for the face with
+   * number @p face_no of @p cell and the given finite element.
+   */
+  template <template <int, int> class DoFHandlerType, bool level_dof_access>
+  void reinit (const TriaIterator<DoFCellAccessor<DoFHandlerType<dim,spacedim>,level_dof_access> > &cell,
+               const unsigned int face_no);
+
+  /**
+   * Reinitialize the gradients, Jacobi determinants, etc for the given face
+   * on given cell of type "iterator into a Triangulation object", and the
+   * given finite element. Since iterators into triangulation alone only
+   * convey information about the geometry of a cell, but not about degrees of
+   * freedom possibly associated with this cell, you will not be able to call
+   * some functions of this class if they need information about degrees of
+   * freedom. These functions are, above all, the
+   * <tt>get_function_value/gradients/hessians/third_derivatives</tt>
+   * functions. If you want to call these functions, you have to call the @p
+   * reinit variants that take iterators into DoFHandler or other DoF handler
+   * type objects.
+   */
+  void reinit (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+               const unsigned int                                         face_no);
+
+  /**
+   * Return a reference to this very object.
+   *
+   * Though it seems that it is not very useful, this function is there to
+   * provide capability to the hpFEValues class, in which case it provides the
+   * FEValues object for the present cell (remember that for hp finite
+   * elements, the actual FE object used may change from cell to cell, so we
+   * also need different FEValues objects for different cells; once you
+   * reinitialize the hpFEValues object for a specific cell, it retrieves the
+   * FEValues object for the FE on that cell and returns it through a function
+   * of the same name as this one; this function here therefore only provides
+   * the same interface so that one can templatize on FEValues/hpFEValues).
+   */
+  const FEFaceValues<dim,spacedim> &get_present_fe_values () const;
+private:
+
+  /**
+   * Do work common to the two constructors.
+   */
+  void initialize (const UpdateFlags update_flags);
+
+  /**
+   * The reinit() functions do only that part of the work that requires
+   * knowledge of the type of iterator. After setting the present_cell member
+   * of the base class, they pass on to this function, which does the real
+   * work, and which is independent of the actual type of the cell iterator.
+   */
+  void do_reinit (const unsigned int face_no);
+};
+
+
+/**
+ * Finite element evaluated in quadrature points on a face.
+ *
+ * This class adds the functionality of FEFaceValuesBase to FEValues; see
+ * there for more documentation.
+ *
+ * This class is used for faces lying on a refinement edge. In this case, the
+ * neighboring cell is refined. To be able to compute differences between
+ * interior and exterior function values, the refinement of the neighboring
+ * cell must be simulated on this cell. This is achieved by applying a
+ * quadrature rule that simulates the refinement. The resulting data fields
+ * are split up to reflect the refinement structure of the neighbor: a subface
+ * number corresponds to the number of the child of the neighboring face.
+ *
+ * @ingroup feaccess
+ * @author Wolfgang Bangerth, 1998, Guido Kanschat, 2000, 2001
+ */
+template <int dim, int spacedim=dim>
+class FESubfaceValues : public FEFaceValuesBase<dim,spacedim>
+{
+public:
+  /**
+   * Dimension in which this object operates.
+   */
+  static const unsigned int dimension = dim;
+
+  /**
+   * Dimension of the space in which this object operates.
+   */
+  static const unsigned int space_dimension = spacedim;
+
+  /**
+   * Dimension of the object over which we integrate. For the present class,
+   * this is equal to <code>dim-1</code>.
+   */
+  static const unsigned int integral_dimension = dim-1;
+
+  /**
+   * Constructor. Gets cell independent data from mapping and finite element
+   * objects, matching the quadrature rule and update flags.
+   */
+  FESubfaceValues (const Mapping<dim,spacedim>       &mapping,
+                   const FiniteElement<dim,spacedim> &fe,
+                   const Quadrature<dim-1>  &face_quadrature,
+                   const UpdateFlags         update_flags);
+
+  /**
+   * Constructor. This constructor is equivalent to the other one except that
+   * it makes the object use a $Q_1$ mapping (i.e., an object of type
+   * MappingQGeneric(1)) implicitly.
+   */
+  FESubfaceValues (const FiniteElement<dim,spacedim> &fe,
+                   const Quadrature<dim-1>  &face_quadrature,
+                   const UpdateFlags         update_flags);
+
+  /**
+   * Reinitialize the gradients, Jacobi determinants, etc for the subface
+   * selected by @p face_no and @p subface_no on the given cell of type
+   * "iterator into a DoFHandler object", and the finite element associated
+   * with this object. It is assumed that the finite element used by the
+   * given cell is also the one used by this FESubfaceValues object.
+   */
+  template <template <int, int> class DoFHandlerType, bool level_dof_access>
+  void reinit (const TriaIterator<DoFCellAccessor<DoFHandlerType<dim,spacedim>,level_dof_access> > &cell,
+               const unsigned int face_no,
+               const unsigned int subface_no);
+
+  /**
+   * Reinitialize the gradients, Jacobi determinants, etc for the given
+   * subface on given cell of type "iterator into a Triangulation object", and
+   * the given finite element. Since iterators into triangulation alone only
+   * convey information about the geometry of a cell, but not about degrees of
+   * freedom possibly associated with this cell, you will not be able to call
+   * some functions of this class if they need information about degrees of
+   * freedom. These functions are, above all, the
+   * <tt>get_function_value/gradients/hessians/third_derivatives</tt>
+   * functions. If you want to call these functions, you have to call the @p
+   * reinit variants that take iterators into DoFHandler or other DoF handler
+   * type objects.
+   */
+  void reinit (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+               const unsigned int                    face_no,
+               const unsigned int                    subface_no);
+
+  /**
+   * Return a reference to this very object.
+   *
+   * Though it seems that it is not very useful, this function is there to
+   * provide capability to the hpFEValues class, in which case it provides the
+   * FEValues object for the present cell (remember that for hp finite
+   * elements, the actual FE object used may change from cell to cell, so we
+   * also need different FEValues objects for different cells; once you
+   * reinitialize the hpFEValues object for a specific cell, it retrieves the
+   * FEValues object for the FE on that cell and returns it through a function
+   * of the same name as this one; this function here therefore only provides
+   * the same interface so that one can templatize on FEValues/hpFEValues).
+   */
+  const FESubfaceValues<dim,spacedim> &get_present_fe_values () const;
+
+  /**
+   * @todo Document this
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcReinitCalledWithBoundaryFace);
+
+  /**
+   * @todo Document this
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcFaceHasNoSubfaces);
+
+private:
+
+  /**
+   * Do work common to the two constructors.
+   */
+  void initialize (const UpdateFlags update_flags);
+
+  /**
+   * The reinit() functions do only that part of the work that requires
+   * knowledge of the type of iterator. After setting the present_cell member
+   * of the base class, they pass on to this function, which does the real
+   * work, and which is independent of the actual type of the cell iterator.
+   */
+  void do_reinit (const unsigned int face_no,
+                  const unsigned int subface_no);
+};
+
+
+#ifndef DOXYGEN
+
+
+/*------------------------ Inline functions: namespace FEValuesViews --------*/
+
+namespace FEValuesViews
+{
+  template <int dim, int spacedim>
+  inline
+  typename Scalar<dim,spacedim>::value_type
+  Scalar<dim,spacedim>::value (const unsigned int shape_function,
+                               const unsigned int q_point) const
+  {
+    typedef FEValuesBase<dim,spacedim> FVB;
+    Assert (shape_function < fe_values.fe->dofs_per_cell,
+            ExcIndexRange (shape_function, 0, fe_values.fe->dofs_per_cell));
+    Assert (fe_values.update_flags & update_values,
+            typename FVB::ExcAccessToUninitializedField("update_values"));
+
+    // Same idea as FEValuesBase::shape_value_component, but the component
+    // is fixed for this view and the lookup information was cached
+    // beforehand. A shape function flagged as not contributing to the
+    // component yields zero.
+    if (!shape_function_data[shape_function].is_nonzero_shape_function_component)
+      return 0;
+
+    // otherwise read the value from the table row cached for this shape
+    // function
+    const unsigned int row = shape_function_data[shape_function].row_index;
+    return fe_values.finite_element_output.shape_values(row, q_point);
+  }
+
+
+
+
+  template <int dim, int spacedim>
+  inline
+  typename Scalar<dim,spacedim>::gradient_type
+  Scalar<dim,spacedim>::gradient (const unsigned int shape_function,
+                                  const unsigned int q_point) const
+  {
+    typedef FEValuesBase<dim,spacedim> FVB;
+    Assert (shape_function < fe_values.fe->dofs_per_cell,
+            ExcIndexRange (shape_function, 0, fe_values.fe->dofs_per_cell));
+    Assert (fe_values.update_flags & update_gradients,
+            typename FVB::ExcAccessToUninitializedField("update_gradients"));
+
+    // Same idea as FEValuesBase::shape_grad_component, but the component is
+    // fixed for this view and the lookup information was cached beforehand.
+    // A shape function flagged as not contributing to the component yields
+    // a default-constructed gradient.
+    if (!shape_function_data[shape_function].is_nonzero_shape_function_component)
+      return gradient_type();
+
+    // otherwise read the gradient from the row cached for this shape
+    // function
+    const unsigned int row = shape_function_data[shape_function].row_index;
+    return fe_values.finite_element_output.shape_gradients[row][q_point];
+  }
+
+
+
+  template <int dim, int spacedim>
+  inline
+  typename Scalar<dim,spacedim>::hessian_type
+  Scalar<dim,spacedim>::hessian (const unsigned int shape_function,
+                                 const unsigned int q_point) const
+  {
+    typedef FEValuesBase<dim,spacedim> FVB;
+    Assert (shape_function < fe_values.fe->dofs_per_cell,
+            ExcIndexRange (shape_function, 0, fe_values.fe->dofs_per_cell));
+    Assert (fe_values.update_flags & update_hessians,
+            typename FVB::ExcAccessToUninitializedField("update_hessians"));
+
+    // Same idea as FEValuesBase::shape_hessian_component, but the component
+    // is fixed for this view and the lookup information was cached
+    // beforehand. A shape function flagged as not contributing to the
+    // component yields a default-constructed Hessian.
+    if (!shape_function_data[shape_function].is_nonzero_shape_function_component)
+      return hessian_type();
+
+    // otherwise read the Hessian from the row cached for this shape function
+    const unsigned int row = shape_function_data[shape_function].row_index;
+    return fe_values.finite_element_output.shape_hessians[row][q_point];
+  }
+
+
+
+  template <int dim, int spacedim>
+  inline
+  typename Scalar<dim,spacedim>::third_derivative_type
+  Scalar<dim,spacedim>::third_derivative (const unsigned int shape_function,
+                                          const unsigned int q_point) const
+  {
+    typedef FEValuesBase<dim,spacedim> FVB;
+    Assert (shape_function < fe_values.fe->dofs_per_cell,
+            ExcIndexRange (shape_function, 0, fe_values.fe->dofs_per_cell));
+    Assert (fe_values.update_flags & update_3rd_derivatives,
+            typename FVB::ExcAccessToUninitializedField("update_3rd_derivatives"));
+
+    // Same idea as FEValuesBase::shape_3rdderivative_component, but the
+    // component is fixed for this view and the lookup information was
+    // cached beforehand. A shape function flagged as not contributing to
+    // the component yields a default-constructed third derivative.
+    if (!shape_function_data[shape_function].is_nonzero_shape_function_component)
+      return third_derivative_type();
+
+    // otherwise read the third derivative from the row cached for this
+    // shape function
+    const unsigned int row = shape_function_data[shape_function].row_index;
+    return fe_values.finite_element_output.shape_3rd_derivatives[row][q_point];
+  }
+
+
+
+  template <int dim, int spacedim>
+  inline
+  typename Vector<dim,spacedim>::value_type
+  Vector<dim,spacedim>::value (const unsigned int shape_function,
+                               const unsigned int q_point) const
+  {
+    typedef FEValuesBase<dim,spacedim> FVB;
+    Assert (shape_function < fe_values.fe->dofs_per_cell,
+            ExcIndexRange (shape_function, 0, fe_values.fe->dofs_per_cell));
+    Assert (fe_values.update_flags & update_values,
+            typename FVB::ExcAccessToUninitializedField("update_values"));
+
+    // Works like the scalar view, with one additional index for the
+    // vector component. The cached single_nonzero_component entry selects
+    // which of the three cases below applies.
+    const int component = shape_function_data[shape_function].single_nonzero_component;
+    switch (component)
+      {
+      case -2:
+        // nothing to look up: hand back a default-constructed value
+        return value_type();
+
+      case -1:
+      {
+        // fill in every entry whose per-component flag is set, each from
+        // its own cached row
+        value_type result;
+        for (unsigned int c=0; c<dim; ++c)
+          if (shape_function_data[shape_function].is_nonzero_shape_function_component[c])
+            result[c]
+              = fe_values.finite_element_output.shape_values(shape_function_data[shape_function].row_index[c],q_point);
+
+        return result;
+      }
+
+      default:
+      {
+        // any other value is itself the row to read; it fills exactly one
+        // entry of the result
+        value_type result;
+        result[shape_function_data[shape_function].single_nonzero_component_index]
+          = fe_values.finite_element_output.shape_values(component,q_point);
+        return result;
+      }
+      }
+  }
+
+
+
+  template <int dim, int spacedim>
+  inline
+  typename Vector<dim,spacedim>::gradient_type
+  Vector<dim,spacedim>::gradient (const unsigned int shape_function,
+                                  const unsigned int q_point) const
+  {
+    typedef FEValuesBase<dim,spacedim> FVB;
+    Assert (shape_function < fe_values.fe->dofs_per_cell,
+            ExcIndexRange (shape_function, 0, fe_values.fe->dofs_per_cell));
+    Assert (fe_values.update_flags & update_gradients,
+            typename FVB::ExcAccessToUninitializedField("update_gradients"));
+
+    // Works like the scalar view, with one additional index for the
+    // vector component. The cached single_nonzero_component entry selects
+    // which of the three cases below applies.
+    const int component = shape_function_data[shape_function].single_nonzero_component;
+    switch (component)
+      {
+      case -2:
+        // nothing to look up: hand back a default-constructed gradient
+        return gradient_type();
+
+      case -1:
+      {
+        // fill in every row whose per-component flag is set, each from its
+        // own cached row of shape gradients
+        gradient_type result;
+        for (unsigned int c=0; c<dim; ++c)
+          if (shape_function_data[shape_function].is_nonzero_shape_function_component[c])
+            result[c]
+              = fe_values.finite_element_output.shape_gradients[shape_function_data[shape_function].row_index[c]][q_point];
+
+        return result;
+      }
+
+      default:
+      {
+        // any other value is itself the row to read; it fills exactly one
+        // row of the result
+        gradient_type result;
+        result[shape_function_data[shape_function].single_nonzero_component_index]
+          = fe_values.finite_element_output.shape_gradients[component][q_point];
+        return result;
+      }
+      }
+  }
+
+
+
+  template <int dim, int spacedim>
+  inline
+  typename Vector<dim,spacedim>::divergence_type
+  Vector<dim,spacedim>::divergence (const unsigned int shape_function,
+                                    const unsigned int q_point) const
+  {
+    // the checks are the same as for the gradient, since the divergence is
+    // assembled from entries of the shape gradients
+    typedef FEValuesBase<dim,spacedim> FVB;
+    Assert (shape_function < fe_values.fe->dofs_per_cell,
+            ExcIndexRange (shape_function, 0, fe_values.fe->dofs_per_cell));
+    Assert (fe_values.update_flags & update_gradients,
+            typename FVB::ExcAccessToUninitializedField("update_gradients"));
+
+    // Works like the scalar view, with one additional index for the
+    // vector component. The cached single_nonzero_component entry selects
+    // which of the three cases below applies.
+    const int component = shape_function_data[shape_function].single_nonzero_component;
+    switch (component)
+      {
+      case -2:
+        // nothing to look up: hand back a default-constructed divergence
+        return divergence_type();
+
+      case -1:
+      {
+        // sum the d-th derivative of the d-th component over all
+        // components whose flag is set
+        divergence_type sum = 0;
+        for (unsigned int c=0; c<dim; ++c)
+          if (shape_function_data[shape_function].is_nonzero_shape_function_component[c])
+            sum
+            += fe_values.finite_element_output.shape_gradients[shape_function_data[shape_function].row_index[c]][q_point][c];
+
+        return sum;
+      }
+
+      default:
+        // any other value is itself the row to read; only one gradient
+        // entry contributes
+        return
+          fe_values.finite_element_output.shape_gradients[component][q_point][shape_function_data[shape_function].single_nonzero_component_index];
+      }
+  }
+
+
+
+  template <int dim, int spacedim>
+  inline
+  typename Vector<dim,spacedim>::curl_type
+  Vector<dim,spacedim>::curl (const unsigned int shape_function, const unsigned int q_point) const
+  {
+    // this function works like in the case above
+    typedef FEValuesBase<dim,spacedim> FVB;
+
+    Assert (shape_function < fe_values.fe->dofs_per_cell,
+            ExcIndexRange (shape_function, 0, fe_values.fe->dofs_per_cell));
+    Assert (fe_values.update_flags & update_gradients,
+            typename FVB::ExcAccessToUninitializedField("update_gradients"));
+    // same as for the scalar case except that we have one more index
+    const int snc = shape_function_data[shape_function].single_nonzero_component;
+
+    if (snc == -2)
+      return curl_type ();
+
+    else
+      switch (dim)
+        {
+        case 1:
+        {
+          Assert (false, ExcMessage("Computing the curl in 1d is not a useful operation"));
+          return curl_type ();
+        }
+
+        case 2:
+        {
+          if (snc != -1)
+            {
+              curl_type return_value;
+
+              // the single
+              // nonzero component
+              // can only be zero
+              // or one in 2d
+              if (shape_function_data[shape_function].single_nonzero_component_index == 0)
+                return_value[0] = -1.0 * fe_values.finite_element_output.shape_gradients[snc][q_point][1];
+              else
+                return_value[0] = fe_values.finite_element_output.shape_gradients[snc][q_point][0];
+
+              return return_value;
+            }
+
+          else
+            {
+              curl_type return_value;
+
+              return_value[0] = 0.0;
+
+              if (shape_function_data[shape_function].is_nonzero_shape_function_component[0])
+                return_value[0]
+                -= fe_values.finite_element_output.shape_gradients[shape_function_data[shape_function].row_index[0]][q_point][1];
+
+              if (shape_function_data[shape_function].is_nonzero_shape_function_component[1])
+                return_value[0]
+                += fe_values.finite_element_output.shape_gradients[shape_function_data[shape_function].row_index[1]][q_point][0];
+
+              return return_value;
+            }
+        }
+
+        case 3:
+        {
+          if (snc != -1)
+            {
+              curl_type return_value;
+
+              switch (shape_function_data[shape_function].single_nonzero_component_index)
+                {
+                case 0:
+                {
+                  return_value[0] = 0;
+                  return_value[1] = fe_values.finite_element_output.shape_gradients[snc][q_point][2];
+                  return_value[2] = -1.0 * fe_values.finite_element_output.shape_gradients[snc][q_point][1];
+                  return return_value;
+                }
+
+                case 1:
+                {
+                  return_value[0] = -1.0 * fe_values.finite_element_output.shape_gradients[snc][q_point][2];
+                  return_value[1] = 0;
+                  return_value[2] = fe_values.finite_element_output.shape_gradients[snc][q_point][0];
+                  return return_value;
+                }
+
+                default:
+                {
+                  return_value[0] = fe_values.finite_element_output.shape_gradients[snc][q_point][1];
+                  return_value[1] = -1.0 * fe_values.finite_element_output.shape_gradients[snc][q_point][0];
+                  return_value[2] = 0;
+                  return return_value;
+                }
+                }
+            }
+
+          else
+            {
+              curl_type return_value;
+
+              for (unsigned int i = 0; i < dim; ++i)
+                return_value[i] = 0.0;
+
+              if (shape_function_data[shape_function].is_nonzero_shape_function_component[0])
+                {
+                  return_value[1]
+                  += fe_values.finite_element_output.shape_gradients[shape_function_data[shape_function].row_index[0]][q_point][2];
+                  return_value[2]
+                  -= fe_values.finite_element_output.shape_gradients[shape_function_data[shape_function].row_index[0]][q_point][1];
+                }
+
+              if (shape_function_data[shape_function].is_nonzero_shape_function_component[1])
+                {
+                  return_value[0]
+                  -= fe_values.finite_element_output.shape_gradients[shape_function_data[shape_function].row_index[1]][q_point][2];
+                  return_value[2]
+                  += fe_values.finite_element_output.shape_gradients[shape_function_data[shape_function].row_index[1]][q_point][0];
+                }
+
+              if (shape_function_data[shape_function].is_nonzero_shape_function_component[2])
+                {
+                  return_value[0]
+                  += fe_values.finite_element_output.shape_gradients[shape_function_data[shape_function].row_index[2]][q_point][1];
+                  return_value[1]
+                  -= fe_values.finite_element_output.shape_gradients[shape_function_data[shape_function].row_index[2]][q_point][0];
+                }
+
+              return return_value;
+            }
+        }
+        }
+    // should not end up here
+    Assert (false, ExcInternalError());
+    return curl_type();
+  }
+
+  /**
+   * Hessian of the vector-valued part of shape function @p shape_function
+   * selected by this view, evaluated at quadrature point @p q_point. The
+   * update_hessians flag must have been set.
+   */
+  template <int dim, int spacedim>
+  inline
+  typename Vector<dim,spacedim>::hessian_type
+  Vector<dim,spacedim>::hessian (const unsigned int shape_function,
+                                 const unsigned int q_point) const
+  {
+    typedef FEValuesBase<dim,spacedim> FVB;
+    Assert (shape_function < fe_values.fe->dofs_per_cell,
+            ExcIndexRange (shape_function, 0, fe_values.fe->dofs_per_cell));
+    Assert (fe_values.update_flags & update_hessians,
+            typename FVB::ExcAccessToUninitializedField("update_hessians"));
+
+    // single_nonzero_component distinguishes three situations: -2 (a
+    // default-constructed result is returned), -1 (several components
+    // have to be collected), anything else is the row of the one
+    // component that is stored
+    const int single_row = shape_function_data[shape_function].single_nonzero_component;
+
+    if (single_row == -2)
+      return hessian_type();
+
+    hessian_type result;
+    if (single_row == -1)
+      {
+        // pick up every component flagged as nonzero from its own row
+        for (unsigned int c=0; c<dim; ++c)
+          if (shape_function_data[shape_function].is_nonzero_shape_function_component[c])
+            result[c]
+              = fe_values.finite_element_output.shape_hessians[shape_function_data[shape_function].row_index[c]][q_point];
+      }
+    else
+      {
+        // only one component is present; place it at its component index
+        result[shape_function_data[shape_function].single_nonzero_component_index]
+          = fe_values.finite_element_output.shape_hessians[single_row][q_point];
+      }
+
+    return result;
+  }
+
+  /**
+   * Third derivative of the vector-valued part of shape function
+   * @p shape_function selected by this view, evaluated at quadrature
+   * point @p q_point. The update_3rd_derivatives flag must have been set.
+   */
+  template <int dim, int spacedim>
+  inline
+  typename Vector<dim,spacedim>::third_derivative_type
+  Vector<dim,spacedim>::third_derivative (const unsigned int shape_function,
+                                          const unsigned int q_point) const
+  {
+    typedef FEValuesBase<dim,spacedim> FVB;
+    Assert (shape_function < fe_values.fe->dofs_per_cell,
+            ExcIndexRange (shape_function, 0, fe_values.fe->dofs_per_cell));
+    Assert (fe_values.update_flags & update_3rd_derivatives,
+            typename FVB::ExcAccessToUninitializedField("update_3rd_derivatives"));
+
+    // -2: return a default-constructed result; -1: collect several
+    // components; otherwise: row of the single stored component
+    const int single_row = shape_function_data[shape_function].single_nonzero_component;
+
+    if (single_row == -2)
+      return third_derivative_type();
+
+    third_derivative_type result;
+    if (single_row == -1)
+      {
+        // copy each component flagged as nonzero from the row assigned to it
+        for (unsigned int c=0; c<dim; ++c)
+          if (shape_function_data[shape_function].is_nonzero_shape_function_component[c])
+            result[c]
+              = fe_values.finite_element_output.shape_3rd_derivatives[shape_function_data[shape_function].row_index[c]][q_point];
+      }
+    else
+      {
+        // a single component: write it to the slot given by its index
+        result[shape_function_data[shape_function].single_nonzero_component_index]
+          = fe_values.finite_element_output.shape_3rd_derivatives[single_row][q_point];
+      }
+
+    return result;
+  }
+
+
+  namespace
+  {
+    /**
+     * Symmetrize the rank-2 tensor that has @p t as row number @p n and
+     * zeros in all other rows. This is the 1d overload, for which row
+     * zero is the only valid choice.
+     */
+    inline
+    dealii::SymmetricTensor<2,1>
+    symmetrize_single_row (const unsigned int n,
+                           const dealii::Tensor<1,1> &t)
+    {
+      // avoids a -Wunused-parameter warning in optimized mode
+      (void)n;
+      Assert (n < 1, ExcIndexRange (n, 0, 1));
+
+      const double entries[1] = { t[0] };
+      return dealii::SymmetricTensor<2,1>(entries);
+    }
+
+
+    /**
+     * 2d overload of the function above. The diagonal entry of row @p n
+     * is taken over unchanged, the off-diagonal component of @p t is
+     * halved. An @p n outside {0,1} triggers an assertion and yields a
+     * default-constructed tensor.
+     */
+    inline
+    dealii::SymmetricTensor<2,2>
+    symmetrize_single_row (const unsigned int n,
+                           const dealii::Tensor<1,2> &t)
+    {
+      if (n == 0)
+        {
+          const double entries[3] = { t[0], 0, t[1]/2 };
+          return dealii::SymmetricTensor<2,2>(entries);
+        }
+
+      if (n == 1)
+        {
+          const double entries[3] = { 0, t[1], t[0]/2 };
+          return dealii::SymmetricTensor<2,2>(entries);
+        }
+
+      Assert (false, ExcIndexRange (n, 0, 2));
+      return dealii::SymmetricTensor<2,2>();
+    }
+
+
+    /**
+     * 3d overload of the function above. The diagonal entry of row @p n
+     * is taken over unchanged, the two off-diagonal components of @p t
+     * are halved. An @p n outside {0,1,2} triggers an assertion and
+     * yields a default-constructed tensor.
+     */
+    inline
+    dealii::SymmetricTensor<2,3>
+    symmetrize_single_row (const unsigned int n,
+                           const dealii::Tensor<1,3> &t)
+    {
+      if (n == 0)
+        {
+          const double entries[6] = { t[0], 0, 0, t[1]/2, t[2]/2, 0 };
+          return dealii::SymmetricTensor<2,3>(entries);
+        }
+
+      if (n == 1)
+        {
+          const double entries[6] = { 0, t[1], 0, t[0]/2, 0, t[2]/2 };
+          return dealii::SymmetricTensor<2,3>(entries);
+        }
+
+      if (n == 2)
+        {
+          const double entries[6] = { 0, 0, t[2], 0, t[0]/2, t[1]/2 };
+          return dealii::SymmetricTensor<2,3>(entries);
+        }
+
+      Assert (false, ExcIndexRange (n, 0, 3));
+      return dealii::SymmetricTensor<2,3>();
+    }
+  }
+
+
+  /**
+   * Symmetrized gradient of the vector-valued part of shape function
+   * @p shape_function selected by this view, evaluated at quadrature
+   * point @p q_point. The update_gradients flag must have been set.
+   */
+  template <int dim, int spacedim>
+  inline
+  typename Vector<dim,spacedim>::symmetric_gradient_type
+  Vector<dim,spacedim>::symmetric_gradient (const unsigned int shape_function,
+                                            const unsigned int q_point) const
+  {
+    typedef FEValuesBase<dim,spacedim> FVB;
+    Assert (shape_function < fe_values.fe->dofs_per_cell,
+            ExcIndexRange (shape_function, 0, fe_values.fe->dofs_per_cell));
+    Assert (fe_values.update_flags & update_gradients,
+            typename FVB::ExcAccessToUninitializedField("update_gradients"));
+
+    const int single_row = shape_function_data[shape_function].single_nonzero_component;
+
+    // -2: hand back a default-constructed result
+    if (single_row == -2)
+      return symmetric_gradient_type();
+
+    // exactly one component: the gradient tensor has a single nonzero
+    // row, for which a dedicated helper exists
+    if (single_row != -1)
+      return symmetrize_single_row (shape_function_data[shape_function].single_nonzero_component_index,
+                                    fe_values.finite_element_output.shape_gradients[single_row][q_point]);
+
+    // several components: assemble the full gradient row by row and
+    // symmetrize it afterwards
+    gradient_type full_gradient;
+    for (unsigned int c=0; c<dim; ++c)
+      if (shape_function_data[shape_function].is_nonzero_shape_function_component[c])
+        full_gradient[c]
+          = fe_values.finite_element_output.shape_gradients[shape_function_data[shape_function].row_index[c]][q_point];
+
+    return symmetrize(full_gradient);
+  }
+
+
+
+  /**
+   * Value of the symmetric-tensor-valued part of shape function
+   * @p shape_function selected by this view, evaluated at quadrature
+   * point @p q_point. The update_values flag must have been set.
+   */
+  template <int dim, int spacedim>
+  inline
+  typename SymmetricTensor<2, dim, spacedim>::value_type
+  SymmetricTensor<2, dim, spacedim>::value (const unsigned int shape_function,
+                                            const unsigned int q_point) const
+  {
+    typedef FEValuesBase<dim,spacedim> FVB;
+    Assert (shape_function < fe_values.fe->dofs_per_cell,
+            ExcIndexRange (shape_function, 0, fe_values.fe->dofs_per_cell));
+    Assert (fe_values.update_flags & update_values,
+            typename FVB::ExcAccessToUninitializedField("update_values"));
+
+    // components are stored in unrolled form, so every access below has
+    // to translate an unrolled index into the index pair of the tensor
+    const int single_row
+      = shape_function_data[shape_function].single_nonzero_component;
+
+    // the shape function is zero for the selected components
+    if (single_row == -2)
+      return value_type();
+
+    value_type result;
+    if (single_row == -1)
+      {
+        // more than one unrolled component is nonzero: copy each of them
+        for (unsigned int c = 0; c < value_type::n_independent_components; ++c)
+          if (shape_function_data[shape_function].is_nonzero_shape_function_component[c])
+            result[value_type::unrolled_to_component_indices(c)]
+              = fe_values.finite_element_output.shape_values(shape_function_data[shape_function].row_index[c],q_point);
+      }
+    else
+      {
+        // exactly one unrolled component is nonzero
+        const unsigned int unrolled_index =
+          shape_function_data[shape_function].single_nonzero_component_index;
+        result[value_type::unrolled_to_component_indices(unrolled_index)]
+          = fe_values.finite_element_output.shape_values(single_row,q_point);
+      }
+
+    return result;
+  }
+
+
+  /**
+   * Divergence of the symmetric-tensor-valued part of shape function
+   * @p shape_function selected by this view, evaluated at quadrature
+   * point @p q_point. The update_gradients flag must have been set. Shape
+   * functions with more than one nonzero unrolled component are not
+   * implemented.
+   */
+  template <int dim, int spacedim>
+  inline
+  typename SymmetricTensor<2, dim, spacedim>::divergence_type
+  SymmetricTensor<2, dim, spacedim>::divergence(const unsigned int shape_function,
+                                                const unsigned int q_point) const
+  {
+    typedef FEValuesBase<dim,spacedim> FVB;
+    Assert (shape_function < fe_values.fe->dofs_per_cell,
+            ExcIndexRange (shape_function, 0, fe_values.fe->dofs_per_cell));
+    Assert (fe_values.update_flags & update_gradients,
+            typename FVB::ExcAccessToUninitializedField("update_gradients"));
+
+    const int single_row = shape_function_data[shape_function].single_nonzero_component;
+
+    // the shape function is zero for the selected components
+    if (single_row == -2)
+      return divergence_type();
+
+    // several nonzero unrolled components: this case is not implemented
+    if (single_row == -1)
+      {
+        Assert (false, ExcNotImplemented());
+        divergence_type unset_result;
+        return unset_result;
+      }
+
+    // From here on exactly one component of the unrolled tensor is
+    // nonzero. In the full index-pair representation A_{ij} = A_{ji}
+    // this is a single entry if it sits on the diagonal, and a pair of
+    // mirrored entries otherwise.
+    //
+    // The divergence of a second-order tensor is a first-order tensor,
+    //   b_i := \dfrac{\partial A_{ij}}{\partial x_j}
+    // with summation over j.
+    //
+    // Translate the unrolled component index into its (row,col) pair:
+    const unsigned int unrolled_index =
+      shape_function_data[shape_function].single_nonzero_component_index;
+    const unsigned int row = value_type::unrolled_to_component_indices(unrolled_index)[0];
+    const unsigned int col = value_type::unrolled_to_component_indices(unrolled_index)[1];
+
+    // With phi denoting the one nonzero component:
+    //  - row == col: only
+    //      b_row = \dfrac{\partial phi}{\partial x_row}
+    //    is nonzero;
+    //  - row != col: both A_{row,col} and A_{col,row} equal phi, hence
+    //      b_row = \dfrac{\partial phi}{\partial x_col},
+    //      b_col = \dfrac{\partial phi}{\partial x_row}.
+    // All remaining entries of b stay at their default value.
+    const dealii::Tensor<1, spacedim> grad_phi = fe_values.finite_element_output.shape_gradients[single_row][q_point];
+
+    divergence_type result;
+    result[row] = grad_phi[col];
+
+    if (row != col)
+      result[col] = grad_phi[row];
+
+    return result;
+  }
+
+  /**
+   * Value of the tensor-valued part of shape function @p shape_function
+   * selected by this view, evaluated at quadrature point @p q_point. The
+   * update_values flag must have been set.
+   */
+  template <int dim, int spacedim>
+  inline
+  typename Tensor<2, dim, spacedim>::value_type
+  Tensor<2, dim, spacedim>::value (const unsigned int shape_function,
+                                   const unsigned int q_point) const
+  {
+    typedef FEValuesBase<dim,spacedim> FVB;
+    Assert (shape_function < fe_values.fe->dofs_per_cell,
+            ExcIndexRange (shape_function, 0, fe_values.fe->dofs_per_cell));
+    Assert (fe_values.update_flags & update_values,
+            typename FVB::ExcAccessToUninitializedField("update_values"));
+
+    // components are stored in unrolled form, so every access below has
+    // to translate an unrolled index into the index pair of the tensor
+    const int single_row
+      = shape_function_data[shape_function].single_nonzero_component;
+
+    // the shape function is zero for the selected components
+    if (single_row == -2)
+      return value_type();
+
+    value_type result;
+    if (single_row == -1)
+      {
+        // more than one unrolled component is nonzero: copy each of them.
+        // NOTE(review): the loop runs over dim*dim entries while the index
+        // conversion is the one of Tensor<2,spacedim>; the two agree for
+        // dim == spacedim -- confirm the intended bound otherwise
+        for (unsigned int c = 0; c < dim*dim; ++c)
+          if (shape_function_data[shape_function].is_nonzero_shape_function_component[c])
+            {
+              const TableIndices<2> position = dealii::Tensor<2,spacedim>::unrolled_to_component_indices(c);
+              result[position]
+                = fe_values.finite_element_output.shape_values(shape_function_data[shape_function].row_index[c],q_point);
+            }
+      }
+    else
+      {
+        // exactly one unrolled component is nonzero
+        const unsigned int unrolled_index =
+          shape_function_data[shape_function].single_nonzero_component_index;
+        const TableIndices<2> position = dealii::Tensor<2,spacedim>::unrolled_to_component_indices(unrolled_index);
+        result[position] = fe_values.finite_element_output.shape_values(single_row,q_point);
+      }
+
+    return result;
+  }
+
+
+  /**
+   * Divergence of the tensor-valued part of shape function
+   * @p shape_function selected by this view, evaluated at quadrature
+   * point @p q_point. The update_gradients flag must have been set. Shape
+   * functions with more than one nonzero unrolled component are not
+   * implemented.
+   */
+  template <int dim, int spacedim>
+  inline
+  typename Tensor<2, dim, spacedim>::divergence_type
+  Tensor<2, dim, spacedim>::divergence(const unsigned int shape_function,
+                                       const unsigned int q_point) const
+  {
+    typedef FEValuesBase<dim,spacedim> FVB;
+    Assert (shape_function < fe_values.fe->dofs_per_cell,
+            ExcIndexRange (shape_function, 0, fe_values.fe->dofs_per_cell));
+    Assert (fe_values.update_flags & update_gradients,
+            typename FVB::ExcAccessToUninitializedField("update_gradients"));
+
+    const int single_row = shape_function_data[shape_function].single_nonzero_component;
+
+    // the shape function is zero for the selected components
+    if (single_row == -2)
+      return divergence_type();
+
+    // several nonzero unrolled components: this case is not implemented
+    if (single_row == -1)
+      {
+        Assert (false, ExcNotImplemented());
+        divergence_type unset_result;
+        return unset_result;
+      }
+
+    // Exactly one component of the unrolled tensor is nonzero.
+    //
+    // The divergence of a second-order tensor A with components A_{ij}
+    // is the first-order tensor
+    //   b_j := \dfrac{\partial A_{ij}}{\partial x_i}
+    // (summation over i), so a single nonzero entry A_{row,col}
+    // contributes only to b_col, through its derivative in direction
+    // row.
+    //
+    // Translate the unrolled component index into its (row,col) pair:
+    const unsigned int unrolled_index =
+      shape_function_data[shape_function].single_nonzero_component_index;
+    const TableIndices<2> position = dealii::Tensor<2,spacedim>::unrolled_to_component_indices(unrolled_index);
+    const unsigned int row = position[0];
+    const unsigned int col = position[1];
+
+    const dealii::Tensor<1, spacedim> grad_phi = fe_values.finite_element_output.shape_gradients[single_row][q_point];
+
+    divergence_type result;
+    result[col] = grad_phi[row];
+
+    return result;
+  }
+}
+
+
+
+/*------------------------ Inline functions: FEValuesBase ------------------------*/
+
+
+
+// Look up the cached scalar view that belongs to the component named by
+// the extractor.
+template <int dim, int spacedim>
+inline
+const FEValuesViews::Scalar<dim,spacedim> &
+FEValuesBase<dim,spacedim>::
+operator[] (const FEValuesExtractors::Scalar &scalar) const
+{
+  // the component must address an entry of the cache
+  Assert (scalar.component < fe_values_views_cache.scalars.size(),
+          ExcIndexRange (scalar.component,
+                         0,
+                         fe_values_views_cache.scalars.size()));
+
+  const FEValuesViews::Scalar<dim,spacedim> &view
+    = fe_values_views_cache.scalars[scalar.component];
+  return view;
+}
+
+
+
+// Look up the cached vector view that starts at the first component
+// named by the extractor.
+template <int dim, int spacedim>
+inline
+const FEValuesViews::Vector<dim,spacedim> &
+FEValuesBase<dim,spacedim>::
+operator[] (const FEValuesExtractors::Vector &vector) const
+{
+  // the first component must address an entry of the cache
+  Assert (vector.first_vector_component < fe_values_views_cache.vectors.size(),
+          ExcIndexRange (vector.first_vector_component,
+                         0,
+                         fe_values_views_cache.vectors.size()));
+
+  const FEValuesViews::Vector<dim,spacedim> &view
+    = fe_values_views_cache.vectors[vector.first_vector_component];
+  return view;
+}
+
+// Look up the cached symmetric second-order tensor view that starts at
+// the first component named by the extractor.
+template <int dim, int spacedim>
+inline
+const FEValuesViews::SymmetricTensor<2,dim,spacedim> &
+FEValuesBase<dim,spacedim>::
+operator[] (const FEValuesExtractors::SymmetricTensor<2> &tensor) const
+{
+  // the first component must address an entry of the cache
+  Assert (tensor.first_tensor_component < fe_values_views_cache.symmetric_second_order_tensors.size(),
+          ExcIndexRange (tensor.first_tensor_component,
+                         0,
+                         fe_values_views_cache.symmetric_second_order_tensors.size()));
+
+  const FEValuesViews::SymmetricTensor<2,dim,spacedim> &view
+    = fe_values_views_cache.symmetric_second_order_tensors[tensor.first_tensor_component];
+  return view;
+}
+
+// Look up the cached second-order tensor view that starts at the first
+// component named by the extractor.
+template <int dim, int spacedim>
+inline
+const FEValuesViews::Tensor<2,dim,spacedim> &
+FEValuesBase<dim,spacedim>::
+operator[] (const FEValuesExtractors::Tensor<2> &tensor) const
+{
+  // the first component must address an entry of the cache
+  Assert (tensor.first_tensor_component < fe_values_views_cache.second_order_tensors.size(),
+          ExcIndexRange (tensor.first_tensor_component,
+                         0,
+                         fe_values_views_cache.second_order_tensors.size()));
+
+  const FEValuesViews::Tensor<2,dim,spacedim> &view
+    = fe_values_views_cache.second_order_tensors[tensor.first_tensor_component];
+  return view;
+}
+
+
+
+
+// Value of shape function i at quadrature point j. Only allowed for
+// primitive shape functions and if update_values was requested.
+template <int dim, int spacedim>
+inline
+const double &
+FEValuesBase<dim,spacedim>::shape_value (const unsigned int i,
+                                         const unsigned int j) const
+{
+  Assert (i < fe->dofs_per_cell,
+          ExcIndexRange (i, 0, fe->dofs_per_cell));
+  Assert (this->update_flags & update_values,
+          ExcAccessToUninitializedField("update_values"));
+  Assert (fe->is_primitive (i),
+          ExcShapeFunctionNotPrimitive(i));
+
+  // short-cut for an element that is primitive as a whole: the table is
+  // addressed directly with the shape function number
+  if (fe->is_primitive())
+    return this->finite_element_output.shape_values(i,j);
+
+  // otherwise go through the table that maps (shape function, component)
+  // pairs to rows. the assertion above guarantees that shape function i
+  // is primitive, so system_to_component_index may be called for it
+  const unsigned int
+  table_row = this->finite_element_output.shape_function_to_row_table[i * fe->n_components() + fe->system_to_component_index(i).first];
+  return this->finite_element_output.shape_values(table_row, j);
+}
+
+
+
+// Value of the given vector component of shape function i at quadrature
+// point j. Returns zero if the shape function has no contribution in
+// that component. Requires update_values.
+template <int dim, int spacedim>
+inline
+double
+FEValuesBase<dim,spacedim>::shape_value_component (const unsigned int i,
+                                                   const unsigned int j,
+                                                   const unsigned int component) const
+{
+  Assert (i < fe->dofs_per_cell,
+          ExcIndexRange (i, 0, fe->dofs_per_cell));
+  Assert (this->update_flags & update_values,
+          ExcAccessToUninitializedField("update_values"));
+  Assert (component < fe->n_components(),
+          ExcIndexRange(component, 0, fe->n_components()));
+
+  // only a component in which the shape function is nonzero has a row
+  // in the table; fetch the data from there
+  if (fe->get_nonzero_components(i)[component])
+    {
+      const unsigned int
+      table_row = this->finite_element_output.shape_function_to_row_table[i * fe->n_components() + component];
+      return this->finite_element_output.shape_values(table_row, j);
+    }
+
+  // the shape function vanishes in this component
+  return 0;
+}
+
+
+
+// Gradient of shape function i at quadrature point j. Only allowed for
+// primitive shape functions and if update_gradients was requested.
+template <int dim, int spacedim>
+inline
+const Tensor<1,spacedim> &
+FEValuesBase<dim,spacedim>::shape_grad (const unsigned int i,
+                                        const unsigned int j) const
+{
+  Assert (i < fe->dofs_per_cell,
+          ExcIndexRange (i, 0, fe->dofs_per_cell));
+  Assert (this->update_flags & update_gradients,
+          ExcAccessToUninitializedField("update_gradients"));
+  Assert (fe->is_primitive (i),
+          ExcShapeFunctionNotPrimitive(i));
+
+  // short-cut for an element that is primitive as a whole: the table is
+  // addressed directly with the shape function number
+  if (fe->is_primitive())
+    return this->finite_element_output.shape_gradients[i][j];
+
+  // otherwise go through the table that maps (shape function, component)
+  // pairs to rows. the assertion above guarantees that shape function i
+  // is primitive, so system_to_component_index may be called for it
+  const unsigned int
+  table_row = this->finite_element_output.shape_function_to_row_table[i * fe->n_components() + fe->system_to_component_index(i).first];
+  return this->finite_element_output.shape_gradients[table_row][j];
+}
+
+
+
+// Gradient of the given vector component of shape function i at
+// quadrature point j. Returns a default-constructed tensor if the shape
+// function has no contribution in that component. Requires
+// update_gradients.
+template <int dim, int spacedim>
+inline
+Tensor<1,spacedim>
+FEValuesBase<dim,spacedim>::shape_grad_component (const unsigned int i,
+                                                  const unsigned int j,
+                                                  const unsigned int component) const
+{
+  Assert (i < fe->dofs_per_cell,
+          ExcIndexRange (i, 0, fe->dofs_per_cell));
+  Assert (this->update_flags & update_gradients,
+          ExcAccessToUninitializedField("update_gradients"));
+  Assert (component < fe->n_components(),
+          ExcIndexRange(component, 0, fe->n_components()));
+
+  // only a component in which the shape function is nonzero has a row
+  // in the table; fetch the data from there
+  if (fe->get_nonzero_components(i)[component])
+    {
+      const unsigned int
+      table_row = this->finite_element_output.shape_function_to_row_table[i * fe->n_components() + component];
+      return this->finite_element_output.shape_gradients[table_row][j];
+    }
+
+  // the shape function vanishes in this component
+  return Tensor<1,spacedim>();
+}
+
+
+
+// Hessian of shape function i at quadrature point j. Only allowed for
+// primitive shape functions and if update_hessians was requested.
+template <int dim, int spacedim>
+inline
+const Tensor<2,spacedim> &
+FEValuesBase<dim,spacedim>::shape_hessian (const unsigned int i,
+                                           const unsigned int j) const
+{
+  Assert (i < fe->dofs_per_cell,
+          ExcIndexRange (i, 0, fe->dofs_per_cell));
+  Assert (this->update_flags & update_hessians,
+          ExcAccessToUninitializedField("update_hessians"));
+  Assert (fe->is_primitive (i),
+          ExcShapeFunctionNotPrimitive(i));
+
+  // short-cut for an element that is primitive as a whole: the table is
+  // addressed directly with the shape function number
+  if (fe->is_primitive())
+    return this->finite_element_output.shape_hessians[i][j];
+
+  // otherwise go through the table that maps (shape function, component)
+  // pairs to rows. the assertion above guarantees that shape function i
+  // is primitive, so system_to_component_index may be called for it
+  const unsigned int
+  table_row = this->finite_element_output.shape_function_to_row_table[i * fe->n_components() + fe->system_to_component_index(i).first];
+  return this->finite_element_output.shape_hessians[table_row][j];
+}
+
+
+
+// Hessian of the given vector component of shape function i at
+// quadrature point j. Returns a default-constructed tensor if the shape
+// function has no contribution in that component. Requires
+// update_hessians.
+template <int dim, int spacedim>
+inline
+Tensor<2,spacedim>
+FEValuesBase<dim,spacedim>::shape_hessian_component (const unsigned int i,
+                                                     const unsigned int j,
+                                                     const unsigned int component) const
+{
+  Assert (i < fe->dofs_per_cell,
+          ExcIndexRange (i, 0, fe->dofs_per_cell));
+  Assert (this->update_flags & update_hessians,
+          ExcAccessToUninitializedField("update_hessians"));
+  Assert (component < fe->n_components(),
+          ExcIndexRange(component, 0, fe->n_components()));
+
+  // only a component in which the shape function is nonzero has a row
+  // in the table; fetch the data from there
+  if (fe->get_nonzero_components(i)[component])
+    {
+      const unsigned int
+      table_row = this->finite_element_output.shape_function_to_row_table[i * fe->n_components() + component];
+      return this->finite_element_output.shape_hessians[table_row][j];
+    }
+
+  // the shape function vanishes in this component
+  return Tensor<2,spacedim>();
+}
+
+
+
+// Third derivative of shape function i at quadrature point j.
+//
+// i must be smaller than the number of degrees of freedom per cell and
+// must denote a primitive shape function; the update_3rd_derivatives
+// flag must have been requested. All three conditions are checked by
+// assertions. The returned reference points into the table of third
+// derivatives held by this object.
+template <int dim, int spacedim>
+inline
+const Tensor<3,spacedim> &
+FEValuesBase<dim,spacedim>::shape_3rd_derivative (const unsigned int i,
+                                                  const unsigned int j) const
+{
+  Assert (i < fe->dofs_per_cell,
+          ExcIndexRange (i, 0, fe->dofs_per_cell));
+  // the data read below is the table of third derivatives, so the flag
+  // to test is update_3rd_derivatives (not update_hessians), in
+  // agreement with the name reported by the exception
+  Assert (this->update_flags & update_3rd_derivatives,
+          ExcAccessToUninitializedField("update_3rd_derivatives"));
+  Assert (fe->is_primitive (i),
+          ExcShapeFunctionNotPrimitive(i));
+
+  // if the entire FE is primitive,
+  // then we can take a short-cut:
+  if (fe->is_primitive())
+    return this->finite_element_output.shape_3rd_derivatives[i][j];
+  else
+    {
+      // otherwise, use the mapping
+      // between shape function
+      // numbers and rows. note that
+      // by the assertions above, we
+      // know that this particular
+      // shape function is primitive,
+      // so we can call
+      // system_to_component_index
+      const unsigned int
+      row = this->finite_element_output.shape_function_to_row_table[i * fe->n_components() + fe->system_to_component_index(i).first];
+      return this->finite_element_output.shape_3rd_derivatives[row][j];
+    }
+}
+
+
+
+// Third derivative of the given vector component of shape function i at
+// quadrature point j.
+//
+// i must be smaller than the number of degrees of freedom per cell,
+// component smaller than the number of components of the element, and
+// the update_3rd_derivatives flag must have been requested; all of this
+// is checked by assertions. If the shape function has no contribution in
+// the requested component, a default-constructed tensor is returned.
+template <int dim, int spacedim>
+inline
+Tensor<3,spacedim>
+FEValuesBase<dim,spacedim>::shape_3rd_derivative_component (const unsigned int i,
+                                                            const unsigned int j,
+                                                            const unsigned int component) const
+{
+  Assert (i < fe->dofs_per_cell,
+          ExcIndexRange (i, 0, fe->dofs_per_cell));
+  // the data read below is the table of third derivatives, so the flag
+  // to test is update_3rd_derivatives (not update_hessians), in
+  // agreement with the name reported by the exception
+  Assert (this->update_flags & update_3rd_derivatives,
+          ExcAccessToUninitializedField("update_3rd_derivatives"));
+  Assert (component < fe->n_components(),
+          ExcIndexRange(component, 0, fe->n_components()));
+
+  // check whether the shape function
+  // is non-zero at all within
+  // this component:
+  if (fe->get_nonzero_components(i)[component] == false)
+    return Tensor<3,spacedim>();
+
+  // look up the right row in the
+  // table and take the data from
+  // there
+  const unsigned int
+  row = this->finite_element_output.shape_function_to_row_table[i * fe->n_components() + component];
+  return this->finite_element_output.shape_3rd_derivatives[row][j];
+}
+
+
+
+// Read access to the finite element object referenced by the fe member.
+template <int dim, int spacedim>
+inline
+const FiniteElement<dim,spacedim> &
+FEValuesBase<dim,spacedim>::get_fe () const
+{
+  const FiniteElement<dim,spacedim> &element = *fe;
+  return element;
+}
+
+
+// Read access to the mapping object referenced by the mapping member.
+template <int dim, int spacedim>
+inline
+const Mapping<dim,spacedim> &
+FEValuesBase<dim,spacedim>::get_mapping () const
+{
+  const Mapping<dim,spacedim> &used_mapping = *mapping;
+  return used_mapping;
+}
+
+
+
+// Return a copy of the update flags stored in this object.
+template <int dim, int spacedim>
+inline
+UpdateFlags
+FEValuesBase<dim,spacedim>::get_update_flags () const
+{
+  const UpdateFlags flags = this->update_flags;
+  return flags;
+}
+
+
+
+// Read access to the quadrature points stored in the mapping output.
+// Requires update_quadrature_points.
+template <int dim, int spacedim>
+inline
+const std::vector<Point<spacedim> > &
+FEValuesBase<dim,spacedim>::get_quadrature_points () const
+{
+  Assert (this->update_flags & update_quadrature_points,
+          ExcAccessToUninitializedField("update_quadrature_points"));
+
+  const std::vector<Point<spacedim> > &points
+    = this->mapping_output.quadrature_points;
+  return points;
+}
+
+
+
+// Read access to the JxW values stored in the mapping output. Requires
+// update_JxW_values.
+template <int dim, int spacedim>
+inline
+const std::vector<double> &
+FEValuesBase<dim,spacedim>::get_JxW_values () const
+{
+  Assert (this->update_flags & update_JxW_values,
+          ExcAccessToUninitializedField("update_JxW_values"));
+
+  const std::vector<double> &weights = this->mapping_output.JxW_values;
+  return weights;
+}
+
+
+
+// Read access to the Jacobians stored in the mapping output. Requires
+// update_jacobians.
+template <int dim, int spacedim>
+inline
+const std::vector<DerivativeForm<1,dim,spacedim> > &
+FEValuesBase<dim,spacedim>::get_jacobians () const
+{
+  Assert (this->update_flags & update_jacobians,
+          ExcAccessToUninitializedField("update_jacobians"));
+
+  const std::vector<DerivativeForm<1,dim,spacedim> > &all_jacobians
+    = this->mapping_output.jacobians;
+  return all_jacobians;
+}
+
+
+
+// Read access to the Jacobian gradients stored in the mapping output.
+// An assertion checks that the update_jacobian_grads flag was requested.
+template <int dim, int spacedim>
+inline
+const std::vector<DerivativeForm<2,dim,spacedim> > &
+FEValuesBase<dim,spacedim>::get_jacobian_grads () const
+{
+  // the exception text spells the flag exactly as it is tested in the
+  // condition, so that the message points to an existing flag
+  Assert (this->update_flags & update_jacobian_grads,
+          ExcAccessToUninitializedField("update_jacobian_grads"));
+  return this->mapping_output.jacobian_grads;
+}
+
+
+
+// Entry i of the pushed-forward Jacobian gradients stored in the mapping
+// output. Requires update_jacobian_pushed_forward_grads; the index is
+// not checked by this function itself.
+template <int dim, int spacedim>
+inline
+const Tensor<3,spacedim> &
+FEValuesBase<dim,spacedim>::jacobian_pushed_forward_grad (const unsigned int i) const
+{
+  Assert (this->update_flags & update_jacobian_pushed_forward_grads,
+          ExcAccessToUninitializedField("update_jacobian_pushed_forward_grads"));
+
+  const Tensor<3,spacedim> &entry
+    = this->mapping_output.jacobian_pushed_forward_grads[i];
+  return entry;
+}
+
+
+
+// Hand out the `jacobian_pushed_forward_grads` array of `mapping_output`.
+// Asserts that the update_jacobian_pushed_forward_grads bit is set in
+// `update_flags`.
+template <int dim, int spacedim>
+inline
+const std::vector<Tensor<3,spacedim> > &
+FEValuesBase<dim,spacedim>::get_jacobian_pushed_forward_grads () const
+{
+  Assert (this->update_flags & update_jacobian_pushed_forward_grads,
+          ExcAccessToUninitializedField("update_jacobian_pushed_forward_grads"));
+
+  const std::vector<Tensor<3,spacedim> > &pushed_forward_grads
+    = this->mapping_output.jacobian_pushed_forward_grads;
+  return pushed_forward_grads;
+}
+
+
+
+// Return element i of the `jacobian_2nd_derivatives` array of
+// `mapping_output`. Asserts that the update_jacobian_2nd_derivatives bit is
+// set in `update_flags` and, in the same way as jacobian() and
+// jacobian_grad() do, that i is a valid index into the array.
+template <int dim, int spacedim>
+inline
+const DerivativeForm<3,dim,spacedim> &
+FEValuesBase<dim,spacedim>::jacobian_2nd_derivative (const unsigned int i) const
+{
+  Assert (this->update_flags & update_jacobian_2nd_derivatives,
+          ExcAccessToUninitializedField("update_jacobian_2nd_derivatives"));
+  Assert (i<this->mapping_output.jacobian_2nd_derivatives.size(),
+          ExcIndexRange(i, 0, this->mapping_output.jacobian_2nd_derivatives.size()));
+
+  return this->mapping_output.jacobian_2nd_derivatives[i];
+}
+
+
+
+// Hand out the `jacobian_2nd_derivatives` array of `mapping_output`. Asserts
+// that the update_jacobian_2nd_derivatives bit is set in `update_flags`.
+template <int dim, int spacedim>
+inline
+const std::vector<DerivativeForm<3,dim,spacedim> > &
+FEValuesBase<dim,spacedim>::get_jacobian_2nd_derivatives () const
+{
+  Assert (this->update_flags & update_jacobian_2nd_derivatives,
+          ExcAccessToUninitializedField("update_jacobian_2nd_derivatives"));
+
+  const std::vector<DerivativeForm<3,dim,spacedim> > &second_derivatives
+    = this->mapping_output.jacobian_2nd_derivatives;
+  return second_derivatives;
+}
+
+
+
+// Return element i of the `jacobian_pushed_forward_2nd_derivatives` array of
+// `mapping_output`. Asserts that the
+// update_jacobian_pushed_forward_2nd_derivatives bit is set in `update_flags`
+// and, in the same way as jacobian() and jacobian_grad() do, that i is a
+// valid index into the array.
+template <int dim, int spacedim>
+inline
+const Tensor<4,spacedim> &
+FEValuesBase<dim,spacedim>::jacobian_pushed_forward_2nd_derivative (const unsigned int i) const
+{
+  Assert (this->update_flags & update_jacobian_pushed_forward_2nd_derivatives,
+          ExcAccessToUninitializedField("update_jacobian_pushed_forward_2nd_derivatives"));
+  Assert (i<this->mapping_output.jacobian_pushed_forward_2nd_derivatives.size(),
+          ExcIndexRange(i, 0, this->mapping_output.jacobian_pushed_forward_2nd_derivatives.size()));
+
+  return this->mapping_output.jacobian_pushed_forward_2nd_derivatives[i];
+}
+
+
+
+// Hand out the `jacobian_pushed_forward_2nd_derivatives` array of
+// `mapping_output`. Asserts that the
+// update_jacobian_pushed_forward_2nd_derivatives bit is set in `update_flags`.
+template <int dim, int spacedim>
+inline
+const std::vector<Tensor<4,spacedim> > &
+FEValuesBase<dim,spacedim>::get_jacobian_pushed_forward_2nd_derivatives () const
+{
+  Assert (this->update_flags & update_jacobian_pushed_forward_2nd_derivatives,
+          ExcAccessToUninitializedField("update_jacobian_pushed_forward_2nd_derivatives"));
+
+  const std::vector<Tensor<4,spacedim> > &pushed_forward_2nd
+    = this->mapping_output.jacobian_pushed_forward_2nd_derivatives;
+  return pushed_forward_2nd;
+}
+
+
+
+// Return element i of the `jacobian_3rd_derivatives` array of
+// `mapping_output`. Asserts that the update_jacobian_3rd_derivatives bit is
+// set in `update_flags` and, in the same way as jacobian() and
+// jacobian_grad() do, that i is a valid index into the array.
+template <int dim, int spacedim>
+inline
+const DerivativeForm<4,dim,spacedim> &
+FEValuesBase<dim,spacedim>::jacobian_3rd_derivative (const unsigned int i) const
+{
+  Assert (this->update_flags & update_jacobian_3rd_derivatives,
+          ExcAccessToUninitializedField("update_jacobian_3rd_derivatives"));
+  Assert (i<this->mapping_output.jacobian_3rd_derivatives.size(),
+          ExcIndexRange(i, 0, this->mapping_output.jacobian_3rd_derivatives.size()));
+
+  return this->mapping_output.jacobian_3rd_derivatives[i];
+}
+
+
+
+// Hand out the `jacobian_3rd_derivatives` array of `mapping_output`. Asserts
+// that the update_jacobian_3rd_derivatives bit is set in `update_flags`.
+template <int dim, int spacedim>
+inline
+const std::vector<DerivativeForm<4,dim,spacedim> > &
+FEValuesBase<dim,spacedim>::get_jacobian_3rd_derivatives () const
+{
+  Assert (this->update_flags & update_jacobian_3rd_derivatives,
+          ExcAccessToUninitializedField("update_jacobian_3rd_derivatives"));
+
+  const std::vector<DerivativeForm<4,dim,spacedim> > &third_derivatives
+    = this->mapping_output.jacobian_3rd_derivatives;
+  return third_derivatives;
+}
+
+
+
+// Return element i of the `jacobian_pushed_forward_3rd_derivatives` array of
+// `mapping_output`. Asserts that the
+// update_jacobian_pushed_forward_3rd_derivatives bit is set in `update_flags`
+// and, in the same way as jacobian() and jacobian_grad() do, that i is a
+// valid index into the array.
+template <int dim, int spacedim>
+inline
+const Tensor<5,spacedim> &
+FEValuesBase<dim,spacedim>::jacobian_pushed_forward_3rd_derivative (const unsigned int i) const
+{
+  Assert (this->update_flags & update_jacobian_pushed_forward_3rd_derivatives,
+          ExcAccessToUninitializedField("update_jacobian_pushed_forward_3rd_derivatives"));
+  Assert (i<this->mapping_output.jacobian_pushed_forward_3rd_derivatives.size(),
+          ExcIndexRange(i, 0, this->mapping_output.jacobian_pushed_forward_3rd_derivatives.size()));
+
+  return this->mapping_output.jacobian_pushed_forward_3rd_derivatives[i];
+}
+
+
+
+// Hand out the `jacobian_pushed_forward_3rd_derivatives` array of
+// `mapping_output`. Asserts that the
+// update_jacobian_pushed_forward_3rd_derivatives bit is set in `update_flags`.
+template <int dim, int spacedim>
+inline
+const std::vector<Tensor<5,spacedim> > &
+FEValuesBase<dim,spacedim>::get_jacobian_pushed_forward_3rd_derivatives () const
+{
+  Assert (this->update_flags & update_jacobian_pushed_forward_3rd_derivatives,
+          ExcAccessToUninitializedField("update_jacobian_pushed_forward_3rd_derivatives"));
+
+  const std::vector<Tensor<5,spacedim> > &pushed_forward_3rd
+    = this->mapping_output.jacobian_pushed_forward_3rd_derivatives;
+  return pushed_forward_3rd;
+}
+
+
+// Hand out the `inverse_jacobians` array of `mapping_output`. Asserts that
+// the update_inverse_jacobians bit is set in `update_flags`.
+template <int dim, int spacedim>
+inline
+const std::vector<DerivativeForm<1,spacedim,dim> > &
+FEValuesBase<dim,spacedim>::get_inverse_jacobians () const
+{
+  Assert (this->update_flags & update_inverse_jacobians,
+          ExcAccessToUninitializedField("update_inverse_jacobians"));
+
+  const std::vector<DerivativeForm<1,spacedim,dim> > &inverses
+    = this->mapping_output.inverse_jacobians;
+  return inverses;
+}
+
+
+
+// Return element i of the `quadrature_points` array of `mapping_output`.
+// Asserts first that the update_quadrature_points bit is set in
+// `update_flags`, then that i is smaller than the size of the array.
+template <int dim, int spacedim>
+inline
+const Point<spacedim> &
+FEValuesBase<dim,spacedim>::quadrature_point (const unsigned int i) const
+{
+  Assert (this->update_flags & update_quadrature_points,
+          ExcAccessToUninitializedField("update_quadrature_points"));
+  Assert (i<this->mapping_output.quadrature_points.size(),
+          ExcIndexRange(i, 0, this->mapping_output.quadrature_points.size()));
+
+  const Point<spacedim> &location = this->mapping_output.quadrature_points[i];
+  return location;
+}
+
+
+
+
+// Return element i of the `JxW_values` array of `mapping_output` by value.
+// Asserts first that the update_JxW_values bit is set in `update_flags`,
+// then that i is smaller than the size of the array.
+template <int dim, int spacedim>
+inline
+double
+FEValuesBase<dim,spacedim>::JxW (const unsigned int i) const
+{
+  Assert (this->update_flags & update_JxW_values,
+          ExcAccessToUninitializedField("update_JxW_values"));
+  Assert (i<this->mapping_output.JxW_values.size(),
+          ExcIndexRange(i, 0, this->mapping_output.JxW_values.size()));
+
+  const double weight = this->mapping_output.JxW_values[i];
+  return weight;
+}
+
+
+
+// Return element i of the `jacobians` array of `mapping_output`. Asserts
+// first that the update_jacobians bit is set in `update_flags`, then that i
+// is smaller than the size of the array.
+template <int dim, int spacedim>
+inline
+const DerivativeForm<1,dim,spacedim> &
+FEValuesBase<dim,spacedim>::jacobian (const unsigned int i) const
+{
+  Assert (this->update_flags & update_jacobians,
+          ExcAccessToUninitializedField("update_jacobians"));
+  Assert (i<this->mapping_output.jacobians.size(),
+          ExcIndexRange(i, 0, this->mapping_output.jacobians.size()));
+
+  const DerivativeForm<1,dim,spacedim> &jac = this->mapping_output.jacobians[i];
+  return jac;
+}
+
+
+
+// Return element i of the `jacobian_grads` array of `mapping_output`.
+// Asserts first that the update_jacobian_grads bit is set in `update_flags`
+// (the exception text names the flag exactly as it is spelled in the
+// condition), then that i is smaller than the size of the array.
+template <int dim, int spacedim>
+inline
+const DerivativeForm<2,dim,spacedim> &
+FEValuesBase<dim,spacedim>::jacobian_grad (const unsigned int i) const
+{
+  Assert (this->update_flags & update_jacobian_grads,
+          ExcAccessToUninitializedField("update_jacobian_grads"));
+  Assert (i<this->mapping_output.jacobian_grads.size(),
+          ExcIndexRange(i, 0, this->mapping_output.jacobian_grads.size()));
+
+  return this->mapping_output.jacobian_grads[i];
+}
+
+
+
+// Return element i of the `inverse_jacobians` array of `mapping_output`.
+// Asserts first that the update_inverse_jacobians bit is set in
+// `update_flags`, then that i is smaller than the size of the array.
+template <int dim, int spacedim>
+inline
+const DerivativeForm<1,spacedim,dim> &
+FEValuesBase<dim,spacedim>::inverse_jacobian (const unsigned int i) const
+{
+  Assert (this->update_flags & update_inverse_jacobians,
+          ExcAccessToUninitializedField("update_inverse_jacobians"));
+  Assert (i<this->mapping_output.inverse_jacobians.size(),
+          ExcIndexRange(i, 0, this->mapping_output.inverse_jacobians.size()));
+
+  const DerivativeForm<1,spacedim,dim> &inverse
+    = this->mapping_output.inverse_jacobians[i];
+  return inverse;
+}
+
+
+// Return element i of the `normal_vectors` array of `mapping_output`.
+// Asserts first that the update_normal_vectors bit is set in `update_flags`,
+// then that i is smaller than the size of the array.
+template <int dim, int spacedim>
+inline
+const Tensor<1,spacedim> &
+FEValuesBase<dim,spacedim>::normal_vector (const unsigned int i) const
+{
+  // Short name for the class that declares ExcAccessToUninitializedField.
+  typedef FEValuesBase<dim,spacedim> FVB;
+
+  Assert (this->update_flags & update_normal_vectors,
+          typename FVB::ExcAccessToUninitializedField("update_normal_vectors"));
+  Assert (i<this->mapping_output.normal_vectors.size(),
+          ExcIndexRange(i, 0, this->mapping_output.normal_vectors.size()));
+
+  const Tensor<1,spacedim> &normal = this->mapping_output.normal_vectors[i];
+  return normal;
+}
+
+
+
+/*------------------------ Inline functions: FEValues ----------------------------*/
+
+
+// Accessor returning a constant reference to the `quadrature` member.
+template <int dim, int spacedim>
+inline
+const Quadrature<dim> &
+FEValues<dim,spacedim>::get_quadrature () const
+{
+  return this->quadrature;
+}
+
+
+
+// Return a constant reference to the object the function is called on.
+template <int dim, int spacedim>
+inline
+const FEValues<dim,spacedim> &
+FEValues<dim,spacedim>::get_present_fe_values () const
+{
+  const FEValues<dim,spacedim> &self = *this;
+  return self;
+}
+
+
+/*------------------------ Inline functions: FEFaceValuesBase --------------------*/
+
+
+// Accessor returning a copy of the `present_face_index` member.
+template <int dim, int spacedim>
+inline
+unsigned int
+FEFaceValuesBase<dim,spacedim>::get_face_index () const
+{
+  return this->present_face_index;
+}
+
+
+/*------------------------ Inline functions: FE*FaceValues --------------------*/
+
+// Accessor returning a constant reference to the `quadrature` member, whose
+// dimension is one less than that of the cell.
+template <int dim, int spacedim>
+inline
+const Quadrature<dim-1> &
+FEFaceValuesBase<dim,spacedim>::get_quadrature () const
+{
+  return this->quadrature;
+}
+
+
+
+// Return a constant reference to the object the function is called on.
+template <int dim, int spacedim>
+inline
+const FEFaceValues<dim,spacedim> &
+FEFaceValues<dim,spacedim>::get_present_fe_values () const
+{
+  const FEFaceValues<dim,spacedim> &self = *this;
+  return self;
+}
+
+
+
+// Return a constant reference to the object the function is called on.
+template <int dim, int spacedim>
+inline
+const FESubfaceValues<dim,spacedim> &
+FESubfaceValues<dim,spacedim>::get_present_fe_values () const
+{
+  const FESubfaceValues<dim,spacedim> &self = *this;
+  return self;
+}
+
+
+
+// Return element i of the `boundary_forms` array of `mapping_output`.
+// The update-flag assertion comes first, in the same order as in
+// FEValuesBase::normal_vector() and the other indexed accessors, so that a
+// field that was never requested is reported as such rather than as an index
+// error (presumably the array is then empty -- TODO confirm against the
+// mapping implementations).
+template <int dim, int spacedim>
+inline
+const Tensor<1,spacedim> &
+FEFaceValuesBase<dim,spacedim>::boundary_form (const unsigned int i) const
+{
+  // Short name for the base class that declares ExcAccessToUninitializedField.
+  typedef FEValuesBase<dim,spacedim> FVB;
+  Assert (this->update_flags & update_boundary_forms,
+          typename FVB::ExcAccessToUninitializedField("update_boundary_forms"));
+  Assert (i<this->mapping_output.boundary_forms.size(),
+          ExcIndexRange(i, 0, this->mapping_output.boundary_forms.size()));
+
+  return this->mapping_output.boundary_forms[i];
+}
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/fe/fe_values_extractors.h b/include/deal.II/fe/fe_values_extractors.h
new file mode 100644
index 0000000..d771a05
--- /dev/null
+++ b/include/deal.II/fe/fe_values_extractors.h
@@ -0,0 +1,285 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__fe_values_extractors_h
+#define dealii__fe_values_extractors_h
+
+
+#include <deal.II/base/config.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/**
+ * A namespace in which we declare "extractors", i.e. classes that when used
+ * as subscripts in operator[] expressions on FEValues, FEFaceValues, and
+ * FESubfaceValues objects extract certain components of a vector-valued
+ * element. The result of applying an extractor to these objects is an object
+ * with corresponding type from the namespace FEValuesViews. There are
+ * extractors for single scalar components, vector components consisting of
+ * <code>dim</code> elements, and second order symmetric tensors consisting of
+ * <code>(dim*dim + dim)/2</code> components.
+ *
+ * See the description of the
+ * @ref vector_valued
+ * module for examples how to use the features of this namespace.
+ *
+ * @ingroup feaccess vector_valued
+ */
+namespace FEValuesExtractors
+{
+  /**
+   * Extractor that selects one scalar component of a vector-valued element.
+   * Applying an object of this type to an FEValues, FEFaceValues or
+   * FESubfaceValues object yields an object of type FEValuesViews::Scalar.
+   * The concept of extractors is explained in the documentation of the
+   * namespace FEValuesExtractors and in the
+   * @ref vector_valued
+   * module.
+   *
+   * @ingroup feaccess vector_valued
+   */
+  struct Scalar
+  {
+    /**
+     * Default constructor. The object is initialized with an invalid
+     * component and can therefore not be used as is. The constructor exists
+     * so that objects of this kind can be stored in arrays that need a
+     * default constructor when they are resized; a suitable object is then
+     * assigned to each array element afterwards.
+     */
+    Scalar ();
+
+    /**
+     * Constructor. The argument is the vector component to be selected.
+     */
+    Scalar (const unsigned int component);
+
+    /**
+     * Index of the selected scalar component of the vector.
+     */
+    unsigned int component;
+  };
+
+
+  /**
+   * Extractor that selects <code>spacedim</code> consecutive components of a
+   * vector-valued element and treats them as one vector. The value of
+   * <code>spacedim</code> is defined by the FEValues object the extractor is
+   * applied to. Applying an object of this type to an FEValues, FEFaceValues
+   * or FESubfaceValues object yields an object of type FEValuesViews::Vector.
+   *
+   * The concept of extractors is explained in the documentation of the
+   * namespace FEValuesExtractors and in the
+   * @ref vector_valued
+   * module.
+   *
+   * Note that "vector" is meant here in the sense in which physics uses the
+   * word: an object with <code>spacedim</code> components that behave in
+   * specific ways under coordinate system transformations, such as a
+   * velocity or a displacement field. Mathematics (and other parts of the
+   * library, for example the Vector class) uses the word differently, namely
+   * for a mere collection of numbers. An example of the latter is the set of
+   * concentrations of chemical species in a flame: these are really just a
+   * collection of scalar variables because, unlike the components of a
+   * velocity vector, they do not change if the coordinate system is rotated.
+   * Consequently, this class should not be used for such collections.
+   *
+   * @ingroup feaccess vector_valued
+   */
+  struct Vector
+  {
+    /**
+     * Default constructor. The object is initialized with an invalid
+     * component and can therefore not be used as is. The constructor exists
+     * so that objects of this kind can be stored in arrays that need a
+     * default constructor when they are resized; a suitable object is then
+     * assigned to each array element afterwards.
+     */
+    Vector ();
+
+    /**
+     * Constructor. The argument is the first component, inside the FEValues
+     * object, of the vector to be selected.
+     */
+    Vector (const unsigned int first_vector_component);
+
+    /**
+     * Index of the first component of the vector view.
+     */
+    unsigned int first_vector_component;
+  };
+
+
+  /**
+   * Extractor that selects a symmetric tensor whose rank is given by the
+   * template argument. For a second order symmetric tensor, the selection
+   * is a collection of <code>(dim*dim + dim)/2</code> components of a
+   * vector-valued element, where the value of <code>dim</code> is defined by
+   * the FEValues object the extractor is applied to. Applying an object of
+   * this type to an FEValues, FEFaceValues or FESubfaceValues object yields
+   * an object of type FEValuesViews::SymmetricTensor.
+   *
+   * The concept of extractors is explained in the documentation of the
+   * namespace FEValuesExtractors and in the
+   * @ref vector_valued
+   * module.
+   *
+   * @ingroup feaccess vector_valued
+   *
+   * @author Andrew McBride, 2009
+   */
+  template <int rank>
+  struct SymmetricTensor
+  {
+    /**
+     * Default constructor. The object is initialized with an invalid
+     * component and can therefore not be used as is. The constructor exists
+     * so that objects of this kind can be stored in arrays that need a
+     * default constructor when they are resized; a suitable object is then
+     * assigned to each array element afterwards.
+     */
+    SymmetricTensor ();
+
+    /**
+     * Constructor. The argument is the first component, inside the FEValues
+     * object, of the tensor to be selected.
+     */
+    SymmetricTensor (const unsigned int first_tensor_component);
+
+    /**
+     * Index of the first component of the tensor view.
+     */
+    unsigned int first_tensor_component;
+  };
+
+
+  /**
+   * Extractor that selects a (possibly non-symmetric) tensor whose rank is
+   * given by the template argument. For a second order tensor, the
+   * selection is a collection of <code>(dim*dim)</code> components of a
+   * vector-valued element, where the value of <code>dim</code> is defined by
+   * the FEValues object the extractor is applied to. Applying an object of
+   * this type to an FEValues, FEFaceValues or FESubfaceValues object yields
+   * an object of type FEValuesViews::Tensor.
+   *
+   * The concept of extractors is explained in the documentation of the
+   * namespace FEValuesExtractors and in the
+   * @ref vector_valued
+   * module.
+   *
+   * @ingroup feaccess vector_valued
+   *
+   * @author Denis Davydov, 2013
+   */
+  template <int rank>
+  struct Tensor
+  {
+    /**
+     * Default constructor. The object is initialized with an invalid
+     * component and can therefore not be used as is. The constructor exists
+     * so that objects of this kind can be stored in arrays that need a
+     * default constructor when they are resized; a suitable object is then
+     * assigned to each array element afterwards.
+     */
+    Tensor ();
+
+    /**
+     * Constructor. The argument is the first component, inside the FEValues
+     * object, of the tensor to be selected.
+     */
+    Tensor (const unsigned int first_tensor_component);
+
+    /**
+     * Index of the first component of the tensor view.
+     */
+    unsigned int first_tensor_component;
+  };
+}
+
+
+/*------------------------ Inline functions: namespace FEValuesExtractors --------*/
+
+namespace FEValuesExtractors
+{
+  // Constructors of the extractor classes. Each default constructor stores
+  // numbers::invalid_unsigned_int as the component index; each one-argument
+  // constructor stores the index it is given.
+
+  inline Scalar::Scalar ()
+    : component (numbers::invalid_unsigned_int)
+  {}
+
+
+  inline Scalar::Scalar (const unsigned int component)
+    : component (component)
+  {}
+
+
+  inline Vector::Vector ()
+    : first_vector_component (numbers::invalid_unsigned_int)
+  {}
+
+
+  inline Vector::Vector (const unsigned int first_vector_component)
+    : first_vector_component (first_vector_component)
+  {}
+
+
+  template <int rank>
+  inline SymmetricTensor<rank>::SymmetricTensor ()
+    : first_tensor_component (numbers::invalid_unsigned_int)
+  {}
+
+
+  template <int rank>
+  inline SymmetricTensor<rank>::SymmetricTensor (const unsigned int first_tensor_component)
+    : first_tensor_component (first_tensor_component)
+  {}
+
+
+  template <int rank>
+  inline Tensor<rank>::Tensor ()
+    : first_tensor_component (numbers::invalid_unsigned_int)
+  {}
+
+
+  template <int rank>
+  inline Tensor<rank>::Tensor (const unsigned int first_tensor_component)
+    : first_tensor_component (first_tensor_component)
+  {}
+}
+
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/fe/mapping.h b/include/deal.II/fe/mapping.h
new file mode 100644
index 0000000..32be5cc
--- /dev/null
+++ b/include/deal.II/fe/mapping.h
@@ -0,0 +1,1181 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__mapping_h
+#define dealii__mapping_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/derivative_form.h>
+#include <deal.II/base/std_cxx11/array.h>
+#include <deal.II/base/array_view.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/fe/fe_update_flags.h>
+
+#include <cmath>
+
+DEAL_II_NAMESPACE_OPEN
+
+// Forward declarations; each class template is declared exactly once.
+template <int dim> class Quadrature;
+template <int dim, int spacedim> class FEValuesBase;
+template <int dim, int spacedim> class FEValues;
+template <int dim, int spacedim> class FEFaceValues;
+template <int dim, int spacedim> class FESubfaceValues;
+
+
+/**
+ * The transformation type used for the Mapping::transform() functions.
+ *
+ * Special finite elements may need special Mapping from the reference cell to
+ * the actual mesh cell. In order to be most flexible, this enum provides an
+ * extensible interface for arbitrary transformations. Nevertheless, these
+ * must be implemented in the transform() functions of inheriting classes in
+ * order to work.
+ *
+ * @ingroup mapping
+ */
+enum MappingType
+{
+  /**
+   * No mapping, i.e., shape functions are not mapped from a reference cell
+   * but instead are defined right on the real-space cell.
+   */
+  mapping_none = 0x0000,
+
+  /**
+   * Covariant mapping (see Mapping::transform() for details).
+   */
+  mapping_covariant = 0x0001,
+
+  /**
+   * Contravariant mapping (see Mapping::transform() for details).
+   */
+  mapping_contravariant = 0x0002,
+
+  /**
+   * Mapping of the gradient of a covariant vector field (see
+   * Mapping::transform() for details).
+   */
+  mapping_covariant_gradient = 0x0003,
+
+  /**
+   * Mapping of the gradient of a contravariant vector field (see
+   * Mapping::transform() for details).
+   */
+  mapping_contravariant_gradient = 0x0004,
+
+  /**
+   * The Piola transform usually used for Hdiv elements. Piola transform is
+   * the standard transformation of vector valued elements in H<sup>div</sup>.
+   * It amounts to a contravariant transformation scaled by the inverse of the
+   * volume element.
+   */
+  mapping_piola = 0x0100,
+
+  /**
+   * Transformation for the gradient of a vector field corresponding to a
+   * mapping_piola transformation (see Mapping::transform() for details).
+   */
+  mapping_piola_gradient = 0x0101,
+
+  /**
+   * The mapping used for Nedelec elements.
+   *
+   * Curl-conforming elements are mapped as covariant vectors. Nevertheless,
+   * we introduce a separate mapping type, such that we can use the same flag
+   * for the vector and its gradient (see Mapping::transform() for details).
+   */
+  mapping_nedelec = 0x0200,
+
+  /**
+   * The mapping used for Raviart-Thomas elements.
+   */
+  mapping_raviart_thomas = 0x0300,
+
+  /**
+   * The mapping used for BDM elements. This enumerator has the same numeric
+   * value as mapping_raviart_thomas.
+   */
+  mapping_bdm = mapping_raviart_thomas,
+
+  /**
+   * The mappings for 2-forms and third order tensors.
+   *
+   * These are mappings typically applied to hessians transformed to the
+   * reference cell.
+   *
+   * Mapping of the hessian of a covariant vector field (see
+   * Mapping::transform() for details).
+   *
+   * No explicit value is given for this and the following enumerators, so
+   * each takes the value of its predecessor plus one, starting from
+   * mapping_bdm. Their position in the list therefore matters.
+   */
+  mapping_covariant_hessian,
+
+  /**
+   * Mapping of the hessian of a contravariant vector field (see
+   * Mapping::transform() for details).
+   */
+  mapping_contravariant_hessian,
+
+  /**
+   * Mapping of the hessian of a piola vector field (see Mapping::transform()
+   * for details).
+   */
+  mapping_piola_hessian
+};
+
+
+/**
+ * @short Abstract base class for mapping classes.
+ *
+ * This class declares the interface for the functionality to describe
+ * mappings from the reference (unit) cell to a cell in real space, as well as
+ * for filling the information necessary to use the FEValues, FEFaceValues,
+ * and FESubfaceValues classes. Concrete implementations of these interfaces
+ * are provided in derived classes.
+ *
+ * <h3>Mathematics of the mapping</h3>
+ *
+ * The mapping is a transformation $\mathbf x = \mathbf F_K(\hat{\mathbf  x})$
+ * which maps points $\hat{\mathbf x}$ in the reference cell
+ * $[0,1]^\text{dim}$ to points $\mathbf x$ in the actual grid cell
+ * $K\subset{\mathbb R}^\text{spacedim}$. Many of the applications of such
+ * mappings require the Jacobian of this mapping, $J(\hat{\mathbf x}) =
+ * \hat\nabla {\mathbf F}_K(\hat{\mathbf  x})$. For instance, if
+ * dim=spacedim=2, we have
+ * @f[
+ * J(\hat{\mathbf  x}) = \left(\begin{matrix}
+ * \frac{\partial x}{\partial \hat x} & \frac{\partial x}{\partial \hat y}
+ * \\
+ * \frac{\partial y}{\partial \hat x} & \frac{\partial y}{\partial \hat y}
+ * \end{matrix}\right)
+ * @f]
+ *
+ * <h4>%Mapping of scalar functions</h4>
+ *
+ * The shape functions of scalar finite elements are typically defined on a
+ * reference cell and are then simply mapped according to the rule
+ * @f[
+ * \varphi(\mathbf x) = \varphi\bigl(\mathbf F_K(\hat{\mathbf  x})\bigr)
+ * = \hat \varphi(\hat{\mathbf  x}).
+ * @f]
+ *
+ *
+ * <h4>%Mapping of integrals</h4>
+ *
+ * Using simply a change of variables, integrals of scalar functions over a
+ * cell $K$ can be expressed as an integral over the reference cell $\hat K$.
+ * Specifically, the volume form $d\hat x$ is transformed so that
+ * @f[
+ *  \int_K u(\mathbf x)\,dx = \int_{\hat K} \hat
+ * u(\hat{\mathbf  x}) \left|\text{det}J(\hat{\mathbf  x})\right|
+ * \,d\hat x.
+ * @f]
+ *
+ * In expressions where such integrals are approximated by quadrature, this
+ * then leads to terms of the form
+ * @f[
+ *  \int_K u(\mathbf x)\,dx
+ *  \approx
+ *  \sum_{q}
+ *  \hat u(\hat{\mathbf  x}_q)
+ *  \underbrace{\left|\text{det}J(\hat{\mathbf  x}_q)\right| w_q}_{=: \text{JxW}_q}.
+ * @f]
+ * Here, the weights $\text{JxW}_q$ of each quadrature point (where <i>JxW</i>
+ * mnemonically stands for <i>Jacobian times Quadrature Weights</i>) take the
+ * role of the $dx$ in the original integral. Consequently, they appear in all
+ * code that computes integrals approximated by quadrature, and are accessed
+ * by FEValues::JxW().
+ *
+ * @todo Document what happens in the codimension-1 case.
+ *
+ *
+ * <h4>%Mapping of vector fields, differential forms and gradients of vector
+ * fields</h4>
+ *
+ * The transformation of vector fields or differential forms (gradients of
+ * scalar functions) $\mathbf v$, and gradients of vector fields $\mathbf T$
+ * follows the general form
+ *
+ * @f[
+ * \mathbf v(\mathbf x) = \mathbf A(\hat{\mathbf  x})
+ * \hat{\mathbf  v}(\hat{\mathbf  x}),
+ * \qquad
+ * \mathbf T(\mathbf x) = \mathbf A(\hat{\mathbf  x})
+ * \hat{\mathbf  T}(\hat{\mathbf  x}) \mathbf B(\hat{\mathbf  x}).
+ * @f]
+ * The differential forms <b>A</b> and <b>B</b> are determined by the kind of
+ * object being transformed. These transformations are performed through the
+ * transform() functions, and the type of object being transformed is
+ * specified by their MappingType argument. See the documentation there for
+ * possible choices.
+ *
+ * <h4>Derivatives of the mapping</h4>
+ *
+ * Some applications require the derivatives of the mapping, of which the
+ * first order derivative is the mapping Jacobian, $J_{iJ}(\hat{\mathbf
+ * x})=\frac{\partial x_i}{\partial \hat x_J}$, described above. Higher order
+ * derivatives of the mapping are similarly defined, for example the Jacobian
+ * derivative, $\hat H_{iJK}(\hat{\mathbf  x}) = \frac{\partial^2
+ * x_i}{\partial \hat x_J \partial \hat x_K}$, and the Jacobian second
+ * derivative, $\hat K_{iJKL}(\hat{\mathbf  x}) = \frac{\partial^3
+ * x_i}{\partial \hat x_J \partial \hat x_K \partial \hat x_L}$. It is also
+ * useful to define the "pushed-forward" versions of the higher order
+ * derivatives: the Jacobian pushed-forward derivative, $H_{ijk}(\hat{\mathbf
+ * x}) = \frac{\partial^2 x_i}{\partial \hat x_J \partial \hat
+ * x_K}(J_{jJ})^{-1}(J_{kK})^{-1}$, and the Jacobian pushed-forward second
+ * derivative, $K_{ijkl}(\hat{\mathbf  x}) = \frac{\partial^3 x_i}{\partial
+ * \hat x_J \partial \hat x_K \partial \hat
+ * x_L}(J_{jJ})^{-1}(J_{kK})^{-1}(J_{lL})^{-1}$. These pushed-forward versions
+ * can be used to compute the higher order derivatives of functions defined on
+ * the reference cell with respect to the real cell coordinates. For instance,
+ * the Jacobian derivative with respect to the real cell coordinates is given
+ * by:
+ *
+ * @f[
+ * \frac{\partial}{\partial x_k}\left[J_{iJ}(\hat{\mathbf  x})\right] =
+ * H_{ikn}(\hat{\mathbf  x})J_{nJ}(\hat{\mathbf  x}),
+ * @f]
+ * and the derivative of the Jacobian inverse with respect to the real cell
+ * coordinates is similarly given by:
+ * @f[
+ * \frac{\partial}{\partial x_k}\left[\left(J_{iJ}(\hat{\mathbf  x})\right)^{-1}\right]
+ * = -H_{nik}(\hat{\mathbf  x})\left(J_{nJ}(\hat{\mathbf  x})\right)^{-1}.
+ * @f]
+ *
+ * In a similar fashion, higher order derivatives, with respect to the real
+ * cell coordinates, of functions defined on the reference cell can be defined
+ * using the Jacobian pushed-forward higher-order derivatives. For example,
+ * the derivative, with respect to the real cell coordinates, of the Jacobian
+ * pushed-forward derivative is given by:
+ *
+ * @f[
+ * \frac{\partial}{\partial x_l}\left[H_{ijk}(\hat{\mathbf  x})\right] = K_{ijkl}(\hat{\mathbf  x})
+ * -H_{mjl}(\hat{\mathbf  x})H_{imk}(\hat{\mathbf  x})-H_{mkl}(\hat{\mathbf  x})H_{imj}(\hat{\mathbf  x}).
+ * @f]
+ *
+ * <h3>References</h3>
+ *
+ * A general publication on differential geometry and finite elements is the
+ * survey
+ * <ul>
+ * <li>Douglas N. Arnold, Richard S. Falk, and Ragnar Winther. <i>Finite
+ * element exterior calculus: from Hodge theory to numerical stability.</i>
+ * Bull. Amer. Math. Soc. (N.S.), 47:281-354, 2010. <a
+ * href="http://dx.doi.org/10.1090/S0273-0979-10-01278-4">DOI:
+ * 10.1090/S0273-0979-10-01278-4</a>.
+ * </ul>
+ *
+ * The description of the Piola transform has been taken from the <a
+ * href="http://www.math.uh.edu/~rohop/spring_05/downloads/">lecture notes</a>
+ * by Ronald H. W. Hoppe, University of Houston, Chapter 7.
+ *
+ * @ingroup mapping
+ * @author Guido Kanschat, Ralf Hartmann 2000, 2001
+ */
+template <int dim, int spacedim=dim>
+class Mapping : public Subscriptor
+{
+public:
+
+  /**
+   * Virtual destructor.
+   */
+  virtual ~Mapping ();
+
+  /**
+   * Return a pointer to a copy of the present object. The caller of this copy
+   * then assumes ownership of it.
+   *
+   * The function is declared abstract virtual in this base class, and derived
+   * classes will have to implement it.
+   *
+   * This function is mainly used by the hp::MappingCollection class.
+   */
+  virtual
+  Mapping<dim,spacedim> *clone () const = 0;
+
+  /**
+   * Return the mapped vertices of a cell.
+   *
+   * Most of the time, these values will simply be the coordinates of the
+   * vertices of a cell as returned by <code>cell-@>vertex(v)</code> for
+   * vertex <code>v</code>, i.e., information stored by the triangulation.
+   * However, there are also mappings that add displacements or choose
+   * completely different locations, e.g., MappingQEulerian,
+   * MappingQ1Eulerian, or MappingFEField.
+   *
+   * The default implementation of this function simply returns the
+   * information stored by the triangulation, i.e.,
+   * <code>cell-@>vertex(v)</code>.
+   */
+  virtual
+  std_cxx11::array<Point<spacedim>, GeometryInfo<dim>::vertices_per_cell>
+  get_vertices (const typename Triangulation<dim,spacedim>::cell_iterator &cell) const;
+
+  /**
+   * Returns whether the mapping preserves vertex locations. In other words,
+   * this function returns whether the mapped location of the reference cell
+   * vertices (given by GeometryInfo::unit_cell_vertex()) equals the result of
+   * <code>cell-@>vertex()</code> (i.e., information stored by the
+   * triangulation).
+   *
+   * For example, implementations in derived classes return @p true for
+   * MappingQ, MappingQGeneric, MappingCartesian, but @p false for
+   * MappingQEulerian, MappingQ1Eulerian, and MappingFEField.
+   */
+  virtual
+  bool preserves_vertex_locations () const = 0;
+
+  /**
+   * @name Mapping points between reference and real cells
+   * @{
+   */
+
+  /**
+   * Maps the point @p p on the unit cell to the corresponding point on the
+   * real cell @p cell.
+   *
+   * @param cell Iterator to the cell that will be used to define the mapping.
+   * @param p Location of a point on the reference cell.
+   * @return The location of the reference point mapped to real space using
+   * the mapping defined by the class derived from the current one that
+   * implements the mapping, and the coordinates of the cell identified by the
+   * first argument.
+   */
+  virtual
+  Point<spacedim>
+  transform_unit_to_real_cell (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                               const Point<dim>                                          &p) const = 0;
+
+  /**
+   * Maps the point @p p on the real @p cell to the corresponding point on the
+   * unit cell, and return its coordinates. This function provides the inverse
+   * of the mapping provided by transform_unit_to_real_cell().
+   *
+   * In the codimension one case, this function returns the normal projection
+   * of the real point @p p on the curve or surface identified by the @p cell.
+   *
+   * @note Polynomial mappings from the reference (unit) cell coordinates to
+   * the coordinate system of a real cell are not always invertible if the
+   * point for which the inverse mapping is to be computed lies outside the
+   * cell's boundaries. In such cases, the current function may fail to
+   * compute a point on the reference cell whose image under the mapping
+   * equals the given point @p p.  If this is the case then this function
+   * throws an exception of type Mapping::ExcTransformationFailed . Whether
+   * the given point @p p lies outside the cell can therefore be determined by
+   * checking whether the returned reference coordinates lie inside or outside
+   * the reference cell (e.g., using GeometryInfo::is_inside_unit_cell()) or
+   * whether the exception mentioned above has been thrown.
+   *
+   * @param cell Iterator to the cell that will be used to define the mapping.
+   * @param p Location of a point on the given cell.
+   * @return The reference cell location of the point that when mapped to real
+   * space equals the coordinates given by the second argument. This mapping
+   * uses the mapping defined by the class derived from the current one that
+   * implements the mapping, and the coordinates of the cell identified by the
+   * first argument.
+   */
+  virtual
+  Point<dim>
+  transform_real_to_unit_cell (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                               const Point<spacedim>                                     &p) const = 0;
+
+  /**
+   * Transforms the point @p p on the real @p cell to the corresponding point
+   * on the unit cell, and then projects it to a dim-1  point on the face with
+   * the given face number @p face_no. Ideally the point @p p is near the face
+   * @p face_no, but any point in the cell can technically be projected.
+   *
+   * This function does not make physical sense when dim=1, so it throws an
+   * exception in this case.
+   */
+  Point<dim-1>
+  project_real_point_to_unit_point_on_face (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                                            const unsigned int &face_no,
+                                            const Point<spacedim> &p) const;
+
+  /**
+   * @}
+   */
+
+
+  /**
+   * @name Exceptions
+   * @{
+   */
+
+  /**
+   * Exception
+   */
+  DeclException0 (ExcInvalidData);
+
+
+  /**
+   * Computing the mapping between a real space point and a point in reference
+   * space failed, typically because the given point lies outside the cell
+   * where the inverse mapping is not unique.
+   *
+   * @ingroup Exceptions
+   */
+  DeclExceptionMsg (ExcTransformationFailed,
+                    "Computing the mapping between a real space point and a point in reference "
+                    "space failed, typically because the given point lies outside the cell "
+                    "where the inverse mapping is not unique.");
+
+  /**
+   * deal.II assumes the Jacobian determinant to be positive. When the cell
+   * geometry is distorted under the image of the mapping, the mapping becomes
+   * invalid and this exception is thrown.
+   *
+   * @ingroup Exceptions
+   */
+  DeclException3 (ExcDistortedMappedCell,
+                  Point<spacedim>, double, int,
+                  << "The image of the mapping applied to cell with center ["
+                  << arg1 << "] is distorted. The cell geometry or the "
+                  << "mapping are invalid, giving a non-positive volume "
+                  << "fraction of " << arg2 << " in quadrature point "
+                  << arg3 << ".");
+
+  /**
+   * @}
+   */
+
+  /**
+   * @name Interface with FEValues
+   * @{
+   */
+
+public:
+  /**
+   * Base class for internal data of mapping objects. The internal mechanism
+   * is that upon construction of a FEValues object, it asks the mapping and
+   * finite element classes that are to be used to allocate memory for their
+   * own purpose in which they may store data that only needs to be computed
+   * once. For example, most finite elements will store the values of the
+   * shape functions at the quadrature points in this object, since they do
+   * not change from cell to cell and only need to be computed once. The same
+   * may be true for Mapping classes that want to only evaluate the shape
+   * functions used for mapping once at the quadrature points.
+   *
+   * Since different FEValues objects using different quadrature rules might
+   * access the same mapping object at the same time, it is necessary to
+   * create one such object per FEValues object. FEValues does this by calling
+   * Mapping::get_data(), or in reality the implementation of the
+   * corresponding function in derived classes. Ownership of the object
+   * created by Mapping::get_data() is then transferred to the FEValues
+   * object, but a reference to this object is passed to the mapping object
+   * every time it is asked to compute information on a concrete cell. This
+   * happens when FEValues::reinit() (or the corresponding classes in
+   * FEFaceValues and FESubfaceValues) call Mapping::fill_fe_values() (and
+   * similarly via Mapping::fill_fe_face_values() and
+   * Mapping::fill_fe_subface_values()).
+   *
+   * The purpose of this class is for mapping objects to store information
+   * that can be computed once at the beginning, on the reference cell, and to
+   * access it later when computing information on a concrete cell. As such,
+   * the object handed to Mapping::fill_fe_values() is marked as
+   * <code>const</code>, because the assumption is that at the time this
+   * information is used, it will not need to be modified again. However, classes
+   * derived from Mapping can also use such objects for two other purposes:
+   *
+   * - To provide scratch space for computations that are done in
+   * Mapping::fill_fe_values() and similar functions. Some of the derived
+   * classes would like to use scratch arrays and it would be a waste of time
+   * to allocate these arrays every time this function is called, just to de-
+   * allocate them again at the end of the function. Rather, one could allocate
+   * this memory once as a member variable of the current class, and simply
+   * use it in Mapping::fill_fe_values().
+   * - After calling Mapping::fill_fe_values(), FEValues::reinit()
+   * calls FiniteElement::fill_fe_values() where the finite element computes
+   * values, gradients, etc of the shape functions using both information
+   * computed once at the beginning using a mechanism similar to the one
+   * described here (see FiniteElement::InternalDataBase) as well as the data
+   * already computed by Mapping::fill_fe_values(). As part of its work, some
+   * implementations of FiniteElement::fill_fe_values() need to transform
+   * shape function data, and they do so by calling Mapping::transform(). The
+   * call to the latter function also receives a reference to the
+   * Mapping::InternalDataBase object. Since Mapping::transform() may be
+   * called many times on each cell, it is sometimes worthwhile for derived classes
+   * to compute some information only once in Mapping::fill_fe_values() and
+   * reuse it in Mapping::transform(). This information can also be stored in
+   * the classes that derived mapping classes derive from InternalDataBase.
+   *
+   * In both of these cases, the InternalDataBase object being passed around
+   * is "morally const", i.e., no external observer can tell whether a scratch
+   * array or some intermediate data for Mapping::transform() is being
+   * modified by Mapping::fill_fe_values() or not. Consequently, the
+   * InternalDataBase objects are always passed around as <code>const</code>
+   * objects. Derived classes that would like to make use of the two
+   * additional uses outlined above therefore need to mark the member
+   * variables they want to use for these purposes as <code>mutable</code> to
+   * allow for their modification despite the fact that the surrounding object
+   * is marked as <code>const</code>.
+   */
+  class InternalDataBase
+  {
+  private:
+    /**
+     * Copy construction is forbidden; the copy constructor is therefore private.
+     */
+    InternalDataBase (const InternalDataBase &);
+
+  public:
+    /**
+     * Constructor. Presumably initializes #update_each to @p update_default
+     * (the definition is not visible next to this declaration; to be confirmed).
+     */
+    InternalDataBase ();
+
+    /**
+     * Virtual destructor, allowing deletion of derived objects via base pointers.
+     */
+    virtual ~InternalDataBase ();
+
+    /**
+     * A set of update flags specifying the kind of information that an
+     * implementation of the Mapping interface needs to compute on each cell
+     * or face, i.e., in Mapping::fill_fe_values() and friends.
+     *
+     * This set of flags is stored here by implementations of
+     * Mapping::get_data(), Mapping::get_face_data(), or
+     * Mapping::get_subface_data(), and is that subset of the update flags
+     * passed to those functions that require re-computation on every cell.
+     * (The subset of the flags corresponding to information that can be
+     * computed once and for all already at the time of the call to
+     * Mapping::get_data() -- or an implementation of that interface -- need
+     * not be stored here because it has already been taken care of.)
+     */
+    UpdateFlags          update_each;
+
+    /**
+     * Return an estimate (in bytes) of the memory consumption of this object.
+     */
+    virtual std::size_t memory_consumption () const;
+  };
+
+
+protected:
+  /**
+   * Given a set of update flags, compute which other quantities <i>also</i>
+   * need to be computed in order to satisfy the request by the given flags.
+   * Then return the combination of the original set of flags and those just
+   * computed.
+   *
+   * As an example, if @p update_flags contains update_JxW_values (i.e., the
+   * product of the determinant of the Jacobian and the weights provided by
+   * the quadrature formula), a mapping may require the computation of the
+   * full Jacobian matrix in order to compute its determinant. It would then
+   * return not just update_JxW_values, but also update_jacobians. (This is
+   * not how it is actually done internally in the derived classes that
+   * compute the JxW values -- they set update_contravariant_transformation
+   * instead, from which the determinant can also be computed -- but this does
+   * not take away from the instructiveness of the example.)
+   *
+   * An extensive discussion of the interaction between this function and
+   * FEValues can be found in the
+   * @ref FE_vs_Mapping_vs_FEValues
+   * documentation module.
+   *
+   * @see UpdateFlags
+   */
+  virtual
+  UpdateFlags
+  requires_update_flags (const UpdateFlags update_flags) const = 0;
+
+  /**
+   * Create and return a pointer to an object into which mappings can store
+   * data that only needs to be computed once but that can then be used
+   * whenever the mapping is applied to a concrete cell (e.g., in the various
+   * transform() functions, as well as in the fill_fe_values(),
+   * fill_fe_face_values() and fill_fe_subface_values() that form the
+   * interface of mappings with the FEValues class).
+   *
+   * Derived classes will return pointers to objects of a type derived from
+   * Mapping::InternalDataBase (see there for more information) and may pre-
+   * compute some information already (in accordance with what will be asked
+   * of the mapping in the future, as specified by the update flags) and for
+   * the given quadrature object. Subsequent calls to transform() or
+   * fill_fe_values() and friends will then receive back the object created
+   * here (with the same set of update flags and for the same quadrature
+   * object). Derived classes can therefore pre-compute some information in
+   * their get_data() function and store it in the internal data object.
+   *
+   * The mapping classes do not keep track of the objects created by this
+   * function. Ownership will therefore rest with the caller.
+   *
+   * An extensive discussion of the interaction between this function and
+   * FEValues can be found in the
+   * @ref FE_vs_Mapping_vs_FEValues
+   * documentation module.
+   *
+   * @param update_flags A set of flags that define what is expected of the
+   * mapping class in future calls to transform() or the fill_fe_values()
+   * group of functions. This set of flags may contain flags that mappings do
+   * not know how to deal with (e.g., for information that is in fact computed
+   * by the finite element classes, such as UpdateFlags::update_values).
+   * Derived classes will need to store these flags, or at least that subset
+   * of flags that will require the mapping to perform any actions in
+   * fill_fe_values(), in InternalDataBase::update_each.
+   * @param quadrature The quadrature object for which mapping information
+   * will have to be computed. This includes the locations and weights of
+   * quadrature points.
+   * @return A pointer to a newly created object of type InternalDataBase (or
+   * a derived class). Ownership of this object passes to the calling
+   * function.
+   *
+   * @note C++ allows that virtual functions in derived classes may return
+   * pointers to objects not of type InternalDataBase but in fact pointers to
+   * objects of classes <i>derived</i> from InternalDataBase. (This feature is
+   * called "covariant return types".) This is useful in some contexts where
+   * the calling is within the derived class and will immediately make use of
+   * the returned object, knowing its real (derived) type.
+   */
+  virtual
+  InternalDataBase *
+  get_data (const UpdateFlags      update_flags,
+            const Quadrature<dim> &quadrature) const = 0;
+
+  /**
+   * Like get_data(), but in preparation for later calls to transform() or
+   * fill_fe_face_values() that will need information about mappings from the
+   * reference face to a face of a concrete cell.
+   *
+   * @param update_flags A set of flags that define what is expected of the
+   * mapping class in future calls to transform() or the fill_fe_values()
+   * group of functions. This set of flags may contain flags that mappings do
+   * not know how to deal with (e.g., for information that is in fact computed
+   * by the finite element classes, such as UpdateFlags::update_values).
+   * Derived classes will need to store these flags, or at least that subset
+   * of flags that will require the mapping to perform any actions in
+   * fill_fe_values(), in InternalDataBase::update_each.
+   * @param quadrature The quadrature object for which mapping information
+   * will have to be computed. This includes the locations and weights of
+   * quadrature points.
+   * @return A pointer to a newly created object of type InternalDataBase (or
+   * a derived class). Ownership of this object passes to the calling
+   * function.
+   *
+   * @note C++ allows that virtual functions in derived classes may return
+   * pointers to objects not of type InternalDataBase but in fact pointers to
+   * objects of classes <i>derived</i> from InternalDataBase. (This feature is
+   * called "covariant return types".) This is useful in some contexts where
+   * the calling is within the derived class and will immediately make use of
+   * the returned object, knowing its real (derived) type.
+   */
+  virtual
+  InternalDataBase *
+  get_face_data (const UpdateFlags        update_flags,
+                 const Quadrature<dim-1> &quadrature) const = 0;
+
+  /**
+   * Like get_data() and get_face_data(), but in preparation for later calls
+   * to transform() or fill_fe_subface_values() that will need information
+   * about mappings from the reference face to a child of a face (i.e.,
+   * subface) of a concrete cell.
+   *
+   * @param update_flags A set of flags that define what is expected of the
+   * mapping class in future calls to transform() or the fill_fe_values()
+   * group of functions. This set of flags may contain flags that mappings do
+   * not know how to deal with (e.g., for information that is in fact computed
+   * by the finite element classes, such as UpdateFlags::update_values).
+   * Derived classes will need to store these flags, or at least that subset
+   * of flags that will require the mapping to perform any actions in
+   * fill_fe_values(), in InternalDataBase::update_each.
+   * @param quadrature The quadrature object for which mapping information
+   * will have to be computed. This includes the locations and weights of
+   * quadrature points.
+   * @return A pointer to a newly created object of type InternalDataBase (or
+   * a derived class). Ownership of this object passes to the calling
+   * function.
+   *
+   * @note C++ allows that virtual functions in derived classes may return
+   * pointers to objects not of type InternalDataBase but in fact pointers to
+   * objects of classes <i>derived</i> from InternalDataBase. (This feature is
+   * called "covariant return types".) This is useful in some contexts where
+   * the calling is within the derived class and will immediately make use of
+   * the returned object, knowing its real (derived) type.
+   */
+  virtual
+  InternalDataBase *
+  get_subface_data (const UpdateFlags        update_flags,
+                    const Quadrature<dim-1> &quadrature) const = 0;
+
+  /**
+   * Compute information about the mapping from the reference cell to the real
+   * cell indicated by the first argument to this function. Derived classes
+   * will have to implement this function based on the kind of mapping they
+   * represent. It is called by FEValues::reinit().
+   *
+   * Conceptually, this function represents the application of the mapping
+   * $\mathbf x=\mathbf F_K(\hat {\mathbf x})$ from reference coordinates
+   * $\hat{\mathbf x}\in [0,1]^d$ to real space coordinates $\mathbf x$ for a given
+   * cell $K$. Its purpose is to compute the following kinds of data:
+   *
+   * - Data that results from the application of the mapping itself, e.g.,
+   * computing the location $\mathbf x_q = \mathbf F_K(\hat{\mathbf x}_q)$ of
+   * quadrature points on the real cell, and that is directly useful to users
+   * of FEValues, for example during assembly.
+   * - Data that is necessary for finite element implementations to compute
+   * their shape functions on the real cell. To this end, the
+   * FEValues::reinit() function calls FiniteElement::fill_fe_values() after
+   * the current function, and the output of this function serves as input to
+   * FiniteElement::fill_fe_values(). Examples of information that needs to be
+   * computed here for use by the finite element classes is the Jacobian of
+   * the mapping, $\hat\nabla \mathbf F_K(\hat{\mathbf x})$ or its inverse,
+   * for example to transform the gradients of shape functions on the
+   * reference cell to the gradients of shape functions on the real cell.
+   *
+   * The information computed by this function is used to fill the various
+   * member variables of the output argument of this function. Which of the
+   * member variables of that structure should be filled is determined by the
+   * update flags stored in the Mapping::InternalDataBase object passed to
+   * this function.
+   *
+   * An extensive discussion of the interaction between this function and
+   * FEValues can be found in the
+   * @ref FE_vs_Mapping_vs_FEValues
+   * documentation module.
+   *
+   * @param[in] cell The cell of the triangulation for which this function is
+   * to compute a mapping from the reference cell to.
+   * @param[in] cell_similarity Whether or not the cell given as first
+   * argument is simply a translation, rotation, etc of the cell for which
+   * this function was called the most recent time. This information is
+   * computed simply by matching the vertices (as stored by the Triangulation)
+   * between the previous and the current cell. The value passed here may be
+   * modified by implementations of this function and should then be returned
+   * (see the discussion of the return value of this function).
+   * @param[in] quadrature A reference to the quadrature formula in use for
+   * the current evaluation. This quadrature object is the same as the one
+   * used when creating the @p internal_data object. The object is used both
+   * to map the location of quadrature points, as well as to compute the JxW
+   * values for each quadrature point (which involves the quadrature weights).
+   * @param[in] internal_data A reference to an object previously created by
+   * get_data() and that may be used to store information the mapping can
+   * compute once on the reference cell. See the documentation of the
+   * Mapping::InternalDataBase class for an extensive description of the
+   * purpose of these objects.
+   * @param[out] output_data A reference to an object whose member variables
+   * should be computed. Not all of the members of this argument need to be
+   * filled; which ones need to be filled is determined by the update flags
+   * stored inside the @p internal_data object.
+   * @return An updated value of the @p cell_similarity argument to this
+   * function. The returned value will be used for the corresponding argument
+   * when FEValues::reinit() calls FiniteElement::fill_fe_values(). In most
+   * cases, derived classes will simply want to return the value passed for @p
+   * cell_similarity. However, implementations of this function may downgrade
+   * the level of cell similarity. This is, for example, the case for classes
+   * that take not only into account the locations of the vertices of a cell
+   * (as reported by the Triangulation), but also other information specific
+   * to the mapping. The purpose is that FEValues::reinit() can compute
+   * whether a cell is similar to the previous one only based on the cell's
+   * vertices, whereas the mapping may also consider displacement fields
+   * (e.g., in the MappingQ1Eulerian and MappingFEField classes). In such
+   * cases, the mapping may conclude that the previously computed cell
+   * similarity is too optimistic, and invalidate it for subsequent use in
+   * FiniteElement::fill_fe_values() by returning a less optimistic cell
+   * similarity value.
+   *
+   * @note FEValues ensures that this function is always called with the same
+   * pair of @p internal_data and @p output_data objects. In other words, if
+   * an implementation of this function knows that it has written a piece of
+   * data into the output argument in a previous call, then there is no need
+   * to copy it there again in a later call if the implementation knows that
+   * this is the same value.
+   */
+  virtual
+  CellSimilarity::Similarity
+  fill_fe_values (const typename Triangulation<dim,spacedim>::cell_iterator    &cell,
+                  const CellSimilarity::Similarity                              cell_similarity,
+                  const Quadrature<dim>                                        &quadrature,
+                  const typename Mapping<dim,spacedim>::InternalDataBase       &internal_data,
+                  dealii::internal::FEValues::MappingRelatedData<dim,spacedim> &output_data) const = 0;
+
+  /**
+   * This function is the equivalent to Mapping::fill_fe_values(), but for
+   * faces of cells. See there for an extensive discussion of its purpose. It
+   * is called by FEFaceValues::reinit().
+   *
+   * @param[in] cell The cell of the triangulation for which this function is
+   * to compute a mapping from the reference cell to.
+   * @param[in] face_no The number of the face of the given cell for which
+   * information is requested.
+   * @param[in] quadrature A reference to the quadrature formula in use for
+   * the current evaluation. This quadrature object is the same as the one
+   * used when creating the @p internal_data object. The object is used both
+   * to map the location of quadrature points, as well as to compute the JxW
+   * values for each quadrature point (which involves the quadrature weights).
+   * @param[in] internal_data A reference to an object previously created by
+   * get_data() and that may be used to store information the mapping can
+   * compute once on the reference cell. See the documentation of the
+   * Mapping::InternalDataBase class for an extensive description of the
+   * purpose of these objects.
+   * @param[out] output_data A reference to an object whose member variables
+   * should be computed. Not all of the members of this argument need to be
+   * filled; which ones need to be filled is determined by the update flags
+   * stored inside the @p internal_data object.
+   */
+  virtual void
+  fill_fe_face_values (const typename Triangulation<dim,spacedim>::cell_iterator    &cell,
+                       const unsigned int                                            face_no,
+                       const Quadrature<dim-1>                                      &quadrature,
+                       const typename Mapping<dim,spacedim>::InternalDataBase       &internal_data,
+                       dealii::internal::FEValues::MappingRelatedData<dim,spacedim> &output_data) const = 0;
+
+  /**
+   * This function is the equivalent to Mapping::fill_fe_values(), but for
+   * subfaces (i.e., children of faces) of cells. See there for an extensive
+   * discussion of its purpose. It is called by FESubfaceValues::reinit().
+   *
+   * @param[in] cell The cell of the triangulation for which this function is
+   * to compute a mapping from the reference cell to.
+   * @param[in] face_no The number of the face of the given cell for which
+   * information is requested.
+   * @param[in] subface_no The number of the child of a face of the given cell
+   * for which information is requested.
+   * @param[in] quadrature A reference to the quadrature formula in use for
+   * the current evaluation. This quadrature object is the same as the one
+   * used when creating the @p internal_data object. The object is used both
+   * to map the location of quadrature points, as well as to compute the JxW
+   * values for each quadrature point (which involves the quadrature weights).
+   * @param[in] internal_data A reference to an object previously created by
+   * get_data() and that may be used to store information the mapping can
+   * compute once on the reference cell. See the documentation of the
+   * Mapping::InternalDataBase class for an extensive description of the
+   * purpose of these objects.
+   * @param[out] output_data A reference to an object whose member variables
+   * should be computed. Not all of the members of this argument need to be
+   * filled; which ones need to be filled is determined by the update flags
+   * stored inside the @p internal_data object.
+   */
+  virtual void
+  fill_fe_subface_values (const typename Triangulation<dim,spacedim>::cell_iterator     &cell,
+                          const unsigned int                                             face_no,
+                          const unsigned int                                             subface_no,
+                          const Quadrature<dim-1>                                       &quadrature,
+                          const typename Mapping<dim,spacedim>::InternalDataBase        &internal_data,
+                          dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &output_data) const = 0;
+
+  /**
+   * @}
+   */
+
+public:
+
+  /**
+   * @name Functions to transform tensors from reference to real coordinates
+   * @{
+   */
+
+  /**
+   * Transform a field of vectors or 1-differential forms according to the
+   * selected MappingType.
+   *
+   * @note Normally, this function is called by a finite element, filling
+   * FEValues objects. For this finite element, there should be an alias
+   * MappingType like @p mapping_bdm, @p mapping_nedelec, etc. This alias
+   * should be preferred to using the types below.
+   *
+   * The mapping types currently implemented by derived classes are:
+   * <ul>
+   * <li> @p mapping_contravariant: maps a vector field on the reference cell
+   * to the physical cell through the Jacobian:
+   * @f[
+   * \mathbf u(\mathbf x) = J(\hat{\mathbf  x})\hat{\mathbf  u}(\hat{\mathbf  x}).
+   * @f]
+   * In physics, this is usually referred to as the contravariant
+   * transformation. Mathematically, it is the push forward of a vector field.
+   *
+   * <li> @p mapping_covariant: maps a field of one-forms on the reference
+   * cell to a field of one-forms on the physical cell. (Theoretically this
+   * would refer to a DerivativeForm<1,dim,1> but we canonically identify this
+   * type with a Tensor<1,dim>). Mathematically, it is the pull back of the
+   * differential form
+   * @f[
+   * \mathbf u(\mathbf x) = J(\hat{\mathbf  x})(J(\hat{\mathbf  x})^{T} J(\hat{\mathbf  x}))^{-1}\hat{\mathbf
+   * u}(\hat{\mathbf  x}).
+   * @f]
+   * Gradients of scalar differentiable functions are transformed this way.
+   *
+   * In the case when dim=spacedim the previous formula reduces to
+   * @f[
+   * \mathbf u(\mathbf x) = J(\hat{\mathbf  x})^{-T}\hat{\mathbf
+   * u}(\hat{\mathbf  x})
+   * @f]
+   * because we assume that the mapping $\mathbf F_K$ is always invertible,
+   * and consequently its Jacobian $J$ is an invertible matrix.
+   *
+   * <li> @p mapping_piola: A field of <i>dim-1</i>-forms on the reference
+   * cell is also represented by a vector field, but again transforms
+   * differently, namely by the Piola transform
+   * @f[
+   *  \mathbf u(\mathbf x) = \frac{1}{\text{det}\;J(\hat{\mathbf x})}
+   * J(\hat{\mathbf x}) \hat{\mathbf  u}(\hat{\mathbf x}).
+   * @f]
+   * </ul>
+   *
+   * @param[in] input An array (or part of an array) of input objects that
+   * should be mapped.
+   * @param[in] type The kind of mapping to be applied.
+   * @param[in] internal A reference to an object of type
+   * Mapping::InternalDataBase that contains information previously stored by
+   * the mapping. The object referred to was created by the get_data(),
+   * get_face_data(), or get_subface_data() function, and will have been
+   * updated as part of a call to fill_fe_values(), fill_fe_face_values(), or
+   * fill_fe_subface_values() for the current cell, before calling the current
+   * function. In other words, this object also represents with respect to
+   * which cell the transformation should be applied to.
+   * @param[out] output An array (or part of an array) into which the
+   * transformed objects should be placed. (Note that the array view is @p
+   * const, but the tensors it points to are not.)
+   */
+  virtual
+  void
+  transform (const ArrayView<const Tensor<1,dim> >                  &input,
+             const MappingType                                       type,
+             const typename Mapping<dim,spacedim>::InternalDataBase &internal,
+             const ArrayView<Tensor<1,spacedim> >                   &output) const = 0;
+
+  /**
+   * Transform a field of differential forms from the reference cell to the
+   * physical cell.  It is useful to think of $\mathbf{T} = \nabla \mathbf u$
+   * and $\hat{\mathbf  T} = \hat \nabla \hat{\mathbf  u}$, with $\mathbf u$ a
+   * vector field.  The mapping types currently implemented by derived classes
+   * are:
+   * <ul>
+   * <li> @p mapping_covariant: maps a field of forms on the reference cell to
+   * a field of forms on the physical cell. Mathematically, it is the pull
+   * back of the differential form
+   * @f[
+   * \mathbf T(\mathbf x) = \hat{\mathbf  T}(\hat{\mathbf  x})
+   *                        J(\hat{\mathbf  x})(J(\hat{\mathbf  x})^{T} J(\hat{\mathbf  x}))^{-1}.
+   * @f]
+   * Jacobians of spacedim-vector valued differentiable functions are
+   * transformed this way.
+   *
+   * In the case when dim=spacedim the previous formula reduces to
+   * @f[
+   * \mathbf T(\mathbf x) = \hat{\mathbf  T}(\hat{\mathbf  x})
+   *                        J(\hat{\mathbf  x})^{-1}.
+   * @f]
+   * </ul>
+   *
+   * @note It would have been more reasonable to make this transform a
+   * template function with the rank in <code>DerivativeForm@<1, dim,
+   * rank@></code>. Unfortunately C++ does not allow templatized virtual
+   * functions. This is why we identify <code>DerivativeForm@<1, dim,
+   * 1@></code> with a <code>Tensor@<1,dim@></code> when using
+   * @p mapping_covariant in the function transform() above this one.
+   *
+   * @param[in] input An array (or part of an array) of input objects that
+   * should be mapped.
+   * @param[in] type The kind of mapping to be applied.
+   * @param[in] internal A reference to an object of type
+   * Mapping::InternalDataBase that contains information previously stored by
+   * the mapping. The object referred to was created by the get_data(),
+   * get_face_data(), or get_subface_data() function, and will have been
+   * updated as part of a call to fill_fe_values(), fill_fe_face_values(), or
+   * fill_fe_subface_values() for the current cell, before calling the current
+   * function. In other words, this object also identifies the cell with
+   * respect to which the transformation should be applied.
+   * @param[out] output An array (or part of an array) into which the
+   * transformed objects should be placed. (Note that the array view is @p
+   * const, but the tensors it points to are not.)
+   */
+  virtual
+  void
+  transform (const ArrayView<const DerivativeForm<1, dim, spacedim> > &input,
+             const MappingType                                         type,
+             const typename Mapping<dim,spacedim>::InternalDataBase   &internal,
+             const ArrayView<Tensor<2,spacedim> >                     &output) const = 0;
+
+  /**
+   * Transform a tensor field from the reference cell to the physical cell.
+   * These tensors are usually the Jacobians in the reference cell of vector
+   * fields that have been pulled back from the physical cell.  The mapping
+   * types currently implemented by derived classes are:
+   * <ul>
+   * <li> @p mapping_contravariant_gradient: it assumes $\mathbf u(\mathbf x)
+   * = J \hat{\mathbf  u}$ so that
+   * @f[
+   * \mathbf T(\mathbf x) =
+   * J(\hat{\mathbf  x}) \hat{\mathbf  T}(\hat{\mathbf  x})
+   * J(\hat{\mathbf  x})^{-1}.
+   * @f]
+   * <li> @p mapping_covariant_gradient: it assumes $\mathbf u(\mathbf x) =
+   * J^{-T} \hat{\mathbf  u}$ so that
+   * @f[
+   * \mathbf T(\mathbf x) =
+   * J(\hat{\mathbf  x})^{-T} \hat{\mathbf  T}(\hat{\mathbf  x})
+   * J(\hat{\mathbf  x})^{-1}.
+   * @f]
+   * <li> @p mapping_piola_gradient: it assumes $\mathbf u(\mathbf x) =
+   * \frac{1}{\text{det}\;J(\mathbf x)} J(\mathbf x) \hat{\mathbf  u}(\mathbf
+   * x)$ so that
+   * @f[
+   * \mathbf T(\mathbf x) =
+   * \frac{1}{\text{det}\;J(\mathbf x)}
+   * J(\hat{\mathbf  x}) \hat{\mathbf  T}(\hat{\mathbf  x})
+   * J(\hat{\mathbf  x})^{-1}.
+   * @f]
+   * </ul>
+   *
+   * @todo The formulas for mapping_covariant_gradient,
+   * mapping_contravariant_gradient and mapping_piola_gradient are only true
+   * as stated for linear mappings. If, for example, the mapping is bilinear
+   * (or has a higher order polynomial degree) then there is a missing term
+   * associated with the derivative of $J$.
+   *
+   * @param[in] input An array (or part of an array) of input objects that
+   * should be mapped.
+   * @param[in] type The kind of mapping to be applied.
+   * @param[in] internal A reference to an object of type
+   * Mapping::InternalDataBase that contains information previously stored by
+   * the mapping. The object referred to was created by the get_data(),
+   * get_face_data(), or get_subface_data() function, and will have been
+   * updated as part of a call to fill_fe_values(), fill_fe_face_values(), or
+   * fill_fe_subface_values() for the current cell, before calling the current
+   * function. In other words, this object also identifies the cell with
+   * respect to which the transformation should be applied.
+   * @param[out] output An array (or part of an array) into which the
+   * transformed objects should be placed. (Note that the array view is @p
+   * const, but the tensors it points to are not.)
+   */
+  virtual
+  void
+  transform (const ArrayView<const Tensor<2, dim> >                 &input,
+             const MappingType                                       type,
+             const typename Mapping<dim,spacedim>::InternalDataBase &internal,
+             const ArrayView<Tensor<2,spacedim> >                   &output) const = 0;
+
+  /**
+   * Transform a tensor field from the reference cell to the physical cell.
+   * These tensors are most often the Hessians in the reference cell of
+   * vector fields that have been pulled back from the physical cell.
+   *
+   * The mapping types currently implemented by derived classes are:
+   * <ul>
+   * <li> @p mapping_covariant_gradient: maps a field of forms on the
+   * reference cell to a field of forms on the physical cell. Mathematically,
+   * it is the pull back of the differential form
+   * @f[
+   * \mathbf T_{ijk}(\mathbf x) = \hat{\mathbf  T}_{iJK}(\hat{\mathbf  x}) J_{jJ}^{\dagger} J_{kK}^{\dagger},
+   * @f]
+   * where @f[ J^{\dagger} = J(\hat{\mathbf  x})(J(\hat{\mathbf  x})^{T} J(\hat{\mathbf  x}))^{-1}.
+   * @f]
+   * </ul>
+   *
+   * Hessians of spacedim-vector valued differentiable functions are
+   * transformed this way (After subtraction of the product of the derivative
+   * with the Jacobian gradient).
+   *
+   * In the case when dim=spacedim the previous formula reduces to
+   * @f[J^{\dagger} = J^{-1}@f]
+   *
+   * @param[in] input An array (or part of an array) of input objects that
+   * should be mapped.
+   * @param[in] type The kind of mapping to be applied.
+   * @param[in] internal A reference to an object of type
+   * Mapping::InternalDataBase that contains information previously stored by
+   * the mapping. The object referred to was created by the get_data(),
+   * get_face_data(), or get_subface_data() function, and will have been
+   * updated as part of a call to fill_fe_values(), fill_fe_face_values(), or
+   * fill_fe_subface_values() for the current cell, before calling the current
+   * function. In other words, this object also identifies the cell with
+   * respect to which the transformation should be applied.
+   * @param[out] output An array (or part of an array) into which the
+   * transformed objects should be placed. (Note that the array view is @p
+   * const, but the tensors it points to are not.)
+   */
+  virtual
+  void
+  transform (const ArrayView<const DerivativeForm<2, dim, spacedim> > &input,
+             const MappingType                                         type,
+             const typename Mapping<dim,spacedim>::InternalDataBase   &internal,
+             const ArrayView<Tensor<3,spacedim> >                     &output) const = 0;
+
+  /**
+   * Transform a field of 3-differential forms from the reference cell to the
+   * physical cell.  It is useful to think of $\mathbf{T}_{ijk} = D^2_{jk}
+   * \mathbf u_i$ and $\mathbf{\hat T}_{IJK} = \hat D^2_{JK} \mathbf{\hat
+   * u}_I$, with $\mathbf u_i$ a vector field.
+   *
+   * The mapping types currently implemented by derived classes are:
+   * <ul>
+   * <li> @p mapping_contravariant_hessian: it assumes $\mathbf u_i(\mathbf x)
+   * = J_{iI} \hat{\mathbf  u}_I$ so that
+   * @f[
+   * \mathbf T_{ijk}(\mathbf x) =
+   * J_{iI}(\hat{\mathbf  x}) \hat{\mathbf  T}_{IJK}(\hat{\mathbf  x})
+   * J_{jJ}(\hat{\mathbf  x})^{-1} J_{kK}(\hat{\mathbf  x})^{-1}.
+   * @f]
+   * <li> @p mapping_covariant_hessian: it assumes $\mathbf u_i(\mathbf x) =
+   * J_{iI}^{-T} \hat{\mathbf  u}_I$ so that
+   * @f[
+   * \mathbf T_{ijk}(\mathbf x) =
+   * J_{iI}(\hat{\mathbf  x})^{-1} \hat{\mathbf  T}_{IJK}(\hat{\mathbf  x})
+   * J_{jJ}(\hat{\mathbf  x})^{-1} J_{kK}(\hat{\mathbf  x})^{-1}.
+   * @f]
+   * <li> @p mapping_piola_hessian: it assumes $\mathbf u_i(\mathbf x) =
+   * \frac{1}{\text{det}\;J(\mathbf x)} J_{iI}(\mathbf x) \hat{\mathbf
+   * u}_I(\mathbf x)$ so that
+   * @f[
+   * \mathbf T_{ijk}(\mathbf x) =
+   * \frac{1}{\text{det}\;J(\mathbf x)}
+   * J_{iI}(\hat{\mathbf  x}) \hat{\mathbf  T}_{IJK}(\hat{\mathbf  x})
+   * J_{jJ}(\hat{\mathbf  x})^{-1} J_{kK}(\hat{\mathbf  x})^{-1}.
+   * @f]
+   * </ul>
+   *
+   * @param[in] input An array (or part of an array) of input objects that
+   * should be mapped.
+   * @param[in] type The kind of mapping to be applied.
+   * @param[in] internal A reference to an object of type
+   * Mapping::InternalDataBase that contains information previously stored by
+   * the mapping. The object referred to was created by the get_data(),
+   * get_face_data(), or get_subface_data() function, and will have been
+   * updated as part of a call to fill_fe_values(), fill_fe_face_values(), or
+   * fill_fe_subface_values() for the current cell, before calling the current
+   * function. In other words, this object also identifies the cell with
+   * respect to which the transformation should be applied.
+   * @param[out] output An array (or part of an array) into which the
+   * transformed objects should be placed.
+   */
+  virtual
+  void
+  transform (const ArrayView<const Tensor<3, dim> >                 &input,
+             const MappingType                                       type,
+             const typename Mapping<dim,spacedim>::InternalDataBase &internal,
+             const ArrayView<Tensor<3,spacedim> >                   &output) const = 0;
+
+  /**
+   * @}
+   */
+
+
+  /**
+   * Give class @p FEValues access to the private <tt>get_...data</tt> and
+   * <tt>fill_fe_...values</tt> functions.
+   */
+  friend class FEValuesBase<dim,spacedim>;
+  friend class FEValues<dim,spacedim>;
+  friend class FEFaceValues<dim,spacedim>;
+  friend class FESubfaceValues<dim,spacedim>;
+};
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/fe/mapping_c1.h b/include/deal.II/fe/mapping_c1.h
new file mode 100644
index 0000000..8c6512c
--- /dev/null
+++ b/include/deal.II/fe/mapping_c1.h
@@ -0,0 +1,133 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__mapping_c1_h
+#define dealii__mapping_c1_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/fe/mapping_q.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+/*!@addtogroup mapping */
+/*@{*/
+
+/**
+ * Mapping class that uses C1 (continuously differentiable) cubic mappings of
+ * the boundary. This class is built on top of MappingQ by simply determining
+ * the interpolation points for a cubic mapping of the boundary differently:
+ * MappingQ chooses them such that they interpolate the boundary, while this
+ * class chooses them such that the discretized boundary is globally
+ * continuously differentiable.
+ *
+ * To use this class, make sure that the Boundary::get_normals_at_vertices()
+ * function is implemented for the user's boundary object.
+ *
+ * @author Wolfgang Bangerth, 2001, 2015
+ */
+template<int dim, int spacedim=dim>
+class MappingC1 : public MappingQ<dim,spacedim>
+{
+public:
+  /**
+   * Constructor. Pass the fixed degree @p 3 down to the base class, as a cubic
+   * mapping suffices to make the mapped boundary continuously differentiable.
+   */
+  MappingC1 ();
+
+  /**
+   * Return a pointer to a copy of the present object. The caller of this
+   * function then assumes ownership of the copy.
+   */
+  virtual
+  Mapping<dim,spacedim> *clone () const;
+
+protected:
+
+  /**
+   * A class derived from MappingQGeneric that provides the generic mapping
+   * with support points on boundary objects so that the corresponding Q3
+   * mapping ends up being C1.
+   */
+  class MappingC1Generic : public MappingQGeneric<dim,spacedim>
+  {
+  public:
+
+    /**
+     * Constructor.
+     */
+    MappingC1Generic ();
+
+    /**
+     * For <tt>dim=2,3</tt>. Append the support points of all shape functions
+     * located on bounding lines to the vector @p a. Points located on the
+     * line but at vertices are not included.
+     *
+     * Needed by the <tt>compute_support_points_simple(laplace)</tt>
+     * functions. For <tt>dim=1</tt> this function is empty.
+     *
+     * This function chooses the respective points not such that they are
+     * interpolating the boundary (as does the base class), but rather such
+     * that the resulting cubic mapping is a continuously differentiable one.
+     */
+    virtual void
+    add_line_support_points (const typename Triangulation<dim>::cell_iterator &cell,
+                             std::vector<Point<dim> > &a) const;
+
+    /**
+     * For <tt>dim=3</tt>. Append the support points of all shape functions
+     * located on bounding faces (quads in 3d) to the vector @p a. Points
+     * located on the line but on vertices are not included.
+     *
+     * Needed by the @p compute_support_points_laplace function. For
+     * <tt>dim=1</tt> and 2 this function is empty.
+     *
+     * This function chooses the respective points not such that they are
+     * interpolating the boundary (as does the base class), but rather such
+     * that the resulting cubic mapping is a continuously differentiable one.
+     */
+    virtual void
+    add_quad_support_points(const typename Triangulation<dim>::cell_iterator &cell,
+                            std::vector<Point<dim> > &a) const;
+  };
+};
+
+/*@}*/
+
+/* ------------- declarations of explicit specializations ------------- */
+// (hidden from doxygen; add_line/quad_support_points for dim=1 and dim=2)
+#ifndef DOXYGEN
+
+template <> void MappingC1<1>::MappingC1Generic::add_line_support_points (
+  const Triangulation<1>::cell_iterator &,
+  std::vector<Point<1> > &) const;
+template <> void MappingC1<2>::MappingC1Generic::add_line_support_points (
+  const Triangulation<2>::cell_iterator &cell,
+  std::vector<Point<2> > &a) const;
+
+template <> void MappingC1<1>::MappingC1Generic::add_quad_support_points (
+  const Triangulation<1>::cell_iterator &,
+  std::vector<Point<1> > &) const;
+template <> void MappingC1<2>::MappingC1Generic::add_quad_support_points (
+  const Triangulation<2>::cell_iterator &,
+  std::vector<Point<2> > &) const;
+
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/fe/mapping_cartesian.h b/include/deal.II/fe/mapping_cartesian.h
new file mode 100644
index 0000000..f0c9052
--- /dev/null
+++ b/include/deal.II/fe/mapping_cartesian.h
@@ -0,0 +1,268 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__mapping_cartesian_h
+#define dealii__mapping_cartesian_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/table.h>
+#include <cmath>
+#include <deal.II/fe/mapping.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+/*!@addtogroup mapping */
+/*@{*/
+
+/**
+ * A class providing a mapping from the reference cell to cells that are
+ * axis-parallel.
+ *
+ * This class maps the unit cell to a grid cell with surfaces parallel to the
+ * coordinate lines/planes. It is specifically developed for Cartesian meshes.
+ * In other words, the mapping is meant for cells for which the mapping from
+ * the reference to the real cell is a scaling along the coordinate
+ * directions: The transformation from reference coordinates $\hat {\mathbf
+ * x}$ to real coordinates $\mathbf x$ on each cell is of the form
+ * @f{align*}{
+ * {\mathbf x}(\hat {\mathbf x}) = \begin{pmatrix} h_x & 0 \\ 0 & h_y
+ * \end{pmatrix} \hat{\mathbf x} + {\mathbf v}_0
+ * @f}
+ * in 2d, and
+ * @f{align*}{
+ * {\mathbf x}(\hat {\mathbf x}) = \begin{pmatrix} h_x & 0 & 0 \\ 0 & h_y & 0
+ * \\ 0 & 0 & h_z \end{pmatrix} \hat{\mathbf x} + {\mathbf v}_0
+ * @f}
+ * in 3d, where ${\mathbf v}_0$ is the bottom left vertex and $h_x,h_y,h_z$
+ * are the extents of the cell along the axes.
+ *
+ * The class is intended for efficiency, and it does not do a whole lot of
+ * error checking. If you apply this mapping to a cell that does not conform
+ * to the requirements above, you will get strange results.
+ *
+ * @author Guido Kanschat, 2001; Ralf Hartmann, 2005
+ */
+template <int dim, int spacedim=dim>
+class MappingCartesian : public Mapping<dim,spacedim>
+{
+public:
+
+  // for documentation, see the Mapping base class
+  virtual
+  Mapping<dim, spacedim> *clone () const;
+
+  /**
+   * Always returns @p true because MappingCartesian preserves vertex
+   * locations.
+   */
+  bool preserves_vertex_locations () const;
+
+  /**
+   * @name Mapping points between reference and real cells
+   * @{
+   */
+
+  // for documentation, see the Mapping base class
+  virtual
+  Point<spacedim>
+  transform_unit_to_real_cell (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                               const Point<dim>                                          &p) const;
+
+  // for documentation, see the Mapping base class
+  virtual
+  Point<dim>
+  transform_real_to_unit_cell (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                               const Point<spacedim>                                     &p) const;
+
+  /**
+   * @}
+   */
+
+  /**
+   * @name Functions to transform tensors from reference to real coordinates
+   * @{
+   */
+
+  // for documentation, see the Mapping base class
+  virtual
+  void
+  transform (const ArrayView<const Tensor<1,dim> >                  &input,
+             const MappingType                                       type,
+             const typename Mapping<dim,spacedim>::InternalDataBase &internal,
+             const ArrayView<Tensor<1,spacedim> >                   &output) const;
+
+  // for documentation, see the Mapping base class
+  virtual
+  void
+  transform (const ArrayView<const DerivativeForm<1, dim, spacedim> > &input,
+             const MappingType                                         type,
+             const typename Mapping<dim,spacedim>::InternalDataBase   &internal,
+             const ArrayView<Tensor<2,spacedim> >                     &output) const;
+
+  // for documentation, see the Mapping base class
+  virtual
+  void
+  transform (const ArrayView<const Tensor<2, dim> >                 &input,
+             const MappingType                                       type,
+             const typename Mapping<dim,spacedim>::InternalDataBase &internal,
+             const ArrayView<Tensor<2,spacedim> >                   &output) const;
+
+  // for documentation, see the Mapping base class
+  virtual
+  void
+  transform (const ArrayView<const DerivativeForm<2, dim, spacedim> > &input,
+             const MappingType                                         type,
+             const typename Mapping<dim,spacedim>::InternalDataBase   &internal,
+             const ArrayView<Tensor<3,spacedim> >                     &output) const;
+
+  // for documentation, see the Mapping base class
+  virtual
+  void
+  transform (const ArrayView<const Tensor<3, dim> >                 &input,
+             const MappingType                                       type,
+             const typename Mapping<dim,spacedim>::InternalDataBase &internal,
+             const ArrayView<Tensor<3,spacedim> >                   &output) const;
+
+  /**
+   * @}
+   */
+
+
+private:
+
+  /**
+   * @name Interface with FEValues
+   * @{
+   */
+
+  /**
+   * Storage for internal data of the mapping. See Mapping::InternalDataBase
+   * for an extensive description.
+   *
+   * This includes data that is computed once when the object is created (in
+   * get_data()) as well as data the class wants to store between the call
+   * to fill_fe_values(), fill_fe_face_values(), or
+   * fill_fe_subface_values() and possible later calls from the finite
+   * element to functions such as transform(). The latter class of member
+   * variables are marked as 'mutable'.
+   */
+  class InternalData : public Mapping<dim, spacedim>::InternalDataBase
+  {
+  public:
+    /**
+     * Constructor.
+     */
+    InternalData (const Quadrature<dim> &quadrature);
+
+    /**
+     * Return an estimate (in bytes) of the memory consumption of this object.
+     */
+    virtual std::size_t memory_consumption () const;
+
+    /**
+     * Extents of the last cell we have seen in the coordinate directions,
+     * i.e., <i>h<sub>x</sub></i>, <i>h<sub>y</sub></i>, <i>h<sub>z</sub></i>.
+     */
+    mutable Tensor<1,dim> cell_extents;
+
+    /**
+     * The volume element.
+     */
+    mutable double volume_element;
+
+    /**
+     * Vector of all quadrature points, in particular all points on all faces.
+     */
+    std::vector<Point<dim> > quadrature_points;
+  };
+
+  // documentation can be found in Mapping::requires_update_flags()
+  virtual
+  UpdateFlags
+  requires_update_flags (const UpdateFlags update_flags) const;
+
+  // documentation can be found in Mapping::get_data()
+  virtual
+  typename Mapping<dim, spacedim>::InternalDataBase *
+  get_data (const UpdateFlags,
+            const Quadrature<dim> &quadrature) const;
+
+  // documentation can be found in Mapping::get_face_data()
+  virtual
+  typename Mapping<dim, spacedim>::InternalDataBase *
+  get_face_data (const UpdateFlags flags,
+                 const Quadrature<dim-1>& quadrature) const;
+
+  // documentation can be found in Mapping::get_subface_data()
+  virtual
+  typename Mapping<dim, spacedim>::InternalDataBase *
+  get_subface_data (const UpdateFlags flags,
+                    const Quadrature<dim-1>& quadrature) const;
+
+  // documentation can be found in Mapping::fill_fe_values()
+  virtual
+  CellSimilarity::Similarity
+  fill_fe_values (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                  const CellSimilarity::Similarity                           cell_similarity,
+                  const Quadrature<dim>                                     &quadrature,
+                  const typename Mapping<dim,spacedim>::InternalDataBase    &internal_data,
+                  internal::FEValues::MappingRelatedData<dim,spacedim>      &output_data) const;
+
+  // documentation can be found in Mapping::fill_fe_face_values()
+  virtual void
+  fill_fe_face_values (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                       const unsigned int                                         face_no,
+                       const Quadrature<dim-1>                                   &quadrature,
+                       const typename Mapping<dim,spacedim>::InternalDataBase    &internal_data,
+                       internal::FEValues::MappingRelatedData<dim,spacedim>      &output_data) const;
+
+  // documentation can be found in Mapping::fill_fe_subface_values()
+  virtual void
+  fill_fe_subface_values (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                          const unsigned int                                         face_no,
+                          const unsigned int                                         subface_no,
+                          const Quadrature<dim-1>                                   &quadrature,
+                          const typename Mapping<dim,spacedim>::InternalDataBase    &internal_data,
+                          internal::FEValues::MappingRelatedData<dim,spacedim>      &output_data) const;
+
+  /**
+   * @}
+   */
+
+
+
+  /**
+   * Do the computation for the <tt>fill_*</tt> functions.
+   */
+  void compute_fill (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                     const unsigned int face_no,
+                     const unsigned int sub_no,
+                     const CellSimilarity::Similarity cell_similarity,
+                     const InternalData &data,
+                     std::vector<Point<dim> > &quadrature_points,
+                     std::vector<Tensor<1,dim> > &normal_vectors) const;
+
+  /**
+   * Value to indicate that a given face or subface number is invalid.
+   */
+  static const unsigned int invalid_face_number = numbers::invalid_unsigned_int;
+};
+
+/*@}*/
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/fe/mapping_fe_field.h b/include/deal.II/fe/mapping_fe_field.h
new file mode 100644
index 0000000..cb0a5d7
--- /dev/null
+++ b/include/deal.II/fe/mapping_fe_field.h
@@ -0,0 +1,610 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__mapping_fe_field_h
+#define dealii__mapping_fe_field_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/table.h>
+#include <deal.II/fe/mapping.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/base/qprojector.h>
+#include <deal.II/base/thread_management.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/*!@addtogroup mapping */
+/*@{*/
+
+/**
+ * The MappingFEField is a generalization of the MappingQEulerian class, for
+ * arbitrary vector finite elements. The two main differences are that this
+ * class uses a vector of absolute positions instead of a vector of
+ * displacements, and it allows for arbitrary FiniteElement types, instead of
+ * only FE_Q.
+ *
+ * This class effectively decouples the topology from the geometry, by
+ * relegating all geometrical information to some components of a
+ * FiniteElement vector field. The components that are used for the geometry
+ * can be arbitrarily selected at construction time.
+ *
+ * The idea is to consider the Triangulation as a parameter configuration
+ * space, on which we  construct an arbitrary geometrical mapping, using the
+ * instruments of the deal.II library: a vector of degrees of freedom, a
+ * DoFHandler associated to the geometry of the problem and a ComponentMask
+ * that tells us which components of the FiniteElement to use for the mapping.
+ *
+ * Typically, the DoFHandler operates on a finite element that is constructed
+ * as a system element (FESystem()) from continuous FE_Q() (for iso-parametric
+ * discretizations) or FE_Bernstein() (for iso-geometric discretizations)
+ * objects. An example is shown below:
+ *
+ * @code
+ *    const FE_Q<dim,spacedim> feq(1);
+ *    const FESystem<dim,spacedim> fesystem(feq, spacedim);
+ *    DoFHandler<dim,spacedim> dhq(triangulation);
+ *    dhq.distribute_dofs(fesystem);
+ *    const ComponentMask mask(spacedim, true);
+ *    Vector<double> eulerq(dhq.n_dofs());
+ *    // Fills the euler vector with information from the Triangulation
+ *    VectorTools::get_position_vector(dhq, eulerq, mask);
+ *    MappingFEField<dim,spacedim> map(dhq, eulerq, mask);
+ * @endcode
+ *
+ * @author Luca Heltai, Marco Tezzele 2013, 2015
+ */
+template <int dim, int spacedim=dim,
+          typename VectorType=Vector<double>,
+          typename DoFHandlerType=DoFHandler<dim,spacedim> >
+class MappingFEField : public Mapping<dim,spacedim>
+{
+public:
+  /**
+   * Constructor. The first argument is a VectorType that specifies the
+   * transformation of the domain from the reference to the current
+   * configuration.
+   *
+   * In general this class decouples geometry from topology, allowing users to
+   * define geometries which are only topologically equivalent to the
+   * underlying Triangulation, but which may otherwise be arbitrary.
+   * Differently from what happens in MappingQEulerian, the FiniteElement
+   * field which is passed to the constructor is interpreted as an absolute
+   * geometrical configuration, therefore one has to make sure that the
+   * euler_vector actually represents a valid geometry (i.e., one with no
+   * inverted cells, or with no zero-volume cells).
+   *
+   * If the underlying FiniteElement is a system of FE_Q(), and euler_vector
+   * is initialized using VectorTools::get_position_vector(), then this class
+   * is in all respects identical to MappingQ().
+   *
+   * The optional ComponentMask argument can be used to specify what
+   * components of the FiniteElement to use for the geometrical
+   * transformation. If no mask is specified at construction time, then a
+   * default one is used, which makes this class work in the same way as
+   * MappingQEulerian(), i.e., the first spacedim components of the
+   * FiniteElement are assumed to represent the geometry of the problem.
+   *
+   * Notice that if a mask is specified, it has to match in size the
+   * underlying FiniteElement, and it has to have exactly spacedim non-zero
+   * elements, indicating the components (in order) of the FiniteElement which
+   * will be used for the geometry.
+   *
+   * If an incompatible mask is passed, an exception is thrown.
+   */
+  MappingFEField (const DoFHandlerType &euler_dof_handler,
+                  const VectorType     &euler_vector,
+                  const ComponentMask   mask = ComponentMask());
+
+  /**
+   * Copy constructor.
+   */
+  MappingFEField (const MappingFEField<dim,spacedim,VectorType,DoFHandlerType> &mapping);
+
+  /**
+   * Return a pointer to a copy of the present object. The caller of this copy
+   * then assumes ownership of it.
+   */
+  virtual
+  Mapping<dim,spacedim> *clone () const;
+
+
+
+  /**
+   * Always returns @p false.
+   */
+  virtual
+  bool preserves_vertex_locations () const;
+
+
+  /**
+   * @name Mapping points between reference and real cells
+   * @{
+   */
+
+  // for documentation, see the Mapping base class
+  virtual
+  Point<spacedim>
+  transform_unit_to_real_cell (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                               const Point<dim>                                 &p) const;
+
+  // for documentation, see the Mapping base class
+  virtual
+  Point<dim>
+  transform_real_to_unit_cell (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                               const Point<spacedim>                            &p) const;
+
+  /**
+   * @}
+   */
+
+  /**
+   * @name Functions to transform tensors from reference to real coordinates
+   * @{
+   */
+
+  // for documentation, see the Mapping base class
+  virtual
+  void
+  transform (const ArrayView<const Tensor<1,dim> >                  &input,
+             const MappingType                                       type,
+             const typename Mapping<dim,spacedim>::InternalDataBase &internal,
+             const ArrayView<Tensor<1,spacedim> >                   &output) const;
+
+  // for documentation, see the Mapping base class
+  virtual
+  void
+  transform (const ArrayView<const DerivativeForm<1, dim, spacedim> > &input,
+             const MappingType                                         type,
+             const typename Mapping<dim,spacedim>::InternalDataBase   &internal,
+             const ArrayView<Tensor<2,spacedim> >                     &output) const;
+
+  // for documentation, see the Mapping base class
+  virtual
+  void
+  transform (const ArrayView<const Tensor<2, dim> >                 &input,
+             const MappingType                                       type,
+             const typename Mapping<dim,spacedim>::InternalDataBase &internal,
+             const ArrayView<Tensor<2,spacedim> >                   &output) const;
+
+  // for documentation, see the Mapping base class
+  virtual
+  void
+  transform (const ArrayView<const DerivativeForm<2, dim, spacedim> > &input,
+             const MappingType                                         type,
+             const typename Mapping<dim,spacedim>::InternalDataBase   &internal,
+             const ArrayView<Tensor<3,spacedim> >                     &output) const;
+
+  // for documentation, see the Mapping base class
+  virtual
+  void
+  transform (const ArrayView<const Tensor<3, dim> >                 &input,
+             const MappingType                                       type,
+             const typename Mapping<dim,spacedim>::InternalDataBase &internal,
+             const ArrayView<Tensor<3,spacedim> >                   &output) const;
+
+  /**
+   * @}
+   */
+
+
+  /**
+   * Return the degree of the mapping. NOTE(review): the constructor takes no
+   * degree, so presumably this is the FiniteElement's degree -- confirm.
+   */
+  unsigned int get_degree () const;
+
+  /**
+   * Return the ComponentMask of the mapping, i.e. which components to use for
+   * the mapping.
+   */
+  ComponentMask get_component_mask () const;
+
+  /**
+   * Exception
+   */
+  DeclException0(ExcInactiveCell);
+
+private:
+
+  /**
+   * @name Interface with FEValues
+   * @{
+   */
+
+  // documentation can be found in Mapping::requires_update_flags()
+  virtual
+  UpdateFlags
+  requires_update_flags (const UpdateFlags update_flags) const;
+
+public:
+  /**
+   * Storage for internal data of this mapping. See Mapping::InternalDataBase
+   * for an extensive description.
+   *
+   * This includes data that is computed once when the object is created (in
+   * get_data()) as well as data the class wants to store between the
+   * call to fill_fe_values(), fill_fe_face_values(), or
+   * fill_fe_subface_values() until possible later calls from the finite
+   * element to functions such as transform(). The latter class of member
+   * variables are marked as 'mutable', along with scratch arrays.
+   */
+  class InternalData : public Mapping<dim,spacedim>::InternalDataBase
+  {
+  public:
+    /**
+     * Constructor.
+     */
+    InternalData(const FiniteElement<dim,spacedim> &fe,
+                 const ComponentMask mask);
+
+    /**
+     * Shape function at quadrature point. Shape functions are in tensor
+     * product order, so vertices must be reordered to obtain transformation.
+     */
+    const double &shape (const unsigned int qpoint,
+                         const unsigned int shape_nr) const;
+
+    /**
+     * Shape function at quadrature point. See above.
+     */
+    double &shape (const unsigned int qpoint,
+                   const unsigned int shape_nr);
+
+    /**
+     * Gradient of shape function in quadrature point. See above.
+     */
+    const Tensor<1,dim> &derivative (const unsigned int qpoint,
+                                     const unsigned int shape_nr) const;
+
+    /**
+     * Gradient of shape function in quadrature point. See above.
+     */
+    Tensor<1,dim> &derivative (const unsigned int qpoint,
+                               const unsigned int shape_nr);
+
+    /**
+     * Second derivative of shape function in quadrature point. See above.
+     */
+    const Tensor<2,dim> &second_derivative (const unsigned int qpoint,
+                                            const unsigned int shape_nr) const;
+
+    /**
+     * Second derivative of shape function in quadrature point. See above.
+     */
+    Tensor<2,dim> &second_derivative (const unsigned int qpoint,
+                                      const unsigned int shape_nr);
+
+    /**
+     * Third derivative of shape function in quadrature point. See above.
+     */
+    const Tensor<3,dim> &third_derivative (const unsigned int qpoint,
+                                           const unsigned int shape_nr) const;
+
+    /**
+     * Third derivative of shape function in quadrature point. See above.
+     */
+    Tensor<3,dim> &third_derivative (const unsigned int qpoint,
+                                     const unsigned int shape_nr);
+
+    /**
+     * Fourth derivative of shape function in quadrature point. See above.
+     */
+    const Tensor<4,dim> &fourth_derivative (const unsigned int qpoint,
+                                            const unsigned int shape_nr) const;
+
+    /**
+     * Fourth derivative of shape function in quadrature point. See above.
+     */
+    Tensor<4,dim> &fourth_derivative (const unsigned int qpoint,
+                                      const unsigned int shape_nr);
+
+    /**
+     * Return an estimate (in bytes) of the memory consumption of this object.
+     */
+    virtual std::size_t memory_consumption () const;
+
+    /**
+     * Values of shape functions. Access by function @p shape.
+     *
+     * Computed once.
+     */
+    std::vector<double> shape_values;
+
+    /**
+     * Values of shape function derivatives. Access by function @p derivative.
+     *
+     * Computed once.
+     */
+    std::vector<Tensor<1,dim> > shape_derivatives;
+
+    /**
+     * Values of shape function second derivatives. Access by function @p
+     * second_derivative.
+     *
+     * Computed once.
+     */
+    std::vector<Tensor<2,dim> > shape_second_derivatives;
+
+    /**
+     * Values of shape function third derivatives. Access by function @p
+     * third_derivative.
+     *
+     * Computed once.
+     */
+    std::vector<Tensor<3,dim> > shape_third_derivatives;
+
+    /**
+     * Values of shape function fourth derivatives. Access by function @p
+     * fourth_derivative.
+     *
+     * Computed once.
+     */
+    std::vector<Tensor<4,dim> > shape_fourth_derivatives;
+
+    /**
+     * Unit tangential vectors. Used for the computation of boundary forms and
+     * normal vectors.
+     *
+     * This vector has (dim-1)*GeometryInfo::faces_per_cell entries. The first
+     * GeometryInfo::faces_per_cell contain the vectors in the first
+     * tangential direction for each face; the second set of
+     * GeometryInfo::faces_per_cell entries contain the vectors in the second
+     * tangential direction (only in 3d, since there we have 2 tangential
+     * directions per face), etc.
+     *
+     * Filled once.
+     */
+    std::vector<std::vector<Tensor<1,dim> > > unit_tangentials;
+
+    /**
+     * Number of shape functions. If this is a Q1 mapping, then it is simply
+     * the number of vertices per cell. However, since also derived classes
+     * use this class (e.g. the Mapping_Q() class), the number of shape
+     * functions may also be different.
+     */
+    unsigned int n_shape_functions;
+
+    /**
+     * Stores the mask given at construction time. If no mask was specified at
+     * construction time, then a default one is used, which makes this class
+     * work in the same way as MappingQEulerian(), i.e., the first spacedim
+     * components of the FiniteElement are used for the euler_vector and the
+     * euler_dh.
+     *
+     * If a mask is specified, then it has to match the underlying
+     * FiniteElement, and it has to have exactly spacedim non-zero elements,
+     * indicating the components (in order) of the FiniteElement which will be
+     * used for the euler vector and the euler dof handler.
+     */
+    ComponentMask mask;
+
+    /**
+     * Tensors of covariant transformation at each of the quadrature points.
+     * The matrix stored is the Jacobian * G^{-1}, where G = Jacobian^{t} *
+     * Jacobian, is the first fundamental form of the map; if dim=spacedim
+     * then it reduces to the transpose of the inverse of the Jacobian matrix,
+     * which itself is stored in the @p contravariant field of this structure.
+     *
+     * Computed on each cell.
+     */
+    mutable std::vector<DerivativeForm<1,dim, spacedim > >  covariant;
+
+    /**
+     * Tensors of contravariant transformation at each of the quadrature
+     * points. The contravariant matrix is the Jacobian of the transformation,
+     * i.e. $J_{ij}=dx_i/d\hat x_j$.
+     *
+     * Computed on each cell.
+     */
+    mutable std::vector< DerivativeForm<1,dim,spacedim> > contravariant;
+
+    /**
+     * The determinant of the Jacobian in each quadrature point. Filled if
+     * #update_volume_elements.
+     */
+    mutable std::vector<double> volume_elements;
+
+    /**
+     * Auxiliary vectors for internal use.
+     */
+    mutable std::vector<std::vector<Tensor<1,spacedim> > > aux;
+
+    /**
+     * Storage for the indices of the local degrees of freedom.
+     */
+    mutable std::vector<types::global_dof_index> local_dof_indices;
+
+    /**
+     * Storage for local degrees of freedom.
+     */
+    mutable std::vector<double> local_dof_values;
+  };
+
+private:
+
+  // documentation can be found in Mapping::get_data()
+  virtual
+  InternalData *
+  get_data (const UpdateFlags,
+            const Quadrature<dim> &quadrature) const;
+
+  // documentation can be found in Mapping::get_face_data()
+  virtual
+  typename Mapping<dim,spacedim>::InternalDataBase *
+  get_face_data (const UpdateFlags flags,
+                 const Quadrature<dim-1>& quadrature) const;
+
+  // documentation can be found in Mapping::get_subface_data()
+  virtual
+  typename Mapping<dim,spacedim>::InternalDataBase *
+  get_subface_data (const UpdateFlags flags,
+                    const Quadrature<dim-1>& quadrature) const;
+
+  // documentation can be found in Mapping::fill_fe_values()
+  virtual
+  CellSimilarity::Similarity
+  fill_fe_values (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                  const CellSimilarity::Similarity                           cell_similarity,
+                  const Quadrature<dim>                                     &quadrature,
+                  const typename Mapping<dim,spacedim>::InternalDataBase    &internal_data,
+                  internal::FEValues::MappingRelatedData<dim,spacedim>      &output_data) const;
+
+  // documentation can be found in Mapping::fill_fe_face_values()
+  virtual void
+  fill_fe_face_values (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                       const unsigned int                                         face_no,
+                       const Quadrature<dim-1>                                   &quadrature,
+                       const typename Mapping<dim,spacedim>::InternalDataBase    &internal_data,
+                       internal::FEValues::MappingRelatedData<dim,spacedim>      &output_data) const;
+
+  // documentation can be found in Mapping::fill_fe_subface_values()
+  virtual void
+  fill_fe_subface_values (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                          const unsigned int                                         face_no,
+                          const unsigned int                                         subface_no,
+                          const Quadrature<dim-1>                                   &quadrature,
+                          const typename Mapping<dim,spacedim>::InternalDataBase    &internal_data,
+                          internal::FEValues::MappingRelatedData<dim,spacedim>      &output_data) const;
+
+  /**
+   * @}
+   */
+
+
+  /**
+   * Pointer to the vector of absolute positions that describes the geometry.
+   */
+  SmartPointer<const VectorType, MappingFEField<dim,spacedim,VectorType,DoFHandlerType> > euler_vector;
+
+  /**
+   * A FiniteElement object which is only needed in 3D, since it knows how to
+   * reorder shape functions/DoFs on non-standard faces. This is used to
+   * reorder support points in the same way. We could make this a pointer to
+   * prevent construction in 1D and 2D, but since memory and time requirements
+   * are not particularly high this seems unnecessary at the moment.
+   */
+  SmartPointer<const FiniteElement<dim,spacedim>, MappingFEField<dim,spacedim,VectorType,DoFHandlerType> > fe;
+
+
+  /**
+   * Pointer to the DoFHandler to which the mapping vector is associated.
+   */
+  SmartPointer<const DoFHandlerType,MappingFEField<dim,spacedim,VectorType,DoFHandlerType> > euler_dof_handler;
+
+private:
+  /**
+   * Transforms a point @p p on the unit cell to the point @p p_real on the
+   * real cell @p cell and returns @p p_real.
+   *
+   * This function is called by @p transform_unit_to_real_cell and multiple
+   * times (through the Newton iteration) by @p
+   * transform_real_to_unit_cell_internal.
+   *
+   * Takes a reference to an @p InternalData that must already include the
+   * shape values at point @p p and the mapping support points of the cell.
+   *
+   * This @p InternalData argument avoids multiple computations of the shape
+   * values at point @p p and especially multiple computations of the mapping
+   * support points.
+   */
+  Point<spacedim>
+  do_transform_unit_to_real_cell (const InternalData &mdata) const;
+
+
+  /**
+   * Transforms the point @p p on the real cell to the corresponding point on
+   * the unit cell @p cell by a Newton iteration.
+   *
+   * Takes a reference to an @p InternalData that is assumed to be previously
+   * created by the @p get_data function with @p UpdateFlags including @p
+   * update_transformation_values and @p update_transformation_gradients and a
+   * one point Quadrature that includes the given initial guess for the
+   * transformation @p initial_p_unit.  Hence this function assumes that @p
+   * mdata already includes the transformation shape values and gradients
+   * computed at @p initial_p_unit.
+   *
+   * @p mdata will be changed by this function.
+   */
+  Point<dim>
+  do_transform_real_to_unit_cell (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                                  const Point<spacedim> &p,
+                                  const Point<dim> &initial_p_unit,
+                                  InternalData &mdata) const;
+
+  /**
+   * Update internal degrees of freedom. NOTE(review): @p data uses the default VectorType/DoFHandlerType.
+   */
+  void update_internal_dofs(const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                            const typename MappingFEField<dim, spacedim>::InternalData &data) const;
+
+  /**
+   * See the documentation of the base class for detailed information.
+   */
+  virtual void
+  compute_shapes_virtual (const std::vector<Point<dim> > &unit_points,
+                          typename MappingFEField<dim, spacedim>::InternalData &data) const;
+
+  /**
+   * Which components to use for the mapping.
+   */
+  const ComponentMask fe_mask;
+
+  /**
+   * Mapping between indices in the FE space and the real space. This vector
+   * contains one index for each component of the finite element space. If the
+   * index is one for which the ComponentMask which is used to construct this
+   * element is false, then numbers::invalid_unsigned_int is returned,
+   * otherwise the component in real space is returned. For example, if we
+   * construct the mapping using ComponentMask(spacedim, true), then this
+   * vector contains {0,1,2} in spacedim = 3.
+   */
+  std::vector<unsigned int> fe_to_real;
+
+  void
+  compute_data (const UpdateFlags      update_flags,
+                const Quadrature<dim>  &q,
+                const unsigned int     n_original_q_points,
+                InternalData           &data) const;
+
+  void
+  compute_face_data (const UpdateFlags      update_flags,
+                     const Quadrature<dim>  &q,
+                     const unsigned int     n_original_q_points,
+                     InternalData           &data) const;
+
+
+  /**
+   * Declare other MappingFEField classes friends.
+   */
+  template <int,int,class,class> friend class MappingFEField;
+};
+
+/*@}*/
+
+/* -------------- declaration of explicit specializations ------------- */
+
+#ifndef DOXYGEN
+
+
+
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/fe/mapping_q.h b/include/deal.II/fe/mapping_q.h
new file mode 100644
index 0000000..6188a4e
--- /dev/null
+++ b/include/deal.II/fe/mapping_q.h
@@ -0,0 +1,375 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__mapping_q_h
+#define dealii__mapping_q_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/fe/mapping_q_generic.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int dim, typename PolynomialType> class TensorProductPolynomials;
+
+
+/*!@addtogroup mapping */
+/*@{*/
+
+/**
+ * A class that implements a polynomial mapping $Q_p$ of degree $p$ on cells
+ * at the boundary of the domain (or, if requested in the constructor, for all
+ * cells) and linear mappings for interior cells.
+ *
+ * The class is in fact poorly named since (unless explicitly specified during
+ * the construction of the object, see below), it does not actually use
+ * mappings of degree $p$ <i>everywhere</i>, but only on cells at the
+ * boundary. This is in contrast to the MappingQGeneric class which indeed
+ * does use a polynomial mapping $Q_p$ of degree $p$ everywhere. The point of
+ * the current class is that in many situations, curved domains are only
+ * provided with information about how exactly edges at the boundary are
+ * shaped, but we do not know anything about internal edges. Thus, in the
+ * absence of other information, we can only assume that internal edges are
+ * straight lines, and in that case internal cells may as well be treated as
+ * bilinear quadrilaterals or trilinear hexahedra. (An example of how such
+ * meshes look is shown in step-1 already, but it is also discussed in the
+ * "Results" section of step-6.) Because bi-/trilinear mappings are
+ * significantly cheaper to compute than higher order mappings, it is
+ * advantageous in such situations to use the higher order mapping only on
+ * cells at the boundary of the domain. This class implements exactly this
+ * behavior.
+ *
+ * There are a number of special cases worth considering:
+ * - If you want to use a higher order mapping for all cells, you can
+ * achieve this by setting the second argument to the constructor to true.
+ * This only makes sense if you can actually provide information about how
+ * interior edges and faces of the mesh should be curved. This is typically
+ * done by associating a Manifold with interior cells and edges. A simple
+ * example of this is discussed in the "Results" section of step-6; a full
+ * discussion of manifolds is provided in step-53.
+ * - If you pass true as the second argument to this class, then it
+ * is in fact completely equivalent to generating a MappingQGeneric object
+ * right away.
+ * - This class is also entirely equivalent to MappingQGeneric if the
+ * polynomial degree provided is one. This is because in that case, no
+ * distinction between the mapping used on cells in the interior and on the
+ * boundary of the domain can be made.
+ * - If you are working on meshes embedded in higher space dimensions,
+ * i.e., if dim!=spacedim, then every cell is considered to be at the boundary
+ * of the domain and consequently a higher order mapping is used for all
+ * cells; again this class is then equivalent to using MappingQGeneric right
+ * away.
+ *
+ * @author Ralf Hartmann, 2000, 2001, 2005; Guido Kanschat 2000, 2001,
+ * Wolfgang Bangerth, 2015
+ */
+template <int dim, int spacedim=dim>
+class MappingQ : public Mapping<dim,spacedim>
+{
+public:
+  /**
+   * Constructor.  @p polynomial_degree denotes the polynomial degree of the
+   * polynomials that are used to map cells at the boundary.
+   *
+   * The second argument determines whether the higher order mapping should
+   * also be used on interior cells. If its value is <code>false</code> (the
+   * default), then a lower order mapping is used in the interior. This is
+   * sufficient for most cases where higher order mappings are only used to
+   * better approximate the boundary. In that case, cells bounded by straight
+   * lines are acceptable in the interior. However, there are cases where one
+   * would also like to use a higher order mapping in the interior. The
+   * MappingQEulerian class is one such case.
+   *
+   * The value of @p use_mapping_q_on_all_cells is ignored if @p dim is not
+   * equal to @p spacedim, i.e., if we are considering meshes on surfaces
+   * embedded into higher dimensional spaces.
+   */
+  MappingQ (const unsigned int polynomial_degree,
+            const bool use_mapping_q_on_all_cells = false);
+
+  /**
+   * Copy constructor.
+   */
+  MappingQ (const MappingQ<dim,spacedim> &mapping);
+
+  /**
+   * Return the degree of the mapping, i.e. the value which was passed to the
+   * constructor.
+   */
+  unsigned int get_degree () const;
+
+  /**
+   * Always returns @p true because the default implementation of functions in
+   * this class preserves vertex locations.
+   */
+  virtual
+  bool preserves_vertex_locations () const;
+
+  /**
+   * Transforms the point @p p on the unit cell to the point @p p_real on the
+   * real cell @p cell and returns @p p_real.
+   */
+  virtual
+  Point<spacedim>
+  transform_unit_to_real_cell (
+    const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+    const Point<dim>                                 &p) const;
+
+  /**
+   * Transforms the point @p p on the real cell to the point @p p_unit on the
+   * unit cell @p cell and returns @p p_unit.
+   *
+   * Uses Newton iteration and the @p transform_unit_to_real_cell function.
+   *
+   * In the codimension one case, this function returns the normal projection
+   * of the real point @p p on the curve or surface identified by the @p cell.
+   *
+   * @note Polynomial mappings from the reference (unit) cell coordinates to
+   * the coordinate system of a real cell are not always invertible if the
+   * point for which the inverse mapping is to be computed lies outside the
+   * cell's boundaries.  In such cases, the current function may fail to
+   * compute a point on the reference cell whose image under the mapping
+   * equals the given point @p p.  If this is the case then this function
+   * throws an exception of type Mapping::ExcTransformationFailed .  Whether
+   * the given point @p p lies outside the cell can therefore be determined by
+   * checking whether the return reference coordinates lie inside or outside
+   * the reference cell (e.g., using GeometryInfo::is_inside_unit_cell) or
+   * whether the exception mentioned above has been thrown.
+   */
+  virtual
+  Point<dim>
+  transform_real_to_unit_cell (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                               const Point<spacedim>                                     &p) const;
+
+  // rank-1 tensor overload; for documentation, see the Mapping base class
+  virtual
+  void
+  transform (const ArrayView<const Tensor<1,dim> >                  &input,
+             const MappingType                                       type,
+             const typename Mapping<dim,spacedim>::InternalDataBase &internal,
+             const ArrayView<Tensor<1,spacedim> >                   &output) const;
+
+  // DerivativeForm<1> -> rank-2 tensor overload; see the Mapping base class
+  virtual
+  void
+  transform (const ArrayView<const DerivativeForm<1, dim, spacedim> > &input,
+             const MappingType                                         type,
+             const typename Mapping<dim,spacedim>::InternalDataBase   &internal,
+             const ArrayView<Tensor<2,spacedim> >                     &output) const;
+
+  // rank-2 tensor overload; for documentation, see the Mapping base class
+  virtual
+  void
+  transform (const ArrayView<const Tensor<2, dim> >                 &input,
+             const MappingType                                       type,
+             const typename Mapping<dim,spacedim>::InternalDataBase &internal,
+             const ArrayView<Tensor<2,spacedim> >                   &output) const;
+
+  // DerivativeForm<2> -> rank-3 tensor overload; see the Mapping base class
+  virtual
+  void
+  transform (const ArrayView<const DerivativeForm<2, dim, spacedim> > &input,
+             const MappingType                                         type,
+             const typename Mapping<dim,spacedim>::InternalDataBase   &internal,
+             const ArrayView<Tensor<3,spacedim> >                     &output) const;
+
+  // rank-3 tensor overload; for documentation, see the Mapping base class
+  virtual
+  void
+  transform (const ArrayView<const Tensor<3, dim> >                 &input,
+             const MappingType                                       type,
+             const typename Mapping<dim,spacedim>::InternalDataBase &internal,
+             const ArrayView<Tensor<3,spacedim> >                   &output) const;
+
+  /**
+   * Return a pointer to a copy of the present object. The caller of this
+   * function then assumes ownership of the copy.
+   */
+  virtual
+  Mapping<dim,spacedim> *clone () const;
+
+
+  /**
+   * @name Interface with FEValues
+   * @{
+   */
+
+protected:
+
+  /**
+   * Storage for internal data of this mapping. See Mapping::InternalDataBase
+   * for an extensive description.
+   *
+   * This includes data that is computed once when the object is created (in
+   * get_data()) as well as data the class wants to store from the
+   * call to fill_fe_values(), fill_fe_face_values(), or
+   * fill_fe_subface_values() until possible later calls from the finite
+   * element to functions such as transform(). The latter class of member
+   * variables are marked as 'mutable'.
+   *
+   * The current class does not duplicate the storage fields of the
+   * MappingQGeneric class. Instead, it inherits directly from
+   * Mapping::InternalDataBase and holds pointers to two
+   * MappingQGeneric::InternalData objects. The reason is that MappingQ
+   * switches between $Q_1$ and $Q_p$ mappings depending on the cell we are
+   * on, so the internal data object needs to store one InternalData object
+   * that pertains to a $Q_1$ mapping and one that pertains to the $Q_p$
+   * mapping.
+   */
+  class InternalData : public Mapping<dim,spacedim>::InternalDataBase
+  {
+  public:
+    /**
+     * Constructor.
+     */
+    InternalData ();
+
+
+    /**
+     * Return an estimate (in bytes) of the memory consumption of this object.
+     */
+    virtual std::size_t memory_consumption () const;
+
+    /**
+     * Flag that is set by the <tt>fill_fe_[[sub]face]_values</tt> functions.
+     *
+     * If this flag is @p true we are on an interior cell and the @p
+     * mapping_q1_data is used.
+     */
+    mutable bool use_mapping_q1_on_current_cell;
+
+    /**
+     * A pointer to a structure to store the information for the pure $Q_1$
+     * mapping that is, by default, used on all interior cells.
+     */
+    std_cxx11::unique_ptr<typename MappingQGeneric<dim,spacedim>::InternalData> mapping_q1_data;
+
+    /**
+     * A pointer to a structure to store the information for the full $Q_p$
+     * mapping that is, by default, used on all boundary cells.
+     */
+    std_cxx11::unique_ptr<typename MappingQGeneric<dim,spacedim>::InternalData> mapping_qp_data;
+  };
+
+protected:
+
+  // documentation can be found in Mapping::requires_update_flags()
+  virtual
+  UpdateFlags
+  requires_update_flags (const UpdateFlags update_flags) const;
+
+  // documentation can be found in Mapping::get_data()
+  virtual
+  InternalData *
+  get_data (const UpdateFlags,
+            const Quadrature<dim> &quadrature) const;
+
+  // documentation can be found in Mapping::get_face_data()
+  virtual
+  InternalData *
+  get_face_data (const UpdateFlags flags,
+                 const Quadrature<dim-1>& quadrature) const;
+
+  // documentation can be found in Mapping::get_subface_data()
+  virtual
+  InternalData *
+  get_subface_data (const UpdateFlags flags,
+                    const Quadrature<dim-1>& quadrature) const;
+
+  // documentation can be found in Mapping::fill_fe_values()
+  virtual
+  CellSimilarity::Similarity
+  fill_fe_values (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                  const CellSimilarity::Similarity                           cell_similarity,
+                  const Quadrature<dim>                                     &quadrature,
+                  const typename Mapping<dim,spacedim>::InternalDataBase    &internal_data,
+                  internal::FEValues::MappingRelatedData<dim,spacedim>      &output_data) const;
+
+  // documentation can be found in Mapping::fill_fe_face_values()
+  virtual void
+  fill_fe_face_values (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                       const unsigned int                                         face_no,
+                       const Quadrature<dim-1>                                   &quadrature,
+                       const typename Mapping<dim,spacedim>::InternalDataBase    &internal_data,
+                       internal::FEValues::MappingRelatedData<dim,spacedim>      &output_data) const;
+
+  // documentation can be found in Mapping::fill_fe_subface_values()
+  virtual void
+  fill_fe_subface_values (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                          const unsigned int                                         face_no,
+                          const unsigned int                                         subface_no,
+                          const Quadrature<dim-1>                                   &quadrature,
+                          const typename Mapping<dim,spacedim>::InternalDataBase    &internal_data,
+                          internal::FEValues::MappingRelatedData<dim,spacedim>      &output_data) const;
+
+  /**
+   * @}
+   */
+
+protected:
+
+  /**
+   * The polynomial degree of the mapping to be used on all cells at the
+   * boundary of the domain, or everywhere if so specified.
+   */
+  const unsigned int polynomial_degree;
+
+  /**
+   * If this flag is set @p true then @p MappingQ is used on all cells, not
+   * only on boundary cells.
+   */
+  const bool use_mapping_q_on_all_cells;
+
+  /**
+   * Pointer to a Q1 mapping. This mapping is used on interior cells unless
+   * use_mapping_q_on_all_cells was set in the call to the constructor. The
+   * mapping is also used on any cell in transform_real_to_unit_cell() to
+   * compute a cheap initial guess for the position of the point before we
+   * employ the more expensive Newton iteration using the full mapping.
+   *
+   * @note MappingQEulerian resets this pointer to an object of type
+   * MappingQ1Eulerian to ensure that the Q1 mapping also knows about the
+   * proper shifts and transformations of the Eulerian displacements. This
+   * also means that we really need to store our own Q1 mapping here, rather
+   * than simply resorting to StaticMappingQ1::mapping.
+   *
+   * @note If the polynomial degree used for the current object is one, then
+   * the qp_mapping and q1_mapping variables point to the same underlying
+   * object.
+   */
+  std_cxx11::shared_ptr<const MappingQGeneric<dim,spacedim> > q1_mapping;
+
+  /**
+   * Pointer to a Q_p mapping. This mapping is used on boundary cells unless
+   * use_mapping_q_on_all_cells was set in the call to the constructor (in
+   * which case it is used for all cells).
+   *
+   * @note MappingQEulerian and MappingC1 reset this pointer to an object of
+   * their own implementation to ensure that the Q_p mapping also knows about
+   * the proper shifts and transformations of the Eulerian displacements
+   * (Eulerian case) and proper choice of support points (C1 case).
+   *
+   * @note If the polynomial degree used for the current object is one, then
+   * the qp_mapping and q1_mapping variables point to the same underlying
+   * object.
+   */
+  std_cxx11::shared_ptr<const MappingQGeneric<dim,spacedim> > qp_mapping;
+};
+
+/*@}*/
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/fe/mapping_q1.h b/include/deal.II/fe/mapping_q1.h
new file mode 100644
index 0000000..40e0061
--- /dev/null
+++ b/include/deal.II/fe/mapping_q1.h
@@ -0,0 +1,107 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__mapping_q1_h
+#define dealii__mapping_q1_h
+
+
+#include <deal.II/base/derivative_form.h>
+#include <deal.II/base/config.h>
+#include <deal.II/base/table.h>
+#include <deal.II/base/qprojector.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/fe/mapping_q_generic.h>
+
+#include <cmath>
+
+DEAL_II_NAMESPACE_OPEN
+
+/*!@addtogroup mapping */
+/*@{*/
+
+
+/**
+ * Implementation of a $d$-linear mapping from the reference cell to a general
+ * quadrilateral/hexahedron.
+ *
+ * The mapping implemented by this class maps the reference (unit) cell to a
+ * general grid cell with straight lines in $d$ dimensions. (Note, however,
+ * that in 3D the <i>faces</i> of a general, trilinearly mapped cell may be
+ * curved, even if the edges are not). This is the standard mapping used for
+ * polyhedral domains. It is also the mapping used throughout deal.II for many
+ * functions that come in two variants, one that allows passing a mapping
+ * argument explicitly and one that simply falls back to the MappingQ1 class
+ * declared here. (Or, in fact, to an object of kind MappingQGeneric(1), which
+ * implements exactly the functionality of this class.)
+ *
+ * The shape functions for this mapping are the same as for the finite element
+ * FE_Q of polynomial degree 1. Therefore, coupling these two yields an
+ * isoparametric element.
+ *
+ * @note This class is, in all reality, nothing more than a different name for
+ * calling MappingQGeneric with a polynomial degree of one as argument.
+ *
+ * @author Guido Kanschat, 2000, 2001; Ralf Hartmann, 2000, 2001, 2005,
+ * Wolfgang Bangerth, 2015
+ */
+template <int dim, int spacedim=dim>
+class MappingQ1 : public MappingQGeneric<dim,spacedim>
+{
+public:
+  /**
+   * Default constructor; the polynomial degree is fixed at one for this class.
+   */
+  MappingQ1 ();
+
+  // returns a MappingQ1 pointer; for documentation, see the Mapping base class
+  virtual
+  MappingQ1<dim,spacedim> *clone () const;
+};
+
+
+
+/**
+ * Many places in the library by default use (bi-,tri-)linear mappings unless
+ * users explicitly provide a different mapping to use. In these cases, the
+ * called function has to create a $Q_1$ mapping object, i.e., an object of
+ * kind MappingQGeneric(1). This is costly. It would also be costly to create
+ * such objects as static objects in the affected functions, because static
+ * objects are never destroyed throughout the lifetime of a program, even
+ * though they only have to be created once the first time code runs through a
+ * particular function.
+ *
+ * In order to avoid creation of (static or dynamic) $Q_1$ mapping objects in
+ * these contexts throughout the library, this class defines a static $Q_1$
+ * mapping object. This object can then be used in all of those places where
+ * such an object is needed.
+ */
+template <int dim, int spacedim=dim>
+struct StaticMappingQ1
+{
+  /**
+   * The static $Q_1$ mapping object discussed in the documentation of this
+   * class. Note that its type is MappingQGeneric, not MappingQ1.
+   */
+  static MappingQGeneric<dim, spacedim> mapping;
+};
+
+
+/*@}*/
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/fe/mapping_q1_eulerian.h b/include/deal.II/fe/mapping_q1_eulerian.h
new file mode 100644
index 0000000..356dba9
--- /dev/null
+++ b/include/deal.II/fe/mapping_q1_eulerian.h
@@ -0,0 +1,190 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__mapping_q1_eulerian_h
+#define dealii__mapping_q1_eulerian_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/std_cxx11/array.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/fe/mapping_q1.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+/*!@addtogroup mapping */
+/*@{*/
+
+/**
+ * Eulerian mapping of general unit cells by $d$-linear shape functions. Each
+ * cell is thus shifted in space by values given to the mapping through a
+ * finite element field.
+ *
+ * <h3>Usage</h3>
+ *
+ * The constructor of this class takes two arguments: a reference to the
+ * vector that defines the mapping from the reference configuration to the
+ * current configuration and a reference to the DoFHandler. The vector should
+ * then represent a (flattened out version of a) vector valued field defined
+ * at nodes defined by the DoFHandler, where the number of components of
+ * the vector field equals the number of space dimensions. Thus, the
+ * DoFHandler shall operate on a finite element that has as many components as
+ * space dimensions. As an additional requirement, we impose that it have as
+ * many degrees of freedom per vertex as there are space dimensions; since this
+ * object only evaluates the finite element field at the vertices, the values
+ * of all other degrees of freedom (not associated to vertices) are ignored.
+ * These requirements are met if the finite element which the given DoFHandler
+ * operates on is constructed as a system element (FESystem) from @p dim
+ * continuous FE_Q() objects.
+ *
+ * In many cases, the shift vector will also be the solution vector of the
+ * problem under investigation. If this is not the case (i.e. the number of
+ * components of the solution variable is not equal to the space dimension,
+ * e.g. for scalar problems in <tt>dim>1</tt> where the Eulerian coordinates
+ * only give a background field, or for coupled problems where more variables
+ * are computed than just the flow field), then a different DoFHandler has to
+ * be set up on the given triangulation, and the shift vector has then to be
+ * associated to it.
+ *
+ * An example is shown below:
+ * @code
+ *    FESystem<dim> fe(FE_Q<dim>(1), dim);
+ *    DoFHandler<dim> flowfield_dof_handler(triangulation);
+ *    flowfield_dof_handler.distribute_dofs(fe);
+ *    Vector<double> map_points(flowfield_dof_handler.n_dofs());
+ *    MappingQ1Eulerian<dim> mymapping(map_points, flowfield_dof_handler);
+ * @endcode
+ *
+ * Note that since the vector of shift values and the dof handler are only
+ * associated to this object at construction time, you have to make sure that
+ * whenever you use this object, the given objects still represent valid data.
+ *
+ * To enable the use of the MappingQ1Eulerian class also in the context of
+ * parallel codes using the PETSc wrapper classes, the type of the vector can
+ * be specified as template parameter <tt>VectorType</tt>. Not specifying
+ * this template argument in applications using the PETSc vector classes leads
+ * to the construction of a copy of the vector which is not accessible
+ * afterwards!
+ *
+ * For more information about the <tt>spacedim</tt> template parameter check
+ * the documentation of FiniteElement or the one of Triangulation.
+ *
+ * @author Michael Stadler, 2001
+ */
+template <int dim, typename VectorType = Vector<double>, int spacedim=dim >
+class MappingQ1Eulerian : public MappingQGeneric<dim,spacedim>
+{
+public:
+
+  /**
+   * Constructor. It takes a <tt>const VectorType &</tt> as its first argument
+   * to specify the transformation of the whole problem from the reference to
+   * the current configuration. The organization of the elements in the
+   * vector must follow the concept how deal.II stores solutions that are
+   * associated to a triangulation.  This is automatically the case if the
+   * vector represents the solution of the previous step of a nonlinear
+   * problem. Alternatively, the vector can be initialized by
+   * <tt>DoFAccessor::set_dof_values()</tt>.
+   */
+  MappingQ1Eulerian (const VectorType  &euler_transform_vectors,
+                     const DoFHandler<dim,spacedim> &shiftmap_dof_handler);
+
+  /**
+   * Return the mapped vertices of the cell. For the current class, this
+   * function does not use the support points from the geometry of the current
+   * cell but instead evaluates an externally given displacement field in
+   * addition to the geometry of the cell.
+   */
+  virtual
+  std_cxx11::array<Point<spacedim>, GeometryInfo<dim>::vertices_per_cell>
+  get_vertices (const typename Triangulation<dim,spacedim>::cell_iterator &cell) const;
+
+  /**
+   * Return a pointer to a copy of the present object. The caller of this
+   * function then assumes ownership of the copy.
+   */
+  virtual
+  MappingQ1Eulerian<dim,VectorType,spacedim> *clone () const;
+
+  /**
+   * Always returns @p false because MappingQ1Eulerian does not in general
+   * preserve vertex locations (unless the translation vector happens to
+   * provide for zero displacements at vertex locations).
+   */
+  bool preserves_vertex_locations () const;
+
+  /**
+   * Exception.
+   */
+  DeclException0 (ExcInactiveCell);
+
+
+
+protected:
+  /**
+   * Compute mapping-related information for a cell. See the documentation of
+   * Mapping::fill_fe_values() for a discussion of purpose, arguments, and
+   * return value of this function.
+   *
+   * This function overrides the function in the base class since we cannot
+   * use any cell similarity for this class.
+   */
+  virtual
+  CellSimilarity::Similarity
+  fill_fe_values (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                  const CellSimilarity::Similarity                           cell_similarity,
+                  const Quadrature<dim>                                     &quadrature,
+                  const typename Mapping<dim,spacedim>::InternalDataBase    &internal_data,
+                  internal::FEValues::MappingRelatedData<dim,spacedim>      &output_data) const;
+
+  /**
+   * Compute the support points of the mapping. For the current class, these
+   * are the vertices, as obtained by calling Mapping::get_vertices(). See the
+   * documentation of MappingQGeneric::compute_mapping_support_points() for
+   * more information.
+   */
+  virtual
+  std::vector<Point<spacedim> >
+  compute_mapping_support_points(const typename Triangulation<dim,spacedim>::cell_iterator &cell) const;
+
+  /**
+   * Pointer to the vector of shifts.
+   */
+  SmartPointer<const VectorType, MappingQ1Eulerian<dim,VectorType,spacedim> > euler_transform_vectors;
+
+  /**
+   * Pointer to the DoFHandler to which the mapping vector is associated.
+   */
+  SmartPointer<const DoFHandler<dim,spacedim>,MappingQ1Eulerian<dim,VectorType,spacedim> > shiftmap_dof_handler;
+};
+
+/*@}*/
+
+/*----------------------------------------------------------------------*/
+
+#ifndef DOXYGEN
+
+template <int dim, typename VectorType, int spacedim>
+inline
+bool
+MappingQ1Eulerian<dim,VectorType,spacedim>::preserves_vertex_locations () const
+{
+  return false; // see the declaration: displacements generally move vertices
+}
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/fe/mapping_q_eulerian.h b/include/deal.II/fe/mapping_q_eulerian.h
new file mode 100644
index 0000000..e7bd5ce
--- /dev/null
+++ b/include/deal.II/fe/mapping_q_eulerian.h
@@ -0,0 +1,279 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__mapping_q_eulerian_h
+#define dealii__mapping_q_eulerian_h
+
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/base/thread_management.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/mapping_q.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/*!@addtogroup mapping */
+/*@{*/
+
+/**
+ * This class is an extension of the MappingQ1Eulerian class to higher order
+ * Qp mappings.  It is useful when one wants to calculate shape function
+ * information on a domain that is deforming as the computation proceeds.
+ *
+ * <h3>Usage</h3>
+ *
+ * The constructor of this class takes three arguments: the polynomial degree
+ * of the desired Qp mapping, a reference to the DoFHandler, and a reference
+ * to the vector that defines the mapping from the initial configuration to
+ * the current configuration. The most common case is to use the solution
+ * vector for the problem under consideration as the shift vector. The key
+ * requirement is that the number of components of the given vector field be
+ * equal to (or possibly greater than) the number of space dimensions. If
+ * there are more components than space dimensions (for example, if one is
+ * working with a coupled problem where there are additional solution
+ * variables), the first <tt>dim</tt> components are assumed to represent the
+ * displacement field, and the remaining components are ignored.  If this
+ * assumption does not hold one may need to set up a separate DoFHandler on
+ * the triangulation and associate the desired shift vector to it.
+ *
+ * Typically, the DoFHandler operates on a finite element that is constructed
+ * as a system element (FESystem) from continuous FE_Q() objects. An example
+ * is shown below:
+ * @code
+ *    FESystem<dim> fe(FE_Q<dim>(2), dim, FE_Q<dim>(1), 1);
+ *    DoFHandler<dim> dof_handler(triangulation);
+ *    dof_handler.distribute_dofs(fe);
+ *    Vector<double> displacement_field(dof_handler.n_dofs());
+ *    // ... compute displacement field somehow...
+ *    MappingQEulerian<dim> q2_mapping(2, dof_handler, displacement_field);
+ * @endcode
+ *
+ * In this example, our element consists of <tt>(dim+1)</tt> components. Only
+ * the first <tt>dim</tt> components will be used, however, to define the Q2
+ * mapping.  The remaining components are ignored.
+ *
+ * Note that it is essential to call the distribute_dofs(...) function before
+ * constructing a mapping object.
+ *
+ * Also note that since the vector of shift values and the dof handler are
+ * only associated to this object at construction time, you have to make sure
+ * that whenever you use this object, the given objects still represent valid
+ * data.
+ *
+ * To enable the use of the MappingQEulerian class also in the context of
+ * parallel codes using the PETSc wrapper classes, the type of the vector can
+ * be specified as template parameter <tt>VectorType</tt>. Not specifying
+ * this template argument in applications using the PETSc vector classes leads
+ * to the construction of a copy of the vector which is not accessible
+ * afterwards!
+ *
+ * @author Joshua White, 2008
+ */
+template <int dim, typename VectorType = Vector<double>, int spacedim=dim >
+class MappingQEulerian : public MappingQ<dim, spacedim>
+{
+public:
+  /**
+   * Constructor.
+   *
+   * @param[in] degree The polynomial degree of the desired $Q_p$ mapping.
+   * @param[in] euler_dof_handler A DoFHandler object that defines a finite
+   * element space. This space needs to have at least dim components and the
+   * first dim components of the space will be considered displacements
+   * relative to the original positions of the cells of the triangulation.
+   * @param[in] euler_vector A finite element function in the space defined by
+   * the second argument. The first dim components of this function will be
+   * interpreted as the displacement we use in defining the mapping, relative
+   * to the location of cells of the underlying triangulation.
+   */
+  MappingQEulerian (const unsigned int             degree,
+                    const DoFHandler<dim,spacedim> &euler_dof_handler,
+                    const VectorType               &euler_vector);
+
+  /**
+   * @deprecated Use the constructor with the reverse order of second and
+   * third argument.
+   */
+  MappingQEulerian (const unsigned int             degree,
+                    const VectorType               &euler_vector,
+                    const DoFHandler<dim,spacedim> &euler_dof_handler) DEAL_II_DEPRECATED;
+
+  /**
+   * Return the mapped vertices of the cell. For the current class, this
+   * function does not use the support points from the geometry of the current
+   * cell but instead evaluates an externally given displacement field in
+   * addition to the geometry of the cell.
+   */
+  virtual
+  std_cxx11::array<Point<spacedim>, GeometryInfo<dim>::vertices_per_cell>
+  get_vertices (const typename Triangulation<dim,spacedim>::cell_iterator &cell) const;
+
+  /**
+   * Return a pointer to a copy of the present object. The caller of this
+   * function then assumes ownership of the copy.
+   */
+  virtual
+  Mapping<dim,spacedim> *clone () const;
+
+  /**
+   * Always returns @p false because MappingQEulerian does not in general
+   * preserve vertex locations (unless the translation vector happens to
+   * provide for zero displacements at vertex locations).
+   */
+  bool preserves_vertex_locations () const;
+
+  /**
+   * Exception
+   */
+  DeclException0 (ExcInactiveCell);
+
+protected:
+  /**
+   * Compute mapping-related information for a cell. See the documentation of
+   * Mapping::fill_fe_values() for a discussion of purpose, arguments, and
+   * return value of this function.
+   *
+   * This function overrides the function in the base class since we cannot
+   * use any cell similarity for this class.
+   */
+  virtual
+  CellSimilarity::Similarity
+  fill_fe_values (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                  const CellSimilarity::Similarity                           cell_similarity,
+                  const Quadrature<dim>                                     &quadrature,
+                  const typename Mapping<dim,spacedim>::InternalDataBase    &internal_data,
+                  internal::FEValues::MappingRelatedData<dim,spacedim>      &output_data) const;
+
+  /**
+   * Pointer to the vector of shifts.
+   */
+  SmartPointer<const VectorType, MappingQEulerian<dim,VectorType,spacedim> > euler_vector;
+
+  /**
+   * Pointer to the DoFHandler to which the mapping vector is associated.
+   */
+  SmartPointer<const DoFHandler<dim,spacedim>,MappingQEulerian<dim,VectorType,spacedim> > euler_dof_handler;
+
+
+private:
+
+  /**
+   * A class derived from MappingQGeneric that provides the generic mapping
+   * with support points that take the displacement field of the surrounding
+   * MappingQEulerian object into account.
+   */
+  class MappingQEulerianGeneric : public MappingQGeneric<dim,spacedim>
+  {
+  public:
+
+    /**
+     * Constructor.
+     */
+    MappingQEulerianGeneric (const unsigned int degree,
+                             const MappingQEulerian<dim,VectorType,spacedim> &mapping_q_eulerian);
+
+    /**
+     * Return the mapped vertices of the cell. For the current class, this
+     * function does not use the support points from the geometry of the
+     * current cell but instead evaluates an externally given displacement
+     * field in addition to the geometry of the cell.
+     */
+    virtual
+    std_cxx11::array<Point<spacedim>, GeometryInfo<dim>::vertices_per_cell>
+    get_vertices (const typename Triangulation<dim,spacedim>::cell_iterator &cell) const;
+
+    /**
+     * Compute the positions of the support points in the current
+     * configuration. See the documentation of
+     * MappingQGeneric::compute_mapping_support_points() for more information.
+     */
+    virtual
+    std::vector<Point<spacedim> >
+    compute_mapping_support_points(const typename Triangulation<dim,spacedim>::cell_iterator &cell) const;
+
+  private:
+    /**
+     * Reference to the surrounding object off of which we live.
+     */
+    const MappingQEulerian<dim,VectorType,spacedim> &mapping_q_eulerian;
+
+
+    /**
+     * Special quadrature rule used to define the support points in the
+     * reference configuration.
+     */
+
+    class SupportQuadrature : public Quadrature<dim>
+    {
+    public:
+      /**
+       * Constructor, with an argument defining the desired polynomial degree.
+       */
+
+      SupportQuadrature (const unsigned int map_degree);
+
+    };
+
+    /**
+     * A member variable holding the quadrature points in the right order.
+     */
+    const SupportQuadrature support_quadrature;
+
+    /**
+     * FEValues object used to query the given finite element field at the
+     * support points in the reference configuration.
+     *
+     * The variable is marked as mutable since we have to call
+     * FEValues::reinit from compute_mapping_support_points, a function that
+     * is 'const'.
+     */
+    mutable FEValues<dim,spacedim> fe_values;
+
+    /**
+     * A variable to guard access to the fe_values variable.
+     */
+    mutable Threads::Mutex fe_values_mutex;
+  };
+
+};
+
+/*@}*/
+
+
+/*----------------------------------------------------------------------*/
+
+#ifndef DOXYGEN
+
+template <int dim, typename VectorType, int spacedim>
+inline
+bool
+MappingQEulerian<dim,VectorType,spacedim>::preserves_vertex_locations () const
+{
+  return false; // see the declaration: displacements generally move vertices
+}
+
+#endif // DOXYGEN
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+
+#endif // dealii__mapping_q_eulerian_h
diff --git a/include/deal.II/fe/mapping_q_generic.h b/include/deal.II/fe/mapping_q_generic.h
new file mode 100644
index 0000000..2c1e19e
--- /dev/null
+++ b/include/deal.II/fe/mapping_q_generic.h
@@ -0,0 +1,782 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__mapping_q_generic_h
+#define dealii__mapping_q_generic_h
+
+
+#include <deal.II/base/derivative_form.h>
+#include <deal.II/base/config.h>
+#include <deal.II/base/table.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/qprojector.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/fe/mapping.h>
+#include <deal.II/fe/fe_q.h>
+
+#include <cmath>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int,int> class MappingQ;
+
+
+/*!@addtogroup mapping */
+/*@{*/
+
+
+/**
+ * This class implements the functionality for polynomial mappings $Q_p$ of
+ * polynomial degree $p$ that will be used on all cells of the mesh. The
+ * MappingQ1 and MappingQ classes specialize this behavior slightly.
+ *
+ * The class is poorly named. It should really have been called MappingQ
+ * because it consistently uses $Q_p$ mappings on all cells of a
+ * triangulation. However, the name MappingQ was already taken when we rewrote
+ * the entire class hierarchy for mappings. One might argue that one should
+ * always use MappingQGeneric over the existing class MappingQ (which, unless
+ * explicitly specified during the construction of the object, only uses
+ * mappings of degree $p$ <i>on cells at the boundary of the domain</i>). On
+ * the other hand, there are good reasons to use MappingQ in many situations:
+ * in many situations, curved domains are only provided with information about
+ * how exactly edges at the boundary are shaped, but we do not know anything
+ * about internal edges. Thus, in the absence of other information, we can
+ * only assume that internal edges are straight lines, and in that case
+ * internal cells may as well be treated as bilinear quadrilaterals or
+ * trilinear hexahedra. (An example of how such meshes look is shown in step-1
+ * already, but it is also discussed in the "Results" section of step-6.)
+ * Because bi-/trilinear mappings are significantly cheaper to compute than
+ * higher order mappings, it is advantageous in such situations to use the
+ * higher order mapping only on cells at the boundary of the domain -- i.e.,
+ * the behavior of MappingQ. Of course, MappingQGeneric also uses bilinear
+ * mappings for interior cells as long as it has no knowledge about curvature
+ * of interior edges, but it implements this the expensive way: as a general
+ * $Q_p$ mapping where the mapping support points just <i>happen</i> to be
+ * arranged along linear or bilinear edges or faces.
+ *
+ * There are a number of special cases worth considering:
+ * - If you really want to use a higher order mapping for all cells,
+ * you can do this using the current class, but this only makes sense if you
+ * can actually provide information about how interior edges and faces of the
+ * mesh should be curved. This is typically done by associating a Manifold
+ * with interior cells and edges. A simple example of this is discussed in the
+ * "Results" section of step-6; a full discussion of manifolds is provided in
+ * step-53.
+ * - If you are working on meshes that describe a (curved) manifold
+ * embedded in higher space dimensions, i.e., if dim!=spacedim, then every
+ * cell is at the boundary of the domain and you will likely already have attached
+ * a manifold object to all cells that can then also be used by the mapping
+ * classes for higher order mappings.
+ *
+ *
+ * @author Wolfgang Bangerth, 2015
+ */
+template <int dim, int spacedim=dim>
+class MappingQGeneric : public Mapping<dim,spacedim>
+{
+public:
+  /**
+   * Constructor.  @p polynomial_degree denotes the polynomial degree of the
+   * polynomials that are used to map cells from the reference to the real
+   * cell.
+   */
+  MappingQGeneric (const unsigned int polynomial_degree);
+
+  /**
+   * Copy constructor.
+   */
+  MappingQGeneric (const MappingQGeneric<dim,spacedim> &mapping);
+
+  // for documentation, see the Mapping base class
+  virtual
+  Mapping<dim,spacedim> *clone () const;
+
+  /**
+   * Return the degree of the mapping, i.e. the value which was passed to the
+   * constructor.
+   */
+  unsigned int get_degree () const;
+
+  /**
+   * Always returns @p true because the default implementation of functions in
+   * this class preserves vertex locations.
+   */
+  virtual
+  bool preserves_vertex_locations () const;
+
+  /**
+   * @name Mapping points between reference and real cells
+   * @{
+   */
+
+  // for documentation, see the Mapping base class
+  virtual
+  Point<spacedim>
+  transform_unit_to_real_cell (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                               const Point<dim>                                 &p) const;
+
+  // for documentation, see the Mapping base class
+  virtual
+  Point<dim>
+  transform_real_to_unit_cell (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                               const Point<spacedim>                            &p) const;
+
+  /**
+   * @}
+   */
+
+  /**
+   * @name Functions to transform tensors from reference to real coordinates
+   * @{
+   */
+
+  // for documentation, see the Mapping base class
+  virtual
+  void
+  transform (const ArrayView<const Tensor<1,dim> >                  &input,
+             const MappingType                                       type,
+             const typename Mapping<dim,spacedim>::InternalDataBase &internal,
+             const ArrayView<Tensor<1,spacedim> >                   &output) const;
+
+  // for documentation, see the Mapping base class
+  virtual
+  void
+  transform (const ArrayView<const DerivativeForm<1, dim, spacedim> > &input,
+             const MappingType                                         type,
+             const typename Mapping<dim,spacedim>::InternalDataBase   &internal,
+             const ArrayView<Tensor<2,spacedim> >                     &output) const;
+
+  // for documentation, see the Mapping base class
+  virtual
+  void
+  transform (const ArrayView<const Tensor<2, dim> >                 &input,
+             const MappingType                                       type,
+             const typename Mapping<dim,spacedim>::InternalDataBase &internal,
+             const ArrayView<Tensor<2,spacedim> >                   &output) const;
+
+  // for documentation, see the Mapping base class
+  virtual
+  void
+  transform (const ArrayView<const DerivativeForm<2, dim, spacedim> > &input,
+             const MappingType                                         type,
+             const typename Mapping<dim,spacedim>::InternalDataBase   &internal,
+             const ArrayView<Tensor<3,spacedim> >                     &output) const;
+
+  // for documentation, see the Mapping base class
+  virtual
+  void
+  transform (const ArrayView<const Tensor<3, dim> >                 &input,
+             const MappingType                                       type,
+             const typename Mapping<dim,spacedim>::InternalDataBase &internal,
+             const ArrayView<Tensor<3,spacedim> >                   &output) const;
+
+  /**
+   * @}
+   */
+
+  /**
+   * @name Interface with FEValues
+   * @{
+   */
+
+public:
+  /**
+   * Storage for internal data of polynomial mappings. See
+   * Mapping::InternalDataBase for an extensive description.
+   *
+   * For the current class, the InternalData class stores data that is
+   * computed once when the object is created (in get_data()) as well as data
+   * the class wants to store between the call to fill_fe_values(),
+   * fill_fe_face_values(), or fill_fe_subface_values() until possible later
+   * calls from the finite element to functions such as transform(). The
+   * latter class of member variables are marked as 'mutable'.
+   */
+  class InternalData : public Mapping<dim,spacedim>::InternalDataBase
+  {
+  public:
+    /**
+     * Constructor. The argument denotes the polynomial degree of the mapping
+     * to which this object will correspond.
+     */
+    InternalData(const unsigned int polynomial_degree);
+
+    /**
+     * Initialize the object's member variables related to cell data based on
+     * the given arguments.
+     *
+     * The function also calls compute_shape_function_values() to actually set
+     * the member variables related to the values and derivatives of the
+     * mapping shape functions.
+     */
+    void
+    initialize (const UpdateFlags      update_flags,
+                const Quadrature<dim> &quadrature,
+                const unsigned int     n_original_q_points);
+
+    /**
+     * Initialize the object's member variables related to cell and face data
+     * based on the given arguments. In order to initialize cell data, this
+     * function calls initialize().
+     */
+    void
+    initialize_face (const UpdateFlags      update_flags,
+                     const Quadrature<dim> &quadrature,
+                     const unsigned int     n_original_q_points);
+
+    /**
+     * Compute the values and/or derivatives of the shape functions used for
+     * the mapping.
+     *
+     * Which values, derivatives, or higher order derivatives are computed is
+     * determined by which of the member arrays have nonzero sizes. They are
+     * typically set to their appropriate sizes by the initialize() and
+     * initialize_face() functions, which indeed call this function
+     * internally. However, it is possible (and at times useful) to do the
+     * resizing by hand and then call this function directly. An example is in
+     * a Newton iteration where we update the location of a quadrature point
+     * (e.g., in MappingQ::transform_real_to_unit_cell()) and need to
+     * re-compute the mapping and its derivatives at this location, but have
+     * already sized all internal arrays correctly.
+     */
+    void compute_shape_function_values (const std::vector<Point<dim> > &unit_points);
+
+
+    /**
+     * Shape function at quadrature point. Shape functions are in tensor
+     * product order, so vertices must be reordered to obtain transformation.
+     */
+    const double &shape (const unsigned int qpoint,
+                         const unsigned int shape_nr) const;
+
+    /**
+     * Shape function at quadrature point. See above.
+     */
+    double &shape (const unsigned int qpoint,
+                   const unsigned int shape_nr);
+
+    /**
+     * Gradient of shape function in quadrature point. See above.
+     */
+    const Tensor<1,dim> &derivative (const unsigned int qpoint,
+                                     const unsigned int shape_nr) const;
+
+    /**
+     * Gradient of shape function in quadrature point. See above.
+     */
+    Tensor<1,dim> &derivative (const unsigned int qpoint,
+                               const unsigned int shape_nr);
+
+    /**
+     * Second derivative of shape function in quadrature point. See above.
+     */
+    const Tensor<2,dim> &second_derivative (const unsigned int qpoint,
+                                            const unsigned int shape_nr) const;
+
+    /**
+     * Second derivative of shape function in quadrature point. See above.
+     */
+    Tensor<2,dim> &second_derivative (const unsigned int qpoint,
+                                      const unsigned int shape_nr);
+
+    /**
+     * Third derivative of shape function in quadrature point. See above.
+     */
+    const Tensor<3,dim> &third_derivative (const unsigned int qpoint,
+                                           const unsigned int shape_nr) const;
+
+    /**
+     * Third derivative of shape function in quadrature point. See above.
+     */
+    Tensor<3,dim> &third_derivative (const unsigned int qpoint,
+                                     const unsigned int shape_nr);
+
+    /**
+     * Fourth derivative of shape function in quadrature point. See above.
+     */
+    const Tensor<4,dim> &fourth_derivative (const unsigned int qpoint,
+                                            const unsigned int shape_nr) const;
+
+    /**
+     * Fourth derivative of shape function in quadrature point. See above.
+     */
+    Tensor<4,dim> &fourth_derivative (const unsigned int qpoint,
+                                      const unsigned int shape_nr);
+
+    /**
+     * Return an estimate (in bytes) of the memory consumption of this object.
+     */
+    virtual std::size_t memory_consumption () const;
+
+    /**
+     * Values of shape functions. Access by function @p shape.
+     *
+     * Computed once.
+     */
+    std::vector<double> shape_values;
+
+    /**
+     * Values of shape function derivatives. Access by function @p derivative.
+     *
+     * Computed once.
+     */
+    std::vector<Tensor<1,dim> > shape_derivatives;
+
+    /**
+     * Values of shape function second derivatives. Access by function @p
+     * second_derivative.
+     *
+     * Computed once.
+     */
+    std::vector<Tensor<2,dim> > shape_second_derivatives;
+
+    /**
+     * Values of shape function third derivatives. Access by function @p
+     * third_derivative.
+     *
+     * Computed once.
+     */
+    std::vector<Tensor<3,dim> > shape_third_derivatives;
+
+    /**
+     * Values of shape function fourth derivatives. Access by function @p
+     * fourth_derivative.
+     *
+     * Computed once.
+     */
+    std::vector<Tensor<4,dim> > shape_fourth_derivatives;
+
+    /**
+     * Unit tangential vectors. Used for the computation of boundary forms and
+     * normal vectors.
+     *
+     * This vector has (dim-1)*GeometryInfo::faces_per_cell entries. The first
+     * GeometryInfo::faces_per_cell contain the vectors in the first
+     * tangential direction for each face; the second set of
+     * GeometryInfo::faces_per_cell entries contain the vectors in the second
+     * tangential direction (only in 3d, since there we have 2 tangential
+     * directions per face), etc.
+     *
+     * Filled once.
+     */
+    std::vector<std::vector<Tensor<1,dim> > > unit_tangentials;
+
+    /**
+     * The polynomial degree of the mapping. Since the objects here are also
+     * used (with minor adjustments) by MappingQ, we need to store this.
+     */
+    unsigned int polynomial_degree;
+
+    /**
+     * Number of shape functions. If this is a Q1 mapping, then it is simply
+     * the number of vertices per cell. However, since also derived classes
+     * use this class (e.g. the MappingQ class), the number of shape
+     * functions may also be different.
+     *
+     * In general, it is $(p+1)^\text{dim}$, where $p$ is the polynomial
+     * degree of the mapping.
+     */
+    const unsigned int n_shape_functions;
+
+    /**
+     * Tensors of covariant transformation at each of the quadrature points.
+     * The matrix stored is the Jacobian * G^{-1}, where G = Jacobian^{t} *
+     * Jacobian, is the first fundamental form of the map; if dim=spacedim
+     * then it reduces to the transpose of the inverse of the Jacobian matrix,
+     * which itself is stored in the @p contravariant field of this structure.
+     *
+     * Computed on each cell.
+     */
+    mutable std::vector<DerivativeForm<1,dim, spacedim > >  covariant;
+
+    /**
+     * Tensors of contravariant transformation at each of the quadrature
+     * points. The contravariant matrix is the Jacobian of the transformation,
+     * i.e. $J_{ij}=dx_i/d\hat x_j$.
+     *
+     * Computed on each cell.
+     */
+    mutable std::vector< DerivativeForm<1,dim,spacedim> > contravariant;
+
+    /**
+     * Auxiliary vectors for internal use.
+     */
+    mutable std::vector<std::vector<Tensor<1,spacedim> > > aux;
+
+    /**
+     * Stores the support points of the mapping shape functions on the @p
+     * cell_of_current_support_points.
+     */
+    mutable std::vector<Point<spacedim> > mapping_support_points;
+
+    /**
+     * Stores the cell of which the @p mapping_support_points are stored.
+     */
+    mutable typename Triangulation<dim,spacedim>::cell_iterator cell_of_current_support_points;
+
+    /**
+     * The determinant of the Jacobian in each quadrature point. Filled if
+     * #update_volume_elements.
+     */
+    mutable std::vector<double> volume_elements;
+  };
+
+
+  // documentation can be found in Mapping::requires_update_flags()
+  virtual
+  UpdateFlags
+  requires_update_flags (const UpdateFlags update_flags) const;
+
+  // documentation can be found in Mapping::get_data()
+  virtual
+  InternalData *
+  get_data (const UpdateFlags,
+            const Quadrature<dim> &quadrature) const;
+
+  // documentation can be found in Mapping::get_face_data()
+  virtual
+  InternalData *
+  get_face_data (const UpdateFlags flags,
+                 const Quadrature<dim-1>& quadrature) const;
+
+  // documentation can be found in Mapping::get_subface_data()
+  virtual
+  InternalData *
+  get_subface_data (const UpdateFlags flags,
+                    const Quadrature<dim-1>& quadrature) const;
+
+  // documentation can be found in Mapping::fill_fe_values()
+  virtual
+  CellSimilarity::Similarity
+  fill_fe_values (const typename Triangulation<dim,spacedim>::cell_iterator     &cell,
+                  const CellSimilarity::Similarity                               cell_similarity,
+                  const Quadrature<dim>                                         &quadrature,
+                  const typename Mapping<dim,spacedim>::InternalDataBase        &internal_data,
+                  dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &output_data) const;
+
+  // documentation can be found in Mapping::fill_fe_face_values()
+  virtual void
+  fill_fe_face_values (const typename Triangulation<dim,spacedim>::cell_iterator     &cell,
+                       const unsigned int                                             face_no,
+                       const Quadrature<dim-1>                                       &quadrature,
+                       const typename Mapping<dim,spacedim>::InternalDataBase        &internal_data,
+                       dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &output_data) const;
+
+  // documentation can be found in Mapping::fill_fe_subface_values()
+  virtual void
+  fill_fe_subface_values (const typename Triangulation<dim,spacedim>::cell_iterator     &cell,
+                          const unsigned int                                             face_no,
+                          const unsigned int                                             subface_no,
+                          const Quadrature<dim-1>                                       &quadrature,
+                          const typename Mapping<dim,spacedim>::InternalDataBase        &internal_data,
+                          dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &output_data) const;
+
+  /**
+   * @}
+   */
+
+protected:
+
+  /**
+   * The degree of the polynomials used as shape functions for the mapping of
+   * cells.
+   */
+  const unsigned int polynomial_degree;
+
+  /**
+   * The default line support points. These are used when computing
+   * the location in real space of the support points on lines and
+   * quads, which are requested from the Manifold<dim,spacedim> class.
+   *
+   * The number of quadrature points depends on the degree of this
+   * class, and it matches the number of degrees of freedom of an
+   * FE_Q<1>(this->polynomial_degree).
+   */
+  QGaussLobatto<1> line_support_points;
+
+  /**
+   * An FE_Q object which is only needed in 3D, since it knows how to reorder
+   * shape functions/DoFs on non-standard faces. This is used to reorder
+   * support points in the same way.
+   */
+  const std_cxx11::unique_ptr<FE_Q<dim> > fe_q;
+
+  /**
+   * A table of weights by which we multiply the locations of the support
+   * points on the perimeter of a quad to get the location of interior support
+   * points.
+   *
+   * Sizes: support_point_weights_on_quad.size() = number of inner
+   * unit_support_points; support_point_weights_on_quad[i].size() = number of
+   * outer unit_support_points, i.e., unit_support_points on the boundary of
+   * the quad.
+   *
+   * For the definition of this vector see equation (8) of the `mapping'
+   * report.
+   */
+  Table<2,double> support_point_weights_on_quad;
+
+  /**
+   * A table of weights by which we multiply the locations of the support
+   * points on the perimeter of a hex to get the location of interior support
+   * points.
+   *
+   * For the definition of this vector see equation (8) of the `mapping'
+   * report.
+   */
+  Table<2,double> support_point_weights_on_hex;
+
+  /**
+   * Return the locations of support points for the mapping. For example, for
+   * $Q_1$ mappings these are the vertices, and for higher order polynomial
+   * mappings they are the vertices plus interior points on edges, faces, and
+   * the cell interior that are placed in consultation with the Manifold
+   * description of the domain and its boundary. However, other classes may
+   * override this function differently. In particular, the MappingQ1Eulerian
+   * class does exactly this by not computing the support points from the
+   * geometry of the current cell but instead evaluating an externally given
+   * displacement field in addition to the geometry of the cell.
+   *
+   * The default implementation of this function is appropriate for most
+   * cases. It takes the locations of support points on the boundary of the
+   * cell from the underlying manifold. Interior support points (i.e., support
+   * points in quads for 2d, in hexes for 3d) are then computed using the
+   * solution of a Laplace equation with the position of the outer support
+   * points as boundary values, in order to make the transformation as smooth
+   * as possible.
+   *
+   * The function works its way from the vertices (which it takes from the
+   * given cell) via the support points on the line (for which it calls the
+   * add_line_support_points() function) and the support points on the quad
+   * faces (in 3d, for which it calls the add_quad_support_points() function).
+   * It then adds interior support points that are either computed by
+   * interpolation from the surrounding points using weights computed by
+   * solving a Laplace equation, or if dim<spacedim, it asks the underlying
+   * manifold for the locations of interior points.
+   */
+  virtual
+  std::vector<Point<spacedim> >
+  compute_mapping_support_points (const typename Triangulation<dim,spacedim>::cell_iterator &cell) const;
+
+  /**
+   * Transforms the point @p p on the real cell to the corresponding point on
+   * the unit cell @p cell by a Newton iteration.
+   */
+  Point<dim>
+  transform_real_to_unit_cell_internal (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                                        const Point<spacedim> &p,
+                                        const Point<dim> &initial_p_unit) const;
+
+  /**
+   * For <tt>dim=2,3</tt>. Append the support points of all shape functions
+   * located on bounding lines of the given cell to the vector @p a. Points
+   * located on the vertices of a line are not included.
+   *
+   * Needed by the @p compute_mapping_support_points() function. For <tt>dim=1</tt>
+   * this function is empty. The function uses the underlying manifold object
+   * of the line (or, if none is set, of the cell) for the location of the
+   * requested points.
+   *
+   * This function is made virtual in order to allow derived classes to choose
+   * shape function support points differently than the present class, which
+   * chooses the points as interpolation points on the boundary.
+   */
+  virtual
+  void
+  add_line_support_points (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                           std::vector<Point<spacedim> > &a) const;
+
+  /**
+   * For <tt>dim=3</tt>. Append the support points of all shape functions
+   * located on bounding faces (quads in 3d) of the given cell to the vector
+   * @p a. Points located on the vertices or lines of a quad are not included.
+   *
+   * Needed by the @p compute_mapping_support_points() function. For <tt>dim=1</tt>
+   * and <tt>dim=2</tt> this function is empty. The function uses the
+   * underlying manifold object of the quad (or, if none is set, of the cell)
+   * for the location of the requested points.
+   *
+   * This function is made virtual in order to allow derived classes to choose
+   * shape function support points differently than the present class, which
+   * chooses the points as interpolation points on the boundary.
+   */
+  virtual
+  void
+  add_quad_support_points(const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                          std::vector<Point<spacedim> > &a) const;
+
+  /**
+   * Make MappingQ a friend since it needs to call the fill_fe_values()
+   * functions on its MappingQGeneric(1) sub-object.
+   */
+  template <int, int> friend class MappingQ;
+};
+
+
+
+/*@}*/
+
+/*----------------------------------------------------------------------*/
+
+#ifndef DOXYGEN
+// Read-only value of mapping shape function shape_nr at quadrature point qpoint.
+template<int dim, int spacedim>
+inline
+const double &
+MappingQGeneric<dim,spacedim>::InternalData::shape (const unsigned int qpoint,
+                                                    const unsigned int shape_nr) const
+{
+  Assert(qpoint*n_shape_functions + shape_nr < shape_values.size(),
+         ExcIndexRange(qpoint*n_shape_functions + shape_nr, 0,
+                       shape_values.size()));
+  return shape_values [qpoint*n_shape_functions + shape_nr];
+}
+
+// Writable value of mapping shape function shape_nr at quadrature point qpoint;
+// the flattened index into shape_values is checked by Assert before use.
+template<int dim, int spacedim>
+inline
+double &
+MappingQGeneric<dim,spacedim>::InternalData::shape (const unsigned int qpoint,
+                                                    const unsigned int shape_nr)
+{
+  Assert(qpoint*n_shape_functions + shape_nr < shape_values.size(),
+         ExcIndexRange(qpoint*n_shape_functions + shape_nr, 0,
+                       shape_values.size()));
+  return shape_values [qpoint*n_shape_functions + shape_nr];
+}
+
+// Read-only gradient of mapping shape function shape_nr at quadrature point qpoint.
+template<int dim, int spacedim>
+inline
+const Tensor<1,dim> &
+MappingQGeneric<dim,spacedim>::InternalData::derivative (const unsigned int qpoint,
+                                                         const unsigned int shape_nr) const
+{
+  Assert(qpoint*n_shape_functions + shape_nr < shape_derivatives.size(),
+         ExcIndexRange(qpoint*n_shape_functions + shape_nr, 0,
+                       shape_derivatives.size()));
+  return shape_derivatives [qpoint*n_shape_functions + shape_nr];
+}
+
+// Writable gradient of mapping shape function shape_nr at quadrature point qpoint;
+// the flattened index into shape_derivatives is checked by Assert before use.
+template<int dim, int spacedim>
+inline
+Tensor<1,dim> &
+MappingQGeneric<dim,spacedim>::InternalData::derivative (const unsigned int qpoint,
+                                                         const unsigned int shape_nr)
+{
+  Assert(qpoint*n_shape_functions + shape_nr < shape_derivatives.size(),
+         ExcIndexRange(qpoint*n_shape_functions + shape_nr, 0,
+                       shape_derivatives.size()));
+  return shape_derivatives [qpoint*n_shape_functions + shape_nr];
+}
+
+// Read-only second derivative of shape function shape_nr at quadrature point qpoint.
+template <int dim, int spacedim>
+inline
+const Tensor<2,dim> &
+MappingQGeneric<dim,spacedim>::InternalData::second_derivative (const unsigned int qpoint,
+    const unsigned int shape_nr) const
+{
+  Assert(qpoint*n_shape_functions + shape_nr < shape_second_derivatives.size(),
+         ExcIndexRange(qpoint*n_shape_functions + shape_nr, 0,
+                       shape_second_derivatives.size()));
+  return shape_second_derivatives [qpoint*n_shape_functions + shape_nr];
+}
+
+// Writable second derivative of shape function shape_nr at quadrature point qpoint.
+template <int dim, int spacedim>
+inline
+Tensor<2,dim> &
+MappingQGeneric<dim,spacedim>::InternalData::second_derivative (const unsigned int qpoint,
+    const unsigned int shape_nr)
+{
+  Assert(qpoint*n_shape_functions + shape_nr < shape_second_derivatives.size(),
+         ExcIndexRange(qpoint*n_shape_functions + shape_nr, 0,
+                       shape_second_derivatives.size()));
+  return shape_second_derivatives [qpoint*n_shape_functions + shape_nr];
+}
+// Read-only third derivative of shape function shape_nr at quadrature point qpoint.
+template <int dim, int spacedim>
+inline
+const Tensor<3,dim> &
+MappingQGeneric<dim,spacedim>::InternalData::third_derivative (const unsigned int qpoint,
+    const unsigned int shape_nr) const
+{
+  Assert(qpoint*n_shape_functions + shape_nr < shape_third_derivatives.size(),
+         ExcIndexRange(qpoint*n_shape_functions + shape_nr, 0,
+                       shape_third_derivatives.size()));
+  return shape_third_derivatives [qpoint*n_shape_functions + shape_nr];
+}
+
+// Writable third derivative of shape function shape_nr at quadrature point qpoint.
+template <int dim, int spacedim>
+inline
+Tensor<3,dim> &
+MappingQGeneric<dim,spacedim>::InternalData::third_derivative (const unsigned int qpoint,
+    const unsigned int shape_nr)
+{
+  Assert(qpoint*n_shape_functions + shape_nr < shape_third_derivatives.size(),
+         ExcIndexRange(qpoint*n_shape_functions + shape_nr, 0,
+                       shape_third_derivatives.size()));
+  return shape_third_derivatives [qpoint*n_shape_functions + shape_nr];
+}
+
+// Read-only fourth derivative of shape function shape_nr at quadrature point qpoint.
+template <int dim, int spacedim>
+inline
+const Tensor<4,dim> &
+MappingQGeneric<dim,spacedim>::InternalData::fourth_derivative (const unsigned int qpoint,
+    const unsigned int shape_nr) const
+{
+  Assert(qpoint*n_shape_functions + shape_nr < shape_fourth_derivatives.size(),
+         ExcIndexRange(qpoint*n_shape_functions + shape_nr, 0,
+                       shape_fourth_derivatives.size()));
+  return shape_fourth_derivatives [qpoint*n_shape_functions + shape_nr];
+}
+
+// Writable fourth derivative of shape function shape_nr at quadrature point qpoint.
+template <int dim, int spacedim>
+inline
+Tensor<4,dim> &
+MappingQGeneric<dim,spacedim>::InternalData::fourth_derivative (const unsigned int qpoint,
+    const unsigned int shape_nr)
+{
+  Assert(qpoint*n_shape_functions + shape_nr < shape_fourth_derivatives.size(),
+         ExcIndexRange(qpoint*n_shape_functions + shape_nr, 0,
+                       shape_fourth_derivatives.size()));
+  return shape_fourth_derivatives [qpoint*n_shape_functions + shape_nr];
+}
+
+
+
+// This mapping unconditionally reports that vertex locations are preserved.
+template <int dim, int spacedim>
+inline bool
+MappingQGeneric<dim,spacedim>::preserves_vertex_locations () const
+{
+  return true;
+}
+
+#endif // DOXYGEN
+
+/* -------------- declaration of explicit specializations ------------- */
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/grid/cell_id.h b/include/deal.II/grid/cell_id.h
new file mode 100644
index 0000000..2e1e3b1
--- /dev/null
+++ b/include/deal.II/grid/cell_id.h
@@ -0,0 +1,178 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__cell_id_h
+#define dealii__cell_id_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+
+#include <vector>
+#include <iostream>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/**
+ * A class to represent a unique ID for a cell in a Triangulation.  This class
+ * stores the index of the coarse cell from which a cell is descendant,
+ * together with information on how to reach the cell from that coarse cell
+ * (i.e., which child index to take on each level when moving from one cell to
+ * its children). The important point about this class is that an object of
+ * the current class uniquely identifies a cell in a triangulation, and it even
+ * does so in the context of objects of type
+ * parallel::distributed::Triangulation where the local portion of a mesh may
+ * not store all cells. For example, the CellId computed for a ghost cell on
+ * one processor will be exactly the same as the CellId computed for the very
+ * same cell on the processor that actually owns the cell, although the level
+ * and index of the iterators pointing to that cell <i>within the
+ * triangulation stored on each of the processors</i> may (and in general
+ * will) be different. In other words, CellId provides the tool with which it
+ * is possible to uniquely identify cells in a parallel triangulation, and
+ * consequently makes it possible to exchange data between processors tied to
+ * individual cells.
+ *
+ * @note How this data is internally represented is not of importance (and not
+ * exposed on purpose).
+ *
+ * @todo Does it make sense to implement a more efficient representation
+ * (internally and/or as a string)? If yes, something like a 64bit int as in
+ * p4est would be a good option.
+ */
+class CellId
+{
+public:
+  /**
+   * Construct a CellId from the index @p coarse_cell_id_ of a coarse cell
+   * and the list @p id_ of child indices. Both values are stored as given.
+   */
+  explicit CellId(unsigned int coarse_cell_id_, std::vector<unsigned char> id_)
+    :
+    coarse_cell_id(coarse_cell_id_),
+    id(id_)
+  {}
+
+  /**
+   * Construct an empty CellId: the list of child indices is empty and the
+   * coarse cell index is set to -1 converted to <code>unsigned int</code>.
+   */
+  CellId()
+    :
+    coarse_cell_id(static_cast<unsigned int>(-1)),
+    id()
+  {}
+
+  /**
+   * Return a string representation of this CellId.
+   */
+  std::string to_string() const;
+
+  /**
+   * Return whether this object and @p other store the same coarse cell
+   * index and the same list of child indices.
+   */
+  bool operator== (const CellId &other) const;
+
+  /**
+   * Return the negation of operator==.
+   */
+  bool operator!= (const CellId &other) const;
+
+  /**
+   * Ordering relation: objects are sorted by coarse cell index first and by
+   * lexicographic comparison of their lists of child indices second. A list
+   * that is a proper prefix of another list sorts before it.
+   */
+  bool operator<(const CellId &other) const;
+
+  // the stream operators read and write the private data directly
+  friend std::istream &operator>> (std::istream &is, CellId &cid);
+  friend std::ostream &operator<< (std::ostream &os, const CellId &cid);
+
+private:
+  /**
+   * Index of the coarse cell.
+   */
+  unsigned int coarse_cell_id;
+
+  /**
+   * List of child indices.
+   */
+  std::vector<unsigned char> id;
+};
+
+/**
+ * output CellId into a stream
+ */
+inline std::ostream &operator<< (std::ostream &os, const CellId &cid)
+{
+  // header: coarse cell index, '_', number of child indices, ':'
+  os << cid.coarse_cell_id << '_' << cid.id.size() << ':';
+
+  // every child index is written as a decimal number, without separators
+  typedef std::vector<unsigned char>::const_iterator child_iterator;
+  for (child_iterator child=cid.id.begin(); child!=cid.id.end(); ++child)
+    os << static_cast<int>(*child);
+
+  return os;
+}
+
+/**
+ * read CellId from a stream
+ */
+inline std::istream &operator>> (std::istream &is, CellId &cid)
+{
+  // Expected format (as written by operator<<):
+  //   <coarse cell index>_<number of child indices>:<one digit per child>
+  //
+  // Every scratch variable is initialized: a failed extraction leaves a
+  // char untouched, so the checks below must never read indeterminate
+  // values.
+  unsigned int cellid = 0;
+  is >> cellid;
+
+  // Leave @p cid untouched if there was nothing (or nothing valid) to read.
+  if (is.eof() || is.fail())
+    return is;
+
+  cid.coarse_cell_id = cellid;
+  char dummy = '\0';
+  is >> dummy;
+  Assert(dummy=='_', ExcMessage("invalid CellId"));
+  unsigned int idsize = 0;
+  is >> idsize;
+  is >> dummy;
+  Assert(dummy==':', ExcMessage("invalid CellId"));
+
+  cid.id.clear();
+  for (unsigned int i=0; i<idsize; ++i)
+    {
+      char value = '\0';
+      is >> value;
+
+      // Stop at the first failed read instead of appending garbage entries
+      // (possibly idsize of them); the stream's failure state tells the
+      // caller that the id is incomplete.
+      if (is.fail())
+        break;
+
+      cid.id.push_back(value-'0');
+    }
+  return is;
+}
+
+inline bool
+CellId::operator== (const CellId &other) const
+{
+  // equal only if both the coarse cell index and the child indices agree
+  return (this->coarse_cell_id == other.coarse_cell_id) && (id == other.id);
+}
+
+inline bool
+CellId::operator!= (const CellId &other) const
+{
+  // defined as the negation of operator==
+  const bool same_cell = (*this == other);
+  return !same_cell;
+}
+
+inline
+bool CellId::operator<(const CellId &other) const
+{
+  if (this->coarse_cell_id == other.coarse_cell_id)
+    // Same coarse cell: order by the lists of child indices. The comparison
+    // of std::vector is lexicographic, and a list that is a proper prefix of
+    // the other one compares smaller.
+    return id < other.id;
+
+  // different coarse cells: the coarse cell index alone decides
+  return this->coarse_cell_id < other.coarse_cell_id;
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/grid/filtered_iterator.h b/include/deal.II/grid/filtered_iterator.h
new file mode 100644
index 0000000..0be6f66
--- /dev/null
+++ b/include/deal.II/grid/filtered_iterator.h
@@ -0,0 +1,1227 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2002 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__filtered_iterator_h
+#define dealii__filtered_iterator_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/grid/tria_iterator_base.h>
+
+#include <set>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/**
+ * In this namespace a number of classes are declared that may be used as
+ * filters in the FilteredIterator class. The filters either check for binary
+ * information (for example, the IteratorFilters::Active filter class checks
+ * whether the object pointed to is active), or for valued information by
+ * comparison with prescribed values (for example, the LevelEqualTo filter
+ * class checks whether the level of the object pointed to by the iterator
+ * under consideration is equal to a value that was given to the filter upon
+ * construction).
+ *
+ * For examples of use of these classes as well as requirements on filters see
+ * the general description of the FilteredIterator class.
+ *
+ * @ingroup Iterators
+ * @author Wolfgang Bangerth, 2002
+ */
+namespace IteratorFilters
+{
+  /**
+   * Filter that evaluates to true if either the iterator points to an active
+   * object or an iterator past the end.
+   *
+   * @ingroup Iterators
+   */
+  class Active
+  {
+  public:
+    /**
+     * Evaluate the iterator and return true if the object is active or the
+     * iterator is past the end.
+     */
+    template <class Iterator>
+    bool operator () (const Iterator &i) const;
+  };
+
+  /**
+   * Filter that evaluates to true if either the iterator points to an object
+   * for which the user flag is set or an iterator past the end. See
+   * @ref GlossUserFlags
+   * for information about user flags.
+   *
+   * @ingroup Iterators
+   */
+  class UserFlagSet
+  {
+  public:
+    /**
+     * Evaluate the iterator and return true if the object has a set user flag
+     * or the iterator is past the end.
+     */
+    template <class Iterator>
+    bool operator () (const Iterator &i) const;
+  };
+
+
+  /**
+   * Filter that evaluates to true if either the iterator points to an object
+   * for which the user flag is not set or an iterator past the end. Inverse
+   * filter to the previous class.
+   *
+   * @ingroup Iterators
+   */
+  class UserFlagNotSet
+  {
+  public:
+    /**
+     * Evaluate the iterator and return true if the object has an unset user
+     * flag or the iterator is past the end.
+     */
+    template <class Iterator>
+    bool operator () (const Iterator &i) const;
+  };
+
+
+  /**
+   * Filter for iterators that evaluates to true if either the iterator is
+   * past the end or the level of the object pointed to is equal to a value
+   * given to the constructor.
+   *
+   * @ingroup Iterators
+   */
+  class LevelEqualTo
+  {
+  public:
+    /**
+     * Constructor. Store the level that iterators must have for this filter
+     * to evaluate to true.
+     */
+    LevelEqualTo (const unsigned int level);
+
+    /**
+     * Evaluation operator. Returns true if either the level of the object
+     * pointed to is equal to the stored value or the iterator is past the
+     * end.
+     */
+    template <class Iterator>
+    bool operator () (const Iterator &i) const;
+
+  protected:
+    /**
+     * Stored value to compare the level with.
+     */
+    const unsigned int level;
+  };
+
+
+
+  /**
+   * Filter for iterators that evaluates to true if either the iterator is
+   * past the end or the subdomain id of the object pointed to is equal to a
+   * value given to the constructor, assuming that the iterator allows
+   * querying for a subdomain id.
+   *
+   * @ingroup Iterators
+   */
+  class SubdomainEqualTo
+  {
+  public:
+    /**
+     * Constructor. Store the subdomain id that iterators must have for this
+     * filter to evaluate to true.
+     */
+    SubdomainEqualTo (const types::subdomain_id subdomain_id);
+
+    /**
+     * Evaluation operator. Returns true if either the subdomain of the object
+     * pointed to is equal to the stored value or the iterator is past the
+     * end.
+     */
+    template <class Iterator>
+    bool operator () (const Iterator &i) const;
+
+  protected:
+    /**
+     * Stored value to compare the subdomain with.
+     */
+    const types::subdomain_id subdomain_id;
+  };
+
+
+
+  /**
+   * Filter for iterators that evaluates to true if a cell is owned by the
+   * current processor, i.e., if it is a
+   * @ref GlossLocallyOwnedCell "locally owned cell".
+   *
+   * This class is used in step-32, in connection with the methods of the
+   * @ref distributed
+   * module.
+   *
+   * @ingroup Iterators
+   */
+  class LocallyOwnedCell
+  {
+  public:
+    /**
+     * Evaluation operator. Returns true if the cell is locally owned.
+     */
+    template <class Iterator>
+    bool operator () (const Iterator &i) const;
+  };
+
+
+
+  /**
+   * Filter for iterators that evaluates to true if the level subdomain id of
+   * a cell is equal to the current processor id.
+   *
+   * @ingroup Iterators
+   */
+  class LocallyOwnedLevelCell
+  {
+  public:
+    /**
+     * Evaluation operator. Returns true if the level subdomain id of the cell
+     * is equal to the current processor id.
+     */
+    template <class Iterator>
+    bool operator () (const Iterator &i) const;
+  };
+
+
+  /**
+   * Filter for iterators that evaluates to true if the material id of the
+   * object pointed to is equal to a value or set of values given to the
+   * constructor, assuming that the iterator allows querying for a material id.
+   *
+   * @author Jean-Paul Pelteret, Denis Davydov, 2015
+   *
+   * @ingroup Iterators
+   */
+  class MaterialIdEqualTo
+  {
+  public:
+    /**
+     * Constructor. Store the material id that iterators must have for this
+     * filter to evaluate to true and state if the cell must be locally owned.
+     */
+    MaterialIdEqualTo (const types::material_id material_id,
+                       const bool only_locally_owned = false);
+
+    /**
+     * Constructor. Store a collection of material ids, one of which iterators
+     * must have for this filter to evaluate to true, and state if the cell
+     * must be locally owned.
+     */
+    MaterialIdEqualTo (const std::set<types::material_id> material_ids,
+                       const bool only_locally_owned = false);
+
+    /**
+     * Evaluation operator. Returns true if the material id of the object
+     * pointed to is contained in the stored set of allowable values
+     * and, if required, if the cell is locally owned.
+     */
+    template <class Iterator>
+    bool operator () (const Iterator &i) const;
+
+  protected:
+    /**
+     * Stored values to compare the material id with.
+     */
+    const std::set<types::material_id> material_ids;
+    /**
+     * Flag stating whether only locally owned cells must return true.
+     */
+    const bool only_locally_owned;
+  };
+
+  /**
+   * Filter for iterators that evaluates to true if the active FE index of the
+   * object pointed to is equal to a value or set of values given to the
+   * constructor, assuming that the iterator allows querying for an active FE
+   * index.
+   *
+   * @author Jean-Paul Pelteret, Denis Davydov, 2015
+   *
+   * @ingroup Iterators
+   */
+  class ActiveFEIndexEqualTo
+  {
+  public:
+    /**
+     * Constructor. Store the active FE index that iterators must have for
+     * this filter to evaluate to true and state if the cell must be locally
+     * owned.
+     */
+    ActiveFEIndexEqualTo (const unsigned int active_fe_index,
+                          const bool only_locally_owned = false);
+
+    /**
+     * Constructor. Store a collection of active FE indices, one of which
+     * iterators must have for this filter to evaluate to true, and state if
+     * the cell must be locally owned.
+     */
+    ActiveFEIndexEqualTo (const std::set<unsigned int> active_fe_indices,
+                          const bool only_locally_owned = false);
+
+    /**
+     * Evaluation operator. Returns true if the active FE index of the object
+     * pointed to is contained in the stored set of allowable values
+     * and, if required, if the cell is locally owned.
+     */
+    template <class Iterator>
+    bool operator () (const Iterator &i) const;
+
+  protected:
+    /**
+     * Stored values to compare the active FE index with.
+     */
+    const std::set<unsigned int> active_fe_indices;
+    /**
+     * Flag stating whether only locally owned cells must return true.
+     */
+    const bool only_locally_owned;
+  };
+}
+
+
+/**
+ * This class provides a certain view on a range of triangulation or
+ * DoFHandler iterators by only iterating over elements that satisfy a given
+ * filter (called a <em>predicate</em>, following the notation of the C++
+ * standard library). Once initialized with a predicate and a value for the
+ * iterator, a filtered iterator hops to the next or previous element that
+ * satisfies the predicate if operators ++ or -- are invoked. Intermediate
+ * iterator values that lie in between but do not satisfy the predicate are
+ * skipped. It is thus very simple to write loops over a certain class of
+ * objects without the need to explicitly write down the condition they have
+ * to satisfy in each loop iteration. This in particular is helpful if
+ * functions are called with a pair of iterators denoting a range on which
+ * they shall act, by choosing a filtered iterator instead of usual ones.
+ *
+ * This class is used in step-18 and step-32.
+ *
+ *
+ * <h3>Predicates</h3>
+ *
+ * The object that represents the condition an iterator has to satisfy only
+ * has to provide an interface that allows calling the evaluation operator,
+ * i.e. <code>bool operator() (const BaseIterator&)</code>. This includes
+ * function pointers as well as classes that implement a <code>bool operator
+ * ()(const BaseIterator&)</code>. Then, the FilteredIterator will skip all
+ * objects where the return value of this function is <code>false</code>.
+ *
+ *
+ * An example of a simple valid predicate is the following: given the function
+ * @code
+ *   template <typename BIterator>
+ *   bool level_equal_to_3 (const BIterator& c)
+ *   {
+ *     return (static_cast<unsigned int>(c->level()) == 3);
+ *   };
+ * @endcode
+ * then
+ * @code
+ *   &level_equal_to_3<typename Triangulation<dim>::active_cell_iterator>
+ * @endcode
+ * is a valid predicate.
+ *
+ * Likewise, given the following binary function
+ * @code
+ *   template <typename BIterator>
+ *   bool level_equal_to (const BIterator&     c,
+ *                        const unsigned int level)
+ *   {
+ *     return (static_cast<unsigned int>(c->level()) == level);
+ *   };
+ * @endcode
+ * then
+ * @code
+ *   std::bind2nd (std::ptr_fun(&level_equal_to<active_cell_iterator>), 3)
+ * @endcode
+ * is another valid predicate (here: a function that returns true if the
+ * level is equal to the second argument; this second argument is bound to a
+ * fixed value using the @p std::bind2nd function).
+ *
+ * Finally, classes can be predicates. The following class is one:
+ * @code
+ *   class Active
+ *   {
+ *     public:
+ *       template <class Iterator>
+ *       bool operator () (const Iterator &i) const {
+ *         return (i->active());
+ *       }
+ *   };
+ * @endcode
+ * and objects of this type can be used as predicates. Likewise, this more
+ * complicated one can also be used:
+ * @code
+ *   class SubdomainEqualTo
+ *   {
+ *     public:
+ *       SubdomainEqualTo (const types::subdomain_id subdomain_id)
+ *                   : subdomain_id (subdomain_id) {};
+ *
+ *       template <class Iterator>
+ *       bool operator () (const Iterator &i) const {
+ *         return (i->subdomain_id() == subdomain_id);
+ *       }
+ *
+ *     private:
+ *       const types::subdomain_id subdomain_id;
+ *   };
+ * @endcode
+ * Objects like <code>SubdomainEqualTo(3)</code> can then be used as
+ * predicates.
+ *
+ * Since whenever a predicate is evaluated it is checked that the iterator
+ * checked is actually valid (i.e. not past the end), no checks for this case
+ * have to be performed inside predicates.
+ *
+ * A number of filter classes are already implemented in the IteratorFilters
+ * namespace, but writing different ones is simple following the examples
+ * above.
+ *
+ *
+ * <h3>Initialization of filtered iterators</h3>
+ *
+ * Filtered iterators are given a predicate at construction time which cannot
+ * be changed any more. This behaviour would be expected if the predicate
+ * would have been given as a template parameter to the class, but since that
+ * would make the declaration of filtered iterators a nightmare, we rather
+ * give the predicate as an unchangeable entity to the constructor. Note that
+ * one can assign a filtered iterator with one predicate to another filtered
+ * iterator with another type; yet, this does <em>not</em> change the
+ * predicate of the assigned-to iterator, only the pointer indicating the
+ * iterator is changed.
+ *
+ * If a filtered iterator is not assigned a value of the underlying
+ * (unfiltered) iterator type, the default value is taken. If, however, a
+ * value is given to the constructor, that value has either to be past the
+ * end, or has to satisfy the predicate. For example, if the predicate only
+ * evaluates to true if the level of an object is equal to three, then
+ * <code>tria.begin_active(3)</code> would be a valid choice while
+ * <code>tria.begin()</code> would not since the latter also returns iterators
+ * to non-active cells which always start at level 0.
+ *
+ * Since one often only has some iterator and wants to set a filtered iterator
+ * to the first one that satisfies a predicate (for example, the first one for
+ * which the user flag is set, or the first one with a given subdomain id),
+ * there are assignment functions #set_to_next_positive and
+ * #set_to_previous_positive that assign the next or last previous iterator
+ * that satisfies the predicate, i.e. they follow the list of iterators in
+ * either direction until they find a matching one (or the past-the-end
+ * iterator). Like the <code>operator=</code> they return the resulting value
+ * of the filtered iterator.
+ *
+ *
+ * <h3>Examples</h3>
+ *
+ * The following call counts the number of active cells that have a set user
+ * flag:
+ * @code
+ *   FilteredIterator<typename Triangulation<dim>::active_cell_iterator>
+ *      begin (IteratorFilters::UserFlagSet()),
+ *      end (IteratorFilters::UserFlagSet());
+ *   begin.set_to_next_positive(tria.begin_active());
+ *   end = tria.end();
+ *   n_flagged_cells = std::distance (begin, end);
+ * @endcode
+ * Note that by the @p set_to_next_positive call the first cell with a set
+ * user flag was assigned to the @p begin iterator. For the end iterator, no
+ * such call was necessary, since the past-the-end iterator always satisfies
+ * all predicates.
+ *
+ * The same can be achieved by the following snippet, though harder to read:
+ * @code
+ *   typedef FilteredIterator<typename Triangulation<dim>::active_cell_iterator> FI;
+ *   n_flagged_cells =
+ *      std::distance (FI(IteratorFilters::UserFlagSet())
+ *                            .set_to_next_positive(tria.begin_active()),
+ *                     FI(IteratorFilters::UserFlagSet(), tria.end()));
+ * @endcode
+ * It relies on the fact that if we create an unnamed filtered iterator with a
+ * given predicate but no iterator value and assign it the next positive value
+ * with respect to this predicate, it returns itself which is then used as the
+ * first parameter to the @p std::distance function. This procedure is not
+ * necessary for the end element to this function here, since the past-the-end
+ * iterator always satisfies the predicate so that we can assign this value to
+ * the filtered iterator directly in the constructor.
+ *
+ * Finally, the following loop only assembles the matrix on cells with
+ * subdomain id equal to three:
+ * @code
+ * FilteredIterator<typename Triangulation<dim>::active_cell_iterator>
+ *   cell (IteratorFilters::SubdomainEqualTo(3)),
+ *   endc (IteratorFilters::SubdomainEqualTo(3), tria.end());
+ * cell.set_to_next_positive (tria.begin_active());
+ * for (; cell!=endc; ++cell)
+ *   assemble_local_matrix (cell);
+ * @endcode
+ *
+ * Since comparison between filtered and unfiltered iterators is defined, we
+ * could as well have let the @p endc variable in the last example be of type
+ * Triangulation::active_cell_iterator since it is unchanged and its value
+ * does not depend on the filter.
+ *
+ * @ingroup grid
+ * @ingroup Iterators
+ * @author Wolfgang Bangerth, 2002
+ */
+template <typename BaseIterator>
+class FilteredIterator : public BaseIterator
+{
+public:
+  /**
+   * Typedef to the accessor type of the underlying iterator.
+   */
+  typedef typename BaseIterator::AccessorType AccessorType;
+
+  /**
+   * Constructor. Set the iterator to the default state and use the given
+   * predicate for filtering subsequent assignment and iteration.
+   */
+  template <typename Predicate>
+  FilteredIterator (Predicate p);
+
+  /**
+   * Constructor. Use the given predicate for filtering and initialize the
+   * iterator with the given value.
+   *
+   * If the initial value @p bi does not satisfy the predicate @p p then it is
+   * advanced until we either hit the past-the-end iterator, or the
+   * predicate is satisfied. This allows, for example, to write code like
+   * @code
+   *   FilteredIterator<typename Triangulation<dim>::active_cell_iterator>
+   *     cell (IteratorFilters::SubdomainEqualTo(13),
+   *           triangulation.begin_active());
+   * @endcode
+   *
+   * If the cell <code>triangulation.begin_active()</code> does not have a
+   * subdomain_id equal to 13, then the iterator will automatically be
+   * advanced to the first cell that has.
+   */
+  template <typename Predicate>
+  FilteredIterator (Predicate           p,
+                    const BaseIterator &bi);
+
+  /**
+   * Copy constructor. Copy the predicate and iterator value of the given
+   * argument.
+   */
+  FilteredIterator (const FilteredIterator &fi);
+
+  /**
+   * Destructor. Deletes the stored predicate object.
+   */
+  ~FilteredIterator ();
+
+  /**
+   * Assignment operator. Copy the iterator value of the argument, but as
+   * discussed in the class documentation, the predicate of the argument is
+   * not copied. The iterator value underlying the argument has to satisfy the
+   * predicate of the object assigned to, as given at its construction time.
+   */
+  FilteredIterator &operator = (const FilteredIterator &fi);
+
+  /**
+   * Assignment operator. Copy the iterator value of the argument, and keep
+   * the predicate of this object. The given iterator value has to satisfy the
+   * predicate of the object assigned to, as given at its construction time.
+   */
+  FilteredIterator &operator = (const BaseIterator &fi);
+
+  /**
+   * Search for the next iterator from @p bi onwards that satisfies the
+   * predicate of this object and assign it to this object.
+   *
+   * Since filtered iterators are automatically converted to the underlying
+   * base iterator type, you can also give a filtered iterator as argument to
+   * this function.
+   */
+  FilteredIterator &
+  set_to_next_positive (const BaseIterator &bi);
+
+  /**
+   * As above, but search for the previous iterator from @p bi backwards that
+   * satisfies the predicate of this object and assign it to this object.
+   *
+   * Since filtered iterators are automatically converted to the underlying
+   * base iterator type, you can also give a filtered iterator as argument to
+   * this function.
+   */
+  FilteredIterator &
+  set_to_previous_positive (const BaseIterator &bi);
+
+  /**
+   * Compare for equality of the underlying iterator values of this and the
+   * given object.
+   *
+   * We do not compare for equality of the predicates.
+   */
+  bool operator == (const FilteredIterator &fi) const;
+
+  /**
+   * Compare for equality of the underlying iterator value of this object with
+   * the given object.
+   *
+   * The predicate of this object is irrelevant for this operation.
+   */
+  bool operator == (const BaseIterator &fi) const;
+
+  /**
+   * Compare for inequality of the underlying iterator values of this and the
+   * given object.
+   *
+   * We do not compare for equality of the predicates.
+   */
+  bool operator != (const FilteredIterator &fi) const;
+
+  /**
+   * Compare for inequality of the underlying iterator value of this object
+   * with the given object.
+   *
+   * The predicate of this object is irrelevant for this operation.
+   */
+  bool operator != (const BaseIterator &fi) const;
+
+  /**
+   * Compare for ordering of the underlying iterator values of this and the
+   * given object.
+   *
+   * We do not compare the predicates.
+   */
+  bool operator <  (const FilteredIterator &fi) const;
+
+  /**
+   * Compare for ordering of the underlying iterator value of this object with
+   * the given object.
+   *
+   * The predicate of this object is irrelevant for this operation.
+   */
+  bool operator <  (const BaseIterator &fi) const;
+
+  /**
+   * Prefix advancement operator: move to the next iterator value satisfying
+   * the predicate and return the new iterator value.
+   */
+  FilteredIterator &operator ++ ();
+
+  /**
+   * Postfix advancement operator: move to the next iterator value satisfying
+   * the predicate and return the old iterator value.
+   */
+  FilteredIterator   operator ++ (int);
+
+  /**
+   * Prefix decrement operator: move to the previous iterator value satisfying
+   * the predicate and return the new iterator value.
+   */
+  FilteredIterator &operator -- ();
+
+  /**
+   * Postfix decrement operator: move to the previous iterator value
+   * satisfying the predicate and return the old iterator value.
+   */
+  FilteredIterator   operator -- (int);
+
+  /**
+   * Exception raised for an iterator value that violates the predicate.
+   */
+  DeclException1 (ExcInvalidElement,
+                  BaseIterator,
+                  << "The element " << arg1
+                  << " with which you want to compare or which you want to"
+                  << " assign from is invalid since it does not satisfy the predicate.");
+
+private:
+
+  /**
+   * Base class to encapsulate a predicate object. Since predicates can be of
+   * different types and we do not want to code these types into the template
+   * parameter list of the filtered iterator class, we use a base class with
+   * an abstract function and templatized derived classes that implement the
+   * use of actual predicate types through the virtual function.
+   *
+   * @ingroup Iterators
+   */
+  class PredicateBase
+  {
+  public:
+    /**
+     * Mark the destructor virtual to allow destruction through pointers to
+     * the base class.
+     */
+    virtual ~PredicateBase () {}
+
+    /**
+     * Abstract function which in derived classes denotes the evaluation of
+     * the predicate on the given iterator.
+     */
+    virtual bool operator () (const BaseIterator &bi) const = 0;
+
+    /**
+     * Generate a copy of this object, i.e. of the actual type of this
+     * pointer.
+     */
+    virtual PredicateBase *clone () const = 0;
+  };
+
+
+  /**
+   * Actual implementation of the above abstract base class. Use a template
+   * parameter to denote the actual type of the predicate and store a copy of
+   * it. When the virtual function is called evaluate the given iterator with
+   * the stored copy of the predicate.
+   *
+   * @ingroup Iterators
+   */
+  template <typename Predicate>
+  class PredicateTemplate : public PredicateBase
+  {
+  public:
+    /**
+     * Constructor. Take a predicate and store a copy of it.
+     */
+    PredicateTemplate (const Predicate &predicate);
+
+    /**
+     * Evaluate the iterator with the stored copy of the predicate.
+     */
+    virtual bool operator () (const BaseIterator &bi) const;
+
+    /**
+     * Generate a copy of this object, i.e. of the actual type of this
+     * pointer.
+     */
+    virtual PredicateBase *clone () const;
+
+  private:
+    /**
+     * Copy of the predicate.
+     */
+    const Predicate predicate;
+  };
+
+  /**
+   * Pointer to an object that encapsulates the actual data type of the
+   * predicate given to the constructor.
+   *
+   * The object is owned by this iterator: it is allocated in the
+   * constructors, cloned by the copy constructor, and deleted by the
+   * destructor. The assignment operators leave it untouched.
+   */
+  const PredicateBase *predicate;
+
+};
+
+
+
+/**
+ * Create an object of type FilteredIterator given the base iterator and
+ * predicate.  This function makes the creation of temporary objects (for
+ * example as function arguments) a lot simpler because one does not have to
+ * explicitly specify the type of the base iterator by hand -- it is deduced
+ * automatically here.
+ *
+ * @author Wolfgang Bangerth @relates FilteredIterator
+ */
+template <typename BaseIterator, typename Predicate>
+FilteredIterator<BaseIterator>
+make_filtered_iterator (const BaseIterator &i,
+                        const Predicate    &p)
+{
+  typedef FilteredIterator<BaseIterator> FilteredType;
+
+  // Attach the predicate first, then position the iterator on the first
+  // element from @p i onwards that the predicate accepts.
+  FilteredType result (p);
+  result.set_to_next_positive (i);
+  return result;
+}
+
+
+
+/* ------------------ Inline functions and templates ------------ */
+
+
+// Constructor taking only a predicate. The BaseIterator part is not
+// mentioned in the initializer list and is therefore default-constructed;
+// the predicate @p p is wrapped in a heap-allocated PredicateTemplate object
+// that this iterator owns (it is deleted in the destructor).
+template <typename BaseIterator>
+template <typename Predicate>
+inline
+FilteredIterator<BaseIterator>::
+FilteredIterator (Predicate p)
+  :
+  predicate (new PredicateTemplate<Predicate>(p))
+{}
+
+
+
+template <typename BaseIterator>
+template <typename Predicate>
+inline
+FilteredIterator<BaseIterator>::
+FilteredIterator (Predicate          p,
+                  const BaseIterator &bi)
+  :
+  BaseIterator (bi),
+  predicate (new PredicateTemplate<Predicate>(p))
+{
+  // Iterators that are not in the valid state (e.g. past the end) are
+  // accepted as they are; only a valid one is tested against the predicate.
+  const bool points_to_object = (this->state() == IteratorState::valid);
+
+  // If the start value is rejected by the predicate, move forward to the
+  // first element that is accepted (or until the state is no longer valid).
+  if (points_to_object && !(*predicate) (*this))
+    set_to_next_positive (bi);
+}
+
+
+
+// Copy constructor: copies the iterator value of @p fi and gives the new
+// object its own copy of the predicate, obtained through clone(), so that
+// each FilteredIterator deletes only the predicate object it owns.
+template <typename BaseIterator>
+inline
+FilteredIterator<BaseIterator>::
+FilteredIterator (const FilteredIterator &fi)
+  :
+// this construction looks strange, but without going through the
+// address of fi, GCC would not cast fi to the base class of type
+// BaseIterator but tries to go through constructing a new
+// BaseIterator with an Accessor.
+// NOTE(review): the C-style cast also drops the const qualifier of fi;
+// the exact form is kept because it was chosen for that compiler issue.
+  BaseIterator (*(BaseIterator *)(&fi)),
+  predicate (fi.predicate->clone ())
+{}
+
+
+
+// Delete the predicate object held by this iterator and reset the pointer.
+template <typename BaseIterator>
+inline
+FilteredIterator<BaseIterator>::~FilteredIterator ()
+{
+  delete predicate;
+  predicate = 0;
+}
+
+
+
+// Take over only the iterator position of @p fi; the predicate of this
+// object is left untouched. The Assert requires a valid @p fi to satisfy it.
+template <typename BaseIterator>
+inline FilteredIterator<BaseIterator> &
+FilteredIterator<BaseIterator>::operator = (const FilteredIterator &fi)
+{
+  Assert ((fi.state() != IteratorState::valid) || (*predicate)(fi),
+          ExcInvalidElement(fi));
+  BaseIterator::operator = (fi);
+  return *this;
+}
+
+
+
+// Set the position from a plain base iterator. The Assert requires that a
+// valid @p bi satisfies the predicate stored in this object.
+template <typename BaseIterator>
+inline FilteredIterator<BaseIterator> &
+FilteredIterator<BaseIterator>::operator = (const BaseIterator &bi)
+{
+  Assert ((bi.state() != IteratorState::valid) || (*predicate)(bi),
+          ExcInvalidElement(bi));
+  BaseIterator::operator = (bi);
+  return *this;
+}
+
+
+
+// Move to @p bi and then step forward until the predicate accepts the
+// current position or the iterator is no longer in the valid state.
+template <typename BaseIterator>
+inline FilteredIterator<BaseIterator> &
+FilteredIterator<BaseIterator>::set_to_next_positive (const BaseIterator &bi)
+{
+  BaseIterator::operator = (bi);
+  // skip every position the predicate rejects
+  while (this->state() == IteratorState::valid && !(*predicate)(*this))
+    BaseIterator::operator++ ();
+
+  return *this;
+}
+
+
+
+// Move to @p bi and then step backward until the predicate accepts the
+// current position or the iterator is no longer in the valid state.
+template <typename BaseIterator>
+inline FilteredIterator<BaseIterator> &
+FilteredIterator<BaseIterator>::set_to_previous_positive (const BaseIterator &bi)
+{
+  BaseIterator::operator = (bi);
+  // skip every position the predicate rejects
+  while (this->state() == IteratorState::valid && !(*predicate)(*this))
+    BaseIterator::operator-- ();
+
+  return *this;
+}
+
+
+
+// Two filtered iterators compare equal if their base iterator parts do; the
+// predicates are not taken into account.
+template <typename BaseIterator>
+inline bool
+FilteredIterator<BaseIterator>::operator == (const FilteredIterator &fi) const
+{
+  const BaseIterator &lhs = static_cast<const BaseIterator &>(*this);
+  const BaseIterator &rhs = static_cast<const BaseIterator &>(fi);
+  return lhs == rhs;
+}
+
+
+
+// Inequality is decided by the base iterator parts alone; the predicates
+// are not taken into account.
+template <typename BaseIterator>
+inline bool
+FilteredIterator<BaseIterator>::operator != (const FilteredIterator &fi) const
+{
+  const BaseIterator &lhs = static_cast<const BaseIterator &>(*this);
+  const BaseIterator &rhs = static_cast<const BaseIterator &>(fi);
+  return lhs != rhs;
+}
+
+
+
+// Ordering is delegated to the less-than operator of the base iterator; the
+// predicates are not taken into account.
+template <typename BaseIterator>
+inline bool
+FilteredIterator<BaseIterator>::operator < (const FilteredIterator &fi) const
+{
+  const BaseIterator &lhs = static_cast<const BaseIterator &>(*this);
+  const BaseIterator &rhs = static_cast<const BaseIterator &>(fi);
+  return lhs < rhs;
+}
+
+
+
+
+// Compare the base iterator part of this object with @p bi for equality.
+template <typename BaseIterator>
+inline bool
+FilteredIterator<BaseIterator>::operator == (const BaseIterator &bi) const
+{
+  const BaseIterator &self = static_cast<const BaseIterator &>(*this);
+  return self == bi;
+}
+
+
+
+// Compare the base iterator part of this object with @p bi for inequality.
+template <typename BaseIterator>
+inline bool
+FilteredIterator<BaseIterator>::operator != (const BaseIterator &bi) const
+{
+  const BaseIterator &self = static_cast<const BaseIterator &>(*this);
+  return self != bi;
+}
+
+
+
+// Order the base iterator part of this object relative to @p bi.
+template <typename BaseIterator>
+inline bool
+FilteredIterator<BaseIterator>::operator < (const BaseIterator &bi) const
+{
+  const BaseIterator &self = static_cast<const BaseIterator &>(*this);
+  return self < bi;
+}
+
+
+// Prefix increment: step forward at least once and keep going while the
+// position is valid but rejected. A non-valid iterator is left unchanged.
+template <typename BaseIterator>
+inline FilteredIterator<BaseIterator> &
+FilteredIterator<BaseIterator>::operator ++ ()
+{
+  if (this->state() != IteratorState::valid)
+    return *this;
+  do
+    BaseIterator::operator++ ();
+  while ((this->state() == IteratorState::valid) && !(*predicate) (*this));
+  return *this;
+}
+
+
+
+// Postfix increment: advance in the same way as the prefix form, but return
+// a copy of the iterator as it was before the call.
+template <typename BaseIterator>
+inline FilteredIterator<BaseIterator>
+FilteredIterator<BaseIterator>::operator ++ (int)
+{
+  const FilteredIterator old_state = *this;
+
+  if (this->state() != IteratorState::valid)
+    return old_state;
+  do
+    BaseIterator::operator++ ();
+  while ((this->state() == IteratorState::valid) && !(*predicate) (*this));
+  return old_state;
+}
+
+
+
+
+// Prefix decrement: step backward at least once and keep going while the
+// position is valid but rejected. A non-valid iterator is left unchanged.
+template <typename BaseIterator>
+inline FilteredIterator<BaseIterator> &
+FilteredIterator<BaseIterator>::operator -- ()
+{
+  if (this->state() != IteratorState::valid)
+    return *this;
+  do
+    BaseIterator::operator-- ();
+  while ((this->state() == IteratorState::valid) && !(*predicate) (*this));
+  return *this;
+}
+
+
+
+// Postfix decrement: move backward in the same way as the prefix form, but
+// return a copy of the iterator as it was before the call.
+template <typename BaseIterator>
+inline FilteredIterator<BaseIterator>
+FilteredIterator<BaseIterator>::operator -- (int)
+{
+  const FilteredIterator old_state = *this;
+
+  if (this->state() != IteratorState::valid)
+    return old_state;
+  do
+    BaseIterator::operator-- ();
+  while ((this->state() == IteratorState::valid) && !(*predicate) (*this));
+  return old_state;
+}
+
+
+
+// Keep a copy of the predicate object handed in by the caller.
+template <typename BaseIterator>
+template <typename Predicate>
+inline
+FilteredIterator<BaseIterator>::PredicateTemplate<Predicate>::
+PredicateTemplate (const Predicate &predicate)
+  : predicate (predicate)
+{}
+
+
+
+// Evaluate the stored predicate for the iterator @p bi.
+template <typename BaseIterator>
+template <typename Predicate>
+bool FilteredIterator<BaseIterator>::PredicateTemplate<Predicate>::
+operator () (const BaseIterator &bi) const
+{
+  return predicate(bi);
+}
+
+
+
+// Return a copy of this object allocated with new, holding the same predicate.
+template <typename BaseIterator>
+template <typename Predicate>
+typename FilteredIterator<BaseIterator>::PredicateBase *
+FilteredIterator<BaseIterator>::PredicateTemplate<Predicate>::clone () const
+{
+  return new PredicateTemplate (predicate);
+}
+
+
+
+namespace IteratorFilters
+{
+
+// ---------------- IteratorFilters::Active ---------
+
+  // Accept exactly those objects for which <tt>i->active()</tt> is true.
+  template <class Iterator>
+  inline bool
+  Active::operator () (const Iterator &i) const
+  {
+    return i->active();
+  }
+
+
+// ---------------- IteratorFilters::UserFlagSet ---------
+
+  // Accept exactly those objects whose user flag is set.
+  template <class Iterator>
+  inline bool
+  UserFlagSet::operator () (const Iterator &i) const
+  {
+    return i->user_flag_set();
+  }
+
+
+// ---------------- IteratorFilters::UserFlagNotSet ---------
+
+  // Accept exactly those objects whose user flag is not set.
+  template <class Iterator>
+  inline bool
+  UserFlagNotSet::operator () (const Iterator &i) const
+  {
+    return !i->user_flag_set();
+  }
+
+
+// ---------------- IteratorFilters::LevelEqualTo ---------
+  // Remember the level that objects are later compared against.
+  inline
+  LevelEqualTo::LevelEqualTo (const unsigned int level)
+    : level (level)
+  {}
+
+
+
+  // True if the level of the object, cast to unsigned, equals the stored one.
+  template <class Iterator>
+  inline bool
+  LevelEqualTo::operator () (const Iterator &i) const
+  {
+    return static_cast<unsigned int>(i->level()) == level;
+  }
+
+
+
+// ---------------- IteratorFilters::SubdomainEqualTo ---------
+  // Remember the subdomain id that objects are later compared against.
+  inline
+  SubdomainEqualTo::SubdomainEqualTo (const types::subdomain_id subdomain_id)
+    : subdomain_id (subdomain_id)
+  {}
+
+
+
+  // True if the subdomain id of the object equals the stored one.
+  template <class Iterator>
+  inline bool
+  SubdomainEqualTo::operator () (const Iterator &i) const
+  {
+    return i->subdomain_id() == subdomain_id;
+  }
+
+
+
+// ---------------- IteratorFilters::LocallyOwnedCell ---------
+
+  // Accept exactly those objects for which is_locally_owned() is true.
+  template <class Iterator>
+  inline bool
+  LocallyOwnedCell::operator () (const Iterator &i) const
+  {
+    return i->is_locally_owned();
+  }
+
+
+// ---------------- IteratorFilters::LocallyOwnedLevelCell ---------
+
+  // Accept exactly those objects for which is_locally_owned_on_level() is true.
+  template <class Iterator>
+  inline bool
+  LocallyOwnedLevelCell::operator () (const Iterator &i) const
+  {
+    return i->is_locally_owned_on_level();
+  }
+
+
+
+// ---------------- IteratorFilters::MaterialIdEqualTo ---------
+  inline
+  MaterialIdEqualTo::MaterialIdEqualTo (const types::material_id material_id,
+                                        const bool only_locally_owned)
+    :
+    // Note: material_ids is a const member, so it has to be filled right
+    // here in the initializer list. C++98/03 offers no brace initialization
+    // for this, so we pass a pointer range and treat the single value
+    // material_id as an array of length one. Forming the one-past-the-end
+    // pointer is well defined according to [expr.add].4 (ISO 14882).
+    material_ids (&material_id, &material_id+1),
+    only_locally_owned (only_locally_owned)
+  {}
+
+
+
+  // Store a copy of the given set of material ids and the ownership flag.
+  inline
+  MaterialIdEqualTo::MaterialIdEqualTo (const std::set<types::material_id> material_ids,
+                                        const bool only_locally_owned)
+    : material_ids (material_ids),
+      only_locally_owned (only_locally_owned)
+  {}
+
+
+
+  // Accept objects whose material id is contained in the stored set; if
+  // only_locally_owned is set, the object must also be locally owned.
+  template <class Iterator>
+  inline bool
+  MaterialIdEqualTo::operator () (const Iterator &i) const
+  {
+    const bool id_matches = (material_ids.find(i->material_id()) != material_ids.end());
+    return id_matches && (!only_locally_owned || i->is_locally_owned());
+  }
+
+
+
+// ---------------- IteratorFilters::ActiveFEIndexEqualTo ---------
+  inline
+  ActiveFEIndexEqualTo::ActiveFEIndexEqualTo (const unsigned int active_fe_index,
+                                              const bool only_locally_owned)
+    :
+    // Note: active_fe_indices is a const member, so it has to be filled
+    // right here in the initializer list. C++98/03 offers no brace
+    // initialization for this, so we pass a pointer range and treat the
+    // single value active_fe_index as an array of length one. Forming the
+    // one-past-the-end pointer is well defined, see [expr.add].4 (ISO 14882).
+    active_fe_indices (&active_fe_index, &active_fe_index+1),
+    only_locally_owned (only_locally_owned)
+  {}
+
+
+
+  // Store a copy of the given set of active FE indices and the ownership flag.
+  inline
+  ActiveFEIndexEqualTo::ActiveFEIndexEqualTo (const std::set<unsigned int> active_fe_indices,
+                                              const bool only_locally_owned)
+    : active_fe_indices (active_fe_indices),
+      only_locally_owned (only_locally_owned)
+  {}
+
+
+
+  // Accept objects whose active FE index is contained in the stored set; if
+  // only_locally_owned is set, the object must also be locally owned.
+  template <class Iterator>
+  inline bool
+  ActiveFEIndexEqualTo::operator () (const Iterator &i) const
+  {
+    const bool index_matches = (active_fe_indices.find(i->active_fe_index()) != active_fe_indices.end());
+    return index_matches && (!only_locally_owned || i->is_locally_owned());
+  }
+
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+/*------------------------- filtered_iterator.h ------------------------*/
+#endif
+/*------------------------- filtered_iterator.h ------------------------*/
+
+
diff --git a/include/deal.II/grid/grid_generator.h b/include/deal.II/grid/grid_generator.h
new file mode 100644
index 0000000..b1c9c77
--- /dev/null
+++ b/include/deal.II/grid/grid_generator.h
@@ -0,0 +1,1138 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__grid_generator_h
+#define dealii__grid_generator_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/std_cxx11/array.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/point.h>
+#include <deal.II/base/table.h>
+#include <deal.II/base/function.h>
+#include <deal.II/grid/tria.h>
+#include <map>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int dim, int spacedim> class Triangulation;
+template <typename number> class Vector;
+template <typename number> class SparseMatrix;
+
+
+/**
+ * This namespace provides a collection of functions for generating
+ * triangulations for some basic geometries.
+ *
+ * Some of these functions receive a flag @p colorize. If this is set, parts
+ * of the boundary receive different
+ * @ref GlossBoundaryIndicator "boundary indicators",
+ * allowing them to be distinguished for the purpose of attaching geometry
+ * objects and evaluating different boundary conditions.
+ *
+ * @ingroup grid
+ */
+namespace GridGenerator
+{
+  /**
+   * @name Creating meshes for basic geometries
+   */
+  ///@{
+
+  /**
+   * Initialize the given triangulation with a hypercube (line in 1D, square
+   * in 2D, etc) consisting of exactly one cell. The hypercube volume is the
+   * tensor product interval $[left,right]^{\text{dim}}$ in the present number
+   * of dimensions, where the limits are given as arguments. They default to
+   * zero and unity, then producing the unit hypercube. If the argument @p
+   * colorize is false, all boundary indicators are set to zero ("not
+   * colorized") for 2d and 3d. If it is true, the boundary is colorized as in
+   * hyper_rectangle(). In 1d the indicators are always colorized, see
+   * hyper_rectangle().
+   *
+   * @image html hyper_cubes.png
+   *
+   * If @p dim < @p spacedim, this will create a @p dim dimensional object in
+   * the first @p dim coordinate directions embedded into the @p spacedim
+   * dimensional space with the remaining entries set to zero. For example, a
+   * <tt>Triangulation@<2,3@></tt> will be a square in the xy plane with z=0.
+   *
+   * See also subdivided_hyper_cube() for a coarse mesh consisting of several
+   * cells. See hyper_rectangle(), if different lengths in different ordinate
+   * directions are required.
+   *
+   * @note The triangulation needs to be void upon calling this function.
+   */
+  template <int dim, int spacedim>
+  void hyper_cube (Triangulation<dim,spacedim>  &tria,
+                   const double                  left = 0.,
+                   const double                  right= 1.,
+                   const bool                    colorize= false);
+
+  /**
+   * \brief %Triangulation of a d-simplex with (d+1) vertices and mesh cells.
+   *
+   * The @p vertices argument contains a vector with all d+1 vertices of the
+   * simplex. They must be given in an order such that the vectors from the
+   * first vertex to each of the others form a right-handed system. And I am
+   * not happy about the discrimination involved here.
+   *
+   * The meshes generated in two and three dimensions are
+   *
+   * @image html simplex_2d.png
+   * @image html simplex_3d.png
+   *
+   * @param tria The Triangulation to create. It needs to be empty upon
+   * calling this function.
+   *
+   * @param vertices The dim+1 corners of the simplex.
+   *
+   * @note Implemented for <tt>Triangulation@<2,2@></tt>,
+   * <tt>Triangulation@<3,3@></tt>.
+   *
+   * @author Guido Kanschat
+   * @date 2015
+   */
+  template <int dim>
+  void simplex(Triangulation<dim, dim> &tria,
+               const std::vector<Point<dim> > &vertices);
+
+  /**
+   * Same as hyper_cube(), but with the difference that not only one cell is
+   * created but each coordinate direction is subdivided into @p repetitions
+   * cells. Thus, the number of cells filling the given volume is
+   * <tt>repetitions<sup>dim</sup></tt>.
+   *
+   * If @p dim < @p spacedim, this will create a @p dim dimensional object in
+   * the first @p dim coordinate directions embedded into the @p spacedim
+   * dimensional space with the remaining entries set to zero. For example, a
+   * <tt>Triangulation@<2,3@></tt> will be a square in the xy plane with z=0.
+   *
+   * @note The triangulation needs to be void upon calling this function.
+   */
+  template <int dim, int spacedim>
+  void subdivided_hyper_cube (Triangulation<dim,spacedim>  &tria,
+                              const unsigned int   repetitions,
+                              const double         left = 0.,
+                              const double         right= 1.);
+
+  /**
+   * Create a coordinate-parallel brick from the two diagonally opposite
+   * corner points @p p1 and @p p2.
+   *
+   * If the @p colorize flag is set, the @p boundary_ids of the surfaces are
+   * assigned, such that the lower one in @p x-direction is 0, the upper one
+   * is 1. The indicators for the surfaces in @p y-direction are 2 and 3, the
+   * ones for @p z are 4 and 5. Additionally, material ids are assigned to the
+   * cells according to the octant their center is in: being in the right half
+   * plane for any coordinate direction <i>x<sub>i</sub></i> adds
+   * 2<sup>i</sup>. For instance, the center point (1,-1,1) yields a material
+   * id 5.
+   *
+   * If @p dim < @p spacedim, this will create a @p dim dimensional object in
+   * the first @p dim coordinate directions embedded into the @p spacedim
+   * dimensional space with the remaining entries set to zero. For example, a
+   * <tt>Triangulation@<2,3@></tt> will be a rectangle in the xy plane with
+   * z=0, defined by the two opposing corners @p p1 and @p p2.
+   *
+   * @note The triangulation needs to be void upon calling this function.
+   */
+  template <int dim, int spacedim>
+  void hyper_rectangle (Triangulation<dim,spacedim> &tria,
+                        const Point<dim>            &p1,
+                        const Point<dim>            &p2,
+                        const bool                  colorize = false);
+
+  /**
+   * Create a coordinate-parallel parallelepiped from the two diagonally
+   * opposite corner points @p p1 and @p p2. In direction @p i,
+   * <tt>repetitions[i]</tt> cells are generated.
+   *
+   * To get cells with an aspect ratio different from that of the domain, use
+   * different numbers of subdivisions in different coordinate directions. The
+   * minimum number of subdivisions in each direction is 1. @p repetitions is
+   * a list of integers denoting the number of subdivisions in each coordinate
+   * direction.
+   *
+   * If the @p colorize flag is set, the @p boundary_ids of the surfaces are
+   * assigned, such that the lower one in @p x-direction is 0, the upper one
+   * is 1 (the left and the right vertical face). The indicators for the
+   * surfaces in @p y-direction are 2 and 3, the ones for @p z are 4 and 5.
+   * Additionally, material ids are assigned to the cells according to the
+   * octant their center is in: being in the right half plane for any
+   * coordinate direction <i>x<sub>i</sub></i> adds 2<sup>i</sup>. For
+   * instance, the center point (1,-1,1) yields a material id 5 (this means
+   * that in 2d only material ids 0,1,2,3 are assigned independent from the
+   * number of repetitions).
+   *
+   * Note that the @p colorize flag is ignored in 1d and is assumed to always
+   * be true. That means the boundary indicator is 0 on the left and 1 on the
+   * right.  See step-15 for details.
+   *
+   * If @p dim < @p spacedim, this will create a @p dim dimensional object in
+   * the first @p dim coordinate directions embedded into the @p spacedim
+   * dimensional space with the remaining entries set to zero. For example, a
+   * <tt>Triangulation@<2,3@></tt> will be a rectangle in the xy plane with
+   * z=0, defined by the two opposing corners @p p1 and @p p2.
+   *
+   * @note For an example of the use of this function see the step-28 tutorial
+   * program.
+   *
+   * @param tria The Triangulation to create. It needs to be empty upon
+   * calling this function.
+   *
+   * @param repetitions A vector of dim positive values denoting the number of
+   * cells to generate in that direction.
+   *
+   * @param p1 First corner point.
+   *
+   * @param p2 Second corner opposite to @p p1.
+   *
+   * @param colorize Assign different boundary ids if set to true.
+   *
+   */
+  template <int dim, int spacedim>
+  void
+  subdivided_hyper_rectangle (Triangulation<dim,spacedim>     &tria,
+                              const std::vector<unsigned int> &repetitions,
+                              const Point<dim>                &p1,
+                              const Point<dim>                &p2,
+                              const bool                      colorize=false);
+
+  /**
+   * Like the previous function. However, here the second argument does not
+   * denote the number of subdivisions in each coordinate direction, but a
+   * sequence of step sizes for each coordinate direction. The domain will
+   * therefore be subdivided into <code>step_sizes[i].size()</code> cells in
+   * coordinate direction <code>i</code>, with widths
+   * <code>step_sizes[i][j]</code> for the <code>j</code>th cell.
+   *
+   * This function is therefore the right one to generate graded meshes where
+   * cells are concentrated in certain areas, rather than a uniformly
+   * subdivided mesh as the previous function generates.
+   *
+   * The step sizes have to add up to the dimensions of the hyper rectangle
+   * specified by the points @p p1 and @p p2.
+   */
+  template <int dim>
+  void
+  subdivided_hyper_rectangle (Triangulation<dim>                      &tria,
+                              const std::vector<std::vector<double> > &step_sizes,
+                              const Point<dim>                        &p_1,
+                              const Point<dim>                        &p_2,
+                              const bool                              colorize);
+
+  /**
+   * Like the previous function, but with the following twist: the @p
+   * material_id argument is a dim-dimensional array that, for each cell,
+   * indicates which material_id should be set. In addition, and this is the
+   * major new functionality, if the material_id of a cell is <tt>(unsigned
+   * char)(-1)</tt>, then that cell is deleted from the triangulation, i.e.
+   * the domain will have a void there.
+   *
+   * @note If you need a lot of holes, you may consider cheese().
+   */
+  template <int dim>
+  void
+  subdivided_hyper_rectangle (Triangulation<dim>                       &tria,
+                              const std::vector< std::vector<double> > &spacing,
+                              const Point<dim>                         &p,
+                              const Table<dim,types::material_id>      &material_id,
+                              const bool                                colorize=false);
+
+  /**
+   * \brief Rectangular domain with rectangular pattern of holes
+   *
+   * The domain itself is rectangular, very much as if it had been generated
+   * by subdivided_hyper_rectangle(). The argument <code>holes</code>
+   * specifies how many square holes the domain should have in each coordinate
+   * direction. The total number of mesh cells in that direction is then twice
+   * this number plus one.
+   *
+   * The number of holes in one direction must be at least one.
+   *
+   * An example with two by three holes is
+   *
+   * @image html cheese_2d.png
+   *
+   * If @p dim < @p spacedim, this will create a @p dim dimensional object in
+   * the first @p dim coordinate directions embedded into the @p spacedim
+   * dimensional space with the remaining entries set to zero.
+   *
+   * @param tria The Triangulation to create. It needs to be empty upon
+   * calling this function.
+   *
+   * @param holes Positive number of holes in each of the dim directions.
+   * @author Guido Kanschat
+   * @date 2015
+   */
+  template <int dim, int spacedim>
+  void
+  cheese (Triangulation<dim, spacedim> &tria,
+          const std::vector<unsigned int> &holes);
+
+  /**
+   * A parallelogram. The first corner point is the origin. The @p dim
+   * adjacent points are the ones given in the second argument and the fourth
+   * point will be the sum of these two vectors.  Colorizing is done in the
+   * same way as in hyper_rectangle().
+   *
+   * @note This function is implemented in 2d only.
+   *
+   * @note The triangulation needs to be void upon calling this function.
+   */
+  template <int dim>
+  void
+  parallelogram (Triangulation<dim> &tria,
+                 const Point<dim>  (&corners)[dim],
+                 const bool          colorize=false);
+
+  /**
+   * A parallelepiped. The first corner point is the origin. The @p dim
+   * adjacent points are vectors describing the edges of the parallelepiped
+   * with respect to the origin. Additional points are sums of these dim
+   * vectors. Colorizing is done according to hyper_rectangle().
+   *
+   * @note This function silently reorders the vertices on the cells to
+   * lexicographic ordering (see <code>GridReordering::reorder_grid</code>).
+   * In other words, if reordering of the vertices does occur, the ordering of
+   * vertices in the array of <code>corners</code> will no longer refer to the
+   * same triangulation.
+   *
+   * @note The triangulation needs to be void upon calling this function.
+   */
+  template <int dim>
+  void
+  parallelepiped (Triangulation<dim> &tria,
+                  const Point<dim>  (&corners) [dim],
+                  const bool          colorize = false);
+
+  /**
+   * A subdivided parallelepiped. The first corner point is the origin. The @p
+   * dim adjacent points are vectors describing the edges of the
+   * parallelepiped with respect to the origin. Additional points are sums of
+   * these dim vectors. The variable @p n_subdivisions designates the number
+   * of subdivisions in each of the @p dim directions. Colorizing is done
+   * according to hyper_rectangle().
+   *
+   * @note The triangulation needs to be void upon calling this function.
+   */
+  template <int dim>
+  void
+  subdivided_parallelepiped (Triangulation<dim>  &tria,
+                             const unsigned int   n_subdivisions,
+                             const Point<dim>   (&corners) [dim],
+                             const bool           colorize = false);
+
+  /**
+   * A subdivided parallelepiped, i.e., the same as above, but where the
+   * number of subdivisions in each of the @p dim directions may vary.
+   * Colorizing is done according to hyper_rectangle().
+   *
+   * @note The triangulation needs to be void upon calling this function.
+   */
+  template <int dim>
+  void
+  subdivided_parallelepiped (Triangulation<dim>  &tria,
+#ifndef _MSC_VER
+                             const unsigned int(&n_subdivisions)[dim],
+#else
+                             const unsigned int *n_subdivisions,
+#endif
+                             const Point<dim>   (&corners) [dim],
+                             const bool           colorize = false);
+
+  /**
+   * A subdivided parallelepiped.
+   *
+   * @param tria The Triangulation to create. It needs to be empty upon
+   * calling this function.
+   *
+   * @param origin First corner of the parallelepiped.
+   *
+   * @param edges An array of @p dim tensors describing the length and
+   * direction of the edges from @p origin.
+   *
+   * @param subdivisions Number of subdivisions in each of the dim directions.
+   * Each entry must be positive. An empty vector is equivalent to one
+   * subdivision in each direction.
+   *
+   * @param colorize Assign different boundary ids if set to true.
+   *
+   * @note Implemented for all combinations of @p dim and @p spacedim.
+   *
+   * @note You likely need to help the compiler by explicitly specifying the
+   * two template parameters when calling this function.
+   */
+  template <int dim, int spacedim>
+  void
+  subdivided_parallelepiped (Triangulation<dim, spacedim>  &tria,
+                             const Point<spacedim> &origin,
+                             const std_cxx11::array<Tensor<1,spacedim>,dim> &edges,
+                             const std::vector<unsigned int> &subdivisions = std::vector<unsigned int>(),
+                             const bool colorize = false);
+
+  /**
+   * Hypercube with a layer of hypercubes around it. The first two parameters
+   * give the lower and upper bound of the inner hypercube in all coordinate
+   * directions.  @p thickness marks the size of the layer cells.
+   *
+   * If the flag @p colorize is set, the outer cells get material ids
+   * according to the following scheme: extending over the inner cube in (+/-)
+   * x-direction: 1/2. In y-direction 4/8, in z-direction 16/32. The cells at
+   * corners and edges (3d) get these values bitwise or'd.
+   *
+   * Presently only available in 2d and 3d.
+   *
+   * @note The triangulation needs to be void upon calling this function.
+   */
+  template <int dim>
+  void enclosed_hyper_cube (Triangulation<dim> &tria,
+                            const double        left = 0.,
+                            const double        right= 1.,
+                            const double        thickness = 1.,
+                            const bool          colorize = false);
+
+  /**
+   * Initialize the given triangulation with a hyperball, i.e. a circle or a
+   * ball around @p center with given @p radius.
+   *
+   * In order to avoid degenerate cells at the boundaries, the circle is
+   * triangulated by five cells, the ball by seven cells. The diameter of the
+   * center cell is chosen so that the aspect ratio of the boundary cells
+   * after one refinement is optimized.
+   *
+   * This function is declared to exist for triangulations of all space
+   * dimensions, but throws an error if called in 1d.
+   *
+   * You should attach a SphericalManifold to the cells and faces for correct
+   * placement of vertices upon refinement and to be able to use higher order
+   * mappings.
+   *
+   * @note The triangulation needs to be void upon calling this function.
+   */
+  template <int dim>
+  void hyper_ball (Triangulation<dim> &tria,
+                   const Point<dim>   &center = Point<dim>(),
+                   const double        radius = 1.);
+
+  /**
+   * Creates a hyper sphere, i.e., a surface of a ball in @p spacedim
+   * dimensions. This function only exists for dim+1=spacedim in 2 and 3 space
+   * dimensions.
+   *
+   * You should attach a SphericalManifold to the cells and faces for correct
+   * placement of vertices upon refinement and to be able to use higher order
+   * mappings.
+   *
+   * The following pictures are generated with:
+   * @code
+   * Triangulation<2,3>   triangulation;
+   *
+   * static SphericalManifold<2,3> surface_description;
+   *
+   * GridGenerator::hyper_sphere(triangulation);
+   *
+   * triangulation.set_all_manifold_ids(0);
+   * triangulation.set_manifold (0, surface_description);
+   * triangulation.refine_global(3);
+   * @endcode
+   *
+   * See the
+   * @ref manifold "documentation module on manifolds"
+   * for more details.
+   *
+   * @image html sphere.png
+   * @image html sphere_section.png
+   *
+   * @note The triangulation needs to be void upon calling this function.
+   */
+
+  template <int dim, int spacedim>
+  void hyper_sphere (Triangulation<dim,spacedim> &tria,
+                     const Point<spacedim>   &center = Point<spacedim>(),
+                     const double        radius = 1.);
+
+  /**
+   * This function produces a half hyper-ball around @p center, which contains
+   * four elements in 2d and 6 in 3d. The cut plane is perpendicular to the
+   * <i>x</i>-axis.
+   *
+   * The boundary indicators for the final triangulation are 0 for the curved
+   * boundary and 1 for the cut plane.
+   *
+   * The appropriate boundary class is HalfHyperBallBoundary, or
+   * HyperBallBoundary.
+   *
+   * @note The triangulation needs to be void upon calling this function.
+   */
+  template <int dim>
+  void half_hyper_ball (Triangulation<dim> &tria,
+                        const Point<dim>   &center = Point<dim>(),
+                        const double        radius = 1.);
+
+  /**
+   * Create a cylinder around the $x$-axis.  The cylinder extends from
+   * <tt>x=-half_length</tt> to <tt>x=+half_length</tt> and its projection
+   * into the @p yz-plane is a circle of radius @p radius.
+   *
+   * In two dimensions, the cylinder is a rectangle from
+   * <tt>x=-half_length</tt> to <tt>x=+half_length</tt> and from
+   * <tt>y=-radius</tt> to <tt>y=radius</tt>.
+   *
+   * The boundaries are colored according to the following scheme: 0 for the
+   * hull of the cylinder, 1 for the left hand face and 2 for the right hand
+   * face.
+   *
+   * If you want the cylinder to revolve around a different axis than the
+   * $x$-axis, then simply rotate the mesh generated by this function using
+   * the GridTools::transform() function using a rotation operator as
+   * argument.
+   *
+   * @note The triangulation needs to be void upon calling this function.
+   */
+  template <int dim>
+  void cylinder (Triangulation<dim> &tria,
+                 const double        radius = 1.,
+                 const double        half_length = 1.);
+
+  /**
+   * Create a cut cone around the x-axis.  The cone extends from
+   * <tt>x=-half_length</tt> to <tt>x=half_length</tt> and its projection into
+   * the @p yz-plane is a circle of radius @p radius_0 at
+   * <tt>x=-half_length</tt> and a circle of radius @p radius_1 at
+   * <tt>x=+half_length</tt>.  In between the radius is linearly decreasing.
+   *
+   * In two dimensions, the cone is a trapezoid from <tt>x=-half_length</tt>
+   * to <tt>x=+half_length</tt> and from <tt>y=-radius_0</tt> to
+   * <tt>y=radius_0</tt> at <tt>x=-half_length</tt> and from
+   * <tt>y=-radius_1</tt> to <tt>y=radius_1</tt> at <tt>x=+half_length</tt>.
+   * In between the range of <tt>y</tt> is linearly decreasing.
+   *
+   * The boundaries are colored according to the following scheme: 0 for the
+   * hull of the cone, 1 for the left hand face and 2 for the right hand face.
+   *
+   * An example of use can be found in the documentation of the ConeBoundary
+   * class, with which you probably want to associate boundary indicator 0
+   * (the hull of the cone).
+   *
+   * @note The triangulation needs to be void upon calling this function.
+   *
+   * @author Markus Bürg, 2009
+   */
+  template <int dim>
+  void
+  truncated_cone (Triangulation<dim> &tria,
+                  const double        radius_0 = 1.0,
+                  const double        radius_1 = 0.5,
+                  const double        half_length = 1.0);
+
+  /**
+   * \brief A center cell with stacks of cells protruding from each surface.
+   *
+   * Each of the square mesh cells is Cartesian and has size one in each
+   * coordinate direction. The center of cell number zero is the origin.
+   *
+   * @param tria A Triangulation object which has to be empty.
+   *
+   * @param sizes A vector of integers of dimension
+   * GeometryInfo<dim>::faces_per_cell with the following meaning: the legs of
+   * the cross are stacked on the faces of the center cell, in the usual order
+   * of deal.II cells, namely first $-x$, then $x$, then $-y$ and so on. The
+   * corresponding entries in <code>sizes</code> name the number of cells
+   * stacked on this face. All numbers may be zero, thus L- and T-shaped
+   * domains are specializations of this domain.
+   *
+   * @param colorize_cells If colorization is chosen, then the material id of
+   * a cell corresponds to the leg it is in. The id of the center cell is
+   * zero, and then the legs are numbered starting at one.
+   *
+   * Examples in two and three dimensions are
+   *
+   * @image html hyper_cross_2d.png
+   * @image html hyper_cross_3d.png
+   *
+   * @author Guido Kanschat
+   * @date 2015
+   */
+  template <int dim, int spacedim>
+  void hyper_cross(Triangulation<dim, spacedim> &tria,
+                   const std::vector<unsigned int> &sizes,
+                   const bool colorize_cells = false);
+
+  /**
+   * Initialize the given triangulation with a hyper-L (in 2d or 3d)
+   * consisting of exactly <tt>2^dim-1</tt> cells. It produces the hypercube
+   * with the interval [<i>left,right</i>] without the hypercube made out of
+   * the interval [<i>(left+right)/2,right</i>] for each coordinate.  All
+   * faces will have boundary indicator 0. This function will create the
+   * classical L-shape in 2d and it will look like the following in 3d:
+   *
+   * @image html hyper_l.png
+   *
+   * This function is declared to exist for triangulations of all space
+   * dimensions, but throws an error if called in 1d.
+   *
+   * @note The triangulation needs to be void upon calling this function.
+   */
+  template <int dim>
+  void hyper_L (Triangulation<dim> &tria,
+                const double        left = -1.,
+                const double        right= 1.);
+
+  /**
+   * Initialize the given Triangulation with a hypercube with a slit. In each
+   * coordinate direction, the hypercube extends from @p left to @p right.
+   *
+   * In 2d, the slit goes in vertical direction from <tt>x=(left+right)/2,
+   * y=left</tt> to the center of the square at <tt>x=y=(left+right)/2</tt>.
+   *
+   * In 3d, the 2d domain is just extended in the <i>z</i>-direction, such
+   * that a plane cuts the lower half of a rectangle in two.  This function is
+   * declared to exist for triangulations of all space dimensions, but throws
+   * an error if called in 1d.
+   *
+   * @note The triangulation needs to be void upon calling this function.
+   */
+  template <int dim>
+  void hyper_cube_slit (Triangulation<dim> &tria,
+                        const double        left = 0.,
+                        const double        right = 1.,
+                        const bool          colorize = false);
+
+  /**
+   * Produce a hyper-shell, the region between two spheres around
+   * <tt>center</tt>, with given <tt>inner_radius</tt> and
+   * <tt>outer_radius</tt>. The number <tt>n_cells</tt> indicates the number
+   * of cells of the resulting triangulation, i.e., how many cells form the
+   * ring (in 2d) or the shell (in 3d).
+   *
+   * If the flag @p colorize is @p true, then the outer boundary will have the
+   * indicator 1, while the inner boundary has id zero. In 3d, this applies to
+   * both the faces and the edges of these boundaries. If the flag is @p
+   * false, both have indicator zero.
+   *
+   * You should attach a SphericalManifold to the cells and faces for correct
+   * placement of vertices upon refinement and to be able to use higher order
+   * mappings. Alternatively, it is also possible to attach a
+   * HyperShellBoundary to the inner and outer boundary. This will create
+   * inferior meshes as described below.
+   *
+   * In 2d, the number <tt>n_cells</tt> of elements for this initial
+   * triangulation can be chosen arbitrarily. If the number of initial cells
+   * is zero (as is the default), then it is computed adaptively such that the
+   * resulting elements have the least aspect ratio.
+   *
+   * In 3d, only certain numbers are allowed, 6 (or the default 0) for a
+   * surface based on a hexahedron (i.e. 6 panels on the inner sphere extruded
+   * in radial direction to form 6 cells), 12 for the rhombic dodecahedron,
+   * and 96 (see below).
+   *
+   * While the SphericalManifold, that is demonstrated in the documentation of
+   * the
+   * @ref manifold "documentation module on manifolds",
+   * creates reasonable meshes for any number of @p n_cells if attached to all
+   * cells and boundaries, the situation is less than ideal when only
+   * attaching a HyperShellBoundary. Then, only vertices on the boundaries are
+   * placed at the correct distance from the center. As an example, the 3d
+   * meshes give rise to the following meshes upon one refinement:
+   *
+   * @image html hypershell3d-6.png
+   * @image html hypershell3d-12.png
+   *
+   * Neither of these meshes is particularly good since one ends up with
+   * poorly shaped cells at the inner edge upon refinement. For example, this
+   * is the middle plane of the mesh for the <code>n_cells=6</code>:
+   *
+   * @image html hyper_shell_6_cross_plane.png
+   *
+   * The mesh generated with <code>n_cells=12</code> is better but still not
+   * good. As a consequence, you may also specify <code>n_cells=96</code> as a
+   * third option. The mesh generated in this way is based on a once refined
+   * version of the one with <code>n_cells=12</code>, where all internal nodes
+   * are re-placed along a shell somewhere between the inner and outer
+   * boundary of the domain. The following two images compare half of the
+   * hyper shell for <code>n_cells=12</code> and <code>n_cells=96</code> (note
+   * that the doubled radial lines on the cross section are artifacts of the
+   * visualization):
+   *
+   * @image html hyper_shell_12_cut.png
+   * @image html hyper_shell_96_cut.png
+   *
+   * @note This function is declared to exist for triangulations of all space
+   * dimensions, but throws an error if called in 1d.
+   *
+   * @note The triangulation needs to be void upon calling this function.
+   */
+  template <int dim>
+  void hyper_shell (Triangulation<dim> &tria,
+                    const Point<dim>   &center,
+                    const double        inner_radius,
+                    const double        outer_radius,
+                    const unsigned int  n_cells = 0,
+                    bool                colorize = false);
+
+  /**
+   * Produce a half hyper-shell, i.e. the space between two circles in two
+   * space dimensions and the region between two spheres in 3d, with given
+   * inner and outer radius and a given number of elements for this initial
+   * triangulation.  However, opposed to the previous function, it does not
+   * produce a whole shell, but only one half of it, namely that part for
+   * which the first component is restricted to non-negative values. The
+   * purpose of this function is to enable computations for solutions which have
+   * rotational symmetry, in which case the half shell in 2d represents a
+   * shell in 3d.
+   *
+   * If the number of initial cells is zero (as is the default), then it is
+   * computed adaptively such that the resulting elements have the least
+   * aspect ratio.
+   *
+   * If colorize is set to true, the inner, outer, and the part of the
+   * boundary where $x=0$, get indicator 0, 1, and 2, respectively. Otherwise
+   * all indicators are set to 0.
+   *
+   * @note The triangulation needs to be void upon calling this function.
+   */
+  template <int dim>
+  void half_hyper_shell (Triangulation<dim> &tria,
+                         const Point<dim>   &center,
+                         const double        inner_radius,
+                         const double        outer_radius,
+                         const unsigned int  n_cells = 0,
+                         const bool          colorize = false);
+
+
+  /**
+   * Produce a domain that is the intersection between a hyper-shell with
+   * given inner and outer radius, i.e. the space between two circles in two
+   * space dimensions and the region between two spheres in 3d, and the
+   * positive quadrant (in 2d) or octant (in 3d). In 2d, this is indeed a
+   * quarter of the full annulus, while the function's name is a misnomer in 3d
+   * because there the domain is not a quarter but one eighth of the full
+   * shell.
+   *
+   * If the number of initial cells is zero (as is the default), then it is
+   * computed adaptively such that the resulting elements have the least
+   * aspect ratio in 2d.
+   *
+   * If @p colorize is set to true, the inner, outer, left, and right boundary
+   * get indicator 0, 1, 2, and 3 in 2d, respectively. Otherwise all
+   * indicators are set to 0. In 3d indicator 2 is at the face x=0, 3 at y=0,
+   * 4 at z=0.
+   *
+   * @note The triangulation needs to be void upon calling this function.
+   */
+  template <int dim>
+  void quarter_hyper_shell (Triangulation<dim> &tria,
+                            const Point<dim>   &center,
+                            const double        inner_radius,
+                            const double        outer_radius,
+                            const unsigned int  n_cells = 0,
+                            const bool          colorize = false);
+
+  /**
+   * Produce a domain that is the space between two cylinders in 3d, with
+   * given length, inner and outer radius and a given number of elements for
+   * this initial triangulation. If @p n_radial_cells is zero (as is the
+   * default), then it is computed adaptively such that the resulting elements
+   * have the least aspect ratio. The same holds for @p n_axial_cells.
+   *
+   * @note Although this function is declared as a template, it does not make
+   * sense in 1D and 2D.
+   *
+   * @note The triangulation needs to be void upon calling this function.
+   */
+  template <int dim>
+  void cylinder_shell (Triangulation<dim> &tria,
+                       const double        length,
+                       const double        inner_radius,
+                       const double        outer_radius,
+                       const unsigned int  n_radial_cells = 0,
+                       const unsigned int  n_axial_cells = 0);
+
+
+
+  /**
+   * Produce the surface meshing of the torus. The axis of the torus is the
+   * $y$-axis while the plane of the torus is the $x$-$z$ plane. The boundary
+   * of this object can be described by the TorusBoundary class.
+   *
+   * @param tria The triangulation to be filled.
+   *
+   * @param R The radius of the circle, which forms the middle line of the
+   * torus containing the loop of cells. Must be greater than @p r.
+   *
+   * @param r The inner radius of the torus.
+   */
+  void torus (Triangulation<2,3> &tria,
+              const double        R,
+              const double        r);
+
+
+  /**
+   * This function produces a square in the <i>xy</i>-plane with a circular hole
+   * in the middle. Square and circle are centered at the origin. In 3d, this
+   * geometry is extruded in $z$ direction to the interval $[0,L]$.
+   *
+   * @image html cubes_hole.png
+   *
+   * It is implemented in 2d and 3d, and takes the following arguments:
+   *
+   * @param triangulation The triangulation to be filled.
+   * @param inner_radius  Radius of the internal hole.
+   * @param outer_radius Half of the edge length of the square.
+   * @param L  Extension in @p z-direction (only used in 3d).
+   * @param repetitions Number of subdivisions along the @p z-direction.
+   * @param colorize Whether to assign different boundary indicators to
+   * different faces. The colors are given in lexicographic ordering for the
+   * flat faces (0 to 3 in 2d, 0 to 5 in 3d) plus the curved hole (4 in 2d,
+   * and 6 in 3d). If @p colorize is set to false, then flat faces get the
+   * number 0 and the hole gets number 1.
+   */
+  template<int dim>
+  void hyper_cube_with_cylindrical_hole (
+    Triangulation<dim> &triangulation,
+    const double        inner_radius = .25,
+    const double        outer_radius = .5,
+    const double        L = .5,
+    const unsigned int  repetitions = 1,
+    const bool          colorize = false);
+
+  /**
+   * Produce a ring of cells in 3d that is cut open, twisted and glued
+   * together again. This results in a kind of moebius-loop.
+   *
+   * @param tria        The triangulation to be worked on.
+   * @param n_cells     The number of cells in the loop. Must be greater than
+   * 4.
+   * @param n_rotations The number of rotations (Pi/2 each) to be performed
+   * before gluing the loop together.
+   * @param R           The radius of the circle, which forms the middle line
+   * of the torus containing the loop of cells. Must be greater than @p r.
+   * @param r           The radius of the cylinder bent together as a loop.
+   */
+  void moebius (Triangulation<3,3> &tria,
+                const unsigned int  n_cells,
+                const unsigned int  n_rotations,
+                const double        R,
+                const double        r);
+
+  ///@}
+
+  /**
+   * @name Creating meshes from other meshes
+   */
+  ///@{
+
+  /**
+   * Given the two triangulations specified as the first two arguments, create
+   * the triangulation that contains the cells of both triangulations and store
+   * it in the third parameter. Previous content of @p result will be deleted.
+   *
+   * This function is most often used to compose meshes for more complicated
+   * geometries if the geometry can be composed of simpler parts for which
+   * functions exist to generate coarse meshes.  For example, the channel mesh
+   * used in step-35 could in principle be created using a mesh created by the
+   * GridGenerator::hyper_cube_with_cylindrical_hole function and several
+   * rectangles, and merging them using the current function. The rectangles
+   * will have to be translated to the right for this, a task that can be done
+   * using the GridTools::shift function (other tools to transform individual
+   * mesh building blocks are GridTools::transform, GridTools::rotate, and
+   * GridTools::scale).
+   *
+   * @note The two input triangulations must be coarse meshes that have no
+   * refined cells.
+   *
+   * @note The function copies the material ids of the cells of the two input
+   * triangulations into the output triangulation but it currently makes no
+   * attempt to do the same for boundary ids. In other words, if the two
+   * coarse meshes have anything but the default boundary indicators, then you
+   * will currently have to set boundary indicators again by hand in the
+   * output triangulation.
+   *
+   * @note For a related operation on refined meshes when both meshes are
+   * derived from the same coarse mesh, see
+   * GridGenerator::create_union_triangulation().
+   */
+  template <int dim, int spacedim>
+  void
+  merge_triangulations (const Triangulation<dim, spacedim> &triangulation_1,
+                        const Triangulation<dim, spacedim> &triangulation_2,
+                        Triangulation<dim, spacedim>       &result);
+
+  /**
+   * Given the two triangulations specified as the first two arguments, create
+   * the triangulation that contains the finest cells of both triangulations
+   * and store it in the third parameter. Previous content of @p result will
+   * be deleted.
+   *
+   * @note This function is intended to create an adaptively refined
+   * triangulation that contains the <i>most refined cells</i> from two input
+   * triangulations that were derived from the <i>same</i> coarse grid by
+   * adaptive refinement. This is an operation sometimes needed when one
+   * solves for two variables of a coupled problem on separately refined
+   * meshes on the same domain (for example because these variables have
+   * boundary layers in different places) but then needs to compute something
+   * that involves both variables or wants to output the result into a single
+   * file. In both cases, in order not to lose information, the two solutions
+   * can not be interpolated onto the respectively other mesh because that may
+   * be coarser than the ones on which the variable was computed. Rather, one
+   * needs to have a mesh for the domain that is at least as fine as each of
+   * the two initial meshes. This function computes such a mesh.
+   *
+   * @note If you want to create a mesh that is the merger of two other coarse
+   * meshes, for example in order to compose a mesh for a complicated geometry
+   * from meshes for simpler geometries, then this is not the function for
+   * you. Instead, consider GridGenerator::merge_triangulations().
+   *
+   * @pre Both of the source triangulations need to be available entirely locally.
+   * In other words, they can not be objects of type
+   * parallel::distributed::Triangulation.
+   */
+  template <int dim, int spacedim>
+  void
+  create_union_triangulation (const Triangulation<dim, spacedim> &triangulation_1,
+                              const Triangulation<dim, spacedim> &triangulation_2,
+                              Triangulation<dim, spacedim>       &result);
+
+  /**
+   * This function creates a triangulation that consists of the same cells as
+   * are present in the first argument, except those cells that are listed in
+   * the second argument. The purpose of the function is to generate
+   * geometries <i>subtractively</i> from the geometry described by an
+   * existing triangulation. A prototypical case is a 2d domain with
+   * rectangular holes. This can be achieved by first meshing the entire
+   * domain and then using this function to get rid of the cells that are
+   * located at the holes. Likewise, you could create the mesh that
+   * GridGenerator::hyper_L() produces by starting with a
+   * GridGenerator::hyper_cube(), refining it once, and then calling the
+   * current function with a single cell in the second argument.
+   *
+   * @param[in] input_triangulation The original triangulation that serves as
+   * the template from which the new one is to be created.
+   * @param[in] cells_to_remove A list of cells of the triangulation provided
+   * as first argument that should be removed (i.e., that should not show up
+   * in the result).
+   * @param[out] result The resulting triangulation that consists of the same
+   * cells as are in @p input_triangulation, with the exception of the cells
+   * listed in @p cells_to_remove.
+   *
+   * @pre Because we cannot create triangulations de novo that contain
+   * adaptively refined cells, the input triangulation needs to have all of
+   * its cells on the same level. Oftentimes, this will in fact be the
+   * coarsest level, but it is allowed to pass in a triangulation that has
+   * been refined <i>globally</i> a number of times. The output triangulation
+   * will in that case simply be a mesh with only one level that consists of
+   * the active cells of the input minus the ones listed in the second
+   * argument. However, the input triangulation must not have been
+   * <i>adaptively</i> refined.
+   */
+  template <int dim, int spacedim>
+  void
+  create_triangulation_with_removed_cells (const Triangulation<dim, spacedim> &input_triangulation,
+                                           const std::set<typename Triangulation<dim, spacedim>::active_cell_iterator> &cells_to_remove,
+                                           Triangulation<dim, spacedim>       &result);
+
+
+  /**
+   * Take a 2d Triangulation that is being extruded in z direction by the
+   * total height of @p height using @p n_slices slices (minimum is 2). The
+   * boundary indicators of the faces of @p input are going to be assigned to
+   * the corresponding side walls in z direction. The bottom and top get the
+   * next two free boundary indicators.
+   *
+   * @note The 2d input triangulation @p input must be a coarse mesh that has
+   * no refined cells.
+   */
+  void
+  extrude_triangulation (const Triangulation<2, 2> &input,
+                         const unsigned int         n_slices,
+                         const double               height,
+                         Triangulation<3,3>        &result);
+
+  /**
+   * Given an input triangulation @p in_tria, this function makes a new flat
+   * triangulation @p out_tria which contains a single level with all active
+   * cells of the input triangulation. If @p spacedim1 and @p spacedim2 are
+   * different, only the smallest spacedim components of the vertices are
+   * copied over. This is useful to create a Triangulation<2,3> out of a
+   * Triangulation<2,2>, or to project a Triangulation<2,3> into a
+   * Triangulation<2,2>, by neglecting the z components of the vertices.
+   *
+   * No internal checks are performed on the vertices, which are assumed to
+   * make sense topologically in the target @p spacedim2 dimensional space. If
+   * this is not the case, you will encounter problems when using the
+   * triangulation later on.
+   *
+   * All information about cell manifold_ids and material ids is copied from
+   * one triangulation to the other, and only the boundary manifold_ids and
+   * boundary_ids are copied over from the faces of @p in_tria to the faces of
+   * @p out_tria. If you need to specify manifold ids on interior faces, they
+   * have to be specified manually after the triangulation is created.
+   *
+   * This function will fail if the input Triangulation is of type
+   * parallel::distributed::Triangulation, as well as when the input
+   * Triangulation contains hanging nodes.
+   *
+   * @author Luca Heltai, 2014
+   */
+  template <int dim, int spacedim1, int spacedim2>
+  void flatten_triangulation(const Triangulation<dim,spacedim1> &in_tria,
+                             Triangulation<dim,spacedim2> &out_tria);
+
+  ///@}
+
+  /**
+   * @name Creating lower-dimensional meshes from parts of higher-dimensional
+   * meshes
+   */
+  ///@{
+
+#ifdef _MSC_VER
+  // Microsoft's VC++ has a bug where it doesn't want to recognize that
+  // an implementation (definition) of the extract_boundary_mesh function
+  // matches a declaration. This can apparently only be avoided by
+  // doing some contortion with the return type using the following
+  // intermediate type. The workaround is only used with MS VC++; all
+  // other compilers spell out the return type directly.
+  template <template <int,int> class MeshType, int dim, int spacedim>
+  struct ExtractBoundaryMesh
+  {
+    typedef
+    std::map<typename MeshType<dim-1,spacedim>::cell_iterator,
+        typename MeshType<dim,spacedim>::face_iterator>
+        return_type;
+  };
+#endif
+
+  /**
+   * This function implements a boundary subgrid extraction.  Given a
+   * <dim,spacedim>-Triangulation (the "volume mesh") the function extracts a
+   * subset of its boundary (the "surface mesh").  The boundary to be
+   * extracted is specified by a list of boundary_ids.  If none is specified
+   * the whole boundary will be extracted. The function is used in step-38.
+   *
+   * The function also builds a mapping linking the cells on the surface mesh
+   * to the corresponding faces on the volume one. This mapping is the return
+   * value of the function.
+   *
+   * @note The function builds the surface mesh by creating a coarse mesh from
+   * the selected faces of the coarse cells of the volume mesh. It copies the
+   * boundary indicators of these faces to the cells of the coarse surface
+   * mesh. The surface mesh is then refined in the same way as the faces of
+   * the volume mesh are. In order to ensure that the surface mesh has the
+   * same vertices as the volume mesh, it is therefore important that you
+   * assign appropriate boundary objects through Triangulation::set_boundary()
+   * to the surface mesh object before calling this function. If you don't,
+   * the refinement will happen under the assumption that all faces are
+   * straight (i.e., using the StraightBoundary class) rather than any curved
+   * boundary object you may want to use to determine the location of new
+   * vertices.
+   *
+   * @tparam MeshType A type that satisfies the requirements of the
+   * @ref ConceptMeshType "MeshType concept".
+   * The map that is returned will be between cell iterators pointing into the
+   * container describing the surface mesh and face iterators of the volume
+   * mesh container. If MeshType is DoFHandler or hp::DoFHandler, then the
+   * function will re-build the triangulation underlying the second argument
+   * and return a map between appropriate iterators into the MeshType
+   * arguments. However, the function will not actually distribute degrees of
+   * freedom on this newly created surface mesh.
+   *
+   * @tparam dim The dimension of the cells of the volume mesh. For example,
+   * if dim==2, then the cells are quadrilaterals that either live in the
+   * plane, or form a surface in a higher-dimensional space. The dimension of
+   * the cells of the surface mesh is consequently dim-1.
+   * @tparam spacedim The dimension of the space in which both the volume and
+   * the surface mesh live.
+   *
+   * @param[in] volume_mesh A container of cells that define the volume mesh.
+   * @param[out] surface_mesh A container whose associated triangulation will
+   * be built to consist of the cells that correspond to the (selected portion
+   * of) the boundary of the volume mesh.
+   * @param[in] boundary_ids A list of boundary indicators denoting that
+   * subset of faces of volume cells for which this function should extract
+   * the surface mesh. If left at its default, i.e., if the set is empty, then
+   * the function operates on <i>all</i> boundary faces.
+   *
+   * @return A map that for each cell of the surface mesh (key) returns an
+   * iterator to the corresponding face of a cell of the volume mesh (value).
+   * The keys include both active and non-active cells of the surface mesh.
+   * For dim=2 (i.e., where volume cells are quadrilaterals and surface cells
+   * are lines), the order of vertices of surface cells and the corresponding
+   * volume faces match. For dim=3 (i.e., where volume cells are hexahedra and
+   * surface cells are quadrilaterals), the order of vertices may not match in
+   * order to ensure that each surface cell has a right-handed coordinate
+   * system when viewed from one of the two sides of the surface connecting
+   * the cells of the surface mesh.
+   *
+   * @note The algorithm outlined above assumes that all faces on higher
+   * refinement levels always have exactly the same boundary indicator as
+   * their parent face. Consequently, we can start with coarse level faces and
+   * build the surface mesh based on that. It would not be very difficult to
+   * extend the function to also copy boundary indicators from finer level
+   * faces to their corresponding surface mesh cells, for example to
+   * accommodate different geometry descriptions in the case of curved
+   * boundaries (but this is not currently implemented).
+   */
+  template <template <int,int> class MeshType, int dim, int spacedim>
+#ifndef _MSC_VER
+  std::map<typename MeshType<dim-1,spacedim>::cell_iterator,
+      typename MeshType<dim,spacedim>::face_iterator>
+#else
+  typename ExtractBoundaryMesh<MeshType,dim,spacedim>::return_type
+#endif
+      extract_boundary_mesh (const MeshType<dim,spacedim>       &volume_mesh,
+                             MeshType<dim-1,spacedim>           &surface_mesh,
+                             const std::set<types::boundary_id> &boundary_ids
+                             = std::set<types::boundary_id>());
+
+  ///@}
+
+
+  /**
+   * @name Exceptions
+   */
+  ///@{
+
+
+  /**
+   * Exception
+   */
+  DeclException0 (ExcInvalidRadii);
+  /**
+   * Exception
+   */
+  DeclException1 (ExcInvalidRepetitions,
+                  int,
+                  << "The number of repetitions " << arg1
+                  << " must be >=1.");
+  /**
+   * Exception
+   */
+  DeclException1 (ExcInvalidRepetitionsDimension,
+                  int,
+                  << "The vector of repetitions  must have "
+                  << arg1 <<" elements.");
+
+  ///@}
+}
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/grid/grid_in.h b/include/deal.II/grid/grid_in.h
new file mode 100644
index 0000000..14976f4
--- /dev/null
+++ b/include/deal.II/grid/grid_in.h
@@ -0,0 +1,634 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__grid_in_h
+#define dealii__grid_in_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/base/point.h>
+#include <iostream>
+#include <vector>
+#include <string>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int dim, int space_dim> class Triangulation;
+template <int dim> struct CellData;
+struct SubCellData;
+
+
+/**
+ * This class implements an input mechanism for grid data. It allows to read a
+ * grid structure into a triangulation object. At present, UCD (unstructured
+ * cell data), DB Mesh, XDA, Gmsh, Tecplot, NetCDF, UNV, VTK, and Cubit are
+ * supported as input format for grid data. Any numerical data other than
+ * geometric (vertex locations) and topological (how vertices form cells)
+ * information is ignored.
+ *
+ * @note Since deal.II only supports line, quadrilateral and hexahedral
+ * meshes, the functions in this class can only read meshes that consist
+ * exclusively of such cells. If you absolutely need to work with a mesh that
+ * uses triangles or tetrahedra, then your only option is to convert the mesh
+ * to quadrilaterals and hexahedra. A tool that can do this is tethex, see
+ * http://code.google.com/p/tethex/wiki/Tethex .
+ *
+ * The mesh you read will form the coarsest level of a @p Triangulation
+ * object. As such, it must not contain hanging nodes or other forms of
+ * adaptive refinement and strange things will happen if the mesh represented
+ * by the input file does in fact have them. This is due to the fact that most
+ * mesh description formats do not store neighborship information between
+ * cells, so the grid reading functions have to regenerate it. They do so by
+ * checking whether two cells have a common face. If there are hanging nodes
+ * in a triangulation, adjacent cells have no common (complete) face, so the
+ * grid reader concludes that the adjacent cells have no neighbors along these
+ * faces and must therefore be at the boundary. In effect, an internal crack
+ * of the domain is introduced this way. Since such cases are very hard to
+ * detect (how is GridIn supposed to decide whether a place where the faces of
+ * two small cells coincide with the face of a larger cell is in fact a
+ * hanging node associated with local refinement, or is indeed meant to be a
+ * crack in the domain?), the library does not make any attempt to catch such
+ * situations, and you will get a triangulation that probably does not do what
+ * you want. If your goal is to save and later read again a triangulation that
+ * has been adaptively refined, then this class is not your solution; rather
+ * take a look at the PersistentTriangulation class.
+ *
+ * @note It is not uncommon to experience unexpected problems when reading
+ * generated meshes for the first time using this class. If this applies to
+ * you, be sure to read the documentation right until the end, and also read
+ * the documentation of the GridReordering class.
+ *
+ * To read grid data, the triangulation to be fed with has to be empty. When
+ * giving a file which does not contain the assumed information or which does
+ * not keep to the right format, the state of the triangulation will be
+ * undefined afterwards. Upon input, only lines (in 1d), lines and quads (in
+ * 2d), and lines, quads and hexes (in 3d) are accepted. All other cell types
+ * (e.g. triangles or tetrahedra) are rejected. The vertex and cell
+ * numbering in the input file, which need not be consecutive, is lost upon
+ * transfer to the triangulation object, since this one needs consecutively
+ * numbered elements.
+ *
+ * Material indicators are accepted to denote the material ID of cells and to
+ * denote boundary part indication for lines in 2D. Read the according
+ * sections in the documentation of the Triangulation class for further
+ * details.
+ *
+ *
+ * <h3>Supported input formats</h3>
+ *
+ * At present, the following input formats are supported:
+ * <ul>
+ * <li> @p UCD (unstructured cell data) format: this format is used for grid
+ * input as well as data output. If there are data vectors in the input file,
+ * they are ignored, as we are only interested in the grid in this class. The
+ * UCD format requires the vertices to be in following ordering: in 2d
+ * @verbatim
+ *      3-----2
+ *      |     |
+ *      |     |
+ *      |     |
+ *      0-----1
+ * @endverbatim
+ * and in 3d
+ * @verbatim
+ *         7-------6        7-------6
+ *        /|       |       /       /|
+ *       / |       |      /       / |
+ *      /  |       |     /       /  |
+ *     3   |       |    3-------2   |
+ *     |   4-------5    |       |   5
+ *     |  /       /     |       |  /
+ *     | /       /      |       | /
+ *     |/       /       |       |/
+ *     0-------1        0-------1
+ * @endverbatim
+ * Note, that this ordering is different from the deal.II numbering scheme,
+ * see the Triangulation class.  The exact description of the UCD format can
+ * be found in the AVS Explorer manual (see http://www.avs.com).  The @p UCD
+ * format can be read by the read_ucd() function.
+ *
+ * <li> <tt>DB mesh</tt> format: this format is used by the @p BAMG mesh
+ * generator (see http://www-rocq.inria.fr/gamma/cdrom/www/bamg/eng.htm). The
+ * documentation of the format in the @p BAMG manual is very incomplete, so we
+ * don't actually parse many of the fields of the output since we don't know
+ * their meaning, but the data that is read is enough to build up the mesh as
+ * intended by the mesh generator. This format can be read by the
+ * read_dbmesh() function.
+ *
+ * <li> @p XDA format: this is a rather simple format used by the MGF code. We
+ * don't have an exact specification of the format, but the reader can read in
+ * several example files. If the reader does not grok your files, it should be
+ * fairly simple to extend it.
+ *
+ * <li> <tt>Gmsh 1.0 mesh</tt> format: this format is used by the @p GMSH mesh
+ * generator (see http://www.geuz.org/gmsh/). The documentation in the @p GMSH
+ * manual explains how to generate meshes compatible with the deal.II library
+ * (i.e. quads rather than triangles). In order to use this format, Gmsh has
+ * to output the file in the old format 1.0. This is done adding the line
+ * "Mesh.MshFileVersion = 1" to the input file.
+ *
+ * <li> <tt>Gmsh 2.0 mesh</tt> format: this is a variant of the above format.
+ * The read_msh() function automatically determines whether an input file is
+ * version 1 or version 2.
+ *
+ * <li> <tt>Tecplot</tt> format: this format is used by @p TECPLOT and often
+ * serves as a basis for data exchange between different applications. Note,
+ * that currently only the ASCII format is supported, binary data cannot be
+ * read.
+ *
+ * <li> <tt>UNV</tt> format: this format is generated by the Salome mesh
+ * generator, see http://www.salome-platform.org/ . The sections of the format
+ * that the GridIn::read_unv function supports are documented here:
+ * <ul>
+ * <li> section 2411: http://www.sdrl.uc.edu/universal-file-formats-for-modal-
+ * analysis-testing-1/file-format-storehouse/unv_2411.htm
+ * <li> section 2412: http://www.sdrl.uc.edu/universal-file-formats-for-modal-
+ * analysis-testing-1/file-format-storehouse/unv_2412.htm
+ * <li> section 2467: http://www.sdrl.uc.edu/universal-file-formats-for-modal-
+ * analysis-testing-1/file-format-storehouse/unv_2467.htm
+ * <li> all sections of this format, even if they may not be supported in our
+ * reader, can be found here: http://www.sdrl.uc.edu/universal-file-formats-
+ * for-modal-analysis-testing-1/file-format-storehouse/file-formats
+ * </ul>
+ * Note that Salome, let's say in 2D, can only make a quad mesh on an object
+ * that has exactly 4 edges (or 4 pieces of the boundary). That means, that if
+ * you have a more complicated object and would like to mesh it with quads,
+ * you will need to decompose the object into >= 2 separate objects. Then 1)
+ * each of these separate objects is meshed, 2) the appropriate groups of
+ * cells and/or faces associated with each of these separate objects are
+ * created, 3) a compound mesh is built up, and 4) all numbers that might be
+ * associated with some of the internal faces of this compound mesh are
+ * removed.
+ *
+ * <li> <tt>VTK</tt> format: the VTK Unstructured Grid legacy file
+ * format. The reader can handle only the Unstructured Grid format of data at
+ * present for 2D & 3D geometries. The documentation for the general legacy
+ * vtk file, including Unstructured Grid format can be found here:
+ * http://www.cacr.caltech.edu/~slombey/asci/vtk/vtk_formats.simple.html
+ *
+ * The VTK format requires the vertices to be in following ordering: in 2d
+ * @verbatim
+ *      3-----2
+ *      |     |
+ *      |     |
+ *      |     |
+ *      0-----1
+ * @endverbatim
+ * and in 3d
+ * @verbatim
+ *         7-------6        7-------6
+ *        /|       |       /       /|
+ *       / |       |      /       / |
+ *      /  |       |     /       /  |
+ *     4   |       |    4-------5   |
+ *     |   3-------2    |       |   2
+ *     |  /       /     |       |  /
+ *     | /       /      |       | /
+ *     |/       /       |       |/
+ *     0-------1        0-------1
+ * @endverbatim
+ *
+ *
+ * <li> <tt>Cubit</tt> format: deal.II doesn't directly support importing from
+ * Cubit at this time. However, Cubit can export in UCD format using a simple
+ * plug-in, and the resulting UCD file can then be read by this class. The
+ * plug-in script can be found on the deal.II wiki page,
+ * http://code.google.com/p/dealii/wiki/MeshInputAndOutput .
+ *
+ * Alternatively, Cubit can generate ABAQUS files that can be read in via the
+ * read_abaqus() function. This may be a better option for geometries with
+ * complex boundary condition surfaces and multiple materials
+ *  - information which is currently not easily obtained through
+ * Cubit's python interface.
+ *
+ * </ul>
+ *
+ * <h3>Structure of input grid data. The GridReordering class</h3>
+ *
+ * It is your duty to use a correct numbering of vertices in the cell list,
+ * i.e. for lines in 1d, you have to first give the vertex with the lower
+ * coordinate value, then that with the higher coordinate value. For
+ * quadrilaterals in two dimensions, the vertex indices in the @p quad list
+ * have to be such that the vertices are numbered in counter-clockwise sense.
+ *
+ * In two dimensions, another difficulty occurs, which has to do with the
+ * sense of a quadrilateral. A quad consists of four lines which have a
+ * direction, which is by definition as follows:
+ * @verbatim
+ *   3-->--2
+ *   |     |
+ *   ^     ^
+ *   |     |
+ *   0-->--1
+ * @endverbatim
+ * Now, two adjacent cells must have a vertex numbering such that the
+ * direction of the common side is the same. For example, the following two
+ * quads
+ * @verbatim
+ *   3---4---5
+ *   |   |   |
+ *   0---1---2
+ * @endverbatim
+ * may be characterised by the vertex numbers <tt>(0 1 4 3)</tt> and <tt>(1 2
+ * 5 4)</tt>, since the middle line would get the direction <tt>1->4</tt> when
+ * viewed from both cells.  The numbering <tt>(0 1 4 3)</tt> and <tt>(5 4 1
+ * 2)</tt> would not be allowed, since the left quad would give the common
+ * line the direction <tt>1->4</tt>, while the right one would want to use
+ * <tt>4->1</tt>, leading to an ambiguity. The Triangulation object is capable
+ * of detecting this special case, which can be eliminated by rotating the
+ * indices of the right quad by two. However, it would not know what to do if
+ * you gave the vertex indices <tt>(4 1 2 5)</tt>, since then it would have to
+ * rotate by one element or three, the decision which to take is not yet
+ * implemented.
+ *
+ * There are more ambiguous cases, where the triangulation may not know what
+ * to do at all without the use of sophisticated algorithms. Furthermore,
+ * similar problems exist in three space dimensions, where faces and lines
+ * have orientations that need to be taken care of.
+ *
+ * For this reason, the <tt>read_*</tt> functions of this class that read in
+ * grids in various input formats call the GridReordering class to bring the
+ * order of vertices that define the cells into an ordering that satisfies the
+ * requirements of the Triangulation class. Be sure to read the documentation
+ * of that class if you experience unexpected problems when reading grids
+ * through this class.
+ *
+ *
+ * <h3>Dealing with distorted mesh cells</h3>
+ *
+ * For each of the mesh reading functions, the last call is always to
+ * Triangulation::create_triangulation(). That function checks whether all the
+ * cells it creates as part of the coarse mesh are distorted or not (where
+ * distortion here means that the Jacobian of the mapping from the reference
+ * cell to the real cell has a non-positive determinant, i.e. the cell is
+ * pinched or twisted; see the entry on
+ * @ref GlossDistorted "distorted cells"
+ * in the glossary). If it finds any such cells, it throws an exception. This
+ * exception is not caught in the grid reader functions of the current class,
+ * and so will propagate through to the function that called it. There, you
+ * can catch and ignore the exception if you are certain that there is no harm
+ * in dealing with such cells. If you were not aware that your mesh had such
+ * cells, your results will likely be of dubious quality at best if you ignore
+ * the exception.
+ *
+ *
+ * @ingroup grid
+ * @ingroup input
+ * @author Wolfgang Bangerth, 1998, 2000, Luca Heltai, 2004, 2007, Jean-Paul
+ * Pelteret 2015, Timo Heister 2015,  Krzysztof Bzowski, 2015
+ */
+
+template <int dim, int spacedim=dim>
+class GridIn
+{
+public:
+  /**
+   * List of possible mesh input formats. These values are used when calling
+   * the function read() in order to determine the actual reader to be called.
+   */
+  enum Format
+  {
+    /// Use GridIn::default_format stored in this object
+    Default,
+    /// Use read_unv()
+    unv,
+    /// Use read_ucd()
+    ucd,
+    /// Use read_abaqus()
+    abaqus,
+    /// Use read_dbmesh()
+    dbmesh,
+    /// Use read_xda()
+    xda,
+    /// Use read_msh()
+    msh,
+    /// Use read_netcdf()
+    netcdf,
+    /// Use read_tecplot()
+    tecplot,
+    /// Use read_vtk()
+    vtk
+  };
+
+  /**
+   * Constructor.
+   */
+  GridIn ();
+
+  /**
+   * Attach this triangulation to be fed with the grid data.
+   */
+  void attach_triangulation (Triangulation<dim,spacedim> &tria);
+
+  /**
+   * Read from the given stream. If no format is given,
+   * GridIn::Format::Default is used.
+   */
+  void read (std::istream &in, Format format=Default);
+
+  /**
+   * Open the file given by the string and call the previous function read().
+   * This function uses the PathSearch mechanism to find files. The file class
+   * used is <code>MESH</code>.
+   */
+  void read (const std::string &in, Format format=Default);
+
+  /**
+   * Read grid data from a vtk file. Numerical data is ignored.
+   *
+   * @author Mayank Sabharwal, Andreas Putz, 2013
+   */
+  void read_vtk(std::istream &in);
+
+  /**
+   * Read grid data from a unv file as generated by the Salome mesh
+   * generator. Numerical data is ignored.
+   *
+   * Note the comments on generating this file format in the general
+   * documentation of this class.
+   */
+  void read_unv(std::istream &in);
+
+  /**
+   * Read grid data from a ucd file. Numerical data is ignored.
+   */
+  void read_ucd (std::istream &in);
+
+  /**
+   * Read grid data from an Abaqus file. Numerical and constitutive data is
+   * ignored.
+   *
+   * @note The current implementation of this mesh reader is suboptimal, and
+   * may therefore be slow for large meshes.
+   *
+   * @note Usage tips for Cubit:
+   * - Multiple material-id's can be defined in the mesh.
+   * This is done by specifying blocksets in the pre-processor.
+   * - Arbitrary surface boundaries can be defined in the mesh.
+   * This is done by specifying sidesets in the pre-processor. In particular,
+   * boundaries are not confined to just surfaces (in 3d); individual element
+   * faces can be added to the sideset as well. This is useful when a boundary
+   * condition is to be applied on a complex shape boundary that is difficult
+   * to define using "surfaces" alone. The same can be done in 2d.
+   *
+   * @note Compatibility information for this file format is listed below.
+   * - Files generated in Abaqus CAE 6.12 have been verified to be
+   * correctly imported, but older (or newer) versions of Abaqus may also
+   * generate valid input decks.
+   * - Files generated using Cubit 11.x, 12.x and 13.x are valid, but only
+   * when using a specific set of export steps. These are as follows:
+   *     - Go to "Analysis setup mode" by clicking on the disc icon in the
+   * toolbar on the right.
+   *     - Select "Export Mesh" under "Operation" by clicking on the
+   * necessary icon in the toolbar on the right.
+   *     - Select an output file. In Cubit version 11.0 and 12.0 it might be
+   * necessary to click on the browse button and type it in the dialogue that
+   * pops up.
+   *     - Select the dimension to output in.
+   *     - Tick the overwrite box.
+   *     - If using Cubit v12.0 onwards, uncheck the box "Export using Cubit
+   * ID's". An invalid file will encounter errors if this box is left checked.
+   *     - Click apply.
+   */
+  void read_abaqus (std::istream &in);
+
+  /**
+   * Read grid data from a file containing data in the DB mesh format.
+   */
+  void read_dbmesh (std::istream &in);
+
+  /**
+   * Read grid data from a file containing data in the XDA format.
+   */
+  void read_xda (std::istream &in);
+
+  /**
+   * Read grid data from an msh file, either version 1 or version 2 of that
+   * file format. The GMSH formats are documented at
+   * http://www.geuz.org/gmsh/.
+   *
+   * @note The input function of deal.II does not distinguish between newline
+   * and other whitespace. Therefore, deal.II will be able to read files in a
+   * slightly more general format than Gmsh.
+   */
+  void read_msh (std::istream &in);
+
+  /**
+   * Read grid data from a NetCDF file. The only data format currently
+   * supported is the <tt>TAU grid format</tt>.
+   *
+   * This function requires the library to be linked with the NetCDF library.
+   */
+  void read_netcdf (const std::string &filename);
+
+  /**
+   * Read grid data from a file containing tecplot ASCII data. This also works
+   * in the absence of any tecplot installation.
+   */
+  void read_tecplot (std::istream &in);
+
+  /**
+   * Return the standard suffix for a file in this format.
+   */
+  static std::string default_suffix (const Format format);
+
+  /**
+   * Return the enum Format for the format name.
+   */
+  static Format parse_format (const std::string &format_name);
+
+  /**
+   * Return a list of implemented input formats. The different names are
+   * separated by vertical bar signs (<tt>`|'</tt>) as used by the
+   * ParameterHandler classes.
+   */
+  static std::string get_format_names ();
+
+  /**
+   * Exception for a section type (arg1) of a UNV file that is not implemented.
+   */
+  DeclException1(ExcUnknownSectionType,
+                 int,
+                 << "The section type <" << arg1 << "> in an UNV "
+                 << "input file is not implemented.");
+
+  /**
+   * Exception for an element type (arg1) of a UNV file that is not implemented.
+   */
+  DeclException1(ExcUnknownElementType,
+                 int,
+                 << "The element type <" << arg1 << "> in an UNV "
+                 << "input file is not implemented.");
+
+  /**
+   * Exception for an unknown or unsupported part name (arg1) in a UCD file.
+   */
+  DeclException1 (ExcUnknownIdentifier,
+                  std::string,
+                  << "The identifier <" << arg1 << "> as name of a "
+                  << "part in an UCD input file is unknown or the "
+                  << "respective input routine is not implemented. "
+                  << "(Maybe the space dimension of triangulation and "
+                  << "input file do not match?)");
+  /**
+   * Exception
+   */
+  DeclException0 (ExcNoTriangulationSelected);
+  /**
+   * Exception: cell arg1 references vertex index arg2 absent from the input.
+   */
+  DeclException2 (ExcInvalidVertexIndex,
+                  int, int,
+                  << "While creating cell " << arg1
+                  << ", you are referencing a vertex with index " << arg2
+                  << " but no vertex with this index has been described in the input file.");
+  /**
+   * Exception
+   */
+  DeclException0 (ExcInvalidDBMeshFormat);
+  /**
+   * Exception for a string (arg1) not recognized at this point of a DB Mesh file.
+   */
+  DeclException1 (ExcInvalidDBMESHInput,
+                  std::string,
+                  << "The string <" << arg1 << "> is not recognized at the present"
+                  << " position of a DB Mesh file.");
+
+  /**
+   * Exception for a specified dimension (arg1) that differs from the triangulation's.
+   */
+  DeclException1 (ExcDBMESHWrongDimension,
+                  int,
+                  << "The specified dimension " << arg1
+                  << " is not the same as that of the triangulation to be created.");
+
+  DeclException1 (ExcInvalidGMSHInput,
+                  std::string,
+                  << "The string <" << arg1 << "> is not recognized at the present"
+                  << " position of a Gmsh Mesh file.");
+
+  DeclException1 (ExcGmshUnsupportedGeometry,
+                  int,
+                  << "The Element Identifier <" << arg1 << "> is not "
+                  << "supported in the deal.II library when "
+                  << "reading meshes in " << dim << " dimensions.\n"
+                  << "Supported elements are: \n"
+                  << "ELM-TYPE\n"
+                  << "1 Line (2 nodes, 1 edge).\n"
+                  << "3 Quadrilateral (4 nodes, 4 edges).\n"
+                  << "5 Hexahedron (8 nodes, 12 edges, 6 faces) when in 3d.\n"
+                  << "15 Point (1 node, ignored when read)");
+
+
+  DeclException0 (ExcGmshNoCellInformation);
+protected:
+  /**
+   * Store address of the triangulation to be fed with the data read in.
+   */
+  SmartPointer<Triangulation<dim,spacedim>,GridIn<dim,spacedim> > tria;
+
+  /**
+   * This function can write the raw cell data objects created by the
+   * <tt>read_*</tt> functions in Gnuplot format to a stream. This is
+   * sometimes handy if one would like to see what actually was created, if it
+   * is known that the data is not correct in some way, but the Triangulation
+   * class refuses to generate a triangulation because of these errors. In
+   * particular, the output of this function contains the cell numbers along
+   * with the direction of the faces of each cell. In particular the latter
+   * information is needed to verify whether the cell data objects follow the
+   * requirements of the ordering of cells and their faces, i.e. that all
+   * faces need to have unique directions and specified orientations with
+   * respect to neighboring cells (see the documentation of this class and
+   * the GridReordering class).
+   *
+   * The output of this function consists of vectors for each line bounding
+   * the cells indicating the direction it has with respect to the orientation
+   * of this cell, and the cell number. The whole output is in a form such
+   * that it can be read in by Gnuplot and generate the full plot without
+   * further ado by the user.
+   */
+  static void debug_output_grid (const std::vector<CellData<dim> > &cells,
+                                 const std::vector<Point<spacedim> > &vertices,
+                                 std::ostream &out);
+
+private:
+
+  /**
+   * Skip empty lines in the input stream, i.e. lines that contain either
+   * nothing or only whitespace.
+   */
+  static void skip_empty_lines (std::istream &in);
+
+  /**
+   * Skip lines of comment that start with the indicated character (e.g.
+   * <tt>#</tt>) following the point where the given input stream presently
+   * is. After the call to this function, the stream is at the start of the
+   * first line after the comment lines, or at the same position as before if
+   * there were no lines of comments.
+   */
+  static void skip_comment_lines (std::istream    &in,
+                                  const char  comment_start);
+
+  /**
+   * This function does the nasty work (due to very lax conventions and
+   * different versions of the tecplot format) of extracting the important
+   * parameters from a tecplot header, contained in the string @p header. The
+   * other variables are output variables, their value has no influence on the
+   * function execution.
+   */
+  static void parse_tecplot_header(std::string   &header,
+                                   std::vector<unsigned int> &tecplot2deal,
+                                   unsigned int  &n_vars,
+                                   unsigned int  &n_vertices,
+                                   unsigned int  &n_cells,
+                                   std::vector<unsigned int> &IJK,
+                                   bool          &structured,
+                                   bool          &blocked);
+
+  /**
+   * Input format used by read() if no format is given.
+   */
+  Format default_format;
+};
+
+/* -------------- declaration of explicit specializations ------------- */
+
+#ifndef DOXYGEN
+
+template <>
+void
+GridIn<2>::debug_output_grid (const std::vector<CellData<2> > &cells,
+                              const std::vector<Point<2> >    &vertices,
+                              std::ostream                    &out);
+
+
+template <>
+void
+GridIn<2,3>::debug_output_grid (const std::vector<CellData<2> > &cells,
+                                const std::vector<Point<3> >    &vertices,
+                                std::ostream                    &out);
+template <>
+void
+GridIn<3>::debug_output_grid (const std::vector<CellData<3> > &cells,
+                              const std::vector<Point<3> >    &vertices,
+                              std::ostream                    &out);
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/grid/grid_out.h b/include/deal.II/grid/grid_out.h
new file mode 100644
index 0000000..8a378db
--- /dev/null
+++ b/include/deal.II/grid/grid_out.h
@@ -0,0 +1,1629 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__grid_out_h
+#define dealii__grid_out_h
+
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/point.h>
+#include <deal.II/base/data_out_base.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/fe/mapping.h>
+
+#include <string>
+
+DEAL_II_NAMESPACE_OPEN
+
+class ParameterHandler;
+template <int dim, int spacedim> class Triangulation;
+template <int dim, int spacedim> class Mapping;
+
+
+/**
+ * Within this namespace, we define several structures that are used to
+ * describe flags that can be given to grid output routines to modify the
+ * default outfit of the grids written into a file. See the different
+ * subclasses and the documentation of the GridOut class for more details.
+ *
+ * @ingroup output
+ */
+namespace GridOutFlags
+{
+  /**
+   * Flags for grid output in OpenDX format.
+   *
+   * @ingroup output
+   */
+  struct DX
+  {
+    /**
+     * Write cells.
+     */
+    bool write_cells;
+
+    /**
+     * Write faces.
+     */
+    bool write_faces;
+
+    /**
+     * Write field with diameters.
+     */
+    bool write_diameter;
+
+    /**
+     * Write field with area/volume.
+     */
+    bool write_measure;
+
+    /**
+     * Write all faces, including interior faces. If <tt>false</tt>, only
+     * boundary faces are written.
+     */
+    bool write_all_faces;
+
+    /**
+     * Constructor.
+     */
+    DX (const bool write_cells = true,
+        const bool write_faces = false,
+        const bool write_diameter = false,
+        const bool write_measure = false,
+        const bool write_all_faces = true);
+
+    /**
+     * Declare parameters in ParameterHandler.
+     */
+    static void declare_parameters (ParameterHandler &param);
+
+    /**
+     * Parse parameters of ParameterHandler.
+     */
+    void parse_parameters (ParameterHandler &param);
+  };
+
+  /**
+   * Flags describing the details of output in MSH format.
+   *
+   * @ingroup output
+   */
+  struct Msh
+  {
+    /**
+     * When writing a mesh, write boundary faces explicitly if their boundary
+     * indicator is not the default boundary indicator, which is zero.  This
+     * is necessary if you later want to re-read the grid and want to get the
+     * same boundary indicators for the different parts of the boundary of the
+     * triangulation.
+     *
+     * It is not necessary if you only want to write the triangulation to view
+     * or print it.
+     *
+     * Default: @p false.
+     */
+    bool write_faces;
+    /**
+     * When writing a mesh, write boundary lines explicitly if their boundary
+     * indicator is not the default boundary indicator, which is zero.  This
+     * is necessary if you later want to re-read the grid and want to get the
+     * same boundary indicators for the different parts of the boundary of the
+     * triangulation.
+     *
+     * It is not necessary if you only want to write the triangulation to view
+     * or print it.
+     *
+     * This is used only if <tt>dim==3</tt>, and ignored in all other cases.
+     *
+     * Default: @p false.
+     */
+    bool write_lines;
+
+    /**
+     * Constructor.
+     */
+    Msh (const bool write_faces    = false,
+         const bool write_lines    = false);
+    /**
+     * Declare parameters in ParameterHandler.
+     */
+    static void declare_parameters (ParameterHandler &param);
+
+    /**
+     * Parse parameters of ParameterHandler.
+     */
+    void parse_parameters (ParameterHandler &param);
+  };
+
+
+  /**
+   * Flags describing the details of output in UCD format.
+   *
+   * @ingroup output
+   */
+  struct Ucd
+  {
+    /**
+     * Write a comment at the beginning of the file stating the date of
+     * creation and some other data.  While this is supported by the UCD
+     * format (and the AVS program), some other programs get confused by this,
+     * so the default is to not write a preamble. However, a preamble can be
+     * written using this flag.
+     *
+     * Default: <code>false</code>.
+     */
+    bool write_preamble;
+
+    /**
+     * When writing a mesh, write boundary faces explicitly if their boundary
+     * indicator is not the default boundary indicator, which is zero.  This
+     * is necessary if you later want to re-read the grid and want to get the
+     * same boundary indicators for the different parts of the boundary of the
+     * triangulation.
+     *
+     * It is not necessary if you only want to write the triangulation to view
+     * or print it.
+     *
+     * Default: @p false.
+     */
+    bool write_faces;
+
+    /**
+     * When writing a mesh, write boundary lines explicitly if their boundary
+     * indicator is not the default boundary indicator, which is zero.  This
+     * is necessary if you later want to re-read the grid and want to get the
+     * same boundary indicators for the different parts of the boundary of the
+     * triangulation.
+     *
+     * It is not necessary if you only want to write the triangulation to view
+     * or print it.
+     *
+     * This flag is used only if <tt>dim==3</tt>, and ignored otherwise.
+     *
+     * Default: @p false.
+     */
+    bool write_lines;
+
+    /**
+     * Constructor. Every argument defaults to @p false.
+     */
+    Ucd (const bool write_preamble = false,
+         const bool write_faces    = false,
+         const bool write_lines    = false);
+
+    /**
+     * Declare parameters in ParameterHandler. Static: no object is needed.
+     */
+    static void declare_parameters (ParameterHandler &param);
+
+    /**
+     * Parse parameters of ParameterHandler.
+     */
+    void parse_parameters (ParameterHandler &param);
+  };
+
+
+  /**
+   * Flags describing the details of output in GNUPLOT format.
+   *
+   * @ingroup output
+   */
+  struct Gnuplot
+  {
+    /**
+     * Write the number of each cell into the output file before starting with
+     * the lines it is composed of, as a comment. This might be useful if you
+     * want to find out details about the grid, for example the position of
+     * cells of which you know the number. It enlarges the size of the output
+     * significantly, however.
+     *
+     * Default: @p false.
+     */
+    bool write_cell_numbers;
+
+    /**
+     * Number of additional points per boundary face. From the vertices of
+     * the face and these points, a tensor product mesh (transformed to real
+     * space) of (#n_boundary_face_points+2)<sup>dim-1</sup> points is
+     * plotted on each boundary face. Default: 2.
+     */
+    unsigned int n_boundary_face_points;
+
+    /**
+     * Flag. If true, inner cells are also plotted with curved boundaries.
+     * This is useful, for example, for MappingQEulerian with
+     * #n_boundary_face_points > 0. Default: @p false.
+     */
+    bool curved_inner_cells;
+
+    /**
+     * Constructor.
+     */
+    Gnuplot (const bool         write_cell_number = false,
+             const unsigned int n_boundary_face_points = 2,
+             const bool         curved_inner_cells = false);
+    /**
+     * Declare parameters in ParameterHandler. Static: no object is needed.
+     */
+    static void declare_parameters (ParameterHandler &param);
+
+    /**
+     * Parse parameters of ParameterHandler.
+     */
+    void parse_parameters (ParameterHandler &param);
+  };
+
+  /**
+   * Flags describing the details of output for encapsulated postscript.  In
+   * this structure, the flags common to all dimensions are listed. Flags
+   * which are specific to one space dimension only are listed in derived
+   * classes.
+   *
+   * By default, the size of the picture is scaled such that the width equals
+   * 300 units.
+   *
+   * @ingroup output
+   */
+  struct EpsFlagsBase
+  {
+    /**
+     * Enum selecting whether the scaling is done such that the given
+     * @p size equals the width or the height of the resulting
+     * picture.
+     */
+    enum SizeType
+    {
+      width, height
+    };
+
+    /**
+     * Which extent of the picture @p size refers to. Default is @p width.
+     */
+    SizeType size_type;
+
+    /**
+     * Width or height of the output as given in postscript units. This
+     * usually is given by the strange unit 1/72 inch. Whether this is height
+     * or width is specified by the flag @p size_type.
+     *
+     * Default is 300.
+     */
+    unsigned int size;
+
+    /**
+     * Width of a line in postscript units. Default is 0.5.
+     */
+    double line_width;
+
+    /**
+     * Should lines with a set @p user_flag be drawn in a different color
+     * (red)? Default is @p false. See
+     * @ref GlossUserFlags
+     * for information about user flags.
+     */
+    bool color_lines_on_user_flag;
+
+    /**
+     * The number of points on a boundary face that are plotted in addition to
+     * the vertices of the face. Default is 2.
+     *
+     * This number is only used if the mapping is not simply the standard
+     * $Q_1$ mapping (i.e., an object of kind MappingQGeneric(1)) but one that
+     * may describe edges of cells as curved; such edges are then approximated
+     * using line segments with a number of intermediate points as described
+     * by the current variable.
+     */
+    unsigned int n_boundary_face_points;
+
+    /**
+     * Should lines be colored according to their refinement level? This
+     * overrides color_lines_on_user_flag for all levels except level 0.
+     * Colors are: level 0: black, other levels: rainbow scale from blue to
+     * red. Default is @p false.
+     */
+    bool color_lines_level;
+
+    /**
+     * Constructor.
+     */
+    EpsFlagsBase (const SizeType     size_type  = width,
+                  const unsigned int size       = 300,
+                  const double       line_width = 0.5,
+                  const bool color_lines_on_user_flag = false,
+                  const unsigned int n_boundary_face_points = 2,
+                  const bool color_lines_level = false);
+    /**
+     * Declare parameters in ParameterHandler. Static: no object is needed.
+     */
+    static void declare_parameters (ParameterHandler &param);
+
+    /**
+     * Parse parameters of ParameterHandler.
+     */
+    void parse_parameters (ParameterHandler &param);
+  };
+
+
+  /**
+   * Flags describing the details of output for encapsulated postscript for
+   * all dimensions not explicitly specialized below. Flags common to all
+   * dimensions are listed in EpsFlagsBase, the base of the specializations.
+   *
+   * The general template is an empty placeholder; the dimension-specific
+   * flags are declared in the explicit specializations below.
+   *
+   * @ingroup output
+   */
+  template <int dim>
+  struct Eps
+  {};
+
+  /**
+   * Flags specific to the output of grids in one space dimension.
+   *
+   * @ingroup output
+   */
+  template <>
+  struct Eps<1> : public EpsFlagsBase
+  {
+    /**
+     * Constructor. It takes no @p color_lines_level argument.
+     */
+    Eps (const SizeType     size_type  = width,
+         const unsigned int size       = 300,
+         const double       line_width = 0.5,
+         const bool         color_lines_on_user_flag = false,
+         const unsigned int n_boundary_face_points = 2);
+    /**
+     * Declare parameters in ParameterHandler. Static: no object is needed.
+     */
+    static void declare_parameters (ParameterHandler &param);
+
+    /**
+     * Parse parameters of ParameterHandler.
+     */
+    void parse_parameters (ParameterHandler &param);
+  };
+
+
+  /**
+   * Flags specific to the output of grids in two space dimensions.
+   *
+   * @ingroup output
+   */
+  template <>
+  struct Eps<2> : public EpsFlagsBase
+  {
+    /**
+     * If this flag is set, then we place the number of the cell into the
+     * middle of each cell. The default value is to not do this.
+     *
+     * The format of the cell number written is <tt>level.index</tt>, or
+     * simply @p index, depending on the value of #write_cell_number_level.
+     */
+    bool write_cell_numbers;
+    /**
+     * If the cell numbers shall be written, using #write_cell_numbers, then
+     * the value of this flag determines whether the format shall be
+     * <tt>level.index</tt>, or simply @p index. If @p true, the first format
+     * is taken. Default is @p true.
+     *
+     * The flag has obviously no effect if @p write_cell_numbers is @p false.
+     */
+    bool write_cell_number_level;
+
+    /**
+     * Whether vertex numbers are written onto the vertices.
+     * Default is @p false.
+     */
+    bool write_vertex_numbers;
+
+    /**
+     * Constructor. Note that @p color_lines_level is the last argument.
+     */
+    Eps (const SizeType     size_type  = width,
+         const unsigned int size       = 300,
+         const double       line_width = 0.5,
+         const bool         color_lines_on_user_flag = false,
+         const unsigned int n_boundary_face_points = 2,
+         const bool         write_cell_numbers = false,
+         const bool         write_cell_number_level = true,
+         const bool         write_vertex_numbers = false,
+         const bool         color_lines_level = false);
+    /**
+     * Declare parameters in ParameterHandler. Static: no object is needed.
+     */
+    static void declare_parameters (ParameterHandler &param);
+
+    /**
+     * Parse parameters of ParameterHandler.
+     */
+    void parse_parameters (ParameterHandler &param);
+  };
+
+  /**
+   * Flags specific to the output of grids in three space dimensions.
+   *
+   * @ingroup output
+   */
+  template <>
+  struct Eps<3> : public EpsFlagsBase
+  {
+    /**
+     * Angle of the line origin-viewer against the z-axis in degrees.
+     *
+     * Default is the Gnuplot-default of 60.
+     */
+    double azimut_angle;
+
+    /**
+     * Angle by which the viewer's position projected onto the x-y-plane is
+     * rotated around the z-axis, in positive sense when viewed from above.
+     * The unit is degrees, and zero equals a position above or below the
+     * negative y-axis.
+     *
+     * Default is the Gnuplot-default of 30.
+     */
+    double turn_angle;
+
+    /**
+     * Constructor. It takes no @p color_lines_level argument.
+     */
+    Eps (const SizeType     size_type  = width,
+         const unsigned int size       = 300,
+         const double       line_width = 0.5,
+         const bool         color_lines_on_user_flag = false,
+         const unsigned int n_boundary_face_points = 2,
+         const double       azimut_angle    = 60,
+         const double       turn_angle      = 30);
+    /**
+     * Declare parameters in ParameterHandler. Static: no object is needed.
+     */
+    static void declare_parameters (ParameterHandler &param);
+
+    /**
+     * Parse parameters of ParameterHandler.
+     */
+    void parse_parameters (ParameterHandler &param);
+  };
+
+  /**
+   * Flags for XFig output.
+   *
+   * @ingroup output
+   */
+  struct XFig
+  {
+    /**
+     * Draw boundary lines. Default is true.
+     */
+    bool draw_boundary;
+
+    /**
+     * An enum used for deciding which field is used for coloring the cells.
+     */
+    enum Coloring
+    {
+      /// Convert the material id into the cell color
+      material_id,
+      /// Convert the level into the cell color
+      level_number,
+      /// Convert the global subdomain id into the cell color
+      subdomain_id,
+      /// Convert the level subdomain id into the cell color
+      level_subdomain_id
+    } color_by;
+
+    /**
+     * Code level to depth. Default is true. If false, the depth depends on
+     * the material or boundary id instead.
+     *
+     * Depth of the object is 900-level, if this value is true.
+     */
+    bool level_depth;
+
+    /**
+     * Additional points for curved boundaries. Default is none.
+     */
+    unsigned int n_boundary_face_points;
+
+    /**
+     * Scaling of graph. The default is a unit length of one inch.
+     */
+    Point<2> scaling;
+
+    /**
+     * Offset of the graph. Before scaling, the coordinates are shifted by
+     * this value. Default is zero in each direction.
+     */
+    Point<2> offset;
+
+    /**
+     * Style for filling cells. Default is solid fill (20). This value is
+     * forwarded unchanged into the corresponding field <tt>fill_style</tt> of
+     * the polyline object of XFig.
+     */
+    int fill_style;
+
+    /**
+     * Style for drawing border lines of polygons. Defaults to solid (0) and
+     * is forwarded to XFig.
+     */
+    int line_style;
+
+    /**
+     * Thickness of border lines of polygons. Default is 1.
+     *
+     * Set this to zero to avoid border lines for very fine meshes.
+     */
+    int line_thickness;
+
+    /**
+     * Style for drawing lines at the boundary. Defaults to solid (0).
+     */
+    int boundary_style;
+
+    /**
+     * Thickness of boundary lines. Default is 3.
+     */
+    int boundary_thickness;
+
+    /**
+     * Constructor. It takes no arguments.
+     */
+    XFig();
+    /**
+     * Declare parameters in ParameterHandler. Static: no object is needed.
+     */
+    static void declare_parameters (ParameterHandler &param);
+
+    /**
+     * Parse parameters of ParameterHandler.
+     */
+    void parse_parameters (ParameterHandler &param);
+  };
+
+
+  /**
+   * Flags controlling SVG output.
+   *
+   * @ingroup output
+   */
+  struct Svg
+  {
+    /// Height of the plot in SVG units, computed from width if zero. Defaults to 1000
+    unsigned int height;
+    /// The width of the plot. Computed automatically from height if zero (default)
+    unsigned int width;
+    /// Thickness of the lines between cells. Defaults to 2
+    unsigned int line_thickness;
+    /// Thickness of lines at the boundary. Defaults to 4
+    unsigned int boundary_line_thickness;
+
+    /// Whether to draw a margin around the plotted area. Defaults to true
+    bool margin;
+
+    /**
+     * Background style. The constructor default is @p white.
+     */
+    enum Background
+    {
+      /// Use transparent value of SVG
+      transparent,
+      /// Use white background
+      white,
+      /// Use a gradient from white (top) to steelblue (bottom), and add date and time plus a deal.II logo. Automatically draws a margin.
+      dealii
+    };
+
+    Background background;
+
+    // View angles for the perspective view of the grid; default is 0, 0 (top view).
+    /**
+     * The azimuth angle in degrees (reference direction not documented here). Default is 0.
+     */
+    int azimuth_angle;
+    /**
+     * The angle from vertically above the xy-plane. Default is 0.
+     */
+    int polar_angle;
+
+    /**
+     * Cell coloring. The constructor default is @p level_number.
+     */
+    enum Coloring
+    {
+      /// No cell coloring
+      none,
+      /// Convert the material id into the cell color
+      material_id,
+      /// Convert the level number into the cell color (constructor default)
+      level_number,
+      /// Convert the subdomain id into the cell color
+      subdomain_id,
+      /// Convert the level subdomain id into the cell color
+      level_subdomain_id
+    };
+
+    Coloring coloring;
+
+    /// Interpret the level number of the cells as altitude over the x-y-plane (useful in the perspective view). Defaults to false
+    bool convert_level_number_to_height;
+
+    /// The factor determining the vertical distance between levels (default = 0.3)
+    float level_height_factor;
+
+    /// Scaling of the font for cell annotations. Defaults to 1.
+    float cell_font_scaling;
+    /// Write level number into each cell. Defaults to true
+    bool label_level_number;
+    /// Write cell index into each cell. Defaults to true
+    bool label_cell_index;
+    /// Write material id of each cell. Defaults to false
+    bool label_material_id;
+    /// Write subdomain id of each cell. Defaults to false
+    bool label_subdomain_id;
+    /// Write level subdomain id of each cell. Defaults to false
+    bool label_level_subdomain_id;
+
+    /// Draw a colorbar next to the plotted grid with respect to the chosen coloring of the cells. Defaults to true
+    bool draw_colorbar;
+    /// Draw a legend next to the plotted grid, explaining the label of the cells. Defaults to true
+    bool draw_legend;
+
+    /**
+     * Constructor. Note: not every member has a corresponding argument.
+     */
+    Svg(const unsigned int line_thickness = 2,
+        const unsigned int boundary_line_thickness = 4,
+        bool margin = true,
+        const Background background = white,
+        const int azimuth_angle = 0,
+        const int polar_angle = 0,
+        const Coloring coloring = level_number,
+        const bool convert_level_number_to_height = false,
+        const bool label_level_number = true,
+        const bool label_cell_index = true,
+        const bool label_material_id = false,
+        const bool label_subdomain_id = false,
+        const bool draw_colorbar = true,
+        const bool draw_legend = true);
+  };
+
+  /**
+   * Flags for grid output in MathGL format.
+   *
+   * @ingroup output
+   */
+  struct MathGL
+  {
+    /**
+     * Constructor. It takes no arguments.
+     */
+    MathGL ();
+
+    /**
+     * Flag: if @p true, draw a bounding box around the graph.
+     */
+    bool draw_bounding_box;
+
+    /**
+     * Declare parameters in ParameterHandler. Static: no object is needed.
+     */
+    static void declare_parameters (ParameterHandler &param);
+
+    /**
+     * Parse parameters of ParameterHandler.
+     */
+    void parse_parameters (ParameterHandler &param);
+  };
+
+
+  /**
+   * Flags for grid output in Vtk format. These flags are the same as those
+   * declared in DataOutBase::VtkFlags; this struct adds no members itself.
+   *
+   * @ingroup output
+   */
+  struct Vtk : public DataOutBase::VtkFlags
+  {};
+
+
+  /**
+   * Flags for grid output in Vtu format. These flags are the same as those
+   * declared in DataOutBase::VtkFlags, from which this struct derives.
+   *
+   * @ingroup output
+   */
+  struct Vtu : public DataOutBase::VtkFlags
+  {};
+}
+
+
+
+/**
+ * This class provides a means to output a triangulation to a file in
+ * different formats. See the enum GridOut::OutputFormat for a list of formats
+ * and the corresponding output function names.
+ *
+ * Usage is simple: either you use the direct form
+ * @code
+ *   ofstream output_file("some_filename");
+ *   GridOut().write_gnuplot (tria, output_file);
+ * @endcode
+ * if you know which format you want to have, or if you want the format to be
+ * a runtime parameter, you can write
+ * @code
+ *   GridOut::OutputFormat output_format =
+ *                   GridOut::parse_output_format(get_format_name_from_somewhere());
+ *   ofstream output_file("some_filename" + GridOut::default_suffix(output_format));
+ *   GridOut().write (tria, output_file, output_format);
+ * @endcode
+ * The function <tt>get_output_format_names()</tt> provides a list of possible
+ * names of output formats in a string that is understandable by the
+ * ParameterHandler class.
+ *
+ * Note that here, we have created an unnamed object of type GridOut and
+ * called one of its <tt>write_*</tt> functions. This looks as if the
+ * respective function could really be made @p static. This was not done in
+ * order to allow for parameters to be passed to the different output
+ * functions in a way compatible with the scheme of allowing the right output
+ * format to be selected at run-time through the generic @p write function.
+ *
+ * In order to explain this, consider each function had one or more additional
+ * parameters giving the details of output, for example position of the
+ * spectator for 3d meshes, line thicknesses, etc. While this would allow each
+ * output function any flexibility it needs, it would not allow us to use the
+ * generic function @p write which is given a parameter determining the output
+ * format, since it is impractical to give it a list of parameters for each
+ * and every output format supported which it may then pass on to the
+ * respective output function.
+ *
+ * Rather, we have chosen to let each object of this class GridOut have a set
+ * of parameters for each supported output format. These are collected in
+ * structures GridOutFlags::Eps(), GridOutFlags::Gnuplot(), etc declared in
+ * the GridOutFlags namespace, and you can set your preferred flags like this:
+ * @code
+ *   GridOut grid_out;
+ *   GridOutFlags::Ucd ucd_flags;
+ *   ...    // set some fields in ucd_flags
+ *   grid_out.set_flags (ucd_flags);
+ *   ...
+ *   ...    // write some file with grid_out
+ * @endcode
+ * The respective output functions then use the flags so set. By default, they
+ * are set to reasonable values as described above and in the documentation of
+ * the different flags structures. Resetting the flags can be done by calling
+ * <tt>grid_out.set_flags (GridOutFlags::Ucd());</tt>, since the default
+ * constructor of each of the flags structures sets the parameters to their
+ * initial values.
+ *
+ * The advantage of this approach is that it is possible to change the flags
+ * of one or more output formats according to your needs and later use the
+ * generic @p write function; the actual output function then called will use
+ * the flags as set before.
+ *
+ * Note that some of the structures describing the flags of the different
+ * output formats are empty since the respective format does not support any
+ * flags. The structure and the @p set_flags function are provided anyway.
+ * Note also that some of the structures may differ between the dimensions
+ * supported by this class; they then have a template parameter, as usual.
+ *
+ * @ingroup grid
+ * @ingroup output
+ * @author Wolfgang Bangerth, Guido Kanschat, Luca Heltai, Stefan Nauber,
+ * Christian Wülker
+ * @date 1999 - 2013
+ */
+class GridOut
+{
+public:
+  /**
+   * Declaration of a name for each of the different output formats. These are
+   * used by the generic output function write() to determine the actual
+   * output format.
+   */
+  enum OutputFormat
+  {
+    /// Do nothing in write()
+    none,
+    /// write() calls write_dx()
+    dx,
+    /// write() calls write_gnuplot()
+    gnuplot,
+    /// write() calls write_eps()
+    eps,
+    /// write() calls write_ucd()
+    ucd,
+    /// write() calls write_xfig()
+    xfig,
+    /// write() calls write_msh()
+    msh,
+    /// write() calls write_svg()
+    svg,
+    /// write() calls write_mathgl()
+    mathgl,
+    /// write() calls write_vtk()
+    vtk,
+    /// write() calls write_vtu()
+    vtu
+  };
+
+  /**
+   * Constructor.
+   */
+  GridOut ();
+
+  /**
+   * Write triangulation in OpenDX format.
+   *
+   * Cells or faces are written together with their level and their material
+   * id or boundary indicator, resp.
+   *
+   * Not implemented for the codimension one case.
+   */
+  template <int dim, int spacedim>
+  void write_dx (const Triangulation<dim,spacedim> &tria,
+                 std::ostream             &out) const;
+
+  /**
+   * Write the triangulation in the gnuplot format.
+   *
+   * In GNUPLOT format, each cell is written as a sequence of its confining
+   * lines. Apart from the coordinates of the line's end points, the level and
+   * the material of the cell are appended to each line of output. Therefore,
+   * if you let GNUPLOT draw a 2d grid as a 3d plot, you will see more refined
+   * cells being raised against cells with less refinement.  Also, if you draw
+   * a cut through a 3d grid, you can extrude the refinement level in the
+   * direction orthogonal to the cut plane. The same can be done with the
+   * material id, which is plotted after the level.
+   *
+   * A more useful application of this feature is the following: if you use
+   * the GNUPLOT command (for a 2d grid here)
+   * @verbatim
+   * splot [:][:][2.5:3.5] "grid_file.gnuplot" *
+   * @endverbatim
+   * then the whole x- and y-range will be plotted, i.e. the whole grid, but
+   * only those lines with a z-value between 2.5 and 3.5. Since the z-values
+   * were chosen to be the level to which a cell belongs, this results in a
+   * plot of those cells only that belong to level 3 in this example. This
+   * way, it is easy to produce plots of the different levels of grid.
+   *
+   * @p mapping is a pointer to a mapping used for the transformation of cells
+   * at the boundary. If zero, then use standard Q1 mapping.
+   *
+   * Names and values of additional flags controlling the output can be found
+   * in the documentation of the GridOutFlags::Gnuplot() class.
+   *
+   * Not implemented for the codimension one case.
+   */
+  template <int dim, int spacedim>
+  void write_gnuplot (const Triangulation<dim,spacedim> &tria,
+                      std::ostream           &out,
+                      const Mapping<dim,spacedim> *mapping=0) const;
+
+  /**
+   * Write the triangulation in the msh format.
+   *
+   * Msh is the format used by Gmsh and it is described in the gmsh user's
+   * guide. Besides the usual output of the grid only, you can decide through
+   * additional flags (see below, and the documentation of the
+   * GridOutFlags::Msh() class) whether boundary faces with non-zero boundary
+   * indicator shall be written to the file explicitly. This is useful, if you
+   * want to re-read the grid later on, since <tt>deal.II</tt> sets the
+   * boundary indicator to zero by default; therefore, to obtain the same
+   * triangulation as before, you have to specify faces with differing
+   * boundary indicators explicitly, which is done by this flag.
+   *
+   * Names and values of further flags controlling the output can be found in
+   * the documentation of the GridOutFlags::Msh() class.
+   *
+   * Works also in the codimension one case.
+   */
+  template <int dim, int spacedim>
+  void write_msh (const Triangulation<dim,spacedim> &tria,
+                  std::ostream             &out) const;
+
+  /**
+   * Write the triangulation in the ucd format.
+   *
+   * UCD (unstructured cell data) is the format used by AVS and some other
+   * programs. It is described in the AVS developer's guide. Besides the usual
+   * output of the grid only, you can decide through additional flags (see
+   * below, and the documentation of the GridOutFlags::Ucd() class) whether
+   * boundary faces with non-zero boundary indicator shall be written to the
+   * file explicitly. This is useful, if you want to re-read the grid later
+   * on, since <tt>deal.II</tt> sets the boundary indicator to zero by
+   * default; therefore, to obtain the same triangulation as before, you have
+   * to specify faces with differing boundary indicators explicitly, which is
+   * done by this flag.
+   *
+   * Names and values of further flags controlling the output can be found in
+   * the documentation of the GridOutFlags::Ucd() class.
+   *
+   * Works also for the codimension one case.
+   */
+  template <int dim, int spacedim>
+  void write_ucd (const Triangulation<dim,spacedim> &tria,
+                  std::ostream             &out) const;
+
+  /**
+   * Write the triangulation in the encapsulated postscript format.
+   *
+   * In this format, each line of the triangulation is written separately. We
+   * scale the picture such that either x-values or y-values range between
+   * zero and a fixed size. The other axis is scaled by the same factor. Which
+   * axis is taken to compute the scale and the size of the box it shall fit
+   * into is determined by the output flags (see below, and the documentation
+   * of the GridOutFlags::Eps() class).
+   *
+   * The bounding box is close to the triangulation on all four sides, without
+   * an extra frame. The line width is chosen to be 0.5 by default, but can be
+   * changed. The line width is to be compared with the extension of the
+   * picture, of which the default is 300.
+   *
+   * The flag @p color_lines_on_user_flag causes lines with the @p
+   * user_flag set to be drawn in red. The colors black and red are defined as
+   * @p b and @p r in the preamble of the output file and can be changed there
+   * according to need.
+   *
+   * @p mapping is a pointer to a mapping used for the transformation of cells
+   * at the boundary. If zero, then use standard Q1 mapping.
+   *
+   * Names and values of additional flags controlling the output can be found
+   * in the documentation of the GridOutFlags::Eps() class. Especially the
+   * viewpoint for three dimensional grids is of importance here.
+   *
+   * Not implemented for the codimension one case.
+   */
+  template <int dim, int spacedim>
+  void write_eps (const Triangulation<dim, spacedim> &tria,
+                  std::ostream             &out,
+                  const Mapping<dim, spacedim>       *mapping=0) const;
+
+  /**
+   * Write two-dimensional XFig-file.
+   *
+   * This function writes all grid cells as polygons and optionally boundary
+   * lines. Several parameters can be adjusted by the GridOutFlags::XFig
+   * control object.
+   *
+   * If levels are coded to depth, the complete grid hierarchy is plotted with
+   * fine cells before their parents. This way, levels can be switched on and
+   * off in xfig by selecting levels.
+   *
+   * Polygons are either at depth 900-level or at 900+@p material_id,
+   * depending on the flag @p level_depth. Accordingly, boundary edges are at
+   * depth 800-level or at 800+@p boundary_id. Therefore, boundary edges are
+   * always in front of cells.
+   *
+   * Not implemented for the codimension one case.
+   */
+  template <int dim, int spacedim>
+  void write_xfig (const Triangulation<dim, spacedim> &tria,
+                   std::ostream              &out,
+                   const Mapping<dim, spacedim>        *mapping=0) const;
+
+  /**
+   * Write the triangulation in the SVG format.
+   *
+   * SVG (Scalable Vector Graphics) is an XML-based vector image format
+   * developed and maintained by the World Wide Web Consortium (W3C). This
+   * function conforms to the latest specification SVG 1.1, released on August
+   * 16, 2011.
+   *
+   * The cells of the triangulation are written as polygons with additional
+   * lines at the boundary of the triangulation. A coloring of the cells is
+   * further possible in order to visualize a certain property of the cells
+   * such as their level or material id. A colorbar can be drawn to encode the
+   * chosen coloring.  Moreover, a cell label can be added, showing level
+   * index, etc.
+   *
+   * @note This function is currently only implemented for two-dimensional
+   * grids in two space dimensions.
+   */
+  void write_svg (const Triangulation<2,2> &tria,
+                  std::ostream             &out) const;
+
+  /**
+   * Declaration of the same function as above for all other dimensions and
+   * space dimensions. This function is not currently implemented and is only
+   * declared to exist to support dimension independent programming.
+   */
+  template <int dim, int spacedim>
+  void write_svg (const Triangulation<dim,spacedim> &tria,
+                  std::ostream                      &out) const;
+
+
+  /**
+   * Write triangulation in MathGL script format. To interpret this file a
+   * version of MathGL>=2.0.0 is required.
+   *
+   * To get a handle on the resultant MathGL script within a graphical
+   * environment an interpreter is needed. A suggestion to start with is
+   * <code>mglview</code>, which is bundled with MathGL. <code>mglview</code>
+   * can interpret and display small-to-medium MathGL scripts in a graphical
+   * window and enables conversion to other formats such as EPS, PNG, JPG,
+   * SVG, as well as view/display animations. Some minor editing, such as
+   * modifying the lighting or alpha channels, can also be done.
+   *
+   * @note Not implemented for the codimension one case.
+   */
+  template <int dim, int spacedim>
+  void write_mathgl (const Triangulation<dim, spacedim> &tria,
+                     std::ostream             &out) const;
+
+  /**
+   * Write triangulation in VTK format.
+   */
+  template <int dim, int spacedim>
+  void write_vtk (const Triangulation<dim,spacedim> &tria,
+                  std::ostream                      &out) const;
+
+  /**
+   * Write triangulation in VTU format.
+   */
+  template <int dim, int spacedim>
+  void write_vtu (const Triangulation<dim,spacedim> &tria,
+                  std::ostream                      &out) const;
+
+  /**
+   * Write grid to @p out according to the given data format. This function
+   * simply calls the appropriate <tt>write_*</tt> function.
+   */
+  template <int dim, int spacedim>
+  void write (const Triangulation<dim,spacedim> &tria,
+              std::ostream                      &out,
+              const OutputFormat                 output_format,
+              const Mapping<dim,spacedim>       *mapping=0) const;
+
+  /**
+   * Write mesh in default format set by ParameterHandler.
+   */
+  template <int dim, int spacedim>
+  void write (const Triangulation<dim,spacedim> &tria,
+              std::ostream                      &out,
+              const Mapping<dim,spacedim>       *mapping=0) const;
+
+  /**
+   * Set flags for DX output
+   */
+  void set_flags (const GridOutFlags::DX &flags);
+
+  /**
+   * Set flags for GMSH output
+   */
+  void set_flags (const GridOutFlags::Msh &flags);
+
+  /**
+   * Set flags for UCD output
+   */
+  void set_flags (const GridOutFlags::Ucd &flags);
+
+  /**
+   * Set flags for GNUPLOT output
+   */
+  void set_flags (const GridOutFlags::Gnuplot &flags);
+
+  /**
+   * Set flags for EPS output of a one-dimensional triangulation
+   */
+  void set_flags (const GridOutFlags::Eps<1> &flags);
+
+  /**
+   * Set flags for EPS output of a two-dimensional triangulation
+   */
+  void set_flags (const GridOutFlags::Eps<2> &flags);
+
+  /**
+   * Set flags for EPS output of a three-dimensional triangulation
+   */
+  void set_flags (const GridOutFlags::Eps<3> &flags);
+
+  /**
+   * Set flags for XFig output
+   */
+  void set_flags (const GridOutFlags::XFig &flags);
+
+  /**
+   * Set flags for SVG output
+   */
+  void set_flags (const GridOutFlags::Svg &flags);
+
+  /**
+   * Set flags for MathGL output
+   */
+  void set_flags (const GridOutFlags::MathGL &flags);
+
+  /**
+   * Set flags for VTK output
+   */
+  void set_flags (const GridOutFlags::Vtk &flags);
+
+  /**
+   * Set flags for VTU output
+   */
+  void set_flags (const GridOutFlags::Vtu &flags);
+
+  /**
+   * Provide a function that can tell us which suffix a given output format
+   * usually has. For example, it defines the following mappings:
+   * <ul>
+   * <li> @p OpenDX: <tt>.dx</tt>
+   * <li> @p gnuplot: <tt>.gnuplot</tt>
+   * <li> @p ucd: <tt>.inp</tt>
+   * <li> @p eps: <tt>.eps</tt>.
+   * </ul>
+   * Similar mappings are provided for all implemented formats.
+   *
+   * Since this function does not need data from this object, it is static and
+   * can thus be called without creating an object of this class.
+   */
+  static std::string default_suffix (const OutputFormat output_format);
+
+  /**
+   * Default suffix for the default output format selected through
+   * ParameterHandler.
+   */
+  std::string default_suffix () const;
+
+  /**
+   * Return the @p OutputFormat value corresponding to the given string. If
+   * the string does not match any known format, an exception is thrown.
+   *
+   * Since this function does not need data from this object, it is static and
+   * can thus be called without creating an object of this class. Its main
+   * purpose is to allow a program to use any implemented output format
+   * without the need to extend the program's parser each time a new format is
+   * implemented.
+   *
+   * To get a list of presently available format names, e.g. to give it to the
+   * ParameterHandler class, use the function get_output_format_names().
+   */
+  static OutputFormat parse_output_format (const std::string &format_name);
+
+  /**
+   * Return a list of implemented output formats. The different names are
+   * separated by vertical bar signs (<tt>`|'</tt>) as used by the
+   * ParameterHandler classes.
+   */
+  static std::string get_output_format_names ();
+
+  /**
+   * Declare parameters in ParameterHandler.
+   */
+  static void declare_parameters (ParameterHandler &param);
+
+  /**
+   * Parse parameters of ParameterHandler.
+   */
+  void parse_parameters (ParameterHandler &param);
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  std::size_t memory_consumption () const;
+
+  /**
+   * Exception
+   */
+  DeclException0 (ExcInvalidState);
+
+private:
+  /**
+   * The default output format, set by a ParameterHandler.
+   */
+  OutputFormat default_format;
+
+  /**
+   * Flags for OpenDX output.
+   */
+  GridOutFlags::DX dx_flags;
+
+  /**
+   * Flags for GMSH output. Can be changed by using the set_flags(const
+   * GridOutFlags::Msh&) function.
+   */
+  GridOutFlags::Msh     msh_flags;
+
+  /**
+   * Flags for UCD output. Can be changed by using the set_flags(const
+   * GridOutFlags::Ucd&) function.
+   */
+  GridOutFlags::Ucd     ucd_flags;
+
+  /**
+   * Flags to be used upon output of GNUPLOT data. Can be changed by using the
+   * set_flags(const GridOutFlags::Gnuplot&) function.
+   */
+  GridOutFlags::Gnuplot gnuplot_flags;
+
+  /**
+   * Flags to be used upon output of EPS data in one space dimension. Can be
+   * changed by using the set_flags(const GridOutFlags::Eps<1>&) function.
+   */
+  GridOutFlags::Eps<1>  eps_flags_1;
+
+  /**
+   * Flags to be used upon output of EPS data in two space dimensions. Can be
+   * changed by using the @p set_flags function.
+   */
+  GridOutFlags::Eps<2>  eps_flags_2;
+
+  /**
+   * Flags to be used upon output of EPS data in three space dimensions. Can
+   * be changed by using the @p set_flags function.
+   */
+  GridOutFlags::Eps<3>  eps_flags_3;
+
+  /**
+   * Flags used for XFig output.
+   */
+  GridOutFlags::XFig xfig_flags;
+
+  /**
+   * Flags used for Svg output.
+   */
+  GridOutFlags::Svg svg_flags;
+
+  /**
+   * Flags for MathGL output.
+   */
+  GridOutFlags::MathGL mathgl_flags;
+
+  /**
+   * Flags for VTK output.
+   */
+  GridOutFlags::Vtk vtk_flags;
+
+  /**
+   * Flags for VTU output.
+   */
+  GridOutFlags::Vtu vtu_flags;
+
+  /**
+   * Write the grid information about faces to @p out. Only those faces are
+   * printed which are on the boundary and which have a boundary indicator not
+   * equal to zero, since the latter is the default for boundary faces.
+   *
+   * Since, in GMSH, geometric elements are continuously numbered, this
+   * function requires a parameter @p next_element_index providing the next
+   * geometric element number. This index should have a numerical value equal
+   * to one more than the index previously used to write a geometric element
+   * to @p out.
+   *
+   * @returns The next unused geometric element index.
+   *
+   * @warning @p next_element_index should be (at least) one larger than the
+   * current number of triangulation elements (lines, cells, faces) that have
+   * been written to @p out. GMSH will not load the saved file correctly if
+   * there are repeated indices.
+   *
+   * This function unfortunately can not be included in the regular @p
+   * write_msh function, since it needs special treatment for the case
+   * <tt>dim==1</tt>, in which case the face iterators are <tt>void*</tt>'s
+   * and lack the member functions which are called. We would not actually
+   * call these functions, but the compiler would complain anyway when
+   * compiling the function for <tt>dim==1</tt>. Bad luck.
+   */
+  template <int dim, int spacedim>
+  unsigned int
+  write_msh_faces (const Triangulation<dim,spacedim> &tria,
+                   const unsigned int                 next_element_index,
+                   std::ostream                      &out) const;
+
+  /**
+   * Declaration of the specialization of above function for 1d. Does nothing.
+   */
+  unsigned int
+  write_msh_faces (const Triangulation<1,1>      &tria,
+                   const unsigned int             next_element_index,
+                   std::ostream                  &out) const;
+  /**
+   * Declaration of the specialization of above function for 1d, 2sd. Does
+   * nothing.
+   */
+  unsigned int
+  write_msh_faces (const Triangulation<1,2>      &tria,
+                   const unsigned int             next_element_index,
+                   std::ostream                  &out) const;
+  unsigned int
+  write_msh_faces (const Triangulation<1,3>      &tria,
+                   const unsigned int             next_element_index,
+                   std::ostream                  &out) const;
+
+
+
+  /**
+   * Write the grid information about lines to @p out. Only those lines are
+   * printed which are on the boundary and which have a boundary indicator not
+   * equal to zero, since the latter is the default for boundary lines.
+   *
+   * Since, in GMSH, geometric elements are continuously numbered, this
+   * function requires a parameter @p next_element_index providing the next
+   * geometric element number. This index should have a numerical value equal
+   * to one more than the index previously used to write a geometric element
+   * to @p out.
+   *
+   * @returns The next unused geometric element index.
+   *
+   * @warning @p next_element_index should be (at least) one larger than the
+   * current number of triangulation elements (lines, cells, faces) that have
+   * been written to @p out. GMSH will not load the saved file correctly if
+   * there are repeated indices.
+   *
+   * This function unfortunately can not be included in the regular @p
+   * write_msh function, since it needs special treatment for the case
+   * <tt>dim==1</tt> and <tt>dim==2</tt>, in which case the edge iterators are
+   * <tt>void*</tt>'s and lack the member functions which are called. We would
+   * not actually call these functions, but the compiler would complain anyway
+   * when compiling the function for <tt>dim==1/2</tt>. Bad luck.
+   */
+  template <int dim, int spacedim>
+  unsigned int
+  write_msh_lines (const Triangulation<dim,spacedim> &tria,
+                   const unsigned int                 next_element_index,
+                   std::ostream                      &out) const;
+
+  /**
+   * Declaration of the specialization of above function for 1d. Does nothing.
+   */
+  unsigned int
+  write_msh_lines (const Triangulation<1,1>      &tria,
+                   const unsigned int             next_element_index,
+                   std::ostream                  &out) const;
+
+  /**
+   * Declaration of the specialization of above function for 1d, 2sd. Does
+   * nothing.
+   */
+  unsigned int
+  write_msh_lines (const Triangulation<1,2>      &tria,
+                   const unsigned int             next_element_index,
+                   std::ostream                  &out) const;
+  unsigned int
+  write_msh_lines (const Triangulation<1,3>      &tria,
+                   const unsigned int             next_element_index,
+                   std::ostream                  &out) const;
+  /**
+   * Declaration of the specialization of above function for 2d. Does nothing.
+   */
+  unsigned int
+  write_msh_lines (const Triangulation<2,2>      &tria,
+                   const unsigned int             next_element_index,
+                   std::ostream                  &out) const;
+  /**
+   * Declaration of the specialization of above function for 2d, 3sd. Does
+   * nothing.
+   */
+  unsigned int
+  write_msh_lines (const Triangulation<2,3>      &tria,
+                   const unsigned int             next_element_index,
+                   std::ostream                  &out) const;
+
+  /**
+   * Write the grid information about faces to @p out. Only those faces are
+   * printed which are on the boundary and which have a boundary indicator not
+   * equal to zero, since the latter is the default for boundary faces.
+   *
+   * Since (in the UCD format) geometric elements are continuously numbered,
+   * this function requires a parameter @p next_element_index providing the
+   * next geometric element number. This index should have a numerical value
+   * equal to one more than the index previously used to write a geometric
+   * element to @p out.
+   *
+   * @returns The next unused geometric element index.
+   *
+   * @warning @p next_element_index should be (at least) one larger than the
+   * current number of triangulation elements (lines, cells, faces) that have
+   * been written to @p out. Visualization programs may not load the saved
+   * file correctly if there are repeated indices.
+   *
+   * This function unfortunately can not be included in the regular @p
+   * write_ucd function, since it needs special treatment for the case
+   * <tt>dim==1</tt>, in which case the face iterators are <tt>void*</tt>'s
+   * and lack the member functions which are called. We would not actually
+   * call these functions, but the compiler would complain anyway when
+   * compiling the function for <tt>dim==1</tt>. Bad luck.
+   */
+  template <int dim, int spacedim>
+  unsigned int
+  write_ucd_faces (const Triangulation<dim,spacedim> &tria,
+                   const unsigned int                 next_element_index,
+                   std::ostream                      &out) const;
+
+  /**
+   * Declaration of the specialization of above function for 1d. Does nothing.
+   */
+  unsigned int
+  write_ucd_faces (const Triangulation<1,1>      &tria,
+                   const unsigned int             next_element_index,
+                   std::ostream                  &out) const;
+
+  /**
+   * Declaration of the specialization of above function for 1d, 2sd. Does
+   * nothing.
+   */
+  unsigned int
+  write_ucd_faces (const Triangulation<1,2>      &tria,
+                   const unsigned int             next_element_index,
+                   std::ostream                  &out) const;
+  unsigned int
+  write_ucd_faces (const Triangulation<1,3>      &tria,
+                   const unsigned int             next_element_index,
+                   std::ostream                  &out) const;
+
+
+  /**
+   * Write the grid information about lines to @p out. Only those lines are
+   * printed which are on the boundary and which have a boundary indicator not
+   * equal to zero, since the latter is the default for boundary lines.
+   *
+   * Since (in the UCD format) geometric elements are continuously numbered,
+   * this function requires a parameter @p next_element_index providing the
+   * next geometric element number. This index should have a numerical value
+   * equal to one more than the index previously used to write a geometric
+   * element to @p out.
+   *
+   * @returns The next unused geometric element index.
+   *
+   * @warning @p next_element_index should be (at least) one larger than the
+   * current number of triangulation elements (lines, cells, faces) that have
+   * been written to @p out. Visualization programs may not load the saved
+   * file correctly if there are repeated indices.
+   *
+   * This function unfortunately can not be included in the regular @p
+   * write_ucd function, since it needs special treatment for the case
+   * <tt>dim==1/2</tt>, in which case the edge iterators are <tt>void*</tt>'s
+   * and lack the member functions which are called. We would not actually
+   * call these functions, but the compiler would complain anyway when
+   * compiling the function for <tt>dim==1/2</tt>. Bad luck.
+   */
+  template <int dim, int spacedim>
+  unsigned int
+  write_ucd_lines (const Triangulation<dim,spacedim> &tria,
+                   const unsigned int                 next_element_index,
+                   std::ostream                      &out) const;
+
+  /**
+   * Declaration of the specialization of above function for 1d. Does nothing.
+   */
+  unsigned int
+  write_ucd_lines (const Triangulation<1,1>      &tria,
+                   const unsigned int             next_element_index,
+                   std::ostream                  &out) const;
+  /**
+   * Declaration of the specialization of above function for 1d, 2sd. Does
+   * nothing.
+   */
+  unsigned int
+  write_ucd_lines (const Triangulation<1,2>      &tria,
+                   const unsigned int             next_element_index,
+                   std::ostream                  &out) const;
+  unsigned int
+  write_ucd_lines (const Triangulation<1,3>      &tria,
+                   const unsigned int             next_element_index,
+                   std::ostream                  &out) const;
+
+
+  /**
+   * Declaration of the specialization of above function for 2d. Does nothing.
+   */
+  unsigned int
+  write_ucd_lines (const Triangulation<2,2>      &tria,
+                   const unsigned int             next_element_index,
+                   std::ostream                  &out) const;
+  /**
+   * Declaration of the specialization of above function for 2d, 3sd. Does
+   * nothing.
+   */
+  unsigned int
+  write_ucd_lines (const Triangulation<2,3>      &tria,
+                   const unsigned int             next_element_index,
+                   std::ostream                  &out) const;
+
+
+  /**
+   * This function projects a three-dimensional point (Point<3> point) onto a
+   * two-dimensional image plane, specified by the position of the camera
+   * viewing system (Point<3> camera_position), camera direction (Point<3>
+   * camera_direction), camera horizontal (Point<3> camera_horizontal,
+   * necessary for the correct alignment of the later images), and the focus
+   * of the camera (float camera_focus).
+   *
+   * For SVG output of grids.
+   */
+  static Point<2> svg_project_point(Point<3> point,
+                                    Point<3> camera_position,
+                                    Point<3> camera_direction,
+                                    Point<3> camera_horizontal,
+                                    float camera_focus);
+
+  /**
+   * Return the number of faces in the triangulation which have a boundary
+   * indicator not equal to zero. Only these faces are explicitly printed in
+   * the <tt>write_*</tt> functions; all faces with indicator
+   * numbers::internal_face_boundary_id are interior ones and an indicator
+   * with value zero for faces at the boundary is considered the default.
+   *
+   * This function always returns zero in one dimension.
+   *
+   * The reason for this function is the same as for write_ucd_faces(). See
+   * there for more information.
+   */
+  template <int dim, int spacedim>
+  unsigned int n_boundary_faces (const Triangulation<dim,spacedim> &tria) const;
+
+  /**
+   * Declaration of the specialization of above function for 1d. Simply
+   * returns zero.
+   */
+  unsigned int n_boundary_faces (const Triangulation<1,1> &tria) const;
+
+  /**
+   * Declaration of the specialization of above function for 1d, 2sd. Simply
+   * returns zero.
+   */
+  unsigned int n_boundary_faces (const Triangulation<1,2> &tria) const;
+  unsigned int n_boundary_faces (const Triangulation<1,3> &tria) const;
+
+  /**
+   * Return the number of lines in the triangulation which have a boundary
+   * indicator not equal to zero. Only these lines are explicitly printed in
+   * the <tt>write_*</tt> functions; all lines with indicator
+   * numbers::internal_face_boundary_id are interior ones and an indicator
+   * with value zero for faces at the boundary is considered the default.
+   *
+   * This function always returns zero in one and two dimensions.
+   *
+   * The reason for this function is the same as for write_ucd_faces(). See
+   * there for more information.
+   */
+  template <int dim, int spacedim>
+  unsigned int n_boundary_lines (const Triangulation<dim,spacedim> &tria) const;
+
+  /**
+   * Declaration of the specialization of above function for 1d. Simply
+   * returns zero.
+   */
+  unsigned int n_boundary_lines (const Triangulation<1,1> &tria) const;
+
+  /**
+   * Declaration of the specialization of above function for 1d, 2sd. Simply
+   * returns zero.
+   */
+  unsigned int n_boundary_lines (const Triangulation<1,2> &tria) const;
+  unsigned int n_boundary_lines (const Triangulation<1,3> &tria) const;
+
+  /**
+   * Declaration of the specialization of above function for 2d. Simply
+   * returns zero.
+   */
+  unsigned int n_boundary_lines (const Triangulation<2,2> &tria) const;
+  /**
+   * Declaration of the specialization of above function for 2d, 3sd. Simply
+   * returns zero.
+   */
+  unsigned int n_boundary_lines (const Triangulation<2,3> &tria) const;
+};
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/grid/grid_refinement.h b/include/deal.II/grid/grid_refinement.h
new file mode 100644
index 0000000..319e8bb
--- /dev/null
+++ b/include/deal.II/grid/grid_refinement.h
@@ -0,0 +1,362 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__grid_refinement_h
+#define dealii__grid_refinement_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/grid/tria.h>
+
+#include <vector>
+#include <limits>
+
+DEAL_II_NAMESPACE_OPEN
+
+// forward declarations
+template <int dim, int spacedim> class Triangulation;
+template <class T> class Vector;
+
+
+/**
+ * Collection of functions controlling refinement and coarsening of
+ * Triangulation objects.
+ *
+ * The functions in this namespace form two categories. There are the
+ * auxiliary functions refine() and coarsen(). More important for users are
+ * the other functions, which implement refinement strategies, as being found
+ * in the literature on adaptive finite element methods. For mathematical
+ * discussion of these methods, consider works by Dörfler, Morin,
+ * Nochetto, Rannacher, Stevenson and many more.
+ *
+ * @ingroup grid
+ * @author Wolfgang Bangerth, Thomas Richter, Guido Kanschat 1998, 2000, 2009
+ */
+namespace GridRefinement
+{
+  /**
+   * Return a pair of double values of which the first is the adjusted
+   * refinement fraction of cells and the second is the adjusted coarsening
+   * fraction of cells.
+   *
+   *
+   * @param[in] current_n_cells The current cell number.
+   *
+   * @param[in] max_n_cells The maximal number of cells. If the current cell
+   * number @p current_n_cells already exceeds the maximal cell number @p
+   * max_n_cells, the refinement fraction of cells will be set to zero and the
+   * coarsening fraction of cells will be adjusted to reduce the cell number to
+   * @p max_n_cells. If the cell number would be exceeded only upon refinement,
+   * then the refinement and coarsening fractions are adjusted by the same
+   * ratio in an attempt to reach the maximum number of cells. Be aware
+   * though that through proliferation of refinement due to
+   * Triangulation::MeshSmoothing, this number is only an indicator. The
+   * default value of this argument is to impose no limit on the number of
+   * cells.
+   *
+   * @param[in] top_fraction_of_cells The requested fraction of cells to be
+   * refined.
+   *
+   * @param[in] bottom_fraction_of_cells The requested fraction of cells to be
+   * coarsened.
+   *
+   * @note Usually you do not need to call this function explicitly. Pass @p
+   * max_n_cells to function refine_and_coarsen_fixed_number() or function
+   * refine_and_coarsen_fixed_fraction() and they will call this function if
+   * necessary.
+   */
+  template <int dim>
+  std::pair<double, double>
+  adjust_refine_and_coarsen_number_fraction (const unsigned int  current_n_cells,
+                                             const unsigned int  max_n_cells,
+                                             const double        top_fraction_of_cells,
+                                             const double        bottom_fraction_of_cells);
+
+  /**
+   * This function provides a refinement strategy with predictable growth of
+   * the mesh.
+   *
+   * The function takes a vector of refinement @p criteria and two values
+   * between zero and one denoting the fractions of cells to be refined and
+   * coarsened. It flags cells for further processing by
+   * Triangulation::execute_coarsening_and_refinement() according to the
+   * following greedy algorithm:
+   *
+   * <ol>
+   *
+   * <li> Sort the cells according to descending values of @p criteria.
+   *
+   * <li> Set the refinement threshold to be the criterion belonging to the
+   * cell at position @p top_fraction_of_cells times
+   * Triangulation::n_active_cells().
+   *
+   * <li> Set the coarsening threshold accordingly using the cell @p
+   * bottom_fraction_of_cells times Triangulation::n_active_cells() from the
+   * end of the sorted list.
+   *
+   * <li> Use these two thresholds in calls to refine() and coarsen(),
+   * respectively.
+   *
+   * </ol>
+   *
+   * As an example, with no coarsening, setting @p top_fraction_of_cells to
+   * 1/3 will result in approximately doubling the number of cells in two
+   * dimensions. The same effect in three dimensions is achieved by refining
+   * 1/7th of the cells. These values are good initial guesses, but should be
+   * adjusted depending on the singularity of the approximated function.
+   *
+   * The sorting of criteria is not done actually, since we only need the
+   * threshold values in order to call refine() and coarsen(). The order of
+   * cells with higher and of those with lower criteria is irrelevant. Getting
+   * this value is accomplished by the @p nth_element function of the
+   * <tt>C++</tt> standard library, which takes only linear time in the number
+   * of elements, rather than <tt>N log N</tt> for sorting all values.
+   *
+   * @note This function only sets the coarsening and refinement flags. The
+   * mesh is not changed until you call
+   * Triangulation::execute_coarsening_and_refinement().
+   *
+   * @param[in,out] triangulation The triangulation whose cells this function
+   * is supposed to mark for coarsening and refinement.
+   *
+   * @param[in] criteria The refinement criterion for each mesh cell. Entries
+   * may not be negative.
+   *
+   * @param[in] top_fraction_of_cells The fraction of cells to be refined. If
+   * this number is zero, no cells will be refined. If it equals one, the
+   * result will be flagging for global refinement.
+   *
+   * @param[in] bottom_fraction_of_cells The fraction of cells to be
+   * coarsened. If this number is zero, no cells will be coarsened.
+   *
+   * @param[in] max_n_cells This argument can be used to specify a maximal
+   * number of cells. If this number is going to be exceeded upon refinement,
+   * then refinement and coarsening fractions are going to be adjusted in an
+   * attempt to reach the maximum number of cells. Be aware though that
+   * through proliferation of refinement due to Triangulation::MeshSmoothing,
+   * this number is only an indicator. The default value of this argument is
+   * to impose no limit on the number of cells.
+   */
+  template <int dim, class VectorType, int spacedim>
+  void
+  refine_and_coarsen_fixed_number
+  (Triangulation<dim,spacedim> &triangulation,
+   const VectorType            &criteria,
+   const double                top_fraction_of_cells,
+   const double                bottom_fraction_of_cells,
+   const unsigned int          max_n_cells = std::numeric_limits<unsigned int>::max());
+
+  /**
+   * This function provides a refinement strategy controlling the reduction of
+   * the error estimate.
+   *
+   * Also known as the <b>bulk criterion</b>, this function computes the
+   * thresholds for refinement and coarsening such that the @p criteria of
+   * cells getting flagged for refinement make up for a certain fraction of
+   * the total error. We explain its operation for refinement, coarsening
+   * works analogously.
+   *
+   * Let <i>c<sub>K</sub></i> be the criterion of cell <i>K</i>. Then the
+   * total error estimate is computed by the formula
+   * @f[
+   * E = \sum_{K\in \cal T} c_K.
+   * @f]
+   *
+   * If <i> 0 < a < 1</i> is @p top_fraction, then we refine the
+   * smallest subset $\cal M$ of the Triangulation $\cal T$ such that
+   * @f[
+   * a E \le \sum_{K\in \cal M} c_K
+   * @f]
+   *
+   * The algorithm is performed by the greedy algorithm described in
+   * refine_and_coarsen_fixed_number().
+   *
+   * @note The often used formula with squares on the left and right is
+   * recovered by actually storing the square of <i>c<sub>K</sub></i> in the
+   * vector @p criteria.
+   *
+   * From the point of view of implementation, this time we really need to
+   * sort the array of criteria.  Just like the other strategy described
+   * above, this function only computes the threshold values and then passes
+   * over to refine() and coarsen().
+   *
+   * @param[in,out] tria The triangulation whose cells this function is
+   * supposed to mark for coarsening and refinement.
+   *
+   * @param[in] criteria The refinement criterion computed on each mesh cell.
+   * Entries may not be negative.
+   *
+   * @param[in] top_fraction The fraction of the total estimate which should
+   * be refined. If this number is zero, no cells will be refined. If it
+   * equals one, the result will be flagging for global refinement.
+   *
+   * @param[in] bottom_fraction The fraction of the total estimate which
+   * should be coarsened. If this number is zero, no cells will be coarsened.
+   *
+   * @param[in] max_n_cells This argument can be used to specify a maximal
+   * number of cells. If this number is going to be exceeded upon refinement,
+   * then refinement and coarsening fractions are going to be adjusted in an
+   * attempt to reach the maximum number of cells. Be aware though that
+   * through proliferation of refinement due to Triangulation::MeshSmoothing,
+   * this number is only an indicator. The default value of this argument is
+   * to impose no limit on the number of cells.
+   */
+  template <int dim, class VectorType, int spacedim>
+  void
+  refine_and_coarsen_fixed_fraction
+  (Triangulation<dim,spacedim> &tria,
+   const VectorType            &criteria,
+   const double                top_fraction,
+   const double                bottom_fraction,
+   const unsigned int          max_n_cells = std::numeric_limits<unsigned int>::max());
+
+
+
+  /**
+   * Refine the triangulation by flagging certain cells to reach a grid that
+   * is optimal with respect to an objective function that tries to balance
+   * reducing the error and increasing the numerical cost when the mesh is
+   * refined. Specifically, this function makes the assumption that if you
+   * refine a cell $K$ with error indicator $\eta_K$ provided by the second
+   * argument to this function, then the error on the children (for all
+   * children together) will only be $2^{-\text{order}}\eta_K$ where
+   * <code>order</code> is the third argument of this function. This makes the
+   * assumption that the error is only a local property on a mesh and can be
+   * reduced by local refinement -- an assumption that is true for the
+   * interpolation operator, but not for the usual Galerkin projection,
+   * although it is approximately true for elliptic problems where the Green's
+   * function decays quickly and the error here is not too much affected by a
+   * too coarse mesh somewhere else.
+   *
+   * With this, we can define the objective function this function tries to
+   * optimize. Let us assume that the mesh currently has $N_0$ cells. Then, if
+   * we refine the $m$ cells with the largest errors, we expect to get (in $d$
+   * space dimensions)
+   * @f[
+   *   N(m) = (N_0-m) + 2^d m = N_0 + (2^d-1)m
+   * @f]
+   * cells ($N_0-m$ are not refined, and each of the $m$ cells we refine yields
+   * $2^d$ child cells). On the other hand, with refining $m$ cells, and using
+   * the assumptions above, we expect that the error will be
+   * @f[
+   *   \eta^\text{exp}(m)
+   *   =
+   *   \sum_{K, K\; \text{will not be refined}} \eta_K
+   *   +
+   *   \sum_{K, K\; \text{will be refined}} 2^{-\text{order}}\eta_K
+   * @f]
+   * where the first sum extends over $N_0-m$ cells and the second over the
+   * $m$ cells that will be refined. Note that $N(m)$ is an increasing
+   * function of $m$ whereas $\eta^\text{exp}(m)$ is a decreasing function.
+   *
+   * This function then tries to find that number $m$ of cells to refine for
+   * which the objective function
+   * @f[
+   *   J(m) = N(m)^{\text{order}/d} \eta^\text{exp}(m)
+   * @f]
+   * is minimal.
+   *
+   * The rationale for this function is two-fold. First, compared to the
+   * refine_and_coarsen_fixed_fraction() and refine_and_coarsen_fixed_number()
+   * functions, this function has the property that if all refinement
+   * indicators are the same (i.e., we have achieved a mesh where the error
+   * per cell is equilibrated), then the entire mesh is refined. This is based
+   * on the observation that a mesh with equilibrated error indicators is the
+   * optimal mesh (i.e., has the least overall error) among all meshes with
+   * the same number of cells. (For proofs of this, see R. Becker, M. Braack,
+   * R. Rannacher: "Numerical simulation of laminar flames at low Mach number
+   * with adaptive finite elements", Combustion Theory and Modelling, Vol. 3,
+   * Nr. 3, p. 503-534 1999; and W. Bangerth, R. Rannacher: "Adaptive Finite
+   * Element Methods for Differential Equations", Birkhauser, 2003.)
+   *
+   * Second, the function uses the observation that ideally, the error behaves
+   * like $e \approx c N^{-\alpha}$ with some constant $\alpha$ that depends
+   * on the dimension and the finite element degree. It should - given optimal
+   * mesh refinement - not depend so much on the regularity of the solution,
+   * as it is based on the idea that all singularities can be resolved by
+   * refinement. Mesh refinement is then based on the idea that we want to
+   * make $c=e N^\alpha$ small. This corresponds to the functional $J(m)$
+   * above.
+   *
+   * @note This function was originally implemented by Thomas Richter. It
+   * follows a strategy described in T. Richter, "Parallel Multigrid Method
+   * for Adaptive Finite Elements with Application to 3D Flow Problems", PhD
+   * thesis, University of Heidelberg, 2005. See in particular Section 4.3,
+   * pp. 42-43.
+   */
+  template <int dim, class VectorType, int spacedim>
+  void
+  refine_and_coarsen_optimize (Triangulation<dim,spacedim> &tria,
+                               const VectorType            &criteria,
+                               const unsigned int          order=2);
+
+  /**
+   * Flag all mesh cells for which the value in @p criteria exceeds @p
+   * threshold for refinement, but only flag up to @p max_to_mark cells.
+   *
+   * The vector @p criteria contains a nonnegative value for each active cell,
+   * ordered in the canonical order of Triangulation::active_cell_iterator.
+   *
+   * The cells are only flagged for refinement, they are not actually refined.
+   * To do so, you have to call
+   * Triangulation::execute_coarsening_and_refinement().
+   *
+   * This function does not implement a refinement strategy, it is more a
+   * helper function for the actual strategies.
+   */
+  template <int dim, class VectorType, int spacedim>
+  void refine (Triangulation<dim,spacedim> &tria,
+               const VectorType            &criteria,
+               const double                threshold,
+               const unsigned int          max_to_mark = numbers::invalid_unsigned_int);
+
+  /**
+   * Flag all mesh cells for which the value in @p criteria is less than @p
+   * threshold for coarsening.
+   *
+   * The vector @p criteria contains a nonnegative value for each active cell,
+   * ordered in the canonical order of Triangulation::active_cell_iterator.
+   *
+   * The cells are only flagged for coarsening, they are not actually
+   * coarsened. To do so, you have to call
+   * Triangulation::execute_coarsening_and_refinement().
+   *
+   * This function does not implement a refinement strategy, it is more a
+   * helper function for the actual strategies.
+   */
+  template <int dim, class VectorType, int spacedim>
+  void coarsen (Triangulation<dim,spacedim> &tria,
+                const VectorType            &criteria,
+                const double                threshold);
+
+  /**
+   * An exception thrown if the vector with cell criteria contains negative
+   * values.
+   */
+  DeclException0(ExcNegativeCriteria);
+
+  /**
+   * One of the threshold parameters causes trouble, or the refinement and
+   * coarsening thresholds overlap.
+   */
+  DeclException0 (ExcInvalidParameterValue);
+}
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif //dealii__grid_refinement_h
diff --git a/include/deal.II/grid/grid_reordering.h b/include/deal.II/grid/grid_reordering.h
new file mode 100644
index 0000000..a5b49fa
--- /dev/null
+++ b/include/deal.II/grid/grid_reordering.h
@@ -0,0 +1,699 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__grid_reordering_h
+#define dealii__grid_reordering_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/grid/tria.h>
+
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+/**
+ * This class reorders the vertices of cells such that they meet the standard
+ * requirements of the Triangulation class when creating grids, i.e. all lines
+ * have a unique orientation with respect to all neighboring cells. This class
+ * is mainly used when reading in grids from files and converting them to
+ * deal.II triangulations.
+ *
+ * @note In contrast to the rest of the deal.II library, by default this class
+ * uses the old deal.II numbering scheme, which was used up to deal.II version
+ * 5.2 (but the main function of this class takes a flag that specifies
+ * whether it should do an implicit conversion from the new to the old format
+ * before doing its work, and then back again after reordering). In this old
+ * format, the vertex and face ordering in 2d is assumed to be
+ * @verbatim
+ *          2
+ *      3--->---2
+ *      |       |
+ *     3^       ^1
+ *      |       |
+ *      0--->---1
+ *          0
+ * @endverbatim
+ * the vertices in 3d:
+ * @verbatim
+ *         7-------6        7-------6
+ *        /|       |       /       /|
+ *       / |       |      /       / |
+ *      /  |       |     /       /  |
+ *     3   |       |    3-------2   |
+ *     |   4-------5    |       |   5
+ *     |  /       /     |       |  /
+ *     | /       /      |       | /
+ *     |/       /       |       |/
+ *     0-------1        0-------1
+ * @endverbatim
+ * and the faces in 3d:
+ * @verbatim
+ *         *-------*        *-------*
+ *        /|       |       /       /|
+ *       / |   1   |      /   4   / |
+ *      /  |       |     /       /  |
+ *     *   |       |    *-------*   |
+ *     | 5 *-------*    |       | 3 *
+ *     |  /       /     |       |  /
+ *     | /   2   /      |   0   | /
+ *     |/       /       |       |/
+ *     *-------*        *-------*
+ * @endverbatim
+ * After calling the GridReordering::reorder_cells() function the CellData is
+ * still in this old numbering scheme. Hence, for creating a Triangulation
+ * based on the resulting CellData the
+ * Triangulation::create_triangulation_compatibility() (and not the
+ * Triangulation::create_triangulation()) function must be used.  For a
+ * typical use of the reorder_cells() function see the implementation of the
+ * GridIn <code>read_*()</code> functions.
+ *
+ *
+ * <h3>Statement of problems</h3>
+ *
+ * Triangulations in deal.II have a special structure, in that there are not
+ * only cells, but also faces, and in 3d also edges, that are objects of their
+ * own right. Faces and edges have unique orientations, and they have a
+ * specified orientation also with respect to the cells that are adjacent.
+ * Thus, a line that separates two cells in two space dimensions does not only
+ * have a direction, but it must also have a well-defined orientation with
+ * respect to the other lines bounding the two quadrilaterals adjacent to the
+ * first line. Likewise definitions hold for three dimensional cells and the
+ * objects (lines, quads) that separate them.
+ *
+ * For example, in two dimensions, a quad consists of four lines which have a
+ * direction, which is by definition as follows:
+ * @verbatim
+ *   3-->--2
+ *   |     |
+ *   ^     ^
+ *   |     |
+ *   0-->--1
+ * @endverbatim
+ * Now, two adjacent cells must have a vertex numbering such that the
+ * direction of the common side is the same. For example, the following two
+ * quads
+ * @verbatim
+ *   3---4---5
+ *   |   |   |
+ *   0---1---2
+ * @endverbatim
+ * may be characterised by the vertex numbers <tt>(0 1 4 3)</tt> and <tt>(1 2
+ * 5 4)</tt>, since the middle line would get the direction <tt>1->4</tt> when
+ * viewed from both cells.  The numbering <tt>(0 1 4 3)</tt> and <tt>(5 4 1
+ * 2)</tt> would not be allowed, since the left quad would give the common
+ * line the direction <tt>1->4</tt>, while the right one would want to use
+ * <tt>4->1</tt>, leading to an ambiguity.
+ *
+ * As a sidenote, we remark that if one adopts the idea that having directions
+ * of faces is useful, then the orientation of the four faces of a cell as
+ * shown above is almost necessary. In particular, it is not possible to
+ * orient them such that they represent a (counter-)clockwise sense, since
+ * then we couldn't already find a valid orientation of the following patch of
+ * three cells:
+ * @verbatim
+ *       o
+ *     /   \
+ *   o       o
+ *   | \   / |
+ *   |   o   |
+ *   |   |   |
+ *   o---o---o
+ * @endverbatim
+ * (The reader is asked to try to find a conforming choice of line directions;
+ * it will soon be obvious that there can't exist such a thing, even if we
+ * allow that there might be cells with clockwise and counterclockwise
+ * orientation of the lines at the same time.)
+ *
+ * One might argue that the definition of unique directions for faces and
+ * edges, and the definition of directions relative to the cells they bound,
+ * is a misfeature of deal.II. In fact, it makes reading in grids created by
+ * mesh generators rather difficult, as they usually don't follow these
+ * conventions when generating their output. On the other hand, there are good
+ * reasons to introduce such conventions, as they can make programming much
+ * simpler in many cases, leading to an increase in speed of some computations
+ * as one can avoid expensive checks in many places because the orientation of
+ * faces is known by assumption that it is guaranteed by the triangulation.
+ *
+ * The purpose of this class is now to find an ordering for a given set of
+ * cells such that the generated triangulation satisfies all the requirements
+ * stated above. To this end, we will first show some examples why this is a
+ * difficult problem, and then develop algorithms that find such a
+ * reordering. Note that the algorithm operates on a set of CellData objects
+ * that are used to describe a mesh to the triangulation class. These objects
+ * are, for example, generated by the GridIn class, when reading in grids from
+ * input files.
+ *
+ * As a last question for this first section: is it guaranteed that such
+ * orientations of faces always exist for a given subdivision of a domain into
+ * cells? The linear complexity algorithm described below for 2d also proves
+ * that the answer is yes for 2d. For 3d, the answer is no (which also
+ * underlines that using such orientations might be an -- unfortunately
+ * incurable -- misfeature of deal.II). A simple counter-example in 3d
+ * illustrates this: take a string of 3d cells and bend it together to a
+ * torus. Since opposing lines in a cell need to have the same direction,
+ * there is a simple ordering for them, for example all lines radially
+ * outward, tangentially clockwise, and axially upward. However, if before
+ * joining the two ends of the string of cells, the string is twisted by 180
+ * degrees, then no such orientation is possible any more, as can easily be
+ * checked. In effect, some meshes could not be used in deal.II. In order to
+ * overcome this problem, the <code>face_rotation</code>,
+ * <code>face_flip</code> and <code>line_orientation</code> flags have been
+ * introduced. With these, it is possible to treat all purely hexahedral
+ * meshes. However, in order to reduce the effect of possible bugs, it should
+ * still be tried to reorder a grid. Only if this procedure fails, the
+ * original connectivity information should be used.
+ *
+ *
+ * <h3>Examples of problems</h3>
+ *
+ * As noted, reordering the vertex lists of cells such that the resulting grid
+ * is valid is not a trivial problem. In particular, it is often not sufficient
+ * to only look at the neighborhood of a cell that cannot be added to a set of
+ * other cells without violating the requirements stated above. We will show
+ * two examples where this is obvious.
+ *
+ * The first such example is the following, which we will call the ``four
+ * cells at the end'' because of the four cells that close off the right end of
+ * a row of three vertical cells each (in the following picture we only show
+ * one such column of three cells at the left, but we will indicate what
+ * happens if we prolong this list):
+ * @verbatim
+ *   9---10-----11
+ *   |   |    / |
+ *   6---7---8  |
+ *   |   |   |  |
+ *   3---4---5  |
+ *   |   |    \ |
+ *   0---1------2
+ * @endverbatim
+ * Assume that you had numbered the vertices in the cells at the left boundary
+ * in a way, that the following line directions are induced:
+ * @verbatim
+ *   9->-10-----11
+ *   ^   ^    / |
+ *   6->-7---8  |
+ *   ^   ^   |  |
+ *   3->-4---5  |
+ *   ^   ^    \ |
+ *   0->-1------2
+ * @endverbatim
+ * (This could for example be done by using the indices <tt>(0 1 4 3)</tt>,
+ * <tt>(3 4 7 6)</tt>, <tt>(6 7 10 9)</tt> for the three cells). Now, you will
+ * not find a way of giving indices for the right cells, without introducing
+ * either ambiguity for one line or other, or without violating that within
+ * each cell, there must be one vertex from which both lines are directed
+ * away and the opposite one to which both adjacent lines point.
+ *
+ * The solution in this case is to renumber one of the three left cells, e.g.
+ * by reverting the sense of the line between vertices 7 and 10 by numbering
+ * the top left cell by <tt>(9 6 7 10)</tt>:
+ * @verbatim
+ *   9->-10-----11
+ *   v   v    / |
+ *   6->-7---8  |
+ *   ^   ^   |  |
+ *   3->-4---5  |
+ *   ^   ^    \ |
+ *   0->-1------2
+ * @endverbatim
+ *
+ * The point here is the following: assume we wanted to prolong the grid to
+ * the left like this:
+ * @verbatim
+ *   o---o---o---o---o------o
+ *   |   |   |   |   |    / |
+ *   o---o---o---o---o---o  |
+ *   |   |   |   |   |   |  |
+ *   o---o---o---o---o---o  |
+ *   |   |   |   |   |    \ |
+ *   o---o---o---o---o------o
+ * @endverbatim
+ * Then we run into the same problem as above if we order the cells at the
+ * left uniformly, thus forcing us to revert the ordering of one cell (the one
+ * which we could order as <tt>(9 6 7 10)</tt> above). However, since opposite
+ * lines have to have the same direction, this in turn would force us to
+ * rotate the cell left of it, and then the one left to that, and so on until
+ * we reach the left end of the grid. This is therefore an example where we have
+ * to track back right until the first column of three cells to find a
+ * consistent ordering, if we had initially ordered them uniformly.
+ *
+ * As a second example, consider the following simple grid, where the order in
+ * which the cells are numbered is important:
+ * @verbatim
+ *   3-----2-----o-----o ... o-----7-----6
+ *   |     |     |     |     |     |     |
+ *   |  0  |  N  | N-1 | ... |  2  |  1  |
+ *   |     |     |     |     |     |     |
+ *   0-----1-----o-----o ... o-----4-----5
+ * @endverbatim
+ * We have here only indicated the numbers of the vertices that are relevant.
+ * Assume that the user had given the cells 0 and 1 by the vertex indices
+ * <tt>0 1 2 3</tt> and <tt>6 7 4 5</tt>. Then, if we follow this orientation,
+ * the grid after creating the lines for these two cells would look like this:
+ * @verbatim
+ *   3-->--2-----o-----o ... o-----7--<--6
+ *   |     |     |     |     |     |     |
+ *   ^  0  ^  N  | N-1 | ... |  2  v  1  v
+ *   |     |     |     |     |     |     |
+ *   0-->--1-----o-----o ... o-----4--<--5
+ * @endverbatim
+ * Now, since opposite lines must point in the same direction, we can only add
+ * the cells 2 through N-1 to cell 1 such that all vertical lines point down.
+ * Then, however, we cannot add cell N in any direction, as it would have two
+ * opposite lines that do not point in the same direction. We would have to
+ * rotate either cell 0 or 1 in order to be able to add all the other cells
+ * such that the requirements of deal.II triangulations are met.
+ *
+ * These two examples demonstrate that if we have added a certain number of
+ * cells in some orientation of faces and can't add the next one without
+ * introducing faces that had already been added in another direction, then it
+ * might not be sufficient to only rotate cells in the neighborhood of the
+ * cell that we failed to add. It might be necessary to go back a long way and
+ * rotate cells that have been entered long ago.
+ *
+ *
+ * <h3>Solution</h3>
+ *
+ * From the examples above, it is obvious that if we encounter a cell that
+ * cannot be added to the cells which have already been entered, we can not
+ * usually point to a cell that is the culprit and that must be entered in a
+ * different orientation. Furthermore, even if we knew which cell, there might
+ * be a large number of cells that would then cease to fit into the grid and for
+ * which we would have to find a different orientation as well (in the second
+ * example above, if we rotated cell 1, then we would have to rotate the cells
+ * 2 through N-1 as well).
+ *
+ * A brute force approach to this problem is the following: if cell N can't be
+ * added, then try to rotate cell N-1. If we can't rotate cell N-1 any more,
+ * then try to rotate cell N-2 and try to add cell N with all orientations of
+ * cell N-1. And so on. Algorithmically, we can visualize this by a tree
+ * structure, where node N has as many children as there are possible
+ * orientations of node N+1 (in two space dimensions, there are four
+ * orientations in which each cell can be constructed from its four vertices;
+ * for example, if the vertex indices are <tt>(0 1 2 3)</tt>, then the four
+ * possibilities would be <tt>(0 1 2 3)</tt>, <tt>(1 2 3 0)</tt>, <tt>(2 3 0
+ * 1)</tt>, and <tt>(3 0 1 2)</tt>). When adding one cell after the other, we
+ * traverse this tree in a depth-first (pre-order) fashion. When we encounter
+ * that one path from the root (cell 0) to a leaf (the last cell) is not
+ * allowed (i.e. that the orientations of the cells which are encoded in the
+ * path through the tree do not lead to a valid triangulation), we have to
+ * track back and try another path through the tree.
+ *
+ * In practice, of course, we do not follow each path to a final node and then
+ * find out whether a path leads to a valid triangulation, but rather use an
+ * inductive argument: if for all previously added cells the triangulation is
+ * a valid one, then we can find out whether a path through the tree can yield
+ * a valid triangulation by checking whether entering the present cell would
+ * introduce any faces that have a nonunique direction; if that is so, then we
+ * can stop following all paths below this point and track back immediately.
+ *
+ * Nevertheless, it is already obvious that the tree has <tt>4**N</tt> leaves
+ * in two space dimensions, since each of the N cells can be added in four
+ * orientations. Most of these nodes can be discarded rapidly, since firstly
+ * the orientation of the first cell is irrelevant, and secondly if we add one
+ * cell that has a neighbor that has already been added, then there are
+ * already only two possible orientations left, so the total number of checks
+ * we have to make until we find a valid way is significantly smaller than
+ * <tt>4**N</tt>. However, the algorithm is still exponential in time and
+ * linear in memory (we only have to store the information for the present
+ * path in form of a stack of orientations of cells that have already been
+ * added).
+ *
+ * In fact, the two examples above show that the exponential estimate is not a
+ * pessimistic one: we indeed have to track back to one of the very first cells
+ * there to find a way to add all cells in a consistent fashion.
+ *
+ * This discouraging situation is greatly improved by the fact that we have an
+ * alternative algorithm for 2d that is always linear in runtime (discovered
+ * and implemented by Michael Anderson of TICAM, University of Texas, in
+ * 2003), and that for 3d we can find an algorithm that in practice is usually
+ * only roughly linear in time and memory. We will describe these algorithms
+ * in the following.
+ *
+ *
+ * <h3>The 2d linear complexity algorithm</h3>
+ *
+ * The algorithm uses the fact that opposite faces of a cell need to have the
+ * same orientation. So you start with one arbitrary line, choose an
+ * orientation. Then the orientation of the opposite face is already fixed.
+ * Then go to the two cells across the two faces we have fixed: for them, one
+ * face is fixed, so we can also fix the opposite face. Go on with doing so.
+ * Eventually, we have done this for a string of cells. Then take one of the
+ * non-fixed faces of a cell which has already two fixed faces and do all this
+ * again.
+ *
+ * In more detail, the algorithm is best illustrated using an example. We
+ * consider the mesh below:
+ * @verbatim
+ *   9------10-------11
+ *   |      |        /|
+ *   |      |       / |
+ *   |      |      /  |
+ *   6------7-----8   |
+ *   |      |     |   |
+ *   |      |     |   |
+ *   |      |     |   |
+ *   3------4-----5   |
+ *   |      |      \  |
+ *   |      |       \ |
+ *   |      |        \|
+ *   0------1---------2
+ * @endverbatim
+ * First a cell is chosen ( (0,1,4,3) in this case). A single side of the cell
+ * is oriented arbitrarily (3->4). This choice of orientation is then
+ * propagated through the mesh, across sides and elements: (0->1), (6->7) and
+ * (9->10). This involves edge-hopping and face-hopping, giving a path through
+ * the mesh shown in dots.
+ * @verbatim
+ *   9-->--10-------11
+ *   |  .  |        /|
+ *   |  .  |       / |
+ *   |  .  |      /  |
+ *   6-->--7-----8   |
+ *   |  .  |     |   |
+ *   |  .  |     |   |
+ *   |  .  |     |   |
+ *   3-->--4-----5   |
+ *   |  .  |      \  |
+ *   |  X  |       \ |
+ *   |  .  |        \|
+ *   0-->--1---------2
+ * @endverbatim
+ * This is then repeated for the other sides of the chosen element, orienting
+ * more sides of the mesh.
+ * @verbatim
+ *   9-->--10-------11
+ *   |     |        /|
+ *   v.....v.......V |
+ *   |     |      /. |
+ *   6-->--7-----8 . |
+ *   |     |     | . |
+ *   |     |     | . |
+ *   |     |     | . |
+ *   3-->--4-----5 . |
+ *   |     |      \. |
+ *   ^..X..^.......^ |
+ *   |     |        \|
+ *   0-->--1---------2
+ * @endverbatim
+ * Once an element has been completely oriented it need not be considered
+ * further. These elements are filled with o's in the diagrams. We then move
+ * to the next element.
+ * @verbatim
+ *   9-->--10->-----11
+ *   | ooo |  .     /|
+ *   v ooo v  .    V |
+ *   | ooo |  .   /  |
+ *   6-->--7-->--8   |
+ *   |     |  .  |   |
+ *   |     |  .  |   |
+ *   |     |  .  |   |
+ *   3-->--4-->--5   |
+ *   | ooo |  .   \  |
+ *   ^ ooo ^  X    ^ |
+ *   | ooo |  .     \|
+ *   0-->--1-->------2
+ * @endverbatim
+ * Repeating this gives
+ * @verbatim
+ *   9-->--10->-----11
+ *   | ooo | oooooo /|
+ *   v ooo v ooooo V |
+ *   | ooo | oooo /  |
+ *   6-->--7-->--8   |
+ *   |     |     |   |
+ *   ^.....^..X..^...^
+ *   |     |     |   |
+ *   3-->--4-->--5   |
+ *   | ooo | oooo \  |
+ *   ^ ooo ^ ooooo ^ |
+ *   | ooo | oooooo \|
+ *   0-->--1-->------2
+ * @endverbatim
+ * and the final oriented mesh is
+ * @verbatim
+ *   9-->--10->-----11
+ *   |     |        /|
+ *   v     v       V |
+ *   |     |      /  |
+ *   6-->--7-->--8   |
+ *   |     |     |   |
+ *   ^     ^     ^   ^
+ *   |     |     |   |
+ *   3-->--4-->--5   |
+ *   |     |      \  |
+ *   ^     ^       ^ |
+ *   |     |        \|
+ *   0-->--1-->------2
+ * @endverbatim
+ * It is obvious that this algorithm has linear run-time, since it only ever
+ * touches each face exactly once.
+ *
+ * The algorithm just described is implemented in a specialization of this
+ * class for the 2d case. A similar, but slightly more complex algorithm is
+ * implemented in a specialization for 3d. It uses sheets instead of strings
+ * of cells to work on. If a grid is orientable, then the algorithm is able to
+ * do its work in linear time; if it is not orientable, then it aborts in
+ * linear time as well.
+ *
+ * Both algorithms are described in the paper "On orienting edges of
+ * unstructured two- and three-dimensional meshes", R. Agelek, M. Anderson, W.
+ * Bangerth, W. L. Barth (submitted, 2015). A preprint is available as <a
+ * href="http://arxiv.org/abs/1512.02137">arxiv
+ * 1512.02137</a>.
+ *
+ *
+ * <h3>For the curious</h3>
+ *
+ * Prior to the implementation of the algorithms developed by Michael Anderson
+ * and described above, we used a branch-and-cut algorithm initially
+ * implemented in 2000 by Wolfgang Bangerth. Although it is no longer used,
+ * here is how it works, and why it doesn't always work for large meshes since
+ * its run-time can be exponential in bad cases.
+ *
+ * The first observation is that although there are counterexamples, problems
+ * are usually local. For example, in the second example mentioned above, if
+ * we had numbered the cells in a way that neighboring cells have similar cell
+ * numbers, then the amount of backtracking needed is greatly reduced.
+ * Therefore, in the implementation of the algorithm, the first step is to
+ * renumber the cells in a Cuthill-McKee fashion: start with the cell with the
+ * least number of neighbors and assign to it the cell number zero. Then find
+ * all neighbors of this cell and assign to them consecutive further numbers.
+ * Then find their neighbors that have not yet been numbered and assign to
+ * them numbers, and so on. Graphically, this represents finding zones of
+ * cells consecutively further away from the initial cells and number them in
+ * this front-marching way. This already greatly improves locality of problems
+ * and consequently reduces the necessary amount of backtracking.
+ *
+ * The second point is that we can use some methods to prune the tree, which
+ * usually lead to a valid orientation of all cells very quickly.
+ *
+ * The first such method is based on the observation that if we fail to insert
+ * one cell with number N, then this may not be due to cell N-1 unless N-1 is
+ * a direct neighbor of N. The reason is obvious: the chosen orientation of
+ * cell M could only affect the possibilities to add cell N if either it were
+ * a direct neighbor or if there were a sequence of cells that were added
+ * after M and that connected cells M and N. Clearly, for M=N-1, the latter
+ * cannot be the case. Conversely, if we fail to add cell N, then it is not
+ * necessary to track back to cell N-1, but we can track back to the neighbor
+ * of N with the largest cell index and which has already been added.
+ *
+ * Unfortunately, this method can fail to yield a valid path through the tree
+ * if not applied with care. Consider the following situation, initially
+ * extracted from a mesh of 950 cells generated automatically by the program
+ * BAMG (this program usually generates meshes that are quite badly balanced,
+ * often have many -- sometimes 10 or more -- neighbors of one vertex, and
+ * exposed several problems in the initial algorithm; note also that the
+ * example is in 2d where we now have the much better algorithm described
+ * above, but the same observations also apply to 3d):
+ * @verbatim
+ * 13----------14----15
+ * | \         |     |
+ * |  \    4   |  5  |
+ * |   \       |     |
+ * |    12-----10----11
+ * |     |     |     |
+ * |     |     |  7  |
+ * |     |     |     |
+ * |  3  |     8-----9
+ * |     |     |     |
+ * |     |     |  6  |
+ * |     |     |     |
+ * 4-----5-----6-----7
+ * |     |     |     |
+ * |  2  |  1  |  0  |
+ * |     |     |     |
+ * 0-----1-----2-----3
+ * @endverbatim
+ * Note that there is a hole in the middle. Assume now that the user described
+ * the first cell 0 by the vertex numbers <tt>2 3 7 6</tt>, and cell 5 by
+ * <tt>15 14 10 11</tt>, and assume that cells 1, 2, 3, and 4 are numbered
+ * such that 5 can be added in initial rotation. All other cells are numbered
+ * in the usual way, i.e. starting at the bottom left and counting
+ * counterclockwise. Given this description of cells, the algorithm will start
+ * with cell zero and add one cell after the other, up until the sixth one.
+ * Then the situation will be the following:
+ * @verbatim
+ * 13----->---14--<--15
+ * | \         |     |
+ * |  >    4   v  5  v
+ * |   \       |     |
+ * |    12->--10--<--11
+ * |     |     |     |
+ * ^     |     |  7  |
+ * |     |     |     |
+ * |  3  ^     8-->--9
+ * |     |     |     |
+ * |     |     ^  6  ^
+ * |     |     |     |
+ * 4-->--5-->--6-->--7
+ * |     |     |     |
+ * ^  2  ^  1  ^  0  ^
+ * |     |     |     |
+ * 0-->--1-->--2-->--3
+ * @endverbatim
+ * Coming now to cell 7, we see that the two opposite lines at its top and
+ * bottom have different directions; we will therefore find no orientation of
+ * cell 7 in which it can be added without violation of the consistency of the
+ * triangulation. According to the rule stated above, we track back to the
+ * neighbor with greatest index, which is cell 6, but since its bottom line is
+ * to the right, its top line must be to the right as well, so we won't be
+ * able to find an orientation of cell 6 such that 7 will fit into the
+ * triangulation. Then, if we have finished all possible orientations of cell
+ * 6, we track back to the neighbor of 6 with the largest index and which has
+ * been added already. This would be cell 0. However, we know that the
+ * orientation of cell 0 can't be important, so we conclude that there is no
+ * possible way to orient all the lines of the given cells such that they
+ * satisfy the requirements of deal.II triangulations. We know that this can't
+ * be, so it results in an exception being thrown.
+ *
+ * The bottom line of this example is that when we looked at all possible
+ * orientations of cell 6, we couldn't find one such that cell 7 could be
+ * added, and then decided to track back to cell 0. We did not even attempt to
+ * turn cell 5, after which it would be simple to add cell 7. Thus, the
+ * algorithm described above has to be modified: we are only allowed to track
+ * back to that neighbor that has already been added, with the largest cell
+ * index, if we fail to add a cell in any orientation. If we track back
+ * further because we have exhausted all possible orientations but could add
+ * the cell (i.e. we track back since another cell, further down the road
+ * couldn't be added, irrespective of the orientation of the cell which we are
+ * presently considering), then we are not allowed to track back to one of its
+ * neighbors, but have to track back only one cell index.
+ *
+ * The second method to prune the tree is that usually we cannot add a new
+ * cell since the orientation of one of its neighbors that have already been
+ * added is wrong. Thus, we may try to rotate one of the neighbors (of
+ * course making sure that rotating that neighbor does not violate the
+ * consistency of the triangulation) in order to allow the present cell to be
+ * added.
+ *
+ * While the first method could be explained in terms of backtracking in the
+ * tree of orientations more than one step at once, turning a neighbor means
+ * jumping to a totally different place in the tree. For both methods, one can
+ * find arguments that they will never miss a path that is valid and only skip
+ * paths that are invalid anyway.
+ *
+ * These two methods have proven extremely efficient. We have been able to
+ * read very large grids (several tens of thousands of cells) without the need to
+ * track back much. In particular, the time to find an ordering of the cells
+ * was found to be mostly linear in the number of cells, and the time to
+ * reorder them is usually much smaller (for example by one order of
+ * magnitude) than the time needed to read the data from a file, and also to
+ * actually generate the triangulation from this data using the
+ * Triangulation::create_triangulation() function.
+ *
+ * @ingroup grid
+ * @author Wolfgang Bangerth, 2000, Michael Anderson 2003, Ralf Hartmann 2005
+ */
+template <int dim, int spacedim=dim>
+class GridReordering
+{
+public:
+
+  /**
+   * This is the main function, doing what is announced in the general
+   * documentation of this class for dim=2 and 3 and doing nothing for dim=1.
+   *
+   * If a consistent reordering is not possible in dim=3, the original
+   * connectivity data is restored.
+   *
+   * @param original_cells The cell data that describes the mesh; the vector
+   * is modified in place.
+   * @param use_new_style_ordering If true, then use the standard ordering of
+   * vertices within a cell. If false (the default), then use the "old-style"
+   * ordering of vertices within cells used by deal.II before version 5.2 and
+   * as explained in the documentation of this class.
+   */
+  static void reorder_cells (std::vector<CellData<dim> > &original_cells,
+                             const bool use_new_style_ordering = false);
+
+  /**
+   * Grids generated by grid generators may have an orientation of cells which
+   * is the inverse of the orientation required by deal.II.
+   *
+   * In 2d and 3d this function checks whether all cells have negative or
+   * positive measure/volume. In the former case, all cells are inverted. It
+   * does nothing in 1d.
+   *
+   * The inversion of cells might also work when only a subset of all cells
+   * has negative volume. However, grids consisting of a mixture of negatively
+   * and positively oriented cells are very likely to be broken. Therefore, an
+   * exception is thrown in case cells are not uniformly oriented.
+   *
+   * Note that this function should be called before reorder_cells().
+   */
+  static void invert_all_cells_of_negative_grid(
+    const std::vector<Point<spacedim> > &all_vertices,
+    std::vector<CellData<dim> > &original_cells);
+};
+
+
+// declaration of explicit specializations
+template<>
+void
+GridReordering<2>::reorder_cells (std::vector<CellData<2> > &original_cells,
+                                  const bool);
+
+template<>
+void
+GridReordering<2,3>::reorder_cells (std::vector<CellData<2> > &original_cells,
+                                    const bool);
+
+template<>
+void
+GridReordering<3>::reorder_cells (std::vector<CellData<3> > &original_cells,
+                                  const bool);
+
+template<>
+void
+GridReordering<2>::invert_all_cells_of_negative_grid(const std::vector<Point<2> > &all_vertices,
+                                                     std::vector<CellData<2> >    &cells);
+
+template<>
+void
+GridReordering<2,3>::invert_all_cells_of_negative_grid(const std::vector<Point<3> > &all_vertices,
+                                                       std::vector<CellData<2> >    &cells);
+
+template<>
+void
+GridReordering<3>::invert_all_cells_of_negative_grid(const std::vector<Point<3> > &all_vertices,
+                                                     std::vector<CellData<3> >    &cells);
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/grid/grid_reordering_internal.h b/include/deal.II/grid/grid_reordering_internal.h
new file mode 100644
index 0000000..007cc7e
--- /dev/null
+++ b/include/deal.II/grid/grid_reordering_internal.h
@@ -0,0 +1,635 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__grid_reordering_internal_h
+#define dealii__grid_reordering_internal_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/grid/tria.h>
+
+#include <map>
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace internal
+{
+  /**
+   * Implement the algorithm described in the documentation of the
+   * GridReordering<2> class.
+   *
+   * @author Michael Anderson, 2003
+   */
+  namespace GridReordering2d
+  {
+
+    /**
+     * Check whether a given arrangement of cells is already consistent. If
+     * this is the case, then we skip the reordering pass.
+     *
+     * This function works by looping over all cells, checking whether one of
+     * its faces already exists in a list of edges, and if it already exists
+     * in reverse order, then return @p false. If it is not already in the
+     * list, or in the correct direction, then go on with the next faces or
+     * cell.
+     */
+    bool
+    is_consistent (const std::vector<CellData<2> > &cells);
+
+
+    /**
+     * Defines a variety of variables related to the connectivity of a simple
+     * quad element. This includes the nodes on each edge, which edges come
+     * into each node and what the default deal.II directions are for the
+     * quad.
+     *
+     * @verbatim
+     *       s2
+     *
+     *     +-->--+
+     *     |3   2|
+     * s3  ^     ^ s1
+     *     |0   1|
+     *     +-->--+
+     *
+     *       s0
+     * @endverbatim
+     *
+     * @author Michael Anderson, 2003
+     */
+    class ConnectGlobals
+    {
+    public:
+      /**
+       * The nodes on each edge in anti-clockwise order {
+       * {0,1},{1,2},{2,3},{3,0} }
+       */
+      static const int EdgeToNode[4][2];
+
+      /**
+       * The edges common to each node, in anti-clockwise order {
+       * {3,0},{0,1},{1,2},{2,3} }
+       */
+      static const int NodeToEdge[4][2];
+
+      /**
+       * The nodes on each edge in "default direction order".
+       * {{0,1},{1,2},{3,2},{0,3}}
+       */
+      static const int DefaultOrientation[4][2];
+    };
+
+
+    /**
+     * An enriched quad with information about how the mesh fits together so
+     * that we can move around the mesh efficiently.
+     *
+     * @author Michael Anderson, 2003
+     */
+    class MQuad
+    {
+    public:
+      /**
+       * v0 - v3 are indexes of the vertices of the quad, s0 - s3 are indexes
+       * for the sides of the quad
+       */
+      MQuad (const unsigned int  v0,
+             const unsigned int  v1,
+             const unsigned int  v2,
+             const unsigned int  v3,
+             const unsigned int  s0,
+             const unsigned int  s1,
+             const unsigned int  s2,
+             const unsigned int  s3,
+             const CellData<2>  &cd);
+
+      /**
+       * Stores the vertex numbers
+       */
+      unsigned int v[4];
+      /**
+       * Stores the side numbers
+       */
+      unsigned int side[4];
+
+      /**
+       * Copy of the @p CellData object from which we construct the data of
+       * this object.
+       */
+      CellData<2>  original_cell_data;
+    };
+
+    /**
+     * The enriched side class containing connectivity information.
+     * Orientation is from v0 to v1; initially this should have v0<v1. After
+     * global orientation it could be either way.
+     *
+     * @author Michael Anderson, 2003
+     */
+    struct MSide
+    {
+      /**
+       * Constructor.
+       */
+      MSide (const unsigned int initv0,
+             const unsigned int initv1);
+
+      /**
+       * Return whether the sides are equal, even if their ends are reversed.
+       */
+      bool operator==(const MSide &s2) const;
+
+      /**
+       * Return the opposite.
+       */
+      bool operator!=(const MSide &s2) const;
+
+      unsigned int v0;
+      unsigned int v1;
+      unsigned int Q0;
+      unsigned int Q1;
+
+      /**
+       * Local side numbers on quads 0 and 1.
+       */
+      unsigned int lsn0, lsn1;
+      bool Oriented;
+
+      /**
+       * This class makes an MSide have v0<v1.
+       */
+      struct SideRectify;
+
+      /**
+       * Provides a side ordering, s1<s2, without assuming v0<v1 in either of
+       * the sides.
+       */
+      struct SideSortLess;
+    };
+
+
+
+    /**
+     * Implement the 2d algorithm for grid reordering described in the
+     * documentation of the GridReordering class.
+     *
+     * @author Michael Anderson, 2003
+     */
+    class GridReordering
+    {
+    public:
+
+      /**
+       * Do the work intended by this class.
+       */
+      void reorient(std::vector<CellData<2> > &quads);
+    private:
+
+      /**
+       * Sets up the internal data structures so that we can do side
+       * hopping and face switching efficiently. This means we need a whole
+       * bunch of connectivity information.
+       */
+      void build_graph (const std::vector<CellData<2> > &inquads);
+
+      /**
+       * Orient the internal data into deal.II format. The orientation
+       * algorithm is as follows:
+       *
+       * 1) Find an unoriented quad (A)
+       *
+       * 2) Orient an unoriented side (s) of (A)
+       *
+       * 3) side hop on (s) of (A) to get (B)
+       *
+       * 4) if opposite side to (s) of (B) is unoriented orient it
+       *
+       * 5) repeat 3) and 4) until side-hopping fails (we've reached a
+       * boundary) or (s) has already been oriented (we've closed a loop of
+       * unoriented sides).
+       *
+       * 6) Repeat 2), 3), 4) and 5) on other unoriented sides of (A)
+       *
+       * 7) Choose a new unoriented A.
+       */
+      void orient();
+
+      /**
+       * Get the (now correctly oriented if we've called orient) quads.
+       */
+      void get_quads(std::vector<CellData<2> > &outquads) const;
+
+      /**
+       * orient_side(quadnum,localsidenum) orients the local side localsidenum
+       * of the quad quadnum in the triangulation. If the side opposite to it
+       * is oriented then it is oriented to match it. Otherwise it is oriented
+       * in the "default" direction for the quad.
+       */
+      void orient_side (const unsigned int quadnum,
+                        const unsigned int localsidenum);
+
+      /**
+       * Returns true if all sides of the quad quadnum are oriented.
+       */
+      bool is_fully_oriented_quad (const unsigned int quadnum) const;
+
+      /**
+       * Returns true if the side lsn of the quad quadnum is oriented.
+       */
+      bool is_oriented_side (const unsigned int quadnum,
+                             const unsigned int lsn) const;
+
+      /**
+       * Returns true if the side is oriented in the "default" direction.
+       */
+      bool is_side_default_oriented (const unsigned int qnum,
+                                     const unsigned int lsn) const;
+
+      /**
+       * Increases UnOrQLoc from its original value to the next quad with an
+       * unoriented side. Returns true if there was another unoriented quad.
+       */
+      bool get_unoriented_quad (unsigned int &UnOrQLoc) const;
+
+      /**
+       * Sets sidenum to the local sidenumber of an unoriented side of the
+       * quad quadnum. Returns true if such a side exists.
+       */
+      bool get_unoriented_side (const unsigned int quadnum,
+                                unsigned int &sidenum) const;
+
+      /**
+       * side_hop(qnum, lsn) takes qnum, the quadnumber of a quad in the
+       * triangulation, and lsn, a local side number. side_hop then sets qnum
+       * to the quadnumber across the other side of the side, and sets lsn so
+       * that mquads[qnum].side[lsn] is the same before and after the call. If
+       * there is no other quad on the other side of the current quad, then
+       * side_hop returns false.
+       */
+      bool side_hop (unsigned int &qnum,
+                     unsigned int &lsn) const;
+
+      /**
+       * A list of enriched sides/edges of the mesh.
+       */
+      std::vector<MSide> sides;
+      /**
+       * A list of enriched quads in the mesh.
+       */
+      std::vector<MQuad> mquads;
+    };
+  }  // namespace GridReordering2d
+
+
+  /**
+   * Implement the algorithm described in the documentation of the
+   * GridReordering<3> class.
+   *
+   * @author Michael Anderson, 2003
+   */
+  namespace GridReordering3d
+  {
+    /**
+     * A structure indicating the direction of an edge. In the implementation
+     * file, we define three objects, <tt>unoriented_edge</tt>,
+     * <tt>forward_edge</tt>, and <tt>backward_edge</tt>, that denote whether
+     * an edge has not yet been oriented, whether it is in standard
+     * orientation, or whether it has reverse direction. The state that each
+     * of these objects encodes is stored in the <tt>orientation</tt> member
+     * variable -- we would really need only three such values, which we pick
+     * in the implementation file, and make sure when we compare such objects
+     * that only these three special values are actually used.
+     *
+     * The reason for this way of implementing things is as follows. Usually,
+     * such a property would be implemented as an enum. However, in the
+     * previous implementation, a signed integer was used with unoriented=0,
+     * forward=+1, and backward=-1. A number of operations, such as equality
+     * of ordered edges were mapped to checking whether the product of two
+     * edge orientations equals +1. Such arithmetic isn't always portable and
+     * sometimes flagged when using -ftrapv with gcc. Using this class instead
+     * makes sure that there isn't going to be any arithmetic going on on edge
+     * orientations, just comparisons for equality or inequality.
+     *
+     * @author Wolfgang Bangerth, 2005
+     */
+    struct EdgeOrientation
+    {
+      /**
+       * A value indicating the orientation.
+       */
+      char orientation;
+
+      /**
+       * Comparison operator.
+       */
+      bool operator == (const EdgeOrientation &edge_orientation) const;
+
+      /**
+       * Comparison operator.
+       */
+      bool operator != (const EdgeOrientation &edge_orientation) const;
+    };
+
+    /**
+     * During building the connectivity information we don't need all the
+     * heavy duty information about edges that we will need later. So we can
+     * save memory and time by using a light-weight class for edges. It stores
+     * the two vertices, but no direction, so we make the optimization to
+     * store the vertex numbers in sorted order to allow for easier comparison
+     * of edge objects.
+     */
+    struct CheapEdge
+    {
+      /**
+       * The first node
+       */
+      const unsigned int node0;
+
+      /**
+       * The second node
+       */
+      const unsigned int node1;
+
+      /**
+       * Constructor. Take the vertex numbers and store them sorted.
+       */
+      CheapEdge (const unsigned int n0,
+                 const unsigned int n1);
+
+      /**
+       * Comparison operator; sorting algorithms need an ordering of edges.
+       */
+      bool operator< (const CheapEdge &e2) const;
+    };
+
+
+
+    /**
+     * A connectivity and orientation aware edge class.
+     */
+    struct Edge
+    {
+      /**
+       * Simple constructor
+       */
+      Edge (const unsigned int n0,
+            const unsigned int n1);
+
+      /**
+       * The IDs for the end nodes
+       */
+      unsigned int nodes[2];
+
+      /**
+       * Whether the edge has not already been oriented, points from node 0 to
+       * node 1, or the reverse. The initial state of this flag is unoriented.
+       */
+      EdgeOrientation orientation_flag;
+
+      /**
+       * Used to determine which "sheet" or equivalence class of parallel
+       * edges the edge falls in when oriented. numbers::invalid_unsigned_int
+       * means not yet decided. This is also the default value after
+       * construction. Each edge will later be assigned an index greater than
+       * zero.
+       */
+      unsigned int group;
+
+      /**
+       * Indices of neighboring cubes.
+       */
+      std::vector<unsigned int> neighboring_cubes;
+    };
+
+    /**
+     * A connectivity and orientation aware cell.
+     *
+     * The connectivity of the cell is not contained within. (This was for
+     * flexibility in using deal.II's ordering of edges or the XDA format
+     * etc.) For this information we need the ElemInfo class.
+     *
+     * One thing we do know is that the first four edges in the edge class are
+     * parallel, as are the second four, and the third four.
+     *
+     * TODO: Need to move connectivity information out of cell and into edge.
+     */
+    struct Cell
+    {
+      /**
+       * Default Constructor
+       */
+      Cell ();
+
+      /**
+       * The IDs for each of the edges.
+       */
+      unsigned int edges[GeometryInfo<3>::lines_per_cell];
+
+      /**
+       * The IDs for each of the nodes.
+       */
+      unsigned int nodes[GeometryInfo<3>::vertices_per_cell];
+
+      /**
+       * Which way do the edges point.  Whether node 0 of the edge is the base
+       * of the edge in local element (1) or node 1 is the base (-1).
+       */
+      EdgeOrientation local_orientation_flags[GeometryInfo<3>::lines_per_cell];
+
+      /**
+       * An internal flag used to determine whether the cell is in the queue
+       * of cells to be oriented in the current sheet.
+       */
+      bool waiting_to_be_processed;
+    };
+
+
+    /**
+     * This holds all the pieces for orientation together.
+     *
+     * Contains lists of edges and cells (the cells store their node IDs), as
+     * well as the information about how they all connect together.
+     */
+    class Mesh
+    {
+    public:
+      /**
+       * Constructor. Takes the list of cells (cubes) describing the mesh.
+       */
+      Mesh (const std::vector<CellData<3> > &incubes);
+
+      /**
+       * Export the data of this object to the deal.II format that the
+       * Triangulation class wants as input.
+       */
+      void
+      export_to_deal_format (std::vector<CellData<3> > &outcubes) const;
+
+    private:
+      /**
+       * The list of edges
+       */
+      std::vector<Edge> edge_list;
+
+      /**
+       * The list of cells
+       */
+      std::vector<Cell> cell_list;
+
+      /**
+       * Checks whether every cell in the mesh is sensible.
+       */
+      void sanity_check() const;
+
+      /**
+       * Given the cell list, build the edge list and all the connectivity
+       * information and other stuff that we will need later.
+       */
+      void build_connectivity ();
+
+      /**
+       * Unimplemented private copy constructor to disable it.
+       */
+      Mesh (const Mesh &);
+
+      /**
+       * Unimplemented private assignment operator to disable it.
+       */
+      Mesh &operator=(const Mesh &);
+
+      /**
+       * Checks that each edge going into a node is correctly set up.
+       */
+      void sanity_check_node (const Cell        &cell,
+                              const unsigned int local_node_num) const;
+
+      /**
+       * Let the orienter access our private fields.
+       */
+      friend class Orienter;
+    };
+
+
+    /**
+     * The class that orients the edges of a triangulation in 3d. The member
+     * variables basically only store the present state of the algorithm.
+     */
+    class Orienter
+    {
+    public:
+      /**
+       * Orient the given mesh. Creates an object of the present type and lets
+       * that toil away at the task.
+       *
+       * This function is the single entry point to the functionality of this
+       * class.
+       *
+       * Returns whether a consistent orientation of lines was possible for
+       * the given mesh.
+       */
+      static
+      bool
+      orient_mesh (std::vector<CellData<3> > &incubes);
+
+    private:
+      /**
+       * Internal representation of the given list of cells, including
+       * connectivity information and the like.
+       */
+      Mesh mesh;
+
+      /**
+       * The cube we're looking at presently.
+       */
+      unsigned int cur_posn;
+
+      /**
+       * We have fully oriented all cubes before this one.
+       */
+      unsigned int marker_cube;
+
+      /**
+       * The index of the sheet or equivalence class we are presently
+       * processing.
+       */
+      unsigned int cur_edge_group;
+
+      /**
+       * Indices of the cells to be processed within the present sheet. If a
+       * cell is being processed presently, it is taken from this list.
+       */
+      std::vector<int> sheet_to_process;
+
+
+      /**
+       * Which edges of the current cell have been oriented during the current
+       * iteration. Is reset when moving on to the next cube.
+       */
+      bool edge_orient_array[12];
+
+      /**
+       * Constructor. Take a list of cells and set up the internal data
+       * structures of the mesh member variable.
+       *
+       * Since it is private, the only entry point of this class is the static
+       * function orient_mesh().
+       */
+      Orienter (const std::vector<CellData<3> > &incubes);
+
+      /**
+       * Orient all the edges of a mesh.
+       *
+       * Returns whether this action was carried out successfully.
+       */
+      bool orient_edges ();
+
+      /**
+       * Given oriented edges, rotate the cubes so that the edges are in
+       * standard direction.
+       */
+      void orient_cubes ();
+
+      bool get_next_unoriented_cube ();
+
+      /**
+       * Return whether the cell with cell number @p cell_num is fully
+       * oriented.
+       */
+      bool is_oriented (const unsigned int cell_num) const;
+
+      bool orient_edges_in_current_cube ();
+      bool orient_edge_set_in_current_cube (const unsigned int edge_set);
+      bool orient_next_unoriented_edge ();
+
+      /**
+       * Return whether the cell is consistently oriented at present (i.e.
+       * only considering those edges that are already oriented). This is a
+       * sanity check that should be called from inside an assert macro.
+       */
+      bool cell_is_consistent (const unsigned int cell_num) const;
+
+
+      void get_adjacent_cubes ();
+      bool get_next_active_cube ();
+    };
+  }  // namespace GridReordering3d
+}  // namespace internal
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/grid/grid_tools.h b/include/deal.II/grid/grid_tools.h
new file mode 100644
index 0000000..6605c29
--- /dev/null
+++ b/include/deal.II/grid/grid_tools.h
@@ -0,0 +1,1688 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__grid_tools_H
+#define dealii__grid_tools_H
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/fe/mapping.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/fe/mapping_q1.h>
+
+#include <bitset>
+#include <list>
+#include <set>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+template <int, int> class DoFHandler;
+template <int, int> class Mapping;
+namespace hp
+{
+  template <int, int> class DoFHandler;
+  template <int, int> class MappingCollection;
+}
+
+class SparsityPattern;
+
+namespace internal
+{
+  template<int dim, int spacedim, class MeshType>
+  class ActiveCellIterator
+  {
+  public:
+    typedef typename MeshType::active_cell_iterator type;
+  };
+
+  template<int dim, int spacedim>
+  class ActiveCellIterator<dim, spacedim, dealii::DoFHandler<dim, spacedim> >
+  {
+  public:
+#ifndef _MSC_VER
+    typedef typename dealii::DoFHandler<dim, spacedim>::active_cell_iterator type;
+#else
+    typedef TriaActiveIterator < dealii::DoFCellAccessor < dealii::DoFHandler<dim, spacedim>, false > > type;
+#endif
+  };
+
+  template<int dim, int spacedim>
+  class ActiveCellIterator<dim, spacedim, dealii::hp::DoFHandler<dim, spacedim> >
+  {
+  public:
+#ifndef _MSC_VER
+    typedef typename dealii::hp::DoFHandler<dim, spacedim>::active_cell_iterator type;
+#else
+    typedef TriaActiveIterator < dealii::DoFCellAccessor < dealii::hp::DoFHandler<dim, spacedim>, false > > type;
+#endif
+  };
+}
+
+/**
+ * This namespace is a collection of algorithms working on triangulations,
+ * such as shifting or rotating triangulations, but also finding a cell that
+ * contains a given point. See the descriptions of the individual functions
+ * for more information.
+ *
+ * @ingroup grid
+ */
+namespace GridTools
+{
+  /**
+   * @name Information about meshes and cells
+   */
+  /*@{*/
+
+  /**
+   * Return the diameter of a triangulation. The diameter is computed using
+   * only the vertices, i.e. if the diameter should be larger than the maximal
+   * distance between boundary vertices due to a higher order mapping, then
+   * this function will not catch this.
+   */
+  template <int dim, int spacedim>
+  double diameter (const Triangulation<dim, spacedim> &tria);
+
+  /**
+   * Compute the volume (i.e. the dim-dimensional measure) of the
+   * triangulation. We compute the measure using the integral $\sum_K \int_K 1
+   * \; dx$ where $K$ are the cells of the given triangulation. The integral
+   * is approximated via quadrature for which we need the mapping argument.
+   *
+   * If the triangulation is a dim-dimensional one embedded in a higher
+   * dimensional space of dimension spacedim, then the value returned is the
+   * dim-dimensional measure. For example, for a two-dimensional triangulation
+   * in three-dimensional space, the value returned is the area of the surface
+   * so described. (This obviously makes sense since the spacedim-dimensional
+   * measure of a dim-dimensional triangulation would always be zero if dim @<
+   * spacedim.)
+   *
+   * This function also works for objects of type
+   * parallel::distributed::Triangulation, in which case the function is a
+   * collective operation.
+   *
+   * @param tria The triangulation.
+   * @param mapping An optional argument used to denote the mapping that
+   * should be used when describing whether cells are bounded by straight or
+   * curved faces. The default is to use a $Q_1$ mapping, which corresponds to
+   * straight lines bounding the cells.
+   * @return The dim-dimensional measure of the domain described by the
+   * triangulation, as discussed above.
+   */
+  template <int dim, int spacedim>
+  double volume (const Triangulation<dim,spacedim> &tria,
+                 const Mapping<dim,spacedim> &mapping = (StaticMappingQ1<dim,spacedim>::mapping));
+
+  /**
+   * Return the diameter of the smallest active cell of a triangulation. See
+   * step-24 for an example of use of this function.
+   */
+  template <int dim, int spacedim>
+  double
+  minimal_cell_diameter (const Triangulation<dim, spacedim> &triangulation);
+
+  /**
+   * Return the diameter of the largest active cell of a triangulation.
+   */
+  template <int dim, int spacedim>
+  double
+  maximal_cell_diameter (const Triangulation<dim, spacedim> &triangulation);
+
+  /**
+   * Given a list of vertices (typically obtained using
+   * Triangulation::get_vertices) as the first, and a list of vertex indices
+   * that characterize a single cell as the second argument, return the
+   * measure (area, volume) of this cell. If this is a real cell, then you can
+   * get the same result using <code>cell-@>measure()</code>, but this
+   * function also works for cells that do not exist except that you make them
+   * up by naming their vertices from the list.
+   */
+  template <int dim>
+  double cell_measure (const std::vector<Point<dim> > &all_vertices,
+                       const unsigned int (&vertex_indices)[GeometryInfo<dim>::vertices_per_cell]);
+
+  /*@}*/
+  /**
+   * @name Functions supporting the creation of meshes
+   */
+  /*@{*/
+
+  /**
+   * Remove vertices that are not referenced by any of the cells. This
+   * function is called by all <tt>GridIn::read_*</tt> functions to eliminate
+   * vertices that are listed in the input files but are not used by the cells
+   * in the input file. While these vertices should not be in the input from
+   * the beginning, they sometimes are, most often when some cells have been
+   * removed by hand without wanting to update the vertex lists, as they might
+   * be lengthy.
+   *
+   * This function is called by all <tt>GridIn::read_*</tt> functions as the
+   * triangulation class requires them to be called with used vertices only.
+   * This is so, since the vertices are copied verbatim by that class, so we
+   * have to eliminate unused vertices beforehand.
+   *
+   * Not implemented for the codimension one case.
+   */
+  template <int dim, int spacedim>
+  void delete_unused_vertices (std::vector<Point<spacedim> >    &vertices,
+                               std::vector<CellData<dim> > &cells,
+                               SubCellData                 &subcelldata);
+
+  /**
+   * Remove vertices that are duplicated, due to the input of a structured
+   * grid, for example. If these vertices are not removed, the faces bounded
+   * by these vertices become part of the boundary, even if they are in the
+   * interior of the mesh.
+   *
+   * This function is called by some <tt>GridIn::read_*</tt> functions. Only
+   * the vertices with indices in @p considered_vertices are tested for
+   * equality. This speeds up the algorithm, which is quadratic and thus quite
+   * slow to begin with. However, if you wish to consider all vertices, simply
+   * pass an empty vector.
+   *
+   * Two vertices are considered equal if their difference in each coordinate
+   * direction is less than @p tol.
+   */
+  template <int dim, int spacedim>
+  void delete_duplicated_vertices (std::vector<Point<spacedim> >    &all_vertices,
+                                   std::vector<CellData<dim> > &cells,
+                                   SubCellData                 &subcelldata,
+                                   std::vector<unsigned int>   &considered_vertices,
+                                   const double                 tol=1e-12);
+
+  /*@}*/
+  /**
+   * @name Rotating, stretching and otherwise transforming meshes
+   */
+  /*@{*/
+
+  /**
+   * Transform the vertices of the given triangulation by applying the
+   * function object provided as first argument to all its vertices.
+   *
+   * The transformation given as argument is used to transform each vertex.
+   * Its respective type has to offer a function-like syntax, i.e. the
+   * transformation is either an object of a type that has an
+   * <tt>operator()</tt>, or it is a pointer to a function. In either case,
+   * argument and return value have to be of type <tt>Point@<spacedim@></tt>.
+   *
+   * @note If you are using a parallel::distributed::Triangulation you will
+   * have hanging nodes in your local Triangulation even if your "global" mesh
+   * has no hanging nodes. This will cause issues with wrong positioning of
+   * hanging nodes in ghost cells if you call the current functions: The
+   * vertices of all locally owned cells will be correct, but the vertices of
+   * some ghost cells may not. This means that computations like
+   * KellyErrorEstimator may give wrong answers. A safe approach is to use
+   * this function prior to any refinement in parallel, if that is possible,
+   * but not after you refine the mesh.
+   *
+   * This function is used in the "Possibilities for extensions" section of
+   * step-38. It is also used in step-49 and step-53.
+   */
+  template <int dim, typename Transformation, int spacedim>
+  void transform (const Transformation        &transformation,
+                  Triangulation<dim,spacedim> &triangulation);
+
+  /**
+   * Shift each vertex of the triangulation by the given shift vector. This
+   * function uses the transform() function above, so the requirements on the
+   * triangulation stated there hold for this function as well.
+   */
+  template <int dim, int spacedim>
+  void shift (const Tensor<1,spacedim>    &shift_vector,
+              Triangulation<dim,spacedim> &triangulation);
+
+
+  /**
+   * Rotate all vertices of the given two-dimensional triangulation in
+   * counter-clockwise sense around the origin of the coordinate system by the
+   * given angle (given in radians, rather than degrees). This function uses
+   * the transform() function above, so the requirements on the triangulation
+   * stated there hold for this function as well.
+   */
+  void rotate (const double      angle,
+               Triangulation<2> &triangulation);
+
+  /**
+   * Transform the given triangulation smoothly to a different domain where,
+   * typically, each of the vertices at the boundary of the triangulation is
+   * mapped to the corresponding points in the @p new_points map.
+   *
+   * The way this function works is that it solves a Laplace equation for each
+   * of the dim components of a displacement field that maps the current
+   * domain into one described by @p new_points . The @p new_points array
+   * therefore represents the boundary values of this displacement field. The
+   * function then evaluates this displacement field at each vertex in the
+   * interior and uses it to place the mapped vertex where the displacement
+   * field locates it. Because the solution of the Laplace equation is smooth,
+   * this guarantees a smooth mapping from the old domain to the new one.
+   *
+   * @param[in] new_points The locations where a subset of the existing
+   * vertices are to be placed. Typically, this would be a map from the vertex
+   * indices of all nodes on the boundary to their new locations, thus
+   * completely specifying the geometry of the mapped domain. However, it may
+   * also include interior points if necessary and it does not need to include
+   * all boundary vertices (although you then lose control over the exact
+   * shape of the mapped domain).
+   *
+   * @param[in,out] tria The Triangulation object. This object is changed
+   * in-place, i.e., the previous locations of vertices are overwritten.
+   *
+   * @param[in] coefficient An optional coefficient for the Laplace problem.
+   * Larger values make cells less prone to deformation (effectively
+   * increasing their stiffness). The coefficient is evaluated in the
+   * coordinate system of the old, undeformed configuration of the
+   * triangulation as input, i.e., before the transformation is applied.
+   * Should this function be provided, sensible results can only be expected
+   * if all coefficients are positive.
+   *
+   * @note This function is not currently implemented for the 1d case.
+   */
+  template <int dim>
+  void laplace_transform (const std::map<unsigned int,Point<dim> > &new_points,
+                          Triangulation<dim> &tria,
+                          const Function<dim,double> *coefficient = 0);
+
+  /**
+   * Return a std::map with all vertices of faces located on the boundary.
+   *
+   * @param[in] tria The Triangulation object.
+   */
+  template <int dim, int spacedim>
+  std::map<unsigned int,Point<spacedim> >
+  get_all_vertices_at_boundary (const Triangulation<dim, spacedim> &tria);
+
+  /**
+   * Scale the entire triangulation by the given factor. To preserve the
+   * orientation of the triangulation, the factor must be positive.
+   *
+   * This function uses the transform() function above, so the requirements on
+   * the triangulation stated there hold for this function as well.
+   */
+  template <int dim, int spacedim>
+  void scale (const double        scaling_factor,
+              Triangulation<dim, spacedim> &triangulation);
+
+  /**
+   * Distort the given triangulation by randomly moving around all the
+   * vertices of the grid.  The direction of movement of each vertex is
+   * random, while the length of the shift vector has a value of @p factor
+   * times the minimal length of the active edges adjacent to this vertex.
+   * Note that @p factor should obviously be well below <tt>0.5</tt>.
+   *
+   * If @p keep_boundary is set to @p true (which is the default), then
+   * boundary vertices are not moved.
+   */
+  template <int dim, int spacedim>
+  void distort_random (const double factor,
+                       Triangulation<dim, spacedim> &triangulation,
+                       const bool   keep_boundary=true);
+
+  /*@}*/
+  /**
+   * @name Finding cells and vertices of a triangulation
+   */
+  /*@{*/
+
+  /**
+   * Find and return the number of the used vertex in a given mesh that is
+   * located closest to a given point.
+   *
+   * @param mesh A variable of a type that satisfies the requirements of the
+   * @ref ConceptMeshType "MeshType concept".
+   * @param p The point for which we want to find the closest vertex.
+   * @return The index of the closest vertex found.
+   *
+   * @author Ralf B. Schulz, 2006
+   */
+  template <int dim, template <int, int> class MeshType, int spacedim>
+  unsigned int
+  find_closest_vertex (const MeshType<dim, spacedim> &mesh,
+                       const Point<spacedim>         &p);
+
+  /**
+   * Find and return a vector of iterators to active cells that surround a
+   * given vertex with index @p vertex_index.
+   *
+   * For locally refined grids, the vertex itself might not be a vertex of all
+   * adjacent cells that are returned. However, it will always be either a
+   * vertex of a cell or be a hanging node located on a face or an edge of it.
+   *
+   * @param container A variable of a type that satisfies the requirements of
+   * the
+   * @ref ConceptMeshType "MeshType concept".
+   * @param vertex_index The index of the vertex for which we try to find
+   * adjacent cells.
+   * @return A vector of cells that lie adjacent to the given vertex.
+   *
+   * @note If the point requested does not lie in any of the cells of the mesh
+   * given, then this function throws an exception of type
+   * GridTools::ExcPointNotFound. You can catch this exception and decide what
+   * to do in that case.
+   *
+   * @note It isn't entirely clear at this time whether the function does the
+   * right thing with anisotropically refined meshes. It needs to be checked
+   * for this case.
+   */
+  template<int dim, template <int, int> class MeshType, int spacedim>
+#ifndef _MSC_VER
+  std::vector<typename MeshType<dim, spacedim>::active_cell_iterator>
+#else
+  std::vector<typename dealii::internal::ActiveCellIterator<dim, spacedim, MeshType<dim, spacedim> >::type>
+#endif
+  find_cells_adjacent_to_vertex (const MeshType<dim,spacedim> &container,
+                                 const unsigned int            vertex_index);
+
+
+  /**
+   * Find and return an iterator to the active cell that surrounds a given
+   * point.
+   *
+   * This is solely a wrapper function for the function of the same name
+   * below. A Q1 mapping is used for the boundary, and the iterator to the
+   * cell in which the point resides is returned.
+   *
+   * It is recommended to use the other version of this function, as it
+   * simultaneously delivers the local coordinate of the given point without
+   * additional computational cost.
+   *
+   * @param mesh A variable of a type that satisfies the requirements of the
+   * @ref ConceptMeshType "MeshType concept".
+   * @param p The point for which we want to find the surrounding cell.
+   * @return An iterator into the mesh that points to the surrounding cell.
+   *
+   * @note If the point requested does not lie in any of the cells of the mesh
+   * given, then this function throws an exception of type
+   * GridTools::ExcPointNotFound. You can catch this exception and decide what
+   * to do in that case.
+   *
+   * @note When applied to a triangulation or DoF handler object based on a
+   * parallel::distributed::Triangulation object, the cell returned may in
+   * fact be a ghost or artificial cell (see
+   * @ref GlossArtificialCell
+   * and
+   * @ref GlossGhostCell).
+   * If so, many of the operations one may want to do on this cell (e.g.,
+   * evaluating the solution) may not be possible and you will have to decide
+   * what to do in that case.
+   */
+  template <int dim, template <int,int> class MeshType, int spacedim>
+#ifndef _MSC_VER
+  typename MeshType<dim,spacedim>::active_cell_iterator
+#else
+  typename dealii::internal::ActiveCellIterator<dim, spacedim, MeshType<dim, spacedim> >::type
+#endif
+  find_active_cell_around_point (const MeshType<dim,spacedim> &mesh,
+                                 const Point<spacedim>        &p);
+
+  /**
+   * Find and return an iterator to the active cell that surrounds a given
+   * point @p p.
+   *
+   * The algorithm used in this function proceeds by first looking for the
+   * vertex located closest to the given point, see find_closest_vertex().
+   * Secondly, all cells adjacent to this vertex are found in the mesh, see
+   * find_cells_adjacent_to_vertex(). Lastly, for each of these cells, it is
+   * tested whether the point is inside. This check is performed using
+   * arbitrary boundary mappings.  Still, it is possible that due to roundoff
+   * errors, the point cannot be located exactly inside the unit cell. In this
+   * case, even points at a very small distance outside the unit cell are
+   * allowed.
+   *
+   * If a point lies on the boundary of two or more cells, then the algorithm
+   * tries to identify the cell that is of highest refinement level.
+   *
+   * @param mapping The mapping used to determine whether the given point is
+   * inside a given cell.
+   * @param mesh A variable of a type that satisfies the requirements of the
+   * @ref ConceptMeshType "MeshType concept".
+   * @param p The point for which we want to find the surrounding cell.
+   * @return A pair of an iterator into the mesh that points to the
+   * surrounding cell, and of the coordinates of that point inside the cell in
+   * the reference coordinates of that cell. This local position might be
+   * located slightly outside an actual unit cell, due to numerical roundoff.
+   * Therefore, the point returned by this function should be projected onto
+   * the unit cell, using GeometryInfo::project_to_unit_cell().  This is not
+   * automatically performed by the algorithm.
+   *
+   * @note If the point requested does not lie in any of the cells of the mesh
+   * given, then this function throws an exception of type
+   * GridTools::ExcPointNotFound. You can catch this exception and decide what
+   * to do in that case.
+   *
+   * @note When applied to a triangulation or DoF handler object based on a
+   * parallel::distributed::Triangulation object, the cell returned may in
+   * fact be a ghost or artificial cell (see
+   * @ref GlossArtificialCell
+   * and
+   * @ref GlossGhostCell).
+   * If so, many of the operations one may want to do on this cell (e.g.,
+   * evaluating the solution) may not be possible and you will have to decide
+   * what to do in that case.
+   */
+  template <int dim, template<int, int> class MeshType, int spacedim>
+#ifndef _MSC_VER
+  std::pair<typename MeshType<dim, spacedim>::active_cell_iterator, Point<dim> >
+#else
+  std::pair<typename dealii::internal::ActiveCellIterator<dim, spacedim, MeshType<dim, spacedim> >::type, Point<dim> >
+#endif
+  find_active_cell_around_point (const Mapping<dim,spacedim>  &mapping,
+                                 const MeshType<dim,spacedim> &mesh,
+                                 const Point<spacedim>        &p);
+
+  /**
+   * A version of the previous function where we use that mapping on a given
+   * cell that corresponds to the active finite element index of that cell.
+   * This is obviously only useful for hp problems, since the active finite
+   * element index for all other DoF handlers is always zero.
+   *
+   * @note If the point requested does not lie in any of the cells of the mesh
+   * given, then this function throws an exception of type
+   * GridTools::ExcPointNotFound. You can catch this exception and decide what
+   * to do in that case.
+   *
+   * @note When applied to a triangulation or DoF handler object based on a
+   * parallel::distributed::Triangulation object, the cell returned may in
+   * fact be a ghost or artificial cell (see
+   * @ref GlossArtificialCell
+   * and
+   * @ref GlossGhostCell).
+   * If so, many of the operations one may want to do on this cell (e.g.,
+   * evaluating the solution) may not be possible and you will have to decide
+   * what to do in that case.
+   */
+  template <int dim, int spacedim>
+  std::pair<typename hp::DoFHandler<dim, spacedim>::active_cell_iterator, Point<dim> >
+  find_active_cell_around_point (const hp::MappingCollection<dim,spacedim> &mapping,
+                                 const hp::DoFHandler<dim,spacedim>        &mesh,
+                                 const Point<spacedim>                     &p);
+
+  /**
+   * Return a list of all descendants of the given cell that are active. For
+   * example, if the current cell is once refined but none of its children are
+   * any further refined, then the returned list will contain all its
+   * children.
+   *
+   * If the current cell is already active, then the returned list is empty
+   * (because the cell has no children that may be active).
+   *
+   * @tparam MeshType A type that satisfies the requirements of the
+   * @ref ConceptMeshType "MeshType concept".
+   * @param cell An iterator pointing to a cell of the mesh.
+   * @return A list of active descendants of the given cell
+   *
+   * @note Since in C++ the MeshType template argument can not be deduced from
+   * a function call, you will have to specify it after the function name, as
+   * for example in
+   * @code
+   *   GridTools::get_active_child_cells<DoFHandler<dim> > (cell)
+   * @endcode
+   */
+  template <class MeshType>
+  std::vector<typename MeshType::active_cell_iterator>
+  get_active_child_cells (const typename MeshType::cell_iterator &cell);
+
+  /**
+   * Extract the active cells around a given cell @p cell and return them in
+   * the vector @p active_neighbors.
+   *
+   * @tparam MeshType A type that satisfies the requirements of the
+   * @ref ConceptMeshType "MeshType concept".
+   * @param[in] cell An iterator pointing to a cell of the mesh.
+   * @param[out] active_neighbors A list of active neighbors of the given
+   * cell
+   */
+  template <class MeshType>
+  void
+  get_active_neighbors (const typename MeshType::active_cell_iterator        &cell,
+                        std::vector<typename MeshType::active_cell_iterator> &active_neighbors);
+
+  /**
+   * Extract and return the active cell layer around a subdomain (set of
+   * active cells) in the @p mesh (i.e. those that share a common set of
+   * vertices with the subdomain but are not a part of it). Here, the
+   * "subdomain" consists of exactly all of those cells for which the @p
+   * predicate returns @p true.
+   *
+   * An example of a custom predicate is one that checks for a given material
+   * id
+   * @code
+   * template<int dim>
+   * bool
+   * pred_mat_id(const typename Triangulation<dim>::active_cell_iterator & cell)
+   * {
+   *   return cell->material_id() ==  1;
+   * }
+   * @endcode
+   * and we can then extract the layer of cells around this material with the
+   * following call:
+   * @code
+   * GridTools::compute_active_cell_halo_layer(tria, pred_mat_id<dim>);
+   * @endcode
+   *
+   * Predicates that are frequently useful can be found in namespace
+   * IteratorFilters. For example, it is possible to extract a layer based
+   * on material id
+   * @code
+   * GridTools::compute_active_cell_halo_layer(tria,
+   *                                           IteratorFilters::MaterialIdEqualTo(1, true));
+   * @endcode
+   * or based on a set of active FE indices for an hp::DoFHandler
+   * @code
+   * GridTools::compute_active_cell_halo_layer(hp_dof_handler,
+   *                                           IteratorFilters::ActiveFEIndexEqualTo({1,2}, true));
+   * @endcode
+   * Note that in the last two examples we ensure that the predicate returns
+   * true only for locally owned cells. This means that the halo layer will
+   * not contain any artificial cells.
+   *
+   * @tparam MeshType A type that satisfies the requirements of the
+   * @ref ConceptMeshType "MeshType concept".
+   * @param[in] mesh A mesh (i.e. objects of type Triangulation, DoFHandler,
+   * or hp::DoFHandler).
+   * @param[in] predicate A function  (or object of a type with an operator())
+   * defining the subdomain around which the halo layer is to be extracted. It
+   * is a function that takes in an active cell and returns a boolean.
+   * @return A list of active cells sharing at least one common vertex with
+   * the predicated subdomain.
+   *
+   * @author Jean-Paul Pelteret, Denis Davydov, Wolfgang Bangerth, 2015
+   */
+  template <class MeshType>
+  std::vector<typename MeshType::active_cell_iterator>
+  compute_active_cell_halo_layer
+  (const MeshType                                                                    &mesh,
+   const std_cxx11::function<bool (const typename MeshType::active_cell_iterator &)> &predicate);
+
+  /**
+   * Extract and return ghost cells which are the active cell layer around all
+   * locally owned cells. This is most relevant for
+   * parallel::shared::Triangulation where it will return a subset of all
+   * ghost cells on a processor, but for parallel::distributed::Triangulation
+   * this will return all the ghost cells.
+   *
+   * @tparam MeshType A type that satisfies the requirements of the
+   * @ref ConceptMeshType "MeshType concept".
+   * @param[in] mesh A mesh (i.e. objects of type Triangulation, DoFHandler,
+   * or hp::DoFHandler).
+   * @return A list of ghost cells
+   *
+   * @author Jean-Paul Pelteret, Denis Davydov, Wolfgang Bangerth, 2015
+   */
+  template <class MeshType>
+  std::vector<typename MeshType::active_cell_iterator>
+  compute_ghost_cell_halo_layer (const MeshType &mesh);
+
+
+  /**
+   * Return the adjacent cells of all the vertices. If a vertex is also a
+   * hanging node, the associated coarse cell is also returned. The vertices
+   * are ordered by the vertex index. This is the number returned by the
+   * function <code>cell-@>vertex_index()</code>. Notice that only the indices
+   * marked in the array returned by
+   * Triangulation<dim,spacedim>::get_used_vertices() are used.
+   */
+  template <int dim, int spacedim>
+  std::vector<std::set<typename Triangulation<dim,spacedim>::active_cell_iterator> >
+  vertex_to_cell_map(const Triangulation<dim,spacedim> &triangulation);
+
+  /**
+   * Compute a globally unique index for each vertex and hanging node
+   * associated with a locally owned active cell. The vertices of a ghost cell
+   * that are hanging nodes of a locally owned cell have a global index.
+   * However, the other vertices of the cells that do not <i>touch</i> an
+   * active cell do not have a global index on this processor.
+   *
+   * The key of the map is the local index of the vertex and the value is the
+   * global index. The indices need to be recomputed after refinement or
+   * coarsening and may be different.
+   */
+  template <int dim, int spacedim>
+  std::map<unsigned int, types::global_vertex_index>
+  compute_local_to_global_vertex_index_map(
+    const parallel::distributed::Triangulation<dim,spacedim> &triangulation);
+
+
+  /*@}*/
+  /**
+   * @name Partitions and subdomains of triangulations
+   */
+  /*@{*/
+
+  /**
+   * Produce a sparsity pattern in which nonzero entries indicate that two
+   * cells are connected via a common face. The diagonal entries of the
+   * sparsity pattern are also set.
+   *
+   * The rows and columns refer to the cells as they are traversed in their
+   * natural order using cell iterators.
+   */
+  template <int dim, int spacedim>
+  void
+  get_face_connectivity_of_cells (const Triangulation<dim, spacedim> &triangulation,
+                                  DynamicSparsityPattern             &connectivity);
+
+  /**
+   * As above, but filling a SparsityPattern object instead.
+   *
+   * @deprecated
+   */
+  template <int dim, int spacedim>
+  void
+  get_face_connectivity_of_cells (const Triangulation<dim, spacedim> &triangulation,
+                                  SparsityPattern                    &connectivity) DEAL_II_DEPRECATED;
+
+  /**
+   * Produce a sparsity pattern in which nonzero entries indicate that two
+   * cells are connected via a common vertex. The diagonal entries of the
+   * sparsity pattern are also set.
+   *
+   * The rows and columns refer to the cells as they are traversed in their
+   * natural order using cell iterators.
+   */
+  template <int dim, int spacedim>
+  void
+  get_vertex_connectivity_of_cells (const Triangulation<dim, spacedim> &triangulation,
+                                    DynamicSparsityPattern             &connectivity);
+
+  /**
+   * Use the METIS partitioner to generate a partitioning of the active cells
+   * making up the entire domain. After calling this function, the subdomain
+   * ids of all active cells will have values between zero and @p
+   * n_partitions-1. You can access the subdomain id of a cell by using
+   * <tt>cell-@>subdomain_id()</tt>.
+   *
+   * This function will generate an error if METIS is not installed unless @p
+   * n_partitions is one. I.e., you can write a program so that it runs in the
+   * single-processor single-partition case without METIS installed, and only
+   * requires METIS when multiple partitions are required.
+   */
+  template <int dim, int spacedim>
+  void
+  partition_triangulation (const unsigned int  n_partitions,
+                           Triangulation<dim, spacedim> &triangulation);
+
+  /**
+   * This function does the same as the previous one, i.e. it partitions a
+   * triangulation using METIS into a number of subdomains identified by the
+   * <code>cell-@>subdomain_id()</code> flag.
+   *
+   * The difference to the previous function is the second argument, a
+   * sparsity pattern that represents the connectivity pattern between cells.
+   *
+   * While the function above builds it directly from the triangulation by
+   * considering which cells neighbor each other, this function can take a
+   * more refined connectivity graph. The sparsity pattern needs to be of size
+   * $N\times N$, where $N$ is the number of active cells in the
+   * triangulation. If the sparsity pattern contains an entry at position
+   * $(i,j)$, then this means that cells $i$ and $j$ (in the order in which
+   * they are traversed by active cell iterators) are to be considered
+   * connected; METIS will then try to partition the domain in such a way that
+   * (i) the subdomains are of roughly equal size, and (ii) a minimal number
+   * of connections are broken.
+   *
+   * This function is mainly useful in cases where connections between cells
+   * exist that are not present in the triangulation alone (otherwise the
+   * previous function would be the simpler one to use). Such connections may
+   * include that certain parts of the boundary of a domain are coupled
+   * through symmetric boundary conditions or integrals (e.g. friction contact
+   * between the two sides of a crack in the domain), or if a numerical scheme
+   * is used that not only connects immediate neighbors but a larger
+   * neighborhood of cells (e.g. when solving integral equations).
+   *
+   * In addition, this function may be useful in cases where the default
+   * sparsity pattern is not entirely sufficient. This can happen because the
+   * default is to just consider face neighbors, not neighboring cells that
+   * are connected by edges or vertices. While the latter couple when using
+   * continuous finite elements, they are typically still closely connected in
+   * the neighborship graph, and METIS will not usually cut important
+   * connections in this case. However, if there are vertices in the mesh
+   * where many cells (many more than the common 4 or 6 in 2d and 3d,
+   * respectively) come together, then there will be a significant number of
+   * cells that are connected across a vertex, but several degrees removed in
+   * the connectivity graph built only using face neighbors. In a case like
+   * this, METIS may sometimes make bad decisions and you may want to build
+   * your own connectivity graph.
+   */
+  template <int dim, int spacedim>
+  void
+  partition_triangulation (const unsigned int     n_partitions,
+                           const SparsityPattern &cell_connection_graph,
+                           Triangulation<dim,spacedim>    &triangulation);
+
+  /**
+   * For each active cell, return in the output array to which subdomain (as
+   * given by the <tt>cell-@>subdomain_id()</tt> function) it belongs. The
+   * output array is supposed to have the right size already when calling this
+   * function.
+   *
+   * This function returns the association of each cell with one subdomain. If
+   * you are looking for the association of each @em DoF with a subdomain, use
+   * the <tt>DoFTools::get_subdomain_association</tt> function.
+   */
+  template <int dim, int spacedim>
+  void
+  get_subdomain_association (const Triangulation<dim, spacedim>  &triangulation,
+                             std::vector<types::subdomain_id> &subdomain);
+
+  /**
+   * Count how many cells are uniquely associated with the given @p subdomain
+   * index.
+   *
+   * This function may return zero if there are no cells with the given @p
+   * subdomain index. This can happen, for example, if you try to partition a
+   * coarse mesh into more partitions (one for each processor) than there are
+   * cells in the mesh.
+   *
+   * This function returns the number of cells associated with one subdomain.
+   * If you are looking for the association of @em DoFs with this subdomain,
+   * use the <tt>DoFTools::count_dofs_with_subdomain_association</tt>
+   * function.
+   */
+  template <int dim, int spacedim>
+  unsigned int
+  count_cells_with_subdomain_association (const Triangulation<dim, spacedim> &triangulation,
+                                          const types::subdomain_id         subdomain);
+
+
+  /**
+   * For a triangulation, return a mask that represents which of its vertices
+   * are "owned" by the current process in the same way as we talk about
+   * locally owned cells or degrees of freedom (see
+   * @ref GlossLocallyOwnedCell
+   * and
+   * @ref GlossLocallyOwnedDof).
+   * For the purpose of this function, we define a locally owned vertex as
+   * follows: a vertex is owned by that processor with the smallest subdomain
+   * id (which equals the MPI rank of that processor) among all owners of
+   * cells adjacent to this vertex. In other words, vertices that are in the
+   * interior of a partition of the triangulation are owned by the owner of
+   * this partition; for vertices that lie on the boundary between two or more
+   * partitions, the owner is the processor with the least subdomain_id among
+   * all adjacent subdomains.
+   *
+   * For sequential triangulations (as opposed to, for example,
+   * parallel::distributed::Triangulation), every used vertex is of course
+   * owned by the current processor, i.e., the function returns
+   * Triangulation::get_used_vertices(). For parallel triangulations, the
+   * returned mask is a subset of what Triangulation::get_used_vertices()
+   * returns.
+   *
+   * @param triangulation The triangulation of which the function evaluates
+   * which vertices are locally owned.
+   * @return The subset of vertices, as described above. The length of the
+   * returned array equals Triangulation::n_vertices() and may, consequently,
+   * be larger than Triangulation::n_used_vertices().
+   */
+  template <int dim, int spacedim>
+  std::vector<bool>
+  get_locally_owned_vertices (const Triangulation<dim,spacedim> &triangulation);
+
+  /*@}*/
+  /**
+   * @name Comparing different meshes
+   */
+  /*@{*/
+
+  /**
+   * Given two meshes (i.e. objects of type Triangulation, DoFHandler, or
+   * hp::DoFHandler) that are based on the same coarse mesh, this function
+   * figures out a set of cells that are matched between the two meshes and
+   * where at most one of the meshes is more refined on this cell. In other
+   * words, it finds the smallest cells that are common to both meshes, and
+   * that together completely cover the domain.
+   *
+   * This function is useful, for example, in time-dependent or nonlinear
+   * applications, where one has to integrate a solution defined on one mesh
+   * (e.g., the one from the previous time step or nonlinear iteration)
+   * against the shape functions of another mesh (the next time step, the next
+   * nonlinear iteration). If, for example, the new mesh is finer, then one
+   * has to obtain the solution on the coarse mesh (mesh_1) and interpolate it
+   * to the children of the corresponding cell of mesh_2. Conversely, if the
+   * new mesh is coarser, one has to express the coarse cell shape function by
+   * a linear combination of fine cell shape functions. In either case, one
+   * needs to loop over the finest cells that are common to both
+   * triangulations. This function returns a list of pairs of matching
+   * iterators to cells in the two meshes that can be used to this end.
+   *
+   * Note that the list of these iterators is not necessarily ordered, and
+   * does also not necessarily coincide with the order in which cells are
+   * traversed in one, or both, of the meshes given as arguments.
+   *
+   * @tparam MeshType A type that satisfies the requirements of the
+   * @ref ConceptMeshType "MeshType concept".
+   */
+  template <typename MeshType>
+  std::list<std::pair<typename MeshType::cell_iterator,
+      typename MeshType::cell_iterator> >
+      get_finest_common_cells (const MeshType &mesh_1,
+                               const MeshType &mesh_2);
+
+  /**
+   * Return whether the two triangulations are based on the same coarse mesh.
+   * This is determined by checking whether they have the same number of cells
+   * on the coarsest level, and then checking that they have the same
+   * vertices.
+   *
+   * The two meshes may have different refinement histories beyond the coarse
+   * mesh.
+   *
+   * @param[in] mesh_1 The first of the two triangulations to compare.
+   * @param[in] mesh_2 The second of the two triangulations to compare.
+   * @return <code>true</code> if both triangulations are based on the same
+   * coarse mesh, <code>false</code> otherwise.
+   */
+  template <int dim, int spacedim>
+  bool
+  have_same_coarse_mesh (const Triangulation<dim, spacedim> &mesh_1,
+                         const Triangulation<dim, spacedim> &mesh_2);
+
+  /**
+   * The same function as have_same_coarse_mesh() for Triangulation
+   * arguments, but working on arguments of type DoFHandler, or
+   * hp::DoFHandler. This function is provided to allow calling
+   * have_same_coarse_mesh for all types of containers representing
+   * triangulations or the classes built on triangulations.
+   *
+   * @tparam MeshType A type that satisfies the requirements of the
+   * @ref ConceptMeshType "MeshType concept".
+   *
+   * @param[in] mesh_1 The first of the two meshes to compare.
+   * @param[in] mesh_2 The second of the two meshes to compare.
+   * @return <code>true</code> if both meshes are based on the same coarse
+   * mesh, <code>false</code> otherwise.
+   */
+  template <typename MeshType>
+  bool
+  have_same_coarse_mesh (const MeshType &mesh_1,
+                         const MeshType &mesh_2);
+
+  /*@}*/
+  /**
+   * @name Dealing with distorted cells
+   */
+  /*@{*/
+
+  /**
+   * Given a triangulation and a list of cells whose children have become
+   * distorted as a result of mesh refinement, try to fix these cells up by
+   * moving the center node around.
+   *
+   * The function returns a list of cells with distorted children that
+   * couldn't be fixed up for whatever reason. The returned list is therefore
+   * a subset of the input argument.
+   *
+   * For a definition of the concept of distorted cells, see the
+   * @ref GlossDistorted "glossary entry".
+   * The first argument passed to the current function is typically the
+   * exception thrown by the Triangulation::execute_coarsening_and_refinement
+   * function.
+   *
+   * @param[in] distorted_cells The list of cells whose children have become
+   * distorted.
+   * @param[in,out] triangulation The triangulation these cells belong to.
+   * @return The list of those input cells whose distorted children could not
+   * be fixed up.
+   */
+  template <int dim, int spacedim>
+  typename Triangulation<dim,spacedim>::DistortedCellList
+  fix_up_distorted_child_cells (const typename Triangulation<dim,spacedim>::DistortedCellList &distorted_cells,
+                                Triangulation<dim,spacedim> &triangulation);
+
+
+
+
+  /*@}*/
+  /**
+   * @name Extracting and creating patches of cells surrounding a single cell,
+   * and creating triangulation out of them
+   */
+  /*@{*/
+
+
+  /**
+   * This function returns a list of all the active neighbor cells of the
+   * given, active cell.  Here, a neighbor is defined as one having at least
+   * part of a face in common with the given cell, but not edge (in 3d) or
+   * vertex neighbors (in 2d and 3d).
+   *
+   * The first element of the returned list is the cell provided as argument.
+   * The remaining ones are neighbors: The function loops over all faces of
+   * that given cell and checks if that face is not on the boundary of the
+   * domain. Then, if the neighbor cell does not have any children (i.e., it
+   * is either at the same refinement level as the current cell, or coarser)
+   * then this neighbor cell is added to the list of cells. Otherwise, if the
+   * neighbor cell is refined and therefore has children, then this function
+   * loops over all subfaces of the current face and adds the neighbors behind
+   * these subfaces to the list to be returned.
+   *
+   * @tparam MeshType A type that satisfies the requirements of the
+   * @ref ConceptMeshType "MeshType concept".
+   * In C++, the compiler can not determine <code>MeshType</code> from the
+   * function call. You need to specify it as an explicit template argument
+   * following the function name.
+   * @param[in] cell An iterator pointing to an active cell of the mesh.
+   * @return A list of active cells that form the patch around the given
+   * cell; its first element is the given cell itself.
+   *
+   * @note Patches are often used in defining error estimators that require
+   * the solution of a local problem on the patch surrounding each of the
+   * cells of the mesh. This also requires manipulating the degrees of freedom
+   * associated with the cells of a patch. To this end, there are further
+   * functions working on patches in namespace DoFTools.
+   *
+   * @note In the context of a parallel distributed computation, it only makes
+   * sense to call this function on locally owned cells. This is because the
+   * neighbors of locally owned cells are either locally owned themselves, or
+   * ghost cells. For both, we know that these are in fact the real cells of
+   * the complete, parallel triangulation. We can also query the degrees of
+   * freedom on these.
+   *
+   * @author Arezou Ghesmati, Wolfgang Bangerth, 2014
+   */
+  template <class MeshType>
+  std::vector<typename MeshType::active_cell_iterator>
+  get_patch_around_cell(const typename MeshType::active_cell_iterator &cell);
+
+
+  /**
+   * This function takes a vector of active cells (hereafter named @p
+   * patch_cells) as input argument, and returns a vector of their parent
+   * cells with the coarsest common level of refinement. In other words, it
+   * finds the set of cells living at the same refinement level so that all
+   * cells in the input vector are children of the cells in the set, or are
+   * in the set itself.
+   *
+   * @tparam Container In C++, the compiler can not determine the type of
+   * <code>Container</code> from the function call. You need to specify it as
+   * an explicit template argument following the function name. This type has
+   * to satisfy the requirements of a mesh container (see
+   * @ref ConceptMeshType).
+   *
+   * @param[in] patch_cells A vector of active cells for which this function
+   * finds the parents at the coarsest common level. This vector of cells
+   * typically results from calling the function
+   * GridTools::get_patch_around_cell().
+   * @return A list of cells with the coarsest common level of refinement of
+   * the input cells.
+   *
+   * @author Arezou Ghesmati, Wolfgang Bangerth, 2015
+   */
+  template <class Container>
+  std::vector<typename Container::cell_iterator>
+  get_cells_at_coarsest_common_level(const std::vector<typename Container::active_cell_iterator> &patch_cells);
+
+  /**
+   * This function constructs a Triangulation (named @p local_triangulation)
+   * from a given vector of active cells. This vector (which we think of as
+   * the cells corresponding to a "patch") contains active cells that are part
+   * of an existing global Triangulation. The goal of this function is to
+   * build a local Triangulation that contains only the active cells given in
+   * @p patch (and potentially a minimum number of additional cells required
+   * to form a valid Triangulation). The function also returns a map that
+   * allows to identify the cells in the output Triangulation and
+   * corresponding cells in the input list.
+   *
+   * The operation implemented by this function is frequently used in the
+   * definition of error estimators that need to solve "local" problems on
+   * each cell and its neighbors. A similar construction is necessary in the
+   * definition of the Clement interpolation operator in which one needs to
+   * solve a local problem on all cells within the support of a shape
+   * function. This function then builds a complete Triangulation from a list
+   * of cells that make up such a patch; one can then later attach a
+   * DoFHandler to such a Triangulation.
+   *
+   * If the list of input cells contains only cells at the same refinement
+   * level, then the output Triangulation simply consists of a Triangulation
+   * containing only exactly these patch cells. On the other hand, if the
+   * input cells live on different refinement levels, i.e., the Triangulation
+   * of which they are part is adaptively refined, then the construction of
+   * the output Triangulation is not so simple because the coarsest level of a
+   * Triangulation can not contain hanging nodes. Rather, we first have to
+   * find the common refinement level of all input cells, along with their
+   * common parents (see GridTools::get_cells_at_coarsest_common_level()),
+   * build a Triangulation from those, and then adaptively refine it so that
+   * the input cells all also exist in the output Triangulation.
+   *
+   * A consequence of this procedure is that the output Triangulation may
+   * contain more active cells than the ones that exist in the input vector.
+   * On the other hand, one typically wants to solve the local problem not on
+   * the entire output Triangulation, but only on those cells of it that
+   * correspond to cells in the input list.  In this case, a user typically
+   * wants to assign degrees of freedom only on cells that are part of the
+   * "patch", and somehow ignore those excessive cells. The current function
+   * supports this common requirement by setting the user flag for the cells
+   * in the output Triangulation that match with cells in the input list.
+   * Cells which are not part of the original patch will not have their @p
+   * user_flag set; we can then avoid assigning degrees of freedom using the
+   * FE_Nothing<dim> element.
+   *
+   * @tparam Container In C++, the compiler can not determine the type of
+   * <code>Container</code> from the function call. You need to specify it as
+   * an explicit template argument following the function name. This type has
+   * to satisfy the requirements of a mesh container (see
+   * @ref ConceptMeshType).
+   *
+   * @param[in] patch A vector of active cells from a common triangulation.
+   * These cells may or may not all be at the same refinement level.
+   * @param[out] local_triangulation A triangulation whose active cells
+   * correspond to the given vector of active cells in @p patch.
+   * @param[out] patch_to_global_tria_map A map between the local
+   * triangulation which is built as explained above, and the cell iterators
+   * in the input list.
+   *
+   * @author Arezou Ghesmati, Wolfgang Bangerth, 2015
+   */
+  template <class Container>
+  void
+  build_triangulation_from_patch (
+    const std::vector<typename Container::active_cell_iterator>  &patch,
+    Triangulation<Container::dimension,Container::space_dimension> &local_triangulation,
+    std::map<typename Triangulation<Container::dimension,Container::space_dimension>::active_cell_iterator,
+    typename Container::active_cell_iterator> &patch_to_global_tria_map);
+
+  /**
+   * This function runs through the degrees of freedom defined by the
+   * DoFHandlerType and for each dof constructs a vector of
+   * active_cell_iterators representing the cells of support of the
+   * associated basis element at that degree of freedom. This function was
+   * originally designed for the implementation of local projections, for
+   * instance the Clement interpolant, in conjunction with other local patch
+   * functions like GridTools::build_triangulation_from_patch.
+   *
+   * DoFHandlerType objects built on top of Triangulation or
+   * parallel::distributed::Triangulation are supported and handled
+   * appropriately.
+   *
+   * The result is the patch of cells representing the support of the basis
+   * element associated to the degree of freedom.  For instance using an FE_Q
+   * finite element, we obtain the standard patch of cells touching the degree
+   * of freedom and then add other cells that take care of possible hanging node
+   * constraints.  Using a FE_DGQ finite element, the degrees of freedom are
+   * logically considered to be "interior" to the cells so the patch would
+   * consist exclusively of the single cell on which the degree of freedom is
+   * located.
+   *
+   * @tparam DoFHandlerType The DoFHandlerType should be a DoFHandler or
+   * hp::DoFHandler.
+   * @param[in] dof_handler The DoFHandlerType which could be built on a
+   * Triangulation or a parallel::distributed::Triangulation with a finite
+   * element that has degrees of freedom that are logically associated to a
+   * vertex, line, quad, or hex.
+   * @return A map from the global_dof_index of
+   * degrees of freedom on locally relevant cells to vectors containing
+   * DoFHandlerType::active_cell_iterators of cells in the support of the basis
+   * function at that degree of freedom.
+   *
+   * @author Spencer Patty, 2016
+   *
+   */
+  template <class DoFHandlerType>
+  std::map< types::global_dof_index,std::vector<typename DoFHandlerType::active_cell_iterator> >
+  get_dof_to_support_patch_map(DoFHandlerType &dof_handler);
+
+
+  /*@}*/
+  /**
+   * @name Lower-dimensional meshes for parts of higher-dimensional meshes
+   */
+  /*@{*/
+
+
+#ifdef _MSC_VER
+  // Microsoft's VC++ has a bug where it does not recognize that an
+  // implementation (definition) of the extract_boundary_mesh function
+  // matches its declaration. This can apparently only be avoided by
+  // spelling the return type via the following intermediate type, a map from
+  // MeshType<dim-1,spacedim> cell iterators to MeshType<dim,spacedim> face
+  // iterators. It is only used with MS VC++; otherwise the direct way is used.
+  template <template <int,int> class MeshType, int dim, int spacedim>
+  struct ExtractBoundaryMesh
+  {
+    typedef
+    std::map<typename MeshType<dim-1,spacedim>::cell_iterator,
+        typename MeshType<dim,spacedim>::face_iterator>
+        return_type;
+  };
+#endif
+
+  /*@}*/
+  /**
+   * @name Dealing with periodic domains
+   */
+  /*@{*/
+
+  /**
+   * Data type that provides all information necessary to create periodicity
+   * constraints and a periodic p4est forest with respect to two 'periodic'
+   * cell faces.
+   *
+   * @tparam CellIterator The iterator type used to store the two cells the
+   * 'periodic' faces belong to.
+   */
+  template<typename CellIterator>
+  struct PeriodicFacePair
+  {
+    /**
+     * The cells associated with the two 'periodic' faces.
+     */
+    CellIterator cell[2];
+
+    /**
+     * The local face indices (with respect to the specified cells) of the two
+     * 'periodic' faces.
+     */
+    unsigned int face_idx[2];
+
+    /**
+     * The relative orientation of the first face with respect to the second
+     * face as described in orthogonal_equality() and
+     * DoFTools::make_periodicity_constraints() (and stored as a bitset).
+     */
+    std::bitset<3> orientation;
+
+    /**
+     * A @p dim $\times$ @p dim rotation matrix that describes how vector
+     * valued DoFs of the first face should be modified prior to constraining
+     * to the DoFs of the second face.
+     *
+     * The rotation matrix is used in DoFTools::make_periodicity_constraints()
+     * by applying the rotation to all vector valued blocks listed in the
+     * parameter @p first_vector_components of the finite element space. For
+     * more details see DoFTools::make_periodicity_constraints() and the
+     * @ref GlossPeriodicConstraints "glossary entry on periodic conditions".
+     */
+    FullMatrix<double> matrix;
+  };
+
+
+  /**
+   * An orthogonal equality test for faces.
+   *
+   * @p face1 and @p face2 are considered equal, if a one to one matching
+   * between their vertices can be achieved via an orthogonal equality
+   * relation.
+   *
+   * Here, two vertices <tt>v_1</tt> and <tt>v_2</tt> are considered equal, if
+   * $M\cdot v_1 + offset - v_2$ is parallel to the unit vector in unit
+   * direction @p direction. If the parameter @p matrix is a reference to a
+   * spacedim x spacedim matrix, $M$ is set to @p matrix, otherwise $M$ is the
+   * identity matrix.
+   *
+   * If the matching was successful, the _relative_ orientation of @p face1
+   * with respect to @p face2 is returned in the bitset @p orientation, where
+   * @code
+   * orientation[0] -> face_orientation
+   * orientation[1] -> face_flip
+   * orientation[2] -> face_rotation
+   * @endcode
+   *
+   * In 2D <tt>face_orientation</tt> is always <tt>true</tt>,
+   * <tt>face_rotation</tt> is always <tt>false</tt>, and face_flip has the
+   * meaning of <tt>line_flip</tt>. More precisely in 3d:
+   *
+   * <tt>face_orientation</tt>: <tt>true</tt> if @p face1 and @p face2 have
+   * the same orientation. Otherwise, the vertex indices of @p face1 match the
+   * vertex indices of @p face2 in the following manner:
+   *
+   * @code
+   * face1:           face2:
+   *
+   * 1 - 3            2 - 3
+   * |   |    <-->    |   |
+   * 0 - 2            0 - 1
+   * @endcode
+   *
+   * <tt>face_flip</tt>: <tt>true</tt> if the matched vertices are rotated by
+   * 180 degrees:
+   *
+   * @code
+   * face1:           face2:
+   *
+   * 1 - 0            2 - 3
+   * |   |    <-->    |   |
+   * 3 - 2            0 - 1
+   * @endcode
+   *
+   * <tt>face_rotation</tt>: <tt>true</tt> if the matched vertices are rotated
+   * by 90 degrees counterclockwise:
+   *
+   * @code
+   * face1:           face2:
+   *
+   * 0 - 2            2 - 3
+   * |   |    <-->    |   |
+   * 1 - 3            0 - 1
+   * @endcode
+   *
+   * and any combination of that... More information on the topic can be found
+   * in the
+   * @ref GlossFaceOrientation "glossary"
+   * article.
+   *
+   * @author Matthias Maier, 2012
+   */
+  template<typename FaceIterator>
+  bool
+  orthogonal_equality (std::bitset<3>     &orientation,
+                       const FaceIterator &face1,
+                       const FaceIterator &face2,
+                       const int          direction,
+                       const Tensor<1,FaceIterator::AccessorType::space_dimension> &offset
+                       = Tensor<1,FaceIterator::AccessorType::space_dimension>(),
+                       const FullMatrix<double> &matrix = FullMatrix<double>());
+
+
+  /**
+   * Same function as the orthogonal_equality() overload that takes a
+   * <code>std::bitset<3></code> as its first argument, but this variant
+   * doesn't return the actual orientation.
+   *
+   * The @p offset is a rank-1 tensor (a vector) of dimension
+   * <code>FaceIterator::AccessorType::space_dimension</code>, exactly as in
+   * the other overload; by default it is zero.
+   */
+  template<typename FaceIterator>
+  bool
+  orthogonal_equality (const FaceIterator &face1,
+                       const FaceIterator &face2,
+                       const int          direction,
+                       const Tensor<1,FaceIterator::AccessorType::space_dimension> &offset
+                       = Tensor<1,FaceIterator::AccessorType::space_dimension>(),
+                       const FullMatrix<double> &matrix = FullMatrix<double>());
+
+
+  /**
+   * This function will collect periodic face pairs on the coarsest mesh level
+   * of the given @p mesh (a Triangulation or DoFHandler) and add them to the
+   * vector @p matched_pairs leaving the original contents intact.
+   *
+   * Define a 'first' boundary as all boundary faces having boundary_id @p
+   * b_id1 and a 'second' boundary consisting of all faces belonging to @p
+   * b_id2.
+   *
+   * This function tries to match all faces belonging to the first boundary
+   * with faces belonging to the second boundary with the help of
+   * orthogonal_equality().
+   *
+   * The bitset that is returned inside of PeriodicFacePair encodes the
+   * _relative_ orientation of the first face with respect to the second face,
+   * see the documentation of orthogonal_equality() for further details.
+   *
+   * The @p direction refers to the space direction in which periodicity is
+   * enforced. When matching periodic faces this vector component is ignored.
+   *
+   * The @p offset is a vector tangential to the faces that is added to the
+   * location of vertices of the 'first' boundary when attempting to match
+   * them to the corresponding vertices of the 'second' boundary. This can be
+   * used to implement conditions such as $u(0,y)=u(1,y+1)$.
+   *
+   * Optionally, a $dim\times dim$ rotation @p matrix can be specified that
+   * describes how vector valued DoFs of the first face should be modified
+   * prior to constraining to the DoFs of the second face. The @p matrix is
+   * used in two places. First, @p matrix will be supplied to
+   * orthogonal_equality() and used for matching faces: Two vertices $v_1$ and
+   * $v_2$ match if $\text{matrix}\cdot v_1 + \text{offset} - v_2$ is parallel
+   * to the unit vector in unit direction @p direction. (For more details see
+   * DoFTools::make_periodicity_constraints(), the
+   * @ref GlossPeriodicConstraints "glossary entry on periodic conditions"
+   * and step-45). Second, @p matrix will be stored in the PeriodicFacePair
+   * collection @p matched_pairs for further use.
+   *
+   * @tparam MeshType A type that satisfies the requirements of the
+   * @ref ConceptMeshType "MeshType concept".
+   *
+   * @note The created std::vector can be used in
+   * DoFTools::make_periodicity_constraints() and in
+   * parallel::distributed::Triangulation::add_periodicity() to enforce
+   * periodicity algebraically.
+   *
+   * @note Because elements will be added to @p matched_pairs (and existing
+   * entries will be preserved), it is possible to call this function several
+   * times with different boundary ids to generate a vector with all periodic
+   * pairs.
+   *
+   * @author Daniel Arndt, Matthias Maier, 2013 - 2015
+   */
+  template <typename MeshType>
+  void
+  collect_periodic_faces
+  (const MeshType                            &mesh,
+   const types::boundary_id                   b_id1,
+   const types::boundary_id                   b_id2,
+   const int                                  direction,
+   std::vector<PeriodicFacePair<typename MeshType::cell_iterator> > &matched_pairs,
+   const Tensor<1,MeshType::space_dimension> &offset = dealii::Tensor<1,MeshType::space_dimension>(),
+   const FullMatrix<double>                  &matrix = FullMatrix<double>());
+
+
+  /**
+   * This compatibility version of collect_periodic_faces() only works on
+   * grids with cells in
+   * @ref GlossFaceOrientation "standard orientation".
+   *
+   * Instead of defining a 'first' and 'second' boundary with the help of two
+   * boundary_ids this function defines a 'left' boundary as all faces with
+   * local face index <code>2*direction</code> and boundary indicator @p b_id
+   * and, similarly, a 'right' boundary consisting of all faces with local
+   * face index <code>2*direction+1</code> and boundary indicator @p b_id.
+   *
+   * This function will collect periodic face pairs on the coarsest mesh level
+   * and add them to @p matched_pairs leaving the original contents intact.
+   *
+   * See the collect_periodic_faces() overload taking two boundary ids for
+   * further details.
+   *
+   * @note This version of collect_periodic_faces() will not work on
+   * meshes with cells not in
+   * @ref GlossFaceOrientation "standard orientation".
+   *
+   * @author Daniel Arndt, Matthias Maier, 2013 - 2015
+   */
+  template <typename MeshType>
+  void
+  collect_periodic_faces
+  (const MeshType                                    &mesh,
+   const types::boundary_id                           b_id,
+   const int                                          direction,
+   std::vector<PeriodicFacePair<typename MeshType::cell_iterator> > &matched_pairs,
+   const dealii::Tensor<1,MeshType::space_dimension> &offset = dealii::Tensor<1,MeshType::space_dimension>(),
+   const FullMatrix<double>                          &matrix = FullMatrix<double>());
+
+  /*@}*/
+  /**
+   * @name Dealing with boundary and manifold ids
+   */
+  /*@{*/
+
+  /**
+   * Copy boundary ids to manifold ids on faces and edges at the boundary. The
+   * default manifold_id for new Triangulation objects is
+   * numbers::invalid_manifold_id. This function copies the boundary_ids of
+   * the boundary faces and edges to the manifold_ids of the same faces and
+   * edges, allowing the user to change the boundary_ids and use them for
+   * boundary conditions regardless of the geometry, which will use
+   * manifold_ids to create new points. Only active cells will be iterated
+   * over. This is a function you'd typically call when there is only one
+   * active level on your Triangulation. Mesh refinement will then inherit
+   * these indicators to child cells, faces, and edges.
+   *
+   * The optional parameter @p reset_boundary_ids indicates whether this
+   * function should reset the boundary_ids of boundary faces and edges to
+   * their default value 0 after copying them to the manifold_id. By default,
+   * boundary_ids are left untouched.
+   *
+   * @param[in,out] tria The triangulation whose ids are modified.
+   * @param[in] reset_boundary_ids Whether to reset the boundary_ids to 0
+   * after they have been copied.
+   *
+   * @ingroup manifold
+   * @relatesalso boundary
+   *
+   * @author Luca Heltai, 2015
+   */
+  template <int dim, int spacedim>
+  void copy_boundary_to_manifold_id(Triangulation<dim, spacedim> &tria,
+                                    const bool reset_boundary_ids=false);
+
+  /**
+   * Copy material ids to manifold ids. The default manifold_id for new
+   * Triangulation objects is numbers::invalid_manifold_id. When refinement
+   * occurs, the Triangulation asks the underlying manifold where to locate
+   * new points.
+   *
+   * When reading a Triangulation from a supported input format, typical
+   * information that can be stored in a file are boundary conditions for
+   * boundary faces (which we store in the boundary_id of the faces), material
+   * types for cells (which we store in the material_id of the cells) and in
+   * some cases subdomain ids for cells (which we store in the subdomain_id of
+   * the cell).
+   *
+   * If you read one of these grids into a Triangulation, you might still want
+   * to use the material_id specified in the input file as a manifold_id
+   * description. In this case you can associate a Manifold object to internal
+   * cells, and this object will be used by the Triangulation to query
+   * Manifold objects for new points. This function iterates over active cells
+   * and copies the material_ids to the manifold_ids.
+   *
+   * The optional parameter @p compute_face_ids indicates whether this
+   * function should also set the manifold_ids of the faces (both for internal
+   * faces and for faces on the boundary). If set to true, then each face will
+   * get a manifold_id equal to the minimum of the surrounding manifold_ids,
+   * ensuring that a unique manifold id is selected for each face of the
+   * Triangulation. By default, face manifold_ids are not computed.
+   *
+   * @param[in,out] tria The triangulation whose manifold_ids are set.
+   * @param[in] compute_face_ids Whether to also set the manifold_ids of
+   * faces.
+   *
+   * @ingroup manifold
+   *
+   * @author Luca Heltai, 2015
+   */
+  template <int dim, int spacedim>
+  void copy_material_to_manifold_id(Triangulation<dim, spacedim> &tria,
+                                    const bool compute_face_ids=false);
+
+
+  /*@}*/
+  /**
+   * @name Exceptions
+   */
+  /*@{*/
+
+
+  /**
+   * Exception indicating that the given number of partitions is not greater
+   * than zero. The argument is the offending number of partitions.
+   */
+  DeclException1 (ExcInvalidNumberOfPartitions,
+                  int,
+                  << "The number of partitions you gave is " << arg1
+                  << ", but must be greater than zero.");
+  /**
+   * Exception indicating that no cells are associated with the subdomain id
+   * given as argument.
+   */
+  DeclException1 (ExcNonExistentSubdomain,
+                  int,
+                  << "The subdomain id " << arg1
+                  << " has no cells associated with it.");
+  /**
+   * Exception without arguments and without a message of its own.
+   * NOTE(review): judging by its name, it presumably signals that a
+   * triangulation has been refined where an unrefined one is required;
+   * confirm at the places where it is used.
+   */
+  DeclException0 (ExcTriangulationHasBeenRefined);
+
+  /**
+   * Exception indicating that a scaling factor is not positive. The argument
+   * is the offending scaling factor.
+   */
+  DeclException1 (ExcScalingFactorNotPositive,
+                  double,
+                  << "The scaling factor must be positive, but is " << arg1);
+  /**
+   * Exception indicating that the point given as argument could not be found
+   * inside any of the coarse grid cells.
+   *
+   * @tparam N The template argument of the Point type of the argument.
+   */
+  template <int N>
+  DeclException1 (ExcPointNotFoundInCoarseGrid,
+                  Point<N>,
+                  << "The point <" << arg1
+                  << "> could not be found inside any of the "
+                  << "coarse grid cells.");
+  /**
+   * Exception indicating that the point given as argument could not be found
+   * inside any of the subcells of a coarse grid cell.
+   *
+   * @tparam N The template argument of the Point type of the argument.
+   */
+  template <int N>
+  DeclException1 (ExcPointNotFound,
+                  Point<N>,
+                  << "The point <" << arg1
+                  << "> could not be found inside any of the "
+                  << "subcells of a coarse grid cell.");
+
+  /**
+   * Exception indicating that the vertex given as argument (an unsigned
+   * integer) is not used in the given triangulation.
+   */
+  DeclException1 (ExcVertexNotUsed,
+                  unsigned int,
+                  << "The given vertex " << arg1
+                  << " is not used in the given triangulation");
+
+
+  /*@}*/
+
+} /*namespace GridTools*/
+
+
+
+/* ----------------- Template function --------------- */
+
+#ifndef DOXYGEN
+
+namespace GridTools
+{
+
+  /**
+   * Replace the location of every vertex used by an active cell of
+   * @p triangulation by the result of applying @p predicate to it. Afterwards,
+   * in 2d and 3d, the vertices created by refining interior faces are put
+   * back onto the averages of the (moved) vertices of their parent faces.
+   */
+  template <int dim, typename Predicate, int spacedim>
+  void transform (const Predicate    &predicate,
+                  Triangulation<dim, spacedim> &triangulation)
+  {
+    typedef typename Triangulation<dim, spacedim>::active_cell_iterator
+    active_iterator;
+
+    // one flag per vertex recording whether it has been moved already;
+    // vertices are shared between cells but must be moved only once
+    std::vector<bool> vertex_done (triangulation.n_vertices(), false);
+
+    const active_iterator endc = triangulation.end ();
+
+    // visiting only the active cells is enough to get to all vertices of
+    // the triangulation
+    for (active_iterator cell = triangulation.begin_active (); cell!=endc; ++cell)
+      for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+        {
+          const unsigned int index = cell->vertex_index(v);
+          if (vertex_done[index])
+            continue;
+
+          cell->vertex(v) = predicate(cell->vertex(v));
+          vertex_done[index] = true;
+        }
+
+    // now fix any vertices on hanging nodes so that we don't create any
+    // holes; this is only done for dim==2 and dim==3
+    if (dim!=2 && dim!=3)
+      return;
+
+    for (active_iterator cell = triangulation.begin_active(); cell!=endc; ++cell)
+      for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+        {
+          // only refined faces in the interior are of interest
+          if (!cell->face(face)->has_children() ||
+              cell->face(face)->at_boundary())
+            continue;
+
+          if (dim==2)
+            {
+              // this line has children: reset their common vertex
+              cell->face(face)->child(0)->vertex(1)
+                = (cell->face(face)->vertex(0) +
+                   cell->face(face)->vertex(1)) / 2;
+            }
+          else
+            {
+              // this face has hanging nodes: first the ones on its four edges
+              cell->face(face)->child(0)->vertex(1)
+                = (cell->face(face)->vertex(0) + cell->face(face)->vertex(1)) / 2.0;
+              cell->face(face)->child(0)->vertex(2)
+                = (cell->face(face)->vertex(0) + cell->face(face)->vertex(2)) / 2.0;
+              cell->face(face)->child(1)->vertex(3)
+                = (cell->face(face)->vertex(1) + cell->face(face)->vertex(3)) / 2.0;
+              cell->face(face)->child(2)->vertex(3)
+                = (cell->face(face)->vertex(2) + cell->face(face)->vertex(3)) / 2.0;
+
+              // then the one at the center of the face
+              cell->face(face)->child(0)->vertex(3)
+                = (cell->face(face)->vertex(0) + cell->face(face)->vertex(1)
+                   + cell->face(face)->vertex(2) + cell->face(face)->vertex(3)) / 4.0;
+            }
+        }
+  }
+
+
+
+  /**
+   * Collect all descendants of @p cell that have no children themselves by
+   * recursively descending through its children. The returned vector is
+   * empty if @p cell has no children; the cell itself is never part of it.
+   */
+  template <class MeshType>
+  std::vector<typename MeshType::active_cell_iterator>
+  get_active_child_cells (const typename MeshType::cell_iterator &cell)
+  {
+    std::vector<typename MeshType::active_cell_iterator> active_descendants;
+
+    // a cell without children has no descendants at all
+    if (!cell->has_children())
+      return active_descendants;
+
+    for (unsigned int c=0; c<cell->n_children(); ++c)
+      {
+        const typename MeshType::cell_iterator child = cell->child(c);
+
+        if (!child->has_children())
+          {
+            // the child is not refined any further: take it as is
+            active_descendants.push_back (child);
+            continue;
+          }
+
+        // the child is refined further: append everything found below it
+        const std::vector<typename MeshType::active_cell_iterator>
+        below_child = get_active_child_cells<MeshType> (child);
+        active_descendants.insert (active_descendants.end(),
+                                   below_child.begin(), below_child.end());
+      }
+
+    return active_descendants;
+  }
+
+
+
+  /**
+   * Fill @p active_neighbors with the active cells behind all faces of
+   * @p cell that are not at the boundary. Any previous content of
+   * @p active_neighbors is discarded.
+   */
+  template <class MeshType>
+  void
+  get_active_neighbors(const typename MeshType::active_cell_iterator        &cell,
+                       std::vector<typename MeshType::active_cell_iterator> &active_neighbors)
+  {
+    active_neighbors.clear ();
+
+    for (unsigned int face_no=0;
+         face_no<GeometryInfo<MeshType::dimension>::faces_per_cell; ++face_no)
+      {
+        // nothing lives behind a boundary face
+        if (cell->at_boundary(face_no))
+          continue;
+
+        if (MeshType::dimension == 1)
+          {
+            // in 1d the neighbor may be refined several times. we know
+            // which child borders the present cell on every level: the one
+            // on the side facing us
+            const unsigned int facing_side = (face_no==0 ? 1 : 0);
+
+            typename MeshType::cell_iterator candidate = cell->neighbor(face_no);
+            if (!candidate->active())
+              {
+                while (candidate->has_children())
+                  candidate = candidate->child (facing_side);
+
+                Assert (candidate->neighbor(facing_side)==cell,
+                        ExcInternalError());
+              }
+            active_neighbors.push_back (candidate);
+          }
+        else if (cell->face(face_no)->has_children())
+          {
+            // the neighbor is refined: take the cells bordering the present
+            // cell on each of the subfaces
+            for (unsigned int subface=0;
+                 subface<cell->face(face_no)->number_of_children(); ++subface)
+              active_neighbors.push_back (cell->neighbor_child_on_subface(face_no,subface));
+          }
+        else
+          {
+            // the neighbor must be active itself
+            Assert(cell->neighbor(face_no)->active(), ExcInternalError());
+            active_neighbors.push_back(cell->neighbor(face_no));
+          }
+      }
+  }
+
+
+
+// declarations of the explicit specializations of cell_measure() for dim=3 and dim=2, each taking the list of all vertices and the vertex indices of one cell
+
+  template <>
+  double
+  cell_measure<3>(const std::vector<Point<3> > &all_vertices,
+                  const unsigned int (&vertex_indices) [GeometryInfo<3>::vertices_per_cell]);
+
+  template <>
+  double
+  cell_measure<2>(const std::vector<Point<2> > &all_vertices,
+                  const unsigned int (&vertex_indices) [GeometryInfo<2>::vertices_per_cell]);
+}
+
+#endif
+
+DEAL_II_NAMESPACE_CLOSE
+
+/*----------------------------   grid_tools.h     ---------------------------*/
+/* end of #ifndef dealii__grid_tools_H */
+#endif
+/*----------------------------   grid_tools.h     ---------------------------*/
diff --git a/include/deal.II/grid/intergrid_map.h b/include/deal.II/grid/intergrid_map.h
new file mode 100644
index 0000000..f4413d5
--- /dev/null
+++ b/include/deal.II/grid/intergrid_map.h
@@ -0,0 +1,210 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__intergrid_map_h
+#define dealii__intergrid_map_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/**
+ * This class provides a map between two grids which are derived from the same
+ * coarse grid. For each cell iterator of the source grid, it provides the
+ * respective cell iterator on the destination grid, through its <tt>operator
+ * []</tt>.
+ *
+ * Usually, the two grids will be refined differently. Then, the value
+ * returned for an iterator on the source grid will be either:
+ * <ul>
+ * <li> The same cell on the destination grid, if it exists there;
+ * <li> The most refined cell of the destination grid from which the pendant
+ * of the source cell could be obtained by refinement. This cell is always
+ * active and has a refinement level less than that of the source cell.
+ * </ul>
+ * Keys for this map are all cells on the source grid, whether active or not.
+ *
+ * For example, consider these two one-dimensional grids:
+ * @verbatim
+ * Grid 1:
+ *   x--x--x-----x-----------x
+ *    1  2    3        4
+ *
+ * Grid 2:
+ *   x-----x-----x-----x-----x
+ *      1     2     3     4
+ * @endverbatim
+ * (Cell numbers are only given as an example and will not correspond to real
+ * cell iterator's indices.) The mapping from grid 1 to grid 2 will then be as
+ * follows:
+ * @verbatim
+ *    Cell on grid 1         Cell on grid 2
+ *          1  ------------------>  1
+ *          2  ------------------>  1
+ *          3  ------------------>  2
+ *          4  ------------------>  mother cell of cells 3 and 4
+ *                                  (a non-active cell, not shown here)
+ * @endverbatim
+ * Besides the mappings shown here, the non-active cells on grid 1 are also
+ * valid keys. For example, the mapping for the mother cell of cells 1 and 2
+ * on the first grid will point to cell 1 on the second grid.
+ *
+ * @tparam MeshType This class may be used with any class that satisfies the
+ * @ref ConceptMeshType "MeshType concept".
+ * The extension to other classes offering iterator functions and some minor
+ * additional requirements is simple.
+ *
+ * Note that this class could in principle be based on the C++
+ * <tt>std::map<Key,Value></tt> data type. Instead, it uses another data
+ * format which is more effective both in terms of computing time for access
+ * as well as with regard to memory consumption.
+ *
+ *
+ * <h3>Usage</h3>
+ *
+ * In practice, use of this class is as follows:
+ * @code
+ *   // have two grids, which are derived from the same coarse grid
+ *   Triangulation<dim> tria1, tria2;
+ *   DoFHandler<dim> dof_handler_1 (tria1), dof_handler_2 (tria2);
+ *   ...
+ *   // do something with these objects, e.g. refine the triangulations
+ *   // differently, distribute degrees of freedom, etc
+ *   ...
+ *   // create the mapping
+ *   InterGridMap<DoFHandler<dim> > grid_1_to_2_map;
+ *   grid_1_to_2_map.make_mapping (dof_handler_1,
+ *                                 dof_handler_2);
+ *   ...
+ *   typename DoFHandler<dim>::cell_iterator cell = dof_handler_1.begin(),
+ *                                           endc = dof_handler_1.end();
+ *   for (; cell!=endc; ++cell)
+ *     // now do something with the cell of dof_handler_2 corresponding to
+ *     // cell (which is one of dof_handler_1's cells)
+ *     f (grid_1_to_2_map[cell]);
+ * @endcode
+ *
+ * Note that the template argument of this class is the complete mesh type, as
+ * in <tt>InterGridMap<DoFHandler<2> ></tt>; here it is a DoFHandler (and could
+ * equally well be Triangulation, PersistentTriangulation, or hp::DoFHandler).
+ *
+ * @ingroup grid
+ * @author Wolfgang Bangerth, 1999
+ */
+template <class MeshType>
+class InterGridMap : public Subscriptor
+{
+public:
+
+  /**
+   * Typedef to the iterator type of the grid class under consideration.
+   */
+  typedef typename MeshType::cell_iterator cell_iterator;
+
+  /**
+   * Constructor setting the class name arguments in the SmartPointer members.
+   */
+  InterGridMap();
+
+  /**
+   * Create the mapping from @p source_grid to @p destination_grid.
+   */
+  void make_mapping (const MeshType &source_grid,
+                     const MeshType &destination_grid);
+
+  /**
+   * Access operator: given a cell on the source grid, return the respective
+   * cell on the destination grid or, if that does not exist, the most
+   * refined cell from which the source cell would be created if it were
+   * further refined.
+   */
+  cell_iterator operator [] (const cell_iterator &source_cell) const;
+
+  /**
+   * Delete all data of this class.
+   */
+  void clear ();
+
+  /**
+   * Return a reference to the source grid.
+   */
+  const MeshType &get_source_grid () const;
+
+  /**
+   * Return a reference to the destination grid.
+   */
+  const MeshType &get_destination_grid () const;
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  std::size_t memory_consumption () const;
+
+  /**
+   * Exception whose message reports that an iterator is not a valid key.
+   */
+  DeclException1 (ExcInvalidKey,
+                  cell_iterator,
+                  << "The iterator " << arg1 << " is not valid as key for "
+                  << "this map.");
+  /**
+   * Exception; judging by its name it reports incompatible grids (TODO confirm).
+   */
+  DeclException0 (ExcIncompatibleGrids);
+
+private:
+  /**
+   * The actual data. Hold one iterator for each cell on each level.
+   */
+  std::vector<std::vector<cell_iterator> > mapping;
+
+  /**
+   * Store a pointer to the source grid.
+   */
+  SmartPointer<const MeshType,InterGridMap<MeshType> > source_grid;
+
+  /**
+   * Likewise for the destination grid.
+   */
+  SmartPointer<const MeshType,InterGridMap<MeshType> > destination_grid;
+
+  /**
+   * Set the mapping for the pair of cells given. These shall match in level
+   * of refinement and all other properties.
+   */
+  void set_mapping (const cell_iterator &src_cell,
+                    const cell_iterator &dst_cell);
+
+  /**
+   * Set the value of the key @p src_cell to @p dst_cell. Do so as well for
+   * all the children and their children of @p src_cell. This function is used
+   * for cells which are more refined on the source grid than on the
+   * destination grid; then all values of the hierarchy of cells and their
+   * children point to one cell on the destination grid.
+   */
+  void set_entries_to_cell (const cell_iterator &src_cell,
+                            const cell_iterator &dst_cell);
+};
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/grid/magic_numbers.h b/include/deal.II/grid/magic_numbers.h
new file mode 100644
index 0000000..d2802a7
--- /dev/null
+++ b/include/deal.II/grid/magic_numbers.h
@@ -0,0 +1,45 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__magic_numbers_h
+#define dealii__magic_numbers_h
+
+#include <deal.II/base/config.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+// This is the list of magic numbers used throughout the library.
+// They are collected in one file so that no value is used twice by accident.
+// Naming convention: every name starts with the sequence "mn_"
+// (denoting a magic number), followed by the library part
+// (e.g. "tria_" or "dof_") and finally the purpose.
+
+const unsigned int mn_tria_refine_flags_begin    = 0xa000;
+const unsigned int mn_tria_refine_flags_end      = 0xa001;
+const unsigned int mn_tria_coarsen_flags_begin   = 0xa010;
+const unsigned int mn_tria_coarsen_flags_end     = 0xa011;
+const unsigned int mn_tria_line_user_flags_begin = 0xa100;
+const unsigned int mn_tria_line_user_flags_end   = 0xa101;
+const unsigned int mn_tria_quad_user_flags_begin = 0xa110;
+const unsigned int mn_tria_quad_user_flags_end   = 0xa111;
+const unsigned int mn_tria_hex_user_flags_begin  = 0xa112;
+const unsigned int mn_tria_hex_user_flags_end    = 0xa113;
+const unsigned int mn_persistent_tria_flags_begin= 0xa200;
+const unsigned int mn_persistent_tria_flags_end  = 0xa201;
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/grid/manifold.h b/include/deal.II/grid/manifold.h
new file mode 100644
index 0000000..b3a06b2
--- /dev/null
+++ b/include/deal.II/grid/manifold.h
@@ -0,0 +1,587 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__tria_manifold_h
+#define dealii__tria_manifold_h
+
+
+/*----------------------------   manifold.h     ---------------------------*/
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/thread_management.h>
+#include <deal.II/base/point.h>
+#include <deal.II/grid/tria.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int dim, int space_dim> class Triangulation;
+
+
+/**
+ * We collect here some helper functions used in the Manifold<dim,spacedim>
+ * classes.
+ */
+namespace Manifolds
+{
+  /**
+   * Given a general mesh iterator, construct a quadrature with the Laplace
+   * weights or with uniform weights according to the parameter
+   * @p with_laplace, and with all relevant points of the iterator: vertices,
+   * line centers and/or face centers. The result can be used when creating
+   * new vertices in the manifold routines.
+   */
+  template <typename OBJECT>
+  Quadrature<OBJECT::AccessorType::space_dimension>
+  get_default_quadrature(const OBJECT &obj, bool with_laplace = false);
+}
+
+
+/**
+ * This class is used to represent a manifold to a triangulation. When a
+ * triangulation creates a new vertex on this manifold, it determines the new
+ * vertex' coordinates through the following function:
+ *
+ *   @code
+ *     ...
+ *     Point<spacedim> new_vertex = manifold.get_new_point (quadrature);
+ *     ...
+ *   @endcode
+ * @p quadrature is a Quadrature<spacedim> object, which contains a collection
+ * of points in @p spacedim dimension, and a collection of weights (Note that
+ * unlike almost all other cases in the library, we here interpret the points
+ * in the quadrature object to be in real space, not on the reference cell.)
+ *
+ * Internally, the get_new_point() function calls the project_to_manifold()
+ * function after computing the weighted average of the quadrature points.
+ * This allows end users to only overload project_to_manifold() for simple
+ * situations.
+ *
+ * Should a finer control be necessary, then get_new_point() can be
+ * overloaded.
+ *
+ * FlatManifold is the specialization from which StraightBoundary is derived,
+ * where the project_to_manifold() function is the identity.
+ *
+ * @ingroup manifold
+ * @author Luca Heltai, 2014
+ */
+template <int dim, int spacedim=dim>
+class Manifold : public Subscriptor
+{
+public:
+
+
+  /**
+   * Destructor. Does nothing here, but needs to be declared to make it
+   * virtual.
+   */
+  virtual ~Manifold ();
+
+  /**
+   * Return the point which shall become the new vertex surrounded by the
+   * given points which make up the quadrature. We use a quadrature object,
+   * which should be filled with the surrounding points together with
+   * appropriate weights.
+   *
+   * In its default implementation it calls internally the function
+   * project_to_manifold. User classes can get away by simply implementing
+   * that method.
+   */
+  virtual
+  Point<spacedim>
+  get_new_point(const Quadrature<spacedim> &quad) const;
+
+  /**
+   * Given a point which lies close to the given manifold, project it onto
+   * the manifold and return the projected point.
+   *
+   * This function is used by the default implementation of the function
+   * get_new_point(). It should be made pure virtual, but for historical
+   * reasons, derived classes like Boundary<dim, spacedim> do not implement
+   * it. The default behavior of this function, however, is to throw an
+   * exception when called.
+   *
+   * If your manifold is simple, you could implement this function only, and
+   * the default behavior should work out of the box.
+   */
+  virtual
+  Point<spacedim> project_to_manifold (const std::vector<Point<spacedim> > &surrounding_points,
+                                       const Point<spacedim> &candidate) const;
+
+  /**
+   * Backward compatibility interface.  Return the point which shall become
+   * the new middle vertex of the two children of a regular line. In 2D, this
+   * line is a line at the boundary, while in 3d, it is bounding a face at the
+   * boundary (the line therefore is also on the boundary).
+   *
+   * The default implementation of this function passes its argument to the
+   * Manifolds::get_default_quadrature() function, and then calls the
+   * Manifold<dim,spacedim>::get_new_point() function. User derived classes
+   * can overload Manifold<dim,spacedim>::get_new_point() or
+   * Manifold<dim,spacedim>::project_to_manifold(), which is called by the
+   * default implementation of Manifold<dim,spacedim>::get_new_point().
+   */
+  virtual
+  Point<spacedim>
+  get_new_point_on_line (const typename Triangulation<dim,spacedim>::line_iterator &line) const;
+
+  /**
+   * Backward compatibility interface. Return the point which shall become the
+   * common point of the four children of a quad at the boundary in three or
+   * more spatial dimensions. This function therefore is only useful in at
+   * least three dimensions and should not be called for lower dimensions.
+   *
+   * This function is called after the four lines bounding the given @p quad
+   * are refined, so you may want to use the information provided by
+   * <tt>quad->line(i)->child(j)</tt>, <tt>i=0...3</tt>, <tt>j=0,1</tt>.
+   *
+   * The default implementation of this function passes its argument to the
+   * Manifolds::get_default_quadrature() function, and then calls the
+   * Manifold<dim,spacedim>::get_new_point() function. User derived classes
+   * can overload Manifold<dim,spacedim>::get_new_point() or
+   * Manifold<dim,spacedim>::project_to_manifold(), which is called by the
+   * default implementation of Manifold<dim,spacedim>::get_new_point().
+   */
+  virtual
+  Point<spacedim>
+  get_new_point_on_quad (const typename Triangulation<dim,spacedim>::quad_iterator &quad) const;
+
+  /**
+   * Backward compatibility interface.  Return the point which shall become
+   * the common point of the eight children of a hex in three or more spatial
+   * dimensions. This function therefore is only useful in at least three
+   * dimensions and should not be called for lower dimensions.
+   *
+   * This function is called after all the bounding objects of the given
+   * @p hex are refined, so you may want to use the information provided by
+   * <tt>hex->quad(i)->line(j)->child(k)</tt>, <tt>i=0...5</tt>,
+   * <tt>j=0...3</tt>, <tt>k=0,1</tt>.
+   *
+   * The default implementation of this function passes its argument to the
+   * Manifolds::get_default_quadrature() function, and then calls the
+   * Manifold<dim,spacedim>::get_new_point() function. User derived classes
+   * can overload Manifold<dim,spacedim>::get_new_point() or
+   * Manifold<dim,spacedim>::project_to_manifold(), which is called by the
+   * default implementation of Manifold<dim,spacedim>::get_new_point().
+   */
+  virtual
+  Point<spacedim>
+  get_new_point_on_hex (const typename Triangulation<dim,spacedim>::hex_iterator &hex) const;
+
+
+  /**
+   * Backward compatibility interface. Depending on <tt>dim=2</tt> or
+   * <tt>dim=3</tt> this function calls the get_new_point_on_line or the
+   * get_new_point_on_quad function. It throws an exception for
+   * <tt>dim=1</tt>. This wrapper allows dimension independent programming.
+   */
+  Point<spacedim>
+  get_new_point_on_face (const typename Triangulation<dim,spacedim>::face_iterator &face) const;
+
+
+  /**
+   * Backward compatibility interface.  Depending on <tt>dim=1</tt>,
+   * <tt>dim=2</tt> or <tt>dim=3</tt> this function calls the
+   * get_new_point_on_line, get_new_point_on_quad or the get_new_point_on_hex
+   * function. This wrapper allows dimension independent programming.
+   */
+  Point<spacedim>
+  get_new_point_on_cell (const typename Triangulation<dim,spacedim>::cell_iterator &cell) const;
+};
+
+
+/**
+ * Specialization of Manifold<dim,spacedim>, which represents a possibly
+ * periodic Euclidean space of dimension @p dim embedded in the Euclidean
+ * space of @p spacedim dimensions. The main characteristic of this Manifold
+ * is the fact that the function
+ * FlatManifold<dim,spacedim>::project_to_manifold() is the identity function.
+ *
+ * @ingroup manifold
+ *
+ * @author Luca Heltai, 2014
+ */
+template <int dim, int spacedim=dim>
+class FlatManifold: public Manifold<dim, spacedim>
+{
+public:
+  /**
+   * Default constructor. The optional argument can be used to specify the
+   * periodicity of the spacedim-dimensional manifold (one period per
+   * direction). A periodicity value of zero means that along that direction
+   * there is no periodicity. By default no periodicity is assumed.
+   *
+   * Periodicity affects the way a middle point is computed. It is assumed
+   * that if two points are more than half period distant, then the distance
+   * should be computed by crossing the periodicity boundary, i.e., the
+   * average is computed by adding a full period to the sum of the two. For
+   * example, if along direction 0 we have 2*pi periodicity, then the average
+   * of (2*pi-eps) and (eps) is not pi, but 2*pi (or zero), since, on a
+   * periodic manifold, these two points are at distance 2*eps and not (2*pi-
+   * eps). Special cases are taken into account, to ensure that the behavior
+   * is always as expected. The second argument is used as a relative
+   * tolerance when computing distances.
+   *
+   * Periodicity will be intended in the following way: the domain is
+   * considered to be the box contained in [Point<spacedim>(), periodicity)
+   * where the right extreme is excluded. If any of the components of this box
+   * has zero length, then no periodicity is computed in that direction.
+   * Whenever a function that tries to compute averages is called, an
+   * exception will be thrown if one of the points which you are using for the
+   * average lies outside the periodicity box. The return points are
+   * guaranteed to lie in the periodicity box plus or minus
+   * tolerance*periodicity.norm().
+   */
+  FlatManifold (const Point<spacedim> periodicity=Point<spacedim>(),
+                const double tolerance=1e-10);
+
+  /**
+   * Let the new point be the weighted average of the surrounding vertices.
+   *
+   * This particular implementation constructs the weighted average of the
+   * surrounding points, and then calls internally the function
+   * project_to_manifold. The reason why we do it this way, is to allow lazy
+   * programmers to implement only the project_to_manifold function for their
+   * own Manifold classes which are small (or trivial) perturbations of a flat
+   * manifold. This is the case whenever the coarse mesh is a decent
+   * approximation of the manifold geometry. In this case, the middle point of
+   * a cell is close to the true middle point of the manifold, and a
+   * projection may suffice.
+   *
+   * For most simple geometries, it is possible to get reasonable results by
+   * deriving your own Manifold class from FlatManifold, and write a new
+   * interface only for the project_to_manifold function. You will have good
+   * approximations also with large deformations, as long as in the coarsest
+   * mesh size you are trying to refine, the middle point is not too far from
+   * the manifold mid point, i.e., as long as the coarse mesh size is small
+   * enough.
+   */
+  virtual Point<spacedim>
+  get_new_point(const Quadrature<spacedim> &quad) const;
+
+
+  /**
+   * Project to FlatManifold. This is the identity function for flat,
+   * Euclidean spaces. Note however that this function can be overloaded by
+   * derived classes, which will then benefit from the logic behind the
+   * get_new_point function, which is often very similar (if not identical)
+   * to the one implemented in this class.
+   */
+  virtual
+  Point<spacedim> project_to_manifold (const std::vector<Point<spacedim> > &points,
+                                       const Point<spacedim> &candidate) const;
+private:
+  /**
+   * The periodicity of this Manifold. Periodicity affects the way a middle
+   * point is computed. It is assumed that if two points are more than half
+   * period distant, then the distance should be computed by crossing the
+   * periodicity boundary, i.e., the average is computed by adding a full
+   * period to the sum of the two. For example, if along direction 0 we have
+   * 2*pi periodicity, then the average of (2*pi-eps) and (eps) is not pi, but
+   * 2*pi (or zero), since, on a periodic manifold, these two points are at
+   * distance 2*eps and not (2*pi-eps).
+   *
+   * A periodicity 0 along one direction means no periodicity. This is the
+   * default value for all directions.
+   */
+  const Point<spacedim> periodicity;
+  // NOTE(review): arg3[arg4] indexes a Point with the double arg4; arg3[arg1] looks intended -- confirm with callers
+  DeclException4(ExcPeriodicBox, int, Point<spacedim>, Point<spacedim>, double,
+                 << "The component number " << arg1 << " of the point [ " << arg2
+                 << " ]  is not in the interval [ " << -arg4
+                 << ", " << arg3[arg4] << "), bailing out.");
+
+  /**
+   * Relative tolerance. This tolerance is used to compute distances in double
+   * precision.
+   */
+  const double tolerance;
+};
+
+
+/**
+ * This class describes mappings that can be expressed in terms of charts.
+ * Specifically, this class with its template arguments describes a chart of
+ * dimension chartdim, which is part of a Manifold<dim,spacedim> and is used
+ * in an object of type Triangulation<dim,spacedim>:  It specializes a
+ * Manifold of dimension chartdim embedded in a manifold of dimension
+ * spacedim, for which you have explicit pull_back() and push_forward()
+ * transformations. Its use is explained in great detail in step-53.
+ *
+ * This is a helper class which is useful when you have an explicit map from
+ * an Euclidean space of dimension chartdim to an Euclidean space of dimension
+ * spacedim which represents your manifold, i.e., when your manifold
+ * $\mathcal{M}$ can be represented by a map \f[ F: \mathcal{B} \subset
+ * R^{\text{chartdim}} \mapsto \mathcal{M} \subset R^{\text{spacedim}} \f]
+ * (the push_forward() function) and that admits the inverse transformation
+ * \f[ F^{-1}: \mathcal{M} \subset R^{\text{spacedim}} \mapsto \mathcal{B}
+ * \subset R^{\text{chartdim}} \f] (the pull_back() function).
+ *
+ * The get_new_point() function of the ChartManifold class is implemented by
+ * calling the pull_back() method for all <tt>surrounding_points</tt>,
+ * computing their weighted average in the chartdim Euclidean space, and
+ * calling the push_forward() method with the resulting point, i.e., \f[
+ * p^{\text{new}} = F(\sum_i w_i F^{-1}(p_i)).  \f]
+ *
+ * Derived classes are required to implement the push_forward() and the
+ * pull_back() methods. All other functions required by mappings will then be
+ * provided by this class.
+ *
+ * The dimension arguments @p chartdim, @p dim and @p spacedim must satisfy
+ * the following relationships:
+ *   @code
+ *      dim <= spacedim
+ *      chartdim <= spacedim
+ *   @endcode
+ * However, there is no a priori relationship between @p dim and @p chartdim.
+ * For example, if you want to describe a mapping for an edge (a 1d object) in
+ * a 2d triangulation embedded in 3d space, you could do so by parameterizing
+ * it via a line
+ *   @f[
+ *      F: [0,1] \rightarrow {\mathbb R}^3
+ *   @f]
+ * in which case @p chartdim is 1. On the other hand, there is no reason why
+ * one can't describe this as a mapping
+ *   @f[
+ *      F: {\mathbb R}^3 \rightarrow {\mathbb R}^3
+ *   @f]
+ * in such a way that the line $[0,1]\times \{0\}\times \{0\}$ happens to be
+ * mapped onto the edge in question. Here, @p chartdim is 3. This may seem
+ * cumbersome but satisfies the requirements of an invertible function $F$
+ * just fine as long as it is possible to get from the edge to the pull-back
+ * space and then back again. Finally, given that we are dealing with a 2d
+ * triangulation in 3d, one will often have a mapping from, say, the 2d unit
+ * square or unit disk to the domain in 3d space, and the edge in question may
+ * simply be the mapped edge of the unit domain in 2d space. In this case, @p
+ * chartdim is 2.
+ *
+ * @ingroup manifold
+ *
+ * @author Luca Heltai, 2013, 2014
+ */
+template <int dim, int spacedim=dim, int chartdim=dim>
+class ChartManifold: public Manifold<dim,spacedim>
+{
+public:
+  /**
+   * Constructor. The optional argument can be used to specify the periodicity
+   * of the chartdim-dimensional manifold (one period per direction). A
+   * periodicity value of zero means that along that direction there is no
+   * periodicity. By default no periodicity is assumed.
+   *
+   * Periodicity affects the way a middle point is computed. It is assumed
+   * that if two points are more than half period distant, then the distance
+   * should be computed by crossing the periodicity boundary, i.e., then the
+   * average is computed by adding a full period to the sum of the two. For
+   * example, if along direction 0 we have 2*pi periodicity, then the average
+   * of (2*pi-eps) and (eps) is not pi, but 2*pi (or zero), since, on the
+   * manifold, these two points are at distance 2*eps and not (2*pi-eps).
+   */
+  ChartManifold(const Point<chartdim> periodicity=Point<chartdim>());
+
+  /**
+   * Destructor. Does nothing here, but needs to be declared to make it
+   * virtual.
+   */
+  virtual ~ChartManifold ();
+
+
+  /**
+   * Refer to the general documentation of this class and the documentation of
+   * the base class for more information.
+   */
+  virtual Point<spacedim>
+  get_new_point(const Quadrature<spacedim> &quad) const;
+
+  /**
+   * Pull back the given point in spacedim to the Euclidean chartdim
+   * dimensional space. Pure virtual: derived classes must implement it.
+   *
+   * Refer to the general documentation of this class for more information.
+   */
+  virtual Point<chartdim>
+  pull_back(const Point<spacedim> &space_point) const = 0;
+
+  /**
+   * Given a point in the chartdim dimensional Euclidean space, this method
+   * returns a point on the manifold embedded in the spacedim Euclidean space.
+   * Pure virtual: derived classes must implement it.
+   * Refer to the general documentation of this class for more information.
+   */
+  virtual Point<spacedim>
+  push_forward(const Point<chartdim> &chart_point) const = 0;
+
+private:
+  /**
+   * The sub_manifold object is used to compute the average of the points in
+   * the chart coordinates system.
+   */
+  const FlatManifold<dim,chartdim> sub_manifold;
+};
+
+
+
+
+/* -------------- declaration of explicit specializations ------------- */
+
+#ifndef DOXYGEN
+
+template <>
+Point<1>
+Manifold<1,1>::
+get_new_point_on_face (const Triangulation<1,1>::face_iterator &) const;
+
+template <>
+Point<2>
+Manifold<1,2>::
+get_new_point_on_face (const Triangulation<1,2>::face_iterator &) const;
+
+
+template <>
+Point<3>
+Manifold<1,3>::
+get_new_point_on_face (const Triangulation<1,3>::face_iterator &) const;
+
+
+template <>
+Point<1>
+Manifold<1,1>::
+get_new_point_on_quad (const Triangulation<1,1>::quad_iterator &) const;
+
+template <>
+Point<2>
+Manifold<1,2>::
+get_new_point_on_quad (const Triangulation<1,2>::quad_iterator &) const;
+
+
+template <>
+Point<3>
+Manifold<1,3>::
+get_new_point_on_quad (const Triangulation<1,3>::quad_iterator &) const;
+
+
+template <>
+Point<3>
+Manifold<3,3>::
+get_new_point_on_hex (const Triangulation<3,3>::hex_iterator &) const;
+
+/*---Templated functions---*/
+
+namespace Manifolds
+{
+  // Definition of the get_default_quadrature() template declared earlier in this file.
+  template <typename OBJECT>
+  Quadrature<OBJECT::AccessorType::space_dimension>
+  get_default_quadrature(const OBJECT &obj, bool with_laplace)
+  {
+    const int spacedim = OBJECT::AccessorType::space_dimension;
+    const int dim = OBJECT::AccessorType::structure_dimension;
+
+    std::vector<Point<spacedim> > sp;
+    std::vector<double> wp;
+
+    // sp collects the quadrature points and wp the weight of each point.
+    // Note for the 2d case: the Laplace weights are chosen such as to
+    // minimize the distortion of the four new quads from the optimal shape;
+    // their derivation and values are copied over from the
+    // @p{MappingQ::set_laplace_on_vector} function
+    switch (dim)
+      {
+      case 1: // two vertices with weight 1/2 each; with_laplace is not consulted
+        sp.resize(2);
+        wp.resize(2);
+        sp[0] = obj->vertex(0);
+        wp[0] = .5;
+        sp[1] = obj->vertex(1);
+        wp[1] = .5;
+        break;
+      case 2:
+        sp.resize(8);
+        wp.resize(8);
+        // sp[0..3]: vertices; sp[4..7]: per line, child(0)->vertex(1) or a new manifold point
+        for (unsigned int i=0; i<4; ++i)
+          {
+            sp[i] = obj->vertex(i);
+            sp[4+i] = ( obj->line(i)->has_children() ?
+                        obj->line(i)->child(0)->vertex(1) :
+                        obj->line(i)->get_manifold().get_new_point_on_line(obj->line(i)) );
+          }
+
+        if (with_laplace)
+          {
+            std::fill(wp.begin(), wp.begin()+4, 1.0/16.0); // the four vertices
+            std::fill(wp.begin()+4, wp.end(), 3.0/16.0); // the four line points
+          }
+        else
+          std::fill(wp.begin(), wp.end(), 1.0/8.0); // uniform weights
+        break;
+      case 3:
+      {
+        TriaIterator<TriaAccessor<3, 3, 3> > hex
+          = static_cast<TriaIterator<TriaAccessor<3, 3, 3> > >(obj);
+        const unsigned int np =
+          GeometryInfo<dim>::vertices_per_cell+
+          GeometryInfo<dim>::lines_per_cell+
+          GeometryInfo<dim>::faces_per_cell;
+        sp.resize(np);
+        wp.resize(np);
+        std::vector<Point<3> > *sp3 = reinterpret_cast<std::vector<Point<3> > *>(&sp);
+        // sp3 views sp as a vector of Point<3>; NOTE(review): valid only if spacedim == 3 -- confirm
+        unsigned int j=0;
+
+        // note that the exact weights are chosen such as to minimize the
+        // distortion of the eight new hexes from the optimal shape; their
+        // derivation and values are copied over from the
+        // @p{MappingQ::set_laplace_on_vector} function
+        for (unsigned int i=0; i<GeometryInfo<dim>::vertices_per_cell; ++i, ++j)
+          {
+            (*sp3)[j] = hex->vertex(i);
+            wp[j] = 1.0/128.0;
+          }
+        for (unsigned int i=0; i<GeometryInfo<dim>::lines_per_cell; ++i, ++j)
+          {
+            (*sp3)[j] = (hex->line(i)->has_children() ?
+                         hex->line(i)->child(0)->vertex(1) :
+                         hex->line(i)->get_manifold().get_new_point_on_line(hex->line(i)));
+            wp[j] = 7.0/192.0;
+          }
+        for (unsigned int i=0; i<GeometryInfo<dim>::faces_per_cell; ++i, ++j)
+          {
+            (*sp3)[j] = (hex->quad(i)->has_children() ?
+                         hex->quad(i)->isotropic_child(0)->vertex(3) :
+                         hex->quad(i)->get_manifold().get_new_point_on_quad(hex->quad(i)));
+            wp[j] = 1.0/12.0;
+          }
+        // Overwrite all weights with the uniform value 1/np if Laplace
+        // weights were not requested.
+        if (with_laplace == false)
+          std::fill(wp.begin(), wp.end(), 1.0/np);
+      }
+      break;
+      default:
+        Assert(false, ExcInternalError());
+        break;
+      }
+    return Quadrature<spacedim>(sp,wp);
+  }
+}
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/grid/manifold_lib.h b/include/deal.II/grid/manifold_lib.h
new file mode 100644
index 0000000..7869479
--- /dev/null
+++ b/include/deal.II/grid/manifold_lib.h
@@ -0,0 +1,296 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__manifold_lib_h
+#define dealii__manifold_lib_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/grid/manifold.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/function_parser.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * Manifold description for a spherical space coordinate system.
+ *
+ * You can use this Manifold object to describe any sphere, circle,
+ * hypersphere or hyperdisc in two or three dimensions, both as a co-dimension
+ * one manifold descriptor or as co-dimension zero manifold descriptor.
+ *
+ * The two template arguments match the meaning of the two template arguments
+ * in Triangulation<dim, spacedim>, however this Manifold can be used to
+ * describe both thin and thick objects, and the behavior is identical when
+ * dim <= spacedim, i.e., the functionality of SphericalManifold<2,3> is
+ * identical to SphericalManifold<3,3>.
+ *
+ * The two dimensional implementation of this class works by transforming
+ * points to spherical coordinates, taking the average in that coordinate
+ * system, and then transforming back the point to Cartesian coordinates. For
+ * the three dimensional case, we use a simpler approach: we take the average
+ * of the norm of the points, and use this value to shift the average point
+ * along the radial direction. In order for this manifold to work correctly,
+ * it cannot be attached to cells containing the center of the coordinate
+ * system. This point is a singular point of the coordinate transformation,
+ * and there taking averages does not make any sense.
+ *
+ * This class is used in step-1 and step-2 to describe the boundaries of
+ * circles. Its use is also discussed in the results section of step-6.
+ *
+ * @ingroup manifold
+ *
+ * @author Luca Heltai, 2014
+ */
+template <int dim, int spacedim = dim>
+class SphericalManifold : public ChartManifold<dim, spacedim, spacedim>
+{
+public:
+  /**
+   * The constructor takes the center of the spherical coordinate system.
+   * This class uses the pull_back and push_forward mechanism to transform
+   * from Cartesian to spherical coordinate systems, taking into account the
+   * periodicity of base Manifold in two dimensions, while in three dimensions
+   * it takes the middle point and projects it along the radius using the
+   * average radius of the surrounding points.
+   */
+  SphericalManifold(const Point<spacedim> center = Point<spacedim>());
+
+  /**
+   * Pull back the given point from the Euclidean space. Will return the polar
+   * coordinates associated with the point @p space_point. Only used when
+   * spacedim = 2.
+   */
+  virtual Point<spacedim>
+  pull_back(const Point<spacedim> &space_point) const;
+
+  /**
+   * Given a point in the spherical coordinate system, this method returns the
+   * Euclidean coordinates associated to the polar coordinates @p chart_point.
+   * Only used when spacedim = 2.
+   */
+  virtual Point<spacedim>
+  push_forward(const Point<spacedim> &chart_point) const;
+
+  /**
+   * Let the new point be the average sum of surrounding vertices.
+   *
+   * In the two dimensional implementation, we use the pull_back and
+   * push_forward mechanism. For three dimensions, this does not work well, so
+   * we override the get_new_point function directly.
+   */
+  virtual Point<spacedim>
+  get_new_point(const Quadrature<spacedim> &quad) const;
+
+  /**
+   * The center of the spherical coordinate system.
+   */
+  const Point<spacedim> center;
+private:
+
+  /**
+   * Helper function which returns the periodicity associated with this
+   * coordinate system, according to dim, chartdim, and spacedim.
+   */
+  static Point<spacedim> get_periodicity();
+};
+
+
+/**
+ * Cylindrical Manifold description.  In three dimensions, points are
+ * transformed using a cylindrical coordinate system along the <tt>x-</tt>,
+ * <tt>y-</tt> or <tt>z</tt>-axis (when using the first constructor of this
+ * class), or an arbitrarily oriented cylinder described by the direction of
+ * its axis and a point located on the axis.
+ *
+ * This class was developed to be used in conjunction with the @p cylinder or
+ * @p cylinder_shell functions of GridGenerator. This class will throw an
+ * exception whenever spacedim is not equal to three.
+ *
+ * @ingroup manifold
+ *
+ * @author Luca Heltai, 2014
+ */
+template <int dim, int spacedim = dim>
+class CylindricalManifold : public Manifold<dim,spacedim>
+{
+public:
+  /**
+   * Constructor. Using default values for the constructor arguments yields a
+   * cylinder along the x-axis (<tt>axis=0</tt>). Choose <tt>axis=1</tt> or
+   * <tt>axis=2</tt> for a tube along the y- or z-axis, respectively. The
+   * tolerance value is used to determine if a point is on the axis.
+   */
+  CylindricalManifold (const unsigned int axis = 0,
+                       const double tolerance = 1e-10);
+
+  /**
+   * Constructor. If constructed with this constructor, the manifold described
+   * is a cylinder with an axis that points in direction #direction and goes
+   * through the given #point_on_axis. The direction may be arbitrarily
+   * scaled, and the given point may be any point on the axis. The tolerance
+   * value is used to determine if a point is on the axis.
+   */
+  CylindricalManifold (const Point<spacedim> &direction,
+                       const Point<spacedim> &point_on_axis,
+                       const double tolerance = 1e-10);
+
+  /**
+   * Compute new points on the CylindricalManifold. See the documentation of
+   * the base class for a detailed description of what this function does.
+   */
+  virtual Point<spacedim>
+  get_new_point(const Quadrature<spacedim> &quad) const;
+
+protected:
+  /**
+   * The direction vector of the axis.
+   */
+  const Point<spacedim> direction;
+
+  /**
+   * An arbitrary point on the axis.
+   */
+  const Point<spacedim> point_on_axis;
+
+private:
+  /**
+   * Helper FlatManifold to compute tentative midpoints.
+   */
+  FlatManifold<dim,spacedim> flat_manifold;
+
+  /**
+   * Relative tolerance to measure zero distances.
+   */
+  double tolerance;
+};
+
+
+/**
+ * Manifold description derived from ChartManifold, based on explicit
+ * Function<spacedim> and Function<chartdim> objects describing the
+ * push_forward() and pull_back() functions.
+ *
+ * You can use this Manifold object to describe any arbitrary shape domain, as
+ * long as you can express it in terms of an invertible map, for which you
+ * provide both the forward expression, and the inverse expression.
+ *
+ * In debug mode, a check is performed to verify that the transformations are
+ * actually inverses of each other.
+ *
+ * @ingroup manifold
+ *
+ * @author Luca Heltai, 2014
+ */
+template <int dim, int spacedim=dim, int chartdim=dim>
+class FunctionManifold : public ChartManifold<dim, spacedim, chartdim>
+{
+public:
+  /**
+   * Explicit functions constructor. Takes a push_forward function of spacedim
+   * components, and a pull_back function of @p chartdim components. See the
+   * documentation of the base class ChartManifold for the meaning of the
+   * optional @p periodicity argument.
+   *
+   * The tolerance argument is used in debug mode to check that the two
+   * functions are inverses of each other.
+   */
+  FunctionManifold(const Function<chartdim> &push_forward_function,
+                   const Function<spacedim> &pull_back_function,
+                   const Point<chartdim> periodicity=Point<chartdim>(),
+                   const double tolerance=1e-10);
+
+  /**
+   * Expressions constructor. Takes the expressions of the push_forward
+   * function of spacedim components, and of the pull_back function of @p
+   * chartdim components. See the documentation of the base class
+   * ChartManifold for the meaning of the optional @p periodicity argument.
+   *
+   * The strings should be readable by the default constructor of the
+   * FunctionParser classes. You can specify custom variable names with the
+   * optional @p chart_vars and @p space_vars arguments. If you don't, the
+   * default names are used, i.e., "x,y,z".
+   *
+   * The tolerance argument is used in debug mode to check that the two
+   * functions are inverses of each other.
+   */
+  FunctionManifold(const std::string push_forward_expression,
+                   const std::string pull_back_expression,
+                   const Point<chartdim> periodicity=Point<chartdim>(),
+                   const typename FunctionParser<spacedim>::ConstMap = typename FunctionParser<spacedim>::ConstMap(),
+                   const std::string chart_vars=FunctionParser<chartdim>::default_variable_names(),
+                   const std::string space_vars=FunctionParser<spacedim>::default_variable_names(),
+                   const double tolerance=1e-10);
+
+  /**
+   * If needed, we delete the pointers we own.
+   */
+  ~FunctionManifold();
+
+  /**
+   * Given a point in the @p chartdim coordinate system, uses the
+   * push_forward_function to compute the push_forward of points in @p
+   * chartdim space dimensions to @p spacedim space dimensions.
+   */
+  virtual Point<spacedim>
+  push_forward(const Point<chartdim> &chart_point) const;
+
+  /**
+   * Given a point in the spacedim coordinate system, uses the
+   * pull_back_function to compute the pull_back of points in @p spacedim
+   * space dimensions to @p chartdim space dimensions.
+   */
+  virtual Point<chartdim>
+  pull_back(const Point<spacedim> &space_point) const;
+
+private:
+  /**
+   * Constants for the FunctionParser classes.
+   */
+  const typename FunctionParser<spacedim>::ConstMap const_map;
+
+  /**
+   * Pointer to the push_forward function.
+   */
+  SmartPointer<const Function<chartdim>,
+               FunctionManifold<dim,spacedim,chartdim> > push_forward_function;
+
+  /**
+   * Pointer to the pull_back function.
+   */
+  SmartPointer<const Function<spacedim>,
+               FunctionManifold<dim,spacedim,chartdim> > pull_back_function;
+
+  /**
+   * Relative tolerance. In debug mode, we check that the two functions
+   * provided at construction time are actually inverses of each other.
+   * This value is used as relative tolerance in this check.
+   */
+  const double tolerance;
+
+  /**
+   * Check ownership of the smart pointers. Indicates whether this class is
+   * the owner of the objects pointed to by the previous two member variables.
+   * This value is set in the constructor of the class. If @p true, then the
+   * destructor will delete the function objects pointed to by the two
+   * pointers.
+   */
+  const bool owns_pointers;
+};
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/grid/persistent_tria.h b/include/deal.II/grid/persistent_tria.h
new file mode 100644
index 0000000..6e0e3e7
--- /dev/null
+++ b/include/deal.II/grid/persistent_tria.h
@@ -0,0 +1,260 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__persistent_tria_h
+#define dealii__persistent_tria_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/grid/tria.h>
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * This class handles the history of a triangulation and can rebuild it after
+ * it was deleted some time before. Its main purpose is support for time-
+ * dependent problems where one frequently deletes a triangulation due to
+ * memory pressure and later wants to rebuild it; this class has all the
+ * information to rebuild it exactly as it was before including the mapping of
+ * cell numbers to the geometrical cells.
+ *
+ * Basically, this is a drop-in replacement for the triangulation. Since it is
+ * derived from the Triangulation class, it shares all the functionality, but
+ * it overrides some virtual functions and adds some functions, too. The main
+ * change to the base class is that it overrides the @p
+ * execute_coarsening_and_refinement function, where the new version first
+ * stores all refinement and coarsening flags and only then calls the
+ * respective function of the base class. The stored flags may later be used
+ * to restore the grid just as it was before. Some other functions have been
+ * extended slightly as well, see their documentation for more information.
+ *
+ * We note that since the triangulation is created in exactly the same state
+ * as it was before, other objects working on it should result in the same
+ * state as well. This holds in particular for the DoFHandler object, which
+ * will assign the same degrees of freedom to the original cells and the ones
+ * after reconstruction of the triangulation. You can therefore safely use
+ * data vectors computed on the original grid on the reconstructed grid as
+ * well.
+ *
+ *
+ * <h3>Usage</h3> You can use objects of this class almost in the same way as
+ * objects of the Triangulation class. One of the few differences is that you
+ * can only construct such an object by giving a coarse grid to the
+ * constructor. The coarse grid will be used to base the triangulation on, and
+ * therefore the lifetime of the coarse grid has to be longer than the
+ * lifetime of the object of this class.
+ *
+ * Basically, usage looks like this:
+ * @code
+ *   Triangulation<dim> coarse_grid;
+ *   ...                     // initialize coarse grid
+ *
+ *   PersistentTriangulation<dim> grid (coarse_grid);
+ *
+ *   for (...)
+ *     {
+ *                           // restore grid from coarse grid
+ *                           // and stored refinement flags
+ *       grid.restore ();
+ *       ...                 // do something with the grid
+ *
+ *       ...                 // flag some cells for refinement
+ *                           // or coarsening
+ *       grid.execute_coarsening_and_refinement ();
+ *                           // actually refine grid and store
+ *                           // the flags
+ *
+ *       ...                 // do something more with the grid
+ *
+ *       grid.clear ();      // delete the grid, but keep the
+ *                           // refinement flags for later use
+ *                           // in grid.restore() above
+ *
+ *       ...                 // do something where the grid
+ *                           // is not needed anymore, e.g.
+ *                           // working with another grid
+ *     };
+ * @endcode
+ *
+ * Note that initially, the PersistentTriangulation object does not constitute
+ * a triangulation; it only becomes one after @p restore is first called. Note
+ * also that the @p execute_coarsening_and_refinement stores all necessary
+ * flags for later reconstruction using the @p restore function.
+ * Triangulation::clear() resets the underlying triangulation to a virgin
+ * state, but does not affect the stored refinement flags needed for later
+ * reconstruction and does also not touch the coarse grid which is used within
+ * restore().
+ *
+ * @ingroup grid
+ * @author Wolfgang Bangerth, 1999
+ */
+template <int dim, int spacedim=dim>
+class PersistentTriangulation : public Triangulation<dim, spacedim>
+{
+public:
+  /**
+   * Make the dimension available in function templates.
+   */
+  static const unsigned int dimension = dim;
+  static const unsigned int spacedimension = spacedim;
+
+  /**
+   * Build up the triangulation from the coarse grid in future. Copy smoothing
+   * flags, etc from that grid as well. Note that the initial state of the
+   * triangulation is empty, until @p restore is called for the first
+   * time.
+   *
+   * The coarse grid must persist until the end of this object, since it will
+   * be used upon reconstruction of the grid.
+   */
+  PersistentTriangulation (const Triangulation<dim, spacedim> &coarse_grid);
+
+  /**
+   * Copy constructor. This operation is only allowed if the triangulation
+   * underlying the object to be copied is presently empty. Refinement flags
+   * as well as the pointer to the coarse grid are copied, however.
+   */
+  PersistentTriangulation (const PersistentTriangulation<dim, spacedim> &old_tria);
+
+  /**
+   * Destructor.
+   */
+  virtual ~PersistentTriangulation ();
+
+  /**
+   * Overloaded version of the same function in the base class which stores
+   * the refinement and coarsening flags for later reconstruction of the
+   * triangulation and after that calls the respective function of the base
+   * class.
+   */
+  virtual void execute_coarsening_and_refinement ();
+
+  /**
+   * Restore the grid according to the saved data. For this, the coarse grid
+   * is copied and the grid is stepwise rebuilt using the saved flags.
+   *
+   * Note that this function will result in an error if the underlying
+   * triangulation is not empty, i.e. it will only succeed if this object is
+   * newly created or the <tt>clear()</tt> function of the base class was
+   * called on it before.
+   *
+   * Repeatedly calls the <tt>restore(unsigned int)</tt> function in a loop
+   * over all refinement steps.
+   */
+  void restore ();
+
+  /**
+   * Differential restore. Performs the @p step_no-th local refinement and
+   * coarsening step. Step 0 stands for the copying of the coarse grid.
+   *
+   * This function will only succeed if the triangulation is in exactly the
+   * state it would be in if restore had been called for
+   * <tt>step=0...step_no-1</tt> before.
+   */
+  void restore (const unsigned int step_no);
+
+  /**
+   * Returns the number of refinement and coarsening steps. This is given by
+   * the size of the @p refine_flags vector.
+   */
+  unsigned int n_refinement_steps () const;
+
+  /**
+   * Overload this function to use @p tria as a new coarse grid. The present
+   * triangulation and all refinement and coarsening flags storing its history
+   * are deleted, and the state of the underlying triangulation is reset to be
+   * empty, until @p restore is called the next time.
+   *
+   * The coarse grid must persist until the end of this object, since it will
+   * be used upon reconstruction of the grid.
+   */
+  virtual void copy_triangulation (const Triangulation<dim, spacedim> &tria);
+
+  /**
+   * Throw an error, since this function is not useful in the context of this
+   * class.
+   */
+  virtual void create_triangulation (const std::vector<Point<spacedim> >    &vertices,
+                                     const std::vector<CellData<dim> > &cells,
+                                     const SubCellData                 &subcelldata);
+
+  /**
+   * An overload of the respective function of the base class.
+   *
+   * Throw an error, since this function is not useful in the context of this
+   * class.
+   */
+  virtual void create_triangulation_compatibility (
+    const std::vector<Point<spacedim> >    &vertices,
+    const std::vector<CellData<dim> > &cells,
+    const SubCellData                 &subcelldata);
+
+  /**
+   * Writes all refine and coarsen flags to the ostream @p out.
+   */
+  virtual void write_flags(std::ostream &out) const;
+
+  /**
+   * Reads all refine and coarsen flags that previously were written by
+   * <tt>write_flags(...)</tt>. This is especially useful for rebuilding the
+   * triangulation after the end or breakdown of a program and its restart.
+   */
+  virtual void read_flags(std::istream &in);
+
+  /**
+   * Clears all flags. Retains the same coarse grid.
+   */
+  virtual void clear_flags();
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  virtual std::size_t memory_consumption () const;
+
+  /**
+   * Exception.
+   */
+  DeclException0 (ExcTriaNotEmpty);
+  /**
+   * Exception.
+   */
+  DeclException0 (ExcFlagsNotCleared);
+
+private:
+  /**
+   * This grid shall be used as coarse grid.
+   */
+  SmartPointer<const Triangulation<dim,spacedim>,PersistentTriangulation<dim,spacedim> > coarse_grid;
+
+  /**
+   * Vectors holding the refinement and coarsening flags of the different
+   * sweeps on this time level. The vectors therefore hold the history of the
+   * grid.
+   */
+  std::vector<std::vector<bool> >   refine_flags;
+
+  /**
+   * @ref refine_flags
+   */
+  std::vector<std::vector<bool> >   coarsen_flags;
+};
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/grid/tria.h b/include/deal.II/grid/tria.h
new file mode 100644
index 0000000..f6b8577
--- /dev/null
+++ b/include/deal.II/grid/tria.h
@@ -0,0 +1,3633 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__tria_h
+#define dealii__tria_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/point.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/base/geometry_info.h>
+#include <deal.II/base/iterator_range.h>
+#include <deal.II/base/std_cxx11/function.h>
+#include <deal.II/grid/tria_iterator_selector.h>
+#include <deal.II/grid/tria_faces.h>
+#include <deal.II/grid/tria_levels.h>
+
+// Ignore deprecation warnings for auto_ptr.
+DEAL_II_DISABLE_EXTRA_DIAGNOSTICS
+#include <boost/signals2.hpp>
+#include <boost/serialization/vector.hpp>
+#include <boost/serialization/map.hpp>
+#include <boost/serialization/split_member.hpp>
+DEAL_II_ENABLE_EXTRA_DIAGNOSTICS
+
+#include <vector>
+#include <list>
+#include <map>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int dim, int spacedim> class Boundary;
+template <int dim, int spacedim> class StraightBoundary;
+template <int dim, int spacedim> class Manifold;
+
+template <int, int, int> class TriaAccessor;
+template <int spacedim> class TriaAccessor<0,1,spacedim>;
+
+
+namespace internal
+{
+  namespace Triangulation
+  {
+    template <int dim> class TriaLevel;
+    template <int dim> class TriaFaces;
+
+    template <typename> class TriaObjects;
+
+    /**
+     * Forward declaration of a class into which we put much of the
+     * implementation of the Triangulation class. See the .cc file for more
+     * information.
+     */
+    struct Implementation;
+  }
+
+  namespace TriaAccessor
+  {
+    struct Implementation;
+  }
+}
+
+template <int dim, int spacedim> class DoFHandler;
+namespace hp
+{
+  template <int dim, int spacedim> class DoFHandler;
+}
+
+
+/*------------------------------------------------------------------------*/
+
+/**
+ * A structure to describe individual cells and passed as argument to
+ * Triangulation::create_triangulation(). It contains all data needed to
+ * construct a cell, namely the indices of the vertices, the material or
+ * boundary indicator (depending on whether it represents a cell or a face),
+ * and a manifold id to describe the manifold this object belongs to.
+ *
+ * This structure is also used to represent data for faces and edges as part of
+ * the SubCellData class. In that case the #vertices array needs to represent
+ * the vertices of a face or edge of a cell listed in the argument to
+ * Triangulation::create_triangulation() that denotes the faces. It can be
+ * used to attach boundary indicators to faces.
+ *
+ * An example showing how this class can be used is in the
+ * <code>create_coarse_grid()</code> function of step-14.
+ *
+ * @ingroup grid
+ */
+template <int structdim>
+struct CellData
+{
+  /**
+   * Indices of the vertices of this cell.
+   */
+  unsigned int vertices[GeometryInfo<structdim>::vertices_per_cell];
+
+  /**
+   * Material or boundary indicator of this cell. The material_id may be used
+   * to denote different coefficients, etc.
+   *
+   * Note that if this object is part of a SubCellData object, then it
+   * represents a face or edge of a cell. In this case one should use the
+   * field boundary_id instead of material_id.
+   */
+  union
+  {
+    types::boundary_id boundary_id;
+    types::material_id material_id;
+  };
+
+  /**
+   * Manifold identifier of this object. This identifier should be used to
+   * identify the manifold to which this object belongs, and from which this
+   * object will collect information on how to add points upon refinement.
+   */
+  types::manifold_id manifold_id;
+
+  /**
+   * Default constructor. Sets the member variables to the following values:
+   * vertex indices to invalid values; boundary or material id to zero (the
+   * default for boundary or material ids); and manifold id to
+   * numbers::invalid_manifold_id.
+   */
+  CellData ();
+};
+
+
+
+/**
+ * Structure to be passed to Triangulation::create_triangulation function to
+ * describe boundary information.
+ *
+ * This structure is the same for all dimensions, since we use an input
+ * function which is the same for all dimensions. The content of objects of
+ * this structure varies with the dimensions, however.
+ *
+ * Since in one dimension, there is no boundary information apart from the two
+ * end points of the interval, this structure does not contain anything and
+ * exists only for consistency, to allow a common interface for all space
+ * dimensions. All fields should always be empty.
+ *
+ * Boundary data in 2D consists of a list of lines which belong to a given
+ * boundary component. A boundary component is a list of lines which are given
+ * a common number describing the boundary condition to hold on this part of
+ * the boundary. The triangulation creation function gives lines not in this
+ * list either the boundary indicator zero (if on the boundary) or
+ * numbers::internal_face_boundary_id (if in the interior).
+ *
+ * You will get an error if you try to set the boundary indicator of an
+ * interior edge or face, i.e., an edge or face that is not at the boundary of
+ * the mesh. However, one may sometimes want to set the manifold indicator to
+ * an interior object. In this case, set its boundary indicator to
+ * numbers::internal_face_boundary_id, to indicate that you understand that it
+ * is an interior object, but set its manifold id to the value you want.
+ *
+ * @ingroup grid
+ */
+struct SubCellData
+{
+  /**
+   * Each record of this vector describes a line on the boundary and its
+   * boundary indicator.
+   */
+  std::vector<CellData<1> > boundary_lines;
+
+  /**
+   * Each record of this vector describes a quad on the boundary and its
+   * boundary indicator.
+   */
+  std::vector<CellData<2> > boundary_quads;
+
+  /**
+   * Check whether the vectors which may not be used in a given dimension
+   * are really empty, i.e., whether the <tt>boundary_*</tt> arrays are empty
+   * in one space dimension and whether the @p boundary_quads array is empty
+   * in two dimensions.
+   *
+   * Since this structure is the same for all dimensions, the actual dimension
+   * has to be given as a parameter.
+   */
+  bool check_consistency (const unsigned int dim) const;
+};
+
+
+/*------------------------------------------------------------------------*/
+
+
+namespace internal
+{
+  /**
+   * A namespace for classes internal to the triangulation classes and
+   * helpers.
+   */
+  namespace Triangulation
+  {
+
+    /**
+     * Cache class used to store the number of used and active elements (lines
+     * or quads etc) within the levels of a triangulation. This is only the
+     * general template; explicit specializations for dim=1,2,3 are below.
+     *
+     * In the old days, whenever one wanted to access one of these numbers,
+     * one had to perform a loop over all lines, e.g., and count the elements
+     * until we hit the end iterator. This is time consuming and since access
+     * to the number of lines etc is a rather frequent operation, this was not
+     * an optimal solution.
+     *
+     * @author Wolfgang Bangerth, 1999
+     */
+    template <int dim>
+    struct NumberCache
+    {
+    };
+
+    /**
+     * Cache class used to store the number of used and active elements (lines
+     * or quads etc) within the levels of a triangulation. This specialization
+     * stores the numbers of lines.
+     *
+     * In the old days, whenever one wanted to access one of these numbers,
+     * one had to perform a loop over all lines, e.g., and count the elements
+     * until we hit the end iterator. This is time consuming and since access
+     * to the number of lines etc is a rather frequent operation, this was not
+     * an optimal solution.
+     *
+     * @author Wolfgang Bangerth, 1999
+     */
+    template <>
+    struct NumberCache<1>
+    {
+      /**
+       * The number of levels on which we have used objects.
+       */
+      unsigned int n_levels;
+
+      /**
+       * Number of used lines in the whole triangulation.
+       */
+      unsigned int n_lines;
+
+      /**
+       * Array holding the number of used lines on each level.
+       */
+      std::vector<unsigned int> n_lines_level;
+
+      /**
+       * Number of active lines in the whole triangulation.
+       */
+      unsigned int n_active_lines;
+
+      /**
+       * Array holding the number of active lines on each level.
+       */
+      std::vector<unsigned int> n_active_lines_level;
+
+      /**
+       * Constructor. Set values to zero by default.
+       */
+      NumberCache ();
+
+      /**
+       * Determine an estimate for the memory consumption (in bytes) of this
+       * object.
+       */
+      std::size_t memory_consumption () const;
+
+      /**
+       * Read or write the data of this object to or from a stream for the
+       * purpose of serialization.
+       */
+      template <class Archive>
+      void serialize (Archive &ar,
+                      const unsigned int version);
+    };
+
+
+    /**
+     * Cache class used to store the number of used and active elements (lines
+     * or quads etc) within the levels of a triangulation. This specialization
+     * stores the numbers of quads. Due to the inheritance from the base class
+     * NumberCache<1>, the numbers of lines are also within this class.
+     *
+     * In the old days, whenever one wanted to access one of these numbers,
+     * one had to perform a loop over all lines, e.g., and count the elements
+     * until we hit the end iterator. This is time consuming and since access
+     * to the number of lines etc is a rather frequent operation, this was not
+     * an optimal solution.
+     *
+     * @author Wolfgang Bangerth, 1999
+     */
+    template <>
+    struct NumberCache<2> : public NumberCache<1>
+    {
+      /**
+       * Number of used quads in the whole triangulation, i.e. the total over
+       * all levels.
+       */
+      unsigned int n_quads;
+
+      /**
+       * Array holding the number of used quads on each level.
+       *
+       * NOTE(review): presumably indexed by level number -- confirm against
+       * the code that fills this cache.
+       */
+      std::vector<unsigned int> n_quads_level;
+
+      /**
+       * Number of active quads in the whole triangulation, i.e. the total
+       * over all levels.
+       */
+      unsigned int n_active_quads;
+
+      /**
+       * Array holding the number of active quads on each level.
+       *
+       * NOTE(review): presumably indexed by level number -- confirm against
+       * the code that fills this cache.
+       */
+      std::vector<unsigned int> n_active_quads_level;
+
+      /**
+       * Constructor. Set values to zero by default.
+       *
+       * Only the declaration is given here; the definition is located
+       * elsewhere.
+       */
+      NumberCache ();
+
+      /**
+       * Determine an estimate for the memory consumption (in bytes) of this
+       * object.
+       *
+       * This non-virtual function hides the function of the same name
+       * inherited from NumberCache<1>.
+       *
+       * NOTE(review): presumably the estimate includes the inherited line
+       * counters -- confirm in the definition.
+       *
+       * @return The estimated size of this object in bytes.
+       */
+      std::size_t memory_consumption () const;
+
+      /**
+       * Read or write the data of this object to or from a stream for the
+       * purpose of serialization.
+       *
+       * This function template hides the one of the same name inherited from
+       * NumberCache<1>.
+       *
+       * @param ar The archive to read from or to write to.
+       * @param version Version number of the serialized format. How (or
+       * whether) it is used cannot be seen from this declaration.
+       *
+       * NOTE(review): presumably the inherited line counters are serialized
+       * as well -- confirm in the definition.
+       */
+      template <class Archive>
+      void serialize (Archive &ar,
+                      const unsigned int version);
+    };
+
+
+    /**
+     * Cache class used to store the number of used and active elements (lines
+     * or quads etc) within the levels of a triangulation. This specialization
+     * stores the numbers of hexes. Due to the inheritance from the base class
+     * NumberCache<2>, the numbers of lines and quads are also within this
+     * class.
+     *
+     * In the old days, whenever one wanted to access one of these numbers,
+     * one had to perform a loop over all lines, e.g., and count the elements
+     * until we hit the end iterator. This is time consuming and since access
+     * to the number of lines etc is a rather frequent operation, this was not
+     * an optimal solution.
+     *
+     * @author Wolfgang Bangerth, 1999
+     */
+    template <>
+    struct NumberCache<3> : public NumberCache<2>
+    {
+      /**
+       * Number of used hexes in the whole triangulation, i.e. the total over
+       * all levels.
+       */
+      unsigned int n_hexes;
+
+      /**
+       * Array holding the number of used hexes on each level.
+       *
+       * NOTE(review): presumably indexed by level number -- confirm against
+       * the code that fills this cache.
+       */
+      std::vector<unsigned int> n_hexes_level;
+
+      /**
+       * Number of active hexes in the whole triangulation, i.e. the total
+       * over all levels.
+       */
+      unsigned int n_active_hexes;
+
+      /**
+       * Array holding the number of active hexes on each level.
+       *
+       * NOTE(review): presumably indexed by level number -- confirm against
+       * the code that fills this cache.
+       */
+      std::vector<unsigned int> n_active_hexes_level;
+
+      /**
+       * Constructor. Set values to zero by default.
+       *
+       * Only the declaration is given here; the definition is located
+       * elsewhere.
+       */
+      NumberCache ();
+
+      /**
+       * Determine an estimate for the memory consumption (in bytes) of this
+       * object.
+       *
+       * This non-virtual function hides the function of the same name
+       * inherited from NumberCache<2>.
+       *
+       * NOTE(review): presumably the estimate includes the inherited line
+       * and quad counters -- confirm in the definition.
+       *
+       * @return The estimated size of this object in bytes.
+       */
+      std::size_t memory_consumption () const;
+
+      /**
+       * Read or write the data of this object to or from a stream for the
+       * purpose of serialization.
+       *
+       * This function template hides the one of the same name inherited from
+       * NumberCache<2>.
+       *
+       * @param ar The archive to read from or to write to.
+       * @param version Version number of the serialized format. How (or
+       * whether) it is used cannot be seen from this declaration.
+       *
+       * NOTE(review): presumably the inherited line and quad counters are
+       * serialized as well -- confirm in the definition.
+       */
+      template <class Archive>
+      void serialize (Archive &ar,
+                      const unsigned int version);
+    };
+  }
+}
+
+
+/*------------------------------------------------------------------------*/
+
+
+/**
+ * Triangulations denote a hierarchy of levels of elements which together form
+ * a @p dim -dimensional manifold in @p spacedim spatial dimensions (if
+ * spacedim is not specified it takes the default value @p spacedim=dim).
+ *
+ * Thus, for example, an object of type @p Triangulation<1,1> (or simply @p
+ * Triangulation<1> since @p spacedim==dim by default) is used to represent
+ * and handle the usual one-dimensional triangulation used in the finite
+ * element method (so, segments on a straight line). On the other hand,
+ * objects such as @p Triangulation<1,2> or @p Triangulation<2,3> (that are
+ * associated with curves in 2D or surfaces in 3D) are the ones one wants to
+ * use in the boundary element method.
+ *
+ * This class is written to be as independent of the dimension as possible
+ * (thus the complex construction of the
+ * dealii::internal::Triangulation::TriaLevel classes) to allow code-sharing,
+ * to allow reducing the need to mirror changes in the code for one dimension
+ * to the code for other dimensions. Nonetheless, some of the functions are
+ * dependent on the dimension and there only exist specialized versions for
+ * distinct dimensions.
+ *
+ * This class satisfies the
+ * @ref ConceptMeshType "MeshType concept"
+ * requirements.
+ *
+ * <h3>Structure and iterators</h3>
+ *
+ * The actual data structure of a Triangulation object is rather complex and
+ * quite inconvenient if one attempted to operate on it directly, since data
+ * is spread over quite a lot of arrays and other places. However, there are
+ * ways powerful enough to work on these data structures without knowing their
+ * exact relations. deal.II uses class local typedefs (see below) to make
+ * things as easy and dimension independent as possible.
+ *
+ * The Triangulation class provides iterators which enable looping over all
+ * cells without knowing the exact representation used to describe them. For
+ * more information see the documentation of <tt>TriaIterator</tt>. Their
+ * names are typedefs imported from the Iterators class (thus making them
+ * local types to this class) and are as follows:
+ *
+ * <ul>
+ * <li> <tt>cell_iterator</tt>: loop over all cells used in the Triangulation
+ * <li> <tt>active_cell_iterator</tt>: loop over all active cells
+ * </ul>
+ *
+ * For <tt>dim==1</tt>, these iterators are mapped as follows:
+ *  @code
+ *    typedef line_iterator        cell_iterator;
+ *    typedef active_line_iterator active_cell_iterator;
+ *  @endcode
+ * while for @p dim==2 we have the additional face iterator:
+ *  @code
+ *    typedef quad_iterator        cell_iterator;
+ *    typedef active_quad_iterator active_cell_iterator;
+ *
+ *    typedef line_iterator        face_iterator;
+ *    typedef active_line_iterator active_face_iterator;
+ *  @endcode
+ *
+ * By using the cell iterators, you can write code independent of the spatial
+ * dimension. The same applies for substructure iterators, where a
+ * substructure is defined as a face of a cell. The face of a cell is a vertex
+ * in 1D and a line in 2D; however, vertices are handled in a different way
+ * and therefore lines have no faces.
+ *
+ * The Triangulation class offers functions like begin_active() which gives
+ * you an iterator to the first active cell. There are quite a lot of
+ * functions returning iterators. Take a look at the class doc to get an
+ * overview.
+ *
+ * Usage of these iterators is similar to usage of standard container
+ * iterators. Some examples taken from the Triangulation source code follow
+ * (notice that in the last two examples the template parameter @p spacedim
+ * has been omitted, so it takes the default value <code>dim</code>).
+ *
+ * <ul>
+ * <li> <em>Counting the number of cells on a specific level</em>
+ *    @code
+ *     template <int dim, int spacedim>
+ *     int Triangulation<dim, spacedim>::n_cells (const int level) const {
+ *        cell_iterator cell = begin (level),
+ *                      endc = end(level);
+ *        int n=0;
+ *        for (; cell!=endc; ++cell)
+ *          ++n;
+ *        return n;
+ *      };
+ *    @endcode
+ * Another way, which uses <tt>std::distance</tt>, would be to write
+ *    @code
+ *      template <int dim>
+ *      int Triangulation<dim>::n_cells (const int level) const {
+ *        int n=0;
+ *        distance (begin(level),
+ *                  (level == levels.size()-1 ?
+ *                   cell_iterator(end()) :
+ *                   begin (level+1)),
+ *                  n);
+ *        return n;
+ *      };
+ *    @endcode
+ *
+ * <li> <em>Refining all cells of a triangulation</em>
+ *    @code
+ *      template <int dim>
+ *      void Triangulation<dim>::refine_global () {
+ *        active_cell_iterator cell = begin_active(),
+ *                             endc = end();
+ *
+ *        for (; cell != endc; ++cell)
+ *          cell->set_refine_flag ();
+ *        execute_coarsening_and_refinement ();
+ *      };
+ *    @endcode
+ * </ul>
+ *
+ *
+ * <h3>Usage</h3>
+ *
+ * Usage of a Triangulation is mainly done through the use of iterators. An
+ * example probably shows best how to use it:
+ *  @code
+ *  int main () {
+ *    Triangulation<2> tria;
+ *
+ *    // read in a coarse grid file
+ *
+ *                                     // we want to log the
+ *                                     // refinement history
+ *    ofstream history ("mesh.history");
+ *
+ *                                     // refine first cell
+ *    tria.begin_active()->set_refine_flag();
+ *    tria.save_refine_flags (history);
+ *    tria.execute_coarsening_and_refinement ();
+ *
+ *                                     // refine first active cell
+ *                                     // on coarsest level
+ *    tria.begin_active()->set_refine_flag ();
+ *    tria.save_refine_flags (history);
+ *    tria.execute_coarsening_and_refinement ();
+ *
+ *    Triangulation<2>::active_cell_iterator cell;
+ *    for (int i=0; i<17; ++i)
+ *      {
+ *                                         // refine the presently
+ *                                         // second last cell 17
+ *                                         // times
+ *        cell = tria.last_active(tria.n_levels()-1);
+ *        --cell;
+ *        cell->set_refine_flag ();
+ *        tria.save_refine_flags (history);
+ *        tria.execute_coarsening_and_refinement ();
+ *      };
+ *                                       // output the grid
+ *    ofstream out("grid.1");
+ *    GridOut::write_gnuplot (tria, out);
+ *  };
+ *  @endcode
+ *
+ *
+ * <h3>Creating a triangulation</h3>
+ *
+ * There are several possibilities to create a triangulation:
+ * <ul>
+ * <li> The most common domains, such as hypercubes (i.e. lines, squares,
+ * cubes, etc), hyper-balls (circles, balls, ...) and some other, more weird
+ * domains such as the L-shape region and higher dimensional generalizations
+ * and others, are provided by the GridGenerator class which takes a
+ * triangulation and fills it by a division of the required domain.
+ *
+ * <li> Reading in a triangulation: By using an object of the GridIn class,
+ * you can read in fairly general triangulations. See there for more
+ * information. The mentioned class uses the interface described directly
+ * below to transfer the data into the triangulation.
+ *
+ * <li> Explicitly creating a triangulation: you can create a triangulation by
+ * providing a list of vertices and a list of cells. Each such cell consists
+ * of a vector storing the indices of the vertices of this cell in the vertex
+ * list. To see how this works, you can take a look at the GridIn<dim>::read_*
+ * functions. The appropriate function to be called is create_triangulation().
+ *
+ * Creating the hierarchical information needed for this library from cells
+ * storing only vertex information can be quite a complex task.  For example
+ * in 2D, we have to create lines between vertices (but only once, though
+ * there are two cells which link these two vertices) and we have to create
+ * neighborhood information. Grids being read in should therefore not be too
+ * large, reading refined grids would be inefficient (although there is
+ * technically no problem in reading grids with several 10,000 or 100,000
+ * cells; the library can handle this without much trouble). Apart from the
+ * performance aspect, refined grids do not lend themselves well to multigrid
+ * algorithms, since solving on the coarsest level is expensive. It is wiser
+ * in any case to read in a grid as coarse as possible and then do the needed
+ * refinement steps.
+ *
+ * It is your duty to guarantee that cells have the correct orientation. To
+ * guarantee this, in the input vector keeping the cell list, the vertex
+ * indices for each cell have to be in a defined order, see the documentation
+ * of GeometryInfo<dim>. In one dimension, the first vertex index must refer
+ * to that vertex with the lower coordinate value. In 2D and 3D, the
+ * corresponding conditions are not easy to verify and no full attempt to do
+ * so is made. If you violate this condition, you may end up with matrix
+ * entries having the wrong sign (clockwise vertex numbering, which results in
+ * a negative area element) or with wrong matrix elements (twisted
+ * quadrilaterals, i.e. two vertices interchanged; this results in a wrong
+ * area element).
+ *
+ * There are more subtle conditions which must be imposed upon the vertex
+ * numbering within cells. They do not only hold for the data read from a UCD
+ * or any other input file, but also for the data passed to
+ * create_triangulation(). See the documentation for the GridIn class for more
+ * details on this, and above all to the GridReordering class that explains
+ * many of the problems and an algorithm to reorder cells such that they
+ * satisfy the conditions outlined above.
+ *
+ * <li> Copying a triangulation: when computing on time dependent meshes or
+ * when using adaptive refinement, you will often want to create a new
+ * triangulation to be the same as another one. This is facilitated by the @p
+ * copy_triangulation function.
+ *
+ * It is guaranteed that vertex, line or cell numbers in the two
+ * triangulations are the same and that two iterators walking on the two
+ * triangulations visit matching cells if they are incremented in parallel. It
+ * may be conceivable to implement a clean-up in the copy operation, which
+ * eliminates holes of unused memory, re-joins scattered data and so on. In
+ * principle this would be a useful operation but guaranteeing some
+ * parallelism in the two triangulations seems more important since usually
+ * data will have to be transferred between the grids.
+ * </ul>
+ *
+ * Finally, there is a special function for folks who like bad grids:
+ * distort_random(). It moves all the vertices in the grid a bit around by a
+ * random value, leaving behind a distorted mesh. Note that you should apply
+ * this function to the final mesh, since refinement smoothes the mesh a bit.
+ *
+ * The function will make sure that vertices on restricted faces (hanging
+ * nodes) will end up in the correct place, i.e. in the middle of the two
+ * other vertices of the mother line, and the analogue in higher space
+ * dimensions (vertices on the boundary are not corrected, so don't distort
+ * boundary vertices in more than two space dimensions, i.e. in dimensions
+ * where boundary vertices can be hanging nodes). Applying the algorithm has
+ * another drawback related to the placement of cells, however: the children
+ * of a cell will not occupy the same region of the domain as the mother cell
+ * does. While this is the usual behavior with cells at the boundary, here you
+ * may get into trouble when using multigrid algorithms or when transferring
+ * solutions from coarse to fine grids and back. In general, the use of this
+ * function is only safe if you only use the most refined level of the
+ * triangulation for computations.
+ *
+ *
+ *
+ * <h3>Refinement and coarsening of a triangulation</h3>
+ *
+ * Refinement of a triangulation may be done through several ways. The most
+ * low-level way is directly through iterators: let @p i be an iterator to an
+ * active cell (i.e. the cell pointed to has no children), then the function
+ * call <tt>i->set_refine_flag()</tt> marks the respective cell for
+ * refinement. Marking non-active cells results in an error.
+ *
+ * After marking all the cells you want to refine, call
+ * execute_coarsening_and_refinement() to actually perform the refinement.
+ * This function itself first calls the @p prepare_coarsening_and_refinement
+ * function to regularize the resulting triangulation: since a face between
+ * two adjacent cells may only be subdivided once (i.e. the levels of two
+ * adjacent cells may differ by one at most; it is not possible to have a cell
+ * refined twice while the neighboring one is not refined), some additional
+ * cells are flagged for refinement to smooth the grid. This enlarges the
+ * number of resulting cells but makes the grid more regular, thus leading to
+ * better approximation properties and, above all, making the handling of data
+ * structures and algorithms much much easier. To be honest, this is more an
+ * algorithmic step than one needed by the finite element method.
+ *
+ * To coarsen a grid, the same way as above is possible by using
+ * <tt>i->set_coarsen_flag</tt> and calling
+ * execute_coarsening_and_refinement().
+ *
+ * The reason for first coarsening, then refining is that the refinement
+ * usually adds some additional cells to keep the triangulation regular and
+ * thus satisfies all refinement requests, while the coarsening does not
+ * delete cells not requested for; therefore the refinement will often revert
+ * some effects of coarsening while the opposite is not true. The stated order
+ * of coarsening before refinement will thus normally lead to a result closer
+ * to the intended one.
+ *
+ * Marking cells for refinement 'by hand' through iterators is one way to
+ * produce a new grid, especially if you know what kind of grid you are
+ * looking for, e.g. if you want to have a grid successively refined towards
+ * the boundary or always at the center (see the example programs, they do
+ * exactly these things). There are more advanced functions, however, which
+ * are more suitable for automatic generation of hierarchical grids in the
+ * context of a posteriori error estimation and adaptive finite elements.
+ * These functions can be found in the GridRefinement class.
+ *
+ *
+ * <h3>Smoothing of a triangulation</h3>
+ *
+ * Some degradation of approximation properties has been observed for grids
+ * which are too unstructured. Therefore, prepare_coarsening_and_refinement()
+ * which is automatically called by execute_coarsening_and_refinement() can do
+ * some smoothing of the triangulation. Note that mesh smoothing is only done
+ * for two or more space dimensions, no smoothing is available at present for
+ * one spatial dimension. In the following, let <tt>execute_*</tt> stand for
+ * execute_coarsening_and_refinement().
+ *
+ * For the purpose of smoothing, the Triangulation constructor takes an
+ * argument specifying whether a smoothing step shall be performed on the grid
+ * each time <tt>execute_*</tt> is called. The default is that such a step not
+ * be done, since this results in additional cells being produced, which may
+ * not be necessary in all cases. If switched on, calling <tt>execute_*</tt>
+ * results in flagging additional cells for refinement to avoid vertices as
+ * the ones mentioned. The algorithms for both regularization and smoothing of
+ * triangulations are described below in the section on technical issues. The
+ * reason why this parameter must be given to the constructor rather than to
+ * <tt>execute_*</tt> is that it would result in algorithmic problems if you
+ * called <tt>execute_*</tt> once without and once with smoothing, since then
+ * in some refinement steps cells would need to be refined twice.
+ *
+ * The parameter taken by the constructor is an integer which may be composed
+ * bitwise by the constants defined in the enum #MeshSmoothing (see there for
+ * the possibilities).
+ *
+ * @note While it is possible to pass all of the flags in #MeshSmoothing to
+ * objects of type parallel::distributed::Triangulation, it is not always
+ * possible to honor all of these smoothing options if they would require
+ * knowledge of refinement/coarsening flags on cells not locally owned by this
+ * processor. As a consequence, for some of these flags, the ultimate number
+ * of cells of the parallel triangulation may depend on the number of
+ * processors into which it is partitioned.
+ *
+ *
+ * <h3>Material and boundary information</h3>
+ *
+ * Each cell, face or edge stores information denoting the material or the
+ * part of the boundary that an object belongs to. The material of a cell may
+ * be used during matrix generation in order to implement different
+ * coefficients in different parts of the domain. It is not used by functions
+ * of the grid and dof handling libraries.
+ *
+ * This material_id may be set upon construction of a triangulation (through
+ * the CellData data structure), or later through use of cell iterators. For a
+ * typical use of this functionality, see the step-28 tutorial program. The
+ * functions of the GridGenerator namespace typically set the material ID of
+ * all cells to zero. When reading a triangulation, the material id must be
+ * specified in the input file (UCD format) or is otherwise set to zero.
+ * Material IDs are inherited by child cells from their parent upon mesh
+ * refinement.
+ *
+ * Boundary indicators on lower dimensional objects (these have no material
+ * id) indicate the number of a boundary component. These are used for two
+ * purposes: First, they specify a boundary curve. When a cell is refined, a
+ * function can be used to place new vertices on this curve. See the section
+ * on boundary approximation below. Furthermore, the weak formulation of the
+ * partial differential equation may have different boundary conditions on
+ * different parts of the boundary. The boundary indicator can be used in
+ * creating the matrix or the right hand side vector to indicate these
+ * different parts of the model (this use is like the material id of cells).
+ * Boundary indicators may be in the range from zero to
+ * numbers::internal_face_boundary_id-1. The value
+ * numbers::internal_face_boundary_id is reserved to denote interior lines (in
+ * 2D) and interior lines and quads (in 3D), which do not have a boundary
+ * indicator. This way, a program can easily determine whether such an object
+ * is at the boundary or not. Material indicators may be in the range from
+ * zero to numbers::invalid_material_id-1.
+ *
+ * Lines in two dimensions and quads in three dimensions pass their boundary
+ * indicator on to their children upon refinement. You should therefore
+ * make sure that if you have different boundary parts, the different parts
+ * are separated by a vertex (in 2D) or a line (in 3D) such that each boundary
+ * line or quad has a unique boundary indicator.
+ *
+ * By default (unless otherwise specified during creation of a triangulation),
+ * all parts of the boundary have boundary indicator zero. As a historical
+ * wart, this isn't true for 1d meshes, however: For these, leftmost vertices
+ * have boundary indicator zero while rightmost vertices have boundary
+ * indicator one. In either case, the boundary indicator of a face can be
+ * changed using a call of the kind
+ * <code>cell-@>face(1)-@>set_boundary_id(42);</code>.
+ *
+ * @see
+ * @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+ *
+ *
+ * <h3>History of a triangulation</h3>
+ *
+ * It is possible to reconstruct a grid from its refinement history, which can
+ * be stored and loaded through the @p save_refine_flags and @p
+ * load_refine_flags functions. Normally, the code will look like this:
+ *   @code
+ *                                 // open output file
+ *     ofstream history("mesh.history");
+ *                                 // do 10 refinement steps
+ *     for (int step=0; step<10; ++step) {
+ *       ...;
+ *       // flag cells according to some criterion
+ *       ...;
+ *       tria.save_refine_flags (history);
+ *       tria.execute_coarsening_and_refinement ();
+ *     };
+ *   @endcode
+ *
+ * If you want to re-create the grid from the stored information, you write:
+ *   @code
+ *                                 // open input file
+ *     ifstream history("mesh.history");
+ *                                 // do 10 refinement steps
+ *     for (int step=0; step<10; ++step) {
+ *       tria.load_refine_flags (history);
+ *       tria.execute_coarsening_and_refinement ();
+ *     };
+ *   @endcode
+ *
+ * The same scheme is employed for coarsening and the coarsening flags.
+ *
+ * You may write other information to the output file between different sets
+ * of refinement information, as long as you read it upon re-creation of the
+ * grid. You should make sure that the other information in the new
+ * triangulation which is to be created from the saved flags, matches that of
+ * the old triangulation, for example the smoothing level; if not, the cells
+ * actually created from the flags may be other ones, since smoothing adds
+ * additional cells, but their number may depend on the smoothing level.
+ *
+ * There actually are two sets of <tt>save_*_flags</tt> and
+ * <tt>load_*_flags</tt> functions. One takes a stream as argument and
+ * reads/writes the information from/to the stream, thus enabling storing
+ * flags to files. The other set takes an argument of type
+ * <tt>vector<bool></tt>. This enables the user to temporarily store some
+ * flags, e.g. if another function needs them, and restore them afterwards.
+ *
+ *
+ * <h3>User flags and data</h3>
+ *
+ * A triangulation offers one bit per line, quad, etc for user flags. This
+ * field can be accessed as all other data using iterators. Normally, this
+ * user flag is used if an algorithm walks over all cells and needs
+ * information whether another cell, e.g. a neighbor, has already been
+ * processed. See
+ * @ref GlossUserFlags "the glossary for more information".
+ *
+ * There is another set of user data, which can be either an <tt>unsigned
+ * int</tt> or a <tt>void *</tt>, for each line, quad, etc. You can access
+ * these through the functions listed under <tt>User data</tt> in the accessor
+ * classes. Again, see
+ * @ref GlossUserData "the glossary for more information".
+ *
+ * The value of these user indices or pointers is @p NULL by default. Note
+ * that the pointers are not inherited by children upon refinement. Still,
+ * after a remeshing they are available on all cells, where they were set on
+ * the previous mesh.
+ *
+ * The usual warnings about the missing type safety of @p void pointers are
+ * obviously in order here; responsibility for correctness of types etc lies
+ * entirely with the user of the pointer.
+ *
+ * @note User pointers and user indices are stored in the same place. In order
+ * to avoid unwanted conversions, Triangulation checks which one of them is in
+ * use and does not allow access to the other one, until clear_user_data() has
+ * been called.
+ *
+ *
+ * <h3>Boundary approximation</h3>
+ *
+ * You can specify a boundary function for each boundary component. If a new
+ * vertex is created on a side or face at the boundary, this function is used
+ * to compute where it will be placed. The boundary indicator of the face will
+ * be used to determine the proper component. See Boundary for the details.
+ * Usage with the Triangulation object is then like this (let @p Ball be a
+ * class derived from Boundary<tt><2></tt>):
+ *
+ *   @code
+ *     int main () {
+ *       Triangulation<2> tria;
+ *                                        // set the boundary function
+ *                                        // for all boundaries with
+ *                                        // boundary indicator 0
+ *       Ball ball;
+ *       tria.set_boundary (0, ball);
+ *
+ *       // read some coarse grid
+ *
+ *
+ *       Triangulation<2>::active_cell_iterator cell, endc;
+ *       for (int i=0; i<8; ++i)
+ *         {
+ *           cell = tria.begin_active();
+ *           endc = tria.end();
+ *
+ *                                            // refine all
+ *                                            // boundary cells
+ *           for (; cell!=endc; ++cell)
+ *             if (cell->at_boundary())
+ *               cell->set_refine_flag();
+ *
+ *           tria.execute_coarsening_and_refinement();
+ *         };
+ *     };
+ *   @endcode
+ *
+ * You should take note of one caveat: if you have concave boundaries, you
+ * must make sure that a new boundary vertex does not lie too much inside the
+ * cell which is to be refined. The reason is that the center vertex is placed
+ * at the point which is the arithmetic mean of the vertices of the original
+ * cell. Therefore if your new boundary vertex is too near the center of the
+ * old quadrilateral or hexahedron, the distance to the midpoint vertex will
+ * become too small, thus generating distorted cells. This issue is discussed
+ * extensively in
+ * @ref GlossDistorted "distorted cells".
+ *
+ *
+ * <h3>Getting notice when a triangulation changes</h3>
+ *
+ * There are cases where one object would like to know whenever a
+ * triangulation is being refined, copied, or modified in a number of other
+ * ways. This could of course be achieved if, in your user code, you tell
+ * every such object whenever you are about to refine the triangulation, but
+ * this will get tedious and is error prone. The Triangulation class
+ * implements a more elegant way to achieve this: signals.
+ *
+ * In essence, a signal is an object (a member of the Triangulation class)
+ * that another object can connect to. A connection is in essence that the
+ * connecting object passes a function object taking a certain number and kind
+ * of arguments. Whenever the owner of the signal wants to indicate a certain
+ * kind of event, it 'triggers' the signal, which in turn means that all
+ * connections of the signal are triggered: in other words, the function
+ * objects are executed and can take the action that is necessary.
+ *
+ * As a simple example, the following code will print something to the output
+ * every time the triangulation has just been refined:
+ *   @code
+ *     void f() {
+ *       std::cout << "Triangulation has been refined." << std::endl;
+ *     }
+ *
+ *     void run () {
+ *       Triangulation<dim> triangulation;
+ *       // fill it somehow
+ *       triangulation.signals.post_refinement.connect (&f);
+ *       triangulation.refine_global (2);
+ *     }
+ *   @endcode
+ * This code will produce output twice, once for each refinement cycle.
+ *
+ * A more interesting application would be the following, akin to what the
+ * FEValues class does. This class stores a pointer to a triangulation and
+ * also an iterator to the cell last handled (so that it can compare the
+ * current cell with the previous one and, for example, decide that there is
+ * no need to re-compute the Jacobian matrix if the new cell is a simple
+ * translation of the previous one). However, whenever the triangulation is
+ * modified, the iterator to the previously handled cell needs to be
+ * invalidated since it now no longer points to any useful cell (or, at the
+ * very least, points to something that may not necessarily resemble the cells
+ * previously handled). The code would look something like this (the real code
+ * has some more error checking and has to handle the case that subsequent
+ * cells might actually belong to different triangulations, but that is of no
+ * concern to us here):
+ *   @code
+ *     template <int dim>
+ *     class FEValues {
+ *         Triangulation<dim>::active_cell_iterator current_cell, previous_cell;
+ *       public:
+ *         void reinit (Triangulation<dim>::active_cell_iterator &cell);
+ *         void invalidate_previous_cell ();
+ *     };
+ *
+ *     template <int dim>
+ *     void
+ *     FEValues<dim>::reinit (Triangulation<dim>::active_cell_iterator &cell) {
+ *       if (previous_cell.status() != valid)
+ *         {
+ *           // previous_cell has not been set. set it now, and register
+ *           // with the triangulation that we want to be informed about
+ *           // mesh refinement
+ *           previous_cell = current_cell;
+ *           previous_cell->get_triangulation().signals.post_refinement
+ *             .connect (std_cxx11::bind (&FEValues<dim>::invalidate_previous_cell,
+ *                                        std_cxx11::ref (*this)));
+ *         }
+ *       else
+ *         previous_cell = current_cell;
+ *
+ *       current_cell = cell;
+ *       ... do something with the cell...
+ *     }
+ *
+ *
+ *     template <int dim>
+ *     void
+ *     FEValues<dim>::invalidate_previous_cell () {
+ *       previous_cell = Triangulation<dim>::active_cell_iterator();
+ *     }
+ *   @endcode
+ * Here, whenever the triangulation is refined, it triggers the post-
+ * refinement signal which calls the function object attached to it. This
+ * function object is the member function
+ * <code>FEValues<dim>::invalidate_previous_cell</code> where we have bound
+ * the single argument (the <code>this</code> pointer of a member function
+ * that otherwise takes no arguments) to the <code>this</code> pointer of the
+ * FEValues object. Note how here there is no need for the code that owns the
+ * triangulation and the FEValues object to inform the latter if the former is
+ * refined. (In practice, the function would want to connect to some of the
+ * other signals that the triangulation offers as well, in particular to
+ * creation and deletion signals.)
+ *
+ * The Triangulation class has a variety of signals that indicate different
+ * actions by which the triangulation can modify itself and potentially
+ * require follow-up action elsewhere. Please refer to Triangulation::Signals
+ * for details.
+ *
+ * <h3>Serializing (loading or storing) triangulations</h3>
+ *
+ * Like many other classes in deal.II, the Triangulation class can stream its
+ * contents to an archive using BOOST's serialization facilities. The data so
+ * stored can later be retrieved again from the archive to restore the
+ * contents of this object. This facility is frequently used to save the state
+ * of a program to disk for possible later resurrection, often in the context
+ * of checkpoint/restart strategies for long running computations or on
+ * computers that aren't very reliable (e.g. on very large clusters where
+ * individual nodes occasionally fail and then bring down an entire MPI job).
+ *
+ * For technical reasons, writing and restoring a Triangulation object is
+ * non-trivial. The primary reason is that unlike many other objects,
+ * triangulations rely on many other objects to which they store pointers or
+ * with which they interface; for example, triangulations store pointers to
+ * objects describing boundaries and manifolds, and they have signals that
+ * store pointers to other objects so they can be notified of changes in the
+ * triangulation (see the section on signals in this introduction). As objects
+ * that are re-loaded at a later time do not usually end up at the same
+ * location in memory as they were when they were saved, dealing with pointers
+ * to other objects is difficult.
+ *
+ * For these reasons, saving a triangulation to an archive does not store all
+ * information, but only certain parts. More specifically, the information
+ * that is stored is everything that defines the mesh such as vertex
+ * locations, vertex indices, how vertices are connected to cells, boundary
+ * indicators, subdomain ids, material ids, etc. On the other hand, the
+ * following information is not stored: (i) signals, and (ii) pointers to
+ * boundary objects previously set using Triangulation::set_boundary().
+ * However, since these are objects that are usually set in user code, they
+ * can typically easily be set again in that part of your code in which you
+ * re-load triangulations.
+ *
+ * In a sense, this approach to serialization means that re-loading a
+ * triangulation is more akin to calling the
+ * Triangulation::create_triangulation function and filling it with some
+ * additional content, as that function also does not touch the signals and
+ * boundary objects that belong to this triangulation. In keeping with this
+ * analogy, the Triangulation::load function also triggers the same kinds of
+ * signal as Triangulation::create_triangulation.
+ *
+ *
+ * <h3>Technical details</h3>
+ *
+ * <h4>Algorithms for mesh regularization and smoothing upon refinement</h4>
+ *
+ * We chose an inductive point of view: since upon creation of the
+ * triangulation all cells are on the same level, all regularity assumptions
+ * regarding the maximum difference in level of cells sharing a common face,
+ * edge or vertex hold. Since we use the regularization and smoothing in each
+ * step of the mesh history, when coming to the point of refining it further
+ * the assumptions also hold.
+ *
+ * The regularization and smoothing is done in the @p
+ * prepare_coarsening_and_refinement function, which is called by @p
+ * execute_coarsening_and_refinement at the very beginning.  It decides which
+ * additional cells to flag for refinement by looking at the old grid and the
+ * refinement flags for each cell.
+ *
+ * <ul>
+ * <li> <em>Regularization:</em> The algorithm walks over all cells checking
+ * whether the present cell is flagged for refinement and a neighbor of the
+ * present cell is refined once less than the present one. If so, flag the
+ * neighbor for refinement. Because of the induction above, there may be no
+ * neighbor with level two less than the present one.
+ *
+ * The neighbor thus flagged for refinement may induce more cells which need
+ * to be refined. However, such cells which need additional refinement always
+ * are on one level lower than the present one, so we can get away with only
+ * one sweep over all cells if we do the loop in the reverse way, starting
+ * with those on the highest level. This way, we may flag additional cells on
+ * lower levels, but if these induce more refinement needed, this is performed
+ * later on when we visit them in our backward running loop.
+ *
+ * <li> <em>Smoothing:</em>
+ * <ul>
+ * <li> @p limit_level_difference_at_vertices: First a list is set up which
+ * stores for each vertex the highest level one of the adjacent cells belongs
+ * to. Now, since we did smoothing in the previous refinement steps also, each
+ * cell may only have vertices with levels at most one greater than the level
+ * of the present cell.
+ *
+ * However, if we store the level plus one for cells marked for refinement, we
+ * may end up with cells which have vertices of level two greater than the
+ * cell's level. We need to refine this cell also, and thus need to update
+ * the levels of its vertices. This itself may lead to cells needing
+ * refinement, but these are on lower levels, as above, which is why we may do
+ * all kinds of additional flagging in one loop only.
+ *
+ * <li> @p eliminate_unrefined_islands: For each cell we count the number of
+ * neighbors which are refined or flagged for refinement. If this exceeds the
+ * number of neighbors which are not refined and not flagged for refinement,
+ * then the current cell is flagged for refinement. Since this may lead to
+ * cells on the same level which also will need refinement, we will need
+ * additional loops of regularization and smoothing over all cells until
+ * nothing changes any more.
+ *
+ * <li> <tt>eliminate_refined_*_islands</tt>: This one does much the same as
+ * the above one, but for coarsening. If a cell is flagged for refinement or
+ * if all of its children are active and if the number of neighbors which are
+ * either active and not flagged for refinement, or not active but all
+ * children flagged for coarsening equals the total number of neighbors, then
+ * this cell's children are flagged for coarsening or (if this cell was
+ * flagged for refinement) the refine flag is cleared.
+ *
+ * For a description of the distinction between the two versions of the flag
+ * see above in the section about mesh smoothing in the general part of this
+ * class's description.
+ *
+ * The same applies as above: several loops may be necessary.
+ * </ul>
+ * </ul>
+ *
+ * Regularization and smoothing are a bit complementary in that we check
+ * whether we need to set additional refinement flags when being on a cell
+ * flagged for refinement (regularization) or on a cell not flagged for
+ * refinement. This makes readable programming easier.
+ *
+ * All the described algorithms apply only for more than one space dimension,
+ * since for one dimension no restrictions apply. It may be necessary to apply
+ * some smoothing for multigrid algorithms, but this has to be decided upon
+ * later.
+ *
+ *
+ * <h3>Warning</h3>
+ *
+ * It seems impossible to preserve @p constness of a triangulation through
+ * iterator usage. Thus, if you declare pointers to a @p const triangulation
+ * object, you should be well aware that you might involuntarily alter the
+ * data stored in the triangulation.
+ *
+ * @ingroup grid aniso
+ * @author Wolfgang Bangerth, 1998; Ralf Hartmann, 2005
+ */
+template <int dim, int spacedim=dim>
+class Triangulation : public Subscriptor
+{
+private:
+
+  /**
+   * An internal typedef to make the definition of the iterator classes
+   * simpler.
+   */
+  typedef dealii::internal::Triangulation::Iterators<dim, spacedim> IteratorSelector;
+
+public:
+  /**
+   * Default manifold object. This is used for those objects for which no
+   * boundary description has been explicitly set using set_manifold().
+   */
+  static const StraightBoundary<dim,spacedim> straight_boundary;
+
+  /**
+   * Declare some symbolic names for mesh smoothing algorithms. The meaning of
+   * these flags is documented in the Triangulation class.
+   */
+  enum MeshSmoothing
+  {
+    /**
+     * No mesh smoothing at all, except that meshes have to remain one-
+     * irregular.
+     */
+    none                               = 0x0,
+    /**
+     * It can be shown that degradation of approximation occurs if the
+     * triangulation contains vertices which are member of cells with levels
+     * differing by more than one. One such example is the following:
+     *
+     * @image html limit_level_difference_at_vertices.png ""
+     *
+     * It would seem that in two space dimensions, the maximum jump in levels
+     * between cells sharing a common vertex is two (as in the example above).
+     * However, this is not true if more than four cells meet at a vertex. It
+     * is not uncommon that a coarse (initial) mesh contains vertices at which
+     * six or even eight cells meet, when small features of the domain have to
+     * be resolved even on the coarsest mesh. In that case, the maximum
+     * difference in levels is three or four, respectively. The problem gets
+     * even worse in three space dimensions.
+     *
+     * Looking at an interpolation of the second derivative of the finite
+     * element solution (assuming bilinear finite elements), one sees that the
+     * numerical solution is almost totally wrong, compared with the true
+     * second derivative. Indeed, on regular meshes, there exist sharp
+     * estimations that the H<sup>2</sup>-error is only of order one, so we
+     * should not be surprised; however, the numerical solution may show a
+     * value for the second derivative which may be a factor of ten away from
+     * the true value. These problems are located on the small cell adjacent
+     * to the center vertex, where cells of non-subsequent levels meet, as
+     * well as on the upper and right neighbor of this cell (but with a
+     * lesser degree of deviation from the true value).
+     *
+     * If the smoothing indicator given to the constructor contains the bit
+     * for #limit_level_difference_at_vertices, situations as the above one
+     * are eliminated by also marking the lower left cell for refinement.
+     *
+     * In case of anisotropic refinement, the level of a cell is not linked to
+     * the refinement of a cell as directly as in case of isotropic
+     * refinement. Furthermore, a cell can be strongly refined in one
+     * direction and not or at least much less refined in another. Therefore,
+     * it is very difficult to decide, which cases should be excluded from the
+     * refinement process. As a consequence, when using anisotropic
+     * refinement, the #limit_level_difference_at_vertices flag must not be
+     * set. On the other hand, the implementation of multigrid methods in
+     * deal.II requires that this bit be set.
+     */
+    limit_level_difference_at_vertices = 0x1,
+    /**
+     * Single cells which are not refined and are surrounded by cells which
+     * are refined usually also lead to a sharp decline in approximation
+     * properties locally. The reason is that the nodes on the faces between
+     * unrefined and refined cells are not real degrees of freedom but carry
+     * constraints. The patch without additional degrees of freedom is thus
+     * significantly larger than the unrefined cell itself. If in the
+     * parameter passed to the constructor the bit for
+     * #eliminate_unrefined_islands is set, all cells which are not flagged
+     * for refinement but which are surrounded by more refined cells than
+     * unrefined cells are flagged for refinement. Cells which are not yet
+     * refined but flagged for that are counted among the refined
+     * neighbors. Cells on the boundary are not accounted for at all. An
+     * unrefined island is, by this definition, also a cell which (in 2D) is
+     * surrounded by three refined cells and one unrefined one, or one
+     * surrounded by two refined cells, one unrefined one and is at the
+     * boundary on one side. It is thus not a true island, as the name of the
+     * flag may indicate. However, no better name came to mind to the author
+     * by now.
+     */
+    eliminate_unrefined_islands        = 0x2,
+    /**
+     * A triangulation of patch level 1 consists of patches, i.e. of cells
+     * that are refined once. This flag ensures that a mesh of patch level 1
+     * is still of patch level 1 after coarsening and refinement. It is,
+     * however, the user's responsibility to ensure that the mesh is of patch
+     * level 1 before calling
+     * Triangulation::execute_coarsening_and_refinement() the first time. The
+     * easiest way to achieve this is by calling refine_global(1) straight
+     * after creation of the triangulation.  It follows that if at least one
+     * of the children of a cell is or will be refined then all children need
+     * to be refined. If the #patch_level_1 flag is set, then the flags
+     * #eliminate_unrefined_islands, #eliminate_refined_inner_islands and
+     * #eliminate_refined_boundary_islands will be ignored as they will be
+     * fulfilled automatically.
+     */
+    patch_level_1                      = 0x4,
+    /**
+     * Each coarse grid cell is refined at least once, i.e. the triangulation
+     * might have active cells on level 1 but not on level 0. This flag
+     * ensures that a mesh which has coarsest_level_1 has still
+     * coarsest_level_1 after coarsening and refinement. It is, however, the
+     * user's responsibility to ensure that the mesh has coarsest_level_1
+     * before calling execute_coarsening_and_refinement the first time. The
+     * easiest way to achieve this is by calling refine_global(1) straight
+     * after creation of the triangulation. It follows that active cells on
+     * level 1 may not be coarsened.
+     *
+     * The main use of this flag is to ensure that each cell has at least one
+     * neighbor in each coordinate direction (i.e. each cell has at least a
+     * left or right, and at least an upper or lower neighbor in 2d). This is
+     * a necessary precondition for some algorithms that compute finite
+     * differences between cells. The DerivativeApproximation class is one of
+     * these algorithms that require that a triangulation is coarsest_level_1
+     * unless all cells already have at least one neighbor in each coordinate
+     * direction on the coarsest level.
+     */
+    coarsest_level_1                   = 0x8,
+    /**
+     * This flag is not included in @p maximum_smoothing. The flag is
+     * concerned with the following case: consider the case that an unrefined
+     * and a refined cell share a common face and that one of the children of
+     * the refined cell along the common face is flagged for further
+     * refinement. In that case, the resulting mesh would have more than one
+     * hanging node along one or more of the edges of the triangulation, a
+     * situation that is not allowed. Consequently, in order to perform the
+     * refinement, the coarser of the two original cells is also going to be
+     * refined.
+     *
+     * However, in many cases it is sufficient to refine the coarser of the
+     * two original cells in an anisotropic way to avoid the case of multiple
+     * hanging vertices on a single edge. Doing only the minimal anisotropic
+     * refinement can save cells and degrees of freedom. By specifying this
+     * flag, the library can produce these anisotropic refinements.
+     *
+     * The flag is not included by default since it may lead to
+     * anisotropically refined meshes even though no cell has ever been
+     * refined anisotropically explicitly by a user command. This surprising
+     * fact may lead to programs that do the wrong thing since they are not
+     * written for the additional cases that can happen with anisotropic
+     * meshes, see the discussion in the introduction to step-30.
+     */
+    allow_anisotropic_smoothing        = 0x10,
+    /**
+     * This algorithm searches for isolated cells which are refined or flagged
+     * for refinement. This definition is unlike that for
+     * #eliminate_unrefined_islands, which would mean that an island is
+     * defined as a cell which is refined but more of its neighbors are not
+     * refined than are refined. For example, in 2D, a cell's refinement would
+     * be reverted if at most one of its neighbors is also refined (or refined
+     * but flagged for coarsening).
+     *
+     * The reason for the change in definition of an island is that this
+     * option would be a bit dangerous, since if you consider a chain of
+     * refined cells (e.g. along a kink in the solution), the cells at the two
+     * ends would be coarsened, after which the next outermost cells would
+     * need to be coarsened. Therefore, only one loop of flagging cells like
+     * this could be done to avoid eating up the whole chain of refined cells
+     * (`chain reaction'...).
+     *
+     * This algorithm also takes into account cells which are not actually
+     * refined but are flagged for refinement. If necessary, it takes away the
+     * refinement flag.
+     *
+     * Actually there are two versions of this flag,
+     * #eliminate_refined_inner_islands and
+     * #eliminate_refined_boundary_islands. The first eliminates islands
+     * defined by the definition above which are in the interior of the
+     * domain, while the second eliminates only those islands if the cell is
+     * at the boundary. The reason for this split of flags is that one often
+     * wants to eliminate such islands in the interior while those at the
+     * boundary may well be wanted, for example if one refines the mesh
+     * according to a criterion associated with a boundary integral or if one
+     * has rough boundary data.
+     */
+    eliminate_refined_inner_islands    = 0x100,
+    /**
+     * The result of this flag is very similar to
+     * #eliminate_refined_inner_islands. See the documentation there.
+     */
+    eliminate_refined_boundary_islands = 0x200,
+    /**
+     * This flag prevents the occurrence of unrefined islands. In more detail:
+     * It prohibits the coarsening of a cell if 'most of the neighbors' will
+     * be refined after the step.
+     */
+    do_not_produce_unrefined_islands   = 0x400,
+
+    /**
+     * This flag sums up all smoothing algorithms which may be performed upon
+     * refinement by flagging some more cells for refinement.
+     */
+    smoothing_on_refinement            = (limit_level_difference_at_vertices |
+                                          eliminate_unrefined_islands),
+    /**
+     * This flag sums up all smoothing algorithms which may be performed upon
+     * coarsening by flagging some more cells for coarsening.
+     */
+    smoothing_on_coarsening            = (eliminate_refined_inner_islands |
+                                          eliminate_refined_boundary_islands |
+                                          do_not_produce_unrefined_islands),
+
+    /**
+     * This flag includes all the above ones and therefore combines all
+     * smoothing algorithms implemented with the exception of anisotropic
+     * smoothing (the lower 16 bits except #allow_anisotropic_smoothing).
+     */
+    maximum_smoothing                  = 0xffff ^ allow_anisotropic_smoothing
+  };
+
+  /**
+   * A typedef that is used to identify cell iterators. The concept of
+   * iterators is discussed at length in the
+   * @ref Iterators "iterators documentation module".
+   *
+   * The current typedef identifies cells in a triangulation. The TriaIterator
+   * class works like a pointer that when you dereference it yields an object
+   * of type CellAccessor. CellAccessor is a class that identifies properties
+   * that are specific to cells in a triangulation, but it is derived (and
+   * consequently inherits) from TriaAccessor that describes what you can ask
+   * of more general objects (lines, faces, as well as cells) in a
+   * triangulation.
+   *
+   * @ingroup Iterators
+   */
+  typedef TriaIterator      <CellAccessor<dim,spacedim>         > cell_iterator;
+
+  /**
+   * A typedef that is used to identify
+   * @ref GlossActive "active cell iterators".
+   * The concept of iterators is discussed at length in the
+   * @ref Iterators "iterators documentation module".
+   *
+   * The current typedef identifies active cells in a triangulation. The
+   * TriaActiveIterator class works like a pointer to active objects that when
+   * you dereference it yields an object of type CellAccessor. CellAccessor is
+   * a class that identifies properties that are specific to cells in a
+   * triangulation, but it is derived (and consequently inherits) from
+   * TriaAccessor that describes what you can ask of more general objects
+   * (lines, faces, as well as cells) in a triangulation.
+   *
+   * @ingroup Iterators
+   */
+  typedef TriaActiveIterator<CellAccessor<dim,spacedim>         > active_cell_iterator;
+
+  typedef TriaIterator      <TriaAccessor<dim-1, dim, spacedim> > face_iterator;
+  typedef TriaActiveIterator<TriaAccessor<dim-1, dim, spacedim> > active_face_iterator;
+
+  typedef typename IteratorSelector::vertex_iterator        vertex_iterator;
+  typedef typename IteratorSelector::active_vertex_iterator active_vertex_iterator;
+
+  typedef typename IteratorSelector::line_iterator        line_iterator;
+  typedef typename IteratorSelector::active_line_iterator active_line_iterator;
+
+  typedef typename IteratorSelector::quad_iterator        quad_iterator;
+  typedef typename IteratorSelector::active_quad_iterator active_quad_iterator;
+
+  typedef typename IteratorSelector::hex_iterator         hex_iterator;
+  typedef typename IteratorSelector::active_hex_iterator  active_hex_iterator;
+
+  /**
+   * A structure that is used as an exception object by the
+   * create_triangulation() function to indicate which cells among the coarse
+   * mesh cells are inverted or severely distorted (see the entry on
+   * @ref GlossDistorted "distorted cells"
+   * in the glossary).
+   *
+   * Objects of this kind are thrown by the create_triangulation() and
+   * execute_coarsening_and_refinement() functions, and they can be caught in
+   * user code if this condition is to be ignored. Note, however, that such
+   * exceptions are only produced if the necessity for this check was
+   * indicated when calling the constructor of the Triangulation class.
+   *
+   * A cell is called <i>deformed</i> if the determinant of the Jacobian of
+   * the mapping from reference cell to real cell is negative at least at one
+   * vertex. This computation is done using the
+   * GeometryInfo::jacobian_determinants_at_vertices function.
+   */
+  struct DistortedCellList : public dealii::ExceptionBase
+  {
+    /**
+     * Destructor. Empty, but it has to be declared explicitly so that it
+     * carries the same exception specification as the destructor of the
+     * base class; the automatically generated destructor would have a
+     * different one due to member objects.
+     */
+    virtual ~DistortedCellList () throw();
+
+    /**
+     * Iterators to those coarse mesh cells that are deformed themselves or
+     * that have deformed children.
+     */
+    std::list<typename Triangulation<dim,spacedim>::cell_iterator>
+    distorted_cells;
+  };
+
+
+  /**
+   * Make the dimension available in function templates.
+   */
+  static const unsigned int dimension = dim;
+
+  /**
+   * Make the space-dimension available in function templates.
+   */
+  static const unsigned int space_dimension = spacedim;
+
+  /**
+   * Create an empty triangulation. Do not create any cells.
+   *
+   * @param smooth_grid Determines the level of smoothness of the mesh size
+   * function that should be enforced upon mesh refinement.
+   *
+   * @param check_for_distorted_cells Determines whether the triangulation
+   * should check whether any of the cells that are created by
+   * create_triangulation() or execute_coarsening_and_refinement() are
+   * distorted (see
+   * @ref GlossDistorted "distorted cells").
+   * If set, these two functions may throw an exception if they encounter
+   * distorted cells.
+   */
+  Triangulation (const MeshSmoothing smooth_grid = none,
+                 const bool check_for_distorted_cells = false);
+
+  /**
+   * Copy constructor.
+   *
+   * You should really use the @p copy_triangulation function, so we declare
+   * this function but let it throw an internal error. The reason for this is
+   * that we may want to use triangulation objects in collections. However,
+   * C++ containers require that the objects stored in them are copyable, so
+   * we need to provide a copy constructor. On the other hand, copying
+   * triangulations is so expensive that we do not want such objects copied by
+   * accident, for example in compiler-generated temporary objects. By
+   * defining a copy constructor but throwing an error, we satisfy the formal
+   * requirements of containers, but at the same time disallow actual copies.
+   * Finally, through the exception, one easily finds the places where code
+   * has to be changed to avoid copies.
+   */
+  Triangulation (const Triangulation<dim, spacedim> &t);
+
+  /**
+   * Delete the object and all levels of the hierarchy.
+   */
+  virtual ~Triangulation ();
+
+  /**
+   * Reset this triangulation into a virgin state by deleting all data.
+   *
+   * Note that this operation is only allowed if no subscriptions to this
+   * object exist any more, such as DoFHandler objects using it.
+   */
+  virtual void clear ();
+
+  /**
+   * Sets the mesh smoothing to @p mesh_smoothing. This overrides the
+   * MeshSmoothing given to the constructor. It is allowed to call this
+   * function only if the triangulation is empty.
+   */
+  virtual void set_mesh_smoothing (const MeshSmoothing mesh_smoothing);
+
+  /**
+   * If @p dim==spacedim, assign a boundary object to a certain part of the
+   * boundary of the triangulation. If a face with boundary number @p number
+   * is refined, this object is used to find the location of new vertices on
+   * the boundary (see the results section of step-49 for a more in-depth
+   * discussion of this, with examples).  It is also used for non-linear
+   * (i.e.: non-Q1) transformations of cells to the unit cell in shape
+   * function calculations.
+   *
+   * If @p dim!=spacedim the boundary object is in fact the exact manifold
+   * that the triangulation is approximating (for example a circle
+   * approximated by a polygon triangulation). As above, the refinement is
+   * made in such a way that the new points are located on the exact manifold.
+   *
+   * Numbers of boundary objects correspond to material numbers of faces at
+   * the boundary, for instance the material id in a UCD input file. They are
+   * not necessarily consecutive but must be in the range
+   * 0-(types::boundary_id-1).  Material IDs on boundaries are also called
+   * boundary indicators and are accessed with accessor functions of that
+   * name.
+   *
+   * The @p boundary_object is not copied and MUST persist until the
+   * triangulation is destroyed. This is also true for triangulations
+   * generated from this one by @p copy_triangulation.
+   *
+   * It is possible to remove or replace the boundary object during the
+   * lifetime of a non-empty triangulation. Usually, this is done before the
+   * first refinement and is dangerous afterwards. Removal of a boundary
+   * object is done by <tt>set_boundary(number)</tt>, i.e. the function of
+   * same name but only one argument. This operation then replaces the
+   * boundary object given before by a straight boundary approximation.
+   *
+   * @ingroup boundary
+   *
+   * @see
+   * @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+   */
+  void set_boundary (const types::manifold_id   number,
+                     const Boundary<dim,spacedim> &boundary_object);
+
+
+  /**
+   * Reset those parts of the boundary with the given number to use a straight
+   * boundary approximation. This is the default state of a triangulation, and
+   * undoes assignment of a different boundary object by the function of same
+   * name and two arguments.
+   *
+   * @ingroup boundary
+   *
+   * @see
+   * @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+   */
+  void set_boundary (const types::manifold_id number);
+
+  /**
+   * Assign a manifold object to a certain part of the triangulation. If
+   * an object with manifold number @p number is refined, this object is used
+   * to find the location of new vertices (see the results section of step-49
+   * for a more in-depth discussion of this, with examples).  It is also used
+   * for non-linear (i.e.: non-Q1) transformations of cells to the unit cell
+   * in shape function calculations.
+   *
+   * The @p manifold_object is not copied and MUST persist until the
+   * triangulation is destroyed. This is also true for triangulations
+   * generated from this one by @p copy_triangulation.
+   *
+   * It is possible to remove or replace the manifold object during the
+   * lifetime of a non-empty triangulation. Usually, this is done before the
+   * first refinement and is dangerous afterwards. Removal of a manifold
+   * object is done by <tt>set_manifold(number)</tt>, i.e. the function of
+   * same name but only one argument. This operation then replaces the
+   * manifold object given before by a straight manifold approximation.
+   *
+   * @ingroup manifold
+   *
+   * @see
+   * @ref GlossManifoldIndicator "Glossary entry on manifold indicators"
+   */
+  void set_manifold (const types::manifold_id   number,
+                     const Manifold<dim,spacedim> &manifold_object);
+
+
+  /**
+   * Reset those parts of the triangulation with the given manifold_id to use
+   * a FlatManifold object. This is the default state of a triangulation, and
+   * undoes assignment of a different Manifold object by the function of same
+   * name and two arguments.
+   *
+   * @ingroup manifold
+   *
+   * @see
+   * @ref GlossManifoldIndicator "Glossary entry on manifold indicators"
+   */
+  void set_manifold (const types::manifold_id number);
+
+  /**
+   * Set the manifold_id of all cells and faces to the given argument.
+   *
+   * @ingroup manifold
+   *
+   * @see
+   * @ref GlossManifoldIndicator "Glossary entry on manifold indicators"
+   */
+  void set_all_manifold_ids (const types::manifold_id number);
+
+  /**
+   * Set the manifold_id of all boundary faces to the given argument.
+   *
+   * @ingroup manifold
+   *
+   * @see
+   * @ref GlossManifoldIndicator "Glossary entry on manifold indicators"
+   */
+  void set_all_manifold_ids_on_boundary (const types::manifold_id number);
+
+  /**
+   * Set the manifold_id of all boundary faces and edges with given
+   * boundary_id @p b_id to the given manifold_id @p number.
+   *
+   * @ingroup manifold
+   *
+   * @see
+   * @ref GlossManifoldIndicator "Glossary entry on manifold indicators"
+   */
+  void set_all_manifold_ids_on_boundary (const types::boundary_id b_id,
+                                         const types::manifold_id number);
+
+
+  /**
+   * Return a constant reference to a boundary object used for this
+   * triangulation.  Number is the same as in @p set_boundary
+   *
+   * @ingroup boundary
+   *
+   * @see
+   * @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+   */
+  const Boundary<dim,spacedim> &get_boundary (const types::manifold_id number) const;
+
+  /**
+   * Return a constant reference to a Manifold object used for this
+   * triangulation.  Number is the same as in @p set_manifold
+   *
+   * @ingroup manifold
+   *
+   * @see
+   * @ref GlossManifoldIndicator "Glossary entry on manifold indicators"
+   */
+  const Manifold<dim,spacedim> &get_manifold (const types::manifold_id number) const;
+
+  /**
+   * Returns a vector containing all boundary indicators assigned to boundary
+   * faces of this Triangulation object. Note, that each boundary indicator is
+   * reported only once. The size of the return vector will represent the
+   * number of different indicators (which is greater than or equal to one).
+   *
+   * @ingroup boundary
+   *
+   * @see
+   * @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+   */
+  std::vector<types::boundary_id> get_boundary_ids() const;
+
+  /**
+   * Deprecated spelling of get_boundary_ids().
+   *
+   * @deprecated Use get_boundary_ids() instead.
+   */
+  std::vector<types::boundary_id> get_boundary_indicators() const DEAL_II_DEPRECATED;
+
+  /**
+   * Returns a vector containing all manifold indicators assigned to the
+   * objects of this Triangulation. Note, that each manifold indicator is
+   * reported only once. The size of the return vector will represent the
+   * number of different indicators (which is greater than or equal to one).
+   *
+   * @ingroup manifold
+   *
+   * @see
+   * @ref GlossManifoldIndicator "Glossary entry on manifold indicators"
+   */
+  std::vector<types::manifold_id> get_manifold_ids() const;
+
+  /**
+   * Copy @p old_tria to this triangulation. This operation is not cheap, so
+   * you should be careful with using this. We do not implement this function
+   * as a copy constructor, since it makes it easier to maintain collections
+   * of triangulations if you can assign them values later on.
+   *
+   * Keep in mind that this function also copies the pointer to the boundary
+   * descriptor previously set by the @p set_boundary function and to the
+   * Manifold object previously set by the set_manifold() function. You must
+   * therefore also guarantee that the boundary and manifold objects have a
+   * lifetime at least as long as the copied triangulation.
+   *
+   * This triangulation must be empty beforehand.
+   *
+   * The function is made @p virtual since some derived classes might want to
+   * disable or extend the functionality of this function.
+   *
+   * @note Calling this function triggers the 'copy' signal on old_tria, i.e.
+   * the triangulation being copied <i>from</i>.  It also triggers the
+   * 'create' signal of the current triangulation. See the section on signals
+   * in the general documentation for more information.
+   *
+   * @note The list of connections to signals is not copied from the old to
+   * the new triangulation since these connections were established to monitor
+   * how the old triangulation changes, not how any triangulation it may be
+   * copied to changes.
+   */
+  virtual void copy_triangulation (const Triangulation<dim, spacedim> &old_tria);
+
+  /**
+   * Create a triangulation from a list of vertices and a list of cells, each
+   * of the latter being a list of <tt>1<<dim</tt> vertex indices. The
+   * triangulation must be empty upon calling this function and the cell list
+   * should be useful (connected domain, etc.).
+   *
+   * Material data for the cells is given within the @p cells array, while
+   * boundary information is given in the @p subcelldata field.
+   *
+   * The numbering of vertices within the @p cells array is subject to some
+   * constraints; see the general class documentation for this.
+   *
+   * For conditions when this function can generate a valid triangulation, see
+   * the documentation of this class, and the GridIn and GridReordering class.
+   *
+   * If the <code>check_for_distorted_cells</code> flag was specified upon
+   * creation of this object, at the very end of its operation, the current
+   * function walks over all cells and verifies that none of the cells is
+   * deformed (see the entry on
+   * @ref GlossDistorted "distorted cells"
+   * in the glossary), where we call a cell deformed if the determinant of the
+   * Jacobian of the mapping from reference cell to real cell is negative at
+   * least at one of the vertices (this computation is done using the
+   * GeometryInfo::jacobian_determinants_at_vertices function). If there are
+   * deformed cells, this function throws an exception of kind
+   * DistortedCellList. Since this happens after all data structures have been
+   * set up, you can catch and ignore this exception if you know what you do
+   * -- for example, it may be that the determinant is zero (indicating that
+   * you have collapsed edges in a cell) but that this is ok because you
+   * didn't intend to integrate on this cell anyway. On the other hand,
+   * deformed cells are often a sign of a mesh that is too coarse to resolve
+   * the geometry of the domain, and in this case ignoring the exception is
+   * probably unwise.
+   *
+   * @note This function is used in step-14 .
+   *
+   * @note This function triggers the create signal after doing its work. See
+   * the section on signals in the general documentation of this class.
+   *
+   * @note The check for distorted cells is only done if dim==spacedim, as
+   * otherwise cells can legitimately be twisted if the manifold they describe
+   * is twisted.
+   */
+  virtual void create_triangulation (const std::vector<Point<spacedim> >    &vertices,
+                                     const std::vector<CellData<dim> > &cells,
+                                     const SubCellData                 &subcelldata);
+
+  /**
+   * For backward compatibility, only. This function takes the cell data in
+   * the ordering as requested by deal.II versions up to 5.2, converts it to
+   * the new (lexicographic) ordering and calls create_triangulation().
+   *
+   * @note This function internally calls create_triangulation and therefore
+   * can throw the same exception as the other function.
+   */
+  virtual void create_triangulation_compatibility (
+    const std::vector<Point<spacedim> >    &vertices,
+    const std::vector<CellData<dim> > &cells,
+    const SubCellData                 &subcelldata);
+
+  /**
+   * Revert or flip the direction_flags of a dim<spacedim triangulation, see
+   * @ref GlossDirectionFlag.
+   *
+   * This function throws an exception if dim equals spacedim.
+   */
+  void flip_all_direction_flags();
+
+  /**
+   * @name Mesh refinement
+   * @{
+   */
+
+  /**
+   * Flag all active cells for refinement.  This will refine all cells of all
+   * levels which are not already refined (i.e. only cells are refined which
+   * do not yet have children). The cells are only flagged, not refined, thus
+   * you have the chance to save the refinement flags.
+   */
+  void set_all_refine_flags ();
+
+  /**
+   * Refine all cells @p times times, by alternatingly calling
+   * set_all_refine_flags and execute_coarsening_and_refinement.
+   *
+   * The latter function may throw an exception if it creates cells that are
+   * distorted (see its documentation for an explanation). This exception will
+   * be propagated through this function if that happens, and you may not get
+   * the actual number of refinement steps in that case.
+   *
+   * @note This function triggers the pre- and post-refinement signals before
+   * and after doing each individual refinement cycle (i.e. more than once if
+   * times > 1). See the section on signals in the general documentation of
+   * this class.
+   */
+  void refine_global (const unsigned int times = 1);
+
+  /**
+   * Execute both refinement and coarsening of the triangulation.
+   *
+   * The function resets all refinement and coarsening flags to false. It uses
+   * the user flags for internal purposes. They will therefore be overwritten
+   * by undefined content.
+   *
+   * To allow user programs to fix up these cells if that is desired, this
+   * function after completing all other work may throw an exception of type
+   * DistortedCellList that contains a list of those cells that have been
+   * refined and have at least one child that is distorted. The function does
+   * not create such an exception if no cells have created distorted children.
+   * Note that for the check for distorted cells to happen, the
+   * <code>check_for_distorted_cells</code> flag has to be specified upon
+   * creation of a triangulation object.
+   *
+   * See the general docs for more information.
+   *
+   * @note This function triggers the pre- and post-refinement signals before
+   * and after doing its work. See the section on signals in the general
+   * documentation of this class.
+   *
+   * @note If the boundary description is sufficiently irregular, it can
+   * happen that some of the children produced by mesh refinement are
+   * distorted (see the extensive discussion on
+   * @ref GlossDistorted "distorted cells").
+   *
+   * @note This function is <tt>virtual</tt> to allow derived classes to
+   * insert hooks, such as saving refinement flags and the like (see e.g. the
+   * PersistentTriangulation class).
+   */
+  virtual void execute_coarsening_and_refinement ();
+
+  /**
+   * Do both preparation for refinement and coarsening as well as mesh
+   * smoothing.
+   *
+   * Regarding the refinement process it fixes the closure of the refinement
+   * in <tt>dim>=2</tt> (make sure that no two cells are adjacent with a
+   * refinement level differing with more than one), etc.  It performs some
+   * mesh smoothing if the according flag was given to the constructor of this
+   * class.  The function returns whether additional cells have been flagged
+   * for refinement.
+   *
+   * See the general doc of this class for more information on smoothing upon
+   * refinement.
+   *
+   * Regarding the coarsening part, flagging and deflagging cells in
+   * preparation of the actual coarsening step are done. This includes
+   * deleting coarsen flags from cells which may not be deleted (e.g. because
+   * one neighbor is more refined than the cell), doing some smoothing, etc.
+   *
+   * The effect is that only those cells are flagged for coarsening which will
+   * actually be coarsened. This includes the fact that all flagged cells
+   * belong to parent cells of which all children are flagged.
+   *
+   * The function returns whether some cells' flagging has been changed in the
+   * process.
+   *
+   * This function uses the user flags, so store them if you still need them
+   * afterwards.
+   */
+  virtual bool prepare_coarsening_and_refinement ();
+
+  /*
+   * @}
+   */
+
+  /**
+   * @name Keeping up with what happens to a triangulation
+   * @{
+   */
+
+
+  /**
+   * Used to inform functions in derived classes how the cell with the given
+   * cell_iterator is going to change. Note that this may be different than
+   * the refine_flag() and coarsen_flag() in the cell_iterator in parallel
+   * calculations because of refinement constraints that this machine does not
+   * see.
+   */
+  enum CellStatus
+  {
+    /**
+     * The cell will not be refined or coarsened and might or might not move
+     * to a different processor.
+     */
+    CELL_PERSIST,
+    /**
+     * The cell will be or was refined.
+     */
+    CELL_REFINE,
+    /**
+     * The children of this cell will be or were coarsened into this cell.
+     */
+    CELL_COARSEN,
+    /**
+     * Invalid status. Will not occur for the user.
+     */
+    CELL_INVALID
+  };
+
+  /**
+   * Combiner that reduces the return values of all slots connected to the
+   * cell_weight signal to a single number: given the iterator range over the
+   * slot results, it yields the sum of all of them.
+   */
+  template<typename T>
+  struct CellWeightSum
+  {
+    typedef T result_type;
+
+    template<typename InputIterator>
+    T operator()(InputIterator first, InputIterator last) const
+    {
+      // An empty range means that no slot is connected; the result is
+      // then simply a default-constructed T
+      if (first == last)
+        return T();
+
+      // Seed the accumulator with the first value, then fold in the
+      // remaining ones in order
+      T total = *first;
+      for (++first; first != last; ++first)
+        total += *first;
+
+      return total;
+    }
+  };
+
+  /**
+   * A structure that has boost::signal objects for a number of actions that a
+   * triangulation can do to itself. Please refer to the "Getting notice when
+   * a triangulation changes" section in the general documentation of the
+   * Triangulation class for more information and examples.
+   *
+   * For documentation on signals, see
+   * http://www.boost.org/doc/libs/release/libs/signals2 .
+   */
+  struct Signals
+  {
+    /**
+     * This signal is triggered whenever the
+     * Triangulation::create_triangulation or
+     * Triangulation::copy_triangulation() is called. This signal is also
+     * triggered when loading a triangulation from an archive via
+     * Triangulation::load().
+     */
+    boost::signals2::signal<void ()> create;
+
+    /**
+     * This signal is triggered at the beginning of execution of the
+     * Triangulation::execute_coarsening_and_refinement() function (which is
+     * itself called by other functions such as
+     * Triangulation::refine_global()). At the time this signal is triggered,
+     * the triangulation is still unchanged.
+     */
+    boost::signals2::signal<void ()> pre_refinement;
+
+    /**
+     * This signal is triggered at the end of execution of the
+     * Triangulation::execute_coarsening_and_refinement() function when the
+     * triangulation has reached its final state.
+     */
+    boost::signals2::signal<void ()> post_refinement;
+
+    /**
+     * This signal is triggered for each cell that is going to be coarsened.
+     *
+     * @note This signal is triggered with the immediate parent cell of a set
+     * of active cells as argument. The children of this parent cell will
+     * subsequently be coarsened away.
+     */
+    boost::signals2::signal<void (const typename Triangulation<dim, spacedim>::cell_iterator &cell)> pre_coarsening_on_cell;
+
+    /**
+     * This signal is triggered for each cell that just has been refined.
+     *
+     * @note The signal parameter @p cell corresponds to the immediate parent
+     * cell of a set of newly created active cells.
+     */
+    boost::signals2::signal<void (const typename Triangulation<dim, spacedim>::cell_iterator &cell)> post_refinement_on_cell;
+
+    /**
+     * This signal is triggered whenever the triangulation owning the signal
+     * is copied by another triangulation using
+     * Triangulation::copy_triangulation() (i.e. it is triggered on the
+     * <i>old</i> triangulation, but the new one is passed as an argument).
+     */
+    boost::signals2::signal<void (const Triangulation<dim, spacedim> &destination_tria)> copy;
+
+    /**
+     * This signal is triggered whenever the Triangulation::clear() function
+     * is called. This signal is also triggered when loading a triangulation
+     * from an archive via Triangulation::load() as the previous content of
+     * the triangulation is first destroyed.
+     */
+    boost::signals2::signal<void ()> clear;
+
+    /**
+     * This is a catch-all signal that is triggered whenever the create,
+     * post_refinement, or clear signals are triggered. In effect, it can be
+     * used to indicate to an object connected to the signal that the
+     * triangulation has been changed, whatever the exact cause of the change.
+     *
+     * @note The cell-level signals @p pre_coarsening_on_cell and @p
+     * post_refinement_on_cell are not connected to this signal.
+     */
+    boost::signals2::signal<void ()> any_change;
+
+    /**
+     * This signal is triggered for each cell during every automatic or manual
+     * repartitioning. This signal is somewhat special in that it is only
+     * triggered for distributed parallel calculations and only if functions
+     * are connected to it. It is intended to allow a weighted repartitioning
+     * of the domain to balance the computational load across processes in a
+     * different way than balancing the number of cells. Any connected
+     * function is expected to take an iterator to a cell, and a CellStatus
+     * argument that indicates whether this cell is going to be refined,
+     * coarsened or left untouched (see the documentation of the CellStatus
+     * enum for more information). The function is expected to return an
+     * unsigned integer, which is interpreted as the additional computational
+     * load of this cell. If this cell is going to be coarsened, the signal is
+     * called for the parent cell and you need to provide the weight of the
+     * future parent cell. If this cell is going to be refined the function
+     * should return a weight, which will be equally assigned to every future
+     * child cell of the current cell. As a reference a value of 1000 is added
+     * for every cell to the total weight. This means a signal return value of
+     * 1000 (resulting in a weight of 2000) means that it is twice as
+     * expensive for a process to handle this particular cell. If several
+     * functions are connected to this signal, their return values will be
+     * summed to calculate the final weight.
+     */
+    boost::signals2::signal<unsigned int (const cell_iterator &,
+                                          const CellStatus),
+                                                CellWeightSum<unsigned int> > cell_weight;
+  };
+
+  /**
+   * Signals for the various actions that a triangulation can do to itself.
+   */
+  mutable Signals signals;
+
+  /*
+   * @}
+   */
+
+  /**
+   * @name History of a triangulation
+   * @{
+   */
+
+  /**
+   * Save the addresses of the cells which are flagged for refinement to @p
+   * out.  For usage, read the general documentation for this class.
+   */
+  void save_refine_flags (std::ostream &out) const;
+
+  /**
+   * Same as above, but store the flags to a bitvector rather than to a file.
+   */
+  void save_refine_flags (std::vector<bool> &v) const;
+
+  /**
+   * Read the information stored by @p save_refine_flags.
+   */
+  void load_refine_flags (std::istream &in);
+
+  /**
+   * Read the information stored by @p save_refine_flags.
+   */
+  void load_refine_flags (const std::vector<bool> &v);
+
+  /**
+   * Analogue to @p save_refine_flags.
+   */
+  void save_coarsen_flags (std::ostream &out) const;
+
+  /**
+   * Same as above, but store the flags to a bitvector rather than to a file.
+   */
+  void save_coarsen_flags (std::vector<bool> &v) const;
+
+  /**
+   * Analogue to @p load_refine_flags; @p in is the stream to read from.
+   */
+  void load_coarsen_flags (std::istream &in);
+
+  /**
+   * Analogue to @p load_refine_flags.
+   */
+  void load_coarsen_flags (const std::vector<bool> &v);
+
+  /**
+   * Return whether this triangulation has ever undergone anisotropic (as
+   * opposed to only isotropic) refinement.
+   */
+  bool get_anisotropic_refinement_flag() const;
+
+  /*
+   * @}
+   */
+
+  /**
+   * @name User data
+   * @{
+   */
+
+  /**
+   * Clear all user flags.  See also
+   * @ref GlossUserFlags.
+   */
+  void clear_user_flags ();
+
+  /**
+   * Save all user flags. See the general documentation for this class and the
+   * documentation for the @p save_refine_flags for more details.  See also
+   * @ref GlossUserFlags.
+   */
+  void save_user_flags (std::ostream &out) const;
+
+  /**
+   * Same as above, but store the flags to a bitvector rather than to a file.
+   * The output vector is resized if necessary.  See also
+   * @ref GlossUserFlags.
+   */
+  void save_user_flags (std::vector<bool> &v) const;
+
+  /**
+   * Read the information stored by @p save_user_flags.  See also
+   * @ref GlossUserFlags.
+   */
+  void load_user_flags (std::istream &in);
+
+  /**
+   * Read the information stored by @p save_user_flags.  See also
+   * @ref GlossUserFlags.
+   */
+  void load_user_flags (const std::vector<bool> &v);
+
+  /**
+   * Clear all user flags on lines.  See also
+   * @ref GlossUserFlags.
+   */
+  void clear_user_flags_line ();
+
+  /**
+   * Save the user flags on lines.  See also
+   * @ref GlossUserFlags.
+   */
+  void save_user_flags_line (std::ostream &out) const;
+
+  /**
+   * Same as above, but store the flags to a bitvector rather than to a file.
+   * The output vector is resized if necessary.  See also
+   * @ref GlossUserFlags.
+   */
+  void save_user_flags_line (std::vector<bool> &v) const;
+
+  /**
+   * Load the user flags located on lines.  See also
+   * @ref GlossUserFlags.
+   */
+  void load_user_flags_line (std::istream &in);
+
+  /**
+   * Load the user flags located on lines.  See also
+   * @ref GlossUserFlags.
+   */
+  void load_user_flags_line (const std::vector<bool> &v);
+
+  /**
+   * Clear all user flags on quads.  See also
+   * @ref GlossUserFlags.
+   */
+  void clear_user_flags_quad ();
+
+  /**
+   * Save the user flags on quads.  See also
+   * @ref GlossUserFlags.
+   */
+  void save_user_flags_quad (std::ostream &out) const;
+
+  /**
+   * Same as above, but store the flags to a bitvector rather than to a file.
+   * The output vector is resized if necessary.  See also
+   * @ref GlossUserFlags.
+   */
+  void save_user_flags_quad (std::vector<bool> &v) const;
+
+  /**
+   * Load the user flags located on quads.  See also
+   * @ref GlossUserFlags.
+   */
+  void load_user_flags_quad (std::istream &in);
+
+  /**
+   * Load the user flags located on quads.  See also
+   * @ref GlossUserFlags.
+   */
+  void load_user_flags_quad (const std::vector<bool> &v);
+
+
+  /**
+   * Clear all user flags on hexs.  See also
+   * @ref GlossUserFlags.
+   */
+  void clear_user_flags_hex ();
+
+  /**
+   * Save the user flags on hexs.  See also
+   * @ref GlossUserFlags.
+   */
+  void save_user_flags_hex (std::ostream &out) const;
+
+  /**
+   * Same as above, but store the flags to a bitvector rather than to a file.
+   * The output vector is resized if necessary.  See also
+   * @ref GlossUserFlags.
+   */
+  void save_user_flags_hex (std::vector<bool> &v) const;
+
+  /**
+   * Load the user flags located on hexs.  See also
+   * @ref GlossUserFlags.
+   */
+  void load_user_flags_hex (std::istream &in);
+
+  /**
+   * Load the user flags located on hexs.  See also
+   * @ref GlossUserFlags.
+   */
+  void load_user_flags_hex (const std::vector<bool> &v);
+
+  /**
+   * Clear all user pointers and indices and allow the use of both for next
+   * access.  See also
+   * @ref GlossUserData.
+   */
+  void clear_user_data ();
+
+  /**
+   * Save all user indices. The output vector is resized if necessary. See
+   * also
+   * @ref GlossUserData.
+   */
+  void save_user_indices (std::vector<unsigned int> &v) const;
+
+  /**
+   * Read the information stored by save_user_indices().  See also
+   * @ref GlossUserData.
+   */
+  void load_user_indices (const std::vector<unsigned int> &v);
+
+  /**
+   * Save all user pointers. The output vector is resized if necessary.  See
+   * also
+   * @ref GlossUserData.
+   */
+  void save_user_pointers (std::vector<void *> &v) const;
+
+  /**
+   * Read the information stored by save_user_pointers().  See also
+   * @ref GlossUserData.
+   */
+  void load_user_pointers (const std::vector<void *> &v);
+
+  /**
+   * Save the user indices on lines. The output vector is resized if
+   * necessary.  See also
+   * @ref GlossUserData.
+   */
+  void save_user_indices_line (std::vector<unsigned int> &v) const;
+
+  /**
+   * Load the user indices located on lines.  See also
+   * @ref GlossUserData.
+   */
+  void load_user_indices_line (const std::vector<unsigned int> &v);
+
+  /**
+   * Save the user indices on quads. The output vector is resized if
+   * necessary.  See also
+   * @ref GlossUserData.
+   */
+  void save_user_indices_quad (std::vector<unsigned int> &v) const;
+
+  /**
+   * Load the user indices located on quads.  See also
+   * @ref GlossUserData.
+   */
+  void load_user_indices_quad (const std::vector<unsigned int> &v);
+
+  /**
+   * Save the user indices on hexes. The output vector is resized if
+   * necessary.  See also
+   * @ref GlossUserData.
+   */
+  void save_user_indices_hex (std::vector<unsigned int> &v) const;
+
+  /**
+   * Load the user indices located on hexs.  See also
+   * @ref GlossUserData.
+   */
+  void load_user_indices_hex (const std::vector<unsigned int> &v);
+  /**
+   * Save the user pointers on lines. The output vector is resized if
+   * necessary.  See also
+   * @ref GlossUserData.
+   */
+  void save_user_pointers_line (std::vector<void *> &v) const;
+
+  /**
+   * Load the user pointers located on lines.  See also
+   * @ref GlossUserData.
+   */
+  void load_user_pointers_line (const std::vector<void *> &v);
+
+  /**
+   * Save the user pointers on quads. The output vector is resized if
+   * necessary.  See also
+   * @ref GlossUserData.
+   */
+  void save_user_pointers_quad (std::vector<void *> &v) const;
+
+  /**
+   * Load the user pointers located on quads.  See also
+   * @ref GlossUserData.
+   */
+  void load_user_pointers_quad (const std::vector<void *> &v);
+
+  /**
+   * Save the user pointers on hexes. The output vector is resized if
+   * necessary.  See also
+   * @ref GlossUserData.
+   */
+  void save_user_pointers_hex (std::vector<void *> &v) const;
+
+  /**
+   * Load the user pointers located on hexs.  See also
+   * @ref GlossUserData.
+   */
+  void load_user_pointers_hex (const std::vector<void *> &v);
+
+  /*
+   * @}
+   */
+
+  /**
+   * @name Cell iterator functions
+   * @{
+   */
+
+  /**
+   * Iterator to the first used cell on level @p level.
+   */
+  cell_iterator        begin       (const unsigned int level = 0) const;
+
+  /**
+   * Iterator to the first active cell on level @p level. If the given level
+   * does not contain any active cells (i.e., all cells on this level are
+   * further refined), then this function returns
+   * <code>end_active(level)</code> so that loops of the kind
+   *  @code
+   *    for (cell=tria.begin_active(level); cell!=tria.end_active(level); ++cell)
+   *      ...
+   *  @endcode
+   * have zero iterations, as may be expected if there are no active cells on
+   * this level.
+   */
+  active_cell_iterator begin_active(const unsigned int level = 0) const;
+
+  /**
+   * Iterator past the end; this iterator serves for comparisons of iterators
+   * with past-the-end or before-the-beginning states.
+   */
+  cell_iterator        end () const;
+
+  /**
+   * Return an iterator which is the first iterator not on the given level.
+   * If @p level is the last level, then this returns <tt>end()</tt>.
+   */
+  cell_iterator        end (const unsigned int level) const;
+
+  /**
+   * Return an active iterator which is the first active iterator not on the
+   * given level. If @p level is the last level, then this returns
+   * <tt>end()</tt>.
+   */
+  active_cell_iterator end_active (const unsigned int level) const;
+
+
+  /**
+   * Return an iterator pointing to the last used cell.
+   */
+  cell_iterator        last () const;
+
+  /**
+   * Return an iterator pointing to the last active cell.
+   */
+  active_cell_iterator last_active () const;
+
+  /**
+   * @name Cell iterator functions returning ranges of iterators
+   */
+
+  /**
+   * Return an iterator range that contains all cells (active or not) that
+   * make up this triangulation. Such a range is useful to initialize range-
+   * based for loops as supported by C++11. See the example in the
+   * documentation of active_cell_iterators().
+   *
+   * @return The half open range <code>[this->begin(), this->end())</code>
+   *
+   * @ingroup CPP11
+   */
+  IteratorRange<cell_iterator>        cell_iterators () const;
+
+  /**
+   * Return an iterator range that contains all active cells that make up this
+   * triangulation. Such a range is useful to initialize range-based for loops
+   * as supported by C++11, see also
+   * @ref CPP11 "C++11 standard".
+   *
+   * Range-based for loops are useful in that they require much less code than
+   * traditional loops (see <a href="http://en.wikipedia.org/wiki/C%2B%2B11
+   * #Range-based_for_loop">here</a> for a discussion of how they work). An
+   * example is that without range-based for loops, one often writes code such
+   * as the following (assuming for a moment that our goal is setting the user
+   * flag on every active cell):
+   * @code
+   *   Triangulation<dim> triangulation;
+   *   ...
+   *   typename Triangulation<dim>::active_cell_iterator
+   *     cell = triangulation.begin_active(),
+   *     endc = triangulation.end();
+   *   for (; cell!=endc; ++cell)
+   *     cell->set_user_flag();
+   * @endcode
+   * Using C++11's range-based for loops, this is now entirely equivalent to
+   * the following:
+   * @code
+   *   Triangulation<dim> triangulation;
+   *   ...
+   *   for (auto cell : triangulation.active_cell_iterators())
+   *     cell->set_user_flag();
+   * @endcode
+   * To use this feature, you need a compiler that supports C++11.
+   *
+   * @return The half open range <code>[this->begin_active(),
+   * this->end())</code>
+   *
+   * @ingroup CPP11
+   */
+  IteratorRange<active_cell_iterator> active_cell_iterators () const;
+
+  /**
+   * Return an iterator range that contains all cells (active or not) that
+   * make up the given level of this triangulation. Such a range is useful to
+   * initialize range-based for loops as supported by C++11. See the example
+   * in the documentation of active_cell_iterators().
+   *
+   * @param[in] level A given level in the refinement hierarchy of this
+   * triangulation.
+   * @return The half open range <code>[this->begin(level),
+   * this->end(level))</code>
+   *
+   * @pre level must be less than this->n_levels().
+   *
+   * @ingroup CPP11
+   */
+  IteratorRange<cell_iterator>        cell_iterators_on_level (const unsigned int level) const;
+
+  /**
+   * Return an iterator range that contains all active cells that make up the
+   * given level of this triangulation. Such a range is useful to initialize
+   * range-based for loops as supported by C++11. See the example in the
+   * documentation of active_cell_iterators().
+   *
+   * @param[in] level A given level in the refinement hierarchy of this
+   * triangulation.
+   * @return The half open range <code>[this->begin_active(level),
+   * this->end(level))</code>
+   *
+   * @pre level must be less than this->n_levels().
+   *
+   * @ingroup CPP11
+   */
+  IteratorRange<active_cell_iterator> active_cell_iterators_on_level (const unsigned int level) const;
+
+  /*
+   * @}
+   */
+
+  /*---------------------------------------*/
+  /*---------------------------------------*/
+
+  /**
+   * @name Face iterator functions
+   * @{
+   */
+
+  /**
+   * Iterator to the first used face.
+   */
+  face_iterator        begin_face       () const;
+
+  /**
+   * Iterator to the first active face.
+   */
+  active_face_iterator begin_active_face() const;
+
+  /**
+   * Iterator past the end; this iterator serves for comparisons of iterators
+   * with past-the-end or before-the-beginning states.
+   */
+  face_iterator        end_face () const;
+
+  /*
+   * @}
+   */
+
+  /*---------------------------------------*/
+  /*---------------------------------------*/
+
+  /**
+   * @name Vertex iterator functions
+   * @{
+   */
+
+  /**
+   * Iterator to the first used vertex. This function can only be used if dim
+   * is not one.
+   */
+  vertex_iterator        begin_vertex() const;
+
+  /**
+   * Iterator to the first active vertex. Because all vertices are active,
+   * begin_vertex() and begin_active_vertex() return the same vertex. This
+   * function can only be used if dim is not one.
+   */
+  active_vertex_iterator begin_active_vertex() const;
+
+  /**
+   * Iterator past the end; this iterator serves for comparisons of iterators
+   * with past-the-end or before-the-beginning states. This function can only
+   * be used if dim is not one.
+   */
+  vertex_iterator        end_vertex() const;
+
+  /*
+   * @}
+   */
+
+  /**
+   * @name Information about the triangulation
+   * @{
+   */
+
+  /**
+   * In the following, most functions are provided in two versions, with and
+   * without an argument describing the level. The versions with this argument
+   * are only applicable for objects describing the cells of the present
+   * triangulation. For example: in 2D <tt>n_lines(level)</tt> cannot be
+   * called, only <tt>n_lines()</tt>, as lines are faces in 2D and therefore
+   * have no level.
+   */
+
+  /**
+   * Return the total number of used lines, active or not.
+   */
+  unsigned int n_lines () const;
+
+  /**
+   * Return the total number of used lines, active or not on level @p level.
+   */
+  unsigned int n_lines (const unsigned int level) const;
+
+  /**
+   * Return the total number of active lines.
+   */
+  unsigned int n_active_lines () const;
+
+  /**
+   * Return the total number of active lines, on level @p level.
+   */
+  unsigned int n_active_lines (const unsigned int level) const;
+
+  /**
+   * Return the total number of used quads, active or not.
+   */
+  unsigned int n_quads () const;
+
+  /**
+   * Return the total number of used quads, active or not on level @p level.
+   */
+  unsigned int n_quads (const unsigned int level) const;
+
+  /**
+   * Return the total number of active quads.
+   */
+  unsigned int n_active_quads () const;
+
+  /**
+   * Return the total number of active quads on level @p level.
+   */
+  unsigned int n_active_quads (const unsigned int level) const;
+
+  /**
+   * Return the total number of used hexahedra, active or not.
+   */
+  unsigned int n_hexs() const;
+
+  /**
+   * Return the total number of used hexahedra, active or not on level @p
+   * level.
+   */
+  unsigned int n_hexs(const unsigned int level) const;
+
+  /**
+   * Return the total number of active hexahedra.
+   */
+  unsigned int n_active_hexs() const;
+
+  /**
+   * Return the total number of active hexahedra on level @p
+   * level.
+   */
+  unsigned int n_active_hexs(const unsigned int level) const;
+
+  /**
+   * Return the total number of used cells, active or not.  Maps to
+   * <tt>n_lines()</tt> in one space dimension and so on.
+   */
+  unsigned int n_cells () const;
+
+  /**
+   * Return the total number of used cells, active or not, on level @p level.
+   * Maps to <tt>n_lines(level)</tt> in one space dimension and so on.
+   */
+  unsigned int n_cells (const unsigned int level) const;
+
+  /**
+   * Return the total number of active cells. Maps to
+   * <tt>n_active_lines()</tt> in one space dimension and so on.
+   */
+  unsigned int n_active_cells () const;
+
+  /**
+   * Return the total number of active cells. For the current class, this is
+   * the same as n_active_cells(). However, the function may be overridden in
+   * derived classes (e.g., in parallel::distributed::Triangulation) where it
+   * may return a value greater than the number of active cells reported by
+   * the triangulation object on the current processor.
+   */
+  virtual types::global_dof_index n_global_active_cells () const;
+
+
+  /**
+   * Return the total number of active cells on level @p level.  Maps to
+   * <tt>n_active_lines(level)</tt> in one space dimension and so on.
+   */
+  unsigned int n_active_cells (const unsigned int level) const;
+
+  /**
+   * Return the total number of used faces, active or not.  In 2D, the result
+   * equals n_lines(), while in 3D it equals n_quads(). Since there are no
+   * face objects in 1d, the function returns zero in 1d.
+   */
+  unsigned int n_faces () const;
+
+  /**
+   * Return the total number of active faces.  In 2D, the
+   * result equals n_active_lines(), while in 3D it equals n_active_quads().
+   * Since there are no face objects in 1d, the function returns zero in 1d.
+   */
+  unsigned int n_active_faces () const;
+
+  /**
+   * Return the number of levels in this triangulation.
+   *
+   * @note Internally, triangulations store data in levels, and there may be
+   * more levels in this data structure than one may think -- for example,
+   * imagine a triangulation that we just got by coarsening the highest level
+   * so that it was completely depopulated. That level is not removed, since
+   * it will most likely be repopulated soon by the next refinement process.
+   * As a consequence, if you happened to run through raw cell iterators
+   * (which you can't do as a user of this class, but can internally), then
+   * the number of objects in the levels hierarchy is larger than the level of
+   * the most refined cell plus one. On the other hand, since this is rarely
+   * what a user of this class cares about, the function really just returns
+   * the level of the most refined active cell plus one. (The plus one is
+   * because in a coarse, unrefined mesh, all cells have level zero -- making
+   * the number of levels equal to one.)
+   */
+  unsigned int n_levels () const;
+
+  /**
+   * Return the number of levels in use. This function is equivalent to
+   * n_levels() for a serial Triangulation, but gives the maximum of
+   * n_levels() over all processors for a parallel::distributed::Triangulation
+   * and therefore can be larger than n_levels().
+   */
+  virtual
+  unsigned int n_global_levels () const;
+
+  /**
+   * Return true if the triangulation has hanging nodes.
+   *
+   * The function is made virtual since the result can be interpreted in
+   * different ways, depending on whether the triangulation lives only on a
+   * single processor, or may be distributed as done in the
+   * parallel::distributed::Triangulation class (see there for a description
+   * of what the function is supposed to do in the parallel context).
+   */
+  virtual
+  bool has_hanging_nodes() const;
+
+  /**
+   * Return the total number of vertices.  Some of them may not be used, which
+   * usually happens upon coarsening of a triangulation when some vertices are
+   * discarded, but we do not want to renumber the remaining ones, leading to
+   * holes in the numbers of used vertices.  You can get the number of used
+   * vertices using the n_used_vertices() function.
+   */
+  unsigned int n_vertices () const;
+
+  /**
+   * Return a constant reference to all the vertices present in this
+   * triangulation. Note that not necessarily all vertices in this array are
+   * actually used; for example, if you coarsen a mesh, then some vertices are
+   * deleted, but their positions in this array are unchanged as the indices
+   * of vertices are only allocated once. You can find out about which
+   * vertices are actually used by the function get_used_vertices().
+   */
+  const std::vector<Point<spacedim> > &
+  get_vertices () const;
+
+  /**
+   * Return the number of vertices that are presently in use, i.e. belong to
+   * at least one used element.
+   */
+  unsigned int n_used_vertices () const;
+
+  /**
+   * Return @p true if the vertex with this @p index is used.
+   */
+  bool vertex_used (const unsigned int index) const;
+
+  /**
+   * Return a constant reference to the array of @p bools indicating whether
+   * an entry in the vertex array is used or not.
+   */
+  const std::vector<bool> &
+  get_used_vertices () const;
+
+  /**
+   * Return the maximum number of cells meeting at a common vertex. Since this
+   * number is an invariant under refinement, only the cells on the coarsest
+   * level are considered. The operation is thus reasonably fast. The
+   * invariance is only true for sufficiently many cells in the coarsest
+   * triangulation (e.g. for a single cell one would be returned), so a
+   * minimum of four is returned in two dimensions, 8 in three dimensions,
+   * etc, which is how many cells meet if the triangulation is refined.
+   *
+   * In one space dimension, two is returned.
+   */
+  unsigned int max_adjacent_cells () const;
+
+  /**
+   * This function always returns @p invalid_subdomain_id but is there for
+   * compatibility with the derived @p parallel::distributed::Triangulation
+   * class. For distributed parallel triangulations this function returns the
+   * subdomain id of those cells that are owned by the current processor.
+   */
+  virtual types::subdomain_id locally_owned_subdomain () const;
+
+  /**
+   * Return a reference to the current object.
+   *
+   * This doesn't seem to be very useful but allows one to write code that can
+   * access the underlying triangulation for anything that satisfies the
+   * @ref ConceptMeshType "MeshType concept"
+   * (which may not only be a triangulation, but also a DoFHandler, for
+   * example).
+   */
+  Triangulation<dim,spacedim> &
+  get_triangulation ();
+
+  /**
+   * Return a reference to the current object. This is the const-version of
+   * the previous function.
+   */
+  const Triangulation<dim,spacedim> &
+  get_triangulation () const;
+
+
+  /*
+   * @}
+   */
+
+  /**
+   * @name Internal information about the number of objects
+   * @{
+   */
+
+  /**
+   * Total number of lines, used or unused.
+   *
+   * @note This function really exports internal information about the
+   * triangulation. It shouldn't be used in applications. The function is only
+   * part of the public interface of this class because it is used in some of
+   * the other classes that build very closely on it (in particular, the
+   * DoFHandler class).
+   */
+  unsigned int n_raw_lines () const;
+
+  /**
+   * Number of lines, used or unused, on the given level.
+   *
+   * @note This function really exports internal information about the
+   * triangulation. It shouldn't be used in applications. The function is only
+   * part of the public interface of this class because it is used in some of
+   * the other classes that build very closely on it (in particular, the
+   * DoFHandler class).
+   */
+  unsigned int n_raw_lines (const unsigned int level) const;
+
+  /**
+   * Total number of quads, used or unused.
+   *
+   * @note This function really exports internal information about the
+   * triangulation. It shouldn't be used in applications. The function is only
+   * part of the public interface of this class because it is used in some of
+   * the other classes that build very closely on it (in particular, the
+   * DoFHandler class).
+   */
+  unsigned int n_raw_quads () const;
+
+  /**
+   * Number of quads, used or unused, on the given level.
+   *
+   * @note This function really exports internal information about the
+   * triangulation. It shouldn't be used in applications. The function is only
+   * part of the public interface of this class because it is used in some of
+   * the other classes that build very closely on it (in particular, the
+   * DoFHandler class).
+   */
+  unsigned int n_raw_quads (const unsigned int level) const;
+
+  /**
+   * Total number of hexs, used or unused.
+   *
+   * @note This function really exports internal information about the
+   * triangulation. It shouldn't be used in applications. The function is only
+   * part of the public interface of this class because it is used in some of
+   * the other classes that build very closely on it (in particular, the
+   * DoFHandler class).
+   */
+  unsigned int n_raw_hexs () const;
+
+  /**
+   * Number of hexs, used or unused, on the given level.
+   *
+   * @note This function really exports internal information about the
+   * triangulation. It shouldn't be used in applications. The function is only
+   * part of the public interface of this class because it is used in some of
+   * the other classes that build very closely on it (in particular, the
+   * DoFHandler class).
+   */
+  unsigned int n_raw_hexs (const unsigned int level) const;
+
+  /**
+   * Number of cells, used or unused, on the given level.
+   *
+   * @note This function really exports internal information about the
+   * triangulation. It shouldn't be used in applications. The function is only
+   * part of the public interface of this class because it is used in some of
+   * the other classes that build very closely on it (in particular, the
+   * DoFHandler class).
+   */
+  unsigned int n_raw_cells (const unsigned int level) const;
+
+  /**
+   * Return the total number of faces, used or not. In 2d, the result equals
+   * n_raw_lines(), while in 3d it equals n_raw_quads().
+   *
+   * @note This function really exports internal information about the
+   * triangulation. It shouldn't be used in applications. The function is only
+   * part of the public interface of this class because it is used in some of
+   * the other classes that build very closely on it (in particular, the
+   * DoFHandler class).
+   */
+  unsigned int n_raw_faces () const;
+
+  /*
+   * @}
+   */
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   *
+   * This function is made virtual, since a triangulation object might be
+   * accessed through a pointer to this base class, even if the actual object
+   * is a derived class.
+   */
+  virtual std::size_t memory_consumption () const;
+
+  /**
+   * Write the data of this object to a stream for the purpose of
+   * serialization.
+   *
+   * @note This function does not save <i>all</i> member variables of the
+   * current triangulation. Rather, only certain kinds of information are
+   * stored. For more information see the general documentation of this class.
+   */
+  template <class Archive>
+  void save (Archive &ar,
+             const unsigned int version) const;
+
+  /**
+   * Read the data of this object from a stream for the purpose of
+   * serialization. Throw away the previous content.
+   *
+   * @note This function does not reset <i>all</i> member variables of the
+   * current triangulation to the ones of the triangulation that was
+   * previously stored to an archive. Rather, only certain kinds of
+   * information are loaded. For more information see the general
+   * documentation of this class.
+   *
+   * @note This function calls the Triangulation::clear() function and
+   * consequently triggers the "clear" signal. After loading all data from the
+   * archive, it then triggers the "create" signal. For more information on
+   * signals, see the general documentation of this class.
+   */
+  template <class Archive>
+  void load (Archive &ar,
+             const unsigned int version);
+
+  BOOST_SERIALIZATION_SPLIT_MEMBER()
+
+  /**
+   * @name Exceptions
+   * @{
+   */
+
+  /**
+   * The given level is not in the valid range.
+   *
+   * @ingroup Exceptions
+   */
+  DeclException1 (ExcInvalidLevel,
+                  int,
+                  << "The given level " << arg1
+                  << " is not in the valid range!");
+  /**
+   * The function raising this exception can only operate on an empty
+   * Triangulation, i.e., a Triangulation without grid cells.
+   *
+   * @ingroup Exceptions
+   */
+  DeclException2 (ExcTriangulationNotEmpty,
+                  int, int,
+                  << "You are trying to perform an operation on a triangulation "
+                  << "that is only allowed if the triangulation is currently empty. "
+                  << "However, it currently stores " << arg1 << " vertices and has "
+                  << "cells on " << arg2 << " levels.");
+  /**
+   * Trying to re-read a grid, an error occurred.
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcGridReadError);
+  /**
+   * Exception
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcFacesHaveNoLevel);
+  /**
+   * The triangulation level you accessed is empty.
+   *
+   * @ingroup Exceptions
+   */
+  DeclException1 (ExcEmptyLevel,
+                  int,
+                  << "You tried to do something on level " << arg1
+                  << ", but this level is empty.");
+  /**
+   * Exception
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcNonOrientableTriangulation);
+
+  /**
+   * Exception
+   *
+   * Requested boundary_id not found
+   *
+   * @ingroup Exceptions
+   */
+  DeclException1 (ExcBoundaryIdNotFound,
+                  types::boundary_id,
+                  << "The given boundary_id " << arg1
+                  << " is not defined in this Triangulation!");
+
+  /*
+   * @}
+   */
+
+protected:
+  /**
+   * Do some smoothing in the process of refining the triangulation. See the
+   * general doc of this class for more information about this.
+   */
+  MeshSmoothing                    smooth_grid;
+
+  /**
+   * Write a bool vector to the given stream, writing a pre- and a postfix
+   * magic number. The vector is written in an almost binary format, i.e. the
+   * bool flags are packed but the data is written as ASCII text.
+   *
+   * The flags are stored in a binary format: for each @p true, a @p 1 bit is
+   * stored, a @p 0 bit otherwise.  The bits are stored as <tt>unsigned
+   * char</tt>, thus avoiding endianness. They are written to @p out in plain
+   * text, thus amounting to 3.6 bits in the output per bit in the input on
+   * the average. Other information (magic numbers and number of elements of
+   * the input vector) is stored as plain text as well. The format should
+   * therefore be interplatform compatible.
+   */
+  static void write_bool_vector (const unsigned int       magic_number1,
+                                 const std::vector<bool> &v,
+                                 const unsigned int       magic_number2,
+                                 std::ostream            &out);
+
+  /**
+   * Re-read a vector of bools previously written by @p write_bool_vector and
+   * compare with the magic numbers.
+   */
+  static void read_bool_vector (const unsigned int       magic_number1,
+                                std::vector<bool>       &v,
+                                const unsigned int       magic_number2,
+                                std::istream            &in);
+
+private:
+  /**
+   * @name Cell iterator functions for internal use
+   * @{
+   */
+
+  /**
+   * Declare a number of iterator types for raw iterators, i.e., iterators
+   * that also iterate over holes in the list of cells left by cells that have
+   * been coarsened away in previous mesh refinement cycles.
+   *
+   * Since users should never have to access these internal properties of how
+   * we store data, these iterator types are made private.
+   */
+  typedef TriaRawIterator   <CellAccessor<dim,spacedim>         > raw_cell_iterator;
+  typedef TriaRawIterator   <TriaAccessor<dim-1, dim, spacedim> > raw_face_iterator;
+  typedef typename IteratorSelector::raw_vertex_iterator          raw_vertex_iterator;
+  typedef typename IteratorSelector::raw_line_iterator            raw_line_iterator;
+  typedef typename IteratorSelector::raw_quad_iterator            raw_quad_iterator;
+  typedef typename IteratorSelector::raw_hex_iterator             raw_hex_iterator;
+
+  /**
+   * Iterator to the first cell, used or not, on level @p level. If a level
+   * has no cells, a past-the-end iterator is returned.
+   */
+  raw_cell_iterator    begin_raw   (const unsigned int level = 0) const;
+
+  /**
+   * Return a raw iterator which is the first iterator not on level @p level.
+   * If @p level is the last level, then this returns <tt>end()</tt>.
+   */
+  raw_cell_iterator    end_raw (const unsigned int level) const;
+
+  /*
+   * @}
+   */
+
+  /**
+   * @name Line iterator functions for internal use
+   * @{
+   */
+
+  /**
+   * Iterator to the first line, used or not, on level @p level. If a level
+   * has no lines, a past-the-end iterator is returned.  If lines are no
+   * cells, i.e. for @p dim>1 no @p level argument must be given.  The same
+   * applies for all the other functions above, of course.
+   */
+  raw_line_iterator
+  begin_raw_line   (const unsigned int level = 0) const;
+
+  /**
+   * Iterator to the first used line on level @p level.
+   */
+  line_iterator
+  begin_line       (const unsigned int level = 0) const;
+
+  /**
+   * Iterator to the first active line on level @p level.
+   */
+  active_line_iterator
+  begin_active_line(const unsigned int level = 0) const;
+
+  /**
+   * Iterator past the end; this iterator serves for comparisons of iterators
+   * with past-the-end or before-the-beginning states.
+   */
+  line_iterator        end_line () const;
+
+  /*
+   * @}
+   */
+
+  /**
+   * @name Quad iterator functions for internal use
+   * @{
+   */
+
+  /**
+   * Iterator to the first quad, used or not, on the given level. If a level
+   * has no quads, a past-the-end iterator is returned.  If quads are no
+   * cells, i.e. for $dim>2$ no level argument must be given.
+   */
+  raw_quad_iterator
+  begin_raw_quad   (const unsigned int level = 0) const;
+
+  /**
+   * Iterator to the first used quad on level @p level.
+   */
+  quad_iterator
+  begin_quad       (const unsigned int level = 0) const;
+
+  /**
+   * Iterator to the first active quad on level @p level.
+   */
+  active_quad_iterator
+  begin_active_quad (const unsigned int level = 0) const;
+
+  /**
+   * Iterator past the end; this iterator serves for comparisons of iterators
+   * with past-the-end or before-the-beginning states.
+   */
+  quad_iterator
+  end_quad () const;
+
+  /*
+   * @}
+   */
+
+  /**
+   * @name Hex iterator functions for internal use
+   * @{
+   */
+
+  /**
+   * Iterator to the first hex, used or not, on level @p level. If a level has
+   * no hexs, a past-the-end iterator is returned.
+   */
+  raw_hex_iterator
+  begin_raw_hex   (const unsigned int level = 0) const;
+
+  /**
+   * Iterator to the first used hex on level @p level.
+   */
+  hex_iterator
+  begin_hex       (const unsigned int level = 0) const;
+
+  /**
+   * Iterator to the first active hex on level @p level.
+   */
+  active_hex_iterator
+  begin_active_hex (const unsigned int level = 0) const;
+
+  /**
+   * Iterator past the end; this iterator serves for comparisons of iterators
+   * with past-the-end or before-the-beginning states.
+   */
+  hex_iterator
+  end_hex () const;
+
+  /*
+   * @}
+   */
+
+
+  /**
+   * The (public) function clear() will only work when the triangulation is
+   * not subscribed to by other users. The clear_despite_subscriptions()
+   * function now allows the triangulation being cleared even when there are
+   * subscriptions.
+   *
+   * Make sure you know what you are doing when calling this function; its
+   * use is reasonable only in very rare cases, for example when the
+   * subscriptions were for the initially empty Triangulation and the
+   * Triangulation object wants to release its memory before throwing an
+   * assertion due to input errors (e.g. in the create_triangulation()
+   * function).
+   */
+  void clear_despite_subscriptions ();
+
+  /**
+   * For all cells, set the active cell indices so that active cells know
+   * their position among active cells, and all other cells have an invalid
+   * value. This function is called after mesh creation, refinement, and
+   * serialization.
+   */
+  void reset_active_cell_indices ();
+
+  /**
+   * Refine all cells on all levels which were previously flagged for
+   * refinement.
+   *
+   * Note, that this function uses the <tt>line->user_flags</tt> for
+   * <tt>dim=2,3</tt> and the <tt>quad->user_flags</tt> for <tt>dim=3</tt>.
+   *
+   * The function returns a list of cells that have produced children that
+   * satisfy the criteria of
+   * @ref GlossDistorted "distorted cells"
+   * if the <code>check_for_distorted_cells</code> flag was specified upon
+   * creation of this object.
+   */
+  DistortedCellList execute_refinement ();
+
+  /**
+   * Coarsen all cells which were flagged for coarsening, or rather: delete
+   * all children of those cells of which all child cells are flagged for
+   * coarsening and several other constraints hold (see the general doc of
+   * this class).
+   */
+  void execute_coarsening ();
+
+  /**
+   * Make sure that either all or none of the children of a cell are tagged
+   * for coarsening.
+   */
+  void fix_coarsen_flags ();
+
+  /**
+   * Array of pointers pointing to the objects storing the cell data on the
+   * different levels.
+   */
+  std::vector<dealii::internal::Triangulation::TriaLevel<dim>*> levels;
+
+  /**
+   * Pointer to the faces of the triangulation. In 1d this contains nothing,
+   * in 2D it contains data concerning lines and in 3D quads and lines.  All
+   * of these have no level and are therefore treated separately.
+   */
+  dealii::internal::Triangulation::TriaFaces<dim> *faces;
+
+
+  /**
+   * Array of the vertices of this triangulation.
+   */
+  std::vector<Point<spacedim> >              vertices;
+
+  /**
+   * Array storing a bit pattern indicating which vertices are used.
+   */
+  std::vector<bool>                     vertices_used;
+
+  /**
+   * Collection of manifold objects. We store only objects, which are not of
+   * type FlatManifold.
+   */
+  std::map<types::manifold_id, SmartPointer<const Manifold<dim,spacedim> , Triangulation<dim, spacedim> > >  manifold;
+
+
+  /**
+   * Flag indicating whether anisotropic refinement took place.
+   */
+  bool                             anisotropic_refinement;
+
+
+  /**
+   * A flag that determines whether we are to check for distorted cells upon
+   * creation and refinement of a mesh.
+   */
+  const bool check_for_distorted_cells;
+
+  /**
+   * Cache to hold the numbers of lines, quads, hexes, etc. These numbers are
+   * set at the end of the refinement and coarsening functions and enable
+   * faster access later on. In the old days, whenever one wanted to access
+   * one of these numbers, one had to perform a loop over all lines, e.g., and
+   * count the elements until we hit the end iterator. This is time consuming
+   * and since access to the number of lines etc is a rather frequent
+   * operation, this was not an optimal solution.
+   */
+  dealii::internal::Triangulation::NumberCache<dim> number_cache;
+
+  /**
+   * A map that relates the number of a boundary vertex to the boundary
+   * indicator. This field is only used in 1d. We have this field because we
+   * store boundary indicator information with faces in 2d and higher where we
+   * have space in the structures that store data for faces, but in 1d there
+   * is no such space for faces.
+   *
+   * The field is declared as a pointer for a rather mundane reason: all other
+   * fields of this class that can be modified by the TriaAccessor hierarchy
+   * are pointers, and so these accessor classes store a const pointer to the
+   * triangulation. We could no longer do so for TriaAccessor<0,1,spacedim> if
+   * this field (that can be modified by TriaAccessor::set_boundary_id) were
+   * not a pointer.
+   */
+  std::map<unsigned int, types::boundary_id> *vertex_to_boundary_id_map_1d;
+
+
+  /**
+   * A map that relates the number of a boundary vertex to the manifold
+   * indicator. This field is only used in 1d. We have this field because we
+   * store manifold indicator information with faces in 2d and higher where we
+   * have space in the structures that store data for faces, but in 1d there
+   * is no such space for faces.
+   *
+   * @note Manifold objects are pretty useless for points since they are
+   * neither refined nor are their interiors mapped. We nevertheless allow
+   * storing manifold ids for points to be consistent in dimension-independent
+   * programs.
+   *
+   * The field is declared as a pointer for a rather mundane reason: all other
+   * fields of this class that can be modified by the TriaAccessor hierarchy
+   * are pointers, and so these accessor classes store a const pointer to the
+   * triangulation. We could no longer do so for TriaAccessor<0,1,spacedim> if
+   * this field (that can be modified by TriaAccessor::set_manifold_id) were
+   * not a pointer.
+   */
+  std::map<unsigned int, types::manifold_id> *vertex_to_manifold_id_map_1d;
+
+  // make a couple of classes friends
+  template <int,int,int> friend class TriaAccessorBase;
+  template <int,int,int> friend class TriaAccessor;
+  friend class TriaAccessor<0, 1, spacedim>;
+
+  friend class CellAccessor<dim, spacedim>;
+
+  friend struct dealii::internal::TriaAccessor::Implementation;
+
+  friend class hp::DoFHandler<dim,spacedim>;
+
+  friend struct dealii::internal::Triangulation::Implementation;
+
+  template <typename>
+  friend class dealii::internal::Triangulation::TriaObjects;
+};
+
+
+#ifndef DOXYGEN
+
+
+template <int structdim>
+inline
+CellData<structdim>::CellData ()
+{
+  // material id zero and an invalid manifold id are the initial state ...
+  material_id = 0;
+  manifold_id = numbers::invalid_manifold_id;
+
+  // ... and no entry of the vertex list refers to a valid vertex yet
+  for (unsigned int v=0; v<GeometryInfo<structdim>::vertices_per_cell; ++v)
+    vertices[v] = numbers::invalid_unsigned_int;
+}
+
+
+
+namespace internal
+{
+  namespace Triangulation
+  {
+    template <class Archive>
+    void NumberCache<1>::serialize (Archive &ar,
+                                    const unsigned int)
+    {
+      // the number of levels, followed by the counters for lines
+      ar &n_levels;
+      ar &n_lines &n_lines_level &n_active_lines &n_active_lines_level;
+    }
+
+
+    template <class Archive>
+    void NumberCache<2>::serialize (Archive &ar,
+                                    const unsigned int version)
+    {
+      // everything NumberCache<1> stores comes first ...
+      this->NumberCache<1>::serialize (ar, version);
+      // ... followed by the counters for quads
+      ar &n_quads &n_quads_level &n_active_quads &n_active_quads_level;
+    }
+
+
+    template <class Archive>
+    void NumberCache<3>::serialize (Archive &ar,
+                                    const unsigned int version)
+    {
+      // everything NumberCache<2> stores comes first ...
+      this->NumberCache<2>::serialize (ar, version);
+      // ... followed by the counters for hexes
+      ar &n_hexes &n_hexes_level &n_active_hexes &n_active_hexes_level;
+    }
+
+  }
+}
+
+
+// Look up whether the vertex with the given index is flagged as used.
+template <int dim, int spacedim>
+inline bool
+Triangulation<dim,spacedim>::vertex_used(const unsigned int index) const
+{
+  Assert (index < vertices_used.size(),
+          ExcIndexRange(index, 0, vertices_used.size()));
+  return vertices_used[index];
+}
+
+
+
+// The number of levels is read from the number cache.
+template <int dim, int spacedim>
+inline unsigned int Triangulation<dim, spacedim>::n_levels () const
+{
+  return number_cache.n_levels;
+}
+
+// In this class, the same cached value that n_levels() returns.
+template <int dim, int spacedim>
+inline unsigned int Triangulation<dim, spacedim>::n_global_levels () const
+{
+  return number_cache.n_levels;
+}
+
+
+// Size of the vertex array, whether or not each entry is in use.
+template <int dim, int spacedim>
+inline unsigned int
+Triangulation<dim, spacedim>::n_vertices () const
+{
+  return vertices.size();
+}
+
+
+
+// Read-only access to the complete vertex array of this triangulation.
+template <int dim, int spacedim>
+inline const std::vector<Point<spacedim> > &
+Triangulation<dim, spacedim>::get_vertices () const
+{
+  return vertices;
+}
+
+
+template <int dim, int spacedim>
+template <class Archive>
+void
+Triangulation<dim,spacedim>::save (Archive &ar,
+                                   const unsigned int) const
+{
+  // Signals as well as boundary and manifold descriptions are deliberately
+  // left out (see the class documentation); everything else is written in
+  // the order in which load() reads it back.
+  ar &smooth_grid;
+
+  // the mesh data proper: levels, faces and vertices
+  ar &levels &faces;
+  ar &vertices &vertices_used;
+
+  // refinement state and the cached object counts
+  ar &anisotropic_refinement &number_cache;
+
+  // stored so that load() can compare it against its own setting
+  ar &check_for_distorted_cells;
+
+  // the vertex-based boundary and manifold id maps are only used in 1d
+  if (dim == 1)
+    ar &vertex_to_boundary_id_map_1d &vertex_to_manifold_id_map_1d;
+}
+
+
+
+template <int dim, int spacedim>
+template <class Archive>
+void
+Triangulation<dim,spacedim>::load (Archive &ar,
+                                   const unsigned int)
+{
+  // clear previous content. this also calls the respective signal
+  clear ();
+
+  // read back the stored fields in the order in which save() wrote them;
+  // signals and boundary/manifold descriptions are not part of the archive
+  ar &smooth_grid;
+  ar &levels;
+  ar &faces;
+  ar &vertices;
+  ar &vertices_used;
+
+  ar &anisotropic_refinement;
+  ar &number_cache;
+
+  // the levels do not serialize the active_cell_indices because
+  // they are easy enough to rebuild upon re-loading data. do
+  // this here. don't forget to first resize the fields appropriately
+  {
+    for (unsigned int l=0; l<levels.size(); ++l)
+      levels[l]->active_cell_indices.resize (levels[l]->refine_flags.size());
+    reset_active_cell_indices ();
+  }
+
+  // read the stored flag into a temporary; it is compared below, not assigned
+  bool my_check_for_distorted_cells;
+  ar &my_check_for_distorted_cells;
+
+  Assert (my_check_for_distorted_cells == check_for_distorted_cells,
+          ExcMessage ("The triangulation loaded into here must have the "
+                      "same setting with regard to reporting distorted "
+                      "cell as the one previously stored."));
+  // mirrors the dim == 1 branch at the end of save()
+  if (dim == 1)
+    {
+      ar &vertex_to_boundary_id_map_1d;
+      ar &vertex_to_manifold_id_map_1d;
+    }
+
+  // trigger the create signal to indicate
+  // that new content has been imported into
+  // the triangulation
+  signals.create();
+}
+
+
+/* -------------- declaration of explicit specializations ------------- */
+
+template <> unsigned int Triangulation<1,1>::n_raw_lines (const unsigned int level) const;
+template <> unsigned int Triangulation<1,1>::n_quads () const;
+template <> unsigned int Triangulation<1,1>::n_quads (const unsigned int level) const;
+template <> unsigned int Triangulation<1,1>::n_raw_quads (const unsigned int level) const;
+template <> unsigned int Triangulation<2,2>::n_raw_quads (const unsigned int level) const;
+template <> unsigned int Triangulation<1,1>::n_raw_hexs (const unsigned int level) const;
+template <> unsigned int Triangulation<1,1>::n_active_quads (const unsigned int level) const;
+template <> unsigned int Triangulation<1,1>::n_active_quads () const;
+template <> unsigned int Triangulation<1,1>::max_adjacent_cells () const;
+
+
+// -------------------------------------------------------------------
+// -- Explicit specializations for codimension one grids
+
+
+template <> unsigned int Triangulation<1,2>::n_raw_lines (const unsigned int level) const;
+template <> unsigned int Triangulation<1,2>::n_quads () const;
+template <> unsigned int Triangulation<1,2>::n_quads (const unsigned int level) const;
+template <> unsigned int Triangulation<1,2>::n_raw_quads (const unsigned int level) const;
+template <> unsigned int Triangulation<2,3>::n_raw_quads (const unsigned int level) const;
+template <> unsigned int Triangulation<1,2>::n_raw_hexs (const unsigned int level) const;
+template <> unsigned int Triangulation<1,2>::n_active_quads (const unsigned int level) const;
+template <> unsigned int Triangulation<1,2>::n_active_quads () const;
+template <> unsigned int Triangulation<1,2>::max_adjacent_cells () const;
+
+// -------------------------------------------------------------------
+// -- Explicit specializations for codimension two grids
+
+// NOTE(review): the <2,3> entry below is codimension one and repeats the declaration in the previous section
+template <> unsigned int Triangulation<1,3>::n_raw_lines (const unsigned int level) const;
+template <> unsigned int Triangulation<1,3>::n_quads () const;
+template <> unsigned int Triangulation<1,3>::n_quads (const unsigned int level) const;
+template <> unsigned int Triangulation<1,3>::n_raw_quads (const unsigned int level) const;
+template <> unsigned int Triangulation<2,3>::n_raw_quads (const unsigned int level) const;
+template <> unsigned int Triangulation<1,3>::n_raw_hexs (const unsigned int level) const;
+template <> unsigned int Triangulation<1,3>::n_active_quads (const unsigned int level) const;
+template <> unsigned int Triangulation<1,3>::n_active_quads () const;
+template <> unsigned int Triangulation<1,3>::max_adjacent_cells () const;
+
+
+// -------------------------------------------------------------------
+// Explicit specializations of get_manifold() for the invalid combinations dim > spacedim
+template <>
+const Manifold<2,1> &Triangulation<2, 1>::get_manifold(const types::manifold_id) const;
+template <>
+const Manifold<3,1> &Triangulation<3, 1>::get_manifold(const types::manifold_id) const;
+template <>
+const Manifold<3,2> &Triangulation<3, 2>::get_manifold(const types::manifold_id) const;
+
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+// Include tria_accessor.h here, so that it is possible for an end
+// user to use the iterators of Triangulation<dim> directly without
+// the need to include tria_accessor.h separately. (Otherwise the
+// iterators are an 'opaque' or 'incomplete' type.)
+#include <deal.II/grid/tria_accessor.h>
+
+#endif
diff --git a/include/deal.II/grid/tria_accessor.h b/include/deal.II/grid/tria_accessor.h
new file mode 100644
index 0000000..3800008
--- /dev/null
+++ b/include/deal.II/grid/tria_accessor.h
@@ -0,0 +1,3191 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__tria_accessor_h
+#define dealii__tria_accessor_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/geometry_info.h>
+#include <deal.II/base/point.h>
+#include <deal.II/grid/tria_iterator_base.h>
+#include <deal.II/grid/tria_iterator_selector.h>
+#include <deal.II/grid/cell_id.h>
+
+#include <utility>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int dim, int spacedim> class Triangulation;
+template <typename Accessor> class TriaRawIterator;
+template <typename Accessor> class TriaIterator;
+template <typename Accessor> class TriaActiveIterator;
+
+template <int dim, int spacedim> class Boundary;
+template <int dim, int spacedim> class Manifold;
+
+
+namespace internal
+{
+  namespace Triangulation
+  {
+    template <int dim> class TriaObject;
+    template <typename G> class TriaObjects;
+    struct Implementation;
+  }
+
+  namespace TriaAccessor
+  {
+    struct Implementation;
+
+    /**
+     * General template of the type used to store the level of an accessor
+     * object. A level is only needed if <tt>structdim == dim</tt> (see the
+     * specialization below); in all other cases this empty class suffices.
+     */
+    template <int structdim, int dim> struct PresentLevelType
+    {
+      struct type
+      {
+        /**
+         * Default constructor.
+         */
+        type ()
+        {}
+
+        /**
+         * Dummy constructor. Only level zero is allowed.
+         */
+        type (const int level)
+        {
+          Assert (level == 0, ExcInternalError());
+          (void)level; // removes -Wunused-parameter warning in optimized mode
+        }
+
+        /**
+         * Dummy conversion operator. Returns level zero.
+         */
+        operator int () const
+        {
+          return 0;
+        }
+        /** Dummy increment operator. Triggers an internal-error assertion if called. */
+        void operator ++ () const
+        {
+          Assert (false, ExcInternalError());
+        }
+        /** Dummy decrement operator. Triggers an internal-error assertion if called. */
+        void operator -- () const
+        {
+          Assert (false, ExcInternalError());
+        }
+      };
+    };
+
+
+    /**
+     * Specialization for the case <tt>structdim == dim</tt>, the only case in
+     * which the level of an accessor object actually needs to be stored. Here
+     * a plain <tt>int</tt> is used.
+     */
+    template <int dim> struct PresentLevelType<dim,dim>
+    {
+      typedef int type;
+    };
+
+  }
+}
+template <int structdim, int dim, int spacedim> class TriaAccessor;
+template <int dim, int spacedim>                class TriaAccessor<0, dim, spacedim>;
+template <int spacedim>                         class TriaAccessor<0, 1, spacedim>;
+
+// note: the file tria_accessor.templates.h is included at the end of
+// this file.  this includes a lot of templates. originally, this was
+// only done in debug mode, but led to cyclic reduction problems and
+// so is now on by default.
+
+
+/**
+ * A namespace that contains exception classes used by the accessor classes.
+ */
+namespace TriaAccessorExceptions
+{
+//TODO: Write documentation!
+  /**
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcCellNotUsed);
+  /**
+   * The cell is not an
+   * @ref GlossActive "active"
+   * cell, but it already has children. Some operations, like setting
+   * refinement flags or accessing degrees of freedom are only possible on
+   * active cells.
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcCellNotActive);
+  /**
+   * Trying to access the children of a cell which is in fact active.
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcCellHasNoChildren);
+  /**
+   * Trying to access the parent of a cell which is in the coarsest level of
+   * the triangulation.
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcCellHasNoParent);
+//TODO: Write documentation!
+  /**
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcUnusedCellAsChild);
+//TODO: Write documentation!
+  /**
+   * @ingroup Exceptions
+   */
+  DeclException1 (ExcCantSetChildren,
+                  int,
+                  << "You can only set the child index if the cell has no "
+                  << "children, or clear it. The given "
+                  << "index was " << arg1 << " (-1 means: clear children)");
+//TODO: Write documentation!
+  /**
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcUnusedCellAsNeighbor);
+//TODO: Write documentation!
+  /**
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcUncaughtCase);
+//TODO: Write documentation!
+  /**
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcDereferenceInvalidObject);
+//TODO: Write documentation!
+  /**
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcCantCompareIterators);
+//TODO: Write documentation!
+  /**
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcNeighborIsCoarser);
+//TODO: Write documentation!
+  /**
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcNeighborIsNotCoarser);
+  /**
+   * You are trying to access the level of a face, but faces have no inherent
+   * level. The level of a face can only be determined by the level of an
+   * adjacent cell, which in turn implies that a face can have several levels.
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcFacesHaveNoLevel);
+//TODO: Write documentation!
+  /**
+   * @ingroup Exceptions
+   */
+  DeclException1 (ExcSetOnlyEvenChildren,
+                  int,
+                  << "You can only set the child index of an even numbered child. "
+                  << "The number of the child given was " << arg1 << ".");
+}
+
+
+/**
+ * A base class for the accessor classes used by TriaRawIterator and derived
+ * classes.
+ *
+ * This class offers only the basic functionality required by the iterators
+ * (stores the necessary data members, offers comparison operators and the
+ * like), but has no functionality to actually dereference data. This is done
+ * in the derived classes.
+ *
+ * In the implementation, the behavior of this class differs between the cases
+ * where <tt>structdim==dim</tt> (cells of a mesh) and
+ * <tt>structdim<dim</tt> (faces and edges). For the latter, #present_level
+ * is always equal to zero and the constructors may not receive a positive
+ * value there. For cells, any level is possible, but only those within the
+ * range of the levels of the Triangulation are reasonable. Furthermore, the
+ * function objects() returns either the container with all cells on the same
+ * level or the container with all objects of this dimension
+ * (<tt>structdim<dim</tt>).
+ *
+ * Some internals of this class are discussed in
+ * @ref IteratorAccessorInternals.
+ *
+ * @ingroup grid
+ * @ingroup Accessors
+ * @author Wolfgang Bangerth, Guido Kanschat, 1998, 2010
+ */
+template <int structdim, int dim, int spacedim=dim>
+class TriaAccessorBase
+{
+public:
+  /**
+   * Dimension of the space the object represented by this accessor lives in.
+   * For example, if this accessor represents a quad that is part of a two-
+   * dimensional surface in four-dimensional space, then this value is four.
+   */
+  static const unsigned int space_dimension = spacedim;
+
+  /**
+   * Dimensionality of the object that the thing represented by this accessor
+   * is part of. For example, if this accessor represents a line that is part
+   * of a hexahedron, then this value will be three.
+   */
+  static const unsigned int dimension = dim;
+
+  /**
+   * Dimensionality of the current object represented by this accessor. For
+   * example, if it is line (irrespective of whether it is part of a quad or
+   * hex, and what dimension we are in), then this value equals 1.
+   */
+  static const unsigned int structure_dimension = structdim;
+
+protected:
+  /**
+   * Declare the data type that this accessor class expects to get passed from
+   * the iterator classes. Since the pure triangulation iterators need no
+   * additional data, this data type is @p void.
+   */
+  typedef void AccessorData;
+
+  /**
+   * Constructor. Protected, thus only callable from friend classes.
+   */
+  TriaAccessorBase (const Triangulation<dim,spacedim> *parent =  0,
+                    const int                 level  = -1,
+                    const int                 index  = -1,
+                    const AccessorData             * =  0);
+
+  /**
+   * Copy constructor. Creates an object with exactly the same data.
+   */
+  TriaAccessorBase (const TriaAccessorBase &);
+
+  /**
+   * Copy operator. Since this is only called from iterators, do not return
+   * anything, since the iterator will return itself.
+   *
+   * This method is protected, since it is only to be called from the iterator
+   * class.
+   */
+  void copy_from (const TriaAccessorBase &);
+
+  /**
+   * Copy operator. Creates an object with exactly the same data.
+   */
+  TriaAccessorBase &operator = (const TriaAccessorBase &);
+
+  /**
+   * Ordering of accessors. If #structure_dimension is less than #dimension,
+   * we simply compare the index of such an object. If #structure_dimension
+   * equals #dimension, we compare the level() first, and the index() only if
+   * levels are equal.
+   */
+  bool operator < (const TriaAccessorBase &other) const;
+
+protected:
+  /**
+   * Copy operator. This is normally used in a context like <tt>iterator a,b;
+   * *a=*b;</tt>. Since the meaning is to copy the object pointed to by @p b
+   * to the object pointed to by @p a and since accessors are not real but
+   * virtual objects, this operation is not useful for iterators on
+   * triangulations. We declare this function here protected, thus it may not
+   * be used from outside. Furthermore it is not implemented and will give a
+   * linker error if used anyway.
+   */
+  void operator = (const TriaAccessorBase *);
+
+  /**
+   * Compare for equality.
+   */
+  bool operator == (const TriaAccessorBase &) const;
+
+  /**
+   * Compare for inequality.
+   */
+  bool operator != (const TriaAccessorBase &) const;
+
+  /**
+   * @name Advancement of iterators
+   */
+  /**
+   * @{
+   */
+  /**
+   * This operator advances the iterator to the next element.
+   *
+   * For @p dim=1 only: The next element is next on this level if there are
+   * more. If the present element is the last on this level, the first on the
+   * next level is accessed.
+   */
+  void operator ++ ();
+
+  /**
+   * This operator moves the iterator to the previous element.
+   *
+   * For @p dim=1 only: The previous element is previous on this level if
+   * <tt>index>0</tt>. If the present element is the first on this level, the
+   * last on the previous level is accessed.
+   */
+  void operator -- ();
+  /**
+   * @}
+   */
+
+  /**
+   * Access to the other objects of a Triangulation with same dimension.
+   */
+  dealii::internal::Triangulation::TriaObjects<dealii::internal::Triangulation::TriaObject<structdim> > &
+  objects () const;
+
+public:
+  /**
+   * Data type to be used for passing parameters from iterators to the
+   * accessor classes in a unified way, no matter what the type or number of
+   * these parameters is.
+   */
+  typedef void *LocalData;
+
+  /**
+   * @name Iterator address and state
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * For cells, this function returns the level within the mesh hierarchy at
+   * which this cell is located. For all other objects, the function returns
+   * zero.
+   *
+   * @note Within a Triangulation object, cells are uniquely identified by a
+   * pair <code>(level, index)</code> where the former is the cell's
+   * refinement level and the latter is the index of the cell within this
+   * refinement level (the former being what this function returns).
+   * Consequently, there may be multiple cells on different refinement levels
+   * but with the same index within their level. Contrary to this, if the
+   * current object corresponds to a face or edge, then the object is uniquely
+   * identified solely by its index as faces and edges do not have a
+   * refinement level. For these objects, the current function always returns
+   * zero as the level.
+   */
+  int level () const;
+
+  /**
+   * Return the index of the element presently pointed to on the present
+   * level.
+   *
+   * Within a Triangulation object, cells are uniquely identified by a pair
+   * <code>(level, index)</code> where the former is the cell's refinement
+   * level and the latter is the index of the cell within this refinement
+   * level (the latter being what this function returns). Consequently, there
+   * may be multiple cells on different refinement levels but with the same
+   * index within their level. Contrary to this, if the current object
+   * corresponds to a face or edge, then the object is uniquely identified
+   * solely by its index as faces and edges do not have a refinement level.
+   *
+   * @note The indices returned by this function are not a contiguous
+   * set of numbers on each level: going from cell to cell, some of the
+   * indices in a level may be unused.
+   *
+   * @note If the triangulation is actually of type
+   * parallel::distributed::Triangulation then the indices are relative only
+   * to that part of the distributed triangulation that is stored on the
+   * current processor. In other words, cells living in the partitions of the
+   * triangulation stored on different processors may have the same index even
+   * if they do not refer to the same cell, and they may have different indices
+   * even if they do refer to the same cell (e.g., if a cell is owned by one
+   * processor but is a ghost cell on another).
+   */
+  int index () const;
+
+  /**
+   * Return the state of the iterator.  For the different states an accessor
+   * can be in, refer to the TriaRawIterator documentation.
+   */
+  IteratorState::IteratorStates state () const;
+
+  /**
+   * Return a reference to the triangulation which the object pointed to by
+   * this class belongs to.
+   */
+  const Triangulation<dim,spacedim> &get_triangulation () const;
+
+  /**
+   * @}
+   */
+protected:
+  /**
+   * The level if this is a cell (<tt>structdim==dim</tt>). Else, contains
+   * zero.
+   */
+  typename dealii::internal::TriaAccessor::PresentLevelType<structdim,dim>::type present_level;
+
+  /**
+   * Used to store the index of the element presently pointed to on the level
+   * presently used.
+   */
+  int present_index;
+
+  /**
+   * Pointer to the triangulation which we act on.
+   */
+  const Triangulation<dim,spacedim> *tria;
+
+private:
+
+  template <typename Accessor> friend class TriaRawIterator;
+  template <typename Accessor> friend class TriaIterator;
+  template <typename Accessor> friend class TriaActiveIterator;
+};
+
+
+
+/**
+ * A class that represents accessor objects to iterators that don't make sense
+ * such as quad iterators on 1d meshes.  This class can not be used to
+ * create objects (it will in fact throw an exception if this should ever be
+ * attempted) but it sometimes allows code to be written in a simpler way in a
+ * dimension independent way. For example, it allows to write code that works
+ * on quad iterators that is dimension independent because quad iterators
+ * (with the current class) exist and are syntactically correct. You can not
+ * expect, however, to ever generate one of these iterators, meaning you need
+ * to expect to wrap the code block in which you use quad iterators into
+ * something like <code>if (dim@>1)</code> -- which makes eminent sense
+ * anyway.
+ *
+ * This class provides the minimal interface necessary for Accessor classes to
+ * interact with Iterator classes. However, this is only for syntactic
+ * correctness, none of the functions do anything but generate errors.
+ *
+ * @ingroup Accessors
+ * @author Wolfgang Bangerth, 2008
+ */
+template <int structdim, int dim, int spacedim=dim>
+class InvalidAccessor :  public TriaAccessorBase<structdim,dim,spacedim>
+{
+public:
+  /**
+   * Propagate typedef from base class to this class.
+   */
+  typedef typename TriaAccessorBase<structdim,dim,spacedim>::AccessorData AccessorData;
+
+  /**
+   * Constructor.  This class is used for iterators that do not make sense in
+   * a given dimension, for example quads for 1d meshes. Consequently, while
+   * the creation of such objects is syntactically valid, they make no
+   * semantic sense, and we generate an exception when such an object is
+   * actually generated.
+   */
+  InvalidAccessor (const Triangulation<dim,spacedim> *parent     =  0,
+                   const int                 level      = -1,
+                   const int                 index      = -1,
+                   const AccessorData       *local_data =  0);
+
+  /**
+   * Copy constructor.  This class is used for iterators that do not make
+   * sense in a given dimension, for example quads for 1d meshes. Consequently,
+   * while the creation of such objects is syntactically valid, they make no
+   * semantic sense, and we generate an exception when such an object is
+   * actually generated.
+   */
+  InvalidAccessor (const InvalidAccessor &);
+
+  /**
+   * Conversion from other accessors to the current invalid one. This of
+   * course also leads to a run-time error.
+   */
+  template <typename OtherAccessor>
+  InvalidAccessor (const OtherAccessor &);
+
+  /**
+   * Dummy copy operation.
+   */
+  void copy_from (const InvalidAccessor &);
+
+  /**
+   * Dummy comparison operators.
+   */
+  bool operator == (const InvalidAccessor &) const;
+  bool operator != (const InvalidAccessor &) const;
+
+  /**
+   * Dummy operators to make things compile. Does nothing.
+   */
+  void operator ++ () const;
+  void operator -- () const;
+
+  /**
+   * Dummy function representing whether the accessor points to a used or an
+   * unused object.
+   */
+  bool used () const;
+
+  /**
+   * Dummy function representing whether the accessor points to an object that
+   * has children.
+   */
+  bool has_children () const;
+};
+
+
+
+/**
+ * A class that provides access to objects in a triangulation such as its
+ * vertices, sub-objects, children, geometric information, etc. This class
+ * represents objects of dimension <code>structdim</code> (i.e. 1 for lines, 2
+ * for quads, 3 for hexes) in a triangulation of dimensionality
+ * <code>dim</code> (i.e. 1 for a triangulation of lines, 2 for a
+ * triangulation of quads, and 3 for a triangulation of hexes) that is
+ * embedded in a space of dimensionality <code>spacedim</code> (for
+ * <code>spacedim==dim</code> the triangulation represents a domain in
+ * $R^{dim}$, for <code>spacedim@>dim</code> the triangulation is of a
+ * manifold embedded in a higher dimensional space).
+ *
+ * @ingroup Accessors
+ * @author Wolfgang Bangerth and others, 1998, 2000, 2008
+ */
+template <int structdim, int dim, int spacedim>
+class TriaAccessor : public TriaAccessorBase<structdim, dim, spacedim>
+{
+public:
+  /**
+   * Propagate typedef from base class to this class.
+   */
+  typedef
+  typename TriaAccessorBase<structdim,dim,spacedim>::AccessorData
+  AccessorData;
+
+  /**
+   * Constructor.
+   */
+  TriaAccessor (const Triangulation<dim,spacedim> *parent     =  0,
+                const int                 level      = -1,
+                const int                 index      = -1,
+                const AccessorData       *local_data =  0);
+
+  /**
+   * Conversion constructor. This constructor exists to make certain
+   * constructs simpler to write in dimension independent code. For example,
+   * it allows assigning a face iterator to a line iterator, an operation that
+   * is useful in 2d but doesn't make any sense in 3d. The constructor here
+   * exists for the purpose of making the code conform to C++ but it will
+   * unconditionally abort; in other words, assigning a face iterator to a
+   * line iterator is better put into an if-statement that checks that the
+   * dimension is two, and assign to a quad iterator in 3d (an operator that,
+   * without this constructor would be illegal if we happen to compile for
+   * 2d).
+   */
+  template <int structdim2, int dim2, int spacedim2>
+  TriaAccessor (const InvalidAccessor<structdim2,dim2,spacedim2> &);
+
+  /**
+   * Another conversion operator between objects that don't make sense, just
+   * like the previous one.
+   */
+  template <int structdim2, int dim2, int spacedim2>
+  TriaAccessor (const TriaAccessor<structdim2,dim2,spacedim2> &);
+
+  /**
+   * Test for the element being used or not.  The return value is @p true for
+   * all iterators that are either normal iterators or active iterators, only
+   * raw iterators can return @p false. Since raw iterators are only used in
+   * the interiors of the library, you will not usually need this function.
+   */
+  bool used () const;
+
+  /**
+   * @name Accessing sub-objects
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * Pointer to the @p ith vertex bounding this object. Throw an exception if
+   * <code>dim=1</code>.
+   */
+  typename dealii::internal::Triangulation::Iterators<dim,spacedim>::vertex_iterator
+  vertex_iterator (const unsigned int i) const;
+
+  /**
+   * Return the global index of i-th vertex of the current object. The
+   * convention regarding the numbering of vertices is laid down in the
+   * documentation of the GeometryInfo class.
+   *
+   * Note that the returned value is only the index of the geometrical vertex.
+   * It has nothing to do with possible degrees of freedom associated with it.
+   * For this, see the @p DoFAccessor::vertex_dof_index functions.
+   *
+   * @note Despite the name, the index returned here is only global in the
+   * sense that it is specific to a particular Triangulation object or, in the
+   * case the triangulation is actually of type
+   * parallel::distributed::Triangulation, specific to that part of the
+   * distributed triangulation stored on the current processor.
+   */
+  unsigned int vertex_index (const unsigned int i) const;
+
+  /**
+   * Return a reference to the @p ith vertex. The reference is not const,
+   * i.e., it is possible to call this function on the left hand side of an
+   * assignment, thereby moving the vertex of a cell within the triangulation.
+   * Of course, doing so requires that you ensure that the new location of the
+   * vertex remains useful -- for example, avoiding inverted or otherwise
+   * distorted (see also
+   * @ref GlossDistorted "this glossary entry").
+   *
+   * @note When a cell is refined, its children inherit the position of the
+   * vertex positions of those vertices they share with the mother cell (plus
+   * the locations of the new vertices on edges, faces, and cell interiors
+   * that are created for the new child cells). If the vertex of a cell is
+   * moved, this implies that its children will also use these new locations.
+   * On the other hand, imagine a 2d situation where you have one cell that is
+   * refined (with four children) and then you move the central vertex
+   * connecting all four children. If you coarsen these four children again to
+   * the mother cell, then the location of the moved vertex is lost and if, in
+   * a later step, you refine the mother cell again, the then again new vertex
+   * will be placed again at the same position as the first time around --
+   * i.e., not at the location you had previously moved it to.
+   *
+   * @note The behavior described above is relevant if you have a
+   * parallel::distributed::Triangulation object. There, refining a mesh
+   * always involves a re-partitioning. In other words, vertices of locally
+   * owned cells (see
+   * @ref GlossLocallyOwnedCell "this glossary entry")
+   * that you may have moved to a different location on one processor may be
+   * moved to a different processor upon mesh refinement (even if these
+   * particular cells were not refined) which will re-create their position
+   * based on the position of the coarse cells they previously had, not based
+   * on the position these vertices had on the processor that previously owned
+   * them. In other words, in parallel computations, you will probably have to
+   * move nodes explicitly after every mesh refinement because vertex
+   * positions may or may not be preserved across the re-partitioning that
+   * accompanies mesh refinement.
+   */
+  Point<spacedim> &vertex (const unsigned int i) const;
+
+  /**
+   * Pointer to the @p ith line bounding this object.
+   */
+  typename dealii::internal::Triangulation::Iterators<dim,spacedim>::line_iterator
+  line (const unsigned int i) const;
+
+  /**
+   * Line index of the @p ith line bounding this object.
+   *
+   * Implemented only for <tt>structdim>1</tt>, otherwise an exception is
+   * generated.
+   */
+  unsigned int line_index (const unsigned int i) const;
+
+  /**
+   * Pointer to the @p ith quad bounding this object.
+   */
+  typename dealii::internal::Triangulation::Iterators<dim,spacedim>::quad_iterator
+  quad (const unsigned int i) const;
+
+  /**
+   * Quad index of the @p ith quad bounding this object.
+   *
+   * Implemented only for <tt>structdim>2</tt>, otherwise an exception is
+   * generated.
+   */
+  unsigned int quad_index (const unsigned int i) const;
+  /**
+   * @}
+   */
+
+  /**
+   * @name Orientation of sub-objects
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * Return whether the face with index @p face has its normal pointing in the
+   * standard direction (@p true) or whether it is the opposite (@p false).
+   * Which is the standard direction is documented with the GeometryInfo
+   * class. In 1d and 2d, this is always @p true, but in 3d it may be
+   * different, see the respective discussion in the documentation of the
+   * GeometryInfo class.
+   *
+   * This function is really only for internal use in the library unless you
+   * absolutely know what this is all about.
+   */
+  bool face_orientation (const unsigned int face) const;
+
+  /**
+   * Return whether the face with index @p face is rotated by 180 degrees (@p
+   * true) or not (@p false). In 1d and 2d, this is always @p false, but in
+   * 3d it may be different, see the respective discussion in the
+   * documentation of the GeometryInfo class.
+   *
+   * This function is really only for internal use in the library unless you
+   * absolutely know what this is all about.
+   */
+  bool face_flip (const unsigned int face) const;
+
+  /**
+   * Return whether the face with index @p face is rotated by 90 degrees (@p
+   * true) or not (@p false). In 1d and 2d, this is always @p false, but in
+   * 3d it may be different, see the respective discussion in the
+   * documentation of the GeometryInfo class.
+   *
+   * This function is really only for internal use in the library unless you
+   * absolutely know what this is all about.
+   */
+  bool face_rotation (const unsigned int face) const;
+
+  /**
+   * Return whether the line with index @p line is oriented in standard
+   * direction. @p true indicates, that the line is oriented from vertex 0 to
+   * vertex 1, whereas it is the other way around otherwise. In 1d and 2d,
+   * this is always @p true, but in 3d it may be different, see the respective
+   * discussion in the documentation of the GeometryInfo class.
+   *
+   * This function is really only for internal use in the library unless you
+   * absolutely know what this is all about.
+   */
+  bool line_orientation (const unsigned int line) const;
+  /**
+   * @}
+   */
+
+  /**
+   * @name Accessing children
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * Test whether the object has children.
+   */
+  bool has_children () const;
+
+  /**
+   * Return the number of immediate children of this object. The number of
+   * children of an unrefined cell is zero.
+   */
+  unsigned int n_children() const;
+
+  /**
+   * Compute and return the number of active descendants of this object. For
+   * example, if all of the eight children of a hex are further refined
+   * isotropically exactly once, the returned number will be 64, not 80.
+   *
+   * If the present cell is not refined, one is returned.
+   *
+   * If one considers a triangulation as a forest where the roots of the trees
+   * are the coarse mesh cells and nodes have descendants (the children of a
+   * cell), then this function returns the number of terminal nodes in the
+   * sub-tree originating from the current object; consequently, if the
+   * current object is not further refined, the answer is one.
+   */
+  unsigned int number_of_children () const;
+
+  /**
+   * Return the number of times that this object is refined. Note that not all
+   * its children are refined that often (which is why we prepend @p max_),
+   * the returned number is rather the maximum number of refinements in any
+   * branch of children of this object.
+   *
+   * For example, if this object is refined, and one of its children is
+   * refined exactly one more time, then <tt>max_refinement_depth</tt> should
+   * return 2.
+   *
+   * If this object is not refined (i.e. it is active), then the return value
+   * is zero.
+   */
+  unsigned int max_refinement_depth () const;
+
+  /**
+   * Return an iterator to the @p ith child.
+   */
+  TriaIterator<TriaAccessor<structdim,dim,spacedim> >
+  child (const unsigned int i) const;
+
+  /**
+   * Return an iterator to that object that is identical to the ith child for
+   * isotropic refinement. If the current object is refined isotropically,
+   * then the returned object is the ith child. If the current object is
+   * refined anisotropically, the returned child may in fact be a grandchild
+   * of the object, or may not exist at all (in which case an exception is
+   * generated).
+   */
+  TriaIterator<TriaAccessor<structdim,dim,spacedim> >
+  isotropic_child (const unsigned int i) const;
+
+  /**
+   * Return the RefinementCase of this cell.
+   */
+  RefinementCase<structdim> refinement_case () const;
+
+  /**
+   * Index of the @p ith child. The level of the child is one higher than that
+   * of the present cell, if the children of a cell are accessed. The children
+   * of faces have no level. If the child does not exist, -1 is returned.
+   */
+  int child_index (const unsigned int i) const;
+
+  /**
+   * Index of the @p ith isotropic child. See the isotropic_child() function
+   * for a definition of this concept.  If the child does not exist, -1 is
+   * returned.
+   */
+  int isotropic_child_index (const unsigned int i) const;
+  /**
+   * @}
+   */
+
+  /**
+   * @name Dealing with boundary indicators
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * Return the boundary indicator of this object.
+   *
+   * If the return value is the special value
+   * numbers::internal_face_boundary_id, then this object is in the interior
+   * of the domain.
+   *
+   * @see
+   * @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+   */
+  types::boundary_id boundary_id () const;
+
+  /**
+   * Return the boundary indicator of this object.
+   *
+   * @deprecated This spelling of the function name is deprecated. Use
+   * boundary_id() instead.
+   */
+  types::boundary_id boundary_indicator () const DEAL_II_DEPRECATED;
+
+  /**
+   * Set the boundary indicator of the current object. The same applies as for
+   * the boundary_id() function.
+   *
+   * This function only sets the boundary indicator of the current object
+   * itself, not the indicators of the ones that bound it. For example, in 3d,
+   * if this function is called on a face, then the boundary indicators of the
+   * 4 edges that bound the face remain unchanged. If you want to set the
+   * boundary indicators of face and edges at the same time, use the
+   * set_all_boundary_ids() function. You can see the result of not using the
+   * correct function in the results section of step-49.
+   *
+   * @warning You should never set the boundary indicator of an interior face
+   * (a face not at the boundary of the domain), or set the boundary
+   * indicator of an exterior face to numbers::internal_face_boundary_id (this
+   * value is reserved for another purpose). Algorithms may not work or
+   * produce very confusing results if boundary cells have a boundary
+   * indicator of numbers::internal_face_boundary_id or if interior cells have
+   * boundary indicators other than numbers::internal_face_boundary_id.
+   * Unfortunately, the current object has no means of finding out whether it
+   * really is at the boundary of the domain and so cannot determine whether
+   * the value you are trying to set makes sense under the current
+   * circumstances.
+   *
+   * @ingroup boundary
+   *
+   * @see
+   * @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+   */
+  void set_boundary_id (const types::boundary_id) const;
+
+  /**
+   * Set the boundary indicator of this object.
+   *
+   * @deprecated This spelling of the function name is deprecated. Use
+   * set_boundary_id() instead.
+   */
+  void set_boundary_indicator (const types::boundary_id) const DEAL_II_DEPRECATED;
+
+  /**
+   * Do as set_boundary_id() but also set the boundary indicators of the
+   * objects that bound the current object. For example, in 3d, if
+   * set_boundary_id() is called on a face, then the boundary indicator of the
+   * 4 edges that bound the face remain unchanged. In contrast, if you call
+   * the current function, the boundary indicators of face and edges are all
+   * set to the given value.
+   *
+   * This function is useful if you set boundary indicators of faces in 3d (in
+   * 2d, the function does the same as set_boundary_id()) and you do so
+   * because you want a curved boundary object to represent the part of the
+   * boundary that corresponds to the current face. In that case, the
+   * Triangulation class needs to figure out where to put new vertices upon
+   * mesh refinement, and higher order Mapping objects also need to figure out
+   * where new interpolation points for a curved boundary approximation should
+   * be. In either case, the two classes first determine where interpolation
+   * points on the edges of a boundary face should be, asking the boundary
+   * object, before asking the boundary object for the interpolation points
+   * corresponding to the interior of the boundary face. For this to work
+   * properly, it is not sufficient to have set the boundary indicator for the
+   * face alone, but you also need to set the boundary indicators of the edges
+   * that bound the face. This function does all of this at once. You can see
+   * the result of not using the correct function in the results section of
+   * step-49.
+   *
+   * @ingroup boundary
+   *
+   * @see
+   * @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+   */
+  void set_all_boundary_ids (const types::boundary_id) const;
+
+  /**
+   * Set the boundary indicator of this object and all that bound it.
+   *
+   * @deprecated This spelling of the function name is deprecated. Use
+   * set_all_boundary_ids() instead.
+   */
+  void set_all_boundary_indicators (const types::boundary_id) const DEAL_II_DEPRECATED;
+
+  /**
+   * Return whether this object is at the boundary. Obviously, the use of this
+   * function is only possible for <tt>dim@>structdim</tt>; however, for
+   * <tt>dim==structdim</tt>, an object is a cell and the CellAccessor class
+   * offers another possibility to determine whether a cell is at the boundary
+   * or not.
+   */
+  bool at_boundary () const;
+
+  /**
+   * Return a constant reference to the manifold object used for this object.
+   * This function exists for backward compatibility and calls get_manifold()
+   * internally.
+   */
+  const Boundary<dim,spacedim> &get_boundary () const;
+
+  /**
+   * Return a constant reference to the manifold object used for this object.
+   *
+   * As explained in
+   * @ref boundary "Boundary and manifold description for triangulations",
+   * the process involved in finding the appropriate manifold description
+   * involves querying both the manifold or boundary indicators. See there for
+   * more information.
+   */
+  const Manifold<dim,spacedim> &get_manifold () const;
+
+  /**
+   * @}
+   */
+
+  /**
+   * @name Dealing with manifold indicators
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * Return the manifold indicator of this object.
+   *
+   * If the return value is the special value numbers::flat_manifold_id, then
+   * this object is associated with a standard Cartesian Manifold Description.
+   *
+   * @see
+   * @ref GlossManifoldIndicator "Glossary entry on manifold indicators"
+   */
+  types::manifold_id manifold_id () const;
+
+  /**
+   * Set the manifold indicator.  The same applies as for the
+   * <tt>manifold_id()</tt> function.
+   *
+   * Note that it only sets the manifold indicator of the current object
+   * itself, not the indicators of the ones that bound it, nor of its children.
+   * For example, in 3d, if this function is called on a face, then the
+   * manifold indicators of the 4 edges that bound the face remain unchanged.
+   * If you want to set the manifold indicators of face, edges and all
+   * children at the same time, use the set_all_manifold_ids() function.
+   *
+   *
+   * @ingroup manifold
+   *
+   * @see
+   * @ref GlossManifoldIndicator "Glossary entry on manifold indicators"
+   */
+  void set_manifold_id (const types::manifold_id) const;
+
+  /**
+   * Do as set_manifold_id() but also set the manifold indicators of the
+   * objects that bound the current object. For example, in 3d, if
+   * set_manifold_id() is called on a face, then the manifold indicator of the
+   * 4 edges that bound the face remain unchanged. On the other hand, the
+   * manifold indicators of face and edges are all set at the same time using
+   * the current function.
+   *
+   * @ingroup manifold
+   *
+   * @see
+   * @ref GlossManifoldIndicator "Glossary entry on manifold indicators"
+   */
+  void set_all_manifold_ids (const types::manifold_id) const;
+
+  /**
+   * @}
+   */
+
+
+  /**
+   * @name User data
+   */
+  /**
+   * @{
+   */
+  /**
+   * Read the user flag. See
+   * @ref GlossUserFlags
+   * for more information.
+   */
+  bool user_flag_set () const;
+
+  /**
+   * Set the user flag. See
+   * @ref GlossUserFlags
+   * for more information.
+   */
+  void set_user_flag () const;
+
+  /**
+   * Clear the user flag. See
+   * @ref GlossUserFlags
+   * for more information.
+   */
+  void clear_user_flag () const;
+
+  /**
+   * Set the user flag for this and all descendants. See
+   * @ref GlossUserFlags
+   * for more information.
+   */
+  void recursively_set_user_flag () const;
+
+  /**
+   * Clear the user flag for this and all descendants. See
+   * @ref GlossUserFlags
+   * for more information.
+   */
+  void recursively_clear_user_flag () const;
+
+  /**
+   * Reset the user data to zero, whether used as pointer or index. See
+   * @ref GlossUserData
+   * for more information.
+   */
+  void clear_user_data () const;
+
+  /**
+   * Set the user pointer to @p p.
+   *
+   * @note User pointers and user indices are mutually exclusive. Therefore,
+   * you can only use one of them, unless you call
+   * Triangulation::clear_user_data() in between.
+   *
+   * See
+   * @ref GlossUserData
+   * for more information.
+   */
+  void set_user_pointer (void *p) const;
+
+  /**
+   * Reset the user pointer to a @p NULL pointer. See
+   * @ref GlossUserData
+   * for more information.
+   */
+  void clear_user_pointer () const;
+
+  /**
+   * Access the value of the user pointer. It is in the responsibility of the
+   * user to make sure that the pointer points to something useful. You should
+   * use the new style cast operator to maintain a minimum of type safety,
+   * e.g.
+   * <tt>A *a=static_cast<A*>(cell->user_pointer());</tt>.
+   *
+   * @note User pointers and user indices are mutually exclusive. Therefore,
+   * you can only use one of them, unless you call
+   * Triangulation::clear_user_data() in between.
+   *
+   * See
+   * @ref GlossUserData
+   * for more information.
+   */
+  void *user_pointer () const;
+
+  /**
+   * Set the user pointer of this object and all its children to the given
+   * value. This is useful for example if all cells of a certain subdomain, or
+   * all faces of a certain part of the boundary should have user pointers
+   * pointing to objects describing this part of the domain or boundary.
+   *
+   * Note that the user pointer is not inherited under mesh refinement, so
+   * after mesh refinement there might be cells or faces that don't have user
+   * pointers pointing to the describing object. In this case, simply loop
+   * over all the elements of the coarsest level that has this information,
+   * and use this function to recursively set the user pointer of all finer
+   * levels of the triangulation.
+   *
+   * @note User pointers and user indices are mutually exclusive. Therefore,
+   * you can only use one of them, unless you call
+   * Triangulation::clear_user_data() in between.
+   *
+   * See
+   * @ref GlossUserData
+   * for more information.
+   */
+  void recursively_set_user_pointer (void *p) const;
+
+  /**
+   * Clear the user pointer of this object and all of its descendants. The
+   * same holds as said for the recursively_set_user_pointer() function. See
+   * @ref GlossUserData
+   * for more information.
+   */
+  void recursively_clear_user_pointer () const;
+
+  /**
+   * Set the user index to @p p.
+   *
+   * @note User pointers and user indices are mutually exclusive. Therefore,
+   * you can only use one of them, unless you call
+   * Triangulation::clear_user_data() in between. See
+   * @ref GlossUserData
+   * for more information.
+   */
+  void set_user_index (const unsigned int p) const;
+
+  /**
+   * Reset the user index to 0. See
+   * @ref GlossUserData
+   * for more information.
+   */
+  void clear_user_index () const;
+
+  /**
+   * Access the value of the user index.
+   *
+   * @note User pointers and user indices are mutually exclusive. Therefore,
+   * you can only use one of them, unless you call
+   * Triangulation::clear_user_data() in between.
+   *
+   * See
+   * @ref GlossUserData
+   * for more information.
+   */
+  unsigned int user_index () const;
+
+  /**
+   * Set the user index of this object and all its children.
+   *
+   * Note that the user index is not inherited under mesh refinement, so after
+   * mesh refinement there might be cells or faces that don't have the
+   * expected user indices. In this case, simply loop over all the elements of
+   * the coarsest level that has this information, and use this function to
+   * recursively set the user index of all finer levels of the triangulation.
+   *
+   * @note User pointers and user indices are mutually exclusive. Therefore,
+   * you can only use one of them, unless you call
+   * Triangulation::clear_user_data() in between.
+   *
+   * See
+   * @ref GlossUserData
+   * for more information.
+   */
+  void recursively_set_user_index (const unsigned int p) const;
+
+  /**
+   * Clear the user index of this object and all of its descendants. The same
+   * holds as said for the recursively_set_user_index() function.
+   *
+   * See
+   * @ref GlossUserData
+   * for more information.
+   */
+  void recursively_clear_user_index () const;
+  /**
+   * @}
+   */
+
+  /**
+   * @name Geometric information about an object
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * Diameter of the object.
+   *
+   * The diameter of an object is computed to be the largest diagonal. This is
+   * not necessarily the true diameter for objects that may use higher order
+   * mappings, but completely sufficient for most computations.
+   */
+  double diameter () const;
+
+  /**
+   * Length of an object in the direction of the given axis, specified in the
+   * local coordinate system. See the documentation of GeometryInfo for the
+   * meaning and enumeration of the local axes.
+   *
+   * Note that the "length" of an object can be interpreted in a variety of
+   * ways. Here, we choose it as the maximal length of any of the edges of the
+   * object that are parallel to the chosen axis on the reference cell.
+   */
+  double extent_in_direction (const unsigned int axis) const;
+
+  /**
+   * Returns the minimal distance between any two vertices.
+   */
+  double minimum_vertex_distance () const;
+
+  /**
+   * Returns a point belonging to the Manifold<dim,spacedim> where this object
+   * lives, given its parametric coordinates on the reference @p structdim
+   * cell. This function queries the underlying manifold object, and can be
+   * used to obtain the exact geometrical location of arbitrary points on this
+   * object.
+   *
+   * Notice that the argument @p coordinates are the coordinates on the
+   * <em>reference cell</em>, given in reference coordinates. In other words,
+   * the argument provides a weighting between the different vertices. For
+   * example, for lines, calling this function with argument Point<1>(.5), is
+   * equivalent to asking the line for its center.
+   */
+  Point<spacedim> intermediate_point(const Point<structdim> &coordinates) const;
+
+  /**
+   * Center of the object. The center of an object is defined to be the
+   * average of the locations of the vertices. If required, the user may ask
+   * this function to return the average of the point according to the
+   * underlying Manifold object, by setting to true the optional parameter @p
+   * respect_manifold.
+   *
+   * When the geometry of a TriaAccessor is not flat, or when part of the
+   * bounding objects of this TriaAccessor are not flat, the result given by
+   * the TriaAccessor::center() function may not be accurate enough, even when
+   * parameter @p respect_manifold is set to true. If you find this to be
+   * the case, then you can further refine the computation of the center by
+   * setting to true the second additional parameter @p
+   * use_laplace_transformation, which will force this function to compute the
+   * location of the center by solving a linear elasticity problem with
+   * Dirichlet boundary conditions set to the location of the bounding
+   * vertices and the centers of the bounding lines and quads.
+   */
+  Point<spacedim> center (const bool respect_manifold=false,
+                          const bool use_laplace_transformation=false) const;
+
+  /**
+   * Barycenter of the object.
+   */
+  Point<spacedim> barycenter () const;
+
+  /**
+   * Compute the dim-dimensional measure of the object. For a dim-dimensional
+   * cell in dim-dimensional space, this equals its volume. On the other hand,
+   * for a 2d cell in 3d space, or if the current object pointed to is a 2d
+   * face of a 3d cell in 3d space, then the function computes the area the
+   * object occupies. For a one-dimensional object, return its length.
+   *
+   * The function only computes the measure of cells, faces or edges assumed
+   * to be represented by (bi-/tri-)linear mappings. In other words, it only
+   * takes into account the locations of the vertices that bound the current
+   * object but not how the interior of the object may actually be mapped. In
+   * most simple cases, this is exactly what you want. However, for objects
+   * that are not "straight", e.g. 2d cells embedded in 3d space as part of a
+   * triangulation of a curved domain, two-dimensional faces of 3d cells that
+   * are not just parallelograms, or for faces that are at the boundary of a
+   * domain that is not just bounded by straight line segments or planes, this
+   * function only computes the dim-dimensional measure of a (bi-/tri-)linear
+   * interpolation of the "real" object as defined by the manifold or boundary
+   * object describing the real geometry of the object in question. If you
+   * want to consider the "real" geometry, you will need to compute the
+   * measure by integrating a function equal to one over the object, which
+   * after applying quadrature equals summing the JxW values returned by
+   * the FEValues or FEFaceValues object you will want to use for the
+   * integral.
+   */
+  double measure () const;
+
+  /**
+   * Return true if the current object is a translation of the given argument.
+   *
+   * @note For the purpose of a triangulation, cells, faces, etc are only
+   * characterized by their vertices. The current function therefore only
+   * compares the locations of vertices. For many practical applications,
+   * however, it is not only the vertices that determine whether one cell is a
+   * translation of another, but also how the cell is mapped from the
+   * reference cell to its location in real space. For example, if we are
+   * using higher order mappings, then not only do the vertices have to be
+   * translations of each other, but also the points along edges. In these
+   * questions, therefore, it would be appropriate to ask the mapping, not the
+   * current function, whether two objects are translations of each other.
+   */
+  bool
+  is_translation_of (const TriaIterator<TriaAccessor<structdim,dim,spacedim> > &o) const;
+
+  /**
+   * @}
+   */
+
+
+private:
+  /**
+   * Copy the data of the given object into the internal data structures of a
+   * triangulation.
+   */
+  void set (const dealii::internal::Triangulation::TriaObject<structdim> &o) const;
+
+  /**
+   * Set the flag indicating, what <code>line_orientation()</code> will
+   * return.
+   *
+   * It is only possible to set the line_orientation of faces in 3d (i.e.
+   * <code>structdim==2 && dim==3</code>).
+   */
+  void set_line_orientation (const unsigned int line,
+                             const bool         orientation) const;
+
+  /**
+   * Set whether the quad with index @p face has its normal pointing in the
+   * standard direction (@p true) or whether it is the opposite (@p false).
+   * Which is the standard direction is documented with the GeometryInfo
+   * class.
+   *
+   * This function is only for internal use in the library. Setting this flag
+   * to any other value than the one that the triangulation has already set is
+   * bound to bring you disaster.
+   */
+  void set_face_orientation (const unsigned int face,
+                             const bool         orientation) const;
+
+  /**
+   * Set the flag indicating, what <code>face_flip()</code> will return.
+   *
+   * It is only possible to set the face_flip of cells in 3d (i.e.
+   * <code>structdim==3 && dim==3</code>).
+   */
+  void set_face_flip (const unsigned int face,
+                      const bool         flip) const;
+
+  /**
+   * Set the flag indicating, what <code>face_rotation()</code> will return.
+   *
+   * It is only possible to set the face_rotation of cells in 3d (i.e.
+   * <code>structdim==3 && dim==3</code>).
+   */
+  void set_face_rotation (const unsigned int face,
+                          const bool         rotation) const;
+
+  /**
+   * Set the @p used flag. Only for internal use in the library.
+   */
+  void set_used_flag () const;
+
+  /**
+   * Clear the @p used flag. Only for internal use in the library.
+   */
+  void clear_used_flag () const;
+
+  /**
+   * Set the @p RefinementCase<structdim> this TriaObject is refined with. Not
+   * defined for <tt>structdim=1</tt> as lines are always refined resulting in
+   * 2 children lines (isotropic refinement).
+   *
+   * You should know quite exactly what you are doing if you touch this
+   * function. It is exclusively for internal use in the library.
+   */
+  void set_refinement_case (const RefinementCase<structdim> &ref_case) const;
+
+  /**
+   * Clear the RefinementCase<structdim> of this TriaObject, i.e. reset it to
+   * RefinementCase<structdim>::no_refinement.
+   *
+   * You should know quite exactly what you are doing if you touch this
+   * function. It is exclusively for internal use in the library.
+   */
+  void clear_refinement_case () const;
+
+  /**
+   * Set the index of the ith child. Since the children come at least in
+   * pairs, we need to store the index of only every second child, i.e. of the
+   * even numbered children. Make sure, that the index of child i=0 is set
+   * first. Calling this function for odd numbered children is not allowed.
+   */
+  void set_children (const unsigned int i, const int index) const;
+
+  /**
+   * Clear the child field, i.e. set it to a value which indicates that this
+   * cell has no children.
+   */
+  void clear_children () const;
+
+private:
+  /**
+   * Copy operator. This is normally used in a context like <tt>iterator a,b;
+   * *a=*b;</tt>. Presumably, the intent here is to copy the object pointed to
+   * by @p b to the object pointed to by @p a. However, the result of
+   * dereferencing an iterator is not an object but an accessor; consequently,
+   * this operation is not useful for iterators on triangulations. We declare
+   * this function here private, thus it may not be used from outside.
+   * Furthermore it is not implemented and will give a linker error if used
+   * anyway.
+   */
+  void operator = (const TriaAccessor &);
+
+  template <int, int> friend class Triangulation;
+
+  friend struct dealii::internal::Triangulation::Implementation;
+  friend struct dealii::internal::TriaAccessor::Implementation;
+};
+
+
+
+
+
+
+/**
+ * Specialization of <code>TriaAccessor<structdim, dim, spacedim></code>. This
+ * class represents vertices in a triangulation of dimensionality
+ * <code>dim</code> (i.e. 1 for a triangulation of lines, 2 for a
+ * triangulation of quads, and 3 for a triangulation of hexes) that is
+ * embedded in a space of dimensionality <code>spacedim</code> (for
+ * <code>spacedim==dim</code> the triangulation represents a domain in
+ * ${\mathbb R}^\text{dim}$, for <code>spacedim@>dim</code> the triangulation
+ * is of a manifold embedded in a higher dimensional space).
+ *
+ * @ingroup Accessors
+ * @author Bruno Turcksin, 2015
+ */
+template<int dim, int spacedim>
+class TriaAccessor<0, dim, spacedim>
+{
+public:
+  /**
+   * Dimension of the space the object represented by this accessor lives in.
+   * For example, if this accessor represents a quad that is part of a two-
+   * dimensional surface in four-dimensional space, then this value is four.
+   */
+  static const unsigned int space_dimension = spacedim;
+
+  /**
+   * Dimensionality of the object that the thing represented by this accessor
+   * is part of. For example, if this accessor represents a line that is part
+   * of a hexahedron, then this value will be three.
+   */
+  static const unsigned int dimension = dim;
+
+  /**
+   * Dimensionality of the current object represented by this accessor. Since
+   * this specialization represents vertices (irrespective of what they are
+   * part of, and what dimension we are in), this value equals 0.
+   */
+  static const unsigned int structure_dimension = 0;
+
+  /**
+   * Pointer to internal data.
+   */
+  typedef void AccessorData;
+
+  /**
+   * Constructor. The second argument is the global index of the vertex we
+   * point to.
+   */
+  TriaAccessor (const Triangulation<dim,spacedim> *tria,
+                const unsigned int    vertex_index);
+
+  /**
+   * Constructor. This constructor exists in order to maintain interface
+   * compatibility with the other accessor classes. @p index can be used to
+   * set the global index of the vertex we point to.
+   */
+  TriaAccessor (const Triangulation<dim,spacedim> *tria  = NULL,
+                const int                          level = 0,
+                const int                          index = 0,
+                const AccessorData                     * = 0);
+
+  /**
+   * Constructor. Should never be called and thus produces an error.
+   */
+  template <int structdim2, int dim2, int spacedim2>
+  TriaAccessor (const TriaAccessor<structdim2,dim2,spacedim2> &);
+
+  /**
+   * Constructor. Should never be called and thus produces an error.
+   */
+  template <int structdim2, int dim2, int spacedim2>
+  TriaAccessor (const InvalidAccessor<structdim2,dim2,spacedim2> &);
+
+  /**
+   * Return the state of the iterator.
+   */
+  IteratorState::IteratorStates state () const;
+
+  /**
+   * Level of this object. Vertices have no level, so this function always
+   * returns zero.
+   */
+  static int level ();
+
+  /**
+   * Index of this object. Returns the global index of the vertex this object
+   * points to.
+   */
+  int index () const;
+
+  /**
+   * @name Advancement of iterators
+   */
+  /**
+   * @{
+   */
+  /**
+   * This operator advances the iterator to the next element.
+   */
+  void operator ++ ();
+
+  /**
+   * This operator moves the iterator to the previous element.
+   */
+  void operator -- ();
+  /**
+   * Compare for equality.
+   */
+  bool operator == (const TriaAccessor &) const;
+
+  /**
+   * Compare for inequality.
+   */
+  bool operator != (const TriaAccessor &) const;
+
+  /**
+   * @}
+   */
+
+
+  /**
+   * @name Accessing sub-objects
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * Return the global index of i-th vertex of the current object. If @p i is
+   * zero, this returns the index of the current point to which this object
+   * refers. Otherwise, it throws an exception.
+   *
+   * Note that the returned value is only the index of the geometrical vertex.
+   * It has nothing to do with possible degrees of freedom associated with it.
+   * For this, see the @p DoFAccessor::vertex_dof_index functions.
+   *
+   * @note Despite the name, the index returned here is only global in the
+   * sense that it is specific to a particular Triangulation object or, in the
+   * case the triangulation is actually of type
+   * parallel::distributed::Triangulation, specific to that part of the
+   * distributed triangulation stored on the current processor.
+   */
+  unsigned int vertex_index (const unsigned int i = 0) const;
+
+  /**
+   * Return a reference to the @p ith vertex. If i is zero, this returns a
+   * reference to the current point to which this object refers. Otherwise, it
+   * throws an exception.
+   */
+  Point<spacedim> &vertex (const unsigned int i = 0) const;
+
+  /**
+   * Pointer to the @p ith line bounding this object. Will point to an invalid
+   * object.
+   */
+  typename dealii::internal::Triangulation::Iterators<dim,spacedim>::line_iterator
+  static line (const unsigned int);
+
+  /**
+   * Line index of the @p ith line bounding this object. Throws an exception.
+   */
+  static unsigned int line_index (const unsigned int i);
+
+  /**
+   * Pointer to the @p ith quad bounding this object.
+   */
+  static
+  typename dealii::internal::Triangulation::Iterators<dim,spacedim>::quad_iterator
+  quad (const unsigned int i);
+
+  /**
+   * Quad index of the @p ith quad bounding this object. Throws an exception.
+   */
+  static unsigned int quad_index (const unsigned int i);
+
+  /**
+   * @}
+   */
+
+
+  /**
+   * @name Geometric information about an object
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * Diameter of the object. This function always returns zero.
+   */
+  double diameter () const;
+
+  /**
+   * Length of an object in the direction of the given axis, specified in the
+   * local coordinate system. See the documentation of GeometryInfo for the
+   * meaning and enumeration of the local axes.
+   *
+   * This function always returns zero.
+   */
+  double extent_in_direction (const unsigned int axis) const;
+
+  /**
+   * Return the center of this object, which of course coincides with the
+   * location of the vertex this object refers to. The parameters @p
+   * respect_manifold and @p use_laplace_transformation are not used. They are
+   * there to provide the same interface as
+   * <code>TriaAccessor<structdim,dim,spacedim></code>.
+   */
+  Point<spacedim> center (const bool respect_manifold=false,
+                          const bool use_laplace_transformation=false) const;
+
+  /**
+   * Compute the dim-dimensional measure of the object. For a dim-dimensional
+   * cell in dim-dimensional space, this equals its volume. On the other hand,
+   * for a 2d cell in 3d space, or if the current object pointed to is a 2d
+   * face of a 3d cell in 3d space, then the function computes the area the
+   * object occupies. For a one-dimensional object, return its length. For a
+   * zero-dimensional object, return zero.
+   */
+  double measure () const;
+  /**
+   * @}
+   */
+
+  /**
+   * @name Orientation of sub-objects
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * @brief Always return false
+   */
+  static bool face_orientation (const unsigned int face);
+
+  /**
+   * @brief Always return false
+   */
+  static bool face_flip (const unsigned int face);
+
+  /**
+   * @brief Always return false
+   */
+  static bool face_rotation (const unsigned int face);
+
+  /**
+   * @brief Always return false
+   */
+  static bool line_orientation (const unsigned int line);
+
+  /**
+   * @}
+   */
+
+  /**
+   * @name Accessing children
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * Test whether the object has children. Always false.
+   */
+  static bool has_children ();
+
+  /**
+   * Return the number of immediate children of this object. This is always
+   * zero.
+   */
+  static unsigned int n_children();
+
+  /**
+   * Compute and return the number of active descendants of this object.
+   * Always zero.
+   */
+  static unsigned int number_of_children ();
+
+  /**
+   * Return the number of times that this object is refined. Always 0.
+   */
+  static unsigned int max_refinement_depth ();
+
+  /**
+   * @brief Return an invalid object.
+   */
+  static
+  TriaIterator<TriaAccessor<0,dim,spacedim> >
+  child (const unsigned int);
+
+  /**
+   * @brief Return an invalid object.
+   */
+  static
+  TriaIterator<TriaAccessor<0,dim,spacedim> >
+  isotropic_child (const unsigned int);
+
+  /**
+   * Always return no refinement.
+   */
+  static
+  RefinementCase<0> refinement_case ();
+
+  /**
+   * @brief Returns -1
+   */
+  static
+  int child_index (const unsigned int i);
+
+  /**
+   * @brief Returns -1
+   */
+  static
+  int isotropic_child_index (const unsigned int i);
+  /**
+   * @}
+   */
+
+  /**
+   * Return whether the vertex pointed to here is used.
+   */
+  bool used () const;
+
+protected:
+  /**
+   * Copy operator. Since this is only called from iterators, do not return
+   * anything, since the iterator will return itself.
+   *
+   * This method is protected, since it is only to be called from the iterator
+   * class.
+   */
+  void copy_from (const TriaAccessor &);
+
+  /**
+   * Pointer to the triangulation we operate on.
+   */
+  const Triangulation<dim,spacedim> *tria;
+
+  /**
+   * The global vertex index of the vertex this object corresponds to.
+   */
+  unsigned int  global_vertex_index;
+
+private:
+
+  template <typename Accessor> friend class TriaRawIterator;
+  template <typename Accessor> friend class TriaIterator;
+  template <typename Accessor> friend class TriaActiveIterator;
+};
+
+
+
+/**
+ * A class that represents an access to a face in 1d -- i.e. to a point. This
+ * is not a full fledged access from which you can build an iterator: for
+ * example, you can't iterate from one such point to the next. Points also
+ * don't have children, and they don't have neighbors.
+ *
+ * @ingroup Accessors
+ * @author Wolfgang Bangerth, 2010
+ */
+template <int spacedim>
+class TriaAccessor<0, 1, spacedim>
+{
+public:
+  /**
+   * Dimension of the space the object represented by this accessor lives in.
+   * For example, if this accessor represents a quad that is part of a two-
+   * dimensional surface in four-dimensional space, then this value is four.
+   */
+  static const unsigned int space_dimension = spacedim;
+
+  /**
+   * Dimensionality of the object that the thing represented by this accessor
+   * is part of. For example, if this accessor represents a line that is part
+   * of a hexahedron, then this value will be three.
+   */
+  static const unsigned int dimension = 1;
+
+  /**
+   * Dimensionality of the current object represented by this accessor. For
+   * example, if it is a line (irrespective of whether it is part of a quad or
+   * hex, and what dimension we are in), then this value equals 1.
+   */
+  static const unsigned int structure_dimension = 0;
+
+  /**
+   * Pointer to internal data.
+   */
+  typedef void AccessorData;
+
+  /**
+   * Whether the vertex represented here is at the left end of the domain, the
+   * right end, or in the interior.
+   */
+  enum VertexKind
+  {
+    left_vertex,
+    interior_vertex,
+    right_vertex
+  };
+
+  /**
+   * Constructor.
+   *
+   * Since there is no mapping from vertices to cells, an accessor object for
+   * a point has no way to figure out whether it is at the boundary of the
+   * domain or not. Consequently, the second argument must be passed by the
+   * object that generates this accessor -- e.g. a 1d cell that can figure out
+   * whether its left or right vertex are at the boundary.
+   *
+   * The third argument is the global index of the vertex we point to.
+   */
+  TriaAccessor (const Triangulation<1,spacedim> *tria,
+                const VertexKind      vertex_kind,
+                const unsigned int    vertex_index);
+
+  /**
+   * Constructor. This constructor exists in order to maintain interface
+   * compatibility with the other accessor classes. However, it doesn't do
+   * anything useful here and so may not actually be called.
+   */
+  TriaAccessor (const Triangulation<1,spacedim> *tria = 0,
+                const int = 0,
+                const int = 0,
+                const AccessorData * = 0);
+
+  /**
+   * Constructor. Should never be called and thus produces an error.
+   */
+  template <int structdim2, int dim2, int spacedim2>
+  TriaAccessor (const TriaAccessor<structdim2,dim2,spacedim2> &);
+
+  /**
+   * Constructor. Should never be called and thus produces an error.
+   */
+  template <int structdim2, int dim2, int spacedim2>
+  TriaAccessor (const InvalidAccessor<structdim2,dim2,spacedim2> &);
+
+  /**
+   * Copy operator. Since this is only called from iterators, do not return
+   * anything, since the iterator will return itself.
+   */
+  void copy_from (const TriaAccessor &);
+
+  /**
+   * Return the state of the iterator. Since an iterator to points can not be
+   * incremented or decremented, its state remains constant, and in particular
+   * equal to IteratorState::valid.
+   */
+  static IteratorState::IteratorStates state ();
+
+  /**
+   * Level of this object. Vertices have no level, so this function always
+   * returns zero.
+   */
+  static int level ();
+
+  /**
+   * Index of this object. Returns the global index of the vertex this object
+   * points to.
+   */
+  int index () const;
+
+  /**
+   * @name Advancement of iterators
+   */
+  /**
+   * @{
+   */
+  /**
+   * This operator advances the iterator to the next element. For points, this
+   * operation is not defined, so you can't iterate over point iterators.
+   */
+  void operator ++ () const;
+
+  /**
+   * This operator moves the iterator to the previous element. For points,
+   * this operation is not defined, so you can't iterate over point iterators.
+   */
+  void operator -- () const;
+  /**
+   * Compare for equality.
+   */
+  bool operator == (const TriaAccessor &) const;
+
+  /**
+   * Compare for inequality.
+   */
+  bool operator != (const TriaAccessor &) const;
+
+  /**
+   * @}
+   */
+
+  /**
+   * @name Accessing sub-objects
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * Return the global index of i-th vertex of the current object. If i is
+   * zero, this returns the index of the current point to which this object
+   * refers. Otherwise, it throws an exception.
+   *
+   * Note that the returned value is only the index of the geometrical vertex.
+   * It has nothing to do with possible degrees of freedom associated with it.
+   * For this, see the @p DoFAccessor::vertex_dof_index functions.
+   *
+   * @note Despite the name, the index returned here is only global in the
+   * sense that it is specific to a particular Triangulation object or, in the
+   * case the triangulation is actually of type
+   * parallel::distributed::Triangulation, specific to that part of the
+   * distributed triangulation stored on the current processor.
+   */
+  unsigned int vertex_index (const unsigned int i = 0) const;
+
+  /**
+   * Return a reference to the @p ith vertex. If i is zero, this returns a
+   * reference to the current point to which this object refers. Otherwise, it
+   * throws an exception.
+   */
+  Point<spacedim> &vertex (const unsigned int i = 0) const;
+
+  /**
+   * Return the center of this object, which of course coincides with the
+   * location of the vertex this object refers to.
+   */
+  Point<spacedim> center () const;
+
+  /**
+   * Pointer to the @p ith line bounding this object. Will point to an invalid
+   * object.
+   */
+  typename dealii::internal::Triangulation::Iterators<1,spacedim>::line_iterator
+  static line (const unsigned int);
+
+  /**
+   * Line index of the @p ith line bounding this object.
+   *
+   * Implemented only for <tt>structdim>1</tt>, otherwise an exception
+   * is generated.
+   */
+  static unsigned int line_index (const unsigned int i);
+
+  /**
+   * Pointer to the @p ith quad bounding this object.
+   */
+  static
+  typename dealii::internal::Triangulation::Iterators<1,spacedim>::quad_iterator
+  quad (const unsigned int i);
+
+  /**
+   * Quad index of the @p ith quad bounding this object.
+   *
+   * Implemented only for <tt>structdim>2</tt>, otherwise an exception
+   * is generated.
+   */
+  static unsigned int quad_index (const unsigned int i);
+
+  /**
+   * @}
+   */
+
+
+  /**
+   * Return whether this point is at the boundary of the one-dimensional
+   * triangulation we deal with here.
+   */
+  bool at_boundary () const;
+
+  /**
+   * Return the boundary indicator of this object. The convention for one
+   * dimensional triangulations is that left end vertices (of each line
+   * segment from which the triangulation may be constructed) have boundary
+   * indicator zero, and right end vertices have boundary indicator one,
+   * unless explicitly set differently.
+   *
+   * If the return value is the special value
+   * numbers::internal_face_boundary_id, then this object is in the interior
+   * of the domain.
+   *
+   * @see
+   * @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+   */
+  types::boundary_id boundary_id () const;
+
+  /**
+   * Return the boundary indicator of this object.
+   *
+   * @deprecated This spelling of the function name is deprecated. Use
+   * boundary_id() instead.
+   */
+  types::boundary_id boundary_indicator () const DEAL_II_DEPRECATED;
+
+  /**
+   * Return the manifold indicator of this object.
+   *
+   * @see
+   * @ref GlossManifoldIndicator "Glossary entry on manifold indicators"
+   */
+  types::manifold_id manifold_id () const;
+
+
+  /**
+   * @name Orientation of sub-objects
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * @brief Always return false
+   */
+  static bool face_orientation (const unsigned int face);
+
+  /**
+   * @brief Always return false
+   */
+  static bool face_flip (const unsigned int face);
+
+  /**
+   * @brief Always return false
+   */
+  static bool face_rotation (const unsigned int face);
+
+  /**
+   * @brief Always return false
+   */
+  static bool line_orientation (const unsigned int line);
+
+  /**
+   * @}
+   */
+
+  /**
+   * @name Accessing children
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * Test whether the object has children. Always false.
+   */
+  static bool has_children ();
+
+  /**
+   * Return the number of immediate children of this object. This is always
+   * zero in dimension 0.
+   */
+  static unsigned int n_children();
+
+  /**
+   * Compute and return the number of active descendants of this object.
+   * Always zero.
+   */
+  static unsigned int number_of_children ();
+
+  /**
+   * Return the number of times that this object is refined. Always 0.
+   */
+  static unsigned int max_refinement_depth ();
+
+  /**
+   * @brief Return an invalid object
+   */
+  static
+  TriaIterator<TriaAccessor<0,1,spacedim> >
+  child (const unsigned int);
+
+  /**
+   * @brief Return an invalid object
+   */
+  static
+  TriaIterator<TriaAccessor<0,1,spacedim> >
+  isotropic_child (const unsigned int);
+
+  /**
+   * Always return no refinement.
+   */
+  static
+  RefinementCase<0> refinement_case ();
+
+  /**
+   * @brief Returns -1
+   */
+  static
+  int child_index (const unsigned int i);
+
+  /**
+   * @brief Returns -1
+   */
+  static
+  int isotropic_child_index (const unsigned int i);
+  /**
+   * @}
+   */
+
+  /**
+   * @name Dealing with boundary indicators
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * Set the boundary indicator. The same applies as for the
+   * <tt>boundary_id()</tt> function.
+   *
+   * @warning You should never set the boundary indicator of an interior face
+   * (a face not at the boundary of the domain), or set the boundary
+   * indicator of an exterior face to numbers::internal_face_boundary_id (this
+   * value is reserved for another purpose). Algorithms may not work or
+   * produce very confusing results if boundary cells have a boundary
+   * indicator of numbers::internal_face_boundary_id or if interior cells have
+   * boundary indicators other than numbers::internal_face_boundary_id.
+   * Unfortunately, the current object has no means of finding out whether it
+   * really is at the boundary of the domain and so cannot determine whether
+   * the value you are trying to set makes sense under the current
+   * circumstances.
+   *
+   * @ingroup boundary
+   *
+   * @see
+   * @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+   */
+  void
+  set_boundary_id (const types::boundary_id);
+
+  /**
+   * Set the boundary indicator of this object.
+   *
+   * @deprecated This spelling of the function name is deprecated. Use
+   * set_boundary_id() instead.
+   */
+  void set_boundary_indicator (const types::boundary_id) DEAL_II_DEPRECATED;
+
+  /**
+   * Set the manifold indicator of this vertex. This does nothing so far since
+   * manifolds are only used to refine and map objects, but vertices are not
+   * refined and the mapping is trivial. This function is here only to allow
+   * dimension independent programming.
+   */
+  void
+  set_manifold_id (const types::manifold_id);
+
+  /**
+   * Set the boundary indicator of this object and all of its lower-
+   * dimensional sub-objects.  Since this object only represents a single
+   * vertex, there are no lower-dimensional objects and this function is
+   * equivalent to calling set_boundary_id() with the same argument.
+   *
+   * @ingroup boundary
+   *
+   * @see
+   * @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+   */
+  void
+  set_all_boundary_ids (const types::boundary_id);
+
+  /**
+   * Set the boundary indicator of this object and all that bound it.
+   *
+   * @deprecated This spelling of the function name is deprecated. Use
+   * set_all_boundary_ids() instead.
+   */
+  void set_all_boundary_indicators (const types::boundary_id) DEAL_II_DEPRECATED;
+
+  /**
+   * Set the manifold indicator of this object and all of its lower-
+   * dimensional sub-objects.  Since this object only represents a single
+   * vertex, there are no lower-dimensional objects and this function is
+   * equivalent to calling set_manifold_id() with the same argument.
+   *
+   * @ingroup manifold
+   *
+   * @see
+   * @ref GlossManifoldIndicator "Glossary entry on manifold indicators"
+   */
+  void
+  set_all_manifold_ids (const types::manifold_id);
+  /**
+   * @}
+   */
+
+  /**
+   * Return whether the vertex pointed to here is used.
+   */
+  bool used () const;
+
+protected:
+  /**
+   * Pointer to the triangulation we operate on.
+   */
+  const Triangulation<1,spacedim> *tria;
+
+  /**
+   * Whether this is a left end, right end, or interior vertex. This
+   * information is provided by the cell at the time of creation.
+   */
+  VertexKind      vertex_kind;
+
+  /**
+   * The global vertex index of the vertex this object corresponds to.
+   */
+  unsigned int    global_vertex_index;
+};
+
+
+
+
+/**
+ * This class allows access to a cell: a line in one dimension, a quad in two
+ * dimensions, etc.
+ *
+ * The following refers to any dimension:
+ *
+ * This class allows access to a <tt>cell</tt>, which is a line in 1D and a
+ * quad in 2D. Cells have more functionality than lines or quads by
+ * themselves, for example they can be flagged for refinement, they have
+ * neighbors, they have the possibility to check whether they are at the
+ * boundary etc. This class offers access to all this data.
+ *
+ * @ingroup grid
+ * @ingroup Accessors
+ * @author Wolfgang Bangerth, 1998, 1999, 2000
+ */
+template <int dim, int spacedim=dim>
+class CellAccessor :  public TriaAccessor<dim,dim,spacedim>
+{
+public:
+  /**
+   * Propagate the AccessorData type into the present class.
+   */
+  typedef typename TriaAccessor<dim,dim,spacedim>::AccessorData AccessorData;
+
+  /**
+   * Define the type of the container this is part of.
+   */
+  typedef Triangulation<dim, spacedim> Container;
+
+  /**
+   * @name Constructors
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * Constructor.
+   */
+  CellAccessor (const Triangulation<dim,spacedim> *parent     =  0,
+                const int                 level      = -1,
+                const int                 index      = -1,
+                const AccessorData       *local_data =  0);
+
+  /**
+   * Copy constructor.
+   */
+  CellAccessor (const TriaAccessor<dim,dim,spacedim> &cell_accessor);
+
+  /**
+   * Conversion constructor. This constructor exists to make certain
+   * constructs simpler to write in dimension independent code. For example,
+   * it allows assigning a face iterator to a line iterator, an operation that
+   * is useful in 2d but doesn't make any sense in 3d. The constructor here
+   * exists for the purpose of making the code conform to C++ but it will
+   * unconditionally abort; in other words, assigning a face iterator to a
+   * line iterator is better put into an if-statement that checks that the
+   * dimension is two, and assign to a quad iterator in 3d (an operation that,
+   * without this constructor would be illegal if we happen to compile for
+   * 2d).
+   */
+  template <int structdim2, int dim2, int spacedim2>
+  CellAccessor (const InvalidAccessor<structdim2,dim2,spacedim2> &);
+
+  /**
+   * Another conversion operator between objects that don't make sense, just
+   * like the previous one.
+   */
+  template <int structdim2, int dim2, int spacedim2>
+  CellAccessor (const TriaAccessor<structdim2,dim2,spacedim2> &);
+
+  /**
+   * @}
+   */
+
+  /**
+   * @name Accessing sub-objects and neighbors
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * Return a pointer to the @p ith child. Overloaded version which returns a
+   * more reasonable iterator class.
+   */
+  TriaIterator<CellAccessor<dim, spacedim> >
+  child (const unsigned int i) const;
+
+  /**
+   * Return an iterator to the @p ith face of this cell.
+   */
+  TriaIterator<TriaAccessor<dim-1,dim,spacedim> >
+  face (const unsigned int i) const;
+
+  /**
+   * Return the (global) index of the @p ith face of this cell.
+   *
+   * @note Despite the name, the index returned here is only global in the
+   * sense that it is specific to a particular Triangulation object or, in the
+   * case the triangulation is actually of type
+   * parallel::distributed::Triangulation, specific to that part of the
+   * distributed triangulation stored on the current processor.
+   */
+  unsigned int
+  face_index (const unsigned int i) const;
+
+  /**
+   * Return an iterator to that cell that neighbors the present cell on the
+   * given face and subface number.
+   *
+   * To succeed, the present cell must not be further refined, and the
+   * neighbor on the given face must be further refined exactly once; the
+   * returned cell is then a child of that neighbor.
+   *
+   * The function may not be called in 1d, since there we have no subfaces.
+   * The implementation of this function is rather straightforward in 2d, by
+   * first determining which face of the neighbor cell the present cell is
+   * bordering on (this is what the @p neighbor_of_neighbor function does),
+   * and then asking @p GeometryInfo::child_cell_on_subface for the index of
+   * the child.
+   *
+   * However, the situation is more complicated in 3d, since there faces may
+   * have more than one orientation, and we have to use @p face_orientation,
+   * @p face_flip and @p face_rotation for both this and the neighbor cell to
+   * figure out which cell we want to have.
+   *
+   * This can lead to surprising results: if we are sitting on a cell and are
+   * asking for a cell behind subface <tt>sf</tt>, then this means that we are
+   * considering the subface for the face in the natural direction for the
+   * present cell. However, if the face as seen from this cell has
+   * <tt>face_orientation()==false</tt>, then the child of the face that
+   * separates the present cell from the neighboring cell's child is not
+   * necessarily the @p sf-th child of the face of this cell. This is so
+   * because the @p subface_no on a cell corresponds to the subface with
+   * respect to the intrinsic ordering of the present cell, whereas children
+   * of face iterators are computed with respect to the intrinsic ordering of
+   * faces; these two orderings are only identical if the face orientation is
+   * @p true, and reversed otherwise.
+   *
+   * Similarly, effects of <tt>face_flip()==true</tt> and
+   * <tt>face_rotation()==true</tt>, both of which indicate a non-standard
+   * face have to be considered.
+   *
+   * Fortunately, this is only very rarely of concern, since usually one
+   * simply wishes to loop over all finer neighbors at a given face of an
+   * active cell. Only in the process of refinement of a Triangulation we want
+   * to set neighbor information for both our child cells and the neighbor's
+   * children. Since we can respect orientation of faces from our current cell
+   * in that case, we do NOT respect face_orientation, face_flip and
+   * face_rotation of the present cell within this function, i.e. the returned
+   * neighbor's child is behind subface @p subface concerning the intrinsic
+   * ordering of the given face.
+   */
+  TriaIterator<CellAccessor<dim, spacedim> >
+  neighbor_child_on_subface (const unsigned int face_no,
+                             const unsigned int subface_no) const;
+
+  /**
+   * Return a pointer to the @p ith neighbor.  If the neighbor does not exist,
+   * i.e., if the @p ith face of the current object is at the boundary, then
+   * an invalid iterator is returned.
+   *
+   * The neighbor of a cell has at most the same level as this cell. For
+   * example, consider the following situation:
+   * @image html limit_level_difference_at_vertices.png ""
+   * Here, if you are on the top right cell and you ask for its left neighbor
+   * (which is, according to the conventions spelled out in the GeometryInfo
+   * class, its <i>zeroth</i> neighbor), then you will get the mother cell of
+   * the four small cells at the top left. In other words, the cell you get as
+   * neighbor has the same refinement level as the one you're on right now
+   * (the top right one) and it may have children.
+   *
+   * On the other hand, if you were at the top right cell of the four small
+   * cells at the top left, and you asked for the right neighbor (which is
+   * associated with index <code>i=1</code>), then you would get the large
+   * cell at the top right which in this case has a lower refinement level and
+   * no children of its own.
+   */
+  TriaIterator<CellAccessor<dim, spacedim>  >
+  neighbor (const unsigned int i) const;
+
+  /**
+   * Return the index of the @p ith neighbor.  If the neighbor does not exist,
+   * its index is -1.
+   */
+  int neighbor_index (const unsigned int i) const;
+
+  /**
+   * Return the level of the @p ith neighbor.  If the neighbor does not exist,
+   * its level is -1.
+   */
+  int neighbor_level (const unsigned int i) const;
+
+  /**
+   * Return the how-many'th neighbor this cell is of
+   * <tt>cell->neighbor(neighbor)</tt>, i.e. return the @p face_no such that
+   * <tt>cell->neighbor(neighbor)->neighbor(face_no)==cell</tt>. This function
+   * is the right one if you want to know how to get back from a neighbor to
+   * the present cell.
+   *
+   * Note that this operation is only useful if the neighbor is not coarser
+   * than the present cell. If the neighbor is coarser this function throws an
+   * exception. Use the @p neighbor_of_coarser_neighbor function in that case.
+   */
+  unsigned int neighbor_of_neighbor (const unsigned int neighbor) const;
+
+  /**
+   * Return, whether the neighbor is coarser than the present cell. This is
+   * important in case of anisotropic refinement where this information does
+   * not depend on the levels of the cells.
+   *
+   * Note, that in an anisotropic setting, a cell can only be coarser than
+   * another one at a given face, not on a general basis. The face of the
+   * finer cell is contained in the corresponding face of the coarser cell,
+   * the finer face is either a child or a grandchild of the coarser face.
+   */
+  bool neighbor_is_coarser (const unsigned int neighbor) const;
+
+  /**
+   * This function is a generalization of the @p neighbor_of_neighbor function
+   * for the case of a coarser neighbor. It returns a pair of numbers, face_no
+   * and subface_no, with the following property, if the neighbor is not
+   * refined: <tt>cell->neighbor(neighbor)->neighbor_child_on_subface(face_no,
+   * subface_no)==cell</tt>. In 3D, a coarser neighbor can still be refined.
+   * In that case subface_no denotes the child index of the neighbor's face
+   * that relates to our face:
+   * <tt>cell->neighbor(neighbor)->face(face_no)->child(subface_no)==cell->face(neighbor)</tt>.
+   * This case in 3d and how it can happen is discussed in the introduction of
+   * the step-30 tutorial program.
+   *
+   * This function is impossible for <tt>dim==1</tt>.
+   */
+  std::pair<unsigned int, unsigned int>
+  neighbor_of_coarser_neighbor (const unsigned int neighbor) const;
+
+  /**
+   * This function is a generalization of the @p neighbor_of_neighbor and the
+   * @p neighbor_of_coarser_neighbor functions. It checks whether the neighbor
+   * is coarser or not and calls the respective function. In both cases, only
+   * the face_no is returned.
+   */
+  unsigned int neighbor_face_no (const unsigned int neighbor) const;
+
+  /**
+   * @}
+   */
+
+  /**
+   * @name Dealing with boundary indicators
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * Return whether the @p ith vertex or face (depending on the dimension) is
+   * part of the boundary. This is true, if the @p ith neighbor does not
+   * exist.
+   */
+  bool at_boundary (const unsigned int i) const;
+
+  /**
+   * Return whether the cell is at the boundary. Being at the boundary is
+   * defined by one face being on the boundary. Note that this does not catch
+   * cases where only one vertex of a quad or of a hex is at the boundary, or
+   * where only one line of a hex is at the boundary while the interiors of
+   * all faces are in the interior of the domain. For the latter case, the @p
+   * has_boundary_lines function is the right one to ask.
+   */
+  bool at_boundary () const;
+
+  /**
+   * This is a slight variation to the @p at_boundary function: for 1 and 2
+   * dimensions, it is equivalent, for three dimensions it returns whether at
+   * least one of the 12 lines of the hexahedron is at a boundary. This, of
+   * course, includes the case where a whole face is at the boundary, but also
+   * some other cases.
+   */
+  bool has_boundary_lines () const;
+  /**
+   * @}
+   */
+
+  /**
+   * @name Dealing with refinement indicators
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * Return the @p RefinementCase<dim> this cell was flagged to be refined
+   * with.  The return value of this function can be compared to a bool to
+   * check if this cell is flagged for any kind of refinement. For example, if
+   * you have previously called cell->set_refine_flag() for a cell, then you
+   * will enter the 'if' block in the following snippet:
+   *
+   * @code
+   * if (cell->refine_flag_set())
+   * {
+   *   // yes, this cell is marked for refinement.
+   * }
+   * @endcode
+   */
+  RefinementCase<dim> refine_flag_set () const;
+
+  /**
+   * Flag the cell pointed to for refinement. This function is only allowed
+   * for active cells. Keeping the default value for @p ref_case will mark
+   * this cell for isotropic refinement.
+   */
+  void set_refine_flag (const RefinementCase<dim> ref_case = RefinementCase<dim>::isotropic_refinement) const;
+
+  /**
+   * Clear the refinement flag.
+   */
+  void clear_refine_flag () const;
+
+  /**
+   * Modify the refinement flag of the cell to ensure (at least) the given
+   * refinement case @p face_refinement_case at face <tt>face_no</tt>, taking
+   * into account orientation, flip and rotation of the face. Return, whether
+   * the refinement flag had to be modified. This function is only allowed for
+   * active cells.
+   */
+  bool flag_for_face_refinement (const unsigned int face_no,
+                                 const RefinementCase<dim-1> &face_refinement_case=RefinementCase<dim-1>::isotropic_refinement) const;
+
+  /**
+   * Modify the refinement flag of the cell to ensure that line
+   * <tt>line_no</tt> will be refined. Return whether the refinement flag had
+   * to be modified. This function is only allowed for active cells.
+   */
+  bool flag_for_line_refinement (const unsigned int line_no) const;
+
+  /**
+   * Return the SubfaceCase of face <tt>face_no</tt>. Note that this is not
+   * identical to asking <tt>cell->face(face_no)->refinement_case()</tt> since
+   * the latter returns a RefinementCase<dim-1> and thus only considers one
+   * (anisotropic) refinement, whereas this function considers the complete
+   * refinement situation including possible refinement of the face's
+   * children. This function may only be called for active cells in 2d and 3d.
+   */
+  dealii::internal::SubfaceCase<dim> subface_case(const unsigned int face_no) const;
+
+  /**
+   * Return whether the coarsen flag is set or not.
+   */
+  bool coarsen_flag_set () const;
+
+  /**
+   * Flag the cell pointed to for coarsening. This function is only allowed
+   * for active cells.
+   */
+  void set_coarsen_flag () const;
+
+  /**
+   * Clear the coarsen flag.
+   */
+  void clear_coarsen_flag () const;
+  /**
+   * @}
+   */
+
+  /**
+   * @name Dealing with material indicators
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * Return the material id of this cell.
+   *
+   * For a typical use of this function, see the
+   * @ref step_28 "step-28"
+   * tutorial program.
+   *
+   * See the
+   * @ref GlossMaterialId "glossary"
+   * for more information.
+   */
+  types::material_id material_id () const;
+
+  /**
+   * Set the material id of this cell.
+   *
+   * For a typical use of this function, see the
+   * @ref step_28 "step-28"
+   * tutorial program.
+   *
+   * See the
+   * @ref GlossMaterialId "glossary"
+   * for more information.
+   */
+  void set_material_id (const types::material_id new_material_id) const;
+
+  /**
+   * Set the material id of this cell and all its children (and grand-
+   * children, and so on) to the given value.
+   *
+   * See the
+   * @ref GlossMaterialId "glossary"
+   * for more information.
+   */
+  void recursively_set_material_id (const types::material_id new_material_id) const;
+  /**
+   * @}
+   */
+
+  /**
+   * @name Dealing with subdomain indicators
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * Return the subdomain id of this cell.
+   *
+   * See the
+   * @ref GlossSubdomainId "glossary"
+   * for more information.
+   *
+   * @note The subdomain of a cell is a property only defined for active
+   * cells, i.e., cells that are not further refined. Consequently, you can
+   * only call this function if the cell it refers to has no children. For
+   * multigrid methods in parallel, it is also important to know which
+   * processor owns non-active cells, and for this you can call
+   * level_subdomain_id().
+   */
+  types::subdomain_id subdomain_id () const;
+
+  /**
+   * Set the subdomain id of this cell.
+   *
+   * See the
+   * @ref GlossSubdomainId "glossary"
+   * for more information. This function should not be called if you use a
+   * parallel::distributed::Triangulation object.
+   *
+   * @note The subdomain of a cell is a property only defined for active
+   * cells, i.e., cells that are not further refined. Consequently, you can
+   * only call this function if the cell it refers to has no children. For
+   * multigrid methods in parallel, it is also important to know which
+   * processor owns non-active cells, and for this you can call
+   * level_subdomain_id().
+   */
+  void set_subdomain_id (const types::subdomain_id new_subdomain_id) const;
+
+  /**
+   * Get the level subdomain id of this cell. This is used for parallel
+   * multigrid.
+   */
+  types::subdomain_id level_subdomain_id () const;
+
+  /**
+   * Set the level subdomain id of this cell. This is used for parallel
+   * multigrid.
+   */
+  void set_level_subdomain_id (const types::subdomain_id new_level_subdomain_id) const;
+
+
+  /**
+   * Set the subdomain id of this cell (if it is active) or all its terminal
+   * children (and grand-children, and so on, as long as they have no children
+   * of their own) to the given value. Since the subdomain id is a concept
+   * that is only defined for cells that are active (i.e., have no children of
+   * their own), this function only sets the subdomain ids for all children
+   * and grand children of this cell that are actually active, skipping
+   * intermediate child cells.
+   *
+   * See the
+   * @ref GlossSubdomainId "glossary"
+   * for more information. This function should not be called if you use a
+   * parallel::distributed::Triangulation object since there the subdomain id
+   * is implicitly defined by which processor you're on.
+   */
+  void recursively_set_subdomain_id (const types::subdomain_id new_subdomain_id) const;
+  /**
+   * @}
+   */
+
+  /**
+   * @name Dealing with codim 1 cell orientation
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * Return the orientation of this cell.
+   *
+   * For the meaning of this flag, see
+   * @ref GlossDirectionFlag.
+   */
+  bool direction_flag () const;
+
+  /**
+   * Return the index of the current cell among all active cells (assuming
+   * the cell is indeed active). This is useful, for example, if you are
+   * accessing the elements of a vector with as many entries as there are
+   * active cells. Such vectors are used for estimating the error on each cell
+   * of a triangulation, for specifying refinement criteria passed to the
+   * functions in GridRefinement, and for generating cell-wise output.
+   *
+   * The function throws an exception if the current cell is not active.
+   *
+   * @note If the triangulation this function is called on is of type
+   * parallel::distributed::Triangulation, then active cells may be locally
+   * owned, ghost cells, or artificial (see
+   * @ref GlossLocallyOwnedCell,
+   * @ref GlossGhostCell,
+   * and
+   * @ref GlossArtificialCell).
+   * This function counts over all of them, including ghost and artificial
+   * active cells. This implies that the index returned by this function
+   * uniquely identifies a cell within the triangulation on a single
+   * processor, but does not uniquely identify the cell among the (parts of
+   * the) triangulation that is shared among processors. If you would like to
+   * identify active cells across processors, you need to consider the CellId
+   * of a cell returned by CellAccessor::id().
+   */
+  unsigned int active_cell_index () const;
+
+  /**
+   * Return the index of the parent of this cell within the level of the
+   * triangulation to which the parent cell belongs. The level of the parent
+   * is of course one lower than that of the present cell. If the parent does
+   * not exist (i.e., if the object is at the coarsest level of the mesh
+   * hierarchy), an exception is generated.
+   */
+  int parent_index () const;
+
+  /**
+   * Return an iterator to the parent. If the parent does not exist (i.e., if
+   * the object is at the coarsest level of the mesh hierarchy), an exception
+   * is generated.
+   */
+  TriaIterator<CellAccessor<dim,spacedim> >
+  parent () const;
+
+  /**
+   * @}
+   */
+
+  /**
+   * @name Other functions
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * Test whether the cell has no children (this is the criterion for activity
+   * of a cell).
+   *
+   * See the
+   * @ref GlossActive "glossary"
+   * for more information.
+   */
+  bool active () const;
+
+  /**
+   * Return whether this cell is owned by the current processor or is owned by
+   * another processor. The function always returns true if applied to an
+   * object of type dealii::Triangulation, but may yield false if the
+   * triangulation is of type parallel::distributed::Triangulation.
+   *
+   * See the
+   * @ref GlossGhostCell "glossary"
+   * and the
+   * @ref distributed
+   * module for more information.
+   *
+   * @post The returned value is equal to <code>!is_ghost() &&
+   * !is_artificial()</code>.
+   *
+   * @note Whether a cell is a ghost cell, artificial, or is locally owned
+   * is a property that only pertains to cells that are active. Consequently,
+   * you can only call this function if the cell it refers to has no children.
+   */
+  bool is_locally_owned () const;
+
+  /**
+   * Return true if either the Triangulation is not distributed or if
+   * level_subdomain_id() is equal to the id of the current processor.
+   */
+  bool is_locally_owned_on_level () const;
+
+  /**
+   * Return whether this cell exists in the global mesh but (i) is owned by
+   * another processor, i.e. has a subdomain_id different from the one the
+   * current processor owns and (ii) is adjacent to a cell owned by the
+   * current processor.
+   *
+   * This function only makes sense if the triangulation used is of kind
+   * parallel::distributed::Triangulation. In all other cases, the returned
+   * value is always false.
+   *
+   * See the
+   * @ref GlossGhostCell "glossary"
+   * and the
+   * @ref distributed
+   * module for more information.
+   *
+   * @post The returned value is equal to <code>!is_locally_owned() &&
+   * !is_artificial()</code>.
+   *
+   * @note Whether a cell is a ghost cell, artificial, or is locally owned
+   * is a property that only pertains to cells that are active. Consequently,
+   * you can only call this function if the cell it refers to has no children.
+   */
+  bool is_ghost () const;
+
+  /**
+   * Return whether this cell is artificial, i.e. it isn't one of the cells
+   * owned by the current processor, and it also doesn't border on one. As a
+   * consequence, it exists in the mesh to ensure that each processor has all
+   * coarse mesh cells and that the 2:1 ratio of neighboring cells is
+   * maintained, but it is not one of the cells we should work on on the
+   * current processor. In particular, there is no guarantee that this cell
+   * isn't, in fact, further refined on one of the other processors.
+   *
+   * This function only makes sense if the triangulation used is of kind
+   * parallel::distributed::Triangulation. In all other cases, the returned
+   * value is always false.
+   *
+   * See the
+   * @ref GlossArtificialCell "glossary"
+   * and the
+   * @ref distributed
+   * module for more information.
+   *
+   * @post The returned value is equal to <code>!is_ghost() &&
+   * !is_locally_owned()</code>.
+   *
+   * @note Whether a cell is a ghost cell, artificial, or is locally owned is
+   * a property that only pertains to cells that are active. Consequently, you
+   * can only call this function if the cell it refers to has no children.
+   */
+  bool is_artificial () const;
+
+  /**
+   * Test whether the point @p p is inside this cell. Points on the boundary
+   * are counted as being inside the cell.
+   *
+   * Note that this function assumes that the mapping between unit cell and
+   * real cell is (bi-, tri-)linear, i.e. that faces in 2d and edges in 3d are
+   * straight lines. If you have higher order transformations, results may be
+   * different as to whether a point is in- or outside the cell in real space.
+   *
+   * In case of codim>0, the point is first projected to the manifold where
+   * the cell is embedded, and then it is checked whether this projection is
+   * inside the cell.
+   */
+  bool point_inside (const Point<spacedim> &p) const;
+
+  /**
+   * Set the neighbor @p i of this cell to the cell pointed to by @p pointer.
+   *
+   * This function shouldn't really be public (but needs to be for various
+   * reasons in order not to make a long list of functions friends): it
+   * modifies internal data structures and may leave them in an inconsistent
+   * state. Do not use it from application codes.
+   */
+  void set_neighbor (const unsigned int i,
+                     const TriaIterator<CellAccessor<dim, spacedim> > &pointer) const;
+
+  /**
+   * Return a unique ID for the current cell. This ID is constructed from the
+   * path in the hierarchy from the coarse father cell and works correctly in
+   * parallel computations using objects of type
+   * parallel::distributed::Triangulation. This function is therefore useful
+   * in providing a unique identifier for cells (active or not) that also
+   * works for parallel triangulations. See the documentation of the CellId
+   * class for more information.
+   *
+   * @note This operation takes O(level) time to compute. In most practical
+   * cases, the number of levels of a triangulation will depend
+   * logarithmically on the number of cells in the triangulation.
+   */
+  CellId id() const;
+
+  /**
+   * @}
+   */
+
+
+  /**
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcRefineCellNotActive);
+  /**
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcCellFlaggedForRefinement);
+  /**
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcCellFlaggedForCoarsening);
+
+protected:
+  /**
+   * This function assumes that the neighbor is not coarser than the current
+   * cell. In this case it returns the neighbor_of_neighbor() value. If,
+   * however, the neighbor is coarser this function returns an
+   * <code>invalid_unsigned_int</code>.
+   *
+   * This function is not for public use. Use the function
+   * neighbor_of_neighbor() instead which throws an exception if called for a
+   * coarser neighbor. If neighbor is indeed coarser (you get to know this by
+   * e.g. the neighbor_is_coarser() function) then the
+   * neighbor_of_coarser_neighbor() function should be called. If you'd like to
+   * know only the <code>face_no</code> which is required to get back from the
+   * neighbor to the present cell then simply use the neighbor_face_no()
+   * function which can be used for coarser as well as non-coarser neighbors.
+   */
+  unsigned int neighbor_of_neighbor_internal (const unsigned int neighbor) const;
+
+  /**
+   * Since we can use similar code for any codim>0, and since C++ does not
+   * allow partial specialization of function templates, we use this auxiliary
+   * function that is then called from point_inside.
+   */
+  template<int dim_,int spacedim_ >
+  bool point_inside_codim(const Point<spacedim_> &p) const;
+
+
+
+private:
+  /**
+   * Set the active cell index of a cell. This is done at the end of
+   * refinement.
+   */
+  void set_active_cell_index (const unsigned int active_cell_index);
+
+  /**
+   * Set the parent of a cell.
+   */
+  void set_parent (const unsigned int parent_index);
+
+  /**
+   * Set the orientation of this cell.
+   *
+   * For the meaning of this flag, see
+   * @ref GlossDirectionFlag.
+   */
+  void set_direction_flag (const bool new_direction_flag) const;
+  /**
+   * Copy operator. This is normally used in a context like <tt>iterator a,b;
+   * *a=*b;</tt>. Since the meaning is to copy the object pointed to by @p b
+   * to the object pointed to by @p a and since accessors are not real but
+   * virtual objects, this operation is not useful for iterators on
+   * triangulations. We declare this function here private, thus it may not be
+   * used from outside.  Furthermore it is not implemented and will give a
+   * linker error if used anyway.
+   */
+  void operator = (const CellAccessor<dim, spacedim> &);
+
+  template <int, int> friend class Triangulation;
+
+  friend struct dealii::internal::Triangulation::Implementation;
+};
+
+
+
+/* -------------- declaration of explicit
+   specializations and general templates ------------- */
+
+
+template <int structdim, int dim, int spacedim>
+template <typename OtherAccessor>
+InvalidAccessor<structdim, dim, spacedim>::InvalidAccessor (const OtherAccessor &)
+{
+  // This conversion is only there so that dimension independent code
+  // compiles; actually executing it is always an error, whatever accessor
+  // is passed in.
+  Assert (false,
+          ExcMessage ("You are attempting an illegal conversion between "
+                      "iterator/accessor types. The constructor you call "
+                      "only exists to make certain template constructs "
+                      "easier to write as dimension independent code but "
+                      "the conversion is not valid in the current context."));
+}
+
+
+
+template <int structdim, int dim, int spacedim>
+template <int structdim2, int dim2, int spacedim2>
+TriaAccessor<structdim,dim,spacedim>::TriaAccessor (const InvalidAccessor<structdim2,dim2,spacedim2> &)
+{
+  // An InvalidAccessor cannot be turned into a real accessor: the argument
+  // is ignored and the call is flagged as an error.
+  Assert (false,
+          ExcMessage ("You are attempting an illegal conversion between "
+                      "iterator/accessor types. The constructor you call "
+                      "only exists to make certain template constructs "
+                      "easier to write as dimension independent code but "
+                      "the conversion is not valid in the current context."));
+}
+
+
+
+template <int dim, int spacedim>
+template <int structdim2, int dim2, int spacedim2>
+CellAccessor<dim,spacedim>::CellAccessor (const InvalidAccessor<structdim2,dim2,spacedim2> &)
+{
+  // Cell accessors cannot be built from an InvalidAccessor: the argument is
+  // ignored and the call is flagged as an error.
+  Assert (false,
+          ExcMessage ("You are attempting an illegal conversion between "
+                      "iterator/accessor types. The constructor you call "
+                      "only exists to make certain template constructs "
+                      "easier to write as dimension independent code but "
+                      "the conversion is not valid in the current context."));
+}
+
+
+
+template <int structdim, int dim, int spacedim>
+template <int structdim2, int dim2, int spacedim2>
+TriaAccessor<structdim,dim,spacedim>::TriaAccessor (const TriaAccessor<structdim2,dim2,spacedim2> &)
+{
+  // Conversion from an accessor with other template arguments: nothing is
+  // copied from the argument, the call is simply flagged as an error.
+  Assert (false,
+          ExcMessage ("You are attempting an illegal conversion between "
+                      "iterator/accessor types. The constructor you call "
+                      "only exists to make certain template constructs "
+                      "easier to write as dimension independent code but "
+                      "the conversion is not valid in the current context."));
+}
+
+
+
+template <int dim, int spacedim>
+template <int structdim2, int dim2, int spacedim2>
+CellAccessor<dim,spacedim>::CellAccessor (const TriaAccessor<structdim2,dim2,spacedim2> &)
+{
+  // Conversion from a general TriaAccessor: nothing is copied from the
+  // argument, the call is simply flagged as an error.
+  Assert (false,
+          ExcMessage ("You are attempting an illegal conversion between "
+                      "iterator/accessor types. The constructor you call "
+                      "only exists to make certain template constructs "
+                      "easier to write as dimension independent code but "
+                      "the conversion is not valid in the current context."));
+}
+
+template <int dim, int spacedim>
+CellId
+CellAccessor<dim,spacedim>::id() const
+{
+  // One entry per level above the coarse mesh: entry l-1 records which
+  // child of its parent our ancestor on level l is. Entries start out with
+  // a marker value that no valid child number is expected to take.
+  const unsigned char invalid_child = static_cast<unsigned char>(-1);
+  std::vector<unsigned char> id(this->level(), invalid_child);
+
+  // walk up the hierarchy until we arrive at the coarse cell
+  CellAccessor<dim,spacedim> ptr = *this;
+  while (ptr.level()>0)
+    {
+      // look up the parent once per level rather than in every iteration
+      // of the search loop below
+      const TriaIterator<CellAccessor<dim,spacedim> > parent = ptr.parent();
+      const unsigned int n_children = parent->n_children();
+
+      // determine which child we are
+      unsigned char v = invalid_child;
+      for (unsigned int c=0; c<n_children; ++c)
+        if (parent->child_index(c)==ptr.index())
+          {
+            v = c;
+            break;
+          }
+
+      // the parent must list the current cell among its children
+      Assert(v != invalid_child, ExcInternalError());
+      id[ptr.level()-1] = v;
+
+      ptr.copy_from(*parent);
+    }
+
+  // ptr now refers to the coarse level ancestor; its index together with
+  // the recorded child numbers makes up the id
+  Assert(ptr.level()==0, ExcInternalError());
+  const unsigned int coarse_index = ptr.index();
+
+  return CellId(coarse_index, id);
+}
+
+
+#ifndef DOXYGEN
+
+// Declarations of explicit specializations of CellAccessor::point_inside
+// for the listed <dim,spacedim> pairs. Declaring them here keeps code that
+// includes this header from instantiating the general template for these
+// arguments (the definitions presumably live in the corresponding source
+// file -- TODO confirm).
+template <> bool CellAccessor<1,1>::point_inside (const Point<1> &) const;
+template <> bool CellAccessor<2,2>::point_inside (const Point<2> &) const;
+template <> bool CellAccessor<3,3>::point_inside (const Point<3> &) const;
+template <> bool CellAccessor<1,2>::point_inside (const Point<2> &) const;
+template <> bool CellAccessor<1,3>::point_inside (const Point<3> &) const;
+template <> bool CellAccessor<2,3>::point_inside (const Point<3> &) const;
+// -------------------------------------------------------------------
+
+// Likewise, declaration of an explicit specialization for hexahedra in 3d.
+template <> void TriaAccessor<3,3,3>::set_all_manifold_ids (const types::manifold_id) const;
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+// include more templates in debug and optimized mode
+#  include "tria_accessor.templates.h"
+
+#endif
diff --git a/include/deal.II/grid/tria_accessor.templates.h b/include/deal.II/grid/tria_accessor.templates.h
new file mode 100644
index 0000000..031e86d
--- /dev/null
+++ b/include/deal.II/grid/tria_accessor.templates.h
@@ -0,0 +1,3559 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__tria_accessor_templates_h
+#define dealii__tria_accessor_templates_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/geometry_info.h>
+#include <deal.II/base/template_constraints.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_levels.h>
+#include <deal.II/grid/tria_faces.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.templates.h>
+#include <deal.II/distributed/tria_base.h>
+
+#include <cmath>
+
+DEAL_II_NAMESPACE_OPEN
+
+// Forward declarations of the class templates named Triangulation in the
+// namespaces parallel, parallel::distributed and parallel::shared.
+namespace parallel
+{
+  template <int, int> class Triangulation;
+
+  namespace distributed
+  {
+    template <int, int> class Triangulation;
+  }
+
+  namespace shared
+  {
+    template <int, int> class Triangulation;
+  }
+}
+
+
+/*------------------------ Functions: TriaAccessorBase ---------------------------*/
+
+template <int structdim, int dim, int spacedim>
+inline
+TriaAccessorBase<structdim,dim,spacedim>::
+TriaAccessorBase (const Triangulation<dim,spacedim> *tria,
+                  const int                          level,
+                  const int                          index,
+                  const AccessorData *)
+  :
+  present_level ((structdim==dim) ? level : 0),
+  present_index (index),
+  tria (tria)
+{
+  // Only cells (structdim==dim) carry a level. For every other kind of
+  // object zero is stored, and the caller is expected to have passed
+  // either 0, or -1 for an end-iterator, or -2 for an invalid (default
+  // constructed) iterator. The last argument is not used.
+  if (structdim != dim)
+    {
+      Assert ((level == 0) || (level == -1) || (level == -2),
+              ExcInternalError());
+    }
+}
+
+
+template <int structdim, int dim, int spacedim>
+inline
+TriaAccessorBase<structdim,dim,spacedim>::
+TriaAccessorBase (const TriaAccessorBase<structdim,dim,spacedim> &a)
+  :
+  present_level (a.present_level),
+  present_index (a.present_index),
+  tria (a.tria)
+{
+  // plain member-wise copy of level, index and triangulation pointer; no
+  // consistency check is done here
+}
+
+
+template <int structdim, int dim, int spacedim>
+inline
+void
+TriaAccessorBase<structdim,dim,spacedim>::
+copy_from (const TriaAccessorBase<structdim,dim,spacedim> &a)
+{
+  // take over the triangulation pointer and the position of the source
+  this->tria          = a.tria;
+  this->present_level = a.present_level;
+  this->present_index = a.present_index;
+
+  // objects that are not cells have no level of their own; the only
+  // values accepted for them are 0, -1 and -2
+  if (structdim != dim)
+    {
+      Assert ((present_level == 0) || (present_level == -1) || (present_level == -2),
+              ExcInternalError());
+    }
+}
+
+
+
+template <int structdim, int dim, int spacedim>
+inline
+TriaAccessorBase<structdim,dim,spacedim> &
+TriaAccessorBase<structdim,dim,spacedim>::
+operator= (const TriaAccessorBase<structdim,dim,spacedim> &a)
+{
+  // assignment copies the same three members as copy_from() ...
+  this->tria          = a.tria;
+  this->present_level = a.present_level;
+  this->present_index = a.present_index;
+
+  // ... and applies the same check: objects that are not cells may only
+  // store 0, -1 or -2 as their level
+  if (structdim != dim)
+    {
+      Assert ((present_level == 0) || (present_level == -1) || (present_level == -2),
+              ExcInternalError());
+    }
+
+  return *this;
+}
+
+
+
+template <int structdim, int dim, int spacedim>
+inline
+bool
+TriaAccessorBase<structdim,dim,spacedim>::
+operator == (const TriaAccessorBase<structdim,dim,spacedim> &a) const
+{
+  // comparing accessors of two different triangulations is an error; an
+  // accessor without a triangulation may be compared with any other
+  Assert (tria == a.tria || tria == 0 || a.tria == 0,
+          TriaAccessorExceptions::ExcCantCompareIterators());
+
+  // equality is decided by the position alone
+  const bool same_level = (this->present_level == a.present_level);
+  const bool same_index = (this->present_index == a.present_index);
+  return (same_level && same_index);
+}
+
+
+
+template <int structdim, int dim, int spacedim>
+inline
+bool
+TriaAccessorBase<structdim,dim,spacedim>::
+operator != (const TriaAccessorBase<structdim,dim,spacedim> &a) const
+{
+  // comparing accessors of two different triangulations is an error; an
+  // accessor without a triangulation may be compared with any other
+  Assert (tria == a.tria || tria == 0 || a.tria == 0,
+          TriaAccessorExceptions::ExcCantCompareIterators());
+
+  // two accessors differ unless both level and index coincide
+  return !((this->present_level == a.present_level) &&
+           (this->present_index == a.present_index));
+}
+
+
+
+template <int structdim, int dim, int spacedim>
+inline
+bool
+TriaAccessorBase<structdim,dim,spacedim>::
+operator < (const TriaAccessorBase<structdim,dim,spacedim> &other) const
+{
+  // unlike for operator== both accessors have to refer to the very same
+  // triangulation here
+  Assert (tria == other.tria, TriaAccessorExceptions::ExcCantCompareIterators());
+
+  // lexicographic ordering: the index only matters among objects on the
+  // same level, otherwise the level decides
+  if (this->present_level == other.present_level)
+    return (this->present_index < other.present_index);
+
+  return (this->present_level < other.present_level);
+}
+
+
+
+template <int structdim, int dim, int spacedim>
+inline
+int
+TriaAccessorBase<structdim,dim,spacedim>::level () const
+{
+  // Return the stored level. If the object is not a cell, this is always
+  // zero or invalid.
+  return this->present_level;
+}
+
+
+
+template <int structdim, int dim, int spacedim>
+inline
+int
+TriaAccessorBase<structdim,dim,spacedim>::index () const
+{
+  // Return the stored index; -1 is the value operator++ and operator--
+  // leave behind when they run off either end.
+  return this->present_index;
+}
+
+
+
+template <int structdim, int dim, int spacedim>
+inline
+IteratorState::IteratorStates
+TriaAccessorBase<structdim,dim,spacedim>::state () const
+{
+  // a non-negative level together with a non-negative index denotes an
+  // object that can be accessed
+  if ((present_level>=0) && (present_index>=0))
+    return IteratorState::valid;
+
+  // otherwise an index of -1 marks the past-the-end position, whatever
+  // the level is
+  if (present_index==-1)
+    return IteratorState::past_the_end;
+
+  // everything else is an invalid accessor
+  return IteratorState::invalid;
+}
+
+
+
+template <int structdim, int dim, int spacedim>
+inline
+const Triangulation<dim,spacedim> &
+TriaAccessorBase<structdim,dim,spacedim>::get_triangulation () const
+{
+  // the stored pointer is dereferenced without checking that it is set
+  return *(this->tria);
+}
+
+
+
+template <int structdim, int dim, int spacedim>
+inline
+void
+TriaAccessorBase<structdim,dim,spacedim>::operator ++ ()
+{
+  // in either case, start by moving on to the next index
+  ++this->present_index;
+
+  if (structdim != dim)
+    {
+      // objects that are not cells have no level, so there is only the
+      // index to look at: once it has left the range of the vector of
+      // objects, we have arrived past the end
+      const int n_objects = static_cast<int>(objects().cells.size());
+      if (this->present_index >= n_objects)
+        this->present_index = -1;
+
+      return;
+    }
+
+  // cells: as long as the index is beyond the end of the present level,
+  // continue with the first cell of the next level. the loop is needed
+  // because that level may not contain any cells either
+  while (this->present_index
+         >=
+         static_cast<int>(this->tria->levels[this->present_level]->cells.cells.size()))
+    {
+      ++this->present_level;
+      this->present_index = 0;
+
+      // no further level left? then return with the past the end pointer
+      if (this->present_level >= static_cast<int>(this->tria->levels.size()))
+        {
+          this->present_level = -1;
+          this->present_index = -1;
+          return;
+        }
+    }
+}
+
+
+template <int structdim, int dim, int spacedim>
+inline
+void
+TriaAccessorBase<structdim,dim,spacedim>::operator -- ()
+{
+  // mirror image of operator++: start by stepping back one index
+  --this->present_index;
+
+  if (structdim != dim)
+    {
+      // objects that are not cells have no level: every negative index is
+      // mapped to -1
+      if (this->present_index < 0)
+        this->present_index = -1;
+
+      return;
+    }
+
+  // cells: as long as the index is negative, continue with the last cell
+  // of the next lower level
+  while (this->present_index < 0)
+    {
+      --this->present_level;
+
+      // lowest level passed? then return with the past the end pointer
+      if (this->present_level == -1)
+        {
+          this->present_level = -1;
+          this->present_index = -1;
+          return;
+        }
+
+      this->present_index = this->tria->levels[this->present_level]->cells.cells.size()-1;
+    }
+}
+
+
+namespace internal
+{
+  namespace TriaAccessorBase
+  {
+    /**
+     * Out of a face object, get the sub-objects of dimensionality given by
+     * the last argument. This overload returns the address of the
+     * <tt>lines</tt> member of the face object.
+     */
+    template <int dim>
+    inline
+    dealii::internal::Triangulation::TriaObjects<dealii::internal::Triangulation::TriaObject<1> > *
+    get_objects (dealii::internal::Triangulation::TriaFaces<dim> *faces,
+                 const dealii::internal::int2type<1>)
+    {
+      return &faces->lines;
+    }
+
+
+    /**
+     * Same as above, but return the address of the <tt>quads</tt> member of
+     * the face object.
+     */
+    template <int dim>
+    inline
+    dealii::internal::Triangulation::TriaObjects<dealii::internal::Triangulation::TriaObject<2> > *
+    get_objects (dealii::internal::Triangulation::TriaFaces<dim> *faces,
+                 const dealii::internal::int2type<2>)
+    {
+      return &faces->quads;
+    }
+
+    /**
+     * Non-template overload for lines requested from the face object of a
+     * one-dimensional triangulation. Calling it raises an assertion; a null
+     * pointer is returned.
+     */
+    inline
+    dealii::internal::Triangulation::TriaObjects<dealii::internal::Triangulation::TriaObject<1> > *
+    get_objects (dealii::internal::Triangulation::TriaFaces<1> *,
+                 const dealii::internal::int2type<1>)
+    {
+      Assert (false, ExcInternalError());
+      return 0;
+    }
+
+    /**
+     * Non-template overload for quads requested from the face object of a
+     * two-dimensional triangulation. Calling it raises an assertion; a null
+     * pointer is returned.
+     */
+    inline
+    dealii::internal::Triangulation::TriaObjects<dealii::internal::Triangulation::TriaObject<2> > *
+    get_objects (dealii::internal::Triangulation::TriaFaces<2> *,
+                 const dealii::internal::int2type<2>)
+    {
+      Assert (false, ExcInternalError());
+      return 0;
+    }
+
+    /**
+     * Non-template overload for three-dimensional objects requested from the
+     * face object of a three-dimensional triangulation. Calling it raises an
+     * assertion; a null pointer is returned.
+     */
+    inline
+    dealii::internal::Triangulation::TriaObjects<dealii::internal::Triangulation::TriaObject<3> > *
+    get_objects (dealii::internal::Triangulation::TriaFaces<3> *,
+                 const dealii::internal::int2type<3>)
+    {
+      Assert (false, ExcInternalError());
+      return 0;
+    }
+
+    /**
+     * This function should never be used, but we need it for the template
+     * instantiation of TriaAccessorBase<dim,dim,spacedim>::objects() const
+     */
+    template <int dim>
+    inline
+    dealii::internal::Triangulation::TriaObjects<dealii::internal::Triangulation::TriaObject<3> > *
+    get_objects (dealii::internal::Triangulation::TriaFaces<dim> *,
+                 const dealii::internal::int2type<3>)
+    {
+      Assert (false, ExcInternalError());
+      return 0;
+    }
+
+    /**
+     * Copy the above functions for cell objects. This general version, for
+     * a requested dimensionality that is a separate template argument from
+     * that of the cell objects passed in, raises an assertion and returns a
+     * null pointer.
+     */
+    template <int structdim, int dim>
+    inline
+    dealii::internal::Triangulation::TriaObjects<dealii::internal::Triangulation::TriaObject<structdim> > *
+    get_objects (dealii::internal::Triangulation::TriaObjects<dealii::internal::Triangulation::TriaObject<dim> > *,
+                 const dealii::internal::int2type<structdim>)
+    {
+      Assert (false, ExcInternalError());
+      return 0;
+    }
+
+    /**
+     * Version for cell objects whose dimensionality equals the requested
+     * one: the pointer passed in is returned unchanged.
+     */
+    template <int dim>
+    inline
+    dealii::internal::Triangulation::TriaObjects<dealii::internal::Triangulation::TriaObject<dim> > *
+    get_objects (dealii::internal::Triangulation::TriaObjects<dealii::internal::Triangulation::TriaObject<dim> > *cells,
+                 const dealii::internal::int2type<dim>)
+    {
+      return cells;
+    }
+  }
+}
+
+
+
+template <int structdim, int dim, int spacedim>
+inline
+dealii::internal::Triangulation::TriaObjects<dealii::internal::Triangulation::TriaObject<structdim> > &
+TriaAccessorBase<structdim,dim,spacedim>::objects() const
+{
+  // cells are looked up in the data of the level the accessor is
+  // presently on
+  if (structdim == dim)
+    return *dealii::internal::TriaAccessorBase::get_objects (&this->tria->levels[this->present_level]->cells,
+                                                             dealii::internal::int2type<structdim> ());
+
+  // everything that is not a cell is a sub-object and is looked up in
+  // the faces of the triangulation
+  return *dealii::internal::TriaAccessorBase::get_objects (this->tria->faces,
+                                                           dealii::internal::int2type<structdim> ());
+}
+
+
+
+/*------------------------ Functions: InvalidAccessor ---------------------------*/
+
+template <int structdim, int dim, int spacedim>
+InvalidAccessor<structdim, dim, spacedim>::InvalidAccessor (const Triangulation<dim,spacedim> *,
+                                                            const int,
+                                                            const int,
+                                                            const AccessorData *)
+{
+  // None of the arguments is looked at: creating an object of this class
+  // is flagged as an error in any case.
+  Assert (false,
+          ExcMessage ("You are attempting an illegal conversion between "
+                      "iterator/accessor types. The constructor you call "
+                      "only exists to make certain template constructs "
+                      "easier to write as dimension independent code but "
+                      "the conversion is not valid in the current context."));
+}
+
+
+
+// Copy constructor. The TriaAccessorBase part is copy-constructed from
+// that of @p i; afterwards the same Assert(false, ...) as in the other
+// constructor is triggered, i.e. copying an InvalidAccessor is considered
+// an illegal operation as well.
+template <int structdim, int dim, int spacedim>
+InvalidAccessor<structdim, dim, spacedim>::
+InvalidAccessor (const InvalidAccessor &i)
+  :
+  TriaAccessorBase<structdim,dim,spacedim> (static_cast<const TriaAccessorBase<structdim,dim,spacedim>&>(i))
+{
+  Assert (false,
+          ExcMessage ("You are attempting an illegal conversion between "
+                      "iterator/accessor types. The constructor you call "
+                      "only exists to make certain template constructs "
+                      "easier to write as dimension independent code but "
+                      "the conversion is not valid in the current context."));
+}
+
+
+
+// Does nothing; the argument is ignored.
+template <int structdim, int dim, int spacedim>
+void
+InvalidAccessor<structdim, dim, spacedim>::
+copy_from (const InvalidAccessor &)
+{
+  // Intentionally empty. We could raise an error here, but this function
+  // can only be reached through an existing object, and creating one
+  // would already have triggered an error.
+}
+
+
+
+// Comparison for equality. The argument is ignored and the result is
+// always false.
+template <int structdim, int dim, int spacedim>
+bool
+InvalidAccessor<structdim, dim, spacedim>::
+operator == (const InvalidAccessor &) const
+{
+  // No check needed. We could raise an error here, but this function can
+  // only be reached through an existing object, and creating one would
+  // already have triggered an error.
+  return false;
+}
+
+
+
+// Comparison for inequality. The argument is ignored and the result is
+// always true.
+template <int structdim, int dim, int spacedim>
+bool
+InvalidAccessor<structdim, dim, spacedim>::
+operator != (const InvalidAccessor &) const
+{
+  // No check needed. We could raise an error here, but this function can
+  // only be reached through an existing object, and creating one would
+  // already have triggered an error.
+  return true;
+}
+
+
+
+// Always returns false.
+template <int structdim, int dim, int spacedim>
+bool
+InvalidAccessor<structdim, dim, spacedim>::used () const
+{
+  // No check needed. We could raise an error here, but this function can
+  // only be reached through an existing object, and creating one would
+  // already have triggered an error.
+  return false;
+}
+
+
+
+// Always returns false.
+template <int structdim, int dim, int spacedim>
+bool
+InvalidAccessor<structdim, dim, spacedim>::has_children () const
+{
+  // No check needed. We could raise an error here, but this function can
+  // only be reached through an existing object, and creating one would
+  // already have triggered an error.
+  return false;
+}
+
+
+
+// Increment operator. Deliberately a no-op: the body is empty.
+template <int structdim, int dim, int spacedim>
+void
+InvalidAccessor<structdim, dim, spacedim>::operator ++ () const
+{}
+
+
+
+// Decrement operator. Deliberately a no-op: the body is empty.
+template <int structdim, int dim, int spacedim>
+void
+InvalidAccessor<structdim, dim, spacedim>::operator -- () const
+{}
+
+
+
+/*------------------------ Functions: TriaAccessor ---------------------------*/
+
+
+namespace internal
+{
+  namespace TriaAccessor
+  {
+    // make sure that if in the following we
+    // write TriaAccessor
+    // we mean the *class*
+    // dealii::TriaAccessor, not the
+    // enclosing namespace
+    // dealii::internal::TriaAccessor
+    using dealii::TriaAccessor;
+
+    /**
+     * A class with the same purpose as the similarly named class of the
+     * Triangulation class. See there for more information.
+     */
+    struct Implementation
+    {
+      /**
+       * Implementation of TriaAccessor::line_index() for objects with
+       * structdim 1.
+       *
+       * A one-dimensional object is not bounded by lines, so this overload
+       * ignores its arguments, triggers an assertion and returns
+       * numbers::invalid_unsigned_int.
+       */
+      template <int dim, int spacedim>
+      static
+      unsigned int
+      line_index (const TriaAccessor<1, dim, spacedim> &,
+                  const unsigned int)
+      {
+        Assert (false,
+                ExcMessage ("You can't ask for the index of a line bounding "
+                            "a one-dimensional cell because it is not "
+                            "bounded by lines."));
+        return numbers::invalid_unsigned_int;
+      }
+
+
+      /**
+       * Overload for objects with structdim 2: the index of line @p i is
+       * read from face slot @p i of the object's own entry in
+       * accessor.objects().
+       */
+      template <int dim, int spacedim>
+      static
+      unsigned int
+      line_index (const TriaAccessor<2, dim, spacedim> &accessor,
+                  const unsigned int i)
+      {
+        return accessor.objects().cells[accessor.present_index].face(i);
+      }
+
+
+      /**
+       * Overload for objects with structdim 3: the index of line @p i is
+       * obtained from one of the quads bounding the object, taking into
+       * account that this quad may be stored in non-standard orientation.
+       * No range check on @p i is done here.
+       */
+      template <int dim, int spacedim>
+      static
+      unsigned int
+      line_index (const TriaAccessor<3, dim, spacedim> &accessor,
+                  const unsigned int i)
+      {
+        // For each of the twelve lines: { quad from which to take the line,
+        // number of the line within that quad if the quad is oriented
+        // correctly }.
+        static const unsigned int quad_and_line[12][2] =
+        {
+          { 4, 0 }, { 4, 1 }, { 4, 2 }, { 4, 3 },   // first four lines: bottom face
+          { 5, 0 }, { 5, 1 }, { 5, 2 }, { 5, 3 },   // second four lines: top face
+          { 0, 0 }, { 1, 0 }, { 0, 1 }, { 1, 1 }    // remaining lines: faces 0 and 1
+        };
+
+        const unsigned int quad_no          = quad_and_line[i][0];
+        const unsigned int std_line_in_quad = quad_and_line[i][1];
+
+        // respect non-standard faces: let GeometryInfo translate the
+        // standard line number into the one the quad actually uses
+        const unsigned int real_line_in_quad
+          = GeometryInfo<dim>::standard_to_real_face_line (std_line_in_quad,
+                                                           accessor.face_orientation(quad_no),
+                                                           accessor.face_flip(quad_no),
+                                                           accessor.face_rotation(quad_no));
+
+        return accessor.quad(quad_no)->line_index(real_line_in_quad);
+      }
+
+
+
+      /**
+       * Implementation of TriaAccessor::quad_index(). This general template
+       * is meant for one- and two-dimensional objects, which are not
+       * bounded by quads: it ignores its arguments, triggers an assertion
+       * and returns numbers::invalid_unsigned_int.
+       */
+      template <int structdim, int dim, int spacedim>
+      static
+      unsigned int
+      quad_index (const TriaAccessor<structdim, dim, spacedim> &,
+                  const unsigned int)
+      {
+        Assert (false,
+                ExcMessage ("You can't ask for the index of a quad bounding "
+                            "a one- or two-dimensional cell because it is not "
+                            "bounded by quads."));
+        return numbers::invalid_unsigned_int;
+      }
+
+
+      /**
+       * Overload for objects with structdim 3: after checking that @p i is
+       * less than GeometryInfo<3>::quads_per_cell, the index of quad @p i
+       * is read from face slot @p i of the cell's entry in the cell
+       * container of the present level.
+       */
+      template <int dim, int spacedim>
+      static
+      unsigned int
+      quad_index (const TriaAccessor<3, dim, spacedim> &accessor,
+                  const unsigned int i)
+      {
+        Assert (i<GeometryInfo<3>::quads_per_cell,
+                ExcIndexRange(i,0,GeometryInfo<3>::quads_per_cell));
+        return accessor.tria->levels[accessor.present_level]
+               ->cells.cells[accessor.present_index].face(i);
+      }
+
+
+
+      /**
+       * Implementation of TriaAccessor::face_orientation(). This general
+       * template ignores its arguments and always returns true.
+       */
+      template <int structdim, int dim, int spacedim>
+      static
+      bool
+      face_orientation (const TriaAccessor<structdim, dim, spacedim> &,
+                        const unsigned int)
+      {
+        /*
+         * Default implementation used in 1d and 2d
+         *
+         * In 1d and 2d, face_orientation is always true
+         */
+
+        return true;
+      }
+
+
+      /**
+       * Overload for objects with structdim 3: forwards to the
+       * face_orientation() function of the cell container of the present
+       * level, passing the index of the present object and @p face.
+       */
+      template <int dim, int spacedim>
+      static
+      bool
+      face_orientation (const TriaAccessor<3, dim, spacedim> &accessor,
+                        const unsigned int face)
+      {
+        return (accessor.tria->levels[accessor.present_level]
+                ->cells.face_orientation(accessor.present_index, face));
+      }
+
+
+
+      /**
+       * Implementation of TriaAccessor::face_flip(). This general template
+       * ignores its arguments and always returns false.
+       */
+      template <int structdim, int dim, int spacedim>
+      static
+      bool
+      face_flip (const TriaAccessor<structdim, dim, spacedim> &,
+                 const unsigned int)
+      {
+        /*
+         * Default implementation used in 1d and 2d
+         *
+         * In 1d, face_flip is always false as there is no such concept as
+         * "flipped" faces in 1d.
+         *
+         * In 2d, we currently only support meshes where all faces are in
+         * standard orientation, so the result is also false. This also
+         * matches the fact that one can *always* orient faces in 2d in such a
+         * way that they don't need to be flipped.
+         */
+        return false;
+
+      }
+
+
+      /**
+       * Overload for objects with structdim 3: returns the entry of the
+       * face_flips array of the present level's cell container that belongs
+       * to face @p face of the present object. Entries are addressed as
+       * present_index * GeometryInfo<3>::faces_per_cell + face.
+       */
+      template <int dim, int spacedim>
+      static
+      bool
+      face_flip (const TriaAccessor<3, dim, spacedim> &accessor,
+                 const unsigned int face)
+      {
+        // the face number must be valid ...
+        Assert (face<GeometryInfo<3>::faces_per_cell,
+                ExcIndexRange (face, 0, GeometryInfo<3>::faces_per_cell));
+        // ... and the resulting position must lie inside the array
+        Assert (accessor.present_index * GeometryInfo<3>::faces_per_cell + face
+                < accessor.tria->levels[accessor.present_level]
+                ->cells.face_flips.size(),
+                ExcInternalError());
+
+        return (accessor.tria->levels[accessor.present_level]
+                ->cells.face_flips[accessor.present_index *
+                                   GeometryInfo<3>::faces_per_cell
+                                   + face]);
+      }
+
+
+
+      /**
+       * Implementation of TriaAccessor::face_rotation(). This general
+       * template ignores its arguments and always returns false.
+       */
+      template <int structdim, int dim, int spacedim>
+      static
+      bool
+      face_rotation (const TriaAccessor<structdim, dim, spacedim> &,
+                     const unsigned int)
+      {
+        /*
+         * Default implementation used in 1d and 2d
+         *
+         * In 1d and 2d, face_rotation is always false as there is no such
+         * concept as "rotated" faces in 1d and 2d.
+         */
+        return false;
+      }
+
+
+      /**
+       * Overload for objects with structdim 3: returns the entry of the
+       * face_rotations array of the present level's cell container that
+       * belongs to face @p face of the present object. Entries are
+       * addressed as present_index * GeometryInfo<3>::faces_per_cell + face.
+       */
+      template <int dim, int spacedim>
+      static
+      bool
+      face_rotation (const TriaAccessor<3, dim, spacedim> &accessor,
+                     const unsigned int face)
+      {
+        // the face number must be valid ...
+        Assert (face<GeometryInfo<3>::faces_per_cell,
+                ExcIndexRange (face, 0, GeometryInfo<3>::faces_per_cell));
+        // ... and the resulting position must lie inside the array
+        Assert (accessor.present_index * GeometryInfo<3>::faces_per_cell + face
+                < accessor.tria->levels[accessor.present_level]
+                ->cells.face_rotations.size(),
+                ExcInternalError());
+
+        return (accessor.tria->levels[accessor.present_level]
+                ->cells.face_rotations[accessor.present_index *
+                                       GeometryInfo<3>::faces_per_cell
+                                       + face]);
+      }
+
+      /**
+       * Implementation of TriaAccessor::line_orientation() for objects with
+       * structdim 1. The arguments are ignored and the result is always
+       * true.
+       */
+      template <int dim, int spacedim>
+      static
+      bool
+      line_orientation (const TriaAccessor<1, dim, spacedim> &,
+                        const unsigned int)
+      {
+        return true;
+      }
+
+
+      /**
+       * Overload for quads in a two-dimensional triangulation (structdim 2,
+       * dim 2). The arguments are ignored and the result is always true.
+       */
+      template <int spacedim>
+      static
+      bool
+      line_orientation (const TriaAccessor<2, 2, spacedim> &,
+                        const unsigned int)
+      {
+        // quads in 2d have no non-standard orientation
+        return true;
+      }
+
+
+      /**
+       * Overload for quads in a three-dimensional triangulation
+       * (structdim 2, dim 3): the orientation of line @p line is obtained
+       * from the quad container stored in the triangulation's face data.
+       */
+      template <int spacedim>
+      static
+      bool
+      line_orientation (const TriaAccessor<2, 3, spacedim> &accessor,
+                        const unsigned int line)
+      {
+        // quads as part of 3d hexes can have non-standard orientation
+
+        //TODO: why is this face_orientation, not line_orientation as in the setter function?
+        // NOTE(review): the corresponding setter writes to
+        // quads.line_orientations directly, whereas this getter goes through
+        // quads.face_orientation(); presumably both refer to the same data --
+        // confirm against the declaration of the quad container.
+        return accessor.tria->faces->quads.face_orientation(accessor.present_index, line);
+      }
+
+
+      /**
+       * Overload for objects with structdim 3: the orientation of line
+       * @p line is determined by asking the quad that contains the line for
+       * the line's orientation within the quad, and then correcting that
+       * answer for the orientation, flip and rotation of the quad relative
+       * to the cell.
+       */
+      template <int dim, int spacedim>
+      static
+      bool
+      line_orientation (const TriaAccessor<3, dim, spacedim> &accessor,
+                        const unsigned int line)
+      {
+        Assert (accessor.used(), TriaAccessorExceptions::ExcCellNotUsed());
+        Assert (line<GeometryInfo<3>::lines_per_cell,
+                ExcIndexRange (line, 0, GeometryInfo<3>::lines_per_cell));
+
+        // get the line index by asking the quads. first assume standard
+        // orientation
+        //
+        // set up a table that for each line describes a) from which quad
+        // to take it, b) which line therein it is if the face is oriented
+        // correctly
+        static const unsigned int lookup_table[12][2] =
+        {
+          { 4, 0 }, // take first four lines from bottom face
+          { 4, 1 },
+          { 4, 2 },
+          { 4, 3 },
+
+          { 5, 0 }, // second four lines from top face
+          { 5, 1 },
+          { 5, 2 },
+          { 5, 3 },
+
+          { 0, 0 }, // the remaining four lines from faces 0 and 1
+          { 1, 0 },
+          { 0, 1 },
+          { 1, 1 }
+        };
+
+        const unsigned int quad_index=lookup_table[line][0];
+        const unsigned int std_line_index=lookup_table[line][1];
+
+        // translate the standard line number into the one the quad
+        // actually uses, given the quad's orientation flags
+        const unsigned int line_index=GeometryInfo<dim>::standard_to_real_face_line(
+                                        std_line_index,
+                                        accessor.face_orientation(quad_index),
+                                        accessor.face_flip(quad_index),
+                                        accessor.face_rotation(quad_index));
+
+        // now we got to the correct line and ask the quad for its
+        // line_orientation. however, if the face is rotated, it might be
+        // possible, that a standard orientation of the line with respect to
+        // the face corresponds to a non-standard orientation for the line
+        // with respect to the cell.
+        //
+        // set up a table indicating if the two standard orientations
+        // coincide
+        //
+        // first index: two pairs of lines 0(lines 0/1) and 1(lines 2/3)
+        //
+        // second index: face_orientation; 0: opposite normal, 1: standard
+        //
+        // third index: face_flip; 0: standard, 1: face rotated by 180
+        // degrees
+        //
+        // fourth index: face_rotation: 0: standard, 1: face rotated by 90
+        // degrees
+
+        static const bool bool_table[2][2][2][2] =
+        {
+          { { { true, false },   // lines 0/1, face_orientation=false, face_flip=false, face_rotation=false and true
+              { false, true }    // lines 0/1, face_orientation=false, face_flip=true, face_rotation=false and true
+            },
+            { { true, true },    // lines 0/1, face_orientation=true, face_flip=false, face_rotation=false and true
+              { false, false }   // lines 0/1, face_orientation=true, face_flip=true, face_rotation=false and true
+            }
+          },
+
+          { { { true, true },    // lines 2/3 ...
+              { false, false }
+            },
+            { { true, false },
+              { false, true }
+            }
+          }
+        };
+
+
+        // the line has standard orientation with respect to the cell if
+        // the orientation stored in the quad agrees with the table entry
+        return (accessor.quad(quad_index)
+                ->line_orientation(line_index)
+                == bool_table[std_line_index/2]
+                [accessor.face_orientation(quad_index)]
+                [accessor.face_flip(quad_index)]
+                [accessor.face_rotation(quad_index)]);
+      }
+
+
+
+      /**
+       * Implementation of TriaAccessor::set_face_orientation(). This
+       * general template ignores its arguments; calling it is treated as an
+       * internal error.
+       */
+      template <int structdim, int dim, int spacedim>
+      static
+      void
+      set_face_orientation (const TriaAccessor<structdim, dim, spacedim> &,
+                            const unsigned int,
+                            const bool)
+      {
+        Assert (false, ExcInternalError());
+      }
+
+
+      /**
+       * Overload for objects with structdim 3: stores @p value in the entry
+       * of the face_orientations array of the present level's cell
+       * container that belongs to face @p face of the present object.
+       * Entries are addressed as
+       * present_index * GeometryInfo<3>::faces_per_cell + face.
+       */
+      template <int dim, int spacedim>
+      static
+      void
+      set_face_orientation (const TriaAccessor<3, dim, spacedim> &accessor,
+                            const unsigned int face,
+                            const bool value)
+      {
+        // the object must be in use, the face number valid, and the
+        // resulting position inside the array
+        Assert (accessor.used(), TriaAccessorExceptions::ExcCellNotUsed());
+        Assert (face<GeometryInfo<3>::faces_per_cell,
+                ExcIndexRange (face, 0, GeometryInfo<3>::faces_per_cell));
+        Assert (accessor.present_index * GeometryInfo<3>::faces_per_cell + face
+                < accessor.tria->levels[accessor.present_level]
+                ->cells.face_orientations.size(),
+                ExcInternalError());
+        accessor.tria->levels[accessor.present_level]
+        ->cells.face_orientations[accessor.present_index *
+                                  GeometryInfo<3>::faces_per_cell
+                                  +
+                                  face] = value;
+      }
+
+
+
+      /**
+       * Implementation of TriaAccessor::set_face_flip(). This general
+       * template ignores its arguments; calling it is treated as an
+       * internal error.
+       */
+      template <int structdim, int dim, int spacedim>
+      static
+      void
+      set_face_flip (const TriaAccessor<structdim, dim, spacedim> &,
+                     const unsigned int,
+                     const bool)
+      {
+        Assert (false, ExcInternalError());
+      }
+
+
+      /**
+       * Overload for objects with structdim 3: stores @p value in the entry
+       * of the face_flips array of the present level's cell container that
+       * belongs to face @p face of the present object. Entries are
+       * addressed as present_index * GeometryInfo<3>::faces_per_cell + face.
+       */
+      template <int dim, int spacedim>
+      static
+      void
+      set_face_flip (const TriaAccessor<3, dim, spacedim> &accessor,
+                     const unsigned int face,
+                     const bool value)
+      {
+        // the face number must be valid ...
+        Assert (face<GeometryInfo<3>::faces_per_cell,
+                ExcIndexRange (face, 0, GeometryInfo<3>::faces_per_cell));
+        // ... and the resulting position must lie inside the array
+        Assert (accessor.present_index * GeometryInfo<3>::faces_per_cell + face
+                < accessor.tria->levels[accessor.present_level]
+                ->cells.face_flips.size(),
+                ExcInternalError());
+
+        accessor.tria->levels[accessor.present_level]
+        ->cells.face_flips[accessor.present_index *
+                           GeometryInfo<3>::faces_per_cell
+                           + face] = value;
+      }
+
+
+
+      /**
+       * Implementation of TriaAccessor::set_face_rotation(). This general
+       * template ignores its arguments; calling it is treated as an
+       * internal error.
+       */
+      template <int structdim, int dim, int spacedim>
+      static
+      void
+      set_face_rotation (const TriaAccessor<structdim, dim, spacedim> &,
+                         const unsigned int,
+                         const bool)
+      {
+        Assert (false, ExcInternalError());
+      }
+
+
+      /**
+       * Overload for objects with structdim 3: stores @p value in the entry
+       * of the face_rotations array of the present level's cell container
+       * that belongs to face @p face of the present object. Entries are
+       * addressed as present_index * GeometryInfo<3>::faces_per_cell + face.
+       */
+      template <int dim, int spacedim>
+      static
+      void
+      set_face_rotation (const TriaAccessor<3, dim, spacedim> &accessor,
+                         const unsigned int face,
+                         const bool value)
+      {
+        // the face number must be valid ...
+        Assert (face<GeometryInfo<3>::faces_per_cell,
+                ExcIndexRange (face, 0, GeometryInfo<3>::faces_per_cell));
+        // ... and the resulting position must lie inside the array
+        Assert (accessor.present_index * GeometryInfo<3>::faces_per_cell + face
+                < accessor.tria->levels[accessor.present_level]
+                ->cells.face_rotations.size(),
+                ExcInternalError());
+
+        accessor.tria->levels[accessor.present_level]
+        ->cells.face_rotations[accessor.present_index *
+                               GeometryInfo<3>::faces_per_cell
+                               + face] = value;
+      }
+
+      /**
+       * Implementation of TriaAccessor::set_line_orientation() for objects
+       * with structdim 1. The arguments are ignored; calling this overload
+       * is treated as an internal error.
+       */
+      template <int dim, int spacedim>
+      static
+      void
+      set_line_orientation (const TriaAccessor<1, dim, spacedim> &,
+                            const unsigned int,
+                            const bool)
+      {
+        Assert (false, ExcInternalError());
+      }
+
+
+      /**
+       * Overload for quads in a two-dimensional triangulation (structdim 2,
+       * dim 2). The arguments are ignored; calling this overload is treated
+       * as an internal error.
+       */
+      template <int spacedim>
+      static
+      void
+      set_line_orientation (const TriaAccessor<2, 2, spacedim> &,
+                            const unsigned int,
+                            const bool)
+      {
+        // quads in 2d have no non-standard orientation, so there is
+        // nothing that could be set
+        Assert (false, ExcInternalError());
+      }
+
+
+      /**
+       * Overload for quads in a three-dimensional triangulation
+       * (structdim 2, dim 3): stores @p value in the entry of the
+       * line_orientations array of the quad container that belongs to line
+       * @p line of the present quad. Entries are addressed as
+       * present_index * GeometryInfo<3>::lines_per_face + line.
+       */
+      template <int spacedim>
+      static
+      void
+      set_line_orientation (const TriaAccessor<2, 3, spacedim> &accessor,
+                            const unsigned int line,
+                            const bool value)
+      {
+        // the quad must be in use, the line number valid, and the
+        // resulting position inside the array
+        Assert (accessor.used(), TriaAccessorExceptions::ExcCellNotUsed());
+        Assert (line<GeometryInfo<3>::lines_per_face,
+                ExcIndexRange (line, 0, GeometryInfo<3>::lines_per_face));
+        Assert (accessor.present_index * GeometryInfo<3>::lines_per_face + line
+                < accessor.tria->faces->quads.line_orientations.size(),
+                ExcInternalError());
+        // quads as part of 3d hexes can have non-standard orientation
+        accessor.tria->faces->quads.line_orientations[accessor.present_index *
+                                                      GeometryInfo<3>::lines_per_face
+                                                      + line]
+          = value;
+      }
+
+
+      /**
+       * Overload for objects with structdim 3. Not implemented: the
+       * arguments are ignored and an ExcNotImplemented assertion is
+       * triggered.
+       */
+      template <int dim, int spacedim>
+      static
+      void
+      set_line_orientation (const TriaAccessor<3, dim, spacedim> &,
+                            const unsigned int,
+                            const bool)
+      {
+        // it seems like we don't need this one
+        Assert (false, ExcNotImplemented());
+      }
+
+
+      /**
+       * Implementation of TriaAccessor::vertex_index() for objects with
+       * structdim 1: the vertex index is read from face slot @p corner of
+       * the object's own entry in accessor.objects().
+       */
+      template <int dim, int spacedim>
+      static
+      unsigned int
+      vertex_index (const TriaAccessor<1,dim,spacedim> &accessor,
+                    const unsigned int corner)
+      {
+        return accessor.objects().cells[accessor.present_index].face (corner);
+      }
+
+
+      /**
+       * Overload for objects with structdim 2: vertex @p corner is taken
+       * from line corner%2 of the object; which of that line's vertices is
+       * used depends on corner/2 and on the orientation of the line.
+       */
+      template <int dim, int spacedim>
+      static
+      unsigned int
+      vertex_index (const TriaAccessor<2,dim,spacedim> &accessor,
+                    const unsigned int corner)
+      {
+        // Table used to swap the two vertices of a line if the line
+        // orientation is wrong.
+        //
+        // first index: line orientation (0: false, 1: true=standard)
+        // second index: vertex of the line that is asked for
+        static const unsigned int vertex_of_line[2][2]= {{1,0},{0,1}};
+
+        const unsigned int line_no        = corner%2;
+        const unsigned int vertex_on_line = corner/2;
+        const bool         is_standard    = accessor.line_orientation(line_no);
+
+        return accessor.line(line_no)
+               ->vertex_index(vertex_of_line[is_standard][vertex_on_line]);
+      }
+
+
+
+      /**
+       * Overload for objects with structdim 3: vertex @p corner is taken
+       * from the bottom or the top face of the object, taking into account
+       * that this face may be stored in non-standard orientation.
+       */
+      template <int dim, int spacedim>
+      static
+      unsigned int
+      vertex_index (const TriaAccessor<3,dim,spacedim> &accessor,
+                    const unsigned int corner)
+      {
+        // the first four vertices come from the bottom face (4), the
+        // remaining ones from the top face (5)
+        const unsigned int quad_no            = 4+corner/4;
+        const unsigned int std_vertex_in_quad = corner%4;
+
+        // handle non-standard faces: let GeometryInfo translate the
+        // standard vertex number into the one the quad actually uses
+        const unsigned int real_vertex_in_quad
+          = GeometryInfo<dim>::standard_to_real_face_vertex (std_vertex_in_quad,
+                                                             accessor.face_orientation(quad_no),
+                                                             accessor.face_flip(quad_no),
+                                                             accessor.face_rotation(quad_no));
+
+        return accessor.quad(quad_no)->vertex_index(real_vertex_in_quad);
+      }
+    };
+  }
+}
+
+
+
+
+// Constructor. All four arguments are forwarded unchanged to the
+// TriaAccessorBase base class; nothing else is done.
+template <int structdim, int dim, int spacedim>
+inline
+TriaAccessor<structdim, dim, spacedim>::
+TriaAccessor (const Triangulation<dim,spacedim> *parent,
+              const int                 level,
+              const int                 index,
+              const AccessorData       *local_data)
+  :
+  TriaAccessorBase<structdim,dim,spacedim> (parent, level, index, local_data)
+{}
+
+
+
+// Return the "used" flag stored for the present object in objects().used.
+// An assertion checks that the accessor is in the valid iterator state.
+template <int structdim, int dim, int spacedim>
+inline
+bool
+TriaAccessor<structdim,dim,spacedim>::used () const
+{
+  Assert (this->state() == IteratorState::valid,
+          TriaAccessorExceptions::ExcDereferenceInvalidObject());
+  return this->objects().used[this->present_index];
+}
+
+
+
+// Return an iterator to the i-th vertex of this object. The iterator is
+// constructed from the triangulation pointer, level 0 and the global index
+// obtained from vertex_index(i).
+template <int structdim, int dim, int spacedim>
+inline
+typename dealii::internal::Triangulation::Iterators<dim,spacedim>::vertex_iterator
+TriaAccessor<structdim,dim,spacedim>::vertex_iterator (const unsigned int i) const
+{
+  return typename dealii::internal::Triangulation::Iterators<dim,spacedim>::vertex_iterator
+         (this->tria, 0, vertex_index (i));
+}
+
+
+
+// Return the index of vertex number @p corner of this object. After a
+// range check against GeometryInfo<structdim>::vertices_per_cell the work
+// is delegated to the Implementation overload matching structdim.
+template <int structdim, int dim, int spacedim>
+unsigned int
+TriaAccessor<structdim, dim, spacedim>::
+vertex_index (const unsigned int corner) const
+{
+  Assert (corner<GeometryInfo<structdim>::vertices_per_cell,
+          ExcIndexRange(corner,0,GeometryInfo<structdim>::vertices_per_cell));
+
+  return dealii::internal::TriaAccessor::Implementation::vertex_index (*this, corner);
+}
+
+
+
+// Return a non-const reference to the i-th vertex of this object, i.e. to
+// the entry of the triangulation's vertex array at position
+// vertex_index(i). The const_cast removes whatever constness the access
+// through this->tria carries, so the vertex can be modified through the
+// returned reference even though this function is const.
+template <int structdim, int dim, int spacedim>
+Point<spacedim> &
+TriaAccessor<structdim, dim, spacedim>::vertex (const unsigned int i) const
+{
+  return const_cast<Point<spacedim> &> (this->tria->vertices[vertex_index(i)]);
+}
+
+
+
+// Return an iterator to the i-th line bounding this object. The iterator
+// is constructed from the triangulation pointer, level 0 and the index
+// obtained from line_index(i).
+template <int structdim, int dim, int spacedim>
+inline
+typename dealii::internal::Triangulation::Iterators<dim,spacedim>::line_iterator
+TriaAccessor<structdim,dim,spacedim>::line (const unsigned int i) const
+{
+  // checks happen in line_index
+  return typename dealii::internal::Triangulation::Iterators<dim,spacedim>::line_iterator
+         (this->tria, 0, line_index (i));
+}
+
+
+
+// Return the index of the i-th line bounding this object. After a range
+// check against GeometryInfo<structdim>::lines_per_cell the work is
+// delegated to the Implementation overload matching structdim.
+template <int structdim, int dim, int spacedim>
+inline
+unsigned int
+TriaAccessor<structdim,dim,spacedim>::line_index (const unsigned int i) const
+{
+  Assert (i < GeometryInfo<structdim>::lines_per_cell,
+          ExcIndexRange (i, 0, GeometryInfo<structdim>::lines_per_cell));
+
+  return dealii::internal::TriaAccessor::Implementation::line_index (*this, i);
+}
+
+
+
+
+// Return an iterator to the i-th quad bounding this object. The iterator
+// is constructed from the triangulation pointer, level 0 and the index
+// obtained from quad_index(i).
+template <int structdim, int dim, int spacedim>
+inline
+typename dealii::internal::Triangulation::Iterators<dim,spacedim>::quad_iterator
+TriaAccessor<structdim,dim,spacedim>::quad (const unsigned int i) const
+{
+  // checks happen in quad_index
+  return typename dealii::internal::Triangulation::Iterators<dim,spacedim>::quad_iterator
+         (this->tria, 0, quad_index (i));
+}
+
+
+
+// Return the index of the i-th quad bounding this object. No check is done
+// here; the call is forwarded to the Implementation overload matching
+// structdim, which performs the checks.
+template <int structdim, int dim, int spacedim>
+inline
+unsigned int
+TriaAccessor<structdim,dim,spacedim>::quad_index (const unsigned int i) const
+{
+  return dealii::internal::TriaAccessor::Implementation::quad_index (*this, i);
+}
+
+
+
+// Return the orientation flag of face @p face. Asserts that the object is
+// in use, then delegates to the Implementation overload matching
+// structdim.
+template <int structdim, int dim, int spacedim>
+inline
+bool
+TriaAccessor<structdim,dim,spacedim>::face_orientation (const unsigned int face) const
+{
+  Assert (used(), TriaAccessorExceptions::ExcCellNotUsed());
+
+  return dealii::internal::TriaAccessor::Implementation::face_orientation (*this, face);
+}
+
+
+
+// Return the flip flag of face @p face. Asserts that the object is in use,
+// then delegates to the Implementation overload matching structdim.
+template <int structdim, int dim, int spacedim>
+inline
+bool
+TriaAccessor<structdim,dim,spacedim>::face_flip (const unsigned int face) const
+{
+  Assert (used(), TriaAccessorExceptions::ExcCellNotUsed());
+
+  return dealii::internal::TriaAccessor::Implementation::face_flip (*this, face);
+}
+
+
+// Return the rotation flag of face @p face. Asserts that the object is in
+// use, then delegates to the Implementation overload matching structdim.
+template <int structdim, int dim, int spacedim>
+inline
+bool
+TriaAccessor<structdim,dim,spacedim>::face_rotation (const unsigned int face) const
+{
+  Assert (used(), TriaAccessorExceptions::ExcCellNotUsed());
+
+  return dealii::internal::TriaAccessor::Implementation::face_rotation (*this, face);
+}
+
+
+
+// Return the orientation flag of line @p line. Asserts that the object is
+// in use and that @p line is less than
+// GeometryInfo<structdim>::lines_per_cell, then delegates to the
+// Implementation overload matching structdim and dim.
+template <int structdim, int dim, int spacedim>
+inline
+bool
+TriaAccessor<structdim,dim,spacedim>::line_orientation (const unsigned int line) const
+{
+  Assert (used(), TriaAccessorExceptions::ExcCellNotUsed());
+  Assert (line<GeometryInfo<structdim>::lines_per_cell,
+          ExcIndexRange (line, 0, GeometryInfo<structdim>::lines_per_cell));
+
+  return dealii::internal::TriaAccessor::Implementation::line_orientation (*this, line);
+}
+
+
+
+// Set the orientation flag of face @p face to @p value. Asserts that the
+// object is in use, then delegates to the Implementation overload matching
+// structdim.
+template <int structdim, int dim, int spacedim>
+inline
+void
+TriaAccessor<structdim,dim,spacedim>::set_face_orientation (const unsigned int face,
+                                                            const bool value) const
+{
+  Assert (used(), TriaAccessorExceptions::ExcCellNotUsed());
+
+  dealii::internal::TriaAccessor::Implementation::set_face_orientation (*this, face, value);
+}
+
+
+
+// Set the flip flag of face @p face to @p value. Asserts that the object
+// is in use, then delegates to the Implementation overload matching
+// structdim.
+template <int structdim, int dim, int spacedim>
+inline
+void
+TriaAccessor<structdim,dim,spacedim>::set_face_flip (const unsigned int face,
+                                                     const bool value) const
+{
+  Assert (used(), TriaAccessorExceptions::ExcCellNotUsed());
+
+  dealii::internal::TriaAccessor::Implementation::set_face_flip (*this, face, value);
+}
+
+
+// Set the rotation flag of face @p face to @p value. Asserts that the
+// object is in use, then delegates to the Implementation overload matching
+// structdim.
+template <int structdim, int dim, int spacedim>
+inline
+void
+TriaAccessor<structdim,dim,spacedim>::set_face_rotation (const unsigned int face,
+                                                         const bool value) const
+{
+  Assert (used(), TriaAccessorExceptions::ExcCellNotUsed());
+
+  dealii::internal::TriaAccessor::Implementation::set_face_rotation (*this, face, value);
+}
+
+
+
+// Set the orientation flag of line @p line to @p value. Asserts that the
+// object is in use and that @p line is less than
+// GeometryInfo<structdim>::lines_per_cell, then delegates to the
+// Implementation overload matching structdim and dim.
+template <int structdim, int dim, int spacedim>
+inline
+void
+TriaAccessor<structdim,dim,spacedim>::set_line_orientation (const unsigned int line,
+                                                            const bool value) const
+{
+  Assert (used(), TriaAccessorExceptions::ExcCellNotUsed());
+  Assert (line<GeometryInfo<structdim>::lines_per_cell,
+          ExcIndexRange (line, 0, GeometryInfo<structdim>::lines_per_cell));
+
+  dealii::internal::TriaAccessor::Implementation::set_line_orientation (*this, line, value);
+}
+
+
+
+// Mark the present object as used by setting its entry in objects().used
+// to true. An assertion checks that the accessor is in the valid iterator
+// state. Note that this const member function modifies the data the
+// accessor points to.
+template <int structdim, int dim, int spacedim>
+void TriaAccessor<structdim, dim, spacedim>::set_used_flag () const
+{
+  Assert (this->state() == IteratorState::valid,
+          TriaAccessorExceptions::ExcDereferenceInvalidObject());
+  this->objects().used[this->present_index] = true;
+}
+
+
+
+// Mark the present object as unused by setting its entry in
+// objects().used to false. An assertion checks that the accessor is in the
+// valid iterator state. Note that this const member function modifies the
+// data the accessor points to.
+template <int structdim, int dim, int spacedim>
+void
+TriaAccessor<structdim, dim, spacedim>::clear_used_flag () const
+{
+  Assert (this->state() == IteratorState::valid,
+          TriaAccessorExceptions::ExcDereferenceInvalidObject());
+  this->objects().used[this->present_index] = false;
+}
+
+
+// Return the index of the i-th child of this object. Asserts that the
+// object has children and that @p i is less than n_children().
+template <int structdim, int dim, int spacedim>
+int
+TriaAccessor<structdim, dim, spacedim>::
+child_index (const unsigned int i) const
+{
+  Assert (has_children(), TriaAccessorExceptions::ExcCellHasNoChildren());
+  Assert (i<n_children(),
+          ExcIndexRange (i, 0, n_children()));
+
+  // each set of two children are stored consecutively, so we only have to
+  // find the location of the set of children: every object owns
+  // n_sets_of_two consecutive entries of the children array, i/2 selects
+  // the set, and i%2 is added to the value stored there to address the
+  // child within the set
+  const unsigned int n_sets_of_two = GeometryInfo<structdim>::max_children_per_cell/2;
+  return this->objects().children[n_sets_of_two*this->present_index+i/2]+i%2;
+}
+
+
+
+// Return the index of the i-th child this object would have under
+// isotropic refinement.
+//
+// - structdim 1: identical to child_index(i).
+// - structdim 2: if the object is refined with cut_xy, this is
+//   child_index(i). If it is refined with cut_x and the relevant child is
+//   refined with cut_y (or the other way around), the index of the
+//   matching grandchild is returned. Any other combination triggers an
+//   assertion.
+// - structdim 3: not implemented, triggers an assertion.
+//
+// In every case in which no index can be determined (and the assertion
+// does not stop execution), -1 is returned.
+template <int structdim, int dim, int spacedim>
+int
+TriaAccessor<structdim, dim, spacedim>::
+isotropic_child_index (const unsigned int i) const
+{
+  Assert (i<GeometryInfo<structdim>::max_children_per_cell,
+          ExcIndexRange (i, 0, GeometryInfo<structdim>::max_children_per_cell));
+
+  switch (structdim)
+    {
+    case 1:
+      return child_index (i);
+    case 2:
+    {
+      const RefinementCase<2>
+      this_refinement_case (static_cast<unsigned char>(refinement_case()));
+
+      Assert (this_refinement_case != RefinementCase<2>::no_refinement,
+              TriaAccessorExceptions::ExcCellHasNoChildren());
+
+      if (this_refinement_case == RefinementCase<2>::cut_xy)
+        return child_index(i);
+      else if ((this_refinement_case == RefinementCase<2>::cut_x)
+               &&
+               (child(i%2)->refinement_case()==RefinementCase<2>::cut_y))
+        return child(i%2)->child_index(i/2);
+      else if ((this_refinement_case == RefinementCase<2>::cut_y)
+               &&
+               (child(i/2)->refinement_case()==RefinementCase<2>::cut_x))
+        return child(i/2)->child_index(i%2);
+      else
+        Assert(false,
+               ExcMessage("This cell has no grandchildren equivalent to isotropic refinement"));
+
+      // none of the branches above returned: leave the switch explicitly
+      // rather than falling through into the 3d case below
+      break;
+    }
+
+    case 3:
+      Assert (false, ExcNotImplemented());
+      break;
+    }
+  return -1;
+}
+
+
+
+// Return the refinement case of the object this accessor points to. For
+// structdim==1 it is derived from the children array, otherwise it is read
+// from the refinement_cases array.
+template <int structdim, int dim, int spacedim>
+RefinementCase<structdim>
+TriaAccessor<structdim, dim, spacedim>::refinement_case() const
+{
+  Assert (this->state() == IteratorState::valid,
+          TriaAccessorExceptions::ExcDereferenceInvalidObject());
+
+  switch (structdim)
+    {
+    case 1:
+    {
+      // a stored child index different from -1 means the line has been cut
+      const bool line_is_refined
+        = (this->objects().children[this->present_index] != -1);
+
+      // Go through unsigned char first and convert to
+      // RefinementCase<structdim> only afterwards: that way the conversion
+      // is also valid when structdim>1, where this branch is dead code
+      // anyway.
+      const unsigned char raw_case
+        = (line_is_refined
+           ?
+           static_cast<unsigned char>(RefinementCase<1>::cut_x) :
+           static_cast<unsigned char>(RefinementCase<1>::no_refinement));
+
+      return RefinementCase<structdim>(raw_case);
+    }
+
+    default:
+      Assert (static_cast<unsigned int> (this->present_index) <
+              this->objects().refinement_cases.size(),
+              ExcIndexRange(this->present_index, 0,
+                            this->objects().refinement_cases.size()));
+
+      return (static_cast<RefinementCase<structdim> >
+              (this->objects().refinement_cases[this->present_index]));
+    }
+}
+
+
+
+
+// Return an iterator to the i-th child of this object. The range check on
+// 'i' is left to child_index().
+template <int structdim, int dim, int spacedim>
+inline
+TriaIterator<TriaAccessor<structdim,dim,spacedim> >
+TriaAccessor<structdim,dim,spacedim>::child (const unsigned int i) const
+{
+  // children of cells (structdim==dim) are addressed one level further
+  // down; for all other objects level 0 is passed to the iterator
+  const int child_level = (dim == structdim ? this->level() + 1 : 0);
+
+  const TriaIterator<TriaAccessor<structdim,dim,spacedim> >
+  q (this->tria, child_level, child_index (i));
+
+  Assert ((q.state() == IteratorState::past_the_end) || q->used(),
+          TriaAccessorExceptions::ExcUnusedCellAsChild());
+
+  return q;
+}
+
+
+
+// Return an iterator to the i-th child this object would have under
+// isotropic refinement. For structdim==2 an anisotropically refined object
+// is resolved through the matching grandchild; other values of structdim
+// above 1 are not implemented.
+template <int structdim, int dim, int spacedim>
+inline
+TriaIterator<TriaAccessor<structdim,dim,spacedim> >
+TriaAccessor<structdim,dim,spacedim>::isotropic_child (const unsigned int i) const
+{
+  // 'i' is range-checked inside child() or child_index(), which are
+  // called below
+  switch (structdim)
+    {
+    case 1:
+      // no anisotropic refinement in 1D
+      return child(i);
+
+    case 2:
+    {
+      const RefinementCase<2>
+      this_refinement_case (static_cast<unsigned char>(refinement_case()));
+
+      Assert (this_refinement_case != RefinementCase<2>::no_refinement,
+              TriaAccessorExceptions::ExcCellHasNoChildren());
+
+      // cut in both directions: the direct children are the answer
+      if (this_refinement_case == RefinementCase<2>::cut_xy)
+        return child(i);
+
+      // cut in x first, and the relevant child then cut in y
+      if ((this_refinement_case == RefinementCase<2>::cut_x)
+          &&
+          (child(i%2)->refinement_case()==RefinementCase<2>::cut_y))
+        return child(i%2)->child(i/2);
+
+      // cut in y first, and the relevant child then cut in x
+      if ((this_refinement_case == RefinementCase<2>::cut_y)
+          &&
+          (child(i/2)->refinement_case()==RefinementCase<2>::cut_x))
+        return child(i/2)->child(i%2);
+
+      // none of the combinations above applies
+      Assert(false,
+             ExcMessage("This cell has no grandchildren equivalent to isotropic refinement"));
+    }
+    // note: there is no break above, so control continues into the next
+    // label whenever the Assert does not stop execution
+
+    default:
+      Assert (false, ExcNotImplemented());
+    }
+  // not meant to be reached, but a value has to be returned
+  return child(0);
+}
+
+
+
+// Tell whether this object has children, i.e. whether the first child slot
+// belonging to it holds something other than -1.
+template <int structdim, int dim, int spacedim>
+inline bool
+TriaAccessor<structdim,dim,spacedim>::has_children () const
+{
+  Assert (this->state() == IteratorState::valid,
+          TriaAccessorExceptions::ExcDereferenceInvalidObject());
+
+  // children are stored in pairs, so each object owns
+  // max_children_per_cell/2 consecutive slots; the first of them is enough
+  // to answer the question
+  const unsigned int pairs_per_object = GeometryInfo<structdim>::max_children_per_cell/2;
+  const unsigned int first_slot       = pairs_per_object * this->present_index;
+
+  return (this->objects().children[first_slot] != -1);
+}
+
+
+
+
+// Return the number of children that GeometryInfo associates with the
+// current refinement case of this object.
+template <int structdim, int dim, int spacedim>
+inline unsigned int
+TriaAccessor<structdim,dim,spacedim>::n_children () const
+{
+  return GeometryInfo<structdim>::n_children (refinement_case());
+}
+
+
+
+// Store the given refinement case for the object this accessor points to.
+// The accessor must be valid and its index must lie within the
+// refinement_cases array.
+template <int structdim, int dim, int spacedim>
+inline void
+TriaAccessor<structdim, dim, spacedim>::
+set_refinement_case (const RefinementCase<structdim> &refinement_case) const
+{
+  Assert (this->state() == IteratorState::valid,
+          TriaAccessorExceptions::ExcDereferenceInvalidObject());
+  Assert (static_cast<unsigned int> (this->present_index) <
+          this->objects().refinement_cases.size(),
+          ExcIndexRange(this->present_index, 0,
+                        this->objects().refinement_cases.size()));
+
+  // note that the parameter hides the member function of the same name
+  this->objects().refinement_cases[this->present_index] = refinement_case;
+}
+
+
+// Reset the stored refinement case of this object to 'no_refinement'. The
+// accessor must be valid and its index must lie within the
+// refinement_cases array.
+template <int structdim, int dim, int spacedim>
+inline void
+TriaAccessor<structdim, dim, spacedim>::clear_refinement_case () const
+{
+  Assert (this->state() == IteratorState::valid,
+          TriaAccessorExceptions::ExcDereferenceInvalidObject());
+  Assert (static_cast<unsigned int> (this->present_index) <
+          this->objects().refinement_cases.size(),
+          ExcIndexRange(this->present_index, 0,
+                        this->objects().refinement_cases.size()));
+
+  this->objects().refinement_cases[this->present_index] = RefinementCase<structdim>::no_refinement;
+}
+
+
+
+
+
+// Record 'index' as the index of the pair of children starting at child
+// number 'i'. Only even values of 'i' are accepted because children are
+// stored in pairs; passing -1 as 'index' is always allowed.
+template <int structdim, int dim, int spacedim>
+void
+TriaAccessor<structdim, dim, spacedim>::set_children (const unsigned int i,
+                                                      const int index) const
+{
+  Assert (this->used(), TriaAccessorExceptions::ExcCellNotUsed());
+  Assert (i%2==0, TriaAccessorExceptions::ExcSetOnlyEvenChildren(i));
+
+  // every object owns max_children_per_cell/2 consecutive pair slots in
+  // the children array
+  const unsigned int n_sets_of_two = GeometryInfo<structdim>::max_children_per_cell/2;
+
+  // Apart from index==-1, a non-negative index may be stored in the first
+  // slot only while there are no children yet, and in a later slot only
+  // if children exist and that slot is still -1.
+  Assert ((index==-1) ||
+          (i==0 && !this->has_children() && (index>=0)) ||
+          (i>0  &&  this->has_children() && (index>=0) &&
+           this->objects().children[n_sets_of_two*this->present_index+i/2] == -1),
+          TriaAccessorExceptions::ExcCantSetChildren(index));
+
+  const unsigned int pair_slot = n_sets_of_two*this->present_index+i/2;
+  this->objects().children[pair_slot] = index;
+}
+
+
+
+// Reset all child entries of this object to -1 by calling set_children()
+// once for every pair of children.
+template <int structdim, int dim, int spacedim>
+void
+TriaAccessor<structdim, dim, spacedim>::clear_children () const
+{
+  // children are stored in pairs, so there are max_children_per_cell/2
+  // entries to reset; each is addressed through its even child number
+  const unsigned int n_pairs = GeometryInfo<structdim>::max_children_per_cell/2;
+
+  for (unsigned int even_child=0; even_child<2*n_pairs; even_child+=2)
+    set_children (even_child, -1);
+}
+
+
+
+// Return the user flag stored for this object. The object must be in use.
+template <int structdim, int dim, int spacedim>
+inline bool
+TriaAccessor<structdim,dim,spacedim>::user_flag_set () const
+{
+  Assert (this->used(), TriaAccessorExceptions::ExcCellNotUsed());
+
+  return this->objects().user_flags[this->present_index];
+}
+
+
+
+// Set the user flag stored for this object. The object must be in use.
+template <int structdim, int dim, int spacedim>
+inline void
+TriaAccessor<structdim,dim,spacedim>::set_user_flag () const
+{
+  Assert (this->used(), TriaAccessorExceptions::ExcCellNotUsed());
+
+  this->objects().user_flags[this->present_index] = true;
+}
+
+
+
+// Clear the user flag stored for this object. The object must be in use.
+template <int structdim, int dim, int spacedim>
+inline void
+TriaAccessor<structdim,dim,spacedim>::clear_user_flag () const
+{
+  Assert (this->used(), TriaAccessorExceptions::ExcCellNotUsed());
+
+  this->objects().user_flags[this->present_index] = false;
+}
+
+
+
+// Set the user flag of this object and, recursively, of all of its
+// descendants.
+template <int structdim, int dim, int spacedim>
+void
+TriaAccessor<structdim,dim,spacedim>::recursively_set_user_flag () const
+{
+  set_user_flag ();
+
+  // an object without children ends the recursion
+  if (!this->has_children())
+    return;
+
+  for (unsigned int child_no=0; child_no<this->n_children(); ++child_no)
+    this->child(child_no)->recursively_set_user_flag ();
+}
+
+
+
+// Clear the user flag of this object and, recursively, of all of its
+// descendants.
+template <int structdim, int dim, int spacedim>
+void
+TriaAccessor<structdim,dim,spacedim>::recursively_clear_user_flag () const
+{
+  clear_user_flag ();
+
+  // an object without children ends the recursion
+  if (!this->has_children())
+    return;
+
+  for (unsigned int child_no=0; child_no<this->n_children(); ++child_no)
+    this->child(child_no)->recursively_clear_user_flag ();
+}
+
+
+
+// Forward to clear_user_data() of the underlying object storage for the
+// entry of this object. The object must be in use.
+template <int structdim, int dim, int spacedim>
+void
+TriaAccessor<structdim,dim,spacedim>::clear_user_data () const
+{
+  Assert (this->used(), TriaAccessorExceptions::ExcCellNotUsed());
+
+  this->objects().clear_user_data (this->present_index);
+}
+
+
+
+// Store 'p' as the user pointer of this object. The object must be in use.
+template <int structdim, int dim, int spacedim>
+void
+TriaAccessor<structdim,dim,spacedim>::set_user_pointer (void *p) const
+{
+  Assert (this->used(), TriaAccessorExceptions::ExcCellNotUsed());
+
+  this->objects().user_pointer(this->present_index) = p;
+}
+
+
+
+// Reset the user pointer of this object to a null pointer. The object must
+// be in use.
+template <int structdim, int dim, int spacedim>
+void
+TriaAccessor<structdim,dim,spacedim>::clear_user_pointer () const
+{
+  Assert (this->used(), TriaAccessorExceptions::ExcCellNotUsed());
+
+  this->objects().user_pointer(this->present_index) = 0;
+}
+
+
+
+// Return the user pointer stored for this object. The object must be in
+// use.
+template <int structdim, int dim, int spacedim>
+void *
+TriaAccessor<structdim,dim,spacedim>::user_pointer () const
+{
+  Assert (this->used(), TriaAccessorExceptions::ExcCellNotUsed());
+
+  return this->objects().user_pointer(this->present_index);
+}
+
+
+
+// Store 'p' as the user pointer of this object and, recursively, of all of
+// its descendants.
+template <int structdim, int dim, int spacedim>
+void
+TriaAccessor<structdim,dim,spacedim>::recursively_set_user_pointer (void *p) const
+{
+  set_user_pointer (p);
+
+  // an object without children ends the recursion
+  if (!this->has_children())
+    return;
+
+  for (unsigned int child_no=0; child_no<this->n_children(); ++child_no)
+    this->child(child_no)->recursively_set_user_pointer (p);
+}
+
+
+
+// Reset the user pointer of this object and, recursively, of all of its
+// descendants.
+template <int structdim, int dim, int spacedim>
+void
+TriaAccessor<structdim,dim,spacedim>::recursively_clear_user_pointer () const
+{
+  clear_user_pointer ();
+
+  // an object without children ends the recursion
+  if (!this->has_children())
+    return;
+
+  for (unsigned int child_no=0; child_no<this->n_children(); ++child_no)
+    this->child(child_no)->recursively_clear_user_pointer ();
+}
+
+
+
+// Store 'p' as the user index of this object. The object must be in use.
+template <int structdim, int dim, int spacedim>
+void
+TriaAccessor<structdim,dim,spacedim>::set_user_index (unsigned int p) const
+{
+  Assert (this->used(), TriaAccessorExceptions::ExcCellNotUsed());
+
+  this->objects().user_index(this->present_index) = p;
+}
+
+
+
+// Reset the user index of this object to zero. The object must be in use.
+template <int structdim, int dim, int spacedim>
+void
+TriaAccessor<structdim,dim,spacedim>::clear_user_index () const
+{
+  Assert (this->used(), TriaAccessorExceptions::ExcCellNotUsed());
+
+  this->objects().user_index(this->present_index) = 0;
+}
+
+
+
+// Return the user index stored for this object. The object must be in use.
+template <int structdim, int dim, int spacedim>
+unsigned int
+TriaAccessor<structdim,dim,spacedim>::user_index () const
+{
+  Assert (this->used(), TriaAccessorExceptions::ExcCellNotUsed());
+
+  return this->objects().user_index(this->present_index);
+}
+
+
+
+// Store 'p' as the user index of this object and, recursively, of all of
+// its descendants.
+template <int structdim, int dim, int spacedim>
+void
+TriaAccessor<structdim,dim,spacedim>::recursively_set_user_index (unsigned int p) const
+{
+  set_user_index (p);
+
+  // an object without children ends the recursion
+  if (!this->has_children())
+    return;
+
+  for (unsigned int child_no=0; child_no<this->n_children(); ++child_no)
+    this->child(child_no)->recursively_set_user_index (p);
+}
+
+
+
+// Reset the user index of this object and, recursively, of all of its
+// descendants.
+template <int structdim, int dim, int spacedim>
+void
+TriaAccessor<structdim,dim,spacedim>::recursively_clear_user_index () const
+{
+  clear_user_index ();
+
+  // an object without children ends the recursion
+  if (!this->has_children())
+    return;
+
+  for (unsigned int child_no=0; child_no<this->n_children(); ++child_no)
+    this->child(child_no)->recursively_clear_user_index ();
+}
+
+
+
+// Return the largest number of refinement steps found below this object:
+// 0 for an object without children, otherwise one more than the largest
+// value reported by any child.
+template <int structdim, int dim, int spacedim>
+inline unsigned int
+TriaAccessor<structdim,dim,spacedim>::max_refinement_depth () const
+{
+  if (!this->has_children())
+    return 0;
+
+  unsigned int deepest = 1;
+  for (unsigned int child_no=0; child_no<n_children(); ++child_no)
+    {
+      const unsigned int depth_via_child = child(child_no)->max_refinement_depth() + 1;
+      if (deepest < depth_via_child)
+        deepest = depth_via_child;
+    }
+
+  return deepest;
+}
+
+
+
+// Count the descendants of this object that have no children themselves.
+// An object without children counts as 1.
+template <int structdim, int dim, int spacedim>
+unsigned int
+TriaAccessor<structdim, dim, spacedim>::number_of_children () const
+{
+  if (!this->has_children())
+    return 1;
+
+  // otherwise add up what each child reports
+  unsigned int total = 0;
+  for (unsigned int child_no=0; child_no<n_children(); ++child_no)
+    total += this->child(child_no)->number_of_children();
+
+  return total;
+}
+
+
+
+// Return the boundary id stored for this object. Only available for
+// objects of lower dimension than the triangulation (structdim<dim), and
+// the object must be in use.
+template <int structdim, int dim, int spacedim>
+types::boundary_id
+TriaAccessor<structdim, dim, spacedim>::boundary_id () const
+{
+  Assert (structdim<dim, ExcImpossibleInDim(dim));
+  Assert (this->used(), TriaAccessorExceptions::ExcCellNotUsed());
+
+  return this->objects().boundary_or_material_id[this->present_index].boundary_id;
+}
+
+
+
+// Same as boundary_id(), to which this function forwards.
+template <int structdim, int dim, int spacedim>
+types::boundary_id
+TriaAccessor<structdim, dim, spacedim>::boundary_indicator () const
+{
+  return boundary_id ();
+}
+
+
+
+
+// Store 'boundary_ind' as the boundary id of this object. Only available
+// for objects with structdim<dim, and the object must be in use.
+template <int structdim, int dim, int spacedim>
+void
+TriaAccessor<structdim, dim, spacedim>::set_boundary_id (const types::boundary_id boundary_ind) const
+{
+  Assert (structdim<dim, ExcImpossibleInDim(dim));
+  Assert (this->used(), TriaAccessorExceptions::ExcCellNotUsed());
+
+  this->objects().boundary_or_material_id[this->present_index].boundary_id = boundary_ind;
+}
+
+
+
+// Same as set_boundary_id(), to which this function forwards.
+template <int structdim, int dim, int spacedim>
+void
+TriaAccessor<structdim, dim, spacedim>::set_boundary_indicator (const types::boundary_id boundary_ind) const
+{
+  set_boundary_id (boundary_ind);
+}
+
+
+
+// Set the boundary id of this object and, for structdim==2, also of the
+// four lines bounding it. Not implemented for structdim>2.
+template <int structdim, int dim, int spacedim>
+void
+TriaAccessor<structdim, dim, spacedim>::set_all_boundary_ids (const types::boundary_id boundary_ind) const
+{
+  set_boundary_id (boundary_ind);
+
+  switch (structdim)
+    {
+    case 1:
+      // nothing to do: no sub-objects of a 1d object need to be touched
+      break;
+
+    case 2:
+      // hand the same id to each of the bounding lines
+      for (unsigned int line_no=0; line_no<4; ++line_no)
+        this->line(line_no)->set_boundary_id (boundary_ind);
+      break;
+
+    default:
+      Assert (false, ExcNotImplemented());
+    }
+}
+
+
+
+// Same as set_all_boundary_ids(), to which this function forwards.
+template <int structdim, int dim, int spacedim>
+void
+TriaAccessor<structdim, dim, spacedim>::set_all_boundary_indicators (const types::boundary_id boundary_ind) const
+{
+  set_all_boundary_ids (boundary_ind);
+}
+
+
+
+// An object is at the boundary if its boundary id differs from
+// numbers::internal_face_boundary_id. All error checking happens inside
+// boundary_id().
+template <int structdim, int dim, int spacedim>
+bool
+TriaAccessor<structdim, dim, spacedim>::at_boundary () const
+{
+  return (boundary_id() != numbers::internal_face_boundary_id);
+}
+
+
+
+// Return the Boundary object the triangulation associates with this
+// object. The manifold id is used if it is valid; otherwise the boundary
+// id (structdim<dim), the material id (dim<spacedim), or the invalid
+// manifold id is used for the lookup, in that order of preference.
+template <int structdim, int dim, int spacedim>
+const Boundary<dim,spacedim> &
+TriaAccessor<structdim, dim, spacedim>::get_boundary () const
+{
+  Assert (this->used(), TriaAccessorExceptions::ExcCellNotUsed());
+
+  // the manifold id is the first choice
+  const types::manifold_id mi = this->objects().manifold_id[this->present_index];
+
+  if (mi != numbers::invalid_manifold_id)
+    return this->tria->get_boundary(mi);
+
+  // no valid manifold id: fall back to the boundary or material id,
+  // converted to a manifold id
+  return this->tria->get_boundary(structdim < dim ?
+                                  this->objects().boundary_or_material_id[this->present_index].boundary_id:
+                                  dim < spacedim ?
+                                  this->objects().boundary_or_material_id[this->present_index].material_id:
+                                  numbers::invalid_manifold_id);
+}
+
+
+// Return the Manifold object the triangulation associates with this
+// object. The manifold id is used if it is valid; otherwise the boundary
+// id (structdim<dim), the material id (dim<spacedim), or the invalid
+// manifold id is used for the lookup, in that order of preference.
+template <int structdim, int dim, int spacedim>
+const Manifold<dim,spacedim> &
+TriaAccessor<structdim, dim, spacedim>::get_manifold () const
+{
+  Assert (this->used(), TriaAccessorExceptions::ExcCellNotUsed());
+
+  // the manifold id is the first choice
+  const types::manifold_id mi = this->objects().manifold_id[this->present_index];
+
+  if (mi != numbers::invalid_manifold_id)
+    return this->tria->get_manifold(mi);
+
+  // no valid manifold id: fall back to the boundary or material id,
+  // converted to a manifold id
+  return this->tria->get_manifold(structdim < dim ?
+                                  this->objects().boundary_or_material_id[this->present_index].boundary_id:
+                                  dim < spacedim ?
+                                  this->objects().boundary_or_material_id[this->present_index].material_id:
+                                  numbers::invalid_manifold_id);
+}
+
+
+// Return the manifold id stored for this object. The object must be in
+// use.
+template <int structdim, int dim, int spacedim>
+types::manifold_id
+TriaAccessor<structdim, dim, spacedim>::manifold_id () const
+{
+  Assert (this->used(), TriaAccessorExceptions::ExcCellNotUsed());
+
+  return this->objects().manifold_id[this->present_index];
+}
+
+
+
+// Store 'manifold_ind' as the manifold id of this object. The object must
+// be in use.
+template <int structdim, int dim, int spacedim>
+void
+TriaAccessor<structdim, dim, spacedim>::set_manifold_id (const types::manifold_id manifold_ind) const
+{
+  Assert (this->used(), TriaAccessorExceptions::ExcCellNotUsed());
+
+  this->objects().manifold_id[this->present_index] = manifold_ind;
+}
+
+
+// Set the manifold id of this object, of all of its descendants, and of
+// its bounding sub-objects: the two vertices of a line when dim==1, or the
+// four lines of a quad. Not implemented for structdim>2.
+template <int structdim, int dim, int spacedim>
+void
+TriaAccessor<structdim, dim, spacedim>::set_all_manifold_ids (const types::manifold_id manifold_ind) const
+{
+  set_manifold_id (manifold_ind);
+
+  // descend into the children first
+  if (this->has_children())
+    for (unsigned int child_no=0; child_no<this->n_children(); ++child_no)
+      this->child(child_no)->set_all_manifold_ids (manifold_ind);
+
+  // then take care of the bounding objects
+  switch (structdim)
+    {
+    case 1:
+      // in 1d the manifold ids of vertices are kept in a map owned by the
+      // triangulation
+      if (dim == 1)
+        {
+          (*this->tria->vertex_to_manifold_id_map_1d)[vertex_index(0)] = manifold_ind;
+          (*this->tria->vertex_to_manifold_id_map_1d)[vertex_index(1)] = manifold_ind;
+        }
+      break;
+
+    case 2:
+      // hand the same id to each of the bounding lines
+      for (unsigned int line_no=0; line_no<4; ++line_no)
+        this->line(line_no)->set_manifold_id (manifold_ind);
+      break;
+
+    default:
+      Assert (false, ExcNotImplemented());
+    }
+}
+
+
+
+// Return the diameter of this object, computed as the largest distance
+// between diagonally opposite vertices (for a line: the distance between
+// its two vertices).
+template <int structdim, int dim, int spacedim>
+double
+TriaAccessor<structdim, dim, spacedim>::diameter () const
+{
+  switch (structdim)
+    {
+    case 1:
+      return (this->vertex(1)-this->vertex(0)).norm();
+
+    case 2:
+    {
+      // the two diagonals of a quad
+      const double diag_03 = (this->vertex(3)-this->vertex(0)).norm();
+      const double diag_12 = (this->vertex(2)-this->vertex(1)).norm();
+      return std::max (diag_03, diag_12);
+    }
+
+    case 3:
+    {
+      // the four space diagonals of a hex
+      const double diag_07 = (this->vertex(7)-this->vertex(0)).norm();
+      const double diag_16 = (this->vertex(6)-this->vertex(1)).norm();
+      const double diag_52 = (this->vertex(2)-this->vertex(5)).norm();
+      const double diag_43 = (this->vertex(3)-this->vertex(4)).norm();
+      return std::max (std::max (diag_07, diag_16),
+                       std::max (diag_52, diag_43));
+    }
+
+    default:
+      Assert (false, ExcNotImplemented());
+      return -1e10;
+    }
+}
+
+
+
+// Return the smallest distance between any two vertices of this object.
+// For a line this is simply its length; for quads and hexes all pairs of
+// vertices are compared. Other values of structdim are not implemented
+// and yield -1e10.
+template <int structdim, int dim, int spacedim>
+double
+TriaAccessor<structdim, dim, spacedim>::minimum_vertex_distance () const
+{
+  switch (structdim)
+    {
+    case 1:
+      return (this->vertex(1)-this->vertex(0)).norm();
+    case 2:
+    case 3:
+    {
+      // Compare squared distances and take a single square root at the
+      // end. The difference vector of each pair is formed only once and
+      // its squared norm is taken directly, instead of building the same
+      // difference twice to multiply it with itself.
+      double min_square = std::numeric_limits<double>::max();
+      for (unsigned int i=0; i<GeometryInfo<structdim>::vertices_per_cell; ++i)
+        for (unsigned int j=i+1; j<GeometryInfo<structdim>::vertices_per_cell; ++j)
+          min_square = std::min(min_square,
+                                (this->vertex(i)-this->vertex(j)).norm_square());
+      return std::sqrt(min_square);
+    }
+    default:
+      Assert (false, ExcNotImplemented());
+      return -1e10;
+    }
+}
+
+
+// Tell whether this object is a translation of the object 'o' points to.
+// That is the case if the displacement between corresponding vertices is
+// the same for all vertices, up to rounding.
+template <int structdim, int dim, int spacedim>
+bool
+TriaAccessor<structdim, dim, spacedim>::
+is_translation_of (const TriaIterator<TriaAccessor<structdim,dim,spacedim> > &o) const
+{
+  // Take the displacement of the zeroth vertices as the reference. Forming
+  // a displacement can already cost one or two digits of accuracy, so the
+  // other vertices are accepted if they deviate from the reference by no
+  // more than 1e-12 times its length; the test is done on squared lengths,
+  // hence the factor 1e-24.
+  const Tensor<1,spacedim> reference_shift = o->vertex(0) - this->vertex(0);
+  const double tolerance_square = 1e-24 * reference_shift.norm_square();
+
+  for (unsigned int v=1; v<GeometryInfo<structdim>::vertices_per_cell; ++v)
+    {
+      const Tensor<1,spacedim> deviation
+        = (o->vertex(v) - this->vertex(v)) - reference_shift;
+
+      // one vertex out of tolerance settles the question
+      if (deviation.norm_square() > tolerance_square)
+        return false;
+    }
+
+  return true;
+}
+
+
+/*------------------------ Functions: TriaAccessor<0,dim,spacedim> -----------------------*/
+
+// Construct an accessor for the vertex with the given global index in the
+// given triangulation.
+template <int dim, int spacedim>
+inline
+TriaAccessor<0, dim, spacedim>::TriaAccessor (const Triangulation<dim, spacedim> *tria,
+                                              const unsigned int                  vertex_index)
+  :
+  tria (tria),
+  global_vertex_index (vertex_index)
+{}
+
+
+
+// Constructor with the argument list common to accessors. The level and
+// the AccessorData pointer are ignored; 'index' becomes the global vertex
+// index.
+template <int dim, int spacedim>
+inline
+TriaAccessor<0, dim, spacedim>::TriaAccessor (const Triangulation<dim,spacedim> *tria,
+                                              const int /*level*/,
+                                              const int index,
+                                              const AccessorData *)
+  :
+  tria (tria),
+  global_vertex_index (index)
+{}
+
+
+
+// Conversion from an accessor of another type. This is not a meaningful
+// operation: the members are set to invalid values and an assertion is
+// triggered.
+template <int dim, int spacedim>
+template <int structdim2, int dim2, int spacedim2>
+inline
+TriaAccessor<0, dim, spacedim>::TriaAccessor (const TriaAccessor<structdim2,dim2,spacedim2> &)
+  :
+  tria (NULL),
+  global_vertex_index (numbers::invalid_unsigned_int)
+{
+  Assert(false, ExcImpossibleInDim(0));
+}
+
+
+
+// Conversion from an InvalidAccessor. This is not a meaningful operation:
+// the members are set to invalid values and an assertion is triggered.
+template <int dim, int spacedim>
+template <int structdim2, int dim2, int spacedim2>
+inline
+TriaAccessor<0, dim, spacedim>::TriaAccessor (const InvalidAccessor<structdim2,dim2,spacedim2> &)
+  :
+  tria (NULL),
+  global_vertex_index (numbers::invalid_unsigned_int)
+{
+  Assert(false, ExcImpossibleInDim(0));
+}
+
+
+
+// Make this accessor refer to the same triangulation and vertex as 't'.
+template <int dim, int spacedim>
+inline void
+TriaAccessor<0, dim, spacedim>::copy_from (const TriaAccessor &t)
+{
+  global_vertex_index = t.global_vertex_index;
+  tria                = t.tria;
+}
+
+
+
+// Return 'valid' unless the vertex index is the invalid marker, in which
+// case the accessor counts as past-the-end.
+template <int dim, int spacedim>
+inline IteratorState::IteratorStates
+TriaAccessor<0, dim, spacedim>::state () const
+{
+  if (global_vertex_index == numbers::invalid_unsigned_int)
+    return IteratorState::past_the_end;
+
+  return IteratorState::valid;
+}
+
+
+
+// The level reported by a vertex accessor is always zero.
+template <int dim, int spacedim>
+inline int
+TriaAccessor<0, dim, spacedim>::level ()
+{
+  return 0;
+}
+
+
+
+// Return the global vertex index, converted to the signed return type.
+template <int dim, int spacedim>
+inline int
+TriaAccessor<0, dim, spacedim>::index () const
+{
+  return global_vertex_index;
+}
+
+
+
+// Advance to the next vertex index. Once the index reaches the number of
+// vertices of the triangulation it is replaced by the invalid marker.
+template <int dim, int spacedim>
+inline void
+TriaAccessor<0, dim, spacedim>::operator ++ ()
+{
+  ++global_vertex_index;
+
+  const bool ran_past_last_vertex = (global_vertex_index >= tria->n_vertices());
+  if (ran_past_last_vertex)
+    global_vertex_index = numbers::invalid_unsigned_int;
+}
+
+
+
+// Step back to the previous vertex index. Stepping back from index 0
+// yields the invalid marker; an accessor that already holds the invalid
+// marker is left untouched.
+template <int dim, int spacedim>
+inline void
+TriaAccessor<0, dim, spacedim>::operator -- ()
+{
+  if (global_vertex_index == numbers::invalid_unsigned_int)
+    return;
+
+  if (global_vertex_index == 0)
+    global_vertex_index = numbers::invalid_unsigned_int;
+  else
+    --global_vertex_index;
+}
+
+
+
+// Two vertex accessors are equal if they refer to the same triangulation
+// and the same global vertex index.
+template <int dim, int spacedim>
+inline bool
+TriaAccessor<0, dim, spacedim>::operator == (const TriaAccessor &t) const
+{
+  return ((tria == t.tria)
+          &&
+          (global_vertex_index == t.global_vertex_index));
+}
+
+
+
+// Negation of operator==.
+template <int dim, int spacedim>
+inline bool
+TriaAccessor<0, dim, spacedim>::operator != (const TriaAccessor &t) const
+{
+  const bool same = (*this==t);
+  return !same;
+}
+
+
+
+// Return the global index of the vertex; the argument is ignored.
+template <int dim, int spacedim>
+inline unsigned int
+TriaAccessor<0, dim, spacedim>::vertex_index (const unsigned int) const
+{
+  return global_vertex_index;
+}
+
+
+
+// Return a writable reference to the location of the vertex in the
+// triangulation's vertex array; the argument is ignored. Constness of the
+// stored point is cast away.
+template <int dim, int spacedim>
+inline Point<spacedim> &
+TriaAccessor<0, dim, spacedim>::vertex (const unsigned int) const
+{
+  return const_cast<Point<spacedim> &> (this->tria->vertices[global_vertex_index]);
+}
+
+
+
+// Return a default-constructed line iterator; the argument is ignored.
+template <int dim, int spacedim>
+inline typename dealii::internal::Triangulation::Iterators<dim,spacedim>::line_iterator
+TriaAccessor<0, dim, spacedim>::line (const unsigned int)
+{
+  return typename dealii::internal::Triangulation::Iterators<dim,spacedim>::line_iterator();
+}
+
+
+
+template <int dim, int spacedim>
+inline
+unsigned int
+TriaAccessor<0, dim, spacedim>::line_index (const unsigned int)
+{
+  Assert(false, ExcImpossibleInDim(0));
+  return numbers::invalid_unsigned_int;
+}
+
+
+
+// Return a default-constructed quad iterator; the argument is ignored.
+template <int dim, int spacedim>
+inline typename dealii::internal::Triangulation::Iterators<dim,spacedim>::quad_iterator
+TriaAccessor<0, dim, spacedim>::quad (const unsigned int)
+{
+  return typename dealii::internal::Triangulation::Iterators<dim,spacedim>::quad_iterator();
+}
+
+
+
+template <int dim, int spacedim>
+inline
+unsigned int
+TriaAccessor<0, dim, spacedim>::quad_index (const unsigned int)
+{
+  Assert(false, ExcImpossibleInDim(0));
+  return numbers::invalid_unsigned_int;
+}
+
+
+
+// The diameter reported for a vertex is always zero.
+template <int dim, int spacedim>
+inline double
+TriaAccessor<0, dim, spacedim>::diameter () const
+{
+  return 0.;
+}
+
+
+
+// The extent reported for a vertex is zero, whatever direction is given.
+template <int dim, int spacedim>
+inline double
+TriaAccessor<0, dim, spacedim>::extent_in_direction (const unsigned int) const
+{
+  return 0.;
+}
+
+
+
+// The center of a vertex is the vertex location itself; both arguments are
+// ignored.
+template <int dim, int spacedim>
+inline Point<spacedim>
+TriaAccessor<0, dim, spacedim>::center (const bool,
+                                        const bool) const
+{
+  return this->tria->vertices[global_vertex_index];
+}
+
+
+
+// The measure reported for a vertex is always zero.
+template <int dim, int spacedim>
+inline double
+TriaAccessor<0, dim, spacedim>::measure () const
+{
+  return 0.;
+}
+
+
+
+// Always returns false for a vertex accessor; the face number is ignored.
+template <int dim, int spacedim>
+inline bool
+TriaAccessor<0, dim, spacedim>::face_orientation (const unsigned int /*face*/)
+{
+  return false;
+}
+
+
+
+// Always returns false for a vertex accessor; the face number is ignored.
+template <int dim, int spacedim>
+inline bool
+TriaAccessor<0, dim, spacedim>::face_flip (const unsigned int /*face*/)
+{
+  return false;
+}
+
+
+
+// Always returns false for a vertex accessor; the face number is ignored.
+template <int dim, int spacedim>
+inline bool
+TriaAccessor<0, dim, spacedim>::face_rotation (const unsigned int /*face*/)
+{
+  return false;
+}
+
+
+
+// Always returns false for a vertex accessor; the line number is ignored.
+template <int dim, int spacedim>
+inline bool
+TriaAccessor<0, dim, spacedim>::line_orientation (const unsigned int /*line*/)
+{
+  return false;
+}
+
+
+
+// A vertex accessor never reports children.
+template <int dim, int spacedim>
+inline bool
+TriaAccessor<0, dim, spacedim>::has_children ()
+{
+  return false;
+}
+
+
+
+// The number of children reported for a vertex is always zero.
+template <int dim, int spacedim>
+inline unsigned int
+TriaAccessor<0, dim, spacedim>::n_children()
+{
+  return 0;
+}
+
+
+
+// Always returns zero for a vertex accessor.
+template <int dim, int spacedim>
+inline unsigned int
+TriaAccessor<0, dim, spacedim>::number_of_children ()
+{
+  return 0;
+}
+
+
+
+// The refinement depth reported for a vertex is always zero.
+template <int dim, int spacedim>
+inline unsigned int
+TriaAccessor<0, dim, spacedim>::max_refinement_depth ()
+{
+  return 0;
+}
+
+
+
+// Return a default-constructed iterator; the child number is ignored.
+template <int dim, int spacedim>
+inline TriaIterator<TriaAccessor<0,dim,spacedim> >
+TriaAccessor<0, dim, spacedim>::child (const unsigned int)
+{
+  return TriaIterator<TriaAccessor<0,dim,spacedim> >();
+}
+
+
+
+// Return a default-constructed iterator; the child number is ignored.
+template <int dim, int spacedim>
+inline TriaIterator<TriaAccessor<0,dim,spacedim> >
+TriaAccessor<0, dim, spacedim>::isotropic_child (const unsigned int)
+{
+  return TriaIterator<TriaAccessor<0,dim,spacedim> >();
+}
+
+
+
+// The refinement case reported for a vertex is always 'no_refinement'.
+template <int dim, int spacedim>
+inline RefinementCase<0>
+TriaAccessor<0, dim, spacedim>::refinement_case ()
+{
+  return RefinementCase<0>(RefinementPossibilities<0>::no_refinement);
+}
+
+
+
+// Always returns -1 for a vertex accessor; the child number is ignored.
+template <int dim, int spacedim>
+inline int
+TriaAccessor<0, dim, spacedim>::child_index (const unsigned int)
+{
+  return -1;
+}
+
+
+
+// Always returns -1 for a vertex accessor; the child number is ignored.
+template <int dim, int spacedim>
+inline int
+TriaAccessor<0, dim, spacedim>::isotropic_child_index (const unsigned int)
+{
+  return -1;
+}
+
+
+
+// Ask the triangulation whether the vertex this accessor points to is in
+// use.
+template <int dim, int spacedim>
+inline bool
+TriaAccessor<0, dim, spacedim>::used () const
+{
+  return tria->vertex_used (global_vertex_index);
+}
+
+
+
+/*------------------------ Functions: TriaAccessor<0,1,spacedim> -----------------------*/
+
+// Construct an accessor for the vertex with the given global index and
+// vertex kind in a one-dimensional triangulation.
+template <int spacedim>
+inline
+TriaAccessor<0, 1, spacedim>::TriaAccessor (const Triangulation<1,spacedim> *tria,
+                                            const VertexKind      vertex_kind,
+                                            const unsigned int    vertex_index)
+  :
+  tria (tria),
+  vertex_kind (vertex_kind),
+  global_vertex_index (vertex_index)
+{}
+
+
+
+// Constructor with the argument list common to accessors. It creates an
+// invalid interior-vertex accessor and only accepts level==-2 and
+// index==-2.
+template <int spacedim>
+inline
+TriaAccessor<0, 1, spacedim>::TriaAccessor (const Triangulation<1,spacedim> *tria,
+                                            const int level,
+                                            const int index,
+                                            const AccessorData *)
+  :
+  tria (tria),
+  vertex_kind (interior_vertex),
+  global_vertex_index (numbers::invalid_unsigned_int)
+{
+  // Normally this constructor should not be used; the one taking a vertex
+  // kind and a vertex index is the one to call. There is one exception:
+  // creating something like Triangulation<1>::face_iterator() runs the
+  // default constructor of the iterator, which in turn calls the accessor
+  // with the argument list (0,-2,-2,0). That particular call is accepted
+  // and yields the default-constructed (invalid) vertex accessor.
+
+  // the casts keep the parameters formally used when the Assert below
+  // does not reference them
+  (void)level;
+  (void)index;
+  Assert ((level == -2) && (index == -2), ExcInternalError());
+}
+
+
+
+// Conversion from an accessor of another type. This is not a meaningful
+// operation: the members are set to invalid values and an assertion is
+// triggered.
+template <int spacedim>
+template <int structdim2, int dim2, int spacedim2>
+inline
+TriaAccessor<0, 1, spacedim>::TriaAccessor (const TriaAccessor<structdim2,dim2,spacedim2> &)
+  :
+  tria (0),
+  vertex_kind (interior_vertex),
+  global_vertex_index (numbers::invalid_unsigned_int)
+{
+  Assert(false, ExcImpossibleInDim(0));
+}
+
+
+
+// Conversion from an InvalidAccessor. This is not a meaningful operation:
+// the members are set to invalid values and an assertion is triggered.
+template <int spacedim>
+template <int structdim2, int dim2, int spacedim2>
+inline
+TriaAccessor<0, 1, spacedim>::TriaAccessor (const InvalidAccessor<structdim2,dim2,spacedim2> &)
+  :
+  tria (0),
+  vertex_kind (interior_vertex),
+  global_vertex_index (numbers::invalid_unsigned_int)
+{
+  Assert(false, ExcImpossibleInDim(0));
+}
+
+
+
+// Make this accessor refer to the same triangulation, vertex kind and
+// vertex as 't'.
+template <int spacedim>
+inline void
+TriaAccessor<0, 1, spacedim>::copy_from (const TriaAccessor &t)
+{
+  global_vertex_index = t.global_vertex_index;
+  vertex_kind         = t.vertex_kind;
+  tria                = t.tria;
+}
+
+
+
+// This accessor always reports the 'valid' state.
+template <int spacedim>
+inline IteratorState::IteratorStates
+TriaAccessor<0, 1, spacedim>::state ()
+{
+  return IteratorState::valid;
+}
+
+
+// The level reported by a vertex accessor is always zero.
+template <int spacedim>
+inline int
+TriaAccessor<0, 1, spacedim>::level ()
+{
+  return 0;
+}
+
+
+
+// Return the global vertex index, converted to the signed return type.
+template <int spacedim>
+inline int
+TriaAccessor<0, 1, spacedim>::index () const
+{
+  return global_vertex_index;
+}
+
+
+
+// Advancing this kind of accessor is not implemented; calling the operator
+// triggers an assertion.
+template <int spacedim>
+inline void
+TriaAccessor<0, 1, spacedim>::operator ++ () const
+{
+  Assert (false, ExcNotImplemented());
+}
+
+
+// Stepping this kind of accessor backwards is not implemented; calling the
+// operator triggers an assertion.
+template <int spacedim>
+inline void
+TriaAccessor<0, 1, spacedim>::operator -- () const
+{
+  Assert (false, ExcNotImplemented());
+}
+
+
+
+// Two accessors are equal if they refer to the same triangulation and the
+// same global vertex index. The vertex kind does not take part in the
+// comparison but is checked for consistency.
+template <int spacedim>
+inline bool
+TriaAccessor<0, 1, spacedim>::operator == (const TriaAccessor &t) const
+{
+  const bool same_vertex = ((tria == t.tria)
+                            &&
+                            (global_vertex_index == t.global_vertex_index));
+
+  // two accessors to the same vertex must also agree on what kind of
+  // vertex it is
+  if (same_vertex)
+    Assert (vertex_kind == t.vertex_kind, ExcInternalError());
+
+  return same_vertex;
+}
+
+
+
+// Negation of operator==.
+template <int spacedim>
+inline bool
+TriaAccessor<0, 1, spacedim>::operator != (const TriaAccessor &t) const
+{
+  const bool same = (*this==t);
+  return !same;
+}
+
+
+
+// Return the global index of the vertex. The only admissible argument is
+// i==0.
+template <int spacedim>
+inline unsigned int
+TriaAccessor<0, 1, spacedim>::vertex_index (const unsigned int i) const
+{
+  Assert(i==0, ExcIndexRange(i, 0, 1));
+  // keeps 'i' formally used when the Assert above does not reference it
+  (void)i;
+
+  return global_vertex_index;
+}
+
+
+
+// Return a writable reference to the location of the vertex in the
+// triangulation's vertex array. The only admissible argument is i==0.
+// Constness of the stored point is cast away.
+template <int spacedim>
+inline Point<spacedim> &
+TriaAccessor<0, 1, spacedim>::vertex (const unsigned int i) const
+{
+  Assert(i==0, ExcIndexRange(i, 0, 1));
+  // keeps 'i' formally used when the Assert above does not reference it
+  (void)i;
+
+  return const_cast<Point<spacedim> &> (this->tria->vertices[global_vertex_index]);
+}
+
+
+// Return the center of this object, which here is a copy of the vertex location itself.
+template <int spacedim>
+inline
+Point<spacedim>
+TriaAccessor<0, 1, spacedim>::center () const
+{
+  return this->tria->vertices[global_vertex_index];
+}
+
+
+// Return a default-constructed line iterator; the argument is ignored.
+template <int spacedim>
+inline
+typename dealii::internal::Triangulation::Iterators<1,spacedim>::line_iterator
+TriaAccessor<0, 1, spacedim>::line (const unsigned int)
+{
+  return typename dealii::internal::Triangulation::Iterators<1,spacedim>::line_iterator();
+}
+// Trips Assert(false, ExcImpossibleInDim(0)); the value returned after the
+// assertion is numbers::invalid_unsigned_int. The argument is ignored.
+template <int spacedim>
+inline
+unsigned int
+TriaAccessor<0, 1, spacedim>::line_index (const unsigned int)
+{
+  Assert(false, ExcImpossibleInDim(0));
+  return numbers::invalid_unsigned_int;
+}
+
+// Return a default-constructed quad iterator; the argument is ignored.
+template <int spacedim>
+inline
+typename dealii::internal::Triangulation::Iterators<1,spacedim>::quad_iterator
+TriaAccessor<0, 1, spacedim>::quad (const unsigned int)
+{
+  return typename dealii::internal::Triangulation::Iterators<1,spacedim>::quad_iterator();
+}
+
+// Trips Assert(false, ExcImpossibleInDim(0)); the value returned after the
+// assertion is numbers::invalid_unsigned_int. The argument is ignored.
+template <int spacedim>
+inline
+unsigned int
+TriaAccessor<0, 1, spacedim>::quad_index (const unsigned int)
+{
+  Assert(false, ExcImpossibleInDim(0));
+  return numbers::invalid_unsigned_int;
+}
+
+// A vertex is reported as being at the boundary exactly if its kind is not interior_vertex.
+template <int spacedim>
+inline
+bool
+TriaAccessor<0, 1, spacedim>::at_boundary () const
+{
+  return vertex_kind != interior_vertex;
+}
+
+
+template <int spacedim>
+inline
+types::boundary_id
+TriaAccessor<0, 1, spacedim>::boundary_id () const
+{
+  // anything that is neither a left_vertex nor a right_vertex is
+  // reported with the boundary id reserved for internal faces
+  if ((vertex_kind != left_vertex) && (vertex_kind != right_vertex))
+    return numbers::internal_face_boundary_id;
+
+  // a left or right vertex must already have an entry in the
+  // triangulation's vertex-to-boundary-id map; check that and
+  // return the stored entry
+  const unsigned int this_vertex = this->vertex_index();
+
+  Assert (tria->vertex_to_boundary_id_map_1d->find (this_vertex)
+          != tria->vertex_to_boundary_id_map_1d->end(),
+          ExcInternalError());
+
+  return (*tria->vertex_to_boundary_id_map_1d)[this_vertex];
+}
+
+
+// Second name for the same query: simply forwards to boundary_id().
+template <int spacedim>
+inline
+types::boundary_id
+TriaAccessor<0, 1, spacedim>::boundary_indicator () const
+{
+  return boundary_id();
+}
+
+
+
+template <int spacedim>
+inline
+types::manifold_id
+TriaAccessor<0, 1, spacedim>::manifold_id () const
+{
+  // a vertex without an entry in the map is reported with the invalid id
+  if ( tria->vertex_to_manifold_id_map_1d->find (this->vertex_index())
+       == tria->vertex_to_manifold_id_map_1d->end())
+    return numbers::invalid_manifold_id;
+  return (*tria->vertex_to_manifold_id_map_1d)[this->vertex_index()];
+}
+
+// Always returns false for this specialization; the face number is ignored.
+template <int spacedim>
+inline
+bool TriaAccessor<0, 1, spacedim>::face_orientation (const unsigned int /*face*/)
+{
+  return false;
+}
+
+
+// Always returns false for this specialization; the face number is ignored.
+template <int spacedim>
+inline
+bool TriaAccessor<0, 1, spacedim>::face_flip (const unsigned int /*face*/)
+{
+  return false;
+}
+
+
+// Always returns false for this specialization; the face number is ignored.
+template <int spacedim>
+inline
+bool TriaAccessor<0, 1, spacedim>::face_rotation (const unsigned int /*face*/)
+{
+  return false;
+}
+
+
+// Always returns false for this specialization; the line number is ignored.
+template <int spacedim>
+inline
+bool TriaAccessor<0, 1, spacedim>::line_orientation (const unsigned int /*line*/)
+{
+  return false;
+}
+
+
+// Always returns false for this specialization.
+template <int spacedim>
+inline
+bool TriaAccessor<0, 1, spacedim>::has_children ()
+{
+  return false;
+}
+
+
+// Always returns zero for this specialization.
+template <int spacedim>
+inline
+unsigned int TriaAccessor<0, 1, spacedim>::n_children()
+{
+  return 0;
+}
+
+
+// Always returns zero for this specialization.
+template <int spacedim>
+inline
+unsigned int TriaAccessor<0, 1, spacedim>::number_of_children ()
+{
+  return 0;
+}
+
+
+// Always returns zero for this specialization.
+template <int spacedim>
+inline
+unsigned int TriaAccessor<0, 1, spacedim>::max_refinement_depth ()
+{
+  return 0;
+}
+
+// Return a default-constructed vertex iterator; the child number is ignored.
+template <int spacedim>
+inline
+TriaIterator<TriaAccessor<0,1,spacedim> >
+TriaAccessor<0, 1, spacedim>::child (const unsigned int)
+{
+  return TriaIterator<TriaAccessor<0,1,spacedim> >();
+}
+
+// Return a default-constructed vertex iterator; the child number is ignored.
+template <int spacedim>
+inline
+TriaIterator<TriaAccessor<0,1,spacedim> >
+TriaAccessor<0, 1, spacedim>::isotropic_child (const unsigned int)
+{
+  return TriaIterator<TriaAccessor<0,1,spacedim> >();
+}
+
+// Always returns the no_refinement case of RefinementPossibilities<0>.
+template <int spacedim>
+inline
+RefinementCase<0> TriaAccessor<0, 1, spacedim>::refinement_case ()
+{
+  return RefinementCase<0>(RefinementPossibilities<0>::no_refinement);
+}
+// Always returns -1 for this specialization; the child number is ignored.
+template <int spacedim>
+inline
+int TriaAccessor<0, 1, spacedim>::child_index (const unsigned int)
+{
+  return -1;
+}
+
+// Always returns -1 for this specialization; the child number is ignored.
+template <int spacedim>
+inline
+int TriaAccessor<0, 1, spacedim>::isotropic_child_index (const unsigned int)
+{
+  return -1;
+}
+
+// Store @p b as the boundary id of this vertex in the triangulation's vertex-to-boundary-id
+// map. The assertion requires that the vertex already has an entry in that map.
+template <int spacedim>
+inline
+void
+TriaAccessor<0, 1, spacedim>::set_boundary_id (const types::boundary_id b)
+{
+  Assert (tria->vertex_to_boundary_id_map_1d->find (this->vertex_index())
+          != tria->vertex_to_boundary_id_map_1d->end(),
+          ExcInternalError());
+
+  (*tria->vertex_to_boundary_id_map_1d)[this->vertex_index()] = b;
+}
+
+
+// Second name for the same operation: simply forwards to set_boundary_id().
+template <int spacedim>
+inline
+void
+TriaAccessor<0, 1, spacedim>::set_boundary_indicator (const types::boundary_id b)
+{
+  set_boundary_id (b);
+}
+
+
+// Store @p b as the manifold id of this vertex in the triangulation's vertex-to-manifold-id
+// map. No assertion is made here about an entry already existing for the vertex.
+template <int spacedim>
+inline
+void
+TriaAccessor<0, 1, spacedim>::set_manifold_id (const types::manifold_id b)
+{
+  (*tria->vertex_to_manifold_id_map_1d)[this->vertex_index()] = b;
+}
+
+
+// For this specialization this is the same as set_boundary_id(), to which it forwards.
+template <int spacedim>
+inline
+void TriaAccessor<0, 1, spacedim>::set_all_boundary_ids (const types::boundary_id b)
+{
+  set_boundary_id (b);
+}
+
+
+// Second name for the same operation: simply forwards to set_all_boundary_ids().
+template <int spacedim>
+inline
+void TriaAccessor<0, 1, spacedim>::set_all_boundary_indicators (const types::boundary_id b)
+{
+  set_all_boundary_ids (b);
+}
+
+
+// For this specialization this is the same as set_manifold_id(), to which it forwards.
+template <int spacedim>
+inline
+void TriaAccessor<0, 1, spacedim>::set_all_manifold_ids (const types::manifold_id b)
+{
+  set_manifold_id (b);
+}
+
+// Return whether the vertex this accessor points to is in use, as reported by the
+// triangulation's vertex_used() for the stored global vertex index.
+template <int spacedim>
+inline
+bool TriaAccessor<0, 1, spacedim>::used () const
+{
+  return tria->vertex_used(global_vertex_index);
+}
+
+/*------------------------ Functions: CellAccessor<dim,spacedim> -----------------------*/
+// Constructor. All four arguments are handed on unchanged to the
+// TriaAccessor<dim,dim,spacedim> base class; nothing else is initialized here.
+template <int dim, int spacedim>
+inline
+CellAccessor<dim,spacedim>::
+CellAccessor (const Triangulation<dim,spacedim> *parent,
+              const int                 level,
+              const int                 index,
+              const AccessorData       *local_data)
+  :
+  TriaAccessor<dim, dim, spacedim> (parent, level, index, local_data)
+{}
+
+
+// Conversion constructor: initialize the TriaAccessor base class from @p cell_accessor.
+template <int dim, int spacedim>
+inline
+CellAccessor<dim,spacedim>::CellAccessor (const TriaAccessor<dim,dim,spacedim> &cell_accessor)
+  :
+  TriaAccessor<dim, dim, spacedim> (cell_accessor)
+{}
+// Helpers that build the iterator to face i of a cell, one overload per cell dimension.
+// In 1d the face is a vertex accessor: its kind is left_vertex if i==0 and face 0 is at the
+// boundary, right_vertex if i==1 and face 1 is at the boundary, and interior_vertex otherwise.
+namespace internal
+{
+  namespace CellAccessor
+  {
+    template <int spacedim>
+    inline
+    dealii::TriaIterator<dealii::TriaAccessor<0, 1, spacedim> >
+    get_face (const dealii::CellAccessor<1,spacedim> &cell,
+              const unsigned int i)
+    {
+      dealii::TriaAccessor<0, 1, spacedim>
+      a (&cell.get_triangulation(),
+         ((i == 0) && cell.at_boundary(0)
+          ?
+          dealii::TriaAccessor<0, 1, spacedim>::left_vertex
+          :
+          ((i == 1) && cell.at_boundary(1)
+           ?
+           dealii::TriaAccessor<0, 1, spacedim>::right_vertex
+           :
+           dealii::TriaAccessor<0, 1, spacedim>::interior_vertex)),
+         cell.vertex_index(i));
+      return dealii::TriaIterator<dealii::TriaAccessor<0, 1, spacedim> >(a);
+    }
+
+    // In 2d, face i of a cell is its i-th line.
+    template <int spacedim>
+    inline
+    dealii::TriaIterator<dealii::TriaAccessor<1, 2, spacedim> >
+    get_face (const dealii::CellAccessor<2,spacedim> &cell,
+              const unsigned int i)
+    {
+      return cell.line(i);
+    }
+
+    // In 3d, face i of a cell is its i-th quad.
+    template <int spacedim>
+    inline
+    dealii::TriaIterator<dealii::TriaAccessor<2, 3, spacedim> >
+    get_face (const dealii::CellAccessor<3,spacedim> &cell,
+              const unsigned int i)
+    {
+      return cell.quad(i);
+    }
+  }
+}
+
+// Return an iterator to face @p i of this cell by delegating to the
+// internal::CellAccessor::get_face() overload selected by the type of *this.
+template <int dim, int spacedim>
+inline
+TriaIterator<TriaAccessor<dim-1, dim, spacedim> >
+CellAccessor<dim,spacedim>::face (const unsigned int i) const
+{
+  return dealii::internal::CellAccessor::get_face (*this, i);
+}
+
+
+
+template <int dim, int spacedim>
+inline
+unsigned int
+CellAccessor<dim,spacedim>::face_index (const unsigned int i) const
+{
+  // which kind of object makes up a face depends on the dimension of
+  // the cell, so pick the index function that matches dim
+
+  // in 1d the index of face i is that of vertex i
+  if (dim == 1)
+    return this->vertex_index(i);
+
+  // in 2d it is that of line i
+  if (dim == 2)
+    return this->line_index(i);
+
+  // in 3d it is that of quad i
+  if (dim == 3)
+    return this->quad_index(i);
+
+  return numbers::invalid_unsigned_int;
+}
+
+// Return the index of the neighbor behind face @p i: the second member of the pair stored
+// for that face in the neighbors array of this cell's level. @p i is range-checked.
+template <int dim, int spacedim>
+inline
+int
+CellAccessor<dim,spacedim>::neighbor_index (const unsigned int i) const
+{
+  AssertIndexRange (i,GeometryInfo<dim>::faces_per_cell);
+  return this->tria->levels[this->present_level]->
+         neighbors[this->present_index*GeometryInfo<dim>::faces_per_cell+i].second;
+}
+
+// Return the level of the neighbor behind face @p i: the first member of the pair stored
+// for that face in the neighbors array of this cell's level. @p i is range-checked.
+template <int dim, int spacedim>
+inline
+int
+CellAccessor<dim,spacedim>::neighbor_level (const unsigned int i) const
+{
+  AssertIndexRange (i, GeometryInfo<dim>::faces_per_cell);
+  return this->tria->levels[this->present_level]->
+         neighbors[this->present_index*GeometryInfo<dim>::faces_per_cell+i].first;
+}
+
+// Return the refinement case this cell is flagged with, read from the refine_flags
+// array of the cell's level. The cell must be used (asserted).
+template <int dim, int spacedim>
+inline
+RefinementCase<dim>
+CellAccessor<dim,spacedim>::refine_flag_set () const
+{
+  Assert (this->used(), TriaAccessorExceptions::ExcCellNotUsed());
+  // cells flagged for refinement must be active
+  // (the @p set_refine_flag function checks this,
+  // but activity may change when refinement is
+  // executed and for some reason the refine
+  // flag is not cleared).
+  Assert (this->active() ||  !this->tria->levels[this->present_level]->refine_flags[this->present_index],
+          ExcRefineCellNotActive());
+  return RefinementCase<dim>(this->tria->levels[this->present_level]->refine_flags[this->present_index]);
+}
+
+// Flag this cell for refinement with @p refinement_case. The assertions require the cell
+// to be used and active, and not to be flagged for coarsening.
+template <int dim, int spacedim>
+inline
+void
+CellAccessor<dim,spacedim>::set_refine_flag (const RefinementCase<dim> refinement_case) const
+{
+  Assert (this->used() && this->active(), ExcRefineCellNotActive());
+  Assert (!coarsen_flag_set(),
+          ExcCellFlaggedForCoarsening());
+
+  this->tria->levels[this->present_level]->refine_flags[this->present_index] = refinement_case;
+}
+
+// Reset the refine flag of this cell to RefinementCase<dim>::no_refinement.
+// The assertion requires the cell to be used and active.
+template <int dim, int spacedim>
+inline
+void
+CellAccessor<dim,spacedim>::clear_refine_flag () const
+{
+  Assert (this->used() && this->active(), ExcRefineCellNotActive());
+  this->tria->levels[this->present_level]->refine_flags[this->present_index] =
+    RefinementCase<dim>::no_refinement;
+}
+// Merge into this cell's refine flag the minimal cell refinement needed so that face
+// @p face_no gets refined as @p face_refinement_case; returns whether the flag changed.
+// The face case is range-checked against RefinementCase<dim-1>, the type it is passed as.
+template <int dim, int spacedim>
+inline
+bool
+CellAccessor<dim,spacedim>::flag_for_face_refinement (const unsigned int face_no,
+                                                      const RefinementCase<dim-1> &face_refinement_case) const
+{
+  Assert (dim>1, ExcImpossibleInDim(dim));
+  Assert (face_no<GeometryInfo<dim>::faces_per_cell,
+          ExcIndexRange(face_no,0,GeometryInfo<dim>::faces_per_cell));
+  Assert (face_refinement_case < RefinementCase<dim-1>::isotropic_refinement+1,
+          ExcIndexRange(face_refinement_case,0,RefinementCase<dim-1>::isotropic_refinement+1));
+
+  // the new refinement case is a combination
+  // of the minimum required one for the given
+  // face refinement and the already existing
+  // flagged refinement case
+  RefinementCase<dim> old_ref_case = refine_flag_set();
+  RefinementCase<dim>
+  new_ref_case = (old_ref_case
+                  | GeometryInfo<dim>::min_cell_refinement_case_for_face_refinement(face_refinement_case,
+                      face_no,
+                      this->face_orientation(face_no),
+                      this->face_flip(face_no),
+                      this->face_rotation(face_no)));
+  set_refine_flag(new_ref_case);
+  // return, whether we had to change the
+  // refinement flag
+  return new_ref_case != old_ref_case;
+}
+
+
+
+template <int dim, int spacedim>
+inline
+bool
+CellAccessor<dim,spacedim>::flag_for_line_refinement (const unsigned int line_no) const
+{
+  Assert (dim>1, ExcImpossibleInDim(dim));
+  Assert (line_no<GeometryInfo<dim>::lines_per_cell,
+          ExcIndexRange(line_no,0,GeometryInfo<dim>::lines_per_cell));
+
+  // start from the refinement already requested for this cell ...
+  RefinementCase<dim> previous_flag = refine_flag_set();
+  // ... and merge in the minimal cell refinement that refining the
+  // given line requires
+  RefinementCase<dim> merged_flag
+    = previous_flag
+      | GeometryInfo<dim>::min_cell_refinement_case_for_line_refinement(line_no);
+  set_refine_flag(merged_flag);
+
+  // report whether the flag had to be changed
+  return merged_flag != previous_flag;
+}
+
+// Specialization for CellAccessor<1>: always returns SubfaceCase<1>::case_none;
+// the face number is ignored.
+template <>
+inline
+dealii::internal::SubfaceCase<1>
+CellAccessor<1>::subface_case(const unsigned int) const
+{
+  return dealii::internal::SubfaceCase<1>::case_none;
+}
+// Specialization for CellAccessor<1,2>: always returns SubfaceCase<1>::case_none.
+template <>
+inline
+dealii::internal::SubfaceCase<1>
+CellAccessor<1,2>::subface_case(const unsigned int) const
+{
+  return dealii::internal::SubfaceCase<1>::case_none;
+}
+
+// Specialization for CellAccessor<1,3>: always returns SubfaceCase<1>::case_none.
+template <>
+inline
+dealii::internal::SubfaceCase<1>
+CellAccessor<1,3>::subface_case(const unsigned int) const
+{
+  return dealii::internal::SubfaceCase<1>::case_none;
+}
+
+// Specialization for CellAccessor<2>: a face with children yields case_x, any other case_none.
+template <>
+inline
+dealii::internal::SubfaceCase<2>
+CellAccessor<2>::subface_case(const unsigned int face_no) const
+{
+  Assert(active(), TriaAccessorExceptions::ExcCellNotActive());
+  Assert(face_no<GeometryInfo<2>::faces_per_cell,
+         ExcIndexRange(face_no,0,GeometryInfo<2>::faces_per_cell));
+  if (face(face_no)->has_children())
+    return dealii::internal::SubfaceCase<2>::case_x;
+  return dealii::internal::SubfaceCase<2>::case_none;
+}
+// Specialization for CellAccessor<2,3>: a face with children yields case_x, any other case_none.
+template <>
+inline
+dealii::internal::SubfaceCase<2>
+CellAccessor<2,3>::subface_case(const unsigned int face_no) const
+{
+  Assert(active(), TriaAccessorExceptions::ExcCellNotActive());
+  Assert(face_no<GeometryInfo<2>::faces_per_cell,
+         ExcIndexRange(face_no,0,GeometryInfo<2>::faces_per_cell));
+  if (face(face_no)->has_children())
+    return dealii::internal::SubfaceCase<2>::case_x;
+  return dealii::internal::SubfaceCase<2>::case_none;
+}
+// Specialization for CellAccessor<3>: classify how face @p face_no of this active cell is
+// subdivided, from the face's refinement case and from whether its children 0 and 1 have children.
+template <>
+inline
+dealii::internal::SubfaceCase<3>
+CellAccessor<3>::subface_case(const unsigned int face_no) const
+{
+  Assert(active(), TriaAccessorExceptions::ExcCellNotActive());
+  Assert(face_no<GeometryInfo<3>::faces_per_cell,
+         ExcIndexRange(face_no,0,GeometryInfo<3>::faces_per_cell));
+  switch (static_cast<unsigned char> (face(face_no)->refinement_case()))
+    {
+    case RefinementCase<3>::no_refinement: // NOTE(review): labels use RefinementCase<3> for a face's case; presumably same values as RefinementCase<2> -- confirm
+      return dealii::internal::SubfaceCase<3>::case_none;
+      break;
+    case RefinementCase<3>::cut_x:
+      if (face(face_no)->child(0)->has_children())
+        {
+          Assert(face(face_no)->child(0)->refinement_case()==RefinementCase<2>::cut_y,
+                 ExcInternalError());
+          if (face(face_no)->child(1)->has_children())
+            {
+              Assert(face(face_no)->child(1)->refinement_case()==RefinementCase<2>::cut_y,
+                     ExcInternalError());
+              return dealii::internal::SubfaceCase<3>::case_x1y2y;
+            }
+          else
+            return dealii::internal::SubfaceCase<3>::case_x1y;
+        }
+      else
+        {
+          if (face(face_no)->child(1)->has_children())
+            {
+              Assert(face(face_no)->child(1)->refinement_case()==RefinementCase<2>::cut_y,
+                     ExcInternalError());
+              return dealii::internal::SubfaceCase<3>::case_x2y;
+            }
+          else
+            return dealii::internal::SubfaceCase<3>::case_x;
+        }
+      break;
+    case RefinementCase<3>::cut_y:
+      if (face(face_no)->child(0)->has_children())
+        {
+          Assert(face(face_no)->child(0)->refinement_case()==RefinementCase<2>::cut_x,
+                 ExcInternalError());
+          if (face(face_no)->child(1)->has_children())
+            {
+              Assert(face(face_no)->child(1)->refinement_case()==RefinementCase<2>::cut_x,
+                     ExcInternalError());
+              return dealii::internal::SubfaceCase<3>::case_y1x2x;
+            }
+          else
+            return dealii::internal::SubfaceCase<3>::case_y1x;
+        }
+      else
+        {
+          if (face(face_no)->child(1)->has_children())
+            {
+              Assert(face(face_no)->child(1)->refinement_case()==RefinementCase<2>::cut_x,
+                     ExcInternalError());
+              return dealii::internal::SubfaceCase<3>::case_y2x;
+            }
+          else
+            return dealii::internal::SubfaceCase<3>::case_y;
+        }
+      break;
+    case RefinementCase<3>::cut_xy:
+      return dealii::internal::SubfaceCase<3>::case_xy;
+      break;
+    default:
+      Assert(false, ExcInternalError());
+    }
+  // we should never get here
+  return dealii::internal::SubfaceCase<3>::case_none;
+}
+
+// Return whether this cell is flagged for coarsening, read from the coarsen_flags
+// array of the cell's level. The cell must be used (asserted).
+template <int dim, int spacedim>
+inline
+bool
+CellAccessor<dim,spacedim>::coarsen_flag_set () const
+{
+  Assert (this->used(), TriaAccessorExceptions::ExcCellNotUsed());
+  // cells flagged for coarsening must be active
+  // (the @p set_coarsen_flag function checks this,
+  // but activity may change when refinement is
+  // executed and for some reason the coarsen
+  // flag is not cleared).
+  Assert (this->active() ||  !this->tria->levels[this->present_level]->coarsen_flags[this->present_index],
+          ExcRefineCellNotActive());
+  return this->tria->levels[this->present_level]->coarsen_flags[this->present_index];
+}
+
+// Flag this cell for coarsening. The assertions require the cell to be used and
+// active, and not to be flagged for refinement.
+template <int dim, int spacedim>
+inline
+void
+CellAccessor<dim,spacedim>::set_coarsen_flag () const
+{
+  Assert (this->used() && this->active(), ExcRefineCellNotActive());
+  Assert (!refine_flag_set(), ExcCellFlaggedForRefinement());
+
+  this->tria->levels[this->present_level]->coarsen_flags[this->present_index] = true;
+}
+
+// Remove the coarsen flag from this cell by setting its coarsen_flags entry to false.
+// The assertion requires the cell to be used and active.
+template <int dim, int spacedim>
+inline
+void
+CellAccessor<dim,spacedim>::clear_coarsen_flag () const
+{
+  Assert (this->used() && this->active(), ExcRefineCellNotActive());
+  this->tria->levels[this->present_level]->coarsen_flags[this->present_index] = false;
+}
+
+// Return an iterator to the neighbor behind face @p i, built from neighbor_level(i) and
+// neighbor_index(i). The assertion requires the result to be past-the-end or a used cell.
+template <int dim, int spacedim>
+inline
+TriaIterator<CellAccessor<dim,spacedim> >
+CellAccessor<dim,spacedim>::neighbor (const unsigned int i) const
+{
+  TriaIterator<CellAccessor<dim,spacedim> >
+  q (this->tria, neighbor_level (i), neighbor_index (i));
+
+  Assert ((q.state() == IteratorState::past_the_end) || q->used(),
+          TriaAccessorExceptions::ExcUnusedCellAsNeighbor());
+
+  return q;
+}
+
+// Return an iterator to the @p i-th child: the cell on level present_level+1 with index
+// child_index(i). The assertion requires the result to be past-the-end or a used cell.
+template <int dim, int spacedim>
+inline
+TriaIterator<CellAccessor<dim,spacedim> >
+CellAccessor<dim,spacedim>::child (const unsigned int i) const
+{
+  TriaIterator<CellAccessor<dim,spacedim> >
+  q (this->tria, this->present_level+1, this->child_index (i));
+
+  Assert ((q.state() == IteratorState::past_the_end) || q->used(),
+          TriaAccessorExceptions::ExcUnusedCellAsChild());
+
+  return q;
+}
+
+
+// A cell is active exactly if it has no children.
+template <int dim, int spacedim>
+inline
+bool
+CellAccessor<dim,spacedim>::active () const
+{
+  return !this->has_children();
+}
+
+
+
+template <int dim, int spacedim>
+inline
+bool
+CellAccessor<dim,spacedim>::is_locally_owned () const
+{
+  Assert (this->active(),
+          ExcMessage("is_locally_owned() can only be called on active cells!"));
+#ifdef DEAL_II_WITH_MPI
+  // artificial cells are never reported as locally owned
+  if (is_artificial())
+    return false;
+
+  // if the triangulation is not a parallel one, the cast yields a null
+  // pointer and the cell counts as locally owned; otherwise compare ids
+  const parallel::Triangulation<dim,spacedim> *parallel_tria
+    = dynamic_cast<const parallel::Triangulation<dim,spacedim> *>(this->tria);
+  return ((parallel_tria == 0)
+          ||
+          (this->subdomain_id() == parallel_tria->locally_owned_subdomain()));
+#else
+  return true;
+#endif
+}
+
+
+template <int dim, int spacedim>
+inline
+bool
+CellAccessor<dim,spacedim>::is_locally_owned_on_level () const
+{
+#ifdef DEAL_II_WITH_MPI
+  // if the triangulation is not a parallel one, the cast yields a
+  // null pointer and the cell counts as locally owned
+  const parallel::Triangulation<dim,spacedim> *parallel_tria
+    = dynamic_cast<const parallel::Triangulation<dim,spacedim> *>(this->tria);
+
+  // otherwise compare the level subdomain id with the locally owned one
+  return ((parallel_tria == 0)
+          ||
+          (this->level_subdomain_id() == parallel_tria->locally_owned_subdomain()));
+#else
+  // without MPI support this query always answers true
+  return true;
+#endif
+}
+
+
+template <int dim, int spacedim>
+inline
+bool
+CellAccessor<dim,spacedim>::is_ghost () const
+{
+  Assert (this->active(),
+          ExcMessage("is_ghost() can only be called on active cells!"));
+  // artificial cells and cells with children are never ghost cells
+  if (is_artificial() || this->has_children())
+    return false;
+
+#ifdef DEAL_II_WITH_MPI
+  const parallel::Triangulation<dim,spacedim> *parallel_tria
+    = dynamic_cast<const parallel::Triangulation<dim,spacedim> *>(this->tria);
+
+  // a ghost cell requires a parallel triangulation and a subdomain
+  // id that differs from the locally owned one
+  return ((parallel_tria != 0)
+          &&
+          (this->subdomain_id() != parallel_tria->locally_owned_subdomain()));
+#else
+  return false;
+#endif
+}
+
+
+
+template <int dim, int spacedim>
+inline
+bool
+CellAccessor<dim,spacedim>::is_artificial () const
+{
+  Assert (this->active(),
+          ExcMessage("is_artificial() can only be called on active cells!"));
+#ifdef DEAL_II_WITH_MPI
+  const parallel::Triangulation<dim,spacedim> *parallel_tria
+    = dynamic_cast<const parallel::Triangulation<dim,spacedim> *>(this->tria);
+
+  // a cell is artificial only on a parallel triangulation, and only
+  // if it carries the artificial subdomain id
+  return ((parallel_tria != 0)
+          &&
+          (this->subdomain_id() == numbers::artificial_subdomain_id));
+#else
+  // without MPI support this query always answers false
+  return false;
+#endif
+}
+
+// Return the subdomain id stored for this cell in the subdomain_ids array of its level.
+// The assertions require the cell to be used and active.
+template <int dim, int spacedim>
+inline
+types::subdomain_id
+CellAccessor<dim, spacedim>::subdomain_id () const
+{
+  Assert (this->used(), TriaAccessorExceptions::ExcCellNotUsed());
+  Assert (this->active(),
+          ExcMessage("subdomain_id() can only be called on active cells!"));
+  return this->tria->levels[this->present_level]->subdomain_ids[this->present_index];
+}
+
+
+
+template <int dim, int spacedim>
+inline
+unsigned int
+CellAccessor<dim,spacedim>::neighbor_face_no (const unsigned int neighbor) const
+{
+  const unsigned int face_no_candidate
+    = neighbor_of_neighbor_internal(neighbor);
+
+  // an invalid value means the neighbor is coarser than this cell; the
+  // face number then comes from the coarser-neighbor lookup instead
+  if (face_no_candidate == numbers::invalid_unsigned_int)
+    return neighbor_of_coarser_neighbor(neighbor).first;
+  return face_no_candidate;
+}
+
+
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/grid/tria_boundary.h b/include/deal.II/grid/tria_boundary.h
new file mode 100644
index 0000000..127b3fe
--- /dev/null
+++ b/include/deal.II/grid/tria_boundary.h
@@ -0,0 +1,489 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__tria_boundary_h
+#define dealii__tria_boundary_h
+
+
+/*----------------------------   boundary-function.h     ---------------------------*/
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/thread_management.h>
+#include <deal.II/base/point.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/manifold.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int dim, int space_dim> class Triangulation;
+
+
+
+/**
+ * This class is used to represent a boundary to a triangulation. When a
+ * triangulation creates a new vertex on the boundary of the domain, it
+ * determines the new vertex's coordinates through the following code (here in
+ * two dimensions):
+ *   @code
+ *     ...
+ *     Point<2> new_vertex = boundary.get_new_point_on_line (line);
+ *     ...
+ *   @endcode
+ * @p line denotes the line at the boundary that shall be refined and for
+ * which we seek the common point of the two child lines.
+ *
+ * In 3D, a new vertex may be placed on the middle of a line or on the middle
+ * of a side. Respectively, the library calls
+ *   @code
+ *     ...
+ *     Point<3> new_line_vertices[4]
+ *       = { boundary.get_new_point_on_line (face->line(0)),
+ *           boundary.get_new_point_on_line (face->line(1)),
+ *           boundary.get_new_point_on_line (face->line(2)),
+ *           boundary.get_new_point_on_line (face->line(3))  };
+ *     ...
+ *   @endcode
+ * to get the four midpoints of the lines bounding the quad at the boundary,
+ * and after that
+ *   @code
+ *     ...
+ *     Point<3> new_quad_vertex = boundary.get_new_point_on_quad (face);
+ *     ...
+ *   @endcode
+ * to get the midpoint of the face. It is guaranteed that this order (first
+ * lines, then faces) holds, so you can use information from the children of
+ * the four lines of a face, since these already exist at the time the
+ * midpoint of the face is to be computed.
+ *
+ * Since iterators are passed to the functions, you may use information about
+ * boundary indicators and the like, as well as all other information provided
+ * by these objects.
+ *
+ * There are specializations, StraightBoundary, which places the new point
+ * right into the middle of the given points, and HyperBallBoundary creating a
+ * hyperball with given radius around a given center point.
+ *
+ * @ingroup boundary
+ * @author Wolfgang Bangerth, 1999, 2001, 2009, Ralf Hartmann, 2001, 2008,
+ * Luca Heltai, 2014
+ */
+template <int dim, int spacedim=dim>
+class Boundary : public FlatManifold<dim, spacedim>
+{
+public:
+
+  /**
+   * Type keeping information about the normals at the vertices of a face of a
+   * cell. Thus, there are <tt>GeometryInfo<dim>::vertices_per_face</tt>
+   * normal vectors, that define the tangent spaces of the boundary at the
+   * vertices. Note that the vectors stored in this object are not required to
+   * be normalized, nor to actually point outward, as one often will only want
+   * to check for orthogonality to define the tangent plane; if a function
+   * requires the normals to be normalized, then it must do so itself.
+   *
+   * For obvious reasons, this type is not useful in 1d.
+   */
+  typedef Tensor<1,spacedim> FaceVertexNormals[GeometryInfo<dim>::vertices_per_face];
+
+  /**
+   * Destructor. Does nothing here, but needs to be declared to make it
+   * virtual.
+   */
+  virtual ~Boundary ();
+
+
+  /**
+   * Return intermediate points on a line spaced according to the interior
+   * support points of the 1D Gauss-Lobatto quadrature formula.
+   *
+   * The number of points requested is given by the size of the vector @p
+   * points. It is the task of derived classes to arrange the points in
+   * approximately equal distances along the length of the line segment on the
+   * boundary bounded by the vertices of the first argument.
+   *
+   * Among other places in the library, this function is called by the Mapping
+   * classes, for example the @p MappingQGeneric class. On the other hand, not
+   * all mapping classes actually require intermediate points on lines (for
+   * example, $Q_1$ mappings do not). Consequently this function is not made
+   * pure virtual, to allow users to define their own boundary classes without
+   * having to overload this function. However, the default implementation
+   * throws an error in any case and can, consequently, not be used if you use
+   * a mapping that does need the information provided by this function.
+   */
+  virtual
+  void
+  get_intermediate_points_on_line (const typename Triangulation<dim,spacedim>::line_iterator &line,
+                                   std::vector<Point<spacedim> > &points) const;
+
+  /**
+   * Return intermediate points on a line spaced according to the tensor
+   * product of the interior support points of the 1D Gauss-Lobatto quadrature
+   * formula.
+   *
+   * The number of points requested is given by the size of the vector @p
+   * points. It is required that this number is a square of another integer,
+   * i.e. <tt>n=points.size()=m*m</tt>. It is the task of the derived classes
+   * to arrange the points such that they split the quad into <tt>(m+1)(m+1)</tt>
+   * approximately equal-sized subquads.
+   *
+   * Among other places in the library, this function is called by the Mapping
+   * classes, for example the @p MappingQGeneric class. On the other hand, not
+   * all mapping classes actually require intermediate points on quads (for
+   * example, $Q_1$ mappings do not). Consequently this function is not made
+   * pure virtual, to allow users to define their own boundary classes without
+   * having to overload this function. However, the default implementation
+   * throws an error in any case and can, consequently, not be used if you use
+   * a mapping that does need the information provided by this function.
+   */
+  virtual
+  void
+  get_intermediate_points_on_quad (const typename Triangulation<dim,spacedim>::quad_iterator &quad,
+                                   std::vector<Point<spacedim> > &points) const;
+
+  /**
+   * Depending on <tt>dim=2</tt> or <tt>dim=3</tt> this function calls the
+   * get_intermediate_points_on_line or the get_intermediate_points_on_quad
+   * function. It throws an exception for <tt>dim=1</tt>. This wrapper allows
+   * dimension independent programming.
+   */
+  void
+  get_intermediate_points_on_face (const typename Triangulation<dim,spacedim>::face_iterator &face,
+                                   std::vector<Point<spacedim> > &points) const;
+
+  /**
+   * Return the normal vector to the surface at the point p. If p is not in
+   * fact on the surface, but only close-by, try to return something
+   * reasonable, for example the normal vector at the surface point closest to
+   * p.  (The point p will in fact not normally lie on the actual surface, but
+   * rather be a quadrature point mapped by some polynomial mapping; the
+   * mapped surface, however, will not usually coincide with the actual
+   * surface.)
+   *
+   * The face iterator gives an indication which face this function is
+   * supposed to compute the normal vector for.  This is useful if the
+   * boundary of the domain is composed of different nondifferentiable pieces
+   * (for example when using the StraightBoundary class to approximate a
+   * geometry that is completely described by the coarse mesh, with piecewise
+   * (bi-)linear components between the vertices, but where the boundary may
+   * have a kink at the vertices itself).
+   *
+   * @note Implementations of this function should be able to assume that the
+   * point p lies within or close to the face described by the first argument.
+   * In turn, callers of this function should ensure that this is in fact the
+   * case.
+   */
+  virtual
+  Tensor<1,spacedim>
+  normal_vector (const typename Triangulation<dim,spacedim>::face_iterator &face,
+                 const Point<spacedim> &p) const;
+
+  /**
+   * Compute the normal vectors to the boundary at each vertex of the given
+   * face. It is not required that the normal vectors be normed somehow.
+   * Neither is it required that the normals actually point outward.
+   *
+   * This function is needed to compute data for C1 mappings. The default
+   * implementation is to throw an error, so you need not overload this
+   * function in case you do not intend to use C1 mappings.
+   *
+   * Note that when computing normal vectors at a vertex where the boundary is
+   * not differentiable, you have to make sure that you compute the one-sided
+   * limits, i.e. limit with respect to points inside the given face.
+   */
+  virtual
+  void
+  get_normals_at_vertices (const typename Triangulation<dim,spacedim>::face_iterator &face,
+                           FaceVertexNormals &face_vertex_normals) const;
+
+  /**
+   * Given a candidate point and a line segment characterized by the iterator,
+   * return a point that lies on the surface described by this object. This
+   * function is used in some mesh smoothing algorithms that try to move
+   * around points in order to improve the mesh quality but need to ensure
+   * that points that were on the boundary remain on the boundary.
+   *
+   * If spacedim==1, then the line represented by the line iterator is the
+   * entire space (i.e. it is a cell, not a part of the boundary), and the
+   * returned point equals the given input point.
+   *
+   * Derived classes do not need to implement this function unless mesh
+   * smoothing algorithms are used with a particular boundary object. The
+   * default implementation of this function throws an exception of type
+   * ExcPureFunctionCalled.
+   */
+  virtual
+  Point<spacedim>
+  project_to_surface (const typename Triangulation<dim,spacedim>::line_iterator &line,
+                      const Point<spacedim> &candidate) const;
+
+  /**
+   * Same function as above but for a point that is to be projected onto the
+   * area characterized by the given quad.
+   *
+   * If spacedim<=2, then the surface represented by the quad iterator is the
+   * entire space (i.e. it is a cell, not a part of the boundary), and the
+   * returned point equals the given input point.
+   */
+  virtual
+  Point<spacedim>
+  project_to_surface (const typename Triangulation<dim,spacedim>::quad_iterator &quad,
+                      const Point<spacedim> &candidate) const;
+
+  /**
+   * Same function as above but for a point that is to be projected onto the
+   * manifold characterized by the given hex.
+   *
+   * If spacedim<=3, then the manifold represented by the hex iterator is the
+   * entire space (i.e. it is a cell, not a part of the boundary), and the
+   * returned point equals the given input point.
+   */
+  virtual
+  Point<spacedim>
+  project_to_surface (const typename Triangulation<dim,spacedim>::hex_iterator &hex,
+                      const Point<spacedim> &candidate) const;
+
+protected:
+  /**
+   * Returns the support points of the Gauss-Lobatto quadrature formula used
+   * for intermediate points.
+   *
+   * @note Since the boundary description is closely tied to the unit cell
+   * support points of MappingQ, new boundary descriptions need to explicitly
+   * use these Gauss-Lobatto points and not equidistant points.
+   */
+  const std::vector<Point<1> > &
+  get_line_support_points (const unsigned int n_intermediate_points) const;
+
+private:
+  /**
+   * Point generator for the intermediate points on a boundary.
+   */
+  mutable std::vector<std_cxx11::shared_ptr<QGaussLobatto<1> > > points;
+
+  /**
+   * Mutex for protecting the points array.
+   */
+  mutable Threads::Mutex mutex;
+};
+
+
+
+/**
+ * Specialization of Boundary<dim,spacedim>, which places the new point right
+ * into the middle of the given points. The middle is defined as the
+ * arithmetic mean of the points.
+ *
+ * This class does not really describe a boundary in the usual sense. By
+ * placing new points in the middle of old ones, it rather assumes that the
+ * boundary of the domain is given by the polygon/polyhedron defined by the
+ * boundary of the initial coarse triangulation.
+ *
+ * @ingroup boundary
+ *
+ * @author Wolfgang Bangerth, 1998, 2001, Ralf Hartmann, 2001
+ */
+template <int dim, int spacedim=dim>
+class StraightBoundary : public Boundary<dim,spacedim>
+{
+public:
+  /**
+   * Default constructor. Some compilers require this for some reason.
+   */
+  StraightBoundary ();
+
+  /**
+   * Let the new point be the arithmetic mean of the two vertices of the line.
+   *
+   * Refer to the general documentation of this class and the documentation of
+   * the base class for more information.
+   */
+  virtual Point<spacedim>
+  get_new_point_on_line (const typename Triangulation<dim,spacedim>::line_iterator &line) const;
+
+  /**
+   * Let the new point be the arithmetic mean of the four vertices of this
+   * quad and the four midpoints of the lines, which are already created at
+   * the time of calling this function.
+   *
+   * Refer to the general documentation of this class and the documentation of
+   * the base class for more information.
+   */
+  virtual
+  Point<spacedim>
+  get_new_point_on_quad (const typename Triangulation<dim,spacedim>::quad_iterator &quad) const;
+
+  /**
+   * Gives <tt>n=points.size()</tt> points that split the StraightBoundary
+   * line into $n+1$ partitions of equal length.
+   *
+   * Refer to the general documentation of this class and the documentation of
+   * the base class.
+   */
+  virtual
+  void
+  get_intermediate_points_on_line (const typename Triangulation<dim,spacedim>::line_iterator &line,
+                                   std::vector<Point<spacedim> > &points) const;
+
+  /**
+   * Gives <tt>n=points.size()=m*m</tt> points that split the
+   * StraightBoundary quad into $(m+1)(m+1)$ subquads of equal size.
+   *
+   * Refer to the general documentation of this class and the documentation of
+   * the base class.
+   */
+  virtual
+  void
+  get_intermediate_points_on_quad (const typename Triangulation<dim,spacedim>::quad_iterator &quad,
+                                   std::vector<Point<spacedim> > &points) const;
+
+  /**
+   * Implementation of the function declared in the base class.
+   *
+   * Refer to the general documentation of this class and the documentation of
+   * the base class.
+   */
+  virtual
+  Tensor<1,spacedim>
+  normal_vector (const typename Triangulation<dim,spacedim>::face_iterator &face,
+                 const Point<spacedim> &p) const;
+
+  /**
+   * Compute the normals to the boundary at the vertices of the given face.
+   *
+   * Refer to the general documentation of this class and the documentation of
+   * the base class.
+   */
+  virtual
+  void
+  get_normals_at_vertices (const typename Triangulation<dim,spacedim>::face_iterator &face,
+                           typename Boundary<dim,spacedim>::FaceVertexNormals &face_vertex_normals) const;
+
+  /**
+   * Given a candidate point and a line segment characterized by the iterator,
+   * return a point that lies on the surface described by this object. This
+   * function is used in some mesh smoothing algorithms that try to move
+   * around points in order to improve the mesh quality but need to ensure
+   * that points that were on the boundary remain on the boundary.
+   *
+   * The point returned is the projection of the candidate point onto the line
+   * through the two vertices of the given line iterator.
+   *
+   * If spacedim==1, then the line represented by the line iterator is the
+   * entire space (i.e. it is a cell, not a part of the boundary), and the
+   * returned point equals the given input point.
+   */
+  virtual
+  Point<spacedim>
+  project_to_surface (const typename Triangulation<dim,spacedim>::line_iterator &line,
+                      const Point<spacedim> &candidate) const;
+
+  /**
+   * Same function as above but for a point that is to be projected onto the
+   * area characterized by the given quad.
+   *
+   * The point returned is the projection of the candidate point onto the
+   * bilinear surface spanned by the four vertices of the given quad iterator.
+   *
+   * If spacedim<=2, then the surface represented by the quad iterator is the
+   * entire space (i.e. it is a cell, not a part of the boundary), and the
+   * returned point equals the given input point.
+   */
+  virtual
+  Point<spacedim>
+  project_to_surface (const typename Triangulation<dim,spacedim>::quad_iterator &quad,
+                      const Point<spacedim> &candidate) const;
+
+  /**
+   * Same function as above but for a point that is to be projected onto the
+   * manifold characterized by the given hex.
+   *
+   * The point returned is the projection of the candidate point onto the
+   * trilinear manifold spanned by the eight vertices of the given hex
+   * iterator.
+   *
+   * If spacedim<=3, then the manifold represented by the hex iterator is the
+   * entire space (i.e. it is a cell, not a part of the boundary), and the
+   * returned point equals the given input point.
+   */
+  virtual
+  Point<spacedim>
+  project_to_surface (const typename Triangulation<dim,spacedim>::hex_iterator &hex,
+                      const Point<spacedim> &candidate) const;
+};
+
+
+
+/* -------------- declaration of explicit specializations ------------- */
+
+#ifndef DOXYGEN // the declarations up to the matching #endif are skipped when DOXYGEN is defined
+
+template <>
+void
+Boundary<1,1>::
+get_intermediate_points_on_face (const Triangulation<1,1>::face_iterator &,
+                                 std::vector<Point<1> > &) const;
+
+template <>
+void
+Boundary<1,2>::
+get_intermediate_points_on_face (const Triangulation<1,2>::face_iterator &,
+                                 std::vector<Point<2> > &) const;
+
+template <>
+void
+Boundary<1,3>::
+get_intermediate_points_on_face (const Triangulation<1,3>::face_iterator &,
+                                 std::vector<Point<3> > &) const;
+template <>
+void
+StraightBoundary<1,1>::
+get_normals_at_vertices (const Triangulation<1,1>::face_iterator &,
+                         Boundary<1,1>::FaceVertexNormals &) const;
+template <>
+void
+StraightBoundary<2,2>::
+get_normals_at_vertices (const Triangulation<2,2>::face_iterator &face,
+                         Boundary<2,2>::FaceVertexNormals &face_vertex_normals) const;
+template <>
+void
+StraightBoundary<3,3>::
+get_normals_at_vertices (const Triangulation<3,3>::face_iterator &face,
+                         Boundary<3,3>::FaceVertexNormals &face_vertex_normals) const;
+
+template <>
+Point<3>
+StraightBoundary<3,3>::
+get_new_point_on_quad (const Triangulation<3,3>::quad_iterator &quad) const;
+
+template <>
+void
+StraightBoundary<3,3>::
+get_intermediate_points_on_quad (const Triangulation<3,3>::quad_iterator &quad,
+                                 std::vector<Point<3> > &points) const;
+
+template <>
+Point<3>
+StraightBoundary<1,3>::
+project_to_surface (const Triangulation<1, 3>::quad_iterator &quad,
+                    const Point<3>  &y) const;
+
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/grid/tria_boundary_lib.h b/include/deal.II/grid/tria_boundary_lib.h
new file mode 100644
index 0000000..4ac7805
--- /dev/null
+++ b/include/deal.II/grid/tria_boundary_lib.h
@@ -0,0 +1,766 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__tria_boundary_lib_h
+#define dealii__tria_boundary_lib_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/grid/tria_boundary.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * Boundary object for the hull of a cylinder.  In three dimensions, points
+ * are projected on a circular tube along the <tt>x-</tt>, <tt>y-</tt> or
+ * <tt>z</tt>-axis (when using the first constructor of this class), or an
+ * arbitrarily oriented cylinder described by the direction of its axis and a
+ * point located on the axis. The radius of the tube can be given
+ * independently. Similar to HyperBallBoundary, new points are projected by
+ * dividing the straight line between the old two points and adjusting the
+ * radius from the axis.
+ *
+ * This class was developed to be used in conjunction with the @p cylinder
+ * function of GridGenerator. It should be used for the hull of the cylinder
+ * only (boundary indicator 0). Its use is discussed in detail in the results
+ * section of step-49.
+ *
+ * This class is derived from StraightBoundary rather than from Boundary,
+ * which would seem natural, since this way we can use the
+ * StraightBoundary::in_between() function.
+ *
+ * @ingroup boundary
+ *
+ * @author Guido Kanschat, 2001, Wolfgang Bangerth, 2007
+ */
+template <int dim, int spacedim = dim>
+class CylinderBoundary : public StraightBoundary<dim,spacedim>
+{
+public:
+  /**
+   * Constructor. Using default values for the constructor arguments yields a
+   * circular tube along the x-axis (<tt>axis=0</tt>). Choose <tt>axis=1</tt>
+   * or <tt>axis=2</tt> for a tube along the y- or z-axis, respectively.
+   */
+  CylinderBoundary (const double radius = 1.0,
+                    const unsigned int axis = 0);
+
+  /**
+   * Constructor. If constructed with this constructor, the boundary described
+   * is a cylinder with an axis that points in direction #direction and goes
+   * through the given #point_on_axis. The direction may be arbitrarily
+   * scaled, and the given point may be any point on the axis.
+   */
+  CylinderBoundary (const double           radius,
+                    const Point<spacedim> &direction,
+                    const Point<spacedim> &point_on_axis);
+
+  /**
+   * Refer to the general documentation of this class and the documentation of
+   * the base class.
+   */
+  virtual Point<spacedim>
+  get_new_point_on_line (const typename Triangulation<dim,spacedim>::line_iterator &line) const;
+
+  /**
+   * Refer to the general documentation of this class and the documentation of
+   * the base class.
+   */
+  virtual Point<spacedim>
+  get_new_point_on_quad (const typename Triangulation<dim,spacedim>::quad_iterator &quad) const;
+
+  /**
+   * Refer to the general documentation of this class and the documentation of
+   * the base class.
+   *
+   * Calls @p get_intermediate_points_between_points.
+   */
+  virtual void
+  get_intermediate_points_on_line (const typename Triangulation<dim,spacedim>::line_iterator &line,
+                                   std::vector<Point<spacedim> > &points) const;
+
+  /**
+   * Refer to the general documentation of this class and the documentation of
+   * the base class.
+   *
+   * Only implemented for <tt>dim=3</tt> and for <tt>points.size()==1</tt>.
+   */
+  virtual void
+  get_intermediate_points_on_quad (const typename Triangulation<dim,spacedim>::quad_iterator &quad,
+                                   std::vector<Point<spacedim> > &points) const;
+
+  /**
+   * Compute the normals to the boundary at the vertices of the given face.
+   *
+   * Refer to the general documentation of this class and the documentation of
+   * the base class.
+   */
+  virtual void
+  get_normals_at_vertices (const typename Triangulation<dim,spacedim>::face_iterator &face,
+                           typename Boundary<dim,spacedim>::FaceVertexNormals &face_vertex_normals) const;
+
+  /**
+   * Return the radius of the cylinder.
+   */
+  double get_radius () const;
+
+  /**
+   * Exception named for an unset radius. NOTE(review): this class declares no
+   * @p compute_radius_automatically flag; confirm whether this is still used.
+   */
+  DeclException0 (ExcRadiusNotSet);
+
+
+protected:
+  /**
+   * Radius of the cylinder.
+   */
+  const double radius;
+
+  /**
+   * The direction vector of the axis.
+   */
+  const Point<spacedim> direction;
+
+  /**
+   * An arbitrary point on the axis.
+   */
+  const Point<spacedim> point_on_axis;
+
+private:
+
+  /**
+   * Called by @p get_intermediate_points_on_line and by @p
+   * get_intermediate_points_on_quad.
+   *
+   * Refer to the general documentation of @p get_intermediate_points_on_line
+   * in the documentation of the base class.
+   */
+  void get_intermediate_points_between_points (const Point<spacedim> &p0, const Point<spacedim> &p1,
+                                               std::vector<Point<spacedim> > &points) const;
+
+  /**
+   * Given a number for the axis, return a vector that denotes this direction.
+   */
+  static Point<spacedim> get_axis_vector (const unsigned int axis);
+};
+
+
+
+/**
+ * Boundary object for the hull of a (truncated) cone with two different radii
+ * at the two ends. If one radius is chosen to be 0 the object describes the
+ * boundary of a cone. In three dimensions, points are projected on an
+ * arbitrarily oriented (truncated) cone described by the two endpoints and
+ * the corresponding radii. Similar to HyperBallBoundary, new points are
+ * projected by dividing the straight line between the old two points and
+ * adjusting the radius from the axis.
+ *
+ * This class is derived from StraightBoundary rather than from Boundary,
+ * which would seem natural, since this way we can use the
+ * StraightBoundary::in_between() function.
+ *
+ * As an example of use, consider the following code snippet:
+ * @code
+ *  Triangulation<dim> triangulation;
+ *  GridGenerator::truncated_cone (triangulation);
+ *  Point<dim> p1, p2;
+ *  p1[0] = -1;
+ *  p2[0] = 1;
+ *  const ConeBoundary<dim> boundary (1, 0.5, p1, p2);
+ *  triangulation.set_boundary (0, boundary);
+ *  triangulation.refine_global (2);
+ * @endcode
+ * This will produce the following meshes after the two refinements we
+ * perform, in 2d and 3d, respectively:
+ *
+ * @image html cone_2d.png
+ * @image html cone_3d.png
+ *
+ * @author Markus Bürg, 2009
+ */
+template <int dim>
+class ConeBoundary : public StraightBoundary<dim>
+{
+public:
+  /**
+   * Constructor. Here the boundary object is constructed. The points
+   * <tt>x_0</tt> and <tt>x_1</tt> describe the starting and ending points of
+   * the axis of the (truncated) cone. <tt>radius_0</tt> denotes the radius
+   * corresponding to <tt>x_0</tt> and <tt>radius_1</tt> the one corresponding
+   * to <tt>x_1</tt>.
+   */
+  ConeBoundary (const double radius_0,
+                const double radius_1,
+                const Point<dim> x_0,
+                const Point<dim> x_1);
+
+  /**
+   * Return the radius of the (truncated) cone at the given point <tt>x</tt>
+   * on the axis.
+   */
+  double get_radius (const Point<dim> x) const;
+
+  /**
+   * Refer to the general documentation of this class and the documentation of
+   * the base class.
+   */
+  virtual
+  Point<dim>
+  get_new_point_on_line (const typename Triangulation<dim>::line_iterator &line) const;
+
+  /**
+   * Refer to the general documentation of this class and the documentation of
+   * the base class.
+   */
+  virtual
+  Point<dim>
+  get_new_point_on_quad (const typename Triangulation<dim>::quad_iterator &quad) const;
+
+  /**
+   * Refer to the general documentation of this class and the documentation of
+   * the base class.
+   *
+   * Calls @p get_intermediate_points_between_points.
+   */
+  virtual
+  void
+  get_intermediate_points_on_line (const typename Triangulation<dim>::line_iterator &line,
+                                   std::vector<Point<dim> > &points) const;
+
+  /**
+   * Refer to the general documentation of this class and the documentation of
+   * the base class.
+   *
+   * Only implemented for <tt>dim=3</tt> and for <tt>points.size()==1</tt>.
+   */
+  virtual
+  void
+  get_intermediate_points_on_quad (const typename Triangulation<dim>::quad_iterator &quad,
+                                   std::vector<Point<dim> > &points) const;
+
+  /**
+   * Compute the normals to the boundary at the vertices of the given face.
+   *
+   * Refer to the general documentation of this class and the documentation of
+   * the base class.
+   */
+  virtual
+  void
+  get_normals_at_vertices (const typename Triangulation<dim>::face_iterator &face,
+                           typename Boundary<dim>::FaceVertexNormals &face_vertex_normals) const;
+
+protected:
+  /**
+   * First radius of the (truncated) cone.
+   */
+  const double radius_0;
+
+  /**
+   * Second radius of the (truncated) cone.
+   */
+  const double radius_1;
+
+  /**
+   * Starting point of the axis.
+   */
+  const Point<dim> x_0;
+
+  /**
+   * Ending point of the axis.
+   */
+  const Point<dim> x_1;
+
+private:
+  /**
+   * Called by @p get_intermediate_points_on_line and by @p
+   * get_intermediate_points_on_quad.
+   *
+   * Refer to the general documentation of @p get_intermediate_points_on_line
+   * in the documentation of the base class.
+   */
+  void
+  get_intermediate_points_between_points (const Point<dim> &p0,
+                                          const Point<dim> &p1,
+                                          std::vector<Point<dim> > &points) const;
+};
+
+
+
+/**
+ * Specialization of Boundary<dim>, which places the new point on the boundary
+ * of a ball in arbitrary dimension. It works by projecting the point in the
+ * middle of the old points onto the ball. The middle is defined as the
+ * arithmetic mean of the points.
+ *
+ * The center of the ball and its radius may be given upon construction of an
+ * object of this type. They default to the origin and a radius of 1.0.
+ *
+ * This class is derived from StraightBoundary rather than from Boundary,
+ * which would seem natural, since this way we can use the
+ * StraightBoundary::in_between() function.
+ *
+ * @ingroup boundary
+ *
+ * @author Wolfgang Bangerth, 1998, Ralf Hartmann, 2001
+ */
+template <int dim, int spacedim=dim>
+class HyperBallBoundary : public StraightBoundary<dim,spacedim>
+{
+public:
+  /**
+   * Constructor. Takes the center @p p and the @p radius of the ball.
+   */
+  HyperBallBoundary (const Point<spacedim> p      = Point<spacedim>(),
+                     const double     radius = 1.0);
+
+  /**
+   * Refer to the general documentation of this class and the documentation of
+   * the base class.
+   */
+  virtual
+  Point<spacedim>
+  get_new_point_on_line (const typename Triangulation<dim,spacedim>::line_iterator &line) const;
+
+  /**
+   * Refer to the general documentation of this class and the documentation of
+   * the base class.
+   */
+  virtual
+  Point<spacedim>
+  get_new_point_on_quad (const typename Triangulation<dim,spacedim>::quad_iterator &quad) const;
+
+  /**
+   * Refer to the general documentation of this class and the documentation of
+   * the base class.
+   *
+   * Calls @p get_intermediate_points_between_points.
+   */
+  virtual
+  void
+  get_intermediate_points_on_line (const typename Triangulation<dim,spacedim>::line_iterator &line,
+                                   std::vector<Point<spacedim> > &points) const;
+
+  /**
+   * Refer to the general documentation of this class and the documentation of
+   * the base class.
+   *
+   * Only implemented for <tt>dim=3</tt> and for <tt>points.size()==1</tt>.
+   */
+  virtual
+  void
+  get_intermediate_points_on_quad (const typename Triangulation<dim,spacedim>::quad_iterator &quad,
+                                   std::vector<Point<spacedim> > &points) const;
+
+  /**
+   * Implementation of the function declared in the base class.
+   *
+   * Refer to the general documentation of this class and the documentation of
+   * the base class.
+   */
+  virtual
+  Tensor<1,spacedim>
+  normal_vector (const typename Triangulation<dim,spacedim>::face_iterator &face,
+                 const Point<spacedim> &p) const;
+
+  /**
+   * Compute the normals to the boundary at the vertices of the given face.
+   *
+   * Refer to the general documentation of this class and the documentation of
+   * the base class.
+   */
+  virtual
+  void
+  get_normals_at_vertices (const typename Triangulation<dim,spacedim>::face_iterator &face,
+                           typename Boundary<dim,spacedim>::FaceVertexNormals &face_vertex_normals) const;
+
+  /**
+   * Return the center of the ball.
+   */
+  Point<spacedim>
+  get_center () const;
+
+  /**
+   * Return the radius of the ball.
+   */
+  double
+  get_radius () const;
+
+  /**
+   * Exception. Thrown by the @p get_radius if the @p
+   * compute_radius_automatically, see below, flag is set true.
+   */
+  DeclException0 (ExcRadiusNotSet);
+
+
+protected:
+
+  /**
+   * Center point of the hyperball.
+   */
+  const Point<spacedim> center;
+
+  /**
+   * Radius of the hyperball.
+   */
+  const double radius;
+
+  /**
+   * This flag is @p false for this class and for all derived classes that set
+   * the radius by the constructor. For example, this flag is @p false for the
+   * HalfHyperBallBoundary class but it is @p true for the HyperShellBoundary
+   * class. The latter class doesn't get its radii by the constructor but
+   * needs to compute the radii automatically each time one of the virtual
+   * functions is called.
+   */
+  bool compute_radius_automatically;
+
+private:
+
+  /**
+   * Called by @p get_intermediate_points_on_line and by @p
+   * get_intermediate_points_on_quad.
+   *
+   * Refer to the general documentation of @p get_intermediate_points_on_line
+   * in the documentation of the base class.
+   */
+  void get_intermediate_points_between_points (const Point<spacedim> &p0, const Point<spacedim> &p1,
+                                               std::vector<Point<spacedim> > &points) const;
+};
+
+
+
+/**
+ * Variant of HyperBallBoundary which denotes a half hyper ball where the
+ * first coordinate is restricted to the range $x>=0$ (or $x>=center(0)$). In
+ * two dimensions, this equals the right half circle, in three space
+ * dimensions it is a half ball. This class might be useful for computations
+ * with rotational symmetry, where one dimension is the radius from the axis
+ * of rotation.
+ *
+ * @ingroup boundary
+ *
+ * @author Wolfgang Bangerth, 1999, 2001
+ */
+template <int dim>
+class HalfHyperBallBoundary : public HyperBallBoundary<dim>
+{
+public:
+  /**
+   * Constructor taking a point @p p and a @p radius (both have defaults).
+   */
+  HalfHyperBallBoundary (const Point<dim> p      = Point<dim>(),
+                         const double     radius = 1.0);
+
+  /**
+   * Check if on the line <tt>x==0</tt>, otherwise pass to the base class.
+   */
+  virtual Point<dim>
+  get_new_point_on_line (const typename Triangulation<dim>::line_iterator &line) const;
+
+  /**
+   * Check if on the line <tt>x==0</tt>, otherwise pass to the base class.
+   */
+  virtual Point<dim>
+  get_new_point_on_quad (const typename Triangulation<dim>::quad_iterator &quad) const;
+
+  /**
+   * Refer to the general documentation of this class and the documentation of
+   * the base class.
+   *
+   * Calls @p get_intermediate_points_between_points.
+   */
+  virtual void
+  get_intermediate_points_on_line (const typename Triangulation<dim>::line_iterator &line,
+                                   std::vector<Point<dim> > &points) const;
+
+  /**
+   * Refer to the general documentation of this class and the documentation of
+   * the base class.
+   *
+   * Only implemented for <tt>dim=3</tt> and for <tt>points.size()==1</tt>.
+   */
+  virtual void
+  get_intermediate_points_on_quad (const typename Triangulation<dim>::quad_iterator &quad,
+                                   std::vector<Point<dim> > &points) const;
+
+  /**
+   * Compute the normals to the boundary at the vertices of the given face.
+   *
+   * Refer to the general documentation of this class and the documentation of
+   * the base class.
+   */
+  virtual void
+  get_normals_at_vertices (const typename Triangulation<dim>::face_iterator &face,
+                           typename Boundary<dim>::FaceVertexNormals &face_vertex_normals) const;
+};
+
+
+
+/**
+ * Class describing the boundaries of a hyper shell. Only the center of the
+ * two spheres needs to be given, the radii of inner and outer sphere are
+ * computed automatically upon calling one of the virtual functions.
+ *
+ * @ingroup boundary
+ *
+ * @author Wolfgang Bangerth, 1999
+ */
+template <int dim>
+class HyperShellBoundary : public HyperBallBoundary<dim>
+{
+public:
+  /**
+   * Constructor. The center of the spheres defaults to the origin.
+   *
+   * Calls the constructor of its base @p HyperBallBoundary class with a dummy
+   * radius as argument. This radius will be ignored.
+   */
+  HyperShellBoundary (const Point<dim> &center = Point<dim>());
+};
+
+
+
+/**
+ * Variant of HyperShellBoundary which denotes a half hyper shell where the
+ * first coordinate is restricted to the range $x>=0$ (or $x>=center(0)$). In
+ * two dimensions, this equals the right half arc, in three space dimensions
+ * it is a half shell. This class might be useful for computations with
+ * rotational symmetry, where one dimension is the radius from the axis of
+ * rotation.
+ *
+ * @ingroup boundary
+ *
+ * @author Wolfgang Bangerth, 2000, 2009
+ */
+template <int dim>
+class HalfHyperShellBoundary : public HyperShellBoundary<dim>
+{
+public:
+  /**
+   * Constructor. The center of the spheres defaults to the origin.
+   *
+   * If the radii are not specified (both default to -1), the class tries to
+   * infer them from the location of points on the boundary. This works in
+   * 2d, but not in 3d. As a consequence, in 3d these radii must be given.
+   */
+  HalfHyperShellBoundary (const Point<dim> &center = Point<dim>(),
+                          const double inner_radius = -1,
+                          const double outer_radius = -1);
+
+  /**
+   * Construct a new point on a line.
+   */
+  virtual Point<dim>
+  get_new_point_on_line (const typename Triangulation<dim>::line_iterator &line) const;
+
+  /**
+   * Construct a new point on a quad.
+   */
+  virtual Point<dim>
+  get_new_point_on_quad (const typename Triangulation<dim>::quad_iterator &quad) const;
+
+  /**
+   * Refer to the general documentation of this class and the documentation of
+   * the base class.
+   *
+   * Calls @p get_intermediate_points_between_points.
+   */
+  virtual void
+  get_intermediate_points_on_line (const typename Triangulation<dim>::line_iterator &line,
+                                   std::vector<Point<dim> > &points) const;
+
+  /**
+   * Refer to the general documentation of this class and the documentation of
+   * the base class.
+   *
+   * Only implemented for <tt>dim=3</tt> and for <tt>points.size()==1</tt>.
+   */
+  virtual void
+  get_intermediate_points_on_quad (const typename Triangulation<dim>::quad_iterator &quad,
+                                   std::vector<Point<dim> > &points) const;
+
+  /**
+   * Compute the normals to the boundary at the vertices of the given face.
+   *
+   * Refer to the general documentation of this class and the documentation of
+   * the base class.
+   */
+  virtual void
+  get_normals_at_vertices (const typename Triangulation<dim>::face_iterator &face,
+                           typename Boundary<dim>::FaceVertexNormals &face_vertex_normals) const;
+
+private:
+  /**
+   * Inner and outer radii of the shell.
+   */
+  const double inner_radius;
+  const double outer_radius;
+};
+
+
+/**
+ * Class describing the boundary of the torus. The axis of the torus is the
+ * $y$-axis while the plane of the torus is the $x$-$z$ plane. A torus of this
+ * kind can be generated by GridGenerator::torus.
+ *
+ * This class is only implemented for <tt>dim=2</tt>,<tt>spacedim=3</tt>, that
+ * is, just the surface.
+ */
+template <int dim, int spacedim>
+class TorusBoundary : public Boundary<dim,spacedim>
+{
+public:
+  /**
+   * Constructor. <tt>R</tt> has to be greater than <tt>r</tt>.
+   */
+  TorusBoundary (const double R, const double r);
+
+// Boundary refinement functions
+  /**
+   * Construct a new point on a line.
+   */
+  virtual Point<spacedim>
+  get_new_point_on_line (const typename Triangulation<dim,spacedim>::line_iterator &line) const;
+
+  /**
+   * Construct a new point on a quad.
+   */
+  virtual Point<spacedim>
+  get_new_point_on_quad (const typename Triangulation<dim,spacedim>::quad_iterator &quad) const;
+
+  /**
+   * Construct new points on a line.
+   */
+  virtual void   get_intermediate_points_on_line (
+    const typename Triangulation< dim, spacedim >::line_iterator   &line,
+    std::vector< Point< spacedim > >         &points) const;
+
+  /**
+   * Construct new points on a quad.
+   */
+  virtual void  get_intermediate_points_on_quad (
+    const typename Triangulation< dim, spacedim >::quad_iterator &quad,
+    std::vector< Point< spacedim > >         &points ) const;
+
+  /**
+   * Get the normal from cartesian coordinates. This normal does not have unit
+   * length.
+   */
+  virtual void get_normals_at_vertices (
+    const typename Triangulation< dim, spacedim >::face_iterator &face,
+    typename Boundary<dim,spacedim>::FaceVertexNormals &face_vertex_normals) const;
+
+private:
+  // Handy functions
+  /**
+   * Function that corrects the value and sign of angle, that is, given
+   * <tt>angle=tan(abs(y/x))</tt>; if <tt> (y > 0) && (x < 0) </tt> then
+   * <tt>correct_angle = Pi - angle</tt>, etc.
+   */
+
+  double           get_correct_angle(const double angle,const double x,const double y) const;
+
+  /**
+   * Get the cartesian coordinates of the Torus, i.e., from
+   * <tt>(theta,phi)</tt> to <tt>(x,y,z)</tt>.
+   */
+  Point<spacedim>  get_real_coord(const Point<dim> &surfP) const;
+
+  /**
+   * Get the surface coordinates of the Torus, i.e., from <tt>(x,y,z)</tt> to
+   * <tt>(theta,phi)</tt>.
+   */
+  Point<dim>       get_surf_coord(const Point<spacedim> &p) const;
+
+  /**
+   * Get the normal from surface coordinates. This normal does not have unit
+   * length.
+   */
+  Point<spacedim>  get_surf_norm_from_sp(const Point<dim> &surfP)      const;
+
+  /**
+   * Get the normal from cartesian coordinates. This normal does not have unit
+   * length.
+   */
+  Point<spacedim>  get_surf_norm(const Point<spacedim> &p) const;
+
+  /**
+   * The two radii of the torus; <tt>R</tt> has to be greater than <tt>r</tt>.
+   */
+  const double R;
+  const double r;
+};
+
+
+
+/* -------------- declaration of explicit specializations ------------- */
+
+#ifndef DOXYGEN // the declarations up to the matching #endif are skipped when DOXYGEN is defined
+
+template <>
+Point<1>
+HyperBallBoundary<1>::
+get_new_point_on_quad (const Triangulation<1>::quad_iterator &) const;
+template <>
+void
+HyperBallBoundary<1>::get_intermediate_points_on_line (
+  const Triangulation<1>::line_iterator &,
+  std::vector<Point<1> > &) const;
+template <>
+void
+HyperBallBoundary<3>::get_intermediate_points_on_quad (
+  const Triangulation<3>::quad_iterator &quad,
+  std::vector<Point<3> > &points) const;
+template <>
+void
+HyperBallBoundary<1>::
+get_normals_at_vertices (const Triangulation<1,1>::face_iterator &,
+                         Boundary<1,1>::FaceVertexNormals &) const;
+template <>
+Point<1>
+HalfHyperBallBoundary<1>::
+get_new_point_on_quad (const Triangulation<1>::quad_iterator &) const;
+template <>
+void
+HalfHyperBallBoundary<1>::
+get_intermediate_points_on_quad (const Triangulation<1>::quad_iterator &,
+                                 std::vector<Point<1> > &) const;
+template <>
+void
+HalfHyperBallBoundary<1>::
+get_normals_at_vertices (const Triangulation<1,1>::face_iterator &,
+                         Boundary<1,1>::FaceVertexNormals &) const;
+template <>
+Point<1>
+HalfHyperShellBoundary<1>::
+get_new_point_on_quad (const Triangulation<1>::quad_iterator &) const;
+template <>
+void
+HalfHyperShellBoundary<1>::
+get_intermediate_points_on_quad (const Triangulation<1>::quad_iterator &,
+                                 std::vector<Point<1> > &) const;
+template <>
+void
+HalfHyperShellBoundary<1>::
+get_normals_at_vertices (const Triangulation<1,1>::face_iterator &,
+                         Boundary<1,1>::FaceVertexNormals &) const;
+
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/grid/tria_faces.h b/include/deal.II/grid/tria_faces.h
new file mode 100644
index 0000000..e199d56
--- /dev/null
+++ b/include/deal.II/grid/tria_faces.h
@@ -0,0 +1,182 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__tria_faces_h
+#define dealii__tria_faces_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/grid/tria_object.h>
+#include <deal.II/grid/tria_objects.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace internal
+{
+  namespace Triangulation
+  {
+    /**
+     * General template for information belonging to the faces of a
+     * triangulation. These classes are similar to the TriaLevel classes. As
+     * cells are organised in a hierarchical structure of levels, each
+     * triangulation consists of several such TriaLevels. However the faces of
+     * a triangulation, lower dimensional objects like lines in 2D or lines
+     * and quads in 3D, do not have to be based on such a hierarchical
+     * structure. In fact we have to organise them in only one object if we
+     * want to enable anisotropic refinement. Therefore the TriaFaces classes
+     * store the information belonging to the faces of a triangulation
+     * separately from the TriaLevel classes.
+     *
+     * This general template is only provided to enable a specialization
+     * below.
+     *
+     * @author Tobias Leicht, 2006
+     */
+
+    template<int dim>
+    class TriaFaces
+    {
+    private:
+      /**
+       * The constructor is private so that no one can create objects of this
+       * general template. Only the specializations below should be used.
+       */
+      TriaFaces();
+    };
+
+
+
+    /**
+     * Faces only have a meaning in <tt>dim@>=1</tt>. In <tt>dim=1</tt> they
+     * are vertices, which are handled differently, so only for
+     * <tt>dim@>=2</tt> the use of TriaFaces is reasonable, for <tt>dim=1</tt>
+     * the class is empty.
+     */
+    template<>
+    class TriaFaces<1>
+    {
+
+    public:
+      /**
+       * Determine an estimate for the memory consumption (in bytes) of this
+       * object. This class declares no data members; documented result is 0.
+       */
+      std::size_t memory_consumption () const;
+
+      /**
+       * Read or write the data of this object to or from a stream for the
+       * purpose of serialization. This class has no data, so nothing is done.
+       */
+      template <class Archive>
+      void serialize(Archive &ar,
+                     const unsigned int version);
+    };
+
+    /**
+     * In <tt>dim=2</tt> the cells are quads, the faces accordingly are lines.
+     */
+    template<>
+    class TriaFaces<2>
+    {
+    public:
+      /**
+       * The TriaObjects container holding the data of lines.
+       */
+      TriaObjects<TriaObject<1> > lines;
+
+    public:
+      /**
+       * Determine an estimate for the memory consumption (in bytes) of this
+       * object.
+       */
+      std::size_t memory_consumption () const;
+
+      /**
+       * Read or write the data of this object to or from a stream for the
+       * purpose of serialization. Only the @p lines member is archived.
+       */
+      template <class Archive>
+      void serialize(Archive &ar,
+                     const unsigned int version);
+    };
+
+    /**
+     * In <tt>dim=3</tt> the cells are hexes, the faces accordingly are quads.
+     * In addition to that we also have to enable the storage of lines.
+     */
+    template<>
+    class TriaFaces<3>
+    {
+    public:
+      /**
+       * The TriaObjectsQuad3D object holding the data of quads.
+       */
+
+      TriaObjectsQuad3D quads;
+
+      /**
+       * The TriaObjects container holding the data of lines.
+       */
+      TriaObjects<TriaObject<1> > lines;
+
+    public:
+      /**
+       * Determine an estimate for the memory consumption (in bytes) of this
+       * object.
+       */
+      std::size_t memory_consumption () const;
+
+      /**
+       * Read or write the data of this object to or from a stream for the
+       * purpose of serialization. Archives @p quads first, then @p lines.
+       */
+      template <class Archive>
+      void serialize(Archive &ar,
+                     const unsigned int version);
+    };
+
+
+
+    template <class Archive>
+    void
+    TriaFaces<1>::serialize (Archive &,
+                             const unsigned int) // both arguments are unused
+    {} // TriaFaces<1> declares no data members, so there is nothing to archive
+
+
+
+    template <class Archive>
+    void
+    TriaFaces<2>::serialize (Archive &ar,
+                             const unsigned int) // version number is unused
+    {
+      ar &lines; // the line storage is the only data member of TriaFaces<2>
+    }
+
+
+
+    template <class Archive>
+    void
+    TriaFaces<3>::serialize (Archive &ar,
+                             const unsigned int) // version number is unused
+    {
+      ar &quads &lines; // both data members, in declaration order
+    }
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/grid/tria_iterator.h b/include/deal.II/grid/tria_iterator.h
new file mode 100644
index 0000000..1286c72
--- /dev/null
+++ b/include/deal.II/grid/tria_iterator.h
@@ -0,0 +1,1240 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__tria_iterator_h
+#define dealii__tria_iterator_h
+
+
+/*----------------------------   tria-iterator.h     ---------------------------*/
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+
+#include <deal.II/base/point.h>
+#include <deal.II/grid/tria_iterator_base.h>
+
+#include <iterator>
+
+#include <ostream>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int dim, int spacedim> class Triangulation;
+template <int, int, int> class TriaAccessorBase;
+
+template <typename> class TriaRawIterator;
+template <typename> class TriaIterator;
+template <typename> class TriaActiveIterator;
+
+
+
+// note: in non-debug mode, i.e. with optimizations, the file
+// tria_iterator.templates.h is included at the end of this file.
+// this includes a lot of templates and thus makes compilation
+// slower, but at the same time allows for more aggressive
+// inlining and thus faster code.
+
+
+/**
+ * This class implements an iterator, analogous to those used in the standard
+ * library. It fulfills the requirements of a bidirectional iterator. See the
+ * C++ documentation for further details of iterator specification and usage.
+ *
+ *
+ * In addition to the standard interface, an iterator of this class provides a
+ * <tt>-@></tt> operator, i.e. you can write statements like
+ * @code
+ * i->set_refine_flag ();
+ * @endcode
+ *
+ * Iterators are used whenever a loop over all lines, quads, cells etc.  is to
+ * be performed. These loops can then be coded like this:
+ * @code
+ *   cell_iterator i   = tria.begin();
+ *   cell_iterator end = tria.end();
+ *   for (; i!=end; ++i)
+ *     if (i->at_boundary())
+ *       i->set_refine_flag();
+ * @endcode
+ *
+ * Note the usage of <tt>++i</tt> instead of <tt>i++</tt> since this does not
+ * involve temporaries and copying. It is recommended to use a fixed value
+ * <tt>end</tt> inside the loop instead of <tt>tria.end()</tt>, since the
+ * creation and copying of these iterators is rather expensive compared to
+ * normal pointers.
+ *
+ * The objects pointed to are accessors, derived from TriaAccessorBase. Which
+ * kind of accessor is determined by the template argument <em>Accessor</em>.
+ * These accessors are not so much data structures as they are a collection of
+ * functions providing access to the data stored in Triangulation or
+ * DoFHandler objects. Using these accessors, the structure of these classes
+ * is hidden from the application program.
+ *
+ * <h3>Which iterator to use when</h3>
+ *
+ * @attention Application programs will rarely use TriaRawIterator, but rather
+ * one of the derived classes TriaIterator or TriaActiveIterator.
+ *
+ * <ul>
+ * <li> TriaRawIterator objects point to lines, cells, etc in the lists
+ * whether they are used or not (in the vectors, also <i>dead</i> objects are
+ * stored, since deletion in vectors is expensive and we also do not want to
+ * destroy the ordering induced by the numbering in the vectors). Therefore
+ * not all raw iterators point to valid objects.
+ *
+ * <li> The derived class TriaIterator selects the valid cells, that is, cells
+ * used somewhere in the triangulation hierarchy.
+ *
+ * <li> TriaActiveIterator objects which only loop over active cells.
+ * </ul>
+ *
+ * <h3>Purpose</h3>
+ *
+ * Iterators are not much slower than operating directly on the data
+ * structures, since they perform the loops that you had to handcode yourself
+ * anyway. Most iterator and accessor functions are inlined.
+ *
+ * The main functionality of iterators resides in the <tt>++</tt> and
+ * <tt>--</tt> operators. These move the iterator forward or backward just as
+ * if it were a pointer into an array. Here, this operation is not so easy,
+ * since it may include skipping some elements and the transition between the
+ * triangulation levels. This is completely hidden from the user, though you
+ * can still create an iterator pointing to an arbitrary element.  Actually,
+ * the operation of moving iterators back and forth is not done in the
+ * iterator classes, but rather in the accessor classes. Since these are
+ * passed as template arguments, you can write your own versions here to add
+ * more functionality.
+ *
+ * Furthermore, the iterators described here satisfy the requirement of input
+ * and bidirectional iterators as stated by the C++ standard. It is therefore
+ * possible to use the functions from the algorithm section of the C++
+ * standard, e.g., <em>count_if</em> (see the documentation for Triangulation
+ * for an example) and several others.
+ *
+ * <h3>Implementation</h3>
+ *
+ * The iterator class itself does not have much functionality. It only becomes
+ * useful when assigned an Accessor (the template parameter), which
+ * really does the access to data. An Accessor has to fulfill some
+ * requirements:
+ *
+ * <ul>
+ * <li> It must have two members named <tt>present_level</tt> and
+ * <tt>present_index</tt> storing the address of the element in the
+ * triangulation presently pointed to. These data have to be accessible by all
+ * triangulation iterators listed above.
+ *
+ * <li> It must have a constructor which takes a Triangulation* and two
+ * unsigned integers, denoting the initial level and index, as well as a data
+ * object depending on its type.
+ *
+ * <li> For the TriaIterator and the TriaActiveIterator class, it must have a
+ * member function <tt>bool used()</tt>, for the latter a member function
+ * <tt>bool active()</tt>.
+ *
+ * <li> It must have void operators <tt>++</tt> and <tt>--</tt>.
+ *
+ * <li> It must declare a local <tt>typedef AccessorData</tt> which states the
+ * data type the accessor expects to get passed as fourth constructor
+ * argument. By declaring a local data type, the respective iterator class may
+ * type-safely enforce that data type to be one of its own constructor
+ * argument types. If an accessor class does not need additional data, this
+ * type shall be <tt>void</tt>.
+ * </ul>
+ *
+ * Then the iterator is able to do what it is supposed to. All of the
+ * necessary functions are implemented in the <tt>Accessor</tt> base class,
+ * but you may write your own version (non-virtual, since we use templates) to
+ * add functionality.
+ *
+ * The accessors provided by the library consist of two groups, determined by
+ * whether they access the data of Triangulation objects or
+ * DoFHandler/hp::DoFHandler objects. They are derived from TriaAccessor and
+ * DoFAccessor, respectively. Each group also has specialized accessors for
+ * cells (as opposed to faces and lines) that offer more functionality such as
+ * accessing neighbors.
+ *
+ * @attention It seems impossible to preserve constness of a triangulation
+ * through iterator usage. Thus, if you declare pointers to a <tt>const</tt>
+ * triangulation object, you should be well aware that you might involuntarily
+ * alter the data stored in the triangulation.
+ *
+ * @note More information on valid and invalid iterators can be found in the
+ * documentation of TriaAccessorBase, where the iterator states are checked
+ * and implemented.
+ *
+ *
+ * <h3>Past-the-end iterators</h3>
+ *
+ * There is a representation of past-the-end-pointers, denoted by special
+ * values of the member variables @p present_level and @p present_index: If
+ * <tt>present_level>=0</tt> and <tt>present_index>=0</tt>, then the object is
+ * valid (there is no check whether the triangulation really has that many
+ * levels or that many cells on the present level when we investigate the
+ * state of an iterator; however, in many places where an iterator is
+ * dereferenced we make this check); if <tt>present_level==-1</tt> and
+ * <tt>present_index==-1</tt>, then the iterator points past the end; in all
+ * other cases, the iterator is considered invalid. You can check this by
+ * calling the <tt>state()</tt> function.
+ *
+ * An iterator is also invalid, if the pointer pointing to the Triangulation
+ * object is invalid or zero.
+ *
+ * Finally, an iterator is invalid, if the element pointed to by @p
+ * present_level and @p present_index is not used, i.e. if the @p used flag is
+ * set to false.
+ *
+ * The last two checks are not made in <tt>state()</tt> since both cases
+ * should only occur upon uninitialized construction through @p memcpy and the
+ * like (the parent triangulation can only be set upon construction). If an
+ * iterator is constructed empty through the empty constructor,
+ * <tt>present_level==-2</tt> and <tt>present_index==-2</tt>. Thus, the
+ * iterator is invalid anyway, regardless of the state of the triangulation
+ * pointer and the state of the element pointed to.
+ *
+ * Past-the-end iterators may also be used to compare an iterator with the <i
+ * >before-the-start</i> value, when running backwards. There is no
+ * distinction between the iterators pointing past the two ends of a vector.
+ *
+ * Defining only one value to be past-the-end and making all other values
+ * invalid provides a second track of security: if we should have forgotten a
+ * check in the library when an iterator is incremented or decremented, we
+ * automatically convert the iterator from the allowed state "past-the-end" to
+ * the disallowed state "invalid" which increases the chance that some time
+ * earlier than for past-the-end iterators an exception is raised.
+ *
+ * @ref Triangulation
+ * @ingroup grid
+ * @ingroup Iterators
+ * @author Wolfgang Bangerth, 1998
+ * @author documentation update Guido Kanschat, 2004
+ */
+template <typename Accessor>
+class TriaRawIterator : public std::iterator<std::bidirectional_iterator_tag,Accessor>
+{
+public:
+  /**
+   * Declare the type of the Accessor for use in the outside world. This way
+   * other functions can use the Accessor's type without knowledge of how the
+   * exact implementation actually is.
+   */
+  typedef Accessor AccessorType;
+
+  /**
+   * Empty constructor. Such an object is not usable!
+   */
+  TriaRawIterator ();
+
+  /**
+   * Copy constructor.
+   */
+  TriaRawIterator (const TriaRawIterator &);
+
+  /**
+   * Construct an iterator from the given accessor; the given accessor need
+   * not be of the same type as the accessor of this class is, but it needs to
+   * be convertible.
+   *
+   * Through this constructor, it is also possible to construct objects for
+   * derived iterators:
+   * @code
+   * DoFCellAccessor dof_accessor;
+   * Triangulation::active_cell_iterator cell
+   *   = dof_accessor;
+   * @endcode
+   */
+  explicit TriaRawIterator (const Accessor &a);
+
+  /**
+   * Constructor. Assumes that the other accessor type is convertible to the
+   * current one.
+   */
+  template <typename OtherAccessor>
+  explicit TriaRawIterator (const OtherAccessor &a);
+
+  /**
+   * Proper constructor, initialized with the triangulation, the level and
+   * index of the object pointed to. The last parameter is of a type declared
+   * by the accessor class.
+   */
+  TriaRawIterator (const Triangulation<Accessor::dimension,Accessor::space_dimension> *parent,
+                   const int level,
+                   const int index,
+                   const typename AccessorType::AccessorData *local_data = 0);
+
+  /**
+   * This is a conversion operator (constructor) which takes another iterator
+   * type and copies the data; this conversion works, if there is a conversion
+   * path from the @p OtherAccessor class to the @p Accessor class of this
+   * object. One such path would be derived class to base class, which for
+   * example may be used to get a Triangulation::raw_cell_iterator from a
+   * DoFHandler::raw_cell_iterator, since the DoFAccessor class is derived
+   * from the TriaAccessorBase class.
+   */
+  template <typename OtherAccessor>
+  TriaRawIterator (const TriaRawIterator<OtherAccessor> &i);
+
+  /**
+   * Another conversion operator, where we use the pointers to the
+   * Triangulation from a TriaAccessorBase object, while the additional data
+   * is used according to the actual type of Accessor.
+   */
+  TriaRawIterator (const TriaAccessorBase<Accessor::structure_dimension,Accessor::dimension,Accessor::space_dimension> &tria_accessor,
+                   const typename Accessor::AccessorData *local_data);
+
+  /**
+   * Conversion constructor. Same as above with the difference that it
+   * converts from TriaIterator classes (not TriaRawIterator).
+   */
+  template <typename OtherAccessor>
+  TriaRawIterator (const TriaIterator<OtherAccessor> &i);
+
+  /**
+   * Conversion constructor. Same as above with the difference that it
+   * converts from TriaActiveIterator classes (not TriaRawIterator).
+   */
+  template <typename OtherAccessor>
+  TriaRawIterator (const TriaActiveIterator<OtherAccessor> &i);
+
+  /**
+   * @name Dereferencing
+   */
+  /*@{*/
+  /**
+   * Dereferencing operator, returns a reference to an accessor. Usage is thus
+   * like <tt>(*i).index ();</tt>
+   *
+   * This function has to be specialized explicitly for the different @p
+   * Pointees, to allow an
+   * <tt>iterator<1,TriangulationLevel<1>::LinesData></tt> to point to
+   * <tt>tria->lines.cells[index]</tt> while for one dimension higher it has
+   * to point to <tt>tria->quads.cells[index]</tt>.
+   *
+   * You must not dereference invalid or past the end iterators.
+   */
+  const Accessor &operator * () const;
+
+  /**
+   * Dereferencing operator, non-@p const version.
+   */
+  Accessor &operator * ();
+
+  /**
+   * Dereferencing operator, returns a reference of the cell pointed to. Usage
+   * is thus like <tt>i->index ();</tt>
+   *
+   * There is a @p const and a non-@p const version.
+   */
+  const Accessor *operator -> () const;
+
+  /**
+   * Dereferencing operator, non-@p const version.
+   */
+  Accessor *operator -> ();
+
+
+  /**
+   * In order to be able to assign end-iterators for different accessors to
+   * each other, we need an access function which returns the accessor
+   * regardless of its state.
+   *
+   * @warning This function should not be used in application programs. It is
+   * only intended for limited purposes inside the library and it makes
+   * debugging much harder.
+   */
+  const Accessor &access_any () const;
+
+  /*@}*/
+
+  /**
+   * Assignment operator.
+   */
+  TriaRawIterator &operator = (const TriaRawIterator &);
+
+  /**
+   * Assignment operator.
+   */
+//    template <class OtherAccessor>
+//    TriaRawIterator & operator = (const TriaRawIterator<OtherAccessor>&);
+
+  /**
+   * Assignment operator.
+   */
+//    template <class OtherAccessor>
+//    TriaRawIterator & operator = (const TriaIterator<OtherAccessor>&);
+
+  /**
+   * Assignment operator.
+   */
+//    template <class OtherAccessor>
+//    TriaRawIterator & operator = (const TriaActiveIterator<OtherAccessor>&);
+
+  /**
+   * Compare for equality.
+   */
+  bool operator == (const TriaRawIterator &) const;
+
+  /**
+   * Compare for inequality.
+   */
+  bool operator != (const TriaRawIterator &) const;
+
+  /**
+   * Ordering relation for iterators.
+   *
+   * This relation attempts a total ordering of cells.
+   *
+   * The relation is defined as follows:
+   *
+   * For objects of <tt>Accessor::structure_dimension <
+   * Accessor::dimension</tt>, we simply compare the index of such an object.
+   * The ordering is lexicographic according to the following hierarchy (in
+   * the sense, that the next test is only applied if the previous was
+   * inconclusive):
+   *
+   * <ol>
+   * <li> The past-the-end iterator is always ordered last. Two past-the-end
+   * iterators rank the same, thus false is returned in that case.</li>
+   *
+   * <li> The level of the cell.</li>
+   * <li> The index of a cell inside the level.</li>
+   * </ol>
+   *
+   * @note The ordering is not consistent between different processors in a
+   * parallel::distributed::Triangulation because we rely on index(), which is
+   * likely not the same.
+   */
+  bool operator < (const TriaRawIterator &) const;
+
+  /**
+   * Another comparison operator, implementing the same ordering as
+   * #operator<.
+   */
+  bool operator > (const TriaRawIterator &) const;
+
+  /**@name Advancement of iterators*/
+  /*@{*/
+  /**
+   * Prefix <tt>++</tt> operator: <tt>++iterator</tt>. This operator advances
+   * the iterator to the next element and returns a reference to
+   * <tt>*this</tt>.
+   */
+  TriaRawIterator &operator ++ ();
+
+  /**
+   * Postfix <tt>++</tt> operator: <tt>iterator++</tt>. This operator advances
+   * the iterator to the next element, but returns an iterator to the element
+   * previously pointed to.
+   *
+   * Since this operation involves a temporary and a copy operation and since
+   * an @p iterator is quite a large object for a pointer, use the prefix
+   * operator <tt>++iterator</tt> whenever possible, especially in the header
+   * of for loops (<tt>for (; iterator!=end; ++iterator)</tt>) since there you
+   * normally never need the returned value.
+   */
+  TriaRawIterator operator ++ (int);
+
+  /**
+   * Prefix @p -- operator: @p --iterator. This operator moves the iterator to
+   * the previous element and returns a reference to <tt>*this</tt>.
+   */
+  TriaRawIterator &operator -- ();
+
+  /**
+   * Postfix @p -- operator: @p iterator--. This operator moves the iterator
+   * to the previous element, but returns an iterator to the element
+   * previously pointed to.
+   *
+   * The same applies as for the postfix operator++: If possible, avoid it by
+   * using the prefix operator form to avoid the use of a temporary variable.
+   */
+  TriaRawIterator operator -- (int);
+  /*@}*/
+
+  /**
+   * Return the state of the iterator.
+   */
+  IteratorState::IteratorStates state () const;
+
+  /**
+   * Print the iterator to a stream <code>out</code>. The format is
+   * <tt>level.index</tt>.
+   */
+  template <class StreamType>
+  void print (StreamType &out) const;
+
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  std::size_t memory_consumption () const;
+
+
+  /**@name Exceptions*/
+  /*@{*/
+  /**
+   * Exception for TriaObjects with level, i.e. cells.
+   */
+  DeclException1 (ExcDereferenceInvalidCell,
+                  Accessor,
+                  << "You tried to dereference a cell iterator for which this "
+                  << "is not possible. More information on this iterator: "
+                  << "level=" << arg1.level()
+                  << ", index=" << arg1.index()
+                  << ", state="
+                  << (arg1.state() == IteratorState::valid ? "valid" :
+                      (arg1.state() == IteratorState::past_the_end ?
+                       "past_the_end" : "invalid")));
+
+  /**
+   * Exception for lower-dimensional TriaObjects without level, i.e. objects
+   * faces are constructed with.
+   */
+  DeclException1 (ExcDereferenceInvalidObject,
+                  Accessor,
+                  << "You tried to dereference an iterator for which this "
+                  << "is not possible. More information on this iterator: "
+                  << "index=" << arg1.index()
+                  << ", state="
+                  << (arg1.state() == IteratorState::valid ? "valid" :
+                      (arg1.state() == IteratorState::past_the_end ?
+                       "past_the_end" : "invalid")));
+
+  /**
+   * Exception
+   */
+  DeclException0 (ExcAdvanceInvalidObject);
+  /**
+   * Exception
+   */
+  DeclException0 (ExcInvalidComparison);
+
+  /*@}*/
+protected:
+  /**
+   * Object holding the real data.
+   */
+  Accessor accessor;
+
+
+  /**
+   * Make all other iterator class templates friends of this class. This is
+   * necessary for the implementation of conversion constructors.
+   *
+   * In fact, we would not need them to be friends if they were for different
+   * dimensions, but the compiler dislikes giving a fixed dimension and
+   * variable accessor since then it says that would be a partial
+   * specialization.
+   */
+  template <typename SomeAccessor> friend class TriaRawIterator;
+  template <typename SomeAccessor> friend class TriaIterator;
+  template <typename SomeAccessor> friend class TriaActiveIterator;
+};
+
+
+/**
+ * This specialization of TriaRawIterator provides access only to the
+ * <em>used</em> lines, quads, cells, etc.
+ *
+ * @ingroup grid
+ * @ingroup Iterators
+ */
+template <typename Accessor>
+class TriaIterator : public TriaRawIterator<Accessor>
+{
+public:
+  /**
+   * Empty constructor. Such an object is not usable!
+   */
+  TriaIterator ();
+
+  /**
+   * Copy constructor.
+   */
+  TriaIterator (const TriaIterator<Accessor> &);
+
+  /**
+   * Cross copy constructor from iterators pointing also to unused
+   * objects.
+   *
+   * If the object pointed to is not past-the-end and is not used, the debug
+   * version raises an error!
+   */
+  TriaIterator (const TriaRawIterator<Accessor> &);
+
+  /**
+   * Proper constructor, initialized with the triangulation, the level and
+   * index of the object pointed to. The last parameter is of a type declared
+   * by the accessor class.
+   *
+   * If the object pointed to is not past-the-end and is not used, the debug
+   * version raises an error!
+   */
+  TriaIterator (const Triangulation<Accessor::dimension,Accessor::space_dimension> *parent,
+                const int                 level,
+                const int                 index,
+                const typename Accessor::AccessorData *local_data = 0);
+
+  /**
+   * Construct from an accessor of type OtherAccessor that is convertible to
+   * the type Accessor.
+   */
+  template <typename OtherAccessor>
+  explicit TriaIterator (const OtherAccessor &a);
+
+  /**
+   * This is a conversion operator (constructor) which takes another iterator
+   * type and copies the data; this conversion works, if there is a conversion
+   * path from the @p OtherAccessor class to the @p Accessor class of this
+   * object. One such path would be derived class to base class, which for
+   * example may be used to get a Triangulation::cell_iterator from a
+   * DoFHandler::cell_iterator, since the DoFAccessor class is derived from
+   * the TriaAccessorBase class.
+   */
+  template <typename OtherAccessor>
+  TriaIterator (const TriaIterator<OtherAccessor> &i);
+
+  /**
+   * Another conversion operator, where we use the pointers to the
+   * Triangulation from a TriaAccessorBase object, while the additional data
+   * is used according to the actual type of Accessor.
+   */
+  TriaIterator (const TriaAccessorBase<Accessor::structure_dimension,Accessor::dimension,Accessor::space_dimension> &tria_accessor,
+                const typename Accessor::AccessorData *local_data);
+
+  /**
+   * Similar conversion operator to the above one, but does a check whether
+   * the iterator points to a used element, which is necessary for raw
+   * iterators.
+   */
+  template <typename OtherAccessor>
+  TriaIterator (const TriaRawIterator<OtherAccessor> &i);
+
+  /**
+   * Similar conversion operator to the above one, but for conversion from
+   * active iterators.
+   */
+  template <typename OtherAccessor>
+  TriaIterator (const TriaActiveIterator<OtherAccessor> &i);
+
+  /**
+   * Assignment operator.
+   */
+  TriaIterator<Accessor> &
+  operator = (const TriaIterator<Accessor> &);
+
+  /**
+   * Cross assignment operator. This assignment is only valid if the given
+   * iterator points to a used element.
+   */
+  TriaIterator<Accessor> &
+  operator = (const TriaRawIterator<Accessor> &);
+
+  /**
+   * Assignment operator. Requires that Accessor can be copied from
+   * OtherAccessor.
+   */
+  template <class OtherAccessor>
+  TriaIterator<Accessor> &
+  operator = (const TriaIterator<OtherAccessor> &);
+
+  /**
+   * Cross assignment operator. This assignment is only valid if the given
+   * iterator points to a used element. Requires that Accessor can be copied
+   * from OtherAccessor.
+   */
+  template <class OtherAccessor>
+  TriaIterator<Accessor> &
+  operator = (const TriaRawIterator<OtherAccessor> &);
+
+  /**@name Advancement of iterators*/
+  /*@{*/
+  /**
+   * Prefix <tt>++</tt> operator: <tt>++i</tt>. This operator advances the
+   * iterator to the next used element and returns a reference to
+   * <tt>*this</tt>.
+   */
+  TriaIterator<Accessor> &operator ++ ();
+
+  /**
+   * Postfix <tt>++</tt> operator: <tt>i++</tt>. This operator advances the
+   * iterator to the next used element, but returns an iterator to the element
+   * previously pointed to. Since this involves a temporary and a copy
+   * operation and since an iterator is quite a large object for a
+   * pointer, use the prefix operator <tt>++i</tt> whenever possible,
+   * especially in the head of for loops (<tt>for (; i!=end; ++i)</tt>) since
+   * there you normally never need the returned value.
+   */
+  TriaIterator<Accessor> operator ++ (int);
+
+  /**
+   * Prefix @p -- operator: @p --i. This operator moves the iterator to the
+   * previous used element and returns a reference to <tt>*this</tt>.
+   */
+  TriaIterator<Accessor> &operator -- ();
+
+  /**
+   * Postfix @p -- operator: @p i--.
+   */
+  TriaIterator<Accessor> operator -- (int);
+  /*@}*/
+
+  /**
+   * Exception
+   */
+  DeclException0 (ExcAssignmentOfUnusedObject);
+};
+
+
+/**
+ * This specialization of TriaIterator provides access only to the
+ * <em>active</em> lines, quads, cells, etc. An active cell is a cell which is
+ * not refined and thus a cell on which calculations on the finest level are
+ * done.
+ *
+ * @ingroup grid
+ * @ingroup Iterators
+ */
+template <typename Accessor>
+class TriaActiveIterator : public TriaIterator<Accessor>
+{
+public:
+  /**
+   * Empty constructor. Such an object is not usable!
+   */
+  TriaActiveIterator ();
+
+  /**
+   * Copy constructor.
+   */
+  TriaActiveIterator (const TriaActiveIterator<Accessor> &);
+
+  /**
+   * Cross copy constructor from iterators pointing also to non-active
+   * objects.
+   *
+   * If the object pointed to is not past-the-end and is not active, the debug
+   * version raises an error!
+   */
+  TriaActiveIterator (const TriaRawIterator<Accessor> &);
+
+  /**
+   * Cross copy constructor from iterators pointing also to non-active
+   * objects.
+   *
+   * If the object pointed to is not past-the-end and is not active, the debug
+   * version raises an error!
+   */
+  TriaActiveIterator (const TriaIterator<Accessor> &);
+
+  /**
+   * Proper constructor, initialized with the triangulation, the level and
+   * index of the object pointed to. The last parameter is of a type declared
+   * by the accessor class.
+   *
+   * If the object pointed to is not past-the-end and is not active, the debug
+   * version raises an error!
+   */
+  TriaActiveIterator (const Triangulation<Accessor::dimension,Accessor::space_dimension> *parent,
+                      const int level,
+                      const int index,
+                      const typename Accessor::AccessorData *local_data = 0);
+
+  /**
+   * This is a conversion operator (constructor) which takes another iterator
+   * type and copies the data; this conversion works, if there is a conversion
+   * path from the @p OtherAccessor class to the @p Accessor class of this
+   * object. One such path would be derived class to base class, which for
+   * example may be used to get a Triangulation::active_cell_iterator from a
+   * DoFHandler::active_cell_iterator, since the DoFAccessor class is derived
+   * from the TriaAccessorBase class.
+   */
+  template <typename OtherAccessor>
+  TriaActiveIterator (const TriaActiveIterator<OtherAccessor> &i);
+
+  /**
+   * Another conversion operator, where we use the pointers to the
+   * Triangulation from a TriaAccessorBase object, while the additional data
+   * is used according to the actual type of Accessor.
+   */
+  TriaActiveIterator (const TriaAccessorBase<Accessor::structure_dimension,Accessor::dimension,Accessor::space_dimension> &tria_accessor,
+                      const typename Accessor::AccessorData *local_data);
+
+  /**
+   * Similar conversion operator to the above one, but does a check whether
+   * the iterator points to a used element, and is active, which is necessary
+   * for raw iterators. Since usual iterators are also raw iterators, this
+   * constructor works also for parameters of type
+   * <tt>TriaIterator<OtherAccessor></tt>.
+   */
+  template <typename OtherAccessor>
+  TriaActiveIterator (const TriaRawIterator<OtherAccessor> &i);
+
+  /**
+   * Assignment operator.
+   */
+  TriaActiveIterator<Accessor> &
+  operator = (const TriaActiveIterator<Accessor> &);
+
+  /**
+   * Cross assignment operator. This assignment is only valid if the given
+   * iterator points to an active element.
+   */
+  TriaActiveIterator<Accessor> &
+  operator = (const TriaIterator<Accessor> &);
+
+  /**
+   * Cross assignment operator. This assignment is only valid if the given
+   * iterator points to an active element or past the end.
+   */
+  TriaActiveIterator<Accessor> &
+  operator = (const TriaRawIterator<Accessor> &);
+
+  /**
+   * Assignment operator. Requires that Accessor can be copied from
+   * OtherAccessor.
+   */
+  template <class OtherAccessor>
+  TriaActiveIterator<Accessor> &
+  operator = (const TriaActiveIterator<OtherAccessor> &);
+
+  /**
+   * Cross assignment operator. This assignment is only valid if the given
+   * iterator points to an active element or past the end. Requires that
+   * Accessor can be copied from OtherAccessor.
+   */
+  template <class OtherAccessor>
+  TriaActiveIterator<Accessor> &
+  operator = (const TriaRawIterator<OtherAccessor> &);
+
+  /**
+   * Cross assignment operator. This assignment is only valid if the given
+   * iterator points to an active element. Requires that Accessor can be
+   * copied from OtherAccessor.
+   */
+  template <class OtherAccessor>
+  TriaActiveIterator<Accessor> &
+  operator = (const TriaIterator<OtherAccessor> &);
+
+  /**@name Advancement of iterators*/
+  /*@{*/
+  /**
+   * Prefix <tt>++</tt> operator: <tt>++i</tt>. This operator advances the
+   * iterator to the next active element and returns a reference to
+   * <tt>*this</tt>.
+   */
+  TriaActiveIterator<Accessor> &operator ++ ();
+
+  /**
+   * Postfix <tt>++</tt> operator: <tt>i++</tt>. This operator advances the
+   * iterator to the next active element, but returns an iterator to the
+   * element previously pointed to. Since this involves a temporary and a copy
+   * operation and since an @p active_iterator is quite a large object for a
+   * pointer, use the prefix operator <tt>++i</tt> whenever possible,
+   * especially in the head of for loops (<tt>for (; i!=end; ++i)</tt>) since
+   * there you normally never need the returned value.
+   */
+  TriaActiveIterator<Accessor> operator ++ (int);
+
+  /**
+   * Prefix @p -- operator: @p --i. This operator moves the iterator back to
+   * the previous active element and returns a reference to <tt>*this</tt>.
+   */
+  TriaActiveIterator<Accessor> &operator -- ();
+
+  /**
+   * Postfix @p -- operator: @p i--. Steps back like <tt>--i</tt>, but returns the previous iterator value.
+   */
+  TriaActiveIterator<Accessor> operator -- (int);
+  /*@}*/
+
+  /**
+   * Exception raised in debug mode when an active iterator is set to an object that is not active.
+   */
+  DeclException0 (ExcAssignmentOfInactiveObject);
+};
+
+
+/*----------------------- Inline functions -------------------*/
+
+
+// Construct a raw iterator whose accessor is a copy of @p a.
+template <typename Accessor>
+inline TriaRawIterator<Accessor>::TriaRawIterator (const Accessor &a)
+  : accessor (a)
+{
+  // all work is done in the initializer list
+}
+
+
+
+// Construct a raw iterator by converting an accessor of a different type
+// (@p OtherAccessor) into the accessor stored in this iterator.
+template <typename Accessor>
+template <typename OtherAccessor>
+inline TriaRawIterator<Accessor>::TriaRawIterator (const OtherAccessor &a)
+  : accessor (a)
+{
+}
+
+
+
+// Conversion from a raw iterator over another accessor type: the stored
+// accessor is initialized from the accessor held by @p i.
+template <typename Accessor>
+template <typename OtherAccessor>
+inline
+TriaRawIterator<Accessor>::TriaRawIterator (const TriaRawIterator<OtherAccessor> &i)
+  : accessor (i.accessor)
+{}
+
+
+
+// Conversion from a TriaIterator over another accessor type: the stored
+// accessor is initialized from the accessor held by @p i.
+template <typename Accessor>
+template <typename OtherAccessor>
+inline
+TriaRawIterator<Accessor>::TriaRawIterator (const TriaIterator<OtherAccessor> &i)
+  : accessor (i.accessor)
+{}
+
+
+
+// Conversion from a TriaActiveIterator over another accessor type: the
+// stored accessor is initialized from the accessor held by @p i.
+template <typename Accessor>
+template <typename OtherAccessor>
+inline
+TriaRawIterator<Accessor>::TriaRawIterator (const TriaActiveIterator<OtherAccessor> &i)
+  : accessor (i.accessor)
+{}
+
+
+
+// Dereference (read-only): return the accessor. In debug mode the iterator
+// must be valid; structure_dimension==dimension selects the exception used.
+template <typename Accessor>
+inline const Accessor &
+TriaRawIterator<Accessor>::operator * () const
+{
+  Assert (Accessor::structure_dimension!=Accessor::dimension ||
+          state() == IteratorState::valid,
+          ExcDereferenceInvalidCell(accessor));
+  Assert (Accessor::structure_dimension==Accessor::dimension ||
+          state() == IteratorState::valid,
+          ExcDereferenceInvalidObject(accessor));
+  return this->accessor;
+}
+
+
+
+// Dereference (writable): return the accessor. In debug mode the iterator
+// must be valid; structure_dimension==dimension selects the exception used.
+template <typename Accessor>
+inline Accessor &
+TriaRawIterator<Accessor>::operator * ()
+{
+  Assert (Accessor::structure_dimension!=Accessor::dimension ||
+          state() == IteratorState::valid,
+          ExcDereferenceInvalidCell(accessor));
+  Assert (Accessor::structure_dimension==Accessor::dimension ||
+          state() == IteratorState::valid,
+          ExcDereferenceInvalidObject(accessor));
+  return this->accessor;
+}
+
+
+
+// Return the stored accessor without checking the state of the iterator.
+template <typename Accessor>
+inline const Accessor &
+TriaRawIterator<Accessor>::access_any () const
+{
+  return this->accessor;
+}
+
+
+
+// Member access: yields the address of the accessor obtained by operator*.
+template <typename Accessor>
+inline const Accessor *
+TriaRawIterator<Accessor>::operator -> () const
+{
+  return &(**this);
+}
+
+
+
+// Member access: yields the address of the accessor obtained by operator*.
+template <typename Accessor>
+inline Accessor *
+TriaRawIterator<Accessor>::operator -> ()
+{
+  return &(**this);
+}
+
+
+
+// The state of an iterator is whatever state its accessor reports.
+template <typename Accessor>
+inline IteratorState::IteratorStates
+TriaRawIterator<Accessor>::state () const
+{
+  return this->accessor.state ();
+}
+
+
+
+// Ordering of iterators on the same triangulation. A past-the-end iterator is
+// never smaller than anything; any other iterator is smaller than past-the-end.
+template <typename Accessor>
+inline bool
+TriaRawIterator<Accessor>::operator < (const TriaRawIterator<Accessor> &other) const
+{
+  Assert (state() != IteratorState::invalid,
+          ExcDereferenceInvalidObject(accessor));
+  Assert (other.state() != IteratorState::invalid,
+          ExcDereferenceInvalidObject(other.accessor));
+  Assert (&accessor.get_triangulation() == &other.accessor.get_triangulation(),
+          ExcInvalidComparison());
+
+  // past-the-end iterators are handled without dereferencing anything
+  if (state() == IteratorState::past_the_end)
+    return false;
+
+  return (other.state() == IteratorState::past_the_end) || ((**this) < (*other));
+}
+
+
+
+// "a > b" holds exactly if "b < a"; all checks are left to operator<.
+template <typename Accessor>
+inline bool
+TriaRawIterator<Accessor>::operator > (const TriaRawIterator<Accessor> &other) const
+{
+  return other < *this;
+}
+
+
+
+// Prefix increment: only an iterator in the valid state may be advanced
+// (checked in debug mode); the actual stepping is done by the accessor.
+template <typename Accessor>
+inline TriaRawIterator<Accessor> &
+TriaRawIterator<Accessor>::operator ++ ()
+{
+  Assert (state() == IteratorState::valid, ExcAdvanceInvalidObject());
+  ++(this->accessor);
+  return *this;
+}
+
+
+
+// Prefix decrement: only an iterator in the valid state may be moved back
+// (checked in debug mode); the actual stepping is done by the accessor.
+template <typename Accessor>
+inline TriaRawIterator<Accessor> &
+TriaRawIterator<Accessor>::operator -- ()
+{
+  Assert (state() == IteratorState::valid, ExcAdvanceInvalidObject());
+  --(this->accessor);
+  return *this;
+}
+
+
+
+// Write "level.index" if structure_dimension==dimension, else just "index".
+template <typename Accessor>
+template <class StreamType>
+inline void
+TriaRawIterator<Accessor>::print (StreamType &out) const
+{
+  // only objects of full dimension are printed with their level
+  if (Accessor::structure_dimension==Accessor::dimension)
+    out << accessor.level() << ".";
+  out << accessor.index();
+}
+
+
+
+// The reported memory footprint is the size of the iterator object itself.
+template <typename Accessor>
+inline std::size_t
+TriaRawIterator<Accessor>::memory_consumption () const
+{
+  return sizeof(*this);
+}
+
+
+
+// Conversion between TriaIterator types; no check is performed here.
+template <typename Accessor>
+template <typename OtherAccessor>
+inline
+TriaIterator<Accessor>::TriaIterator (const TriaIterator<OtherAccessor> &i)
+  : TriaRawIterator<Accessor> (i.accessor)
+{}
+
+
+
+// Conversion from an active iterator; no check is performed here.
+template <typename Accessor>
+template <typename OtherAccessor>
+inline
+TriaIterator<Accessor>::TriaIterator (const TriaActiveIterator<OtherAccessor> &i)
+  : TriaRawIterator<Accessor> (i.accessor)
+{}
+
+
+
+// Conversion from a raw iterator over a different accessor type. In debug
+// mode the object pointed to must be used unless the state is past-the-end.
+template <typename Accessor>
+template <typename OtherAccessor>
+inline
+TriaIterator<Accessor>::TriaIterator (const TriaRawIterator<OtherAccessor> &i)
+  : TriaRawIterator<Accessor> (i.accessor)
+{
+#ifdef DEBUG
+  // The state is tested in a separate statement rather than inside the
+  // Assert condition: the original note says used() would otherwise be
+  // called even for a past-the-end iterator and throw an exception.
+  const bool is_past_the_end
+    = (this->state() == IteratorState::past_the_end);
+  if (!is_past_the_end)
+    Assert (this->accessor.used(),
+            ExcAssignmentOfUnusedObject());
+#endif
+}
+
+// Construct from an accessor of a different type. In debug mode the object
+// referred to must be used unless the resulting state is past-the-end.
+template <typename Accessor>
+template <typename OtherAccessor>
+TriaIterator<Accessor>::TriaIterator (const OtherAccessor &a)
+  : TriaRawIterator<Accessor> (a)
+{
+#ifdef DEBUG
+  // The state is tested in a separate statement rather than inside the
+  // Assert condition: the original note says used() would otherwise be
+  // called even for a past-the-end iterator and throw an exception.
+  const bool is_past_the_end
+    = (this->state() == IteratorState::past_the_end);
+  if (!is_past_the_end)
+    Assert (this->accessor.used(),
+            ExcAssignmentOfUnusedObject());
+#endif
+}
+
+// Conversion between active iterators over different accessor types.
+template <typename Accessor>
+template <typename OtherAccessor>
+inline
+TriaActiveIterator<Accessor>::TriaActiveIterator (const TriaActiveIterator<OtherAccessor> &i)
+  : TriaIterator<Accessor> (i.accessor)
+{}
+
+
+
+// Conversion from a raw iterator over a different accessor type. In debug
+// mode the object must have no children unless the state is past-the-end.
+template <typename Accessor>
+template <typename OtherAccessor>
+inline
+TriaActiveIterator<Accessor>::TriaActiveIterator (const TriaRawIterator<OtherAccessor> &i)
+  : TriaIterator<Accessor> (i)
+{
+#ifdef DEBUG
+  // The state is tested in a separate statement rather than inside the
+  // Assert condition: the original note says has_children() would otherwise
+  // be called even for a past-the-end iterator and throw an exception.
+  const bool is_past_the_end
+    = (this->state() == IteratorState::past_the_end);
+  if (!is_past_the_end)
+    Assert (this->accessor.has_children()==false,
+            ExcAssignmentOfInactiveObject());
+#endif
+}
+
+
+
+/**
+ * Write the address of the object pointed to by @p i to @p out by calling
+ * the print() function of the iterator, and return @p out so that output
+ * operations can be chained.
+ *
+ * @author Wolfgang Bangerth, 1998
+ */
+template <typename Accessor>
+inline std::ostream &
+operator << (std::ostream                    &out,
+             const TriaRawIterator<Accessor> &i)
+{
+  i.print (out);
+  return out;
+}
+
+
+
+/**
+ * Write the address of the object pointed to by @p i to @p out by calling
+ * the print() function of the iterator, and return @p out so that output
+ * operations can be chained.
+ *
+ * @author Wolfgang Bangerth, 1998
+ */
+template <typename Accessor>
+inline std::ostream &
+operator << (std::ostream                 &out,
+             const TriaIterator<Accessor> &i)
+{
+  i.print (out);
+  return out;
+}
+
+
+
+/**
+ * Write the address of the object pointed to by @p i to @p out by calling
+ * the print() function of the iterator, and return @p out so that output
+ * operations can be chained.
+ *
+ * @author Wolfgang Bangerth, 1998
+ */
+template <typename Accessor>
+inline std::ostream &
+operator << (std::ostream                       &out,
+             const TriaActiveIterator<Accessor> &i)
+{
+  i.print (out);
+  return out;
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+
+// if in optimized mode: include more templates
+#ifndef DEBUG
+#  include "tria_iterator.templates.h"
+#endif
+
+
+/*----------------------------   tria-iterator.h     ---------------------------*/
+#endif
+/*----------------------------   tria-iterator.h     ---------------------------*/
diff --git a/include/deal.II/grid/tria_iterator.templates.h b/include/deal.II/grid/tria_iterator.templates.h
new file mode 100644
index 0000000..4dbaa4f
--- /dev/null
+++ b/include/deal.II/grid/tria_iterator.templates.h
@@ -0,0 +1,575 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__tria_iterator_templates_h
+#define dealii__tria_iterator_templates_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_iterator.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/* Note: This file only contains template definitions and will thus
+   not produce an object file. It is rather meant to be included
+   into the *_accessor.cc files.
+*/
+
+
+/*------------------------ Functions: TriaRawIterator ------------------*/
+
+
+// Default constructor: the accessor receives a null triangulation pointer,
+// -2 for both level and index, and a null local-data pointer.
+template <typename Accessor>
+inline
+TriaRawIterator<Accessor>::TriaRawIterator ()
+  : accessor (0, -2, -2, 0) {}
+
+
+// Copy constructor: duplicates the accessor of @p i.
+template <typename Accessor>
+inline
+TriaRawIterator<Accessor>::TriaRawIterator (const TriaRawIterator<Accessor> &i)
+  : accessor (i.accessor)
+{}
+
+
+
+// Construct an iterator for the object with the given @p level and @p index
+// of triangulation @p parent; all arguments are passed on to the accessor.
+template <typename Accessor>
+inline
+TriaRawIterator<Accessor>::
+TriaRawIterator (const Triangulation<Accessor::dimension,Accessor::space_dimension> *parent,
+                 const int level,
+                 const int index,
+                 const typename Accessor::AccessorData *local_data)
+  : accessor (parent, level, index, local_data) {}
+
+
+// Set up the accessor with @p local_data only, then let it copy from @p tria_accessor.
+template <typename Accessor>
+inline
+TriaRawIterator<Accessor>::TriaRawIterator (
+  const TriaAccessorBase<Accessor::structure_dimension,Accessor::dimension,Accessor::space_dimension> &tria_accessor,
+  const typename Accessor::AccessorData *local_data)
+  : accessor (0, -2, -2, local_data)
+{
+  this->accessor.copy_from (tria_accessor);
+}
+
+
+// Assignment: the stored accessor copies from the accessor of @p i.
+template <typename Accessor>
+inline TriaRawIterator<Accessor> &
+TriaRawIterator<Accessor>::operator = (const TriaRawIterator<Accessor> &i)
+{
+  const Accessor &source = i.accessor;
+  this->accessor.copy_from (source);
+  return *this;
+}
+
+
+
+// Two iterators are equal exactly if their accessors compare equal.
+template <typename Accessor>
+inline bool
+TriaRawIterator<Accessor>::operator == (const TriaRawIterator<Accessor> &other) const
+{
+  return (this->accessor == other.accessor);
+}
+
+
+// Inequality is defined as the negation of operator==.
+template <typename Accessor>
+inline bool
+TriaRawIterator<Accessor>::operator != (const TriaRawIterator<Accessor> &other) const
+{
+  return !(*this == other);
+}
+
+
+// Postfix increment: remember the current value, advance, and hand back
+// the remembered copy.
+template <typename Accessor>
+inline TriaRawIterator<Accessor>
+TriaRawIterator<Accessor>::operator ++ (int)
+{
+  TriaRawIterator<Accessor> previous (*this);
+  ++(*this);
+  return previous;
+}
+
+
+// Postfix decrement: remember the current value, step back, and hand back
+// the remembered copy.
+template <typename Accessor>
+inline TriaRawIterator<Accessor>
+TriaRawIterator<Accessor>::operator -- (int)
+{
+  TriaRawIterator<Accessor> previous (*this);
+  --(*this);
+  return previous;
+}
+
+
+/*-----------------------  functions: TriaIterator ---------------*/
+
+
+// Default constructor: defers to the default constructor of the raw iterator.
+template <typename Accessor>
+inline TriaIterator<Accessor>::TriaIterator ()
+  : TriaRawIterator<Accessor> () {}
+
+
+// Copy constructor: the base class is built from the accessor of @p i.
+template <typename Accessor>
+inline
+TriaIterator<Accessor>::TriaIterator (const TriaIterator<Accessor> &i)
+  : TriaRawIterator<Accessor> (i.accessor) {}
+
+
+// Conversion from a raw iterator. In debug mode the object pointed to must
+// be used unless the state is past-the-end.
+template <typename Accessor>
+inline
+TriaIterator<Accessor>::TriaIterator (const TriaRawIterator<Accessor> &i)
+  : TriaRawIterator<Accessor> (i.accessor)
+{
+#ifdef DEBUG
+  // The state is tested in a separate statement rather than inside the
+  // Assert condition: the original note says used() would otherwise be
+  // called even for a past-the-end iterator and throw an exception.
+  const bool is_past_the_end
+    = (this->state() == IteratorState::past_the_end);
+  if (!is_past_the_end)
+    Assert (this->accessor.used(),
+            ExcAssignmentOfUnusedObject());
+#endif
+}
+
+
+// Build the iterator from triangulation, level and index; debug mode checks used().
+template <typename Accessor>
+inline
+TriaIterator<Accessor>::TriaIterator (const Triangulation<Accessor::dimension,Accessor::space_dimension> *parent,
+                                      const int level,
+                                      const int index,
+                                      const typename Accessor::AccessorData *local_data)
+  : TriaRawIterator<Accessor> (parent, level, index, local_data)
+{
+#ifdef DEBUG
+  // The state is tested in a separate statement rather than inside the
+  // Assert condition: the original note says used() would otherwise be
+  // called even for a past-the-end iterator and throw an exception.
+  const bool is_past_the_end
+    = (this->state() == IteratorState::past_the_end);
+  if (!is_past_the_end)
+    Assert (this->accessor.used(),
+            ExcAssignmentOfUnusedObject());
+#endif
+}
+
+
+// Build the iterator from a TriaAccessorBase object; debug mode checks used().
+template <typename Accessor>
+inline
+TriaIterator<Accessor>::TriaIterator (
+  const TriaAccessorBase<Accessor::structure_dimension,Accessor::dimension,Accessor::space_dimension> &tria_accessor,
+  const typename Accessor::AccessorData *local_data)
+  : TriaRawIterator<Accessor> (tria_accessor, local_data)
+{
+#ifdef DEBUG
+  // The state is tested in a separate statement rather than inside the
+  // Assert condition: the original note says used() would otherwise be
+  // called even for a past-the-end iterator and throw an exception.
+  const bool is_past_the_end
+    = (this->state() == IteratorState::past_the_end);
+  if (!is_past_the_end)
+    Assert (this->accessor.used(),
+            ExcAssignmentOfUnusedObject());
+#endif
+}
+
+
+template <typename Accessor>
+inline TriaIterator<Accessor> &
+TriaIterator<Accessor>::operator = (const TriaIterator<Accessor> &i)
+{
+  // same iterator class on both sides: copy the accessor, no check performed
+  this->accessor.copy_from (i.accessor);
+  return *this;
+}
+
+
+// Assignment from a TriaIterator over another accessor type; no check performed.
+template <typename Accessor>
+template <typename OtherAccessor>
+inline TriaIterator<Accessor> &
+TriaIterator<Accessor>::operator = (const TriaIterator<OtherAccessor> &i)
+{
+  this->accessor.copy_from (i.accessor);
+  return *this;
+}
+
+
+// Assignment from a raw iterator. In debug mode the object now pointed to
+// must be used unless the state is past-the-end.
+template <typename Accessor>
+inline TriaIterator<Accessor> &
+TriaIterator<Accessor>::operator = (const TriaRawIterator<Accessor> &i)
+{
+  this->accessor.copy_from (i.accessor);
+#ifdef DEBUG
+  // The state is tested in a separate statement rather than inside the
+  // Assert condition: the original note says used() would otherwise be
+  // called even for a past-the-end iterator and throw an exception.
+  const bool is_past_the_end
+    = (this->state() == IteratorState::past_the_end);
+  if (!is_past_the_end)
+    Assert (this->accessor.used(),
+            ExcAssignmentOfUnusedObject());
+#endif
+  return *this;
+}
+
+
+// Assignment from a raw iterator over another accessor type. In debug mode
+// the object now pointed to must be used unless the state is past-the-end.
+template <typename Accessor>
+template <typename OtherAccessor>
+inline TriaIterator<Accessor> &
+TriaIterator<Accessor>::operator = (const TriaRawIterator<OtherAccessor> &i)
+{
+  this->accessor.copy_from (i.accessor);
+#ifdef DEBUG
+  // The state is tested in a separate statement rather than inside the
+  // Assert condition: the original note says used() would otherwise be
+  // called even for a past-the-end iterator and throw an exception.
+  const bool is_past_the_end
+    = (this->state() == IteratorState::past_the_end);
+  if (!is_past_the_end)
+    Assert (this->accessor.used(),
+            ExcAssignmentOfUnusedObject());
+#endif
+  return *this;
+}
+
+
+// Prefix increment: step on until a used object is found or the valid state is left.
+template <typename Accessor>
+inline TriaIterator<Accessor> &
+TriaIterator<Accessor>::operator ++ ()
+{
+  do
+    TriaRawIterator<Accessor>::operator++();
+  while ((this->state() == IteratorState::valid) && !(this->accessor.used() == true));
+  return *this;
+}
+
+
+// Postfix increment: advance via the prefix form, return the old value.
+template <typename Accessor>
+inline TriaIterator<Accessor>
+TriaIterator<Accessor>::operator ++ (int)
+{
+  TriaIterator<Accessor> previous (*this);
+  ++(*this);
+  return previous;
+}
+
+
+// Prefix decrement: step back until a used object is found or the valid
+// state is left.
+template <typename Accessor>
+inline TriaIterator<Accessor> &
+TriaIterator<Accessor>::operator -- ()
+{
+  do
+    TriaRawIterator<Accessor>::operator--();
+  while ((this->state() == IteratorState::valid) && !(this->accessor.used() == true));
+  return *this;
+}
+
+
+// Postfix decrement: step back via the prefix form and return the value the
+// iterator had before.
+template <typename Accessor>
+inline TriaIterator<Accessor>
+TriaIterator<Accessor>::operator -- (int)
+{
+  TriaIterator<Accessor> previous (*this);
+  --(*this);
+  return previous;
+}
+
+
+/*-----------------------  functions: TriaActiveIterator ---------------*/
+
+
+// Default constructor: defers to the default constructor of TriaIterator.
+template <typename Accessor>
+inline TriaActiveIterator<Accessor>::TriaActiveIterator ()
+  : TriaIterator<Accessor> () {}
+
+
+// Copy constructor: pass the base-class part of @p i by reference, so that no sliced temporary TriaIterator is built and copied.
+template <typename Accessor> inline
+TriaActiveIterator<Accessor>::TriaActiveIterator (const TriaActiveIterator<Accessor> &i) :
+  TriaIterator<Accessor> (static_cast<const TriaIterator<Accessor> &>(i)) {}
+
+
+// Conversion from a raw iterator; debug mode checks that the object has no children.
+template <typename Accessor>
+inline
+TriaActiveIterator<Accessor>::TriaActiveIterator (const TriaRawIterator<Accessor> &i)
+  : TriaIterator<Accessor> (i)
+{
+#ifdef DEBUG
+  // The state is tested in a separate statement rather than inside the
+  // Assert condition: the original note says has_children() would otherwise
+  // be called even for a past-the-end iterator and throw an exception.
+  const bool is_past_the_end
+    = (this->state() == IteratorState::past_the_end);
+  if (!is_past_the_end)
+    Assert (this->accessor.has_children()==false,
+            ExcAssignmentOfInactiveObject());
+#endif
+}
+
+
+// Conversion from a TriaIterator; debug mode checks that the object has no children.
+template <typename Accessor>
+inline
+TriaActiveIterator<Accessor>::TriaActiveIterator (const TriaIterator<Accessor> &i)
+  : TriaIterator<Accessor> (i)
+{
+#ifdef DEBUG
+  // The state is tested in a separate statement rather than inside the
+  // Assert condition: the original note says has_children() would otherwise
+  // be called even for a past-the-end iterator and throw an exception.
+  const bool is_past_the_end
+    = (this->state() == IteratorState::past_the_end);
+  if (!is_past_the_end)
+    Assert (this->accessor.has_children()==false,
+            ExcAssignmentOfInactiveObject());
+#endif
+}
+
+
+// Build from triangulation, level and index; debug mode checks that the object has no children.
+template <typename Accessor>
+inline
+TriaActiveIterator<Accessor>::TriaActiveIterator (const Triangulation<Accessor::dimension,Accessor::space_dimension> *parent,
+                                                  const int level,
+                                                  const int index,
+                                                  const typename Accessor::AccessorData *local_data)
+  : TriaIterator<Accessor> (parent, level, index, local_data)
+{
+#ifdef DEBUG
+  // The state is tested in a separate statement rather than inside the
+  // Assert condition: the original note says has_children() would otherwise
+  // be called even for a past-the-end iterator and throw an exception.
+  const bool is_past_the_end
+    = (this->state() == IteratorState::past_the_end);
+  if (!is_past_the_end)
+    Assert (this->accessor.has_children()==false,
+            ExcAssignmentOfInactiveObject());
+#endif
+}
+
+
+// Build from a TriaAccessorBase object; debug mode checks that the object has no children.
+template <typename Accessor>
+inline
+TriaActiveIterator<Accessor>::TriaActiveIterator (
+  const TriaAccessorBase<Accessor::structure_dimension,Accessor::dimension,Accessor::space_dimension> &tria_accessor,
+  const typename Accessor::AccessorData *local_data)
+  : TriaIterator<Accessor> (tria_accessor, local_data)
+{
+#ifdef DEBUG
+  // The state is tested in a separate statement rather than inside the
+  // Assert condition: the original note says has_children() would otherwise
+  // be called even for a past-the-end iterator and throw an exception.
+  const bool is_past_the_end
+    = (this->state() == IteratorState::past_the_end);
+  if (!is_past_the_end)
+    Assert (this->accessor.has_children()==false,
+            ExcAssignmentOfInactiveObject());
+#endif
+}
+
+
+template <typename Accessor>
+inline TriaActiveIterator<Accessor> &
+TriaActiveIterator<Accessor>::operator = (const TriaActiveIterator<Accessor> &i)
+{
+  // same iterator class on both sides: copy the accessor, no check performed
+  this->accessor.copy_from (i.accessor);
+  return *this;
+}
+
+
+// Assignment from an active iterator over another accessor type; no check performed.
+template <typename Accessor>
+template <class OtherAccessor>
+inline TriaActiveIterator<Accessor> &
+TriaActiveIterator<Accessor>::operator = (const TriaActiveIterator<OtherAccessor> &i)
+{
+  this->accessor.copy_from (i.accessor);
+  return *this;
+}
+
+
+// Assignment from a raw iterator. In debug mode the object now pointed to
+// must be used and without children unless the state is past-the-end.
+template <typename Accessor>
+inline TriaActiveIterator<Accessor> &
+TriaActiveIterator<Accessor>::operator = (const TriaRawIterator<Accessor> &i)
+{
+  this->accessor.copy_from (i.accessor);
+#ifdef DEBUG
+  // The state is tested in a separate statement rather than inside the
+  // Assert condition: the original note says has_children() would otherwise
+  // be called even for a past-the-end iterator and throw an exception.
+  const bool is_past_the_end
+    = (this->state() == IteratorState::past_the_end);
+  if (!is_past_the_end)
+    Assert (this->accessor.used() && this->accessor.has_children()==false,
+            ExcAssignmentOfInactiveObject());
+#endif
+  return *this;
+}
+
+
+// Assignment from a raw iterator over another accessor type. In debug mode the
+// object must be used and without children unless the state is past-the-end.
+template <typename Accessor>
+template <class OtherAccessor>
+inline TriaActiveIterator<Accessor> &
+TriaActiveIterator<Accessor>::operator = (const TriaRawIterator<OtherAccessor> &i)
+{
+  this->accessor.copy_from (i.accessor);
+#ifdef DEBUG
+  // The state is tested in a separate statement rather than inside the
+  // Assert condition: the original note says has_children() would otherwise
+  // be called even for a past-the-end iterator and throw an exception.
+  const bool is_past_the_end
+    = (this->state() == IteratorState::past_the_end);
+  if (!is_past_the_end)
+    Assert (this->accessor.used() && this->accessor.has_children()==false,
+            ExcAssignmentOfInactiveObject());
+#endif
+  return *this;
+}
+
+
+// Assignment from a TriaIterator over another accessor type. In debug mode
+// the object must have no children unless the state is past-the-end.
+template <typename Accessor>
+template <class OtherAccessor>
+inline TriaActiveIterator<Accessor> &
+TriaActiveIterator<Accessor>::operator = (const TriaIterator<OtherAccessor> &i)
+{
+  this->accessor.copy_from (i.accessor);
+#ifdef DEBUG
+  // The state is tested in a separate statement rather than inside the
+  // Assert condition: the original note says has_children() would otherwise
+  // be called even for a past-the-end iterator and throw an exception.
+  const bool is_past_the_end
+    = (this->state() == IteratorState::past_the_end);
+  if (!is_past_the_end)
+    Assert (this->accessor.has_children()==false,
+            ExcAssignmentOfInactiveObject());
+#endif
+  return *this;
+}
+
+
+// Assignment from a TriaIterator. In debug mode the object now pointed to
+// must have no children unless the state is past-the-end.
+template <typename Accessor>
+inline TriaActiveIterator<Accessor> &
+TriaActiveIterator<Accessor>::operator = (const TriaIterator<Accessor> &i)
+{
+  this->accessor.copy_from (i.accessor);
+#ifdef DEBUG
+  // The state is tested in a separate statement rather than inside the
+  // Assert condition: the original note says has_children() would otherwise
+  // be called even for a past-the-end iterator and throw an exception.
+  const bool is_past_the_end
+    = (this->state() == IteratorState::past_the_end);
+  if (!is_past_the_end)
+    Assert (this->accessor.has_children()==false,
+            ExcAssignmentOfInactiveObject());
+#endif
+  return *this;
+}
+
+
+// Prefix increment: move from used object to used object until one without
+// children is found or the valid state is left.
+template <typename Accessor>
+inline TriaActiveIterator<Accessor> &
+TriaActiveIterator<Accessor>::operator ++ ()
+{
+  do
+    TriaIterator<Accessor>::operator++();
+  while ((this->state() == IteratorState::valid) && !(this->accessor.has_children() == false));
+  return *this;
+}
+
+
+// Postfix increment: advance via the prefix form and return the value the
+// iterator had before.
+template <typename Accessor>
+inline TriaActiveIterator<Accessor>
+TriaActiveIterator<Accessor>::operator ++ (int)
+{
+  TriaActiveIterator<Accessor> previous (*this);
+  ++(*this);
+  return previous;
+}
+
+
+// Prefix decrement: move back from used object to used object until one
+// without children is found or the valid state is left.
+template <typename Accessor>
+inline TriaActiveIterator<Accessor> &
+TriaActiveIterator<Accessor>::operator -- ()
+{
+  do
+    TriaIterator<Accessor>::operator--();
+  while ((this->state() == IteratorState::valid) && !(this->accessor.has_children() == false));
+  return *this;
+}
+
+
+// Postfix decrement: step back via the prefix form, return the old value.
+template <typename Accessor>
+inline TriaActiveIterator<Accessor>
+TriaActiveIterator<Accessor>::operator -- (int)
+{
+  TriaActiveIterator<Accessor> previous (*this);
+  --(*this);
+  return previous;
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/grid/tria_iterator_base.h b/include/deal.II/grid/tria_iterator_base.h
new file mode 100644
index 0000000..38a43df
--- /dev/null
+++ b/include/deal.II/grid/tria_iterator_base.h
@@ -0,0 +1,51 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__tria_iterator_base_h
+#define dealii__tria_iterator_base_h
+
+
+#include <deal.II/base/config.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * Namespace in which an enumeration is declared that denotes the states an
+ * iterator can be in.
+ *
+ * @ingroup Iterators
+ */
+namespace IteratorState
+{
+
+  /**
+   * The three states an iterator can be in: valid, past-the-end and invalid.
+   */
+  enum IteratorStates
+  {
+    /// Iterator points to a valid object
+    valid,
+    /// Iterator reached end of container
+    past_the_end,
+    /// Iterator is invalid, probably due to an error
+    invalid
+  };
+}
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/grid/tria_iterator_selector.h b/include/deal.II/grid/tria_iterator_selector.h
new file mode 100644
index 0000000..29c48a1
--- /dev/null
+++ b/include/deal.II/grid/tria_iterator_selector.h
@@ -0,0 +1,208 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__tria_iterator_selector_h
+#define dealii__tria_iterator_selector_h
+
+
+#include <deal.II/base/config.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int dim, int spacedim> class CellAccessor;
+template <int, int, int> class TriaAccessorBase;
+template <int, int, int> class InvalidAccessor;
+template <int, int, int> class TriaAccessor;
+template <int dim, int spacedim>  class TriaAccessor<0, dim, spacedim>;
+template <typename Accessor> class TriaRawIterator;
+template <typename Accessor> class TriaIterator;
+template <typename Accessor> class TriaActiveIterator;
+
+namespace internal
+{
+  namespace Triangulation
+  {
+    template <int dim, int spacedim>
+    struct Iterators;
+
+    /**
+     * This class implements some types which differ between the dimensions.
+     * These are the declarations for the 1D case only. See the
+     * @ref Iterators
+     * module for more information.
+     *
+     * A @p vertex_iterator is typedef'd to an iterator operating on the @p
+     * vertices member variable of a <tt>Triangulation<1></tt> object.
+     *
+     * A @p line_iterator is typedef'd to an iterator operating on the @p
+     * lines member variable of a <tt>Triangulation<1></tt> object. An @p
+     * active_line_iterator only operates on the active lines. @p
+     * raw_line_iterator objects operate on all lines, used or not.
+     *
+     * Since we are in one dimension, the following identities are declared:
+     *  @code
+     *    typedef raw_line_iterator    raw_cell_iterator;
+     *    typedef line_iterator        cell_iterator;
+     *    typedef active_line_iterator active_cell_iterator;
+     *  @endcode
+     *
+     * To enable the declaration of @p begin_quad and the like in
+     * <tt>Triangulation<1></tt>, the @p quad_iterators are declared as
+     * iterators over InvalidAccessor. Thus these types exist, but are useless
+     * and will certainly make any involuntary use visible. The same holds for
+     * hexahedron iterators.
+     *
+     * The same applies for the @p face_iterator types, since lines have no
+     * substructures apart from vertices, which are handled in a different
+     * way, however.
+     *
+     * @author Wolfgang Bangerth, 1998
+     */
+    template <int spacedim>
+    struct Iterators<1,spacedim>
+    {
+      typedef TriaRawIterator   <dealii::TriaAccessor<0, 1, spacedim> > raw_vertex_iterator;
+      typedef TriaIterator      <dealii::TriaAccessor<0, 1, spacedim> > vertex_iterator;
+      typedef TriaActiveIterator<dealii::TriaAccessor<0, 1, spacedim> > active_vertex_iterator;
+
+      typedef TriaRawIterator   <dealii::CellAccessor<1,spacedim> > raw_line_iterator;
+      typedef TriaIterator      <dealii::CellAccessor<1,spacedim> > line_iterator;
+      typedef TriaActiveIterator<dealii::CellAccessor<1,spacedim> > active_line_iterator;
+
+      typedef TriaRawIterator   <dealii::InvalidAccessor<2,1,spacedim> > raw_quad_iterator;
+      typedef TriaIterator      <dealii::InvalidAccessor<2,1,spacedim> > quad_iterator;
+      typedef TriaActiveIterator<dealii::InvalidAccessor<2,1,spacedim> > active_quad_iterator;
+
+      typedef TriaRawIterator   <dealii::InvalidAccessor<3,1,spacedim> > raw_hex_iterator;
+      typedef TriaIterator      <dealii::InvalidAccessor<3,1,spacedim> > hex_iterator;
+      typedef TriaActiveIterator<dealii::InvalidAccessor<3,1,spacedim> > active_hex_iterator;
+
+      typedef raw_line_iterator raw_cell_iterator;
+    };
+
+
+
+    /**
+     * This class implements some types which differ between the dimensions.
+     * These are the declarations for the 2D case only. See the
+     * @ref Iterators
+     * module for more information.
+     *
+     * A @p vertex_iterator is typedef'd to an iterator operating on the @p
+     * vertices member variable of a <tt>Triangulation<2></tt> object.
+     *
+     * A @p line_iterator is typedef'd to an iterator operating on the @p
+     * lines member variable of a <tt>Triangulation<2></tt> object. An @p
+     * active_line_iterator only operates on the active lines. @p
+     * raw_line_iterator objects operate on all lines, used or not. Using @p
+     * active_line_iterators may not be particularly useful in 2D since it
+     * only operates on unrefined lines. However, refined lines may also bound
+     * unrefined cells if the neighboring cell is refined once more than the
+     * present one.
+     *
+     * Similarly to line iterators, @p quad_iterator, @p raw_quad_iterator and
+     * @p active_quad_iterator are declared.
+     *
+     * To enable the declaration of @p begin_hex and the like in
+     * <tt>Triangulation<[12]></tt>, the @p hex_iterators are declared as
+     * iterators over InvalidAccessor. Thus these types exist, but are useless
+     * and will certainly make any involuntary use visible.
+     *
+     * Since we are in two dimensions, the following identities are declared:
+     *  @code
+     *    typedef raw_quad_iterator    raw_cell_iterator;
+     *    typedef quad_iterator        cell_iterator;
+     *    typedef active_quad_iterator active_cell_iterator;
+     *
+     *    typedef raw_line_iterator    raw_face_iterator;
+     *    typedef line_iterator        face_iterator;
+     *    typedef active_line_iterator active_face_iterator;
+     *  @endcode
+     *
+     * @author Wolfgang Bangerth, 1998
+     */
+    template <int spacedim>
+    struct Iterators<2,spacedim>
+    {
+      typedef TriaRawIterator   <dealii::TriaAccessor<0, 2, spacedim> > raw_vertex_iterator;
+      typedef TriaIterator      <dealii::TriaAccessor<0, 2, spacedim> > vertex_iterator;
+      typedef TriaActiveIterator<dealii::TriaAccessor<0, 2, spacedim> > active_vertex_iterator;
+
+      typedef TriaRawIterator   <dealii::TriaAccessor<1, 2, spacedim> > raw_line_iterator;
+      typedef TriaIterator      <dealii::TriaAccessor<1, 2, spacedim> > line_iterator;
+      typedef TriaActiveIterator<dealii::TriaAccessor<1, 2, spacedim> > active_line_iterator;
+
+      typedef TriaRawIterator   <dealii::CellAccessor<2, spacedim> > raw_quad_iterator;
+      typedef TriaIterator      <dealii::CellAccessor<2, spacedim> > quad_iterator;
+      typedef TriaActiveIterator<dealii::CellAccessor<2, spacedim> > active_quad_iterator;
+
+      typedef TriaRawIterator   <dealii::InvalidAccessor<3,2,spacedim> > raw_hex_iterator;
+      typedef TriaIterator      <dealii::InvalidAccessor<3,2,spacedim> > hex_iterator;
+      typedef TriaActiveIterator<dealii::InvalidAccessor<3,2,spacedim> > active_hex_iterator;
+
+      typedef raw_quad_iterator raw_cell_iterator;
+    };
+
+
+    /**
+     * This class implements some types which differ between the dimensions.
+     * These are the declarations for the 3D case only. See the
+     * @ref Iterators
+     * module for more information.
+     *
+     * For the declarations of the data types, more or less the same holds as
+     * for lower dimensions (see <tt>Iterators<[12]></tt>). The dimension
+     * specific data types are here, since we are in three dimensions:
+     *  @code
+     *    typedef raw_hex_iterator    raw_cell_iterator;
+     *    typedef hex_iterator        cell_iterator;
+     *    typedef active_hex_iterator active_cell_iterator;
+     *
+     *    typedef raw_quad_iterator    raw_face_iterator;
+     *    typedef quad_iterator        face_iterator;
+     *    typedef active_quad_iterator active_face_iterator;
+     *  @endcode
+     *
+     * @author Wolfgang Bangerth, 1998
+     */
+    template <int spacedim>
+    struct Iterators<3,spacedim>
+    {
+      typedef TriaRawIterator   <dealii::TriaAccessor<0, 3, spacedim> > raw_vertex_iterator;
+      typedef TriaIterator      <dealii::TriaAccessor<0, 3, spacedim> > vertex_iterator;
+      typedef TriaActiveIterator<dealii::TriaAccessor<0, 3, spacedim> > active_vertex_iterator;
+
+      typedef TriaRawIterator   <dealii::TriaAccessor<1, 3, spacedim> > raw_line_iterator;
+      typedef TriaIterator      <dealii::TriaAccessor<1, 3, spacedim> > line_iterator;
+      typedef TriaActiveIterator<dealii::TriaAccessor<1, 3, spacedim> > active_line_iterator;
+
+      typedef TriaRawIterator   <dealii::TriaAccessor<2, 3, spacedim> > raw_quad_iterator;
+      typedef TriaIterator      <dealii::TriaAccessor<2, 3, spacedim> > quad_iterator;
+      typedef TriaActiveIterator<dealii::TriaAccessor<2, 3, spacedim> > active_quad_iterator;
+
+      typedef TriaRawIterator   <dealii::CellAccessor<3, spacedim> > raw_hex_iterator;
+      typedef TriaIterator      <dealii::CellAccessor<3, spacedim> > hex_iterator;
+      typedef TriaActiveIterator<dealii::CellAccessor<3, spacedim> > active_hex_iterator;
+
+      typedef raw_hex_iterator raw_cell_iterator;
+    };
+
+  }
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // dealii__tria_iterator_selector_h
diff --git a/include/deal.II/grid/tria_levels.h b/include/deal.II/grid/tria_levels.h
new file mode 100644
index 0000000..79d7ec5
--- /dev/null
+++ b/include/deal.II/grid/tria_levels.h
@@ -0,0 +1,317 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__tria_levels_h
+#define dealii__tria_levels_h
+
+
+#include <deal.II/base/config.h>
+#include <vector>
+#include <deal.II/grid/tria_object.h>
+#include <deal.II/base/point.h>
+#include <deal.II/grid/tria_objects.h>
+
+#include <boost/serialization/utility.hpp>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace internal
+{
+  namespace Triangulation
+  {
+    /**
+     * Store all information which belongs to one level of the multilevel
+     * hierarchy.
+     *
+     * In TriaLevel, all cell data is stored which is not dependent on the
+     * dimension, e.g. a field to store the refinement flag for the cells
+     * (what a cell actually is is declared elsewhere), etc. See also
+     * TriaObjects for non level-oriented data.
+     *
+     * There is another field, which may fit in here, namely the material data
+     * (for cells) or the boundary indicators (for faces), but since we need
+     * for a line or quad either boundary information or material data, we
+     * store them with the lines and quads rather than with the common data.
+     * Likewise, in 3d, we need boundary indicators for lines and quads (we
+     * need to know how to refine a line if the two adjacent faces have
+     * different boundary indicators), and material data for cells.
+     *
+     * @author Wolfgang Bangerth, Guido Kanschat, 1998, 2007
+     */
+    template <int dim>
+    class TriaLevel
+    {
+    public:
+      /**
+       * @p RefinementCase<dim>::Type flags for the cells to be refined with
+       * or not (RefinementCase<dim>::no_refinement). The meaning what a cell
+       * is, is dimension specific, therefore also the length of this vector
+       * depends on the dimension: in one dimension, the length of this vector
+       * equals the length of the @p lines vector, in two dimensions that of
+       * the @p quads vector, etc.
+       */
+      std::vector<unsigned char> refine_flags;
+
+      /**
+       * Same meaning as the one above, but specifies whether a cell must be
+       * coarsened.
+       */
+      std::vector<bool> coarsen_flags;
+
+
+      /**
+       * An integer that, for every active cell, stores the how many-th active
+       * cell this is. For non-active cells, this value is unused and set to
+       * an invalid value.
+       */
+      std::vector<unsigned int> active_cell_indices;
+
+      /**
+       * Levels and indices of the neighbors of the cells. Convention is, that
+       * the neighbors of the cell with index @p i are stored in the fields
+       * following $i*(2*real\_space\_dimension)$, e.g. in one spatial
+       * dimension, the neighbors of cell 0 are stored in
+       * <tt>neighbors[0]</tt> and <tt>neighbors[1]</tt>, the neighbors of
+       * cell 1 are stored in <tt>neighbors[2]</tt> and <tt>neighbors[3]</tt>,
+       * and so on.
+       *
+       * In neighbors, <tt>neighbors[i].first</tt> is the level, while
+       * <tt>neighbors[i].second</tt> is the index of the neighbor.
+       *
+       * If a neighbor does not exist (cell is at the boundary),
+       * <tt>level=index=-1</tt> is set.
+       *
+       * <em>Conventions:</em> The @p ith neighbor of a cell is the one which
+       * shares the @p ith face (@p Line in 2D, @p Quad in 3D) of this cell.
+       *
+       * The neighbor of a cell has at most the same level as this cell, i.e.
+       * it may or may not be refined.
+       *
+       * In one dimension, a neighbor may have any level less or equal the
+       * level of this cell. If it has the same level, it may be refined an
+       * arbitrary number of times, but the neighbor pointer still points to
+       * the cell on the same level, while the neighbors of the children of
+       * the neighbor may point to this cell or its children.
+       *
+       * In two and more dimensions, the neighbor is either on the same level
+       * and refined (in which case its children have neighbor pointers to
+       * this cell or its direct children), unrefined on the same level or one
+       * level down (in which case its neighbor pointer points to the mother
+       * cell of this cell).
+       */
+      std::vector<std::pair<int,int> > neighbors;
+
+      /**
+       * One integer per cell to store which subdomain it belongs to. This
+       * field is most often used in parallel computations, where it denotes
+       * which processor shall work on the cells with a given subdomain
+       * number.
+       */
+      std::vector<types::subdomain_id> subdomain_ids;
+
+      /**
+       * for parallel multigrid
+       */
+      std::vector<types::subdomain_id> level_subdomain_ids;
+
+      /**
+       * One integer for every consecutive pair of cells to store which index
+       * their parent has.
+       *
+       * (We store this information once for each pair of cells since every
+       * refinement, isotropic or anisotropic, and in any space dimension,
+       * always creates children in multiples of two, so there is no need to
+       * store the parent index for every cell.)
+       */
+      std::vector<int> parents;
+
+      /**
+       * One bool per cell to indicate the direction of the normal. true: use
+       * orientation from vertex; false: revert the orientation. See
+       * @ref GlossDirectionFlag.
+       *
+       * This is only used for codim==1 meshes.
+       */
+      std::vector<bool> direction_flags;
+
+      /**
+       * The object containing the data on cells and related functions
+       */
+      TriaObjects<TriaObject<dim> > cells;
+
+
+      /**
+       * Reserve enough space to accommodate @p total_cells cells on this
+       * level. Since there are no @p used flags on this level, you have to
+       * give the total number of cells, not only the number of newly to
+       * accommodate ones, like in the <tt>TriaLevel<N>::reserve_space</tt>
+       * functions, with <tt>N>0</tt>.
+       *
+       * Since the number of neighbors per cell depends on the dimensions, you
+       * have to pass that additionally.
+       */
+
+      void reserve_space (const unsigned int total_cells,
+                          const unsigned int dimension,
+                          const unsigned int space_dimension);
+
+      /**
+       * Check the memory consistency of the different containers. Should only
+       * be called with the preprocessor flag @p DEBUG set. The function
+       * should be called from the functions of the higher TriaLevel classes.
+       */
+      void monitor_memory (const unsigned int true_dimension) const;
+
+      /**
+       * Determine an estimate for the memory consumption (in bytes) of this
+       * object.
+       */
+      std::size_t memory_consumption () const;
+
+      /**
+       * Read or write the data of this object to or from a stream for the
+       * purpose of serialization
+       */
+      template <class Archive>
+      void serialize(Archive &ar,
+                     const unsigned int version);
+
+      /**
+       * Exception
+       */
+      DeclException3 (ExcMemoryWasted,
+                      char *, int, int,
+                      << "The container " << arg1 << " contains "
+                      << arg2 << " elements, but it`s capacity is "
+                      << arg3 << ".");
+      /**
+       * Exception
+       */
+      DeclException2 (ExcMemoryInexact,
+                      int, int,
+                      << "The containers have sizes " << arg1 << " and "
+                      << arg2 << ", which is not as expected.");
+    };
+
+//TODO: Replace TriaObjectsHex to avoid this specialization
+
+    /**
+     * Specialization of TriaLevel for 3D, needed since we use TriaObjectsHex
+     * instead of TriaObjects. Refer to the documentation of the general class
+     * template for details.
+     */
+    template<>
+    class TriaLevel<3>
+    {
+    public:
+      std::vector<unsigned char> refine_flags;
+      std::vector<bool> coarsen_flags;
+      std::vector<unsigned int> active_cell_indices;
+      std::vector<std::pair<int,int> > neighbors;
+      std::vector<types::subdomain_id> subdomain_ids;
+      std::vector<types::subdomain_id> level_subdomain_ids;
+      std::vector<int> parents;
+
+      // The following is not used
+      // since we don't support
+      // codim=1 meshes in 3d; only
+      // needed to allow
+      // compilation
+      // TODO[TH]: this is no longer true and might be a bug.
+      std::vector<bool> direction_flags;
+
+      TriaObjectsHex cells;
+
+
+      void reserve_space (const unsigned int total_cells,
+                          const unsigned int dimension,
+                          const unsigned int space_dimension);
+      void monitor_memory (const unsigned int true_dimension) const;
+      std::size_t memory_consumption () const;
+
+      /**
+       * Read or write the data of this object to or from a stream for the
+       * purpose of serialization
+       */
+      template <class Archive>
+      void serialize(Archive &ar,
+                     const unsigned int version);
+
+      /**
+       * Exception
+       */
+      DeclException3 (ExcMemoryWasted,
+                      char *, int, int,
+                      << "The container " << arg1 << " contains "
+                      << arg2 << " elements, but it`s capacity is "
+                      << arg3 << ".");
+      /**
+       * Exception
+       */
+      DeclException2 (ExcMemoryInexact,
+                      int, int,
+                      << "The containers have sizes " << arg1 << " and "
+                      << arg2 << ", which is not as expected.");
+    };
+
+
+
+    template <int dim>
+    template <class Archive>
+    void TriaLevel<dim>::serialize(Archive &ar,
+                                   const unsigned int)
+    {
+      ar &refine_flags &coarsen_flags;
+
+      // do not serialize 'active_cell_indices' here. instead of storing them
+      // to the stream and re-reading them again later, we just rebuild them
+      // in Triangulation::load()
+
+      ar &neighbors;
+      ar &subdomain_ids;
+      ar &level_subdomain_ids;
+      ar &parents;
+      ar &direction_flags;
+      ar &cells;
+    }
+
+
+
+    template <class Archive>
+    void TriaLevel<3>::serialize(Archive &ar,
+                                 const unsigned int)
+    {
+      ar &refine_flags &coarsen_flags;
+
+      // do not serialize 'active_cell_indices' here. instead of storing them
+      // to the stream and re-reading them again later, we just rebuild them
+      // in Triangulation::load()
+
+      ar &neighbors;
+      ar &subdomain_ids;
+      ar &level_subdomain_ids;
+      ar &parents;
+      ar &direction_flags;
+      ar &cells;
+    }
+
+  }
+}
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/grid/tria_object.h b/include/deal.II/grid/tria_object.h
new file mode 100644
index 0000000..1981762
--- /dev/null
+++ b/include/deal.II/grid/tria_object.h
@@ -0,0 +1,212 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__tria_object_h
+#define dealii__tria_object_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/geometry_info.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace internal
+{
+  namespace Triangulation
+  {
+
+    /**
+     * Class template for the <tt>structdim</tt>-dimensional cells
+     * constituting a dealii::Triangulation of dimension <tt>structdim</tt> or
+     * lower dimensional objects of higher dimensions.  They are characterized
+     * by the (global) indices of their faces, which are cells of dimension
+     * <tt>structdim-1</tt> or vertices if <tt>structdim=1</tt>.
+     *
+     * @author Guido Kanschat, 2007
+     */
+    template <int structdim>
+    class TriaObject
+    {
+    public:
+      static const unsigned int dimension = structdim;
+
+      /**
+       * Default constructor, setting all face indices to invalid values.
+       */
+      TriaObject ();
+
+      /**
+       * Constructor for a line object with the numbers of its two end points.
+       *
+       * Throws an exception if dimension is not one.
+       */
+      TriaObject (const int i0, const int i1);
+
+      /**
+       * Constructor for a quadrilateral object with the numbers of its four
+       * lines.
+       *
+       * Throws an exception if dimension is not two.
+       */
+      TriaObject (const int i0, const int i1,
+                  const int i2, const int i3);
+
+      /**
+       * Constructor for a hexahedron object with the numbers of its six
+       * quadrilaterals.
+       *
+       * Throws an exception if dimension is not three.
+       */
+      TriaObject (const int i0, const int i1,
+                  const int i2, const int i3,
+                  const int i4, const int i5);
+
+
+      /**
+       * Return the index of the ith face object.
+       */
+      int face (const unsigned int i) const;
+
+      /**
+       * Set the index of the ith face object.
+       */
+      void set_face (const unsigned int i, const int index);
+
+      /**
+       * Determine an estimate for the memory consumption (in bytes) of this
+       * object.
+       */
+      static std::size_t memory_consumption ();
+
+      /**
+       * Read or write the data of this object to or from a stream for the
+       * purpose of serialization
+       */
+      template <class Archive>
+      void serialize(Archive &ar,
+                     const unsigned int version);
+
+    protected:
+      /**
+       * Global indices of the face iterators bounding this cell if dim@>1,
+       * and the two vertex indices in 1d.
+       */
+      int faces[GeometryInfo<structdim>::faces_per_cell];
+    };
+
+//----------------------------------------------------------------------//
+
+    template <int structdim>
+    inline
+    TriaObject<structdim>::TriaObject ()
+    {
+      for (unsigned int i=0; i<GeometryInfo<structdim>::faces_per_cell; ++i)
+        faces[i] = -1;
+    }
+
+
+    template <int structdim>
+    inline
+    TriaObject<structdim>::TriaObject (const int i0,
+                                       const int i1)
+    {
+      Assert (structdim==1, ExcImpossibleInDim(structdim));
+      faces[0] = i0;
+      faces[1] = i1;
+    }
+
+
+    template <int structdim>
+    inline
+    TriaObject<structdim>::TriaObject (const int i0,
+                                       const int i1,
+                                       const int i2,
+                                       const int i3)
+    {
+      Assert (structdim==2, ExcImpossibleInDim(structdim));
+      faces[0] = i0;
+      faces[1] = i1;
+      faces[2] = i2;
+      faces[3] = i3;
+    }
+
+
+    template <int structdim>
+    inline
+    TriaObject<structdim>::TriaObject (const int i0,
+                                       const int i1,
+                                       const int i2,
+                                       const int i3,
+                                       const int i4,
+                                       const int i5)
+    {
+      Assert (structdim==3, ExcImpossibleInDim(structdim));
+      faces[0] = i0;
+      faces[1] = i1;
+      faces[2] = i2;
+      faces[3] = i3;
+      faces[4] = i4;
+      faces[5] = i5;
+    }
+
+
+    template <int structdim>
+    inline
+    int TriaObject<structdim>::face (const unsigned int i) const
+    {
+      Assert (i<GeometryInfo<structdim>::faces_per_cell,
+              ExcIndexRange(i,0,GeometryInfo<structdim>::faces_per_cell));
+      return faces[i];
+    }
+
+
+
+    template <int structdim>
+    inline
+    void TriaObject<structdim>::set_face (const unsigned int i, const int index)
+    {
+      Assert (i<GeometryInfo<structdim>::faces_per_cell,
+              ExcIndexRange(i,0,GeometryInfo<structdim>::faces_per_cell));
+      faces[i] = index;
+    }
+
+
+
+    template <int structdim>
+    inline
+    std::size_t
+    TriaObject<structdim>::memory_consumption ()
+    {
+      return sizeof(TriaObject<structdim>);
+    }
+
+
+    template <int structdim>
+    template <class Archive>
+    void TriaObject<structdim>::serialize(Archive &ar,
+                                          const unsigned int)
+    {
+      ar &faces;
+    }
+
+
+  }
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/grid/tria_objects.h b/include/deal.II/grid/tria_objects.h
new file mode 100644
index 0000000..2bc4196
--- /dev/null
+++ b/include/deal.II/grid/tria_objects.h
@@ -0,0 +1,874 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__tria_objects_h
+#define dealii__tria_objects_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/geometry_info.h>
+#include <deal.II/grid/tria_object.h>
+
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+//TODO: This should all be cleaned up. Currently, only a single
+//function in the library makes use of the odd specializations, and
+//this function is Triangulation::execute_refinement() in 3D. I
+//assume that the other refinement functions would profit from using
+//next_free_single_object() and next_free_pair_object(), but they seem
+//to get around it.
+
+//TODO: The TriaObjects class contains a std::vector<G>. This is only an
+//efficient storage scheme if G is relatively well packed, i.e. it's not a
+//bool and then an integer and then a double, etc. Verify that this is
+//actually the case.
+
+template <int dim, int spacedim> class Triangulation;
+template <class Accessor> class TriaRawIterator;
+template <int, int, int> class TriaAccessor;
+
+namespace internal
+{
+  namespace Triangulation
+  {
+
+    /**
+     * General template for information belonging to the geometrical objects
+     * of a triangulation, i.e. lines, quads, hexahedra...  Apart from the
+     * vector of objects, additional information is included, namely vectors
+     * indicating the children, the used-status, user-flags, material-ids...
+     *
+     * Objects of these classes are included in the TriaLevel and TriaFaces
+     * classes.
+     *
+     * @author Tobias Leicht, Guido Kanschat, 2006, 2007, 2012
+     */
+
+    template <typename G>
+    class TriaObjects
+    {
+    public:
+      /**
+       * Constructor resetting some data.
+       */
+      TriaObjects();
+
+      /**
+       * Vector of the objects belonging to this level. The index of the
+       * object equals the index in this container.
+       */
+      std::vector<G> cells;
+
+      /**
+       * Index of the even children of an object. Since when objects are
+       * refined, all children are created at the same time, they are appended
+       * to the list at least in pairs after each other. We therefore only
+       * store the index of the even children, the uneven follow immediately
+       * afterwards.
+       *
+       * If an object has no children, -1 is stored in this list. An object is
+       * called active if it has no children. The function
+       * TriaAccessorBase::has_children() tests for this.
+       */
+      std::vector<int>  children;
+
+      /**
+       * Store the refinement case each of the cells is refined with. This
+       * vector might be replaced by vector<vector<bool> > (dim, vector<bool>
+       * (n_cells)) which is more memory efficient.
+       */
+      std::vector<RefinementCase<G::dimension> > refinement_cases;
+
+      /**
+       * Vector storing whether an object is used in the @p cells vector.
+       *
+       * Since it is difficult to delete elements in a @p vector, when an
+       * element is not needed any more (e.g. after derefinement), it is not
+       * deleted from the list, but rather the according @p used flag is set
+       * to @p false.
+       */
+      std::vector<bool> used;
+
+      /**
+       * Make available a field for user data, one bit per object. This field
+       * is usually used when an operation runs over all cells and needs
+       * information whether another cell (e.g. a neighbor) has already been
+       * processed.
+       *
+       * You can clear all user flags using
+       * dealii::Triangulation::clear_user_flags().
+       */
+      std::vector<bool> user_flags;
+
+
+      /**
+       * We use this union to store boundary and material data. Because only
+       * one out of these two is actually needed here, we use a union.
+       */
+      struct BoundaryOrMaterialId
+      {
+        union
+        {
+          types::boundary_id boundary_id;
+          types::material_id material_id;
+        };
+
+
+        /**
+         * Default constructor.
+         */
+        BoundaryOrMaterialId ();
+
+        /**
+         * Return the size of objects of this kind.
+         */
+        static
+        std::size_t memory_consumption ();
+
+        /**
+         * Read or write the data of this object to or from a stream for the
+         * purpose of serialization
+         */
+        template <class Archive>
+        void serialize(Archive &ar,
+                       const unsigned int version);
+      };
+
+      /**
+       * Store boundary and material data. For example, in one dimension, this
+       * field stores the material id of a line, which is a number between 0
+       * and numbers::invalid_material_id-1. In more than one dimension, lines
+       * have no material id, but they may be at the boundary; then, we store
+       * the boundary indicator in this field, which denotes to which part of
+       * the boundary this line belongs and which boundary conditions hold on
+       * this part. The boundary indicator also is a number between zero and
+       * numbers::internal_face_boundary_id-1; the id
+       * numbers::internal_face_boundary_id is reserved for lines in the
+       * interior and may be used to check whether a line is at the boundary
+       * or not, which otherwise is not possible if you don't know which cell
+       * it belongs to.
+       */
+      std::vector<BoundaryOrMaterialId> boundary_or_material_id;
+
+      /**
+       * Store manifold ids. This field stores the manifold id of each object,
+       * which is a number between 0 and numbers::invalid_manifold_id-1.
+       */
+      std::vector<types::manifold_id> manifold_id;
+
+      /**
+       * Assert that enough space is allocated to accommodate
+       * <code>new_objs_in_pairs</code> new objects, stored in pairs, plus
+       * <code>new_objs_single</code> stored individually. This function does
+       * not only call <code>vector::reserve()</code>, but does really append
+       * the needed elements.
+       *
+       * In 2D e.g. refined lines have to be stored in pairs, whereas new
+       * lines in the interior of refined cells can be stored as single lines.
+       */
+      void reserve_space (const unsigned int new_objs_in_pairs,
+                          const unsigned int new_objs_single = 0);
+
+      /**
+       * Return an iterator to the next free slot for a single object. This
+       * function is only used by dealii::Triangulation::execute_refinement()
+       * in 3D.
+       *
+       * @warning Interestingly, this function is not used for 1D or 2D
+       * triangulations, where it seems the authors of the refinement function
+       * insist on reimplementing its contents.
+       *
+       * @todo This function is not instantiated for the codim-one case
+       */
+      template <int dim, int spacedim>
+      dealii::TriaRawIterator<dealii::TriaAccessor<G::dimension,dim,spacedim> >
+      next_free_single_object (const dealii::Triangulation<dim,spacedim> &tria);
+
+      /**
+       * Return an iterator to the next free slot for a pair of objects. This
+       * function is only used by dealii::Triangulation::execute_refinement()
+       * in 3D.
+       *
+       * @warning Interestingly, this function is not used for 1D or 2D
+       * triangulations, where it seems the authors of the refinement function
+       * insist on reimplementing its contents.
+       *
+       * @todo This function is not instantiated for the codim-one case
+       */
+      template <int dim, int spacedim>
+      dealii::TriaRawIterator<dealii::TriaAccessor<G::dimension,dim,spacedim> >
+      next_free_pair_object (const dealii::Triangulation<dim,spacedim> &tria);
+
+      /**
+       * Return an iterator to the next free slot for a pair of hexes. Only
+       * implemented for <code>G=Hexahedron</code>.
+       */
+      template <int dim, int spacedim>
+      typename dealii::Triangulation<dim,spacedim>::raw_hex_iterator
+      next_free_hex (const dealii::Triangulation<dim,spacedim> &tria,
+                     const unsigned int               level);
+
+      /**
+       * Clear all the data contained in this object.
+       */
+      void clear();
+
+      /**
+       * The orientation of the face number <code>face</code> of the cell with
+       * number <code>cell</code>. The return value is <code>true</code>, if
+       * the normal vector points the usual way
+       * (GeometryInfo::unit_normal_orientation) and <code>false</code> else.
+       *
+       * The result is always <code>true</code> in this class, but derived
+       * classes will reimplement this.
+       *
+       * @warning There is a bug in the class hierarchy right now. Avoid ever
+       * calling this function through a reference, since you might end up
+       * with the base class function instead of the derived class. Still, we
+       * do not want to make it virtual for efficiency reasons.
+       */
+      bool face_orientation(const unsigned int cell, const unsigned int face) const;
+
+
+      /**
+       * Access to user pointers.
+       */
+      void  *&user_pointer(const unsigned int i);
+
+      /**
+       * Read-only access to user pointers.
+       */
+      const void *user_pointer(const unsigned int i) const;
+
+      /**
+       * Access to user indices.
+       */
+      unsigned int &user_index(const unsigned int i);
+
+      /**
+       * Read-only access to user indices.
+       */
+      unsigned int user_index(const unsigned int i) const;
+
+      /**
+       * Reset user data to zero.
+       */
+      void clear_user_data(const unsigned int i);
+
+      /**
+       * Clear all user pointers or indices and reset their type, such that
+       * the next access may be either or.
+       */
+      void clear_user_data();
+
+      /**
+       * Clear all user flags.
+       */
+      void clear_user_flags();
+
+      /**
+       * Check the memory consistency of the different containers. Should only
+       * be called with the preprocessor flag @p DEBUG set. The function
+       * should be called from the functions of the higher TriaLevel classes.
+       */
+      void monitor_memory (const unsigned int true_dimension) const;
+
+      /**
+       * Determine an estimate for the memory consumption (in bytes) of this
+       * object.
+       */
+      std::size_t memory_consumption () const;
+
+      /**
+       * Read or write the data of this object to or from a stream for the
+       * purpose of serialization
+       */
+      template <class Archive>
+      void serialize(Archive &ar,
+                     const unsigned int version);
+
+      /**
+       * Exception reporting that a container holds fewer elements than its
+       * capacity.
+       */
+      DeclException3 (ExcMemoryWasted,
+                      char *, int, int,
+                      << "The container " << arg1 << " contains "
+                      << arg2 << " elements, but it`s capacity is "
+                      << arg3 << ".");
+      /**
+       * Exception reporting that two containers have sizes that do not match
+       * the expected relation.
+       * @ingroup Exceptions
+       */
+      DeclException2 (ExcMemoryInexact,
+                      int, int,
+                      << "The containers have sizes " << arg1 << " and "
+                      << arg2 << ", which is not as expected.");
+
+      /**
+       * Exception reporting that an iterator of the wrong kind was requested.
+       */
+      DeclException2 (ExcWrongIterator,
+                      char *, char *,
+                      << "You asked for the next free " << arg1 << "_iterator, "
+                      "but you can only ask for " << arg2 <<"_iterators.");
+
+      /**
+       * dealii::Triangulation objects can either access a user pointer or a
+       * user index. What you tried to do is trying to access one of those
+       * after using the other.
+       *
+       * @ingroup Exceptions
+       */
+      DeclException0 (ExcPointerIndexClash);
+
+    protected:
+      /**
+       * Counter for next_free_single_* functions
+       */
+      unsigned int next_free_single;
+
+      /**
+       * Counter for next_free_pair_* functions
+       */
+      unsigned int next_free_pair;
+
+      /**
+       * Bool flag for next_free_single_* functions
+       */
+      bool reverse_order_next_free_single;
+
+      /**
+       * The data type storing user pointers or user indices.
+       */
+      struct UserData
+      {
+        union
+        {
+          /// The entry used as user
+          /// pointer.
+          void *p;
+          /// The entry used as user
+          /// index.
+          unsigned int i;
+        };
+
+        /**
+         * Default constructor. Sets the pointer member to zero.
+         */
+        UserData()
+        {
+          p = 0;
+        }
+
+        /**
+         * Read or write the data of this object to or from a stream for the
+         * purpose of serialization.
+         */
+        template <class Archive>
+        void serialize (Archive &ar, const unsigned int version);
+      };
+
+      /**
+       * Enum describing the possible types of userdata.
+       */
+      enum UserDataType
+      {
+        /// No userdata used yet.
+        data_unknown,
+        /// UserData contains pointers.
+        data_pointer,
+        /// UserData contains indices.
+        data_index
+      };
+
+
+      /**
+       * Pointer or index which is not used by the library but may be
+       * accessed and set by the user to handle data local to a
+       * line/quad/etc.
+       */
+      std::vector<UserData> user_data;
+
+      /**
+       * In order to avoid confusion between user pointers and indices, this
+       * enum is set by the first function accessing either and subsequent
+       * access will not be allowed to change the type of data accessed.
+       */
+      mutable UserDataType user_data_type;
+    };
+
+    /**
+     * For hexahedra the data of TriaObjects needs to be extended, as we can
+     * obtain faces (quads) in non-standard-orientation, therefore we declare
+     * a class TriaObjectsHex, which additionally contains a bool-vector of
+     * the face-orientations.
+     */
+    class TriaObjectsHex : public TriaObjects<TriaObject<3> >
+    {
+    public:
+      /**
+       * The orientation of the face number <code>face</code> of the cell with
+       * number <code>cell</code>. The return value is <code>true</code>, if
+       * the normal vector points the usual way
+       * (GeometryInfo::unit_normal_orientation) and <code>false</code> if
+       * they point in opposite direction.
+       */
+      bool face_orientation(const unsigned int cell, const unsigned int face) const;
+
+
+      /**
+       * For edges, we enforce a standard convention that opposite edges
+       * should be parallel. Now, that's enforceable in most cases, and we
+       * have code that makes sure that if a mesh allows this to happen, that
+       * we have this convention. We also know that it is always possible to
+       * have opposite faces have parallel normal vectors. (For both things,
+       * see the Agelek, Anderson, Bangerth, Barth paper mentioned in the
+       * publications list.)
+       *
+       * The problem is that we originally had another condition, namely that
+       * faces 0, 2 and 4 have normals that point into the cell, while the
+       * other faces have normals that point outward. (NOTE(review): this
+       * text used to say "0, 2 and 6", but 6 is not a valid face index of a
+       * hexahedron; 4 is presumably what was meant -- TODO confirm.) It
+       * turns out that this is not always possible. In effect, we have to
+       * store whether the normal vector of each face of each cell follows
+       * this convention or not. If this is so, then this variable stores a
+       * @p true value, otherwise a @p false value.
+       *
+       * In effect, this field has <code>6*n_cells</code> elements, being the
+       * number of cells times the six faces each has.
+       */
+      std::vector<bool> face_orientations;
+
+      /**
+       * flip = rotation by 180 degrees
+       */
+      std::vector<bool> face_flips;
+
+      /**
+       * rotation by 90 degrees
+       */
+      std::vector<bool> face_rotations;
+
+      /**
+       * Assert that enough space is allocated to accommodate
+       * <code>new_objs</code> new objects. This function does not only call
+       * <code>vector::reserve()</code>, but does really append the needed
+       * elements.
+       */
+      void reserve_space (const unsigned int new_objs);
+
+      /**
+       * Clear all the data contained in this object.
+       */
+      void clear();
+
+      /**
+       * Check the memory consistency of the different containers. Should only
+       * be called with the preprocessor flag @p DEBUG set. The function
+       * should be called from the functions of the higher TriaLevel classes.
+       */
+      void monitor_memory (const unsigned int true_dimension) const;
+
+      /**
+       * Determine an estimate for the memory consumption (in bytes) of this
+       * object.
+       */
+      std::size_t memory_consumption () const;
+
+      /**
+       * Read or write the data of this object to or from a stream for the
+       * purpose of serialization
+       */
+      template <class Archive>
+      void serialize(Archive &ar,
+                     const unsigned int version);
+    };
+
+
+    /**
+     * For quadrilaterals in 3D the data of TriaObjects needs to be extended,
+     * as we can obtain faces (quads) with lines in non-standard-orientation,
+     * therefore we declare a class TriaObjectsQuad3D, which additionally
+     * contains a bool-vector of the line-orientations.
+     */
+    class TriaObjectsQuad3D: public TriaObjects<TriaObject<2> >
+    {
+    public:
+      /**
+       * The orientation of the face number <code>face</code> of the cell with
+       * number <code>cell</code>. The return value is <code>true</code>, if
+       * the normal vector points the usual way
+       * (GeometryInfo::unit_normal_orientation) and <code>false</code> if
+       * they point in opposite direction.
+       *
+       * The value is read from the @p line_orientations vector, i.e. here
+       * the "cell" is a quad and its "faces" are its lines.
+       */
+      bool face_orientation(const unsigned int cell, const unsigned int face) const;
+
+
+      /**
+       * In effect, this field has <code>4*n_quads</code> elements, being the
+       * number of quads times the four lines each has.
+       */
+      std::vector<bool> line_orientations;
+
+      /**
+       * Assert that enough space is allocated to accommodate
+       * <code>new_quads_in_pairs</code> new quads, stored in pairs, plus
+       * <code>new_quads_single</code> stored individually. This function does
+       * not only call <code>vector::reserve()</code>, but does really append
+       * the needed elements.
+       */
+      void reserve_space (const unsigned int new_quads_in_pairs,
+                          const unsigned int new_quads_single = 0);
+
+      /**
+       * Clear all the data contained in this object.
+       */
+      void clear();
+
+      /**
+       * Check the memory consistency of the different containers. Should only
+       * be called with the preprocessor flag @p DEBUG set. The function
+       * should be called from the functions of the higher TriaLevel classes.
+       */
+      void monitor_memory (const unsigned int true_dimension) const;
+
+      /**
+       * Determine an estimate for the memory consumption (in bytes) of this
+       * object.
+       */
+      std::size_t memory_consumption () const;
+
+      /**
+       * Read or write the data of this object to or from a stream for the
+       * purpose of serialization
+       */
+      template <class Archive>
+      void serialize(Archive &ar,
+                     const unsigned int version);
+    };
+
+//----------------------------------------------------------------------//
+
+
+    /**
+     * Default constructor: the stored value starts out as
+     * numbers::invalid_material_id, written through the material_id member
+     * of the union.
+     */
+    template <typename G>
+    inline
+    TriaObjects<G>::BoundaryOrMaterialId::BoundaryOrMaterialId ()
+      :
+      material_id (numbers::invalid_material_id)
+    {}
+
+
+
+    /**
+     * Return the size in bytes of a BoundaryOrMaterialId object, as reported
+     * by sizeof.
+     */
+    template <typename G>
+    std::size_t
+    TriaObjects<G>::BoundaryOrMaterialId::memory_consumption ()
+    {
+      const std::size_t n_bytes = sizeof(BoundaryOrMaterialId);
+      return n_bytes;
+    }
+
+
+
+    /**
+     * Pass the content of the union through the archive @p ar. Only one
+     * member is handed to the archive, namely the one with the larger
+     * size, so that all bits are covered; if both have the same size, the
+     * boundary id is used.
+     */
+    template <typename G>
+    template <class Archive>
+    void
+    TriaObjects<G>::BoundaryOrMaterialId::serialize(Archive &ar,
+                                                    const unsigned int /*version*/)
+    {
+      if (sizeof(boundary_id) >= sizeof(material_id))
+        ar &boundary_id;
+      else
+        ar &material_id;
+    }
+
+
+    /**
+     * Base-class version of the orientation query: both arguments are
+     * ignored and the answer is always @p true.
+     */
+    template<typename G>
+    inline
+    bool
+    TriaObjects<G>::
+    face_orientation(const unsigned int /*cell*/, const unsigned int /*face*/) const
+    {
+      const bool standard_orientation = true;
+      return standard_orientation;
+    }
+
+
+    /**
+     * Return a writable reference to the user pointer of entry @p i.
+     *
+     * Asserts that the user data has not been used as indices before, then
+     * marks it as holding pointers, and finally range-checks @p i.
+     */
+    template<typename G>
+    inline
+    void *&
+    TriaObjects<G>::user_pointer (const unsigned int i)
+    {
+      Assert(user_data_type == data_unknown || user_data_type == data_pointer,
+             ExcPointerIndexClash());
+      // from now on, the user data is considered to hold pointers
+      user_data_type = data_pointer;
+
+      Assert(i<user_data.size(), ExcIndexRange(i,0,user_data.size()));
+      UserData &entry = user_data[i];
+      return entry.p;
+    }
+
+
+    /**
+     * Return the user pointer of entry @p i for reading.
+     *
+     * Although this function is const, it records (in the mutable member
+     * user_data_type) that the user data is now used as pointers. Asserts
+     * that it has not been used as indices before and range-checks @p i.
+     */
+    template<typename G>
+    inline
+    const void *
+    TriaObjects<G>::user_pointer (const unsigned int i) const
+    {
+      Assert(user_data_type == data_unknown || user_data_type == data_pointer,
+             ExcPointerIndexClash());
+      // user_data_type is mutable, so it can be set in a const function
+      user_data_type = data_pointer;
+
+      Assert(i<user_data.size(), ExcIndexRange(i,0,user_data.size()));
+      const UserData &entry = user_data[i];
+      return entry.p;
+    }
+
+
+    /**
+     * Return a writable reference to the user index of entry @p i.
+     *
+     * Asserts that the user data has not been used as pointers before, then
+     * marks it as holding indices, and finally range-checks @p i.
+     */
+    template<typename G>
+    inline
+    unsigned int &
+    TriaObjects<G>::user_index (const unsigned int i)
+    {
+      Assert(user_data_type == data_unknown || user_data_type == data_index,
+             ExcPointerIndexClash());
+      // from now on, the user data is considered to hold indices
+      user_data_type = data_index;
+
+      Assert(i<user_data.size(), ExcIndexRange(i,0,user_data.size()));
+      UserData &entry = user_data[i];
+      return entry.i;
+    }
+
+
+    /**
+     * Reset the user data of entry @p i to zero. The index @p i is
+     * range-checked by an assertion.
+     */
+    template<typename G>
+    inline
+    void
+    TriaObjects<G>::clear_user_data (const unsigned int i)
+    {
+      Assert(i<user_data.size(), ExcIndexRange(i,0,user_data.size()));
+      // Zero the pointer member of the union, just as the UserData
+      // constructor does. Writing only the unsigned int member would leave
+      // part of a previously stored pointer untouched wherever a pointer
+      // is wider than an unsigned int.
+      user_data[i].p = 0;
+    }
+
+
+    /**
+     * Default constructor. Sets both search counters to zero, selects the
+     * forward search direction for single objects, and marks the user data
+     * as not yet used as either pointers or indices.
+     *
+     * The counters are initialized here because serialize() and the
+     * next_free_*_object() functions read them, and they would otherwise
+     * hold indeterminate values.
+     */
+    template <typename G>
+    inline
+    TriaObjects<G>::TriaObjects()
+      :
+      next_free_single (0),
+      next_free_pair (0),
+      reverse_order_next_free_single (false),
+      user_data_type(data_unknown)
+    {}
+
+
+    /**
+     * Return the user index of entry @p i for reading.
+     *
+     * Although this function is const, it records (in the mutable member
+     * user_data_type) that the user data is now used as indices. Asserts
+     * that it has not been used as pointers before and range-checks @p i.
+     */
+    template<typename G>
+    inline
+    unsigned int TriaObjects<G>::user_index (const unsigned int i) const
+    {
+      Assert(user_data_type == data_unknown || user_data_type == data_index,
+             ExcPointerIndexClash());
+      // user_data_type is mutable, so it can be set in a const function
+      user_data_type = data_index;
+
+      Assert(i<user_data.size(), ExcIndexRange(i,0,user_data.size()));
+      const UserData &entry = user_data[i];
+      return entry.i;
+    }
+
+
+    /**
+     * Forget whether the user data holds pointers or indices, and set the
+     * pointer member of every entry to zero.
+     */
+    template<typename G>
+    inline
+    void TriaObjects<G>::clear_user_data ()
+    {
+      user_data_type = data_unknown;
+
+      typedef typename std::vector<UserData>::iterator entry_iterator;
+      for (entry_iterator entry=user_data.begin(); entry!=user_data.end(); ++entry)
+        entry->p = 0;
+    }
+
+
+    /**
+     * Set every user flag to @p false; the number of flags is unchanged.
+     */
+    template<typename G>
+    inline
+    void TriaObjects<G>::clear_user_flags ()
+    {
+      const std::vector<bool>::size_type n_flags = user_flags.size();
+      for (std::vector<bool>::size_type k=0; k<n_flags; ++k)
+        user_flags[k] = false;
+    }
+
+
+    /**
+     * Pass the user data through the archive @p ar. Only the unsigned int
+     * member of the union is handed to the archive; the version argument is
+     * not used.
+     */
+    template<typename G>
+    template <class Archive>
+    void
+    TriaObjects<G>::UserData::serialize (Archive &ar,
+                                         const unsigned int /*version*/)
+    {
+      // the union is treated as an integer here
+      ar &this->i;
+    }
+
+
+
+    /**
+     * Pass all data members of this object through the archive @p ar, one
+     * after the other. The version argument is not used.
+     */
+    template <typename G>
+    template <class Archive>
+    void TriaObjects<G>::serialize(Archive &ar,
+                                   const unsigned int /*version*/)
+    {
+      // the objects themselves and their refinement information
+      ar &cells;
+      ar &children;
+      ar &refinement_cases;
+
+      // per-object flags and ids
+      ar &used;
+      ar &user_flags;
+      ar &boundary_or_material_id;
+      ar &manifold_id;
+
+      // state of the free-slot search
+      ar &next_free_single;
+      ar &next_free_pair;
+      ar &reverse_order_next_free_single;
+
+      // user data and the way it is currently interpreted
+      ar &user_data;
+      ar &user_data_type;
+    }
+
+
+    /**
+     * Pass the base class data through the archive @p ar first, followed by
+     * the three per-face bool vectors of this class.
+     */
+    template <class Archive>
+    void TriaObjectsHex::serialize(Archive &ar,
+                                   const unsigned int version)
+    {
+      typedef TriaObjects<TriaObject<3> > BaseClass;
+      this->BaseClass::serialize (ar, version);
+
+      ar &face_orientations;
+      ar &face_flips;
+      ar &face_rotations;
+    }
+
+
+    /**
+     * Pass the base class data through the archive @p ar first, followed by
+     * the line orientation flags of this class.
+     */
+    template <class Archive>
+    void TriaObjectsQuad3D::serialize(Archive &ar,
+                                      const unsigned int version)
+    {
+      typedef TriaObjects<TriaObject<2> > BaseClass;
+      this->BaseClass::serialize (ar, version);
+
+      ar &line_orientations;
+    }
+
+
+//----------------------------------------------------------------------//
+
+    /**
+     * Return the orientation flag stored for face @p face of hex @p cell.
+     * The flags are stored cell by cell, with
+     * GeometryInfo<3>::faces_per_cell consecutive entries per cell. Both
+     * arguments are range-checked by assertions.
+     */
+    inline
+    bool
+    TriaObjectsHex::face_orientation(const unsigned int cell,
+                                     const unsigned int face) const
+    {
+      // ExcIndexRange takes the offending index first, then the lower and
+      // upper bound of the valid range
+      Assert (cell < face_orientations.size() / GeometryInfo<3>::faces_per_cell,
+              ExcIndexRange(cell, 0, face_orientations.size() / GeometryInfo<3>::faces_per_cell));
+      Assert (face < GeometryInfo<3>::faces_per_cell,
+              ExcIndexRange(face, 0, GeometryInfo<3>::faces_per_cell));
+
+      return face_orientations[cell * GeometryInfo<3>::faces_per_cell
+                               + face];
+    }
+
+//----------------------------------------------------------------------//
+
+    /**
+     * Return the orientation flag stored for line @p face of quad @p cell.
+     * The flags are stored quad by quad in line_orientations, with
+     * GeometryInfo<2>::faces_per_cell consecutive entries per quad. Both
+     * arguments are range-checked by assertions, in the same way as in
+     * TriaObjectsHex::face_orientation().
+     */
+    inline
+    bool
+    TriaObjectsQuad3D::face_orientation(const unsigned int cell, const unsigned int face) const
+    {
+      Assert (cell < line_orientations.size() / GeometryInfo<2>::faces_per_cell,
+              ExcIndexRange(cell, 0, line_orientations.size() / GeometryInfo<2>::faces_per_cell));
+      Assert (face < GeometryInfo<2>::faces_per_cell,
+              ExcIndexRange(face, 0, GeometryInfo<2>::faces_per_cell));
+
+      return line_orientations[cell * GeometryInfo<2>::faces_per_cell
+                               + face];
+    }
+
+
+//----------------------------------------------------------------------//
+
+    /**
+     * Search the @p used vector for a free slot for a single object and
+     * return a raw iterator constructed from <code>(&tria, 0, pos)</code>,
+     * where @p pos is the slot that was found. If the search fails, an
+     * iterator constructed from <code>(&tria, -1, -1)</code> is returned.
+     *
+     * The search starts at next_free_single and has two phases, selected
+     * by reverse_order_next_free_single:
+     * - Forward phase (flag is false): only accept an unused slot that is
+     *   directly followed by a used one, so that two adjacent free slots
+     *   are left alone. When the end of the vector is reached, the flag
+     *   is set and the search continues with the second phase from the
+     *   last slot.
+     * - Backward phase (flag is true): walk towards index 0 and accept
+     *   the first unused slot.
+     *
+     * next_free_single is updated so that the next call continues behind
+     * (forward phase) or before (backward phase) the slot returned.
+     */
+    template <class G>
+    template <int dim, int spacedim>
+    dealii::TriaRawIterator<dealii::TriaAccessor<G::dimension,dim,spacedim> >
+    TriaObjects<G>::next_free_single_object (const dealii::Triangulation<dim,spacedim> &tria)
+    {
+      // TODO: Think of a way to ensure that we are using the correct triangulation, i.e. the one containing *this.
+
+      // signed integers are used so that 'last' becomes -1 for an empty
+      // vector and the backward loop below can terminate at pos==-1
+      int pos=next_free_single,
+          last=used.size()-1;
+      if (!reverse_order_next_free_single)
+        {
+          // first sweep forward, only use really single slots, do not use
+          // pair slots
+          for (; pos<last; ++pos)
+            if (!used[pos])
+              // note that pos is advanced here as well: if the next slot
+              // is free too, both slots of the pair are skipped
+              if (used[++pos])
+                {
+                  // this was a single slot
+                  pos-=1;
+                  break;
+                }
+          if (pos>=last)
+            {
+              // the forward sweep found nothing; switch permanently to
+              // the backward sweep and start it at the last slot
+              reverse_order_next_free_single=true;
+              next_free_single=used.size()-1;
+              pos=used.size()-1;
+            }
+          else
+            next_free_single=pos+1;
+        }
+
+      if (reverse_order_next_free_single)
+        {
+          // second sweep, use all slots, even
+          // in pairs
+          for (; pos>=0; --pos)
+            if (!used[pos])
+              break;
+          // NOTE(review): because the test is pos>0, a free slot at index 0
+          // is treated the same as "nothing found" -- confirm this is
+          // intended
+          if (pos>0)
+            next_free_single=pos-1;
+          else
+            // no valid single object anymore
+            return dealii::TriaRawIterator<dealii::TriaAccessor<G::dimension,dim,spacedim> >(&tria, -1, -1);
+        }
+
+      return dealii::TriaRawIterator<dealii::TriaAccessor<G::dimension,dim,spacedim> >(&tria, 0, pos);
+    }
+
+
+
+    /**
+     * Search the @p used vector, starting at next_free_pair, for two
+     * adjacent unused slots and return a raw iterator constructed from
+     * <code>(&tria, 0, pos)</code>, where @p pos is the first slot of the
+     * pair. In that case next_free_pair is set to the slot behind the
+     * pair. If no such pair is found, an iterator constructed from
+     * <code>(&tria, -1, -1)</code> is returned and next_free_pair is left
+     * unchanged.
+     */
+    template <class G>
+    template <int dim, int spacedim>
+    dealii::TriaRawIterator<dealii::TriaAccessor<G::dimension,dim,spacedim> >
+    TriaObjects<G>::next_free_pair_object (const dealii::Triangulation<dim,spacedim> &tria)
+    {
+      // TODO: Think of a way to ensure that we are using the correct triangulation, i.e. the one containing *this.
+
+      // signed integers are used so that 'last' becomes -1 for an empty
+      // vector, in which case the loop below is not entered
+      int pos=next_free_pair,
+          last=used.size()-1;
+      for (; pos<last; ++pos)
+        if (!used[pos])
+          // pos is advanced here as well, so a free slot followed by a
+          // used one is skipped together with its neighbor
+          if (!used[++pos])
+            {
+              // this was a pair slot
+              pos-=1;
+              break;
+            }
+      if (pos>=last)
+        // no free slot
+        return dealii::TriaRawIterator<dealii::TriaAccessor<G::dimension,dim,spacedim> >(&tria, -1, -1);
+      else
+        next_free_pair=pos+2;
+
+      return dealii::TriaRawIterator<dealii::TriaAccessor<G::dimension,dim,spacedim> >(&tria, 0, pos);
+    }
+
+
+
+// Declaration of explicit specializations. Only the declaration of
+// monitor_memory() for G=TriaObject<2> is given here; its definition is
+// not part of this header.
+
+    template<>
+    void
+    TriaObjects<TriaObject<2> >::monitor_memory (const unsigned int) const;
+
+  }
+}
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/hp/dof_faces.h b/include/deal.II/hp/dof_faces.h
new file mode 100644
index 0000000..93c179c
--- /dev/null
+++ b/include/deal.II/hp/dof_faces.h
@@ -0,0 +1,639 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__hp_dof_faces_h
+#define dealii__hp_dof_faces_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/hp/fe_collection.h>
+
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace hp
+{
+  template <int dim, int spacedim>
+  class FECollection;
+}
+
+
+namespace internal
+{
+  namespace hp
+  {
+    /**
+     * Store the indices of the degrees of freedom which are located on
+     * objects of dimension @p structdim < dim, i.e., for faces or edges of
+     * cells. This is opposed to the internal::hp::DoFLevels class that stores
+     * the DoF indices on cells.
+     *
+     * What is stored here is very similar to what is stored in the
+     * internal::DoFHandler::DoFObjects classes (see there for more
+     * information, in particular on the layout of the class hierarchy, and
+     * the use of file names).
+     *
+     * <h4>Offset computations</h4>
+     *
+     * For hp methods, not all cells may use the same finite element, and it
+     * is consequently more complicated to determine where the DoF indices for
+     * a given line, quad, or hex are stored. As described in the
+     * documentation of the internal::DoFHandler::DoFLevel class, we can
+     * compute the location of the first line DoF, for example, by calculating
+     * the offset as <code>line_index *
+     * dof_handler.get_fe().dofs_per_line</code>. This of course doesn't work
+     * any more if different lines may have different numbers of degrees of
+     * freedom associated with them. Consequently, rather than using this
+     * simple multiplication, the dofs array has an associated array
+     * dof_offsets. The data corresponding to a line then starts at index
+     * <code>dof_offsets[line_index]</code> within the
+     * <code>dofs</code> array.
+     *
+     *
+     * <h4>Multiple data sets per object</h4>
+     *
+     * If two adjacent cells use different finite elements, then the face that
+     * they share needs to store DoF indices for both involved finite
+     * elements. While faces therefore have to have at most two sets of DoF
+     * indices, it is easy to see that edges and vertices can have as many
+     * sets of DoF indices associated with them as there are adjacent cells.
+     *
+     * Consequently, for objects that have a lower dimensionality than cells,
+     * we have to store a map from the finite element index to the set of DoF
+     * indices associated. Since real sets are typically very inefficient to
+     * store, and since most of the time we expect the number of individual
+     * keys to be small (frequently, adjacent cells will have the same finite
+     * element, and only a single entry will exist in the map), what we do is
+     * instead to store a linked list. In this format, the first entry
+     * starting at position
+     * <code>lines.dofs[lines.dof_offsets[line_index]]</code> will denote the
+     * finite element index of the set of DoF indices following; after this
+     * set, we will store the finite element index of the second set followed
+     * by the corresponding DoF indices; and so on. Finally, when all finite
+     * element indices adjacent to this object have been covered, we write
+     * numbers::invalid_dof_index to indicate the end of the list.
+     *
+     * Access to this kind of data, as well as the distinction between cells
+     * and objects of lower dimensionality, are encoded in the accessor
+     * functions set_dof_index() and get_dof_index() of this class.
+     * They are able to traverse this list and pick out or set a DoF index
+     * given the finite element index and its location within the set of DoFs
+     * corresponding to this finite element.
+     *
+     *
+     * @ingroup hp
+     * @author Tobias Leicht, 2006
+     */
+    template <int structdim>
+    class DoFIndicesOnFacesOrEdges
+    {
+    public:
+      /**
+       * Store, for each object, the position in the @p dofs array at which
+       * the data of this object starts. The accessor functions of this class
+       * treat an entry equal to numbers::invalid_unsigned_int as marking an
+       * object for which no degrees of freedom have been allocated. The
+       * vector is indexed by the @p obj_index argument of these functions.
+       */
+      std::vector<unsigned int> dof_offsets;
+
+      /**
+       * Store the global indices of the degrees of freedom; each set of
+       * indices is preceded by its finite element index. See also DoFLevel.
+       */
+      std::vector<types::global_dof_index> dofs;
+
+      /**
+       * Set the global index of the @p local_index-th degree of freedom
+       * located on the object with number @p obj_index to the value given by
+       * @p global_index. The @p dof_handler argument is used to access the
+       * finite element that is to be used to compute the location where this
+       * data is stored.
+       *
+       * The third argument, @p fe_index, denotes which of the finite elements
+       * associated with this object we shall access (see also the
+       * documentation of internal::hp::DoFLevel); it must be active on this
+       * object. The @p obj_level argument is unused by the implementation.
+       */
+      template <int dim, int spacedim>
+      void
+      set_dof_index (const dealii::hp::DoFHandler<dim,spacedim> &dof_handler,
+                     const unsigned int               obj_index,
+                     const unsigned int               fe_index,
+                     const unsigned int               local_index,
+                     const types::global_dof_index    global_index,
+                     const unsigned int               obj_level);
+
+      /**
+       * Return the global index of the @p local_index-th degree of freedom
+       * located on the object with number @p obj_index. The @p dof_handler
+       * argument is used to access the finite element that is to be used to
+       * compute the location where this data is stored.
+       *
+       * The third argument, @p fe_index, denotes which of the finite elements
+       * associated with this object we shall access (see also the
+       * documentation of internal::hp::DoFLevel); it must be active on this
+       * object. The @p obj_level argument is unused by the implementation.
+       */
+      template <int dim, int spacedim>
+      types::global_dof_index
+      get_dof_index (const dealii::hp::DoFHandler<dim,spacedim> &dof_handler,
+                     const unsigned int               obj_index,
+                     const unsigned int               fe_index,
+                     const unsigned int               local_index,
+                     const unsigned int               obj_level) const;
+
+      /**
+       * Return the number of finite elements that are active on a given
+       * object. For a cell the answer would of course be one, but this class
+       * only handles objects of lower dimension. If the object is a face, the
+       * answer may be one or two, depending on whether the two adjacent cells
+       * use the same finite element or not. If it is an edge in 3d, the
+       * possible return value may be one or any other value larger than that.
+       *
+       * If the object is not part of an active cell, then no degrees of
+       * freedom have been distributed and zero is returned.
+       */
+      template <int dim, int spacedim>
+      unsigned int
+      n_active_fe_indices (const dealii::hp::DoFHandler<dim,spacedim> &dof_handler,
+                           const unsigned int               obj_index) const;
+
+      /**
+       * Return the fe_index of the n-th (counted from zero) active element.
+       */
+      template <int dim, int spacedim>
+      types::global_dof_index
+      nth_active_fe_index (const dealii::hp::DoFHandler<dim,spacedim> &dof_handler,
+                           const unsigned int               obj_level,
+                           const unsigned int               obj_index,
+                           const unsigned int               n) const;
+
+      /**
+       * Check whether a given finite element index is used on the present
+       * object or not.
+       */
+      template <int dim, int spacedim>
+      bool
+      fe_index_is_active (const dealii::hp::DoFHandler<dim,spacedim> &dof_handler,
+                          const unsigned int               obj_index,
+                          const unsigned int               fe_index,
+                          const unsigned int               obj_level) const;
+
+      /**
+       * Determine an estimate for the memory consumption (in bytes) of this
+       * object.
+       */
+      std::size_t memory_consumption () const;
+
+      /**
+       * Read or write the @p dofs and @p dof_offsets members to or from an
+       * archive for the purpose of serialization.
+       */
+      template <class Archive>
+      void serialize(Archive &ar,
+                     const unsigned int version);
+    };
+
+
+
+    /**
+     * These classes are similar to the internal::hp::DoFLevel classes. We
+     * here store information that is associated with faces, rather than
+     * cells, as this information is independent of the hierarchical structure
+     * of cells, which are organized in levels. In 2D we store information on
+     * degrees of freedom located on lines whereas in 3D we store information
+     * on degrees of freedom located on quads and lines. In 1D we do nothing,
+     * as the faces of lines are vertices which are treated separately.
+     *
+     * Apart from the DoFIndicesOnFacesOrEdges object(s) containing the data
+     * to store (degree of freedom indices) and all the access-functionality
+     * to this data, we do not store any data. The only functionality we add
+     * is a function to estimate the memory consumption of the contained
+     * object(s), and a serialize() function that forwards to them.
+     *
+     * The data contained isn't usually directly accessed. Rather, except for
+     * some access from the DoFHandler class, access is usually through the
+     * DoFAccessor::set_dof_index() and DoFAccessor::dof_index() functions or
+     * similar functions of derived classes that in turn access the member
+     * variables using the DoFHandler::get_dof_index() and corresponding
+     * setter functions. Knowledge of the actual data format is therefore
+     * encapsulated to the present hierarchy of classes as well as the
+     * ::DoFHandler class.
+     *
+     * @ingroup dofs
+     * @author Tobias Leicht, 2006
+     */
+    template<int dim>
+    class DoFIndicesOnFaces;
+
+
+    /**
+     * Store the indices of degrees of freedom on faces in 1D. As these would
+     * be vertices, which are treated separately, this class stores nothing.
+     *
+     * @ingroup hp
+     * @author Tobias Leicht, 2006
+     */
+    template<>
+    class DoFIndicesOnFaces<1>
+    {
+    public:
+      /**
+       * Determine an estimate for the memory consumption (in bytes) of this
+       * object.
+       */
+      std::size_t memory_consumption () const;
+
+      /**
+       * Serialization hook. Since this class stores no data, the function
+       * neither reads from nor writes to the archive it is given.
+       */
+      template <class Archive>
+      void serialize(Archive &ar,
+                     const unsigned int version);
+    };
+
+    /**
+     * Store the indices of degrees of freedom on faces in 2D, which are
+     * lines.
+     *
+     * @ingroup hp
+     * @author Tobias Leicht, 2006
+     */
+    template<>
+    class DoFIndicesOnFaces<2>
+    {
+    public:
+      /**
+       * Indices of DoFs on the lines that bound cells.
+       */
+      internal::hp::DoFIndicesOnFacesOrEdges<1> lines;
+
+      /**
+       * Determine an estimate for the memory consumption (in bytes) of this
+       * object.
+       */
+      std::size_t memory_consumption () const;
+
+      /**
+       * Read or write the @p lines member to or from an archive for the
+       * purpose of serialization. The @p version argument is not used.
+       */
+      template <class Archive>
+      void serialize(Archive &ar,
+                     const unsigned int version);
+    };
+
+    /**
+     * Store the indices of degrees of freedom on faces in 3D, which are
+     * quads, additionally also on lines.
+     *
+     * @ingroup hp
+     * @author Tobias Leicht, 2006
+     */
+    template<>
+    class DoFIndicesOnFaces<3>
+    {
+    public:
+      /**
+       * Indices of DoFs on the lines that form the edges of cells.
+       */
+      internal::hp::DoFIndicesOnFacesOrEdges<1> lines;
+
+      /**
+       * Indices of DoFs on the quads that bound cells.
+       */
+      internal::hp::DoFIndicesOnFacesOrEdges<2> quads;
+
+      /**
+       * Determine an estimate for the memory consumption (in bytes) of this
+       * object.
+       */
+      std::size_t memory_consumption () const;
+
+      /**
+       * Read or write the @p lines and @p quads members to or from an archive
+       * for the purpose of serialization. The @p version argument is not used.
+       */
+      template <class Archive>
+      void serialize(Archive &ar,
+                     const unsigned int version);
+    };
+
+
+    // --------------------- inline and template functions ------------------
+    template <class Archive>
+    void DoFIndicesOnFaces<1>::serialize(Archive & /*ar*/,
+                                         const unsigned int /*version*/)
+    { /* this specialization has no data members to read or write */ }
+
+
+    template <class Archive>
+    void DoFIndicesOnFaces<2>::serialize(Archive &ar, const unsigned int /*version*/)
+    {
+      // in 2d the DoF indices on lines are the only data this class holds
+      ar & lines;
+    }
+
+
+    template <class Archive>
+    void DoFIndicesOnFaces<3>::serialize(Archive &ar, const unsigned int /*version*/)
+    {
+      // lines first, then quads: this order defines the archive layout
+      ar & lines & quads;
+    }
+
+    template <int structdim>
+    template <int dim, int spacedim>
+    inline
+    types::global_dof_index
+    DoFIndicesOnFacesOrEdges<structdim>::
+    get_dof_index (const dealii::hp::DoFHandler<dim,spacedim> &dof_handler,
+                   const unsigned int obj_index,
+                   const unsigned int fe_index,
+                   const unsigned int local_index,
+                   const unsigned int /*obj_level*/) const
+    {
+      Assert ((fe_index != dealii::hp::DoFHandler<dim,spacedim>::default_fe_index),
+              ExcMessage ("You need to specify a FE index when working "
+                          "with hp DoFHandlers"));
+      Assert (fe_index < dof_handler.get_fe().size(),
+              ExcIndexRange (fe_index, 0, dof_handler.get_fe().size()));
+      Assert (local_index <
+              dof_handler.get_fe()[fe_index].template n_dofs_per_object<structdim>(),
+              ExcIndexRange(local_index, 0,
+                            dof_handler.get_fe()[fe_index]
+                            .template n_dofs_per_object<structdim>()));
+      Assert (obj_index < dof_offsets.size(),
+              ExcIndexRange (obj_index, 0, dof_offsets.size()));
+
+      // an invalid offset marks an object without any allocated DoF
+      // indices; reading from such an object is an error on the
+      // caller's side
+      Assert (dof_offsets[obj_index] != numbers::invalid_unsigned_int,
+              ExcMessage ("You are trying to access degree of freedom "
+                          "information for an object on which no such "
+                          "information is available"));
+
+      Assert (structdim<dim, ExcMessage ("This object can not be used for cells."));
+
+      // The data of this object is a sequence of records, each made up
+      // of a finite element index followed by the DoF indices for that
+      // element. Step from record to record until the one belonging to
+      // fe_index is found and read the requested entry from it. The
+      // Assert in the loop catches running into the end-of-list marker,
+      // which happens if fe_index is not active on this object.
+      const types::global_dof_index *pointer = &dofs[dof_offsets[obj_index]];
+      for (;;)
+        {
+          Assert (*pointer != numbers::invalid_dof_index,
+                  ExcInternalError());
+          if (*pointer != fe_index)
+            pointer += static_cast<types::global_dof_index>(
+                         dof_handler.get_fe()[*pointer]
+                         .template n_dofs_per_object<structdim>() + 1);
+          else
+            return *(pointer + 1 + local_index);
+        }
+    }
+
+
+
+    template <int structdim>
+    template <int dim, int spacedim>
+    inline
+    void
+    DoFIndicesOnFacesOrEdges<structdim>::
+    set_dof_index (const dealii::hp::DoFHandler<dim,spacedim> &dof_handler,
+                   const unsigned int obj_index,
+                   const unsigned int fe_index,
+                   const unsigned int local_index,
+                   const types::global_dof_index global_index,
+                   const unsigned int /*obj_level*/)
+    {
+      Assert ((fe_index != dealii::hp::DoFHandler<dim,spacedim>::default_fe_index),
+              ExcMessage ("You need to specify a FE index when working "
+                          "with hp DoFHandlers"));
+      Assert (fe_index < dof_handler.get_fe().size(),
+              ExcIndexRange (fe_index, 0, dof_handler.get_fe().size()));
+      Assert (local_index <
+              dof_handler.get_fe()[fe_index].template n_dofs_per_object<structdim>(),
+              ExcIndexRange(local_index, 0,
+                            dof_handler.get_fe()[fe_index]
+                            .template n_dofs_per_object<structdim>()));
+      Assert (obj_index < dof_offsets.size(),
+              ExcIndexRange (obj_index, 0, dof_offsets.size()));
+
+      // an invalid offset marks an object without any allocated DoF
+      // indices; writing to such an object is an error on the
+      // caller's side
+      Assert (dof_offsets[obj_index] != numbers::invalid_unsigned_int,
+              ExcMessage ("You are trying to access degree of freedom "
+                          "information for an object on which no such "
+                          "information is available"));
+
+      Assert (structdim<dim, ExcMessage ("This object can not be used for cells."));
+
+      // The data of this object is a sequence of records, each made up
+      // of a finite element index followed by the DoF indices for that
+      // element. Advance from record to record until 'pointer' sits on
+      // the leading entry of the record belonging to fe_index. The
+      // Assert in the loop catches running into the end-of-list marker,
+      // which happens if fe_index is not active on this object.
+      types::global_dof_index *pointer = &dofs[dof_offsets[obj_index]];
+      for (;; pointer += dof_handler.get_fe()[*pointer]
+                         .template n_dofs_per_object<structdim>() + 1)
+        {
+          Assert (*pointer != numbers::invalid_dof_index,
+                  ExcInternalError());
+          if (*pointer == fe_index)
+            break;
+        }
+
+      // the DoF indices of a record start right behind its leading
+      // entry, so the local_index-th of them is found 1 + local_index
+      // positions further on
+      *(pointer + 1 + local_index) = global_index;
+    }
+
+
+
+    template <int structdim>
+    template <int dim, int spacedim>
+    inline
+    unsigned int
+    DoFIndicesOnFacesOrEdges<structdim>::
+    n_active_fe_indices (const dealii::hp::DoFHandler<dim,spacedim> &dof_handler,
+                         const unsigned int obj_index) const
+    {
+      Assert (obj_index < dof_offsets.size(),
+              ExcIndexRange (obj_index, 0, dof_offsets.size()));
+
+      // an invalid offset marks an object without any allocated DoF
+      // indices; this is not an error here, such an object simply
+      // has no active finite elements
+      if (dof_offsets[obj_index] == numbers::invalid_unsigned_int)
+        return 0;
+
+      Assert (structdim<dim, ExcMessage ("This object can not be used for cells."));
+
+      // The data of this object is a sequence of records, each made up
+      // of a finite element index followed by the DoF indices for that
+      // element, and terminated by an end-of-list marker. The number of
+      // active finite elements is the number of records, so walk over
+      // all of them and count.
+      const types::global_dof_index *entry = &dofs[dof_offsets[obj_index]];
+      unsigned int                   n_records = 0;
+
+      while (*entry != numbers::invalid_dof_index)
+        {
+          ++n_records;
+
+          // the length of a record is determined by the element whose
+          // index is stored in its leading entry
+          entry += dof_handler.get_fe()[*entry]
+                   .template n_dofs_per_object<structdim>() + 1;
+        }
+
+      // end of list reached
+      return n_records;
+    }
+
+
+
+    template <int structdim>
+    template <int dim, int spacedim>
+    inline
+    types::global_dof_index
+    DoFIndicesOnFacesOrEdges<structdim>::
+    nth_active_fe_index (const dealii::hp::DoFHandler<dim,spacedim> &dof_handler,
+                         const unsigned int /*obj_level*/,
+                         const unsigned int obj_index,
+                         const unsigned int n) const
+    {
+      Assert (obj_index < dof_offsets.size(),
+              ExcIndexRange (obj_index, 0, dof_offsets.size()));
+
+      // an invalid offset marks an object without any allocated DoF
+      // indices; asking such an object for its n-th element is an
+      // error on the caller's side
+      Assert (dof_offsets[obj_index] != numbers::invalid_unsigned_int,
+              ExcMessage ("You are trying to access degree of freedom "
+                          "information for an object on which no such "
+                          "information is available"));
+
+      Assert (structdim<dim, ExcMessage ("This object can not be used for cells."));
+
+      Assert (n < n_active_fe_indices(dof_handler, obj_index),
+              ExcIndexRange (n, 0,
+                             n_active_fe_indices(dof_handler, obj_index)));
+
+      // The data of this object is a sequence of records, each made up
+      // of a finite element index followed by the DoF indices for that
+      // element. Step over the first n records and return the finite
+      // element index that leads the following one. The Asserts in the
+      // loop catch the end-of-list marker and out-of-range fe indices.
+      const types::global_dof_index *pointer = &dofs[dof_offsets[obj_index]];
+      for (unsigned int counter = 0; ; ++counter)
+        {
+          Assert (*pointer != numbers::invalid_dof_index,
+                  ExcInternalError());
+
+          const unsigned int fe_index = *pointer;
+
+          Assert (fe_index < dof_handler.get_fe().size(),
+                  ExcInternalError());
+
+          // records are counted from zero, so the record we are
+          // currently looking at is the one asked for if this holds
+          if (counter == n)
+            return fe_index;
+
+          // skip the fe index itself plus the DoF indices following it
+          pointer += dof_handler.get_fe()[fe_index]
+                     .template n_dofs_per_object<structdim>() + 1;
+        }
+    }
+
+
+
+    template <int structdim>
+    template <int dim, int spacedim>
+    inline
+    bool
+    DoFIndicesOnFacesOrEdges<structdim>::
+    fe_index_is_active (const dealii::hp::DoFHandler<dim,spacedim> &dof_handler,
+                        const unsigned int obj_index,
+                        const unsigned int fe_index,
+                        const unsigned int /*obj_level*/) const
+    {
+      Assert (obj_index < dof_offsets.size(),
+              ExcIndexRange (obj_index, 0, static_cast<unsigned int>(dof_offsets.size())));
+      Assert ((fe_index != dealii::hp::DoFHandler<dim,spacedim>::default_fe_index),
+              ExcMessage ("You need to specify a FE index when working "
+                          "with hp DoFHandlers"));
+      Assert (fe_index < dof_handler.get_fe().size(),
+              ExcIndexRange (fe_index, 0, dof_handler.get_fe().size()));
+
+      // an invalid offset marks an object without any allocated DoF
+      // indices; querying such an object is an error on the
+      // caller's side
+      Assert (dof_offsets[obj_index] != numbers::invalid_unsigned_int,
+              ExcMessage ("You are trying to access degree of freedom "
+                          "information for an object on which no such "
+                          "information is available"));
+
+      Assert (structdim<dim, ExcMessage ("This object can not be used for cells."));
+
+      // The data of this object is a sequence of records, each made up
+      // of a finite element index followed by the DoF indices for that
+      // element, and terminated by an end-of-list marker. Look at the
+      // leading entry of each record in turn; no exception is raised
+      // if fe_index does not occur, we just report that fact.
+      const types::global_dof_index *entry = &dofs[dof_offsets[obj_index]];
+      while (*entry != numbers::invalid_dof_index)
+        {
+          if (*entry == fe_index)
+            return true;
+
+          // not the element we look for: skip its DoF indices
+          entry += static_cast<types::global_dof_index>(
+                     dof_handler.get_fe()[*entry]
+                     .template n_dofs_per_object<structdim>()+1);
+        }
+
+      // end of list reached without encountering fe_index
+      return false;
+    }
+
+    template <int structdim>
+    template <class Archive>
+    void
+    DoFIndicesOnFacesOrEdges<structdim>::serialize(Archive &ar, const unsigned int /*version*/)
+    {
+      ar & dofs;          // the packed index data first ...
+      ar & dof_offsets;   // ... then the per-object start positions
+    }
+
+
+  } // namespace hp
+
+} // namespace internal
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/hp/dof_handler.h b/include/deal.II/hp/dof_handler.h
new file mode 100644
index 0000000..dd2e24e
--- /dev/null
+++ b/include/deal.II/hp/dof_handler.h
@@ -0,0 +1,1082 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__hp_dof_handler_h
+#define dealii__hp_dof_handler_h
+
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/template_constraints.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/base/iterator_range.h>
+#include <deal.II/dofs/function_map.h>
+#include <deal.II/dofs/dof_iterator_selector.h>
+#include <deal.II/dofs/number_cache.h>
+#include <deal.II/hp/fe_collection.h>
+#include <deal.II/hp/dof_faces.h>
+#include <deal.II/hp/dof_level.h>
+
+#include <vector>
+#include <map>
+#include <set>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace internal
+{
+  namespace hp
+  {
+    class DoFLevel; // forward declaration only
+
+    namespace DoFHandler
+    {
+      struct Implementation; // forward declaration only; NOTE(review): presumably named again later in this header (e.g. as a friend) -- not visible in this excerpt
+    }
+  }
+}
+
+namespace internal
+{
+  namespace DoFAccessor
+  {
+    struct Implementation; // forward declaration only; the definition is not part of this header excerpt
+  }
+
+  namespace DoFCellAccessor
+  {
+    struct Implementation; // forward declaration only; the definition is not part of this header excerpt
+  }
+}
+
+
+
+namespace hp
+{
+
+  /**
+   * Manage the distribution and numbering of the degrees of freedom for
+   * hp-FEM algorithms. This class satisfies the
+   * @ref ConceptMeshType "MeshType concept"
+   * requirements.
+   *
+   * The purpose of this class is to allow for an enumeration of degrees of
+   * freedom in the same way as the ::DoFHandler class, but it allows to use a
+   * different finite element on every cell. To this end, one assigns an
+   * <code>active_fe_index</code> to every cell that indicates which element
+   * within a collection of finite elements (represented by an object of type
+   * hp::FECollection) is the one that lives on this cell. The class then
+   * enumerates the degree of freedom associated with these finite elements on
+   * each cell of a triangulation and, if possible, identifies degrees of
+   * freedom at the interfaces of cells if they match. If neighboring cells
+   * have degrees of freedom along the common interface that do not immediately
+   * match (for example, if you have $Q_2$ and $Q_3$ elements meeting at a
+   * common face), then one needs to compute constraints to ensure that the
+   * resulting finite element space on the mesh remains conforming.
+   *
+   * The whole process of working with objects of this type is explained in
+   * step-27. Many of the algorithms this class implements are described in
+   * the
+   * @ref hp_paper "hp paper".
+   *
+   *
+   * <h3>Active FE indices and their behavior under mesh refinement</h3>
+   *
+   * The typical workflow for using this class is to create a mesh, assign an
+   * active FE index to every active cell, call
+   * hp::DoFHandler::distribute_dofs(), and then assemble a linear system and
+   * solve a problem on this finite element space. However, one can skip
+   * assigning active FE indices upon mesh refinement in certain
+   * circumstances. In particular, the following rules apply:
+   * - Upon mesh refinement, child cells inherit the active FE index of the parent.
+   * - On the other hand, when coarsening cells, the (now active) parent cell will
+   *   not have an active FE index set and you will have to set it explicitly before
+   *   calling hp::DoFHandler::distribute_dofs(). In particular, to avoid stale
+   *   information being used by accident, this class deletes the active FE index of
+   *   cells that are refined after inheriting this index to the children; this
+   *   implies that if the children are coarsened away, the old value is no longer
+   *   available on the parent cell.
+   *
+   * @ingroup dofs
+   * @ingroup hp
+   *
+   * @author Wolfgang Bangerth, Oliver Kayser-Herold, 2003, 2004
+   */
+  template <int dim, int spacedim=dim>
+  class DoFHandler : public Subscriptor
+  {
+    typedef dealii::internal::DoFHandler::Iterators<DoFHandler<dim,spacedim>, false> ActiveSelector;
+    typedef dealii::internal::DoFHandler::Iterators<DoFHandler<dim,spacedim>, true> LevelSelector;
+  public:
+    typedef typename ActiveSelector::CellAccessor         cell_accessor;
+    typedef typename ActiveSelector::FaceAccessor         face_accessor;
+
+    typedef typename ActiveSelector::line_iterator        line_iterator;
+    typedef typename ActiveSelector::active_line_iterator active_line_iterator;
+
+    typedef typename ActiveSelector::quad_iterator        quad_iterator;
+    typedef typename ActiveSelector::active_quad_iterator active_quad_iterator;
+
+    typedef typename ActiveSelector::hex_iterator         hex_iterator;
+    typedef typename ActiveSelector::active_hex_iterator  active_hex_iterator;
+
+    /**
+     * A typedef that is used to identify
+     * @ref GlossActive "active cell iterators".
+     * The concept of iterators is discussed at length in the
+     * @ref Iterators "iterators documentation module".
+     *
+     * The current typedef identifies active cells in a hp::DoFHandler object.
+     * While the actual data type of the typedef is hidden behind a few layers
+     * of (unfortunately necessary) indirections, it is in essence
+     * TriaActiveIterator<DoFCellAccessor>. The TriaActiveIterator class works
+     * like a pointer to active objects that when you dereference it yields an
+     * object of type DoFCellAccessor. DoFCellAccessor is a class that
+     * identifies properties that are specific to cells in a DoFHandler, but
+     * it is derived (and consequently inherits) from both DoFAccessor,
+     * TriaCellAccessor and TriaAccessor that describe what you can ask of
+     * more general objects (lines, faces, as well as cells) in a
+     * triangulation and hp::DoFHandler objects.
+     *
+     * @ingroup Iterators
+     */
+#ifndef _MSC_VER
+    typedef typename ActiveSelector::active_cell_iterator active_cell_iterator;
+#else
+    typedef TriaActiveIterator < dealii::DoFCellAccessor < DoFHandler < dim, spacedim >, false > > active_cell_iterator;
+#endif
+
+    typedef typename LevelSelector::cell_iterator         level_cell_iterator;
+
+    /**
+     * A typedef that is used to identify cell iterators. The concept of
+     * iterators is discussed at length in the
+     * @ref Iterators "iterators documentation module".
+     *
+     * The current typedef identifies cells in a DoFHandler object. Some of
+     * these cells may in fact be active (see
+     * @ref GlossActive "active cell iterators"
+     * ) in which case they can in fact be asked for the degrees of freedom
+     * that live on them. On the other hand, if the cell is not active, any
+     * such query will result in an error. Note that this is what
+     * distinguishes this typedef from the level_cell_iterator typedef.
+     *
+     * While the actual data type of the typedef is hidden behind a few layers
+     * of (unfortunately necessary) indirections, it is in essence
+     * TriaIterator<DoFCellAccessor>. The TriaIterator class works like a
+     * pointer to objects that when you dereference it yields an object of
+     * type DoFCellAccessor. DoFCellAccessor is a class that identifies
+     * properties that are specific to cells in a DoFHandler, but it is
+     * derived (and consequently inherits) from both DoFAccessor,
+     * TriaCellAccessor and TriaAccessor that describe what you can ask of
+     * more general objects (lines, faces, as well as cells) in a
+     * triangulation and DoFHandler objects.
+     *
+     * @ingroup Iterators
+     */
+#ifndef _MSC_VER
+    typedef typename ActiveSelector::cell_iterator        cell_iterator;
+#else
+    typedef TriaIterator < dealii::DoFCellAccessor < DoFHandler < dim, spacedim >, false > >        cell_iterator;
+#endif
+
+
+    typedef typename ActiveSelector::face_iterator        face_iterator;
+    typedef typename ActiveSelector::active_face_iterator active_face_iterator;
+
+    typedef typename LevelSelector::CellAccessor          level_cell_accessor;
+    typedef typename LevelSelector::FaceAccessor          level_face_accessor;
+
+    typedef typename LevelSelector::face_iterator         level_face_iterator;
+
+    /**
+     * Alias the @p FunctionMap type declared elsewhere.
+     */
+    typedef typename FunctionMap<spacedim>::type FunctionMap;
+
+    /**
+     * Make the dimension available in function templates.
+     */
+    static const unsigned int dimension = dim;
+
+    /**
+     * Make the space dimension available in function templates.
+     */
+    static const unsigned int space_dimension = spacedim;
+
+    /**
+     * When the arrays holding the DoF indices are set up, but before they are
+     * filled with actual values, they are set to an invalid value, in order
+     * to monitor possible problems. This invalid value is the constant
+     * defined here.
+     *
+     * Please note that you should not rely on it having a certain value, but
+     * rather take its symbolic name.
+     */
+    static const types::global_dof_index invalid_dof_index = numbers::invalid_dof_index;
+
+    /**
+     * The default index of the finite element to be used on a given cell. For
+     * the usual, non-hp dealii::DoFHandler class that only supports the same
+     * finite element to be used on all cells, the index of the finite element
+     * needs to be the same on all cells anyway, and by convention we pick
+     * zero for this value. The situation here is different, since the hp
+     * classes support the case where different finite element indices may be
+     * used on different cells. The default index consequently corresponds to
+     * an invalid value.
+     */
+    static const unsigned int default_fe_index = numbers::invalid_unsigned_int;
+
+
+    /**
+     * Constructor. Take @p tria as the triangulation to work on.
+     */
+    DoFHandler (const Triangulation<dim,spacedim> &tria);
+
+    /**
+     * Destructor.
+     */
+    virtual ~DoFHandler ();
+
+    /**
+     * Go through the triangulation and "distribute" the degrees of freedom
+     * needed for the given finite element. "Distributing" degrees of freedom
+     * involves allocating memory to store the information that describes them
+     * (e.g., whether it is located on a vertex, edge, face, etc) and to
+     * sequentially enumerate all degrees of freedom. In other words, while
+     * the mesh and the finite element object by themselves simply define a
+     * finite element space $V_h$, the process of distributing degrees of
+     * freedom makes sure that there is a basis for this space and that the
+     * shape functions of this basis are enumerated in an indexable,
+     * predictable way.
+     *
+     * The purpose of this function is first discussed in the introduction to
+     * the step-2 tutorial program.
+     *
+     * @note A pointer of the finite element given as argument is stored.
+     * Therefore, the lifetime of the finite element object shall be longer
+     * than that of this object. If you don't want this behavior, you may want
+     * to call the @p clear member function which also releases the lock of
+     * this object to the finite element.
+     */
+    virtual void distribute_dofs (const hp::FECollection<dim,spacedim> &fe);
+
+    /**
+     * Go through the triangulation and set the active FE indices of all
+     * active cells to the values given in @p active_fe_indices.
+     */
+    void set_active_fe_indices (const std::vector<unsigned int> &active_fe_indices);
+
+    /**
+     * Go through the triangulation and store the active FE indices of all
+     * active cells to the vector @p active_fe_indices. This vector is
+     * resized, if necessary.
+     */
+    void get_active_fe_indices (std::vector<unsigned int> &active_fe_indices) const;
+
+    /**
+     * Clear all data of this object and especially delete the lock this
+     * object has to the finite element used the last time when @p
+     * distribute_dofs was called.
+     */
+    virtual void clear ();
+
+    /**
+     * Renumber degrees of freedom based on a list of new dof numbers for all
+     * the dofs.
+     *
+     * @p new_numbers is an array of integers with size equal to the number of
+     * dofs on the present grid. It stores the new indices after renumbering
+     * in the order of the old indices.
+     *
+     * This function is called by the functions in DoFRenumbering function
+     * after computing the ordering of the degrees of freedom. However, you
+     * can call this function yourself, which is necessary if a user wants to
+     * implement an ordering scheme herself, for example downwind numbering.
+     *
+     * The @p new_number array must have a size equal to the number of degrees
+     * of freedom. Each entry must state the new global DoF number of the
+     * degree of freedom referenced.
+     */
+    void renumber_dofs (const std::vector<types::global_dof_index> &new_numbers);
+
+    /**
+     * Return the maximum number of degrees of freedom a degree of freedom in
+     * the given triangulation with the given finite element may couple with.
+     * This is the maximum number of entries per line in the system matrix;
+     * this information can therefore be used upon construction of the
+     * SparsityPattern object.
+     *
+     * The returned number is not really the maximum number but an estimate
+     * based on the finite element and the maximum number of cells meeting at
+     * a vertex. The number holds for the constrained matrix also.
+     *
+     * As for ::DoFHandler::max_couplings_between_dofs(), the result of this
+     * function is often not very accurate for 3d and/or high polynomial
+     * degrees. The consequences are discussed in the documentation of the
+     * module on
+     * @ref Sparsity.
+     */
+    unsigned int max_couplings_between_dofs () const;
+
+    /**
+     * Return the number of degrees of freedom located on the boundary another
+     * dof on the boundary can couple with.
+     *
+     * The number is the same as for @p max_couplings_between_dofs in one
+     * dimension less.
+     *
+     * @note The same applies to this function as to max_couplings_between_dofs()
+     * as regards the performance of this function. Think about one of the
+     * dynamic sparsity pattern classes instead (see
+     * @ref Sparsity).
+     */
+    unsigned int max_couplings_between_boundary_dofs () const;
+
+    /**
+     * @name Cell iterator functions
+     */
+    /*@{*/
+    /**
+     * Iterator to the first used cell on level @p level.
+     */
+    cell_iterator        begin       (const unsigned int level = 0) const;
+
+    /**
+     * Iterator to the first active cell on level @p level. If the given level
+     * does not contain any active cells (i.e., all cells on this level are
+     * further refined), then this function returns
+     * <code>end_active(level)</code> so that loops of the kind
+     *  @code
+     *    for (cell=dof_handler.begin_active(level); cell!=dof_handler.end_active(level); ++cell)
+     *      ...
+     *  @endcode
+     * have zero iterations, as may be expected if there are no active cells
+     * on this level.
+     */
+    active_cell_iterator begin_active(const unsigned int level = 0) const;
+
+    /**
+     * Iterator past the end; this iterator serves for comparisons of
+     * iterators with past-the-end or before-the-beginning states.
+     */
+    cell_iterator        end () const;
+
+    /**
+     * Return an iterator which is the first iterator not on level. If @p
+     * level is the last level, then this returns <tt>end()</tt>.
+     */
+    cell_iterator        end (const unsigned int level) const;
+
+    /**
+     * Return an active iterator which is the first active iterator not on the
+     * given level. If @p level is the last level, then this returns
+     * <tt>end()</tt>.
+     */
+    active_cell_iterator end_active (const unsigned int level) const;
+
+    /**
+     * @name Cell iterator functions returning ranges of iterators
+     */
+
+    /**
+     * Return an iterator range that contains all cells (active or not) that
+     * make up this DoFHandler. Such a range is useful to initialize
+     * range-based for loops as supported by C++11. See the example in the
+     * documentation of active_cell_iterators().
+     *
+     * @return The half open range <code>[this->begin(), this->end())</code>
+     *
+     * @ingroup CPP11
+     */
+    IteratorRange<cell_iterator>        cell_iterators () const;
+
+    /**
+     * Return an iterator range that contains all active cells that make up
+     * this DoFHandler. Such a range is useful to initialize range-based for
+     * loops as supported by C++11, see also
+     * @ref CPP11 "C++11 standard".
+     *
+     * Range-based for loops are useful in that they require much less code
+     * than traditional loops (see <a
+     * href="http://en.wikipedia.org/wiki/C%2B%2B11#Range-based_for_loop">here</a>
+     * for a discussion of how they work). An example
+     * is that without range-based for loops, one often writes code such as
+     * the following:
+     * @code
+     *   DoFHandler<dim> dof_handler;
+     *   ...
+     *   typename DoFHandler<dim>::active_cell_iterator
+     *     cell = dof_handler.begin_active(),
+     *     endc = dof_handler.end();
+     *   for (; cell!=endc; ++cell)
+     *     {
+     *       fe_values.reinit (cell);
+     *       ...do the local integration on 'cell'...;
+     *     }
+     * @endcode
+     * Using C++11's range-based for loops, this is now entirely equivalent to
+     * the following:
+     * @code
+     *   DoFHandler<dim> dof_handler;
+     *   ...
+     *   for (auto cell : dof_handler.active_cell_iterators())
+     *     {
+     *       fe_values.reinit (cell);
+     *       ...do the local integration on 'cell'...;
+     *     }
+     * @endcode
+     * To use this feature, you need a compiler that supports C++11.
+     *
+     * @return The half open range <code>[this->begin_active(),
+     * this->end())</code>
+     *
+     * @ingroup CPP11
+     */
+    IteratorRange<active_cell_iterator> active_cell_iterators () const;
+
+    /**
+     * Return an iterator range that contains all cells (active or not) that
+     * make up the given level of this DoFHandler. Such a range is useful to
+     * initialize range-based for loops as supported by C++11. See the example
+     * in the documentation of active_cell_iterators().
+     *
+     * @param[in] level A given level in the refinement hierarchy of this
+     * triangulation.
+     * @return The half open range <code>[this->begin(level),
+     * this->end(level))</code>
+     *
+     * @pre level must be less than this->n_levels().
+     *
+     * @ingroup CPP11
+     */
+    IteratorRange<cell_iterator>        cell_iterators_on_level (const unsigned int level) const;
+
+    /**
+     * Return an iterator range that contains all active cells that make up
+     * the given level of this DoFHandler. Such a range is useful to
+     * initialize range-based for loops as supported by C++11. See the example
+     * in the documentation of active_cell_iterators().
+     *
+     * @param[in] level A given level in the refinement hierarchy of this
+     * triangulation.
+     * @return The half open range <code>[this->begin_active(level),
+     * this->end(level))</code>
+     *
+     * @pre level must be less than this->n_levels().
+     *
+     * @ingroup CPP11
+     */
+    IteratorRange<active_cell_iterator> active_cell_iterators_on_level (const unsigned int level) const;
+
+    /*
+     * @}
+     */
+
+    /*---------------------------------------*/
+
+
+    /**
+     * Return the global number of degrees of freedom. If the current object
+     * handles all degrees of freedom itself (even if you may intend to solve
+     * your linear system in parallel, such as in step-17 or step-18), then
+     * this number equals the number of locally owned degrees of freedom since
+     * this object doesn't know anything about what you want to do with it and
+     * believes that it owns every degree of freedom it knows about.
+     *
+     * On the other hand, if this object operates on a
+     * parallel::distributed::Triangulation object, then this function returns
+     * the global number of degrees of freedom, accumulated over all
+     * processors.
+     *
+     * In either case, included in the returned number are those DoFs which
+     * are constrained by hanging nodes, see
+     * @ref constraints.
+     */
+    types::global_dof_index n_dofs () const;
+
+    /**
+     * The number of multilevel dofs on given level. Since hp::DoFHandler does
+     * not support multilevel methods yet, this function returns
+     * numbers::invalid_unsigned_int independent of its argument.
+     */
+    types::global_dof_index n_dofs(const unsigned int level) const;
+
+    /**
+     * Return the number of degrees of freedom located on the boundary.
+     */
+    types::global_dof_index n_boundary_dofs () const;
+
+    /**
+     * Return the number of degrees of freedom located on those parts of the
+     * boundary which have a boundary indicator listed in the given set. The
+     * reason that a @p map rather than a @p set is used is the same as
+     * described in the section on the @p make_boundary_sparsity_pattern
+     * function.
+     */
+    types::global_dof_index
+    n_boundary_dofs (const FunctionMap &boundary_ids) const;
+
+    /**
+     * Same function, but with different data type of the argument, which is
+     * here simply a list of the boundary indicators under consideration.
+     */
+    types::global_dof_index
+    n_boundary_dofs (const std::set<types::boundary_id> &boundary_ids) const;
+
+    /**
+     * Return the number of degrees of freedom that belong to this process.
+     *
+     * If this is a sequential job, then the result equals that produced by
+     * n_dofs(). On the other hand, if we are operating on a
+     * parallel::distributed::Triangulation, then it includes only the degrees
+     * of freedom that the current processor owns. Note that in this case this
+     * does not include all degrees of freedom that have been distributed on
+     * the current processor's image of the mesh: in particular, some of the
+     * degrees of freedom on the interface between the cells owned by this
+     * processor and cells owned by other processors may be theirs, and
+     * degrees of freedom on ghost cells are also not necessarily included.
+     */
+    types::global_dof_index n_locally_owned_dofs() const;
+
+    /**
+     * Return an IndexSet describing the set of locally owned DoFs as a subset
+     * of 0..n_dofs(). The number of elements of this set equals
+     * n_locally_owned_dofs().
+     */
+    const IndexSet &locally_owned_dofs() const;
+
+
+    /**
+     * Returns a vector that stores the locally owned DoFs of each processor.
+     * If you are only interested in the number of elements each processor
+     * owns then n_locally_owned_dofs_per_processor() is a better choice.
+     *
+     * If this is a sequential job, then the vector has a single element that
+     * equals the IndexSet representing the entire range [0,n_dofs()].
+     */
+    const std::vector<IndexSet> &
+    locally_owned_dofs_per_processor () const;
+
+    /**
+     * Return a vector that stores the number of degrees of freedom each
+     * processor that participates in this triangulation owns locally. The sum
+     * of all these numbers equals the number of degrees of freedom that exist
+     * globally, i.e. what n_dofs() returns.
+     *
+     * Each element of the vector returned by this function equals the number
+     * of elements of the corresponding sets returned by global_dof_indices().
+     *
+     * If this is a sequential job, then the vector has a single element equal
+     * to n_dofs().
+     */
+    const std::vector<types::global_dof_index> &
+    n_locally_owned_dofs_per_processor () const;
+
+    /**
+     * Return a constant reference to the set of finite element objects that
+     * are used by this @p DoFHandler.
+     */
+    const hp::FECollection<dim,spacedim> &get_fe () const;
+
+    /**
+     * Return a constant reference to the triangulation underlying this
+     * object.
+     *
+     * @deprecated Use get_triangulation() instead.
+     */
+    const Triangulation<dim,spacedim> &get_tria () const DEAL_II_DEPRECATED;
+
+    /**
+     * Return a constant reference to the triangulation underlying this
+     * object.
+     */
+    const Triangulation<dim,spacedim> &get_triangulation () const;
+
+    /**
+     * Determine an estimate for the memory consumption (in bytes) of this
+     * object.
+     *
+     * This function is made virtual, since a dof handler object might be
+     * accessed through a pointer to this base class, although the actual
+     * object might be a derived class.
+     */
+    virtual std::size_t memory_consumption () const;
+
+    /**
+     * Write the data of this object to a stream for the purpose of
+     * serialization.
+     */
+    template <class Archive>
+    void save(Archive &ar, const unsigned int version) const;
+
+    /**
+     * Read the data of this object from a stream for the purpose of
+     * serialization.
+     */
+    template <class Archive>
+    void load(Archive &ar, const unsigned int version);
+
+    BOOST_SERIALIZATION_SPLIT_MEMBER()
+
+    /**
+     * Exception
+     */
+    DeclException0 (ExcInvalidTriangulation);
+    /**
+     * Exception
+     */
+    DeclException0 (ExcNoFESelected);
+    /**
+     * Exception
+     */
+    DeclException0 (ExcRenumberingIncomplete);
+    /**
+     * Exception
+     */
+    DeclException0 (ExcGridsDoNotMatch);
+    /**
+     * Exception
+     */
+    DeclException0 (ExcInvalidBoundaryIndicator);
+    /**
+     * Exception
+     */
+    DeclException1 (ExcMatrixHasWrongSize,
+                    int,
+                    << "The matrix has the wrong dimension " << arg1);
+    /**
+     * Exception
+     */
+    DeclException0 (ExcFunctionNotUseful);
+    /**
+     * Exception
+     */
+    DeclException1 (ExcNewNumbersNotConsecutive,
+                    types::global_dof_index,
+                    << "The given list of new dof indices is not consecutive: "
+                    << "the index " << arg1 << " does not exist.");
+    /**
+     * Exception
+     */
+    DeclException2 (ExcInvalidFEIndex,
+                    int, int,
+                    << "The mesh contains a cell with an active_fe_index of "
+                    << arg1 << ", but the finite element collection only has "
+                    << arg2 << " elements");
+    /**
+     * Exception
+     */
+    DeclException1 (ExcInvalidLevel,
+                    int,
+                    << "The given level " << arg1
+                    << " is not in the valid range!");
+    /**
+     * Exception
+     */
+    DeclException0 (ExcFacesHaveNoLevel);
+    /**
+     * The triangulation level you accessed is empty.
+     */
+    DeclException1 (ExcEmptyLevel,
+                    int,
+                    << "You tried to do something on level " << arg1
+                    << ", but this level is empty.");
+
+  protected:
+
+    /**
+     * Address of the triangulation to work on.
+     */
+    SmartPointer<const Triangulation<dim,spacedim>,DoFHandler<dim,spacedim> > tria;
+
+    /**
+     * Store a pointer to the finite element set given latest for the
+     * distribution of dofs. In order to avoid destruction of the object
+     * before the lifetime of the DoF handler, we subscribe to the finite
+     * element object. To unlock the FE before the end of the lifetime of this
+     * DoF handler, use the <tt>clear()</tt> function (this clears all data of
+     * this object as well, though).
+     */
+    SmartPointer<const hp::FECollection<dim,spacedim>,hp::DoFHandler<dim,spacedim> > finite_elements;
+
+  private:
+
+    /**
+     * Copy constructor. I can see no reason why someone might want to use it,
+     * so I don't provide it. Since this class has pointer members, making it
+     * private prevents the compiler from providing its own, incorrect one if
+     * anyone chose to copy such an object.
+     */
+    DoFHandler (const DoFHandler &);
+
+    /**
+     * Copy operator. I can see no reason why someone might want to use it, so
+     * I don't provide it. Since this class has pointer members, making it
+     * private prevents the compiler from providing its own, incorrect one if
+     * anyone chose to copy such an object.
+     */
+    DoFHandler &operator = (const DoFHandler &);
+
+    class MGVertexDoFs // private nested class of the enclosing DoFHandler
+    { // Only member functions are declared in this class body; it declares no data members.
+    public:
+      MGVertexDoFs ();
+      ~MGVertexDoFs ();
+      types::global_dof_index get_index (const unsigned int level, const unsigned int dof_number) const; // NOTE(review): presumably looks up the index stored for (level, dof_number) -- confirm against the definition
+      void set_index (const unsigned int level, const unsigned int dof_number, const types::global_dof_index index); // NOTE(review): presumably stores index for (level, dof_number) -- confirm against the definition
+    };
+
+    /**
+     * Free all used memory.
+     */
+    void clear_space ();
+
+    template<int structdim> // Declaration only. NOTE(review): presumably structdim is the dimension of the addressed object and the result is the global index of DoF local_index for element fe_index -- confirm against the definition
+    types::global_dof_index get_dof_index (const unsigned int obj_level, const unsigned int obj_index, const unsigned int fe_index, const unsigned int local_index) const;
+
+    template<int structdim> // Declaration only. NOTE(review): declared const although it is named as a setter and takes the new global_index -- confirm against the definition that this is intended
+    void set_dof_index (const unsigned int obj_level, const unsigned int obj_index, const unsigned int fe_index, const unsigned int local_index, const types::global_dof_index global_index) const;
+
+    /**
+     * Create default tables for the active_fe_indices in the
+     * dealii::internal::hp::DoFLevel. They are initialized with a zero
+     * indicator, meaning that fe[0] is going to be used by default.  This
+     * method is called before refinement and before distribute_dofs is
+     * called. It ensures each cell has a valid active_fe_index.
+     */
+    void create_active_fe_table ();
+
+    /**
+     * Functions that will be triggered through signals whenever the
+     * triangulation is modified.
+     *
+     * Here they are used to administrate the active_fe_fields during the
+     * spatial refinement.
+     */
+    void pre_refinement_action ();
+    void post_refinement_action ();
+
+    /**
+     * Compute identities between DoFs located on vertices. Called from
+     * distribute_dofs().
+     */
+    void
+    compute_vertex_dof_identities (std::vector<types::global_dof_index> &new_dof_indices) const;
+
+    /**
+     * Compute identities between DoFs located on lines. Called from
+     * distribute_dofs().
+     */
+    void
+    compute_line_dof_identities (std::vector<types::global_dof_index> &new_dof_indices) const;
+
+    /**
+     * Compute identities between DoFs located on quads. Called from
+     * distribute_dofs().
+     */
+    void
+    compute_quad_dof_identities (std::vector<types::global_dof_index> &new_dof_indices) const;
+
+    /**
+     * Renumber the objects with the given and all lower structural
+     * dimensions, i.e. renumber vertices by giving a template argument of
+     * zero to the int2type argument, lines and vertices with one, etc.
+     *
+     * Note that in contrast to the public renumber_dofs() function, these
+     * internal functions do not ensure that the new DoFs are contiguously
+     * numbered. The function may therefore also be used to assign different
+     * DoFs the same number, for example to unify hp DoFs corresponding to
+     * different finite elements but co-located on the same entity.
+     */
+    void renumber_dofs_internal (const std::vector<types::global_dof_index> &new_numbers,
+                                 dealii::internal::int2type<0>);
+
+    void renumber_dofs_internal (const std::vector<types::global_dof_index> &new_numbers,
+                                 dealii::internal::int2type<1>);
+
+    void renumber_dofs_internal (const std::vector<types::global_dof_index> &new_numbers,
+                                 dealii::internal::int2type<2>);
+
+    void renumber_dofs_internal (const std::vector<types::global_dof_index> &new_numbers,
+                                 dealii::internal::int2type<3>);
+
+    /**
+     * Space to store the DoF numbers for the different levels. Analogous to
+     * the <tt>levels[]</tt> tree of the Triangulation objects.
+     */
+    std::vector<dealii::internal::hp::DoFLevel *> levels;
+
+    /**
+     * Space to store the DoF numbers for the faces. Analogous to the
+     * <tt>faces</tt> pointer of the Triangulation objects.
+     */
+    dealii::internal::hp::DoFIndicesOnFaces<dim> *faces;
+
+    /**
+     * A structure that contains all sorts of numbers that characterize the
+     * degrees of freedom this object works on.
+     *
+     * For most members of this structure, there is an accessor function in
+     * this class that returns its value.
+     */
+    dealii::internal::DoFHandler::NumberCache number_cache;
+
+    /**
+     * Array to store the indices for degrees of freedom located at vertices.
+     *
+     * The format used here, in the form of a linked list, is the same as used
+     * for the arrays used in the internal::hp::DoFLevel hierarchy. Starting
+     * indices into this array are provided by the vertex_dofs_offsets field.
+     *
+     * Access to this field is generally through the
+     * DoFAccessor::get_vertex_dof_index() and
+     * DoFAccessor::set_vertex_dof_index() functions, encapsulating the actual
+     * data format used to the present class.
+     */
+    std::vector<types::global_dof_index> vertex_dofs;
+
+    /**
+     * For each vertex in the triangulation, store the offset within the
+     * vertex_dofs array where the dofs for this vertex start.
+     *
+     * As for that array, the format is the same as described in the
+     * documentation of hp::DoFLevel.
+     *
+     * Access to this field is generally through the
+     * DoFAccessor::get_vertex_dof_index() and
+     * DoFAccessor::set_vertex_dof_index() functions, encapsulating the actual
+     * data format used to the present class.
+     */
+    std::vector<types::global_dof_index>      vertex_dofs_offsets;
+
+    std::vector<MGVertexDoFs> mg_vertex_dofs;  // we should really remove this field!
+
+    /**
+     * Array to store the information if a cell on some level has children or
+     * not. It is used by the signal slots as a persistent buffer during the
+     * refinement, i.e. between the time pre_refinement_action is called and
+     * the time post_refinement_action runs.
+     */
+    std::vector<std::vector<bool> *> has_children;
+
+    /**
+     * A list of connections with which this object connects to the
+     * triangulation to get information about when the triangulation changes.
+     */
+    std::vector<boost::signals2::connection> tria_listeners;
+
+    /**
+     * Make accessor objects friends.
+     */
+    template <int, class, bool> friend class dealii::DoFAccessor;
+    template <class, bool> friend class dealii::DoFCellAccessor;
+    friend struct dealii::internal::DoFAccessor::Implementation;
+    friend struct dealii::internal::DoFCellAccessor::Implementation;
+
+    /**
+     * Likewise for DoFLevel objects since they need to access the vertex dofs
+     * in the functions that set and retrieve vertex dof indices.
+     */
+    template <int> friend class dealii::internal::hp::DoFIndicesOnFacesOrEdges;
+    friend struct dealii::internal::hp::DoFHandler::Implementation;
+  };
+
+
+
+#ifndef DOXYGEN
+
+
+  /* ----------------------- Inline functions ---------------------------------- */
+
+  template <int dim, int spacedim>
+  template <class Archive>
+  void DoFHandler<dim, spacedim>::save(Archive &ar, unsigned int) const
+  {
+    ar &vertex_dofs;
+    ar &vertex_dofs_offsets;
+    ar &number_cache;
+    ar &levels;
+    ar &faces;
+    ar &has_children;
+
+    // write out the number of triangulation cells and later check during
+    // loading that this number is indeed correct;
+    unsigned int n_cells = tria->n_cells();
+
+    ar &n_cells;
+  }
+
+  template <int dim, int spacedim>
+  template <class Archive>
+  void DoFHandler<dim, spacedim>::load(Archive &ar, unsigned int)
+  {
+    ar &vertex_dofs;
+    ar &vertex_dofs_offsets;
+    ar &number_cache;
+
+    // boost::serialization can restore pointers just fine, but if the
+    // pointer object still points to something useful, that object is not
+    // destroyed and we end up with a memory leak. consequently, first delete
+    // previous content before re-loading stuff
+    for (unsigned int i = 0; i<levels.size(); ++i)
+      delete levels[i];
+    for (unsigned int i = 0; i<has_children.size(); ++i)
+      delete has_children[i];
+    levels.resize(0);
+    has_children.resize(0);
+    delete faces;
+    faces = 0;
+
+    ar &levels;
+    ar &faces;
+    ar &has_children;
+
+    // these are the checks that correspond to the last block in the save()
+    // function
+    unsigned int n_cells;
+
+    ar &n_cells;
+
+    AssertThrow(n_cells == tria->n_cells(),
+                ExcMessage("The object being loaded into does not match the triangulation "
+                           "that has been stored previously."));
+  }
+
+  template <int dim, int spacedim>
+  inline
+  types::global_dof_index
+  DoFHandler<dim,spacedim>::n_dofs () const
+  {
+    return number_cache.n_global_dofs;
+  }
+
+
+  template <int dim, int spacedim>
+  inline
+  types::global_dof_index
+  DoFHandler<dim,spacedim>::n_dofs (const unsigned int) const
+  {
+    return numbers::invalid_dof_index;
+  }
+
+
+  template <int dim, int spacedim>
+  types::global_dof_index
+  DoFHandler<dim, spacedim>::n_locally_owned_dofs() const
+  {
+    return number_cache.n_locally_owned_dofs;
+  }
+
+
+  template <int dim, int spacedim>
+  const IndexSet &
+  DoFHandler<dim, spacedim>::locally_owned_dofs() const
+  {
+    return number_cache.locally_owned_dofs;
+  }
+
+
+  template <int dim, int spacedim>
+  const std::vector<types::global_dof_index> &
+  DoFHandler<dim, spacedim>::n_locally_owned_dofs_per_processor() const
+  {
+    return number_cache.n_locally_owned_dofs_per_processor;
+  }
+
+
+  template <int dim, int spacedim>
+  const std::vector<IndexSet> &
+  DoFHandler<dim, spacedim>::locally_owned_dofs_per_processor () const
+  {
+    return number_cache.locally_owned_dofs_per_processor;
+  }
+
+
+
+  template<int dim, int spacedim>
+  inline
+  const hp::FECollection<dim,spacedim> &
+  DoFHandler<dim,spacedim>::get_fe () const
+  {
+    Assert (finite_elements != 0,
+            ExcMessage ("No finite element collection is associated with "
+                        "this DoFHandler"));
+    return *finite_elements;
+  }
+
+
+
+  template<int dim, int spacedim>
+  inline
+  const Triangulation<dim,spacedim> &
+  DoFHandler<dim,spacedim>::get_tria () const
+  {
+    return *tria;
+  }
+
+
+
+  template<int dim, int spacedim>
+  inline
+  const Triangulation<dim,spacedim> &
+  DoFHandler<dim,spacedim>::get_triangulation () const
+  {
+    return *tria;
+  }
+
+
+
+  template<int dim, int spacedim>
+  inline
+  DoFHandler<dim, spacedim>::MGVertexDoFs::MGVertexDoFs()
+  {
+    Assert (false, ExcNotImplemented ());
+  }
+
+  template<int dim, int spacedim>
+  inline
+  DoFHandler<dim, spacedim>::MGVertexDoFs::~MGVertexDoFs()
+  {
+    Assert (false, ExcNotImplemented ());
+  }
+
+  template<int dim, int spacedim>
+  inline
+  types::global_dof_index DoFHandler<dim, spacedim>::MGVertexDoFs::get_index (const unsigned int,
+      const unsigned int) const
+  {
+    Assert (false, ExcNotImplemented ());  // stub: not implemented for this class
+    return numbers::invalid_dof_index;     // same marker that n_dofs(level) returns
+  }
+
+  template<int dim, int spacedim>
+  inline
+  void DoFHandler<dim, spacedim>::MGVertexDoFs::set_index (const unsigned int,
+                                                           const unsigned int,
+                                                           types::global_dof_index)
+  {
+    Assert (false, ExcNotImplemented ());
+  }
+
+
+#endif
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/hp/dof_level.h b/include/deal.II/hp/dof_level.h
new file mode 100644
index 0000000..13d8be8
--- /dev/null
+++ b/include/deal.II/hp/dof_level.h
@@ -0,0 +1,422 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__hp_dof_level_h
+#define dealii__hp_dof_level_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+
+#include <vector>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace hp
+{
+  template <int, int> class DoFHandler;
+  template <int, int> class FECollection;
+}
+
+
+namespace internal
+{
+  namespace hp
+  {
+    namespace DoFHandler
+    {
+      struct Implementation;
+    }
+  }
+  namespace DoFCellAccessor
+  {
+    struct Implementation;
+  }
+}
+
+
+namespace internal
+{
+  namespace hp
+  {
+    /**
+     * This is the class that stores the degrees of freedom on cells in a hp
+     * hierarchy. Compared to faces and edges, the task here is simple since
+     * each cell can only have a single active finite element index.
+     * Consequently, all we need is one long array with DoF indices and one
+     * array of offsets where each cell's indices start within the array of
+     * indices. This is in contrast to the DoFObjects class where each face or
+     * edge may have more than one associated finite element with
+     * corresponding degrees of freedom.
+     *
+     * The data stored here is represented by three arrays - The @p
+     * active_fe_indices array stores for each cell which finite element is
+     * used on this cell. Since some cells are not active on the current
+     * level, some entries in this array may represent an invalid value. - The
+     * @p dof_indices array stores for each active cell on the current level
+     * the dofs that are associated with the <i>interior</i> of the cell, i.e.,
+     * the @p dofs_per_line dofs associated with the line in 1d, and @p
+     * dofs_per_quad and @p dofs_per_hex in 2d and 3d. These numbers are in
+     * general smaller than @p dofs_per_cell. - The @p dof_offsets array
+     * stores, for each cell, the starting point of the dof indices
+     * corresponding to this cell in the @p dof_indices array. This is
+     * analogous to how we store data in compressed row storage for sparse
+     * matrices. For cells that are not active on the current level, we store
+     * an invalid value for the starting index.
+     *
+     * <h3>Compression</h3>
+     *
+     * It is common for the indices stored in @p dof_indices for one cell to
+     * be numbered consecutively. For example, using the standard numbering
+     * (without renumbering DoFs), the quad dofs on the first cell of a mesh
+     * when using a $Q_3$ element will be numbered <tt>12, 13, 14, 15</tt>.
+     * This allows for compression if we only store the first entry and have
+     * some way to mark the DoFs on this object as compressed. Here,
+     * compression means that we know that subsequent DoF indices can be
+     * obtained from the previous ones by just incrementing them by one -- in
+     * other words, we use a variant of doing run-length encoding. The way to
+     * do this is that we use positive FE indices for uncompressed sets of
+     * DoFs and if a set of indices is compressed, then we instead store the
+     * FE index in binary complement (which we can identify by looking at the
+     * sign bit when interpreting the number as a signed one). There are two
+     * functions, compress_data() and uncompress_data() that convert between
+     * the two possible representations.
+     *
+     * Note that compression is not always possible. For example, if one
+     * renumbered the example above using DoFRenumbering::downstream with
+     * $(1,0)^T$ as direction, then they would likely be numbered <tt>12, 14,
+     * 13, 15</tt>, which can not be compressed using run-length encoding.
+     */
+    class DoFLevel
+    {
+    private:
+      /**
+       * The type in which we store the offsets into the dof_indices array.
+       */
+      typedef unsigned int offset_type;
+
+      /**
+       * The type in which we store the active FE index.
+       */
+      typedef unsigned short int active_fe_index_type;
+
+      /**
+       * A signed type that matches the type in which we store the active FE
+       * index. We use this in computing binary complements.
+       */
+      typedef signed short int signed_active_fe_index_type;
+
+      /**
+       * Indices specifying the finite element of hp::FECollection to use for
+       * the different cells on the current level. The vector stores one
+       * element per cell since the active_fe_index is unique for cells.
+       *
+       * If a cell is not active on the level corresponding to the current
+       * object (i.e., it has children on higher levels) then it does not have
+       * an associated fe index and we store an invalid fe index marker
+       * instead.
+       */
+      std::vector<active_fe_index_type> active_fe_indices;
+
+      /**
+       * Store the start index for the degrees of freedom of each object in
+       * the @p dof_indices array. If the cell corresponding to a particular
+       * index in this array is not active on this level, then we do not store
+       * any DoFs for it. In that case, the offset we store here must be an
+       * invalid number and indeed we store
+       * <code>(offset_type)(-1)</code>
+       * for it.
+       *
+       * The type we store is offset_type, which is what we use to index into
+       * the @p dof_indices array.
+       */
+      std::vector<offset_type> dof_offsets;
+
+      /**
+       * Store the global indices of the degrees of freedom. The
+       * dof_offsets field determines where each (active) cell's data is
+       * stored.
+       */
+      std::vector<types::global_dof_index> dof_indices;
+
+      /**
+       * The offsets for each cell of the cache that holds all DoF indices.
+       */
+      std::vector<offset_type> cell_cache_offsets;
+
+      /**
+       * Cache for the DoF indices on cells. The size of this array equals the
+       * sum over all cells of
+       * selected_fe[active_fe_index[cell]].dofs_per_cell.
+       */
+      std::vector<types::global_dof_index> cell_dof_indices_cache;
+
+    public:
+
+      /**
+       * Set the global index of the @p local_index-th degree of freedom
+       * located on the object with number @p obj_index to the value given by
+       * @p global_index. The location where this data is stored is found
+       * through the offset stored for the object. This function may not be
+       * called once the indices of the object have been compressed.
+       *
+       * The second argument, @p fe_index, denotes which of the finite elements
+       * associated with this object we shall access; it has to match the
+       * active FE index of the object. Refer to the general documentation of
+       * the internal::hp::DoFLevel class for more information.
+       */
+      void
+      set_dof_index (const unsigned int               obj_index,
+                     const unsigned int               fe_index,
+                     const unsigned int               local_index,
+                     const types::global_dof_index    global_index);
+
+      /**
+       * Return the global index of the @p local_index-th degree of freedom
+       * located on the object with number @p obj_index. The location where
+       * this data is stored is found through the offset stored for the
+       * object; compressed index sets are expanded on the fly.
+       *
+       * The second argument, @p fe_index, denotes which of the finite elements
+       * associated with this object we shall access. Refer to the general
+       * documentation of the internal::hp::DoFLevel class for more
+       * information.
+       */
+      types::global_dof_index
+      get_dof_index (const unsigned int               obj_index,
+                     const unsigned int               fe_index,
+                     const unsigned int               local_index) const;
+
+      /**
+       * Return the fe_index of the active finite element on this object.
+       */
+      unsigned int
+      active_fe_index (const unsigned int obj_index) const;
+
+      /**
+       * Check whether a given finite element index is used on the present
+       * object or not.
+       */
+      bool
+      fe_index_is_active (const unsigned int               obj_index,
+                          const unsigned int               fe_index) const;
+
+      /**
+       * Set the fe_index of the active finite element on this object.
+       */
+      void
+      set_active_fe_index (const unsigned int obj_index,
+                           const unsigned int fe_index);
+
+      /**
+       * Return a pointer to the beginning of the DoF indices cache for a
+       * given cell.
+       *
+       * @param obj_index The number of the cell we are looking at.
+       * @param dofs_per_cell The number of DoFs per cell for this cell. This
+       * is not used for the hp case but necessary to keep the interface the
+       * same as for the non-hp case.
+       * @return A pointer to the first DoF index for the current cell. The
+       * next dofs_per_cell indices are for the current cell.
+       */
+      const types::global_dof_index *
+      get_cell_cache_start (const unsigned int obj_index,
+                            const unsigned int dofs_per_cell) const;
+
+      /**
+       * Determine an estimate for the memory consumption (in bytes) of this
+       * object.
+       */
+      std::size_t memory_consumption () const;
+
+      /**
+       * Read or write the data of this object to or from a stream for the
+       * purpose of serialization
+       */
+      template <class Archive>
+      void serialize(Archive &ar,
+                     const unsigned int version);
+
+    private:
+      /**
+       * Compress the arrays that store dof indices by using a variant of run-
+       * length encoding. See the general documentation of this class for more
+       * information.
+       *
+       * @param fe_collection The object that can tell us how many degrees of
+       * freedom each of the finite elements has that we store in this object.
+       */
+      template <int dim, int spacedim>
+      void compress_data (const dealii::hp::FECollection<dim,spacedim> &fe_collection);
+
+      /**
+       * Uncompress the arrays that store dof indices by using a variant of
+       * run-length encoding. See the general documentation of this class for
+       * more information.
+       *
+       * @param fe_collection The object that can tell us how many degrees of
+       * freedom each of the finite elements has that we store in this object.
+       */
+      template <int dim, int spacedim>
+      void uncompress_data (const dealii::hp::FECollection<dim,spacedim> &fe_collection);
+
+      /**
+       * Make hp::DoFHandler and its auxiliary class a friend since it is the
+       * class that needs to create these data structures.
+       */
+      template <int, int> friend class dealii::hp::DoFHandler;
+      friend struct dealii::internal::hp::DoFHandler::Implementation;
+      friend struct dealii::internal::DoFCellAccessor::Implementation;
+    };
+
+
+    // -------------------- template functions --------------------------------
+
+    // Return the global index of DoF @p local_index on cell @p obj_index.
+    inline
+    types::global_dof_index
+    DoFLevel::
+    get_dof_index (const unsigned int                obj_index,
+                   const unsigned int                fe_index,
+                   const unsigned int                local_index) const
+    {
+      (void)fe_index;
+      Assert (obj_index < dof_offsets.size(),
+              ExcIndexRange (obj_index, 0, dof_offsets.size()));
+      // make sure we are on an object for which DoFs have been allocated
+      Assert (dof_offsets[obj_index] != (offset_type)(-1),
+              ExcMessage ("You are trying to access degree of freedom "
+                          "information for an object on which no such "
+                          "information is available"));
+
+      // compare against the decoded index: for a compressed cell the raw
+      // entry holds the binary complement and would never equal fe_index
+      Assert (fe_index == active_fe_index(obj_index),
+              ExcMessage ("FE index does not match that of the present cell"));
+
+      // an uncompressed cell stores every index explicitly; a compressed
+      // one stores only the first index of a consecutively numbered run
+      if ((signed_active_fe_index_type)active_fe_indices[obj_index]>=0)
+        return dof_indices[dof_offsets[obj_index]+local_index];
+      else
+        return dof_indices[dof_offsets[obj_index]]+local_index;
+    }
+
+
+
+    inline
+    void
+    DoFLevel::
+    set_dof_index (const unsigned int                obj_index,
+                   const unsigned int                fe_index,
+                   const unsigned int                local_index,
+                   const types::global_dof_index     global_index)
+    {
+      (void)fe_index;
+      Assert (obj_index < dof_offsets.size(),
+              ExcIndexRange (obj_index, 0, dof_offsets.size()));
+
+      // make sure we are on an
+      // object for which DoFs have
+      // been allocated at all
+      Assert (dof_offsets[obj_index] != (offset_type)(-1),
+              ExcMessage ("You are trying to access degree of freedom "
+                          "information for an object on which no such "
+                          "information is available"));
+      Assert ((signed_active_fe_index_type)active_fe_indices[obj_index]>=0,
+              ExcMessage ("This function can no longer be called after compressing the dof_indices array"));
+      Assert (fe_index == active_fe_indices[obj_index],
+              ExcMessage ("FE index does not match that of the present cell"));
+      dof_indices[dof_offsets[obj_index]+local_index] = global_index;
+    }
+
+
+
+    inline
+    unsigned int
+    DoFLevel::
+    active_fe_index (const unsigned int obj_index) const
+    {
+      Assert (obj_index < active_fe_indices.size(),
+              ExcIndexRange (obj_index, 0, active_fe_indices.size()));
+
+      if (((signed_active_fe_index_type)active_fe_indices[obj_index]) >= 0)
+        return active_fe_indices[obj_index];
+      else
+        return (active_fe_index_type)~(signed_active_fe_index_type)active_fe_indices[obj_index];
+    }
+
+
+
+    inline
+    bool
+    DoFLevel::
+    fe_index_is_active (const unsigned int                obj_index,
+                        const unsigned int                fe_index) const
+    {
+      return (fe_index == active_fe_index(obj_index));
+    }
+
+
+    inline
+    void
+    DoFLevel::
+    set_active_fe_index (const unsigned int obj_index,
+                         const unsigned int fe_index)
+    {
+      Assert (obj_index < active_fe_indices.size(),
+              ExcIndexRange (obj_index, 0, active_fe_indices.size()));
+
+      active_fe_indices[obj_index] = fe_index;
+    }
+
+
+
+    inline
+    const types::global_dof_index *
+    DoFLevel::get_cell_cache_start (const unsigned int obj_index,
+                                    const unsigned int dofs_per_cell) const
+    {
+      (void)dofs_per_cell;
+      Assert (obj_index < cell_cache_offsets.size(),
+              ExcInternalError());
+      Assert (cell_cache_offsets[obj_index]+dofs_per_cell
+              <=
+              cell_dof_indices_cache.size(),
+              ExcInternalError());
+
+      return &cell_dof_indices_cache[cell_cache_offsets[obj_index]];
+    }
+
+    template <class Archive>
+    inline
+    void
+    DoFLevel::serialize(Archive &ar,
+                        const unsigned int)
+    {
+      ar &this->active_fe_indices;
+      ar &this->cell_cache_offsets;
+      ar &this->cell_dof_indices_cache;
+      ar &this->dof_indices;
+      ar &this->dof_offsets;
+    }
+  } // namespace hp
+
+} // namespace internal
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/hp/fe_collection.h b/include/deal.II/hp/fe_collection.h
new file mode 100644
index 0000000..0e4ec67
--- /dev/null
+++ b/include/deal.II/hp/fe_collection.h
@@ -0,0 +1,622 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__fe_collection_h
+#define dealii__fe_collection_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/std_cxx11/shared_ptr.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/fe_values_extractors.h>
+#include <deal.II/fe/component_mask.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace hp
+{
+
+  /**
+   * This class acts as a collection of finite element objects used in the
+   * hp::DoFHandler. It is thus to a hp::DoFHandler what a FiniteElement is to
+   * a ::DoFHandler.
+   *
+   * It implements the concepts stated in the
+   * @ref hpcollection
+   * module described in the doxygen documentation.
+   *
+   * In addition to offering access to the elements of the collection, this
+   * class provides access to the maximal number of degrees of freedom per
+   * vertex, line, etc, to allow allocation of as much memory as is necessary
+   * in the worst case when using the finite elements associated with the
+   * cells of a triangulation.
+   *
+   * This class has not yet been implemented for the use in the codimension
+   * one case (<tt>spacedim != dim </tt>).
+   *
+   * @ingroup hp hpcollection
+   *
+   * @author Wolfgang Bangerth, 2003
+   */
+  template <int dim, int spacedim=dim>
+  class FECollection : public Subscriptor
+  {
+  public:
+    /**
+     * Default constructor. Leads to an empty collection that can later be
+     * filled using push_back().
+     */
+    FECollection ();
+
+    /**
+     * Conversion constructor. This constructor creates a FECollection from a
+     * single finite element. More finite element objects can be added with
+     * push_back(), if desired, though it would probably be clearer to add all
+     * finite elements the same way.
+     */
+    explicit FECollection (const FiniteElement<dim,spacedim> &fe);
+
+    /**
+     * Constructor. This constructor creates a FECollection from two finite
+     * elements.
+     */
+    FECollection (const FiniteElement<dim,spacedim> &fe1,
+                  const FiniteElement<dim,spacedim> &fe2);
+
+    /**
+     * Constructor. This constructor creates a FECollection from three finite
+     * elements.
+     */
+    FECollection (const FiniteElement<dim,spacedim> &fe1,
+                  const FiniteElement<dim,spacedim> &fe2,
+                  const FiniteElement<dim,spacedim> &fe3);
+
+    /**
+     * Constructor. This constructor creates a FECollection from four finite
+     * elements.
+     */
+    FECollection (const FiniteElement<dim,spacedim> &fe1,
+                  const FiniteElement<dim,spacedim> &fe2,
+                  const FiniteElement<dim,spacedim> &fe3,
+                  const FiniteElement<dim,spacedim> &fe4);
+
+    /**
+     * Constructor. This constructor creates a FECollection from five finite
+     * elements.
+     */
+    FECollection (const FiniteElement<dim,spacedim> &fe1,
+                  const FiniteElement<dim,spacedim> &fe2,
+                  const FiniteElement<dim,spacedim> &fe3,
+                  const FiniteElement<dim,spacedim> &fe4,
+                  const FiniteElement<dim,spacedim> &fe5);
+
+    /**
+     * Constructor. Same as above but for any number of elements. Pointers to
+     * the elements are passed in a vector to this constructor. As above, the
+     * finite element objects pointed to by the argument are not actually used
+     * other than to create copies internally. Consequently, you can delete
+     * these pointers immediately again after calling this constructor.
+     */
+    FECollection (const std::vector<const FiniteElement<dim,spacedim>*> &fes);
+
+    /**
+     * Copy constructor.
+     */
+    FECollection (const FECollection<dim,spacedim> &fe_collection);
+
+    /**
+     * Add a finite element. This function generates a copy of the given
+     * element, i.e. you can do things like <tt>push_back(FE_Q<dim>(1));</tt>.
+     * The internal copy is later destroyed by this object upon destruction of
+     * the entire collection.
+     *
+     * When a new element is added, it needs to have the same number of vector
+     * components as all other elements already in the collection.
+     */
+    void push_back (const FiniteElement<dim,spacedim> &new_fe);
+
+    /**
+     * Get a reference to the given element in this collection.
+     *
+     * @pre @p index must be between zero and the number of elements of the
+     * collection.
+     */
+    const FiniteElement<dim,spacedim> &
+    operator[] (const unsigned int index) const;
+
+    /**
+     * Return the number of finite element objects stored in this collection.
+     */
+    unsigned int size () const;
+
+    /**
+     * Return the number of vector components of the finite elements in this
+     * collection.  This number must be the same for all elements in the
+     * collection.
+     *
+     * This function calls FiniteElement::n_components.  See
+     * @ref GlossComponent "the glossary"
+     * for more information.
+     */
+    unsigned int n_components () const;
+
+    /**
+     * Return the number of vector blocks of the finite elements in this
+     * collection. While this class ensures that all elements stored in it
+     * have the same number of vector components, there is no such guarantee
+     * for the number of blocks each element is made up of (an element may
+     * have fewer blocks than vector components; see
+     * @ref GlossBlock "the glossary"
+     * for more information). For example, you may have an FECollection object
+     * that stores one copy of an FESystem with <code>dim</code> FE_Q objects
+     * and one copy of an FE_RaviartThomas element. Both have <code>dim</code>
+     * vector components but while the former has <code>dim</code> blocks the
+     * latter has only one. Consequently, this function will throw an
+     * assertion if the number of blocks is not the same for all elements. If
+     * they are the same, this function returns the result of
+     * FiniteElement::n_blocks().
+     */
+    unsigned int n_blocks () const;
+
+    /**
+     * Return the maximal number of degrees of freedom per vertex over all
+     * elements of this collection.
+     */
+    unsigned int max_dofs_per_vertex () const;
+
+    /**
+     * Return the maximal number of degrees of freedom per line over all
+     * elements of this collection.
+     */
+    unsigned int max_dofs_per_line () const;
+
+    /**
+     * Return the maximal number of degrees of freedom per quad over all
+     * elements of this collection.
+     */
+    unsigned int max_dofs_per_quad () const;
+
+    /**
+     * Return the maximal number of degrees of freedom per hex over all
+     * elements of this collection.
+     */
+    unsigned int max_dofs_per_hex () const;
+
+    /**
+     * Return the maximal number of degrees of freedom per face over all
+     * elements of this collection.
+     */
+    unsigned int max_dofs_per_face () const;
+
+    /**
+     * Return the maximal number of degrees of freedom per cell over all
+     * elements of this collection.
+     */
+    unsigned int max_dofs_per_cell () const;
+
+    /**
+     * Return an estimate for the memory allocated for this object.
+     */
+    std::size_t memory_consumption () const;
+
+
+    /**
+     * Return whether all elements in this collection implement the hanging
+     * node constraints in the new way, which has to be used to make elements
+     * "hp compatible". If this is not the case, the function returns false,
+     * which implies, that at least one element in the FECollection does not
+     * support the new face interface constraints. On the other hand, if this
+     * method does return true, this does not imply that the hp method will
+     * work!
+     *
+     * This behaviour is related to the fact, that FiniteElement classes,
+     * which provide the new style hanging node constraints might still not
+     * provide them for all possible cases. If FE_Q and FE_RaviartThomas
+     * elements are included in the FECollection and both properly implement
+     * the get_face_interpolation_matrix method, this method will return true.
+     * But the get_face_interpolation_matrix might still fail to find an
+     * interpolation matrix between these two elements.
+     */
+    bool hp_constraints_are_implemented () const;
+
+    /**
+     * Try to find a least dominant finite element inside this FECollection
+     * which dominates the other finite elements whose indices are given in
+     * @p fes. For example, if the FECollection consists of {Q1,Q2,Q3,Q4} and
+     * we are looking for the least dominant FE for Q3 and Q4 (@p fes is
+     * {2,3}), then the answer is Q3 and this function returns its index in
+     * the FECollection, namely 2.
+     *
+     * For the purpose of this function, by domination we mean either
+     * this_element_dominates or either_element_can_dominate; therefore an
+     * element can dominate itself. Thus, if the FECollection contains
+     * {Q1,Q2,Q4,Q3} and @p fes = {3}, the function returns 3.
+     *
+     * If we were not able to find a finite element, the function returns
+     * numbers::invalid_unsigned_int .
+     *
+     * Note that for the cases like when FECollection consists of {FE_Nothing
+     * x FE_Nothing, Q1xQ2, Q2xQ1} with @p fes = {1}, the function will not
+     * find the most dominating element as the default behavior of FE_Nothing
+     * is to return FiniteElementDomination::no_requirements when comparing
+     * for face domination. This, therefore, can't be considered as a
+     * dominating element in the sense described above .
+     */
+    unsigned int
+    find_least_face_dominating_fe (const std::set<unsigned int> &fes) const;
+
+    /**
+     * Return a component mask with as many elements as this object has vector
+     * components and of which exactly the one component is true that
+     * corresponds to the given argument.
+     *
+     * @note This function is the equivalent of
+     * FiniteElement::component_mask() with the same arguments. It verifies
+     * that it gets the same result from every one of the elements that are
+     * stored in this FECollection. If this is not the case, it throws an
+     * exception.
+     *
+     * @param scalar An object that represents a single scalar vector
+     * component of this finite element.
+     * @return A component mask that is false in all components except for the
+     * one that corresponds to the argument.
+     */
+    ComponentMask
+    component_mask (const FEValuesExtractors::Scalar &scalar) const;
+
+    /**
+     * Return a component mask with as many elements as this object has vector
+     * components and of which exactly the <code>dim</code> components are
+     * true that correspond to the given argument.
+     *
+     * @note This function is the equivalent of
+     * FiniteElement::component_mask() with the same arguments. It verifies
+     * that it gets the same result from every one of the elements that are
+     * stored in this FECollection. If this is not the case, it throws an
+     * exception.
+     *
+     * @param vector An object that represents dim vector components of this
+     * finite element.
+     * @return A component mask that is false in all components except for the
+     * ones that corresponds to the argument.
+     */
+    ComponentMask
+    component_mask (const FEValuesExtractors::Vector &vector) const;
+
+    /**
+     * Return a component mask with as many elements as this object has vector
+     * components and of which exactly the <code>dim*(dim+1)/2</code>
+     * components are true that correspond to the given argument.
+     *
+     * @note This function is the equivalent of
+     * FiniteElement::component_mask() with the same arguments. It verifies
+     * that it gets the same result from every one of the elements that are
+     * stored in this FECollection. If this is not the case, it throws an
+     * exception.
+     *
+     * @param sym_tensor An object that represents dim*(dim+1)/2 components of
+     * this finite element that are jointly to be interpreted as forming a
+     * symmetric tensor.
+     * @return A component mask that is false in all components except for the
+     * ones that corresponds to the argument.
+     */
+    ComponentMask
+    component_mask (const FEValuesExtractors::SymmetricTensor<2> &sym_tensor) const;
+
+    /**
+     * Given a block mask (see
+     * @ref GlossBlockMask "this glossary entry"
+     * ), produce a component mask (see
+     * @ref GlossComponentMask "this glossary entry"
+     * ) that represents the components that correspond to the blocks selected
+     * in the input argument. This is essentially a conversion operator from
+     * BlockMask to ComponentMask.
+     *
+     * @note This function is the equivalent of
+     * FiniteElement::component_mask() with the same arguments. It verifies
+     * that it gets the same result from every one of the elements that are
+     * stored in this FECollection. If this is not the case, it throws an
+     * exception.
+     *
+     * @param block_mask The mask that selects individual blocks of the finite
+     * element
+     * @return A mask that selects those components corresponding to the
+     * selected blocks of the input argument.
+     */
+    ComponentMask
+    component_mask (const BlockMask &block_mask) const;
+
+    /**
+     * Return a block mask with as many elements as this object has blocks and
+     * of which exactly the one block is true that corresponds to the given
+     * argument. See
+     * @ref GlossBlockMask "the glossary"
+     * for more information.
+     *
+     * @note This function will only succeed if the scalar referenced by the
+     * argument encompasses a complete block. In other words, if, for example,
+     * you pass an extractor for the single $x$ velocity and this object
+     * represents an FE_RaviartThomas object, then the single scalar object
+     * you selected is part of a larger block and consequently there is no
+     * block mask that would represent it. The function will then produce an
+     * exception.
+     *
+     * @note This function is the equivalent of
+     * FiniteElement::block_mask() with the same arguments. It verifies
+     * that it gets the same result from every one of the elements that are
+     * stored in this FECollection. If this is not the case, it throws an
+     * exception.
+     *
+     * @param scalar An object that represents a single scalar vector
+     * component of this finite element.
+     * @return A block mask that is false in all blocks except for the one
+     * that corresponds to the argument.
+     */
+    BlockMask
+    block_mask (const FEValuesExtractors::Scalar &scalar) const;
+
+    /**
+     * Return a block mask with as many elements as this object has blocks
+     * and of which exactly those blocks are true that correspond to the
+     * <code>dim</code> vector components selected by the argument. See
+     * @ref GlossBlockMask "the glossary"
+     * for more information.
+     *
+     * @note This function is the equivalent of
+     * FiniteElement::block_mask() with the same arguments. It verifies
+     * that it gets the same result from every one of the elements that are
+     * stored in this FECollection. If this is not the case, it throws an
+     * exception.
+     *
+     * @note The same caveat applies as to the version of the function above:
+     * The extractor object passed as argument must be so that it corresponds
+     * to full blocks and does not split blocks of this element.
+     *
+     * @param vector An object that represents dim vector components of this
+     * finite element.
+     * @return A block mask that is false in all blocks except for the ones
+     * that correspond to the argument.
+     */
+    BlockMask
+    block_mask (const FEValuesExtractors::Vector &vector) const;
+
+    /**
+     * Return a block mask with as many elements as this object has blocks
+     * and of which exactly those blocks are true that correspond to the
+     * <code>dim*(dim+1)/2</code> components selected by the argument. See
+     * @ref GlossBlockMask "the glossary"
+     * for more information.
+     *
+     * @note The same caveat applies as to the version of the function above:
+     * The extractor object passed as argument must be so that it corresponds
+     * to full blocks and does not split blocks of this element.
+     *
+     * @note This function is the equivalent of
+     * FiniteElement::block_mask() with the same arguments. It verifies
+     * that it gets the same result from every one of the elements that are
+     * stored in this FECollection. If this is not the case, it throws an
+     * exception.
+     *
+     * @param sym_tensor An object that represents dim*(dim+1)/2 components of
+     * this finite element that are jointly to be interpreted as forming a
+     * symmetric tensor.
+     * @return A block mask that is false in all blocks except for the ones
+     * that correspond to the argument.
+     */
+    BlockMask
+    block_mask (const FEValuesExtractors::SymmetricTensor<2> &sym_tensor) const;
+
+    /**
+     * Given a component mask (see
+     * @ref GlossComponentMask "this glossary entry"
+     * ), produce a block mask (see
+     * @ref GlossBlockMask "this glossary entry"
+     * ) that represents the blocks that correspond to the components selected
+     * in the input argument. This is essentially a conversion operator from
+     * ComponentMask to BlockMask.
+     *
+     * @note This function will only succeed if the components referenced by
+     * the argument encompass complete blocks. In other words, if, for
+     * example, you pass a component mask for the single $x$ velocity and
+     * this object represents an FE_RaviartThomas object, then the single
+     * component you selected is part of a larger block and consequently there
+     * is no block mask that would represent it. The function will then
+     * produce an exception.
+     *
+     * @note This function is the equivalent of
+     * FiniteElement::block_mask() with the same arguments. It verifies
+     * that it gets the same result from every one of the elements that are
+     * stored in this FECollection. If this is not the case, it throws an
+     * exception.
+     *
+     * @param component_mask The mask that selects individual components of
+     * the finite element
+     * @return A mask that selects those blocks corresponding to the selected
+     * components of the input argument.
+     */
+    BlockMask
+    block_mask (const ComponentMask &component_mask) const;
+
+
+    /**
+     * Exception
+     */
+    DeclException0 (ExcNoFiniteElements);
+
+  private:
+    /**
+     * Array of pointers to the finite elements stored by this collection.
+     */
+    std::vector<std_cxx11::shared_ptr<const FiniteElement<dim,spacedim> > > finite_elements;
+  };
+
+
+
+  /* --------------- inline functions ------------------- */
+
+  // Number of finite element objects currently held by this collection.
+  template <int dim, int spacedim>
+  inline unsigned int
+  FECollection<dim,spacedim>::size () const
+  {
+    return static_cast<unsigned int>(finite_elements.size());
+  }
+
+
+  // Number of vector components shared by all elements of this collection.
+  // The collection must not be empty; the assertion below checks this.
+  template <int dim, int spacedim>
+  inline unsigned int
+  FECollection<dim,spacedim>::n_components () const
+  {
+    Assert (finite_elements.size () > 0, ExcNoFiniteElements());
+
+    // Any element can serve as representative: that all elements have the
+    // same number of components has already been enforced when each new
+    // element was added to the collection, so there is nothing left to
+    // verify here and we simply ask the first one.
+    const FiniteElement<dim,spacedim> &first_element = *finite_elements.front();
+
+    return first_element.n_components ();
+  }
+
+
+
+  // Read-only access to the finite element stored at position @p index.
+  template <int dim, int spacedim>
+  inline const FiniteElement<dim,spacedim> &
+  FECollection<dim,spacedim>::operator[] (const unsigned int index) const
+  {
+    Assert (index < finite_elements.size(),
+            ExcIndexRange (index, 0, finite_elements.size()));
+    return *(finite_elements[index]);
+  }
+
+
+
+  template <int dim, int spacedim>
+  unsigned int
+  FECollection<dim,spacedim>::max_dofs_per_vertex () const
+  {
+    Assert (finite_elements.size() > 0, ExcNoFiniteElements());
+
+    // running maximum of dofs_per_vertex over all stored elements
+    unsigned int largest = 0;
+    for (unsigned int fe=0; fe<finite_elements.size(); ++fe)
+      largest = (finite_elements[fe]->dofs_per_vertex > largest ?
+                 finite_elements[fe]->dofs_per_vertex : largest);
+    return largest;
+  }
+
+
+
+  template <int dim, int spacedim>
+  unsigned int
+  FECollection<dim,spacedim>::max_dofs_per_line () const
+  {
+    Assert (finite_elements.size() > 0, ExcNoFiniteElements());
+
+    // running maximum of dofs_per_line over all stored elements
+    unsigned int largest = 0;
+    for (unsigned int fe=0; fe<finite_elements.size(); ++fe)
+      largest = (finite_elements[fe]->dofs_per_line > largest ?
+                 finite_elements[fe]->dofs_per_line : largest);
+    return largest;
+  }
+
+
+
+  template <int dim, int spacedim>
+  unsigned int
+  FECollection<dim,spacedim>::max_dofs_per_quad () const
+  {
+    Assert (finite_elements.size() > 0, ExcNoFiniteElements());
+
+    // running maximum of dofs_per_quad over all stored elements
+    unsigned int largest = 0;
+    for (unsigned int fe=0; fe<finite_elements.size(); ++fe)
+      largest = (finite_elements[fe]->dofs_per_quad > largest ?
+                 finite_elements[fe]->dofs_per_quad : largest);
+    return largest;
+  }
+
+
+
+  template <int dim, int spacedim>
+  unsigned int
+  FECollection<dim,spacedim>::max_dofs_per_hex () const
+  {
+    Assert (finite_elements.size() > 0, ExcNoFiniteElements());
+
+    // running maximum of dofs_per_hex over all stored elements
+    unsigned int largest = 0;
+    for (unsigned int fe=0; fe<finite_elements.size(); ++fe)
+      largest = (finite_elements[fe]->dofs_per_hex > largest ?
+                 finite_elements[fe]->dofs_per_hex : largest);
+    return largest;
+  }
+
+
+
+  template <int dim, int spacedim>
+  unsigned int
+  FECollection<dim,spacedim>::max_dofs_per_face () const
+  {
+    Assert (finite_elements.size() > 0, ExcNoFiniteElements());
+
+    // running maximum of dofs_per_face over all stored elements
+    unsigned int largest = 0;
+    for (unsigned int fe=0; fe<finite_elements.size(); ++fe)
+      largest = (finite_elements[fe]->dofs_per_face > largest ?
+                 finite_elements[fe]->dofs_per_face : largest);
+    return largest;
+  }
+
+
+
+  template <int dim, int spacedim>
+  unsigned int
+  FECollection<dim,spacedim>::max_dofs_per_cell () const
+  {
+    Assert (finite_elements.size() > 0, ExcNoFiniteElements());
+
+    // running maximum of dofs_per_cell over all stored elements
+    unsigned int largest = 0;
+    for (unsigned int fe=0; fe<finite_elements.size(); ++fe)
+      largest = (finite_elements[fe]->dofs_per_cell > largest ?
+                 finite_elements[fe]->dofs_per_cell : largest);
+    return largest;
+  }
+
+
+  template <int dim, int spacedim>
+  bool
+  FECollection<dim,spacedim>::hp_constraints_are_implemented () const
+  {
+    Assert (finite_elements.size() > 0, ExcNoFiniteElements());
+
+    // the answer is negative as soon as one element lacks the support;
+    // elements after the first such one need not be queried any more
+    for (unsigned int fe=0; fe<finite_elements.size(); ++fe)
+      if (finite_elements[fe]->hp_constraints_are_implemented() == false)
+        return false;
+    return true;
+  }
+
+
+} // namespace hp
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/hp/fe_values.h b/include/deal.II/hp/fe_values.h
new file mode 100644
index 0000000..abe54dc
--- /dev/null
+++ b/include/deal.II/hp/fe_values.h
@@ -0,0 +1,648 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__hp_fe_values_h
+#define dealii__hp_fe_values_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/hp/fe_collection.h>
+#include <deal.II/hp/q_collection.h>
+#include <deal.II/hp/mapping_collection.h>
+#include <deal.II/fe/fe_values.h>
+
+#include <map>
+#include <deal.II/base/std_cxx11/shared_ptr.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int dim, int spacedim> class FiniteElement;
+
+
+
+namespace internal
+{
+  namespace hp
+  {
+    /**
+     * Base class for the <tt>hp::FE*Values</tt> classes, storing the data
+     * that is common to them. The main task of this class is to provide a
+     * table where for every combination of finite element, mapping, and
+     * quadrature object from their corresponding collection objects there is
+     * a matching ::FEValues, ::FEFaceValues, or ::FESubfaceValues object. To
+     * make things more efficient, however, these FE*Values objects are only
+     * created once requested (lazy allocation).
+     *
+     * The first template parameter denotes the dimension of the cells (the
+     * space dimension is FEValuesType::space_dimension), the second that of
+     * the object that we integrate on, i.e. for the usual hp::FEValues it
+     * equals the first one, while for face integration it is one less. The
+     * third template parameter is the underlying non-hp FE*Values type, i.e.
+     * dealii::FEValues, dealii::FEFaceValues, or dealii::FESubfaceValues.
+     *
+     * @ingroup hp
+     *
+     * @author Wolfgang Bangerth, 2003
+     */
+    template <int dim, int q_dim, class FEValuesType>
+    class FEValuesBase
+    {
+    public:
+      /**
+       * Constructor. Set the fields of this class to the values indicated by
+       * the parameters to the constructor.
+       */
+      FEValuesBase (const dealii::hp::MappingCollection<dim,FEValuesType::space_dimension> &mapping_collection,
+                    const dealii::hp::FECollection<dim,FEValuesType::space_dimension>      &fe_collection,
+                    const dealii::hp::QCollection<q_dim>     &q_collection,
+                    const dealii::UpdateFlags             update_flags);
+      /**
+       * Constructor. This constructor is equivalent to the other one except
+       * that it makes the object use a $Q_1$ mapping (i.e., an object of type
+       * MappingQGeneric(1)) implicitly.
+       */
+      FEValuesBase (const dealii::hp::FECollection<dim,FEValuesType::space_dimension> &fe_collection,
+                    const dealii::hp::QCollection<q_dim> &q_collection,
+                    const UpdateFlags         update_flags);
+
+      /**
+       * Get a reference to the collection of finite element objects used
+       * here.
+       */
+      const dealii::hp::FECollection<dim,FEValuesType::space_dimension> &
+      get_fe_collection () const;
+
+      /**
+       * Get a reference to the collection of mapping objects used here.
+       */
+      const dealii::hp::MappingCollection<dim,FEValuesType::space_dimension> &
+      get_mapping_collection () const;
+
+      /**
+       * Get a reference to the collection of quadrature objects used here.
+       */
+      const dealii::hp::QCollection<q_dim> &
+      get_quadrature_collection () const;
+
+      /**
+       * Get the update flags that were given to the constructor.
+       */
+      UpdateFlags get_update_flags() const;
+
+      /**
+       * Return a reference to the @p FEValues object selected by the last
+       * call to select_fe_values(). select_fe_values() in turn is called when
+       * you called the @p reinit function of the <tt>hp::FE*Values</tt> class
+       * the last time.
+       */
+      const FEValuesType &get_present_fe_values () const;
+
+    protected:
+
+      /**
+       * Select the FEValues object matching the given FE, mapping, and
+       * quadrature indices (in this order); create it if it doesn't exist yet.
+       *
+       * The function returns a writable reference so that derived classes can
+       * also reinit() the selected FEValues object.
+       */
+      FEValuesType &
+      select_fe_values (const unsigned int fe_index,
+                        const unsigned int mapping_index,
+                        const unsigned int q_index);
+
+    protected:
+      /**
+       * Pointer to (not a copy of) the collection of finite elements to use.
+       */
+      const SmartPointer<const dealii::hp::FECollection<dim,FEValuesType::space_dimension>,
+            FEValuesBase<dim,q_dim,FEValuesType> > fe_collection;
+
+      /**
+       * Pointer to (not a copy of) the collection of mappings to be used.
+       */
+      const SmartPointer<const dealii::hp::MappingCollection<dim, FEValuesType::space_dimension>,
+            FEValuesBase<dim,q_dim,FEValuesType> > mapping_collection;
+
+      /**
+       * Copy of the quadrature collection object provided to the constructor.
+       */
+      const dealii::hp::QCollection<q_dim> q_collection;
+
+    private:
+      /**
+       * A table in which we store pointers to fe_values objects for different
+       * finite element, mapping, and quadrature objects from our collection.
+       * The first index indicates the index of the finite element within the
+       * fe_collection, the second the index of the mapping within the mapping
+       * collection, and the last one the index of the quadrature formula
+       * within the q_collection.
+       *
+       * Initially, all entries have zero pointers, and we will allocate them
+       * lazily as needed in select_fe_values().
+       */
+      dealii::Table<3,std_cxx11::shared_ptr<FEValuesType> > fe_values_table;
+
+      /**
+       * Set of indices pointing at the fe_values object selected last time
+       * the select_fe_values() function was called.
+       */
+      TableIndices<3> present_fe_values_index;
+
+      /**
+       * Values of the update flags as given to the constructor.
+       */
+      const UpdateFlags update_flags;
+    };
+
+  }
+
+}
+
+
+namespace hp
+{
+
+  /**
+   * An hp equivalent of the ::FEValues class. See the step-27 tutorial
+   * program for examples of use.
+   *
+   * The idea of this class is as follows: when one assembles matrices in the
+   * hp finite element method, there may be different finite elements on
+   * different cells, and consequently one may also want to use different
+   * quadrature formulas for different cells. On the other hand, the
+   * ::FEValues class efficiently pre-evaluates whatever information is
+   * necessary for a single finite element and quadrature object. This class
+   * brings these concepts together: it provides a "collection" of ::FEValues
+   * objects.
+   *
+   * Upon construction, one passes not one finite element and quadrature
+   * object (and possibly a mapping), but whole collections of type
+   * hp::FECollection and hp::QCollection. Later on, when one sits on a
+   * concrete cell, one would call the reinit() function for this particular
+   * cell, just as one does for a regular ::FEValues object. The difference is
+   * that this time, the reinit() function looks up the active_fe_index of
+   * that cell, if necessary creates a ::FEValues object that matches the
+   * finite element and quadrature formulas with that particular index in
+   * their collections, and then re-initializes it for the current cell. The
+   * ::FEValues object that then fits the finite element and quadrature
+   * formula for the current cell can then be accessed using the
+   * get_present_fe_values() function, and one would work with it just like
+   * with any ::FEValues object for non-hp DoF handler objects.
+   *
+   * The reinit() functions have additional arguments with default values. If
+   * not specified, the function takes the index into the hp::FECollection,
+   * hp::QCollection, and hp::MappingCollection objects from the
+   * active_fe_index of the cell, as explained above. However, one can also
+   * select different indices for a current cell. For example, by specifying a
+   * different index into the hp::QCollection class, one does not need to sort
+   * the quadrature objects in the quadrature collection so that they match
+   * one-to-one the order of finite element objects in the FE collection (even
+   * though choosing such an order is certainly convenient).
+   *
+   * Note that ::FEValues objects are created on the fly, i.e. only as they
+   * are needed. This ensures that we do not create objects for every
+   * combination of finite element, quadrature formula and mapping, but only
+   * those that will actually be needed.
+   *
+   * This class has not yet been implemented for the use in the codimension
+   * one case (<tt>spacedim != dim </tt>).
+   *
+   * @ingroup hp hpcollection
+   * @author Wolfgang Bangerth, 2003
+   */
+  template <int dim, int spacedim=dim>
+  class FEValues : public dealii::internal::hp::FEValuesBase<dim,dim,dealii::FEValues<dim,spacedim> >
+  {
+  public:
+    /** The value of the @p dim template argument of this class. */
+    static const unsigned int dimension = dim;
+    /** The value of the @p spacedim template argument of this class. */
+    static const unsigned int space_dimension = spacedim;
+
+    /**
+     * Constructor. Initialize this object with the given parameters.
+     *
+     * The finite element collection parameter is actually ignored, but is in
+     * the signature of this function to make it compatible with the signature
+     * of the respective constructor of the usual FEValues object, with the
+     * respective parameter in that function also being the return value of
+     * the DoFHandler::get_fe() function.
+     */
+    FEValues (const dealii::hp::MappingCollection<dim,spacedim> &mapping_collection,
+              const dealii::hp::FECollection<dim,spacedim>  &fe_collection,
+              const dealii::hp::QCollection<dim>       &q_collection,
+              const UpdateFlags             update_flags);
+
+
+    /**
+     * Constructor. This constructor is equivalent to the other one except
+     * that it makes the object use a $Q_1$ mapping (i.e., an object of type
+     * MappingQGeneric(1)) implicitly.
+     *
+     * The finite element collection parameter is actually ignored, but is in
+     * the signature of this function to make it compatible with the signature
+     * of the respective constructor of the usual FEValues object, with the
+     * respective parameter in that function also being the return value of
+     * the DoFHandler::get_fe() function.
+     */
+    FEValues (const hp::FECollection<dim,spacedim> &fe_collection,
+              const hp::QCollection<dim>      &q_collection,
+              const UpdateFlags            update_flags);
+
+
+    /**
+     * Reinitialize the object for the given cell.
+     *
+     * After the call, you can get an FEValues object using the
+     * get_present_fe_values() function that corresponds to the present cell.
+     * For this FEValues object, we use the additional arguments described
+     * below to determine which finite element, mapping, and quadrature
+     * formula to use. They are ordered in such a way that the arguments one
+     * may want to change most frequently come first. The rules for these
+     * arguments are as follows:
+     *
+     * If the @p fe_index argument to this function is left at its default
+     * value, then we use that finite element within the hp::FECollection
+     * passed to the constructor of this class with index given by
+     * <code>cell-@>active_fe_index()</code>. Consequently, the
+     * hp::FECollection argument given to this object should really be the
+     * same as that used in the construction of the hp::DoFHandler associated
+     * with the present cell. On the other hand, if a value is given for this
+     * argument, it overrides the choice of
+     * <code>cell-@>active_fe_index()</code>.
+     *
+     * If the @p q_index argument is left at its default value, then we use
+     * that quadrature formula within the hp::QCollection passed to the
+     * constructor of this class with index given by
+     * <code>cell-@>active_fe_index()</code>, i.e. the same index as that of
+     * the finite element. In this case, there should be a corresponding
+     * quadrature formula for each finite element in the hp::FECollection. As
+     * a special case, if the quadrature collection contains only a single
+     * element (a frequent case if one wants to use the same quadrature object
+     * for all finite elements in an hp discretization, even if that may not
+     * be the most efficient), then this single quadrature is used unless a
+     * different value for this argument is specified. On the other hand, if a
+     * value is given for this argument, it overrides the choice of
+     * <code>cell-@>active_fe_index()</code> or the choice for the single
+     * quadrature.
+     *
+     * If the @p mapping_index argument is left at its default value, then we
+     * use that mapping object within the hp::MappingCollection passed to the
+     * constructor of this class with index given by
+     * <code>cell-@>active_fe_index()</code>, i.e. the same index as that of
+     * the finite element. As above, if the mapping collection contains only a
+     * single element (a frequent case if one wants to use a $Q_1$ mapping for
+     * all finite elements in an hp discretization), then this single mapping
+     * is used unless a different value for this argument is specified.
+     */
+    template <typename DoFHandlerType, bool lda>
+    void
+    reinit (const TriaIterator<DoFCellAccessor<DoFHandlerType,lda> > cell,
+            const unsigned int q_index       = numbers::invalid_unsigned_int,
+            const unsigned int mapping_index = numbers::invalid_unsigned_int,
+            const unsigned int fe_index      = numbers::invalid_unsigned_int);
+
+    /**
+     * Like the previous function, but for non-hp iterators. The reason this
+     * (and the other non-hp iterator) function exists is so that one can use
+     * hp::FEValues not only for hp::DoFHandler objects, but for all sorts of
+     * DoFHandler objects, and triangulations not associated with DoFHandlers
+     * in general.
+     *
+     * Since <code>cell-@>active_fe_index()</code> doesn't make sense for
+     * triangulation iterators, this function chooses the zero-th finite
+     * element, mapping, and quadrature object from the relevant collections
+     * passed to the constructor of this object. The only exception is if you
+     * specify a value different from the default value for any of these last
+     * three arguments.
+     */
+    void
+    reinit (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+            const unsigned int q_index       = numbers::invalid_unsigned_int,
+            const unsigned int mapping_index = numbers::invalid_unsigned_int,
+            const unsigned int fe_index      = numbers::invalid_unsigned_int);
+
+
+  };
+
+
+
+  /**
+   * This is the equivalent of the hp::FEValues class but for face
+   * integrations, i.e. it is to hp::FEValues what ::FEFaceValues is to
+   * ::FEValues.
+   *
+   * The same comments apply as in the documentation of the hp::FEValues
+   * class. However, it is important to note that it is here more common that
+   * one would want to explicitly specify an index to a particular quadrature
+   * formula in the reinit() functions. This is because the default index
+   * corresponds to the finite element index on the current cell. On the
+   * other hand, integration on faces will typically have to happen with a
+   * quadrature formula that is adjusted to the finite elements used on both
+   * sides of a face. If one sorts the elements of the hp::FECollection with
+   * ascending polynomial degree, and matches these finite elements with
+   * corresponding quadrature formulas in the hp::QCollection passed to the
+   * constructor, then the quadrature index passed to the reinit() function
+   * should typically be something like <code>std::max
+   * (cell-@>active_fe_index(), neighbor-@>active_fe_index())</code> to ensure
+   * that a quadrature formula is chosen that is sufficiently accurate for
+   * <em>both</em> finite elements.
+   *
+   * @ingroup hp hpcollection
+   * @author Wolfgang Bangerth, 2003
+   */
+  template <int dim, int spacedim=dim>
+  class FEFaceValues : public dealii::internal::hp::FEValuesBase<dim,dim-1,dealii::FEFaceValues<dim,spacedim> >
+  {
+  public:
+    /**
+     * Constructor. Initialize this object with the given parameters.
+     *
+     * The finite element collection parameter is actually ignored, but is in
+     * the signature of this function to make it compatible with the signature
+     * of the respective constructor of the usual FEValues object, with the
+     * respective parameter in that function also being the return value of
+     * the <tt>DoFHandler::get_fe()</tt> function.
+     */
+    FEFaceValues (const hp::MappingCollection<dim,spacedim> &mapping_collection,
+                  const hp::FECollection<dim,spacedim>  &fe_collection,
+                  const hp::QCollection<dim-1>     &q_collection,
+                  const UpdateFlags             update_flags);
+
+
+    /**
+     * Constructor. This constructor is equivalent to the other one except
+     * that it makes the object use a $Q_1$ mapping (i.e., an object of type
+     * MappingQGeneric(1)) implicitly.
+     *
+     * The finite element collection parameter is actually ignored, but is in
+     * the signature of this function to make it compatible with the signature
+     * of the respective constructor of the usual FEValues object, with the
+     * respective parameter in that function also being the return value of
+     * the <tt>DoFHandler::get_fe()</tt> function.
+     */
+    FEFaceValues (const hp::FECollection<dim,spacedim>  &fe_collection,
+                  const hp::QCollection<dim-1> &q_collection,
+                  const UpdateFlags             update_flags);
+
+    /**
+     * Reinitialize the object for the given cell and face.
+     *
+     * After the call, you can get an FEFaceValues object using the
+     * get_present_fe_values() function that corresponds to the present cell.
+     * For this FEFaceValues object, we use the additional arguments described
+     * below to determine which finite element, mapping, and quadrature
+     * formula to use. They are ordered in such a way that the arguments one
+     * may want to change most frequently come first. The rules for these
+     * arguments are as follows:
+     *
+     * If the @p fe_index argument to this function is left at its default
+     * value, then we use that finite element within the hp::FECollection
+     * passed to the constructor of this class with index given by
+     * <code>cell-@>active_fe_index()</code>. Consequently, the
+     * hp::FECollection argument given to this object should really be the
+     * same as that used in the construction of the hp::DoFHandler associated
+     * with the present cell. On the other hand, if a value is given for this
+     * argument, it overrides the choice of
+     * <code>cell-@>active_fe_index()</code>.
+     *
+     * If the @p q_index argument is left at its default value, then we use
+     * that quadrature formula within the hp::QCollection passed to the
+     * constructor of this class with index given by
+     * <code>cell-@>active_fe_index()</code>, i.e. the same index as that of
+     * the finite element. In this case, there should be a corresponding
+     * quadrature formula for each finite element in the hp::FECollection. As
+     * a special case, if the quadrature collection contains only a single
+     * element (a frequent case if one wants to use the same quadrature object
+     * for all finite elements in an hp discretization, even if that may not
+     * be the most efficient), then this single quadrature is used unless a
+     * different value for this argument is specified. On the other hand, if a
+     * value is given for this argument, it overrides the choice of
+     * <code>cell-@>active_fe_index()</code> or the choice for the single
+     * quadrature.
+     *
+     * If the @p mapping_index argument is left at its default value, then we
+     * use that mapping object within the hp::MappingCollection passed to the
+     * constructor of this class with index given by
+     * <code>cell-@>active_fe_index()</code>, i.e. the same index as that of
+     * the finite element. As above, if the mapping collection contains only a
+     * single element (a frequent case if one wants to use a $Q_1$ mapping for
+     * all finite elements in an hp discretization), then this single mapping
+     * is used unless a different value for this argument is specified.
+     */
+    template <typename DoFHandlerType, bool lda>
+    void
+    reinit (const TriaIterator<DoFCellAccessor<DoFHandlerType,lda> > cell,
+            const unsigned int face_no,
+            const unsigned int q_index       = numbers::invalid_unsigned_int,
+            const unsigned int mapping_index = numbers::invalid_unsigned_int,
+            const unsigned int fe_index      = numbers::invalid_unsigned_int);
+
+    /**
+     * Like the previous function, but for non-hp iterators. The reason this
+     * (and the other non-hp iterator) function exists is so that one can use
+     * hp::FEFaceValues not only for hp::DoFHandler objects, but for all sorts
+     * of DoFHandler objects, and triangulations not associated with
+     * DoFHandlers in general.
+     *
+     * Since <code>cell-@>active_fe_index()</code> doesn't make sense for
+     * triangulation iterators, this function chooses the zero-th finite
+     * element, mapping, and quadrature object from the relevant collections
+     * passed to the constructor of this object. The only exception is if you
+     * specify a value different from the default value for any of these last
+     * three arguments.
+     */
+    void
+    reinit (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+            const unsigned int face_no,
+            const unsigned int q_index       = numbers::invalid_unsigned_int,
+            const unsigned int mapping_index = numbers::invalid_unsigned_int,
+            const unsigned int fe_index      = numbers::invalid_unsigned_int);
+  };
+
+
+
+  /**
+   * This class implements for subfaces what hp::FEFaceValues does for faces.
+   * See there for further documentation.
+   *
+   * @ingroup hp hpcollection
+   * @author Wolfgang Bangerth, 2003
+   */
+  template <int dim, int spacedim=dim>
+  class FESubfaceValues : public dealii::internal::hp::FEValuesBase<dim,dim-1,dealii::FESubfaceValues<dim,spacedim> >
+  {
+  public:
+    /**
+     * Constructor. Initialize this object with the given parameters.
+     *
+     * The finite element collection parameter is actually ignored, but is in
+     * the signature of this function to make it compatible with the signature
+     * of the respective constructor of the usual FEValues object, with the
+     * respective parameter in that function also being the return value of
+     * the <tt>DoFHandler::get_fe()</tt> function.
+     */
+    FESubfaceValues (const hp::MappingCollection<dim,spacedim> &mapping_collection,
+                     const hp::FECollection<dim,spacedim>  &fe_collection,
+                     const hp::QCollection<dim-1>     &q_collection,
+                     const UpdateFlags             update_flags);
+
+
+    /**
+     * Constructor. This constructor is equivalent to the other one except
+     * that it makes the object use a $Q_1$ mapping (i.e., an object of type
+     * MappingQGeneric(1)) implicitly.
+     *
+     * The finite element collection parameter is actually ignored, but is in
+     * the signature of this function to make it compatible with the signature
+     * of the respective constructor of the usual FEValues object, with the
+     * respective parameter in that function also being the return value of
+     * the <tt>DoFHandler::get_fe()</tt> function.
+     */
+    FESubfaceValues (const hp::FECollection<dim,spacedim> &fe_collection,
+                     const hp::QCollection<dim-1>    &q_collection,
+                     const UpdateFlags            update_flags);
+
+    /**
+     * Reinitialize the object for the given cell, face, and subface.
+     *
+     * After the call, you can get an FESubfaceValues object using the
+     * get_present_fe_values() function that corresponds to the present cell.
+     * For this FESubfaceValues object, we use the additional arguments
+     * described below to determine which finite element, mapping, and
+     * quadrature formula to use. They are ordered in such a way that the
+     * arguments one may want to change most frequently come first. The rules
+     * for these arguments are as follows:
+     *
+     * If the @p q_index argument is left at its default value, then we use
+     * that quadrature formula within the hp::QCollection passed to the
+     * constructor of this class with index given by
+     * <code>cell-@>active_fe_index()</code>, i.e. the same index as that of
+     * the finite element. In this case, there should be a corresponding
+     * quadrature formula for each finite element in the hp::FECollection. As
+     * a special case, if the quadrature collection contains only a single
+     * element (a frequent case if one wants to use the same quadrature object
+     * for all finite elements in an hp discretization, even if that may not
+     * be the most efficient), then this single quadrature is used unless a
+     * different value for this argument is specified. On the other hand, if a
+     * value is given for this argument, it overrides the choice of
+     * <code>cell-@>active_fe_index()</code> or the choice for the single
+     * quadrature.
+     *
+     * If the @p mapping_index argument is left at its default value, then we
+     * use that mapping object within the hp::MappingCollection passed to the
+     * constructor of this class with index given by
+     * <code>cell-@>active_fe_index()</code>, i.e. the same index as that of
+     * the finite element. As above, if the mapping collection contains only a
+     * single element (a frequent case if one wants to use a $Q_1$ mapping for
+     * all finite elements in an hp discretization), then this single mapping
+     * is used unless a different value for this argument is specified.
+     */
+    template <typename DoFHandlerType, bool lda>
+    void
+    reinit (const TriaIterator<DoFCellAccessor<DoFHandlerType,lda> > cell,
+            const unsigned int face_no,
+            const unsigned int subface_no,
+            const unsigned int q_index       = numbers::invalid_unsigned_int,
+            const unsigned int mapping_index = numbers::invalid_unsigned_int,
+            const unsigned int fe_index      = numbers::invalid_unsigned_int);
+
+    /**
+     * Like the previous function, but for non-hp iterators. The reason this
+     * (and the other non-hp iterator) function exists is so that one can use
+     * hp::FESubfaceValues not only for hp::DoFHandler objects, but for all
+     * sorts of DoFHandler objects, and triangulations not associated with
+     * DoFHandlers in general.
+     *
+     * Since <code>cell-@>active_fe_index()</code> doesn't make sense for
+     * triangulation iterators, this function chooses the zero-th finite
+     * element, mapping, and quadrature object from the relevant collections
+     * passed to the constructor of this object. The only exception is if you
+     * specify a value different from the default value for any of these last
+     * three arguments.
+     */
+    void
+    reinit (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+            const unsigned int face_no,
+            const unsigned int subface_no,
+            const unsigned int q_index       = numbers::invalid_unsigned_int,
+            const unsigned int mapping_index = numbers::invalid_unsigned_int,
+            const unsigned int fe_index      = numbers::invalid_unsigned_int);
+  };
+
+}
+
+
+// -------------- inline and template functions --------------
+
+namespace internal
+{
+  namespace hp
+  {
+    // Access to the FEValuesType object stored at present_fe_values_index.
+    template <int dim, int q_dim, class FEValuesType>
+    inline const FEValuesType &
+    FEValuesBase<dim,q_dim,FEValuesType>::get_present_fe_values () const
+    {
+      return *fe_values_table(present_fe_values_index);
+    }
+
+
+
+    // Read access to the finite element collection fe_collection refers to.
+    template <int dim, int q_dim, class FEValuesType>
+    inline const dealii::hp::FECollection<dim,FEValuesType::space_dimension> &
+    FEValuesBase<dim,q_dim,FEValuesType>::get_fe_collection () const
+    {
+      return *fe_collection;
+    }
+
+
+
+    // Read access to the mapping collection mapping_collection refers to.
+    template <int dim, int q_dim, class FEValuesType>
+    inline const dealii::hp::MappingCollection<dim,FEValuesType::space_dimension> &
+    FEValuesBase<dim,q_dim,FEValuesType>::get_mapping_collection () const
+    {
+      return *mapping_collection;
+    }
+
+
+
+    // Read access to the q_collection member (returned by reference).
+    template <int dim, int q_dim, class FEValuesType>
+    inline const dealii::hp::QCollection<q_dim> &
+    FEValuesBase<dim,q_dim,FEValuesType>::get_quadrature_collection () const
+    {
+      return q_collection;
+    }
+
+
+
+    // The update_flags member, returned by value.
+    template <int dim, int q_dim, class FEValuesType>
+    inline dealii::UpdateFlags
+    FEValuesBase<dim,q_dim,FEValuesType>::get_update_flags () const
+    {
+      return update_flags;
+    }
+  }
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/hp/mapping_collection.h b/include/deal.II/hp/mapping_collection.h
new file mode 100644
index 0000000..2c33579
--- /dev/null
+++ b/include/deal.II/hp/mapping_collection.h
@@ -0,0 +1,168 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__mapping_collection_h
+#define dealii__mapping_collection_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/fe/mapping_q1.h>
+#include <deal.II/fe/fe.h>
+
+#include <vector>
+#include <deal.II/base/std_cxx11/shared_ptr.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace hp
+{
+  /**
+   * This class implements a collection of mapping objects in the same way as
+   * the hp::FECollection implements a collection of finite element classes.
+   *
+   * It implements the concepts stated in the
+   * @ref hpcollection
+   * module described in the doxygen documentation.
+   *
+   * Although it is recommended to supply an appropriate mapping for each
+   * finite element kind used in a hp-computation, the MappingCollection class
+   * has a constructor that takes a single mapping. Because that constructor
+   * is declared <code>explicit</code>, a collection with just one mapping has
+   * to be created by calling it explicitly. This is for the convenience of
+   * the user, as many simple geometries do not require different mappings
+   * along the boundary to achieve optimal convergence rates.  Hence providing
+   * a single mapping object will usually suffice. See the hp::FEValues class
+   * for the rules which mapping will be selected for a given cell.
+   *
+   * @ingroup hp hpcollection
+   *
+   * @author Oliver Kayser-Herold, 2005
+   */
+  template<int dim, int spacedim=dim>
+  class MappingCollection : public Subscriptor
+  {
+  public:
+    /**
+     * Default constructor. Leads to an empty collection that can later be
+     * filled using push_back().
+     */
+    MappingCollection ();
+
+    /**
+     * Constructor creating a MappingCollection from a single mapping; it is
+     * <code>explicit</code>, so no implicit conversion happens. More mappings
+     * can be added with push_back(), if desired, though it would probably be
+     * clearer to add all mappings the same way.
+     */
+    explicit MappingCollection (const Mapping<dim,spacedim> &mapping);
+
+    /**
+     * Copy constructor.
+     */
+    MappingCollection (const MappingCollection<dim,spacedim> &mapping_collection);
+
+    /**
+     * Adds a new mapping to the MappingCollection.  The mappings have to be
+     * added in the order of the active_fe_indices. Thus the reference to the
+     * mapping object for active_fe_index 0 has to be added first, followed by
+     * the mapping object for active_fe_index 1.
+     */
+    void push_back (const Mapping<dim,spacedim> &new_mapping);
+
+    /**
+     * Returns the mapping object which was specified by the user for the
+     * active_fe_index which is provided as a parameter to this method.
+     *
+     * @pre @p index must be less than the number of elements of the
+     * collection, i.e. less than size().
+     */
+    const Mapping<dim,spacedim> &
+    operator[] (const unsigned int index) const;
+
+    /**
+     * Returns the number of mapping objects stored in this container.
+     */
+    unsigned int size () const;
+
+    /**
+     * Determine an estimate for the memory consumption (in bytes) of this
+     * object.
+     */
+    std::size_t memory_consumption () const;
+
+  private:
+    /**
+     * The real container, which stores shared pointers to the different
+     * (const) Mapping objects.
+     */
+    std::vector<std_cxx11::shared_ptr<const Mapping<dim,spacedim> > > mappings;
+  };
+
+
+  /**
+   * Many places in the library by default use (bi-,tri-)linear mappings
+   * unless users explicitly provide a different mapping to use. In these
+   * cases, the called function has to create a $Q_1$ mapping object, i.e., an
+   * object of type MappingQGeneric(1). This is costly. It would also be
+   * costly to create such objects as static objects in the affected
+   * functions, because static objects are never destroyed throughout the
+   * lifetime of a program, even though they only have to be created once the
+   * first time code runs through a particular function.
+   *
+   * In order to avoid creation of (static or dynamic) $Q_1$ mapping objects
+   * in these contexts throughout the library, this class defines a static
+   * collection of mappings with a single $Q_1$ mapping object. This
+   * collection can then be used in all of those places where such a
+   * collection is needed.
+   */
+  template<int dim, int spacedim=dim>
+  struct StaticMappingQ1
+  {
+  public:
+    /**
+     * The publicly available static $Q_1$ mapping collection object.
+     */
+    static MappingCollection<dim,spacedim> mapping_collection;
+  };
+
+
+  /* --------------- inline functions ------------------- */
+
+  // Number of mappings currently held in the mappings vector.
+  template<int dim, int spacedim>
+  inline unsigned int
+  MappingCollection<dim,spacedim>::size () const
+  {
+    return mappings.size();
+  }
+
+
+
+  // Access by position; Assert()s index < mappings.size() before dereferencing.
+  template<int dim, int spacedim>
+  inline const Mapping<dim,spacedim> &
+  MappingCollection<dim,spacedim>::operator[] (const unsigned int index) const
+  {
+    Assert (index < mappings.size (),
+            ExcIndexRange (index, 0, mappings.size ()));
+    return *mappings[index];
+  }
+
+} // namespace hp
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/hp/q_collection.h b/include/deal.II/hp/q_collection.h
new file mode 100644
index 0000000..e18f865
--- /dev/null
+++ b/include/deal.II/hp/q_collection.h
@@ -0,0 +1,230 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__q_collection_h
+#define dealii__q_collection_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/quadrature.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/fe/fe.h>
+
+#include <vector>
+#include <deal.II/base/std_cxx11/shared_ptr.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace hp
+{
+  /**
+   * This class implements a collection of quadrature objects in the same way
+   * as the hp::FECollection implements a collection of finite element
+   * classes.
+   *
+   * It implements the concepts stated in the
+   * @ref hpcollection
+   * module described in the doxygen documentation.
+   *
+   * @ingroup hp hpcollection
+   *
+   * @author Oliver Kayser-Herold, 2005
+   */
+  template <int dim>
+  class QCollection : public Subscriptor
+  {
+  public:
+    /**
+     * Default constructor. Leads to an empty collection that can later be
+     * filled using push_back().
+     */
+    QCollection ();
+
+    /**
+     * Constructor creating a QCollection from a single quadrature rule; it is
+     * <code>explicit</code>, so no implicit conversion happens. More
+     * quadrature formulas can be added with push_back(), if desired, though
+     * it would probably be clearer to add all formulas the same way.
+     */
+    explicit QCollection (const Quadrature<dim> &quadrature);
+
+    /**
+     * Copy constructor.
+     */
+    QCollection (const QCollection<dim> &q_collection);
+
+    /**
+     * Adds a new quadrature rule to the QCollection.  The quadrature rules
+     * have to be added in the same order as for the FECollection for which
+     * this quadrature rule collection is meant. Thus the reference to the
+     * quadrature rule for active_fe_index 0 has to be added first, followed
+     * by the quadrature rule for active_fe_index 1.
+     *
+     * This class creates a copy of the given quadrature object, i.e. you can
+     * do things like <tt>push_back(QGauss<dim>(3));</tt>. The internal copy
+     * is later destroyed by this object upon destruction of the entire
+     * collection.
+     */
+    void push_back (const Quadrature<dim> &new_quadrature);
+
+    /**
+     * Returns a reference to the quadrature rule specified by the argument.
+     *
+     * @pre @p index must be less than the number of elements of the
+     * collection, i.e. less than size().
+     */
+    const Quadrature<dim> &
+    operator[] (const unsigned int index) const;
+
+    /**
+     * Returns the number of quadrature pointers stored in this object.
+     */
+    unsigned int size () const;
+
+    /**
+     * Return the maximum number of quadrature points over all the elements of
+     * the collection. This is mostly useful to initialize arrays to allocate
+     * the maximum amount of memory that may be used when re-sizing later on
+     * to a particular quadrature formula from within this collection.
+     */
+    unsigned int max_n_quadrature_points () const;
+
+    /**
+     * Determine an estimate for the memory consumption (in bytes) of this
+     * object.
+     */
+    std::size_t memory_consumption () const;
+
+    /**
+     * Exception
+     */
+    DeclException0 (ExcNoQuadrature);
+
+  private:
+    /**
+     * The real container, which stores shared pointers to the different
+     * (const) quadrature objects.
+     */
+    std::vector<std_cxx11::shared_ptr<const Quadrature<dim> > > quadratures;
+  };
+
+
+
+  /* --------------- inline functions ------------------- */
+
+  // Number of entries currently held in the quadratures vector.
+  template <int dim>
+  inline unsigned int
+  QCollection<dim>::size () const
+  {
+    return quadratures.size();
+  }
+
+
+
+  // Largest size() reported by any of the stored quadrature formulas.
+  template <int dim>
+  inline unsigned int
+  QCollection<dim>::max_n_quadrature_points () const
+  {
+    Assert (quadratures.size() > 0,
+            ExcMessage ("You can't call this function for an empty collection"));
+
+    unsigned int largest = 0;
+    for (unsigned int q=0; q<quadratures.size(); ++q)
+      largest = (quadratures[q]->size() > largest
+                 ? quadratures[q]->size() : largest);
+
+    return largest;
+  }
+
+
+
+  // Access by position; Assert()s index < quadratures.size() beforehand.
+  template <int dim>
+  inline const Quadrature<dim> &
+  QCollection<dim>::operator[] (const unsigned int index) const
+  {
+    Assert (index < quadratures.size (),
+            ExcIndexRange (index, 0, quadratures.size ()));
+    return *quadratures[index];
+  }
+
+
+
+  // Nothing to set up here: no quadrature formula is added.
+  template <int dim>
+  inline QCollection<dim>::QCollection ()
+  {}
+
+
+
+  // Start the collection with a heap-allocated copy of the given formula.
+  template <int dim>
+  inline QCollection<dim>::QCollection (const Quadrature<dim> &quadrature)
+  {
+    const std_cxx11::shared_ptr<const Quadrature<dim> > copy (new Quadrature<dim>(quadrature));
+    quadratures.push_back (copy);
+  }
+
+
+
+  template <int dim>
+  inline
+  QCollection<dim>::
+  QCollection (const QCollection<dim> &q_collection)
+    :
+    Subscriptor (),
+    // Copy the vector of shared pointers; the quadrature
+    // objects themselves are not duplicated. Nothing bad
+    // should happen because of that: the pointers held
+    // by both collections simply all point to the same
+    // objects, and since they are pointers to const,
+    // neither collection can modify what is shared. The
+    // last pointer to die will delete the quadrature
+    // object it refers to. Note that the Subscriptor
+    // base class above is default-constructed, i.e. it
+    // is set up afresh and not copied from
+    // q_collection.
+    quadratures (q_collection.quadratures)
+  {}
+
+
+
+  // Estimate: sizeof this object plus what the quadratures vector reports.
+  template <int dim>
+  inline std::size_t
+  QCollection<dim>::memory_consumption () const
+  {
+    const std::size_t own_size = sizeof(*this);
+    return own_size + MemoryConsumption::memory_consumption (quadratures);
+  }
+
+
+  // Append a heap-allocated copy of new_quadrature to the collection.
+  template <int dim>
+  inline void
+  QCollection<dim>::push_back (const Quadrature<dim> &new_quadrature)
+  {
+    const std_cxx11::shared_ptr<const Quadrature<dim> > copy (new Quadrature<dim>(new_quadrature));
+    quadratures.push_back (copy);
+  }
+
+} // namespace hp
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/integrators/advection.h b/include/deal.II/integrators/advection.h
new file mode 100644
index 0000000..e77d09f
--- /dev/null
+++ b/include/deal.II/integrators/advection.h
@@ -0,0 +1,732 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__integrators_advection_h
+#define dealii__integrators_advection_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/quadrature.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/fe/mapping.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/meshworker/dof_info.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace LocalIntegrators
+{
+  /**
+   * @brief Local integrators related to advection along a vector field and
+   * its DG formulations
+   *
+   * All advection operators depend on an advection velocity denoted by
+   * <b>w</b> in the formulas below. It is denoted as <tt>velocity</tt> in the
+   * parameter lists.
+   *
+   * The functions cell_matrix() and both upwind_value_matrix() are taking the
+   * equation in weak form, that is, the directional derivative is on the test
+   * function.
+   *
+   * @ingroup Integrators
+   * @author Guido Kanschat
+   * @date 2012
+   */
+  namespace Advection
+  {
+    /**
+     * Advection along the direction <b>w</b> in weak form with derivative on
+     * the test function. The entries of <tt>M</tt> are updated according to
+     * \f[ m_{ij} \mathrel{-}= \int_Z u_j\,(\mathbf w \cdot \nabla) v_i
+     * \, dx, \f]
+     * that is, the integral is <i>subtracted</i> from the matrix, scaled by
+     * <tt>factor</tt>.
+     *
+     * The FiniteElement in <tt>fe</tt> may be scalar or vector valued. In the
+     * latter case, the advection operator is applied to each component
+     * separately.
+     *
+     * @param M: The advection matrix obtained as result. It must have as
+     * many rows as <tt>fetest</tt> has degrees of freedom per cell and as
+     * many columns as <tt>fe</tt> has.
+     * @param fe: The FEValues object describing the local trial function
+     * space. #update_values and #update_gradients, and #update_JxW_values
+     * must be set.
+     * @param fetest: The FEValues object describing the local test function
+     * space. #update_values and #update_gradients must be set. The gradients
+     * of the test functions are taken from this object.
+     * @param velocity: The advection velocity, a vector of dimension
+     * <tt>dim</tt>. Each component may either contain a vector of length one,
+     * in which case a constant velocity is assumed, or a vector with as many
+     * entries as quadrature points if the velocity is not constant.
+     * @param factor is an optional multiplication factor for the result.
+     *
+     * @author Guido Kanschat
+     * @date 2012
+     */
+    template<int dim>
+    void cell_matrix (
+      FullMatrix<double> &M,
+      const FEValuesBase<dim> &fe,
+      const FEValuesBase<dim> &fetest,
+      const VectorSlice<const std::vector<std::vector<double> > > &velocity,
+      const double factor = 1.)
+    {
+      const unsigned int n_dofs = fe.dofs_per_cell;
+      const unsigned int t_dofs = fetest.dofs_per_cell;
+      const unsigned int n_components = fe.get_fe().n_components();
+
+      AssertDimension(velocity.size(), dim);
+      // If the size of the velocity vectors is one, then do not increment
+      // between quadrature points.
+      const unsigned int v_increment = (velocity[0].size() == 1) ? 0 : 1;
+
+      if (v_increment == 1)
+        {
+          AssertVectorVectorDimension(velocity, dim, fe.n_quadrature_points);
+        }
+
+      AssertDimension(M.n(), n_dofs);
+      AssertDimension(M.m(), t_dofs);
+
+      for (unsigned k=0; k<fe.n_quadrature_points; ++k)
+        {
+          const double dx = factor * fe.JxW(k);
+          const unsigned int vindex = k * v_increment;
+
+          for (unsigned j=0; j<n_dofs; ++j)
+            for (unsigned i=0; i<t_dofs; ++i)
+              for (unsigned int c=0; c<n_components; ++c)
+                {
+                  // The row index i runs over the test space, so the
+                  // gradient has to be read from fetest. Reading it from
+                  // fe is only correct if both spaces coincide.
+                  double wgradv = velocity[0][vindex]
+                                  * fetest.shape_grad_component(i,k,c)[0];
+                  for (unsigned int d=1; d<dim; ++d)
+                    wgradv += velocity[d][vindex]
+                              * fetest.shape_grad_component(i,k,c)[d];
+                  M(i,j) -= dx * wgradv * fe.shape_value_component(j,k,c);
+                }
+        }
+    }
+
+
+
+    /**
+     * Scalar advection residual operator in strong form
+     *
+     * \f[ r_i = \int_Z  (\mathbf w \cdot \nabla)u\, v_i \, dx. \f]
+     *
+     * The integral, scaled by <tt>factor</tt>, is added to <tt>result</tt>.
+     * <tt>input</tt> holds the gradient of <i>u</i> in each quadrature
+     * point. Each of the <tt>dim</tt> vectors in <tt>velocity</tt> has
+     * either a single entry (constant velocity) or one entry per quadrature
+     * point.
+     *
+     * \warning This is not the residual consistent with cell_matrix(), but
+     * with its transpose.
+     */
+    template <int dim>
+    inline void
+    cell_residual  (
+      Vector<double> &result,
+      const FEValuesBase<dim> &fe,
+      const std::vector<Tensor<1,dim> > &input,
+      const VectorSlice<const std::vector<std::vector<double> > > &velocity,
+      double factor = 1.)
+    {
+      const unsigned int n_q_points = fe.n_quadrature_points;
+      const unsigned int dofs_per_cell = fe.dofs_per_cell;
+      AssertDimension(input.size(), n_q_points);
+      AssertDimension(result.size(), dofs_per_cell);
+
+      AssertDimension(velocity.size(), dim);
+      // A single velocity entry means a constant velocity: keep reading
+      // entry zero instead of advancing with the quadrature point.
+      const unsigned int v_increment = (velocity[0].size() == 1) ? 0 : 1;
+      if (v_increment == 1)
+        {
+          AssertVectorVectorDimension(velocity, dim, fe.n_quadrature_points);
+        }
+
+      for (unsigned int q=0; q<n_q_points; ++q)
+        {
+          const double weight = factor * fe.JxW(q);
+          const unsigned int vq = q * v_increment;
+
+          for (unsigned int i=0; i<dofs_per_cell; ++i)
+            {
+              const double phi_i = fe.shape_value(i,q);
+              for (unsigned int d=0; d<dim; ++d)
+                result(i) += weight * input[q][d]
+                             * phi_i * velocity[d][vq];
+            }
+        }
+    }
+
+
+
+    /**
+     * Vector-valued advection residual operator in strong form
+     *
+     * \f[ r_i = \int_Z \bigl((\mathbf w \cdot \nabla) \mathbf u\bigr)
+     * \cdot\mathbf v_i \, dx. \f]
+     *
+     * The integral, scaled by <tt>factor</tt>, is added to <tt>result</tt>.
+     * <tt>input</tt> holds, for each component of the finite element, the
+     * gradient of that component in each quadrature point. Each of the
+     * <tt>dim</tt> vectors in <tt>velocity</tt> has either a single entry
+     * (constant velocity) or one entry per quadrature point.
+     *
+     * \warning This is not the residual consistent with cell_matrix(), but
+     * with its transpose.
+     */
+    template <int dim>
+    inline void
+    cell_residual  (
+      Vector<double> &result,
+      const FEValuesBase<dim> &fe,
+      const VectorSlice<const std::vector<std::vector<Tensor<1,dim> > > > &input,
+      const VectorSlice<const std::vector<std::vector<double> > > &velocity,
+      double factor = 1.)
+    {
+      const unsigned int n_q_points = fe.n_quadrature_points;
+      const unsigned int dofs_per_cell = fe.dofs_per_cell;
+      const unsigned int n_components = fe.get_fe().n_components();
+
+      AssertVectorVectorDimension(input, n_components, fe.n_quadrature_points);
+      AssertDimension(result.size(), dofs_per_cell);
+
+      AssertDimension(velocity.size(), dim);
+      // A single velocity entry means a constant velocity: keep reading
+      // entry zero instead of advancing with the quadrature point.
+      const unsigned int v_increment = (velocity[0].size() == 1) ? 0 : 1;
+      if (v_increment == 1)
+        {
+          AssertVectorVectorDimension(velocity, dim, fe.n_quadrature_points);
+        }
+
+      for (unsigned int q=0; q<n_q_points; ++q)
+        {
+          const double weight = factor * fe.JxW(q);
+          const unsigned int vq = q * v_increment;
+
+          for (unsigned int i=0; i<dofs_per_cell; ++i)
+            for (unsigned int c=0; c<n_components; ++c)
+              {
+                const double phi_ic = fe.shape_value_component(i,q,c);
+                for (unsigned int d=0; d<dim; ++d)
+                  result(i) += weight * input[c][q][d]
+                               * phi_ic * velocity[d][vq];
+              }
+        }
+    }
+
+
+
+    /**
+     * Scalar advection residual operator in weak form. The values of the
+     * function <i>u</i> in the quadrature points are given in
+     * <tt>input</tt>, and the derivative is on the test function:
+     *
+     * \f[ r_i \mathrel{-}= \int_Z  u\,(\mathbf w \cdot \nabla)v_i \, dx. \f]
+     *
+     * The integral, scaled by <tt>factor</tt>, is subtracted from
+     * <tt>result</tt>. Each of the <tt>dim</tt> vectors in
+     * <tt>velocity</tt> has either a single entry (constant velocity) or one
+     * entry per quadrature point.
+     */
+    template <int dim>
+    inline void
+    cell_residual  (
+      Vector<double> &result,
+      const FEValuesBase<dim> &fe,
+      const std::vector<double> &input,
+      const VectorSlice<const std::vector<std::vector<double> > > &velocity,
+      double factor = 1.)
+    {
+      const unsigned int n_q_points = fe.n_quadrature_points;
+      const unsigned int dofs_per_cell = fe.dofs_per_cell;
+      AssertDimension(input.size(), n_q_points);
+      AssertDimension(result.size(), dofs_per_cell);
+
+      AssertDimension(velocity.size(), dim);
+      // A single velocity entry means a constant velocity: keep reading
+      // entry zero instead of advancing with the quadrature point.
+      const unsigned int v_increment = (velocity[0].size() == 1) ? 0 : 1;
+      if (v_increment == 1)
+        {
+          AssertVectorVectorDimension(velocity, dim, fe.n_quadrature_points);
+        }
+
+      for (unsigned int q=0; q<n_q_points; ++q)
+        {
+          const unsigned int vq = q * v_increment;
+          // weight times function value, shared by all test functions
+          const double weighted_u = factor * fe.JxW(q) * input[q];
+
+          for (unsigned int i=0; i<dofs_per_cell; ++i)
+            for (unsigned int d=0; d<dim; ++d)
+              result(i) -= weighted_u
+                           * fe.shape_grad(i,q)[d] * velocity[d][vq];
+        }
+    }
+
+
+
+    /**
+     * Vector-valued advection residual operator in weak form. The values of
+     * the components of <b>u</b> in the quadrature points are given in
+     * <tt>input</tt>, and the derivative is on the test function:
+     *
+     * \f[ r_i \mathrel{-}= \int_Z \mathbf u \cdot
+     * \bigl((\mathbf w \cdot \nabla) \mathbf v_i\bigr) \, dx. \f]
+     *
+     * The integral, scaled by <tt>factor</tt>, is subtracted from
+     * <tt>result</tt>. Each of the <tt>dim</tt> vectors in
+     * <tt>velocity</tt> has either a single entry (constant velocity) or one
+     * entry per quadrature point.
+     */
+    template <int dim>
+    inline void
+    cell_residual  (
+      Vector<double> &result,
+      const FEValuesBase<dim> &fe,
+      const VectorSlice<const std::vector<std::vector<double> > > &input,
+      const VectorSlice<const std::vector<std::vector<double> > > &velocity,
+      double factor = 1.)
+    {
+      const unsigned int n_q_points = fe.n_quadrature_points;
+      const unsigned int dofs_per_cell = fe.dofs_per_cell;
+      const unsigned int n_components = fe.get_fe().n_components();
+
+      AssertVectorVectorDimension(input, n_components, fe.n_quadrature_points);
+      AssertDimension(result.size(), dofs_per_cell);
+
+      AssertDimension(velocity.size(), dim);
+      // A single velocity entry means a constant velocity: keep reading
+      // entry zero instead of advancing with the quadrature point.
+      const unsigned int v_increment = (velocity[0].size() == 1) ? 0 : 1;
+      if (v_increment == 1)
+        {
+          AssertVectorVectorDimension(velocity, dim, fe.n_quadrature_points);
+        }
+
+      for (unsigned int q=0; q<n_q_points; ++q)
+        {
+          const double weight = factor * fe.JxW(q);
+          const unsigned int vq = q * v_increment;
+
+          for (unsigned int i=0; i<dofs_per_cell; ++i)
+            for (unsigned int c=0; c<n_components; ++c)
+              {
+                // weight times value of component c
+                const double weighted_u = weight * input[c][q];
+                for (unsigned int d=0; d<dim; ++d)
+                  result(i) -= weighted_u
+                               * fe.shape_grad_component(i,q,c)[d] * velocity[d][vq];
+              }
+        }
+    }
+
+
+
+    /**
+     * Upwind flux at the boundary for weak advection operator. This is the
+     * value of the trial function at the outflow boundary and zero else:
+     * @f[
+     * a_{ij} = \int_{\partial\Omega}
+     * [\mathbf w\cdot\mathbf n]_+
+     * u_j v_i \, ds
+     * @f]
+     *
+     * The result, scaled by <tt>factor</tt>, is added to <tt>M</tt>. The
+     * rows of <tt>M</tt> correspond to the test functions in
+     * <tt>fetest</tt>, the columns to the trial functions in <tt>fe</tt>.
+     *
+     * The <tt>velocity</tt> is provided as a VectorSlice, having <tt>dim</tt>
+     * vectors, one for each velocity component. Each of the vectors must
+     * either have only a single entry, if the advection velocity is constant,
+     * or have an entry for each quadrature point.
+     *
+     * The finite element can have several components, in which case each
+     * component is advected by the same velocity.
+     */
+    template <int dim>
+    void upwind_value_matrix(
+      FullMatrix<double> &M,
+      const FEValuesBase<dim> &fe,
+      const FEValuesBase<dim> &fetest,
+      const VectorSlice<const std::vector<std::vector<double> > > &velocity,
+      double factor = 1.)
+    {
+      const unsigned int n_dofs = fe.dofs_per_cell;
+      const unsigned int t_dofs = fetest.dofs_per_cell;
+      const unsigned int n_components = fe.get_fe().n_components();
+      // Rows are indexed by test functions, columns by trial functions.
+      AssertDimension (M.m(), t_dofs);
+      AssertDimension (M.n(), n_dofs);
+
+      AssertDimension(velocity.size(), dim);
+      // If the velocity vectors have a single entry, the velocity is
+      // constant and the quadrature point index must not advance.
+      const unsigned int v_increment = (velocity[0].size() == 1) ? 0 : 1;
+      if (v_increment == 1)
+        {
+          AssertVectorVectorDimension(velocity, dim, fe.n_quadrature_points);
+        }
+
+      // The shortcut through shape_value() is only used if both spaces are
+      // primitive; otherwise sum over all vector components. This does not
+      // depend on the quadrature point, so decide once.
+      const bool both_primitive = fe.get_fe().is_primitive()
+                                  && fetest.get_fe().is_primitive();
+
+      for (unsigned k=0; k<fe.n_quadrature_points; ++k)
+        {
+          const double dx = factor * fe.JxW(k);
+
+          double nv = 0.;
+          for (unsigned int d=0; d<dim; ++d)
+            nv += fe.normal_vector(k)[d] * velocity[d][k * v_increment];
+
+          // Only outflow points contribute to the matrix.
+          if (nv > 0)
+            {
+              for (unsigned i=0; i<t_dofs; ++i)
+                for (unsigned j=0; j<n_dofs; ++j)
+                  {
+                    if (both_primitive)
+                      M(i,j) += dx * nv * fetest.shape_value(i,k) * fe.shape_value(j,k);
+                    else
+                      for (unsigned int c=0; c<n_components; ++c)
+                        M(i,j) += dx * nv * fetest.shape_value_component(i,k,c)
+                                  * fe.shape_value_component(j,k,c);
+                  }
+            }
+        }
+    }
+
+
+
+    /**
+     * Scalar case: Residual for upwind flux at the boundary for weak
+     * advection operator. In each quadrature point, the term
+     * @f[
+     * (\mathbf w\cdot\mathbf n)\, \widehat u\, v_i
+     * @f]
+     * is integrated over the face, scaled by <tt>factor</tt> and added to
+     * <tt>result</tt>.
+     *
+     * Here, the numerical flux $\widehat u$ is the value given in the
+     * argument `input` where $\mathbf w\cdot\mathbf n$ is positive (outflow).
+     * Everywhere else, it is the negative of the value given in the argument
+     * `data`.
+     *
+     * The <tt>velocity</tt> is provided as a VectorSlice, having <tt>dim</tt>
+     * vectors, one for each velocity component. Each of the vectors must
+     * either have only a single entry, if the advection velocity is constant,
+     * or have an entry for each quadrature point.
+     */
+    template <int dim>
+    inline void
+    upwind_value_residual(
+      Vector<double> &result,
+      const FEValuesBase<dim> &fe,
+      const std::vector<double> &input,
+      const std::vector<double> &data,
+      const VectorSlice<const std::vector<std::vector<double> > > &velocity,
+      double factor = 1.)
+    {
+      const unsigned int dofs_per_cell = fe.dofs_per_cell;
+
+      AssertDimension(input.size(), fe.n_quadrature_points);
+      AssertDimension(data.size(), fe.n_quadrature_points);
+
+      AssertDimension(velocity.size(), dim);
+      // A single velocity entry means a constant velocity: keep reading
+      // entry zero instead of advancing with the quadrature point.
+      const unsigned int v_increment = (velocity[0].size() == 1) ? 0 : 1;
+      if (v_increment == 1)
+        {
+          AssertVectorVectorDimension(velocity, dim, fe.n_quadrature_points);
+        }
+
+      for (unsigned int q=0; q<fe.n_quadrature_points; ++q)
+        {
+          const double weight = factor * fe.JxW(q);
+          const unsigned int vq = q * v_increment;
+
+          // normal component of the velocity
+          double normal_velocity = 0.;
+          for (unsigned int d=0; d<dim; ++d)
+            normal_velocity += fe.normal_vector(q)[d] * velocity[d][vq];
+
+          // Always use the upwind value
+          double upwind_value;
+          if (normal_velocity > 0.)
+            upwind_value = input[q];
+          else
+            upwind_value = -data[q];
+
+          // everything that does not depend on the test function
+          const double flux = weight * normal_velocity * upwind_value;
+
+          for (unsigned int i=0; i<dofs_per_cell; ++i)
+            result(i) += flux * fe.shape_value(i,q);
+        }
+    }
+
+
+
+    /**
+     * Vector-valued case: Residual for upwind flux at the boundary for weak
+     * advection operator. In each quadrature point and for each component,
+     * the term
+     * @f[
+     * (\mathbf w\cdot\mathbf n)\, \widehat u\, v_i
+     * @f]
+     * is integrated over the face, scaled by <tt>factor</tt> and added to
+     * <tt>result</tt>.
+     *
+     * Here, the numerical flux $\widehat u$ is the value given in the
+     * argument `input` where $\mathbf w\cdot\mathbf n$ is positive (outflow).
+     * Everywhere else, it is the negative of the value given in the argument
+     * `data`. Both arguments hold one vector of quadrature point values per
+     * component of the finite element.
+     *
+     * The <tt>velocity</tt> is provided as a VectorSlice, having <tt>dim</tt>
+     * vectors, one for each velocity component. Each of the vectors must
+     * either have only a single entry, if the advection velocity is constant,
+     * or have an entry for each quadrature point.
+     *
+     * The finite element can have several components, in which case each
+     * component is advected by the same velocity.
+     */
+    template <int dim>
+    inline void
+    upwind_value_residual(
+      Vector<double> &result,
+      const FEValuesBase<dim> &fe,
+      const VectorSlice<const std::vector<std::vector<double> > > &input,
+      const VectorSlice<const std::vector<std::vector<double> > > &data,
+      const VectorSlice<const std::vector<std::vector<double> > > &velocity,
+      double factor = 1.)
+    {
+      const unsigned int n_dofs = fe.dofs_per_cell;
+      const unsigned int n_comp = fe.get_fe().n_components();
+
+      AssertVectorVectorDimension(input, n_comp, fe.n_quadrature_points);
+      AssertVectorVectorDimension(data, n_comp, fe.n_quadrature_points);
+
+      AssertDimension(velocity.size(), dim);
+      // If the velocity vectors have a single entry, the velocity is
+      // constant and the quadrature point index must not advance.
+      const unsigned int v_increment = (velocity[0].size() == 1) ? 0 : 1;
+      if (v_increment == 1)
+        {
+          AssertVectorVectorDimension(velocity, dim, fe.n_quadrature_points);
+        }
+
+
+      for (unsigned k=0; k<fe.n_quadrature_points; ++k)
+        {
+          const double dx = factor * fe.JxW(k);
+
+          double nv = 0.;
+          for (unsigned int d=0; d<dim; ++d)
+            nv += fe.normal_vector(k)[d] * velocity[d][k * v_increment];
+
+          for (unsigned int d=0; d<n_comp; ++d)
+            {
+              // Always use the upwind value. A plain scalar is sufficient
+              // here; no per-quadrature-point array has to be allocated.
+              const double val = (nv > 0.) ? input[d][k] : -data[d][k];
+              for (unsigned i=0; i<n_dofs; ++i)
+                {
+                  const double v= fe.shape_value_component(i,k,d);
+                  result(i) += dx * nv * val *v;
+                }
+            }
+        }
+    }
+
+
+
+    /**
+     * Upwind flux in the interior for weak advection operator. Matrix entries
+     * correspond to the upwind value of the trial function, multiplied by the
+     * jump of the test functions
+     * @f[
+     * a_{ij} = \int_F \left|\mathbf w
+     * \cdot \mathbf n\right|
+     * u^\uparrow
+     * (v^\uparrow-v^\downarrow)
+     * \,ds
+     * @f]
+     *
+     * The normal vector is taken from <tt>fe1</tt>. Where
+     * $\mathbf w\cdot\mathbf n$ is positive, the first cell is the upwind
+     * cell and the contributions go to <tt>M11</tt> and <tt>M21</tt>;
+     * otherwise the second cell is the upwind cell and they go to
+     * <tt>M22</tt> and <tt>M12</tt>.
+     *
+     * The <tt>velocity</tt> is provided as a VectorSlice, having <tt>dim</tt>
+     * vectors, one for each velocity component. Each of the vectors must
+     * either have only a single entry, if the advection velocity is constant,
+     * or have an entry for each quadrature point.
+     *
+     * The finite element can have several components, in which case each
+     * component is advected the same way.
+     */
+    template <int dim>
+    void upwind_value_matrix (
+      FullMatrix<double> &M11,
+      FullMatrix<double> &M12,
+      FullMatrix<double> &M21,
+      FullMatrix<double> &M22,
+      const FEValuesBase<dim> &fe1,
+      const FEValuesBase<dim> &fe2,
+      const FEValuesBase<dim> &fetest1,
+      const FEValuesBase<dim> &fetest2,
+      const VectorSlice<const std::vector<std::vector<double> > > &velocity,
+      const double factor = 1.)
+    {
+      const unsigned int dofs_per_cell = fe1.dofs_per_cell;
+
+      AssertDimension(velocity.size(), dim);
+      // The quadrature point index is multiplied by this number when
+      // accessing the velocity: zero for a constant velocity (a single
+      // entry per component), one otherwise.
+      const unsigned int v_increment = (velocity[0].size() == 1) ? 0 : 1;
+      if (v_increment == 1)
+        {
+          AssertVectorVectorDimension(velocity, dim, fe1.n_quadrature_points);
+        }
+
+      for (unsigned int q=0; q<fe1.n_quadrature_points; ++q)
+        {
+          const unsigned int vq = q * v_increment;
+
+          // normal component of the velocity with respect to the first cell
+          double nbeta = fe1.normal_vector(q)[0] * velocity[0][vq];
+          for (unsigned int d=1; d<dim; ++d)
+            nbeta += fe1.normal_vector(q)[d] * velocity[d][vq];
+          const double dx_nbeta = factor * std::abs(nbeta) * fe1.JxW(q);
+
+          // Select the upwind side: trial functions and the matrices that
+          // receive the contributions depend on the flow direction.
+          const bool first_is_upwind = (nbeta > 0.);
+          FullMatrix<double> &M_upwind   = first_is_upwind ? M11 : M22;
+          FullMatrix<double> &M_downwind = first_is_upwind ? M21 : M12;
+          const FEValuesBase<dim> &trial         = first_is_upwind ? fe1 : fe2;
+          const FEValuesBase<dim> &test_upwind   = first_is_upwind ? fetest1 : fetest2;
+          const FEValuesBase<dim> &test_downwind = first_is_upwind ? fetest2 : fetest1;
+
+          for (unsigned int i=0; i<dofs_per_cell; ++i)
+            for (unsigned int j=0; j<dofs_per_cell; ++j)
+              {
+                if (fe1.get_fe().is_primitive())
+                  {
+                    const double flux_j = dx_nbeta*trial.shape_value(j,q);
+                    M_upwind(i,j)   += flux_j*test_upwind.shape_value(i,q);
+                    M_downwind(i,j) -= flux_j*test_downwind.shape_value(i,q);
+                  }
+                else
+                  {
+                    for (unsigned int c=0; c<fe1.get_fe().n_components(); ++c)
+                      {
+                        const double flux_j = dx_nbeta*trial.shape_value_component(j,q,c);
+                        M_upwind(i,j)   += flux_j*test_upwind.shape_value_component(i,q,c);
+                        M_downwind(i,j) -= flux_j*test_downwind.shape_value_component(i,q,c);
+                      }
+                  }
+              }
+        }
+    }
+
+
+
+    /**
+     * Scalar case: Residual of the upwind flux in the interior for weak
+     * advection operator. The upwind value of the function, multiplied by
+     * the normal velocity and the jump of the test functions, is integrated
+     * over the face:
+     * @f[
+     * \int_F (\mathbf w \cdot \mathbf n)\,
+     * u^\uparrow
+     * (v_1-v_2)
+     * \,ds
+     * @f]
+     *
+     * The normal vector is taken from <tt>fe1</tt>. The upwind value
+     * $u^\uparrow$ is <tt>input1</tt> where $\mathbf w\cdot\mathbf n$ is
+     * positive and <tt>input2</tt> otherwise. The part with test functions
+     * of the first cell is added to <tt>result1</tt>, the part with test
+     * functions of the second cell is subtracted from <tt>result2</tt>.
+     *
+     * The <tt>velocity</tt> is provided as a VectorSlice, having <tt>dim</tt>
+     * vectors, one for each velocity component. Each of the vectors must
+     * either have only a single entry, if the advection velocity is constant,
+     * or have an entry for each quadrature point.
+     *
+     * Both finite elements must have exactly one component.
+     */
+    template <int dim>
+    void upwind_face_residual (
+      Vector<double> &result1,
+      Vector<double> &result2,
+      const FEValuesBase<dim> &fe1,
+      const FEValuesBase<dim> &fe2,
+      const std::vector<double> &input1,
+      const std::vector<double> &input2,
+      const VectorSlice<const std::vector<std::vector<double> > > &velocity,
+      const double factor = 1.)
+    {
+      AssertDimension(fe1.get_fe().n_components(), 1);
+      AssertDimension(fe2.get_fe().n_components(), 1);
+
+      const unsigned int dofs_per_cell = fe1.dofs_per_cell;
+
+      AssertDimension(velocity.size(), dim);
+      // The quadrature point index is multiplied by this number when
+      // accessing the velocity: zero for a constant velocity (a single
+      // entry per component), one otherwise.
+      const unsigned int v_increment = (velocity[0].size() == 1) ? 0 : 1;
+      if (v_increment == 1)
+        {
+          AssertVectorVectorDimension(velocity, dim, fe1.n_quadrature_points);
+        }
+
+      for (unsigned int q=0; q<fe1.n_quadrature_points; ++q)
+        {
+          const unsigned int vq = q * v_increment;
+
+          // normal component of the velocity with respect to the first cell
+          double nbeta = fe1.normal_vector(q)[0] * velocity[0][vq];
+          for (unsigned int d=1; d<dim; ++d)
+            nbeta += fe1.normal_vector(q)[d] * velocity[d][vq];
+          const double dx_nbeta = factor * nbeta * fe1.JxW(q);
+
+          // take the function value from the upwind side
+          const double u_upwind = (nbeta > 0) ? input1[q] : input2[q];
+          const double flux = dx_nbeta*u_upwind;
+
+          for (unsigned int i=0; i<dofs_per_cell; ++i)
+            {
+              result1(i) += flux*fe1.shape_value(i,q);
+              result2(i) -= flux*fe2.shape_value(i,q);
+            }
+        }
+    }
+
+
+
+    /**
+     * Vector-valued case: Residual of the upwind flux in the interior for
+     * weak advection operator. For each component, the upwind value of the
+     * function, multiplied by the normal velocity and the jump of the test
+     * functions, is integrated over the face:
+     * @f[
+     * \int_F (\mathbf w \cdot \mathbf n)\,
+     * u^\uparrow
+     * (v_1-v_2)
+     * \,ds
+     * @f]
+     *
+     * The normal vector is taken from <tt>fe1</tt>. The upwind value
+     * $u^\uparrow$ is taken from <tt>input1</tt> where
+     * $\mathbf w\cdot\mathbf n$ is positive and from <tt>input2</tt>
+     * otherwise. The part with test functions of the first cell is added to
+     * <tt>result1</tt>, the part with test functions of the second cell is
+     * subtracted from <tt>result2</tt>.
+     *
+     * The <tt>velocity</tt> is provided as a VectorSlice, having <tt>dim</tt>
+     * vectors, one for each velocity component. Each of the vectors must
+     * either have only a single entry, if the advection velocity is constant,
+     * or have an entry for each quadrature point.
+     *
+     * The finite element can have several components, in which case each
+     * component is advected the same way.
+     */
+    template <int dim>
+    void upwind_face_residual (
+      Vector<double> &result1,
+      Vector<double> &result2,
+      const FEValuesBase<dim> &fe1,
+      const FEValuesBase<dim> &fe2,
+      const VectorSlice<const std::vector<std::vector<double> > > &input1,
+      const VectorSlice<const std::vector<std::vector<double> > > &input2,
+      const VectorSlice<const std::vector<std::vector<double> > > &velocity,
+      const double factor = 1.)
+    {
+      const unsigned int n_components = fe1.get_fe().n_components();
+      const unsigned int dofs_per_cell = fe1.dofs_per_cell;
+      AssertVectorVectorDimension(input1, n_components, fe1.n_quadrature_points);
+      AssertVectorVectorDimension(input2, n_components, fe2.n_quadrature_points);
+
+      AssertDimension(velocity.size(), dim);
+      // The quadrature point index is multiplied by this number when
+      // accessing the velocity: zero for a constant velocity (a single
+      // entry per component), one otherwise.
+      const unsigned int v_increment = (velocity[0].size() == 1) ? 0 : 1;
+      if (v_increment == 1)
+        {
+          AssertVectorVectorDimension(velocity, dim, fe1.n_quadrature_points);
+        }
+
+      for (unsigned int q=0; q<fe1.n_quadrature_points; ++q)
+        {
+          const unsigned int vq = q * v_increment;
+
+          // normal component of the velocity with respect to the first cell
+          double nbeta = fe1.normal_vector(q)[0] * velocity[0][vq];
+          for (unsigned int d=1; d<dim; ++d)
+            nbeta += fe1.normal_vector(q)[d] * velocity[d][vq];
+          const double dx_nbeta = factor * nbeta * fe1.JxW(q);
+
+          // the upwind side is the same for all components in this point
+          const bool first_is_upwind = (nbeta > 0);
+
+          for (unsigned int i=0; i<dofs_per_cell; ++i)
+            for (unsigned int c=0; c<n_components; ++c)
+              {
+                const double u_upwind = first_is_upwind ? input1[c][q] : input2[c][q];
+                const double flux = dx_nbeta*u_upwind;
+
+                result1(i) += flux*fe1.shape_value_component(i,q,c);
+                result2(i) -= flux*fe2.shape_value_component(i,q,c);
+              }
+        }
+    }
+
+  }
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/integrators/divergence.h b/include/deal.II/integrators/divergence.h
new file mode 100644
index 0000000..031bda2
--- /dev/null
+++ b/include/deal.II/integrators/divergence.h
@@ -0,0 +1,629 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__integrators_divergence_h
+#define dealii__integrators_divergence_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/quadrature.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/fe/mapping.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/meshworker/dof_info.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace LocalIntegrators
+{
+  /**
+   * @brief Local integrators related to the divergence operator and its
+   * trace.
+   *
+   * @ingroup Integrators
+   * @author Guido Kanschat
+   * @date 2010
+   */
+  namespace Divergence
+  {
+    /**
+     * Helper computing the grad-div operator from a set of Hessians: entry
+     * <i>d</i> of the result is h0[d][0] + h1[d][1] + h2[d][2] (terms up to dim).
+     *
+     * @note <tt>h2</tt> is only read for dim>=3 (and <tt>h1</tt> for dim>=2),
+     * so in lower dimensions it can for instance duplicate a previous one.
+     *
+     * @author Guido Kanschat
+     * @date 2011
+     */
+    template <int dim>
+    Tensor<1,dim>
+    grad_div(
+      const Tensor<2,dim> &h0,
+      const Tensor<2,dim> &h1,
+      const Tensor<2,dim> &h2)
+    {
+      Tensor<1,dim> gd;
+      for (unsigned int row=0; row<dim; ++row)
+        {
+          gd[row] += h0[row][0];
+          if (dim > 1) gd[row] += h1[row][1];
+          if (dim > 2) gd[row] += h2[row][2];
+        }
+      return gd;
+    }
+
+
+    /**
+     * Cell matrix of the strong divergence operator, \f[ \int_Z v\nabla \cdot
+     * \mathbf u \,dx \f] The derivative is on the trial function, so the trial
+     * space (<tt>fe</tt>) should be at least <b>H</b><sup>div</sup>; the test
+     * functions (<tt>fetest</tt>) may be discontinuous.
+     *
+     * @author Guido Kanschat
+     * @date 2011
+     */
+    template <int dim>
+    void cell_matrix (
+      FullMatrix<double> &M,
+      const FEValuesBase<dim> &fe,
+      const FEValuesBase<dim> &fetest,
+      double factor = 1.)
+    {
+      const unsigned int n_dofs = fe.dofs_per_cell;
+      const unsigned int t_dofs = fetest.dofs_per_cell;
+      // Checked inline (like the test space below): no assert-only local needed.
+      AssertDimension(fe.get_fe().n_components(), dim);
+      AssertDimension(fetest.get_fe().n_components(), 1);
+      AssertDimension(M.m(), t_dofs);
+      AssertDimension(M.n(), n_dofs);
+
+      for (unsigned int k=0; k<fe.n_quadrature_points; ++k)
+        {
+          const double dx = fe.JxW(k) * factor;
+          for (unsigned int i=0; i<t_dofs; ++i)
+            {
+              const double vv = fetest.shape_value(i,k);
+              for (unsigned int d=0; d<dim; ++d)
+                for (unsigned int j=0; j<n_dofs; ++j)
+                  {
+                    const double du = fe.shape_grad_component(j,k,d)[d];
+                    M(i,j) += dx * du * vv;
+                  }
+            }
+        }
+    }
+
+    /**
+     * Residual of the divergence operator in strong form, \f[ \int_Z
+     * v\nabla \cdot \mathbf u \,dx \f] added to <tt>result</tt>. The trial
+     * space should be at least <b>H</b><sup>div</sup>, while the test
+     * functions may be discontinuous.
+     *
+     * <tt>input[d][k]</tt> is the gradient of component d at quadrature point
+     * k; cell_matrix() is the Frechet derivative with respect to that function.
+     *
+     * @author Guido Kanschat
+     * @date 2011
+     */
+    template <int dim, typename number>
+    void cell_residual(
+      Vector<number> &result,
+      const FEValuesBase<dim> &fetest,
+      const VectorSlice<const std::vector<std::vector<Tensor<1,dim> > > > &input,
+      const double factor = 1.)
+    {
+      const unsigned int n_test = fetest.dofs_per_cell;
+      AssertDimension(fetest.get_fe().n_components(), 1);
+      AssertVectorVectorDimension(input, dim, fetest.n_quadrature_points);
+      AssertDimension(result.size(), n_test);
+
+      for (unsigned int q=0; q<fetest.n_quadrature_points; ++q)
+        {
+          const double weight = factor * fetest.JxW(q);
+
+          for (unsigned int row=0; row<n_test; ++row)
+            for (unsigned int c=0; c<dim; ++c)
+              result(row) += weight * input[c][q][c] * fetest.shape_value(row,q);
+        }
+    }
+
+
+    /**
+     * Residual of the divergence operator in weak form, \f[ - \int_Z \nabla v
+     * \cdot \mathbf u \,dx \f] where <tt>input[d][k]</tt> is component d of u
+     * at quadrature point k. The test space should be at least
+     * <b>H</b><sup>1</sup>; the trial functions may be discontinuous.
+     *
+     * @todo Verify whether cell_matrix() is the Frechet derivative of this
+     * function with respect to the test functions.
+     *
+     * @author Guido Kanschat
+     * @date 2013
+     */
+    template <int dim, typename number>
+    void cell_residual(
+      Vector<number> &result,
+      const FEValuesBase<dim> &fetest,
+      const VectorSlice<const std::vector<std::vector<double> > > &input,
+      const double factor = 1.)
+    {
+      const unsigned int n_test = fetest.dofs_per_cell;
+      AssertDimension(fetest.get_fe().n_components(), 1);
+      AssertVectorVectorDimension(input, dim, fetest.n_quadrature_points);
+      AssertDimension(result.size(), n_test);
+
+      for (unsigned int q=0; q<fetest.n_quadrature_points; ++q)
+        {
+          const double weight = factor * fetest.JxW(q);
+
+          for (unsigned int row=0; row<n_test; ++row)
+            for (unsigned int c=0; c<dim; ++c)
+              result(row) -= weight * input[c][q] * fetest.shape_grad(row,q)[c];
+        }
+    }
+
+
+    /**
+     * Cell matrix of the strong gradient operator \f[ \int_Z \nabla u \cdot
+     * \mathbf v\,dx \f] with scalar trial space <tt>fe</tt> and dim-component
+     * test space <tt>fetest</tt>; the result is scaled by <tt>factor</tt>.
+     * The derivative is on the trial function, so the trial space should be at
+     * least in <i>H</i><sup>1</sup>. The test functions can be discontinuous.
+     *
+     * @author Guido Kanschat
+     * @date 2011
+     */
+    template <int dim>
+    void gradient_matrix(
+      FullMatrix<double> &M,
+      const FEValuesBase<dim> &fe,
+      const FEValuesBase<dim> &fetest,
+      double factor = 1.)
+    {
+      const unsigned int t_dofs = fetest.dofs_per_cell;
+      const unsigned int n_dofs = fe.dofs_per_cell;
+
+      // Checked inline (like the trial space below): no assert-only local needed.
+      AssertDimension(fetest.get_fe().n_components(), dim);
+      AssertDimension(fe.get_fe().n_components(), 1);
+      AssertDimension(M.m(), t_dofs);
+      AssertDimension(M.n(), n_dofs);
+
+      for (unsigned int k=0; k<fe.n_quadrature_points; ++k)
+        {
+          const double dx = fe.JxW(k) * factor;
+          for (unsigned int d=0; d<dim; ++d)
+            for (unsigned int i=0; i<t_dofs; ++i)
+              {
+                const double vv = fetest.shape_value_component(i,k,d);
+                for (unsigned int j=0; j<n_dofs; ++j)
+                  {
+                    const Tensor<1,dim> &Du = fe.shape_grad(j,k);
+                    M(i,j) += dx * vv * Du[d];
+                  }
+              }
+        }
+    }
+
+    /**
+     * Residual of the gradient operator in strong form, \f[ \int_Z
+     * \mathbf v\cdot\nabla u \,dx \f] added to <tt>result</tt>. The trial
+     * space should be at least <b>H</b><sup>1</sup>, while the vector valued
+     * test functions may be discontinuous.
+     *
+     * <tt>input[k]</tt> is the gradient of u at quadrature point k;
+     * gradient_matrix() is the Frechet derivative with respect to u.
+     *
+     * @author Guido Kanschat
+     * @date 2011
+     */
+    template <int dim, typename number>
+    void gradient_residual(
+      Vector<number> &result,
+      const FEValuesBase<dim> &fetest,
+      const std::vector<Tensor<1,dim> > &input,
+      const double factor = 1.)
+    {
+      const unsigned int n_test = fetest.dofs_per_cell;
+      AssertDimension(fetest.get_fe().n_components(), dim);
+      AssertDimension(input.size(), fetest.n_quadrature_points);
+      AssertDimension(result.size(), n_test);
+
+      for (unsigned int q=0; q<fetest.n_quadrature_points; ++q)
+        {
+          const double weight = factor * fetest.JxW(q);
+
+          for (unsigned int row=0; row<n_test; ++row)
+            for (unsigned int c=0; c<dim; ++c)
+              result(row) += weight * input[q][c] * fetest.shape_value_component(row,q,c);
+        }
+    }
+
+    /**
+     * Residual of the gradient operator in weak form, \f[ -\int_Z
+     * \nabla\cdot \mathbf v u \,dx \f] where <tt>input[k]</tt> is the value of
+     * u at quadrature point k. The test space should be at least
+     * <b>H</b><sup>div</sup>; the trial functions may be discontinuous.
+     *
+     * @todo Verify whether gradient_matrix() is the Frechet derivative of
+     * this function with respect to the test functions.
+     *
+     * @author Guido Kanschat
+     * @date 2013
+     */
+    template <int dim, typename number>
+    void gradient_residual(
+      Vector<number> &result,
+      const FEValuesBase<dim> &fetest,
+      const std::vector<double> &input,
+      const double factor = 1.)
+    {
+      const unsigned int n_test = fetest.dofs_per_cell;
+      AssertDimension(fetest.get_fe().n_components(), dim);
+      AssertDimension(input.size(), fetest.n_quadrature_points);
+      AssertDimension(result.size(), n_test);
+
+      for (unsigned int q=0; q<fetest.n_quadrature_points; ++q)
+        {
+          const double weight = factor * fetest.JxW(q);
+
+          for (unsigned int row=0; row<n_test; ++row)
+            for (unsigned int c=0; c<dim; ++c)
+              result(row) -= weight * input[q] * fetest.shape_grad_component(row,q,c)[c];
+        }
+    }
+
+    /**
+     * The trace of the divergence operator: the normal component of the vector
+     * valued trial functions (<tt>fe</tt>) times the test functions (<tt>fetest</tt>),
+     * @f[ \int_F (\mathbf u\cdot \mathbf n) v \,ds @f] scaled by <tt>factor</tt>.
+     *
+     * @author Guido Kanschat
+     * @date 2011
+     */
+    template<int dim>
+    void
+    u_dot_n_matrix (
+      FullMatrix<double> &M,
+      const FEValuesBase<dim> &fe,
+      const FEValuesBase<dim> &fetest,
+      double factor = 1.)
+    {
+      const unsigned int n_dofs = fe.dofs_per_cell;
+      const unsigned int t_dofs = fetest.dofs_per_cell;
+
+      // Checked inline (like the test space below): no assert-only local needed.
+      AssertDimension(fe.get_fe().n_components(), dim);
+      AssertDimension(fetest.get_fe().n_components(), 1);
+      AssertDimension(M.m(), t_dofs);
+      AssertDimension(M.n(), n_dofs);
+
+      for (unsigned int k=0; k<fe.n_quadrature_points; ++k)
+        {
+          const Tensor<1,dim> ndx = factor * fe.JxW(k) * fe.normal_vector(k);
+          for (unsigned int i=0; i<t_dofs; ++i)
+            for (unsigned int j=0; j<n_dofs; ++j)
+              for (unsigned int d=0; d<dim; ++d)
+                M(i,j) += ndx[d] * fe.shape_value_component(j,k,d)
+                          * fetest.shape_value(i,k);
+        }
+    }
+
+    /**
+     * Residual for the trace of the divergence operator: the normal component
+     * of the vector field <tt>data</tt> (data[d][k]: component d at point k)
+     * times the test functions; normals and weights are taken from <tt>fe</tt>.
+     * @f[
+     * \int_F (\mathbf u\cdot \mathbf n) v \,ds
+     * @f]
+     * @author Guido Kanschat
+     * @date 2011
+     */
+    template<int dim, typename number>
+    void
+    u_dot_n_residual (
+      Vector<number> &result,
+      const FEValuesBase<dim> &fe,
+      const FEValuesBase<dim> &fetest,
+      const VectorSlice<const std::vector<std::vector<double> > > &data,
+      double factor = 1.)
+    {
+      const unsigned int t_dofs = fetest.dofs_per_cell;
+
+      // Checked inline (like the test space below): no assert-only local needed.
+      AssertDimension(fe.get_fe().n_components(), dim);
+      AssertDimension(fetest.get_fe().n_components(), 1);
+      AssertDimension(result.size(), t_dofs);
+      AssertVectorVectorDimension (data, dim, fe.n_quadrature_points);
+
+      for (unsigned int k=0; k<fe.n_quadrature_points; ++k)
+        {
+          const Tensor<1,dim> ndx = factor * fe.normal_vector(k) * fe.JxW(k);
+
+          for (unsigned int i=0; i<t_dofs; ++i)
+            for (unsigned int d=0; d<dim; ++d)
+              result(i) += ndx[d] * fetest.shape_value(i,k) * data[d][k];
+        }
+    }
+
+    /**
+     * Residual for the trace of the gradient operator: the scalar values
+     * <tt>data[k]</tt> times the normal component of the vector valued test space.
+     * @f[
+     * \int_F u (\mathbf v\cdot \mathbf n) \,ds
+     * @f]
+     *
+     * @author Guido Kanschat
+     * @date 2013
+     */
+    template<int dim, typename number>
+    void
+    u_times_n_residual (
+      Vector<number> &result,
+      const FEValuesBase<dim> &fetest,
+      const std::vector<double> &data,
+      double factor = 1.)
+    {
+      const unsigned int n_test = fetest.dofs_per_cell;
+
+      AssertDimension(fetest.get_fe().n_components(), dim);
+      AssertDimension(result.size(), n_test);
+      AssertDimension(data.size(), fetest.n_quadrature_points);
+
+      for (unsigned int q=0; q<fetest.n_quadrature_points; ++q)
+        {
+          const Tensor<1,dim> scaled_normal = factor * fetest.normal_vector(q) * fetest.JxW(q);
+
+          for (unsigned int row=0; row<n_test; ++row)
+            for (unsigned int c=0; c<dim; ++c)
+              result(row) += scaled_normal[c] * fetest.shape_value_component(row,q,c) * data[q];
+        }
+    }
+
+    /**
+     * Interior face version of the divergence trace: the jump of the normal
+     * component of the vector valued trial function times the mean value of
+     * the test function. The normal of <tt>fe1</tt> is used for both sides.
+     * @f[
+     * \int_F (\mathbf u_1\cdot \mathbf n_1 + \mathbf u_2 \cdot \mathbf n_2) \frac{v_1+v_2}{2} \,ds
+     * @f]
+     *
+     * @author Guido Kanschat
+     * @date 2011
+     */
+    template<int dim>
+    void
+    u_dot_n_matrix (
+      FullMatrix<double> &M11,
+      FullMatrix<double> &M12,
+      FullMatrix<double> &M21,
+      FullMatrix<double> &M22,
+      const FEValuesBase<dim> &fe1,
+      const FEValuesBase<dim> &fe2,
+      const FEValuesBase<dim> &fetest1,
+      const FEValuesBase<dim> &fetest2,
+      double factor = 1.)
+    {
+      const unsigned int n_trial = fe1.dofs_per_cell;
+      const unsigned int n_test = fetest1.dofs_per_cell;
+
+      AssertDimension(fe1.get_fe().n_components(), dim);
+      AssertDimension(fe2.get_fe().n_components(), dim);
+      AssertDimension(fetest1.get_fe().n_components(), 1);
+      AssertDimension(fetest2.get_fe().n_components(), 1);
+      AssertDimension(M11.m(), n_test);
+      AssertDimension(M11.n(), n_trial);
+      AssertDimension(M12.m(), n_test);
+      AssertDimension(M12.n(), n_trial);
+      AssertDimension(M21.m(), n_test);
+      AssertDimension(M21.n(), n_trial);
+      AssertDimension(M22.m(), n_test);
+      AssertDimension(M22.n(), n_trial);
+
+      for (unsigned int q=0; q<fe1.n_quadrature_points; ++q)
+        {
+          const double weight = factor * fe1.JxW(q);
+          for (unsigned int row=0; row<n_test; ++row)
+            for (unsigned int col=0; col<n_trial; ++col)
+              for (unsigned int c=0; c<dim; ++c)
+                {
+                  const double trial1_n = fe1.shape_value_component(col,q,c) * fe1.normal_vector(q)[c];
+                  const double trial2_n = -fe2.shape_value_component(col,q,c) * fe1.normal_vector(q)[c];
+                  const double test1 = fetest1.shape_value(row,q);
+                  const double test2 = fetest2.shape_value(row,q);
+
+                  M11(row,col) += .5 * weight * trial1_n * test1;
+                  M12(row,col) += .5 * weight * trial2_n * test1;
+                  M21(row,col) += .5 * weight * trial1_n * test2;
+                  M22(row,col) += .5 * weight * trial2_n * test2;
+                }
+        }
+    }
+
+    /**
+     * Matrix of the weak grad-div form (penalizing volume changes), times <tt>factor</tt>
+     * @f[
+     *  \int_Z \nabla\!\cdot\!u \nabla\!\cdot\!v \,dx
+     * @f]
+     *
+     * @author Guido Kanschat
+     * @date 2011
+     */
+    template <int dim>
+    void grad_div_matrix (
+      FullMatrix<double> &M,
+      const FEValuesBase<dim> &fe,
+      double factor = 1.)
+    {
+      const unsigned int n_local = fe.dofs_per_cell;
+
+      AssertDimension(fe.get_fe().n_components(), dim);
+      AssertDimension(M.m(), n_local);
+      AssertDimension(M.n(), n_local);
+
+      for (unsigned int q=0; q<fe.n_quadrature_points; ++q)
+        {
+          const double weight = factor * fe.JxW(q);
+          for (unsigned int row=0; row<n_local; ++row)
+            for (unsigned int col=0; col<n_local; ++col)
+              {
+                double div_trial = 0.;
+                double div_test = 0.;
+                for (unsigned int c=0; c<dim; ++c)
+                  {
+                    div_test += fe.shape_grad_component(row,q,c)[c];
+                    div_trial += fe.shape_grad_component(col,q,c)[c];
+                  }
+
+                M(row,col) += weight * div_trial * div_test;
+              }
+        }
+    }
+
+    /**
+     * Residual of the weak grad-div form; input[d][k] is the gradient of component d at point k
+     * @f[
+     *  \int_Z \nabla\!\cdot\!u \nabla\!\cdot\!v \,dx
+     * @f]
+     *
+     * @author Guido Kanschat
+     * @date 2014
+     */
+    template <int dim, typename number>
+    void grad_div_residual (
+      Vector<number> &result,
+      const FEValuesBase<dim> &fetest,
+      const VectorSlice<const std::vector<std::vector<Tensor<1,dim> > > > &input,
+      const double factor = 1.)
+    {
+      const unsigned int n_dofs = fetest.dofs_per_cell;
+
+      AssertDimension(fetest.get_fe().n_components(), dim);
+      AssertVectorVectorDimension(input, dim, fetest.n_quadrature_points);
+      AssertDimension(result.size(), n_dofs);
+      for (unsigned int k=0; k<fetest.n_quadrature_points; ++k)
+        {
+          const double dx = factor * fetest.JxW(k);
+          for (unsigned int i=0; i<n_dofs; ++i)
+            {
+              double dv = 0.;
+              double du = 0.;
+              for (unsigned int d=0; d<dim; ++d)
+                {
+                  dv += fetest.shape_grad_component(i,k,d)[d];
+                  du += input[d][k][d];
+                }
+
+              result(i) += dx * du * dv;
+            }
+        }
+    }
+
+    /**
+     * Face matrices for the jump of the normal component, scaled by <tt>factor</tt>
+     * @f[
+     * \int_F
+     *  (\mathbf u_1\cdot \mathbf n_1 + \mathbf u_2 \cdot \mathbf n_2)
+     *  (\mathbf v_1\cdot \mathbf n_1 + \mathbf v_2 \cdot \mathbf n_2)
+     * \,ds
+     * @f]
+     *
+     * @author Guido Kanschat
+     * @date 2011
+     */
+    template<int dim>
+    void
+    u_dot_n_jump_matrix (
+      FullMatrix<double> &M11,
+      FullMatrix<double> &M12,
+      FullMatrix<double> &M21,
+      FullMatrix<double> &M22,
+      const FEValuesBase<dim> &fe1,
+      const FEValuesBase<dim> &fe2,
+      double factor = 1.)
+    {
+      const unsigned int n_local = fe1.dofs_per_cell;
+
+      AssertDimension(fe1.get_fe().n_components(), dim);
+      AssertDimension(fe2.get_fe().n_components(), dim);
+      AssertDimension(M11.m(), n_local);
+      AssertDimension(M11.n(), n_local);
+      AssertDimension(M12.m(), n_local);
+      AssertDimension(M12.n(), n_local);
+      AssertDimension(M21.m(), n_local);
+      AssertDimension(M21.n(), n_local);
+      AssertDimension(M22.m(), n_local);
+      AssertDimension(M22.n(), n_local);
+
+      for (unsigned int q=0; q<fe1.n_quadrature_points; ++q)
+        {
+          const double weight = factor * fe1.JxW(q);
+          for (unsigned int row=0; row<n_local; ++row)
+            for (unsigned int col=0; col<n_local; ++col)
+              for (unsigned int c=0; c<dim; ++c)
+                {
+                  const double trial1_n = fe1.shape_value_component(col,q,c) * fe1.normal_vector(q)[c];
+                  const double trial2_n = -fe2.shape_value_component(col,q,c) * fe1.normal_vector(q)[c];
+                  const double test1_n = fe1.shape_value_component(row,q,c) * fe1.normal_vector(q)[c];
+                  const double test2_n = -fe2.shape_value_component(row,q,c) * fe1.normal_vector(q)[c];
+
+                  M11(row,col) += weight * trial1_n * test1_n;
+                  M12(row,col) += weight * trial2_n * test1_n;
+                  M21(row,col) += weight * trial1_n * test2_n;
+                  M22(row,col) += weight * trial2_n * test2_n;
+                }
+        }
+    }
+
+    /**
+     * The square of the <i>L</i><sup>2</sup>-norm of the divergence over the
+     * quadrature set determined by the FEValuesBase object (no root is taken).
+     *
+     * The vector is expected to consist of dim vectors of length equal to the
+     * number of quadrature points. The number of components of the finite
+     * element has to be equal to the space dimension.
+     *
+     * @author Guido Kanschat
+     * @date 2013
+     */
+    template <int dim>
+    double norm(const FEValuesBase<dim> &fe,
+                const VectorSlice<const std::vector<std::vector<Tensor<1,dim> > > > &Du)
+    {
+      // Check the component count inline: a local that is only read inside
+      // an assertion would otherwise serve no purpose in the function body.
+      AssertDimension(fe.get_fe().n_components(), dim);
+      AssertVectorVectorDimension (Du, dim, fe.n_quadrature_points);
+
+      double result = 0;
+      for (unsigned int k=0; k<fe.n_quadrature_points; ++k)
+        {
+          double div = Du[0][k][0];
+          for (unsigned int d=1; d<dim; ++d)
+            div += Du[d][k][d];
+          result += div*div*fe.JxW(k);
+        }
+      return result;
+    }
+
+  }
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/integrators/elasticity.h b/include/deal.II/integrators/elasticity.h
new file mode 100644
index 0000000..db3ce2d
--- /dev/null
+++ b/include/deal.II/integrators/elasticity.h
@@ -0,0 +1,417 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__integrators_elasticity_h
+#define dealii__integrators_elasticity_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/quadrature.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/fe/mapping.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/meshworker/dof_info.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace LocalIntegrators
+{
+  /**
+   * @brief Local integrators related to elasticity problems.
+   *
+   * @ingroup Integrators
+   * @author Guido Kanschat
+   * @date 2010
+   */
+  namespace Elasticity
+  {
+    /**
+     * Cell matrix of the linear elasticity operator in weak form, i.e. the
+     * double contraction of symmetric gradients, scaled by <tt>factor</tt>.
+     *
+     * \f[ \int_Z \varepsilon(u): \varepsilon(v)\,dx \f]
+     */
+    template <int dim>
+    inline void cell_matrix (
+      FullMatrix<double> &M,
+      const FEValuesBase<dim> &fe,
+      const double factor = 1.)
+    {
+      const unsigned int n_local = fe.dofs_per_cell;
+
+      AssertDimension(fe.get_fe().n_components(), dim);
+      AssertDimension(M.m(), n_local);
+      AssertDimension(M.n(), n_local);
+
+      for (unsigned int q=0; q<fe.n_quadrature_points; ++q)
+        {
+          const double weight = factor * fe.JxW(q);
+          for (unsigned int row=0; row<n_local; ++row)
+            for (unsigned int col=0; col<n_local; ++col)
+              for (unsigned int a=0; a<dim; ++a)
+                for (unsigned int b=0; b<dim; ++b)
+                  M(row,col) += weight * .25 *
+                                (fe.shape_grad_component(col,q,a)[b] + fe.shape_grad_component(col,q,b)[a]) *
+                                (fe.shape_grad_component(row,q,a)[b] + fe.shape_grad_component(row,q,b)[a]);
+        }
+    }
+
+
+    /**
+     * Vector-valued residual for linear elasticity in weak form, added to result
+     *
+     * \f[ \int_Z \varepsilon(u): \varepsilon(v) \,dx \f]
+     */
+    template <int dim, typename number>
+    inline void
+    cell_residual  (
+      Vector<number> &result,
+      const FEValuesBase<dim> &fe,
+      const VectorSlice<const std::vector<std::vector<Tensor<1,dim> > > > &input,
+      double factor = 1.)
+    {
+      const unsigned int n_q_points = fe.n_quadrature_points;
+      const unsigned int n_local = fe.dofs_per_cell;
+      AssertDimension(fe.get_fe().n_components(), dim);
+
+      AssertVectorVectorDimension(input, dim, n_q_points);
+      AssertDimension(result.size(), n_local);
+
+      for (unsigned int q=0; q<n_q_points; ++q)
+        {
+          const double weight = factor * fe.JxW(q);
+          for (unsigned int row=0; row<n_local; ++row)
+            for (unsigned int a=0; a<dim; ++a)
+              for (unsigned int b=0; b<dim; ++b)
+                {
+                  result(row) += weight * .25 *
+                                 (input[a][q][b] + input[b][q][a]) *
+                                 (fe.shape_grad_component(row,q,a)[b] + fe.shape_grad_component(row,q,b)[a]);
+                }
+        }
+    }
+
+
+    /**
+     * Matrix of the Nitsche weak boundary condition for linear elasticity
+     * (the penalty term enters the code as 2*<tt>penalty</tt>):
+     * @f[
+     * \int_F \Bigl(\gamma (u-g) \cdot v - n^T \epsilon(u) v - (u-g) \epsilon(v) n^T\Bigr)\;ds.
+     * @f]
+     */
+    template <int dim>
+    inline void nitsche_matrix (
+      FullMatrix<double> &M,
+      const FEValuesBase<dim> &fe,
+      double penalty,
+      double factor = 1.)
+    {
+      const unsigned int n_local = fe.dofs_per_cell;
+
+      AssertDimension(fe.get_fe().n_components(), dim);
+      AssertDimension(M.m(), n_local);
+      AssertDimension(M.n(), n_local);
+
+      for (unsigned int q=0; q<fe.n_quadrature_points; ++q)
+        {
+          const double weight = factor * fe.JxW(q);
+          const Tensor<1,dim> normal = fe.normal_vector(q);
+          for (unsigned int row=0; row<n_local; ++row)
+            for (unsigned int col=0; col<n_local; ++col)
+              for (unsigned int a=0; a<dim; ++a)
+                {
+                  const double trial = fe.shape_value_component(col,q,a);
+                  const double test = fe.shape_value_component(row,q,a);
+                  M(row,col) += weight * 2. * penalty * trial * test;
+                  for (unsigned int b=0; b<dim; ++b)
+                    {
+                      // test function times (grad u) n
+                      M(row,col) -= .5*weight* fe.shape_grad_component(col,q,a)[b] *normal[b]* test;
+                      // test function times (grad u)^T n
+                      M(row,col) -= .5*weight* fe.shape_grad_component(col,q,b)[a] *normal[b]* test;
+                      // trial function times (grad v) n
+                      M(row,col) -= .5*weight* fe.shape_grad_component(row,q,a)[b] *normal[b]* trial;
+                      // trial function times (grad v)^T n
+                      M(row,col) -= .5*weight* fe.shape_grad_component(row,q,b)[a] *normal[b]* trial;
+                    }
+                }
+        }
+    }
+
+    /**
+     * Weak boundary condition for the elasticity operator by Nitsche, namely
+     * on the face <i>F</i> the vector
+     * @f[
+     * \int_F \Bigl(\gamma (u-g) \cdot v - n^T \epsilon(u) v - (u-g) \epsilon(v) n^T\Bigr)\;ds.
+     * @f]
+     *
+     * Here, <i>u</i> is the finite element function whose values and gradients
+     * are given in <tt>input</tt> and <tt>Dinput</tt>, <i>g</i> is the boundary
+     * value in <tt>data</tt>, $n$ is the outer normal vector and $\gamma$ the
+     * penalty parameter (entering as 2*<tt>penalty</tt>). <tt>result</tt> must
+     * have one entry per degree of freedom of <tt>fe</tt>.
+     *
+     * @author Guido Kanschat
+     * @date 2013
+     */
+    template <int dim, typename number>
+    void nitsche_residual (
+      Vector<number> &result,
+      const FEValuesBase<dim> &fe,
+      const VectorSlice<const std::vector<std::vector<double> > > &input,
+      const VectorSlice<const std::vector<std::vector<Tensor<1,dim> > > > &Dinput,
+      const VectorSlice<const std::vector<std::vector<double> > > &data,
+      double penalty,
+      double factor = 1.)
+    {
+      const unsigned int n_dofs = fe.dofs_per_cell;
+      AssertVectorVectorDimension(input, dim, fe.n_quadrature_points);
+      AssertVectorVectorDimension(Dinput, dim, fe.n_quadrature_points);
+      AssertVectorVectorDimension(data, dim, fe.n_quadrature_points);
+      AssertDimension(result.size(), n_dofs);
+      for (unsigned int k=0; k<fe.n_quadrature_points; ++k)
+        {
+          const double dx = factor * fe.JxW(k);
+          const Tensor<1,dim> n = fe.normal_vector(k);
+          for (unsigned int i=0; i<n_dofs; ++i)
+            for (unsigned int d1=0; d1<dim; ++d1)
+              {
+                const double u= input[d1][k];
+                const double v= fe.shape_value_component(i,k,d1);
+                const double g= data[d1][k];
+                result(i) += dx * 2.*penalty * (u-g) * v;
+
+                for (unsigned int d2=0; d2<dim; ++d2)
+                  {
+                    // v . nabla u n
+                    result(i) -= .5*dx* v * Dinput[d1][k][d2] * n[d2];
+                    // v . (nabla u)^T n
+                    result(i) -= .5*dx* v * Dinput[d2][k][d1] * n[d2];
+                    // (u-g)  nabla v n
+                    result(i) -= .5*dx * (u-g) * fe.shape_grad_component(i,k,d1)[d2] * n[d2];
+                    // (u-g)  (nabla v)^T n
+                    result(i) -= .5*dx * (u-g) * fe.shape_grad_component(i,k,d2)[d1] * n[d2];
+                  }
+              }
+        }
+    }
+
+    /**
+     * Homogeneous weak boundary condition for the elasticity operator by
+     * Nitsche, namely on the face <i>F</i> the vector
+     * @f[
+     * \int_F \Bigl(\gamma u \cdot v - n^T \epsilon(u) v - u \epsilon(v) n^T\Bigr)\;ds.
+     * @f]
+     *
+     * Here, <i>u</i> is the finite element function whose values and gradients
+     * are given in <tt>input</tt> and <tt>Dinput</tt>, $n$ is the outer normal
+     * vector and $\gamma$ the penalty parameter (entering as 2*<tt>penalty</tt>).
+     * <tt>result</tt> must have one entry per degree of freedom of <tt>fe</tt>.
+     *
+     * @author Guido Kanschat
+     * @date 2013
+     */
+    template <int dim, typename number>
+    void nitsche_residual_homogeneous (
+      Vector<number> &result,
+      const FEValuesBase<dim> &fe,
+      const VectorSlice<const std::vector<std::vector<double> > > &input,
+      const VectorSlice<const std::vector<std::vector<Tensor<1,dim> > > > &Dinput,
+      double penalty,
+      double factor = 1.)
+    {
+      const unsigned int n_dofs = fe.dofs_per_cell;
+      AssertVectorVectorDimension(input, dim, fe.n_quadrature_points);
+      AssertVectorVectorDimension(Dinput, dim, fe.n_quadrature_points);
+      AssertDimension(result.size(), n_dofs);
+      for (unsigned int k=0; k<fe.n_quadrature_points; ++k)
+        {
+          const double dx = factor * fe.JxW(k);
+          const Tensor<1,dim> n = fe.normal_vector(k);
+          for (unsigned int i=0; i<n_dofs; ++i)
+            for (unsigned int d1=0; d1<dim; ++d1)
+              {
+                const double u= input[d1][k];
+                const double v= fe.shape_value_component(i,k,d1);
+                result(i) += dx * 2.*penalty * u * v;
+
+                for (unsigned int d2=0; d2<dim; ++d2)
+                  {
+                    // v . nabla u n
+                    result(i) -= .5*dx* v * Dinput[d1][k][d2] * n[d2];
+                    // v . (nabla u)^T n
+                    result(i) -= .5*dx* v * Dinput[d2][k][d1] * n[d2];
+                    // u  nabla v n
+                    result(i) -= .5*dx * u * fe.shape_grad_component(i,k,d1)[d2] * n[d2];
+                    // u  (nabla v)^T n
+                    result(i) -= .5*dx * u * fe.shape_grad_component(i,k,d2)[d1] * n[d2];
+                  }
+              }
+        }
+    }
+
+    /**
+     * The interior penalty flux for symmetric gradients. The face terms are added to the four matrices; nothing is overwritten.
+     */
+    template <int dim>
+    inline void ip_matrix (
+      FullMatrix<double> &M11,  // rows: shape functions of fe1, columns: shape functions of fe1
+      FullMatrix<double> &M12,  // rows: fe1, columns: fe2
+      FullMatrix<double> &M21,  // rows: fe2, columns: fe1
+      FullMatrix<double> &M22,  // rows: fe2, columns: fe2
+      const FEValuesBase<dim> &fe1,
+      const FEValuesBase<dim> &fe2,
+      const double pen,
+      const double int_factor = 1.,
+      const double ext_factor = -1.)  // a negative value means: use int_factor on both sides
+    {
+      const unsigned int n_dofs = fe1.dofs_per_cell;  // also the loop bound for the shape functions of fe2
+
+      AssertDimension(fe1.get_fe().n_components(), dim);
+      AssertDimension(fe2.get_fe().n_components(), dim);
+      AssertDimension(M11.m(), n_dofs);
+      AssertDimension(M11.n(), n_dofs);
+      AssertDimension(M12.m(), n_dofs);
+      AssertDimension(M12.n(), n_dofs);
+      AssertDimension(M21.m(), n_dofs);
+      AssertDimension(M21.n(), n_dofs);
+      AssertDimension(M22.m(), n_dofs);
+      AssertDimension(M22.n(), n_dofs);
+
+      const double nu1 = int_factor;
+      const double nu2 = (ext_factor < 0) ? int_factor : ext_factor;
+      const double penalty = .5 * pen * (nu1 + nu2);  // pen scaled by the mean of nu1 and nu2
+
+      for (unsigned int k=0; k<fe1.n_quadrature_points; ++k)
+        {
+          const double dx = fe1.JxW(k);  // quadrature weights and normals are taken from fe1 only
+          const Tensor<1,dim> n = fe1.normal_vector(k);
+          for (unsigned int i=0; i<n_dofs; ++i)
+            for (unsigned int j=0; j<n_dofs; ++j)
+              for (unsigned int d1=0; d1<dim; ++d1)
+                {
+                  const double u1 = fe1.shape_value_component(j,k,d1);  // column (j) functions
+                  const double u2 = fe2.shape_value_component(j,k,d1);
+                  const double v1 = fe1.shape_value_component(i,k,d1);  // row (i) functions
+                  const double v2 = fe2.shape_value_component(i,k,d1);
+
+                  M11(i,j) += dx * penalty * u1*v1;  // penalty * (u1-u2)*(v1-v2), split over the four blocks
+                  M12(i,j) -= dx * penalty * u2*v1;
+                  M21(i,j) -= dx * penalty * u1*v2;
+                  M22(i,j) += dx * penalty * u2*v2;
+
+                  for (unsigned int d2=0; d2<dim; ++d2)
+                    {
+                      // v . nabla u n: gradients of the column functions, scaled by their own side's nu; v1 and v2 enter with opposite signs
+                      M11(i,j) -= .25 * dx * nu1 * fe1.shape_grad_component(j,k,d1)[d2] * n[d2] * v1;
+                      M12(i,j) -= .25 * dx * nu2 * fe2.shape_grad_component(j,k,d1)[d2] * n[d2] * v1;
+                      M21(i,j) += .25 * dx * nu1 * fe1.shape_grad_component(j,k,d1)[d2] * n[d2] * v2;
+                      M22(i,j) += .25 * dx * nu2 * fe2.shape_grad_component(j,k,d1)[d2] * n[d2] * v2;
+                      // v (nabla u)^T n: same as above with the component and derivative indices d1, d2 swapped
+                      M11(i,j) -= .25 * dx * nu1 * fe1.shape_grad_component(j,k,d2)[d1] * n[d2] * v1;
+                      M12(i,j) -= .25 * dx * nu2 * fe2.shape_grad_component(j,k,d2)[d1] * n[d2] * v1;
+                      M21(i,j) += .25 * dx * nu1 * fe1.shape_grad_component(j,k,d2)[d1] * n[d2] * v2;
+                      M22(i,j) += .25 * dx * nu2 * fe2.shape_grad_component(j,k,d2)[d1] * n[d2] * v2;
+                      // u  nabla v n: gradients of the row functions, scaled by their own side's nu; u1 and u2 enter with opposite signs
+                      M11(i,j) -= .25 * dx * nu1 * fe1.shape_grad_component(i,k,d1)[d2] * n[d2] * u1;
+                      M12(i,j) += .25 * dx * nu1 * fe1.shape_grad_component(i,k,d1)[d2] * n[d2] * u2;
+                      M21(i,j) -= .25 * dx * nu2 * fe2.shape_grad_component(i,k,d1)[d2] * n[d2] * u1;
+                      M22(i,j) += .25 * dx * nu2 * fe2.shape_grad_component(i,k,d1)[d2] * n[d2] * u2;
+                      // u (nabla v)^T n: same as above with the component and derivative indices d1, d2 swapped
+                      M11(i,j) -= .25 * dx * nu1 * fe1.shape_grad_component(i,k,d2)[d1] * n[d2] * u1;
+                      M12(i,j) += .25 * dx * nu1 * fe1.shape_grad_component(i,k,d2)[d1] * n[d2] * u2;
+                      M21(i,j) -= .25 * dx * nu2 * fe2.shape_grad_component(i,k,d2)[d1] * n[d2] * u1;
+                      M22(i,j) += .25 * dx * nu2 * fe2.shape_grad_component(i,k,d2)[d1] * n[d2] * u2;
+                    }
+                }
+        }
+    }
+    /**
+     * Elasticity residual term for the symmetric interior penalty method.
+     * Contributions are added to <tt>result1</tt> and <tt>result2</tt>; weights and normals are taken from <tt>fe1</tt>.
+     * @author Guido Kanschat
+     * @date 2013
+     */
+    template<int dim, typename number>
+    void
+    ip_residual(
+      Vector<number> &result1,  // tested with the shape functions of fe1
+      Vector<number> &result2,  // tested with the shape functions of fe2
+      const FEValuesBase<dim> &fe1,
+      const FEValuesBase<dim> &fe2,
+      const VectorSlice<const std::vector<std::vector<double> > > &input1,  // indexed [component][quadrature point]
+      const VectorSlice<const std::vector<std::vector<Tensor<1,dim> > > > &Dinput1,
+      const VectorSlice<const std::vector<std::vector<double> > > &input2,
+      const VectorSlice<const std::vector<std::vector<Tensor<1,dim> > > > &Dinput2,
+      double pen,
+      double int_factor = 1.,
+      double ext_factor = -1.)  // a negative value means: use int_factor on both sides
+    {
+      const unsigned int n1 = fe1.dofs_per_cell;  // also the loop bound for the shape functions of fe2
+
+      AssertDimension(fe1.get_fe().n_components(), dim);
+      AssertDimension(fe2.get_fe().n_components(), dim);
+      AssertVectorVectorDimension(input1, dim, fe1.n_quadrature_points);
+      AssertVectorVectorDimension(Dinput1, dim, fe1.n_quadrature_points);
+      AssertVectorVectorDimension(input2, dim, fe2.n_quadrature_points);
+      AssertVectorVectorDimension(Dinput2, dim, fe2.n_quadrature_points);
+
+      const double nu1 = int_factor;
+      const double nu2 = (ext_factor < 0) ? int_factor : ext_factor;
+      const double penalty = .5 * pen * (nu1 + nu2);  // pen scaled by the mean of nu1 and nu2
+
+
+      for (unsigned int k=0; k<fe1.n_quadrature_points; ++k)
+        {
+          const double dx = fe1.JxW(k);
+          const Tensor<1,dim> n = fe1.normal_vector(k);
+
+          for (unsigned int i=0; i<n1; ++i)
+            for (unsigned int d1=0; d1<dim; ++d1)
+              {
+                const double v1 = fe1.shape_value_component(i,k,d1);
+                const double v2 = fe2.shape_value_component(i,k,d1);
+                const double u1 = input1[d1][k];
+                const double u2 = input2[d1][k];
+
+                result1(i) += dx * penalty * u1*v1;  // penalty * (u1-u2) tested with v1 ...
+                result1(i) -= dx * penalty * u2*v1;
+                result2(i) -= dx * penalty * u1*v2;  // ... and with -v2
+                result2(i) += dx * penalty * u2*v2;
+
+                for (unsigned int d2=0; d2<dim; ++d2)
+                  {
+                    // v . nabla u n: nu-weighted sum of both input gradients; v1 and v2 enter with opposite signs
+                    result1(i) -= .25*dx* (nu1*Dinput1[d1][k][d2]+nu2*Dinput2[d1][k][d2]) * n[d2] * v1;
+                    result2(i) += .25*dx* (nu1*Dinput1[d1][k][d2]+nu2*Dinput2[d1][k][d2]) * n[d2] * v2;
+                    // v . (nabla u)^T n: same with the component and derivative indices d1, d2 swapped
+                    result1(i) -= .25*dx* (nu1*Dinput1[d2][k][d1]+nu2*Dinput2[d2][k][d1]) * n[d2] * v1;
+                    result2(i) += .25*dx* (nu1*Dinput1[d2][k][d1]+nu2*Dinput2[d2][k][d1]) * n[d2] * v2;
+                    // u  nabla v n: difference u1-u2 times the gradient of the test function on each side
+                    result1(i) -= .25*dx* nu1*fe1.shape_grad_component(i,k,d1)[d2] * n[d2] * (u1-u2);
+                    result2(i) -= .25*dx* nu2*fe2.shape_grad_component(i,k,d1)[d2] * n[d2] * (u1-u2);
+                    // u  (nabla v)^T n: same with the component and derivative indices d1, d2 swapped
+                    result1(i) -= .25*dx* nu1*fe1.shape_grad_component(i,k,d2)[d1] * n[d2] * (u1-u2);
+                    result2(i) -= .25*dx* nu2*fe2.shape_grad_component(i,k,d2)[d1] * n[d2] * (u1-u2);
+                  }
+              }
+        }
+    }
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/integrators/l2.h b/include/deal.II/integrators/l2.h
new file mode 100644
index 0000000..2a8fcd4
--- /dev/null
+++ b/include/deal.II/integrators/l2.h
@@ -0,0 +1,260 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__integrators_l2_h
+#define dealii__integrators_l2_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/quadrature.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/fe/mapping.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/meshworker/dof_info.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace LocalIntegrators
+{
+  /**
+   * @brief Local integrators related to <i>L<sup>2</sup></i>-inner products.
+   *
+   * @ingroup Integrators
+   * @author Guido Kanschat
+   * @date 2010
+   */
+  namespace L2
+  {
+    /**
+     * The mass matrix for scalar or vector-valued finite elements. \f[ \int_Z
+     * uv\,dx \quad \text{or} \quad \int_Z \mathbf u\cdot \mathbf v\,dx \f]
+     *
+     * Likewise, this term can be used on faces, where it computes the
+     * integrals \f[ \int_F uv\,ds \quad \text{or} \quad \int_F \mathbf u\cdot
+     * \mathbf v\,ds \f]
+     *
+     * @author Guido Kanschat
+     * @date 2008, 2009, 2010
+     */
+    template <int dim>
+    void mass_matrix (
+      FullMatrix<double> &M,
+      const FEValuesBase<dim> &fe,
+      const double factor = 1.)
+    {
+      const unsigned int dofs = fe.dofs_per_cell;
+      const unsigned int comps = fe.get_fe().n_components();
+
+      for (unsigned int q=0; q<fe.n_quadrature_points; ++q)
+        {
+          const double weight = fe.JxW(q) * factor;
+          for (unsigned int row=0; row<dofs; ++row)
+            {
+              // Diagonal entry: sum over all components of weight * phi^2.
+              double diagonal = 0.0;
+              for (unsigned int c=0; c<comps; ++c)
+                {
+                  const double phi = fe.shape_value_component(row,q,c);
+                  diagonal += weight * phi * phi;
+                }
+              M(row,row) += diagonal;
+
+              // Strict upper triangle; every value is mirrored into the lower one.
+              for (unsigned int col=row+1; col<dofs; ++col)
+                {
+                  double entry = 0.0;
+                  for (unsigned int c=0; c<comps; ++c)
+                    entry += weight * fe.shape_value_component(col,q,c)
+                             * fe.shape_value_component(row,q,c);
+                  M(row,col) += entry;
+                  M(col,row) += entry;
+                }
+            }
+        }
+    }
+
+    /**
+     * The weighted mass matrix for scalar or vector-valued finite elements.
+     * \f[ \int_Z \omega(x) uv\,dx \quad \text{or} \quad \int_Z \omega(x)
+     * \mathbf u\cdot \mathbf v\,dx \f]
+     *
+     * Likewise, this term can be used on faces, where it computes the
+     * integrals \f[ \int_F \omega(x) uv\,ds \quad \text{or} \quad \int_F
+     * \omega(x) \mathbf u\cdot \mathbf v\,ds \f]
+     *
+     * The size of the vector <tt>weights</tt> must be equal to the number of
+     * quadrature points of <tt>fe</tt>.
+     *
+     * @author Guido Kanschat
+     * @date 2014
+     */
+    template <int dim>
+    void weighted_mass_matrix (
+      FullMatrix<double> &M,
+      const FEValuesBase<dim> &fe,
+      const std::vector<double> &weights)  // taken by reference: the vector is only read, never copied
+    {
+      const unsigned int n_dofs = fe.dofs_per_cell;
+      const unsigned int n_components = fe.get_fe().n_components();
+      AssertDimension(M.m(), n_dofs);
+      AssertDimension(M.n(), n_dofs);
+      AssertDimension(weights.size(), fe.n_quadrature_points);
+
+      for (unsigned int k=0; k<fe.n_quadrature_points; ++k)
+        {
+          const double dx = fe.JxW(k) * weights[k];  // quadrature weight times the user weight at point k
+          for (unsigned int i=0; i<n_dofs; ++i)
+            {
+              // diagonal entry, summed over all components
+              double Mii = 0.0;
+              for (unsigned int d=0; d<n_components; ++d)
+                Mii += dx
+                       * fe.shape_value_component(i,k,d)
+                       * fe.shape_value_component(i,k,d);
+              M(i,i) += Mii;
+
+              // off-diagonal entries: computed for j>i and added to both (i,j) and (j,i)
+              for (unsigned int j=i+1; j<n_dofs; ++j)
+                {
+                  double Mij = 0.0;
+                  for (unsigned int d=0; d<n_components; ++d)
+                    Mij += dx
+                           * fe.shape_value_component(j,k,d)
+                           * fe.shape_value_component(i,k,d);
+                  M(i,j) += Mij;
+                  M(j,i) += Mij;
+                }
+            }
+        }
+    }
+
+    /**
+     * <i>L<sup>2</sup></i>-inner product for scalar functions.
+     *
+     * \f[ \int_Z fv\,dx \quad \text{or} \quad \int_F fv\,ds \f]
+     *
+     * @author Guido Kanschat
+     * @date 2008, 2009, 2010
+     */
+    template <int dim, typename number>
+    void L2 (
+      Vector<number> &result,
+      const FEValuesBase<dim> &fe,
+      const std::vector<double> &input,
+      const double factor = 1.)
+    {
+      const unsigned int n_dofs = fe.dofs_per_cell;
+      AssertDimension(result.size(), n_dofs);
+      AssertDimension(fe.get_fe().n_components(), 1);
+      AssertDimension(input.size(), fe.n_quadrature_points);
+      // Add JxW * factor * f * phi_i at every quadrature point to entry i.
+      for (unsigned int q=0; q<fe.n_quadrature_points; ++q)
+        for (unsigned int dof=0; dof<n_dofs; ++dof)
+          result(dof) += fe.JxW(q) * factor * input[q] * fe.shape_value(dof,q);
+    }
+
+    /**
+     * <i>L<sup>2</sup></i>-inner product for a slice of a vector valued right
+     * hand side. \f[ \int_Z \mathbf f\cdot \mathbf v\,dx \quad \text{or}
+     * \quad \int_F \mathbf f\cdot \mathbf v\,ds \f]
+     *
+     * @author Guido Kanschat
+     * @date 2008, 2009, 2010
+     */
+    template <int dim, typename number>
+    void L2 (
+      Vector<number> &result,
+      const FEValuesBase<dim> &fe,
+      const VectorSlice<const std::vector<std::vector<double> > > &input,
+      const double factor = 1.)
+    {
+      const unsigned int n_dofs = fe.dofs_per_cell;
+      const unsigned int fe_components = fe.get_fe().n_components();
+
+      // The right hand side must supply one row per component of the element.
+      AssertDimension(result.size(), n_dofs);
+      AssertDimension(input.size(), fe_components);
+
+      for (unsigned int q=0; q<fe.n_quadrature_points; ++q)
+        for (unsigned int dof=0; dof<n_dofs; ++dof)
+          for (unsigned int c=0; c<input.size(); ++c)
+            result(dof) += fe.JxW(q) * factor * fe.shape_value_component(dof,q,c) * input[c][q];
+    }
+
+    /**
+     * The jump matrix between two cells for scalar or vector-valued finite
+     * elements. Note that the factor $\gamma$ can be used to implement
+     * weighted jumps. \f[ \int_F [\gamma u][\gamma v]\,ds \quad \text{or}
+     * \int_F [\gamma \mathbf u]\cdot [\gamma \mathbf v]\,ds \f]
+     *
+     * Using appropriate weights, this term can be used to penalize violation
+     * of conformity in <i>H<sup>1</sup></i>.
+     *
+     * @author Guido Kanschat
+     * @date 2008, 2009, 2010
+     */
+    template <int dim>
+    void jump_matrix (
+      FullMatrix<double> &M11,
+      FullMatrix<double> &M12,
+      FullMatrix<double> &M21,
+      FullMatrix<double> &M22,
+      const FEValuesBase<dim> &fe1,
+      const FEValuesBase<dim> &fe2,
+      const double factor1 = 1.,
+      const double factor2 = 1.)
+    {
+      const unsigned int n1_dofs = fe1.dofs_per_cell;
+      const unsigned int n2_dofs = fe2.dofs_per_cell;
+      const unsigned int n_components = fe1.get_fe().n_components();
+
+      Assert(n1_dofs == n2_dofs, ExcNotImplemented());
+      AssertDimension(n_components, fe2.get_fe().n_components());
+      AssertDimension(M11.m(), n1_dofs);
+      AssertDimension(M12.m(), n1_dofs);
+      AssertDimension(M21.m(), n2_dofs);
+      AssertDimension(M22.m(), n2_dofs);
+      AssertDimension(M11.n(), n1_dofs);
+      AssertDimension(M12.n(), n2_dofs);
+      AssertDimension(M21.n(), n1_dofs);
+      AssertDimension(M22.n(), n2_dofs);
+
+      for (unsigned int q=0; q<fe1.n_quadrature_points; ++q)
+        {
+          const double weight = fe1.JxW(q);
+          // Both sides share the dof count (asserted above), so one bound serves rows and columns.
+          for (unsigned int row=0; row<n1_dofs; ++row)
+            for (unsigned int col=0; col<n1_dofs; ++col)
+              for (unsigned int c=0; c<n_components; ++c)
+                {
+                  const double trial1 = factor1*fe1.shape_value_component(col,q,c);
+                  const double trial2 = -factor2*fe2.shape_value_component(col,q,c);
+                  const double test1 = factor1*fe1.shape_value_component(row,q,c);
+                  const double test2 = -factor2*fe2.shape_value_component(row,q,c);
+                  // The minus signs of the jump are already contained in trial2 and test2.
+                  M11(row,col) += weight * trial1*test1;
+                  M12(row,col) += weight * trial2*test1;
+                  M21(row,col) += weight * trial1*test2;
+                  M22(row,col) += weight * trial2*test2;
+                }
+        }
+    }
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/integrators/laplace.h b/include/deal.II/integrators/laplace.h
new file mode 100644
index 0000000..7379965
--- /dev/null
+++ b/include/deal.II/integrators/laplace.h
@@ -0,0 +1,635 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__integrators_laplace_h
+#define dealii__integrators_laplace_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/quadrature.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/fe/mapping.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/meshworker/dof_info.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace LocalIntegrators
+{
+  /**
+   * @brief Local integrators related to the Laplacian and its DG formulations
+   *
+   * @ingroup Integrators
+   * @author Guido Kanschat
+   * @date 2010
+   */
+  namespace Laplace
+  {
+    /**
+     * Laplacian in weak form, namely on the cell <i>Z</i> the matrix \f[
+     * \int_Z \nu \nabla u \cdot \nabla v \, dx. \f]
+     *
+     * The FiniteElement in <tt>fe</tt> may be scalar or vector valued. In the
+     * latter case, the Laplacian is applied to each component separately.
+     *
+     * @author Guido Kanschat
+     * @date 2008, 2009, 2010
+     */
+    template<int dim>
+    void cell_matrix (
+      FullMatrix<double> &M,
+      const FEValuesBase<dim> &fe,
+      const double factor = 1.)
+    {
+      const unsigned int dofs = fe.dofs_per_cell;
+      const unsigned int comps = fe.get_fe().n_components();
+
+      for (unsigned int q=0; q<fe.n_quadrature_points; ++q)
+        {
+          const double weight = fe.JxW(q) * factor;
+          for (unsigned int row=0; row<dofs; ++row)
+            {
+              // Diagonal entry: squared gradient norm, summed over all components.
+              double diagonal = 0.0;
+              for (unsigned int c=0; c<comps; ++c)
+                {
+                  const Tensor<1,dim> grad = fe.shape_grad_component(row,q,c);
+                  diagonal += weight * (grad * grad);
+                }
+              M(row,row) += diagonal;
+              // Strict upper triangle; every value is mirrored into the lower one.
+              for (unsigned int col=row+1; col<dofs; ++col)
+                {
+                  double entry = 0.0;
+                  for (unsigned int c=0; c<comps; ++c)
+                    entry += weight *
+                             (fe.shape_grad_component(col,q,c) * fe.shape_grad_component(row,q,c));
+                  M(row,col) += entry;
+                  M(col,row) += entry;
+                }
+            }
+        }
+    }
+
+    /**
+     * Laplacian residual operator in weak form
+     *
+     * \f[ \int_Z \nu \nabla u \cdot \nabla v \, dx. \f]
+     */
+    template <int dim>
+    inline void
+    cell_residual  (
+      Vector<double> &result,
+      const FEValuesBase<dim> &fe,
+      const std::vector<Tensor<1,dim> > &input,
+      double factor = 1.)
+    {
+      const unsigned int nq = fe.n_quadrature_points;
+      const unsigned int n_dofs = fe.dofs_per_cell;
+      Assert(input.size() == nq, ExcDimensionMismatch(input.size(), nq));
+      Assert(result.size() == n_dofs, ExcDimensionMismatch(result.size(), n_dofs));
+      for (unsigned int q=0; q<nq; ++q)
+        {
+          const double weight = factor * fe.JxW(q);
+          const Tensor<1,dim> &grad_u = input[q];
+          for (unsigned int dof=0; dof<n_dofs; ++dof)
+            result(dof) += weight * (grad_u * fe.shape_grad(dof,q));
+        }
+    }
+
+
+    /**
+     * Vector-valued Laplacian residual operator in weak form
+     *
+     * \f[ \int_Z \nu \nabla u : \nabla v \, dx. \f]
+     */
+    template <int dim>
+    inline void
+    cell_residual  (
+      Vector<double> &result,
+      const FEValuesBase<dim> &fe,
+      const VectorSlice<const std::vector<std::vector<Tensor<1,dim> > > > &input,
+      double factor = 1.)
+    {
+      const unsigned int nq = fe.n_quadrature_points;
+      const unsigned int n_dofs = fe.dofs_per_cell;
+      const unsigned int n_comp = fe.get_fe().n_components();
+
+      AssertVectorVectorDimension(input, n_comp, fe.n_quadrature_points);
+      Assert(result.size() == n_dofs, ExcDimensionMismatch(result.size(), n_dofs));
+
+      for (unsigned int q=0; q<nq; ++q)
+        {
+          const double weight = factor * fe.JxW(q);
+          for (unsigned int dof=0; dof<n_dofs; ++dof)
+            for (unsigned int c=0; c<n_comp; ++c)
+              {
+                const Tensor<1,dim> &grad_u = input[c][q];
+                result(dof) += weight * (grad_u * fe.shape_grad_component(dof,q,c));
+              }
+        }
+    }
+
+
+    /**
+     * Weak boundary condition of Nitsche type for the Laplacian, namely on
+     * the face <i>F</i> the matrix
+     * @f[
+     * \int_F \Bigl(\gamma u v - \partial_n u v - u \partial_n v\Bigr)\;ds.
+     * @f]
+     *
+     * Here, $\gamma$ is the <tt>penalty</tt> parameter suitably computed with
+     * compute_penalty().
+     *
+     * @author Guido Kanschat
+     * @date 2008, 2009, 2010
+     */
+    template <int dim>
+    void nitsche_matrix (
+      FullMatrix<double> &M,
+      const FEValuesBase<dim> &fe,
+      double penalty,
+      double factor = 1.)
+    {
+      const unsigned int n_dofs = fe.dofs_per_cell;
+      const unsigned int n_comp = fe.get_fe().n_components();
+      // M has to be square with one row and one column per dof.
+      Assert (M.m() == n_dofs, ExcDimensionMismatch(M.m(), n_dofs));
+      Assert (M.n() == n_dofs, ExcDimensionMismatch(M.n(), n_dofs));
+      // Per component: penalty term (factor 2*penalty) minus the two normal-derivative terms.
+      for (unsigned int q=0; q<fe.n_quadrature_points; ++q)
+        {
+          const double weight = fe.JxW(q) * factor;
+          const Tensor<1,dim> normal = fe.normal_vector(q);
+          for (unsigned int row=0; row<n_dofs; ++row)
+            for (unsigned int col=0; col<n_dofs; ++col)
+              for (unsigned int c=0; c<n_comp; ++c)
+                M(row,col) += weight *
+                              (2. * fe.shape_value_component(row,q,c) * penalty * fe.shape_value_component(col,q,c)
+                               - (normal * fe.shape_grad_component(row,q,c)) * fe.shape_value_component(col,q,c)
+                               - (normal * fe.shape_grad_component(col,q,c)) * fe.shape_value_component(row,q,c));
+        }
+    }
+
+    /**
+     * Weak boundary condition for the Laplace operator by Nitsche, scalar
+     * version, namely on the face <i>F</i> the vector
+     * @f[
+     * \int_F \Bigl(\gamma (u-g) v - \partial_n u v - (u-g) \partial_n v\Bigr)\;ds.
+     * @f]
+     *
+     * Here, <i>u</i> is the finite element function whose values and gradient
+     * are given in the arguments <tt>input</tt> and <tt>Dinput</tt>,
+     * respectively. <i>g</i> is the inhomogeneous boundary value in the
+     * argument <tt>data</tt>. $\gamma$ is the usual penalty parameter.
+     *
+     * @author Guido Kanschat
+     * @date 2008, 2009, 2010
+     */
+    template <int dim>
+    void nitsche_residual (
+      Vector<double> &result,
+      const FEValuesBase<dim> &fe,
+      const std::vector<double> &input,
+      const std::vector<Tensor<1,dim> > &Dinput,
+      const std::vector<double> &data,
+      double penalty,
+      double factor = 1.)
+    {
+      const unsigned int n_dofs = fe.dofs_per_cell;
+      AssertDimension(input.size(), fe.n_quadrature_points);
+      AssertDimension(Dinput.size(), fe.n_quadrature_points);
+      AssertDimension(data.size(), fe.n_quadrature_points);
+
+      for (unsigned int q=0; q<fe.n_quadrature_points; ++q)
+        {
+          const double weight = factor * fe.JxW(q);
+          const Tensor<1,dim> normal = fe.normal_vector(q);
+          // These two depend on the quadrature point only: the normal
+          // derivative of u and the deviation of u from the boundary data.
+          const double dn_u = Dinput[q] * normal;
+          const double mismatch = input[q] - data[q];
+          for (unsigned int dof=0; dof<n_dofs; ++dof)
+            {
+              const double v = fe.shape_value(dof,q);
+              const double dn_v = fe.shape_grad(dof,q) * normal;
+              result(dof) += weight*(2.*penalty*mismatch*v - dn_v*mismatch - dn_u*v);
+            }
+        }
+    }
+
+    /**
+     * Weak boundary condition for the Laplace operator by Nitsche, vector
+     * valued version, namely on the face <i>F</i> the vector
+     * @f[
+     * \int_F \Bigl(\gamma (\mathbf u- \mathbf g) \cdot \mathbf v
+     * - \partial_n \mathbf u \cdot \mathbf v
+     * - (\mathbf u-\mathbf g) \cdot \partial_n \mathbf v\Bigr)\;ds.
+     * @f]
+     *
+     * Here, <i>u</i> is the finite element function whose values and gradient
+     * are given in the arguments <tt>input</tt> and <tt>Dinput</tt>,
+     * respectively. <i>g</i> is the inhomogeneous boundary value in the
+     * argument <tt>data</tt>. $\gamma$ is the usual penalty parameter.
+     *
+     * @author Guido Kanschat
+     * @date 2008, 2009, 2010
+     */
+    template <int dim>
+    void nitsche_residual (
+      Vector<double> &result,
+      const FEValuesBase<dim> &fe,
+      const VectorSlice<const std::vector<std::vector<double> > > &input,
+      const VectorSlice<const std::vector<std::vector<Tensor<1,dim> > > > &Dinput,
+      const VectorSlice<const std::vector<std::vector<double> > > &data,
+      double penalty,
+      double factor = 1.)
+    {
+      const unsigned int n_dofs = fe.dofs_per_cell;
+      const unsigned int n_comp = fe.get_fe().n_components();
+      AssertVectorVectorDimension(input, n_comp, fe.n_quadrature_points);
+      AssertVectorVectorDimension(Dinput, n_comp, fe.n_quadrature_points);
+      AssertVectorVectorDimension(data, n_comp, fe.n_quadrature_points);
+
+      for (unsigned int q=0; q<fe.n_quadrature_points; ++q)
+        {
+          const double weight = factor * fe.JxW(q);
+          const Tensor<1,dim> normal = fe.normal_vector(q);
+          for (unsigned int dof=0; dof<n_dofs; ++dof)
+            for (unsigned int c=0; c<n_comp; ++c)
+              {
+                const double v = fe.shape_value_component(dof,q,c);
+                const double dn_v = fe.shape_grad_component(dof,q,c) * normal;
+                const double dn_u = Dinput[c][q] * normal;
+                // deviation of this solution component from the boundary data
+                const double mismatch = input[c][q] - data[c][q];
+
+                result(dof) += weight*(2.*penalty*mismatch*v - dn_v*mismatch - dn_u*v);
+              }
+        }
+    }
+
+    /**
+     * Flux for the interior penalty method for the Laplacian, namely on the
+     * face <i>F</i> the matrices associated with the bilinear form
+     * @f[
+     * \int_F \Bigl( \gamma [u][v] - \{\nabla u\}[v\mathbf n] - [u\mathbf
+     * n]\{\nabla v\} \Bigr) \; ds.
+     * @f]
+     *
+     * The penalty parameter should always be the mean value of the penalties
+     * needed for stability on each side. In the case of constant
+     * coefficients, it can be computed using compute_penalty().
+     *
+     * If <tt>factor2</tt> is missing or negative, the factor is assumed the
+     * same on both sides. If factors differ, note that the penalty parameter
+     * has to be computed accordingly.
+     *
+     * @author Guido Kanschat
+     * @date 2008, 2009, 2010
+     */
+    template <int dim>
+    void ip_matrix (
+      FullMatrix<double> &M11,  // rows: shape functions of fe1, columns: shape functions of fe1
+      FullMatrix<double> &M12,  // rows: fe1, columns: fe2
+      FullMatrix<double> &M21,  // rows: fe2, columns: fe1
+      FullMatrix<double> &M22,  // rows: fe2, columns: fe2
+      const FEValuesBase<dim> &fe1,
+      const FEValuesBase<dim> &fe2,
+      double penalty,
+      double factor1 = 1.,
+      double factor2 = -1.)  // a negative value means: use factor1 on both sides
+    {
+      const unsigned int n_dofs = fe1.dofs_per_cell;  // also the loop bound for the shape functions of fe2
+      AssertDimension(M11.n(), n_dofs);
+      AssertDimension(M11.m(), n_dofs);
+      AssertDimension(M12.n(), n_dofs);
+      AssertDimension(M12.m(), n_dofs);
+      AssertDimension(M21.n(), n_dofs);
+      AssertDimension(M21.m(), n_dofs);
+      AssertDimension(M22.n(), n_dofs);
+      AssertDimension(M22.m(), n_dofs);
+
+      const double nui = factor1;
+      const double nue = (factor2 < 0) ? factor1 : factor2;
+      const double nu = .5*(nui+nue);  // mean of the two coefficients; scales the penalty term
+
+      for (unsigned int k=0; k<fe1.n_quadrature_points; ++k)
+        {
+          const double dx = fe1.JxW(k);  // quadrature weights and normals are taken from fe1 only
+          const Tensor<1,dim> n = fe1.normal_vector(k);
+          for (unsigned int d=0; d<fe1.get_fe().n_components(); ++d)  // components are treated independently
+            {
+              for (unsigned int i=0; i<n_dofs; ++i)
+                {
+                  for (unsigned int j=0; j<n_dofs; ++j)
+                    {
+                      const double vi = fe1.shape_value_component(i,k,d);  // row function on fe1
+                      const double dnvi = n * fe1.shape_grad_component(i,k,d);  // its normal derivative
+                      const double ve = fe2.shape_value_component(i,k,d);  // row function on fe2
+                      const double dnve = n * fe2.shape_grad_component(i,k,d);
+                      const double ui = fe1.shape_value_component(j,k,d);  // column function on fe1
+                      const double dnui = n * fe1.shape_grad_component(j,k,d);
+                      const double ue = fe2.shape_value_component(j,k,d);  // column function on fe2
+                      const double dnue = n * fe2.shape_grad_component(j,k,d);
+                      M11(i,j) += dx*(-.5*nui*dnvi*ui-.5*nui*dnui*vi+nu*penalty*ui*vi);  // each block: two normal-derivative terms plus the penalty term
+                      M12(i,j) += dx*( .5*nui*dnvi*ue-.5*nue*dnue*vi-nu*penalty*vi*ue);
+                      M21(i,j) += dx*(-.5*nue*dnve*ui+.5*nui*dnui*ve-nu*penalty*ui*ve);
+                      M22(i,j) += dx*( .5*nue*dnve*ue+.5*nue*dnue*ve+nu*penalty*ue*ve);
+                    }
+                }
+            }
+        }
+    }
+
+    /**
+     * Flux for the interior penalty method for the Laplacian applied to the
+     * tangential components of a vector field, namely on the face <i>F</i>
+     * the matrices associated with the bilinear form
+     * @f[
+     * \int_F \Bigl( \gamma [u_\tau][v_\tau] - \{\nabla u_\tau\}[v_\tau\mathbf n] - [u_\tau\mathbf
+     * n]\{\nabla v_\tau\} \Bigr) \; ds.
+     * @f]
+     *
+     * @warning This function is still under development!
+     *
+     * @author Bärbel Janssen, Guido Kanschat
+     * @date 2013
+     */
+    template <int dim>
+    void ip_tangential_matrix (
+      FullMatrix<double> &M11,
+      FullMatrix<double> &M12,
+      FullMatrix<double> &M21,
+      FullMatrix<double> &M22,
+      const FEValuesBase<dim> &fe1,
+      const FEValuesBase<dim> &fe2,
+      double penalty,
+      double factor1 = 1.,
+      double factor2 = -1.)
+    {
+      const unsigned int n_dofs = fe1.dofs_per_cell;
+      AssertDimension(fe1.get_fe().n_components(), dim);
+      AssertDimension(fe2.get_fe().n_components(), dim);
+      AssertDimension(M11.n(), n_dofs);
+      AssertDimension(M11.m(), n_dofs);
+      AssertDimension(M12.n(), n_dofs);
+      AssertDimension(M12.m(), n_dofs);
+      AssertDimension(M21.n(), n_dofs);
+      AssertDimension(M21.m(), n_dofs);
+      AssertDimension(M22.n(), n_dofs);
+      AssertDimension(M22.m(), n_dofs);
+
+      const double nui = factor1;
+      const double nue = (factor2 < 0) ? factor1 : factor2;
+      const double nu = .5*(nui+nue);
+
+      for (unsigned int k=0; k<fe1.n_quadrature_points; ++k)
+        {
+          const double dx = fe1.JxW(k);
+          const Tensor<1,dim> n = fe1.normal_vector(k);
+          for (unsigned int i=0; i<n_dofs; ++i)
+            {
+              for (unsigned int j=0; j<n_dofs; ++j)
+                {
+                  double u1dotn = 0.;
+                  double v1dotn = 0.;
+                  double u2dotn = 0.;
+                  double v2dotn = 0.;
+
+                  double ngradu1n = 0.;
+                  double ngradv1n = 0.;
+                  double ngradu2n = 0.;
+                  double ngradv2n = 0.;
+
+                  for (unsigned int d=0; d<dim; ++d)
+                    {
+                      u1dotn += n[d]*fe1.shape_value_component(j,k,d);
+                      v1dotn += n[d]*fe1.shape_value_component(i,k,d);
+                      u2dotn += n[d]*fe2.shape_value_component(j,k,d);
+                      v2dotn += n[d]*fe2.shape_value_component(i,k,d);
+
+                      ngradu1n += n*fe1.shape_grad_component(j,k,d)*n[d];
+                      ngradv1n += n*fe1.shape_grad_component(i,k,d)*n[d];
+                      ngradu2n += n*fe2.shape_grad_component(j,k,d)*n[d];
+                      ngradv2n += n*fe2.shape_grad_component(i,k,d)*n[d];
+                    }
+
+                  for (unsigned int d=0; d<fe1.get_fe().n_components(); ++d)
+                    {
+                      const double vi = fe1.shape_value_component(i,k,d)-v1dotn*n[d];
+                      const double dnvi = n * fe1.shape_grad_component(i,k,d)-ngradv1n*n[d];
+
+                      const double ve = fe2.shape_value_component(i,k,d)-v2dotn*n[d];
+                      const double dnve = n * fe2.shape_grad_component(i,k,d)-ngradv2n*n[d];
+
+                      const double ui = fe1.shape_value_component(j,k,d)-u1dotn*n[d];
+                      const double dnui = n * fe1.shape_grad_component(j,k,d)-ngradu1n*n[d];
+
+                      const double ue = fe2.shape_value_component(j,k,d)-u2dotn*n[d];
+                      const double dnue = n * fe2.shape_grad_component(j,k,d)-ngradu2n*n[d];
+
+                      M11(i,j) += dx*(-.5*nui*dnvi*ui-.5*nui*dnui*vi+nu*penalty*ui*vi);
+                      M12(i,j) += dx*( .5*nui*dnvi*ue-.5*nue*dnue*vi-nu*penalty*vi*ue);
+                      M21(i,j) += dx*(-.5*nue*dnve*ui+.5*nui*dnui*ve-nu*penalty*ui*ve);
+                      M22(i,j) += dx*( .5*nue*dnve*ue+.5*nue*dnue*ve+nu*penalty*ue*ve);
+                    }
+                }
+            }
+        }
+    }
+
+    /**
+     * Residual term for the symmetric interior penalty method:
+     * @f[
+     * \int_F \Bigl( \gamma [u][v] - \{\nabla u\}[v\mathbf n] - [u\mathbf
+     * n]\{\nabla v\} \Bigr) \; ds.
+     * @f]
+     *
+     * @author Guido Kanschat
+     * @date 2012
+     */
+    template<int dim>
+    void
+    ip_residual(
+      Vector<double> &result1,
+      Vector<double> &result2,
+      const FEValuesBase<dim> &fe1,
+      const FEValuesBase<dim> &fe2,
+      const std::vector<double> &input1,
+      const std::vector<Tensor<1,dim> > &Dinput1,
+      const std::vector<double> &input2,
+      const std::vector<Tensor<1,dim> > &Dinput2,
+      double pen,
+      double int_factor = 1.,
+      double ext_factor = -1.)
+    {
+      Assert(fe1.get_fe().n_components() == 1,
+             ExcDimensionMismatch(fe1.get_fe().n_components(), 1));
+      Assert(fe2.get_fe().n_components() == 1,
+             ExcDimensionMismatch(fe2.get_fe().n_components(), 1));
+
+      const double nui = int_factor;
+      const double nue = (ext_factor < 0) ? int_factor : ext_factor;
+      const double penalty = .5 * pen * (nui + nue);
+
+      const unsigned int n_dofs = fe1.dofs_per_cell;
+
+      for (unsigned int k=0; k<fe1.n_quadrature_points; ++k)
+        {
+          const double dx = fe1.JxW(k);
+          const Tensor<1,dim> n = fe1.normal_vector(k);
+
+          for (unsigned int i=0; i<n_dofs; ++i)
+            {
+              const double vi = fe1.shape_value(i,k);
+              const Tensor<1,dim> &Dvi = fe1.shape_grad(i,k);
+              const double dnvi = Dvi * n;
+              const double ve = fe2.shape_value(i,k);
+              const Tensor<1,dim> &Dve = fe2.shape_grad(i,k);
+              const double dnve = Dve * n;
+
+              const double ui = input1[k];
+              const Tensor<1,dim> &Dui = Dinput1[k];
+              const double dnui = Dui * n;
+              const double ue = input2[k];
+              const Tensor<1,dim> &Due = Dinput2[k];
+              const double dnue = Due * n;
+
+              result1(i) += dx*(-.5*nui*dnvi*ui-.5*nui*dnui*vi+penalty*ui*vi);
+              result1(i) += dx*( .5*nui*dnvi*ue-.5*nue*dnue*vi-penalty*vi*ue);
+              result2(i) += dx*(-.5*nue*dnve*ui+.5*nui*dnui*ve-penalty*ui*ve);
+              result2(i) += dx*( .5*nue*dnve*ue+.5*nue*dnue*ve+penalty*ue*ve);
+            }
+        }
+    }
+
+
+    /**
+     * Vector-valued residual term for the symmetric interior penalty method:
+     * @f[
+     * \int_F \Bigl( \gamma [\mathbf u]\cdot[\mathbf v]
+     * - \{\nabla \mathbf u\}[\mathbf v\otimes \mathbf n]
+     * - [\mathbf u\otimes \mathbf n]\{\nabla \mathbf v\} \Bigr) \; ds.
+     * @f]
+     *
+     * @author Guido Kanschat
+     * @date 2012
+     */
+    template<int dim>
+    void
+    ip_residual(
+      Vector<double> &result1,
+      Vector<double> &result2,
+      const FEValuesBase<dim> &fe1,
+      const FEValuesBase<dim> &fe2,
+      const VectorSlice<const std::vector<std::vector<double> > > &input1,
+      const VectorSlice<const std::vector<std::vector<Tensor<1,dim> > > > &Dinput1,
+      const VectorSlice<const std::vector<std::vector<double> > > &input2,
+      const VectorSlice<const std::vector<std::vector<Tensor<1,dim> > > > &Dinput2,
+      double pen,
+      double int_factor = 1.,
+      double ext_factor = -1.)
+    {
+      const unsigned int n_comp = fe1.get_fe().n_components();
+      const unsigned int n1 = fe1.dofs_per_cell;
+
+      AssertVectorVectorDimension(input1, n_comp, fe1.n_quadrature_points);
+      AssertVectorVectorDimension(Dinput1, n_comp, fe1.n_quadrature_points);
+      AssertVectorVectorDimension(input2, n_comp, fe2.n_quadrature_points);
+      AssertVectorVectorDimension(Dinput2, n_comp, fe2.n_quadrature_points);
+
+      const double nui = int_factor;
+      const double nue = (ext_factor < 0) ? int_factor : ext_factor;
+      const double penalty = .5 * pen * (nui + nue);
+
+
+      for (unsigned int k=0; k<fe1.n_quadrature_points; ++k)
+        {
+          const double dx = fe1.JxW(k);
+          const Tensor<1,dim> n = fe1.normal_vector(k);
+
+          for (unsigned int i=0; i<n1; ++i)
+            for (unsigned int d=0; d<n_comp; ++d)
+              {
+                const double vi = fe1.shape_value_component(i,k,d);
+                const Tensor<1,dim> &Dvi = fe1.shape_grad_component(i,k,d);
+                const double dnvi = Dvi * n;
+                const double ve = fe2.shape_value_component(i,k,d);
+                const Tensor<1,dim> &Dve = fe2.shape_grad_component(i,k,d);
+                const double dnve = Dve * n;
+
+                const double ui = input1[d][k];
+                const Tensor<1,dim> &Dui = Dinput1[d][k];
+                const double dnui = Dui * n;
+                const double ue = input2[d][k];
+                const Tensor<1,dim> &Due = Dinput2[d][k];
+                const double dnue = Due * n;
+
+                result1(i) += dx*(-.5*nui*dnvi*ui-.5*nui*dnui*vi+penalty*ui*vi);
+                result1(i) += dx*( .5*nui*dnvi*ue-.5*nue*dnue*vi-penalty*vi*ue);
+                result2(i) += dx*(-.5*nue*dnve*ui+.5*nui*dnui*ve-penalty*ui*ve);
+                result2(i) += dx*( .5*nue*dnve*ue+.5*nue*dnue*ve+penalty*ue*ve);
+              }
+        }
+    }
+
+
+
+    /**
+     * Auxiliary function computing the penalty parameter for interior penalty
+     * methods on rectangles.
+     *
+     * Computation is done in two steps: first, we compute on each cell
+     * <i>Z<sub>i</sub></i> the value <i>P<sub>i</sub> =
+     * p<sub>i</sub>(p<sub>i</sub>+1)/h<sub>i</sub></i>, where
+     * <i>p<sub>i</sub></i> is the polynomial degree on cell <i>Z<sub>i</sub></i>
+     * and <i>h<sub>i</sub></i> is the length of <i>Z<sub>i</sub></i> orthogonal to
+     * the current face. Second, the mean value of the two numbers is returned.
+     *
+     * @author Guido Kanschat
+     * @date 2010
+     */
+    template <int dim, int spacedim, typename number>
+    double compute_penalty(
+      const MeshWorker::DoFInfo<dim,spacedim,number> &dinfo1,
+      const MeshWorker::DoFInfo<dim,spacedim,number> &dinfo2,
+      unsigned int deg1,
+      unsigned int deg2)
+    {
+      const unsigned int normal1 = GeometryInfo<dim>::unit_normal_direction[dinfo1.face_number];
+      const unsigned int normal2 = GeometryInfo<dim>::unit_normal_direction[dinfo2.face_number];
+      const unsigned int deg1sq = (deg1 == 0) ? 1 : deg1 * (deg1+1);
+      const unsigned int deg2sq = (deg2 == 0) ? 1 : deg2 * (deg2+1);
+
+      double penalty1 = deg1sq / dinfo1.cell->extent_in_direction(normal1);
+      double penalty2 = deg2sq / dinfo2.cell->extent_in_direction(normal2);
+      if (dinfo1.cell->has_children() ^ dinfo2.cell->has_children())
+        {
+          Assert (dinfo1.face == dinfo2.face, ExcInternalError());
+          Assert (dinfo1.face->has_children(), ExcInternalError());
+          penalty1 *= 2;
+        }
+      const double penalty = 0.5*(penalty1 + penalty2);
+      return penalty;
+    }
+  }
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/integrators/local_integrators.h b/include/deal.II/integrators/local_integrators.h
new file mode 100644
index 0000000..bcb4ca2
--- /dev/null
+++ b/include/deal.II/integrators/local_integrators.h
@@ -0,0 +1,153 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__integrators_local_integrators_h
+#define dealii__integrators_local_integrators_h
+
+// This file only provides definition and documentation of the
+// namespace LocalIntegrators. There is no necessity to include it
+// anywhere in a C++ code. Only doxygen will make use of it.
+
+#include <deal.II/base/config.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/**
+ * @brief Library of integrals over cells and faces
+ *
+ * This namespace contains application specific local integrals for bilinear
+ * forms, linear forms and error estimates. It is a collection of functions organized
+ * into namespaces devoted to certain applications. For instance, the
+ * namespace Laplace contains functions for computing cell matrices and cell
+ * residuals for the Laplacian operator, as well as functions for the weak
+ * boundary conditions by Nitsche or the interior penalty discontinuous
+ * Galerkin method. The namespace Maxwell does the same for curl-curl type
+ * problems.
+ *
+ * The namespace L2 contains functions for mass matrices and
+ * <i>L<sup>2</sup></i>-inner products.
+ *
+ * <h3>Notational conventions</h3>
+ *
+ * In most cases, the action of a function in this namespace can be described
+ * by a single integral. We distinguish between integrals over cells <i>Z</i>
+ * and over faces <i>F</i>. If an integral is denoted as
+ * @f[
+ *   \int_Z u \otimes v \,dx,
+ * @f]
+ * it will yield the following results, depending on the type of operation
+ * <ul>
+ * <li> If the function returns a matrix, the entry at position <i>(i,j)</i>
+ * will be the integrated product of test function <i>v<sub>i</sub></i> and
+ * trial function <i>u<sub>j</sub></i> (note the reversal of indices)</li>
+ * <li> If the function returns a vector, then the vector entry at position
+ * <i>i</i> will be the integrated product of the given function <i>u</i> with
+ * the test function <i>v<sub>i</sub></i>.</li>
+ * <li> If the function returns a number, then this number is the integral of
+ * the two given functions <i>u</i> and <i>v</i>.</li>
+ * </ul>
+ *
+ * We will use regular cursive symbols $u$ for scalars and bold symbols
+ * $\mathbf u$ for vectors. Test functions are always <i>v</i> and trial
+ * functions are always <i>u</i>. Parameters are Greek and the face normal
+ * vectors are $\mathbf n = \mathbf n_1 = -\mathbf n_2$.
+ *
+ * <h3>Signature of functions</h3>
+ *
+ * Functions in this namespace follow a generic signature. In the simplest
+ * case, you have two related functions
+ * @code
+ *   template <int dim>
+ *   void
+ *   cell_matrix (
+ *     FullMatrix<double>& M,
+ *     const FEValuesBase<dim>& fe,
+ *     const double factor = 1.);
+ *
+ *   template <int dim>
+ *   void
+ *   cell_residual (
+ *     BlockVector<double>* v,
+ *     const FEValuesBase<dim>& fe,
+ *     const std::vector<Tensor<1,dim> >& input,
+ *     const double factor = 1.);
+ * @endcode
+ *
+ * There is typically a pair of functions for the same operator, the function
+ * <tt>cell_residual</tt> implementing the mapping of the operator from the
+ * finite element space into its dual, and the function <tt>cell_matrix</tt>
+ * generating the bilinear form corresponding to the Frechet derivative of
+ * <tt>cell_residual</tt>.
+ *
+ * The first argument of these functions is the return type, which is
+ * <ul>
+ * <li> FullMatrix&lt;double&gt; for matrices
+ * <li> BlockVector&lt;double&gt; for vectors
+ * </ul>
+ *
+ * The next argument is the FEValuesBase object representing the finite
+ * element for integration. If the integrated operator maps from one finite
+ * element space into the dual of another (for instance an off-diagonal matrix
+ * in a block system), then first the FEValuesBase for the trial space and
+ * after this the one for the test space are specified.
+ *
+ * This list is followed by the set of required data in the order
+ * <ol>
+ * <li> Data vectors from finite element functions
+ * <li> Data vectors from other objects
+ * <li> Additional data
+ * <li> A factor which is multiplied with the whole result
+ * </ol>
+ *
+ * <h3>Usage</h3>
+ *
+ * The local integrators can be used wherever a local integration loop would
+ * have been implemented instead. The following example is from the
+ * implementation of a Stokes solver, using
+ * MeshWorker::Assembler::LocalBlocksToGlobalBlocks. The matrices are
+ * <ul>
+ * <li> 0: The vector Laplacian for the velocity (here with a vector valued
+ * element)
+ * <li> 1: The divergence matrix
+ * <li> 2: The pressure mass matrix used in the preconditioner
+ * </ul>
+ *
+ * With these matrices, the function called by MeshWorker::loop() could be
+ * written like
+ * @code
+ * using namespace ::dealii:: LocalIntegrators;
+ *
+ * template <int dim>
+ * void MatrixIntegrator<dim>::cell(
+ * MeshWorker::DoFInfo<dim>& dinfo,
+ * typename MeshWorker::IntegrationInfo<dim>& info)
+ * {
+ * Laplace::cell_matrix(dinfo.matrix(0,false).matrix, info.fe_values(0));
+ * Divergence::cell_matrix(dinfo.matrix(1,false).matrix, info.fe_values(0), info.fe_values(1));
+ * L2::cell_matrix(dinfo.matrix(2,false).matrix, info.fe_values(1));
+ * }
+ * @endcode
+ * See step-39 for a worked out example of this code.
+ *
+ * @ingroup Integrators
+ */
+namespace LocalIntegrators
+{
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/integrators/maxwell.h b/include/deal.II/integrators/maxwell.h
new file mode 100644
index 0000000..64432a9
--- /dev/null
+++ b/include/deal.II/integrators/maxwell.h
@@ -0,0 +1,462 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__integrators_maxwell_h
+#define dealii__integrators_maxwell_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/quadrature.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/fe/mapping.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/meshworker/dof_info.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace LocalIntegrators
+{
+  /**
+   * @brief Local integrators related to curl operators and their traces.
+   *
+   * We use the following conventions for curl operators. First, in three
+   * space dimensions
+   *
+   * @f[
+   * \nabla\times \mathbf u = \begin{pmatrix}
+   *   \partial_3 u_2 - \partial_2 u_3 \\
+   *   \partial_1 u_3 - \partial_3 u_1 \\
+   *   \partial_2 u_1 - \partial_1 u_2
+   * \end{pmatrix}.
+   * @f]
+   *
+   * In two space dimensions, the curl is obtained by extending a vector
+   * <b>u</b> to $(u_1, u_2, 0)^T$ and a scalar <i>p</i> to $(0,0,p)^T$.
+   * Computing the nonzero components, we obtain the scalar curl of a vector
+   * function and the vector curl of a scalar function. The current
+   * implementation exchanges the sign and we have:
+   * @f[
+   *  \nabla \times \mathbf u = \partial_1 u_2 - \partial_2 u_1,
+   *  \qquad
+   *  \nabla \times p = \begin{pmatrix}
+   *    \partial_2 p \\ -\partial_1 p
+   *  \end{pmatrix}
+   * @f]
+   *
+   * @ingroup Integrators
+   * @author Guido Kanschat
+   * @date 2010
+   */
+  namespace Maxwell
+  {
+    /**
+     * Auxiliary function. Given the tensors of <tt>dim</tt> second
+     * derivatives, compute the curl of the curl of a vector function. The
+     * result in two and three dimensions is:
+     * @f[
+     * \nabla\times\nabla\times \mathbf u = \begin{pmatrix}
+     * \partial_1\partial_2 u_2 - \partial_2^2 u_1 \\
+     * \partial_1\partial_2 u_1 - \partial_1^2 u_2
+     * \end{pmatrix}
+     * \qquad\mbox{and}\qquad
+     * \nabla\times\nabla\times \mathbf u = \begin{pmatrix}
+     * \partial_1\partial_2 u_2 + \partial_1\partial_3 u_3
+     * - (\partial_2^2+\partial_3^2) u_1 \\
+     * \partial_2\partial_3 u_3 + \partial_2\partial_1 u_1
+     * - (\partial_3^2+\partial_1^2) u_2 \\
+     * \partial_3\partial_1 u_1 + \partial_3\partial_2 u_2
+     * - (\partial_1^2+\partial_2^2) u_3
+     * \end{pmatrix}
+     * @f]
+     *
+     * @note The third tensor argument is not used in two dimensions and can
+     * for instance duplicate one of the previous.
+     *
+     * @author Guido Kanschat
+     * @date 2011
+     */
+    template <int dim>
+    Tensor<1,dim>
+    curl_curl (
+      const Tensor<2,dim> &h0,
+      const Tensor<2,dim> &h1,
+      const Tensor<2,dim> &h2)
+    {
+      Tensor<1,dim> result;
+      switch (dim)
+        {
+        case 2:
+          result[0] = h1[0][1]-h0[1][1];
+          result[1] = h0[0][1]-h1[0][0];
+          break;
+        case 3:
+          result[0] = h1[0][1]+h2[0][2]-h0[1][1]-h0[2][2];
+          result[1] = h2[1][2]+h0[1][0]-h1[2][2]-h1[0][0];
+          result[2] = h0[2][0]+h1[2][1]-h2[0][0]-h2[1][1];
+          break;
+        default:
+          Assert(false, ExcNotImplemented());
+        }
+      return result;
+    }
+
+    /**
+     * Auxiliary function. Given <tt>dim</tt> tensors of first derivatives and
+     * a normal vector, compute the tangential curl
+     * @f[
+     * \mathbf n \times (\nabla \times \mathbf u).
+     * @f]
+     *
+     * @note The third tensor argument is not used in two dimensions and can
+     * for instance duplicate one of the previous.
+     *
+     * @author Guido Kanschat
+     * @date 2011
+     */
+    template <int dim>
+    Tensor<1,dim>
+    tangential_curl (
+      const Tensor<1,dim> &g0,
+      const Tensor<1,dim> &g1,
+      const Tensor<1,dim> &g2,
+      const Tensor<1,dim> &normal)
+    {
+      Tensor<1,dim> result;
+      // With c = (g2[1]-g1[2], g0[2]-g2[0], g1[0]-g0[1]), the result is n x c.
+      switch (dim)
+        {
+        case 2: // only the third component of c is nonzero here
+          result[0] = normal[1] * (g1[0]-g0[1]);
+          result[1] =-normal[0] * (g1[0]-g0[1]);
+          break;
+        case 3: // reduces to the 2d case for normal[2]=0 and a planar field
+          result[0] = normal[2]*(g2[0]-g0[2])+normal[1]*(g1[0]-g0[1]);
+          result[1] = normal[0]*(g0[1]-g1[0])+normal[2]*(g2[1]-g1[2]);
+          result[2] = normal[1]*(g1[2]-g2[1])+normal[0]*(g0[2]-g2[0]);
+          break;
+        default:
+          Assert(false, ExcNotImplemented());
+        }
+      return result;
+    }
+
+    /**
+     * The curl-curl operator
+     * @f[
+     * \int_Z \nabla\!\times\! u \cdot
+     * \nabla\!\times\! v \,dx
+     * @f]
+     * in weak form.
+     *
+     * @author Guido Kanschat
+     * @date 2011
+     */
+    template <int dim>
+    void curl_curl_matrix (
+      FullMatrix<double> &M,
+      const FEValuesBase<dim> &fe,
+      const double factor = 1.)
+    {
+      const unsigned int n_dofs = fe.dofs_per_cell;
+
+      AssertDimension(fe.get_fe().n_components(), dim);
+      AssertDimension(M.m(), n_dofs);
+      AssertDimension(M.n(), n_dofs);
+
+      // Depending on the dimension,
+      // the cross product is either
+      // a scalar (2d) or a vector
+      // (3d). Accordingly, in the
+      // latter case we have to sum
+      // up three bilinear forms, but
+      // in 2d, we don't. Thus, we
+      // need to adapt the loop over
+      // all dimensions
+      const unsigned int d_max = (dim==2) ? 1 : dim;
+
+      for (unsigned int k=0; k<fe.n_quadrature_points; ++k)
+        {
+          const double dx = factor * fe.JxW(k);
+          for (unsigned int i=0; i<n_dofs; ++i)
+            for (unsigned int j=0; j<n_dofs; ++j)
+              for (unsigned int d=0; d<d_max; ++d)
+                {
+                  const unsigned int d1 = (d+1)%dim;
+                  const unsigned int d2 = (d+2)%dim;
+
+                  const double cv = fe.shape_grad_component(i,k,d1)[d2] - fe.shape_grad_component(i,k,d2)[d1];
+                  const double cu = fe.shape_grad_component(j,k,d1)[d2] - fe.shape_grad_component(j,k,d2)[d1];
+
+                  M(i,j) += dx * cu * cv;
+                }
+        }
+    }
+
+    /**
+     * The matrix for the curl operator
+     * @f[
+     * \int_Z \nabla\!\times\! u \cdot v \,dx.
+     * @f]
+     *
+     * This is the standard curl operator in 3D and the scalar curl in 2D. The
+     * vector curl operator can be obtained by exchanging test and trial
+     * functions.
+     *
+     * @author Guido Kanschat
+     * @date 2011
+     */
+    template <int dim>
+    void curl_matrix (
+      FullMatrix<double> &M,
+      const FEValuesBase<dim> &fe,
+      const FEValuesBase<dim> &fetest,
+      double factor = 1.)
+    {
+      unsigned int t_comp = (dim==3) ? dim : 1;
+      const unsigned int n_dofs = fe.dofs_per_cell;
+      const unsigned int t_dofs = fetest.dofs_per_cell;
+      AssertDimension(fe.get_fe().n_components(), dim);
+      AssertDimension(fetest.get_fe().n_components(), t_comp);
+      AssertDimension(M.m(), t_dofs);
+      AssertDimension(M.n(), n_dofs);
+
+      const unsigned int d_max = (dim==2) ? 1 : dim;
+
+      for (unsigned int k=0; k<fe.n_quadrature_points; ++k)
+        {
+          const double dx = fe.JxW(k) * factor;
+          for (unsigned int i=0; i<t_dofs; ++i)
+            for (unsigned int j=0; j<n_dofs; ++j)
+              for (unsigned int d=0; d<d_max; ++d)
+                {
+                  const unsigned int d1 = (d+1)%dim;
+                  const unsigned int d2 = (d+2)%dim;
+
+                  const double vv = fetest.shape_value_component(i,k,d);
+                  const double cu = fe.shape_grad_component(j,k,d1)[d2] - fe.shape_grad_component(j,k,d2)[d1];
+                  M(i,j) += dx * cu * vv;
+                }
+        }
+    }
+
+    /**
+     * The matrix for weak boundary condition of Nitsche type for the
+     * tangential component in Maxwell systems.
+     *
+     * @f[
+     * \int_F \biggl( 2\gamma
+     * (u\times n) (v\times n) -
+     * (u\times n)(\nu \nabla\times
+     * v) - (v\times
+     * n)(\nu \nabla\times u)
+     * \biggr)
+     * @f]
+     *
+     * @author Guido Kanschat
+     * @date 2011
+     */
+    template <int dim>
+    void nitsche_curl_matrix (
+      FullMatrix<double> &M,
+      const FEValuesBase<dim> &fe,
+      double penalty,
+      double factor = 1.)
+    {
+      const unsigned int n_dofs = fe.dofs_per_cell;
+
+      AssertDimension(fe.get_fe().n_components(), dim);
+      AssertDimension(M.m(), n_dofs);
+      AssertDimension(M.n(), n_dofs);
+
+      // Depending on the
+      // dimension, the cross
+      // product is either a scalar
+      // (2d) or a vector
+      // (3d). Accordingly, in the
+      // latter case we have to sum
+      // up three bilinear forms,
+      // but in 2d, we don't. Thus,
+      // we need to adapt the loop
+      // over all dimensions
+      const unsigned int d_max = (dim==2) ? 1 : dim;
+
+      for (unsigned int k=0; k<fe.n_quadrature_points; ++k)
+        {
+          const double dx = factor * fe.JxW(k);
+          const Tensor<1,dim> n = fe.normal_vector(k);
+          for (unsigned int i=0; i<n_dofs; ++i)
+            for (unsigned int j=0; j<n_dofs; ++j)
+              for (unsigned int d=0; d<d_max; ++d)
+                {
+                  const unsigned int d1 = (d+1)%dim;
+                  const unsigned int d2 = (d+2)%dim;
+
+                  const double cv = fe.shape_grad_component(i,k,d1)[d2] - fe.shape_grad_component(i,k,d2)[d1];
+                  const double cu = fe.shape_grad_component(j,k,d1)[d2] - fe.shape_grad_component(j,k,d2)[d1];
+                  const double v= fe.shape_value_component(i,k,d1)*n(d2) - fe.shape_value_component(i,k,d2)*n(d1);
+                  const double u= fe.shape_value_component(j,k,d1)*n(d2) - fe.shape_value_component(j,k,d2)*n(d1);
+
+                  M(i,j) += dx*(2.*penalty*u*v - cv*u - cu*v);
+                }
+        }
+    }
+    /**
+     * The product of two tangential traces,
+     * @f[
+     * \int_F (u\times n)(v\times n)
+     * \, ds.
+     * @f]
+     *
+     * @author Guido Kanschat
+     * @date 2011
+     */
+    template <int dim>
+    void tangential_trace_matrix (
+      FullMatrix<double> &M,
+      const FEValuesBase<dim> &fe,
+      double factor = 1.)
+    {
+      const unsigned int n_dofs = fe.dofs_per_cell;
+
+      AssertDimension(fe.get_fe().n_components(), dim);
+      AssertDimension(M.m(), n_dofs);
+      AssertDimension(M.n(), n_dofs);
+
+      // Depending on the
+      // dimension, the cross
+      // product is either a scalar
+      // (2d) or a vector
+      // (3d). Accordingly, in the
+      // latter case we have to sum
+      // up three bilinear forms,
+      // but in 2d, we don't. Thus,
+      // we need to adapt the loop
+      // over all dimensions
+      const unsigned int d_max = (dim==2) ? 1 : dim;
+
+      for (unsigned int k=0; k<fe.n_quadrature_points; ++k)
+        {
+          const double dx = factor * fe.JxW(k);
+          const Tensor<1,dim> n = fe.normal_vector(k);
+          for (unsigned int i=0; i<n_dofs; ++i)
+            for (unsigned int j=0; j<n_dofs; ++j)
+              for (unsigned int d=0; d<d_max; ++d)
+                {
+                  const unsigned int d1 = (d+1)%dim;
+                  const unsigned int d2 = (d+2)%dim;
+
+                  const double v= fe.shape_value_component(i,k,d1)*n(d2) - fe.shape_value_component(i,k,d2)*n(d1);
+                  const double u= fe.shape_value_component(j,k,d1)*n(d2) - fe.shape_value_component(j,k,d2)*n(d1);
+
+                  M(i,j) += dx*u*v;
+                }
+        }
+    }
+
+    /**
+     * The interior penalty fluxes for Maxwell systems.
+     *
+     * @f[
+     * \int_F \biggl( \gamma
+     * \{u\times n\}\{v\times n\} -
+     * \{u\times n\}\{\nu \nabla\times
+     * v\}- \{v\times
+     * n\}\{\nu \nabla\times u\}
+     * \biggr)\;ds
+     * @f]
+     * <i>&nu;</i> is <tt>factor1</tt> on the first cell and <tt>factor2</tt> on the second.
+     *
+     * @author Guido Kanschat
+     * @date 2011
+     */
+    template <int dim>
+    inline void ip_curl_matrix (
+      FullMatrix<double> &M11,
+      FullMatrix<double> &M12,
+      FullMatrix<double> &M21,
+      FullMatrix<double> &M22,
+      const FEValuesBase<dim> &fe1,
+      const FEValuesBase<dim> &fe2,
+      const double pen,
+      const double factor1 = 1.,
+      const double factor2 = -1.)
+    {
+      const unsigned int n_dofs = fe1.dofs_per_cell;
+
+      AssertDimension(fe1.get_fe().n_components(), dim);
+      AssertDimension(fe2.get_fe().n_components(), dim);
+      AssertDimension(M11.m(), n_dofs);
+      AssertDimension(M11.n(), n_dofs);
+      AssertDimension(M12.m(), n_dofs);
+      AssertDimension(M12.n(), n_dofs);
+      AssertDimension(M21.m(), n_dofs);
+      AssertDimension(M21.n(), n_dofs);
+      AssertDimension(M22.m(), n_dofs);
+      AssertDimension(M22.n(), n_dofs);
+
+      // A negative factor2 (the default) means that the coefficient of the
+      // first cell is used on both sides of the face.
+      const double nu1 = factor1;
+      const double nu2 = (factor2 < 0) ? factor1 : factor2;
+      // The penalty is scaled with the mean value of the two coefficients.
+      const double penalty = .5 * pen * (nu1 + nu2);
+
+      // Depending on the dimension, the cross product is either a scalar
+      // (2d) or a vector (3d). Accordingly, in the latter case we have to
+      // sum up three bilinear forms, but in 2d, we don't. Thus, we need to
+      // adapt the loop over all dimensions.
+      const unsigned int d_max = (dim==2) ? 1 : dim;
+
+      for (unsigned int k=0; k<fe1.n_quadrature_points; ++k)
+        {
+          const double dx = fe1.JxW(k);
+          const Tensor<1,dim> n = fe1.normal_vector(k);
+          for (unsigned int i=0; i<n_dofs; ++i)
+            for (unsigned int j=0; j<n_dofs; ++j)
+              for (unsigned int d=0; d<d_max; ++d)
+                {
+                  const unsigned int d1 = (d+1)%dim;
+                  const unsigned int d2 = (d+2)%dim;
+                  // nu * curl v and nu * curl u on both cells. The
+                  // coefficient has to multiply the whole curl component,
+                  // not only its first term, hence the parentheses.
+                  const double cv1 = nu1*(fe1.shape_grad_component(i,k,d1)[d2] - fe1.shape_grad_component(i,k,d2)[d1]);
+                  const double cv2 = nu2*(fe2.shape_grad_component(i,k,d1)[d2] - fe2.shape_grad_component(i,k,d2)[d1]);
+                  const double cu1 = nu1*(fe1.shape_grad_component(j,k,d1)[d2] - fe1.shape_grad_component(j,k,d2)[d1]);
+                  const double cu2 = nu2*(fe2.shape_grad_component(j,k,d1)[d2] - fe2.shape_grad_component(j,k,d2)[d1]);
+
+                  // u x n, v x n
+                  const double u1= fe1.shape_value_component(j,k,d1)*n(d2) - fe1.shape_value_component(j,k,d2)*n(d1);
+                  const double u2=-fe2.shape_value_component(j,k,d1)*n(d2) + fe2.shape_value_component(j,k,d2)*n(d1);
+                  const double v1= fe1.shape_value_component(i,k,d1)*n(d2) - fe1.shape_value_component(i,k,d2)*n(d1);
+                  const double v2=-fe2.shape_value_component(i,k,d1)*n(d2) + fe2.shape_value_component(i,k,d2)*n(d1);
+
+                  M11(i,j) += .5*dx*(2.*penalty*u1*v1 - cv1*u1 - cu1*v1);
+                  M12(i,j) += .5*dx*(2.*penalty*v1*u2 - cv1*u2 - cu2*v1);
+                  M21(i,j) += .5*dx*(2.*penalty*u1*v2 - cv2*u1 - cu1*v2);
+                  M22(i,j) += .5*dx*(2.*penalty*u2*v2 - cv2*u2 - cu2*v2);
+                }
+        }
+    }
+
+
+  }
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/integrators/patches.h b/include/deal.II/integrators/patches.h
new file mode 100644
index 0000000..b40c9b3
--- /dev/null
+++ b/include/deal.II/integrators/patches.h
@@ -0,0 +1,65 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__integrators_patches_h
+#define dealii__integrators_patches_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/quadrature.h>
+#include <deal.II/fe/mapping.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/meshworker/dof_info.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace LocalIntegrators
+{
+  /**
+   * @brief Integrators writing patches with values in quadrature points
+   *
+   * @author Guido Kanschat
+   * @date 2011
+   */
+  namespace Patches
+  {
+    /**
+     * Fill the table @p result with one row per quadrature point of @p fe.
+     * The first <tt>dim</tt> columns of a row receive the coordinates of
+     * the quadrature point; the remaining columns receive
+     * <tt>input[c][q]</tt> for every component <tt>c</tt>.
+     *
+     * @p result is not resized here: it must already have
+     * <tt>fe.n_quadrature_points</tt> rows and <tt>n_components+dim</tt>
+     * columns, and @p input must hold one vector of length
+     * <tt>fe.n_quadrature_points</tt> per component.
+     */
+    template <int dim>
+    inline
+    void
+    points_and_values(
+      Table<2, double> &result,
+      const FEValuesBase<dim> &fe,
+      const VectorSlice<const std::vector<std::vector<double> > > &input)
+    {
+      const unsigned int n_components = fe.get_fe().n_components();
+      AssertVectorVectorDimension(input, n_components, fe.n_quadrature_points);
+      AssertDimension(result.n_rows(), fe.n_quadrature_points);
+      AssertDimension(result.n_cols(), n_components+dim);
+
+      for (unsigned int q=0; q<fe.n_quadrature_points; ++q)
+        {
+          unsigned int column = 0;
+
+          // leading columns: coordinates of the quadrature point
+          for (; column<dim; ++column)
+            result(q,column) = fe.quadrature_point(q)[column];
+
+          // trailing columns: one input value per component
+          for (unsigned int c=0; c<n_components; ++c, ++column)
+            result(q,column) = input[c][q];
+        }
+    }
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/arpack_solver.h b/include/deal.II/lac/arpack_solver.h
new file mode 100644
index 0000000..63751a8
--- /dev/null
+++ b/include/deal.II/lac/arpack_solver.h
@@ -0,0 +1,553 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__arpack_solver_h
+#define dealii__arpack_solver_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/lac/solver_control.h>
+
+#include <cstring>
+
+
+#ifdef DEAL_II_WITH_ARPACK
+
+DEAL_II_NAMESPACE_OPEN
+
+
+extern "C"
+{
+  // ARPACK routine dnaupd. It is called repeatedly by ArpackSolver::solve(),
+  // which inspects ido after every call to decide which operation to
+  // perform. All arguments are passed by address.
+  void dnaupd_(int *ido, char *bmat, const unsigned int *n, char *which,
+               const unsigned int *nev, const double *tol, double *resid,
+               int *ncv, double *v, int *ldv, int *iparam, int *ipntr,
+               double *workd, double *workl, int *lworkl,
+               int *info);
+
+  // ARPACK routine dneupd. It is called once by ArpackSolver::solve() after
+  // the dnaupd_ iteration has finished and shares most of its arguments
+  // with dnaupd_.
+  void dneupd_(int *rvec, char *howmany, int *select, double *d,
+               double *di, double *z, int *ldz, double *sigmar,
+               double *sigmai, double *workev, char *bmat,
+               const unsigned int *n, char *which,
+               const unsigned int *nev, const double *tol, double *resid,
+               int *ncv, double *v, int *ldv, int *iparam, int *ipntr,
+               double *workd, double *workl, int *lworkl, int *info);
+}
+
+/**
+ * Interface for using ARPACK. ARPACK is a collection of Fortran77 subroutines
+ * designed to solve large scale eigenvalue problems.  Here we interface to
+ * the routines <code>dneupd</code> and <code>dnaupd</code> of ARPACK.  The
+ * package is designed to compute a few eigenvalues and corresponding
+ * eigenvectors of a general n by n matrix A. It is most appropriate for large
+ * sparse matrices A.
+ *
+ * In this class we make use of the method applied to the generalized
+ * eigenspectrum problem $(A-\lambda B)x=0$, for $x\neq0$; where $A$ is a
+ * system matrix, $B$ is a mass matrix, and $\lambda, x$ are a set of
+ * eigenvalues and eigenvectors respectively.
+ *
+ * The ArpackSolver can be used in application codes with serial objects in
+ * the following way:
+ * @code
+ * SolverControl solver_control (1000, 1e-9);
+ * ArpackSolver system (solver_control);
+ * system.solve (A, B, OP, lambda, x, size_of_spectrum);
+ * @endcode
+ * for the generalized eigenvalue problem $Ax=B\lambda x$, where the variable
+ * <code>size_of_spectrum</code> tells ARPACK the number of
+ * eigenvector/eigenvalue pairs to solve for. Here, <code>lambda</code> is a
+ * vector that will contain the eigenvalues computed, <code>x</code> a vector
+ * that will contain the eigenvectors computed, and <code>OP</code> is an
+ * inverse operation for the matrix <code>A</code>. Shift and invert
+ * transformation around zero is applied.
+ *
+ * Through the AdditionalData the user can specify some of the parameters to
+ * be set.
+ *
+ * For further information on how the ARPACK routines <code>dneupd</code> and
+ * <code>dnaupd</code> work and also how to set the parameters appropriately
+ * please take a look into the ARPACK manual.
+ *
+ * @note Whenever you eliminate degrees of freedom using ConstraintMatrix, you
+ * generate spurious eigenvalues and eigenvectors. If you make sure that the
+ * diagonals of eliminated matrix rows are all equal to one, you get a single
+ * additional eigenvalue. But beware that some functions in deal.II set these
+ * diagonals to rather arbitrary (from the point of view of eigenvalue
+ * problems) values. See also
+ * @ref step_36 "step-36"
+ * for an example.
+ *
+ * @author Baerbel Janssen, Agnieszka Miedlar, 2010, Guido Kanschat 2015
+ */
+class ArpackSolver : public Subscriptor
+{
+public:
+  /**
+   * Declare the type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+
+  /**
+   * An enum that lists the possible choices for which eigenvalues to compute
+   * in the solve() function. Each value is translated by solve() into the
+   * corresponding two-letter ARPACK selector ("LA", "SA", "LM", "SM", "LR",
+   * "SR", "LI", "SI", "BE").
+   */
+  enum WhichEigenvalues
+  {
+    algebraically_largest,
+    algebraically_smallest,
+    largest_magnitude,
+    smallest_magnitude,
+    largest_real_part,
+    smallest_real_part,
+    largest_imaginary_part,
+    smallest_imaginary_part,
+    both_ends
+  };
+
+  /**
+   * Standardized data struct to pipe additional data to the solver, should it
+   * be needed.
+   */
+  struct AdditionalData
+  {
+    /**
+     * Number of Arnoldi basis vectors handed to ARPACK. solve() requires
+     * this to be smaller than the size of the problem and larger than
+     * <code>2*nev+1</code>, where <code>nev</code> is the number of
+     * requested eigenvalues.
+     */
+    const unsigned int number_of_arnoldi_vectors;
+
+    /**
+     * Part of the spectrum that solve() asks ARPACK for.
+     */
+    const WhichEigenvalues eigenvalue_of_interest;
+
+    /**
+     * Constructor. By default, 15 Arnoldi vectors are used and the
+     * eigenvalues of largest magnitude are computed.
+     */
+    AdditionalData(
+      const unsigned int number_of_arnoldi_vectors = 15,
+      const WhichEigenvalues eigenvalue_of_interest = largest_magnitude);
+  };
+
+  /**
+   * Access to the object that controls convergence.
+   */
+  SolverControl &control () const;
+
+  /**
+   * Constructor. Only a reference to @p control is stored, so that object
+   * has to outlive the solver; @p data is copied.
+   */
+  ArpackSolver(SolverControl &control,
+               const AdditionalData &data = AdditionalData());
+
+  /**
+   * Solve the generalized eigenspectrum problem $A x=\lambda B x$ by calling
+   * the <code>dneupd</code> and <code>dnaupd</code> functions of ARPACK.
+   *
+   * The function returns a vector of eigenvalues of length <i>n</i> and a
+   * vector of eigenvectors, where the latter should be twice the size of the
+   * eigenvalue vector. The first <i>n</i> vectors in
+   * <code>eigenvectors</code> will be the real parts of the eigenvectors, the
+   * second <i>n</i> the imaginary parts.
+   *
+   * @param A The operator for which we want to compute eigenvalues. Actually,
+   * this parameter is entirely unused.
+   *
+   * @param B The inner product of the underlying space, typically the mass
+   * matrix. For constrained problems, it can be a partial mass matrix, like
+   * for instance the velocity mass matrix of a Stokes problem. Only its
+   * function <code>vmult()</code> is used.
+   *
+   * @param inverse This is the possibly shifted inverse that is actually used
+   * instead of <code>A</code>. Only its function <code>vmult()</code> is
+   * used.
+   *
+   * @param eigenvalues is a vector of complex numbers in which the
+   * eigenvalues are returned.
+   *
+   * @param eigenvectors is a <b>real</b> vector of eigenvectors, containing
+   * alternatingly the real parts and the imaginary parts of the eigenvectors.
+   * Therefore, its length should be twice the number of eigenvalues. The
+   * vectors have to be initialized to match the matrices.
+   *
+   * @param n_eigenvalues The purpose of this parameter is not clear, but it
+   * is safe to set it to the size of <code>eigenvalues</code> or greater.
+   * Leave it at its default zero, which will be reset to the size of
+   * <code>eigenvalues</code> internally.
+   */
+  template <typename VectorType, typename MatrixType1,
+            typename MatrixType2, typename INVERSE>
+  void solve (const MatrixType1                  &A,
+              const MatrixType2                  &B,
+              const INVERSE                      &inverse,
+              std::vector<std::complex<double> > &eigenvalues,
+              std::vector<VectorType>            &eigenvectors,
+              const unsigned int                  n_eigenvalues = 0);
+
+protected:
+
+  /**
+   * Reference to the object that controls convergence of the iterative
+   * solver.
+   */
+  SolverControl &solver_control;
+
+  /**
+   * Store a copy of the flags for this particular solver.
+   */
+  const AdditionalData additional_data;
+
+private:
+
+  /**
+   * Exceptions.
+   */
+
+  /**
+   * More eigenvalues were requested than the problem has unknowns.
+   */
+  DeclException2 (ExcInvalidNumberofEigenvalues, int, int,
+                  << "Number of wanted eigenvalues " << arg1
+                  << " is larger that the size of the matrix " << arg2);
+
+  /**
+   * More Arnoldi vectors were requested than the problem has unknowns.
+   */
+  DeclException2 (ExcInvalidNumberofArnoldiVectors, int, int,
+                  << "Number of Arnoldi vectors " << arg1
+                  << " is larger that the size of the matrix " << arg2);
+
+  /**
+   * The Arnoldi basis is too small for the requested number of eigenvalues.
+   */
+  DeclException2 (ExcSmallNumberofArnoldiVectors, int, int,
+                  << "Number of Arnoldi vectors " << arg1
+                  << " is too small to obtain " << arg2
+                  << " eigenvalues");
+
+  /**
+   * ARPACK requested an operation (ido) that solve() does not implement.
+   */
+  DeclException1 (ExcArpackIdo, int, << "This ido " << arg1
+                  << " is not supported. Check documentation of ARPACK");
+
+  /**
+   * An ARPACK mode other than the one implemented in solve() was selected.
+   */
+  DeclException1 (ExcArpackMode, int, << "This mode " << arg1
+                  << " is not supported. Check documentation of ARPACK");
+
+  /**
+   * The Arnoldi iteration finished with a negative info flag. The exception
+   * keeps its historical name, but the routine solve() actually calls is
+   * <code>dnaupd</code>, which is what the message reports.
+   */
+  DeclException1 (ExcArpackInfodsaupd, int,
+                  << "Error with dnaupd, info " << arg1
+                  << ". Check documentation of ARPACK");
+
+  /**
+   * <code>dneupd</code> returned a nonzero info flag.
+   */
+  DeclException1 (ExcArpackInfodneupd, int,
+                  << "Error with dneupd, info " << arg1
+                  << ". Check documentation of ARPACK");
+
+  /**
+   * The maximum number of iterations was reached.
+   */
+  DeclException1 (ExcArpackInfoMaxIt, int,
+                  << "Maximum number " << arg1
+                  << " of iterations reached.");
+
+  /**
+   * No shifts could be applied during the implicit Arnoldi update.
+   */
+  DeclExceptionMsg (ExcArpackNoShifts,
+                    "No shifts could be applied during implicit"
+                    " Arnoldi update, try increasing the number of"
+                    " Arnoldi vectors.");
+};
+
+
+// Both members are const, so they can only be set through the initializer
+// list; the constructor has nothing else to do.
+inline
+ArpackSolver::AdditionalData::
+AdditionalData (const unsigned int number_of_arnoldi_vectors,
+                const WhichEigenvalues eigenvalue_of_interest)
+  : number_of_arnoldi_vectors(number_of_arnoldi_vectors),
+    eigenvalue_of_interest(eigenvalue_of_interest)
+{}
+
+
+// Bind the solver to the given control object (held by reference) and keep a
+// private copy of the additional data.
+inline
+ArpackSolver::ArpackSolver (SolverControl &control,
+                            const AdditionalData &data)
+  : solver_control (control),
+    additional_data (data)
+{}
+
+
+/*
+ * Compute eigenvalues and vectors with ARPACK's dnaupd/dneupd pair in
+ * shift-and-invert mode (mode 3) with zero shift.
+ *
+ * The first argument (the system matrix) is unused. Of mass_matrix and
+ * inverse only vmult() is called. The problem size is taken from
+ * eigenvectors[0], which therefore must exist and be initialized. If
+ * n_eigenvalues is zero, eigenvalues.size() eigenvalues are requested.
+ *
+ * All ARPACK work arrays are std::vector objects, so they are released on
+ * every exit path.
+ */
+template <typename VectorType, typename MatrixType1,
+          typename MatrixType2, typename INVERSE>
+inline
+void ArpackSolver::solve (const MatrixType1                  &/*system_matrix*/,
+                          const MatrixType2                  &mass_matrix,
+                          const INVERSE                      &inverse,
+                          std::vector<std::complex<double> > &eigenvalues,
+                          std::vector<VectorType>            &eigenvectors,
+                          const unsigned int                  n_eigenvalues)
+{
+  // eigenvectors[0] defines the size of the problem, so it has to exist
+  AssertIndexRange (0, eigenvectors.size());
+
+  //inside the routines of ARPACK the
+  //values change magically, so store
+  //them here
+
+  const unsigned int n = eigenvectors[0].size();
+  const unsigned int n_inside_arpack = eigenvectors[0].size();
+
+  // Number of eigenvalues for arpack
+  const unsigned int nev = (n_eigenvalues == 0) ? eigenvalues.size() : n_eigenvalues;
+  AssertIndexRange(eigenvalues.size()-1, nev);
+
+  // check the number actually passed to ARPACK, not the raw argument, which
+  // is zero when the default is used
+  Assert (nev < n,
+          ExcInvalidNumberofEigenvalues(nev, n));
+
+  Assert (additional_data.number_of_arnoldi_vectors < n,
+          ExcInvalidNumberofArnoldiVectors(
+            additional_data.number_of_arnoldi_vectors, n));
+
+  Assert (additional_data.number_of_arnoldi_vectors > 2*nev+1,
+          ExcSmallNumberofArnoldiVectors(
+            additional_data.number_of_arnoldi_vectors, nev));
+  // ARPACK mode for dnaupd, here only mode 3
+  int mode = 3;
+
+  // reverse communication parameter
+  int ido = 0;
+
+  /**
+   * 'G' generalized eigenvalue problem 'I' standard eigenvalue problem
+   */
+  char bmat[2] = "G";
+
+  /**
+   * Specify the eigenvalues of interest, possible parameters "LA"
+   * algebraically largest "SA" algebraically smallest "LM" largest magnitude
+   * "SM" smallest magnitude "LR" largest real part "SR" smallest real part
+   * "LI" largest imaginary part "SI" smallest imaginary part "BE" both ends
+   * of spectrum simultaneous
+   */
+  char which[3];
+  switch (additional_data.eigenvalue_of_interest)
+    {
+    case algebraically_largest:
+      std::strcpy (which, "LA");
+      break;
+    case algebraically_smallest:
+      std::strcpy (which, "SA");
+      break;
+    case largest_magnitude:
+      std::strcpy (which, "LM");
+      break;
+    case smallest_magnitude:
+      std::strcpy (which, "SM");
+      break;
+    case largest_real_part:
+      std::strcpy (which, "LR");
+      break;
+    case smallest_real_part:
+      std::strcpy (which, "SR");
+      break;
+    case largest_imaginary_part:
+      std::strcpy (which, "LI");
+      break;
+    case smallest_imaginary_part:
+      std::strcpy (which, "SI");
+      break;
+    case both_ends:
+      std::strcpy (which, "BE");
+      break;
+    }
+
+  // tolerance for ARPACK
+  const double tol = control().tolerance();
+
+  // if the starting vector is used it has to be in resid
+  std::vector<double> resid(n, 1.);
+
+  // number of Arnoldi basis vectors specified
+  // in additional_data
+  int ncv = additional_data.number_of_arnoldi_vectors;
+
+  int ldv = n;
+  std::vector<double> v (ldv*ncv, 0.0);
+
+  //information to the routines
+  std::vector<int> iparam (11, 0);
+
+  iparam[0] = 1;        // shift strategy
+
+  // maximum number of iterations
+  iparam[2] = control().max_steps();
+
+  /**
+   * Sets the mode of dnaupd. 1 is exact shifting, 2 is user-supplied shifts,
+   * 3 is shift-invert mode, 4 is buckling mode, 5 is Cayley mode.
+   */
+
+  iparam[6] = mode;
+  std::vector<int> ipntr (14, 0);
+
+  // work arrays for ARPACK; a vector instead of new[]/delete[] so that the
+  // memory is released no matter how this function is left
+  std::vector<double> workd (3*n, 0.0);
+
+  int lworkl = 3*ncv*(ncv + 6);
+  std::vector<double> workl (lworkl, 0.);
+  //information out of the iteration
+  int info = 1;
+
+  while (ido != 99)
+    {
+      // call of ARPACK dnaupd routine
+      dnaupd_(&ido, bmat, &n_inside_arpack, which, &nev, &tol,
+              &resid[0], &ncv, &v[0], &ldv, &iparam[0], &ipntr[0],
+              &workd[0], &workl[0], &lworkl, &info);
+
+      if (ido == 99)
+        break;
+
+      switch (mode)
+        {
+        case 3:
+        {
+          switch (ido)
+            {
+            case -1:
+            {
+
+              VectorType src,dst,tmp;
+              src.reinit(eigenvectors[0]);
+              dst.reinit(src);
+              tmp.reinit(src);
+
+
+              for (size_type i=0; i<src.size(); ++i)
+                src(i) = workd[ipntr[0]-1+i];
+
+              // multiplication with mass matrix M
+              mass_matrix.vmult(tmp, src);
+              // solving linear system
+              inverse.vmult(dst,tmp);
+
+              for (size_type i=0; i<dst.size(); ++i)
+                workd[ipntr[1]-1+i] = dst(i);
+            }
+            break;
+
+            case  1:
+            {
+
+              // the right hand side is read from the location ipntr[2]
+              // points to, so only the solve with the inverse is done here
+              VectorType src,dst;
+              src.reinit(eigenvectors[0]);
+              dst.reinit(src);
+
+              for (size_type i=0; i<src.size(); ++i)
+                src(i) = workd[ipntr[2]-1+i];
+
+              // solving linear system
+              inverse.vmult(dst,src);
+
+              for (size_type i=0; i<dst.size(); ++i)
+                workd[ipntr[1]-1+i] = dst(i);
+            }
+            break;
+
+            case  2:
+            {
+
+              VectorType src,dst;
+              src.reinit(eigenvectors[0]);
+              dst.reinit(src);
+
+              for (size_type i=0; i<src.size(); ++i)
+                src(i) = workd[ipntr[0]-1+i];
+
+              // Multiplication with mass matrix M
+              mass_matrix.vmult(dst, src);
+
+              for (size_type i=0; i<dst.size(); ++i)
+                workd[ipntr[1]-1+i] = dst(i);
+
+            }
+            break;
+
+            default:
+              Assert (false, ExcArpackIdo(ido));
+              break;
+            }
+        }
+        break;
+        default:
+          Assert (false, ExcArpackMode(mode));
+          break;
+        }
+    }
+
+  if (info<0)
+    {
+      Assert (false, ExcArpackInfodsaupd(info));
+    }
+  else
+    {
+      /**
+       * 1 - compute eigenvectors, 0 - only eigenvalues
+       */
+      int rvec = 1;
+
+      // which eigenvectors
+      char howmany = 'A';
+
+      std::vector<int> select (ncv, 1);
+
+      int ldz = n;
+
+      std::vector<double> z (ldz*ncv, 0.);
+
+      double sigmar = 0.0; // real part of the shift
+      double sigmai = 0.0; // imaginary part of the shift
+
+      int lworkev = 3*ncv;
+      std::vector<double> workev (lworkev, 0.);
+
+      // ARPACK documents both eigenvalue arrays of dneupd as having
+      // dimension nev+1, so one entry more than requested is allocated
+      std::vector<double> eigenvalues_real (nev+1, 0.);
+      std::vector<double> eigenvalues_im (nev+1, 0.);
+
+      // call of ARPACK dneupd routine
+      dneupd_(&rvec, &howmany, &select[0], &eigenvalues_real[0],
+              &eigenvalues_im[0], &z[0], &ldz, &sigmar, &sigmai,
+              &workev[0], bmat, &n_inside_arpack, which, &nev, &tol,
+              &resid[0], &ncv, &v[0], &ldv,
+              &iparam[0], &ipntr[0], &workd[0], &workl[0], &lworkl, &info);
+
+      // NOTE(review): info has just been overwritten by dneupd, while the
+      // exceptions for 1 and 3 are worded for the Arnoldi iteration --
+      // confirm against the ARPACK documentation.
+      if (info == 1)
+        {
+          Assert (false, ExcArpackInfoMaxIt(control().max_steps()));
+        }
+      else if (info == 3)
+        {
+          Assert (false, ExcArpackNoShifts());
+        }
+      else if (info!=0)
+        {
+          Assert (false, ExcArpackInfodneupd(info));
+        }
+
+
+      // v only holds ncv columns of length n, so no more than that many
+      // vectors can be handed back
+      const unsigned int n_eigenvecs = eigenvectors.size();
+      AssertIndexRange (n_eigenvecs, static_cast<unsigned int>(ncv)+1);
+
+      // NOTE(review): the vectors are taken from v although dneupd was
+      // given the separate array z as well -- verify which of the two holds
+      // the eigenvectors in this calling convention.
+      for (size_type i=0; i<n_eigenvecs; ++i)
+        for (unsigned int j=0; j<n; ++j)
+          eigenvectors[i](j) = v[i*n+j];
+
+      // eigenvalues.size() <= nev has been asserted at the top, so all
+      // entries read here have been allocated
+      for (size_type i=0; i<eigenvalues.size(); ++i)
+        eigenvalues[i] = std::complex<double> (eigenvalues_real[i],
+                                               eigenvalues_im[i]);
+    }
+}
+
+
+// Hand out the control object this solver was constructed with.
+inline
+SolverControl &
+ArpackSolver::control () const
+{
+  return this->solver_control;
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+
+#endif
+#endif
diff --git a/include/deal.II/lac/block_indices.h b/include/deal.II/lac/block_indices.h
new file mode 100644
index 0000000..c2e3833
--- /dev/null
+++ b/include/deal.II/lac/block_indices.h
@@ -0,0 +1,449 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__block_indices_h
+#define dealii__block_indices_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/utilities.h>
+#include <cstddef>
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/**
+ * @brief Auxiliary class aiding in the handling of block structures like in
+ * BlockVector or FESystem.
+ *
+ * The information obtained from this class falls into two groups. First, it
+ * is possible to obtain the number of blocks, namely size(), the block_size()
+ * for each block and the total_size() of the object described by the block
+ * indices, namely the length of the whole index set. These functions do not
+ * make any assumption on the ordering of the index set.
+ *
+ * If on the other hand the index set is ordered "by blocks", such that each
+ * block forms a consecutive set of indices, this class manages the
+ * conversion of global indices into a block vector or matrix to the local
+ * indices within this block. This is required, for example, when you address
+ * a global element in a block vector and want to know which element within
+ * which block this is. It is also useful if a matrix is composed of several
+ * blocks, where you have to translate global row and column indices to local
+ * ones.
+ *
+ * @ingroup data @see
+ * @ref GlossBlockLA "Block (linear algebra)"
+ * @author Wolfgang Bangerth, Guido Kanschat, 2000, 2007, 2011
+ */
+class BlockIndices : public Subscriptor
+{
+public:
+  /**
+   * Declare the type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * Default constructor. Initialize for zero blocks.
+   */
+  BlockIndices ();
+
+  /**
+   * Constructor. Initialize the number of entries in each block @p i as
+   * <tt>block_sizes[i]</tt>. The number of blocks will be the size of @p
+   * block_sizes.
+   */
+  BlockIndices (const std::vector<size_type> &block_sizes);
+
+  /**
+   * Specialized constructor for a structure with blocks of equal size.
+   */
+  explicit BlockIndices(const unsigned int n_blocks,
+                        const size_type block_size = 0);
+
+  /**
+   * Reinitialize the number of blocks and assign each block the same number
+   * of elements.
+   */
+  void reinit (const unsigned int n_blocks,
+               const size_type n_elements_per_block);
+
+  /**
+   * Reinitialize the number of indices within each block from the given
+   * argument. The number of blocks will be adjusted to the size of
+   * <tt>block_sizes</tt> and the size of block @p i is set to
+   * <tt>block_sizes[i]</tt>.
+   */
+  void reinit (const std::vector<size_type> &block_sizes);
+
+  /**
+   * Add another block of given size to the end of the block structure.
+   */
+  void push_back(const size_type size);
+
+  /**
+   * @name Size information
+   */
+  //@{
+
+  /**
+   * Number of blocks in index field.
+   */
+  unsigned int size () const;
+
+  /**
+   * Return the total number of indices accumulated over all blocks, that is,
+   * the dimension of the vector space of the block vector.
+   */
+  size_type total_size () const;
+
+  /**
+   * The size of the @p ith block.
+   */
+  size_type block_size (const unsigned int i) const;
+
+  /**
+   * String representation of the block sizes. The output is of the form
+   * `[nb->b1,b2,b3|s]`, where `nb` is the number of blocks as returned by
+   * size(), `s` is total_size() and `b1` etc. are the values of
+   * block_size().
+   */
+  std::string to_string () const;
+
+  //@}
+
+  /**
+   * @name Index conversion
+   *
+   * Functions in this group assume an object, which was created after sorting
+   * by block, such that each block forms a set of consecutive indices in the
+   * object. If applied to other objects, the numbers obtained from these
+   * functions are meaningless.
+   */
+  //@{
+
+  /**
+   * Return the block and the index within that block for the global index @p
+   * i. The first element of the pair is the block, the second the index
+   * within it.
+   */
+  std::pair<unsigned int,size_type>
+  global_to_local (const size_type i) const;
+
+  /**
+   * Return the global index of @p index in block @p block.
+   */
+  size_type local_to_global (const unsigned int block,
+                             const size_type index) const;
+
+  /**
+   * The start index of the ith block.
+   */
+  size_type block_start (const unsigned int i) const;
+  //@}
+
+  /**
+   * Copy operator.
+   */
+  BlockIndices &operator = (const BlockIndices &b);
+
+  /**
+   * Compare whether two objects are the same, i.e. whether the number of
+   * blocks and the sizes of all blocks are equal.
+   */
+  bool operator == (const BlockIndices &b) const;
+
+  /**
+   * Swap the contents of these two objects.
+   */
+  void swap (BlockIndices &b);
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  std::size_t memory_consumption () const;
+
+private:
+  /**
+   * Number of blocks. While this value could be obtained through
+   * <tt>start_indices.size()-1</tt>, we cache this value for faster access.
+   */
+  unsigned int n_blocks;
+
+  /**
+   * Global starting index of each block. The vector has one entry more than
+   * there are blocks; this last and redundant value is the total number of
+   * entries.
+   */
+  std::vector<size_type> start_indices;
+};
+
+
+/**
+ * Operator for logging BlockIndices. Writes the number of blocks, the size of
+ * each block and the total size of the index field.
+ *
+ * @ref BlockIndices
+ * @author Guido Kanschat
+ * @date 2011
+ */
+inline
+LogStream &
+operator << (LogStream &s, const BlockIndices &bi)
+{
+  const unsigned int n_blocks = bi.size();
+
+  // header: number of blocks, then the opening bracket of the size list
+  s << n_blocks << ":[";
+
+  // block sizes, separated by single spaces (none before the first one)
+  for (unsigned int b=0; b<n_blocks; ++b)
+    {
+      if (b != 0)
+        s << ' ';
+      s << bi.block_size(b);
+    }
+
+  // trailer: closing bracket and total number of indices
+  s << "]->" << bi.total_size();
+  return s;
+}
+
+
+
+/* ---------------------- template and inline functions ------------------- */
+
+inline
+void
+BlockIndices::reinit (const unsigned int nb,
+                      const size_type block_size)
+{
+  n_blocks = nb;
+  start_indices.resize(nb+1);
+
+  // All blocks have the same size, so the start indices are the multiples
+  // of block_size; the extra last entry is the total number of indices.
+  size_type offset = 0;
+  for (size_type b=0; b<=nb; ++b, offset += block_size)
+    start_indices[b] = offset;
+}
+
+
+
+inline
+void
+BlockIndices::reinit (const std::vector<size_type> &block_sizes)
+{
+  // The block count and the storage are only touched if the number of
+  // blocks actually changes.
+  const bool size_changes = (start_indices.size() != block_sizes.size()+1);
+  if (size_changes)
+    {
+      n_blocks = static_cast<unsigned int>(block_sizes.size());
+      start_indices.resize(n_blocks+1);
+    }
+
+  // Start indices are the running sum of the block sizes.
+  size_type running_total = 0;
+  start_indices[0] = running_total;
+  for (size_type b=0; b<n_blocks; ++b)
+    {
+      running_total += block_sizes[b];
+      start_indices[b+1] = running_total;
+    }
+}
+
+
+// An object without blocks still stores the single entry 0 in start_indices.
+inline
+BlockIndices::BlockIndices ()
+  : n_blocks(0), start_indices(1, 0)
+{}
+
+
+
+inline
+BlockIndices::BlockIndices (const unsigned int n_blocks,
+                            const size_type block_size)
+  : n_blocks(n_blocks), start_indices(n_blocks+1)
+{
+  // Equal-sized blocks: block b starts at b*block_size, and the extra last
+  // entry is the total size.
+  size_type offset = 0;
+  for (size_type b=0; b<=n_blocks; ++b, offset += block_size)
+    start_indices[b] = offset;
+}
+
+
+
+inline
+BlockIndices::BlockIndices (const std::vector<size_type> &block_sizes)
+  : n_blocks(static_cast<unsigned int>(block_sizes.size())),
+    start_indices(block_sizes.size()+1)
+{
+  // The storage already has its final size; reinit() fills in the entries.
+  this->reinit (block_sizes);
+}
+
+
+inline
+void
+BlockIndices::push_back(const size_type sz)
+{
+  // The current last entry is where the new block starts; the entry
+  // appended here is the new total.
+  const size_type new_total = start_indices[n_blocks] + sz;
+  start_indices.push_back(new_total);
+  ++n_blocks;
+  AssertDimension(start_indices.size(), n_blocks+1);
+}
+
+
+inline
+std::pair<unsigned int,BlockIndices::size_type>
+BlockIndices::global_to_local (const size_type i) const
+{
+  Assert (i<total_size(), ExcIndexRangeType<size_type>(i, 0, total_size()));
+  Assert (n_blocks > 0, ExcLowerRangeType<size_type>(i, size_type(1)));
+
+  // Walk backwards from the last block to the first one whose start index
+  // does not exceed i.
+  unsigned int owner = n_blocks;
+  do
+    --owner;
+  while (i < start_indices[owner]);
+
+  const size_type index_in_block = i-start_indices[owner];
+  return std::pair<unsigned int,size_type>(owner, index_in_block);
+}
+
+
+inline
+BlockIndices::size_type
+BlockIndices::local_to_global (const unsigned int block,
+                               const size_type index) const
+{
+  Assert (block < n_blocks, ExcIndexRange(block, 0, n_blocks));
+  // the local index has to lie inside the chosen block
+  Assert (index < start_indices[block+1]-start_indices[block],
+          ExcIndexRangeType<size_type> (index,
+                                        0,
+                                        start_indices[block+1]-start_indices[block]));
+
+  const size_type global_index = start_indices[block]+index;
+  return global_index;
+}
+
+
+// The number of blocks is cached in a member, so it is returned directly.
+inline
+unsigned int
+BlockIndices::size () const
+{
+  return this->n_blocks;
+}
+
+
+
+inline
+BlockIndices::size_type
+BlockIndices::total_size () const
+{
+  // The entry past the last block start is the sum of all block sizes.
+  return (n_blocks == 0) ? size_type(0) : start_indices[n_blocks];
+}
+
+
+
+inline
+BlockIndices::size_type
+BlockIndices::block_size (const unsigned int block) const
+{
+  Assert (block < n_blocks, ExcIndexRange(block, 0, n_blocks));
+
+  // A block's size is the distance between its start and the next start.
+  const size_type first = start_indices[block];
+  const size_type past_the_end = start_indices[block+1];
+  return past_the_end-first;
+}
+
+
+
+inline
+std::string
+BlockIndices::to_string () const
+{
+  // opening bracket and number of blocks
+  std::string text = "[";
+  text += Utilities::int_to_string(n_blocks);
+  text += "->";
+
+  // comma separated list of the block sizes
+  for (unsigned int b=0; b<n_blocks; ++b)
+    {
+      if (b != 0)
+        text += ',';
+      text += Utilities::to_string(block_size(b));
+    }
+
+  // total size and closing bracket
+  text += "|";
+  text += Utilities::to_string(total_size());
+  text += ']';
+  return text;
+}
+
+
+
+inline
+BlockIndices::size_type
+BlockIndices::block_start (const unsigned int block) const
+{
+  Assert (block < n_blocks, ExcIndexRange(block, 0, n_blocks));
+
+  const size_type first_index = start_indices[block];
+  return first_index;
+}
+
+
+
+inline
+BlockIndices &
+BlockIndices::operator = (const BlockIndices &b)
+{
+  // Copy the start indices first and the cached block count second.
+  this->start_indices = b.start_indices;
+  this->n_blocks      = b.n_blocks;
+  return *this;
+}
+
+
+
+inline
+bool
+BlockIndices::operator == (const BlockIndices &b) const
+{
+  // Different numbers of blocks can never compare equal; otherwise every
+  // start index, including the trailing total, has to agree.
+  bool same = (n_blocks == b.n_blocks);
+  for (size_type i=0; same && i<=n_blocks; ++i)
+    same = (start_indices[i] == b.start_indices[i]);
+
+  return same;
+}
+
+
+
+inline
+void
+BlockIndices::swap (BlockIndices &b)
+{
+  // Exchange both members; the two swaps are independent of each other.
+  std::swap(start_indices, b.start_indices);
+  std::swap(n_blocks, b.n_blocks);
+}
+
+
+
+inline
+std::size_t
+BlockIndices::memory_consumption () const
+{
+  // size of the object itself plus the entries stored in start_indices
+  const std::size_t entry_bytes = start_indices.size() * sizeof(size_type);
+  return sizeof(*this) + entry_bytes;
+}
+
+
+
+/* ----------------- global functions ---------------------------- */
+
+
+/**
+ * Global function @p swap which overloads the default implementation of the
+ * C++ standard library which uses a temporary object. The function simply
+ * exchanges the data of the two objects.
+ *
+ * @relates BlockIndices
+ * @author Wolfgang Bangerth, 2000
+ */
+inline
+void
+swap (BlockIndices &u, BlockIndices &v)
+{
+  // Forward to the member function, which exchanges the data of both objects.
+  v.swap (u);
+}
+
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/block_linear_operator.h b/include/deal.II/lac/block_linear_operator.h
new file mode 100644
index 0000000..db8f6dc
--- /dev/null
+++ b/include/deal.II/lac/block_linear_operator.h
@@ -0,0 +1,850 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__block_linear_operator_h
+#define dealii__block_linear_operator_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+
+#include <deal.II/lac/linear_operator.h>
+
+#ifdef DEAL_II_WITH_CXX11
+
+DEAL_II_NAMESPACE_OPEN
+
+// Forward declarations:
+
+template <typename Number> class BlockVector;
+
+template <typename Range = BlockVector<double>,
+          typename Domain = Range>
+class BlockLinearOperator;
+
+template <typename Range = BlockVector<double>,
+          typename Domain = Range,
+          typename BlockMatrixType>
+BlockLinearOperator<Range, Domain>
+block_operator(const BlockMatrixType &matrix);
+
+template <size_t m, size_t n,
+          typename Range = BlockVector<double>,
+          typename Domain = Range>
+BlockLinearOperator<Range, Domain>
+block_operator(const std::array<std::array<LinearOperator<typename Range::BlockType, typename Domain::BlockType>, n>, m> &);
+
+template <size_t m,
+          typename Range = BlockVector<double>,
+          typename Domain = Range>
+BlockLinearOperator<Range, Domain>
+block_diagonal_operator(const std::array<LinearOperator<typename Range::BlockType, typename Domain::BlockType>, m> &);
+
+template <size_t m,
+          typename Range = BlockVector<double>,
+          typename Domain = Range>
+BlockLinearOperator<Range, Domain>
+block_diagonal_operator(const LinearOperator<typename Range::BlockType, typename Domain::BlockType> &op);
+
+// This is a workaround for a bug in <=gcc-4.7 that does not like partial
+// template default values in combination with local lambda expressions [1]
+//
+// [1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53624
+//
+// Forward declare functions with partial template defaults:
+
+template <typename Range = BlockVector<double>,
+          typename Domain = Range,
+          typename BlockMatrixType>
+BlockLinearOperator<Range, Domain>
+block_diagonal_operator(const BlockMatrixType &block_matrix);
+
+template <typename Range = BlockVector<double>,
+          typename Domain = Range>
+LinearOperator<Domain, Range>
+block_forward_substitution(const BlockLinearOperator<Range, Domain> &,
+                           const BlockLinearOperator<Domain, Range> &);
+
+template <typename Range = BlockVector<double>,
+          typename Domain = Range>
+LinearOperator<Domain, Range>
+block_back_substitution(const BlockLinearOperator<Range, Domain> &,
+                        const BlockLinearOperator<Domain, Range> &);
+
+// end of workaround
+
+
+
+/**
+ * A class to store the concept of a block linear operator.
+ *
+ * This class extends the interface of LinearOperator (which encapsulates
+ * the @p Matrix interface) by three additional functions:
+ * @code
+ *   std::function<unsigned int()> n_block_rows;
+ *   std::function<unsigned int()> n_block_cols;
+ *   std::function<BlockType(unsigned int, unsigned int)> block;
+ * @endcode
+ * that describe the underlying block structure (of an otherwise opaque)
+ * linear operator.
+ *
+ * Objects of type BlockLinearOperator can be created similarly to
+ * LinearOperator with a wrapper function:
+ * @code
+ * dealii::BlockSparseMatrix<double> A;
+ * const auto block_op_a = block_operator(A);
+ * @endcode
+ *
+ * A BlockLinearOperator can be sliced to a LinearOperator at any time. This
+ * removes all information about the underlying block structure (because the
+ * above <code>std::function</code> objects are no longer available) - the
+ * linear operator interface, however, remains intact.
+ *
+ * @note This class makes heavy use of <code>std::function</code> objects and
+ * lambda functions. This flexibility comes with a run-time penalty. Only use
+ * this object to encapsulate objects with medium to large individual block
+ * sizes, and small block structure (as a rule of thumb, matrix blocks greater
+ * than $1000\times1000$).
+ *
+ * @note This class is only available if deal.II was configured with C++11
+ * support, i.e., if <code>DEAL_II_WITH_CXX11</code> is enabled during cmake
+ * configure.
+ *
+ * @author Matthias Maier, 2015
+ *
+ * @ingroup LAOperators
+ */
+template <typename Range, typename Domain>
+class BlockLinearOperator : public LinearOperator<Range, Domain>
+{
+public:
+
+  /**
+   * Type of a single block: a LinearOperator between the block types of
+   * Range and Domain.
+   */
+  typedef LinearOperator<typename Range::BlockType, typename Domain::BlockType> BlockType;
+
+  /**
+   * Create an empty BlockLinearOperator object.
+   *
+   * The base class LinearOperator is default constructed. The three
+   * <code>std::function</code> member objects of this class (n_block_rows,
+   * n_block_cols and block) are initialized with fallbacks that fail an
+   * <code>Assert(false, ...)</code> upon invocation and then return 0 and
+   * a default constructed BlockType, respectively.
+   *
+   * NOTE(review): Assert is presumably only active in debug mode, in which
+   * case the fallbacks silently return these values otherwise - confirm.
+   */
+  BlockLinearOperator()
+    : LinearOperator<Range, Domain>()
+  {
+
+    n_block_rows = []() -> unsigned int
+    {
+      Assert(false, ExcMessage("Uninitialized BlockLinearOperator<Range, Domain>::n_block_rows called"));
+      return 0;
+    };
+
+    n_block_cols = []() -> unsigned int
+    {
+      Assert(false, ExcMessage("Uninitialized BlockLinearOperator<Range, Domain>::n_block_cols called"));
+      return 0;
+    };
+
+    block = [](unsigned int, unsigned int) -> BlockType
+    {
+      Assert(false, ExcMessage("Uninitialized BlockLinearOperator<Range, Domain>::block called"));
+      return BlockType();
+    };
+  }
+
+  /**
+   * Default copy constructor.
+   */
+  BlockLinearOperator(const BlockLinearOperator<Range, Domain> &) =
+    default;
+
+  /**
+   * Templated copy constructor that creates a BlockLinearOperator object from
+   * an object @p op for which the conversion function
+   * <code>block_operator</code> is defined.
+   */
+  template<typename Op>
+  BlockLinearOperator(const Op &op)
+  {
+    *this = block_operator<Range, Domain, Op>(op);
+  }
+
+  /**
+   * Create a BlockLinearOperator from a two-dimensional array @p ops of
+   * LinearOperator. This constructor calls the corresponding block_operator()
+   * overload with the template arguments <code>m, n, Range, Domain</code>.
+   */
+  template<size_t m, size_t n>
+  BlockLinearOperator(const std::array<std::array<BlockType, n>, m> &ops)
+  {
+    *this = block_operator<m, n, Range, Domain>(ops);
+  }
+
+  /**
+   * Create a block-diagonal BlockLinearOperator from a one-dimensional array
+   * @p ops of LinearOperator. This constructor calls the corresponding
+   * block_diagonal_operator() overload with the template arguments
+   * <code>m, Range, Domain</code>.
+   */
+  template<size_t m>
+  BlockLinearOperator(const std::array<BlockType, m> &ops)
+  {
+    *this = block_diagonal_operator<m, Range, Domain>(ops);
+  }
+
+  /**
+   * Default copy assignment operator.
+   */
+  BlockLinearOperator<Range, Domain> &
+  operator=(const BlockLinearOperator<Range, Domain> &) = default;
+
+  /**
+   * Templated copy assignment operator for an object @p op for which the
+   * conversion function <code>block_operator</code> is defined. The result
+   * of <code>block_operator</code> is assigned to this object.
+   */
+  template <typename Op>
+  BlockLinearOperator<Range, Domain> &operator=(const Op &op)
+  {
+    *this = block_operator<Range, Domain, Op>(op);
+    return *this;
+  }
+
+  /**
+   * Copy assignment from a two-dimensional array @p ops of LinearOperator.
+   * This assignment operator calls the corresponding block_operator()
+   * overload with the template arguments <code>m, n, Range, Domain</code>.
+   */
+  template <size_t m, size_t n>
+  BlockLinearOperator<Range, Domain> &
+  operator=(const std::array<std::array<BlockType, n>, m> &ops)
+  {
+    *this = block_operator<m, n, Range, Domain>(ops);
+    return *this;
+  }
+
+  /**
+   * Copy assignment from a one-dimensional array @p ops of LinearOperator
+   * that creates a block-diagonal BlockLinearOperator. This assignment
+   * operator calls the corresponding block_diagonal_operator() overload with
+   * the template arguments <code>m, Range, Domain</code>.
+   */
+  template <size_t m>
+  BlockLinearOperator<Range, Domain> &
+  operator=(const std::array<BlockType, m> &ops)
+  {
+    *this = block_diagonal_operator<m, Range, Domain>(ops);
+    return *this;
+  }
+
+  /**
+   * Return the number of blocks in a column (i.e., the number of "block rows",
+   * or the number $m$, if interpreted as a $m\times n$ block system).
+   */
+  std::function<unsigned int()> n_block_rows;
+
+  /**
+   * Return the number of blocks in a row (i.e., the number of "block columns",
+   * or the number $n$, if interpreted as a $m\times n$ block system).
+   */
+  std::function<unsigned int()> n_block_cols;
+
+  /**
+   * Access the block with the given coordinates. This
+   * <code>std::function</code> object returns a LinearOperator representing
+   * the $(i,j)$-th block of the BlockLinearOperator.
+   */
+  std::function<BlockType(unsigned int, unsigned int)> block;
+};
+
+
+
+namespace internal
+{
+  namespace BlockLinearOperator
+  {
+    /**
+     * Fill the LinearOperator callbacks of @p op (reinit_range_vector,
+     * reinit_domain_vector, vmult, vmult_add, Tvmult and Tvmult_add) with
+     * implementations that loop over the blocks made available through
+     * op.n_block_rows(), op.n_block_cols() and op.block().
+     *
+     * Every lambda stores its own copy of @p op, taken at the moment the
+     * lambda is created.
+     */
+    template <typename Range, typename Domain>
+    inline void
+    populate_linear_operator_functions(
+      dealii::BlockLinearOperator<Range, Domain> &op)
+    {
+      // Range vector: one block per block row, each one initialized by the
+      // operator in the first block column of that row.
+      op.reinit_range_vector = [op](Range &v, bool omit_zeroing_entries)
+      {
+        const unsigned int m = op.n_block_rows();
+
+        v.reinit(m);
+
+        for (unsigned int row = 0; row < m; ++row)
+          op.block(row, 0).reinit_range_vector(v.block(row), omit_zeroing_entries);
+
+        v.collect_sizes();
+      };
+
+      // Domain vector: one block per block column, each one initialized by
+      // the operator in the first block row of that column.
+      op.reinit_domain_vector = [op](Domain &v, bool omit_zeroing_entries)
+      {
+        const unsigned int n = op.n_block_cols();
+
+        v.reinit(n);
+
+        for (unsigned int col = 0; col < n; ++col)
+          op.block(0, col).reinit_domain_vector(v.block(col), omit_zeroing_entries);
+
+        v.collect_sizes();
+      };
+
+      // v = op * u: for every block row the first column overwrites the
+      // destination block, all further columns are added on top.
+      op.vmult = [op](Range &v, const Domain &u)
+      {
+        const unsigned int m = op.n_block_rows();
+        const unsigned int n = op.n_block_cols();
+        Assert(v.n_blocks() == m, ExcDimensionMismatch(v.n_blocks(), m));
+        Assert(u.n_blocks() == n, ExcDimensionMismatch(u.n_blocks(), n));
+
+        for (unsigned int row = 0; row < m; ++row)
+          {
+            op.block(row, 0).vmult(v.block(row), u.block(0));
+
+            for (unsigned int col = 1; col < n; ++col)
+              op.block(row, col).vmult_add(v.block(row), u.block(col));
+          }
+      };
+
+      // v += op * u: every block adds its contribution to the destination.
+      op.vmult_add = [op](Range &v, const Domain &u)
+      {
+        const unsigned int m = op.n_block_rows();
+        const unsigned int n = op.n_block_cols();
+        Assert(v.n_blocks() == m, ExcDimensionMismatch(v.n_blocks(), m));
+        Assert(u.n_blocks() == n, ExcDimensionMismatch(u.n_blocks(), n));
+
+        for (unsigned int row = 0; row < m; ++row)
+          for (unsigned int col = 0; col < n; ++col)
+            op.block(row, col).vmult_add(v.block(row), u.block(col));
+      };
+
+      // v = op^T * u: the roles of block rows and block columns are
+      // exchanged; the first block row overwrites, the others are added.
+      op.Tvmult = [op](Domain &v, const Range &u)
+      {
+        const unsigned int n = op.n_block_cols();
+        const unsigned int m = op.n_block_rows();
+        Assert(v.n_blocks() == n, ExcDimensionMismatch(v.n_blocks(), n));
+        Assert(u.n_blocks() == m, ExcDimensionMismatch(u.n_blocks(), m));
+
+        for (unsigned int col = 0; col < n; ++col)
+          {
+            op.block(0, col).Tvmult(v.block(col), u.block(0));
+
+            for (unsigned int row = 1; row < m; ++row)
+              op.block(row, col).Tvmult_add(v.block(col), u.block(row));
+          }
+      };
+
+      // v += op^T * u
+      op.Tvmult_add = [op](Domain &v, const Range &u)
+      {
+        const unsigned int n = op.n_block_cols();
+        const unsigned int m = op.n_block_rows();
+        Assert(v.n_blocks() == n, ExcDimensionMismatch(v.n_blocks(), n));
+        Assert(u.n_blocks() == m, ExcDimensionMismatch(u.n_blocks(), m));
+
+        for (unsigned int col = 0; col < n; ++col)
+          for (unsigned int row = 0; row < m; ++row)
+            op.block(row, col).Tvmult_add(v.block(col), u.block(row));
+      };
+    }
+  } /*namespace BlockLinearOperator*/
+} /*namespace internal*/
+
+
+
+/**
+ * @name Creation of a BlockLinearOperator
+ */
+//@{
+
+/**
+ * @relates BlockLinearOperator
+ *
+ * A function that encapsulates a @p block_matrix into a BlockLinearOperator.
+ *
+ * All changes made on the block structure and individual blocks of @p
+ * block_matrix after the creation of the BlockLinearOperator object are
+ * reflected by the operator object.
+ *
+ * @ingroup LAOperators
+ */
+template <typename Range,
+          typename Domain,
+          typename BlockMatrixType>
+BlockLinearOperator<Range, Domain>
+block_operator(const BlockMatrixType &block_matrix)
+{
+  typedef BlockLinearOperator<Range, Domain> OperatorType;
+  typedef typename OperatorType::BlockType   BlockType;
+
+  OperatorType result;
+
+  // All three callbacks hold a reference to block_matrix rather than a
+  // copy, so they query the matrix object anew on every invocation.
+  result.block = [&block_matrix](unsigned int i, unsigned int j) -> BlockType
+  {
+    // m and n are only needed by the index checks below.
+#ifdef DEBUG
+    const unsigned int m = block_matrix.n_block_rows();
+    const unsigned int n = block_matrix.n_block_cols();
+    Assert(i < m, ExcIndexRange (i, 0, m));
+    Assert(j < n, ExcIndexRange (j, 0, n));
+#endif
+
+    return BlockType(block_matrix.block(i, j));
+  };
+
+  result.n_block_cols = [&block_matrix]() -> unsigned int
+  {
+    return block_matrix.n_block_cols();
+  };
+
+  result.n_block_rows = [&block_matrix]() -> unsigned int
+  {
+    return block_matrix.n_block_rows();
+  };
+
+  // Derive vmult() and friends from the three block callbacks set above.
+  internal::BlockLinearOperator::populate_linear_operator_functions(result);
+  return result;
+}
+
+
+
+/**
+ * @relates BlockLinearOperator
+ *
+ * A variant of above function that encapsulates a given collection @p ops of
+ * LinearOperators into a block structure. Here, it is assumed that Range and
+ * Domain are blockvectors, i.e., derived from
+ * @ref BlockVectorBase.
+ * The individual linear operators in @p ops must act on the underlying vector
+ * type of the block vectors, i.e., on Domain::BlockType yielding a result in
+ * Range::BlockType.
+ *
+ * The list @p ops is best passed as an initializer list. Consider for example
+ * a linear operator block (acting on Vector<double>)
+ * @code
+ *  op_a00 | op_a01
+ *         |
+ *  ---------------
+ *         |
+ *  op_a10 | op_a11
+ * @endcode
+ * The corresponding block_operator invocation takes the form
+ * @code
+ * block_operator<2, 2, BlockVector<double>>({op_a00, op_a01, op_a10, op_a11});
+ * @endcode
+ *
+ * @ingroup LAOperators
+ */
+template <size_t m, size_t n, typename Range, typename Domain>
+BlockLinearOperator<Range, Domain>
+block_operator(const std::array<std::array<LinearOperator<typename Range::BlockType, typename Domain::BlockType>, n>, m> &ops)
+{
+  static_assert(m > 0 && n > 0,
+                "a blocked LinearOperator must consist of at least one block");
+
+  typedef BlockLinearOperator<Range, Domain> OperatorType;
+  typedef typename OperatorType::BlockType   BlockType;
+
+  OperatorType result;
+
+  // The table of operators is copied into the closure, so the returned
+  // object does not refer back to the argument ops.
+  result.block = [ops](unsigned int i, unsigned int j) -> BlockType
+  {
+    Assert(i < m, ExcIndexRange (i, 0, m));
+    Assert(j < n, ExcIndexRange (j, 0, n));
+
+    return ops[i][j];
+  };
+
+  // The block counts are the compile-time array extents.
+  result.n_block_cols = []() -> unsigned int
+  {
+    return n;
+  };
+
+  result.n_block_rows = []() -> unsigned int
+  {
+    return m;
+  };
+
+  // Derive vmult() and friends from the three block callbacks set above.
+  internal::BlockLinearOperator::populate_linear_operator_functions(result);
+  return result;
+}
+
+
+
+/**
+ * @relates BlockLinearOperator
+ *
+ * This function extracts the diagonal blocks of @p block_matrix (either a
+ * block matrix type or a BlockLinearOperator) and creates a
+ * BlockLinearOperator with the diagonal. Off-diagonal elements are
+ * initialized as null_operator (with correct reinit_range_vector and
+ * reinit_domain_vector methods).
+ *
+ * All changes made on the individual diagonal blocks of @p block_matrix after
+ * the creation of the BlockLinearOperator object are reflected by the
+ * operator object.
+ *
+ * @ingroup LAOperators
+ */
+template <typename Range,
+          typename Domain,
+          typename BlockMatrixType>
+BlockLinearOperator<Range, Domain>
+block_diagonal_operator(const BlockMatrixType &block_matrix)
+{
+  typedef BlockLinearOperator<Range, Domain> OperatorType;
+  typedef typename OperatorType::BlockType   BlockType;
+
+  OperatorType result;
+
+  // All three callbacks hold a reference to block_matrix rather than a
+  // copy, so they query the matrix object anew on every invocation.
+  result.block = [&block_matrix](unsigned int i, unsigned int j) -> BlockType
+  {
+    // m and n are only needed by the checks below.
+#ifdef DEBUG
+    const unsigned int m = block_matrix.n_block_rows();
+    const unsigned int n = block_matrix.n_block_cols();
+    Assert(m == n, ExcDimensionMismatch(m, n));
+    Assert(i < m, ExcIndexRange (i, 0, m));
+    Assert(j < n, ExcIndexRange (j, 0, n));
+#endif
+    BlockType entry(block_matrix.block(i, j));
+
+    // Off-diagonal positions are replaced by the null operator built from
+    // the wrapped matrix block; diagonal positions are passed through.
+    if (i != j)
+      return null_operator(entry);
+
+    return entry;
+  };
+
+  result.n_block_cols = [&block_matrix]() -> unsigned int
+  {
+    return block_matrix.n_block_cols();
+  };
+
+  result.n_block_rows = [&block_matrix]() -> unsigned int
+  {
+    return block_matrix.n_block_rows();
+  };
+
+  // Derive vmult() and friends from the three block callbacks set above.
+  internal::BlockLinearOperator::populate_linear_operator_functions(result);
+  return result;
+}
+
+
+
+/**
+ * @relates BlockLinearOperator
+ *
+ * A variant of above function that builds up a block diagonal linear operator
+ * from an array @p ops of diagonal elements (off-diagonal blocks are assumed
+ * to be 0).
+ *
+ * The list @p ops is best passed as an initializer list. Consider for example
+ * a linear operator block (acting on Vector<double>) <code>diag(op_a0, op_a1,
+ * ..., op_am)</code>. The corresponding block_diagonal_operator invocation
+ * takes the form
+ * @code
+ * block_diagonal_operator<m, BlockVector<double>>({op_a0, op_a1, ..., op_am});
+ * @endcode
+ *
+ * @ingroup LAOperators
+ */
+template <size_t m, typename Range, typename Domain>
+BlockLinearOperator<Range, Domain>
+block_diagonal_operator(const std::array<LinearOperator<typename Range::BlockType, typename Domain::BlockType>, m> &ops)
+{
+  static_assert(m > 0,
+                "a blockdiagonal LinearOperator must consist of at least one block");
+
+  typedef typename BlockLinearOperator<Range, Domain>::BlockType BlockType;
+
+  // Full m x m table of blocks that is handed to block_operator() below.
+  std::array<std::array<BlockType, m>, m> block_table;
+
+  for (unsigned int row = 0; row < m; ++row)
+    for (unsigned int col = 0; col < m; ++col)
+      {
+        BlockType &entry = block_table[row][col];
+
+        if (row == col)
+          {
+            // diagonal position: take the given operator as is
+            entry = ops[row];
+            continue;
+          }
+
+        // Off-diagonal position: this must be a null operator, but with
+        // reinit functions that match its place in the block system. Start
+        // from the null operator of this row's diagonal operator ...
+        entry = null_operator(ops[row]);
+        // ... and take reinit_domain_vector from this column's diagonal
+        // operator instead.
+        entry.reinit_domain_vector = ops[col].reinit_domain_vector;
+      }
+
+  return block_operator<m,m,Range,Domain>(block_table);
+}
+
+
+
+/**
+ * @relates BlockLinearOperator
+ *
+ * A variant of above function that only takes a single LinearOperator
+ * argument @p op and creates a blockdiagonal linear operator with @p m copies
+ * of it.
+ *
+ * @ingroup LAOperators
+ */
+template <size_t m, typename Range, typename Domain>
+BlockLinearOperator<Range, Domain>
+block_diagonal_operator(const LinearOperator<typename Range::BlockType, typename Domain::BlockType> &op)
+{
+  static_assert(m > 0,
+                "a blockdiagonal LinearOperator must consist of at least "
+                "one block");
+
+  // Build an array holding m copies of op ...
+  typedef typename BlockLinearOperator<Range, Domain>::BlockType BlockType;
+  std::array<BlockType, m> new_ops;
+  new_ops.fill(op);
+
+  // ... and forward it to the array overload. The template arguments have
+  // to be spelled out: Range and Domain only appear in non-deduced contexts
+  // of that overload's parameter type, so an unqualified call would fall
+  // back to the default template arguments (or to the generic block matrix
+  // overload) instead of using the types requested by the caller.
+  return block_diagonal_operator<m, Range, Domain>(new_ops);
+}
+
+
+
+//@}
+/**
+ * @name Manipulation of a BlockLinearOperator
+ */
+//@{
+
+/**
+ * @relates LinearOperator
+ * @relates BlockLinearOperator
+ *
+ * This function implements forward substitution to invert a lower block
+ * triangular matrix. As arguments, it takes a BlockLinearOperator @p
+ * block_operator representing a block lower triangular matrix, as well as a
+ * BlockLinearOperator @p diagonal_inverse representing inverses of diagonal
+ * blocks of @p block_operator.
+ *
+ * Let us assume we have a linear system with the following block structure:
+ *
+ * @code
+ * A00 x0 + ...                   = y0
+ * A10 x0 + A11 x1 + ...          = y1
+ * ...        ...
+ * An0 x0 + An1 x1 + ... + Ann xn = yn
+ * @endcode
+ *
+ * First of all, <code>x0 = A00^-1 y0</code>. Then, we can use x0 to recover
+ * x1:
+ * @code
+ *    x1 = A11^-1 ( y1 - A10 x0 )
+ * @endcode
+ * and therefore:
+ * @code
+ *    xn = Ann^-1 ( yn - An0 x0 - ... - An(n-1) x(n-1) )
+ * @endcode
+ *
+ * @note We are not using all blocks of the BlockLinearOperator arguments:
+ * Just the lower triangular block matrix of @p block_operator is used as well
+ * as the diagonal of @p diagonal_inverse.
+ *
+ * @ingroup LAOperators
+ */
+template <typename Range, typename Domain>
+LinearOperator<Domain, Range>
+block_forward_substitution(const BlockLinearOperator<Range, Domain> &block_operator,
+                           const BlockLinearOperator<Domain, Range> &diagonal_inverse)
+{
+  // NOTE(review): the local operator is a LinearOperator<Range, Range> while
+  // the declared return type is LinearOperator<Domain, Range>; this looks
+  // like it is only meant to be used with Domain == Range - confirm.
+  LinearOperator<Range, Range> substitution_op;
+
+  // Vector initialization is delegated to the inverse of the diagonal.
+  substitution_op.reinit_domain_vector = diagonal_inverse.reinit_domain_vector;
+  substitution_op.reinit_range_vector = diagonal_inverse.reinit_range_vector;
+
+  // Both lambdas below keep their own copies of the two block operators.
+  substitution_op.vmult = [block_operator, diagonal_inverse](Range &v, const Range &u)
+  {
+    const unsigned int m = block_operator.n_block_rows();
+    Assert(block_operator.n_block_cols() == m,
+           ExcDimensionMismatch(block_operator.n_block_cols(), m));
+    Assert(diagonal_inverse.n_block_rows() == m,
+           ExcDimensionMismatch(diagonal_inverse.n_block_rows(), m));
+    Assert(diagonal_inverse.n_block_cols() == m,
+           ExcDimensionMismatch(diagonal_inverse.n_block_cols(), m));
+    Assert(v.n_blocks() == m, ExcDimensionMismatch(v.n_blocks(), m));
+    Assert(u.n_blocks() == m, ExcDimensionMismatch(u.n_blocks(), m));
+
+    if (m == 0)
+      return;
+
+    // First block row: apply the inverse diagonal block directly.
+    diagonal_inverse.block(0, 0).vmult(v.block(0), u.block(0));
+
+    // Remaining block rows, top to bottom.
+    for (unsigned int row = 1; row < m; ++row)
+      {
+        auto &x_row = v.block(row);
+
+        // Assemble the negated right hand side in place, add the products
+        // of the blocks left of the diagonal with the already computed
+        // solution blocks, and flip the sign back.
+        x_row = u.block(row);
+        x_row *= -1.;
+        for (unsigned int col = 0; col < row; ++col)
+          block_operator.block(row, col).vmult_add(x_row, v.block(col));
+        x_row *= -1.;
+
+        // Source and destination are the same vector here; the original
+        // remark on this call states that it uses intermediate storage.
+        diagonal_inverse.block(row, row).vmult(x_row, x_row);
+      }
+  };
+
+  // NOTE(review): the inner products below read v.block(col), which at that
+  // point holds the caller's previous content plus the contribution just
+  // added. The outcome therefore seems to equal "v + result of vmult" only
+  // if v is zero on entry - confirm the intended semantics.
+  substitution_op.vmult_add = [block_operator, diagonal_inverse](Range &v, const Range &u)
+  {
+    const unsigned int m = block_operator.n_block_rows();
+    Assert(block_operator.n_block_cols() == m,
+           ExcDimensionMismatch(block_operator.n_block_cols(), m));
+    Assert(diagonal_inverse.n_block_rows() == m,
+           ExcDimensionMismatch(diagonal_inverse.n_block_rows(), m));
+    Assert(diagonal_inverse.n_block_cols() == m,
+           ExcDimensionMismatch(diagonal_inverse.n_block_cols(), m));
+    Assert(v.n_blocks() == m, ExcDimensionMismatch(v.n_blocks(), m));
+    Assert(u.n_blocks() == m, ExcDimensionMismatch(u.n_blocks(), m));
+
+    if (m == 0)
+      return;
+
+    // Scratch vector for the right hand side of one block row; it is
+    // handed back to the pool at the end of this lambda.
+    static GrowingVectorMemory<typename  Range::BlockType> scratch_pool;
+    typename Range::BlockType *rhs = scratch_pool.alloc();
+
+    diagonal_inverse.block(0, 0).vmult_add(v.block(0), u.block(0));
+
+    for (unsigned int row = 1; row < m; ++row)
+      {
+        diagonal_inverse.block(row, row).reinit_range_vector(*rhs, /*bool omit_zeroing_entries=*/ true);
+
+        // Same sign trick as in vmult, but assembled in the scratch vector
+        // because v.block(row) must be added to, not overwritten.
+        *rhs = u.block(row);
+        *rhs *= -1.;
+        for (unsigned int col = 0; col < row; ++col)
+          block_operator.block(row, col).vmult_add(*rhs, v.block(col));
+        *rhs *= -1.;
+
+        diagonal_inverse.block(row, row).vmult_add(v.block(row),*rhs);
+      }
+
+    scratch_pool.free(rhs);
+  };
+
+  return substitution_op;
+}
+
+
+
+/**
+ * @relates LinearOperator
+ * @relates BlockLinearOperator
+ *
+ * This function implements back substitution to invert an upper block
+ * triangular matrix. As arguments, it takes a BlockLinearOperator @p
+ * block_operator representing an upper block triangular matrix, as well as a
+ * BlockLinearOperator @p diagonal_inverse representing inverses of diagonal
+ * blocks of @p block_operator.
+ *
+ * Let us assume we have a linear system with the following block structure:
+ *
+ * @code
+ * A00 x0 + A01 x1 + ... + A0n xn = y0
+ *          A11 x1 + ...          = y1
+ *                          ...     ..
+ *                         Ann xn = yn
+ * @endcode
+ *
+ * First of all, <code>xn = Ann^-1 yn</code>. Then, we can use xn to recover
+ * x(n-1):
+ * @code
+ *    x(n-1) = A(n-1)(n-1)^-1 ( y(n-1) - A(n-1)n xn )
+ * @endcode
+ * and therefore:
+ * @code
+ *    x0 = A00^-1 ( y0 - A0n xn - ... - A01 x1 )
+ * @endcode
+ *
+ * @note We are not using all blocks of the BlockLinearOperator arguments:
+ * Just the upper triangular block matrix of @p block_operator is used as well
+ * as the diagonal of @p diagonal_inverse.
+ *
+ * @ingroup LAOperators
+ */
+template <typename Range, typename Domain>
+LinearOperator<Domain, Range>
+block_back_substitution(const BlockLinearOperator<Range, Domain> &block_operator,
+                        const BlockLinearOperator<Domain, Range> &diagonal_inverse)
+{
+  // NOTE(review): the local operator is a LinearOperator<Range, Range> while
+  // the declared return type is LinearOperator<Domain, Range>; this looks
+  // like it is only meant to be used with Domain == Range - confirm.
+  LinearOperator<Range, Range> return_op;
+
+  // Vector initialization is delegated to the inverse of the diagonal.
+  return_op.reinit_range_vector = diagonal_inverse.reinit_range_vector;
+  return_op.reinit_domain_vector = diagonal_inverse.reinit_domain_vector;
+
+  // Both lambdas below keep their own copies of the two block operators.
+  return_op.vmult = [block_operator, diagonal_inverse](Range &v, const Range &u)
+  {
+    const unsigned int m = block_operator.n_block_rows();
+    Assert(block_operator.n_block_cols() == m,
+           ExcDimensionMismatch(block_operator.n_block_cols(), m));
+    Assert(diagonal_inverse.n_block_rows() == m,
+           ExcDimensionMismatch(diagonal_inverse.n_block_rows(), m));
+    Assert(diagonal_inverse.n_block_cols() == m,
+           ExcDimensionMismatch(diagonal_inverse.n_block_cols(), m));
+    Assert(v.n_blocks() == m, ExcDimensionMismatch(v.n_blocks(), m));
+    Assert(u.n_blocks() == m, ExcDimensionMismatch(u.n_blocks(), m));
+
+    if (m == 0)
+      return;
+
+    // Last block row: apply the inverse diagonal block directly.
+    diagonal_inverse.block(m-1, m-1).vmult(v.block(m-1),u.block(m-1));
+
+    // Remaining block rows i = m-2, ..., 0. The loop counts upwards in k
+    // so that all index arithmetic stays within unsigned int.
+    for (unsigned int k = 1; k < m; ++k)
+      {
+        const unsigned int i = m - 1 - k;
+
+        // Assemble the negated right hand side in place, add the products
+        // of the blocks right of the diagonal with the already computed
+        // solution blocks, and flip the sign back.
+        auto &dst = v.block(i);
+        dst = u.block(i);
+        dst *= -1.;
+        for (unsigned int j = i + 1; j < m; ++j)
+          block_operator.block(i, j).vmult_add(dst, v.block(j));
+        dst *= -1.;
+        diagonal_inverse.block(i, i).vmult(dst, dst); // uses intermediate storage
+      }
+  };
+
+  // NOTE(review): the inner products below read v.block(j), which at that
+  // point holds the caller's previous content plus the contribution just
+  // added. The outcome therefore seems to equal "v + result of vmult" only
+  // if v is zero on entry - confirm the intended semantics.
+  return_op.vmult_add = [block_operator, diagonal_inverse](Range &v, const Range &u)
+  {
+    const unsigned int m = block_operator.n_block_rows();
+    Assert(block_operator.n_block_cols() == m,
+           ExcDimensionMismatch(block_operator.n_block_cols(), m));
+    Assert(diagonal_inverse.n_block_rows() == m,
+           ExcDimensionMismatch(diagonal_inverse.n_block_rows(), m));
+    Assert(diagonal_inverse.n_block_cols() == m,
+           ExcDimensionMismatch(diagonal_inverse.n_block_cols(), m));
+    Assert(v.n_blocks() == m, ExcDimensionMismatch(v.n_blocks(), m));
+    Assert(u.n_blocks() == m, ExcDimensionMismatch(u.n_blocks(), m));
+
+    // Leave before the scratch vector is requested: every vector obtained
+    // from alloc() has to be handed back with free(), which an early return
+    // after the allocation would skip.
+    if (m == 0)
+      return;
+
+    static GrowingVectorMemory<typename  Range::BlockType> vector_memory;
+    typename  Range::BlockType *tmp = vector_memory.alloc();
+
+    diagonal_inverse.block(m-1, m-1).vmult_add(v.block(m-1),u.block(m-1));
+
+    // Remaining block rows i = m-2, ..., 0 (see vmult above).
+    for (unsigned int k = 1; k < m; ++k)
+      {
+        const unsigned int i = m - 1 - k;
+
+        // Same sign trick as in vmult, but assembled in the scratch vector
+        // because v.block(i) must be added to, not overwritten.
+        diagonal_inverse.block(i, i).reinit_range_vector(*tmp, /*bool omit_zeroing_entries=*/ true);
+        *tmp = u.block(i);
+        *tmp *= -1.;
+        for (unsigned int j = i + 1; j < m; ++j)
+          block_operator.block(i, j).vmult_add(*tmp,v.block(j));
+        *tmp *= -1.;
+        diagonal_inverse.block(i, i).vmult_add(v.block(i),*tmp);
+      }
+
+    vector_memory.free(tmp);
+  };
+
+  return return_op;
+}
+
+//@}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_CXX11
+#endif
diff --git a/include/deal.II/lac/block_matrix.h b/include/deal.II/lac/block_matrix.h
new file mode 100644
index 0000000..fac76f8
--- /dev/null
+++ b/include/deal.II/lac/block_matrix.h
@@ -0,0 +1,132 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__block_matrix_h
+#define dealii__block_matrix_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/lac/block_vector.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+/*! @addtogroup Matrix2
+ *@{
+ */
+
+/**
+ * A matrix with several copies of the same block on the diagonal.
+ *
+ * This matrix implements an @p m by @p m block matrix. Each diagonal block
+ * consists of the same (non-block) matrix, while off-diagonal blocks are
+ * void.
+ *
+ * One special application is a one by one block matrix, allowing to apply the
+ * @p vmult of the original matrix (or preconditioner) to a block vector.
+ *
+ * @deprecated If deal.II was configured with C++11 support, use the
+ * LinearOperator class instead, see the module on
+ * @ref LAOperators "linear operators"
+ * for further details.
+ *
+ * @see
+ * @ref GlossBlockLA "Block (linear algebra)"
+ * @author Guido Kanschat, 2000
+ */
+template <typename MatrixType>
+class BlockDiagonalMatrix : public Subscriptor
+{
+public:
+  /**
+   * Constructor for an @p n_blocks by @p n_blocks matrix with diagonal blocks
+   * @p M.
+   *
+   * The matrix @p M is not copied; this object only stores a pointer to it
+   * (see the member @p matrix). Presumably @p M therefore has to stay alive
+   * for as long as this object is used.
+   */
+  BlockDiagonalMatrix (const MatrixType   &M,
+                       const unsigned int  n_blocks);
+
+  /**
+   * Matrix-vector-multiplication. For every block index, the @p vmult
+   * function of the stored matrix is applied to the corresponding blocks of
+   * @p src and @p dst. Both vectors are expected to consist of as many blocks
+   * as this matrix.
+   */
+  template <typename number1, typename number2>
+  void vmult (BlockVector<number1> &dst,
+              const BlockVector<number2> &src) const;
+
+  /**
+   * Transposed matrix-vector-multiplication. For every block index, the
+   * @p Tvmult function of the stored matrix is applied to the corresponding
+   * blocks of @p src and @p dst. Both vectors are expected to consist of as
+   * many blocks as this matrix.
+   */
+  template <typename number1, typename number2>
+  void Tvmult (BlockVector<number1> &dst,
+               const BlockVector<number2> &src) const;
+private:
+  /**
+   * Number of blocks, i.e. the number of times the stored matrix is repeated
+   * on the diagonal.
+   */
+  unsigned int num_blocks;
+
+  /**
+   * Diagonal entry: pointer to the matrix used for every diagonal block.
+   */
+  SmartPointer<const MatrixType,BlockDiagonalMatrix<MatrixType> > matrix;
+};
+
+/*@}*/
+//---------------------------------------------------------------------------
+
+// Store the block count and a pointer to the diagonal block M; the matrix
+// itself is not copied. The parameter is named as in the class declaration.
+template <typename MatrixType>
+BlockDiagonalMatrix<MatrixType>::BlockDiagonalMatrix (const MatrixType   &M,
+                                                      const unsigned int  n_blocks)
+  :
+  num_blocks (n_blocks),
+  matrix(&M)
+{}
+
+
+// Apply the stored matrix to each block of src in turn and write the result
+// to the block of dst with the same index.
+template <typename MatrixType>
+template <typename number1, typename number2>
+void
+BlockDiagonalMatrix<MatrixType>::vmult (BlockVector<number1>       &dst,
+                                        const BlockVector<number2> &src) const
+{
+  Assert (dst.n_blocks()==num_blocks,
+          ExcDimensionMismatch(dst.n_blocks(),num_blocks));
+  Assert (src.n_blocks()==num_blocks,
+          ExcDimensionMismatch(src.n_blocks(),num_blocks));
+
+  for (unsigned int block_no=0; block_no<num_blocks; ++block_no)
+    {
+      // the same diagonal block acts on every pair of vector blocks
+      matrix->vmult (dst.block(block_no), src.block(block_no));
+    }
+}
+
+
+// Apply the transpose of the stored matrix to each block of src in turn and
+// write the result to the block of dst with the same index.
+template <typename MatrixType>
+template <typename number1, typename number2>
+void
+BlockDiagonalMatrix<MatrixType>::Tvmult (BlockVector<number1>       &dst,
+                                         const BlockVector<number2> &src) const
+{
+  Assert (dst.n_blocks()==num_blocks,
+          ExcDimensionMismatch(dst.n_blocks(),num_blocks));
+  Assert (src.n_blocks()==num_blocks,
+          ExcDimensionMismatch(src.n_blocks(),num_blocks));
+
+  for (unsigned int block_no=0; block_no<num_blocks; ++block_no)
+    {
+      // the same diagonal block acts on every pair of vector blocks
+      matrix->Tvmult (dst.block(block_no), src.block(block_no));
+    }
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/block_matrix_array.h b/include/deal.II/lac/block_matrix_array.h
new file mode 100644
index 0000000..c61b6f3
--- /dev/null
+++ b/include/deal.II/lac/block_matrix_array.h
@@ -0,0 +1,634 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__block_matrix_array_h
+#define dealii__block_matrix_array_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/table.h>
+
+#include <deal.II/lac/pointer_matrix.h>
+#include <deal.II/lac/vector_memory.h>
+#include <deal.II/lac/block_vector.h>
+
+#include <vector>
+#include <map>
+#include <string>
+#include <memory>
+#include <sstream>
+
+DEAL_II_NAMESPACE_OPEN
+
+/*! @addtogroup Matrix2
+ *@{
+ */
+
+
+/**
+ * Block matrix composed of different single matrices; these matrices may even
+ * be of different types.
+ *
+ * Given a set of arbitrary matrices <i>A<sub>i</sub></i>, this class
+ * implements a block matrix with block entries of the form <i>M<sub>jk</sub>
+ * = s<sub>jk</sub>A<sub>i</sub></i>.  Each <i>A<sub>i</sub></i> may be used
+ * several times with different prefix. The matrices are not copied into the
+ * BlockMatrixArray object, but rather a PointerMatrix referencing each of
+ * them will be stored along with factors and transposition flags.
+ *
+ * Non-zero entries are registered by the function enter(), zero entries are
+ * not stored at all. Using enter() with the same location <tt>(i,j)</tt>
+ * several times will add the corresponding matrices in matrix-vector
+ * multiplications. These matrices will not be actually added, but the
+ * multiplications with them will be summed up.
+ *
+ * @note This mechanism makes it impossible to access single entries of
+ * BlockMatrixArray. In particular, (block) relaxation preconditioners based
+ * on PreconditionRelaxation or PreconditionBlock <b>cannot</b> be used with
+ * this class. If you need a preconditioner for a BlockMatrixArray object, use
+ * BlockTrianglePrecondition.
+ *
+ * <h3>Requirements on MatrixType</h3>
+ *
+ * The template argument <tt>MatrixType</tt> is a class providing the matrix-
+ * vector multiplication functions vmult(), Tvmult(), vmult_add() and
+ * Tvmult_add() used in this class, but with arguments of type
+ * Vector<number> instead of BlockVector<number>. Every matrix
+ * which can be used by PointerMatrix is allowed, in particular SparseMatrix
+ * is a possible entry type.
+ *
+ * <h3>Example program</h3> We document the relevant parts of
+ * <tt>examples/doxygen/block_matrix_array.cc</tt>.
+ *
+ * @dontinclude block_matrix_array.cc
+ *
+ * Obviously, we have to include the header file containing the definition of
+ * BlockMatrixArray:
+ * @skipline block_matrix_array.h
+ *
+ * First, we set up some matrices to be entered into the blocks.
+ * @skip main
+ * @until C.fill
+ *
+ * Now, we are ready to build a <i>2x2</i> BlockMatrixArray.
+ * @line Block
+ * First, we enter the matrix <tt>A</tt> multiplied by 2 in the upper left
+ * block
+ * @line enter
+ * Now -1 times <tt>B1</tt> in the upper right block.
+ * @line enter
+ * We add the transpose of <tt>B2</tt> to the upper right block and continue
+ * in a similar fashion. In the end, the block matrix structure is printed
+ * into a LaTeX table.
+ * @until latex
+ *
+ * Now, we set up vectors to be multiplied with this matrix and do a
+ * multiplication.
+ * @until vmult
+ *
+ * Finally, we solve a linear system with BlockMatrixArray, using no
+ * preconditioning and the conjugate gradient method.
+ * @until Error
+ *
+ * The remaining code of this sample program concerns preconditioning and is
+ * described in the documentation of BlockTrianglePrecondition.
+ *
+ * @see
+ * @ref GlossBlockLA "Block (linear algebra)"
+ * @author Guido Kanschat
+ * @date 2000-2005, 2010
+ */
+template <typename number = double, typename BlockVectorType=BlockVector<number> >
+class BlockMatrixArray : public Subscriptor
+{
+public:
+  /**
+   * Declare the type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * Default constructor creating a useless object. initialize() must be
+   * called before using it.
+   */
+  BlockMatrixArray ();
+
+  /**
+   * Constructor fixing the dimensions.
+   */
+  BlockMatrixArray (const unsigned int n_block_rows,
+                    const unsigned int n_block_cols);
+
+  /**
+   * Initialize object completely. This is the function to call for an object
+   * created by the default constructor.
+   */
+  void initialize (const unsigned int n_block_rows,
+                   const unsigned int n_block_cols);
+
+  /**
+   * Adjust the matrix to a new size and delete all blocks.
+   */
+  void reinit (const unsigned int n_block_rows,
+               const unsigned int n_block_cols);
+
+  /**
+   * Add a block matrix entry. The <tt>matrix</tt> is entered into a list of
+   * blocks for multiplication, together with its coordinates <tt>row</tt> and
+   * <tt>col</tt> as well as optional multiplication factor <tt>prefix</tt>
+   * and transpose flag <tt>transpose</tt>.
+   *
+   * @note No check for consistency of block sizes is made. Therefore,
+   * entering a block of wrong dimension here will only lead to an
+   * ExcDimensionMismatch in one of the multiplication functions.
+   */
+  template <typename MatrixType>
+  void enter (const MatrixType   &matrix,
+              const unsigned int  row,
+              const unsigned int  col,
+              const number        prefix = 1.,
+              const bool          transpose = false);
+
+  /**
+   * Delete all entries, i.e. reset the matrix to an empty state.
+   */
+  void clear();
+
+  /**
+   * Number of block-entries per column.
+   */
+  unsigned int n_block_rows () const;
+
+  /**
+   * Number of block-entries per row.
+   */
+  unsigned int n_block_cols () const;
+
+  /**
+   * Matrix-vector multiplication.
+   */
+  void vmult (BlockVectorType &dst,
+              const BlockVectorType &src) const;
+
+  /**
+   * Matrix-vector multiplication adding to <tt>dst</tt>.
+   */
+  void vmult_add(BlockVectorType &dst,
+                 const BlockVectorType &src) const;
+
+  /**
+   * Transposed matrix-vector multiplication.
+   */
+  void Tvmult (BlockVectorType &dst,
+               const BlockVectorType &src) const;
+
+  /**
+   * Transposed matrix-vector multiplication adding to <tt>dst</tt>.
+   */
+  void Tvmult_add (BlockVectorType &dst,
+                   const BlockVectorType &src) const;
+
+  /**
+   * Matrix scalar product between two vectors (at least for a symmetric
+   * matrix).
+   */
+  number matrix_scalar_product (const BlockVectorType &u,
+                                const BlockVectorType &v) const;
+
+  /**
+   * Compute $u^T M u$. This is the square of the norm induced by the matrix
+   * assuming the matrix is symmetric positive definite.
+   */
+  number matrix_norm_square (const BlockVectorType &u) const;
+
+  /**
+   * Print the block structure as a LaTeX-array. This output will not be very
+   * intuitive, since the current object lacks knowledge about what the
+   * individual blocks represent or how they should be named. Instead, what
+   * you will see is an entry for each block showing all the matrices with
+   * their multiplication factors and possibly transpose marks. The matrices
+   * themselves are named successively as they are encountered. If the same
+   * matrix is entered several times, it will be listed with different names.
+   *
+   * As an example, consider the following code:
+   * @code
+   *  FullMatrix<double> A1(4,4);
+   *  FullMatrix<double> A2(4,4);
+   *  FullMatrix<double> B(4,3);
+   *  FullMatrix<double> C(3,3);
+   *
+   *  BlockMatrixArray<double> block(2,2);
+   *
+   *  block.enter(A1,0,0);
+   *  block.enter(A2,0,0,2,true);
+   *  block.enter(B,0,1,-3.);
+   *  block.enter(B,0,1,-3.,true);
+   *  block.enter(C,1,1,1.,true);
+   *
+   *  block.print_latex(std::cout);
+   * @endcode
+   * The current function will then produce output of the following kind:
+   * @code
+   * \begin{array}{cc}
+   *    M0+2xM1^T &     -3xM2-3xM3^T\\
+   *    &      M4^T
+   * \end{array}
+   * @endcode
+   * Note how the individual blocks here are just numbered successively as
+   * <code>M0</code> to <code>M4</code> and that the output misses the fact
+   * that <code>M2</code> and <code>M3</code> are, in fact, the same matrix.
+   * Nevertheless, the output at least gives some kind of idea of the block
+   * structure of this matrix.
+   */
+  template <class StreamType>
+  void print_latex (StreamType &out) const;
+
+protected:
+  /**
+   * Internal data structure.
+   *
+   * For each entry of a BlockMatrixArray, its position, matrix, prefix and
+   * optional transposition must be stored. This structure encapsulates all of
+   * them.
+   *
+   * @author Guido Kanschat, 2000, 2001
+   */
+  class Entry
+  {
+  public:
+    /**
+     * Constructor initializing all data fields. A PointerMatrix object is
+     * generated for <tt>matrix</tt>.
+     */
+    template<typename MatrixType>
+    Entry (const MatrixType &matrix,
+           size_type row,
+           size_type col,
+           number prefix,
+           bool transpose);
+
+    /**
+     * Copy constructor invalidating the old object. Since it is only used for
+     * entering temporary objects into a vector, this is ok.
+     *
+     * For a deep copy, we would need a reproduction operator in
+     * PointerMatrixBase.
+     */
+    Entry(const Entry &);
+
+    /**
+     * Destructor, where we delete the PointerMatrix created by the
+     * constructor.
+     */
+    ~Entry();
+
+    /**
+     * Row number in the block matrix.
+     */
+    size_type row;
+
+    /**
+     * Column number in the block matrix.
+     */
+    size_type col;
+
+    /**
+     * Factor in front of the matrix block.
+     */
+    number prefix;
+
+    /**
+     * Indicates that matrix block must be transposed for multiplication.
+     */
+    bool transpose;
+
+    /**
+     * The matrix block itself.
+     */
+    PointerMatrixBase<typename BlockVectorType::BlockType > *matrix;
+  };
+
+  /**
+   * Array of block entries in the matrix.
+   */
+  std::vector<Entry> entries;
+
+private:
+  /**
+   * Number of blocks per column.
+   */
+  unsigned int block_rows;
+  /**
+   * Number of blocks per row.
+   */
+  unsigned int block_cols;
+};
+
+/*@}*/
+
+
+/**
+ * Inversion of a block-triangular matrix.
+ *
+ * In this block matrix, the inverses of the diagonal blocks are stored
+ * together with the off-diagonal blocks of a block matrix. Then, forward or
+ * backward insertion is performed block-wise. The diagonal blocks are NOT
+ * inverted for this purpose!
+ *
+ * Like for all preconditioners, the preconditioning operation is performed by
+ * the vmult() member function.
+ *
+ * @note While block indices may be duplicated (see BlockMatrixArray) to add
+ * blocks, this has to be used with caution, since summing up the inverse of
+ * two blocks does not yield the inverse of the sum. While the latter would be
+ * desirable, we can only perform the first.
+ *
+ * The implementation may be a little clumsy, but it should be sufficient as
+ * long as the block sizes are much larger than the number of blocks.
+ *
+ * <h3>Example</h3> Here, we document the second part of
+ * <tt>examples/doxygen/block_matrix_array.cc</tt>. For the beginning of this
+ * file, see BlockMatrixArray.
+ *
+ * In order to set up the preconditioner, we have to compute the inverses of
+ * the diagonal blocks ourselves. Since we used FullMatrix objects, this is
+ * fairly easy.
+ * @dontinclude block_matrix_array.cc
+ * @skip Error
+ * @until Cinv.invert
+ *
+ * After creating a <i>2x2</i> BlockTrianglePrecondition object, we only fill
+ * its diagonals. The scaling factor <i>1/2</i> used for <tt>A</tt> is the
+ * reciprocal of the scaling factor used for the <tt>matrix</tt> itself.
+ * Remember, this preconditioner actually <b>multiplies</b> with the diagonal
+ * blocks.
+ * @until Cinv
+ *
+ * Now, we have a block Jacobi preconditioner, which is still symmetric, since
+ * the blocks are symmetric. Therefore, we can still use the preconditioned
+ * conjugate gradient method.
+ * @until Error
+ *
+ * Now, we enter the subdiagonal block. This is the same as in
+ * <tt>matrix</tt>.
+ * @until B2
+ *
+ * Since the preconditioner is not symmetric anymore, we use the GMRES method
+ * for solving.
+ * @until Error
+ *
+ *
+ * @ingroup Preconditioners
+ * @author Guido Kanschat, 2001, 2005
+ */
+template <typename number = double, typename BlockVectorType = BlockVector<number> >
+class BlockTrianglePrecondition
+  : private BlockMatrixArray<number,BlockVectorType>
+{
+public:
+  /**
+   * Declare type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * Default constructor creating a useless object. initialize() must be
+   * called before using it.
+   */
+  BlockTrianglePrecondition ();
+
+  /**
+   * Constructor. This matrix must be block-quadratic, and <tt>n_blocks</tt>
+   * is the number of blocks in each direction.
+   */
+  BlockTrianglePrecondition (const unsigned int n_blocks);
+
+  /**
+   * Resize preconditioner to a new size and clear all blocks.
+   */
+  void reinit (const unsigned int n_block_rows);
+
+
+  /**
+   * Enter a block. This calls BlockMatrixArray::enter(). Remember that the
+   * diagonal blocks should actually be inverse matrices or preconditioners.
+   */
+  template <typename MatrixType>
+  void enter (const MatrixType &matrix,
+              const size_type   row,
+              const size_type   col,
+              const number      prefix    = 1.,
+              const bool        transpose = false);
+
+  /**
+   * Preconditioning.
+   */
+  void vmult (BlockVectorType &dst,
+              const BlockVectorType &src) const;
+
+  /**
+   * Preconditioning adding to <tt>dst</tt>.
+   */
+  void vmult_add (BlockVectorType &dst,
+                  const BlockVectorType &src) const;
+
+  /**
+   * Transposed preconditioning.
+   */
+  void Tvmult (BlockVectorType &dst,
+               const BlockVectorType &src) const;
+
+  /**
+   * Transposed preconditioning adding to <tt>dst</tt>.
+   */
+  void Tvmult_add (BlockVectorType &dst,
+                   const BlockVectorType &src) const;
+
+  /**
+   * Make function of base class available.
+   */
+  using BlockMatrixArray<number,BlockVectorType>::print_latex;
+
+  /**
+   * Make function of base class available.
+   */
+  using BlockMatrixArray<number,BlockVectorType>::n_block_rows;
+
+  /**
+   * Make functions of base class available.
+   */
+  using BlockMatrixArray<number,BlockVectorType>::n_block_cols;
+  using BlockMatrixArray<number,BlockVectorType>::clear;
+  using BlockMatrixArray<number,BlockVectorType>::Subscriptor::subscribe;
+  using BlockMatrixArray<number,BlockVectorType>::Subscriptor::unsubscribe;
+
+  /**
+   * @addtogroup Exceptions
+   * @{
+   */
+
+  /**
+   * Each diagonal block must contain one and only one matrix. If this
+   * exception is thrown, you did not enter a matrix here.
+   */
+  DeclException1(ExcNoDiagonal,
+                 size_type,
+                 << "No diagonal entry was added for block " << arg1);
+
+  /**
+   * Each diagonal block must contain one and only one matrix. If this
+   * exception is thrown, you entered a second matrix here.
+   */
+  DeclException1(ExcMultipleDiagonal,
+                 size_type,
+                 << "Inverse diagonal entries may not be added in block "
+                 << arg1);
+  //@}
+private:
+  /**
+   * Add all off-diagonal contributions and return the entry of the diagonal
+   * element for one row.
+   */
+  void do_row (BlockVectorType &dst,
+               size_type row_num) const;
+
+  /**
+   * Flag for backward insertion.
+   */
+  bool backward;
+};
+
+
+#ifndef DOXYGEN
+//---------------------------------------------------------------------------
+
+// Store position, factor and flag, plus the wrapper new_pointer_matrix_base() returns for m.
+template <typename number, typename BlockVectorType>
+template <typename MatrixType>
+inline
+BlockMatrixArray<number, BlockVectorType>::Entry::Entry (const MatrixType &m,
+                                                         size_type         row,
+                                                         size_type         col,
+                                                         number            prefix,
+                                                         bool              transpose)
+  : row (row),
+    col (col),
+    prefix (prefix),
+    transpose (transpose),
+    matrix (new_pointer_matrix_base(m, typename BlockVectorType::BlockType(),
+                                    typeid(*this).name()))
+{}
+
+
+
+// Check that (row,col) addresses an existing block, then append a new Entry for it.
+template <typename number, typename BlockVectorType>
+template <typename MatrixType>
+inline void
+BlockMatrixArray<number, BlockVectorType>::enter (const MatrixType &matrix,
+                                                  unsigned int      row,
+                                                  unsigned int      col,
+                                                  number            prefix,
+                                                  bool              transpose)
+{
+  Assert(row<n_block_rows(), ExcIndexRange(row, 0, n_block_rows()));
+  Assert(col<n_block_cols(), ExcIndexRange(col, 0, n_block_cols()));
+  entries.push_back (Entry (matrix, row, col, prefix, transpose));
+}
+
+
+// Write the block structure as a LaTeX array; matrices are named M0, M1, ...
+template <typename number, typename BlockVectorType>
+template <class StreamType>
+inline
+void
+BlockMatrixArray<number, BlockVectorType>::print_latex (StreamType &out) const
+{
+  typedef PointerMatrixBase<typename BlockVectorType::BlockType> BlockPointer;
+  typedef std::map<const BlockPointer *, std::string>            NameMap;
+
+  const unsigned int n_rows = n_block_rows();
+  const unsigned int n_cols = n_block_cols();
+
+  out << "\\begin{array}{" << std::string(n_cols, 'c') << "}" << std::endl;
+
+  // Text of each cell; cells of blocks without any entry stay empty.
+  Table<2,std::string> cells(n_rows, n_cols);
+
+  // Names are handed out in the order in which the stored matrix pointers
+  // are first encountered.
+  NameMap   names;
+  size_type next_index = 0;
+
+  for (typename std::vector<Entry>::const_iterator entry = entries.begin();
+       entry != entries.end(); ++entry)
+    {
+      typename NameMap::iterator name = names.find(entry->matrix);
+      if (name == names.end())
+        {
+          std::ostringstream label;
+          label << "M" << next_index++;
+          name = names.insert(typename NameMap::value_type(entry->matrix,
+                                                           label.str())).first;
+        }
+
+      // A leading '+' is only needed if the cell already holds a term and
+      // the factor does not print its own sign.
+      std::ostringstream term;
+      if (cells(entry->row, entry->col) != "" && entry->prefix >= 0)
+        term << "+";
+      if (entry->prefix != 1.)
+        term << entry->prefix << 'x';
+      term << name->second;
+      if (entry->transpose)
+        term << "^T";
+
+      cells(entry->row, entry->col) += term.str();
+    }
+
+  for (unsigned int i=0; i<n_rows; ++i)
+    for (unsigned int j=0; j<n_cols; ++j)
+      {
+        out << '\t' << cells(i,j);
+        if (j != n_cols-1)
+          out << " &";
+        else if (i != n_rows-1)
+          out << "\\\\" << std::endl;
+        else
+          out << std::endl;
+      }
+  out << "\\end{array}" << std::endl;
+}
+
+// Hand the block on unchanged to the privately inherited BlockMatrixArray::enter().
+template <typename number, typename BlockVectorType>
+template <typename MatrixType>
+inline void
+BlockTrianglePrecondition<number, BlockVectorType>::enter (const MatrixType &matrix,
+                                                           size_type         row,
+                                                           size_type         col,
+                                                           number            prefix,
+                                                           bool              transpose)
+{
+  this->BlockMatrixArray<number, BlockVectorType>::enter (matrix, row, col, prefix, transpose);
+}
+
+
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/block_matrix_base.h b/include/deal.II/lac/block_matrix_base.h
new file mode 100644
index 0000000..60ea3c4
--- /dev/null
+++ b/include/deal.II/lac/block_matrix_base.h
@@ -0,0 +1,2677 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__block_matrix_base_h
+#define dealii__block_matrix_base_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/table.h>
+#include <deal.II/base/thread_management.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/lac/block_indices.h>
+#include <deal.II/lac/exceptions.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/matrix_iterator.h>
+#include <deal.II/lac/vector.h>
+
+#include <cmath>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+template <typename> class MatrixIterator;
+
+
+
+/*! @addtogroup Matrix1
+ *@{
+ */
+
+/**
+ * Namespace in which iterators in block matrices are implemented.
+ *
+ * @author Wolfgang Bangerth, 2004
+ */
+namespace BlockMatrixIterators
+{
+  /**
+   * Base class for block matrix accessors, implementing the stepping through
+   * a matrix.
+   */
+  template <class BlockMatrixType>
+  class AccessorBase
+  {
+  public:
+    /**
+     * Declare type for container size.
+     */
+    typedef types::global_dof_index size_type;
+
+    /**
+     * Typedef the value type of the matrix we point into.
+     */
+    typedef typename BlockMatrixType::value_type value_type;
+
+    /**
+     * Initialize data fields to default values.
+     */
+    AccessorBase ();
+
+    /**
+     * Block row of the element represented by this object.
+     */
+    unsigned int block_row() const;
+
+    /**
+     * Block column of the element represented by this object.
+     */
+    unsigned int block_column() const;
+
+  protected:
+    /**
+     * Block row into which we presently point.
+     */
+    unsigned int row_block;
+
+    /**
+     * Block column into which we presently point.
+     */
+    unsigned int col_block;
+
+    /**
+     * Let the iterator class be a friend.
+     */
+    template <typename>
+    friend class MatrixIterator;
+  };
+
+
+
+  /**
+   * Accessor classes in block matrices.
+   */
+  template <class BlockMatrixType, bool Constness>
+  class Accessor;
+
+
+  /**
+   * Block matrix accessor for non-const matrices.
+   */
+  template <class BlockMatrixType>
+  class Accessor<BlockMatrixType, false>
+    :
+    public AccessorBase<BlockMatrixType>
+  {
+  public:
+    /**
+     * Declare type for container size.
+     */
+    typedef types::global_dof_index size_type;
+
+    /**
+     * Type of the matrix used in this accessor.
+     */
+    typedef BlockMatrixType MatrixType;
+
+    /**
+     * Typedef the value type of the matrix we point into.
+     */
+    typedef typename BlockMatrixType::value_type value_type;
+
+    /**
+     * Constructor. Since this accessor also permits write access (see
+     * set_value()), it takes a pointer to a non-const matrix.
+     *
+     * Place the iterator at the beginning of the given row of the matrix, or
+     * create the end pointer if @p row equals the total number of rows in the
+     * matrix.
+     */
+    Accessor (BlockMatrixType *m,
+              const size_type row,
+              const size_type col);
+
+    /**
+     * Row number of the element represented by this object.
+     */
+    size_type row() const;
+
+    /**
+     * Column number of the element represented by this object.
+     */
+    size_type column() const;
+
+    /**
+     * Value of the entry at the current position.
+     */
+    value_type value() const;
+
+    /**
+     * Set new value.
+     */
+    void set_value(value_type newval) const;
+
+  protected:
+    /**
+     * The matrix accessed.
+     */
+    BlockMatrixType *matrix;
+
+    /**
+     * Iterator of the underlying matrix class.
+     */
+    typename BlockMatrixType::BlockType::iterator base_iterator;
+
+    /**
+     * Move ahead one element.
+     */
+    void advance ();
+
+    /**
+     * Compare this accessor with another one for equality.
+     */
+    bool operator == (const Accessor &a) const;
+
+    template <typename> friend class MatrixIterator;
+    friend class Accessor<BlockMatrixType, true>;
+  };
+
+  /**
+   * Block matrix accessor for constant matrices, implementing the stepping
+   * through a matrix.
+   */
+  template <class BlockMatrixType>
+  class Accessor<BlockMatrixType, true>
+    :
+    public AccessorBase<BlockMatrixType>
+  {
+  public:
+    /**
+     * Declare type for container size.
+     */
+    typedef types::global_dof_index size_type;
+
+    /**
+     * Type of the matrix used in this accessor.
+     */
+    typedef const BlockMatrixType MatrixType;
+
+    /**
+     * Typedef the value type of the matrix we point into.
+     */
+    typedef typename BlockMatrixType::value_type value_type;
+
+    /**
+     * Constructor. Since we use accessors only for read access, a const
+     * matrix pointer is sufficient.
+     *
+     * Place the iterator at the beginning of the given row of the matrix, or
+     * create the end pointer if @p row equals the total number of rows in the
+     * matrix.
+     */
+    Accessor (const BlockMatrixType *m,
+              const size_type row,
+              const size_type col);
+
+    /**
+     * Initialize const accessor from non-const accessor.
+     */
+    Accessor(const Accessor<BlockMatrixType, false> &);
+
+    /**
+     * Row number of the element represented by this object.
+     */
+    size_type row() const;
+
+    /**
+     * Column number of the element represented by this object.
+     */
+    size_type column() const;
+
+    /**
+     * Value of the entry at the current position.
+     */
+    value_type value() const;
+  protected:
+    /**
+     * The matrix accessed.
+     */
+    const BlockMatrixType *matrix;
+
+    /**
+     * Iterator of the underlying matrix class.
+     */
+    typename BlockMatrixType::BlockType::const_iterator base_iterator;
+
+    /**
+     * Move ahead one element.
+     */
+    void advance ();
+
+    /**
+     * Compare this accessor with another one for equality.
+     */
+    bool operator == (const Accessor &a) const;
+
+    /**
+     * Let the iterator class be a friend.
+     */
+    template <typename>
+    friend class dealii::MatrixIterator;
+  };
+}
+
+
+
+/**
+ * Blocked matrix class. The behaviour of objects of this type is almost as
+ * for the usual matrix objects, with most of the functions being implemented
+ * in both classes. The main difference is that the matrix represented by this
+ * object is composed of an array of matrices (e.g. of type
+ * SparseMatrix<number>) and all accesses to the elements of this object are
+ * relayed to accesses of the base matrices. The actual type of the individual
+ * blocks of this matrix is the type of the template argument, and can, for
+ * example be the usual SparseMatrix or PETScWrappers::SparseMatrix.
+ *
+ * In addition to the usual matrix access and linear algebra functions, there
+ * are functions block() which allow access to the different blocks of the
+ * matrix. This may, for example, be of help when you want to implement Schur
+ * complement methods, or block preconditioners, where each block belongs to a
+ * specific component of the equation you are presently discretizing.
+ *
+ * Note that the numbers of blocks and rows are implicitly determined by the
+ * sparsity pattern objects used.
+ *
+ * Objects of this type are frequently used when a system of differential
+ * equations has solutions with variables that fall into different classes.
+ * For example, solutions of the Stokes or Navier-Stokes equations have @p dim
+ * velocity components and one pressure component. In this case, it may make
+ * sense to consider the linear system of equations as a system of 2x2 blocks,
+ * and one can construct preconditioners or solvers based on this 2x2 block
+ * structure. This class can help you in these cases, as it allows you to view the
+ * matrix alternatively as one big matrix, or as a number of individual
+ * blocks.
+ *
+ *
+ * <h3>Inheriting from this class</h3>
+ *
+ * Since this class simply forwards its calls to the subobjects (if necessary
+ * after adjusting indices denoting which subobject is meant), this class is
+ * completely independent of the actual type of the subobject. The functions
+ * that set up block matrices and destroy them, however, have to be
+ * implemented in derived classes. These functions also have to fill the data
+ * members provided by this base class, as they are only used passively in
+ * this class.
+ *
+ *
+ * Most of the functions take a vector or block vector argument. These
+ * functions can, in general, only successfully be compiled if the individual
+ * blocks of this matrix implement the respective functions operating on the
+ * vector type in question. For example, if you have a block sparse matrix
+ * over deal.II SparseMatrix objects, then you will likely not be able to form
+ * the matrix-vector multiplication with a block vector over
+ * PETScWrappers::SparseMatrix objects. If you attempt anyway, you will likely
+ * get a number of compiler errors.
+ *
+ * @note Instantiations for this template are provided for <tt>@<float@> and
+ * @<double@></tt>; others can be generated in application programs (see the
+ * section on
+ * @ref Instantiations
+ * in the manual).
+ *
+ * @see
+ * @ref GlossBlockLA "Block (linear algebra)"
+ * @author Wolfgang Bangerth, 2000, 2004
+ */
+template <typename MatrixType>
+class BlockMatrixBase : public Subscriptor
+{
+public:
+  /**
+   * Type of the individual blocks, i.e., the underlying matrix type given as
+   * the template argument.
+   */
+  typedef MatrixType BlockType;
+
+  /**
+   * Type of matrix entries. These are analogous to typedefs in the standard
+   * library containers.
+   */
+  typedef typename BlockType::value_type value_type;
+  typedef value_type             *pointer;
+  typedef const value_type       *const_pointer;
+  typedef value_type             &reference;
+  typedef const value_type       &const_reference;
+  typedef types::global_dof_index size_type;
+
+  typedef
+  MatrixIterator<BlockMatrixIterators::Accessor<BlockMatrixBase, false> >
+  iterator;
+
+  typedef
+  MatrixIterator<BlockMatrixIterators::Accessor<BlockMatrixBase, true> >
+  const_iterator;
+
+
+  /**
+   * Default constructor.
+   */
+  BlockMatrixBase ();
+
+  /**
+   * Destructor.
+   */
+  ~BlockMatrixBase ();
+
+  /**
+   * Copy the matrix given as argument into the current object.
+   *
+   * Copying matrices is an expensive operation that we do not want to happen
+   * by accident through compiler generated code for <code>operator=</code>.
+   * (This would happen, for example, if one accidentally declared a function
+   * argument of the current type <i>by value</i> rather than <i>by
+   * reference</i>.) The functionality of copying matrices is implemented in
+   * this member function instead. All copy operations of objects of this type
+   * therefore require an explicit function call.
+   *
+   * The source matrix may be a matrix of arbitrary type, as long as its data
+   * type is convertible to the data type of this matrix.
+   *
+   * The function returns a reference to <tt>this</tt>.
+   */
+  template <class BlockMatrixType>
+  BlockMatrixBase &
+  copy_from (const BlockMatrixType &source);
+
+  /**
+   * Access the block with the given coordinates.
+   */
+  BlockType &
+  block (const unsigned int row,
+         const unsigned int column);
+
+
+  /**
+   * Access the block with the given coordinates. Version for constant
+   * objects.
+   */
+  const BlockType &
+  block (const unsigned int row,
+         const unsigned int column) const;
+
+  /**
+   * Return the dimension of the codomain (or range) space. To remember: the
+   * matrix is of dimension $m \times n$.
+   */
+  size_type m () const;
+
+  /**
+   * Return the dimension of the domain space. To remember: the matrix is of
+   * dimension $m \times n$.
+   */
+  size_type n () const;
+
+
+  /**
+   * Return the number of blocks in a column. Returns zero if no sparsity
+   * pattern is presently associated to this matrix.
+   */
+  unsigned int n_block_rows () const;
+
+  /**
+   * Return the number of blocks in a row. Returns zero if no sparsity pattern
+   * is presently associated to this matrix.
+   */
+  unsigned int n_block_cols () const;
+
+  /**
+   * Set the element <tt>(i,j)</tt> to <tt>value</tt>. Throws an error if the
+   * entry does not exist or if <tt>value</tt> is not a finite number. Still,
+   * it is allowed to store zero values in non-existent fields.
+   */
+  void set (const size_type i,
+            const size_type j,
+            const value_type value);
+
+  /**
+   * Set all elements given in a FullMatrix into the sparse matrix locations
+   * given by <tt>indices</tt>. In other words, this function writes the
+   * elements in <tt>full_matrix</tt> into the calling matrix, using the
+   * local-to-global indexing specified by <tt>indices</tt> for both the rows
+   * and the columns of the matrix. This function assumes a quadratic sparse
+   * matrix and a quadratic full_matrix, the usual situation in FE
+   * calculations.
+   *
+   * The optional parameter <tt>elide_zero_values</tt> can be used to specify
+   * whether zero values should be set anyway or they should be filtered away
+   * (and not change the previous content in the respective element if it
+   * exists). The default value is <tt>false</tt>, i.e., even zero values are
+   * treated.
+   */
+  template <typename number>
+  void set (const std::vector<size_type> &indices,
+            const FullMatrix<number>     &full_matrix,
+            const bool                    elide_zero_values = false);
+
+  /**
+   * Same function as before, but now including the possibility to use
+   * rectangular full_matrices and different local-to-global indexing on rows
+   * and columns, respectively.
+   */
+  template <typename number>
+  void set (const std::vector<size_type> &row_indices,
+            const std::vector<size_type> &col_indices,
+            const FullMatrix<number>     &full_matrix,
+            const bool                    elide_zero_values = false);
+
+  /**
+   * Set several elements in the specified row of the matrix with column
+   * indices as given by <tt>col_indices</tt> to the respective value.
+   *
+   * The optional parameter <tt>elide_zero_values</tt> can be used to specify
+   * whether zero values should be set anyway or they should be filtered away
+   * (and not change the previous content in the respective element if it
+   * exists). The default value is <tt>false</tt>, i.e., even zero values are
+   * treated.
+   */
+  template <typename number>
+  void set (const size_type row,
+            const std::vector<size_type> &col_indices,
+            const std::vector<number>    &values,
+            const bool                    elide_zero_values = false);
+
+  /**
+   * Set several elements to values given by <tt>values</tt> in a given row in
+   * columns given by col_indices into the sparse matrix.
+   *
+   * The optional parameter <tt>elide_zero_values</tt> can be used to specify
+   * whether zero values should be inserted anyway or they should be filtered
+   * away. The default value is <tt>false</tt>, i.e., even zero values are
+   * inserted/replaced.
+   */
+  template <typename number>
+  void set (const size_type  row,
+            const size_type  n_cols,
+            const size_type *col_indices,
+            const number    *values,
+            const bool       elide_zero_values = false);
+
+  /**
+   * Add <tt>value</tt> to the element (<i>i,j</i>).  Throws an error if the
+   * entry does not exist or if <tt>value</tt> is not a finite number. Still,
+   * it is allowed to store zero values in non-existent fields.
+   */
+  void add (const size_type i,
+            const size_type j,
+            const value_type value);
+
+  /**
+   * Add all elements given in a FullMatrix into sparse matrix locations
+   * given by <tt>indices</tt>. In other words, this function adds
+   * the elements in <tt>full_matrix</tt> to the respective entries in calling
+   * matrix, using the local-to-global indexing specified by <tt>indices</tt>
+   * for both the rows and the columns of the matrix. This function assumes a
+   * quadratic sparse matrix and a quadratic full_matrix, the usual situation
+   * in FE calculations.
+   *
+   * The optional parameter <tt>elide_zero_values</tt> can be used to specify
+   * whether zero values should be added anyway or these should be filtered
+   * away and only non-zero data is added. The default value is <tt>true</tt>,
+   * i.e., zero values won't be added into the matrix.
+   */
+  template <typename number>
+  void add (const std::vector<size_type> &indices,
+            const FullMatrix<number>     &full_matrix,
+            const bool                    elide_zero_values = true);
+
+  /**
+   * Same function as before, but now including the possibility to use
+   * rectangular full_matrices and different local-to-global indexing on rows
+   * and columns, respectively.
+   */
+  template <typename number>
+  void add (const std::vector<size_type> &row_indices,
+            const std::vector<size_type> &col_indices,
+            const FullMatrix<number>     &full_matrix,
+            const bool                    elide_zero_values = true);
+
+  /**
+   * Add several values to the elements in the specified row of the matrix
+   * whose column indices are given by <tt>col_indices</tt>; the entries of
+   * <tt>values</tt> are the respective summands.
+   *
+   * The optional parameter <tt>elide_zero_values</tt> can be used to specify
+   * whether zero values should be added anyway or these should be filtered
+   * away and only non-zero data is added. The default value is <tt>true</tt>,
+   * i.e., zero values won't be added into the matrix.
+   */
+  template <typename number>
+  void add (const size_type row,
+            const std::vector<size_type> &col_indices,
+            const std::vector<number>    &values,
+            const bool                    elide_zero_values = true);
+
+  /**
+   * Add an array of values given by <tt>values</tt> in the given global
+   * matrix row at columns specified by col_indices in the sparse matrix.
+   *
+   * The optional parameter <tt>elide_zero_values</tt> can be used to specify
+   * whether zero values should be added anyway or these should be filtered
+   * away and only non-zero data is added. The default value is <tt>true</tt>,
+   * i.e., zero values won't be added into the matrix.
+   */
+  template <typename number>
+  void add (const size_type  row,
+            const size_type  n_cols,
+            const size_type *col_indices,
+            const number    *values,
+            const bool       elide_zero_values = true,
+            const bool       col_indices_are_sorted = false);
+
+  /**
+   * Add <tt>matrix</tt> scaled by <tt>factor</tt> to this matrix, i.e. the
+   * matrix <tt>factor*matrix</tt> is added to <tt>this</tt>. If the sparsity
+   * pattern of the calling matrix does not contain all the elements in the
+   * sparsity pattern of the input matrix, this function will throw an
+   * exception.
+   *
+   * Depending on MatrixType, however, additional restrictions might arise.
+   * Some sparse matrix formats require <tt>matrix</tt> to be based on the
+   * same sparsity pattern as the calling matrix.
+   */
+  void add (const value_type                   factor,
+            const BlockMatrixBase<MatrixType> &matrix);
+
+  /**
+   * Return the value of the entry (i,j).  This may be an expensive operation
+   * and you should always take care where to call this function.  In order to
+   * avoid abuse, this function throws an exception if the wanted element does
+   * not exist in the matrix.
+   */
+  value_type operator () (const size_type i,
+                          const size_type j) const;
+
+  /**
+   * This function is mostly like operator()() in that it returns the value of
+   * the matrix entry <tt>(i,j)</tt>. The only difference is that if this
+   * entry does not exist in the sparsity pattern, then instead of raising an
+   * exception, zero is returned. While this may be convenient in some cases,
+   * note that it is simple to write algorithms that are slow compared to an
+   * optimal solution, since the sparsity of the matrix is not used.
+   */
+  value_type el (const size_type i,
+                 const size_type j) const;
+
+  /**
+   * Return the main diagonal element in the <i>i</i>th row. This function
+   * throws an error if the matrix is not quadratic and also if the diagonal
+   * blocks of the matrix are not quadratic.
+   *
+   * This function is considerably faster than the operator()(), since for
+   * quadratic matrices, the diagonal entry may be the first to be stored in
+   * each row and access therefore does not involve searching for the right
+   * column number.
+   */
+  value_type diag_element (const size_type i) const;
+
+  /**
+   * Call the compress() function on all the subblocks of the matrix.
+   *
+   *
+   * See
+   * @ref GlossCompress "Compressing distributed objects"
+   * for more information.
+   */
+  void compress (::dealii::VectorOperation::values operation);
+
+  /**
+   * Multiply the entire matrix by a fixed factor.
+   */
+  BlockMatrixBase &operator *= (const value_type factor);
+
+  /**
+   * Divide the entire matrix by a fixed factor.
+   */
+  BlockMatrixBase &operator /= (const value_type factor);
+
+  /**
+   * Adding Matrix-vector multiplication. Add $M*src$ on $dst$ with $M$ being
+   * this matrix.
+   */
+  template <class BlockVectorType>
+  void vmult_add (BlockVectorType       &dst,
+                  const BlockVectorType &src) const;
+
+  /**
+   * Adding Matrix-vector multiplication. Add <i>M<sup>T</sup>src</i> to
+   * <i>dst</i> with <i>M</i> being this matrix. This function does the same
+   * as vmult_add() but takes the transposed matrix.
+   */
+  template <class BlockVectorType>
+  void Tvmult_add (BlockVectorType       &dst,
+                   const BlockVectorType &src) const;
+
+  /**
+   * Return the norm of the vector <i>v</i> with respect to the norm induced
+   * by this matrix, i.e. <i>v<sup>T</sup>Mv</i>. This is useful, e.g. in the
+   * finite element context, where the <i>L<sup>2</sup></i>-norm of a function
+   * equals the matrix norm with respect to the mass matrix of the vector
+   * representing the nodal values of the finite element function. Note that
+   * even though the function's name might suggest something different, for
+   * historic reasons not the norm but its square is returned, as defined
+   * above by the scalar product.
+   *
+   * Obviously, the matrix needs to be square for this operation.
+   */
+  template <class BlockVectorType>
+  value_type
+  matrix_norm_square (const BlockVectorType &v) const;
+
+  /**
+   * Compute the matrix scalar product $\left(u,Mv\right)$.
+   */
+  template <class BlockVectorType>
+  value_type
+  matrix_scalar_product (const BlockVectorType &u,
+                         const BlockVectorType &v) const;
+
+  /**
+   * Compute the residual <i>r=b-Ax</i>. Write the residual into <tt>dst</tt>.
+   *
+   * NOTE(review): the meaning of the returned value is not stated here;
+   * presumably it is a norm of the residual vector -- confirm against the
+   * implementation.
+   */
+  template <class BlockVectorType>
+  value_type residual (BlockVectorType       &dst,
+                       const BlockVectorType &x,
+                       const BlockVectorType &b) const;
+
+  /**
+   * Print the matrix to the given stream, using the format <tt>(line,col)
+   * value</tt>, i.e. one nonzero entry of the matrix per line. The optional
+   * flag outputs the sparsity pattern in a different style according to the
+   * underlying sparse matrix type.
+   */
+  void print (std::ostream &out,
+              const bool    alternative_output = false) const;
+
+  /**
+   * Iterator starting at the first entry.
+   */
+  iterator begin ();
+
+  /**
+   * Final iterator.
+   */
+  iterator end ();
+
+  /**
+   * Iterator starting at the first entry of row <tt>r</tt>.
+   */
+  iterator begin (const size_type r);
+
+  /**
+   * Final iterator of row <tt>r</tt>.
+   */
+  iterator end (const size_type r);
+  /**
+   * Iterator starting at the first entry. Version for constant objects.
+   */
+  const_iterator begin () const;
+
+  /**
+   * Final iterator. Version for constant objects.
+   */
+  const_iterator end () const;
+
+  /**
+   * Iterator starting at the first entry of row <tt>r</tt>. Version for
+   * constant objects.
+   */
+  const_iterator begin (const size_type r) const;
+
+  /**
+   * Final iterator of row <tt>r</tt>. Version for constant objects.
+   */
+  const_iterator end (const size_type r) const;
+
+  /**
+   * Return a reference to the underlying BlockIndices data of the rows.
+   */
+  const BlockIndices &get_row_indices () const;
+
+  /**
+   * Return a reference to the underlying BlockIndices data of the columns.
+   */
+  const BlockIndices &get_column_indices () const;
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object. Note that only the memory reserved on the current processor is
+   * returned in case this is called in an MPI-based program.
+   */
+  std::size_t memory_consumption () const;
+
+  /**
+   * @addtogroup Exceptions
+   * @{
+   */
+
+  /**
+   * Exception denoting that two blocks, identified in the message by their
+   * block coordinates, have differing row numbers.
+   */
+  DeclException4 (ExcIncompatibleRowNumbers,
+                  int, int, int, int,
+                  << "The blocks [" << arg1 << ',' << arg2 << "] and ["
+                  << arg3 << ',' << arg4 << "] have differing row numbers.");
+  /**
+   * Exception denoting that two blocks, identified in the message by their
+   * block coordinates, have differing column numbers.
+   */
+  DeclException4 (ExcIncompatibleColNumbers,
+                  int, int, int, int,
+                  << "The blocks [" << arg1 << ',' << arg2 << "] and ["
+                  << arg3 << ',' << arg4 << "] have differing column numbers.");
+  //@}
+protected:
+  /**
+   * Release all memory and return to a state just like after having called
+   * the default constructor. It also forgets the sparsity pattern it was
+   * previously tied to.
+   *
+   * This calls clear for all sub-matrices and then resets this object to have
+   * no blocks at all.
+   *
+   * This function is protected since it may be necessary to release
+   * additional structures. A derived class can make it public again, if it is
+   * sufficient.
+   */
+  void clear ();
+
+  /**
+   * Index arrays for rows and columns.
+   */
+  BlockIndices row_block_indices;
+  BlockIndices column_block_indices;
+
+  /**
+   * Array of sub-matrices.
+   */
+  Table<2,SmartPointer<BlockType, BlockMatrixBase<MatrixType> > > sub_objects;
+
+  /**
+   * This function collects the sizes of the sub-objects and stores them in
+   * internal arrays, in order to be able to relay global indices into the
+   * matrix to indices into the subobjects. You *must* call this function each
+   * time after you have changed the size of the sub-objects.
+   *
+   * Derived classes should call this function whenever the size of the sub-
+   * objects has changed and the @p X_block_indices arrays need to be updated.
+   *
+   * Note that this function is not public since not all derived classes need
+   * to export its interface. For example, for the usual deal.II SparseMatrix
+   * class, the sizes are implicitly determined whenever reinit() is called,
+   * and individual blocks cannot be resized. For that class, this function
+   * therefore does not have to be public. On the other hand, for the PETSc
+   * classes, there is no associated sparsity pattern object that determines
+   * the block sizes, and for these the function needs to be publicly
+   * available. These classes therefore export this function.
+   */
+  void collect_sizes ();
+
+  /**
+   * Matrix-vector multiplication: let $dst = M*src$ with $M$ being this
+   * matrix.
+   *
+   * Due to problems with deriving template arguments between the block and
+   * non-block versions of the vmult/Tvmult functions, the actual functions
+   * are implemented in derived classes, with implementations forwarding the
+   * calls to the implementations provided here under a unique name for which
+   * template arguments can be derived by the compiler.
+   */
+  template <class BlockVectorType>
+  void vmult_block_block (BlockVectorType       &dst,
+                          const BlockVectorType &src) const;
+
+  /**
+   * Matrix-vector multiplication. Just like the previous function, but only
+   * applicable if the matrix has only one block column.
+   *
+   * Due to problems with deriving template arguments between the block and
+   * non-block versions of the vmult/Tvmult functions, the actual functions
+   * are implemented in derived classes, with implementations forwarding the
+   * calls to the implementations provided here under a unique name for which
+   * template arguments can be derived by the compiler.
+   */
+  template <class BlockVectorType,
+            class VectorType>
+  void vmult_block_nonblock (BlockVectorType          &dst,
+                             const VectorType &src) const;
+
+  /**
+   * Matrix-vector multiplication. Just like the previous function, but only
+   * applicable if the matrix has only one block row.
+   *
+   * Due to problems with deriving template arguments between the block and
+   * non-block versions of the vmult/Tvmult functions, the actual functions
+   * are implemented in derived classes, with implementations forwarding the
+   * calls to the implementations provided here under a unique name for which
+   * template arguments can be derived by the compiler.
+   */
+  template <class BlockVectorType,
+            class VectorType>
+  void vmult_nonblock_block (VectorType    &dst,
+                             const BlockVectorType &src) const;
+
+  /**
+   * Matrix-vector multiplication. Just like the previous function, but only
+   * applicable if the matrix has only one block.
+   *
+   * Due to problems with deriving template arguments between the block and
+   * non-block versions of the vmult/Tvmult functions, the actual functions
+   * are implemented in derived classes, with implementations forwarding the
+   * calls to the implementations provided here under a unique name for which
+   * template arguments can be derived by the compiler.
+   */
+  template <class VectorType>
+  void vmult_nonblock_nonblock (VectorType       &dst,
+                                const VectorType &src) const;
+
+  /**
+   * Matrix-vector multiplication: let $dst = M^T*src$ with $M$ being this
+   * matrix. This function does the same as vmult() but takes the transposed
+   * matrix.
+   *
+   * Due to problems with deriving template arguments between the block and
+   * non-block versions of the vmult/Tvmult functions, the actual functions
+   * are implemented in derived classes, with implementations forwarding the
+   * calls to the implementations provided here under a unique name for which
+   * template arguments can be derived by the compiler.
+   */
+  template <class BlockVectorType>
+  void Tvmult_block_block (BlockVectorType       &dst,
+                           const BlockVectorType &src) const;
+
+  /**
+   * Matrix-vector multiplication. Just like the previous function, but only
+   * applicable if the matrix has only one block row.
+   *
+   * Due to problems with deriving template arguments between the block and
+   * non-block versions of the vmult/Tvmult functions, the actual functions
+   * are implemented in derived classes, with implementations forwarding the
+   * calls to the implementations provided here under a unique name for which
+   * template arguments can be derived by the compiler.
+   */
+  template <class BlockVectorType,
+            class VectorType>
+  void Tvmult_block_nonblock (BlockVectorType  &dst,
+                              const VectorType &src) const;
+
+  /**
+   * Matrix-vector multiplication. Just like the previous function, but only
+   * applicable if the matrix has only one block column.
+   *
+   * Due to problems with deriving template arguments between the block and
+   * non-block versions of the vmult/Tvmult functions, the actual functions
+   * are implemented in derived classes, with implementations forwarding the
+   * calls to the implementations provided here under a unique name for which
+   * template arguments can be derived by the compiler.
+   */
+  template <class BlockVectorType,
+            class VectorType>
+  void Tvmult_nonblock_block (VectorType    &dst,
+                              const BlockVectorType &src) const;
+
+  /**
+   * Matrix-vector multiplication. Just like the previous function, but only
+   * applicable if the matrix has only one block.
+   *
+   * Due to problems with deriving template arguments between the block and
+   * non-block versions of the vmult/Tvmult functions, the actual functions
+   * are implemented in derived classes, with implementations forwarding the
+   * calls to the implementations provided here under a unique name for which
+   * template arguments can be derived by the compiler.
+   */
+  template <class VectorType>
+  void Tvmult_nonblock_nonblock (VectorType       &dst,
+                                 const VectorType &src) const;
+
+
+protected:
+
+  /**
+   * Some matrix types, in particular PETSc, need to synchronize set and add
+   * operations. This has to be done for all matrices in the BlockMatrix. This
+   * routine prepares adding of elements by notifying all blocks. Called by
+   * all internal routines before adding elements.
+   */
+  void prepare_add_operation();
+
+  /**
+   * Notifies all blocks to let them prepare for setting elements, see
+   * prepare_add_operation().
+   */
+  void prepare_set_operation();
+
+
+private:
+
+  /**
+   * A structure containing some fields used by the set() and add() functions
+   * that is used to pre-sort the input fields. Since one can reasonably
+   * expect to call set() and add() from multiple threads at once as long as
+   * the matrix indices that are touched are disjoint, these temporary data
+   * fields need to be guarded by a mutex; the structure therefore contains
+   * such a mutex as a member variable.
+   */
+  struct TemporaryData
+  {
+    /**
+     * Temporary vector for counting the elements written into the individual
+     * blocks when doing a collective add or set.
+     */
+    std::vector<size_type> counter_within_block;
+
+    /**
+     * Temporary vector for column indices on each block when writing local to
+     * global data on each sparse matrix.
+     */
+    std::vector<std::vector<size_type> > column_indices;
+
+    /**
+     * Temporary vector for storing the local values (they need to be
+     * reordered when writing local to global).
+     */
+    std::vector<std::vector<value_type> > column_values;
+
+    /**
+     * A mutex variable used to guard access to the member variables of this
+     * structure.
+     */
+    Threads::Mutex mutex;
+
+    /**
+     * Copy assignment operator. This is needed because the default copy
+     * assignment operator of this class is deleted (since Threads::Mutex is
+     * not copyable) and hence the default copy assignment operator of the
+     * enclosing class is also deleted.
+     *
+     * The implementation here simply does nothing -- TemporaryData objects
+     * are just scratch objects that are resized at the beginning of their
+     * use, so there is no point actually copying anything.
+     */
+    TemporaryData &operator = (const TemporaryData &)
+    {
+      return *this;
+    }
+  };
+
+  /**
+   * A set of scratch arrays that can be used by the add() and set() functions
+   * that take pointers to data to pre-sort indices before use. Access from
+   * multiple threads is synchronized via the mutex variable that is part of
+   * the structure.
+   */
+  TemporaryData temporary_data;
+
+  /**
+   * Make the iterator class a friend. We have to work around a compiler bug
+   * here again.
+   */
+  template <typename, bool>
+  friend class BlockMatrixIterators::Accessor;
+
+  template <typename>
+  friend class MatrixIterator;
+};
+
+
+/*@}*/
+
+#ifndef DOXYGEN
+/* ------------------------- Template functions ---------------------- */
+
+
+namespace BlockMatrixIterators
+{
+  // Default constructor: start out with both block indices set to
+  // zero, i.e. referring to block (0,0).
+  template <class BlockMatrixType>
+  inline
+  AccessorBase<BlockMatrixType>::AccessorBase()
+    : row_block(0), col_block(0)
+  {
+  }
+
+
+  // Return the index of the block row the accessor currently refers
+  // to. The past-the-end state is marked by invalid_unsigned_int and
+  // must not be queried.
+  template <class BlockMatrixType>
+  inline unsigned int
+  AccessorBase<BlockMatrixType>::block_row() const
+  {
+    Assert (row_block != numbers::invalid_unsigned_int, ExcIteratorPastEnd());
+    return row_block;
+  }
+
+
+  // Return the index of the block column the accessor currently
+  // refers to. The past-the-end state is marked by
+  // invalid_unsigned_int and must not be queried.
+  template <class BlockMatrixType>
+  inline unsigned int
+  AccessorBase<BlockMatrixType>::block_column() const
+  {
+    Assert (col_block != numbers::invalid_unsigned_int, ExcIteratorPastEnd());
+    return col_block;
+  }
+
+
+  // Constructor of the accessor for constant matrices. Position the
+  // accessor on the first existing entry at or after the beginning of
+  // global row @p row; if there is no such entry (or if @p row is not
+  // a valid row index), create the past-the-end accessor instead. Only
+  // col==0 is implemented.
+  template <class BlockMatrixType>
+  inline
+  Accessor<BlockMatrixType, true>::Accessor (
+    const BlockMatrixType  *matrix,
+    const size_type        row,
+    const size_type        col)
+    :
+    matrix(matrix),
+    base_iterator(matrix->block(0,0).begin())
+  {
+    (void)col;
+    Assert(col==0, ExcNotImplemented());
+
+    // walk over the rows starting at the requested one until we find
+    // a row that has an entry in at least one block. this used to be
+    // done by recursively constructing an accessor for row+1, which
+    // needs one stack frame per empty row; a plain loop visits the
+    // same rows in the same order without that cost
+    for (size_type current_row=row; current_row<matrix->m(); ++current_row)
+      {
+        const std::pair<unsigned int,size_type> indices
+          = matrix->row_block_indices.global_to_local(current_row);
+
+        // find the first block that does
+        // have an entry in this row
+        for (unsigned int bc=0; bc<matrix->n_block_cols(); ++bc)
+          {
+            base_iterator
+              = matrix->block(indices.first, bc).begin(indices.second);
+            if (base_iterator !=
+                matrix->block(indices.first, bc).end(indices.second))
+              {
+                this->row_block = indices.first;
+                this->col_block = bc;
+                return;
+              }
+          }
+
+        // there is no block that has an entry in this row, so try
+        // the next row
+      }
+
+    // either we were asked to create the end iterator for this
+    // matrix, or there are no entries left after the given row. put
+    // base_iterator back to the value it had upon entry so that the
+    // resulting object looks the same in both cases
+    base_iterator = matrix->block(0,0).begin();
+    this->row_block = numbers::invalid_unsigned_int;
+    this->col_block = numbers::invalid_unsigned_int;
+  }
+
+
+//   template <class BlockMatrixType>
+//   inline
+//   Accessor<BlockMatrixType, true>::Accessor (const Accessor<BlockMatrixType, true>& other)
+//                :
+//                matrix(other.matrix),
+//                base_iterator(other.base_iterator)
+//   {
+//     this->row_block = other.row_block;
+//     this->col_block = other.col_block;
+//   }
+
+
+  // Conversion from the accessor for non-constant matrices: take over
+  // the matrix pointer, the iterator into the current block, and both
+  // block indices of the other accessor.
+  template <class BlockMatrixType>
+  inline
+  Accessor<BlockMatrixType, true>::Accessor (const Accessor<BlockMatrixType, false> &other)
+    : matrix(other.matrix),
+      base_iterator(other.base_iterator)
+  {
+    this->col_block = other.col_block;
+    this->row_block = other.row_block;
+  }
+
+
+  // Return the global row index of the entry pointed to: the global
+  // index of the first row of the current block row plus the row
+  // index reported by the iterator into the current block.
+  template <class BlockMatrixType>
+  inline
+  typename Accessor<BlockMatrixType, true>::size_type
+  Accessor<BlockMatrixType, true>::row() const
+  {
+    Assert (this->row_block != numbers::invalid_unsigned_int, ExcIteratorPastEnd());
+
+    const size_type first_row_of_block
+      = matrix->row_block_indices.local_to_global(this->row_block, 0);
+
+    return first_row_of_block + base_iterator->row();
+  }
+
+
+  // Return the global column index of the entry pointed to: the
+  // global index of the first column of the current block column plus
+  // the column index reported by the iterator into the current block.
+  template <class BlockMatrixType>
+  inline
+  typename Accessor<BlockMatrixType, true>::size_type
+  Accessor<BlockMatrixType, true>::column() const
+  {
+    Assert (this->col_block != numbers::invalid_unsigned_int, ExcIteratorPastEnd());
+
+    const size_type first_column_of_block
+      = matrix->column_block_indices.local_to_global(this->col_block,0);
+
+    return first_column_of_block + base_iterator->column();
+  }
+
+
+  // Return the value of the entry pointed to, as reported by the
+  // iterator into the current block. Must not be called on a
+  // past-the-end accessor.
+  template <class BlockMatrixType>
+  inline
+  typename Accessor<BlockMatrixType, true>::value_type
+  Accessor<BlockMatrixType, true>::value () const
+  {
+    Assert (this->row_block != numbers::invalid_unsigned_int, ExcIteratorPastEnd());
+    Assert (this->col_block != numbers::invalid_unsigned_int, ExcIteratorPastEnd());
+
+    return base_iterator->value();
+  }
+
+
+
+  template <class BlockMatrixType>
+  inline
+  void
+  Accessor<BlockMatrixType, true>::advance ()
+  {
+    Assert (this->row_block != numbers::invalid_unsigned_int,
+            ExcIteratorPastEnd());
+    Assert (this->col_block != numbers::invalid_unsigned_int,
+            ExcIteratorPastEnd());
+
+    // Move on to the next entry; first remember the current row inside the block
+    size_type local_row = base_iterator->row();
+
+    // Advance by one element inside the
+    // current block
+    ++base_iterator;
+
+    // while we are at the end of the current row
+    // of a block (which may happen several times
+    // in a row if rows inside a block are empty),
+    // jump to the next block and take the first
+    // entry of the corresponding row there
+    while (base_iterator ==
+           matrix->block(this->row_block, this->col_block).end(local_row))
+      {
+        // jump to the next block in this block
+        // row, if possible, otherwise go
+        // to the next row
+        if (this->col_block < matrix->n_block_cols()-1)
+          {
+            ++this->col_block;
+            base_iterator
+              = matrix->block(this->row_block, this->col_block).begin(local_row);
+          }
+        else
+          {
+            // jump back to the first block column
+            // and go to the next row
+            this->col_block = 0;
+            ++local_row;
+
+            // see if this has brought us
+            // past the number of rows in
+            // this block. if so, move to the
+            // next block row and check whether
+            // we've just fallen off the end of
+            // the whole matrix
+            if (local_row == matrix->block(this->row_block, this->col_block).m())
+              {
+                local_row = 0;
+                ++this->row_block;
+                if (this->row_block == matrix->n_block_rows())
+                  {
+                    this->row_block = numbers::invalid_unsigned_int;
+                    this->col_block = numbers::invalid_unsigned_int;
+                    return;
+                  }
+              }
+
+            base_iterator
+              = matrix->block(this->row_block, this->col_block).begin(local_row);
+          }
+      }
+  }
+
+
+  // Compare two accessors. They are equal if they refer to the same
+  // matrix and the same block and, unless both are past the end,
+  // their iterators into that block compare equal as well.
+  template <class BlockMatrixType>
+  inline
+  bool
+  Accessor<BlockMatrixType, true>::operator == (const Accessor &a) const
+  {
+    // accessors into different matrices are never equal
+    if (matrix != a.matrix)
+      return false;
+
+    // neither are accessors pointing into different blocks
+    if (this->row_block != a.row_block)
+      return false;
+    if (this->col_block != a.col_block)
+      return false;
+
+    // end iterators do not necessarily have to have the same
+    // base_iterator representation, so do not look at it for them
+    if ((this->row_block == numbers::invalid_unsigned_int)
+        &&
+        (this->col_block == numbers::invalid_unsigned_int))
+      return true;
+
+    // valid iterators, on the other hand, have to agree in it
+    return (base_iterator == a.base_iterator);
+  }
+
+//----------------------------------------------------------------------//
+
+
+  // Constructor of the accessor for non-constant matrices. Position
+  // the accessor on the first existing entry at or after the beginning
+  // of global row @p row; if there is no such entry (or if @p row is
+  // not a valid row index), create the past-the-end accessor instead.
+  // Only col==0 is implemented.
+  template <class BlockMatrixType>
+  inline
+  Accessor<BlockMatrixType, false>::Accessor (
+    BlockMatrixType  *matrix,
+    const size_type  row,
+    const size_type  col)
+    :
+    matrix(matrix),
+    base_iterator(matrix->block(0,0).begin())
+  {
+    (void)col;
+    Assert(col==0, ExcNotImplemented());
+
+    // walk over the rows starting at the requested one until we find
+    // a row that has an entry in at least one block. this used to be
+    // done by recursively constructing an accessor for row+1, which
+    // needs one stack frame per empty row; a plain loop visits the
+    // same rows in the same order without that cost
+    for (size_type current_row=row; current_row<matrix->m(); ++current_row)
+      {
+        const std::pair<unsigned int,size_type> indices
+          = matrix->row_block_indices.global_to_local(current_row);
+
+        // find the first block that does have an entry in this row.
+        // block indices are unsigned ints (see n_block_cols() and
+        // block()), so use that type for the loop index just like
+        // the accessor for constant matrices does
+        for (unsigned int bc=0; bc<matrix->n_block_cols(); ++bc)
+          {
+            base_iterator
+              = matrix->block(indices.first, bc).begin(indices.second);
+            if (base_iterator !=
+                matrix->block(indices.first, bc).end(indices.second))
+              {
+                this->row_block = indices.first;
+                this->col_block = bc;
+                return;
+              }
+          }
+
+        // there is no block that has an entry in this row, so try
+        // the next row
+      }
+
+    // either we were asked to create the end iterator for this
+    // matrix, or there are no entries left after the given row. put
+    // base_iterator back to the value it had upon entry so that the
+    // resulting object looks the same in both cases.
+    //
+    // mark the past-the-end state with invalid_unsigned_int, which is
+    // the marker AccessorBase::block_row()/block_column() and the
+    // accessor for constant matrices test for
+    base_iterator = matrix->block(0,0).begin();
+    this->row_block = numbers::invalid_unsigned_int;
+    this->col_block = numbers::invalid_unsigned_int;
+  }
+
+
+  // Return the global row of the entry pointed to: the global index
+  // of local row 0 of the current block row plus the row reported by
+  // the iterator into the current block. Must not be called on a
+  // past-the-end accessor (checked against the unsigned int sentinel
+  // that matches the type of the block numbers).
+  template <class BlockMatrixType>
+  inline
+  typename Accessor<BlockMatrixType, false>::size_type
+  Accessor<BlockMatrixType, false>::row() const
+  {
+    Assert (this->row_block != numbers::invalid_unsigned_int,
+            ExcIteratorPastEnd());
+
+    return (matrix->row_block_indices.local_to_global(this->row_block, 0) +
+            base_iterator->row());
+  }
+
+
+  // Return the global column of the entry pointed to: the global
+  // index of local column 0 of the current block column plus the
+  // column reported by the iterator into the current block. Must not
+  // be called on a past-the-end accessor (checked against the
+  // unsigned int sentinel that matches the type of the block numbers).
+  template <class BlockMatrixType>
+  inline
+  typename Accessor<BlockMatrixType, false>::size_type
+  Accessor<BlockMatrixType, false>::column() const
+  {
+    Assert (this->col_block != numbers::invalid_unsigned_int,
+            ExcIteratorPastEnd());
+
+    return (matrix->column_block_indices.local_to_global(this->col_block,0) +
+            base_iterator->column());
+  }
+
+
+  // Return the value of the entry pointed to, as reported by the
+  // iterator into the current block. Must not be called on a
+  // past-the-end accessor (checked against the unsigned int sentinel
+  // that matches the type of the block numbers).
+  template <class BlockMatrixType>
+  inline
+  typename Accessor<BlockMatrixType, false>::value_type
+  Accessor<BlockMatrixType, false>::value () const
+  {
+    Assert (this->row_block != numbers::invalid_unsigned_int,
+            ExcIteratorPastEnd());
+    Assert (this->col_block != numbers::invalid_unsigned_int,
+            ExcIteratorPastEnd());
+
+    return base_iterator->value();
+  }
+
+
+
+  // Overwrite the entry pointed to with @p newval by assigning
+  // through the iterator into the current block. Must not be called
+  // on a past-the-end accessor (checked against the unsigned int
+  // sentinel that matches the type of the block numbers).
+  template <class BlockMatrixType>
+  inline
+  void
+  Accessor<BlockMatrixType, false>::set_value (typename Accessor<BlockMatrixType, false>::value_type newval) const
+  {
+    Assert (this->row_block != numbers::invalid_unsigned_int,
+            ExcIteratorPastEnd());
+    Assert (this->col_block != numbers::invalid_unsigned_int,
+            ExcIteratorPastEnd());
+
+    base_iterator->value() = newval;
+  }
+
+
+
+  // Move the accessor to the next stored entry. Entries are visited
+  // row by row; within a row the block columns are traversed from
+  // left to right. When the last row of the last block row has been
+  // left, the accessor is put into the past-the-end state, which is
+  // encoded with numbers::invalid_unsigned_int (the type of the block
+  // numbers), consistently with the const specialization.
+  template <class BlockMatrixType>
+  inline
+  void
+  Accessor<BlockMatrixType, false>::advance ()
+  {
+    Assert (this->row_block != numbers::invalid_unsigned_int,
+            ExcIteratorPastEnd());
+    Assert (this->col_block != numbers::invalid_unsigned_int,
+            ExcIteratorPastEnd());
+
+    // Remember current row inside block
+    size_type local_row = base_iterator->row();
+
+    // Advance one element inside the
+    // current block
+    ++base_iterator;
+
+    // while we hit the end of the row of a
+    // block (which may happen multiple
+    // times if rows inside a block are
+    // empty), we have to jump to the next
+    // block and take the first entry of
+    // the row there
+    while (base_iterator ==
+           matrix->block(this->row_block, this->col_block).end(local_row))
+      {
+        // jump to next block in this block
+        // row, if possible, otherwise go
+        // to next row
+        if (this->col_block < matrix->n_block_cols()-1)
+          {
+            ++this->col_block;
+            base_iterator
+              = matrix->block(this->row_block, this->col_block).begin(local_row);
+          }
+        else
+          {
+            // jump back to next row in
+            // first block column
+            this->col_block = 0;
+            ++local_row;
+
+            // see if this has brought us
+            // past the number of rows in
+            // this block. if so see
+            // whether we've just fallen
+            // off the end of the whole
+            // matrix
+            if (local_row == matrix->block(this->row_block, this->col_block).m())
+              {
+                local_row = 0;
+                ++this->row_block;
+                if (this->row_block == matrix->n_block_rows())
+                  {
+                    this->row_block = numbers::invalid_unsigned_int;
+                    this->col_block = numbers::invalid_unsigned_int;
+                    return;
+                  }
+              }
+
+            base_iterator
+              = matrix->block(this->row_block, this->col_block).begin(local_row);
+          }
+      }
+  }
+
+
+
+  // Equality of two non-const accessors: they must refer to the same
+  // matrix and the same block; in addition they must either both be
+  // past the end or have equal iterators into the current block.
+  //
+  // The past-the-end test uses numbers::invalid_unsigned_int, the
+  // type of the block numbers; a wider sentinel would never compare
+  // equal to them, and two end accessors would then be compared
+  // through their (unrelated) base iterators.
+  template <class BlockMatrixType>
+  inline
+  bool
+  Accessor<BlockMatrixType, false>::operator == (const Accessor &a) const
+  {
+    if (matrix != a.matrix)
+      return false;
+
+    if (this->row_block == a.row_block
+        && this->col_block == a.col_block)
+      // end iterators do not necessarily
+      // have to have the same
+      // base_iterator representation, but
+      // valid iterators have to
+      return (((this->row_block == numbers::invalid_unsigned_int)
+               &&
+               (this->col_block == numbers::invalid_unsigned_int))
+              ||
+              (base_iterator == a.base_iterator));
+
+    return false;
+  }
+}
+
+
+//---------------------------------------------------------------------------
+
+
+// Default constructor. It performs no work of its own; the members
+// are left to their own default initialization.
+template <typename MatrixType>
+inline
+BlockMatrixBase<MatrixType>::BlockMatrixBase ()
+{
+  // intentionally empty
+}
+
+// Destructor. Hands all cleanup to clear(), which deletes the blocks
+// and resets the block index bookkeeping.
+template <typename MatrixType>
+inline
+BlockMatrixBase<MatrixType>::~BlockMatrixBase ()
+{
+  this->clear ();
+}
+
+
+// Copy the content of @p source into this matrix block by block,
+// using the copy_from() function of the individual blocks. The
+// block layout of this object is used for the loop bounds and is
+// not modified. Returns a reference to this object.
+template <class MatrixType>
+template <class BlockMatrixType>
+inline
+BlockMatrixBase<MatrixType> &
+BlockMatrixBase<MatrixType>::
+copy_from (const BlockMatrixType &source)
+{
+  for (unsigned int block_row=0; block_row<n_block_rows(); ++block_row)
+    {
+      for (unsigned int block_col=0; block_col<n_block_cols(); ++block_col)
+        {
+          block(block_row, block_col).copy_from (source.block(block_row, block_col));
+        }
+    }
+
+  return *this;
+}
+
+
+// Return the sum of the memory consumption reported for the
+// bookkeeping members of this object (block indices, table of block
+// pointers, scratch arrays and the size of the mutex) and for each
+// of the blocks.
+template <class MatrixType>
+std::size_t
+BlockMatrixBase<MatrixType>::memory_consumption () const
+{
+  // bookkeeping data first ...
+  std::size_t total = sizeof(temporary_data.mutex);
+  total += MemoryConsumption::memory_consumption(row_block_indices);
+  total += MemoryConsumption::memory_consumption(column_block_indices);
+  total += MemoryConsumption::memory_consumption(sub_objects);
+  total += MemoryConsumption::memory_consumption(temporary_data.counter_within_block);
+  total += MemoryConsumption::memory_consumption(temporary_data.column_indices);
+  total += MemoryConsumption::memory_consumption(temporary_data.column_values);
+
+  // ... then the blocks themselves
+  for (unsigned int block_row=0; block_row<n_block_rows(); ++block_row)
+    for (unsigned int block_col=0; block_col<n_block_cols(); ++block_col)
+      {
+        MatrixType *current_block = this->sub_objects[block_row][block_col];
+        total += MemoryConsumption::memory_consumption(*current_block);
+      }
+
+  return total;
+}
+
+
+
+// Delete all blocks, shrink the table of block pointers to 0x0 and
+// reset both block index objects to the empty state.
+template <class MatrixType>
+inline
+void
+BlockMatrixBase<MatrixType>::clear ()
+{
+  for (unsigned int block_row=0; block_row<n_block_rows(); ++block_row)
+    {
+      for (unsigned int block_col=0; block_col<n_block_cols(); ++block_col)
+        {
+          // zero out the table entry before the object it pointed
+          // to is destroyed
+          MatrixType *doomed = this->sub_objects[block_row][block_col];
+          this->sub_objects[block_row][block_col] = 0;
+          delete doomed;
+        }
+    }
+  sub_objects.reinit (0,0);
+
+  // reset block indices to empty
+  column_block_indices = BlockIndices ();
+  row_block_indices = column_block_indices;
+}
+
+
+
+// Return a reference to the block at block position (row, column).
+// Both indices are range checked in debug mode.
+template <class MatrixType>
+inline
+typename BlockMatrixBase<MatrixType>::BlockType &
+BlockMatrixBase<MatrixType>::block (const unsigned int row,
+                                    const unsigned int column)
+{
+  Assert (row<n_block_rows(), ExcIndexRange (row, 0, n_block_rows()));
+  Assert (column<n_block_cols(), ExcIndexRange (column, 0, n_block_cols()));
+
+  BlockType &requested_block = *sub_objects[row][column];
+  return requested_block;
+}
+
+
+
+// Return a read-only reference to the block at block position
+// (row, column). Both indices are range checked in debug mode.
+template <class MatrixType>
+inline
+const typename BlockMatrixBase<MatrixType>::BlockType &
+BlockMatrixBase<MatrixType>::block (const unsigned int row,
+                                    const unsigned int column) const
+{
+  Assert (row<n_block_rows(), ExcIndexRange (row, 0, n_block_rows()));
+  Assert (column<n_block_cols(), ExcIndexRange (column, 0, n_block_cols()));
+
+  const BlockType &requested_block = *sub_objects[row][column];
+  return requested_block;
+}
+
+
+// Number of rows of the whole matrix, i.e. the total size recorded
+// in the row block indices.
+template <class MatrixType>
+inline
+typename BlockMatrixBase<MatrixType>::size_type
+BlockMatrixBase<MatrixType>::m () const
+{
+  return this->row_block_indices.total_size();
+}
+
+
+
+// Number of columns of the whole matrix, i.e. the total size
+// recorded in the column block indices.
+template <class MatrixType>
+inline
+typename BlockMatrixBase<MatrixType>::size_type
+BlockMatrixBase<MatrixType>::n () const
+{
+  return this->column_block_indices.total_size();
+}
+
+
+
+// Number of block columns, as recorded in the column block indices.
+template <class MatrixType>
+inline
+unsigned int
+BlockMatrixBase<MatrixType>::n_block_cols () const
+{
+  return this->column_block_indices.size();
+}
+
+
+
+// Number of block rows, as recorded in the row block indices.
+template <class MatrixType>
+inline
+unsigned int
+BlockMatrixBase<MatrixType>::n_block_rows () const
+{
+  return this->row_block_indices.size();
+}
+
+
+
+// Set the single entry (i,j) to @p value. This case is written out
+// by hand because going through the multi-entry set() function
+// would carry a lot of overhead for one element.
+template <class MatrixType>
+inline
+void
+BlockMatrixBase<MatrixType>::set (const size_type i,
+                                  const size_type j,
+                                  const value_type value)
+{
+  prepare_set_operation();
+
+  AssertIsFinite(value);
+
+  // translate the global indices into (block, index within block)
+  const std::pair<unsigned int,size_type>
+  row_position = row_block_indices.global_to_local (i);
+  const std::pair<unsigned int,size_type>
+  col_position = column_block_indices.global_to_local (j);
+
+  // and let the block in question do the actual work
+  block(row_position.first, col_position.first).set (row_position.second,
+                                                     col_position.second,
+                                                     value);
+}
+
+
+
+// Set a rectangular set of entries: row r of @p values is written
+// into global row row_indices[r] at the columns col_indices. The
+// work is forwarded one row at a time to the pointer based set()
+// function.
+template <class MatrixType>
+template <typename number>
+inline
+void
+BlockMatrixBase<MatrixType>::set (const std::vector<size_type> &row_indices,
+                                  const std::vector<size_type> &col_indices,
+                                  const FullMatrix<number>        &values,
+                                  const bool                       elide_zero_values)
+{
+  Assert (row_indices.size() == values.m(),
+          ExcDimensionMismatch(row_indices.size(), values.m()));
+  Assert (col_indices.size() == values.n(),
+          ExcDimensionMismatch(col_indices.size(), values.n()));
+
+  for (size_type local_row=0; local_row<row_indices.size(); ++local_row)
+    {
+      set (row_indices[local_row],
+           col_indices.size(),
+           &col_indices[0],
+           &values(local_row,0),
+           elide_zero_values);
+    }
+}
+
+
+
+// Set a square set of entries: the same index list @p indices is
+// used for rows and columns. The work is forwarded one row at a
+// time to the pointer based set() function.
+template <class MatrixType>
+template <typename number>
+inline
+void
+BlockMatrixBase<MatrixType>::set (const std::vector<size_type> &indices,
+                                  const FullMatrix<number>        &values,
+                                  const bool                       elide_zero_values)
+{
+  Assert (indices.size() == values.m(),
+          ExcDimensionMismatch(indices.size(), values.m()));
+  Assert (values.n() == values.m(), ExcNotQuadratic());
+
+  for (size_type local_row=0; local_row<indices.size(); ++local_row)
+    {
+      set (indices[local_row],
+           indices.size(),
+           &indices[0],
+           &values(local_row,0),
+           elide_zero_values);
+    }
+}
+
+
+
+// Set several entries of global row @p row, given as two vectors of
+// equal length. Forwards to the pointer based set() function.
+template <class MatrixType>
+template <typename number>
+inline
+void
+BlockMatrixBase<MatrixType>::set (const size_type               row,
+                                  const std::vector<size_type> &col_indices,
+                                  const std::vector<number>    &values,
+                                  const bool                    elide_zero_values)
+{
+  Assert (col_indices.size() == values.size(),
+          ExcDimensionMismatch(col_indices.size(), values.size()));
+
+  set (row,
+       col_indices.size(),
+       &col_indices[0],
+       &values[0],
+       elide_zero_values);
+}
+
+
+
+// Set @p n_cols entries of global row @p row at once. The entries
+// are given by the arrays @p col_indices (global column numbers) and
+// @p values. They are first sorted by block column into the scratch
+// arrays in temporary_data, translating global into block-local
+// column numbers, and then handed block by block to the set()
+// function of the individual blocks. Entries equal to number() are
+// skipped if @p elide_zero_values is true.
+//
+// This is a very messy function, since
+// we need to calculate to each position
+// the location in the global array.
+template <class MatrixType>
+template <typename number>
+inline
+void
+BlockMatrixBase<MatrixType>::set (const size_type  row,
+                                  const size_type  n_cols,
+                                  const size_type *col_indices,
+                                  const number    *values,
+                                  const bool       elide_zero_values)
+{
+  prepare_set_operation();
+
+  // lock access to the temporary data structure to
+  // allow multiple threads to call this function concurrently
+  Threads::Mutex::ScopedLock lock (temporary_data.mutex);
+
+  // Make sure there is one set of scratch arrays per block column
+  if (temporary_data.column_indices.size() < this->n_block_cols())
+    {
+      temporary_data.column_indices.resize (this->n_block_cols());
+      temporary_data.column_values.resize (this->n_block_cols());
+      temporary_data.counter_within_block.resize (this->n_block_cols());
+    }
+
+  // Make sure the scratch arrays of every block column can hold
+  // n_cols entries, and reset the number of added elements in each
+  // block to zero. The size of each array is checked individually:
+  // the scratch data outlives changes of the block layout, so arrays
+  // added by the resize above (or left untouched while the matrix
+  // had fewer block columns) may be shorter than the one of block 0.
+  // Arrays are only ever enlarged, so after a few calls no more
+  // allocations happen.
+  for (unsigned int i=0; i<this->n_block_cols(); ++i)
+    {
+      if (temporary_data.column_indices[i].size() < n_cols)
+        temporary_data.column_indices[i].resize(n_cols);
+      if (temporary_data.column_values[i].size() < n_cols)
+        temporary_data.column_values[i].resize(n_cols);
+
+      temporary_data.counter_within_block[i] = 0;
+    }
+
+  // Go through the column indices to
+  // find out which portions of the
+  // values should be set in which
+  // block of the matrix. We need to
+  // touch all the data, since we can't
+  // be sure that the data of one block
+  // is stored contiguously (in fact,
+  // indices will be intermixed when it
+  // comes from an element matrix).
+  for (size_type j=0; j<n_cols; ++j)
+    {
+      number value = values[j];
+
+      if (value == number() && elide_zero_values == true)
+        continue;
+
+      const std::pair<unsigned int, size_type>
+      col_index = this->column_block_indices.global_to_local(col_indices[j]);
+
+      const size_type local_index = temporary_data.counter_within_block[col_index.first]++;
+
+      temporary_data.column_indices[col_index.first][local_index] = col_index.second;
+      temporary_data.column_values[col_index.first][local_index] = value;
+    }
+
+#ifdef DEBUG
+  // If in debug mode, do a check whether
+  // the right length has been obtained.
+  size_type length = 0;
+  for (unsigned int i=0; i<this->n_block_cols(); ++i)
+    length += temporary_data.counter_within_block[i];
+  Assert (length <= n_cols, ExcInternalError());
+#endif
+
+  // Now we found out about where the
+  // individual columns should start and
+  // where we should start reading out
+  // data. Now let's write the data into
+  // the individual blocks!
+  const std::pair<unsigned int,size_type>
+  row_index = this->row_block_indices.global_to_local (row);
+  for (unsigned int block_col=0; block_col<n_block_cols(); ++block_col)
+    {
+      if (temporary_data.counter_within_block[block_col] == 0)
+        continue;
+
+      // zeros have already been filtered out above if requested, so
+      // the block need not look for them again
+      block(row_index.first, block_col).set
+      (row_index.second,
+       temporary_data.counter_within_block[block_col],
+       &temporary_data.column_indices[block_col][0],
+       &temporary_data.column_values[block_col][0],
+       false);
+    }
+}
+
+
+
+// Add @p value to the single entry (i,j). The addition is skipped
+// for a zero value if the traits of the block matrix type say that
+// this is safe.
+template <class MatrixType>
+inline
+void
+BlockMatrixBase<MatrixType>::add (const size_type  i,
+                                  const size_type  j,
+                                  const value_type value)
+{
+  AssertIsFinite(value);
+
+  prepare_add_operation();
+
+  // save some cycles for zero additions, but
+  // only if it is safe for the matrix we are
+  // working with
+  typedef typename MatrixType::Traits MatrixTraits;
+  if (MatrixTraits::zero_addition_can_be_elided == true)
+    {
+      if (value == value_type())
+        return;
+    }
+
+  // translate the global indices into (block, index within block)
+  const std::pair<unsigned int,size_type>
+  row_position = row_block_indices.global_to_local (i);
+  const std::pair<unsigned int,size_type>
+  col_position = column_block_indices.global_to_local (j);
+
+  block(row_position.first, col_position.first).add (row_position.second,
+                                                     col_position.second,
+                                                     value);
+}
+
+
+
+// Add a rectangular set of entries: row r of @p values is added to
+// global row row_indices[r] at the columns col_indices. The work is
+// forwarded one row at a time to the pointer based add() function.
+template <class MatrixType>
+template <typename number>
+inline
+void
+BlockMatrixBase<MatrixType>::add (const std::vector<size_type> &row_indices,
+                                  const std::vector<size_type> &col_indices,
+                                  const FullMatrix<number>     &values,
+                                  const bool                    elide_zero_values)
+{
+  Assert (row_indices.size() == values.m(),
+          ExcDimensionMismatch(row_indices.size(), values.m()));
+  Assert (col_indices.size() == values.n(),
+          ExcDimensionMismatch(col_indices.size(), values.n()));
+
+  for (size_type local_row=0; local_row<row_indices.size(); ++local_row)
+    {
+      add (row_indices[local_row],
+           col_indices.size(),
+           &col_indices[0],
+           &values(local_row,0),
+           elide_zero_values);
+    }
+}
+
+
+
+// Add a square set of entries: the same index list @p indices is
+// used for rows and columns. The work is forwarded one row at a
+// time to the pointer based add() function.
+template <class MatrixType>
+template <typename number>
+inline
+void
+BlockMatrixBase<MatrixType>::add (const std::vector<size_type> &indices,
+                                  const FullMatrix<number>     &values,
+                                  const bool                    elide_zero_values)
+{
+  Assert (indices.size() == values.m(),
+          ExcDimensionMismatch(indices.size(), values.m()));
+  Assert (values.n() == values.m(), ExcNotQuadratic());
+
+  for (size_type local_row=0; local_row<indices.size(); ++local_row)
+    {
+      add (indices[local_row],
+           indices.size(),
+           &indices[0],
+           &values(local_row,0),
+           elide_zero_values);
+    }
+}
+
+
+
+// Add several values to entries of global row @p row, given as two
+// vectors of equal length. Forwards to the pointer based add()
+// function.
+template <class MatrixType>
+template <typename number>
+inline
+void
+BlockMatrixBase<MatrixType>::add (const size_type               row,
+                                  const std::vector<size_type> &col_indices,
+                                  const std::vector<number>    &values,
+                                  const bool                    elide_zero_values)
+{
+  Assert (col_indices.size() == values.size(),
+          ExcDimensionMismatch(col_indices.size(), values.size()));
+
+  add (row,
+       col_indices.size(),
+       &col_indices[0],
+       &values[0],
+       elide_zero_values);
+}
+
+
+
+// Add @p n_cols values to entries of global row @p row at once. The
+// entries are given by the arrays @p col_indices (global column
+// numbers) and @p values.
+//
+// If @p col_indices_are_sorted is true, the leading entries that
+// belong to block column 0 are passed on to that block directly and
+// the remaining ones are handled by a recursive call on the unsorted
+// path. Otherwise the entries are first sorted by block column into
+// the scratch arrays in temporary_data, translating global into
+// block-local column numbers, and then handed block by block to the
+// add() function of the individual blocks. Entries equal to number()
+// are skipped on that path if @p elide_zero_values is true.
+//
+// This is a very messy function, since
+// we need to calculate to each position
+// the location in the global array.
+template <class MatrixType>
+template <typename number>
+inline
+void
+BlockMatrixBase<MatrixType>::add (const size_type  row,
+                                  const size_type  n_cols,
+                                  const size_type *col_indices,
+                                  const number    *values,
+                                  const bool       elide_zero_values,
+                                  const bool       col_indices_are_sorted)
+{
+  prepare_add_operation();
+
+  // TODO: Look over this to find out
+  // whether we can do that more
+  // efficiently.
+  if (col_indices_are_sorted == true)
+    {
+#ifdef DEBUG
+      // check whether indices really are
+      // sorted. there is nothing to look at
+      // (and no first element to read) if
+      // no indices were given at all.
+      if (n_cols > 0)
+        {
+          size_type before = col_indices[0];
+          for (size_type i=1; i<n_cols; ++i)
+            if (col_indices[i] <= before)
+              Assert (false, ExcMessage ("Flag col_indices_are_sorted is set, but "
+                                         "indices appear to not be sorted."))
+              else
+                before = col_indices[i];
+        }
+#endif
+      const std::pair<unsigned int,size_type>
+      row_index = this->row_block_indices.global_to_local (row);
+
+      if (this->n_block_cols() > 1)
+        {
+          // find the first index that does not belong to block
+          // column 0 any more
+          const size_type *first_block = Utilities::lower_bound (col_indices,
+                                                                 col_indices+n_cols,
+                                                                 this->column_block_indices.block_start(1));
+
+          const size_type n_zero_block_indices = first_block - col_indices;
+          block(row_index.first, 0).add (row_index.second,
+                                         n_zero_block_indices,
+                                         col_indices,
+                                         values,
+                                         elide_zero_values,
+                                         col_indices_are_sorted);
+
+          // everything else goes through the general path below
+          if (n_zero_block_indices < n_cols)
+            this->add(row, n_cols - n_zero_block_indices, first_block,
+                      values + n_zero_block_indices, elide_zero_values,
+                      false);
+        }
+      else
+        {
+          block(row_index.first, 0). add (row_index.second,
+                                          n_cols,
+                                          col_indices,
+                                          values,
+                                          elide_zero_values,
+                                          col_indices_are_sorted);
+        }
+
+      return;
+    }
+
+  // Lock scratch arrays, then resize them
+  Threads::Mutex::ScopedLock lock (temporary_data.mutex);
+
+  if (temporary_data.column_indices.size() < this->n_block_cols())
+    {
+      temporary_data.column_indices.resize (this->n_block_cols());
+      temporary_data.column_values.resize (this->n_block_cols());
+      temporary_data.counter_within_block.resize (this->n_block_cols());
+    }
+
+  // Make sure the scratch arrays of every block column can hold
+  // n_cols entries, and reset the number of added elements in each
+  // block to zero. The size of each array is checked individually:
+  // the scratch data outlives changes of the block layout, so arrays
+  // added by the resize above (or left untouched while the matrix
+  // had fewer block columns) may be shorter than the one of block 0.
+  // Arrays are only ever enlarged, so after a few calls no more
+  // allocations happen.
+  for (unsigned int i=0; i<this->n_block_cols(); ++i)
+    {
+      if (temporary_data.column_indices[i].size() < n_cols)
+        temporary_data.column_indices[i].resize(n_cols);
+      if (temporary_data.column_values[i].size() < n_cols)
+        temporary_data.column_values[i].resize(n_cols);
+
+      temporary_data.counter_within_block[i] = 0;
+    }
+
+  // Go through the column indices to
+  // find out which portions of the
+  // values should be written into
+  // which block of the matrix. We need
+  // to touch all the data, since we
+  // can't be sure that the data of one
+  // block is stored contiguously (in
+  // fact, data will be intermixed when
+  // it comes from an element matrix).
+  for (size_type j=0; j<n_cols; ++j)
+    {
+      number value = values[j];
+
+      if (value == number() && elide_zero_values == true)
+        continue;
+
+      const std::pair<unsigned int, size_type>
+      col_index = this->column_block_indices.global_to_local(col_indices[j]);
+
+      const size_type local_index = temporary_data.counter_within_block[col_index.first]++;
+
+      temporary_data.column_indices[col_index.first][local_index] = col_index.second;
+      temporary_data.column_values[col_index.first][local_index] = value;
+    }
+
+#ifdef DEBUG
+  // If in debug mode, do a check whether
+  // the right length has been obtained.
+  size_type length = 0;
+  for (unsigned int i=0; i<this->n_block_cols(); ++i)
+    length += temporary_data.counter_within_block[i];
+  Assert (length <= n_cols, ExcInternalError());
+#endif
+
+  // Now we found out about where the
+  // individual columns should start and
+  // where we should start reading out
+  // data. Now let's write the data into
+  // the individual blocks!
+  const std::pair<unsigned int,size_type>
+  row_index = this->row_block_indices.global_to_local (row);
+  for (unsigned int block_col=0; block_col<n_block_cols(); ++block_col)
+    {
+      if (temporary_data.counter_within_block[block_col] == 0)
+        continue;
+
+      // zeros have already been filtered out above if requested, so
+      // the block need not look for them again
+      block(row_index.first, block_col).add
+      (row_index.second,
+       temporary_data.counter_within_block[block_col],
+       &temporary_data.column_indices[block_col][0],
+       &temporary_data.column_values[block_col][0],
+       false,
+       col_indices_are_sorted);
+    }
+}
+
+
+
+// Add @p factor times @p matrix to this matrix, block by block. The
+// whole operation is skipped for a zero factor if the traits of the
+// block matrix type say that this is safe.
+template <class MatrixType>
+inline
+void
+BlockMatrixBase<MatrixType>::add (const value_type                   factor,
+                                  const BlockMatrixBase<MatrixType> &matrix)
+{
+  AssertIsFinite(factor);
+
+  prepare_add_operation();
+
+  // save some cycles for zero additions, but
+  // only if it is safe for the matrix we are
+  // working with
+  typedef typename MatrixType::Traits MatrixTraits;
+  if (MatrixTraits::zero_addition_can_be_elided == true)
+    {
+      if (factor == 0)
+        return;
+    }
+
+  for (unsigned int block_row=0; block_row<n_block_rows(); ++block_row)
+    {
+      for (unsigned int block_col=0; block_col<n_block_cols(); ++block_col)
+        {
+          // This function should throw if the sparsity
+          // patterns of the two blocks differ
+          block(block_row, block_col).add(factor,
+                                          matrix.block(block_row, block_col));
+        }
+    }
+}
+
+
+
+// Return the value of entry (i,j) by forwarding to operator() of
+// the block that contains it.
+template <class MatrixType>
+inline
+typename BlockMatrixBase<MatrixType>::value_type
+BlockMatrixBase<MatrixType>::operator () (const size_type i,
+                                          const size_type j) const
+{
+  // translate the global indices into (block, index within block)
+  const std::pair<unsigned int,size_type>
+  row_position = row_block_indices.global_to_local (i);
+  const std::pair<unsigned int,size_type>
+  col_position = column_block_indices.global_to_local (j);
+
+  return block(row_position.first, col_position.first) (row_position.second,
+                                                        col_position.second);
+}
+
+
+
+// Return the value of entry (i,j) by forwarding to el() of the
+// block that contains it.
+template <class MatrixType>
+inline
+typename BlockMatrixBase<MatrixType>::value_type
+BlockMatrixBase<MatrixType>::el (const size_type i,
+                                 const size_type j) const
+{
+  // translate the global indices into (block, index within block)
+  const std::pair<unsigned int,size_type>
+  row_position = row_block_indices.global_to_local (i);
+  const std::pair<unsigned int,size_type>
+  col_position = column_block_indices.global_to_local (j);
+
+  return block(row_position.first, col_position.first).el (row_position.second,
+                                                           col_position.second);
+}
+
+
+
+// Return the i-th diagonal element by forwarding to diag_element()
+// of the diagonal block that contains row i. Requires the same
+// number of block rows and block columns.
+template <class MatrixType>
+inline
+typename BlockMatrixBase<MatrixType>::value_type
+BlockMatrixBase<MatrixType>::diag_element (const size_type i) const
+{
+  Assert (n_block_rows() == n_block_cols(), ExcNotQuadratic());
+
+  // (diagonal block, index within that block)
+  const std::pair<unsigned int,size_type>
+  position = row_block_indices.global_to_local (i);
+
+  return block(position.first, position.first).diag_element(position.second);
+}
+
+
+
+// Call compress() with the given @p operation on every block.
+template <class MatrixType>
+inline
+void
+BlockMatrixBase<MatrixType>::compress (::dealii::VectorOperation::values operation)
+{
+  for (unsigned int block_row=0; block_row<n_block_rows(); ++block_row)
+    {
+      for (unsigned int block_col=0; block_col<n_block_cols(); ++block_col)
+        {
+          block(block_row, block_col).compress (operation);
+        }
+    }
+}
+
+
+
+// Multiply every block by @p factor. The matrix must have at least
+// one block row and one block column. Returns a reference to this
+// object.
+template <class MatrixType>
+inline
+BlockMatrixBase<MatrixType> &
+BlockMatrixBase<MatrixType>::operator *= (const value_type factor)
+{
+  Assert (n_block_cols() != 0, ExcNotInitialized());
+  Assert (n_block_rows() != 0, ExcNotInitialized());
+
+  for (unsigned int block_row=0; block_row<n_block_rows(); ++block_row)
+    {
+      for (unsigned int block_col=0; block_col<n_block_cols(); ++block_col)
+        {
+          block(block_row, block_col) *= factor;
+        }
+    }
+
+  return *this;
+}
+
+
+
+// Divide every block by @p factor, implemented as a multiplication
+// with the inverse of the factor. The matrix must have at least one
+// block row and one block column, and @p factor must not be zero.
+// Returns a reference to this object.
+template <class MatrixType>
+inline
+BlockMatrixBase<MatrixType> &
+BlockMatrixBase<MatrixType>::operator /= (const value_type factor)
+{
+  Assert (n_block_cols() != 0, ExcNotInitialized());
+  Assert (n_block_rows() != 0, ExcNotInitialized());
+  Assert (factor !=0, ExcDivideByZero());
+
+  // compute the inverse only once
+  const value_type factor_inv = 1. / factor;
+
+  for (unsigned int block_row=0; block_row<n_block_rows(); ++block_row)
+    {
+      for (unsigned int block_col=0; block_col<n_block_cols(); ++block_col)
+        {
+          block(block_row, block_col) *= factor_inv;
+        }
+    }
+
+  return *this;
+}
+
+
+
+// Read-only access to the object describing the block structure of
+// the rows.
+template <class MatrixType>
+const BlockIndices &
+BlockMatrixBase<MatrixType>::get_row_indices () const
+{
+  return row_block_indices;
+}
+
+
+
+// Read-only access to the object describing the block structure of
+// the columns.
+template <class MatrixType>
+const BlockIndices &
+BlockMatrixBase<MatrixType>::get_column_indices () const
+{
+  return column_block_indices;
+}
+
+
+
+// Matrix-vector product dst = M*src for block vectors on both
+// sides. For every block row, the first block overwrites the
+// corresponding block of @p dst and the remaining blocks of that row
+// add to it. The number of blocks of @p dst and @p src must match
+// the number of block rows and block columns, respectively.
+//
+// Block numbers are counted in unsigned int, the type used by
+// block(), n_block_rows() and n_block_cols().
+template <class MatrixType>
+template <class BlockVectorType>
+void
+BlockMatrixBase<MatrixType>::
+vmult_block_block (BlockVectorType       &dst,
+                   const BlockVectorType &src) const
+{
+  Assert (dst.n_blocks() == n_block_rows(),
+          ExcDimensionMismatch(dst.n_blocks(), n_block_rows()));
+  Assert (src.n_blocks() == n_block_cols(),
+          ExcDimensionMismatch(src.n_blocks(), n_block_cols()));
+
+  for (unsigned int row=0; row<n_block_rows(); ++row)
+    {
+      block(row,0).vmult (dst.block(row),
+                          src.block(0));
+      for (unsigned int col=1; col<n_block_cols(); ++col)
+        block(row,col).vmult_add (dst.block(row),
+                                  src.block(col));
+    }
+}
+
+
+
+// Matrix-vector product dst = M*src where @p src is a block vector
+// and @p dst is a non-block vector. The matrix must have exactly one
+// block row; the first block overwrites @p dst, the remaining ones
+// add to it.
+//
+// Block numbers are counted in unsigned int, the type used by
+// block() and n_block_cols().
+template <class MatrixType>
+template <class BlockVectorType,
+          class VectorType>
+void
+BlockMatrixBase<MatrixType>::
+vmult_nonblock_block (VectorType    &dst,
+                      const BlockVectorType &src) const
+{
+  Assert (n_block_rows() == 1,
+          ExcDimensionMismatch(1, n_block_rows()));
+  Assert (src.n_blocks() == n_block_cols(),
+          ExcDimensionMismatch(src.n_blocks(), n_block_cols()));
+
+  block(0,0).vmult (dst, src.block(0));
+  for (unsigned int col=1; col<n_block_cols(); ++col)
+    block(0,col).vmult_add (dst, src.block(col));
+}
+
+
+
+// Matrix-vector product dst = M*src where @p dst is a block vector
+// and @p src is a non-block vector. The matrix must have exactly one
+// block column; block (row,0) is applied to @p src to obtain block
+// row of @p dst.
+//
+// Block numbers are counted in unsigned int, the type used by
+// block() and n_block_rows().
+template <class MatrixType>
+template <class BlockVectorType,
+          class VectorType>
+void
+BlockMatrixBase<MatrixType>::
+vmult_block_nonblock (BlockVectorType  &dst,
+                      const VectorType &src) const
+{
+  Assert (dst.n_blocks() == n_block_rows(),
+          ExcDimensionMismatch(dst.n_blocks(), n_block_rows()));
+  Assert (1 == n_block_cols(),
+          ExcDimensionMismatch(1, n_block_cols()));
+
+  for (unsigned int row=0; row<n_block_rows(); ++row)
+    block(row,0).vmult (dst.block(row),
+                        src);
+}
+
+
+
+// Matrix-vector product dst = M*src for non-block vectors on both
+// sides. The matrix must consist of exactly one block, to which the
+// operation is forwarded.
+template <class MatrixType>
+template <class VectorType>
+void
+BlockMatrixBase<MatrixType>::
+vmult_nonblock_nonblock (VectorType       &dst,
+                         const VectorType &src) const
+{
+  Assert (1 == n_block_rows(), ExcDimensionMismatch(1, n_block_rows()));
+  Assert (1 == n_block_cols(), ExcDimensionMismatch(1, n_block_cols()));
+
+  // the one and only block does all the work
+  block(0,0).vmult (dst, src);
+}
+
+
+
+// Adding matrix-vector product dst += M*src for block vectors: each
+// block (r,c) adds its product with block c of @p src to block r of
+// @p dst. The number of blocks of @p dst and @p src must match the
+// number of block rows and block columns, respectively.
+template <class MatrixType>
+template <class BlockVectorType>
+void
+BlockMatrixBase<MatrixType>::vmult_add (BlockVectorType       &dst,
+                                        const BlockVectorType &src) const
+{
+  Assert (dst.n_blocks() == n_block_rows(),
+          ExcDimensionMismatch(dst.n_blocks(), n_block_rows()));
+  Assert (src.n_blocks() == n_block_cols(),
+          ExcDimensionMismatch(src.n_blocks(), n_block_cols()));
+
+  for (unsigned int block_row=0; block_row<n_block_rows(); ++block_row)
+    {
+      for (unsigned int block_col=0; block_col<n_block_cols(); ++block_col)
+        {
+          block(block_row, block_col).vmult_add (dst.block(block_row),
+                                                 src.block(block_col));
+        }
+    }
+}
+
+
+
+
+// Transposed matrix-vector product dst = M^T*src for block vectors
+// on both sides. @p dst is zeroed first; then each block (r,c) adds
+// its transposed product with block r of @p src to block c of
+// @p dst. The number of blocks of @p dst and @p src must match the
+// number of block columns and block rows, respectively.
+template <class MatrixType>
+template <class BlockVectorType>
+void
+BlockMatrixBase<MatrixType>::
+Tvmult_block_block (BlockVectorType       &dst,
+                    const BlockVectorType &src) const
+{
+  Assert (dst.n_blocks() == n_block_cols(),
+          ExcDimensionMismatch(dst.n_blocks(), n_block_cols()));
+  Assert (src.n_blocks() == n_block_rows(),
+          ExcDimensionMismatch(src.n_blocks(), n_block_rows()));
+
+  // start from zero since all blocks only add to the result
+  dst = 0.;
+
+  for (unsigned int block_row=0; block_row<n_block_rows(); ++block_row)
+    for (unsigned int block_col=0; block_col<n_block_cols(); ++block_col)
+      block(block_row, block_col).Tvmult_add (dst.block(block_col),
+                                              src.block(block_row));
+}
+
+
+
+// dst = M^T*src for a matrix with a single block row: dst is a block
+// vector, src a plain vector. dst is zeroed before accumulating.
+template <class MatrixType>
+template <class BlockVectorType, class VectorType>
+void
+BlockMatrixBase<MatrixType>::
+Tvmult_block_nonblock (BlockVectorType  &dst,
+                       const VectorType &src) const
+{
+  Assert (dst.n_blocks() == n_block_cols(),
+          ExcDimensionMismatch(dst.n_blocks(), n_block_cols()));
+  Assert (1 == n_block_rows(), ExcDimensionMismatch(1, n_block_rows()));
+
+  dst = 0.;
+
+  for (unsigned int c=0; c<n_block_cols(); ++c)
+    block(0,c).Tvmult_add (dst.block(c), src);
+}
+
+
+
+// dst = M^T*src for a matrix with a single block column: dst is a plain
+// vector, src a block vector.
+template <class MatrixType>
+template <class BlockVectorType, class VectorType>
+void
+BlockMatrixBase<MatrixType>::
+Tvmult_nonblock_block (VectorType            &dst,
+                       const BlockVectorType &src) const
+{
+  Assert (1 == n_block_cols(), ExcDimensionMismatch(1, n_block_cols()));
+  Assert (src.n_blocks() == n_block_rows(),
+          ExcDimensionMismatch(src.n_blocks(), n_block_rows()));
+
+  // the first block row overwrites dst, the remaining ones accumulate
+  block(0,0).Tvmult (dst, src.block(0));
+  for (size_type r=1; r<n_block_rows(); ++r)
+    block(r,0).Tvmult_add (dst, src.block(r));
+}
+
+
+
+// dst = M^T*src for a matrix consisting of exactly one block; both
+// vectors are plain (non-block) vectors handed straight to that block.
+template <class MatrixType>
+template <class VectorType>
+void
+BlockMatrixBase<MatrixType>::
+Tvmult_nonblock_nonblock (VectorType       &dst,
+                          const VectorType &src) const
+{
+  // the matrix must be 1x1 in terms of blocks
+  Assert (1 == n_block_cols(), ExcDimensionMismatch(1, n_block_cols()));
+  Assert (1 == n_block_rows(), ExcDimensionMismatch(1, n_block_rows()));
+  block(0,0).Tvmult (dst, src);
+}
+
+
+
+// dst += M^T*src, accumulated block by block: every block (r,c) adds its
+// transposed product with src.block(r) onto dst.block(c).
+template <class MatrixType>
+template <class BlockVectorType>
+void
+BlockMatrixBase<MatrixType>::Tvmult_add (BlockVectorType       &dst,
+                                         const BlockVectorType &src) const
+{
+  Assert (dst.n_blocks() == n_block_cols(),
+          ExcDimensionMismatch(dst.n_blocks(), n_block_cols()));
+  Assert (src.n_blocks() == n_block_rows(),
+          ExcDimensionMismatch(src.n_blocks(), n_block_rows()));
+  for (unsigned int r=0; r<n_block_rows(); ++r)
+    for (unsigned int c=0; c<n_block_cols(); ++c)
+      block(r,c).Tvmult_add (dst.block(c), src.block(r));
+}
+
+
+
+// Matrix norm square of v, assembled from the blocks: diagonal blocks add
+// their matrix_norm_square(), off-diagonal ones a matrix_scalar_product().
+template <class MatrixType>
+template <class BlockVectorType>
+typename BlockMatrixBase<MatrixType>::value_type
+BlockMatrixBase<MatrixType>::matrix_norm_square (const BlockVectorType &v) const
+{
+  Assert (n_block_rows() == n_block_cols(), ExcNotQuadratic());
+  Assert (v.n_blocks() == n_block_rows(),
+          ExcDimensionMismatch(v.n_blocks(), n_block_rows()));
+  value_type sum = 0;
+  for (unsigned int r=0; r<n_block_rows(); ++r)
+    for (unsigned int c=0; c<n_block_cols(); ++c)
+      if (r!=c)
+        sum += block(r,c).matrix_scalar_product (v.block(r), v.block(c));
+      else
+        sum += block(r,c).matrix_norm_square (v.block(r));
+  return sum;
+}
+
+
+
+// Sum over all blocks (r,c) of the block's matrix_scalar_product() of
+// u.block(r) and v.block(c).
+template <class MatrixType>
+template <class BlockVectorType>
+typename BlockMatrixBase<MatrixType>::value_type
+BlockMatrixBase<MatrixType>::
+matrix_scalar_product (const BlockVectorType &u,
+                       const BlockVectorType &v) const
+{
+  Assert (u.n_blocks() == n_block_rows(),
+          ExcDimensionMismatch(u.n_blocks(), n_block_rows()));
+  Assert (v.n_blocks() == n_block_cols(),
+          ExcDimensionMismatch(v.n_blocks(), n_block_cols()));
+  value_type sum = 0;
+  for (unsigned int r=0; r<n_block_rows(); ++r)
+    for (unsigned int c=0; c<n_block_cols(); ++c)
+      sum += block(r,c).matrix_scalar_product (u.block(r), v.block(c));
+  return sum;
+}
+
+
+
+template <class MatrixType>
+template <class BlockVectorType>
+typename BlockMatrixBase<MatrixType>::value_type
+BlockMatrixBase<MatrixType>::
+residual (BlockVectorType          &dst,
+          const BlockVectorType &x,
+          const BlockVectorType    &b) const
+{
+  Assert (dst.n_blocks() == n_block_rows(),
+          ExcDimensionMismatch(dst.n_blocks(), n_block_rows()));
+  Assert (b.n_blocks() == n_block_rows(),
+          ExcDimensionMismatch(b.n_blocks(), n_block_rows()));
+  Assert (x.n_blocks() == n_block_cols(),
+          ExcDimensionMismatch(x.n_blocks(), n_block_cols()));
+  // store the residual b - M*x in dst and return the square root of the
+  // summed norm_sqr() of the blocks of dst.
+  //
+  // in block notation, the residual is
+  //   r_i = b_i - \sum_j A_ij x_j
+  //       = b_i - A_i0 x_0 - \sum_{j>0} A_ij x_j.
+  //
+  // the first two terms are computed by the residual function of
+  // A_i0. the remaining terms are added with vmult_add; since they
+  // have to be subtracted instead, dst is negated before and again
+  // after adding them up, which avoids a temporary vector.
+  //
+  // the order of the four steps inside the loop body below must
+  // therefore not be changed
+  for (unsigned int row=0; row<n_block_rows(); ++row)
+    {
+      block(row,0).residual (dst.block(row),
+                             x.block(0),
+                             b.block(row));
+      // first sign change: dst_row = -(b_row - A_row,0 x_0)
+      for (size_type i=0; i<dst.block(row).size(); ++i)
+        dst.block(row)(i) = -dst.block(row)(i);
+      // add the contributions of the remaining block columns
+      for (unsigned int col=1; col<n_block_cols(); ++col)
+        block(row,col).vmult_add (dst.block(row),
+                                  x.block(col));
+      // second sign change: dst_row = b_row - \sum_col A_row,col x_col
+      for (size_type i=0; i<dst.block(row).size(); ++i)
+        dst.block(row)(i) = -dst.block(row)(i);
+    };
+  // accumulate the squared norms of all blocks of dst
+  value_type res = 0;
+  for (size_type row=0; row<n_block_rows(); ++row)
+    res += dst.block(row).norm_sqr ();
+  return std::sqrt(res);
+}
+
+
+
+// Print all blocks row by row. Unless alternative_output is set, each
+// block is preceded by a "Block (row, col)" header line.
+template <class MatrixType>
+inline void
+BlockMatrixBase<MatrixType>::print (std::ostream &out,
+                                    const bool    alternative_output) const
+{
+  for (unsigned int r=0; r<n_block_rows(); ++r)
+    for (unsigned int c=0; c<n_block_cols(); ++c)
+      {
+        if (alternative_output == false)
+          out << "Block (" << r << ", " << c << ")" << std::endl;
+        block(r, c).print(out, alternative_output);
+      }
+}
+
+
+
+// Const iterator constructed for row 0 (no range check, unlike begin(r)).
+template <class MatrixType>
+inline typename BlockMatrixBase<MatrixType>::const_iterator
+BlockMatrixBase<MatrixType>::begin () const
+{
+  return const_iterator(this, 0);
+}
+
+
+
+// Const end iterator of the whole matrix, constructed for row index m().
+template <class MatrixType>
+inline typename BlockMatrixBase<MatrixType>::const_iterator
+BlockMatrixBase<MatrixType>::end () const
+{
+  return const_iterator(this, m());
+}
+
+
+
+// Const iterator constructed for row r; requires r<m().
+template <class MatrixType>
+inline typename BlockMatrixBase<MatrixType>::const_iterator
+BlockMatrixBase<MatrixType>::begin (const size_type r) const
+{
+  Assert (r<m(), ExcIndexRange(r,0,m()));
+  return const_iterator(this, r);
+}
+
+
+
+// Const end iterator of row r, constructed for row r+1; requires r<m().
+template <class MatrixType>
+inline typename BlockMatrixBase<MatrixType>::const_iterator
+BlockMatrixBase<MatrixType>::end (const size_type r) const
+{
+  Assert (r<m(), ExcIndexRange(r,0,m()));
+  return const_iterator(this, r+1);
+}
+
+
+
+// Iterator constructed for row 0 (no range check, unlike begin(r)).
+template <class MatrixType>
+inline typename BlockMatrixBase<MatrixType>::iterator
+BlockMatrixBase<MatrixType>::begin ()
+{
+  return iterator(this, 0);
+}
+
+
+
+// End iterator of the whole matrix, constructed for row index m().
+template <class MatrixType>
+inline typename BlockMatrixBase<MatrixType>::iterator
+BlockMatrixBase<MatrixType>::end ()
+{
+  return iterator(this, m());
+}
+
+
+
+// Iterator constructed for row r; requires r<m().
+template <class MatrixType>
+inline typename BlockMatrixBase<MatrixType>::iterator
+BlockMatrixBase<MatrixType>::begin (const size_type r)
+{
+  Assert (r<m(), ExcIndexRange(r,0,m()));
+  return iterator(this, r);
+}
+
+
+
+// End iterator of row r, constructed for row r+1; requires r<m().
+template <class MatrixType>
+inline typename BlockMatrixBase<MatrixType>::iterator
+BlockMatrixBase<MatrixType>::end (const size_type r)
+{
+  Assert (r<m(), ExcIndexRange(r,0,m()));
+  return iterator(this, r+1);
+}
+
+
+
+// Recompute row_block_indices and column_block_indices from the current
+// sizes of the sub-matrices. The Assert checks verify that all blocks of
+// a block row agree in m() and that all blocks of a block column agree
+// in n().
+template <class MatrixType>
+void
+BlockMatrixBase<MatrixType>::collect_sizes ()
+{
+  std::vector<size_type> row_sizes (this->n_block_rows());
+  std::vector<size_type> col_sizes (this->n_block_cols());
+
+  // first find out the row sizes from the first block column
+  for (unsigned int r=0; r<this->n_block_rows(); ++r)
+    row_sizes[r] = sub_objects[r][0]->m();
+  // then check that the following block columns have the same sizes
+  for (unsigned int c=1; c<this->n_block_cols(); ++c)
+    for (unsigned int r=0; r<this->n_block_rows(); ++r)
+      Assert (row_sizes[r] == sub_objects[r][c]->m(),
+              ExcIncompatibleRowNumbers (r,0,r,c));
+
+  // finally initialize the row indices with this array
+  this->row_block_indices.reinit (row_sizes);
+
+  // then do the same with the columns: the sizes come from the first
+  // block row and all further block rows must agree. a mismatch here
+  // concerns column numbers, so it is reported with the column exception
+  for (unsigned int c=0; c<this->n_block_cols(); ++c)
+    col_sizes[c] = sub_objects[0][c]->n();
+  for (unsigned int r=1; r<this->n_block_rows(); ++r)
+    for (unsigned int c=0; c<this->n_block_cols(); ++c)
+      Assert (col_sizes[c] == sub_objects[r][c]->n(),
+              ExcIncompatibleColNumbers (0,c,r,c));
+
+  // finally initialize the column indices with this array
+  this->column_block_indices.reinit (col_sizes);
+}
+
+
+
+// Forward prepare_add() to every block of the matrix.
+template <class MatrixType>
+void BlockMatrixBase<MatrixType>::prepare_add_operation ()
+{
+  for (unsigned int r=0; r<n_block_rows(); ++r)
+    for (unsigned int c=0; c<n_block_cols(); ++c)
+      block(r, c).prepare_add();
+}
+
+
+
+// Forward prepare_set() to every block of the matrix.
+template <class MatrixType>
+void BlockMatrixBase<MatrixType>::prepare_set_operation ()
+{
+  for (unsigned int r=0; r<n_block_rows(); ++r)
+    for (unsigned int c=0; c<n_block_cols(); ++c)
+      block(r, c).prepare_set();
+}
+
+#endif // DOXYGEN
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif    // dealii__block_matrix_base_h
diff --git a/include/deal.II/lac/block_sparse_matrix.h b/include/deal.II/lac/block_sparse_matrix.h
new file mode 100644
index 0000000..4297697
--- /dev/null
+++ b/include/deal.II/lac/block_sparse_matrix.h
@@ -0,0 +1,532 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__block_sparse_matrix_h
+#define dealii__block_sparse_matrix_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/table.h>
+#include <deal.II/lac/block_matrix_base.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/block_sparsity_pattern.h>
+#include <deal.II/lac/exceptions.h>
+
+#include <cmath>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/*! @addtogroup Matrix1
+ *@{
+ */
+
+
+/**
+ * Blocked sparse matrix based on the SparseMatrix class. This class
+ * implements the functions that are specific to the SparseMatrix base objects
+ * for a blocked sparse matrix, and leaves the actual work of relaying most
+ * of the calls to the individual blocks to the functions implemented in the
+ * base class. See there also for a description of when this class is useful.
+ *
+ * @see
+ * @ref GlossBlockLA "Block (linear algebra)"
+ * @author Wolfgang Bangerth, 2000, 2004
+ */
+template <typename number>
+class BlockSparseMatrix : public BlockMatrixBase<SparseMatrix<number> >
+{
+public:
+  /**
+   * Typedef the base class for simpler access to its own typedefs.
+   */
+  typedef BlockMatrixBase<SparseMatrix<number> > BaseClass;
+
+  /**
+   * Typedef the type of the underlying matrix.
+   */
+  typedef typename BaseClass::BlockType  BlockType;
+
+  /**
+   * Import the typedefs from the base class.
+   */
+  typedef typename BaseClass::value_type      value_type;
+  typedef typename BaseClass::pointer         pointer;
+  typedef typename BaseClass::const_pointer   const_pointer;
+  typedef typename BaseClass::reference       reference;
+  typedef typename BaseClass::const_reference const_reference;
+  typedef typename BaseClass::size_type       size_type;
+  typedef typename BaseClass::iterator        iterator;
+  typedef typename BaseClass::const_iterator  const_iterator;
+
+  /**
+   * @name Constructors and initialization
+   */
+//@{
+  /**
+   * Constructor; initializes the matrix to be empty, without any structure,
+   * i.e.  the matrix is not usable at all. This constructor is therefore only
+   * useful for matrices which are members of a class. All other matrices
+   * should be created at a point in the data flow where all necessary
+   * information is available.
+   *
+   * You have to initialize the matrix before usage with
+   * reinit(BlockSparsityPattern). The number of blocks per row and column are
+   * then determined by that function.
+   */
+  BlockSparseMatrix ();
+
+  /**
+   * Constructor. Takes the given matrix sparsity structure to represent the
+   * sparsity pattern of this matrix. You can change the sparsity pattern
+   * later on by calling the reinit() function.
+   *
+   * This constructor initializes all sub-matrices with the sub-sparsity
+   * pattern within the argument.
+   *
+   * You have to make sure that the lifetime of the sparsity structure is at
+   * least as long as that of this matrix or as long as reinit() is not called
+   * with a new sparsity structure.
+   */
+  BlockSparseMatrix (const BlockSparsityPattern &sparsity);
+
+  /**
+   * Destructor.
+   */
+  virtual ~BlockSparseMatrix ();
+
+
+
+  /**
+   * Pseudo copy operator only copying empty objects. The sizes of the block
+   * matrices need to be the same.
+   */
+  BlockSparseMatrix &
+  operator = (const BlockSparseMatrix &);
+
+  /**
+   * This operator assigns a scalar to a matrix. Since this usually does not
+   * make much sense (should we set all matrix entries to this value? Only the
+   * nonzero entries of the sparsity pattern?), this operation is only allowed
+   * if the actual value to be assigned is zero. This operator only exists to
+   * allow for the obvious notation <tt>matrix=0</tt>, which sets all elements
+   * of the matrix to zero, but keeps the sparsity pattern previously used.
+   */
+  BlockSparseMatrix &
+  operator = (const double d);
+
+  /**
+   * Release all memory and return to a state just like after having called
+   * the default constructor. It also forgets the sparsity pattern it was
+   * previously tied to.
+   *
+   * This calls SparseMatrix::clear on all sub-matrices and then resets this
+   * object to have no blocks at all.
+   */
+  void clear ();
+
+  /**
+   * Reinitialize the sparse matrix with the given sparsity pattern. The
+   * latter tells the matrix how many nonzero elements there need to be
+   * reserved.
+   *
+   * Basically, this function only calls SparseMatrix::reinit() of the sub-
+   * matrices with the block sparsity patterns of the parameter.
+   *
+   * You have to make sure that the lifetime of the sparsity structure is at
+   * least as long as that of this matrix or as long as reinit(const
+   * SparsityPattern &) is not called with a new sparsity structure.
+   *
+   * The elements of the matrix are set to zero by this function.
+   */
+  virtual void reinit (const BlockSparsityPattern &sparsity);
+//@}
+
+  /**
+   * @name Information on the matrix
+   */
+//@{
+  /**
+   * Return whether the object is empty. It is empty if either both dimensions
+   * are zero or no BlockSparsityPattern is associated.
+   */
+  bool empty () const;
+
+  /**
+   * Return the number of entries in a specific row.
+   */
+  size_type get_row_length (const size_type row) const;
+
+  /**
+   * Return the number of nonzero elements of this matrix. Actually, it
+   * returns the number of entries in the sparsity pattern; if any of the
+   * entries should happen to be zero, it is counted anyway.
+   */
+  size_type n_nonzero_elements () const;
+
+  /**
+   * Return the number of actually nonzero elements. Just counts the number of
+   * actually nonzero elements (with absolute value larger than threshold) of
+   * all the blocks.
+   */
+  size_type n_actually_nonzero_elements (const double threshold = 0.0) const;
+
+  /**
+   * Return a (constant) reference to the underlying sparsity pattern of this
+   * matrix.
+   *
+   * Though the return value is declared <tt>const</tt>, you should be aware
+   * that it may change if you call any nonconstant function of objects which
+   * operate on it.
+   */
+  const BlockSparsityPattern &
+  get_sparsity_pattern () const;
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  std::size_t memory_consumption () const;
+//@}
+
+  /**
+   * @name Multiplications
+   */
+//@{
+  /**
+   * Matrix-vector multiplication: let $dst = M*src$ with $M$ being this
+   * matrix.
+   */
+  template <typename block_number>
+  void vmult (BlockVector<block_number>       &dst,
+              const BlockVector<block_number> &src) const;
+
+  /**
+   * Matrix-vector multiplication. Just like the previous function, but only
+   * applicable if the matrix has only one block column.
+   */
+  template <typename block_number,
+            typename nonblock_number>
+  void vmult (BlockVector<block_number>          &dst,
+              const Vector<nonblock_number> &src) const;
+
+  /**
+   * Matrix-vector multiplication. Just like the previous function, but only
+   * applicable if the matrix has only one block row.
+   */
+  template <typename block_number,
+            typename nonblock_number>
+  void vmult (Vector<nonblock_number>    &dst,
+              const BlockVector<block_number> &src) const;
+
+  /**
+   * Matrix-vector multiplication. Just like the previous function, but only
+   * applicable if the matrix has only one block.
+   */
+  template <typename nonblock_number>
+  void vmult (Vector<nonblock_number>       &dst,
+              const Vector<nonblock_number> &src) const;
+
+  /**
+   * Matrix-vector multiplication: let $dst = M^T*src$ with $M$ being this
+   * matrix. This function does the same as vmult() but takes the transposed
+   * matrix.
+   */
+  template <typename block_number>
+  void Tvmult (BlockVector<block_number>       &dst,
+               const BlockVector<block_number> &src) const;
+
+  /**
+   * Matrix-vector multiplication. Just like the previous function, but only
+   * applicable if the matrix has only one block row.
+   */
+  template <typename block_number,
+            typename nonblock_number>
+  void Tvmult (BlockVector<block_number>  &dst,
+               const Vector<nonblock_number> &src) const;
+
+  /**
+   * Matrix-vector multiplication. Just like the previous function, but only
+   * applicable if the matrix has only one block column.
+   */
+  template <typename block_number,
+            typename nonblock_number>
+  void Tvmult (Vector<nonblock_number>    &dst,
+               const BlockVector<block_number> &src) const;
+
+  /**
+   * Matrix-vector multiplication. Just like the previous function, but only
+   * applicable if the matrix has only one block.
+   */
+  template <typename nonblock_number>
+  void Tvmult (Vector<nonblock_number>       &dst,
+               const Vector<nonblock_number> &src) const;
+//@}
+
+  /**
+   * @name Preconditioning methods
+   */
+//@{
+  /**
+   * Apply the Jacobi preconditioner, which multiplies every element of the
+   * <tt>src</tt> vector by the inverse of the respective diagonal element and
+   * multiplies the result with the relaxation parameter <tt>omega</tt>.
+   *
+   * All diagonal blocks must be square matrices for this operation.
+   */
+  template <class BlockVectorType>
+  void precondition_Jacobi (BlockVectorType       &dst,
+                            const BlockVectorType &src,
+                            const number           omega = 1.) const;
+
+  /**
+   * Apply the Jacobi preconditioner to a simple vector.
+   *
+   * The matrix must be a single square block for this.
+   */
+  template <typename number2>
+  void precondition_Jacobi (Vector<number2>       &dst,
+                            const Vector<number2> &src,
+                            const number           omega = 1.) const;
+//@}
+
+  /**
+   * @name Input/Output
+   */
+//@{
+  /**
+   * Print the matrix in the usual format, i.e. as a matrix and not as a list
+   * of nonzero elements. For better readability, elements not in the matrix
+   * are displayed as empty space, while matrix elements which are explicitly
+   * set to zero are displayed as such.
+   *
+   * The parameters allow for a flexible setting of the output format:
+   * <tt>precision</tt> and <tt>scientific</tt> are used to determine the
+   * number format, where <tt>scientific = false</tt> means fixed point
+   * notation.  A zero entry for <tt>width</tt> makes the function compute a
+   * width, but it may be changed to a positive value, if output is crude.
+   *
+   * Additionally, a character for an empty value may be specified.
+   *
+   * Finally, the whole matrix can be multiplied with a common denominator to
+   * produce more readable output, even integers.
+   *
+   * @attention This function may produce <b>large</b> amounts of output if
+   * applied to a large matrix!
+   */
+  void print_formatted (std::ostream       &out,
+                        const unsigned int  precision   = 3,
+                        const bool          scientific  = true,
+                        const unsigned int  width       = 0,
+                        const char         *zero_string = " ",
+                        const double        denominator = 1.) const;
+//@}
+  /**
+   * @addtogroup Exceptions
+   * @{
+   */
+
+  /**
+   * Exception raised by operator= if the block layouts of both matrices differ.
+   */
+  DeclException0 (ExcBlockDimensionMismatch);
+  //@}
+
+private:
+  /**
+   * Pointer to the block sparsity pattern used for this matrix. In order to
+   * guarantee that it is not deleted while still in use, we subscribe to it
+   * using the SmartPointer class.
+   */
+  SmartPointer<const BlockSparsityPattern,BlockSparseMatrix<number> > sparsity_pattern;
+};
+
+
+
+/*@}*/
+/* ------------------------- Template functions ---------------------- */
+
+
+
+// Scalar assignment; only d==0 is permitted. The value is handed on to
+// every block of the matrix.
+template <typename number>
+inline BlockSparseMatrix<number> &
+BlockSparseMatrix<number>::operator = (const double d)
+{
+  Assert (d==0, ExcScalarAssignmentOnlyForZeroValue());
+
+  for (size_type row=0; row<this->n_block_rows(); ++row)
+    for (size_type col=0; col<this->n_block_cols(); ++col)
+      this->block(row,col) = d;
+  return *this;
+}
+
+
+
+// dst = M*src for block vectors; forwards to the base class.
+template <typename number>
+template <typename block_number>
+inline void
+BlockSparseMatrix<number>::vmult (BlockVector<block_number>       &dst,
+                                  const BlockVector<block_number> &src) const
+{
+  BaseClass::vmult_block_block (dst, src);
+}
+
+
+
+// dst = M*src with a block vector dst and a plain vector src (matrix with
+// a single block column); forwards to the base class.
+template <typename number>
+template <typename block_number, typename nonblock_number>
+inline void
+BlockSparseMatrix<number>::vmult (BlockVector<block_number>     &dst,
+                                  const Vector<nonblock_number> &src) const
+{
+  BaseClass::vmult_block_nonblock (dst, src);
+}
+
+
+
+// dst = M*src with a plain vector dst and a block vector src (matrix with
+// a single block row); forwards to the base class.
+template <typename number>
+template <typename block_number, typename nonblock_number>
+inline void
+BlockSparseMatrix<number>::vmult (Vector<nonblock_number>         &dst,
+                                  const BlockVector<block_number> &src) const
+{
+  BaseClass::vmult_nonblock_block (dst, src);
+}
+
+
+
+// dst = M*src for plain vectors (single-block matrix); forwards to the base.
+template <typename number>
+template <typename nonblock_number>
+inline void
+BlockSparseMatrix<number>::vmult (Vector<nonblock_number>       &dst,
+                                  const Vector<nonblock_number> &src) const
+{
+  BaseClass::vmult_nonblock_nonblock (dst, src);
+}
+
+
+
+// dst = M^T*src for block vectors; forwards to the base class.
+template <typename number>
+template <typename block_number>
+inline void
+BlockSparseMatrix<number>::Tvmult (BlockVector<block_number>       &dst,
+                                   const BlockVector<block_number> &src) const
+{
+  BaseClass::Tvmult_block_block (dst, src);
+}
+
+
+
+// dst = M^T*src with a block vector dst and a plain vector src (matrix
+// with a single block row); forwards to the base class.
+template <typename number>
+template <typename block_number, typename nonblock_number>
+inline void
+BlockSparseMatrix<number>::Tvmult (BlockVector<block_number>     &dst,
+                                   const Vector<nonblock_number> &src) const
+{
+  BaseClass::Tvmult_block_nonblock (dst, src);
+}
+
+
+
+// dst = M^T*src with a plain vector dst and a block vector src (matrix
+// with a single block column); forwards to the base class.
+template <typename number>
+template <typename block_number, typename nonblock_number>
+inline void
+BlockSparseMatrix<number>::Tvmult (Vector<nonblock_number>         &dst,
+                                   const BlockVector<block_number> &src) const
+{
+  BaseClass::Tvmult_nonblock_block (dst, src);
+}
+
+
+
+// dst = M^T*src for plain vectors (single-block matrix); forwards to the base.
+template <typename number>
+template <typename nonblock_number>
+inline void
+BlockSparseMatrix<number>::Tvmult (Vector<nonblock_number>       &dst,
+                                   const Vector<nonblock_number> &src) const
+{
+  BaseClass::Tvmult_nonblock_nonblock (dst, src);
+}
+
+
+
+// Block-wise Jacobi preconditioning: each diagonal block (d,d) applies
+// its own precondition_Jacobi() to src.block(d), writing dst.block(d).
+// Off-diagonal blocks are not used.
+template <typename number>
+template <class BlockVectorType>
+inline void
+BlockSparseMatrix<number>::
+precondition_Jacobi (BlockVectorType       &dst,
+                     const BlockVectorType &src,
+                     const number           omega) const
+{
+  Assert (this->n_block_rows() == this->n_block_cols(), ExcNotQuadratic());
+  Assert (dst.n_blocks() == this->n_block_rows(),
+          ExcDimensionMismatch(dst.n_blocks(), this->n_block_rows()));
+  Assert (src.n_blocks() == this->n_block_cols(),
+          ExcDimensionMismatch(src.n_blocks(), this->n_block_cols()));
+
+  // do a diagonal preconditioning: walk along the block diagonal
+  // only and let every diagonal block do its part
+  for (size_type d=0; d<this->n_block_rows(); ++d)
+    this->block(d,d).precondition_Jacobi (dst.block(d), src.block(d), omega);
+}
+
+
+
+// Jacobi preconditioning for plain (non-block) vectors. This requires a
+// matrix made up of one single block, to which the call is forwarded
+// unchanged.
+template <typename number>
+template <typename number2>
+inline void
+BlockSparseMatrix<number>::
+precondition_Jacobi (Vector<number2>       &dst,
+                     const Vector<number2> &src,
+                     const number           omega) const
+{
+  // only the number of blocks is checked here; the size of the single
+  // block is checked in the function we call
+  Assert (this->n_block_cols() == 1,
+          ExcMessage ("This function only works if the matrix has "
+                      "a single block"));
+  Assert (this->n_block_rows() == 1,
+          ExcMessage ("This function only works if the matrix has "
+                      "a single block"));
+
+  // with a single block there is exactly one diagonal block to apply
+  this->block(0,0).precondition_Jacobi (dst, src, omega);
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif    // dealii__block_sparse_matrix_h
diff --git a/include/deal.II/lac/block_sparse_matrix.templates.h b/include/deal.II/lac/block_sparse_matrix.templates.h
new file mode 100644
index 0000000..22fa11a
--- /dev/null
+++ b/include/deal.II/lac/block_sparse_matrix.templates.h
@@ -0,0 +1,212 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__block_sparse_matrix_templates_h
+#define dealii__block_sparse_matrix_templates_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/lac/block_sparse_matrix.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// Default constructor; nothing is set up here.
+template <typename number>
+BlockSparseMatrix<number>::BlockSparseMatrix () {}
+
+
+
+// Construct the matrix by calling reinit() with the given sparsity pattern.
+template <typename number>
+BlockSparseMatrix<number>::BlockSparseMatrix (const BlockSparsityPattern &sparsity)
+{
+  this->reinit (sparsity);
+}
+
+
+
+// Destructor: clear the matrix and reset the sparsity pattern pointer.
+template <typename number>
+BlockSparseMatrix<number>::~BlockSparseMatrix ()
+{
+  // delete previous content of the subobjects array
+  this->clear ();
+  sparsity_pattern = 0;
+}
+
+
+
+// Pseudo copy operator: both matrices must have the same block layout;
+// the actual work is left to the assignment operators of the blocks.
+template <typename number>
+BlockSparseMatrix<number> &
+BlockSparseMatrix<number>::
+operator = (const BlockSparseMatrix<number> &m)
+{
+  Assert (this->row_block_indices == m.row_block_indices,
+          ExcBlockDimensionMismatch());
+  Assert (this->column_block_indices == m.column_block_indices,
+          ExcBlockDimensionMismatch());
+
+  // this operator does not do anything other than checking whether
+  // the base objects want to do something
+  for (size_type row=0; row<this->n_block_rows(); ++row)
+    for (size_type col=0; col<this->n_block_cols(); ++col)
+      this->block(row,col) = m.block(row,col);
+
+  return *this;
+}
+
+
+
+// Clear the base class part, then drop the link to the sparsity pattern.
+template <typename number>
+void BlockSparseMatrix<number>::clear ()
+{
+  BaseClass::clear();
+  sparsity_pattern = 0;
+}
+
+
+
+// Rebuild the matrix on top of the given block sparsity pattern: the old
+// content is cleared, the block layout is copied from the pattern and
+// one freshly allocated SparseMatrix is set up per block.
+template <typename number>
+void
+BlockSparseMatrix<number>::
+reinit (const BlockSparsityPattern &sparsity)
+{
+  // first delete previous content of the subobjects array and delete
+  // the table completely
+  clear ();
+
+  // then associate new sparsity pattern and resize
+  sparsity_pattern = &sparsity;
+
+  this->row_block_indices    = sparsity.row_indices;
+  this->column_block_indices = sparsity.column_indices;
+
+  this->sub_objects.reinit (sparsity.n_block_rows(), sparsity.n_block_cols());
+
+  // and reinitialize the blocks; the table stores the pointers created here
+  for (size_type row=0; row<this->n_block_rows(); ++row)
+    for (size_type col=0; col<this->n_block_cols(); ++col)
+      {
+        BlockType *new_block = new SparseMatrix<number>();
+        new_block->reinit (sparsity.block(row,col));
+        this->sub_objects[row][col] = new_block;
+      }
+}
+
+
+
+// True if every block reports empty() (in particular if there are no
+// blocks at all); false as soon as one non-empty block is found.
+template <typename number>
+bool BlockSparseMatrix<number>::empty () const
+{
+  for (size_type row=0; row<this->n_block_rows(); ++row)
+    for (size_type col=0; col<this->n_block_cols(); ++col)
+      if (!this->block(row,col).empty ())
+        return false;
+  return true;
+}
+
+
+
+// Length of the given row as reported by the block sparsity pattern.
+template <typename number>
+typename BlockSparseMatrix<number>::size_type
+BlockSparseMatrix<number>::get_row_length (const size_type row) const
+{
+  return sparsity_pattern->row_length(row);
+}
+
+
+// Forwarded to n_nonzero_elements() of the block sparsity pattern.
+template <typename number>
+typename BlockSparseMatrix<number>::size_type
+BlockSparseMatrix<number>::n_nonzero_elements () const
+{
+  return sparsity_pattern->n_nonzero_elements ();
+}
+
+
+
+// Add up n_actually_nonzero_elements(threshold) over all blocks.
+template <typename number>
+typename BlockSparseMatrix<number>::size_type
+BlockSparseMatrix<number>::n_actually_nonzero_elements (const double threshold) const
+{
+  size_type total = 0;
+  for (size_type r=0; r<this->n_block_rows(); ++r)
+    for (size_type c=0; c<this->n_block_cols(); ++c)
+      total += this->sub_objects[r][c]->n_actually_nonzero_elements (threshold);
+  return total;
+}
+
+
+// Dereference the stored pointer to the associated block sparsity pattern.
+template <typename number>
+const BlockSparsityPattern &
+BlockSparseMatrix<number>::get_sparsity_pattern () const
+{
+  return *sparsity_pattern;
+}
+
+
+
+// Print each block via its print_formatted(), below a "Component (r,c)" line.
+template <typename number>
+void BlockSparseMatrix<number>::
+print_formatted (std::ostream       &out,
+                 const unsigned int  precision,
+                 const bool          scientific,
+                 const unsigned int  width,
+                 const char         *zero_string,
+                 const double        denominator) const
+{
+  for (size_type row=0; row<this->n_block_rows(); ++row)
+    for (size_type col=0; col<this->n_block_cols(); ++col)
+      {
+        out << "Component (" << row << "," << col << ")" << std::endl;
+        this->block(row,col).print_formatted (out, precision, scientific,
+                                              width, zero_string, denominator);
+      }
+}
+
+
+
+// Estimate in bytes: this object, the table of block pointers, and every
+// block the table points to.
+template <typename number>
+std::size_t BlockSparseMatrix<number>::memory_consumption () const
+{
+  std::size_t bytes = sizeof(*this)
+                      + MemoryConsumption::memory_consumption (this->sub_objects);
+  for (size_type r=0; r<this->n_block_rows(); ++r)
+    for (size_type c=0; c<this->n_block_cols(); ++c)
+      bytes += MemoryConsumption::memory_consumption(*this->sub_objects[r][c]);
+  return bytes;
+}
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // dealii__block_sparse_matrix_templates_h
diff --git a/include/deal.II/lac/block_sparse_matrix_ez.h b/include/deal.II/lac/block_sparse_matrix_ez.h
new file mode 100644
index 0000000..1ef96a9
--- /dev/null
+++ b/include/deal.II/lac/block_sparse_matrix_ez.h
@@ -0,0 +1,541 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2002 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__block_sparse_matrix_ez_h
+#define dealii__block_sparse_matrix_ez_h
+
+
+//TODO: Derive BlockSparseMatrixEZ from BlockMatrixBase, like all the other block matrices as well; this would allow to instantiate a few functions with this template argument as well (in particular ConstraintMatrix::distribute_local_to_global)
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/table.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/lac/block_indices.h>
+#include <deal.II/lac/sparse_matrix_ez.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <typename Number> class BlockVector;
+
+/*! @addtogroup Matrix1
+ *@{
+ */
+
+
+/**
+ * A block matrix consisting of blocks of type SparseMatrixEZ.
+ *
+ * Like the other Block-objects, this matrix can be used like a
+ * SparseMatrixEZ, when it comes to access to entries. Then, there are
+ * functions for the multiplication with BlockVector and access to the
+ * individual blocks.
+ *
+ * @see
+ * @ref GlossBlockLA "Block (linear algebra)"
+ * @author Guido Kanschat, 2002, 2003
+ */
+template<typename Number>
+class BlockSparseMatrixEZ : public Subscriptor
+{
+public:
+  /**
+   * Declare type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * Default constructor. The result is an empty object with zero dimensions.
+   */
+  BlockSparseMatrixEZ ();
+
+  /**
+   * Constructor setting up an object with given number of block rows and
+   * columns. The blocks themselves still have zero dimension.
+   */
+  BlockSparseMatrixEZ (const unsigned int block_rows,
+                       const unsigned int block_cols);
+
+  /**
+   * Copy constructor. This is needed for some container classes. It creates
+   * an object of the same number of block rows and columns. Since it calls
+   * the copy constructor of SparseMatrixEZ, the blocks must be empty.
+   */
+  BlockSparseMatrixEZ (const BlockSparseMatrixEZ<Number> &);
+
+  /**
+   * Copy operator. Like the copy constructor, this may be called for objects
+   * with empty blocks only. Both objects must have the same number of block
+   * rows and block columns.
+   */
+  BlockSparseMatrixEZ &operator = (const BlockSparseMatrixEZ<Number> &);
+
+  /**
+   * This operator assigns a scalar to a matrix. Since this does usually not
+   * make much sense (should we set all matrix entries to this value? Only the
+   * nonzero entries of the sparsity pattern?), this operation is only allowed
+   * if the actual value to be assigned is zero. This operator only exists to
+   * allow for the obvious notation <tt>matrix=0</tt>, which sets all elements
+   * of the matrix to zero, but keeps the sparsity pattern previously used.
+   */
+  BlockSparseMatrixEZ &operator = (const double d);
+
+
+  /**
+   * Set matrix to zero dimensions and release memory.
+   */
+  void clear ();
+
+  /**
+   * Initialize to given block numbers. After this operation, the matrix will
+   * have the block dimensions provided. Each block will have zero dimensions
+   * and must be initialized subsequently. After setting the sizes of the
+   * blocks, collect_sizes() must be called to update internal data
+   * structures.
+   */
+  void reinit (const unsigned int n_block_rows,
+               const unsigned int n_block_cols);
+  /**
+   * This function collects the sizes of the sub-objects and stores them in
+   * internal arrays, in order to be able to relay global indices into the
+   * matrix to indices into the subobjects. You *must* call this function each
+   * time after you have changed the size of the sub-objects.
+   */
+  void collect_sizes ();
+
+  /**
+   * Access the block with the given coordinates.
+   */
+  SparseMatrixEZ<Number> &
+  block (const unsigned int row,
+         const unsigned int column);
+
+
+  /**
+   * Access the block with the given coordinates. Version for constant
+   * objects.
+   */
+  const SparseMatrixEZ<Number> &
+  block (const unsigned int row,
+         const unsigned int column) const;
+
+  /**
+   * Return the number of block rows, i.e. the number of blocks in each
+   * block column.
+   */
+  unsigned int n_block_rows () const;
+
+  /**
+   * Return the number of block columns, i.e. the number of blocks in each
+   * block row.
+   */
+  unsigned int n_block_cols () const;
+
+  /**
+   * Return whether the object is empty. It is empty if no memory is
+   * allocated, which is the same as that both dimensions are zero. This
+   * function is just the concatenation of the respective call to all sub-
+   * matrices: the result is @p true only if every block reports that it is
+   * empty.
+   */
+  bool empty () const;
+
+  /**
+   * Return number of rows of this matrix, which equals the dimension of the
+   * codomain (or range) space. It is the sum of the number of rows over the
+   * sub-matrix blocks of this matrix.
+   *
+   * @deprecated Use m() instead.
+   */
+  size_type n_rows () const DEAL_II_DEPRECATED;
+
+  /**
+   * Return number of columns of this matrix, which equals the dimension of
+   * the domain space. It is the sum of the number of columns over the sub-
+   * matrix blocks of this matrix.
+   *
+   * @deprecated Use n() instead.
+   */
+  size_type n_cols () const DEAL_II_DEPRECATED;
+
+  /**
+   * Return number of rows of this matrix, which equals the dimension of the
+   * codomain (or range) space. It is the sum of the number of rows over the
+   * sub-matrix blocks of this matrix. Recall that the matrix is of size m()
+   * times n().
+   */
+  size_type m () const;
+
+  /**
+   * Return number of columns of this matrix, which equals the dimension of
+   * the domain space. It is the sum of the number of columns over the sub-
+   * matrix blocks of this matrix. Recall that the matrix is of size m() times
+   * n().
+   */
+  size_type n () const;
+
+  /**
+   * Set the element <tt>(i,j)</tt> to @p value.  Throws an error if the entry
+   * does not exist or if <tt>value</tt> is not a finite number. Still, it is
+   * allowed to store zero values in non-existent fields.
+   *
+   * The global indices are translated to a block and to indices within that
+   * block, and the call is forwarded to the set() function of that block.
+   */
+  void set (const size_type i,
+            const size_type j,
+            const Number value);
+
+  /**
+   * Add @p value to the element <tt>(i,j)</tt>.  Throws an error if the entry
+   * does not exist or if <tt>value</tt> is not a finite number. Still, it is
+   * allowed to store zero values in non-existent fields.
+   *
+   * The global indices are translated to a block and to indices within that
+   * block, and the call is forwarded to the add() function of that block.
+   */
+  void add (const size_type i, const size_type j,
+            const Number value);
+
+
+  /**
+   * Matrix-vector multiplication: let $dst = M*src$ with $M$ being this
+   * matrix.
+   */
+  template <typename somenumber>
+  void vmult (BlockVector<somenumber>       &dst,
+              const BlockVector<somenumber> &src) const;
+
+  /**
+   * Matrix-vector multiplication: let $dst = M^T*src$ with $M$ being this
+   * matrix. This function does the same as vmult() but takes the transposed
+   * matrix.
+   */
+  template <typename somenumber>
+  void Tvmult (BlockVector<somenumber>       &dst,
+               const BlockVector<somenumber> &src) const;
+
+  /**
+   * Adding Matrix-vector multiplication. Add $M*src$ on $dst$ with $M$ being
+   * this matrix.
+   */
+  template <typename somenumber>
+  void vmult_add (BlockVector<somenumber>       &dst,
+                  const BlockVector<somenumber> &src) const;
+
+  /**
+   * Adding Matrix-vector multiplication. Add $M^T*src$ to $dst$ with $M$
+   * being this matrix. This function does the same as vmult_add() but takes
+   * the transposed matrix.
+   */
+  template <typename somenumber>
+  void Tvmult_add (BlockVector<somenumber>       &dst,
+                   const BlockVector<somenumber> &src) const;
+
+
+  /**
+   * Print statistics to the stream @p s. For each block, the numbers of
+   * used, allocated and reserved entries are printed, followed by the totals
+   * over all blocks. If @p full is @p true, prints a histogram of all
+   * existing row lengths and allocated row lengths. Otherwise, just the
+   * relation of allocated and used entries is shown.
+   */
+  template <class StreamType>
+  void print_statistics (StreamType &s, bool full = false);
+
+private:
+  /**
+   * Object storing and managing the transformation of row indices to indices
+   * of the sub-objects.
+   */
+  BlockIndices    row_indices;
+
+  /**
+   * Object storing and managing the transformation of column indices to
+   * indices of the sub-objects.
+   */
+  BlockIndices    column_indices;
+
+  /**
+   * The actual matrices, indexed by block row and block column.
+   */
+  Table<2, SparseMatrixEZ<Number> > blocks;
+};
+
+/*@}*/
+/*----------------------------------------------------------------------*/
+
+
+// The number of block rows is the number of blocks known to the row
+// index object.
+template <typename Number>
+inline
+unsigned int
+BlockSparseMatrixEZ<Number>::n_block_rows () const
+{
+  const unsigned int block_row_count = row_indices.size();
+  return block_row_count;
+}
+
+
+
+// Deprecated spelling of m(): the overall number of rows as recorded by
+// the row index object.
+template <typename Number>
+inline
+typename BlockSparseMatrixEZ<Number>::size_type
+BlockSparseMatrixEZ<Number>::n_rows () const
+{
+  const size_type total_rows = row_indices.total_size();
+  return total_rows;
+}
+
+
+
+// The number of block columns is the number of blocks known to the column
+// index object.
+template <typename Number>
+inline
+unsigned int
+BlockSparseMatrixEZ<Number>::n_block_cols () const
+{
+  const unsigned int block_col_count = column_indices.size();
+  return block_col_count;
+}
+
+
+
+// Deprecated spelling of n(): the overall number of columns as recorded
+// by the column index object.
+template <typename Number>
+inline
+typename BlockSparseMatrixEZ<Number>::size_type
+BlockSparseMatrixEZ<Number>::n_cols () const
+{
+  const size_type total_cols = column_indices.total_size();
+  return total_cols;
+}
+
+
+
+// Writable access to the block at (row, column). Both indices are checked
+// against the current number of block rows and block columns.
+template <typename Number>
+inline
+SparseMatrixEZ<Number> &
+BlockSparseMatrixEZ<Number>::block (const unsigned int row,
+                                    const unsigned int column)
+{
+  Assert (row<n_block_rows(), ExcIndexRange (row, 0, n_block_rows()));
+  Assert (column<n_block_cols(), ExcIndexRange (column, 0, n_block_cols()));
+
+  SparseMatrixEZ<Number> &selected_block = blocks[row][column];
+  return selected_block;
+}
+
+
+
+// Read-only access to the block at (row, column). Both indices are checked
+// against the current number of block rows and block columns.
+template <typename Number>
+inline
+const SparseMatrixEZ<Number> &
+BlockSparseMatrixEZ<Number>::block (const unsigned int row,
+                                    const unsigned int column) const
+{
+  Assert (row<n_block_rows(), ExcIndexRange (row, 0, n_block_rows()));
+  Assert (column<n_block_cols(), ExcIndexRange (column, 0, n_block_cols()));
+
+  const SparseMatrixEZ<Number> &selected_block = blocks[row][column];
+  return selected_block;
+}
+
+
+
+// Overall number of rows, as recorded by the row index object.
+template <typename Number>
+inline
+typename BlockSparseMatrixEZ<Number>::size_type
+BlockSparseMatrixEZ<Number>::m () const
+{
+  const size_type total_rows = row_indices.total_size();
+  return total_rows;
+}
+
+
+
+// Overall number of columns, as recorded by the column index object.
+template <typename Number>
+inline
+typename BlockSparseMatrixEZ<Number>::size_type
+BlockSparseMatrixEZ<Number>::n () const
+{
+  const size_type total_cols = column_indices.total_size();
+  return total_cols;
+}
+
+
+
+// Set the element with global indices (i,j) to @p value.
+//
+// @p value must be a finite number (checked by assertion). The global
+// indices are translated by the row and column index objects into a block
+// number and an index within that block; the call is then forwarded to the
+// set() function of the block found this way.
+template <typename Number>
+inline
+void
+BlockSparseMatrixEZ<Number>::set (const size_type i,
+                                  const size_type j,
+                                  const Number value)
+{
+
+  AssertIsFinite(value);
+
+  // first: number of the block, second: index within that block. Use the
+  // same pair type as add() does, so that the result of global_to_local()
+  // is stored without a converting copy and the block number already has
+  // the unsigned int type that block() expects.
+  const std::pair<unsigned int,size_type>
+  row_index = row_indices.global_to_local (i),
+  col_index = column_indices.global_to_local (j);
+  block(row_index.first,col_index.first).set (row_index.second,
+                                              col_index.second,
+                                              value);
+}
+
+
+
+// Add @p value to the element with global indices (i,j). The value must be
+// finite; the global indices are mapped to a block and to indices within
+// it, and the add() function of that block does the actual work.
+template <typename Number>
+inline
+void
+BlockSparseMatrixEZ<Number>::add (const size_type i,
+                                  const size_type j,
+                                  const Number value)
+{
+  AssertIsFinite(value);
+
+  // first: number of the block, second: index within that block
+  const std::pair<unsigned int,size_type>
+  row_location = row_indices.global_to_local (i);
+  const std::pair<unsigned int,size_type>
+  col_location = column_indices.global_to_local (j);
+
+  SparseMatrixEZ<Number> &target_block = block(row_location.first,
+                                               col_location.first);
+  target_block.add (row_location.second, col_location.second, value);
+}
+
+
+// Compute dst = M*src. The number of blocks of @p dst must match the number
+// of block rows, the number of blocks of @p src the number of block columns.
+template <typename Number>
+template <typename somenumber>
+void
+BlockSparseMatrixEZ<Number>::vmult (BlockVector<somenumber>       &dst,
+                                    const BlockVector<somenumber> &src) const
+{
+  Assert (dst.n_blocks() == n_block_rows(),
+          ExcDimensionMismatch(dst.n_blocks(), n_block_rows()));
+  Assert (src.n_blocks() == n_block_cols(),
+          ExcDimensionMismatch(src.n_blocks(), n_block_cols()));
+
+  // start from a zero vector; vmult_add() then runs over all blocks and
+  // adds the product of block (row,col) with src.block(col) to
+  // dst.block(row)
+  dst = 0.;
+  this->vmult_add (dst, src);
+}
+
+
+
+// Compute dst += M*src: for every block (row,col), the product of that
+// block with src.block(col) is added to dst.block(row).
+template <typename Number>
+template <typename somenumber>
+void
+BlockSparseMatrixEZ<Number>::
+vmult_add (BlockVector<somenumber>       &dst,
+           const BlockVector<somenumber> &src) const
+{
+  Assert (dst.n_blocks() == n_block_rows(),
+          ExcDimensionMismatch(dst.n_blocks(), n_block_rows()));
+  Assert (src.n_blocks() == n_block_cols(),
+          ExcDimensionMismatch(src.n_blocks(), n_block_cols()));
+
+  for (unsigned int block_row=0; block_row<n_block_rows(); ++block_row)
+    for (unsigned int block_col=0; block_col<n_block_cols(); ++block_col)
+      {
+        const SparseMatrixEZ<Number> &current = block(block_row,block_col);
+        current.vmult_add (dst.block(block_row), src.block(block_col));
+      }
+}
+
+
+
+
+// Compute dst = M^T*src. The number of blocks of @p dst must match the
+// number of block columns, the number of blocks of @p src the number of
+// block rows.
+template <typename Number>
+template <typename somenumber>
+void
+BlockSparseMatrixEZ<Number>::
+Tvmult (BlockVector<somenumber>       &dst,
+        const BlockVector<somenumber> &src) const
+{
+  Assert (dst.n_blocks() == n_block_cols(),
+          ExcDimensionMismatch(dst.n_blocks(), n_block_cols()));
+  Assert (src.n_blocks() == n_block_rows(),
+          ExcDimensionMismatch(src.n_blocks(), n_block_rows()));
+
+  // start from a zero vector; Tvmult_add() then runs over all blocks and
+  // adds the transposed product of block (row,col) with src.block(row) to
+  // dst.block(col)
+  dst = 0.;
+  this->Tvmult_add (dst, src);
+}
+
+
+
+// Compute dst += M^T*src: for every block (row,col), the transposed product
+// of that block with src.block(row) is added to dst.block(col).
+template <typename Number>
+template <typename somenumber>
+void
+BlockSparseMatrixEZ<Number>::
+Tvmult_add (BlockVector<somenumber>       &dst,
+            const BlockVector<somenumber> &src) const
+{
+  Assert (dst.n_blocks() == n_block_cols(),
+          ExcDimensionMismatch(dst.n_blocks(), n_block_cols()));
+  Assert (src.n_blocks() == n_block_rows(),
+          ExcDimensionMismatch(src.n_blocks(), n_block_rows()));
+
+  for (unsigned int block_row=0; block_row<n_block_rows(); ++block_row)
+    for (unsigned int block_col=0; block_col<n_block_cols(); ++block_col)
+      {
+        const SparseMatrixEZ<Number> &current = block(block_row,block_col);
+        current.Tvmult_add (dst.block(block_col), src.block(block_row));
+      }
+}
+
+
+// Print memory statistics of all blocks to @p out.
+//
+// For every block, a header line "block:<tab>i<tab>j" is written, followed
+// by the numbers of used, allocated and reserved entries that the block's
+// compute_statistics() function reports. If @p full is true, the non-zero
+// entries of the histogram filled by compute_statistics() are printed as
+// well. After all blocks, the totals over all blocks are printed, including
+// the accumulated histogram (which stays empty unless @p full is true).
+template <typename number>
+template <class StreamType>
+inline
+void
+BlockSparseMatrixEZ<number>::print_statistics (StreamType &out, bool full)
+{
+  size_type used_total = 0;
+  size_type allocated_total = 0;
+  size_type reserved_total = 0;
+  std::vector<size_type> used_by_line_total;
+
+  // output arguments of compute_statistics(); initialized so that they
+  // never hold indeterminate values
+  size_type used = 0;
+  size_type allocated = 0;
+  size_type reserved = 0;
+  std::vector<size_type> used_by_line;
+
+  for (unsigned int i=0; i<n_block_rows(); ++i)
+    for (unsigned int j=0; j<n_block_cols(); ++j)
+      {
+        used_by_line.clear();
+        out << "block:\t" << i << '\t' << j << std::endl;
+        block(i,j).compute_statistics (used, allocated, reserved,
+                                       used_by_line, full);
+
+        out << "used:" << used << std::endl
+            << "allocated:" << allocated << std::endl
+            << "reserved:" << reserved << std::endl;
+
+        used_total += used;
+        allocated_total += allocated;
+        reserved_total += reserved;
+
+        if (full)
+          {
+            // The histograms of different blocks may have different
+            // lengths. Only ever grow the accumulated histogram: shrinking
+            // it to the length of the current block would throw away
+            // counts already collected from earlier blocks.
+            if (used_by_line_total.size() < used_by_line.size())
+              used_by_line_total.resize(used_by_line.size());
+
+            // use an index name of its own here instead of hiding the
+            // block row index of the enclosing loop
+            for (size_type length=0; length<used_by_line.size(); ++length)
+              if (used_by_line[length] != 0)
+                {
+                  out << "row-entries\t" << length
+                      << "\trows\t" << used_by_line[length]
+                      << std::endl;
+                  used_by_line_total[length] += used_by_line[length];
+                }
+          }
+      }
+  out << "Total" << std::endl
+      << "used:" << used_total << std::endl
+      << "allocated:" << allocated_total << std::endl
+      << "reserved:" << reserved_total << std::endl;
+  for (size_type length=0; length<used_by_line_total.size(); ++length)
+    if (used_by_line_total[length] != 0)
+      {
+        out << "row-entries\t" << length
+            << "\trows\t" << used_by_line_total[length]
+            << std::endl;
+      }
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif //dealii__block_sparse_matrix_ez_h
diff --git a/include/deal.II/lac/block_sparse_matrix_ez.templates.h b/include/deal.II/lac/block_sparse_matrix_ez.templates.h
new file mode 100644
index 0000000..1ce5c06
--- /dev/null
+++ b/include/deal.II/lac/block_sparse_matrix_ez.templates.h
@@ -0,0 +1,183 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2002 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__block_sparse_matrix_ez_templates_h
+#define dealii__block_sparse_matrix_ez_templates_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/lac/block_sparse_matrix_ez.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// Default constructor: the base class and all members are default
+// constructed.
+template <typename number>
+BlockSparseMatrixEZ<number>::BlockSparseMatrixEZ ()
+  :
+  Subscriptor (),
+  row_indices (),
+  column_indices (),
+  blocks ()
+{}
+
+
+
+// Set up a matrix with @p rows block rows and @p cols block columns. All
+// blocks are default constructed, i.e. they still have to be given their
+// sizes, after which collect_sizes() has to be called.
+//
+// The table of blocks is allocated here as well, exactly as reinit() does:
+// without it, n_block_rows() and n_block_cols() would already report
+// rows and cols while the table is still empty, so that block(r,c) would
+// pass its range checks and then index into an empty table.
+template <typename number>
+BlockSparseMatrixEZ<number>::
+BlockSparseMatrixEZ (const unsigned int rows,
+                     const unsigned int cols)
+  :
+  row_indices (rows, 0),
+  column_indices (cols, 0),
+  blocks (rows, cols)
+{}
+
+
+
+//  template <typename number>
+//  BlockSparseMatrixEZ<number>::~BlockSparseMatrixEZ ()
+//  {
+//                                 // delete previous content of
+//                                 // the subobjects array
+//    clear ();
+//  };
+
+
+
+// Block-wise assignment. Both objects must have the same number of block
+// rows and block columns.
+template <typename number>
+BlockSparseMatrixEZ<number> &
+BlockSparseMatrixEZ<number>::
+operator = (const BlockSparseMatrixEZ<number> &m)
+{
+  Assert (n_block_rows() == m.n_block_rows(),
+          ExcDimensionMismatch(n_block_rows(), m.n_block_rows()));
+  Assert (n_block_cols() == m.n_block_cols(),
+          ExcDimensionMismatch(n_block_cols(), m.n_block_cols()));
+
+  // nothing is done at the level of this class itself; every block is
+  // simply handed to the assignment operator of the block class
+  for (unsigned int block_row=0; block_row<n_block_rows(); ++block_row)
+    for (unsigned int block_col=0; block_col<n_block_cols(); ++block_col)
+      {
+        SparseMatrixEZ<number> &destination = block(block_row,block_col);
+        destination = m.block(block_row,block_col);
+      }
+
+  return *this;
+}
+
+
+
+// Scalar assignment; only the value zero is allowed. Zero is assigned to
+// every block.
+template <typename number>
+BlockSparseMatrixEZ<number> &
+BlockSparseMatrixEZ<number>::operator = (const double d)
+{
+  // d is only looked at inside the assertion; the cast keeps it "used"
+  // where the assertion expands to nothing
+  (void)d;
+  Assert (d==0, ExcScalarAssignmentOnlyForZeroValue());
+
+  for (unsigned int block_row=0; block_row<n_block_rows(); ++block_row)
+    for (unsigned int block_col=0; block_col<n_block_cols(); ++block_col)
+      {
+        SparseMatrixEZ<number> &current = block(block_row,block_col);
+        current = 0;
+      }
+
+  return *this;
+}
+
+
+
+// Copy constructor: the base class, both index objects and the table of
+// blocks are copy constructed from the corresponding parts of @p m.
+template <typename number>
+BlockSparseMatrixEZ<number>::
+BlockSparseMatrixEZ (const BlockSparseMatrixEZ<number> &m)
+  :
+  Subscriptor (m),
+  row_indices (m.row_indices),
+  column_indices (m.column_indices),
+  blocks (m.blocks)
+{}
+
+
+
+// Resize to @p rows times @p cols blocks.
+template <typename number>
+void
+BlockSparseMatrixEZ<number>::reinit (const unsigned int rows,
+                                     const unsigned int cols)
+{
+  // the table of blocks gets the new block dimensions ...
+  blocks.reinit(rows, cols);
+
+  // ... and both index objects the matching number of blocks, each of
+  // size zero for now
+  row_indices.reinit(rows, 0);
+  column_indices.reinit(cols, 0);
+}
+
+
+
+// Go back to zero block rows and zero block columns.
+template <typename number>
+void
+BlockSparseMatrixEZ<number>::clear ()
+{
+  // resize the table of blocks to 0 x 0 ...
+  blocks.reinit(0, 0);
+
+  // ... and reset both index objects to zero blocks
+  row_indices.reinit(0, 0);
+  column_indices.reinit(0, 0);
+}
+
+
+
+// Return true if every block reports that it is empty (which includes the
+// case that there are no blocks at all), false as soon as one block does
+// not.
+template <typename number>
+bool
+BlockSparseMatrixEZ<number>::empty () const
+{
+  for (unsigned int block_row=0; block_row<n_block_rows(); ++block_row)
+    for (unsigned int block_col=0; block_col<n_block_cols(); ++block_col)
+      {
+        const bool block_is_empty = block(block_row,block_col).empty ();
+        if (!block_is_empty)
+          return false;
+      }
+
+  return true;
+}
+
+
+
+// Read the sizes of the blocks and store them in the row and column index
+// objects. Assertions check that all blocks of a block row have the same
+// number of rows and all blocks of a block column the same number of
+// columns.
+template <typename number>
+void
+BlockSparseMatrixEZ<number>::collect_sizes ()
+{
+  const unsigned int n_rows_of_blocks = n_block_rows();
+  const unsigned int n_cols_of_blocks = n_block_cols();
+
+  // --- rows ---
+  // take the row sizes from the first block column
+  std::vector<size_type> row_sizes (n_rows_of_blocks);
+  for (unsigned int r=0; r<n_rows_of_blocks; ++r)
+    row_sizes[r] = blocks[r][0].m();
+
+  // all other block columns have to agree with these sizes
+  for (unsigned int c=1; c<n_cols_of_blocks; ++c)
+    for (unsigned int r=0; r<n_rows_of_blocks; ++r)
+      Assert (row_sizes[r] == blocks[r][c].m(),
+              ExcDimensionMismatch (row_sizes[r], blocks[r][c].m()));
+
+  // hand the sizes to the row index object
+  row_indices.reinit (row_sizes);
+
+  // --- columns ---
+  // take the column sizes from the first block row
+  std::vector<size_type> col_sizes (n_cols_of_blocks);
+  for (unsigned int c=0; c<n_cols_of_blocks; ++c)
+    col_sizes[c] = blocks[0][c].n();
+
+  // all other block rows have to agree with these sizes
+  for (unsigned int r=1; r<n_rows_of_blocks; ++r)
+    for (unsigned int c=0; c<n_cols_of_blocks; ++c)
+      Assert (col_sizes[c] == blocks[r][c].n(),
+              ExcDimensionMismatch (col_sizes[c], blocks[r][c].n()));
+
+  // hand the sizes to the column index object
+  column_indices.reinit (col_sizes);
+}
+
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // dealii__block_sparse_matrix_ez_templates_h
diff --git a/include/deal.II/lac/block_sparsity_pattern.h b/include/deal.II/lac/block_sparsity_pattern.h
new file mode 100644
index 0000000..14c04a5
--- /dev/null
+++ b/include/deal.II/lac/block_sparsity_pattern.h
@@ -0,0 +1,999 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__block_sparsity_pattern_h
+#define dealii__block_sparsity_pattern_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/table.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/lac/sparsity_pattern.h>
+#include <deal.II/lac/trilinos_sparsity_pattern.h>
+#include <deal.II/lac/compressed_sparsity_pattern.h>
+#include <deal.II/lac/compressed_set_sparsity_pattern.h>
+#include <deal.II/lac/compressed_simple_sparsity_pattern.h>
+#include <deal.II/lac/block_indices.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+template <typename number> class BlockSparseMatrix;
+class BlockSparsityPattern;
+class BlockDynamicSparsityPattern;
+#ifdef DEAL_II_WITH_TRILINOS
+namespace TrilinosWrappers
+{
+  class BlockSparsityPattern;
+}
+#endif
+
+/*! @addtogroup Sparsity
+ *@{
+ */
+
+
+/**
+ * This is the base class for block versions of the sparsity pattern and
+ * dynamic sparsity pattern classes. It has not much functionality, but only
+ * administrates an array of sparsity pattern objects and delegates work to
+ * them. It has mostly the same interface as has the SparsityPattern, and
+ * DynamicSparsityPattern, and simply transforms calls to its member functions
+ * to calls to the respective member functions of the member sparsity
+ * patterns.
+ *
+ * The largest difference between the SparsityPattern and
+ * DynamicSparsityPattern classes and this class is that mostly, the matrices
+ * have different properties and you will want to work on the blocks making up
+ * the matrix rather than the whole matrix. You can access the different
+ * blocks using the <tt>block(row,col)</tt> function.
+ *
+ * Attention: this object is not automatically notified if the size of one of
+ * its subobjects is changed. After you initialize the sizes of the
+ * subobjects, you will therefore have to call the <tt>collect_sizes()</tt>
+ * function of this class! Note that, of course, all sub-matrices in a
+ * (block-)row have to have the same number of rows, and that all sub-matrices
+ * in a (block-)column have to have the same number of columns.
+ *
+ * You will in general not want to use this class, but one of the derived
+ * classes.
+ *
+ * @todo Handle optimization of diagonal elements of the underlying
+ * SparsityPattern correctly.
+ *
+ * @see
+ * @ref GlossBlockLA "Block (linear algebra)"
+ * @author Wolfgang Bangerth, 2000, 2001
+ */
+template <typename SparsityPatternType>
+class BlockSparsityPatternBase : public Subscriptor
+{
+public:
+  /**
+   * Declare type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * Define a value which is used to indicate that a certain value in the @p
+   * colnums array is unused, i.e. does not represent a certain column number
+   * index.
+   *
+   * This value is only an alias to the respective value of the
+   * SparsityPattern class.
+   */
+  static const size_type invalid_entry = SparsityPattern::invalid_entry;
+
+  /**
+   * Initialize the matrix empty, that is with no memory allocated. This is
+   * useful if you want such objects as member variables in other classes. You
+   * can make the structure usable by calling the reinit() function.
+   */
+  BlockSparsityPatternBase ();
+
+  /**
+   * Initialize the matrix with the given number of block rows and columns.
+   * The blocks themselves are still empty, and you have to call
+   * collect_sizes() after you assign them sizes.
+   */
+  BlockSparsityPatternBase (const size_type n_block_rows,
+                            const size_type n_block_columns);
+
+  /**
+   * Copy constructor. This constructor is only allowed to be called if the
+   * sparsity pattern to be copied is empty, i.e. there are no block allocated
+   * at present. This is for the same reason as for the SparsityPattern, see
+   * there for the details.
+   */
+  BlockSparsityPatternBase (const BlockSparsityPatternBase &bsp);
+
+  /**
+   * Destructor.
+   */
+  ~BlockSparsityPatternBase ();
+
+  /**
+   * Resize the matrix, by setting the number of block rows and columns. This
+   * deletes all blocks and replaces them with uninitialized ones, i.e. ones
+   * for which also the sizes are not yet set. You have to do that by calling
+   * the reinit() functions of the blocks themselves. Do not forget to call
+   * collect_sizes() after that on this object.
+   *
+   * The reason that you have to set sizes of the blocks yourself is that the
+   * sizes may be varying, the maximum number of elements per row may be
+   * varying, etc. It is simpler not to reproduce the interface of the
+   * SparsityPattern class here but rather let the user call whatever function
+   * she desires.
+   */
+  void reinit (const size_type n_block_rows,
+               const size_type n_block_columns);
+
+  /**
+   * Copy operator. For this the same holds as for the copy constructor: it is
+   * declared, defined and fine to be called, but the latter only for empty
+   * objects.
+   */
+  BlockSparsityPatternBase &operator = (const BlockSparsityPatternBase &);
+
+  /**
+   * This function collects the sizes of the sub-objects and stores them in
+   * internal arrays, in order to be able to relay global indices into the
+   * matrix to indices into the subobjects. You *must* call this function each
+   * time after you have changed the size of the sub-objects.
+   */
+  void collect_sizes ();
+
+  /**
+   * Access the block with the given coordinates.
+   */
+  SparsityPatternType &
+  block (const size_type row,
+         const size_type column);
+
+
+  /**
+   * Access the block with the given coordinates. Version for constant
+   * objects.
+   */
+  const SparsityPatternType &
+  block (const size_type row,
+         const size_type column) const;
+
+  /**
+   * Grant access to the object describing the distribution of row indices to
+   * the individual blocks.
+   */
+  const BlockIndices &
+  get_row_indices () const;
+
+  /**
+   * Grant access to the object describing the distribution of column indices
+   * to the individual blocks.
+   */
+  const BlockIndices &
+  get_column_indices () const;
+
+  /**
+   * This function compresses the sparsity structures that this object
+   * represents. It simply calls @p compress for all sub-objects.
+   */
+  void compress ();
+
+  /**
+   * Return the number of blocks in a column.
+   */
+  size_type n_block_rows () const;
+
+  /**
+   * Return the number of blocks in a row.
+   */
+  size_type n_block_cols () const;
+
+  /**
+   * Return whether the object is empty. It is empty if no memory is
+   * allocated, which is the same as that both dimensions are zero. This
+   * function is just the concatenation of the respective call to all sub-
+   * matrices.
+   */
+  bool empty () const;
+
+  /**
+   * Return the maximum number of entries per row. It returns the maximal
+   * number of entries per row accumulated over all blocks in a row, and the
+   * maximum over all rows.
+   */
+  size_type max_entries_per_row () const;
+
+  /**
+   * Add a nonzero entry to the matrix. This function may only be called for
+   * non-compressed sparsity patterns.
+   *
+   * If the entry already exists, nothing bad happens.
+   *
+   * This function simply finds out to which block <tt>(i,j)</tt> belongs and
+   * then relays to that block.
+   */
+  void add (const size_type i, const size_type j);
+
+  /**
+   * Add several nonzero entries to the specified matrix row.  This function
+   * may only be called for non-compressed sparsity patterns.
+   *
+   * If some of the entries already exist, nothing bad happens.
+   *
+   * This function simply finds out to which blocks <tt>(row,col)</tt> for
+   * <tt>col</tt> in the iterator range belong and then relays to those
+   * blocks.
+   */
+  template <typename ForwardIterator>
+  void add_entries (const size_type  row,
+                    ForwardIterator  begin,
+                    ForwardIterator  end,
+                    const bool       indices_are_sorted = false);
+
+  /**
+   * Return number of rows of this matrix, which equals the dimension of the
+   * image space. It is the sum of rows of the (block-)rows of sub-matrices.
+   */
+  size_type n_rows () const;
+
+  /**
+   * Return number of columns of this matrix, which equals the dimension of
+   * the domain space. It is the sum of columns of the (block-)columns of sub-
+   * matrices.
+   */
+  size_type n_cols () const;
+
+  /**
+   * Check if a value at a certain position may be non-zero.
+   */
+  bool exists (const size_type i, const size_type j) const;
+
+  /**
+   * Number of entries in a specific row, added up over all the blocks that
+   * form this row.
+   */
+  unsigned int row_length (const size_type row) const;
+
+  /**
+   * Return the number of nonzero elements of this matrix. Actually, it
+   * returns the number of entries in the sparsity pattern; if any of the
+   * entries should happen to be zero, it is counted anyway.
+   *
+   * This function may only be called if the matrix struct is compressed. It
+   * does not make too much sense otherwise anyway.
+   *
+   * In the present context, it is the sum of the values as returned by the
+   * sub-objects.
+   */
+  size_type n_nonzero_elements () const;
+
+  /**
+   * Print the sparsity of the matrix. The output consists of one line per row
+   * of the format <tt>[i,j1,j2,j3,...]</tt>. <i>i</i> is the row number and
+   * <i>jn</i> are the allocated columns in this row.
+   */
+  void print (std::ostream &out) const;
+
+  /**
+   * Print the sparsity of the matrix in a format that <tt>gnuplot</tt>
+   * understands and which can be used to plot the sparsity pattern in a
+   * graphical way. This is the same functionality implemented for usual
+   * sparsity patterns, see
+   * @ref SparsityPattern.
+   */
+  void print_gnuplot (std::ostream &out) const;
+
+  /**
+   * @addtogroup Exceptions
+   * @{
+   */
+
+  /**
+   * Exception
+   */
+  DeclException4 (ExcIncompatibleRowNumbers,
+                  int, int, int, int,
+                  << "The blocks [" << arg1 << ',' << arg2 << "] and ["
+                  << arg3 << ',' << arg4 << "] have differing row numbers.");
+  /**
+   * Exception
+   */
+  DeclException4 (ExcIncompatibleColNumbers,
+                  int, int, int, int,
+                  << "The blocks [" << arg1 << ',' << arg2 << "] and ["
+                  << arg3 << ',' << arg4 << "] have differing column numbers.");
+  /**
+   * Exception
+   */
+  DeclException0 (ExcInvalidConstructorCall);
+  //@}
+
+protected:
+
+  /**
+   * Number of block rows.
+   */
+  size_type rows;
+
+  /**
+   * Number of block columns.
+   */
+  size_type columns;
+
+  /**
+   * Array of sparsity patterns.
+   */
+  Table<2,SmartPointer<SparsityPatternType, BlockSparsityPatternBase<SparsityPatternType> > > sub_objects;
+
+  /**
+   * Object storing and managing the transformation of row indices to indices
+   * of the sub-objects.
+   */
+  BlockIndices    row_indices;
+
+  /**
+   * Object storing and managing the transformation of column indices to
+   * indices of the sub-objects.
+   */
+  BlockIndices    column_indices;
+
+private:
+  /**
+   * Temporary vector for counting the elements written into the individual
+   * blocks when doing a collective add or set.
+   */
+  std::vector<size_type > counter_within_block;
+
+  /**
+   * Temporary vector for column indices on each block when writing local to
+   * global data on each sparse matrix.
+   */
+  std::vector<std::vector<size_type > > block_column_indices;
+
+  /**
+   * Make the block sparse matrix a friend, so that it can use our
+   * #row_indices and #column_indices objects.
+   */
+  template <typename number>
+  friend class BlockSparseMatrix;
+};
+
+
+
+/**
+ * This class extends the base class to implement an array of sparsity
+ * patterns that can be used by block sparse matrix objects. It only adds a
+ * few additional member functions, but the main interface stems from the base
+ * class, see there for more information.
+ *
+ * This class is an example of the "static" type of
+ * @ref Sparsity.
+ *
+ * @author Wolfgang Bangerth, 2000, 2001
+ */
+class BlockSparsityPattern : public BlockSparsityPatternBase<SparsityPattern>
+{
+public:
+
+  /**
+   * Initialize the pattern empty, that is with no memory allocated. This is
+   * useful if you want such objects as member variables in other classes. You
+   * can make the structure usable by calling the reinit() function.
+   */
+  BlockSparsityPattern ();
+
+  /**
+   * Initialize the pattern with @p n_rows block rows and @p n_columns block
+   * columns. The blocks themselves are still empty, and you have to call
+   * collect_sizes() after you assign them sizes.
+   */
+  BlockSparsityPattern (const size_type n_rows,
+                        const size_type n_columns);
+
+  /**
+   * Forward both block counts unchanged to BlockSparsityPatternBase::reinit().
+   */
+  void reinit (const size_type n_block_rows,
+               const size_type n_block_columns);
+
+  /**
+   * Initialize the pattern with two BlockIndices for the block structures of
+   * matrix rows and columns as well as a row length vector.
+   *
+   * The row length vector should be in the format produced by DoFTools.
+   * Alternatively, there is a simplified version, where each of the inner
+   * vectors has length one. Then, the corresponding entry is used as the
+   * maximal row length.
+   *
+   * For the diagonal blocks, the inner SparsityPattern is initialized with
+   * optimized diagonals, while this is not done for the off-diagonal blocks.
+   */
+  void reinit (const BlockIndices &row_indices,
+               const BlockIndices &col_indices,
+               const std::vector<std::vector<unsigned int> > &row_lengths);
+
+
+  /**
+   * Return whether the structure is compressed or not, i.e. whether all
+   * sub-patterns are compressed.
+   */
+  bool is_compressed () const;
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  std::size_t memory_consumption () const;
+
+  /**
+   * Copy data from an object of type BlockDynamicSparsityPattern, i.e. resize
+   * this object to the size of the given argument, and copy over the contents
+   * of each of the subobjects. Previous content of this object is lost.
+   */
+  void copy_from (const BlockDynamicSparsityPattern &dsp);
+};
+
+
+
+/**
+ * This class extends the base class to implement an array of dynamic
+ * sparsity patterns that can be used to initialize objects of type
+ * BlockSparsityPattern. It adds only a few member functions and mostly
+ * acts as a @p typedef to introduce the name of this class, without
+ * requiring the user to specify the templated name of the base class. For
+ * information on the interface of this class refer to the base class. The
+ * individual blocks are based on the DynamicSparsityPattern class.
+ *
+ * This class is an example of the "dynamic" type of
+ * @ref Sparsity.
+ *
+ * <h3>Example</h3>
+ *
+ * Usage of this class is very similar to DynamicSparsityPattern, but since
+ * the use of block indices causes some additional complications, we give a
+ * short example.
+ *
+ * @dontinclude block_dynamic_sparsity_pattern.cc
+ *
+ * After the DoFHandler <tt>dof</tt> and the ConstraintMatrix
+ * <tt>constraints</tt> have been set up with a system element, we must count
+ * the degrees of freedom in each matrix block:
+ *
+ * @skipline dofs_per_block
+ * @until count
+ *
+ * Now, we are ready to set up the BlockDynamicSparsityPattern.
+ *
+ * @until collect
+ *
+ * It is filled as if it were a normal pattern
+ *
+ * @until condense
+ *
+ * In the end, it is copied to a normal BlockSparsityPattern for later use.
+ *
+ * @until copy
+ *
+ * @author Wolfgang Bangerth, 2000, 2001, Guido Kanschat, 2006, 2007
+ */
+
+class BlockDynamicSparsityPattern : public BlockSparsityPatternBase<DynamicSparsityPattern>
+{
+public:
+
+  /**
+   * Initialize the pattern empty, that is with no memory allocated. This is
+   * useful if you want such objects as member variables in other classes. You
+   * can make the structure usable by calling the reinit() function.
+   */
+  BlockDynamicSparsityPattern ();
+
+  /**
+   * Initialize the pattern with @p n_rows block rows and @p n_columns block
+   * columns. The blocks themselves are still empty, and you have to call
+   * collect_sizes() after you assign them sizes.
+   */
+  BlockDynamicSparsityPattern (const size_type n_rows,
+                               const size_type n_columns);
+
+  /**
+   * Initialize the pattern with two vectors of block sizes for the block
+   * structures of matrix rows and columns. This function is equivalent to
+   * calling the previous constructor with the length of the two size vectors
+   * and then entering the index values.
+   */
+  BlockDynamicSparsityPattern (const std::vector<size_type> &row_block_sizes,
+                               const std::vector<size_type> &col_block_sizes);
+
+  /**
+   * Initialize the pattern with symmetric blocks. The number of IndexSets in
+   * the vector determines the number of rows and columns of blocks. The size
+   * of each block is determined by the size() of the respective IndexSet.
+   * Each block only stores the rows given by the values in the IndexSet,
+   * which is useful for distributed memory parallel computations and usually
+   * corresponds to the locally owned DoFs.
+   */
+  BlockDynamicSparsityPattern (const std::vector<IndexSet> &partitioning);
+
+  /**
+   * Initialize the pattern with two BlockIndices for the block structures of
+   * matrix rows and columns.
+   */
+  BlockDynamicSparsityPattern (const BlockIndices &row_indices,
+                               const BlockIndices &col_indices);
+
+
+  /**
+   * Resize the pattern to a tensor product of matrices with dimensions
+   * defined by the arguments.
+   *
+   * The matrix will have as many block rows and columns as there are entries
+   * in the two arguments. The block at position (<i>i,j</i>) will have the
+   * dimensions <tt>row_block_sizes[i]</tt> times <tt>col_block_sizes[j]</tt>.
+   */
+  void reinit (const std::vector<size_type> &row_block_sizes,
+               const std::vector<size_type> &col_block_sizes);
+
+  /**
+   * Resize the pattern with symmetric blocks determined by the size() of each
+   * IndexSet. See the constructor taking a vector of IndexSets for details.
+   */
+  void reinit(const std::vector<IndexSet> &partitioning);
+
+  /**
+   * Resize the pattern to a tensor product of matrices with dimensions defined
+   * by the arguments. The two BlockIndices objects must be initialized and
+   * the sparsity pattern will have the same block structure afterwards.
+   */
+  void reinit (const BlockIndices &row_indices, const BlockIndices &col_indices);
+
+  /**
+   * Access to column number field. Return the global column number of the
+   * @p index th entry in the global row @p row.
+   */
+  size_type column_number (const size_type row,
+                           const unsigned int index) const;
+
+  /**
+   * Allow the use of the reinit functions of the base class as well.
+   */
+  using BlockSparsityPatternBase<DynamicSparsityPattern>::reinit;
+};
+
+/**
+ * @deprecated Old name of BlockDynamicSparsityPattern; use that name instead.
+ */
+typedef BlockDynamicSparsityPattern BlockCompressedSparsityPattern DEAL_II_DEPRECATED;
+
+/**
+ * @deprecated Old name of BlockDynamicSparsityPattern; use that name instead.
+ */
+typedef BlockDynamicSparsityPattern BlockCompressedSetSparsityPattern DEAL_II_DEPRECATED;
+
+/**
+ * @deprecated Old name of BlockDynamicSparsityPattern; use that name instead.
+ */
+typedef BlockDynamicSparsityPattern BlockCompressedSimpleSparsityPattern DEAL_II_DEPRECATED;
+
+
+
+
+#ifdef DEAL_II_WITH_TRILINOS
+
+
+/**
+ * This class extends the base class to implement an array of Trilinos
+ * sparsity patterns that can be used to initialize Trilinos block sparse
+ * matrices that can be distributed among different processors. It is used in
+ * the same way as the BlockSparsityPattern except that it builds upon the
+ * TrilinosWrappers::SparsityPattern instead of the dealii::SparsityPattern.
+ * See the documentation of the BlockSparsityPattern for examples.
+ *
+ * This class has properties of the "dynamic" type of
+ * @ref Sparsity
+ * (in the sense that it can extend the memory if too few elements were
+ * allocated), but otherwise is more like the basic deal.II SparsityPattern
+ * (in the sense that the method compress() needs to be called before the
+ * pattern can be used).
+ *
+ * This class is used in step-32.
+ *
+ * @author Martin Kronbichler, 2008, 2009
+ */
+namespace TrilinosWrappers
+{
+  class BlockSparsityPattern :
+    public dealii::BlockSparsityPatternBase<SparsityPattern>
+  {
+  public:
+
+    /**
+     * Initialize the pattern empty, that is with no memory allocated. This is
+     * useful if you want such objects as member variables in other classes.
+     * You can make the structure usable by calling the reinit() function.
+     */
+    BlockSparsityPattern ();
+
+    /**
+     * Initialize the pattern with @p n_rows block rows and @p n_columns block
+     * columns. The blocks themselves are still empty, and you have to call
+     * collect_sizes() after you assign them sizes.
+     */
+    BlockSparsityPattern (const size_type n_rows,
+                          const size_type n_columns);
+
+    /**
+     * Initialize the pattern with two vectors of block sizes for the block
+     * structures of matrix rows and columns. This function is equivalent to
+     * calling the previous constructor with the length of the two size
+     * vectors and then entering the index values.
+     */
+    BlockSparsityPattern (const std::vector<size_type> &row_block_sizes,
+                          const std::vector<size_type> &col_block_sizes);
+
+    /**
+     * Initialize the pattern with an array of Epetra_Map objects that specify
+     * both rows and columns of the matrix (so the final matrix will be a
+     * square matrix), where each Epetra_Map specifies the parallel
+     * distribution of the degrees of freedom on the individual block. This
+     * function is equivalent to calling the second constructor with the
+     * length of the mapping vector and then entering the index values.
+     *
+     * @deprecated Use the respective method with IndexSet arguments instead.
+     */
+    BlockSparsityPattern (const std::vector<Epetra_Map> &parallel_partitioning) DEAL_II_DEPRECATED;
+
+    /**
+     * Initialize the pattern with an array of index sets that specifies both
+     * rows and columns of the matrix (so the final matrix will be a square
+     * matrix), where the size() of the IndexSets specifies the size of the
+     * blocks and the values in each IndexSet denote the rows that are going
+     * to be saved in each block.
+     */
+    BlockSparsityPattern (const std::vector<IndexSet> &parallel_partitioning,
+                          const MPI_Comm &communicator = MPI_COMM_WORLD);
+
+    /**
+     * Initialize the pattern with two arrays of index sets that specify rows
+     * and columns of the matrix, where the size() of the IndexSets specifies
+     * the size of the blocks and the values in each IndexSet denotes the rows
+     * that are going to be saved in each block. The additional argument
+     * @p writeable_rows is used to set all rows that we allow to write
+     * locally. This constructor is used to create matrices that allow several
+     * threads to write simultaneously into the matrix (to different rows, of
+     * course), see the TrilinosWrappers::SparsityPattern::reinit method with
+     * three index set arguments for more details.
+     */
+    BlockSparsityPattern (const std::vector<IndexSet> &row_parallel_partitioning,
+                          const std::vector<IndexSet> &column_parallel_partitioning,
+                          const std::vector<IndexSet> &writeable_rows,
+                          const MPI_Comm              &communicator = MPI_COMM_WORLD);
+
+    /**
+     * Resize the matrix to a tensor product of matrices with dimensions
+     * defined by the arguments.
+     *
+     * The matrix will have as many block rows and columns as there are
+     * entries in the two arguments. The block at position (<i>i,j</i>) will
+     * have the dimensions <tt>row_block_sizes[i]</tt> times
+     * <tt>col_block_sizes[j]</tt>.
+     */
+    void reinit (const std::vector<size_type> &row_block_sizes,
+                 const std::vector<size_type> &col_block_sizes);
+
+    /**
+     * Resize the matrix to a square tensor product of matrices with parallel
+     * distribution according to the specifications in the array of
+     * Epetra_Maps.
+     *
+     * @deprecated Use the respective method with IndexSet arguments instead.
+     */
+    void reinit (const std::vector<Epetra_Map> &parallel_partitioning) DEAL_II_DEPRECATED;
+
+    /**
+     * Resize the matrix to a square tensor product of matrices. See the
+     * constructor that takes a vector of IndexSets for details.
+     */
+    void reinit (const std::vector<IndexSet> &parallel_partitioning,
+                 const MPI_Comm              &communicator = MPI_COMM_WORLD);
+
+    /**
+     * Resize the matrix to a rectangular block matrix. This method allows
+     * rows and columns to be different, both in the outer block structure and
+     * within the blocks.
+     */
+    void reinit (const std::vector<IndexSet> &row_parallel_partitioning,
+                 const std::vector<IndexSet> &column_parallel_partitioning,
+                 const MPI_Comm              &communicator = MPI_COMM_WORLD);
+
+    /**
+     * Resize the matrix to a rectangular block matrix that furthermore
+     * explicitly specifies the writable rows in each of the blocks. This
+     * method is used to create matrices that allow several threads to write
+     * simultaneously into the matrix (to different rows, of course), see the
+     * TrilinosWrappers::SparsityPattern::reinit method with three index set
+     * arguments for more details.
+     */
+    void reinit (const std::vector<IndexSet> &row_parallel_partitioning,
+                 const std::vector<IndexSet> &column_parallel_partitioning,
+                 const std::vector<IndexSet> &writeable_rows,
+                 const MPI_Comm              &communicator = MPI_COMM_WORLD);
+
+    /**
+     * Allow the use of the reinit functions of the base class as well.
+     */
+    using BlockSparsityPatternBase<SparsityPattern>::reinit;
+  };
+}
+
+#endif
+
+
+/*@}*/
+/*---------------------- Template functions -----------------------------------*/
+
+
+
+// Mutable access to the sub-pattern stored at block position (row,column).
+template <typename SparsityPatternType>
+inline SparsityPatternType &
+BlockSparsityPatternBase<SparsityPatternType>::block (const size_type row,
+                                                      const size_type column)
+{
+  Assert (row<rows, ExcIndexRange(row,0,rows));
+  Assert (column<columns, ExcIndexRange(column,0,columns));
+  return *(this->sub_objects[row][column]);
+}
+
+
+
+// Read-only access to the sub-pattern stored at block position (row,column).
+template <typename SparsityPatternType>
+inline const SparsityPatternType &
+BlockSparsityPatternBase<SparsityPatternType>::block (const size_type row,
+                                                      const size_type column) const
+{
+  Assert (row<rows, ExcIndexRange(row,0,rows));
+  Assert (column<columns, ExcIndexRange(column,0,columns));
+  return *(this->sub_objects[row][column]);
+}
+
+
+
+// Read-only access to the object translating row indices to sub-objects.
+template <typename SparsityPatternType>
+inline const BlockIndices &
+BlockSparsityPatternBase<SparsityPatternType>::get_row_indices () const
+{
+  return this->row_indices;
+}
+
+
+
+// Read-only access to the object translating column indices to sub-objects.
+template <typename SparsityPatternType>
+inline const BlockIndices &
+BlockSparsityPatternBase<SparsityPatternType>::get_column_indices () const
+{
+  return this->column_indices;
+}
+
+
+
+// Add the entry (i,j), given in global indices, to the block that owns it.
+template <typename SparsityPatternType>
+inline void
+BlockSparsityPatternBase<SparsityPatternType>::add (const size_type i,
+                                                    const size_type j)
+{
+  // Translate both global indices into (block number, index within block)
+  // pairs. If you get an error here, are you sure you called
+  // <tt>collect_sizes()</tt> before?
+  typedef std::pair<size_type,size_type> BlockLocalIndex;
+  const BlockLocalIndex in_row = this->row_indices.global_to_local (i);
+  const BlockLocalIndex in_col = this->column_indices.global_to_local (j);
+  SparsityPatternType &target = *sub_objects[in_row.first][in_col.first];
+  target.add (in_row.second, in_col.second);
+}
+
+
+
+// Add the column indices in the range [begin,end) to the global row @p row.
+// The indices are first sorted into one scratch array per block column and
+// are then handed, batch by batch, to the sub-patterns of the block row that
+// contains @p row. @p indices_are_sorted is passed on to them unchanged.
+template <typename SparsityPatternType>
+template <typename ForwardIterator>
+void
+BlockSparsityPatternBase<SparsityPatternType>::add_entries (const size_type row,
+                                                            ForwardIterator begin,
+                                                            ForwardIterator end,
+                                                            const bool      indices_are_sorted)
+{
+  const size_type n_blocks = this->n_block_cols();
+
+  // Provide one scratch array and one counter per block column.
+  if (block_column_indices.size() < n_blocks)
+    {
+      block_column_indices.resize (n_blocks);
+      counter_within_block.resize (n_blocks);
+    }
+
+  const size_type n_cols = static_cast<size_type>(end - begin);
+
+  // Make sure every scratch array can hold all n_cols indices. Each array is
+  // checked on its own: arrays that were only just created by the resize
+  // above are still empty even if the first one is already large enough, and
+  // looking at element zero alone would be invalid if there are no block
+  // columns at all. Arrays are only ever grown here, never shrunk.
+  for (size_type b=0; b<n_blocks; ++b)
+    if (block_column_indices[b].size() < n_cols)
+      block_column_indices[b].resize(n_cols);
+
+  // Reset the number of added elements in each block to zero.
+  for (size_type b=0; b<n_blocks; ++b)
+    counter_within_block[b] = 0;
+
+  // Go through the column indices to find out
+  // which of them belong to which block column.
+  // We need to touch all the data, since we
+  // can't be sure that the indices of one block
+  // are stored contiguously (in fact, indices
+  // will be intermixed when they come from an
+  // element matrix). Each index is translated to
+  // its index within the block and appended to
+  // the scratch array of its block column.
+  for (ForwardIterator it = begin; it != end; ++it)
+    {
+      const size_type col = *it;
+
+      const std::pair<size_type , size_type>
+      col_index = this->column_indices.global_to_local(col);
+
+      const size_type local_index = counter_within_block[col_index.first]++;
+
+      block_column_indices[col_index.first][local_index] = col_index.second;
+    }
+
+#ifdef DEBUG
+  // If in debug mode, do a check whether
+  // the right length has been obtained.
+  size_type length = 0;
+  for (size_type i=0; i<this->n_block_cols(); ++i)
+    length += counter_within_block[i];
+  Assert (length == n_cols, ExcInternalError());
+#endif
+
+  // Now we know which indices go into which block column. Hand each
+  // non-empty batch to the corresponding sub-pattern of the block row that
+  // contains the given row; block columns that received no index are
+  // skipped. Only the first counter_within_block[block_col] entries of a
+  // scratch array are valid for this call.
+  const std::pair<size_type , size_type>
+  row_index = this->row_indices.global_to_local (row);
+  for (size_type block_col=0; block_col<n_blocks; ++block_col)
+    {
+      if (counter_within_block[block_col] == 0)
+        continue;
+      sub_objects[row_index.first][block_col]->
+      add_entries (row_index.second,
+                   block_column_indices[block_col].begin(),
+                   block_column_indices[block_col].begin()+counter_within_block[block_col],
+                   indices_are_sorted);
+    }
+}
+
+
+
+// Ask the block that owns the entry (i,j), given in global indices, whether
+// that entry exists, and return its answer.
+template <typename SparsityPatternType>
+inline bool
+BlockSparsityPatternBase<SparsityPatternType>::exists (const size_type i,
+                                                       const size_type j) const
+{
+  // If you get an error here, are you sure you called
+  // <tt>collect_sizes()</tt> before?
+  typedef std::pair<size_type,size_type> BlockLocalIndex;
+  const BlockLocalIndex in_row = this->row_indices.global_to_local (i);
+  const BlockLocalIndex in_col = this->column_indices.global_to_local (j);
+  const SparsityPatternType &owner = *sub_objects[in_row.first][in_col.first];
+  return owner.exists (in_row.second, in_col.second);
+}
+
+
+
+// Return the number of entries in the global row @p row, summed over all
+// block columns of the block row that contains it.
+template <typename SparsityPatternType>
+inline unsigned int
+BlockSparsityPatternBase<SparsityPatternType>::
+row_length (const size_type row) const
+{
+  const std::pair<size_type , size_type>
+  row_index = row_indices.global_to_local (row);
+  // The second index of sub_objects runs over block columns, so the loop
+  // bound has to be the number of block columns, not of block rows.
+  unsigned int c = 0;
+  for (size_type b=0; b<columns; ++b)
+    c += sub_objects[row_index.first][b]->row_length (row_index.second);
+  return c;
+}
+
+
+
+// Return the number of block columns of this pattern.
+template <typename SparsityPatternType>
+inline typename BlockSparsityPatternBase<SparsityPatternType>::size_type
+BlockSparsityPatternBase<SparsityPatternType>::n_block_cols () const
+{
+  return this->columns;
+}
+
+
+
+// Return the number of block rows of this pattern.
+template <typename SparsityPatternType>
+inline typename BlockSparsityPatternBase<SparsityPatternType>::size_type
+BlockSparsityPatternBase<SparsityPatternType>::n_block_rows () const
+{
+  return this->rows;
+}
+
+
+// Global column number of the @p index th entry of the global row @p row.
+inline BlockDynamicSparsityPattern::size_type
+BlockDynamicSparsityPattern::column_number (const size_type row,
+                                            const unsigned int index) const
+{
+  // .first = block row containing the row, .second = row within that block
+  const std::pair<size_type, size_type> position
+    = row_indices.global_to_local (row);
+
+  Assert(index<row_length(row), ExcIndexRange(index, 0, row_length(row)));
+
+  size_type entries_before = 0; // entries of this row in blocks to the left
+  size_type columns_before = 0; // sum of n_cols for all blocks to the left
+  for (unsigned int b=0; b<columns; ++b)
+    {
+      const DynamicSparsityPattern &blk = *sub_objects[position.first][b];
+      const unsigned int rowlen = blk.row_length (position.second);
+      if (index < entries_before+rowlen)
+        return columns_before + blk.column_number(position.second, index-entries_before);
+      entries_before += rowlen;
+      columns_before += blk.n_cols();
+    }
+  Assert(false, ExcInternalError());
+  return 0;
+}
+
+
+// Forward both arguments unchanged to the reinit() function of the base class.
+inline void
+BlockSparsityPattern::reinit (const size_type n_block_rows,
+                              const size_type n_block_columns)
+{
+  typedef BlockSparsityPatternBase<SparsityPattern> BaseClass;
+
+  this->BaseClass::reinit (n_block_rows, n_block_columns);
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/block_vector.h b/include/deal.II/lac/block_vector.h
new file mode 100644
index 0000000..d4a47a6
--- /dev/null
+++ b/include/deal.II/lac/block_vector.h
@@ -0,0 +1,537 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__block_vector_h
+#define dealii__block_vector_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/lac/block_indices.h>
+#include <deal.II/lac/block_vector_base.h>
+
+#include <cstdio>
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+#ifdef DEAL_II_WITH_TRILINOS
+namespace TrilinosWrappers
+{
+  class Vector;      // forward declaration only
+  class BlockVector; // forward declaration; named by BlockVector's Trilinos conversions
+}
+#endif // DEAL_II_WITH_TRILINOS
+
+
+/*! @addtogroup Vectors
+ *@{
+ */
+
+
+/**
+ * An implementation of block vectors based on deal.II vectors. While the base
+ * class provides for most of the interface, this class handles the actual
+ * allocation of vectors and provides functions that are specific to the
+ * underlying vector type.
+ *
+ * @note Instantiations for this template are provided for <tt>@<float@> and
+ * @<double@></tt>; others can be generated in application programs (see the
+ * section on
+ * @ref Instantiations
+ * in the manual).
+ *
+ * @see
+ * @ref GlossBlockLA "Block (linear algebra)"
+ * @author Wolfgang Bangerth, Guido Kanschat, 1999, 2000, 2001, 2002, 2004
+ */
+template <typename Number>
+class BlockVector : public BlockVectorBase<Vector<Number> >
+{
+public:
+  /**
+   * Typedef the base class for simpler access to its own typedefs.
+   */
+  typedef BlockVectorBase<Vector<Number> > BaseClass;
+
+  /**
+   * Typedef the type of the underlying vector.
+   */
+  typedef typename BaseClass::BlockType  BlockType;
+
+  /**
+   * Import the typedefs from the base class.
+   */
+  typedef typename BaseClass::value_type      value_type;
+  typedef typename BaseClass::real_type       real_type;
+  typedef typename BaseClass::pointer         pointer;
+  typedef typename BaseClass::const_pointer   const_pointer;
+  typedef typename BaseClass::reference       reference;
+  typedef typename BaseClass::const_reference const_reference;
+  typedef typename BaseClass::size_type       size_type;
+  typedef typename BaseClass::iterator        iterator;
+  typedef typename BaseClass::const_iterator  const_iterator;
+
+  /**
+   * Constructor. There are three ways to use this constructor. First, without
+   * any arguments, it generates an object with no blocks. Given one argument,
+   * it initializes <tt>n_blocks</tt> blocks, but these blocks have size zero.
+   * The third variant finally initializes all blocks to the same size
+   * <tt>block_size</tt>.
+   *
+   * Confer the other constructor further down if you intend to use blocks of
+   * different sizes.
+   */
+  explicit BlockVector (const unsigned int n_blocks = 0,
+                        const size_type block_size = 0);
+
+  /**
+   * Copy Constructor. Dimension set to that of @p V, all components are
+   * copied from @p V.
+   */
+  BlockVector (const BlockVector<Number> &V);
+
+
+#ifdef DEAL_II_WITH_CXX11
+  /**
+   * Move constructor. Creates a new vector by stealing the internal data of
+   * the vector @p v.
+   *
+   * @note This constructor is only available if deal.II is configured with
+   * C++11 support.
+   */
+  BlockVector (BlockVector<Number> &&v);
+#endif
+
+
+#ifndef DEAL_II_EXPLICIT_CONSTRUCTOR_BUG
+  /**
+   * Copy constructor taking a BlockVector of another data type. This will
+   * fail if there is no conversion path from <tt>OtherNumber</tt> to
+   * <tt>Number</tt>. Note that you may lose accuracy when copying to a
+   * BlockVector with data elements with less accuracy.
+   *
+   * Older versions of gcc did not honor the @p explicit keyword on template
+   * constructors. In such cases, it is easy to accidentally write code that
+   * can be very inefficient, since the compiler starts performing hidden
+   * conversions. To avoid this, this function is disabled if we have detected
+   * a broken compiler during configuration.
+   */
+  template <typename OtherNumber>
+  explicit
+  BlockVector (const BlockVector<OtherNumber> &v);
+#endif
+
+
+#ifdef DEAL_II_WITH_TRILINOS
+  /**
+   * A copy constructor taking a (parallel) Trilinos block vector and copying
+   * it into the deal.II own format.
+   */
+  BlockVector (const TrilinosWrappers::BlockVector &v);
+
+#endif
+  /**
+   * Constructor. Set the number of blocks to <tt>block_sizes.size()</tt> and
+   * initialize each block with <tt>block_sizes[i]</tt> zero elements.
+   */
+  BlockVector (const std::vector<size_type> &block_sizes);
+
+  /**
+   * Constructor. Initialize vector to the structure found in the BlockIndices
+   * argument.
+   */
+  BlockVector (const BlockIndices &block_indices);
+
+  /**
+   * Constructor. Set the number of blocks to <tt>block_sizes.size()</tt>.
+   * Initialize the vector with the elements pointed to by the range of
+   * iterators given as second and third argument. Apart from the first
+   * argument, this constructor is in complete analogy to the respective
+   * constructor of the <tt>std::vector</tt> class, but the first argument is
+   * needed in order to know how to subdivide the block vector into different
+   * blocks.
+   */
+  template <typename InputIterator>
+  BlockVector (const std::vector<size_type>    &block_sizes,
+               const InputIterator              first,
+               const InputIterator              end);
+
+  /**
+   * Destructor. Clears memory
+   */
+  ~BlockVector ();
+
+  /**
+   * Call the compress() function on all the subblocks.
+   *
+   * This functionality only needs to be called if using MPI based vectors and
+   * exists in other objects for compatibility.
+   *
+   * See
+   * @ref GlossCompress "Compressing distributed objects"
+   * for more information.
+   */
+  void compress (::dealii::VectorOperation::values operation
+                 =::dealii::VectorOperation::unknown);
+
+  /**
+   * Copy operator: fill all components of the vector with the given scalar
+   * value.
+   */
+  BlockVector &operator= (const value_type s);
+
+  /**
+   * Copy operator for arguments of the same type. Resize the present vector
+   * if necessary.
+   */
+  BlockVector<Number> &
+  operator= (const BlockVector<Number> &v);
+
+#ifdef DEAL_II_WITH_CXX11
+  /**
+   * Move the given vector. This operator replaces the present vector with @p
+   * v by efficiently swapping the internal data structures.
+   *
+   * @note This operator is only available if deal.II is configured with C++11
+   * support.
+   */
+  BlockVector<Number> &operator= (BlockVector<Number> &&v);
+#endif
+
+  /**
+   * Copy operator for template arguments of different types. Resize the
+   * present vector if necessary.
+   */
+  template <class Number2>
+  BlockVector<Number> &
+  operator= (const BlockVector<Number2> &V);
+
+  /**
+   * Copy a regular vector into a block vector.
+   */
+  BlockVector<Number> &
+  operator= (const Vector<Number> &V);
+
+#ifdef DEAL_II_WITH_TRILINOS
+  /**
+   * Copy operator: assign the contents of a Trilinos block vector to this
+   * deal.II block vector.
+   */
+  BlockVector<Number> &
+  operator= (const TrilinosWrappers::BlockVector &V);
+#endif
+
+  /**
+   * Reinitialize the BlockVector to contain <tt>n_blocks</tt> blocks of size
+   * <tt>block_size</tt> each.
+   *
+   * If the second argument is left at its default value, then the block
+   * vector allocates the specified number of blocks but leaves them at zero
+   * size. You then need to later reinitialize the individual blocks, and call
+   * collect_sizes() to update the block system's knowledge of its individual
+   * block's sizes.
+   *
+   * If <tt>omit_zeroing_entries==false</tt>, the vector is filled with zeros.
+   */
+  void reinit (const unsigned int n_blocks,
+               const size_type block_size = 0,
+               const bool omit_zeroing_entries = false);
+
+  /**
+   * Reinitialize the BlockVector such that it contains
+   * <tt>block_sizes.size()</tt> blocks. Each block is reinitialized to
+   * dimension <tt>block_sizes[i]</tt>.
+   *
+   * If the number of blocks is the same as before this function was called,
+   * all vectors remain the same and reinit() is called for each vector.
+   *
+   * If <tt>omit_zeroing_entries==false</tt>, the vector is filled with zeros.
+   *
+   * Note that you must call this function (or one of the other reinit()
+   * functions), rather than the reinit() function of an individual block, to
+   * allow the block vector to update its caches of vector sizes. If you call
+   * reinit() on one of the blocks, then subsequent actions on this object may
+   * yield unpredictable results since they may be routed to the wrong block.
+   */
+  void reinit (const std::vector<size_type> &block_sizes,
+               const bool                    omit_zeroing_entries=false);
+
+  /**
+   * Reinitialize the BlockVector to reflect the structure found in
+   * BlockIndices.
+   *
+   * If the number of blocks is the same as before this function was called,
+   * all vectors remain the same and reinit() is called for each vector.
+   *
+   * If <tt>omit_zeroing_entries==false</tt>, the vector is filled with zeros.
+   */
+  void reinit (const BlockIndices &block_indices,
+               const bool omit_zeroing_entries=false);
+
+  /**
+   * Change the dimension to that of the vector <tt>V</tt>. The same applies
+   * as for the other reinit() function.
+   *
+   * The elements of <tt>V</tt> are not copied, i.e.  this function is the
+   * same as calling <tt>reinit (V.size(), omit_zeroing_entries)</tt>.
+   *
+   * Note that you must call this function (or one of the other reinit()
+   * functions), rather than the reinit() function of an individual block, to
+   * allow the block vector to update its caches of vector sizes. If you call
+   * reinit() of one of the blocks, then subsequent actions of this object may
+   * yield unpredictable results since they may be routed to the wrong block.
+   */
+  template <typename Number2>
+  void reinit (const BlockVector<Number2> &V,
+               const bool                 omit_zeroing_entries=false);
+
+  /**
+   * Multiply each element of this vector by the corresponding element of
+   * <tt>v</tt>.
+   */
+  template <class BlockVector2>
+  void scale (const BlockVector2 &v);
+
+  /**
+   * Swap the contents of this vector and the other vector <tt>v</tt>. One
+   * could do this operation with a temporary variable and copying over the
+   * data elements, but this function is significantly more efficient since it
+   * only swaps the pointers to the data of the two vectors and therefore does
+   * not need to allocate temporary storage and move data around.
+   *
+   * This function is analogous to the swap() function of all C++ standard
+   * containers. Also, there is a global function swap(u,v) that simply calls
+   * <tt>u.swap(v)</tt>, again in analogy to standard functions.
+   */
+  void swap (BlockVector<Number> &v);
+
+  /**
+   * Output of vector in user-defined format.
+   *
+   * This function is deprecated.
+   */
+  void print (const char *format = 0) const DEAL_II_DEPRECATED;
+
+  /**
+   * Print to a stream.
+   */
+  void print (std::ostream       &out,
+              const unsigned int  precision = 3,
+              const bool          scientific = true,
+              const bool          across = true) const;
+
+  /**
+   * Write the vector en bloc to a stream. This is done in a binary mode, so
+   * the output is neither readable by humans nor (probably) by other
+   * computers using a different operating system or number format.
+   */
+  void block_write (std::ostream &out) const;
+
+  /**
+   * Read a vector en bloc from a stream. This is done using the inverse
+   * operations to the above function, so it is reasonably fast because the
+   * bitstream is not interpreted.
+   *
+   * The vector is resized if necessary.
+   *
+   * A primitive form of error checking is performed which will recognize the
+   * bluntest attempts to interpret some data as a vector stored bitwise to a
+   * file, but not more.
+   */
+  void block_read (std::istream &in);
+
+  /**
+   * @addtogroup Exceptions
+   * @{
+   */
+
+  /**
+   * Exception
+   */
+  DeclException0 (ExcIteratorRangeDoesNotMatchVectorSize);
+  //@}
+};
+
+/*@}*/
+
+#ifndef DOXYGEN
+/*----------------------- Inline functions ----------------------------------*/
+
+
+
+template <typename Number>
+template <typename InputIterator>
+BlockVector<Number>::BlockVector (const std::vector<size_type>    &block_sizes,
+                                  const InputIterator              first,
+                                  const InputIterator              end)
+{
+  // Size the blocks without zeroing (all entries are overwritten below). Step
+  // with the iterator's difference_type: an int cast truncates huge blocks.
+  typedef typename std::iterator_traits<InputIterator>::difference_type diff_t;
+  (void)end; // end is otherwise only used in the Assert below
+  reinit (block_sizes, true);
+  InputIterator start = first;
+  for (size_type b=0; b<block_sizes.size(); ++b)
+    {
+      InputIterator block_end = start;
+      std::advance (block_end, static_cast<diff_t>(block_sizes[b]));
+      std::copy (start, block_end, this->block(b).begin());
+      start = block_end;
+    }
+  Assert (start == end, ExcIteratorRangeDoesNotMatchVectorSize());
+}
+
+
+
+// Scalar assignment: check that s is finite, then forward to the base class.
+template <typename Number>
+inline BlockVector<Number> &
+BlockVector<Number>::operator= (const value_type s)
+{
+  AssertIsFinite(s);
+
+  // the base-class operator does the actual assignment
+  this->BaseClass::operator= (s);
+  return *this;
+}
+
+
+
+// Copy assignment: adopt v's block layout, then assign via the base class.
+template <typename Number>
+inline BlockVector<Number> &
+BlockVector<Number>::operator= (const BlockVector<Number> &v)
+{
+  this->reinit (v, /*omit_zeroing_entries=*/true);
+  this->BaseClass::operator= (v);
+  return *this;
+}
+
+
+
+#ifdef DEAL_II_WITH_CXX11
+// Move assignment, implemented by exchanging contents with v.
+template <typename Number>
+inline BlockVector<Number> &
+BlockVector<Number>::operator= (BlockVector<Number> &&v)
+{
+  // v is left holding whatever this object held before
+  this->swap (v);
+  return *this;
+}
+#endif
+
+
+
+// Assignment from a non-block Vector, forwarded to the base class.
+template <typename Number>
+inline BlockVector<Number> &
+BlockVector<Number>::operator= (const Vector<Number> &v)
+{
+  this->BaseClass::operator= (v);
+  return *this;
+}
+
+
+
+// Converting assignment: adopt v's block layout, then assign via the base.
+template <typename Number>
+template <typename Number2>
+inline BlockVector<Number> &
+BlockVector<Number>::operator= (const BlockVector<Number2> &v)
+{
+  this->reinit (v, /*omit_zeroing_entries=*/true);
+  this->BaseClass::operator= (v);
+  return *this;
+}
+
+template <typename Number>
+inline void BlockVector<Number>::compress (::dealii::VectorOperation::values operation)
+{
+  // forward the request to each block in turn
+  for (size_type b=0; b<this->n_blocks(); ++b)
+    this->components[b].compress(operation);
+}
+
+
+
+// Scaling by v is delegated unchanged to the base-class scale().
+template <typename Number> template <class BlockVector2>
+void BlockVector<Number>::scale (const BlockVector2 &v)
+{
+  this->BaseClass::scale (v);
+}
+
+#endif // DOXYGEN
+
+
+/**
+ * Free-function counterpart of BlockVector::swap(). It takes the place of
+ * the generic C++ standard library swap, which goes through a temporary
+ * object, and simply calls <tt>u.swap(v)</tt>.
+ *
+ * @relates BlockVector
+ * @author Wolfgang Bangerth, 2000
+ */
+template <typename Number>
+inline void
+swap (BlockVector<Number> &u, BlockVector<Number> &v)
+{
+  // all the work happens in the member function
+  u.swap (v);
+}
+
+
+namespace internal
+{
+  namespace LinearOperator
+  {
+    template <typename> class ReinitHelper;
+
+    /**
+     * Specialization of the ReinitHelper class (used internally by
+     * linear_operator.h) for BlockVector<number>.
+     */
+    template<typename number>
+    class ReinitHelper<BlockVector<number> >
+    {
+    public:
+      // Reinitialize v from matrix.get_row_indices().
+      template <typename Matrix>
+      static void
+      reinit_range_vector (const Matrix &matrix, BlockVector<number> &v,
+                           bool omit_zeroing_entries)
+      {
+        v.reinit(matrix.get_row_indices(), omit_zeroing_entries);
+      }
+
+      // Reinitialize v from matrix.get_column_indices().
+      template <typename Matrix>
+      static void
+      reinit_domain_vector (const Matrix &matrix, BlockVector<number> &v,
+                            bool omit_zeroing_entries)
+      {
+        v.reinit(matrix.get_column_indices(), omit_zeroing_entries);
+      }
+    };
+
+  } /* namespace LinearOperator */
+} /* namespace internal */
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/block_vector.templates.h b/include/deal.II/lac/block_vector.templates.h
new file mode 100644
index 0000000..0f1fcbe
--- /dev/null
+++ b/include/deal.II/lac/block_vector.templates.h
@@ -0,0 +1,221 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__block_vector_templates_h
+#define dealii__block_vector_templates_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+#include <cmath>
+#include <algorithm>
+
+DEAL_II_NAMESPACE_OPEN
+
+// Set up n_blocks blocks of block_size entries each by calling reinit().
+template <typename Number>
+BlockVector<Number>::BlockVector (const unsigned int n_blocks, const size_type block_size)
+{
+  this->reinit (n_blocks, block_size);
+}
+
+
+
+template <typename Number>
+BlockVector<Number>::BlockVector (const std::vector<size_type> &block_sizes)
+{
+  this->reinit (block_sizes, /*omit_zeroing_entries=*/false); // one block per size
+}
+
+
+// Take the block structure from n by calling reinit() without omitting zeroing.
+template <typename Number> BlockVector<Number>::BlockVector (const BlockIndices &n)
+{
+  this->reinit (n, /*omit_zeroing_entries=*/false);
+}
+
+
+// Copy constructor: duplicate v's block indices and assign block by block.
+template <typename Number>
+BlockVector<Number>::BlockVector (const BlockVector<Number> &v)
+  : BlockVectorBase<Vector<Number> > ()
+{
+  this->block_indices = v.block_indices;
+  this->components.resize (v.n_blocks());
+
+  for (size_type b=0; b<this->n_blocks(); ++b)
+    this->components[b] = v.components[b];
+}
+
+
+#ifdef DEAL_II_WITH_CXX11
+// Move constructor: default-construct the base, then trade contents with v.
+template <typename Number>
+BlockVector<Number>::BlockVector (BlockVector<Number> &&v)
+  : BlockVectorBase<Vector<Number> > ()
+{
+  this->swap (v);
+}
+#endif
+
+
+#ifndef DEAL_II_EXPLICIT_CONSTRUCTOR_BUG
+
+// Converting copy constructor: size like v, then assign v to this object.
+template <typename Number> template <typename OtherNumber>
+BlockVector<Number>::BlockVector (const BlockVector<OtherNumber> &v)
+{
+  this->reinit (v, /*omit_zeroing_entries=*/true);
+  this->operator= (v);
+}
+
+#endif
+
+
+#ifdef DEAL_II_WITH_TRILINOS
+
+// Construct from a Trilinos block vector: same block indices, same blocks.
+template <typename Number>
+BlockVector<Number>::BlockVector (const TrilinosWrappers::BlockVector &v)
+{
+  this->block_indices = v.get_block_indices();
+  this->components.resize(this->n_blocks());
+  for (size_type b=0; b<this->n_blocks(); ++b)
+    this->components[b] = v.block(b);
+
+  this->BaseClass::collect_sizes();
+}
+
+#endif
+
+
+template <typename Number>
+void BlockVector<Number>::reinit (const unsigned int n_blocks,
+                                  const size_type    block_size,
+                                  const bool         omit_zeroing_entries)
+{
+  // uniform case: expand to an explicit size list and reuse that overload
+  this->reinit (std::vector<size_type>(n_blocks, block_size), omit_zeroing_entries);
+}
+
+
+template <typename Number>
+void BlockVector<Number>::reinit (const std::vector<size_type> &block_sizes,
+                                  const bool                    omit_zeroing_entries)
+{
+  // block_indices first, then match the number of stored blocks to it
+  this->block_indices.reinit (block_sizes);
+  if (this->n_blocks() != this->components.size())
+    this->components.resize(this->n_blocks());
+  for (size_type b=0; b<this->n_blocks(); ++b)
+    this->components[b].reinit(block_sizes[b], omit_zeroing_entries);
+}
+
+
+template <typename Number>
+void BlockVector<Number>::reinit (const BlockIndices &n,
+                                  const bool          omit_zeroing_entries)
+{
+  // adopt the given block structure ...
+  this->block_indices = n;
+  if (this->n_blocks() != this->components.size())
+    this->components.resize(this->n_blocks());
+  // ... and give each block the size recorded in n
+  for (size_type b=0; b<this->n_blocks(); ++b)
+    this->components[b].reinit(n.block_size(b), omit_zeroing_entries);
+}
+
+
+template <typename Number>
+template <typename Number2>
+void BlockVector<Number>::reinit (const BlockVector<Number2> &v,
+                                  const bool                  omit_zeroing_entries)
+{
+  // take over v's block indices, then reinit each block from v's block
+  this->block_indices = v.get_block_indices();
+  if (this->n_blocks() != this->components.size())
+    this->components.resize(this->n_blocks());
+  for (size_type b=0; b<this->n_blocks(); ++b)
+    this->block(b).reinit(v.block(b), omit_zeroing_entries);
+}
+
+
+template <typename Number>
+BlockVector<Number>::~BlockVector ()
+{ /* empty body: nothing is released explicitly here */ }
+
+
+#ifdef DEAL_II_WITH_TRILINOS
+// Assignment from a Trilinos block vector, forwarded to the base class.
+template <typename Number>
+inline BlockVector<Number> &
+BlockVector<Number>::operator= (const TrilinosWrappers::BlockVector &v)
+{
+  this->BaseClass::operator= (v);
+  return *this;
+}
+#endif
+
+
+template <typename Number>
+void BlockVector<Number>::swap (BlockVector<Number> &v)
+{
+  // exchange both members with v: the block bookkeeping and the blocks
+  dealii::swap (this->block_indices, v.block_indices);
+  std::swap(this->components, v.components);
+}
+
+
+
+template <typename Number>
+void BlockVector<Number>::print (std::ostream       &out,
+                                 const unsigned int  precision,
+                                 const bool scientific, const bool across) const
+{
+  for (size_type b=0; b<this->n_blocks(); ++b)
+    {
+      // block header: "C<b>:" on the same line, or "Component <b>" on its own
+      if (!across)
+        out << "Component " << b << std::endl;
+      else
+        out << 'C' << b << ':';
+      this->components[b].print(out, precision, scientific, across);
+    }
+}
+
+
+
+// Write every block to out, in block order, via the block's block_write().
+template <typename Number> void BlockVector<Number>::block_write (std::ostream &out) const
+{
+  for (size_type b=0; b<this->n_blocks(); ++b)
+    this->components[b].block_write(out);
+}
+
+
+
+// Read every block from in, in block order (block_indices is left untouched).
+template <typename Number> void BlockVector<Number>::block_read (std::istream &in)
+{
+  for (size_type b=0; b<this->n_blocks(); ++b)
+    this->components[b].block_read(in);
+}
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/block_vector_base.h b/include/deal.II/lac/block_vector_base.h
new file mode 100644
index 0000000..6aed46a
--- /dev/null
+++ b/include/deal.II/lac/block_vector_base.h
@@ -0,0 +1,2243 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__block_vector_base_h
+#define dealii__block_vector_base_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/lac/exceptions.h>
+#include <deal.II/lac/block_indices.h>
+#include <deal.II/lac/vector.h>
+#include <vector>
+#include <iterator>
+#include <cmath>
+#include <cstddef>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/*! @addtogroup Vectors
+ *@{
+ */
+
+template <typename> class BlockVectorBase;
+
+
+/**
+ * A class that can be used to determine whether a given type is a block
+ * vector type or not. For example,
+ * @code
+ *   IsBlockVector<Vector<double> >::value
+ * @endcode
+ * has the value false, whereas
+ * @code
+ *   IsBlockVector<BlockVector<double> >::value
+ * @endcode
+ * is true. This is sometimes useful in template contexts where we may want to
+ * do things differently depending on whether a template type denotes a
+ * regular or a block vector type.
+ *
+ * @author Wolfgang Bangerth, 2010
+ */
+template <typename VectorType>
+struct IsBlockVector
+{
+private:
+  /**
+   * Two tag types of different size, so that <tt>sizeof</tt> can tell which
+   * overload of check_for_block_vector() was selected.
+   */
+  struct yes_type
+  {
+    char c[1];
+  };
+  struct no_type
+  {
+    char c[2];
+  };
+
+  /**
+   * Overload selected when the argument converts to a pointer to some
+   * BlockVectorBase<T>, i.e. for classes derived from BlockVectorBase.
+   */
+  template <typename T>
+  static yes_type check_for_block_vector (const BlockVectorBase<T> *);
+
+  /**
+   * Fallback overload for every type that is not a block vector.
+   */
+  static no_type check_for_block_vector (...);
+
+public:
+  /**
+   * Compile-time flag: true exactly if VectorType is (derived from) a
+   * BlockVectorBase<T>. The overloads are only used inside <tt>sizeof</tt>.
+   */
+  static const bool value
+    = (sizeof(yes_type) == sizeof(check_for_block_vector(static_cast<VectorType *>(0))));
+};
+
+
+// out-of-class definition of the static data member declared in the class
+template <typename VectorType>
+const bool IsBlockVector<VectorType>::value;
+
+
+
+
+namespace internal
+{
+
+  /**
+   * Namespace in which iterators in block vectors are implemented.
+   *
+   * @author Wolfgang Bangerth, 2001
+   */
+  namespace BlockVectorIterators
+  {
+    /**
+     * Declaration of the general template of a structure which is used to
+     * determine some types based on the template arguments of other classes.
+     */
+    // Primary template, intentionally empty; see the bool specializations.
+    template <class BlockVectorType, bool Constness>
+    struct Types
+    {};
+
+
+
+    /**
+     * Declaration of a specialized template of a structure which is used to
+     * determine some types based on the template arguments of other classes.
+     *
+     * This is for the use of non-const iterators.
+     */
+    template <class BlockVectorType>
+    struct Types<BlockVectorType,false>
+    {
+      /**
+       * Type of a single block of the block vector, as used by non-const
+       * iterators: the plain (non-const) BlockType.
+       */
+      typedef typename BlockVectorType::BlockType Vector;
+
+      /**
+       * Type of the block vector itself, as used by non-const iterators:
+       * the plain (non-const) BlockVectorType.
+       */
+      typedef BlockVectorType BlockVector;
+
+      /**
+       * Type of the numbers we point to. Here, they are not constant.
+       */
+      typedef typename BlockVector::value_type value_type;
+
+      /**
+       * Result type of dereferencing a non-const iterator: the reference
+       * type of the underlying block.
+       */
+      typedef typename Vector::reference dereference_type;
+    };
+
+
+
+    /**
+     * Declaration of a specialized template of a structure which is used to
+     * determine some types based on the template arguments of other classes.
+     *
+     * This is for the use of const_iterator.
+     */
+    template <class BlockVectorType>
+    struct Types<BlockVectorType,true>
+    {
+      /**
+       * Type of a single block of the block vector, as used by
+       * const_iterator: the const-qualified BlockType.
+       */
+      typedef const typename BlockVectorType::BlockType Vector;
+
+      /**
+       * Type of the block vector itself, as used by const_iterator: the
+       * const-qualified BlockVectorType.
+       */
+      typedef const BlockVectorType BlockVector;
+
+      /**
+       * Type of the numbers we point to. Here, they are constant since the
+       * block vector we use is constant.
+       */
+      typedef const typename BlockVector::value_type value_type;
+
+      /**
+       * Result type of dereferencing a const_iterator. This is the (const)
+       * value type itself rather than a reference type, so dereferencing
+       * yields values, not actual references.
+       */
+      typedef value_type dereference_type;
+    };
+
+
+    /**
+     * General random-access iterator class for block vectors. Since we do not
+     * want to have two classes for non-const iterator and const_iterator, we
+     * take a second template argument which denotes whether the vector we
+     * point into is a constant object or not. The first template argument is
+     * the type of the block vector in use.
+     *
+     * This class satisfies all requirements of random access iterators
+     * defined in the C++ standard. Operations on these iterators are constant
+     * in the number of elements in the block vector. However, they are
+     * sometimes linear in the number of blocks in the vector, but since that
+     * does rarely change dynamically within an application, this is a
+     * constant and we again have that the iterator satisfies the requirements
+     * of a random access iterator.
+     *
+     * @author Wolfgang Bangerth, 2001
+     */
+    template <class BlockVectorType, bool Constness>
+    class Iterator :
+      public std::iterator<std::random_access_iterator_tag,
+      typename Types<BlockVectorType,Constness>::value_type>
+    {
+    public:
+      /**
+       * Declare the type for container size.
+       */
+      typedef types::global_dof_index size_type;
+
+      /**
+       * Type of the number this iterator points to. Depending on the value of
+       * the second template parameter, this is either a constant or non-const
+       * number.
+       */
+      typedef
+      typename Types<BlockVectorType,Constness>::value_type
+      value_type;
+
+      /**
+       * Typedefs that generic algorithms use to enquire about an iterator.
+       * NOTE(review): the standard name for the tag is iterator_category
+       * (inherited from std::iterator here), not iterator_type -- confirm.
+       */
+      typedef std::random_access_iterator_tag               iterator_type;
+      typedef std::ptrdiff_t                                difference_type;
+      typedef typename BlockVectorType::reference           reference;
+      typedef value_type                                   *pointer;
+
+      typedef
+      typename Types<BlockVectorType,Constness>::dereference_type
+      dereference_type;
+
+      /**
+       * Typedef the type of the block vector (which differs in constness,
+       * depending on the second template parameter).
+       */
+      typedef
+      typename Types<BlockVectorType,Constness>::BlockVector
+      BlockVector;
+
+      /**
+       * Construct an iterator from a vector to which we point and the global
+       * index of the element pointed to.
+       *
+       * Depending on the value of the <tt>Constness</tt> template argument of
+       * this class, the first argument of this constructor is either a
+       * const or non-const reference.
+       */
+      Iterator (BlockVector     &parent,
+                const size_type  global_index);
+
+      /**
+       * Copy constructor from an iterator of different constness.
+       *
+       * @note Constructing a non-const iterator from a const iterator does
+       * not make sense. If deal.II was configured with C++11 support, then
+       * attempting this will result in a compile-time error (via
+       * <code>static_assert</code>). If deal.II was not configured with C++11
+       * support, then attempting this will result in a thrown exception in
+       * debug mode.
+       */
+      Iterator (const Iterator<BlockVectorType,!Constness> &c);
+
+
+      /**
+       * Copy constructor.
+       */
+      Iterator (const Iterator<BlockVectorType,Constness> &c);
+
+    private:
+      /**
+       * Constructor used internally in this class. The arguments match
+       * exactly the values of the respective member variables.
+       */
+      Iterator (BlockVector     &parent,
+                const size_type  global_index,
+                const size_type  current_block,
+                const size_type  index_within_block,
+                const size_type  next_break_forward,
+                const size_type  next_break_backward);
+
+    public:
+
+      /**
+       * Copy operator.
+       */
+      Iterator &operator = (const Iterator &c);
+
+      /**
+       * Dereferencing operator. If the template argument <tt>Constness</tt>
+       * is <tt>true</tt>, then no writing to the result is possible, making
+       * this a const_iterator.
+       */
+      dereference_type operator * () const;
+
+      /**
+       * Random access operator, grant access to arbitrary elements relative
+       * to the one presently pointed to.
+       */
+      dereference_type operator [] (const difference_type d) const;
+
+      /**
+       * Prefix increment operator. This operator advances the iterator to the
+       * next element and returns a reference to <tt>*this</tt>.
+       */
+      Iterator &operator ++ ();
+
+      /**
+       * Postfix increment operator. This operator advances the iterator to
+       * the next element and returns a copy of the old value of this
+       * iterator.
+       */
+      Iterator operator ++ (int);
+
+      /**
+       * Prefix decrement operator. This operator retracts the iterator to the
+       * previous element and returns a reference to <tt>*this</tt>.
+       */
+      Iterator &operator -- ();
+
+      /**
+       * Postfix decrement operator. This operator retracts the iterator to
+       * the previous element and returns a copy of the old value of this
+       * iterator.
+       */
+      Iterator operator -- (int);
+
+      /**
+       * Compare for equality of iterators. This operator checks whether the
+       * vectors pointed to are the same, and if not it throws an exception.
+       */
+      template <bool _Constness>
+      bool operator == (const Iterator<BlockVectorType, _Constness> &i) const;
+
+      /**
+       * Compare for inequality of iterators. This operator checks whether the
+       * vectors pointed to are the same, and if not it throws an exception.
+       */
+      template <bool _Constness>
+      bool operator != (const Iterator<BlockVectorType, _Constness> &i) const;
+
+      /**
+       * Check whether this iterator points to an element previous to the one
+       * pointed to by the given argument. This operator checks whether the
+       * vectors pointed to are the same, and if not it throws an exception.
+       */
+      template <bool _Constness>
+      bool operator < (const Iterator<BlockVectorType, _Constness> &i) const;
+
+      /**
+       * Comparison operator, analogous to the one above.
+       */
+      template <bool _Constness>
+      bool operator <= (const Iterator<BlockVectorType, _Constness> &i) const;
+
+      /**
+       * Comparison operator, analogous to the one above.
+       */
+      template <bool _Constness>
+      bool operator > (const Iterator<BlockVectorType, _Constness> &i) const;
+
+      /**
+       * Comparison operator, analogous to the one above.
+       */
+      template <bool _Constness>
+      bool operator >= (const Iterator<BlockVectorType, _Constness> &i) const;
+
+      /**
+       * Return the distance between the two iterators, in elements.
+       */
+      template <bool _Constness>
+      difference_type operator - (const Iterator<BlockVectorType, _Constness> &i) const;
+
+      /**
+       * Return an iterator which is the given number of elements in front of
+       * the present one.
+       */
+      Iterator operator + (const difference_type &d) const;
+
+      /**
+       * Return an iterator which is the given number of elements behind the
+       * present one.
+       */
+      Iterator operator - (const difference_type &d) const;
+
+      /**
+       * Move the iterator <tt>d</tt> elements forward at once, and return the
+       * result.
+       */
+      Iterator &operator += (const difference_type &d);
+
+      /**
+       * Move the iterator <tt>d</tt> elements backward at once, and return
+       * the result.
+       */
+      Iterator &operator -= (const difference_type &d);
+
+      /**
+       * @addtogroup Exceptions
+       * @{
+       */
+
+      /**
+       * Exception thrown when one performs arithmetical comparisons on
+       * iterators belonging to two different block vectors.
+       */
+      DeclExceptionMsg (ExcPointerToDifferentVectors,
+                        "Your program tried to compare iterators pointing to "
+                        "different block vectors. There is no reasonable way "
+                        "to do this.");
+
+      /**
+       * Exception thrown when one attempts to copy construct a non-const
+       * iterator from a const iterator.
+       *
+       * @note when deal.II is compiled with C++11 support this check is
+       * instead performed at compile time via <code>static_assert</code>.
+       */
+      DeclExceptionMsg (ExcCastingAwayConstness,
+                        "Constructing a non-const iterator from a const "
+                        "iterator does not make sense.");
+      //@}
+    private:
+      /**
+       * Pointer to the block vector object to which this iterator points.
+       * Depending on the value of the <tt>Constness</tt> template argument of
+       * this class, this is a <tt>const</tt> or non-<tt>const</tt> pointer.
+       */
+      BlockVector *parent;
+
+      /**
+       * Global index of the element to which we presently point.
+       */
+      size_type     global_index;
+
+      /**
+       * Current block and index within this block of the element presently
+       * pointed to.
+       */
+      unsigned int current_block;
+      size_type index_within_block;
+
+      /**
+       * Indices of the global element address at which we have to move on to
+       * another block when moving forward and backward. These indices are
+       * kept as a cache since this is much more efficient than always asking
+       * the parent object.
+       */
+      size_type next_break_forward;
+      size_type next_break_backward;
+
+      /**
+       * Move forward one element.
+       */
+      void move_forward ();
+
+      /**
+       * Move backward one element.
+       */
+      void move_backward ();
+
+
+      /**
+       * Mark all other instances of this template as friends.
+       */
+      template <typename, bool>
+      friend class Iterator;
+    };
+  }  // namespace BlockVectorIterators
+}  // namespace internal
+
+
+/**
+ * A vector composed of several blocks each representing a vector of its own.
+ *
+ * The BlockVector is a collection of Vectors (e.g. of either deal.II Vector
+ * objects or PETScWrappers::Vector object). Each of the vectors inside can
+ * have a different size.
+ *
+ * The functionality of BlockVector includes everything a Vector can do, plus
+ * the access to a single Vector inside the BlockVector by <tt>block(i)</tt>.
+ * It also has a complete random access iterator, just as the other Vector
+ * classes or the standard C++ library template <tt>std::vector</tt>.
+ * Therefore, all algorithms working on iterators also work with objects of
+ * this class.
+ *
+ * While this base class implements most of the functionality by dispatching
+ * calls to its member functions to the respective functions on each of the
+ * individual blocks, this class does not actually allocate any memory or
+ * change the size of vectors. For this, the constructors, assignment
+ * operators and reinit() functions of derived classes are responsible. This
+ * class only handles the common part that is independent of the actual vector
+ * type the block vector is built on.
+ *
+ *
+ * <h3>Accessing individual blocks, and resizing vectors</h3>
+ *
+ * Apart from using this object as a whole, you can use each block separately
+ * as a vector, using the block() function.  There is a single caveat: if you
+ * have changed the size of one or several blocks, you must call the function
+ * collect_sizes() of the block vector to update its internal structures.
+ *
+ * @attention Warning: If you change the sizes of single blocks without
+ * calling collect_sizes(), results may be unpredictable. The debug version
+ * does not check consistency here for performance reasons!
+ *
+ * @see
+ * @ref GlossBlockLA "Block (linear algebra)"
+ * @author Wolfgang Bangerth, Guido Kanschat, 1999, 2000, 2001, 2002, 2004
+ */
+template <class VectorType>
+class BlockVectorBase : public Subscriptor
+{
+public:
+  /**
+   * Typedef the type of the underlying vector.
+   */
+  typedef VectorType BlockType;
+
+  /*
+   * Declare standard types used in
+   * all containers. These types
+   * parallel those in the
+   * <tt>C++</tt> standard
+   * libraries
+   * <tt>std::vector<...></tt>
+   * class. This includes iterator
+   * types.
+   */
+  typedef typename BlockType::value_type  value_type;
+  typedef value_type                     *pointer;
+  typedef const value_type               *const_pointer;
+  typedef dealii::internal::BlockVectorIterators::Iterator<BlockVectorBase,false> iterator;
+  typedef dealii::internal::BlockVectorIterators::Iterator<BlockVectorBase,true>  const_iterator;
+  typedef typename BlockType::reference       reference;
+  typedef typename BlockType::const_reference const_reference;
+  typedef types::global_dof_index             size_type;
+
+  /**
+   * Declare a type that holds real-valued numbers with the same precision
+   * as the template argument to this class. If the template argument of this
+   * class is a real data type, then real_type equals the template argument.
+   * If the template argument is a std::complex type then real_type equals the
+   * type underlying the complex numbers.
+   *
+   * This typedef is used to represent the return type of norms.
+   */
+  typedef typename BlockType::real_type real_type;
+
+  /**
+   * A variable that indicates whether this vector supports distributed data
+   * storage. If true, then this vector also needs an appropriate compress()
+   * function that allows recent set or add operations on individual
+   * elements to be communicated to other processors.
+   *
+   * For the current class, the variable equals the value declared for the
+   * type of the individual blocks.
+   */
+  static const bool supports_distributed_data = BlockType::supports_distributed_data;
+
+  /**
+   * Default constructor.
+   */
+  BlockVectorBase ();
+
+  /**
+   * Update internal structures after resizing vectors. Whenever you have
+   * reinitialized a block of a block vector, the internal data structures
+   * are out of date. Therefore, you should call this function after all
+   * blocks got their new size.
+   */
+  void collect_sizes ();
+
+  /**
+   * Call the compress() function on all the blocks of the vector.
+   *
+   * This functionality only needs to be called if using MPI based vectors and
+   * exists in other objects for compatibility.
+   *
+   * See
+   * @ref GlossCompress "Compressing distributed objects"
+   * for more information.
+   */
+  void compress (::dealii::VectorOperation::values operation);
+
+  /**
+   * Access to a single block.
+   */
+  BlockType &
+  block (const unsigned int i);
+
+  /**
+   * Read-only access to a single block.
+   */
+  const BlockType &
+  block (const unsigned int i) const;
+
+  /**
+   * Return a reference to the object that describes the mapping between block
+   * and global indices. The use of this function is highly deprecated and it
+   * should vanish in one of the next versions.
+   */
+  const BlockIndices &
+  get_block_indices () const;
+
+  /**
+   * Number of blocks.
+   */
+  unsigned int n_blocks () const;
+
+  /**
+   * Return dimension of the vector. This is the sum of the dimensions of all
+   * components.
+   */
+  std::size_t size () const;
+
+  /**
+   * Return an index set that describes which elements of this vector are
+   * owned by the current processor. Note that this index set does not include
+   * elements this vector may store locally as ghost elements but that are in
+   * fact owned by another processor. As a consequence, the index sets
+   * returned on different processors if this is a distributed vector will
+   * form disjoint sets that add up to the complete index set. Obviously, if a
+   * vector is created on only one processor, then the result would satisfy
+   * @code
+   *   vec.locally_owned_elements() == complete_index_set (vec.size())
+   * @endcode
+   *
+   * For block vectors, this function returns the union of the locally owned
+   * elements of the individual blocks, shifted by their respective index
+   * offsets.
+   */
+  IndexSet locally_owned_elements () const;
+
+  /**
+   * Return an iterator pointing to the first element.
+   */
+  iterator begin ();
+
+  /**
+   * Return an iterator pointing to the first element of a constant block
+   * vector.
+   */
+  const_iterator begin () const;
+
+  /**
+   * Return an iterator pointing to the element past the end.
+   */
+  iterator end ();
+
+  /**
+   * Return an iterator pointing to the element past the end of a constant
+   * block vector.
+   */
+  const_iterator end () const;
+
+  /**
+   * Access components, returns U(i).
+   */
+  value_type operator() (const size_type i) const;
+
+  /**
+   * Access components, returns U(i) as a writeable reference.
+   */
+  reference operator() (const size_type i);
+
+  /**
+   * Access components, returns U(i).
+   *
+   * Exactly the same as operator().
+   */
+  value_type operator[] (const size_type i) const;
+
+  /**
+   * Access components, returns U(i) as a writeable reference.
+   *
+   * Exactly the same as operator().
+   */
+  reference operator[] (const size_type i);
+
+  /**
+   * A collective get operation: instead of getting individual elements of a
+   * vector, this function allows to get a whole set of elements at once. The
+   * indices of the elements to be read are stated in the first argument, the
+   * corresponding values are returned in the second.
+   */
+  template <typename OtherNumber>
+  void extract_subvector_to (const std::vector<size_type> &indices,
+                             std::vector<OtherNumber> &values) const;
+
+  /**
+   * Just as the above, but with pointers. Useful in minimizing copying of
+   * data around.
+   */
+  template <typename ForwardIterator, typename OutputIterator>
+  void extract_subvector_to (ForwardIterator          indices_begin,
+                             const ForwardIterator    indices_end,
+                             OutputIterator           values_begin) const;
+
+  /**
+   * Copy operator: fill all components of the vector with the given scalar
+   * value.
+   */
+  BlockVectorBase &operator = (const value_type s);
+
+  /**
+   * Copy operator for arguments of the same type.
+   */
+  BlockVectorBase &
+  operator= (const BlockVectorBase &V);
+
+  /**
+   * Copy operator for template arguments of different types.
+   */
+  template <class VectorType2>
+  BlockVectorBase &
+  operator= (const BlockVectorBase<VectorType2> &V);
+
+  /**
+   * Copy operator from non-block vectors to block vectors.
+   */
+  BlockVectorBase &
+  operator = (const VectorType &v);
+
+  /**
+   * Check for equality of two block vector types. This operation is only
+   * allowed if the two vectors already have the same block structure.
+   */
+  template <class VectorType2>
+  bool
+  operator == (const BlockVectorBase<VectorType2> &v) const;
+
+  /**
+   * Return the scalar product of this vector with @p V.
+   */
+  value_type operator* (const BlockVectorBase &V) const;
+
+  /**
+   * Return square of the $l_2$-norm.
+   */
+  real_type norm_sqr () const;
+
+  /**
+   * Return the mean value of the elements of this vector.
+   */
+  value_type mean_value () const;
+
+  /**
+   * Return the $l_1$-norm of the vector, i.e. the sum of the absolute values.
+   */
+  real_type l1_norm () const;
+
+  /**
+   * Return the $l_2$-norm of the vector, i.e. the square root of the sum of
+   * the squares of the elements.
+   */
+  real_type l2_norm () const;
+
+  /**
+   * Return the maximum absolute value of the elements of this vector, which
+   * is the $l_\infty$-norm of a vector.
+   */
+  real_type linfty_norm () const;
+
+  /**
+   * Performs a combined operation of a vector addition and a subsequent inner
+   * product, returning the value of the inner product. In other words, the
+   * result of this function is the same as if the user called
+   * @code
+   * this->add(a, V);
+   * return_value = *this * W;
+   * @endcode
+   *
+   * The reason this function exists is that this operation involves less
+   * memory transfer than calling the two functions separately on deal.II's
+   * vector classes (Vector<Number> and
+   * parallel::distributed::Vector<double>). This method only needs to load
+   * three vectors, @p this, @p V, @p W, whereas calling separate methods
+   * means to load the calling vector @p this twice. Since most vector
+   * operations are memory transfer limited, this reduces the time by 25\% (or
+   * 50\% if @p W equals @p this).
+   */
+  value_type add_and_dot (const value_type       a,
+                          const BlockVectorBase &V,
+                          const BlockVectorBase &W);
+
+  /**
+   * Returns true if the given global index is in the local range of this
+   * processor. Asks the corresponding block.
+   */
+  bool in_local_range (const size_type global_index) const;
+
+  /**
+   * Return whether the vector contains only elements with value zero. This
+   * function is mainly for internal consistency check and should seldom be
+   * used when not in debug mode since it uses quite some time.
+   */
+  bool all_zero () const;
+
+  /**
+   * Return @p true if the vector has no negative entries, i.e. all entries
+   * are zero or positive. This function is used, for example, to check
+   * whether refinement indicators are really all positive (or zero).
+   */
+  bool is_non_negative () const;
+
+  /**
+   * Addition operator.  Fast equivalent to <tt>U.add(1, V)</tt>.
+   */
+  BlockVectorBase &
+  operator += (const BlockVectorBase &V);
+
+  /**
+   * Subtraction operator.  Fast equivalent to <tt>U.add(-1, V)</tt>.
+   */
+  BlockVectorBase &
+  operator -= (const BlockVectorBase &V);
+
+
+  /**
+   * A collective add operation: This function adds a whole set of values
+   * stored in @p values to the vector components specified by @p indices.
+   */
+  template <typename Number>
+  void add (const std::vector<size_type> &indices,
+            const std::vector<Number>    &values);
+
+  /**
+   * This is a second collective add operation. As a difference, this function
+   * takes a deal.II vector of values.
+   */
+  template <typename Number>
+  void add (const std::vector<size_type> &indices,
+            const Vector<Number>         &values);
+
+  /**
+   * Take an address where <tt>n_elements</tt> are stored contiguously and add
+   * them into the vector. Handles all cases which are not covered by the
+   * other two <tt>add()</tt> functions above.
+   */
+  template <typename Number>
+  void add (const size_type  n_elements,
+            const size_type *indices,
+            const Number    *values);
+
+  /**
+   * $U(0-DIM)+=s$.  Addition of <tt>s</tt> to all components. Note that
+   * <tt>s</tt> is a scalar and not a vector.
+   */
+  void add (const value_type s);
+
+  /**
+   * U+=V. Simple vector addition, equal to the <tt>operator +=</tt>.
+   *
+   * This function is deprecated; use the <tt>operator +=</tt> instead.
+   */
+  void add (const BlockVectorBase &V) DEAL_II_DEPRECATED;
+
+  /**
+   * U+=a*V. Simple addition of a scaled vector.
+   */
+  void add (const value_type a, const BlockVectorBase &V);
+
+  /**
+   * U+=a*V+b*W. Multiple addition of scaled vectors.
+   */
+  void add (const value_type a, const BlockVectorBase &V,
+            const value_type b, const BlockVectorBase &W);
+
+  /**
+   * U=s*U+V. Scaling and simple vector addition.
+   */
+  void sadd (const value_type s, const BlockVectorBase &V);
+
+  /**
+   * U=s*U+a*V. Scaling and simple addition.
+   */
+  void sadd (const value_type s, const value_type a, const BlockVectorBase &V);
+
+  /**
+   * U=s*U+a*V+b*W. Scaling and multiple addition.
+   */
+  void sadd (const value_type s, const value_type a,
+             const BlockVectorBase &V,
+             const value_type b, const BlockVectorBase &W);
+
+  /**
+   * U=s*U+a*V+b*W+c*X. Scaling and multiple addition.
+   */
+  void sadd (const value_type s, const value_type a,
+             const BlockVectorBase &V,
+             const value_type b, const BlockVectorBase &W,
+             const value_type c, const BlockVectorBase &X);
+
+  /**
+   * Scale each element of the vector by a constant value.
+   */
+  BlockVectorBase &operator *= (const value_type factor);
+
+  /**
+   * Scale each element of the vector by the inverse of the given value.
+   */
+  BlockVectorBase &operator /= (const value_type factor);
+
+  /**
+   * Multiply each element of this vector by the corresponding element of
+   * <tt>v</tt>.
+   */
+  template <class BlockVector2>
+  void scale (const BlockVector2 &v);
+
+  /**
+   * U=a*V. Assignment.
+   */
+  template <class BlockVector2>
+  void equ (const value_type a, const BlockVector2 &V);
+
+  /**
+   * U=a*V+b*W. Replacing by sum.
+   */
+  void equ (const value_type a, const BlockVectorBase &V,
+            const value_type b, const BlockVectorBase &W);
+
+  /**
+   * This function does nothing but is there for compatibility with the @p
+   * PETScWrappers::Vector class.
+   *
+   * For the PETSc vector wrapper class, this function updates the ghost
+   * values of the PETSc vector. This is necessary after any modification
+   * before reading ghost values.
+   *
+   * However, for the implementation of this class, it is immaterial and thus
+   * an empty function.
+   */
+  void update_ghost_values () const;
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  std::size_t memory_consumption () const;
+
+protected:
+  /**
+   * The array of components, i.e. the individual blocks of this vector.
+   */
+  std::vector<VectorType> components;
+
+  /**
+   * Object managing the transformation between global indices and indices
+   * within the different blocks.
+   */
+  BlockIndices block_indices;
+
+  /**
+   * Make the iterator class a friend.
+   */
+  template <typename N, bool C>
+  friend class dealii::internal::BlockVectorIterators::Iterator;
+
+  template <typename> friend class BlockVectorBase;
+};
+
+
+/*@}*/
+
+/*----------------------- Inline functions ----------------------------------*/
+
+
+#ifndef DOXYGEN
+namespace internal
+{
+  namespace BlockVectorIterators
+  {
+    // Copy constructor: take over position and cached block breaks of c.
+    template <class BlockVectorType, bool Constness>
+    inline
+    Iterator<BlockVectorType,Constness>::
+    Iterator (const Iterator<BlockVectorType,Constness> &c)
+      : parent (c.parent),
+        global_index (c.global_index),
+        current_block (c.current_block),
+        index_within_block (c.index_within_block),
+        next_break_forward (c.next_break_forward),
+        next_break_backward (c.next_break_backward)
+    {}
+
+
+
+    template <class BlockVectorType, bool Constness>
+    inline
+    Iterator<BlockVectorType,Constness>::
+    Iterator (const Iterator<BlockVectorType,!Constness> &c)
+      : parent (c.parent),
+        global_index (c.global_index),
+        current_block (c.current_block),
+        index_within_block (c.index_within_block),
+        next_break_forward (c.next_break_forward),
+        next_break_backward (c.next_break_backward)
+    {
+      // This conversion is only meant for the direction non-const ->
+      // const, so Constness has to be true here. With C++11 support the
+      // requirement is checked at compile time through a static_assert;
+      // otherwise the check falls back to the Assert below.
+      // (A const -> non-const conversion would cast away constness.)
+#ifdef DEAL_II_WITH_CXX11
+      static_assert(Constness == true,
+                    "Constructing a non-const iterator from a const iterator "
+                    "does not make sense.");
+#else
+      Assert(Constness == true, ExcCastingAwayConstness());
+#endif
+    }
+
+
+
+    // Constructor from an explicitly given position and cached block
+    // breaks; all arguments are stored as they are, without any check.
+    template <class BlockVectorType, bool Constness>
+    inline
+    Iterator<BlockVectorType,Constness>::
+    Iterator (BlockVector        &parent,
+              const size_type  global_index,
+              const size_type  current_block,
+              const size_type  index_within_block,
+              const size_type  next_break_forward,
+              const size_type  next_break_backward)
+      : parent (&parent),
+        global_index (global_index),
+        current_block (current_block),
+        index_within_block (index_within_block),
+        next_break_forward (next_break_forward),
+        next_break_backward (next_break_backward)
+    {}
+
+
+
+    template <class BlockVectorType, bool Constness>
+    inline
+    Iterator<BlockVectorType,Constness> &
+    Iterator<BlockVectorType,Constness>::
+    operator = (const Iterator &c)
+    {
+      // plain member-wise copy of the position and the cached breaks
+      parent              = c.parent;
+      global_index        = c.global_index;
+      current_block       = c.current_block;
+      index_within_block  = c.index_within_block;
+      next_break_backward = c.next_break_backward;
+      next_break_forward  = c.next_break_forward;
+      return *this;
+    }
+
+
+
+    // Dereference: element index_within_block of block current_block.
+    template <class BlockVectorType, bool Constness>
+    inline typename Iterator<BlockVectorType,Constness>::dereference_type
+    Iterator<BlockVectorType,Constness>::operator * () const
+    {
+      return (*parent).block(current_block)(index_within_block);
+    }
+
+
+
+    template <class BlockVectorType, bool Constness>
+    inline
+    typename Iterator<BlockVectorType,Constness>::dereference_type
+    Iterator<BlockVectorType,Constness>::operator [] (const difference_type d) const
+    {
+      // start with the slow path: if
+      // the requested element lies
+      // outside the block we presently
+      // point into, i.e. beyond one of
+      // the two cached breaks, then the
+      // parent has to find the block.
+      if ((global_index+d < next_break_backward) ||
+          (global_index+d > next_break_forward))
+        return (*parent)(global_index+d);
+
+      // otherwise the element lives in
+      // the current block, and we save
+      // the computation of the block by
+      // shifting the index within the
+      // block by the same offset as the
+      // global index.
+      return parent->block(current_block)(index_within_block + d);
+    }
+
+
+
+    // Prefix increment: step to the next element, return this iterator.
+    template <class BlockVectorType, bool Constness>
+    inline Iterator<BlockVectorType,Constness> &
+    Iterator<BlockVectorType,Constness>::operator ++ ()
+    {
+      this->move_forward ();
+      return *this;
+    }
+
+
+
+    // Postfix increment: advance, but hand back the previous position.
+    template <class BlockVectorType, bool Constness>
+    inline Iterator<BlockVectorType,Constness>
+    Iterator<BlockVectorType,Constness>::operator ++ (int)
+    {
+      const Iterator previous (*this);
+      this->move_forward ();
+      return previous;
+    }
+
+
+
+    // Prefix decrement: step to the previous element, return this iterator.
+    template <class BlockVectorType, bool Constness>
+    inline Iterator<BlockVectorType,Constness> &
+    Iterator<BlockVectorType,Constness>::operator -- ()
+    {
+      this->move_backward ();
+      return *this;
+    }
+
+
+
+    // Postfix decrement: step back, but hand back the previous position.
+    template <class BlockVectorType, bool Constness>
+    inline Iterator<BlockVectorType,Constness>
+    Iterator<BlockVectorType,Constness>::operator -- (int)
+    {
+      const Iterator previous (*this);
+      this->move_backward ();
+      return previous;
+    }
+
+
+
+    // Equality of global positions; both must point into the same vector.
+    template <class BlockVectorType, bool Constness>
+    template <bool _Constness>
+    inline bool
+    Iterator<BlockVectorType,Constness>::
+    operator == (const Iterator<BlockVectorType, _Constness> &i) const
+    {
+      Assert (parent == i.parent, ExcPointerToDifferentVectors());
+
+      return (i.global_index == global_index);
+    }
+
+
+
+    // Inequality of global positions; both must point into the same vector.
+    template <class BlockVectorType, bool Constness>
+    template <bool _Constness>
+    inline bool
+    Iterator<BlockVectorType,Constness>::
+    operator != (const Iterator<BlockVectorType, _Constness> &i) const
+    {
+      Assert (parent == i.parent, ExcPointerToDifferentVectors());
+
+      return (i.global_index != global_index);
+    }
+
+
+
+    // True if this iterator points to an element in front of that of i.
+    template <class BlockVectorType, bool Constness>
+    template <bool _Constness>
+    inline bool
+    Iterator<BlockVectorType,Constness>::
+    operator < (const Iterator<BlockVectorType, _Constness> &i) const
+    {
+      Assert (parent == i.parent, ExcPointerToDifferentVectors());
+
+      return (i.global_index > global_index);
+    }
+
+
+
+    // True if this iterator does not point behind the element of i.
+    template <class BlockVectorType, bool Constness>
+    template <bool _Constness>
+    inline bool
+    Iterator<BlockVectorType,Constness>::
+    operator <= (const Iterator<BlockVectorType, _Constness> &i) const
+    {
+      Assert (parent == i.parent, ExcPointerToDifferentVectors());
+
+      return (i.global_index >= global_index);
+    }
+
+
+
+    // True if this iterator points to an element behind that of i.
+    template <class BlockVectorType, bool Constness>
+    template <bool _Constness>
+    inline bool
+    Iterator<BlockVectorType,Constness>::
+    operator > (const Iterator<BlockVectorType, _Constness> &i) const
+    {
+      Assert (parent == i.parent, ExcPointerToDifferentVectors());
+
+      return (i.global_index < global_index);
+    }
+
+
+
+    // True if this iterator does not point in front of the element of i.
+    template <class BlockVectorType, bool Constness>
+    template <bool _Constness>
+    inline bool
+    Iterator<BlockVectorType,Constness>::
+    operator >= (const Iterator<BlockVectorType, _Constness> &i) const
+    {
+      Assert (parent == i.parent, ExcPointerToDifferentVectors());
+
+      return (i.global_index <= global_index);
+    }
+
+
+
+    template <class BlockVectorType, bool Constness>
+    template <bool _Constness>
+    inline typename Iterator<BlockVectorType,Constness>::difference_type
+    Iterator<BlockVectorType,Constness>::
+    operator - (const Iterator<BlockVectorType, _Constness> &i) const
+    {
+      Assert (parent == i.parent, ExcPointerToDifferentVectors());
+      // Signed distance between the two global positions. Convert to the
+      // return type rather than to int, so that the result is not
+      // truncated if difference_type is wider than int.
+      return (static_cast<difference_type>(global_index) -
+              static_cast<difference_type>(i.global_index));
+    }
+
+
+
+    template <class BlockVectorType, bool Constness>
+    inline
+    Iterator<BlockVectorType,Constness>
+    Iterator<BlockVectorType,Constness>::
+    operator + (const difference_type &d) const
+    {
+      // if the target lies beyond one
+      // of the cached breaks, i.e.
+      // outside the present block, we
+      // have to seek the new block
+      if ((global_index+d < next_break_backward) ||
+          (global_index+d > next_break_forward))
+        return Iterator (*parent, global_index+d);
+
+      // still within the block we
+      // currently point into: reuse
+      // the cached block data and
+      // just shift both indices
+      return Iterator (*parent, global_index+d, current_block,
+                       index_within_block+d,
+                       next_break_forward, next_break_backward);
+    }
+
+
+
+    template <class BlockVectorType, bool Constness>
+    inline
+    Iterator<BlockVectorType,Constness>
+    Iterator<BlockVectorType,Constness>::
+    operator - (const difference_type &d) const
+    {
+      // if the target lies beyond one
+      // of the cached breaks, i.e.
+      // outside the present block, we
+      // have to seek the new block
+      if ((global_index-d < next_break_backward) ||
+          (global_index-d > next_break_forward))
+        return Iterator (*parent, global_index-d);
+
+      // still within the block we
+      // currently point into: reuse
+      // the cached block data and
+      // just shift both indices
+      return Iterator (*parent, global_index-d, current_block,
+                       index_within_block-d,
+                       next_break_forward, next_break_backward);
+    }
+
+
+
+    template <class BlockVectorType, bool Constness>
+    inline
+    Iterator<BlockVectorType,Constness> &
+    Iterator<BlockVectorType,Constness>::
+    operator += (const difference_type &d)
+    {
+      // if the target lies beyond one
+      // of the cached breaks, i.e.
+      // outside the present block, we
+      // have to seek the new block
+      if ((global_index+d < next_break_backward) ||
+          (global_index+d > next_break_forward))
+        *this = Iterator (*parent, global_index+d);
+      else
+        {
+          // still within the block we
+          // currently point into: it
+          // suffices to shift both
+          // indices by the offset
+          index_within_block += d;
+          global_index       += d;
+        }
+
+      return *this;
+    }
+
+
+
+    template <class BlockVectorType, bool Constness>
+    inline
+    Iterator<BlockVectorType,Constness> &
+    Iterator<BlockVectorType,Constness>::
+    operator -= (const difference_type &d)
+    {
+      // if the target lies beyond one
+      // of the cached breaks, i.e.
+      // outside the present block, we
+      // have to seek the new block
+      if ((global_index-d < next_break_backward) ||
+          (global_index-d > next_break_forward))
+        *this = Iterator (*parent, global_index-d);
+      else
+        {
+          // still within the block we
+          // currently point into: it
+          // suffices to shift both
+          // indices by the offset
+          index_within_block -= d;
+          global_index       -= d;
+        }
+
+      return *this;
+    }
+
+
+    template <class BlockVectorType, bool Constness>
+    Iterator<BlockVectorType,Constness>::
+    Iterator (BlockVector    &parent,
+              const size_type global_index)
+      :
+      parent (&parent),
+      global_index (global_index)
+    {
+      // Note: inside this constructor the unqualified names parent and
+      // global_index denote the arguments, not the members of the same
+      // name. Find which block we are in, taking into account that it
+      // happens at times that people want to initialize iterators
+      // past-the-end.
+      if (global_index < parent.size())
+        {
+          const std::pair<size_type, size_type>
+          indices = parent.block_indices.global_to_local(global_index);
+          current_block      = indices.first;
+          index_within_block = indices.second;
+          // cache first and last global index of the current block
+          next_break_backward
+            = parent.block_indices.local_to_global (current_block, 0);
+          next_break_forward
+            = (next_break_backward
+               +parent.block_indices.block_size(current_block)-1);
+        }
+      else
+        {
+          // past the end. there is only one state for this, so clamp the
+          // position to size() and derive the backward break from the
+          // clamped member rather than from the unclamped argument
+          this->global_index  = parent.size ();
+          current_block       = parent.n_blocks();
+          index_within_block  = 0;
+          next_break_backward = this->global_index;
+          next_break_forward  = numbers::invalid_size_type;
+        }
+    }
+
+
+
+    template <class BlockVectorType, bool Constness>
+    void
+    Iterator<BlockVectorType,Constness>::move_forward ()
+    {
+      if (global_index == next_break_forward)
+        {
+          // we sit on the last element
+          // of a block, so we traverse a
+          // boundary between blocks
+          ++current_block;
+          index_within_block = 0;
+
+          // break backwards is now one
+          // past the old break forward
+          next_break_backward = next_break_forward+1;
+
+          // compute new break forward;
+          // beyond the last block, move
+          // it arbitrarily far away
+          if (current_block >= parent->block_indices.size())
+            next_break_forward = numbers::invalid_size_type;
+          else
+            next_break_forward
+            += parent->block_indices.block_size(current_block);
+        }
+      else
+        ++index_within_block;
+
+      // in either case the global position advances by one
+      ++global_index;
+    }
+
+
+
+    template <class BlockVectorType, bool Constness>
+    void
+    Iterator<BlockVectorType,Constness>::move_backward ()
+    {
+      if (global_index == next_break_backward)
+        {
+          if (current_block == 0)
+            {
+              // current block was 0, we now
+              // get into unspecified terrain
+              --current_block;
+              index_within_block = numbers::invalid_size_type;
+              next_break_forward = 0;
+              next_break_backward = 0;
+            }
+          else
+            {
+              // ok, we traverse a boundary
+              // between blocks:
+              --current_block;
+              index_within_block = parent->block_indices.block_size(current_block)-1;
+
+              // both breaks move down to enclose the block just entered
+              next_break_forward = next_break_backward-1;
+              next_break_backward
+              -= parent->block_indices.block_size (current_block);
+            }
+        }
+      else
+        --index_within_block;
+
+      --global_index;
+    }
+
+
+  } // namespace BlockVectorIterators
+
+} //namespace internal
+
+
+// Default constructor; all members are default-initialized.
+template <class VectorType>
+inline BlockVectorBase<VectorType>::BlockVectorBase ()
+{}
+
+
+
+// Total number of elements, as recorded in block_indices.
+template <class VectorType>
+inline std::size_t
+BlockVectorBase<VectorType>::size () const
+{
+  return this->block_indices.total_size();
+}
+
+
+
+template <class VectorType>
+inline
+IndexSet
+BlockVectorBase<VectorType>::locally_owned_elements () const
+{
+  IndexSet owned (size());
+
+  // shift the locally owned indices of every block by the global
+  // index at which that block starts, and merge them into the result
+  for (unsigned int block_no=0; block_no<n_blocks(); ++block_no)
+    {
+      const IndexSet block_owned = block(block_no).locally_owned_elements();
+      owned.add_indices(block_owned, block_indices.block_start(block_no));
+    }
+
+  owned.compress();
+
+  return owned;
+}
+
+
+
+// Number of blocks, as recorded in block_indices.
+template <class VectorType>
+inline unsigned int
+BlockVectorBase<VectorType>::n_blocks () const
+{
+  return this->block_indices.size();
+}
+
+
+// Writable access to block i; the index is range-checked via Assert.
+template <class VectorType>
+inline typename BlockVectorBase<VectorType>::BlockType &
+BlockVectorBase<VectorType>::block (const unsigned int i)
+{
+  Assert(i<n_blocks(), ExcIndexRange(i,0,n_blocks()));
+
+  return this->components[i];
+}
+
+
+
+// Read-only access to block i; the index is range-checked via Assert.
+template <class VectorType>
+inline const typename BlockVectorBase<VectorType>::BlockType &
+BlockVectorBase<VectorType>::block (const unsigned int i) const
+{
+  Assert(i<n_blocks(), ExcIndexRange(i,0,n_blocks()));
+
+  return this->components[i];
+}
+
+
+
+// Read-only access to the block/global index mapping object.
+template <class VectorType>
+inline const BlockIndices &
+BlockVectorBase<VectorType>::get_block_indices () const
+{
+  return this->block_indices;
+}
+
+
+template <class VectorType>
+inline
+void
+BlockVectorBase<VectorType>::collect_sizes ()
+{
+  // gather the present size of each block and rebuild block_indices
+  std::vector<size_type> block_sizes;
+  block_sizes.reserve (n_blocks());
+  for (unsigned int b=0; b<n_blocks(); ++b)
+    block_sizes.push_back (block(b).size());
+  block_indices.reinit(block_sizes);
+}
+
+
+
+// Forward compress(operation) to every block in turn.
+template <class VectorType>
+inline void
+BlockVectorBase<VectorType>::compress (::dealii::VectorOperation::values operation)
+{
+  for (unsigned int b=0; b<n_blocks(); ++b)
+    block(b).compress (operation);
+}
+
+
+
+// Iterator to global index 0, i.e. to the first element.
+template <class VectorType>
+inline typename BlockVectorBase<VectorType>::iterator
+BlockVectorBase<VectorType>::begin()
+{
+  return iterator(*this, 0U);
+}
+
+
+
+// Constant iterator to global index 0, i.e. to the first element.
+template <class VectorType>
+inline typename BlockVectorBase<VectorType>::const_iterator
+BlockVectorBase<VectorType>::begin() const
+{
+  return const_iterator(*this, 0U);
+}
+
+
+// Iterator to global index size(), i.e. one past the last element.
+template <class VectorType>
+inline typename BlockVectorBase<VectorType>::iterator
+BlockVectorBase<VectorType>::end()
+{
+  return iterator(*this, size());
+}
+
+
+
+template <class VectorType>
+inline typename BlockVectorBase<VectorType>::const_iterator
+BlockVectorBase<VectorType>::end() const
+{
+  const_iterator past_the_end (*this, size());   // position size()
+  return past_the_end;
+}
+
+
+// Ask the block that contains @p global_index whether the element is stored
+// locally. The block is queried with the index *within that block*: a block
+// only knows its own index space, not the global one.
+template <class VectorType>
+inline bool
+BlockVectorBase<VectorType>::in_local_range (const size_type global_index) const
+{
+  const std::pair<unsigned int,size_type> block_and_offset
+    = block_indices.global_to_local (global_index);
+  return components[block_and_offset.first].in_local_range (block_and_offset.second);
+}
+
+
+// True exactly when every block reports all_zero(); stops at the first
+// block that does not.
+template <class VectorType>
+bool BlockVectorBase<VectorType>::all_zero () const
+{
+  size_type b = 0;
+  while (b<n_blocks() && components[b].all_zero())
+    ++b;
+  return b == n_blocks();
+}
+
+
+
+// True exactly when every block reports is_non_negative(); stops at the
+// first block that does not.
+template <class VectorType>
+bool BlockVectorBase<VectorType>::is_non_negative () const
+{
+  size_type b = 0;
+  while (b<n_blocks() && components[b].is_non_negative())
+    ++b;
+  return b == n_blocks();
+}
+
+
+
+// Scalar product: the sum of the block-wise products with @p v.
+// Both vectors must have the same number of blocks.
+template <class VectorType>
+typename BlockVectorBase<VectorType>::value_type
+BlockVectorBase<VectorType>::operator * (const BlockVectorBase<VectorType> &v) const
+{
+  AssertDimension (n_blocks(), v.n_blocks());
+
+  value_type dot = 0.;
+  for (size_type b=0; b<n_blocks(); ++b)
+    dot += this->components[b] * v.components[b];
+
+  return dot;
+}
+
+
+// Sum of norm_sqr() over all blocks, accumulated in block order.
+template <class VectorType>
+typename BlockVectorBase<VectorType>::real_type
+BlockVectorBase<VectorType>::norm_sqr () const
+{
+  real_type total = 0.;
+  for (size_type b=0; b!=n_blocks(); ++b)
+    total += this->components[b].norm_sqr();
+  return total;
+}
+
+
+
+// Size-weighted average of the per-block mean values.
+template <class VectorType>
+typename BlockVectorBase<VectorType>::value_type
+BlockVectorBase<VectorType>::mean_value () const
+{
+  value_type weighted = 0.;
+  for (size_type b=0; b!=n_blocks(); ++b)
+    weighted += this->components[b].mean_value() * this->components[b].size();
+  return weighted/size();
+}
+
+
+
+// Sum of l1_norm() over all blocks, accumulated in block order.
+template <class VectorType>
+typename BlockVectorBase<VectorType>::real_type
+BlockVectorBase<VectorType>::l1_norm () const
+{
+  real_type total = 0.;
+  for (size_type b=0; b!=n_blocks(); ++b)
+    total += this->components[b].l1_norm();
+  return total;
+}
+
+
+
+template <class VectorType>
+typename BlockVectorBase<VectorType>::real_type
+BlockVectorBase<VectorType>::l2_norm () const
+{
+  return std::sqrt (this->norm_sqr());   // square root of the squared norm
+}
+
+
+
+// Largest block linfty_norm(); kept as real_type because norms are real.
+template <class VectorType>
+typename BlockVectorBase<VectorType>::real_type
+BlockVectorBase<VectorType>::linfty_norm () const
+{
+  real_type largest = 0.;
+  for (size_type i=0; i<n_blocks(); ++i)
+    {
+      const real_type block_norm = components[i].linfty_norm();
+      if (largest<block_norm) largest = block_norm;
+    }
+  return largest;
+}
+
+
+
+// Block-wise add_and_dot(): each block performs the combined call with its
+// counterparts in @p V and @p W, and the per-block results are summed.
+template <class VectorType>
+typename BlockVectorBase<VectorType>::value_type
+BlockVectorBase<VectorType>::
+add_and_dot (const typename BlockVectorBase<VectorType>::value_type a,
+             const BlockVectorBase<VectorType> &V,
+             const BlockVectorBase<VectorType> &W)
+{
+  AssertDimension (this->n_blocks(), V.n_blocks());
+  AssertDimension (this->n_blocks(), W.n_blocks());
+  value_type accumulated = 0.;
+  for (size_type b=0; b!=n_blocks(); ++b)
+    accumulated += this->components[b].add_and_dot (a, V.components[b], W.components[b]);
+  return accumulated;
+}
+
+
+
+// Add @p v to this vector, block by block.
+// Both operands must consist of the same number of blocks.
+template <class VectorType>
+BlockVectorBase<VectorType> &
+BlockVectorBase<VectorType>::operator += (const BlockVectorBase<VectorType> &v)
+{
+  AssertDimension (n_blocks(), v.n_blocks());
+
+  // each block is updated with its counterpart in v
+  for (size_type b=0; b!=n_blocks(); ++b)
+    this->components[b] += v.components[b];
+
+  return *this;
+}
+
+
+
+// Subtract @p v from this vector, block by block.
+// Both operands must consist of the same number of blocks.
+template <class VectorType>
+BlockVectorBase<VectorType> &
+BlockVectorBase<VectorType>::operator -= (const BlockVectorBase<VectorType> &v)
+{
+  AssertDimension (n_blocks(), v.n_blocks());
+
+  for (size_type b=0; b!=n_blocks(); ++b)
+    this->components[b] -= v.components[b];
+
+  return *this;
+}
+
+
+
+// Add values[i] to element indices[i]. Empty input returns early because
+// &indices[0] on an empty std::vector is undefined behavior.
+template <class VectorType>
+template <typename Number>
+inline void
+BlockVectorBase<VectorType>::add (const std::vector<size_type> &indices,
+                                  const std::vector<Number>    &values)
+{
+  AssertDimension (indices.size(), values.size());
+  if (!indices.empty()) add (indices.size(), &indices[0], &values[0]);
+}
+
+
+
+// Add values(k) to the element addressed by the global index indices[k].
+template <class VectorType>
+template <typename Number>
+inline void
+BlockVectorBase<VectorType>::add (const std::vector<size_type> &indices,
+                                  const Vector<Number>         &values)
+{
+  AssertDimension (indices.size(), values.size());
+
+  const size_type count = indices.size();
+  for (size_type k=0; k!=count; ++k)
+    this->operator() (indices[k]) += values(k);
+}
+
+
+
+// Add values[k] to the element at global index indices[k], k < n_indices.
+template <class VectorType>
+template <typename Number>
+inline void
+BlockVectorBase<VectorType>::add (const size_type  n_indices,
+                                  const size_type *indices,
+                                  const Number    *values)
+{
+  for (size_type k=0; k!=n_indices; ++k)
+    this->operator() (indices[k]) += values[k];
+}
+
+
+
+// Forward the scalar call add(a) to every block; @p a must be finite.
+template <class VectorType>
+void
+BlockVectorBase<VectorType>::add (const value_type a)
+{
+  AssertIsFinite (a);
+
+  for (size_type b=0; b!=n_blocks(); ++b)
+    this->components[b].add (a);
+}
+
+
+
+template <class VectorType>
+void BlockVectorBase<VectorType>::add (const BlockVectorBase<VectorType> &v)
+{
+  this->operator += (v);   // plain addition, delegated to operator +=
+}
+
+
+
+// Forward add(a, v_block) to every block, pairing each block of this
+// vector with the block of @p v at the same position.
+//
+// @p a must be finite and the block counts must agree (asserted).
+template <class VectorType>
+void
+BlockVectorBase<VectorType>::add (const value_type a,
+                                  const BlockVectorBase<VectorType> &v)
+{
+  AssertIsFinite (a);
+  AssertDimension (n_blocks(), v.n_blocks());
+
+  for (size_type b=0; b!=n_blocks(); ++b)
+    this->components[b].add (a, v.components[b]);
+}
+
+
+
+// Two scaled additions at once: every block receives the call
+// add(a, v_block, b, w_block) with the matching blocks of @p v and @p w.
+//
+// Both scalars must be finite, and @p v and @p w must have as many blocks
+// as this vector; all of this is checked with assertions.
+template <class VectorType>
+void
+BlockVectorBase<VectorType>::add (const value_type a,
+                                  const BlockVectorBase<VectorType> &v,
+                                  const value_type b,
+                                  const BlockVectorBase<VectorType> &w)
+{
+  AssertIsFinite (a);
+  AssertIsFinite (b);
+
+  AssertDimension (n_blocks(), v.n_blocks());
+  AssertDimension (n_blocks(), w.n_blocks());
+
+  for (size_type blk=0; blk!=n_blocks(); ++blk)
+    this->components[blk].add (a, v.components[blk], b, w.components[blk]);
+}
+
+
+
+// Forward sadd(x, v_block) to every block, pairing each block of this
+// vector with the block of @p v at the same position.
+//
+// @p x must be finite and the block counts must agree (asserted).
+template <class VectorType>
+void
+BlockVectorBase<VectorType>::sadd (const value_type x,
+                                   const BlockVectorBase<VectorType> &v)
+{
+  AssertIsFinite (x);
+  AssertDimension (n_blocks(), v.n_blocks());
+
+  for (size_type blk=0; blk!=n_blocks(); ++blk)
+    this->components[blk].sadd (x, v.components[blk]);
+}
+
+
+
+// Forward sadd(x, a, v_block) to every block, pairing each block of this
+// vector with the block of @p v at the same position.
+//
+// @p x and @p a must be finite and the block counts must agree (asserted).
+template <class VectorType>
+void
+BlockVectorBase<VectorType>::sadd (const value_type x, const value_type a,
+                                   const BlockVectorBase<VectorType> &v)
+{
+  AssertIsFinite (x);
+  AssertIsFinite (a);
+  AssertDimension (n_blocks(), v.n_blocks());
+
+  for (size_type blk=0; blk!=n_blocks(); ++blk)
+    this->components[blk].sadd (x, a, v.components[blk]);
+}
+
+
+
+// Forward sadd(x, a, v_block, b, w_block) to every block, pairing each
+// block of this vector with the blocks of @p v and @p w at the same position.
+//
+// All scalars must be finite and the block counts must agree (asserted).
+template <class VectorType>
+void
+BlockVectorBase<VectorType>::sadd (const value_type x, const value_type a,
+                                   const BlockVectorBase<VectorType> &v,
+                                   const value_type b,
+                                   const BlockVectorBase<VectorType> &w)
+{
+  AssertIsFinite (x);
+  AssertIsFinite (a);
+  AssertIsFinite (b);
+
+  AssertDimension (n_blocks(), v.n_blocks());
+  AssertDimension (n_blocks(), w.n_blocks());
+
+  for (size_type blk=0; blk!=n_blocks(); ++blk)
+    this->components[blk].sadd (x, a, v.components[blk], b, w.components[blk]);
+}
+
+
+
+// Forward sadd(x, a, v_block, b, w_block, c, y_block) to every block,
+// pairing each block of this vector with the blocks of @p v, @p w and @p y
+// at the same position.
+//
+// All scalars must be finite and the block counts must agree (asserted).
+template <class VectorType>
+void
+BlockVectorBase<VectorType>::sadd (const value_type x, const value_type a,
+                                   const BlockVectorBase<VectorType> &v,
+                                   const value_type b,
+                                   const BlockVectorBase<VectorType> &w,
+                                   const value_type c,
+                                   const BlockVectorBase<VectorType> &y)
+{
+  AssertIsFinite (x);
+  AssertIsFinite (a);
+  AssertIsFinite (b);
+  AssertIsFinite (c);
+
+  AssertDimension (n_blocks(), v.n_blocks());
+  AssertDimension (n_blocks(), w.n_blocks());
+  AssertDimension (n_blocks(), y.n_blocks());
+
+  for (size_type blk=0; blk!=n_blocks(); ++blk)
+    this->components[blk].sadd (x, a, v.components[blk],
+                                b, w.components[blk], c, y.components[blk]);
+}
+
+
+
+// Forward scale() to every block, handing block b of @p v to block b here.
+template <class VectorType>
+template <class BlockVector2>
+void BlockVectorBase<VectorType>::scale (const BlockVector2 &v)
+{
+  AssertDimension (n_blocks(), v.n_blocks());
+  for (size_type b=0; b!=n_blocks(); ++b)
+    this->components[b].scale (v.block(b));
+}
+
+
+
+// Forward equ(a, v_block, b, w_block) to every block, pairing each block
+// of this vector with the blocks of @p v and @p w at the same position.
+//
+// Both scalars must be finite and the block counts must agree (asserted).
+template <class VectorType>
+void
+BlockVectorBase<VectorType>::equ (const value_type a,
+                                  const BlockVectorBase<VectorType> &v,
+                                  const value_type b,
+                                  const BlockVectorBase<VectorType> &w)
+{
+  AssertIsFinite (a);
+  AssertIsFinite (b);
+
+  AssertDimension (n_blocks(), v.n_blocks());
+  AssertDimension (n_blocks(), w.n_blocks());
+
+  for (size_type blk=0; blk!=n_blocks(); ++blk)
+    this->components[blk].equ (a, v.components[blk], b, w.components[blk]);
+}
+
+
+
+// Estimate: sizeof the block count, plus each block, plus block_indices.
+template <class VectorType>
+std::size_t BlockVectorBase<VectorType>::memory_consumption () const
+{
+  std::size_t bytes = sizeof(n_blocks())
+                      + MemoryConsumption::memory_consumption (block_indices);
+  for (size_type b=0; b!=components.size(); ++b)
+    bytes += MemoryConsumption::memory_consumption (components[b]);
+  return bytes;
+}
+
+
+
+// Forward equ(a, v_block) to every block. The blocks of @p v are read
+// through its public block() accessor, as scale() does, so that any block
+// vector type works here and not only those exposing `components`.
+template <class VectorType>
+template <class BlockVector2>
+void BlockVectorBase<VectorType>::equ (const value_type    a,
+                                       const BlockVector2 &v)
+{
+  AssertIsFinite (a);
+  AssertDimension (n_blocks(), v.n_blocks());
+
+  for (size_type i=0; i<n_blocks(); ++i)
+    components[i].equ (a, v.block(i));
+}
+
+
+
+template <class VectorType>
+void BlockVectorBase<VectorType>::update_ghost_values () const
+{
+  for (size_type b=0; b!=n_blocks(); ++b)
+    this->block(b).update_ghost_values ();   // const access to block b
+}
+
+
+
+// Assign the (finite) scalar @p s to every block.
+template <class VectorType>
+BlockVectorBase<VectorType> &
+BlockVectorBase<VectorType>::operator = (const value_type s)
+{
+  AssertIsFinite (s);
+
+  for (size_type b=0; b!=n_blocks(); ++b)
+    this->components[b] = s;
+
+  return *this;
+}
+
+
+// Copy @p v block by block; the block counts must already agree.
+template <class VectorType>
+BlockVectorBase<VectorType> &
+BlockVectorBase<VectorType>::operator = (const BlockVectorBase<VectorType> &v)
+{
+  AssertDimension (n_blocks(), v.n_blocks());
+  for (size_type b=0; b!=n_blocks(); ++b)
+    this->components[b] = v.components[b];
+
+  return *this;
+}
+
+
+// Copy a block vector of another block type, block by block.
+template <class VectorType>
+template <class VectorType2>
+BlockVectorBase<VectorType> &
+BlockVectorBase<VectorType>::operator = (const BlockVectorBase<VectorType2> &v)
+{
+  AssertDimension (n_blocks(), v.n_blocks());
+  for (size_type b=0; b!=n_blocks(); ++b)
+    this->components[b] = v.components[b];
+
+  return *this;
+}
+
+
+
+// Copy the elements of the non-block vector @p v, in order, into the
+// consecutive elements of the blocks. Total sizes must agree.
+template <class VectorType>
+BlockVectorBase<VectorType> &
+BlockVectorBase<VectorType>::operator = (const VectorType &v)
+{
+  AssertDimension (size(), v.size());
+
+  size_type source = 0;
+  for (size_type b=0; b<n_blocks(); ++b)
+    for (size_type k=0; k<block(b).size(); ++k)
+      block(b)(k) = v(source++);
+  return *this;
+}
+
+
+
+// Block-wise comparison; true only if every pair of blocks compares equal.
+// The two vectors are expected to share the same block structure.
+template <class VectorType>
+template <class VectorType2>
+inline bool
+BlockVectorBase<VectorType>::
+operator == (const BlockVectorBase<VectorType2> &v) const
+{
+  Assert (block_indices == v.block_indices, ExcDifferentBlockIndices());
+
+  size_type b = 0;
+  while (b<n_blocks() && (components[b] == v.components[b]))
+    ++b;
+  return b == n_blocks();
+}
+
+
+
+// Multiply every block by the scalar @p factor.
+// The factor must be finite (asserted).
+template <class VectorType>
+inline BlockVectorBase<VectorType> &
+BlockVectorBase<VectorType>::operator *= (const value_type factor)
+{
+  AssertIsFinite (factor);
+
+  for (size_type b=0; b!=n_blocks(); ++b)
+    this->components[b] *= factor;
+
+  return *this;
+}
+
+
+
+// Divide every block by the scalar @p factor.
+// The factor must be finite and nonzero (both asserted).
+template <class VectorType>
+inline BlockVectorBase<VectorType> &
+BlockVectorBase<VectorType>::operator /= (const value_type factor)
+{
+  AssertIsFinite (factor);
+  Assert (factor != 0., ExcDivideByZero() );
+
+  for (size_type b=0; b!=n_blocks(); ++b)
+    this->components[b] /= factor;
+
+  return *this;
+}
+
+
+// Read element @p i: translate the global index into (block, index within
+// block) and look the value up in that block.
+template <class VectorType>
+inline typename BlockVectorBase<VectorType>::value_type
+BlockVectorBase<VectorType>::operator() (const size_type i) const
+{
+  const std::pair<unsigned int,size_type> where = block_indices.global_to_local (i);
+  return this->components[where.first] (where.second);
+}
+
+
+
+// Writable element @p i: translate the global index into (block, index
+// within block) and return that block's reference to the element.
+template <class VectorType>
+inline typename BlockVectorBase<VectorType>::reference
+BlockVectorBase<VectorType>::operator() (const size_type i)
+{
+  const std::pair<unsigned int,size_type> where = block_indices.global_to_local (i);
+  return this->components[where.first] (where.second);
+}
+
+
+
+// Same as the const operator()(i), offered with subscript syntax.
+template <class VectorType>
+inline typename BlockVectorBase<VectorType>::value_type
+BlockVectorBase<VectorType>::operator[] (const size_type i) const
+{
+  return (*this)(i);
+}
+
+
+
+// Same as the non-const operator()(i), offered with subscript syntax.
+template <class VectorType>
+inline typename BlockVectorBase<VectorType>::reference
+BlockVectorBase<VectorType>::operator[] (const size_type i)
+{
+  return (*this)(i);
+}
+
+
+
+// Store the element at global index indices[k] into values[k] for every k.
+template <typename VectorType>
+template <typename OtherNumber>
+inline void BlockVectorBase<VectorType>::extract_subvector_to (const std::vector<size_type> &indices,
+                                                               std::vector<OtherNumber> &values) const
+{
+  for (size_type k = 0; k != indices.size(); ++k)
+    values[k] = (*this)(indices[k]);
+}
+
+
+
+// Walk the index range and write the element found at each global index to
+// the output iterator, advancing both iterators in lockstep.
+template <typename VectorType>
+template <typename ForwardIterator, typename OutputIterator>
+inline void BlockVectorBase<VectorType>::extract_subvector_to (ForwardIterator          indices_begin,
+                                                               const ForwardIterator    indices_end,
+                                                               OutputIterator           values_begin) const
+{
+  for (; indices_begin != indices_end; ++indices_begin)
+    {
+      *values_begin = (*this)(*indices_begin);
+      ++values_begin;
+    }
+}
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/chunk_sparse_matrix.h b/include/deal.II/lac/chunk_sparse_matrix.h
new file mode 100644
index 0000000..32124b6
--- /dev/null
+++ b/include/deal.II/lac/chunk_sparse_matrix.h
@@ -0,0 +1,2085 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__chunk_sparse_matrix_h
+#define dealii__chunk_sparse_matrix_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/lac/chunk_sparsity_pattern.h>
+#include <deal.II/lac/identity_matrix.h>
+#include <deal.II/lac/exceptions.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+template<typename number> class Vector;
+template<typename number> class FullMatrix;
+
+/*! @addtogroup Matrix1
+ *@{
+ */
+
+/**
+ * A namespace in which we declare iterators over the elements of sparse
+ * matrices.
+ */
+namespace ChunkSparseMatrixIterators
+{
+  // forward declaration
+  template <typename number, bool Constness>
+  class Iterator;
+
+  /**
+   * General template for chunk sparse matrix accessors. The first template
+   * argument denotes the underlying numeric type, the second the constness
+   * of the matrix.
+   *
+   * The general template is not implemented, only the specializations for the
+   * two possible values of the second template argument. Therefore, the
+   * interface listed here only serves as a template provided since doxygen
+   * does not link the specializations.
+   */
+  template <typename number, bool Constness>
+  class Accessor : public ChunkSparsityPatternIterators::Accessor
+  {
+  public:
+    /**
+     * Value of this matrix entry, returned by value (read-only access).
+     */
+    number value() const;
+
+    /**
+     * Value of this matrix entry, returned as a writable reference.
+     */
+    number &value();
+
+    /**
+     * Return a reference to the matrix into which this accessor points. Note
+     * that in the present case, this is a constant reference.
+     */
+    const ChunkSparseMatrix<number> &get_matrix () const;
+  };
+
+
+
+  /**
+   * Accessor class for constant matrices, used in the const_iterators. This
+   * class builds on the accessor classes used for sparsity patterns to loop
+   * over all nonzero entries, and only adds the accessor functions to gain
+   * access to the actual value stored at a certain location.
+   */
+  template <typename number>
+  class Accessor<number,true> : public ChunkSparsityPatternIterators::Accessor
+  {
+  public:
+    /**
+     * Typedef for the type (including constness) of the matrix to be used
+     * here.
+     */
+    typedef const ChunkSparseMatrix<number> MatrixType;
+
+    /**
+     * Constructor for an accessor into @p matrix at the given @p row.
+     */
+    Accessor (MatrixType         *matrix,
+              const unsigned int  row);
+
+    /**
+     * Constructor. Construct the end accessor for the given matrix.
+     */
+    Accessor (MatrixType         *matrix);
+
+    /**
+     * Conversion constructor to get from a non-const accessor to a const one.
+     */
+    Accessor (const ChunkSparseMatrixIterators::Accessor<number,false> &a);
+
+    /**
+     * Value of this matrix entry, returned by value (read-only access).
+     */
+    number value() const;
+
+    /**
+     * Return a reference to the matrix into which this accessor points. Note
+     * that in the present case, this is a constant reference.
+     */
+    const MatrixType &get_matrix () const;
+
+  private:
+    /**
+     * Pointer to the (constant) matrix this accessor refers to.
+     */
+    MatrixType *matrix;
+
+    /**
+     * Make the advance function of the base class available.
+     */
+    using ChunkSparsityPatternIterators::Accessor::advance;
+
+    /**
+     * Make iterator class a friend.
+     */
+    template <typename, bool>
+    friend class Iterator;
+  };
+
+
+  /**
+   * Accessor class for non-constant matrices, used in the iterators. This
+   * class builds on the accessor classes used for sparsity patterns to loop
+   * over all nonzero entries, and only adds the accessor functions to gain
+   * access to the actual value stored at a certain location.
+   */
+  template <typename number>
+  class Accessor<number,false> : public ChunkSparsityPatternIterators::Accessor
+  {
+  private:
+    /**
+     * Reference class. This is what the accessor class returns when you call
+     * the value() function. The reference acts just as if it were a reference
+     * to the actual value of a matrix entry, i.e. you can read and write it,
+     * you can add and multiply to it, etc, but since the matrix does not give
+     * away the address of this matrix entry, we have to go through functions
+     * to do all this.
+     *
+     * The constructor takes a pointer to an accessor object that describes
+     * which element of the matrix it points to. This creates an ambiguity
+     * when one writes code like iterator->value()=0 (instead of
+     * iterator->value()=0.0), since the right hand side is an integer that
+     * can both be converted to a <tt>number</tt> (i.e., most commonly a
+     * double) or to another object of type <tt>Reference</tt>. The compiler
+     * then complains about not knowing which conversion to take.
+     *
+     * For some reason, adding another overload operator=(int) doesn't seem to
+     * cure the problem. We avoid it, however, by adding a second, dummy
+     * argument to the Reference constructor, that is unused, but makes sure
+     * there is no second matching conversion sequence using a one-argument
+     * right hand side.
+     */
+    class Reference
+    {
+    public:
+      /**
+       * Constructor. For the second argument, see the general class
+       * documentation.
+       */
+      Reference (const Accessor *accessor,
+                 const bool dummy);
+
+      /**
+       * Conversion operator to the data type of the matrix.
+       */
+      operator number () const;
+
+      /**
+       * Set the element of the matrix we presently point to to @p n.
+       */
+      const Reference &operator = (const number n) const;
+
+      /**
+       * Add @p n to the element of the matrix we presently point to.
+       */
+      const Reference &operator += (const number n) const;
+
+      /**
+       * Subtract @p n from the element of the matrix we presently point to.
+       */
+      const Reference &operator -= (const number n) const;
+
+      /**
+       * Multiply the element of the matrix we presently point to by @p n.
+       */
+      const Reference &operator *= (const number n) const;
+
+      /**
+       * Divide the element of the matrix we presently point to by @p n.
+       */
+      const Reference &operator /= (const number n) const;
+
+    private:
+      /**
+       * Pointer to the accessor that denotes which element we presently point
+       * to.
+       */
+      const Accessor *accessor;
+    };
+
+  public:
+    /**
+     * Typedef for the type (including constness) of the matrix to be used
+     * here.
+     */
+    typedef ChunkSparseMatrix<number> MatrixType;
+
+    /**
+     * Constructor for an accessor into @p matrix at the given @p row.
+     */
+    Accessor (MatrixType         *matrix,
+              const unsigned int  row);
+
+    /**
+     * Constructor. Construct the end accessor for the given matrix.
+     */
+    Accessor (MatrixType         *matrix);
+
+    /**
+     * Value of this matrix entry, returned as a read- and writable reference.
+     */
+    Reference value() const;
+
+    /**
+     * Return a reference to the matrix into which this accessor points. Note
+     * that in the present case, this is a non-constant reference.
+     */
+    MatrixType &get_matrix () const;
+
+  private:
+    /**
+     * Pointer to the (non-constant) matrix this accessor refers to.
+     */
+    MatrixType *matrix;
+
+    /**
+     * Make the advance function of the base class available.
+     */
+    using ChunkSparsityPatternIterators::Accessor::advance;
+
+    /**
+     * Make iterator class a friend.
+     */
+    template <typename, bool>
+    friend class Iterator;
+  };
+
+
+
+  /**
+   * Iterator for constant and non-constant matrices.
+   *
+   * The first template argument denotes the underlying numeric type, the
+   * second the constness of the matrix.
+   *
+   * NOTE(review): this comment used to claim a specialization exists for
+   * <tt>Constness=false</tt>; none is visible next to this declaration, and
+   * both cases appear to be served by this one template -- please confirm.
+   */
+  template <typename number, bool Constness>
+  class Iterator
+  {
+  public:
+    /**
+     * Typedef for the matrix type (including constness) we are to operate on.
+     */
+    typedef
+    typename Accessor<number,Constness>::MatrixType
+    MatrixType;
+
+    /**
+     * A typedef for the type you get when you dereference an iterator of the
+     * current kind.
+     */
+    typedef
+    const Accessor<number,Constness> &value_type;
+
+    /**
+     * Constructor. Create an iterator into the matrix @p matrix for the given
+     * @p row. (No index within the row is passed to this constructor.)
+     */
+    Iterator (MatrixType        *matrix,
+              const unsigned int row);
+
+    /**
+     * Constructor. Create the end iterator for the given matrix.
+     */
+    Iterator (MatrixType *matrix);
+
+    /**
+     * Conversion constructor to get from a non-const iterator to a const
+     * iterator.
+     */
+    Iterator (const ChunkSparseMatrixIterators::Iterator<number,false> &i);
+
+    /**
+     * Prefix increment.
+     */
+    Iterator &operator++ ();
+
+    /**
+     * Postfix increment.
+     */
+    Iterator operator++ (int);
+
+    /**
+     * Dereferencing operator. Returns a constant reference to an accessor.
+     */
+    const Accessor<number,Constness> &operator* () const;
+
+    /**
+     * Dereferencing operator. Returns a pointer to a constant accessor.
+     */
+    const Accessor<number,Constness> *operator-> () const;
+
+    /**
+     * Comparison. True, if both iterators point to the same matrix position.
+     */
+    bool operator == (const Iterator &) const;
+
+    /**
+     * Inverse of <tt>==</tt>.
+     */
+    bool operator != (const Iterator &) const;
+
+    /**
+     * Comparison operator. Result is true if either the first row number is
+     * smaller or if the row numbers are equal and the first index is smaller.
+     *
+     * This function is only valid if both iterators point into the same
+     * matrix.
+     */
+    bool operator < (const Iterator &) const;
+
+    /**
+     * Comparison operator. Works in the same way as above operator, just the
+     * other way round.
+     */
+    bool operator > (const Iterator &) const;
+
+    /**
+     * Return the distance between the current iterator and the argument. The
+     * distance is given by how many times one has to apply operator++ to the
+     * current iterator to get the argument (for a positive return value), or
+     * operator-- (for a negative return value).
+     */
+    int operator - (const Iterator &p) const;
+
+    /**
+     * Return an iterator that is @p n ahead of the current one.
+     */
+    Iterator operator + (const unsigned int n) const;
+
+  private:
+    /**
+     * Store an object of the accessor class.
+     */
+    Accessor<number,Constness> accessor;
+  };
+
+}
+
+
+
+/**
+ * Sparse matrix. This class implements the function to store values in the
+ * locations of a sparse matrix denoted by a SparsityPattern. The separation
+ * of sparsity pattern and values is done since one can store data elements of
+ * different type in these locations without the SparsityPattern having to
+ * know this, and more importantly one can associate more than one matrix with
+ * the same sparsity pattern.
+ *
+ * The use of this class is demonstrated in step-51.
+ *
+ * @note Instantiations for this template are provided for <tt>@<float@> and
+ * @<double@></tt>; others can be generated in application programs (see the
+ * section on
+ * @ref Instantiations
+ * in the manual).
+ *
+ * @author Wolfgang Bangerth, 2008
+ */
+template <typename number>
+class ChunkSparseMatrix : public virtual Subscriptor
+{
+public:
+  /**
+   * Declare the type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * Type of matrix entries. This typedef is analogous to <tt>value_type</tt>
+   * in the standard library containers.
+   */
+  typedef number value_type;
+
+  /**
+   * Declare a type that holds real-valued numbers with the same precision
+   * as the template argument to this class. If the template argument of this
+   * class is a real data type, then real_type equals the template argument.
+   * If the template argument is a std::complex type then real_type equals the
+   * type underlying the complex numbers.
+   *
+   * This typedef is used to represent the return type of norms.
+   */
+  typedef typename numbers::NumberTraits<number>::real_type real_type;
+
+  /**
+   * Typedef of an iterator class walking over all the nonzero entries of this
+   * matrix. This iterator cannot change the values of the matrix.
+   */
+  typedef
+  ChunkSparseMatrixIterators::Iterator<number,true>
+  const_iterator;
+
+  /**
+   * Typedef of an iterator class walking over all the nonzero entries of this
+   * matrix. This iterator @em can change the values of the matrix, but of
+   * course can't change the sparsity pattern as this is fixed once a sparse
+   * matrix is attached to it.
+   */
+  typedef
+  ChunkSparseMatrixIterators::Iterator<number,false>
+  iterator;
+
+  /**
+   * A structure that describes some of the traits of this class in terms of
+   * its run-time behavior. Some other classes (such as the block matrix
+   * classes) that take one or other of the matrix classes as its template
+   * parameters can tune their behavior based on the variables in this class.
+   */
+  struct Traits
+  {
+    /**
+     * It is safe to elide additions of zeros to individual elements of this
+     * matrix.
+     */
+    static const bool zero_addition_can_be_elided = true;
+  };
+
+  /**
+   * @name Constructors and initialization.
+   */
+//@{
+  /**
+   * Constructor; initializes the matrix to be empty, without any structure,
+   * i.e.  the matrix is not usable at all. This constructor is therefore only
+   * useful for matrices which are members of a class. All other matrices
+   * should be created at a point in the data flow where all necessary
+   * information is available.
+   *
+   * You have to initialize the matrix before usage with reinit(const
+   * ChunkSparsityPattern&).
+   */
+  ChunkSparseMatrix ();
+
+  /**
+   * Copy constructor. This constructor is only allowed to be called if the
+   * matrix to be copied is empty. This is for the same reason as for the
+   * ChunkSparsityPattern, see there for the details.
+   *
+   * If you really want to copy a whole matrix, you can do so by using the
+   * copy_from() function.
+   */
+  ChunkSparseMatrix (const ChunkSparseMatrix &);
+
+  /**
+   * Constructor. Takes the given matrix sparsity structure to represent the
+   * sparsity pattern of this matrix. You can change the sparsity pattern
+   * later on by calling the reinit(const ChunkSparsityPattern&) function.
+   *
+   * You have to make sure that the lifetime of the sparsity structure is at
+   * least as long as that of this matrix or as long as reinit(const
+   * ChunkSparsityPattern&) is not called with a new sparsity pattern.
+   *
+   * The constructor is marked explicit so as to disallow that someone passes
+   * a sparsity pattern in place of a sparse matrix to some function, where an
+   * empty matrix would be generated then.
+   */
+  explicit ChunkSparseMatrix (const ChunkSparsityPattern &sparsity);
+
+  /**
+   * Copy constructor: initialize the matrix with the identity matrix. This
+   * constructor will throw an exception if the sizes of the sparsity pattern
+   * and the identity matrix do not coincide, or if the sparsity pattern does
+   * not provide for nonzero entries on the entire diagonal.
+   */
+  ChunkSparseMatrix (const ChunkSparsityPattern &sparsity,
+                     const IdentityMatrix  &id);
+
+  /**
+   * Destructor. Free all memory, but do not release the memory of the
+   * sparsity structure.
+   */
+  virtual ~ChunkSparseMatrix ();
+
+  /**
+   * Copy operator. Since copying entire sparse matrices is a very expensive
+   * operation, we disallow doing so except for the special case of empty
+   * matrices of size zero. This doesn't seem particularly useful, but is
+   * exactly what one needs if one wanted to have a
+   * <code>std::vector@<ChunkSparseMatrix@<double@> @></code>: in that case,
+   * one can create a vector (which needs the ability to copy objects) of
+   * empty matrices that are then later filled with something useful.
+   */
+  ChunkSparseMatrix<number> &operator = (const ChunkSparseMatrix<number> &);
+
+  /**
+   * Copy operator: initialize the matrix with the identity matrix. This
+   * operator will throw an exception if the sizes of the sparsity pattern and
+   * the identity matrix do not coincide, or if the sparsity pattern does not
+   * provide for nonzero entries on the entire diagonal.
+   */
+  ChunkSparseMatrix<number> &
+  operator= (const IdentityMatrix  &id);
+
+  /**
+   * This operator assigns a scalar to a matrix. Since this does usually not
+   * make much sense (should we set all matrix entries to this value?  Only
+   * the nonzero entries of the sparsity pattern?), this operation is only
+   * allowed if the actual value to be assigned is zero. This operator only
+   * exists to allow for the obvious notation <tt>matrix=0</tt>, which sets
+   * all elements of the matrix to zero, but keep the sparsity pattern
+   * previously used.
+   */
+  ChunkSparseMatrix &operator = (const double d);
+
+  /**
+   * Reinitialize the sparse matrix with the given sparsity pattern. The
+   * latter tells the matrix how many nonzero elements there need to be
+   * reserved.
+   *
+   * Regarding memory allocation, the same applies as said above.
+   *
+   * You have to make sure that the lifetime of the sparsity structure is at
+   * least as long as that of this matrix or as long as reinit(const
+   * ChunkSparsityPattern &) is not called with a new sparsity structure.
+   *
+   * The elements of the matrix are set to zero by this function.
+   */
+  virtual void reinit (const ChunkSparsityPattern &sparsity);
+
+  /**
+   * Release all memory and return to a state just like after having called
+   * the default constructor. It also forgets the sparsity pattern it was
+   * previously tied to.
+   */
+  virtual void clear ();
+//@}
+  /**
+   * @name Information on the matrix
+   */
+//@{
+  /**
+   * Return whether the object is empty. It is empty if either both dimensions
+   * are zero or no ChunkSparsityPattern is associated.
+   */
+  bool empty () const;
+
+  /**
+   * Return the dimension of the codomain (or range) space. To remember: the
+   * matrix is of dimension $m \times n$.
+   */
+  size_type m () const;
+
+  /**
+   * Return the dimension of the domain space. To remember: the matrix is of
+   * dimension $m \times n$.
+   */
+  size_type n () const;
+
+  /**
+   * Return the number of nonzero elements of this matrix. Actually, it
+   * returns the number of entries in the sparsity pattern; if any of the
+   * entries should happen to be zero, it is counted anyway.
+   */
+  size_type n_nonzero_elements () const;
+
+  /**
+   * Return the number of actually nonzero elements of this matrix.
+   *
+   * Note that, in contrast to n_nonzero_elements(), this function does not
+   * count all entries of the sparsity pattern but only the ones that are
+   * nonzero.
+   */
+  size_type n_actually_nonzero_elements () const;
+
+  /**
+   * Return a (constant) reference to the underlying sparsity pattern of this
+   * matrix.
+   *
+   * Though the return value is declared <tt>const</tt>, you should be aware
+   * that it may change if you call any nonconstant function of objects which
+   * operate on it.
+   */
+  const ChunkSparsityPattern &get_sparsity_pattern () const;
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object. See MemoryConsumption.
+   */
+  std::size_t memory_consumption () const;
+
+//@}
+  /**
+   * @name Modifying entries
+   */
+//@{
+  /**
+   * Set the element (<i>i,j</i>) to <tt>value</tt>. Throws an error if the
+   * entry does not exist or if <tt>value</tt> is not a finite number. Still,
+   * it is allowed to store zero values in non-existent fields.
+   */
+  void set (const size_type i,
+            const size_type j,
+            const number value);
+
+  /**
+   * Add <tt>value</tt> to the element (<i>i,j</i>).  Throws an error if the
+   * entry does not exist or if <tt>value</tt> is not a finite number. Still,
+   * it is allowed to store zero values in non-existent fields.
+   */
+  void add (const size_type i,
+            const size_type j,
+            const number value);
+
+  /**
+   * Add an array of values given by <tt>values</tt> in the given global
+   * matrix row at columns specified by col_indices in the sparse matrix.
+   *
+   * The optional parameter <tt>elide_zero_values</tt> can be used to specify
+   * whether zero values should be added anyway or these should be filtered
+   * away and only non-zero data is added. The default value is <tt>true</tt>,
+   * i.e., zero values won't be added into the matrix.
+   */
+  template <typename number2>
+  void add (const size_type  row,
+            const size_type  n_cols,
+            const size_type *col_indices,
+            const number2   *values,
+            const bool       elide_zero_values = true,
+            const bool       col_indices_are_sorted = false);
+
+  /**
+   * Multiply the entire matrix by a fixed factor.
+   */
+  ChunkSparseMatrix &operator *= (const number factor);
+
+  /**
+   * Divide the entire matrix by a fixed factor.
+   */
+  ChunkSparseMatrix &operator /= (const number factor);
+
+  /**
+   * Symmetrize the matrix by forming the mean value between the existing
+   * matrix and its transpose, $A = \frac 12(A+A^T)$.
+   *
+   * This operation assumes that the underlying sparsity pattern represents a
+   * symmetric object. If this is not the case, then the result of this
+   * operation will not be a symmetric matrix, since it only explicitly
+   * symmetrizes by looping over the lower left triangular part for efficiency
+   * reasons; if there are entries in the upper right triangle, then these
+   * elements are missed in the symmetrization. Symmetrization of the sparsity
+   * pattern can be obtained by ChunkSparsityPattern::symmetrize().
+   */
+  void symmetrize ();
+
+  /**
+   * Copy the matrix given as argument into the current object.
+   *
+   * Copying matrices is an expensive operation that we do not want to happen
+   * by accident through compiler generated code for <code>operator=</code>.
+   * (This would happen, for example, if one accidentally declared a function
+   * argument of the current type <i>by value</i> rather than <i>by
+   * reference</i>.) The functionality of copying matrices is implemented in
+   * this member function instead. All copy operations of objects of this type
+   * therefore require an explicit function call.
+   *
+   * The source matrix may be a matrix of arbitrary type, as long as its data
+   * type is convertible to the data type of this matrix.
+   *
+   * The function returns a reference to <tt>*this</tt>.
+   */
+  template <typename somenumber>
+  ChunkSparseMatrix<number> &
+  copy_from (const ChunkSparseMatrix<somenumber> &source);
+
+  /**
+   * This function is completely analogous to the
+   * ChunkSparsityPattern::copy_from() function in that it allows to
+   * initialize a whole matrix in one step. See there for more information on
+   * argument types and their meaning. You can also find a small example on
+   * how to use this function there.
+   *
+   * The only difference to the cited function is that the objects which the
+   * inner iterator points to need to be of type <tt>std::pair<unsigned int,
+   * value></tt>, where <tt>value</tt> needs to be convertible to the element
+   * type of this class, as specified by the <tt>number</tt> template
+   * argument.
+   *
+   * Previous content of the matrix is overwritten. Note that the entries
+   * specified by the input parameters need not necessarily cover all elements
+   * of the matrix. Elements not covered remain untouched.
+   */
+  template <typename ForwardIterator>
+  void copy_from (const ForwardIterator begin,
+                  const ForwardIterator end);
+
+  /**
+   * Copy the nonzero entries of a full matrix into this object. Previous
+   * content is deleted. Note that the underlying sparsity pattern must be
+   * appropriate to hold the nonzero entries of the full matrix.
+   */
+  template <typename somenumber>
+  void copy_from (const FullMatrix<somenumber> &matrix);
+
+  /**
+   * Add <tt>matrix</tt> scaled by <tt>factor</tt> to this matrix, i.e. the
+   * matrix <tt>factor*matrix</tt> is added to <tt>this</tt>. Both matrices
+   * are required to point to the same sparsity pattern object, since the
+   * operation is cheaper in that case; this function throws an error
+   * otherwise.
+   *
+   * The source matrix may be a sparse matrix over an arbitrary underlying
+   * scalar type, as long as its data type is convertible to the data type of
+   * this matrix.
+   */
+  template <typename somenumber>
+  void add (const number factor,
+            const ChunkSparseMatrix<somenumber> &matrix);
+
+//@}
+  /**
+   * @name Entry Access
+   */
+//@{
+
+  /**
+   * Return the value of the entry (<i>i,j</i>).  This may be an expensive
+   * operation and you should always take care where to call this function. In
+   * order to avoid abuse, this function throws an exception if the required
+   * element does not exist in the matrix.
+   *
+   * In case you want a function that returns zero instead (for entries that
+   * are not in the sparsity pattern of the matrix), use the el() function.
+   *
+   * If you are looping over all elements, consider using one of the iterator
+   * classes instead, since they are tailored better to a sparse matrix
+   * structure.
+   */
+  number operator () (const size_type i,
+                      const size_type j) const;
+
+  /**
+   * This function is mostly like operator()() in that it returns the value of
+   * the matrix entry (<i>i,j</i>). The only difference is that if this entry
+   * does not exist in the sparsity pattern, then instead of raising an
+   * exception, zero is returned. While this may be convenient in some cases,
+   * note that it is simple to write algorithms that are slow compared to an
+   * optimal solution, since the sparsity of the matrix is not used.
+   *
+   * If you are looping over all elements, consider using one of the iterator
+   * classes instead, since they are tailored better to a sparse matrix
+   * structure.
+   */
+  number el (const size_type i,
+             const size_type j) const;
+
+  /**
+   * Return the main diagonal element in the <i>i</i>th row. This function
+   * throws an error if the matrix is not square.
+   *
+   * This function is considerably faster than the operator()(), since for
+   * square matrices, the diagonal entry may be the first to be stored in
+   * each row and access therefore does not involve searching for the right
+   * column number.
+   */
+  number diag_element (const size_type i) const;
+
+  /**
+   * Same as above, but return a writeable reference. You're sure you know
+   * what you do?
+   */
+  number &diag_element (const size_type i);
+
+  /**
+   * Extracts a copy of the values and indices in the given matrix row.
+   *
+   * The user is expected to pass the length of the arrays column_indices and
+   * values, which gives a means for checking that we do not write to
+   * unallocated memory. This method is motivated by a similar method in
+   * Trilinos row matrices and gives faster access to entries in the matrix as
+   * compared to iterators which are quite slow for this matrix type.
+   */
+  void extract_row_copy (const size_type row,
+                         const size_type array_length,
+                         size_type      &row_length,
+                         size_type      *column_indices,
+                         number         *values) const;
+
+//@}
+  /**
+   * @name Matrix vector multiplications
+   */
+//@{
+  /**
+   * Matrix-vector multiplication: let <i>dst = M*src</i> with <i>M</i> being
+   * this matrix.
+   *
+   * Note that while this function can operate on all vectors that offer
+   * iterator classes, it is only really effective for objects of type
+   * @ref Vector.
+   * For all classes for which iterating over elements, or random member
+   * access is expensive, this function is not efficient. In particular, if
+   * you want to multiply with BlockVector objects, you should consider using
+   * a BlockChunkSparseMatrix as well.
+   *
+   * Source and destination must not be the same vector.
+   */
+  template <class OutVector, class InVector>
+  void vmult (OutVector &dst,
+              const InVector &src) const;
+
+  /**
+   * Matrix-vector multiplication: let <i>dst = M<sup>T</sup>*src</i> with
+   * <i>M</i> being this matrix. This function does the same as vmult() but
+   * takes the transposed matrix.
+   *
+   * Note that while this function can operate on all vectors that offer
+   * iterator classes, it is only really effective for objects of type
+   * @ref Vector.
+   * For all classes for which iterating over elements, or random member
+   * access is expensive, this function is not efficient. In particular, if
+   * you want to multiply with BlockVector objects, you should consider using
+   * a BlockChunkSparseMatrix as well.
+   *
+   * Source and destination must not be the same vector.
+   */
+  template <class OutVector, class InVector>
+  void Tvmult (OutVector &dst,
+               const InVector &src) const;
+
+  /**
+   * Adding Matrix-vector multiplication. Add <i>M*src</i> on <i>dst</i> with
+   * <i>M</i> being this matrix.
+   *
+   * Note that while this function can operate on all vectors that offer
+   * iterator classes, it is only really effective for objects of type
+   * @ref Vector.
+   * For all classes for which iterating over elements, or random member
+   * access is expensive, this function is not efficient. In particular, if
+   * you want to multiply with BlockVector objects, you should consider using
+   * a BlockChunkSparseMatrix as well.
+   *
+   * Source and destination must not be the same vector.
+   */
+  template <class OutVector, class InVector>
+  void vmult_add (OutVector &dst,
+                  const InVector &src) const;
+
+  /**
+   * Adding Matrix-vector multiplication. Add <i>M<sup>T</sup>*src</i> to
+   * <i>dst</i> with <i>M</i> being this matrix. This function does the same
+   * as vmult_add() but takes the transposed matrix.
+   *
+   * Note that while this function can operate on all vectors that offer
+   * iterator classes, it is only really effective for objects of type
+   * @ref Vector.
+   * For all classes for which iterating over elements, or random member
+   * access is expensive, this function is not efficient. In particular, if
+   * you want to multiply with BlockVector objects, you should consider using
+   * a BlockChunkSparseMatrix as well.
+   *
+   * Source and destination must not be the same vector.
+   */
+  template <class OutVector, class InVector>
+  void Tvmult_add (OutVector &dst,
+                   const InVector &src) const;
+
+  /**
+   * Return the square of the norm of the vector $v$ with respect to the norm
+   * induced by this matrix, i.e. $\left(v,Mv\right)$. This is useful, e.g. in
+   * the finite element context, where the $L_2$ norm of a function equals the
+   * matrix norm with respect to the mass matrix of the vector representing
+   * the nodal values of the finite element function.
+   *
+   * Obviously, the matrix needs to be square for this operation, and for
+   * the result to actually be a norm it also needs to be either real
+   * symmetric or complex hermitian.
+   *
+   * The underlying template types of both this matrix and the given vector
+   * should either both be real or complex-valued, but not mixed, for this
+   * function to make sense.
+   */
+  template <typename somenumber>
+  somenumber matrix_norm_square (const Vector<somenumber> &v) const;
+
+  /**
+   * Compute the matrix scalar product $\left(u,Mv\right)$.
+   */
+  template <typename somenumber>
+  somenumber matrix_scalar_product (const Vector<somenumber> &u,
+                                    const Vector<somenumber> &v) const;
+  /**
+   * Compute the residual of an equation <i>Mx=b</i>, where the residual is
+   * defined to be <i>r=b-Mx</i>. Write the residual into <tt>dst</tt>. The
+   * <i>l<sub>2</sub></i> norm of the residual vector is returned.
+   *
+   * Source <i>x</i> and destination <i>dst</i> must not be the same vector.
+   */
+  template <typename somenumber>
+  somenumber residual (Vector<somenumber>       &dst,
+                       const Vector<somenumber> &x,
+                       const Vector<somenumber> &b) const;
+
+//@}
+  /**
+   * @name Matrix norms
+   */
+//@{
+
+  /**
+   * Return the l1-norm of the matrix, that is
+   * $|M|_1=\max_{\mathrm{all\ columns\ }j}\sum_{\mathrm{all\ rows\ }i} |M_{ij}|$,
+   * (max. sum of columns).  This is the natural matrix norm that is
+   * compatible to the l1-norm for vectors, i.e. $|Mv|_1\leq |M|_1 |v|_1$.
+   * (cf. Haemmerlin-Hoffmann: Numerische Mathematik)
+   */
+  real_type l1_norm () const;
+
+  /**
+   * Return the linfty-norm of the matrix, that is
+   * $|M|_\infty=\max_{\mathrm{all\ rows\ }i}\sum_{\mathrm{all\ columns\ }j} |M_{ij}|$,
+   * (max. sum of rows).  This is the natural matrix norm that is compatible
+   * to the linfty-norm of vectors, i.e. $|Mv|_\infty \leq |M|_\infty
+   * |v|_\infty$.  (cf. Haemmerlin-Hoffmann: Numerische Mathematik)
+   */
+  real_type linfty_norm () const;
+
+  /**
+   * Return the frobenius norm of the matrix, i.e. the square root of the sum
+   * of squares of all entries in the matrix.
+   */
+  real_type frobenius_norm () const;
+//@}
+  /**
+   * @name Preconditioning methods
+   */
+//@{
+
+  /**
+   * Apply the Jacobi preconditioner, which multiplies every element of the
+   * <tt>src</tt> vector by the inverse of the respective diagonal element and
+   * multiplies the result with the relaxation factor <tt>omega</tt>.
+   */
+  template <typename somenumber>
+  void precondition_Jacobi (Vector<somenumber>       &dst,
+                            const Vector<somenumber> &src,
+                            const number              omega = 1.) const;
+
+  /**
+   * Apply SSOR preconditioning to <tt>src</tt>.
+   */
+  template <typename somenumber>
+  void precondition_SSOR (Vector<somenumber>       &dst,
+                          const Vector<somenumber> &src,
+                          const number              om = 1.) const;
+
+  /**
+   * Apply SOR preconditioning matrix to <tt>src</tt>.
+   */
+  template <typename somenumber>
+  void precondition_SOR (Vector<somenumber>       &dst,
+                         const Vector<somenumber> &src,
+                         const number              om = 1.) const;
+
+  /**
+   * Apply transpose SOR preconditioning matrix to <tt>src</tt>.
+   */
+  template <typename somenumber>
+  void precondition_TSOR (Vector<somenumber>       &dst,
+                          const Vector<somenumber> &src,
+                          const number              om = 1.) const;
+
+  /**
+   * Perform SSOR preconditioning in-place.  Apply the preconditioner matrix
+   * without copying to a second vector.  <tt>omega</tt> is the relaxation
+   * parameter.
+   */
+  template <typename somenumber>
+  void SSOR (Vector<somenumber> &v,
+             const number        omega = 1.) const;
+
+  /**
+   * Perform an SOR preconditioning in-place.  <tt>om</tt> is the relaxation
+   * parameter.
+   */
+  template <typename somenumber>
+  void SOR (Vector<somenumber> &v,
+            const number        om = 1.) const;
+
+  /**
+   * Perform a transpose SOR preconditioning in-place.  <tt>om</tt> is the
+   * relaxation parameter.
+   */
+  template <typename somenumber>
+  void TSOR (Vector<somenumber> &v,
+             const number        om = 1.) const;
+
+  /**
+   * Perform a permuted SOR preconditioning in-place.
+   *
+   * The standard SOR method is applied in the order prescribed by
+   * <tt>permutation</tt>, that is, first the row <tt>permutation[0]</tt>,
+   * then <tt>permutation[1]</tt> and so on. For efficiency reasons, the
+   * permutation as well as its inverse are required.
+   *
+   * <tt>om</tt> is the relaxation parameter.
+   */
+  template <typename somenumber>
+  void PSOR (Vector<somenumber> &v,
+             const std::vector<size_type> &permutation,
+             const std::vector<size_type> &inverse_permutation,
+             const number        om = 1.) const;
+
+  /**
+   * Perform a transposed permuted SOR preconditioning in-place.
+   *
+   * The transposed SOR method is applied in the order prescribed by
+   * <tt>permutation</tt>, that is, first the row <tt>permutation[m()-1]</tt>,
+   * then <tt>permutation[m()-2]</tt> and so on. For efficiency reasons, the
+   * permutation as well as its inverse are required.
+   *
+   * <tt>om</tt> is the relaxation parameter.
+   */
+  template <typename somenumber>
+  void TPSOR (Vector<somenumber> &v,
+              const std::vector<size_type> &permutation,
+              const std::vector<size_type> &inverse_permutation,
+              const number        om = 1.) const;
+
+  /**
+   * Do one SOR step on <tt>v</tt>.  Performs a direct SOR step with right
+   * hand side <tt>b</tt>.
+   */
+  template <typename somenumber>
+  void SOR_step (Vector<somenumber> &v,
+                 const Vector<somenumber> &b,
+                 const number        om = 1.) const;
+
+  /**
+   * Do one adjoint SOR step on <tt>v</tt>.  Performs a direct TSOR step with
+   * right hand side <tt>b</tt>.
+   */
+  template <typename somenumber>
+  void TSOR_step (Vector<somenumber> &v,
+                  const Vector<somenumber> &b,
+                  const number        om = 1.) const;
+
+  /**
+   * Do one SSOR step on <tt>v</tt>.  Performs a direct SSOR step with right
+   * hand side <tt>b</tt> by performing TSOR after SOR.
+   */
+  template <typename somenumber>
+  void SSOR_step (Vector<somenumber> &v,
+                  const Vector<somenumber> &b,
+                  const number        om = 1.) const;
+//@}
+  /**
+   * @name Iterators
+   */
+//@{
+
+  /**
+   * Iterator starting at first entry of the matrix. This is the version for
+   * constant matrices.
+   *
+   * Note that due to the layout in ChunkSparseMatrix, iterating over matrix
+   * entries is considerably slower than for a sparse matrix, as the iterator
+   * travels row-by-row, whereas data is stored in chunks of several rows
+   * and columns.
+   */
+  const_iterator begin () const;
+
+  /**
+   * Final iterator. This is the version for constant matrices.
+   *
+   * Note that due to the layout in ChunkSparseMatrix, iterating over matrix
+   * entries is considerably slower than for a sparse matrix, as the iterator
+   * travels row-by-row, whereas data is stored in chunks of several rows
+   * and columns.
+   */
+  const_iterator end () const;
+
+  /**
+   * Iterator starting at the first entry of the matrix. This is the version
+   * for non-constant matrices.
+   *
+   * Note that due to the layout in ChunkSparseMatrix, iterating over matrix
+   * entries is considerably slower than for a sparse matrix, as the iterator
+   * travels row-by-row, whereas data is stored in chunks of several rows
+   * and columns.
+   */
+  iterator begin ();
+
+  /**
+   * Final iterator. This is the version for non-constant matrices.
+   *
+   * Note that due to the layout in ChunkSparseMatrix, iterating over matrix
+   * entries is considerably slower than for a sparse matrix, as the iterator
+   * travels row-by-row, whereas data is stored in chunks of several rows
+   * and columns.
+   */
+  iterator end ();
+
+  /**
+   * Iterator starting at the first entry of row <tt>r</tt>. This is the
+   * version for constant matrices.
+   *
+   * Note that if the given row is empty, i.e. does not contain any nonzero
+   * entries, then the iterator returned by this function equals
+   * <tt>end(r)</tt>. Note also that the iterator may not be dereferencable in
+   * that case.
+   *
+   * Note that due to the layout in ChunkSparseMatrix, iterating over matrix
+   * entries is considerably slower than for a sparse matrix, as the iterator
+   * travels row-by-row, whereas data is stored in chunks of several rows
+   * and columns.
+   */
+  const_iterator begin (const unsigned int r) const;
+
+  /**
+   * Final iterator of row <tt>r</tt>. It points to the first element past the
+   * end of line @p r, or past the end of the entire sparsity pattern. This is
+   * the version for constant matrices.
+   *
+   * Note that the end iterator is not necessarily dereferencable. This is in
+   * particular the case if it is the end iterator for the last row of a
+   * matrix.
+   *
+   * Note that due to the layout in ChunkSparseMatrix, iterating over matrix
+   * entries is considerably slower than for a sparse matrix, as the iterator
+   * travels row-by-row, whereas data is stored in chunks of several rows
+   * and columns.
+   */
+  const_iterator end (const unsigned int r) const;
+
+  /**
+   * Iterator starting at the first entry of row <tt>r</tt>. This is the
+   * version for non-constant matrices.
+   *
+   * Note that if the given row is empty, i.e. does not contain any nonzero
+   * entries, then the iterator returned by this function equals
+   * <tt>end(r)</tt>. Note also that the iterator may not be dereferencable in
+   * that case.
+   *
+   * Note that due to the layout in ChunkSparseMatrix, iterating over matrix
+   * entries is considerably slower than for a sparse matrix, as the iterator
+   * travels row-by-row, whereas data is stored in chunks of several rows
+   * and columns.
+   */
+  iterator begin (const unsigned int r);
+
+  /**
+   * Final iterator of row <tt>r</tt>. It points to the first element past the
+   * end of line @p r, or past the end of the entire sparsity pattern. This is
+   * the version for non-constant matrices.
+   *
+   * Note that the end iterator is not necessarily dereferencable. This is in
+   * particular the case if it is the end iterator for the last row of a
+   * matrix.
+   *
+   * Note that due to the layout in ChunkSparseMatrix, iterating over matrix
+   * entries is considerably slower than for a sparse matrix, as the iterator
+   * travels row-by-row, whereas data is stored in chunks of several rows
+   * and columns.
+   */
+  iterator end (const unsigned int r);
+//@}
+  /**
+   * @name Input/Output
+   */
+//@{
+
+  /**
+   * Print the matrix to the given stream, using the format <tt>(line,col)
+   * value</tt>, i.e. one nonzero entry of the matrix per line.
+   */
+  void print (std::ostream &out) const;
+
+  /**
+   * Print the matrix in the usual format, i.e. as a matrix and not as a list
+   * of nonzero elements. For better readability, elements not in the matrix
+   * are displayed as empty space, while matrix elements which are explicitly
+   * set to zero are displayed as such.
+   *
+   * The parameters allow for a flexible setting of the output format:
+   * <tt>precision</tt> and <tt>scientific</tt> are used to determine the
+   * number format, where <tt>scientific = false</tt> means fixed point
+   * notation.  A zero entry for <tt>width</tt> makes the function compute a
+   * width, but it may be changed to a positive value, if output is crude.
+   *
+   * Additionally, a character for an empty value may be specified.
+   *
+   * Finally, the whole matrix can be multiplied with a common denominator to
+   * produce more readable output, even integers.
+   *
+   * @attention This function may produce <b>large</b> amounts of output if
+   * applied to a large matrix!
+   */
+  void print_formatted (std::ostream       &out,
+                        const unsigned int  precision   = 3,
+                        const bool          scientific  = true,
+                        const unsigned int  width       = 0,
+                        const char         *zero_string = " ",
+                        const double        denominator = 1.) const;
+
+  /**
+   * Print the actual pattern of the matrix. For each entry with an absolute
+   * value larger than threshold, a '*' is printed, a ':' for every value
+   * smaller and a '.' for every entry not allocated.
+   */
+  void print_pattern(std::ostream &out,
+                     const double threshold = 0.) const;
+
+  /**
+   * Write the data of this object en bloc to a file. This is done in a binary
+   * mode, so the output is neither readable by humans nor (probably) by other
+   * computers using a different operating system or number format.
+   *
+   * The purpose of this function is that you can swap out matrices and
+   * sparsity pattern if you are short of memory, want to communicate between
+   * different programs, or allow objects to be persistent across different
+   * runs of the program.
+   */
+  void block_write (std::ostream &out) const;
+
+  /**
+   * Read data that has previously been written by block_write() from a file.
+   * This is done using the inverse operations to the above function, so it is
+   * reasonably fast because the bitstream is not interpreted except for a few
+   * numbers up front.
+   *
+   * The object is resized on this operation, and all previous contents are
+   * lost. Note, however, that no checks are performed whether new data and
+   * the underlying ChunkSparsityPattern object fit together. It is your
+   * responsibility to make sure that the sparsity pattern and the data to be
+   * read match.
+   *
+   * A primitive form of error checking is performed which will recognize the
+   * bluntest attempts to interpret some data as a matrix stored bitwise to a
+   * file that wasn't actually created that way, but not more.
+   */
+  void block_read (std::istream &in);
+//@}
+  /**
+   * @addtogroup Exceptions
+   * @{
+   */
+
+  /**
+   * Exception raised when a matrix entry is accessed that does not exist in
+   * the sparsity pattern. The two integer arguments form the index pair that
+   * is reported in the message.
+   */
+  DeclException2 (ExcInvalidIndex,
+                  int, int,
+                  << "You are trying to access the matrix entry with index <"
+                  << arg1 << ',' << arg2
+                  // the trailing blank keeps "pattern" and "of" from being
+                  // glued together by string literal concatenation
+                  << ">, but this entry does not exist in the sparsity pattern "
+                  "of this matrix."
+                  "\n\n"
+                  "The most common cause for this problem is that you used "
+                  "a method to build the sparsity pattern that did not "
+                  "(completely) take into account all of the entries you "
+                  "will later try to write into. An example would be "
+                  "building a sparsity pattern that does not include "
+                  "the entries you will write into due to constraints "
+                  "on degrees of freedom such as hanging nodes or periodic "
+                  "boundary conditions. In such cases, building the "
+                  "sparsity pattern will succeed, but you will get errors "
+                  "such as the current one at one point or other when "
+                  "trying to write into the entries of the matrix.");
+  /**
+   * Exception without arguments. Judging by its name, it signals that two
+   * objects do not share the same ChunkSparsityPattern -- NOTE(review):
+   * confirm against the places where it is thrown.
+   */
+  DeclException0 (ExcDifferentChunkSparsityPatterns);
+  /**
+   * Exception reporting that the number of elements spanned by a pair of
+   * iterators (first argument) does not match the given number of rows
+   * (second argument).
+   */
+  DeclException2 (ExcIteratorRange,
+                  int, int,
+                  << "The iterators denote a range of " << arg1
+                  << " elements, but the given number of rows was " << arg2);
+  /**
+   * Exception without arguments. Judging by its name, it signals that the
+   * source and destination of an operation are the same object --
+   * NOTE(review): confirm against the places where it is thrown.
+   */
+  DeclException0 (ExcSourceEqualsDestination);
+  //@}
+private:
+  /**
+   * Pointer to the sparsity pattern used for this matrix. In order to
+   * guarantee that it is not deleted while still in use, we subscribe to it
+   * using the SmartPointer class.
+   */
+  SmartPointer<const ChunkSparsityPattern,ChunkSparseMatrix<number> > cols;
+
+  /**
+   * Array of values for all the nonzero entries. The position within the
+   * matrix, i.e.  the row and column number for a given entry can only be
+   * deduced using the sparsity pattern. The same holds for the more common
+   * operation of finding an entry by its coordinates.
+   */
+  number *val;
+
+  /**
+   * Allocated size of #val. This can be larger than the actually used part if
+   * the size of the matrix was reduced sometime in the past by associating a
+   * sparsity pattern with a smaller size to this object, using the reinit()
+   * function.
+   */
+  size_type max_len;
+
+  /**
+   * Return the location of entry $(i,j)$ within the val array.
+   */
+  size_type compute_location (const size_type i,
+                              const size_type j) const;
+
+  // make all other sparse matrices friends
+  template <typename somenumber> friend class ChunkSparseMatrix;
+
+  /**
+   * Also give access to internal details to the iterator/accessor classes.
+   */
+  template <typename,bool> friend class ChunkSparseMatrixIterators::Iterator;
+  template <typename,bool> friend class ChunkSparseMatrixIterators::Accessor;
+};
+
+/*@}*/
+
+#ifndef DOXYGEN
+/*---------------------- Inline functions -----------------------------------*/
+
+
+
+// Return the number of rows, as recorded in the attached sparsity pattern.
+template <typename number>
+inline typename ChunkSparseMatrix<number>::size_type
+ChunkSparseMatrix<number>::m () const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  return cols->rows;
+}
+
+
+// Return the number of columns, as recorded in the attached sparsity pattern.
+template <typename number>
+inline typename ChunkSparseMatrix<number>::size_type
+ChunkSparseMatrix<number>::n () const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  return cols->cols;
+}
+
+
+
+// Return a reference to the sparsity pattern this matrix is attached to.
+template <typename number>
+inline const ChunkSparsityPattern &
+ChunkSparseMatrix<number>::get_sparsity_pattern () const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  return *cols;
+}
+
+
+
+// Map the matrix entry (i,j) to its position in the val array, or return
+// ChunkSparsityPattern::invalid_entry if the chunk holding it is not stored.
+template <typename number>
+inline typename ChunkSparseMatrix<number>::size_type
+ChunkSparseMatrix<number>::compute_location (const size_type i,
+                                             const size_type j) const
+{
+  const size_type chunk_size = cols->get_chunk_size();
+
+  // position of the chunk containing (i,j) within the chunk sparsity pattern
+  const size_type chunk_index
+    = cols->sparsity_pattern(i/chunk_size, j/chunk_size);
+  if (chunk_index == ChunkSparsityPattern::invalid_entry)
+    return ChunkSparsityPattern::invalid_entry;
+
+  // chunks are stored contiguously, each one row by row
+  const size_type offset_of_chunk = chunk_index * chunk_size * chunk_size;
+  const size_type offset_in_chunk = (i % chunk_size) * chunk_size
+                                    + (j % chunk_size);
+  return offset_of_chunk + offset_in_chunk;
+}
+
+
+// Set the entry (i,j) to the given value. Writing to an entry outside the
+// sparsity pattern is only permitted when the value is zero.
+template <typename number>
+inline
+void ChunkSparseMatrix<number>::set (const size_type i,
+                                     const size_type j,
+                                     const number value)
+{
+  AssertIsFinite(value);
+  Assert (cols != 0, ExcNotInitialized());
+  // compute_location() reports missing entries through
+  // ChunkSparsityPattern::invalid_entry, so compare against that constant
+  const size_type index = compute_location(i,j);
+  Assert ((index != ChunkSparsityPattern::invalid_entry) ||
+          (value == 0.),
+          ExcInvalidIndex(i,j));
+
+  if (index != ChunkSparsityPattern::invalid_entry)
+    val[index] = value;
+}
+
+
+
+// Add value to the entry (i,j). Zero values are skipped without looking up
+// the entry, so adding zero never triggers ExcInvalidIndex.
+template <typename number>
+inline
+void ChunkSparseMatrix<number>::add (const size_type i,
+                                     const size_type j,
+                                     const number value)
+{
+  AssertIsFinite(value);
+  Assert (cols != 0, ExcNotInitialized());
+
+  // nothing to do for zeros
+  if (value == 0.)
+    return;
+
+  const size_type location = compute_location(i,j);
+  Assert ((location != ChunkSparsityPattern::invalid_entry),
+          ExcInvalidIndex(i,j));
+  val[location] += value;
+}
+
+
+
+// Add n_cols values to one row, entry by entry; both flags are ignored here.
+template <typename number> template <typename number2>
+inline void
+ChunkSparseMatrix<number>::add (const size_type  row,
+                                const size_type  n_cols,
+                                const size_type *col_indices,
+                                const number2   *values,
+                                const bool       /*elide_zero_values*/,
+                                const bool       /*col_indices_are_sorted*/)
+{
+  // TODO: could be done more efficiently...
+  for (size_type k=0; k<n_cols; ++k)
+    add(row, col_indices[k], static_cast<number>(values[k]));
+}
+
+
+
+// Multiply every stored value of the matrix by the given factor and return
+// a reference to this object.
+template <typename number>
+inline ChunkSparseMatrix<number> &
+ChunkSparseMatrix<number>::operator *= (const number factor)
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+
+  // the loop below also scales the padding elements of chunks that overlap
+  // the boundaries of the actual matrix; scaling keeps them at zero, so the
+  // invariant on these elements is not violated
+  const size_type chunk_size = cols->get_chunk_size();
+  const number *const end_of_values
+    = val +
+      cols->sparsity_pattern.n_nonzero_elements()
+      *
+      chunk_size * chunk_size;
+
+  for (number *entry = val; entry != end_of_values; ++entry)
+    *entry *= factor;
+
+  return *this;
+}
+
+
+
+// Divide every stored value of the matrix by the given factor and return a
+// reference to this object. The division is carried out as a
+// multiplication with the reciprocal of the factor.
+template <typename number>
+inline ChunkSparseMatrix<number> &
+ChunkSparseMatrix<number>::operator /= (const number factor)
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  Assert (factor !=0, ExcDivideByZero());
+
+  const number factor_inv = 1. / factor;
+
+  // the loop below also scales the padding elements of chunks that overlap
+  // the boundaries of the actual matrix; scaling keeps them at zero, so the
+  // invariant on these elements is not violated
+  const size_type chunk_size = cols->get_chunk_size();
+  const number *const end_of_values
+    = val +
+      cols->sparsity_pattern.n_nonzero_elements()
+      *
+      chunk_size * chunk_size;
+
+  for (number *entry = val; entry != end_of_values; ++entry)
+    *entry *= factor_inv;
+
+  return *this;
+}
+
+
+
+// Return the value of entry (i,j); AssertThrow fails if it is not stored.
+template <typename number>
+inline number
+ChunkSparseMatrix<number>::operator () (const size_type i, const size_type j) const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  const size_type index = compute_location(i,j);  // look up only once
+  AssertThrow (index != ChunkSparsityPattern::invalid_entry, ExcInvalidIndex(i,j));
+  return val[index];
+}
+
+
+
+// Return the value of entry (i,j), or zero if it is not stored in the matrix.
+template <typename number>
+inline number
+ChunkSparseMatrix<number>::el (const size_type i,
+                               const size_type j) const
+{
+  Assert (cols != 0, ExcNotInitialized());
+
+  const size_type location = compute_location(i,j);
+  if (location == ChunkSparsityPattern::invalid_entry)
+    return 0;
+  return val[location];
+}
+
+
+
+// Return the i-th main diagonal entry of a quadratic matrix.
+template <typename number>
+inline number
+ChunkSparseMatrix<number>::diag_element (const size_type i) const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (m() == n(),  ExcNotQuadratic());
+  AssertIndexRange(i, m());
+
+  // Use that the first element in each row of a quadratic matrix is the main
+  // diagonal of the chunk sparsity pattern
+  const size_type chunk_size = cols->get_chunk_size();
+  const size_type offset_in_chunk = (i % chunk_size) * chunk_size + (i % chunk_size);
+  return val[cols->sparsity_pattern.rowstart[i/chunk_size]
+             *
+             chunk_size * chunk_size
+             +
+             offset_in_chunk];
+}
+
+
+
+// Fill the matrix from a range of rows, each of which is a container of
+// (column index, value) pairs; the entries are written with set().
+template <typename number> template <typename ForwardIterator>
+inline void
+ChunkSparseMatrix<number>::copy_from (const ForwardIterator begin,
+                                      const ForwardIterator end)
+{
+  Assert (static_cast<size_type >(std::distance (begin, end)) == m(),
+          ExcIteratorRange (std::distance (begin, end), m()));
+
+  // iterator type over the entries of a single row
+  typedef typename std::iterator_traits<ForwardIterator>::value_type::const_iterator inner_iterator;
+
+  size_type current_row = 0;
+  for (ForwardIterator row_it=begin; row_it!=end; ++row_it)
+    {
+      const inner_iterator row_end = row_it->end();
+      for (inner_iterator entry=row_it->begin(); entry!=row_end; ++entry)
+        set (current_row, entry->first, entry->second);
+      ++current_row;
+    }
+}
+
+
+
+//---------------------------------------------------------------------------
+
+
+namespace ChunkSparseMatrixIterators
+{
+  template <typename number>
+  inline
+  Accessor<number,true>::
+  Accessor (const MatrixType   *matrix,
+            const unsigned int  row)
+    :
+    ChunkSparsityPatternIterators::Accessor (&matrix->get_sparsity_pattern(),
+                                             row),
+    matrix (matrix)
+  {}
+
+
+
+  template <typename number>
+  inline
+  Accessor<number,true>::
+  Accessor (const MatrixType *matrix)
+    :
+    ChunkSparsityPatternIterators::Accessor (&matrix->get_sparsity_pattern()),
+    matrix (matrix)
+  {}
+
+
+
+  template <typename number>
+  inline
+  Accessor<number,true>::
+  Accessor (const ChunkSparseMatrixIterators::Accessor<number,false> &a)
+    :
+    ChunkSparsityPatternIterators::Accessor (a),
+    matrix (&a.get_matrix())
+  {}
+
+
+
+  // Read the value of the matrix entry this (const) accessor currently
+  // points to.
+  template <typename number>
+  inline number
+  Accessor<number, true>::value () const
+  {
+    const unsigned int chunk_size = matrix->get_sparsity_pattern().get_chunk_size();
+    // offset of the chunk, then row and column within the chunk
+    return matrix->val[reduced_index() * chunk_size * chunk_size
+                       + chunk_row * chunk_size
+                       + chunk_col];
+  }
+
+
+
+  template <typename number>
+  inline
+  const typename Accessor<number, true>::MatrixType &
+  Accessor<number, true>::get_matrix () const
+  {
+    return *matrix;
+  }
+
+
+
+  template <typename number>
+  inline
+  Accessor<number, false>::Reference::Reference (
+    const Accessor *accessor,
+    const bool)
+    :
+    accessor (accessor)
+  {}
+
+
+  // Convert the reference to the current value of the matrix entry.
+  template <typename number>
+  inline
+  Accessor<number, false>::Reference::operator number() const
+  {
+    const unsigned int chunk_size = accessor->matrix->get_sparsity_pattern().get_chunk_size();
+    // offset of the chunk, then row and column within the chunk
+    return accessor->matrix->val[accessor->reduced_index() * chunk_size * chunk_size
+                                 + accessor->chunk_row * chunk_size
+                                 + accessor->chunk_col];
+  }
+
+
+
+  // Overwrite the matrix entry this reference points to with n.
+  template <typename number>
+  inline const typename Accessor<number, false>::Reference &
+  Accessor<number, false>::Reference::operator = (const number n) const
+  {
+    const unsigned int chunk_size = accessor->matrix->get_sparsity_pattern().get_chunk_size();
+    // offset of the chunk, then row and column within the chunk
+    number &entry = accessor->matrix->val[accessor->reduced_index() * chunk_size * chunk_size
+                                          + accessor->chunk_row * chunk_size
+                                          + accessor->chunk_col];
+    entry = n;
+    return *this;
+  }
+
+
+
+  // Add n to the matrix entry this reference points to.
+  template <typename number>
+  inline const typename Accessor<number, false>::Reference &
+  Accessor<number, false>::Reference::operator += (const number n) const
+  {
+    const unsigned int chunk_size = accessor->matrix->get_sparsity_pattern().get_chunk_size();
+    // offset of the chunk, then row and column within the chunk
+    number &entry = accessor->matrix->val[accessor->reduced_index() * chunk_size * chunk_size
+                                          + accessor->chunk_row * chunk_size
+                                          + accessor->chunk_col];
+    entry += n;
+    return *this;
+  }
+
+
+
+  // Subtract n from the matrix entry this reference points to.
+  template <typename number>
+  inline const typename Accessor<number, false>::Reference &
+  Accessor<number, false>::Reference::operator -= (const number n) const
+  {
+    const unsigned int chunk_size = accessor->matrix->get_sparsity_pattern().get_chunk_size();
+    // offset of the chunk, then row and column within the chunk
+    number &entry = accessor->matrix->val[accessor->reduced_index() * chunk_size * chunk_size
+                                          + accessor->chunk_row * chunk_size
+                                          + accessor->chunk_col];
+    entry -= n;
+    return *this;
+  }
+
+
+
+  // Multiply the matrix entry this reference points to by n.
+  template <typename number>
+  inline const typename Accessor<number, false>::Reference &
+  Accessor<number, false>::Reference::operator *= (const number n) const
+  {
+    const unsigned int chunk_size = accessor->matrix->get_sparsity_pattern().get_chunk_size();
+    // offset of the chunk, then row and column within the chunk
+    number &entry = accessor->matrix->val[accessor->reduced_index() * chunk_size * chunk_size
+                                          + accessor->chunk_row * chunk_size
+                                          + accessor->chunk_col];
+    entry *= n;
+    return *this;
+  }
+
+
+
+  // Divide the matrix entry this reference points to by n.
+  template <typename number>
+  inline const typename Accessor<number, false>::Reference &
+  Accessor<number, false>::Reference::operator /= (const number n) const
+  {
+    const unsigned int chunk_size = accessor->matrix->get_sparsity_pattern().get_chunk_size();
+    // offset of the chunk, then row and column within the chunk
+    number &entry = accessor->matrix->val[accessor->reduced_index() * chunk_size * chunk_size
+                                          + accessor->chunk_row * chunk_size
+                                          + accessor->chunk_col];
+    entry /= n;
+    return *this;
+  }
+
+
+
+  template <typename number>
+  inline
+  Accessor<number,false>::
+  Accessor (MatrixType         *matrix,
+            const unsigned int  row)
+    :
+    ChunkSparsityPatternIterators::Accessor (&matrix->get_sparsity_pattern(),
+                                             row),
+    matrix (matrix)
+  {}
+
+
+
+  template <typename number>
+  inline
+  Accessor<number,false>::
+  Accessor (MatrixType         *matrix)
+    :
+    ChunkSparsityPatternIterators::Accessor (&matrix->get_sparsity_pattern()),
+    matrix (matrix)
+  {}
+
+
+
+  template <typename number>
+  inline
+  typename Accessor<number, false>::Reference
+  Accessor<number, false>::value() const
+  {
+    return Reference(this,true);
+  }
+
+
+
+
+  template <typename number>
+  inline
+  typename Accessor<number, false>::MatrixType &
+  Accessor<number, false>::get_matrix () const
+  {
+    return *matrix;
+  }
+
+
+
+  template <typename number, bool Constness>
+  inline
+  Iterator<number, Constness>::
+  Iterator (MatrixType        *matrix,
+            const unsigned int row)
+    :
+    accessor(matrix, row)
+  {}
+
+
+
+  template <typename number, bool Constness>
+  inline
+  Iterator<number, Constness>::
+  Iterator (MatrixType *matrix)
+    :
+    accessor(matrix)
+  {}
+
+
+
+  template <typename number, bool Constness>
+  inline
+  Iterator<number, Constness>::
+  Iterator (const ChunkSparseMatrixIterators::Iterator<number,false> &i)
+    :
+    accessor(*i)
+  {}
+
+
+
+  template <typename number, bool Constness>
+  inline
+  Iterator<number, Constness> &
+  Iterator<number,Constness>::operator++ ()
+  {
+    accessor.advance ();
+    return *this;
+  }
+
+
+  template <typename number, bool Constness>
+  inline
+  Iterator<number,Constness>
+  Iterator<number,Constness>::operator++ (int)
+  {
+    const Iterator iter = *this;
+    accessor.advance ();
+    return iter;
+  }
+
+
+  template <typename number, bool Constness>
+  inline
+  const Accessor<number,Constness> &
+  Iterator<number,Constness>::operator* () const
+  {
+    return accessor;
+  }
+
+
+  template <typename number, bool Constness>
+  inline
+  const Accessor<number,Constness> *
+  Iterator<number,Constness>::operator-> () const
+  {
+    return &accessor;
+  }
+
+
+  template <typename number, bool Constness>
+  inline
+  bool
+  Iterator<number,Constness>::
+  operator == (const Iterator &other) const
+  {
+    return (accessor == other.accessor);
+  }
+
+
+  template <typename number, bool Constness>
+  inline
+  bool
+  Iterator<number,Constness>::
+  operator != (const Iterator &other) const
+  {
+    return ! (*this == other);
+  }
+
+
+  template <typename number, bool Constness>
+  inline
+  bool
+  Iterator<number,Constness>::
+  operator < (const Iterator &other) const
+  {
+    Assert (&accessor.get_matrix() == &other.accessor.get_matrix(),
+            ExcInternalError());
+
+    return (accessor < other.accessor);
+  }
+
+
+  template <typename number, bool Constness>
+  inline
+  bool
+  Iterator<number,Constness>::
+  operator > (const Iterator &other) const
+  {
+    return (other < *this);
+  }
+
+
+  // Return the signed distance between the two iterators, i.e. how many
+  // times the earlier one has to be incremented to reach the later one.
+  // The result is negative if this iterator comes before other. Both
+  // iterators must refer to the same matrix.
+  template <typename number, bool Constness>
+  inline
+  int
+  Iterator<number,Constness>::
+  operator - (const Iterator &other) const
+  {
+    Assert (&accessor.get_matrix() == &other.accessor.get_matrix(),
+            ExcInternalError());
+
+    // TODO: can be optimized
+    int difference = 0;
+
+    if (*this < other)
+      {
+        // walk forward from this iterator, counting downwards
+        for (Iterator walker = *this; walker != other; ++walker)
+          --difference;
+      }
+    else
+      {
+        // walk forward from other, counting upwards
+        for (Iterator walker = other; walker != *this; ++walker)
+          ++difference;
+      }
+
+    return difference;
+  }
+
+
+
+  // Return a copy of this iterator advanced by n entries.
+  template <typename number, bool Constness>
+  inline Iterator<number,Constness>
+  Iterator<number,Constness>::
+  operator + (const unsigned int n) const
+  {
+    Iterator advanced = *this;
+    unsigned int remaining = n;
+    while (remaining-- > 0)
+      ++advanced;
+    return advanced;
+  }
+
+}
+
+
+
+template <typename number>
+inline
+typename ChunkSparseMatrix<number>::const_iterator
+ChunkSparseMatrix<number>::begin () const
+{
+  return const_iterator(this, 0);
+}
+
+
+template <typename number>
+inline
+typename ChunkSparseMatrix<number>::const_iterator
+ChunkSparseMatrix<number>::end () const
+{
+  return const_iterator(this);
+}
+
+
+template <typename number>
+inline
+typename ChunkSparseMatrix<number>::iterator
+ChunkSparseMatrix<number>::begin ()
+{
+  return iterator(this, 0);
+}
+
+
+template <typename number>
+inline
+typename ChunkSparseMatrix<number>::iterator
+ChunkSparseMatrix<number>::end ()
+{
+  return iterator(this);
+}
+
+
+template <typename number>
+inline
+typename ChunkSparseMatrix<number>::const_iterator
+ChunkSparseMatrix<number>::begin (const unsigned int r) const
+{
+  Assert (r<m(), ExcIndexRange(r,0,m()));
+  return const_iterator(this, r);
+}
+
+
+
+template <typename number>
+inline
+typename ChunkSparseMatrix<number>::const_iterator
+ChunkSparseMatrix<number>::end (const unsigned int r) const
+{
+  Assert (r<m(), ExcIndexRange(r,0,m()));
+  return const_iterator(this, r+1);
+}
+
+
+
+template <typename number>
+inline
+typename ChunkSparseMatrix<number>::iterator
+ChunkSparseMatrix<number>::begin (const unsigned int r)
+{
+  Assert (r<m(), ExcIndexRange(r,0,m()));
+  return iterator(this, r);
+}
+
+
+
+template <typename number>
+inline
+typename ChunkSparseMatrix<number>::iterator
+ChunkSparseMatrix<number>::end (const unsigned int r)
+{
+  Assert (r<m(), ExcIndexRange(r,0,m()));
+  return iterator(this, r+1);
+}
+
+
+
+
+#endif // DOXYGEN
+
+
+/*----------------------------   chunk_sparse_matrix.h     ---------------------------*/
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
+/*----------------------------   chunk_sparse_matrix.h     ---------------------------*/
diff --git a/include/deal.II/lac/chunk_sparse_matrix.templates.h b/include/deal.II/lac/chunk_sparse_matrix.templates.h
new file mode 100644
index 0000000..b0f6214
--- /dev/null
+++ b/include/deal.II/lac/chunk_sparse_matrix.templates.h
@@ -0,0 +1,1639 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__chunk_sparse_matrix_templates_h
+#define dealii__chunk_sparse_matrix_templates_h
+
+
+#include <deal.II/base/template_constraints.h>
+#include <deal.II/base/parallel.h>
+#include <deal.II/lac/chunk_sparse_matrix.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/full_matrix.h>
+
+
+#include <ostream>
+#include <iomanip>
+#include <algorithm>
+#include <functional>
+#include <cmath>
+
+#include <vector>
+#include <numeric>
+
+#include <deal.II/base/thread_management.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace internal
+{
+//TODO: the goal of the ChunkSparseMatrix class is to stream data and use
+// the vectorization features of modern processors. to make this happen,
+// we will have to vectorize the functions in the following namespace, either
+// by hand or by using, for example, optimized BLAS versions for them.
+  namespace ChunkSparseMatrix
+  {
+    /**
+     * Declare type for container size.
+     */
+    typedef types::global_dof_index size_type;
+
+    /**
+     * Compute dst += M * src for one chunk, where M is a dense chunk_size x
+     * chunk_size block stored row by row starting at matrix, and src and dst
+     * are indexed from 0 to chunk_size-1.
+     */
+    template <typename MatrixIterator,
+              typename SrcIterator,
+              typename DstIterator>
+    inline
+    void
+    chunk_vmult_add (const size_type      chunk_size,
+                     const MatrixIterator matrix,
+                     const SrcIterator    src,
+                     DstIterator          dst)
+    {
+      typedef typename std::iterator_traits<DstIterator>::value_type value_type;
+
+      MatrixIterator row_begin = matrix;
+      for (size_type row=0; row<chunk_size; ++row)
+        {
+          value_type row_sum = 0;
+          for (size_type col=0; col<chunk_size; ++col)
+            row_sum += row_begin[col] * src[col];
+
+          dst[row] += row_sum;
+          // advance to the next row of the chunk
+          row_begin += chunk_size;
+        }
+    }
+
+
+
+    /**
+     * Compute dst -= M * src for one chunk stored row by row at matrix. This
+     * is the subtracting counterpart of chunk_vmult_add.
+     */
+    template <typename MatrixIterator,
+              typename SrcIterator,
+              typename DstIterator>
+    inline
+    void
+    chunk_vmult_subtract (const size_type chunk_size,
+                          const MatrixIterator matrix,
+                          const SrcIterator    src,
+                          DstIterator          dst)
+    {
+      typedef typename std::iterator_traits<DstIterator>::value_type value_type;
+
+      MatrixIterator row_begin = matrix;
+      for (size_type row=0; row<chunk_size; ++row)
+        {
+          value_type row_sum = 0;
+          for (size_type col=0; col<chunk_size; ++col)
+            row_sum += row_begin[col] * src[col];
+
+          dst[row] -= row_sum;
+          // advance to the next row of the chunk
+          row_begin += chunk_size;
+        }
+    }
+
+
+    /**
+     * Compute dst += M^T * src for one chunk, where M is a dense chunk_size
+     * x chunk_size block stored row by row starting at matrix, and src and
+     * dst are indexed from 0 to chunk_size-1.
+     */
+    template <typename MatrixIterator,
+              typename SrcIterator,
+              typename DstIterator>
+    inline
+    void
+    chunk_Tvmult_add (const size_type      chunk_size,
+                      const MatrixIterator matrix,
+                      const SrcIterator    src,
+                      DstIterator          dst)
+    {
+      typedef typename std::iterator_traits<DstIterator>::value_type value_type;
+
+      for (size_type col=0; col<chunk_size; ++col)
+        {
+          // column col of the chunk is row col of its transpose
+          value_type column_sum = 0;
+          for (size_type row=0; row<chunk_size; ++row)
+            column_sum += matrix[row*chunk_size+col] * src[row];
+
+          dst[col] += column_sum;
+        }
+    }
+
+
+    /**
+     * Compute the contribution $u^TMv$ of one chunk to a matrix scalar
+     * product, where M is the chunk_size x chunk_size block stored at matrix.
+     */
+    template <typename result_type,
+              typename MatrixIterator,
+              typename SrcIterator1,
+              typename SrcIterator2>
+    inline
+    result_type
+    chunk_matrix_scalar_product (const size_type      chunk_size,
+                                 const MatrixIterator matrix,
+                                 const SrcIterator1   u,
+                                 const SrcIterator2   v)
+    {
+      typedef typename std::iterator_traits<SrcIterator2>::value_type value_type;
+
+      result_type result = 0;
+      MatrixIterator row_begin = matrix;
+      for (size_type row=0; row<chunk_size; ++row)
+        {
+          // entry number row of the product M*v
+          value_type Mv_row = 0;
+          for (size_type col=0; col<chunk_size; ++col)
+            Mv_row += row_begin[col] * v[col];
+
+          result += u[row] * Mv_row;
+          // advance to the next row of the chunk
+          row_begin += chunk_size;
+        }
+
+      return result;
+    }
+
+
+
+    /**
+     * Perform a vmult_add using the ChunkSparseMatrix data structures, but
+     * only on the chunk rows [begin_row, end_row). Note that both bounds count
+     * rows of chunks, not rows of the matrix.
+     *
+     * In the sequential case this function is called on all chunk rows; in the
+     * parallel case the task scheduler may call it on a subrange.
+     */
+    template <typename number,
+              typename InVector,
+              typename OutVector>
+    void vmult_add_on_subrange (const ChunkSparsityPattern &cols,
+                                const unsigned int  begin_row,
+                                const unsigned int  end_row,
+                                const number       *values,
+                                const std::size_t  *rowstart,
+                                const size_type    *colnums,
+                                const InVector     &src,
+                                OutVector          &dst)
+    {
+      const size_type m = cols.n_rows();
+      const size_type n = cols.n_cols();
+      const size_type chunk_size = cols.get_chunk_size();
+
+      // loop over all chunks. note that we need to treat the last chunk row
+      // and column differently if they have padding elements
+      const size_type n_filled_last_rows = m % chunk_size;
+      const size_type n_filled_last_cols = n % chunk_size;
+
+      const size_type last_regular_row = n_filled_last_rows > 0 ?
+                                         std::min(m/chunk_size,
+                                                  static_cast<size_type>(end_row)) :
+                                         end_row;
+      const size_type irregular_col = n/chunk_size;
+
+      typename OutVector::iterator dst_ptr = dst.begin()+chunk_size*begin_row;
+      const number *val_ptr= &values[rowstart[begin_row]*chunk_size*chunk_size];
+      const size_type *colnum_ptr = &colnums[rowstart[begin_row]];
+      for (unsigned int chunk_row=begin_row; chunk_row<last_regular_row;
+           ++chunk_row)
+        {
+          const number *const val_end_of_row = &values[rowstart[chunk_row+1] *
+                                                       chunk_size * chunk_size];
+          while (val_ptr != val_end_of_row)
+            {
+              if (*colnum_ptr != irregular_col)
+                chunk_vmult_add (chunk_size,
+                                 val_ptr,
+                                 src.begin() + *colnum_ptr * chunk_size,
+                                 dst_ptr);
+              else
+                // we're at a chunk column that has padding
+                for (size_type r=0; r<chunk_size; ++r)
+                  for (size_type c=0; c<n_filled_last_cols; ++c)
+                    dst_ptr[r] += (val_ptr[r*chunk_size + c] *
+                                   src(*colnum_ptr * chunk_size + c));
+
+              ++colnum_ptr;
+              val_ptr += chunk_size * chunk_size;
+            }
+
+          dst_ptr += chunk_size;
+        }
+
+      // now deal with last chunk row if necessary
+      if (n_filled_last_rows > 0 && end_row == (m/chunk_size+1))
+        {
+          const size_type chunk_row = last_regular_row;
+
+          const number *const val_end_of_row = &values[rowstart[chunk_row+1] *
+                                                       chunk_size * chunk_size];
+          while (val_ptr != val_end_of_row)
+            {
+              if (*colnum_ptr != irregular_col)
+                {
+                  // we're at a chunk row but not column that has padding
+                  for (size_type r=0; r<n_filled_last_rows; ++r)
+                    for (size_type c=0; c<chunk_size; ++c)
+                      dst_ptr[r]
+                      += (val_ptr[r*chunk_size + c] *
+                          src(*colnum_ptr * chunk_size + c));
+                }
+              else
+                // we're at a chunk row and column that has padding
+                for (size_type r=0; r<n_filled_last_rows; ++r)
+                  for (size_type c=0; c<n_filled_last_cols; ++c)
+                    dst_ptr[r]
+                    += (val_ptr[r*chunk_size + c] *
+                        src(*colnum_ptr * chunk_size + c));
+
+              ++colnum_ptr;
+              val_ptr += chunk_size * chunk_size;
+            }
+        }
+      Assert(std::size_t(colnum_ptr-&colnums[0]) == rowstart[end_row],
+             ExcInternalError());
+      Assert(std::size_t(val_ptr-&values[0]) ==
+             rowstart[end_row] * chunk_size * chunk_size,
+             ExcInternalError());
+    }
+  }
+}
+
+
+
+template <typename number>
+ChunkSparseMatrix<number>::ChunkSparseMatrix ()
+  :
+  cols(0, "ChunkSparseMatrix"),
+  val(0),
+  max_len(0)
+{}
+
+
+
+template <typename number>
+ChunkSparseMatrix<number>::ChunkSparseMatrix (const ChunkSparseMatrix &m)
+  :
+  Subscriptor (m),
+  cols(0, "ChunkSparseMatrix"),
+  val(0),
+  max_len(0)
+{
+  Assert (m.cols==0, ExcInvalidConstructorCall());
+  Assert (m.val==0, ExcInvalidConstructorCall());
+  Assert (m.max_len==0, ExcInvalidConstructorCall());
+}
+
+
+
+template <typename number>
+ChunkSparseMatrix<number> &
+ChunkSparseMatrix<number>::operator = (const ChunkSparseMatrix<number> &m)
+{
+  (void)m;
+  Assert (m.cols==0, ExcInvalidConstructorCall());
+  Assert (m.val==0, ExcInvalidConstructorCall());
+  Assert (m.max_len==0, ExcInvalidConstructorCall());
+
+  return *this;
+}
+
+
+
+template <typename number>
+ChunkSparseMatrix<number>::ChunkSparseMatrix (const ChunkSparsityPattern &c)
+  :
+  cols(0, "ChunkSparseMatrix"),
+  val(0),
+  max_len(0)
+{
+  reinit (c);
+}
+
+
+
+template <typename number>
+ChunkSparseMatrix<number>::ChunkSparseMatrix (const ChunkSparsityPattern &c,
+                                              const IdentityMatrix  &id)
+  :
+  cols(0, "ChunkSparseMatrix"),
+  val(0),
+  max_len(0)
+{
+  (void)id;
+  Assert (c.n_rows() == id.m(), ExcDimensionMismatch (c.n_rows(), id.m()));
+  Assert (c.n_cols() == id.n(), ExcDimensionMismatch (c.n_cols(), id.n()));
+
+  reinit (c);
+  for (size_type i=0; i<n(); ++i)
+    this->set(i,i,1.);
+}
+
+
+
+// Reset the pointer to the sparsity pattern and free the array of values.
+template <typename number>
+ChunkSparseMatrix<number>::~ChunkSparseMatrix ()
+{
+  cols = 0;
+
+  delete[] val;  // no null check needed: delete[] on a null pointer is a no-op
+}
+
+
+
+namespace internal
+{
+  namespace ChunkSparseMatrix
+  {
+    // Set the elements dst[begin], ..., dst[end-1] to a value-initialized T.
+    template<typename T>
+    void zero_subrange (const unsigned int begin,
+                        const unsigned int end, T *dst)
+    {
+      std::fill (dst+begin, dst+end, T());
+    }
+  }
+}
+
+
+
+template <typename number>
+ChunkSparseMatrix<number> &
+ChunkSparseMatrix<number>::operator = (const double d)
+{
+  (void)d;
+  Assert (d==0, ExcScalarAssignmentOnlyForZeroValue());
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (cols->sparsity_pattern.compressed || cols->empty(),
+          ChunkSparsityPattern::ExcNotCompressed());
+
+  // an empty pattern is explicitly allowed above; without rows there is
+  // nothing to zero, and the grain size below would divide by m() == 0
+  if (m() == 0)
+    return *this;
+
+  // zero the elements in parallel to get a memory layout similar to that of
+  // matrix-vector products (relevant on some NUMA systems). The grain size
+  // reflects minimum_parallel_grain_size rows, weighted by the average
+  // number of nonzero entries per row.
+  const unsigned int matrix_size = cols->sparsity_pattern.n_nonzero_elements()
+                                   * cols->chunk_size * cols->chunk_size;
+  const unsigned int grain_size =
+    internal::SparseMatrix::minimum_parallel_grain_size *
+    (matrix_size+m()) / m();
+  if (matrix_size>grain_size)
+    parallel::apply_to_subranges (0U, matrix_size,
+                                  std_cxx11::bind(&internal::ChunkSparseMatrix::template
+                                                  zero_subrange<number>,
+                                                  std_cxx11::_1, std_cxx11::_2,
+                                                  val),
+                                  grain_size);
+  else if (matrix_size > 0)
+    std::memset (&val[0], 0, matrix_size*sizeof(number));
+  return *this;
+}
+
+
+
+// Assignment from an identity matrix of matching size: all entries are
+// cleared and the diagonal entries (i,i), i < n(), are then set to one.
+template <typename number>
+ChunkSparseMatrix<number> &
+ChunkSparseMatrix<number>::operator= (const IdentityMatrix  &id)
+{
+  (void)id;
+  Assert (cols->n_rows() == id.m(),
+          ExcDimensionMismatch (cols->n_rows(), id.m()));
+  Assert (cols->n_cols() == id.n(),
+          ExcDimensionMismatch (cols->n_cols(), id.n()));
+
+  // wipe the current contents ...
+  this->operator= (0.);
+
+  // ... and write the unit diagonal
+  size_type diagonal = 0;
+  while (diagonal < n())
+    {
+      this->set (diagonal, diagonal, 1.);
+      ++diagonal;
+    }
+
+  return *this;
+}
+
+
+
+// Attach this matrix to the sparsity pattern @p sparsity and set all entries
+// to zero. The pattern is stored by pointer only. The value array is
+// reallocated only if the new pattern needs more room than is currently
+// allocated; otherwise the existing (possibly larger) array is reused. For
+// an empty pattern the storage is released instead.
+template <typename number>
+void
+ChunkSparseMatrix<number>::reinit (const ChunkSparsityPattern &sparsity)
+{
+  cols = &sparsity;
+
+  if (cols->empty())
+    {
+      // delete[] accepts a null pointer
+      delete[] val;
+      val = 0;
+      max_len = 0;
+      return;
+    }
+
+  // allocate not just m() * n() elements but enough so that we can store full
+  // chunks. this entails some padding elements
+  const size_type chunk_size = cols->get_chunk_size();
+  const size_type N = cols->sparsity_pattern.n_nonzero_elements() *
+                      chunk_size * chunk_size;
+  if (N > max_len || max_len == 0)
+    {
+      // reset the bookkeeping before allocating: should operator new throw,
+      // the object must not be left with a dangling pointer that the
+      // destructor would delete a second time
+      delete[] val;
+      val = 0;
+      max_len = 0;
+
+      val = new number[N];
+      max_len = N;
+    }
+
+  // zero the N entries described by the new pattern (this includes the
+  // padding elements inside chunks)
+  this->operator=(0.);
+
+  // if a larger array was reused, also clear the part beyond N so that no
+  // values of the previous matrix remain anywhere in the allocated array
+  if (max_len > N)
+    std::memset (val+N, 0, (max_len-N)*sizeof(number));
+}
+
+
+
+// Return the object to the state of a default-constructed matrix: no
+// sparsity pattern attached and no memory allocated for values.
+template <typename number>
+void
+ChunkSparseMatrix<number>::clear ()
+{
+  cols = 0;
+
+  // release the value array; delete[] on a null pointer is a no-op
+  delete[] val;
+  val     = 0;
+  max_len = 0;
+}
+
+
+
+// A matrix counts as empty if no sparsity pattern is attached, or if the
+// attached pattern reports itself as empty.
+template <typename number>
+bool
+ChunkSparseMatrix<number>::empty () const
+{
+  return (cols == 0) ? true : cols->empty();
+}
+
+
+
+// Return the number of nonzero elements as reported by the attached chunk
+// sparsity pattern. Requires that a pattern has been attached.
+template <typename number>
+typename ChunkSparseMatrix<number>::size_type
+ChunkSparseMatrix<number>::n_nonzero_elements () const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  // simply forward to the sparsity pattern
+  return cols->n_nonzero_elements ();
+}
+
+
+
+// Count the stored values that are different from zero. In contrast to
+// n_nonzero_elements(), which only asks the sparsity pattern, this function
+// inspects the stored values themselves.
+template <typename number>
+typename ChunkSparseMatrix<number>::size_type
+ChunkSparseMatrix<number>::n_actually_nonzero_elements () const
+{
+  Assert (cols != 0, ExcNotInitialized());
+
+  // count those elements that are nonzero, even if they lie in the padding
+  // around the matrix. since we have the invariant that padding elements are
+  // zero, nothing bad can happen here
+  const size_type chunk_size = cols->get_chunk_size();
+  const number *const end_ptr
+    = val + (cols->sparsity_pattern.n_nonzero_elements () *
+             chunk_size * chunk_size);
+
+  // compare in the matrix's own number type rather than after a conversion
+  // to double, so that no value is misclassified because of the conversion
+  const number zero = number(0);
+  size_type n_nonzero = 0;
+  for (const number *ptr = val; ptr != end_ptr; ++ptr)
+    if (*ptr != zero)
+      ++n_nonzero;
+
+  return n_nonzero;
+}
+
+
+
+// Placeholder for symmetrizing the matrix. After checking that a sparsity
+// pattern is attached and that it has as many rows as columns, the function
+// unconditionally reports ExcNotImplemented; no entry is modified.
+template <typename number>
+void
+ChunkSparseMatrix<number>::symmetrize ()
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (cols->rows == cols->cols, ExcNotQuadratic());
+
+  // not implemented for this matrix class
+  Assert (false, ExcNotImplemented());
+}
+
+
+
+// Copy the values of @p matrix, which must use the very same sparsity
+// pattern object as this matrix, into this matrix. The scalar type of the
+// source may differ; values are converted element by element by std::copy.
+template <typename number>
+template <typename somenumber>
+ChunkSparseMatrix<number> &
+ChunkSparseMatrix<number>::copy_from (const ChunkSparseMatrix<somenumber> &matrix)
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  Assert (cols == matrix.cols, ExcDifferentChunkSparsityPatterns());
+
+  // the range to copy covers every stored scalar, padding included
+  const size_type chunk_size = cols->get_chunk_size();
+  const size_type n_scalars  = cols->sparsity_pattern.n_nonzero_elements()
+                               * chunk_size * chunk_size;
+
+  const somenumber *const source = matrix.val;
+  std::copy (source, source+n_scalars, val);
+
+  return *this;
+}
+
+
+
+// Copy the nonzero entries of the full matrix @p matrix into this matrix.
+// All current entries are zeroed first; entries of @p matrix that compare
+// equal to zero are skipped and never passed to set().
+template <typename number>
+template <typename somenumber>
+void
+ChunkSparseMatrix<number>::copy_from (const FullMatrix<somenumber> &matrix)
+{
+  // start from an all-zero matrix
+  this->operator= (0.);
+
+  // transfer the entries of the full matrix one at a time
+  for (size_type row=0; row<matrix.m(); ++row)
+    for (size_type col=0; col<matrix.n(); ++col)
+      {
+        if (matrix(row,col) == 0)
+          continue;
+
+        set (row, col, matrix(row,col));
+      }
+}
+
+
+
+// Add @p factor times @p matrix to this matrix. Both matrices must use the
+// very same sparsity pattern object, so the operation is an element-wise
+// update of the two value arrays.
+template <typename number>
+template <typename somenumber>
+void
+ChunkSparseMatrix<number>::add (const number factor,
+                                const ChunkSparseMatrix<somenumber> &matrix)
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  Assert (cols == matrix.cols, ExcDifferentChunkSparsityPatterns());
+
+  // update every stored scalar, padding elements included
+  const size_type chunk_size = cols->get_chunk_size();
+  const size_type n_scalars  = cols->sparsity_pattern.n_nonzero_elements()
+                               * chunk_size * chunk_size;
+
+  for (size_type i=0; i<n_scalars; ++i)
+    val[i] += factor * matrix.val[i];
+}
+
+
+
+// Copy the stored entries of matrix row @p row into caller-provided arrays.
+//
+// @param row            index of the matrix row to extract
+// @param array_length   only used in the range check below
+// @param row_length     on return, the number of entries written
+// @param column_indices receives the column index of every written entry
+// @param values         receives the corresponding values
+//
+// Entries are written chunk by chunk in the order of the chunk-level
+// sparsity pattern; within a chunk, consecutive columns are written.
+template <typename number>
+void
+ChunkSparseMatrix<number>::extract_row_copy (const size_type row,
+                                             const size_type array_length,
+                                             size_type &row_length,
+                                             size_type *column_indices,
+                                             number *values) const
+{
+  (void)array_length;
+  AssertIndexRange(cols->row_length(row), array_length+1);
+  AssertIndexRange(row, m());
+  const unsigned int chunk_size = cols->get_chunk_size();
+  // index of the chunk row that contains the requested matrix row
+  const size_type reduced_row = row/chunk_size;
+
+  SparsityPattern::iterator it = cols->sparsity_pattern.begin(reduced_row),
+                            itend = cols->sparsity_pattern.end(reduced_row);
+  // start of the requested row inside the first chunk of this chunk row:
+  // skip all chunks before 'it' (chunk_size*chunk_size values each) and then
+  // (row%chunk_size) rows of chunk_size values inside the chunk
+  const number *val_ptr = &val[(it-cols->sparsity_pattern.begin(0))*chunk_size*chunk_size
+                               +(row%chunk_size)*chunk_size];
+
+  // find out if we did padding and if this row is affected by it
+  if (cols->n_cols() % chunk_size == 0)
+    {
+      // no column padding: every chunk contributes chunk_size entries
+      for ( ; it < itend; ++it)
+        {
+          for (unsigned int c=0; c<chunk_size; ++c)
+            {
+              *values++ = val_ptr[c];
+              *column_indices++ = it->column()*chunk_size+c;
+            }
+          // advance to the same row in the next chunk
+          val_ptr += chunk_size*chunk_size;
+        }
+      row_length = chunk_size * (cols->sparsity_pattern.row_length(reduced_row));
+    }
+  else
+    {
+      // the last chunk column is only partially filled with real columns
+      const unsigned int last_chunk_size = cols->n_cols() % chunk_size;
+      row_length = 0;
+      for ( ; it < itend; ++it)
+        {
+          // only the chunk in the last chunk column is cut short
+          const unsigned int next_chunk_size =
+            (it->column()==cols->sparsity_pattern.n_cols()-1) ?
+            last_chunk_size : chunk_size;
+          for (unsigned int c=0; c<next_chunk_size; ++c, ++row_length)
+            {
+              *values++ = val_ptr[c];
+              *column_indices++ = it->column()*chunk_size+c;
+            }
+          val_ptr += chunk_size*chunk_size;
+        }
+    }
+}
+
+
+
+// Matrix-vector product: dst = M*src, where M is this matrix. @p dst must
+// have m() elements, @p src must have n() elements, and the two must not be
+// the same object.
+template <typename number>
+template <class OutVector, class InVector>
+void
+ChunkSparseMatrix<number>::vmult (OutVector &dst,
+                                  const InVector &src) const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  Assert(m() == dst.size(), ExcDimensionMismatch(m(),dst.size()));
+  Assert(n() == src.size(), ExcDimensionMismatch(n(),src.size()));
+
+  Assert (!PointerComparison::equal(&src, &dst), ExcSourceEqualsDestination());
+
+  // set the output vector to zero and then add to it the contributions of
+  // vmults from individual chunks. this is what vmult_add does
+  dst = 0;
+  vmult_add (dst, src);
+}
+
+
+
+// Transposed matrix-vector product: dst = M^T*src, where M is this matrix.
+// Because the matrix is applied transposed, @p dst must have n() elements
+// and @p src must have m() elements. The two vectors must not be the same
+// object.
+template <typename number>
+template <class OutVector, class InVector>
+void
+ChunkSparseMatrix<number>::Tvmult (OutVector &dst,
+                                   const InVector &src) const
+{
+  Assert (val != 0, ExcNotInitialized());
+  Assert (cols != 0, ExcNotInitialized());
+  Assert(n() == dst.size(), ExcDimensionMismatch(n(),dst.size()));
+  Assert(m() == src.size(), ExcDimensionMismatch(m(),src.size()));
+
+  Assert (!PointerComparison::equal(&src, &dst), ExcSourceEqualsDestination());
+
+  // note: only the transposed dimension checks above apply here; checking
+  // m() against dst and n() against src as well would reject every valid
+  // call on a non-square matrix
+
+  // set the output vector to zero and then add to it the contributions of
+  // transposed vmults from individual chunks. this is what Tvmult_add does
+  dst = 0;
+  Tvmult_add (dst, src);
+}
+
+
+
+// Adding matrix-vector product: dst += M*src, where M is this matrix. The
+// chunk rows are distributed over parallel tasks, each of which runs
+// internal::ChunkSparseMatrix::vmult_add_on_subrange on its range of rows.
+template <typename number>
+template <class OutVector, class InVector>
+void
+ChunkSparseMatrix<number>::vmult_add (OutVector &dst,
+                                      const InVector &src) const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  Assert(m() == dst.size(), ExcDimensionMismatch(m(),dst.size()));
+  Assert(n() == src.size(), ExcDimensionMismatch(n(),src.size()));
+
+  Assert (!PointerComparison::equal(&src, &dst), ExcSourceEqualsDestination());
+
+  // minimal number of chunk rows handed to a single task: the grain size in
+  // terms of matrix rows, converted to chunk rows and rounded up by one
+  const unsigned int chunk_rows_per_task
+    = internal::SparseMatrix::minimum_parallel_grain_size/cols->chunk_size+1;
+
+  parallel::apply_to_subranges (0U, cols->sparsity_pattern.n_rows(),
+                                std_cxx11::bind (&internal::ChunkSparseMatrix::vmult_add_on_subrange
+                                                 <number,InVector,OutVector>,
+                                                 std_cxx11::cref(*cols),
+                                                 std_cxx11::_1, std_cxx11::_2,
+                                                 val,
+                                                 cols->sparsity_pattern.rowstart,
+                                                 cols->sparsity_pattern.colnums,
+                                                 std_cxx11::cref(src),
+                                                 std_cxx11::ref(dst)),
+                                chunk_rows_per_task);
+}
+
+
+// Adding transposed matrix-vector product: dst += M^T*src, where M is this
+// matrix. Because the matrix is applied transposed, @p src is indexed by
+// matrix rows and must have m() elements, while @p dst is indexed by matrix
+// columns and must have n() elements. The two vectors must not be the same
+// object.
+template <typename number>
+template <class OutVector, class InVector>
+void
+ChunkSparseMatrix<number>::Tvmult_add (OutVector &dst,
+                                       const InVector &src) const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  Assert(n() == dst.size(), ExcDimensionMismatch(n(),dst.size()));
+  Assert(m() == src.size(), ExcDimensionMismatch(m(),src.size()));
+
+  Assert (!PointerComparison::equal(&src, &dst), ExcSourceEqualsDestination());
+
+  const size_type chunk_size   = cols->chunk_size;
+  const size_type n_chunk_rows = cols->sparsity_pattern.n_rows();
+
+  // loop over all chunks. note that we need to treat the last chunk row and
+  // column differently if they have padding elements
+  const bool rows_have_padding = (m() % chunk_size != 0),
+             cols_have_padding = (n() % chunk_size != 0);
+
+  // number of real (non-padding) rows/columns in the last chunk row/column;
+  // only meaningful if the respective padding flag is set
+  const size_type n_rows_in_last_chunk = m() % chunk_size;
+  const size_type n_cols_in_last_chunk = n() % chunk_size;
+
+  // index of the chunk column that contains padding, if there is any
+  const size_type last_chunk_col = cols->sparsity_pattern.n_cols()-1;
+
+  const size_type n_regular_chunk_rows
+    = (rows_have_padding ?
+       n_chunk_rows-1 :
+       n_chunk_rows);
+
+  // like in vmult_add, but don't keep an iterator into dst around since we're
+  // not traversing it sequentially this time
+  const number    *val_ptr    = val;
+  const size_type *colnum_ptr = cols->sparsity_pattern.colnums;
+
+  for (size_type chunk_row=0; chunk_row<n_regular_chunk_rows; ++chunk_row)
+    {
+      const number *const val_end_of_row = &val[cols->sparsity_pattern.rowstart[chunk_row+1]
+                                                * chunk_size
+                                                * chunk_size];
+      while (val_ptr != val_end_of_row)
+        {
+          if ((cols_have_padding == false)
+              ||
+              (*colnum_ptr != last_chunk_col))
+            // a complete chunk: use the optimized kernel
+            internal::ChunkSparseMatrix::chunk_Tvmult_add
+            (chunk_size,
+             val_ptr,
+             src.begin() + chunk_row * chunk_size,
+             dst.begin() + *colnum_ptr * chunk_size);
+          else
+            // we're at a chunk column that has padding
+            for (size_type r=0; r<chunk_size; ++r)
+              for (size_type c=0; c<n_cols_in_last_chunk; ++c)
+                dst(*colnum_ptr * chunk_size + c)
+                += (val_ptr[r*chunk_size + c] *
+                    src(chunk_row * chunk_size + r));
+
+          ++colnum_ptr;
+          val_ptr += chunk_size * chunk_size;
+        }
+    }
+
+  // now deal with last chunk row if necessary
+  if (rows_have_padding)
+    {
+      const size_type chunk_row = n_chunk_rows - 1;
+
+      const number *const val_end_of_row = &val[cols->sparsity_pattern.rowstart[chunk_row+1]
+                                                * chunk_size
+                                                * chunk_size];
+      while (val_ptr != val_end_of_row)
+        {
+          // only the real rows of this chunk may be touched. the number of
+          // columns is the full chunk size unless we are also in the chunk
+          // column that has padding
+          const bool in_padded_column
+            = (cols_have_padding && (*colnum_ptr == last_chunk_col));
+          const size_type n_cols_in_chunk
+            = (in_padded_column ? n_cols_in_last_chunk : chunk_size);
+
+          for (size_type r=0; r<n_rows_in_last_chunk; ++r)
+            for (size_type c=0; c<n_cols_in_chunk; ++c)
+              dst(*colnum_ptr * chunk_size + c)
+              += (val_ptr[r*chunk_size + c] *
+                  src(chunk_row * chunk_size + r));
+
+          ++colnum_ptr;
+          val_ptr += chunk_size * chunk_size;
+        }
+    }
+}
+
+
+// Accumulate the sum over all stored entries a(i,j) of v(i)*a(i,j)*v(j) for
+// a vector @p v whose size must equal both m() and n(). Chunks that contain
+// padding are summed explicitly below, skipping the padding rows/columns;
+// complete chunks are delegated to
+// internal::ChunkSparseMatrix::chunk_matrix_scalar_product (defined
+// elsewhere; presumably it computes the same per-chunk sum).
+template <typename number>
+template <typename somenumber>
+somenumber
+ChunkSparseMatrix<number>::matrix_norm_square (const Vector<somenumber> &v) const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  Assert(m() == v.size(), ExcDimensionMismatch(m(),v.size()));
+  Assert(n() == v.size(), ExcDimensionMismatch(n(),v.size()));
+
+  somenumber result = 0;
+
+  ////////////////
+  // like matrix_scalar_product, except that the two vectors are now the same
+
+  const size_type n_chunk_rows = cols->sparsity_pattern.n_rows();
+
+  // loop over all chunks. note that we need to treat the last chunk row and
+  // column differently if they have padding elements
+  const bool rows_have_padding = (m() % cols->chunk_size != 0),
+             cols_have_padding = (n() % cols->chunk_size != 0);
+
+  const size_type n_regular_chunk_rows
+    = (rows_have_padding ?
+       n_chunk_rows-1 :
+       n_chunk_rows);
+
+  // val_ptr and colnum_ptr walk the stored chunks in storage order; v_ptr
+  // points to the part of v that belongs to the current chunk row
+  const number    *val_ptr    = val;
+  const size_type *colnum_ptr = cols->sparsity_pattern.colnums;
+  typename Vector<somenumber>::const_iterator v_ptr = v.begin();
+
+  for (size_type chunk_row=0; chunk_row<n_regular_chunk_rows; ++chunk_row)
+    {
+      // one past the last value of the last chunk in this chunk row
+      const number *const val_end_of_row = &val[cols->sparsity_pattern.rowstart[chunk_row+1]
+                                                * cols->chunk_size
+                                                * cols->chunk_size];
+      while (val_ptr != val_end_of_row)
+        {
+          if ((cols_have_padding == false)
+              ||
+              (*colnum_ptr != cols->sparsity_pattern.n_cols()-1))
+            result +=
+              internal::ChunkSparseMatrix::
+              chunk_matrix_scalar_product<somenumber>
+              (cols->chunk_size,
+               val_ptr,
+               v_ptr,
+               v.begin() + *colnum_ptr * cols->chunk_size);
+          else
+            // we're at a chunk column that has padding
+            for (size_type r=0; r<cols->chunk_size; ++r)
+              for (size_type c=0; c<n() % cols->chunk_size; ++c)
+                result
+                +=
+                  v(chunk_row * cols->chunk_size + r)
+                  * (val_ptr[r*cols->chunk_size + c] *
+                     v(*colnum_ptr * cols->chunk_size + c));
+
+          // move on to the next stored chunk
+          ++colnum_ptr;
+          val_ptr += cols->chunk_size * cols->chunk_size;
+        }
+
+
+      v_ptr += cols->chunk_size;
+    }
+
+  // now deal with last chunk row if necessary
+  if (rows_have_padding)
+    {
+      const size_type chunk_row = n_chunk_rows - 1;
+
+      const number *const val_end_of_row = &val[cols->sparsity_pattern.rowstart[chunk_row+1]
+                                                * cols->chunk_size
+                                                * cols->chunk_size];
+      while (val_ptr != val_end_of_row)
+        {
+          if ((cols_have_padding == false)
+              ||
+              (*colnum_ptr != cols->sparsity_pattern.n_cols()-1))
+            {
+              // we're at a chunk row but not column that has padding
+              for (size_type r=0; r<m() % cols->chunk_size; ++r)
+                for (size_type c=0; c<cols->chunk_size; ++c)
+                  result
+                  +=
+                    v(chunk_row * cols->chunk_size + r)
+                    * (val_ptr[r*cols->chunk_size + c] *
+                       v(*colnum_ptr * cols->chunk_size + c));
+            }
+          else
+            // we're at a chunk row and column that has padding
+            for (size_type r=0; r<m() % cols->chunk_size; ++r)
+              for (size_type c=0; c<n() % cols->chunk_size; ++c)
+                result
+                +=
+                  v(chunk_row * cols->chunk_size + r)
+                  * (val_ptr[r*cols->chunk_size + c] *
+                     v(*colnum_ptr * cols->chunk_size + c));
+
+          ++colnum_ptr;
+          val_ptr += cols->chunk_size * cols->chunk_size;
+        }
+    }
+
+  return result;
+}
+
+
+
+// Accumulate the sum over all stored entries a(i,j) of u(i)*a(i,j)*v(j).
+// @p u must have m() elements and @p v must have n() elements. Chunks that
+// contain padding are summed explicitly below, skipping the padding
+// rows/columns; complete chunks are delegated to
+// internal::ChunkSparseMatrix::chunk_matrix_scalar_product (defined
+// elsewhere; presumably it computes the same per-chunk sum).
+template <typename number>
+template <typename somenumber>
+somenumber
+ChunkSparseMatrix<number>::matrix_scalar_product (const Vector<somenumber> &u,
+                                                  const Vector<somenumber> &v) const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  Assert(m() == u.size(), ExcDimensionMismatch(m(),u.size()));
+  Assert(n() == v.size(), ExcDimensionMismatch(n(),v.size()));
+
+  // the following works like the vmult_add function
+  somenumber result = 0;
+
+  const size_type n_chunk_rows = cols->sparsity_pattern.n_rows();
+
+  // loop over all chunks. note that we need to treat the last chunk row and
+  // column differently if they have padding elements
+  const bool rows_have_padding = (m() % cols->chunk_size != 0),
+             cols_have_padding = (n() % cols->chunk_size != 0);
+
+  const size_type n_regular_chunk_rows
+    = (rows_have_padding ?
+       n_chunk_rows-1 :
+       n_chunk_rows);
+
+  // val_ptr and colnum_ptr walk the stored chunks in storage order; u_ptr
+  // points to the part of u that belongs to the current chunk row
+  const number    *val_ptr    = val;
+  const size_type *colnum_ptr = cols->sparsity_pattern.colnums;
+  typename Vector<somenumber>::const_iterator u_ptr = u.begin();
+
+  for (size_type chunk_row=0; chunk_row<n_regular_chunk_rows; ++chunk_row)
+    {
+      // one past the last value of the last chunk in this chunk row
+      const number *const val_end_of_row = &val[cols->sparsity_pattern.rowstart[chunk_row+1]
+                                                * cols->chunk_size
+                                                * cols->chunk_size];
+      while (val_ptr != val_end_of_row)
+        {
+          if ((cols_have_padding == false)
+              ||
+              (*colnum_ptr != cols->sparsity_pattern.n_cols()-1))
+            result +=
+              internal::ChunkSparseMatrix::
+              chunk_matrix_scalar_product<somenumber>
+              (cols->chunk_size,
+               val_ptr,
+               u_ptr,
+               v.begin() + *colnum_ptr * cols->chunk_size);
+          else
+            // we're at a chunk column that has padding
+            for (size_type r=0; r<cols->chunk_size; ++r)
+              for (size_type c=0; c<n() % cols->chunk_size; ++c)
+                result
+                +=
+                  u(chunk_row * cols->chunk_size + r)
+                  * (val_ptr[r*cols->chunk_size + c] *
+                     v(*colnum_ptr * cols->chunk_size + c));
+
+          // move on to the next stored chunk
+          ++colnum_ptr;
+          val_ptr += cols->chunk_size * cols->chunk_size;
+        }
+
+
+      u_ptr += cols->chunk_size;
+    }
+
+  // now deal with last chunk row if necessary
+  if (rows_have_padding)
+    {
+      const size_type chunk_row = n_chunk_rows - 1;
+
+      const number *const val_end_of_row = &val[cols->sparsity_pattern.rowstart[chunk_row+1]
+                                                * cols->chunk_size
+                                                * cols->chunk_size];
+      while (val_ptr != val_end_of_row)
+        {
+          if ((cols_have_padding == false)
+              ||
+              (*colnum_ptr != cols->sparsity_pattern.n_cols()-1))
+            {
+              // we're at a chunk row but not column that has padding
+              for (size_type r=0; r<m() % cols->chunk_size; ++r)
+                for (size_type c=0; c<cols->chunk_size; ++c)
+                  result
+                  +=
+                    u(chunk_row * cols->chunk_size + r)
+                    * (val_ptr[r*cols->chunk_size + c] *
+                       v(*colnum_ptr * cols->chunk_size + c));
+            }
+          else
+            // we're at a chunk row and column that has padding
+            for (size_type r=0; r<m() % cols->chunk_size; ++r)
+              for (size_type c=0; c<n() % cols->chunk_size; ++c)
+                result
+                +=
+                  u(chunk_row * cols->chunk_size + r)
+                  * (val_ptr[r*cols->chunk_size + c] *
+                     v(*colnum_ptr * cols->chunk_size + c));
+
+          ++colnum_ptr;
+          val_ptr += cols->chunk_size * cols->chunk_size;
+        }
+    }
+
+  return result;
+}
+
+
+
+// Return the largest column sum of absolute values of the stored entries,
+// obtained as the linfty_norm() of a vector of per-column sums.
+template <typename number>
+typename ChunkSparseMatrix<number>::real_type
+ChunkSparseMatrix<number>::l1_norm () const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+
+  const size_type chunk_size   = cols->chunk_size;
+  const size_type n_chunk_rows = cols->sparsity_pattern.n_rows();
+
+  // one slot per column, including the padding columns of the last chunk
+  // column. that way we can simply sum over complete chunks: the padding
+  // elements are zero and do not change any sum. (this relies on the vector
+  // starting out with all-zero entries.)
+  Vector<real_type> column_sums(cols->sparsity_pattern.n_cols() *
+                                chunk_size);
+
+  for (size_type chunk_row=0; chunk_row<n_chunk_rows; ++chunk_row)
+    for (size_type chunk=cols->sparsity_pattern.rowstart[chunk_row];
+         chunk<cols->sparsity_pattern.rowstart[chunk_row+1] ; ++chunk)
+      {
+        // first value of this chunk, and the first column it covers
+        const number *const chunk_values = &val[chunk * chunk_size * chunk_size];
+        const size_type     first_column = cols->sparsity_pattern.colnums[chunk] *
+                                           chunk_size;
+
+        for (size_type r=0; r<chunk_size; ++r)
+          for (size_type s=0; s<chunk_size; ++s)
+            column_sums(first_column + s) +=
+              numbers::NumberTraits<number>::abs(chunk_values[r * chunk_size + s]);
+      }
+
+  return column_sums.linfty_norm();
+}
+
+
+
+// Return the largest row sum of absolute values of the stored entries,
+// obtained as the linfty_norm() of a vector of per-row sums.
+template <typename number>
+typename ChunkSparseMatrix<number>::real_type
+ChunkSparseMatrix<number>::linfty_norm () const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+
+  // this function works like l1_norm(). it can be made more efficient
+  // (without allocating a temporary vector) as is done in the SparseMatrix
+  // class but since it is rarely called in time critical places it is
+  // probably not worth it
+  const size_type chunk_size   = cols->chunk_size;
+  const size_type n_chunk_rows = cols->sparsity_pattern.n_rows();
+
+  // one slot per row, including the padding rows of the last chunk row.
+  // that way we can simply sum over complete chunks: the padding elements
+  // are zero and do not change any sum. (this relies on the vector starting
+  // out with all-zero entries.)
+  Vector<real_type> row_sums(cols->sparsity_pattern.n_rows() *
+                             chunk_size);
+
+  for (size_type chunk_row=0; chunk_row<n_chunk_rows; ++chunk_row)
+    for (size_type chunk=cols->sparsity_pattern.rowstart[chunk_row];
+         chunk<cols->sparsity_pattern.rowstart[chunk_row+1] ; ++chunk)
+      {
+        // first value of this chunk, and the first row it covers
+        const number *const chunk_values = &val[chunk * chunk_size * chunk_size];
+        const size_type     first_row    = chunk_row * chunk_size;
+
+        for (size_type r=0; r<chunk_size; ++r)
+          for (size_type s=0; s<chunk_size; ++s)
+            row_sums(first_row + r) +=
+              numbers::NumberTraits<number>::abs(chunk_values[r * chunk_size + s]);
+      }
+
+  return row_sums.linfty_norm();
+}
+
+
+
+// Return the square root of the sum of abs_square() over all entries stored
+// for the current sparsity pattern. A matrix without a value array yields
+// zero.
+template <typename number>
+typename ChunkSparseMatrix<number>::real_type
+ChunkSparseMatrix<number>::frobenius_norm () const
+{
+  real_type norm_sqr = 0;
+
+  // nothing is stored, so there is nothing to add up
+  if (val == 0)
+    return std::sqrt (norm_sqr);
+
+  Assert (cols != 0, ExcNotInitialized());
+
+  // simply add up all entries in the sparsity pattern, without taking any
+  // reference to rows or columns
+  //
+  // padding elements are zero, so we can add them up as well. however, only
+  // sum over the entries that belong to the current pattern and not over
+  // everything that is allocated (max_len): reinit() keeps a larger array
+  // around when the matrix shrinks, and the part beyond the current pattern
+  // is not part of this matrix
+  const size_type chunk_size = cols->get_chunk_size();
+  const number *const end_ptr
+    = val + (cols->sparsity_pattern.n_nonzero_elements() *
+             chunk_size * chunk_size);
+
+  for (const number *ptr = val; ptr != end_ptr; ++ptr)
+    norm_sqr +=  numbers::NumberTraits<number>::abs_square(*ptr);
+
+  return std::sqrt (norm_sqr);
+}
+
+
+
+// Compute the residual dst = b - M*u, where M is this matrix, and return the
+// l2 norm of @p dst. @p dst and @p b must have m() elements, @p u must have
+// n() elements, and @p u and @p dst must not be the same object.
+template <typename number>
+template <typename somenumber>
+somenumber
+ChunkSparseMatrix<number>::residual (Vector<somenumber>       &dst,
+                                     const Vector<somenumber> &u,
+                                     const Vector<somenumber> &b) const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  Assert(m() == dst.size(), ExcDimensionMismatch(m(),dst.size()));
+  Assert(m() == b.size(), ExcDimensionMismatch(m(),b.size()));
+  Assert(n() == u.size(), ExcDimensionMismatch(n(),u.size()));
+
+  Assert (&u != &dst, ExcSourceEqualsDestination());
+
+  // set dst=b, then subtract the result of A*u from it. since the purpose of
+  // the current class is to promote streaming of data rather than more random
+  // access patterns, breaking things up into two loops may be reasonable
+  dst = b;
+
+  /////////
+  // the rest of this function is like vmult_add, except that we subtract
+  // rather than add A*u
+  /////////
+  const size_type n_chunk_rows = cols->sparsity_pattern.n_rows();
+
+  // loop over all chunks. note that we need to treat the last chunk row and
+  // column differently if they have padding elements
+  const bool rows_have_padding = (m() % cols->chunk_size != 0),
+             cols_have_padding = (n() % cols->chunk_size != 0);
+
+  const size_type n_regular_chunk_rows
+    = (rows_have_padding ?
+       n_chunk_rows-1 :
+       n_chunk_rows);
+
+  // val_ptr and colnum_ptr walk the stored chunks in storage order; dst_ptr
+  // points to the part of dst that belongs to the current chunk row
+  const number       *val_ptr    = val;
+  const size_type *colnum_ptr = cols->sparsity_pattern.colnums;
+  typename Vector<somenumber>::iterator dst_ptr = dst.begin();
+
+  for (size_type chunk_row=0; chunk_row<n_regular_chunk_rows; ++chunk_row)
+    {
+      const number *const val_end_of_row = &val[cols->sparsity_pattern.rowstart[chunk_row+1]
+                                                * cols->chunk_size
+                                                * cols->chunk_size];
+      while (val_ptr != val_end_of_row)
+        {
+          if ((cols_have_padding == false)
+              ||
+              (*colnum_ptr != cols->sparsity_pattern.n_cols()-1))
+            internal::ChunkSparseMatrix::chunk_vmult_subtract
+            (cols->chunk_size,
+             val_ptr,
+             u.begin() + *colnum_ptr * cols->chunk_size,
+             dst_ptr);
+          else
+            // we're at a chunk column that has padding
+            for (size_type r=0; r<cols->chunk_size; ++r)
+              for (size_type c=0; c<n() % cols->chunk_size; ++c)
+                dst(chunk_row * cols->chunk_size + r)
+                -= (val_ptr[r*cols->chunk_size + c] *
+                    u(*colnum_ptr * cols->chunk_size + c));
+
+          ++colnum_ptr;
+          val_ptr += cols->chunk_size * cols->chunk_size;
+        }
+
+
+      dst_ptr += cols->chunk_size;
+    }
+
+  // now deal with last chunk row if necessary. this part addresses dst by
+  // index only; dst_ptr is not used any more and in particular must not be
+  // advanced by another full chunk, as that would move it past the end of
+  // dst
+  if (rows_have_padding)
+    {
+      const size_type chunk_row = n_chunk_rows - 1;
+
+      const number *const val_end_of_row = &val[cols->sparsity_pattern.rowstart[chunk_row+1]
+                                                * cols->chunk_size
+                                                * cols->chunk_size];
+      while (val_ptr != val_end_of_row)
+        {
+          if ((cols_have_padding == false)
+              ||
+              (*colnum_ptr != cols->sparsity_pattern.n_cols()-1))
+            {
+              // we're at a chunk row but not column that has padding
+              for (size_type r=0; r<m() % cols->chunk_size; ++r)
+                for (size_type c=0; c<cols->chunk_size; ++c)
+                  dst(chunk_row * cols->chunk_size + r)
+                  -= (val_ptr[r*cols->chunk_size + c] *
+                      u(*colnum_ptr * cols->chunk_size + c));
+            }
+          else
+            // we're at a chunk row and column that has padding
+            for (size_type r=0; r<m() % cols->chunk_size; ++r)
+              for (size_type c=0; c<n() % cols->chunk_size; ++c)
+                dst(chunk_row * cols->chunk_size + r)
+                -= (val_ptr[r*cols->chunk_size + c] *
+                    u(*colnum_ptr * cols->chunk_size + c));
+
+          ++colnum_ptr;
+          val_ptr += cols->chunk_size * cols->chunk_size;
+        }
+    }
+
+  // finally compute the norm
+  return dst.l2_norm();
+}
+
+
+
+// Placeholder for a Jacobi preconditioning step. After checking that the
+// matrix is initialized and square and that @p dst and @p src both have n()
+// elements, the function unconditionally reports ExcNotImplemented; @p dst
+// is not modified and the relaxation parameter is ignored.
+template <typename number>
+template <typename somenumber>
+void
+ChunkSparseMatrix<number>::precondition_Jacobi (Vector<somenumber>       &dst,
+                                                const Vector<somenumber> &src,
+                                                const number              /*om*/) const
+{
+  // the arguments are only referenced inside Assert statements
+  (void)dst;
+  (void)src;
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  Assert (m() == n(), ExcMessage("This operation is only valid on square matrices."));
+
+  Assert (dst.size() == n(), ExcDimensionMismatch (dst.size(), n()));
+  Assert (src.size() == n(), ExcDimensionMismatch (src.size(), n()));
+
+  // not implemented for this matrix class
+  Assert (false, ExcNotImplemented());
+}
+
+
+
+// Placeholder for an SSOR preconditioning step. After checking that the
+// matrix is initialized and square and that @p dst and @p src both have n()
+// elements, the function unconditionally reports ExcNotImplemented; @p dst
+// is not modified and the relaxation parameter is ignored.
+template <typename number>
+template <typename somenumber>
+void
+ChunkSparseMatrix<number>::precondition_SSOR (Vector<somenumber>       &dst,
+                                              const Vector<somenumber> &src,
+                                              const number              /*om*/) const
+{
+  // note: there is no SSOR algorithm in this function; only argument checks
+  // followed by a "not implemented" assertion.
+  // the arguments are only referenced inside Assert statements
+  (void)dst;
+  (void)src;
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  Assert (m() == n(), ExcMessage("This operation is only valid on square matrices."));
+
+  Assert (dst.size() == n(), ExcDimensionMismatch (dst.size(), n()));
+  Assert (src.size() == n(), ExcDimensionMismatch (src.size(), n()));
+
+  // not implemented for this matrix class
+  Assert (false, ExcNotImplemented());
+}
+
+// SOR preconditioner interface: copies src into dst, then calls SOR(dst,om).
+template <typename number>
+template <typename somenumber>
+void
+ChunkSparseMatrix<number>::precondition_SOR (Vector<somenumber> &dst,
+                                             const Vector<somenumber> &src,
+                                             const number om) const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  Assert (m() == n(), ExcMessage("This operation is only valid on square matrices."));
+  // SOR() takes a single vector, so dst first receives a copy of src; note
+  // that SOR() as defined in this file ends in ExcNotImplemented
+  dst = src;
+  SOR(dst,om);
+}
+
+// TSOR preconditioner interface: copies src into dst, then calls TSOR(dst,om).
+template <typename number>
+template <typename somenumber>
+void
+ChunkSparseMatrix<number>::precondition_TSOR (Vector<somenumber> &dst,
+                                              const Vector<somenumber> &src,
+                                              const number om) const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  Assert (m() == n(), ExcMessage("This operation is only valid on square matrices."));
+  // TSOR() takes a single vector, so dst first receives a copy of src; note
+  // that TSOR() as defined in this file ends in ExcNotImplemented
+  dst = src;
+  TSOR(dst,om);
+}
+
+// SOR interface acting on the single vector dst. Not implemented, see below.
+template <typename number>
+template <typename somenumber>
+void
+ChunkSparseMatrix<number>::SOR (Vector<somenumber> &dst,
+                                const number /*om*/) const
+{
+  (void)dst;
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  Assert (m() == n(), ExcMessage("This operation is only valid on square matrices."));
+  Assert (m() == dst.size(), ExcDimensionMismatch(m(),dst.size()));
+  // nothing is computed; dst is not modified and the second argument is unused
+  Assert (false, ExcNotImplemented());
+}
+
+// TSOR interface acting on the single vector dst. Not implemented, see below.
+template <typename number>
+template <typename somenumber>
+void
+ChunkSparseMatrix<number>::TSOR (Vector<somenumber> &dst,
+                                 const number /*om*/) const
+{
+  (void)dst;
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  Assert (m() == n(), ExcMessage("This operation is only valid on square matrices."));
+  Assert (m() == dst.size(), ExcDimensionMismatch(m(),dst.size()));
+  // nothing is computed; dst is not modified and the second argument is unused
+  Assert (false, ExcNotImplemented());
+}
+
+// PSOR interface taking a permutation and its inverse. Not implemented.
+template <typename number>
+template <typename somenumber>
+void
+ChunkSparseMatrix<number>::PSOR (Vector<somenumber> &dst,
+                                 const std::vector<size_type> &permutation,
+                                 const std::vector<size_type> &inverse_permutation,
+                                 const number /*om*/) const
+{
+  (void)dst;
+  (void)permutation;
+  (void)inverse_permutation;
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  Assert (m() == n(), ExcMessage("This operation is only valid on square matrices."));
+  // the vector and both permutation arrays must have exactly m() entries
+  Assert (m() == dst.size(), ExcDimensionMismatch(m(),dst.size()));
+  Assert (m() == permutation.size(),
+          ExcDimensionMismatch(m(), permutation.size()));
+  Assert (m() == inverse_permutation.size(),
+          ExcDimensionMismatch(m(), inverse_permutation.size()));
+  // nothing is computed; dst is not modified and the last argument is unused
+  Assert (false, ExcNotImplemented());
+}
+
+// TPSOR interface taking a permutation and its inverse. Not implemented.
+template <typename number>
+template <typename somenumber>
+void
+ChunkSparseMatrix<number>::TPSOR (Vector<somenumber> &dst,
+                                  const std::vector<size_type> &permutation,
+                                  const std::vector<size_type> &inverse_permutation,
+                                  const number /*om*/) const
+{
+  (void)dst;
+  (void)permutation;
+  (void)inverse_permutation;
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  Assert (m() == n(), ExcMessage("This operation is only valid on square matrices."));
+  // the vector and both permutation arrays must have exactly m() entries
+  Assert (m() == dst.size(), ExcDimensionMismatch(m(),dst.size()));
+  Assert (m() == permutation.size(),
+          ExcDimensionMismatch(m(), permutation.size()));
+  Assert (m() == inverse_permutation.size(),
+          ExcDimensionMismatch(m(), inverse_permutation.size()));
+  // nothing is computed; dst is not modified and the last argument is unused
+  Assert (false, ExcNotImplemented());
+}
+
+// SOR step interface taking the vectors v and b. Not implemented: after the
+// checks below, the function always ends in Assert(false, ExcNotImplemented).
+template <typename number>
+template <typename somenumber>
+void
+ChunkSparseMatrix<number>::SOR_step (Vector<somenumber> &v,
+                                     const Vector<somenumber> &b,
+                                     const number        /*om*/) const
+{
+  (void)v;
+  (void)b;
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  Assert (m() == n(), ExcMessage("This operation is only valid on square matrices."));
+  // both vectors must have exactly m() entries
+  Assert (m() == v.size(), ExcDimensionMismatch(m(),v.size()));
+  Assert (m() == b.size(), ExcDimensionMismatch(m(),b.size()));
+  // nothing is computed; v is not modified and the third argument is unused
+  Assert (false, ExcNotImplemented());
+}
+
+// TSOR step interface taking the vectors v and b. Not implemented: after the
+// checks below, the function always ends in Assert(false, ExcNotImplemented).
+template <typename number>
+template <typename somenumber>
+void
+ChunkSparseMatrix<number>::TSOR_step (Vector<somenumber> &v,
+                                      const Vector<somenumber> &b,
+                                      const number        /*om*/) const
+{
+  (void)v;
+  (void)b;
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  Assert (m() == n(), ExcMessage("This operation is only valid on square matrices."));
+  // both vectors must have exactly m() entries
+  Assert (m() == v.size(), ExcDimensionMismatch(m(),v.size()));
+  Assert (m() == b.size(), ExcDimensionMismatch(m(),b.size()));
+  // nothing is computed; v is not modified and the third argument is unused
+  Assert (false, ExcNotImplemented());
+}
+
+// SSOR step interface: calls SOR_step() and then TSOR_step() with the same
+// arguments. Both callees, as defined in this file, end in ExcNotImplemented.
+template <typename number>
+template <typename somenumber>
+void
+ChunkSparseMatrix<number>::SSOR_step (Vector<somenumber> &v,
+                                      const Vector<somenumber> &b,
+                                      const number        om) const
+{
+  SOR_step(v,b,om);
+  TSOR_step(v,b,om);
+}
+
+// SSOR interface acting on the single vector dst. Not implemented: after the
+// checks below, the function always ends in Assert(false, ExcNotImplemented).
+template <typename number>
+template <typename somenumber>
+void
+ChunkSparseMatrix<number>::SSOR (Vector<somenumber> &dst,
+                                 const number /*om*/) const
+{
+  (void)dst;
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  Assert (m() == n(), ExcMessage("This operation is only valid on square matrices."));
+  // the vector must have exactly m() entries
+  Assert (m() == dst.size(), ExcDimensionMismatch(m(),dst.size()));
+  // nothing is computed; dst is not modified and the second argument is unused
+  Assert (false, ExcNotImplemented());
+}
+
+// Print interface for the stream out. Not implemented: apart from checking
+// the stream and the state of the object, it only ends in ExcNotImplemented.
+template <typename number>
+void ChunkSparseMatrix<number>::print (std::ostream &out) const
+{
+  AssertThrow (out, ExcIO());
+  // both the cols pointer and the val array must have been set
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  // no output is generated
+  Assert (false, ExcNotImplemented());
+  // NOTE(review): the stream is checked again although nothing was written
+  AssertThrow (out, ExcIO());
+}
+
+// Write the matrix to @p out as a dense m() x n() table, one row per line.
+template <typename number>
+void ChunkSparseMatrix<number>::print_formatted (std::ostream &out,
+                                                 const unsigned int precision,
+                                                 const bool scientific,
+                                                 const unsigned int width_,
+                                                 const char *zero_string,
+                                                 const double denominator) const
+{
+  AssertThrow (out, ExcIO());
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+
+  // a zero @p width_ picks a width from @p precision; entries missing from
+  // the pattern print as @p zero_string, others are scaled by @p denominator
+  unsigned int width = width_;
+  if (!width)
+    width = (scientific ? precision+7 : precision+2);
+
+  // remember the stream state so that it can be restored at the end
+  const std::ios::fmtflags old_flags     = out.flags();
+  const std::streamsize    old_precision = out.precision (precision);
+  out.setf (scientific ? std::ios::scientific : std::ios::fixed,
+            std::ios::floatfield);
+
+  // the pattern only knows chunks: entry (i,j) is in chunk (i/chunk_size,
+  // j/chunk_size), and the values of a chunk are stored row by row
+  const size_type chunk_size = cols->get_chunk_size();
+  for (size_type i=0; i<m(); ++i)
+    {
+      for (size_type j=0; j<n(); ++j)
+        {
+          const size_type chunk_index
+            = cols->sparsity_pattern(i/chunk_size, j/chunk_size);
+          if (chunk_index != SparsityPattern::invalid_entry)
+            out << std::setw(width)
+                << val[chunk_index * chunk_size * chunk_size
+                       + (i % chunk_size) * chunk_size
+                       + (j % chunk_size)] * denominator << ' ';
+          else
+            out << std::setw(width) << zero_string << ' ';
+        }
+      out << std::endl;
+    }
+  AssertThrow (out, ExcIO());
+
+  // reset output format
+  out.precision(old_precision);
+  out.flags (old_flags);
+}
+
+// Print one character per matrix entry: '.' if its chunk is not in the
+// sparsity pattern, '*' if |value| > @p threshold, and ':' otherwise.
+template <typename number>
+void ChunkSparseMatrix<number>::print_pattern (std::ostream &out,
+                                               const double threshold) const
+{
+  AssertThrow (out, ExcIO());
+
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+
+  const size_type chunk_size = cols->get_chunk_size();
+
+  // loop over all chunk rows; every chunk row expands to chunk_size lines of
+  // output (fewer for the last one if it contains padding rows)
+  for (size_type i=0; i<cols->sparsity_pattern.n_rows(); ++i)
+    for (size_type d=0; d<chunk_size && i*chunk_size+d<m(); ++d)
+      {
+        for (size_type j=0; j<cols->sparsity_pattern.n_cols(); ++j)
+          {
+            const size_type chunk_index = cols->sparsity_pattern(i,j);
+
+            // each chunk expands to chunk_size characters (padding columns
+            // of the last chunk are skipped); values within a chunk are
+            // stored row by row
+            for (size_type e=0; e<chunk_size && j*chunk_size+e<n(); ++e)
+              if (chunk_index == SparsityPattern::invalid_entry)
+                out << '.';
+              else if (std::fabs(val[chunk_index*chunk_size*chunk_size
+                                     + d*chunk_size + e]) > threshold)
+                out << '*';
+              else
+                out << ':';
+          }
+        out << std::endl;
+      }
+
+  AssertThrow (out, ExcIO());
+}
+
+// Dump the value array in binary form: "[max_len][" followed by the raw
+// bytes of the max_len entries of val and a closing ']'.
+template <typename number>
+void
+ChunkSparseMatrix<number>::block_write (std::ostream &out) const
+{
+  AssertThrow (out, ExcIO());
+
+  // header: the number of stored values, then the opening data bracket
+  out << '[' << max_len << "][";
+  // payload: all max_len values, byte for byte
+  const char *const first_byte = reinterpret_cast<const char *>(&val[0]);
+  const char *const past_last  = reinterpret_cast<const char *>(&val[max_len]);
+  out.write (first_byte, past_last - first_byte);
+  out << ']';
+
+  AssertThrow (out, ExcIO());
+}
+
+// Read back data in the format "[max_len][<raw bytes of val>]" (the one
+// block_write() produces) and throw ExcIO if the stream deviates from it.
+template <typename number>
+void
+ChunkSparseMatrix<number>::block_read (std::istream &in)
+{
+  AssertThrow (in, ExcIO());
+
+  // start from a defined value: a failed extraction may leave c untouched
+  char c = '\0';
+  // first read in simple data
+  in >> c;
+  AssertThrow (c == '[', ExcIO());
+  in >> max_len;
+  in >> c;
+  AssertThrow (c == ']', ExcIO());
+  in >> c;
+  AssertThrow (c == '[', ExcIO());
+
+  // reallocate space; null val first so it cannot dangle if new throws
+  delete[] val;
+  val = 0;
+  val = new number[max_len];
+
+  // then read data
+  in.read (reinterpret_cast<char *>(&val[0]),
+           reinterpret_cast<char *>(&val[max_len])
+           - reinterpret_cast<char *>(&val[0]));
+  in >> c;
+  AssertThrow (c == ']', ExcIO());
+}
+
+// Memory estimate in bytes: the max_len entries of type number plus the size
+// of this object itself. Nothing reachable through cols is counted here.
+template <typename number>
+std::size_t
+ChunkSparseMatrix<number>::memory_consumption () const
+{
+  return max_len*sizeof(number) + sizeof(*this);
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/chunk_sparsity_pattern.h b/include/deal.II/lac/chunk_sparsity_pattern.h
new file mode 100644
index 0000000..97b95d6
--- /dev/null
+++ b/include/deal.II/lac/chunk_sparsity_pattern.h
@@ -0,0 +1,1205 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__chunk_sparsity_pattern_h
+#define dealii__chunk_sparsity_pattern_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/vector_slice.h>
+
+#include <deal.II/lac/sparsity_pattern.h>
+
+#include <vector>
+#include <iostream>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+template <typename> class ChunkSparseMatrix;
+
+
+/*! @addtogroup Sparsity
+ *@{
+ */
+
+
+
+/**
+ * Iterators and accessors for chunk sparsity patterns
+ */
+namespace ChunkSparsityPatternIterators
+{
+  // forward declaration
+  class Iterator;
+
+  /**
+   * Accessor class for iterators into sparsity patterns. This class is also
+   * the base class for both const and non-const accessor classes into sparse
+   * matrices.
+   *
+   * Note that this class only allows read access to elements, providing their
+   * row and column number. It does not allow modifying the sparsity pattern
+   * itself.
+   *
+   * @author Martin Kronbichler
+   * @date 2013
+   */
+  class Accessor
+  {
+  public:
+    /**
+     * Constructor. Takes the sparsity pattern and a row number.
+     */
+    Accessor (const ChunkSparsityPattern *matrix,
+              const unsigned int          row);
+
+    /**
+     * Constructor. Construct the end accessor for the given sparsity pattern.
+     */
+    Accessor (const ChunkSparsityPattern *matrix);
+
+    /**
+     * Row number of the element represented by this object. This function can
+     * only be called for entries for which is_valid_entry() is true.
+     */
+    unsigned int row () const;
+
+    /**
+     * Returns the global index from the reduced sparsity pattern.
+     */
+    std::size_t reduced_index() const;
+
+    /**
+     * Column number of the element represented by this object. This function
+     * can only be called for entries for which is_valid_entry() is true.
+     */
+    unsigned int column () const;
+
+    /**
+     * Return whether the sparsity pattern entry pointed to by this iterator
+     * is valid or not. Note that after compressing the sparsity pattern, all
+     * entries are valid. However, before compression, the sparsity pattern
+     * allocated some memory to be used while still adding new nonzero
+     * entries; if you create iterators in this phase of the sparsity
+     * pattern's lifetime, you will iterate over elements that are not valid.
+     * If this is so, then this function will return false.
+     */
+    bool is_valid_entry () const;
+
+
+    /**
+     * Comparison. True, if both iterators point to the same matrix position.
+     */
+    bool operator == (const Accessor &) const;
+
+
+    /**
+     * Comparison operator. Result is true if either the first row number is
+     * smaller or if the row numbers are equal and the first index is smaller.
+     *
+     * This function is only valid if both iterators point into the same
+     * sparsity pattern.
+     */
+    bool operator < (const Accessor &) const;
+
+  protected:
+    /**
+     * The sparsity pattern we operate on.
+     */
+    const ChunkSparsityPattern *sparsity_pattern;
+
+    /**
+     * The accessor of the (reduced) sparsity pattern.
+     */
+    SparsityPatternIterators::Accessor reduced_accessor;
+
+    /**
+     * Current chunk row number.
+     */
+    unsigned int chunk_row;
+
+    /**
+     * Current chunk column number.
+     */
+    unsigned int chunk_col;
+
+    /**
+     * Move the accessor to the next nonzero entry in the matrix.
+     */
+    void advance ();
+
+    /**
+     * Grant access to iterator class.
+     */
+    friend class Iterator;
+  };
+
+
+
+  /**
+   * Iterator that walks over the elements of a sparsity pattern.
+   */
+  class Iterator
+  {
+  public:
+    /**
+     * Constructor. Create an iterator into the sparsity pattern @p sp for the
+     * given row @p row.
+     */
+    Iterator (const ChunkSparsityPattern *sp,
+              const unsigned int          row);
+
+    /**
+     * Prefix increment.
+     */
+    Iterator &operator++ ();
+
+    /**
+     * Postfix increment.
+     */
+    Iterator operator++ (int);
+
+    /**
+     * Dereferencing operator. Returns a reference to an accessor object.
+     */
+    const Accessor &operator* () const;
+
+    /**
+     * Dereferencing operator. Returns a pointer to an accessor object.
+     */
+    const Accessor *operator-> () const;
+
+    /**
+     * Comparison. True, if both iterators point to the same matrix position.
+     */
+    bool operator == (const Iterator &) const;
+
+    /**
+     * Inverse of <tt>==</tt>.
+     */
+    bool operator != (const Iterator &) const;
+
+    /**
+     * Comparison operator. Result is true if either the first row number is
+     * smaller or if the row numbers are equal and the first index is smaller.
+     *
+     * This function is only valid if both iterators point into the same
+     * matrix.
+     */
+    bool operator < (const Iterator &) const;
+
+  private:
+    /**
+     * Store an object of the accessor class.
+     */
+    Accessor accessor;
+  };
+}
+
+
+
+/**
+ * Structure representing the sparsity pattern of a sparse matrix. This class
+ * is an example of the "static" type of
+ * @ref Sparsity.
+ * It uses the compressed row storage (CSR) format to store data.
+ *
+ * The use of this class is demonstrated in step-51.
+ *
+ * @author Wolfgang Bangerth, 2008
+ */
+class ChunkSparsityPattern : public Subscriptor
+{
+public:
+  /**
+   * Declare the type for container size.
+   */
+  typedef types::global_dof_index size_type;
+  /**
+   * Typedef an iterator class that allows to walk over all nonzero elements
+   * of a sparsity pattern.
+   */
+  typedef ChunkSparsityPatternIterators::Iterator const_iterator;
+
+  /**
+   * Typedef an iterator class that allows to walk over all nonzero elements
+   * of a sparsity pattern.
+   *
+   * Since the iterator does not allow to modify the sparsity pattern, this
+   * type is the same as that for @p const_iterator.
+   */
+  typedef ChunkSparsityPatternIterators::Iterator iterator;
+
+  /**
+   * Define a value which is used to indicate that a certain value in the
+   * colnums array is unused, i.e. does not represent a certain column number
+   * index.
+   *
+   * Indices with this invalid value are used to insert new entries to the
+   * sparsity pattern using the add() member function, and are removed when
+   * calling compress().
+   *
+   * You should not assume that the variable declared here has a certain
+   * value. The initialization is given here only to enable the compiler to
+   * perform some optimizations, but the actual value of the variable may
+   * change over time.
+   */
+  static const size_type invalid_entry = SparsityPattern::invalid_entry;
+
+  /**
+   * Initialize the matrix empty, that is with no memory allocated. This is
+   * useful if you want such objects as member variables in other classes. You
+   * can make the structure usable by calling the reinit() function.
+   */
+  ChunkSparsityPattern ();
+
+  /**
+   * Copy constructor. This constructor is only allowed to be called if the
+   * matrix structure to be copied is empty. This is so in order to prevent
+   * involuntary copies of objects for temporaries, which can use large
+   * amounts of computing time. However, copy constructors are needed if one
+   * wants to place a ChunkSparsityPattern in a container, e.g., to write such
+   * statements like <tt>v.push_back (ChunkSparsityPattern());</tt>, with
+   * <tt>v</tt> a vector of ChunkSparsityPattern objects.
+   *
+   * Usually, it is sufficient to use the explicit keyword to disallow
+   * unwanted temporaries, but this does not work for <tt>std::vector</tt>.
+   * Since copying a structure like this is not useful anyway because multiple
+   * matrices can use the same sparsity structure, copies are only allowed for
+   * empty objects, as described above.
+   */
+  ChunkSparsityPattern (const ChunkSparsityPattern &);
+
+  /**
+   * Initialize a rectangular matrix.
+   *
+   * @arg m number of rows @arg n number of columns @arg max_chunks_per_row
+   * maximum number of chunks per row @arg chunk_size size of the chunks
+   */
+  ChunkSparsityPattern (const size_type m,
+                        const size_type n,
+                        const size_type max_chunks_per_row,
+                        const size_type chunk_size);
+
+  /**
+   * Initialize a rectangular matrix.
+   *
+   * @arg m number of rows @arg n number of columns @arg row_lengths possible
+   * number of nonzero entries for each row.  This vector must have one entry
+   * for each row.
+   */
+  ChunkSparsityPattern (const size_type m,
+                        const size_type n,
+                        const std::vector<size_type> &row_lengths,
+                        const size_type chunk_size);
+
+  /**
+   * Initialize a quadratic matrix of dimension <tt>n</tt> with at most
+   * <tt>max_per_row</tt> nonzero entries per row.
+   *
+   * This constructor automatically enables optimized storage of diagonal
+   * elements. To avoid this, use the constructor taking row and column
+   * numbers separately.
+   */
+  ChunkSparsityPattern (const size_type n,
+                        const size_type max_per_row,
+                        const size_type chunk_size);
+
+  /**
+   * Initialize a quadratic matrix.
+   *
+   * @arg m number of rows and columns @arg row_lengths possible number of
+   * nonzero entries for each row.  This vector must have one entry for each
+   * row.
+   */
+  ChunkSparsityPattern (const size_type                m,
+                        const std::vector<size_type>  &row_lengths,
+                        const size_type                chunk_size);
+
+  /**
+   * Destructor.
+   */
+  ~ChunkSparsityPattern ();
+
+  /**
+   * Copy operator. For this the same holds as for the copy constructor: it is
+   * declared, defined and fine to be called, but the latter only for empty
+   * objects.
+   */
+  ChunkSparsityPattern &operator = (const ChunkSparsityPattern &);
+
+  /**
+   * Reallocate memory and set up data structures for a new matrix with <tt>m
+   * </tt>rows and <tt>n</tt> columns, with at most <tt>max_per_row</tt>
+   * nonzero entries per row.
+   *
+   * This function simply maps its operations to the other <tt>reinit</tt>
+   * function.
+   */
+  void reinit (const size_type m,
+               const size_type n,
+               const size_type max_per_row,
+               const size_type chunk_size);
+
+  /**
+   * Reallocate memory for a matrix of size <tt>m x n</tt>. The number of
+   * entries for each row is taken from the array <tt>row_lengths</tt> which
+   * has to give this number of each row <tt>i=1...m</tt>.
+   *
+   * If <tt>m*n==0</tt> all memory is freed, resulting in a total
+   * reinitialization of the object. If it is nonzero, new memory is only
+   * allocated if the new size extends the old one. This is done to save time
+   * and to avoid fragmentation of the heap.
+   *
+   * If the number of rows equals the number of columns then diagonal elements
+   * are stored first in each row to allow optimized access in relaxation
+   * methods of SparseMatrix.
+   */
+  void reinit (const size_type m,
+               const size_type n,
+               const std::vector<size_type> &row_lengths,
+               const size_type chunk_size);
+
+  /**
+   * Same as above, but with a VectorSlice argument instead.
+   */
+  void reinit (const size_type m,
+               const size_type n,
+               const VectorSlice<const std::vector<size_type> > &row_lengths,
+               const size_type chunk_size);
+
+  /**
+   * This function compresses the sparsity structure that this object
+   * represents.  It does so by eliminating unused entries and sorting the
+   * remaining ones to allow faster access by usage of binary search
+   * algorithms. A special sorting scheme is used for the diagonal entry of
+   * quadratic matrices, which is always the first entry of each row.
+   *
+   * Memory that is no longer needed is released.
+   *
+   * SparseMatrix objects require the ChunkSparsityPattern objects they are
+   * initialized with to be compressed, to reduce memory requirements.
+   */
+  void compress ();
+
+  /**
+   * This function can be used as a replacement for reinit(), subsequent calls
+   * to add() and a final call to compress() if you know exactly in advance the
+   * entries that will form the matrix sparsity pattern.
+   *
+   * The first two parameters determine the size of the matrix. For the two
+   * last ones, note that a sparse matrix can be described by a sequence of
+   * rows, each of which is represented by a sequence of pairs of column
+   * indices and values. In the present context, the begin() and end()
+   * parameters designate iterators (of forward iterator type) into a
+   * container, one representing one row. The distance between begin() and
+   * end() should therefore be equal to n_rows(). These iterators may be
+   * iterators of <tt>std::vector</tt>, <tt>std::list</tt>, pointers into a
+   * C-style array, or any other iterator satisfying the requirements of a
+   * forward iterator. The objects pointed to by these iterators (i.e. what we
+   * get after applying <tt>operator*</tt> or <tt>operator-></tt> to one of
+   * these iterators) must be a container itself that provides functions
+   * <tt>begin</tt> and <tt>end</tt> designating a range of iterators that
+   * describe the contents of one line. Dereferencing these inner iterators
+   * must either yield a pair of an unsigned integer as column index and a
+   * value of arbitrary type (such a type would be used if we wanted to
+   * describe a sparse matrix with one such object), or simply an unsigned
+   * integer (if we only wanted to describe a sparsity pattern). The function
+   * is able to determine itself whether an unsigned integer or a pair is what
+   * we get after dereferencing the inner iterators, through some template
+   * magic.
+   *
+   * While the order of the outer iterators denotes the different rows of the
+   * matrix, the order of the inner iterator denoting the columns does not
+   * matter, as they are sorted internal to this function anyway.
+   *
+   * Since that all sounds very complicated, consider the following example
+   * code, which may be used to fill a sparsity pattern:
+   * @code
+   * std::vector<std::vector<size_type> > column_indices (n_rows);
+   * for (size_type row=0; row<n_rows; ++row)
+   *         // generate necessary columns in this row
+   *   fill_row (column_indices[row]);
+   *
+   * sparsity.copy_from (n_rows, n_cols,
+   *                     column_indices.begin(),
+   *                     column_indices.end(), chunk_size);
+   * @endcode
+   *
+   * Note that this example works since the iterators dereferenced yield
+   * containers with functions <tt>begin</tt> and <tt>end</tt> (namely
+   * <tt>std::vector</tt>s), and the inner iterators dereferenced yield
+   * unsigned integers as column indices. Note that we could have replaced
+   * each of the two <tt>std::vector</tt> occurrences by <tt>std::list</tt>,
+   * and the inner one by <tt>std::set</tt> as well.
+   *
+   * Another example would be as follows, where we initialize a whole matrix,
+   * not only a sparsity pattern:
+   * @code
+   * std::vector<std::map<size_type,double> > entries (n_rows);
+   * for (size_type row=0; row<n_rows; ++row)
+   *         // generate necessary pairs of columns
+   *         // and corresponding values in this row
+   *   fill_row (entries[row]);
+   *
+   * sparsity.copy_from (n_rows, n_cols,
+   *                     entries.begin(),
+   *                     entries.end(), chunk_size);
+   * matrix.reinit (sparsity);
+   * matrix.copy_from (entries.begin(),
+   *                   entries.end());
+   * @endcode
+   *
+   * This example works because dereferencing iterators of the inner type
+   * yields a pair of unsigned integers and a value, the first of which we
+   * take as column index. As previously, the outer <tt>std::vector</tt> could
+   * be replaced by <tt>std::list</tt>, and the inner <tt>std::map<unsigned
+   * int,double></tt> could be replaced by <tt>std::vector<std::pair<unsigned
+   * int,double> ></tt>, or a list or set of such pairs, as they all return
+   * iterators that point to such pairs.
+   */
+  template <typename ForwardIterator>
+  void copy_from (const size_type n_rows,
+                  const size_type n_cols,
+                  const ForwardIterator begin,
+                  const ForwardIterator end,
+                  const size_type chunk_size);
+
+  /**
+   * Copy data from an object of type DynamicSparsityPattern. Previous content
+   * of this object is lost, and the sparsity pattern is in compressed mode
+   * afterwards.
+   */
+  template <typename SparsityPatternType>
+  void copy_from (const SparsityPatternType &dsp,
+                  const size_type            chunk_size);
+
+  /**
+   * Take a full matrix and use its nonzero entries to generate a sparse
+   * matrix entry pattern for this object.
+   *
+   * Previous content of this object is lost, and the sparsity pattern is in
+   * compressed mode afterwards.
+   */
+  template <typename number>
+  void copy_from (const FullMatrix<number> &matrix,
+                  const size_type chunk_size);
+
+  /**
+   * Set the sparsity pattern of the chunk sparsity pattern to be given by
+   * <tt>chunk_size*chunk_size</tt> blocks of the sparsity pattern for chunks
+   * specified. Note that the final number of rows <tt>m</tt> of the sparsity
+   * pattern will be approximately <tt>sparsity_pattern_for_chunks.n_rows() *
+   * chunk_size</tt> (modulo padding elements in the last chunk) and similarly
+   * for the number of columns <tt>n</tt>.
+   *
+   * This is a special initialization option in case you can tell the position
+   * of the chunk already from the beginning without generating the sparsity
+   * pattern using <tt>make_sparsity_pattern</tt> calls. This bypasses the
+   * search for chunks but of course needs to be handled with care in order to
+   * give a correct sparsity pattern.
+   *
+   * Previous content of this object is lost, and the sparsity pattern is in
+   * compressed mode afterwards.
+   */
+  template <typename Sparsity>
+  void create_from (const unsigned int  m,
+                    const unsigned int  n,
+                    const Sparsity     &sparsity_pattern_for_chunks,
+                    const unsigned int  chunk_size,
+                    const bool          optimize_diagonal = true);
+
+  /**
+   * Return whether the object is empty. It is empty if no memory is
+   * allocated, which is the same as that both dimensions are zero.
+   */
+  bool empty () const;
+
+  /**
+   * Return the chunk size given as argument when constructing this object.
+   */
+  size_type get_chunk_size () const;
+
+  /**
+   * Return the maximum number of entries per row. Before compression, this
+   * equals the number given to the constructor, while after compression, it
+   * equals the maximum number of entries actually allocated by the user.
+   */
+  size_type max_entries_per_row () const;
+
+  /**
+   * Add a nonzero entry to the matrix. This function may only be called for
+   * non-compressed sparsity patterns.
+   *
+   * If the entry already exists, nothing bad happens.
+   */
+  void add (const size_type i,
+            const size_type j);
+
+  /**
+   * Make the sparsity pattern symmetric by adding the sparsity pattern of the
+   * transpose object.
+   *
+   * This function throws an exception if the sparsity pattern does not
+   * represent a quadratic matrix.
+   */
+  void symmetrize ();
+
+  /**
+   * Return number of rows of this matrix, which equals the dimension of the
+   * image space.
+   */
+  inline size_type n_rows () const;
+
+  /**
+   * Return number of columns of this matrix, which equals the dimension of
+   * the domain space.
+   */
+  inline size_type n_cols () const;
+
+  /**
+   * Check if a value at a certain position may be non-zero.
+   */
+  bool exists (const size_type i,
+               const size_type j) const;
+
+  /**
+   * Number of entries in a specific row.
+   */
+  size_type row_length (const size_type row) const;
+
+  /**
+   * Compute the bandwidth of the matrix represented by this structure. The
+   * bandwidth is the maximum of $|i-j|$ for which the index pair $(i,j)$
+   * represents a nonzero entry of the matrix. Consequently, the maximum
+   * bandwidth a $n\times m$ matrix can have is $\max\{n-1,m-1\}$.
+   */
+  size_type bandwidth () const;
+
+  /**
+   * Return the number of nonzero elements of this matrix. Actually, it
+   * returns the number of entries in the sparsity pattern; if any of the
+   * entries should happen to be zero, it is counted anyway.
+   *
+   * This function may only be called if the matrix struct is compressed. It
+   * does not make too much sense otherwise anyway.
+   */
+  size_type n_nonzero_elements () const;
+
+  /**
+   * Return whether the structure is compressed or not.
+   */
+  bool is_compressed () const;
+
+  /**
+   * Return whether this object stores only those entries that have been added
+   * explicitly, or if the sparsity pattern contains elements that have been
+   * added through other means (implicitly) while building it. For the current
+   * class, the result is false if and only if it is square because it then
+   * unconditionally stores the diagonal entries whether they have been added
+   * explicitly or not.
+   *
+   * This function mainly serves the purpose of describing the current class
+   * in cases where several kinds of sparsity patterns can be passed as
+   * template arguments.
+   */
+  bool stores_only_added_elements () const;
+
+  /**
+   * Iterator starting at the first entry of the matrix. The resulting
+   * iterator can be used to walk over all nonzero entries of the sparsity
+   * pattern.
+   */
+  iterator begin () const;
+
+  /**
+   * Final iterator.
+   */
+  iterator end () const;
+
+  /**
+   * Iterator starting at the first entry of row <tt>r</tt>.
+   *
+   * Note that if the given row is empty, i.e. does not contain any nonzero
+   * entries, then the iterator returned by this function equals
+   * <tt>end(r)</tt>. Note also that the iterator may not be dereferenceable in
+   * that case.
+   */
+  iterator begin (const unsigned int r) const;
+
+  /**
+   * Final iterator of row <tt>r</tt>. It points to the first element past the
+   * end of line @p r, or past the end of the entire sparsity pattern.
+   *
+   * Note that the end iterator is not necessarily dereferenceable. This is in
+   * particular the case if it is the end iterator for the last row of a
+   * matrix.
+   */
+  iterator end (const unsigned int r) const;
+
+  /**
+   * Write the data of this object en bloc to a file. This is done in a binary
+   * mode, so the output is neither readable by humans nor (probably) by other
+   * computers using a different operating system or number format.
+   *
+   * The purpose of this function is that you can swap out matrices and
+   * sparsity pattern if you are short of memory, want to communicate between
+   * different programs, or allow objects to be persistent across different
+   * runs of the program.
+   */
+  void block_write (std::ostream &out) const;
+
+  /**
+   * Read data that has previously been written by block_write() from a file.
+   * This is done using the inverse operations to the above function, so it is
+   * reasonably fast because the bitstream is not interpreted except for a few
+   * numbers up front.
+   *
+   * The object is resized on this operation, and all previous contents are
+   * lost.
+   *
+   * A primitive form of error checking is performed which will recognize the
+   * bluntest attempts to interpret some data as a vector stored bitwise to a
+   * file, but not more.
+   */
+  void block_read (std::istream &in);
+
+  /**
+   * Print the sparsity of the matrix. The output consists of one line per row
+   * of the format <tt>[i,j1,j2,j3,...]</tt>. <i>i</i> is the row number and
+   * <i>jn</i> are the allocated columns in this row.
+   */
+  void print (std::ostream &out) const;
+
+  /**
+   * Print the sparsity of the matrix in a format that <tt>gnuplot</tt>
+   * understands and which can be used to plot the sparsity pattern in a
+   * graphical way. The format consists of pairs <tt>i j</tt> of nonzero
+   * elements, each representing one entry of this matrix, one per line of the
+   * output file. Indices are counted from zero on, as usual. Since sparsity
+   * patterns are printed in the same way as matrices are displayed, we print
+   * the negative of the column index, which means that the <tt>(0,0)</tt>
+   * element is in the top left rather than in the bottom left corner.
+   *
+   * Print the sparsity pattern in gnuplot by setting the data style to dots
+   * or points and use the <tt>plot</tt> command.
+   */
+  void print_gnuplot (std::ostream &out) const;
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object. See MemoryConsumption.
+   */
+  std::size_t memory_consumption () const;
+
+  /**
+   * @addtogroup Exceptions
+   * @{
+   */
+  /**
+   * Exception
+   */
+  DeclException1 (ExcInvalidNumber,
+                  int,
+                  << "The provided number is invalid here: " << arg1);
+  /**
+   * Exception
+   */
+  DeclException2 (ExcInvalidIndex,
+                  int, int,
+                  << "The given index " << arg1
+                  << " should be less than " << arg2 << ".");
+  /**
+   * Exception
+   */
+  DeclException2 (ExcNotEnoughSpace,
+                  int, int,
+                  << "Upon entering a new entry to row " << arg1
+                  << ": there was no free entry any more. " << std::endl
+                  << "(Maximum number of entries for this row: "
+                  << arg2 << "; maybe the matrix is already compressed?)");
+  /**
+   * Exception
+   */
+  DeclException0 (ExcNotCompressed);
+  /**
+   * Exception
+   */
+  DeclException0 (ExcMatrixIsCompressed);
+  /**
+   * Exception
+   */
+  DeclException0 (ExcEmptyObject);
+  /**
+   * Exception
+   */
+  DeclException0 (ExcInvalidConstructorCall);
+  /**
+   * Exception
+   */
+  DeclException2 (ExcIteratorRange,
+                  int, int,
+                  << "The iterators denote a range of " << arg1
+                  << " elements, but the given number of rows was " << arg2);
+  /**
+   * Exception
+   */
+  DeclException0 (ExcMETISNotInstalled);
+  /**
+   * Exception
+   */
+  DeclException1 (ExcInvalidNumberOfPartitions,
+                  int,
+                  << "The number of partitions you gave is " << arg1
+                  << ", but must be greater than zero.");
+  /**
+   * Exception
+   */
+  DeclException2 (ExcInvalidArraySize,
+                  int, int,
+                  << "The array has size " << arg1 << " but should have size "
+                  << arg2);
+  //@}
+private:
+  /**
+   * Number of rows that this sparsity structure shall represent.
+   */
+  size_type rows;
+
+  /**
+   * Number of columns that this sparsity structure shall represent.
+   */
+  size_type cols;
+
+  /**
+   * The size of chunks.
+   */
+  size_type chunk_size;
+
+  /**
+   * The reduced sparsity pattern. We store only which chunks exist, with each
+   * chunk a block in the matrix of size chunk_size by chunk_size.
+   */
+  SparsityPattern sparsity_pattern;
+
+  /**
+   * Make all the chunk sparse matrix kinds friends.
+   */
+  template <typename> friend class ChunkSparseMatrix;
+
+  /**
+   * Make the accessor class a friend.
+   */
+  friend class ChunkSparsityPatternIterators::Accessor;
+};
+
+
+/*@}*/
+/*---------------------- Inline functions -----------------------------------*/
+
+#ifndef DOXYGEN
+
+namespace ChunkSparsityPatternIterators
+{
+  // Point at the first entry of global row @p row; passing n_rows() instead
+  // yields the past-the-end state of the reduced sparsity pattern.
+  inline Accessor::Accessor (const ChunkSparsityPattern *sparsity_pattern,
+                             const unsigned int          row)
+    :
+    sparsity_pattern(sparsity_pattern),
+    reduced_accessor(row!=sparsity_pattern->n_rows() ?
+                     *sparsity_pattern->sparsity_pattern.
+                     begin(row/sparsity_pattern->get_chunk_size()) :
+                     *sparsity_pattern->sparsity_pattern.end()),
+    chunk_row (row!=sparsity_pattern->n_rows() ?
+               row%sparsity_pattern->get_chunk_size() : 0),
+    chunk_col (0)
+  {}
+
+
+
+  // Construct the past-the-end accessor of the given pattern: it wraps the
+  // end iterator of the reduced sparsity pattern.
+  inline Accessor::Accessor (const ChunkSparsityPattern *sparsity_pattern)
+    :
+    sparsity_pattern (sparsity_pattern),
+    reduced_accessor (*(sparsity_pattern->sparsity_pattern.end())),
+    chunk_row (0),
+    chunk_col (0)
+  {}
+
+
+
+  // An entry is valid if the reduced accessor points to a valid chunk and
+  // the resulting global row and column indices lie inside the matrix.
+  inline bool
+  Accessor::is_valid_entry () const
+  {
+    if (!reduced_accessor.is_valid_entry())
+      return false;
+    const ChunkSparsityPattern::size_type n = sparsity_pattern->get_chunk_size();
+    if (!(n*reduced_accessor.row()+chunk_row < sparsity_pattern->n_rows()))
+      return false;
+    return n*reduced_accessor.column()+chunk_col < sparsity_pattern->n_cols();
+  }
+
+
+
+  // Global row index of the entry this accessor currently points to.
+  inline unsigned int
+  Accessor::row() const
+  {
+    Assert (is_valid_entry() == true, ExcInvalidIterator());
+    const ChunkSparsityPattern::size_type chunk_size =
+      sparsity_pattern->get_chunk_size();
+    return chunk_size*reduced_accessor.row() + chunk_row;
+  }
+
+
+
+  // Global column index of the entry this accessor currently points to.
+  inline unsigned int
+  Accessor::column() const
+  {
+    Assert (is_valid_entry() == true, ExcInvalidIterator());
+    const ChunkSparsityPattern::size_type chunk_size =
+      sparsity_pattern->get_chunk_size();
+    return chunk_size*reduced_accessor.column() + chunk_col;
+  }
+
+
+
+  // Index of the current chunk as stored by the reduced accessor.
+  inline std::size_t
+  Accessor::reduced_index() const
+  {
+    Assert (is_valid_entry() == true, ExcInvalidIterator());
+    const std::size_t index_of_chunk = reduced_accessor.index_within_sparsity;
+    return index_of_chunk;
+  }
+
+
+
+
+  // Equal means: same reduced accessor and same position inside the chunk.
+  inline bool
+  Accessor::operator == (const Accessor &other) const
+  {
+    // the sparsity patterns themselves need not be compared: the reduced
+    // accessors already do so, and each ChunkSparsityPattern owns its own
+    // reduced pattern
+    if (!(reduced_accessor == other.reduced_accessor))
+      return false;
+    return chunk_row == other.chunk_row && chunk_col == other.chunk_col;
+  }
+
+
+
+  inline
+  bool
+  Accessor::operator < (const Accessor &other) const // order within one pattern
+  {
+    Assert (sparsity_pattern == other.sparsity_pattern,
+            ExcInternalError());
+    // different chunk rows: the order is decided by global row where possible
+    if (chunk_row != other.chunk_row)
+      {
+        if (reduced_accessor.index_within_sparsity ==
+            reduced_accessor.sparsity_pattern->n_nonzero_elements())
+          return false; // presumably *this is past the end: never smaller
+        if (other.reduced_accessor.index_within_sparsity ==
+            reduced_accessor.sparsity_pattern->n_nonzero_elements())
+          return true; // presumably other is past the end: *this is smaller
+        // both reduced indices are below n_nonzero_elements(): compare rows
+        const unsigned int
+        global_row = sparsity_pattern->get_chunk_size()*reduced_accessor.row()
+                     +chunk_row,
+                     other_global_row = sparsity_pattern->get_chunk_size()*
+                                        other.reduced_accessor.row()+other.chunk_row;
+        if (global_row < other_global_row)
+          return true;
+        else if (global_row > other_global_row)
+          return false;
+      }
+    // same chunk row or same global row: reduced index first, then chunk_col
+    return (reduced_accessor.index_within_sparsity <
+            other.reduced_accessor.index_within_sparsity ||
+            (reduced_accessor.index_within_sparsity ==
+             other.reduced_accessor.index_within_sparsity &&
+             chunk_col < other.chunk_col));
+  }
+
+
+  inline
+  void
+  Accessor::advance () // step to the next entry; used by Iterator::operator++
+  {
+    const unsigned int chunk_size = sparsity_pattern->get_chunk_size();
+    Assert (chunk_row < chunk_size && chunk_col < chunk_size,
+            ExcIteratorPastEnd());
+    Assert (reduced_accessor.row() * chunk_size + chunk_row <
+            sparsity_pattern->n_rows()
+            &&
+            reduced_accessor.column() * chunk_size + chunk_col <
+            sparsity_pattern->n_cols(),
+            ExcIteratorPastEnd());
+    if (chunk_size == 1)
+      {
+        reduced_accessor.advance(); // every chunk is a single entry
+        return;
+      }
+    // chunk_size > 1: first try the next column inside the current chunk
+    ++chunk_col;
+    // we have run past the last column of the chunk, or past the last column
+    // of the matrix in case the chunk sticks out over the right edge
+    if (chunk_col == chunk_size
+        ||
+        reduced_accessor.column() * chunk_size + chunk_col ==
+        sparsity_pattern->n_cols())
+      {
+        const unsigned int reduced_row = reduced_accessor.row();
+        // the current chunk is the last one stored for this reduced row
+        if (reduced_accessor.index_within_sparsity + 1 ==
+            reduced_accessor.sparsity_pattern->rowstart[reduced_row+1])
+          {
+            ++chunk_row;
+            // the next row inside the chunks starts at its first column
+            chunk_col = 0;
+
+            // end of chunk rows or end of matrix
+            if (chunk_row == chunk_size ||
+                (reduced_row * chunk_size + chunk_row ==
+                 sparsity_pattern->n_rows()))
+              {
+                chunk_row = 0;
+                reduced_accessor.advance();
+              }
+            // go back to the beginning of the same reduced row but with
+            // chunk_row increased by one
+            else
+              reduced_accessor.index_within_sparsity =
+                reduced_accessor.sparsity_pattern->rowstart[reduced_row];
+          }
+        // more chunks follow in this reduced row: move on to the next chunk
+        else
+          {
+            reduced_accessor.advance();
+            chunk_col = 0;
+          }
+      }
+  }
+
+
+
+  // Create an iterator whose accessor is set up for global row @p row.
+  inline Iterator::Iterator (const ChunkSparsityPattern *sparsity_pattern,
+                             const unsigned int          row)
+    : accessor (sparsity_pattern, row)
+  {
+  }
+
+
+
+  // Prefix increment: advance the accessor and return this iterator.
+  inline Iterator &Iterator::operator++ ()
+  {
+    Iterator &self = *this;
+    self.accessor.advance ();
+    return self;
+  }
+
+
+
+  // Postfix increment: advance the accessor but return a copy of the
+  // iterator as it was before the step.
+  inline Iterator Iterator::operator++ (int)
+  {
+    const Iterator previous (*this);
+    accessor.advance ();
+    return previous;
+  }
+
+
+
+  // Dereference: read-only access to the accessor held by this iterator.
+  inline const Accessor &Iterator::operator* () const
+  {
+    const Accessor &current = accessor;
+    return current;
+  }
+
+
+
+  // Member access: pointer to the accessor held by this iterator.
+  inline const Accessor *Iterator::operator-> () const
+  {
+    const Accessor *const current = &accessor;
+    return current;
+  }
+
+
+  // Iterators compare equal exactly when their accessors do.
+  inline bool Iterator::operator == (const Iterator &other) const
+  {
+    const bool same_position = (accessor == other.accessor);
+    return same_position;
+  }
+
+
+
+  // Negation of the accessor equality comparison.
+  inline bool Iterator::operator != (const Iterator &other) const
+  {
+    const bool same_position = (accessor == other.accessor);
+    return !same_position;
+  }
+
+
+  // Ordering of iterators is delegated to the ordering of their accessors.
+  inline bool Iterator::operator < (const Iterator &other) const
+  {
+    const bool comes_first = (accessor < other.accessor);
+    return comes_first;
+  }
+
+}
+
+
+
+// Iterator to the first entry of the pattern, i.e. to the start of row 0.
+inline ChunkSparsityPattern::iterator ChunkSparsityPattern::begin () const
+{
+  const unsigned int first_row = 0;
+  return iterator(this, first_row);
+}
+
+
+// Past-the-end iterator of the whole pattern (constructed for row n_rows()).
+inline ChunkSparsityPattern::iterator ChunkSparsityPattern::end () const
+{
+  const size_type one_past_last_row = n_rows();
+  return iterator(this, one_past_last_row);
+}
+
+
+
+// Iterator to the start of row @p r, which must be a valid row index.
+inline ChunkSparsityPattern::iterator
+ChunkSparsityPattern::begin (const unsigned int r) const
+{
+  Assert (r<n_rows(), ExcIndexRange(r,0,n_rows()));
+  return iterator (this, r);
+}
+
+
+
+// End iterator of row @p r (a valid row index): constructed for row r+1.
+inline ChunkSparsityPattern::iterator
+ChunkSparsityPattern::end (const unsigned int r) const
+{
+  Assert (r<n_rows(), ExcIndexRange(r,0,n_rows()));
+  return iterator(this, r+1);
+}
+
+
+
+// Number of rows, as stored in the member variable rows.
+inline ChunkSparsityPattern::size_type
+ChunkSparsityPattern::n_rows () const
+{
+  return this->rows;
+}
+
+
+// Number of columns, as stored in the member variable cols.
+inline ChunkSparsityPattern::size_type
+ChunkSparsityPattern::n_cols () const
+{
+  return this->cols;
+}
+
+
+
+// Edge length of the chunks, as stored in the member variable chunk_size.
+inline ChunkSparsityPattern::size_type
+ChunkSparsityPattern::get_chunk_size () const
+{
+  return this->chunk_size;
+}
+
+
+
+// The compression state is the one of the reduced sparsity pattern.
+inline bool
+ChunkSparsityPattern::is_compressed () const
+{
+  return this->sparsity_pattern.compressed;
+}
+
+
+
+template <typename ForwardIterator>
+void
+ChunkSparsityPattern::copy_from (const size_type       n_rows,
+                                 const size_type       n_cols,
+                                 const ForwardIterator begin,
+                                 const ForwardIterator end,
+                                 const size_type       chunk_size)
+{
+  // the outer range has to provide exactly one inner container per row
+  Assert (static_cast<size_type>(std::distance (begin, end)) == n_rows,
+          ExcIteratorRange (std::distance (begin, end), n_rows));
+
+  // Step 1: allocate. Each row needs room for the entries listed for it; a
+  // square matrix may need one more slot for the diagonal entry. Since
+  // compress() is called below anyway, we do not check whether the diagonal
+  // is already listed but always reserve the extra slot.
+  const int diagonal_slot = (n_rows == n_cols ? 1 : 0);
+  std::vector<size_type> row_lengths;
+  row_lengths.reserve(n_rows);
+  for (ForwardIterator outer=begin; outer!=end; ++outer)
+    row_lengths.push_back (std::distance (outer->begin(), outer->end())
+                           + diagonal_slot);
+  reinit (n_rows, n_cols, row_lengths, chunk_size);
+
+  // Step 2: insert every listed (row, column) pair
+  typedef typename std::iterator_traits<ForwardIterator>::value_type::const_iterator inner_iterator;
+  size_type row = 0;
+  for (ForwardIterator outer=begin; outer!=end; ++outer)
+    {
+      const inner_iterator row_end = outer->end();
+      for (inner_iterator entry=outer->begin(); entry!=row_end; ++entry)
+        {
+          const size_type col
+            = internal::SparsityPatternTools::get_column_index_from_iterator(*entry);
+          Assert (col < n_cols, ExcInvalidIndex(col,n_cols));
+          add (row, col);
+        }
+      ++row;
+    }
+
+  // Step 3: compress, which also sorts the entries within each row
+  compress ();
+}
+
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/compressed_set_sparsity_pattern.h b/include/deal.II/lac/compressed_set_sparsity_pattern.h
new file mode 100644
index 0000000..e6a8b69
--- /dev/null
+++ b/include/deal.II/lac/compressed_set_sparsity_pattern.h
@@ -0,0 +1,38 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__compressed_set_sparsity_pattern_h
+#define dealii__compressed_set_sparsity_pattern_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+/*! @addtogroup Sparsity
+ *@{
+ */
+
+/**
+ * @deprecated Use DynamicSparsityPattern instead.
+ */
+typedef DynamicSparsityPattern CompressedSetSparsityPattern DEAL_II_DEPRECATED;
+
+/*@}*/
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/compressed_simple_sparsity_pattern.h b/include/deal.II/lac/compressed_simple_sparsity_pattern.h
new file mode 100644
index 0000000..500bc4f
--- /dev/null
+++ b/include/deal.II/lac/compressed_simple_sparsity_pattern.h
@@ -0,0 +1,51 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__compressed_simple_sparsity_pattern_h
+#define dealii__compressed_simple_sparsity_pattern_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/lac/exceptions.h>
+#include <deal.II/base/index_set.h>
+
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+
+#include <vector>
+#include <algorithm>
+#include <iostream>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <typename number> class SparseMatrix;
+
+
+/*! @addtogroup Sparsity
+ *@{
+ */
+
+/**
+ * @deprecated Use DynamicSparsityPattern instead.
+ */
+typedef DynamicSparsityPattern CompressedSimpleSparsityPattern DEAL_II_DEPRECATED;
+
+/*@}*/
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/compressed_sparsity_pattern.h b/include/deal.II/lac/compressed_sparsity_pattern.h
new file mode 100644
index 0000000..26e531b
--- /dev/null
+++ b/include/deal.II/lac/compressed_sparsity_pattern.h
@@ -0,0 +1,38 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__compressed_sparsity_pattern_h
+#define dealii__compressed_sparsity_pattern_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+/*! @addtogroup Sparsity
+ *@{
+ */
+
+/**
+ * @deprecated Use DynamicSparsityPattern instead.
+ */
+typedef DynamicSparsityPattern CompressedSparsityPattern DEAL_II_DEPRECATED;
+
+/*@}*/
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/constrained_linear_operator.h b/include/deal.II/lac/constrained_linear_operator.h
new file mode 100644
index 0000000..729a65e
--- /dev/null
+++ b/include/deal.II/lac/constrained_linear_operator.h
@@ -0,0 +1,321 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2015 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__constrained_linear_operator_h
+#define dealii__constrained_linear_operator_h
+
+#include <deal.II/lac/linear_operator.h>
+#include <deal.II/lac/packaged_operation.h>
+#include <deal.II/lac/constraint_matrix.h>
+
+#ifdef DEAL_II_WITH_CXX11
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/**
+ * @name Indirectly applying constraints to LinearOperator
+ */
+//@{
+
+
+/**
+ * This function takes a ConstraintMatrix @p constraint_matrix and an operator
+ * exemplar @p exemplar (this exemplar is usually a linear operator that
+ * describes the system matrix - it is only used to create domain and range
+ * vectors of appropriate sizes, its action <tt>vmult</tt> is never used). A
+ * LinearOperator object associated with the "homogeneous action" of the
+ * underlying ConstraintMatrix object is returned:
+ *
+ * Applying the LinearOperator object on a vector <code>u</code> results in a
+ * vector <code>v</code> that stores the result of calling
+ * ConstraintMatrix::distribute() on <code>u</code> - with one important
+ * difference: inhomogeneities are not applied, but always treated as 0
+ * instead.
+ *
+ * The LinearOperator object created by this function is primarily used
+ * internally in constrained_linear_operator() to build up a modified system
+ * of linear equations. How to solve a linear system of equations with this
+ * approach is explained in detail in the
+ * @ref constraints
+ * module.
+ *
+ * @author Mauro Bardelloni, Matthias Maier, 2015
+ *
+ * @note Currently, this function may not work correctly for distributed data
+ * structures.
+ *
+ * @relates LinearOperator
+ * @ingroup constraints
+ */
+template <typename Range, typename Domain>
+LinearOperator<Range, Domain> distribute_constraints_linear_operator(
+  const ConstraintMatrix &constraint_matrix,
+  const LinearOperator<Range, Domain> &exemplar)
+{
+  // start from a copy of the exemplar and replace its four actions below
+  LinearOperator<Range, Domain> return_op = exemplar;
+
+  // v += C u: a constrained entry i collects the weighted entries of u listed
+  // in its constraint line, every other entry simply receives u(i)
+  return_op.vmult_add = [&constraint_matrix](Range &v, const Domain &u)
+  {
+    Assert(!dealii::PointerComparison::equal(&v, &u),
+           dealii::ExcMessage("The domain and range vectors must be different "
+                              "storage locations"));
+
+    for (auto i : v.locally_owned_elements())
+      {
+        if (!constraint_matrix.is_constrained(i))
+          {
+            v(i) += u(i);
+            continue;
+          }
+        const auto &line = *constraint_matrix.get_constraint_entries(i);
+        for (types::global_dof_index j = 0; j < line.size(); ++j)
+          v(i) += u(line[j].first) * line[j].second;
+      }
+  };
+
+  // v += C^T u: the transposed action instead scatters u(i) into the entries
+  // of v listed in the constraint line of i
+  return_op.Tvmult_add = [&constraint_matrix](Domain &v, const Range &u)
+  {
+    Assert(!dealii::PointerComparison::equal(&v, &u),
+           dealii::ExcMessage("The domain and range vectors must be different "
+                              "storage locations"));
+
+    for (auto i : v.locally_owned_elements())
+      {
+        if (!constraint_matrix.is_constrained(i))
+          {
+            v(i) += u(i);
+            continue;
+          }
+        const auto &line = *constraint_matrix.get_constraint_entries(i);
+        for (types::global_dof_index j = 0; j < line.size(); ++j)
+          v(line[j].first) += u(i) * line[j].second;
+      }
+  };
+
+  // the plain variants zero the destination and then reuse the adding
+  // variants; the function objects are copied into named locals first because
+  // lambda capture expressions are a C++14 feature
+  const auto add_forward = return_op.vmult_add;
+  return_op.vmult = [add_forward](Range &v, const Domain &u)
+  {
+    v = 0.;
+    add_forward(v, u);
+  };
+
+  const auto add_transposed = return_op.Tvmult_add;
+  return_op.Tvmult = [add_transposed](Domain &v, const Range &u)
+  {
+    v = 0.;
+    add_transposed(v, u);
+  };
+
+  return return_op;
+}
+
+
+/**
+ * Given a ConstraintMatrix @p constraint_matrix and an operator exemplar @p
+ * exemplar, return a LinearOperator that is the projection to the subspace of
+ * constrained degrees of freedom, i.e. all entries of the result vector that
+ * correspond to unconstrained degrees of freedom are set to zero.
+ *
+ * @author Mauro Bardelloni, Matthias Maier, 2015
+ *
+ * @relates LinearOperator
+ * @ingroup constraints
+ */
+template <typename Range, typename Domain>
+LinearOperator<Range, Domain> project_to_constrained_linear_operator(
+  const ConstraintMatrix &constraint_matrix,
+  const LinearOperator<Range, Domain> &exemplar)
+{
+  LinearOperator<Range, Domain> return_op = exemplar;
+
+  // overwriting variants: zero v, then copy the constrained entries of u
+  return_op.vmult = [&constraint_matrix](Range &v, const Domain &u)
+  {
+    Assert(!dealii::PointerComparison::equal(&v, &u),
+           dealii::ExcMessage("The domain and range vectors must be different "
+                              "storage locations"));
+    v = 0.;
+    for (const auto i : v.locally_owned_elements())
+      if (constraint_matrix.is_constrained(i))
+        v(i) = u(i);
+  };
+  return_op.Tvmult = [&constraint_matrix](Domain &v, const Range &u)
+  {
+    Assert(!dealii::PointerComparison::equal(&v, &u),
+           dealii::ExcMessage("The domain and range vectors must be different "
+                              "storage locations"));
+    v = 0.;
+    for (const auto i : v.locally_owned_elements())
+      if (constraint_matrix.is_constrained(i))
+        v(i) = u(i);
+  };
+
+  // adding variants: constrained entries of u are added to v, all other
+  // entries of v are left alone
+  return_op.vmult_add = [&constraint_matrix](Range &v, const Domain &u)
+  {
+    for (const auto i : v.locally_owned_elements())
+      if (constraint_matrix.is_constrained(i))
+        v(i) += u(i);
+  };
+  return_op.Tvmult_add = [&constraint_matrix](Domain &v, const Range &u)
+  {
+    for (const auto i : v.locally_owned_elements())
+      if (constraint_matrix.is_constrained(i))
+        v(i) += u(i);
+  };
+
+  return return_op;
+}
+
+
+/**
+ * Given a ConstraintMatrix object @p constraint_matrix and a LinearOperator
+ * @p linop, this function creates a LinearOperator object consisting of the
+ * composition of three operations and a regularization:
+ * @code
+ *   Ct * linop * C + Id_c;
+ * @endcode
+ * with
+ * @code
+ *   C = distribute_constraints_linear_operator(constraint_matrix, linop);
+ *   Ct = transpose_operator(C);
+ *   Id_c = project_to_constrained_linear_operator(constraint_matrix, linop);
+ * @endcode
+ * and <code>Id_c</code> is the projection to the subspace consisting of all
+ * vector entries associated with constrained degrees of freedoms.
+ *
+ * This LinearOperator object is used together with
+ * constrained_right_hand_side() to build up the following modified system of
+ * linear equations:
+ * @f[
+ *   (C^T A C + Id_c) x = C^T (b - A\,k)
+ * @f]
+ * with a given (unconstrained) system matrix $A$, right hand side $b$, and
+ * linear constraints $C$ with inhomogeneities $k$.
+ *
+ * A detailed explanation of this approach is given in the
+ * @ref constraints
+ * module.
+ *
+ * @author Mauro Bardelloni, Matthias Maier, 2015
+ *
+ * @note Currently, this function may not work correctly for distributed data
+ * structures.
+ *
+ * @relates LinearOperator
+ * @ingroup constraints
+ */
+template <typename Range, typename Domain>
+LinearOperator<Range, Domain>
+constrained_linear_operator(const ConstraintMatrix &constraint_matrix,
+                            const LinearOperator<Range, Domain> &linop)
+{
+  const LinearOperator<Range, Domain> C =
+    distribute_constraints_linear_operator(constraint_matrix, linop);
+  const LinearOperator<Range, Domain> Id_c =
+    project_to_constrained_linear_operator(constraint_matrix, linop);
+  const auto Ct = transpose_operator(C);
+  return Ct * linop * C + Id_c; // Id_c: projection onto constrained entries
+}
+
+
+/**
+ * Given a ConstraintMatrix object @p constraint_matrix, a LinearOperator @p
+ * linop and a right-hand side @p right_hand_side, this function creates a
+ * PackagedOperation that stores the following computation:
+ * @code
+ *   Ct * (right_hand_side - linop * k)
+ * @endcode
+ * with
+ * @code
+ *   C = distribute_constraints_linear_operator(constraint_matrix, linop);
+ *   Ct = transpose_operator(C);
+ * @endcode
+ *
+ * This PackagedOperation object is used together with
+ * constrained_linear_operator() to build up the following modified system of
+ * linear equations:
+ * @f[
+ *   (C^T A C + Id_c) x = C^T (b - A\,k)
+ * @f]
+ * with a given (unconstrained) system matrix $A$, right hand side $b$, and
+ * linear constraints $C$ with inhomogeneities $k$.
+ *
+ * A detailed explanation of this approach is given in the
+ * @ref constraints
+ * module.
+ *
+ * @author Mauro Bardelloni, Matthias Maier, 2015
+ *
+ * @note Currently, this function may not work correctly for distributed data
+ * structures.
+ *
+ * @relates LinearOperator
+ * @ingroup constraints
+ */
+template <typename Range, typename Domain>
+PackagedOperation<Range>
+constrained_right_hand_side(const ConstraintMatrix &constraint_matrix,
+                            const LinearOperator<Range, Domain> &linop,
+                            const Range &right_hand_side)
+{
+  PackagedOperation<Range> return_comp;
+  return_comp.reinit_vector = linop.reinit_range_vector;
+
+  // all three arguments are captured by reference, so they have to stay
+  // alive for as long as the returned object is used
+  return_comp.apply_add =
+    [&constraint_matrix, &linop, &right_hand_side](Range &v)
+  {
+    const LinearOperator<Range, Domain> C =
+      distribute_constraints_linear_operator(constraint_matrix, linop);
+    const auto Ct = transpose_operator(C);
+
+    // scratch domain vector k, filled by ConstraintMatrix::distribute()
+    static GrowingVectorMemory<Domain> vector_memory;
+    Domain *k = vector_memory.alloc();
+    linop.reinit_domain_vector(*k, /*bool fast=*/ false);
+    constraint_matrix.distribute(*k);
+    v += Ct * (right_hand_side - linop * (*k));
+    vector_memory.free(k);
+  };
+
+  // apply() is apply_add() on a zeroed vector; the function object is copied
+  // into a named local because lambda capture expressions are C++14
+  const auto add_into = return_comp.apply_add;
+  return_comp.apply = [add_into](Range &v)
+  {
+    v = 0.;
+    add_into(v);
+  };
+  return return_comp;
+}
+
+//@}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_CXX11
+#endif
diff --git a/include/deal.II/lac/constraint_matrix.h b/include/deal.II/lac/constraint_matrix.h
new file mode 100644
index 0000000..e413a6b
--- /dev/null
+++ b/include/deal.II/lac/constraint_matrix.h
@@ -0,0 +1,1808 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__constraint_matrix_h
+#define dealii__constraint_matrix_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/index_set.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/template_constraints.h>
+#include <deal.II/base/thread_local_storage.h>
+
+#include <deal.II/lac/vector.h>
+
+#include <vector>
+#include <map>
+#include <set>
+#include <utility>
+#include <complex>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+// Forward declarations: class templates first, then plain classes.
+template <int dim, class T> class Table;
+template <typename number> class FullMatrix;
+template <typename number> class SparseMatrix;
+template <typename number> class BlockSparseMatrix;
+class SparsityPattern;
+class DynamicSparsityPattern;
+class BlockSparsityPattern;
+class BlockDynamicSparsityPattern;
+class BlockIndices;
+
+namespace internals
+{
+  class GlobalRowsFromLocal;
+}
+
+
+//TODO[WB]: We should have a function of the kind
+//   ConstraintMatrix::add_constraint (const size_type constrained_dof,
+//                const std::vector<std::pair<size_type, double> > &entries,
+//                const double inhomogeneity = 0);
+// rather than building up constraints piecemeal through add_line/add_entry
+// etc. This would also eliminate the possibility of accidentally changing
+// existing constraints into something pointless, see the discussion on the
+// mailing list on "Tiny bug in interpolate_boundary_values" in Sept. 2010.
+
+/**
+ * This class implements dealing with linear (possibly inhomogeneous)
+ * constraints on degrees of freedom. The concept and origin of such
+ * constraints is extensively described in the
+ * @ref constraints
+ * module. The class is meant to deal with a limited number of constraints
+ * relative to the total number of degrees of freedom, for example a few per
+ * cent up to maybe 30 per cent; and with a linear combination of <i>M</i>
+ * other degrees of freedom where <i>M</i> is also relatively small (no larger
+ * than at most around the average number of entries per row of a linear
+ * system). It is <em>not</em> meant to describe full rank linear systems.
+ *
+ * The algorithms used in the implementation of this class are described in
+ * some detail in the
+ * @ref hp_paper "hp paper".
+ * There is also a significant amount of documentation on how to use this
+ * class in the
+ * @ref constraints
+ * module.
+ *
+ *
+ * <h3>Description of constraints</h3>
+ *
+ * Each "line" in objects of this class corresponds to one constrained degree
+ * of freedom, with the number of the line being <i>i</i>, entered by using
+ * add_line() or add_lines(). The entries in this line are pairs of the form
+ * (<i>j</i>,<i>a<sub>ij</sub></i>), which are added by add_entry() or
+ * add_entries(). The organization is essentially a SparsityPattern, but with
+ * only a few lines containing nonzero elements, and  therefore no data wasted
+ * on the others. For each line, which has been added by the mechanism above,
+ * an elimination of the constrained degree of freedom of the form
+ * @f[
+ *  x_i = \sum_j a_{ij} x_j + b_i
+ * @f]
+ * is performed, where <i>b<sub>i</sub></i> is optional and set by
+ * set_inhomogeneity(). Thus, if a constraint is formulated for instance as a
+ * zero mean value of several degrees of freedom, one of the degrees has to be
+ * chosen to be eliminated.
+ *
+ * Note that the constraints are linear in the <i>x<sub>i</sub></i>, and that
+ * there might be a constant (non-homogeneous) term in the constraint. This is
+ * exactly the form we need for hanging node constraints, where we need to
+ * constrain one degree of freedom in terms of others. There are other
+ * conditions of this form possible, for example for implementing mean value
+ * conditions as is done in the step-11 tutorial program. The name of the
+ * class stems from the fact that these constraints can be represented in
+ * matrix form as <b>X</b> <i>x</i> = <i>b</i>, and this object then describes
+ * the matrix <b>X</b> (and the vector <i>b</i>; originally, the
+ * ConstraintMatrix class was only meant to handle homogeneous constraints
+ * where <i>b</i>=0, thus the name). The most frequent way to create/fill
+ * objects of this type is using the DoFTools::make_hanging_node_constraints()
+ * function. The use of these objects is first explained in step-6.
+ *
+ * Objects of the present type are organized in lines (rows), but only those
+ * lines are stored where constraints are present. New constraints are added
+ * by adding new lines using the add_line() function, and then populating it
+ * using the add_entry() function to a given line, or add_entries() to add
+ * more than one entry at a time. The right hand side element, if nonzero, can
+ * be set using the set_inhomogeneity() function. After all constraints have
+ * been added, you need to call close(), which compresses the storage format
+ * and sorts the entries.
+ *
+ * @note Many of the algorithms this class implements are discussed in the
+ * @ref hp_paper.
+ * The algorithms are also related to those shown in <i>M. S. Shephard: Linear
+ * multipoint constraints applied via transformation as part of a direct
+ * stiffness assembly process. Int. J. Numer. Meth. Engrg., vol. 20 (1984),
+ * pp. 2107-2112.</i>, with the difference that the algorithms shown there
+ * completely eliminated constrained degrees of freedom, whereas we usually
+ * keep them as part of the linear system.
+ *
+ * @ingroup dofs
+ * @ingroup constraints
+ * @author Wolfgang Bangerth, Martin Kronbichler, 1998, 2004, 2008, 2009
+ */
+class ConstraintMatrix : public Subscriptor
+{
+public:
+  /**
+   * Declare the type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * An enum that describes what should happen if the two ConstraintMatrix
+   * objects involved in a call to the merge() function happen to have
+   * constraints on the same degrees of freedom.
+   */
+  enum MergeConflictBehavior
+  {
+    /**
+     * Throw an exception if the two objects concerned have conflicting
+     * constraints on the same degree of freedom.
+     */
+    no_conflicts_allowed,
+
+    /**
+     * In an operation <code>cm1.merge(cm2)</code>, if <code>cm1</code> and
+     * <code>cm2</code> have constraints on the same degree of freedom, take
+     * the one from <code>cm1</code>.
+     */
+    left_object_wins,
+
+    /**
+     * In an operation <code>cm1.merge(cm2)</code>, if <code>cm1</code> and
+     * <code>cm2</code> have constraints on the same degree of freedom, take
+     * the one from <code>cm2</code>.
+     */
+    right_object_wins
+  };
+
+  /**
+   * Constructor. The supplied IndexSet defines which indices might be
+   * constrained inside this ConstraintMatrix. In a calculation with a
+   * DoFHandler object based on parallel::distributed::Triangulation or
+   * parallel::shared::Triangulation, one should use the set of locally
+   * relevant dofs (see
+   * @ref GlossLocallyRelevantDof).
+   *
+   * The given IndexSet allows the ConstraintMatrix to save memory by just not
+   * caring about degrees of freedom that are not of importance to the current
+   * processor. Alternatively, if no such IndexSet is provided, internal data
+   * structures for <i>all</i> possible indices will be created, leading to
+   * memory consumption on every processor that is proportional to the
+   * <i>overall</i> size of the problem, not just proportional to the size of
+   * the portion of the overall problem that is handled by the current
+   * processor.
+   */
+  explicit ConstraintMatrix (const IndexSet &local_constraints = IndexSet());
+
+  /**
+   * Copy constructor
+   */
+  explicit ConstraintMatrix (const ConstraintMatrix &constraint_matrix);
+
+  /**
+   * clear() the ConstraintMatrix object and supply an IndexSet with lines
+   * that may be constrained. This function is only relevant in the
+   * distributed case to supply a different IndexSet. Otherwise this routine
+   * is equivalent to calling clear(). See the constructor for details.
+   */
+  void reinit (const IndexSet &local_constraints = IndexSet());
+
+  /**
+   * Determines if we can store a constraint for the given @p line_index. This
+   * routine only matters in the distributed case and checks if the IndexSet
+   * allows storage of this line. Always returns true if not in the
+   * distributed case.
+   */
+  bool can_store_line (const size_type line_index) const;
+
+  /**
+   * Returns the index set describing locally relevant lines if any are
+   * present. Note that if no local lines were given, this represents an empty
+   * IndexSet, whereas otherwise it contains the global problem size and the
+   * local range.
+   */
+  const IndexSet &get_local_lines() const;
+
+  /**
+   * This function copies the content of @p constraints_in with DoFs that are
+   * element of the IndexSet @p filter. Elements that are not present in the
+   * IndexSet are ignored. All DoFs will be transformed to local index space
+   * of the filter, both the constrained DoFs and the other DoFs these entries
+   * are constrained to. The local index space of the filter is a contiguous
+   * numbering of all (global) DoFs that are elements in the filter.
+   *
+   * If, for example, the filter represents the range <tt>[10,20)</tt>, and
+   * the constraint matrix @p constraints_in includes the global indices
+   * <tt>{7,13,14}</tt>, the indices <tt>{3,4}</tt> are added to the calling
+   * constraint matrix (since 13 and 14 are elements in the filter and element
+   * 13 is the fourth element in the index set, and 14 is the fifth).
+   *
+   * This function provides an easy way to create a ConstraintMatrix for
+   * certain vector components in a vector-valued problem from a full
+   * ConstraintMatrix, i.e. extracting a diagonal subblock from a larger
+   * ConstraintMatrix. The block is specified by the IndexSet argument.
+   */
+  void add_selected_constraints (const ConstraintMatrix &constraints_in,
+                                 const IndexSet         &filter);
+
+  /**
+   * @name Adding constraints
+   * @{
+   */
+
+  /**
+   * Add a new line to the matrix. If the line already exists, then the
+   * function simply returns without doing anything.
+   */
+  void add_line (const size_type line);
+
+  /**
+   * Call the first add_line() function for every index <code>i</code> for
+   * which <code>lines[i]</code> is true.
+   *
+   * This function essentially exists to allow adding several constraints of
+   * the form <i>x<sub>i</sub></i>=0 all at once, where the set of indices
+   * <i>i</i> for which these constraints should be added are given by the
+   * argument of this function. On the other hand, just as if the single-
+   * argument add_line() function were called repeatedly, the constraints can
+   * later be modified to include linear dependencies using the add_entry()
+   * function as well as inhomogeneities using set_inhomogeneity().
+   */
+  void add_lines (const std::vector<bool> &lines);
+
+  /**
+   * Call the first add_line() function for every index <code>i</code> that
+   * appears in the argument.
+   *
+   * This function essentially exists to allow adding several constraints of
+   * the form <i>x<sub>i</sub></i>=0 all at once, where the set of indices
+   * <i>i</i> for which these constraints should be added are given by the
+   * argument of this function. On the other hand, just as if the single-
+   * argument add_line() function were called repeatedly, the constraints can
+   * later be modified to include linear dependencies using the add_entry()
+   * function as well as inhomogeneities using set_inhomogeneity().
+   */
+  void add_lines (const std::set<size_type> &lines);
+
+  /**
+   * Call the first add_line() function for every index <code>i</code> that
+   * appears in the argument.
+   *
+   * This function essentially exists to allow adding several constraints of
+   * the form <i>x<sub>i</sub></i>=0 all at once, where the set of indices
+   * <i>i</i> for which these constraints should be added are given by the
+   * argument of this function. On the other hand, just as if the single-
+   * argument add_line() function were called repeatedly, the constraints can
+   * later be modified to include linear dependencies using the add_entry()
+   * function as well as inhomogeneities using set_inhomogeneity().
+   */
+  void add_lines (const IndexSet &lines);
+
+  /**
+   * Add an entry to a given line. The list of lines is searched from the back
+   * to the front, so clever programming would add a new line (which is pushed
+   * to the back) and immediately afterwards fill the entries of that line.
+   * This way, no expensive searching is needed.
+   *
+   * If an entry with the same indices as the one this function call denotes
+   * already exists, then this function simply returns provided that the value
+   * of the entry is the same. Thus, it does no harm to enter a constraint
+   * twice.
+   */
+  void add_entry (const size_type line,
+                  const size_type column,
+                  const double value);
+
+  /**
+   * Add a whole series of entries, denoted by pairs of column indices and
+   * values, to a line of constraints. This function is equivalent to calling
+   * the preceding function several times, but is faster.
+   */
+  void add_entries (const size_type                                  line,
+                    const std::vector<std::pair<size_type,double> > &col_val_pairs);
+
+  /**
+   * Set an inhomogeneity to the constraint line <i>i</i>, according to the
+   * discussion in the general class description.
+   *
+   * @note the line needs to be added with one of the add_line() calls first.
+   */
+  void set_inhomogeneity (const size_type line,
+                          const double    value);
+
+  /**
+   * Close the filling of entries. Since the lines of a matrix of this type
+   * are usually filled in an arbitrary order and since we do not want to use
+   * associative containers to store the lines, we need to sort the lines
+   * and within the lines the columns before usage of the matrix. This is done
+   * through this function.
+   *
+   * Also, zero entries are discarded, since they are not needed.
+   *
+   * After closing, no more entries are accepted. If the object was already
+   * closed, then this function returns immediately.
+   *
+   * This function also resolves chains of constraints. For example, degree of
+   * freedom 13 may be constrained to $u_{13} = \frac{u_3}{2} + \frac{u_7}{2}$
+   * while degree of freedom 7 is itself constrained as $u_{7} = \frac{u_2}{2}
+   * + \frac{u_4}{2}$. Then, the resolution will be that $u_{13} =
+   * \frac{u_3}{2} + \frac{u_2}{4} + \frac{u_4}{4}$. Note, however, that
+   * cycles in this graph of constraints are not allowed, i.e. for example
+   * $u_4$ may not be constrained, directly or indirectly, to $u_{13}$ again.
+   */
+  void close ();
+
+  /**
+   * Merge the constraints represented by the object given as argument into
+   * the constraints represented by this object. Both objects may or may not
+   * be closed (by having their function close() called before). If this
+   * object was closed before, then it will be closed afterwards as well.
+   * Note, however, that if the other argument is closed, then merging may be
+   * significantly faster.
+   *
+   * Using the default value of the second argument, the constraints in each
+   * of the two objects (the old one represented by this object and the
+   * argument) may not refer to the same degree of freedom, i.e. a degree of
+   * freedom that is constrained in one object may not be constrained in the
+   * second. If this is nevertheless the case, an exception is thrown.
+   * However, this behavior can be changed by providing a different value for
+   * the second argument.
+   */
+  void merge (const ConstraintMatrix &other_constraints,
+              const MergeConflictBehavior merge_conflict_behavior = no_conflicts_allowed);
+
+  /**
+   * Shift all entries of this matrix down @p offset rows and over @p offset
+   * columns.
+   *
+   * This function is useful if you are building block matrices, where all
+   * blocks are built by the same DoFHandler object, i.e. the matrix size is
+   * larger than the number of degrees of freedom. Since several matrix rows
+   * and columns correspond to the same degrees of freedom, you'd generate
+   * several constraint objects, then shift them, and finally merge() them
+   * together again.
+   */
+  void shift (const size_type offset);
+
+  /**
+   * Clear all entries of this matrix. Reset the flag determining whether new
+   * entries are accepted or not.
+   *
+   * This function may be called also on objects which are empty or already
+   * cleared.
+   */
+  void clear ();
+
+  /**
+   * @}
+   */
+
+
+  /**
+   * @name Querying constraints
+   * @{
+   */
+
+  /**
+   * Return number of constraints stored in this matrix.
+   */
+  size_type n_constraints () const;
+
+  /**
+   * Return whether the degree of freedom with number @p index is a
+   * constrained one.
+   *
+   * Note that if close() was called before, then this function is
+   * significantly faster, since then the constrained degrees of freedom are
+   * sorted and we can do a binary search, while before close() was called, we
+   * have to perform a linear search through all entries.
+   */
+  bool is_constrained (const size_type index) const;
+
+  /**
+   * Return whether the dof is constrained, and whether it is constrained to
+   * only one other degree of freedom with weight one. The function therefore
+   * returns whether the degree of freedom would simply be eliminated in favor
+   * of exactly one other degree of freedom.
+   *
+   * The function returns @p false if either the degree of freedom is not
+   * constrained at all, or if it is constrained to more than one other degree
+   * of freedom, or if it is constrained to only one degree of freedom but
+   * with a weight different from one.
+   */
+  bool is_identity_constrained (const size_type index) const;
+
+  /**
+   * Return whether the two given degrees of freedom are linked by an equality
+   * constraint that either constrains index1 to be so that
+   * <code>index1=index2</code> or constrains index2 so that
+   * <code>index2=index1</code>.
+   */
+  bool are_identity_constrained (const size_type index1,
+                                 const size_type index2) const;
+
+  /**
+   * Return the maximum number of other dofs that one dof is constrained to.
+   * For example, in 2d a hanging node is constrained only to its two
+   * neighbors, so the returned value would be 2. However, for higher order
+   * elements and/or higher dimensions, or other types of constraints, this
+   * number is no longer obvious.
+   *
+   * The name indicates that within the system matrix, references to a
+   * constrained node are indirected to the nodes it is constrained to.
+   */
+  size_type max_constraint_indirections () const;
+
+  /**
+   * Returns <tt>true</tt> in case the dof is constrained and there is a non-
+   * trivial inhomogeneous value set for the dof.
+   */
+  bool is_inhomogeneously_constrained (const size_type index) const;
+
+  /**
+   * Returns <tt>false</tt> if all constraints in the ConstraintMatrix are
+   * homogeneous ones, and <tt>true</tt> if there is at least one
+   * inhomogeneity.
+   */
+  bool has_inhomogeneities () const;
+
+  /**
+   * Returns a pointer to the vector of entries if a line is constrained,
+   * and a zero pointer in case the dof is not constrained.
+   */
+  const std::vector<std::pair<size_type,double> > *
+  get_constraint_entries (const size_type line) const;
+
+  /**
+   * Returns the value of the inhomogeneity stored in the constrained dof @p
+   * line. Unconstrained dofs also return a zero value.
+   */
+  double get_inhomogeneity (const size_type line) const;
+
+  /**
+   * Print the constraint lines. Mainly for debugging purposes.
+   *
+   * This function writes out all entries in the constraint matrix lines with
+   * their value in the form <tt>row col : value</tt>. Unconstrained lines
+   * containing only one identity entry are not stored in this object and are
+   * not printed.
+   */
+  void print (std::ostream &) const;
+
+  /**
+   * Write the graph of constraints in 'dot' format. 'dot' is a program that
+   * can take a list of nodes and produce a graphical representation of the
+   * graph of constrained degrees of freedom and the degrees of freedom they
+   * are constrained to.
+   *
+   * The output of this function can be used as input to the 'dot' program
+   * that can convert the graph into a graphical representation in postscript,
+   * png, xfig, and a number of other formats.
+   *
+   * This function exists mostly for debugging purposes.
+   */
+  void write_dot (std::ostream &) const;
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  std::size_t memory_consumption () const;
+
+  /**
+   * Add the constraint indices associated to the indices in the given vector.
+   * After a call to this function, the indices vector contains the initial
+   * elements and all the associated constrained indices. This function sorts
+   * the elements and suppresses duplicates.
+   */
+  void resolve_indices(std::vector<types::global_dof_index> &indices) const;
+
+  /**
+   * @}
+   */
+
+  /**
+   * @name Eliminating constraints from linear systems after their creation
+   * @{
+   */
+
+
+  /**
+   * Condense a sparsity pattern. The name of the function mimics the name of
+   * the function we use to condense linear systems, but it is a bit of a
+   * misnomer for the current context. This is because in the context of
+   * linear systems, we eliminate certain rows and columns of the linear
+   * system, i.e., we "reduce" or "condense" the linear system. On the other
+   * hand, in the current context, the function does not remove nonzero
+   * entries from the sparsity pattern. Rather, it adds those nonzero entry
+   * locations to the sparsity pattern that will later be needed for the
+   * process of condensation of constrained degrees of freedom from a linear
+   * system.
+   *
+   * Since this function adds new nonzero entries to the sparsity pattern, the
+   * given sparsity pattern must not be compressed. The constraint matrix
+   * (i.e., the current object) must be closed. The sparsity pattern is
+   * compressed at the end of the function.
+   */
+  void condense (SparsityPattern &sparsity) const;
+
+  /**
+   * Same function as above, but condenses square block sparsity patterns.
+   */
+  void condense (BlockSparsityPattern &sparsity) const;
+
+  /**
+   * Same function as above, but condenses square dynamic sparsity
+   * patterns.
+   */
+  void condense (DynamicSparsityPattern &sparsity) const;
+
+  /**
+   * Same function as above, but condenses square block dynamic sparsity
+   * patterns.
+   */
+  void condense (BlockDynamicSparsityPattern &sparsity) const;
+
+  /**
+   * Condense a given matrix, i.e., eliminate the rows and columns of the
+   * matrix that correspond to constrained degrees of freedom.
+   *
+   * See the general documentation of this class for more detailed
+   * information.
+   */
+  template<typename number>
+  void condense (SparseMatrix<number> &matrix) const;
+
+  /**
+   * Same function as above, but condenses square block sparse matrices.
+   */
+  template <typename number>
+  void condense (BlockSparseMatrix<number> &matrix) const;
+
+  /**
+   * Condense the given vector in-place. The @p VectorType may be a
+   * Vector<float>, Vector<double>, BlockVector<tt><...></tt>, a PETSc or
+   * Trilinos vector wrapper class, or any other type having the same
+   * interface. Note that this function does not take any inhomogeneity into
+   * account and throws an exception in case there are any inhomogeneities.
+   * Use the function using both a matrix and vector for that case.
+   *
+   * @note This function does not work for MPI vectors. Use condense() with
+   * two vector arguments instead.
+   */
+  template <class VectorType>
+  void condense (VectorType &vec) const;
+
+  /**
+   * The function copies and condenses values from @p vec_ghosted into @p
+   * output. In a serial code it is equivalent to calling condense (vec). If
+   * called in parallel, @p vec_ghosted is supposed to contain ghost elements
+   * while @p output should not.
+   */
+  template <class VectorType>
+  void condense (const VectorType &vec_ghosted,
+                 VectorType       &output) const;
+
+  /**
+   * Condense a given matrix and a given vector by eliminating rows and
+   * columns of the linear system that correspond to constrained degrees of
+   * freedom. The sparsity pattern associated with the matrix needs to be
+   * condensed and compressed.  This function is the appropriate choice for
+   * applying inhomogeneous constraints.
+   *
+   * The constraint matrix object must be closed to call this function.
+   *
+   * See the general documentation of this class for more detailed
+   * information.
+   */
+  template<typename number, class VectorType>
+  void condense (SparseMatrix<number> &matrix,
+                 VectorType           &vector) const;
+
+  /**
+   * Same function as above, but condenses square block sparse matrices and
+   * vectors.
+   */
+  template <typename number, class BlockVectorType>
+  void condense (BlockSparseMatrix<number> &matrix,
+                 BlockVectorType           &vector) const;
+
+  /**
+   * Sets the values of all constrained DoFs in a vector to zero.  The @p
+   * VectorType may be a Vector<float>, Vector<double>,
+   * BlockVector<tt><...></tt>, a PETSc or Trilinos vector wrapper class, or
+   * any other type having the same interface.
+   */
+  template <class VectorType>
+  void set_zero (VectorType &vec) const;
+
+  /**
+   * @}
+   */
+
+  /**
+   * @name Eliminating constraints from linear systems during their creation
+   * @{
+   */
+
+  /**
+   * This function takes a vector of local contributions (@p local_vector)
+   * corresponding to the degrees of freedom indices given in @p
+   * local_dof_indices and distributes them to the global vector. In most
+   * cases, these local contributions will be the result of an integration
+   * over a cell or face of a cell. However, as long as @p local_vector and @p
+   * local_dof_indices have the same number of elements, this function is
+   * happy with whatever it is given.
+   *
+   * In contrast to the similar function in the DoFAccessor class, this
+   * function also takes care of constraints, i.e. if one of the elements of
+   * @p local_dof_indices belongs to a constrained node, then rather than
+   * writing the corresponding element of @p local_vector into @p
+   * global_vector, the element is distributed to the entries in the global
+   * vector to which this particular degree of freedom is constrained.
+   *
+   * Thus, by using this function to distribute local contributions to the
+   * global object, one saves the call to the condense function after the
+   * vectors and matrices are fully assembled. On the other hand, by
+   * consequence, the function does not only write into the entries enumerated
+   * by the @p local_dof_indices array, but also (possibly) others as
+   * necessary.
+   *
+   * Note that this function will apply all constraints as if they were
+   * homogeneous. For correctly setting inhomogeneous constraints, use the
+   * similar function with a matrix argument or the function with both matrix
+   * and vector arguments.
+   *
+   * @note This function in itself is thread-safe, i.e., it works properly
+   * also when several threads call it simultaneously. However, the function
+   * call is only thread-safe if the underlying global vector allows for
+   * simultaneous access and the access is not to rows with the same global
+   * index at the same time. This needs to be made sure from the caller's
+   * site. There is no locking mechanism inside this method to prevent data
+   * races.
+   *
+   * @param[in] local_vector Vector of local contributions.
+   * @param[in] local_dof_indices Local degrees of freedom indices
+   * corresponding to the vector of local contributions.
+   * @param[out]  global_vector The global vector to which all local
+   * contributions will be added.
+   */
+  template <class InVector, class OutVector>
+  void
+  distribute_local_to_global (const InVector               &local_vector,
+                              const std::vector<size_type> &local_dof_indices,
+                              OutVector                    &global_vector) const;
+
+  /**
+   * This function takes a vector of local contributions (@p local_vector)
+   * corresponding to the degrees of freedom indices given in @p
+   * local_dof_indices and distributes them to the global vector. In most
+   * cases, these local contributions will be the result of an integration
+   * over a cell or face of a cell. However, as long as @p local_vector and @p
+   * local_dof_indices have the same number of elements, this function is
+   * happy with whatever it is given.
+   *
+   * In contrast to the similar function in the DoFAccessor class, this
+   * function also takes care of constraints, i.e. if one of the elements of
+   * @p local_dof_indices belongs to a constrained node, then rather than
+   * writing the corresponding element of @p local_vector into @p
+   * global_vector, the element is distributed to the entries in the global
+   * vector to which this particular degree of freedom is constrained.
+   *
+   * Thus, by using this function to distribute local contributions to the
+   * global object, one saves the call to the condense function after the
+   * vectors and matrices are fully assembled. On the other hand, by
+   * consequence, the function does not only write into the entries enumerated
+   * by the @p local_dof_indices array, but also (possibly) others as
+   * necessary. This includes writing into diagonal elements of the matrix if
+   * the corresponding degree of freedom is constrained.
+   *
+   * The fourth argument <tt>local_matrix</tt> is intended to be used in case
+   * one wants to apply inhomogeneous constraints on the vector only. Such a
+   * situation could be where one wants to assemble a right hand side
+   * vector on a problem with inhomogeneous constraints, but the global matrix
+   * has been assembled previously. A typical example of this is a time
+   * stepping algorithm where the stiffness matrix is assembled once, and the
+   * right hand side updated every time step. Note that, however, the entries
+   * in the columns of the local matrix have to be exactly the same as those
+   * that have been written into the global matrix. Otherwise, this function
+   * will not be able to correctly handle inhomogeneities.
+   *
+   * @note This function in itself is thread-safe, i.e., it works properly
+   * also when several threads call it simultaneously. However, the function
+   * call is only thread-safe if the underlying global vector allows for
+   * simultaneous access and the access is not to rows with the same global
+   * index at the same time. This needs to be made sure from the caller's
+   * site. There is no locking mechanism inside this method to prevent data
+   * races.
+   */
+  template <typename VectorType, typename LocalType>
+  void
+  distribute_local_to_global (const Vector<LocalType>      &local_vector,
+                              const std::vector<size_type> &local_dof_indices,
+                              VectorType                   &global_vector,
+                              const FullMatrix<LocalType>  &local_matrix) const;
+
+  /**
+   * Enter a single value into a result vector, obeying constraints.
+   */
+  template <class VectorType>
+  void
+  distribute_local_to_global (const size_type index,
+                              const double    value,
+                              VectorType     &global_vector) const;
+
+  /**
+   * This function takes a range of local contributions, given by the
+   * iterators @p local_vector_begin and @p local_vector_end, corresponding
+   * to the degrees of freedom indices starting at @p local_indices_begin,
+   * and distributes them to the global vector. In most cases, these local
+   * contributions will be the result of an integration over a cell or face
+   * of a cell. However, as long as the indices indicate reasonable global
+   * vector entries, this function is happy with whatever it is given.
+   *
+   * If one of the given indices belongs to a constrained node, then rather
+   * than writing the corresponding local element into @p global_vector, the
+   * element is distributed to the entries in the global vector to which
+   * this particular degree of freedom is constrained.
+   *
+   * Thus, by using this function to distribute local contributions to the
+   * global object, one saves the call to the condense function after the
+   * vectors and matrices are fully assembled. Note that this function
+   * completely ignores inhomogeneous constraints.
+   *
+   * @note This function in itself is thread-safe, i.e., it works properly
+   * also when several threads call it simultaneously. However, the function
+   * call is only thread-safe if the underlying global vector allows for
+   * simultaneous access and the access is not to rows with the same global
+   * index at the same time. This needs to be made sure from the caller's
+   * site. There is no locking mechanism inside this method to prevent data
+   * races.
+   */
+  template <typename ForwardIteratorVec, typename ForwardIteratorInd,
+            class VectorType>
+  void
+  distribute_local_to_global (ForwardIteratorVec local_vector_begin,
+                              ForwardIteratorVec local_vector_end,
+                              ForwardIteratorInd local_indices_begin,
+                              VectorType        &global_vector) const;
+
+  /**
+   * This function takes a matrix of local contributions (@p local_matrix)
+   * corresponding to the degrees of freedom indices given in @p
+   * local_dof_indices and distributes them to the global matrix. In most
+   * cases, these local contributions will be the result of an integration
+   * over a cell or face of a cell. However, as long as @p local_matrix and @p
+   * local_dof_indices have the same number of elements, this function is
+   * happy with whatever it is given.
+   *
+   * In contrast to the similar function in the DoFAccessor class, this
+   * function also takes care of constraints, i.e. if one of the elements of
+   * @p local_dof_indices belongs to a constrained node, then rather than
+   * writing the corresponding element of @p local_matrix into @p
+   * global_matrix, the element is distributed to the entries in the global
+   * matrix to which this particular degree of freedom is constrained.
+   *
+   * With this scheme, we never write into rows or columns of constrained
+   * degrees of freedom. In order to make sure that the resulting matrix can
+   * still be inverted, we need to do something with the diagonal elements
+   * corresponding to constrained nodes. Thus, if a degree of freedom in @p
+   * local_dof_indices is constrained, we distribute the corresponding entries
+   * in the matrix, but also add the absolute value of the diagonal entry of
+   * the local matrix to the corresponding entry in the global matrix.
+   * Assuming the discretized operator is positive definite, this guarantees
+   * that the diagonal entry is always non-zero, positive, and of the same
+   * order of magnitude as the other entries of the matrix. On the other hand,
+   * when solving a source problem $Au=f$ the exact value of the diagonal
+   * element is not important, since the value of the respective degree of
+   * freedom will be overwritten by the distribute() call later on anyway.
+   *
+   * @note The procedure described above adds an unforeseeable number of
+   * artificial eigenvalues to the spectrum of the matrix. Therefore, it is
+   * recommended to use the equivalent function with two local index vectors
+   * in such a case.
+   *
+   * By using this function to distribute local contributions to the global
+   * object, one saves the call to the condense function after the vectors and
+   * matrices are fully assembled.
+   *
+   * @note This function in itself is thread-safe, i.e., it works properly
+   * also when several threads call it simultaneously. However, the function
+   * call is only thread-safe if the underlying global matrix allows for
+   * simultaneous access and the access is not to rows with the same global
+   * index at the same time. This needs to be made sure from the caller's
+   * site. There is no locking mechanism inside this method to prevent data
+   * races.
+   */
+  template <typename MatrixType>
+  void
+  distribute_local_to_global (const FullMatrix<typename MatrixType::value_type> &local_matrix,
+                              const std::vector<size_type> &local_dof_indices,
+                              MatrixType                   &global_matrix) const;
+
+  /**
+   * Does almost the same as the function above but can treat general
+   * rectangular matrices.  The main difference to achieve this is that the
+   * diagonal entries in constrained rows are left untouched instead of being
+   * filled with arbitrary values.
+   *
+   * Since the diagonal entries corresponding to eliminated degrees of freedom
+   * are not set, the result may have a zero eigenvalue, if applied to a
+   * square matrix. This has to be considered when solving the resulting
+   * problems. For solving a source problem $Au=f$, it is possible to set the
+   * diagonal entry after building the matrix by a piece of code of the form
+   *
+   * @code
+   *   for (unsigned int i=0;i<matrix.m();++i)
+   *     if (constraints.is_constrained(i))
+   *       matrix.diag_element(i) = 1.;
+   * @endcode
+   *
+   * The value of one which is used here is arbitrary, but in the context of
+   * Krylov space methods uncritical, since it corresponds to an invariant
+   * subspace. If the other matrix entries are smaller or larger by a factor
+   * close to machine accuracy, it may be advisable to adjust it.
+   *
+   * For solving eigenvalue problems, this will only add one spurious zero
+   * eigenvalue (with a multiplicity that is possibly greater than one).
+   * Taking this into account, nothing else has to be changed.
+   */
+  template <typename MatrixType>
+  void
+  distribute_local_to_global (const FullMatrix<typename MatrixType::value_type> &local_matrix,
+                              const std::vector<size_type> &row_indices,
+                              const std::vector<size_type> &col_indices,
+                              MatrixType                   &global_matrix) const;
+
+  /**
+   * This function simultaneously writes elements into matrix and vector,
+   * according to the constraints specified by the calling ConstraintMatrix.
+   * This function can correctly handle inhomogeneous constraints as well. For
+   * the parameter use_inhomogeneities_for_rhs see the documentation in the
+   * @ref constraints
+   * module.
+   *
+   * @note This function in itself is thread-safe, i.e., it works properly
+   * also when several threads call it simultaneously. However, the function
+   * call is only thread-safe if the underlying global matrix and vector allow
+   * for simultaneous access and the access is not to rows with the same
+   * global index at the same time. This needs to be made sure from the
+   * caller's site. There is no locking mechanism inside this method to
+   * prevent data races.
+   */
+  template <typename MatrixType, typename VectorType>
+  void
+  distribute_local_to_global (const FullMatrix<typename MatrixType::value_type> &local_matrix,
+                              const Vector<typename VectorType::value_type>     &local_vector,
+                              const std::vector<size_type>  &local_dof_indices,
+                              MatrixType                    &global_matrix,
+                              VectorType                    &global_vector,
+                              bool                          use_inhomogeneities_for_rhs = false) const;
+
+  /**
+   * Do a similar operation as the distribute_local_to_global() function that
+   * distributes writing entries into a matrix for constrained degrees of
+   * freedom, except that here we don't write into a matrix but only allocate
+   * sparsity pattern entries.
+   *
+   * As explained in the
+   * @ref hp_paper "hp paper"
+   * and in step-27, first allocating a sparsity pattern and later coming back
+   * and allocating additional entries for those matrix entries that will be
+   * written to due to the elimination of constrained degrees of freedom
+   * (using ConstraintMatrix::condense() ), can be a very expensive procedure.
+   * It is cheaper to allocate these entries right away without having to do a
+   * second pass over the sparsity pattern object. This function does exactly
+   * that.
+   *
+   * Because the function only allocates entries in a sparsity pattern, all it
+   * needs to know are the degrees of freedom that couple to each other.
+   * Unlike the previous function, no actual values are written, so the second
+   * input argument is not necessary here.
+   *
+   * The third argument to this function, keep_constrained_entries determines
+   * whether the function shall allocate entries in the sparsity pattern at
+   * all for entries that will later be set to zero upon condensation of the
+   * matrix. These entries are necessary if the matrix is built unconstrained,
+   * and only later condensed. They are not necessary if the matrix is built
+   * using the distribute_local_to_global() function of this class which
+   * distributes entries right away when copying a local matrix into a global
+   * object. The default of this argument is true, meaning to allocate the few
+   * entries that may later be set to zero.
+   *
+   * By default, the function adds entries for all pairs of indices given in
+   * the first argument to the sparsity pattern (unless
+   * keep_constrained_entries is false). However, sometimes one would like to
+   * only add a subset of all of these pairs. In that case, the last argument
+   * can be used which specifies a boolean mask which of the pairs of indices
+   * should be considered. If the mask is false for a pair of indices, then no
+   * entry will be added to the sparsity pattern for this pair, irrespective
+   * of whether one or both of the indices correspond to constrained degrees
+   * of freedom.
+   *
+   * This function is not typically called from user code, but is used in the
+   * DoFTools::make_sparsity_pattern() function when passed a constraint
+   * matrix object.
+   *
+   * @note This function in itself is thread-safe, i.e., it works properly
+   * also when several threads call it simultaneously. However, the function
+   * call is only thread-safe if the underlying global sparsity pattern allows
+   * for simultaneous access and the access is not to rows with the same
+   * global index at the same time. This needs to be made sure from the
+   * caller's site. There is no locking mechanism inside this method to
+   * prevent data races.
+   */
+  template <typename SparsityPatternType>
+  void
+  add_entries_local_to_global (const std::vector<size_type> &local_dof_indices,
+                               SparsityPatternType          &sparsity_pattern,
+                               const bool                    keep_constrained_entries = true,
+                               const Table<2,bool>          &dof_mask                 = default_empty_table) const;
+
+  /**
+   * Similar to the function above, but for non-square sparsity patterns.
+   */
+  template <typename SparsityPatternType>
+  void
+  add_entries_local_to_global (const std::vector<size_type> &row_indices,
+                               const std::vector<size_type> &col_indices,
+                               SparsityPatternType          &sparsity_pattern,
+                               const bool                    keep_constrained_entries = true,
+                               const Table<2,bool>          &dof_mask                 = default_empty_table) const;
+
+  /**
+   * This function imports values from a global vector (@p global_vector) by
+   * applying the constraints to a range of local values, given by the
+   * iterators @p local_vector_begin and @p local_vector_end. In most cases,
+   * the local values will be identified by the local dof values on a cell.
+   * However, as long as the indices starting at @p local_indices_begin
+   * indicate reasonable global vector entries, this function is happy with
+   * whatever it is given.
+   *
+   * If one of the given indices belongs to a constrained node, then rather
+   * than writing the corresponding element of @p global_vector into the
+   * local range, the constraints are resolved as the respective distribute
+   * function does, i.e., the local entry is constructed from the global
+   * entries to which this particular degree of freedom is constrained.
+   *
+   * In contrast to the similar function get_dof_values in the DoFAccessor
+   * class, this function does not need the constrained values to be correctly
+   * set (i.e., distribute to be called).
+   */
+  template <typename ForwardIteratorVec, typename ForwardIteratorInd,
+            class VectorType>
+  void
+  get_dof_values (const VectorType  &global_vector,
+                  ForwardIteratorInd local_indices_begin,
+                  ForwardIteratorVec local_vector_begin,
+                  ForwardIteratorVec local_vector_end) const;
+
+  /**
+   * @}
+   */
+
+  /**
+   * @name Dealing with constraints after solving a linear system
+   * @{
+   */
+
+  /**
+   * Given a vector, set all constrained degrees of freedom to values so that
+   * the constraints are satisfied. For example, if the current object stores
+   * the constraint $x_3=\frac 12 x_1 + \frac 12 x_2$, then this function will
+   * read the values of $x_1$ and $x_2$ from the given vector and set the
+   * element $x_3$ according to this constraint. Similarly, if the current
+   * object stores the constraint $x_{42}=208$, then this function will set
+   * the 42nd element of the given vector to 208.
+   *
+   * @note If this function is called with a parallel vector @p vec, then the
+   * vector must not contain ghost elements.
+   */
+  template <class VectorType>
+  void distribute (VectorType &vec) const;
+
+  /**
+   * @}
+   */
+
+  /**
+   * Exception used when the object is modified although it has already been
+   * marked as sorted. In this file, add_line() and add_entry() assert
+   * <tt>sorted==false</tt> with this exception.
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcMatrixIsClosed);
+  /**
+   * Exception without arguments. NOTE(review): no use of this exception is
+   * visible in this part of the file; judging by its name it is the
+   * counterpart of ExcMatrixIsClosed for operations that require a closed
+   * object -- confirm against the callers.
+   *
+   * @ingroup Exceptions
+   */
+  DeclException0 (ExcMatrixNotClosed);
+  /**
+   * Exception stating that the line whose number is passed as the single
+   * argument does not exist.
+   *
+   * @ingroup Exceptions
+   */
+  DeclException1 (ExcLineInexistant,
+                  size_type,
+                  << "The specified line " << arg1
+                  << " does not exist.");
+  /**
+   * Exception used by add_entry() when an entry for the given pair of
+   * indices already exists but its stored value differs from the one that
+   * is being entered. The arguments are, in this order, the two indices,
+   * the old value, and the new value; the message also prints their
+   * difference.
+   *
+   * @ingroup Exceptions
+   */
+  DeclException4 (ExcEntryAlreadyExists,
+                  size_type, size_type, double, double,
+                  << "The entry for the indices " << arg1 << " and "
+                  << arg2 << " already exists, but the values "
+                  << arg3 << " (old) and " << arg4 << " (new) differ "
+                  << "by " << (arg4-arg3) << ".");
+  /**
+   * Exception stating that a degree of freedom (first argument) was
+   * constrained to another degree of freedom (second argument) that is
+   * itself constrained. Note that both arguments are declared as
+   * <tt>int</tt> rather than size_type.
+   *
+   * @ingroup Exceptions
+   */
+  DeclException2 (ExcDoFConstrainedToConstrainedDoF,
+                  int, int,
+                  << "You tried to constrain DoF " << arg1
+                  << " to DoF " << arg2
+                  << ", but that one is also constrained. This is not allowed!");
+  /**
+   * Exception stating that the degree of freedom given as argument is
+   * constrained by both objects taking part in a merge operation.
+   *
+   * @ingroup Exceptions
+   */
+  DeclException1 (ExcDoFIsConstrainedFromBothObjects,
+                  size_type,
+                  << "Degree of freedom " << arg1
+                  << " is constrained from both object in a merge operation.");
+  /**
+   * Exception stating that, in an object passed as argument, a degree of
+   * freedom is constrained to the degree of freedom given here, which in
+   * turn is constrained by the current object.
+   *
+   * @ingroup Exceptions
+   */
+  DeclException1 (ExcDoFIsConstrainedToConstrainedDoF,
+                  size_type,
+                  << "In the given argument a degree of freedom is constrained "
+                  << "to another DoF with number " << arg1
+                  << ", which however is constrained by this object. This is not"
+                  << " allowed.");
+  /**
+   * Exception stating that a constraint is being added for the degree of
+   * freedom given as argument although, according to the index set handed
+   * to this object, constraints for it should not be stored here.
+   *
+   * @ingroup Exceptions
+   */
+  DeclException1 (ExcRowNotStoredHere,
+                  size_type,
+                  << "The index set given to this constraint matrix indicates "
+                  << "constraints for degree of freedom " << arg1
+                  << " should not be stored by this object, but a constraint "
+                  << "is being added.");
+
+  /**
+   * Exception used by add_entry() when the constraint for one degree of
+   * freedom (first argument) refers to another degree of freedom (second
+   * argument) that is not an element of the index set handed to this
+   * object.
+   *
+   * @ingroup Exceptions
+   */
+  DeclException2 (ExcColumnNotStoredHere,
+                  size_type,
+                  size_type,
+                  << "The index set given to this constraint matrix indicates "
+                  << "constraints using degree of freedom " << arg2
+                  << " should not be stored by this object, but a constraint "
+                  << "for degree of freedom " << arg1 <<" uses it.");
+
+  /**
+   * Exception stating that, while distributing the constraint for a degree
+   * of freedom (first argument), a processor owning one of the degrees of
+   * freedom it is constrained against (their number is the second argument)
+   * turned out not to know about that constraint. The message points at an
+   * incompletely initialized locally relevant index set as the likely
+   * cause. Note that both arguments are declared as <tt>int</tt> rather
+   * than size_type.
+   *
+   * @ingroup Exceptions
+   */
+  DeclException2 (ExcIncorrectConstraint,
+                  int, int,
+                  << "While distributing the constraint for DoF "
+                  << arg1 << ", it turns out that one of the processors "
+                  << "who own the " << arg2
+                  << " degrees of freedom that x_" << arg1
+                  << " is constrained against does not know about "
+                  << "the constraint on x_" << arg1
+                  << ". Did you not initialize the ConstraintMatrix "
+                  << "with the appropriate locally_relevant set so "
+                  << "that every processor who owns a DoF that constrains "
+                  << "another DoF also knows about this constraint?");
+
+private:
+
+  /**
+   * This class represents one line of a constraint matrix.
+   */
+  struct ConstraintLine
+  {
+    /**
+     * A data type in which we store the list of entries that make up the
+     * homogeneous part of a constraint. Each entry is a pair consisting of
+     * an index (the <tt>column</tt> argument of
+     * ConstraintMatrix::add_entry()) and the weight with which that index
+     * enters the constraint.
+     */
+    typedef std::vector<std::pair<size_type,double> > Entries;
+
+    /**
+     * Number of this line. Since only very few lines are stored, we can not
+     * assume a specific order and have to store the line number explicitly.
+     */
+    size_type line;
+
+    /**
+     * Indices and weights of the entries in this line, i.e., the pairs
+     * entered through ConstraintMatrix::add_entry().
+     *
+     * For the reason why we use a vector instead of a map and the
+     * consequences thereof, the same applies as what is said for
+     * ConstraintMatrix::lines.
+     */
+    Entries entries;
+
+    /**
+     * Value of the inhomogeneity. It is set to zero by
+     * ConstraintMatrix::add_line() and can be changed with
+     * ConstraintMatrix::set_inhomogeneity().
+     */
+    double inhomogeneity;
+
+    /**
+     * This operator is a bit weird and unintuitive: it compares the line
+     * numbers of two lines. We need this to sort the lines; in fact we could
+     * do this using a comparison predicate.  However, this way, it is easier,
+     * albeit unintuitive since two lines really have no god-given order
+     * relation.
+     */
+    bool operator < (const ConstraintLine &) const;
+
+    /**
+     * This operator is likewise weird: it checks whether the line indices of
+     * the two operands are equal, irrespective of the fact that the contents
+     * of the line may be different.
+     */
+    bool operator == (const ConstraintLine &) const;
+
+    /**
+     * Determine an estimate for the memory consumption (in bytes) of this
+     * object.
+     */
+    std::size_t memory_consumption () const;
+  };
+
+  /**
+   * Store the lines of the matrix.  Entries are usually appended in an
+   * arbitrary order and insertion into a vector is done best at the end, so
+   * the order is unspecified after all entries are inserted. Sorting of the
+   * entries takes place when calling the <tt>close()</tt> function.
+   *
+   * We could, instead of using a vector, use an associative array, like a map
+   * to store the lines. This, however, would mean a much more fragmented heap
+   * since it allocates many small objects, and would additionally make usage
+   * of this matrix much slower.
+   */
+  std::vector<ConstraintLine> lines;
+
+  /**
+   * A list of size_type that contains the position of the ConstraintLine of a
+   * constrained degree of freedom, or numbers::invalid_size_type if the
+   * degree of freedom is not constrained. Comparing an entry against
+   * numbers::invalid_size_type thus tells whether there is a constraint
+   * line for a given degree of freedom index. Note that this class has no
+   * notion of how many
+   * degrees of freedom there really are, so if we check whether there is a
+   * constraint line for a given degree of freedom, then this vector may
+   * actually be shorter than the index of the DoF we check for.
+   *
+   * This field exists since when adding a new constraint line we have to
+   * figure out whether it already exists. Previously, we would simply walk
+   * the unsorted list of constraint lines until we either hit the end or
+   * found it. This algorithm is O(N) if N is the number of constraints, which
+   * makes it O(N^2) when inserting all constraints. For large problems with
+   * many constraints, this could easily take 5-10 per cent of the total run
+   * time. With this field, we can save this time since we find any constraint
+   * in O(1) time or get to know that a certain degree of freedom is not
+   * constrained.
+   *
+   * To make things worse, traversing the list of existing constraints
+   * requires reads from many different places in memory. Thus, in large 3d
+   * applications, the add_line() function showed up very prominently in the
+   * overall compute time, mainly because it generated a lot of cache misses.
+   * This should also be fixed by using the O(1) algorithm to access the
+   * fields of this array.
+   *
+   * The field is useful in a number of other contexts as well, e.g. when one
+   * needs random access to the constraints as in all the functions that apply
+   * constraints on the fly while adding cell contributions to vectors and
+   * matrices.
+   */
+  std::vector<size_type> lines_cache;
+
+  /**
+   * This IndexSet is used to limit the lines to save in the ConstraintMatrix
+   * to a subset. This is necessary, because the lines_cache vector would
+   * become too big in a distributed calculation.
+   */
+  IndexSet local_lines;
+
+  /**
+   * Store whether the arrays are sorted.  If so, no new entries can be added.
+   */
+  bool sorted;
+
+  /**
+   * Internal function to calculate the index of line @p line in the vector
+   * lines_cache using local_lines.
+   */
+  size_type calculate_line_index (const size_type line) const;
+
+  /**
+   * Return @p true if the weight of an entry (the second element of the pair)
+   * equals zero. This function is used to delete entries with zero weight.
+   */
+  static bool check_zero_weight (const std::pair<size_type, double> &p);
+
+  /**
+   * Dummy table that serves as default argument for function
+   * <tt>add_entries_local_to_global()</tt>.
+   */
+  static const Table<2,bool> default_empty_table;
+
+  /**
+   * This function actually implements the local_to_global function for
+   * standard (non-block) matrices.
+   */
+  template <typename MatrixType, typename VectorType>
+  void
+  distribute_local_to_global (const FullMatrix<typename MatrixType::value_type>  &local_matrix,
+                              const Vector<typename VectorType::value_type>      &local_vector,
+                              const std::vector<size_type> &local_dof_indices,
+                              MatrixType                   &global_matrix,
+                              VectorType                   &global_vector,
+                              bool                          use_inhomogeneities_for_rhs,
+                              internal::bool2type<false>) const;
+
+  /**
+   * This function actually implements the local_to_global function for block
+   * matrices.
+   */
+  template <typename MatrixType, typename VectorType>
+  void
+  distribute_local_to_global (const FullMatrix<typename MatrixType::value_type>  &local_matrix,
+                              const Vector<typename VectorType::value_type>      &local_vector,
+                              const std::vector<size_type> &local_dof_indices,
+                              MatrixType                   &global_matrix,
+                              VectorType                   &global_vector,
+                              bool                          use_inhomogeneities_for_rhs,
+                              internal::bool2type<true>) const;
+
+  /**
+   * This function actually implements the local_to_global function for
+   * standard (non-block) sparsity types.
+   */
+  template <typename SparsityPatternType>
+  void
+  add_entries_local_to_global (const std::vector<size_type> &local_dof_indices,
+                               SparsityPatternType          &sparsity_pattern,
+                               const bool                    keep_constrained_entries,
+                               const Table<2,bool>          &dof_mask,
+                               internal::bool2type<false>) const;
+
+  /**
+   * This function actually implements the local_to_global function for block
+   * sparsity types.
+   */
+  template <typename SparsityPatternType>
+  void
+  add_entries_local_to_global (const std::vector<size_type> &local_dof_indices,
+                               SparsityPatternType          &sparsity_pattern,
+                               const bool                    keep_constrained_entries,
+                               const Table<2,bool>          &dof_mask,
+                               internal::bool2type<true>) const;
+
+  /**
+   * Internal helper function for distribute_local_to_global function.
+   *
+   * Creates a list of affected global rows for distribution, including the
+   * local rows where the entries come from. The list is sorted according to
+   * the global row indices.
+   */
+  void
+  make_sorted_row_list (const std::vector<size_type>   &local_dof_indices,
+                        internals::GlobalRowsFromLocal &global_rows) const;
+
+  /**
+   * Internal helper function for add_entries_local_to_global function.
+   *
+   * Creates a list of affected rows for distribution without any additional
+   * information, otherwise similar to the other make_sorted_row_list()
+   * function.
+   */
+  void
+  make_sorted_row_list (const std::vector<size_type> &local_dof_indices,
+                        std::vector<size_type>       &active_dofs) const;
+
+  /**
+   * Internal helper function for distribute_local_to_global function.
+   */
+  template <typename LocalType>
+  LocalType
+  resolve_vector_entry (const size_type                       i,
+                        const internals::GlobalRowsFromLocal &global_rows,
+                        const Vector<LocalType>              &local_vector,
+                        const std::vector<size_type>         &local_dof_indices,
+                        const FullMatrix<LocalType>          &local_matrix) const;
+
+  /**
+   * The assignment operator is not implemented for performance reasons. You
+   * can clear() or reinit() and merge() manually if needed.
+   */
+  ConstraintMatrix &operator= (const ConstraintMatrix &other);
+};
+
+
+
+/* ---------------- template and inline functions ----------------- */
+
+// Construct an object without any constraint lines that is not yet marked
+// as sorted. The given index set is copied into local_lines; lines_cache is
+// not mentioned in the initializer list and therefore starts out
+// default-constructed.
+inline
+ConstraintMatrix::ConstraintMatrix (const IndexSet &local_constraints)
+  :
+  lines (),
+  local_lines (local_constraints),
+  sorted (false)
+{
+  // make sure the IndexSet is compressed. Otherwise this can lead to crashes
+  // that are hard to find (only happen in release mode).
+  // see tests/mpi/constraint_matrix_crash_01
+  //
+  // note that it is our own copy (local_lines) that gets compressed here,
+  // the argument itself is const and left untouched
+  local_lines.compress();
+}
+
+
+
+// Copy constructor: duplicates the constraint lines, the line cache, the
+// index set of locally stored lines and the sorted flag of the given
+// object. The Subscriptor base class is default-constructed rather than
+// copied from the argument.
+inline
+ConstraintMatrix::ConstraintMatrix (const ConstraintMatrix &constraint_matrix)
+  :
+  Subscriptor (),
+  lines (constraint_matrix.lines),
+  lines_cache (constraint_matrix.lines_cache),
+  local_lines (constraint_matrix.local_lines),
+  sorted (constraint_matrix.sorted)
+{}
+
+
+// Register a new, initially empty and homogeneous constraint line for the
+// degree of freedom 'line'. Calling this function for a line that already
+// exists is a no-op.
+inline
+void
+ConstraintMatrix::add_line (const size_type line)
+{
+  Assert (sorted==false, ExcMatrixIsClosed());
+
+  // an invalid index shows up when computing with distributed meshes and dof
+  // handlers and constraining a degree of freedom whose number is not known
+  // locally. without stopping here, the resize below would try to allocate
+  // several terabytes of memory
+  Assert (line != numbers::invalid_size_type,
+          ExcInternalError());
+  const size_type line_index = calculate_line_index (line);
+
+  // nothing to do if the line has been added before
+  if (is_constrained(line))
+    return;
+
+  // grow the cache if it does not reach up to this line yet: at least
+  // double its size, and fill the new slots with the "no constraint" marker
+  if (line_index >= lines_cache.size())
+    {
+      const size_type doubled_size
+        = 2*static_cast<size_type>(lines_cache.size());
+      const size_type new_cache_size
+        = std::max(doubled_size, line_index+1);
+      lines_cache.resize (new_cache_size, numbers::invalid_size_type);
+    }
+
+  // append the new line at the end of the list and remember its position
+  // in the cache
+  lines.push_back (ConstraintLine());
+  ConstraintLine &new_line = lines.back();
+  new_line.line          = line;
+  new_line.inhomogeneity = 0.;
+  lines_cache[line_index] = lines.size()-1;
+}
+
+
+
+// Add the pair (column, value) to the constraint line of degree of freedom
+// 'line'. The line must have been created with add_line() before. If an
+// entry for 'column' is already present, nothing is added; in debug mode it
+// is additionally checked that the stored value agrees with 'value'.
+inline
+void
+ConstraintMatrix::add_entry (const size_type line,
+                             const size_type column,
+                             const double    value)
+{
+  Assert (sorted==false, ExcMatrixIsClosed());
+  Assert (line != column,
+          ExcMessage ("Can't constrain a degree of freedom to itself"));
+
+  // compute the position of this line in the cache only once, and make sure
+  // it is inside the cache before the cache is indexed with it. without
+  // this check, the assertion below would itself read past the end of
+  // lines_cache if add_line() has not been called for this line
+  const size_type line_index = calculate_line_index(line);
+  Assert (line_index < lines_cache.size(), ExcInternalError());
+
+  // if in debug mode, check whether an entry for this column already exists
+  // and if it's the same as the one entered at present
+  //
+  // in any case: exit the function if an entry for this column already
+  // exists, since we don't want to enter it twice
+  Assert (lines_cache[line_index] != numbers::invalid_size_type,
+          ExcInternalError());
+  Assert (!local_lines.size() || local_lines.is_element(column),
+          ExcColumnNotStoredHere(line, column));
+  ConstraintLine *line_ptr = &lines[lines_cache[line_index]];
+  Assert (line_ptr->line == line, ExcInternalError());
+  for (ConstraintLine::Entries::const_iterator
+       p=line_ptr->entries.begin();
+       p != line_ptr->entries.end(); ++p)
+    if (p->first == column)
+      {
+        Assert (std::fabs(p->second - value) < 1.e-14,
+                ExcEntryAlreadyExists(line, column, p->second, value));
+        return;
+      }
+
+  line_ptr->entries.push_back (std::make_pair(column,value));
+}
+
+
+
+// Set the inhomogeneity of the constraint line of degree of freedom 'line'
+// to 'value'. The line must have been created with add_line() before.
+inline
+void
+ConstraintMatrix::set_inhomogeneity (const size_type line,
+                                     const double    value)
+{
+  const size_type line_index = calculate_line_index(line);
+
+  // the line has to be known to the cache ...
+  Assert( line_index < lines_cache.size() &&
+          lines_cache[line_index] != numbers::invalid_size_type,
+          ExcMessage("call add_line() before calling set_inhomogeneity()"));
+  // ... and the cache has to point to an existing line
+  Assert(lines_cache[line_index] < lines.size(), ExcInternalError());
+
+  lines[lines_cache[line_index]].inhomogeneity = value;
+}
+
+
+
+// Number of constraints stored, i.e. the number of elements of 'lines'.
+inline types::global_dof_index
+ConstraintMatrix::n_constraints () const
+{
+  return static_cast<types::global_dof_index>(lines.size());
+}
+
+
+
+inline bool
+ConstraintMatrix::is_constrained (const size_type index) const
+{
+  // constrained means: the cache has a valid slot for this index
+  const size_type cache_slot = calculate_line_index(index);
+  if (cache_slot >= lines_cache.size())
+    return false;
+  return lines_cache[cache_slot] != numbers::invalid_size_type;
+}
+
+
+
+// Return whether @p index is constrained with a nonzero inhomogeneity.
+inline bool
+ConstraintMatrix::is_inhomogeneously_constrained (const size_type index) const
+{
+  // the test done by is_constrained() is repeated inline here so that the
+  // line index has to be computed only once
+  const size_type line_index = calculate_line_index(index);
+  const bool has_constraint =
+    (line_index < lines_cache.size() &&
+     lines_cache[line_index] != numbers::invalid_size_type);
+  if (!has_constraint)
+    return false;
+
+  Assert (lines_cache[line_index] < lines.size(), ExcInternalError());
+  return lines[lines_cache[line_index]].inhomogeneity != 0;
+}
+
+
+
+inline
+const std::vector<std::pair<types::global_dof_index,double> > *
+ConstraintMatrix::get_constraint_entries (const size_type line) const
+{
+  // returns a null pointer for unconstrained lines. the is_constrained()
+  // test is repeated inline to compute the line index only once
+  const size_type cache_slot = calculate_line_index(line);
+  const bool has_constraint =
+    (cache_slot < lines_cache.size() &&
+     lines_cache[cache_slot] != numbers::invalid_size_type);
+
+  return (has_constraint ? &lines[lines_cache[cache_slot]].entries : 0);
+}
+
+
+
+inline double
+ConstraintMatrix::get_inhomogeneity (const size_type line) const
+{
+  // the is_constrained() test is repeated inline here, so that the line
+  // index has to be computed only once
+  const size_type cache_slot = calculate_line_index(line);
+  if (cache_slot < lines_cache.size() &&
+      lines_cache[cache_slot] != numbers::invalid_size_type)
+    return lines[lines_cache[cache_slot]].inhomogeneity;
+
+  // lines without a constraint report a zero inhomogeneity
+  return 0;
+}
+
+
+
+inline types::global_dof_index
+ConstraintMatrix::calculate_line_index (const size_type line) const
+{
+  // an IndexSet of size zero is unused (serial case): use the global number
+  const bool index_set_in_use = (local_lines.size() != 0);
+  if (index_set_in_use == false)
+    return line;
+
+  // otherwise the line is addressed by its position within the set
+  Assert (local_lines.is_element(line), ExcRowNotStoredHere(line));
+  return local_lines.index_within_set(line);
+}
+
+
+
+// Any line can be stored if no IndexSet is in use, else only its elements.
+inline bool ConstraintMatrix::can_store_line (size_type line_index) const
+{
+  return (local_lines.size() == 0) ? true : local_lines.is_element(line_index);
+}
+
+
+
+// Read access to the IndexSet that can_store_line() consults.
+inline const IndexSet &
+ConstraintMatrix::get_local_lines () const
+{
+  return this->local_lines;
+}
+
+
+
+template <class VectorType>
+inline void ConstraintMatrix::distribute_local_to_global (
+  const size_type index,
+  const double    value,
+  VectorType     &global_vector) const
+{
+  Assert (lines.empty() || sorted == true, ExcMatrixNotClosed());
+  // an unconstrained index receives the value directly
+  if (!is_constrained(index))
+    {
+      global_vector(index) += value;
+      return;
+    }
+
+  // otherwise spread it over the entries of the constraint, weighted
+  const ConstraintLine::Entries &targets =
+    lines[lines_cache[calculate_line_index(index)]].entries;
+  for (size_type k=0; k!=targets.size(); ++k)
+    global_vector(targets[k].first) += value * targets[k].second;
+}
+
+
+template <typename ForwardIteratorVec, typename ForwardIteratorInd,
+          class VectorType>
+inline
+void ConstraintMatrix::distribute_local_to_global (
+  ForwardIteratorVec local_vector_begin,
+  ForwardIteratorVec local_vector_end,
+  ForwardIteratorInd local_indices_begin,
+  VectorType        &global_vector) const
+{
+  Assert (lines.empty() || sorted == true, ExcMatrixNotClosed());
+  for ( ; local_vector_begin != local_vector_end;
+        ++local_vector_begin, ++local_indices_begin)
+    if (is_constrained(*local_indices_begin))
+      {
+        // spread the local value over the entries of the constraint
+        const ConstraintLine::Entries &targets =
+          lines[lines_cache[calculate_line_index(*local_indices_begin)]].entries;
+        for (size_type k=0; k!=targets.size(); ++k)
+          global_vector(targets[k].first)
+          += *local_vector_begin * targets[k].second;
+      }
+    else
+      // unconstrained: add the local value to its own global entry
+      global_vector(*local_indices_begin) += *local_vector_begin;
+}
+
+
+template <class InVector, class OutVector>
+inline void
+ConstraintMatrix::distribute_local_to_global (
+  const InVector               &local_vector,
+  const std::vector<size_type> &local_dof_indices,
+  OutVector                    &global_vector) const
+{
+  // one index per local value; the iterator-based overload does the work
+  Assert (local_vector.size() == local_dof_indices.size(),
+          ExcDimensionMismatch(local_vector.size(), local_dof_indices.size()));
+  this->distribute_local_to_global (local_vector.begin(), local_vector.end(),
+                                    local_dof_indices.begin(), global_vector);
+}
+
+
+
+template <typename ForwardIteratorVec, typename ForwardIteratorInd,
+          class VectorType>
+inline
+void ConstraintMatrix::get_dof_values (const VectorType  &global_vector,
+                                       ForwardIteratorInd local_indices_begin,
+                                       ForwardIteratorVec local_vector_begin,
+                                       ForwardIteratorVec local_vector_end) const
+{
+  Assert (lines.empty() || sorted == true, ExcMatrixNotClosed());
+  for ( ; local_vector_begin != local_vector_end;
+        ++local_vector_begin, ++local_indices_begin)
+    if (is_constrained(*local_indices_begin))
+      {
+        // evaluate the constraint: inhomogeneity plus weighted global values
+        const ConstraintLine &constraint =
+          lines[lines_cache[calculate_line_index(*local_indices_begin)]];
+        typename VectorType::value_type sum = constraint.inhomogeneity;
+        for (size_type k=0; k!=constraint.entries.size(); ++k)
+          sum += (global_vector(constraint.entries[k].first) *
+                  constraint.entries[k].second);
+        *local_vector_begin = sum;
+      }
+    else
+      // unconstrained: copy the value straight from the global vector
+      *local_vector_begin = global_vector(*local_indices_begin);
+}
+
+
+template <typename MatrixType> class BlockMatrixBase;
+template <typename SparsityPatternType> class BlockSparsityPatternBase;
+template <typename number>     class BlockSparseMatrixEZ;
+
+/**
+ * A type trait that determines at compile time whether a given type is a
+ * block matrix or block sparsity pattern type. For example,
+ * @code
+ *   IsBlockMatrix<SparseMatrix<double> >::value
+ * @endcode
+ * has the value false, whereas
+ * @code
+ *   IsBlockMatrix<BlockSparseMatrix<double> >::value
+ * @endcode
+ * is true. This is useful in template code that has to do things
+ * differently depending on whether a template type denotes a regular or a
+ * block type.
+ *
+ * @see
+ * @ref GlossBlockLA "Block (linear algebra)"
+ * @author Wolfgang Bangerth, 2009
+ */
+template <typename MatrixType>
+struct IsBlockMatrix
+{
+private:
+  /**
+   * Result types of the overloads below. Their sizes differ, so sizeof()
+   * of a call expression tells which overload was selected.
+   */
+  struct block_tag
+  {
+    char c[1];
+  };
+  struct no_block_tag
+  {
+    char c[2];
+  };
+
+  /**
+   * Selected for pointers to classes derived from BlockMatrixBase, which is
+   * what block matrices are (with the exception of BlockSparseMatrixEZ).
+   */
+  template <typename T>
+  static block_tag check_for_block_matrix (const BlockMatrixBase<T> *);
+
+  /**
+   * Selected for pointers to classes derived from BlockSparsityPatternBase,
+   * which is what block sparsity patterns are.
+   */
+  template <typename T>
+  static block_tag check_for_block_matrix (const BlockSparsityPatternBase<T> *);
+
+  /**
+   * Selected for BlockSparseMatrixEZ, the only block matrix not derived
+   * from BlockMatrixBase at the time of writing this class.
+   */
+  template <typename T>
+  static block_tag check_for_block_matrix (const BlockSparseMatrixEZ<T> *);
+
+  /**
+   * Catch-all, selected for every type that matches none of the above.
+   */
+  static no_block_tag check_for_block_matrix (...);
+
+public:
+  /**
+   * True if overload resolution for a pointer to MatrixType selects one of
+   * the block overloads above, false if it selects the catch-all.
+   */
+  static const bool value =
+    (sizeof(check_for_block_matrix (static_cast<MatrixType *>(0)))
+     == sizeof(block_tag));
+};
+
+
+// definition of the static member (its value is given inside the class)
+template <typename MatrixType>
+const bool IsBlockMatrix<MatrixType>::value;
+
+
+template <typename MatrixType>
+inline
+void
+ConstraintMatrix::
+distribute_local_to_global (const FullMatrix<typename MatrixType::value_type>     &local_matrix,
+                            const std::vector<size_type> &local_dof_indices,
+                            MatrixType                   &global_matrix) const
+{
+  // no vectors are involved here: pass a vector of size zero for both vector
+  // arguments of the implementation in the constraint_matrix.templates.h file
+  typedef dealii::internal::bool2type<IsBlockMatrix<MatrixType>::value> block_tag;
+  Vector<typename MatrixType::value_type> no_vector(0);
+  distribute_local_to_global (local_matrix, no_vector, local_dof_indices,
+                              global_matrix, no_vector, false, block_tag());
+}
+
+
+
+
+template <typename MatrixType, typename VectorType>
+inline
+void
+ConstraintMatrix::
+distribute_local_to_global (const FullMatrix<typename MatrixType::value_type>     &local_matrix,
+                            const Vector<typename VectorType::value_type>         &local_vector,
+                            const std::vector<size_type> &local_dof_indices,
+                            MatrixType                   &global_matrix,
+                            VectorType                   &global_vector,
+                            bool                          use_inhomogeneities_for_rhs) const
+{
+  // dispatch on whether MatrixType is a block type; see the templates file
+  typedef dealii::internal::bool2type<IsBlockMatrix<MatrixType>::value> block_tag;
+  distribute_local_to_global (local_matrix, local_vector, local_dof_indices,
+                              global_matrix, global_vector,
+                              use_inhomogeneities_for_rhs, block_tag());
+}
+
+
+
+
+template <typename SparsityPatternType>
+inline
+void
+ConstraintMatrix::
+add_entries_local_to_global (const std::vector<size_type> &local_dof_indices,
+                             SparsityPatternType          &sparsity_pattern,
+                             const bool                    keep_constrained_entries,
+                             const Table<2,bool>          &dof_mask) const
+{
+  // dispatch on whether the sparsity pattern is a block type
+  typedef internal::bool2type<IsBlockMatrix<SparsityPatternType>::value> block_tag;
+  add_entries_local_to_global (local_dof_indices, sparsity_pattern,
+                               keep_constrained_entries, dof_mask,
+                               block_tag());
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/constraint_matrix.templates.h b/include/deal.II/lac/constraint_matrix.templates.h
new file mode 100644
index 0000000..a62c129
--- /dev/null
+++ b/include/deal.II/lac/constraint_matrix.templates.h
@@ -0,0 +1,2733 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__constraint_matrix_templates_h
+#define dealii__constraint_matrix_templates_h
+
+
+#include <deal.II/lac/constraint_matrix.h>
+
+#include <deal.II/base/table.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/sparsity_pattern.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/block_sparsity_pattern.h>
+#include <deal.II/lac/block_sparse_matrix.h>
+#include <deal.II/lac/parallel_vector.h>
+#include <deal.II/lac/parallel_block_vector.h>
+#include <deal.II/lac/petsc_parallel_vector.h>
+#include <deal.II/lac/petsc_vector.h>
+#include <deal.II/lac/trilinos_vector.h>
+
+#include <iomanip>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+template <typename number>
+void ConstraintMatrix::condense (SparseMatrix<number> &uncondensed) const
+{
+  // a vector of size zero makes the matrix/vector version skip all vector work
+  Vector<number> no_vector (0);
+  this->condense (uncondensed, no_vector);
+}
+
+
+
+template <typename number>
+void ConstraintMatrix::condense (BlockSparseMatrix<number> &uncondensed) const
+{
+  // a vector without blocks makes the matrix/vector version skip vector work
+  BlockVector<number> no_vector (0);
+  this->condense (uncondensed, no_vector);
+}
+
+
+
+template <class VectorType>
+void
+ConstraintMatrix::condense (const VectorType &vec_ghosted,
+                            VectorType       &vec) const
+{
+  // condense vec_ghosted into vec for homogeneous constraints; both
+  // arguments may refer to the same vector
+  Assert (sorted == true, ExcMatrixNotClosed());
+  typedef std::vector<ConstraintLine>::const_iterator line_iterator;
+  typedef typename VectorType::value_type             value_type;
+
+  // when called with two different vectors, start from a copy of the input
+  if (&vec != &vec_ghosted)
+    vec = vec_ghosted;
+
+  // first pass: add the value of each constrained entry, weighted, to the
+  // locally stored entries it is constrained to. zeroing the constrained
+  // entries is deferred to a second pass, because for parallel vectors
+  // additions and insertions have to be separated by a compress()
+  for (line_iterator constraint_line = lines.begin();
+       constraint_line != lines.end(); ++constraint_line)
+    {
+      // a vector alone cannot be condensed for inhomogeneous constraints
+      Assert (constraint_line->inhomogeneity == 0.,
+              ExcMessage ("Inhomogeneous constraint cannot be condensed "
+                          "without any matrix specified."));
+
+      const value_type old_value = vec_ghosted(constraint_line->line);
+      for (size_type q=0; q!=constraint_line->entries.size(); ++q)
+        {
+          const size_type target = constraint_line->entries[q].first;
+          if (vec.in_local_range(target))
+            vec(target) += old_value * constraint_line->entries[q].second;
+        }
+    }
+
+  vec.compress(VectorOperation::add);
+
+  // second pass: set the locally stored constrained entries to zero
+  for (line_iterator constraint_line = lines.begin();
+       constraint_line != lines.end(); ++constraint_line)
+    if (vec.in_local_range(constraint_line->line))
+      vec(constraint_line->line) = 0.;
+
+  vec.compress(VectorOperation::insert);
+}
+
+
+
+// In-place variant: the same vector serves as both input and output.
+template <class VectorType>
+void ConstraintMatrix::condense (VectorType &vec) const
+{
+  this->condense (vec, vec);
+}
+
+
+
+template<typename number, class VectorType>
+void
+ConstraintMatrix::condense (SparseMatrix<number> &uncondensed,
+                            VectorType           &vec) const
+{
+  // condense the matrix in place and, along with it, the vector -- unless
+  // the vector has size zero, which is what the matrix-only function above
+  // passes as a dummy; then all vector operations below are skipped.
+  const bool use_vectors = vec.size() == 0 ? false : true;
+
+  const SparsityPattern &sparsity = uncondensed.get_sparsity_pattern ();
+
+  Assert (sorted == true, ExcMatrixNotClosed());
+  Assert (sparsity.is_compressed() == true, ExcMatrixNotClosed());
+  Assert (sparsity.n_rows() == sparsity.n_cols(),
+          ExcNotQuadratic());
+  if (use_vectors == true)
+    AssertDimension (vec.size(), sparsity.n_rows());
+
+  double average_diagonal = 0;  // mean of the absolute diagonal entries
+  for (size_type i=0; i<uncondensed.m(); ++i)
+    average_diagonal += std::fabs (uncondensed.diag_element(i));
+  average_diagonal /= uncondensed.m();
+
+  // store for each index whether it must be
+  // distributed or not. If the entry is
+  // invalid_size_type, the index is not
+  // constrained and nothing is to be done.
+  // otherwise, the entry is the position in
+  // @p lines of the constraint for this index
+  std::vector<size_type> distribute (sparsity.n_rows(),
+                                     numbers::invalid_size_type);
+
+  for (size_type c=0; c<lines.size(); ++c)
+    distribute[lines[c].line] = c;
+
+  const size_type n_rows = sparsity.n_rows();
+  for (size_type row=0; row<n_rows; ++row)
+    {
+      if (distribute[row] == numbers::invalid_size_type)
+        // regular (unconstrained) row. loop over its columns
+        {
+          for (typename SparseMatrix<number>::iterator
+               entry = uncondensed.begin(row);
+               entry != uncondensed.end(row); ++entry)
+            {
+              const size_type column = entry->column();
+
+              // end of row reached (an
+              // invalid column)? this
+              // should not happen, since
+              // we only operate on
+              // compressed matrices!
+              Assert (column != SparsityPattern::invalid_entry,
+                      ExcMatrixNotClosed());
+
+              if (distribute[column] != numbers::invalid_size_type)
+                // distribute entry at
+                // regular row @p row
+                // and irregular
+                // (constrained) column
+                // @p column; set old
+                // entry to zero
+                {
+                  for (size_type q=0;
+                       q!=lines[distribute[column]].entries.size(); ++q)
+                    uncondensed.add (row,
+                                     lines[distribute[column]].entries[q].first,
+                                     entry->value() *
+                                     lines[distribute[column]].entries[q].second);
+
+                  // the entry times the inhomogeneity of the
+                  // column's constraint is subtracted from the
+                  // vector. this corresponds to an explicit
+                  // elimination of the inhomogeneous constraint
+                  // in this row with Gauss elimination
+                  if (use_vectors == true)
+                    vec(row) -=
+                      entry->value() * lines[distribute[column]].inhomogeneity;
+
+                  // set old value to zero
+                  entry->value() = 0.;
+                }
+            }
+        }
+      else
+        // irregular (constrained) row: it must be distributed
+        {
+          for (typename SparseMatrix<number>::iterator
+               entry = uncondensed.begin(row);
+               entry != uncondensed.end(row); ++entry)
+            {
+              const size_type column = entry->column();
+
+              // end of row reached (an
+              // invalid column)? this
+              // should not happen, since
+              // we only operate on
+              // compressed matrices!
+              Assert (column != SparsityPattern::invalid_entry,
+                      ExcMatrixNotClosed());
+
+              if (distribute[column] == numbers::invalid_size_type)
+                // distribute entry at
+                // irregular row
+                // @p row and regular
+                // column @p column
+                // over the constraint's
+                // rows. set old entry to zero
+                {
+                  for (size_type q=0;
+                       q!=lines[distribute[row]].entries.size(); ++q)
+                    uncondensed.add (lines[distribute[row]].entries[q].first,
+                                     column,
+                                     entry->value() *
+                                     lines[distribute[row]].entries[q].second);
+
+                  // set old entry to zero
+                  entry->value() = 0.;
+                }
+              else
+                // distribute entry at
+                // irregular row @p row and
+                // irregular column
+                // @p column. set old entry
+                // to average_diagonal on
+                // main diagonal, zero otherwise
+                {
+                  for (size_type p=0; p!=lines[distribute[row]].entries.size(); ++p)
+                    {
+                      for (size_type q=0;
+                           q!=lines[distribute[column]].entries.size(); ++q)
+                        uncondensed.add (lines[distribute[row]].entries[p].first,
+                                         lines[distribute[column]].entries[q].first,
+                                         entry->value() *
+                                         lines[distribute[row]].entries[p].second *
+                                         lines[distribute[column]].entries[q].second);
+
+                      if (use_vectors == true)
+                        vec(lines[distribute[row]].entries[p].first) -=
+                          entry->value() * lines[distribute[row]].entries[p].second *
+                          lines[distribute[column]].inhomogeneity;
+                    }
+
+                  // set old entry to its final
+                  // value (see above)
+                  entry->value() = (row == column ? average_diagonal : 0. );
+                }
+            }
+
+          // distribute the vector entry of this row, then set it to zero
+          if (use_vectors == true)
+            {
+              for (size_type q=0; q!=lines[distribute[row]].entries.size(); ++q)
+                vec(lines[distribute[row]].entries[q].first)
+                += (vec(row) * lines[distribute[row]].entries[q].second);
+
+              vec(lines[distribute[row]].line) = 0.;
+            }
+        }
+    }
+}
+
+
+
+template <typename number, class BlockVectorType>
+void
+ConstraintMatrix::condense (BlockSparseMatrix<number> &uncondensed,
+                            BlockVectorType           &vec) const
+{
+  // condense the block matrix in place and, along with it, the block
+  // vector -- unless the vector has no blocks, which is what the
+  // matrix-only function above passes as a dummy; then all vector
+  // operations below are skipped. the constraints must be closed, and
+  // the matrix square with identical row and column block structure.
+  const bool use_vectors = vec.n_blocks() == 0 ? false : true;
+
+  const size_type blocks = uncondensed.n_block_rows();
+
+  const BlockSparsityPattern &
+  sparsity = uncondensed.get_sparsity_pattern ();
+
+  Assert (sorted == true, ExcMatrixNotClosed());
+  Assert (sparsity.is_compressed() == true, ExcMatrixNotClosed());
+  Assert (sparsity.n_rows() == sparsity.n_cols(),
+          ExcNotQuadratic());
+  Assert (sparsity.n_block_rows() == sparsity.n_block_cols(),
+          ExcNotQuadratic());
+  Assert (sparsity.get_column_indices() == sparsity.get_row_indices(),
+          ExcNotQuadratic());
+
+  if (use_vectors == true)
+    {
+      AssertDimension (vec.size(), sparsity.n_rows());
+      AssertDimension (vec.n_blocks(), sparsity.n_block_rows());
+    }
+
+  double average_diagonal = 0;
+  for (size_type b=0; b<uncondensed.n_block_rows(); ++b)
+    for (size_type i=0; i<uncondensed.block(b,b).m(); ++i)
+      average_diagonal += std::fabs (uncondensed.block(b,b).diag_element(i));
+  average_diagonal /= uncondensed.m();
+
+  const BlockIndices &
+  index_mapping = sparsity.get_column_indices();
+
+  // store for each index whether it must be
+  // distributed or not. If the entry is
+  // numbers::invalid_size_type, the index is
+  // not constrained and nothing is to be done.
+  // otherwise, the entry is the position in
+  // @p lines of the constraint for this
+  // index
+  std::vector<size_type> distribute (sparsity.n_rows(),
+                                     numbers::invalid_size_type);
+
+  for (size_type c=0; c<lines.size(); ++c)
+    distribute[lines[c].line] = c;
+
+  const size_type n_rows = sparsity.n_rows();
+  for (size_type row=0; row<n_rows; ++row)
+    {
+      // get index of this row
+      // within the blocks
+      const std::pair<size_type,size_type>
+      block_index = index_mapping.global_to_local(row);
+      const size_type block_row = block_index.first;
+
+      if (distribute[row] == numbers::invalid_size_type)
+        // regular line. loop over
+        // all columns and see
+        // whether this column must
+        // be distributed
+        {
+
+          // to loop over all entries
+          // in this row, we have to
+          // loop over all blocks in
+          // this blockrow and the
+          // corresponding row
+          // therein
+          for (size_type block_col=0; block_col<blocks; ++block_col)
+            {
+              for (typename SparseMatrix<number>::iterator
+                   entry = uncondensed.block(block_row, block_col).begin(block_index.second);
+                   entry != uncondensed.block(block_row, block_col).end(block_index.second);
+                   ++entry)
+                {
+                  const size_type global_col
+                    = index_mapping.local_to_global(block_col,entry->column());
+
+                  if (distribute[global_col] != numbers::invalid_size_type)
+                    // distribute entry at
+                    // regular row @p row
+                    // and irregular column
+                    // global_col; set old
+                    // entry to zero
+                    {
+                      const double old_value = entry->value ();
+
+                      for (size_type q=0;
+                           q!=lines[distribute[global_col]].entries.size(); ++q)
+                        uncondensed.add (row,
+                                         lines[distribute[global_col]].entries[q].first,
+                                         old_value *
+                                         lines[distribute[global_col]].entries[q].second);
+
+                      // the saved entry times the inhomogeneity
+                      // of the column's constraint is subtracted
+                      // from the vector. this corresponds to an
+                      // explicit elimination of the inhomogeneous
+                      // constraint in this row with Gauss elimination
+                      if (use_vectors == true)
+                        vec(row) -= old_value *
+                                    lines[distribute[global_col]].inhomogeneity;
+
+                      entry->value() = 0.;
+                    }
+                }
+            }
+        }
+      else
+        {
+          // row must be
+          // distributed. split the
+          // whole row into the
+          // chunks defined by the
+          // blocks
+          for (size_type block_col=0; block_col<blocks; ++block_col)
+            {
+              for (typename SparseMatrix<number>::iterator
+                   entry = uncondensed.block(block_row, block_col).begin(block_index.second);
+                   entry != uncondensed.block(block_row, block_col).end(block_index.second);
+                   ++entry)
+                {
+                  const size_type global_col
+                    = index_mapping.local_to_global (block_col, entry->column());
+
+                  if (distribute[global_col] ==
+                      numbers::invalid_size_type)
+                    // distribute
+                    // entry at
+                    // irregular
+                    // row @p row
+                    // and regular
+                    // column
+                    // global_col. set
+                    // old entry to
+                    // zero
+                    {
+                      const double old_value = entry->value();
+
+                      for (size_type q=0;
+                           q!=lines[distribute[row]].entries.size(); ++q)
+                        uncondensed.add (lines[distribute[row]].entries[q].first,
+                                         global_col,
+                                         old_value *
+                                         lines[distribute[row]].entries[q].second);
+
+                      entry->value() = 0.;
+                    }
+                  else
+                    // distribute entry at
+                    // irregular row @p row
+                    // and irregular column
+                    // @p global_col. set old
+                    // entry to average_diagonal
+                    // if on main diagonal,
+                    // zero otherwise
+                    {
+                      const double old_value = entry->value ();
+
+                      for (size_type p=0; p!=lines[distribute[row]].entries.size(); ++p)
+                        {
+                          for (size_type q=0; q!=lines[distribute[global_col]].entries.size(); ++q)
+                            uncondensed.add (lines[distribute[row]].entries[p].first,
+                                             lines[distribute[global_col]].entries[q].first,
+                                             old_value *
+                                             lines[distribute[row]].entries[p].second *
+                                             lines[distribute[global_col]].entries[q].second);
+
+                          if (use_vectors == true)
+                            vec(lines[distribute[row]].entries[p].first) -=
+                              old_value * lines[distribute[row]].entries[p].second *
+                              lines[distribute[global_col]].inhomogeneity;
+                        }
+
+                      entry->value() = (row == global_col ? average_diagonal : 0. );
+                    }
+                }
+            }
+
+          // distribute the vector entry of this row, then set it to zero
+          if (use_vectors == true)
+            {
+              for (size_type q=0; q!=lines[distribute[row]].entries.size(); ++q)
+                vec(lines[distribute[row]].entries[q].first)
+                += (vec(row) * lines[distribute[row]].entries[q].second);
+
+              vec(lines[distribute[row]].line) = 0.;
+            }
+        }
+    }
+}
+
+
+//TODO: I'm sure the following could be made more elegant by using a bit of
+//introspection using static member variables of the various vector
+//classes to dispatch between the different functions, rather than using
+//knowledge of the individual types
+
+// a number of functions that select the right implementation for set_zero().
+namespace internal
+{
+  namespace ConstraintMatrix
+  {
+    namespace
+    {
+      typedef types::global_dof_index size_type;
+
+      // Zero the locally owned entries of vec that are listed in cm. A
+      // nonzero shift means that vec is one block of a block vector, so that
+      // vec(i) is the global entry i+shift; lines below shift are skipped.
+      template<class VEC>
+      void set_zero_parallel(const std::vector<size_type> &cm, VEC &vec, size_type shift = 0)
+      {
+        Assert(!vec.has_ghost_elements(), ExcInternalError());
+        const IndexSet locally_owned = vec.locally_owned_elements();
+        for (typename std::vector<size_type>::const_iterator line = cm.begin();
+             line != cm.end(); ++line)
+          if (*line >= shift)
+            {
+              // index of the line relative to the start of vec
+              const size_type idx = *line - shift;
+              if (idx<vec.size() && locally_owned.is_element(idx))
+                vec(idx) = 0.;
+            }
+      }
+
+      // The same for parallel::distributed::Vector, which is asked directly
+      // whether an index is in its local range; zero_out_ghosts() is called last.
+      template<typename Number>
+      void set_zero_parallel(const std::vector<size_type> &cm, parallel::distributed::Vector<Number> &vec, size_type shift = 0)
+      {
+        for (typename std::vector<size_type>::const_iterator line = cm.begin();
+             line != cm.end(); ++line)
+          if (*line >= shift)
+            {
+              // index of the line relative to the start of vec
+              const size_type idx = *line - shift;
+              if (vec.in_local_range(idx))
+                vec(idx) = 0.;
+            }
+        vec.zero_out_ghosts();
+      }
+
+      // not a block vector: zero the entries without any shift
+      template<class VEC>
+      void set_zero_in_parallel(const std::vector<size_type> &cm, VEC &vec, internal::bool2type<false>)
+      {
+        set_zero_parallel(cm, vec, 0);
+      }
+
+      // block vector: zero block by block, shifting the indices by the
+      // accumulated size of the preceding blocks
+      template<class VEC>
+      void set_zero_in_parallel(const std::vector<size_type> &cm, VEC &vec, internal::bool2type<true>)
+      {
+        size_type block_start = 0;
+        for (size_type b=0; b<vec.n_blocks(); ++b)
+          {
+            set_zero_parallel(cm, vec.block(b), block_start);
+            block_start += vec.block(b).size();
+          }
+      }
+
+      // zero every listed entry; no ownership check is made
+      template<class VEC>
+      void set_zero_serial(const std::vector<size_type> &cm, VEC &vec)
+      {
+        for (std::vector<size_type>::size_type i=0; i!=cm.size(); ++i)
+          vec(cm[i]) = 0.;
+      }
+
+      // generic case: dispatch on whether VEC is a block vector, then
+      // compress the vector
+      template<class VEC>
+      void set_zero_all(const std::vector<size_type> &cm, VEC &vec)
+      {
+        typedef internal::bool2type<IsBlockVector<VEC>::value> block_tag;
+        set_zero_in_parallel<VEC>(cm, vec, block_tag());
+        vec.compress(VectorOperation::insert);
+      }
+
+      // dealii::Vector and dealii::BlockVector take the serial code path
+      template<class T>
+      void set_zero_all(const std::vector<size_type> &cm, dealii::Vector<T> &vec)
+      {
+        set_zero_serial(cm, vec);
+      }
+
+      template<class T>
+      void set_zero_all(const std::vector<size_type> &cm, dealii::BlockVector<T> &vec)
+      {
+        set_zero_serial(cm, vec);
+      }
+    }
+  }
+}
+
+
+template <class VectorType>
+void
+ConstraintMatrix::set_zero (VectorType &vec) const
+{
+  // 'lines' is a private member and therefore cannot be handed to the
+  // helper functions above. Collect the indices of the constrained lines in
+  // a plain vector instead, which is cheap.
+  std::vector<size_type> constrained_lines;
+  constrained_lines.reserve(lines.size());
+  for (std::vector<ConstraintLine>::const_iterator line = lines.begin();
+       line != lines.end(); ++line)
+    constrained_lines.push_back(line->line);
+  internal::ConstraintMatrix::set_zero_all(constrained_lines, vec);
+}
+
+
+
+
+// Add the entries of local_vector to global_vector at the positions given
+// by local_dof_indices, resolving constraints on the way:
+//  - entries belonging to unconstrained indices are added directly;
+//  - for a constrained index, the local value is distributed to the
+//    entries of its constraint line, weighted with the stored factors;
+//  - if the constraint has a nonzero inhomogeneity, the corresponding
+//    column of local_matrix, scaled by the inhomogeneity, is subtracted
+//    from the vector (again resolving constraints on the receiving rows).
+//
+// The object has to be sorted (closed), and local_vector, local_dof_indices
+// and the square local_matrix must have matching sizes.
+template <typename VectorType, typename LocalType>
+void
+ConstraintMatrix::
+distribute_local_to_global (const Vector<LocalType>      &local_vector,
+                            const std::vector<size_type> &local_dof_indices,
+                            VectorType                   &global_vector,
+                            const FullMatrix<LocalType>  &local_matrix) const
+{
+  Assert (sorted == true, ExcMatrixNotClosed());
+  AssertDimension (local_vector.size(), local_dof_indices.size());
+  AssertDimension (local_matrix.m(), local_dof_indices.size());
+  AssertDimension (local_matrix.n(), local_dof_indices.size());
+
+  const size_type n_local_dofs = local_vector.size();
+  // without any constraints the whole local vector can be added in one go
+  if (lines.empty())
+    global_vector.add(local_dof_indices, local_vector);
+  else
+    for (size_type i=0; i<n_local_dofs; ++i)
+      {
+        // check whether the current index is
+        // constrained. if not, just write the entry
+        // into the vector. otherwise, need to resolve
+        // the constraint
+        if (is_constrained(local_dof_indices[i]) == false)
+          {
+            global_vector(local_dof_indices[i]) += local_vector(i);
+            continue;
+          }
+
+        // find the constraint line to the given
+        // global dof index.
+        // NOTE(review): the null branch below is presumably unreachable
+        // because is_constrained() returned true above -- confirm, since
+        // position is dereferenced without a check.
+        const size_type line_index = calculate_line_index (local_dof_indices[i]);
+        const ConstraintLine *position =
+          lines_cache.size() <= line_index ? 0 : &lines[lines_cache[line_index]];
+
+        // Gauss elimination of the matrix columns with the inhomogeneity.
+        // Go through them one by one and again check whether they are
+        // constrained. If so, distribute the constraint
+        const double val = position->inhomogeneity;
+        if (val != 0)
+          for (size_type j=0; j<n_local_dofs; ++j)
+            {
+              // unconstrained row j: subtract the scaled matrix entry of
+              // column i directly
+              if (is_constrained(local_dof_indices[j]) == false)
+                {
+                  global_vector(local_dof_indices[j]) -= val * local_matrix(j,i);
+                  continue;
+                }
+
+              const LocalType matrix_entry = local_matrix(j,i);
+
+              // a zero matrix entry contributes nothing
+              if (matrix_entry == LocalType())
+                continue;
+
+              // constrained row j: spread the contribution over the
+              // entries of its constraint line
+              const ConstraintLine &position_j =
+                lines[lines_cache[calculate_line_index(local_dof_indices[j])]];
+
+              for (size_type q=0; q<position_j.entries.size(); ++q)
+                {
+                  // if local_lines is empty or contains the target index,
+                  // the target must not be constrained itself
+                  Assert (!(!local_lines.size()
+                            || local_lines.is_element(position_j.entries[q].first))
+                          || is_constrained(position_j.entries[q].first) == false,
+                          ExcMessage ("Tried to distribute to a fixed dof."));
+                  global_vector(position_j.entries[q].first)
+                  -= val * position_j.entries[q].second * matrix_entry;
+                }
+            }
+
+        // now distribute the constraint,
+        // but make sure we don't touch
+        // the entries of fixed dofs
+        for (size_type j=0; j<position->entries.size(); ++j)
+          {
+            // same check as above: a target index known to this object
+            // must not be constrained itself
+            Assert (!(!local_lines.size()
+                      || local_lines.is_element(position->entries[j].first))
+                    || is_constrained(position->entries[j].first) == false,
+                    ExcMessage ("Tried to distribute to a fixed dof."));
+            global_vector(position->entries[j].first)
+            += local_vector(i) * position->entries[j].second;
+          }
+      }
+}
+
+
+
+namespace internal
+{
+  namespace
+  {
+    // Build, in 'output', a copy of 'vec' that stores the locally owned
+    // elements and in addition the elements listed in 'needed_elements',
+    // some of which have to be imported from other processors.
+    //
+    // How this is done depends on the vector type, hence the set of
+    // overloads below. The last argument selects between block and
+    // non-block vectors.
+#ifdef DEAL_II_WITH_TRILINOS
+    void
+    import_vector_with_ghost_elements (const TrilinosWrappers::MPI::Vector &vec,
+                                       const IndexSet                      &/*locally_owned_elements*/,
+                                       const IndexSet                      &needed_elements,
+                                       TrilinosWrappers::MPI::Vector       &output,
+                                       const internal::bool2type<false>     /*is_block_vector*/)
+    {
+      // the source vector has to be free of ghost elements
+      Assert(!vec.has_ghost_elements(),
+             ExcGhostsPresent());
+#ifdef DEAL_II_WITH_MPI
+      // reuse the communicator the source vector lives on
+      const Epetra_MpiComm *const epetra_comm
+        = dynamic_cast<const Epetra_MpiComm *>(&vec.trilinos_vector().Comm());
+
+      Assert (epetra_comm != 0, ExcInternalError());
+      output.reinit (needed_elements, epetra_comm->GetMpiComm());
+#else
+      output.reinit (needed_elements, MPI_COMM_WORLD);
+#endif
+      output = vec;
+    }
+#endif
+
+#ifdef DEAL_II_WITH_PETSC
+    // PETSc: set up the output with owned and needed elements, then assign
+    void
+    import_vector_with_ghost_elements (const PETScWrappers::MPI::Vector &vec,
+                                       const IndexSet                   &locally_owned_elements,
+                                       const IndexSet                   &needed_elements,
+                                       PETScWrappers::MPI::Vector       &output,
+                                       const internal::bool2type<false>  /*is_block_vector*/)
+    {
+      output.reinit (locally_owned_elements, needed_elements, vec.get_mpi_communicator());
+      output = vec;
+    }
+#endif
+
+    // parallel::distributed::Vector: the ghost entries of the input are
+    // zeroed first (which requires casting away its constness); the output
+    // is then set up, assigned, and its ghost values are updated
+    template <typename number>
+    void
+    import_vector_with_ghost_elements (const parallel::distributed::Vector<number> &vec,
+                                       const IndexSet                              &locally_owned_elements,
+                                       const IndexSet                              &needed_elements,
+                                       parallel::distributed::Vector<number>       &output,
+                                       const internal::bool2type<false>             /*is_block_vector*/)
+    {
+      // TODO: if the input vector already has all needed elements, the copy
+      // is unnecessary. need to find a way to detect and avoid that
+      // efficiently
+      parallel::distributed::Vector<number> &writable_vec
+        = const_cast<parallel::distributed::Vector<number>&>(vec);
+      writable_vec.zero_out_ghosts();
+      output.reinit (locally_owned_elements, needed_elements, vec.get_mpi_communicator());
+      output = vec;
+      output.update_ghost_values();
+    }
+
+
+    // every remaining non-block vector type is sequential, and this
+    // function should never be called for it -- so throw an exception
+    template <typename VectorType>
+    void
+    import_vector_with_ghost_elements (const VectorType                 &/*vec*/,
+                                       const IndexSet                   &/*locally_owned_elements*/,
+                                       const IndexSet                   &/*needed_elements*/,
+                                       VectorType                       &/*output*/,
+                                       const internal::bool2type<false>  /*is_block_vector*/)
+    {
+      Assert (false, ExcMessage ("We shouldn't even get here!"));
+    }
+
+
+    // block vectors: forward block by block, passing each block the part
+    // of the two index sets that falls into its index range
+    template <class VectorType>
+    void
+    import_vector_with_ghost_elements (const VectorType                &vec,
+                                       const IndexSet                  &locally_owned_elements,
+                                       const IndexSet                  &needed_elements,
+                                       VectorType                      &output,
+                                       const internal::bool2type<true>  /*is_block_vector*/)
+    {
+      output.reinit (vec.n_blocks());
+
+      types::global_dof_index block_begin = 0;
+      for (unsigned int b=0; b<vec.n_blocks(); ++b)
+        {
+          const types::global_dof_index block_end = block_begin + vec.block(b).size();
+          import_vector_with_ghost_elements (vec.block(b),
+                                             locally_owned_elements.get_view (block_begin, block_end),
+                                             needed_elements.get_view (block_begin, block_end),
+                                             output.block(b),
+                                             internal::bool2type<false>());
+          block_begin = block_end;
+        }
+
+      output.collect_sizes ();
+    }
+  }
+}
+
+
+template <class VectorType>
+void
+ConstraintMatrix::distribute (VectorType &vec) const
+{
+  Assert (sorted==true, ExcMatrixNotClosed());
+
+  typedef std::vector<ConstraintLine>::const_iterator constraint_iterator;
+  typedef typename VectorType::value_type             vector_number;
+
+  // Vector types that support distributed storage need the more involved
+  // of the two code paths below, even if one processor happens to own all
+  // elements: the other processors could not tell whether they are expected
+  // to take part in building a temporary vector, exchanging data (which
+  // may involve processors that own nothing, depending on the parallel
+  // model) and the final compress(). All other vector types take the
+  // simple, sequential path.
+  const IndexSet vec_owned_elements = vec.locally_owned_elements();
+  if (vec.supports_distributed_data == true)
+    {
+      // This processor owns only a part of the vector. It is tempting to
+      // let every processor add the contributions of the elements it owns
+      // to the (possibly remote) constrained entries and then call
+      // compress().
+      //
+      // That scheme does not work, as evidenced by the disaster of bug #51
+      // (see http://code.google.com/p/dealii/issues/detail?id=51 and the
+      // reversion in r29662 of one attempt to implement it). Instead, get a
+      // vector that holds all the *sources* of the constraints we own,
+      // possibly as ghost elements, read from it, and throw it away
+      // afterwards.
+
+      // step 1: collect the sources of locally owned constrained entries
+      // that are not owned locally themselves
+      IndexSet needed_elements = vec_owned_elements;
+      for (constraint_iterator line = lines.begin();
+           line != lines.end(); ++line)
+        {
+          if (!vec_owned_elements.is_element(line->line))
+            continue;
+
+          for (unsigned int e=0; e<line->entries.size(); ++e)
+            if (!vec_owned_elements.is_element(line->entries[e].first))
+              needed_elements.add_index(line->entries[e].first);
+        }
+
+      // step 2: import these elements into a temporary ghosted vector
+      VectorType ghosted_vector;
+      internal::import_vector_with_ghost_elements (vec,
+                                                   vec_owned_elements, needed_elements,
+                                                   ghosted_vector,
+                                                   internal::bool2type<IsBlockVector<VectorType>::value>());
+
+      // step 3: evaluate every locally owned constraint, reading the
+      // sources from the ghosted vector
+      for (constraint_iterator line = lines.begin();
+           line != lines.end(); ++line)
+        {
+          if (!vec_owned_elements.is_element(line->line))
+            continue;
+
+          vector_number new_value = line->inhomogeneity;
+          for (unsigned int e=0; e<line->entries.size(); ++e)
+            new_value += (static_cast<vector_number>
+                          (ghosted_vector(line->entries[e].first)) *
+                          line->entries[e].second);
+          AssertIsFinite(new_value);
+          vec(line->line) = new_value;
+        }
+
+      // compress to send entries we wrote to that are not stored locally
+      // to their owners. this should not be strictly necessary, but it
+      // probably does not hurt either
+      vec.compress (VectorOperation::insert);
+    }
+  else
+    {
+      // purely sequential vector (either because the type does not support
+      // anything else or because it is completely stored locally): each
+      // constrained entry is the inhomogeneity plus the weighted sum of the
+      // entries it is constrained to
+      for (constraint_iterator line = lines.begin();
+           line != lines.end(); ++line)
+        {
+          vector_number new_value = line->inhomogeneity;
+          for (unsigned int e=0; e<line->entries.size(); ++e)
+            new_value += (static_cast<vector_number>
+                          (vec(line->entries[e].first)) *
+                          line->entries[e].second);
+          AssertIsFinite(new_value);
+          vec(line->line) = new_value;
+        }
+    }
+}
+
+
+
+// Some helper definitions for the local_to_global functions.
+namespace internals
+{
+  typedef types::global_dof_index size_type;
+
+  // this struct contains all the information we need to store about each of
+  // the global entries (global_row): are they obtained directly by some local
+  // entry (local_row) or some constraints (constraint_position). This is not
+  // directly used in the user code, but accessed via the GlobalRowsFromLocal.
+  //
+  // The actions performed here correspond to reshaping the constraint
+  // information from global degrees of freedom to local ones (i.e.,
+  // cell-related DoFs), and also transforming the constraint information from
+  // compressed row storage (each local dof that is constrained has a list of
+  // constraint entries associated to it) into compressed column storage based
+  // on the cell-related DoFs (we have a list of global degrees of freedom,
+  // and to each we have a list of local rows where the entries come from). To
+  // increase the speed, we additionally store whether an entry is generated
+  // directly from the local degrees of freedom or whether it comes from a
+  // constraint.
+  //
+  // Note that copying is destructive with respect to constraint_position:
+  // both the copy constructor and the assignment operator hand the
+  // constraint_position of the source over to the destination and reset it
+  // to invalid_size_type in the source. This is why the member is mutable.
+  struct Distributing
+  {
+    Distributing (const size_type global_row = numbers::invalid_size_type,
+                  const size_type local_row = numbers::invalid_size_type);
+    Distributing (const Distributing &in);
+    Distributing &operator = (const Distributing &in);
+
+    // entries are ordered by their global row only
+    bool operator < (const Distributing &in) const
+    {
+      return global_row<in.global_row;
+    }
+
+    size_type global_row;
+    size_type local_row;
+    mutable size_type constraint_position;
+  };
+
+  // create an entry without any constraint data attached
+  inline
+  Distributing::Distributing (const size_type global_row,
+                              const size_type local_row) :
+    global_row (global_row),
+    local_row (local_row),
+    constraint_position (numbers::invalid_size_type) {}
+
+  // copy the row indices and take over the constraint data of 'in', which
+  // is left without constraint data afterwards
+  inline
+  Distributing::Distributing (const Distributing &in)
+    :
+    global_row (in.global_row),
+    local_row (in.local_row),
+    constraint_position (in.constraint_position)
+  {
+    in.constraint_position = numbers::invalid_size_type;
+  }
+
+  // copy the row indices and take over the constraint data of 'in'. The
+  // destination must not hold constraint data of its own.
+  inline
+  Distributing &Distributing::operator = (const Distributing &in)
+  {
+    // self-assignment (as happens e.g. in std::swap(x,x)) must be a no-op:
+    // without this check, the hand-over below would reset our own
+    // constraint_position and thereby lose the constraint data
+    if (this == &in)
+      return *this;
+
+    global_row = in.global_row;
+    local_row = in.local_row;
+    // the constraints pointer should not contain any data here.
+    Assert (constraint_position == numbers::invalid_size_type,
+            ExcInternalError());
+
+    if (in.constraint_position != numbers::invalid_size_type)
+      {
+        constraint_position = in.constraint_position;
+        in.constraint_position = numbers::invalid_size_type;
+      }
+    return *this;
+  }
+
+
+
+  // this is a cache for constraints that are encountered on a local level.
+  // The functionality is similar to
+  // std::vector<std::vector<std::pair<uint,double> > >, but tuned so that
+  // frequent memory allocation for each entry is avoided. The data is put
+  // into a std::vector<std::pair<uint,double> > and the row length is kept
+  // fixed at row_length. Both the number of rows and the row length can
+  // change as this structure is filled. In that case, the data is
+  // rearranged. This is not directly used in the user code, but accessed via
+  // the GlobalRowsFromLocal.
+  struct DataCache
+  {
+    // start with room for eight entries per row
+    DataCache ()
+      :
+      row_length (8)
+    {}
+
+    // remove all rows. row_length keeps its current value.
+    void reinit ()
+    {
+      individual_size.resize(0);
+      data.resize(0);
+    }
+
+    // open a new row that contains the given pair as its only entry and
+    // return the index of that row
+    size_type insert_new_index (const std::pair<size_type,double> &pair)
+    {
+      Assert(row_length > 0, ExcInternalError());
+      const size_type index = individual_size.size();
+      individual_size.push_back(1);
+      data.resize(individual_size.size()*row_length);
+      data[index*row_length] = pair;
+      return index;
+    }
+
+    // append the given pair to the existing row 'index'. If that row is
+    // full, the length of all rows is doubled first.
+    void append_index (const size_type index,
+                       const std::pair<size_type,double> &pair)
+    {
+      AssertIndexRange (index, individual_size.size());
+      const size_type my_length = individual_size[index];
+      if (my_length == row_length)
+        {
+          AssertDimension(data.size(), individual_size.size()*row_length);
+          // no space left in this row, need to double row_length and
+          // rearrange the data items. Move all items to the right except the
+          // first row (which stays where it is), starting at the back so
+          // that no row is overwritten before it has been moved. Since
+          // individual_size contains at least one element when we get here,
+          // subtracting 1 works fine.
+          //
+          // The entries are copied one by one rather than with memmove
+          // because std::pair is not a trivially copyable type. For i>=1
+          // the new position of row i starts at or behind the end of its
+          // old position, so a forward copy is safe.
+          data.resize(2*data.size());
+          for (size_type i=individual_size.size()-1; i>0; --i)
+            {
+              const size_type old_start = i*row_length;
+              const size_type new_start = 2*old_start;
+              for (size_type k=0; k<individual_size[i]; ++k)
+                data[new_start+k] = data[old_start+k];
+            }
+          row_length *= 2;
+        }
+      data[index*row_length+my_length] = pair;
+      individual_size[index] = my_length + 1;
+    }
+
+    // number of entries currently stored in row 'index'
+    size_type
+    get_size (const size_type index) const
+    {
+      return individual_size[index];
+    }
+
+    // pointer to the first entry of row 'index'
+    const std::pair<size_type,double> *
+    get_entry (const size_type index) const
+    {
+      return &data[index*row_length];
+    }
+
+    // number of slots reserved for each row in 'data'
+    size_type row_length;
+
+    // the entries of all rows; row i starts at position i*row_length
+    std::vector<std::pair<size_type,double> > data;
+
+    // the number of entries actually used in each row
+    std::vector<size_type> individual_size;
+  };
+
+
+
+  // collects all the global rows from a local contribution (cell) and their
+  // origin (direct/constraint). this is basically a vector consisting of
+  // "Distributing" structs using access via the DataCache. Provides some
+  // specialized sort and insert functions.
+  //
+  // in case there are no constraints, this is basically a list of pairs
+  // <uint,uint> with the first index being the global index and the second
+  // index the local index. The list is sorted with respect to the global
+  // index.
+  //
+  // in case there are constraints, a global dof might get a contribution also
+  // because it gets data from a constrained dof. This means that a global dof
+  // might also have indirect contributions from a local dof via a constraint,
+  // besides the direct ones.
+  //
+  // The actions performed here correspond to reshaping the constraint
+  // information from global degrees of freedom to local ones (i.e.,
+  // cell-related DoFs), and also transforming the constraint information from
+  // compressed row storage (each local dof that is constrained has a list of
+  // constraint entries associated to it) into compressed column storage based
+  // on the cell-related DoFs (we have a list of global degrees of freedom,
+  // and to each we have a list of local rows where the entries come from). To
+  // increase the speed, we additionally store whether an entry is generated
+  // directly from the local degrees of freedom or whether it comes from a
+  // constraint.
+  //
+  // Layout of total_row_indices: the first n_active_rows entries are the
+  // active rows; the remaining entries describe constrained local dofs,
+  // with the n_inhomogeneous_rows inhomogeneous ones coming first.
+  class GlobalRowsFromLocal
+  {
+  public:
+    GlobalRowsFromLocal ()
+      :
+      n_active_rows (0),
+      n_inhomogeneous_rows (0)
+    {}
+
+    // set up the object for n_local_rows rows, all of which are initially
+    // active and have no constraint data attached. The global_row and
+    // local_row fields are not reset here.
+    void reinit (const size_type n_local_rows)
+    {
+      total_row_indices.resize(n_local_rows);
+      for (unsigned int i=0; i<n_local_rows; ++i)
+        total_row_indices[i].constraint_position = numbers::invalid_size_type;
+      n_active_rows = n_local_rows;
+      n_inhomogeneous_rows = 0;
+      data_cache.reinit();
+    }
+
+    // implemented below
+    void insert_index (const size_type global_row,
+                       const size_type local_row,
+                       const double       constraint_value);
+    void sort ();
+
+    // Print object for debugging purpose
+    void print(std::ostream &os)
+    {
+      os << "Active rows " << n_active_rows << std::endl
+         << "Constr rows " << n_constraints() << std::endl
+         << "Inhom  rows " << n_inhomogeneous_rows << std::endl
+         << "Local: ";
+      for (size_type i=0 ; i<total_row_indices.size() ; ++i)
+        os << ' ' << std::setw(4) << total_row_indices[i].local_row;
+      os << std::endl
+         << "Global:";
+      for (size_type i=0 ; i<total_row_indices.size() ; ++i)
+        os << ' ' << std::setw(4) << total_row_indices[i].global_row;
+      os << std::endl
+         << "ConPos:";
+      for (size_type i=0 ; i<total_row_indices.size() ; ++i)
+        os << ' ' << std::setw(4) << total_row_indices[i].constraint_position;
+      os << std::endl;
+    }
+
+
+    // return all kind of information on the constraints
+
+    // returns the number of global indices in the struct, i.e. the number
+    // of active rows
+    size_type size () const
+    {
+      return n_active_rows;
+    }
+
+    // returns the number of constraints that are associated to the
+    // counter_index-th entry in the list (zero if the entry has no
+    // constraint data)
+    size_type size (const size_type counter_index) const
+    {
+      return (total_row_indices[counter_index].constraint_position ==
+              numbers::invalid_size_type ?
+              0 :
+              data_cache.get_size(total_row_indices[counter_index].
+                                  constraint_position));
+    }
+
+    // returns the global row of the counter_index-th entry in the list
+    size_type global_row (const size_type counter_index) const
+    {
+      return total_row_indices[counter_index].global_row;
+    }
+
+    // writable access to the global row of the counter_index-th entry in
+    // the list
+    size_type &global_row (const size_type counter_index)
+    {
+      return total_row_indices[counter_index].global_row;
+    }
+
+    // returns the local row in the cell matrix associated with the
+    // counter_index-th entry in the list. Returns invalid_size_type for
+    // constrained rows
+    size_type local_row (const size_type counter_index) const
+    {
+      return total_row_indices[counter_index].local_row;
+    }
+
+    // writable index
+    size_type &local_row (const size_type counter_index)
+    {
+      return total_row_indices[counter_index].local_row;
+    }
+
+    // returns the local row in the cell matrix associated with the
+    // counter_index-th entry in the list in the index_in_constraint-th
+    // position of constraints. The entry must have constraint data.
+    size_type local_row (const size_type counter_index,
+                         const size_type index_in_constraint) const
+    {
+      return (data_cache.get_entry(total_row_indices[counter_index].constraint_position)
+              [index_in_constraint]).first;
+    }
+
+    // returns the value of the constraint in the counter_index-th entry in
+    // the list in the index_in_constraint-th position of constraints. The
+    // entry must have constraint data.
+    double constraint_value (const size_type counter_index,
+                             const size_type index_in_constraint) const
+    {
+      return (data_cache.get_entry(total_row_indices[counter_index].constraint_position)
+              [index_in_constraint]).second;
+    }
+
+    // returns whether there is one row with indirect contributions (i.e.,
+    // there has been at least one constraint with non-trivial ConstraintLine)
+    bool have_indirect_rows () const
+    {
+      return data_cache.individual_size.empty() == false;
+    }
+
+    // append an entry that is constrained. This means that there is one less
+    // nontrivial row: the last active slot is turned into a constraint
+    // entry that records the constrained local dof and has no global row
+    void insert_constraint (const size_type constrained_local_dof)
+    {
+      --n_active_rows;
+      total_row_indices[n_active_rows].local_row = constrained_local_dof;
+      total_row_indices[n_active_rows].global_row = numbers::invalid_size_type;
+    }
+
+    // returns the number of constrained dofs in the structure. Constrained
+    // dofs do not contribute directly to the matrix, but are needed in order
+    // to set matrix diagonals and resolve inhomogeneities
+    size_type n_constraints () const
+    {
+      return total_row_indices.size()-n_active_rows;
+    }
+
+    // returns the number of constrained dofs in the structure that have an
+    // inhomogeneity
+    size_type n_inhomogeneities () const
+    {
+      return n_inhomogeneous_rows;
+    }
+
+    // tells the structure that the ith constraint is
+    // inhomogeneous. inhomogeneous constraints contribute to right hand
+    // sides, so to have fast access to them, put them before homogeneous
+    // constraints. Note that the two swapped entries coincide if i equals
+    // the current number of inhomogeneous rows.
+    void set_ith_constraint_inhomogeneous (const size_type i)
+    {
+      Assert (i >= n_inhomogeneous_rows, ExcInternalError());
+      std::swap (total_row_indices[n_active_rows+i],
+                 total_row_indices[n_active_rows+n_inhomogeneous_rows]);
+      n_inhomogeneous_rows++;
+    }
+
+    // the local row where constraint number i was detected, to find that row
+    // easily when the GlobalRowsFromLocal has been set up
+    size_type constraint_origin (size_type i) const
+    {
+      return total_row_indices[n_active_rows+i].local_row;
+    }
+
+    // a vector that contains all the global ids and the corresponding local
+    // ids as well as the position in data_cache (constraint_position) of
+    // the data that tells us how to resolve constraints.
+    std::vector<Distributing> total_row_indices;
+
+  private:
+    // holds the actual data from the constraints
+    DataCache                 data_cache;
+
+    // how many rows there are, constraints disregarded
+    size_type                 n_active_rows;
+
+    // the number of rows with inhomogeneous constraints
+    size_type                 n_inhomogeneous_rows;
+  };
+
+  // a function that appends an additional row to the list of values, or
+  // appends a value to an already existing row. Similar functionality as for
+  // std::map<size_type,Distributing>, but here done for a
+  // std::vector<Distributing>, much faster for short lists as we have them
+  // here
+  //
+  // global_row is the row that receives the contribution; the pair
+  // (local_row, constraint_value) is stored in the constraint data of that
+  // row. The active rows have to be sorted by global row on entry; if a new
+  // row has to be created, it is inserted such that they remain sorted.
+  inline
+  void
+  GlobalRowsFromLocal::insert_index (const size_type global_row,
+                                     const size_type local_row,
+                                     const double    constraint_value)
+  {
+    typedef std::vector<Distributing>::iterator index_iterator;
+    index_iterator pos, pos1;
+    Distributing row_value (global_row);
+    std::pair<size_type,double> constraint (local_row, constraint_value);
+
+    // check whether the list was really sorted before entering here
+    for (size_type i=1; i<n_active_rows; ++i)
+      Assert (total_row_indices[i-1] < total_row_indices[i], ExcInternalError());
+
+    const index_iterator active_end = total_row_indices.begin()+n_active_rows;
+    pos = Utilities::lower_bound (total_row_indices.begin(),
+                                  active_end,
+                                  row_value);
+
+    // the search may end at the end of the active range if global_row is
+    // larger than all active rows. That position either holds a constraint
+    // entry or, if there are none, is past the end of the vector, so it
+    // must not be inspected.
+    if (pos != active_end && pos->global_row == global_row)
+      pos1 = pos;
+    else
+      {
+        pos1 = total_row_indices.insert(pos, row_value);
+        ++n_active_rows;
+      }
+
+    // first contribution to this row opens a new row in the cache, all
+    // further ones are appended to it
+    if (pos1->constraint_position == numbers::invalid_size_type)
+      pos1->constraint_position = data_cache.insert_new_index (constraint);
+    else
+      data_cache.append_index (pos1->constraint_position, constraint);
+  }
+
+  // sorts the active rows of the std::vector<Distributing> by their global
+  // row. Only the global_row and local_row fields are moved around, the
+  // constraint data is not, so this function does not work as expected once
+  // constraints have been inserted. The algorithm is shellsort, which is
+  // very fast in case the indices are already sorted (which is the usual
+  // case with DG elements), and not too slow in other cases
+  inline
+  void
+  GlobalRowsFromLocal::sort ()
+  {
+    const size_type n_rows = size();
+
+    // make sure that we are in the range of the vector
+    AssertIndexRange (n_rows, total_row_indices.size()+1);
+
+    // check whether the constraints are really empty.
+    for (size_type row=0; row<n_rows; ++row)
+      Assert (total_row_indices[row].constraint_position ==
+              numbers::invalid_size_type,
+              ExcInternalError());
+
+    // insertion sort on interleaved sub-sequences, halving the distance
+    // between their elements in every pass
+    for (size_type gap = n_rows/2; gap > 0; gap /= 2)
+      for (size_type current = gap; current < n_rows; ++current)
+        {
+          const size_type moving_global = total_row_indices[current].global_row;
+          const size_type moving_local  = total_row_indices[current].local_row;
+
+          // shift all larger predecessors in this sub-sequence one place
+          // up to find the slot for the current element
+          size_type slot = current;
+          while ((slot >= gap) &&
+                 (total_row_indices[slot-gap].global_row > moving_global))
+            {
+              total_row_indices[slot].global_row = total_row_indices[slot-gap].global_row;
+              total_row_indices[slot].local_row = total_row_indices[slot-gap].local_row;
+              slot -= gap;
+            }
+
+          // only write if the element actually has to move
+          if (slot != current)
+            {
+              total_row_indices[slot].global_row = moving_global;
+              total_row_indices[slot].local_row = moving_local;
+            }
+        }
+  }
+
+
+
+  /**
+   * Scratch data that is used during calls to distribute_local_to_global and
+   * add_entries_local_to_global. In order to avoid frequent memory
+   * allocation, we keep the data alive from one call to the next in a static
+   * variable. Since we want to allow for different number types in matrices,
+   * this is a template.
+   *
+   * Since each thread gets its private version of scratch data out of the
+   * ThreadLocalStorage, no conflicting access can occur. For this to be
+   * valid, we need to make sure that no call within
+   * distribute_local_to_global is made that by itself can spawn tasks.
+   * Otherwise, we might end up in a situation where several threads fight for
+   * the data.
+   *
+   * Access to the scratch data is only through the accessor class which
+   * handles the access as well as marking the data as used.
+   */
+  template <typename Number>
+  class ConstraintMatrixData
+  {
+  public:
+    struct ScratchData
+    {
+      /**
+       * Constructor. Marks the object as not in use; the arrays start out
+       * default-constructed.
+       */
+      ScratchData ()
+        :
+        in_use (false)
+      {}
+
+      /**
+       * Copy constructor. Does not copy any content of the argument: the
+       * new object is marked as not in use and all its arrays are
+       * default-constructed.
+       */
+      ScratchData (const ScratchData &)
+        :
+        in_use (false)
+      {}
+
+      /**
+       * Stores whether the data is currently in use. Set and cleared by
+       * ScratchDataAccessor.
+       */
+      bool in_use;
+
+      /**
+       * Temporary array for column indices
+       */
+      std::vector<size_type> columns;
+
+      /**
+       * Temporary array for column values
+       */
+      std::vector<Number>    values;
+
+      /**
+       * Temporary array for block start indices
+       */
+      std::vector<size_type> block_starts;
+
+      /**
+       * Temporary array for vector indices
+       */
+      std::vector<size_type> vector_indices;
+
+      /**
+       * Data array for reordered row indices. The object is stored by value
+       * as a member of this struct.
+       */
+      GlobalRowsFromLocal global_rows;
+
+      /**
+       * Data array for reordered column indices. The object is stored by
+       * value as a member of this struct.
+       */
+      GlobalRowsFromLocal global_columns;
+    };
+
+    /**
+     * Accessor class to guard access to scratch_data. An object of this
+     * class marks the scratch data as in use for as long as it lives.
+     */
+    class ScratchDataAccessor
+    {
+    public:
+      /**
+       * Constructor. Grabs the scratch data object returned by
+       * scratch_data.get(), asserts that it is not already in use, and
+       * marks it as used.
+       */
+      ScratchDataAccessor()
+        :
+        my_scratch_data(&ConstraintMatrixData::scratch_data.get())
+      {
+        Assert(my_scratch_data->in_use == false,
+               ExcMessage("Access to thread-local scratch data tried, but it is already "
+                          "in use"));
+        my_scratch_data->in_use = true;
+      }
+
+      /**
+       * Destructor. Mark scratch data as available again.
+       */
+      ~ScratchDataAccessor()
+      {
+        my_scratch_data->in_use = false;
+      }
+
+      /**
+       * Dereferencing operator. Returns a reference to the guarded scratch
+       * data object.
+       */
+      ScratchData &operator* ()
+      {
+        return *my_scratch_data;
+      }
+
+      /**
+       * Member access operator. Returns a pointer to the guarded scratch
+       * data object.
+       */
+      ScratchData *operator-> ()
+      {
+        return my_scratch_data;
+      }
+
+    private:
+      /**
+       * Pointer to the scratch data object obtained in the constructor. The
+       * object is not owned by the accessor.
+       */
+      ScratchData *my_scratch_data;
+    };
+
+  private:
+    /**
+     * The actual data object that contains a scratch data for each thread.
+     */
+    static Threads::ThreadLocalStorage<ScratchData> scratch_data;
+  };
+
+
+
+  // helper for block matrices: records in block_starts at which position of
+  // the list of rows (sorted according to global ids) each block begins, and
+  // then rewrites the global indices of all rows from block_starts[1]
+  // onwards as block-local indices. This allows using functions like
+  // vector.block(1)(block_local_id) instead of vector(global_id) without
+  // transforming indices one-by-one later on.
+  template <class BlockType>
+  inline
+  void
+  make_block_starts (const BlockType        &block_object,
+                     GlobalRowsFromLocal    &global_rows,
+                     std::vector<size_type> &block_starts)
+  {
+    AssertDimension (block_starts.size(), block_object.n_block_rows()+1);
+
+    typedef std::vector<Distributing>::iterator row_iterator;
+
+    const size_type n_blocks = block_object.n_block_rows();
+    const size_type n_rows   = global_rows.size();
+
+    const row_iterator rows_begin = global_rows.total_row_indices.begin();
+    const row_iterator rows_end   = rows_begin + n_rows;
+
+    // locate the first row of every block; each search resumes at the
+    // position found for the previous block
+    row_iterator search_from = rows_begin;
+    block_starts[0] = 0;
+    for (size_type block=1; block<n_blocks; ++block)
+      {
+        search_from =
+          Utilities::lower_bound (search_from,
+                                  rows_end,
+                                  Distributing(block_object.get_row_indices().block_start(block)));
+        block_starts[block] = search_from - rows_begin;
+      }
+    block_starts[n_blocks] = n_rows;
+
+    // rows in front of block_starts[1] are left untouched, all others are
+    // translated to the block-local index space
+    for (size_type row=block_starts[1]; row<n_rows; ++row)
+      global_rows.global_row(row)
+        = block_object.get_row_indices().global_to_local(global_rows.global_row(row)).second;
+  }
+
+
+
+  // counterpart of the GlobalRowsFromLocal version of make_block_starts,
+  // working on a plain std::vector<size_type> of global indices. Used in
+  // functions for sparsity patterns.
+  template <class BlockType>
+  inline
+  void
+  make_block_starts (const BlockType        &block_object,
+                     std::vector<size_type> &row_indices,
+                     std::vector<size_type> &block_starts)
+  {
+    AssertDimension (block_starts.size(), block_object.n_block_rows()+1);
+
+    typedef std::vector<size_type>::iterator index_iterator;
+
+    const size_type n_blocks  = block_object.n_block_rows();
+    const size_type n_indices = row_indices.size();
+
+    // locate the first index of every block; each search resumes at the
+    // position found for the previous block
+    index_iterator search_from = row_indices.begin();
+    block_starts[0] = 0;
+    for (size_type block=1; block<n_blocks; ++block)
+      {
+        search_from =
+          Utilities::lower_bound (search_from,
+                                  row_indices.end(),
+                                  block_object.get_row_indices().block_start(block));
+        block_starts[block] = search_from - row_indices.begin();
+      }
+    block_starts[n_blocks] = n_indices;
+
+    // indices in front of block_starts[1] are left untouched, all others are
+    // translated to the block-local index space
+    for (size_type k=block_starts[1]; k<n_indices; ++k)
+      row_indices[k]
+        = block_object.get_row_indices().global_to_local(row_indices[k]).second;
+  }
+
+
+
+  // computes the value that the pair (global_rows[i], global_cols[j])
+  // receives from local_matrix. The result is the direct entry
+  // (loc_row, local column of j), if both exist, plus all entries reached
+  // through constraints on the row and/or the column side, each weighted
+  // with the corresponding constraint values. loc_row is the local row
+  // belonging to global_rows[i], or numbers::invalid_size_type if there is
+  // none.
+  template<typename LocalType>
+  static inline
+  LocalType resolve_matrix_entry (const GlobalRowsFromLocal   &global_rows,
+                                  const GlobalRowsFromLocal &global_cols,
+                                  const size_type            i,
+                                  const size_type            j,
+                                  const size_type            loc_row,
+                                  const FullMatrix<LocalType> &local_matrix)
+  {
+    const size_type loc_col = global_cols.local_row(j);
+    const bool column_is_direct = (loc_col != numbers::invalid_size_type);
+    const size_type n_col_constraints = global_cols.size(j);
+
+    LocalType entry = 0;
+
+    // contributions of the direct local row, if there is one: first the
+    // direct column, then the columns reached through constraints
+    if (loc_row != numbers::invalid_size_type)
+      {
+        if (column_is_direct)
+          entry = local_matrix(loc_row, loc_col);
+
+        for (size_type p=0; p<n_col_constraints; ++p)
+          entry += (local_matrix(loc_row, global_cols.local_row(j,p)) *
+                    global_cols.constraint_value(j,p));
+      }
+
+    // contributions of local rows reached through constraints on the row:
+    // for each of them collect the direct and the indirect column entries
+    // and weight the sum with the row's constraint value
+    const size_type n_row_constraints = global_rows.size(i);
+    for (size_type q=0; q<n_row_constraints; ++q)
+      {
+        const size_type indirect_row = global_rows.local_row(i,q);
+
+        LocalType row_sum = 0;
+        if (column_is_direct)
+          row_sum = local_matrix(indirect_row, loc_col);
+
+        for (size_type p=0; p<n_col_constraints; ++p)
+          row_sum += (local_matrix(indirect_row,
+                                   global_cols.local_row(j,p))
+                      *
+                      global_cols.constraint_value(j,p));
+
+        entry += row_sum * global_rows.constraint_value(i,q);
+      }
+    return entry;
+  }
+
+
+
+  // collects all entries that need to be written into global_rows[i] for
+  // the columns [column_start, column_end) of global_cols. Every value that
+  // differs from LocalType() is appended through val_ptr and its global
+  // column index through col_ptr; both pointers are advanced past the
+  // entries written.
+  template <typename number, typename LocalType>
+  inline
+  void
+  resolve_matrix_row (const GlobalRowsFromLocal &global_rows,
+                      const GlobalRowsFromLocal &global_cols,
+                      const size_type            i,
+                      const size_type            column_start,
+                      const size_type            column_end,
+                      const FullMatrix<LocalType> &local_matrix,
+                      size_type                *&col_ptr,
+                      number                   *&val_ptr)
+  {
+    // empty column range: nothing to do
+    if (column_start == column_end)
+      return;
+
+    AssertIndexRange (column_end-1, global_cols.size());
+    const size_type loc_row = global_rows.local_row(i);
+
+    if (global_rows.have_indirect_rows() == false &&
+        global_cols.have_indirect_rows() == false)
+      {
+        // fast path: neither rows nor columns contain indirect references,
+        // so each value is read straight from row loc_row of the local
+        // matrix and only needs to be checked for being zero
+        AssertIndexRange(loc_row, local_matrix.m());
+        const LocalType *local_row_values = &local_matrix(loc_row, 0);
+
+        for (size_type col=column_start; col<column_end; ++col)
+          {
+            const size_type loc_col = global_cols.local_row(col);
+            AssertIndexRange(loc_col, local_matrix.n());
+
+            const LocalType value = local_row_values[loc_col];
+            if (value != LocalType ())
+              {
+                *val_ptr++ = static_cast<number> (value);
+                *col_ptr++ = global_cols.global_row(col);
+              }
+          }
+        return;
+      }
+
+    // general path: there are indirect references, so every entry has to be
+    // resolved through the constraints
+    for (size_type col=column_start; col<column_end; ++col)
+      {
+        const LocalType value = resolve_matrix_entry (global_rows, global_cols,
+                                                      i, col,
+                                                      loc_row, local_matrix);
+
+        // keep only nontrivial values
+        if (value != LocalType ())
+          {
+            *val_ptr++ = static_cast<number> (value);
+            *col_ptr++ = global_cols.global_row(col);
+          }
+      }
+  }
+
+
+
+  // helper that adds a value into a row of a SparseMatrix<number> through
+  // an iterator that walks along that row.
+  namespace dealiiSparseMatrix
+  {
+    // adds 'value' to the entry with column index 'column'. The iterator
+    // matrix_values is advanced until it reaches that column and is left
+    // there. Values equal to LocalType() are skipped without touching the
+    // iterator. 'row' is only used for the error message of the assertion.
+    template <typename SparseMatrixIterator, typename LocalType>
+    static inline
+    void add_value (const LocalType       value,
+                    const size_type       row,
+                    const size_type       column,
+                    SparseMatrixIterator &matrix_values)
+    {
+      (void)row;
+
+      // nothing to add for a zero value
+      if (!(value != LocalType ()))
+        return;
+
+      // move forward to the requested column
+      while (matrix_values->column() < column)
+        ++matrix_values;
+
+      Assert (matrix_values->column() == column,
+              typename SparseMatrix<typename SparseMatrixIterator::MatrixType::value_type>::ExcInvalidIndex(row, column));
+      matrix_values->value() += value;
+    }
+  }
+
+
+  // similar as before, now with shortcut for deal.II sparse matrices: the contributions of row i for the
+  // columns [column_start,column_end) are added in place, i.e., directly in the respective matrix row, which
+  // lets us avoid using extra arrays. global_rows provides both the row and the column indices here.
+  template <typename number, typename LocalType>
+  inline
+  void
+  resolve_matrix_row (const GlobalRowsFromLocal &global_rows,
+                      const size_type            i,
+                      const size_type            column_start,
+                      const size_type            column_end,
+                      const FullMatrix<LocalType> &local_matrix,
+                      SparseMatrix<number>      *sparse_matrix)
+  {
+    if (column_end == column_start) // empty column range: nothing to add
+      return;
+
+    AssertIndexRange (column_end-1, global_rows.size());
+    const SparsityPattern &sparsity = sparse_matrix->get_sparsity_pattern();
+
+    if (sparsity.n_nonzero_elements() == 0) // pattern without entries: nothing to write into
+      return;
+
+    const size_type row = global_rows.global_row(i);
+    const size_type loc_row = global_rows.local_row(i);
+
+    typename SparseMatrix<number>::iterator
+    matrix_values = sparse_matrix->begin(row); // walks along the matrix row; only ever moved forward below
+    const bool optimize_diagonal = sparsity.n_rows() == sparsity.n_cols(); // square pattern: first entry of the row is treated as the diagonal
+
+    // distinguish three cases about what can happen for checking whether the
+    // diagonal is the first element of the row. this avoids if statements at
+    // the innermost loop positions wherever that is possible
+
+    if (!optimize_diagonal) // case 1: no diagonal optimization in matrix
+      {
+        if (global_rows.have_indirect_rows() == false)
+          {
+            AssertIndexRange (loc_row, local_matrix.m());
+            const LocalType *matrix_ptr = &local_matrix(loc_row, 0); // start of row loc_row of the local matrix
+
+            for (size_type j=column_start; j<column_end; ++j)
+              {
+                const size_type loc_col = global_rows.local_row(j);
+                const LocalType col_val = matrix_ptr[loc_col];
+                dealiiSparseMatrix::add_value (col_val, row,
+                                               global_rows.global_row(j),
+                                               matrix_values);
+              }
+          }
+        else
+          {
+            for (size_type j=column_start; j<column_end; ++j)
+              {
+                LocalType col_val = resolve_matrix_entry (global_rows, global_rows, i, j,
+                                                          loc_row, local_matrix);
+                dealiiSparseMatrix::add_value (col_val, row,
+                                               global_rows.global_row(j),
+                                               matrix_values);
+              }
+          }
+      }
+    else if (i>=column_start && i<column_end) // case 2: can split loop
+      {
+        ++matrix_values; // jump over diagonal element
+        if (global_rows.have_indirect_rows() == false)
+          {
+            AssertIndexRange (loc_row, local_matrix.m());
+            const LocalType *matrix_ptr = &local_matrix(loc_row, 0);
+
+            sparse_matrix->begin(row)->value() += matrix_ptr[loc_row]; // diagonal contribution goes to the first entry of the row
+            for (size_type j=column_start; j<i; ++j) // columns in front of position i
+              {
+                const size_type loc_col = global_rows.local_row(j);
+                const LocalType col_val = matrix_ptr[loc_col];
+                dealiiSparseMatrix::add_value(col_val, row,
+                                              global_rows.global_row(j),
+                                              matrix_values);
+              }
+            for (size_type j=i+1; j<column_end; ++j) // columns behind position i
+              {
+                const size_type loc_col = global_rows.local_row(j);
+                const LocalType col_val = matrix_ptr[loc_col];
+                dealiiSparseMatrix::add_value(col_val, row,
+                                              global_rows.global_row(j),
+                                              matrix_values);
+              }
+          }
+        else
+          {
+            sparse_matrix->begin(row)->value() +=
+              resolve_matrix_entry (global_rows, global_rows, i, i,
+                                    loc_row, local_matrix); // diagonal contribution, resolved through the constraints
+            for (size_type j=column_start; j<i; ++j) // columns in front of position i
+              {
+                LocalType col_val = resolve_matrix_entry (global_rows, global_rows, i, j,
+                                                          loc_row, local_matrix);
+                dealiiSparseMatrix::add_value (col_val, row,
+                                               global_rows.global_row(j),
+                                               matrix_values);
+              }
+            for (size_type j=i+1; j<column_end; ++j) // columns behind position i
+              {
+                LocalType col_val = resolve_matrix_entry (global_rows, global_rows, i, j,
+                                                          loc_row, local_matrix);
+                dealiiSparseMatrix::add_value (col_val, row,
+                                               global_rows.global_row(j),
+                                               matrix_values);
+              }
+          }
+      }
+    // case 3: position i is outside [column_start,column_end) - need to check for the diagonal inside the loop
+    else if (global_rows.have_indirect_rows() == false)
+      {
+        ++matrix_values; // jump over diagonal element
+        AssertIndexRange (loc_row, local_matrix.m());
+        const LocalType *matrix_ptr = &local_matrix(loc_row, 0);
+
+        for (size_type j=column_start; j<column_end; ++j)
+          {
+            const size_type loc_col = global_rows.local_row(j);
+            const LocalType col_val = matrix_ptr[loc_col];
+            if (row==global_rows.global_row(j)) // column is the diagonal: write to the first entry of the row
+              sparse_matrix->begin(row)->value() += col_val;
+            else
+              dealiiSparseMatrix::add_value(col_val, row,
+                                            global_rows.global_row(j),
+                                            matrix_values);
+          }
+      }
+    else
+      {
+        ++matrix_values; // jump over diagonal element
+        for (size_type j=column_start; j<column_end; ++j)
+          {
+            LocalType col_val = resolve_matrix_entry (global_rows, global_rows, i,
+                                                      j, loc_row, local_matrix);
+            if (row==global_rows.global_row(j)) // column is the diagonal: write to the first entry of the row
+              sparse_matrix->begin(row)->value() += col_val;
+            else
+              dealiiSparseMatrix::add_value (col_val, row,
+                                             global_rows.global_row(j),
+                                             matrix_values);
+          }
+      }
+  }
+
+
+
+  // decides whether the pair (global_rows[i], global_rows[j]) has to appear
+  // in a sparsity pattern: returns true as soon as dof_mask couples any
+  // local row belonging to i (the direct one, loc_row, or one reached
+  // through constraints) with any local column belonging to j.
+  inline
+  bool
+  sparsity_entry_is_coupled (const GlobalRowsFromLocal &global_rows,
+                             const size_type            i,
+                             const size_type            j,
+                             const size_type            loc_row,
+                             const Table<2,bool>       &dof_mask)
+  {
+    const size_type loc_col = global_rows.local_row(j);
+
+    // couplings of the direct local row
+    if (loc_row != numbers::invalid_size_type)
+      {
+        Assert (loc_row < dof_mask.n_rows(), ExcInternalError());
+        if (loc_col != numbers::invalid_size_type)
+          {
+            Assert (loc_col < dof_mask.n_cols(), ExcInternalError());
+            if (dof_mask(loc_row,loc_col) == true)
+              return true;
+          }
+
+        for (size_type p=0; p<global_rows.size(j); ++p)
+          if (dof_mask(loc_row,global_rows.local_row(j,p)) == true)
+            return true;
+      }
+
+    // couplings of the local rows reached through constraints
+    for (size_type q=0; q<global_rows.size(i); ++q)
+      {
+        if (loc_col != numbers::invalid_size_type)
+          {
+            Assert (loc_col < dof_mask.n_cols(), ExcInternalError());
+            if (dof_mask(global_rows.local_row(i,q),loc_col) == true)
+              return true;
+          }
+
+        for (size_type p=0; p<global_rows.size(j); ++p)
+          if (dof_mask(global_rows.local_row(i,q),
+                       global_rows.local_row(j,p)) == true)
+            return true;
+      }
+
+    return false;
+  }
+
+
+
+  // resolves all entries that will be added to the given global row
+  // global_rows[i] for the columns [column_start, column_end), now for
+  // sparsity patterns: the global index of every column that dof_mask
+  // couples with the row is appended through col_ptr.
+  inline
+  void
+  resolve_matrix_row (const GlobalRowsFromLocal     &global_rows,
+                      const size_type                i,
+                      const size_type                column_start,
+                      const size_type                column_end,
+                      const Table<2,bool>           &dof_mask,
+                      std::vector<size_type>::iterator &col_ptr)
+  {
+    // empty column range: nothing to do
+    if (column_start == column_end)
+      return;
+
+    const size_type loc_row = global_rows.local_row(i);
+
+    if (global_rows.have_indirect_rows() == false)
+      {
+        // fast path: no indirect references on this set of dofs, so a
+        // single look-up in dof_mask per column is enough
+        Assert(loc_row < dof_mask.n_rows(),
+               ExcInternalError());
+
+        for (size_type col=column_start; col<column_end; ++col)
+          {
+            const size_type loc_col = global_rows.local_row(col);
+            Assert(loc_col < dof_mask.n_cols(), ExcInternalError());
+
+            if (dof_mask(loc_row,loc_col) == true)
+              *col_ptr++ = global_rows.global_row(col);
+          }
+        return;
+      }
+
+    // slower path: direct and indirect references have to be checked
+    for (size_type col=column_start; col<column_end; ++col)
+      if (sparsity_entry_is_coupled (global_rows, i, col, loc_row, dof_mask))
+        *col_ptr++ = global_rows.global_row(col);
+  }
+
+
+
+  // keeps the global matrix invertible by putting something on the diagonal
+  // of every constrained row: the absolute value of the corresponding local
+  // diagonal entry or, if that is zero, the average of the absolute values
+  // of the local diagonal. The added entry is therefore positive and of the
+  // same order of magnitude as the other elements of the matrix.
+  //
+  // this also captures the special case of a dof that is both constrained
+  // and fixed (this can happen for hanging nodes in 3d that also happen to
+  // be on the boundary): when distributing rows and columns in
+  // distribute_local_to_global, no matrix element is touched if all the
+  // dofs this dof is constrained to are constrained themselves, but here
+  // something is added for this dof nevertheless.
+  //
+  // if use_inhomogeneities_for_rhs is set, the inhomogeneity of each
+  // constrained row times the added diagonal value is added to
+  // global_vector as well.
+  template <typename MatrixType, typename VectorType>
+  inline void
+  set_matrix_diagonals (const internals::GlobalRowsFromLocal &global_rows,
+                        const std::vector<size_type>         &local_dof_indices,
+                        const FullMatrix<typename MatrixType::value_type>  &local_matrix,
+                        const ConstraintMatrix               &constraints,
+                        MatrixType                           &global_matrix,
+                        VectorType                           &global_vector,
+                        bool                                 use_inhomogeneities_for_rhs)
+  {
+    typedef typename MatrixType::value_type value_type;
+
+    // without constraints there is nothing to do
+    const size_type n_constrained = global_rows.n_constraints();
+    if (n_constrained == 0)
+      return;
+
+    // fallback value for rows whose own local diagonal entry is zero
+    value_type average_diagonal = value_type();
+    for (size_type d=0; d<local_matrix.m(); ++d)
+      average_diagonal += std::abs (local_matrix(d,d));
+    average_diagonal /= static_cast<double>(local_matrix.m());
+
+    for (size_type c=0; c<n_constrained; ++c)
+      {
+        const size_type local_row  = global_rows.constraint_origin(c);
+        const size_type global_row = local_dof_indices[local_row];
+
+        const value_type new_diagonal
+          = (std::abs(local_matrix(local_row,local_row)) != 0 ?
+             std::abs(local_matrix(local_row,local_row)) : average_diagonal);
+        global_matrix.add(global_row, global_row, new_diagonal);
+
+        // with use_inhomogeneities_for_rhs the right hand side entry of a
+        // constrained row is not left at zero but receives
+        // inhomogeneity(row) * new_diagonal
+        if (use_inhomogeneities_for_rhs == true)
+          global_vector(global_row) += constraints.get_inhomogeneity(global_row) * new_diagonal;
+      }
+  }
+
+
+
+  // counterpart of set_matrix_diagonals for sparsity patterns that are set
+  // up using add_entries_local_to_global. For every constrained dof either
+  // only the diagonal entry is added or, if keep_constrained_entries is
+  // set, all entries in its row and column that dof_mask selects.
+  template <typename SparsityPatternType>
+  inline void
+  set_sparsity_diagonals (const internals::GlobalRowsFromLocal &global_rows,
+                          const std::vector<size_type>         &local_dof_indices,
+                          const Table<2,bool>                  &dof_mask,
+                          const bool                            keep_constrained_entries,
+                          SparsityPatternType                  &sparsity_pattern)
+  {
+    const size_type n_constrained = global_rows.n_constraints();
+    const size_type n_local_dofs  = local_dof_indices.size();
+
+    for (size_type c=0; c<n_constrained; ++c)
+      {
+        const size_type local_row  = global_rows.constraint_origin(c);
+        const size_type global_row = local_dof_indices[local_row];
+
+        // constrained entries are not kept: the diagonal is all we need
+        if (keep_constrained_entries == false)
+          {
+            sparsity_pattern.add(global_row,global_row);
+            continue;
+          }
+
+        // otherwise add the row and the column of the constrained dof as
+        // far as dof_mask selects them
+        for (size_type j=0; j<n_local_dofs; ++j)
+          {
+            if (dof_mask(local_row,j) == true)
+              sparsity_pattern.add(global_row,
+                                   local_dof_indices[j]);
+            if (dof_mask(j,local_row) == true)
+              sparsity_pattern.add(local_dof_indices[j],
+                                   global_row);
+          }
+      }
+  }
+
+} // end of namespace internals
+
+
+
+// Basic idea of setting up a list of all global dofs: first find all rows
+// and columns that we are going to touch, and then go through the
+// constraint lines and collect all the local rows that are related to them.
+void
+ConstraintMatrix::
+make_sorted_row_list (const std::vector<size_type>   &local_dof_indices,
+                      internals::GlobalRowsFromLocal &global_rows) const
+{
+  const size_type n_local_dofs = local_dof_indices.size();
+  AssertDimension (n_local_dofs, global_rows.size());
+
+  // when distributing the local data to the global matrix, the indices can
+  // be sorted quite cheaply (this needs some memory, but only for rows,
+  // whereas the distribution process itself goes over rows and columns).
+  // The benefit is that writing into the global matrix can make use of the
+  // sortedness.
+  //
+  // the list to be built contains the rows of unconstrained dofs and the
+  // indices introduced by dofs that are constrained to a combination of
+  // other dofs. It is kept in a <tt>std::vector</tt> inside global_rows;
+  // choosing <tt>std::map</tt> or anything similar would be much more
+  // expensive here.
+
+  // step 1: enter the unconstrained dofs in the order in which they appear
+  // and only register the constrained ones; they are resolved in step 2.
+  size_type n_unconstrained = 0;
+  for (size_type local = 0; local<n_local_dofs; ++local)
+    {
+      const size_type global_index = local_dof_indices[local];
+      if (is_constrained(global_index))
+        {
+          global_rows.insert_constraint(local);
+          continue;
+        }
+
+      global_rows.global_row(n_unconstrained) = global_index;
+      global_rows.local_row(n_unconstrained)  = local;
+      ++n_unconstrained;
+    }
+  global_rows.sort();
+
+  // step 2: for every constrained dof, insert the indices it is constrained
+  // to, together with the local row they originate from and their weight
+  const size_type n_constrained_rows = n_local_dofs-n_unconstrained;
+  for (size_type c=0; c<n_constrained_rows; ++c)
+    {
+      const size_type local_row = global_rows.constraint_origin(c);
+      AssertIndexRange(local_row, n_local_dofs);
+
+      const size_type global_row = local_dof_indices[local_row];
+      Assert (is_constrained(global_row), ExcInternalError());
+
+      const ConstraintLine &line =
+        lines[lines_cache[calculate_line_index(global_row)]];
+
+      if (line.inhomogeneity != 0)
+        global_rows.set_ith_constraint_inhomogeneous (c);
+
+      for (size_type q=0; q<line.entries.size(); ++q)
+        global_rows.insert_index (line.entries[q].first,
+                                  local_row,
+                                  line.entries[q].second);
+    }
+}
+
+
+
+// Same function as before, but now do only extract the global indices that
+// come from the local ones without storing their origin. Used for sparsity
+// pattern generation.
+//
+// The function writes to active_dofs[0] ... active_dofs[n_local_dofs-1] and
+// afterwards removes the constrained entries from the back, so active_dofs
+// is expected to hold exactly local_dof_indices.size() elements on entry.
+// On exit it contains, in ascending order, the global indices of the
+// unconstrained dofs together with the indices the constrained dofs are
+// constrained to; an index coming from a constraint is not inserted a
+// second time if it is already present.
+inline
+void
+ConstraintMatrix::
+make_sorted_row_list (const std::vector<size_type> &local_dof_indices,
+                      std::vector<size_type>       &active_dofs) const
+{
+  const size_type n_local_dofs = local_dof_indices.size();
+
+  // global indices of unconstrained dofs are collected at the front, the
+  // local numbers of constrained dofs at the back (the first constrained
+  // dof ends up in the last slot)
+  size_type added_rows = 0;
+  for (size_type i = 0; i<n_local_dofs; ++i)
+    {
+      if (is_constrained(local_dof_indices[i]) == false)
+        {
+          active_dofs[added_rows++] = local_dof_indices[i];
+          continue;
+        }
+
+      active_dofs[n_local_dofs-i+added_rows-1] = i;
+    }
+  std::sort (active_dofs.begin(), active_dofs.begin()+added_rows);
+
+  const size_type n_constrained_dofs = n_local_dofs-added_rows;
+  for (size_type i=n_constrained_dofs; i>0; --i)
+    {
+      const size_type local_row = active_dofs.back();
+
+      // remove constrained entry since we are going to resolve it in place
+      active_dofs.pop_back();
+      const size_type global_row = local_dof_indices[local_row];
+      const ConstraintLine &position =
+        lines[lines_cache[calculate_line_index(global_row)]];
+      for (size_type q=0; q<position.entries.size(); ++q)
+        {
+          const size_type new_index = position.entries[q].first;
+
+          // the last i-1 entries still hold local numbers of constrained
+          // dofs waiting to be resolved; everything in front of them is the
+          // sorted list. The iterator has to be recomputed in every pass
+          // because insert() may invalidate it.
+          const std::vector<size_type>::iterator sorted_end
+            = active_dofs.end()-(i-1);
+
+          // the sorted list is empty if all dofs seen so far are
+          // constrained. In that case there is no last element to compare
+          // with, and the new index simply becomes the first entry.
+          if (sorted_end == active_dofs.begin() ||
+              *(sorted_end-1) < new_index)
+            active_dofs.insert(sorted_end, new_index);
+
+          // make binary search to find where to put the new index in order to
+          // keep the list sorted. The list is not empty and its last element
+          // is not smaller than new_index, so the iterator found can be
+          // dereferenced.
+          else
+            {
+              std::vector<size_type>::iterator it =
+                Utilities::lower_bound(active_dofs.begin(),
+                                       sorted_end,
+                                       new_index);
+              if (*it != new_index)
+                active_dofs.insert(it, new_index);
+            }
+        }
+    }
+}
+
+
+
+// Computes the value that entry i of global_rows receives from the local
+// vector: the direct local entry, if there is one, plus all entries reached
+// through constraints weighted with their constraint values. For every row
+// with an inhomogeneous constraint, the inhomogeneity times the
+// corresponding column of local_matrix is subtracted from each local entry
+// used.
+template< typename LocalType>
+inline
+LocalType
+ConstraintMatrix::
+resolve_vector_entry (const size_type                       i,
+                      const internals::GlobalRowsFromLocal &global_rows,
+                      const Vector<LocalType>              &local_vector,
+                      const std::vector<size_type>         &local_dof_indices,
+                      const FullMatrix<LocalType>          &local_matrix) const
+{
+  const size_type loc_row = global_rows.local_row(i);
+  const size_type n_inhomogeneous_rows = global_rows.n_inhomogeneities();
+
+  LocalType val = 0;
+
+  // direct contribution from a local entry, corrected by the inhomogeneous
+  // constraints
+  if (loc_row != numbers::invalid_size_type)
+    {
+      val = local_vector(loc_row);
+      for (size_type k=0; k<n_inhomogeneous_rows; ++k)
+        {
+          const size_type origin = global_rows.constraint_origin(k);
+          const ConstraintLine &line =
+            lines[lines_cache[calculate_line_index(local_dof_indices[origin])]];
+          val -= (line.inhomogeneity *
+                  local_matrix(loc_row, origin));
+        }
+    }
+
+  // indirect contributions: the same correction is applied to every local
+  // row reached through a constraint before it is weighted
+  const size_type n_indirect = global_rows.size(i);
+  for (size_type q=0; q<n_indirect; ++q)
+    {
+      const size_type loc_row_q = global_rows.local_row(i,q);
+
+      LocalType add_this = local_vector (loc_row_q);
+      for (size_type k=0; k<n_inhomogeneous_rows; ++k)
+        {
+          const size_type origin = global_rows.constraint_origin(k);
+          const ConstraintLine &line =
+            lines[lines_cache[calculate_line_index(local_dof_indices[origin])]];
+          add_this -= (line.inhomogeneity *
+                       local_matrix(loc_row_q, origin));
+        }
+      val += add_this * global_rows.constraint_value(i,q);
+    }
+  return val;
+}
+
+
+// internal implementation for distribute_local_to_global for standard
+// (non-block) matrices; the block variant takes bool2type<true> instead.
+template <typename MatrixType, typename VectorType>
+void
+ConstraintMatrix::distribute_local_to_global (
+  const FullMatrix<typename MatrixType::value_type>     &local_matrix,
+  const Vector<typename VectorType::value_type>         &local_vector,
+  const std::vector<size_type>    &local_dof_indices,
+  MatrixType                      &global_matrix,
+  VectorType                      &global_vector,
+  bool                            use_inhomogeneities_for_rhs,
+  internal::bool2type<false>) const
+{
+  // the vectors count as unused (dummies) only if both the local and the
+  // global vector have size zero.
+  const bool use_vectors = (local_vector.size() == 0 &&
+                            global_vector.size() == 0) ? false : true;
+  typedef typename MatrixType::value_type number;
+  const bool use_dealii_matrix =
+    types_are_equal<MatrixType,SparseMatrix<number> >::value;
+
+  AssertDimension (local_matrix.n(), local_dof_indices.size());
+  AssertDimension (local_matrix.m(), local_dof_indices.size());
+  Assert (global_matrix.m() == global_matrix.n(), ExcNotQuadratic());
+  if (use_vectors == true)
+    {
+      AssertDimension (local_matrix.m(), local_vector.size());
+      AssertDimension (global_matrix.m(), global_vector.size());
+    }
+  Assert (lines.empty() || sorted == true, ExcMatrixNotClosed());
+
+  const size_type n_local_dofs = local_dof_indices.size();
+
+  typename internals::ConstraintMatrixData<number>::ScratchDataAccessor
+  scratch_data;
+
+  internals::GlobalRowsFromLocal &global_rows = scratch_data->global_rows;
+  global_rows.reinit(n_local_dofs);
+  make_sorted_row_list (local_dof_indices, global_rows);
+
+  const size_type n_actual_dofs = global_rows.size();
+
+  // arrays for the column data (indices and values) of one row that will
+  // then be written into the matrix. They live in the scratch data and are
+  // only sized when the matrix is not a deal.II SparseMatrix<number>; for
+  // that type the overload of resolve_matrix_row taking the matrix pointer
+  // is called below and these arrays are not used in this function. The
+  // matrix type is checked through types_are_equal above.
+  std::vector<size_type> &cols = scratch_data->columns;
+  std::vector<number>     &vals = scratch_data->values;
+  SparseMatrix<number> *sparse_matrix
+    = dynamic_cast<SparseMatrix<number> *>(&global_matrix);
+  if (use_dealii_matrix == false)
+    {
+      cols.resize (n_actual_dofs);
+      vals.resize (n_actual_dofs);
+    }
+  else
+    Assert (sparse_matrix != 0, ExcInternalError());
+
+  // now do the actual job. go through all the global rows that we will touch
+  // and call resolve_matrix_row for each of those.
+  for (size_type i=0; i<n_actual_dofs; ++i)
+    {
+      const size_type row = global_rows.global_row(i);
+
+      // calculate all the data that will be written into the matrix row.
+      if (use_dealii_matrix == false)
+        {
+          size_type *col_ptr = &cols[0];
+          // col_ptr and val_ptr start at the beginning of the scratch
+          // arrays; the distance col_ptr moved gives the number of entries
+          number *val_ptr = &vals[0];
+          internals::resolve_matrix_row (global_rows, global_rows, i, 0,
+                                         n_actual_dofs,
+                                         local_matrix, col_ptr, val_ptr);
+          const size_type n_values = col_ptr - &cols[0];
+          if (n_values > 0)
+            global_matrix.add(row, n_values, &cols[0], &vals[0], false,
+                              true);
+        }
+      else
+        internals::resolve_matrix_row (global_rows, i, 0, n_actual_dofs,
+                                       local_matrix, sparse_matrix);
+
+      // now to the vectors. besides doing the same job as we did above (i.e.,
+      // distribute the content of the local vector into the global one), need
+      // to account for inhomogeneities here: this corresponds to eliminating
+      // the respective column in the local matrix with value on the right
+      // hand side.
+      if (use_vectors == true)
+        {
+          const number val = resolve_vector_entry (i, global_rows,
+                                                   local_vector,
+                                                   local_dof_indices,
+                                                   local_matrix);
+
+          if (val != number ())
+            global_vector(row) += static_cast<typename VectorType::value_type>(val);
+        }
+    }
+
+  internals::set_matrix_diagonals (global_rows, local_dof_indices,
+                                   local_matrix, *this,
+                                   global_matrix, global_vector, use_inhomogeneities_for_rhs);
+}
+
+
+// Variant for rectangular local matrices with separate row and column indices.
+template <typename MatrixType>
+void
+ConstraintMatrix::distribute_local_to_global (
+  const FullMatrix<typename MatrixType::value_type>  &local_matrix,
+  const std::vector<size_type> &row_indices,
+  const std::vector<size_type> &col_indices,
+  MatrixType                   &global_matrix) const
+{
+  typedef typename MatrixType::value_type number;
+
+  AssertDimension (local_matrix.m(), row_indices.size());
+  AssertDimension (local_matrix.n(), col_indices.size());
+  //Assert (sorted == true, ExcMatrixNotClosed());
+
+  const size_type n_local_row_dofs = row_indices.size();
+  const size_type n_local_col_dofs = col_indices.size();
+
+  typename internals::ConstraintMatrixData<number>::ScratchDataAccessor
+  scratch_data;
+  internals::GlobalRowsFromLocal &global_rows = scratch_data->global_rows;
+  global_rows.reinit(n_local_row_dofs);
+  internals::GlobalRowsFromLocal &global_cols = scratch_data->global_columns;
+  global_cols.reinit(n_local_col_dofs);
+  make_sorted_row_list (row_indices, global_rows);
+  make_sorted_row_list (col_indices, global_cols);
+
+  const size_type n_actual_row_dofs = global_rows.size();
+  const size_type n_actual_col_dofs = global_cols.size();
+
+  // scratch arrays for the column data (indices and values) of one matrix
+  // row, sized for one entry per resolved column
+  std::vector<size_type> &cols = scratch_data->columns;
+  std::vector<number>     &vals = scratch_data->values;
+  cols.resize(n_actual_col_dofs);
+  vals.resize(n_actual_col_dofs);
+
+  // now do the actual job: resolve each row and add it to the global matrix.
+  for (size_type i=0; i<n_actual_row_dofs; ++i)
+    {
+      const size_type row = global_rows.global_row(i);
+
+      // calculate all the data that will be written into the matrix row.
+      size_type *col_ptr = &cols[0];
+      number    *val_ptr = &vals[0];
+      internals::resolve_matrix_row (global_rows, global_cols, i, 0,
+                                     n_actual_col_dofs,
+                                     local_matrix, col_ptr, val_ptr);
+      const size_type n_values = col_ptr - &cols[0];
+      if (n_values > 0)
+        global_matrix.add(row, n_values, &cols[0], &vals[0], false, true);
+    }
+}
+
+
+// similar function as above, but now specialized for block matrices. See the
+// other function for additional comments.
+template <typename MatrixType, typename VectorType>
+void
+ConstraintMatrix::
+distribute_local_to_global (const FullMatrix<typename MatrixType::value_type>  &local_matrix,
+                            const Vector<typename VectorType::value_type>      &local_vector,
+                            const std::vector<size_type> &local_dof_indices,
+                            MatrixType                   &global_matrix,
+                            VectorType                   &global_vector,
+                            bool                          use_inhomogeneities_for_rhs,
+                            internal::bool2type<true>) const
+{
+  const bool use_vectors = (local_vector.size() == 0 &&
+                            global_vector.size() == 0) ? false : true;
+  typedef typename MatrixType::value_type number;
+  const bool use_dealii_matrix =
+    types_are_equal<MatrixType,BlockSparseMatrix<number> >::value;
+
+  AssertDimension (local_matrix.n(), local_dof_indices.size());
+  AssertDimension (local_matrix.m(), local_dof_indices.size());
+  Assert (global_matrix.m() == global_matrix.n(), ExcNotQuadratic());
+  Assert (global_matrix.n_block_rows() == global_matrix.n_block_cols(),
+          ExcNotQuadratic());
+  if (use_vectors == true)
+    {
+      AssertDimension (local_matrix.m(), local_vector.size());
+      AssertDimension (global_matrix.m(), global_vector.size());
+    }
+  Assert (lines.empty() || sorted == true, ExcMatrixNotClosed());
+
+  typename internals::ConstraintMatrixData<number>::ScratchDataAccessor
+  scratch_data;
+
+  const size_type n_local_dofs = local_dof_indices.size();
+  internals::GlobalRowsFromLocal &global_rows = scratch_data->global_rows;
+  global_rows.reinit(n_local_dofs);
+
+  make_sorted_row_list (local_dof_indices, global_rows);
+  const size_type n_actual_dofs = global_rows.size();
+  // keep the global row numbers for addressing global_vector further down
+  std::vector<size_type> &global_indices = scratch_data->vector_indices;
+  if (use_vectors == true)
+    {
+      global_indices.resize(n_actual_dofs);
+      for (size_type i=0; i<n_actual_dofs; ++i)
+        global_indices[i] = global_rows.global_row(i);
+    }
+
+  // additional construct that also takes care of block indices.
+  const size_type num_blocks   = global_matrix.n_block_rows();
+  std::vector<size_type> &block_starts = scratch_data->block_starts;
+  block_starts.resize(num_blocks+1);
+  internals::make_block_starts (global_matrix, global_rows, block_starts);
+
+  std::vector<size_type> &cols = scratch_data->columns;
+  std::vector<number>     &vals = scratch_data->values;
+  if (use_dealii_matrix == false)
+    {
+      cols.resize (n_actual_dofs);
+      vals.resize (n_actual_dofs);
+    }
+
+  // the basic difference to the non-block variant from now onwards is that we
+  // go through the blocks of the matrix separately, which allows us to set
+  // the block entries individually
+  for (size_type block=0; block<num_blocks; ++block)
+    {
+      const size_type next_block = block_starts[block+1];
+      for (size_type i=block_starts[block]; i<next_block; ++i)
+        {
+          const size_type row = global_rows.global_row(i);
+
+          for (size_type block_col=0; block_col<num_blocks; ++block_col)
+            {
+              const size_type start_block = block_starts[block_col],
+                              end_block = block_starts[block_col+1];
+              if (use_dealii_matrix == false)
+                {
+                  size_type *col_ptr = &cols[0];
+                  number *val_ptr = &vals[0];
+                  internals::resolve_matrix_row (global_rows, global_rows, i,
+                                                 start_block, end_block,
+                                                 local_matrix, col_ptr, val_ptr);
+                  const size_type n_values = col_ptr - &cols[0];
+                  if (n_values > 0)
+                    global_matrix.block(block, block_col).add(row, n_values,
+                                                              &cols[0], &vals[0],
+                                                              false, true);
+                }
+              else
+                {
+                  SparseMatrix<number> *sparse_matrix
+                    = dynamic_cast<SparseMatrix<number> *>(&global_matrix.block(block,
+                                                           block_col));
+                  Assert (sparse_matrix != 0, ExcInternalError());
+                  internals::resolve_matrix_row (global_rows, i, start_block,
+                                                 end_block, local_matrix, sparse_matrix);
+                }
+            }
+
+          if (use_vectors == true)
+            {
+              const number val = resolve_vector_entry (i, global_rows,
+                                                       local_vector,
+                                                       local_dof_indices,
+                                                       local_matrix);
+
+              if (val != number ())
+                global_vector(global_indices[i]) +=
+                  static_cast<typename VectorType::value_type>(val);
+            }
+        }
+    }
+
+  internals::set_matrix_diagonals (global_rows, local_dof_indices,
+                                   local_matrix, *this,
+                                   global_matrix, global_vector, use_inhomogeneities_for_rhs);
+}
+
+
+// Non-block variant: add entries for local_dof_indices, resolving constraints.
+template <typename SparsityPatternType>
+void
+ConstraintMatrix::
+add_entries_local_to_global (const std::vector<size_type> &local_dof_indices,
+                             SparsityPatternType          &sparsity_pattern,
+                             const bool                    keep_constrained_entries,
+                             const Table<2,bool>          &dof_mask,
+                             internal::bool2type<false> ) const
+{
+  Assert (sparsity_pattern.n_rows() == sparsity_pattern.n_cols(), ExcNotQuadratic());
+
+  const size_type n_local_dofs = local_dof_indices.size();
+  bool dof_mask_is_active = false;
+  if (dof_mask.n_rows() == n_local_dofs)
+    {
+      dof_mask_is_active = true;
+      AssertDimension (dof_mask.n_cols(), n_local_dofs);
+    }
+
+  internals::ConstraintMatrixData<double>::ScratchDataAccessor scratch_data;
+
+  // if the dof mask is not active, all we have to do is to add some indices
+  // in a matrix format. To do this, we first create an array of all the
+  // indices that are to be added. these indices are the local dof indices
+  // plus some indices that come from constraints.
+  if (dof_mask_is_active == false)
+    {
+      std::vector<size_type> &actual_dof_indices = scratch_data->columns;
+      actual_dof_indices.resize(n_local_dofs);
+      make_sorted_row_list (local_dof_indices, actual_dof_indices);
+      const size_type n_actual_dofs = actual_dof_indices.size();
+
+      // now add the indices we collected above to the sparsity pattern. Very
+      // easy here - just add the same array to all the rows...
+      for (size_type i=0; i<n_actual_dofs; ++i)
+        sparsity_pattern.add_entries(actual_dof_indices[i],
+                                     actual_dof_indices.begin(),
+                                     actual_dof_indices.end(),
+                                     true);
+
+      // need to add the whole row and column structure in case we keep
+      // constrained entries. Unfortunately, we can't use the nice matrix
+      // structure we use elsewhere, so manually add those indices one by one.
+      for (size_type i=0; i<n_local_dofs; i++)
+        if (is_constrained(local_dof_indices[i]))
+          {
+            if (keep_constrained_entries == true)
+              for (size_type j=0; j<n_local_dofs; j++)
+                {
+                  sparsity_pattern.add (local_dof_indices[i], local_dof_indices[j]);
+                  sparsity_pattern.add (local_dof_indices[j], local_dof_indices[i]);
+                }
+            else
+              sparsity_pattern.add (local_dof_indices[i], local_dof_indices[i]);
+          }
+
+      return;
+    }
+
+
+  // complicated case: we need to filter out some indices. then the function
+  // gets similar to the function for distributing matrix entries, see there
+  // for additional comments.
+  internals::GlobalRowsFromLocal &global_rows = scratch_data->global_rows;
+  global_rows.reinit(n_local_dofs);
+  make_sorted_row_list (local_dof_indices, global_rows);
+  const size_type n_actual_dofs = global_rows.size();
+
+  // create arrays for the column indices that will then be written into the
+  // sparsity pattern.
+  std::vector<size_type> &cols = scratch_data->columns;
+  cols.resize(n_actual_dofs);
+
+  for (size_type i=0; i<n_actual_dofs; ++i)
+    {
+      std::vector<size_type>::iterator col_ptr = cols.begin();
+      const size_type row = global_rows.global_row(i);
+      internals::resolve_matrix_row (global_rows, i, 0, n_actual_dofs,
+                                     dof_mask, col_ptr);
+
+      // finally, write the column indices collected for this row into the
+      // sparsity pattern (nothing to do if no index was written)
+      if (col_ptr != cols.begin())
+        sparsity_pattern.add_entries(row, cols.begin(), col_ptr,
+                                     true);
+    }
+  internals::set_sparsity_diagonals (global_rows, local_dof_indices,
+                                     dof_mask, keep_constrained_entries,
+                                     sparsity_pattern);
+}
+
+
+
+// Variant with separate row and column index sets.
+template <typename SparsityPatternType>
+void
+ConstraintMatrix::
+add_entries_local_to_global (const std::vector<size_type> &row_indices,
+                             const std::vector<size_type> &col_indices,
+                             SparsityPatternType          &sparsity_pattern,
+                             const bool                    keep_constrained_entries,
+                             const Table<2,bool>          &dof_mask) const
+{
+  const size_type n_local_rows = row_indices.size();
+  const size_type n_local_cols = col_indices.size();
+  bool dof_mask_is_active = false;
+  if (dof_mask.n_rows() == n_local_rows && dof_mask.n_cols() == n_local_cols)
+    dof_mask_is_active = true;
+
+  // if constrained entries should be kept, need to add rows and columns of
+  // those to the sparsity pattern
+  if (keep_constrained_entries == true)
+    {
+      for (size_type i=0; i<row_indices.size(); i++)
+        if (is_constrained(row_indices[i]))
+          for (size_type j=0; j<col_indices.size(); j++)
+            sparsity_pattern.add (row_indices[i], col_indices[j]);
+      for (size_type i=0; i<col_indices.size(); i++)
+        if (is_constrained(col_indices[i]))
+          for (size_type j=0; j<row_indices.size(); j++)
+            sparsity_pattern.add (row_indices[j], col_indices[i]);
+    }
+
+  // if the dof mask is not active, all we have to do is to add some indices
+  // in a matrix format. To do this, we first create an array of all the
+  // indices that are to be added. these indices are the local dof indices
+  // plus some indices that come from constraints.
+  if (dof_mask_is_active == false)
+    {
+      std::vector<size_type> actual_row_indices (n_local_rows);
+      std::vector<size_type> actual_col_indices (n_local_cols);
+      make_sorted_row_list (row_indices, actual_row_indices);
+      make_sorted_row_list (col_indices, actual_col_indices);
+      const size_type n_actual_rows = actual_row_indices.size();
+
+      // now add the indices we collected above to the sparsity pattern. Very
+      // easy here - just add the resolved column array to all resolved rows...
+      for (size_type i=0; i<n_actual_rows; ++i)
+        sparsity_pattern.add_entries(actual_row_indices[i],
+                                     actual_col_indices.begin(),
+                                     actual_col_indices.end(),
+                                     true);
+      return;
+    }
+
+
+  // TODO: implement this for the case that an active dof_mask is given
+  Assert (false, ExcNotImplemented());
+}
+
+
+
+// Block variant of add_entries_local_to_global (bool2type<true> overload).
+template <typename SparsityPatternType>
+void
+ConstraintMatrix::
+add_entries_local_to_global (const std::vector<size_type> &local_dof_indices,
+                             SparsityPatternType          &sparsity_pattern,
+                             const bool                    keep_constrained_entries,
+                             const Table<2,bool>          &dof_mask,
+                             internal::bool2type<true> ) const
+{
+  // just as the other add_entries_local_to_global function, but now
+  // specialized for block sparsity patterns.
+  Assert (sparsity_pattern.n_rows() == sparsity_pattern.n_cols(), ExcNotQuadratic());
+  Assert (sparsity_pattern.n_block_rows() == sparsity_pattern.n_block_cols(),
+          ExcNotQuadratic());
+
+  const size_type n_local_dofs = local_dof_indices.size();
+  const size_type num_blocks = sparsity_pattern.n_block_rows();
+
+  internals::ConstraintMatrixData<double>::ScratchDataAccessor scratch_data;
+
+  bool dof_mask_is_active = false;
+  if (dof_mask.n_rows() == n_local_dofs)
+    {
+      dof_mask_is_active = true;
+      AssertDimension (dof_mask.n_cols(), n_local_dofs);
+    }
+
+  if (dof_mask_is_active == false)
+    {
+      std::vector<size_type> &actual_dof_indices = scratch_data->columns;
+      actual_dof_indices.resize(n_local_dofs);
+      make_sorted_row_list (local_dof_indices, actual_dof_indices);
+      const size_type n_actual_dofs = actual_dof_indices.size();
+      (void)n_actual_dofs; // only used inside the Assert below
+
+      // additional construct that also takes care of block indices.
+      std::vector<size_type> &block_starts = scratch_data->block_starts;
+      block_starts.resize(num_blocks+1);
+      internals::make_block_starts (sparsity_pattern, actual_dof_indices,
+                                    block_starts);
+
+      for (size_type block=0; block<num_blocks; ++block)
+        {
+          const size_type next_block = block_starts[block+1];
+          for (size_type i=block_starts[block]; i<next_block; ++i)
+            {
+              Assert (i<n_actual_dofs, ExcInternalError());
+              const size_type row = actual_dof_indices[i];
+              Assert (row < sparsity_pattern.block(block,0).n_rows(),
+                      ExcInternalError());
+              std::vector<size_type>::iterator index_it = actual_dof_indices.begin();
+              for (size_type block_col = 0; block_col<num_blocks; ++block_col)
+                {
+                  const size_type next_block_col = block_starts[block_col+1];
+                  sparsity_pattern.block(block,block_col).
+                  add_entries(row,
+                              index_it,
+                              actual_dof_indices.begin() + next_block_col,
+                              true);
+                  index_it = actual_dof_indices.begin() + next_block_col;
+                }
+            }
+        }
+      // constrained dofs: add row and column couplings, or only the diagonal
+      for (size_type i=0; i<n_local_dofs; i++)
+        if (is_constrained(local_dof_indices[i]))
+          {
+            if (keep_constrained_entries == true)
+              for (size_type j=0; j<n_local_dofs; j++)
+                {
+                  sparsity_pattern.add (local_dof_indices[i], local_dof_indices[j]);
+                  sparsity_pattern.add (local_dof_indices[j], local_dof_indices[i]);
+                }
+            else
+              sparsity_pattern.add (local_dof_indices[i], local_dof_indices[i]);
+          }
+
+      return;
+    }
+
+  // difficult case with dof_mask, similar to the distribute_local_to_global
+  // function for block matrices
+  internals::GlobalRowsFromLocal &global_rows = scratch_data->global_rows;
+  global_rows.reinit(n_local_dofs);
+  make_sorted_row_list (local_dof_indices, global_rows);
+  const size_type n_actual_dofs = global_rows.size();
+
+  // additional construct that also takes care of block indices.
+  std::vector<size_type> &block_starts = scratch_data->block_starts;
+  block_starts.resize(num_blocks+1);
+  internals::make_block_starts(sparsity_pattern, global_rows, block_starts);
+
+  std::vector<size_type> &cols = scratch_data->columns;
+  cols.resize(n_actual_dofs);
+
+  // the basic difference to the non-block variant from now onwards is that we
+  // go through the blocks of the sparsity pattern separately.
+  for (size_type block=0; block<num_blocks; ++block)
+    {
+      const size_type next_block = block_starts[block+1];
+      for (size_type i=block_starts[block]; i<next_block; ++i)
+        {
+          const size_type row = global_rows.global_row(i);
+          for (size_type block_col=0; block_col<num_blocks; ++block_col)
+            {
+              const size_type begin_block = block_starts[block_col],
+                              end_block = block_starts[block_col+1];
+              std::vector<size_type>::iterator col_ptr = cols.begin();
+              internals::resolve_matrix_row (global_rows, i, begin_block,
+                                             end_block, dof_mask, col_ptr);
+
+              sparsity_pattern.block(block, block_col).add_entries(row,
+                                                                   cols.begin(),
+                                                                   col_ptr,
+                                                                   true);
+            }
+        }
+    }
+
+  internals::set_sparsity_diagonals (global_rows, local_dof_indices,
+                                     dof_mask, keep_constrained_entries,
+                                     sparsity_pattern);
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/dynamic_sparsity_pattern.h b/include/deal.II/lac/dynamic_sparsity_pattern.h
new file mode 100644
index 0000000..ee85de1
--- /dev/null
+++ b/include/deal.II/lac/dynamic_sparsity_pattern.h
@@ -0,0 +1,1106 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2011 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__dynamic_sparsity_pattern_h
+#define dealii__dynamic_sparsity_pattern_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/lac/exceptions.h>
+#include <deal.II/base/index_set.h>
+
+#include <vector>
+#include <algorithm>
+#include <iostream>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <typename number> class SparseMatrix;
+class DynamicSparsityPattern;
+
+
+/*! @addtogroup Sparsity
+ *@{
+ */
+
+
+/**
+ * Iterators on objects of type DynamicSparsityPattern.
+ */
+namespace DynamicSparsityPatternIterators
+{
+  // forward declaration
+  class Iterator;
+
+  /**
+   * Declare type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * Accessor class for iterators into objects of type DynamicSparsityPattern.
+   *
+   * Note that this class only allows read access to elements, providing their
+   * row and column number (or alternatively the index within the complete
+   * sparsity pattern). It does not allow modifying the sparsity pattern
+   * itself.
+   *
+   * @author Wolfgang Bangerth
+   * @date 2015
+   */
+  class Accessor
+  {
+  public:
+    /**
+     * Constructor. Takes the sparsity pattern, a row, and an index within it.
+     */
+    Accessor (const DynamicSparsityPattern *sparsity_pattern,
+              const size_type    row,
+              const unsigned int index_within_row);
+
+    /**
+     * Constructor. Construct the end accessor for the given sparsity pattern.
+     */
+    Accessor (const DynamicSparsityPattern *sparsity_pattern);
+
+    /**
+     * Row number of the element represented by this object.
+     */
+    size_type row () const;
+
+    /**
+     * Index within the current row of the element represented by this object.
+     */
+    size_type index () const;
+
+    /**
+     * Column number of the element represented by this object.
+     */
+    size_type column () const;
+
+    /**
+     * Comparison. True, if both iterators point to the same matrix position.
+     */
+    bool operator == (const Accessor &) const;
+
+    /**
+     * Comparison operator. Result is true if either the first row number is
+     * smaller or if the row numbers are equal and the first index is smaller.
+     *
+     * This function is only valid if both iterators point into the same
+     * sparsity pattern.
+     */
+    bool operator < (const Accessor &) const;
+
+  protected:
+    /**
+     * The sparsity pattern we operate on.
+     */
+    const DynamicSparsityPattern *sparsity_pattern;
+
+    /**
+     * The row we currently point into.
+     */
+    size_type current_row;
+
+    /**
+     * A pointer to the element within the current row that we currently point
+     * to.
+     */
+    std::vector<size_type>::const_iterator current_entry;
+
+    /**
+     * A pointer to the end of the current row. We store this to make
+     * comparison against the end of line iterator cheaper as it otherwise
+     * needs to do the IndexSet translation from row index to the index within
+     * the 'lines' array of DynamicSparsityPattern.
+     */
+    std::vector<size_type>::const_iterator end_of_row;
+
+    /**
+     * Move the accessor to the next nonzero entry in the matrix.
+     */
+    void advance ();
+
+    /**
+     * Grant access to iterator class.
+     */
+    friend class Iterator;
+  };
+
+
+
+  /**
+   * An iterator class for walking over the elements of a sparsity pattern.
+   *
+   * The typical use for these iterators is to iterate over the elements of a
+   * sparsity pattern (or, since they also serve as the basis for iterating
+   * over the elements of an associated matrix, over the elements of a sparse
+   * matrix), or over the elements of individual rows. There is no guarantee
+   * that the elements of a row are actually traversed in an order in which
+   * column numbers monotonically increase. See the documentation of the
+   * SparsityPattern class for more information.
+   *
+   * @note This class operates directly on the internal data structures of the
+   * DynamicSparsityPattern class. As a consequence, some operations are cheap
+   * and some are not. In particular, it is cheap to access the column index
+   * of the sparsity pattern entry pointed to. On the other hand, it is
+   * expensive to compute the distance between two iterators. As a
+   * consequence, when you design algorithms that use these iterators, it is
+   * common practice to not loop over <i>all</i> elements of a sparsity
+   * pattern at once, but to have an outer loop over all rows and within this
+   * loop iterate over the elements of this row. This way, you only ever need
+   * to dereference the iterator to obtain the column indices whereas the
+   * (expensive) lookup of the row index can be avoided by using the loop
+   * index instead.
+   */
+  class Iterator
+  {
+  public:
+    /**
+     * Constructor. Create an iterator into the sparsity pattern @p sp,
+     * positioned by the arguments @p row and @p index_within_row (both are
+     * handed to the Accessor constructor of the same signature).
+     */
+    Iterator (const DynamicSparsityPattern *sp,
+              const size_type    row,
+              const unsigned int index_within_row);
+
+    /**
+     * Constructor. Create an invalid (end) iterator into the sparsity pattern
+     * @p sp.
+     */
+    Iterator (const DynamicSparsityPattern *sp);
+
+    /**
+     * Prefix increment.
+     */
+    Iterator &operator++ ();
+
+    /**
+     * Postfix increment.
+     */
+    Iterator operator++ (int);
+
+    /**
+     * Dereferencing operator.
+     */
+    const Accessor &operator* () const;
+
+    /**
+     * Dereferencing operator.
+     */
+    const Accessor *operator-> () const;
+
+    /**
+     * Comparison. True, if both iterators point to the same matrix position.
+     */
+    bool operator == (const Iterator &) const;
+
+    /**
+     * Inverse of <tt>==</tt>.
+     */
+    bool operator != (const Iterator &) const;
+
+    /**
+     * Comparison operator. Result is true if either the first row number is
+     * smaller or if the row numbers are equal and the first index is smaller.
+     *
+     * This function is only valid if both iterators point into the same
+     * matrix.
+     */
+    bool operator < (const Iterator &) const;
+
+    /**
+     * Return the distance between the current iterator and the argument. The
+     * distance is given by how many times one has to apply operator++ to the
+     * current iterator to get the argument (for a positive return value), or
+     * operator-- (for a negative return value).
+     */
+    int operator - (const Iterator &p) const;
+
+  private:
+    /**
+     * Store an object of the accessor class.
+     */
+    Accessor accessor;
+  };
+}
+
+
+/**
+ * This class acts as an intermediate form of the SparsityPattern class. From
+ * the interface it mostly represents a SparsityPattern object that is kept
+ * compressed at all times. However, since the final sparsity pattern is not
+ * known while constructing it, keeping the pattern compressed at all times
+ * can only be achieved at the expense of either increased memory or run time
+ * consumption upon use. The main purpose of this class is to avoid some
+ * memory bottlenecks, so we chose a memory-conservative implementation. The
+ * chosen data format is not suited for use in actual matrices, though.
+ * It is therefore necessary to first copy the data of this object over to an
+ * object of type SparsityPattern before using it in actual matrices.
+ *
+ * Another viewpoint is that this class does not need up front allocation of a
+ * certain amount of memory, but grows as necessary.  An extensive description
+ * of sparsity patterns can be found in the documentation of the
+ * @ref Sparsity
+ * module.
+ *
+ * This class is an example of the "dynamic" type of
+ * @ref Sparsity.
+ * It is used in most tutorial programs in one way or another.
+ *
+ * <h3>Interface</h3>
+ *
+ * Since this class is intended as an intermediate replacement of the
+ * SparsityPattern class, it has mostly the same interface, with small changes
+ * where necessary. In particular, the add() function, and the functions
+ * inquiring properties of the sparsity pattern are the same.
+ *
+ *
+ * <h3>Usage</h3>
+ *
+ * Use this class as follows:
+ * @code
+ * DynamicSparsityPattern dynamic_pattern (dof_handler.n_dofs());
+ * DoFTools::make_sparsity_pattern (dof_handler,
+ *                                  dynamic_pattern);
+ * constraints.condense (dynamic_pattern);
+ *
+ * SparsityPattern sp;
+ * sp.copy_from (dynamic_pattern);
+ * @endcode
+ *
+ * @author Timo Heister, 2008
+ */
+class DynamicSparsityPattern : public Subscriptor
+{
+public:
+  /**
+   * Declare the type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * Typedef for an iterator class that allows to walk over all nonzero
+   * elements of a sparsity pattern.
+   *
+   * Since the iterator does not allow to modify the sparsity pattern, this
+   * type is the same as that for @p const_iterator.
+   */
+  typedef
+  DynamicSparsityPatternIterators::Iterator
+  iterator;
+
+  /**
+   * Typedef for an iterator class that allows to walk over all nonzero
+   * elements of a sparsity pattern.
+   */
+  typedef
+  DynamicSparsityPatternIterators::Iterator
+  const_iterator;
+
+  /**
+   * Initialize as an empty object. This is useful if you want such objects as
+   * member variables in other classes. You can make the structure usable by
+   * calling the reinit() function.
+   */
+  DynamicSparsityPattern ();
+
+  /**
+   * Copy constructor. This constructor is only allowed to be called if the
+   * sparsity structure to be copied is empty. This is so in order to prevent
+   * involuntary copies of objects for temporaries, which can use large
+   * amounts of computing time.  However, copy constructors are needed if you
+   * want to place a DynamicSparsityPattern in a container, e.g. to write such
+   * statements like <tt>v.push_back (DynamicSparsityPattern());</tt>, with @p
+   * v a vector of @p DynamicSparsityPattern objects.
+   */
+  DynamicSparsityPattern (const DynamicSparsityPattern &);
+
+  /**
+   * Initialize a rectangular sparsity pattern with @p m rows and @p n
+   * columns. The @p rowset restricts the storage to elements in rows of this
+   * set.  Adding elements outside of this set has no effect. The default
+   * argument keeps all entries.
+   */
+  DynamicSparsityPattern (const size_type m,
+                          const size_type n,
+                          const IndexSet &rowset = IndexSet());
+
+  /**
+   * Create a square sparsity pattern using the index set.
+   */
+  DynamicSparsityPattern (const IndexSet &indexset);
+
+  /**
+   * Initialize a square pattern of dimension @p n.
+   */
+  DynamicSparsityPattern (const size_type n);
+
+  /**
+   * Copy operator. For this the same holds as for the copy constructor: it is
+   * declared, defined and fine to be called, but the latter only for empty
+   * objects.
+   */
+  DynamicSparsityPattern &operator = (const DynamicSparsityPattern &);
+
+  /**
+   * Reallocate memory and set up data structures for a new sparsity pattern
+   * with @p m rows and @p n columns. The @p rowset restricts the storage to
+   * elements in rows of this set.  Adding elements outside of this set has no
+   * effect. The default argument keeps all entries.
+   */
+  void reinit (const size_type m,
+               const size_type n,
+               const IndexSet &rowset = IndexSet());
+
+  /**
+   * Since this object is kept compressed at all times anyway, this function
+   * does nothing, but is declared to make the interface of this class as
+   * similar as possible to that of the SparsityPattern class.
+   */
+  void compress ();
+
+  /**
+   * Return whether the object is empty. It is empty if no memory is
+   * allocated, which is the same as that both dimensions are zero.
+   */
+  bool empty () const;
+
+  /**
+   * Return the maximum number of entries per row. Note that this number may
+   * change as entries are added.
+   */
+  size_type max_entries_per_row () const;
+
+  /**
+   * Add a nonzero entry. If the entry already exists, this call does nothing.
+   */
+  void add (const size_type i,
+            const size_type j);
+
+  /**
+   * Add several nonzero entries to the specified row. Already existing
+   * entries are ignored, as are rows not stored per the row index set.
+   */
+  template <typename ForwardIterator>
+  void add_entries (const size_type row,
+                    ForwardIterator begin,
+                    ForwardIterator end,
+                    const bool      indices_are_unique_and_sorted = false);
+
+  /**
+   * Check if a value at a certain position may be non-zero.
+   */
+  bool exists (const size_type i,
+               const size_type j) const;
+
+  /**
+   * Make the sparsity pattern symmetric by adding the sparsity pattern of the
+   * transpose object.
+   *
+   * This function throws an exception if the sparsity pattern does not
+   * represent a square matrix.
+   */
+  void symmetrize ();
+
+  /**
+   * Print the sparsity pattern. The output consists of one line per row of
+   * the format <tt>[i,j1,j2,j3,...]</tt>. <i>i</i> is the row number and
+   * <i>jn</i> are the allocated columns in this row.
+   */
+  void print (std::ostream &out) const;
+
+  /**
+   * Print the sparsity pattern in a format that @p gnuplot understands and
+   * which can be used to plot the sparsity pattern in a graphical way. The
+   * format consists of pairs <tt>i j</tt> of nonzero elements, each
+   * representing one entry, one per line of the output file. Indices are
+   * counted from zero on, as usual. Since sparsity patterns are printed in
+   * the same way as matrices are displayed, we print the negative of the
+   * column index, which means that the <tt>(0,0)</tt> element is in the top
+   * left rather than in the bottom left corner.
+   *
+   * Print the sparsity pattern in gnuplot by setting the data style to dots
+   * or points and use the @p plot command.
+   */
+  void print_gnuplot (std::ostream &out) const;
+
+  /**
+   * Return the number of rows, which equals the dimension of the image space.
+   */
+  size_type n_rows () const;
+
+  /**
+   * Return the number of columns, which equals the dimension of the domain
+   * space.
+   */
+  size_type n_cols () const;
+
+  /**
+   * Number of entries in a specific row. Returns zero for rows that are not
+   * members of the index set of rows that we want to store.
+   */
+  size_type row_length (const size_type row) const;
+
+  /**
+   * Return the column number of the @p index-th entry in @p row. The row
+   * must be one that this object stores (checked by an assertion).
+   */
+  size_type column_number (const size_type row,
+                           const size_type index) const;
+
+  /**
+   * @name Iterators
+   */
+// @{
+
+  /**
+   * Iterator starting at the first entry of the matrix. The resulting
+   * iterator can be used to walk over all nonzero entries of the sparsity
+   * pattern.
+   *
+   * Note the discussion in the general documentation of this class about the
+   * order in which elements are accessed.
+   *
+   * @note If the sparsity pattern has been initialized with an IndexSet that
+   * denotes which rows to store, then iterators will simply skip over rows
+   * that are not stored. In other words, they will look like empty rows, but
+   * no exception will be generated when iterating over such rows.
+   */
+  iterator begin () const;
+
+  /**
+   * Final iterator.
+   */
+  iterator end () const;
+
+  /**
+   * Iterator starting at the first entry of row <tt>r</tt>.
+   *
+   * Note that if the given row is empty, i.e. does not contain any nonzero
+   * entries, then the iterator returned by this function equals
+   * <tt>end(r)</tt>. Note also that the iterator may not be dereferenceable
+   * in that case.
+   *
+   * Note also the discussion in the general documentation of this class about
+   * the order in which elements are accessed.
+   *
+   * @note If the sparsity pattern has been initialized with an IndexSet that
+   * denotes which rows to store, then iterators will simply skip over rows
+   * that are not stored. In other words, they will look like empty rows, but
+   * no exception will be generated when iterating over such rows.
+   */
+  iterator begin (const size_type r) const;
+
+  /**
+   * Final iterator of row <tt>r</tt>. It points to the first element past the
+   * end of line @p r, or past the end of the entire sparsity pattern.
+   *
+   * Note that the end iterator is not necessarily dereferenceable. This is in
+   * particular the case if it is the end iterator for the last row of a
+   * matrix.
+   */
+  iterator end (const size_type r) const;
+
+// @}
+
+  /**
+   * Compute the bandwidth of the matrix represented by this structure. The
+   * bandwidth is the maximum of $|i-j|$ for which the index pair $(i,j)$
+   * represents a nonzero entry of the matrix.
+   */
+  size_type bandwidth () const;
+
+  /**
+   * Return the number of nonzero elements allocated through this sparsity
+   * pattern.
+   */
+  size_type n_nonzero_elements () const;
+
+  /**
+   * Return the IndexSet that sets which rows are active on the current
+   * processor. It corresponds to the IndexSet given to this class in the
+   * constructor or in the reinit function.
+   */
+  const IndexSet &row_index_set () const;
+
+  /**
+   * Return whether this object stores only those entries that have been added
+   * explicitly, or if the sparsity pattern contains elements that have been
+   * added through other means (implicitly) while building it. For the current
+   * class, the result is always true.
+   *
+   * This function mainly serves the purpose of describing the current class
+   * in cases where several kinds of sparsity patterns can be passed as
+   * template arguments.
+   */
+  static
+  bool stores_only_added_elements ();
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  size_type memory_consumption () const;
+
+private:
+  /**
+   * Number of rows that this sparsity structure shall represent.
+   */
+  size_type rows;
+
+  /**
+   * Number of columns that this sparsity structure shall represent.
+   */
+  size_type cols;
+
+  /**
+   * A set that contains the valid rows.
+   */
+
+  IndexSet rowset;
+
+
+  /**
+   * Store some data for each row describing which entries of this row are
+   * nonzero. Data is stored sorted in the @p entries std::vector.  The vector
+   * per row is dynamically growing upon insertion doubling its memory each
+   * time.
+   */
+  struct Line
+  {
+  public:
+    /**
+     * Storage for the column indices of this row. This array is always kept
+     * sorted.
+     */
+    std::vector<size_type> entries;
+
+    /**
+     * Constructor.
+     */
+    Line ();
+
+    /**
+     * Add the given column number to this line.
+     */
+    void add (const size_type col_num);
+
+    /**
+     * Add the columns specified by the iterator range to this line.
+     */
+    template <typename ForwardIterator>
+    void add_entries (ForwardIterator begin,
+                      ForwardIterator end,
+                      const bool indices_are_sorted);
+
+    /**
+     * Estimate the memory consumption of this line.
+     */
+    size_type memory_consumption () const;
+  };
+
+
+  /**
+   * Actual data: store for each row the set of nonzero entries.
+   */
+  std::vector<Line> lines;
+
+  // make the accessor class a friend
+  friend class DynamicSparsityPatternIterators::Accessor;
+};
+
+/*@}*/
+/*---------------------- Inline functions -----------------------------------*/
+
+
+namespace DynamicSparsityPatternIterators
+{
+  inline   // accessor for entry 'index_within_row' of row 'row' of the pattern
+  Accessor::
+  Accessor (const DynamicSparsityPattern *sparsity_pattern,
+            const size_type    row,
+            const unsigned int index_within_row)
+    :   // NOTE: later initializers read current_row; relies on member order
+    sparsity_pattern(sparsity_pattern),
+    current_row (row),
+    current_entry(((sparsity_pattern->rowset.size()==0)
+                   ?   // empty rowset: lines[] is indexed by the row itself
+                   sparsity_pattern->lines[current_row].entries.begin()
+                   :   // otherwise: by the row's position within the rowset
+                   sparsity_pattern->lines[sparsity_pattern->rowset.index_within_set(current_row)].entries.begin())
+                  +   // then step to the requested entry within the row
+                  index_within_row),
+    end_of_row((sparsity_pattern->rowset.size()==0)
+               ?   // same row lookup as for current_entry above
+               sparsity_pattern->lines[current_row].entries.end()
+               :
+               sparsity_pattern->lines[sparsity_pattern->rowset.index_within_set(current_row)].entries.end())
+  {
+    AssertIndexRange(current_row, sparsity_pattern->n_rows());   // row must exist
+    Assert ((sparsity_pattern->rowset.size()==0)
+            ||
+            sparsity_pattern->rowset.is_element(current_row),
+            ExcMessage ("You can't create an iterator into a "
+                        "DynamicSparsityPattern's row that is not "
+                        "actually stored by that sparsity pattern "
+                        "based on the IndexSet argument to it."));
+    AssertIndexRange(index_within_row,   // the entry must exist within the row
+                     ((sparsity_pattern->rowset.size()==0)
+                      ?
+                      sparsity_pattern->lines[current_row].entries.size()
+                      :
+                      sparsity_pattern->lines[sparsity_pattern->rowset.index_within_set(current_row)].entries.size()));
+  }
+
+
+  // Build the past-the-end accessor: the row is set to the invalid marker
+  // and both entry iterators are value-initialized.
+  inline
+  Accessor::Accessor (const DynamicSparsityPattern *sparsity_pattern)
+    : sparsity_pattern(sparsity_pattern),
+      current_row(numbers::invalid_size_type),
+      current_entry(),
+      end_of_row()
+  {}
+
+
+
+  // Return the row of the entry this accessor points to. The accessor must
+  // refer to a valid row, i.e. it must not be in the past-the-end state.
+  inline size_type
+  Accessor::row() const
+  {
+    Assert (current_row < sparsity_pattern->n_rows(), ExcInternalError());
+
+    return this->current_row;
+  }
+
+
+  // Return the column index stored at the current position. The accessor
+  // must refer to a valid row, i.e. it must not be past-the-end.
+  inline size_type
+  Accessor::column() const
+  {
+    Assert (current_row < sparsity_pattern->n_rows(), ExcInternalError());
+
+    return *(this->current_entry);
+  }
+
+
+  // Return the position of the current entry within its row, computed as
+  // the distance from the beginning of that row's entries vector.
+  inline size_type
+  Accessor::index() const
+  {
+    Assert (current_row < sparsity_pattern->n_rows(), ExcInternalError());
+
+    // translate the global row into the slot of lines[] that holds it
+    const size_type local_row =
+      (sparsity_pattern->rowset.size()==0
+       ? current_row
+       : sparsity_pattern->rowset.index_within_set(current_row));
+    return (current_entry - sparsity_pattern->lines[local_row].entries.begin());
+  }
+
+
+
+
+  // Equality: same sparsity pattern, same row and, unless the row is the
+  // past-the-end marker, the same entry within the row.
+  inline bool
+  Accessor::operator == (const Accessor &other) const
+  {
+    if (sparsity_pattern != other.sparsity_pattern
+        || current_row != other.current_row)
+      return false;
+    // for past-the-end accessors current_entry may not point to a
+    // deterministic location, so it must not take part in the comparison
+    return ((current_row == numbers::invalid_size_type)
+            || (current_entry == other.current_entry));
+  }
+
+
+
+  // Strict ordering by (row, position within row). A past-the-end accessor
+  // compares greater than every accessor that points to an actual entry.
+  inline bool
+  Accessor::operator < (const Accessor &other) const
+  {
+    Assert (sparsity_pattern == other.sparsity_pattern, ExcInternalError());
+
+    // a past-the-end *this is less than no one
+    if (current_row == numbers::invalid_size_type)
+      return false;
+    // from here on *this has to refer to a valid row
+    Assert (current_row < sparsity_pattern->n_rows(), ExcInternalError());
+
+    // anything valid is less than a past-the-end other
+    if (other.current_row == numbers::invalid_size_type)
+      return true;
+    // from here on other has to refer to a valid row as well
+    Assert (other.current_row < sparsity_pattern->n_rows(),
+            ExcInternalError());
+
+    // neither accessor is past-the-end: compare rows first, then break
+    // ties by the position within the common row
+    if (current_row != other.current_row)
+      return current_row < other.current_row;
+    return current_entry < other.current_entry;
+  }
+
+
+  // Step to the next stored entry, moving on to the following non-empty
+  // row, or to the past-the-end state, once the current row is exhausted.
+  inline void
+  Accessor::advance ()
+  {
+    Assert (current_row < sparsity_pattern->n_rows(), ExcInternalError());
+
+    // move to the next element in this row; as long as that does not take
+    // us past the end of the row there is nothing else to do
+    ++current_entry;
+    if (current_entry != end_of_row)
+      return;
+
+    // we ran off the end of the row: go to the next row if possible, or
+    // set the iterator to an invalid state if not.
+    //
+    // going to the next row is a bit complicated because we may have to
+    // skip over empty rows, and because we also have to avoid rows that
+    // aren't listed in a possibly passed IndexSet argument of the
+    // sparsity pattern. consequently, rather than duplicating code here,
+    // just call the begin() function of the sparsity pattern itself
+    const size_type next_row = current_row+1;
+    if (next_row < sparsity_pattern->n_rows())
+      *this = *sparsity_pattern->begin(next_row);
+    else
+      *this = Accessor(sparsity_pattern);  // invalid object
+  }
+
+
+
+  // Forward all arguments to the accessor, which holds the iterator state.
+  inline
+  Iterator::Iterator (const DynamicSparsityPattern *sparsity_pattern,
+                      const size_type    row,
+                      const unsigned int index_within_row)
+    : accessor(sparsity_pattern, row, index_within_row)
+  {}
+
+
+
+  // Past-the-end iterator: wraps a past-the-end accessor for the pattern.
+  inline
+  Iterator::Iterator (const DynamicSparsityPattern *sparsity_pattern)
+    : accessor(sparsity_pattern)
+  {}
+
+
+
+  // Prefix increment: advance the accessor and return this iterator.
+  inline Iterator &
+  Iterator::operator++ ()
+  {
+    this->accessor.advance ();
+    return *this;
+  }
+
+
+
+  // Postfix increment: advance, but hand back the state before the step.
+  inline Iterator
+  Iterator::operator++ (int)
+  {
+    const Iterator previous (*this);
+    accessor.advance ();
+    return previous;
+  }
+
+
+
+  // Dereference: expose the stored accessor by reference.
+  inline const Accessor &
+  Iterator::operator* () const
+  {
+    return this->accessor;
+  }
+
+
+
+  // Member access: expose the stored accessor by pointer.
+  inline const Accessor *
+  Iterator::operator-> () const
+  {
+    return &(this->accessor);
+  }
+
+
+  // Iterators are equal exactly if their accessors compare equal.
+  inline bool
+  Iterator::operator == (const Iterator &other) const
+  {
+    return (this->accessor == other.accessor);
+  }
+
+
+
+  // Negation of the equality comparison of the two accessors.
+  inline bool
+  Iterator::operator != (const Iterator &other) const
+  {
+    return ! (accessor == other.accessor);
+  }
+
+
+  // Ordering of iterators is the ordering of their accessors.
+  inline bool
+  Iterator::operator < (const Iterator &other) const
+  {
+    return (this->accessor < other.accessor);
+  }
+
+
+  // Distance between two iterators. Not implemented: after checking that
+  // both iterators refer to the same pattern, this asserts and returns 0.
+  inline int
+  Iterator::operator - (const Iterator &other) const
+  {
+    (void)other;   // other is referenced only within the Assert below
+    Assert (accessor.sparsity_pattern == other.accessor.sparsity_pattern,
+            ExcInternalError());
+    Assert (false, ExcNotImplemented());
+    return 0;
+  }
+}
+
+
+// Insert column j into this line. The entries vector is kept sorted and
+// free of duplicates, so j is only stored if it is not yet present.
+inline void
+DynamicSparsityPattern::Line::add (const size_type j)
+{
+  // fast path: the line is still empty or j is larger than everything
+  // stored so far, so it can simply be appended
+  if (entries.empty() || (entries.back() < j))
+    {
+      entries.push_back(j);
+      return;
+    }
+
+  // otherwise do a binary search to find the place where j belongs. the
+  // result can be dereferenced because entries.back() >= j at this point
+  const std::vector<size_type>::iterator
+  position = Utilities::lower_bound(entries.begin(),
+                                    entries.end(),
+                                    j);
+
+  // insert only if j is not a duplicate of the entry found there; the
+  // vector grows automatically to fit the new element
+  if (*position != j)
+    entries.insert(position, j);
+}
+
+
+
+// Number of rows this sparsity pattern represents.
+inline DynamicSparsityPattern::size_type
+DynamicSparsityPattern::n_rows () const
+{
+  return this->rows;
+}
+
+
+
+// Number of columns this sparsity pattern represents.
+inline types::global_dof_index
+DynamicSparsityPattern::n_cols () const
+{
+  return this->cols;
+}
+
+
+
+// Record (i,j) as a nonzero entry. Rows outside a non-empty rowset are
+// not stored, so requests for them are silently dropped.
+inline void
+DynamicSparsityPattern::add (const size_type i,
+                             const size_type j)
+{
+  Assert (i<rows, ExcIndexRangeType<size_type>(i, 0, rows));
+  Assert (j<cols, ExcIndexRangeType<size_type>(j, 0, cols));
+
+  // an empty rowset means that every row is stored at its own index
+  if (rowset.size() == 0)
+    lines[i].add (j);
+  else if (rowset.is_element(i))
+    lines[rowset.index_within_set(i)].add (j);
+}
+
+
+
+// Add the columns in [begin,end) to the given row; rows outside a
+// non-empty rowset are not stored and therefore skipped.
+template <typename ForwardIterator>
+inline void
+DynamicSparsityPattern::add_entries (const size_type row,
+                                     ForwardIterator begin,
+                                     ForwardIterator end,
+                                     const bool      indices_are_sorted)
+{
+  Assert (row < rows, ExcIndexRangeType<size_type> (row, 0, rows));
+
+  if (rowset.size() == 0)
+    lines[row].add_entries (begin, end, indices_are_sorted);
+  else if (rowset.is_element(row))
+    lines[rowset.index_within_set(row)].add_entries (begin, end,
+                                                     indices_are_sorted);
+}
+
+
+
+// Default constructor: the line starts out without any entries.
+inline DynamicSparsityPattern::Line::Line ()
+{}
+
+
+
+// Number of entries stored in the given row; zero for rows that a
+// non-empty rowset excludes from storage.
+inline types::global_dof_index
+DynamicSparsityPattern::row_length (const size_type row) const
+{
+  Assert (row < n_rows(), ExcIndexRangeType<size_type> (row, 0, n_rows()));
+  if (rowset.size() == 0)
+    return lines[row].entries.size();
+  if (!rowset.is_element(row))
+    return 0;
+  return lines[rowset.index_within_set(row)].entries.size();
+}
+
+
+
+// Column of entry number index in the given row, which has to be stored.
+inline types::global_dof_index
+DynamicSparsityPattern::column_number (const size_type row,
+                                       const size_type index) const
+{
+  Assert (row < n_rows(), ExcIndexRangeType<size_type> (row, 0, n_rows()));
+  Assert( rowset.size() == 0 || rowset.is_element(row), ExcInternalError());
+  const size_type local_row =
+    (rowset.size() == 0 ? row : rowset.index_within_set(row));
+  Assert (index < lines[local_row].entries.size(),
+          ExcIndexRangeType<size_type> (index, 0, lines[local_row].entries.size()));
+  return lines[local_row].entries[index];
+}
+
+
+
+// First entry of the pattern, or end() if there are no rows at all.
+inline DynamicSparsityPattern::iterator
+DynamicSparsityPattern::begin () const
+{
+  return (n_rows() > 0) ? begin(0) : end();
+}
+
+
+// Past-the-end iterator of the whole pattern.
+inline DynamicSparsityPattern::iterator
+DynamicSparsityPattern::end () const
+{
+  return DynamicSparsityPattern::iterator(this);
+}
+
+
+
+// Iterator to the first entry of row r. If row r has no entries (or is not
+// stored), this is the first entry of the next row that has some, and
+// the past-the-end iterator if no such row exists.
+inline DynamicSparsityPattern::iterator
+DynamicSparsityPattern::begin (const size_type r) const
+{
+  Assert (r<n_rows(), ExcIndexRangeType<size_type>(r,0,n_rows()));
+
+  // walk forward from r until we hit a row that has entries and return
+  // the begin iterator to it. this also skips rows for which we do not
+  // store anything based on the IndexSet given to the sparsity pattern
+  //
+  // note: row_length(row) returns zero if the row is not locally stored
+  //
+  // TODO: this is way too slow when used in parallel, so do not use it on
+  // non-owned rows
+  for (size_type row = r; row<n_rows(); ++row)
+    {
+      if (row_length(row) != 0)
+        return iterator(this, row, 0);
+    }
+
+  // all rows from r onwards are empty or not stored, so there is no
+  // entry left to point to
+  return iterator(this);
+}
+
+
+
+// Iterator one past the last entry of row r, i.e. an iterator to the
+// first entry of the next non-empty stored row, or end() if none exists.
+inline DynamicSparsityPattern::iterator
+DynamicSparsityPattern::end (const size_type r) const
+{
+  Assert (r<n_rows(), ExcIndexRangeType<size_type>(r,0,n_rows()));
+
+  // find the first row after r that has entries and return the begin
+  // iterator to it. also skip rows for which we do not store anything
+  // based on the IndexSet given to the sparsity pattern
+  //
+  // note: row_length(row) returns zero if the row is not locally stored
+  //
+  // the row counter has to be a size_type: size_type may be wider than
+  // unsigned int, in which case a narrower counter would wrap around
+  size_type row = r+1;
+  while ((row<n_rows()) && (row_length(row)==0))
+    ++row;
+  if (row == n_rows())
+    return iterator(this);
+  else
+    return iterator(this, row, 0);
+}
+
+
+
+// Read-only access to the set of rows stored by this object.
+inline const IndexSet &
+DynamicSparsityPattern::row_index_set () const
+{
+  return this->rowset;
+}
+
+
+
+// For this class the answer is always true.
+inline bool
+DynamicSparsityPattern::stores_only_added_elements ()
+{
+  return true;
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/eigen.h b/include/deal.II/lac/eigen.h
new file mode 100644
index 0000000..9d874d2
--- /dev/null
+++ b/include/deal.II/lac/eigen.h
@@ -0,0 +1,433 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__eigen_h
+#define dealii__eigen_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/lac/shifted_matrix.h>
+#include <deal.II/lac/solver.h>
+#include <deal.II/lac/solver_control.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/solver_gmres.h>
+#include <deal.II/lac/solver_minres.h>
+#include <deal.II/lac/vector_memory.h>
+#include <deal.II/lac/precondition.h>
+
+#include <cmath>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/*!@addtogroup Solvers */
+/*@{*/
+
+/**
+ * Power method (von Mises) for eigenvalue computations.
+ *
+ * This method determines the largest eigenvalue of a matrix by applying
+ * increasing powers of this matrix to a vector. If there is an eigenvalue $l$
+ * with dominant absolute value, the iteration vectors will become aligned to
+ * its eigenspace and $Ax = lx$.
+ *
+ * A shift parameter allows to shift the spectrum, so it is possible to
+ * compute the smallest eigenvalue, too.
+ *
+ * Convergence of this method is known to be slow.
+ *
+ * @author Guido Kanschat, 2000
+ */
+template <typename VectorType = Vector<double> >
+class EigenPower : private Solver<VectorType>
+{
+public:
+  /**
+   * Declare type of container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * Standardized data struct to pipe additional data to the solver.
+   */
+  struct AdditionalData
+  {
+    /**
+     * Shift parameter. This parameter allows shifting the spectrum to
+     * compute a different eigenvalue.
+     */
+    double shift;
+    /**
+     * Constructor. Set the shift parameter (zero by default).
+     */
+    AdditionalData (const double shift = 0.)
+      :
+      shift(shift)
+    {}
+
+  };
+
+  /**
+   * Constructor. @p data holds the shift parameter.
+   */
+  EigenPower (SolverControl            &cn,
+              VectorMemory<VectorType> &mem,
+              const AdditionalData     &data=AdditionalData());
+
+  /**
+   * Virtual destructor.
+   */
+  virtual ~EigenPower ();
+
+  /**
+   * Power method. @p x is the (not necessarily normalized, but nonzero) start
+   * vector for the power method. After the iteration, @p value is the
+   * approximated eigenvalue and @p x is the corresponding eigenvector,
+   * normalized with respect to the l2-norm.
+   */
+  template <typename MatrixType>
+  void
+  solve (double           &value,
+         const MatrixType &A,
+         VectorType       &x);
+
+protected:
+  /**
+   * Additional data, i.e. the shift parameter.
+   */
+  AdditionalData additional_data;
+};
+
+/**
+ * Inverse iteration (Wielandt) for eigenvalue computations.
+ *
+ * This class implements an adaptive version of the inverse iteration by
+ * Wielandt.
+ *
+ * There are two choices for the stopping criterion: by default, the norm of
+ * the residual $A x - l x$ is computed. Since this might not converge to zero
+ * for non-symmetric matrices with non-trivial Jordan blocks, it can be
+ * replaced by checking the difference of successive eigenvalues. Use
+ * AdditionalData::use_residual for switching this option.
+ *
+ * Usually, the initial guess entering this method is updated after each step,
+ * replacing it with the new approximation of the eigenvalue. Using a
+ * parameter AdditionalData::relaxation between 0 and 1, this update can be
+ * damped. With relaxation parameter 0, no update is performed. This damping
+ * allows for slower adaption of the shift value to make sure that the method
+ * converges to the eigenvalue closest to the initial guess. This can be aided
+ * by the parameter AdditionalData::start_adaption, which indicates the first
+ * iteration step in which the shift value should be adapted.
+ *
+ * @author Guido Kanschat, 2000, 2003
+ */
+template <typename VectorType = Vector<double> >
+class EigenInverse : private Solver<VectorType>
+{
+public:
+  /**
+   * Declare type of container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * Standardized data struct to pipe additional data to the solver.
+   */
+  struct AdditionalData
+  {
+    /**
+     * Damping of the updated shift value.
+     */
+    double relaxation;
+
+    /**
+     * Start step of adaptive shift parameter.
+     */
+    unsigned int start_adaption;
+    /**
+     * Flag for the stopping criterion.
+     */
+    bool use_residual;
+    /**
+     * Constructor. Set all three parameters.
+     */
+    AdditionalData (double relaxation = 1.,
+                    unsigned int start_adaption = 6,
+                    bool use_residual = true)
+      :
+      relaxation(relaxation),
+      start_adaption(start_adaption),
+      use_residual(use_residual)
+    {}
+
+  };
+
+  /**
+   * Constructor. @p data holds the flags described in AdditionalData.
+   */
+  EigenInverse (SolverControl            &cn,
+                VectorMemory<VectorType> &mem,
+                const AdditionalData     &data=AdditionalData());
+
+
+  /**
+   * Virtual destructor.
+   */
+  virtual ~EigenInverse ();
+
+  /**
+   * Inverse method. @p value is the start guess for the eigenvalue and @p x
+   * is the (not necessarily normalized, but nonzero) start vector for the
+   * inverse iteration. After the iteration, @p value is the approximated
+   * eigenvalue and @p x is the corresponding eigenvector, normalized with
+   * respect to the l2-norm.
+   */
+  template <typename MatrixType>
+  void
+  solve (double           &value,
+         const MatrixType &A,
+         VectorType       &x);
+
+protected:
+  /**
+   * Flags for execution.
+   */
+  AdditionalData additional_data;
+};
+
+/*@}*/
+//---------------------------------------------------------------------------
+
+
+template <class VectorType>
+EigenPower<VectorType>::EigenPower (SolverControl            &cn,
+                                    VectorMemory<VectorType> &mem,
+                                    const AdditionalData     &data)
+  :
+  Solver<VectorType>(cn, mem),
+  additional_data(data)
+{}
+
+
+
+template <class VectorType>
+EigenPower<VectorType>::~EigenPower ()
+{}
+
+
+
+template <class VectorType>
+template <typename MatrixType>
+void
+EigenPower<VectorType>::solve (double           &value,
+                               const MatrixType &A,
+                               VectorType       &x)
+{
+  SolverControl::State conv=SolverControl::iterate;
+
+  deallog.push("Power method");
+
+  VectorType *Vy = this->memory.alloc ();
+  VectorType &y = *Vy;
+  y.reinit (x);
+  VectorType *Vr = this->memory.alloc ();
+  VectorType &r = *Vr;
+  r.reinit (x);
+
+  double length = x.l2_norm ();
+  double old_length = 0.;
+  x *= 1./length;
+
+  A.vmult (y,x);
+
+  // Main loop
+  int iter=0;
+  for (; conv==SolverControl::iterate; iter++)
+    {
+      y.add(additional_data.shift, x);
+
+      // Compute absolute value of eigenvalue
+      old_length = length;
+      length = y.l2_norm ();
+
+      // do a little trick to compute the sign
+      // with not too much effect of round-off errors.
+      double entry = 0.;
+      size_type i = 0;
+      double thresh = length/x.size();
+      do
+        {
+          Assert (i<x.size(), ExcInternalError());
+          entry = y (i++);
+        }
+      while (std::fabs(entry) < thresh);
+
+      --i;
+
+      // Compute unshifted eigenvalue
+      value = (entry * x (i) < 0.) ? -length : length;
+      value -= additional_data.shift;
+
+      // Update normalized eigenvector
+      x.equ (1/length, y);
+
+      // Compute residual
+      A.vmult (y,x);
+
+      // Check the change of the eigenvalue
+      // Brrr, this is not really a good criterion
+      conv = this->iteration_status (iter, std::fabs(1./length-1./old_length), x);
+    }
+
+  this->memory.free(Vy);
+  this->memory.free(Vr);
+
+  deallog.pop();
+
+  // in case of failure: throw exception
+  AssertThrow(conv == SolverControl::success, SolverControl::NoConvergence (iter,
+              std::fabs(1./length-1./old_length)));
+
+  // otherwise exit as normal
+}
+
+//---------------------------------------------------------------------------
+
+template <class VectorType>
+EigenInverse<VectorType>::EigenInverse (SolverControl            &cn,
+                                        VectorMemory<VectorType> &mem,
+                                        const AdditionalData     &data)
+  :
+  Solver<VectorType>(cn, mem),
+  additional_data(data)
+{}
+
+
+
+template <class VectorType>
+EigenInverse<VectorType>::~EigenInverse ()
+{}
+
+
+
+template <class VectorType>
+template <typename MatrixType>
+void
+EigenInverse<VectorType>::solve (double           &value,
+                                 const MatrixType &A,
+                                 VectorType       &x)
+{
+  deallog.push("Wielandt");
+
+  SolverControl::State conv=SolverControl::iterate;
+
+  // Prepare matrix for solver
+  ShiftedMatrix <MatrixType> A_s(A, -value);
+
+  // Define solver
+  ReductionControl inner_control (5000, 1.e-16, 1.e-5, false, false);
+  PreconditionIdentity prec;
+  SolverGMRES<VectorType>
+  solver(inner_control, this->memory);
+
+  // Next step for recomputing the shift
+  unsigned int goal = additional_data.start_adaption;
+
+  // Auxiliary vector
+  VectorType *Vy = this->memory.alloc ();
+  VectorType &y = *Vy;
+  y.reinit (x);
+  VectorType *Vr = this->memory.alloc ();
+  VectorType &r = *Vr;
+  r.reinit (x);
+
+  double length = x.l2_norm ();
+  double old_value = value;
+
+  x *= 1./length;
+
+  // Main loop
+  double res = -std::numeric_limits<double>::max();
+  size_type iter=0;
+  for (; conv==SolverControl::iterate; iter++)
+    {
+      solver.solve (A_s, y, x, prec);
+
+      // Compute absolute value of eigenvalue
+      length = y.l2_norm ();
+
+      // do a little trick to compute the sign
+      // with not too much effect of round-off errors.
+      double entry = 0.;
+      size_type i = 0;
+      double thresh = length/x.size();
+      do
+        {
+          Assert (i<x.size(), ExcInternalError());
+          entry = y (i++);
+        }
+      while (std::fabs(entry) < thresh);
+
+      --i;
+
+      // Compute unshifted eigenvalue
+      value = (entry * x (i) < 0.) ? -length : length;
+      value = 1./value;
+      value -= A_s.shift ();
+
+      if (iter==goal)
+        {
+          const double new_shift = - additional_data.relaxation * value
+                                   + (1.-additional_data.relaxation) * A_s.shift();
+          A_s.shift(new_shift);
+          ++goal;
+        }
+
+      // Update normalized eigenvector
+      x.equ (1./length, y);
+      // Compute residual
+      if (additional_data.use_residual)
+        {
+          y.equ (value, x);
+          A.vmult(r,x);
+          r.sadd(-1., value, x);
+          res = r.l2_norm();
+          // Check the residual
+          conv = this->iteration_status (iter, res, x);
+        }
+      else
+        {
+          res = std::fabs(1./value-1./old_value);
+          conv = this->iteration_status (iter, res, x);
+        }
+      old_value = value;
+    }
+
+  this->memory.free(Vy);
+  this->memory.free(Vr);
+
+  deallog.pop();
+
+  // in case of failure: throw
+  // exception
+  AssertThrow (conv == SolverControl::success,
+               SolverControl::NoConvergence (iter,
+                                             res));
+  // otherwise exit as normal
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/exceptions.h b/include/deal.II/lac/exceptions.h
new file mode 100644
index 0000000..e40044b
--- /dev/null
+++ b/include/deal.II/lac/exceptions.h
@@ -0,0 +1,72 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__lac_exceptions_h
+#define dealii__lac_exceptions_h
+
+#include <deal.II/base/exceptions.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace LACExceptions
+{
+  /**
+   * @addtogroup Exceptions
+   */
+  //@{
+
+  /**
+   * This function only works for quadratic (i.e. square) matrices.
+   */
+  DeclException0 (ExcNotQuadratic);
+
+  /**
+   * The operation cannot be finished since the matrix is singular.
+   */
+  DeclException0 (ExcSingular);
+
+  /**
+   * Block indices of two block objects are different.
+   */
+  DeclException0 (ExcDifferentBlockIndices);
+
+  /**
+   * An error of a PETSc function was encountered. Check the PETSc
+   * documentation for details.
+   */
+  DeclException1 (ExcPETScError,
+                  int,
+                  << "An error with error number " << arg1
+                  << " occurred while calling a PETSc function");
+
+  /**
+   * An error of a Trilinos function was encountered. Check the Trilinos
+   * documentation for details.
+   */
+  DeclException1 (ExcTrilinosError,
+                  int,
+                  << "An error with error number " << arg1
+                  << " occurred while calling a Trilinos function");
+
+  //@}
+}
+
+
+using namespace LACExceptions;
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/filtered_matrix.h b/include/deal.II/lac/filtered_matrix.h
new file mode 100644
index 0000000..289cfb9
--- /dev/null
+++ b/include/deal.II/lac/filtered_matrix.h
@@ -0,0 +1,1019 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__filtered_matrix_h
+#define dealii__filtered_matrix_h
+
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/base/thread_management.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/lac/pointer_matrix.h>
+#include <deal.II/lac/vector_memory.h>
+#include <vector>
+#include <algorithm>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <typename number> class Vector;
+template <class VectorType> class FilteredMatrixBlock;
+
+
+/*! @addtogroup Matrix2
+ *@{
+ */
+
+
+/**
+ * This class is a wrapper for linear systems of equations with simple
+ * equality constraints fixing individual degrees of freedom to a certain
+ * value such as when using Dirichlet boundary values.
+ *
+ * In order to accomplish this, the vmult(), Tvmult(), vmult_add() and
+ * Tvmult_add() functions modify the same function of the original matrix such
+ * as if all constrained entries of the source vector were zero. Additionally,
+ * all constrained entries of the destination vector are set to zero.
+ *
+ * <h3>Usage</h3>
+ *
+ * Usage is simple: create an object of this type, point it to a matrix that
+ * shall be used for $A$ above (either through the constructor, the copy
+ * constructor, or the initialize() function), specify the list of
+ * boundary values or other constraints (through the add_constraints()
+ * function), and then for each required solution modify the right hand side
+ * vector (through apply_constraints()) and use this object as matrix object
+ * in a linear solver. As linear solvers should only use vmult() and
+ * residual() functions of a matrix class, this class should be as good a
+ * matrix as any other for that purpose.
+ *
+ * Furthermore, also the precondition_Jacobi() function is provided (since the
+ * computation of diagonal elements of the filtered matrix $A_X$ is simple),
+ * so you can use this as a preconditioner. Some other functions useful for
+ * matrices are also available.
+ *
+ * A typical code snippet showing the above steps is as follows:
+ * @code
+ *   ... // set up sparse matrix A and right hand side b somehow
+ *
+ *                     // initialize filtered matrix with
+ *                     // matrix and boundary value constraints
+ *   FilteredMatrix<Vector<double> > filtered_A (A);
+ *   filtered_A.add_constraints (boundary_values);
+ *
+ *                     // set up a linear solver
+ *   SolverControl control (1000, 1.e-10, false, false);
+ *   GrowingVectorMemory<Vector<double> > mem;
+ *   SolverCG<Vector<double> > solver (control, mem);
+ *
+ *                     // set up a preconditioner object
+ *   PreconditionJacobi<SparseMatrix<double> > prec;
+ *   prec.initialize (A, 1.2);
+ *   FilteredMatrix<Vector<double> > filtered_prec (prec);
+ *   filtered_prec.add_constraints (boundary_values);
+ *
+ *                     // compute modification of right hand side
+ *   filtered_A.apply_constraints (b, true);
+ *
+ *                     // solve for solution vector x
+ *   solver.solve (filtered_A, x, b, filtered_prec);
+ * @endcode
+ *
+ *
+ * <h3>Connection to other classes</h3>
+ *
+ * The function MatrixTools::apply_boundary_values() does exactly the same
+ * that this class does, except for the fact that that function actually
+ * modifies the matrix. Consequently, it is only possible to solve with a
+ * matrix to which MatrixTools::apply_boundary_values() was applied for one
+ * right hand side and one set of boundary values since the modification of
+ * the right hand side depends on the original matrix.
+ *
+ * While this is a feasible method in cases where only one solution of the
+ * linear system is required, for example in solving linear stationary
+ * systems, one would often like to have the ability to solve multiple times
+ * with the same matrix in nonlinear problems (where one often does not want
+ * to update the Hessian between Newton steps, despite having different right
+ * hand sides in subsequent steps) or time dependent problems, without having
+ * to re-assemble the matrix or copy it to temporary matrices with which one
+ * then can work. For these cases, this class is meant.
+ *
+ *
+ * <h3>Some background</h3> Mathematically speaking, it is used to represent a
+ * system of linear equations $Ax=b$ with the constraint that $B_D x = g_D$,
+ * where $B_D$ is a rectangular matrix with exactly one $1$ in each row, and
+ * these $1$s in those columns representing constrained degrees of freedom
+ * (e.g. for Dirichlet boundary nodes, thus the index $D$) and zeroes for all
+ * other entries, and $g_D$ having the requested nodal values for
+ * these constrained nodes. Thus, the underdetermined equation $B_D x = g_D$
+ * fixes only the constrained nodes and does not impose any condition on the
+ * others. We note that $B_D B_D^T = 1_D$, where $1_D$ is the identity matrix
+ * with dimension as large as the number of constrained degrees of freedom.
+ * Likewise, $B_D^T B_D$ is the diagonal matrix with diagonal entries $0$ or
+ * $1$ that, when applied to a vector, leaves all constrained nodes untouched
+ * and deletes all unconstrained ones.
+ *
+ * For solving such a system of equations, we first write down the Lagrangian
+ * $L=1/2 x^T A x - x^T b + l^T B_D x$, where $l$ is a Lagrange multiplier for
+ * the constraints. The stationarity condition then reads
+ * @code
+ * [ A   B_D^T ] [x] = [b  ]
+ * [ B_D 0     ] [l] = [g_D]
+ * @endcode
+ *
+ * The first equation then reads $B_D^T l = b-Ax$. On the other hand, if we
+ * left-multiply the first equation by $B_D^T B_D$, we obtain $B_D^T B_D A x +
+ * B_D^T l = B_D^T B_D b$ after equating $B_D B_D^T$ to the identity matrix.
+ * Inserting the previous equality, this yields $(A - B_D^T B_D A) x = (1 -
+ * B_D^T B_D)b$. Since $x=(1 - B_D^T B_D) x + B_D^T B_D x = (1 - B_D^T B_D) x
+ * + B_D^T g_D$, we can restate the linear system: $A_D x = (1 - B_D^T B_D)b -
+ * (1 - B_D^T B_D) A B_D^T g_D$, where $A_D = (1 - B_D^T B_D) A (1 - B_D^T B_D)$
+ * is the matrix where all rows and columns corresponding to constrained nodes
+ * have been deleted.
+ *
+ * The last system of equation only defines the value of the unconstrained
+ * nodes, while the constrained ones are determined by the equation $B_D x =
+ * g_D$. We can combine these two linear systems by using the zeroed out rows
+ * of $A_D$: if we set the diagonal to $1$ and the corresponding zeroed out
+ * element of the right hand side to that of $g_D$, then this fixes the
+ * constrained elements as well. We can write this as follows: $A_X x = (1 -
+ * B_D^T B_D)b - (1 - B_D^T B_D) A B_D^T g_D + B_D^T g_D$, where $A_X = A_D +
+ * B_D^T B_D$. Note that the two parts of the latter matrix operate on
+ * disjoint subspaces (the first on the unconstrained nodes, the latter on the
+ * constrained ones).
+ *
+ * In iterative solvers, it is not actually necessary to compute $A_X$
+ * explicitly, since only matrix-vector operations need to be performed. This
+ * can be done in a three-step procedure that first clears all elements in the
+ * incoming vector that belong to constrained nodes, then performs the product
+ * with the matrix $A$, then clears again. This class is a wrapper to this
+ * procedure, it takes a pointer to a matrix with which to perform matrix-
+ * vector products, and does the cleaning of constrained elements itself. This
+ * class therefore implements an overloaded @p vmult function that does the
+ * matrix-vector product, as well as @p Tvmult for transpose matrix-vector
+ * multiplication and @p residual for residual computation, and can thus be
+ * used as a matrix replacement in linear solvers.
+ *
+ * It also has the ability to generate the modification of the right hand
+ * side, through the apply_constraints() function.
+ *
+ *
+ *
+ * <h3>Template arguments</h3>
+ *
+ * This class takes the vector class as its template argument. The matrix
+ * class is a template argument of the constructor and of initialize(); it
+ * must provide @p vmult, @p vmult_add,  @p Tvmult, and @p residual member
+ * functions that operate on the vector type. The vector class must provide
+ * access to individual elements through <tt>operator()</tt> and assignment
+ * through <tt>operator=</tt>.
+ *
+ *
+ * <h3>Thread-safety</h3>
+ *
+ * The functions that operate as a matrix and do not change the internal state
+ * of this object are synchronised and thus threadsafe. Consequently, you do
+ * not need to serialize calls to @p vmult or @p residual .
+ *
+ * @author Wolfgang Bangerth 2001, Luca Heltai 2006, Guido Kanschat 2007, 2008
+ */
+template <typename VectorType>
+class FilteredMatrix : public Subscriptor
+{
+public:
+  class const_iterator;
+
+  /**
+   * Declare the type of container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * Accessor class for iterators
+   */
+  class Accessor
+  {
+    /**
+     * Constructor. Since we use accessors only for read access, a const
+     * matrix pointer is sufficient.
+     */
+    Accessor (const FilteredMatrix<VectorType> *matrix,
+              const size_type                  index);
+
+  public:
+    /**
+     * Row number of the element represented by this object.
+     */
+    size_type row() const;
+
+    /**
+     * Column number of the element represented by this object.
+     */
+    size_type column() const;
+
+    /**
+     * Value of the right hand side for this row.
+     */
+    double value() const;
+
+  private:
+    /**
+     * Advance to next entry
+     */
+    void advance ();
+
+    /**
+     * The matrix accessed.
+     */
+    const FilteredMatrix<VectorType> *matrix;
+
+    /**
+     * Current row number.
+     */
+    size_type index;
+    /*
+     * Make enclosing class a
+     * friend.
+     */
+    friend class const_iterator;
+  };
+
+  /**
+   * Standard-conforming iterator.
+   */
+  class const_iterator
+  {
+  public:
+    /**
+     * Constructor.
+     */
+    const_iterator(const FilteredMatrix<VectorType> *matrix,
+                   const size_type                  index);
+
+    /**
+     * Prefix increment.
+     */
+    const_iterator &operator++ ();
+
+    /**
+     * Postfix increment.
+     */
+    const_iterator &operator++ (int);
+
+    /**
+     * Dereferencing operator.
+     */
+    const Accessor &operator* () const;
+
+    /**
+     * Dereferencing operator.
+     */
+    const Accessor *operator-> () const;
+
+    /**
+     * Comparison. True, if both iterators point to the same matrix position.
+     */
+    bool operator == (const const_iterator &) const;
+    /**
+     * Inverse of <tt>==</tt>.
+     */
+    bool operator != (const const_iterator &) const;
+
+    /**
+     * Comparison operator. Result is true if either the first row number is
+     * smaller or if the row numbers are equal and the first index is smaller.
+     */
+    bool operator < (const const_iterator &) const;
+
+    /**
+     * Comparison operator. Compares just the other way around than the
+     * operator above.
+     */
+    bool operator > (const const_iterator &) const;
+
+  private:
+    /**
+     * Store an object of the accessor class.
+     */
+    Accessor accessor;
+  };
+
+  /**
+   * Typedef defining a type that represents a pair of degree of freedom index
+   * and the value it shall have.
+   */
+  typedef std::pair<size_type, double> IndexValuePair;
+
+  /**
+   * @name Constructors and initialization
+   */
+//@{
+  /**
+   * Default constructor. You will have to set the matrix to be used later
+   * using initialize().
+   */
+  FilteredMatrix ();
+
+  /**
+   * Copy constructor. Use the matrix and the constraints set in the given
+   * object for the present one as well.
+   */
+  FilteredMatrix (const FilteredMatrix &fm);
+
+  /**
+   * Constructor. Use the given matrix for future operations.
+   *
+   * @arg @p matrix: The matrix being used in multiplications.
+   *
+   * @arg @p expect_constrained_source: See documentation of
+   * #expect_constrained_source.
+   */
+  template <typename MatrixType>
+  FilteredMatrix (const MatrixType &matrix,
+                  bool              expect_constrained_source = false);
+
+  /**
+   * Copy operator. Take over matrix and constraints from the other object.
+   */
+  FilteredMatrix &operator = (const FilteredMatrix &fm);
+
+  /**
+   * Set the matrix to be used further on. You will probably also want to call
+   * the clear_constraints() function if constraints were previously added.
+   *
+   * @arg @p m: The matrix being used in multiplications.
+   *
+   * @arg @p expect_constrained_source: See documentation of
+   * #expect_constrained_source.
+   */
+  template <typename MatrixType>
+  void initialize (const MatrixType &m,
+                   bool              expect_constrained_source = false);
+
+  /**
+   * Delete all constraints and the matrix pointer.
+   */
+  void clear ();
+//@}
+  /**
+   * @name Managing constraints
+   */
+//@{
+  /**
+   * Add the constraint that the value with index <tt>i</tt> should have the
+   * value <tt>v</tt>.
+   */
+  void add_constraint (const size_type i, const double v);
+
+  /**
+   * Add a list of constraints to the ones already managed by this object. The
+   * actual data type of this list must be so that dereferenced iterators are
+   * pairs of indices and the corresponding values to be enforced on the
+   * respective solution vector's entry. Thus, the data type might be, for
+   * example, a @p std::list or @p std::vector of IndexValuePair objects, but
+   * also a <tt>std::map<unsigned, double></tt>.
+   *
+   * The second component of these pairs will only be used in
+   * apply_constraints(). The first is used to set values to zero in matrix
+   * vector multiplications.
+   *
+   * It is an error if the argument contains an entry for a degree of freedom
+   * that has already been constrained previously.
+   */
+  template <class ConstraintList>
+  void add_constraints (const ConstraintList &new_constraints);
+
+  /**
+   * Delete the list of constraints presently in use.
+   */
+  void clear_constraints ();
+//@}
+  /**
+   * @name Vector operations
+   */
+//@{
+  /**
+   * Apply the constraints to a right hand side vector. This needs to be done
+   * before starting to solve with the filtered matrix. If the matrix is
+   * symmetric (i.e. the matrix itself, not only its sparsity pattern), set
+   * the second parameter to @p true to use a faster algorithm. Note: This
+   * method is deprecated as matrix_is_symmetric parameter is no longer used.
+   */
+  void apply_constraints (VectorType &v,
+                          const bool matrix_is_symmetric) const DEAL_II_DEPRECATED;
+  /**
+   * Apply the constraints to a right hand side vector. This needs to be done
+   * before starting to solve with the filtered matrix.
+   */
+  void apply_constraints (VectorType &v) const;
+
+  /**
+   * Matrix-vector multiplication: this operation performs pre_filter(),
+   * multiplication with the stored matrix and post_filter() in that order.
+   */
+  void vmult (VectorType       &dst,
+              const VectorType &src) const;
+
+  /**
+   * Matrix-vector multiplication: this operation performs pre_filter(),
+   * transposed multiplication with the stored matrix and post_filter() in
+   * that order.
+   */
+  void Tvmult (VectorType       &dst,
+               const VectorType &src) const;
+
+  /**
+   * Adding matrix-vector multiplication.
+   *
+   * @note The result vector of this multiplication will have the constraint
+   * entries set to zero, independent of the previous value of <tt>dst</tt>.
+   * We expect that in most cases this is the required behavior.
+   */
+  void vmult_add (VectorType       &dst,
+                  const VectorType &src) const;
+
+  /**
+   * Adding transpose matrix-vector multiplication:
+   *
+   * @note The result vector of this multiplication will have the constraint
+   * entries set to zero, independent of the previous value of <tt>dst</tt>.
+   * We expect that in most cases this is the required behavior.
+   */
+  void Tvmult_add (VectorType       &dst,
+                   const VectorType &src) const;
+//@}
+
+  /**
+   * @name Iterators
+   */
+//@{
+  /**
+   * Iterator to the first constraint.
+   */
+  const_iterator begin () const;
+  /**
+   * Final iterator.
+   */
+  const_iterator end () const;
+//@}
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object. Since we are not the owner of the matrix referenced, its memory
+   * consumption is not included.
+   */
+  std::size_t memory_consumption () const;
+
+private:
+  /**
+   * Determine, whether multiplications can expect that the source vector has
+   * all constrained entries set to zero.
+   *
+   * If so, the auxiliary vector can be avoided and memory as well as time can
+   * be saved.
+   *
+   * We expect this for instance in Newton's method, where the residual
+   * already should be zero on constrained nodes. This is, because there is no
+   * test function in these nodes.
+   */
+  bool expect_constrained_source;
+
+  /**
+   * Declare an abbreviation for an iterator into the array constraint pairs,
+   * since that data type is so often used and is rather awkward to write out
+   * each time.
+   */
+  typedef typename std::vector<IndexValuePair>::const_iterator const_index_value_iterator;
+
+  /**
+   * Helper class used to sort pairs of indices and values. Only the index is
+   * considered as sort key.
+   */
+  struct PairComparison
+  {
+    /**
+     * Function comparing the pairs @p i1 and @p i2 for their keys.
+     */
+    bool operator () (const IndexValuePair &i1,
+                      const IndexValuePair &i2) const;
+  };
+
+  /**
+   * Pointer to the matrix used in the matrix-vector products.
+   */
+  std_cxx11::shared_ptr<PointerMatrixBase<VectorType> > matrix;
+
+  /**
+   * Sorted list of pairs denoting the index of the variable and the value to
+   * which it shall be fixed.
+   */
+  std::vector<IndexValuePair> constraints;
+
+  /**
+   * Do the pre-filtering step, i.e. zero out those components that belong to
+   * constrained degrees of freedom.
+   */
+  void pre_filter (VectorType &v) const;
+
+  /**
+   * Do the postfiltering step, i.e. set constrained degrees of freedom to the
+   * value of the input vector, as the matrix contains only ones on the
+   * diagonal for these degrees of freedom.
+   */
+  void post_filter (const VectorType &in,
+                    VectorType       &out) const;
+
+  friend class Accessor;
+  /**
+   * FilteredMatrixBlock accesses pre_filter() and post_filter().
+   */
+  friend class FilteredMatrixBlock<VectorType>;
+};
+
+/*@}*/
+/*---------------------- Inline functions -----------------------------------*/
+
+
+//--------------------------------Iterators--------------------------------------//
+
+template<typename VectorType>
+inline
+FilteredMatrix<VectorType>::Accessor::Accessor
+(const FilteredMatrix<VectorType> *matrix,
+ const size_type                   index)
+  :
+  matrix(matrix),
+  index(index)
+{
+  Assert (index <= matrix->constraints.size(),
+          ExcIndexRange(index, 0, matrix->constraints.size()));
+}
+
+
+
+template<typename VectorType>
+inline
+types::global_dof_index
+FilteredMatrix<VectorType>::Accessor::row() const
+{
+  return matrix->constraints[index].first;
+}
+
+
+
+template<typename VectorType>
+inline
+types::global_dof_index
+FilteredMatrix<VectorType>::Accessor::column() const
+{
+  return matrix->constraints[index].first;
+}
+
+
+
+template<typename VectorType>
+inline
+double
+FilteredMatrix<VectorType>::Accessor::value() const
+{
+  return matrix->constraints[index].second;
+}
+
+
+
+template<typename VectorType>
+inline
+void
+FilteredMatrix<VectorType>::Accessor::advance()
+{
+  Assert (index < matrix->constraints.size(), ExcIteratorPastEnd());
+  ++index;
+}
+
+
+
+
+template<typename VectorType>
+inline
+FilteredMatrix<VectorType>::const_iterator::const_iterator
+(const FilteredMatrix<VectorType> *matrix,
+ const size_type                   index)
+  :
+  accessor(matrix, index)
+{}
+
+
+
+template<typename VectorType>
+inline
+typename FilteredMatrix<VectorType>::const_iterator &
+FilteredMatrix<VectorType>::const_iterator::operator++ ()
+{
+  accessor.advance();
+  return *this;
+}
+
+
+template <typename number>
+inline
+const typename FilteredMatrix<number>::Accessor &
+FilteredMatrix<number>::const_iterator::operator* () const
+{
+  return accessor;
+}
+
+
+template <typename number>
+inline
+const typename FilteredMatrix<number>::Accessor *
+FilteredMatrix<number>::const_iterator::operator-> () const
+{
+  return &accessor;
+}
+
+
+template <typename number>
+inline
+bool
+FilteredMatrix<number>::const_iterator::
+operator == (const const_iterator &other) const
+{
+  return (accessor.index == other.accessor.index
+          && accessor.matrix == other.accessor.matrix);
+}
+
+
+template <typename number>
+inline
+bool
+FilteredMatrix<number>::const_iterator::
+operator != (const const_iterator &other) const
+{
+  return ! (*this == other);
+}
+
+
+
+//------------------------------- FilteredMatrix ---------------------------------------//
+
+template <typename number>
+inline
+typename FilteredMatrix<number>::const_iterator
+FilteredMatrix<number>::begin () const
+{
+  return const_iterator(this, 0);
+}
+
+
+template <typename number>
+inline
+typename FilteredMatrix<number>::const_iterator
+FilteredMatrix<number>::end () const
+{
+  return const_iterator(this, constraints.size());
+}
+
+
+template <typename VectorType>
+inline
+bool
+FilteredMatrix<VectorType>::PairComparison::
+operator () (const IndexValuePair &i1,
+             const IndexValuePair &i2) const
+{
+  return (i1.first < i2.first);
+}
+
+
+
+template <typename VectorType>
+template <typename MatrixType>
+inline
+void
+FilteredMatrix<VectorType>::initialize (const MatrixType &m, bool ecs)
+{
+  matrix.reset (new_pointer_matrix_base(m, VectorType()));
+
+  expect_constrained_source = ecs;
+}
+
+
+
+template <typename VectorType>
+inline FilteredMatrix<VectorType>::FilteredMatrix ()
+  : expect_constrained_source(false) // defined value until initialize() sets it
+{}
+
+
+
+template <typename VectorType>
+inline
+FilteredMatrix<VectorType>::FilteredMatrix (const FilteredMatrix &fm)
+  :
+  Subscriptor(),
+  expect_constrained_source(fm.expect_constrained_source),
+  matrix(fm.matrix),
+  constraints (fm.constraints)
+{}
+
+
+
+template <typename VectorType>
+template <typename MatrixType>
+inline
+FilteredMatrix<VectorType>::
+FilteredMatrix (const MatrixType &m, bool ecs)
+{
+  initialize (m, ecs);
+}
+
+
+
+// Member-wise assignment of matrix handle, flag and constraint list from @p fm.
+template <typename VectorType>
+inline FilteredMatrix<VectorType> &
+FilteredMatrix<VectorType>::operator = (const FilteredMatrix &fm)
+{
+  this->matrix                    = fm.matrix;
+  this->expect_constrained_source = fm.expect_constrained_source;
+  this->constraints               = fm.constraints;
+  return *this;
+}
+
+
+
+// Append the pair (@p index, @p value) to the constraint list; no sorting is done.
+template <typename VectorType>
+inline void
+FilteredMatrix<VectorType>::add_constraint (const size_type index, const double value)
+{
+  const IndexValuePair new_entry (index, value);
+  constraints.push_back (new_entry);
+}
+
+
+
+// Append every element of @p new_constraints to the stored list and merge
+// old and new entries into one sequence ordered by PairComparison.
+// NOTE(review): std::inplace_merge requires both runs to be sorted already,
+// so the existing list and @p new_constraints must each be ordered by index.
+template <typename VectorType>
+template <class ConstraintList>
+inline void
+FilteredMatrix<VectorType>::add_constraints (const ConstraintList &new_constraints)
+{
+  // remember where the existing entries stop, then append the new ones
+  const size_type n_existing = constraints.size();
+  constraints.reserve (n_existing + new_constraints.size());
+  constraints.insert (constraints.end(), new_constraints.begin(), new_constraints.end());
+  // [begin, begin+n_existing) holds the old entries, the rest the new ones;
+  // merge the two runs in place using the index-only comparison
+  std::inplace_merge (constraints.begin(),
+                      constraints.begin() + n_existing,
+                      constraints.end(), PairComparison());
+}
+
+
+
+// Drop all constraints. The list is swapped with an empty vector rather
+// than cleared, so that the old storage is actually given back.
+template <typename VectorType>
+inline void
+FilteredMatrix<VectorType>::clear_constraints ()
+{
+  // the temporary takes over the old buffer and is destroyed right away
+  std::vector<IndexValuePair>().swap (constraints);
+}
+
+
+
+// Return to the empty state: drop all constraints, then reset the matrix handle.
+template <typename VectorType>
+inline void
+FilteredMatrix<VectorType>::clear ()
+{
+  this->clear_constraints ();
+  this->matrix.reset ();
+}
+
+
+
+// Two-argument form of apply_constraints(). The unnamed boolean is ignored
+// and the call simply forwards @p v to the one-argument overload.
+template <typename VectorType>
+inline void
+FilteredMatrix<VectorType>::apply_constraints (VectorType &v,
+                                               const bool  /* matrix_is_symmetric */) const
+{
+  this->apply_constraints (v);
+}
+
+
+// Modify the right hand side @p v for the stored constraints: add the
+// unfiltered matrix times a vector holding -value at each constrained index,
+// then overwrite the constrained entries of @p v with the values themselves.
+template <typename VectorType>
+inline void
+FilteredMatrix<VectorType>::apply_constraints (VectorType &v) const
+{
+  // Scratch vector held through VectorMemory::Pointer and shaped like v
+  // (reinit(v) presumably also zeroes it -- TODO confirm).
+  GrowingVectorMemory<VectorType> mem;
+  typename VectorMemory<VectorType>::Pointer correction (mem);
+  correction->reinit (v);
+  const const_index_value_iterator first = constraints.begin();
+  const const_index_value_iterator last  = constraints.end();
+  for (const_index_value_iterator c = first; c != last; ++c)
+    {
+      AssertIsFinite(c->second);
+      (*correction)(c->first) = -c->second;
+    }
+  // Use the wrapped matrix directly (no filtering) to correct the rhs.
+  matrix->vmult_add (v, *correction);
+  // Finally set the constrained entries to their prescribed values.
+  for (const_index_value_iterator c = first; c != last; ++c)
+    {
+      AssertIsFinite(c->second);
+      v(c->first) = c->second;
+    }
+}
+
+
+// Set the entry of @p v at every constrained index to zero; entries at
+// other indices are not written.
+template <typename VectorType>
+inline void
+FilteredMatrix<VectorType>::pre_filter (VectorType &v) const
+{
+  const const_index_value_iterator last = constraints.end();
+  for (const_index_value_iterator c = constraints.begin(); c != last; ++c)
+    {
+      v(c->first) = 0;
+    }
+}
+
+
+
+// Copy the entries of @p in at the constrained indices into the same
+// positions of @p out; entries of @p out at other indices are not written.
+// In debug mode each copied value is checked with AssertIsFinite.
+template <typename VectorType>
+inline void
+FilteredMatrix<VectorType>::post_filter (const VectorType &in,
+                                         VectorType       &out) const
+{
+  // walk the constraint list; only c->first (the index) is used here
+  const const_index_value_iterator last = constraints.end();
+  for (const_index_value_iterator c = constraints.begin(); c != last; ++c)
+    {
+      AssertIsFinite(in(c->first));
+      out(c->first) = in(c->first);
+    }
+}
+
+
+
+// Apply vmult() of the wrapped matrix to @p src, writing into @p dst, then
+// copy the constrained entries of @p src into @p dst. Unless the object was
+// told to expect constrained sources, the product uses a copy of @p src
+// whose constrained entries have been zeroed by pre_filter().
+template <typename VectorType>
+inline void
+FilteredMatrix<VectorType>::vmult (VectorType &dst, const VectorType &src) const
+{
+  if (!expect_constrained_source)
+    {
+      // Hold the scratch vector through VectorMemory::Pointer, as
+      // apply_constraints() does, so that it is released even if the
+      // product below throws (the raw alloc()/free() pair was not).
+      GrowingVectorMemory<VectorType> mem;
+      typename VectorMemory<VectorType>::Pointer tmp_vector (mem);
+      tmp_vector->reinit(src, true);
+      *tmp_vector = src;
+      pre_filter (*tmp_vector);
+      matrix->vmult (dst, *tmp_vector);
+    }
+  else
+    {
+      matrix->vmult (dst, src);
+    }
+  post_filter (src, dst);
+}
+
+
+
+// Apply Tvmult() of the wrapped matrix to @p src, writing into @p dst, then
+// copy the constrained entries of @p src into @p dst. Unless the object was
+// told to expect constrained sources, the product uses a copy of @p src
+// whose constrained entries have been zeroed by pre_filter().
+template <typename VectorType>
+inline void
+FilteredMatrix<VectorType>::Tvmult (VectorType &dst, const VectorType &src) const
+{
+  if (!expect_constrained_source)
+    {
+      // Hold the scratch vector through VectorMemory::Pointer, as
+      // apply_constraints() does, so that it is released even if the
+      // product below throws (the raw alloc()/free() pair was not).
+      GrowingVectorMemory<VectorType> mem;
+      typename VectorMemory<VectorType>::Pointer tmp_vector (mem);
+      tmp_vector->reinit(src, true);
+      *tmp_vector = src;
+      pre_filter (*tmp_vector);
+      matrix->Tvmult (dst, *tmp_vector);
+    }
+  else
+    {
+      matrix->Tvmult (dst, src);
+    }
+  post_filter (src, dst);
+}
+
+
+
+// Apply vmult_add() of the wrapped matrix to @p src, accumulating into
+// @p dst, then overwrite the constrained entries of @p dst with those of
+// @p src. Unless constrained sources are expected, the product uses a copy
+// of @p src whose constrained entries have been zeroed by pre_filter().
+template <typename VectorType>
+inline void
+FilteredMatrix<VectorType>::vmult_add (VectorType &dst, const VectorType &src) const
+{
+  if (!expect_constrained_source)
+    {
+      // Hold the scratch vector through VectorMemory::Pointer, as
+      // apply_constraints() does, so that it is released even if the
+      // product below throws (the raw alloc()/free() pair was not).
+      GrowingVectorMemory<VectorType> mem;
+      typename VectorMemory<VectorType>::Pointer tmp_vector (mem);
+      tmp_vector->reinit(src, true);
+      *tmp_vector = src;
+      pre_filter (*tmp_vector);
+      matrix->vmult_add (dst, *tmp_vector);
+    }
+  else
+    {
+      matrix->vmult_add (dst, src);
+    }
+  post_filter (src, dst);
+}
+
+
+
+// Apply Tvmult_add() of the wrapped matrix to @p src, accumulating into
+// @p dst, then overwrite the constrained entries of @p dst with those of
+// @p src. Unless constrained sources are expected, the product uses a copy
+// of @p src whose constrained entries have been zeroed by pre_filter().
+template <typename VectorType>
+inline void
+FilteredMatrix<VectorType>::Tvmult_add (VectorType &dst, const VectorType &src) const
+{
+  if (!expect_constrained_source)
+    {
+      // Hold the scratch vector through VectorMemory::Pointer, as
+      // apply_constraints() does, so that it is released even if the
+      // product below throws (the raw alloc()/free() pair was not).
+      GrowingVectorMemory<VectorType> mem;
+      typename VectorMemory<VectorType>::Pointer tmp_vector (mem);
+      tmp_vector->reinit(src, true);
+      *tmp_vector = src;
+      pre_filter (*tmp_vector);
+      matrix->Tvmult_add (dst, *tmp_vector);
+    }
+  else
+    {
+      matrix->Tvmult_add (dst, src);
+    }
+  post_filter (src, dst);
+}
+
+
+
+// Sum of the memory estimates for the matrix handle and the constraint list.
+template <typename VectorType>
+inline std::size_t
+FilteredMatrix<VectorType>::memory_consumption () const
+{
+  const std::size_t handle_bytes = MemoryConsumption::memory_consumption (matrix);
+  return handle_bytes + MemoryConsumption::memory_consumption (constraints);
+}
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
+/*----------------------------   filtered_matrix.h     ---------------------------*/
diff --git a/include/deal.II/lac/full_matrix.h b/include/deal.II/lac/full_matrix.h
new file mode 100644
index 0000000..9cadd62
--- /dev/null
+++ b/include/deal.II/lac/full_matrix.h
@@ -0,0 +1,1566 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__full_matrix_h
+#define dealii__full_matrix_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/numbers.h>
+#include <deal.II/base/table.h>
+#include <deal.II/lac/exceptions.h>
+#include <deal.II/lac/identity_matrix.h>
+#include <deal.II/base/tensor.h>
+
+#include <vector>
+#include <iomanip>
+#include <cstring>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// forward declarations
+template <typename number> class Vector;
+template <typename number> class LAPACKFullMatrix;
+
+
+/*! @addtogroup Matrix1
+ *@{
+ */
+
+
+/**
+ * Implementation of a classical rectangular scheme of numbers. The data type
+ * of the entries is provided in the template argument <tt>number</tt>.  The
+ * interface is quite fat and in fact has grown every time a new feature was
+ * needed. So, a lot of functions are provided.
+ *
+ * Internal calculations are usually done with the accuracy of the vector
+ * argument to functions. If there is no argument with a number type, the
+ * matrix number type is used.
+ *
+ * @note Instantiations for this template are provided for <tt>@<float@>,
+ * @<double@>, @<long double@>, @<std::complex@<float@>@>,
+ * @<std::complex@<double@>@>, @<std::complex@<long double@>@></tt>; others
+ * can be generated in application programs (see the section on
+ * @ref Instantiations
+ * in the manual).
+ *
+ * @author Guido Kanschat, Franz-Theo Suttmeier, Wolfgang Bangerth, 1993-2004
+ */
+template <typename number>
+class FullMatrix : public Table<2,number>
+{
+public:
+  /**
+   * A type used to index into this container. Because we cannot expect to
+   * store matrices bigger than what can be indexed by a regular unsigned
+   * integer, <code>unsigned int</code> is completely sufficient as an index
+   * type.
+   */
+  typedef unsigned int size_type;
+
+  /**
+   * Type of matrix entries. This typedef is analogous to <tt>value_type</tt>
+   * in the standard library containers.
+   */
+  typedef number value_type;
+
+
+  /**
+   * Declare a type that holds real-valued numbers with the same precision
+   * as the template argument to this class. If the template argument of this
+   * class is a real data type, then real_type equals the template argument.
+   * If the template argument is a std::complex type then real_type equals the
+   * type underlying the complex numbers.
+   *
+   * This typedef is used to represent the return type of norms.
+   */
+  typedef typename numbers::NumberTraits<number>::real_type real_type;
+
+
+  class const_iterator;
+
+  /**
+   * Accessor class for iterators
+   */
+  class Accessor
+  {
+  public:
+    /**
+     * Constructor. Since we use accessors only for read access, a const
+     * matrix pointer is sufficient.
+     */
+    Accessor (const FullMatrix<number> *matrix,
+              const size_type row,
+              const size_type col);
+
+    /**
+     * Row number of the element represented by this object.
+     */
+    size_type row() const;
+
+    /**
+     * Column number of the element represented by this object.
+     */
+    size_type column() const;
+
+    /**
+     * Value of this matrix entry.
+     */
+    number value() const;
+
+  protected:
+    /**
+     * The matrix accessed.
+     */
+    const FullMatrix<number> *matrix;
+
+    /**
+     * Current row number.
+     */
+    size_type a_row;
+
+    /**
+     * Current column number.
+     */
+    unsigned short a_col;
+
+    /*
+     * Make the iterator class a friend.
+     */
+    friend class const_iterator;
+  };
+
+  /**
+   * Standard-conforming iterator.
+   */
+  class const_iterator
+  {
+  public:
+    /**
+     * Constructor.
+     */
+    const_iterator(const FullMatrix<number> *matrix,
+                   const size_type row,
+                   const size_type col);
+
+    /**
+     * Prefix increment.
+     */
+    const_iterator &operator++ ();
+
+    /**
+     * Postfix increment.
+     */
+    const_iterator &operator++ (int);
+
+    /**
+     * Dereferencing operator.
+     */
+    const Accessor &operator* () const;
+
+    /**
+     * Dereferencing operator.
+     */
+    const Accessor *operator-> () const;
+
+    /**
+     * Comparison. True, if both iterators point to the same matrix position.
+     */
+    bool operator == (const const_iterator &) const;
+    /**
+     * Inverse of <tt>==</tt>.
+     */
+    bool operator != (const const_iterator &) const;
+
+    /**
+     * Comparison operator. Result is true if either the first row number is
+     * smaller or if the row numbers are equal and the first index is smaller.
+     */
+    bool operator < (const const_iterator &) const;
+
+    /**
+     * Comparison operator. Compares just the other way around than the
+     * operator above.
+     */
+    bool operator > (const const_iterator &) const;
+
+  private:
+    /**
+     * Store an object of the accessor class.
+     */
+    Accessor accessor;
+  };
+  /**
+   * @name Constructors and initialization.  See also the base class Table.
+   */
+//@{
+
+  /**
+   * Constructor. Initialize the matrix as a square matrix with dimension
+   * <tt>n</tt>.
+   *
+   * In order to avoid the implicit conversion of integers and other types to
+   * a matrix, this constructor is declared <tt>explicit</tt>.
+   *
+   * By default, no memory is allocated.
+   */
+  explicit FullMatrix (const size_type n = 0);
+
+  /**
+   * Constructor. Initialize the matrix as a rectangular matrix.
+   */
+  FullMatrix (const size_type rows,
+              const size_type cols);
+
+  /**
+   * Copy constructor. This constructor does a deep copy of the matrix.
+   * Therefore, it poses a possible efficiency problem, if for example,
+   * function arguments are passed by value rather than by reference.
+   * Unfortunately, we can't mark this copy constructor <tt>explicit</tt>,
+   * since that prevents the use of this class in containers, such as
+   * <tt>std::vector</tt>. The responsibility to check performance of programs
+   * must therefore remain with the user of this class.
+   */
+  FullMatrix (const FullMatrix &);
+
+  /**
+   * Constructor initializing from an array of numbers. The array is arranged
+   * line by line. No range checking is performed.
+   */
+  FullMatrix (const size_type rows,
+              const size_type cols,
+              const number *entries);
+
+  /**
+   * Construct a full matrix that equals the identity matrix of the size of
+   * the argument. Using this constructor, one can easily create an identity
+   * matrix of size <code>n</code> by saying
+   * @code
+   * FullMatrix<double> M(IdentityMatrix(n));
+   * @endcode
+   */
+  FullMatrix (const IdentityMatrix &id);
+  /**
+   * @}
+   */
+
+  /**
+   * @name Copying into and out of other matrices
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * Assignment operator.
+   *
+   * @dealiiOperationIsMultithreaded
+   */
+  FullMatrix<number> &
+  operator = (const FullMatrix<number> &);
+
+  /**
+   * Variable assignment operator.
+   */
+  template <typename number2>
+  FullMatrix<number> &
+  operator = (const FullMatrix<number2> &);
+
+  /**
+   * This operator assigns a scalar to a matrix. To avoid confusion with the
+   * semantics of this function, zero is the only value allowed for
+   * <tt>d</tt>, allowing you to clear a matrix in an intuitive way.
+   *
+   * @dealiiOperationIsMultithreaded
+   */
+  FullMatrix<number> &
+  operator = (const number d);
+
+  /**
+   * Copy operator to create a full matrix that equals the identity matrix of
+   * the size of the argument. This way, one can easily create an identity
+   * matrix of size <code>n</code> by saying
+   * @code
+   *   M = IdentityMatrix(n);
+   * @endcode
+   */
+  FullMatrix<number> &
+  operator = (const IdentityMatrix &id);
+
+  /**
+   * Assignment operator for a LapackFullMatrix. The calling matrix must be of
+   * the same size as the LAPACK matrix.
+   */
+  template <typename number2>
+  FullMatrix<number> &
+  operator = (const LAPACKFullMatrix<number2> &);
+
+
+  /**
+   * Assignment from different matrix classes. This assignment operator uses
+   * iterators of the typename MatrixType. Therefore, sparse matrices are
+   * possible sources.
+   */
+  template <typename MatrixType>
+  void copy_from (const MatrixType &);
+
+  /**
+   * Transposing assignment from different matrix classes. This assignment
+   * operator uses iterators of the typename MatrixType. Therefore, sparse
+   * matrices are possible sources.
+   */
+  template <typename MatrixType>
+  void copy_transposed (const MatrixType &);
+
+  /**
+   * Fill matrix with elements extracted from a tensor, taking rows included
+   * between <tt>r_i</tt> and <tt>r_j</tt> and columns between <tt>c_i</tt>
+   * and <tt>c_j</tt>. The resulting matrix is then inserted in the
+   * destination matrix at position <tt>(dst_r, dst_c)</tt> Checks on the
+   * indices are made.
+   */
+  template <int dim>
+  void
+  copy_from (const Tensor<2,dim> &T,
+             const size_type src_r_i=0,
+             const size_type src_r_j=dim-1,
+             const size_type src_c_i=0,
+             const size_type src_c_j=dim-1,
+             const size_type dst_r=0,
+             const size_type dst_c=0);
+
+  /**
+   * Insert a submatrix (also rectangular) into a tensor, putting its upper
+   * left element at the specified position <tt>(dst_r, dst_c)</tt> and the
+   * other elements consequently. Default values are chosen so that no
+   * parameter needs to be specified if the size of the tensor and that of the
+   * matrix coincide.
+   */
+  template <int dim>
+  void
+  copy_to(Tensor<2,dim> &T,
+          const size_type src_r_i=0,
+          const size_type src_r_j=dim-1,
+          const size_type src_c_i=0,
+          const size_type src_c_j=dim-1,
+          const size_type dst_r=0,
+          const size_type dst_c=0) const;
+
+  /**
+   * Copy a subset of the rows and columns of another matrix into the current
+   * object.
+   *
+   * @param matrix The matrix from which a subset is to be taken from.
+   * @param row_index_set The set of rows of @p matrix from which to extract.
+   * @param column_index_set The set of columns of @p matrix from which to
+   * extract. @pre The number of elements in @p row_index_set and @p
+   * column_index_set shall be equal to the number of rows and columns in the
+   * current object. In other words, the current object is not resized for
+   * this operation.
+   */
+  template <typename MatrixType, typename index_type>
+  void extract_submatrix_from (const MatrixType              &matrix,
+                               const std::vector<index_type> &row_index_set,
+                               const std::vector<index_type> &column_index_set);
+
+  /**
+   * Copy the elements of the current matrix object into a specified set of
+   * rows and columns of another matrix. Thus, this is a scatter operation.
+   *
+   * @param row_index_set The rows of @p matrix into which to write.
+   * @param column_index_set The columns of @p matrix into which to write.
+   * @param matrix The matrix within which certain elements are to be
+   * replaced. @pre The number of elements in @p row_index_set and @p
+   * column_index_set shall be equal to the number of rows and columns in the
+   * current object. In other words, the current object is not resized for
+   * this operation.
+   */
+  template <typename MatrixType, typename index_type>
+  void
+  scatter_matrix_to (const std::vector<index_type> &row_index_set,
+                     const std::vector<index_type> &column_index_set,
+                     MatrixType                    &matrix) const;
+
+  /**
+   * Fill rectangular block.
+   *
+   * A rectangular block of the matrix <tt>src</tt> is copied into
+   * <tt>this</tt>. Within <tt>src</tt>, the upper left corner of the block is
+   * <tt>(src_offset_i,src_offset_j)</tt>; within <tt>this</tt>, the block is
+   * written with its upper left corner at <tt>(dst_offset_i,dst_offset_j)</tt>.
+   * The size of the rectangular block being copied is the maximum size
+   * possible, determined either by the size of <tt>this</tt> or <tt>src</tt>.
+   */
+  template <typename number2>
+  void fill (const FullMatrix<number2> &src,
+             const size_type dst_offset_i = 0,
+             const size_type dst_offset_j = 0,
+             const size_type src_offset_i = 0,
+             const size_type src_offset_j = 0);
+
+
+  /**
+   * Make function of base class available.
+   */
+  template <typename number2>
+  void fill (const number2 *);
+
+  /**
+   * Fill with permutation of another matrix.
+   *
+   * The matrix <tt>src</tt> is copied into the target. The two permutation
+   * <tt>p_r</tt> and <tt>p_c</tt> operate in a way, such that <tt>result(i,j)
+   * = src(p_r[i], p_c[j])</tt>.
+   *
+   * The vectors may also be a selection from a larger set of integers, if the
+   * matrix <tt>src</tt> is bigger. It is also possible to duplicate rows or
+   * columns by this method.
+   */
+  template <typename number2>
+  void fill_permutation (const FullMatrix<number2>       &src,
+                         const std::vector<size_type> &p_rows,
+                         const std::vector<size_type> &p_cols);
+
+  /**
+   * Set a particular entry of the matrix to a value. Thus, calling
+   * <code>A.set(1,2,3.141);</code> is entirely equivalent to the operation
+   * <code>A(1,2) = 3.141;</code>. This function exists for compatibility with
+   * the various sparse matrix objects.
+   *
+   * @param i The row index of the element to be set.
+   * @param j The columns index of the element to be set.
+   * @param value The value to be written into the element.
+   */
+  void set (const size_type i,
+            const size_type j,
+            const number value);
+  /**
+   * @}
+   */
+  /**
+   * @name Non-modifying operators
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * Comparison operator. Be careful with this thing, it may eat up huge
+   * amounts of computing time! It is most commonly used for internal
+   * consistency checks of programs.
+   */
+  bool operator == (const FullMatrix<number> &) const;
+
+  /**
+   * Number of rows of this matrix.  To remember: this matrix is an <i>m x
+   * n</i>-matrix.
+   */
+  size_type m () const;
+
+  /**
+   * Number of columns of this matrix.  To remember: this matrix is an <i>m x
+   * n</i>-matrix.
+   */
+  size_type n () const;
+
+  /**
+   * Return whether the matrix contains only elements with value zero. This
+   * function is mainly for internal consistency checks and should seldom be
+   * used when not in debug mode since it uses quite some time.
+   */
+  bool all_zero () const;
+
+  /**
+   * Return the square of the norm of the vector <tt>v</tt> induced by this
+   * matrix, i.e. <i>(v,Mv)</i>. This is useful, e.g. in the finite element
+   * context, where the <i>L<sup>2</sup></i> norm of a function equals the
+   * matrix norm with respect to the mass matrix of the vector representing
+   * the nodal values of the finite element function.
+   *
+   * Obviously, the matrix needs to be quadratic for this operation, and for
+   * the result to actually be a norm it also needs to be either real
+   * symmetric or complex hermitian.
+   *
+   * The underlying template types of both this matrix and the given vector
+   * should either both be real or complex-valued, but not mixed, for this
+   * function to make sense.
+   */
+  template <typename number2>
+  number2 matrix_norm_square (const Vector<number2> &v) const;
+
+  /**
+   * Build the matrix scalar product <tt>u<sup>T</sup> M v</tt>. This function
+   * is mostly useful when building the cellwise scalar product of two
+   * functions in the finite element context.
+   *
+   * The underlying template types of both this matrix and the given vector
+   * should either both be real or complex-valued, but not mixed, for this
+   * function to make sense.
+   */
+  template <typename number2>
+  number2 matrix_scalar_product (const Vector<number2> &u,
+                                 const Vector<number2> &v) const;
+
+  /**
+   * Return the <i>l<sub>1</sub></i>-norm of the matrix, where $||M||_1 =
+   * \max_j \sum_i |M_{ij}|$ (maximum of the sums over columns).
+   */
+  real_type l1_norm () const;
+
+  /**
+   * Return the $l_\infty$-norm of the matrix, where $||M||_\infty = \max_i
+   * \sum_j |M_{ij}|$ (maximum of the sums over rows).
+   */
+  real_type linfty_norm () const;
+
+  /**
+   * Compute the Frobenius norm of the matrix.  Return value is the root of
+   * the square sum of all matrix entries.
+   *
+   * @note For the timid among us: this norm is not the norm compatible with
+   * the <i>l<sub>2</sub></i>-norm of the vector space.
+   */
+  real_type frobenius_norm () const;
+
+  /**
+   * Compute the relative norm of the skew-symmetric part. The return value is
+   * the Frobenius norm of the skew-symmetric part of the matrix divided by
+   * that of the matrix.
+   *
+   * Main purpose of this function is to check, if a matrix is symmetric
+   * within a certain accuracy, or not.
+   */
+  real_type relative_symmetry_norm2 () const;
+
+  /**
+   * Computes the determinant of a matrix.  This is only implemented for one,
+   * two, and three dimensions, since for higher dimensions the numerical work
+   * explodes.  Obviously, the matrix needs to be quadratic for this function.
+   */
+  number determinant () const;
+
+  /**
+   * Return the trace of the matrix, i.e. the sum of the diagonal values
+   * (which happens to also equal the sum of the eigenvalues of a matrix).
+   * Obviously, the matrix needs to be quadratic for this function.
+   */
+  number trace () const;
+
+  /**
+   * Output of the matrix in user-defined format given by the specified
+   * precision and width. This function saves width and precision of the
+   * stream before setting these given values for output, and restores the
+   * previous values after output.
+   */
+  template <class StreamType>
+  void print (StreamType         &s,
+              const unsigned int  width=5,
+              const unsigned int  precision=2) const;
+
+  /**
+   * Print the matrix and allow formatting of entries.
+   *
+   * The parameters allow for a flexible setting of the output format:
+   *
+   * @arg <tt>precision</tt> denotes the number of trailing digits.
+   *
+   * @arg <tt>scientific</tt> is used to determine the number format, where
+   * <tt>scientific</tt> = <tt>false</tt> means fixed point notation.
+   *
+   * @arg <tt>width</tt> denotes the width of each column. A zero entry for
+   * <tt>width</tt> makes the function compute a width, but it may be changed
+   * to a positive value, if output is crude.
+   *
+   * @arg <tt>zero_string</tt> specifies a string printed for zero entries.
+   *
+   * @arg <tt>denominator</tt> Multiply the whole matrix by this common
+   * denominator to get nicer numbers.
+   *
+   * @arg <tt>threshold</tt>: all entries with absolute value smaller than
+   * this are considered zero.
+   */
+  void print_formatted (std::ostream       &out,
+                        const unsigned int  precision=3,
+                        const bool          scientific  = true,
+                        const unsigned int  width       = 0,
+                        const char         *zero_string = " ",
+                        const double        denominator = 1.,
+                        const double        threshold   = 0.) const;
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  std::size_t memory_consumption () const;
+
+//@}
+///@name Iterator functions
+//@{
+
+  /**
+   * Iterator starting at the first entry.
+   */
+  const_iterator begin () const;
+
+  /**
+   * Final iterator.
+   */
+  const_iterator end () const;
+
+  /**
+   * Iterator starting at the first entry of row <tt>r</tt>.
+   */
+  const_iterator begin (const size_type r) const;
+
+  /**
+   * Final iterator of row <tt>r</tt>.
+   */
+  const_iterator end (const size_type r) const;
+
+//@}
+///@name Modifying operators
+//@{
+
+  /**
+   * Scale the entire matrix by a fixed factor.
+   */
+  FullMatrix &operator *= (const number factor);
+
+  /**
+   * Scale the entire matrix by the inverse of the given factor.
+   */
+  FullMatrix &operator /= (const number factor);
+
+  /**
+   * Simple addition of a scaled matrix, i.e. <tt>*this += a*A</tt>.
+   *
+   * The matrix <tt>A</tt> may be a full matrix over an arbitrary underlying
+   * scalar type, as long as its data type is convertible to the data type of
+   * this matrix.
+   */
+  template <typename number2>
+  void add (const number               a,
+            const FullMatrix<number2> &A);
+
+  /**
+   * Multiple addition of scaled matrices, i.e. <tt>*this += a*A + b*B</tt>.
+   *
+   * The matrices <tt>A</tt> and <tt>B</tt> may be a full matrix over an
+   * arbitrary underlying scalar type, as long as its data type is convertible
+   * to the data type of this matrix.
+   */
+  template <typename number2>
+  void add (const number               a,
+            const FullMatrix<number2> &A,
+            const number               b,
+            const FullMatrix<number2> &B);
+
+  /**
+   * Multiple addition of scaled matrices, i.e. <tt>*this += a*A + b*B +
+   * c*C</tt>.
+   *
+   * The matrices <tt>A</tt>, <tt>B</tt> and <tt>C</tt> may be a full matrix
+   * over an arbitrary underlying scalar type, as long as its data type is
+   * convertible to the data type of this matrix.
+   */
+  template <typename number2>
+  void add (const number               a,
+            const FullMatrix<number2> &A,
+            const number               b,
+            const FullMatrix<number2> &B,
+            const number               c,
+            const FullMatrix<number2> &C);
+
+  /**
+   * Add rectangular block.
+   *
+   * A rectangular block of the matrix <tt>src</tt> is added to <tt>this</tt>.
+   * The upper left corner of the block being copied is
+   * <tt>(src_offset_i,src_offset_j)</tt>.  The upper left corner of the
+   * copied block is <tt>(dst_offset_i,dst_offset_j)</tt>.  The size of the
+   * rectangular block being copied is the maximum size possible, determined
+   * either by the size of <tt>this</tt> or <tt>src</tt> and the given
+   * offsets.
+   */
+  template <typename number2>
+  void add (const FullMatrix<number2> &src,
+            const number factor,
+            const size_type dst_offset_i = 0,
+            const size_type dst_offset_j = 0,
+            const size_type src_offset_i = 0,
+            const size_type src_offset_j = 0);
+
+  /**
+   * Weighted addition of the transpose of <tt>B</tt> to <tt>this</tt>.
+   *
+   * <i>A += s B<sup>T</sup></i>
+   */
+  template <typename number2>
+  void Tadd (const number               s,
+             const FullMatrix<number2> &B);
+
+  /**
+   * Add transpose of a rectangular block.
+   *
+   * A rectangular block of the matrix <tt>src</tt> is transposed and
+   * added to <tt>this</tt>. The upper left corner of the block being
+   * copied is <tt>(src_offset_i,src_offset_j)</tt> in the coordinates of the
+   * <b>non</b>-transposed matrix.  The upper left corner of the copied block
+   * is <tt>(dst_offset_i,dst_offset_j)</tt>.  The size of the rectangular
+   * block being copied is the maximum size possible, determined either by the
+   * size of <tt>this</tt> or <tt>src</tt>.
+   */
+  template <typename number2>
+  void Tadd (const FullMatrix<number2> &src,
+             const number               factor,
+             const size_type dst_offset_i = 0,
+             const size_type dst_offset_j = 0,
+             const size_type src_offset_i = 0,
+             const size_type src_offset_j = 0);
+
+  /**
+   * Add a single element at the given position.
+   */
+  void add (const size_type row,
+            const size_type column,
+            const number value);
+
+  /**
+   * Add an array of values given by <tt>values</tt> in the given global
+   * matrix row at columns specified by col_indices in the full matrix. This
+   * function is present for compatibility with the various sparse matrices in
+   * deal.II. In particular, the two boolean fields @p elide_zero_values and
+   * @p col_indices_are_sorted do not impact the performance of this routine,
+   * as opposed to the sparse matrix case and are indeed ignored in the
+   * implementation.
+   */
+  template <typename number2, typename index_type>
+  void add (const size_type     row,
+            const unsigned int  n_cols,
+            const index_type   *col_indices,
+            const number2      *values,
+            const bool          elide_zero_values = true,
+            const bool          col_indices_are_sorted = false);
+
+  /**
+   * <i>A(i,1...n) += s*A(j,1...n)</i>.  Simple addition of rows of this
+   */
+  void add_row (const size_type i,
+                const number    s,
+                const size_type j);
+
+  /**
+   * <i>A(i,1...n) += s*A(j,1...n) + t*A(k,1...n)</i>.  Multiple addition of
+   * rows of this.
+   */
+  void add_row (const size_type i,
+                const number s, const size_type j,
+                const number t, const size_type k);
+
+  /**
+   * <i>A(1...n,i) += s*A(1...n,j)</i>.  Simple addition of columns of this.
+   */
+  void add_col (const size_type i,
+                const number    s,
+                const size_type j);
+
+  /**
+   * <i>A(1...n,i) += s*A(1...n,j) + t*A(1...n,k)</i>.  Multiple addition of
+   * columns of this.
+   */
+  void add_col (const size_type i,
+                const number s, const size_type j,
+                const number t, const size_type k);
+
+  /**
+   * Swap <i>A(i,1...n) <-> A(j,1...n)</i>.  Swap rows i and j of this
+   */
+  void swap_row (const size_type i,
+                 const size_type j);
+
+  /**
+   * Swap <i>A(1...n,i) <-> A(1...n,j)</i>.  Swap columns i and j of this
+   */
+  void swap_col (const size_type i,
+                 const size_type j);
+
+  /**
+   * Add constant to diagonal elements of this, i.e. add a multiple of the
+   * identity matrix.
+   */
+  void diagadd (const number s);
+
+  /**
+   * Assignment <tt>*this = a*A</tt>.
+   */
+  template <typename number2>
+  void equ (const number               a,
+            const FullMatrix<number2> &A);
+
+  /**
+   * Assignment <tt>*this = a*A + b*B</tt>.
+   */
+  template <typename number2>
+  void equ (const number               a,
+            const FullMatrix<number2> &A,
+            const number               b,
+            const FullMatrix<number2> &B);
+
+  /**
+   * Assignment <tt>*this = a*A + b*B + c*C</tt>.
+   */
+  template <typename number2>
+  void equ (const number               a,
+            const FullMatrix<number2> &A,
+            const number               b,
+            const FullMatrix<number2> &B,
+            const number               c,
+            const FullMatrix<number2> &C);
+
+  /**
+   * Symmetrize the matrix by forming the mean value between the existing
+   * matrix and its transpose, <i>A = 1/2(A+A<sup>T</sup>)</i>.
+   *
+   * Obviously the matrix must be quadratic for this operation.
+   */
+  void symmetrize ();
+
+  /**
+   * A=Inverse(A). A must be a square matrix.  Inversion of this matrix by
+   * Gauss-Jordan algorithm with partial pivoting.  This process is well-
+   * behaved for positive definite matrices, but be aware of round-off errors
+   * in the indefinite case.
+   *
+   * In case deal.II was configured with LAPACK, the functions Xgetrf and
+   * Xgetri build an LU factorization and invert the matrix upon that
+   * factorization, providing best performance up to matrices with a few
+   * hundred rows and columns.
+   *
+   * The numerical effort to invert an <tt>n x n</tt> matrix is of the order
+   * <tt>n**3</tt>.
+   */
+  void gauss_jordan ();
+
+  /**
+   * Assign the inverse of the given matrix to <tt>*this</tt>. This function
+   * is hardcoded for quadratic matrices of dimension one to four. However,
+   * since the amount of code needed grows quickly, the method gauss_jordan()
+   * is invoked implicitly if the dimension is larger.
+   */
+  template <typename number2>
+  void invert (const FullMatrix<number2> &M);
+
+  /**
+   * Assign the Cholesky decomposition of the given matrix to <tt>*this</tt>.
+   * The given matrix must be symmetric positive definite.
+   *
+   * ExcMatrixNotPositiveDefinite will be thrown in the case that the matrix
+   * is not positive definite.
+   */
+  template <typename number2>
+  void cholesky (const FullMatrix<number2> &A);
+
+  /**
+   * <tt>*this(i,j)</tt> = $V(i) W(j)$ where $V,W$ are vectors of the same
+   * length.
+   */
+  template <typename number2>
+  void outer_product (const Vector<number2> &V,
+                      const Vector<number2> &W);
+
+  /**
+   * Assign the left_inverse of the given matrix to <tt>*this</tt>. The
+   * calculation being performed is <i>(A<sup>T</sup>*A)<sup>-1</sup>
+   * *A<sup>T</sup></i>.
+   */
+  template <typename number2>
+  void left_invert (const FullMatrix<number2> &M);
+
+  /**
+   * Assign the right_inverse of the given matrix to <tt>*this</tt>. The
+   * calculation being performed is <i>A<sup>T</sup>*(A*A<sup>T</sup>)
+   * <sup>-1</sup></i>.
+   */
+  template <typename number2>
+  void right_invert (const FullMatrix<number2> &M);
+
+//@}
+///@name Multiplications
+//@{
+
+  /**
+   * Matrix-matrix-multiplication.
+   *
+   * The optional parameter <tt>adding</tt> determines, whether the result is
+   * stored in <tt>C</tt> or added to <tt>C</tt>.
+   *
+   * if (adding) <i>C += A*B</i>
+   *
+   * if (!adding) <i>C = A*B</i>
+   *
+   * Assumes that <tt>A</tt> and <tt>B</tt> have compatible sizes and that
+   * <tt>C</tt> already has the right size.
+   *
+   * This function uses the BLAS function Xgemm if the product of the three
+   * matrix dimensions is larger than 300 and BLAS was detected during
+   * configuration. Using BLAS usually results in considerable performance
+   * gains.
+   */
+  template <typename number2>
+  void mmult (FullMatrix<number2>       &C,
+              const FullMatrix<number2> &B,
+              const bool                 adding=false) const;
+
+  /**
+   * Matrix-matrix-multiplication using transpose of <tt>this</tt>.
+   *
+   * The optional parameter <tt>adding</tt> determines, whether the result is
+   * stored in <tt>C</tt> or added to <tt>C</tt>.
+   *
+   * if (adding) <i>C += A<sup>T</sup>*B</i>
+   *
+   * if (!adding) <i>C = A<sup>T</sup>*B</i>
+   *
+   * Assumes that <tt>A</tt> and <tt>B</tt> have compatible sizes and that
+   * <tt>C</tt> already has the right size.
+   *
+   * This function uses the BLAS function Xgemm if the product of the three
+   * matrix dimensions is larger than 300 and BLAS was detected during
+   * configuration. Using BLAS usually results in considerable performance
+   * gains.
+   */
+  template <typename number2>
+  void Tmmult (FullMatrix<number2>       &C,
+               const FullMatrix<number2> &B,
+               const bool                 adding=false) const;
+
+  /**
+   * Matrix-matrix-multiplication using transpose of <tt>B</tt>.
+   *
+   * The optional parameter <tt>adding</tt> determines, whether the result is
+   * stored in <tt>C</tt> or added to <tt>C</tt>.
+   *
+   * if (adding) <i>C += A*B<sup>T</sup></i>
+   *
+   * if (!adding) <i>C = A*B<sup>T</sup></i>
+   *
+   * Assumes that <tt>A</tt> and <tt>B</tt> have compatible sizes and that
+   * <tt>C</tt> already has the right size.
+   *
+   * This function uses the BLAS function Xgemm if the product of the three
+   * matrix dimensions is larger than 300 and BLAS was detected during
+   * configuration. Using BLAS usually results in considerable performance
+   * gains.
+   */
+  template <typename number2>
+  void mTmult (FullMatrix<number2>       &C,
+               const FullMatrix<number2> &B,
+               const bool                 adding=false) const;
+
+  /**
+   * Matrix-matrix-multiplication using transpose of <tt>this</tt> and
+   * <tt>B</tt>.
+   *
+   * The optional parameter <tt>adding</tt> determines, whether the result is
+   * stored in <tt>C</tt> or added to <tt>C</tt>.
+   *
+   * if (adding) <i>C += A<sup>T</sup>*B<sup>T</sup></i>
+   *
+   * if (!adding) <i>C = A<sup>T</sup>*B<sup>T</sup></i>
+   *
+   * Assumes that <tt>A</tt> and <tt>B</tt> have compatible sizes and that
+   * <tt>C</tt> already has the right size.
+   *
+   * This function uses the BLAS function Xgemm if the product of the three
+   * matrix dimensions is larger than 300 and BLAS was detected during
+   * configuration. Using BLAS usually results in considerable performance
+   * gains.
+   */
+  template <typename number2>
+  void TmTmult (FullMatrix<number2>       &C,
+                const FullMatrix<number2> &B,
+                const bool                 adding=false) const;
+
+  /**
+   * Add to the current matrix the triple product <b>B A D</b>. Optionally,
+   * use the transposes of the matrices <b>B</b> and <b>D</b>. The scaling
+   * factor scales the whole product, which is helpful when adding a multiple
+   * of the triple product to the matrix.
+   *
+   * This product was written with the Schur complement <b>B<sup>T</sup>
+   * A<sup>-1</sup> D</b> in mind.  Note that in this case the argument for
+   * <tt>A</tt> must be the inverse of the matrix <b>A</b>.
+   */
+  void triple_product(const FullMatrix<number> &A,
+                      const FullMatrix<number> &B,
+                      const FullMatrix<number> &D,
+                      const bool transpose_B = false,
+                      const bool transpose_D = false,
+                      const number scaling = number(1.));
+
+  /**
+   * Matrix-vector-multiplication.
+   *
+   * The optional parameter <tt>adding</tt> determines, whether the result is
+   * stored in <tt>w</tt> or added to <tt>w</tt>.
+   *
+   * if (adding) <i>w += A*v</i>
+   *
+   * if (!adding) <i>w = A*v</i>
+   *
+   * Source and destination must not be the same vector.
+   */
+  template <typename number2>
+  void vmult (Vector<number2>       &w,
+              const Vector<number2> &v,
+              const bool             adding=false) const;
+
+  /**
+   * Adding Matrix-vector-multiplication.  <i>w += A*v</i>
+   *
+   * Source and destination must not be the same vector.
+   */
+  template <typename number2>
+  void vmult_add (Vector<number2>       &w,
+                  const Vector<number2> &v) const;
+
+  /**
+   * Transpose matrix-vector-multiplication.
+   *
+   * The optional parameter <tt>adding</tt> determines, whether the result is
+   * stored in <tt>w</tt> or added to <tt>w</tt>.
+   *
+   * if (adding) <i>w += A<sup>T</sup>*v</i>
+   *
+   * if (!adding) <i>w = A<sup>T</sup>*v</i>
+   *
+   *
+   * Source and destination must not be the same vector.
+   */
+  template <typename number2>
+  void Tvmult (Vector<number2>       &w,
+               const Vector<number2> &v,
+               const bool             adding=false) const;
+
+  /**
+   * Adding transpose matrix-vector-multiplication.  <i>w +=
+   * A<sup>T</sup>*v</i>
+   *
+   * Source and destination must not be the same vector.
+   */
+  template <typename number2>
+  void Tvmult_add (Vector<number2>       &w,
+                   const Vector<number2> &v) const;
+
+  /**
+   * Apply the Jacobi preconditioner, which multiplies every element of the
+   * <tt>src</tt> vector by the inverse of the respective diagonal element and
+   * multiplies the result with the damping factor <tt>omega</tt>.
+   */
+  template <typename somenumber>
+  void precondition_Jacobi (Vector<somenumber>       &dst,
+                            const Vector<somenumber> &src,
+                            const number              omega = 1.) const;
+
+  /**
+   * <i>dst=b-A*x</i>. Residual calculation, returns the
+   * <i>l<sub>2</sub></i>-norm |<i>dst</i>|.
+   *
+   * Source <i>x</i> and destination <i>dst</i> must not be the same vector.
+   */
+  template <typename number2, typename number3>
+  number residual (Vector<number2>       &dst,
+                   const Vector<number2> &x,
+                   const Vector<number3> &b) const;
+
+  /**
+   * Forward elimination of lower triangle.  Inverts the lower triangle of a
+   * rectangular matrix for a given right hand side.
+   *
+   * If the matrix has more columns than rows, this function only operates on
+   * the left quadratic submatrix. If there are more rows, the upper quadratic
+   * part of the matrix is considered.
+   *
+   * @note It is safe to use the same object for @p dst and @p src.
+   */
+  template <typename number2>
+  void forward (Vector<number2>       &dst,
+                const Vector<number2> &src) const;
+
+  /**
+   * Backward elimination of upper triangle.
+   *
+   * See forward()
+   *
+   * @note It is safe to use the same object for @p dst and @p src.
+   */
+  template <typename number2>
+  void backward (Vector<number2>       &dst,
+                 const Vector<number2> &src) const;
+
+  //@}
+
+  /**
+   * @addtogroup Exceptions
+   * @{
+   */
+
+  /**
+   * Exception
+   */
+  DeclException0 (ExcEmptyMatrix);
+
+  /**
+   * Exception
+   */
+  DeclException1 (ExcNotRegular,
+                  number,
+                  << "The maximal pivot is " << arg1
+                  << ", which is below the threshold. The matrix may be singular.");
+  /**
+   * Exception
+   */
+  DeclException3 (ExcInvalidDestination,
+                  size_type, size_type, size_type,
+                  << "Target region not in matrix: size in this direction="
+                  << arg1 << ", size of new matrix=" << arg2
+                  << ", offset=" << arg3);
+  /**
+   * Exception
+   */
+  DeclException0 (ExcSourceEqualsDestination);
+  /**
+   * Exception
+   */
+  DeclException0 (ExcMatrixNotPositiveDefinite);
+  //@}
+
+  friend class Accessor;
+};
+
+/**@}*/
+
+#ifndef DOXYGEN
+/*-------------------------Inline functions -------------------------------*/
+
+
+
+
+template <typename number>
+inline
+typename FullMatrix<number>::size_type
+FullMatrix<number>::m() const
+{
+  return this->n_rows();
+}
+
+
+
+template <typename number>
+inline
+typename FullMatrix<number>::size_type
+FullMatrix<number>::n() const
+{
+  return this->n_cols();
+}
+
+
+
+template <typename number>
+FullMatrix<number> &
+FullMatrix<number>::operator = (const number d)
+{
+  Assert (d==number(0), ExcScalarAssignmentOnlyForZeroValue());
+  (void)d; // removes -Wunused-parameter warning in optimized mode
+
+  if (this->n_elements() != 0)
+    this->reset_values();
+
+  return *this;
+}
+
+
+
+template <typename number>
+template <typename number2>
+inline
+void FullMatrix<number>::fill (const number2 *src)
+{
+  Table<2,number>::fill(src);
+}
+
+
+
+template <typename number>
+template <typename MatrixType>
+void
+FullMatrix<number>::copy_from (const MatrixType &M)
+{
+  this->reinit (M.m(), M.n());
+
+  // visit the stored entries of M one row at a time through its row
+  // iterators and write each of them to the same position of this
+  // matrix; positions M does not report keep their value from reinit()
+  for (size_type r = 0; r < M.m(); ++r)
+    {
+      typename MatrixType::const_iterator it = M.begin(r);
+      const typename MatrixType::const_iterator row_end = M.end(r);
+      for (; it != row_end; ++it)
+        this->el(r, it->column()) = it->value();
+    }
+}
+
+
+
+template <typename number>
+template <typename MatrixType>
+void
+FullMatrix<number>::copy_transposed (const MatrixType &M)
+{
+  this->reinit (M.n(), M.m());
+
+  // visit the stored entries of M one row at a time through its row
+  // iterators and write each of them to the mirrored position, i.e.
+  // entry (r,c) of M becomes entry (c,r) of this matrix
+  for (size_type r = 0; r < M.m(); ++r)
+    {
+      typename MatrixType::const_iterator it = M.begin(r);
+      const typename MatrixType::const_iterator row_end = M.end(r);
+      for (; it != row_end; ++it)
+        this->el(it->column(), r) = it->value();
+    }
+}
+
+
+
+template <typename number>
+template <typename MatrixType, typename index_type>
+inline
+void
+FullMatrix<number>::extract_submatrix_from (const MatrixType              &matrix,
+                                            const std::vector<index_type> &row_index_set,
+                                            const std::vector<index_type> &column_index_set)
+{
+  AssertDimension(row_index_set.size(), this->n_rows());
+  AssertDimension(column_index_set.size(), this->n_cols());
+
+  // entry (i,j) is read from matrix.el(row_index_set[i], column_index_set[j])
+  const size_type n_picked_rows = row_index_set.size(),
+                  n_picked_cols = column_index_set.size();
+  for (size_type i = 0; i < n_picked_rows; ++i)
+    for (size_type j = 0; j < n_picked_cols; ++j)
+      (*this)(i, j) = matrix.el(row_index_set[i], column_index_set[j]);
+}
+
+
+
+template <typename number>
+template <typename MatrixType, typename index_type>
+inline
+void
+FullMatrix<number>::scatter_matrix_to (const std::vector<index_type> &row_index_set,
+                                       const std::vector<index_type> &column_index_set,
+                                       MatrixType                    &matrix) const
+{
+  AssertDimension(row_index_set.size(), this->n_rows());
+  AssertDimension(column_index_set.size(), this->n_cols());
+
+  // entry (i,j) is stored via matrix.set(row_index_set[i], column_index_set[j], .)
+  const size_type n_local_rows = row_index_set.size(),
+                  n_local_cols = column_index_set.size();
+  for (size_type i = 0; i < n_local_rows; ++i)
+    for (size_type j = 0; j < n_local_cols; ++j)
+      matrix.set(row_index_set[i],
+                 column_index_set[j],
+                 (*this)(i, j));
+}
+
+
+template <typename number>
+inline
+void
+FullMatrix<number>::set (const size_type i,
+                         const size_type j,
+                         const number value)
+{
+  (*this)(i,j) = value;
+}
+
+
+
+template <typename number>
+template <typename number2>
+void
+FullMatrix<number>::vmult_add (Vector<number2>       &w,
+                               const Vector<number2> &v) const
+{
+  vmult(w, v, true);
+}
+
+
+template <typename number>
+template <typename number2>
+void
+FullMatrix<number>::Tvmult_add (Vector<number2>       &w,
+                                const Vector<number2> &v) const
+{
+  Tvmult(w, v, true);
+}
+
+
+//---------------------------------------------------------------------------
+
+
+template <typename number>
+inline
+FullMatrix<number>::Accessor::
+Accessor (const FullMatrix<number> *matrix,
+          const size_type r,
+          const size_type c)
+  :
+  matrix(matrix),
+  a_row(r),
+  a_col(c)
+{}
+
+
+template <typename number>
+inline
+typename FullMatrix<number>::size_type
+FullMatrix<number>::Accessor::row() const
+{
+  return a_row;
+}
+
+
+template <typename number>
+inline
+typename FullMatrix<number>::size_type
+FullMatrix<number>::Accessor::column() const
+{
+  return a_col;
+}
+
+
+template <typename number>
+inline
+number
+FullMatrix<number>::Accessor::value() const
+{
+  AssertIsFinite(matrix->el(a_row, a_col));
+  return matrix->el(a_row, a_col);
+}
+
+
+template <typename number>
+inline
+FullMatrix<number>::const_iterator::
+const_iterator(const FullMatrix<number> *matrix,
+               const size_type r,
+               const size_type c)
+  :
+  accessor(matrix, r, c)
+{}
+
+
+template <typename number>
+inline
+typename FullMatrix<number>::const_iterator &
+FullMatrix<number>::const_iterator::operator++ ()
+{
+  Assert (accessor.a_row < accessor.matrix->m(), ExcIteratorPastEnd());
+  // step to the next column; past the last column, wrap to the next row
+  ++accessor.a_col;
+  if (accessor.a_col >= accessor.matrix->n())
+    {
+      accessor.a_col = 0;
+      accessor.a_row++;   // may reach m(), the position end() constructs
+    }
+  return *this;
+}
+
+
+template <typename number>
+inline
+const typename FullMatrix<number>::Accessor &
+FullMatrix<number>::const_iterator::operator* () const
+{
+  return accessor;
+}
+
+
+template <typename number>
+inline
+const typename FullMatrix<number>::Accessor *
+FullMatrix<number>::const_iterator::operator-> () const
+{
+  return &accessor;
+}
+
+
+template <typename number>
+inline
+bool
+FullMatrix<number>::const_iterator::
+operator == (const const_iterator &other) const
+{
+  return (accessor.row() == other.accessor.row() &&
+          accessor.column() == other.accessor.column());
+}
+
+
+template <typename number>
+inline
+bool
+FullMatrix<number>::const_iterator::
+operator != (const const_iterator &other) const
+{
+  return ! (*this == other);
+}
+
+
+template <typename number>
+inline
+bool
+FullMatrix<number>::const_iterator::
+operator < (const const_iterator &other) const
+{
+  return (accessor.row() < other.accessor.row() ||
+          (accessor.row() == other.accessor.row() &&
+           accessor.column() < other.accessor.column()));
+}
+
+
+template <typename number>
+inline
+bool
+FullMatrix<number>::const_iterator::
+operator > (const const_iterator &other) const
+{
+  return (other < *this);
+}
+
+
+template <typename number>
+inline
+typename FullMatrix<number>::const_iterator
+FullMatrix<number>::begin () const
+{
+  return const_iterator(this, 0, 0);
+}
+
+
+template <typename number>
+inline
+typename FullMatrix<number>::const_iterator
+FullMatrix<number>::end () const
+{
+  return const_iterator(this, m(), 0);
+}
+
+
+template <typename number>
+inline
+typename FullMatrix<number>::const_iterator
+FullMatrix<number>::begin (const size_type r) const
+{
+  Assert (r<m(), ExcIndexRange(r,0,m()));
+  return const_iterator(this, r, 0);
+}
+
+
+
+template <typename number>
+inline
+typename FullMatrix<number>::const_iterator
+FullMatrix<number>::end (const size_type r) const
+{
+  Assert (r<m(), ExcIndexRange(r,0,m()));
+  return const_iterator(this, r+1, 0);
+}
+
+
+
+template <typename number>
+inline
+void
+FullMatrix<number>::add (const size_type r, const size_type c, const number v)
+{
+  AssertIndexRange(r, this->m());
+  AssertIndexRange(c, this->n());
+
+  this->operator()(r,c) += v;
+}
+
+
+
+template <typename number>
+template <typename number2, typename index_type>
+inline
+void
+FullMatrix<number>::add (const size_type    row,
+                         const unsigned int n_cols,
+                         const index_type  *col_indices,
+                         const number2     *values,
+                         const bool,   // elide_zero_values, unused here
+                         const bool)   // col_indices_are_sorted, unused here
+{
+  AssertIndexRange(row, this->m());
+  for (size_type k=0; k<n_cols; ++k)
+    {
+      AssertIndexRange(col_indices[k], this->n());
+      (*this)(row,col_indices[k]) += values[k];
+    }
+}
+
+
+template <typename number>
+template <class StreamType>
+inline
+void
+FullMatrix<number>::print (StreamType         &s,
+                           const unsigned int  w,
+                           const unsigned int  p) const
+{
+  Assert (!this->empty(), ExcEmptyMatrix());
+
+  // set precision p and width w; keep what the stream returns as old settings
+  const unsigned int old_precision = s.precision (p);
+  const unsigned int old_width = s.width (w);
+
+  for (size_type i=0; i<this->m(); ++i)
+    {
+      for (size_type j=0; j<this->n(); ++j)
+        {
+          s.width(w);   // set again before every entry
+          s.precision(p);
+          s << this->el(i,j);
+        }
+      s << std::endl;   // one output line per matrix row
+    }
+
+  // put the saved output format back
+  s.precision(old_precision);
+  s.width(old_width);
+}
+
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/full_matrix.templates.h b/include/deal.II/lac/full_matrix.templates.h
new file mode 100644
index 0000000..e2311be
--- /dev/null
+++ b/include/deal.II/lac/full_matrix.templates.h
@@ -0,0 +1,1865 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__full_matrix_templates_h
+#define dealii__full_matrix_templates_h
+
+
+// TODO: this file has a lot of operations between matrices and matrices or
+// matrices and vectors of different precision. we should go through the
+// file and in each case pick the more accurate data type for intermediate
+// results. currently, the choice is pretty much random. this may also allow
+// us some operations where one operand is complex and the other is not
+// -> use ProductType<T,U> type trait for the results
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/template_constraints.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/lapack_full_matrix.h>
+#include <deal.II/lac/lapack_templates.h>
+
+#include <vector>
+#include <cmath>
+#include <cstdlib>
+#include <cstdio>
+#include <algorithm>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+template <typename number>
+FullMatrix<number>::FullMatrix (const size_type n)
+  :
+  Table<2,number> (n,n)
+{}
+
+
+template <typename number>
+FullMatrix<number>::FullMatrix (const size_type m,
+                                const size_type n)
+  :
+  Table<2,number> (m, n)
+{}
+
+
+template <typename number>
+FullMatrix<number>::FullMatrix (const size_type m,
+                                const size_type n,
+                                const number *entries)
+  :
+  Table<2,number> (m, n)
+{
+  this->fill (entries);
+}
+
+
+template <typename number>
+FullMatrix<number>::FullMatrix (const FullMatrix &m)
+  :
+  Table<2,number> (m)
+{}
+
+
+
+template <typename number>
+FullMatrix<number>::FullMatrix (const IdentityMatrix &id)
+  :
+  Table<2,number> (id.m(), id.n())
+{
+  for (size_type i=0; i<id.m(); ++i)
+    (*this)(i,i) = 1;
+}
+
+
+template <typename number>
+FullMatrix<number> &
+FullMatrix<number>::operator = (const FullMatrix<number> &M)
+{
+  Table<2,number>::operator=(M);
+  return *this;
+}
+
+
+template <typename number>
+template <typename number2>
+FullMatrix<number> &
+FullMatrix<number>::operator = (const FullMatrix<number2> &M)
+{
+  TableBase<2,number>::operator=(M);
+  return *this;
+}
+
+
+
+template <typename number>
+FullMatrix<number> &
+FullMatrix<number>::operator = (const IdentityMatrix &id)
+{
+  this->reinit (id.m(), id.n());
+  for (size_type i=0; i<id.m(); ++i)
+    (*this)(i,i) = 1.;
+
+  return *this;
+}
+
+// Copy all entries of the LAPACK matrix M into this matrix. Both matrices
+// must already have identical dimensions; this operator does not resize.
+template <typename number>
+template <typename number2>
+FullMatrix<number> &
+FullMatrix<number>::operator = (const LAPACKFullMatrix<number2> &M)
+{
+  Assert (this->m() == M.n_rows(), ExcDimensionMismatch(this->m(), M.n_rows()));
+  Assert (this->n() == M.n_cols(), ExcDimensionMismatch(this->n(), M.n_cols()));
+  for (size_type i=0; i<this->m(); ++i)
+    for (size_type j=0; j<this->n(); ++j)
+      (*this)(i,j) = M(i,j);
+
+  return *this;
+}
+
+// Return whether every stored entry compares equal to zero. The scan stops
+// at the first nonzero entry; the matrix must not be empty.
+template <typename number>
+bool
+FullMatrix<number>::all_zero () const
+{
+  Assert (!this->empty(), ExcEmptyMatrix());
+
+  const number *const first = &this->values[0];
+  const number *const last = first + this->n_elements();
+  for (const number *entry = first; entry != last; ++entry)
+    if (*entry != number(0.0))
+      return false;
+
+  return true;
+}
+
+// Scale every entry of the matrix by 'factor', in place, and return a
+// reference to this matrix.
+template <typename number>
+FullMatrix<number> &
+FullMatrix<number>::operator *= (const number factor)
+{
+  // the factor must be a finite number
+  AssertIsFinite(factor);
+
+  number *const first = &(*this)(0,0);
+  const number *const last = first + n()*m();
+  for (number *entry = first; entry != last; ++entry)
+    *entry *= factor;
+
+  return *this;
+}
+
+// Divide every entry of the matrix by 'factor', in place, and return a
+// reference to this matrix.
+template <typename number>
+FullMatrix<number> &
+FullMatrix<number>::operator /= (const number factor)
+{
+  // the divisor itself must be a finite number
+  AssertIsFinite(factor);
+
+  number *const first = &(*this)(0,0);
+  const number *const last = first + n()*m();
+
+  // divide once, then multiply every entry by the reciprocal
+  const number reciprocal = number(1.)/factor;
+  AssertIsFinite(reciprocal);
+
+  for (number *entry = first; entry != last; ++entry)
+    *entry *= reciprocal;
+
+  return *this;
+}
+
+// Matrix-vector product: dst = A*src, or dst += A*src if 'adding' is set.
+// dst and src must be different vectors of sizes m() and n(), respectively.
+template <typename number>
+template <typename number2>
+void
+FullMatrix<number>::vmult (Vector<number2> &dst,
+                           const Vector<number2> &src,
+                           const bool adding) const
+{
+  Assert (!this->empty(), ExcEmptyMatrix());
+
+  Assert(dst.size() == m(), ExcDimensionMismatch(dst.size(), m()));
+  Assert(src.size() == n(), ExcDimensionMismatch(src.size(), n()));
+
+  Assert (&src != &dst, ExcSourceEqualsDestination());
+
+  const size_type n_rows = m(), n_cols = n();
+  const number *entry = &this->values[0];
+  // take the address of the first element of src
+  // through the non-const element access, so that
+  // the inner loop can index the raw array
+  const number2 *src_values = &(*const_cast<Vector<number2>*>(&src))(0);
+  for (size_type row=0; row<n_rows; ++row)
+    {
+      number2 sum = adding ? dst(row) : 0.;
+      for (size_type col=0; col<n_cols; ++col)
+        sum += src_values[col] * number2(*(entry++));
+      dst(row) = sum;
+    }
+}
+
+// Transpose matrix-vector product: dst = A^T*src, or dst += A^T*src if
+// 'adding' is set. The matrix entries are traversed in storage order.
+template <typename number>
+template <typename number2>
+void FullMatrix<number>::Tvmult (Vector<number2>       &dst,
+                                 const Vector<number2> &src,
+                                 const bool             adding) const
+{
+  Assert (!this->empty(), ExcEmptyMatrix());
+
+  Assert(dst.size() == n(), ExcDimensionMismatch(dst.size(), n()));
+  Assert(src.size() == m(), ExcDimensionMismatch(src.size(), m()));
+
+  Assert (&src != &dst, ExcSourceEqualsDestination());
+
+  const size_type n_rows = m(), n_cols = n();
+  const number *entry = &this->values[0];
+  number2 *result = &dst(0);
+
+  // start from a zero vector unless we accumulate
+  if (adding == false)
+    for (size_type col=0; col<n_cols; ++col)
+      result[col] = 0.;
+
+  // each row of the matrix contributes src(row) times
+  // that row, read contiguously from memory
+  for (size_type row=0; row<n_rows; ++row)
+    {
+      const number2 weight = src(row);
+      for (size_type col=0; col<n_cols; ++col)
+        result[col] += weight * number2(*(entry++));
+    }
+}
+
+// Residual dst = right - A*src (A is m() x n()); returns the l2 norm of dst.
+template <typename number>
+template <typename number2, typename number3>
+number FullMatrix<number>::residual (Vector<number2> &dst,
+                                     const Vector<number2> &src,
+                                     const Vector<number3> &right) const
+{
+  Assert (!this->empty(), ExcEmptyMatrix());
+
+  Assert(dst.size() == m(), ExcDimensionMismatch(dst.size(), m()));
+  Assert(src.size() == n(), ExcDimensionMismatch(src.size(), n()));
+  Assert(right.size() == m(), ExcDimensionMismatch(right.size(), m()));
+
+  Assert (&src != &dst, ExcSourceEqualsDestination());
+
+  number res = 0.;
+  const size_type size_m = m(),
+                  size_n = n();
+  for (size_type i=0; i<size_m; ++i)   // one residual entry per matrix row
+    {
+      number s = number(right(i));
+      for (size_type j=0; j<size_n; ++j)   // accumulate over the columns
+        s -= number(src(j)) * (*this)(i,j);
+      dst(i) = s;
+      res += s*s;
+    }
+  return std::sqrt(res);
+}
+
+// Forward substitution with the lower triangle of the leading square block:
+// solves L*dst = src, walking the rows from the first one down to the last.
+template <typename number>
+template <typename number2>
+void FullMatrix<number>::forward (Vector<number2>       &dst,
+                                  const Vector<number2> &src) const
+{
+  Assert (!this->empty(), ExcEmptyMatrix());
+
+  Assert (dst.size() == m(), ExcDimensionMismatch(dst.size(), m()));
+  Assert (src.size() == n(), ExcDimensionMismatch(src.size(), n()));
+
+  // only the leading square block of size min(m(),n()) takes part
+  const size_type n_steps = std::min (m(), n());
+  for (size_type row=0; row<n_steps; ++row)
+    {
+      number sum = number(src(row));
+      for (size_type col=0; col<row; ++col)
+        sum -= number(dst(col)) * (*this)(row,col);
+      dst(row) = sum/(*this)(row,row);
+      AssertIsFinite(dst(row));
+    }
+}
+
+// Back substitution with the upper triangle of the leading square block:
+// solves U*dst = src, walking the rows from the last one up to the first.
+template <typename number>
+template <typename number2>
+void FullMatrix<number>::backward (Vector<number2>       &dst,
+                                   const Vector<number2> &src) const
+{
+  Assert (!this->empty(), ExcEmptyMatrix());
+
+  size_type j;
+  size_type nu = (m()<n() ? m() : n());
+  for (size_type i=nu; i-- > 0; )   // unsigned countdown: nu-1, ..., 0
+    {
+      number2 s = src(i);
+      for (j=i+1; j<nu; ++j)
+        s -= dst(j) * number2((*this)(i,j));
+      dst(i) = s/number2((*this)(i,i));
+      AssertIsFinite(dst(i));
+    }
+}
+
+// Copy the largest block that fits from src, starting at the source
+// offsets, into this matrix, starting at the destination offsets.
+template <typename number>
+template <typename number2>
+void FullMatrix<number>::fill (const FullMatrix<number2> &src,
+                               const size_type dst_offset_i,
+                               const size_type dst_offset_j,
+                               const size_type src_offset_i,
+                               const size_type src_offset_j)
+{
+  Assert (dst_offset_i < m(),
+          ExcIndexRange (dst_offset_i, 0, m()));
+  Assert (dst_offset_j < n(),
+          ExcIndexRange (dst_offset_j, 0, n()));
+  Assert (src_offset_i < src.m(),
+          ExcIndexRange (src_offset_i, 0, src.m()));
+  Assert (src_offset_j < src.n(),
+          ExcIndexRange (src_offset_j, 0, src.n()));
+
+  // the block extends to whichever matrix ends first in each direction
+  const size_type n_block_rows = std::min (m() - dst_offset_i,
+                                           src.m() - src_offset_i);
+  const size_type n_block_cols = std::min (n() - dst_offset_j,
+                                           src.n() - src_offset_j);
+
+  for (size_type r=0; r<n_block_rows; ++r)
+    for (size_type c=0; c<n_block_cols; ++c)
+      (*this)(dst_offset_i+r,dst_offset_j+c)
+        = src(src_offset_i+r,src_offset_j+c);
+}
+
+
+// Fill this matrix with permuted entries of src: entry (i,j) of this matrix
+// becomes src(p_rows[i], p_cols[j]).
+template <typename number>
+template <typename number2>
+void FullMatrix<number>::fill_permutation (const FullMatrix<number2> &src,
+                                           const std::vector<size_type> &p_rows,
+                                           const std::vector<size_type> &p_cols)
+{
+  Assert (p_rows.size() == this->n_rows(),
+          ExcDimensionMismatch (p_rows.size(), this->n_rows()));
+  Assert (p_cols.size() == this->n_cols(),
+          ExcDimensionMismatch (p_cols.size(), this->n_cols()));
+
+  const size_type n_r = this->n_rows();
+  const size_type n_c = this->n_cols();
+
+  for (size_type row=0; row<n_r; ++row)
+    {
+      // row of src that is mapped onto the current row
+      const size_type src_row = p_rows[row];
+      for (size_type col=0; col<n_c; ++col)
+        (*this)(row,col) = src(src_row, p_cols[col]);
+    }
+}
+
+
+
+// Add s times row j to row i, i.e. A(i,k) += s*A(j,k) for every column k.
+template <typename number>
+void FullMatrix<number>::add_row (const size_type i,
+                                  const number s,
+                                  const size_type j)
+{
+  Assert (!this->empty(), ExcEmptyMatrix());
+
+  // k is a column index, so it has to run over the n() columns (not over the
+  // m() rows); the two only coincide for square matrices.
+  for (size_type k=0; k<n(); ++k)
+    (*this)(i,k) += s*(*this)(j,k);
+}
+
+
+// Add s times row j plus t times row k to row i, i.e.
+// A(i,l) += s*A(j,l) + t*A(k,l) for every column l.
+template <typename number>
+void FullMatrix<number>::add_row (const size_type i,
+                                  const number s,
+                                  const size_type j,
+                                  const number t,
+                                  const size_type k)
+{
+  Assert (!this->empty(), ExcEmptyMatrix());
+
+  // l is a column index, so its bound is the number of columns n() (not the
+  // number of rows m()); the two only coincide for square matrices.
+  const size_type size_n = n();
+  for (size_type l=0; l<size_n; ++l)
+    (*this)(i,l) += s*(*this)(j,l) + t*(*this)(k,l);
+}
+
+
+// Add s times column j to column i, i.e. A(k,i) += s*A(k,j) for every row k.
+template <typename number>
+void FullMatrix<number>::add_col (const size_type i, const number s,
+                                  const size_type j)
+{
+  Assert (!this->empty(), ExcEmptyMatrix());
+
+  // k is a row index, so it has to run over the m() rows (not over the n()
+  // columns); the two only coincide for square matrices.
+  for (size_type k=0; k<m(); ++k)
+    (*this)(k,i) += s*(*this)(k,j);
+}
+
+
+// Add s times column j plus t times column k to column i, i.e.
+// A(l,i) += s*A(l,j) + t*A(l,k) for every row l.
+template <typename number>
+void FullMatrix<number>::add_col (const size_type i, const number s,
+                                  const size_type j, const number t,
+                                  const size_type k)
+{
+  Assert (!this->empty(), ExcEmptyMatrix());
+
+  // l is a row index, so it has to run over the m() rows (not over the n()
+  // columns); the two only coincide for square matrices.
+  for (size_type l=0; l<m(); ++l)
+    (*this)(l,i) += s*(*this)(l,j) + t*(*this)(l,k);
+}
+
+
+
+// Exchange the contents of rows i and j, one column at a time.
+template <typename number>
+void FullMatrix<number>::swap_row (const size_type i,
+                                   const size_type j)
+{
+  Assert (!this->empty(), ExcEmptyMatrix());
+
+  const size_type n_columns = n();
+  for (size_type col=0; col<n_columns; ++col)
+    {
+      std::swap ((*this)(i,col), (*this)(j,col));
+    }
+}
+
+
+// Exchange the contents of columns i and j, one row at a time.
+template <typename number>
+void FullMatrix<number>::swap_col (const size_type i,
+                                   const size_type j)
+{
+  Assert (!this->empty(), ExcEmptyMatrix());
+
+  const size_type n_lines = m();
+  for (size_type row=0; row<n_lines; ++row)
+    {
+      std::swap ((*this)(row,i), (*this)(row,j));
+    }
+}
+
+
+// Add the scalar src to every diagonal entry of this (square) matrix.
+template <typename number>
+void FullMatrix<number>::diagadd (const number src)
+{
+  Assert (!this->empty(), ExcEmptyMatrix());
+  Assert (m() == n(), ExcDimensionMismatch(m(),n()));
+
+  const size_type dim = n();
+  for (size_type d=0; d<dim; ++d)
+    {
+      (*this)(d,d) += src;
+    }
+}
+
+
+// Assignment this = a*A, entry by entry; A must have the size of this matrix.
+template <typename number>
+template <typename number2>
+void FullMatrix<number>::equ (const number               a,
+                              const FullMatrix<number2> &A)
+{
+  Assert (!this->empty(), ExcEmptyMatrix());
+
+  Assert (m() == A.m(), ExcDimensionMismatch(m(), A.m()));
+  Assert (n() == A.n(), ExcDimensionMismatch(n(), A.n()));
+
+  const size_type n_r = m();
+  const size_type n_c = n();
+
+  for (size_type row=0; row<n_r; ++row)
+    {
+      for (size_type col=0; col<n_c; ++col)
+        (*this)(row,col) = a * number(A(row,col));
+    }
+}
+
+
+// Assignment this = a*A + b*B, entry by entry; A and B must have the size of
+// this matrix.
+template <typename number>
+template <typename number2>
+void
+FullMatrix<number>::equ (const number               a,
+                         const FullMatrix<number2> &A,
+                         const number               b,
+                         const FullMatrix<number2> &B)
+{
+  Assert (!this->empty(), ExcEmptyMatrix());
+
+  Assert (m() == A.m(), ExcDimensionMismatch(m(), A.m()));
+  Assert (n() == A.n(), ExcDimensionMismatch(n(), A.n()));
+  Assert (m() == B.m(), ExcDimensionMismatch(m(), B.m()));
+  Assert (n() == B.n(), ExcDimensionMismatch(n(), B.n()));
+
+  const size_type n_r = m();
+  const size_type n_c = n();
+
+  for (size_type row=0; row<n_r; ++row)
+    {
+      for (size_type col=0; col<n_c; ++col)
+        (*this)(row,col) = a * number(A(row,col)) + b * number(B(row,col));
+    }
+}
+
+
+// Assignment this = a*A + b*B + c*C, entry by entry; A, B and C must have the
+// size of this matrix.
+template <typename number>
+template <typename number2>
+void
+FullMatrix<number>::equ (const number               a,
+                         const FullMatrix<number2> &A,
+                         const number               b,
+                         const FullMatrix<number2> &B,
+                         const number               c,
+                         const FullMatrix<number2> &C)
+{
+  Assert (!this->empty(), ExcEmptyMatrix());
+
+  Assert (m() == A.m(), ExcDimensionMismatch(m(), A.m()));
+  Assert (n() == A.n(), ExcDimensionMismatch(n(), A.n()));
+  Assert (m() == B.m(), ExcDimensionMismatch(m(), B.m()));
+  Assert (n() == B.n(), ExcDimensionMismatch(n(), B.n()));
+  Assert (m() == C.m(), ExcDimensionMismatch(m(), C.m()));
+  Assert (n() == C.n(), ExcDimensionMismatch(n(), C.n()));
+
+  const size_type n_r = m();
+  const size_type n_c = n();
+
+  for (size_type row=0; row<n_r; ++row)
+    {
+      for (size_type col=0; col<n_c; ++col)
+        (*this)(row,col) = a * number(A(row,col)) +
+                           b * number(B(row,col)) +
+                           c * number(C(row,col));
+    }
+}
+
+
+
+// Matrix-matrix product dst = (*this) * src, or dst += (*this) * src if
+// 'adding' is true.
+template <typename number>
+template <typename number2>
+void FullMatrix<number>::mmult (FullMatrix<number2>       &dst,
+                                const FullMatrix<number2> &src,
+                                const bool                 adding) const
+{
+  Assert (!this->empty(), ExcEmptyMatrix());
+  Assert (n() == src.m(), ExcDimensionMismatch(n(), src.m()));
+  Assert (dst.n() == src.n(), ExcDimensionMismatch(dst.n(), src.n()));
+  Assert (dst.m() == m(), ExcDimensionMismatch(m(), dst.m()));
+
+  // Hand the work to BLAS if the scalar types allow it and the matrices are
+  // not too small (BLAS is usually not efficient for very small matrices):
+#ifdef DEAL_II_WITH_LAPACK
+  const bool blas_compatible_types
+    = (types_are_equal<number,double>::value
+       ||
+       types_are_equal<number,float>::value)
+      &&
+      types_are_equal<number,number2>::value;
+
+  if (blas_compatible_types && (this->n()*this->m()*src.n() > 300))
+    {
+      // gemm provides better performance than the native loops below since
+      // it uses cache and register blocking in order to access local data.
+      //
+      // BLAS/LAPACK stores matrix elements column-wise, whereas FullMatrix
+      // stores them row-wise. We ignore that difference: BLAS sees our
+      // row-wise data as the transposed matrices, builds the product of
+      // those, and we read the result as if it were row-wise again. In other
+      // words, we calculate (B^T A^T)^T, which is AB.
+      const int blas_m = src.n();
+      const int blas_n = this->m();
+      const int blas_k = this->n();
+      const char *no_transpose = "n";
+
+      const number scale_product = 1.;
+      const number scale_dst     = adding ? 1. : 0.;
+
+      gemm(no_transpose, no_transpose, &blas_m, &blas_n, &blas_k,
+           &scale_product, &src(0,0), &blas_m,
+           &this->values[0], &blas_k, &scale_dst, &dst(0,0), &blas_m);
+
+      return;
+    }
+
+#endif
+
+  const size_type n_dst_rows = this->m();
+  const size_type n_dst_cols = src.n();
+  const size_type n_inner    = this->n();
+
+  // Accumulate every entry of the result in a local variable and write it
+  // once. This keeps write operations low (writing is usually more costly
+  // than reading), at the price of non-contiguous access to src.
+  for (size_type row=0; row<n_dst_rows; ++row)
+    for (size_type col=0; col<n_dst_cols; ++col)
+      {
+        number2 entry = adding ? dst(row,col) : 0.;
+        for (size_type q=0; q<n_inner; ++q)
+          entry += number2((*this)(row,q)) * number2(src(q,col));
+        dst(row,col) = entry;
+      }
+}
+
+
+
+// Matrix-matrix product with the transpose of this matrix:
+// dst = (*this)^T * src, or dst += (*this)^T * src if 'adding' is true.
+template <typename number>
+template <typename number2>
+void FullMatrix<number>::Tmmult (FullMatrix<number2>       &dst,
+                                 const FullMatrix<number2> &src,
+                                 const bool                 adding) const
+{
+  Assert (!this->empty(), ExcEmptyMatrix());
+  Assert (m() == src.m(), ExcDimensionMismatch(m(), src.m()));
+  Assert (n() == dst.m(), ExcDimensionMismatch(n(), dst.m()));
+  Assert (src.n() == dst.n(), ExcDimensionMismatch(src.n(), dst.n()));
+
+
+  // Hand the work to BLAS if the scalar types allow it and the matrices are
+  // not too small (BLAS is usually not efficient for very small matrices):
+#ifdef DEAL_II_WITH_LAPACK
+  const bool blas_compatible_types
+    = (types_are_equal<number,double>::value
+       ||
+       types_are_equal<number,float>::value)
+      &&
+      types_are_equal<number,number2>::value;
+
+  if (blas_compatible_types && (this->n()*this->m()*src.n() > 300))
+    {
+      // gemm provides better performance than the native loops below since
+      // it uses cache and register blocking in order to access local data.
+      //
+      // BLAS/LAPACK stores matrix elements column-wise, whereas FullMatrix
+      // stores them row-wise. We ignore that difference: BLAS sees our
+      // row-wise data as the transposed matrices, builds the product of
+      // those, and we read the result as if it were row-wise again. In other
+      // words, we calculate (B^T A)^T, which is A^T B.
+      const int blas_m = src.n();
+      const int blas_n = this->n();
+      const int blas_k = this->m();
+      const char *transpose    = "t";
+      const char *no_transpose = "n";
+
+      const number scale_product = 1.;
+      const number scale_dst     = adding ? 1. : 0.;
+
+      gemm(no_transpose, transpose, &blas_m, &blas_n, &blas_k,
+           &scale_product, &src(0,0), &blas_m,
+           &this->values[0], &blas_n, &scale_dst, &dst(0,0), &blas_m);
+
+      return;
+    }
+
+#endif
+
+  const size_type n_dst_rows = n();
+  const size_type n_dst_cols = src.n();
+  const size_type n_inner    = this->m();
+
+  if (PointerComparison::equal(this, &src))
+    {
+      // this and src are the same object, so the result is symmetric:
+      // compute the entries on and above the diagonal only and mirror them
+      for (size_type row=0; row<n_dst_rows; ++row)
+        for (size_type col=row; col<n_dst_rows; ++col)
+          {
+            number2 entry = 0.;
+            for (size_type q=0; q<n_inner; ++q)
+              entry += number2((*this)(q,row)) * number2((*this)(q,col));
+
+            if (adding)
+              {
+                dst(row,col) += entry;
+                // do not add the diagonal entry twice
+                if (row<col)
+                  dst(col,row) += entry;
+              }
+            else
+              {
+                dst(col,row) = entry;
+                dst(row,col) = entry;
+              }
+          }
+    }
+  else
+    {
+      // Accumulate every entry of the result in a local variable and write
+      // it once. This keeps write operations low (writing is usually more
+      // costly than reading), at the price of non-contiguous access to src.
+      // Big matrices should usually end up in the optimized gemm operation
+      // above, so this shouldn't be too bad.
+      for (size_type row=0; row<n_dst_rows; ++row)
+        for (size_type col=0; col<n_dst_cols; ++col)
+          {
+            number2 entry = adding ? dst(row,col) : 0.;
+            for (size_type q=0; q<n_inner; ++q)
+              entry += number2((*this)(q,row)) * number2(src(q,col));
+            dst(row,col) = entry;
+          }
+    }
+}
+
+
+
+// Matrix-matrix product with the transpose of src:
+// dst = (*this) * src^T, or dst += (*this) * src^T if 'adding' is true.
+template <typename number>
+template <typename number2>
+void FullMatrix<number>::mTmult (FullMatrix<number2>       &dst,
+                                 const FullMatrix<number2> &src,
+                                 const bool                 adding) const
+{
+  Assert (!this->empty(), ExcEmptyMatrix());
+  Assert (n() == src.n(), ExcDimensionMismatch(n(), src.n()));
+  Assert (dst.n() == src.m(), ExcDimensionMismatch(dst.n(), src.m()));
+  Assert (dst.m() == m(), ExcDimensionMismatch(m(), dst.m()));
+
+  // Hand the work to BLAS if the scalar types allow it and the matrices are
+  // not too small (BLAS is usually not efficient for very small matrices):
+#ifdef DEAL_II_WITH_LAPACK
+  const bool blas_compatible_types
+    = (types_are_equal<number,double>::value
+       ||
+       types_are_equal<number,float>::value)
+      &&
+      types_are_equal<number,number2>::value;
+
+  if (blas_compatible_types && (this->n()*this->m()*src.m() > 300))
+    {
+      // gemm provides better performance than the native loops below since
+      // it uses cache and register blocking in order to access local data.
+      //
+      // BLAS/LAPACK stores matrix elements column-wise, whereas FullMatrix
+      // stores them row-wise. We ignore that difference: BLAS sees our
+      // row-wise data as the transposed matrices, builds the product of
+      // those, and we read the result as if it were row-wise again. In other
+      // words, we calculate (B A^T)^T, which is AB^T.
+      const int blas_m = src.m();
+      const int blas_n = this->m();
+      const int blas_k = this->n();
+      const char *no_transpose = "n";
+      const char *transpose    = "t";
+
+      const number scale_product = 1.;
+      const number scale_dst     = adding ? 1. : 0.;
+
+      gemm(transpose, no_transpose, &blas_m, &blas_n, &blas_k,
+           &scale_product, &src(0,0), &blas_k,
+           &this->values[0], &blas_k, &scale_dst, &dst(0,0), &blas_m);
+
+      return;
+    }
+
+#endif
+
+  const size_type n_dst_rows = this->m();
+  const size_type n_dst_cols = src.m();
+  const size_type n_inner    = this->n();
+
+  if (PointerComparison::equal(this, &src))
+    {
+      // this and src are the same object, so the result is symmetric:
+      // compute the entries on and above the diagonal only and mirror them
+      for (size_type row=0; row<n_dst_rows; ++row)
+        for (size_type col=row; col<n_dst_rows; ++col)
+          {
+            number2 entry = 0.;
+            for (size_type q=0; q<n_inner; ++q)
+              entry += number2((*this)(row,q)) * number2((*this)(col,q));
+
+            if (adding)
+              {
+                dst(row,col) += entry;
+                // do not add the diagonal entry twice
+                if (row<col)
+                  dst(col,row) += entry;
+              }
+            else
+              {
+                dst(col,row) = entry;
+                dst(row,col) = entry;
+              }
+          }
+    }
+  else
+    {
+      // Accumulate every entry of the result in a local variable and write
+      // it once. This keeps write operations low (writing is usually more
+      // costly than reading).
+      for (size_type row=0; row<n_dst_rows; ++row)
+        for (size_type col=0; col<n_dst_cols; ++col)
+          {
+            number2 entry = adding ? dst(row,col) : 0.;
+            for (size_type q=0; q<n_inner; ++q)
+              entry += number2((*this)(row,q)) * number2(src(col,q));
+            dst(row,col) = entry;
+          }
+    }
+}
+
+
+
+// Matrix-matrix product of the two transposes:
+// dst = (*this)^T * src^T, or dst += (*this)^T * src^T if 'adding' is true.
+template <typename number>
+template <typename number2>
+void FullMatrix<number>::TmTmult (FullMatrix<number2>       &dst,
+                                  const FullMatrix<number2> &src,
+                                  const bool                 adding) const
+{
+  Assert (!this->empty(), ExcEmptyMatrix());
+  Assert (m() == src.n(), ExcDimensionMismatch(m(), src.n()));
+  Assert (n() == dst.m(), ExcDimensionMismatch(n(), dst.m()));
+  Assert (src.m() == dst.n(), ExcDimensionMismatch(src.m(), dst.n()));
+
+
+  // Hand the work to BLAS if the scalar types allow it and the matrices are
+  // not too small (BLAS is usually not efficient for very small matrices):
+#ifdef DEAL_II_WITH_LAPACK
+  const bool blas_compatible_types
+    = (types_are_equal<number,double>::value
+       ||
+       types_are_equal<number,float>::value)
+      &&
+      types_are_equal<number,number2>::value;
+
+  if (blas_compatible_types && (this->n()*this->m()*src.m() > 300))
+    {
+      // gemm provides better performance than the native loops below since
+      // it uses cache and register blocking in order to access local data.
+      //
+      // BLAS/LAPACK stores matrix elements column-wise, whereas FullMatrix
+      // stores them row-wise. We ignore that difference: BLAS sees our
+      // row-wise data as the transposed matrices, builds the product of
+      // those, and we read the result as if it were row-wise again. In other
+      // words, we calculate (B A)^T, which is A^T B^T.
+      const int blas_m = src.m();
+      const int blas_n = this->n();
+      const int blas_k = this->m();
+      const char *transpose = "t";
+
+      const number scale_product = 1.;
+      const number scale_dst     = adding ? 1. : 0.;
+
+      gemm(transpose, transpose, &blas_m, &blas_n, &blas_k,
+           &scale_product, &src(0,0), &blas_k,
+           &this->values[0], &blas_n, &scale_dst, &dst(0,0), &blas_m);
+
+      return;
+    }
+
+#endif
+
+  const size_type n_dst_rows = n();
+  const size_type n_dst_cols = src.m();
+  const size_type n_inner    = this->m();
+
+  // Accumulate every entry of the result in a local variable and write it
+  // once. This keeps write operations low (writing is usually more costly
+  // than reading), even though the calling matrix is then accessed in a
+  // non-contiguous way, possibly leading to cache misses. Big matrices should
+  // usually end up in the optimized gemm operation above, so this shouldn't
+  // be too bad.
+  for (size_type row=0; row<n_dst_rows; ++row)
+    for (size_type col=0; col<n_dst_cols; ++col)
+      {
+        number2 entry = adding ? dst(row,col) : 0.;
+        for (size_type q=0; q<n_inner; ++q)
+          entry += number2((*this)(q,row)) * number2(src(col,q));
+        dst(row,col) = entry;
+      }
+}
+
+
+// Add the scaled triple product scaling * op(B) * A * op(D) to this matrix,
+// where op(B) is B^T if transpose_B is set (B otherwise) and op(D) is D^T if
+// transpose_D is set (D otherwise).
+template <typename number>
+void
+FullMatrix<number>::triple_product(
+  const FullMatrix<number> &A,
+  const FullMatrix<number> &B,
+  const FullMatrix<number> &D,
+  const bool transpose_B,
+  const bool transpose_D,
+  const number scaling)
+{
+  // check that B fits between this matrix and A
+  if (transpose_B)
+    {
+      AssertDimension(B.m(), A.m());
+      AssertDimension(B.n(), m());
+    }
+  else
+    {
+      AssertDimension(B.n(), A.m());
+      AssertDimension(B.m(), m());
+    }
+  // check that D fits between A and this matrix
+  if (transpose_D)
+    {
+      AssertDimension(D.n(), A.n());
+      AssertDimension(D.m(), n());
+    }
+  else
+    {
+      AssertDimension(D.m(), A.n());
+      AssertDimension(D.n(), n());
+    }
+
+  // Visit every entry (a_row,col) of the intermediate product A*op(D)
+  for (size_type a_row=0; a_row<A.m(); ++a_row)
+    for (size_type col=0; col<n(); ++col)
+      {
+        // compute that entry ...
+        number AD_entry = 0.;
+        for (size_type q=0; q<A.n(); ++q)
+          AD_entry += A(a_row,q) * (transpose_D ? D(col,q) : D(q,col));
+
+        // ... and distribute it over column 'col' of this matrix, weighted
+        // with the matching entries of op(B)
+        for (size_type row=0; row<m(); ++row)
+          this->operator()(row,col)
+          += scaling * AD_entry * (transpose_B ? B(a_row,row) : B(row,a_row));
+      }
+}
+
+
+// Return the sum over all rows of (row of this matrix times v) multiplied by
+// the conjugate of the matching entry of v. The matrix must be square and of
+// the size of v.
+template <typename number>
+template <typename number2>
+number2
+FullMatrix<number>::matrix_norm_square (const Vector<number2> &v) const
+{
+  Assert (!this->empty(), ExcEmptyMatrix());
+
+  Assert(m() == v.size(), ExcDimensionMismatch(m(),v.size()));
+  Assert(n() == v.size(), ExcDimensionMismatch(n(),v.size()));
+
+  number2 result = 0.;
+  const size_type dim = m();
+
+  // walks through the matrix entries in storage order, row after row
+  const number *entry = &this->values[0];
+
+  for (size_type row=0; row<dim; ++row)
+    {
+      // scalar product of the current matrix row with v
+      number row_times_v = 0.;
+      const number2 *v_entry = v.begin();
+      for (size_type col=0; col<dim; ++col, ++entry, ++v_entry)
+        row_times_v += number(*entry) * number(*v_entry);
+
+      result += row_times_v *
+                number(numbers::NumberTraits<number2>::conjugate(v(row)));
+    }
+
+  return result;
+}
+
+
+// Return the sum over all rows of (row of this matrix times v) multiplied by
+// the matching entry of u. u must have m() entries and v must have n()
+// entries.
+template <typename number>
+template <typename number2>
+number2
+FullMatrix<number>::matrix_scalar_product (const Vector<number2> &u,
+                                           const Vector<number2> &v) const
+{
+  Assert (!this->empty(), ExcEmptyMatrix());
+
+  // report the size of the vector that is actually being checked (u here,
+  // v below)
+  Assert(m() == u.size(), ExcDimensionMismatch(m(),u.size()));
+  Assert(n() == v.size(), ExcDimensionMismatch(n(),v.size()));
+
+  number2 sum = 0.;
+  const size_type n_rows = m();
+  const size_type n_cols = n();
+  // walks through the matrix entries in storage order, row after row
+  const number *val_ptr = &this->values[0];
+  const number2 *v_ptr;
+
+  for (size_type row=0; row<n_rows; ++row)
+    {
+      // scalar product of the current matrix row with v
+      number s = 0.;
+      const number *const val_end_of_row = val_ptr+n_cols;
+      v_ptr = v.begin();
+      while (val_ptr != val_end_of_row)
+        s += number(*val_ptr++) * number(*v_ptr++);
+
+      sum += s * number(u(row));
+    }
+
+  return sum;
+}
+
+
+
+// Make this (square) matrix symmetric by replacing every pair of entries
+// (i,j) and (j,i) with their mean value.
+template <typename number>
+void
+FullMatrix<number>::symmetrize ()
+{
+  Assert (m() == n(), ExcNotQuadratic());
+
+  const size_type dim = m();
+  for (size_type row=0; row<dim; ++row)
+    {
+      // only pairs above the diagonal need to be visited
+      for (size_type col=row+1; col<dim; ++col)
+        {
+          const number mean = ((*this)(row,col) + (*this)(col,row)) / number(2.);
+          (*this)(col,row) = mean;
+          (*this)(row,col) = mean;
+        }
+    }
+}
+
+
+// Return the largest column sum of the absolute values of the entries.
+template <typename number>
+typename FullMatrix<number>::real_type
+FullMatrix<number>::l1_norm () const
+{
+  Assert (!this->empty(), ExcEmptyMatrix());
+
+  const size_type n_rows = m();
+  const size_type n_cols = n();
+
+  real_type largest_sum = 0;
+  for (size_type col=0; col<n_cols; ++col)
+    {
+      real_type column_sum = 0;
+      for (size_type row=0; row<n_rows; ++row)
+        column_sum += std::abs((*this)(row,col));
+
+      if (column_sum > largest_sum)
+        largest_sum = column_sum;
+    }
+  return largest_sum;
+}
+
+
+
+// Return the largest row sum of the absolute values of the entries.
+template <typename number>
+typename FullMatrix<number>::real_type
+FullMatrix<number>::linfty_norm () const
+{
+  Assert (!this->empty(), ExcEmptyMatrix());
+
+  const size_type n_rows = m();
+  const size_type n_cols = n();
+
+  real_type largest_sum = 0;
+  for (size_type row=0; row<n_rows; ++row)
+    {
+      real_type row_sum = 0;
+      for (size_type col=0; col<n_cols; ++col)
+        row_sum += std::abs((*this)(row,col));
+
+      if (row_sum > largest_sum)
+        largest_sum = row_sum;
+    }
+  return largest_sum;
+}
+
+
+
+// Scaled addition this += a*A, entry by entry; A must have the size of this
+// matrix.
+template <typename number>
+template <typename number2>
+void
+FullMatrix<number>::add (const number               a,
+                         const FullMatrix<number2> &A)
+{
+  Assert (!this->empty(), ExcEmptyMatrix());
+
+  Assert (m() == A.m(), ExcDimensionMismatch(m(), A.m()));
+  Assert (n() == A.n(), ExcDimensionMismatch(n(), A.n()));
+
+  const size_type n_r = m();
+  const size_type n_c = n();
+
+  for (size_type row=0; row<n_r; ++row)
+    {
+      for (size_type col=0; col<n_c; ++col)
+        (*this)(row,col) += a * number(A(row,col));
+    }
+}
+
+
+// Scaled addition this += a*A + b*B, entry by entry; A and B must have the
+// size of this matrix.
+template <typename number>
+template <typename number2>
+void
+FullMatrix<number>::add (const number               a,
+                         const FullMatrix<number2> &A,
+                         const number               b,
+                         const FullMatrix<number2> &B)
+{
+  Assert (!this->empty(), ExcEmptyMatrix());
+
+  Assert (m() == A.m(), ExcDimensionMismatch(m(), A.m()));
+  Assert (n() == A.n(), ExcDimensionMismatch(n(), A.n()));
+  Assert (m() == B.m(), ExcDimensionMismatch(m(), B.m()));
+  Assert (n() == B.n(), ExcDimensionMismatch(n(), B.n()));
+
+  const size_type n_r = m();
+  const size_type n_c = n();
+
+  for (size_type row=0; row<n_r; ++row)
+    {
+      for (size_type col=0; col<n_c; ++col)
+        (*this)(row,col) += a * number(A(row,col)) + b * number(B(row,col));
+    }
+}
+
+
+
+// Scaled addition this += a*A + b*B + c*C, entry by entry; A, B and C must
+// have the size of this matrix.
+template <typename number>
+template <typename number2>
+void
+FullMatrix<number>::add (const number               a,
+                         const FullMatrix<number2> &A,
+                         const number               b,
+                         const FullMatrix<number2> &B,
+                         const number               c,
+                         const FullMatrix<number2> &C)
+{
+  Assert (!this->empty(), ExcEmptyMatrix());
+
+  Assert (m() == A.m(), ExcDimensionMismatch(m(), A.m()));
+  Assert (n() == A.n(), ExcDimensionMismatch(n(), A.n()));
+  Assert (m() == B.m(), ExcDimensionMismatch(m(), B.m()));
+  Assert (n() == B.n(), ExcDimensionMismatch(n(), B.n()));
+  Assert (m() == C.m(), ExcDimensionMismatch(m(), C.m()));
+  Assert (n() == C.n(), ExcDimensionMismatch(n(), C.n()));
+
+  const size_type n_r = m();
+  const size_type n_c = n();
+
+  for (size_type row=0; row<n_r; ++row)
+    {
+      for (size_type col=0; col<n_c; ++col)
+        (*this)(row,col) += a * number(A(row,col)) +
+                            b * number(B(row,col)) +
+                            c * number(C(row,col));
+    }
+}
+
+
+
+// Add 'factor' times a rectangular block of src to this matrix. The block
+// starts at (src_offset_i,src_offset_j) in src and is added starting at
+// (dst_offset_i,dst_offset_j); its extent is the largest one that fits into
+// both matrices.
+template <typename number>
+template <typename number2>
+void FullMatrix<number>::add (const FullMatrix<number2> &src,
+                              const number factor,
+                              const size_type dst_offset_i,
+                              const size_type dst_offset_j,
+                              const size_type src_offset_i,
+                              const size_type src_offset_j)
+{
+  Assert (dst_offset_i < m(),
+          ExcIndexRange (dst_offset_i, 0, m()));
+  Assert (dst_offset_j < n(),
+          ExcIndexRange (dst_offset_j, 0, n()));
+  Assert (src_offset_i < src.m(),
+          ExcIndexRange (src_offset_i, 0, src.m()));
+  Assert (src_offset_j < src.n(),
+          ExcIndexRange (src_offset_j, 0, src.n()));
+
+  // extent of the block: limited by whichever matrix runs out first
+  const size_type n_block_rows = std::min (m() - dst_offset_i,
+                                           src.m() - src_offset_i);
+  const size_type n_block_cols = std::min (n() - dst_offset_j,
+                                           src.n() - src_offset_j);
+
+  for (size_type r=0; r<n_block_rows; ++r)
+    {
+      for (size_type c=0; c<n_block_cols; ++c)
+        {
+          (*this)(dst_offset_i+r, dst_offset_j+c)
+          += factor * number(src(src_offset_i+r, src_offset_j+c));
+        }
+    }
+}
+
+
+
+// Add 'factor' times the transpose of a rectangular block of src to this
+// matrix. The block starts at row src_offset_i and column src_offset_j of
+// src (coordinates of the non-transposed matrix) and is added starting at
+// (dst_offset_i,dst_offset_j); its extent is the largest one that fits into
+// both matrices.
+template <typename number>
+template <typename number2>
+void FullMatrix<number>::Tadd (const FullMatrix<number2> &src,
+                               const number factor,
+                               const size_type dst_offset_i,
+                               const size_type dst_offset_j,
+                               const size_type src_offset_i,
+                               const size_type src_offset_j)
+{
+  Assert (dst_offset_i < m(),
+          ExcIndexRange (dst_offset_i, 0, m()));
+  Assert (dst_offset_j < n(),
+          ExcIndexRange (dst_offset_j, 0, n()));
+  // src is accessed as src(src_offset_i+j, src_offset_j+i) below, so
+  // src_offset_i is a row offset (bounded by src.m()) and src_offset_j is a
+  // column offset (bounded by src.n()). These checks also guarantee that the
+  // unsigned subtractions below cannot wrap around.
+  Assert (src_offset_i < src.m(),
+          ExcIndexRange (src_offset_i, 0, src.m()));
+  Assert (src_offset_j < src.n(),
+          ExcIndexRange (src_offset_j, 0, src.n()));
+
+  // Compute maximal size of copied block: rows of this matrix correspond to
+  // columns of src and vice versa
+  const size_type rows = std::min (m() - dst_offset_i, src.n() - src_offset_j);
+  const size_type cols = std::min (n() - dst_offset_j,
+                                   src.m() - src_offset_i);
+
+
+  for (size_type i=0; i<rows ; ++i)
+    for (size_type j=0; j<cols ; ++j)
+      (*this)(dst_offset_i+i,dst_offset_j+j)
+      += factor * number(src(src_offset_i+j,src_offset_j+i));
+}
+
+
+
+// Scaled addition of a transposed matrix: this += a*A^T, entry by entry.
+// Both matrices must be square and of equal size.
+template <typename number>
+template <typename number2>
+void
+FullMatrix<number>::Tadd (const number a,
+                          const FullMatrix<number2> &A)
+{
+  Assert (!this->empty(), ExcEmptyMatrix());
+
+  Assert (m() == n(),     ExcNotQuadratic());
+  Assert (m() == A.m(), ExcDimensionMismatch(m(), A.m()));
+  Assert (n() == A.n(), ExcDimensionMismatch(n(), A.n()));
+
+  const size_type outer_end = n();
+  const size_type inner_end = m();
+
+  for (size_type row=0; row<outer_end; ++row)
+    {
+      for (size_type col=0; col<inner_end; ++col)
+        (*this)(row,col) += a * number(A(col,row));
+    }
+}
+
+
+// Comparison operator; the work is done by the Table<2,number> base class.
+template <typename number>
+bool
+FullMatrix<number>::operator == (const FullMatrix<number> &M) const
+{
+  return this->Table<2,number>::operator== (M);
+}
+
+
+// Return the determinant of this (square) matrix. Explicit formulas are
+// provided for one, two and three dimensions; any other size triggers
+// ExcNotImplemented and yields zero.
+template <typename number>
+number
+FullMatrix<number>::determinant () const
+{
+  Assert (!this->empty(), ExcEmptyMatrix());
+
+  Assert (this->n_cols() == this->n_rows(),
+          ExcDimensionMismatch(this->n_cols(), this->n_rows()));
+
+  // shorthand to keep the formulas readable
+  const FullMatrix<number> &A = *this;
+
+  switch (this->n_cols())
+    {
+    case 1:
+      return A(0,0);
+
+    case 2:
+      return A(0,0)*A(1,1) - A(1,0)*A(0,1);
+
+    case 3:
+      return (A(0,0)*A(1,1)*A(2,2)
+              -A(0,0)*A(1,2)*A(2,1)
+              -A(1,0)*A(0,1)*A(2,2)
+              +A(1,0)*A(0,2)*A(2,1)
+              +A(2,0)*A(0,1)*A(1,2)
+              -A(2,0)*A(0,2)*A(1,1));
+
+    default:
+      Assert (false, ExcNotImplemented());
+      return 0;
+    }
+}
+
+
+
+// Return the sum of the diagonal entries of this (square) matrix.
+template <typename number>
+number
+FullMatrix<number>::trace () const
+{
+  Assert (!this->empty(), ExcEmptyMatrix());
+
+  Assert (this->n_cols() == this->n_rows(),
+          ExcDimensionMismatch(this->n_cols(), this->n_rows()));
+
+  const size_type dim = this->n_rows();
+
+  number diagonal_sum = 0;
+  for (size_type d=0; d<dim; ++d)
+    {
+      diagonal_sum += (*this)(d,d);
+    }
+
+  return diagonal_sum;
+}
+
+
+
+// Return the square root of the sum of the absolute squares of all entries.
+template <typename number>
+typename FullMatrix<number>::real_type
+FullMatrix<number>::frobenius_norm () const
+{
+  Assert (!this->empty(), ExcEmptyMatrix());
+
+  // the entries are visited in storage order through the flat value array
+  const size_type n_entries = this->n_rows()*this->n_cols();
+
+  real_type sum_of_squares = 0.;
+  for (size_type e=0; e<n_entries; ++e)
+    {
+      sum_of_squares += numbers::NumberTraits<number>::abs_square(this->values[e]);
+    }
+  return std::sqrt(sum_of_squares);
+}
+
+
+
+// Return sqrt(sum |a_ij - a_ji|^2) / sqrt(sum |a_ij|^2), or zero if all
+// entries vanish.
+template <typename number>
+typename FullMatrix<number>::real_type
+FullMatrix<number>::relative_symmetry_norm2 () const
+{
+  Assert (!this->empty(), ExcEmptyMatrix());
+
+  real_type total_sq = 0.;  // sum of |a_ij|^2
+  real_type diff_sq  = 0.;  // sum of |a_ij - a_ji|^2
+
+  for (size_type row=0; row<this->n_rows(); ++row)
+    for (size_type col=0; col<this->n_cols(); ++col)
+      {
+        const number entry    = (*this)(row,col);
+        const number mirrored = (*this)(col,row);
+
+        diff_sq  += numbers::NumberTraits<number>::abs_square(entry-mirrored);
+        total_sq += numbers::NumberTraits<number>::abs_square(entry);
+      }
+
+  // avoid the division for a matrix without nonzero entries
+  if (total_sq == 0.)
+    return 0;
+
+  return std::sqrt(diff_sq)/std::sqrt(total_sq);
+}
+
+
+
+template <typename number>
+template <typename number2>
+void
+FullMatrix<number>::invert (const FullMatrix<number2> &M)
+{
+  Assert (!this->empty(), ExcEmptyMatrix());
+
+  Assert (this->n_cols() == this->n_rows(),
+          ExcNotQuadratic());
+  Assert (this->n_cols() == M.n_cols(),
+          ExcDimensionMismatch(this->n_cols(), M.n_cols()));
+  Assert (this->n_rows() == M.n_rows(),
+          ExcDimensionMismatch(this->n_rows(), M.n_rows()));
+
+  if (PointerComparison::equal(&M, this))
+    {
+      // avoid overwriting source
+      // by destination matrix:
+      FullMatrix<number2> M2 = M;
+      invert(M2);
+    }
+  else
+    switch (this->n_cols())
+      {
+      case 1:
+        (*this)(0,0) = number2(1.0)/M(0,0);
+        return;
+      case 2:
+        // this is Maple output,
+        // thus a bit unstructured
+      {
+        const number2 t4 = number2(1.0)/(M(0,0)*M(1,1)-M(0,1)*M(1,0));
+        (*this)(0,0) = M(1,1)*t4;
+        (*this)(0,1) = -M(0,1)*t4;
+        (*this)(1,0) = -M(1,0)*t4;
+        (*this)(1,1) = M(0,0)*t4;
+        return;
+      };
+
+      case 3:
+      {
+        const number2 t4 = M(0,0)*M(1,1),
+                      t6 = M(0,0)*M(1,2),
+                      t8 = M(0,1)*M(1,0),
+                      t00 = M(0,2)*M(1,0),
+                      t01 = M(0,1)*M(2,0),
+                      t04 = M(0,2)*M(2,0),
+                      t07 = number2(1.0)/(t4*M(2,2)-t6*M(2,1)-t8*M(2,2)+
+                                          t00*M(2,1)+t01*M(1,2)-t04*M(1,1));
+        (*this)(0,0) = (M(1,1)*M(2,2)-M(1,2)*M(2,1))*t07;
+        (*this)(0,1) = -(M(0,1)*M(2,2)-M(0,2)*M(2,1))*t07;
+        (*this)(0,2) = -(-M(0,1)*M(1,2)+M(0,2)*M(1,1))*t07;
+        (*this)(1,0) = -(M(1,0)*M(2,2)-M(1,2)*M(2,0))*t07;
+        (*this)(1,1) = (M(0,0)*M(2,2)-t04)*t07;
+        (*this)(1,2) = -(t6-t00)*t07;
+        (*this)(2,0) = -(-M(1,0)*M(2,1)+M(1,1)*M(2,0))*t07;
+        (*this)(2,1) = -(M(0,0)*M(2,1)-t01)*t07;
+        (*this)(2,2) = (t4-t8)*t07;
+        return;
+      };
+
+      case 4:
+      {
+        // with (linalg);
+        // a:=matrix(4,4);
+        // evalm(a);
+        // ai:=inverse(a);
+        // readlib(C);
+        // C(ai,optimized,filename=x4);
+
+        const number2 t14 = M(0,0)*M(1,1);
+        const number2 t15 = M(2,2)*M(3,3);
+        const number2 t17 = M(2,3)*M(3,2);
+        const number2 t19 = M(0,0)*M(2,1);
+        const number2 t20 = M(1,2)*M(3,3);
+        const number2 t22 = M(1,3)*M(3,2);
+        const number2 t24 = M(0,0)*M(3,1);
+        const number2 t25 = M(1,2)*M(2,3);
+        const number2 t27 = M(1,3)*M(2,2);
+        const number2 t29 = M(1,0)*M(0,1);
+        const number2 t32 = M(1,0)*M(2,1);
+        const number2 t33 = M(0,2)*M(3,3);
+        const number2 t35 = M(0,3)*M(3,2);
+        const number2 t37 = M(1,0)*M(3,1);
+        const number2 t38 = M(0,2)*M(2,3);
+        const number2 t40 = M(0,3)*M(2,2);
+        const number2 t42 = t14*t15-t14*t17-t19*t20+t19*t22+
+                            t24*t25-t24*t27-t29*t15+t29*t17+
+                            t32*t33-t32*t35-t37*t38+t37*t40;
+        const number2 t43 = M(2,0)*M(0,1);
+        const number2 t46 = M(2,0)*M(1,1);
+        const number2 t49 = M(2,0)*M(3,1);
+        const number2 t50 = M(0,2)*M(1,3);
+        const number2 t52 = M(0,3)*M(1,2);
+        const number2 t54 = M(3,0)*M(0,1);
+        const number2 t57 = M(3,0)*M(1,1);
+        const number2 t60 = M(3,0)*M(2,1);
+        const number2 t63 = t43*t20-t43*t22-t46*t33+t46*t35+
+                            t49*t50-t49*t52-t54*t25+t54*t27+
+                            t57*t38-t57*t40-t60*t50+t60*t52;
+        const number2 t65 = number2(1.)/(t42+t63);
+        const number2 t71 = M(0,2)*M(2,1);
+        const number2 t73 = M(0,3)*M(2,1);
+        const number2 t75 = M(0,2)*M(3,1);
+        const number2 t77 = M(0,3)*M(3,1);
+        const number2 t81 = M(0,1)*M(1,2);
+        const number2 t83 = M(0,1)*M(1,3);
+        const number2 t85 = M(0,2)*M(1,1);
+        const number2 t87 = M(0,3)*M(1,1);
+        const number2 t101 = M(1,0)*M(2,2);
+        const number2 t103 = M(1,0)*M(2,3);
+        const number2 t105 = M(2,0)*M(1,2);
+        const number2 t107 = M(2,0)*M(1,3);
+        const number2 t109 = M(3,0)*M(1,2);
+        const number2 t111 = M(3,0)*M(1,3);
+        const number2 t115 = M(0,0)*M(2,2);
+        const number2 t117 = M(0,0)*M(2,3);
+        const number2 t119 = M(2,0)*M(0,2);
+        const number2 t121 = M(2,0)*M(0,3);
+        const number2 t123 = M(3,0)*M(0,2);
+        const number2 t125 = M(3,0)*M(0,3);
+        const number2 t129 = M(0,0)*M(1,2);
+        const number2 t131 = M(0,0)*M(1,3);
+        const number2 t133 = M(1,0)*M(0,2);
+        const number2 t135 = M(1,0)*M(0,3);
+        (*this)(0,0) = (M(1,1)*M(2,2)*M(3,3)-M(1,1)*M(2,3)*M(3,2)-
+                        M(2,1)*M(1,2)*M(3,3)+M(2,1)*M(1,3)*M(3,2)+
+                        M(3,1)*M(1,2)*M(2,3)-M(3,1)*M(1,3)*M(2,2))*t65;
+        (*this)(0,1) = -(M(0,1)*M(2,2)*M(3,3)-M(0,1)*M(2,3)*M(3,2)-
+                         t71*M(3,3)+t73*M(3,2)+t75*M(2,3)-t77*M(2,2))*t65;
+        (*this)(0,2) = (t81*M(3,3)-t83*M(3,2)-t85*M(3,3)+t87*M(3,2)+
+                        t75*M(1,3)-t77*M(1,2))*t65;
+        (*this)(0,3) = -(t81*M(2,3)-t83*M(2,2)-t85*M(2,3)+t87*M(2,2)+
+                         t71*M(1,3)-t73*M(1,2))*t65;
+        (*this)(1,0) = -(t101*M(3,3)-t103*M(3,2)-t105*M(3,3)+t107*M(3,2)+
+                         t109*M(2,3)-t111*M(2,2))*t65;
+        (*this)(1,1) = (t115*M(3,3)-t117*M(3,2)-t119*M(3,3)+t121*M(3,2)+
+                        t123*M(2,3)-t125*M(2,2))*t65;
+        (*this)(1,2) = -(t129*M(3,3)-t131*M(3,2)-t133*M(3,3)+t135*M(3,2)+
+                         t123*M(1,3)-t125*M(1,2))*t65;
+        (*this)(1,3) = (t129*M(2,3)-t131*M(2,2)-t133*M(2,3)+t135*M(2,2)+
+                        t119*M(1,3)-t121*M(1,2))*t65;
+        (*this)(2,0) = (t32*M(3,3)-t103*M(3,1)-t46*M(3,3)+t107*M(3,1)+
+                        t57*M(2,3)-t111*M(2,1))*t65;
+        (*this)(2,1) = -(t19*M(3,3)-t117*M(3,1)-t43*M(3,3)+t121*M(3,1)+
+                         t54*M(2,3)-t125*M(2,1))*t65;
+        (*this)(2,2) = (t14*M(3,3)-t131*M(3,1)-t29*M(3,3)+t135*M(3,1)+
+                        t54*M(1,3)-t125*M(1,1))*t65;
+        (*this)(2,3) = -(t14*M(2,3)-t131*M(2,1)-t29*M(2,3)+t135*M(2,1)+
+                         t43*M(1,3)-t121*M(1,1))*t65;
+        (*this)(3,0) = -(t32*M(3,2)-t101*M(3,1)-t46*M(3,2)+t105*M(3,1)+
+                         t57*M(2,2)-t109*M(2,1))*t65;
+        (*this)(3,1) = (t19*M(3,2)-t115*M(3,1)-t43*M(3,2)+t119*M(3,1)+
+                        t54*M(2,2)-t123*M(2,1))*t65;
+        (*this)(3,2) = -(t14*M(3,2)-t129*M(3,1)-t29*M(3,2)+t133*M(3,1)+
+                         t54*M(1,2)-t123*M(1,1))*t65;
+        (*this)(3,3) = (t14*M(2,2)-t129*M(2,1)-t29*M(2,2)+t133*M(2,1)+
+                        t43*M(1,2)-t119*M(1,1))*t65;
+
+        break;
+      }
+
+
+      default:
+        // if no inversion is
+        // hardcoded, fall back
+        // to use the
+        // Gauss-Jordan algorithm
+        *this = M;
+        gauss_jordan();
+      };
+}
+
+
+// Compute the Cholesky factor of the symmetric matrix A and store it in
+// this object: the entries on and below the diagonal are filled row by
+// row, everything above the diagonal is left at the zero that reinit()
+// produces. Throws ExcMatrixNotPositiveDefinite if a diagonal radicand
+// becomes negative.
+template <typename number>
+template <typename number2>
+void
+FullMatrix<number>::cholesky (const FullMatrix<number2> &A)
+{
+  Assert (!A.empty(), ExcEmptyMatrix());
+  Assert (A.n() == A.m(),
+          ExcNotQuadratic());
+  // Matrix must be symmetric.
+  Assert(A.relative_symmetry_norm2() < 1.0e-10, ExcMessage("A must be symmetric."));
+
+  if (PointerComparison::equal(&A, this))
+    {
+      // source and destination are the same object: factorize a copy
+      // so that the reinit() below does not wipe out the input
+      FullMatrix<number2> A2 = A;
+      cholesky(A2);
+      return;
+    }
+
+  // resize *this to the shape of A; all entries start out as zero
+  this->reinit(A.m(), A.n());
+
+  for (size_type row=0; row<this->n_cols(); ++row)
+    {
+      // running sum of the squares of the entries already computed
+      // in this row
+      double row_norm_sq = 0.0;
+      for (size_type col=0; col<row; ++col)
+        {
+          // dot product of the leading parts of rows 'row' and 'col'
+          double dot = 0.0;
+          for (size_type k=0; k<col; ++k)
+            dot += (*this)(row,k)*(*this)(col,k);
+
+          (*this)(row,col) = (1./(*this)(col,col))*(A(row,col) - dot);
+          row_norm_sq += (*this)(row,col)*(*this)(row,col);
+        }
+      AssertThrow (A(row,row) - row_norm_sq >= 0,
+                   ExcMatrixNotPositiveDefinite());
+
+      (*this)(row,row) = std::sqrt(A(row,row) - row_norm_sq);
+    }
+}
+
+
+// Resize this matrix to V.size() x V.size() and fill it with the outer
+// product of the two vectors, i.e. entry (i,j) becomes V(i)*W(j).
+template <typename number>
+template <typename number2>
+void
+FullMatrix<number>::outer_product (const Vector<number2> &V,
+                                   const Vector<number2> &W)
+{
+  Assert (V.size() == W.size(), ExcMessage("Vectors V, W must be the same size."));
+
+  const size_type n_entries = V.size();
+  this->reinit(n_entries, n_entries);
+
+  for (size_type row=0; row<n_entries; ++row)
+    for (size_type col=0; col<n_entries; ++col)
+      (*this)(row,col) = V(row)*W(col);
+}
+
+
+// Store (A^T A)^{-1} A^T in this matrix. A must have more rows than
+// columns, and this object must already have the transposed shape of A.
+// If the determinant of A^T A evaluates to zero, an ExcSingular
+// assertion is triggered and this matrix is left unchanged.
+template <typename number>
+template <typename number2>
+void
+FullMatrix<number>::left_invert (const FullMatrix<number2> &A)
+{
+  Assert (!A.empty(), ExcEmptyMatrix());
+  Assert(A.m()>A.n(), ExcDimensionMismatch(A.m(), A.n()));
+  Assert(this->m()==A.n(), ExcDimensionMismatch(this->m(), A.n()));
+  Assert(this->n()==A.m(), ExcDimensionMismatch(this->n(), A.m()));
+
+  // A^T: add the transpose of A to a freshly constructed matrix
+  FullMatrix<number2> transposed(A.n(),A.m());
+  transposed.Tadd(A,1);
+
+  // gram = A^T * A
+  FullMatrix<number2> gram(A.n(),A.n());
+  transposed.mmult(gram,A);
+
+  if (number(gram.determinant())==number(0))
+    {
+      Assert(false, ExcSingular());
+      return;
+    }
+
+  FullMatrix<number2> gram_inverse(A.n(),A.n());
+  gram_inverse.invert(gram);
+
+  // result = (A^T A)^{-1} * A^T
+  FullMatrix<number2> result(A.n(),A.m());
+  gram_inverse.mmult(result,transposed);
+
+  *this=result;
+}
+
+// Store A^T (A A^T)^{-1} in this matrix. A must have more columns than
+// rows, and this object must already have the transposed shape of A.
+// If the determinant of A A^T evaluates to zero, an ExcSingular
+// assertion is triggered and this matrix is left unchanged.
+template <typename number>
+template <typename number2>
+void
+FullMatrix<number>::right_invert (const FullMatrix<number2> &A)
+{
+  Assert (!A.empty(), ExcEmptyMatrix());
+  Assert(A.n()>A.m(), ExcDimensionMismatch(A.n(), A.m()));
+  Assert(this->m()==A.n(), ExcDimensionMismatch(this->m(), A.n()));
+  Assert(this->n()==A.m(), ExcDimensionMismatch(this->n(), A.m()));
+
+  // A^T: add the transpose of A to a freshly constructed matrix
+  FullMatrix<number> transposed(A.n(),A.m());
+  transposed.Tadd(A,1);
+
+  // gram = A * A^T
+  FullMatrix<number> gram(A.m(),A.m());
+  A.mmult(gram,transposed);
+
+  if (number(gram.determinant())==number(0))
+    {
+      Assert(false, ExcSingular());
+      return;
+    }
+
+  FullMatrix<number> gram_inverse(A.m(),A.m());
+  gram_inverse.invert(gram);
+
+  // result = A^T * (A A^T)^{-1}
+  FullMatrix<number> result(A.n(),A.m());
+  transposed.mmult(result,gram_inverse);
+
+  *this=result;
+}
+
+
+// Copy the sub-block [src_r_i, src_r_j] x [src_c_i, src_c_j] (both index
+// ranges inclusive) of the rank-2 tensor T into this matrix, placing its
+// upper left entry at position (dst_r, dst_c). Entries are converted to
+// 'number'. All range checks are debug-mode assertions.
+template <typename number>
+template <int dim>
+void
+FullMatrix<number>::copy_from (const Tensor<2,dim> &T,
+                               const size_type src_r_i,
+                               const size_type src_r_j,
+                               const size_type src_c_i,
+                               const size_type src_c_j,
+                               const size_type dst_r,
+                               const size_type dst_c)
+{
+
+  Assert (!this->empty(), ExcEmptyMatrix());
+  // the block has to fit into this matrix when placed at (dst_r, dst_c)
+  Assert(this->m()-dst_r>src_r_j-src_r_i,
+         ExcIndexRange(this->m()-dst_r,0,src_r_j-src_r_i));
+  Assert(this->n()-dst_c>src_c_j-src_c_i,
+         ExcIndexRange(this->n()-dst_c,0,src_c_j-src_c_i));
+  // the last source row/column has to exist in the tensor; the column
+  // check reports the column index rather than the row index
+  Assert(dim>src_r_j, ExcIndexRange(dim,0,src_r_j));
+  Assert(dim>src_c_j, ExcIndexRange(dim,0,src_c_j));
+  // the index ranges must not be reversed; again the column check
+  // reports the column indices
+  Assert(src_r_j>=src_r_i, ExcIndexRange(src_r_j,0,src_r_i));
+  Assert(src_c_j>=src_c_i, ExcIndexRange(src_c_j,0,src_c_i));
+
+  // both ranges are inclusive, hence the +1
+  const size_type n_rows_copied = src_r_j-src_r_i+1;
+  const size_type n_cols_copied = src_c_j-src_c_i+1;
+
+  for (size_type i=0; i<n_rows_copied; i++)
+    for (size_type j=0; j<n_cols_copied; j++)
+      (*this)(i+dst_r,j+dst_c) = number(T[i+src_r_i][j+src_c_i]);
+
+}
+
+
+// Copy the sub-block [src_r_i, src_r_j] x [src_c_i, src_c_j] (both index
+// ranges inclusive) of this matrix into the rank-2 tensor T, placing its
+// upper left entry at position (dst_r, dst_c). Entries are converted to
+// double. All range checks are debug-mode assertions.
+template <typename number>
+template <int dim>
+void
+FullMatrix<number>::copy_to (Tensor<2,dim> &T,
+                             const size_type src_r_i,
+                             const size_type src_r_j,
+                             const size_type src_c_i,
+                             const size_type src_c_j,
+                             const size_type dst_r,
+                             const size_type dst_c) const
+{
+  Assert (!this->empty(), ExcEmptyMatrix());
+  // the block has to fit into the tensor when placed at (dst_r, dst_c)
+  Assert(dim-dst_r>src_r_j-src_r_i,
+         ExcIndexRange(dim-dst_r,0,src_r_j-src_r_i));
+  Assert(dim-dst_c>src_c_j-src_c_i,
+         ExcIndexRange(dim-dst_c,0,src_c_j-src_c_i));
+  // the last source row/column has to exist in this matrix; report the
+  // matrix extent that was actually tested and the matching index
+  Assert(this->m()>src_r_j, ExcIndexRange(this->m(),0,src_r_j));
+  Assert(this->n()>src_c_j, ExcIndexRange(this->n(),0,src_c_j));
+  // the index ranges must not be reversed; the column check reports
+  // the column indices
+  Assert(src_r_j>=src_r_i, ExcIndexRange(src_r_j,0,src_r_i));
+  Assert(src_c_j>=src_c_i, ExcIndexRange(src_c_j,0,src_c_i));
+
+  // both ranges are inclusive, hence the +1
+  const size_type n_rows_copied = src_r_j-src_r_i+1;
+  const size_type n_cols_copied = src_c_j-src_c_i+1;
+
+  for (size_type i=0; i<n_rows_copied; i++)
+    for (size_type j=0; j<n_cols_copied; j++)
+      T[i+dst_r][j+dst_c] = double ((*this)(i+src_r_i,j+src_c_i));
+}
+
+
+
+// Apply one Jacobi preconditioning step: every entry of src is
+// multiplied by the relaxation factor om and divided by the matching
+// diagonal entry of this matrix; the result is written to dst.
+template <typename number>
+template <typename somenumber>
+void
+FullMatrix<number>::precondition_Jacobi (Vector<somenumber>       &dst,
+                                         const Vector<somenumber> &src,
+                                         const number              om) const
+{
+  Assert (m() == n(), ExcNotQuadratic());
+  Assert (dst.size() == n(), ExcDimensionMismatch (dst.size(), n()));
+  Assert (src.size() == n(), ExcDimensionMismatch (src.size(), n()));
+
+  const size_t n_entries = src.size();
+
+  // access the vector storage through begin() and index into it
+  somenumber       *out = dst.begin();
+  const somenumber *in  = src.begin();
+
+  for (size_type i=0; i<n_entries; ++i)
+    out[i] = somenumber(om) * in[i] / somenumber((*this)(i,i));
+}
+
+
+
+// Write the matrix to 'out', one row per line.
+//
+// precision    number of digits requested from the stream
+// scientific   select scientific instead of fixed notation
+// width_       field width of each entry; 0 selects precision+7
+//              (scientific) or precision+2 (fixed)
+// zero_string  text printed in place of entries whose absolute value
+//              does not exceed 'threshold'
+// denominator  factor every printed entry is multiplied with
+// threshold    see zero_string; compared against the unscaled entry
+//
+// The stream's flags and precision are restored before the function
+// returns and also before it throws ExcIO on a failed stream.
+template <typename number>
+void
+FullMatrix<number>::print_formatted (
+  std::ostream       &out,
+  const unsigned int  precision,
+  const bool          scientific,
+  const unsigned int  width_,
+  const char         *zero_string,
+  const double        denominator,
+  const double        threshold) const
+{
+  unsigned int width = width_;
+
+  Assert ((!this->empty()) || (this->n_cols()+this->n_rows()==0),
+          ExcInternalError());
+
+  // set output format, but store old state. precision() hands back a
+  // std::streamsize, so keep it in that type
+  const std::ios::fmtflags old_flags     = out.flags();
+  const std::streamsize    old_precision = out.precision (precision);
+
+  if (scientific)
+    {
+      out.setf (std::ios::scientific, std::ios::floatfield);
+      if (!width)
+        width = precision+7;
+    }
+  else
+    {
+      out.setf (std::ios::fixed, std::ios::floatfield);
+      if (!width)
+        width = precision+2;
+    }
+
+  for (size_type i=0; i<m(); ++i)
+    {
+      for (size_type j=0; j<n(); ++j)
+        if (std::abs((*this)(i,j)) > threshold)
+          out << std::setw(width)
+              << (*this)(i,j) * number(denominator) << ' ';
+        else
+          out << std::setw(width) << zero_string << ' ';
+      out << std::endl;
+    };
+
+  // reset output format first: flags() and precision() do not touch the
+  // stream's error state, and this way the caller gets the stream back
+  // with its original formatting even if we throw below
+  out.flags (old_flags);
+  out.precision(old_precision);
+
+  AssertThrow (out, ExcIO());
+}
+
+
+// Replace this (square, non-empty) matrix by its inverse, in place.
+//
+// With LAPACK available, 'number' being double or float, and more than
+// 15 columns, the work is delegated to getrf/getri. Otherwise a
+// Gauss-Jordan elimination with row pivoting is carried out by hand.
+template <typename number>
+void
+FullMatrix<number>::gauss_jordan ()
+{
+  Assert (!this->empty(), ExcEmptyMatrix());
+  Assert (this->n_cols() == this->n_rows(), ExcNotQuadratic());
+
+  // see if we can use Lapack algorithms
+  // for this and if the type for 'number'
+  // works for us (it is usually not
+  // efficient to use Lapack for very small
+  // matrices):
+#ifdef DEAL_II_WITH_LAPACK
+  if (types_are_equal<number,double>::value
+      ||
+      types_are_equal<number,float>::value)
+    if (this->n_cols() > 15)
+      {
+        // In case we have the LAPACK functions
+        // getrf and getri detected by CMake,
+        // we use these algorithms for inversion
+        // since they provide better performance
+        // than the deal.II native functions.
+        //
+        // Note that BLAS/LAPACK stores matrix
+        // elements column-wise (i.e., all values in
+        // one column, then all in the next, etc.),
+        // whereas the FullMatrix stores them
+        // row-wise.
+        // We ignore that difference, and give our
+        // row-wise data to LAPACK,
+        // let LAPACK build the inverse of the
+        // transpose matrix, and read the result as
+        // if it were row-wise again. In other words,
+        // we just got ((A^T)^{-1})^T, which is
+        // A^{-1}.
+
+        const int nn = this->n();
+
+        // workspace for permutations
+        std::vector<int> ipiv(nn);
+        int info;
+
+        // Use the LAPACK function getrf for
+        // calculating the LU factorization.
+        getrf(&nn, &nn, &this->values[0], &nn, &ipiv[0], &info);
+
+        // a negative info is treated as an internal error, a positive
+        // one as a singular matrix
+        Assert(info >= 0, ExcInternalError());
+        Assert(info == 0, LACExceptions::ExcSingular());
+
+        // scratch array; its length nn is also what is passed to getri
+        // as the size of the work array
+        std::vector<number> inv_work (nn);
+
+        // Use the LAPACK function getri for
+        // calculating the actual inverse using
+        // the LU factorization.
+        getri(&nn, &this->values[0], &nn, &ipiv[0], &inv_work[0], &nn, &info);
+
+        Assert(info >= 0, ExcInternalError());
+        Assert(info == 0, LACExceptions::ExcSingular());
+
+        return;
+      }
+
+#endif
+
+  // otherwise do it by hand. use the
+  // Gauss-Jordan-Algorithm from
+  // Stoer & Bulirsch I (4th Edition)
+  // p. 153
+  const size_type N = n();
+
+  // first get an estimate of the
+  // size of the elements of this
+  // matrix, for later checks whether
+  // the pivot element is large
+  // enough, or whether we have to
+  // fear that the matrix is not
+  // regular
+  double diagonal_sum = 0;
+  for (size_type i=0; i<N; ++i)
+    diagonal_sum += std::abs((*this)(i,i));
+  const double typical_diagonal_element = diagonal_sum/N;
+  // the value is only read inside the Assert below; the cast presumably
+  // silences an unused-variable warning when assertions are compiled
+  // out -- TODO confirm
+  (void)typical_diagonal_element;
+
+  // initialize the array that holds
+  // the permutations that we find
+  // during pivot search
+  std::vector<size_type> p(N);
+  for (size_type i=0; i<N; ++i)
+    p[i] = i;
+
+  for (size_type j=0; j<N; ++j)
+    {
+      // pivot search: search column j,
+      // on and below the diagonal, for
+      // the entry of largest absolute
+      // value; r is the row it is in
+      real_type max = std::abs((*this)(j,j));
+      size_type r   = j;
+      for (size_type i=j+1; i<N; ++i)
+        {
+          if (std::abs((*this)(i,j)) > max)
+            {
+              max = std::abs((*this)(i,j));
+              r = i;
+            }
+        }
+      // check whether the pivot is
+      // too small
+      Assert(max > 1.e-16*typical_diagonal_element,
+             ExcNotRegular(max));
+
+      // row interchange: bring the pivot
+      // row to position j and record the
+      // swap in p
+      if (r>j)
+        {
+          for (size_type k=0; k<N; ++k)
+            std::swap ((*this)(j,k), (*this)(r,k));
+
+          std::swap (p[j], p[r]);
+        }
+
+      // transformation: hr is the
+      // reciprocal of the pivot
+      const number hr = number(1.)/(*this)(j,j);
+      (*this)(j,j) = hr;
+      // update every entry outside of
+      // row j and column j
+      for (size_type k=0; k<N; ++k)
+        {
+          if (k==j) continue;
+          for (size_type i=0; i<N; ++i)
+            {
+              if (i==j) continue;
+              (*this)(i,k) -= (*this)(i,j)*(*this)(j,k)*hr;
+            }
+        }
+      // scale column j by hr and row j
+      // by -hr. For i==j both statements
+      // hit the diagonal entry, which is
+      // why it is set to hr again below
+      for (size_type i=0; i<N; ++i)
+        {
+          (*this)(i,j) *= hr;
+          (*this)(j,i) *= -hr;
+        }
+      (*this)(j,j) = hr;
+    }
+  // column interchange: permute the
+  // columns of every row according to
+  // the row swaps recorded in p, using
+  // hv as a buffer for one row
+  std::vector<number> hv(N);
+  for (size_type i=0; i<N; ++i)
+    {
+      for (size_type k=0; k<N; ++k)
+        hv[p[k]] = (*this)(i,k);
+      for (size_type k=0; k<N; ++k)
+        (*this)(i,k) = hv[k];
+    }
+}
+
+
+
+// Return the memory consumption of this object: the size of whatever
+// this class holds beyond its Table<2,number> base, plus the amount
+// the base class reports for itself.
+template <typename number>
+std::size_t
+FullMatrix<number>::memory_consumption () const
+{
+  const std::size_t own_part  = sizeof(*this) - sizeof (Table<2,number>);
+  const std::size_t base_part = Table<2,number>::memory_consumption();
+
+  return own_part + base_part;
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/generic_linear_algebra.h b/include/deal.II/lac/generic_linear_algebra.h
new file mode 100644
index 0000000..9130696
--- /dev/null
+++ b/include/deal.II/lac/generic_linear_algebra.h
@@ -0,0 +1,227 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__generic_linear_algebra_h
+#define dealii__generic_linear_algebra_h
+
+#include <deal.II/base/config.h>
+
+
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/precondition.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/**
+ * A namespace in which the deal.II linear algebra classes are typedef'ed to
+ * generic names. There are similar namespaces LinearAlgebraPETSc and
+ * LinearAlgebraTrilinos for typedefs to classes that interface with the PETSc
+ * and Trilinos libraries.
+ */
+namespace LinearAlgebraDealII
+{
+  /**
+   * Typedef for the vector type used (double precision).
+   */
+  typedef Vector<double> Vector;
+
+  /**
+   * Typedef for the type used to describe vectors that consist of multiple
+   * blocks (double precision).
+   */
+  typedef BlockVector<double> BlockVector;
+
+  /**
+   * Typedef for the sparse matrix type used (double precision).
+   */
+  typedef SparseMatrix<double> SparseMatrix;
+
+  /**
+   * Typedef for the SSOR preconditioner, instantiated for the SparseMatrix
+   * type of this namespace.
+   */
+  typedef PreconditionSSOR<SparseMatrix > PreconditionSSOR;
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+
+#ifdef DEAL_II_WITH_PETSC
+
+#include <deal.II/lac/block_sparsity_pattern.h>
+#include <deal.II/lac/petsc_parallel_sparse_matrix.h>
+#include <deal.II/lac/petsc_parallel_block_sparse_matrix.h>
+#include <deal.II/lac/petsc_precondition.h>
+#include <deal.II/lac/petsc_solver.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * A namespace in which the wrappers to the PETSc linear algebra classes are
+ * typedef'ed to generic names. There are similar namespaces
+ * LinearAlgebraDealII and LinearAlgebraTrilinos for typedefs to deal.II's own
+ * classes and classes that interface with Trilinos.
+ */
+namespace LinearAlgebraPETSc
+{
+  using namespace dealii;
+
+  /**
+   * Typedef for the CG solver type used.
+   */
+  typedef PETScWrappers::SolverCG SolverCG;
+
+  /**
+   * Typedef for the GMRES solver type used.
+   */
+  typedef PETScWrappers::SolverGMRES SolverGMRES;
+
+  /**
+   * A namespace with typedefs to generic names for parallel PETSc linear
+   * algebra objects.
+   */
+  namespace MPI
+  {
+    /**
+     * Typedef for the vector type used.
+     */
+    typedef PETScWrappers::MPI::Vector Vector;
+
+    /**
+     * Typedef for the type used to describe vectors that consist of multiple
+     * blocks.
+     */
+    typedef PETScWrappers::MPI::BlockVector BlockVector;
+
+    /**
+     * Typedef for the sparse matrix type used.
+     */
+    typedef PETScWrappers::MPI::SparseMatrix SparseMatrix;
+
+    /**
+     * Typedef for the type used to describe sparse matrices that consist of
+     * multiple blocks.
+     */
+    typedef PETScWrappers::MPI::BlockSparseMatrix BlockSparseMatrix;
+
+    /**
+     * Typedef for the block sparsity pattern type used. Note that the
+     * generic name still says "Compressed" although it refers to
+     * BlockDynamicSparsityPattern (presumably kept for backward
+     * compatibility -- TODO confirm).
+     */
+    typedef dealii::BlockDynamicSparsityPattern BlockCompressedSparsityPattern;
+
+    /**
+     * Typedef for the AMG preconditioner type.
+     */
+    typedef PETScWrappers::PreconditionBoomerAMG PreconditionAMG;
+
+    /**
+     * Typedef for the Incomplete Cholesky preconditioner.
+     */
+    typedef PETScWrappers::PreconditionICC PreconditionIC;
+
+    /**
+     * Typedef for the Incomplete LU decomposition preconditioner.
+     */
+    typedef PETScWrappers::PreconditionILU PreconditionILU;
+
+    /**
+     * Typedef for the Jacobi preconditioner.
+     */
+    typedef PETScWrappers::PreconditionJacobi PreconditionJacobi;
+
+    /**
+     * Typedef for the SSOR preconditioner.
+     */
+    typedef PETScWrappers::PreconditionSSOR PreconditionSSOR;
+
+  }
+
+}
+DEAL_II_NAMESPACE_CLOSE
+
+
+#endif // DEAL_II_WITH_PETSC
+
+#ifdef DEAL_II_WITH_TRILINOS
+
+#include <deal.II/lac/trilinos_block_sparse_matrix.h>
+#include <deal.II/lac/trilinos_sparse_matrix.h>
+#include <deal.II/lac/trilinos_precondition.h>
+#include <deal.II/lac/block_sparsity_pattern.h>
+#include <deal.II/lac/trilinos_solver.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * A namespace in which the wrappers to the Trilinos linear algebra classes
+ * are typedef'ed to generic names. There are similar namespaces
+ * LinearAlgebraDealII and LinearAlgebraPETSc for typedefs to deal.II's own
+ * classes and classes that interface with PETSc.
+ */
+namespace LinearAlgebraTrilinos
+{
+  using namespace dealii;
+
+  /**
+   * Typedef for the CG solver type used.
+   */
+  typedef TrilinosWrappers::SolverCG SolverCG;
+
+  /**
+   * Typedef for the GMRES solver type used.
+   */
+  typedef TrilinosWrappers::SolverGMRES SolverGMRES;
+
+  /**
+   * A namespace with typedefs to generic names for parallel Trilinos linear
+   * algebra objects.
+   */
+  namespace MPI
+  {
+    /**
+     * Typedef for the vector type used.
+     */
+    typedef TrilinosWrappers::MPI::Vector Vector;
+
+    /**
+     * Typedef for the type used to describe vectors that consist of multiple
+     * blocks.
+     */
+    typedef TrilinosWrappers::MPI::BlockVector BlockVector;
+
+    /**
+     * Typedef for the sparse matrix type used.
+     */
+    typedef TrilinosWrappers::SparseMatrix SparseMatrix;
+
+    /**
+     * Typedef for the type used to describe sparse matrices that consist of
+     * multiple blocks.
+     */
+    typedef TrilinosWrappers::BlockSparseMatrix BlockSparseMatrix;
+
+    /**
+     * Typedef for the block sparsity pattern type used. Note that the
+     * generic name says "Compressed" although it refers to
+     * TrilinosWrappers::BlockSparsityPattern.
+     */
+    typedef TrilinosWrappers::BlockSparsityPattern BlockCompressedSparsityPattern;
+
+    /**
+     * Typedef for the AMG preconditioner type.
+     */
+    typedef TrilinosWrappers::PreconditionAMG PreconditionAMG;
+
+    /**
+     * Typedef for the Incomplete Cholesky preconditioner.
+     */
+    typedef TrilinosWrappers::PreconditionIC PreconditionIC;
+
+    /**
+     * Typedef for the Incomplete LU decomposition preconditioner.
+     */
+    typedef TrilinosWrappers::PreconditionILU PreconditionILU;
+
+    /**
+     * Typedef for the Jacobi preconditioner.
+     */
+    typedef TrilinosWrappers::PreconditionJacobi PreconditionJacobi;
+
+    /**
+     * Typedef for the SSOR preconditioner
+     */
+    typedef TrilinosWrappers::PreconditionSSOR PreconditionSSOR;
+
+
+  }
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+
+#endif // DEAL_II_WITH_TRILINOS
+
+
+
+#endif
diff --git a/include/deal.II/lac/householder.h b/include/deal.II/lac/householder.h
new file mode 100644
index 0000000..932c51d
--- /dev/null
+++ b/include/deal.II/lac/householder.h
@@ -0,0 +1,321 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__householder_h
+#define dealii__householder_h
+
+
+#include <cmath>
+#include <deal.II/base/config.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/vector_memory.h>
+
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// forward declarations
+template<typename number> class Vector;
+
+
+/*! @addtogroup Matrix2
+ *@{
+ */
+
+
+/**
+ * QR-decomposition of a full matrix.
+ *
+ * Whenever an object of this class is created, it copies the matrix given and
+ * computes its QR-decomposition by the Householder algorithm. Then, the
+ * function least_squares() can be used to compute the vector <i>x</i>
+ * minimizing <i>||Ax-b||</i> for a given vector <i>b</i>.
+ *
+ * @note Instantiations for this template are provided for <tt>@<float@> and
+ * @<double@></tt>; others can be generated in application programs (see the
+ * section on
+ * @ref Instantiations
+ * in the manual).
+ *
+ * @author Guido Kanschat, 2005
+ */
+template<typename number>
+class Householder : private FullMatrix<number>
+{
+public:
+  /**
+   * Declare type of container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * Create an empty object.
+   */
+  Householder ();
+
+  /**
+   * Create an object holding the QR-decomposition of a matrix.
+   */
+  template<typename number2>
+  Householder (const FullMatrix<number2> &);
+
+  /**
+   * Compute the QR-decomposition of another matrix.
+   */
+  template<typename number2>
+  void
+  initialize (const FullMatrix<number2> &);
+
+  /**
+   * Solve the least-squares problem for the right hand side <tt>src</tt>. The
+   * return value is the Euclidean norm of the approximation error.
+   *
+   * @arg @c dst contains the solution of the least squares problem on return.
+   *
+   * @arg @c src contains the right hand side <i>b</i> of the least squares
+   * problem. It is passed as a constant reference and is not modified; the
+   * algorithm works on an internal copy.
+   */
+  template<typename number2>
+  double least_squares (Vector<number2> &dst,
+                        const Vector<number2> &src) const;
+
+  /**
+   * This function does the same as the previous one, but for BlockVectors.
+   */
+  template<typename number2>
+  double least_squares (BlockVector<number2> &dst,
+                        const BlockVector<number2> &src) const;
+
+  /**
+   * A wrapper to least_squares(), implementing the standard MatrixType
+   * interface. The norm returned by least_squares() is discarded.
+   */
+  template<class VectorType>
+  void vmult (VectorType &dst, const VectorType &src) const;
+
+  /**
+   * Provided to complete the MatrixType interface only. This function is
+   * not implemented; calling it triggers an ExcNotImplemented assertion.
+   */
+  template<class VectorType>
+  void Tvmult (VectorType &dst, const VectorType &src) const;
+
+
+private:
+  /**
+   * Storage for the diagonal elements of the orthogonal transformation.
+   */
+  std::vector<number> diagonal;
+};
+
+/*@}*/
+
+#ifndef DOXYGEN
+/*-------------------------Inline functions -------------------------------*/
+
+// QR-transformation cf. Stoer 1 4.8.2 (p. 191)
+
+// Default constructor: creates an object without any decomposition.
+// The base class matrix and 'diagonal' are default-constructed; a
+// matrix is supplied later through initialize().
+template <typename number>
+Householder<number>::Householder()
+{}
+
+
+
+// Copy M into this object and overwrite the copy, column by column,
+// with its QR-decomposition: below the diagonal each column holds the
+// scaled Householder vector, the first entry of that vector goes into
+// 'diagonal', and the diagonal entry of the matrix itself receives the
+// value s computed for that column. The function returns early as soon
+// as it meets a column whose remaining part has (almost) zero norm.
+template <typename number>
+template <typename number2>
+void
+Householder<number>::initialize(const FullMatrix<number2> &M)
+{
+  const size_type rows = M.n_rows();
+  const size_type cols = M.n_cols();
+
+  this->reinit(rows, cols);
+  this->fill(M);
+  Assert (!this->empty(), typename FullMatrix<number2>::ExcEmptyMatrix());
+  diagonal.resize(rows);
+
+  // the matrix must not have more columns than rows
+  Assert (this->n_cols() <= this->n_rows(),
+          ExcDimensionMismatch(this->n_cols(), this->n_rows()));
+
+  for (size_type col=0 ; col<cols ; ++col)
+    {
+      // squared norm of the part of this column on and below the
+      // diagonal
+      number2 norm_sq = 0;
+      for (size_type row=col ; row<rows ; ++row)
+        norm_sq += this->el(row,col)*this->el(row,col);
+
+      // nothing left to eliminate in an empty column: stop here
+      if (std::fabs(norm_sq) < 1.e-15) return;
+
+      // s gets the sign opposite to that of the diagonal entry
+      number2 s;
+      if (this->el(col,col) < 0)
+        s = std::sqrt(norm_sq);
+      else
+        s = -std::sqrt(norm_sq);
+
+      // scaling factor of the Householder vector
+      number2 beta = std::sqrt(1./(norm_sq-s*this->el(col,col)));
+
+      // turn this column into the Householder vector; its first entry
+      // lives in 'diagonal', the matrix diagonal keeps s
+      diagonal[col] = beta*(this->el(col,col) - s);
+      this->el(col,col) = s;
+
+      for (size_type row=col+1 ; row<rows ; ++row)
+        this->el(row,col) *= beta;
+
+      // apply the reflection to all columns to the right
+      for (size_type k=col+1 ; k<cols ; ++k)
+        {
+          // scalar product of the Householder vector with column k
+          number2 projection = diagonal[col]*this->el(col,k);
+          for (size_type row=col+1 ; row<rows ; ++row)
+            projection += this->el(row,col)*this->el(row,k);
+
+          this->el(col,k) -= projection*this->diagonal[col];
+          for (size_type row=col+1 ; row<rows ; ++row)
+            this->el(row,k) -= projection*this->el(row,col);
+        }
+    }
+}
+
+
+// Constructor that immediately computes the QR-decomposition of M by
+// forwarding to initialize().
+template <typename number>
+template <typename number2>
+Householder<number>::Householder(const FullMatrix<number2> &M)
+{
+  initialize(M);
+}
+
+
+// Solve the least squares problem for the right hand side src using the
+// stored decomposition. The solution is written to dst; the return
+// value is the Euclidean norm of the residual. src itself is left
+// untouched, all work happens on a temporary copy.
+template <typename number>
+template <typename number2>
+double
+Householder<number>::least_squares (Vector<number2> &dst,
+                                    const Vector<number2> &src) const
+{
+  Assert (!this->empty(), typename FullMatrix<number2>::ExcEmptyMatrix());
+  AssertDimension(dst.size(), this->n());
+  AssertDimension(src.size(), this->m());
+
+  const size_type rows = this->m();
+  const size_type cols = this->n();
+
+  // working copy of the right hand side
+  GrowingVectorMemory<Vector<number2> > mem;
+  Vector<number2> *aux = mem.alloc();
+  aux->reinit(src, true);
+  *aux = src;
+  Vector<number2> &work = *aux;
+
+  // apply the stored Householder reflections one after the other,
+  // each of the form I - v v^T
+  for (size_type col=0; col<cols; ++col)
+    {
+      // projection = v^T work
+      number2 projection = diagonal[col]* work(col);
+      for (size_type row=col+1 ; row<rows ; ++row)
+        projection += static_cast<number2>(this->el(row,col))*work(row);
+
+      // work -= v * projection
+      work(col) -= projection*diagonal[col];
+      for (size_type row=col+1 ; row<rows ; ++row)
+        work(row) -= projection*static_cast<number2>(this->el(row,col));
+    }
+
+  // the entries beyond the first 'cols' ones make up the residual
+  number2 residual_sq = 0.;
+  for (size_type row=cols ; row<rows ; ++row)
+    residual_sq += work(row) * work(row);
+  AssertIsFinite(residual_sq);
+
+  // compute the solution with the base class' backward()
+  this->backward(dst, work);
+
+  mem.free(aux);
+
+  return std::sqrt(residual_sq);
+}
+
+// Same as the Vector version of least_squares(), but for BlockVector
+// arguments. As backward() works on Vectors only, the transformed right
+// hand side and the solution are copied through temporary Vector
+// objects.
+template <typename number>
+template <typename number2>
+double
+Householder<number>::least_squares (BlockVector<number2> &dst,
+                                    const BlockVector<number2> &src) const
+{
+  Assert (!this->empty(), typename FullMatrix<number2>::ExcEmptyMatrix());
+  AssertDimension(dst.size(), this->n());
+  AssertDimension(src.size(), this->m());
+
+  const size_type rows = this->m();
+  const size_type cols = this->n();
+
+  // working copy of the right hand side
+  GrowingVectorMemory<BlockVector<number2> > mem;
+  BlockVector<number2> *aux = mem.alloc();
+  aux->reinit(src, true);
+  *aux = src;
+  BlockVector<number2> &work = *aux;
+
+  // apply the stored Householder reflections one after the other,
+  // each of the form I - v v^T
+  for (size_type col=0; col<cols; ++col)
+    {
+      // projection = v^T work
+      number2 projection = diagonal[col]* work(col);
+      for (size_type row=col+1 ; row<rows ; ++row)
+        projection += this->el(row,col)*work(row);
+
+      // work -= v * projection
+      work(col) -= projection*diagonal[col];
+      for (size_type row=col+1 ; row<rows ; ++row)
+        work(row) -= projection*this->el(row,col);
+    }
+
+  // the entries beyond the first 'cols' ones make up the residual
+  number2 residual_sq = 0.;
+  for (size_type row=cols ; row<rows ; ++row)
+    residual_sq += work(row) * work(row);
+  AssertIsFinite(residual_sq);
+
+  // backward() works for Vectors only, so copy the data over first
+  Vector<number2> v_dst, v_aux;
+  v_dst = dst;
+  v_aux = work;
+
+  // compute the solution
+  this->backward(v_dst, v_aux);
+
+  // copy the result back to the BlockVector
+  dst = v_dst;
+
+  mem.free(aux);
+
+  return std::sqrt(residual_sq);
+}
+
+
+// MatrixType-style interface: dst receives the least squares solution
+// for the right hand side src. The residual norm computed by
+// least_squares() is deliberately dropped.
+template <typename number>
+template <class VectorType>
+void
+Householder<number>::vmult (VectorType &dst, const VectorType &src) const
+{
+  (void) this->least_squares (dst, src);
+}
+
+
+// Not implemented: both arguments are ignored and the only statement is
+// an assertion that always fails with ExcNotImplemented.
+template <typename number>
+template <class VectorType>
+void
+Householder<number>::Tvmult (VectorType &, const VectorType &) const
+{
+  Assert(false, ExcNotImplemented());
+}
+
+
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/identity_matrix.h b/include/deal.II/lac/identity_matrix.h
new file mode 100644
index 0000000..6344c3f
--- /dev/null
+++ b/include/deal.II/lac/identity_matrix.h
@@ -0,0 +1,260 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__identity_matrix_h
+#define dealii__identity_matrix_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/lac/exceptions.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+/*! @addtogroup Matrix1
+ *@{
+ */
+
+
+/**
+ * Implementation of a simple class representing the identity matrix of a
+ * given size, i.e. a matrix with entries $A_{ij}=\delta_{ij}$. While it has
+ * the most important ingredients of a matrix, in particular that one can ask
+ * for its size and perform matrix-vector products with it, a matrix of this
+ * type is really only useful in two contexts: preconditioning and
+ * initializing other matrices.
+ *
+ * <h4>Initialization</h4>
+ *
+ * The main usefulness of this class lies in its ability to initialize other
+ * matrices, like this:
+ * @code
+ * FullMatrix<double> identity (IdentityMatrix(10));
+ * @endcode
+ *
+ * This creates a $10\times 10$ matrix with ones on the diagonal and zeros
+ * everywhere else. Most matrix types, in particular FullMatrix and
+ * SparseMatrix, have conversion constructors and assignment operators for
+ * IdentityMatrix, and can therefore be filled rather easily with identity
+ * matrices.
+ *
+ *
+ * <h4>Preconditioning</h4>
+ *
+ * No preconditioning at all is equivalent to preconditioning with the
+ * identity matrix. deal.II has a specialized class for this purpose,
+ * PreconditionIdentity, that can be used in a context as
+ * shown in the documentation of that class. The present class can be used in
+ * much the same way, although without any additional benefit:
+ * @code
+ * SolverControl           solver_control (1000, 1e-12);
+ * SolverCG<>              cg (solver_control);
+ * cg.solve (system_matrix, solution, system_rhs,
+ *          IdentityMatrix(solution.size()));
+ * @endcode
+ *
+ *
+ * @author Wolfgang Bangerth, 2006
+ */
+class IdentityMatrix
+{
+public:
+  /**
+   * Declare type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * Default constructor. Creates a zero-sized matrix that should be resized
+   * later on using the reinit() function.
+   */
+  IdentityMatrix ();
+
+  /**
+   * Constructor. Creates an identity matrix of size #n.
+   */
+  IdentityMatrix (const size_type n);
+
+  /**
+   * Resize the matrix to be of size #n by #n.
+   */
+  void reinit (const size_type n);
+
+  /**
+   * Number of rows of this matrix. For the present matrix, the number of rows
+   * and columns are equal, of course.
+   */
+  size_type m () const;
+
+  /**
+   * Number of columns of this matrix. For the present matrix, the number of
+   * rows and columns are equal, of course.
+   */
+  size_type n () const;
+
+  /**
+   * Matrix-vector multiplication. Both vectors must have size m(); the
+   * product then amounts to copying the input vector to the output vector.
+   */
+  template <typename OutVectorType, typename InVectorType>
+  void vmult (OutVectorType      &out,
+              const InVectorType &in) const;
+
+  /**
+   * Matrix-vector multiplication with addition to the output vector. Both
+   * vectors must have size m(); the operation then amounts to adding the
+   * input vector to the output vector.
+   */
+  template <typename OutVectorType, typename InVectorType>
+  void vmult_add (OutVectorType      &out,
+                  const InVectorType &in) const;
+
+  /**
+   * Matrix-vector multiplication with the transpose matrix. Both vectors must
+   * have size m(); since the identity is symmetric, this amounts to copying
+   * the input vector to the output vector.
+   */
+  template <typename OutVectorType, typename InVectorType>
+  void Tvmult (OutVectorType      &out,
+               const InVectorType &in) const;
+
+
+  /**
+   * Matrix-vector multiplication with the transpose matrix, with addition to
+   * the output vector. Both vectors must have size m(); the operation then
+   * amounts to adding the input vector to the output vector.
+   */
+  template <typename OutVectorType, typename InVectorType>
+  void Tvmult_add (OutVectorType      &out,
+                   const InVectorType &in) const;
+private:
+
+  /**
+   * Number of rows and columns of this matrix.
+   */
+  size_type size;
+};
+
+
+
+
+// ------------------------- inline and template functions -------------
+#ifndef DOXYGEN
+
+
+inline
+IdentityMatrix::IdentityMatrix ()
+  :
+  size (0) // zero rows and columns until reinit() assigns a size
+{}
+
+
+
+inline
+IdentityMatrix::IdentityMatrix (const size_type n)
+  :
+  size (n) // n rows and n columns
+{}
+
+
+
+inline
+void
+IdentityMatrix::reinit (const size_type n)
+{
+  size = n; // the dimension is the only stored state, so resizing is a plain assignment
+}
+
+
+
+inline
+IdentityMatrix::size_type
+IdentityMatrix::m () const
+{
+  return size; // square matrix: same value as n()
+}
+
+
+
+inline
+IdentityMatrix::size_type
+IdentityMatrix::n () const
+{
+  return size; // square matrix: same value as m()
+}
+
+
+
+template <typename OutVectorType, typename InVectorType>
+inline
+void
+IdentityMatrix::vmult (OutVectorType      &out,
+                       const InVectorType &in) const
+{
+  Assert (out.size() == size, ExcDimensionMismatch (out.size(), size));
+  Assert (in.size() == size, ExcDimensionMismatch (in.size(), size));
+  // both sizes match the matrix: multiplying by the identity is a plain copy
+  out = in;
+}
+
+
+
+template <typename OutVectorType, typename InVectorType>
+inline
+void
+IdentityMatrix::vmult_add (OutVectorType      &out,
+                           const InVectorType &in) const
+{
+  Assert (out.size() == size, ExcDimensionMismatch (out.size(), size));
+  Assert (in.size() == size, ExcDimensionMismatch (in.size(), size));
+  // both sizes match the matrix: adding I*in to out is just out += in
+  out += in;
+}
+
+
+
+template <typename OutVectorType, typename InVectorType>
+inline
+void
+IdentityMatrix::Tvmult (OutVectorType      &out,
+                        const InVectorType &in) const
+{
+  Assert (out.size() == size, ExcDimensionMismatch (out.size(), size));
+  Assert (in.size() == size, ExcDimensionMismatch (in.size(), size));
+  // the identity equals its transpose, so this is the same copy as in vmult()
+  out = in;
+}
+
+
+
+template <typename OutVectorType, typename InVectorType>
+inline
+void
+IdentityMatrix::Tvmult_add (OutVectorType      &out,
+                            const InVectorType &in) const
+{
+  Assert (out.size() == size, ExcDimensionMismatch (out.size(), size));
+  Assert (in.size() == size, ExcDimensionMismatch (in.size(), size));
+  // the identity equals its transpose, so this is the same update as in vmult_add()
+  out += in;
+}
+
+
+#endif
+
+/**@}*/
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/iterative_inverse.h b/include/deal.II/lac/iterative_inverse.h
new file mode 100644
index 0000000..8ca447f
--- /dev/null
+++ b/include/deal.II/lac/iterative_inverse.h
@@ -0,0 +1,180 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__iterative_inverse_h
+#define dealii__iterative_inverse_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/lac/solver_selector.h>
+#include <deal.II/lac/vector_memory.h>
+#include <deal.II/lac/pointer_matrix.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/**
+ * Implementation of the inverse of a matrix, using an iterative method.
+ *
+ * The function vmult() of this class starts an iterative solver in order to
+ * approximate the action of the inverse matrix.
+ *
+ * Krylov space methods like SolverCG or SolverBicgstab become inefficient if
+ * solution down to machine accuracy is needed. This is due to the fact that
+ * round-off errors spoil the orthogonality of the vector sequences.
+ * Therefore, a nested iteration of two methods is proposed: The outer method
+ * is SolverRichardson, since it is robust with respect to round-off errors.
+ * The inner loop is an appropriate Krylov space method, since it is fast.
+ *
+ * @code
+ * // Declare related objects
+ * SparseMatrix<double> A;
+ * Vector<double> x;
+ * Vector<double> b;
+ * GrowingVectorMemory<Vector<double> > mem;
+ *
+ * ReductionControl inner_control (10, 1.e-30, 1.e-2);
+ * PreconditionSSOR <SparseMatrix<double> > inner_precondition;
+ * inner_precondition.initialize (A, 1.2);
+ *
+ * IterativeInverse<Vector<double> > precondition;
+ * precondition.initialize (A, inner_precondition);
+ * precondition.solver.select("cg");
+ * precondition.solver.set_control(inner_control);
+ *
+ * SolverControl outer_control(100, 1.e-16);
+ * SolverRichardson<Vector<double> > outer_iteration;
+ *
+ * outer_iteration.solve (A, x, b, precondition);
+ * @endcode
+ *
+ * Each time we call the inner loop, reduction of the residual by a factor
+ * <tt>1.e-2</tt> is attempted. Since the right hand side vector of the inner
+ * iteration is the residual of the outer loop, the relative errors are far
+ * from machine accuracy, even if the errors of the outer loop are in the
+ * range of machine accuracy.
+ *
+ * @deprecated If deal.II was configured with C++11 support, use the
+ * LinearOperator class instead, see the module on
+ * @ref LAOperators "linear operators"
+ * for further details.
+ *
+ * @ingroup Matrix2
+ * @author Guido Kanschat
+ * @date 2010
+ */
+template <typename VectorType>
+class IterativeInverse : public Subscriptor
+{
+public:
+  /**
+   * Initialization function. Wraps the given matrix and preconditioner in
+   * PointerMatrixBase objects that vmult() later hands to the solver.
+   */
+  template <typename MatrixType, typename PreconditionerType>
+  void initialize (const MatrixType &, const PreconditionerType &);
+
+  /**
+   * Drop the stored matrix and preconditioner; initialize() is needed before the next vmult().
+   */
+  void clear();
+
+  /**
+   * Solve for right hand side <tt>src</tt>; <tt>dst</tt> is set to zero first and then passed to the solver.
+   */
+  void vmult (VectorType &dst, const VectorType &src) const;
+
+  /**
+   * Solve for right hand side <tt>src</tt> when the vectors given to this
+   * function have a different type from the vectors used by the inner solver;
+   * the data is copied through temporary vectors of type VectorType.
+   */
+  template <class OtherVectorType>
+  void vmult (OtherVectorType &dst, const OtherVectorType &src) const;
+
+  /**
+   * The solver, which allows selection of the actual solver as well as
+   * adjustment of parameters.
+   */
+  SolverSelector<VectorType> solver;
+
+private:
+  /**
+   * The matrix in use.
+   */
+  std_cxx11::shared_ptr<PointerMatrixBase<VectorType> > matrix;
+
+  /**
+   * The preconditioner to use.
+   */
+  std_cxx11::shared_ptr<PointerMatrixBase<VectorType> > preconditioner;
+};
+
+
+template <typename VectorType>
+template <typename MatrixType, typename PreconditionerType>
+inline
+void
+IterativeInverse<VectorType>::initialize(const MatrixType &m, const PreconditionerType &p)
+{
+  // typed null placeholder for the vector argument of new_pointer_matrix_base() -- NOTE(review): presumably only its type matters; confirm
+  VectorType *const vector_tag = 0;
+  matrix.reset (new_pointer_matrix_base(m, *vector_tag));
+  preconditioner.reset (new_pointer_matrix_base(p, *vector_tag));
+}
+
+
+template <typename VectorType>
+inline
+void
+IterativeInverse<VectorType>::clear()
+{
+  matrix.reset();         // let go of the wrapped matrix
+  preconditioner.reset(); // let go of the wrapped preconditioner
+}
+
+
+template <typename VectorType>
+inline void
+IterativeInverse<VectorType>::vmult (VectorType &dst, const VectorType &src) const
+{
+  Assert(matrix.get() != 0, ExcNotInitialized()); // initialize() must have stored a matrix
+  Assert(preconditioner.get() != 0, ExcNotInitialized()); // ... and a preconditioner
+  dst = 0.; // clear dst before it is handed to the solver
+  solver.solve(*matrix, dst, src, *preconditioner);
+}
+
+
+template <typename VectorType>
+template <class OtherVectorType>
+inline void
+IterativeInverse<VectorType>::vmult (OtherVectorType &dst, const OtherVectorType &src) const
+{
+  Assert(matrix.get() != 0, ExcNotInitialized()); // initialize() must have stored a matrix
+  Assert(preconditioner.get() != 0, ExcNotInitialized()); // ... and a preconditioner
+  GrowingVectorMemory<VectorType> mem;
+  typename VectorMemory<VectorType>::Pointer sol(mem); // temporaries of the solver's own vector type, tied to mem
+  typename VectorMemory<VectorType>::Pointer rhs(mem);
+  sol->reinit(dst); // presumably sizes sol like dst -- confirm against the reinit() of VectorType
+  *rhs = src; // convert the right hand side to VectorType
+  solver.solve(*matrix, *sol, *rhs, *preconditioner);
+  dst = *sol; // copy the solver's result back into the caller's vector type
+}
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/lapack_full_matrix.h b/include/deal.II/lac/lapack_full_matrix.h
new file mode 100644
index 0000000..ecf25ee
--- /dev/null
+++ b/include/deal.II/lac/lapack_full_matrix.h
@@ -0,0 +1,837 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__lapack_full_matrix_h
+#define dealii__lapack_full_matrix_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/base/table.h>
+#include <deal.II/lac/lapack_support.h>
+#include <deal.II/lac/vector_memory.h>
+
+#include <deal.II/base/std_cxx11/shared_ptr.h>
+#include <vector>
+#include <complex>
+
+DEAL_II_NAMESPACE_OPEN
+
+// forward declarations
+template<typename number> class Vector;
+template<typename number> class BlockVector;
+template<typename number> class FullMatrix;
+template<typename number> class SparseMatrix;
+
+
+/**
+ * A variant of FullMatrix using LAPACK functions wherever possible. In order
+ * to do this, the matrix is stored in transposed order. The element access
+ * functions hide this fact by reverting the transposition.
+ *
+ * @note In order to perform LAPACK functions, the class contains a lot of
+ * auxiliary data in the private section. The names of these data vectors are
+ * usually the names chosen for the arguments in the LAPACK documentation.
+ *
+ * @ingroup Matrix1
+ * @author Guido Kanschat, 2005
+ */
+template <typename number>
+class LAPACKFullMatrix : public TransposeTable<number>
+{
+public:
+
+  /**
+   * Declare type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * Constructor. Initialize the matrix as a square matrix with dimension
+   * <tt>size</tt>.
+   *
+   * In order to avoid the implicit conversion of integers and other types to
+   * a matrix, this constructor is declared <tt>explicit</tt>.
+   *
+   * By default, no memory is allocated.
+   */
+  explicit LAPACKFullMatrix (const size_type size = 0);
+
+
+  /**
+   * Constructor. Initialize the matrix as a rectangular matrix.
+   */
+  LAPACKFullMatrix (const size_type rows,
+                    const size_type cols);
+
+
+  /**
+   * Copy constructor. This constructor does a deep copy of the matrix.
+   * Therefore, it poses a possible efficiency problem, if for example,
+   * function arguments are passed by value rather than by reference.
+   * Unfortunately, we can't mark this copy constructor <tt>explicit</tt>,
+   * since that prevents the use of this class in containers, such as
+   * <tt>std::vector</tt>. The responsibility to check performance of programs
+   * must therefore remain with the user of this class.
+   */
+  LAPACKFullMatrix (const LAPACKFullMatrix &);
+
+  /**
+   * Assignment operator.
+   */
+  LAPACKFullMatrix<number> &
+  operator = (const LAPACKFullMatrix<number> &);
+
+  /**
+   * Assignment operator from a regular FullMatrix.
+   *
+   * @note Since LAPACK expects matrices in transposed order, this
+   * transposition is included here.
+   */
+  template <typename number2>
+  LAPACKFullMatrix<number> &
+  operator = (const FullMatrix<number2> &);
+
+  /**
+   * Assignment operator from a regular SparseMatrix.
+   *
+   * @note Since LAPACK expects matrices in transposed order, this
+   * transposition is included here.
+   */
+  template <typename number2>
+  LAPACKFullMatrix<number> &
+  operator = (const SparseMatrix<number2> &);
+
+  /**
+   * This operator assigns a scalar to a matrix. To avoid confusion with
+   * constructors, zero is the only value allowed for <tt>d</tt>
+   */
+  LAPACKFullMatrix<number> &
+  operator = (const double d);
+
+  /**
+   * Assignment from different matrix classes, performing the usual conversion
+   * to the transposed format expected by LAPACK. This assignment operator
+   * uses iterators of the typename MatrixType. Therefore, sparse matrices are
+   * possible sources.
+   */
+  template <typename MatrixType>
+  void copy_from (const MatrixType &);
+
+  /**
+   * Regenerate the current matrix by one that has the same properties as if
+   * it were created by the constructor of this class with the same argument
+   * list as this present function.
+   */
+  void reinit (const size_type size);
+
+  /**
+   * Regenerate the current matrix by one that has the same properties as if
+   * it were created by the constructor of this class with the same argument
+   * list as this present function.
+   */
+  void reinit (const size_type rows,
+               const size_type cols);
+
+  /**
+   * Return the dimension of the codomain (or range) space.
+   *
+   * @note The matrix is of dimension $m \times n$.
+   */
+  unsigned int m () const;
+
+  /**
+   * Return the dimension of the domain space.
+   *
+   * @note The matrix is of dimension $m \times n$.
+   */
+  unsigned int n () const;
+
+  /**
+   * Fill rectangular block.
+   *
+   * A rectangular block of the matrix <tt>src</tt> is copied into
+   * <tt>this</tt>. The upper left corner of the block being copied is
+   * <tt>(src_offset_i,src_offset_j)</tt> in <tt>src</tt>.  It is placed at
+   * <tt>(dst_offset_i,dst_offset_j)</tt> in <tt>this</tt>.  The size of the
+   * rectangular block being copied is the maximum size possible, determined
+   * either by the size of <tt>this</tt> or <tt>src</tt>.
+   *
+   * The final two arguments allow entering a multiple of the source or its
+   * transpose.
+   */
+  template<typename MatrixType>
+  void fill (const MatrixType &src,
+             const size_type dst_offset_i = 0,
+             const size_type dst_offset_j = 0,
+             const size_type src_offset_i = 0,
+             const size_type src_offset_j = 0,
+             const number factor = 1.,
+             const bool transpose = false);
+
+
+  /**
+   * Matrix-vector-multiplication.
+   *
+   * Depending on previous transformations recorded in #state, the result of
+   * this function is one of
+   * <ul>
+   * <li> If #state is LAPACKSupport::matrix or LAPACKSupport::inverse_matrix,
+   * this will be a regular matrix vector product using LAPACK gemv().
+   * <li> If #state is LAPACKSupport::svd or LAPACKSupport::inverse_svd, this
+   * function first multiplies with the right transformation matrix, then with
+   * the diagonal matrix of singular values or their reciprocal values, and
+   * finally with the left transformation matrix.
+   * </ul>
+   *
+   * The optional parameter <tt>adding</tt> determines, whether the result is
+   * stored in <tt>w</tt> or added to <tt>w</tt>.
+   *
+   * if (adding) <i>w += A*v</i>
+   *
+   * if (!adding) <i>w = A*v</i>
+   *
+   * @note Source and destination must not be the same vector.
+   *
+   * @note The template with @p number2 only exists for compile-time
+   * compatibility with FullMatrix. Only the case @p number2 = @p number is
+   * implemented due to limitations in the underlying LAPACK interface. All
+   * other variants throw an error upon invocation.
+   */
+  template <typename number2>
+  void vmult (Vector<number2>       &w,
+              const Vector<number2> &v,
+              const bool             adding = false) const;
+
+  /**
+   * Specialization of above function for compatible Vector::value_type.
+   */
+  void vmult (Vector<number>       &w,
+              const Vector<number> &v,
+              const bool            adding = false) const;
+
+  /**
+   * Adding Matrix-vector-multiplication.  <i>w += A*v</i>
+   *
+   * See the documentation of vmult() for details on the implementation.
+   */
+  template <typename number2>
+  void vmult_add (Vector<number2>       &w,
+                  const Vector<number2> &v) const;
+
+  /**
+   * Specialization of above function for compatible Vector::value_type.
+   */
+  void vmult_add (Vector<number>       &w,
+                  const Vector<number> &v) const;
+
+  /**
+   * Transpose matrix-vector-multiplication.
+   *
+   * The optional parameter <tt>adding</tt> determines, whether the result is
+   * stored in <tt>w</tt> or added to <tt>w</tt>.
+   *
+   * if (adding) <i>w += A<sup>T</sup>*v</i>
+   *
+   * if (!adding) <i>w = A<sup>T</sup>*v</i>
+   *
+   * See the documentation of vmult() for details on the implementation.
+   */
+  template <typename number2>
+  void Tvmult (Vector<number2>       &w,
+               const Vector<number2> &v,
+               const bool             adding=false) const;
+
+  /**
+   * Specialization of above function for compatible Vector::value_type.
+   */
+  void Tvmult (Vector<number>       &w,
+               const Vector<number> &v,
+               const bool            adding=false) const;
+
+  /**
+   * Adding transpose matrix-vector-multiplication. <i>w +=
+   * A<sup>T</sup>*v</i>
+   *
+   * See the documentation of vmult() for details on the implementation.
+   */
+  template <typename number2>
+  void Tvmult_add (Vector<number2>       &w,
+                   const Vector<number2> &v) const;
+
+  /**
+   * Specialization of above function for compatible Vector::value_type.
+   */
+  void Tvmult_add (Vector<number>       &w,
+                   const Vector<number> &v) const;
+
+
+  /**
+   * Matrix-matrix-multiplication.
+   *
+   * The optional parameter <tt>adding</tt> determines, whether the result is
+   * stored in <tt>C</tt> or added to <tt>C</tt>.
+   *
+   * if (adding) <i>C += A*B</i>
+   *
+   * if (!adding) <i>C = A*B</i>
+   *
+   * Assumes that <tt>A</tt> and <tt>B</tt> have compatible sizes and that
+   * <tt>C</tt> already has the right size.
+   *
+   * This function uses the BLAS function Xgemm.
+   */
+  void mmult (LAPACKFullMatrix<number>       &C,
+              const LAPACKFullMatrix<number> &B,
+              const bool                      adding=false) const;
+
+  /**
+   * Same as before, but stores the result in a FullMatrix, not in a
+   * LAPACKFullMatrix.
+   */
+  void mmult (FullMatrix<number>             &C,
+              const LAPACKFullMatrix<number> &B,
+              const bool                      adding=false) const;
+
+  /**
+   * Matrix-matrix-multiplication using transpose of <tt>this</tt>.
+   *
+   * The optional parameter <tt>adding</tt> determines, whether the result is
+   * stored in <tt>C</tt> or added to <tt>C</tt>.
+   *
+   * if (adding) <i>C += A<sup>T</sup>*B</i>
+   *
+   * if (!adding) <i>C = A<sup>T</sup>*B</i>
+   *
+   * Assumes that <tt>A</tt> and <tt>B</tt> have compatible sizes and that
+   * <tt>C</tt> already has the right size.
+   *
+   * This function uses the BLAS function Xgemm.
+   */
+  void Tmmult (LAPACKFullMatrix<number>       &C,
+               const LAPACKFullMatrix<number> &B,
+               const bool                      adding=false) const;
+
+  /**
+   * Same as before, but stores the result in a FullMatrix, not in a
+   * LAPACKFullMatrix.
+   */
+  void Tmmult (FullMatrix<number>             &C,
+               const LAPACKFullMatrix<number> &B,
+               const bool                      adding=false) const;
+
+  /**
+   * Matrix-matrix-multiplication using transpose of <tt>B</tt>.
+   *
+   * The optional parameter <tt>adding</tt> determines, whether the result is
+   * stored in <tt>C</tt> or added to <tt>C</tt>.
+   *
+   * if (adding) <i>C += A*B<sup>T</sup></i>
+   *
+   * if (!adding) <i>C = A*B<sup>T</sup></i>
+   *
+   * Assumes that <tt>A</tt> and <tt>B</tt> have compatible sizes and that
+   * <tt>C</tt> already has the right size.
+   *
+   * This function uses the BLAS function Xgemm.
+   */
+  void mTmult (LAPACKFullMatrix<number>       &C,
+               const LAPACKFullMatrix<number> &B,
+               const bool                      adding=false) const;
+
+  /**
+   * Same as before, but stores the result in a FullMatrix, not in a
+   * LAPACKFullMatrix.
+   */
+  void mTmult (FullMatrix<number>             &C,
+               const LAPACKFullMatrix<number> &B,
+               const bool                      adding=false) const;
+
+  /**
+   * Matrix-matrix-multiplication using transpose of <tt>this</tt> and
+   * <tt>B</tt>.
+   *
+   * The optional parameter <tt>adding</tt> determines, whether the result is
+   * stored in <tt>C</tt> or added to <tt>C</tt>.
+   *
+   * if (adding) <i>C += A<sup>T</sup>*B<sup>T</sup></i>
+   *
+   * if (!adding) <i>C = A<sup>T</sup>*B<sup>T</sup></i>
+   *
+   * Assumes that <tt>A</tt> and <tt>B</tt> have compatible sizes and that
+   * <tt>C</tt> already has the right size.
+   *
+   * This function uses the BLAS function Xgemm.
+   */
+  void TmTmult (LAPACKFullMatrix<number>       &C,
+                const LAPACKFullMatrix<number> &B,
+                const bool                      adding=false) const;
+
+  /**
+   * Same as before, but stores the result in a FullMatrix, not in a
+   * LAPACKFullMatrix.
+   */
+  void TmTmult (FullMatrix<number>             &C,
+                const LAPACKFullMatrix<number> &B,
+                const bool                      adding=false) const;
+
+  /**
+   * Compute the LU factorization of the matrix using LAPACK function Xgetrf.
+   */
+  void compute_lu_factorization ();
+
+  /**
+   * Invert the matrix by first computing an LU factorization with the LAPACK
+   * function Xgetrf and then building the actual inverse using Xgetri.
+   */
+  void invert ();
+
+  /**
+   * Solve the linear system with right hand side given by applying
+   * forward/backward substitution to the previously computed LU
+   * factorization. Uses LAPACK function Xgetrs.
+   *
+   * The flag transposed indicates whether the solution of the transposed
+   * system is to be performed.
+   */
+  void apply_lu_factorization (Vector<number> &v,
+                               const bool      transposed) const;
+
+  /**
+   * Solve the linear system with multiple right hand sides (as many as there
+   * are columns in the matrix b) given by applying forward/backward
+   * substitution to the previously computed LU factorization. Uses LAPACK
+   * function Xgetrs.
+   *
+   * The flag transposed indicates whether the solution of the transposed
+   * system is to be performed.
+   */
+  void apply_lu_factorization (LAPACKFullMatrix<number> &B,
+                               const bool                transposed) const;
+
+  /**
+   * Compute eigenvalues of the matrix. After this routine has been called,
+   * eigenvalues can be retrieved using the eigenvalue() function. The matrix
+   * itself will be LAPACKSupport::unusable after this operation.
+   *
+   * The optional arguments allow to compute left and right eigenvectors as
+   * well.
+   *
+   * Note that the function does not return the computed eigenvalues right
+   * away since that involves copying data around between the output arrays of
+   * the LAPACK functions and any return array. This is often unnecessary
+   * since one may not be interested in all eigenvalues at once, but for
+   * example only the extreme ones. In that case, it is cheaper to just have
+   * this function compute the eigenvalues and have a separate function that
+   * returns whatever eigenvalue is requested.
+   *
+   * @note Calls the LAPACK function Xgeev.
+   */
+  void compute_eigenvalues (const bool right_eigenvectors = false,
+                            const bool left_eigenvectors  = false);
+
+  /**
+   * Compute eigenvalues and eigenvectors of a real symmetric matrix. Only
+   * eigenvalues in the interval (lower_bound, upper_bound] are computed with
+   * the absolute tolerance abs_accuracy. An approximate eigenvalue is
+   * accepted as converged when it is determined to lie in an interval [a,b]
+   * of width less than or equal to abs_accuracy + eps * max( |a|,|b| ), where
+   * eps is the machine precision.  If abs_accuracy is less than or equal to
+   * zero, then eps*|t| will be used in its place, where |t| is the 1-norm of
+   * the tridiagonal matrix obtained by reducing A to tridiagonal form.
+   * Eigenvalues will be computed most accurately when abs_accuracy is set to
+   * twice the underflow threshold, not zero.  After this routine has been
+   * called, all eigenvalues in (lower_bound, upper_bound] will be stored in
+   * eigenvalues and the corresponding eigenvectors will be stored in the
+   * columns of eigenvectors, whose dimension is set accordingly.
+   *
+   * @note Calls the LAPACK function Xsyevx. For this to work, deal.II must be
+   * configured to use LAPACK.
+   */
+  void compute_eigenvalues_symmetric (const number        lower_bound,
+                                      const number        upper_bound,
+                                      const number        abs_accuracy,
+                                      Vector<number>     &eigenvalues,
+                                      FullMatrix<number> &eigenvectors);
+
+  /**
+   * Compute generalized eigenvalues and eigenvectors of a real generalized
+   * symmetric eigenproblem of the form $Ax=\lambda B x$ (itype = 1),
+   * $ABx=\lambda x$ (itype = 2) or $BAx=\lambda x$ (itype = 3), where A is this
+   * matrix.  A and B are assumed symmetric, and B has to be positive definite.
+   * Only eigenvalues in the interval (lower_bound, upper_bound] are computed
+   * with the absolute tolerance abs_accuracy.  An approximate eigenvalue is
+   * accepted as converged when it is determined to lie in an interval [a,b]
+   * of width less than or equal to abs_accuracy + eps * max( |a|,|b| ), where
+   * eps is the machine precision.  If abs_accuracy is less than or equal to
+   * zero, then eps*|t| will be used in its place, where |t| is the 1-norm of
+   * the tridiagonal matrix obtained by reducing A to tridiagonal form.
+   * Eigenvalues will be computed most accurately when abs_accuracy is set to
+   * twice the underflow threshold, not zero.  After this routine has been
+   * called, all eigenvalues in (lower_bound, upper_bound] will be stored in
+   * eigenvalues and the corresponding eigenvectors will be stored in
+   * eigenvectors, whose dimension is set accordingly.
+   *
+   * @note Calls the LAPACK function Xsygvx. For this to work, deal.II must be
+   * configured to use LAPACK.
+   */
+  void compute_generalized_eigenvalues_symmetric (LAPACKFullMatrix<number>     &B,
+                                                  const number                  lower_bound,
+                                                  const number                  upper_bound,
+                                                  const number                  abs_accuracy,
+                                                  Vector<number>               &eigenvalues,
+                                                  std::vector<Vector<number> > &eigenvectors,
+                                                  const int                     itype = 1);
+
+  /**
+   * Same as the other compute_generalized_eigenvalues_symmetric function
+   * except that all eigenvalues are computed and the tolerance is set
+   * automatically.  Note that this function does not return the computed
+   * eigenvalues right away since that involves copying data around between
+   * the output arrays of the LAPACK functions and any return array. This is
+   * often unnecessary since one may not be interested in all eigenvalues at
+   * once, but for example only the extreme ones. In that case, it is cheaper
+   * to just have this function compute the eigenvalues and have a separate
+   * function that returns whatever eigenvalue is requested. Eigenvalues can
+   * be retrieved using the eigenvalue() function.  The number of computed
+   * eigenvectors is equal to eigenvectors.size().
+   *
+   * @note Calls the LAPACK function Xsygv. For this to work, deal.II must be
+   * configured to use LAPACK.
+   */
+  void compute_generalized_eigenvalues_symmetric (LAPACKFullMatrix<number>     &B,
+                                                  std::vector<Vector<number> > &eigenvectors,
+                                                  const int                     itype = 1);
+
+  /**
+   * Compute the singular value decomposition of the matrix using LAPACK
+   * function Xgesdd.
+   *
+   * Requires that the #state is LAPACKSupport::matrix, fills the data members
+   * #wr, #svd_u, and #svd_vt, and leaves the object in the #state
+   * LAPACKSupport::svd.
+   */
+  void compute_svd ();
+
+  /**
+   * Compute the inverse of the matrix by singular value decomposition.
+   *
+   * Requires that #state is either LAPACKSupport::matrix or
+   * LAPACKSupport::svd. In the first case, this function calls compute_svd().
+   * After this function, the object will have the #state
+   * LAPACKSupport::inverse_svd.
+   *
+   * For a singular value decomposition, the inverse is simply computed by
+   * replacing all singular values by their reciprocal values. If the matrix
+   * does not have maximal rank, singular values 0 are not touched, thus
+   * computing the minimal norm right inverse of the matrix.
+   *
+   * The parameter <tt>threshold</tt> determines when a singular value should
+   * be considered zero. It is the ratio of the smallest to the largest
+   * nonzero singular value <i>s</i><sub>max</sub>. Thus, the inverses of all
+   * singular values less than <i>s</i><sub>max</sub>/<tt>threshold</tt> will
+   * be set to zero.
+   */
+  void compute_inverse_svd (const double threshold = 0.);
+
+  /**
+   * Retrieve eigenvalue after compute_eigenvalues() was called.
+   */
+  std::complex<number>
+  eigenvalue (const size_type i) const;
+
+  /**
+   * Retrieve singular values after compute_svd() or compute_inverse_svd() was
+   * called.
+   */
+  number
+  singular_value (const size_type i) const;
+
+  /**
+   * Print the matrix and allow formatting of entries.
+   *
+   * The parameters allow for a flexible setting of the output format:
+   *
+   * @arg <tt>precision</tt> denotes the number of trailing digits.
+   *
+   * @arg <tt>scientific</tt> is used to determine the number format, where
+   * <tt>scientific</tt> = <tt>false</tt> means fixed point notation.
+   *
+   * @arg <tt>width</tt> denotes the width of each column. A zero entry for
+   * <tt>width</tt> makes the function compute a width, but it may be changed
+   * to a positive value, if output is crude.
+   *
+   * @arg <tt>zero_string</tt> specifies a string printed for zero entries.
+   *
+   * @arg <tt>denominator</tt> Multiply the whole matrix by this common
+   * denominator to get nicer numbers.
+   *
+   * @arg <tt>threshold</tt>: all entries with absolute value smaller than
+   * this are considered zero.
+   */
+  void print_formatted (std::ostream       &out,
+                        const unsigned int  precision   = 3,
+                        const bool          scientific  = true,
+                        const unsigned int  width       = 0,
+                        const char         *zero_string = " ",
+                        const double        denominator = 1.,
+                        const double        threshold   = 0.) const;
+
+private:
+
+  /**
+   * Since LAPACK operations notoriously change the meaning of the matrix
+   * entries, we record the current state after the last operation here.
+   */
+  LAPACKSupport::State state;
+
+  /**
+   * Additional properties of the matrix which may help to select more
+   * efficient LAPACK functions.
+   */
+  LAPACKSupport::Properties properties;
+
+  /**
+   * The working array used for some LAPACK functions.
+   */
+  mutable std::vector<number> work;
+
+  /**
+   * The vector storing the permutations applied for pivoting in the LU-
+   * factorization.
+   *
+   * Also used as the scratch array IWORK for LAPACK functions needing it.
+   */
+  std::vector<int> ipiv;
+
+  /**
+   * Workspace for calculating the inverse matrix from an LU factorization.
+   */
+  std::vector<number> inv_work;
+
+  /**
+   * Real parts of eigenvalues or the singular values. Filled by
+   * compute_eigenvalues() or compute_svd().
+   */
+  std::vector<number> wr;
+
+  /**
+   * Imaginary parts of eigenvalues. Filled by compute_eigenvalues().
+   */
+  std::vector<number> wi;
+
+  /**
+   * Space where left eigenvectors can be stored.
+   */
+  std::vector<number> vl;
+
+  /**
+   * Space where right eigenvectors can be stored.
+   */
+  std::vector<number> vr;
+
+  /**
+   * The matrix <i>U</i> in the singular value decomposition
+   * <i>USV<sup>T</sup></i>.
+   */
+  std_cxx11::shared_ptr<LAPACKFullMatrix<number> > svd_u;
+
+  /**
+   * The matrix <i>V<sup>T</sup></i> in the singular value decomposition
+   * <i>USV<sup>T</sup></i>.
+   */
+  std_cxx11::shared_ptr<LAPACKFullMatrix<number> > svd_vt;
+};
+
+
+
+/**
+ * A preconditioner based on the LU-factorization of LAPACKFullMatrix. The
+ * object itself only holds SmartPointers to a matrix and a VectorMemory.
+ * @ingroup Preconditioners
+ * @author Guido Kanschat, 2006
+ */
+template <typename number>
+class PreconditionLU
+  :
+  public Subscriptor
+{
+public:
+  void initialize(const LAPACKFullMatrix<number> &); // NOTE(review): presumably attaches the matrix - confirm in the implementation
+  void initialize(const LAPACKFullMatrix<number> &,
+                  VectorMemory<Vector<number> > &);
+  void vmult(Vector<number> &, const Vector<number> &) const; // NOTE(review): presumably applies the LU solve - confirm
+  void Tvmult(Vector<number> &, const Vector<number> &) const; // NOTE(review): presumably the transposed counterpart - confirm
+  void vmult(BlockVector<number> &,
+             const BlockVector<number> &) const;
+  void Tvmult(BlockVector<number> &,
+              const BlockVector<number> &) const;
+private: // the two pointers below are the only data members of this class
+  SmartPointer<const LAPACKFullMatrix<number>,PreconditionLU<number> > matrix;
+  SmartPointer<VectorMemory<Vector<number> >,PreconditionLU<number> > mem;
+};
+
+/*---------------------- Inline functions -----------------------------------*/
+
+template <typename number>
+inline unsigned int
+LAPACKFullMatrix<number>::m () const
+{
+  // the row count is whatever n_rows() of this object reports
+  return this->n_rows ();
+}
+
+template <typename number>
+inline unsigned int
+LAPACKFullMatrix<number>::n () const
+{
+  // the column count is whatever n_cols() of this object reports
+  return this->n_cols ();
+}
+
+template <typename number>
+template <typename MatrixType>
+inline void
+LAPACKFullMatrix<number>::copy_from (const MatrixType &M)
+{
+  this->reinit (M.m(), M.n());
+
+  // Walk through the source matrix one row at a time using its row
+  // iterators (the access pattern recommended for sparse matrix
+  // iterators) and transfer every stored entry into this object.
+  for (size_type r = 0; r < M.m(); ++r)
+    {
+      const typename MatrixType::const_iterator row_end = M.end(r);
+      typename MatrixType::const_iterator it = M.begin(r);
+      for (; it != row_end; ++it)
+        this->el(r, it->column()) = it->value();
+    }
+
+  state = LAPACKSupport::matrix;
+}
+
+
+
+template <typename number>
+template <typename MatrixType>
+inline void
+LAPACKFullMatrix<number>::fill (const MatrixType &M,
+                                const size_type   dst_offset_i,
+                                const size_type   dst_offset_j,
+                                const size_type   src_offset_i,
+                                const size_type   src_offset_j,
+                                const number      factor,
+                                const bool        transpose)
+{
+  // Go through the rows of M, starting at src_offset_i, via its row iterators
+  // and store factor times each entry at the offset-shifted target position.
+  for (size_type r = src_offset_i; r < M.m(); ++r)
+    {
+      const typename MatrixType::const_iterator row_end = M.end(r);
+      typename MatrixType::const_iterator it = M.begin(r);
+      for (; it != row_end; ++it)
+        {
+          // when transposing, row and column of the source entry swap roles
+          const size_type src_i = transpose ? it->column() : r;
+          const size_type src_j = transpose ? r : it->column();
+          const size_type dst_i = dst_offset_i + src_i - src_offset_i;
+          const size_type dst_j = dst_offset_j + src_j - src_offset_j;
+          if (dst_i < this->n_rows() && dst_j < this->n_cols())
+            (*this)(dst_i, dst_j) = factor * it->value();
+        }
+    }
+
+  state = LAPACKSupport::matrix;
+}
+
+
+// Generic-vector-type version of vmult: the body consists solely of a failing
+// assertion whose message asks for a matching Vector<double> argument.
+template <typename number>
+template <typename number2>
+void
+LAPACKFullMatrix<number>::vmult (Vector<number2> &, const Vector<number2> &, const bool) const
+{
+  Assert(false,
+         ExcMessage("LAPACKFullMatrix<number>::vmult must be called with a "
+                    "matching Vector<double> vector type."));
+}
+
+
+// Generic-vector-type version of vmult_add: consists solely of a failing assertion.
+template <typename number>
+template <typename number2>
+void
+LAPACKFullMatrix<number>::vmult_add (Vector<number2> &, const Vector<number2> &) const
+{
+  Assert(false,
+         ExcMessage("LAPACKFullMatrix<number>::vmult_add must be called with a "
+                    "matching Vector<double> vector type."));
+}
+
+
+// Generic-vector-type version of Tvmult: the body consists solely of a failing
+// assertion whose message asks for a matching Vector<double> argument.
+template <typename number>
+template <typename number2>
+void
+LAPACKFullMatrix<number>::Tvmult (Vector<number2> &, const Vector<number2> &, const bool) const
+{
+  Assert(false,
+         ExcMessage("LAPACKFullMatrix<number>::Tvmult must be called with a "
+                    "matching Vector<double> vector type."));
+}
+
+
+// Generic-vector-type version of Tvmult_add: consists solely of a failing assertion.
+template <typename number>
+template <typename number2>
+void
+LAPACKFullMatrix<number>::Tvmult_add (Vector<number2> &, const Vector<number2> &) const
+{
+  Assert(false,
+         ExcMessage("LAPACKFullMatrix<number>::Tvmult_add must be called with a "
+                    "matching Vector<double> vector type."));
+}
+
+// Return eigenvalue i, assembled from its real (wr) and imaginary (wi) parts.
+template <typename number> inline std::complex<number>
+LAPACKFullMatrix<number>::eigenvalue (const size_type i) const
+{
+  // State values are consecutive integers, so a plain '&' with eigenvalues (3)
+  // also accepted inverse_matrix, lu and inverse_svd. Ignore only 'unusable'.
+  Assert ((state & ~LAPACKSupport::unusable) == LAPACKSupport::eigenvalues, ExcInvalidState());
+  Assert (wr.size() == this->n_rows(), ExcInternalError());
+  Assert (wi.size() == this->n_rows(), ExcInternalError());
+  Assert (i<this->n_rows(), ExcIndexRange(i,0,this->n_rows()));
+  return std::complex<number>(wr[i], wi[i]);
+}
+
+
+template <typename number>
+inline number
+LAPACKFullMatrix<number>::singular_value (const size_type i) const
+{
+  Assert (state == LAPACKSupport::svd || state == LAPACKSupport::inverse_svd, LAPACKSupport::ExcState(state));
+  AssertIndexRange(i,wr.size());
+  // wr doubles as the storage for the singular values (see its declaration)
+  return wr[i];
+}
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/lapack_support.h b/include/deal.II/lac/lapack_support.h
new file mode 100644
index 0000000..22c7815
--- /dev/null
+++ b/include/deal.II/lac/lapack_support.h
@@ -0,0 +1,162 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__lapack_support_h
+#define dealii__lapack_support_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace LAPACKSupport
+{
+  /**
+   * Most LAPACK functions change the contents of the matrix they are applied
+   * to into something which is not a matrix anymore. Therefore, LAPACK matrix
+   * classes in <tt>deal.II</tt> have a state flag indicating what happened to them.
+   *
+   * @author Guido Kanschat, 2005
+   */
+  enum State
+  {
+    /// Contents is actually a matrix.
+    matrix,
+    /// Contents is the inverse of a matrix.
+    inverse_matrix,
+    /// Contents is an LU decomposition.
+    lu,
+    /// Eigenvalue vector is filled
+    eigenvalues,
+    /// Matrix contains singular value decomposition,
+    svd,
+    /// Matrix is the inverse of a singular value decomposition
+    inverse_svd,
+    /// Contents is something useless.
+    unusable = 0x8000
+  };
+
+  /**
+   * Return a printable name for the State @p s.
+   *
+   * A value that equals none of the enumerators is reported as "unknown".
+   */
+  inline const char *state_name(State s)
+  {
+    // compare against every enumerator in the order of their declaration
+    if (s == matrix)
+      return "matrix";
+    if (s == inverse_matrix)
+      return "inverse matrix";
+    if (s == lu)
+      return "lu decomposition";
+    if (s == eigenvalues)
+      return "eigenvalues";
+    if (s == svd)
+      return "svd";
+    if (s == inverse_svd)
+      return "inverse_svd";
+    if (s == unusable)
+      return "unusable";
+
+    // no enumerator matched
+    return "unknown";
+  }
+
+  /**
+   * A matrix can have certain features allowing for optimization, but hard to
+   * test. These are listed here.
+   */
+  enum Properties
+  {
+    /// No special properties
+    general = 0,
+    /// Matrix is symmetric
+    symmetric = 1,
+    /// Matrix is upper triangular
+    upper_triangle = 2,
+    /// Matrix is lower triangular
+    lower_triangle = 4,
+    /// Matrix is diagonal
+    diagonal = 6,
+    /// Matrix is in upper Hessenberg form
+    hessenberg = 8
+  };
+
+  /**
+   * Character constant.
+   */
+  static const char A = 'A';
+  /**
+   * Character constant.
+   */
+  static const char N = 'N';
+  /**
+   * Character constant.
+   */
+  static const char T = 'T';
+  /**
+   * Character constant.
+   */
+  static const char U = 'U';
+  /**
+   * Character constant.
+   */
+  static const char V = 'V';
+  /**
+   * Integer constant.
+   */
+  static const int zero = 0;
+  /**
+   * Integer constant.
+   */
+  static const int one = 1;
+
+  /**
+   * A LAPACK function returned an error code.
+   */
+  DeclException2(ExcErrorCode, char *, int,
+                 << "The function " << arg1 << " returned with an error code " << arg2);
+
+  /**
+   * Exception thrown when a matrix is not in a suitable state for an
+   * operation. For instance, a LAPACK routine may have left the matrix in an
+   * unusable state, then vmult does not make sense anymore.
+   */
+  DeclException1(ExcState, State,
+                 << "The function cannot be called while the matrix is in state "
+                 << state_name(arg1));
+
+  /**
+   * This exception is thrown if a certain LAPACK function is not available
+   * because no LAPACK installation was detected during configuration.
+   */
+  DeclException1(ExcMissing, char *,
+                 << "When you ran 'cmake' during installation of deal.II, "
+                 << "no suitable installation of the BLAS or LAPACK library could "
+                 << "be found. Consequently, the function <"
+                 << arg1
+                 << "> can not be called. Refer to the doc/readme.html "
+                 << "file for information on how to ensure that deal.II "
+                 << "picks up an existing BLAS and LAPACK installation at "
+                 << "configuration time.");
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/lapack_templates.h b/include/deal.II/lac/lapack_templates.h
new file mode 100644
index 0000000..a3d8896
--- /dev/null
+++ b/include/deal.II/lac/lapack_templates.h
@@ -0,0 +1,1016 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__lapack_templates_h
+#define dealii__lapack_templates_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/lac/lapack_support.h>
+
+extern "C"
+{
+// vector update of the form y += alpha*x with a scalar, x,y vectors
+  void daxpy_ (const int *n, const double *alpha, const double *x,
+               const int *incx, double *y, const int *incy);
+  void saxpy_ (const int *n, const float *alpha, const float *x,
+               const int *incx, float *y, const int *incy);
+// General Matrix
+// Matrix vector product
+  void dgemv_ (const char *trans, const int *m, const int *n,
+               const double *alpha, const double *A, const int *lda,
+               const double *x, const int *incx,
+               const double *b, double *y, const int *incy);
+  void sgemv_ (const char *trans, const int *m, const int *n,
+               const float *alpha, const float *A, const int *lda,
+               const float *x, const int *incx,
+               const float *b, float *y, const int *incy);
+// Matrix matrix product
+  void dgemm_ (const char *transa, const char *transb,
+               const int *m, const int *n, const int *k,
+               const double *alpha, const double *A, const int *lda,
+               const double *B, const int *ldb,
+               const double *beta, double *C, const int *ldc);
+  void sgemm_ (const char *transa, const char *transb,
+               const int *m, const int *n, const int *k,
+               const float *alpha, const float *A, const int *lda,
+               const float *B, const int *ldb,
+               const float *beta, float *C, const int *ldc);
+// Compute LU factorization
+  void dgetrf_ (const int *m, const int *n, double *A,
+                const int *lda, int *ipiv, int *info);
+  void sgetrf_ (const int *m, const int *n, float *A,
+                const int *lda, int *ipiv, int *info);
+// Apply forward/backward substitution to LU factorization
+  void dgetrs_ (const char *trans, const int *n, const int *nrhs,
+                const double *A, const int *lda, const int *ipiv,
+                double *b, const int *ldb, int *info);
+  void sgetrs_ (const char *trans, const int *n, const int *nrhs,
+                const float *A, const int *lda, const int *ipiv,
+                float *b, const int *ldb, int *info);
+// Invert matrix from LU factorization
+  void dgetri_ (const int *n, double *A, const int *lda,
+                int *ipiv, double *inv_work, const int *lwork, int *info);
+  void sgetri_ (const int *n, float *A, const int *lda,
+                int *ipiv, float *inv_work, const int *lwork, int *info);
+// Compute QR factorization (Householder)
+  void dgeqrf_ (const int *m, const int *n, double *A,
+                const int *lda, double *tau, double *work,
+                const int *lwork, int *info);
+  void sgeqrf_ (const int *m, const int *n, float *A,
+                const int *lda, float *tau, float *work,
+                const int *lwork, int *info);
+// Compute vector Q^T B, where Q is the result from dgeqrf_
+  void dormqr_ (const char *side, const char *trans, const int *m,
+                const int *n, const int *k, const double *A, const int *lda,
+                const double *tau, double *B, const int *ldb,
+                double *work, const int *lwork, int *info);
+  void sormqr_ (const char *side, const char *trans, const int *m,
+                const int *n, const int *k, const float *A, const int *lda,
+                const float *tau, float *B, const int *ldb,
+                float *work, const int *lwork, int *info);
+// Compute matrix Q from the result of dgeqrf_
+  void dorgqr_ (const int *m, const int *n, const int *k, const double *A,
+                const int *lda, const double *tau, double *work, const int *lwork,
+                int *info);
+  void sorgqr_ (const int *m, const int *n, const int *k, const float *A,
+                const int *lda, const float *tau, float *work, const int *lwork,
+                int *info);
+// Solve the triangular system Rx = b
+  void dtrtrs_ (const char *uplo, const char *trans,
+                const char *diag, const int *n, const int *n_rhs,
+                const double *A, const int *lda, double *B, const int *ldb,
+                int *info);
+  void strtrs_ (const char *uplo, const char *trans,
+                const char *diag, const int *n, const int *n_rhs,
+                const float *A, const int *lda, float *B, const int *ldb,
+                int *info);
+// Compute eigenvalues and vectors
+  void dgeev_ (const char *jobvl, const char *jobvr,
+               const int *n, double *A, const int *lda,
+               double *lambda_re, double *lambda_im,
+               double *vl, const int *ldvl,
+               double *vr, const int *ldva,
+               double *work, const int *lwork,
+               int *info);
+  void sgeev_ (const char *jobvl, const char *jobvr,
+               const int *n, float *A, const int *lda,
+               float *lambda_re, float *lambda_im,
+               float *vl, const int *ldvl,
+               float *vr, const int *ldva,
+               float *work, const int *lwork,
+               int *info);
+// Compute eigenvalues and vectors (expert)
+  void dgeevx_ (const char *balanc, const char *jobvl, const char *jobvr,
+                const char *sense,
+                const int *n, double *A, const int *lda,
+                double *lambda_re, double *lambda_im,
+                double *vl, const int *ldvl,
+                double *vr, const int *ldvr,
+                int *ilo, int *ihi,
+                double *scale, double *abnrm,
+                double *rconde, double *rcondv,
+                double *work, const int *lwork,
+                int *iwork, int *info);
+  void sgeevx_ (const char *balanc, const char *jobvl, const char *jobvr,
+                const char *sense,
+                const int *n, float *A, const int *lda,
+                float *lambda_re, float *lambda_im,
+                float *vl, const int *ldvl,
+                float *vr, const int *ldvr,
+                int *ilo, int *ihi,
+                float *scale, float *abnrm,
+                float *rconde, float *rcondv,
+                float *work, const int *lwork,
+                int *iwork, int *info);
+// Eigenvalues for a symmetric matrix
+  void dsyev_ (const char *jobz, const char *uplo, const int *n,
+               double *A, const int *lda, double *w,
+               double *work, const int *lwork, int *info);
+  void ssyev_ (const char *jobz, const char *uplo, const int *n,
+               float *A, const int *lda, float *w,
+               float *work, const int *lwork, int *info);
+// Same functionality as dsyev_ but with more options: E.g.
+// Compute only eigenvalues in a specific interval,
+// Compute only eigenvalues with a specific index,
+// Set tolerance for eigenvalue computation
+  void dsyevx_ (const char *jobz, const char *range,
+                const char *uplo, const int *n, double *A, const int *lda,
+                const double *vl, const double *vu,
+                const int *il, const int *iu, const double *abstol,
+                int *m, double *w, double *z,
+                const int *ldz, double *work, const int *lwork, int *iwork,
+                int *ifail, int *info);
+  void ssyevx_ (const char *jobz, const char *range,
+                const char *uplo, const int *n, float *A, const int *lda,
+                const float *vl, const float *vu,
+                const int *il, const int *iu, const float *abstol,
+                int *m, float *w, float *z,
+                const int *ldz, float *work, const int *lwork, int *iwork,
+                int *ifail, int *info);
+// Generalized eigenvalues and eigenvectors of
+// 1: A*x = lambda*B*x; 2: A*B*x = lambda*x; 3: B*A*x = lambda*x
+// A and B are symmetric and B is definite
+  void dsygv_ (const int *itype, const char *jobz, const char *uplo,
+               const int *n, double *A, const int *lda, double *B,
+               const int *ldb, double *w, double *work,
+               const int *lwork, int *info);
+  void ssygv_ (const int *itype, const char *jobz, const char *uplo,
+               const int *n, float *A, const int *lda, float *B,
+               const int *ldb, float *w, float *work,
+               const int *lwork, int *info);
+// Same functionality as dsygv_ but with more options: E.g.
+// Compute only eigenvalues in a specific interval,
+// Compute only eigenvalues with a specific index,
+// Set tolerance for eigenvalue computation
+  void dsygvx_ (const int *itype, const char *jobz, const char *range,
+                const char *uplo, const int *n, double *A, const int *lda,
+                double *B, const int *ldb, const double *vl, const double *vu,
+                const int *il, const int *iu, const double *abstol,
+                int *m, double *w, double *z,
+                const int *ldz, double *work, const int *lwork, int *iwork,
+                int *ifail, int *info);
+  void ssygvx_ (const int *itype, const char *jobz, const char *range,
+                const char *uplo, const int *n, float *A, const int *lda,
+                float *B, const int *ldb, const float *vl, const float *vu,
+                const int *il, const int *iu, const float *abstol,
+                int *m, float *w, float *z,
+                const int *ldz, float *work, const int *lwork, int *iwork,
+                int *ifail, int *info);
+// Compute singular value decomposition using divide and conquer
+  void dgesdd_ (const char *jobz,
+                const int *m, const int *n, double *A, const int *lda,
+                double *s,
+                double *u, const int *ldu,
+                double *vt, const int *ldvt,
+                double *work, const int *lwork,
+                int *iwork,
+                int *info);
+  void sgesdd_ (const char *jobz,
+                const int *m, const int *n, float *A, const int *lda,
+                float *s,
+                float *u, const int *ldu,
+                float *vt, const int *ldvt,
+                float *work, const int *lwork,
+                int *iwork,
+                int *info);
+// Compute singular value decomposition
+  void dgesvd_ (int *jobu, int *jobvt,
+                const int *n, const int *m, double *A, const int *lda,
+                double *s,
+                double *u, const int *ldu,
+                double *vt, const int *ldvt,
+                double *work, const int *lwork,
+                int *info);
+  void sgesvd_ (int *jobu, int *jobvt,
+                const int *n, const int *m, float *A, const int *lda,
+                float *s,
+                float *u, const int *ldu,
+                float *vt, const int *ldvt,
+                float *work, const int *lwork,
+                int *info);
+// Solve a least squares problem using SVD
+  void dgelsd_ (const int *m, const int *n, const int *nrhs,
+                const double *A, const int *lda,
+                double *B, const int *ldb,
+                double *s, const double *rcond,
+                int *rank,
+                double *work, const int *lwork, int *iwork,
+                int *info);
+  void sgelsd_ (const int *m, const int *n, const int *nrhs,
+                const float *A, const int *lda,
+                float *B, const int *ldb,
+                float *s, const float *rcond,
+                int *rank,
+                float *work, const int *lwork, int *iwork,
+                int *info);
+// Eigenvalues (and optionally eigenvectors) of a symmetric tridiagonal matrix
+  void dstev_ (const char *jobz, const int *n,
+               double *d, double *e, double *z,
+               const int *ldz, double *work,
+               int *info);
+  void sstev_ (const char *jobz, const int *n,
+               float *d, float *e, float *z,
+               const int *ldz, float *work,
+               int *info);
+
+}
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/// Wrappers around the Fortran routines daxpy_ (double) and saxpy_ (float).
+/// Any other combination of argument types ends up in this generic version.
+template<typename number1, typename number2, typename number3>
+inline void
+axpy (const int *, const number1 *, const number2 *, const int *, number3 *, const int *)
+{
+  Assert (false, ExcNotImplemented());
+}
+
+#ifdef DEAL_II_WITH_LAPACK
+// DEAL_II_WITH_LAPACK is defined: pass all arguments through unchanged
+inline void
+axpy (const int *len, const double *a, const double *src, const int *src_inc, double *dst, const int *dst_inc)
+{
+  daxpy_ (len,a,src,src_inc,dst,dst_inc);
+}
+
+inline void
+axpy (const int *len, const float *a, const float *src, const int *src_inc, float *dst, const int *dst_inc)
+{
+  saxpy_ (len,a,src,src_inc,dst,dst_inc);
+}
+#else
+// DEAL_II_WITH_LAPACK is not defined: both overloads only raise ExcMissing
+inline void
+axpy (const int *, const double *, const double *, const int *, double *, const int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("daxpy"));
+}
+
+inline void
+axpy (const int *, const float *, const float *, const int *, float *, const int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("saxpy"));
+}
+#endif
+
+
+/// Wrappers around the Fortran routines dgemv_ (double) and sgemv_ (float).
+/// Any other combination of argument types ends up in this generic version.
+template<typename number1, typename number2, typename number3, typename number4, typename number5>
+inline void
+gemv (const char *, const int *, const int *, const number1 *, const number2 *, const int *, const number3 *, const int *, const number4 *, number5 *, const int *)
+{
+  Assert (false, ExcNotImplemented());
+}
+
+#ifdef DEAL_II_WITH_LAPACK
+// DEAL_II_WITH_LAPACK is defined: pass all arguments through unchanged
+inline void
+gemv (const char *op, const int *rows, const int *cols, const double *alpha, const double *mat, const int *ld_mat, const double *x, const int *inc_x, const double *beta, double *y, const int *inc_y)
+{
+  dgemv_ (op,rows,cols,alpha,mat,ld_mat,x,inc_x,beta,y,inc_y);
+}
+
+inline void
+gemv (const char *op, const int *rows, const int *cols, const float *alpha, const float *mat, const int *ld_mat, const float *x, const int *inc_x, const float *beta, float *y, const int *inc_y)
+{
+  sgemv_ (op,rows,cols,alpha,mat,ld_mat,x,inc_x,beta,y,inc_y);
+}
+#else
+// DEAL_II_WITH_LAPACK is not defined: both overloads only raise ExcMissing
+inline void
+gemv (const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("dgemv"));
+}
+
+inline void
+gemv (const char *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("sgemv"));
+}
+#endif
+
+
+/// Wrappers around the Fortran routines dgemm_ (double) and sgemm_ (float).
+/// Any other combination of argument types ends up in this generic version.
+template<typename number1, typename number2, typename number3, typename number4, typename number5>
+inline void
+gemm (const char *, const char *, const int *, const int *, const int *, const number1 *, const number2 *, const int *, const number3 *, const int *, const number4 *, number5 *, const int *)
+{
+  Assert (false, ExcNotImplemented());
+}
+
+#ifdef DEAL_II_WITH_LAPACK
+// DEAL_II_WITH_LAPACK is defined: pass all arguments through unchanged
+inline void
+gemm (const char *op_a, const char *op_b, const int *rows, const int *cols, const int *depth, const double *alpha, const double *a, const int *ld_a, const double *b, const int *ld_b, const double *beta, double *c, const int *ld_c)
+{
+  dgemm_ (op_a,op_b,rows,cols,depth,alpha,a,ld_a,b,ld_b,beta,c,ld_c);
+}
+
+inline void
+gemm (const char *op_a, const char *op_b, const int *rows, const int *cols, const int *depth, const float *alpha, const float *a, const int *ld_a, const float *b, const int *ld_b, const float *beta, float *c, const int *ld_c)
+{
+  sgemm_ (op_a,op_b,rows,cols,depth,alpha,a,ld_a,b,ld_b,beta,c,ld_c);
+}
+#else
+// DEAL_II_WITH_LAPACK is not defined: both overloads only raise ExcMissing
+inline void
+gemm (const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("dgemm"));
+}
+
+inline void
+gemm (const char *, const char *, const int *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("sgemm"));
+}
+#endif
+
+
+/// Template wrapper for LAPACK functions dgetrf and sgetrf
+template<typename number1>
+inline void
+getrf (const int *, const int *, number1 *, const int *, int *, int *)
+{
+  Assert (false, ExcNotImplemented());
+}
+
+#ifdef DEAL_II_WITH_LAPACK
+inline void
+getrf (const int *m, const int *n, double *A, const int *lda, int *ipiv, int *info)
+{
+  dgetrf_ (m,n,A,lda,ipiv,info);
+}
+#else
+inline void
+getrf (const int *, const int *, double *, const int *, int *, int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("dgetrf"));
+}
+#endif
+
+
+#ifdef DEAL_II_WITH_LAPACK
+inline void
+getrf (const int *m, const int *n, float *A, const int *lda, int *ipiv, int *info)
+{
+  sgetrf_ (m,n,A,lda,ipiv,info);
+}
+#else
+inline void
+getrf (const int *, const int *, float *, const int *, int *, int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("sgetrf"));
+}
+#endif
+
+
+/// Template wrapper for LAPACK functions dgetrs and sgetrs
+template<typename number1, typename number2>
+inline void
+getrs (const char *, const int *, const int *, const number1 *, const int *, const int *, number2 *, const int *, int *)
+{
+  Assert (false, ExcNotImplemented());
+}
+
+#ifdef DEAL_II_WITH_LAPACK
+inline void
+getrs (const char *trans, const int *n, const int *nrhs, const double *A, const int *lda, const int *ipiv, double *b, const int *ldb, int *info)
+{
+  dgetrs_ (trans,n,nrhs,A,lda,ipiv,b,ldb,info);
+}
+#else
+inline void
+getrs (const char *, const int *, const int *, const double *, const int *, const int *, double *, const int *, int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("dgetrs"));
+}
+#endif
+
+
+#ifdef DEAL_II_WITH_LAPACK
+inline void
+getrs (const char *trans, const int *n, const int *nrhs, const float *A, const int *lda, const int *ipiv, float *b, const int *ldb, int *info)
+{
+  sgetrs_ (trans,n,nrhs,A,lda,ipiv,b,ldb,info);
+}
+#else
+inline void
+getrs (const char *, const int *, const int *, const float *, const int *, const int *, float *, const int *, int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("sgetrs"));
+}
+#endif
+
+
+/// Template wrapper for LAPACK functions dgetri and sgetri
+template<typename number1, typename number2>
+inline void
+getri (const int *, number1 *, const int *, int *, number2 *, const int *, int *)
+{
+  Assert (false, ExcNotImplemented());
+}
+
+#ifdef DEAL_II_WITH_LAPACK
+inline void
+getri (const int *n, double *A, const int *lda, int *ipiv, double *inv_work, const int *lwork, int *info)
+{
+  dgetri_ (n,A,lda,ipiv,inv_work,lwork,info);
+}
+#else
+inline void
+getri (const int *, double *, const int *, int *, double *, const int *, int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("dgetri"));
+}
+#endif
+
+
+#ifdef DEAL_II_WITH_LAPACK
+inline void
+getri (const int *n, float *A, const int *lda, int *ipiv, float *inv_work, const int *lwork, int *info)
+{
+  sgetri_ (n,A,lda,ipiv,inv_work,lwork,info);
+}
+#else
+inline void
+getri (const int *, float *, const int *, int *, float *, const int *, int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("sgetri"));
+}
+#endif
+
+
+/// Template wrapper for LAPACK functions dgeqrf and sgeqrf
+template<typename number1, typename number2, typename number3>
+inline void
+geqrf (const int *, const int *, number1 *, const int *, number2 *, number3 *, const int *, int *)
+{
+  Assert (false, ExcNotImplemented());
+}
+
+#ifdef DEAL_II_WITH_LAPACK
+inline void
+geqrf (const int *m, const int *n, double *A, const int *lda, double *tau, double *work, const int *lwork, int *info)
+{
+  dgeqrf_ (m,n,A,lda,tau,work,lwork,info);
+}
+#else
+inline void
+geqrf (const int *, const int *, double *, const int *, double *, double *, const int *, int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("dgeqrf"));
+}
+#endif
+
+
+#ifdef DEAL_II_WITH_LAPACK
+inline void
+geqrf (const int *m, const int *n, float *A, const int *lda, float *tau, float *work, const int *lwork, int *info)
+{
+  sgeqrf_ (m,n,A,lda,tau,work,lwork,info);
+}
+#else
+inline void
+geqrf (const int *, const int *, float *, const int *, float *, float *, const int *, int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("sgeqrf"));
+}
+#endif
+
+
+/// Template wrapper for LAPACK functions dormqr and sormqr
+template<typename number1, typename number2, typename number3, typename number4>
+inline void
+ormqr (const char *, const char *, const int *, const int *, const int *, const number1 *, const int *, const number2 *, number3 *, const int *, number4 *, const int *, int *)
+{
+  Assert (false, ExcNotImplemented());
+}
+
+#ifdef DEAL_II_WITH_LAPACK
+inline void
+ormqr (const char *side, const char *trans, const int *m, const int *n, const int *k, const double *A, const int *lda, const double *tau, double *B, const int *ldb, double *work, const int *lwork, int *info)
+{
+  dormqr_ (side,trans,m,n,k,A,lda,tau,B,ldb,work,lwork,info);
+}
+#else
+inline void
+ormqr (const char *, const char *, const int *, const int *, const int *, const double *, const int *, const double *, double *, const int *, double *, const int *, int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("dormqr"));
+}
+#endif
+
+
+#ifdef DEAL_II_WITH_LAPACK
+inline void
+ormqr (const char *side, const char *trans, const int *m, const int *n, const int *k, const float *A, const int *lda, const float *tau, float *B, const int *ldb, float *work, const int *lwork, int *info)
+{
+  sormqr_ (side,trans,m,n,k,A,lda,tau,B,ldb,work,lwork,info);
+}
+#else
+inline void
+ormqr (const char *, const char *, const int *, const int *, const int *, const float *, const int *, const float *, float *, const int *, float *, const int *, int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("sormqr"));
+}
+#endif
+
+
+/// Template wrapper for LAPACK functions dorgqr and sorgqr
+template<typename number1, typename number2, typename number3>
+inline void
+orgqr (const int *, const int *, const int *, const number1 *, const int *, const number2 *, number3 *, const int *, int *)
+{
+  Assert (false, ExcNotImplemented());
+}
+
+#ifdef DEAL_II_WITH_LAPACK
+inline void
+orgqr (const int *m, const int *n, const int *k, const double *A, const int *lda, const double *tau, double *work, const int *lwork, int *info)
+{
+  dorgqr_ (m,n,k,A,lda,tau,work,lwork,info);
+}
+#else
+inline void
+orgqr (const int *, const int *, const int *, const double *, const int *, const double *, double *, const int *, int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("dorgqr"));
+}
+#endif
+
+
+#ifdef DEAL_II_WITH_LAPACK
+inline void
+orgqr (const int *m, const int *n, const int *k, const float *A, const int *lda, const float *tau, float *work, const int *lwork, int *info)
+{
+  sorgqr_ (m,n,k,A,lda,tau,work,lwork,info);
+}
+#else
+inline void
+orgqr (const int *, const int *, const int *, const float *, const int *, const float *, float *, const int *, int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("sorgqr"));
+}
+#endif
+
+
+/// Template wrapper for LAPACK functions dtrtrs and strtrs
+template<typename number1, typename number2>
+inline void
+trtrs (const char *, const char *, const char *, const int *, const int *, const number1 *, const int *, number2 *, const int *, int *)
+{
+  Assert (false, ExcNotImplemented());
+}
+
+#ifdef DEAL_II_WITH_LAPACK
+inline void
+trtrs (const char *uplo, const char *trans, const char *diag, const int *n, const int *n_rhs, const double *A, const int *lda, double *B, const int *ldb, int *info)
+{
+  dtrtrs_ (uplo,trans,diag,n,n_rhs,A,lda,B,ldb,info);
+}
+#else
+inline void
+trtrs (const char *, const char *, const char *, const int *, const int *, const double *, const int *, double *, const int *, int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("dtrtrs"));
+}
+#endif
+
+
+#ifdef DEAL_II_WITH_LAPACK
+inline void
+trtrs (const char *uplo, const char *trans, const char *diag, const int *n, const int *n_rhs, const float *A, const int *lda, float *B, const int *ldb, int *info)
+{
+  strtrs_ (uplo,trans,diag,n,n_rhs,A,lda,B,ldb,info);
+}
+#else
+inline void
+trtrs (const char *, const char *, const char *, const int *, const int *, const float *, const int *, float *, const int *, int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("strtrs"));
+}
+#endif
+
+
+/// Template wrapper for LAPACK functions dgeev and sgeev
+template<typename number1, typename number2, typename number3, typename number4, typename number5, typename number6>
+inline void
+geev (const char *, const char *, const int *, number1 *, const int *, number2 *, number3 *, number4 *, const int *, number5 *, const int *, number6 *, const int *, int *)
+{
+  Assert (false, ExcNotImplemented());
+}
+
+#ifdef DEAL_II_WITH_LAPACK
+inline void
+geev (const char *jobvl, const char *jobvr, const int *n, double *A, const int *lda, double *lambda_re, double *lambda_im, double *vl, const int *ldvl, double *vr, const int *ldva, double *work, const int *lwork, int *info)
+{
+  dgeev_ (jobvl,jobvr,n,A,lda,lambda_re,lambda_im,vl,ldvl,vr,ldva,work,lwork,info);
+}
+#else
+inline void
+geev (const char *, const char *, const int *, double *, const int *, double *, double *, double *, const int *, double *, const int *, double *, const int *, int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("dgeev"));
+}
+#endif
+
+
+#ifdef DEAL_II_WITH_LAPACK
+inline void
+geev (const char *jobvl, const char *jobvr, const int *n, float *A, const int *lda, float *lambda_re, float *lambda_im, float *vl, const int *ldvl, float *vr, const int *ldva, float *work, const int *lwork, int *info)
+{
+  sgeev_ (jobvl,jobvr,n,A,lda,lambda_re,lambda_im,vl,ldvl,vr,ldva,work,lwork,info);
+}
+#else
+inline void
+geev (const char *, const char *, const int *, float *, const int *, float *, float *, float *, const int *, float *, const int *, float *, const int *, int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("sgeev"));
+}
+#endif
+
+
+/// Template wrapper for LAPACK functions dgeevx and sgeevx
+template<typename number1, typename number2, typename number3, typename number4, typename number5, typename number6, typename number7, typename number8, typename number9, typename number10>
+inline void
+geevx (const char *, const char *, const char *, const char *, const int *, number1 *, const int *, number2 *, number3 *, number4 *, const int *, number5 *, const int *, int *, int *, number6 *, number7 *, number8 *, number9 *, number10 *, const int *, int *, int *)
+{
+  Assert (false, ExcNotImplemented());
+}
+
+#ifdef DEAL_II_WITH_LAPACK
+inline void
+geevx (const char *balanc, const char *jobvl, const char *jobvr, const char *sense, const int *n, double *A, const int *lda, double *lambda_re, double *lambda_im, double *vl, const int *ldvl, double *vr, const int *ldvr, int *ilo, int *ihi, double *scale, double *abnrm, double *rconde, double *rcondv, double *work, const int *lwork, int *iwork, int *info)
+{
+  dgeevx_ (balanc,jobvl,jobvr,sense,n,A,lda,lambda_re,lambda_im,vl,ldvl,vr,ldvr,ilo,ihi,scale,abnrm,rconde,rcondv,work,lwork,iwork,info);
+}
+#else
+inline void
+geevx (const char *, const char *, const char *, const char *, const int *, double *, const int *, double *, double *, double *, const int *, double *, const int *, int *, int *, double *, double *, double *, double *, double *, const int *, int *, int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("dgeevx"));
+}
+#endif
+
+
+#ifdef DEAL_II_WITH_LAPACK
+inline void
+geevx (const char *balanc, const char *jobvl, const char *jobvr, const char *sense, const int *n, float *A, const int *lda, float *lambda_re, float *lambda_im, float *vl, const int *ldvl, float *vr, const int *ldvr, int *ilo, int *ihi, float *scale, float *abnrm, float *rconde, float *rcondv, float *work, const int *lwork, int *iwork, int *info)
+{
+  sgeevx_ (balanc,jobvl,jobvr,sense,n,A,lda,lambda_re,lambda_im,vl,ldvl,vr,ldvr,ilo,ihi,scale,abnrm,rconde,rcondv,work,lwork,iwork,info);
+}
+#else
+inline void
+geevx (const char *, const char *, const char *, const char *, const int *, float *, const int *, float *, float *, float *, const int *, float *, const int *, int *, int *, float *, float *, float *, float *, float *, const int *, int *, int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("sgeevx"));
+}
+#endif
+
+
+/// Template wrapper for LAPACK functions dsyev and ssyev
+template<typename number1, typename number2, typename number3>
+inline void
+syev (const char *, const char *, const int *, number1 *, const int *, number2 *, number3 *, const int *, int *)
+{
+  Assert (false, ExcNotImplemented());
+}
+
+#ifdef DEAL_II_WITH_LAPACK
+inline void
+syev (const char *jobz, const char *uplo, const int *n, double *A, const int *lda, double *w, double *work, const int *lwork, int *info)
+{
+  dsyev_ (jobz,uplo,n,A,lda,w,work,lwork,info);
+}
+#else
+inline void
+syev (const char *, const char *, const int *, double *, const int *, double *, double *, const int *, int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("dsyev"));
+}
+#endif
+
+
+#ifdef DEAL_II_WITH_LAPACK
+inline void
+syev (const char *jobz, const char *uplo, const int *n, float *A, const int *lda, float *w, float *work, const int *lwork, int *info)
+{
+  ssyev_ (jobz,uplo,n,A,lda,w,work,lwork,info);
+}
+#else
+inline void
+syev (const char *, const char *, const int *, float *, const int *, float *, float *, const int *, int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("ssyev"));
+}
+#endif
+
+
+/// Template wrapper for LAPACK functions dsyevx and ssyevx
+template<typename number1, typename number2, typename number3, typename number4, typename number5, typename number6, typename number7>
+inline void
+syevx (const char *, const char *, const char *, const int *, number1 *, const int *, const number2 *, const number3 *, const int *, const int *, const number4 *, int *, number5 *, number6 *, const int *, number7 *, const int *, int *, int *, int *)
+{
+  Assert (false, ExcNotImplemented());
+}
+
+#ifdef DEAL_II_WITH_LAPACK
+inline void
+syevx (const char *jobz, const char *range, const char *uplo, const int *n, double *A, const int *lda, const double *vl, const double *vu, const int *il, const int *iu, const double *abstol, int *m, double *w, double *z, const int *ldz, double *work, const int *lwork, int *iwork, int *ifail, int *info)
+{
+  dsyevx_ (jobz,range,uplo,n,A,lda,vl,vu,il,iu,abstol,m,w,z,ldz,work,lwork,iwork,ifail,info);
+}
+#else
+inline void
+syevx (const char *, const char *, const char *, const int *, double *, const int *, const double *, const double *, const int *, const int *, const double *, int *, double *, double *, const int *, double *, const int *, int *, int *, int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("dsyevx"));
+}
+#endif
+
+
+#ifdef DEAL_II_WITH_LAPACK
+inline void
+syevx (const char *jobz, const char *range, const char *uplo, const int *n, float *A, const int *lda, const float *vl, const float *vu, const int *il, const int *iu, const float *abstol, int *m, float *w, float *z, const int *ldz, float *work, const int *lwork, int *iwork, int *ifail, int *info)
+{
+  ssyevx_ (jobz,range,uplo,n,A,lda,vl,vu,il,iu,abstol,m,w,z,ldz,work,lwork,iwork,ifail,info);
+}
+#else
+inline void
+syevx (const char *, const char *, const char *, const int *, float *, const int *, const float *, const float *, const int *, const int *, const float *, int *, float *, float *, const int *, float *, const int *, int *, int *, int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("ssyevx"));
+}
+#endif
+
+
+/// Template wrapper for LAPACK functions dsygv and ssygv
+template<typename number1, typename number2, typename number3, typename number4>
+inline void
+sygv (const int *, const char *, const char *, const int *, number1 *, const int *, number2 *, const int *, number3 *, number4 *, const int *, int *)
+{
+  Assert (false, ExcNotImplemented());
+}
+
+#ifdef DEAL_II_WITH_LAPACK
+inline void
+sygv (const int *itype, const char *jobz, const char *uplo, const int *n, double *A, const int *lda, double *B, const int *ldb, double *w, double *work, const int *lwork, int *info)
+{
+  dsygv_ (itype,jobz,uplo,n,A,lda,B,ldb,w,work,lwork,info);
+}
+#else
+inline void
+sygv (const int *, const char *, const char *, const int *, double *, const int *, double *, const int *, double *, double *, const int *, int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("dsygv"));
+}
+#endif
+
+
+#ifdef DEAL_II_WITH_LAPACK
+inline void
+sygv (const int *itype, const char *jobz, const char *uplo, const int *n, float *A, const int *lda, float *B, const int *ldb, float *w, float *work, const int *lwork, int *info)
+{
+  ssygv_ (itype,jobz,uplo,n,A,lda,B,ldb,w,work,lwork,info);
+}
+#else
+inline void
+sygv (const int *, const char *, const char *, const int *, float *, const int *, float *, const int *, float *, float *, const int *, int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("ssygv"));
+}
+#endif
+
+
+/// Template wrapper for LAPACK functions dsygvx and ssygvx
+template<typename number1, typename number2, typename number3, typename number4, typename number5, typename number6, typename number7, typename number8>
+inline void
+sygvx (const int *, const char *, const char *, const char *, const int *, number1 *, const int *, number2 *, const int *, const number3 *, const number4 *, const int *, const int *, const number5 *, int *, number6 *, number7 *, const int *, number8 *, const int *, int *, int *, int *)
+{
+  Assert (false, ExcNotImplemented());
+}
+
+#ifdef DEAL_II_WITH_LAPACK
+inline void
+sygvx (const int *itype, const char *jobz, const char *range, const char *uplo, const int *n, double *A, const int *lda, double *B, const int *ldb, const double *vl, const double *vu, const int *il, const int *iu, const double *abstol, int *m, double *w, double *z, const int *ldz, double *work, const int *lwork, int *iwork, int *ifail, int *info)
+{
+  dsygvx_ (itype,jobz,range,uplo,n,A,lda,B,ldb,vl,vu,il,iu,abstol,m,w,z,ldz,work,lwork,iwork,ifail,info);
+}
+#else
+inline void
+sygvx (const int *, const char *, const char *, const char *, const int *, double *, const int *, double *, const int *, const double *, const double *, const int *, const int *, const double *, int *, double *, double *, const int *, double *, const int *, int *, int *, int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("dsygvx"));
+}
+#endif
+
+
+#ifdef DEAL_II_WITH_LAPACK
+inline void
+sygvx (const int *itype, const char *jobz, const char *range, const char *uplo, const int *n, float *A, const int *lda, float *B, const int *ldb, const float *vl, const float *vu, const int *il, const int *iu, const float *abstol, int *m, float *w, float *z, const int *ldz, float *work, const int *lwork, int *iwork, int *ifail, int *info)
+{
+  ssygvx_ (itype,jobz,range,uplo,n,A,lda,B,ldb,vl,vu,il,iu,abstol,m,w,z,ldz,work,lwork,iwork,ifail,info);
+}
+#else
+inline void
+sygvx (const int *, const char *, const char *, const char *, const int *, float *, const int *, float *, const int *, const float *, const float *, const int *, const int *, const float *, int *, float *, float *, const int *, float *, const int *, int *, int *, int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("ssygvx"));
+}
+#endif
+
+
+/// Template wrapper for LAPACK functions dgesdd and sgesdd
+template<typename number1, typename number2, typename number3, typename number4, typename number5>
+inline void
+gesdd (const char *, const int *, const int *, number1 *, const int *, number2 *, number3 *, const int *, number4 *, const int *, number5 *, const int *, int *, int *)
+{
+  Assert (false, ExcNotImplemented());
+}
+
+#ifdef DEAL_II_WITH_LAPACK
+inline void
+gesdd (const char *jobz, const int *m, const int *n, double *A, const int *lda, double *s, double *u, const int *ldu, double *vt, const int *ldvt, double *work, const int *lwork, int *iwork, int *info)
+{
+  dgesdd_ (jobz,m,n,A,lda,s,u,ldu,vt,ldvt,work,lwork,iwork,info);
+}
+#else
+inline void
+gesdd (const char *, const int *, const int *, double *, const int *, double *, double *, const int *, double *, const int *, double *, const int *, int *, int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("dgesdd"));
+}
+#endif
+
+
+#ifdef DEAL_II_WITH_LAPACK
+inline void
+gesdd (const char *jobz, const int *m, const int *n, float *A, const int *lda, float *s, float *u, const int *ldu, float *vt, const int *ldvt, float *work, const int *lwork, int *iwork, int *info)
+{
+  sgesdd_ (jobz,m,n,A,lda,s,u,ldu,vt,ldvt,work,lwork,iwork,info);
+}
+#else
+inline void
+gesdd (const char *, const int *, const int *, float *, const int *, float *, float *, const int *, float *, const int *, float *, const int *, int *, int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("sgesdd"));
+}
+#endif
+
+
+/// Template wrapper for LAPACK functions dgesvd and sgesvd
+template<typename number1, typename number2, typename number3, typename number4, typename number5>
+inline void
+gesvd (int *, int *, const int *, const int *, number1 *, const int *, number2 *, number3 *, const int *, number4 *, const int *, number5 *, const int *, int *)
+{
+  Assert (false, ExcNotImplemented());
+}
+
+#ifdef DEAL_II_WITH_LAPACK
+inline void
+gesvd (int *jobu, int *jobvt, const int *n, const int *m, double *A, const int *lda, double *s, double *u, const int *ldu, double *vt, const int *ldvt, double *work, const int *lwork, int *info)
+{
+  dgesvd_ (jobu,jobvt,n,m,A,lda,s,u,ldu,vt,ldvt,work,lwork,info);
+}
+#else
+inline void
+gesvd (int *, int *, const int *, const int *, double *, const int *, double *, double *, const int *, double *, const int *, double *, const int *, int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("dgesvd"));
+}
+#endif
+
+
+#ifdef DEAL_II_WITH_LAPACK
+inline void
+gesvd (int *jobu, int *jobvt, const int *n, const int *m, float *A, const int *lda, float *s, float *u, const int *ldu, float *vt, const int *ldvt, float *work, const int *lwork, int *info)
+{
+  sgesvd_ (jobu,jobvt,n,m,A,lda,s,u,ldu,vt,ldvt,work,lwork,info);
+}
+#else
+inline void
+gesvd (int *, int *, const int *, const int *, float *, const int *, float *, float *, const int *, float *, const int *, float *, const int *, int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("sgesvd"));
+}
+#endif
+
+
+/// Template wrapper for LAPACK functions dgelsd and sgelsd
+template<typename number1, typename number2, typename number3, typename number4, typename number5>
+inline void
+gelsd (const int *, const int *, const int *, const number1 *, const int *, number2 *, const int *, number3 *, const number4 *, int *, number5 *, const int *, int *, int *)
+{
+  Assert (false, ExcNotImplemented());
+}
+
+#ifdef DEAL_II_WITH_LAPACK
+inline void
+gelsd (const int *m, const int *n, const int *nrhs, const double *A, const int *lda, double *B, const int *ldb, double *s, const double *rcond, int *rank, double *work, const int *lwork, int *iwork, int *info)
+{
+  dgelsd_ (m,n,nrhs,A,lda,B,ldb,s,rcond,rank,work,lwork,iwork,info);
+}
+#else
+inline void
+gelsd (const int *, const int *, const int *, const double *, const int *, double *, const int *, double *, const double *, int *, double *, const int *, int *, int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("dgelsd"));
+}
+#endif
+
+
+#ifdef DEAL_II_WITH_LAPACK
+inline void
+gelsd (const int *m, const int *n, const int *nrhs, const float *A, const int *lda, float *B, const int *ldb, float *s, const float *rcond, int *rank, float *work, const int *lwork, int *iwork, int *info)
+{
+  sgelsd_ (m,n,nrhs,A,lda,B,ldb,s,rcond,rank,work,lwork,iwork,info);
+}
+#else
+inline void
+gelsd (const int *, const int *, const int *, const float *, const int *, float *, const int *, float *, const float *, int *, float *, const int *, int *, int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("sgelsd"));
+}
+#endif
+
+
+/// Template wrapper for LAPACK functions dstev and sstev
+template<typename number1, typename number2, typename number3, typename number4>
+inline void
+stev (const char *, const int *, number1 *, number2 *, number3 *, const int *, number4 *, int *)
+{
+  Assert (false, ExcNotImplemented());
+}
+
+#ifdef DEAL_II_WITH_LAPACK
+inline void
+stev (const char *jobz, const int *n, double *d, double *e, double *z, const int *ldz, double *work, int *info)
+{
+  dstev_ (jobz,n,d,e,z,ldz,work,info);
+}
+#else
+inline void
+stev (const char *, const int *, double *, double *, double *, const int *, double *, int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("dstev"));
+}
+#endif
+
+
+#ifdef DEAL_II_WITH_LAPACK
+inline void
+stev (const char *jobz, const int *n, float *d, float *e, float *z, const int *ldz, float *work, int *info)
+{
+  sstev_ (jobz,n,d,e,z,ldz,work,info);
+}
+#else
+inline void
+stev (const char *, const int *, float *, float *, float *, const int *, float *, int *)
+{
+  Assert (false, LAPACKSupport::ExcMissing("sstev"));
+}
+#endif
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/linear_operator.h b/include/deal.II/lac/linear_operator.h
new file mode 100644
index 0000000..66cd6c3
--- /dev/null
+++ b/include/deal.II/lac/linear_operator.h
@@ -0,0 +1,1121 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2014 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__linear_operator_h
+#define dealii__linear_operator_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/lac/vector_memory.h>
+
+#ifdef DEAL_II_WITH_CXX11
+
+#include <array>
+#include <functional>
+#include <type_traits>
+
+DEAL_II_NAMESPACE_OPEN
+
+// Forward declarations:
+
+template <typename Number> class Vector;
+
+template <typename Range = Vector<double>, typename Domain = Range>
+class LinearOperator;
+
+template <typename Range = Vector<double>,
+          typename Domain = Range,
+          typename OperatorExemplar,
+          typename Matrix>
+LinearOperator<Range, Domain> linear_operator (const OperatorExemplar &,
+                                               const Matrix &);
+
+template <typename Range = Vector<double>,
+          typename Domain = Range,
+          typename Matrix>
+LinearOperator<Range, Domain> linear_operator (const Matrix &);
+
+template <typename Range = Vector<double>,
+          typename Domain = Range>
+LinearOperator<Range, Domain>
+null_operator(const LinearOperator<Range, Domain> &);
+
+
+/**
+ * A class to store the abstract concept of a linear operator.
+ *
+ * The class essentially consists of <code>std::function</code> objects that
+ * store the knowledge of how to apply the linear operator by implementing the
+ * abstract @p Matrix interface:
+ * @code
+ *   std::function<void(Range &, const Domain &)> vmult;
+ *   std::function<void(Range &, const Domain &)> vmult_add;
+ *   std::function<void(Domain &, const Range &)> Tvmult;
+ *   std::function<void(Domain &, const Range &)> Tvmult_add;
+ * @endcode
+ *
+ * But, in contrast to a usual matrix object, the domain and range of the
+ * linear operator are also bound to the LinearOperator class on the type
+ * level. Because of this, <code>LinearOperator <Range, Domain></code> has two
+ * additional function objects
+ * @code
+ *   std::function<void(Range &, bool)> reinit_range_vector;
+ *   std::function<void(Domain &, bool)> reinit_domain_vector;
+ * @endcode
+ * that store the knowledge of how to initialize (resize + internal data
+ * structures) an arbitrary vector of the @p Range and @p Domain space.
+ *
+ * The primary purpose of this class is to provide syntactic sugar for complex
+ * matrix-vector operations and free the user from having to create, set up
+ * and handle intermediate storage locations by hand.
+ *
+ * As an example consider the operation $(A+k\,B)\,C$, where $A$, $B$ and $C$
+ * denote (possibly different) matrices. In order to construct a
+ * LinearOperator <code>op</code> that stores the knowledge of this operation,
+ * one can write:
+ *
+ * @code
+ * dealii::SparseMatrix<double> A, B, C;
+ * const double k = ...;
+ *
+ * // Setup and assembly of matrices
+ *
+ * const auto op_a = linear_operator(A);
+ * const auto op_b = linear_operator(B);
+ * const auto op_c = linear_operator(C);
+ *
+ * const auto op = (op_a + k * op_b) * op_c;
+ * @endcode
+ *
+ * @note This class makes heavy use of <code>std::function</code> objects and
+ * lambda functions. This flexibility comes with a run-time penalty. Only use
+ * this object to encapsulate matrix objects of medium to large size (as a rule
+ * of thumb, sparse matrices with a size $1000\times1000$, or larger).
+ *
+ * @note This class is only available if deal.II was configured with C++11
+ * support, i.e., if <code>DEAL_II_WITH_CXX11</code> is enabled during cmake
+ * configure.
+ *
+ * @author Luca Heltai, Matthias Maier, 2015
+ *
+ * @ingroup LAOperators
+ */
+template <typename Range, typename Domain> class LinearOperator
+{
+public:
+
+  /**
+   * Create an empty LinearOperator object. All <code>std::function</code>
+   * member objects are initialized with default variants that raise a
+   * failing <code>Assert</code> upon invocation, and is_null_operator is set
+   * to false.
+   */
+  LinearOperator()
+    :
+    is_null_operator(false)
+  {
+
+    vmult = [](Range &, const Domain &)
+    {
+      Assert(false, ExcMessage("Uninitialized LinearOperator<Range, "
+                               "Domain>::vmult called"));
+    };
+
+    vmult_add = [](Range &, const Domain &)
+    {
+      Assert(false, ExcMessage("Uninitialized LinearOperator<Range, "
+                               "Domain>::vmult_add called"));
+    };
+
+    Tvmult = [](Domain &, const Range &)
+    {
+      Assert(false, ExcMessage("Uninitialized LinearOperator<Range, "
+                               "Domain>::Tvmult called"));
+    };
+
+    Tvmult_add = [](Domain &, const Range &)
+    {
+      Assert(false, ExcMessage("Uninitialized LinearOperator<Range, "
+                               "Domain>::Tvmult_add called"));
+    };
+
+    reinit_range_vector = [](Range &, bool)
+    {
+      Assert(false, ExcMessage("Uninitialized LinearOperator<Range, "
+                               "Domain>::reinit_range_vector method called"));
+    };
+
+    reinit_domain_vector = [](Domain &, bool)
+    {
+      Assert(false, ExcMessage("Uninitialized LinearOperator<Range, "
+                               "Domain>::reinit_domain_vector method called"));
+    };
+  }
+
+  /**
+   * Default copy constructor.
+   */
+  LinearOperator (const LinearOperator<Range, Domain> &) = default;
+
+  /**
+   * Templated copy constructor that creates a LinearOperator object from an
+   * object @p op for which the conversion function
+   * <code>linear_operator</code> is defined.
+   *
+   * The object is populated by assigning the result of
+   * <code>linear_operator<Range, Domain, Op>(op)</code>. The constructor is
+   * disabled for types derived from LinearOperator<Range, Domain> so that it
+   * does not compete with the copy constructor.
+   */
+  template<typename Op,
+           typename = typename std::enable_if<!std::is_base_of<LinearOperator<Range, Domain>, Op>::value>::type>
+  LinearOperator (const Op &op)
+  {
+    *this = linear_operator<Range, Domain, Op>(op);
+  }
+
+  /**
+   * Default copy assignment operator.
+   */
+  LinearOperator<Range, Domain> &
+  operator=(const LinearOperator<Range, Domain> &) = default;
+
+  /**
+   * Templated copy assignment operator for an object @p op for which the
+   * conversion function <code>linear_operator</code> is defined. Disabled
+   * for types derived from LinearOperator<Range, Domain>.
+   */
+  template <typename Op,
+            typename = typename std::enable_if<!std::is_base_of<LinearOperator<Range, Domain>, Op>::value>::type>
+  LinearOperator<Range, Domain> &operator=(const Op &op)
+  {
+    *this = linear_operator<Range, Domain, Op>(op);
+    return *this;
+  }
+
+  /**
+   * Application of the LinearOperator object to a vector u of the @p Domain
+   * space giving a vector v of the @p Range space.
+   */
+  std::function<void(Range &v, const Domain &u)> vmult;
+
+  /**
+   * Application of the LinearOperator object to a vector u of the @p Domain
+   * space. The result is added to the vector v.
+   */
+  std::function<void(Range &v, const Domain &u)> vmult_add;
+
+  /**
+   * Application of the transpose LinearOperator object to a vector u of the
+   * @p Range space giving a vector v of the @p Domain space.
+   */
+  std::function<void(Domain &v, const Range &u)> Tvmult;
+
+  /**
+   * Application of the transpose LinearOperator object to a vector @p u of
+   * the @p Range space. The result is added to the vector @p v.
+   */
+  std::function<void(Domain &v, const Range &u)> Tvmult_add;
+
+  /**
+   * Initializes a vector v of the Range space to be directly usable as the
+   * destination parameter in an application of vmult. Similar to the reinit
+   * functions of the vector classes, the boolean determines whether a fast
+   * initialization is done, i.e., if it is set to false the content of the
+   * vector is set to 0.
+   */
+  std::function<void(Range &v, bool omit_zeroing_entries)> reinit_range_vector;
+
+  /**
+   * Initializes a vector of the Domain space to be directly usable as the
+   * source parameter in an application of vmult. Similar to the reinit
+   * functions of the vector classes, the boolean determines whether a fast
+   * initialization is done, i.e., if it is set to false the content of the
+   * vector is set to 0.
+   */
+  std::function<void(Domain &v, bool omit_zeroing_entries)> reinit_domain_vector;
+
+  /**
+   * @name In-place vector space operations
+   */
+  //@{
+
+  /**
+   * Addition with a LinearOperator @p second_op with the same @p Domain and
+   * @p Range.
+   */
+  LinearOperator<Range, Domain> &
+  operator+=(const LinearOperator<Range, Domain> &second_op)
+  {
+    *this = *this + second_op;
+    return *this;
+  }
+
+  /**
+   * Subtraction with a LinearOperator @p second_op with the same @p Domain
+   * and @p Range.
+   */
+  LinearOperator<Range, Domain> &
+  operator-=(const LinearOperator<Range, Domain> &second_op)
+  {
+    *this = *this - second_op;
+    return *this;
+  }
+
+  /**
+   * Composition of the LinearOperator with an endomorphism @p second_op of
+   * the @p Domain space.
+   */
+  LinearOperator<Range, Domain> &
+  operator*=(const LinearOperator<Domain, Domain> &second_op)
+  {
+    *this = *this * second_op;
+    return *this;
+  }
+
+  /**
+   * Scalar multiplication of the LinearOperator with @p number from the
+   * right.
+   *
+   * Returns a reference to this object, like the other compound assignment
+   * operators, so that no copy of the operator is made and chained
+   * compound assignments act on this object.
+   */
+  LinearOperator<Range, Domain> &
+  operator*=(typename Domain::value_type  number)
+  {
+    *this = *this * number;
+    return *this;
+  }
+
+  //@}
+
+  /**
+   * This bool is used to determine whether a linear operator is a null
+   * operator. In this case the class is able to optimize some operations like
+   * multiplication or addition.
+   */
+  bool is_null_operator;
+};
+
+
+/**
+ * @name Vector space operations
+ */
+//@{
+
+/**
+ * @relates LinearOperator
+ *
+ * Addition of two linear operators @p first_op and @p second_op given by
+ * $(\text{first\_op}+\text{second\_op})x := \text{first\_op}(x) +
+ * \text{second\_op}(x)$
+ *
+ * @ingroup LAOperators
+ */
+template <typename Range, typename Domain>
+LinearOperator<Range, Domain>
+operator+(const LinearOperator<Range, Domain> &first_op,
+          const LinearOperator<Range, Domain> &second_op)
+{
+  // Adding a null operator changes nothing: hand back the other summand as
+  // is. first_op is tested first, so second_op is returned if both are null
+  // operators.
+  if (first_op.is_null_operator)
+    return second_op;
+
+  if (second_op.is_null_operator)
+    return first_op;
+
+  LinearOperator<Range, Domain> sum;
+
+  // The vector initialization functions are taken from the first summand.
+  sum.reinit_range_vector = first_op.reinit_range_vector;
+  sum.reinit_domain_vector = first_op.reinit_domain_vector;
+
+  // Both summands are captured by value so that the function objects stored
+  // in the result do not refer to the (possibly temporary) arguments.
+
+  sum.vmult = [first_op, second_op](Range &dst, const Domain &src)
+  {
+    first_op.vmult(dst, src);
+    second_op.vmult_add(dst, src);
+  };
+
+  sum.vmult_add = [first_op, second_op](Range &dst, const Domain &src)
+  {
+    first_op.vmult_add(dst, src);
+    second_op.vmult_add(dst, src);
+  };
+
+  // The transpose variants apply second_op first and then add first_op.
+
+  sum.Tvmult = [first_op, second_op](Domain &dst, const Range &src)
+  {
+    second_op.Tvmult(dst, src);
+    first_op.Tvmult_add(dst, src);
+  };
+
+  sum.Tvmult_add = [first_op, second_op](Domain &dst, const Range &src)
+  {
+    second_op.Tvmult_add(dst, src);
+    first_op.Tvmult_add(dst, src);
+  };
+
+  return sum;
+}
+
+
+/**
+ * @relates LinearOperator
+ *
+ * Subtraction of two linear operators @p first_op and @p second_op given by
+ * $(\text{first\_op}-\text{second\_op})x := \text{first\_op}(x) -
+ * \text{second\_op}(x)$
+ *
+ * @ingroup LAOperators
+ */
+template <typename Range, typename Domain>
+LinearOperator<Range, Domain>
+operator-(const LinearOperator<Range, Domain> &first_op,
+          const LinearOperator<Range, Domain> &second_op)
+{
+  // 0 - B: only the negated second operand remains.
+  if (first_op.is_null_operator)
+    return -1 * second_op;
+
+  // A - 0: the first operand is returned unchanged.
+  if (second_op.is_null_operator)
+    return first_op;
+
+  // General case, expressed through scalar multiplication and addition:
+  // A - B = A + (-1) * B.
+  const LinearOperator<Range, Domain> negated_second_op = -1. * second_op;
+  return first_op + negated_second_op;
+}
+
+
+/**
+ * @relates LinearOperator
+ *
+ * Scalar multiplication of a LinearOperator object @p op with @p number from
+ * the left.
+ *
+ * The @p Domain and @p Range types must implement the following
+ * <code>operator*=</code> member functions accepting the appropriate scalar
+ * Number type for rescaling:
+ *
+ * @code
+ * Domain & operator *=(Domain::value_type);
+ * Range & operator *=(Range::value_type);
+ * @endcode
+ *
+ * @ingroup LAOperators
+ */
+template <typename Range, typename Domain>
+LinearOperator<Range, Domain>
+operator*(typename Range::value_type  number,
+          const LinearOperator<Range, Domain> &op)
+{
+  static_assert(
+    std::is_convertible<typename Range::value_type, typename Domain::value_type>::value,
+    "Range and Domain must have implicitly convertible 'value_type's");
+
+  // A scaled null operator is still the same null operator.
+  if (op.is_null_operator)
+    return op;
+
+  // Scaling by zero turns op into a null operator.
+  if (number == 0.)
+    return null_operator(op);
+
+  // Start from a copy of op so that the reinit functions and the
+  // is_null_operator flag carry over; only the four application functions
+  // are replaced below.
+  LinearOperator<Range, Domain> scaled_op(op);
+
+  // number and op are captured by value so that the stored function objects
+  // do not refer to the arguments of this function.
+
+  scaled_op.vmult = [number, op](Range &dst, const Domain &src)
+  {
+    op.vmult(dst, src);
+    dst *= number;
+  };
+
+  // dst + number * op(src) is evaluated as number * (dst / number + op(src)).
+  // number is non-zero here because of the check above.
+  scaled_op.vmult_add = [number, op](Range &dst, const Domain &src)
+  {
+    dst /= number;
+    op.vmult_add(dst, src);
+    dst *= number;
+  };
+
+  scaled_op.Tvmult = [number, op](Domain &dst, const Range &src)
+  {
+    op.Tvmult(dst, src);
+    dst *= number;
+  };
+
+  // Same rescaling scheme as in vmult_add, applied to the transpose.
+  scaled_op.Tvmult_add = [number, op](Domain &dst, const Range &src)
+  {
+    dst /= number;
+    op.Tvmult_add(dst, src);
+    dst *= number;
+  };
+
+  return scaled_op;
+}
+
+
+/**
+ * @relates LinearOperator
+ *
+ * Scalar multiplication of a LinearOperator object from the right.
+ *
+ * The @p Domain and @p Range types must implement the following
+ * <code>operator*=</code> member functions for rescaling:
+ *
+ * @code
+ * Domain & operator *=(Domain::value_type);
+ * Range & operator *=(Range::value_type);
+ * @endcode
+ *
+ * @ingroup LAOperators
+ */
+template <typename Range, typename Domain>
+LinearOperator<Range, Domain>
+operator*(const LinearOperator<Range, Domain> &op,
+          typename Domain::value_type  number)
+{
+  static_assert(
+    std::is_convertible<typename Range::value_type, typename Domain::value_type>::value,
+    "Range and Domain must have implicitly convertible 'value_type's");
+
+  // Multiplication from the right is implemented through the variant that
+  // multiplies from the left; that variant takes its scalar as a
+  // Range::value_type, so the conversion is spelled out here.
+  const typename Range::value_type factor = number;
+
+  return factor * op;
+}
+
+//@}
+
+
+/**
+ * @name Composition and manipulation of a LinearOperator
+ */
+//@{
+
+/**
+ * @relates LinearOperator
+ *
+ * Composition of two linear operators @p first_op and @p second_op given by
+ * $(\text{first\_op}*\text{second\_op})x :=
+ * \text{first\_op}(\text{second\_op}(x))$
+ *
+ * @ingroup LAOperators
+ */
+template <typename Range, typename Intermediate, typename Domain>
+LinearOperator<Range, Domain>
+operator*(const LinearOperator<Range, Intermediate> &first_op,
+          const LinearOperator<Intermediate, Domain> &second_op)
+{
+  // Whether or not the product degenerates to a null operator, its domain
+  // vectors are initialized like those of second_op and its range vectors
+  // like those of first_op.
+  LinearOperator<Range, Domain> product;
+
+  product.reinit_domain_vector = second_op.reinit_domain_vector;
+  product.reinit_range_vector  = first_op.reinit_range_vector;
+
+  // If either factor is a null operator, so is the product.
+  if (first_op.is_null_operator || second_op.is_null_operator)
+    return null_operator(product);
+
+  // Both factors are captured by value so that the stored function objects
+  // do not refer to the arguments of this function. Every application needs
+  // a temporary vector of the Intermediate space, which is obtained from
+  // (and handed back to) a GrowingVectorMemory object.
+
+  product.vmult = [first_op, second_op](Range &dst, const Domain &src)
+  {
+    static GrowingVectorMemory<Intermediate> vector_memory;
+
+    Intermediate *tmp = vector_memory.alloc();
+    second_op.reinit_range_vector(*tmp, /*bool omit_zeroing_entries =*/ true);
+    second_op.vmult(*tmp, src);
+    first_op.vmult(dst, *tmp);
+    vector_memory.free(tmp);
+  };
+
+  product.vmult_add = [first_op, second_op](Range &dst, const Domain &src)
+  {
+    static GrowingVectorMemory<Intermediate> vector_memory;
+
+    Intermediate *tmp = vector_memory.alloc();
+    second_op.reinit_range_vector(*tmp, /*bool omit_zeroing_entries =*/ true);
+    second_op.vmult(*tmp, src);
+    first_op.vmult_add(dst, *tmp);
+    vector_memory.free(tmp);
+  };
+
+  // For the transpose the order of application is reversed: first_op^T is
+  // applied first, then second_op^T.
+
+  product.Tvmult = [first_op, second_op](Domain &dst, const Range &src)
+  {
+    static GrowingVectorMemory<Intermediate> vector_memory;
+
+    Intermediate *tmp = vector_memory.alloc();
+    first_op.reinit_domain_vector(*tmp, /*bool omit_zeroing_entries =*/ true);
+    first_op.Tvmult(*tmp, src);
+    second_op.Tvmult(dst, *tmp);
+    vector_memory.free(tmp);
+  };
+
+  product.Tvmult_add = [first_op, second_op](Domain &dst, const Range &src)
+  {
+    static GrowingVectorMemory<Intermediate> vector_memory;
+
+    Intermediate *tmp = vector_memory.alloc();
+    first_op.reinit_domain_vector(*tmp, /*bool omit_zeroing_entries =*/ true);
+    first_op.Tvmult(*tmp, src);
+    second_op.Tvmult_add(dst, *tmp);
+    vector_memory.free(tmp);
+  };
+
+  return product;
+}
+
+/**
+ * @relates LinearOperator
+ *
+ * Returns the transpose of the linear operator @p op.
+ *
+ * The returned object is obtained by exchanging the roles of
+ * <code>vmult</code> and <code>Tvmult</code>, of <code>vmult_add</code> and
+ * <code>Tvmult_add</code>, and of <code>reinit_range_vector</code> and
+ * <code>reinit_domain_vector</code>. The
+ * LinearOperator::is_null_operator flag of @p op is carried over, so the
+ * transpose of a null operator is again treated as a null operator.
+ *
+ * @ingroup LAOperators
+ */
+template <typename Range, typename Domain>
+LinearOperator<Domain, Range>
+transpose_operator(const LinearOperator<Range, Domain> &op)
+{
+  LinearOperator<Domain, Range> return_op;
+
+  // A default constructed LinearOperator has is_null_operator == false;
+  // without this assignment the transpose of a null operator would no
+  // longer be recognized as one.
+  return_op.is_null_operator = op.is_null_operator;
+
+  return_op.reinit_range_vector = op.reinit_domain_vector;
+  return_op.reinit_domain_vector = op.reinit_range_vector;
+
+  return_op.vmult = op.Tvmult;
+  return_op.vmult_add = op.Tvmult_add;
+  return_op.Tvmult = op.vmult;
+  return_op.Tvmult_add = op.vmult_add;
+
+  return return_op;
+}
+
+/**
+ * @relates LinearOperator
+ *
+ * Returns an object representing the inverse of the LinearOperator @p op.
+ *
+ * The function takes references @p solver and @p preconditioner to an
+ * iterative solver and a preconditioner that are used in the
+ * <code>vmult</code> and <code>Tvmult</code> implementations of the
+ * LinearOperator object.
+ *
+ * The LinearOperator object that is created stores a reference to @p solver
+ * and @p preconditioner. Thus, both objects must remain a valid reference for
+ * the whole lifetime of the LinearOperator object. Internal data structures
+ * of the @p solver object will be modified upon invocation of
+ * <code>vmult</code> or <code>Tvmult</code>.
+ *
+ * @ingroup LAOperators
+ */
+template <typename Solver, typename Preconditioner>
+LinearOperator<typename Solver::vector_type, typename Solver::vector_type>
+inverse_operator(const LinearOperator<typename Solver::vector_type, typename Solver::vector_type> &op,
+                 Solver &solver,
+                 const Preconditioner &preconditioner)
+{
+  typedef typename Solver::vector_type Vector;
+
+  LinearOperator<Vector, Vector> return_op;
+
+  // The inverse maps the range of op back to its domain, hence the two
+  // reinit functions are exchanged.
+  return_op.reinit_range_vector = op.reinit_domain_vector;
+  return_op.reinit_domain_vector = op.reinit_range_vector;
+
+  // In all lambdas below op is captured by value, whereas solver and
+  // preconditioner are captured by reference: both objects have to outlive
+  // the returned LinearOperator.
+
+  // v = op^{-1} u: v is reinitialized (with zeroing) and then handed to the
+  // solver as the solution vector of op v = u.
+  return_op.vmult = [op, &solver, &preconditioner](Vector &v, const Vector &u)
+  {
+    op.reinit_range_vector(v, /*bool omit_zeroing_entries =*/ false);
+    solver.solve(op, v, u, preconditioner);
+  };
+
+  // v += op^{-1} u: the system is solved into a temporary vector v2 which is
+  // added to v afterwards.
+  return_op.vmult_add =
+    [op, &solver, &preconditioner](Vector &v, const Vector &u)
+  {
+    static GrowingVectorMemory<typename Solver::vector_type> vector_memory;
+
+    Vector *v2 = vector_memory.alloc();
+    op.reinit_range_vector(*v2, /*bool omit_zeroing_entries =*/ false);
+    solver.solve(op, *v2, u, preconditioner);
+    v += *v2;
+    vector_memory.free(v2);
+  };
+
+  // Same as vmult, but the system is solved with transpose_operator(op).
+  // NOTE(review): the same preconditioner object is passed for the
+  // transposed system - confirm that this is intended by the callers.
+  return_op.Tvmult = [op, &solver, &preconditioner]( Vector &v, const
+                                                     Vector &u)
+  {
+    op.reinit_range_vector(v, /*bool omit_zeroing_entries =*/ false);
+    solver.solve(transpose_operator(op), v, u, preconditioner);
+  };
+
+  // Same as vmult_add, but the system is solved with transpose_operator(op).
+  return_op.Tvmult_add =
+    [op, &solver, &preconditioner](Vector &v, const Vector &u)
+  {
+    static GrowingVectorMemory<typename Solver::vector_type> vector_memory;
+
+    Vector *v2 = vector_memory.alloc();
+    op.reinit_range_vector(*v2, /*bool omit_zeroing_entries =*/ false);
+    solver.solve(transpose_operator(op), *v2, u, preconditioner);
+    v += *v2;
+    vector_memory.free(v2);
+  };
+
+  return return_op;
+}
+
+//@}
+
+
+/**
+ * @name Creation of a LinearOperator
+ */
+//@{
+
+/**
+ * @relates LinearOperator
+ *
+ * Returns a LinearOperator that is the identity of the vector space @p Range.
+ *
+ * The function takes an <code>std::function</code> object @p reinit_vector as
+ * an argument to initialize the <code>reinit_range_vector</code> and
+ * <code>reinit_domain_vector</code> objects of the LinearOperator object.
+ *
+ * @ingroup LAOperators
+ */
+template <typename Range>
+LinearOperator<Range, Range>
+identity_operator(const std::function<void(Range &, bool)> &reinit_vector)
+{
+  // The identity coincides with its transpose, so a single pair of
+  // callables serves vmult/Tvmult and vmult_add/Tvmult_add, respectively.
+  const auto copy_vector = [](Range &v, const Range &u)
+  {
+    v = u;
+  };
+
+  const auto add_vector = [](Range &v, const Range &u)
+  {
+    v += u;
+  };
+
+  LinearOperator<Range, Range> identity;
+
+  // Domain and range are the same space: one reinit function for both.
+  identity.reinit_range_vector = reinit_vector;
+  identity.reinit_domain_vector = reinit_vector;
+
+  identity.vmult = copy_vector;
+  identity.vmult_add = add_vector;
+  identity.Tvmult = copy_vector;
+  identity.Tvmult_add = add_vector;
+
+  return identity;
+}
+
+
+/**
+ * @relates LinearOperator
+ *
+ * Returns a nulled variant of the LinearOperator @p op, i.e. with optimized
+ * LinearOperator::vmult, LinearOperator::vmult_add, etc. functions and with
+ * LinearOperator::is_null_operator set to true.
+ *
+ * @ingroup LAOperators
+ */
+template <typename Range, typename Domain>
+LinearOperator<Range, Domain>
+null_operator(const LinearOperator<Range, Domain> &op)
+{
+  // Start from a copy of op: the reinit functions are kept, the four
+  // application functions are replaced below.
+  LinearOperator<Range, Domain> nulled_op(op);
+
+  nulled_op.is_null_operator = true;
+
+  // Adding the result of a null operator leaves the destination untouched.
+  nulled_op.vmult_add = [](Range &, const Domain &)
+  {};
+
+  nulled_op.Tvmult_add = [](Domain &, const Range &)
+  {};
+
+  // Applying a null operator assigns zero to the destination.
+  nulled_op.vmult = [](Range &dst, const Domain &)
+  {
+    dst = 0.;
+  };
+
+  nulled_op.Tvmult = [](Domain &dst, const Range &)
+  {
+    dst = 0.;
+  };
+
+  return nulled_op;
+}
+
+
+namespace internal
+{
+  namespace LinearOperator
+  {
+    /**
+     * A helper class that is responsible for the initialization of a vector
+     * to be directly usable as the destination parameter, or source parameter
+     * in an application of vmult of a matrix.
+     *
+     * The generic version of this class just calls
+     * <code>Vector::reinit()</code> with the result of
+     * <code>Matrix::m()</code> or <code>Matrix::n()</code>, respectively.
+     * This class is specialized for more complicated data structures, such as
+     * TrilinosWrappers::MPI::Vector, etc.
+     */
+    template<typename Vector>
+    class ReinitHelper
+    {
+    public:
+      /**
+       * Initializes a vector v of the Range space to be directly usable as
+       * the destination parameter in an application of vmult. Similar to the
+       * reinit functions of the vector classes, the boolean determines
+       * whether a fast initialization is done, i.e., if it is set to false the
+       * content of the vector is set to 0.
+       *
+       * The generic version of this class just calls
+       * <code>Vector::reinit()</code> with the result of
+       * <code>Matrix::m()</code> and forwards @p omit_zeroing_entries
+       * unchanged.
+       */
+      template <typename Matrix>
+      static
+      void reinit_range_vector (const Matrix &matrix, Vector &v, bool omit_zeroing_entries)
+      {
+        v.reinit(matrix.m(), omit_zeroing_entries);
+      }
+
+      /**
+       * Initializes a vector of the Domain space to be directly usable as the
+       * source parameter in an application of vmult. Similar to the reinit
+       * functions of the vector classes, the boolean determines whether a
+       * fast initialization is done, i.e., if it is set to false the content
+       * of the vector is set to 0.
+       *
+       * The generic version of this class just calls
+       * <code>Vector::reinit()</code> with the result of
+       * <code>Matrix::n()</code> and forwards @p omit_zeroing_entries
+       * unchanged.
+       */
+      template <typename Matrix>
+      static
+      void reinit_domain_vector (const Matrix &matrix, Vector &v, bool omit_zeroing_entries)
+      {
+        v.reinit(matrix.n(), omit_zeroing_entries);
+      }
+    };
+  } /* namespace LinearOperator */
+} /* namespace internal */
+
+
+namespace
+{
+  // A trait class that determines whether type T provides public
+  // (templated or non-templated) vmult_add and Tvmult_add member functions.
+  //
+  // The result is exposed as the nested typedef "type", which is the return
+  // type of the overload of test() that is selected for the call
+  // test<T>(0, 0). The test() overloads are only declared, never defined;
+  // they are used exclusively inside decltype.
+  template <typename Range, typename Domain, typename T>
+  class has_vmult_add
+  {
+    // Fallback that accepts any arguments.
+    template <typename C>
+    static std::false_type test(...);
+
+    // Takes part in overload resolution only if both &C::vmult_add and
+    // &C::Tvmult_add are well-formed expressions.
+    template <typename C>
+    static std::true_type test(decltype(&C::vmult_add),
+                               decltype(&C::Tvmult_add));
+
+    // Work around a bug with icc (up to version 15) that fails during type
+    // deduction in an SFINAE scenario
+#ifndef DEAL_II_ICC_SFINAE_BUG
+
+    // Member function templates with a single template parameter,
+    // instantiated with Range.
+    template <typename C>
+    static std::true_type test(decltype(&C::template vmult_add<Range>),
+                               decltype(&C::template Tvmult_add<Range>));
+
+    // Member function templates with two template parameters, instantiated
+    // with <Range, Domain> and <Domain, Range>, respectively.
+    template <typename C>
+    static std::true_type test(decltype(&C::template vmult_add<Range, Domain>),
+                               decltype(&C::template Tvmult_add<Domain, Range>));
+#endif
+
+  public:
+    // type is std::true_type if Matrix provides vmult_add and Tvmult_add,
+    // otherwise it is std::false_type
+
+    typedef decltype(test<T>(0, 0)) type;
+  };
+
+
+  // Helper that evaluates function(tmp, u) into a temporary vector tmp and
+  // transfers the result to v afterwards: v is overwritten if add is false
+  // and incremented if add is true. The temporary is initialized via
+  // reinit(v, ...) and is obtained from a GrowingVectorMemory object.
+  template <typename Function, typename Range, typename Domain>
+  void apply_with_intermediate_storage(Function function, Range &v,
+                                       const Domain &u, bool add)
+  {
+    static GrowingVectorMemory<Range> vector_memory;
+
+    Range *const scratch = vector_memory.alloc();
+    Range &result = *scratch;
+
+    // The entries need not be zeroed, function is expected to overwrite them.
+    result.reinit(v, /*bool omit_zeroing_entries =*/true);
+
+    function(result, u);
+
+    if (!add)
+      v = result;
+    else
+      v += result;
+
+    vector_memory.free(scratch);
+  }
+
+
+  // A helper class to add a reduced matrix interface to a LinearOperator
+  // (typically provided by Preconditioner classes).
+  //
+  // Only matrix.vmult() and matrix.Tvmult() are used; vmult_add and
+  // Tvmult_add are emulated with the help of a temporary vector. All lambdas
+  // capture matrix by reference, so matrix has to outlive the
+  // LinearOperator that is populated here.
+  template <typename Range, typename Domain>
+  class MatrixInterfaceWithoutVmultAdd
+  {
+  public:
+    // Sets op.vmult, op.vmult_add, op.Tvmult and op.Tvmult_add; all other
+    // members of op are left untouched.
+    template <typename Matrix>
+    void operator()(LinearOperator<Range, Domain> &op, const Matrix &matrix)
+    {
+      op.vmult = [&matrix](Range &v, const Domain &u)
+      {
+        if (PointerComparison::equal(&v, &u))
+          {
+            // If v and u are the same memory location use intermediate storage
+            apply_with_intermediate_storage([&matrix](Range &b, const Domain &a)
+            {
+              matrix.vmult(b, a);
+            },
+            v, u, /*bool add =*/ false);
+          }
+        else
+          {
+            matrix.vmult(v,u);
+          }
+      };
+
+      op.vmult_add = [&matrix](Range &v, const Domain &u)
+      {
+        // use intermediate storage to implement vmult_add with vmult
+        apply_with_intermediate_storage([&matrix](Range &b, const Domain &a)
+        {
+          matrix.vmult(b, a);
+        },
+        v, u, /*bool add =*/ true);
+      };
+
+      op.Tvmult = [&matrix](Domain &v, const Range &u)
+      {
+        if (PointerComparison::equal(&v, &u))
+          {
+            // If v and u are the same memory location use intermediate storage
+            apply_with_intermediate_storage([&matrix](Domain &b, const Range &a)
+            {
+              matrix.Tvmult(b, a);
+            },
+            v, u, /*bool add =*/ false);
+          }
+        else
+          {
+            matrix.Tvmult(v,u);
+          }
+
+      };
+
+      op.Tvmult_add = [&matrix](Domain &v, const Range &u)
+      {
+        // use intermediate storage to implement Tvmult_add with Tvmult
+        apply_with_intermediate_storage([&matrix](Domain &b, const Range &a)
+        {
+          matrix.Tvmult(b, a);
+        },
+        v, u, /*bool add =*/ true);
+      };
+    }
+  };
+
+
+  // A helper class to add the full matrix interface to a LinearOperator.
+  //
+  // vmult and Tvmult are set up by MatrixInterfaceWithoutVmultAdd;
+  // vmult_add and Tvmult_add are then replaced by variants that call
+  // matrix.vmult_add() and matrix.Tvmult_add() directly. All lambdas capture
+  // matrix by reference, so matrix has to outlive the LinearOperator that is
+  // populated here.
+  template <typename Range, typename Domain>
+  class MatrixInterfaceWithVmultAdd
+  {
+  public:
+    // Sets op.vmult, op.vmult_add, op.Tvmult and op.Tvmult_add; all other
+    // members of op are left untouched.
+    template <typename Matrix>
+    void operator()(LinearOperator<Range, Domain> &op, const Matrix &matrix)
+    {
+      // As above ...
+
+      MatrixInterfaceWithoutVmultAdd<Range, Domain>().operator()(op, matrix);
+
+      // ... but add native vmult_add and Tvmult_add variants:
+
+      op.vmult_add = [&matrix](Range &v, const Domain &u)
+      {
+        if (PointerComparison::equal(&v, &u))
+          {
+            // If v and u are the same memory location, compute the product
+            // into intermediate storage and add it to v. The result has to
+            // be accumulated (add == true); overwriting v would turn
+            // vmult_add into vmult for aliased arguments.
+            apply_with_intermediate_storage([&matrix](Range &b, const Domain &a)
+            {
+              matrix.vmult(b, a);
+            },
+            v, u, /*bool add =*/ true);
+          }
+        else
+          {
+            matrix.vmult_add(v,u);
+          }
+      };
+
+      op.Tvmult_add = [&matrix](Domain &v, const Range &u)
+      {
+        if (PointerComparison::equal(&v, &u))
+          {
+            // Same treatment of aliased arguments for the transpose.
+            apply_with_intermediate_storage([&matrix](Domain &b, const Range &a)
+            {
+              matrix.Tvmult(b, a);
+            },
+            v, u, /*bool add =*/ true);
+          }
+        else
+          {
+            matrix.Tvmult_add(v,u);
+          }
+      };
+    }
+  };
+
+} /* namespace */
+
+
+/**
+ * @relates LinearOperator
+ *
+ * A function that encapsulates generic @p matrix objects that act on a
+ * compatible Vector type into a LinearOperator. The LinearOperator object
+ * that is created stores a reference to the matrix object. Thus, @p matrix
+ * must remain a valid reference for the whole lifetime of the LinearOperator
+ * object.
+ *
+ * All changes made on @p matrix after the creation of the LinearOperator
+ * object are reflected by the operator object. For example, it is a valid
+ * procedure to first create a LinearOperator and resize, reassemble the
+ * matrix later.
+ *
+ * The Matrix class in question must provide the following minimal interface:
+ *
+ * @code
+ * class Matrix
+ * {
+ * public:
+ *   // (type specific) information how to create a Range and Domain vector
+ *   // with appropriate size and internal layout
+ *
+ *   // Application of matrix to vector src, writes the result into dst.
+ *   vmult(Range &dst, const Domain &src);
+ *
+ *   // Application of the transpose of matrix to vector src, writes the
+ *   // result into dst. (Depending on the usage of the linear operator
+ *   // class this can be a dummy implementation throwing an error.)
+ *   Tvmult(Domain &dst, const Range &src);
+ * };
+ * @endcode
+ *
+ * The following (optional) interface is used if available:
+ *
+ * @code
+ * class Matrix
+ * {
+ * public:
+ *   // Application of matrix to vector src, adds the result to dst.
+ *   vmult_add(Range &dst, const Domain &src);
+ *
+ *   // Application of the transpose of matrix to vector src, adds the
+ *   // result to dst.
+ *   Tvmult_add(Domain &dst, const Range &src);
+ * };
+ * @endcode
+ *
+ * If the Matrix does not provide <code>vmult_add</code> and
+ * <code>Tvmult_add</code>, they are implemented in terms of
+ * <code>vmult</code> and <code>Tvmult</code> (requiring intermediate
+ * storage).
+ *
+ * @author Matthias Maier, 2015
+ *
+ * @ingroup LAOperators
+ */
+template <typename Range, typename Domain, typename Matrix>
+LinearOperator<Range, Domain> linear_operator(const Matrix &matrix)
+{
+  // implement with the more generic variant below: matrix serves both as
+  // the operator exemplar (used for the reinit functions) and as the object
+  // that provides vmult, Tvmult, etc.
+  return linear_operator<Range, Domain, Matrix, Matrix>(matrix, matrix);
+}
+
+
+/**
+ * @relates LinearOperator
+ *
+ * Variant of above function that takes an operator object @p
+ * operator_exemplar as an additional reference. This object is used to
+ * populate the reinit_domain_vector and reinit_range_vector function objects.
+ * The reference @p matrix is used to construct vmult, Tvmult, etc.
+ *
+ * This variant can, for example, be used to encapsulate preconditioners (that
+ * typically do not expose any information about the underlying matrix).
+ *
+ * @author Matthias Maier, 2015
+ *
+ * @ingroup LAOperators
+ */
+template <typename Range,
+          typename Domain,
+          typename OperatorExemplar,
+          typename Matrix>
+LinearOperator<Range, Domain>
+linear_operator(const OperatorExemplar &operator_exemplar, const Matrix &matrix)
+{
+  typedef internal::LinearOperator::ReinitHelper<Range>  RangeReinit;
+  typedef internal::LinearOperator::ReinitHelper<Domain> DomainReinit;
+
+  // Helper that fills in vmult, Tvmult, etc.: the variant using the
+  // matrix's own vmult_add and Tvmult_add is chosen if has_vmult_add
+  // reports that Matrix provides them, the reduced variant otherwise.
+  typedef typename std::conditional<
+  has_vmult_add<Range, Domain, Matrix>::type::value,
+                MatrixInterfaceWithVmultAdd<Range, Domain>,
+                MatrixInterfaceWithoutVmultAdd<Range, Domain>>::type MatrixInterface;
+
+  LinearOperator<Range, Domain> wrapped_op;
+
+  // matrix and operator_exemplar are always stored by reference in the
+  // function objects: later modifications of either object are thereby
+  // respected by the LinearOperator, and in general neither of them can be
+  // copied anyway. Both must outlive the returned object.
+
+  wrapped_op.reinit_range_vector = [&operator_exemplar](Range &v, bool omit_zeroing_entries)
+  {
+    RangeReinit::reinit_range_vector(operator_exemplar, v, omit_zeroing_entries);
+  };
+
+  wrapped_op.reinit_domain_vector = [&operator_exemplar](Domain &v, bool omit_zeroing_entries)
+  {
+    DomainReinit::reinit_domain_vector(operator_exemplar, v, omit_zeroing_entries);
+  };
+
+  MatrixInterface populate_matrix_interface;
+  populate_matrix_interface(wrapped_op, matrix);
+
+  return wrapped_op;
+}
+
+//@}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_CXX11
+#endif
diff --git a/include/deal.II/lac/matrix_block.h b/include/deal.II/lac/matrix_block.h
new file mode 100644
index 0000000..8cec4e6
--- /dev/null
+++ b/include/deal.II/lac/matrix_block.h
@@ -0,0 +1,1118 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2007 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__matrix_block_h
+#define dealii__matrix_block_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/base/std_cxx11/shared_ptr.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/base/mg_level_object.h>
+#include <deal.II/lac/block_indices.h>
+#include <deal.II/lac/block_sparsity_pattern.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/algorithms/any_data.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+template <typename MatrixType> class MatrixBlock;
+
+namespace internal
+{ // forward declarations of the helpers that MatrixBlock declares as friends
+  template <typename MatrixType>
+  void
+  reinit(MatrixBlock<MatrixType> &v,
+         const BlockSparsityPattern &p);
+  // Overload for blocks that wrap a SparseMatrix.
+  template <typename number>
+  void
+  reinit(MatrixBlock<dealii::SparseMatrix<number> > &v,
+         const BlockSparsityPattern &p);
+}
+
+/**
+ * A wrapper around a matrix object, storing the coordinates in a block matrix
+ * as well.
+ *
+ * This class is an alternative to BlockMatrixBase, if you only want to
+ * generate a single block of the system, not the whole system. Using the
+ * add() functions of this class, it is possible to use the standard
+ * assembling functions used for block matrices, but only enter in one of the
+ * blocks and still avoiding the index computations involved.  The reason for
+ * this class is, that we may need a different number of matrices for
+ * different blocks in a block system. For example, a preconditioner for the
+ * Oseen system can be built as a block system, where the pressure block is of
+ * the form <b>M</b><sup>-1</sup><b>FA</b><sup>-1</sup> with <b>M</b> the
+ * pressure mass matrix, <b>A</b> the pressure Laplacian and <b>F</b> the
+ * advection diffusion operator applied to the pressure space. Since only a
+ * single matrix is needed for the other blocks, using BlockSparseMatrix or
+ * similar would be a waste of memory.
+ *
+ * While the add() functions make a MatrixBlock appear like a block matrix for
+ * assembling, the functions vmult(), Tvmult(), vmult_add(), and Tvmult_add()
+ * make it behave like a MatrixType, when it comes to applying it to a vector.
+ * This behavior allows us to store MatrixBlock objects in vectors, for
+ * instance in MGLevelObject without extracting the #matrix first.
+ *
+ * MatrixBlock comes handy when using BlockMatrixArray. Once the MatrixBlock
+ * has been properly initialized and filled, it can be used in the simplest
+ * case as:
+ * @code
+ * MatrixBlockVector<SparseMatrix<double> > blocks;
+ *
+ * ...
+ *
+ * BlockMatrixArray matrix (n_blocks, n_blocks);
+ *
+ * for (size_type i=0;i<blocks.size();++i)
+ *   matrix.enter(blocks.block(i).row, blocks.block(i).column, blocks.matrix(i));
+ * @endcode
+ *
+ * Here, we have not gained very much, except that we do not need to set up
+ * empty blocks in the block system.
+ *
+ * @note This class expects, that the row and column BlockIndices objects for
+ * the system are equal. If they are not, some functions will throw
+ * ExcNotImplemented.
+ *
+ * @todo Example for the product preconditioner of the pressure Schur
+ * complement.
+ *
+ * @ingroup Matrix2
+ * @ingroup vector_valued
+ *
+ * @see
+ * @ref GlossBlockLA "Block (linear algebra)"
+ * @author Guido Kanschat, 2006
+ */
+template <typename MatrixType>
+class MatrixBlock
+  : public Subscriptor
+{
+public:
+  /**
+   * Declare type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * Declare a type for matrix entries.
+   */
+  typedef typename MatrixType::value_type value_type;
+
+  /**
+   * Constructor rendering an uninitialized object.
+   */
+  MatrixBlock();
+
+  /**
+   * Copy constructor.
+   */
+  MatrixBlock(const MatrixBlock<MatrixType> &M);
+
+  /**
+   * Constructor setting block coordinates, but not initializing the matrix.
+   */
+
+  MatrixBlock(size_type i, size_type j);
+
+  /**
+   * Reinitialize the matrix for a new BlockSparsityPattern. This adjusts the
+   * #matrix as well as the #row_indices and #column_indices.
+   *
+   * @note The row and column block structure of the sparsity pattern must be
+   * equal.
+   */
+  void reinit(const BlockSparsityPattern &sparsity);
+
+  operator MatrixType &();
+  operator const MatrixType &() const;
+
+  /**
+   * Add <tt>value</tt> to the element (<i>i,j</i>). Throws an error if the
+   * entry does not exist or if it is in a different block.
+   */
+  void add (const size_type                       i,
+            const size_type                       j,
+            const typename MatrixType::value_type value);
+
+  /**
+   * Add all elements in a FullMatrix into sparse matrix locations given by
+   * <tt>indices</tt>. This function assumes a quadratic sparse matrix and a
+   * quadratic full_matrix.  The global locations are translated into
+   * locations in this block and ExcBlockIndexMismatch is thrown, if the
+   * global index does not point into the block referred to by #row and
+   * #column.
+   *
+   * @todo <tt>elide_zero_values</tt> is currently ignored.
+   *
+   * The optional parameter <tt>elide_zero_values</tt> can be used to specify
+   * whether zero values should be added anyway or these should be filtered
+   * away and only non-zero data is added. The default value is <tt>true</tt>,
+   * i.e., zero values won't be added into the matrix.
+   */
+  template <typename number>
+  void add (const std::vector<size_type> &indices,
+            const FullMatrix<number>     &full_matrix,
+            const bool                    elide_zero_values = true);
+
+  /**
+   * Add all elements in a FullMatrix into global locations given by
+   * <tt>row_indices</tt> and <tt>col_indices</tt>, respectively. The global
+   * locations are translated into locations in this block and
+   * ExcBlockIndexMismatch is thrown, if the global index does not point into
+   * the block referred to by #row and #column.
+   *
+   * @todo <tt>elide_zero_values</tt> is currently ignored.
+   *
+   * The optional parameter <tt>elide_zero_values</tt> can be used to specify
+   * whether zero values should be added anyway or these should be filtered
+   * away and only non-zero data is added. The default value is <tt>true</tt>,
+   * i.e., zero values won't be added into the matrix.
+   */
+  template <typename number>
+  void add (const std::vector<size_type> &row_indices,
+            const std::vector<size_type> &col_indices,
+            const FullMatrix<number>        &full_matrix,
+            const bool                       elide_zero_values = true);
+
+  /**
+   * Set several elements in the specified row of the matrix with column
+   * indices as given by <tt>col_indices</tt> to the respective value. This is
+   * the function doing the actual work for the ones adding full matrices. The
+   * global locations <tt>row_index</tt> and <tt>col_indices</tt> are
+   * translated into locations in this block and ExcBlockIndexMismatch is
+   * thrown, if the global index does not point into the block referred to by
+   * #row and #column.
+   *
+   * @todo <tt>elide_zero_values</tt> is currently ignored.
+   *
+   * The optional parameter <tt>elide_zero_values</tt> can be used to specify
+   * whether zero values should be added anyway or these should be filtered
+   * away and only non-zero data is added. The default value is <tt>true</tt>,
+   * i.e., zero values won't be added into the matrix.
+   */
+  template <typename number>
+  void add (const size_type               row_index,
+            const std::vector<size_type> &col_indices,
+            const std::vector<number>    &values,
+            const bool                    elide_zero_values = true);
+
+  /**
+   * Add an array of values given by <tt>values</tt> in the given global
+   * matrix row at columns specified by col_indices in the sparse matrix.
+   *
+   * The optional parameter <tt>elide_zero_values</tt> can be used to specify
+   * whether zero values should be added anyway or these should be filtered
+   * away and only non-zero data is added. The default value is <tt>true</tt>,
+   * i.e., zero values won't be added into the matrix.
+   */
+  template <typename number>
+  void add (const size_type  row,
+            const size_type  n_cols,
+            const size_type *col_indices,
+            const number    *values,
+            const bool       elide_zero_values = true,
+            const bool       col_indices_are_sorted = false);
+
+  /**
+   * Matrix-vector-multiplication, forwarding to the same function in
+   * MatrixType. No index computations are done, thus, the vectors need to
+   * have sizes matching #matrix.
+   */
+  template<class VectorType>
+  void vmult (VectorType &w, const VectorType &v) const;
+
+  /**
+   * Matrix-vector-multiplication, forwarding to the same function in
+   * MatrixType. No index computations are done, thus, the vectors need to
+   * have sizes matching #matrix.
+   */
+  template<class VectorType>
+  void vmult_add (VectorType &w, const VectorType &v) const;
+
+  /**
+   * Matrix-vector-multiplication, forwarding to the same function in
+   * MatrixType. No index computations are done, thus, the vectors need to
+   * have sizes matching #matrix.
+   */
+  template<class VectorType>
+  void Tvmult (VectorType &w, const VectorType &v) const;
+
+  /**
+   * Matrix-vector-multiplication, forwarding to the same function in
+   * MatrixType. No index computations are done, thus, the vectors need to
+   * have sizes matching #matrix.
+   */
+  template<class VectorType>
+  void Tvmult_add (VectorType &w, const VectorType &v) const;
+
+  /**
+   * The memory used by this object.
+   */
+  std::size_t memory_consumption () const;
+
+  /**
+   * The block number computed from an index by using BlockIndices does not
+   * match the block coordinates stored in this object.
+   */
+  DeclException2(ExcBlockIndexMismatch, size_type, size_type,
+                 << "Block index " << arg1 << " does not match " << arg2);
+
+  /**
+   * Row coordinate.  This is the position of the data member matrix on the
+   * global matrix.
+   */
+  size_type row;
+  /**
+   * Column coordinate.  This is the position of the data member matrix on the
+   * global matrix.
+   */
+  size_type column;
+
+  /**
+   * The matrix itself
+   */
+  MatrixType matrix;
+
+private:
+  /**
+   * The row BlockIndices of the whole system. Together with #row, this allows
+   * us to find the index of the first row degree of freedom for this block.
+   */
+  BlockIndices row_indices;
+  /**
+   * The column BlockIndices of the whole system. Together with #column, this
+   * allows us to find the index of the first column degree of freedom for
+   * this block.
+   */
+  BlockIndices column_indices;
+
+  template <class OTHER_MatrixType>
+  friend
+  void dealii::internal::reinit(MatrixBlock<OTHER_MatrixType> &,
+                                const BlockSparsityPattern &);
+
+  template <typename number>
+  friend
+  void internal::reinit(MatrixBlock<dealii::SparseMatrix<number> > &v,
+                        const BlockSparsityPattern &p);
+};
+
+
+/**
+ * A vector of MatrixBlock, which is implemented using shared pointers, in
+ * order to allow for copying and rearranging. Each matrix block can be
+ * identified by name.
+ *
+ * @relates MatrixBlock
+ * @ingroup vector_valued
+ * @author Baerbel Janssen, Guido Kanschat, 2010
+ */
+template <typename MatrixType>
+class MatrixBlockVector
+  :
+  private AnyData
+{
+public:
+  /**
+   * Declare type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * The type of object stored.
+   */
+  typedef MatrixBlock<MatrixType> value_type;
+
+  /**
+   * The pointer type used for storing the objects. We use a shared pointer,
+   * such that they get deleted automatically when not used anymore.
+   */
+  typedef std_cxx11::shared_ptr<value_type> ptr_type;
+
+  /**
+   * Add a new matrix block at the position <tt>(row,column)</tt> in the block
+   * system.
+   */
+  void add(size_type row, size_type column, const std::string &name);
+
+  /**
+   * For matrices using a SparsityPattern, this function reinitializes each
+   * matrix in the vector with the correct pattern from the block system.
+   */
+  void reinit(const BlockSparsityPattern &sparsity);
+
+  /**
+   * Clears the object.
+   *
+   * Since often only clearing of the individual matrices is desired, but not
+   * removing the blocks themselves, there is an optional argument. If the
+   * argument is missing or @p false, all matrices will be empty, but the size
+   * of this object and the block positions will not change. If @p
+   * really_clean is @p true, then the object will contain no blocks at the
+   * end.
+   */
+  void clear (bool really_clean = false);
+
+  /**
+   * The memory used by this object.
+   */
+  std::size_t memory_consumption () const;
+
+  /**
+   * Access a constant reference to the block at position <i>i</i>.
+   */
+  const value_type &block(size_type i) const;
+
+  /**
+   * Access a reference to the block at position <i>i</i>.
+   */
+  value_type &block(size_type i);
+
+  /**
+   * Access the matrix at position <i>i</i> for read and write access.
+   */
+  MatrixType &matrix(size_type i);
+
+  /**
+   * import functions from private base class
+   */
+  using AnyData::subscribe;
+  using AnyData::unsubscribe;
+  using AnyData::size;
+  using AnyData::name;
+};
+
+
+/**
+ * A vector of MGLevelObject<MatrixBlock>, which is implemented using shared
+ * pointers, in order to allow for copying and rearranging. Each matrix block
+ * can be identified by name.
+ *
+ * @relates MatrixBlock
+ * @ingroup vector_valued
+ * @author Baerbel Janssen, Guido Kanschat, 2010
+ */
+template <typename MatrixType>
+class MGMatrixBlockVector
+  : public Subscriptor
+{
+public:
+  /**
+   * Declare type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * The type of object stored.
+   */
+  typedef MGLevelObject<MatrixBlock<MatrixType> > value_type;
+  /**
+   * Constructor, determining which matrices should be stored.
+   *
+   * If <tt>edge_matrices</tt> is true, then objects for edge matrices for
+   * discretizations with degrees of freedom on faces are allocated.
+   *
+   * If <tt>edge_flux_matrices</tt> is true, then objects for DG fluxes on the
+   * refinement edge are allocated.
+   */
+  MGMatrixBlockVector(const bool edge_matrices = false,
+                      const bool edge_flux_matrices = false);
+
+  /**
+   * The number of blocks.
+   */
+  unsigned int size () const;
+
+  /**
+   * Add a new matrix block at the position <tt>(row,column)</tt> in the block
+   * system. The third argument allows to give the matrix a name for later
+   * identification.
+   */
+  void add(size_type row, size_type column, const std::string &name);
+
+  /**
+   * For matrices using a SparsityPattern, this function reinitializes each
+   * matrix in the vector with the correct pattern from the block system.
+   *
+   * This function reinitializes the level matrices.
+   */
+  void reinit_matrix(const MGLevelObject<BlockSparsityPattern> &sparsity);
+  /**
+   * For matrices using a SparsityPattern, this function reinitializes each
+   * matrix in the vector with the correct pattern from the block system.
+   *
+   * This function reinitializes the matrices for degrees of freedom on the
+   * refinement edge.
+   */
+  void reinit_edge(const MGLevelObject<BlockSparsityPattern> &sparsity);
+  /**
+   * For matrices using a SparsityPattern, this function reinitializes each
+   * matrix in the vector with the correct pattern from the block system.
+   *
+   * This function reinitializes the flux matrices over the refinement edge.
+   */
+  void reinit_edge_flux(const MGLevelObject<BlockSparsityPattern> &sparsity);
+
+  /**
+   * Clears the object.
+   *
+   * Since often only clearing of the individual matrices is desired, but not
+   * removing the blocks themselves, there is an optional argument. If the
+   * argument is missing or @p false, all matrices will be empty, but the size
+   * of this object and the block positions will not change. If @p
+   * really_clean is @p true, then the object will contain no blocks at the
+   * end.
+   */
+  void clear (bool really_clean = false);
+
+  /**
+   * Access a constant reference to the matrix block at position <i>i</i>.
+   */
+  const value_type &block(size_type i) const;
+
+  /**
+   * Access a reference to the matrix block at position <i>i</i>.
+   */
+  value_type &block(size_type i);
+
+  /**
+   * Access a constant reference to the edge matrix block at position
+   * <i>i</i>.
+   */
+  const value_type &block_in(size_type i) const;
+
+  /**
+   * Access a reference to the edge matrix block at position <i>i</i>.
+   */
+  value_type &block_in(size_type i);
+
+  /**
+   * Access a constant reference to the edge matrix block at position
+   * <i>i</i>.
+   */
+  const value_type &block_out(size_type i) const;
+
+  /**
+   * Access a reference to the edge matrix block at position <i>i</i>.
+   */
+  value_type &block_out(size_type i);
+
+  /**
+   * Access a constant reference to the  edge flux matrix block at position
+   * <i>i</i>.
+   */
+  const value_type &block_up(size_type i) const;
+
+  /**
+   * Access a reference to the  edge flux matrix block at position <i>i</i>.
+   */
+  value_type &block_up(size_type i);
+
+  /**
+   * Access a constant reference to the  edge flux matrix block at position
+   * <i>i</i>.
+   */
+  const value_type &block_down(size_type i) const;
+
+  /**
+   * Access a reference to the edge flux matrix block at position <i>i</i>.
+   */
+  value_type &block_down(size_type i);
+
+  /**
+   * The memory used by this object.
+   */
+  std::size_t memory_consumption () const;
+private:
+  /// Clear one of the matrix objects
+  void clear_object(AnyData &);
+
+  /// Flag for storing matrices_in and matrices_out
+  const bool edge_matrices;
+
+  /// Flag for storing flux_matrices_up and flux_matrices_down
+  const bool edge_flux_matrices;
+
+  /// The level matrices
+  AnyData matrices;
+  /// The matrix from the interior of a level to the refinement edge
+  AnyData matrices_in;
+  /// The matrix from the refinement edge to the interior of a level
+  AnyData matrices_out;
+  /// The DG flux from a level to the lower level
+  AnyData flux_matrices_down;
+  /// The DG flux from the lower level to a level
+  AnyData flux_matrices_up;
+};
+
+
+//----------------------------------------------------------------------//
+
+namespace internal
+{ // helpers copying the block structure of a BlockSparsityPattern into a MatrixBlock
+  template <typename MatrixType>
+  void
+  reinit(MatrixBlock<MatrixType> &v,
+         const BlockSparsityPattern &p)
+  { // generic case: only the block indices are updated, v.matrix is not touched
+    v.row_indices = p.get_row_indices();
+    v.column_indices = p.get_column_indices();
+  }
+  // Overload for SparseMatrix blocks: besides the block indices, the matrix is
+  // reinitialized with the sub-pattern p.block(v.row, v.column).
+  template <typename number>
+  void
+  reinit(MatrixBlock<dealii::SparseMatrix<number> > &v,
+         const BlockSparsityPattern &p)
+  {
+    v.row_indices = p.get_row_indices();
+    v.column_indices = p.get_column_indices();
+    v.matrix.reinit(p.block(v.row, v.column)); // v.row / v.column must already be set
+  }
+}
+
+
+template <typename MatrixType>
+inline
+MatrixBlock<MatrixType>::MatrixBlock () // default: both block coordinates are set to the invalid marker
+  :
+  row(numbers::invalid_size_type),
+  column(numbers::invalid_size_type)
+{}
+
+// Copy constructor: copies coordinates, matrix and block indices of M; the Subscriptor base is default-constructed, not copied.
+template <typename MatrixType>
+inline
+MatrixBlock<MatrixType>::MatrixBlock (const MatrixBlock<MatrixType> &M)
+  :
+  Subscriptor(),
+  row(M.row),
+  column(M.column),
+  matrix(M.matrix),
+  row_indices(M.row_indices),
+  column_indices(M.column_indices)
+{}
+
+// Sets the block coordinates (i, j); matrix and block indices are default-constructed.
+template <typename MatrixType>
+inline
+MatrixBlock<MatrixType>::MatrixBlock (size_type i, size_type j)
+  :
+  row(i), column(j)
+{}
+
+
+template <typename MatrixType>
+inline
+void
+MatrixBlock<MatrixType>::reinit (const BlockSparsityPattern &sparsity)
+{ // Delegates to the internal::reinit overload matching MatrixType.
+  internal::reinit(*this, sparsity);
+}
+
+
+template <typename MatrixType>
+inline
+MatrixBlock<MatrixType>::operator MatrixType &() // implicit conversion: exposes the wrapped matrix
+{
+  return matrix;
+}
+
+// Read-only variant of the conversion to the wrapped matrix.
+template <typename MatrixType>
+inline
+MatrixBlock<MatrixType>::operator const MatrixType &() const
+{
+  return matrix;
+}
+
+
+template <typename MatrixType>
+inline void
+MatrixBlock<MatrixType>::add (const size_type gi,
+                              const size_type gj,
+                              const typename MatrixType::value_type value)
+{
+  // The block structure has to be known before global indices can be mapped.
+  Assert(row_indices.size() != 0, ExcNotInitialized());
+  Assert(column_indices.size() != 0, ExcNotInitialized());
+  // Split each global index into (block number, index within that block).
+  const std::pair<unsigned int, size_type> row_location
+    = row_indices.global_to_local(gi);
+  const std::pair<unsigned int, size_type> column_location
+    = column_indices.global_to_local(gj);
+  // Both indices must point into the block this object represents.
+  Assert (row_location.first == row, ExcBlockIndexMismatch(row_location.first, row));
+  Assert (column_location.first == column, ExcBlockIndexMismatch(column_location.first, column));
+  matrix.add(row_location.second, column_location.second, value);
+}
+
+
+template <typename MatrixType>
+template <typename number>
+inline
+void
+MatrixBlock<MatrixType>::add (const std::vector<size_type> &r_indices,
+                              const std::vector<size_type> &c_indices,
+                              const FullMatrix<number>     &values,
+                              const bool                    elide_zero_values)
+{
+  Assert(row_indices.size() != 0, ExcNotInitialized());
+  Assert(column_indices.size() != 0, ExcNotInitialized());
+  // r_indices / c_indices hold the global index of each row / column of values.
+  AssertDimension (r_indices.size(), values.m());
+  AssertDimension (c_indices.size(), values.n());
+  // Loop over the rows handed in (r_indices); row_indices is the block structure, not the row list.
+  for (size_type i=0; i<r_indices.size(); ++i)
+    add (r_indices[i], c_indices.size(), &c_indices[0], &values(i,0),
+         elide_zero_values);
+}
+
+
+template <typename MatrixType>
+template <typename number>
+inline
+void
+MatrixBlock<MatrixType>::add (const size_type  b_row,
+                              const size_type  n_cols,
+                              const size_type *col_indices,
+                              const number    *values,
+                              const bool,
+                              const bool)
+{
+  // The two trailing flags are called elide_zero_values and
+  // col_indices_are_sorted in the declaration; they are unnamed here because
+  // this implementation does not evaluate them.
+  Assert(row_indices.size() != 0, ExcNotInitialized());
+  Assert(column_indices.size() != 0, ExcNotInitialized());
+
+  // Map the global row to (block number, row within the block); the block
+  // number has to agree with the block row this object stands for.
+  const std::pair<unsigned int, size_type> row_location
+    = row_indices.global_to_local(b_row);
+  Assert(row_location.first == row,
+         ExcBlockIndexMismatch(row_location.first, row));
+
+  // Each column index is mapped and checked on its own and the entries are
+  // added one at a time. While it may not be the most efficient way, it is
+  // at least thread safe.
+  for (size_type k=0; k<n_cols; ++k)
+    {
+      const std::pair<unsigned int, size_type> column_location
+        = column_indices.global_to_local(col_indices[k]);
+      Assert(column_location.first == column,
+             ExcBlockIndexMismatch(column_location.first, column));
+
+      // Entry (local row, local column) of the wrapped matrix.
+      matrix.add(row_location.second, column_location.second, values[k]);
+    }
+}
+
+
+template <typename MatrixType>
+template <typename number>
+inline
+void
+MatrixBlock<MatrixType>::add (const std::vector<size_type> &indices,
+                              const FullMatrix<number>     &values,
+                              const bool                    elide_zero_values)
+{
+  Assert(row_indices.size() != 0, ExcNotInitialized());
+  Assert(column_indices.size() != 0, ExcNotInitialized());
+  // The same index list serves rows and columns, hence values must be square.
+  const std::size_t n_indices = indices.size();
+  AssertDimension (indices.size(), values.m());
+  Assert (values.n() == values.m(), ExcNotQuadratic());
+  // Forward row by row to the pointer-based overload.
+  for (size_type r=0; r<n_indices; ++r)
+    add (indices[r], n_indices, &indices[0], &values(r,0), elide_zero_values);
+}
+
+
+
+template <typename MatrixType>
+template <typename number>
+inline
+void
+MatrixBlock<MatrixType>::add (const size_type               row,
+                              const std::vector<size_type> &col_indices,
+                              const std::vector<number>    &values,
+                              const bool                    elide_zero_values)
+{ // Adds values[j] at (row, col_indices[j]); all indices are global ones.
+  Assert(row_indices.size() != 0, ExcNotInitialized());
+  Assert(column_indices.size() != 0, ExcNotInitialized());
+  AssertDimension (col_indices.size(), values.size());
+  const size_type *cols = col_indices.empty() ? 0 : &col_indices[0]; // [0] on an empty vector is undefined
+  const number    *vals = values.empty() ? 0 : &values[0];           // hence null pointers for empty input
+  add (row, col_indices.size(), cols, vals, elide_zero_values);
+}
+
+
+template <typename MatrixType>
+template <class VectorType>
+inline
+void
+MatrixBlock<MatrixType>::vmult (VectorType &w, const VectorType &v) const
+{ // plain forwarding to the wrapped matrix, no index translation
+  matrix.vmult(w,v);
+}
+
+// Forwards to MatrixType::vmult_add of the wrapped matrix.
+template <typename MatrixType>
+template <class VectorType>
+inline
+void
+MatrixBlock<MatrixType>::vmult_add (VectorType &w, const VectorType &v) const
+{
+  matrix.vmult_add(w,v);
+}
+
+// Forwards to MatrixType::Tvmult of the wrapped matrix.
+template <typename MatrixType>
+template <class VectorType>
+inline
+void
+MatrixBlock<MatrixType>::Tvmult (VectorType &w, const VectorType &v) const
+{
+  matrix.Tvmult(w,v);
+}
+
+// Forwards to MatrixType::Tvmult_add of the wrapped matrix.
+template <typename MatrixType>
+template <class VectorType>
+inline
+void
+MatrixBlock<MatrixType>::Tvmult_add (VectorType &w, const VectorType &v) const
+{
+  matrix.Tvmult_add(w,v);
+}
+
+
+template <typename MatrixType>
+inline
+std::size_t
+MatrixBlock<MatrixType>::memory_consumption () const
+{ // NOTE(review): no term for memory held by row_indices / column_indices beyond sizeof(*this)
+  return (sizeof(*this)
+          + MemoryConsumption::memory_consumption(matrix)
+          - sizeof(matrix)); // sizeof(*this) already contains sizeof(matrix); presumably avoids counting it twice - confirm
+}
+
+//----------------------------------------------------------------------//
+
+template <typename MatrixType>
+inline void
+MatrixBlockVector<MatrixType>::add(size_type          row,
+                                   size_type          column,
+                                   const std::string &name)
+{ // Creates a new block with coordinates (row, column) and stores it under name.
+  ptr_type p(new value_type(row, column)); // the shared pointer owns the new MatrixBlock
+  AnyData::add(p, name);
+}
+
+
+template <typename MatrixType>
+inline void
+MatrixBlockVector<MatrixType>::reinit (const BlockSparsityPattern &sparsity)
+{
+  // Every stored block receives the complete block pattern; what is taken
+  // from it is decided by MatrixBlock::reinit.
+  for (size_type b=0; b<this->size(); ++b)
+    block(b).reinit(sparsity);
+}
+
+
+template <typename MatrixType>
+inline void
+MatrixBlockVector<MatrixType>::clear (bool really_clean)
+{
+  // Removing the blocks themselves is not implemented.
+  if (really_clean)
+    {
+      Assert(false, ExcNotImplemented());
+      return;
+    }
+  // Default: empty every matrix, but keep the blocks and their positions.
+  for (size_type b=0; b<this->size(); ++b)
+    matrix(b).clear();
+}
+
+
+
+template <typename MatrixType>
+inline const MatrixBlock<MatrixType> &
+MatrixBlockVector<MatrixType>::block (size_type i) const
+{ // read-only access: entry i is fetched as ptr_type and dereferenced
+  return *this->read<ptr_type>(i);
+}
+
+// Mutable access to block i; uses AnyData::entry instead of AnyData::read.
+template <typename MatrixType>
+inline MatrixBlock<MatrixType> &
+MatrixBlockVector<MatrixType>::block (size_type i)
+{
+  return *this->entry<ptr_type>(i);
+}
+
+// Shortcut to the matrix member of block i.
+template <typename MatrixType>
+inline MatrixType &
+MatrixBlockVector<MatrixType>::matrix (size_type i)
+{
+  return this->entry<ptr_type>(i)->matrix;
+}
+
+
+
+//----------------------------------------------------------------------//
+
+template <typename MatrixType>
+inline
+MGMatrixBlockVector<MatrixType>::MGMatrixBlockVector(const bool e, const bool f) // e: edge matrices, f: edge flux matrices
+  :
+  edge_matrices(e),
+  edge_flux_matrices(f)
+{}
+
+// Number of blocks, taken from the container of level matrices.
+template <typename MatrixType>
+inline
+unsigned int
+MGMatrixBlockVector<MatrixType>::size () const
+{
+  return matrices.size();
+}
+
+
+template <typename MatrixType>
+inline void
+MGMatrixBlockVector<MatrixType>::add(
+  size_type row, size_type column,
+  const std::string &name)
+{ // Registers a new block (row, column) under name in every enabled container.
+  MGLevelObject<MatrixBlock<MatrixType> > p(0, 1); // prototype level object, constructed with arguments (0, 1)
+  p[0].row = row;       // only entry 0 receives the coordinates; reinit_*() reads them from the lowest level
+  p[0].column = column;
+  // NOTE(review): p is handed to AnyData::add by value here, while the accessors request a pointer type - confirm.
+  matrices.add(p, name);
+  if (edge_matrices)
+    {
+      matrices_in.add(p, name);
+      matrices_out.add(p, name);
+    }
+  if (edge_flux_matrices)
+    {
+      flux_matrices_up.add(p, name);
+      flux_matrices_down.add(p, name);
+    }
+}
+
+
+template <typename MatrixType>
+inline const MGLevelObject<MatrixBlock<MatrixType> > &
+MGMatrixBlockVector<MatrixType>::block(size_type i) const
+{ // Read-only access to the level matrices of block i.
+  return *matrices.read<const MGLevelObject<MatrixType>* >(i); // NOTE(review): requested type is a pointer to MGLevelObject<MatrixType>, but the return type and add() use MGLevelObject<MatrixBlock<MatrixType> > - confirm
+}
+
+// Mutable access to the level matrices of block i.
+template <typename MatrixType>
+inline MGLevelObject<MatrixBlock<MatrixType> > &
+MGMatrixBlockVector<MatrixType>::block(size_type i)
+{
+  return *matrices.entry<MGLevelObject<MatrixType>* >(i); // NOTE(review): same type question as in the const block()
+}
+
+// Read-only access to entry i of matrices_in.
+template <typename MatrixType>
+inline const MGLevelObject<MatrixBlock<MatrixType> > &
+MGMatrixBlockVector<MatrixType>::block_in(size_type i) const
+{
+  return *matrices_in.read<const MGLevelObject<MatrixType>* >(i); // NOTE(review): same type question as in block()
+}
+
+// Mutable access to entry i of matrices_in.
+template <typename MatrixType>
+inline MGLevelObject<MatrixBlock<MatrixType> > &
+MGMatrixBlockVector<MatrixType>::block_in(size_type i)
+{
+  return *matrices_in.entry<MGLevelObject<MatrixType>* >(i); // NOTE(review): same type question as in block()
+}
+
+// Read-only access to entry i of matrices_out.
+template <typename MatrixType>
+inline const MGLevelObject<MatrixBlock<MatrixType> > &
+MGMatrixBlockVector<MatrixType>::block_out(size_type i) const
+{
+  return *matrices_out.read<const MGLevelObject<MatrixType>* >(i); // NOTE(review): same type question as in block()
+}
+
+// Mutable access to entry i of matrices_out.
+template <typename MatrixType>
+inline MGLevelObject<MatrixBlock<MatrixType> > &
+MGMatrixBlockVector<MatrixType>::block_out(size_type i)
+{
+  return *matrices_out.entry<MGLevelObject<MatrixType>* >(i); // NOTE(review): same type question as in block()
+}
+
+// Read-only access to entry i of flux_matrices_up.
+template <typename MatrixType>
+inline const MGLevelObject<MatrixBlock<MatrixType> > &
+MGMatrixBlockVector<MatrixType>::block_up(size_type i) const
+{
+  return *flux_matrices_up.read<const MGLevelObject<MatrixType>* >(i); // NOTE(review): same type question as in block()
+}
+
+// Mutable access to entry i of flux_matrices_up.
+template <typename MatrixType>
+inline MGLevelObject<MatrixBlock<MatrixType> > &
+MGMatrixBlockVector<MatrixType>::block_up(size_type i)
+{
+  return *flux_matrices_up.entry<MGLevelObject<MatrixType>* >(i); // NOTE(review): same type question as in block()
+}
+
+// Read-only access to entry i of flux_matrices_down.
+template <typename MatrixType>
+inline const MGLevelObject<MatrixBlock<MatrixType> > &
+MGMatrixBlockVector<MatrixType>::block_down(size_type i) const
+{
+  return *flux_matrices_down.read<const MGLevelObject<MatrixType>* >(i); // NOTE(review): same type question as in block()
+}
+
+// Mutable access to entry i of flux_matrices_down.
+template <typename MatrixType>
+inline MGLevelObject<MatrixBlock<MatrixType> > &
+MGMatrixBlockVector<MatrixType>::block_down(size_type i)
+{
+  return *flux_matrices_down.entry<MGLevelObject<MatrixType>* >(i); // NOTE(review): same type question as in block()
+}
+
+
+template <typename MatrixType>
+inline void
+MGMatrixBlockVector<MatrixType>::reinit_matrix(const MGLevelObject<BlockSparsityPattern> &sparsity)
+{
+  for (size_type b=0; b<this->size(); ++b)
+    {
+      MGLevelObject<MatrixBlock<MatrixType> > &level_blocks = block(b);
+      // Read the coordinates before resize(); they are written anew for every level below.
+      const size_type block_row    = level_blocks[level_blocks.min_level()].row;
+      const size_type block_column = level_blocks[level_blocks.min_level()].column;
+      level_blocks.resize(sparsity.min_level(), sparsity.max_level());
+      for (size_type l = level_blocks.min_level(); l <= level_blocks.max_level(); ++l)
+        {
+          level_blocks[l].row = block_row;
+          level_blocks[l].column = block_column;
+          internal::reinit(level_blocks[l], sparsity[l]);
+        }
+    }
+}
+
+
+template <typename MatrixType>
+inline void
+MGMatrixBlockVector<MatrixType>::reinit_edge(const MGLevelObject<BlockSparsityPattern> &sparsity)
+{ // Reinitializes matrices_in / matrices_out of every block with the given level patterns.
+  for (size_type i=0; i<this->size(); ++i)
+    {
+      MGLevelObject<MatrixBlock<MatrixType> > &o = block(i);
+      const size_type row = o[o.min_level()].row;    // block coordinates are taken from the
+      const size_type col = o[o.min_level()].column; // lowest level of the level matrices
+      // Iterate over the range the edge objects are resized to, not over the range of o.
+      block_in(i).resize(sparsity.min_level(), sparsity.max_level());
+      block_out(i).resize(sparsity.min_level(), sparsity.max_level());
+      for (size_type level = sparsity.min_level(); level <= sparsity.max_level(); ++level)
+        {
+          block_in(i)[level].row = row;
+          block_in(i)[level].column = col;
+          internal::reinit(block_in(i)[level], sparsity[level]);
+          block_out(i)[level].row = row;
+          block_out(i)[level].column = col;
+          internal::reinit(block_out(i)[level], sparsity[level]);
+        }
+    }
+}
+
+
+template <typename MatrixType>
+inline void
+MGMatrixBlockVector<MatrixType>::reinit_edge_flux
+(const MGLevelObject<BlockSparsityPattern> &sparsity)
+{ // Reinitializes flux_matrices_up / flux_matrices_down of every block with the given level patterns.
+  for (size_type i=0; i<this->size(); ++i)
+    {
+      MGLevelObject<MatrixBlock<MatrixType> > &o = block(i);
+      const size_type row = o[o.min_level()].row;    // block coordinates are taken from the
+      const size_type col = o[o.min_level()].column; // lowest level of the level matrices
+      // Iterate over the range the flux objects are resized to, not over the range of o.
+      block_up(i).resize(sparsity.min_level(), sparsity.max_level());
+      block_down(i).resize(sparsity.min_level(), sparsity.max_level());
+      for (size_type level = sparsity.min_level(); level <= sparsity.max_level(); ++level)
+        {
+          block_up(i)[level].row = row;
+          block_up(i)[level].column = col;
+          internal::reinit(block_up(i)[level], sparsity[level]);
+          block_down(i)[level].row = row;
+          block_down(i)[level].column = col;
+          internal::reinit(block_down(i)[level], sparsity[level]);
+        }
+
+    }
+}
+
+
+template <typename MatrixType>
+inline void
+MGMatrixBlockVector<MatrixType>::clear_object (AnyData &mo)
+{ // Calls clear() on the matrix of every level of every entry in mo.
+  for (size_type i=0; i<mo.size(); ++i)
+    {
+      MGLevelObject<MatrixBlock<MatrixType> > &o = mo.entry<MGLevelObject<MatrixType>* >(i); // NOTE(review): a pointer type is requested but bound to a reference of a different type - confirm
+      for (size_type level = o.min_level(); level <= o.max_level(); ++level)
+        o[level].matrix.clear(); // the MatrixBlock objects and their coordinates stay in place
+    }
+}
+
+
+template <typename MatrixType>
+inline void
+MGMatrixBlockVector<MatrixType>::clear (bool really_clean)
+{
+  if (really_clean)
+    {
+      // Dropping the blocks themselves is not implemented.
+      Assert(false, ExcNotImplemented());
+      return;
+    }
+  // Default: empty the matrices of all five containers, keeping the blocks.
+  clear_object(matrices);
+  clear_object(matrices_in);
+  clear_object(matrices_out);
+  clear_object(flux_matrices_up);
+  clear_object(flux_matrices_down);
+}
+
+
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/matrix_iterator.h b/include/deal.II/lac/matrix_iterator.h
new file mode 100644
index 0000000..a3997cb
--- /dev/null
+++ b/include/deal.II/lac/matrix_iterator.h
@@ -0,0 +1,227 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__matrix_iterator_h
+#define dealii__matrix_iterator_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * Iterator for constant and non-constant matrices.
+ *
+ * This iterator is abstracted from the actual matrix type and can be used for
+ * any matrix having the required ACCESSOR type.
+ *
+ * @author Guido Kanschat, 2006, based on a previous implementation
+ */
+template <class ACCESSOR>
+class MatrixIterator
+{
+public:
+  /**
+   * Declare type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * Typedef for the matrix type (including constness) we are to operate on.
+   */
+  typedef typename ACCESSOR::MatrixType MatrixType;
+
+  /**
+   * Constructor. Create an iterator into the matrix <tt>matrix</tt> for the
+   * given <tt>row</tt> and the <tt>index</tt> within it.
+   */
+  MatrixIterator (MatrixType      *matrix,
+                  const size_type  row = 0,
+                  const size_type  index = 0);
+
+  /**
+   * Copy from another matrix iterator. Mostly implemented to allow
+   * initialization of a constant iterator from a non constant, this function
+   * only requires that a conversion from the other iterator's accessor to
+   * this accessor object is possible.
+   */
+  template <class OtherAccessor>
+  MatrixIterator(const MatrixIterator<OtherAccessor> &other);
+
+  /**
+   * Prefix increment: advance the accessor and return this iterator.
+   */
+  MatrixIterator &operator++ ();
+
+  /**
+   * Postfix increment: advance, but return a copy of the prior state.
+   */
+  MatrixIterator operator++ (int);
+
+  /**
+   * Dereferencing operator; returns a reference to the stored accessor.
+   */
+  const ACCESSOR &operator* () const;
+
+  /**
+   * Dereferencing operator; returns a pointer to the stored accessor.
+   */
+  const ACCESSOR *operator-> () const;
+
+  /**
+   * Comparison. True, if both accessors are equal.
+   */
+  bool operator == (const MatrixIterator &) const;
+
+  /**
+   * Inverse of <tt>==</tt>.
+   */
+  bool operator != (const MatrixIterator &) const;
+
+  /**
+   * Comparison operator. Result is true if either the first row number is
+   * smaller or if the row numbers are equal and the first index is smaller.
+   *
+   * This function is only valid if both iterators point into the same matrix.
+   */
+  bool operator < (const MatrixIterator &) const;
+
+  /**
+   * Comparison operator. Works in the same way as above operator, just the
+   * other way round.
+   */
+  bool operator > (const MatrixIterator &) const;
+
+private:
+  /**
+   * Store an object of the accessor class.
+   */
+  ACCESSOR accessor;
+
+  /**
+   * Allow other iterators access to private data.
+   */
+  template <class OtherAccessor> friend class MatrixIterator;
+};
+
+
+//----------------------------------------------------------------------//
+
+// Build the iterator by handing matrix, row and index straight to the
+// accessor's constructor.
+template <class ACCESSOR>
+inline
+MatrixIterator<ACCESSOR>::MatrixIterator (MatrixType      *matrix,
+                                          const size_type  row,
+                                          const size_type  index)
+  : accessor(matrix, row, index)
+{}
+
+
+// Converting constructor: the accessor is built from the other iterator's
+// accessor, so it works whenever that accessor conversion exists.
+template <class ACCESSOR>
+template <class OtherAccessor>
+inline
+MatrixIterator<ACCESSOR>::MatrixIterator (const MatrixIterator<OtherAccessor> &other)
+  : accessor(other.accessor)
+{}
+
+
+// Prefix form: step the accessor forward and yield this iterator.
+template <class ACCESSOR>
+inline MatrixIterator<ACCESSOR> &
+MatrixIterator<ACCESSOR>::operator++ ()
+{
+  accessor.advance ();
+  return *this;
+}
+
+
+// Postfix form: remember the current state, step forward, return the copy.
+template <class ACCESSOR>
+inline MatrixIterator<ACCESSOR>
+MatrixIterator<ACCESSOR>::operator++ (int)
+{
+  const MatrixIterator before_step (*this);
+  accessor.advance ();
+  return before_step;
+}
+
+
+// Dereference: expose the stored accessor by const reference.
+template <class ACCESSOR>
+inline const ACCESSOR &
+MatrixIterator<ACCESSOR>::operator* () const
+{
+  return accessor;
+}
+
+
+// Member access: expose the address of the stored accessor.
+template <class ACCESSOR>
+inline const ACCESSOR *
+MatrixIterator<ACCESSOR>::operator-> () const
+{
+  return &accessor;
+}
+
+
+// Two iterators are equal exactly when their accessors compare equal.
+template <class ACCESSOR>
+inline bool
+MatrixIterator<ACCESSOR>::operator == (const MatrixIterator &rhs) const
+{
+  const bool same_position = (accessor == rhs.accessor);
+  return same_position;
+}
+
+
+// Negation of operator==.
+template <class ACCESSOR>
+inline bool
+MatrixIterator<ACCESSOR>::operator != (const MatrixIterator &rhs) const
+{
+  const bool same_position = (*this == rhs);
+  return !same_position;
+}
+
+
+// Ordering is delegated to the accessors; comparing iterators into
+// different matrices is a usage error caught by the assertion.
+template <class ACCESSOR>
+inline bool
+MatrixIterator<ACCESSOR>::operator < (const MatrixIterator &rhs) const
+{
+  Assert (&accessor.get_matrix() == &rhs.accessor.get_matrix(),
+          ExcInternalError());
+
+  return (accessor < rhs.accessor);
+}
+
+
+// a > b is evaluated as b < a, so operator< carries all the logic.
+template <class ACCESSOR>
+inline bool
+MatrixIterator<ACCESSOR>::operator > (const MatrixIterator &rhs) const
+{
+  const bool rhs_is_smaller = (rhs < *this);
+  return rhs_is_smaller;
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/matrix_lib.h b/include/deal.II/lac/matrix_lib.h
new file mode 100644
index 0000000..3cec3a1
--- /dev/null
+++ b/include/deal.II/lac/matrix_lib.h
@@ -0,0 +1,754 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2002 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__matrix_lib_h
+#define dealii__matrix_lib_h
+
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/lac/vector_memory.h>
+#include <deal.II/lac/pointer_matrix.h>
+#include <deal.II/lac/solver_richardson.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+template<typename number> class Vector;
+template<typename number> class BlockVector;
+template<typename number> class SparseMatrix;
+
+/*! @addtogroup Matrix2
+ *@{
+ */
+
+
+/**
+ * Poor man's matrix product of two quadratic matrices. Stores two quadratic
+ * matrices #m1 and #m2 of arbitrary types and implements matrix-vector
+ * multiplications for the product <i>M<sub>1</sub>M<sub>2</sub></i> by
+ * performing multiplication with both factors consecutively. Because the
+ * types of the matrices are opaque (i.e., they can be arbitrary), you can
+ * stack products of three or more matrices by making one of the two matrices
+ * an object of the current type, i.e., a ProductMatrix itself.
+ *
+ * Here is an example multiplying two different FullMatrix objects:
+ * @include product_matrix.cc
+ *
+ * @deprecated If deal.II was configured with C++11 support, use the
+ * LinearOperator class instead, see the module on
+ * @ref LAOperators "linear operators"
+ * for further details.
+ *
+ * @author Guido Kanschat, 2000, 2001, 2002, 2005
+ */
+template<typename VectorType>
+class ProductMatrix : public PointerMatrixBase<VectorType>
+{
+public:
+  /**
+   * Standard constructor. Matrices and the memory pool must be added later
+   * using initialize().
+   */
+  ProductMatrix();
+
+  /**
+   * Constructor only assigning the memory pool. Matrices must be added by
+   * reinit() later.
+   */
+  ProductMatrix(VectorMemory<VectorType> &mem);
+
+  /**
+   * Constructor.  Additionally to the two constituting matrices, a memory
+   * pool for the auxiliary vector must be provided.
+   */
+  template <typename MatrixType1, typename MatrixType2>
+  ProductMatrix (const MatrixType1        &m1,
+                 const MatrixType2        &m2,
+                 VectorMemory<VectorType> &mem);
+
+  /**
+   * Destructor.
+   */
+  ~ProductMatrix();
+
+  /**
+   * Change the matrices.
+   */
+  template <typename MatrixType1, typename MatrixType2>
+  void reinit (const MatrixType1 &m1, const MatrixType2 &m2);
+
+  /**
+   * Change the matrices and memory pool.
+   */
+  template <typename MatrixType1, typename MatrixType2>
+  void initialize (const MatrixType1 &m1,
+                   const MatrixType2 &m2,
+                   VectorMemory<VectorType> &mem);
+
+  // Doc in PointerMatrixBase. Deletes both matrix wrappers and nulls the pointers.
+  void clear();
+
+  /**
+   * Matrix-vector product <i>w = m1 * m2 * v</i>.
+   */
+  virtual void vmult (VectorType       &w,
+                      const VectorType &v) const;
+
+  /**
+   * Transposed matrix-vector product <i>w = m2<sup>T</sup> * m1<sup>T</sup> *
+   * v</i>.
+   */
+  virtual void Tvmult (VectorType       &w,
+                       const VectorType &v) const;
+
+  /**
+   * Adding matrix-vector product <i>w += m1 * m2 * v</i>
+   */
+  virtual void vmult_add (VectorType       &w,
+                          const VectorType &v) const;
+
+  /**
+   * Adding, transposed matrix-vector product <i>w += m2<sup>T</sup> *
+   * m1<sup>T</sup> * v</i>.
+   */
+  virtual void Tvmult_add (VectorType       &w,
+                           const VectorType &v) const;
+
+private:
+  /**
+   * The left matrix of the product. Owned: deleted in clear() and the destructor.
+   */
+  PointerMatrixBase<VectorType> *m1;
+
+  /**
+   * The right matrix of the product. Owned: deleted in clear() and the destructor.
+   */
+  PointerMatrixBase<VectorType> *m2;
+
+  /**
+   * Memory for auxiliary vector.
+   */
+  SmartPointer<VectorMemory<VectorType>,ProductMatrix<VectorType> > mem;
+};
+
+
+/**
+ * A matrix that is the multiple of another matrix.
+ *
+ * Matrix-vector products of this matrix are composed of those of the original
+ * matrix with the vector and then scaling of the result by a constant factor.
+ *
+ * @deprecated If deal.II was configured with C++11 support, use the
+ * LinearOperator class instead, see the module on
+ * @ref LAOperators "linear operators"
+ * for further details.
+ *
+ * @author Guido Kanschat, 2007
+ */
+template<typename VectorType>
+class ScaledMatrix : public Subscriptor
+{
+public:
+  /**
+   * Constructor leaving an uninitialized object.
+   */
+  ScaledMatrix ();
+  /**
+   * Constructor with initialization.
+   */
+  template <typename MatrixType>
+  ScaledMatrix (const MatrixType &M, const double factor);
+
+  /**
+   * Destructor
+   */
+  ~ScaledMatrix ();
+  /**
+   * Initialize for use with a new matrix and factor.
+   */
+  template <typename MatrixType>
+  void initialize (const MatrixType &M, const double factor);
+
+  /**
+   * Reset the object to its original state.
+   */
+  void clear ();
+
+  /**
+   * Matrix-vector product.
+   */
+  void vmult (VectorType &w, const VectorType &v) const;
+
+  /**
+   * Transposed matrix-vector product.
+   */
+  void Tvmult (VectorType &w, const VectorType &v) const;
+
+private:
+  /**
+   * The matrix wrapper. Owned: deleted in clear(), which the destructor calls.
+   */
+  PointerMatrixBase<VectorType> *m;
+  /**
+   * The scaling factor.
+   */
+  double factor;
+};
+
+
+
+/**
+ * Poor man's matrix product of two sparse matrices. Stores two matrices #m1
+ * and #m2 of arbitrary type SparseMatrix and implements matrix-vector
+ * multiplications for the product <i>M<sub>1</sub>M<sub>2</sub></i> by
+ * performing multiplication with both factors consecutively.
+ *
+ * The documentation of ProductMatrix applies with exception that these
+ * matrices here may be rectangular.
+ *
+ * @deprecated If deal.II was configured with C++11 support, use the
+ * LinearOperator class instead, see the module on
+ * @ref LAOperators "linear operators"
+ * for further details.
+ *
+ * @author Guido Kanschat, 2000, 2001, 2002, 2005
+ */
+template<typename number, typename vector_number>
+class ProductSparseMatrix : public PointerMatrixBase<Vector<vector_number> >
+{
+public:
+  /**
+   * Define the type of matrices used.
+   */
+  typedef SparseMatrix<number> MatrixType;
+
+  /**
+   * Define the type of vectors we apply this matrix to.
+   */
+  typedef Vector<vector_number> VectorType;
+
+  /**
+   * Constructor.  Additionally to the two constituting matrices, a memory
+   * pool for the auxiliary vector must be provided.
+   */
+  ProductSparseMatrix (const MatrixType         &m1,
+                       const MatrixType         &m2,
+                       VectorMemory<VectorType> &mem);
+
+  /**
+   * Constructor leaving an uninitialized matrix. initialize() must be called,
+   * before the matrix can be used.
+   */
+  ProductSparseMatrix();
+
+  void initialize (const MatrixType         &m1,
+                   const MatrixType         &m2,
+                   VectorMemory<VectorType> &mem);
+
+  // Doc in PointerMatrixBase
+  void clear();
+
+  /**
+   * Matrix-vector product <i>w = m1 * m2 * v</i>.
+   */
+  virtual void vmult (VectorType       &w,
+                      const VectorType &v) const;
+
+  /**
+   * Transposed matrix-vector product <i>w = m2<sup>T</sup> * m1<sup>T</sup> *
+   * v</i>.
+   */
+  virtual void Tvmult (VectorType       &w,
+                       const VectorType &v) const;
+
+  /**
+   * Adding matrix-vector product <i>w += m1 * m2 * v</i>
+   */
+  virtual void vmult_add (VectorType       &w,
+                          const VectorType &v) const;
+
+  /**
+   * Adding, transposed matrix-vector product <i>w += m2<sup>T</sup> *
+   * m1<sup>T</sup> * v</i>.
+   */
+  virtual void Tvmult_add (VectorType       &w,
+                           const VectorType &v) const;
+
+private:
+  /**
+   * The left matrix of the product.
+   */
+  SmartPointer<const MatrixType,ProductSparseMatrix<number,vector_number> > m1;
+
+  /**
+   * The right matrix of the product.
+   */
+  SmartPointer<const MatrixType,ProductSparseMatrix<number,vector_number> > m2;
+
+  /**
+   * Memory for auxiliary vector.
+   */
+  SmartPointer<VectorMemory<VectorType>,ProductSparseMatrix<number,vector_number>  > mem;
+};
+
+
+/**
+ * Mean value filter.  The vmult() functions of this matrix filter out mean
+ * values of the vector.  If the vector is of type BlockVector, then an
+ * additional parameter selects a single component for this operation.
+ *
+ * In mathematical terms, this class acts as if it was the matrix $I-\frac
+ * 1n{\mathbf 1}_n{\mathbf 1}_n^T$ where ${\mathbf 1}_n$ is a vector of size
+ * $n$ that has only ones as its entries. Thus, taking the dot product between
+ * a vector $\mathbf v$ and $\frac 1n {\mathbf 1}_n$ yields the <i>mean
+ * value</i> of the entries of ${\mathbf v}$. Consequently, $ \left[I-\frac
+ * 1n{\mathbf 1}_n{\mathbf 1}_n^T\right] \mathbf v = \mathbf v - \left[\frac
+ * 1n {\mathbf v} \cdot {\mathbf 1}_n\right]{\mathbf 1}_n$ subtracts from every
+ * vector element the mean value of all elements.
+ *
+ * @author Guido Kanschat, 2002, 2003
+ */
+class MeanValueFilter : public Subscriptor
+{
+public:
+  /**
+   * Declare type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * Constructor, optionally selecting a component.
+   */
+  MeanValueFilter(const size_type component = numbers::invalid_size_type);
+
+  /**
+   * Subtract mean value from @p v.
+   */
+  template <typename number>
+  void filter (Vector<number> &v) const;
+
+  /**
+   * Subtract mean value from the selected component (block) of @p v.
+   */
+  template <typename number>
+  void filter (BlockVector<number> &v) const;
+
+  /**
+   * Return the source vector with subtracted mean value.
+   */
+  template <typename number>
+  void vmult (Vector<number>       &dst,
+              const Vector<number> &src) const;
+
+  /**
+   * Add source vector with subtracted mean value to dest.
+   */
+  template <typename number>
+  void vmult_add (Vector<number>       &dst,
+                  const Vector<number> &src) const;
+
+  /**
+   * Return the source vector with subtracted mean value in selected
+   * component.
+   */
+  template <typename number>
+  void vmult (BlockVector<number>       &dst,
+              const BlockVector<number> &src) const;
+
+  /**
+   * Add a source to dest, where the mean value in the selected component is
+   * subtracted.
+   */
+  template <typename number>
+  void vmult_add (BlockVector<number>       &dst,
+                  const BlockVector<number> &src) const;
+
+
+  /**
+   * Not implemented; a call fails the assertion with ExcNotImplemented.
+   */
+  template <typename VectorType>
+  void Tvmult(VectorType &, const VectorType &) const;
+
+  /**
+   * Not implemented; a call fails the assertion with ExcNotImplemented.
+   */
+  template <typename VectorType>
+  void Tvmult_add(VectorType &, const VectorType &) const;
+
+private:
+  /**
+   * Component for filtering block vectors.
+   */
+  const size_type component;
+};
+
+
+
+/**
+ * Objects of this type represent the inverse of a matrix as computed
+ * approximately by using the SolverRichardson iterative solver. In other
+ * words, if you set up an object of the current type for a matrix $A$, then
+ * calling the vmult() function with arguments $v,w$ amounts to setting
+ * $w=A^{-1}v$ by solving the linear system $Aw=v$ using the Richardson solver
+ * with a preconditioner that can be chosen. Similarly, this class allows to
+ * also multiply with the transpose of the inverse (i.e., the inverse of the
+ * transpose) using the function SolverRichardson::Tsolve().
+ *
+ * The functions vmult() and Tvmult() set <tt>dst</tt> to zero and then
+ * approximate the inverse iteratively. Functions vmult_add() and
+ * Tvmult_add() start the iteration with a zero vector. All of the matrix-
+ * vector multiplication functions expect that the Richardson solver with the
+ * given preconditioner actually converges. If the Richardson solver does not
+ * converge within the specified number of iterations, the exception that will
+ * result in the solver will simply be propagated to the caller of the member
+ * function of the current class.
+ *
+ * @note A more powerful version of this class is provided by the
+ * IterativeInverse class.
+ *
+ * @note Instantiations for this template are provided for <tt>@<float@> and
+ * @<double@></tt>; others can be generated in application programs (see the
+ * section on
+ * @ref Instantiations
+ * in the manual).
+ *
+ * @deprecated If deal.II was configured with C++11 support, use the
+ * LinearOperator class instead, see the module on
+ * @ref LAOperators "linear operators"
+ * for further details.
+ *
+ * @author Guido Kanschat, 2005
+ */
+template<typename VectorType>
+class InverseMatrixRichardson : public Subscriptor
+{
+public:
+  /**
+   * Constructor, initializing the solver with a control and memory object.
+   * The inverted matrix and the preconditioner are added in initialize().
+   */
+  InverseMatrixRichardson (SolverControl            &control,
+                           VectorMemory<VectorType> &mem);
+  /**
+   * Since we use two pointers, we must implement a destructor.
+   */
+  ~InverseMatrixRichardson();
+
+  /**
+   * Initialization function. Provide a solver object, a matrix, and another
+   * preconditioner for this.
+   */
+  template <typename MatrixType, typename PreconditionerType>
+  void initialize (const MatrixType &,
+                   const PreconditionerType &);
+
+  /**
+   * Access to the SolverControl object used by the solver.
+   */
+  SolverControl &control() const;
+  /**
+   * Execute solver.
+   */
+  void vmult (VectorType &, const VectorType &) const;
+
+  /**
+   * Execute solver.
+   */
+  void vmult_add (VectorType &, const VectorType &) const;
+
+  /**
+   * Execute transpose solver.
+   */
+  void Tvmult (VectorType &, const VectorType &) const;
+
+  /**
+   * Execute transpose solver.
+   */
+  void Tvmult_add (VectorType &, const VectorType &) const;
+
+private:
+  /**
+   * A reference to the provided VectorMemory object.
+   */
+  VectorMemory<VectorType> &mem;
+
+  /**
+   * The solver object.
+   */
+  mutable SolverRichardson<VectorType> solver;
+
+  /**
+   * The matrix in use.
+   */
+  PointerMatrixBase<VectorType> *matrix;
+
+  /**
+   * The preconditioner to use.
+   */
+  PointerMatrixBase<VectorType> *precondition;
+};
+
+
+
+
+/*@}*/
+//---------------------------------------------------------------------------
+
+
+// Default state: no matrix attached; factor gets a defined value (zero)
+template<typename VectorType>
+inline
+ScaledMatrix<VectorType>::ScaledMatrix()
+  : m(0), factor(0.)
+{}
+
+
+
+// Wrap @p mat via new_pointer_matrix_base() and store the scaling factor.
+template<typename VectorType>
+template<typename MatrixType>
+inline
+ScaledMatrix<VectorType>::ScaledMatrix(const MatrixType &mat, const double factor)
+  : m(new_pointer_matrix_base(mat, VectorType())),
+    factor(factor)
+{}
+
+// Replace the wrapped matrix and the factor. The new wrapper is created
+// before the old one is deleted, so a throwing allocation leaves m valid.
+template<typename VectorType>
+template<typename MatrixType>
+inline void
+ScaledMatrix<VectorType>::initialize(const MatrixType &mat, const double f)
+{
+  PointerMatrixBase<VectorType> *replacement = new_pointer_matrix_base(mat, VectorType());
+  delete m;
+  m = replacement;
+  factor = f;
+}
+
+
+
+// Drop the wrapped matrix; deleting a null pointer is harmless.
+template<typename VectorType>
+inline void
+ScaledMatrix<VectorType>::clear()
+{
+  delete m;
+  m = 0;
+}
+
+
+
+// Release the wrapped matrix through clear().
+template<typename VectorType>
+inline ScaledMatrix<VectorType>::~ScaledMatrix()
+{
+  this->clear ();
+}
+
+// w = factor * (M v). Asserts that a matrix has been attached before use.
+template<typename VectorType>
+inline void
+ScaledMatrix<VectorType>::vmult (VectorType &w, const VectorType &v) const
+{
+  Assert (m != 0, ExcNotInitialized());
+  m->vmult(w, v);
+  w *= factor;
+}
+
+// w = factor * (M^T v). Asserts that a matrix has been attached before use.
+template<typename VectorType>
+inline void
+ScaledMatrix<VectorType>::Tvmult (VectorType &w, const VectorType &v) const
+{
+  Assert (m != 0, ExcNotInitialized());
+  m->Tvmult(w, v);
+  w *= factor;
+}
+
+
+//---------------------------------------------------------------------------
+
+// Empty product: no factors and no memory pool yet.
+template<typename VectorType>
+ProductMatrix<VectorType>::ProductMatrix () : m1(0), m2(0), mem(0)
+{}
+
+
+// Store only the memory pool; both factors are still unset.
+template<typename VectorType>
+ProductMatrix<VectorType>::ProductMatrix (VectorMemory<VectorType> &pool) : m1(0), m2(0), mem(&pool)
+{}
+
+// Wrap both factors in PointerMatrix objects, passing this object's typeid name along.
+template<typename VectorType>
+template<typename MatrixType1, typename MatrixType2>
+ProductMatrix<VectorType>::ProductMatrix (const MatrixType1        &mat1,
+                                          const MatrixType2        &mat2,
+                                          VectorMemory<VectorType> &pool)
+  : mem(&pool)
+{
+  m1 = new PointerMatrix<MatrixType1, VectorType>(&mat1, typeid(*this).name());
+  m2 = new PointerMatrix<MatrixType2, VectorType>(&mat2, typeid(*this).name());
+}
+
+// Exchange both factors. The old wrappers are released and the pointers
+// nulled first, so a throwing `new` cannot leave dangling pointers behind.
+template<typename VectorType>
+template<typename MatrixType1, typename MatrixType2>
+void
+ProductMatrix<VectorType>::reinit (const MatrixType1 &mat1, const MatrixType2 &mat2)
+{
+  ProductMatrix<VectorType>::clear ();
+  m1 = new PointerMatrix<MatrixType1, VectorType>(&mat1, typeid(*this).name());
+  m2 = new PointerMatrix<MatrixType2, VectorType>(&mat2, typeid(*this).name());
+}
+
+// Set pool and factors. Old wrappers are cleared (pointers nulled) before
+// allocating, so a throwing `new` cannot leave dangling pointers behind.
+template<typename VectorType>
+template<typename MatrixType1, typename MatrixType2>
+void
+ProductMatrix<VectorType>::initialize (const MatrixType1        &mat1,
+                                       const MatrixType2        &mat2,
+                                       VectorMemory<VectorType> &memory)
+{
+  mem = &memory;
+  ProductMatrix<VectorType>::clear ();
+  m1 = new PointerMatrix<MatrixType1, VectorType>(&mat1, typeid(*this).name());
+  m2 = new PointerMatrix<MatrixType2, VectorType>(&mat2, typeid(*this).name());
+}
+
+// Release both factor wrappers; delete on a null pointer does nothing.
+template<typename VectorType>
+ProductMatrix<VectorType>::~ProductMatrix ()
+{
+  delete m1;
+  delete m2;
+}
+
+
+// Delete both factor wrappers and reset the pointers to null.
+template<typename VectorType>
+void ProductMatrix<VectorType>::clear ()
+{
+  delete m1;
+  m1 = 0;
+  delete m2;
+  m2 = 0;
+}
+
+
+// dst = m1 * (m2 * src), with the intermediate held in a pool vector.
+template<typename VectorType>
+void ProductMatrix<VectorType>::vmult (VectorType &dst, const VectorType &src) const
+{
+  Assert (mem != 0, ExcNotInitialized());
+  Assert (m1 != 0, ExcNotInitialized());
+  Assert (m2 != 0, ExcNotInitialized());
+  // NOTE(review): if a product below throws, tmp is not returned to the pool.
+  VectorType *const tmp = mem->alloc();
+  tmp->reinit(dst);
+  m2->vmult (*tmp, src);
+  m1->vmult (dst, *tmp);
+  mem->free(tmp);
+}
+
+
+// dst += m1 * (m2 * src), with the intermediate held in a pool vector.
+template<typename VectorType>
+void ProductMatrix<VectorType>::vmult_add (VectorType &dst, const VectorType &src) const
+{
+  Assert (mem != 0, ExcNotInitialized());
+  Assert (m1 != 0, ExcNotInitialized());
+  Assert (m2 != 0, ExcNotInitialized());
+  // NOTE(review): if a product below throws, tmp is not returned to the pool.
+  VectorType *const tmp = mem->alloc();
+  tmp->reinit(dst);
+  m2->vmult (*tmp, src);
+  m1->vmult_add (dst, *tmp);
+  mem->free(tmp);
+}
+
+
+// dst = m2^T * (m1^T * src), with the intermediate held in a pool vector.
+template<typename VectorType>
+void ProductMatrix<VectorType>::Tvmult (VectorType &dst, const VectorType &src) const
+{
+  Assert (mem != 0, ExcNotInitialized());
+  Assert (m1 != 0, ExcNotInitialized());
+  Assert (m2 != 0, ExcNotInitialized());
+  // NOTE(review): if a product below throws, tmp is not returned to the pool.
+  VectorType *const tmp = mem->alloc();
+  tmp->reinit(dst);
+  m1->Tvmult (*tmp, src);
+  m2->Tvmult (dst, *tmp);
+  mem->free(tmp);
+}
+
+
+// dst += m2^T * (m1^T * src), with the intermediate held in a pool vector.
+template<typename VectorType>
+void ProductMatrix<VectorType>::Tvmult_add (VectorType &dst, const VectorType &src) const
+{
+  Assert (mem != 0, ExcNotInitialized());
+  Assert (m1 != 0, ExcNotInitialized());
+  Assert (m2 != 0, ExcNotInitialized());
+  // NOTE(review): if a product below throws, tmp is not returned to the pool.
+  VectorType *const tmp = mem->alloc();
+  tmp->reinit(dst);
+  m1->Tvmult (*tmp, src);
+  m2->Tvmult_add (dst, *tmp);
+  mem->free(tmp);
+}
+
+
+
+//---------------------------------------------------------------------------
+
+// The transposed product is not provided; any call trips the assertion.
+template <typename VectorType>
+inline void MeanValueFilter::Tvmult(VectorType &, const VectorType &) const
+{
+  Assert(false, ExcNotImplemented());
+}
+
+
+// The adding transposed product is not provided; any call trips the assertion.
+template <typename VectorType>
+inline void MeanValueFilter::Tvmult_add(VectorType &, const VectorType &) const
+{
+  Assert(false, ExcNotImplemented());
+}
+
+//-----------------------------------------------------------------------//
+// Attach matrix and preconditioner; each pointer is nulled before `new` so a throw cannot leave it dangling.
+template <typename VectorType>
+template <typename MatrixType, typename PreconditionerType>
+inline void
+InverseMatrixRichardson<VectorType>::initialize (const MatrixType &m,
+                                                 const PreconditionerType &p)
+{
+  delete matrix;
+  matrix = 0;
+  matrix = new PointerMatrix<MatrixType, VectorType>(&m);
+  delete precondition;
+  precondition = 0;
+  precondition = new PointerMatrix<PreconditionerType, VectorType>(&p);
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/matrix_lib.templates.h b/include/deal.II/lac/matrix_lib.templates.h
new file mode 100644
index 0000000..3b9ade0
--- /dev/null
+++ b/include/deal.II/lac/matrix_lib.templates.h
@@ -0,0 +1,208 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2002 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__matrix_lib_templates_h
+#define dealii__matrix_lib_templates_h
+
+#include <deal.II/lac/matrix_lib.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/block_vector.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+// Subtract the mean of all entries from each entry of v.
+template <typename number>
+void MeanValueFilter::filter(Vector<number> &v) const
+{
+  const number average = v.mean_value();
+
+  for (size_type idx=0; idx<v.size(); ++idx)
+    v(idx) -= average;
+}
+
+
+
+// dst = src minus the mean of src, entry by entry.
+template <typename number>
+void MeanValueFilter::vmult(Vector<number>       &dst,
+                            const Vector<number> &src) const
+{
+  Assert (dst.size() == src.size(),
+          ExcDimensionMismatch(dst.size(), src.size()));
+
+  const number average = src.mean_value();
+
+  for (size_type idx=0; idx<dst.size(); ++idx)
+    dst(idx) = src(idx) - average;
+}
+
+
+
+// dst += src minus the mean of src, entry by entry.
+template <typename number>
+void MeanValueFilter::vmult_add(Vector<number>       &dst,
+                                const Vector<number> &src) const
+{
+  Assert (dst.size() == src.size(),
+          ExcDimensionMismatch(dst.size(), src.size()));
+
+  const number average = src.mean_value();
+
+  for (size_type idx=0; idx<dst.size(); ++idx)
+    dst(idx) += src(idx) - average;
+}
+
+
+// Remove the mean value from the selected block of v, in place.
+template <typename number>
+void
+MeanValueFilter::filter(BlockVector<number> &v) const
+{
+  // The constructor's "no component" default is numbers::invalid_size_type.
+  Assert (component != numbers::invalid_size_type,
+          ExcNotInitialized());
+
+  // Only the selected block changes; the other blocks need no work.
+  for (unsigned int i=0; i<v.n_blocks(); ++i)
+    if (i == component)
+      filter(v.block(i));
+}
+
+
+// dst = src, except that the selected block has its mean value removed.
+template <typename number>
+void MeanValueFilter::vmult(BlockVector<number>       &dst,
+                            const BlockVector<number> &src) const
+{
+  // The constructor's "no component" default is numbers::invalid_size_type.
+  Assert (component != numbers::invalid_size_type,
+          ExcNotInitialized());
+
+  Assert (dst.n_blocks() == src.n_blocks(),
+          ExcDimensionMismatch(dst.n_blocks(), src.n_blocks()));
+  // The selected block is filtered, every other block is copied unchanged.
+  for (unsigned int i=0; i<dst.n_blocks(); ++i)
+    if (i == component)
+      vmult(dst.block(i), src.block(i));
+    else
+      dst.block(i) = src.block(i);
+}
+
+
+// dst += src, except that the selected block of src has its mean removed first.
+template <typename number>
+void MeanValueFilter::vmult_add(BlockVector<number>       &dst,
+                                const BlockVector<number> &src) const
+{
+  // The constructor's "no component" default is numbers::invalid_size_type.
+  Assert (component != numbers::invalid_size_type,
+          ExcNotInitialized());
+
+  Assert (dst.n_blocks() == src.n_blocks(),
+          ExcDimensionMismatch(dst.n_blocks(), src.n_blocks()));
+  // The selected block is added filtered, every other block is added as is.
+  for (unsigned int i=0; i<dst.n_blocks(); ++i)
+    if (i == component)
+      vmult_add(dst.block(i), src.block(i));
+    else
+      dst.block(i).add(src.block(i));
+}
+
+
+//----------------------------------------------------------------------//
+
+
+// Set up the Richardson solver with the given control and memory pool;
+// matrix and preconditioner pointers start out null.
+template <typename VectorType>
+InverseMatrixRichardson<VectorType>::InverseMatrixRichardson
+(SolverControl            &solver_control,
+ VectorMemory<VectorType> &memory)
+  : mem(memory),
+    solver(solver_control, memory),
+    matrix(0), precondition(0)
+{}
+
+// Free the matrix and preconditioner wrappers (null pointers are fine).
+template <typename VectorType>
+InverseMatrixRichardson<VectorType>::~InverseMatrixRichardson()
+{
+  delete matrix;
+  delete precondition;
+}
+
+
+// Zero dst, then let the Richardson solver solve matrix * dst = src.
+template <typename VectorType>
+void InverseMatrixRichardson<VectorType>::vmult(VectorType &dst, const VectorType &src) const
+{
+  Assert (matrix != 0, ExcNotInitialized());
+  Assert (precondition != 0, ExcNotInitialized());
+  dst = 0.;
+  solver.solve(*matrix, dst, src, *precondition);
+}
+
+// Solve into a pool vector and add the result to dst; the vector goes back to the pool even if the solver throws.
+template <typename VectorType>
+void InverseMatrixRichardson<VectorType>::vmult_add(VectorType &dst, const VectorType &src) const
+{
+  Assert (matrix != 0, ExcNotInitialized());
+  Assert (precondition != 0, ExcNotInitialized());
+  VectorType *aux = mem.alloc();
+  try
+    {
+      aux->reinit(dst);
+      solver.solve(*matrix, *aux, src, *precondition);
+      dst += *aux;
+    }
+  catch (...) { mem.free(aux); throw; }
+  mem.free(aux);
+}
+
+
+
+// Zero dst, then let the Richardson solver run Tsolve() for the transpose.
+template <typename VectorType>
+void InverseMatrixRichardson<VectorType>::Tvmult(VectorType &dst, const VectorType &src) const
+{
+  Assert (matrix != 0, ExcNotInitialized());
+  Assert (precondition != 0, ExcNotInitialized());
+  dst = 0.;
+  solver.Tsolve(*matrix, dst, src, *precondition);
+}
+
+// Transpose-solve into a pool vector and add the result to dst; the vector goes back to the pool even if the solver throws.
+template <typename VectorType>
+void InverseMatrixRichardson<VectorType>::Tvmult_add(VectorType &dst, const VectorType &src) const
+{
+  Assert (matrix != 0, ExcNotInitialized());
+  Assert (precondition != 0, ExcNotInitialized());
+  VectorType *aux = mem.alloc();
+  try
+    {
+      aux->reinit(dst);
+      solver.Tsolve(*matrix, *aux, src, *precondition);
+      dst += *aux;
+    }
+  catch (...) { mem.free(aux); throw; }
+  mem.free(aux);
+}
+
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/matrix_out.h b/include/deal.II/lac/matrix_out.h
new file mode 100644
index 0000000..9acc1c2
--- /dev/null
+++ b/include/deal.II/lac/matrix_out.h
@@ -0,0 +1,412 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__matrix_out_h
+#define dealii__matrix_out_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/data_out_base.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/block_sparse_matrix.h>
+
+#ifdef DEAL_II_WITH_TRILINOS
+#  include <deal.II/lac/trilinos_sparse_matrix.h>
+#  include <deal.II/lac/trilinos_block_sparse_matrix.h>
+#endif
+
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * Output a matrix in graphical form using the generic format independent
+ * output routines of the base class. The matrix is converted into a list of
+ * patches on a 2d domain where the height is given by the elements of the
+ * matrix. The functions of the base class can then write this "mountain
+ * representation" of the matrix in a variety of graphical output formats. The
+ * coordinates of the matrix output are that the columns run with increasing
+ * x-axis, as usual, starting from zero, while the rows run into the negative
+ * y-axis, also starting from zero. Note that due to some internal
+ * restrictions, this class can only output one matrix at a time, i.e. it can
+ * not take advantage of the multiple dataset capabilities of the base class.
+ *
+ * A typical usage of this class would be as follows:
+ * @code
+ *    FullMatrix<double> M;
+ *    ...                // fill matrix M with some values
+ *
+ *                       // now write out M:
+ *    MatrixOut matrix_out;
+ *    std::ofstream out ("M.gnuplot");
+ *    matrix_out.build_patches (M, "M");
+ *    matrix_out.write_gnuplot (out);
+ * @endcode
+ * Of course, you can as well choose a different graphical output format.
+ * Also, this class supports any matrix, not only of type FullMatrix, as long
+ * as it satisfies a number of requirements, stated with the member functions
+ * of this class.
+ *
+ * The generation of patches through the build_patches() function can be
+ * modified by giving it an object holding certain flags. See the
+ * documentation of the members of the Options class for a description of
+ * these flags.
+ *
+ *
+ * @ingroup output
+ * @author Wolfgang Bangerth, 2001
+ */
+class MatrixOut : public DataOutInterface<2,2>
+{
+public:
+  /**
+   * Declare type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * Class holding various variables which are used to modify the output of
+   * the MatrixOut class.
+   */
+  struct Options
+  {
+    /**
+     * If @p true, only show the absolute values of the matrix entries, rather
+     * than their true values including the sign. Default value is @p false.
+     */
+    bool         show_absolute_values;
+
+    /**
+     * If larger than one, do not show each element of the matrix, but rather
+     * an average over a number of entries. The number of output patches is
+     * accordingly smaller. This flag determines how large each shown block
+     * shall be (in rows/columns). For example, if it is two, then always four
+     * entries are collated into one.
+     *
+     * Default value is one.
+     */
+    unsigned int block_size;
+
+    /**
+     * If @p true, plot discontinuous patches, one for each entry (or block
+     * of entries), carrying the same value at all four corners. If @p false,
+     * build_patches() produces one patch fewer per direction and uses the
+     * values of the neighboring grid points at the four corners of each
+     * patch.
+     *
+     * Default value is @p false.
+     */
+    bool discontinuous;
+
+    /**
+     * Default constructor. Set all elements of this structure to their
+     * default values, unless other values are given as arguments.
+     */
+    Options (const bool         show_absolute_values = false,
+             const unsigned int block_size           = 1,
+             const bool         discontinuous        = false);
+  };
+
+  /**
+   * Destructor. Declared in order to make it virtual.
+   */
+  virtual ~MatrixOut ();
+
+  /**
+   * Generate a list of patches from the given matrix and use the given string
+   * as the name of the data set upon writing to a file. Once patches have
+   * been built, you can use the functions of the base class to write the data
+   * into a file, using one of the supported output formats.
+   *
+   * You may give a structure holding various options. See the description of
+   * the fields of this structure for more information.
+   *
+   * Note that this function requires that we can extract elements of the
+   * matrix, which is done using the get_element() function declared in an
+   * internal namespace. By adding overloads of that function, you can extend
+   * this class to other matrix classes which are not presently supported.
+   * Furthermore, we need to be able to extract the size of the matrix, for
+   * which we assume that the matrix type offers member functions
+   * <tt>m()</tt> and <tt>n()</tt>, which return the number of rows and
+   * columns, respectively.
+   */
+  template <class Matrix>
+  void build_patches (const Matrix      &matrix,
+                      const std::string &name,
+                      const Options      options = Options(false, 1, false));
+
+private:
+
+  /**
+   * Abbreviate the somewhat lengthy name for the dealii::DataOutBase::Patch
+   * class.
+   */
+  typedef DataOutBase::Patch<2,2> Patch;
+
+  /**
+   * This is a list of patches that is created each time build_patches() is
+   * called. These patches are used in the output routines of the base
+   * classes.
+   */
+  std::vector<Patch> patches;
+
+  /**
+   * Name of the matrix to be written.
+   */
+  std::string name;
+
+  /**
+   * Function by which the base class's functions get to know what patches
+   * they shall write to a file.
+   */
+  virtual const std::vector<Patch> &
+  get_patches () const;
+
+  /**
+   * Virtual function through which the names of data sets are obtained by the
+   * output functions of the base class.
+   */
+  virtual std::vector<std::string> get_dataset_names () const;
+
+  /**
+   * Get the value of the matrix at gridpoint <tt>(i,j)</tt>. Depending on the
+   * given flags, this can mean different things, for example if only absolute
+   * values shall be shown then the absolute value of the matrix entry is
+   * taken. If the block size is larger than one, then an average of several
+   * matrix entries is taken.
+   *
+   * Here, @p i selects the row and @p j the column of the grid point, both
+   * counted in units of blocks of Options::block_size matrix entries.
+   */
+  template <class Matrix>
+  static double get_gridpoint_value (const Matrix       &matrix,
+                                     const size_type     i,
+                                     const size_type     j,
+                                     const Options      &options);
+};
+
+
+/* ---------------------- Template and inline functions ------------- */
+
+
+namespace internal
+{
+  namespace MatrixOut
+  {
+    namespace
+    {
+      /**
+       * Return the element with given indices of a sparse matrix. The entry
+       * is read through <tt>el(i,j)</tt> rather than <tt>operator()</tt>.
+       */
+      template <typename number>
+      double get_element (const dealii::SparseMatrix<number> &matrix,
+                          const types::global_dof_index             i,
+                          const types::global_dof_index             j)
+      {
+        return matrix.el(i,j);
+      }
+
+
+
+      /**
+       * Return the element with given indices of a block sparse matrix. The
+       * entry is read through <tt>el(i,j)</tt> rather than
+       * <tt>operator()</tt>.
+       */
+      template <typename number>
+      double get_element (const dealii::BlockSparseMatrix<number> &matrix,
+                          const types::global_dof_index                  i,
+                          const types::global_dof_index                  j)
+      {
+        return matrix.el(i,j);
+      }
+
+
+#ifdef DEAL_II_WITH_TRILINOS
+      /**
+       * Return the element with given indices of a Trilinos sparse matrix.
+       * The entry is read through <tt>el(i,j)</tt> rather than
+       * <tt>operator()</tt>.
+       */
+      inline
+      double get_element (const TrilinosWrappers::SparseMatrix &matrix,
+                          const types::global_dof_index             i,
+                          const types::global_dof_index             j)
+      {
+        return matrix.el(i,j);
+      }
+
+
+
+      /**
+       * Return the element with given indices of a Trilinos block sparse
+       * matrix. The entry is read through <tt>el(i,j)</tt> rather than
+       * <tt>operator()</tt>.
+       */
+      inline
+      double get_element (const TrilinosWrappers::BlockSparseMatrix &matrix,
+                          const types::global_dof_index                  i,
+                          const types::global_dof_index                  j)
+      {
+        return matrix.el(i,j);
+      }
+#endif
+
+
+#ifdef DEAL_II_WITH_PETSC
+      // no need to do anything: PETSc matrix objects do not distinguish
+      // between operator() and el(i,j), so we can safely access elements
+      // through the generic function below
+#endif
+
+
+      /**
+       * Return the element with given indices from any matrix type for which
+       * no more specific overload of this function was declared above. This
+       * will call <tt>operator()</tt> on the matrix.
+       */
+      template <class Matrix>
+      double get_element (const Matrix       &matrix,
+                          const types::global_dof_index     i,
+                          const types::global_dof_index     j)
+      {
+        return matrix(i,j);
+      }
+    }
+  }
+}
+
+
+
+// Return the value shown at grid point (i,j): either the single matrix entry
+// (block size one) or the average over the block of entries that starts at
+// row i*block_size and column j*block_size. Absolute values are used if
+// requested through the options. Blocks at the lower and right edge of the
+// matrix are clipped to the number of rows m() and columns n(), respectively.
+template <class Matrix>
+inline
+double
+MatrixOut::get_gridpoint_value (const Matrix   &matrix,
+                                const size_type i,
+                                const size_type j,
+                                const Options  &options)
+{
+  // special case if block size is one since we then don't need all that
+  // loop overhead
+  if (options.block_size == 1)
+    {
+      if (options.show_absolute_values == true)
+        return std::fabs(internal::MatrixOut::get_element (matrix, i, j));
+      else
+        return internal::MatrixOut::get_element (matrix, i, j);
+    }
+
+  // if blocksize greater than one, then compute average of elements. rows
+  // are bounded by m(), columns by n(); using m() for both would read past
+  // the last column or skip columns for non-square matrices
+  const size_type row_end = std::min(size_type(matrix.m()),
+                                     size_type((i+1)*options.block_size));
+  const size_type col_end = std::min(size_type(matrix.n()),
+                                     size_type((j+1)*options.block_size));
+
+  double average = 0;
+  size_type n_elements = 0;
+  for (size_type row=i*options.block_size; row < row_end; ++row)
+    for (size_type col=j*options.block_size; col < col_end; ++col, ++n_elements)
+      if (options.show_absolute_values == true)
+        average += std::fabs(internal::MatrixOut::get_element (matrix, row, col));
+      else
+        average += internal::MatrixOut::get_element (matrix, row, col);
+  average /= n_elements;
+  return average;
+}
+
+
+
+// Rebuild the list of patches from @p matrix and remember @p name as the
+// name of the data set. One patch is created per grid point (discontinuous
+// output) or per cell between neighboring grid points (continuous output),
+// where a grid point stands for a block of block_size x block_size entries.
+template <class Matrix>
+void
+MatrixOut::build_patches (const Matrix      &matrix,
+                          const std::string &name,
+                          const Options      options)
+{
+  // number of blocks per direction, rounded up so that a trailing partial
+  // block is shown as well
+  size_type gridpoints_x = (matrix.n() / options.block_size
+                            +
+                            (matrix.n() % options.block_size != 0 ? 1 : 0));
+  size_type gridpoints_y = (matrix.m() / options.block_size
+                            +
+                            (matrix.m() % options.block_size != 0 ? 1 : 0));
+
+  // If continuous, the number of plotted patches is matrix size-1. The
+  // counters are unsigned, so only decrement when nonzero: otherwise an
+  // empty matrix would make them wrap around to a huge value.
+  if (!options.discontinuous)
+    {
+      if (gridpoints_x > 0)
+        --gridpoints_x;
+      if (gridpoints_y > 0)
+        --gridpoints_y;
+    }
+
+  // first clear old data and set it to virgin state
+  patches.clear ();
+  patches.resize ((gridpoints_x) * (gridpoints_y));
+
+  // now build the patches
+  size_type index=0;
+  for (size_type i=0; i<gridpoints_y; ++i)
+    for (size_type j=0; j<gridpoints_x; ++j, ++index)
+      {
+        Patch &patch = patches[index];
+
+        // within each patch, order the points in such a way that if some
+        // graphical output program (such as gnuplot) plots the
+        // quadrilaterals as two triangles, then the diagonal of the
+        // quadrilateral which cuts it into the two printed triangles is
+        // parallel to the diagonal of the matrix, rather than perpendicular
+        // to it. this has the advantage that, for example, the unit matrix
+        // is plotted as a straight rim, rather than as a series of bumps and
+        // valleys along the diagonal
+        patch.vertices[0](0) = j;
+        patch.vertices[0](1) = static_cast<signed int>(-i);
+        patch.vertices[1](0) = j;
+        patch.vertices[1](1) = static_cast<signed int>(-i-1);
+        patch.vertices[2](0) = j+1;
+        patch.vertices[2](1) = static_cast<signed int>(-i);
+        patch.vertices[3](0) = j+1;
+        patch.vertices[3](1) = static_cast<signed int>(-i-1);
+
+        // next scale all the patch coordinates by the block size, to get
+        // original coordinates
+        for (unsigned int v=0; v<4; ++v)
+          patch.vertices[v] *= options.block_size;
+
+        patch.n_subdivisions = 1;
+
+        patch.data.reinit (1,4);
+        if (options.discontinuous)
+          {
+            // all four corners carry the value of this grid point, so
+            // evaluate it only once
+            const double value = get_gridpoint_value(matrix, i, j, options);
+            for (unsigned int v=0; v<4; ++v)
+              patch.data(0,v) = value;
+          }
+        else
+          {
+            patch.data(0,0) = get_gridpoint_value(matrix, i,   j,   options);
+            patch.data(0,1) = get_gridpoint_value(matrix, i+1, j,   options);
+            patch.data(0,2) = get_gridpoint_value(matrix, i,   j+1, options);
+            patch.data(0,3) = get_gridpoint_value(matrix, i+1, j+1, options);
+          }
+      }
+
+  // finally set the name
+  this->name = name;
+}
+
+
+
+/*----------------------------   matrix_out.h     ---------------------------*/
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
+/*----------------------------   matrix_out.h     ---------------------------*/
diff --git a/include/deal.II/lac/packaged_operation.h b/include/deal.II/lac/packaged_operation.h
new file mode 100644
index 0000000..6750437
--- /dev/null
+++ b/include/deal.II/lac/packaged_operation.h
@@ -0,0 +1,835 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2014 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__packaged_operation_h
+#define dealii__packaged_operation_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/lac/vector_memory.h>
+
+#ifdef DEAL_II_WITH_CXX11
+
+#include <functional>
+
+DEAL_II_NAMESPACE_OPEN
+
+// Forward declarations:
+template <typename Number> class Vector;
+template <typename Range, typename Domain> class LinearOperator;
+template <typename Range = Vector<double> > class PackagedOperation;
+
+
+/**
+ * A class to store a computation.
+ *
+ * The PackagedOperation class allows lazy evaluation of expressions involving
+ * vectors and linear operators. This is done by storing the computational
+ * expression and only performing the computation when either the object is
+ * implicitly converted to a vector object, or <code>apply</code> (or
+ * <code>apply_add</code>) is invoked by hand. This avoids unnecessary
+ * temporary storage of intermediate results.
+ *
+ * The class essentially consists of <code>std::function</code> objects that
+ * store the knowledge of how to generate the result of a computation and
+ * store it in a vector:
+ * @code
+ *   std::function<void(Range &)> apply;
+ *   std::function<void(Range &)> apply_add;
+ * @endcode
+ *
+ * Similar to the LinearOperator class it also has knowledge about how to
+ * initialize a vector of the @p Range space:
+ * @code
+ *   std::function<void(Range &, bool)> reinit_vector;
+ * @endcode
+ *
+ * As an example consider the addition of multiple vectors
+ * @code
+ *   dealii::Vector<double> a, b, c, d;
+ *   // ..
+ *   dealii::Vector<double> result = a + b - c + d;
+ * @endcode
+ * or the computation of a residual $b-Ax$:
+ * @code
+ *   dealii::SparseMatrix<double> A;
+ *   dealii::Vector<double> b, x;
+ *   // ..
+ *   const auto op_a = linear_operator(A);
+ *
+ *   const auto residual =  b - op_a * x;
+ * @endcode
+ * The expression <code>residual</code> is of type
+ * <code>PackagedOperation<dealii::Vector<double>></code>. It stores
+ * references to <code>A</code>, <code>b</code> and <code>x</code> and defers
+ * the actual computation until <code>apply</code>, or <code>apply_add</code>
+ * are explicitly invoked,
+ * @code
+ *   dealii::Vector<double> y;
+ *   residual.reinit_vector(y);
+ *   residual.apply(y);
+ *   residual.apply_add(y);
+ * @endcode
+ * or until the @p PackagedOperation object is implicitly converted:
+ * @code
+ *   dealii::Vector<double> y;
+ *   y = residual;
+ *   y += residual;
+ *   y -= residual;
+ * @endcode
+ *
+ * @author Matthias Maier, 2015
+ *
+ * @ingroup LAOperators
+ */
+template <typename Range> class PackagedOperation
+{
+public:
+
+  /**
+   * Construct an empty PackagedOperation. Each of the three
+   * <code>std::function</code> members is set to a placeholder that
+   * triggers an assertion with a descriptive message when it is invoked.
+   */
+  PackagedOperation ()
+  {
+    reinit_vector = [](Range &, bool)
+    {
+      Assert(false,
+             ExcMessage("Uninitialized PackagedOperation<Range>::reinit_vector "
+                        "method called"));
+    };
+
+    apply = [](Range &)
+    {
+      Assert(false,
+             ExcMessage("Uninitialized PackagedOperation<Range>::apply called"));
+    };
+
+    apply_add = [](Range &)
+    {
+      Assert(false,
+             ExcMessage("Uninitialized PackagedOperation<Range>::apply_add called"));
+    };
+  }
+
+  /**
+   * Copy constructor (compiler generated).
+   */
+  PackagedOperation (const PackagedOperation<Range> &) = default;
+
+  /**
+   * Construct a PackagedOperation whose result is the vector @p u.
+   *
+   * Only a reference to @p u is stored. The vector therefore has to stay
+   * alive for as long as this object (or any copy of it) is used, and later
+   * modifications of @p u are visible in the result.
+   */
+  PackagedOperation (const Range &u)
+  {
+    this->operator=(u);
+  }
+
+  /**
+   * Copy assignment operator (compiler generated).
+   */
+  PackagedOperation<Range> &operator=(const PackagedOperation<Range> &) = default;
+
+  /**
+   * Make this PackagedOperation return the vector @p u.
+   *
+   * Only a reference to @p u is stored. The vector therefore has to stay
+   * alive for as long as this object (or any copy of it) is used, and later
+   * modifications of @p u are visible in the result.
+   */
+  PackagedOperation<Range> &operator=(const Range &u)
+  {
+    reinit_vector = [&u](Range &dst, bool omit_zeroing_entries)
+    {
+      dst.reinit(u, omit_zeroing_entries);
+    };
+
+    apply = [&u](Range &dst)
+    {
+      dst = u;
+    };
+
+    apply_add = [&u](Range &dst)
+    {
+      dst += u;
+    };
+
+    return *this;
+  }
+
+  /**
+   * Evaluate the PackagedOperation.
+   *
+   * A vector of the Range space is created, set up with
+   * <code>reinit_vector()</code> (without zeroing its entries), filled by
+   * <code>apply()</code> and returned.
+   */
+  operator Range() const
+  {
+    Range evaluated;
+
+    reinit_vector(evaluated, /*bool omit_zeroing_entries=*/ true);
+    apply(evaluated);
+
+    return evaluated;
+  }
+
+  /**
+   * @name In-place vector space operations
+   */
+  //@{
+
+  /**
+   * Add the PackagedOperation @p second_comp (with the same @p Range) to
+   * this one.
+   */
+  PackagedOperation<Range> &operator+=(const PackagedOperation<Range> &second_comp)
+  {
+    return *this = *this + second_comp;
+  }
+
+  /**
+   * Subtract the PackagedOperation @p second_comp (with the same @p Range)
+   * from this one.
+   */
+  PackagedOperation<Range> &operator-=(const PackagedOperation<Range> &second_comp)
+  {
+    return *this = *this - second_comp;
+  }
+
+  /**
+   * Add a constant @p offset (of the @p Range space) to the result of this
+   * PackagedOperation. Only a reference to @p offset is stored.
+   */
+  PackagedOperation<Range> &operator+=(const Range &offset)
+  {
+    const PackagedOperation<Range> offset_op(offset);
+    return *this = *this + offset_op;
+  }
+
+  /**
+   * Subtract a constant @p offset (of the @p Range space) from the result
+   * of this PackagedOperation. Only a reference to @p offset is stored.
+   */
+  PackagedOperation<Range> &operator-=(const Range &offset)
+  {
+    const PackagedOperation<Range> offset_op(offset);
+    return *this = *this - offset_op;
+  }
+
+  /**
+   * Scale the result of this PackagedOperation by @p number.
+   */
+  PackagedOperation<Range> &operator*=(typename Range::value_type number)
+  {
+    return *this = *this * number;
+  }
+  //@}
+
+  /**
+   * Store the result of the PackagedOperation in a vector v of the @p Range
+   * space.
+   */
+  std::function<void(Range &v)> apply;
+
+  /**
+   * Add the result of the PackagedOperation to a vector v of the @p Range
+   * space.
+   */
+  std::function<void(Range &v)> apply_add;
+
+  /**
+   * Initialize a vector v of the Range space so that it can be used as the
+   * destination argument of apply or apply_add. As with the reinit
+   * functions of the vector classes, the boolean selects a fast
+   * initialization: if it is false, the content of the vector is set to 0.
+   */
+  std::function<void(Range &v, bool omit_zeroing_entries)> reinit_vector;
+};
+
+
+/**
+ * @name Vector space operations
+ */
+//@{
+
+/**
+ * @relates PackagedOperation
+ *
+ * Addition of two PackagedOperation objects @p first_comp and @p second_comp
+ * given by vector space addition of the corresponding results.
+ *
+ * @ingroup LAOperators
+ */
+template <typename Range>
+PackagedOperation<Range>
+operator+(const PackagedOperation<Range> &first_comp,
+          const PackagedOperation<Range> &second_comp)
+{
+  PackagedOperation<Range> sum;
+
+  // The lambdas hold copies of both operands, so the returned object stays
+  // valid independently of the lifetime of the arguments.
+
+  sum.apply_add = [first_comp, second_comp](Range &dst)
+  {
+    first_comp.apply_add(dst);
+    second_comp.apply_add(dst);
+  };
+
+  sum.apply = [first_comp, second_comp](Range &dst)
+  {
+    first_comp.apply(dst);
+    second_comp.apply_add(dst);
+  };
+
+  // the result vector is set up the same way as for the first operand
+  sum.reinit_vector = first_comp.reinit_vector;
+
+  return sum;
+}
+
+/**
+ * @relates PackagedOperation
+ *
+ * Subtraction of two PackagedOperation objects @p first_comp and @p
+ * second_comp given by vector space subtraction of the corresponding results.
+ *
+ * @ingroup LAOperators
+ */
+template <typename Range>
+PackagedOperation<Range>
+operator-(const PackagedOperation<Range> &first_comp,
+          const PackagedOperation<Range> &second_comp)
+{
+  PackagedOperation<Range> difference;
+
+  // The lambdas hold copies of both operands, so the returned object stays
+  // valid independently of the lifetime of the arguments.
+
+  difference.apply_add = [first_comp, second_comp](Range &dst)
+  {
+    // dst + first - second, computed as -((-(dst + first)) + second)
+    first_comp.apply_add(dst);
+    dst *= -1.;
+    second_comp.apply_add(dst);
+    dst *= -1.;
+  };
+
+  difference.apply = [first_comp, second_comp](Range &dst)
+  {
+    // first - second, computed as (-second) + first
+    second_comp.apply(dst);
+    dst *= -1.;
+    first_comp.apply_add(dst);
+  };
+
+  // the result vector is set up the same way as for the first operand
+  difference.reinit_vector = first_comp.reinit_vector;
+
+  return difference;
+}
+
+/**
+ * @relates PackagedOperation
+ *
+ * Scalar multiplication of a PackagedOperation object @p comp with a scalar
+ * @p number, given by scaling the result of @p comp with @p number.
+ *
+ * @ingroup LAOperators
+ */
+template <typename Range>
+PackagedOperation<Range>
+operator*(const PackagedOperation<Range> &comp,
+          typename Range::value_type number)
+{
+  PackagedOperation<Range> scaled;
+
+  scaled.reinit_vector = comp.reinit_vector;
+
+  if (number == 0.)
+    {
+      // the trivial case: the result is the zero vector, so comp never has
+      // to be evaluated
+      scaled.apply = [](Range &dst)
+      {
+        dst = 0.;
+      };
+
+      scaled.apply_add = [](Range &)
+      {
+      };
+
+      return scaled;
+    }
+
+  // general case: comp and number are stored by value in the lambdas
+  scaled.apply = [comp, number](Range &dst)
+  {
+    comp.apply(dst);
+    dst *= number;
+  };
+
+  scaled.apply_add = [comp, number](Range &dst)
+  {
+    // dst + number*comp, computed as number * (dst/number + comp)
+    dst /= number;
+    comp.apply_add(dst);
+    dst *= number;
+  };
+
+  return scaled;
+}
+
+/**
+ * @relates PackagedOperation
+ *
+ * Scalar multiplication of a PackagedOperation object @p comp with a scalar
+ * @p number, given by scaling the result of @p comp with @p number. This
+ * forwards to the variant that takes the scalar as second argument.
+ *
+ * @ingroup LAOperators
+ */
+template <typename Range>
+PackagedOperation<Range>
+operator*(typename Range::value_type number,
+          const PackagedOperation<Range> &comp)
+{
+  PackagedOperation<Range> scaled = comp * number;
+  return scaled;
+}
+
+/**
+ * @relates PackagedOperation
+ *
+ * Add a constant @p offset (of the @p Range space) to the result of a
+ * PackagedOperation.
+ *
+ * @ingroup LAOperators
+ */
+template <typename Range>
+PackagedOperation<Range> operator+(const PackagedOperation<Range> &comp,
+                                   const Range &offset)
+{
+  // wrap the vector (stored by reference) and reuse the addition of two
+  // PackagedOperation objects
+  const PackagedOperation<Range> offset_op(offset);
+  return comp + offset_op;
+}
+
+/**
+ * @relates PackagedOperation
+ *
+ * Add a constant @p offset (of the @p Range space) to the result of a
+ * PackagedOperation.
+ *
+ * @ingroup LAOperators
+ */
+template <typename Range>
+PackagedOperation<Range> operator+(const Range &offset,
+                                   const PackagedOperation<Range> &comp)
+{
+  // wrap the vector (stored by reference) and reuse the addition of two
+  // PackagedOperation objects
+  const PackagedOperation<Range> offset_op(offset);
+  return offset_op + comp;
+}
+
+/**
+ * @relates PackagedOperation
+ *
+ * Subtract a constant @p offset (of the @p Range space) from the result of a
+ * PackagedOperation.
+ *
+ * @ingroup LAOperators
+ */
+template <typename Range>
+PackagedOperation<Range> operator-(const PackagedOperation<Range> &comp,
+                                   const Range &offset)
+{
+  // wrap the vector (stored by reference) and reuse the subtraction of two
+  // PackagedOperation objects
+  const PackagedOperation<Range> offset_op(offset);
+  return comp - offset_op;
+}
+
+
+/**
+ * @relates PackagedOperation
+ *
+ * Subtract a computational result from a constant @p offset (of the @p Range
+ * space). The result is a PackagedOperation object that applies this
+ * computation.
+ *
+ * @ingroup LAOperators
+ */
+template <typename Range>
+PackagedOperation<Range> operator-(const Range &offset,
+                                   const PackagedOperation<Range> &comp)
+{
+  // wrap the vector (stored by reference) and reuse the subtraction of two
+  // PackagedOperation objects
+  const PackagedOperation<Range> offset_op(offset);
+  return offset_op - comp;
+}
+
+//@}
+
+
+/**
+ * @name Creation of a PackagedOperation object
+ */
+//@{
+
+namespace
+{
+  // Poor man's trait class that determines whether type T is a vector, by
+  // checking for the members operator+=, operator-= and l2_norm.
+  // FIXME: Implement this as a proper type trait - similar to isBlockVector
+
+  template <typename T>
+  class has_vector_interface
+  {
+    template <typename C>
+    static std::false_type test(...);
+
+    template <typename C>
+    static std::true_type test(decltype(&C::operator+=),
+                               decltype(&C::operator-=),
+                               decltype(&C::l2_norm));
+
+  public:
+    // type is std::true_type if the addresses of T::operator+=, operator-=
+    // and l2_norm can all be taken, otherwise it is std::false_type
+
+    typedef decltype(test<T>(0, 0, 0)) type;
+  };
+}
+
+
+/**
+ * @relates PackagedOperation
+ *
+ * Create a PackagedOperation object that stores the addition of two vectors.
+ *
+ * The PackagedOperation object that is created stores a reference to @p u and
+ * @p v. Thus, the vectors must remain valid references for the whole lifetime
+ * of the PackagedOperation object. All changes made on @p u or @p v after the
+ * creation of the PackagedOperation object are reflected by the operator
+ * object.
+ *
+ * @ingroup LAOperators
+ */
+
+template <typename Range,
+          typename = typename std::enable_if<has_vector_interface<Range>::type::value>::type>
+PackagedOperation<Range> operator+(const Range &u, const Range &v)
+{
+  PackagedOperation<Range> sum;
+
+  // Both vectors are captured by reference, so they have to outlive the
+  // returned object.
+
+  sum.apply = [&u, &v](Range &dst)
+  {
+    dst = u;
+    dst += v;
+  };
+
+  sum.apply_add = [&u, &v](Range &dst)
+  {
+    dst += u;
+    dst += v;
+  };
+
+  // result vectors are set up like the first summand
+  sum.reinit_vector = [&u](Range &dst, bool omit_zeroing_entries)
+  {
+    dst.reinit(u, omit_zeroing_entries);
+  };
+
+  return sum;
+}
+
+
+/**
+ * @relates PackagedOperation
+ *
+ * Create a PackagedOperation object that stores the subtraction of two
+ * vectors.
+ *
+ * The PackagedOperation object that is created stores a reference to @p u and
+ * @p v. Thus, the vectors must remain valid references for the whole lifetime
+ * of the PackagedOperation object. All changes made on @p u or @p v after the
+ * creation of the PackagedOperation object are reflected by the operator
+ * object.
+ *
+ * @ingroup LAOperators
+ */
+
+template <typename Range,
+          typename = typename std::enable_if<has_vector_interface<Range>::type::value>::type>
+PackagedOperation<Range> operator-(const Range &u, const Range &v)
+{
+  PackagedOperation<Range> difference;
+
+  // Both vectors are captured by reference, so they have to outlive the
+  // returned object.
+
+  difference.apply = [&u, &v](Range &dst)
+  {
+    dst = u;
+    dst -= v;
+  };
+
+  difference.apply_add = [&u, &v](Range &dst)
+  {
+    dst += u;
+    dst -= v;
+  };
+
+  // result vectors are set up like the minuend
+  difference.reinit_vector = [&u](Range &dst, bool omit_zeroing_entries)
+  {
+    dst.reinit(u, omit_zeroing_entries);
+  };
+
+  return difference;
+}
+
+
+/**
+ * @relates PackagedOperation
+ *
+ * Create a PackagedOperation object that stores the scaling of a vector with
+ * a @p number.
+ *
+ * The PackagedOperation object that is created stores a reference to @p u.
+ * Thus, the vector must remain a valid reference for the whole lifetime of
+ * the PackagedOperation object. All changes made on @p u after the creation
+ * of the PackagedOperation object are reflected by the operator object.
+ *
+ * @ingroup LAOperators
+ */
+template <typename Range,
+          typename = typename std::enable_if<has_vector_interface<Range>::type::value>::type>
+PackagedOperation<Range> operator*(const Range &u,
+                                   typename Range::value_type number)
+{
+  // wrap the vector (stored by reference) and scale the wrapped operation
+  const PackagedOperation<Range> wrapped(u);
+  return wrapped * number;
+}
+
+
+/**
+ * @relates PackagedOperation
+ *
+ * Create a PackagedOperation object that stores the scaling of a vector with
+ * a @p number.
+ *
+ * The PackagedOperation object that is created stores a reference to @p u.
+ * Thus, the vector must remain a valid reference for the whole lifetime of
+ * the PackagedOperation object. All changes made on @p u after the creation
+ * of the PackagedOperation object are reflected by the operator object.
+ *
+ * @ingroup LAOperators
+ */
+template <typename Range,
+          typename = typename std::enable_if<has_vector_interface<Range>::type::value>::type>
+PackagedOperation<Range> operator*(typename Range::value_type number,
+                                   const Range &u)
+{
+  // wrap the vector (stored by reference) and scale the wrapped operation
+  const PackagedOperation<Range> wrapped(u);
+  return number * wrapped;
+}
+
+
+/**
+ * @relates PackagedOperation
+ *
+ * Create a PackagedOperation object from a LinearOperator and a reference to
+ * a vector @p u of the Domain space. The object stores the PackagedOperation
+ * $\text{op} \,u$ (in matrix notation). <code>apply</code>
+ * (<code>apply_add</code>) are implemented with <code>vmult(__1,u)</code>
+ * (<code>vmult_add(__1,u)</code>).
+ *
+ * The PackagedOperation object that is created stores a reference to @p u.
+ * Thus, the vector must remain a valid reference for the whole lifetime of
+ * the PackagedOperation object. All changes made on @p u after the creation
+ * of the PackagedOperation object are reflected by the operator object.
+ *
+ * @ingroup LAOperators
+ */
+template <typename Range, typename Domain>
+PackagedOperation<Range>
+operator*(const LinearOperator<Range, Domain> &op,
+          const Domain &u)
+{
+  // The closures below hold their own copy of op, whereas u is only
+  // referenced and therefore has to stay alive for as long as the returned
+  // object is in use.
+  PackagedOperation<Range> product;
+
+  product.reinit_vector = op.reinit_range_vector;
+
+  product.apply = [op, &u](Range &dst)
+  {
+    op.vmult(dst, u);
+  };
+
+  product.apply_add = [op, &u](Range &dst)
+  {
+    op.vmult_add(dst, u);
+  };
+
+  return product;
+}
+
+
+/**
+ * @relates PackagedOperation
+ *
+ * Create a PackagedOperation object from a LinearOperator and a reference to
+ * a vector @p u of the Range space. The object stores the PackagedOperation
+ * $\text{op}^T \,u$ (in matrix notation). <code>apply</code>
+ * (<code>apply_add</code>) are implemented with <code>Tvmult(__1,u)</code>
+ * (<code>Tvmult_add(__1,u)</code>).
+ *
+ * The PackagedOperation object that is created stores a reference to @p u.
+ * Thus, the vector must remain a valid reference for the whole lifetime of
+ * the PackagedOperation object. All changes made on @p u after the creation
+ * of the PackagedOperation object are reflected by the operator object.
+ *
+ * @ingroup LAOperators
+ */
+template <typename Range, typename Domain>
+PackagedOperation<Domain>
+operator*(const Range &u,
+          const LinearOperator<Range, Domain> &op)
+{
+  PackagedOperation<Domain> return_comp;
+
+  return_comp.reinit_vector = op.reinit_domain_vector;
+
+  // The result op^T u lives in the Domain space, hence the Domain-typed
+  // return object. op is captured by value, u is captured by reference.
+
+  return_comp.apply = [op, &u](Domain &v)
+  {
+    op.Tvmult(v, u);
+  };
+
+  return_comp.apply_add = [op, &u](Domain &v)
+  {
+    op.Tvmult_add(v, u);
+  };
+
+  return return_comp;
+}
+
+
+/**
+ * @relates PackagedOperation
+ *
+ * Composition of a PackagedOperation object with a LinearOperator. The object
+ * stores the computation $\text{op} \,comp$ (in matrix notation).
+ *
+ * @ingroup LAOperators
+ */
+template <typename Range, typename Domain>
+PackagedOperation<Range>
+operator*(const LinearOperator<Range, Domain> &op,
+          const PackagedOperation<Domain> &comp)
+{
+  PackagedOperation<Range> return_comp;
+
+  return_comp.reinit_vector = op.reinit_range_vector;
+
+  // op and comp are both captured by value so that the returned object
+  // stays valid on its own. comp yields a Domain vector, op maps it to Range.
+
+  return_comp.apply = [op, comp](Range &v)
+  {
+    static GrowingVectorMemory<Domain> vector_memory;
+
+    Domain *i = vector_memory.alloc();
+    op.reinit_domain_vector(*i, /*bool omit_zeroing_entries =*/ true);
+
+    comp.apply(*i);
+    op.vmult(v, *i);
+
+    vector_memory.free(i);
+  };
+
+  return_comp.apply_add = [op, comp](Range &v)
+  {
+    static GrowingVectorMemory<Domain> vector_memory;
+
+    Domain *i = vector_memory.alloc();
+    op.reinit_domain_vector(*i, /*bool omit_zeroing_entries =*/ true);
+
+    comp.apply(*i);
+    op.vmult_add(v, *i);
+
+    vector_memory.free(i);
+  };
+
+  return return_comp;
+}
+
+
+/**
+ * @relates PackagedOperation
+ *
+ * Composition of a PackagedOperation object with a LinearOperator. The object
+ * stores the computation $\text{op}^T \,comp$ (in matrix notation).
+ *
+ * @ingroup LAOperators
+ */
+template <typename Range, typename Domain>
+PackagedOperation<Domain>
+operator*(const PackagedOperation<Range> &comp,
+          const LinearOperator<Range, Domain> &op)
+{
+  PackagedOperation<Domain> return_comp;
+
+  return_comp.reinit_vector = op.reinit_domain_vector;
+
+  // op and comp are both captured by value so that the returned object
+  // stays valid on its own. comp yields a Range vector, op^T maps it to Domain.
+
+  return_comp.apply = [op, comp](Domain &v)
+  {
+    static GrowingVectorMemory<Range> vector_memory;
+
+    Range *i = vector_memory.alloc();
+    op.reinit_range_vector(*i, /*bool omit_zeroing_entries =*/ true);
+
+    comp.apply(*i);
+    op.Tvmult(v, *i);
+
+    vector_memory.free(i);
+  };
+
+  return_comp.apply_add = [op, comp](Domain &v)
+  {
+    static GrowingVectorMemory<Range> vector_memory;
+
+    Range *i = vector_memory.alloc();
+    op.reinit_range_vector(*i, /*bool omit_zeroing_entries =*/ true);
+
+    comp.apply(*i);
+    op.Tvmult_add(v, *i);
+
+    vector_memory.free(i);
+  };
+
+  return return_comp;
+}
+
+//@}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_CXX11
+#endif
diff --git a/include/deal.II/lac/parallel_block_vector.h b/include/deal.II/lac/parallel_block_vector.h
new file mode 100644
index 0000000..2d59287
--- /dev/null
+++ b/include/deal.II/lac/parallel_block_vector.h
@@ -0,0 +1,1012 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__parallel_block_vector_h
+#define dealii__parallel_block_vector_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/lac/block_indices.h>
+#include <deal.II/lac/block_vector_base.h>
+#include <deal.II/lac/parallel_vector.h>
+
+#include <deal.II/lac/petsc_parallel_block_vector.h>
+#include <deal.II/lac/trilinos_parallel_block_vector.h>
+
+
+#include <cstdio>
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace parallel
+{
+  namespace distributed
+  {
+
+    /*! @addtogroup Vectors
+     *@{
+     */
+
+
+    /**
+     * An implementation of block vectors based on distributed deal.II
+     * vectors. While the base class provides for most of the interface, this
+     * class handles the actual allocation of vectors and provides functions
+     * that are specific to the underlying vector type.
+     *
+     * @note Instantiations for this template are provided for <tt>@<float@>
+     * and @<double@></tt>; others can be generated in application programs
+     * (see the section on
+     * @ref Instantiations
+     * in the manual).
+     *
+     * @see
+     * @ref GlossBlockLA "Block (linear algebra)"
+     * @author Katharina Kormann, Martin Kronbichler, 2011
+     */
+    template <typename Number>
+    class BlockVector : public BlockVectorBase<Vector<Number> >
+    {
+    public:
+      /**
+       * Typedef the base class for simpler access to its own typedefs.
+       */
+      typedef BlockVectorBase<Vector<Number> > BaseClass;
+
+      /**
+       * Typedef the type of the underlying vector.
+       */
+      typedef typename BaseClass::BlockType  BlockType;
+
+      /**
+       * Import the typedefs from the base class.
+       */
+      typedef typename BaseClass::value_type      value_type;
+      typedef typename BaseClass::real_type       real_type;
+      typedef typename BaseClass::pointer         pointer;
+      typedef typename BaseClass::const_pointer   const_pointer;
+      typedef typename BaseClass::reference       reference;
+      typedef typename BaseClass::const_reference const_reference;
+      typedef typename BaseClass::size_type       size_type;
+      typedef typename BaseClass::iterator        iterator;
+      typedef typename BaseClass::const_iterator  const_iterator;
+
+      /**
+       * Constructor. There are three ways to use this constructor. First,
+       * without any arguments, it generates an object with no blocks. Given
+       * one argument, it initializes <tt>num_blocks</tt> blocks, but these
+       * blocks have size zero. The third variant finally initializes all
+       * blocks to the same size <tt>block_size</tt>.
+       *
+       * Confer the other constructor further down if you intend to use blocks
+       * of different sizes.
+       */
+      explicit BlockVector (const size_type num_blocks = 0,
+                            const size_type block_size = 0);
+
+      /**
+       * Copy-Constructor. Dimension set to that of V, all components are
+       * copied from V
+       */
+      BlockVector (const BlockVector<Number> &V);
+
+
+#ifndef DEAL_II_EXPLICIT_CONSTRUCTOR_BUG
+      /**
+       * Copy constructor taking a BlockVector of another data type. This will
+       * fail if there is no conversion path from <tt>OtherNumber</tt> to
+       * <tt>Number</tt>. Note that you may lose accuracy when copying to a
+       * BlockVector with data elements with less accuracy.
+       *
+       * Older versions of gcc did not honor the @p explicit keyword on
+       * template constructors. In such cases, it is easy to accidentally
+       * write code that can be very inefficient, since the compiler starts
+       * performing hidden conversions. To avoid this, this function is
+       * disabled if we have detected a broken compiler during configuration.
+       */
+      template <typename OtherNumber>
+      explicit
+      BlockVector (const BlockVector<OtherNumber> &v);
+#endif
+
+      /**
+       * Constructor. Set the number of blocks to <tt>block_sizes.size()</tt>
+       * and initialize each block with <tt>block_sizes[i]</tt> zero elements.
+       */
+      BlockVector (const std::vector<size_type> &block_sizes);
+
+      /**
+       * Construct a block vector with an IndexSet for the local range and
+       * ghost entries for each block.
+       */
+      BlockVector (const std::vector<IndexSet> &local_ranges,
+                   const std::vector<IndexSet> &ghost_indices,
+                   const MPI_Comm  communicator);
+
+      /**
+       * Same as above but the ghost indices are assumed to be empty.
+       */
+      BlockVector (const std::vector<IndexSet> &local_ranges,
+                   const MPI_Comm  communicator);
+
+      /**
+       * Destructor. Clears memory.
+       */
+      ~BlockVector ();
+
+      /**
+       * Copy operator: fill all components of the vector with the given
+       * scalar value.
+       */
+      BlockVector &operator = (const value_type s);
+
+      /**
+       * Copy operator for arguments of the same type. Resize the present
+       * vector if necessary.
+       */
+      BlockVector &
+      operator= (const BlockVector &V);
+
+      /**
+       * Copy operator for template arguments of different types. Resize the
+       * present vector if necessary.
+       */
+      template <class Number2>
+      BlockVector &
+      operator= (const BlockVector<Number2> &V);
+
+      /**
+       * Copy a regular vector into a block vector.
+       */
+      BlockVector &
+      operator= (const Vector<Number> &V);
+
+#ifdef DEAL_II_WITH_PETSC
+      /**
+       * Copy the content of a PETSc vector into the calling vector. This
+       * function assumes that the vectors layouts have already been
+       * initialized to match.
+       *
+       * This operator is only available if deal.II was configured with PETSc.
+       */
+      BlockVector<Number> &
+      operator = (const PETScWrappers::MPI::BlockVector &petsc_vec);
+#endif
+
+#ifdef DEAL_II_WITH_TRILINOS
+      /**
+       * Copy the content of a Trilinos vector into the calling vector. This
+       * function assumes that the vectors layouts have already been
+       * initialized to match.
+       *
+       * This operator is only available if deal.II was configured with
+       * Trilinos.
+       */
+      BlockVector<Number> &
+      operator = (const TrilinosWrappers::MPI::BlockVector &trilinos_vec);
+#endif
+
+      /**
+       * Reinitialize the BlockVector to contain <tt>num_blocks</tt> blocks of
+       * size <tt>block_size</tt> each.
+       *
+       * If the second argument is left at its default value, then the block
+       * vector allocates the specified number of blocks but leaves them at
+       * zero size. You then need to later reinitialize the individual blocks,
+       * and call collect_sizes() to update the block system's knowledge of
+       * its individual block's sizes.
+       *
+       * If <tt>omit_zeroing_entries==false</tt>, the vector is filled with
+       * zeros.
+       */
+      void reinit (const size_type num_blocks,
+                   const size_type block_size = 0,
+                   const bool omit_zeroing_entries = false);
+
+      /**
+       * Reinitialize the BlockVector such that it contains
+       * <tt>block_sizes.size()</tt> blocks. Each block is reinitialized to
+       * dimension <tt>block_sizes[i]</tt>.
+       *
+       * If the number of blocks is the same as before this function was
+       * called, all vectors remain the same and reinit() is called for each
+       * vector.
+       *
+       * If <tt>omit_zeroing_entries==false</tt>, the vector is filled with
+       * zeros.
+       *
+       * Note that you must call this (or the other reinit() functions)
+       * function, rather than calling the reinit() functions of an individual
+       * block, to allow the block vector to update its caches of vector
+       * sizes. If you call reinit() on one of the blocks, then subsequent
+       * actions on this object may yield unpredictable results since they may
+       * be routed to the wrong block.
+       */
+      void reinit (const std::vector<size_type> &N,
+                   const bool                    omit_zeroing_entries=false);
+
+      /**
+       * Change the dimension to that of the vector <tt>V</tt>. The same
+       * applies as for the other reinit() function.
+       *
+       * The elements of <tt>V</tt> are not copied, i.e.  this function is the
+       * same as calling <tt>reinit (V.size(), omit_zeroing_entries)</tt>.
+       *
+       * Note that you must call this (or the other reinit() functions)
+       * function, rather than calling the reinit() functions of an individual
+       * block, to allow the block vector to update its caches of vector
+       * sizes. If you call reinit() of one of the blocks, then subsequent
+       * actions of this object may yield unpredictable results since they may
+       * be routed to the wrong block.
+       */
+      template <typename Number2>
+      void reinit (const BlockVector<Number2> &V,
+                   const bool                 omit_zeroing_entries=false);
+
+      /**
+       * This function copies the data that has accumulated in the data buffer
+       * for ghost indices to the owning processor. For the meaning of the
+       * argument @p operation, see the entry on
+       * @ref GlossCompress "Compressing distributed vectors and matrices"
+       * in the glossary.
+       *
+       * There are two variants for this function. If called with argument @p
+       * VectorOperation::add adds all the data accumulated in ghost elements
+       * to the respective elements on the owning processor and clears the
+       * ghost array afterwards. If called with argument @p
+       * VectorOperation::insert, a set operation is performed. Since setting
+       * elements in a vector with ghost elements is ambiguous (as one can set
+       * both the element on the ghost site as well as the owning site), this
+       * operation makes the assumption that all data is set correctly on the
+       * owning processor. Upon call of compress(VectorOperation::insert), all
+       * ghost entries are therefore simply zeroed out (using
+       * zero_out_ghosts()). In debug mode, a check is performed that makes
+       * sure that the data set is actually consistent between processors,
+       * i.e., whenever a non-zero ghost element is found, it is compared to
+       * the value on the owning processor and an exception is thrown if these
+       * elements do not agree.
+       *
+       */
+      void compress (::dealii::VectorOperation::values operation);
+
+      /**
+       * Fills the data field for ghost indices with the values stored in the
+       * respective positions of the owning processor. This function is needed
+       * before reading from ghosts. The function is @p const even though
+       * ghost data is changed. This is needed to allow functions with a @p
+       * const vector to perform the data exchange without creating
+       * temporaries.
+       */
+      void update_ghost_values () const;
+
+      /**
+       * This method zeros the entries on ghost dofs, but does not touch
+       * locally owned DoFs.
+       *
+       * After calling this method, read access to ghost elements of the
+       * vector is forbidden and an exception is thrown. Only write access to
+       * ghost elements is allowed in this state.
+       */
+      void zero_out_ghosts ();
+
+      /**
+       * Return whether at least one block of this vector has ghost elements.
+       */
+      bool has_ghost_elements() const;
+
+      /**
+       * Return whether the vector contains only elements with value zero.
+       * This function is mainly for internal consistency checks and should
+       * seldom be used when not in debug mode since it uses quite some time.
+       */
+      bool all_zero () const;
+
+      /**
+       * Return @p true if the vector has no negative entries, i.e. all
+       * entries are zero or positive. This function is used, for example, to
+       * check whether refinement indicators are really all positive (or
+       * zero).
+       *
+       * The function obviously only makes sense if the template argument of
+       * this class is a real type. If it is a complex type, then an exception
+       * is thrown.
+       */
+      bool is_non_negative () const;
+
+      /**
+       * Checks for equality of the two vectors.
+       */
+      template <typename Number2>
+      bool operator == (const BlockVector<Number2> &v) const;
+
+      /**
+       * Checks for inequality of the two vectors.
+       */
+      template <typename Number2>
+      bool operator != (const BlockVector<Number2> &v) const;
+
+      /**
+       * Perform the inner product of two vectors.
+       */
+      template <typename Number2>
+      Number operator * (const BlockVector<Number2> &V) const;
+
+      /**
+       * Computes the square of the l<sub>2</sub> norm of the vector (i.e.,
+       * the sum of the squares of all entries among all processors).
+       */
+      real_type norm_sqr () const;
+
+      /**
+       * Computes the mean value of all the entries in the vector.
+       */
+      Number mean_value () const;
+
+      /**
+       * Returns the l<sub>1</sub> norm of the vector (i.e., the sum of the
+       * absolute values of all entries among all processors).
+       */
+      real_type l1_norm () const;
+
+      /**
+       * Returns the l<sub>2</sub> norm of the vector (i.e., square root of
+       * the sum of the square of all entries among all processors).
+       */
+      real_type l2_norm () const;
+
+      /**
+       * Returns the l<sub>p</sub> norm with real @p p of the vector (i.e.,
+       * the pth root of sum of the pth power of all entries among all
+       * processors).
+       */
+      real_type lp_norm (const real_type p) const;
+
+      /**
+       * Returns the maximum norm of the vector (i.e., maximum absolute value
+       * among all entries among all processors).
+       */
+      real_type linfty_norm () const;
+
+      /**
+       * Performs a combined operation of a vector addition and a subsequent
+       * inner product, returning the value of the inner product. In other
+       * words, the result of this function is the same as if the user called
+       * @code
+       * this->add(a, V);
+       * return_value = *this * W;
+       * @endcode
+       *
+       * The reason this function exists is that this operation involves less
+       * memory transfer than calling the two functions separately. This
+       * method only needs to load three vectors, @p this, @p V, @p W, whereas
+       * calling separate methods means to load the calling vector @p this
+       * twice. Since most vector operations are memory transfer limited, this
+       * reduces the time by 25\% (or 50\% if @p W equals @p this).
+       */
+      Number add_and_dot (const Number               a,
+                          const BlockVector<Number> &V,
+                          const BlockVector<Number> &W);
+
+      /**
+       * Multiply each element of this vector by the corresponding element of
+       * <tt>v</tt>.
+       */
+      template <class BlockVector2>
+      void scale (const BlockVector2 &v);
+
+      /**
+       * Swap the contents of this vector and the other vector <tt>v</tt>. One
+       * could do this operation with a temporary variable and copying over
+       * the data elements, but this function is significantly more efficient
+       * since it only swaps the pointers to the data of the two vectors and
+       * therefore does not need to allocate temporary storage and move data
+       * around.
+       *
+       * Limitation: right now this function only works if both vectors have
+       * the same number of blocks. If needed, the numbers of blocks should be
+       * exchanged, too.
+       *
+       * This function is analogous to the swap() function of all C++
+       * standard containers. Also, there is a global function swap(u,v) that
+       * simply calls <tt>u.swap(v)</tt>, again in analogy to standard
+       * functions.
+       */
+      void swap (BlockVector<Number> &v);
+
+      /**
+       * @addtogroup Exceptions
+       * @{
+       */
+
+      /**
+       * Exception
+       */
+      DeclException0 (ExcIteratorRangeDoesNotMatchVectorSize);
+      //@}
+    };
+
+    /*@}*/
+
+#ifndef DOXYGEN
+    /*----------------------- Inline functions ----------------------------------*/
+
+
+    template <typename Number>
+    inline
+    BlockVector<Number>::BlockVector (const size_type n_blocks,
+                                      const size_type block_size)
+    {
+      this->reinit (n_blocks, block_size, /*omit_zeroing_entries=*/ false);
+    }
+
+
+
+    template <typename Number>
+    inline
+    BlockVector<Number>::BlockVector (const std::vector<size_type> &n)
+    {
+      this->reinit (n, /*omit_zeroing_entries=*/ false);
+    }
+
+
+    template <typename Number>
+    inline
+    BlockVector<Number>::BlockVector (const std::vector<IndexSet> &local_ranges,
+                                      const std::vector<IndexSet> &ghost_indices,
+                                      const MPI_Comm  communicator)
+    {
+      // one ghost index set per block is needed, ghost_indices[i] is read below
+      AssertDimension (local_ranges.size(), ghost_indices.size());
+      std::vector<size_type> sizes(local_ranges.size());
+      for (unsigned int i=0; i<local_ranges.size(); ++i)
+        sizes[i] = local_ranges[i].size();
+      this->block_indices.reinit(sizes);
+      this->components.resize(this->n_blocks());
+      for (unsigned int i=0; i<this->n_blocks(); ++i)
+        this->block(i).reinit(local_ranges[i], ghost_indices[i], communicator);
+    }
+
+
+    template <typename Number>
+    inline
+    BlockVector<Number>::BlockVector (const std::vector<IndexSet> &local_ranges,
+                                      const MPI_Comm  communicator)
+    {
+      // every block is sized by the size() of its index set
+      std::vector<size_type> block_sizes;
+      block_sizes.reserve(local_ranges.size());
+      for (unsigned int b=0; b<local_ranges.size(); ++b)
+        block_sizes.push_back(local_ranges[b].size());
+      this->block_indices.reinit(block_sizes);
+      this->components.resize(this->n_blocks());
+      for (unsigned int b=0; b<this->n_blocks(); ++b)
+        this->block(b).reinit(local_ranges[b], communicator);
+    }
+
+
+
+    template <typename Number>
+    inline
+    BlockVector<Number>::BlockVector (const BlockVector<Number> &v)
+      :
+      BlockVectorBase<Vector<Number> > ()
+    {
+      // take over the block layout of v, then copy block by block
+      this->block_indices = v.block_indices;
+      this->components.resize (v.n_blocks());
+      for (size_type b=0; b<this->n_blocks(); ++b)
+        this->components[b] = v.components[b];
+    }
+
+
+#ifndef DEAL_II_EXPLICIT_CONSTRUCTOR_BUG
+
+    template <typename Number>
+    template <typename OtherNumber>
+    inline
+    BlockVector<Number>::BlockVector (const BlockVector<OtherNumber> &v)
+    {
+      this->reinit (v, /*omit_zeroing_entries=*/ true);
+      this->operator= (v);
+    }
+
+#endif
+
+
+
+    template <typename Number>
+    inline
+    void BlockVector<Number>::reinit (const size_type n_bl,
+                                      const size_type bl_sz,
+                                      const bool         omit_zeroing_entries)
+    {
+      const std::vector<size_type> uniform_sizes(n_bl, bl_sz);
+      this->reinit(uniform_sizes, omit_zeroing_entries);
+    }
+
+
+    template <typename Number>
+    inline
+    void BlockVector<Number>::reinit (const std::vector<size_type> &n,
+                                      const bool                    omit_zeroing_entries)
+    {
+      // bring the block bookkeeping and the stored blocks in line with n
+      this->block_indices.reinit (n);
+      if (this->components.size() != this->n_blocks())
+        this->components.resize(this->n_blocks());
+      for (size_type b=0; b<this->n_blocks(); ++b)
+        this->components[b].reinit(n[b], omit_zeroing_entries);
+    }
+
+
+
+    template <typename Number>
+    template <typename Number2>
+    inline
+    void BlockVector<Number>::reinit (const BlockVector<Number2> &v,
+                                      const bool omit_zeroing_entries)
+    {
+      // adopt the block layout of v and size each block like its counterpart
+      this->block_indices = v.get_block_indices();
+      if (this->components.size() != this->n_blocks())
+        this->components.resize(this->n_blocks());
+      for (unsigned int b=0; b<this->n_blocks(); ++b)
+        this->block(b).reinit(v.block(b), omit_zeroing_entries);
+    }
+
+
+
+    template <typename Number>
+    inline
+    BlockVector<Number>::~BlockVector ()
+    {} // intentionally empty: no explicit cleanup is performed here
+
+
+
+    template <typename Number>
+    inline
+    BlockVector<Number> &
+    BlockVector<Number>::operator = (const value_type s)
+    {
+      // check s with AssertIsFinite, then let the base class assignment
+      // operator handle the scalar
+      AssertIsFinite(s);
+      this->BaseClass::operator = (s);
+      return *this;
+    }
+
+
+
+    template <typename Number>
+    inline
+    BlockVector<Number> &
+    BlockVector<Number>::operator = (const BlockVector &v)
+    {
+      // we only allow assignment to vectors with the same number of blocks
+      // or to an empty BlockVector
+      Assert (this->n_blocks() == 0 || this->n_blocks() == v.n_blocks(),
+              ExcDimensionMismatch(this->n_blocks(), v.n_blocks()));
+
+      // create empty blocks only; a bare 'true' as second argument would be
+      // taken as block_size, not as omit_zeroing_entries
+      if (this->n_blocks() != v.n_blocks())
+        reinit(v.n_blocks(), 0, true);
+      for (size_type i=0; i<this->n_blocks(); ++i)
+        this->components[i] = v.block(i);
+      this->collect_sizes();
+      return *this;
+    }
+
+
+
+    template <typename Number>
+    inline
+    BlockVector<Number> &
+    BlockVector<Number>::operator = (const Vector<Number> &v)
+    {
+      this->BaseClass::operator = (v); // the base class does the copying
+      return *this;
+    }
+
+
+
+    template <typename Number>
+    template <typename Number2>
+    inline
+    BlockVector<Number> &
+    BlockVector<Number>::operator = (const BlockVector<Number2> &v)
+    {
+      this->reinit (v, /*omit_zeroing_entries=*/ true);
+      this->BaseClass::operator = (v);
+      return *this;
+    }
+
+
+
+#ifdef DEAL_II_WITH_PETSC
+
+    template <typename Number>
+    inline
+    BlockVector<Number> &
+    BlockVector<Number>::operator = (const PETScWrappers::MPI::BlockVector &petsc_vec)
+    {
+      // the block counts must already agree; blocks are copied one by one
+      AssertDimension(this->n_blocks(), petsc_vec.n_blocks());
+      for (unsigned int b=0; b<this->n_blocks(); ++b)
+        this->block(b) = petsc_vec.block(b);
+      return *this;
+    }
+
+#endif
+
+
+
+#ifdef DEAL_II_WITH_TRILINOS
+
+    template <typename Number>
+    inline
+    BlockVector<Number> &
+    BlockVector<Number>::operator = (const TrilinosWrappers::MPI::BlockVector &trilinos_vec)
+    {
+      // the block counts must already agree; blocks are copied one by one
+      AssertDimension(this->n_blocks(), trilinos_vec.n_blocks());
+      for (unsigned int b=0; b<this->n_blocks(); ++b)
+        this->block(b) = trilinos_vec.block(b);
+      return *this;
+    }
+
+#endif
+
+
+
+    template <typename Number>
+    inline
+    void
+    BlockVector<Number>::compress (::dealii::VectorOperation::values operation)
+    {
+      // issue compress_start() for every block before calling
+      // compress_finish() on any of them; this saves repeated synchronizations
+      for (unsigned int b=0; b<this->n_blocks(); ++b)
+        this->block(b).compress_start(b*10 + 8273, operation);
+      for (unsigned int b=0; b<this->n_blocks(); ++b)
+        this->block(b).compress_finish(operation);
+    }
+
+
+
+    template <typename Number>
+    inline void
+    BlockVector<Number>::update_ghost_values () const
+    {
+      // start the exchange on every block before finishing any of them
+      for (unsigned int b=0; b<this->n_blocks(); ++b)
+        this->block(b).update_ghost_values_start(b*10 + 9923);
+      for (unsigned int b=0; b<this->n_blocks(); ++b)
+        this->block(b).update_ghost_values_finish();
+    }
+
+
+
+    template <typename Number>
+    inline void
+    BlockVector<Number>::zero_out_ghosts ()
+    {
+      // simply forwarded to each block in turn
+      for (unsigned int b=0; b<this->n_blocks(); ++b)
+        this->block(b).zero_out_ghosts();
+    }
+
+
+
+    template <typename Number>
+    inline bool
+    BlockVector<Number>::has_ghost_elements () const
+    {
+      // true if at least one block reports ghost elements
+      bool any_block_has_ghosts = false;
+      for (unsigned int b=0; b<this->n_blocks(); ++b)
+        if (this->block(b).has_ghost_elements())
+          any_block_has_ghosts = true;
+      return any_block_has_ghosts;
+    }
+
+
+
+    template <typename Number>
+    inline
+    bool
+    BlockVector<Number>::all_zero () const
+    {
+      Assert (this->n_blocks() > 0, ExcEmptyObject());
+
+      // the answer is encoded as an int (-1: all zero, 0: some nonzero entry)
+      // rather than a bool. MPI_Allreduce is only reached when more than one
+      // processor is involved, so that the function also works when MPI_Init
+      // was not called (note that reinit() functions handle this case
+      // correctly through the job_supports_mpi() query)
+      int all_zero_code = -1;
+      for (unsigned int b=0; b<this->n_blocks(); ++b)
+        {
+          const int block_code = -static_cast<int>(this->block(b).all_zero_local());
+          all_zero_code = std::max(all_zero_code, block_code);
+        }
+      if (this->block(0).partitioner->n_mpi_processes() > 1)
+        all_zero_code = Utilities::MPI::max(all_zero_code,
+                                            this->block(0).partitioner->get_communicator());
+      return all_zero_code != 0;
+    }
+
+
+
+    template <typename Number>
+    inline
+    bool
+    BlockVector<Number>::is_non_negative () const
+    {
+      Assert (this->n_blocks() > 0, ExcEmptyObject());
+      // -1: every block passed the local check so far, 0: some block failed
+      int non_negative_code = -1;
+      for (unsigned int b=0; b<this->n_blocks(); ++b)
+        non_negative_code = std::max(non_negative_code,
+                                     -static_cast<int>(this->block(b).is_non_negative_local()));
+      if (this->block(0).partitioner->n_mpi_processes() > 1)
+        non_negative_code = Utilities::MPI::max(non_negative_code,
+                                                this->block(0).partitioner->get_communicator());
+      return non_negative_code != 0;
+    }
+
+
+
+    template <typename Number>
+    template <typename Number2>
+    inline
+    bool
+    BlockVector<Number>::operator == (const BlockVector<Number2> &v) const
+    {
+      Assert (this->n_blocks() > 0, ExcEmptyObject());
+      AssertDimension (this->n_blocks(), v.n_blocks());
+
+      // Mismatch flag (unsigned int, since MPI has no bool): 1 if any local
+      // block pair differs, 0 otherwise. The vectors are equal exactly when
+      // the maximum of this flag over all processes is still 0.
+      unsigned int mismatch = 0;
+      for (unsigned int b=0; b<this->n_blocks(); ++b)
+        if (!this->block(b).vectors_equal_local(v.block(b)))
+          mismatch = 1;
+
+      // skip the reduction when running on a single process
+      if (this->block(0).partitioner->n_mpi_processes() > 1)
+        mismatch = Utilities::MPI::max(mismatch,
+                                       this->block(0).partitioner->get_communicator());
+      return mismatch==0;
+    }
+
+
+
+    template <typename Number>
+    template <typename Number2>
+    inline
+    bool
+    BlockVector<Number>::operator != (const BlockVector<Number2> &v) const
+    {
+      return !(*this == v);   // inequality is the negation of operator ==
+    }
+
+
+
+    template <typename Number>
+    template <typename Number2>
+    inline
+    Number
+    BlockVector<Number>::operator * (const BlockVector<Number2> &v) const
+    {
+      Assert (this->n_blocks() > 0, ExcEmptyObject());
+      AssertDimension (this->n_blocks(), v.n_blocks());
+
+      // sum the block-wise local inner products first ...
+      Number partial_sum = Number();
+      for (unsigned int b=0; b<this->n_blocks(); ++b)
+        partial_sum += this->block(b).inner_product_local(v.block(b));
+
+      // ... and add up the contributions of all processes, if more than one
+      const bool single_process = this->block(0).partitioner->n_mpi_processes() <= 1;
+      return single_process ? partial_sum :
+             Utilities::MPI::sum (partial_sum, this->block(0).partitioner->get_communicator());
+    }
+
+
+
+    template <typename Number>
+    inline
+    typename BlockVector<Number>::real_type
+    BlockVector<Number>::norm_sqr () const
+    {
+      Assert (this->n_blocks() > 0, ExcEmptyObject());
+
+      // accumulate the local squared norms of all blocks
+      real_type sum_of_squares = real_type();
+      for (unsigned int b=0; b<this->n_blocks(); ++b)
+        sum_of_squares += this->block(b).norm_sqr_local();
+      // a global sum is only required if there are several processes
+      if (this->block(0).partitioner->n_mpi_processes() <= 1)
+        return sum_of_squares;
+      return Utilities::MPI::sum (sum_of_squares,
+                                  this->block(0).partitioner->get_communicator());
+    }
+
+
+
+    template <typename Number>
+    inline
+    Number
+    BlockVector<Number>::mean_value () const
+    {
+      Assert (this->n_blocks() > 0, ExcEmptyObject());
+
+      // weight every local block mean by that block's local_size()
+      Number weighted_sum = Number();
+      for (unsigned int b=0; b<this->n_blocks(); ++b)
+        weighted_sum += this->block(b).mean_value_local() *
+                        static_cast<real_type>(this->block(b).partitioner->local_size());
+
+      if (this->block(0).partitioner->n_mpi_processes() > 1)
+        weighted_sum = Utilities::MPI::sum (weighted_sum,
+                                            this->block(0).partitioner->get_communicator());
+      return weighted_sum/static_cast<real_type>(this->size());
+    }
+
+
+
+    template <typename Number>
+    inline
+    typename BlockVector<Number>::real_type
+    BlockVector<Number>::l1_norm () const
+    {
+      Assert (this->n_blocks() > 0, ExcEmptyObject());
+
+      // local l1 norms are added over the blocks, then over the processes
+      real_type local_l1 = real_type();
+      for (unsigned int b=0; b<this->n_blocks(); ++b)
+        local_l1 += this->block(b).l1_norm_local();
+
+      const bool distributed = this->block(0).partitioner->n_mpi_processes() > 1;
+      if (distributed)
+        local_l1 = Utilities::MPI::sum (local_l1, this->block(0).partitioner->get_communicator());
+      return local_l1;
+    }
+
+
+
+    template <typename Number>
+    inline
+    typename BlockVector<Number>::real_type
+    BlockVector<Number>::l2_norm () const
+    {
+      return std::sqrt(this->norm_sqr());   // square root of norm_sqr()
+    }
+
+
+
+    template <typename Number>
+    inline
+    typename BlockVector<Number>::real_type
+    BlockVector<Number>::lp_norm (const real_type p) const
+    {
+      Assert (this->n_blocks() > 0, ExcEmptyObject());
+
+      // raise each block's local lp norm to the p-th power so that the
+      // contributions can be added; the 1/p-th power is applied at the end
+      real_type sum_of_powers = real_type();
+      for (unsigned int b=0; b<this->n_blocks(); ++b)
+        sum_of_powers += std::pow(this->block(b).lp_norm_local(p), p);
+
+      if (this->block(0).partitioner->n_mpi_processes() > 1)
+        sum_of_powers = Utilities::MPI::sum(sum_of_powers,
+                                            this->block(0).partitioner->get_communicator());
+      return std::pow (sum_of_powers, static_cast<real_type>(1.0/p));
+    }
+
+
+
+    template <typename Number>
+    inline
+    typename BlockVector<Number>::real_type
+    BlockVector<Number>::linfty_norm () const
+    {
+      Assert (this->n_blocks() > 0, ExcEmptyObject());
+
+      // largest local block norm first, then the maximum over all processes
+      real_type local_max = real_type();
+      for (unsigned int b=0; b<this->n_blocks(); ++b)
+        local_max = std::max(local_max, this->block(b).linfty_norm_local());
+
+      if (this->block(0).partitioner->n_mpi_processes() <= 1)
+        return local_max;
+      return Utilities::MPI::max (local_max,
+                                  this->block(0).partitioner->get_communicator());
+    }
+
+
+
+    template <typename Number>
+    inline
+    Number
+    BlockVector<Number>::add_and_dot (const Number               a,
+                                      const BlockVector<Number> &V,
+                                      const BlockVector<Number> &W)
+    {
+      // same checks as operator *: block(0) needs a block, V/W must match
+      Assert (this->n_blocks() > 0, ExcEmptyObject());
+      AssertDimension (this->n_blocks(), V.n_blocks());
+      AssertDimension (this->n_blocks(), W.n_blocks());
+      Number local_result = Number();
+      for (unsigned int i=0; i<this->n_blocks(); ++i)
+        local_result += this->block(i).add_and_dot_local(a, V.block(i), W.block(i));
+      return (this->block(0).partitioner->n_mpi_processes() <= 1) ? local_result :
+             Utilities::MPI::sum (local_result, this->block(0).partitioner->get_communicator());
+    }
+
+
+
+    template <typename Number>
+    inline
+    void
+    BlockVector<Number>::swap (BlockVector<Number> &v)
+    {
+      // both vectors need the same number of blocks; the blocks are swapped
+      // pairwise first, followed by the block index information
+      AssertDimension (this->n_blocks(), v.n_blocks());
+      for (size_type b=0; b<this->n_blocks(); ++b)
+        dealii::swap (this->components[b], v.components[b]);
+      dealii::swap (this->block_indices, v.block_indices);
+    }
+
+
+
+    template <typename Number>
+    template <class BlockVector2>
+    void BlockVector<Number>::scale (const BlockVector2 &v)
+    {
+      this->BaseClass::scale (v);   // forwards to the base class version
+    }
+
+#endif // DOXYGEN
+
+  } // end of namespace distributed
+
+} // end of namespace parallel
+
+/**
+ * Global swap function for distributed block vectors. It overloads the
+ * generic implementation of the C++ standard library, which uses a temporary
+ * object, and simply forwards to the member function <tt>u.swap(v)</tt>.
+ *
+ * @relates BlockVector
+ * @author Katharina Kormann, Martin Kronbichler, 2011
+ */
+template <typename Number>
+inline void
+swap (parallel::distributed::BlockVector<Number> &u,
+      parallel::distributed::BlockVector<Number> &v)
+{
+  u.swap (v);
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/parallel_vector.h b/include/deal.II/lac/parallel_vector.h
new file mode 100644
index 0000000..099d02f
--- /dev/null
+++ b/include/deal.II/lac/parallel_vector.h
@@ -0,0 +1,2438 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2011 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__parallel_vector_h
+#define dealii__parallel_vector_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/index_set.h>
+#include <deal.II/base/mpi.h>
+#include <deal.II/base/template_constraints.h>
+#include <deal.II/base/types.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/base/partitioner.h>
+#include <deal.II/base/thread_management.h>
+#include <deal.II/lac/vector_view.h>
+
+#include <cstring>
+#include <iomanip>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+#ifdef DEAL_II_WITH_PETSC
+namespace PETScWrappers
+{
+  namespace MPI
+  {
+    class Vector;   // forward declaration only; used by an operator = below
+  }
+}
+#endif
+
+#ifdef DEAL_II_WITH_TRILINOS
+namespace TrilinosWrappers
+{
+  namespace MPI
+  {
+    class Vector;   // forward declaration only; used by an operator = below
+  }
+}
+#endif
+
+
+namespace parallel
+{
+  namespace distributed
+  {
+    template <typename Number> class BlockVector;
+
+    /*! @addtogroup Vectors
+     *@{
+     */
+
+
+    /**
+     * Implementation of a parallel vector class. The design of this class is
+     * similar to the standard ::dealii::Vector class in deal.II, with the
+     * exception that storage is distributed with MPI.
+     *
+     * The vector is designed for the following scheme of parallel
+     * partitioning:
+     * <ul>
+     * <li> The indices held by individual processes (locally owned part) in
+     * the MPI parallelization form a contiguous range
+     * <code>[my_first_index,my_last_index)</code>.
+     * <li> Ghost indices residing on arbitrary positions of other processors
+     * are allowed. It is in general more efficient if ghost indices are
+     * clustered, since they are stored as a set of intervals. The
+     * communication pattern of the ghost indices is determined when calling
+     * the function <code>reinit (locally_owned, ghost_indices,
+     * communicator)</code>, and retained until the partitioning is changed.
+     * This allows for efficient parallel communication of indices. In
+     * particular, it stores the communication pattern, rather than having to
+     * compute it again for every communication. For more information on ghost
+     * vectors, see also the
+     * @ref GlossGhostedVector "glossary entry on vectors with ghost elements".
+     * <li> Besides the usual global access operator() it is also possible to
+     * access vector entries in the local index space with the function @p
+     * local_element(). Locally owned indices are placed first, [0,
+     * local_size()), and then all ghost indices follow after them
+     * contiguously, [local_size(), local_size()+n_ghost_entries()).
+     * </ul>
+     *
+     * Functions related to parallel functionality:
+     * <ul>
+     * <li> The function <code>compress()</code> goes through the data
+     * associated with ghost indices and communicates it to the owner process,
+     * which can then add it to the correct position. This can be used e.g.
+     * after having run an assembly routine involving ghosts that fill this
+     * vector. Note that the @p insert mode of @p compress() does not set the
+     * elements included in ghost entries but simply discards them, assuming
+     * that the owning processor has set them to the desired value already
+     * (See also the
+     * @ref GlossCompress "glossary entry on compress").
+     * <li> The <code>update_ghost_values()</code> function imports the data
+     * from the owning processor to the ghost indices in order to provide read
+     * access to the data associated with ghosts.
+     * <li> It is possible to split the above functions into two phases, where
+     * the first initiates the communication and the second one finishes it.
+     * These functions can be used to overlap communication with computations
+     * in other parts of the code.
+     * <li> Of course, reduction operations (like norms) make use of
+     * collective all-to-all MPI communications.
+     * </ul>
+     *
+     * This vector can take two different states with respect to ghost
+     * elements:
+     * <ul>
+     * <li> After creation and whenever zero_out_ghosts() is called (or
+     * <code>operator= (0.)</code>), the vector only allows writing into
+     * ghost elements but not reading from ghost elements.
+     * <li> After a call to update_ghost_values(), the vector does not allow
+     * writing into ghost elements but only reading from them. This is to
+     * avoid undesired ghost data artifacts when calling compress() after
+     * modifying some vector entries. The current status of the ghost entries
+     * (read mode or write mode) can be queried by the method
+     * has_ghost_elements(), which returns <code>true</code> exactly when
+     * ghost elements have been updated and <code>false</code> otherwise,
+     * irrespective of the actual number of ghost entries in the vector layout
+     * (for that information, use n_ghost_entries() instead).
+     * </ul>
+     *
+     * This vector uses the facilities of the class dealii::Vector<Number> for
+     * implementing the operations on the local range of the vector. In
+     * particular, it also inherits thread parallelism that splits most
+     * vector-vector operations into smaller chunks if the program uses
+     * multiple threads. This may or may not be desired when working also with
+     * MPI.
+     *
+     * <h4>Limitations regarding the vector size</h4>
+     *
+     * This vector class is based on two different number types for indexing.
+     * The so-called global index type encodes the overall size of the vector.
+     * Its type is types::global_dof_index. The largest possible value is
+     * <code>2^32-1</code> or approximately 4 billion in case 64 bit integers
+     * are disabled at configuration of deal.II (default case) or
+     * <code>2^64-1</code> or approximately <code>10^19</code> if 64 bit
+     * integers are enabled (see the glossary entry on
+     * @ref GlobalDoFIndex
+     * for further information).
+     *
+     * The second relevant index type is the local index used within one MPI
+     * rank. As opposed to the global index, the implementation assumes 32-bit
+     * unsigned integers unconditionally. In other words, to actually use a
+     * vector with more than four billion entries, you need to use MPI with
+     * more than one rank (which in general is a safe assumption since four
+     * billion entries consume at least 16 GB of memory for floats or 32 GB of
+     * memory for doubles) and enable 64-bit indices. If more than 4 billion
+     * local elements are present, the implementation tries to detect that,
+     * which triggers an exception and aborts the code. Note, however, that
+     * the detection of overflow is tricky and the detection mechanism might
+     * fail in some circumstances. Therefore, it is strongly recommended to
+     * not rely on this class to automatically detect the unsupported case.
+     *
+     * @author Katharina Kormann, Martin Kronbichler, 2010, 2011
+     */
+    template <typename Number>
+    class Vector : public Subscriptor
+    {
+    public:
+      /**
+       * Declare standard types used in all containers. These types parallel
+       * those in the <tt>C++</tt> standard libraries <tt>vector<...></tt>
+       * class.
+       */
+      typedef Number                                            value_type;
+      typedef value_type                                       *pointer;
+      typedef const value_type                                 *const_pointer;
+      typedef value_type                                       *iterator;
+      typedef const value_type                                 *const_iterator;
+      typedef value_type                                       &reference;
+      typedef const value_type                                 &const_reference;
+      typedef types::global_dof_index                           size_type;
+      typedef typename numbers::NumberTraits<Number>::real_type real_type;
+
+      /**
+       * A variable that indicates whether this vector supports distributed
+       * data storage. If true, then this vector also needs an appropriate
+       * compress() function that allows communicating recent set or add
+       * operations to individual elements to be communicated to other
+       * processors.
+       *
+       * For the current class, the variable equals true, since it does
+       * support parallel data storage.
+       */
+      static const bool supports_distributed_data = true;
+
+      /**
+       * @name 1: Basic Object-handling
+       */
+      //@{
+      /**
+       * Empty constructor.
+       */
+      Vector ();
+
+      /**
+       * Copy constructor. Uses the parallel partitioning of @p in_vector.
+       */
+      Vector (const Vector<Number> &in_vector);
+
+      /**
+       * Constructs a parallel vector of the given global size without any
+       * actual parallel distribution.
+       */
+      Vector (const size_type size);
+
+      /**
+       * Constructs a parallel vector. The local range is specified by @p
+       * local_range (note that this must be a contiguous interval,
+       * multiple intervals are not possible). The IndexSet @p ghost_indices
+       * specifies ghost indices, i.e., indices which one might need to read
+       * data from or accumulate data from. It is allowed that the set of
+       * ghost indices also contains the local range, but it does not need to.
+       *
+       * This function involves global communication, so it should only be
+       * called once for a given layout. Use the constructor with
+       * Vector<Number> argument to create additional vectors with the same
+       * parallel layout.
+       *
+       * @see
+       * @ref GlossGhostedVector "vectors with ghost elements"
+       */
+      Vector (const IndexSet &local_range,
+              const IndexSet &ghost_indices,
+              const MPI_Comm  communicator);
+
+      /**
+       * Same constructor as above but without any ghost indices.
+       */
+      Vector (const IndexSet &local_range,
+              const MPI_Comm  communicator);
+
+      /**
+       * Create the vector based on the parallel partitioning described in @p
+       * partitioner. The input argument is a shared pointer, which stores the
+       * partitioner data only once and shares it between several vectors with
+       * the same layout.
+       */
+      Vector (const std_cxx11::shared_ptr<const Utilities::MPI::Partitioner> &partitioner);
+
+      /**
+       * Destructor.
+       */
+      ~Vector ();
+
+      /**
+       * Sets the global size of the vector to @p size without any actual
+       * parallel distribution.
+       */
+      void reinit (const size_type size,
+                   const bool      omit_zeroing_entries = false);
+
+      /**
+       * Uses the parallel layout of the input vector @p in_vector and
+       * allocates memory for this vector. Recommended initialization function
+       * when several vectors with the same layout should be created.
+       *
+       * If the flag @p omit_zeroing_entries is set to false, the memory will
+       * be initialized with zero, otherwise the memory will be untouched (and
+       * the user must make sure to fill it with reasonable data before using
+       * it).
+       */
+      template <typename Number2>
+      void reinit(const Vector<Number2> &in_vector,
+                  const bool             omit_zeroing_entries = false);
+
+      /**
+       * Initialize the vector. The local range is specified by @p
+       * local_range (note that this must be a contiguous interval,
+       * multiple intervals are not possible). The IndexSet @p ghost_indices
+       * specifies ghost indices, i.e., indices which one might need to read
+       * data from or accumulate data from. It is allowed that the set of
+       * ghost indices also contains the local range, but it does not need to.
+       *
+       * This function involves global communication, so it should only be
+       * called once for a given layout. Use the @p reinit function with
+       * Vector<Number> argument to create additional vectors with the same
+       * parallel layout.
+       *
+       * @see
+       * @ref GlossGhostedVector "vectors with ghost elements"
+       */
+      void reinit (const IndexSet &local_range,
+                   const IndexSet &ghost_indices,
+                   const MPI_Comm  communicator);
+
+      /**
+       * Same as above, but without ghost entries.
+       */
+      void reinit (const IndexSet &local_range,
+                   const MPI_Comm  communicator);
+
+      /**
+       * Initialize the vector according to the parallel partitioning described
+       * in @p partitioner. The input argument is a shared pointer, which stores
+       * the partitioner data only once and shares it between several vectors
+       * with the same layout.
+       */
+      void reinit (const std_cxx11::shared_ptr<const Utilities::MPI::Partitioner> &partitioner);
+
+      /**
+       * Swap the contents of this vector and the other vector @p v. One could
+       * do this operation with a temporary variable and copying over the data
+       * elements, but this function is significantly more efficient since it
+       * only swaps the pointers to the data of the two vectors and therefore
+       * does not need to allocate temporary storage and move data around.
+       *
+       * This function is analogous to the @p swap function of all C++
+       * standard containers. Also, there is a global function
+       * <tt>swap(u,v)</tt> that simply calls <tt>u.swap(v)</tt>, again in
+       * analogy to standard functions.
+       *
+       * This function is virtual in order to allow for derived classes to
+       * handle memory separately.
+       */
+      void swap (Vector<Number> &v);
+
+      /**
+       * Assigns the vector to the parallel partitioning of the input vector
+       * @p in_vector, and copies all the data.
+       *
+       * If one of the input vector or the calling vector (to the left of the
+       * assignment operator) had ghost elements set before this operation,
+       * the calling vector will have ghost values set. Otherwise, it will be
+       * in write mode. If the input vector does not have any ghost elements
+       * at all, the vector will also update its ghost values in analogy to
+       * the respective setting of the Trilinos and PETSc vectors.
+       */
+      Vector<Number> &
+      operator = (const Vector<Number> &in_vector);
+
+      /**
+       * Assigns the vector to the parallel partitioning of the input vector
+       * @p in_vector, and copies all the data.
+       *
+       * If one of the input vector or the calling vector (to the left of the
+       * assignment operator) had ghost elements set before this operation,
+       * the calling vector will have ghost values set. Otherwise, it will be
+       * in write mode. If the input vector does not have any ghost elements
+       * at all, the vector will also update its ghost values in analogy to
+       * the respective setting of the Trilinos and PETSc vectors.
+       */
+      template <typename Number2>
+      Vector<Number> &
+      operator = (const Vector<Number2> &in_vector);
+
+#ifdef DEAL_II_WITH_PETSC
+      /**
+       * Copy the content of a PETSc vector into the calling vector. This
+       * function assumes that the vectors layouts have already been
+       * initialized to match.
+       *
+       * This operator is only available if deal.II was configured with PETSc.
+       */
+      Vector<Number> &
+      operator = (const PETScWrappers::MPI::Vector &petsc_vec);
+#endif
+
+#ifdef DEAL_II_WITH_TRILINOS
+      /**
+       * Copy the content of a Trilinos vector into the calling vector. This
+       * function assumes that the vectors layouts have already been
+       * initialized to match.
+       *
+       * This operator is only available if deal.II was configured with
+       * Trilinos.
+       */
+      Vector<Number> &
+      operator = (const TrilinosWrappers::MPI::Vector &trilinos_vec);
+#endif
+
+      /**
+       * This method copies the local range from another vector with the same
+       * local range, but possibly different layout of ghost indices.
+       *
+       * This function is deprecated.
+       */
+      void copy_from (const Vector<Number> &in_vector,
+                      const bool            call_update_ghost_values = false) DEAL_II_DEPRECATED;
+
+      /**
+       * Sets all elements of the vector to the scalar @p s. If the scalar is
+       * zero, also ghost elements are set to zero, otherwise they remain
+       * unchanged.
+       */
+      Vector<Number> &operator = (const Number s);
+
+      /**
+       * This function copies the data that has accumulated in the data buffer
+       * for ghost indices to the owning processor. For the meaning of the
+       * argument @p operation, see the entry on
+       * @ref GlossCompress "Compressing distributed vectors and matrices"
+       * in the glossary.
+       *
+       * There are two variants for this function. If called with argument @p
+       * VectorOperation::add adds all the data accumulated in ghost elements
+       * to the respective elements on the owning processor and clears the
+       * ghost array afterwards. If called with argument @p
+       * VectorOperation::insert, a set operation is performed. Since setting
+       * elements in a vector with ghost elements is ambiguous (as one can set
+       * both the element on the ghost site as well as the owning site), this
+       * operation makes the assumption that all data is set correctly on the
+       * owning processor. Upon call of compress(VectorOperation::insert), all
+       * ghost entries are thus simply zeroed out (using zero_out_ghosts()).
+       * In debug mode, a check is performed for whether the data set is
+       * actually consistent between processors, i.e., whenever a non-zero
+       * ghost element is found, it is compared to the value on the owning
+       * processor and an exception is thrown if these elements do not agree.
+       */
+      void compress (::dealii::VectorOperation::values operation);
+
+      /**
+       * Fills the data field for ghost indices with the values stored in the
+       * respective positions of the owning processor. This function is needed
+       * before reading from ghosts. The function is @p const even though
+       * ghost data is changed. This is needed to allow functions with a @p
+       * const vector to perform the data exchange without creating
+       * temporaries.
+       *
+       * After calling this method, write access to ghost elements of the
+       * vector is forbidden and an exception is thrown. Only read access to
+       * ghost elements is allowed in this state. Note that all subsequent
+       * operations on this vector, like global vector addition, etc., will
+       * also update the ghost values by a call to this method after the
+       * operation. However, global reduction operations like norms or the
+       * inner product will always ignore ghost elements in order to avoid
+       * counting the ghost data more than once. To allow writing to ghost
+       * elements again, call zero_out_ghosts().
+       *
+       * @see
+       * @ref GlossGhostedVector "vectors with ghost elements"
+       */
+      void update_ghost_values () const;
+
+      /**
+       * Initiates communication for the @p compress() function with non-
+       * blocking communication. This function does not wait for the transfer
+       * to finish, in order to allow for other computations during the time
+       * it takes until all data arrives.
+       *
+       * Before the data is actually exchanged, the function must be followed
+       * by a call to @p compress_finish().
+       *
+       * In case this function is called for more than one vector before @p
+       * compress_finish() is invoked, it is mandatory to specify a unique
+       * communication channel to each such call, in order to avoid several
+       * messages with the same ID that will corrupt this operation.
+       */
+      void compress_start (const unsigned int communication_channel = 0,
+                           ::dealii::VectorOperation::values operation = VectorOperation::add);
+
+      /**
+       * For all requests that have been initiated in compress_start, wait for
+       * the communication to finish. Once it is finished, add or set the data
+       * (depending on the flag operation) to the respective positions in the
+       * owning processor, and clear the contents in the ghost data fields.
+       * The meaning of this argument is the same as in compress().
+       *
+       * This function should be called exactly once per vector after calling
+       * compress_start, otherwise the result is undefined. In particular, it
+       * is not well-defined to call compress_start on the same vector again
+       * before compress_finish() has been called. However, there is no
+       * warning to prevent this situation.
+       *
+       * Must follow a call to the @p compress_start function.
+       */
+      void compress_finish (::dealii::VectorOperation::values operation);
+
+      /**
+       * Initiates communication for the @p update_ghost_values() function
+       * with non-blocking communication. This function does not wait for the
+       * transfer to finish, in order to allow for other computations during
+       * the time it takes until all data arrives.
+       *
+       * Before the data is actually exchanged, the function must be followed
+       * by a call to @p update_ghost_values_finish().
+       *
+       * In case this function is called for more than one vector before @p
+       * update_ghost_values_finish() is invoked, it is mandatory to specify a
+       * unique communication channel to each such call, in order to avoid
+       * several messages with the same ID that will corrupt this operation.
+       */
+      void update_ghost_values_start (const unsigned int communication_channel = 0) const;
+
+
+      /**
+       * For all requests that have been started in update_ghost_values_start,
+       * wait for the communication to finish.
+       *
+       * Must follow a call to the @p update_ghost_values_start function
+       * before reading data from ghost indices.
+       */
+      void update_ghost_values_finish () const;
+
+      /**
+       * This method zeros the entries on ghost dofs, but does not touch
+       * locally owned DoFs.
+       *
+       * After calling this method, read access to ghost elements of the
+       * vector is forbidden and an exception is thrown. Only write access to
+       * ghost elements is allowed in this state.
+       */
+      void zero_out_ghosts ();
+
+      /**
+       * Returns whether the vector currently is in a state where ghost values
+       * can be read or not. This is the same functionality as other parallel
+       * vectors have. If this method returns false, this only means that
+       * read-access to ghost elements is prohibited whereas write access is
+       * still possible (to those entries specified as ghosts during
+       * initialization), not that there are no ghost elements at all.
+       *
+       * @see
+       * @ref GlossGhostedVector "vectors with ghost elements"
+       */
+      bool has_ghost_elements() const;
+
+      /**
+       * Return whether the vector contains only elements with value zero.
+       * This is a collective operation. This function is expensive, because
+       * potentially all elements have to be checked.
+       */
+      bool all_zero () const;
+
+      /**
+       * Return @p true if the vector has no negative entries, i.e. all
+       * entries are zero or positive. This function is used, for example, to
+       * check whether refinement indicators are really all positive (or
+       * zero).
+       *
+       * The function obviously only makes sense if the template argument of
+       * this class is a real type. If it is a complex type, then an exception
+       * is thrown.
+       */
+      bool is_non_negative () const;
+
+      /**
+       * Checks for equality of the two vectors.
+       */
+      template <typename Number2>
+      bool operator == (const Vector<Number2> &v) const;
+
+      /**
+       * Checks for inequality of the two vectors.
+       */
+      template <typename Number2>
+      bool operator != (const Vector<Number2> &v) const;
+
+      /**
+       * Perform the inner product of two vectors.
+       */
+      template <typename Number2>
+      Number operator * (const Vector<Number2> &V) const;
+
+      /**
+       * Computes the square of the l<sub>2</sub> norm of the vector (i.e.,
+       * the sum of the squares of all entries among all processors).
+       */
+      real_type norm_sqr () const;
+
+      /**
+       * Computes the mean value of all the entries in the vector.
+       */
+      Number mean_value () const;
+
+      /**
+       * Returns the l<sub>1</sub> norm of the vector (i.e., the sum of the
+       * absolute values of all entries among all processors).
+       */
+      real_type l1_norm () const;
+
+      /**
+       * Returns the l<sub>2</sub> norm of the vector (i.e., square root of
+       * the sum of the square of all entries among all processors).
+       */
+      real_type l2_norm () const;
+
+      /**
+       * Returns the l<sub>p</sub> norm with real @p p of the vector (i.e.,
+       * the pth root of sum of the pth power of all entries among all
+       * processors).
+       */
+      real_type lp_norm (const real_type p) const;
+
+      /**
+       * Returns the maximum norm of the vector (i.e., maximum absolute value
+       * among all entries among all processors).
+       */
+      real_type linfty_norm () const;
+
+      /**
+       * Performs a combined operation of a vector addition and a subsequent
+       * inner product, returning the value of the inner product. In other
+       * words, the result of this function is the same as if the user called
+       * @code
+       * this->add(a, V);
+       * return_value = *this * W;
+       * @endcode
+       *
+       * The reason this function exists is that this operation involves less
+       * memory transfer than calling the two functions separately. This
+       * method only needs to load three vectors, @p this, @p V, @p W, whereas
+       * calling separate methods means to load the calling vector @p this
+       * twice. Since most vector operations are memory transfer limited, this
+       * reduces the time by 25\% (or 50\% if @p W equals @p this).
+       */
+      Number add_and_dot (const Number          a,
+                          const Vector<Number> &V,
+                          const Vector<Number> &W);
+
+      /**
+       * Returns the global size of the vector, equal to the sum of the number
+       * of locally owned indices among all the processors.
+       */
+      size_type size () const;
+
+      /**
+       * Returns the local size of the vector, i.e., the number of indices
+       * owned locally.
+       */
+      size_type local_size() const;
+
+      /**
+       * Returns the half-open interval that specifies the locally owned range
+       * of the vector. Note that <code>local_size() == local_range().second -
+       * local_range().first</code>.
+       */
+      std::pair<size_type, size_type> local_range () const;
+
+      /**
+       * Returns true if the given global index is in the local range of this
+       * processor.
+       */
+      bool in_local_range (const size_type global_index) const;
+
+      /**
+       * Return an index set that describes which elements of this vector are
+       * owned by the current processor. Note that this index set does not
+       * include elements this vector may store locally as ghost elements but
+       * that are in fact owned by another processor. As a consequence, the
+       * index sets returned on different processors if this is a distributed
+       * vector will form disjoint sets that add up to the complete index set.
+       * Obviously, if a vector is created on only one processor, then the
+       * result would satisfy
+       * @code
+       *   vec.locally_owned_elements() == complete_index_set (vec.size())
+       * @endcode
+       */
+      IndexSet locally_owned_elements () const;
+
+      /**
+       * Returns the number of ghost elements present on the vector.
+       *
+       * This function is deprecated.
+       */
+      size_type n_ghost_entries () const DEAL_II_DEPRECATED;
+
+      /**
+       * Return an index set that describes which elements of this vector are
+       * not owned by the current processor but can be written into or read
+       * from locally (ghost elements).
+       *
+       * This function is deprecated.
+       */
+      const IndexSet &ghost_elements() const DEAL_II_DEPRECATED;
+
+      /**
+       * Returns whether the given global index is a ghost index on the
+       * present processor. Returns false for indices that are owned locally
+       * and for indices not present at all.
+       *
+       * This function is deprecated.
+       */
+      bool is_ghost_entry (const types::global_dof_index global_index) const DEAL_II_DEPRECATED;
+
+      /**
+       * Make the @p Vector class a bit like the <tt>vector<></tt> class of
+       * the C++ standard library by returning iterators to the start and end
+       * of the <i>locally owned</i> elements of this vector.
+       *
+       * It holds that end() - begin() == local_size().
+       */
+      iterator begin ();
+
+      /**
+       * Return constant iterator to the start of the locally owned elements
+       * of the vector.
+       */
+      const_iterator begin () const;
+
+      /**
+       * Return an iterator pointing to the element past the end of the array
+       * of locally owned entries.
+       */
+      iterator end ();
+
+      /**
+       * Return a constant iterator pointing to the element past the end of
+       * the array of the locally owned entries.
+       */
+      const_iterator end () const;
+      //@}
+
+
+      /**
+       * @name 2: Data-Access
+       */
+      //@{
+
+      /**
+       * Read access to the data in the position corresponding to @p
+       * global_index. The index must be either in the local range of the
+       * vector or be specified as a ghost index at construction.
+       *
+       * Performance: <tt>O(1)</tt> for locally owned elements that represent
+       * a contiguous range and <tt>O(log(n<sub>ranges</sub>))</tt> for ghost
+       * elements (quite fast, but slower than local_element()).
+       */
+      Number operator () (const size_type global_index) const;
+
+      /**
+       * Read and write access to the data in the position corresponding to @p
+       * global_index. The index must be either in the local range of the
+       * vector or be specified as a ghost index at construction.
+       *
+       * Performance: <tt>O(1)</tt> for locally owned elements that represent
+       * a contiguous range and <tt>O(log(n<sub>ranges</sub>))</tt> for ghost
+       * elements (quite fast, but slower than local_element()).
+       */
+      Number &operator () (const size_type global_index);
+
+      /**
+       * Read access to the data in the position corresponding to @p
+       * global_index. The index must be either in the local range of the
+       * vector or be specified as a ghost index at construction.
+       *
+       * This function does the same thing as operator().
+       */
+      Number operator [] (const size_type global_index) const;
+
+      /**
+       * Read and write access to the data in the position corresponding to @p
+       * global_index. The index must be either in the local range of the
+       * vector or be specified as a ghost index at construction.
+       *
+       * This function does the same thing as operator().
+       */
+      Number &operator [] (const size_type global_index);
+
+      /**
+       * A collective get operation: instead of getting individual elements of
+       * a vector, this function allows to get a whole set of elements at
+       * once. The indices of the elements to be read are stated in the first
+       * argument, the corresponding values are returned in the second.
+       */
+      template <typename OtherNumber>
+      void extract_subvector_to (const std::vector<size_type> &indices,
+                                 std::vector<OtherNumber> &values) const;
+
+      /**
+       * Just as the above, but with iterators (which may be plain pointers).
+       * Useful for minimizing the copying of data.
+       */
+      template <typename ForwardIterator, typename OutputIterator>
+      void extract_subvector_to (ForwardIterator          indices_begin,
+                                 const ForwardIterator    indices_end,
+                                 OutputIterator           values_begin) const;
+
+      /**
+       * Read access to the data field specified by @p local_index. Locally
+       * owned indices can be accessed with indices
+       * <code>[0,local_size)</code>, and ghost indices with indices
+       * <code>[local_size,local_size+n_ghost_entries)</code>.
+       *
+       * Performance: Direct array access (fast).
+       */
+      Number local_element (const size_type local_index) const;
+
+      /**
+       * Read and write access to the data field specified by @p local_index.
+       * Locally owned indices can be accessed with indices
+       * <code>[0,local_size)</code>, and ghost indices with indices
+       * <code>[local_size,local_size+n_ghost_entries)</code>.
+       *
+       * Performance: Direct array access (fast).
+       */
+      Number &local_element (const size_type local_index);
+      //@}
+
+
+      /**
+       * @name 3: Modification of vectors
+       */
+      //@{
+
+      /**
+       * Add the given vector to the present one.
+       */
+      Vector<Number> &operator += (const Vector<Number> &V);
+
+      /**
+       * Subtract the given vector from the present one.
+       */
+      Vector<Number> &operator -= (const Vector<Number> &V);
+
+      /**
+       * A collective add operation: This function adds a whole set of values
+       * stored in @p values to the vector components specified by @p indices.
+       */
+      template <typename OtherNumber>
+      void add (const std::vector<size_type>   &indices,
+                const std::vector<OtherNumber>  &values);
+
+      /**
+       * This is a second collective add operation. As a difference, this
+       * function takes a deal.II vector of values.
+       */
+      template <typename OtherNumber>
+      void add (const std::vector<size_type>        &indices,
+                const ::dealii::Vector<OtherNumber> &values);
+
+      /**
+       * Take an address where <tt>n_elements</tt> are stored contiguously and
+       * add them into the vector. Handles all cases which are not covered by
+       * the other two <tt>add()</tt> functions above.
+       */
+      template <typename OtherNumber>
+      void add (const size_type    n_elements,
+                const size_type   *indices,
+                const OtherNumber  *values);
+
+      /**
+       * Addition of @p s to all components. Note that @p s is a scalar and
+       * not a vector.
+       */
+      void add (const Number s);
+
+      /**
+       * Simple vector addition, equal to the <tt>operator +=</tt>.
+       *
+       * @deprecated Use the <tt>operator +=</tt> instead.
+       */
+      void add (const Vector<Number> &V) DEAL_II_DEPRECATED;
+
+      /**
+       * Simple addition of a multiple of a vector, i.e. <tt>*this +=
+       * a*V</tt>.
+       */
+      void add (const Number a, const Vector<Number> &V);
+
+      /**
+       * Multiple addition of scaled vectors, i.e. <tt>*this += a*V+b*W</tt>.
+       */
+      void add (const Number a, const Vector<Number> &V,
+                const Number b, const Vector<Number> &W);
+
+      /**
+       * Scaling and simple vector addition, i.e.  <tt>*this =
+       * s*(*this)+V</tt>.
+       */
+      void sadd (const Number          s,
+                 const Vector<Number> &V);
+
+      /**
+       * Scaling and simple addition, i.e.  <tt>*this = s*(*this)+a*V</tt>.
+       */
+      void sadd (const Number          s,
+                 const Number          a,
+                 const Vector<Number> &V);
+
+      /**
+       * Scaling and multiple addition, i.e. <tt>*this = s*(*this)+a*V+b*W</tt>.
+       *
+       * This function is deprecated.
+       */
+      void sadd (const Number          s,
+                 const Number          a,
+                 const Vector<Number> &V,
+                 const Number          b,
+                 const Vector<Number> &W) DEAL_II_DEPRECATED;
+
+      /**
+       * Scaling and multiple addition.  <tt>*this = s*(*this)+a*V + b*W +
+       * c*X</tt>.
+       *
+       * This function is deprecated.
+       */
+      void sadd (const Number          s,
+                 const Number          a,
+                 const Vector<Number> &V,
+                 const Number          b,
+                 const Vector<Number> &W,
+                 const Number          c,
+                 const Vector<Number> &X) DEAL_II_DEPRECATED;
+
+      /**
+       * Scale each element of the vector by a constant value.
+       */
+      Vector<Number> &operator *= (const Number factor);
+
+      /**
+       * Scale each element of the vector by the inverse of the given value.
+       */
+      Vector<Number> &operator /= (const Number factor);
+
+      /**
+       * Scale each element of this vector by the corresponding element in the
+       * argument. This function is mostly meant to simulate multiplication
+       * (and immediate re-assignment) by a diagonal scaling matrix.
+       */
+      void scale (const Vector<Number> &scaling_factors);
+
+      /**
+       * Scale each element of this vector by the corresponding element in the
+       * argument. This function is mostly meant to simulate multiplication
+       * (and immediate re-assignment) by a diagonal scaling matrix.
+       */
+      template <typename Number2>
+      void scale (const Vector<Number2> &scaling_factors);
+
+      /**
+       * Assignment <tt>*this = a*u</tt>.
+       */
+      void equ (const Number a, const Vector<Number> &u);
+
+      /**
+       * Assignment <tt>*this = a*u</tt>.
+       */
+      template <typename Number2>
+      void equ (const Number a, const Vector<Number2> &u);
+
+      /**
+       * Assignment <tt>*this = a*u + b*v</tt>.
+       *
+       * This function is deprecated.
+       */
+      void equ (const Number a, const Vector<Number> &u,
+                const Number b, const Vector<Number> &v) DEAL_II_DEPRECATED;
+
+      /**
+       * Assignment <tt>*this = a*u + b*v + c*w</tt>.
+       *
+       * This function is deprecated.
+       */
+      void equ (const Number a, const Vector<Number> &u,
+                const Number b, const Vector<Number> &v,
+                const Number c, const Vector<Number> &w) DEAL_II_DEPRECATED;
+
+      /**
+       * Compute the elementwise ratio of the two given vectors, that is let
+       * <tt>this[i] = a[i]/b[i]</tt>. This is useful for example if you want
+       * to compute the cellwise ratio of true to estimated error.
+       *
+       * This vector is appropriately scaled to hold the result.
+       *
+       * If any of the <tt>b[i]</tt> is zero, the result is undefined. No
+       * attempt is made to catch such situations.
+       *
+       * This function is deprecated.
+       */
+      void ratio (const Vector<Number> &a,
+                  const Vector<Number> &b) DEAL_II_DEPRECATED;
+      //@}
+
+
+      /**
+       * @name 4: Mixed stuff
+       */
+      //@{
+      /**
+       * Return a reference to the MPI communicator object in use with this
+       * vector.
+       */
+      const MPI_Comm &get_mpi_communicator () const;
+
+      /**
+       * Return the MPI partitioner that describes the parallel layout of the
+       * vector. This object can be used to initialize another vector with the
+       * respective reinit() call, for additional queries regarding the
+       * parallel communication, or the compatibility of partitioners.
+       */
+      std_cxx11::shared_ptr<const Utilities::MPI::Partitioner>
+      get_partitioner () const;
+
+      /**
+       * Checks whether the given partitioner is compatible with the
+       * partitioner used for this vector. Two partitioners are compatible if
+       * they have the same local size and the same ghost indices. They do not
+       * necessarily need to be the same data field of the shared pointer.
+       * This is a local operation only, i.e., if only some processors decide
+       * that the partitioning is not compatible, only these processors will
+       * return @p false, whereas the other processors will return @p true.
+       */
+      bool
+      partitioners_are_compatible (const Utilities::MPI::Partitioner &part) const;
+
+      /**
+       * Checks whether the given partitioner is compatible with the
+       * partitioner used for this vector. Two partitioners are compatible if
+       * they have the same local size and the same ghost indices. They do not
+       * necessarily need to be the same data field. As opposed to
+       * partitioners_are_compatible(), this method checks for compatibility
+       * among all processors and the method only returns @p true if the
+       * partitioner is the same on all processors.
+       *
+       * This method performs global communication, so make sure to use it
+       * only in a context where all processors call it the same number of
+       * times.
+       */
+      bool
+      partitioners_are_globally_compatible (const Utilities::MPI::Partitioner &part) const;
+
+      /**
+       * Prints the vector to the output stream @p out.
+       */
+      void print (std::ostream       &out,
+                  const unsigned int  precision  = 3,
+                  const bool          scientific = true,
+                  const bool          across     = true) const;
+
+      /**
+       * Returns the memory consumption of this class in bytes.
+       */
+      std::size_t memory_consumption () const;
+      //@}
+
+      /**
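+       * Exception reporting that, during compress(VectorOperation::insert),
+       * a value received from a remote processor does not match the value
+       * on the owner processor.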
+       */
+      DeclException3 (ExcNonMatchingElements,
+                      double, double, unsigned int,
+                      << "Called compress(VectorOperation::insert), but"
+                      << " the element received from a remote processor, value "
+                      << std::setprecision(16) << arg1
+                      << ", does not match with the value "
+                      << std::setprecision(16) << arg2
+                      << " on the owner processor " << arg3);
+
+      /**
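+       * Exception reporting an attempt to access an element of a distributed
+       * vector that is not stored on the current processor.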
+       */
+      DeclException4 (ExcAccessToNonLocalElement,
+                      size_type, size_type, size_type, size_type,
+                      << "You tried to access element " << arg1
+                      << " of a distributed vector, but this element is not "
+                      << "stored on the current processor. Note: The range of "
+                      << "locally owned elements is " << arg2 << " to "
+                      << arg3 << ", and there are " << arg4 << " ghost elements "
+                      << "that this vector can access.");
+
+    private:
+      /**
+       * Local part of all_zero().
+       */
+      bool all_zero_local () const;
+
+      /**
+       * Local part of is_non_negative().
+       */
+      bool is_non_negative_local () const;
+
+      /**
+       * Local part of operator==.
+       */
+      template <typename Number2>
+      bool vectors_equal_local (const Vector<Number2> &v) const;
+
+      /**
+       * Local part of the inner product of two vectors.
+       */
+      template <typename Number2>
+      Number inner_product_local (const Vector<Number2> &V) const;
+
+      /**
+       * Local part of norm_sqr().
+       */
+      real_type norm_sqr_local () const;
+
+      /**
+       * Local part of mean_value().
+       */
+      Number mean_value_local () const;
+
+      /**
+       * Local part of l1_norm().
+       */
+      real_type l1_norm_local () const;
+
+      /**
+       * Local part of lp_norm().
+       */
+      real_type lp_norm_local (const real_type p) const;
+
+      /**
+       * Local part of linfty_norm().
+       */
+      real_type linfty_norm_local () const;
+
+      /**
+       * Local part of the addition followed by an inner product of two
+       * vectors.
+       */
+      Number add_and_dot_local (const Number          a,
+                                const Vector<Number> &V,
+                                const Vector<Number> &W);
+
+      /**
+       * Shared pointer to store the parallel partitioning information. This
+       * information can be shared between several vectors that have the same
+       * partitioning.
+       */
+      std_cxx11::shared_ptr<const Utilities::MPI::Partitioner> partitioner;
+
+      /**
+       * The size that is currently allocated in the val array.
+       */
+      size_type allocated_size;
+
+      /**
+       * Pointer to the array of local elements of this vector.
+       */
+      Number         *val;
+
+      /**
+       * Temporary storage that holds the data that is sent to this processor
+       * in @p compress() or sent from this processor in @p
+       * update_ghost_values.
+       */
+      mutable Number *import_data;
+
+      /**
+       * Stores whether the vector currently allows for reading ghost elements
+       * or not. Note that this is to ensure consistent ghost data and does
+       * not indicate whether the vector actually can store ghost elements. In
+       * particular, when assembling a vector we do not allow reading
+       * elements, only writing them.
+       */
+      mutable bool vector_is_ghosted;
+
+      /**
+       * Provide this class with all functionality of ::dealii::Vector by
+       * creating a VectorView object.
+       */
+      VectorView<Number> vector_view;
+
+#ifdef DEAL_II_WITH_MPI
+      /**
+       * A vector that collects all requests from @p compress() operations.
+       * This class uses persistent MPI communicators, i.e., the communication
+       * channels are stored during successive calls to a given function. This
+       * reduces the overhead involved with setting up the MPI machinery, but
+       * it does not remove the need for a receive operation to be posted
+       * before the data can actually be sent.
+       */
+      std::vector<MPI_Request>   compress_requests;
+
+      /**
+       * A vector that collects all requests from @p update_ghost_values()
+       * operations. This class uses persistent MPI communicators.
+       */
+      mutable std::vector<MPI_Request>   update_ghost_values_requests;
+#endif
+
+      /**
+       * A lock that makes sure that the @p compress and @p
+       * update_ghost_values functions give reasonable results also when used
+       * with several threads.
+       */
+      mutable Threads::Mutex mutex;
+
+      /**
+       * A helper function that clears the compress_requests and
+       * update_ghost_values_requests field. Used in reinit functions.
+       */
+      void clear_mpi_requests ();
+
+      /**
+       * A helper function that is used to resize the val array.
+       */
+      void resize_val (const size_type new_allocated_size);
+
+      /**
+       * Make all other vector types friends.
+       */
+      template <typename Number2> friend class Vector;
+
+      /**
+       * Make BlockVector type friends.
+       */
+      template <typename Number2> friend class BlockVector;
+    };
+
+    /*@}*/
+
+
+    /*----------------------- Inline functions ----------------------------------*/
+
+#ifndef DOXYGEN
+
+    // Default constructor: creates an empty vector. It receives a newly
+    // allocated, default-constructed partitioner, no value or import
+    // storage, a cleared ghost flag and an empty view.
+    template <typename Number>
+    inline
+    Vector<Number>::Vector ()
+      :
+      partitioner (new Utilities::MPI::Partitioner()),
+      allocated_size (0),
+      val (0),
+      import_data (0),
+      vector_is_ghosted (false),
+      vector_view (0, static_cast<Number *>(0))
+    {}
+
+
+
+    // Copy constructor. Starts from an empty state, lets reinit() set this
+    // vector up from @p v, and then assigns the view of @p v to the own view.
+    template <typename Number>
+    inline
+    Vector<Number>::Vector (const Vector<Number> &v)
+      :
+      Subscriptor(),
+      allocated_size (0),
+      val (0),
+      import_data (0),
+      vector_is_ghosted (false),
+      vector_view (0, static_cast<Number *>(0))
+    {
+      // NOTE(review): the second argument presumably asks reinit() not to
+      // zero the entries because they are overwritten right below -- confirm
+      // against the reinit() declaration.
+      reinit (v, true);
+      vector_view = v.vector_view;
+      // zero_out_ghosts() zeros the ghost entries and clears the ghost flag,
+      // so the ghost state of v is not carried over to the new vector
+      zero_out_ghosts();
+    }
+
+
+
+    // Constructor taking the locally owned index set, the ghost index set
+    // and the communicator. All members start out empty; the actual setup is
+    // delegated to the reinit() overload with the same three arguments.
+    template <typename Number>
+    inline
+    Vector<Number>::Vector (const IndexSet &local_range,
+                            const IndexSet &ghost_indices,
+                            const MPI_Comm  communicator)
+      :
+      allocated_size (0),
+      val (0),
+      import_data (0),
+      vector_is_ghosted (false),
+      vector_view (0, static_cast<Number *>(0))
+    {
+      reinit (local_range, ghost_indices, communicator);
+    }
+
+
+
+    // Constructor taking only the locally owned index set and the
+    // communicator (no ghost index set is passed). All members start out
+    // empty; the actual setup is delegated to the matching reinit() overload.
+    template <typename Number>
+    inline
+    Vector<Number>::Vector (const IndexSet &local_range,
+                            const MPI_Comm  communicator)
+      :
+      allocated_size (0),
+      val (0),
+      import_data (0),
+      vector_is_ghosted (false),
+      vector_view (0, static_cast<Number *>(0))
+    {
+      reinit (local_range, communicator);
+    }
+
+
+
+    // Constructor taking only a size. All members start out empty; the
+    // actual setup is delegated to reinit(size, false).
+    template <typename Number>
+    inline
+    Vector<Number>::Vector (const size_type size)
+      :
+      allocated_size (0),
+      val (0),
+      import_data (0),
+      vector_is_ghosted (false),
+      vector_view (0, static_cast<Number *>(0))
+    {
+      // NOTE(review): the 'false' presumably requests that the entries be
+      // zero-initialized -- confirm against the reinit() declaration.
+      reinit (size, false);
+    }
+
+
+
+    // Constructor taking a shared pointer to an existing partitioner. All
+    // members start out empty; the actual setup is delegated to the reinit()
+    // overload that accepts the partitioner.
+    template <typename Number>
+    inline
+    Vector<Number>::
+    Vector (const std_cxx11::shared_ptr<const Utilities::MPI::Partitioner> &partitioner)
+      :
+      allocated_size (0),
+      val (0),
+      import_data (0),
+      vector_is_ghosted (false),
+      vector_view (0, static_cast<Number *>(0))
+    {
+      reinit (partitioner);
+    }
+
+
+
+    // Destructor: resizes the value array to zero length, frees the import
+    // buffer and clears the stored MPI requests.
+    template <typename Number>
+    inline
+    Vector<Number>::~Vector ()
+    {
+      resize_val(0);
+
+      // delete[] applied to a null pointer does nothing, so no prior check
+      // of import_data is required
+      delete[] import_data;
+      import_data = 0;
+
+      clear_mpi_requests();
+    }
+
+
+
+    // Copy assignment from a vector with the same number type. Forwards to
+    // the templated assignment operator with Number2 = Number and returns
+    // its result.
+    template <typename Number>
+    inline
+    Vector<Number> &
+    Vector<Number>::operator = (const Vector<Number> &c)
+    {
+#ifdef _MSC_VER
+      // this branch spells the call without the 'template' disambiguator
+      // (presumably because that compiler does not accept it here)
+      return this->operator=<Number>(c);
+#else
+      return this->template operator=<Number>(c);
+#endif
+    }
+
+
+
+    // Assignment from a vector with a possibly different number type. Adopts
+    // the layout of c through reinit() when this vector has no partitioner
+    // or when the locally owned ranges differ, copies the entries through
+    // the vector views, and finally either updates or zeros the ghost
+    // entries. Returns a reference to this vector.
+    template <typename Number>
+    template <typename Number2>
+    inline
+    Vector<Number> &
+    Vector<Number>::operator = (const Vector<Number2> &c)
+    {
+      Assert (c.partitioner.get() != 0, ExcNotInitialized());
+
+      // we update ghost values whenever one of the input or output vector
+      // already held ghost values or when we import data from a vector with
+      // the same local range but different ghost layout
+      bool must_update_ghost_values = c.vector_is_ghosted;
+
+      // check whether the two vectors use the same parallel partitioner. if
+      // not, check if all local ranges are the same (that way, we can
+      // exchange data between different parallel layouts). One variant which
+      // is included here and necessary for compatibility with the other
+      // distributed vector classes (Trilinos, PETSc) is the case when vector
+      // c does not have any ghosts (constructed without ghost elements given)
+      // but the current vector does: In that case, we need to exchange data
+      // also when none of the two vector had updated its ghost values before.
+      if (partitioner.get() == 0)
+        reinit (c, true);
+      else if (partitioner.get() != c.partitioner.get())
+        {
+          // local ranges are also the same if both partitioners are empty
+          // (even if they happen to define the empty range as [0,0) or [c,c)
+          // for some c!=0 in a different way).
+          int local_ranges_are_identical =
+            (local_range() == c.local_range() ||
+             (local_range().second == local_range().first &&
+              c.local_range().second == c.local_range().first));
+          // with more than one process, the minimum of the flag over the
+          // communicator of c is zero as soon as one process sees differing
+          // ranges; in that case this process reinitializes as well
+          if ((c.partitioner->n_mpi_processes() > 1 &&
+               Utilities::MPI::min(local_ranges_are_identical,
+                                   c.partitioner->get_communicator()) == 0)
+              ||
+              !local_ranges_are_identical)
+            reinit (c, true);
+          else
+            must_update_ghost_values |= vector_is_ghosted;
+
+          // c has no ghost indices set up but this vector does: ghost values
+          // must be exchanged in any case
+          must_update_ghost_values |=
+            (c.partitioner->ghost_indices_initialized() == false &&
+             partitioner->ghost_indices_initialized() == true);
+        }
+      else
+        must_update_ghost_values |= vector_is_ghosted;
+
+      // Need to explicitly downcast to dealii::Vector to make templated
+      // operator= available.
+      AssertDimension(vector_view.size(), c.vector_view.size());
+      static_cast<dealii::Vector<Number> &>(vector_view) = c.vector_view;
+
+      // leave the ghost entries in a consistent state: either freshly
+      // imported or zeroed with the ghost flag cleared
+      if (must_update_ghost_values)
+        update_ghost_values();
+      else
+        zero_out_ghosts();
+      return *this;
+    }
+
+
+
+    // Runs a complete compress cycle: calls compress_start() with 0 as its
+    // first argument (presumably the communication channel -- confirm
+    // against the declaration) and then compress_finish(), both with the
+    // given @p operation.
+    template <typename Number>
+    inline
+    void
+    Vector<Number>::compress (::dealii::VectorOperation::values operation)
+    {
+      compress_start (0, operation);
+      compress_finish(operation);
+    }
+
+
+
+    // Runs a complete ghost value update: starts the exchange on the default
+    // communication channel and immediately waits for it to finish.
+    template <typename Number>
+    inline
+    void
+    Vector<Number>::update_ghost_values () const
+    {
+      update_ghost_values_start ();
+      update_ghost_values_finish ();
+    }
+
+
+
+    // Sets the ghost entries, which are addressed in val starting at offset
+    // local_size(), to zero and marks the vector as not ghosted.
+    template <typename Number>
+    inline
+    void
+    Vector<Number>::zero_out_ghosts ()
+    {
+      // use pointer arithmetic instead of &val[...]: the constructors
+      // initialize val to a null pointer, and subscripting it would
+      // dereference null even when there is nothing to fill
+      std::fill_n (val + partitioner->local_size(),
+                   partitioner->n_ghost_indices(),
+                   Number());
+      vector_is_ghosted = false;
+    }
+
+
+
+    // Returns the current value of the vector_is_ghosted flag, i.e. whether
+    // ghost entries may currently be read.
+    template <typename Number>
+    inline
+    bool
+    Vector<Number>::has_ghost_elements () const
+    {
+      return vector_is_ghosted;
+    }
+
+
+
+    // Local part of all_zero(): asks the vector view whether all entries
+    // are zero. A process that owns no entries answers true.
+    template <typename Number>
+    inline
+    bool
+    Vector<Number>::all_zero_local () const
+    {
+      if (partitioner->local_size()>0)
+        return vector_view.all_zero ();
+
+      // no locally owned entries: the view is not queried
+      return true;
+    }
+
+
+
+    // Returns whether all entries are zero on every process.
+    template <typename Number>
+    inline
+    bool
+    Vector<Number>::all_zero () const
+    {
+      // The flag is carried as an int rather than a bool. It is negated so
+      // that the maximum over all processes is -1 only if every process
+      // reported true. The reduction is performed only with more than one
+      // process, which keeps this function usable when MPI_Init was not
+      // called.
+      const int negated_local_flag = -static_cast<int>(all_zero_local());
+      const int negated_global_flag =
+        (partitioner->n_mpi_processes() > 1)
+        ?
+        Utilities::MPI::max(negated_local_flag,
+                            partitioner->get_communicator())
+        :
+        negated_local_flag;
+      return -negated_global_flag;
+    }
+
+
+
+    // Local part of is_non_negative(): asks the vector view whether it has
+    // no negative entries. A process that owns no entries answers true.
+    template <typename Number>
+    inline
+    bool
+    Vector<Number>::is_non_negative_local () const
+    {
+      if (partitioner->local_size()>0)
+        return vector_view.is_non_negative ();
+
+      // no locally owned entries: the view is not queried
+      return true;
+    }
+
+
+
+    // Returns whether no process holds a negative entry.
+    template <typename Number>
+    inline
+    bool
+    Vector<Number>::is_non_negative () const
+    {
+      // The flag is carried as a negated int so that the maximum over all
+      // processes is -1 only if every process reported true. The reduction
+      // is performed only with more than one process.
+      const int negated_local_flag =
+        -static_cast<int>(is_non_negative_local());
+      const int negated_global_flag =
+        (partitioner->n_mpi_processes() > 1)
+        ?
+        Utilities::MPI::max(negated_local_flag,
+                            partitioner->get_communicator())
+        :
+        negated_local_flag;
+      return -negated_global_flag;
+    }
+
+
+
+    // Process-local part of operator==: compare the two local vector views,
+    // or report equality right away when this process stores no local
+    // entries.
+    template <typename Number>
+    template <typename Number2>
+    inline
+    bool
+    Vector<Number>::vectors_equal_local (const Vector<Number2> &v) const
+    {
+      if (partitioner->local_size()>0)
+        return vector_view.template operator == <Number2>(v.vector_view);
+
+      return true;
+    }
+
+
+
+    // Global comparison: the vectors are equal if vectors_equal_local()
+    // succeeds on every process.
+    template <typename Number>
+    template <typename Number2>
+    inline
+    bool
+    Vector<Number>::operator == (const Vector<Number2> &v) const
+    {
+      // MPI does not support bools, so the local outcome is encoded as an
+      // unsigned int that is 1 if the local parts differ and 0 otherwise. The
+      // maximum over all processes is 0 only if no process found a mismatch.
+      const unsigned int local_mismatch = vectors_equal_local(v) ? 0u : 1u;
+
+      if (partitioner->n_mpi_processes() > 1)
+        {
+          const unsigned int global_mismatch =
+            Utilities::MPI::max(local_mismatch, partitioner->get_communicator());
+          return global_mismatch==0;
+        }
+
+      return local_mismatch==0;
+    }
+
+
+
+    // Negation of operator==; since that operator may perform a global
+    // reduction over all processes, so may this one.
+    template <typename Number>
+    template <typename Number2>
+    inline
+    bool
+    Vector<Number>::operator != (const Vector<Number2> &v) const
+    {
+      return !(operator == (v));
+    }
+
+
+
+    // Process-local contribution to operator*: the product of the two local
+    // vector views.
+    template <typename Number>
+    template <typename Number2>
+    inline
+    Number
+    Vector<Number>::inner_product_local(const Vector<Number2> &V) const
+    {
+      // a process may own no entries at all, a size the dealii::Vector class
+      // does not allow; only touch the view when it is non-empty
+      if (partitioner->local_size()>0)
+        return vector_view.operator* (V.vector_view);
+
+      return Number();
+    }
+
+
+
+    // Global product: the sum of inner_product_local() over all processes.
+    // The MPI reduction is skipped when only one process is involved.
+    template <typename Number>
+    template <typename Number2>
+    inline
+    Number
+    Vector<Number>::operator * (const Vector<Number2> &V) const
+    {
+      const Number local_product = inner_product_local(V);
+      if (partitioner->n_mpi_processes() <= 1)
+        return local_product;
+
+      return Utilities::MPI::sum (local_product,
+                                  partitioner->get_communicator());
+    }
+
+
+
+    // Process-local contribution to norm_sqr(); a value-initialized
+    // real_type is returned when this process stores no local entries.
+    template <typename Number>
+    inline
+    typename Vector<Number>::real_type
+    Vector<Number>::norm_sqr_local () const
+    {
+      if (partitioner->local_size()>0)
+        return vector_view.norm_sqr();
+
+      return real_type();
+    }
+
+
+
+    // Global value: the sum of norm_sqr_local() over all processes. The MPI
+    // reduction is skipped when only one process is involved.
+    template <typename Number>
+    inline
+    typename Vector<Number>::real_type
+    Vector<Number>::norm_sqr () const
+    {
+      const real_type local_sum = norm_sqr_local();
+      if (partitioner->n_mpi_processes() <= 1)
+        return local_sum;
+
+      return Utilities::MPI::sum(local_sum,
+                                 partitioner->get_communicator());
+    }
+
+
+
+    // Mean value of the locally stored entries. In debug mode a vector of
+    // global size zero is rejected; a process without local entries yields a
+    // value-initialized Number.
+    template <typename Number>
+    inline
+    Number
+    Vector<Number>::mean_value_local () const
+    {
+      Assert (partitioner->size()!=0, ExcEmptyObject());
+
+      if (partitioner->local_size())
+        return vector_view.mean_value();
+
+      return Number();
+    }
+
+
+
+    // Global mean value. With several processes, each local mean is weighted
+    // by the local number of entries, the weighted values are summed over
+    // all processes, and the sum is divided by the global size.
+    template <typename Number>
+    inline
+    Number
+    Vector<Number>::mean_value () const
+    {
+      const Number local_mean = mean_value_local();
+      if (partitioner->n_mpi_processes() <= 1)
+        return local_mean;
+
+      return Utilities::MPI::sum (local_mean *
+                                  static_cast<real_type>(partitioner->local_size()),
+                                  partitioner->get_communicator())
+             / static_cast<real_type>(partitioner->size());
+    }
+
+
+
+    // Process-local contribution to l1_norm(); a value-initialized real_type
+    // is returned when this process stores no local entries.
+    template <typename Number>
+    inline
+    typename Vector<Number>::real_type
+    Vector<Number>::l1_norm_local () const
+    {
+      if (partitioner->local_size())
+        return vector_view.l1_norm();
+
+      return real_type();
+    }
+
+
+
+    // Global value: the sum of l1_norm_local() over all processes. The MPI
+    // reduction is skipped when only one process is involved.
+    template <typename Number>
+    inline
+    typename Vector<Number>::real_type
+    Vector<Number>::l1_norm () const
+    {
+      const real_type local_sum = l1_norm_local();
+      if (partitioner->n_mpi_processes() <= 1)
+        return local_sum;
+
+      return Utilities::MPI::sum(local_sum,
+                                 partitioner->get_communicator());
+    }
+
+
+
+    // Square root of norm_sqr(); any communication between processes happens
+    // inside norm_sqr().
+    template <typename Number>
+    inline
+    typename Vector<Number>::real_type
+    Vector<Number>::l2_norm () const
+    {
+      return std::sqrt(norm_sqr());
+    }
+
+
+
+    // Process-local lp norm of the local vector view; a value-initialized
+    // real_type is returned when this process stores no local entries.
+    template <typename Number>
+    inline
+    typename Vector<Number>::real_type
+    Vector<Number>::lp_norm_local (const real_type p) const
+    {
+      if (partitioner->local_size())
+        return vector_view.lp_norm(p);
+
+      return real_type();
+    }
+
+
+
+    // Global lp norm. With several processes, the local norms are raised to
+    // the power p again, summed over all processes, and the p-th root of the
+    // sum is returned.
+    template <typename Number>
+    inline
+    typename Vector<Number>::real_type
+    Vector<Number>::lp_norm (const real_type p) const
+    {
+      const real_type local_norm = lp_norm_local(p);
+      if (partitioner->n_mpi_processes() <= 1)
+        return local_norm;
+
+      const real_type global_power_sum =
+        Utilities::MPI::sum(std::pow(local_norm,p),
+                            partitioner->get_communicator());
+      return std::pow (global_power_sum,
+                       static_cast<real_type>(1.0/p));
+    }
+
+
+
+    // Process-local contribution to linfty_norm(); a value-initialized
+    // real_type is returned when this process stores no local entries.
+    template <typename Number>
+    inline
+    typename Vector<Number>::real_type
+    Vector<Number>::linfty_norm_local () const
+    {
+      if (partitioner->local_size())
+        return vector_view.linfty_norm();
+
+      return real_type();
+    }
+
+
+
+    // Global value: the maximum of linfty_norm_local() over all processes.
+    // The MPI reduction is skipped when only one process is involved.
+    template <typename Number>
+    inline
+    typename Vector<Number>::real_type
+    Vector<Number>::linfty_norm () const
+    {
+      const real_type local_max = linfty_norm_local();
+      if (partitioner->n_mpi_processes() <= 1)
+        return local_max;
+
+      return Utilities::MPI::max (local_max,
+                                  partitioner->get_communicator());
+    }
+
+
+
+    // Process-local part of add_and_dot(): forwards to add_and_dot() of the
+    // local vector view.
+    template <typename Number>
+    inline
+    Number
+    Vector<Number>::add_and_dot_local(const Number          a,
+                                      const Vector<Number> &V,
+                                      const Vector<Number> &W)
+    {
+      // a process may own no entries at all, a size the dealii::Vector class
+      // does not allow; only touch the view when it is non-empty
+      if (partitioner->local_size()>0)
+        return vector_view.add_and_dot(a, V.vector_view, W.vector_view);
+
+      return Number();
+    }
+
+
+
+    // Global value: the sum of add_and_dot_local() over all processes. The
+    // MPI reduction is skipped when only one process is involved.
+    template <typename Number>
+    inline
+    Number
+    Vector<Number>::add_and_dot (const Number          a,
+                                 const Vector<Number> &V,
+                                 const Vector<Number> &W)
+    {
+      const Number local_value = add_and_dot_local(a, V, W);
+      if (partitioner->n_mpi_processes() <= 1)
+        return local_value;
+
+      return Utilities::MPI::sum (local_value,
+                                  partitioner->get_communicator());
+    }
+
+
+
+    // Return the size reported by the partitioner, partitioner->size().
+    template <typename Number>
+    inline
+    typename Vector<Number>::size_type
+    Vector<Number>::size () const
+    {
+      return partitioner->size();
+    }
+
+
+
+    // Return the local size reported by the partitioner,
+    // partitioner->local_size().
+    template <typename Number>
+    inline
+    typename Vector<Number>::size_type
+    Vector<Number>::local_size () const
+    {
+      return partitioner->local_size();
+    }
+
+
+
+    // Return the pair of indices reported by partitioner->local_range().
+    template <typename Number>
+    inline
+    std::pair<typename Vector<Number>::size_type,
+        typename Vector<Number>::size_type>
+        Vector<Number>::local_range () const
+    {
+      return partitioner->local_range();
+    }
+
+
+
+    // Forward the query whether @p global_index lies in the local range to
+    // the partitioner.
+    template <typename Number>
+    inline
+    bool
+    Vector<Number>::in_local_range
+    (const size_type global_index) const
+    {
+      return partitioner->in_local_range (global_index);
+    }
+
+
+
+    // Build an IndexSet of size size() that contains exactly the index range
+    // returned by local_range().
+    template <typename Number>
+    inline
+    IndexSet
+    Vector<Number>::locally_owned_elements() const
+    {
+      const std::pair<size_type, size_type> owned_range = local_range();
+
+      IndexSet owned_indices (size());
+      owned_indices.add_range (owned_range.first, owned_range.second);
+
+      return owned_indices;
+    }
+
+
+
+    // Return the number of ghost indices reported by the partitioner.
+    template <typename Number>
+    inline
+    typename Vector<Number>::size_type
+    Vector<Number>::n_ghost_entries () const
+    {
+      return partitioner->n_ghost_indices();
+    }
+
+
+
+    // Return a reference to the ghost index set held by the partitioner.
+    template <typename Number>
+    inline
+    const IndexSet &
+    Vector<Number>::ghost_elements() const
+    {
+      return partitioner->ghost_indices();
+    }
+
+
+
+    // Forward the query whether @p global_index is a ghost entry to the
+    // partitioner.
+    template <typename Number>
+    inline
+    bool
+    Vector<Number>::is_ghost_entry (const size_type global_index) const
+    {
+      return partitioner->is_ghost_entry (global_index);
+    }
+
+
+
+    // Return the begin iterator of the local vector view.
+    template <typename Number>
+    inline
+    typename Vector<Number>::iterator
+    Vector<Number>::begin ()
+    {
+      return vector_view.begin();
+    }
+
+
+
+    // Return the begin iterator of the local vector view (const version).
+    template <typename Number>
+    inline
+    typename Vector<Number>::const_iterator
+    Vector<Number>::begin () const
+    {
+      return vector_view.begin();
+    }
+
+
+
+    // Return the end iterator of the local vector view.
+    template <typename Number>
+    inline
+    typename Vector<Number>::iterator
+    Vector<Number>::end ()
+    {
+      return vector_view.end();
+    }
+
+
+
+    // Return the end iterator of the local vector view (const version).
+    template <typename Number>
+    inline
+    typename Vector<Number>::const_iterator
+    Vector<Number>::end () const
+    {
+      return vector_view.end();
+    }
+
+
+
+    // Read access by global index. In debug mode, the index must either lie
+    // in the local range or be one of the partitioner's ghost indices, and a
+    // ghost entry may only be read while vector_is_ghosted is set. The
+    // position inside val is obtained from partitioner->global_to_local().
+    template <typename Number>
+    inline
+    Number
+    Vector<Number>::operator() (const size_type global_index) const
+    {
+      Assert (in_local_range (global_index) ||
+              partitioner->ghost_indices().is_element(global_index),
+              ExcAccessToNonLocalElement(global_index, local_range().first,
+                                         local_range().second,
+                                         partitioner->ghost_indices().n_elements()));
+      // do not allow reading a vector which is not in ghost mode
+      Assert (in_local_range (global_index) || vector_is_ghosted == true,
+              ExcMessage("You tried to read a ghost element of this vector, "
+                         "but it has not imported its ghost values."));
+      return val[partitioner->global_to_local(global_index)];
+    }
+
+
+
+    // Read/write access by global index. In debug mode, the index must
+    // either lie in the local range or be one of the partitioner's ghost
+    // indices. Unlike the const version, the state of vector_is_ghosted is
+    // not checked here.
+    template <typename Number>
+    inline
+    Number &
+    Vector<Number>::operator() (const size_type global_index)
+    {
+      Assert (in_local_range (global_index) ||
+              partitioner->ghost_indices().is_element(global_index),
+              ExcAccessToNonLocalElement(global_index, local_range().first,
+                                         local_range().second,
+                                         partitioner->ghost_indices().n_elements()));
+      // we would like to prevent reading ghosts from a vector that does not
+      // have them imported, but this is not possible because we might be in a
+      // part of the code where the vector has enabled ghosts but is non-const
+      // (then, the compiler picks this method according to the C++ rule book
+      // even if a human would pick the const method when this subsequent use
+      // is just a read)
+      return val[partitioner->global_to_local (global_index)];
+    }
+
+
+
+    // Same as the const operator()(global_index), including its debug-mode
+    // checks.
+    template <typename Number>
+    inline
+    Number
+    Vector<Number>::operator[] (const size_type global_index) const
+    {
+      return operator()(global_index);
+    }
+
+
+
+    // Same as the non-const operator()(global_index), including its
+    // debug-mode checks.
+    template <typename Number>
+    inline
+    Number &
+    Vector<Number>::operator[] (const size_type global_index)
+    {
+      return operator()(global_index);
+    }
+
+
+
+    // Copy the entries addressed by the global indices in @p indices into
+    // the leading entries of @p values, reading each one through the const
+    // operator() (so its debug-mode checks apply). @p values is not resized
+    // and its length is not checked here: it must already provide at least
+    // indices.size() entries.
+    template <typename Number>
+    template <typename OtherNumber>
+    inline
+    void Vector<Number>::extract_subvector_to (const std::vector<size_type> &indices,
+                                               std::vector<OtherNumber> &values) const
+    {
+      for (size_type i = 0; i < indices.size(); ++i)
+        values[i] = operator()(indices[i]);
+    }
+
+
+
+    // Iterator version: for every global index in the range
+    // [indices_begin, indices_end), read the entry through the const
+    // operator() and write it to the output sequence starting at
+    // @p values_begin, advancing both iterators in lock step.
+    template <typename Number>
+    template <typename ForwardIterator, typename OutputIterator>
+    inline
+    void Vector<Number>::extract_subvector_to (ForwardIterator          indices_begin,
+                                               const ForwardIterator    indices_end,
+                                               OutputIterator           values_begin) const
+    {
+      for (; indices_begin != indices_end; indices_begin++)
+        {
+          *values_begin = operator()(*indices_begin);
+          values_begin++;
+        }
+    }
+
+
+
+    // Read access by local index into val. In debug mode, the index must be
+    // smaller than local_size() + n_ghost_indices(), and an index at or
+    // beyond local_size() may only be read while vector_is_ghosted is set.
+    template <typename Number>
+    inline
+    Number
+    Vector<Number>::local_element (const size_type local_index) const
+    {
+      AssertIndexRange (local_index,
+                        partitioner->local_size()+
+                        partitioner->n_ghost_indices());
+      // do not allow reading a vector which is not in ghost mode
+      Assert (local_index < local_size() || vector_is_ghosted == true,
+              ExcMessage("You tried to read a ghost element of this vector, "
+                         "but it has not imported its ghost values."));
+      return val[local_index];
+    }
+
+
+
+    // Read/write access by local index into val. In debug mode, the index
+    // must be smaller than local_size() + n_ghost_indices(); the state of
+    // vector_is_ghosted is not checked here.
+    template <typename Number>
+    inline
+    Number &
+    Vector<Number>::local_element (const size_type local_index)
+    {
+      AssertIndexRange (local_index,
+                        partitioner->local_size()+
+                        partitioner->n_ghost_indices());
+      return val[local_index];
+    }
+
+
+
+    // Assign the scalar @p s to the local vector view (skipped when this
+    // process stores no local entries). If @p s equals a value-initialized
+    // Number, the ghost entries are zeroed as well via zero_out_ghosts(),
+    // which also clears the vector_is_ghosted flag.
+    template <typename Number>
+    inline
+    Vector<Number> &
+    Vector<Number>::operator = (const Number s)
+    {
+      // if we call Vector::operator=0, we want to zero out all the entries
+      // plus ghosts.
+      if (partitioner->local_size() > 0)
+        vector_view.dealii::template Vector<Number>::operator= (s);
+      if (s==Number())
+        zero_out_ghosts();
+
+      return *this;
+    }
+
+
+
+    // Apply operator+= of the local vector view with the view of @p v. In
+    // debug mode the local sizes of both vectors must agree. If this vector
+    // is currently ghosted, update_ghost_values() is called afterwards.
+    template <typename Number>
+    inline
+    Vector<Number> &
+    Vector<Number>::operator += (const Vector<Number> &v)
+    {
+      AssertDimension (local_size(), v.local_size());
+
+      // dealii::Vector does not allow empty fields but this might happen on
+      // some processors for parallel implementation
+      if (local_size()>0)
+        vector_view += v.vector_view;
+
+      if (vector_is_ghosted)
+        update_ghost_values();
+
+      return *this;
+    }
+
+
+
+    // Apply operator-= of the local vector view with the view of @p v. In
+    // debug mode the local sizes of both vectors must agree. If this vector
+    // is currently ghosted, update_ghost_values() is called afterwards.
+    template <typename Number>
+    inline
+    Vector<Number> &
+    Vector<Number>::operator -= (const Vector<Number> &v)
+    {
+      AssertDimension (local_size(), v.local_size());
+
+      // dealii::Vector does not allow empty fields but this might happen on
+      // some processors for parallel implementation
+      if (local_size()>0)
+        vector_view -= v.vector_view;
+
+      if (vector_is_ghosted)
+        update_ghost_values();
+
+      return *this;
+    }
+
+
+
+    // Add the entries of @p values to the vector elements addressed by the
+    // global indices in @p indices. Both containers must have the same
+    // length (checked in debug mode). The work is done by the pointer-based
+    // add() overload.
+    template <typename Number>
+    template <typename OtherNumber>
+    inline
+    void
+    Vector<Number>::add (const std::vector<size_type> &indices,
+                         const std::vector<OtherNumber>  &values)
+    {
+      AssertDimension (indices.size(), values.size());
+
+      // accessing element 0 of an empty std::vector is undefined behavior,
+      // and there is nothing to add in that case anyway
+      if (indices.empty())
+        return;
+
+      add (indices.size(), &indices[0], &values[0]);
+    }
+
+
+
+    // Add the entries of the serial vector @p values to the vector elements
+    // addressed by the global indices in @p indices. Both containers must
+    // have the same length (checked in debug mode). The work is done by the
+    // pointer-based add() overload.
+    template <typename Number>
+    template <typename OtherNumber>
+    inline
+    void
+    Vector<Number>::add (const std::vector<size_type>    &indices,
+                         const ::dealii::Vector<OtherNumber> &values)
+    {
+      AssertDimension (indices.size(), values.size());
+
+      // accessing element 0 of an empty std::vector is undefined behavior,
+      // and there is nothing to add in that case anyway
+      if (indices.empty())
+        return;
+
+      add (indices.size(), &indices[0], values.begin());
+    }
+
+
+
+    // Add values[i] to the element with global index indices[i] for
+    // i = 0, ..., n_indices-1. Elements are accessed through the non-const
+    // operator(), so its debug-mode index check applies. In debug mode every
+    // value must additionally be finite.
+    template <typename Number>
+    template <typename OtherNumber>
+    inline
+    void
+    Vector<Number>::add (const size_type    n_indices,
+                         const size_type   *indices,
+                         const OtherNumber *values)
+    {
+      for (size_type i=0; i<n_indices; ++i)
+        {
+          Assert (numbers::is_finite(values[i]),
+                  ExcMessage("The given value is not finite but either infinite or Not A Number (NaN)"));
+          this->operator()(indices[i]) += values[i];
+        }
+    }
+
+
+
+    // Forward add(a) to the local vector view, then refresh the ghost values
+    // if this vector is currently ghosted.
+    template <typename Number>
+    inline
+    void
+    Vector<Number>::add (const Number a)
+    {
+      // dealii::Vector does not allow empty fields but this might happen on
+      // some processors for parallel implementation
+      if (local_size())
+        vector_view.add (a);
+
+      if (vector_is_ghosted)
+        update_ghost_values();
+    }
+
+
+
+    // Forward add(v) to the local vector views, then refresh the ghost
+    // values if this vector is currently ghosted.
+    template <typename Number>
+    inline
+    void
+    Vector<Number>::add (const Vector<Number> &v)
+    {
+      // dealii::Vector does not allow empty fields but this might happen on
+      // some processors for parallel implementation
+      if (local_size())
+        vector_view.add (v.vector_view);
+
+      if (vector_is_ghosted)
+        update_ghost_values();
+    }
+
+
+
+    // Forward add(a, v) to the local vector views, then refresh the ghost
+    // values if this vector is currently ghosted.
+    template <typename Number>
+    inline
+    void
+    Vector<Number>::add (const Number a,
+                         const Vector<Number> &v)
+    {
+      // dealii::Vector does not allow empty fields but this might happen on
+      // some processors for parallel implementation
+      if (local_size())
+        vector_view.add (a, v.vector_view);
+
+      if (vector_is_ghosted)
+        update_ghost_values();
+    }
+
+
+
+    // Forward add(a, v, b, w) to the local vector views, then refresh the
+    // ghost values if this vector is currently ghosted.
+    template <typename Number>
+    inline
+    void
+    Vector<Number>::add (const Number a,
+                         const Vector<Number> &v,
+                         const Number b,
+                         const Vector<Number> &w)
+    {
+      // dealii::Vector does not allow empty fields but this might happen on
+      // some processors for parallel implementation
+      if (local_size())
+        vector_view.add (a, v.vector_view, b, w.vector_view);
+
+      if (vector_is_ghosted)
+        update_ghost_values();
+    }
+
+
+
+    // Forward sadd(x, v) to the local vector views, then refresh the ghost
+    // values if this vector is currently ghosted.
+    template <typename Number>
+    inline
+    void
+    Vector<Number>::sadd (const Number x,
+                          const Vector<Number> &v)
+    {
+      // dealii::Vector does not allow empty fields but this might happen on
+      // some processors for parallel implementation
+      if (local_size())
+        vector_view.sadd (x, v.vector_view);
+
+      if (vector_is_ghosted)
+        update_ghost_values();
+    }
+
+
+
+    // Forward sadd(x, a, v) to the local vector views, then refresh the
+    // ghost values if this vector is currently ghosted.
+    template <typename Number>
+    inline
+    void
+    Vector<Number>::sadd (const Number x,
+                          const Number a,
+                          const Vector<Number> &v)
+    {
+      // dealii::Vector does not allow empty fields but this might happen on
+      // some processors for parallel implementation
+      if (local_size())
+        vector_view.sadd (x, a, v.vector_view);
+
+      if (vector_is_ghosted)
+        update_ghost_values();
+    }
+
+
+
+    // Forward sadd(x, a, v, b, w) to the local vector views, then refresh
+    // the ghost values if this vector is currently ghosted.
+    template <typename Number>
+    inline
+    void
+    Vector<Number>::sadd (const Number x,
+                          const Number a,
+                          const Vector<Number> &v,
+                          const Number b,
+                          const Vector<Number> &w)
+    {
+      // dealii::Vector does not allow empty fields but this might happen on
+      // some processors for parallel implementation
+      if (local_size())
+        vector_view.sadd (x, a, v.vector_view, b, w.vector_view);
+
+      if (vector_is_ghosted)
+        update_ghost_values();
+    }
+
+
+
+    // Forward sadd(s, a, v, b, w, c, x) to the local vector views, then
+    // refresh the ghost values if this vector is currently ghosted.
+    template <typename Number>
+    inline
+    void
+    Vector<Number>::sadd (const Number s,
+                          const Number a,
+                          const Vector<Number> &v,
+                          const Number b,
+                          const Vector<Number> &w,
+                          const Number c,
+                          const Vector<Number> &x)
+    {
+      // dealii::Vector does not allow empty fields but this might happen on
+      // some processors for parallel implementation
+      if (local_size())
+        vector_view.sadd (s, a, v.vector_view, b, w.vector_view,
+                          c, x.vector_view);
+
+      if (vector_is_ghosted)
+        update_ghost_values();
+    }
+
+
+
+    // Apply operator*= of the local vector view with @p factor, then refresh
+    // the ghost values if this vector is currently ghosted.
+    template <typename Number>
+    inline
+    Vector<Number> &
+    Vector<Number>::operator *= (const Number factor)
+    {
+      // dealii::Vector does not allow empty fields but this might happen on
+      // some processors for parallel implementation
+      if (local_size())
+        vector_view *= factor;
+
+      if (vector_is_ghosted)
+        update_ghost_values();
+
+      return *this;
+    }
+
+
+
+    // Divide by @p factor, implemented as a multiplication with its
+    // reciprocal through operator*= (which also takes care of refreshing the
+    // ghost values). @p factor is not checked against zero here.
+    template <typename Number>
+    inline
+    Vector<Number> &
+    Vector<Number>::operator /= (const Number factor)
+    {
+      // form the reciprocal in the arithmetic of Number itself; dividing the
+      // double literal 1. by factor would mix types, which does not compile
+      // for every Number (e.g. std::complex<float>)
+      operator *= (static_cast<Number>(1.)/factor);
+      return *this;
+    }
+
+
+
+    // Forward scale(scaling_factors) to the local vector views, then refresh
+    // the ghost values if this vector is currently ghosted.
+    template <typename Number>
+    inline
+    void
+    Vector<Number>::scale (const Vector<Number> &scaling_factors)
+    {
+      // dealii::Vector does not allow empty fields but this might happen on
+      // some processors for parallel implementation
+      if (local_size())
+        vector_view.scale (scaling_factors.vector_view);
+
+      if (vector_is_ghosted)
+        update_ghost_values();
+    }
+
+
+
+    // Same as scale() above, but for scaling factors stored with a different
+    // number type Number2.
+    template <typename Number>
+    template <typename Number2>
+    inline
+    void
+    Vector<Number>::scale (const Vector<Number2> &scaling_factors)
+    {
+      // skip the view when this process stores no local entries
+      if (local_size())
+        vector_view.template scale<Number2> (scaling_factors.vector_view);
+
+      if (vector_is_ghosted)
+        update_ghost_values();
+    }
+
+
+
+    // Forward equ(a, v) to the local vector views, then refresh the ghost
+    // values if this vector is currently ghosted.
+    template <typename Number>
+    inline
+    void
+    Vector<Number>::equ (const Number a,
+                         const Vector<Number> &v)
+    {
+      // dealii::Vector does not allow empty fields but this might happen on
+      // some processors for parallel implementation
+      if (local_size())
+        vector_view.equ (a, v.vector_view);
+
+      if (vector_is_ghosted)
+        update_ghost_values();
+    }
+
+
+
+    // Same as equ(a, v) above, but for a source vector stored with a
+    // different number type Number2.
+    template <typename Number>
+    template <typename Number2>
+    inline
+    void
+    Vector<Number>::equ (const Number a,
+                         const Vector<Number2> &v)
+    {
+      // dealii::Vector does not allow empty fields but this might happen on
+      // some processors for parallel implementation
+      if (local_size())
+        vector_view.equ (a, v.vector_view);
+
+      if (vector_is_ghosted)
+        update_ghost_values();
+    }
+
+
+
+    // Forward equ(a, v, b, w) to the local vector views, then refresh the
+    // ghost values if this vector is currently ghosted.
+    template <typename Number>
+    inline
+    void
+    Vector<Number>::equ (const Number a,
+                         const Vector<Number> &v,
+                         const Number b,
+                         const Vector<Number> &w)
+    {
+      // dealii::Vector does not allow empty fields but this might happen on
+      // some processors for parallel implementation
+      if (local_size())
+        vector_view.equ (a, v.vector_view, b, w.vector_view);
+
+      if (vector_is_ghosted)
+        update_ghost_values();
+    }
+
+
+
+    // Forward equ(a, v, b, w, c, x) to the local vector views, then refresh
+    // the ghost values if this vector is currently ghosted.
+    template <typename Number>
+    inline
+    void
+    Vector<Number>::equ (const Number a,
+                         const Vector<Number> &v,
+                         const Number b,
+                         const Vector<Number> &w,
+                         const Number c,
+                         const Vector<Number> &x)
+    {
+      // dealii::Vector does not allow empty fields but this might happen on
+      // some processors for parallel implementation
+      if (local_size())
+        vector_view.equ (a, v.vector_view, b, w.vector_view,
+                         c, x.vector_view);
+
+      if (vector_is_ghosted)
+        update_ghost_values();
+    }
+
+
+
+    // Forward ratio(a, b) to the local vector views, then refresh the ghost
+    // values if this vector is currently ghosted.
+    template <typename Number>
+    inline
+    void
+    Vector<Number>::ratio (const Vector<Number> &a,
+                           const Vector<Number> &b)
+    {
+      // dealii::Vector does not allow empty fields but this might happen on
+      // some processors for parallel implementation
+      if (local_size())
+        vector_view.ratio (a.vector_view, b.vector_view);
+
+      if (vector_is_ghosted)
+        update_ghost_values();
+    }
+
+
+
+    // Return a reference to the communicator held by the partitioner.
+    template <typename Number>
+    inline
+    const MPI_Comm &
+    Vector<Number>::get_mpi_communicator() const
+    {
+      return partitioner->get_communicator();
+    }
+
+
+
+    // Return a copy of the shared pointer to the partitioner, i.e. the
+    // caller shares ownership of the same (const) partitioner object.
+    template <typename Number>
+    inline
+    std_cxx11::shared_ptr<const Utilities::MPI::Partitioner>
+    Vector<Number>::get_partitioner () const
+    {
+      return partitioner;
+    }
+
+#endif  // ifndef DOXYGEN
+
+  } // end of namespace distributed
+
+} // end of namespace parallel
+
+
+
+/**
+ * Overload of the global function @p swap for parallel::distributed::Vector.
+ * Unlike the default implementation of the C++ standard library, which uses
+ * a temporary object, this function simply exchanges the data of the two
+ * vectors by calling <tt>u.swap(v)</tt>.
+ *
+ * @relates Vector
+ * @author Katharina Kormann, Martin Kronbichler, 2011
+ */
+template <typename Number>
+inline
+void swap (parallel::distributed::Vector<Number> &u,
+           parallel::distributed::Vector<Number> &v)
+{
+  u.swap (v);
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/parallel_vector.templates.h b/include/deal.II/lac/parallel_vector.templates.h
new file mode 100644
index 0000000..a3d585f
--- /dev/null
+++ b/include/deal.II/lac/parallel_vector.templates.h
@@ -0,0 +1,810 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2011 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__parallel_vector_templates_h
+#define dealii__parallel_vector_templates_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/lac/parallel_vector.h>
+#include <deal.II/lac/vector_view.h>
+
+#include <deal.II/lac/petsc_parallel_vector.h>
+#include <deal.II/lac/trilinos_vector.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace parallel
+{
+  namespace distributed
+  {
+
+    // Free every MPI request handle stored in compress_requests and in
+    // update_ghost_values_requests and empty both lists. Without MPI support
+    // (DEAL_II_WITH_MPI not defined) the function body is empty.
+    template <typename Number>
+    void
+    Vector<Number>::clear_mpi_requests ()
+    {
+#ifdef DEAL_II_WITH_MPI
+      for (size_type r=0; r<compress_requests.size(); ++r)
+        MPI_Request_free(&compress_requests[r]);
+      compress_requests.clear();
+
+      for (size_type r=0; r<update_ghost_values_requests.size(); ++r)
+        MPI_Request_free(&update_ghost_values_requests[r]);
+      update_ghost_values_requests.clear();
+#endif
+    }
+
+
+
+    // Make sure val provides room for at least @p new_alloc_size numbers.
+    //  - If more entries are requested than currently allocated, the old
+    //    array is released and a new one with 64-byte alignment is
+    //    allocated; the old contents are NOT copied over.
+    //  - If zero entries are requested, the array is released.
+    //  - Otherwise the existing (larger or equal) allocation is kept.
+    template <typename Number>
+    void
+    Vector<Number>::resize_val (const size_type new_alloc_size)
+    {
+      if (new_alloc_size > allocated_size)
+        {
+          Assert (((allocated_size > 0 && val != 0) ||
+                   val == 0), ExcInternalError());
+          if (val != 0)
+            free(val);
+
+          // forget the released array before asking for a new one: if the
+          // allocation below does not complete (for instance because the
+          // helper reports the failure by throwing -- TODO confirm), the
+          // object must not be left with a pointer to freed memory that
+          // would later be freed a second time
+          val = 0;
+          allocated_size = 0;
+
+          Utilities::System::posix_memalign ((void **)&val, 64, sizeof(Number)*new_alloc_size);
+
+          allocated_size = new_alloc_size;
+        }
+      else if (new_alloc_size == 0)
+        {
+          if (val != 0)
+            free(val);
+          val = 0;
+          allocated_size = 0;
+        }
+    }
+
+
+
+    // Set the vector up with @p size entries and a partitioner that is
+    // constructed from the size alone (the serial version). Entries are
+    // zeroed unless @p omit_zeroing_entries is true.
+    template <typename Number>
+    void
+    Vector<Number>::reinit (const size_type size,
+                            const bool      omit_zeroing_entries)
+    {
+      clear_mpi_requests();
+
+      // adjust the allocation if necessary and point the view to it
+      resize_val (size);
+      vector_view.reinit (size, val);
+
+      // discard the import buffer (deleting a null pointer is a no-op)
+      delete[] import_data;
+      import_data = 0;
+
+      // switch to the serial partitioner
+      partitioner.reset (new Utilities::MPI::Partitioner (size));
+
+      // zero the entries unless the caller opted out
+      if (!omit_zeroing_entries)
+        this->operator = (Number());
+
+      vector_is_ghosted = false;
+    }
+
+
+
+    // Take over the parallel layout of @p v by sharing its partitioner;
+    // the values of @p v are not copied. Storage is only re-sized when the
+    // partitioner object actually changes. Entries are zeroed unless
+    // @p omit_zeroing_entries is true. The import buffer is released and the
+    // vector ends up in non-ghosted state.
+    template <typename Number>
+    template <typename Number2>
+    void
+    Vector<Number>::reinit (const Vector<Number2> &v,
+                            const bool             omit_zeroing_entries)
+    {
+      clear_mpi_requests();
+      Assert (v.partitioner.get() != 0, ExcNotInitialized());
+
+      // check whether the partitioners are
+      // different (check only if they are allocated
+      // differently, not if the actual data is
+      // different)
+      if (partitioner.get() != v.partitioner.get())
+        {
+          partitioner = v.partitioner;
+          const size_type new_allocated_size = partitioner->local_size() +
+                                               partitioner->n_ghost_indices();
+          resize_val (new_allocated_size);
+          vector_view.reinit (partitioner->local_size(), val);
+        }
+      else
+        Assert (vector_view.size() == partitioner->local_size(),
+                ExcInternalError());
+
+      if (omit_zeroing_entries == false)
+        this->operator= (Number());
+
+      if (import_data != 0)
+        {
+          delete [] import_data;
+
+          // do not reallocate import_data directly, but only upon request. It
+          // is only used as temporary storage for compress() and
+          // update_ghost_values, and we might have vectors where we never
+          // call these methods and hence do not need to have the storage.
+          import_data = 0;
+        }
+
+      vector_is_ghosted = false;
+    }
+
+
+
+    // Create a new partitioner from the locally owned indices, the ghost
+    // indices and the communicator, and hand it to the partitioner-based
+    // reinit() function.
+    template <typename Number>
+    void
+    Vector<Number>::reinit (const IndexSet &locally_owned_indices,
+                            const IndexSet &ghost_indices,
+                            const MPI_Comm  communicator)
+    {
+      // set up parallel partitioner with index sets and communicator
+      std_cxx11::shared_ptr<const Utilities::MPI::Partitioner> new_partitioner
+      (new Utilities::MPI::Partitioner (locally_owned_indices,
+                                        ghost_indices, communicator));
+      reinit (new_partitioner);
+    }
+
+
+
+    // Create a new partitioner from the locally owned indices and the
+    // communicator (no ghost index set is passed), and hand it to the
+    // partitioner-based reinit() function.
+    template <typename Number>
+    void
+    Vector<Number>::reinit (const IndexSet &locally_owned_indices,
+                            const MPI_Comm  communicator)
+    {
+      // set up parallel partitioner with index sets and communicator
+      std_cxx11::shared_ptr<const Utilities::MPI::Partitioner> new_partitioner
+      (new Utilities::MPI::Partitioner (locally_owned_indices,
+                                        communicator));
+      reinit (new_partitioner);
+    }
+
+
+
+    // Adopt the given partitioner, size the storage for its local plus ghost
+    // entries, and set all entries to zero. The import buffer is released
+    // and the vector ends up in non-ghosted state.
+    template <typename Number>
+    void
+    Vector<Number>::reinit (const std_cxx11::shared_ptr<const Utilities::MPI::Partitioner> &partitioner_in)
+    {
+      clear_mpi_requests();
+      partitioner = partitioner_in;
+
+      // room is needed for the local entries followed by the ghost entries;
+      // the view only spans the local part
+      const size_type n_local   = partitioner->local_size();
+      const size_type n_ghosts  = partitioner->n_ghost_indices();
+      resize_val (n_local + n_ghosts);
+      vector_view.reinit (partitioner->local_size(), val);
+
+      // start from an all-zero vector
+      this->operator= (Number());
+
+      // import_data is not reallocated here but only upon request. It is
+      // only used as temporary storage for compress() and
+      // update_ghost_values, and there may be vectors for which these
+      // methods are never called and which hence do not need the storage.
+      // (Deleting a null pointer is a no-op.)
+      delete [] import_data;
+      import_data = 0;
+
+      vector_is_ghosted = false;
+    }
+
+
+
+#ifdef DEAL_II_WITH_PETSC
+
+    namespace internal
+    {
+      // Helper overloads that copy the range [petsc_start_ptr, petsc_end_ptr)
+      // of PETSc scalars to the array starting at ptr. The overloads
+      // distinguish whether source and destination are complex-valued.
+
+      // Generic case: element-wise copy with std::copy.
+      template <typename PETSC_Number, typename Number>
+      void copy_petsc_vector (const PETSC_Number *petsc_start_ptr,
+                              const PETSC_Number *petsc_end_ptr,
+                              Number *ptr)
+      {
+        std::copy(petsc_start_ptr, petsc_end_ptr, ptr);
+      }
+
+      // Complex source, complex destination: element-wise copy with
+      // std::copy.
+      template <typename PETSC_Number, typename Number>
+      void copy_petsc_vector (const std::complex<PETSC_Number> *petsc_start_ptr,
+                              const std::complex<PETSC_Number> *petsc_end_ptr,
+                              std::complex<Number> *ptr)
+      {
+        std::copy(petsc_start_ptr, petsc_end_ptr, ptr);
+      }
+
+      // Complex source, non-complex destination: not supported; always
+      // throws and copies nothing.
+      template <typename PETSC_Number, typename Number>
+      void copy_petsc_vector (const std::complex<PETSC_Number> *petsc_start_ptr,
+                              const std::complex<PETSC_Number> *petsc_end_ptr,
+                              Number *ptr)
+      {
+        AssertThrow(false, ExcMessage("Tried to copy complex -> real"));
+      }
+    }
+
+    /**
+     * Copy the locally owned values of a PETSc vector into this vector. Both
+     * vectors must own the same set of indices. Ghost values are updated
+     * afterwards if either vector is ghosted.
+     */
+    template <typename Number>
+    Vector<Number> &
+    Vector<Number>::operator = (const PETScWrappers::MPI::Vector &petsc_vec)
+    {
+      Assert(petsc_vec.locally_owned_elements() == locally_owned_elements(),
+             StandardExceptions::ExcInvalidState());
+
+      const Vec &petsc_handle = static_cast<const Vec &>(petsc_vec);
+
+      // borrow the raw PETSc array for the duration of the copy
+      PetscScalar *petsc_values;
+      int ierr = VecGetArray (petsc_handle, &petsc_values);
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+      // only the locally owned range is copied
+      internal::copy_petsc_vector (petsc_values,
+                                   petsc_values + local_size(),
+                                   begin());
+
+      // hand the array back to PETSc
+      ierr = VecRestoreArray (petsc_handle, &petsc_values);
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+      // bring the ghost entries up to date if either side is ghosted
+      if (vector_is_ghosted || petsc_vec.has_ghost_elements())
+        update_ghost_values();
+
+      // return a reference to this object, as usual for assignment operators
+      return *this;
+    }
+
+#endif
+
+
+
+#ifdef DEAL_II_WITH_TRILINOS
+
+    /**
+     * Copy the locally owned values of a Trilinos vector into this vector.
+     * Ghost values are updated afterwards if either vector is ghosted.
+     */
+    template <typename Number>
+    Vector<Number> &
+    Vector<Number>::operator = (const TrilinosWrappers::MPI::Vector &trilinos_vec)
+    {
+      if (trilinos_vec.has_ghost_elements())
+        {
+          // a ghosted Trilinos vector has to contain the local range of this
+          // vector, which is contiguous
+          Assert((trilinos_vec.locally_owned_elements() & locally_owned_elements())
+                 == locally_owned_elements(),
+                 StandardExceptions::ExcInvalidState());
+        }
+      else
+        {
+          // without ghosts, both vectors must own the same indices
+          Assert(trilinos_vec.locally_owned_elements() == locally_owned_elements(),
+                 StandardExceptions::ExcInvalidState());
+        }
+
+      // position of our first owned entry inside the Trilinos data; stays at
+      // zero if the Trilinos map holds no elements on this process
+      std::size_t first_entry = 0;
+      if (trilinos_vec.vector_partitioner().NumMyElements() > 0)
+        first_entry = trilinos_vec.vector_partitioner().
+                      LID(static_cast<TrilinosWrappers::types::int_type>(this->local_range().first));
+
+      // wrap the Trilinos data in a view and copy it through the regular
+      // dealii::Vector assignment
+      const VectorView<double> trilinos_view (local_size(),
+                                              trilinos_vec.begin()+first_entry);
+      static_cast<dealii::Vector<Number>&>(vector_view) =
+        static_cast<const dealii::Vector<double>&>(trilinos_view);
+
+      // bring the ghost entries up to date if either side is ghosted
+      if (vector_is_ghosted || trilinos_vec.has_ghost_elements())
+        update_ghost_values();
+
+      // return a reference to this object, as usual for assignment operators
+      return *this;
+    }
+
+#endif
+
+
+
+    /**
+     * Copy the locally owned entries of @p c into this vector. Both vectors
+     * must have the same local range. If @p call_update_ghost_values is true
+     * the ghost entries are updated afterwards, otherwise the vector is
+     * marked as not ghosted.
+     */
+    template <typename Number>
+    void
+    Vector<Number>::copy_from (const Vector<Number> &c,
+                               const bool            call_update_ghost_values)
+    {
+      AssertDimension (local_range().first, c.local_range().first);
+      AssertDimension (local_range().second, c.local_range().second);
+      AssertDimension (vector_view.size(), c.vector_view.size());
+
+      vector_view = c.vector_view;
+
+      if (!call_update_ghost_values)
+        {
+          // the ghost entries were not refreshed
+          vector_is_ghosted = false;
+          return;
+        }
+
+      update_ghost_values();
+    }
+
+
+
+    /**
+     * Start the communication of compress(). With MPI enabled, this posts
+     * one receive per import target into the import_data buffer and one send
+     * per ghost target out of the ghost section of the value array (the
+     * entries behind the locally owned ones). The requests are created with
+     * MPI_Recv_init/MPI_Send_init the first time the function is called and
+     * are started with MPI_Startall on every call.
+     *
+     * @param counter Used to compute the MPI message tag; the channel is
+     * counter+400 to stay apart from the tags of update_ghost_values.
+     * @param operation In non-DEBUG builds, VectorOperation::insert makes
+     * the function return without starting any communication.
+     *
+     * Must not be called on a ghosted vector (checked by an Assert). Without
+     * MPI the function only performs that check.
+     */
+    template <typename Number>
+    void
+    Vector<Number>::compress_start (const unsigned int counter,
+                                    ::dealii::VectorOperation::values operation)
+    {
+      // the parameters are not referenced in every build configuration
+      // (e.g. without MPI); the casts avoid unused-parameter warnings
+      (void)counter;
+      (void)operation;
+      Assert (vector_is_ghosted == false,
+              ExcMessage ("Cannot call compress() on a ghosted vector"));
+
+#ifdef DEAL_II_WITH_MPI
+      // nothing to do for insert (only need to zero ghost entries in
+      // compress_finish()). in debug mode we want to check consistency
+      // of the inserted data, therefore the communication is still
+      // initialized. Having different code in debug and optimized mode is
+      // somewhat dangerous, but it really saves communication so it seems
+      // still worthwhile.
+#ifndef DEBUG
+      if (operation == VectorOperation::insert)
+        return;
+#endif
+
+      const Utilities::MPI::Partitioner &part = *partitioner;
+
+      // nothing to do when we neither have import
+      // nor ghost indices.
+      if (part.n_ghost_indices()==0 && part.n_import_indices()==0)
+        return;
+
+      // make this function thread safe
+      Threads::Mutex::ScopedLock lock (mutex);
+
+      const unsigned int n_import_targets = part.import_targets().size();
+      const unsigned int n_ghost_targets  = part.ghost_targets().size();
+
+      // Need to send and receive the data. Use non-blocking communication,
+      // where it is generally less overhead to first initiate the receive and
+      // then actually send the data. The requests are only created when the
+      // request array is still empty and are re-used by later calls.
+      if (compress_requests.size() == 0)
+        {
+          // set channels in different range from update_ghost_values channels
+          const unsigned int channel = counter + 400;
+          unsigned int current_index_start = 0;
+          // layout of the request array: first the receives (one per import
+          // target), then the sends (one per ghost target)
+          compress_requests.resize (n_import_targets + n_ghost_targets);
+
+          // allocate import_data in case it is not set up yet
+          if (import_data == 0)
+            import_data = new Number[part.n_import_indices()];
+          // the chunks received from the individual import targets are
+          // stored one after the other in import_data
+          for (unsigned int i=0; i<n_import_targets; i++)
+            {
+              // the message length is passed to MPI in bytes as an int, so
+              // make sure it fits
+              AssertThrow (static_cast<size_type>(part.import_targets()[i].second)*
+                           sizeof(Number) <
+                           static_cast<size_type>(std::numeric_limits<int>::max()),
+                           ExcMessage("Index overflow: Maximum message size in MPI is 2GB. "
+                                      "The number of ghost entries times the size of 'Number' "
+                                      "exceeds this value. This is not supported."));
+              // the tag is built from the rank of the sending process and
+              // the channel, matching the tag used by the sender below
+              MPI_Recv_init (&import_data[current_index_start],
+                             part.import_targets()[i].second*sizeof(Number),
+                             MPI_BYTE,
+                             part.import_targets()[i].first,
+                             part.import_targets()[i].first +
+                             part.n_mpi_processes()*channel,
+                             part.get_communicator(),
+                             &compress_requests[i]);
+              current_index_start += part.import_targets()[i].second;
+            }
+          AssertDimension(current_index_start, part.n_import_indices());
+
+          Assert (part.local_size() == vector_view.size(), ExcInternalError());
+          // the ghost entries start right behind the locally owned entries;
+          // each ghost target gets one contiguous chunk of them
+          current_index_start = part.local_size();
+          for (unsigned int i=0; i<n_ghost_targets; i++)
+            {
+              AssertThrow (static_cast<size_type>(part.ghost_targets()[i].second)*
+                           sizeof(Number) <
+                           static_cast<size_type>(std::numeric_limits<int>::max()),
+                           ExcMessage("Index overflow: Maximum message size in MPI is 2GB. "
+                                      "The number of ghost entries times the size of 'Number' "
+                                      "exceeds this value. This is not supported."));
+              MPI_Send_init (&this->val[current_index_start],
+                             part.ghost_targets()[i].second*sizeof(Number),
+                             MPI_BYTE,
+                             part.ghost_targets()[i].first,
+                             part.this_mpi_process() +
+                             part.n_mpi_processes()*channel,
+                             part.get_communicator(),
+                             &compress_requests[n_import_targets+i]);
+              current_index_start += part.ghost_targets()[i].second;
+            }
+          AssertDimension (current_index_start,
+                           part.local_size()+part.n_ghost_indices());
+        }
+
+      AssertDimension(n_import_targets + n_ghost_targets,
+                      compress_requests.size());
+      // start all receives and sends at once; they are completed in
+      // compress_finish()
+      if (compress_requests.size() > 0)
+        {
+          int ierr = MPI_Startall(compress_requests.size(),&compress_requests[0]);
+          (void)ierr;
+          Assert (ierr == MPI_SUCCESS, ExcInternalError());
+        }
+#endif
+    }
+
+
+
+    /**
+     * Finish the communication started by compress_start(). Waits for the
+     * receives, then combines the received data with the local entries
+     * listed in the partitioner's import indices: for operations other than
+     * insert the received values are added to the local ones; for insert
+     * (reached only in DEBUG builds) the received values are merely checked
+     * against the local ones. Afterwards the function waits for the sends
+     * and zeroes the ghost entries.
+     *
+     * @param operation The operation that was passed to compress_start(). In
+     * non-DEBUG builds, VectorOperation::insert only zeroes the ghost
+     * entries. Without MPI the function does nothing.
+     */
+    template <typename Number>
+    void
+    Vector<Number>::compress_finish (::dealii::VectorOperation::values operation)
+    {
+#ifdef DEAL_II_WITH_MPI
+
+      // in optimized mode, no communication was started, so leave the
+      // function directly (and only clear ghosts)
+#ifndef DEBUG
+      if (operation == VectorOperation::insert)
+        {
+          zero_out_ghosts();
+          return;
+        }
+#endif
+
+      const Utilities::MPI::Partitioner &part = *partitioner;
+
+      // nothing to do when we neither have import nor ghost indices.
+      if (part.n_ghost_indices()==0 && part.n_import_indices()==0)
+        return;
+
+      // make this function thread safe
+      Threads::Mutex::ScopedLock lock (mutex);
+
+      const unsigned int n_import_targets = part.import_targets().size();
+      const unsigned int n_ghost_targets  = part.ghost_targets().size();
+
+      if (operation != dealii::VectorOperation::insert)
+        AssertDimension (n_ghost_targets+n_import_targets,
+                         compress_requests.size());
+
+      // first wait for the receive to complete; the receives occupy the
+      // first n_import_targets slots of the request array
+      if (compress_requests.size() > 0 && n_import_targets > 0)
+        {
+          int ierr = MPI_Waitall (n_import_targets, &compress_requests[0],
+                                  MPI_STATUSES_IGNORE);
+          (void)ierr;
+          Assert (ierr == MPI_SUCCESS, ExcInternalError());
+
+          // walk through the receive buffer in the order given by the
+          // half-open index ranges [first, second) of the import indices
+          Number *read_position = import_data;
+          std::vector<std::pair<unsigned int, unsigned int> >::const_iterator
+          my_imports = part.import_indices().begin();
+
+          // If the operation is no insertion, add the imported data to the
+          // local values. For insert, nothing is done here (but in debug mode
+          // we assert that the specified value is either zero or matches with
+          // the ones already present, up to a relative tolerance of 1000
+          // times the machine epsilon of Number)
+          if (operation != dealii::VectorOperation::insert)
+            for ( ; my_imports!=part.import_indices().end(); ++my_imports)
+              for (unsigned int j=my_imports->first; j<my_imports->second; j++)
+                local_element(j) += *read_position++;
+          else
+            for ( ; my_imports!=part.import_indices().end(); ++my_imports)
+              for (unsigned int j=my_imports->first; j<my_imports->second;
+                   j++, read_position++)
+                Assert(*read_position == 0. ||
+                       std::abs(local_element(j) - *read_position) <=
+                       std::abs(local_element(j)) * 1000. *
+                       std::numeric_limits<Number>::epsilon(),
+                       ExcNonMatchingElements(*read_position, local_element(j),
+                                              part.this_mpi_process()));
+          AssertDimension(read_position-import_data,part.n_import_indices());
+        }
+
+      // then wait for the sends, which occupy the remaining slots of the
+      // request array; the ghost entries must not be modified before the
+      // sends reading from them have completed
+      if (compress_requests.size() > 0 && n_ghost_targets > 0)
+        {
+          int ierr = MPI_Waitall (n_ghost_targets,
+                                  &compress_requests[n_import_targets],
+                                  MPI_STATUSES_IGNORE);
+          (void)ierr;
+          Assert (ierr == MPI_SUCCESS, ExcInternalError());
+        }
+      else
+        AssertDimension (part.n_ghost_indices(), 0);
+
+      zero_out_ghosts ();
+#else
+      (void)operation;
+#endif
+    }
+
+
+
+    /**
+     * Start the communication that fills the ghost entries of this vector.
+     * With MPI enabled, this posts one receive per ghost target directly
+     * into the ghost section of the value array (the entries behind the
+     * locally owned ones) and one send per import target out of the
+     * import_data buffer, into which the local entries listed in the
+     * partitioner's import indices are copied first. The requests are
+     * created with MPI_Recv_init/MPI_Send_init the first time the function
+     * is called and are started with MPI_Startall on every call.
+     *
+     * @param counter Used to compute the MPI message tag. Without MPI the
+     * parameter is unused and the function does nothing.
+     */
+    template <typename Number>
+    void
+    Vector<Number>::update_ghost_values_start (const unsigned int counter) const
+    {
+#ifdef DEAL_II_WITH_MPI
+      const Utilities::MPI::Partitioner &part = *partitioner;
+
+      // nothing to do when we neither have import nor ghost indices.
+      if (part.n_ghost_indices()==0 && part.n_import_indices()==0)
+        return;
+
+      // make this function thread safe
+      Threads::Mutex::ScopedLock lock (mutex);
+
+      const unsigned int n_import_targets = part.import_targets().size();
+      const unsigned int n_ghost_targets = part.ghost_targets().size();
+
+      // Need to send and receive the data. Use non-blocking communication,
+      // where it is generally less overhead to first initiate the receive and
+      // then actually send the data. The requests are only created when the
+      // request array is still empty and are re-used by later calls.
+      if (update_ghost_values_requests.size() == 0)
+        {
+          Assert (part.local_size() == vector_view.size(),
+                  ExcInternalError());
+          // the ghost entries start right behind the locally owned entries
+          size_type current_index_start = part.local_size();
+          // layout of the request array: first the receives (one per ghost
+          // target), then the sends (one per import target)
+          update_ghost_values_requests.resize (n_import_targets+n_ghost_targets);
+          for (unsigned int i=0; i<n_ghost_targets; i++)
+            {
+              // allow writing into ghost indices even though we are in a
+              // const function. the tag is built from the rank of the
+              // sending process and the counter, matching the tag used by
+              // the sender below
+              MPI_Recv_init (const_cast<Number *>(&val[current_index_start]),
+                             part.ghost_targets()[i].second*sizeof(Number),
+                             MPI_BYTE,
+                             part.ghost_targets()[i].first,
+                             part.ghost_targets()[i].first +
+                             counter*part.n_mpi_processes(),
+                             part.get_communicator(),
+                             &update_ghost_values_requests[i]);
+              current_index_start += part.ghost_targets()[i].second;
+            }
+          AssertDimension (current_index_start,
+                           part.local_size()+part.n_ghost_indices());
+
+          // allocate import_data in case it is not set up yet
+          if (import_data == 0 && part.n_import_indices() > 0)
+            import_data = new Number[part.n_import_indices()];
+          // the chunks for the individual import targets are stored one
+          // after the other in import_data
+          current_index_start = 0;
+          for (unsigned int i=0; i<n_import_targets; i++)
+            {
+              MPI_Send_init (&import_data[current_index_start],
+                             part.import_targets()[i].second*sizeof(Number),
+                             MPI_BYTE, part.import_targets()[i].first,
+                             part.this_mpi_process() +
+                             part.n_mpi_processes()*counter,
+                             part.get_communicator(),
+                             &update_ghost_values_requests[n_ghost_targets+i]);
+              current_index_start += part.import_targets()[i].second;
+            }
+          AssertDimension (current_index_start, part.n_import_indices());
+        }
+
+      // copy the data that is actually to be sent to the import_data field.
+      // this has to happen on every call and before the requests are
+      // started, since the sends read from import_data
+      if (part.n_import_indices() > 0)
+        {
+          Assert (import_data != 0, ExcInternalError());
+          Number *write_position = import_data;
+          std::vector<std::pair<unsigned int, unsigned int> >::const_iterator
+          my_imports = part.import_indices().begin();
+          // each entry of import_indices is a half-open range [first, second)
+          // of local indices
+          for ( ; my_imports!=part.import_indices().end(); ++my_imports)
+            for (unsigned int j=my_imports->first; j<my_imports->second; j++)
+              *write_position++ = local_element(j);
+        }
+
+      AssertDimension (n_import_targets+n_ghost_targets,
+                       update_ghost_values_requests.size());
+      // start all receives and sends at once; they are completed in
+      // update_ghost_values_finish()
+      if (update_ghost_values_requests.size() > 0)
+        {
+          int ierr = MPI_Startall(update_ghost_values_requests.size(),
+                                  &update_ghost_values_requests[0]);
+          (void)ierr;
+          Assert (ierr == MPI_SUCCESS, ExcInternalError());
+        }
+#else
+      (void)counter;
+#endif
+    }
+
+
+
+    /**
+     * Complete the communication started by update_ghost_values_start() and
+     * mark the vector as ghosted. Without MPI only the ghosted flag is set.
+     */
+    template <typename Number>
+    void
+    Vector<Number>::update_ghost_values_finish () const
+    {
+#ifdef DEAL_II_WITH_MPI
+      AssertDimension (partitioner->ghost_targets().size() +
+                       partitioner->import_targets().size(),
+                       update_ghost_values_requests.size());
+
+      if (!update_ghost_values_requests.empty())
+        {
+          // make this function thread safe
+          Threads::Mutex::ScopedLock lock (mutex);
+
+          // Only the receives would really have to be awaited here, but
+          // waiting for the sends as well gives (much) better performance.
+          int ierr = MPI_Waitall (update_ghost_values_requests.size(),
+                                  &update_ghost_values_requests[0],
+                                  MPI_STATUSES_IGNORE);
+          (void)ierr;
+          Assert (ierr == MPI_SUCCESS, ExcInternalError());
+        }
+#endif
+
+      // from now on the ghost entries are considered up to date
+      vector_is_ghosted = true;
+    }
+
+
+
+    /**
+     * Exchange the contents of this vector and @p v without copying any
+     * vector entries: the partitioner, the value array and its allocated
+     * size, the import buffer, the ghosted flag and (with MPI) the request
+     * arrays are swapped. The vector views of both objects are re-pointed to
+     * the swapped arrays afterwards.
+     *
+     * In DEBUG builds with MPI it is checked that neither
+     * update_ghost_values() nor compress() requests of this vector are still
+     * unfinished.
+     */
+    template <typename Number>
+    void
+    Vector<Number>::swap (Vector<Number> &v)
+    {
+#ifdef DEAL_II_WITH_MPI
+
+#ifdef DEBUG
+      if (Utilities::MPI::job_supports_mpi())
+        {
+          // make sure that there are not outstanding requests from updating
+          // ghost values or compress
+          int flag = 1;
+          if (update_ghost_values_requests.size()>0)
+            {
+              int ierr = MPI_Testall (update_ghost_values_requests.size(),
+                                      &update_ghost_values_requests[0],
+                                      &flag, MPI_STATUSES_IGNORE);
+              Assert (ierr == MPI_SUCCESS, ExcInternalError());
+              // note the trailing space in the first literal: the two
+              // literals are concatenated into one message
+              Assert (flag == 1,
+                      ExcMessage("MPI found unfinished update_ghost_values() requests "
+                                 "when calling swap, which is not allowed"));
+            }
+          if (compress_requests.size()>0)
+            {
+              int ierr = MPI_Testall (compress_requests.size(), &compress_requests[0],
+                                      &flag, MPI_STATUSES_IGNORE);
+              Assert (ierr == MPI_SUCCESS, ExcInternalError());
+              Assert (flag == 1,
+                      ExcMessage("MPI found unfinished compress() requests "
+                                 "when calling swap, which is not allowed"));
+            }
+        }
+#endif
+
+      // the requests refer to the value array and the import buffer, so they
+      // travel together with them
+      std::swap (compress_requests, v.compress_requests);
+      std::swap (update_ghost_values_requests, v.update_ghost_values_requests);
+#endif
+
+      std::swap (partitioner,       v.partitioner);
+      std::swap (allocated_size,    v.allocated_size);
+      std::swap (val,               v.val);
+      std::swap (import_data,       v.import_data);
+      std::swap (vector_is_ghosted, v.vector_is_ghosted);
+
+      // vector view cannot be swapped so reset it manually (without touching
+      // the vector elements)
+      vector_view.reinit (partitioner->local_size(), val);
+      v.vector_view.reinit (v.partitioner->local_size(), v.val);
+    }
+
+
+
+    /**
+     * Return the result of Partitioner::is_compatible() called on the
+     * partitioner of this vector with @p part as argument.
+     */
+    template <typename Number>
+    inline
+    bool
+    Vector<Number>::partitioners_are_compatible
+    (const Utilities::MPI::Partitioner &part) const
+    {
+      const Utilities::MPI::Partitioner &own_partitioner = *partitioner;
+      return own_partitioner.is_compatible (part);
+    }
+
+
+
+    /**
+     * Return the result of Partitioner::is_globally_compatible() called on
+     * the partitioner of this vector with @p part as argument.
+     */
+    template <typename Number>
+    inline
+    bool
+    Vector<Number>::partitioners_are_globally_compatible
+    (const Utilities::MPI::Partitioner &part) const
+    {
+      const Utilities::MPI::Partitioner &own_partitioner = *partitioner;
+      return own_partitioner.is_globally_compatible (part);
+    }
+
+
+
+    /**
+     * Return an estimate of the memory used by this vector in bytes: the
+     * object itself, the allocated value array, this vector's share of the
+     * partitioner and, if it has been allocated, the import buffer.
+     */
+    template <typename Number>
+    std::size_t
+    Vector<Number>::memory_consumption () const
+    {
+      // the object itself plus allocated_size entries of type Number
+      std::size_t total_bytes =
+        sizeof(*this) +
+        sizeof (Number) * static_cast<std::size_t>(allocated_size);
+
+      // a partitioner can be shared by several owners; since it exists only
+      // once, count just a fraction of its memory (rounded up by one byte)
+      if (partitioner.use_count() > 0)
+        total_bytes += partitioner->memory_consumption()/partitioner.use_count()+1;
+
+      // the import buffer has one entry per import index once it exists
+      if (import_data != 0)
+        total_bytes += (static_cast<std::size_t>(partitioner->n_import_indices())*
+                        sizeof(Number));
+
+      return total_bytes;
+    }
+
+
+
+    /**
+     * Print the locally owned entries of the vector to @p out, preceded by
+     * the process number, the local range and the global size. If the vector
+     * is ghosted, the ghost entries are printed as well, each together with
+     * its global index.
+     *
+     * @param out Stream to write to; must be in a good state on entry and on
+     * exit, otherwise ExcIO is thrown. Its format flags and precision are
+     * restored before returning normally.
+     * @param precision Precision used for the output.
+     * @param scientific If true use scientific notation, otherwise fixed
+     * notation.
+     * @param across If true print the entries on one line separated by
+     * spaces, otherwise one entry per line.
+     *
+     * With MPI and more than one process the function calls MPI_Barrier on
+     * the partitioner's communicator, so all processes of that communicator
+     * have to call it.
+     */
+    template <typename Number>
+    void
+    Vector<Number>::print (std::ostream      &out,
+                           const unsigned int precision,
+                           const bool         scientific,
+                           const bool         across) const
+    {
+      Assert (partitioner.get() !=0, ExcInternalError());
+      AssertThrow (out, ExcIO());
+
+      // remember the formatting state of the stream. precision() installs
+      // the new precision and returns the previous one as std::streamsize,
+      // so a single call is sufficient.
+      const std::ios::fmtflags old_flags = out.flags();
+      const std::streamsize old_precision = out.precision (precision);
+
+      if (scientific)
+        out.setf (std::ios::scientific, std::ios::floatfield);
+      else
+        out.setf (std::ios::fixed, std::ios::floatfield);
+
+      // to make the vector write out all the information in order, use as
+      // many barriers as there are processors and start writing when it's our
+      // turn: process p passes p barriers before it prints
+#ifdef DEAL_II_WITH_MPI
+      if (partitioner->n_mpi_processes() > 1)
+        for (unsigned int i=0; i<partitioner->this_mpi_process(); i++)
+          MPI_Barrier (partitioner->get_communicator());
+#endif
+
+      out << "Process #" << partitioner->this_mpi_process() << std::endl
+          << "Local range: [" << partitioner->local_range().first << ", "
+          << partitioner->local_range().second << "), global size: "
+          << partitioner->size() << std::endl
+          << "Vector data:" << std::endl;
+      if (across)
+        for (size_type i=0; i<partitioner->local_size(); ++i)
+          out << local_element(i) << ' ';
+      else
+        for (size_type i=0; i<partitioner->local_size(); ++i)
+          out << local_element(i) << std::endl;
+      out << std::endl;
+
+      // the ghost entries are stored behind the locally owned ones
+      if (vector_is_ghosted)
+        {
+          out << "Ghost entries (global index / value):" << std::endl;
+          if (across)
+            for (size_type i=0; i<partitioner->n_ghost_indices(); ++i)
+              out << '(' << partitioner->ghost_indices().nth_index_in_set(i)
+                  << '/' << local_element(partitioner->local_size()+i) << ") ";
+          else
+            for (size_type i=0; i<partitioner->n_ghost_indices(); ++i)
+              out << '(' << partitioner->ghost_indices().nth_index_in_set(i)
+                  << '/' << local_element(partitioner->local_size()+i) << ")"
+                  << std::endl;
+          out << std::endl;
+        }
+      out << std::flush;
+
+      // pass the remaining barriers so that every process calls MPI_Barrier
+      // the same number of times (once per process) in total
+#ifdef DEAL_II_WITH_MPI
+      if (partitioner->n_mpi_processes() > 1)
+        {
+          MPI_Barrier (partitioner->get_communicator());
+
+          for (unsigned int i=partitioner->this_mpi_process()+1;
+               i<partitioner->n_mpi_processes(); i++)
+            MPI_Barrier (partitioner->get_communicator());
+        }
+#endif
+
+      AssertThrow (out, ExcIO());
+      // reset output format
+      out.flags (old_flags);
+      out.precision(old_precision);
+    }
+
+  } // end of namespace distributed
+
+} // end of namespace parallel
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/parpack_solver.h b/include/deal.II/lac/parpack_solver.h
new file mode 100644
index 0000000..935fe0e
--- /dev/null
+++ b/include/deal.II/lac/parpack_solver.h
@@ -0,0 +1,870 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__parpack_solver_h
+#define dealii__parpack_solver_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/lac/solver_control.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/base/index_set.h>
+
+#include <cstring>
+
+
+#ifdef DEAL_II_ARPACK_WITH_PARPACK
+
+DEAL_II_NAMESPACE_OPEN
+
+// Declarations of the PARPACK Fortran routines that PArpackSolver interfaces
+// to. All arguments are passed as pointers, and the communicator is passed
+// as a Fortran handle (MPI_Fint). The meaning of the individual arguments is
+// described in the pages linked above each declaration.
+// NOTE(review): the trailing underscore presumably matches the Fortran
+// compiler's symbol naming -- confirm for the toolchains in use.
+// NOTE(review): the "n"/"s" in the names presumably distinguish the variants
+// for non-symmetric and symmetric problems -- confirm against the PARPACK
+// manual.
+extern "C" {
+
+  // http://www.mathkeisan.com/usersguide/man/pdnaupd.html
+  void pdnaupd_(MPI_Fint *comm, int *ido, char *bmat, int *n, char *which,
+                int *nev, double *tol, double *resid, int *ncv,
+                double *v, int *nloc, int *iparam, int *ipntr,
+                double *workd, double *workl, int *lworkl,
+                int *info);
+
+  // http://www.mathkeisan.com/usersguide/man/pdsaupd.html
+  // (same argument list as pdnaupd_)
+  void pdsaupd_(MPI_Fint *comm, int *ido, char *bmat, int *n, char *which,
+                int *nev, double *tol, double *resid, int *ncv,
+                double *v, int *nloc, int *iparam, int *ipntr,
+                double *workd, double *workl, int *lworkl,
+                int *info);
+
+  // http://www.mathkeisan.com/usersguide/man/pdneupd.html
+  void pdneupd_(MPI_Fint *comm, int *rvec, char *howmany, int *select, double *d,
+                double *di, double *z, int *ldz, double *sigmar,
+                double *sigmai, double *workev, char *bmat, int *n, char *which,
+                int *nev, double *tol, double *resid, int *ncv,
+                double *v, int *nloc, int *iparam, int *ipntr,
+                double *workd, double *workl, int *lworkl, int *info);
+
+  // http://www.mathkeisan.com/usersguide/man/pdseupd.html
+  // (compared to pdneupd_, the arguments di, sigmai and workev are absent)
+  void pdseupd_(MPI_Fint *comm, int *rvec, char *howmany, int *select, double *d,
+                double *z, int *ldz, double *sigmar,
+                char *bmat, int *n, char *which,
+                int *nev, double *tol, double *resid, int *ncv,
+                double *v, int *nloc, int *iparam, int *ipntr,
+                double *workd, double *workl, int *lworkl, int *info);
+
+  // other resources:
+  //    http://acts.nersc.gov/superlu/example5/pnslac.c.html
+  //    https://github.com/phpisciuneri/tijo/blob/master/dvr_parpack.cpp
+
+}
+
+/**
+ * Interface for using PARPACK. PARPACK is a collection of Fortran77
+ * subroutines designed to solve large scale eigenvalue problems. Here we
+ * interface to the routines <code>pdneupd</code>, <code>pdseupd</code>,
+ * <code>pdnaupd</code>, <code>pdsaupd</code> of PARPACK.  The package is
+ * designed to compute a few eigenvalues and corresponding eigenvectors of a
+ * general n by n matrix A. It is most appropriate for large sparse matrices
+ * A.
+ *
+ * In this class we make use of the method applied to the generalized
+ * eigenspectrum problem $(A-\lambda B)x=0$, for $x\neq0$; where $A$ is a
+ * system matrix, $B$ is a mass matrix, and $\lambda, x$ are a set of
+ * eigenvalues and eigenvectors respectively.
+ *
+ * The PArpackSolver can be used in application codes in the following way:
+ * @code
+ *   SolverControl solver_control (1000, 1e-9);
+ *   const unsigned int num_arnoldi_vectors = 2*size_of_spectrum + 2;
+ *   PArpackSolver<V>::AdditionalData
+ *     additional_data(num_arnoldi_vectors,
+ *                     dealii::PArpackSolver<V>::largest_magnitude,
+ *                     true);
+ *
+ *    PArpackSolver<V> eigensolver (solver_control,
+ *                                  mpi_communicator,
+ *                                  additional_data);
+ *    eigensolver.set_shift(sigma);
+ *    eigensolver.reinit(locally_owned_dofs);
+ *    eigensolver.solve (A,
+ *                       B,
+ *                       OP,
+ *                       lambda,
+ *                       x,
+ *                       size_of_spectrum);
+ * @endcode
+ * for the generalized eigenvalue problem $Ax=B\lambda x$, where the variable
+ * <code>size_of_spectrum</code> tells PARPACK the number of
+ * eigenvector/eigenvalue pairs to solve for. Here, <code>lambda</code> is a
+ * vector that will contain the eigenvalues computed, <code>x</code> a vector
+ * of objects of type <code>V</code> that will contain the eigenvectors
+ * computed. <code>OP</code> is an inverse operation for the matrix <code>A -
+ * sigma * B</code>, where <code> sigma </code> is a shift value, set to zero
+ * by default.
+ *
+ * Through the AdditionalData the user can specify some of the parameters to
+ * be set.
+ *
+ * The class is intended to be used with MPI and can work on arbitrary vector
+ * and matrix distributed classes.  Both symmetric and non-symmetric
+ * <code>A</code> are supported.
+ *
+ * For further information on how the PARPACK routines <code>pdneupd</code>,
+ * <code>pdseupd</code>, <code>pdnaupd</code>, <code>pdsaupd</code> work and
+ * also how to set the parameters appropriately please take a look into the
+ * PARPACK manual.
+ *
+ * @author Denis Davydov, 2015.
+ */
+template <typename VectorType>
+class PArpackSolver : public Subscriptor
+{
+public:
+  /**
+   * Declare the type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * An enum that lists the possible choices for which eigenvalues to compute
+   * in the solve() function.
+   *
+   * A particular choice is limited based on symmetric or non-symmetric matrix
+   * <code>A</code> considered.
+   */
+  enum WhichEigenvalues
+  {
+    algebraically_largest,
+    algebraically_smallest,
+    largest_magnitude,
+    smallest_magnitude,
+    largest_real_part,
+    smallest_real_part,
+    largest_imaginary_part,
+    smallest_imaginary_part,
+    both_ends
+  };
+
+  /**
+   * Auxiliary class that represents the operator <code>A-sigma*B</code>
+   * without forming it explicitly. Only references to the two matrices are
+   * stored, so both have to outlive this object.
+   */
+  template <typename MatrixType>
+  class Shift : public dealii::Subscriptor
+  {
+  public:
+
+    /**
+     * Constructor. Takes the two matrices and the shift value.
+     */
+    Shift (const MatrixType &A,
+           const MatrixType &B,
+           const double      sigma)
+      :
+      system_matrix(A),
+      mass_matrix(B),
+      shift(sigma)
+    {}
+
+    /**
+     * Compute <code>dst = (A-sigma * B) * src</code>: first
+     * <code>B*src</code>, scaled by <code>-sigma</code>, then
+     * <code>A*src</code> added on top.
+     */
+    void vmult (VectorType &dst, const VectorType &src) const
+    {
+      mass_matrix.vmult(dst,src);
+      dst *= (-shift);
+      system_matrix.vmult_add(dst,src);
+    }
+
+    /**
+     * Compute <code>dst = (A^T-sigma * B^T) * src</code>, in the same order
+     * of operations as vmult().
+     */
+    void Tvmult (VectorType &dst, const VectorType &src) const
+    {
+      mass_matrix.Tvmult(dst,src);
+      dst *= (-shift);
+      system_matrix.Tvmult_add(dst,src);
+    }
+
+  private:
+    /**
+     * The matrix <code>A</code>.
+     */
+    const MatrixType &system_matrix;
+
+    /**
+     * The matrix <code>B</code>.
+     */
+    const MatrixType &mass_matrix;
+
+    /**
+     * The shift value <code>sigma</code>.
+     */
+    const double shift;
+  };
+
+  /**
+   * Standardized data struct to pipe additional data to the solver, should it
+   * be needed. All members are const and are set through the constructor,
+   * which is only declared here.
+   */
+  struct AdditionalData
+  {
+    // number of Arnoldi vectors to use (constructor default: 15)
+    const unsigned int number_of_arnoldi_vectors;
+    // which eigenvalues to compute, see WhichEigenvalues (constructor
+    // default: largest_magnitude)
+    const WhichEigenvalues eigenvalue_of_interest;
+    // NOTE(review): presumably selects the PARPACK routines for symmetric
+    // problems -- confirm in the implementation (constructor default: false)
+    const bool symmetric;
+    AdditionalData(
+      const unsigned int number_of_arnoldi_vectors = 15,
+      const WhichEigenvalues eigenvalue_of_interest = largest_magnitude,
+      const bool symmetric = false);
+  };
+
+  /**
+   * Access to the object that controls convergence.
+   */
+  SolverControl &control () const;
+
+  /**
+   * Constructor.
+   */
+  PArpackSolver(SolverControl &control,
+                const MPI_Comm &mpi_communicator,
+                const AdditionalData &data = AdditionalData());
+
+  /**
+   * Initialise internal variables.
+   */
+  void reinit(const dealii::IndexSet &locally_owned_dofs );
+
+  /**
+   * Set desired shift value.
+   */
+  void set_shift(const double s );
+
+  /**
+   * Solve the generalized eigenspectrum problem $A x=\lambda B x$ by calling
+   * the <code>pd(n/s)eupd</code> and <code>pd(n/s)aupd</code> functions of
+   * PARPACK.
+   */
+  template <typename MatrixType1,
+            typename MatrixType2, typename INVERSE>
+  void solve
+  (const MatrixType1                  &A,
+   const MatrixType2                  &B,
+   const INVERSE                      &inverse,
+   std::vector<std::complex<double> > &eigenvalues,
+   std::vector<VectorType>            &eigenvectors,
+   const unsigned int                  n_eigenvalues);
+
+  std::size_t memory_consumption() const;
+
+protected:
+
+  /**
+   * Reference to the object that controls convergence of the iterative
+   * solver.
+   */
+  SolverControl &solver_control;
+
+  /**
+   * Store a copy of the flags for this particular solver.
+   */
+  const AdditionalData additional_data;
+
+  // keep MPI communicator non-const as Arpack functions are not const either:
+
+  /**
+   * C++ MPI communicator.
+   */
+  MPI_Comm mpi_communicator;
+
+  /**
+   * Fortran MPI communicator.
+   */
+  MPI_Fint mpi_communicator_fortran;
+
+  // The C++ standard (since C++03) guarantees that the elements of a vector
+  // are stored contiguously
+
+  /**
+   * Length of the work array workl.
+   */
+  int lworkl;
+
+  /**
+   * Double precision  work array of length lworkl
+   */
+  std::vector<double> workl;
+
+  /**
+   * Double precision  work array of length 3*N
+   */
+  std::vector<double> workd;
+
+  /**
+   * Number of local degrees of freedom.
+   */
+  int nloc;
+
+  /**
+   * Number of Arnoldi basis vectors specified in additional_data
+   */
+  int ncv;
+
+
+  /**
+   * The leading dimension of the array v
+   */
+  int ldv;
+
+  /**
+   * Double precision vector of size ldv by NCV.  Will contain the final set
+   * of Arnoldi basis vectors.
+   */
+  std::vector<double> v;
+
+  /**
+   * The initial residual vector, possibly from a previous run.  On output, it
+   * contains the final residual vector.
+   */
+  std::vector<double> resid;
+
+  /**
+   * The leading dimension of the array Z equal to nloc.
+   */
+  int ldz;
+
+  /**
+   * A vector of minimum size of nloc by NEV+1.  Z contains the B-orthonormal
+   * Ritz vectors of the eigensystem A*z = lambda*B*z corresponding to the
+   * Ritz value approximations.
+   */
+  std::vector<double> z;
+
+  /**
+   * The size of the workev array.
+   */
+  int lworkev;
+
+  /**
+   * Double precision  work array of dimension 3* NCV.
+   */
+  std::vector<double> workev;
+
+  /**
+   * A vector of dimension NCV.
+   */
+  std::vector<int> select;
+
+  /**
+   * Temporary vectors used between Arpack and deal.II
+   */
+  VectorType src,dst,tmp;
+
+  /**
+   * Indices of local degrees of freedom.
+   */
+  std::vector< types::global_dof_index > local_indices;
+
+  /**
+   * The shift value to be applied during solution
+   */
+  double shift_value;
+
+private:
+
+  /**
+   * Reports a mismatch between the number of requested and converged
+   * eigenpairs. The message prints the first argument as the requested count
+   * and the second as the converged count.
+   */
+  DeclException2 (PArpackExcConvergedEigenvectors, int, int,
+                  << arg1 << " eigenpairs were requested, but only "
+                  << arg2 << " converged");
+
+  /**
+   * More eigenvalues were requested (first argument) than the matrix has
+   * rows (second argument).
+   */
+  DeclException2 (PArpackExcInvalidNumberofEigenvalues, int, int,
+                  << "Number of wanted eigenvalues " << arg1
+                  << " is larger than the size of the matrix " << arg2);
+
+  /**
+   * More eigenvalues were requested (first argument) than the eigenvector
+   * container can hold (second argument).
+   */
+  DeclException2 (PArpackExcInvalidEigenvectorSize, int, int,
+                  << "Number of wanted eigenvalues " << arg1
+                  << " is larger than the size of eigenvectors " << arg2);
+
+  /**
+   * More eigenvalues were requested (first argument) than the eigenvalue
+   * container can hold (second argument).
+   */
+  DeclException2 (PArpackExcInvalidEigenvalueSize, int, int,
+                  << "Number of wanted eigenvalues " << arg1
+                  << " is larger than the size of eigenvalues " << arg2);
+
+  /**
+   * The number of Arnoldi vectors (first argument) is not smaller than the
+   * size of the matrix (second argument).
+   */
+  DeclException2 (PArpackExcInvalidNumberofArnoldiVectors, int, int,
+                  << "Number of Arnoldi vectors " << arg1
+                  << " is larger than the size of the matrix " << arg2);
+
+  /**
+   * The number of Arnoldi vectors (first argument) is too small for the
+   * requested number of eigenvalues (second argument).
+   */
+  DeclException2 (PArpackExcSmallNumberofArnoldiVectors, int, int,
+                  << "Number of Arnoldi vectors " << arg1
+                  << " is too small to obtain " << arg2
+                  << " eigenvalues");
+
+  /**
+   * The reverse-communication flag <code>ido</code> returned by ARPACK is
+   * not handled by solve().
+   */
+  DeclException1 (PArpackExcIdo, int, << "This ido " << arg1
+                  << " is not supported. Check documentation of ARPACK");
+
+  /**
+   * The ARPACK mode is not handled by solve().
+   */
+  DeclException1 (PArpackExcMode, int, << "This mode " << arg1
+                  << " is not supported. Check documentation of ARPACK");
+
+  /**
+   * The iteration routine returned the error code given as argument.
+   */
+  DeclException1 (PArpackExcInfoPdnaupd, int,
+                  << "Error with Pdnaupd, info " << arg1
+                  << ". Check documentation of ARPACK");
+
+  /**
+   * The post-processing routine returned the error code given as argument.
+   */
+  DeclException1 (PArpackExcInfoPdneupd, int,
+                  << "Error with Pdneupd, info " << arg1
+                  << ". Check documentation of ARPACK");
+
+  /**
+   * The maximum number of iterations (the argument) was reached.
+   */
+  DeclException1 (PArpackExcInfoMaxIt, int,
+                  << "Maximum number " << arg1
+                  << " of iterations reached.");
+
+  /**
+   * No shifts could be applied during the implicit Arnoldi update. The
+   * argument is not used in the message.
+   */
+  DeclException1 (PArpackExcNoShifts, int,
+                  << "No shifts could be applied during implicit"
+                  << " Arnoldi update, try increasing the number of"
+                  << " Arnoldi vectors.");
+};
+
+/**
+ * Estimate, in bytes, the memory held by the ARPACK work and result arrays
+ * of doubles, the three temporary vectors, and the list of local indices.
+ */
+template <typename VectorType>
+std::size_t
+PArpackSolver<VectorType>::memory_consumption() const
+{
+  // number of doubles stored in the ARPACK arrays
+  const std::size_t n_doubles = workl.size() + workd.size() + v.size() +
+                                resid.size() + z.size() + workev.size();
+
+  std::size_t total = MemoryConsumption::memory_consumption (double()) * n_doubles;
+
+  // temporary deal.II vectors
+  total += src.memory_consumption();
+  total += dst.memory_consumption();
+  total += tmp.memory_consumption();
+
+  // indices of the locally owned degrees of freedom
+  total += MemoryConsumption::memory_consumption (types::global_dof_index()) *
+           local_indices.size();
+
+  return total;
+}
+
+/**
+ * Store the three solver options unchanged in the members of the same name.
+ */
+template <typename VectorType>
+PArpackSolver<VectorType>::AdditionalData::AdditionalData
+(const unsigned int     number_of_arnoldi_vectors,
+ const WhichEigenvalues eigenvalue_of_interest,
+ const bool             symmetric)
+  : number_of_arnoldi_vectors (number_of_arnoldi_vectors),
+    eigenvalue_of_interest (eigenvalue_of_interest),
+    symmetric (symmetric)
+{}
+
+/**
+ * Constructor. Stores a reference to @p control, a copy of @p data and of
+ * the MPI communicator (together with its Fortran handle obtained from
+ * MPI_Comm_c2f()), and sets the shift to zero.
+ *
+ * The integer sizes that are later handed to ARPACK are zero-initialized
+ * here so that they never hold indeterminate values before reinit() has been
+ * called; reinit() assigns their actual values.
+ */
+template <typename VectorType>
+PArpackSolver<VectorType>::PArpackSolver (SolverControl        &control,
+                                          const MPI_Comm       &mpi_communicator,
+                                          const AdditionalData &data)
+  :
+  solver_control (control),
+  additional_data (data),
+  mpi_communicator( mpi_communicator ),
+  mpi_communicator_fortran ( MPI_Comm_c2f( mpi_communicator ) ),
+  // scalar sizes, listed in the order in which the members are declared
+  lworkl (0),
+  nloc (0),
+  ncv (0),
+  ldv (0),
+  ldz (0),
+  lworkev (0),
+  shift_value(0.0)
+{}
+
+/**
+ * Record @p s as the shift value; solve() later passes it to ARPACK as the
+ * real part of sigma.
+ */
+template <typename VectorType>
+void
+PArpackSolver<VectorType>::set_shift (const double s)
+{
+  this->shift_value = s;
+}
+
+/**
+ * Set up all internal storage for a problem whose locally owned degrees of
+ * freedom are described by @p locally_owned_dofs.
+ *
+ * This stores the local indices, sets the scalar sizes passed to ARPACK
+ * (<code>nloc</code>, <code>ncv</code>, <code>ldv</code>, <code>ldz</code>,
+ * <code>lworkl</code>, <code>lworkev</code>), resizes the ARPACK arrays
+ * accordingly and reinitializes the three temporary deal.II vectors on the
+ * stored MPI communicator.
+ */
+template <typename VectorType>
+void PArpackSolver<VectorType>::reinit(const dealii::IndexSet &locally_owned_dofs)
+{
+  // store local indices to write to vectors
+  locally_owned_dofs.fill_index_vector(local_indices);
+
+  // scalars
+  nloc = locally_owned_dofs.n_elements ();
+  ncv  = additional_data.number_of_arnoldi_vectors;
+
+  Assert (local_indices.size() == static_cast<std::size_t>(nloc),
+          ExcInternalError() );
+
+  // Array lengths are formed in std::size_t: a product such as nloc*ncv can
+  // exceed the range of int even though each factor fits, and signed
+  // overflow is undefined behaviour.
+  const std::size_t n_local   = static_cast<std::size_t>(nloc);
+  const std::size_t n_arnoldi = static_cast<std::size_t>(ncv);
+
+  // vectors
+  ldv = nloc;
+  v.resize (n_local*n_arnoldi, 0.0);
+
+  // TODO: add optional input for resid
+  resid.resize(n_local, 1.0);
+
+  // work arrays for ARPACK
+  workd.resize(3*n_local,0.0);
+
+  // lworkl is handed to ARPACK as an int, so it stays an int
+  lworkl = additional_data.symmetric ?
+           ncv*ncv + 8*ncv
+           :
+           3*ncv*ncv+6*ncv;
+  workl.resize (lworkl, 0.);
+
+  ldz = nloc;
+  z.resize (n_local*n_arnoldi, 0.); // TODO we actually need only ldz*nev
+
+  // WORKEV  Double precision  work array of dimension 3*NCV.
+  lworkev = additional_data.symmetric ?
+            0 /*not used in symmetric case*/
+            :
+            3*ncv;
+  workev.resize (lworkev, 0.);
+
+  select.resize (ncv, 0);
+
+  // deal.II vectors:
+  src.reinit (locally_owned_dofs,mpi_communicator);
+  dst.reinit (locally_owned_dofs,mpi_communicator);
+  tmp.reinit (locally_owned_dofs,mpi_communicator);
+
+}
+
+/**
+ * Solve the generalized eigenvalue problem with ARPACK's shift-and-invert
+ * mode (mode 3) through the reverse-communication interface.
+ *
+ * reinit() must have been called beforehand, because this function reads the
+ * sizes and work arrays that reinit() sets up.
+ *
+ * The first argument (the system matrix) is not used here; its action enters
+ * only through @p inverse.
+ *
+ * @param mass_matrix    Matrix M; used for <code>vmult()</code> and for its
+ *                       size <code>m()</code> in the sanity checks.
+ * @param inverse        Operator whose <code>vmult()</code> is applied to
+ *                       M*x; per the mode-3 description below this is
+ *                       expected to be inv[K - sigma*M].
+ * @param eigenvalues    Entries 0..n_eigenvalues-1 are overwritten unless
+ *                       the iteration reported a negative info code.
+ * @param eigenvectors   Entries 0..n_eigenvalues-1 are overwritten under the
+ *                       same condition.
+ * @param n_eigenvalues  Number of eigenpairs requested (ARPACK's NEV).
+ *
+ * NOTE(review): all ARPACK error codes are reported through Assert, i.e. only
+ * in debug builds -- confirm that this is intended.
+ */
+template <typename VectorType>
+template <typename MatrixType1,typename MatrixType2, typename INVERSE>
+void PArpackSolver<VectorType>::solve
+(const MatrixType1                  &/*system_matrix*/,
+ const MatrixType2                  &mass_matrix,
+ const INVERSE                      &inverse,
+ std::vector<std::complex<double> > &eigenvalues,
+ std::vector<VectorType>            &eigenvectors,
+ const unsigned int                  n_eigenvalues)
+{
+
+  Assert (n_eigenvalues <= eigenvectors.size(),
+          PArpackExcInvalidEigenvectorSize(n_eigenvalues, eigenvectors.size()));
+
+  Assert (n_eigenvalues <= eigenvalues.size(),
+          PArpackExcInvalidEigenvalueSize(n_eigenvalues, eigenvalues.size()));
+
+
+  Assert (n_eigenvalues < mass_matrix.m(),
+          PArpackExcInvalidNumberofEigenvalues(n_eigenvalues, mass_matrix.m()));
+
+  Assert (additional_data.number_of_arnoldi_vectors < mass_matrix.m(),
+          PArpackExcInvalidNumberofArnoldiVectors(
+            additional_data.number_of_arnoldi_vectors, mass_matrix.m()));
+
+  Assert (additional_data.number_of_arnoldi_vectors > 2*n_eigenvalues+1,
+          PArpackExcSmallNumberofArnoldiVectors(
+            additional_data.number_of_arnoldi_vectors, n_eigenvalues));
+  // ARPACK mode for dnaupd, here only
+  //  Mode 3:  K*x = lambda*M*x, K symmetric, M symmetric positive semi-definite
+  //c           ===> OP = (inv[K - sigma*M])*M  and  B = M.
+  //c           ===> Shift-and-Invert mode
+  int mode = 3;
+
+  // reverse communication parameter
+  // must be zero on the first call to pdnaupd
+  int ido = 0;
+
+  // 'G' generalized eigenvalue problem
+  // 'I' standard eigenvalue problem
+  char bmat[2] = "G";
+
+  // Specify the eigenvalues of interest, possible parameters:
+  // "LA" algebraically largest
+  // "SA" algebraically smallest
+  // "LM" largest magnitude
+  // "SM" smallest magnitude
+  // "LR" largest real part
+  // "SR" smallest real part
+  // "LI" largest imaginary part
+  // "SI" smallest imaginary part
+  // "BE" both ends of spectrum simultaneous
+
+  char which[3];
+  switch (additional_data.eigenvalue_of_interest)
+    {
+    case algebraically_largest:
+      std::strcpy (which, "LA");
+      break;
+    case algebraically_smallest:
+      std::strcpy (which, "SA");
+      break;
+    case largest_magnitude:
+      std::strcpy (which, "LM");
+      break;
+    case smallest_magnitude:
+      std::strcpy (which, "SM");
+      break;
+    case largest_real_part:
+      std::strcpy (which, "LR");
+      break;
+    case smallest_real_part:
+      std::strcpy (which, "SR");
+      break;
+    case largest_imaginary_part:
+      std::strcpy (which, "LI");
+      break;
+    case smallest_imaginary_part:
+      std::strcpy (which, "SI");
+      break;
+    case both_ends:
+      std::strcpy (which, "BE");
+      break;
+    }
+
+  // tolerance for ARPACK
+  double tol = control().tolerance();
+
+  // information passed to the routines
+  std::vector<int> iparam (11, 0);
+
+  iparam[0] = 1;
+  // shift strategy: exact shifts with respect to the current Hessenberg matrix H.
+
+  // maximum number of iterations
+  iparam[2] = control().max_steps();
+
+  // Parpack currently works only for NB = 1
+  iparam[3] = 1;
+
+  // Sets the mode of dsaupd:
+  // 1 is exact shifting,
+  // 2 is user-supplied shifts,
+  // 3 is shift-invert mode,
+  // 4 is buckling mode,
+  // 5 is Cayley mode.
+
+  iparam[6] = mode;
+  std::vector<int> ipntr (14, 0);
+
+  // information out of the iteration
+  //  If INFO .EQ. 0, a random initial residual vector is used.
+  //  If INFO .NE. 0, RESID contains the initial residual vector,
+  //  possibly from a previous run.
+  // Typical choices in this situation might be to use the final value
+  // of the starting vector from the previous eigenvalue calculation
+  int info = 1;
+
+  // Number of eigenvalues of OP to be computed. 0 < NEV < N.
+  int nev = n_eigenvalues;
+  int n_inside_arpack = nloc;
+
+  // reverse-communication loop: ARPACK returns with a request in ido until
+  // it signals completion with ido == 99
+  while (ido != 99)
+    {
+      // call of ARPACK pdsaupd/pdnaupd routine
+      if (additional_data.symmetric)
+        pdsaupd_(&mpi_communicator_fortran,&ido, bmat, &n_inside_arpack, which, &nev, &tol,
+                 &resid[0], &ncv, &v[0], &ldv, &iparam[0], &ipntr[0],
+                 &workd[0], &workl[0], &lworkl, &info);
+      else
+        pdnaupd_(&mpi_communicator_fortran,&ido, bmat, &n_inside_arpack, which, &nev, &tol,
+                 &resid[0], &ncv, &v[0], &ldv, &iparam[0], &ipntr[0],
+                 &workd[0], &workl[0], &lworkl, &info);
+
+      if (ido == 99)
+        break;
+
+      switch (mode)
+        {
+//        OP = (inv[K - sigma*M])*M
+        case 3:
+        {
+          switch (ido)
+            {
+//            compute  Y = OP * X  where
+//            IPNTR(1) is the pointer into WORKD for X,
+//            IPNTR(2) is the pointer into WORKD for Y.
+            case -1:
+            {
+              // ipntr holds 1-based Fortran offsets
+              const int shift_x = ipntr[0]-1;
+              const int shift_y = ipntr[1]-1;
+              Assert (shift_x>=0, dealii::ExcInternalError() );
+              Assert (shift_x+nloc <= workd.size(), dealii::ExcInternalError() );
+              Assert (shift_y>=0, dealii::ExcInternalError() );
+              Assert (shift_y+nloc <= workd.size(), dealii::ExcInternalError() );
+
+              src = 0.0;
+              src.add (nloc,
+                       &local_indices[0],
+                       &workd[0]+shift_x );
+              src.compress (VectorOperation::add);
+
+              // multiplication with mass matrix M
+              mass_matrix.vmult(tmp, src);
+              // solving linear system
+              inverse.vmult(dst,tmp);
+
+              // store the result
+              dst.extract_subvector_to (local_indices.begin(),
+                                        local_indices.end(),
+                                        &workd[0]+shift_y  );
+            }
+            break;
+
+//            compute  Y = OP * X where
+//            IPNTR(1) is the pointer into WORKD for X,
+//            IPNTR(2) is the pointer into WORKD for Y.
+//            In mode 3,4 and 5, the vector B * X is already
+//            available in WORKD(ipntr(3)).  It does not
+//            need to be recomputed in forming OP * X.
+            case  1:
+            {
+              const int shift_x   = ipntr[0]-1;
+              const int shift_y   = ipntr[1]-1;
+              const int shift_b_x = ipntr[2]-1;
+
+              Assert (shift_x>=0, dealii::ExcInternalError() );
+              Assert (shift_x+nloc <= workd.size(), dealii::ExcInternalError() );
+              Assert (shift_y>=0, dealii::ExcInternalError() );
+              Assert (shift_y+nloc <= workd.size(), dealii::ExcInternalError() );
+              Assert (shift_b_x>=0, dealii::ExcInternalError() );
+              Assert (shift_b_x+nloc <= workd.size(), dealii::ExcInternalError() );
+
+              src = 0.0; // B*X
+              src.add (nloc,
+                       &local_indices[0],
+                       &workd[0]+shift_b_x );
+
+              // NOTE(review): tmp is filled with X here but is not read
+              // again in this branch -- confirm whether it is still needed.
+              tmp = 0.0; // X
+              tmp.add (nloc,
+                       &local_indices[0],
+                       &workd[0]+shift_x);
+
+              src.compress (VectorOperation::add);
+              tmp.compress (VectorOperation::add);
+
+              // solving linear system
+              inverse.vmult(dst,src);
+
+              // store the result
+              dst.extract_subvector_to (local_indices.begin(),
+                                        local_indices.end(),
+                                        &workd[0]+shift_y  );
+
+            }
+            break;
+
+//            compute  Y = B * X  where
+//            IPNTR(1) is the pointer into WORKD for X,
+//            IPNTR(2) is the pointer into WORKD for Y.
+            case  2:
+            {
+
+              const int shift_x = ipntr[0]-1;
+              const int shift_y = ipntr[1]-1;
+              Assert (shift_x>=0, dealii::ExcInternalError() );
+              Assert (shift_x+nloc <= workd.size(), dealii::ExcInternalError() );
+              Assert (shift_y>=0, dealii::ExcInternalError() );
+              Assert (shift_y+nloc <= workd.size(), dealii::ExcInternalError() );
+
+              src = 0.0;
+              src.add (nloc,
+                       &local_indices[0],
+                       &workd[0]+shift_x );
+              src.compress (VectorOperation::add);
+
+              // Multiplication with mass matrix M
+              mass_matrix.vmult(dst, src);
+
+              // store the result
+              dst.extract_subvector_to (local_indices.begin(),
+                                        local_indices.end(),
+                                        &workd[0]+shift_y);
+
+            }
+            break;
+
+            default:
+              Assert (false, PArpackExcIdo(ido));
+              break;
+            }
+        }
+        break;
+        default:
+          Assert (false, PArpackExcMode(mode));
+          break;
+        }
+    }
+
+  if (info<0)
+    {
+      Assert (false, PArpackExcInfoPdnaupd(info));
+    }
+  else
+    {
+      // 1 - compute eigenvectors,
+      // 0 - only eigenvalues
+      int rvec = 1;
+
+      // which eigenvectors
+      char howmany[4] = "All";
+
+      double sigmar = shift_value; // real part of the shift
+      double sigmai = 0.0; // imaginary part of the shift
+
+      std::vector<double> eigenvalues_real (n_eigenvalues, 0.);
+      std::vector<double> eigenvalues_im (n_eigenvalues, 0.);
+
+      // call of ARPACK pdseupd/pdneupd routine
+      if (additional_data.symmetric)
+        pdseupd_(&mpi_communicator_fortran, &rvec, howmany, &select[0], &eigenvalues_real[0],
+                 &z[0], &ldz, &sigmar,
+                 bmat, &n_inside_arpack, which, &nev, &tol,
+                 &resid[0], &ncv, &v[0], &ldv,
+                 &iparam[0], &ipntr[0], &workd[0], &workl[0], &lworkl, &info);
+      else
+        pdneupd_(&mpi_communicator_fortran, &rvec, howmany, &select[0], &eigenvalues_real[0],
+                 &eigenvalues_im[0], &z[0], &ldz, &sigmar, &sigmai,
+                 &workev[0], bmat, &n_inside_arpack, which, &nev, &tol,
+                 &resid[0], &ncv, &v[0], &ldv,
+                 &iparam[0], &ipntr[0], &workd[0], &workl[0], &lworkl, &info);
+
+      if (info == 1)
+        {
+          Assert (false, PArpackExcInfoMaxIt(control().max_steps()));
+        }
+      else if (info == 3)
+        {
+          Assert (false, PArpackExcNoShifts(1));
+        }
+      else if (info!=0)
+        {
+          Assert (false, PArpackExcInfoPdneupd(info));
+        }
+
+      // copy the first n_eigenvalues columns of v into the output vectors
+      for (size_type i=0; i<n_eigenvalues; ++i)
+        {
+          eigenvectors[i] = 0.0;
+          Assert (i*nloc + nloc <= v.size(), dealii::ExcInternalError() );
+
+          eigenvectors[i].add (nloc,
+                               &local_indices[0],
+                               &v[i*nloc] );
+          eigenvectors[i].compress (VectorOperation::add);
+        }
+
+      for (size_type i=0; i<n_eigenvalues; ++i)
+        eigenvalues[i] = std::complex<double> (eigenvalues_real[i],
+                                               eigenvalues_im[i]);
+    }
+
+  // iparam[4] is the number of converged eigenpairs reported by ARPACK; the
+  // exception takes (requested, converged), matching its message text
+  Assert (iparam[4] == n_eigenvalues,
+          PArpackExcConvergedEigenvectors(n_eigenvalues, iparam[4]));
+
+  // both PDNAUPD and PDSAUPD compute eigenpairs of inv[A - sigma*M]*M
+  // with respect to a semi-inner product defined by M.
+
+  // resid likely contains residual with respect to M-norm.
+  {
+
+    tmp = 0.0;
+    tmp.add (nloc,
+             &local_indices[0],
+             &resid[0]);
+    solver_control.check  ( iparam[2], tmp.l2_norm() );
+  }
+
+
+}
+
+/**
+ * Return the SolverControl object whose reference was stored by the
+ * constructor.
+ */
+template <typename VectorType>
+SolverControl &
+PArpackSolver<VectorType>::control () const
+{
+  return this->solver_control;
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+
+#endif
+#endif
diff --git a/include/deal.II/lac/petsc_block_sparse_matrix.h b/include/deal.II/lac/petsc_block_sparse_matrix.h
new file mode 100644
index 0000000..c4d7233
--- /dev/null
+++ b/include/deal.II/lac/petsc_block_sparse_matrix.h
@@ -0,0 +1,341 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__petsc_block_sparse_matrix_h
+#define dealii__petsc_block_sparse_matrix_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_PETSC
+
+#  include <deal.II/base/table.h>
+#  include <deal.II/lac/block_matrix_base.h>
+#  include <deal.II/lac/petsc_sparse_matrix.h>
+#  include <deal.II/lac/petsc_block_vector.h>
+#  include <deal.II/lac/exceptions.h>
+
+#  include <cmath>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+namespace PETScWrappers
+{
+
+  /*! @addtogroup PETScWrappers
+   *@{
+   */
+
+  /**
+   * Blocked sparse matrix based on the PETScWrappers::SparseMatrix class.
+   * This class implements the functions that are specific to the PETSc
+   * SparseMatrix base objects for a blocked sparse matrix, and leaves the
+   * actual work relaying most of the calls to the individual blocks to the
+   * functions implemented in the base class. See there also for a description
+   * of when this class is useful.
+   *
+   * In contrast to the deal.II-type SparseMatrix class, the PETSc matrices do
+   * not have external objects for the sparsity patterns. Thus, one does not
+   * determine the size of the individual blocks of a block matrix of this
+   * type by attaching a block sparsity pattern, but by calling reinit() to
+   * set the number of blocks and then by setting the size of each block
+   * separately. In order to fix the data structures of the block matrix, it
+   * is then necessary to let it know that we have changed the sizes of the
+   * underlying matrices. For this, one has to call the collect_sizes()
+   * function, for much the same reason as is documented with the
+   * BlockSparsityPattern class.
+   *
+   * @ingroup Matrix1
+   * @see
+   * @ref GlossBlockLA "Block (linear algebra)"
+   * @author Wolfgang Bangerth, 2004
+   */
+  class BlockSparseMatrix : public BlockMatrixBase<PETScWrappers::SparseMatrix>
+  {
+  public:
+    /**
+     * Typedef the base class for simpler access to its own typedefs.
+     */
+    typedef BlockMatrixBase<SparseMatrix> BaseClass;
+
+    /**
+     * Typedef the type of the underlying matrix.
+     */
+    typedef BaseClass::BlockType  BlockType;
+
+    /**
+     * Import the typedefs from the base class.
+     */
+    typedef BaseClass::value_type      value_type;
+    typedef BaseClass::pointer         pointer;
+    typedef BaseClass::const_pointer   const_pointer;
+    typedef BaseClass::reference       reference;
+    typedef BaseClass::const_reference const_reference;
+    typedef BaseClass::size_type       size_type;
+    typedef BaseClass::iterator        iterator;
+    typedef BaseClass::const_iterator  const_iterator;
+
+    /**
+     * Constructor; initializes the matrix to be empty, without any structure,
+     * i.e.  the matrix is not usable at all. This constructor is therefore
+     * only useful for matrices which are members of a class. All other
+     * matrices should be created at a point in the data flow where all
+     * necessary information is available.
+     *
+     * You have to initialize the matrix before usage by calling reinit()
+     * with the number of block rows and columns, then setting the size of
+     * each block, and finally calling collect_sizes().
+     */
+    BlockSparseMatrix ();
+
+    /**
+     * Destructor.
+     */
+    ~BlockSparseMatrix ();
+
+    /**
+     * Pseudo copy operator only copying empty objects. The sizes of the block
+     * matrices need to be the same.
+     */
+    BlockSparseMatrix &
+    operator = (const BlockSparseMatrix &);
+
+    /**
+     * This operator assigns a scalar to a matrix. Since this does usually not
+     * make much sense (should we set all matrix entries to this value? Only
+     * the nonzero entries of the sparsity pattern?), this operation is only
+     * allowed if the actual value to be assigned is zero. This operator only
+     * exists to allow for the obvious notation <tt>matrix=0</tt>, which sets
+     * all elements of the matrix to zero, but keeps the sparsity pattern
+     * previously used.
+     */
+    BlockSparseMatrix &
+    operator = (const double d);
+
+    /**
+     * Resize the matrix, by setting the number of block rows and columns.
+     * This deletes all blocks and replaces them by uninitialized ones, i.e.
+     * ones for which also the sizes are not yet set. You have to do that by
+     * calling the @p reinit functions of the blocks themselves. Do not forget
+     * to call collect_sizes() after that on this object.
+     *
+     * The reason that you have to set sizes of the blocks yourself is that
+     * the sizes may be varying, the maximum number of elements per row may be
+     * varying, etc. It is simpler not to reproduce the interface of the @p
+     * SparsityPattern class here but rather let the user call whatever
+     * function she desires.
+     */
+    void reinit (const size_type n_block_rows,
+                 const size_type n_block_columns);
+
+    /**
+     * This function collects the sizes of the sub-objects and stores them in
+     * internal arrays, in order to be able to relay global indices into the
+     * matrix to indices into the subobjects. You *must* call this function
+     * each time after you have changed the size of the sub-objects.
+     */
+    void collect_sizes ();
+
+    /**
+     * Matrix-vector multiplication: let $dst = M*src$ with $M$ being this
+     * matrix.
+     */
+    void vmult (BlockVector       &dst,
+                const BlockVector &src) const;
+
+    /**
+     * Matrix-vector multiplication. Just like the previous function, but only
+     * applicable if the matrix has only one block column.
+     */
+    void vmult (BlockVector          &dst,
+                const Vector &src) const;
+
+    /**
+     * Matrix-vector multiplication. Just like the previous function, but only
+     * applicable if the matrix has only one block row.
+     */
+    void vmult (Vector    &dst,
+                const BlockVector &src) const;
+
+    /**
+     * Matrix-vector multiplication. Just like the previous function, but only
+     * applicable if the matrix has only one block.
+     */
+    void vmult (Vector       &dst,
+                const Vector &src) const;
+
+    /**
+     * Matrix-vector multiplication: let $dst = M^T*src$ with $M$ being this
+     * matrix. This function does the same as vmult() but takes the transposed
+     * matrix.
+     */
+    void Tvmult (BlockVector       &dst,
+                 const BlockVector &src) const;
+
+    /**
+     * Matrix-vector multiplication. Just like the previous function, but only
+     * applicable if the matrix has only one block row.
+     */
+    void Tvmult (BlockVector  &dst,
+                 const Vector &src) const;
+
+    /**
+     * Matrix-vector multiplication. Just like the previous function, but only
+     * applicable if the matrix has only one block column.
+     */
+    void Tvmult (Vector    &dst,
+                 const BlockVector &src) const;
+
+    /**
+     * Matrix-vector multiplication. Just like the previous function, but only
+     * applicable if the matrix has only one block.
+     */
+    void Tvmult (Vector       &dst,
+                 const Vector &src) const;
+
+    /**
+     * Make the clear() function in the base class visible, though it is
+     * protected.
+     */
+    using BlockMatrixBase<SparseMatrix>::clear;
+
+    /**
+     * @addtogroup Exceptions
+     * @{
+     */
+
+    /**
+     * Exception reporting that two blocks have differing row numbers. The
+     * four arguments are the indices of the two blocks, printed as
+     * <code>[arg1,arg2]</code> and <code>[arg3,arg4]</code>.
+     */
+    DeclException4 (ExcIncompatibleRowNumbers,
+                    int, int, int, int,
+                    << "The blocks [" << arg1 << ',' << arg2 << "] and ["
+                    << arg3 << ',' << arg4 << "] have differing row numbers.");
+    /**
+     * Exception reporting that two blocks have differing column numbers. The
+     * four arguments are the indices of the two blocks, printed as
+     * <code>[arg1,arg2]</code> and <code>[arg3,arg4]</code>.
+     */
+    DeclException4 (ExcIncompatibleColNumbers,
+                    int, int, int, int,
+                    << "The blocks [" << arg1 << ',' << arg2 << "] and ["
+                    << arg3 << ',' << arg4 << "] have differing column numbers.");
+    ///@}
+  };
+
+
+
+  /*@}*/
+
+// ------------- inline and template functions -----------------
+
+  /**
+   * Assign the scalar @p d to every block of the matrix. In debug mode an
+   * assertion checks that @p d is zero.
+   */
+  inline
+  BlockSparseMatrix &
+  BlockSparseMatrix::operator = (const double d)
+  {
+    Assert (d==0, ExcScalarAssignmentOnlyForZeroValue());
+
+    const size_type n_rows = this->n_block_rows();
+    const size_type n_cols = this->n_block_cols();
+
+    for (size_type row=0; row<n_rows; ++row)
+      for (size_type col=0; col<n_cols; ++col)
+        this->block(row,col) = d;
+
+    return *this;
+  }
+
+
+  /**
+   * Block destination, block source: forwards to
+   * BaseClass::vmult_block_block().
+   */
+  inline
+  void
+  BlockSparseMatrix::vmult (BlockVector       &dst,
+                            const BlockVector &src) const
+  {
+    BaseClass::vmult_block_block (dst, src);
+  }
+
+
+  /**
+   * Block destination, non-block source: forwards to
+   * BaseClass::vmult_block_nonblock().
+   */
+  inline
+  void
+  BlockSparseMatrix::vmult (BlockVector  &dst,
+                            const Vector &src) const
+  {
+    BaseClass::vmult_block_nonblock (dst, src);
+  }
+
+
+  /**
+   * Non-block destination, block source: forwards to
+   * BaseClass::vmult_nonblock_block().
+   */
+  inline
+  void
+  BlockSparseMatrix::vmult (Vector            &dst,
+                            const BlockVector &src) const
+  {
+    BaseClass::vmult_nonblock_block (dst, src);
+  }
+
+
+  /**
+   * Non-block destination, non-block source: forwards to
+   * BaseClass::vmult_nonblock_nonblock().
+   */
+  inline
+  void
+  BlockSparseMatrix::vmult (Vector       &dst,
+                            const Vector &src) const
+  {
+    BaseClass::vmult_nonblock_nonblock (dst, src);
+  }
+
+  /**
+   * Transposed product, block destination and block source: forwards to
+   * BaseClass::Tvmult_block_block().
+   */
+  inline
+  void
+  BlockSparseMatrix::Tvmult (BlockVector       &dst,
+                             const BlockVector &src) const
+  {
+    BaseClass::Tvmult_block_block (dst, src);
+  }
+
+
+  /**
+   * Transposed product, block destination and non-block source: forwards to
+   * BaseClass::Tvmult_block_nonblock().
+   */
+  inline
+  void
+  BlockSparseMatrix::Tvmult (BlockVector  &dst,
+                             const Vector &src) const
+  {
+    BaseClass::Tvmult_block_nonblock (dst, src);
+  }
+
+
+  /**
+   * Transposed product, non-block destination and block source: forwards to
+   * BaseClass::Tvmult_nonblock_block().
+   */
+  inline
+  void
+  BlockSparseMatrix::Tvmult (Vector            &dst,
+                             const BlockVector &src) const
+  {
+    BaseClass::Tvmult_nonblock_block (dst, src);
+  }
+
+
+  /**
+   * Transposed product, non-block destination and non-block source: forwards
+   * to BaseClass::Tvmult_nonblock_nonblock().
+   */
+  inline
+  void
+  BlockSparseMatrix::Tvmult (Vector       &dst,
+                             const Vector &src) const
+  {
+    BaseClass::Tvmult_nonblock_nonblock (dst, src);
+  }
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif    // DEAL_II_WITH_PETSC
+
+#endif    // dealii__petsc_block_sparse_matrix_h
diff --git a/include/deal.II/lac/petsc_block_vector.h b/include/deal.II/lac/petsc_block_vector.h
new file mode 100644
index 0000000..9dc85b4
--- /dev/null
+++ b/include/deal.II/lac/petsc_block_vector.h
@@ -0,0 +1,466 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__petsc_block_vector_h
+#define dealii__petsc_block_vector_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_PETSC
+
+#  include <deal.II/lac/petsc_vector.h>
+#  include <deal.II/lac/petsc_parallel_block_vector.h>
+#  include <deal.II/lac/block_indices.h>
+#  include <deal.II/lac/block_vector_base.h>
+#  include <deal.II/lac/exceptions.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+namespace PETScWrappers
+{
+  /*! @addtogroup PETScWrappers
+   *@{
+   */
+
+  /**
+   * An implementation of block vectors based on the vector class implemented
+   * in PETScWrappers. While the base class provides for most of the
+   * interface, this class handles the actual allocation of vectors and
+   * provides functions that are specific to the underlying vector type.
+   *
+   * This class is deprecated, use PETScWrappers::MPI::BlockVector.
+   *
+   * @ingroup Vectors
+   *
+   * @see
+   * @ref GlossBlockLA "Block (linear algebra)"
+   * @author Wolfgang Bangerth, 2004
+   */
+  class BlockVector : public BlockVectorBase<Vector>
+  {
+  public:
+    /**
+     * Typedef the base class for simpler access to its own typedefs.
+     */
+    typedef BlockVectorBase<Vector> BaseClass;
+
+    /**
+     * Typedef the type of the underlying vector.
+     */
+    typedef BaseClass::BlockType  BlockType;
+
+    /**
+     * Import the typedefs from the base class.
+     */
+    typedef BaseClass::value_type      value_type;
+    typedef BaseClass::pointer         pointer;
+    typedef BaseClass::const_pointer   const_pointer;
+    typedef BaseClass::reference       reference;
+    typedef BaseClass::const_reference const_reference;
+    typedef BaseClass::size_type       size_type;
+    typedef BaseClass::iterator        iterator;
+    typedef BaseClass::const_iterator  const_iterator;
+
+    /**
+     * Constructor. There are three ways to use this constructor. First,
+     * without any arguments, it generates an object with no blocks. Given one
+     * argument, it initializes <tt>num_blocks</tt> blocks, but these blocks
+     * have size zero. The third variant finally initializes all blocks to the
+     * same size <tt>block_size</tt>.
+     *
+     * Confer the other constructor further down if you intend to use blocks
+     * of different sizes.
+     */
+    explicit BlockVector (const unsigned int num_blocks = 0,
+                          const size_type    block_size = 0);
+
+    /**
+     * Copy constructor. The dimension is set to that of V and all components
+     * are copied from V.
+     */
+    BlockVector (const BlockVector  &V);
+
+    /**
+     * Copy-constructor: copy the values from a PETSc wrapper parallel block
+     * vector class.
+     *
+     *
+     * Note that due to the communication model of MPI, @em all processes have
+     * to actually perform this operation, even if they do not use the result.
+     * It is not sufficient if only one processor tries to copy the elements
+     * from the other processors over to its own process space.
+     */
+    explicit BlockVector (const MPI::BlockVector &v);
+
+    /**
+     * Constructor. Set the number of blocks to <tt>n.size()</tt> and
+     * initialize each block with <tt>n[i]</tt> zero elements.
+     */
+    BlockVector (const std::vector<size_type> &n);
+
+    /**
+     * Constructor. Set the number of blocks to <tt>n.size()</tt>. Initialize
+     * the vector with the elements pointed to by the range of iterators given
+     * as second and third argument. Apart from the first argument, this
+     * constructor is in complete analogy to the respective constructor of the
+     * <tt>std::vector</tt> class, but the first argument is needed in order
+     * to know how to subdivide the block vector into different blocks.
+     */
+    template <typename InputIterator>
+    BlockVector (const std::vector<size_type> &n,
+                 const InputIterator           first,
+                 const InputIterator           end);
+
+    /**
+     * Destructor. Clears memory.
+     */
+    ~BlockVector ();
+
+    /**
+     * Assignment operator: fill all components of the vector with the given
+     * scalar value.
+     */
+    BlockVector &operator = (const value_type s);
+
+    /**
+     * Assignment operator for arguments of the same type.
+     */
+    BlockVector &
+    operator= (const BlockVector &V);
+
+    /**
+     * Copy all the elements of the parallel block vector @p v into this local
+     * vector. Note that due to the communication model of MPI, @em all
+     * processes have to actually perform this operation, even if they do not
+     * use the result. It is not sufficient if only one processor tries to
+     * copy the elements from the other processors over to its own process
+     * space.
+     */
+    BlockVector &
+    operator = (const MPI::BlockVector &v);
+
+    /**
+     * Reinitialize the BlockVector to contain <tt>num_blocks</tt> blocks of
+     * size <tt>block_size</tt> each.
+     *
+     * If <tt>omit_zeroing_entries==false</tt>, the vector is filled with
+     * zeros.
+     */
+    void reinit (const unsigned int num_blocks,
+                 const size_type    block_size,
+                 const bool omit_zeroing_entries = false);
+
+    /**
+     * Reinitialize the BlockVector such that it contains
+     * <tt>block_sizes.size()</tt> blocks. Each block is reinitialized to
+     * dimension <tt>block_sizes[i]</tt>.
+     *
+     * If the number of blocks is the same as before this function was called,
+     * all vectors remain the same and reinit() is called for each vector.
+     *
+     * If <tt>omit_zeroing_entries==false</tt>, the vector is filled with
+     * zeros.
+     *
+     * Note that you must call this (or the other reinit() functions)
+     * function, rather than calling the reinit() functions of an individual
+     * block, to allow the block vector to update its caches of vector sizes.
+     * If you call reinit() on one of the blocks, then subsequent actions on
+     * this object may yield unpredictable results since they may be routed to
+     * the wrong block.
+     */
+    void reinit (const std::vector<size_type> &N,
+                 const bool                   omit_zeroing_entries=false);
+
+    /**
+     * Change the dimension to that of the vector <tt>V</tt>. The same applies
+     * as for the other reinit() function.
+     *
+     * The elements of <tt>V</tt> are not copied, i.e. this function is the
+     * same as calling <tt>reinit (V.size(), omit_zeroing_entries)</tt>.
+     *
+     * Note that you must call this (or the other reinit() functions)
+     * function, rather than calling the reinit() functions of an individual
+     * block, to allow the block vector to update its caches of vector sizes.
+     * If you call reinit() on one of the blocks, then subsequent actions on
+     * this object may yield unpredictable results since they may be routed to
+     * the wrong block.
+     */
+    void reinit (const BlockVector &V,
+                 const bool         omit_zeroing_entries=false);
+
+    /**
+     * Change the number of blocks to <tt>num_blocks</tt>. The individual
+     * blocks will get initialized with zero size, so it is assumed that the
+     * user resizes the individual blocks by herself in an appropriate way,
+     * and calls <tt>collect_sizes</tt> afterwards.
+     */
+    void reinit (const unsigned int num_blocks);
+
+    /**
+     * Swap the contents of this vector and the other vector <tt>v</tt>. One
+     * could do this operation with a temporary variable and copying over the
+     * data elements, but this function is significantly more efficient since
+     * it only swaps the pointers to the data of the two vectors and therefore
+     * does not need to allocate temporary storage and move data around.
+     *
+     * Limitation: right now this function only works if both vectors have the
+     * same number of blocks. If needed, the numbers of blocks should be
+     * exchanged, too.
+     *
+     * This function is analogous to the swap() function of all C++ standard
+     * containers. Also, there is a global function swap(u,v) that simply
+     * calls <tt>u.swap(v)</tt>, again in analogy to standard functions.
+     */
+    void swap (BlockVector &v);
+
+    /**
+     * Print to a stream.
+     */
+    void print (std::ostream       &out,
+                const unsigned int  precision = 3,
+                const bool          scientific = true,
+                const bool          across = true) const;
+
+    /**
+     * @addtogroup Exceptions
+     * @{
+     */
+
+    /**
+     * Exception used by the size check of the iterator-range constructor.
+     */
+    DeclException0 (ExcIteratorRangeDoesNotMatchVectorSize);
+    ///@}
+  } DEAL_II_DEPRECATED;
+
+  /*@}*/
+
+  /*----------------------- Inline functions ----------------------------------*/
+
+
+
+  // Creates n_blocks blocks of block_size entries each; reinit() zeroes them.
+  inline BlockVector::BlockVector (const unsigned int n_blocks,
+                                   const size_type    block_size)
+  {
+    this->reinit (n_blocks, block_size, false);
+  }
+
+
+
+  // One block per entry of n, block i holding n[i] zero-initialized entries.
+  inline BlockVector::BlockVector (const std::vector<size_type> &n)
+  {
+    this->reinit (n, false);
+  }
+
+
+  // Copy constructor: adopt the block layout of v, then copy block by block.
+  inline
+  BlockVector::BlockVector (const BlockVector &v)
+    :
+    BaseClass ()
+  {
+    this->block_indices = v.block_indices;
+    this->components.resize (v.n_blocks());
+    for (unsigned int b=0; b<this->n_blocks(); ++b)
+      this->components[b] = v.components[b];
+  }
+
+
+
+  // Conversion from the parallel block vector: adopt its block layout, then
+  // assign each of its blocks to the corresponding block of this object.
+  inline
+  BlockVector::BlockVector (const MPI::BlockVector &v)
+    : BaseClass ()
+  {
+    this->block_indices = v.get_block_indices();
+    this->components.resize (v.get_block_indices().size());
+    for (unsigned int b=0; b<this->n_blocks(); ++b)
+      this->components[b] = v.block(b);
+  }
+
+
+
+  // Iterator-range constructor: size the blocks according to n, then fill
+  // them in order with a single pass over [first, end).
+  template <typename InputIterator>
+  BlockVector::BlockVector (const std::vector<size_type> &n,
+                            const InputIterator           first,
+                            const InputIterator           end)
+  {
+    // 'end' is only read inside the Assert at the bottom; the cast avoids
+    // an unused-parameter warning should that Assert expand to nothing
+    (void)end;
+    // set the block sizes but skip zeroing: every entry is overwritten below
+    reinit (n, true);
+
+    // walk the range exactly once; no look-ahead copy of the iterator is
+    // made, so nothing shadows 'end' and single-pass iterators work too
+    InputIterator start = first;
+    for (unsigned int b=0; b<n.size(); ++b)
+      for (size_type i=0; i<n[b]; ++i, ++start)
+        this->block(b)(i) = *start;
+    Assert (start == end, ExcIteratorRangeDoesNotMatchVectorSize());
+  }
+
+
+
+  // Fill every entry with the scalar s; the base class does the assignment.
+  inline BlockVector &
+  BlockVector::operator = (const value_type s)
+  {
+    this->BaseClass::operator = (s);
+    return *this;
+  }
+
+
+
+  // Assignment from a vector of the same type, done by the base class.
+  inline BlockVector &
+  BlockVector::operator = (const BlockVector &v)
+  {
+    this->BaseClass::operator = (v);
+    return *this;
+  }
+
+
+
+  // Assignment from a parallel block vector, done by the base class.
+  inline BlockVector &
+  BlockVector::operator = (const MPI::BlockVector &v)
+  {
+    this->BaseClass::operator = (v);
+    return *this;
+  }
+
+
+
+  // The destructor body is intentionally empty.
+  inline BlockVector::~BlockVector ()
+  {}
+
+
+  // Uniform-size variant: expand (n_bl, bl_sz) into a vector holding n_bl
+  // copies of bl_sz and hand it to the size-vector overload of reinit().
+  inline void
+  BlockVector::reinit (const unsigned int n_bl,
+                       const size_type    bl_sz,
+                       const bool         omit_zeroing_entries)
+  {
+    this->reinit (std::vector<size_type>(n_bl, bl_sz), omit_zeroing_entries);
+  }
+
+
+
+  // Rebuild the block layout from the sizes in n, make sure there is one
+  // component per block, and reinit every component to its new size.
+  inline void
+  BlockVector::reinit (const std::vector<size_type> &n,
+                       const bool                    omit_zeroing_entries)
+  {
+    this->block_indices.reinit (n);
+    if (this->n_blocks() != this->components.size())
+      this->components.resize (this->n_blocks());
+    for (unsigned int b=0; b<this->n_blocks(); ++b)
+      this->components[b].reinit (n[b], omit_zeroing_entries);
+  }
+
+
+  // Take over the block layout of v, make sure there is one component per
+  // block, and reinit every block after the matching block of v.
+  inline void
+  BlockVector::reinit (const BlockVector &v,
+                       const bool omit_zeroing_entries)
+  {
+    this->block_indices = v.get_block_indices();
+    if (this->n_blocks() != this->components.size())
+      this->components.resize (this->n_blocks());
+    for (unsigned int b=0; b<this->n_blocks(); ++b)
+      this->block(b).reinit (v.block(b), omit_zeroing_entries);
+  }
+
+
+
+  // Change only the number of blocks; every block gets size zero.
+  inline void
+  BlockVector::reinit (const unsigned int num_blocks)
+  {
+    this->reinit (num_blocks, 0, true);
+  }
+
+
+
+  // Exchange contents with v block by block, then exchange the block
+  // indices. Both vectors must have the same number of blocks.
+  inline void
+  BlockVector::swap (BlockVector &v)
+  {
+    Assert (this->n_blocks() == v.n_blocks(),
+            ExcDimensionMismatch(this->n_blocks(), v.n_blocks()));
+    for (unsigned int b=0; b<this->n_blocks(); ++b)
+      this->components[b].swap (v.components[b]);
+    ::dealii::swap (this->block_indices, v.block_indices);
+  }
+
+
+
+  // Print the blocks in order, each one preceded by a short label.
+  inline void
+  BlockVector::print (std::ostream       &out,
+                      const unsigned int  precision,
+                      const bool          scientific,
+                      const bool          across) const
+  {
+    for (unsigned int b=0; b<this->n_blocks(); ++b)
+      {
+        if (!across)
+          out << "Component " << b << std::endl;
+        else
+          out << 'C' << b << ':';
+        this->components[b].print (out, precision, scientific, across);
+      }
+  }
+
+
+
+
+  /**
+   * Free-function counterpart of BlockVector::swap(). It takes the place of
+   * the C++ standard library's default implementation, which goes through a
+   * temporary object, and exchanges the data of the two vectors instead.
+   *
+   * @relates PETScWrappers::BlockVector
+   * @author Wolfgang Bangerth, 2000
+   */
+  inline void
+  swap (BlockVector &u, BlockVector &v)
+  {
+    // delegate to the member function
+    u.swap (v);
+  }
+
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif  // DEAL_II_WITH_PETSC
+
+#endif
diff --git a/include/deal.II/lac/petsc_full_matrix.h b/include/deal.II/lac/petsc_full_matrix.h
new file mode 100644
index 0000000..f77ba2d
--- /dev/null
+++ b/include/deal.II/lac/petsc_full_matrix.h
@@ -0,0 +1,111 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__petsc_full_matrix_h
+#define dealii__petsc_full_matrix_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_PETSC
+
+#  include <deal.II/lac/exceptions.h>
+#  include <deal.II/lac/petsc_matrix_base.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+namespace PETScWrappers
+{
+  /*! @addtogroup PETScWrappers
+   *@{
+   */
+
+  /**
+   * Implementation of a sequential dense matrix class based on PETSc. All the
+   * functionality is actually in the base class, except for the calls to
+   * generate a sequential dense matrix. This is possible since PETSc only
+   * works on an abstract matrix type and internally distributes to functions
+   * that do the actual work depending on the actual matrix type (much like
+   * using virtual functions). Only the functions creating a matrix of
+   * specific type differ, and are implemented in this particular class.
+   *
+   * @ingroup Matrix1
+   * @author Wolfgang Bangerth, 2004
+   */
+  class FullMatrix : public MatrixBase
+  {
+  public:
+
+    /**
+     * Declare type for container size.
+     */
+    typedef types::global_dof_index size_type;
+
+
+    /**
+     * Default constructor. Create an empty matrix.
+     */
+    FullMatrix ();
+
+
+    /**
+     * Create a full matrix of dimensions @p m times @p n.
+     */
+    FullMatrix (const size_type m,
+                const size_type n);
+
+
+    /**
+     * Throw away the present matrix and generate one that has the same
+     * properties as if it were created by the constructor of this class with
+     * the same argument list as the present function.
+     */
+    void reinit (const size_type m,
+                 const size_type n);
+
+
+    /**
+     * Return a reference to the MPI communicator object in use with this
+     * matrix. Since this is a sequential matrix, it returns the MPI_COMM_SELF
+     * communicator.
+     */
+    virtual const MPI_Comm &get_mpi_communicator () const;
+
+  private:
+
+    /**
+     * Do the actual work for the respective reinit() function and the
+     * matching constructor, i.e. create a matrix. Getting rid of the previous
+     * matrix is left to the caller of this function.
+     */
+    void do_reinit (const size_type m,
+                    const size_type n);
+
+  };
+
+  /*@}*/
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_PETSC
+
+/*----------------------------   petsc_full_matrix.h     ---------------------------*/
+
+#endif
+/*----------------------------   petsc_full_matrix.h     ---------------------------*/
diff --git a/include/deal.II/lac/petsc_matrix_base.h b/include/deal.II/lac/petsc_matrix_base.h
new file mode 100644
index 0000000..51bcad3
--- /dev/null
+++ b/include/deal.II/lac/petsc_matrix_base.h
@@ -0,0 +1,1559 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__petsc_matrix_base_h
+#define dealii__petsc_matrix_base_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_PETSC
+
+#  include <deal.II/base/subscriptor.h>
+#  include <deal.II/lac/full_matrix.h>
+#  include <deal.II/lac/exceptions.h>
+#  include <deal.II/lac/vector.h>
+
+#  include <petscmat.h>
+#  include <deal.II/base/std_cxx11/shared_ptr.h>
+
+#  include <vector>
+#  include <cmath>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <typename Matrix> class BlockMatrixBase;
+
+
+namespace PETScWrappers
+{
+  // forward declarations
+  class VectorBase;
+  class MatrixBase;
+
+  namespace MatrixIterators
+  {
+    /**
+     * This class acts as an iterator walking over the elements of PETSc
+     * matrices. Since PETSc offers a uniform interface for all types of
+     * matrices, this iterator can be used to access both sparse and full
+     * matrices.
+     *
+     * Note that PETSc does not give any guarantees as to the order of
+     * elements within each row. Note also that accessing the elements of a
+     * full matrix surprisingly only shows the nonzero elements of the matrix,
+     * not all elements.
+     *
+     * @ingroup PETScWrappers
+     * @author Guido Kanschat, Roy Stogner, Wolfgang Bangerth, 2004
+     */
+    class const_iterator
+    {
+    private:
+      /**
+       * Accessor class for iterators
+       */
+      class Accessor
+      {
+      public:
+        /**
+         * Declare type for container size.
+         */
+        typedef types::global_dof_index size_type;
+
+        /**
+         * Constructor. Since we use accessors only for read access, a const
+         * matrix pointer is sufficient.
+         */
+        Accessor (const MatrixBase *matrix,
+                  const size_type   row,
+                  const size_type   index);
+
+        /**
+         * Copy constructor.
+         */
+        Accessor (const Accessor &a);
+
+        /**
+         * Row number of the element represented by this object.
+         */
+        size_type row() const;
+
+        /**
+         * Index in row of the element represented by this object.
+         */
+        size_type index() const;
+
+        /**
+         * Column number of the element represented by this object.
+         */
+        size_type column() const;
+
+        /**
+         * Value of this matrix entry.
+         */
+        PetscScalar value() const;
+
+        /**
+         * Exception class without additional parameters.
+         */
+        DeclException0 (ExcBeyondEndOfMatrix);
+        /**
+         * Exception: the requested row is not among the locally stored rows.
+         */
+        DeclException3 (ExcAccessToNonlocalRow,
+                        int, int, int,
+                        << "You tried to access row " << arg1
+                        << " of a distributed matrix, but only rows "
+                        << arg2 << " through " << arg3
+                        << " are stored locally and can be accessed.");
+
+      private:
+        /**
+         * The matrix accessed.
+         */
+        mutable MatrixBase *matrix;
+
+        /**
+         * Current row number.
+         */
+        size_type a_row;
+
+        /**
+         * Current index in row.
+         */
+        size_type a_index;
+
+        /**
+         * Cache where we store the column indices of the present row. This is
+         * necessary, since PETSc makes access to the elements of its matrices
+         * rather hard, and it is much more efficient to copy all column
+         * entries of a row once when we enter it than repeatedly asking PETSc
+         * for individual ones. This also makes some sense since it is likely
+         * that we will access them sequentially anyway.
+         *
+         * To keep copying of iterators/accessors acceptably cheap, we hold
+         * these entries through a shared pointer so that more than one
+         * accessor can access this data if necessary.
+         */
+        std_cxx11::shared_ptr<const std::vector<size_type> > colnum_cache;
+
+        /**
+         * Similar cache for the values of this row.
+         */
+        std_cxx11::shared_ptr<const std::vector<PetscScalar> > value_cache;
+
+        /**
+         * Discard the old row caches (they may still be used by other
+         * accessors) and generate new ones for the row pointed to presently
+         * by this accessor.
+         */
+        void visit_present_row ();
+
+        /**
+         * Make enclosing class a friend.
+         */
+        friend class const_iterator;
+      };
+
+    public:
+      /**
+       * Declare type for container size.
+       */
+      typedef types::global_dof_index size_type;
+
+      /**
+       * Constructor. Create an iterator into the matrix @p matrix for the
+       * given row and the index within it.
+       */
+      const_iterator (const MatrixBase *matrix,
+                      const size_type   row,
+                      const size_type   index);
+
+      /**
+       * Prefix increment.
+       */
+      const_iterator &operator++ ();
+
+      /**
+       * Postfix increment.
+       */
+      const_iterator operator++ (int);
+
+      /**
+       * Dereferencing operator.
+       */
+      const Accessor &operator* () const;
+
+      /**
+       * Member-access operator; yields a pointer to the accessor.
+       */
+      const Accessor *operator-> () const;
+
+      /**
+       * Comparison. True, if both iterators point to the same matrix
+       * position.
+       */
+      bool operator == (const const_iterator &) const;
+      /**
+       * Inverse of <tt>==</tt>.
+       */
+      bool operator != (const const_iterator &) const;
+
+      /**
+       * Comparison operator. Result is true if either the first row number is
+       * smaller or if the row numbers are equal and the first index is
+       * smaller.
+       */
+      bool operator < (const const_iterator &) const;
+
+      /**
+       * Exception: the index exceeds the number of elements of the row.
+       */
+      DeclException2 (ExcInvalidIndexWithinRow,
+                      int, int,
+                      << "Attempt to access element " << arg2
+                      << " of row " << arg1
+                      << " which doesn't have that many elements.");
+
+    private:
+      /**
+       * Store an object of the accessor class.
+       */
+      Accessor accessor;
+    };
+
+  }
+
+
+  /**
+   * Base class for all matrix classes that are implemented on top of the
+   * PETSc matrix types. Since in PETSc all matrix types (i.e. sequential and
+   * parallel, sparse, blocked, etc.)  are built by filling the contents of an
+   * abstract object that is only referenced through a pointer of a type that
+   * is independent of the actual matrix type, we can implement almost all
+   * functionality of matrices in this base class. Derived classes will then
+   * only have to provide the functionality to create one or the other kind of
+   * matrix.
+   *
+   * The interface of this class is modeled after the existing SparseMatrix
+   * class in deal.II. It has almost the same member functions, and is often
+   * exchangeable. However, since PETSc only supports a single scalar type
+   * (either double, float, or a complex data type), it is not templated, and
+   * only works with whatever your PETSc installation has defined the data
+   * type PetscScalar to.
+   *
+   * Note that PETSc only guarantees that operations do what you expect if the
+   * functions @p MatAssemblyBegin and @p MatAssemblyEnd have been called
+   * after matrix assembly. Therefore, you need to call
+   * SparseMatrix::compress() before you actually use the matrix. This also
+   * calls @p MatCompress, which compresses the storage format for sparse
+   * matrices by discarding unused elements. PETSc allows you to continue
+   * assembling the matrix after calls to these functions, but since no free
+   * entries are available any more after that, it is better to call
+   * SparseMatrix::compress() only once, at the end of the assembly stage and
+   * before the matrix is actively used.
+   *
+   * @ingroup PETScWrappers
+   * @ingroup Matrix1
+   * @author Wolfgang Bangerth, 2004
+   */
+  class MatrixBase : public Subscriptor
+  {
+  public:
+    /**
+     * Declare a typedef for the iterator class.
+     */
+    typedef MatrixIterators::const_iterator const_iterator;
+
+    /**
+     * Declare type for container size.
+     */
+    typedef types::global_dof_index size_type;
+
+    /**
+     * Declare a typedef in analogy to all the other container classes.
+     */
+    typedef PetscScalar value_type;
+
+    /**
+     * Default constructor.
+     */
+    MatrixBase ();
+
+    /**
+     * Destructor. Made virtual so that one can use pointers to this class.
+     */
+    virtual ~MatrixBase ();
+
+    /**
+     * This operator assigns a scalar to a matrix. Since this does usually not
+     * make much sense (should we set all matrix entries to this value? Only
+     * the nonzero entries of the sparsity pattern?), this operation is only
+     * allowed if the actual value to be assigned is zero. This operator only
+     * exists to allow for the obvious notation <tt>matrix=0</tt>, which sets
+     * all elements of the matrix to zero, but keeps the sparsity pattern
+     * previously used.
+     */
+    MatrixBase &
+    operator = (const value_type d);
+    /**
+     * Release all memory and return to a state just like after having called
+     * the default constructor.
+     */
+    void clear ();
+
+    /**
+     * Set the element (<i>i,j</i>) to @p value.
+     *
+     * If the present object (from a derived class of this one) happens to be
+     * a sparse matrix, then this function adds a new entry to the matrix if
+     * it didn't exist before, very much in contrast to the SparseMatrix class
+     * which throws an error if the entry does not exist. If <tt>value</tt> is
+     * not a finite number an exception is thrown.
+     */
+    void set (const size_type   i,
+              const size_type   j,
+              const PetscScalar value);
+
+    /**
+     * Set all elements given in a FullMatrix<double> into the sparse matrix
+     * locations given by <tt>indices</tt>. In other words, this function
+     * writes the elements in <tt>full_matrix</tt> into the calling matrix,
+     * using the local-to-global indexing specified by <tt>indices</tt> for
+     * both the rows and the columns of the matrix. This function assumes a
+     * quadratic sparse matrix and a quadratic full_matrix, the usual
+     * situation in FE calculations.
+     *
+     * If the present object (from a derived class of this one) happens to be
+     * a sparse matrix, then this function adds some new entries to the matrix
+     * if they didn't exist before, very much in contrast to the SparseMatrix
+     * class which throws an error if the entry does not exist.
+     *
+     * The optional parameter <tt>elide_zero_values</tt> can be used to
+     * specify whether zero values should be inserted anyway or they should be
+     * filtered away. The default value is <tt>false</tt>, i.e., even zero
+     * values are inserted/replaced.
+     */
+    void set (const std::vector<size_type>  &indices,
+              const FullMatrix<PetscScalar> &full_matrix,
+              const bool                     elide_zero_values = false);
+
+    /**
+     * Same function as before, but now including the possibility to use
+     * rectangular full_matrices and different local-to-global indexing on
+     * rows and columns, respectively.
+     */
+    void set (const std::vector<size_type>  &row_indices,
+              const std::vector<size_type>  &col_indices,
+              const FullMatrix<PetscScalar> &full_matrix,
+              const bool                     elide_zero_values = false);
+
+    /**
+     * Set several elements in the specified row of the matrix with column
+     * indices as given by <tt>col_indices</tt> to the respective value.
+     *
+     * If the present object (from a derived class of this one) happens to be
+     * a sparse matrix, then this function adds some new entries to the matrix
+     * if they didn't exist before, very much in contrast to the SparseMatrix
+     * class which throws an error if the entry does not exist.
+     *
+     * The optional parameter <tt>elide_zero_values</tt> can be used to
+     * specify whether zero values should be inserted anyway or they should be
+     * filtered away. The default value is <tt>false</tt>, i.e., even zero
+     * values are inserted/replaced.
+     */
+    void set (const size_type                 row,
+              const std::vector<size_type >  &col_indices,
+              const std::vector<PetscScalar>  &values,
+              const bool                      elide_zero_values = false);
+
+    /**
+     * Set several elements to values given by <tt>values</tt> in a given row
+     * in columns given by col_indices into the sparse matrix.
+     *
+     * If the present object (from a derived class of this one) happens to be
+     * a sparse matrix, then this function adds some new entries to the matrix
+     * if they didn't exist before, very much in contrast to the SparseMatrix
+     * class which throws an error if the entry does not exist.
+     *
+     * The optional parameter <tt>elide_zero_values</tt> can be used to
+     * specify whether zero values should be inserted anyway or they should be
+     * filtered away. The default value is <tt>false</tt>, i.e., even zero
+     * values are inserted/replaced.
+     */
+    void set (const size_type    row,
+              const size_type    n_cols,
+              const size_type   *col_indices,
+              const PetscScalar  *values,
+              const bool         elide_zero_values = false);
+
+    /**
+     * Add @p value to the element (<i>i,j</i>).
+     *
+     * If the present object (from a derived class of this one) happens to be
+     * a sparse matrix, then this function adds a new entry to the matrix if
+     * it didn't exist before, very much in contrast to the SparseMatrix class
+     * which throws an error if the entry does not exist. If <tt>value</tt> is
+     * not a finite number an exception is thrown.
+     */
+    void add (const size_type   i,
+              const size_type   j,
+              const PetscScalar value);
+
+    /**
+     * Add all elements given in a FullMatrix<PetscScalar> into sparse matrix
+     * locations given by <tt>indices</tt>. In other words, this function adds
+     * the elements in <tt>full_matrix</tt> to the respective entries in
+     * calling matrix, using the local-to-global indexing specified by
+     * <tt>indices</tt> for both the rows and the columns of the matrix. This
+     * function assumes a quadratic sparse matrix and a quadratic full_matrix,
+     * the usual situation in FE calculations.
+     *
+     * If the present object (from a derived class of this one) happens to be
+     * a sparse matrix, then this function adds some new entries to the matrix
+     * if they didn't exist before, very much in contrast to the SparseMatrix
+     * class which throws an error if the entry does not exist.
+     *
+     * The optional parameter <tt>elide_zero_values</tt> can be used to
+     * specify whether zero values should be added anyway or these should be
+     * filtered away and only non-zero data is added. The default value is
+     * <tt>true</tt>, i.e., zero values won't be added into the matrix.
+     */
+    void add (const std::vector<size_type>  &indices,
+              const FullMatrix<PetscScalar> &full_matrix,
+              const bool                     elide_zero_values = true);
+
+    /**
+     * Same function as before, but now including the possibility to use
+     * rectangular full_matrices and different local-to-global indexing on
+     * rows and columns, respectively.
+     */
+    void add (const std::vector<size_type>  &row_indices,
+              const std::vector<size_type>  &col_indices,
+              const FullMatrix<PetscScalar> &full_matrix,
+              const bool                     elide_zero_values = true);
+
+    /**
+     * Add the given values to several elements in the specified row of the
+     * matrix, at the column indices given by <tt>col_indices</tt>.
+     *
+     * If the present object (from a derived class of this one) happens to be
+     * a sparse matrix, then this function adds some new entries to the matrix
+     * if they didn't exist before, very much in contrast to the SparseMatrix
+     * class which throws an error if the entry does not exist.
+     *
+     * The optional parameter <tt>elide_zero_values</tt> can be used to
+     * specify whether zero values should be added anyway or these should be
+     * filtered away and only non-zero data is added. The default value is
+     * <tt>true</tt>, i.e., zero values won't be added into the matrix.
+     */
+    void add (const size_type                 row,
+              const std::vector<size_type>   &col_indices,
+              const std::vector<PetscScalar>  &values,
+              const bool                      elide_zero_values = true);
+
+    /**
+     * Add an array of values given by <tt>values</tt> in the given global
+     * matrix row at columns specified by col_indices in the sparse matrix.
+     *
+     * If the present object (from a derived class of this one) happens to be
+     * a sparse matrix, then this function adds some new entries to the matrix
+     * if they didn't exist before, very much in contrast to the SparseMatrix
+     * class which throws an error if the entry does not exist.
+     *
+     * The optional parameter <tt>elide_zero_values</tt> can be used to
+     * specify whether zero values should be added anyway or these should be
+     * filtered away and only non-zero data is added. The default value is
+     * <tt>true</tt>, i.e., zero values won't be added into the matrix.
+     */
+    void add (const size_type    row,
+              const size_type    n_cols,
+              const size_type   *col_indices,
+              const PetscScalar  *values,
+              const bool         elide_zero_values = true,
+              const bool         col_indices_are_sorted = false);
+
+    /**
+     * Remove all elements from this <tt>row</tt> by setting them to zero. The
+     * function does not modify the number of allocated nonzero entries, it
+     * only sets some entries to zero. It may drop them from the sparsity
+     * pattern, though (but retains the allocated memory in case new entries
+     * are again added later).
+     *
+     * This operation is used in eliminating constraints (e.g. due to hanging
+     * nodes) and makes sure that we can write this modification to the matrix
+     * without having to read entries (such as the locations of non-zero
+     * elements) from it -- without this operation, removing constraints on
+     * parallel matrices is a rather complicated procedure.
+     *
+     * The second parameter can be used to set the diagonal entry of this row
+     * to a value different from zero. The default is to set it to zero.
+     */
+    void clear_row (const size_type   row,
+                    const PetscScalar new_diag_value = 0);
+
+    /**
+     * Same as clear_row(), except that it works on a number of rows at once.
+     *
+     * The second parameter can be used to set the diagonal entries of all
+     * cleared rows to something different from zero. Note that all of these
+     * diagonal entries get the same value -- if you want different values for
+     * the diagonal entries, you have to set them by hand.
+     */
+    void clear_rows (const std::vector<size_type> &rows,
+                     const PetscScalar             new_diag_value = 0);
+
+    /**
+     * PETSc matrices store their own sparsity patterns. So, in analogy to our
+     * own SparsityPattern class, this function compresses the sparsity
+     * pattern and allows the resulting matrix to be used in all other
+     * operations where before only assembly functions were allowed. This
+     * function must therefore be called once you have assembled the matrix.
+     *
+     * See
+     * @ref GlossCompress "Compressing distributed objects"
+     * for more information.
+     */
+    void compress (const VectorOperation::values operation);
+
+    /**
+     * Return the value of the entry (<i>i,j</i>).  This may be an expensive
+     * operation and you should always take care where to call this function.
+     * In contrast to the respective function in the @p MatrixBase class, we
+     * don't throw an exception if the respective entry doesn't exist in the
+     * sparsity pattern of this class, since PETSc does not transmit this
+     * information.
+     *
+     * This function is therefore exactly equivalent to the <tt>el()</tt>
+     * function.
+     */
+    PetscScalar operator () (const size_type i,
+                             const size_type j) const;
+
+    /**
+     * Return the value of the matrix entry (<i>i,j</i>). If this entry does
+     * not exist in the sparsity pattern, then zero is returned. While this
+     * may be convenient in some cases, note that it is simple to write
+     * algorithms that are slow compared to an optimal solution, since the
+     * sparsity of the matrix is not used.
+     */
+    PetscScalar el (const size_type i,
+                    const size_type j) const;
+
+    /**
+     * Return the main diagonal element in the <i>i</i>th row. This function
+     * throws an error if the matrix is not quadratic.
+     *
+     * Since we do not have direct access to the underlying data structure,
+     * this function is no faster than the elementwise access using the el()
+     * function. However, we provide this function for compatibility with the
+     * SparseMatrix class.
+     */
+    PetscScalar diag_element (const size_type i) const;
+
+    /**
+     * Return the number of rows in this matrix.
+     */
+    size_type m () const;
+
+    /**
+     * Return the number of columns in this matrix.
+     */
+    size_type n () const;
+
+    /**
+     * Return the local dimension of the matrix, i.e. the number of rows
+     * stored on the present MPI process. For sequential matrices, this number
+     * is the same as m(), but for parallel matrices it may be smaller.
+     *
+     * To figure out which elements exactly are stored locally, use
+     * local_range().
+     */
+    size_type local_size () const;
+
+    /**
+     * Return a pair of indices indicating which rows of this matrix are
+     * stored locally. The first number is the index of the first row stored,
+     * the second the index of the one past the last one that is stored
+     * locally. If this is a sequential matrix, then the result will be the
+     * pair (0,m()), otherwise it will be a pair (i,i+n), where
+     * <tt>n=local_size()</tt>.
+     */
+    std::pair<size_type, size_type>
+    local_range () const;
+
+    /**
+     * Return whether @p index is in the local range or not, see also
+     * local_range().
+     */
+    bool in_local_range (const size_type index) const;
+
+    /**
+     * Return a reference to the MPI communicator object in use with this
+     * matrix. This function has to be implemented in derived classes.
+     */
+    virtual const MPI_Comm &get_mpi_communicator () const = 0;
+
+    /**
+     * Return the number of nonzero elements of this matrix. Actually, it
+     * returns the number of entries in the sparsity pattern; if any of the
+     * entries should happen to be zero, it is counted anyway.
+     */
+    size_type n_nonzero_elements () const;
+
+    /**
+     * Number of entries in a specific row.
+     */
+    size_type row_length (const size_type row) const;
+
+    /**
+     * Return the l1-norm of the matrix, that is $|M|_1=max_{all columns
+     * j}\sum_{all rows i} |M_ij|$, (max. sum of columns). This is the natural
+     * matrix norm that is compatible to the l1-norm for vectors, i.e.
+     * $|Mv|_1\leq |M|_1 |v|_1$. (cf. Haemmerlin-Hoffmann: Numerische
+     * Mathematik)
+     */
+    PetscReal l1_norm () const;
+
+    /**
+     * Return the linfty-norm of the matrix, that is $|M|_infty=max_{all rows
+     * i}\sum_{all columns j} |M_ij|$, (max. sum of rows). This is the natural
+     * matrix norm that is compatible to the linfty-norm of vectors, i.e.
+     * $|Mv|_infty \leq |M|_infty |v|_infty$. (cf. Haemmerlin-Hoffmann:
+     * Numerische Mathematik)
+     */
+    PetscReal linfty_norm () const;
+
+    /**
+     * Return the frobenius norm of the matrix, i.e. the square root of the
+     * sum of squares of all entries in the matrix.
+     */
+    PetscReal frobenius_norm () const;
+
+
+    /**
+     * Return the square of the norm of the vector $v$ with respect to the
+     * norm induced by this matrix, i.e. $\left(v,Mv\right)$. This is useful,
+     * e.g. in the finite element context, where the $L_2$ norm of a function
+     * equals the matrix norm with respect to the mass matrix of the vector
+     * representing the nodal values of the finite element function.
+     *
+     * Obviously, the matrix needs to be quadratic for this operation.
+     *
+     * The implementation of this function is not as efficient as the one in
+     * the @p MatrixBase class used in deal.II (i.e. the original one, not the
+     * PETSc wrapper class) since PETSc doesn't support this operation and
+     * needs a temporary vector.
+     *
+     * Note that if the current object represents a parallel distributed
+     * matrix (of type PETScWrappers::MPI::SparseMatrix), then the given
+     * vector has to be a distributed vector as well. Conversely, if the
+     * matrix is not distributed, then neither may the vector be.
+     */
+    PetscScalar matrix_norm_square (const VectorBase &v) const;
+
+
+    /**
+     * Compute the matrix scalar product $\left(u,Mv\right)$.
+     *
+     * The implementation of this function is not as efficient as the one in
+     * the @p MatrixBase class used in deal.II (i.e. the original one, not the
+     * PETSc wrapper class) since PETSc doesn't support this operation and
+     * needs a temporary vector.
+     *
+     * Note that if the current object represents a parallel distributed
+     * matrix (of type PETScWrappers::MPI::SparseMatrix), then both vectors
+     * have to be distributed vectors as well. Conversely, if the matrix is
+     * not distributed, then neither of the vectors may be.
+     */
+    PetscScalar matrix_scalar_product (const VectorBase &u,
+                                       const VectorBase &v) const;
+
+
+#if DEAL_II_PETSC_VERSION_GTE(3,1,0)
+    /**
+     * Return the trace of the matrix, i.e. the sum of all diagonal entries in
+     * the matrix.
+     */
+    PetscScalar trace () const;
+#endif
+
+    /**
+     * Multiply the entire matrix by a fixed factor.
+     */
+    MatrixBase &operator *= (const PetscScalar factor);
+
+    /**
+     * Divide the entire matrix by a fixed factor.
+     */
+    MatrixBase &operator /= (const PetscScalar factor);
+
+    /**
+     * Add the matrix @p other scaled by the factor @p factor to the current
+     * matrix.
+     */
+    MatrixBase &add (const MatrixBase &other,
+                     const PetscScalar factor);
+
+    /**
+     * Matrix-vector multiplication: let <i>dst = M*src</i> with <i>M</i>
+     * being this matrix.
+     *
+     * Source and destination must not be the same vector.
+     *
+     * Note that if the current object represents a parallel distributed
+     * matrix (of type PETScWrappers::MPI::SparseMatrix), then both vectors
+     * have to be distributed vectors as well. Conversely, if the matrix is
+     * not distributed, then neither of the vectors may be.
+     */
+    void vmult (VectorBase       &dst,
+                const VectorBase &src) const;
+
+    /**
+     * Matrix-vector multiplication: let <i>dst = M<sup>T</sup>*src</i> with
+     * <i>M</i> being this matrix. This function does the same as vmult() but
+     * takes the transposed matrix.
+     *
+     * Source and destination must not be the same vector.
+     *
+     * Note that if the current object represents a parallel distributed
+     * matrix (of type PETScWrappers::MPI::SparseMatrix), then both vectors
+     * have to be distributed vectors as well. Conversely, if the matrix is
+     * not distributed, then neither of the vectors may be.
+     */
+    void Tvmult (VectorBase       &dst,
+                 const VectorBase &src) const;
+
+    /**
+     * Adding Matrix-vector multiplication. Add <i>M*src</i> on <i>dst</i>
+     * with <i>M</i> being this matrix.
+     *
+     * Source and destination must not be the same vector.
+     *
+     * Note that if the current object represents a parallel distributed
+     * matrix (of type PETScWrappers::MPI::SparseMatrix), then both vectors
+     * have to be distributed vectors as well. Conversely, if the matrix is
+     * not distributed, then neither of the vectors may be.
+     */
+    void vmult_add (VectorBase       &dst,
+                    const VectorBase &src) const;
+
+    /**
+     * Adding Matrix-vector multiplication. Add <i>M<sup>T</sup>*src</i> to
+     * <i>dst</i> with <i>M</i> being this matrix. This function does the same
+     * as vmult_add() but takes the transposed matrix.
+     *
+     * Source and destination must not be the same vector.
+     *
+     * Note that if the current object represents a parallel distributed
+     * matrix (of type PETScWrappers::MPI::SparseMatrix), then both vectors
+     * have to be distributed vectors as well. Conversely, if the matrix is
+     * not distributed, then neither of the vectors may be.
+     */
+    void Tvmult_add (VectorBase       &dst,
+                     const VectorBase &src) const;
+
+
+    /**
+     * Compute the residual of an equation <i>Mx=b</i>, where the residual is
+     * defined to be <i>r=b-Mx</i>. Write the residual into @p dst. The
+     * <i>l<sub>2</sub></i> norm of the residual vector is returned.
+     *
+     * Source <i>x</i> and destination <i>dst</i> must not be the same vector.
+     *
+     * Note that if the current object represents a parallel distributed
+     * matrix (of type PETScWrappers::MPI::SparseMatrix), then all vectors
+     * have to be distributed vectors as well. Conversely, if the matrix is
+     * not distributed, then neither of the vectors may be.
+     */
+    PetscScalar residual (VectorBase       &dst,
+                          const VectorBase &x,
+                          const VectorBase &b) const;
+
+    /**
+     * Iterator starting at the first entry.
+     */
+    const_iterator begin () const;
+
+    /**
+     * Final iterator.
+     */
+    const_iterator end () const;
+
+    /**
+     * Iterator starting at the first entry of row @p r.
+     *
+     * Note that if the given row is empty, i.e. does not contain any nonzero
+     * entries, then the iterator returned by this function equals
+     * <tt>end(r)</tt>. Note also that the iterator may not be dereferencable
+     * in that case.
+     */
+    const_iterator begin (const size_type r) const;
+
+    /**
+     * Final iterator of row <tt>r</tt>. It points to the first element past
+     * the end of line @p r, or past the end of the entire sparsity pattern.
+     *
+     * Note that the end iterator is not necessarily dereferencable. This is
+     * in particular the case if it is the end iterator for the last row of a
+     * matrix.
+     */
+    const_iterator end (const size_type r) const;
+
+    /**
+     * Conversion operator to gain access to the underlying PETSc type. If you
+     * do this, you cut this class off some information it may need, so this
+     * conversion operator should only be used if you know what you do. In
+     * particular, it should only be used for read-only operations into the
+     * matrix.
+     */
+    operator Mat () const;
+
+    /**
+     * Make an in-place transpose of a matrix.
+     */
+    void transpose ();
+
+    /**
+     * Test whether a matrix is symmetric.  Default tolerance is
+     * $1000\times32$-bit machine precision.
+     */
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    PetscTruth
+#else
+    PetscBool
+#endif
+    is_symmetric (const double tolerance = 1.e-12);
+
+    /**
+     * Test whether a matrix is Hermitian, i.e. it is the complex conjugate of
+     * its transpose. Default tolerance is $1000\times32$-bit machine
+     * precision.
+     */
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    PetscTruth
+#else
+    PetscBool
+#endif
+    is_hermitian (const double tolerance = 1.e-12);
+
+    /**
+     * Print the PETSc matrix object values using the PETSc internal matrix
+     * viewer function <tt>MatView</tt>. The default format prints the
+     * non-zero matrix elements. For other valid view formats, consult
+     * the MatView manual page:
+     * http://www.mcs.anl.gov/petsc/petsc-current/docs/manualpages/Mat/MatView.html
+     */
+    void write_ascii (const PetscViewerFormat format = PETSC_VIEWER_DEFAULT);
+
+    /**
+     * Print the elements of a matrix to the given output stream.
+     *
+     * @param[in,out] out The output stream to which to write.
+     * @param[in] alternative_output This argument is ignored. It exists for
+     * compatibility with similar functions in other matrix classes.
+     */
+    void print (std::ostream &out,
+                const bool    alternative_output = false) const;
+
+    /**
+     * Return the number of bytes consumed by this matrix on this CPU.
+     */
+    std::size_t memory_consumption() const;
+
+    /**
+     * Exception
+     */
+    DeclException1 (ExcPETScError,
+                    int,
+                    << "An error with error number " << arg1
+                    << " occurred while calling a PETSc function");
+    /**
+     * Exception
+     */
+    DeclException0 (ExcSourceEqualsDestination);
+
+    /**
+     * Exception.
+     */
+    DeclException2 (ExcWrongMode,
+                    int, int,
+                    << "You tried to do a "
+                    << (arg1 == 1 ?
+                        "'set'" :
+                        (arg1 == 2 ?
+                         "'add'" : "???"))
+                    << " operation but the matrix is currently in "
+                    << (arg2 == 1 ?
+                        "'set'" :
+                        (arg2 == 2 ?
+                         "'add'" : "???"))
+                    << " mode. You first have to call 'compress()'.");
+
+  protected:
+    /**
+     * A generic matrix object in PETSc. The actual type, a sparse matrix, is
+     * set in the constructor.
+     */
+    Mat matrix;
+
+    /**
+     * Store whether the last action was a write or add operation.
+     */
+    VectorOperation::values last_action;
+
+    /**
+     * Ensure that the add/set mode that is required for actions following
+     * this call is compatible with the current mode. Should be called from
+     * all internal functions accessing matrix elements.
+     */
+    void prepare_action(const VectorOperation::values new_action);
+
+    /**
+     * Internal function that checks that there are no pending insert/add
+     * operations. Throws an exception otherwise. Useful before calling any
+     * PETSc internal functions modifying the matrix.
+     */
+    void assert_is_compressed();
+
+    /**
+     * For some matrix storage formats, in particular for the PETSc
+     * distributed blockmatrices, set and add operations on individual
+     * elements can not be freely mixed. Rather, one has to synchronize
+     * operations when one wants to switch from setting elements to adding to
+     * elements. BlockMatrixBase automatically synchronizes the access by
+     * calling this helper function for each block. This function ensures that
+     * the matrix is in a state that allows adding elements; if it previously
+     * already was in this state, the function does nothing.
+     */
+    void prepare_add();
+    /**
+     * Same as prepare_add() but prepare the matrix for setting elements if
+     * the representation of elements in this class requires such an
+     * operation.
+     */
+    void prepare_set();
+
+
+
+  private:
+
+    /**
+     * purposefully not implemented
+     */
+    MatrixBase(const MatrixBase &);
+    /**
+     * purposefully not implemented
+     */
+    MatrixBase &operator=(const MatrixBase &);
+
+    /**
+     * An internal array of integer values that is used to store the column
+     * indices when adding/inserting local data into the (large) sparse
+     * matrix.
+     */
+    std::vector<PetscInt> column_indices;
+
+    /**
+     * An internal array of scalar values that is used to store the column
+     * values when adding/inserting local data into the (large) sparse
+     * matrix.
+     */
+    std::vector<PetscScalar> column_values;
+
+
+    /**
+     * To allow calling protected prepare_add() and prepare_set().
+     */
+    template <class> friend class dealii::BlockMatrixBase;
+  };
+
+
+
+#ifndef DOXYGEN
+// -------------------------- inline and template functions ----------------------
+
+
+  namespace MatrixIterators
+  {
+
+    // Construct an accessor positioned at entry @p index of row @p row.
+    inline
+    const_iterator::Accessor::Accessor (const MatrixBase *matrix,
+                                        const size_type   row,
+                                        const size_type   index)
+      : matrix(const_cast<MatrixBase *>(matrix)),
+        a_row(row),
+        a_index(index)
+    {
+      // defined elsewhere; presumably loads the caches for a_row
+      visit_present_row ();
+    }
+
+
+    // Copy constructor: duplicates the position and cache members of @p a.
+    inline
+    const_iterator::Accessor::Accessor (const Accessor &a)
+      : matrix (a.matrix),
+        a_row (a.a_row),
+        a_index (a.a_index),
+        colnum_cache (a.colnum_cache),
+        value_cache (a.value_cache)
+    {
+    }
+
+
+    // Row of the element this accessor currently refers to.
+    inline const_iterator::Accessor::size_type
+    const_iterator::Accessor::row() const
+    {
+      Assert (a_row < matrix->m(), ExcBeyondEndOfMatrix());
+      return this->a_row;
+    }
+
+
+    // Column of the current element, read from the cached column numbers.
+    inline const_iterator::Accessor::size_type
+    const_iterator::Accessor::column() const
+    {
+      Assert (a_row < matrix->m(), ExcBeyondEndOfMatrix());
+      return (*this->colnum_cache)[a_index];
+    }
+
+
+    // Index of the current element within the cached row data.
+    inline const_iterator::Accessor::size_type
+    const_iterator::Accessor::index() const
+    {
+      Assert (a_row < matrix->m(), ExcBeyondEndOfMatrix());
+      return this->a_index;
+    }
+
+
+    // Value of the current element, read from the cached row values.
+    inline PetscScalar
+    const_iterator::Accessor::value() const
+    {
+      Assert (a_row < matrix->m(), ExcBeyondEndOfMatrix());
+      return (*this->value_cache)[a_index];
+    }
+
+
+    // Create an iterator for entry @p index of row @p row of @p matrix.
+    inline
+    const_iterator::const_iterator(const MatrixBase *matrix,
+                                   const size_type   row,
+                                   const size_type   index)
+      : accessor(matrix, row, index)
+    {
+    }
+
+
+
+    // Prefix increment: step to the next stored entry, skipping empty rows.
+    inline
+    const_iterator &
+    const_iterator::operator++ ()
+    {
+      Assert (accessor.a_row < accessor.matrix->m(), ExcIteratorPastEnd());
+
+      ++accessor.a_index;
+
+      // still inside the cached row: nothing else to do
+      if (accessor.a_index < accessor.colnum_cache->size())
+        return *this;
+
+      // we ran off the end of the row: restart at the first entry of the
+      // next row that has a nonzero number of entries (or stop at m())
+      accessor.a_index = 0;
+      for (++accessor.a_row;
+           accessor.a_row < accessor.matrix->m();
+           ++accessor.a_row)
+        if (accessor.matrix->row_length(accessor.a_row) != 0)
+          break;
+
+      accessor.visit_present_row();
+      return *this;
+    }
+
+
+    // Postfix increment: advance, but hand back the pre-increment iterator.
+    inline const_iterator
+    const_iterator::operator++ (int)
+    {
+      const const_iterator previous (*this);
+      this->operator++ ();
+      return previous;
+    }
+
+
+    // Dereference: expose the accessor describing the current element.
+    inline const const_iterator::Accessor &
+    const_iterator::operator* () const
+    {
+      return this->accessor;
+    }
+
+
+    // Member access: pointer to the accessor describing the current element.
+    inline const const_iterator::Accessor *
+    const_iterator::operator-> () const
+    {
+      return &(this->accessor);
+    }
+
+
+    // Equality compares only the (row, index) position, not the matrix.
+    inline bool
+    const_iterator::operator == (const const_iterator &other) const
+    {
+      if (accessor.a_row != other.accessor.a_row)
+        return false;
+      return accessor.a_index == other.accessor.a_index;
+    }
+
+
+    // Inequality is defined as the negation of operator==.
+    inline bool
+    const_iterator::operator != (const const_iterator &other) const
+    {
+      const bool same_position = (*this == other);
+      return !same_position;
+    }
+
+
+    // Lexicographic ordering by (row, index). The checked accessors row()
+    // and index() are used, so their Asserts apply to both operands.
+    inline bool
+    const_iterator::operator < (const const_iterator &other) const
+    {
+      if (accessor.row() != other.accessor.row())
+        return accessor.row() < other.accessor.row();
+      return accessor.index() < other.accessor.index();
+    }
+
+  }
+
+
+
+  // Inline the set() and add()
+  // functions, since they will be
+  // called frequently, and the
+  // compiler can optimize away
+  // some unnecessary loops when
+  // the sizes are given at
+  // compile time.
+  // Single-entry set(): forwards to the pointer-based overload with one
+  // column and without eliding zeros.
+  inline void
+  MatrixBase::set (const size_type   i,
+                   const size_type   j,
+                   const PetscScalar value)
+  {
+    AssertIsFinite(value);
+    set (i, 1, &j, &value, /*elide_zero_values=*/false);
+  }
+
+
+
+  // Set a square block, using @p indices for both rows and columns.
+  inline void
+  MatrixBase::set (const std::vector<size_type>  &indices,
+                   const FullMatrix<PetscScalar> &values,
+                   const bool                     elide_zero_values)
+  {
+    Assert (indices.size() == values.m(),
+            ExcDimensionMismatch(indices.size(), values.m()));
+    Assert (values.m() == values.n(), ExcNotQuadratic());
+
+    const size_type n_dofs = indices.size();
+    for (size_type r=0; r<n_dofs; ++r)
+      set (indices[r], n_dofs, &indices[0], &values(r,0), elide_zero_values);
+  }
+
+
+
+  // Set a rectangular block with separate row and column index sets.
+  inline void
+  MatrixBase::set (const std::vector<size_type>  &row_indices,
+                   const std::vector<size_type>  &col_indices,
+                   const FullMatrix<PetscScalar> &values,
+                   const bool                     elide_zero_values)
+  {
+    Assert (row_indices.size() == values.m(),
+            ExcDimensionMismatch(row_indices.size(), values.m()));
+    Assert (col_indices.size() == values.n(),
+            ExcDimensionMismatch(col_indices.size(), values.n()));
+    const size_type n_columns = col_indices.size();
+    for (size_type r=0; r<row_indices.size(); ++r)
+      set (row_indices[r], n_columns, &col_indices[0], &values(r,0),
+           elide_zero_values);
+  }
+
+
+
+  // Vector-based set() for one row: forwards to the pointer-based overload.
+  inline void
+  MatrixBase::set (const size_type                 row,
+                   const std::vector<size_type>   &col_indices,
+                   const std::vector<PetscScalar>  &values,
+                   const bool                      elide_zero_values)
+  {
+    Assert (col_indices.size() == values.size(),
+            ExcDimensionMismatch(col_indices.size(), values.size()));
+    const size_type   *const cols = &col_indices[0];
+    const PetscScalar *const vals = &values[0];
+    set (row, col_indices.size(), cols, vals, elide_zero_values);
+  }
+
+
+
+  // Pointer-based set(): insert n_cols entries into one row via MatSetValues;
+  // zeros are dropped only if elide_zero_values is set (any index width).
+  inline
+  void
+  MatrixBase::set (const size_type    row,
+                   const size_type    n_cols,
+                   const size_type   *col_indices,
+                   const PetscScalar  *values,
+                   const bool         elide_zero_values)
+  {
+    prepare_action(VectorOperation::insert);
+
+    const PetscInt petsc_i = row;
+    const PetscInt *col_index_ptr;
+
+    PetscScalar const *col_value_ptr;
+    int n_columns;
+
+    // If we don't elide zeros, the pointers are already available...
+#ifndef PETSC_USE_64BIT_INDICES
+    if (elide_zero_values == false)
+      {
+        col_index_ptr = reinterpret_cast<const PetscInt *>(col_indices);
+        col_value_ptr = values;
+        n_columns = n_cols;
+      }
+    else
+#endif
+      {
+        // Otherwise copy the entries into the scratch arrays, dropping
+        // zeros only on request (64-bit index builds always come here).
+        if (column_indices.size() < n_cols)
+          {
+            column_indices.resize(n_cols);
+            column_values.resize(n_cols);
+          }
+
+        n_columns = 0;
+        for (size_type j=0; j<n_cols; ++j)
+          {
+            const PetscScalar value = values[j];
+            AssertIsFinite(value);
+            if (elide_zero_values == false || value != PetscScalar())
+              {
+                column_indices[n_columns] = col_indices[j];
+                column_values[n_columns] = value;
+                n_columns++;
+              }
+          }
+        Assert(n_columns <= (int)n_cols, ExcInternalError());
+
+        col_index_ptr = &column_indices[0];
+        col_value_ptr = &column_values[0];
+      }
+
+    const int ierr
+      = MatSetValues (matrix, 1, &petsc_i, n_columns, col_index_ptr,
+                      col_value_ptr, INSERT_VALUES);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+
+  // Single-entry add(): a zero addend only triggers prepare_action(); any
+  // other value is forwarded to the pointer-based overload as one column.
+  inline void
+  MatrixBase::add (const size_type   i,
+                   const size_type   j,
+                   const PetscScalar value)
+  {
+    AssertIsFinite(value);
+
+    if (value != PetscScalar())
+      {
+        add (i, 1, &j, &value, /*elide_zero_values=*/false);
+        return;
+      }
+
+    // we have to check after using Insert/Add in any case to be
+    // consistent with the MPI communication model (see the comments in
+    // the documentation of TrilinosWrappers::Vector), but we can save
+    // some work if the addend is zero. The pointer-based overload
+    // performs this step itself, so it is only needed on this shortcut
+    // path.
+    prepare_action(VectorOperation::add);
+  }
+
+
+
+  // Add a square block, using @p indices for both rows and columns.
+  inline void
+  MatrixBase::add (const std::vector<size_type>  &indices,
+                   const FullMatrix<PetscScalar> &values,
+                   const bool                     elide_zero_values)
+  {
+    Assert (indices.size() == values.m(),
+            ExcDimensionMismatch(indices.size(), values.m()));
+    Assert (values.m() == values.n(), ExcNotQuadratic());
+
+    const size_type n_dofs = indices.size();
+    for (size_type r=0; r<n_dofs; ++r)
+      add (indices[r], n_dofs, &indices[0], &values(r,0), elide_zero_values);
+  }
+
+
+
+  inline
+  void
+  MatrixBase::add (const std::vector<size_type>  &row_indices,
+                   const std::vector<size_type>  &col_indices,
+                   const FullMatrix<PetscScalar> &values,
+                   const bool                     elide_zero_values)
+  {
+    Assert (row_indices.size() == values.m(),
+            ExcDimensionMismatch(row_indices.size(), values.m()));
+    Assert (col_indices.size() == values.n(),
+            ExcDimensionMismatch(col_indices.size(), values.n()));
+    // Row i of @p values goes to matrix row row_indices[i], columns given by @p col_indices.
+    for (size_type i=0; i<row_indices.size(); ++i)
+      add (row_indices[i], col_indices.size(), &col_indices[0], &values(i,0),  // reads row i contiguously from values(i,0)
+           elide_zero_values);
+  }
+
+
+
+  inline
+  void
+  MatrixBase::add (const size_type                 row,
+                   const std::vector<size_type>   &col_indices,
+                   const std::vector<PetscScalar>  &values,
+                   const bool                      elide_zero_values)
+  {
+    Assert (col_indices.size() == values.size(),
+            ExcDimensionMismatch(col_indices.size(), values.size()));
+    // Forward the vectors' storage to the pointer-based overload.
+    add (row, col_indices.size(), &col_indices[0], &values[0],  // NOTE(review): &v[0] requires non-empty vectors
+         elide_zero_values);
+  }
+
+
+
+  inline
+  void
+  MatrixBase::add (const size_type    row,
+                   const size_type    n_cols,
+                   const size_type   *col_indices,
+                   const PetscScalar *values,
+                   const bool         elide_zero_values,
+                   const bool          /*col_indices_are_sorted*/)
+  {
+    (void)elide_zero_values;  // not read when PETSC_USE_64BIT_INDICES is defined
+    // Record 'add' as the pending operation kind before touching the matrix.
+    prepare_action(VectorOperation::add);
+    // Row index, column indices and values in the form handed to MatSetValues().
+    const PetscInt petsc_i = row;
+    PetscInt *col_index_ptr;
+
+    PetscScalar const *col_value_ptr;
+    int n_columns;
+    // Fast path (compiled only without 64-bit PETSc indices): if zeros are
+    // not elided, pass the caller's arrays on directly, merely casting the index pointer.
+#ifndef PETSC_USE_64BIT_INDICES
+    if (elide_zero_values == false)
+      {
+        col_index_ptr = (int *)col_indices;
+        col_value_ptr = values;
+        n_columns = n_cols;
+      }
+    else
+#endif
+      {
+        // Otherwise (and always with 64-bit indices, where zeros are thus
+        // dropped regardless of the flag) copy the nonzero entries and their indices.
+        if (column_indices.size() < n_cols)
+          {
+            column_indices.resize(n_cols);
+            column_values.resize(n_cols);
+          }
+
+        n_columns = 0;
+        for (size_type j=0; j<n_cols; ++j)
+          {
+            const PetscScalar value = values[j];
+            AssertIsFinite(value);  // finiteness is only checked on this copy path
+            if (value != PetscScalar())
+              {
+                column_indices[n_columns] = col_indices[j];
+                column_values[n_columns] = value;
+                n_columns++;
+              }
+          }
+        Assert(n_columns <= (int)n_cols, ExcInternalError());
+
+        col_index_ptr = &column_indices[0];
+        col_value_ptr = &column_values[0];
+      }
+    // Hand the (possibly filtered) single row to PETSc in additive mode.
+    const int ierr
+      = MatSetValues (matrix, 1, &petsc_i, n_columns, col_index_ptr,
+                      col_value_ptr, ADD_VALUES);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+
+
+
+
+  inline
+  PetscScalar
+  MatrixBase::operator() (const size_type i,
+                          const size_type j) const
+  {
+    return el(i,j);  // read access to entry (i,j) is delegated to el()
+  }
+
+
+
+  inline
+  MatrixBase::const_iterator
+  MatrixBase::begin() const
+  {
+    return const_iterator(this, 0, 0);  // starts at row 0; unlike begin(r), no row_length() check here
+  }
+
+
+  inline
+  MatrixBase::const_iterator
+  MatrixBase::end() const
+  {
+    return const_iterator(this, m(), 0);  // row index m() is one past the last row
+  }
+
+
+  inline
+  MatrixBase::const_iterator
+  MatrixBase::begin(const size_type r) const
+  {
+    Assert (r < m(), ExcIndexRange(r, 0, m()));
+    if (row_length(r) > 0)  // row r has entries: start right there
+      return const_iterator(this, r, 0);
+    else
+      return end (r);  // empty row: same position as the end of row r
+  }
+
+
+  inline
+  MatrixBase::const_iterator
+  MatrixBase::end(const size_type r) const
+  {
+    Assert (r < m(), ExcIndexRange(r, 0, m()));
+
+    // Position the iterator on the first entry
+    // after row r, i.e. at the start of the
+    // next row that is not empty.
+    for (size_type i=r+1; i<m(); ++i)
+      if (row_length(i) > 0)
+        return const_iterator(this, i, 0);
+
+    // No later row has any entries: return the
+    // end iterator of the whole matrix.
+    return end();
+  }
+
+
+
+  inline
+  bool
+  MatrixBase::in_local_range (const size_type index) const
+  {
+    PetscInt begin, end;
+    // Ask PETSc which range of rows is owned by the calling process.
+    const int ierr = MatGetOwnershipRange (static_cast<const Mat &>(matrix),
+                                           &begin, &end);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+    // The range is tested as half-open: begin is inclusive, end is exclusive.
+    return ((index >= static_cast<size_type>(begin)) &&
+            (index < static_cast<size_type>(end)));
+  }
+
+
+
+  inline
+  void
+  MatrixBase::prepare_action(const VectorOperation::values new_action)
+  {
+    if (last_action == VectorOperation::unknown)
+      last_action = new_action;  // nothing pending yet: adopt the requested mode
+    // A request that differs from the pending operation kind is a usage error.
+    Assert (last_action == new_action, ExcWrongMode (last_action, new_action));
+  }
+
+
+
+  inline
+  void
+  MatrixBase::assert_is_compressed ()
+  {
+    // compress() resets the last action to 'unknown', which allows us to check
+    // whether there are pending add/insert operations:
+    AssertThrow (last_action == VectorOperation::unknown,
+                 ExcMessage("Error: missing compress() call."));
+  }
+
+
+
+  inline
+  void
+  MatrixBase::prepare_add()
+  {
+    prepare_action(VectorOperation::add);  // shorthand for announcing an 'add' operation
+  }
+
+
+
+  inline
+  void
+  MatrixBase::prepare_set()
+  {
+    prepare_action(VectorOperation::insert);  // 'set' maps to VectorOperation::insert
+  }
+
+#endif // DOXYGEN
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+
+#endif // DEAL_II_WITH_PETSC
+
+
+/*----------------------------   petsc_matrix_base.h     ---------------------------*/
+
+#endif
+/*----------------------------   petsc_matrix_base.h     ---------------------------*/
diff --git a/include/deal.II/lac/petsc_matrix_free.h b/include/deal.II/lac/petsc_matrix_free.h
new file mode 100644
index 0000000..1f690fe
--- /dev/null
+++ b/include/deal.II/lac/petsc_matrix_free.h
@@ -0,0 +1,311 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2012 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__petsc_matrix_free_h
+#define dealii__petsc_matrix_free_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_PETSC
+
+#  include <deal.II/lac/exceptions.h>
+#  include <deal.II/lac/petsc_matrix_base.h>
+#  include <deal.II/lac/petsc_vector.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+namespace PETScWrappers
+{
+  /**
+   * Implementation of a parallel matrix class based on PETSc
+   * <tt>MatShell</tt> matrix-type. This base class implements only the
+   * interface to the PETSc matrix object, while all the functionality is
+   * contained in the matrix-vector multiplication which must be reimplemented
+   * in derived classes.
+   *
+   * This interface is an addition to the dealii::MatrixFree class to realize
+   * user-defined matrix-classes together with PETSc solvers and
+   * functionalities. See also the documentation of dealii::MatrixFree class
+   * and step-37 and step-48.
+   *
+   * Similar to other matrix classes in namespaces PETScWrappers and
+   * PETScWrappers::MPI, the MatrixFree class provides the usual matrix-vector
+   * multiplication <tt>vmult(VectorBase &dst, const VectorBase &src)</tt>
+   * which is pure virtual and must be reimplemented in derived classes.
+   * Besides the usual interface, this class has a matrix-vector
+   * multiplication <tt>vmult(Vec  &dst, const Vec  &src)</tt> taking PETSc
+   * Vec objects, which will be called by <tt>matrix_free_mult(Mat A, Vec src,
+   * Vec dst)</tt> registered as matrix-vector multiplication of this PETSc
+   * matrix object. The default implementation of the vmult function in the
+   * base class translates the given PETSc <tt>Vec*</tt> vectors into a
+   * deal.II vector, calls the usual vmult function with the usual interface
+   * and converts the result back to PETSc <tt>Vec*</tt>. This could be made
+   * much more efficient in derived classes without allocating new memory.
+   *
+   * @ingroup PETScWrappers
+   * @ingroup Matrix1
+   * @author Wolfgang Bangerth, Martin Steigemann, 2012
+   */
+  class MatrixFree : public MatrixBase
+  {
+  public:
+
+    /**
+     * Default constructor. Create an empty matrix object.
+     */
+    MatrixFree ();
+
+    /**
+     * Create a matrix object of dimensions @p m times @p n with communication
+     * happening over the provided @p communicator.
+     *
+     * For the meaning of the @p local_rows and @p local_columns parameters,
+     * see the PETScWrappers::MPI::SparseMatrix class documentation.
+     *
+     * Like other PETSc matrices, the matrix-free object needs to have a
+     * size and, to perform matrix-vector multiplications efficiently in
+     * parallel, also @p local_rows and @p local_columns. But in contrast to
+     * PETScWrappers::SparseMatrix classes, a PETSc matrix-free object does
+     * not need any estimate of nonzero entries and has no option
+     * <tt>is_symmetric</tt>.
+     */
+    MatrixFree (const MPI_Comm     &communicator,
+                const unsigned int  m,
+                const unsigned int  n,
+                const unsigned int  local_rows,
+                const unsigned int  local_columns);
+
+    /**
+     * Create a matrix object of dimensions @p m times @p n with communication
+     * happening over the provided @p communicator.
+     *
+     * Like other PETSc matrices, the matrix-free object needs to have a
+     * size and, to perform matrix-vector multiplications efficiently in
+     * parallel, also @p local_rows_per_process and
+     * @p local_columns_per_process. But in contrast to
+     * PETScWrappers::SparseMatrix classes, a PETSc matrix-free object does
+     * not need any estimate of nonzero entries and has no <tt>is_symmetric</tt> option.
+     */
+    MatrixFree (const MPI_Comm     &communicator,
+                const unsigned int  m,
+                const unsigned int  n,
+                const std::vector<unsigned int> &local_rows_per_process,
+                const std::vector<unsigned int> &local_columns_per_process,
+                const unsigned int  this_process);
+
+    /**
+     * Constructor for the serial case: Same function as
+     * <tt>MatrixFree()</tt>, see above, with <tt>communicator =
+     * MPI_COMM_WORLD</tt>.
+     */
+    MatrixFree (const unsigned int  m,
+                const unsigned int  n,
+                const unsigned int  local_rows,
+                const unsigned int  local_columns);
+
+    /**
+     * Constructor for the serial case: Same function as
+     * <tt>MatrixFree()</tt>, see above, with <tt>communicator =
+     * MPI_COMM_WORLD</tt>.
+     */
+    MatrixFree (const unsigned int  m,
+                const unsigned int  n,
+                const std::vector<unsigned int> &local_rows_per_process,
+                const std::vector<unsigned int> &local_columns_per_process,
+                const unsigned int  this_process);
+
+    /**
+     * Throw away the present matrix and generate one that has the same
+     * properties as if it were created by the constructor of this class with
+     * the same argument list as the present function.
+     */
+    void reinit (const MPI_Comm     &communicator,
+                 const unsigned int  m,
+                 const unsigned int  n,
+                 const unsigned int  local_rows,
+                 const unsigned int  local_columns);
+
+    /**
+     * Throw away the present matrix and generate one that has the same
+     * properties as if it were created by the constructor of this class with
+     * the same argument list as the present function.
+     */
+    void reinit (const MPI_Comm     &communicator,
+                 const unsigned int  m,
+                 const unsigned int  n,
+                 const std::vector<unsigned int> &local_rows_per_process,
+                 const std::vector<unsigned int> &local_columns_per_process,
+                 const unsigned int  this_process);
+
+    /**
+     * Calls the @p reinit() function above with <tt>communicator =
+     * MPI_COMM_WORLD</tt>.
+     */
+    void reinit (const unsigned int  m,
+                 const unsigned int  n,
+                 const unsigned int  local_rows,
+                 const unsigned int  local_columns);
+
+    /**
+     * Calls the @p reinit() function above with <tt>communicator =
+     * MPI_COMM_WORLD</tt>.
+     */
+    void reinit (const unsigned int  m,
+                 const unsigned int  n,
+                 const std::vector<unsigned int> &local_rows_per_process,
+                 const std::vector<unsigned int> &local_columns_per_process,
+                 const unsigned int  this_process);
+
+    /**
+     * Release all memory and return to a state just like after having called
+     * the default constructor.
+     */
+    void clear ();
+
+    /**
+     * Return a reference to the MPI communicator object in use with this
+     * matrix.
+     */
+    const MPI_Comm &get_mpi_communicator () const;
+
+    /**
+     * Matrix-vector multiplication: let <i>dst = M*src</i> with <i>M</i>
+     * being this matrix.
+     *
+     * Source and destination must not be the same vector.
+     *
+     * Note that if the current object represents a parallel distributed
+     * matrix (of type PETScWrappers::MPI::SparseMatrix), then both vectors
+     * have to be distributed vectors as well. Conversely, if the matrix is
+     * not distributed, then neither of the vectors may be.
+     */
+    virtual
+    void vmult (VectorBase       &dst,
+                const VectorBase &src) const = 0;
+
+    /**
+     * Matrix-vector multiplication: let <i>dst = M<sup>T</sup>*src</i> with
+     * <i>M</i> being this matrix. This function does the same as @p vmult()
+     * but takes the transposed matrix.
+     *
+     * Source and destination must not be the same vector.
+     *
+     * Note that if the current object represents a parallel distributed
+     * matrix then both vectors have to be distributed vectors as well.
+     * Conversely, if the matrix is not distributed, then neither of the
+     * vectors may be.
+     */
+    virtual
+    void Tvmult (VectorBase       &dst,
+                 const VectorBase &src) const = 0;
+
+    /**
+     * Adding Matrix-vector multiplication. Add <i>M*src</i> on <i>dst</i>
+     * with <i>M</i> being this matrix.
+     *
+     * Source and destination must not be the same vector.
+     *
+     * Note that if the current object represents a parallel distributed
+     * matrix then both vectors have to be distributed vectors as well.
+     * Conversely, if the matrix is not distributed, then neither of the
+     * vectors may be.
+     */
+    virtual
+    void vmult_add (VectorBase       &dst,
+                    const VectorBase &src) const = 0;
+
+    /**
+     * Adding Matrix-vector multiplication. Add <i>M<sup>T</sup>*src</i> to
+     * <i>dst</i> with <i>M</i> being this matrix. This function does the same
+     * as @p vmult_add() but takes the transposed matrix.
+     *
+     * Source and destination must not be the same vector.
+     *
+     * Note that if the current object represents a parallel distributed
+     * matrix then both vectors have to be distributed vectors as well.
+     * Conversely, if the matrix is not distributed, then neither of the
+     * vectors may be.
+     */
+    virtual
+    void Tvmult_add (VectorBase       &dst,
+                     const VectorBase &src) const = 0;
+
+    /**
+     * The matrix-vector multiplication called by @p matrix_free_mult(). This
+     * function can be reimplemented in derived classes for efficiency. The
+     * default implementation copies the given vectors into
+     * PETScWrappers::*::Vector and calls <tt>vmult(VectorBase &dst, const
+     * VectorBase &src)</tt> which is purely virtual and must be reimplemented
+     * in derived classes.
+     */
+    virtual
+    void vmult (Vec  &dst, const Vec  &src) const;
+
+  private:
+
+    /**
+     * Copy of the communicator object to be used for this parallel matrix-
+     * free object.
+     */
+    MPI_Comm  communicator;
+
+    /**
+     * Callback-function registered as the matrix-vector multiplication of
+     * this matrix-free object called by PETSc routines. This function must be
+     * static and takes a PETSc matrix @p A, and vectors @p src and @p dst,
+     * where <i>dst = A*src</i>
+     *
+     * Source and destination must not be the same vector.
+     *
+     * This function calls <tt>vmult(Vec &dst, const Vec &src)</tt> which
+     * should be reimplemented in derived classes.
+     */
+    static int matrix_free_mult (Mat  A, Vec  src, Vec  dst);
+
+    /**
+     * Do the actual work for the respective @p reinit() function and the
+     * matching constructor, i.e. create a matrix object. Getting rid of the
+     * previous matrix is left to the caller.
+     */
+    void do_reinit (const unsigned int  m,
+                    const unsigned int  n,
+                    const unsigned int  local_rows,
+                    const unsigned int  local_columns);
+  };
+
+
+
+// -------- template and inline functions ----------
+
+  inline
+  const MPI_Comm &
+  MatrixFree::get_mpi_communicator () const
+  {
+    return communicator;  // reference to the member copy held by this object
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_PETSC
+
+
+/*----------------------------   petsc_matrix_free.h     ---------------------------*/
+
+#endif
+/*----------------------------   petsc_matrix_free.h     ---------------------------*/
diff --git a/include/deal.II/lac/petsc_parallel_block_sparse_matrix.h b/include/deal.II/lac/petsc_parallel_block_sparse_matrix.h
new file mode 100644
index 0000000..1f79fa6
--- /dev/null
+++ b/include/deal.II/lac/petsc_parallel_block_sparse_matrix.h
@@ -0,0 +1,358 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__petsc_parallel_block_sparse_matrix_h
+#define dealii__petsc_parallel_block_sparse_matrix_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_PETSC
+
+#  include <deal.II/base/table.h>
+#  include <deal.II/lac/block_matrix_base.h>
+#  include <deal.II/lac/block_sparsity_pattern.h>
+#  include <deal.II/lac/petsc_parallel_sparse_matrix.h>
+#  include <deal.II/lac/petsc_parallel_block_vector.h>
+#  include <deal.II/lac/exceptions.h>
+#  include <cmath>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+namespace PETScWrappers
+{
+  namespace MPI
+  {
+
+    /*! @addtogroup PETScWrappers
+     *@{
+     */
+
+    /**
+     * Blocked sparse matrix based on the PETScWrappers::SparseMatrix class.
+     * This class implements the functions that are specific to the PETSc
+     * SparseMatrix base objects for a blocked sparse matrix, and leaves the
+     * actual work relaying most of the calls to the individual blocks to the
+     * functions implemented in the base class. See there also for a
+     * description of when this class is useful.
+     *
+     * In contrast to the deal.II-type SparseMatrix class, the PETSc matrices
+     * do not have external objects for the sparsity patterns. Thus, one does
+     * not determine the size of the individual blocks of a block matrix of
+     * this type by attaching a block sparsity pattern, but by calling
+     * reinit() to set the number of blocks and then by setting the size of
+     * each block separately. In order to fix the data structures of the block
+     * matrix, it is then necessary to let it know that we have changed the
+     * sizes of the underlying matrices. For this, one has to call the
+     * collect_sizes() function, for much the same reason as is documented
+     * with the BlockSparsityPattern class.
+     *
+     * @ingroup Matrix1
+     * @see @ref GlossBlockLA "Block (linear algebra)"
+     * @author Wolfgang Bangerth, 2004
+     */
+    class BlockSparseMatrix : public BlockMatrixBase<SparseMatrix>
+    {
+    public:
+      /**
+       * Typedef the base class for simpler access to its own typedefs.
+       */
+      typedef BlockMatrixBase<SparseMatrix> BaseClass;
+
+      /**
+       * Typedef the type of the underlying matrix.
+       */
+      typedef BaseClass::BlockType  BlockType;
+
+      /**
+       * Import the typedefs from the base class.
+       */
+      typedef BaseClass::value_type      value_type;
+      typedef BaseClass::pointer         pointer;
+      typedef BaseClass::const_pointer   const_pointer;
+      typedef BaseClass::reference       reference;
+      typedef BaseClass::const_reference const_reference;
+      typedef BaseClass::size_type       size_type;
+      typedef BaseClass::iterator        iterator;
+      typedef BaseClass::const_iterator  const_iterator;
+
+      /**
+       * Constructor; initializes the matrix to be empty, without any
+       * structure, i.e.  the matrix is not usable at all. This constructor is
+       * therefore only useful for matrices which are members of a class. All
+       * other matrices should be created at a point in the data flow where
+       * all necessary information is available.
+       *
+       * You have to initialize the matrix before usage with
+       * reinit(BlockSparsityPattern). The number of blocks per row and column
+       * are then determined by that function.
+       */
+      BlockSparseMatrix ();
+
+      /**
+       * Destructor.
+       */
+      ~BlockSparseMatrix ();
+
+      /**
+       * Pseudo copy operator only copying empty objects. The sizes of the
+       * block matrices need to be the same.
+       */
+      BlockSparseMatrix &
+      operator = (const BlockSparseMatrix &);
+
+      /**
+       * This operator assigns a scalar to a matrix. Since this does usually
+       * not make much sense (should we set all matrix entries to this value?
+       * Only the nonzero entries of the sparsity pattern?), this operation is
+       * only allowed if the actual value to be assigned is zero. This
+       * operator only exists to allow for the obvious notation
+       * <tt>matrix=0</tt>, which sets all elements of the matrix to zero, but
+       * keeps the sparsity pattern previously used.
+       */
+      BlockSparseMatrix &
+      operator = (const double d);
+
+      /**
+       * Resize the matrix, by setting the number of block rows and columns.
+       * This deletes all blocks and replaces them with uninitialized ones,
+       * i.e.  ones for which also the sizes are not yet set. You have to do
+       * that by calling the @p reinit functions of the blocks themselves. Do
+       * not forget to call collect_sizes() after that on this object.
+       *
+       * The reason that you have to set sizes of the blocks yourself is that
+       * the sizes may be varying, the maximum number of elements per row may
+       * be varying, etc. It is simpler not to reproduce the interface of the
+       * SparsityPattern class here but rather let the user call whatever
+       * function she desires.
+       */
+      void reinit (const size_type n_block_rows,
+                   const size_type n_block_columns);
+
+
+      /**
+       * Efficiently reinit the block matrix for a parallel computation. Only
+       * a BlockDynamicSparsityPattern (@p bdsp) can efficiently store
+       * large sparsity patterns in parallel, so this is the only supported
+       * argument. The IndexSets describe the locally owned range of DoFs for
+       * each block. Note that each IndexSet needs to be contiguous. For a
+       * symmetric structure hand in the same vector for the first two
+       * arguments.
+       */
+      void reinit(const std::vector<IndexSet> &rows,
+                  const std::vector<IndexSet> &cols,
+                  const BlockDynamicSparsityPattern &bdsp,
+                  const MPI_Comm &com);
+
+
+      /**
+       * Same as above but for a symmetric structure only.
+       */
+      void reinit(const std::vector<IndexSet> &sizes,
+                  const BlockDynamicSparsityPattern &bdsp,
+                  const MPI_Comm &com);
+
+
+
+      /**
+       * Matrix-vector multiplication: let $dst = M*src$ with $M$ being this
+       * matrix.
+       */
+      void vmult (BlockVector       &dst,
+                  const BlockVector &src) const;
+
+      /**
+       * Matrix-vector multiplication. Just like the previous function, but
+       * only applicable if the matrix has only one block column.
+       */
+      void vmult (BlockVector          &dst,
+                  const Vector &src) const;
+
+      /**
+       * Matrix-vector multiplication. Just like the previous function, but
+       * only applicable if the matrix has only one block row.
+       */
+      void vmult (Vector    &dst,
+                  const BlockVector &src) const;
+
+      /**
+       * Matrix-vector multiplication. Just like the previous function, but
+       * only applicable if the matrix has only one block.
+       */
+      void vmult (Vector       &dst,
+                  const Vector &src) const;
+
+      /**
+       * Matrix-vector multiplication: let $dst = M^T*src$ with $M$ being this
+       * matrix. This function does the same as vmult() but takes the
+       * transposed matrix.
+       */
+      void Tvmult (BlockVector       &dst,
+                   const BlockVector &src) const;
+
+      /**
+       * Matrix-vector multiplication. Just like the previous function, but
+       * only applicable if the matrix has only one block row.
+       */
+      void Tvmult (BlockVector  &dst,
+                   const Vector &src) const;
+
+      /**
+       * Matrix-vector multiplication. Just like the previous function, but
+       * only applicable if the matrix has only one block column.
+       */
+      void Tvmult (Vector    &dst,
+                   const BlockVector &src) const;
+
+      /**
+       * Matrix-vector multiplication. Just like the previous function, but
+       * only applicable if the matrix has only one block.
+       */
+      void Tvmult (Vector       &dst,
+                   const Vector &src) const;
+
+      /**
+       * This function collects the sizes of the sub-objects and stores them
+       * in internal arrays, in order to be able to relay global indices into
+       * the matrix to indices into the subobjects. You *must* call this
+       * function each time after you have changed the size of the sub-
+       * objects.
+       */
+      void collect_sizes ();
+
+      /**
+       * Return a reference to the MPI communicator object in use with this
+       * matrix.
+       */
+      const MPI_Comm &get_mpi_communicator () const;
+
+      /**
+       * Make the clear() function in the base class visible, though it is
+       * protected.
+       */
+      using BlockMatrixBase<SparseMatrix>::clear;
+    };
+
+
+
+    /*@}*/
+
+// ------------- inline and template functions -----------------
+
+    inline
+    BlockSparseMatrix &
+    BlockSparseMatrix::operator = (const double d)
+    {
+      Assert (d==0, ExcScalarAssignmentOnlyForZeroValue());
+      // Only zero may be assigned; forward the assignment to every block.
+      for (size_type r=0; r<this->n_block_rows(); ++r)
+        for (size_type c=0; c<this->n_block_cols(); ++c)
+          this->block(r,c) = d;
+
+      return *this;
+    }
+
+
+
+    inline
+    void
+    BlockSparseMatrix::vmult (BlockVector       &dst,
+                              const BlockVector &src) const
+    {
+      BaseClass::vmult_block_block (dst, src);  // block dst, block src: forwarded to BlockMatrixBase
+    }
+
+
+
+    inline
+    void
+    BlockSparseMatrix::vmult (BlockVector  &dst,
+                              const Vector &src) const
+    {
+      BaseClass::vmult_block_nonblock (dst, src);  // block dst, plain src: forwarded to BlockMatrixBase
+    }
+
+
+
+    inline
+    void
+    BlockSparseMatrix::vmult (Vector            &dst,
+                              const BlockVector &src) const
+    {
+      BaseClass::vmult_nonblock_block (dst, src);  // plain dst, block src: forwarded to BlockMatrixBase
+    }
+
+
+
+    inline
+    void
+    BlockSparseMatrix::vmult (Vector       &dst,
+                              const Vector &src) const
+    {
+      BaseClass::vmult_nonblock_nonblock (dst, src);  // plain dst, plain src: forwarded to BlockMatrixBase
+    }
+
+
+    inline
+    void
+    BlockSparseMatrix::Tvmult (BlockVector       &dst,
+                               const BlockVector &src) const
+    {
+      BaseClass::Tvmult_block_block (dst, src);  // transposed product, block dst and block src
+    }
+
+
+
+    inline
+    void
+    BlockSparseMatrix::Tvmult (BlockVector  &dst,
+                               const Vector &src) const
+    {
+      BaseClass::Tvmult_block_nonblock (dst, src);  // transposed product, block dst and plain src
+    }
+
+
+
+    inline
+    void
+    BlockSparseMatrix::Tvmult (Vector            &dst,
+                               const BlockVector &src) const
+    {
+      BaseClass::Tvmult_nonblock_block (dst, src);  // transposed product, plain dst and block src
+    }
+
+
+
+    inline
+    void
+    BlockSparseMatrix::Tvmult (Vector       &dst,
+                               const Vector &src) const
+    {
+      BaseClass::Tvmult_nonblock_nonblock (dst, src);  // transposed product, plain dst and plain src
+    }
+
+  }
+
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+
+#endif    // DEAL_II_WITH_PETSC
+
+#endif    // dealii__petsc_parallel_block_sparse_matrix_h
diff --git a/include/deal.II/lac/petsc_parallel_block_vector.h b/include/deal.II/lac/petsc_parallel_block_vector.h
new file mode 100644
index 0000000..ac8fa4e
--- /dev/null
+++ b/include/deal.II/lac/petsc_parallel_block_vector.h
@@ -0,0 +1,545 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__petsc_parallel_block_vector_h
+#define dealii__petsc_parallel_block_vector_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_PETSC
+
+#  include <deal.II/lac/petsc_parallel_vector.h>
+#  include <deal.II/lac/block_indices.h>
+#  include <deal.II/lac/block_vector_base.h>
+#  include <deal.II/lac/exceptions.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace PETScWrappers
+{
+  // forward declaration
+  class BlockVector;
+
+  namespace MPI
+  {
+
+    /*! @addtogroup PETScWrappers
+     *@{
+     */
+
+    /**
+     * An implementation of block vectors based on the parallel vector class
+     * implemented in PETScWrappers. While the base class provides for most of
+     * the interface, this class handles the actual allocation of vectors and
+     * provides functions that are specific to the underlying vector type.
+     *
+     * The model of distribution of data is such that each of the blocks is
+     * distributed across all MPI processes named in the MPI communicator.
+     * I.e. we don't just distribute the whole vector, but each component. In
+     * the constructors and reinit() functions, one therefore not only has to
+     * specify the sizes of the individual blocks, but also the number of
+     * elements of each of these blocks to be stored on the local process.
+     *
+     * @ingroup Vectors
+     * @see @ref GlossBlockLA "Block (linear algebra)"
+     * @author Wolfgang Bangerth, 2004
+     */
+    class BlockVector : public BlockVectorBase<Vector>
+    {
+    public:
+      /**
+       * Typedef the base class for simpler access to its own typedefs.
+       */
+      typedef BlockVectorBase<Vector> BaseClass;
+
+      /**
+       * Typedef the type of the underlying vector.
+       */
+      typedef BaseClass::BlockType  BlockType;
+
+      /**
+       * Import the typedefs from the base class.
+       */
+      typedef BaseClass::value_type      value_type;
+      typedef BaseClass::pointer         pointer;
+      typedef BaseClass::const_pointer   const_pointer;
+      typedef BaseClass::reference       reference;
+      typedef BaseClass::const_reference const_reference;
+      typedef BaseClass::size_type       size_type;
+      typedef BaseClass::iterator        iterator;
+      typedef BaseClass::const_iterator  const_iterator;
+
+      /**
+       * Default constructor. Generate an empty vector without any blocks.
+       */
+      BlockVector ();
+
+      /**
+       * Constructor. Generate a block vector with @p n_blocks blocks, each of
+       * which is a parallel vector across @p communicator with @p block_size
+       * elements of which @p local_size elements are stored on the present
+       * process.
+       */
+      explicit BlockVector (const unsigned int  n_blocks,
+                            const MPI_Comm     &communicator,
+                            const size_type     block_size,
+                            const size_type     local_size);
+
+      /**
+       * Copy constructor. Set all the properties of the parallel vector to
+       * those of the given argument and copy the elements.
+       */
+      BlockVector (const BlockVector  &V);
+
+      /**
+       * Constructor. Set the number of blocks to <tt>block_sizes.size()</tt>
+       * and initialize each block with <tt>block_sizes[i]</tt> zero elements.
+       * The individual blocks are distributed across the given communicator,
+       * and each stores <tt>local_elements[i]</tt> elements on the present
+       * process.
+       */
+      BlockVector (const std::vector<size_type> &block_sizes,
+                   const MPI_Comm               &communicator,
+                   const std::vector<size_type> &local_elements);
+
+      /**
+       * Create a BlockVector with parallel_partitioning.size() blocks, each
+       * initialized with the given IndexSet.
+       */
+      explicit BlockVector (const std::vector<IndexSet> &parallel_partitioning,
+                            const MPI_Comm &communicator = MPI_COMM_WORLD);
+
+      /**
+       * Same as above, but include ghost elements. Block <tt>i</tt> is
+       * initialized with <tt>parallel_partitioning[i]</tt> and
+       * <tt>ghost_indices[i]</tt>, so both arguments need one entry per
+       * block.
+       */
+      BlockVector (const std::vector<IndexSet> &parallel_partitioning,
+                   const std::vector<IndexSet> &ghost_indices,
+                   const MPI_Comm &communicator);
+
+
+
+      /**
+       * Destructor. Clears memory.
+       */
+      ~BlockVector ();
+
+      /**
+       * Copy operator: fill all components of the vector that are locally
+       * stored with the given scalar value.
+       */
+      BlockVector &operator= (const value_type s);
+
+      /**
+       * Copy operator for arguments of the same type. The present vector
+       * must either have no blocks at all or the same number of blocks as
+       * @p V; this is checked by an assertion.
+       */
+      BlockVector &
+      operator= (const BlockVector &V);
+
+      /**
+       * Copy the given sequential (non-distributed) block vector into the
+       * present parallel block vector. It is assumed that they have the same
+       * size, and this operation does not change the partitioning of the
+       * parallel vectors by which its elements are distributed across several
+       * MPI processes. What this operation therefore does is to copy that
+       * chunk of the given vector @p v that corresponds to elements of the
+       * target vector that are stored locally, and copies them, for each of
+       * the individual blocks of this object. Elements that are not stored
+       * locally are not touched.
+       *
+       * This being a parallel vector, you must make sure that @em all
+       * processes call this function at the same time. It is not possible to
+       * change the local part of a parallel vector on only one process,
+       * independent of what other processes do, with this function.
+       */
+      BlockVector &
+      operator= (const PETScWrappers::BlockVector &v);
+
+      /**
+       * Reinitialize the BlockVector to contain @p n_blocks of size @p
+       * block_size, each of which stores @p local_size elements locally. The
+       * @p communicator argument denotes the MPI channel over which each of
+       * these blocks shall communicate.
+       *
+       * If <tt>omit_zeroing_entries==false</tt>, the vector is filled with
+       * zeros.
+       */
+      void reinit (const unsigned int  n_blocks,
+                   const MPI_Comm     &communicator,
+                   const size_type     block_size,
+                   const size_type     local_size,
+                   const bool omit_zeroing_entries = false);
+
+      /**
+       * Reinitialize the BlockVector such that it contains
+       * <tt>block_sizes.size()</tt> blocks. Each block is reinitialized to
+       * dimension <tt>block_sizes[i]</tt>. Each of them stores
+       * <tt>local_sizes[i]</tt> elements on the present process.
+       *
+       * If the number of blocks is the same as before this function was
+       * called, all vectors remain the same and reinit() is called for each
+       * vector.
+       *
+       * If <tt>omit_zeroing_entries==false</tt>, the vector is filled with
+       * zeros.
+       *
+       * Note that you must call this (or the other reinit() functions)
+       * function, rather than calling the reinit() functions of an individual
+       * block, to allow the block vector to update its caches of vector
+       * sizes. If you call reinit() of one of the blocks, then subsequent
+       * actions on this object may yield unpredictable results since they may
+       * be routed to the wrong block.
+       */
+      void reinit (const std::vector<size_type> &block_sizes,
+                   const MPI_Comm               &communicator,
+                   const std::vector<size_type> &local_sizes,
+                   const bool                    omit_zeroing_entries=false);
+
+      /**
+       * Change the dimension to that of the vector <tt>V</tt>. The same
+       * applies as for the other reinit() function.
+       *
+       * The elements of <tt>V</tt> are not copied, i.e. this function is the
+       * same as calling <tt>reinit (V.size(), omit_zeroing_entries)</tt>.
+       *
+       * Note that you must call this (or the other reinit() functions)
+       * function, rather than calling the reinit() functions of an individual
+       * block, to allow the block vector to update its caches of vector
+       * sizes. If you call reinit() on one of the blocks, then subsequent
+       * actions on this object may yield unpredictable results since they may
+       * be routed to the wrong block.
+       */
+      void reinit (const BlockVector &V,
+                   const bool         omit_zeroing_entries=false);
+
+      /**
+       * Reinitialize the BlockVector using IndexSets. See the constructor
+       * with the same arguments for details.
+       */
+      void reinit (const std::vector<IndexSet> &parallel_partitioning,
+                   const MPI_Comm              &communicator);
+
+      /**
+       * Same as above but include ghost entries. Block <tt>i</tt> is
+       * reinitialized with <tt>parallel_partitioning[i]</tt> and
+       * <tt>ghost_entries[i]</tt>, so both arguments need one entry per
+       * block.
+       */
+      void reinit (const std::vector<IndexSet> &parallel_partitioning,
+                   const std::vector<IndexSet> &ghost_entries,
+                   const MPI_Comm              &communicator);
+
+      /**
+       * Change the number of blocks to <tt>num_blocks</tt>. The individual
+       * blocks will get initialized with zero size, so it is assumed that the
+       * user resizes the individual blocks by herself in an appropriate way,
+       * and calls <tt>collect_sizes</tt> afterwards.
+       */
+      void reinit (const unsigned int num_blocks);
+
+      /**
+       * Return whether this vector has ghost elements (and is thus
+       * read-only). The answer is taken from the first block; in debug mode
+       * it is additionally asserted that all blocks agree with it.
+       */
+      bool has_ghost_elements() const;
+
+      /**
+       * Return a reference to the MPI communicator object in use with this
+       * vector. This is the communicator reported by the first block.
+       */
+      const MPI_Comm &get_mpi_communicator () const;
+
+      /**
+       * Swap the contents of this vector and the other vector <tt>v</tt>. One
+       * could do this operation with a temporary variable and copying over
+       * the data elements, but this function is significantly more efficient
+       * since it only swaps the pointers to the data of the two vectors and
+       * therefore does not need to allocate temporary storage and move data
+       * around.
+       *
+       * Limitation: right now this function only works if both vectors have
+       * the same number of blocks. If needed, the numbers of blocks should be
+       * exchanged, too.
+       *
+       * This function is analogous to the swap() function of all C++
+       * standard containers. Also, there is a global function swap(u,v) that
+       * simply calls <tt>u.swap(v)</tt>, again in analogy to standard
+       * functions.
+       */
+      void swap (BlockVector &v);
+
+      /**
+       * Print to a stream. The blocks are printed one after the other
+       * through their own print() functions, using the given @p precision,
+       * @p scientific and @p across settings. The output of block
+       * <tt>i</tt> is preceded by <tt>Ci:</tt> if @p across is true, and by
+       * a line reading <tt>Component i</tt> otherwise.
+       */
+      void print (std::ostream       &out,
+                  const unsigned int  precision = 3,
+                  const bool          scientific = true,
+                  const bool          across = true) const;
+
+      /**
+       * Exception
+       */
+      DeclException0 (ExcIteratorRangeDoesNotMatchVectorSize);
+      /**
+       * Exception
+       */
+      DeclException0 (ExcNonMatchingBlockVectors);
+    };
+
+    /*@}*/
+
+    /*----------------------- Inline functions ----------------------------------*/
+
+
+    // Default constructor: no blocks are created here; all members are
+    // left as the base class default-constructs them.
+    inline BlockVector::BlockVector()
+    {
+    }
+
+
+
+    // Construct n_blocks blocks that all share the same global and local
+    // size by handing the arguments on to the matching reinit() overload.
+    inline BlockVector::BlockVector(const unsigned int n_blocks,
+                                    const MPI_Comm    &communicator,
+                                    const size_type    block_size,
+                                    const size_type    local_size)
+    {
+      this->reinit(n_blocks, communicator, block_size, local_size);
+    }
+
+
+
+    // Construct one block per entry of block_sizes; the set-up is done by
+    // reinit(), which is asked not to skip zeroing the entries.
+    inline BlockVector::BlockVector(const std::vector<size_type> &block_sizes,
+                                    const MPI_Comm               &communicator,
+                                    const std::vector<size_type> &local_elements)
+    {
+      this->reinit(block_sizes, communicator, local_elements, false);
+    }
+
+
+    // Copy constructor: take over the block layout of v, then copy the
+    // blocks one at a time.
+    inline BlockVector::BlockVector(const BlockVector &v)
+      : BlockVectorBase<Vector>()
+    {
+      this->components.resize(v.n_blocks());
+      this->block_indices = v.block_indices;
+
+      // block_indices is final at this point, so the block count can be
+      // read once up front
+      const unsigned int n_copied_blocks = this->n_blocks();
+      for (unsigned int b = 0; b < n_copied_blocks; ++b)
+        this->components[b] = v.components[b];
+    }
+
+    // Construct one block per IndexSet in parallel_partitioning; the actual
+    // set-up is done by the reinit() overload with the same arguments.
+    inline BlockVector::BlockVector(
+      const std::vector<IndexSet> &parallel_partitioning,
+      const MPI_Comm              &communicator)
+    {
+      this->reinit(parallel_partitioning, communicator);
+    }
+
+    // Construct one block per IndexSet in parallel_partitioning, each with
+    // the matching entry of ghost_indices; the actual set-up is done by the
+    // reinit() overload with the same arguments.
+    inline BlockVector::BlockVector(
+      const std::vector<IndexSet> &parallel_partitioning,
+      const std::vector<IndexSet> &ghost_indices,
+      const MPI_Comm              &communicator)
+    {
+      this->reinit(parallel_partitioning, ghost_indices, communicator);
+    }
+
+    // Scalar assignment: the base class does the work; we only have to
+    // return a reference of the derived type.
+    inline BlockVector &
+    BlockVector::operator=(const value_type s)
+    {
+      BaseClass::operator=(s);
+      return *this;
+    }
+
+    // Block-wise copy assignment from another BlockVector.
+    inline BlockVector &
+    BlockVector::operator=(const BlockVector &v)
+    {
+      // assignment is only permitted onto a BlockVector without blocks or
+      // onto one that already has as many blocks as the source
+      Assert(this->n_blocks() == 0 || this->n_blocks() == v.n_blocks(),
+             ExcDimensionMismatch(this->n_blocks(), v.n_blocks()));
+
+      // if the block counts differ, first adopt the source's block count
+      if (v.n_blocks() != this->n_blocks())
+        this->reinit(v.n_blocks());
+
+      for (size_type b = 0; b < this->n_blocks(); ++b)
+        this->components[b] = v.block(b);
+
+      // the blocks may have changed size, so refresh the cached sizes
+      this->collect_sizes();
+
+      return *this;
+    }
+
+    // Destructor: nothing to do explicitly here.
+    inline BlockVector::~BlockVector()
+    {
+    }
+
+
+    // Reinitialize to n_blocks blocks of identical global and local size.
+    // The uniform sizes are expanded into per-block size vectors and passed
+    // to the reinit() overload that takes such vectors.
+    inline void
+    BlockVector::reinit(const unsigned int n_blocks,
+                        const MPI_Comm    &communicator,
+                        const size_type    block_size,
+                        const size_type    local_size,
+                        const bool         omit_zeroing_entries)
+    {
+      const std::vector<size_type> uniform_block_sizes(n_blocks, block_size);
+      const std::vector<size_type> uniform_local_sizes(n_blocks, local_size);
+
+      this->reinit(uniform_block_sizes,
+                   communicator,
+                   uniform_local_sizes,
+                   omit_zeroing_entries);
+    }
+
+
+
+    // Reinitialize to block_sizes.size() blocks; block b gets global size
+    // block_sizes[b] and local size local_sizes[b].
+    inline void
+    BlockVector::reinit(const std::vector<size_type> &block_sizes,
+                        const MPI_Comm               &communicator,
+                        const std::vector<size_type> &local_sizes,
+                        const bool                    omit_zeroing_entries)
+    {
+      this->block_indices.reinit(block_sizes);
+
+      // only touch the container of blocks if the block count changed
+      const unsigned int n_new_blocks = this->n_blocks();
+      if (this->components.size() != n_new_blocks)
+        this->components.resize(n_new_blocks);
+
+      for (unsigned int b = 0; b < n_new_blocks; ++b)
+        this->components[b].reinit(communicator,
+                                   block_sizes[b],
+                                   local_sizes[b],
+                                   omit_zeroing_entries);
+    }
+
+
+    // Reinitialize to the block layout of v; each block is reinitialized
+    // from the corresponding block of v.
+    inline void
+    BlockVector::reinit(const BlockVector &v, const bool omit_zeroing_entries)
+    {
+      this->block_indices = v.get_block_indices();
+
+      // only touch the container of blocks if the block count changed
+      const unsigned int n_new_blocks = this->n_blocks();
+      if (this->components.size() != n_new_blocks)
+        this->components.resize(n_new_blocks);
+
+      for (unsigned int b = 0; b < n_new_blocks; ++b)
+        this->block(b).reinit(v.block(b), omit_zeroing_entries);
+    }
+
+    // Reinitialize to one block per IndexSet in parallel_partitioning. The
+    // size() of each IndexSet becomes the size of the corresponding block.
+    inline void
+    BlockVector::reinit(const std::vector<IndexSet> &parallel_partitioning,
+                        const MPI_Comm              &communicator)
+    {
+      // collect the size of every IndexSet to set up the block indices
+      std::vector<size_type> block_sizes;
+      block_sizes.reserve(parallel_partitioning.size());
+      for (unsigned int p = 0; p < parallel_partitioning.size(); ++p)
+        block_sizes.push_back(parallel_partitioning[p].size());
+
+      this->block_indices.reinit(block_sizes);
+
+      // only touch the container of blocks if the block count changed
+      const unsigned int n_new_blocks = this->n_blocks();
+      if (this->components.size() != n_new_blocks)
+        this->components.resize(n_new_blocks);
+
+      for (unsigned int b = 0; b < n_new_blocks; ++b)
+        this->block(b).reinit(parallel_partitioning[b], communicator);
+    }
+
+    // Reinitialize to one block per IndexSet in parallel_partitioning, with
+    // ghost entries. The size() of parallel_partitioning[i] becomes the
+    // size of block i, and block i is reinitialized with
+    // parallel_partitioning[i] and ghost_entries[i]. Both vectors therefore
+    // need exactly one entry per block.
+    inline
+    void
+    BlockVector::reinit (const std::vector<IndexSet> &parallel_partitioning,
+                         const std::vector<IndexSet> &ghost_entries,
+                         const MPI_Comm              &communicator)
+    {
+      // ghost_entries is indexed once per block below; without this check
+      // a vector that is too short would be read out of bounds
+      Assert (parallel_partitioning.size() == ghost_entries.size(),
+              ExcDimensionMismatch(parallel_partitioning.size(),
+                                   ghost_entries.size()));
+
+      std::vector<types::global_dof_index> sizes(parallel_partitioning.size());
+      for (unsigned int i=0; i<parallel_partitioning.size(); ++i)
+        sizes[i] = parallel_partitioning[i].size();
+
+      this->block_indices.reinit(sizes);
+      if (this->components.size() != this->n_blocks())
+        this->components.resize(this->n_blocks());
+
+      for (unsigned int i=0; i<this->n_blocks(); ++i)
+        block(i).reinit(parallel_partitioning[i], ghost_entries[i], communicator);
+    }
+
+
+
+    // The communicator of the block vector is the one reported by its
+    // first block.
+    inline const MPI_Comm &
+    BlockVector::get_mpi_communicator() const
+    {
+      return this->block(0).get_mpi_communicator();
+    }
+
+    // Report whether the first block has ghost elements. When DEBUG is
+    // defined, every block is additionally checked to give the same answer.
+    inline bool
+    BlockVector::has_ghost_elements() const
+    {
+      const bool first_block_ghosted = this->block(0).has_ghost_elements();
+#ifdef DEBUG
+      for (unsigned int b = 0; b < this->n_blocks(); ++b)
+        Assert(this->block(b).has_ghost_elements() == first_block_ghosted,
+               ExcInternalError());
+#endif
+      return first_block_ghosted;
+    }
+
+
+    // Exchange the contents of this vector and v by swapping the two data
+    // members: the block index bookkeeping and the container of blocks.
+    inline void
+    BlockVector::swap(BlockVector &v)
+    {
+      ::dealii::swap(this->block_indices, v.block_indices);
+      std::swap(this->components, v.components);
+    }
+
+
+
+    // Print all blocks in order. Each block is announced by a short header
+    // and then printed through its own print() function with the given
+    // formatting arguments.
+    inline void
+    BlockVector::print(std::ostream      &out,
+                       const unsigned int precision,
+                       const bool         scientific,
+                       const bool         across) const
+    {
+      const unsigned int n_printed_blocks = this->n_blocks();
+      for (unsigned int b = 0; b < n_printed_blocks; ++b)
+        {
+          // a header line of its own unless printing across, in which case
+          // the header stays on the same line as the block's entries
+          if (!across)
+            out << "Component " << b << std::endl;
+          else
+            out << 'C' << b << ':';
+
+          this->components[b].print(out, precision, scientific, across);
+        }
+    }
+
+
+
+    /**
+     * Global swap function for block vectors. It takes precedence over the
+     * default implementation of the C++ standard library, which would go
+     * through a temporary object, and instead just exchanges the data of
+     * the two vectors by calling <tt>u.swap(v)</tt>.
+     *
+     * @relates PETScWrappers::MPI::BlockVector
+     * @author Wolfgang Bangerth, 2000
+     */
+    inline void
+    swap(BlockVector &u, BlockVector &v)
+    {
+      u.swap(v);
+    }
+
+  }
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif  // DEAL_II_WITH_PETSC
+
+#endif
diff --git a/include/deal.II/lac/petsc_parallel_sparse_matrix.h b/include/deal.II/lac/petsc_parallel_sparse_matrix.h
new file mode 100644
index 0000000..b97810e
--- /dev/null
+++ b/include/deal.II/lac/petsc_parallel_sparse_matrix.h
@@ -0,0 +1,457 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__petsc_parallel_sparse_matrix_h
+#define dealii__petsc_parallel_sparse_matrix_h
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_PETSC
+
+#  include <deal.II/lac/exceptions.h>
+#  include <deal.II/lac/petsc_matrix_base.h>
+#  include <deal.II/lac/petsc_parallel_vector.h>
+#  include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// forward declaration
+template <typename Matrix> class BlockMatrixBase;
+
+
+namespace PETScWrappers
+{
+  namespace MPI
+  {
+
+
+
+    /**
+     * Implementation of a parallel sparse matrix class based on PETSc, with
+     * rows of the matrix distributed across an MPI network. All the
+     * functionality is actually in the base class, except for the calls to
+     * generate a parallel sparse matrix. This is possible since PETSc only
+     * works on an abstract matrix type and internally distributes to
+     * functions that do the actual work depending on the actual matrix type
+     * (much like using virtual functions). Only the functions creating a
+     * matrix of specific type differ, and are implemented in this particular
+     * class.
+     *
+     * There are a number of comments on the communication model as well as
+     * access to individual elements in the documentation to the parallel
+     * vector class. These comments apply here as well.
+     *
+     *
+     * <h3>Partitioning of matrices</h3>
+     *
+     * PETSc partitions parallel matrices so that each MPI process "owns" a
+     * certain number of rows (i.e. only this process stores the respective
+     * entries in these rows). The number of rows each process owns has to be
+     * passed to the constructors and reinit() functions via the argument @p
+     * local_rows. The individual values passed as @p local_rows on all the
+     * MPI processes of course have to add up to the global number of rows of
+     * the matrix.
+     *
+     * In addition to this, PETSc also partitions the rectangular chunk of the
+     * matrix it owns (i.e. the @p local_rows times n() elements in the
+     * matrix), so that matrix vector multiplications can be performed
+     * efficiently. This column-partitioning therefore has to match the
+     * partitioning of the vectors with which the matrix is multiplied, just
+     * as the row-partitioning has to match the partitioning of destination
+     * vectors. This partitioning is passed to the constructors and reinit()
+     * functions through the @p local_columns variable, which again has to add
+     * up to the global number of columns in the matrix. The name @p
+     * local_columns may be named inappropriately since it does not reflect
+     * that only these columns are stored locally, but it reflects the fact
+     * that these are the columns for which the elements of incoming vectors
+     * are stored locally.
+     *
+     * To make things even more complicated, PETSc needs a very good estimate
+     * of the number of elements to be stored in each row to be efficient.
+     * Otherwise it spends most of the time with allocating small chunks of
+     * memory, a process that can slow down programs to a crawl if it happens
+     * too often. As if a good estimate of the number of entries per row
+     * weren't enough, PETSc even needs this split as follows: for each row it
+     * owns, it needs an estimate for the number of elements in this row that
+     * fall into the columns that are set apart for this process (see above),
+     * and the number of elements that are in the rest of the columns.
+     *
+     * Since in general this information is not readily available, most of the
+     * initializing functions of this class assume that all of the number of
+     * elements you give as an argument to @p n_nonzero_per_row or by @p
+     * row_lengths fall into the columns "owned" by this process, and none
+     * into the other ones. This is a fair guess for most of the rows, since
+     * in a good domain partitioning, nodes only interact with nodes that are
+     * within the same subdomain. It does not hold for nodes on the interfaces
+     * of subdomain, however, and for the rows corresponding to these nodes,
+     * PETSc will have to allocate additional memory, a costly process.
+     *
+     * The only way to avoid this is to tell PETSc where the actual entries of
+     * the matrix will be. For this, there are constructors and reinit()
+     * functions of this class that take a CompressedSparsityPattern object
+     * containing all this information. While in the general case it is
+     * sufficient if the constructors and reinit() functions know the number
+     * of local rows and columns, the functions getting a sparsity pattern
+     * also need to know the number of local rows (@p local_rows_per_process)
+     * and columns (@p local_columns_per_process) for all other processes, in
+     * order to compute which parts of the matrix are which. Thus, it is not
+     * sufficient to just count the number of degrees of freedom that belong
+     * to a particular process, but you have to have the numbers for all
+     * processes available at all processes.
+     *
+     * @ingroup PETScWrappers
+     * @ingroup Matrix1
+     * @author Wolfgang Bangerth, 2004
+     */
+    class SparseMatrix : public MatrixBase
+    {
+    public:
+      /**
+       * Declare type for container size.
+       */
+      typedef types::global_dof_index size_type;
+
+      /**
+       * A structure that describes some of the traits of this class in terms
+       * of its run-time behavior. Some other classes (such as the block
+       * matrix classes) that take one or other of the matrix classes as its
+       * template parameters can tune their behavior based on the variables in
+       * this class.
+       */
+      struct Traits
+      {
+        /**
+         * It is not safe to elide additions of zeros to individual elements
+         * of this matrix. The reason is that additions to the matrix may
+         * trigger collective operations synchronising buffers on multiple
+         * processes. If an addition is elided on one process, this may lead
+         * to other processes hanging in an infinite waiting loop.
+         */
+        static const bool zero_addition_can_be_elided = false;
+      };
+
+      /**
+       * Default constructor. Create an empty matrix.
+       */
+      SparseMatrix ();
+
+      /**
+       * Destructor to free the PETSc object.
+       */
+      ~SparseMatrix ();
+
+      /**
+       * Create a sparse matrix of dimensions @p m times @p n, with an initial
+       * guess of @p n_nonzero_per_row and @p n_offdiag_nonzero_per_row
+       * nonzero elements per row (see documentation of the MatCreateAIJ PETSc
+       * function for more information about these parameters). PETSc is able
+       * to cope with the situation that more than this number of elements are
+       * later allocated for a row, but this involves copying data, and is
+       * thus expensive.
+       *
+       * For the meaning of the @p local_rows and @p local_columns parameters,
+       * see the class documentation.
+       *
+       * The @p is_symmetric flag determines whether we should tell PETSc that
+       * the matrix is going to be symmetric (as indicated by the call
+       * <tt>MatSetOption(mat, MAT_SYMMETRIC)</tt>). Note that the PETSc
+       * documentation states that one cannot form an ILU decomposition of a
+       * matrix for which this flag has been set to @p true, only an ICC. The
+       * default value of this flag is @p false.
+       */
+      SparseMatrix (const MPI_Comm  &communicator,
+                    const size_type  m,
+                    const size_type  n,
+                    const size_type  local_rows,
+                    const size_type  local_columns,
+                    const size_type  n_nonzero_per_row,
+                    const bool       is_symmetric = false,
+                    const size_type  n_offdiag_nonzero_per_row = 0);
+
+      /**
+       * Initialize a rectangular matrix with @p m rows and @p n columns. The
+       * maximal number of nonzero entries for diagonal and off- diagonal
+       * blocks of each row is given by the @p row_lengths and @p
+       * offdiag_row_lengths arrays.
+       *
+       * For the meaning of the @p local_rows and @p local_columns parameters,
+       * see the class documentation.
+       *
+       * Just as for the other constructors: PETSc is able to cope with the
+       * situation that more than this number of elements are later allocated
+       * for a row, but this involves copying data, and is thus expensive.
+       *
+       * The @p is_symmetric flag determines whether we should tell PETSc that
+       * the matrix is going to be symmetric (as indicated by the call
+       * <tt>MatSetOption(mat, MAT_SYMMETRIC)</tt>). Note that the PETSc
+       * documentation states that one cannot form an ILU decomposition of a
+       * matrix for which this flag has been set to @p true, only an ICC. The
+       * default value of this flag is @p false.
+       */
+      SparseMatrix (const MPI_Comm               &communicator,
+                    const size_type               m,
+                    const size_type               n,
+                    const size_type               local_rows,
+                    const size_type               local_columns,
+                    const std::vector<size_type> &row_lengths,
+                    const bool                    is_symmetric = false,
+                    const std::vector<size_type> &offdiag_row_lengths = std::vector<size_type>());
+
+      /**
+       * Initialize using the given sparsity pattern with communication
+       * happening over the provided @p communicator.
+       *
+       * For the meaning of the @p local_rows_per_process and @p
+       * local_columns_per_process parameters, see the class documentation.
+       *
+       * Note that PETSc can be very slow if you do not provide it with a good
+       * estimate of the lengths of rows. Using the present function is a very
+       * efficient way to do this, as it uses the exact number of nonzero
+       * entries for each row of the matrix by using the given sparsity
+       * pattern argument. If the @p preset_nonzero_locations flag is @p true,
+       * this function in addition not only sets the correct row sizes up
+       * front, but also pre-allocates the correct nonzero entries in the
+       * matrix.
+       *
+       * PETSc allows adding additional nonzero entries to a matrix later, by
+       * simply writing to these elements. However, this will then lead to
+       * additional memory allocations which are very inefficient and will
+       * greatly slow down your program. It is therefore significantly more
+       * efficient to get memory allocation right from the start.
+       */
+      template <typename SparsityPatternType>
+      SparseMatrix (const MPI_Comm               &communicator,
+                    const SparsityPatternType    &sparsity_pattern,
+                    const std::vector<size_type> &local_rows_per_process,
+                    const std::vector<size_type> &local_columns_per_process,
+                    const unsigned int            this_process,
+                    const bool                    preset_nonzero_locations = true);
+
+      /**
+       * This operator assigns a scalar to a matrix. Since this does usually
+       * not make much sense (should we set all matrix entries to this value?
+       * Only the nonzero entries of the sparsity pattern?), this operation is
+       * only allowed if the actual value to be assigned is zero. This
+       * operator only exists to allow for the obvious notation
+       * <tt>matrix=0</tt>, which sets all elements of the matrix to zero, but
+       * keep the sparsity pattern previously used.
+       */
+      SparseMatrix &operator = (const value_type d);
+
+
+      /**
+       * Make a copy of the PETSc matrix @p other. It is assumed that both
+       * matrices have the same SparsityPattern.
+       */
+      void copy_from(const SparseMatrix &other);
+
+      /**
+       * Throw away the present matrix and generate one that has the same
+       * properties as if it were created by the constructor of this class
+       * with the same argument list as the present function.
+       */
+      void reinit (const MPI_Comm     &communicator,
+                   const size_type m,
+                   const size_type n,
+                   const size_type local_rows,
+                   const size_type local_columns,
+                   const size_type n_nonzero_per_row,
+                   const bool      is_symmetric = false,
+                   const size_type n_offdiag_nonzero_per_row = 0);
+
+      /**
+       * Throw away the present matrix and generate one that has the same
+       * properties as if it were created by the constructor of this class
+       * with the same argument list as the present function.
+       */
+      void reinit (const MPI_Comm               &communicator,
+                   const size_type               m,
+                   const size_type               n,
+                   const size_type               local_rows,
+                   const size_type               local_columns,
+                   const std::vector<size_type> &row_lengths,
+                   const bool                    is_symmetric = false,
+                   const std::vector<size_type> &offdiag_row_lengths = std::vector<size_type>());
+
+      /**
+       * Initialize using the given sparsity pattern with communication
+       * happening over the provided @p communicator.
+       *
+       * Note that PETSc can be very slow if you do not provide it with a good
+       * estimate of the lengths of rows. Using the present function is a very
+       * efficient way to do this, as it uses the exact number of nonzero
+       * entries for each row of the matrix by using the given sparsity
+       * pattern argument. If the @p preset_nonzero_locations flag is @p true,
+       * this function in addition not only sets the correct row sizes up
+       * front, but also pre-allocates the correct nonzero entries in the
+       * matrix.
+       *
+       * PETSc allows one to later add additional nonzero entries to a matrix, by
+       * simply writing to these elements. However, this will then lead to
+       * additional memory allocations which are very inefficient and will
+       * greatly slow down your program. It is therefore significantly more
+       * efficient to get memory allocation right from the start.
+       */
+      template <typename SparsityPatternType>
+      void reinit (const MPI_Comm               &communicator,
+                   const SparsityPatternType    &sparsity_pattern,
+                   const std::vector<size_type> &local_rows_per_process,
+                   const std::vector<size_type> &local_columns_per_process,
+                   const unsigned int            this_process,
+                   const bool                    preset_nonzero_locations = true);
+
+      /**
+       * Create a matrix where the size() of the IndexSets determine the
+       * global number of rows and columns and the entries of the IndexSet
+       * give the rows and columns for the calling processor. Note that only
+       * contiguous IndexSets are supported.
+       */
+      template <typename SparsityPatternType>
+      void reinit (const IndexSet            &local_rows,
+                   const IndexSet            &local_columns,
+                   const SparsityPatternType &sparsity_pattern,
+                   const MPI_Comm            &communicator);
+
+      /**
+       * Initialize this matrix to have the same structure as @p other. This
+       * will not copy the values of the other matrix, but you can use
+       * copy_from() for this.
+       */
+      void reinit (const SparseMatrix &other);
+
+      /**
+       * Return a reference to the MPI communicator object in use with this
+       * matrix.
+       */
+      virtual const MPI_Comm &get_mpi_communicator () const;
+
+      /**
+       * @addtogroup Exceptions
+       * @{
+       */
+      /**
+       * Exception for a number of local rows (first argument) that exceeds
+       * the total number of rows (second argument).
+       */
+      DeclException2 (ExcLocalRowsTooLarge, int, int,
+                      << "The number of local rows " << arg1
+                      << " must not be larger than the total number of rows " << arg2);
+      //@}
+
+      /**
+       * Return the square of the norm of the vector $v$ with respect to the
+       * norm induced by this matrix, i.e. $\left(v^\ast,Mv\right)$. This is
+       * useful, e.g. in the finite element context, where the $L_2$ norm of a
+       * function equals the matrix norm with respect to the mass matrix of
+       * the vector representing the nodal values of the finite element
+       * function.
+       *
+       * Obviously, the matrix needs to be square for this operation.
+       *
+       * The implementation of this function is not as efficient as the one in
+       * the @p MatrixBase class used in deal.II (i.e. the original one, not
+       * the PETSc wrapper class) since PETSc doesn't support this operation
+       * and needs a temporary vector.
+       */
+      PetscScalar matrix_norm_square (const Vector &v) const;
+
+      /**
+       * Compute the matrix scalar product $\left(u^\ast,Mv\right)$.
+       *
+       * The implementation of this function is not as efficient as the one in
+       * the @p MatrixBase class used in deal.II (i.e. the original one, not
+       * the PETSc wrapper class) since PETSc doesn't support this operation
+       * and needs a temporary vector.
+       */
+      PetscScalar matrix_scalar_product (const Vector &u,
+                                         const Vector &v) const;
+
+    private:
+
+      /**
+       * Copy of the communicator object to be used for this parallel matrix.
+       */
+      MPI_Comm communicator;
+
+      /**
+       * Do the actual work for the respective reinit() function and the
+       * matching constructor, i.e. create a matrix. Getting rid of the
+       * previous matrix is left to the caller.
+       */
+      void do_reinit (const size_type m,
+                      const size_type n,
+                      const size_type local_rows,
+                      const size_type local_columns,
+                      const size_type n_nonzero_per_row,
+                      const bool      is_symmetric = false,
+                      const size_type n_offdiag_nonzero_per_row = 0);
+
+      /**
+       * Same as previous function.
+       */
+      void do_reinit (const size_type               m,
+                      const size_type               n,
+                      const size_type               local_rows,
+                      const size_type               local_columns,
+                      const std::vector<size_type> &row_lengths,
+                      const bool                    is_symmetric = false,
+                      const std::vector<size_type> &offdiag_row_lengths = std::vector<size_type>());
+
+      /**
+       * Same as previous functions.
+       */
+      template <typename SparsityPatternType>
+      void do_reinit (const SparsityPatternType    &sparsity_pattern,
+                      const std::vector<size_type> &local_rows_per_process,
+                      const std::vector<size_type> &local_columns_per_process,
+                      const unsigned int            this_process,
+                      const bool                    preset_nonzero_locations);
+
+      /**
+       * Same as previous functions.
+       */
+      template <typename SparsityPatternType>
+      void do_reinit (const IndexSet            &local_rows,
+                      const IndexSet            &local_columns,
+                      const SparsityPatternType &sparsity_pattern);
+
+      /**
+       * To allow calling protected prepare_add() and prepare_set().
+       */
+      friend class BlockMatrixBase<SparseMatrix>;
+    };
+
+
+
+// -------- template and inline functions ----------
+    // Return a reference to the MPI communicator stored in this matrix.
+    inline
+    const MPI_Comm &
+    SparseMatrix::get_mpi_communicator () const
+    {
+      return communicator;
+    }
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_PETSC
+
+/*----------------------------   petsc_parallel_sparse_matrix.h     ---------------------------*/
+
+#endif
+/*----------------------------   petsc_parallel_sparse_matrix.h     ---------------------------*/
diff --git a/include/deal.II/lac/petsc_parallel_vector.h b/include/deal.II/lac/petsc_parallel_vector.h
new file mode 100644
index 0000000..6b66846
--- /dev/null
+++ b/include/deal.II/lac/petsc_parallel_vector.h
@@ -0,0 +1,600 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__petsc_parallel_vector_h
+#define dealii__petsc_parallel_vector_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_PETSC
+
+#  include <deal.II/base/subscriptor.h>
+#  include <deal.II/lac/exceptions.h>
+#  include <deal.II/lac/vector.h>
+#  include <deal.II/lac/petsc_vector_base.h>
+#  include <deal.II/base/index_set.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+// forward declaration
+template <typename> class Vector;
+class IndexSet;
+
+
+/*! @addtogroup PETScWrappers
+ *@{
+ */
+namespace PETScWrappers
+{
+  /**
+   * Namespace for PETSc classes that work in parallel over MPI, such as
+   * distributed vectors and matrices.
+   *
+   * @ingroup PETScWrappers
+   * @author Wolfgang Bangerth, 2004
+   */
+  namespace MPI
+  {
+
+    /**
+     * Implementation of a parallel vector class based on PETSc and using MPI
+     * communication to synchronise distributed operations. All the
+     * functionality is actually in the base class, except for the calls to
+     * generate a parallel vector. This is possible since PETSc only works on
+     * an abstract vector type and internally distributes to functions that do
+     * the actual work depending on the actual vector type (much like using
+     * virtual functions). Only the functions creating a vector of specific
+     * type differ, and are implemented in this particular class.
+     *
+     *
+     * <h3>Parallel communication model</h3>
+     *
+     * The parallel functionality of PETSc is built on top of the Message
+     * Passing Interface (MPI). MPI's communication model is built on
+     * collective communications: if one process wants something from another,
+     * that other process has to be willing to accept this communication. A
+     * process cannot query data from another process by calling a remote
+     * function, without that other process expecting such a transaction. The
+     * consequence is that most of the operations in the base class of this
+     * class have to be called collectively. For example, if you want to
+     * compute the l2 norm of a parallel vector, @em all processes across
+     * which this vector is shared have to call the @p l2_norm function. If
+     * you don't do this, but instead only call the @p l2_norm function on one
+     * process, then the following happens: This one process will call one of
+     * the collective MPI functions and wait for all the other processes to
+     * join in on this. Since the other processes don't call this function,
+     * you will either get a time-out on the first process, or, worse, by the
+     * time the next call to a PETSc function generates an MPI message on
+     * the other processes, you will get a cryptic message that only a subset
+     * of processes attempted a communication. These bugs can be very hard to
+     * figure out, unless you are well-acquainted with the communication model
+     * of MPI, and know which functions may generate MPI messages.
+     *
+     * One particular case, where an MPI message may be generated unexpectedly
+     * is discussed below.
+     *
+     *
+     * <h3>Accessing individual elements of a vector</h3>
+     *
+     * PETSc does allow read access to individual elements of a vector, but in
+     * the distributed case only to elements that are stored locally. We
+     * implement this through calls like <tt>d=vec(i)</tt>. However, if you
+     * access an element outside the locally stored range, an exception is
+     * generated.
+     *
+     * In contrast to read access, PETSc (and the respective deal.II wrapper
+     * classes) allow to write (or add) to individual elements of vectors,
+     * even if they are stored on a different process. You can do this
+     * writing, for example, <tt>vec(i)=d</tt> or <tt>vec(i)+=d</tt>, or
+     * similar operations. There is one catch, however, that may lead to very
+     * confusing error messages: PETSc requires application programs to call
+     * the compress() function when they switch from adding to elements to
+     * writing to elements. The reasoning is that all processes might
+     * accumulate addition operations to elements, even if multiple processes
+     * write to the same elements. By the time we call compress() the next
+     * time, all these additions are executed. However, if one process adds to
+     * an element, and another overwrites it, the order of execution would
+     * yield non-deterministic behavior if we don't make sure that a
+     * synchronisation with compress() happens in between.
+     *
+     * In order to make sure these calls to compress() happen at the
+     * appropriate time, the deal.II wrappers keep a state variable that stores
+     * which is the presently allowed operation: additions or writes. If it
+     * encounters an operation of the opposite kind, it calls compress() and
+     * flips the state. This can sometimes lead to very confusing behavior, in
+     * code that may for example look like this:
+     * @code
+     *   PETScWrappers::MPI::Vector vector;
+     *   ...
+     *                   // do some write operations on the vector
+     *   for (unsigned int i=0; i<vector.size(); ++i)
+     *     vector(i) = i;
+     *
+     *                   // do some additions to vector elements, but
+     *                   // only for some elements
+     *   for (unsigned int i=0; i<vector.size(); ++i)
+     *     if (some_condition(i) == true)
+     *       vector(i) += 1;
+     *
+     *                   // do another collective operation
+     *   const double norm = vector.l2_norm();
+     * @endcode
+     *
+     * This code can run into trouble: by the time we see the first addition
+     * operation, we need to flush the overwrite buffers for the vector, and
+     * the deal.II library will do so by calling compress(). However, it will
+     * only do so for all processes that actually do an addition -- if the
+     * condition is never true for one of the processes, then this one will
+     * not get to the actual compress() call, whereas all the other ones do.
+     * This gets us into trouble, since all the other processes hang in the
+     * call to flush the write buffers, while the one other process advances
+     * to the call to compute the l2 norm. At this time, you will get an error
+     * that some operation was attempted by only a subset of processes. This
+     * behavior may seem surprising, unless you know that write/addition
+     * operations on single elements may trigger this behavior.
+     *
+     * The problem described here may be avoided by placing additional calls
+     * to compress(), or making sure that all processes do the same type of
+     * operations at the same time, for example by placing zero additions if
+     * necessary.
+     *
+     * @see
+     * @ref GlossGhostedVector "vectors with ghost elements"
+     *
+     * @ingroup PETScWrappers
+     * @ingroup Vectors
+     * @author Wolfgang Bangerth, 2004
+     */
+    class Vector : public VectorBase
+    {
+    public:
+      /**
+       * Declare type for container size.
+       */
+      typedef types::global_dof_index size_type;
+
+      /**
+       * A variable that indicates whether this vector supports distributed
+       * data storage. If true, then this vector also needs an appropriate
+       * compress() function that allows recent set or add
+       * operations to individual elements to be communicated to other
+       * processors.
+       *
+       * For the current class, the variable equals true, since it does
+       * support parallel data storage.
+       */
+      static const bool supports_distributed_data = true;
+
+      /**
+       * Default constructor. Initialize the vector as empty.
+       */
+      Vector ();
+
+      /**
+       * Constructor. Set dimension to @p n and initialize all elements with
+       * zero.
+       *
+       * @arg local_size denotes the size of the chunk that shall be stored on
+       * the present process.
+       *
+       * @arg communicator denotes the MPI communicator over which the
+       * different parts of the vector shall communicate.
+       *
+       * The constructor is made explicit to avoid accidents like this:
+       * <tt>v=0;</tt>. Presumably, the user wants to set every element of the
+       * vector to zero, but instead, what happens is this call:
+       * <tt>v=Vector@<number@>(0);</tt>, i.e. the vector is replaced by one
+       * of length zero.
+       */
+      explicit Vector (const MPI_Comm  &communicator,
+                       const size_type  n,
+                       const size_type  local_size);
+
+
+      /**
+       * Copy-constructor from deal.II vectors. Sets the dimension to that of
+       * the given vector, and copies all elements.
+       *
+       * @arg local_size denotes the size of the chunk that shall be stored on
+       * the present process.
+       *
+       * @arg communicator denotes the MPI communicator over which the
+       * different parts of the vector shall communicate.
+       */
+      template <typename Number>
+      explicit Vector (const MPI_Comm               &communicator,
+                       const dealii::Vector<Number> &v,
+                       const size_type               local_size);
+
+
+      /**
+       * Copy-construct from the values of a PETSc wrapper vector class.
+       *
+       * @arg local_size denotes the size of the chunk that shall be stored on
+       * the present process.
+       *
+       * @arg communicator denotes the MPI communicator over which the
+       * different parts of the vector shall communicate.
+       */
+      explicit Vector (const MPI_Comm     &communicator,
+                       const VectorBase   &v,
+                       const size_type     local_size);
+
+      /**
+       * Constructs a new parallel ghosted PETSc vector from an IndexSet. Note
+       * that @p local must be contiguous and the global size of the vector is
+       * determined by local.size(). The global indices in @p ghost are
+       * supplied as ghost indices that can also be read locally.
+       *
+       * Note that the @p ghost IndexSet may be empty and that any indices
+       * already contained in @p local are ignored during construction. That
+       * way, the ghost parameter can equal the set of locally relevant
+       * degrees of freedom, see step-32.
+       *
+       * @note This operation always creates a ghosted vector.
+       *
+       * @see
+       * @ref GlossGhostedVector "vectors with ghost elements"
+       */
+      Vector (const IndexSet &local,
+              const IndexSet &ghost,
+              const MPI_Comm &communicator);
+
+      /**
+       * Constructs a new parallel PETSc vector from an IndexSet. This creates
+       * a non-ghosted vector.
+       */
+      explicit Vector (const IndexSet &local,
+                       const MPI_Comm &communicator);
+
+      /**
+       * Release all memory and return to a state just like after having
+       * called the default constructor.
+       */
+      void clear ();
+
+      /**
+       * Copy the given vector. Resize the present vector if necessary. Also
+       * take over the MPI communicator of @p v.
+       */
+      Vector &operator= (const Vector &v);
+
+      /**
+       * Copy the given sequential (non-distributed) vector into the present
+       * parallel vector. It is assumed that they have the same size, and this
+       * operation does not change the partitioning of the parallel vector by
+       * which its elements are distributed across several MPI processes. What
+       * this operation therefore does is to copy that chunk of the given
+       * vector @p v that corresponds to elements of the target vector that
+       * are stored locally, and copies them. Elements that are not stored
+       * locally are not touched.
+       *
+       * This being a parallel vector, you must make sure that @em all
+       * processes call this function at the same time. It is not possible to
+       * change the local part of a parallel vector on only one process,
+       * independent of what other processes do, with this function.
+       */
+      Vector &operator= (const PETScWrappers::Vector &v);
+
+      /**
+       * Set all components of the vector to the given number @p s. Simply
+       * pass this down to the base class, but we still need to declare this
+       * function to make the example given in the discussion about making the
+       * constructor explicit work.
+       */
+      Vector &operator= (const PetscScalar s);
+
+      /**
+       * Copy the values of a deal.II vector (as opposed to those of the PETSc
+       * vector wrapper class) into this object.
+       *
+       * Contrary to the case of sequential vectors, this operator requires
+       * that the present vector already has the correct size, since we need
+       * to have a partition and a communicator present which we otherwise
+       * can't get from the source vector.
+       */
+      template <typename number>
+      Vector &operator= (const dealii::Vector<number> &v);
+
+      /**
+       * Change the dimension of the vector to @p N. It is unspecified how
+       * resizing the vector affects the memory allocation of this object;
+       * i.e., it is not guaranteed that resizing it to a smaller size
+       * actually also reduces memory consumption, or if for efficiency the
+       * same amount of memory is used for less data.
+       *
+       * @p local_size denotes how many of the @p N values shall be stored
+       * locally on the present process.
+       *
+       * @p communicator denotes the MPI communicator henceforth to be used
+       * for this vector.
+       *
+       * If @p omit_zeroing_entries is false, the vector is filled by zeros.
+       * Otherwise, the elements are left in an unspecified state.
+       */
+      void reinit (const MPI_Comm  &communicator,
+                   const size_type  N,
+                   const size_type  local_size,
+                   const bool       omit_zeroing_entries = false);
+
+      /**
+       * Change the dimension to that of the vector @p v, and also take over
+       * the partitioning into local sizes as well as the MPI communicator.
+       * The same applies as for the other @p reinit function.
+       *
+       * The elements of @p v are not copied, i.e. this function is the same
+       * as calling <tt>reinit(v.get_mpi_communicator(), v.size(),
+       * v.local_size(), omit_zeroing_entries)</tt>.
+       */
+      void reinit (const Vector &v,
+                   const bool    omit_zeroing_entries = false);
+
+      /**
+       * Reinit as a vector with ghost elements. See the constructor with
+       * same signature for more details.
+       *
+       * @see
+       * @ref GlossGhostedVector "vectors with ghost elements"
+       */
+      void reinit (const IndexSet &local,
+                   const IndexSet &ghost,
+                   const MPI_Comm &communicator);
+
+      /**
+       * Reinit as a vector without ghost elements. See constructor with same
+       * signature for more details.
+       *
+       * @see
+       * @ref GlossGhostedVector "vectors with ghost elements"
+       */
+      void reinit (const IndexSet &local,
+                   const MPI_Comm &communicator);
+
+      /**
+       * Return a reference to the MPI communicator object in use with this
+       * vector.
+       */
+      const MPI_Comm &get_mpi_communicator () const;
+
+      /**
+       * Print to a stream. @p precision denotes the desired precision with
+       * which values shall be printed, @p scientific whether scientific
+       * notation shall be used. If @p across is @p true then the vector is
+       * printed in a line, while if @p false then the elements are printed on
+       * a separate line each.
+       *
+       * @note This function overloads the one in the base class to ensure
+       * that the right thing happens for parallel vectors that are
+       * distributed across processors.
+       */
+      void print (std::ostream       &out,
+                  const unsigned int  precision  = 3,
+                  const bool          scientific = true,
+                  const bool          across     = true) const;
+
+      /**
+       * @copydoc PETScWrappers::VectorBase::all_zero()
+       *
+       * @note This function overloads the one in the base class to make this
+       * a collective operation.
+       */
+      bool all_zero () const;
+
+    protected:
+      /**
+       * Create a vector of length @p n. For this class, we create a parallel
+       * vector. @p n denotes the total size of the vector to be created. @p
+       * local_size denotes how many of these elements shall be stored
+       * locally.
+       */
+      virtual void create_vector (const size_type n,
+                                  const size_type local_size);
+
+
+
+      /**
+       * Create a vector of global length @p n, local size @p local_size and
+       * with the specified ghost indices. Note that you need to call
+       * update_ghost_values() before accessing those.
+       */
+      virtual void create_vector (const size_type n,
+                                  const size_type local_size,
+                                  const IndexSet &ghostnodes);
+
+
+    private:
+      /**
+       * Copy of the communicator object to be used for this parallel vector.
+       */
+      MPI_Comm communicator;
+    };
+
+
+// ------------------ template and inline functions -------------
+
+
+    /**
+     * Global function @p swap that overloads the default implementation of
+     * the C++ standard library, which uses a temporary object. This overload
+     * exchanges the data of @p u and @p v by calling <tt>u.swap(v)</tt>.
+     *
+     * @relates PETScWrappers::MPI::Vector
+     * @author Wolfgang Bangerth, 2004
+     */
+    inline
+    void swap (Vector &u, Vector &v)
+    {
+      u.swap (v);
+    }
+
+
+#ifndef DOXYGEN
+    // Build a parallel vector of global size v.size() and copy v into it.
+    template <typename number>
+    Vector::Vector (const MPI_Comm         &communicator,
+                    const dealii::Vector<number> &v,
+                    const size_type         local_size)
+      :
+      communicator (communicator)
+    {
+      Vector::create_vector (v.size(), local_size);
+      // the vector now exists with the right sizes; copy the entries of v
+      *this = v;
+    }
+
+    // Assign the scalar s by forwarding to VectorBase::operator=, then
+    // return a reference to this object.
+    inline
+    Vector &
+    Vector::operator= (const PetscScalar s)
+    {
+      VectorBase::operator= (s);
+
+      return *this;
+    }
+
+
+
+    inline
+    Vector &
+    Vector::operator= (const Vector &v)
+    {
+      // Copy-assignment from another parallel vector. Neither side may
+      // have pending set/add operations, i.e. both must be compress()'ed:
+      Assert(v.last_action == VectorOperation::unknown,
+             internal::VectorReference::ExcWrongMode (VectorOperation::unknown,
+                                                      v.last_action));
+      Assert(last_action == VectorOperation::unknown,
+             internal::VectorReference::ExcWrongMode (VectorOperation::unknown,
+                                                      last_action));
+
+      // one error code variable serves all PETSc calls in this function
+      int ierr = 0;
+
+      if (v.size()==0)
+        {
+          // v has not been initialized to something useful, as in
+          //   Vector x,v; x=v;
+          // so there is nothing to copy: replace the PETSc object of
+          // this vector by a simple serial vector of length 0
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+          ierr = VecDestroy (vector);
+#else
+          ierr = VecDestroy (&vector);
+#endif
+          AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+          const int empty_length = 0;
+          ierr = VecCreateSeq (PETSC_COMM_SELF, empty_length, &vector);
+          AssertThrow (ierr == 0, ExcPETScError(ierr));
+          ghost_indices.clear();
+          ghosted = false;
+          return *this;
+        }
+
+      // if the global sizes differ, resize the present vector first,
+      // passing on the ghost indices of v if it has ghost elements
+      if (size() != v.size())
+        {
+          if (!v.has_ghost_elements())
+            reinit (v.communicator, v.size(), v.local_size(), true);
+          else
+            reinit (v.locally_owned_elements(), v.ghost_indices, v.communicator);
+        }
+
+      ierr = VecCopy (v.vector, vector);
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+      // the remaining ghost update is only done for ghosted vectors
+      if (!has_ghost_elements())
+        return *this;
+
+      ierr = VecGhostUpdateBegin(vector, INSERT_VALUES, SCATTER_FORWARD);
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+      ierr = VecGhostUpdateEnd(vector, INSERT_VALUES, SCATTER_FORWARD);
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+      return *this;
+    }
+
+
+
+    template <typename number>
+    inline
+    Vector &
+    Vector::operator= (const dealii::Vector<number> &v)
+    {
+      Assert (size() == v.size(),
+              ExcDimensionMismatch (size(), v.size()));
+
+      // Copy v entry by entry through the element access operator, then
+      // finish the insert operations with a call to compress().
+      //
+      // FIXME: this is not necessarily fast because PETSc offers no
+      // inlined access operator. Ideas for contributors who want to speed
+      // this up:
+      //
+      // - Convert all values to PetscScalar first and hand them to
+      //   VecSetValues in one go. Drawback: for a large vector this needs
+      //   a lot of memory, allocated on the heap, which is expensive.
+      //
+      // - Convert and set chunks of, say, 128 elements at a time with
+      //   VecSetValues. Such a chunk is small enough to live on the stack
+      //   as a local variable, which would be much more efficient.
+      //
+      // - Specialize this template for number==PetscScalar and omit the
+      //   conversion. The best we could then use is still VecSetValues,
+      //   which wants an array of indices: that again takes up a lot of
+      //   heap memory, and its content would simply be the sequence
+      //   0,1,2,3,...,n.
+      //
+      // The best of all worlds would probably be a PETSc function that
+      // takes a pointer to an array of PetscScalar values and simply
+      // copies n elements verbatim into the vector.
+      const size_type n_entries = v.size();
+      for (size_type index=0; index<n_entries; ++index)
+        {
+          (*this)(index) = v(index);
+        }
+
+      compress (::dealii::VectorOperation::insert);
+      return *this;
+    }
+
+
+    // Return a reference to the MPI communicator stored in this vector.
+    inline
+    const MPI_Comm &
+    Vector::get_mpi_communicator () const
+    {
+      return communicator;
+    }
+
+#endif // DOXYGEN
+  }
+}
+
+/**@}*/
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_PETSC
+
+/*----------------------------   petsc_parallel_vector.h     ---------------------------*/
+
+#endif
+/*----------------------------   petsc_parallel_vector.h     ---------------------------*/
diff --git a/include/deal.II/lac/petsc_precondition.h b/include/deal.II/lac/petsc_precondition.h
new file mode 100644
index 0000000..957f539
--- /dev/null
+++ b/include/deal.II/lac/petsc_precondition.h
@@ -0,0 +1,950 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__petsc_precondition_h
+#define dealii__petsc_precondition_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_PETSC
+
+#  include <deal.II/lac/exceptions.h>
+#  include <petscpc.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+namespace PETScWrappers
+{
+  // forward declarations
+  class MatrixBase;
+  class VectorBase;
+  class SolverBase;
+
+
+  /**
+   * Base class for preconditioner classes using the PETSc functionality. The
+   * classes in this hierarchy don't do a whole lot, except for providing a
+   * function that sets the preconditioner and certain parameters on the
+   * preconditioning context of the solver. These classes are basically here
+   * only to allow a similar interface as already used for the deal.II solver
+   * and preconditioner classes.
+   *
+   * Note that derived classes only provide interfaces to the relevant
+   * functionality of PETSc. PETSc does not implement all preconditioners for
+   * all matrix types. In particular, some preconditioners are not going to
+   * work for parallel jobs, such as for example the ILU preconditioner.
+   *
+   * @ingroup PETScWrappers
+   * @author Wolfgang Bangerth, Timo Heister, 2004, 2011
+   */
+  class PreconditionerBase
+  {
+  public:
+    /**
+     * Constructor.
+     */
+    PreconditionerBase ();
+
+    /**
+     * Destructor. Declared virtual because this class is used as a base class.
+     */
+    virtual ~PreconditionerBase ();
+
+    /**
+     * Apply the preconditioner once to the given @p src vector.
+     */
+    void vmult (VectorBase       &dst,
+                const VectorBase &src) const;
+
+
+    /**
+     * Give read-only access to the underlying PETSc PC object.
+     */
+    const PC &get_pc () const;
+
+  protected:
+    /**
+     * The PETSc preconditioner object.
+     */
+    PC pc;
+
+    /**
+     * A pointer to the matrix that acts as a preconditioner.
+     */
+    Mat matrix;
+
+    /**
+     * Internal function to create the PETSc preconditioner object. Fails if
+     * called twice.
+     */
+    void create_pc ();
+
+    /**
+     * Conversion operator to get a representation of the matrix that
+     * represents this preconditioner. We use this inside the actual solver,
+     * where we need to pass this matrix to the PETSc solvers.
+     */
+    operator Mat () const;
+
+    /**
+     * Make the solver class a friend, since it needs to call the (protected)
+     * conversion operator.
+     */
+    friend class SolverBase;
+  };
+
+
+
+  /**
+   * A class that implements the interface to use the PETSc Jacobi
+   * preconditioner.
+   *
+   * See the comment in the base class
+   * @ref PreconditionerBase
+   * for when this preconditioner may or may not work.
+   *
+   * @ingroup PETScWrappers
+   * @author Wolfgang Bangerth, Timo Heister, 2004, 2011
+   */
+  class PreconditionJacobi : public PreconditionerBase
+  {
+  public:
+    /**
+     * Standardized data struct to pipe additional flags to the
+     * preconditioner. This struct is currently empty.
+     */
+    struct AdditionalData
+    {};
+
+    /**
+     * Empty constructor. You need to call initialize() before using this
+     * object.
+     */
+    PreconditionJacobi ();
+
+
+    /**
+     * Constructor. Take the matrix which is used to form the preconditioner,
+     * and additional flags if there are any.
+     */
+    PreconditionJacobi (const MatrixBase     &matrix,
+                        const AdditionalData &additional_data = AdditionalData());
+
+    /**
+     * Same as above but without setting a matrix to form the preconditioner.
+     * Intended to be used with SLEPc objects.
+     */
+    PreconditionJacobi (const MPI_Comm communicator,
+                        const AdditionalData &additional_data = AdditionalData());
+
+    /**
+     * Initialize the preconditioner object and calculate all data that is
+     * necessary for applying it in a solver. This function is automatically
+     * called when calling the constructor with the same arguments and is only
+     * used if you create the preconditioner without arguments.
+     */
+    void initialize (const MatrixBase     &matrix,
+                     const AdditionalData &additional_data = AdditionalData());
+
+  protected:
+    /**
+     * Store a copy of the flags for this particular preconditioner.
+     */
+    AdditionalData additional_data;
+
+    /**
+     * Initialize the preconditioner object without knowing a particular
+     * matrix. This function sets up appropriate parameters to the underlying
+     * PETSc object after it has been created.
+     */
+    void initialize();
+  };
+
+
+
+  /**
+   * A class that implements the interface to use the PETSc Block Jacobi
+   * preconditioner. The blocking structure of the matrix is determined by the
+   * association of degrees of freedom to the individual processors in an MPI
+   * parallel job. If you use this preconditioner on a sequential job (or an
+   * MPI job with only one process) then the entire matrix is the only block.
+   *
+   * By default, PETSc uses an ILU(0) decomposition of each diagonal block of
+   * the matrix for preconditioning. This can be changed, as is explained in
+   * the relevant section of the PETSc manual, but is not implemented here.
+   *
+   * See the comment in the base class
+   * @ref PreconditionerBase
+   * for when this preconditioner may or may not work.
+   *
+   * @ingroup PETScWrappers
+   * @author Wolfgang Bangerth, Timo Heister, 2004, 2011
+   */
+  class PreconditionBlockJacobi : public PreconditionerBase
+  {
+  public:
+    /**
+     * Standardized data struct to pipe additional flags to the
+     * preconditioner. This struct is currently empty.
+     */
+    struct AdditionalData
+    {};
+
+    /**
+     * Empty constructor. You need to call initialize() before using this
+     * object.
+     */
+    PreconditionBlockJacobi ();
+
+    /**
+     * Constructor. Take the matrix which is used to form the preconditioner,
+     * and additional flags if there are any.
+     */
+    PreconditionBlockJacobi (const MatrixBase     &matrix,
+                             const AdditionalData &additional_data = AdditionalData());
+
+    /**
+     * Same as above but without setting a matrix to form the preconditioner.
+     * Intended to be used with SLEPc objects.
+     */
+    PreconditionBlockJacobi (const MPI_Comm communicator,
+                             const AdditionalData &additional_data = AdditionalData());
+
+
+    /**
+     * Initialize the preconditioner object and calculate all data that is
+     * necessary for applying it in a solver. This function is automatically
+     * called when calling the constructor with the same arguments and is only
+     * used if you create the preconditioner without arguments.
+     */
+    void initialize (const MatrixBase     &matrix,
+                     const AdditionalData &additional_data = AdditionalData());
+
+  protected:
+    /**
+     * Store a copy of the flags for this particular preconditioner.
+     */
+    AdditionalData additional_data;
+
+    /**
+     * Initialize the preconditioner object without knowing a particular
+     * matrix. This function sets up appropriate parameters to the underlying
+     * PETSc object after it has been created.
+     */
+    void initialize();
+
+  };
+
+
+
+  /**
+   * A class that implements the interface to use the PETSc SOR
+   * preconditioner.
+   *
+   * See the comment in the base class
+   * @ref PreconditionerBase
+   * for when this preconditioner may or may not work.
+   *
+   * @ingroup PETScWrappers
+   * @author Wolfgang Bangerth, Timo Heister, 2004, 2011
+   */
+  class PreconditionSOR : public PreconditionerBase
+  {
+  public:
+    /**
+     * Standardized data struct to pipe additional flags to the
+     * preconditioner.
+     */
+    struct AdditionalData
+    {
+      /**
+       * Constructor. By default, set the relaxation parameter omega to one.
+       */
+      AdditionalData (const double omega = 1);
+
+      /**
+       * Relaxation parameter.
+       */
+      double omega;
+    };
+
+    /**
+     * Empty constructor. You need to call initialize() before using this
+     * object.
+     */
+    PreconditionSOR ();
+
+    /**
+     * Constructor. Take the matrix which is used to form the preconditioner,
+     * and additional flags if there are any.
+     */
+    PreconditionSOR (const MatrixBase     &matrix,
+                     const AdditionalData &additional_data = AdditionalData());
+
+    /**
+     * Initialize the preconditioner object and calculate all data that is
+     * necessary for applying it in a solver. This function is automatically
+     * called when calling the constructor with the same arguments and is only
+     * used if you create the preconditioner without arguments.
+     */
+    void initialize (const MatrixBase     &matrix,
+                     const AdditionalData &additional_data = AdditionalData());
+
+  protected:
+    /**
+     * Store a copy of the flags for this particular preconditioner.
+     */
+    AdditionalData additional_data;
+  };
+
+
+
+  /**
+   * A class that implements the interface to use the PETSc SSOR
+   * preconditioner.
+   *
+   * See the comment in the base class
+   * @ref PreconditionerBase
+   * for when this preconditioner may or may not work.
+   *
+   * @ingroup PETScWrappers
+   * @author Wolfgang Bangerth, Timo Heister, 2004, 2011
+   */
+  class PreconditionSSOR : public PreconditionerBase
+  {
+  public:
+    /**
+     * Standardized data struct to pipe additional flags to the
+     * preconditioner.
+     */
+    struct AdditionalData
+    {
+      /**
+       * Constructor. By default, set the relaxation parameter omega to one.
+       */
+      AdditionalData (const double omega = 1);
+
+      /**
+       * Relaxation parameter.
+       */
+      double omega;
+    };
+
+    /**
+     * Empty constructor. You need to call initialize() before using this
+     * object.
+     */
+    PreconditionSSOR ();
+
+    /**
+     * Constructor. Take the matrix which is used to form the preconditioner,
+     * and additional flags if there are any.
+     */
+    PreconditionSSOR (const MatrixBase     &matrix,
+                      const AdditionalData &additional_data = AdditionalData());
+
+    /**
+     * Initialize the preconditioner object and calculate all data that is
+     * necessary for applying it in a solver. This function is automatically
+     * called when calling the constructor with the same arguments and is only
+     * used if you create the preconditioner without arguments.
+     */
+    void initialize (const MatrixBase     &matrix,
+                     const AdditionalData &additional_data = AdditionalData());
+
+  protected:
+    /**
+     * Store a copy of the flags for this particular preconditioner.
+     */
+    AdditionalData additional_data;
+  };
+
+
+
+  /**
+   * A class that implements the interface to use the PETSc Eisenstat
+   * preconditioner.
+   *
+   * See the comment in the base class
+   * @ref PreconditionerBase
+   * for when this preconditioner may or may not work.
+   *
+   * @ingroup PETScWrappers
+   * @author Wolfgang Bangerth, Timo Heister, 2004, 2011
+   */
+  class PreconditionEisenstat : public PreconditionerBase
+  {
+  public:
+    /**
+     * Standardized data struct to pipe additional flags to the
+     * preconditioner.
+     */
+    struct AdditionalData
+    {
+      /**
+       * Constructor. By default, set the relaxation parameter omega to one.
+       */
+      AdditionalData (const double omega = 1);
+
+      /**
+       * Relaxation parameter.
+       */
+      double omega;
+    };
+
+    /**
+     * Empty constructor. You need to call initialize() before using this
+     * object.
+     */
+    PreconditionEisenstat ();
+
+    /**
+     * Constructor. Take the matrix which is used to form the preconditioner,
+     * and additional flags if there are any.
+     */
+    PreconditionEisenstat (const MatrixBase     &matrix,
+                           const AdditionalData &additional_data = AdditionalData());
+
+    /**
+     * Initialize the preconditioner object and calculate all data that is
+     * necessary for applying it in a solver. This function is automatically
+     * called when calling the constructor with the same arguments and is only
+     * used if you create the preconditioner without arguments.
+     */
+    void initialize (const MatrixBase     &matrix,
+                     const AdditionalData &additional_data = AdditionalData());
+
+  protected:
+    /**
+     * Store a copy of the flags for this particular preconditioner.
+     */
+    AdditionalData additional_data;
+  };
+
+
+
+  /**
+   * A class that implements the interface to use the PETSc Incomplete
+   * Cholesky preconditioner.
+   *
+   * See the comment in the base class
+   * @ref PreconditionerBase
+   * for when this preconditioner may or may not work.
+   *
+   * @ingroup PETScWrappers
+   * @author Wolfgang Bangerth, Timo Heister, 2004, 2011
+   */
+  class PreconditionICC : public PreconditionerBase
+  {
+  public:
+    /**
+     * Standardized data struct to pipe additional flags to the
+     * preconditioner.
+     */
+    struct AdditionalData
+    {
+      /**
+       * Constructor. By default, set the fill-in parameter levels to zero.
+       */
+      AdditionalData (const unsigned int levels = 0);
+
+      /**
+       * Fill-in parameter.
+       */
+      unsigned int levels;
+    };
+
+    /**
+     * Empty constructor. You need to call initialize() before using this
+     * object.
+     */
+    PreconditionICC ();
+
+    /**
+     * Constructor. Take the matrix which is used to form the preconditioner,
+     * and additional flags if there are any.
+     */
+    PreconditionICC (const MatrixBase     &matrix,
+                     const AdditionalData &additional_data = AdditionalData());
+
+    /**
+     * Initialize the preconditioner object and calculate all data that is
+     * necessary for applying it in a solver. This function is automatically
+     * called when calling the constructor with the same arguments and is only
+     * used if you create the preconditioner without arguments.
+     */
+    void initialize (const MatrixBase     &matrix,
+                     const AdditionalData &additional_data = AdditionalData());
+
+  protected:
+    /**
+     * Store a copy of the flags for this particular preconditioner.
+     */
+    AdditionalData additional_data;
+  };
+
+
+
+  /**
+   * A class that implements the interface to use the PETSc ILU
+   * preconditioner.
+   *
+   * See the comment in the base class
+   * @ref PreconditionerBase
+   * for when this preconditioner may or may not work.
+   *
+   * @ingroup PETScWrappers
+   * @author Wolfgang Bangerth, Timo Heister, 2004, 2011
+   */
+  class PreconditionILU : public PreconditionerBase
+  {
+  public:
+    /**
+     * Standardized data struct to pipe additional flags to the
+     * preconditioner.
+     */
+    struct AdditionalData
+    {
+      /**
+       * Constructor. By default, set the fill-in parameter levels to zero.
+       */
+      AdditionalData (const unsigned int levels = 0);
+
+      /**
+       * Fill-in parameter.
+       */
+      unsigned int levels;
+    };
+
+    /**
+     * Empty constructor. You need to call initialize() before using this
+     * object.
+     */
+    PreconditionILU ();
+
+    /**
+     * Constructor. Take the matrix which is used to form the preconditioner,
+     * and additional flags if there are any.
+     */
+    PreconditionILU (const MatrixBase     &matrix,
+                     const AdditionalData &additional_data = AdditionalData());
+
+    /**
+     * Initialize the preconditioner object and calculate all data that is
+     * necessary for applying it in a solver. This function is automatically
+     * called when calling the constructor with the same arguments and is only
+     * used if you create the preconditioner without arguments.
+     */
+    void initialize (const MatrixBase     &matrix,
+                     const AdditionalData &additional_data = AdditionalData());
+
+  protected:
+    /**
+     * Store a copy of the flags for this particular preconditioner.
+     */
+    AdditionalData additional_data;
+  };
+
+
+
+  /**
+   * A class that implements the interface to use the PETSc LU preconditioner.
+   * The LU decomposition is only implemented for single processor machines.
+   * It should provide a convenient interface to another direct solver.
+   *
+   * See the comment in the base class
+   * @ref PreconditionerBase
+   * for when this preconditioner may or may not work.
+   *
+   * @ingroup PETScWrappers
+   * @author Oliver Kayser-Herold, 2004
+   */
+  class PreconditionLU : public PreconditionerBase
+  {
+  public:
+    /**
+     * Standardized data struct to pipe additional flags to the
+     * preconditioner.
+     */
+    struct AdditionalData
+    {
+      /**
+       * Constructor. (Default values taken from function PCCreate_LU of the
+       * PETSc library.)
+       */
+      AdditionalData (const double pivoting = 1.e-6,
+                      const double zero_pivot = 1.e-12,
+                      const double damping = 0.0);
+
+      /**
+       * Determines when pivoting is done during LU decomposition. 0.0
+       * indicates no pivoting, and 1.0 complete pivoting. Confer PETSc manual
+       * for more details.
+       */
+      double pivoting;
+
+      /**
+       * Size at which smaller pivots are declared to be zero. Confer PETSc
+       * manual for more details.
+       */
+      double zero_pivot;
+
+      /**
+       * This quantity is added to the diagonal of the matrix during
+       * factorisation.
+       */
+      double damping;
+    };
+
+    /**
+     * Empty constructor. You need to call initialize() before using this
+     * object.
+     */
+    PreconditionLU ();
+
+    /**
+     * Constructor. Take the matrix which is used to form the preconditioner,
+     * and additional flags if there are any.
+     */
+    PreconditionLU (const MatrixBase     &matrix,
+                    const AdditionalData &additional_data = AdditionalData());
+
+    /**
+     * Initialize the preconditioner object and calculate all data that is
+     * necessary for applying it in a solver. This function is automatically
+     * called when calling the constructor with the same arguments and is only
+     * used if you create the preconditioner without arguments.
+     */
+    void initialize (const MatrixBase     &matrix,
+                     const AdditionalData &additional_data = AdditionalData());
+
+  protected:
+    /**
+     * Store a copy of the flags for this particular preconditioner.
+     */
+    AdditionalData additional_data;
+  };
+
+
+
+
+  /**
+   * A class that implements the interface to use the BoomerAMG algebraic
+   * multigrid preconditioner from the HYPRE suite. Note that PETSc has to be
+   * configured with HYPRE (e.g. with --download-hypre=1).
+   *
+   * The preconditioner does support parallel distributed computations. See
+   * step-40 for an example.
+   *
+   * @ingroup PETScWrappers
+   * @author Timo Heister, 2010
+   */
+  class PreconditionBoomerAMG : public PreconditionerBase
+  {
+  public:
+    /**
+     * Standardized data struct to pipe additional flags to the
+     * preconditioner.
+     */
+    struct AdditionalData
+    {
+      /**
+       * Constructor. Note that BoomerAMG offers a lot more options to set
+       * than what is exposed here.
+       */
+      AdditionalData (
+        const bool symmetric_operator = false,
+        const double strong_threshold = 0.25,
+        const double max_row_sum = 0.9,
+        const unsigned int aggressive_coarsening_num_levels = 0,
+        const bool output_details = false
+      );
+
+      /**
+       * Set this flag to true if you have a symmetric system matrix and you
+       * want to use a solver which assumes a symmetric preconditioner like
+       * CG. The relaxation is done with SSOR/Jacobi when set to true and with
+       * SOR/Jacobi otherwise.
+       */
+      bool symmetric_operator;
+
+      /**
+       * Threshold of when nodes are considered strongly connected. See
+       * HYPRE_BoomerAMGSetStrongThreshold(). Recommended values are 0.25 for
+       * 2d and 0.5 for 3d problems, but it is problem dependent.
+       */
+      double strong_threshold;
+
+      /**
+       * If set to a value smaller than 1.0 then diagonally dominant parts of
+       * the matrix are treated as having no strongly connected nodes. If the
+       * row sum weighted by the diagonal entry is bigger than the given
+       * value, it is considered diagonally dominant. This feature is turned
+       * off by setting the value to 1.0, which can be useful as some matrices
+       * can result in having only diagonally dominant entries and thus no
+       * multigrid levels are constructed. The default in BoomerAMG for this
+       * is 0.9, which is also the default value of the constructor above.
+       * When you try this, check for a reasonable number of levels created.
+       */
+      double max_row_sum;
+
+      /**
+       * Number of levels of aggressive coarsening. Increasing this value
+       * reduces the construction time and memory requirements but may
+       * decrease effectiveness.
+       */
+      unsigned int aggressive_coarsening_num_levels;
+
+      /**
+       * Setting this flag to true produces debug output from HYPRE, when the
+       * preconditioner is constructed.
+       */
+      bool output_details;
+    };
+
+    /**
+     * Empty constructor. You need to call initialize() before using this
+     * object.
+     */
+    PreconditionBoomerAMG ();
+
+    /**
+     * Constructor. Take the matrix which is used to form the preconditioner,
+     * and additional flags if there are any.
+     */
+    PreconditionBoomerAMG (const MatrixBase     &matrix,
+                           const AdditionalData &additional_data = AdditionalData());
+
+    /**
+     * Same as above but without setting a matrix to form the preconditioner.
+     * Intended to be used with SLEPc objects.
+     */
+    PreconditionBoomerAMG (const MPI_Comm communicator,
+                           const AdditionalData &additional_data = AdditionalData());
+
+
+    /**
+     * Initialize the preconditioner object and calculate all data that is
+     * necessary for applying it in a solver. This function is automatically
+     * called when calling the constructor with the same arguments and is only
+     * used if you create the preconditioner without arguments.
+     */
+    void initialize (const MatrixBase     &matrix,
+                     const AdditionalData &additional_data = AdditionalData());
+
+  protected:
+    /**
+     * Store a copy of the flags for this particular preconditioner.
+     */
+    AdditionalData additional_data;
+
+    /**
+     * Initialize the preconditioner object without knowing a particular
+     * matrix. This function sets up appropriate parameters to the underlying
+     * PETSc object after it has been created.
+     */
+    void initialize();
+
+  };
+
+
+
+  /**
+   * A class that implements the interface to use the ParaSails sparse
+   * approximate inverse preconditioner from the HYPRE suite. Note that PETSc
+   * has to be configured with HYPRE (e.g. with --download-hypre=1).
+   *
+   * ParaSails uses least-squares minimization to compute a sparse approximate
+   * inverse. The sparsity pattern used is the pattern of a power of a
+   * sparsified matrix. ParaSails also uses a post-filtering technique to
+   * reduce the cost of applying the preconditioner.
+   *
+   * ParaSails solves symmetric positive definite (SPD) problems using a
+   * factorized SPD preconditioner and can also solve general (nonsymmetric
+   * and/or indefinite) problems with a nonfactorized preconditioner. The
+   * problem type has to be set in @p AdditionalData.
+   *
+   * The preconditioner does support parallel distributed computations.
+   *
+   * @ingroup PETScWrappers
+   * @author Martin Steigemann, 2012
+   */
+  class PreconditionParaSails : public PreconditionerBase
+  {
+  public:
+    /**
+     * Standardized data struct to pipe additional flags to the
+     * preconditioner.
+     */
+    struct AdditionalData
+    {
+      /**
+       * Constructor.
+       */
+      AdditionalData (
+        const unsigned int symmetric = 1,
+        const unsigned int n_levels = 1,
+        const double threshold = 0.1,
+        const double filter = 0.05,
+        const bool output_details = false
+      );
+
+      /**
+       * This parameter specifies the type of problem to solve:
+       * <ul>
+       * <li> @p 0: nonsymmetric and/or indefinite problem, and nonsymmetric
+       * preconditioner
+       * <li> @p 1: SPD problem, and SPD (factored) preconditioner
+       * <li> @p 2: nonsymmetric, definite problem, and SPD (factored)
+       * preconditioner
+       * </ul>
+       * Default is <tt>symmetric = 1</tt>.
+       */
+      unsigned int symmetric;
+
+      /**
+       * The sparsity pattern used for the approximate inverse is the pattern
+       * of a power <tt>B^m</tt> where <tt>B</tt> has been sparsified from the
+       * given matrix <tt>A</tt>, <tt>n_levels</tt> is equal to <tt>m+1</tt>.
+       * Default value is <tt>n_levels = 1</tt>.
+       */
+      unsigned int n_levels;
+
+      /**
+       * Sparsification is performed by dropping nonzeros which are smaller than
+       * <tt>threshold</tt> in magnitude. Lower values of <tt>threshold</tt> lead
+       * to more accurate, but also more expensive preconditioners. Default value
+       * is <tt>threshold = 0.1</tt>. When setting <tt>threshold < 0</tt>, a
+       * threshold is selected automatically, such that <tt>-threshold</tt>
+       * represents the fraction of nonzero elements that are dropped. For
+       * example, if <tt>threshold = -0.9</tt>, then <tt>B</tt> will contain
+       * about ten percent of the nonzeros of the given matrix <tt>A</tt>.
+       */
+      double threshold;
+
+      /**
+       * Filtering is a post-processing procedure, <tt>filter</tt> represents
+       * a fraction of nonzero elements that are dropped after creating the
+       * approximate inverse sparsity pattern. Default value is <tt>filter =
+       * 0.05</tt>. When setting <tt>filter < 0</tt>, a value is selected
+       * automatically, such that <tt>-filter</tt> represents the fraction of
+       * nonzero elements that are dropped. For example, if <tt>filter =
+       * -0.9</tt>, then about 90 percent of the entries in the computed
+       * approximate inverse are dropped.
+       */
+      double filter;
+
+      /**
+       * Setting this flag to true produces output from HYPRE, when the
+       * preconditioner is constructed.
+       */
+      bool output_details;
+    };
+
+
+
+    /**
+     * Empty constructor. You need to call initialize() before using this
+     * object.
+     */
+    PreconditionParaSails ();
+
+    /**
+     * Constructor. Take the matrix which is used to form the preconditioner,
+     * and additional flags if there are any.
+     */
+    PreconditionParaSails (const MatrixBase     &matrix,
+                           const AdditionalData &additional_data = AdditionalData());
+
+    /**
+     * Initialize the preconditioner object and calculate all data that is
+     * necessary for applying it in a solver. This function is automatically
+     * called when calling the constructor with the same arguments and is only
+     * used if you create the preconditioner without arguments.
+     */
+    void initialize (const MatrixBase     &matrix,
+                     const AdditionalData &additional_data = AdditionalData());
+
+  private:
+    /**
+     * Store a copy of the flags for this particular preconditioner.
+     */
+    AdditionalData additional_data;
+  };
+
+
+
+  /**
+   * A class that implements a non-preconditioned method.
+   *
+   * @ingroup PETScWrappers
+   * @author Martin Steigemann, 2012
+   */
+  class PreconditionNone : public PreconditionerBase
+  {
+  public:
+    /**
+     * Standardized data struct to pipe additional flags to the
+     * preconditioner. This struct is currently empty.
+     */
+    struct AdditionalData
+    {};
+
+    /**
+     * Empty constructor. You need to call initialize() before using this
+     * object.
+     */
+    PreconditionNone ();
+
+    /**
+     * Constructor. Take the matrix which is used to form the preconditioner,
+     * and additional flags if there are any. The matrix is completely ignored
+     * in computations.
+     */
+    PreconditionNone (const MatrixBase     &matrix,
+                      const AdditionalData &additional_data = AdditionalData());
+
+    /**
+     * Initialize the preconditioner object and calculate all data that is
+     * necessary for applying it in a solver. This function is automatically
+     * called when calling the constructor with the same arguments and is only
+     * used if you create the preconditioner without arguments. The matrix is
+     * completely ignored in computations.
+     */
+    void initialize (const MatrixBase     &matrix,
+                     const AdditionalData &additional_data = AdditionalData());
+
+  private:
+    /**
+     * Store a copy of the flags for this particular preconditioner.
+     */
+    AdditionalData additional_data;
+  };
+}
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+
+#endif // DEAL_II_WITH_PETSC
+
+/*----------------------------   petsc_precondition.h     ---------------------------*/
+
+#endif
+/*----------------------------   petsc_precondition.h     ---------------------------*/
diff --git a/include/deal.II/lac/petsc_solver.h b/include/deal.II/lac/petsc_solver.h
new file mode 100644
index 0000000..61adf24
--- /dev/null
+++ b/include/deal.II/lac/petsc_solver.h
@@ -0,0 +1,1012 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__petsc_solver_h
+#define dealii__petsc_solver_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_PETSC
+
+#  include <deal.II/lac/exceptions.h>
+#  include <deal.II/lac/solver_control.h>
+#  include <deal.II/base/std_cxx11/shared_ptr.h>
+
+#  include <petscksp.h>
+
+#ifdef DEAL_II_WITH_SLEPC
+#include <deal.II/lac/slepc_spectral_transformation.h>
+#endif
+
+DEAL_II_NAMESPACE_OPEN
+
+#ifdef DEAL_II_WITH_SLEPC
+namespace SLEPcWrappers
+{
+  // forward declarations
+  class TransformationBase;
+}
+#endif
+
+namespace PETScWrappers
+{
+  // forward declarations
+  class MatrixBase;
+  class VectorBase;
+  class PreconditionerBase;
+
+
+  /**
+   * Base class for solver classes using the PETSc solvers. Since solvers in
+   * PETSc are selected based on flags passed to a generic solver object,
+   * basically all the actual solver calls happen in this class, and derived
+   * classes simply set the right flags to select one solver or another, or to
+   * set certain parameters for individual solvers.
+   *
+   * Optionally, the user can create a solver derived from the SolverBase
+   * class and can set the default arguments necessary to solve the linear
+   * system of equations with SolverControl. These default options can be
+   * overridden by specifying command line arguments of the form @p -ksp_*.
+   * For example, @p -ksp_monitor_true_residual prints out true residual norm
+   * (unpreconditioned) at each iteration and @p -ksp_view provides
+   * information about the linear solver and the preconditioner used in the
+   * current context. The type of the solver can also be changed during
+   * runtime by specifying @p -ksp_type {richardson, cg, gmres, fgmres, ..} to
+   * dynamically test the optimal solver along with a suitable preconditioner
+   * set using @p -pc_type {jacobi, bjacobi, ilu, lu, ..}. There are several
+   * other command line options available to modify the behavior of the PETSc
+   * linear solver and can be obtained from the <a
+   * href="http://www.mcs.anl.gov/petsc">documentation and manual pages</a>.
+   *
+   * @note Repeated calls to solve() on a solver object with a Preconditioner
+   * must be used with care. The preconditioner is initialized in the first
+   * call to solve() and subsequent calls reuse the solver and preconditioner
+   * object. This is done for performance reasons. The solver and
+   * preconditioner can be reset by calling reset().
+   *
+   * One of the gotchas of PETSc is that -- in particular in MPI mode -- it
+   * often does not produce very helpful error messages. In order to save
+   * other users some time in searching a hard to track down error, here is
+   * one situation and the error message one gets there: when you don't
+   * specify an MPI communicator to your solver's constructor. In this case,
+   * you will get an error of the following form from each of your parallel
+   * processes:
+   * @verbatim
+   *   [1]PETSC ERROR: PCSetVector() line 1173 in src/ksp/pc/interface/precon.c
+   *   [1]PETSC ERROR:   Arguments must have same communicators!
+   *   [1]PETSC ERROR:   Different communicators in the two objects: Argument # 1 and 2!
+   *   [1]PETSC ERROR: KSPSetUp() line 195 in src/ksp/ksp/interface/itfunc.c
+   * @endverbatim
+   *
+   * This error, on which one can spend a very long time figuring out what
+   * exactly goes wrong, results from not specifying an MPI communicator. Note
+   * that the communicator @em must match that of the matrix and all vectors
+   * in the linear system which we want to solve. Aggravating the situation is
+   * the fact that the default argument to the solver classes, @p
+   * PETSC_COMM_SELF, is the appropriate argument for the sequential case
+   * (which is why it is the default argument), so this error only shows up in
+   * parallel mode.
+   *
+   * @ingroup PETScWrappers
+   * @author Wolfgang Bangerth, 2004
+   */
+  class SolverBase
+  {
+  public:
+    /**
+     * Constructor. Takes the solver control object and the MPI communicator
+     * over which parallel computations are to happen.
+     *
+     * Note that the communicator used here must match the communicator used
+     * in the system matrix, solution, and right hand side object of the solve
+     * to be done with this solver. Otherwise, PETSc will generate hard to
+     * track down errors, see the documentation of the SolverBase class.
+     */
+    SolverBase (SolverControl  &cn,
+                const MPI_Comm &mpi_communicator);
+
+    /**
+     * Destructor.
+     */
+    virtual ~SolverBase ();
+
+    /**
+     * Solve the linear system <tt>Ax=b</tt>. Depending on the information
+     * provided by derived classes and the object passed as a preconditioner,
+     * one of the linear solvers and preconditioners of PETSc is chosen.
+     * Repeated calls to solve() do not reconstruct the preconditioner for
+     * performance reasons. See the class documentation.
+     */
+    void
+    solve (const MatrixBase         &A,
+           VectorBase               &x,
+           const VectorBase         &b,
+           const PreconditionerBase &preconditioner);
+
+
+    /**
+     * Reset the contained preconditioner and solver object. See the class
+     * description for more details.
+     */
+    virtual void reset();
+
+
+    /**
+     * Set a prefix name for the solver object. Useful when customizing the
+     * PETSc KSP object with command-line options.
+     */
+    void set_prefix(const std::string &prefix);
+
+
+    /**
+     * Access to the object that controls convergence.
+     */
+    SolverControl &control() const;
+
+    /**
+     * Initialize the solver with the preconditioner. This function is
+     * intended for use with the SLEPc spectral transformation class.
+     */
+    void initialize(const PreconditionerBase &preconditioner);
+
+    /**
+     * Exception carrying the error number reported by a PETSc function.
+     */
+    DeclException1 (ExcPETScError,
+                    int,
+                    << "An error with error number " << arg1
+                    << " occurred while calling a PETSc function");
+
+  protected:
+
+    /**
+     * Reference to the object that controls convergence of the iterative
+     * solver. In fact, for these PETSc wrappers, PETSc does so itself, but we
+     * copy the data from this object before starting the solution process,
+     * and copy the data back into it afterwards.
+     */
+    SolverControl &solver_control;
+
+    /**
+     * Copy of the MPI communicator object to be used for the solver.
+     */
+    const MPI_Comm mpi_communicator;
+
+    /**
+     * Pure virtual hook: takes a Krylov Subspace Solver context object and
+     * sets the type of solver that is requested by the derived class.
+     */
+    virtual void set_solver_type (KSP &ksp) const = 0;
+
+    /**
+     * Solver prefix name to qualify options specific to the PETSc KSP object
+     * in the current context. Note: A hyphen (-) must NOT be given at the
+     * beginning of the prefix name. The first character of all runtime
+     * options is AUTOMATICALLY the hyphen.
+     */
+    std::string prefix_name;
+
+  private:
+    /**
+     * A function that is used in PETSc as a callback to check on convergence.
+     * It takes the information provided from PETSc and checks it against
+     * deal.II's own SolverControl objects to see if convergence has been
+     * reached.
+     */
+    static
+    PetscErrorCode convergence_test (KSP                 ksp,
+                                     const PetscInt      iteration,
+                                     const PetscReal     residual_norm,
+                                     KSPConvergedReason *reason,
+                                     void               *solver_control);
+
+    /**
+     * A structure that contains the PETSc solver and preconditioner objects.
+     * This object is preserved between subsequent calls to the solver if the
+     * same preconditioner is used as in the previous solver step. This may
+     * save some computation time, if setting up a preconditioner is
+     * expensive, such as in the case of an ILU for example.
+     *
+     * The actual declaration of this class is complicated by the fact that
+     * PETSc changed its solver interface completely and incompatibly between
+     * versions 2.1.6 and 2.2.0 :-(
+     *
+     * Objects of this type are explicitly created, but are destroyed when the
+     * surrounding solver object goes out of scope, or when we assign a new
+     * value to the pointer to this object. The respective *Destroy functions
+     * are therefore written into the destructor of this object, even though
+     * the object does not have a constructor.
+     */
+    struct SolverData
+    {
+      /**
+       * Destructor.
+       */
+      ~SolverData ();
+
+      /**
+       * Object for Krylov subspace solvers.
+       */
+      KSP  ksp;
+    };
+
+    /**
+     * Pointer to an object that stores the solver context. This is recreated
+     * in the main solver routine if necessary.
+     */
+    std_cxx11::shared_ptr<SolverData> solver_data;
+
+#ifdef DEAL_II_WITH_SLEPC
+    /**
+     * Make the transformation class a friend, since it needs to set the KSP
+     * solver.
+     */
+    friend class SLEPcWrappers::TransformationBase;
+#endif
+  };
+
+
+
+  /**
+   * An implementation of the solver interface using the PETSc Richardson
+   * solver.
+   *
+   * @ingroup PETScWrappers
+   * @author Wolfgang Bangerth, 2004
+   */
+  class SolverRichardson : public SolverBase
+  {
+  public:
+    /**
+     * Standardized data struct to pipe additional data to the solver.
+     */
+    struct AdditionalData
+    {
+      /**
+       * Constructor. By default, set the relaxation parameter omega to one.
+       */
+      explicit
+      AdditionalData (const double omega = 1);
+
+      /**
+       * Relaxation (damping) parameter.
+       */
+      double omega;
+    };
+
+    /**
+     * Constructor. In contrast to deal.II's own solvers, there is no need to
+     * give a vector memory object. However, PETSc solvers want to have an MPI
+     * communicator context over which computations are parallelized. By
+     * default, @p PETSC_COMM_SELF is used here, but you can change this. Note
+     * that for single processor (non-MPI) versions, this parameter does not
+     * have any effect.
+     *
+     * The last argument takes a structure with additional, solver dependent
+     * flags for tuning.
+     *
+     * Note that the communicator used here must match the communicator used
+     * in the system matrix, solution, and right hand side object of the solve
+     * to be done with this solver. Otherwise, PETSc will generate hard to
+     * track down errors, see the documentation of the SolverBase class.
+     */
+    SolverRichardson (SolverControl        &cn,
+                      const MPI_Comm       &mpi_communicator = PETSC_COMM_SELF,
+                      const AdditionalData &data = AdditionalData());
+
+  protected:
+    /**
+     * Copy of the flags for this particular solver.
+     */
+    const AdditionalData additional_data;
+
+    /**
+     * Implementation of the base class hook: takes a Krylov Subspace Solver
+     * context object and sets the type of solver appropriate for this class.
+     */
+    virtual void set_solver_type (KSP &ksp) const;
+  };
+
+
+
+  /**
+   * An implementation of the solver interface using the PETSc Chebyshev (or,
+   * prior to version 3.3, Chebychev) solver.
+   *
+   * @ingroup PETScWrappers
+   * @author Wolfgang Bangerth, 2004
+   */
+  class SolverChebychev : public SolverBase
+  {
+  public:
+    /**
+     * Standardized struct for additional solver data; it has no members.
+     */
+    struct AdditionalData
+    {};
+
+    /**
+     * Constructor. In contrast to deal.II's own solvers, there is no need to
+     * give a vector memory object. However, PETSc solvers want to have an MPI
+     * communicator context over which computations are parallelized. By
+     * default, @p PETSC_COMM_SELF is used here, but you can change this. Note
+     * that for single processor (non-MPI) versions, this parameter does not
+     * have any effect.
+     *
+     * The last argument takes a structure with additional, solver dependent
+     * flags for tuning.
+     *
+     * Note that the communicator used here must match the communicator used
+     * in the system matrix, solution, and right hand side object of the solve
+     * to be done with this solver. Otherwise, PETSc will generate hard to
+     * track down errors, see the documentation of the SolverBase class.
+     */
+    SolverChebychev (SolverControl        &cn,
+                     const MPI_Comm       &mpi_communicator = PETSC_COMM_SELF,
+                     const AdditionalData &data = AdditionalData());
+
+  protected:
+    /**
+     * Copy of the flags for this particular solver.
+     */
+    const AdditionalData additional_data;
+
+    /**
+     * Implementation of the base class hook: takes a Krylov Subspace Solver
+     * context object and sets the type of solver appropriate for this class.
+     */
+    virtual void set_solver_type (KSP &ksp) const;
+  };
+
+
+
+  /**
+   * An implementation of the solver interface using the PETSc CG solver.
+   *
+   * @ingroup PETScWrappers
+   * @author Wolfgang Bangerth, 2004
+   */
+  class SolverCG : public SolverBase
+  {
+  public:
+    /**
+     * Standardized struct for additional solver data; it has no members.
+     */
+    struct AdditionalData
+    {};
+
+    /**
+     * Constructor. In contrast to deal.II's own solvers, there is no need to
+     * give a vector memory object. However, PETSc solvers want to have an MPI
+     * communicator context over which computations are parallelized. By
+     * default, @p PETSC_COMM_SELF is used here, but you can change this. Note
+     * that for single processor (non-MPI) versions, this parameter does not
+     * have any effect.
+     *
+     * The last argument takes a structure with additional, solver dependent
+     * flags for tuning.
+     *
+     * Note that the communicator used here must match the communicator used
+     * in the system matrix, solution, and right hand side object of the solve
+     * to be done with this solver. Otherwise, PETSc will generate hard to
+     * track down errors, see the documentation of the SolverBase class.
+     */
+    SolverCG (SolverControl        &cn,
+              const MPI_Comm       &mpi_communicator = PETSC_COMM_SELF,
+              const AdditionalData &data = AdditionalData());
+
+  protected:
+    /**
+     * Copy of the flags for this particular solver.
+     */
+    const AdditionalData additional_data;
+
+    /**
+     * Implementation of the base class hook: takes a Krylov Subspace Solver
+     * context object and sets the type of solver appropriate for this class.
+     */
+    virtual void set_solver_type (KSP &ksp) const;
+  };
+
+
+
+  /**
+   * An implementation of the solver interface using the PETSc BiCG solver.
+   *
+   * @ingroup PETScWrappers
+   * @author Wolfgang Bangerth, 2004
+   */
+  class SolverBiCG : public SolverBase
+  {
+  public:
+    /**
+     * Standardized struct for additional solver data; it has no members.
+     */
+    struct AdditionalData
+    {};
+
+    /**
+     * Constructor. In contrast to deal.II's own solvers, there is no need to
+     * give a vector memory object. However, PETSc solvers want to have an MPI
+     * communicator context over which computations are parallelized. By
+     * default, @p PETSC_COMM_SELF is used here, but you can change this. Note
+     * that for single processor (non-MPI) versions, this parameter does not
+     * have any effect.
+     *
+     * The last argument takes a structure with additional, solver dependent
+     * flags for tuning.
+     *
+     * Note that the communicator used here must match the communicator used
+     * in the system matrix, solution, and right hand side object of the solve
+     * to be done with this solver. Otherwise, PETSc will generate hard to
+     * track down errors, see the documentation of the SolverBase class.
+     */
+    SolverBiCG (SolverControl        &cn,
+                const MPI_Comm       &mpi_communicator = PETSC_COMM_SELF,
+                const AdditionalData &data = AdditionalData());
+
+  protected:
+    /**
+     * Copy of the flags for this particular solver.
+     */
+    const AdditionalData additional_data;
+
+    /**
+     * Implementation of the base class hook: takes a Krylov Subspace Solver
+     * context object and sets the type of solver appropriate for this class.
+     */
+    virtual void set_solver_type (KSP &ksp) const;
+  };
+
+
+
+  /**
+   * An implementation of the solver interface using the PETSc GMRES solver.
+   *
+   * @ingroup PETScWrappers
+   * @author Wolfgang Bangerth, 2004
+   */
+  class SolverGMRES : public SolverBase
+  {
+  public:
+    /**
+     * Standardized data struct to pipe additional data to the solver.
+     */
+    struct AdditionalData
+    {
+      /**
+       * Constructor. By default, use 30 temporary vectors, i.e. restart every
+       * 30 iterations, and do not use right preconditioning.
+       */
+      AdditionalData (const unsigned int restart_parameter = 30,
+                      const bool right_preconditioning = false);
+
+      /**
+       * Maximum number of temporary vectors.
+       */
+      unsigned int restart_parameter;
+
+      /**
+       * Flag for right preconditioning.
+       */
+      bool right_preconditioning;
+    };
+
+    /**
+     * Constructor. In contrast to deal.II's own solvers, there is no need to
+     * give a vector memory object. However, PETSc solvers want to have an MPI
+     * communicator context over which computations are parallelized. By
+     * default, @p PETSC_COMM_SELF is used here, but you can change this. Note
+     * that for single processor (non-MPI) versions, this parameter does not
+     * have any effect.
+     *
+     * The last argument takes a structure with additional, solver dependent
+     * flags for tuning.
+     *
+     * Note that the communicator used here must match the communicator used
+     * in the system matrix, solution, and right hand side object of the solve
+     * to be done with this solver. Otherwise, PETSc will generate hard to
+     * track down errors, see the documentation of the SolverBase class.
+     */
+    SolverGMRES (SolverControl        &cn,
+                 const MPI_Comm       &mpi_communicator = PETSC_COMM_SELF,
+                 const AdditionalData &data = AdditionalData());
+
+  protected:
+    /**
+     * Copy of the flags for this particular solver.
+     */
+    const AdditionalData additional_data;
+
+    /**
+     * Implementation of the base class hook: takes a Krylov Subspace Solver
+     * context object and sets the type of solver appropriate for this class.
+     */
+    virtual void set_solver_type (KSP &ksp) const;
+  };
+
+
+
+  /**
+   * An implementation of the solver interface using the PETSc BiCGStab
+   * solver.
+   *
+   * @ingroup PETScWrappers
+   * @author Wolfgang Bangerth, 2004
+   */
+  class SolverBicgstab : public SolverBase
+  {
+  public:
+    /**
+     * Standardized struct for additional solver data; it has no members.
+     */
+    struct AdditionalData
+    {};
+
+    /**
+     * Constructor. In contrast to deal.II's own solvers, there is no need to
+     * give a vector memory object. However, PETSc solvers want to have an MPI
+     * communicator context over which computations are parallelized. By
+     * default, @p PETSC_COMM_SELF is used here, but you can change this. Note
+     * that for single processor (non-MPI) versions, this parameter does not
+     * have any effect.
+     *
+     * The last argument takes a structure with additional, solver dependent
+     * flags for tuning.
+     *
+     * Note that the communicator used here must match the communicator used
+     * in the system matrix, solution, and right hand side object of the solve
+     * to be done with this solver. Otherwise, PETSc will generate hard to
+     * track down errors, see the documentation of the SolverBase class.
+     */
+    SolverBicgstab (SolverControl        &cn,
+                    const MPI_Comm       &mpi_communicator = PETSC_COMM_SELF,
+                    const AdditionalData &data = AdditionalData());
+
+  protected:
+    /**
+     * Copy of the flags for this particular solver.
+     */
+    const AdditionalData additional_data;
+
+    /**
+     * Implementation of the base class hook: takes a Krylov Subspace Solver
+     * context object and sets the type of solver appropriate for this class.
+     */
+    virtual void set_solver_type (KSP &ksp) const;
+  };
+
+  /**
+   * An implementation of the solver interface using the PETSc CG Squared
+   * solver.
+   *
+   * @ingroup PETScWrappers
+   * @author Wolfgang Bangerth, 2004
+   */
+  class SolverCGS : public SolverBase
+  {
+  public:
+    /**
+     * Standardized struct for additional solver data; it has no members.
+     */
+    struct AdditionalData
+    {};
+
+    /**
+     * Constructor. In contrast to deal.II's own solvers, there is no need to
+     * give a vector memory object. However, PETSc solvers want to have an MPI
+     * communicator context over which computations are parallelized. By
+     * default, @p PETSC_COMM_SELF is used here, but you can change this. Note
+     * that for single processor (non-MPI) versions, this parameter does not
+     * have any effect.
+     *
+     * The last argument takes a structure with additional, solver dependent
+     * flags for tuning.
+     *
+     * Note that the communicator used here must match the communicator used
+     * in the system matrix, solution, and right hand side object of the solve
+     * to be done with this solver. Otherwise, PETSc will generate hard to
+     * track down errors, see the documentation of the SolverBase class.
+     */
+    SolverCGS (SolverControl        &cn,
+               const MPI_Comm       &mpi_communicator = PETSC_COMM_SELF,
+               const AdditionalData &data = AdditionalData());
+
+  protected:
+    /**
+     * Copy of the flags for this particular solver.
+     */
+    const AdditionalData additional_data;
+
+    /**
+     * Implementation of the base class hook: takes a Krylov Subspace Solver
+     * context object and sets the type of solver appropriate for this class.
+     */
+    virtual void set_solver_type (KSP &ksp) const;
+  };
+
+
+
+  /**
+   * An implementation of the solver interface using the PETSc TFQMR solver.
+   *
+   * @ingroup PETScWrappers
+   * @author Wolfgang Bangerth, 2004
+   */
+  class SolverTFQMR : public SolverBase
+  {
+  public:
+    /**
+     * Standardized struct for additional solver data; it has no members.
+     */
+    struct AdditionalData
+    {};
+
+    /**
+     * Constructor. In contrast to deal.II's own solvers, there is no need to
+     * give a vector memory object. However, PETSc solvers want to have an MPI
+     * communicator context over which computations are parallelized. By
+     * default, @p PETSC_COMM_SELF is used here, but you can change this. Note
+     * that for single processor (non-MPI) versions, this parameter does not
+     * have any effect.
+     *
+     * The last argument takes a structure with additional, solver dependent
+     * flags for tuning.
+     *
+     * Note that the communicator used here must match the communicator used
+     * in the system matrix, solution, and right hand side object of the solve
+     * to be done with this solver. Otherwise, PETSc will generate hard to
+     * track down errors, see the documentation of the SolverBase class.
+     */
+    SolverTFQMR (SolverControl        &cn,
+                 const MPI_Comm       &mpi_communicator = PETSC_COMM_SELF,
+                 const AdditionalData &data = AdditionalData());
+
+  protected:
+    /**
+     * Copy of the flags for this particular solver.
+     */
+    const AdditionalData additional_data;
+
+    /**
+     * Implementation of the base class hook: takes a Krylov Subspace Solver
+     * context object and sets the type of solver appropriate for this class.
+     */
+    virtual void set_solver_type (KSP &ksp) const;
+  };
+
+
+
+
+  /**
+   * An implementation of the solver interface using the PETSc TFQMR-2 solver
+   * (called TCQMR in PETSc). Note that this solver had a serious bug in
+   * versions up to and including PETSc 2.1.6, in that it did not check
+   * convergence and always returned an error code. Thus, this class will
+   * abort with an error indicating failure to converge with PETSc 2.1.6 and
+   * prior. This should be fixed in later versions of PETSc, though.
+   *
+   * @ingroup PETScWrappers
+   * @author Wolfgang Bangerth, 2004
+   */
+  class SolverTCQMR : public SolverBase
+  {
+  public:
+    /**
+     * Standardized struct for additional solver data; it has no members.
+     */
+    struct AdditionalData
+    {};
+
+    /**
+     * Constructor. In contrast to deal.II's own solvers, there is no need to
+     * give a vector memory object. However, PETSc solvers want to have an MPI
+     * communicator context over which computations are parallelized. By
+     * default, @p PETSC_COMM_SELF is used here, but you can change this. Note
+     * that for single processor (non-MPI) versions, this parameter does not
+     * have any effect.
+     *
+     * The last argument takes a structure with additional, solver dependent
+     * flags for tuning.
+     *
+     * Note that the communicator used here must match the communicator used
+     * in the system matrix, solution, and right hand side object of the solve
+     * to be done with this solver. Otherwise, PETSc will generate hard to
+     * track down errors, see the documentation of the SolverBase class.
+     */
+    SolverTCQMR (SolverControl        &cn,
+                 const MPI_Comm       &mpi_communicator = PETSC_COMM_SELF,
+                 const AdditionalData &data = AdditionalData());
+
+  protected:
+    /**
+     * Copy of the flags for this particular solver.
+     */
+    const AdditionalData additional_data;
+
+    /**
+     * Implementation of the base class hook: takes a Krylov Subspace Solver
+     * context object and sets the type of solver appropriate for this class.
+     */
+    virtual void set_solver_type (KSP &ksp) const;
+  };
+
+
+
+  /**
+   * An implementation of the solver interface using the PETSc CR solver.
+   *
+   * @ingroup PETScWrappers
+   * @author Wolfgang Bangerth, 2004
+   */
+  class SolverCR : public SolverBase
+  {
+  public:
+    /**
+     * Standardized struct for additional solver data; it has no members.
+     */
+    struct AdditionalData
+    {};
+
+    /**
+     * Constructor. In contrast to deal.II's own solvers, there is no need to
+     * give a vector memory object. However, PETSc solvers want to have an MPI
+     * communicator context over which computations are parallelized. By
+     * default, @p PETSC_COMM_SELF is used here, but you can change this. Note
+     * that for single processor (non-MPI) versions, this parameter does not
+     * have any effect.
+     *
+     * The last argument takes a structure with additional, solver dependent
+     * flags for tuning.
+     *
+     * Note that the communicator used here must match the communicator used
+     * in the system matrix, solution, and right hand side object of the solve
+     * to be done with this solver. Otherwise, PETSc will generate hard to
+     * track down errors, see the documentation of the SolverBase class.
+     */
+    SolverCR (SolverControl        &cn,
+              const MPI_Comm       &mpi_communicator = PETSC_COMM_SELF,
+              const AdditionalData &data = AdditionalData());
+
+  protected:
+    /**
+     * Copy of the flags for this particular solver.
+     */
+    const AdditionalData additional_data;
+
+    /**
+     * Implementation of the base class hook: takes a Krylov Subspace Solver
+     * context object and sets the type of solver appropriate for this class.
+     */
+    virtual void set_solver_type (KSP &ksp) const;
+  };
+
+
+
+  /**
+   * An implementation of the solver interface using the PETSc Least Squares
+   * solver.
+   *
+   * @ingroup PETScWrappers
+   * @author Wolfgang Bangerth, 2004
+   */
+  class SolverLSQR : public SolverBase
+  {
+  public:
+    /**
+     * Standardized struct for additional solver data; it has no members.
+     */
+    struct AdditionalData
+    {};
+
+    /**
+     * Constructor. In contrast to deal.II's own solvers, there is no need to
+     * give a vector memory object. However, PETSc solvers want to have an MPI
+     * communicator context over which computations are parallelized. By
+     * default, @p PETSC_COMM_SELF is used here, but you can change this. Note
+     * that for single processor (non-MPI) versions, this parameter does not
+     * have any effect.
+     *
+     * The last argument takes a structure with additional, solver dependent
+     * flags for tuning.
+     *
+     * Note that the communicator used here must match the communicator used
+     * in the system matrix, solution, and right hand side object of the solve
+     * to be done with this solver. Otherwise, PETSc will generate hard to
+     * track down errors, see the documentation of the SolverBase class.
+     */
+    SolverLSQR (SolverControl        &cn,
+                const MPI_Comm       &mpi_communicator = PETSC_COMM_SELF,
+                const AdditionalData &data = AdditionalData());
+
+  protected:
+    /**
+     * Copy of the flags for this particular solver.
+     */
+    const AdditionalData additional_data;
+
+    /**
+     * Implementation of the base class hook: takes a Krylov Subspace Solver
+     * context object and sets the type of solver appropriate for this class.
+     */
+    virtual void set_solver_type (KSP &ksp) const;
+  };
+
+
+  /**
+   * An implementation of the solver interface using the PETSc PREONLY solver.
+   * Actually this is NOT a real solution algorithm. solve() only applies the
+   * preconditioner once and returns immediately. Its only purpose is to
+   * provide a solver object, when the preconditioner should be used as a real
+   * solver. It is very useful in conjunction with the complete LU
+   * decomposition preconditioner <tt> PreconditionLU </tt>, which in
+   * conjunction with this solver class becomes a direct solver.
+   *
+   * @ingroup PETScWrappers
+   * @author Wolfgang Bangerth, 2004, Oliver Kayser-Herold, 2004
+   */
+  class SolverPreOnly : public SolverBase
+  {
+  public:
+    /**
+     * Standardized struct for additional solver data; it has no members.
+     */
+    struct AdditionalData
+    {};
+
+    /**
+     * Constructor. In contrast to deal.II's own solvers, there is no need to
+     * give a vector memory object. However, PETSc solvers want to have an MPI
+     * communicator context over which computations are parallelized. By
+     * default, @p PETSC_COMM_SELF is used here, but you can change this. Note
+     * that for single processor (non-MPI) versions, this parameter does not
+     * have any effect.
+     *
+     * The last argument takes a structure with additional, solver dependent
+     * flags for tuning.
+     *
+     * Note that the communicator used here must match the communicator used
+     * in the system matrix, solution, and right hand side object of the solve
+     * to be done with this solver. Otherwise, PETSc will generate hard to
+     * track down errors, see the documentation of the SolverBase class.
+     */
+    SolverPreOnly (SolverControl        &cn,
+                   const MPI_Comm       &mpi_communicator = PETSC_COMM_SELF,
+                   const AdditionalData &data = AdditionalData());
+
+  protected:
+    /**
+     * Copy of the flags for this particular solver.
+     */
+    const AdditionalData additional_data;
+
+    /**
+     * Implementation of the base class hook: takes a Krylov Subspace Solver
+     * context object and sets the type of solver appropriate for this class.
+     */
+    virtual void set_solver_type (KSP &ksp) const;
+  };
+
+  /**
+   * An implementation of the solver interface using the sparse direct MUMPS
+   * solver through PETSc. This class has the usual interface of all other
+   * solver classes but it is of course different in that it doesn't implement
+   * an iterative solver. As a consequence, things like the SolverControl
+   * object have no particular meaning here.
+   *
+   * MUMPS can make use of symmetry in the system matrix. In this class this
+   * is made possible by the set_symmetric_mode() function. If your matrix is
+   * symmetric, you can use this class as follows:
+   * @code
+   *    SolverControl cn;
+   *    PETScWrappers::SparseDirectMUMPS solver(cn, mpi_communicator);
+   *    solver.set_symmetric_mode(true);
+   *    solver.solve(system_matrix, solution, system_rhs);
+   * @endcode
+   *
+   * @note The class internally calls KSPSetFromOptions, so you are able to
+   * use all the PETSc parameters of the MATSOLVERMUMPS package. See
+   * http://www.mcs.anl.gov/petsc/petsc-current/docs/manualpages/Mat/MATSOLVERMUMPS.html
+   * for details.
+   *
+   * @ingroup PETScWrappers
+   * @author Daniel Brauss, Alexander Grayver, 2012
+   */
+  class SparseDirectMUMPS : public SolverBase
+  {
+  public:
+    /**
+     * Standardized data structure to pipe additional data to the solver.
+     */
+    struct AdditionalData
+    {};
+    /**
+     * Constructor; the arguments mirror those of the other solver classes.
+     */
+    SparseDirectMUMPS (SolverControl        &cn,
+                       const MPI_Comm       &mpi_communicator = PETSC_COMM_SELF,
+                       const AdditionalData &data = AdditionalData());
+
+    /**
+     * Solve the linear system <tt>Ax=b</tt>.
+     */
+    void solve (const MatrixBase &A,
+                VectorBase       &x,
+                const VectorBase &b);
+
+    /**
+     * Take advantage of a symmetric system matrix by using an LDL^T
+     * decomposition instead of the more expensive LU. The argument indicates
+     * whether the matrix is symmetric or not.
+     */
+    void set_symmetric_mode (const bool flag);
+
+  protected:
+    /**
+     * Store a copy of the flags for this particular solver.
+     */
+    const AdditionalData additional_data;
+
+    virtual void set_solver_type (KSP &ksp) const;
+
+  private:
+    /**
+     * A function that is used in PETSc as a callback to check convergence. It
+     * takes the information provided from PETSc and checks it against
+     * deal.II's own SolverControl objects to see if convergence has been
+     * reached.
+     */
+    static
+    PetscErrorCode convergence_test (KSP                ksp,
+                                     const PetscInt     iteration,
+                                     const PetscReal    residual_norm,
+                                     KSPConvergedReason *reason,
+                                     void               *solver_control);
+
+    /**
+     * A structure that contains the PETSc solver and preconditioner objects.
+     * Since the solve member function in the base is not used here, the
+     * private SolverData struct located in the base could not be used either.
+     */
+    struct SolverDataMUMPS
+    {
+      /**
+       * Destructor
+       */
+      ~SolverDataMUMPS ();
+
+      KSP ksp;
+      PC  pc;
+    };
+
+    std_cxx11::shared_ptr<SolverDataMUMPS> solver_data;
+
+    /**
+     * Flag specifying whether the matrix being factorized is symmetric. It
+     * influences the type of preconditioner used (PCLU or PCCHOLESKY).
+     */
+    bool symmetric_mode;
+  };
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_PETSC
+
+/*----------------------------   petsc_solver.h     ---------------------------*/
+
+#endif
+/*----------------------------   petsc_solver.h     ---------------------------*/
diff --git a/include/deal.II/lac/petsc_sparse_matrix.h b/include/deal.II/lac/petsc_sparse_matrix.h
new file mode 100644
index 0000000..655379c
--- /dev/null
+++ b/include/deal.II/lac/petsc_sparse_matrix.h
@@ -0,0 +1,278 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__petsc_sparse_matrix_h
+#define dealii__petsc_sparse_matrix_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_PETSC
+
+#  include <deal.II/lac/exceptions.h>
+#  include <deal.II/lac/petsc_matrix_base.h>
+#  include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+// forward declaration
+template <typename MatrixType> class BlockMatrixBase;
+
+
+namespace PETScWrappers
+{
+  /**
+   * Implementation of a sequential sparse matrix class based on PETSc. All
+   * the functionality is actually in the base class, except for the calls to
+   * generate a sequential sparse matrix. This is possible since PETSc only
+   * works on an abstract matrix type and internally distributes to functions
+   * that do the actual work depending on the actual matrix type (much like
+   * using virtual functions). Only the functions creating a matrix of
+   * specific type differ, and are implemented in this particular class.
+   *
+   * @ingroup PETScWrappers
+   * @ingroup Matrix1
+   * @author Wolfgang Bangerth, 2004
+   */
+  class SparseMatrix : public MatrixBase
+  {
+  public:
+
+    /**
+     * A structure that describes some of the traits of this class in terms of
+     * its run-time behavior. Some other classes (such as the block matrix
+     * classes) that take one or other of the matrix classes as their template
+     * parameters can tune their behavior based on the variables in this
+     * class.
+     */
+    struct Traits
+    {
+      /**
+       * It is safe to elide additions of zeros to individual elements of this
+       * matrix.
+       */
+      static const bool zero_addition_can_be_elided = true;
+    };
+
+    /**
+     * Default constructor. Create an empty matrix.
+     */
+    SparseMatrix ();
+
+    /**
+     * Create a sparse matrix of dimensions @p m times @p n, with an initial
+     * guess of @p n_nonzero_per_row nonzero elements per row. PETSc is able
+     * to cope with the situation that more than this number of elements is
+     * later allocated for a row, but this involves copying data, and is thus
+     * expensive.
+     *
+     * The @p is_symmetric flag determines whether we should tell PETSc that
+     * the matrix is going to be symmetric (as indicated by the call
+     * <tt>MatSetOption(mat, MAT_SYMMETRIC)</tt>). Note that the PETSc
+     * documentation states that one cannot form an ILU decomposition of a
+     * matrix for which this flag has been set to @p true, only an ICC. The
+     * default value of this flag is @p false.
+     */
+    SparseMatrix (const size_type m,
+                  const size_type n,
+                  const size_type n_nonzero_per_row,
+                  const bool      is_symmetric = false);
+
+    /**
+     * Initialize a rectangular matrix with @p m rows and @p n columns.  The
+     * maximal number of nonzero entries for each row separately is given by
+     * the @p row_lengths array.
+     *
+     * Just as for the other constructors: PETSc is able to cope with the
+     * situation that more than this number of elements is later allocated for
+     * a row, but this involves copying data, and is thus expensive.
+     *
+     * The @p is_symmetric flag determines whether we should tell PETSc that
+     * the matrix is going to be symmetric (as indicated by the call
+     * <tt>MatSetOption(mat, MAT_SYMMETRIC)</tt>). Note that the PETSc
+     * documentation states that one cannot form an ILU decomposition of a
+     * matrix for which this flag has been set to @p true, only an ICC. The
+     * default value of this flag is @p false.
+     */
+    SparseMatrix (const size_type               m,
+                  const size_type               n,
+                  const std::vector<size_type> &row_lengths,
+                  const bool                    is_symmetric = false);
+
+    /**
+     * Initialize a sparse matrix using the given sparsity pattern.
+     *
+     * Note that PETSc can be very slow if you do not provide it with a good
+     * estimate of the lengths of rows. Using the present function is a very
+     * efficient way to do this, as it uses the exact number of nonzero
+     * entries for each row of the matrix by using the given sparsity pattern
+     * argument. If the @p preset_nonzero_locations flag is @p true, this
+     * function in addition not only sets the correct row sizes up front, but
+     * also pre-allocates the correct nonzero entries in the matrix.
+     *
+     * PETSc allows adding additional nonzero entries to a matrix later, by
+     * simply writing to these elements. However, this will then lead to
+     * additional memory allocations which are very inefficient and will
+     * greatly slow down your program. It is therefore significantly more
+     * efficient to get memory allocation right from the start.
+     */
+    template <typename SparsityPatternType>
+    explicit SparseMatrix (const SparsityPatternType &sparsity_pattern,
+                           const bool                 preset_nonzero_locations = true);
+
+    /**
+     * This operator assigns a scalar to a matrix. Since this does usually not
+     * make much sense (should we set all matrix entries to this value? Only
+     * the nonzero entries of the sparsity pattern?), this operation is only
+     * allowed if the actual value to be assigned is zero. This operator only
+     * exists to allow for the obvious notation <tt>matrix=0</tt>, which sets
+     * all elements of the matrix to zero, but keeps the sparsity pattern
+     * previously used.
+     */
+    SparseMatrix &operator = (const double d);
+
+    /**
+     * Throw away the present matrix and generate one that has the same
+     * properties as if it were created by the constructor of this class with
+     * the same argument list as the present function.
+     */
+    void reinit (const size_type m,
+                 const size_type n,
+                 const size_type n_nonzero_per_row,
+                 const bool      is_symmetric = false);
+
+    /**
+     * Throw away the present matrix and generate one that has the same
+     * properties as if it were created by the constructor of this class with
+     * the same argument list as the present function.
+     */
+    void reinit (const size_type               m,
+                 const size_type               n,
+                 const std::vector<size_type> &row_lengths,
+                 const bool                    is_symmetric = false);
+
+    /**
+     * Initialize a sparse matrix using the given sparsity pattern.
+     *
+     * Note that PETSc can be very slow if you do not provide it with a good
+     * estimate of the lengths of rows. Using the present function is a very
+     * efficient way to do this, as it uses the exact number of nonzero
+     * entries for each row of the matrix by using the given sparsity pattern
+     * argument. If the @p preset_nonzero_locations flag is @p true, this
+     * function in addition not only sets the correct row sizes up front, but
+     * also pre-allocates the correct nonzero entries in the matrix.
+     *
+     * PETSc allows adding additional nonzero entries to a matrix later, by
+     * simply writing to these elements. However, this will then lead to
+     * additional memory allocations which are very inefficient and will
+     * greatly slow down your program. It is therefore significantly more
+     * efficient to get memory allocation right from the start.
+     *
+     * Despite the fact that it would seem to be an obvious win, setting the
+     * @p preset_nonzero_locations flag to @p true doesn't seem to accelerate
+     * programs. On the contrary, it seems to be able to slow down entire
+     * programs somewhat. This is surprising, since we can use efficient
+     * function calls into PETSc that allow creating multiple entries at
+     * once. Note that, this observation notwithstanding, the flag defaults
+     * to @p true in the declaration below.
+     */
+    template <typename SparsityPatternType>
+    void reinit (const SparsityPatternType &sparsity_pattern,
+                 const bool                 preset_nonzero_locations = true);
+
+    /**
+     * Return a reference to the MPI communicator object in use with this
+     * matrix. Since this is a sequential matrix, it returns the MPI_COMM_SELF
+     * communicator.
+     */
+    virtual const MPI_Comm &get_mpi_communicator () const;
+
+    /**
+     * Return the square of the norm of the vector $v$ with respect to the
+     * norm induced by this matrix, i.e. $\left(v,Mv\right)$. This is useful,
+     * e.g. in the finite element context, where the $L_2$ norm of a function
+     * equals the matrix norm with respect to the mass matrix of the vector
+     * representing the nodal values of the finite element function.
+     *
+     * Obviously, the matrix needs to be square for this operation.
+     *
+     * The implementation of this function is not as efficient as the one in
+     * the @p MatrixBase class used in deal.II (i.e. the original one, not the
+     * PETSc wrapper class) since PETSc doesn't support this operation and
+     * needs a temporary vector.
+     */
+    PetscScalar matrix_norm_square (const VectorBase &v) const;
+
+    /**
+     * Compute the matrix scalar product $\left(u,Mv\right)$.
+     *
+     * The implementation of this function is not as efficient as the one in
+     * the @p MatrixBase class used in deal.II (i.e. the original one, not the
+     * PETSc wrapper class) since PETSc doesn't support this operation and
+     * needs a temporary vector.
+     */
+    PetscScalar matrix_scalar_product (const VectorBase &u,
+                                       const VectorBase &v) const;
+
+  private:
+
+    /**
+     * Purposefully not implemented
+     */
+    SparseMatrix(const SparseMatrix &);
+    /**
+     * Purposefully not implemented
+     */
+    SparseMatrix &operator= (const SparseMatrix &);
+
+    /**
+     * Do the actual work for the respective reinit() function and the
+     * matching constructor, i.e. create a matrix. Getting rid of the previous
+     * matrix is left to the caller.
+     */
+    void do_reinit (const size_type m,
+                    const size_type n,
+                    const size_type n_nonzero_per_row,
+                    const bool      is_symmetric = false);
+
+    /**
+     * Same as previous function.
+     */
+    void do_reinit (const size_type               m,
+                    const size_type               n,
+                    const std::vector<size_type> &row_lengths,
+                    const bool                    is_symmetric = false);
+
+    /**
+     * Same as previous function.
+     */
+    template <typename SparsityPatternType>
+    void do_reinit (const SparsityPatternType &sparsity_pattern,
+                    const bool                 preset_nonzero_locations);
+
+    /**
+     * To allow calling protected prepare_add() and prepare_set().
+     */
+    friend class BlockMatrixBase<SparseMatrix>;
+  };
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_PETSC
+
+/*----------------------------   petsc_sparse_matrix.h     ---------------------------*/
+
+#endif
+/*----------------------------   petsc_sparse_matrix.h     ---------------------------*/
diff --git a/include/deal.II/lac/petsc_vector.h b/include/deal.II/lac/petsc_vector.h
new file mode 100644
index 0000000..f1fb5f5
--- /dev/null
+++ b/include/deal.II/lac/petsc_vector.h
@@ -0,0 +1,401 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__petsc_vector_h
+#define dealii__petsc_vector_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_PETSC
+
+#  include <deal.II/base/subscriptor.h>
+#  include <deal.II/lac/exceptions.h>
+#  include <deal.II/lac/petsc_vector_base.h>
+#  include <deal.II/lac/petsc_parallel_vector.h>
+#  include <deal.II/lac/vector.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace PETScWrappers
+{
+  /*! @addtogroup PETScWrappers
+   *@{
+   */
+
+  /**
+   * Implementation of a sequential vector class based on PETSc. All the
+   * functionality is actually in the base class, except for the calls to
+   * generate a sequential vector. This is possible since PETSc only works on
+   * an abstract vector type and internally distributes to functions that do
+   * the actual work depending on the actual vector type (much like using
+   * virtual functions). Only the functions creating a vector of specific type
+   * differ, and are implemented in this particular class.
+   *
+   * This class is deprecated, use PETScWrappers::MPI::Vector instead.
+   *
+   * @ingroup Vectors
+   * @author Wolfgang Bangerth, 2004
+   */
+  class Vector : public VectorBase
+  {
+  public:
+    /**
+     * Declare type for container size.
+     */
+    typedef types::global_dof_index size_type;
+
+    /**
+     * A variable that indicates whether this vector supports distributed data
+     * storage. If true, then this vector also needs an appropriate compress()
+     * function that allows recent set or add operations to individual
+     * elements to be communicated to other processors.
+     *
+     * For the current class, the variable equals false, since it does not
+     * support parallel data storage. If you do need parallel data storage,
+     * use PETScWrappers::MPI::Vector.
+     */
+    static const bool supports_distributed_data = false;
+
+    /**
+     * Default constructor. Initialize the vector as empty.
+     */
+    Vector ();
+
+    /**
+     * Constructor. Set dimension to @p n and initialize all elements with
+     * zero.
+     *
+     * The constructor is made explicit to avoid accidents like this:
+     * <tt>v=0;</tt>. Presumably, the user wants to set every element of the
+     * vector to zero, but instead, what happens is this call:
+     * <tt>v=Vector(0);</tt>, i.e. the vector is replaced by one of
+     * length zero.
+     */
+    explicit Vector (const size_type n);
+
+    /**
+     * Copy-constructor from deal.II vectors. Sets the dimension to that of
+     * the given vector, and copies all elements.
+     */
+    template <typename Number>
+    explicit Vector (const dealii::Vector<Number> &v);
+
+    /**
+     * Construct it from an existing PETSc Vector of type Vec. Note: this does
+     * not copy the contents and just keeps a pointer. You need to make sure
+     * the vector is not used twice at the same time or destroyed while in
+     * use. This class does not destroy the PETSc object. Handle with care!
+     */
+    explicit Vector (const Vec &v);
+
+    /**
+     * Copy-constructor: copy the values from a PETSc wrapper vector class.
+     */
+    Vector (const Vector &v);
+
+    /**
+     * Copy-constructor: copy the values from a PETSc wrapper parallel vector
+     * class.
+     *
+     * Note that due to the communication model of MPI, @em all processes have
+     * to actually perform this operation, even if they do not use the result.
+     * It is not sufficient if only one processor tries to copy the elements
+     * from the other processors over to its own process space.
+     */
+    explicit Vector (const MPI::Vector &v);
+
+    /**
+     * Release all memory and return to a state just like after having called
+     * the default constructor.
+     */
+    void clear ();
+
+    /**
+     * Copy the given vector. Resize the present vector if necessary.
+     */
+    Vector &operator = (const Vector &v);
+
+    /**
+     * Copy all the elements of the parallel vector @p v into this local
+     * vector. Note that due to the communication model of MPI, @em all
+     * processes have to actually perform this operation, even if they do not
+     * use the result. It is not sufficient if only one processor tries to
+     * copy the elements from the other processors over to its own process
+     * space.
+     */
+    Vector &operator = (const MPI::Vector &v);
+
+    /**
+     * Set all components of the vector to the given number @p s. Simply pass
+     * this down to the base class, but we still need to declare this function
+     * to make the example given in the discussion about making the
+     * constructor explicit work.
+     *
+     * Since the semantics of assigning a scalar to a vector are not
+     * immediately clear, this operator should really only be used if you want
+     * to set the entire vector to zero. This allows the intuitive notation
+     * <tt>v=0</tt>. Assigning other values is deprecated and may be
+     * disallowed in the future.
+     */
+    Vector &operator = (const PetscScalar s);
+
+    /**
+     * Copy the values of a deal.II vector (as opposed to those of the PETSc
+     * vector wrapper class) into this object.
+     */
+    template <typename number>
+    Vector &operator = (const dealii::Vector<number> &v);
+
+    /**
+     * Change the dimension of the vector to @p N. It is unspecified how
+     * resizing the vector affects the memory allocation of this object; i.e.,
+     * it is not guaranteed that resizing it to a smaller size actually also
+     * reduces memory consumption, or if for efficiency the same amount of
+     * memory is used for less data.
+     *
+     * If @p omit_zeroing_entries is false, the vector is filled by zeros.
+     * Otherwise, the elements are left in an unspecified state.
+     */
+    void reinit (const size_type N,
+                 const bool      omit_zeroing_entries = false);
+
+    /**
+     * Change the dimension to that of the vector @p v. The same applies as
+     * for the other reinit() function.
+     *
+     * The elements of @p v are not copied, i.e.  this function is the same as
+     * calling <tt>reinit (v.size(), omit_zeroing_entries)</tt>.
+     */
+    void reinit (const Vector &v,
+                 const bool    omit_zeroing_entries = false);
+
+  protected:
+    /**
+     * Create a vector of length @p n. For this class, we create a sequential
+     * vector. @p n denotes the total size of the vector to be created.
+     */
+    void create_vector (const size_type n);
+  } DEAL_II_DEPRECATED;
+
+  /*@}*/
+
+// ------------------ template and inline functions -------------
+
+
+  /**
+   * Global function @p swap which overloads the default implementation of the
+   * C++ standard library, which uses a temporary object. This overload
+   * exchanges the data of the two vectors by calling <tt>u.swap(v)</tt>.
+   *
+   * @relates PETScWrappers::Vector
+   * @author Wolfgang Bangerth, 2004
+   */
+  inline
+  void swap (Vector &u, Vector &v)
+  {
+    u.swap (v);
+  }
+
+
+#ifndef DOXYGEN
+
+  template <typename number>
+  Vector::Vector (const dealii::Vector<number> &v)
+  {
+    Vector::create_vector (v.size());
+    // delegate copying of the entries to the templated assignment operator
+    *this = v;
+  }
+
+
+
+  inline
+  Vector::Vector (const Vec &v)
+    :
+    VectorBase(v)
+  {} // all work is done by the VectorBase constructor
+
+
+  inline
+  Vector &
+  Vector::operator = (const PetscScalar s)
+  {
+    VectorBase::operator = (s);
+    // the base class did the actual work; return this object as a Vector
+    return *this;
+  }
+
+
+  inline
+  Vector &
+  Vector::operator = (const Vector &v)
+  {
+    // if the sizes differ, first resize the present vector; its entries
+    // need not be zeroed (second argument) as they are overwritten below
+    if (size() != v.size())
+      reinit (v.size(), true);
+
+    const int ierr = VecCopy (v.vector, vector);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return *this;
+  }
+
+
+
+  inline
+  Vector &
+  Vector::operator = (const MPI::Vector &v)
+  {
+    int ierr;
+    if (attained_ownership)
+      {
+        // the PETSc function we call below
+        // generates the vector itself, so destroy
+        // the old one first
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+        ierr = VecDestroy (vector);
+#else
+        ierr = VecDestroy (&vector);
+#endif
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+      }
+
+    attained_ownership = true;
+
+    // then do the gather operation. PETSc
+    // versions before 2.2.0 offer a single
+    // call for this; later versions instead
+    // require creating a scatter context,
+    // running the scatter (begin/end, whose
+    // argument order changed in 2.3.3), and
+    // destroying the context again
+#if DEAL_II_PETSC_VERSION_LT(2,2,0)
+    ierr = VecConvertMPIToSeqAll (static_cast<const Vec &>(v),
+                                  &vector);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+#else
+
+    VecScatter ctx;
+
+    ierr = VecScatterCreateToAll (static_cast<const Vec &>(v), &ctx, &vector);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+#if ((PETSC_VERSION_MAJOR == 2) && \
+     ((PETSC_VERSION_MINOR < 3) || \
+      ((PETSC_VERSION_MINOR == 3) &&            \
+       (PETSC_VERSION_SUBMINOR < 3))))
+    ierr = VecScatterBegin (static_cast<const Vec &>(v), vector,
+                            INSERT_VALUES, SCATTER_FORWARD, ctx);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    ierr = VecScatterEnd (static_cast<const Vec &>(v), vector,
+                          INSERT_VALUES, SCATTER_FORWARD, ctx);
+    // (this return code is checked after the #endif below)
+#else
+
+    ierr = VecScatterBegin (ctx,static_cast<const Vec &>(v), vector,
+                            INSERT_VALUES, SCATTER_FORWARD);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    ierr = VecScatterEnd (ctx, static_cast<const Vec &>(v), vector,
+                          INSERT_VALUES, SCATTER_FORWARD);
+    // (this return code is checked after the #endif below)
+#endif
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    ierr = VecScatterDestroy (ctx);
+#else
+    ierr = VecScatterDestroy (&ctx);
+#endif
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+#endif
+
+    return *this;
+  }
+
+
+
+  template <typename number>
+  inline
+  Vector &
+  Vector::operator = (const dealii::Vector<number> &v)
+  {
+    reinit (v.size(), true);
+    // the following isn't necessarily fast,
+    // but this is due to the fact that PETSc
+    // doesn't offer an inlined access
+    // operator.
+    //
+    // if someone wants to contribute some
+    // code: to make this code faster, one
+    // could either first convert all values
+    // to PetscScalar, and then set them all
+    // at once using VecSetValues. This has
+    // the drawback that it could take quite
+    // some memory, if the vector is large,
+    // and it would in addition allocate
+    // memory on the heap, which is
+    // expensive. an alternative would be to
+    // split the vector into chunks of, say,
+    // 128 elements, convert a chunk at a
+    // time and set it in the output vector
+    // using VecSetValues. since 128 elements
+    // is small enough, this could easily be
+    // allocated on the stack (as a local
+    // variable) which would make the whole
+    // thing much more efficient.
+    //
+    // a second way to make things faster is
+    // for the special case that
+    // number==PetscScalar. we could then
+    // declare a specialization of this
+    // template, and omit the conversion. the
+    // problem with this is that the best we
+    // can do is to use VecSetValues, but
+    // this isn't very efficient either: it
+    // wants to see an array of indices,
+    // which in this case a) again takes up a
+    // whole lot of memory on the heap, and
+    // b) is totally dumb since its content
+    // would simply be the sequence
+    // 0,1,2,3,...,n. the best of all worlds
+    // would probably be a function in PETSc
+    // that would take a pointer to an array
+    // of PetscScalar values and simply copy
+    // n elements verbatim into the vector...
+    for (size_type i=0; i<v.size(); ++i)
+      (*this)(i) = v(i);
+    // tell the vector that the element-wise writes above were insertions
+    compress (::dealii::VectorOperation::insert);
+
+    return *this;
+  }
+#endif // DOXYGEN
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_PETSC
+
+/*----------------------------   petsc_vector.h     ---------------------------*/
+
+#endif
+/*----------------------------   petsc_vector.h     ---------------------------*/
diff --git a/include/deal.II/lac/petsc_vector_base.h b/include/deal.II/lac/petsc_vector_base.h
new file mode 100644
index 0000000..42ef242
--- /dev/null
+++ b/include/deal.II/lac/petsc_vector_base.h
@@ -0,0 +1,1274 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__petsc_vector_base_h
+#define dealii__petsc_vector_base_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_PETSC
+
+#  include <deal.II/base/subscriptor.h>
+#  include <deal.II/lac/exceptions.h>
+#  include <deal.II/lac/vector.h>
+
+#  include <vector>
+#  include <utility>
+
+#  include <petscvec.h>
+#  include <deal.II/base/index_set.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+// forward declaration
+template <typename number> class Vector;
+
+
+/**
+ * A namespace in which wrapper classes for PETSc objects reside.
+ *
+ * @ingroup PETScWrappers
+ * @ingroup Vectors
+ * @author Wolfgang Bangerth, 2004
+ */
+namespace PETScWrappers
+{
+  // forward declaration
+  class VectorBase;
+
+  /**
+   * @cond internal
+   */
+
+  /**
+   * A namespace for internal implementation details of the PETScWrapper
+   * members.
+   * @ingroup PETScWrappers
+   */
+  namespace internal
+  {
+    /**
+     * Since access to PETSc vectors only goes through functions, rather than
+     * by obtaining a reference to a vector element, we need a wrapper class
+     * that acts as if it was a reference, and basically redirects all
+     * accesses (read and write) to member functions of this class.
+     *
+     * This class implements such a wrapper: it is initialized with a vector
+     * and an element within it, and has a conversion operator to extract the
+     * scalar value of this element. It also has a variety of assignment
+     * operators for writing to this one element.
+     * @ingroup PETScWrappers
+     */
+    class VectorReference
+    {
+    public:
+      /**
+       * Declare type for container size.
+       */
+      typedef types::global_dof_index size_type;
+
+    private:
+      /**
+       * Constructor. It is made private so as to only allow the actual vector
+       * class to create it.
+       */
+      VectorReference (const VectorBase  &vector,
+                       const size_type   index);
+
+
+    public:
+
+      /**
+       * This looks like a copy operator, but does something different than
+       * usual. In particular, it does not copy the member variables of this
+       * reference. Rather, it handles the situation where we have two vectors
+       * @p v and @p w, and assign elements like in <tt>v(i)=w(i)</tt>. Here,
+       * both left and right hand side of the assignment have data type
+       * VectorReference, but what we really mean is to assign the vector
+       * elements represented by the two references. This operator implements
+       * this operation. Note also that this allows us to make the assignment
+       * operator const.
+       */
+      const VectorReference &operator = (const VectorReference &r) const;
+
+      /**
+       * The same function as above, but for non-const reference objects. The
+       * function is needed since the compiler might otherwise automatically
+       * generate a copy operator for non-const objects.
+       */
+      VectorReference &operator = (const VectorReference &r);
+
+      /**
+       * Set the referenced element of the vector to <tt>s</tt>.
+       */
+      const VectorReference &operator = (const PetscScalar &s) const;
+
+      /**
+       * Add <tt>s</tt> to the referenced element of the vector.
+       */
+      const VectorReference &operator += (const PetscScalar &s) const;
+
+      /**
+       * Subtract <tt>s</tt> from the referenced element of the vector.
+       */
+      const VectorReference &operator -= (const PetscScalar &s) const;
+
+      /**
+       * Multiply the referenced element of the vector by <tt>s</tt>.
+       */
+      const VectorReference &operator *= (const PetscScalar &s) const;
+
+      /**
+       * Divide the referenced element of the vector by <tt>s</tt>.
+       */
+      const VectorReference &operator /= (const PetscScalar &s) const;
+
+      /**
+       * Return the real part of the value of the referenced element.
+       */
+      PetscReal real () const;
+
+      /**
+       * Return the imaginary part of the value of the referenced element.
+       *
+       * @note This operation is not defined for real numbers and an exception
+       * is thrown.
+       */
+      PetscReal imag () const;
+
+      /**
+       * Convert the reference to an actual value, i.e. return the value of
+       * the referenced element of the vector.
+       */
+      operator PetscScalar () const;
+
+      /**
+       * Exception thrown if a PETSc function returned a nonzero error code.
+       */
+      DeclException1 (ExcPETScError,
+                      int,
+                      << "An error with error number " << arg1
+                      << " occurred while calling a PETSc function");
+      /**
+       * Exception for access to an element that is not stored locally.
+       */
+      DeclException3 (ExcAccessToNonlocalElement,
+                      int, int, int,
+                      << "You tried to access element " << arg1
+                      << " of a distributed vector, but only elements "
+                      << arg2 << " through " << arg3
+                      << " are stored locally and can be accessed.");
+      /**
+       * Exception for mixing 'set' and 'add' operations without compress().
+       */
+      DeclException2 (ExcWrongMode,
+                      int, int,
+                      << "You tried to do a "
+                      << (arg1 == 1 ?
+                          "'set'" :
+                          (arg1 == 2 ?
+                           "'add'" : "???"))
+                      << " operation but the vector is currently in "
+                      << (arg2 == 1 ?
+                          "'set'" :
+                          (arg2 == 2 ?
+                           "'add'" : "???"))
+                      << " mode. You first have to call 'compress()'.");
+
+    private:
+      /**
+       * Reference to the vector whose element we represent.
+       */
+      const VectorBase   &vector;
+
+      /**
+       * Index of the referenced element of the vector.
+       */
+      const size_type index;
+
+      /**
+       * Make the vector class a friend, so that it can create objects of the
+       * present type.
+       */
+      friend class ::dealii::PETScWrappers::VectorBase;
+    };
+  }
+  /**
+   * @endcond
+   */
+
+
+  /**
+   * Base class for all vector classes that are implemented on top of the
+   * PETSc vector types. Since in PETSc all vector types (i.e. sequential and
+   * parallel ones) are built by filling the contents of an abstract object
+   * that is only referenced through a pointer of a type that is independent
+   * of the actual vector type, we can implement almost all functionality of
+   * vectors in this base class. Derived classes will then only have to
+   * provide the functionality to create one or the other kind of vector.
+   *
+   * The interface of this class is modeled after the existing Vector class in
+   * deal.II. It has almost the same member functions, and is often
+   * exchangeable. However, since PETSc only supports a single scalar type
+   * (either double, float, or a complex data type), it is not templated, and
+   * only works with whatever your PETSc installation has defined the data
+   * type @p PetscScalar to.
+   *
+   * Note that PETSc only guarantees that operations do what you expect if the
+   * functions @p VecAssemblyBegin and @p VecAssemblyEnd have been called
+   * after vector assembly. Therefore, you need to call Vector::compress()
+   * before you actually use the vector.
+   *
+   * @ingroup PETScWrappers
+   * @author Wolfgang Bangerth, 2004
+   */
+  class VectorBase : public Subscriptor
+  {
+  public:
+    /**
+     * Declare some of the standard types used in all containers. These types
+     * parallel those in the <tt>C++</tt> standard libraries
+     * <tt>vector<...></tt> class.
+     */
+    typedef PetscScalar               value_type;
+    typedef PetscReal                 real_type;
+    typedef types::global_dof_index   size_type;
+    typedef internal::VectorReference reference;
+    typedef const internal::VectorReference const_reference;
+
+    /**
+     * Default constructor. It doesn't do anything, derived classes will have
+     * to initialize the data.
+     */
+    VectorBase ();
+
+    /**
+     * Copy constructor. Sets the dimension to that of the given vector, and
+     * copies all elements.
+     */
+    VectorBase (const VectorBase &v);
+
+    /**
+     * Initialize a Vector from a PETSc Vec object. Note that we do not copy
+     * the vector and we do not attain ownership, so we do not destroy the
+     * PETSc object in the destructor.
+     */
+    explicit VectorBase (const Vec &v);
+
+    /**
+     * Destructor
+     */
+    virtual ~VectorBase ();
+
+    /**
+     * Release all memory and return to a state just like after having called
+     * the default constructor.
+     */
+    virtual void clear ();
+
+    /**
+     * Compress the underlying representation of the PETSc object, i.e. flush
+     * the buffers of the vector object if it has any. This function is
+     * necessary after writing into a vector element-by-element and before
+     * anything else can be done on it.
+     *
+     * See
+     * @ref GlossCompress "Compressing distributed objects"
+     * for more information.
+     */
+    void compress (const VectorOperation::values operation);
+
+    /**
+     * Set all components of the vector to the given number @p s. Although
+     * this is a simple operation, we still need to declare this function
+     * to make the example given in the discussion about making the
+     * constructor explicit work.
+     *
+     *
+     * Since the semantics of assigning a scalar to a vector are not
+     * immediately clear, this operator should really only be used if you want
+     * to set the entire vector to zero. This allows the intuitive notation
+     * <tt>v=0</tt>. Assigning other values is deprecated and may be
+     * disallowed in the future.
+     */
+    VectorBase &operator = (const PetscScalar s);
+
+    /**
+     * Test for equality. This function assumes that the present vector and
+     * the one to compare with have the same size already, since comparing
+     * vectors of different sizes makes not much sense anyway.
+     */
+    bool operator == (const VectorBase &v) const;
+
+    /**
+     * Test for inequality. This function assumes that the present vector and
+     * the one to compare with have the same size already, since comparing
+     * vectors of different sizes makes not much sense anyway.
+     */
+    bool operator != (const VectorBase &v) const;
+
+    /**
+     * Return the global dimension of the vector.
+     */
+    size_type size () const;
+
+    /**
+     * Return the local dimension of the vector, i.e. the number of elements
+     * stored on the present MPI process. For sequential vectors, this number
+     * is the same as size(), but for parallel vectors it may be smaller.
+     *
+     * To figure out which elements exactly are stored locally, use
+     * local_range().
+     */
+    size_type local_size () const;
+
+    /**
+     * Return a pair of indices indicating which elements of this vector are
+     * stored locally. The first number is the index of the first element
+     * stored, the second the index of the one past the last one that is
+     * stored locally. If this is a sequential vector, then the result will be
+     * the pair (0,N), otherwise it will be a pair (i,i+n), where
+     * <tt>n=local_size()</tt>.
+     */
+    std::pair<size_type, size_type>
+    local_range () const;
+
+    /**
+     * Return whether @p index is in the local range or not, see also
+     * local_range().
+     */
+    bool in_local_range (const size_type index) const;
+
+    /**
+     * Return an index set that describes which elements of this vector are
+     * owned by the current processor. Note that this index set does not
+     * include elements this vector may store locally as ghost elements but
+     * that are in fact owned by another processor. As a consequence, the
+     * index sets returned on different processors if this is a distributed
+     * vector will form disjoint sets that add up to the complete index set.
+     * Obviously, if a vector is created on only one processor, then the
+     * result would satisfy
+     * @code
+     *   vec.locally_owned_elements() == complete_index_set (vec.size())
+     * @endcode
+     */
+    IndexSet locally_owned_elements () const;
+
+    /**
+     * Return if the vector contains ghost elements.
+     *
+     * @see
+     * @ref GlossGhostedVector "vectors with ghost elements"
+     */
+    bool has_ghost_elements() const;
+
+    /**
+     * Provide access to a given element, both read and write.
+     */
+    reference
+    operator () (const size_type index);
+
+    /**
+     * Provide read-only access to an element.
+     */
+    PetscScalar
+    operator () (const size_type index) const;
+
+    /**
+     * Provide access to a given element, both read and write.
+     *
+     * Exactly the same as operator().
+     */
+    reference
+    operator [] (const size_type index);
+
+    /**
+     * Provide read-only access to an element.
+     *
+     * Exactly the same as operator().
+     */
+    PetscScalar
+    operator [] (const size_type index) const;
+
+    /**
+     * A collective set operation: instead of setting individual elements of a
+     * vector, this function allows to set a whole set of elements at once.
+     * The indices of the elements to be set are stated in the first argument,
+     * the corresponding values in the second.
+     */
+    void set (const std::vector<size_type>   &indices,
+              const std::vector<PetscScalar>  &values);
+
+    /**
+     * A collective get operation: instead of getting individual elements of a
+     * vector, this function allows to get a whole set of elements at once.
+     * The indices of the elements to be read are stated in the first
+     * argument, the corresponding values are returned in the second.
+     */
+    void extract_subvector_to (const std::vector<size_type> &indices,
+                               std::vector<PetscScalar> &values) const;
+
+    /**
+     * Just as the above, but with pointers. Useful in minimizing copying of
+     * data around.
+     */
+    template <typename ForwardIterator, typename OutputIterator>
+    void extract_subvector_to (const ForwardIterator    indices_begin,
+                               const ForwardIterator    indices_end,
+                               OutputIterator           values_begin) const;
+
+    /**
+     * A collective add operation: This function adds a whole set of values
+     * stored in @p values to the vector components specified by @p indices.
+     */
+    void add (const std::vector<size_type> &indices,
+              const std::vector<PetscScalar>  &values);
+
+    /**
+     * This is a second collective add operation. As a difference, this
+     * function takes a deal.II vector of values.
+     */
+    void add (const std::vector<size_type>        &indices,
+              const ::dealii::Vector<PetscScalar> &values);
+
+    /**
+     * Take an address where <tt>n_elements</tt> are stored contiguously and
+     * add them into the vector. Handles all cases which are not covered by
+     * the other two <tt>add()</tt> functions above.
+     */
+    void add (const size_type    n_elements,
+              const size_type   *indices,
+              const PetscScalar  *values);
+
+    /**
+     * Return the scalar product of two vectors. The vectors must have the
+     * same size.
+     *
+     * For complex valued vector, this gives $\left(v^\ast,vec\right)$.
+     */
+    PetscScalar operator * (const VectorBase &vec) const;
+
+    /**
+     * Return square of the $l_2$-norm.
+     */
+    real_type norm_sqr () const;
+
+    /**
+     * Return the mean value of the elements of this vector.
+     */
+    PetscScalar mean_value () const;
+
+    /**
+     * $l_1$-norm of the vector. The sum of the absolute values.
+     */
+    real_type l1_norm () const;
+
+    /**
+     * $l_2$-norm of the vector.  The square root of the sum of the squares of
+     * the elements.
+     */
+    real_type l2_norm () const;
+
+    /**
+     * $l_p$-norm of the vector. The pth root of the sum of the pth powers of
+     * the absolute values of the elements.
+     */
+    real_type lp_norm (const real_type p) const;
+
+    /**
+     * $l_\infty$-norm of the vector. Return the value of the vector element
+     * with the maximum absolute value.
+     */
+    real_type linfty_norm () const;
+
+    /**
+     * Performs a combined operation of a vector addition and a subsequent
+     * inner product, returning the value of the inner product. In other
+     * words, the result of this function is the same as if the user called
+     * @code
+     * this->add(a, V);
+     * return_value = *this * W;
+     * @endcode
+     *
+     * The reason this function exists is for compatibility with deal.II's own
+     * vector classes which can implement this functionality with less memory
+     * transfer. However, for PETSc vectors such a combined operation is not
+     * natively supported and thus the cost is completely equivalent as
+     * calling the two methods separately.
+     */
+    PetscScalar add_and_dot (const PetscScalar a,
+                             const VectorBase &V,
+                             const VectorBase &W);
+
+    /**
+     * Normalize vector by dividing by the $l_2$-norm of the vector. Return
+     * the vector norm before normalization.
+     *
+     * This function is deprecated.
+     */
+    real_type normalize () const DEAL_II_DEPRECATED;
+
+    /**
+     * Return the value of the vector element with the largest negative value.
+     */
+    real_type min () const;
+
+    /**
+     * Return the value of the vector element with the largest positive value.
+     */
+    real_type max () const;
+
+    /**
+     * Replace every element in a vector with its absolute value.
+     *
+     * This function is deprecated.
+     */
+    VectorBase &abs () DEAL_II_DEPRECATED;
+
+    /**
+     * Conjugate a vector.
+     *
+     * This function is deprecated.
+     */
+    VectorBase &conjugate () DEAL_II_DEPRECATED;
+
+    /**
+     * A collective piecewise multiply operation on <code>this</code> vector
+     * with itself. TODO: The model for this function should be similar to
+     * add().
+     *
+     * This function is deprecated.
+     */
+    VectorBase &mult () DEAL_II_DEPRECATED;
+
+    /**
+     * Same as above, but a collective piecewise multiply operation of
+     * <code>this</code> vector with <b>v</b>.
+     *
+     * This function is deprecated.
+     */
+    VectorBase &mult (const VectorBase &v) DEAL_II_DEPRECATED;
+
+    /**
+     * Same as above, but a collective piecewise multiply operation of
+     * <b>u</b> with <b>v</b>.
+     *
+     * This function is deprecated.
+     */
+    VectorBase &mult (const VectorBase &u,
+                      const VectorBase &v) DEAL_II_DEPRECATED;
+
+    /**
+     * Return whether the vector contains only elements with value zero. This
+     * is a collective operation. This function is expensive, because
+     * potentially all elements have to be checked.
+     */
+    bool all_zero () const;
+
+    /**
+     * Return @p true if the vector has no negative entries, i.e. all entries
+     * are zero or positive. This function is used, for example, to check
+     * whether refinement indicators are really all positive (or zero).
+     */
+    bool is_non_negative () const;
+
+    /**
+     * Multiply the entire vector by a fixed factor.
+     */
+    VectorBase &operator *= (const PetscScalar factor);
+
+    /**
+     * Divide the entire vector by a fixed factor.
+     */
+    VectorBase &operator /= (const PetscScalar factor);
+
+    /**
+     * Add the given vector to the present one.
+     */
+    VectorBase &operator += (const VectorBase &V);
+
+    /**
+     * Subtract the given vector from the present one.
+     */
+    VectorBase &operator -= (const VectorBase &V);
+
+    /**
+     * Addition of @p s to all components. Note that @p s is a scalar and not
+     * a vector.
+     */
+    void add (const PetscScalar s);
+
+    /**
+     * Simple vector addition, equal to the <tt>operator +=</tt>.
+     *
+     * @deprecated Use the <tt>operator +=</tt> instead.
+     */
+    void add (const VectorBase &V) DEAL_II_DEPRECATED;
+
+    /**
+     * Simple addition of a multiple of a vector, i.e. <tt>*this += a*V</tt>.
+     */
+    void add (const PetscScalar a, const VectorBase &V);
+
+    /**
+     * Multiple addition of scaled vectors, i.e. <tt>*this += a*V+b*W</tt>.
+     */
+    void add (const PetscScalar a, const VectorBase &V,
+              const PetscScalar b, const VectorBase &W);
+
+    /**
+     * Scaling and simple vector addition, i.e. <tt>*this = s*(*this)+V</tt>.
+     */
+    void sadd (const PetscScalar s,
+               const VectorBase     &V);
+
+    /**
+     * Scaling and simple addition, i.e. <tt>*this = s*(*this)+a*V</tt>.
+     */
+    void sadd (const PetscScalar s,
+               const PetscScalar a,
+               const VectorBase     &V);
+
+    /**
+     * Scaling and multiple addition.
+     *
+     * This function is deprecated.
+     */
+    void sadd (const PetscScalar s,
+               const PetscScalar a,
+               const VectorBase     &V,
+               const PetscScalar b,
+               const VectorBase     &W) DEAL_II_DEPRECATED;
+
+    /**
+     * Scaling and multiple addition. <tt>*this = s*(*this)+a*V + b*W +
+     * c*X</tt>.
+     *
+     * This function is deprecated.
+     */
+    void sadd (const PetscScalar s,
+               const PetscScalar a,
+               const VectorBase     &V,
+               const PetscScalar b,
+               const VectorBase     &W,
+               const PetscScalar c,
+               const VectorBase     &X) DEAL_II_DEPRECATED;
+
+    /**
+     * Scale each element of this vector by the corresponding element in the
+     * argument. This function is mostly meant to simulate multiplication (and
+     * immediate re-assignment) by a diagonal scaling matrix.
+     */
+    void scale (const VectorBase &scaling_factors);
+
+    /**
+     * Assignment <tt>*this = a*V</tt>.
+     */
+    void equ (const PetscScalar a, const VectorBase &V);
+
+    /**
+     * Assignment <tt>*this = a*V + b*W</tt>.
+     *
+     * This function is deprecated.
+     */
+    void equ (const PetscScalar a, const VectorBase &V,
+              const PetscScalar b, const VectorBase &W) DEAL_II_DEPRECATED;
+
+    /**
+     * Compute the elementwise ratio of the two given vectors, that is let
+     * <tt>this[i] = a[i]/b[i]</tt>. This is useful for example if you want to
+     * compute the cellwise ratio of true to estimated error.
+     *
+     * This vector is appropriately scaled to hold the result. If any of the
+     * <tt>b[i]</tt> is zero, the result is undefined. No attempt is made to
+     * catch such situations.
+     * This function is deprecated.
+     */
+    void ratio (const VectorBase &a,
+                const VectorBase &b) DEAL_II_DEPRECATED;
+
+    /**
+     * Prints the PETSc vector object values using PETSc internal vector
+     * viewer function <tt>VecView</tt>. The default format prints the
+     * vector's contents, including indices of vector elements. For other
+     * valid view formats, consult
+     * http://www.mcs.anl.gov/petsc/petsc-current/docs/manualpages/Vec/VecView.html
+     */
+    void write_ascii (const PetscViewerFormat format = PETSC_VIEWER_DEFAULT) ;
+
+    /**
+     * Print to a stream. @p precision denotes the desired precision with
+     * which values shall be printed, @p scientific whether scientific
+     * notation shall be used. If @p across is @p true then the vector is
+     * printed in a line, while if @p false then the elements are printed on a
+     * separate line each.
+     */
+    void print (std::ostream       &out,
+                const unsigned int  precision  = 3,
+                const bool          scientific = true,
+                const bool          across     = true) const;
+
+    /**
+     * Swap the contents of this vector and the other vector @p v. One could
+     * do this operation with a temporary variable and copying over the data
+     * elements, but this function is significantly more efficient since it
+     * only swaps the pointers to the data of the two vectors and therefore
+     * does not need to allocate temporary storage and move data around.
+     *
+     * This function is analogous to the @p swap function of all C++ standard
+     * containers. Also, there is a global function <tt>swap(u,v)</tt> that
+     * simply calls <tt>u.swap(v)</tt>, again in analogy to standard
+     * functions.
+     */
+    void swap (VectorBase &v);
+
+    /**
+     * Conversion operator to gain access to the underlying PETSc type. If you
+     * do this, you cut this class off some information it may need, so this
+     * conversion operator should only be used if you know what you do. In
+     * particular, it should only be used for read-only operations into the
+     * vector.
+     */
+    operator const Vec &() const;
+
+    /**
+     * Estimate for the memory consumption (not implemented for this class).
+     */
+    std::size_t memory_consumption () const;
+
+    /**
+     * Return a reference to the MPI communicator object in use with this
+     * object.
+     */
+    virtual const MPI_Comm &get_mpi_communicator () const;
+
+  protected:
+    /**
+     * A generic vector object in PETSc. The actual type, a sequential vector,
+     * is set in the constructor.
+     */
+    Vec vector;
+
+    /**
+     * Denotes if this vector has ghost indices associated with it. This means
+     * that at least one of the processes in a parallel program has at least
+     * one ghost index.
+     */
+    bool ghosted;
+
+    /**
+     * This vector contains the global indices of the ghost values. The
+     * location in this vector denotes the local numbering, which is used in
+     * PETSc.
+     */
+    IndexSet ghost_indices;
+
+    /**
+     * Store whether the last action was a write or add operation. This
+     * variable is @p mutable so that the accessor classes can write to it,
+     * even though the vector object they refer to is constant.
+     */
+    mutable VectorOperation::values last_action;
+
+    /**
+     * Make the reference class a friend.
+     */
+    friend class internal::VectorReference;
+
+    /**
+     * Specifies if the vector is the owner of the PETSc Vec. This is true if
+     * it got created by this class and determines if it gets destructed in
+     * the destructor.
+     */
+    bool attained_ownership;
+
+    /**
+     * Collective set or add operation: This function is invoked by the
+     * collective @p set and @p add with the @p add_values flag set to the
+     * corresponding value.
+     */
+    void do_set_add_operation (const size_type    n_elements,
+                               const size_type   *indices,
+                               const PetscScalar  *values,
+                               const bool         add_values);
+
+
+  };
+
+
+
+// ------------------- inline and template functions --------------
+
+  /**
+   * Free-function form of VectorBase::swap(): exchange the data of @p u and
+   * @p v by forwarding to <tt>u.swap(v)</tt>. It takes the place of the
+   * generic C++ standard library version, which uses a temporary object.
+   *
+   * @relates PETScWrappers::VectorBase
+   * @author Wolfgang Bangerth, 2004
+   */
+  inline void
+  swap (VectorBase &u,
+        VectorBase &v)
+  {
+    u.swap (v);
+  }
+
+#ifndef DOXYGEN
+  namespace internal
+  {
+    // bind this proxy object to element 'index' of 'vector'
+    inline
+    VectorReference::VectorReference (const VectorBase  &vector,
+                                      const size_type   index)
+      : vector (vector),
+        index (index)
+    {}
+
+
+    inline
+    const VectorReference &
+    VectorReference::operator = (const VectorReference &r) const
+    {
+      // this is not a copy operator (see the class documentation):
+      // read the element that r stands for, then hand the value on to
+      // the assignment operator that takes a scalar
+      const PetscScalar referenced_value = static_cast<PetscScalar> (r);
+      *this = referenced_value;
+
+      return *this;
+    }
+
+
+
+    inline
+    VectorReference &
+    VectorReference::operator = (const VectorReference &r)
+    {
+      // same as the const variant: we do not copy the members of r
+      // but rather the vector element it represents, using the
+      // assignment operator that takes a scalar
+      const PetscScalar referenced_value = static_cast<PetscScalar> (r);
+      *this = referenced_value;
+
+      return *this;
+    }
+
+
+
+    inline
+    const VectorReference &
+    VectorReference::operator = (const PetscScalar &value) const
+    {
+      // a 'set' requires that the vector is not in 'add' mode
+      Assert ((vector.last_action == VectorOperation::insert)
+              ||
+              (vector.last_action == VectorOperation::unknown),
+              ExcWrongMode (VectorOperation::insert,
+                            vector.last_action));
+
+      // elements of vectors with ghost elements must not be written
+      Assert (!vector.has_ghost_elements(), ExcGhostsPresent());
+
+      const PetscInt petsc_index = index;
+      const int ierr = VecSetValues (vector, 1, &petsc_index, &value,
+                                     INSERT_VALUES);
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+      // record that the last operation on the vector was a 'set'
+      vector.last_action = VectorOperation::insert;
+      return *this;
+    }
+
+
+
+    inline
+    const VectorReference &
+    VectorReference::operator += (const PetscScalar &value) const
+    {
+      // an 'add' requires that the vector is not in 'insert' mode
+      Assert ((vector.last_action == VectorOperation::add)
+              ||
+              (vector.last_action == VectorOperation::unknown),
+              ExcWrongMode (VectorOperation::add,
+                            vector.last_action));
+
+      Assert (!vector.has_ghost_elements(), ExcGhostsPresent());
+
+      // the checks above and the mode update below have to happen in
+      // any case to be consistent with the MPI communication model
+      // (see the comments in the documentation of
+      // PETScWrappers::MPI::Vector)
+      vector.last_action = VectorOperation::add;
+
+      // what we can save, however, is the call into PETSc if the
+      // addend is zero
+      if (!(value == PetscScalar()))
+        {
+          // use the PETSc function to add something
+          const PetscInt petsc_index = index;
+
+          const int ierr
+            = VecSetValues (vector, 1, &petsc_index, &value, ADD_VALUES);
+          AssertThrow (ierr == 0, ExcPETScError(ierr));
+        }
+
+      return *this;
+    }
+
+
+
+    inline
+    const VectorReference &
+    VectorReference::operator -= (const PetscScalar &value) const
+    {
+      // subtraction counts as an 'add', so 'insert' mode is not allowed
+      Assert ((vector.last_action == VectorOperation::add)
+              ||
+              (vector.last_action == VectorOperation::unknown),
+              ExcWrongMode (VectorOperation::add,
+                            vector.last_action));
+
+      Assert (!vector.has_ghost_elements(), ExcGhostsPresent());
+
+      // the checks above and the mode update below have to happen in
+      // any case to be consistent with the MPI communication model
+      // (see the comments in the documentation of
+      // PETScWrappers::MPI::Vector)
+      vector.last_action = VectorOperation::add;
+
+      // what we can save, however, is the call into PETSc if the
+      // value to subtract is zero
+      if (!(value == PetscScalar()))
+        {
+          // subtract by adding the negated value through PETSc
+          const PetscInt petsc_index = index;
+          const PetscScalar subtractand = -value;
+
+          const int ierr
+            = VecSetValues (vector, 1, &petsc_index, &subtractand, ADD_VALUES);
+          AssertThrow (ierr == 0, ExcPETScError(ierr));
+        }
+
+      return *this;
+    }
+
+
+
+    // Multiply the referenced vector element by @p value and write the
+    // product back in insert mode. Returns this reference object.
+    inline
+    const VectorReference &
+    VectorReference::operator *= (const PetscScalar &value) const
+    {
+      // the vector must already be in 'insert' mode, or in no particular mode
+      Assert ((vector.last_action == VectorOperation::insert)
+              ||
+              (vector.last_action == VectorOperation::unknown),
+              ExcWrongMode (VectorOperation::insert,
+                            vector.last_action));
+      // refuse to modify a vector that has ghost elements
+      Assert (!vector.has_ghost_elements(), ExcGhostsPresent());
+
+      // the checks and the mode update have to happen in any case -- even
+      // for a factor of one -- to be consistent with the MPI communication
+      // model (see the documentation of PETScWrappers::MPI::Vector)
+      vector.last_action = VectorOperation::insert;
+
+      // only a factor different from one changes the element
+      if (value != 1.)
+        {
+          const PetscInt position = index;
+
+          // read the current element through the conversion operator
+          const PetscScalar product
+            = static_cast<PetscScalar>(*this) * value;
+
+          const int ierr
+            = VecSetValues (vector, 1, &position, &product, INSERT_VALUES);
+          AssertThrow (ierr == 0, ExcPETScError(ierr));
+        }
+
+      return *this;
+    }
+
+
+
+    // Divide the referenced vector element by @p value and write the
+    // quotient back in insert mode. Returns this reference object.
+    inline
+    const VectorReference &
+    VectorReference::operator /= (const PetscScalar &value) const
+    {
+      // the vector must already be in 'insert' mode, or in no particular mode
+      Assert ((vector.last_action == VectorOperation::insert)
+              ||
+              (vector.last_action == VectorOperation::unknown),
+              ExcWrongMode (VectorOperation::insert,
+                            vector.last_action));
+      // refuse to modify a vector that has ghost elements
+      Assert (!vector.has_ghost_elements(), ExcGhostsPresent());
+
+      // the checks and the mode update have to happen in any case -- even
+      // for a divisor of one -- to be consistent with the MPI communication
+      // model (see the documentation of PETScWrappers::MPI::Vector)
+      vector.last_action = VectorOperation::insert;
+
+      // only a divisor different from one changes the element
+      if (value != 1.)
+        {
+          const PetscInt position = index;
+
+          // read the current element through the conversion operator
+          const PetscScalar quotient
+            = static_cast<PetscScalar>(*this) / value;
+
+          const int ierr
+            = VecSetValues (vector, 1, &position, &quotient, INSERT_VALUES);
+          AssertThrow (ierr == 0, ExcPETScError(ierr));
+        }
+
+      return *this;
+    }
+
+
+
+    // Return the real part of the referenced element.
+    inline
+    PetscReal
+    VectorReference::real () const
+    {
+#ifdef PETSC_USE_COMPLEX
+      // complex scalars: extract the real part
+      return PetscRealPart (static_cast<PetscScalar>(*this));
+#else
+      // real scalars: the element is its own real part
+      return static_cast<PetscScalar>(*this);
+#endif
+    }
+
+
+
+    // Return the imaginary part of the referenced element; this is zero
+    // unless PETSC_USE_COMPLEX is defined.
+    inline
+    PetscReal
+    VectorReference::imag () const
+    {
+#ifdef PETSC_USE_COMPLEX
+      // complex scalars: extract the imaginary part
+      return PetscImaginaryPart (static_cast<PetscScalar>(*this));
+#else
+      // real scalars have no imaginary part
+      return PetscReal (0);
+#endif
+    }
+
+  } // namespace internal
+
+  // Return whether @p index lies in the ownership range that PETSc
+  // reports for this vector.
+  inline
+  bool
+  VectorBase::in_local_range (const size_type index) const
+  {
+    // query the half-open range [first, past_last) of owned indices
+    PetscInt first, past_last;
+    const int ierr = VecGetOwnershipRange (static_cast<const Vec &>(vector),
+                                           &first, &past_last);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    if (index < static_cast<size_type>(first))
+      return false;
+
+    return index < static_cast<size_type>(past_last);
+  }
+
+
+  // Return an IndexSet of size size() that contains the range reported by
+  // local_range().
+  inline
+  IndexSet
+  VectorBase::locally_owned_elements() const
+  {
+    IndexSet owned (size());
+
+    // PETSc only allows for contiguous local ranges, so one range is enough
+    const std::pair<size_type, size_type> range = local_range();
+    owned.add_range (range.first, range.second);
+
+    return owned;
+  }
+
+
+
+  // Return the value of the 'ghosted' flag of this vector.
+  inline bool
+  VectorBase::has_ghost_elements() const
+  {
+    return ghosted;
+  }
+
+
+
+  // Return a VectorReference proxy for element @p index of this vector.
+  inline internal::VectorReference
+  VectorBase::operator () (const size_type index)
+  {
+    return internal::VectorReference (*this, index);
+  }
+
+
+
+  // Return the value of element @p index, read through a temporary
+  // VectorReference proxy.
+  inline PetscScalar
+  VectorBase::operator () (const size_type index) const
+  {
+    const internal::VectorReference element (*this, index);
+    return static_cast<PetscScalar>(element);
+  }
+
+
+
+  // Same as operator()(index): return a proxy for element @p index.
+  inline internal::VectorReference
+  VectorBase::operator [] (const size_type index)
+  {
+    return (*this)(index);
+  }
+
+
+
+  // Same as operator()(index) const: return the value of element @p index.
+  inline PetscScalar
+  VectorBase::operator [] (const size_type index) const
+  {
+    return (*this)(index);
+  }
+
+  // Return the MPI communicator that PETSc associates with this vector.
+  //
+  // NOTE: the returned reference refers to a function-local static that
+  // is overwritten by every call of this function, for any vector. Copy
+  // the result if it has to survive a later call.
+  inline
+  const MPI_Comm &
+  VectorBase::get_mpi_communicator () const
+  {
+    static MPI_Comm comm;
+    // check the PETSc return code like every other PETSc call in this file
+    // instead of silently ignoring it
+    const int ierr = PetscObjectGetComm(reinterpret_cast<PetscObject>(vector),
+                                        &comm);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+    return comm;
+  }
+
+  // Copy the elements listed in @p indices into @p values, which must hold
+  // at least indices.size() entries. The work is done by the iterator
+  // version of this function.
+  inline
+  void VectorBase::extract_subvector_to (const std::vector<size_type> &indices,
+                                         std::vector<PetscScalar> &values) const
+  {
+    Assert (indices.size() <= values.size(),
+            ExcDimensionMismatch(indices.size(), values.size()));
+
+    // pass iterators rather than &indices[0] / &values[0]: taking the
+    // address of element zero is undefined for empty vectors
+    extract_subvector_to(indices.begin(), indices.end(), values.begin());
+  }
+
+  // Copy the vector elements whose global indices are given by the range
+  // [indices_begin, indices_end) to the output range starting at
+  // @p values_begin. The iterators are used with operator- and operator+,
+  // i.e. they have to support random access arithmetic.
+  //
+  // For a ghosted vector, indices outside the ownership range are looked
+  // up in ghost_indices. For a non-ghosted vector all indices have to lie
+  // in the ownership range (checked in debug mode only).
+  template <typename ForwardIterator, typename OutputIterator>
+  inline
+  void VectorBase::extract_subvector_to (const ForwardIterator    indices_begin,
+                                         const ForwardIterator    indices_end,
+                                         OutputIterator           values_begin) const
+  {
+    const PetscInt n_idx = static_cast<PetscInt>(indices_end - indices_begin);
+    if (n_idx == 0)
+      return;
+
+    // if we are dealing
+    // with a parallel vector
+    // that has ghost elements
+    if (ghosted )
+      {
+
+        int ierr;
+
+        // there is the possibility
+        // that the vector has
+        // ghost elements. in that
+        // case, we first need to
+        // figure out which
+        // elements we own locally,
+        // then get a pointer to
+        // the elements that are
+        // stored here (both the
+        // ones we own as well as
+        // the ghost elements). in
+        // this array, the locally
+        // owned elements come
+        // first followed by the
+        // ghost elements whose
+        // position we can get from
+        // an index set
+        PetscInt begin, end;
+        ierr = VecGetOwnershipRange (vector, &begin, &end);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+        Vec locally_stored_elements = PETSC_NULL;
+        ierr = VecGhostGetLocalForm(vector, &locally_stored_elements);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+        // lsize is only needed for the consistency check below
+        PetscInt lsize;
+        ierr = VecGetSize(locally_stored_elements, &lsize);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+        PetscScalar *ptr;
+        ierr = VecGetArray(locally_stored_elements, &ptr);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+        for (PetscInt i=0; i<n_idx; ++i)
+          {
+            // use the index type of the class so that global indices are
+            // not truncated to 'unsigned int'
+            const size_type index = *(indices_begin+i);
+            if ( index>=static_cast<size_type>(begin)
+                 && index<static_cast<size_type>(end) )
+              {
+                //local entry
+                *(values_begin+i) = *(ptr+index-begin);
+              }
+            else
+              {
+                //ghost entry
+                const size_type ghostidx
+                  = ghost_indices.index_within_set(index);
+
+                Assert(ghostidx+end-begin<static_cast<size_type>(lsize), ExcInternalError());
+                *(values_begin+i) = *(ptr+ghostidx+end-begin);
+              }
+          }
+
+        ierr = VecRestoreArray(locally_stored_elements, &ptr);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+        ierr = VecGhostRestoreLocalForm(vector, &locally_stored_elements);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+      }
+    // if the vector has no ghost
+    // elements, then simply access the
+    // elements we are interested in
+    else
+      {
+        int ierr;
+
+        PetscInt begin, end;
+        ierr = VecGetOwnershipRange (vector, &begin, &end);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+        PetscScalar *ptr;
+        ierr = VecGetArray(vector, &ptr);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+        for (PetscInt i=0; i<n_idx; ++i)
+          {
+            // same index type as above, to avoid truncation
+            const size_type index = *(indices_begin+i);
+
+            Assert(index>=static_cast<size_type>(begin)
+                   && index<static_cast<size_type>(end), ExcInternalError());
+
+            *(values_begin+i) = *(ptr+index-begin);
+          }
+
+        ierr = VecRestoreArray(vector, &ptr);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+      }
+  }
+
+#endif // DOXYGEN
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_PETSC
+
+/*----------------------------   petsc_vector_base.h     ---------------------------*/
+
+#endif
+/*----------------------------   petsc_vector_base.h     ---------------------------*/
diff --git a/include/deal.II/lac/pointer_matrix.h b/include/deal.II/lac/pointer_matrix.h
new file mode 100644
index 0000000..8fc7ddc
--- /dev/null
+++ b/include/deal.II/lac/pointer_matrix.h
@@ -0,0 +1,900 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2002 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__pointer_matrix_h
+#define dealii__pointer_matrix_h
+
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/vector_memory.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+template<typename VectorType> class VectorMemory;
+
+class IdentityMatrix;
+template <typename number> class FullMatrix;
+template <typename number> class LAPACKFullMatrix;
+template <typename number> class SparseMatrix;
+template <typename number> class BlockSparseMatrix;
+template <typename number> class SparseMatrixEZ;
+template <typename number> class BlockSparseMatrixEZ;
+template <typename number> class TridiagonalMatrix;
+template <typename number, typename BlockVectorType> class BlockMatrixArray;
+
+/*! @addtogroup Matrix2
+ *@{
+ */
+
+/**
+ * Abstract class for use in iterations.  This class provides the interface
+ * required by LAC solver classes. It allows different concrete matrix
+ * classes to be used in the same context, as long as they apply to the same
+ * vector class.
+ *
+ * @author Guido Kanschat, 2000, 2001, 2002
+ */
+template<typename VectorType>
+class PointerMatrixBase : public Subscriptor
+{
+public:
+  /**
+   * Value type of this matrix. Since the matrix itself is unknown, we take
+   * the value type of the vector. Therefore, matrix entries must be
+   * convertible to vector entries.
+   *
+   * This was defined to make this matrix a possible template argument to
+   * BlockMatrixArray.
+   */
+  typedef typename VectorType::value_type value_type;
+
+  /**
+   * Virtual destructor.  Does nothing except making sure that the destructor
+   * of any derived class is called whenever a pointer-to-base-class object is
+   * destroyed.
+   */
+  virtual ~PointerMatrixBase ();
+
+  /**
+   * Reset the object to its original state.
+   */
+  virtual void clear () = 0;
+
+  /**
+   * Matrix-vector product.
+   */
+  virtual void vmult (VectorType       &dst,
+                      const VectorType &src) const = 0;
+
+  /**
+   * Transposed matrix-vector product.
+   */
+  virtual void Tvmult (VectorType       &dst,
+                       const VectorType &src) const = 0;
+
+  /**
+   * Matrix-vector product, adding to <tt>dst</tt>.
+   */
+  virtual void vmult_add (VectorType       &dst,
+                          const VectorType &src) const = 0;
+
+  /**
+   * Transposed matrix-vector product, adding to <tt>dst</tt>.
+   */
+  virtual void Tvmult_add (VectorType       &dst,
+                           const VectorType &src) const = 0;
+};
+
+
+/**
+ * A pointer to be used as a matrix.  This class stores a pointer to a matrix
+ * and can be used as a matrix itself in iterative methods.
+ *
+ * The main purpose for the existence of this class is its base class, which
+ * only has a vector as template argument. Therefore, this interface provides
+ * an abstract base class for matrices.
+ *
+ * @author Guido Kanschat 2000, 2001, 2002
+ */
+template<typename MatrixType, typename VectorType>
+class PointerMatrix : public PointerMatrixBase<VectorType>
+{
+public:
+  /**
+   * Constructor.  The pointer in the argument is stored in this class. As
+   * usual, the lifetime of <tt>*M</tt> must be longer than the one of the
+   * PointerMatrix.
+   *
+   * If <tt>M</tt> is zero, no matrix is stored.
+   */
+  PointerMatrix (const MatrixType *M=0);
+
+  /**
+   * Constructor that stores no matrix but sets a name.
+   *
+   * This class internally stores a pointer to a matrix via a SmartPointer
+   * object. The SmartPointer class allows associating a name with the object
+   * pointed to that identifies the object that has the pointer, in order to
+   * identify objects that still refer to the object pointed to. The @p name
+   * argument to this function is used to this end, i.e., you can in essence
+   * assign a name to the current PointerMatrix object.
+   */
+  PointerMatrix(const char *name);
+
+  /**
+   * Constructor. <tt>M</tt> points to a matrix which must live longer than
+   * the PointerMatrix.
+   *
+   * This class internally stores a pointer to a matrix via a SmartPointer
+   * object. The SmartPointer class allows associating a name with the object
+   * pointed to that identifies the object that has the pointer, in order to
+   * identify objects that still refer to the object pointed to. The @p name
+   * argument to this function is used to this end, i.e., you can in essence
+   * assign a name to the current PointerMatrix object.
+   */
+  PointerMatrix(const MatrixType *M,
+                const char *name);
+
+  // Use doc from base class
+  virtual void clear();
+
+  /**
+   * Return whether the object is empty, i.e. whether no matrix is stored or
+   * the stored matrix reports itself as empty.
+   */
+  bool empty () const;
+
+  /**
+   * Assign a new matrix pointer. Deletes the old pointer and releases its
+   * matrix.
+   * @see SmartPointer
+   */
+  const PointerMatrix &operator= (const MatrixType *M);
+
+  /**
+   * Matrix-vector product.
+   */
+  virtual void vmult (VectorType       &dst,
+                      const VectorType &src) const;
+
+  /**
+   * Transposed matrix-vector product.
+   */
+  virtual void Tvmult (VectorType       &dst,
+                       const VectorType &src) const;
+
+  /**
+   * Matrix-vector product, adding to <tt>dst</tt>.
+   */
+  virtual void vmult_add (VectorType       &dst,
+                          const VectorType &src) const;
+
+  /**
+   * Transposed matrix-vector product, adding to <tt>dst</tt>.
+   */
+  virtual void Tvmult_add (VectorType       &dst,
+                           const VectorType &src) const;
+
+private:
+  /**
+   * The pointer to the actual matrix.
+   */
+  SmartPointer<const MatrixType,PointerMatrix<MatrixType,VectorType> > m;
+};
+
+
+/**
+ * A pointer to be used as a matrix.  This class stores a pointer to a matrix
+ * and can be used as a matrix itself in iterative methods.
+ *
+ * The main purpose for the existence of this class is its base class, which
+ * only has a vector as template argument. Therefore, this interface provides
+ * an abstract base class for matrices.
+ *
+ * This class differs from PointerMatrix by its additional VectorMemory object
+ * and by the fact that it implements the functions vmult_add() and
+ * Tvmult_add() only using vmult() and Tvmult() of the MatrixType.
+ *
+ * @author Guido Kanschat 2006
+ */
+template<typename MatrixType, typename VectorType>
+class PointerMatrixAux : public PointerMatrixBase<VectorType>
+{
+public:
+  /**
+   * Constructor.  The pointer in the argument is stored in this class. As
+   * usual, the lifetime of <tt>*M</tt> must be longer than the one of the
+   * PointerMatrixAux.
+   *
+   * If <tt>M</tt> is zero, no matrix is stored.
+   *
+   * If <tt>mem</tt> is zero, then GrowingVectorMemory is used.
+   */
+  PointerMatrixAux (VectorMemory<VectorType> *mem = 0,
+                    const MatrixType         *M = 0);
+
+  /**
+   * Constructor not using a matrix.
+   *
+   * This class internally stores a pointer to a matrix via a SmartPointer
+   * object. The SmartPointer class allows associating a name with the object
+   * pointed to that identifies the object that has the pointer, in order to
+   * identify objects that still refer to the object pointed to. The @p name
+   * argument to this function is used to this end, i.e., you can in essence
+   * assign a name to the current PointerMatrixAux object.
+   */
+  PointerMatrixAux(VectorMemory<VectorType> *mem,
+                   const char               *name);
+
+  /**
+   * Constructor. <tt>M</tt> points to a matrix which must live longer than
+   * the PointerMatrixAux.
+   *
+   * This class internally stores a pointer to a matrix via a SmartPointer
+   * object. The SmartPointer class allows associating a name with the object
+   * pointed to that identifies the object that has the pointer, in order to
+   * identify objects that still refer to the object pointed to. The @p name
+   * argument to this function is used to this end, i.e., you can in essence
+   * assign a name to the current PointerMatrixAux object.
+   */
+  PointerMatrixAux(VectorMemory<VectorType> *mem,
+                   const MatrixType         *M,
+                   const char               *name);
+
+  // Use doc from base class
+  virtual void clear();
+
+  /**
+   * Return whether the object is empty, i.e. whether no matrix is stored or
+   * the stored matrix reports itself as empty.
+   */
+  bool empty () const;
+
+  /**
+   * Assign a new VectorMemory object for getting auxiliary vectors. If
+   * <tt>mem</tt> is zero, the internal GrowingVectorMemory is used.
+   */
+  void set_memory(VectorMemory<VectorType> *mem);
+
+  /**
+   * Assign a new matrix pointer. Deletes the old pointer and releases its
+   * matrix.
+   * @see SmartPointer
+   */
+  const PointerMatrixAux &operator= (const MatrixType *M);
+
+  /**
+   * Matrix-vector product.
+   */
+  virtual void vmult (VectorType       &dst,
+                      const VectorType &src) const;
+
+  /**
+   * Transposed matrix-vector product.
+   */
+  virtual void Tvmult (VectorType       &dst,
+                       const VectorType &src) const;
+
+  /**
+   * Matrix-vector product, adding to <tt>dst</tt>.
+   */
+  virtual void vmult_add (VectorType       &dst,
+                          const VectorType &src) const;
+
+  /**
+   * Transposed matrix-vector product, adding to <tt>dst</tt>.
+   */
+  virtual void Tvmult_add (VectorType       &dst,
+                           const VectorType &src) const;
+
+private:
+  /**
+   * The backup memory if none was provided.
+   */
+  mutable GrowingVectorMemory<VectorType> my_memory;
+
+  /**
+   * Object for getting the auxiliary vector. Declared mutable because the
+   * const multiplication functions reset it to the backup memory if it is
+   * zero.
+   */
+  mutable SmartPointer<VectorMemory<VectorType>,PointerMatrixAux<MatrixType,VectorType> > mem;
+
+  /**
+   * The pointer to the actual matrix.
+   */
+  SmartPointer<const MatrixType,PointerMatrixAux<MatrixType,VectorType> > m;
+};
+
+
+
+/**
+ * Implement matrix multiplications for a vector using the PointerMatrixBase
+ * functionality. Objects of this class can be used in block matrices.
+ *
+ * Implements a matrix with image dimension 1 by using the scalar product
+ * (#vmult()) and scalar multiplication (#Tvmult()) functions of the Vector
+ * class.
+ *
+ * @author Guido Kanschat, 2006
+ */
+template <typename number>
+class PointerMatrixVector : public PointerMatrixBase<Vector<number> >
+{
+public:
+  /**
+   * Constructor.  The pointer in the argument is stored in this class. As
+   * usual, the lifetime of <tt>*M</tt> must be longer than the one of the
+   * PointerMatrixVector.
+   *
+   * If <tt>M</tt> is zero, no vector is stored.
+   */
+  PointerMatrixVector (const Vector<number> *M=0);
+
+  /**
+   * Constructor that stores no vector but sets a name.
+   *
+   * This class internally stores a pointer to a vector via a SmartPointer
+   * object. The SmartPointer class allows associating a name with the object
+   * pointed to that identifies the object that has the pointer, in order to
+   * identify objects that still refer to the object pointed to. The @p name
+   * argument to this function is used to this end, i.e., you can in essence
+   * assign a name to the current PointerMatrixVector object.
+   */
+  PointerMatrixVector (const char *name);
+
+  /**
+   * Constructor. <tt>M</tt> points to a vector which must live longer than
+   * the PointerMatrixVector.
+   *
+   * This class internally stores a pointer to a vector via a SmartPointer
+   * object. The SmartPointer class allows associating a name with the object
+   * pointed to that identifies the object that has the pointer, in order to
+   * identify objects that still refer to the object pointed to. The @p name
+   * argument to this function is used to this end, i.e., you can in essence
+   * assign a name to the current PointerMatrixVector object.
+   */
+  PointerMatrixVector (const Vector<number> *M,
+                       const char *name);
+
+  // Use doc from base class
+  virtual void clear();
+
+  /**
+   * Return whether the object is empty.
+   */
+  bool empty () const;
+
+  /**
+   * Assign a new vector pointer. Deletes the old pointer and releases its
+   * vector.
+   * @see SmartPointer
+   */
+  const PointerMatrixVector &operator= (const Vector<number> *M);
+
+  /**
+   * Matrix-vector product, actually the scalar product of <tt>src</tt> and
+   * the vector representing this matrix.
+   *
+   * The dimension of <tt>dst</tt> is 1, while that of <tt>src</tt> is the
+   * size of the vector representing this matrix.
+   */
+  virtual void vmult (Vector<number> &dst,
+                      const Vector<number> &src) const;
+
+  /**
+   * Transposed matrix-vector product, actually the multiplication of the
+   * vector representing this matrix with <tt>src(0)</tt>.
+   *
+   * The dimension of <tt>src</tt> is 1, while that of <tt>dst</tt> is the
+   * size of the vector representing this matrix.
+   */
+  virtual void Tvmult (Vector<number> &dst,
+                       const Vector<number> &src) const;
+
+  /**
+   * Matrix-vector product, adding to <tt>dst</tt>.
+   *
+   * The dimension of <tt>dst</tt> is 1, while that of <tt>src</tt> is the
+   * size of the vector representing this matrix.
+   */
+  virtual void vmult_add (Vector<number> &dst,
+                          const Vector<number> &src) const;
+
+  /**
+   * Transposed matrix-vector product, adding to <tt>dst</tt>.
+   *
+   * The dimension of <tt>src</tt> is 1, while that of <tt>dst</tt> is the
+   * size of the vector representing this matrix.
+   */
+  virtual void Tvmult_add (Vector<number> &dst,
+                           const Vector<number> &src) const;
+
+private:
+  /**
+   * The pointer to the vector representing this matrix.
+   */
+  SmartPointer<const Vector<number>,PointerMatrixVector<number> > m;
+};
+
+
+
+/**
+ * This function helps you create a PointerMatrixBase object if you do not
+ * want to provide the full template arguments of PointerMatrix or
+ * PointerMatrixAux.
+ *
+ * Note that this function by default creates a PointerMatrixAux, emulating
+ * the functions <tt>vmult_add</tt> and <tt>Tvmult_add</tt>, using an
+ * auxiliary vector. It is overloaded for the library matrix classes
+ * implementing these functions themselves. If you have such a class, you
+ * should overload the function in order to save memory and time.
+ *
+ * The result is a PointerMatrixBase* pointing to <tt>matrix</tt>. The
+ * <tt>VectorType</tt> argument is a dummy just used to determine the template
+ * arguments.
+ *
+ * @relates PointerMatrixBase @relates PointerMatrixAux
+ */
+template <typename VectorType, typename MatrixType>
+inline
+PointerMatrixBase<VectorType> *
+new_pointer_matrix_base(MatrixType &matrix, const VectorType &, const char *name = "PointerMatrixAux")
+{
+  // generic case: wrap the matrix in a PointerMatrixAux and pass a zero
+  // pointer for its VectorMemory argument
+  typedef PointerMatrixAux<MatrixType, VectorType> wrapper_type;
+  return new wrapper_type(0, &matrix, name);
+}
+
+/**
+ * Specialized version for IdentityMatrix.
+ *
+ * @relates PointerMatrixBase @relates PointerMatrix
+ */
+template <typename numberv>
+PointerMatrixBase<Vector<numberv> > *
+new_pointer_matrix_base(const IdentityMatrix &matrix, const Vector<numberv> &, const char *name = "PointerMatrix")
+{
+  // the caller receives a newly allocated PointerMatrix wrapping 'matrix'
+  typedef PointerMatrix<IdentityMatrix, Vector<numberv> > wrapper_type;
+  return new wrapper_type(&matrix, name);
+}
+
+
+/**
+ * Specialized version for FullMatrix.
+ *
+ * @relates PointerMatrixBase @relates PointerMatrix
+ */
+template <typename numberv, typename numberm>
+PointerMatrixBase<Vector<numberv> > *
+new_pointer_matrix_base(const FullMatrix<numberm> &matrix, const Vector<numberv> &, const char *name = "PointerMatrix")
+{
+  // the caller receives a newly allocated PointerMatrix wrapping 'matrix'
+  typedef PointerMatrix<FullMatrix<numberm>, Vector<numberv> > wrapper_type;
+  return new wrapper_type(&matrix, name);
+}
+
+
+/**
+ * Specialized version for LAPACKFullMatrix.
+ *
+ * @relates PointerMatrixBase @relates PointerMatrix
+ */
+template <typename numberv, typename numberm>
+PointerMatrixBase<Vector<numberv> > *
+new_pointer_matrix_base(const LAPACKFullMatrix<numberm> &matrix, const Vector<numberv> &, const char *name = "PointerMatrix")
+{
+  // the caller receives a newly allocated PointerMatrix wrapping 'matrix'
+  typedef PointerMatrix<LAPACKFullMatrix<numberm>, Vector<numberv> > wrapper_type;
+  return new wrapper_type(&matrix, name);
+}
+
+
+/**
+ * Specialized version for SparseMatrix.
+ *
+ * @relates PointerMatrixBase @relates PointerMatrix
+ */
+template <typename numberv, typename numberm>
+PointerMatrixBase<Vector<numberv> > *
+new_pointer_matrix_base(const SparseMatrix<numberm> &matrix, const Vector<numberv> &, const char *name = "PointerMatrix")
+{
+  // the caller receives a newly allocated PointerMatrix wrapping 'matrix'
+  typedef PointerMatrix<SparseMatrix<numberm>, Vector<numberv> > wrapper_type;
+  return new wrapper_type(&matrix, name);
+}
+
+
+/**
+ * Specialized version for BlockSparseMatrix.
+ *
+ * @relates PointerMatrixBase @relates PointerMatrix
+ */
+template <typename VectorType, typename numberm>
+PointerMatrixBase<VectorType> *
+new_pointer_matrix_base(const BlockSparseMatrix<numberm> &matrix, const VectorType &, const char *name = "PointerMatrix")
+{
+  // the caller receives a newly allocated PointerMatrix wrapping 'matrix'
+  typedef PointerMatrix<BlockSparseMatrix<numberm>, VectorType> wrapper_type;
+  return new wrapper_type(&matrix, name);
+}
+
+
+/**
+ * Specialized version for SparseMatrixEZ.
+ *
+ * @relates PointerMatrixBase @relates PointerMatrix
+ */
+template <typename numberv, typename numberm>
+PointerMatrixBase<Vector<numberv> > *
+new_pointer_matrix_base(const SparseMatrixEZ<numberm> &matrix, const Vector<numberv> &, const char *name = "PointerMatrix")
+{
+  // the caller receives a newly allocated PointerMatrix wrapping 'matrix'
+  typedef PointerMatrix<SparseMatrixEZ<numberm>, Vector<numberv> > wrapper_type;
+  return new wrapper_type(&matrix, name);
+}
+
+
+/**
+ * Specialized version for BlockSparseMatrixEZ.
+ *
+ * @relates PointerMatrixBase @relates PointerMatrix
+ */
+template <typename VectorType, typename numberm>
+PointerMatrixBase<VectorType> *
+new_pointer_matrix_base(const BlockSparseMatrixEZ<numberm> &matrix, const VectorType &, const char *name = "PointerMatrix")
+{
+  // the caller receives a newly allocated PointerMatrix wrapping 'matrix'
+  typedef PointerMatrix<BlockSparseMatrixEZ<numberm>, VectorType> wrapper_type;
+  return new wrapper_type(&matrix, name);
+}
+
+
+/**
+ * Specialized version for BlockMatrixArray.
+ *
+ * @relates PointerMatrixBase @relates PointerMatrix
+ */
+// NOTE: the template parameter <tt>numberv</tt> does not appear in the
+// function arguments and therefore cannot be deduced; it is kept only so
+// that existing calls which specify it explicitly continue to compile.
+template <typename numberv, typename numberm, typename BLOCK_VectorType>
+PointerMatrixBase<BLOCK_VectorType> *
+new_pointer_matrix_base(const BlockMatrixArray<numberm,BLOCK_VectorType> &matrix, const BLOCK_VectorType &, const char *name = "PointerMatrix")
+{
+  // The wrapper has to be built on BLOCK_VectorType: only then does the new
+  // object derive from the PointerMatrixBase<BLOCK_VectorType> named in the
+  // return type. The previous BlockVector<numberv> compiled only if it
+  // happened to be the same type, and needed BlockVector to be declared.
+  return new PointerMatrix<BlockMatrixArray<numberm,BLOCK_VectorType>, BLOCK_VectorType>(&matrix, name);
+}
+
+
+/**
+ * Specialized version for TridiagonalMatrix.
+ *
+ * @relates PointerMatrixBase @relates PointerMatrix
+ */
+template <typename numberv, typename numberm>
+PointerMatrixBase<Vector<numberv> > *
+new_pointer_matrix_base(const TridiagonalMatrix<numberm> &matrix, const Vector<numberv> &, const char *name = "PointerMatrix")
+{
+  // the caller receives a newly allocated PointerMatrix wrapping 'matrix'
+  typedef PointerMatrix<TridiagonalMatrix<numberm>, Vector<numberv> > wrapper_type;
+  return new wrapper_type(&matrix, name);
+}
+
+
+
+/*@}*/
+//---------------------------------------------------------------------------
+
+// Destructor; there is nothing to release in the base class.
+template<typename VectorType>
+inline
+PointerMatrixBase<VectorType>::~PointerMatrixBase ()
+{
+}
+
+
+
+//----------------------------------------------------------------------//
+
+
+// Store the pointer M; the type name of this class serves as the name
+// handed to the SmartPointer.
+template<typename MatrixType, typename VectorType>
+PointerMatrix<MatrixType, VectorType>::PointerMatrix (const MatrixType *M)
+  :
+  m(M, typeid(*this).name())
+{
+}
+
+
+// Store no matrix, but hand the given name to the SmartPointer.
+template<typename MatrixType, typename VectorType>
+PointerMatrix<MatrixType, VectorType>::PointerMatrix (const char *name)
+  :
+  m(0, name)
+{
+}
+
+
+// Store the pointer M and hand the given name to the SmartPointer.
+template<typename MatrixType, typename VectorType>
+PointerMatrix<MatrixType, VectorType>::PointerMatrix (const MatrixType *M,
+                                                      const char       *name)
+  :
+  m(M, name)
+{
+}
+
+
+// Reset the stored matrix pointer to zero.
+template<typename MatrixType, typename VectorType>
+inline void
+PointerMatrix<MatrixType, VectorType>::clear ()
+{
+  // afterwards this object no longer refers to any matrix
+  m = 0;
+}
+
+
+// Make this object refer to the matrix M and return this object.
+template<typename MatrixType, typename VectorType>
+inline const PointerMatrix<MatrixType, VectorType> &
+PointerMatrix<MatrixType, VectorType>::operator= (const MatrixType *M)
+{
+  // the assignment is delegated to the SmartPointer member
+  m = M;
+
+  return *this;
+}
+
+
+// Return true if no matrix is stored or if the stored matrix is empty.
+template<typename MatrixType, typename VectorType>
+inline bool
+PointerMatrix<MatrixType, VectorType>::empty () const
+{
+  // the matrix is only asked if the pointer is nonzero
+  return (m == 0) ? true : m->empty();
+}
+
+// Forward to vmult() of the stored matrix.
+template<typename MatrixType, typename VectorType>
+inline void
+PointerMatrix<MatrixType, VectorType>::vmult (VectorType       &dst,
+                                              const VectorType &src) const
+{
+  Assert (m != 0, ExcNotInitialized());
+
+  const MatrixType &matrix = *m;
+  matrix.vmult (dst, src);
+}
+
+
+// Forward to Tvmult() of the stored matrix.
+template<typename MatrixType, typename VectorType>
+inline void
+PointerMatrix<MatrixType, VectorType>::Tvmult (VectorType       &dst,
+                                               const VectorType &src) const
+{
+  Assert (m != 0, ExcNotInitialized());
+
+  const MatrixType &matrix = *m;
+  matrix.Tvmult (dst, src);
+}
+
+
+// Forward to vmult_add() of the stored matrix.
+template<typename MatrixType, typename VectorType>
+inline void
+PointerMatrix<MatrixType, VectorType>::vmult_add (VectorType       &dst,
+                                                  const VectorType &src) const
+{
+  Assert (m != 0, ExcNotInitialized());
+
+  const MatrixType &matrix = *m;
+  matrix.vmult_add (dst, src);
+}
+
+
+// Forward to Tvmult_add() of the stored matrix.
+template<typename MatrixType, typename VectorType>
+inline void
+PointerMatrix<MatrixType, VectorType>::Tvmult_add (VectorType       &dst,
+                                                   const VectorType &src) const
+{
+  Assert (m != 0, ExcNotInitialized());
+
+  const MatrixType &matrix = *m;
+  matrix.Tvmult_add (dst, src);
+}
+
+
+
+//----------------------------------------------------------------------//
+
+
+// Store the memory pool and the matrix pointer; the type name of this
+// class serves as the name handed to both SmartPointers. If no memory
+// pool is given, the internal backup pool is used.
+template<typename MatrixType, typename VectorType>
+PointerMatrixAux<MatrixType, VectorType>::PointerMatrixAux (VectorMemory<VectorType> *mem,
+                                                            const MatrixType *M)
+  : mem(mem, typeid(*this).name()),
+    m(M, typeid(*this).name())
+{
+  // The constructor argument 'mem' shadows the member of the same name.
+  // The member has to be addressed through 'this'; a plain 'mem = ...'
+  // would only modify the argument and leave the member at zero.
+  if (mem == 0) this->mem = &my_memory;
+}
+
+
+// Store the memory pool but no matrix, and hand the given name to both
+// SmartPointers. If no memory pool is given, the internal backup pool is
+// used.
+template<typename MatrixType, typename VectorType>
+PointerMatrixAux<MatrixType, VectorType>::PointerMatrixAux (VectorMemory<VectorType> *mem,
+                                                            const char               *name)
+  : mem(mem, name),
+    m(0, name)
+{
+  // The constructor argument 'mem' shadows the member of the same name.
+  // The member has to be addressed through 'this'; a plain 'mem = ...'
+  // would only modify the argument and leave the member at zero.
+  if (mem == 0) this->mem = &my_memory;
+}
+
+
+// Store the memory pool and the matrix pointer, and hand the given name
+// to both SmartPointers. If no memory pool is given, the internal backup
+// pool is used.
+template<typename MatrixType, typename VectorType>
+PointerMatrixAux<MatrixType, VectorType>::PointerMatrixAux (VectorMemory<VectorType> *mem,
+                                                            const MatrixType         *M,
+                                                            const char               *name)
+  : mem(mem, name),
+    m(M, name)
+{
+  // The constructor argument 'mem' shadows the member of the same name.
+  // The member has to be addressed through 'this'; a plain 'mem = ...'
+  // would only modify the argument and leave the member at zero.
+  if (mem == 0) this->mem = &my_memory;
+}
+
+
+// Reset the stored matrix pointer to zero; the memory pool is kept.
+template<typename MatrixType, typename VectorType>
+inline void
+PointerMatrixAux<MatrixType, VectorType>::clear ()
+{
+  // only 'm' is touched here, 'mem' stays as it is
+  m = 0;
+}
+
+
+// Make this object refer to the matrix M and return this object.
+template<typename MatrixType, typename VectorType>
+inline const PointerMatrixAux<MatrixType, VectorType> &
+PointerMatrixAux<MatrixType, VectorType>::operator= (const MatrixType *M)
+{
+  // the assignment is delegated to the SmartPointer member
+  m = M;
+
+  return *this;
+}
+
+// Select the vector memory from which auxiliary vectors are taken.
+template<typename MatrixType, typename VectorType>
+inline void
+PointerMatrixAux<MatrixType, VectorType>::set_memory(VectorMemory<VectorType> *M)
+{
+  // A null argument selects the object's own my_memory, so mem is
+  // never left at zero by this function.
+  mem = (M != 0) ? M : &my_memory;
+}
+
+// Report whether there is no usable matrix behind this object.
+template<typename MatrixType, typename VectorType>
+inline bool
+PointerMatrixAux<MatrixType, VectorType>::empty () const
+{
+  // A zero pointer counts as empty; otherwise the answer is delegated
+  // to the matrix itself.
+  return (m == 0) || m->empty();
+}
+
+template<typename MatrixType, typename VectorType>
+inline void
+PointerMatrixAux<MatrixType, VectorType>::vmult (VectorType       &dst,
+                                                 const VectorType &src) const
+{
+  // Fall back to my_memory if no vector memory is attached, then forward.
+  if (mem == 0) mem = &my_memory;
+  Assert (mem != 0, ExcNotInitialized());
+  Assert (m != 0, ExcNotInitialized());
+  m->vmult (dst, src);
+}
+
+// Forward Tvmult() to the matrix behind m.
+template<typename MatrixType, typename VectorType>
+inline void
+PointerMatrixAux<MatrixType, VectorType>::Tvmult (VectorType       &dst,
+                                                  const VectorType &src) const
+{
+  // Fall back to my_memory if no vector memory is attached, then forward.
+  if (mem == 0) mem = &my_memory;
+  Assert (mem != 0, ExcNotInitialized());
+  Assert (m != 0, ExcNotInitialized());
+  m->Tvmult (dst, src);
+}
+
+// Add the result of the matrix's vmult() to dst, going through an auxiliary vector.
+template<typename MatrixType, typename VectorType>
+inline void
+PointerMatrixAux<MatrixType, VectorType>::vmult_add (VectorType       &dst,
+                                                     const VectorType &src) const
+{
+  // Fall back to my_memory if no vector memory is attached.
+  if (mem == 0) mem = &my_memory;
+  Assert (mem != 0, ExcNotInitialized());
+  Assert (m != 0, ExcNotInitialized());
+  VectorType *tmp = mem->alloc();
+  tmp->reinit(dst);
+  m->vmult (*tmp, src);
+  dst += *tmp;
+  mem->free(tmp);
+}
+
+// Add the result of the matrix's Tvmult() to dst, going through an auxiliary vector.
+template<typename MatrixType, typename VectorType>
+inline void
+PointerMatrixAux<MatrixType, VectorType>::Tvmult_add (VectorType       &dst,
+                                                      const VectorType &src) const
+{
+  // Fall back to my_memory if no vector memory is attached.
+  if (mem == 0) mem = &my_memory;
+  Assert (mem != 0, ExcNotInitialized());
+  Assert (m != 0, ExcNotInitialized());
+  VectorType *tmp = mem->alloc();
+  tmp->reinit(dst);
+  m->Tvmult (*tmp, src);
+  dst += *tmp;
+  mem->free(tmp);
+}
+
+
+//----------------------------------------------------------------------//
+
+// Constructor: m is set up from the vector pointer M and this class's typeid name.
+template<typename number>
+PointerMatrixVector<number>::PointerMatrixVector (const Vector<number> *M)
+  : m(M, typeid(*this).name())
+{}
+
+// Constructor without a vector: m starts as a zero pointer carrying the given name.
+template<typename number>
+PointerMatrixVector<number>::PointerMatrixVector (const char *name)
+  : m(0, name)
+{}
+
+// Constructor: m is set up from the vector pointer M and the given name.
+template<typename number>
+PointerMatrixVector<number>::PointerMatrixVector (const Vector<number> *M,
+                                                  const char           *name)
+  : m(M, name)
+{}
+
+// Detach from the vector by resetting the pointer m to zero.
+template<typename number>
+inline void
+PointerMatrixVector<number>::clear ()
+{
+  m = 0;
+}
+
+// Point this object at the vector M (only the pointer is stored) and return *this.
+template<typename number>
+inline const PointerMatrixVector<number> &
+PointerMatrixVector<number>::operator= (const Vector<number> *M)
+{
+  m = M;
+  return *this;
+}
+
+// Report whether there is no usable vector behind this object.
+template<typename number>
+inline bool
+PointerMatrixVector<number>::empty () const
+{
+  // A zero pointer counts as empty; otherwise the answer is delegated
+  // to the vector itself.
+  return (m == 0) || m->empty();
+}
+
+template<typename number>
+inline void
+PointerMatrixVector<number>::vmult (Vector<number>       &dst,
+                                    const Vector<number> &src) const
+{
+  Assert (m != 0, ExcNotInitialized());
+  Assert (dst.size() == 1, ExcDimensionMismatch(dst.size(), 1));
+  // dst has a single entry; it receives the vector-vector product (*m) * src.
+  dst(0) = *m * src;
+}
+
+
+template<typename number>
+inline void
+PointerMatrixVector<number>::Tvmult (Vector<number>       &dst,
+                                     const Vector<number> &src) const
+{
+  Assert (m != 0, ExcNotInitialized());
+  Assert(src.size() == 1, ExcDimensionMismatch(src.size(), 1));
+  // src has a single entry; per Vector::equ, dst becomes src(0) times *m.
+  dst.equ (src(0), *m);
+}
+
+
+template<typename number>
+inline void
+PointerMatrixVector<number>::vmult_add (Vector<number>       &dst,
+                                        const Vector<number> &src) const
+{
+  Assert (m != 0, ExcNotInitialized());
+  Assert (dst.size() == 1, ExcDimensionMismatch(dst.size(), 1));
+  // dst has a single entry; the vector-vector product (*m) * src is added to it.
+  dst(0) += *m * src;
+}
+
+
+template<typename number>
+inline void
+PointerMatrixVector<number>::Tvmult_add (Vector<number>       &dst,
+                                         const Vector<number> &src) const
+{
+  Assert (m != 0, ExcNotInitialized());
+  Assert(src.size() == 1, ExcDimensionMismatch(src.size(), 1));
+  // src has a single entry; per Vector::add, src(0) times *m is added to dst.
+  dst.add (src(0), *m);
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/precondition.h b/include/deal.II/lac/precondition.h
new file mode 100644
index 0000000..5cf413b
--- /dev/null
+++ b/include/deal.II/lac/precondition.h
@@ -0,0 +1,2001 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__precondition_h
+#define dealii__precondition_h
+
+// This file contains simple preconditioners.
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/parallel.h>
+#include <deal.II/base/template_constraints.h>
+#include <deal.II/lac/tridiagonal_matrix.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/vector_memory.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+// forward declarations
+
+template <typename number> class Vector;
+template <typename number> class SparseMatrix;
+namespace parallel
+{
+  namespace distributed
+  {
+    template <typename number> class Vector;
+  }
+}
+
+
+
+/*! @addtogroup Preconditioners
+ *@{
+ */
+
+
+/**
+ * No preconditioning.  This class helps you, if you want to use a linear
+ * solver without preconditioning. All solvers in LAC require a
+ * preconditioner. Therefore, you must use the identity provided here to avoid
+ * preconditioning. It can be used in the following way:
+ *
+ * @code
+ * SolverControl           solver_control (1000, 1e-12);
+ * SolverCG<>              cg (solver_control);
+ * cg.solve (system_matrix, solution, system_rhs,
+ *          PreconditionIdentity());
+ * @endcode
+ *
+ * See the step-3 tutorial program for an example and additional explanations.
+ *
+ * Alternatively, the IdentityMatrix class can be used to precondition in this
+ * way.
+ *
+ * @author Guido Kanschat, 1999; extension for full compatibility with
+ * LinearOperator class: Jean-Paul Pelteret, 2015
+ */
+class PreconditionIdentity : public Subscriptor
+{
+public:
+  /**
+   * Declare type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * This struct is only present to provide the interface of a
+   * preconditioner to be handed to a smoother.  It holds no data.
+   */
+  struct AdditionalData
+  {
+    /**
+     * Constructor.
+     */
+    AdditionalData () {}
+  };
+
+  /**
+   * Constructor, sets the domain and range sizes to their defaults.
+   */
+  PreconditionIdentity();
+
+  /**
+   * The matrix argument is ignored and here just for compatibility with more
+   * complex preconditioners.
+   */
+  template <typename MatrixType>
+  void initialize (const MatrixType     &matrix,
+                   const AdditionalData &additional_data = AdditionalData());
+
+  /**
+   * Apply preconditioner.
+   */
+  template<class VectorType>
+  void vmult (VectorType &, const VectorType &) const;
+
+  /**
+   * Apply transpose preconditioner. Since this is the identity, this function
+   * is the same as vmult().
+   */
+  template<class VectorType>
+  void Tvmult (VectorType &, const VectorType &) const;
+
+  /**
+   * Apply preconditioner, adding to the previous value.
+   */
+  template<class VectorType>
+  void vmult_add (VectorType &, const VectorType &) const;
+
+  /**
+   * Apply transpose preconditioner, adding. Since this is the identity, this
+   * function is the same as vmult_add().
+   */
+  template<class VectorType>
+  void Tvmult_add (VectorType &, const VectorType &) const;
+
+  /**
+   * This function is only present to provide the interface of a
+   * preconditioner to be handed to a smoother.  This does nothing.
+   */
+  void clear () {}
+
+  /**
+   * Return the dimension of the codomain (or range) space. To remember: the
+   * matrix is of dimension $m \times n$.
+   *
+   * @note This function should only be called if the preconditioner has been
+   * initialized.
+   */
+  size_type m () const;
+
+  /**
+   * Return the dimension of the domain space. To remember: the matrix is of
+   * dimension $m \times n$.
+   *
+   * @note This function should only be called if the preconditioner has been
+   * initialized.
+   */
+  size_type n () const;
+
+private:
+  /**
+   * The dimension of the range space.
+   */
+  size_type n_rows;
+
+  /**
+   * The dimension of the domain space.
+   */
+  size_type n_columns;
+};
+
+
+
+/**
+ * Preconditioning with Richardson's method. This preconditioner just scales
+ * the vector with a constant relaxation factor provided by the AdditionalData
+ * object.
+ *
+ * In Krylov-space methods, this preconditioner should not have any effect.
+ * Using SolverRichardson, the two relaxation parameters will be just
+ * multiplied. Still, this class is useful in multigrid smoother objects
+ * (MGSmootherRelaxation).
+ *
+ * @author Guido Kanschat, 2005; extension for full compatibility with
+ * LinearOperator class: Jean-Paul Pelteret, 2015
+ */
+class PreconditionRichardson : public Subscriptor
+{
+public:
+  /**
+   * Declare type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * Parameters for Richardson preconditioner.
+   */
+  class AdditionalData
+  {
+  public:
+    /**
+     * Constructor. Takes the relaxation parameter, which defaults to 1 if
+     * no value is given.
+     */
+    AdditionalData (const double relaxation = 1.);
+
+    /**
+     * Relaxation parameter.
+     */
+    double relaxation;
+  };
+
+  /**
+   * Constructor, sets the relaxation parameter, domain and range sizes to
+   * their default.
+   */
+  PreconditionRichardson();
+
+  /**
+   * Change the relaxation parameter.
+   */
+  void initialize (const AdditionalData &parameters);
+
+  /**
+   * Change the relaxation parameter in a way consistent with other
+   * preconditioners. The matrix argument is ignored and here just for
+   * compatibility with more complex preconditioners.
+   */
+  template <typename MatrixType>
+  void initialize (const MatrixType     &matrix,
+                   const AdditionalData &parameters);
+
+  /**
+   * Apply preconditioner.
+   */
+  template<class VectorType>
+  void vmult (VectorType &, const VectorType &) const;
+
+  /**
+   * Apply transpose preconditioner. Since this is a scaled identity, this
+   * function is the same as vmult().
+   */
+  template<class VectorType>
+  void Tvmult (VectorType &, const VectorType &) const;
+  /**
+   * Apply preconditioner, adding to the previous value.
+   */
+  template<class VectorType>
+  void vmult_add (VectorType &, const VectorType &) const;
+
+  /**
+   * Apply transpose preconditioner, adding. Since this is a scaled identity,
+   * this function is the same as vmult_add().
+   */
+  template<class VectorType>
+  void Tvmult_add (VectorType &, const VectorType &) const;
+
+  /**
+   * This function is only present to provide the interface of a
+   * preconditioner to be handed to a smoother.  This does nothing.
+   */
+  void clear () {}
+
+  /**
+   * Return the dimension of the codomain (or range) space. To remember: the
+   * matrix is of dimension $m \times n$.
+   *
+   * @note This function should only be called if the preconditioner has been
+   * initialized.
+   */
+  size_type m () const;
+
+  /**
+   * Return the dimension of the domain space. To remember: the matrix is of
+   * dimension $m \times n$.
+   *
+   * @note This function should only be called if the preconditioner has been
+   * initialized.
+   */
+  size_type n () const;
+
+private:
+  /**
+   * The relaxation parameter multiplied with the vectors.
+   */
+  double relaxation;
+
+  /**
+   * The dimension of the range space.
+   */
+  size_type n_rows;
+
+  /**
+   * The dimension of the domain space.
+   */
+  size_type n_columns;
+};
+
+
+
+/**
+ * Preconditioner using a matrix-builtin function.  This class forms a
+ * preconditioner suitable for the LAC solver classes. Since many
+ * preconditioning methods are based on matrix entries, these have to be
+ * implemented as member functions of the underlying matrix implementation.
+ * This class now is intended to allow easy access to these member functions
+ * from LAC solver classes.
+ *
+ * It seems that all builtin preconditioners have a relaxation parameter, so
+ * please use PreconditionRelaxation for these.
+ *
+ * You will usually not want to create a named object of this type, although
+ * possible. The most common use is like this:
+ * @code
+ *    SolverGMRES<SparseMatrix<double>,
+ *                Vector<double> >      gmres(control,memory,500);
+ *
+ *    gmres.solve (matrix, solution, right_hand_side,
+ *                 PreconditionUseMatrix<SparseMatrix<double>,Vector<double> >
+ *                 (matrix,&SparseMatrix<double>::template precondition_Jacobi<double>));
+ * @endcode
+ * This creates an unnamed object to be passed as the fourth parameter to the
+ * solver function of the SolverGMRES class. It assumes that the SparseMatrix
+ * class has a function <tt>precondition_Jacobi</tt> taking two vectors
+ * (source and destination) as parameters (Actually, there is no function like
+ * that, the existing function takes a third parameter, denoting the
+ * relaxation parameter; this example is therefore only meant to illustrate
+ * the general idea).
+ *
+ * Note that due to the default template parameters, the above example could
+ * be written shorter as follows:
+ * @code
+ *    ...
+ *    gmres.solve (matrix, solution, right_hand_side,
+ *                 PreconditionUseMatrix<>
+ *                   (matrix,&SparseMatrix<double>::template precondition_Jacobi<double>));
+ * @endcode
+ *
+ * @author Guido Kanschat, Wolfgang Bangerth, 1999
+ */
+template<typename MatrixType = SparseMatrix<double>, class VectorType = Vector<double> >
+class PreconditionUseMatrix : public Subscriptor
+{
+public:
+  /**
+   * Type of the preconditioning function of the matrix.
+   */
+  typedef void ( MatrixType::* function_ptr)(VectorType &, const VectorType &) const;
+
+  /**
+   * Constructor.  This constructor stores a reference to the matrix object
+   * for later use and selects a preconditioning method, which must be a
+   * member function of that matrix.
+   */
+  PreconditionUseMatrix(const MatrixType  &M,
+                        const function_ptr method);
+
+  /**
+   * Execute preconditioning. Calls the function passed to the constructor of
+   * this object with the two arguments given here.
+   */
+  void vmult (VectorType       &dst,
+              const VectorType &src) const;
+
+private:
+  /**
+   * Reference to the matrix in use.
+   */
+  const MatrixType &matrix;
+
+  /**
+   * Pointer to the preconditioning member function of the matrix.
+   */
+  const function_ptr precondition;
+};
+
+
+
+/**
+ * Base class for other preconditioners. Here, only some common features of
+ * Jacobi, SOR and SSOR preconditioners are implemented. For preconditioning,
+ * refer to derived classes.
+ *
+ * @author Guido Kanschat, 2000; extension for full compatibility with
+ * LinearOperator class: Jean-Paul Pelteret, 2015
+ */
+template<typename MatrixType = SparseMatrix<double> >
+class PreconditionRelaxation : public Subscriptor
+{
+public:
+  /**
+   * Declare type for container size.
+   */
+  typedef typename MatrixType::size_type size_type;
+
+  /**
+   * Class for parameters.
+   */
+  class AdditionalData
+  {
+  public:
+    /**
+     * Constructor.
+     */
+    AdditionalData (const double relaxation = 1.);
+
+    /**
+     * Relaxation parameter.
+     */
+    double relaxation;
+  };
+
+  /**
+   * Initialize matrix and relaxation parameter. The matrix is just stored in
+   * the preconditioner object. The relaxation parameter should be larger than
+   * zero and smaller than 2 for numerical reasons. It defaults to 1.
+   */
+  void initialize (const MatrixType     &A,
+                   const AdditionalData &parameters = AdditionalData());
+
+  /**
+   * Release the matrix and reset its pointer.
+   */
+  void clear();
+
+  /**
+   * Return the dimension of the codomain (or range) space. To remember: the
+   * matrix is of dimension $m \times n$.
+   */
+  size_type m () const;
+
+  /**
+   * Return the dimension of the domain space. To remember: the matrix is of
+   * dimension $m \times n$.
+   */
+  size_type n () const;
+
+protected:
+  /**
+   * Pointer to the matrix object.
+   */
+  SmartPointer<const MatrixType, PreconditionRelaxation<MatrixType> > A;
+
+  /**
+   * Relaxation parameter.
+   */
+  double relaxation;
+};
+
+
+
+/**
+ * Jacobi preconditioner using matrix built-in function.  The
+ * <tt>MatrixType</tt> class used is required to have a function
+ * <tt>precondition_Jacobi(VectorType&, const VectorType&, double)</tt>. This
+ * class satisfies the
+ * @ref ConceptRelaxationType "relaxation concept".
+ *
+ * @code
+ *     // Declare related objects
+ *
+ * SparseMatrix<double> A;
+ * Vector<double> x;
+ * Vector<double> b;
+ * SolverCG<> solver(...);
+ *
+ * //...initialize and build A
+ *
+ *     // Define and initialize preconditioner
+ *
+ * PreconditionJacobi<SparseMatrix<double> > precondition;
+ * precondition.initialize (A, .6);
+ *
+ * solver.solve (A, x, b, precondition);
+ * @endcode
+ *
+ * @author Guido Kanschat, 2000
+ */
+template <typename MatrixType = SparseMatrix<double> >
+class PreconditionJacobi : public PreconditionRelaxation<MatrixType>
+{
+public:
+  /**
+   * Apply preconditioner.
+   */
+  template<class VectorType>
+  void vmult (VectorType &, const VectorType &) const;
+
+  /**
+   * Apply transpose preconditioner. Since this is a symmetric preconditioner,
+   * this function is the same as vmult().
+   */
+  template<class VectorType>
+  void Tvmult (VectorType &, const VectorType &) const;
+
+  /**
+   * Perform one step of the preconditioned Richardson iteration.
+   */
+  template<class VectorType>
+  void step (VectorType &x, const VectorType &rhs) const;
+
+  /**
+   * Perform one transposed step of the preconditioned Richardson iteration.
+   */
+  template<class VectorType>
+  void Tstep (VectorType &x, const VectorType &rhs) const;
+};
+
+
+/**
+ * SOR preconditioner using matrix built-in function.
+ *
+ * Assuming the matrix <i>A = D + L + U</i> is split into its diagonal
+ * <i>D</i> as well as the strict lower and upper triangles <i>L</i> and
+ * <i>U</i>, then the SOR preconditioner with relaxation parameter <i>r</i> is
+ * @f[
+ *  P^{-1} = r (D+rL)^{-1}.
+ * @f]
+ * It is this operator <i>P<sup>-1</sup></i>, which is implemented by vmult()
+ * through forward substitution. Analogously, Tvmult() implements the
+ * operation of <i>r(D+rU)<sup>-1</sup></i>.
+ *
+ * The SOR iteration itself can be directly written as
+ * @f[
+ *  x^{k+1} = x^k - r D^{-1} \bigl(L x^{k+1} + U x^k - b\bigr).
+ * @f]
+ * Using the right hand side <i>b</i> and the previous iterate <i>x</i>, this
+ * is the operation implemented by step().
+ *
+ * The MatrixType class used is required to have functions
+ * <tt>precondition_SOR(VectorType&, const VectorType&, double)</tt> and
+ * <tt>precondition_TSOR(VectorType&, const VectorType&, double)</tt>. This
+ * class satisfies the
+ * @ref ConceptRelaxationType "relaxation concept".
+ *
+ * @code
+ *     // Declare related objects
+ *
+ * SparseMatrix<double> A;
+ * Vector<double> x;
+ * Vector<double> b;
+ * SolverCG<> solver(...);
+ *
+ * //...initialize and build A
+ *
+ *     // Define and initialize preconditioner
+ *
+ * PreconditionSOR<SparseMatrix<double> > precondition;
+ * precondition.initialize (A, .6);
+ *
+ * solver.solve (A, x, b, precondition);
+ * @endcode
+ *
+ * @author Guido Kanschat, 2000
+ */
+template <typename MatrixType = SparseMatrix<double> >
+class PreconditionSOR : public PreconditionRelaxation<MatrixType>
+{
+public:
+  /**
+   * Apply preconditioner, i.e. <i>r(D+rL)<sup>-1</sup></i>.
+   */
+  template<class VectorType>
+  void vmult (VectorType &, const VectorType &) const;
+
+  /**
+   * Apply transpose preconditioner, i.e. <i>r(D+rU)<sup>-1</sup></i>.
+   */
+  template<class VectorType>
+  void Tvmult (VectorType &, const VectorType &) const;
+
+  /**
+   * Perform one step of the preconditioned Richardson iteration.
+   */
+  template<class VectorType>
+  void step (VectorType &x, const VectorType &rhs) const;
+
+  /**
+   * Perform one transposed step of the preconditioned Richardson iteration.
+   */
+  template<class VectorType>
+  void Tstep (VectorType &x, const VectorType &rhs) const;
+};
+
+
+
+/**
+ * SSOR preconditioner using matrix built-in function.  The
+ * <tt>MatrixType</tt> class used is required to have a function
+ * <tt>precondition_SSOR(VectorType&, const VectorType&, double)</tt>. This
+ * class satisfies the
+ * @ref ConceptRelaxationType "relaxation concept".
+ *
+ * @code
+ *     // Declare related objects
+ *
+ * SparseMatrix<double> A;
+ * Vector<double> x;
+ * Vector<double> b;
+ * SolverCG<> solver(...);
+ *
+ * //...initialize and build A
+ *
+ *     // Define and initialize preconditioner
+ *
+ * PreconditionSSOR<SparseMatrix<double> > precondition;
+ * precondition.initialize (A, .6);
+ *
+ * solver.solve (A, x, b, precondition);
+ * @endcode
+ *
+ * @author Guido Kanschat, 2000
+ */
+template <typename MatrixType = SparseMatrix<double> >
+class PreconditionSSOR : public PreconditionRelaxation<MatrixType>
+{
+public:
+  /**
+   * Declare type for container size.
+   */
+  typedef typename MatrixType::size_type size_type;
+
+  /**
+   * A typedef to the base class.
+   */
+  typedef PreconditionRelaxation<MatrixType> BaseClass;
+
+
+  /**
+   * Initialize matrix and relaxation parameter. The matrix is just stored in
+   * the preconditioner object. The relaxation parameter should be larger than
+   * zero and smaller than 2 for numerical reasons. It defaults to 1.
+   */
+  void initialize (const MatrixType &A,
+                   const typename BaseClass::AdditionalData &parameters = typename BaseClass::AdditionalData());
+
+  /**
+   * Apply preconditioner.
+   */
+  template<class VectorType>
+  void vmult (VectorType &, const VectorType &) const;
+
+  /**
+   * Apply transpose preconditioner. Since this is a symmetric preconditioner,
+   * this function is the same as vmult().
+   */
+  template<class VectorType>
+  void Tvmult (VectorType &, const VectorType &) const;
+
+
+  /**
+   * Perform one step of the preconditioned Richardson iteration.
+   */
+  template<class VectorType>
+  void step (VectorType &x, const VectorType &rhs) const;
+
+  /**
+   * Perform one transposed step of the preconditioned Richardson iteration.
+   */
+  template<class VectorType>
+  void Tstep (VectorType &x, const VectorType &rhs) const;
+
+private:
+  /**
+   * An array that stores for each matrix row where the first position after
+   * the diagonal is located.
+   */
+  std::vector<std::size_t> pos_right_of_diagonal;
+};
+
+
+/**
+ * Permuted SOR preconditioner using matrix built-in function.  The
+ * <tt>MatrixType</tt> class used is required to have functions
+ * <tt>PSOR(VectorType&, const VectorType&, double)</tt> and
+ * <tt>TPSOR(VectorType&, const VectorType&, double)</tt>.
+ *
+ * @code
+ *     // Declare related objects
+ *
+ * SparseMatrix<double> A;
+ * Vector<double> x;
+ * Vector<double> b;
+ * SolverCG<> solver(...);
+ *
+ * //...initialize and build A
+ *
+ * std::vector<unsigned int> permutation(x.size());
+ * std::vector<unsigned int> inverse_permutation(x.size());
+ *
+ * //...fill permutation and its inverse with reasonable values
+ *
+ *     // Define and initialize preconditioner
+ *
+ * PreconditionPSOR<SparseMatrix<double> > precondition;
+ * precondition.initialize (A, permutation, inverse_permutation, .6);
+ *
+ * solver.solve (A, x, b, precondition);
+ * @endcode
+ *
+ * @author Guido Kanschat, 2003; extension for full compatibility with
+ * LinearOperator class: Jean-Paul Pelteret, 2015
+ */
+template <typename MatrixType = SparseMatrix<double> >
+class PreconditionPSOR : public PreconditionRelaxation<MatrixType>
+{
+public:
+  /**
+   * Declare type for container size.
+   */
+  typedef typename MatrixType::size_type size_type;
+
+  /**
+   * Parameters for PreconditionPSOR.
+   */
+  class AdditionalData
+  {
+  public:
+    /**
+     * Constructor. For the parameters' description, see below.
+     *
+     * The permutation vectors are stored as references. Therefore, it has
+     * to be assured that the lifetime of the vectors exceeds the lifetime of
+     * the preconditioner.
+     *
+     * The relaxation parameter should be larger than zero and smaller than 2
+     * for numerical reasons. It defaults to 1.
+     */
+    AdditionalData (const std::vector<size_type> &permutation,
+                    const std::vector<size_type> &inverse_permutation,
+                    const typename PreconditionRelaxation<MatrixType>::AdditionalData
+                    &parameters = typename PreconditionRelaxation<MatrixType>::AdditionalData());
+
+    /**
+     * Storage for the permutation vector.
+     */
+    const std::vector<size_type> &permutation;
+    /**
+     * Storage for the inverse permutation vector.
+     */
+    const std::vector<size_type> &inverse_permutation;
+    /**
+     * Relaxation parameters.
+     */
+    typename PreconditionRelaxation<MatrixType>::AdditionalData parameters;
+  };
+
+  /**
+   * Initialize matrix and relaxation parameter. The matrix is just stored in
+   * the preconditioner object.
+   *
+   * The permutation vectors are stored as pointers. Therefore, it has to be
+   * assured that the lifetime of the vectors exceeds the lifetime of the
+   * preconditioner.
+   *
+   * The relaxation parameter should be larger than zero and smaller than 2
+   * for numerical reasons. It defaults to 1.
+   */
+  void initialize (const MatrixType             &A,
+                   const std::vector<size_type> &permutation,
+                   const std::vector<size_type> &inverse_permutation,
+                   const typename PreconditionRelaxation<MatrixType>::AdditionalData &
+                   parameters = typename PreconditionRelaxation<MatrixType>::AdditionalData());
+
+  /**
+   * Initialize matrix and relaxation parameter. The matrix is just stored in
+   * the preconditioner object.
+   *
+   * For more detail about possible parameters, see the class documentation
+   * and the documentation of the PreconditionPSOR::AdditionalData class.
+   *
+   * After this function is called the preconditioner is ready to be used
+   * (using the <code>vmult</code> function of derived classes).
+   */
+  void initialize (const MatrixType &A,
+                   const AdditionalData &additional_data);
+
+  /**
+   * Apply preconditioner.
+   */
+  template<class VectorType>
+  void vmult (VectorType &, const VectorType &) const;
+
+  /**
+   * Apply transpose preconditioner.
+   */
+  template<class VectorType>
+  void Tvmult (VectorType &, const VectorType &) const;
+private:
+  /**
+   * Storage for the permutation vector.
+   */
+  const std::vector<size_type> *permutation;
+  /**
+   * Storage for the inverse permutation vector.
+   */
+  const std::vector<size_type> *inverse_permutation;
+};
+
+
+
+/**
+ * Preconditioning with a Chebyshev polynomial for symmetric positive definite
+ * matrices. This preconditioner is similar to a Jacobi preconditioner if the
+ * degree variable is set to one, otherwise some higher order polynomial
+ * corrections are used. This preconditioner needs access to the diagonal of
+ * the matrix it acts on and needs a respective <tt>vmult</tt> implementation.
+ * However, it does not need to explicitly know the matrix entries.
+ *
+ * This class is useful e.g. in multigrid smoother objects, since it is
+ * trivially %parallel (assuming that matrix-vector products are %parallel).
+ *
+ * @author Martin Kronbichler, 2009; extension for full compatibility with
+ * LinearOperator class: Jean-Paul Pelteret, 2015
+ */
+template <typename MatrixType=SparseMatrix<double>, class VectorType=Vector<double> >
+class PreconditionChebyshev : public Subscriptor
+{
+public:
+  /**
+   * Declare type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * Standardized data struct to pipe additional parameters to the
+   * preconditioner.
+   */
+  struct AdditionalData
+  {
+    /**
+     * Constructor.
+     */
+    AdditionalData (const unsigned int degree              = 0,
+                    const double       smoothing_range     = 0.,
+                    const bool         nonzero_starting    = false,
+                    const unsigned int eig_cg_n_iterations = 8,
+                    const double       eig_cg_residual     = 1e-2,
+                    const double       max_eigenvalue      = 1);
+
+    /**
+     * This determines the degree of the Chebyshev polynomial. The degree of
+     * the polynomial gives the number of matrix-vector products to be
+     * performed for one application of the vmult() operation. Degree zero
+     * corresponds to a damped Jacobi method.
+     */
+    unsigned int degree;
+
+    /**
+     * This sets the range between the largest eigenvalue in the matrix and
+     * the smallest eigenvalue to be treated. If the parameter is zero, an
+     * estimate for the largest and for the smallest eigenvalue will be
+     * calculated internally. Otherwise, the Chebyshev polynomial will act in
+     * the interval $[\lambda_\mathrm{max}/ \tt{smoothing\_range},
+     * \lambda_\mathrm{max}]$, where $\lambda_\mathrm{max}$ is an estimate of
+     * the maximum eigenvalue of the matrix. A choice of
+     * <tt>smoothing_range</tt> between 5 and 20 is useful in case the
+     * preconditioner is used as a smoother in multigrid.
+     */
+    double smoothing_range;
+
+    /**
+     * When this flag is set to <tt>true</tt>, it enables the method
+     * <tt>vmult(dst, src)</tt> to use non-zero data in the vector
+     * <tt>dst</tt>, appending to it the Chebyshev corrections. This can be
+     * useful in some situations (e.g. when used for high-frequency error
+     * smoothing in a multigrid algorithm), but not the way the solver classes
+     * expect a preconditioner to work (where one ignores the content in
+     * <tt>dst</tt> for the preconditioner application).
+     */
+    bool nonzero_starting;
+
+    /**
+     * Maximum number of CG iterations performed for finding the maximum
+     * eigenvalue. If set to zero, no computations are performed and the
+     * eigenvalues according to the given input are used instead.
+     */
+    unsigned int eig_cg_n_iterations;
+
+    /**
+     * Tolerance for CG iterations performed for finding the maximum
+     * eigenvalue.
+     */
+    double eig_cg_residual;
+
+    /**
+     * Maximum eigenvalue to work with. Only in effect if @p
+     * eig_cg_n_iterations is set to zero, otherwise this parameter is
+     * ignored.
+     */
+    double max_eigenvalue;
+
+    /**
+     * Stores the inverse of the diagonal of the underlying matrix.
+     */
+    VectorType matrix_diagonal_inverse;
+  };
+
+  PreconditionChebyshev ();
+
+  /**
+   * Initialize function. Takes the matrix which is used to form the
+   * preconditioner, and additional flags if there are any. This function
+   * works only if the input matrix has an operator <tt>el(i,i)</tt> for
+   * accessing all the elements in the diagonal. Alternatively, the diagonal
+   * can be supplied with the help of the AdditionalData field.
+   *
+   * This function calculates an estimate of the eigenvalue range of the
+   * matrix weighted by its diagonal using a modified CG iteration in case the
+   * given number of iterations is positive.
+   */
+  void initialize (const MatrixType     &matrix,
+                   const AdditionalData &additional_data = AdditionalData());
+
+  /**
+   * Computes the action of the preconditioner on <tt>src</tt>, storing the
+   * result in <tt>dst</tt>.
+   */
+  void vmult (VectorType       &dst,
+              const VectorType &src) const;
+
+  /**
+   * Computes the action of the transposed preconditioner on <tt>src</tt>,
+   * storing the result in <tt>dst</tt>.
+   */
+  void Tvmult (VectorType       &dst,
+               const VectorType &src) const;
+
+  /**
+   * Resets the preconditioner.
+   */
+  void clear ();
+
+  /**
+   * Return the dimension of the codomain (or range) space. To remember: the
+   * matrix is of dimension $m \times n$.
+   */
+  size_type m () const;
+
+  /**
+   * Return the dimension of the domain space. To remember: the matrix is of
+   * dimension $m \times n$.
+   */
+  size_type n () const;
+
+private:
+
+  /**
+   * A pointer to the underlying matrix.
+   */
+  SmartPointer<const MatrixType,PreconditionChebyshev<MatrixType,VectorType> > matrix_ptr;
+
+  /**
+   * Internal vector used for the <tt>vmult</tt> operation.
+   */
+  mutable VectorType update1;
+
+  /**
+   * Internal vector used for the <tt>vmult</tt> operation.
+   */
+  mutable VectorType update2;
+
+  /**
+   * Stores the additional data provided to the initialize function.
+   */
+  AdditionalData data;
+
+  /**
+   * Average of the largest and smallest eigenvalue under consideration.
+   */
+  double theta;
+
+  /**
+   * Half the interval length between the largest and smallest eigenvalue
+   * under consideration.
+   */
+  double delta;
+
+  /**
+   * Stores whether the preconditioner has been set up.
+   */
+  bool is_initialized;
+};
+
+
+
+/*@}*/
+/* ---------------------------------- Inline functions ------------------- */
+
+#ifndef DOXYGEN
+
+// Default constructor. Both stored dimensions start out as zero, which is
+// the value m() and n() reject as "not initialized".
+inline
+PreconditionIdentity::PreconditionIdentity ()
+  : n_rows (0), n_columns (0)
+{
+}
+
+// Remember the number of rows and columns of the given matrix. The
+// additional data argument is unnamed and not used.
+template <typename MatrixType>
+inline void
+PreconditionIdentity::initialize
+(const MatrixType                           &matrix,
+ const PreconditionIdentity::AdditionalData &)
+{
+  n_rows    = matrix.m();
+  n_columns = matrix.n();
+}
+
+
+// Apply the identity preconditioner: dst becomes a copy of src.
+template <class VectorType>
+inline void
+PreconditionIdentity::vmult (VectorType       &dst,
+                             const VectorType &src) const
+{
+  dst = src;
+}
+
+
+
+// Apply the transposed identity preconditioner, which is the same copy of
+// src into dst that vmult() performs.
+template <class VectorType>
+inline void
+PreconditionIdentity::Tvmult (VectorType       &dst,
+                              const VectorType &src) const
+{
+  dst = src;
+}
+
+// Adding version of vmult(): src is added onto the existing content of dst.
+template <class VectorType>
+inline void
+PreconditionIdentity::vmult_add (VectorType       &dst,
+                                 const VectorType &src) const
+{
+  dst.add(src);
+}
+
+
+
+// Adding version of Tvmult(): src is added onto the existing content of
+// dst, exactly as in vmult_add().
+template <class VectorType>
+inline void
+PreconditionIdentity::Tvmult_add (VectorType       &dst,
+                                  const VectorType &src) const
+{
+  dst.add(src);
+}
+
+// Return the stored number of rows. A value of zero is taken to mean that
+// initialize() has not been called with a matrix and is rejected by the
+// assertion.
+inline
+PreconditionIdentity::size_type
+PreconditionIdentity::m () const
+{
+  Assert(n_rows != 0, ExcNotInitialized());
+  return n_rows;
+}
+
+// Return the stored number of columns. A value of zero is taken to mean
+// that initialize() has not been called with a matrix and is rejected by
+// the assertion.
+inline
+PreconditionIdentity::size_type
+PreconditionIdentity::n () const
+{
+  Assert(n_columns != 0, ExcNotInitialized());
+  return n_columns;
+}
+
+//---------------------------------------------------------------------------
+
+// Store the relaxation parameter handed in by the caller.
+inline
+PreconditionRichardson::AdditionalData::AdditionalData (const double relaxation)
+  : relaxation(relaxation)
+{
+}
+
+
+// Default constructor. The relaxation parameter takes the value that a
+// default-constructed AdditionalData object carries; the stored dimensions
+// start out as zero.
+inline
+PreconditionRichardson::PreconditionRichardson ()
+  :
+  relaxation (AdditionalData().relaxation),
+  n_rows (0),
+  n_columns (0)
+{}
+
+
+
+// Take over the relaxation parameter from the given additional data. The
+// stored dimensions are left untouched by this overload.
+inline
+void
+PreconditionRichardson::initialize
+(const PreconditionRichardson::AdditionalData &parameters)
+{
+  relaxation = parameters.relaxation;
+}
+
+
+
+// Take over the relaxation parameter from the given additional data and
+// remember the number of rows and columns of the matrix.
+template <typename MatrixType>
+inline
+void
+PreconditionRichardson::initialize
+(const MatrixType                             &matrix,
+ const PreconditionRichardson::AdditionalData &parameters)
+{
+  relaxation = parameters.relaxation;
+  n_rows     = matrix.m();
+  n_columns  = matrix.n();
+}
+
+
+
+// Apply the preconditioner: dst is set through dst.equ(relaxation, src),
+// i.e. to src scaled by the relaxation parameter.
+template <class VectorType>
+inline void
+PreconditionRichardson::vmult (VectorType       &dst,
+                               const VectorType &src) const
+{
+#ifdef DEAL_II_WITH_CXX11
+  // the index types of preconditioner and vector have to agree
+  static_assert(std::is_same<size_type,
+                typename VectorType::size_type>::value,
+                "PreconditionRichardson and VectorType must have the same size_type.");
+#endif // DEAL_II_WITH_CXX11
+
+  dst.equ(relaxation,src);
+}
+
+
+
+// Apply the transposed preconditioner. This performs the same scaling of
+// src by the relaxation parameter as vmult().
+template <class VectorType>
+inline void
+PreconditionRichardson::Tvmult (VectorType       &dst,
+                                const VectorType &src) const
+{
+#ifdef DEAL_II_WITH_CXX11
+  // the index types of preconditioner and vector have to agree
+  static_assert(std::is_same<size_type,
+                typename VectorType::size_type>::value,
+                "PreconditionRichardson and VectorType must have the same size_type.");
+#endif // DEAL_II_WITH_CXX11
+
+  dst.equ(relaxation,src);
+}
+
+// Adding version of vmult(): src scaled by the relaxation parameter is
+// added onto dst through dst.add(relaxation, src).
+template <class VectorType>
+inline void
+PreconditionRichardson::vmult_add (VectorType       &dst,
+                                   const VectorType &src) const
+{
+#ifdef DEAL_II_WITH_CXX11
+  // the index types of preconditioner and vector have to agree
+  static_assert(std::is_same<size_type,
+                typename VectorType::size_type>::value,
+                "PreconditionRichardson and VectorType must have the same size_type.");
+#endif // DEAL_II_WITH_CXX11
+
+  dst.add(relaxation,src);
+}
+
+
+
+// Adding version of Tvmult(). This performs the same update of dst as
+// vmult_add().
+template <class VectorType>
+inline void
+PreconditionRichardson::Tvmult_add (VectorType       &dst,
+                                    const VectorType &src) const
+{
+#ifdef DEAL_II_WITH_CXX11
+  // the index types of preconditioner and vector have to agree
+  static_assert(std::is_same<size_type,
+                typename VectorType::size_type>::value,
+                "PreconditionRichardson and VectorType must have the same size_type.");
+#endif // DEAL_II_WITH_CXX11
+
+  dst.add(relaxation,src);
+}
+
+// Return the stored number of rows. The assertion rejects the value zero,
+// which is what the constructor sets and what remains if only the
+// initialize() overload without a matrix has been called.
+inline
+PreconditionRichardson::size_type
+PreconditionRichardson::m () const
+{
+  Assert(n_rows != 0, ExcNotInitialized());
+  return n_rows;
+}
+
+// Return the stored number of columns. The assertion rejects the value
+// zero, which is what the constructor sets and what remains if only the
+// initialize() overload without a matrix has been called.
+inline
+PreconditionRichardson::size_type
+PreconditionRichardson::n () const
+{
+  Assert(n_columns != 0, ExcNotInitialized());
+  return n_columns;
+}
+
+//---------------------------------------------------------------------------
+
+// Store a pointer to the matrix to be used by the derived relaxation
+// preconditioners, together with the relaxation parameter.
+template <typename MatrixType>
+inline
+void
+PreconditionRelaxation<MatrixType>::initialize
+(const MatrixType     &rA,
+ const AdditionalData &parameters)
+{
+  A          = &rA;
+  relaxation = parameters.relaxation;
+}
+
+
+// Drop the pointer to the matrix. The relaxation parameter is not reset.
+template <typename MatrixType>
+inline
+void
+PreconditionRelaxation<MatrixType>::clear ()
+{
+  A = 0;
+}
+
+// Forward to the m() function of the stored matrix. A matrix must have
+// been set through initialize() before.
+template <typename MatrixType>
+inline
+typename PreconditionRelaxation<MatrixType>::size_type
+PreconditionRelaxation<MatrixType>::m () const
+{
+  Assert (A!=0, ExcNotInitialized());
+  return A->m();
+}
+
+// Forward to the n() function of the stored matrix. A matrix must have
+// been set through initialize() before.
+template <typename MatrixType>
+inline
+typename PreconditionRelaxation<MatrixType>::size_type
+PreconditionRelaxation<MatrixType>::n () const
+{
+  Assert (A!=0, ExcNotInitialized());
+  return A->n();
+}
+
+//---------------------------------------------------------------------------
+
+// Apply the preconditioner by forwarding to the precondition_Jacobi()
+// function of the stored matrix with the stored relaxation parameter.
+template <typename MatrixType>
+template <class VectorType>
+inline void
+PreconditionJacobi<MatrixType>::vmult (VectorType       &dst,
+                                       const VectorType &src) const
+{
+#ifdef DEAL_II_WITH_CXX11
+  // the index types of preconditioner and vector have to agree
+  static_assert(
+    std::is_same<typename PreconditionJacobi<MatrixType>::size_type,
+    typename VectorType::size_type>::value,
+    "PreconditionJacobi and VectorType must have the same size_type.");
+#endif // DEAL_II_WITH_CXX11
+
+  Assert (this->A!=0, ExcNotInitialized());
+  this->A->precondition_Jacobi (dst, src, this->relaxation);
+}
+
+
+
+// Apply the transposed preconditioner. This calls the same
+// precondition_Jacobi() function of the stored matrix as vmult() does.
+template <typename MatrixType>
+template <class VectorType>
+inline void
+PreconditionJacobi<MatrixType>::Tvmult (VectorType       &dst,
+                                        const VectorType &src) const
+{
+#ifdef DEAL_II_WITH_CXX11
+  // the index types of preconditioner and vector have to agree
+  static_assert(
+    std::is_same<typename PreconditionJacobi<MatrixType>::size_type,
+    typename VectorType::size_type>::value,
+    "PreconditionJacobi and VectorType must have the same size_type.");
+#endif // DEAL_II_WITH_CXX11
+
+  Assert (this->A!=0, ExcNotInitialized());
+  this->A->precondition_Jacobi (dst, src, this->relaxation);
+}
+
+
+
+// Perform one step of the iteration by forwarding to the Jacobi_step()
+// function of the stored matrix with the stored relaxation parameter.
+template <typename MatrixType>
+template <class VectorType>
+inline void
+PreconditionJacobi<MatrixType>::step (VectorType       &dst,
+                                      const VectorType &src) const
+{
+#ifdef DEAL_II_WITH_CXX11
+  // the index types of preconditioner and vector have to agree
+  static_assert(
+    std::is_same<typename PreconditionJacobi<MatrixType>::size_type,
+    typename VectorType::size_type>::value,
+    "PreconditionJacobi and VectorType must have the same size_type.");
+#endif // DEAL_II_WITH_CXX11
+
+  Assert (this->A!=0, ExcNotInitialized());
+  this->A->Jacobi_step (dst, src, this->relaxation);
+}
+
+
+
+// Perform one transposed step. This simply delegates to step().
+template <typename MatrixType>
+template <class VectorType>
+inline void
+PreconditionJacobi<MatrixType>::Tstep (VectorType       &dst,
+                                       const VectorType &src) const
+{
+#ifdef DEAL_II_WITH_CXX11
+  // the index types of preconditioner and vector have to agree
+  static_assert(
+    std::is_same<typename PreconditionJacobi<MatrixType>::size_type,
+    typename VectorType::size_type>::value,
+    "PreconditionJacobi and VectorType must have the same size_type.");
+#endif // DEAL_II_WITH_CXX11
+
+  this->step (dst, src);
+}
+
+
+
+//---------------------------------------------------------------------------
+
+// Apply the preconditioner by forwarding to the precondition_SOR()
+// function of the stored matrix with the stored relaxation parameter.
+template <typename MatrixType>
+template <class VectorType>
+inline void
+PreconditionSOR<MatrixType>::vmult (VectorType       &dst,
+                                    const VectorType &src) const
+{
+#ifdef DEAL_II_WITH_CXX11
+  // the index types of preconditioner and vector have to agree
+  static_assert(
+    std::is_same<typename PreconditionSOR<MatrixType>::size_type,
+    typename VectorType::size_type>::value,
+    "PreconditionSOR and VectorType must have the same size_type.");
+#endif // DEAL_II_WITH_CXX11
+
+  Assert (this->A!=0, ExcNotInitialized());
+  this->A->precondition_SOR (dst, src, this->relaxation);
+}
+
+
+
+// Apply the transposed preconditioner by forwarding to the
+// precondition_TSOR() function of the stored matrix.
+template <typename MatrixType>
+template <class VectorType>
+inline void
+PreconditionSOR<MatrixType>::Tvmult (VectorType       &dst,
+                                     const VectorType &src) const
+{
+#ifdef DEAL_II_WITH_CXX11
+  // the index types of preconditioner and vector have to agree
+  static_assert(
+    std::is_same<typename PreconditionSOR<MatrixType>::size_type,
+    typename VectorType::size_type>::value,
+    "PreconditionSOR and VectorType must have the same size_type.");
+#endif // DEAL_II_WITH_CXX11
+
+  Assert (this->A!=0, ExcNotInitialized());
+  this->A->precondition_TSOR (dst, src, this->relaxation);
+}
+
+
+
+// Perform one step of the iteration by forwarding to the SOR_step()
+// function of the stored matrix with the stored relaxation parameter.
+template <typename MatrixType>
+template <class VectorType>
+inline void
+PreconditionSOR<MatrixType>::step (VectorType       &dst,
+                                   const VectorType &src) const
+{
+#ifdef DEAL_II_WITH_CXX11
+  // the index types of preconditioner and vector have to agree
+  static_assert(
+    std::is_same<typename PreconditionSOR<MatrixType>::size_type,
+    typename VectorType::size_type>::value,
+    "PreconditionSOR and VectorType must have the same size_type.");
+#endif // DEAL_II_WITH_CXX11
+
+  Assert (this->A!=0, ExcNotInitialized());
+  this->A->SOR_step (dst, src, this->relaxation);
+}
+
+
+
+// Perform one transposed step by forwarding to the TSOR_step() function
+// of the stored matrix with the stored relaxation parameter.
+template <typename MatrixType>
+template <class VectorType>
+inline void
+PreconditionSOR<MatrixType>::Tstep (VectorType       &dst,
+                                    const VectorType &src) const
+{
+#ifdef DEAL_II_WITH_CXX11
+  // the index types of preconditioner and vector have to agree
+  static_assert(
+    std::is_same<typename PreconditionSOR<MatrixType>::size_type,
+    typename VectorType::size_type>::value,
+    "PreconditionSOR and VectorType must have the same size_type.");
+#endif // DEAL_II_WITH_CXX11
+
+  Assert (this->A!=0, ExcNotInitialized());
+  this->A->TSOR_step (dst, src, this->relaxation);
+}
+
+
+
+//---------------------------------------------------------------------------
+
+// Initialize the base class with matrix and relaxation parameter and, if
+// the matrix is a SparseMatrix, precompute for every row the position of
+// the first entry to the right of the diagonal. These positions are later
+// handed to precondition_SSOR().
+template <typename MatrixType>
+inline void
+PreconditionSSOR<MatrixType>::initialize (const MatrixType                     &rA,
+                                          const typename BaseClass::AdditionalData &parameters)
+{
+  typedef SparseMatrix<typename MatrixType::value_type> SparseMatrixType;
+
+  this->PreconditionRelaxation<MatrixType>::initialize (rA, parameters);
+
+  // information about the diagonal can only be extracted if we actually
+  // have a SparseMatrix object; for all other matrices there is nothing
+  // left to do.
+  const SparseMatrixType *sparse_matrix =
+    dynamic_cast<const SparseMatrixType *>(&*this->A);
+  if (sparse_matrix == 0)
+    return;
+
+  const size_type n_matrix_rows = this->A->n();
+  pos_right_of_diagonal.resize(n_matrix_rows, static_cast<std::size_t>(-1));
+  for (size_type row=0; row<n_matrix_rows; ++row)
+    {
+      // the first entry of each row is the diagonal element, so the search
+      // starts one entry later and advances as long as the entries are not
+      // yet to the right of the diagonal.
+      typename SparseMatrixType::const_iterator
+      entry = sparse_matrix->begin(row)+1;
+      while (entry < sparse_matrix->end(row) && !(entry->column() > row))
+        ++entry;
+
+      // store the position as an offset from the first entry of the matrix
+      pos_right_of_diagonal[row] = entry - sparse_matrix->begin();
+    }
+}
+
+
+// Apply the preconditioner by forwarding to the precondition_SSOR()
+// function of the stored matrix, passing along the relaxation parameter
+// and the positions collected in initialize().
+template <typename MatrixType>
+template <class VectorType>
+inline void
+PreconditionSSOR<MatrixType>::vmult (VectorType       &dst,
+                                     const VectorType &src) const
+{
+#ifdef DEAL_II_WITH_CXX11
+  // the index types of preconditioner and vector have to agree
+  static_assert(
+    std::is_same<typename PreconditionSSOR<MatrixType>::size_type,
+    typename VectorType::size_type>::value,
+    "PreconditionSSOR and VectorType must have the same size_type.");
+#endif // DEAL_II_WITH_CXX11
+
+  Assert (this->A!=0, ExcNotInitialized());
+  this->A->precondition_SSOR (dst, src, this->relaxation, pos_right_of_diagonal);
+}
+
+
+
+// Apply the transposed preconditioner. This calls the same
+// precondition_SSOR() function of the stored matrix as vmult() does.
+template <typename MatrixType>
+template <class VectorType>
+inline void
+PreconditionSSOR<MatrixType>::Tvmult (VectorType       &dst,
+                                      const VectorType &src) const
+{
+#ifdef DEAL_II_WITH_CXX11
+  // the index types of preconditioner and vector have to agree
+  static_assert(
+    std::is_same<typename PreconditionSSOR<MatrixType>::size_type,
+    typename VectorType::size_type>::value,
+    "PreconditionSSOR and VectorType must have the same size_type.");
+#endif // DEAL_II_WITH_CXX11
+
+  Assert (this->A!=0, ExcNotInitialized());
+  this->A->precondition_SSOR (dst, src, this->relaxation, pos_right_of_diagonal);
+}
+
+
+
+// Perform one step of the iteration by forwarding to the SSOR_step()
+// function of the stored matrix with the stored relaxation parameter.
+template <typename MatrixType>
+template <class VectorType>
+inline void
+PreconditionSSOR<MatrixType>::step (VectorType       &dst,
+                                    const VectorType &src) const
+{
+#ifdef DEAL_II_WITH_CXX11
+  // the index types of preconditioner and vector have to agree
+  static_assert(
+    std::is_same<typename PreconditionSSOR<MatrixType>::size_type,
+    typename VectorType::size_type>::value,
+    "PreconditionSSOR and VectorType must have the same size_type.");
+#endif // DEAL_II_WITH_CXX11
+
+  Assert (this->A!=0, ExcNotInitialized());
+  this->A->SSOR_step (dst, src, this->relaxation);
+}
+
+
+
+// Perform one transposed step. This simply delegates to step().
+template <typename MatrixType>
+template <class VectorType>
+inline void
+PreconditionSSOR<MatrixType>::Tstep (VectorType       &dst,
+                                     const VectorType &src) const
+{
+#ifdef DEAL_II_WITH_CXX11
+  // the index types of preconditioner and vector have to agree
+  static_assert(
+    std::is_same<typename PreconditionSSOR<MatrixType>::size_type,
+    typename VectorType::size_type>::value,
+    "PreconditionSSOR and VectorType must have the same size_type.");
+#endif // DEAL_II_WITH_CXX11
+
+  this->step (dst, src);
+}
+
+
+
+//---------------------------------------------------------------------------
+
+// Store the addresses of the permutation and inverse permutation vectors
+// and initialize the base class with matrix and relaxation parameter.
+// Only pointers to @p p and @p ip are kept, so both vectors have to
+// outlive the use of this object.
+template <typename MatrixType>
+inline
+void
+PreconditionPSOR<MatrixType>::initialize
+(const MatrixType             &rA,
+ const std::vector<size_type> &p,
+ const std::vector<size_type> &ip,
+ const typename PreconditionRelaxation<MatrixType>::AdditionalData &parameters)
+{
+  permutation         = &p;
+  inverse_permutation = &ip;
+  PreconditionRelaxation<MatrixType>::initialize(rA, parameters);
+}
+
+
+// Convenience overload: unpack the members of the AdditionalData object
+// and forward them to the four-argument initialize() function.
+template <typename MatrixType>
+inline
+void
+PreconditionPSOR<MatrixType>::initialize
+(const MatrixType     &A,
+ const AdditionalData &additional_data)
+{
+  initialize(A,
+             additional_data.permutation, additional_data.inverse_permutation,
+             additional_data.parameters);
+}
+
+
+// Apply the preconditioner: src is first copied into dst, and dst is then
+// handed to the PSOR() function of the stored matrix together with the
+// two permutation vectors and the relaxation parameter.
+template <typename MatrixType>
+template <typename VectorType>
+inline void
+PreconditionPSOR<MatrixType>::vmult (VectorType       &dst,
+                                     const VectorType &src) const
+{
+#ifdef DEAL_II_WITH_CXX11
+  // the index types of preconditioner and vector have to agree
+  static_assert(
+    std::is_same<typename PreconditionPSOR<MatrixType>::size_type,
+    typename VectorType::size_type>::value,
+    "PreconditionPSOR and VectorType must have the same size_type.");
+#endif // DEAL_II_WITH_CXX11
+
+  Assert (this->A!=0, ExcNotInitialized());
+
+  dst = src;
+  this->A->PSOR (dst, *permutation, *inverse_permutation, this->relaxation);
+}
+
+
+
+// Apply the transposed preconditioner: src is first copied into dst, and
+// dst is then handed to the TPSOR() function of the stored matrix together
+// with the two permutation vectors and the relaxation parameter.
+template <typename MatrixType>
+template <class VectorType>
+inline void
+PreconditionPSOR<MatrixType>::Tvmult (VectorType       &dst,
+                                      const VectorType &src) const
+{
+#ifdef DEAL_II_WITH_CXX11
+  // the index types of preconditioner and vector have to agree
+  static_assert(
+    std::is_same<typename PreconditionPSOR<MatrixType>::size_type,
+    typename VectorType::size_type>::value,
+    "PreconditionPSOR and VectorType must have the same size_type.");
+#endif // DEAL_II_WITH_CXX11
+
+  Assert (this->A!=0, ExcNotInitialized());
+
+  dst = src;
+  this->A->TPSOR (dst, *permutation, *inverse_permutation, this->relaxation);
+}
+
+// Collect the permutation, the inverse permutation and the parameters of
+// the relaxation base class in one object.
+template <typename MatrixType>
+PreconditionPSOR<MatrixType>::AdditionalData::AdditionalData
+(const std::vector<size_type> &permutation,
+ const std::vector<size_type> &inverse_permutation,
+ const typename PreconditionRelaxation<MatrixType>::AdditionalData &parameters)
+  :
+  permutation(permutation),
+  inverse_permutation(inverse_permutation),
+  parameters(parameters)
+{}
+
+
+//---------------------------------------------------------------------------
+
+
+// Store the matrix together with the pointer to the member function of
+// the matrix that vmult() is going to call.
+template <typename MatrixType, class VectorType>
+PreconditionUseMatrix<MatrixType,VectorType>::PreconditionUseMatrix
+(const MatrixType   &M,
+ const function_ptr  method)
+  :
+  matrix(M),
+  precondition(method)
+{}
+
+
+
+// Apply the preconditioner by calling the stored member function pointer
+// on the stored matrix with the arguments (dst, src).
+template <typename MatrixType, class VectorType>
+void
+PreconditionUseMatrix<MatrixType,VectorType>::vmult
+(VectorType       &dst,
+ const VectorType &src) const
+{
+  (matrix.*precondition)(dst, src);
+}
+
+//---------------------------------------------------------------------------
+
+// Store the relaxation parameter handed in by the caller.
+template <typename MatrixType>
+inline
+PreconditionRelaxation<MatrixType>::AdditionalData::AdditionalData
+(const double relaxation)
+  : relaxation (relaxation)
+{
+}
+
+
+
+//---------------------------------------------------------------------------
+
+namespace internal
+{
+  namespace PreconditionChebyshev
+  {
+    // for deal.II vectors, perform updates for Chebyshev preconditioner all
+    // at once to reduce memory transfer. Here, we select between general
+    // vectors and deal.II vectors where we expand the loop over the (local)
+    // size of the vector
+
+    // generic fallback for all vector types without a specialized overload.
+    // It performs the update of one Chebyshev step through the vector
+    // interface (equ, scale, sadd, operator-=):
+    // - with start_zero set, dst becomes factor2 * src scaled entry by
+    //   entry with matrix_diagonal_inverse, and update1 is set to -dst
+    //   (factor1 is not used in this case);
+    // - otherwise update2, which the caller has filled with the matrix
+    //   applied to dst, is overwritten by the scaled residual-like vector
+    //   (update2 - src) * matrix_diagonal_inverse, update1 becomes
+    //   factor1 * update1 + factor2 * update2, and update1 is subtracted
+    //   from dst.
+    template <typename VectorType>
+    inline
+    void
+    vector_updates (const VectorType &src,
+                    const VectorType &matrix_diagonal_inverse,
+                    const bool       start_zero,
+                    const double     factor1,
+                    const double     factor2,
+                    VectorType       &update1,
+                    VectorType       &update2,
+                    VectorType       &dst)
+    {
+      if (!start_zero)
+        {
+          update2 -= src;
+          update2.scale (matrix_diagonal_inverse);
+
+          // a zero factor1 means that the old content of update1 must not
+          // enter the new value
+          if (factor1 != 0.)
+            update1.sadd(factor1, factor2, update2);
+          else
+            update1.equ(factor2, update2);
+
+          dst -= update1;
+          return;
+        }
+
+      dst.equ (factor2, src);
+      dst.scale (matrix_diagonal_inverse);
+      update1.equ(-1.,dst);
+    }
+
+    // worker routine for deal.II vectors. Because of vectorization, we need
+    // to put the loop into an extra structure because the virtual function of
+    // VectorUpdatesRange prevents the compiler from applying vectorization.
+    //
+    // The object only stores raw pointers to the entries of the vectors
+    // involved plus the two scalar factors; the actual work is done entry by
+    // entry in apply_to_subrange(). It does not own any of the arrays.
+    template <typename Number>
+    struct VectorUpdater
+    {
+      // Store the pointers and factors. The flag do_startup records whether
+      // factor1 compares equal to a value-initialized Number, i.e. whether
+      // the old content of update1 is to be ignored.
+      VectorUpdater (const Number *src,
+                     const Number *matrix_diagonal_inverse,
+                     const bool    start_zero,
+                     const Number  factor1,
+                     const Number  factor2,
+                     Number       *update1,
+                     Number       *update2,
+                     Number       *dst)
+        :
+        src (src),
+        matrix_diagonal_inverse (matrix_diagonal_inverse),
+        do_startup (factor1 == Number()),
+        start_zero (start_zero),
+        factor1 (factor1),
+        factor2 (factor2),
+        update1 (update1),
+        update2 (update2),
+        dst (dst)
+      {}
+
+      // Perform the update on the entries with indices in [begin,end).
+      // Three cases are distinguished, each with its own loop:
+      // - do_startup and start_zero: dst is overwritten with
+      //   factor2 * src * matrix_diagonal_inverse and update1 with -dst;
+      // - do_startup but not start_zero: update1 is overwritten with
+      //   (update2 - src) * factor2 * matrix_diagonal_inverse and then
+      //   subtracted from dst;
+      // - otherwise: update1 becomes factor1 * update1 +
+      //   factor2 * (update2 - src) * matrix_diagonal_inverse, which is
+      //   then subtracted from dst.
+      // The array update2 is only read in this function.
+      void
+      apply_to_subrange (const std::size_t begin,
+                         const std::size_t end) const
+      {
+        // To circumvent a bug in gcc
+        // (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63945), we create copies
+        // of the variables factor1 and factor2 and do not check based on
+        // factor1.
+        const Number factor1 = this->factor1;
+        const Number factor2 = this->factor2;
+        if (do_startup)
+          {
+            if (start_zero)
+              DEAL_II_OPENMP_SIMD_PRAGMA
+              for (std::size_t i=begin; i<end; ++i)
+                {
+                  dst[i] = factor2 * src[i] * matrix_diagonal_inverse[i];
+                  update1[i] = -dst[i];
+                }
+            else
+              DEAL_II_OPENMP_SIMD_PRAGMA
+              for (std::size_t i=begin; i<end; ++i)
+                {
+                  update1[i] = ((update2[i]-src[i]) *
+                                factor2*matrix_diagonal_inverse[i]);
+                  dst[i] -= update1[i];
+                }
+          }
+        else
+          DEAL_II_OPENMP_SIMD_PRAGMA
+          for (std::size_t i=begin; i<end; ++i)
+            {
+              // keep the new value in a local variable so that it is
+              // computed once and used for both update1 and dst
+              const Number update =
+                factor1 * update1[i] + factor2 *
+                ((update2[i] - src[i]) * matrix_diagonal_inverse[i]);
+              update1[i] = update;
+              dst[i] -= update;
+            }
+      }
+
+      // read-only input arrays
+      const Number *src;
+      const Number *matrix_diagonal_inverse;
+      // true if factor1 was zero on construction
+      const bool do_startup;
+      // true if the previous content of dst is to be ignored
+      const bool start_zero;
+      const Number factor1;
+      const Number factor2;
+      // arrays accessed through the const function apply_to_subrange()
+      mutable Number *update1;
+      mutable Number *update2;
+      mutable Number *dst;
+    };
+
+    // Helper that runs a VectorUpdater over the index range [0,size). All
+    // the work happens in the constructor: ranges shorter than
+    // internal::Vector::minimum_parallel_grain_size are processed directly
+    // in one call, all others are handed to apply_parallel() of the base
+    // class with that grain size. Only a reference to the updater is kept.
+    template <typename Number>
+    struct VectorUpdatesRange : public parallel::ParallelForInteger
+    {
+      VectorUpdatesRange(const VectorUpdater<Number> &updater,
+                         const std::size_t size)
+        :
+        updater (updater)
+      {
+        if (size >= internal::Vector::minimum_parallel_grain_size)
+          apply_parallel (0, size,
+                          internal::Vector::minimum_parallel_grain_size);
+        else
+          apply_to_subrange (0, size);
+      }
+
+      ~VectorUpdatesRange() {}
+
+      // Forward the given subrange to the (non-virtual) worker function of
+      // the updater object.
+      virtual void
+      apply_to_subrange (const std::size_t begin,
+                         const std::size_t end) const
+      {
+        updater.apply_to_subrange(begin, end);
+      }
+
+      const VectorUpdater<Number> &updater;
+    };
+
+    // overload selected for the serial deal.II vector: wrap the data arrays
+    // of all vectors into a VectorUpdater and run it over all src.size()
+    // entries. The factors given as double are converted to Number here.
+    template <typename Number>
+    inline
+    void
+    vector_updates (const ::dealii::Vector<Number> &src,
+                    const ::dealii::Vector<Number> &matrix_diagonal_inverse,
+                    const bool    start_zero,
+                    const double  factor1,
+                    const double  factor2,
+                    ::dealii::Vector<Number> &update1,
+                    ::dealii::Vector<Number> &update2,
+                    ::dealii::Vector<Number> &dst)
+    {
+      VectorUpdater<Number> updater(src.begin(),
+                                    matrix_diagonal_inverse.begin(),
+                                    start_zero,
+                                    factor1,
+                                    factor2,
+                                    update1.begin(),
+                                    update2.begin(),
+                                    dst.begin());
+
+      // the constructor of the temporary object does all the work
+      VectorUpdatesRange<Number>(updater, src.size());
+    }
+
+    // overload selected for the parallel deal.II vector: wrap the data
+    // arrays of all vectors into a VectorUpdater and run it over the
+    // src.local_size() entries. The factors given as double are converted
+    // to Number here.
+    template <typename Number>
+    inline
+    void
+    vector_updates (const parallel::distributed::Vector<Number> &src,
+                    const parallel::distributed::Vector<Number> &matrix_diagonal_inverse,
+                    const bool    start_zero,
+                    const double  factor1,
+                    const double  factor2,
+                    parallel::distributed::Vector<Number> &update1,
+                    parallel::distributed::Vector<Number> &update2,
+                    parallel::distributed::Vector<Number> &dst)
+    {
+      VectorUpdater<Number> updater(src.begin(),
+                                    matrix_diagonal_inverse.begin(),
+                                    start_zero,
+                                    factor1,
+                                    factor2,
+                                    update1.begin(),
+                                    update2.begin(),
+                                    dst.begin());
+
+      // the constructor of the temporary object does all the work
+      VectorUpdatesRange<Number>(updater, src.local_size());
+    }
+
+    // Minimal preconditioner that scales a vector entry by entry with a
+    // given vector. Only a reference to that vector is kept, so the vector
+    // has to outlive this object.
+    template <typename VectorType>
+    struct DiagonalPreconditioner
+    {
+      DiagonalPreconditioner (const VectorType &vector)
+        : diagonal_vector(vector)
+      {
+      }
+
+      // Set dst to src and scale it with the stored vector.
+      void
+      vmult (VectorType       &dst,
+             const VectorType &src) const
+      {
+        dst = src;
+        dst.scale(diagonal_vector);
+      }
+
+      const VectorType &diagonal_vector;
+    };
+
+    // Small helper with a slot function that can be connected to a signal
+    // delivering eigenvalue estimates. Each call replaces the stored
+    // values, so after the last call the object holds the most recent set.
+    struct EigenvalueTracker
+    {
+      void
+      slot (const std::vector<double> &eigenvalues)
+      {
+        values = eigenvalues;
+      }
+
+      std::vector<double> values;
+    };
+  }
+}
+
+
+
+// Store all scalar parameters handed in by the caller. The vector
+// matrix_diagonal_inverse does not appear in the argument list and is left
+// default-constructed.
+template <typename MatrixType, class VectorType>
+inline
+PreconditionChebyshev<MatrixType,VectorType>::AdditionalData::AdditionalData
+(const unsigned int degree,
+ const double       smoothing_range,
+ const bool         nonzero_starting,
+ const unsigned int eig_cg_n_iterations,
+ const double       eig_cg_residual,
+ const double       max_eigenvalue)
+  :
+  degree (degree),
+  smoothing_range (smoothing_range),
+  nonzero_starting (nonzero_starting),
+  eig_cg_n_iterations (eig_cg_n_iterations),
+  eig_cg_residual (eig_cg_residual),
+  max_eigenvalue (max_eigenvalue)
+{
+}
+
+
+
+// Default constructor. The object is marked as not initialized; the matrix
+// and all parameters are set later through initialize().
+template <typename MatrixType, class VectorType>
+inline
+PreconditionChebyshev<MatrixType,VectorType>::PreconditionChebyshev ()
+  :
+  // give the interval parameters well-defined values. They are overwritten
+  // in initialize(), but the check in vmult()/Tvmult() that guards against
+  // use before initialization is an Assert only, so without this an
+  // uninitialized object would read indeterminate values there.
+  theta (1.),
+  delta (1.),
+  is_initialized (false)
+{
+#ifdef DEAL_II_WITH_CXX11
+  // the index types of preconditioner and vector have to agree
+  static_assert(
+    std::is_same<size_type, typename VectorType::size_type>::value,
+    "PreconditionChebyshev and VectorType must have the same size_type.");
+#endif // DEAL_II_WITH_CXX11
+}
+
+
+
+// Set up the preconditioner for the given matrix:
+// - store a pointer to @p matrix and a copy of @p additional_data;
+// - if the copy does not carry an inverse diagonal of matching size, build
+//   it from matrix.el(i,i);
+// - determine the eigenvalue bounds, either from a CG iteration
+//   preconditioned with the inverse diagonal (eig_cg_n_iterations > 0) or
+//   from max_eigenvalue and smoothing_range in the additional data;
+// - derive theta and delta from these bounds and size the two work
+//   vectors.
+// Note that the tolerances of the CG iteration are fixed values in this
+// function; data.eig_cg_residual is not consulted here.
+template <typename MatrixType, class VectorType>
+inline
+void
+PreconditionChebyshev<MatrixType,VectorType>::initialize
+(const MatrixType     &matrix,
+ const AdditionalData &additional_data)
+{
+  matrix_ptr = &matrix;
+  data = additional_data;
+  if (data.matrix_diagonal_inverse.size() != matrix.m())
+    {
+      // a vector of wrong but nonzero size is a user error; an empty vector
+      // means that we are supposed to compute the diagonal ourselves
+      Assert(data.matrix_diagonal_inverse.size() == 0,
+             ExcMessage("Matrix diagonal vector set but not sized correctly"));
+      data.matrix_diagonal_inverse.reinit(matrix.m());
+      // use size_type for the index so that the loop also covers matrices
+      // whose size does not fit into an unsigned int
+      for (size_type i=0; i<matrix.m(); ++i)
+        data.matrix_diagonal_inverse(i) = 1./matrix.el(i,i);
+    }
+
+
+  // calculate largest eigenvalue using a hand-tuned CG iteration on the
+  // matrix weighted by its diagonal. we start with a vector that consists of
+  // ones only, weighted by the length.
+  double max_eigenvalue, min_eigenvalue;
+  if (data.eig_cg_n_iterations > 0)
+    {
+      Assert (additional_data.eig_cg_n_iterations > 2,
+              ExcMessage ("Need to set more than two iterations to find eigenvalues."));
+
+      // set a very strict tolerance to force at least two iterations
+      ReductionControl control (data.eig_cg_n_iterations, 1e-35, 1e-10);
+      GrowingVectorMemory<VectorType> memory;
+      VectorType *rhs = memory.alloc();
+      VectorType *dummy = memory.alloc();
+      rhs->reinit(data.matrix_diagonal_inverse);
+      dummy->reinit(data.matrix_diagonal_inverse);
+
+      // heuristically, a right hand side close to a constant has been shown
+      // to quickly reveal the largest eigenvalue. however, avoid using the
+      // exact constant because it might not be in the range space of some
+      // matrices (purely Neumann matrices with constant mode filtered out by
+      // orthogonal projection in the matrix-vector product)
+      *rhs = 1./std::sqrt(static_cast<double>(matrix.m()));
+      if (rhs->locally_owned_elements().is_element(0))
+        (*rhs)(0) = 0.;
+      rhs->compress(VectorOperation::insert);
+
+      // the tracker receives the eigenvalue estimates the solver publishes
+      // through its eigenvalue slot
+      internal::PreconditionChebyshev::EigenvalueTracker eigenvalue_tracker;
+      SolverCG<VectorType> solver (control, memory);
+      solver.connect_eigenvalues_slot(std_cxx11::bind(&internal::PreconditionChebyshev::EigenvalueTracker::slot,
+                                                      &eigenvalue_tracker,
+                                                      std_cxx11::_1));
+      internal::PreconditionChebyshev::DiagonalPreconditioner<VectorType>
+      preconditioner(data.matrix_diagonal_inverse);
+      try
+        {
+          solver.solve(matrix, *dummy, *rhs, preconditioner);
+        }
+      catch (SolverControl::NoConvergence &)
+        {
+          // not converging is expected here: we are only interested in the
+          // eigenvalue estimates, not in the solution
+        }
+
+      memory.free(dummy);
+      memory.free(rhs);
+
+      // read the eigenvalues from the attached eigenvalue tracker
+      if (eigenvalue_tracker.values.empty())
+        min_eigenvalue = max_eigenvalue = 1;
+      else
+        {
+          min_eigenvalue = eigenvalue_tracker.values.front();
+          max_eigenvalue = eigenvalue_tracker.values.back();
+        }
+
+      // include a safety factor since the CG method will in general not be
+      // converged
+      max_eigenvalue *= 1.2;
+    }
+  else
+    {
+      max_eigenvalue = data.max_eigenvalue;
+      min_eigenvalue = data.max_eigenvalue/data.smoothing_range;
+    }
+
+  // lower end of the interval the polynomial acts on: a smoothing range
+  // larger than one defines it relative to the largest eigenvalue,
+  // otherwise the estimate of the smallest eigenvalue is used, limited to
+  // 0.9 times the largest one
+  const double alpha = (data.smoothing_range > 1. ?
+                        max_eigenvalue / data.smoothing_range :
+                        std::min(0.9*max_eigenvalue, min_eigenvalue));
+  delta = (max_eigenvalue-alpha)*0.5;
+  theta = (max_eigenvalue+alpha)*0.5;
+
+  update1.reinit (data.matrix_diagonal_inverse, true);
+  update2.reinit (data.matrix_diagonal_inverse, true);
+
+  is_initialized = true;
+}
+
+
+
+// Apply the preconditioner: one start-up update followed by data.degree
+// further updates, each of which needs one matrix-vector product. The
+// vectors update1 and update2 serve as work space.
+template <typename MatrixType, class VectorType>
+inline
+void
+PreconditionChebyshev<MatrixType,VectorType>::vmult (VectorType       &dst,
+                                                     const VectorType &src) const
+{
+  Assert (is_initialized, ExcMessage("Preconditioner not initialized"));
+
+  const double sigma = theta / delta;
+  double       rhok  = delta / theta;
+
+  // the content of dst only enters if this has been requested and dst is
+  // not zero anyway; all_zero() is only evaluated if the flag is set
+  const bool use_dst_as_start = data.nonzero_starting && !dst.all_zero();
+  if (use_dst_as_start)
+    matrix_ptr->vmult (update2, dst);
+  internal::PreconditionChebyshev::vector_updates
+  (src, data.matrix_diagonal_inverse, !use_dst_as_start, 0., 1./theta,
+   update1, update2, dst);
+
+  for (unsigned int iteration=0; iteration<data.degree; ++iteration)
+    {
+      matrix_ptr->vmult (update2, dst);
+
+      // coefficients of the recurrence for this iteration
+      const double rhokp   = 1./(2.*sigma-rhok);
+      const double factor1 = rhokp * rhok;
+      const double factor2 = 2.*rhokp/delta;
+      rhok = rhokp;
+
+      internal::PreconditionChebyshev::vector_updates
+      (src, data.matrix_diagonal_inverse, false, factor1, factor2,
+       update1, update2, dst);
+    }
+}
+
+
+
+// Apply the transposed preconditioner. The algorithm is the one of
+// vmult(), with every matrix-vector product replaced by the Tvmult()
+// function of the matrix.
+template <typename MatrixType, class VectorType>
+inline
+void
+PreconditionChebyshev<MatrixType,VectorType>::Tvmult (VectorType       &dst,
+                                                      const VectorType &src) const
+{
+  Assert (is_initialized, ExcMessage("Preconditioner not initialized"));
+
+  const double sigma = theta / delta;
+  double       rhok  = delta / theta;
+
+  // the content of dst only enters if this has been requested and dst is
+  // not zero anyway; all_zero() is only evaluated if the flag is set
+  const bool use_dst_as_start = data.nonzero_starting && !dst.all_zero();
+  if (use_dst_as_start)
+    matrix_ptr->Tvmult (update2, dst);
+  internal::PreconditionChebyshev::vector_updates
+  (src, data.matrix_diagonal_inverse, !use_dst_as_start, 0., 1./theta,
+   update1, update2, dst);
+
+  for (unsigned int iteration=0; iteration<data.degree; ++iteration)
+    {
+      matrix_ptr->Tvmult (update2, dst);
+
+      // coefficients of the recurrence for this iteration
+      const double rhokp   = 1./(2.*sigma-rhok);
+      const double factor1 = rhokp * rhok;
+      const double factor2 = 2.*rhokp/delta;
+      rhok = rhokp;
+
+      internal::PreconditionChebyshev::vector_updates
+      (src, data.matrix_diagonal_inverse, false, factor1, factor2,
+       update1, update2, dst);
+    }
+}
+
+
+
+// Reset the object: mark it as not initialized, drop the pointer to the
+// matrix, and resize the inverse diagonal and both work vectors to zero.
+template <typename MatrixType, typename VectorType>
+inline
+void
+PreconditionChebyshev<MatrixType,VectorType>::clear ()
+{
+  is_initialized = false;
+  matrix_ptr     = 0;
+
+  data.matrix_diagonal_inverse.reinit(0);
+  update1.reinit(0);
+  update2.reinit(0);
+}
+
+
+// Forward to the m() function of the stored matrix. A matrix must have
+// been set through initialize() before.
+template <typename MatrixType, typename VectorType>
+inline
+typename PreconditionChebyshev<MatrixType,VectorType>::size_type
+PreconditionChebyshev<MatrixType,VectorType>::m () const
+{
+  Assert (matrix_ptr!=0, ExcNotInitialized());
+
+  return matrix_ptr->m();
+}
+
+
+// Forward to the n() function of the stored matrix. A matrix must have
+// been set through initialize() before.
+template <typename MatrixType, typename VectorType>
+inline
+typename PreconditionChebyshev<MatrixType,VectorType>::size_type
+PreconditionChebyshev<MatrixType,VectorType>::n () const
+{
+  Assert (matrix_ptr!=0, ExcNotInitialized());
+
+  return matrix_ptr->n();
+}
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/precondition_block.h b/include/deal.II/lac/precondition_block.h
new file mode 100644
index 0000000..fcaab69
--- /dev/null
+++ b/include/deal.II/lac/precondition_block.h
@@ -0,0 +1,1102 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__precondition_block_h
+#define dealii__precondition_block_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/lac/precondition_block_base.h>
+
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+template<typename MatrixType, typename inverse_type>
+class PreconditionBlockJacobi;
+
+/*! @addtogroup Preconditioners
+ *@{
+ */
+
+
+/**
+ * Base class for actual block preconditioners. This class assumes the
+ * <tt>MatrixType</tt> consisting of invertible blocks of @p blocksize on the
+ * diagonal and provides the inversion of the diagonal blocks of the matrix.
+ * It is not necessary for this class that the matrix be block diagonal;
+ * rather, it applies to matrices of arbitrary structure with the minimal
+ * property of having invertible blocks on the diagonal. Still, the matrix must
+ * provide access to single matrix entries. Therefore, BlockMatrixArray and
+ * similar classes are not possible matrix class template arguments.
+ *
+ * The block matrix structure used by this class is given, e.g., for the DG
+ * method for the transport equation. For a downstream numbering the matrices
+ * even have got a block lower left matrix structure, i.e. the matrices are
+ * empty above the diagonal blocks.
+ *
+ * @note This class is intended to be used for matrices whose structure is
+ * given by local contributions from disjoint cells, such as for DG methods.
+ * It is not intended for problems where the block structure results from
+ * different physical variables such as in the Stokes equations considered in
+ * step-22.
+ *
+ * For all matrices that are empty above and below the diagonal blocks (i.e.
+ * for all block diagonal matrices) the @p BlockJacobi preconditioner is a
+ * direct solver. For all matrices that are empty only above the diagonal
+ * blocks (e.g. the matrices one gets by the DG method with downstream
+ * numbering) @p BlockSOR is a direct solver.
+ *
+ * This first implementation of the @p PreconditionBlock assumes the matrix
+ * has blocks each of the same block size. Varying block sizes within the
+ * matrix must still be implemented if needed.
+ *
+ * The first template parameter is the matrix type (its <tt>value_type</tt> is
+ * the number type of the matrix), the second denotes the number type in
+ * which the inverted diagonal block matrices are stored within this class by
+ * <tt>invert_diagblocks()</tt>. If you don't want to use the block inversion
+ * as an exact solver, but rather as a preconditioner, you may want
+ * to store the inverted blocks with less accuracy than the original matrix;
+ * for example, <tt>number==double, inverse_type=float</tt> might be a viable
+ * choice.
+ *
+ * @see
+ * @ref GlossBlockLA "Block (linear algebra)"
+ * @author Ralf Hartmann, Guido Kanschat
+ * @date 1999, 2000, 2010
+ */
+template<typename MatrixType, typename inverse_type = typename MatrixType::value_type>
+class PreconditionBlock
+  : public virtual Subscriptor,
+    protected PreconditionBlockBase<inverse_type>
+{
+private:
+  /**
+   * Define number type of matrix.
+   */
+  typedef typename MatrixType::value_type number;
+
+  /**
+   * Value type for inverse matrices.
+   */
+  typedef inverse_type value_type;
+
+public:
+  /**
+   * Declare type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * Parameters for block preconditioners.
+   */
+  class AdditionalData
+  {
+  public:
+    /**
+     * Constructor. Block size must be given since there is no reasonable
+     * default. #inversion is set to gauss_jordan and #threshold to zero.
+     */
+    AdditionalData (const size_type block_size,
+                    const double relaxation = 1.,
+                    const bool invert_diagonal = true,
+                    const bool same_diagonal = false);
+
+    /**
+     * Relaxation parameter.
+     */
+    double relaxation;
+
+    /**
+     * Block size.
+     */
+    size_type block_size;
+
+    /**
+     * Invert diagonal during initialization.
+     */
+    bool invert_diagonal;
+
+    /**
+     * Assume all diagonal blocks are equal to save memory.
+     */
+    bool same_diagonal;
+    /**
+     * Choose the inversion method for the blocks.
+     */
+    typename PreconditionBlockBase<inverse_type>::Inversion inversion;
+
+    /**
+     * If #inversion is SVD, the threshold below which a singular value
+     * will be considered zero and thus not inverted. This parameter is used
+     * in the call to LAPACKFullMatrix::compute_inverse_svd().
+     */
+    double threshold;
+  };
+
+
+  /**
+   * Constructor. The argument is forwarded to the PreconditionBlockBase constructor.
+   */
+  PreconditionBlock(bool store_diagonals = false);
+
+  /**
+   * Destructor.
+   */
+  ~PreconditionBlock();
+
+  /**
+   * Initialize matrix and block size.  We store the matrix and the block size
+   * in the preconditioner object. In a second step, the inverses of the
+   * diagonal blocks may be computed.
+   *
+   * Additionally, a relaxation parameter for derived classes may be provided.
+   */
+  void initialize (const MatrixType &A,
+                   const AdditionalData parameters);
+protected:
+  /**
+   * Initialize matrix and block size for permuted preconditioning.
+   * Additionally to the parameters of the other initialize() function, we
+   * hand over two index vectors with the permutation and its inverse. For the
+   * meaning of these vectors see PreconditionBlockSOR.
+   *
+   * In a second step, the inverses of the diagonal blocks may be computed.
+   * Make sure you use invert_permuted_diagblocks() to yield consistent data.
+   *
+   * Additionally, a relaxation parameter for derived classes may be provided.
+   */
+  void initialize (const MatrixType &A,
+                   const std::vector<size_type> &permutation,
+                   const std::vector<size_type> &inverse_permutation,
+                   const AdditionalData parameters);
+
+  /**
+   * Set either the permutation of rows or the permutation of blocks,
+   * depending on the size of the vector.
+   *
+   * If the size of the permutation vectors is equal to the dimension of the
+   * linear system, it is assumed that rows are permuted individually. In this
+   * case, set_permutation() must be called before initialize(), since the
+   * diagonal blocks are built from the permuted entries of the matrix.
+   *
+   * If the size of the permutation vector is not equal to the dimension of
+   * the system, the diagonal blocks are computed from the unpermuted entries.
+   * Instead, the relaxation methods step() and Tstep() are executed applying
+   * the blocks in the order given by the permutation vector. They will throw
+   * an exception if length of this vector is not equal to the number of
+   * blocks.
+   *
+   * @note Permutation of blocks can only be applied to the relaxation
+   * operators step() and Tstep(), not to the preconditioning operators
+   * vmult() and Tvmult().
+   *
+   * @note It is safe to call set_permutation() before initialize(), while the
+   * other order is only admissible for block permutation.
+   */
+  void set_permutation(const std::vector<size_type> &permutation,
+                       const std::vector<size_type> &inverse_permutation);
+
+  /**
+   * Replacement of invert_diagblocks() for permuted preconditioning.
+   */
+  void invert_permuted_diagblocks(const std::vector<size_type> &permutation,
+                                  const std::vector<size_type> &inverse_permutation);
+public:
+  /**
+   * Deletes the inverse diagonal block matrices if existent, sets the
+   * blocksize to 0, hence leaves the class in the state that it had directly
+   * after calling the constructor.
+   */
+  void clear();
+
+  /**
+   * Checks whether the object is empty.
+   */
+  bool empty () const;
+
+  /**
+   * Read-only access to entries. This function is only possible if the
+   * inverse diagonal blocks are stored.
+   */
+  value_type el(size_type i,
+                size_type j) const;
+
+  /**
+   * Stores the inverse of the diagonal blocks in @p inverse. This costs some
+   * additional memory - for DG methods about 1/3 (for double inverses) or 1/6
+   * (for float inverses) of that used for the matrix - but it makes the
+   * preconditioning much faster.
+   *
+   * It is not allowed to call this function twice (will produce an error)
+   * before a call of <tt>clear(...)</tt>  because at the second time there
+   * already exist the inverse matrices.
+   *
+   * After this function is called, the lock on the matrix given through the
+   * @p use_matrix function is released, i.e. you may overwrite or delete it.
+   * You may want to do this in case you use this matrix to precondition
+   * another matrix.
+   */
+  void invert_diagblocks();
+
+  /**
+   * Perform one block relaxation step in forward numbering.
+   *
+   * Depending on the arguments @p dst and @p prev, this performs an SOR step
+   * (both reference the same vector) or a Jacobi step (both different
+   * vectors). For the Jacobi step, the calling function must copy @p dst to
+   * @p prev after this.
+   *
+   * @note If a permutation is set, it is automatically honored by this
+   * function.
+   */
+  template <typename number2>
+  void forward_step (Vector<number2>       &dst,
+                     const Vector<number2> &prev,
+                     const Vector<number2> &src,
+                     const bool             transpose_diagonal) const;
+
+  /**
+   * Perform one block relaxation step in backward numbering.
+   *
+   * Depending on the arguments @p dst and @p prev, this performs an SOR step
+   * (both reference the same vector) or a Jacobi step (both different
+   * vectors). For the Jacobi step, the calling function must copy @p dst to
+   * @p prev after this.
+   *
+   * @note If a permutation is set, it is automatically honored by this
+   * function.
+   */
+  template <typename number2>
+  void backward_step (Vector<number2>       &dst,
+                      const Vector<number2> &prev,
+                      const Vector<number2> &src,
+                      const bool             transpose_diagonal) const;
+
+
+  /**
+   * Return the size of the blocks.
+   */
+  size_type block_size () const;
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  std::size_t memory_consumption () const;
+
+  /**
+   * @addtogroup Exceptions
+   * @{
+   */
+
+  /**
+   * For non-overlapping block preconditioners, the block size must divide the
+   * matrix size. If not, this exception gets thrown.
+   */
+  DeclException2 (ExcWrongBlockSize,
+                  int, int,
+                  << "The blocksize " << arg1
+                  << " and the size of the matrix " << arg2
+                  << " do not match.");
+
+  /**
+   * Exception
+   */
+  DeclException0 (ExcInverseMatricesAlreadyExist);
+
+  //@}
+
+protected:
+  /**
+   * Size of the blocks. Each diagonal block is assumed to be of the same
+   * size.
+   */
+  size_type blocksize;
+
+  /**
+   * Pointer to the matrix. Make sure that the matrix exists as long as this
+   * class needs it, i.e. until calling @p invert_diagblocks, or (if the
+   * inverse matrices should not be stored) until the last call of the
+   * preconditioning @p vmult function of the derived classes.
+   */
+  SmartPointer<const MatrixType,PreconditionBlock<MatrixType,inverse_type> > A;
+  /**
+   * Relaxation parameter to be used by derived classes.
+   */
+  double relaxation;
+
+  /**
+   * The permutation vector
+   */
+  std::vector<size_type> permutation;
+
+  /**
+   * The inverse permutation vector
+   */
+  std::vector<size_type> inverse_permutation;
+};
+
+
+
+/**
+ * Block Jacobi preconditioning. See PreconditionBlock for requirements on the
+ * matrix. This class satisfies the
+ * @ref ConceptRelaxationType "relaxation concept".
+ *
+ * @note Instantiations for this template are provided for <tt>@<float@> and
+ * @<double@></tt>; others can be generated in application programs (see the
+ * section on
+ * @ref Instantiations
+ * in the manual).
+ *
+ * @author Ralf Hartmann, Guido Kanschat, 1999, 2000, 2003
+ */
+template<typename MatrixType, typename inverse_type = typename MatrixType::value_type>
+class PreconditionBlockJacobi : public virtual Subscriptor,
+  private PreconditionBlock<MatrixType, inverse_type>
+{
+private:
+  /**
+   * Define number type of matrix.
+   */
+  typedef typename MatrixType::value_type number;
+
+public:
+  /**
+   * Declare type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * Standard-conforming iterator.
+   */
+  class const_iterator
+  {
+  private:
+    /**
+     * Accessor class for iterators
+     */
+    class Accessor
+    {
+    public:
+      /**
+       * Constructor. Since we use accessors only for read access, a const
+       * matrix pointer is sufficient.
+       */
+      Accessor (const PreconditionBlockJacobi<MatrixType, inverse_type> *matrix,
+                const size_type row);
+
+      /**
+       * Row number of the element represented by this object.
+       */
+      size_type row() const;
+
+      /**
+       * Column number of the element represented by this object.
+       */
+      size_type column() const;
+
+      /**
+       * Value of this matrix entry.
+       */
+      inverse_type value() const;
+
+    protected:
+      /**
+       * The matrix accessed.
+       */
+      const PreconditionBlockJacobi<MatrixType, inverse_type> *matrix;
+
+      /**
+       * Save block size here for further reference.
+       */
+      size_type bs;
+
+      /**
+       * Current block number.
+       */
+      size_type a_block;
+
+      /**
+       * Iterator inside block.
+       */
+      typename FullMatrix<inverse_type>::const_iterator b_iterator;
+
+      /**
+       * End of current block.
+       */
+      typename FullMatrix<inverse_type>::const_iterator b_end;
+
+      /**
+       * Make enclosing class a friend.
+       */
+      friend class const_iterator;
+    };
+
+  public:
+    /**
+     * Constructor.
+     */
+    const_iterator(const PreconditionBlockJacobi<MatrixType, inverse_type> *matrix,
+                   const size_type row);
+
+    /**
+     * Prefix increment.
+     */
+    const_iterator &operator++ ();
+
+    /**
+     * Postfix increment. NOTE(review): returns a reference -- confirm intent.
+     */
+    const_iterator &operator++ (int);
+
+    /**
+     * Dereferencing operator.
+     */
+    const Accessor &operator* () const;
+
+    /**
+     * Member access operator; returns a pointer to the stored accessor.
+     */
+    const Accessor *operator-> () const;
+
+    /**
+     * Comparison. True, if both iterators point to the same matrix position.
+     */
+    bool operator == (const const_iterator &) const;
+    /**
+     * Inverse of <tt>==</tt>.
+     */
+    bool operator != (const const_iterator &) const;
+
+    /**
+     * Comparison operator. Result is true if either the first row number is
+     * smaller or if the row numbers are equal and the first column is smaller.
+     */
+    bool operator < (const const_iterator &) const;
+
+  private:
+    /**
+     * Store an object of the accessor class.
+     */
+    Accessor accessor;
+  };
+
+  /**
+   * Import functions from the private base class.
+   */
+  using PreconditionBlock<MatrixType, inverse_type>::initialize;
+  using PreconditionBlock<MatrixType, inverse_type>::clear;
+  using PreconditionBlock<MatrixType, inverse_type>::empty;
+  using PreconditionBlock<MatrixType, inverse_type>::el;
+  using PreconditionBlock<MatrixType, inverse_type>::set_same_diagonal;
+  using PreconditionBlock<MatrixType, inverse_type>::invert_diagblocks;
+  using PreconditionBlock<MatrixType, inverse_type>::block_size;
+  using PreconditionBlockBase<inverse_type>::size;
+  using PreconditionBlockBase<inverse_type>::inverse;
+  using PreconditionBlockBase<inverse_type>::inverse_householder;
+  using PreconditionBlockBase<inverse_type>::inverse_svd;
+  using PreconditionBlockBase<inverse_type>::log_statistics;
+  using PreconditionBlock<MatrixType, inverse_type>::set_permutation;
+
+  /**
+   * Execute block Jacobi preconditioning.
+   *
+   * This function will automatically use the inverse matrices if they exist,
+   * if not then BlockJacobi will need much time inverting the diagonal block
+   * matrices in each preconditioning step.
+   */
+  template <typename number2>
+  void vmult (Vector<number2> &, const Vector<number2> &) const;
+
+  /**
+   * Same as @p vmult, since Jacobi is symmetric.
+   */
+  template <typename number2>
+  void Tvmult (Vector<number2> &, const Vector<number2> &) const;
+  /**
+   * Execute block Jacobi preconditioning, adding to @p dst.
+   *
+   * This function will automatically use the inverse matrices if they exist,
+   * if not then BlockJacobi will need much time inverting the diagonal block
+   * matrices in each preconditioning step.
+   */
+  template <typename number2>
+  void vmult_add (Vector<number2> &, const Vector<number2> &) const;
+
+  /**
+   * Same as @p vmult_add, since Jacobi is symmetric.
+   */
+  template <typename number2>
+  void Tvmult_add (Vector<number2> &, const Vector<number2> &) const;
+
+  /**
+   * Perform one step of the Jacobi iteration.
+   */
+  template <typename number2>
+  void step (Vector<number2> &dst, const Vector<number2> &rhs) const;
+
+  /**
+   * Perform one step of the Jacobi iteration.
+   */
+  template <typename number2>
+  void Tstep (Vector<number2> &dst, const Vector<number2> &rhs) const;
+
+  /**
+   * Iterator starting at the first entry.
+   */
+  const_iterator begin () const;
+
+  /**
+   * Final iterator.
+   */
+  const_iterator end () const;
+
+  /**
+   * Iterator starting at the first entry of row @p r.
+   */
+  const_iterator begin (const size_type r) const;
+
+  /**
+   * Final iterator of row @p r.
+   */
+  const_iterator end (const size_type r) const;
+
+
+private:
+  /**
+   * Actual implementation of the preconditioner.
+   *
+   * Depending on @p adding, the result of preconditioning is added to the
+   * destination vector.
+   */
+  template <typename number2>
+  void do_vmult (Vector<number2> &,
+                 const Vector<number2> &,
+                 bool adding) const;
+
+  friend class Accessor;
+  friend class const_iterator;
+};
+
+
+
+/**
+ * Block SOR preconditioning. This class satisfies the
+ * @ref ConceptRelaxationType "relaxation concept".
+ *
+ * The functions @p vmult and @p Tvmult execute a (transposed) block-SOR step,
+ * based on the blocks in PreconditionBlock. The elements outside the diagonal
+ * blocks may be distributed arbitrarily.
+ *
+ * See PreconditionBlock for requirements on the matrix. The blocks used in
+ * this class must be contiguous and non-overlapping. An overlapping Schwarz
+ * relaxation method can be found in RelaxationBlockSOR; that class does not
+ * offer preconditioning, though.
+ *
+ * <h3>Permutations</h3>
+ *
+ * Optionally, the entries of the source vector can be treated in the order of
+ * the indices in the permutation vector set by #set_permutation (or the
+ * opposite order for Tvmult()). The inverse permutation is used for storing
+ * elements back into this vector. This functionality is automatically enabled
+ * after a call to set_permutation() with vectors of nonzero size.
+ *
+ * @note The diagonal blocks, like the matrix, are not permuted! Therefore,
+ * the permutation vector can only swap whole blocks. It may not change the
+ * order inside blocks or swap single indices between blocks.
+ *
+ * <h3>Instantiations</h3>
+ *
+ * @note Instantiations for this template are provided for <tt>@<float@> and
+ * @<double@></tt>; others can be generated in application programs (see the
+ * section on
+ * @ref Instantiations
+ * in the manual).
+ *
+ * @author Ralf Hartmann, Guido Kanschat, 1999, 2000, 2001, 2002, 2003
+ */
+template<typename MatrixType, typename inverse_type = typename MatrixType::value_type>
+class PreconditionBlockSOR : public virtual Subscriptor,
+  protected PreconditionBlock<MatrixType, inverse_type>
+{
+public:
+  /**
+   * Declare type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * Default constructor.
+   */
+  PreconditionBlockSOR();
+
+  /**
+   * Define number type of matrix.
+   */
+  typedef typename MatrixType::value_type number;
+
+  /**
+   * Import types and functions from the protected base class.
+   */
+  using typename PreconditionBlock<MatrixType,inverse_type>::AdditionalData;
+  using PreconditionBlock<MatrixType, inverse_type>::initialize;
+  using PreconditionBlock<MatrixType, inverse_type>::clear;
+  using PreconditionBlock<MatrixType, inverse_type>::empty;
+  using PreconditionBlockBase<inverse_type>::size;
+  using PreconditionBlockBase<inverse_type>::inverse;
+  using PreconditionBlockBase<inverse_type>::inverse_householder;
+  using PreconditionBlockBase<inverse_type>::inverse_svd;
+  using PreconditionBlock<MatrixType, inverse_type>::el;
+  using PreconditionBlock<MatrixType, inverse_type>::set_same_diagonal;
+  using PreconditionBlock<MatrixType, inverse_type>::invert_diagblocks;
+  using PreconditionBlock<MatrixType, inverse_type>::set_permutation;
+  using PreconditionBlockBase<inverse_type>::log_statistics;
+
+  /**
+   * Execute block SOR preconditioning.
+   *
+   * This function will automatically use the inverse matrices if they exist,
+   * if not then BlockSOR will waste much time inverting the diagonal block
+   * matrices in each preconditioning step.
+   *
+   * For matrices which are empty above the diagonal blocks BlockSOR is a
+   * direct solver.
+   */
+  template <typename number2>
+  void vmult (Vector<number2> &, const Vector<number2> &) const;
+
+  /**
+   * Execute block SOR preconditioning.
+   *
+   * Warning: this function performs normal @p vmult without adding. The
+   * reason for its existence is that BlockMatrixArray requires the adding
+   * version by default. On the other hand, adding requires an additional
+   * auxiliary vector, which is not desirable.
+   *
+   * @see vmult
+   */
+  template <typename number2>
+  void vmult_add (Vector<number2> &, const Vector<number2> &) const;
+
+  /**
+   * Backward application of vmult().
+   *
+   * In the current implementation, this is not the transpose of vmult(). It
+   * is a transposed Gauss-Seidel algorithm applied to the whole matrix, but
+   * the diagonal blocks being inverted are not transposed. Therefore, it is
+   * the transpose if the diagonal blocks are symmetric.
+   */
+  template <typename number2>
+  void Tvmult (Vector<number2> &, const Vector<number2> &) const;
+
+  /**
+   * Execute backward block SOR preconditioning.
+   *
+   * Warning: this function performs normal @p vmult without adding. The
+   * reason for its existence is that BlockMatrixArray requires the adding
+   * version by default. On the other hand, adding requires an additional
+   * auxiliary vector, which is not desirable.
+   *
+   * @see vmult
+   */
+  template <typename number2>
+  void Tvmult_add (Vector<number2> &, const Vector<number2> &) const;
+
+  /**
+   * Perform one step of the SOR iteration.
+   */
+  template <typename number2>
+  void step (Vector<number2> &dst, const Vector<number2> &rhs) const;
+
+  /**
+   * Perform one step of the transposed SOR iteration.
+   */
+  template <typename number2>
+  void Tstep (Vector<number2> &dst, const Vector<number2> &rhs) const;
+
+protected:
+  /**
+   * Constructor to be used by PreconditionBlockSSOR.
+   */
+  PreconditionBlockSOR(bool store);
+
+  /**
+   * Implementation of the forward substitution loop called by vmult() and
+   * vmult_add().
+   *
+   * If a #permutation is set by set_permutation(), it will automatically be
+   * honored by this function.
+   *
+   * The parameter @p adding does not have any function, yet.
+   */
+  template <typename number2>
+  void forward (Vector<number2> &,
+                const Vector<number2> &,
+                const bool transpose_diagonal,
+                const bool adding) const;
+
+  /**
+   * Implementation of the backward substitution loop called by Tvmult() and
+   * Tvmult_add().
+   *
+   * If a #permutation is set by set_permutation(), it will automatically be
+   * honored by this function.
+   *
+   * The parameter @p adding does not have any function, yet.
+   */
+  template <typename number2>
+  void backward (Vector<number2> &,
+                 const Vector<number2> &,
+                 const bool transpose_diagonal,
+                 const bool adding) const;
+};
+
+
+/**
+ * Block SSOR preconditioning. This class satisfies the
+ * @ref ConceptRelaxationType "relaxation concept".
+ *
+ * The functions @p vmult and @p Tvmult execute a block-SSOR step, based on
+ * the implementation in PreconditionBlockSOR.  This class requires storage of
+ * the diagonal blocks and their inverses.
+ *
+ * See PreconditionBlock for requirements on the matrix. The blocks used in
+ * this class must be contiguous and non-overlapping. An overlapping Schwarz
+ * relaxation method can be found in RelaxationBlockSSOR; that class does not
+ * offer preconditioning, though.
+ *
+ * @note Instantiations for this template are provided for <tt>@<float@> and
+ * @<double@></tt>; others can be generated in application programs (see the
+ * section on
+ * @ref Instantiations
+ * in the manual).
+ *
+ * @author Ralf Hartmann, Guido Kanschat, 1999, 2000
+ */
+template<typename MatrixType, typename inverse_type = typename MatrixType::value_type>
+class PreconditionBlockSSOR : public virtual Subscriptor,
+  private PreconditionBlockSOR<MatrixType, inverse_type>
+{
+public:
+  /**
+   * Declare type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * Define number type of matrix.
+   */
+  typedef typename MatrixType::value_type number;
+
+  /**
+   * Constructor.
+   */
+  PreconditionBlockSSOR ();
+
+  // Keep AdditionalData accessible
+  using typename PreconditionBlockSOR<MatrixType,inverse_type>::AdditionalData;
+
+  // The following are the
+  // functions of the base classes
+  // which we want to keep
+  // accessible.
+  /**
+   * Make initialization function publicly available.
+   */
+  using PreconditionBlockSOR<MatrixType,inverse_type>::initialize;
+  using PreconditionBlockSOR<MatrixType,inverse_type>::clear;
+  using PreconditionBlockBase<inverse_type>::size;
+  using PreconditionBlockBase<inverse_type>::inverse;
+  using PreconditionBlockBase<inverse_type>::inverse_householder;
+  using PreconditionBlockBase<inverse_type>::inverse_svd;
+  using PreconditionBlockBase<inverse_type>::log_statistics;
+  using PreconditionBlockSOR<MatrixType,inverse_type>::set_permutation;
+  using PreconditionBlockSOR<MatrixType, inverse_type>::empty;
+  using PreconditionBlockSOR<MatrixType, inverse_type>::el;
+  using PreconditionBlockSOR<MatrixType,inverse_type>::set_same_diagonal;
+  using PreconditionBlockSOR<MatrixType,inverse_type>::invert_diagblocks;
+
+  /**
+   * Execute block SSOR preconditioning.
+   *
+   * This function will automatically use the inverse matrices if they exist,
+   * if not then BlockSOR will waste much time inverting the diagonal block
+   * matrices in each preconditioning step.
+   */
+  template <typename number2>
+  void vmult (Vector<number2> &, const Vector<number2> &) const;
+
+  /**
+   * Same as vmult()
+   */
+  template <typename number2>
+  void Tvmult (Vector<number2> &, const Vector<number2> &) const;
+
+  /**
+   * Perform one step of the SOR iteration (presumably SSOR here; TODO confirm).
+   */
+  template <typename number2>
+  void step (Vector<number2> &dst, const Vector<number2> &rhs) const;
+
+  /**
+   * Perform one step of the transposed SOR iteration (presumably SSOR here; TODO confirm).
+   */
+  template <typename number2>
+  void Tstep (Vector<number2> &dst, const Vector<number2> &rhs) const;
+};
+
+/*@}*/
+//---------------------------------------------------------------------------
+
+#ifndef DOXYGEN
+// Report true if no matrix is attached; otherwise return what the attached matrix's empty() says.
+template<typename MatrixType, typename inverse_type>
+inline bool
+PreconditionBlock<MatrixType, inverse_type>::empty () const
+{
+  if (A == 0)
+    return true;
+  return A->empty();
+}
+
+// Entry (i,j) of the stored block inverses; zero if column j is outside the diagonal block of row i.
+template<typename MatrixType, typename inverse_type>
+inline inverse_type
+PreconditionBlock<MatrixType, inverse_type>::el (
+  size_type i,
+  size_type j) const
+{
+  const size_type bs = blocksize;
+  const size_type nb = i/bs; // block index; kept as size_type so i/bs is never narrowed
+
+  const FullMatrix<inverse_type> &B = this->inverse(nb);
+
+  const size_type ib = i % bs;
+  const size_type jb = j % bs;
+  // j is in block nb only if the block start plus its in-block offset reproduces j.
+  if (jb + nb*bs != j)
+    {
+      return 0.;
+    }
+
+  return B(ib, jb);
+}
+
+//---------------------------------------------------------------------------
+// Set up the accessor for a global row; a block index equal to the number of blocks gives the end accessor.
+template<typename MatrixType, typename inverse_type>
+inline
+PreconditionBlockJacobi<MatrixType, inverse_type>::const_iterator::Accessor::
+Accessor (const PreconditionBlockJacobi<MatrixType, inverse_type> *matrix,
+          const size_type row)
+  :
+  matrix(matrix),
+  b_iterator(&matrix->inverse(0), 0, 0),
+  b_end(&matrix->inverse(0), 0, 0)
+{
+  bs = matrix->block_size();
+  a_block = row / bs;
+
+  // This is the end accessor, which
+  // does not have a valid block.
+  if (a_block == matrix->size())
+    return;
+
+  // Check the block index before it is used to fetch a block below.
+  Assert (a_block < matrix->size(),
+          ExcIndexRange(a_block, 0, matrix->size()));
+  const size_type r = row % bs;
+
+  b_iterator = matrix->inverse(a_block).begin(r);
+  b_end = matrix->inverse(a_block).end();
+}
+
+// Global row: start of the current block (bs * a_block) plus the row inside that block.
+template<typename MatrixType, typename inverse_type>
+inline
+typename PreconditionBlockJacobi<MatrixType, inverse_type>::size_type
+PreconditionBlockJacobi<MatrixType, inverse_type>::const_iterator::Accessor::row() const
+{
+  Assert (a_block < matrix->size(),
+          ExcIteratorPastEnd());
+
+  return bs * a_block + b_iterator->row();
+}
+
+// Global column: start of the current block (bs * a_block) plus the column inside that block.
+template<typename MatrixType, typename inverse_type>
+inline
+typename PreconditionBlockJacobi<MatrixType, inverse_type>::size_type
+PreconditionBlockJacobi<MatrixType, inverse_type>::const_iterator::Accessor::column() const
+{
+  Assert (a_block < matrix->size(),
+          ExcIteratorPastEnd());
+
+  return bs * a_block + b_iterator->column();
+}
+
+// Value the in-block iterator currently points to; asserts that the accessor is not past the end.
+template<typename MatrixType, typename inverse_type>
+inline
+inverse_type
+PreconditionBlockJacobi<MatrixType, inverse_type>::const_iterator::Accessor::value() const
+{
+  Assert (a_block < matrix->size(),
+          ExcIteratorPastEnd());
+
+  return b_iterator->value();
+}
+
+// Construct the iterator by building its accessor for the given matrix and global row.
+template<typename MatrixType, typename inverse_type>
+inline
+PreconditionBlockJacobi<MatrixType, inverse_type>::const_iterator::
+const_iterator(const PreconditionBlockJacobi<MatrixType, inverse_type> *matrix,
+               const size_type row)
+  :
+  accessor(matrix, row)
+{}
+
+// Prefix increment: step to the next entry, moving on to the next diagonal block when one is exhausted.
+template<typename MatrixType, typename inverse_type>
+inline
+typename PreconditionBlockJacobi<MatrixType, inverse_type>::const_iterator &
+PreconditionBlockJacobi<MatrixType, inverse_type>::const_iterator::operator++ ()
+{
+  Assert (*this != accessor.matrix->end(), ExcIteratorPastEnd());
+  // Advance inside the current block first.
+  ++accessor.b_iterator;
+  if (accessor.b_iterator == accessor.b_end)
+    {
+      ++accessor.a_block;
+      // Re-seat the in-block iterators unless we just stepped past the last block.
+      if (accessor.a_block < accessor.matrix->size())
+        {
+          accessor.b_iterator = accessor.matrix->inverse(accessor.a_block).begin();
+          accessor.b_end = accessor.matrix->inverse(accessor.a_block).end();
+        }
+    }
+  return *this;
+}
+
+// Dereference: return a reference to the stored accessor.
+template<typename MatrixType, typename inverse_type>
+inline
+const typename PreconditionBlockJacobi<MatrixType, inverse_type>::const_iterator::Accessor &
+PreconditionBlockJacobi<MatrixType, inverse_type>::const_iterator::operator* () const
+{
+  return accessor;
+}
+
+// Member access: return a pointer to the stored accessor.
+template<typename MatrixType, typename inverse_type>
+inline
+const typename PreconditionBlockJacobi<MatrixType, inverse_type>::const_iterator::Accessor *
+PreconditionBlockJacobi<MatrixType, inverse_type>::const_iterator::operator-> () const
+{
+  return &accessor;
+}
+
+// Equality: two past-the-end iterators are equal; otherwise block, row and column must all agree.
+template<typename MatrixType, typename inverse_type>
+inline
+bool
+PreconditionBlockJacobi<MatrixType, inverse_type>::const_iterator::
+operator == (const const_iterator &other) const
+{
+  if (accessor.a_block == accessor.matrix->size() &&
+      accessor.a_block == other.accessor.a_block)
+    return true; // both are past the end; row()/column() must not be called here
+  // Iterators in different blocks cannot refer to the same position.
+  if (accessor.a_block != other.accessor.a_block)
+    return false;
+  // Same (valid) block: compare the global row and column.
+  return (accessor.row() == other.accessor.row() &&
+          accessor.column() == other.accessor.column());
+}
+
+// Inequality, defined as the negation of operator==.
+template<typename MatrixType, typename inverse_type>
+inline
+bool
+PreconditionBlockJacobi<MatrixType, inverse_type>::const_iterator::
+operator != (const const_iterator &other) const
+{
+  return ! (*this == other);
+}
+
+// Lexicographic ordering by (row, column); row() and column() assert that neither side is past the end.
+template<typename MatrixType, typename inverse_type>
+inline
+bool
+PreconditionBlockJacobi<MatrixType, inverse_type>::const_iterator::
+operator < (const const_iterator &other) const
+{
+  return (accessor.row() < other.accessor.row() ||
+          (accessor.row() == other.accessor.row() &&
+           accessor.column() < other.accessor.column()));
+}
+
+// Iterator constructed for global row 0.
+template<typename MatrixType, typename inverse_type>
+inline
+typename PreconditionBlockJacobi<MatrixType, inverse_type>::const_iterator
+PreconditionBlockJacobi<MatrixType, inverse_type>::begin () const
+{
+  return const_iterator(this, 0);
+}
+
+// End iterator: built for row size()*block_size(), whose block index equals the number of blocks.
+template<typename MatrixType, typename inverse_type>
+inline
+typename PreconditionBlockJacobi<MatrixType, inverse_type>::const_iterator
+PreconditionBlockJacobi<MatrixType, inverse_type>::end () const
+{
+  return const_iterator(this, this->size() * this->block_size());
+}
+
+// Iterator constructed for global row r; asserts r < A->m().
+template<typename MatrixType, typename inverse_type>
+inline
+typename PreconditionBlockJacobi<MatrixType, inverse_type>::const_iterator
+PreconditionBlockJacobi<MatrixType, inverse_type>::begin (
+  const size_type r) const
+{
+  Assert (r < this->A->m(), ExcIndexRange(r, 0, this->A->m()));
+  return const_iterator(this, r);
+}
+
+
+// End of row r, expressed as the iterator constructed for row r+1; asserts r < A->m().
+template<typename MatrixType, typename inverse_type>
+inline
+typename PreconditionBlockJacobi<MatrixType, inverse_type>::const_iterator
+PreconditionBlockJacobi<MatrixType, inverse_type>::end (
+  const size_type r) const
+{
+  Assert (r < this->A->m(), ExcIndexRange(r, 0, this->A->m()));
+  return const_iterator(this, r+1);
+}
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/precondition_block.templates.h b/include/deal.II/lac/precondition_block.templates.h
new file mode 100644
index 0000000..410620c
--- /dev/null
+++ b/include/deal.II/lac/precondition_block.templates.h
@@ -0,0 +1,1067 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__precondition_block_templates_h
+#define dealii__precondition_block_templates_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/lac/householder.h>
+#include <deal.II/lac/precondition_block.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/full_matrix.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// Store the user supplied parameters. The inversion method is preset
+// to Gauss-Jordan and the threshold to zero; neither can be chosen
+// through this constructor.
+template<typename MatrixType, typename inverse_type>
+PreconditionBlock<MatrixType, inverse_type>::AdditionalData::
+AdditionalData (const size_type block_size,
+                const double    relaxation,
+                const bool      invert_diagonal,
+                const bool      same_diagonal)
+  : relaxation      (relaxation),
+    block_size      (block_size),
+    invert_diagonal (invert_diagonal),
+    same_diagonal   (same_diagonal),
+    inversion       (PreconditionBlockBase<inverse_type>::gauss_jordan),
+    threshold       (0.)
+{
+}
+
+
+// Destructor; there is nothing to release explicitly here.
+template <typename number>
+PreconditionBlockBase<number>::~PreconditionBlockBase ()
+{
+}
+
+
+// Construct an empty preconditioner: block size zero and no matrix
+// attached. The flag is forwarded unchanged to the base class.
+template <typename MatrixType, typename inverse_type>
+PreconditionBlock<MatrixType,inverse_type>::PreconditionBlock (bool store)
+  :
+  PreconditionBlockBase<inverse_type>(store),
+  blocksize(0),
+  A(0, typeid(*this).name())
+{
+}
+
+
+// Destructor; members clean up after themselves.
+template <typename MatrixType, typename inverse_type>
+PreconditionBlock<MatrixType,inverse_type>::~PreconditionBlock ()
+{
+}
+
+
+// Return to the state after default construction: let the base class
+// clear its data, then forget the block size and the matrix pointer.
+// The permutation vectors are not touched by this function.
+template <typename MatrixType, typename inverse_type>
+void PreconditionBlock<MatrixType,inverse_type>::clear ()
+{
+  PreconditionBlockBase<inverse_type>::clear();
+
+  this->blocksize = 0;
+  this->A         = 0;
+}
+
+
+// Attach the preconditioner to the matrix M and set it up according
+// to the given parameters. If requested, the diagonal blocks are
+// inverted right away; the permuted variant is used when a permutation
+// covering every row of M has been set beforehand.
+template <typename MatrixType, typename inverse_type>
+void PreconditionBlock<MatrixType,inverse_type>::initialize
+(const MatrixType     &M,
+ const AdditionalData  parameters)
+{
+  const size_type requested_block_size = parameters.block_size;
+
+  // Drop any previous state (this leaves the permutation alone).
+  clear();
+
+  Assert (M.m() == M.n(), ExcNotQuadratic());
+  A = &M;
+
+  // The block size must be positive and divide the matrix dimension.
+  Assert (requested_block_size>0,
+          ExcIndexRange(requested_block_size, 1, M.m()));
+  Assert (A->m()%requested_block_size==0,
+          ExcWrongBlockSize(requested_block_size, A->m()));
+
+  blocksize  = requested_block_size;
+  relaxation = parameters.relaxation;
+
+  const unsigned int n_blocks = A->m()/requested_block_size;
+  this->reinit(n_blocks,
+               blocksize,
+               parameters.same_diagonal,
+               parameters.inversion);
+
+  // Nothing more to do unless the inverses are wanted now.
+  if (!parameters.invert_diagonal)
+    return;
+
+  const bool have_full_permutation = (permutation.size() == M.m());
+  if (have_full_permutation)
+    invert_permuted_diagblocks(permutation, inverse_permutation);
+  else
+    invert_diagblocks();
+}
+
+
+// Convenience overload: store the permutation and its inverse first,
+// then run the regular initialization, which picks them up.
+template <typename MatrixType, typename inverse_type>
+void PreconditionBlock<MatrixType,inverse_type>::initialize
+(const MatrixType             &M,
+ const std::vector<size_type> &permutation,
+ const std::vector<size_type> &inverse_permutation,
+ const AdditionalData          parameters)
+{
+  this->set_permutation(permutation, inverse_permutation);
+  this->initialize(M, parameters);
+}
+
+// Extract the diagonal blocks of the matrix with respect to the given
+// permutation of the unknowns and compute their inverses using the
+// inversion method selected in the base class.
+//
+// permutation[k] is the matrix row that holds the k-th unknown in the
+// permuted (block-wise) numbering; inverse_permutation[c] maps a matrix
+// column back to its position in the permuted numbering. Both vectors
+// must have as many entries as the matrix has rows.
+//
+// If the original diagonal blocks are to be stored, a copy of each
+// block is kept as well. On exit the inverses are marked as computed.
+template <typename MatrixType, typename inverse_type>
+void PreconditionBlock<MatrixType,inverse_type>::invert_permuted_diagblocks
+(const std::vector<size_type> &permutation,
+ const std::vector<size_type> &inverse_permutation)
+{
+  Assert (A!=0, ExcNotInitialized());
+  Assert (blocksize!=0, ExcNotInitialized());
+
+  const MatrixType &M=*A;
+  Assert (this->inverses_ready()==0, ExcInverseMatricesAlreadyExist());
+  AssertDimension (permutation.size(), M.m());
+  AssertDimension (inverse_permutation.size(), M.m());
+
+  FullMatrix<inverse_type> M_cell(blocksize);
+
+  if (this->same_diagonal())
+    {
+      deallog << "PreconditionBlock uses only one diagonal block" << std::endl;
+
+      // Only the leading block is extracted and it stands in for all
+      // others.
+      // NOTE(review): this branch reads the block in the unpermuted
+      // numbering and does not consult the permutation at all --
+      // confirm that this is intended.
+      for (size_type row_cell=0; row_cell<blocksize; ++row_cell)
+        {
+          typename MatrixType::const_iterator entry = M.begin(row_cell);
+          const typename MatrixType::const_iterator row_end = M.end(row_cell);
+          while (entry != row_end)
+            {
+              if (entry->column() < blocksize)
+                M_cell(row_cell, entry->column()) = entry->value();
+              ++entry;
+            }
+        }
+      if (this->store_diagonals())
+        this->diagonal(0) = M_cell;
+      switch (this->inversion)
+        {
+        case PreconditionBlockBase<inverse_type>::gauss_jordan:
+          this->inverse(0).invert(M_cell);
+          break;
+        case PreconditionBlockBase<inverse_type>::householder:
+          this->inverse_householder(0).initialize(M_cell);
+          break;
+        case PreconditionBlockBase<inverse_type>::svd:
+          this->inverse_svd(0) = M_cell;
+          this->inverse_svd(0).compute_inverse_svd(0.);
+          break;
+        default:
+          Assert(false, ExcNotImplemented());
+
+        }
+    }
+  else
+    {
+      // cell is the number of the block, row_cell and column_cell are
+      // the local numbering of the unknowns inside a block, row is the
+      // global (matrix) numbering of the unknowns.
+      for (unsigned int cell=0; cell<this->size(); ++cell)
+        {
+          // Start every block from zero. Only entries present in the
+          // sparsity pattern are written below, so without this reset
+          // values of the previous block would leak into positions
+          // the current block does not have.
+          M_cell = 0;
+
+          const size_type cell_start = cell*blocksize;
+          for (size_type row_cell=0; row_cell<blocksize; ++row_cell)
+            {
+              // Matrix row holding this unknown of the permuted block
+              const size_type row = permutation[row_cell + cell_start];
+
+              typename MatrixType::const_iterator entry = M.begin(row);
+              const typename MatrixType::const_iterator row_end = M.end(row);
+
+              for (; entry != row_end; ++entry)
+                {
+                  // Position of this column in the permuted numbering;
+                  // keep the entry only if it falls into the current
+                  // diagonal block.
+                  const size_type permuted_column
+                    = inverse_permutation[entry->column()];
+                  if (permuted_column < cell_start)
+                    continue;
+
+                  const size_type column_cell = permuted_column-cell_start;
+                  if (column_cell >= blocksize)
+                    continue;
+                  M_cell(row_cell, column_cell) = entry->value();
+                }
+            }
+
+          if (this->store_diagonals())
+            this->diagonal(cell) = M_cell;
+          switch (this->inversion)
+            {
+            case PreconditionBlockBase<inverse_type>::gauss_jordan:
+              this->inverse(cell).invert(M_cell);
+              break;
+            case PreconditionBlockBase<inverse_type>::householder:
+              this->inverse_householder(cell).initialize(M_cell);
+              break;
+            case PreconditionBlockBase<inverse_type>::svd:
+              this->inverse_svd(cell) = M_cell;
+              this->inverse_svd(cell).compute_inverse_svd(0.);
+              break;
+            default:
+              Assert(false, ExcNotImplemented());
+
+            }
+        }
+    }
+  this->inverses_computed(true);
+}
+
+
+
+// Perform one block relaxation sweep, visiting the blocks in ascending
+// order. For the rows of each block the right hand side
+//   b = src - A * prev
+// is formed, the system with the diagonal block is solved (using the
+// stored inverse if available, otherwise a Householder least squares
+// solve of the block assembled on the fly), and the result is written as
+//   dst = prev + relaxation * x
+// for the rows of that block.
+//
+// If transpose_diagonal is set, the transpose of the diagonal block
+// (or of its stored inverse) is used.
+//
+// A stored permutation is interpreted according to its length: as many
+// entries as matrix rows means a permutation of the unknowns, as many
+// entries as blocks means a permutation of the block order.
+template <typename MatrixType, typename inverse_type>
+template <typename number2>
+void PreconditionBlock<MatrixType,inverse_type>::forward_step
+(Vector<number2>       &dst,
+ const Vector<number2> &prev,
+ const Vector<number2> &src,
+ const bool             transpose_diagonal) const
+{
+  Assert (this->A!=0, ExcNotInitialized());
+
+  const MatrixType &M=*this->A;
+
+  if (permutation.size() != 0)
+    Assert (permutation.size() == M.m() || permutation.size() == this->size(),
+            ExcMessage("Permutation vector size must be equal to either the number of blocks or the dimension of the system"));
+
+  // Note that both flags are true at once if the number of blocks
+  // equals the number of matrix rows.
+  const bool permuted = (permutation.size() == M.m());
+  const bool cell_permuted = (permutation.size() == this->size());
+
+  // Right hand side and solution of the local block system
+  Vector<number2> b_cell(this->blocksize), x_cell(this->blocksize);
+
+  // cell is the number of the block, row_cell and column_cell are
+  // the local numbering of the unknowns in the blocks, row and
+  // column are the global numbering of the unknowns.
+  size_type row, row_cell;
+  number2 b_cell_row;
+  // The diagonal block if the
+  // inverses were not precomputed
+  FullMatrix<number> M_cell(this->blocksize);
+
+  // Loop over all blocks
+  for (unsigned int rawcell=0; rawcell < this->size(); ++rawcell)
+    {
+      const unsigned int cell = cell_permuted ? permutation[rawcell] : rawcell;
+      const size_type block_start = cell*this->blocksize;
+      const size_type permuted_block_start = permuted
+                                             ? permutation[block_start]
+                                             : block_start;
+
+//       deallog << std::endl << cell << '-' << block_start
+//            << '-' << permuted_block_start << (permuted ? 't' : 'f') << '\t';
+
+      // The rows of the block are taken to be consecutive, starting
+      // at permuted_block_start.
+      for (row = permuted_block_start, row_cell = 0;
+           row_cell < this->blocksize;
+           ++row_cell, ++row)
+        {
+//        deallog << ' ' << row;
+          const typename MatrixType::const_iterator row_end = M.end(row);
+          typename MatrixType::const_iterator entry = M.begin(row);
+
+          b_cell_row=src(row);
+          for (; entry != row_end; ++entry)
+            {
+              const size_type column = entry->column();
+              const size_type inverse_permuted_column = permuted
+                                                        ? inverse_permutation[column]
+                                                        : column;
+              // Every entry of the row contributes to the residual
+              b_cell_row -= entry->value() * prev(column);
+//TODO:[GK] Find out if this is really once column and once permuted
+              // NOTE(review): the range test below uses the inverse
+              // permuted column, whereas column_cell is computed from
+              // the unpermuted one -- see the TODO above.
+              if (!this->inverses_ready()
+                  && inverse_permuted_column >= block_start
+                  && inverse_permuted_column < block_start + this->blocksize)
+                {
+                  const size_type column_cell = column - block_start;
+                  if (transpose_diagonal)
+                    M_cell(column_cell, row_cell) = entry->value();
+                  else
+                    M_cell(row_cell, column_cell) = entry->value();
+                }
+            }
+          b_cell(row_cell)=b_cell_row;
+        }
+      // Solve the local system
+      if (this->inverses_ready())
+        {
+          if (transpose_diagonal)
+            this->inverse_Tvmult(cell, x_cell, b_cell);
+          else
+            this->inverse_vmult(cell, x_cell, b_cell);
+        }
+      else
+        {
+          Householder<number> house(M_cell);
+          house.least_squares(x_cell,b_cell);
+        }
+
+      // distribute x_cell to dst
+      for (row=permuted_block_start, row_cell=0;
+           row_cell<this->blocksize;
+           ++row_cell, ++row)
+        dst(row) = prev(row) + this->relaxation*x_cell(row_cell);
+    }
+}
+
+
+// Perform one block relaxation sweep, visiting the blocks in descending
+// order. For the rows of each block the right hand side
+//   b = src - A * prev
+// is formed, the system with the diagonal block is solved (using the
+// stored inverse if available, otherwise a Householder least squares
+// solve of the block assembled on the fly), and the result is written as
+//   dst = prev + relaxation * x
+// for the rows of that block.
+//
+// If transpose_diagonal is set, the transpose of the diagonal block
+// (or of its stored inverse) is used.
+//
+// A stored permutation is interpreted according to its length: as many
+// entries as matrix rows means a permutation of the unknowns, as many
+// entries as blocks means a permutation of the block order.
+template <typename MatrixType, typename inverse_type>
+template <typename number2>
+void PreconditionBlock<MatrixType,inverse_type>::backward_step
+(Vector<number2>       &dst,
+ const Vector<number2> &prev,
+ const Vector<number2> &src,
+ const bool             transpose_diagonal) const
+{
+  Assert (this->A!=0, ExcNotInitialized());
+
+  const MatrixType &M=*this->A;
+
+  if (permutation.size() != 0)
+    Assert (permutation.size() == M.m() || permutation.size() == this->size(),
+            ExcMessage("Permutation vector size must be equal to either the number of blocks or the dimension of the system"));
+
+  // Note that both flags are true at once if the number of blocks
+  // equals the number of matrix rows.
+  const bool permuted = (permutation.size() == M.m());
+  const bool cell_permuted = (permutation.size() == this->size());
+
+  // Right hand side and solution of the local block system
+  Vector<number2> b_cell(this->blocksize), x_cell(this->blocksize);
+
+  // cell is the number of the block, row_cell and column_cell are
+  // the local numbering of the unknowns in the blocks, row and
+  // column are the global numbering of the unknowns.
+  size_type row, row_cell;
+  number2 b_cell_row;
+
+  // The diagonal block if the inverses were not precomputed
+  FullMatrix<number> M_cell(this->blocksize);
+  // Count down with an unsigned index: decrement at the top of the body
+  for (unsigned int rawcell=this->size(); rawcell!=0 ;)
+    {
+      --rawcell;
+      const unsigned int cell = cell_permuted ? permutation[rawcell] : rawcell;
+      const size_type block_start = cell*this->blocksize;
+      const size_type block_end = block_start + this->blocksize;
+      const size_type permuted_block_start = permuted
+                                             ? permutation[block_start]
+                                             : block_start;
+      // The rows of the block are taken to be consecutive, starting
+      // at permuted_block_start.
+      for (row = permuted_block_start, row_cell = 0;
+           row_cell<this->blocksize;
+           ++row_cell, ++row)
+        {
+          const typename MatrixType::const_iterator row_end = M.end(row);
+          typename MatrixType::const_iterator entry = M.begin(row);
+
+          b_cell_row=src(row);
+          for (; entry != row_end; ++entry)
+            {
+              const size_type column = entry->column();
+              const size_type inverse_permuted_column = permuted
+                                                        ? inverse_permutation[column]
+                                                        : column;
+              // Every entry of the row contributes to the residual
+              b_cell_row -= entry->value() * prev(column);
+              // NOTE(review): the upper bound is tested with the
+              // inverse permuted column, the lower bound with the
+              // unpermuted one; forward_step tests both bounds with
+              // the inverse permuted column. Confirm which is intended.
+              if (!this->inverses_ready()
+                  && inverse_permuted_column < block_end
+                  && column >= block_start)
+                {
+                  const size_type column_cell = column - block_start;
+                  // If the transpose of the diagonal block is
+                  // requested, switch row and column indices
+                  if (transpose_diagonal)
+                    M_cell(column_cell, row_cell) = entry->value();
+                  else
+                    M_cell(row_cell, column_cell) = entry->value();
+                }
+            }
+          b_cell(row_cell)=b_cell_row;
+        }
+      // Solve the local system
+      if (this->inverses_ready())
+        {
+          if (transpose_diagonal)
+            this->inverse_Tvmult(cell, x_cell, b_cell);
+          else
+            this->inverse_vmult(cell, x_cell, b_cell);
+        }
+      else
+        {
+          Householder<number> house(M_cell);
+          house.least_squares(x_cell,b_cell);
+        }
+
+
+      // distribute x_cell to dst
+      for (row=permuted_block_start, row_cell=0;
+           row_cell<this->blocksize;
+           ++row_cell, ++row)
+        dst(row) = prev(row) + this->relaxation*x_cell(row_cell);
+    }
+}
+
+
+// Read access to the size of the diagonal blocks.
+template <typename MatrixType, typename inverse_type>
+typename PreconditionBlock<MatrixType,inverse_type>::size_type
+PreconditionBlock<MatrixType,inverse_type>::block_size() const
+{
+  return this->blocksize;
+}
+
+
+// Extract the diagonal blocks of the matrix (in its natural numbering)
+// and compute their inverses using the inversion method selected in
+// the base class. If the original diagonal blocks are to be stored, a
+// copy of each block is kept as well. On exit the inverses are marked
+// as computed.
+//
+// If all diagonal blocks are declared to be the same, only the first
+// block is extracted and inverted.
+template <typename MatrixType, typename inverse_type>
+void PreconditionBlock<MatrixType,inverse_type>::invert_diagblocks()
+{
+  Assert (A!=0, ExcNotInitialized());
+  Assert (blocksize!=0, ExcNotInitialized());
+
+  const MatrixType &M=*A;
+  Assert (this->inverses_ready()==0, ExcInverseMatricesAlreadyExist());
+
+  FullMatrix<inverse_type> M_cell(blocksize);
+
+  if (this->same_diagonal())
+    {
+      deallog << "PreconditionBlock uses only one diagonal block" << std::endl;
+      for (size_type row_cell=0; row_cell<blocksize; ++row_cell)
+        {
+          typename MatrixType::const_iterator entry = M.begin(row_cell);
+          const typename MatrixType::const_iterator row_end = M.end(row_cell);
+          while (entry != row_end)
+            {
+              // Keep only the columns belonging to the leading block
+              if (entry->column() < blocksize)
+                M_cell(row_cell, entry->column()) = entry->value();
+              ++entry;
+            }
+        }
+      if (this->store_diagonals())
+        this->diagonal(0) = M_cell;
+      switch (this->inversion)
+        {
+        case PreconditionBlockBase<inverse_type>::gauss_jordan:
+          this->inverse(0).invert(M_cell);
+          break;
+        case PreconditionBlockBase<inverse_type>::householder:
+          this->inverse_householder(0).initialize(M_cell);
+          break;
+        case PreconditionBlockBase<inverse_type>::svd:
+          this->inverse_svd(0) = M_cell;
+          this->inverse_svd(0).compute_inverse_svd(1.e-12);
+          break;
+        default:
+          Assert(false, ExcNotImplemented());
+        }
+    }
+  else
+    {
+      // cell is the number of the block, row_cell and column_cell are
+      // the local numbering of the unknowns inside a block, row is the
+      // global numbering of the unknowns.
+      for (unsigned int cell=0; cell<this->size(); ++cell)
+        {
+          // Start every block from zero. Only entries present in the
+          // sparsity pattern are written below, so without this reset
+          // values of the previous block would leak into positions
+          // the current block does not have.
+          M_cell = 0;
+
+          const size_type cell_start = cell*blocksize;
+          for (size_type row_cell=0; row_cell<blocksize; ++row_cell)
+            {
+              const size_type row = row_cell + cell_start;
+              typename MatrixType::const_iterator entry = M.begin(row);
+              const typename MatrixType::const_iterator row_end = M.end(row);
+
+              for (; entry != row_end; ++entry)
+                {
+                  // Skip entries left of the diagonal block ...
+                  if (entry->column()<cell_start)
+                    continue;
+
+                  // ... and those right of it
+                  const size_type column_cell = entry->column()-cell_start;
+                  if (column_cell >= blocksize)
+                    continue;
+                  M_cell(row_cell, column_cell) = entry->value();
+                }
+            }
+
+          if (this->store_diagonals())
+            this->diagonal(cell) = M_cell;
+          switch (this->inversion)
+            {
+            case PreconditionBlockBase<inverse_type>::gauss_jordan:
+              this->inverse(cell).invert(M_cell);
+              break;
+            case PreconditionBlockBase<inverse_type>::householder:
+              this->inverse_householder(cell).initialize(M_cell);
+              break;
+            case PreconditionBlockBase<inverse_type>::svd:
+              this->inverse_svd(cell) = M_cell;
+              this->inverse_svd(cell).compute_inverse_svd(1.e-12);
+              break;
+            default:
+              Assert(false, ExcNotImplemented());
+            }
+        }
+    }
+  this->inverses_computed(true);
+}
+
+
+
+// Store copies of a permutation p and of its inverse i. Both vectors
+// must have the same length; if the block inverses have already been
+// computed, that length must equal the number of blocks.
+template <typename MatrixType, typename inverse_type>
+void PreconditionBlock<MatrixType,inverse_type>::set_permutation
+(const std::vector<size_type> &p,
+ const std::vector<size_type> &i)
+{
+  Assert (p.size() == i.size(), ExcDimensionMismatch(p.size(), i.size()));
+
+  if (this->inverses_ready())
+    {
+      AssertDimension(p.size(), this->size());
+    }
+
+  // Size both member vectors to the length of p, then copy entry by entry.
+  const std::size_t n_entries = p.size();
+  permutation.resize(n_entries);
+  inverse_permutation.resize(n_entries);
+
+  std::size_t index = 0;
+  while (index != n_entries)
+    {
+      permutation[index]         = p[index];
+      inverse_permutation[index] = i[index];
+      ++index;
+    }
+}
+
+
+// Estimate of the memory used by this object: the size of this class
+// minus the size of its base class sub-object, plus whatever the base
+// class reports for itself.
+template <typename MatrixType, typename inverse_type>
+std::size_t
+PreconditionBlock<MatrixType,inverse_type>::memory_consumption () const
+{
+  const std::size_t derived_part
+    = sizeof(*this) - sizeof(PreconditionBlockBase<inverse_type>);
+  const std::size_t base_part
+    = PreconditionBlockBase<inverse_type>::memory_consumption();
+
+  return derived_part + base_part;
+}
+
+
+
+
+/*--------------------- PreconditionBlockJacobi -----------------------*/
+
+
+// Apply the block Jacobi preconditioner: for every diagonal block, solve
+// the local system with the matching part of src as right hand side and
+// scale the result by the relaxation parameter.
+//
+// If adding is false, the scaled result overwrites dst; if it is true,
+// the scaled result is added to the previous content of dst, which is
+// itself left unscaled.
+//
+// The local systems are solved with the stored block inverses if they
+// are available; otherwise each diagonal block is read from the matrix
+// and solved by a Householder least squares solve.
+template <typename MatrixType, typename inverse_type>
+template <typename number2>
+void PreconditionBlockJacobi<MatrixType,inverse_type>
+::do_vmult (Vector<number2>       &dst,
+            const Vector<number2> &src,
+            bool                   adding) const
+{
+  Assert(this->A!=0, ExcNotInitialized());
+
+  const MatrixType &M=*this->A;
+
+  // Right hand side and solution of the local block system
+  Vector<number2> b_cell(this->blocksize), x_cell(this->blocksize);
+
+  // Relaxation factor in the number type of the vectors. It is applied
+  // to the block solution only, so that in adding mode the previous
+  // content of dst is not rescaled.
+  const number2 omega (this->relaxation);
+
+  const bool use_stored_inverses = this->inverses_ready();
+
+  // The diagonal block; only needed (and only given a nonzero size)
+  // if the inverses were not precomputed
+  FullMatrix<number> M_cell(use_stored_inverses ? 0 : this->blocksize);
+
+  // cell is the number of the block, row_cell and column_cell are
+  // the local numbering of the unknowns in the blocks, row and
+  // column are the global numbering of the unknowns.
+  for (unsigned int cell=0; cell < this->size(); ++cell)
+    {
+      const size_type block_start = cell*this->blocksize;
+
+      // Collect the local right hand side and, if required, the
+      // diagonal block itself
+      for (size_type row_cell=0; row_cell<this->blocksize; ++row_cell)
+        {
+          const size_type row = block_start + row_cell;
+          b_cell(row_cell)=src(row);
+
+          if (!use_stored_inverses)
+            for (size_type column_cell=0;
+                 column_cell<this->blocksize; ++column_cell)
+              M_cell(row_cell,column_cell)=M(row,block_start+column_cell);
+        }
+
+      // Solve the local system
+      if (use_stored_inverses)
+        this->inverse_vmult(cell, x_cell, b_cell);
+      else
+        {
+          Householder<number> house(M_cell);
+          house.least_squares(x_cell,b_cell);
+        }
+
+      // distribute the scaled x_cell to dst
+      for (size_type row_cell=0; row_cell<this->blocksize; ++row_cell)
+        {
+          const size_type row = block_start + row_cell;
+          const number2 update = omega*x_cell(row_cell);
+          if (adding)
+            dst(row)+=update;
+          else
+            dst(row)=update;
+        }
+    }
+}
+
+
+// Apply the preconditioner to src and overwrite dst with the result.
+template <typename MatrixType, typename inverse_type>
+template <typename number2>
+void PreconditionBlockJacobi<MatrixType,inverse_type>
+::vmult (Vector<number2>       &dst,
+         const Vector<number2> &src) const
+{
+  const bool adding = false;
+  this->do_vmult(dst, src, adding);
+}
+
+
+// Overwrite dst with the result of do_vmult(), exactly as vmult() does.
+// NOTE(review): do_vmult() has no transpose option, so this coincides
+// with the transpose application only for symmetric diagonal blocks --
+// confirm that this is intended.
+template <typename MatrixType, typename inverse_type>
+template <typename number2>
+void PreconditionBlockJacobi<MatrixType,inverse_type>
+::Tvmult (Vector<number2>       &dst,
+          const Vector<number2> &src) const
+{
+  const bool adding = false;
+  this->do_vmult(dst, src, adding);
+}
+
+
+// Apply the preconditioner to src in adding mode, i.e. the result is
+// accumulated into dst instead of replacing it.
+template <typename MatrixType, typename inverse_type>
+template <typename number2>
+void PreconditionBlockJacobi<MatrixType,inverse_type>
+::vmult_add (Vector<number2>       &dst,
+             const Vector<number2> &src) const
+{
+  const bool adding = true;
+  this->do_vmult(dst, src, adding);
+}
+
+
+// Adding-mode counterpart of Tvmult(); it calls do_vmult() exactly as
+// vmult_add() does.
+template <typename MatrixType, typename inverse_type>
+template <typename number2>
+void PreconditionBlockJacobi<MatrixType,inverse_type>
+::Tvmult_add (Vector<number2>       &dst,
+              const Vector<number2> &src) const
+{
+  const bool adding = true;
+  this->do_vmult(dst, src, adding);
+}
+
+
+// Perform one relaxation step on dst for the right hand side src. The
+// step is computed into an auxiliary vector, so that all blocks see
+// the old iterate, and copied back afterwards.
+template <typename MatrixType, typename inverse_type>
+template <typename number2>
+void PreconditionBlockJacobi<MatrixType,inverse_type>
+::step (Vector<number2>       &dst,
+        const Vector<number2> &src) const
+{
+  // Obtain an auxiliary vector shaped like dst
+  GrowingVectorMemory<Vector<number2> > mem;
+  typename VectorMemory<Vector<number2> >::Pointer aux(mem);
+  aux->reinit(dst);
+
+  Vector<number2> &new_iterate = *aux;
+  this->forward_step(new_iterate, dst, src, false);
+  dst = new_iterate;
+}
+
+
+// Perform one transposed relaxation step on dst for the right hand
+// side src: a backward sweep using the transposed diagonal blocks. The
+// step is computed into an auxiliary vector, so that all blocks see
+// the old iterate, and copied back afterwards.
+template <typename MatrixType, typename inverse_type>
+template <typename number2>
+void PreconditionBlockJacobi<MatrixType,inverse_type>
+::Tstep (Vector<number2>       &dst,
+         const Vector<number2> &src) const
+{
+  // Obtain an auxiliary vector shaped like dst
+  GrowingVectorMemory<Vector<number2> > mem;
+  typename VectorMemory<Vector<number2> >::Pointer aux(mem);
+  aux->reinit(dst);
+
+  Vector<number2> &new_iterate = *aux;
+  this->backward_step(new_iterate, dst, src, true);
+  dst = new_iterate;
+}
+
+
+
+
+/*--------------------- PreconditionBlockSOR -----------------------*/
+
+
+// Default constructor; passes `false' on to the base class constructor.
+template <typename MatrixType, typename inverse_type>
+PreconditionBlockSOR<MatrixType,inverse_type>::PreconditionBlockSOR ()
+  :
+  PreconditionBlock<MatrixType,inverse_type> (false)
+{
+}
+
+// Constructor forwarding the given flag to the base class constructor.
+template <typename MatrixType, typename inverse_type>
+PreconditionBlockSOR<MatrixType,inverse_type>::PreconditionBlockSOR (bool store)
+  :
+  PreconditionBlock<MatrixType,inverse_type> (store)
+{
+}
+
+// Forward block SOR sweep. The blocks are visited in ascending order;
+// for each block the right hand side is src minus the contributions of
+// all matrix entries whose (inverse permuted) column lies before the
+// block, multiplied with the values already written to dst. The system
+// with the diagonal block is then solved (using the stored inverse if
+// available, otherwise a Householder least squares solve of the block
+// assembled on the fly) and relaxation times the solution is written
+// to the rows of the block in dst.
+//
+// If transpose_diagonal is set, the transpose of the diagonal block
+// (or of its stored inverse) is used. The last, unnamed argument is
+// not used by this implementation.
+//
+// If a permutation is stored, it must have as many entries as the
+// matrix has rows.
+template <typename MatrixType, typename inverse_type>
+template <typename number2>
+void PreconditionBlockSOR<MatrixType,inverse_type>::forward
+(Vector<number2>       &dst,
+ const Vector<number2> &src,
+ const bool             transpose_diagonal,
+ const bool) const
+{
+  Assert (this->A!=0, ExcNotInitialized());
+
+  const MatrixType &M=*this->A;
+  const bool permuted = (this->permutation.size() != 0);
+  if (permuted)
+    {
+      Assert (this->permutation.size() == M.m(), ExcDimensionMismatch(this->permutation.size(), M.m()));
+    }
+
+  // Right hand side and solution of the local block system
+  Vector<number2> b_cell(this->blocksize), x_cell(this->blocksize);
+
+  // cell is the number of the block, row_cell and column_cell are
+  // the local numbering of the unknowns in the blocks, row and
+  // column are the global numbering of the unknowns.
+  size_type row, row_cell, block_start=0;
+  number2 b_cell_row;
+  // The diagonal block if the
+  // inverses were not precomputed
+  FullMatrix<number> M_cell(this->blocksize);
+
+  for (unsigned int cell=0; cell < this->size(); ++cell)
+    {
+      const size_type permuted_block_start = permuted
+                                             ? this->permutation[block_start]
+                                             :block_start;
+
+      // The rows of the block are taken to be consecutive, starting
+      // at permuted_block_start.
+      for (row = permuted_block_start, row_cell = 0;
+           row_cell < this->blocksize;
+           ++row_cell, ++row)
+        {
+          const typename MatrixType::const_iterator row_end = M.end(row);
+          typename MatrixType::const_iterator entry = M.begin(row);
+
+          b_cell_row=src(row);
+          for (; entry != row_end; ++entry)
+            {
+              const size_type column = entry->column();
+              const size_type inverse_permuted_column = permuted
+                                                        ? this->inverse_permutation[column]
+                                                        : column;
+
+              // Entries before the block act on the part of dst
+              // that has been computed already
+              if (inverse_permuted_column < block_start)
+                b_cell_row -= entry->value() * dst(column);
+              // NOTE(review): the test above uses the inverse permuted
+              // column, the one below and column_cell the unpermuted
+              // one -- confirm this is intended with a permutation.
+              else if (!this->inverses_ready() && column < block_start + this->blocksize)
+                {
+                  const size_type column_cell = column - block_start;
+                  if (transpose_diagonal)
+                    M_cell(column_cell, row_cell) = entry->value();
+                  else
+                    M_cell(row_cell, column_cell) = entry->value();
+                }
+            }
+          b_cell(row_cell)=b_cell_row;
+        }
+      // Solve the local system
+      if (this->inverses_ready())
+        {
+          if (transpose_diagonal)
+            this->inverse_Tvmult(cell, x_cell, b_cell);
+          else
+            this->inverse_vmult(cell, x_cell, b_cell);
+        }
+      else
+        {
+          Householder<number> house(M_cell);
+          house.least_squares(x_cell,b_cell);
+        }
+
+      // distribute x_cell to dst
+      for (row=permuted_block_start, row_cell=0;
+           row_cell<this->blocksize;
+           ++row_cell, ++row)
+        dst(row)=this->relaxation*x_cell(row_cell);
+
+      block_start+=this->blocksize;
+    }
+}
+
+
+// Backward block SOR sweep. The blocks are visited in descending order;
+// for each block the right hand side is src minus the contributions of
+// all matrix entries whose (inverse permuted) column lies behind the
+// block, multiplied with the values already written to dst. The system
+// with the diagonal block is then solved (using the stored inverse if
+// available, otherwise a Householder least squares solve of the block
+// assembled on the fly) and relaxation times the solution is written
+// to the rows of the block in dst.
+//
+// If transpose_diagonal is set, the transpose of the diagonal block
+// (or of its stored inverse) is used. The last, unnamed argument is
+// not used by this implementation.
+//
+// If a permutation is stored, it must have as many entries as the
+// matrix has rows.
+template <typename MatrixType, typename inverse_type>
+template <typename number2>
+void PreconditionBlockSOR<MatrixType,inverse_type>::backward
+(Vector<number2>       &dst,
+ const Vector<number2> &src,
+ const bool              transpose_diagonal,
+ const bool) const
+{
+  Assert (this->A!=0, ExcNotInitialized());
+
+  const MatrixType &M=*this->A;
+  const bool permuted = (this->permutation.size() != 0);
+  if (permuted)
+    {
+      Assert (this->permutation.size() == M.m(), ExcDimensionMismatch(this->permutation.size(), M.m()));
+    }
+
+  // Right hand side and solution of the local block system
+  Vector<number2> b_cell(this->blocksize), x_cell(this->blocksize);
+
+  // cell is the number of the block, row_cell and column_cell are
+  // the local numbering of the unknowns in the blocks, row and
+  // column are the global numbering of the unknowns.
+  size_type row, row_cell;
+  // One past the last unknown of the current block
+  size_type block_end=this->blocksize * this->size();
+  number2 b_cell_row;
+
+  // The diagonal block if the inverses were not precomputed
+  FullMatrix<number> M_cell(this->blocksize);
+  // Count down with an unsigned index: decrement at the top of the body
+  for (unsigned int cell=this->size(); cell!=0 ;)
+    {
+      --cell;
+      const size_type block_start = block_end - this->blocksize;
+      // Collect upper triangle
+      const size_type permuted_block_start = (this->permutation.size() != 0)
+                                             ? this->permutation[block_start]
+                                             : block_start;
+      // The rows of the block are taken to be consecutive, starting
+      // at permuted_block_start.
+      for (row = permuted_block_start, row_cell = 0;
+           row_cell<this->blocksize;
+           ++row_cell, ++row)
+        {
+          const typename MatrixType::const_iterator row_end = M.end(row);
+          typename MatrixType::const_iterator entry = M.begin(row);
+
+          b_cell_row=src(row);
+          for (; entry != row_end; ++entry)
+            {
+              const size_type column = entry->column();
+              const size_type inverse_permuted_column = permuted
+                                                        ? this->inverse_permutation[column]
+                                                        : column;
+              // Entries behind the block act on the part of dst
+              // that has been computed already
+              if (inverse_permuted_column >= block_end)
+                b_cell_row -= entry->value() * dst(column);
+              // NOTE(review): the test above uses the inverse permuted
+              // column, the one below and column_cell the unpermuted
+              // one -- confirm this is intended with a permutation.
+              else if (!this->inverses_ready() && column >= block_start)
+                {
+                  const size_type column_cell = column - block_start;
+                  // If the transpose of the diagonal block is
+                  // requested, switch row and column indices
+                  if (transpose_diagonal)
+                    M_cell(column_cell, row_cell) = entry->value();
+                  else
+                    M_cell(row_cell, column_cell) = entry->value();
+                }
+            }
+          b_cell(row_cell)=b_cell_row;
+        }
+      // Solve the local system
+      if (this->inverses_ready())
+        {
+          if (transpose_diagonal)
+            this->inverse_Tvmult(cell, x_cell, b_cell);
+          else
+            this->inverse_vmult(cell, x_cell, b_cell);
+        }
+      else
+        {
+          Householder<number> house(M_cell);
+          house.least_squares(x_cell,b_cell);
+        }
+
+
+      // distribute x_cell to dst
+      for (row=permuted_block_start, row_cell=0;
+           row_cell<this->blocksize;
+           ++row_cell, ++row)
+        dst(row)=this->relaxation*x_cell(row_cell);
+      // The next block to be treated ends where this one started
+      block_end = block_start;
+
+    }
+}
+
+
+// Apply the preconditioner: one forward sweep with untransposed
+// diagonal blocks.
+template <typename MatrixType, typename inverse_type>
+template <typename number2>
+void PreconditionBlockSOR<MatrixType,inverse_type>
+::vmult (Vector<number2>       &dst,
+         const Vector<number2> &src) const
+{
+  const bool transpose_diagonal = false;
+  const bool adding             = false;
+  this->forward(dst, src, transpose_diagonal, adding);
+}
+
+
+// One forward sweep with untransposed diagonal blocks, with the last
+// flag of forward() set to true.
+// NOTE(review): forward() does not use that flag, so dst is
+// overwritten rather than added to -- confirm against the interface
+// documentation.
+template <typename MatrixType, typename inverse_type>
+template <typename number2>
+void PreconditionBlockSOR<MatrixType,inverse_type>
+::vmult_add (Vector<number2>       &dst,
+             const Vector<number2> &src) const
+{
+  const bool transpose_diagonal = false;
+  const bool adding             = true;
+  this->forward(dst, src, transpose_diagonal, adding);
+}
+
+
+// Apply the transposed preconditioner: one backward sweep with
+// transposed diagonal blocks.
+template <typename MatrixType, typename inverse_type>
+template <typename number2>
+void PreconditionBlockSOR<MatrixType,inverse_type>
+::Tvmult (Vector<number2>       &dst,
+          const Vector<number2> &src) const
+{
+  const bool transpose_diagonal = true;
+  const bool adding             = false;
+  this->backward(dst, src, transpose_diagonal, adding);
+}
+
+
+// One backward sweep with transposed diagonal blocks, with the last
+// flag of backward() set to true.
+// NOTE(review): backward() does not use that flag, so dst is
+// overwritten rather than added to -- confirm against the interface
+// documentation.
+template <typename MatrixType, typename inverse_type>
+template <typename number2>
+void PreconditionBlockSOR<MatrixType,inverse_type>
+::Tvmult_add (Vector<number2>       &dst,
+              const Vector<number2> &src) const
+{
+  const bool transpose_diagonal = true;
+  const bool adding             = true;
+  this->backward(dst, src, transpose_diagonal, adding);
+}
+
+
+
+// Perform one relaxation step on dst for the right hand side src.
+// dst serves as both the previous and the new iterate, so blocks
+// treated later see the values updated by earlier ones.
+template <typename MatrixType, typename inverse_type>
+template <typename number2>
+void PreconditionBlockSOR<MatrixType,inverse_type>
+::step (Vector<number2>       &dst,
+        const Vector<number2> &src) const
+{
+  const bool transpose_diagonal = false;
+  this->forward_step(dst, dst, src, transpose_diagonal);
+}
+
+
+// Perform one transposed relaxation step on dst for the right hand
+// side src: a backward sweep with transposed diagonal blocks, in
+// which dst serves as both the previous and the new iterate.
+template <typename MatrixType, typename inverse_type>
+template <typename number2>
+void PreconditionBlockSOR<MatrixType,inverse_type>
+::Tstep (Vector<number2>       &dst,
+         const Vector<number2> &src) const
+{
+  const bool transpose_diagonal = true;
+  this->backward_step(dst, dst, src, transpose_diagonal);
+}
+
+
+
+
+/*--------------------- PreconditionBlockSSOR -----------------------*/
+
+
+// Default constructor; passes `true' on to the SOR base class
+// (presumably so that the diagonal blocks are kept, since vmult() and
+// Tvmult() read this->diagonal() -- TODO confirm).
+template <typename MatrixType, typename inverse_type>
+PreconditionBlockSSOR<MatrixType,inverse_type>::PreconditionBlockSSOR ()
+  :
+  PreconditionBlockSOR<MatrixType,inverse_type> (true)
+{
+}
+
+
+// Apply the preconditioner in three stages: a forward sweep, a
+// multiplication of the intermediate result with the diagonal blocks
+// scaled by (2-relaxation)/relaxation, and a backward sweep.
+template <typename MatrixType, typename inverse_type>
+template <typename number2>
+void PreconditionBlockSSOR<MatrixType,inverse_type>::vmult (Vector<number2>       &dst,
+                                                            const Vector<number2> &src) const
+{
+  // Intermediate vector shaped like dst
+  Vector<number2> intermediate;
+  intermediate.reinit(dst);
+
+  // Stage 1: forward sweep
+  this->forward(intermediate, src, false, false);
+
+  // Stage 2: multiply block by block with the scaled diagonal blocks
+  const size_type n_local = this->blocksize;
+  Vector<inverse_type> block_in(n_local);
+  Vector<inverse_type> block_out(n_local);
+  const double factor = (2.-this->relaxation)/this->relaxation;
+
+  for (unsigned int block=0; block < this->size(); ++block)
+    {
+      const size_type offset = block*n_local;
+
+      for (size_type k=0; k<n_local; ++k)
+        block_in(k)=intermediate(offset+k);
+
+      this->diagonal(block).vmult(block_out, block_in);
+
+      for (size_type k=0; k<n_local; ++k)
+        intermediate(offset+k) = factor * block_out(k);
+    }
+
+  // Stage 3: backward sweep
+  this->backward(dst, intermediate, false, false);
+}
+
+// Apply the transposed preconditioner in three stages: a backward
+// sweep, a multiplication of the intermediate result with the
+// transposed diagonal blocks scaled by (2-relaxation)/relaxation, and
+// a forward sweep. Both sweeps use the transposed diagonal blocks.
+template <typename MatrixType, typename inverse_type>
+template <typename number2>
+void PreconditionBlockSSOR<MatrixType,inverse_type>::Tvmult (Vector<number2>       &dst,
+    const Vector<number2> &src) const
+{
+  // Intermediate vector shaped like dst
+  Vector<number2> intermediate;
+  intermediate.reinit(dst);
+
+  // Stage 1: backward sweep
+  this->backward(intermediate, src, true, false);
+
+  // Stage 2: multiply block by block with the scaled, transposed
+  // diagonal blocks
+  const size_type n_local = this->blocksize;
+  Vector<inverse_type> block_in(n_local);
+  Vector<inverse_type> block_out(n_local);
+  const double factor = (2.-this->relaxation)/this->relaxation;
+
+  for (unsigned int block=0; block < this->size(); ++block)
+    {
+      const size_type offset = block*n_local;
+
+      for (size_type k=0; k<n_local; ++k)
+        block_in(k)=intermediate(offset+k);
+
+      this->diagonal(block).Tvmult(block_out, block_in);
+
+      for (size_type k=0; k<n_local; ++k)
+        intermediate(offset+k) = factor * block_out(k);
+    }
+
+  // Stage 3: forward sweep
+  this->forward(dst, intermediate, true, false);
+}
+
+
+// Perform one symmetric relaxation step on dst for the right hand side
+// src: a forward sweep followed by a backward sweep, both in place and
+// with untransposed diagonal blocks.
+template <typename MatrixType, typename inverse_type>
+template <typename number2>
+void PreconditionBlockSSOR<MatrixType,inverse_type>
+::step (Vector<number2>       &dst,
+        const Vector<number2> &src) const
+{
+  const bool transpose_diagonal = false;
+
+  this->forward_step(dst, dst, src, transpose_diagonal);
+  this->backward_step(dst, dst, src, transpose_diagonal);
+}
+
+
+// Perform one transposed symmetric relaxation step on dst for the
+// right hand side src: a backward sweep followed by a forward sweep,
+// both in place and with transposed diagonal blocks.
+template <typename MatrixType, typename inverse_type>
+template <typename number2>
+void PreconditionBlockSSOR<MatrixType,inverse_type>
+::Tstep (Vector<number2>       &dst,
+         const Vector<number2> &src) const
+{
+  const bool transpose_diagonal = true;
+
+  this->backward_step(dst, dst, src, transpose_diagonal);
+  this->forward_step(dst, dst, src, transpose_diagonal);
+}
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/precondition_block_base.h b/include/deal.II/lac/precondition_block_base.h
new file mode 100644
index 0000000..5ce3025
--- /dev/null
+++ b/include/deal.II/lac/precondition_block_base.h
@@ -0,0 +1,679 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__precondition_block_base_h
+#define dealii__precondition_block_base_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/lac/householder.h>
+#include <deal.II/lac/lapack_full_matrix.h>
+
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <typename number> class FullMatrix;
+template <typename number> class Vector;
+
+/**
+ * A class storing the inverse diagonal blocks for block preconditioners and
+ * block relaxation methods.
+ *
+ * This class does the book keeping for preconditioners and relaxation methods
+ * based on inverting blocks on the diagonal of a matrix. It allows us to
+ * either store all diagonal blocks and their inverses or the same block for
+ * each entry, and it keeps track of the choice. Thus, after initializing it
+ * and filling the inverse diagonal blocks correctly, a derived class can use
+ * inverse() with an integer argument referring to the block number.
+ *
+ * Additionally, it allows the storage of the original diagonal blocks, not
+ * only the inverses. These are for instance used in the intermediate step of
+ * the SSOR preconditioner.
+ *
+ * @author Guido Kanschat
+ * @date 2010
+ */
+template <typename number>
+class PreconditionBlockBase
+{
+public:
+  /**
+   * Declare type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * Choose a method for inverting the blocks, and thus the data type for the
+   * inverses.
+   */
+  enum Inversion
+  {
+    /**
+     * Use the standard Gauss-Jordan method implemented in
+     * FullMatrix::inverse().
+     */
+    gauss_jordan,
+    /**
+     * Use QR decomposition of the Householder class.
+     */
+    householder,
+    /**
+     * Use the singular value decomposition of LAPACKFullMatrix.
+     */
+    svd
+  };
+
+  /**
+   * Constructor initializing default values.
+   */
+  PreconditionBlockBase(bool store_diagonals = false,
+                        Inversion method = gauss_jordan);
+
+  /**
+   * Destructor. Note that it is not declared virtual.
+   */
+  ~PreconditionBlockBase();
+
+  /**
+   * Deletes the inverse diagonal block matrices if existent hence leaves the
+   * class in the state that it had directly after calling the constructor.
+   */
+  void clear();
+
+  /**
+   * Resize to this number of diagonal blocks with the given block size. If
+   * <tt>compress</tt> is true, then only one block will be stored. The
+   * argument <tt>method</tt> selects the inversion method and thereby the
+   * container that is resized.
+   */
+  void reinit(unsigned int nblocks, size_type blocksize, bool compress,
+              Inversion method = gauss_jordan);
+
+  /**
+   * Tell the class that inverses are computed.
+   */
+  void inverses_computed(bool are_they);
+
+  /**
+   * Use only the inverse of the first diagonal block to save memory and
+   * computation time.
+   *
+   * Possible applications: computing on a cartesian grid, all diagonal blocks
+   * are the same or all diagonal blocks are at least similar and inversion of
+   * one of them still yields a preconditioner.
+   */
+  void set_same_diagonal ();
+
+  /**
+   * Does the matrix use only one diagonal block?
+   */
+  bool same_diagonal () const;
+
+  /**
+   * Check, whether diagonal blocks (not their inverses) should be stored.
+   */
+  bool store_diagonals() const;
+
+  /**
+   * Return true, if inverses are ready for use.
+   */
+  bool inverses_ready () const;
+
+  /**
+   * Checks whether the object is empty.
+   */
+  bool empty () const;
+
+  /**
+   * The number of blocks.
+   */
+  unsigned int size() const;
+
+  /**
+   * Read-only access to entries. This function is only possible if the
+   * inverse diagonal blocks are stored.
+   */
+  number el(size_type i, size_type j) const;
+
+  /**
+   * Multiply with the inverse block at position <tt>i</tt>.
+   */
+  template <typename number2>
+  void inverse_vmult(size_type i, Vector<number2> &dst, const Vector<number2> &src) const;
+
+  /**
+   * Multiply with the transposed inverse block at position <tt>i</tt>.
+   */
+  template <typename number2>
+  void inverse_Tvmult(size_type i, Vector<number2> &dst, const Vector<number2> &src) const;
+
+  /**
+   * Access to the inverse diagonal blocks if Inversion is #gauss_jordan.
+   */
+  FullMatrix<number> &inverse (size_type i);
+
+  /**
+   * Access to the inverse diagonal blocks if Inversion is #householder.
+   */
+  Householder<number> &inverse_householder (size_type i);
+
+  /**
+   * Access to the inverse diagonal blocks if Inversion is #svd.
+   */
+  LAPACKFullMatrix<number> &inverse_svd (size_type i);
+
+  /**
+   * Read-only access to the inverse diagonal blocks if Inversion is
+   * #gauss_jordan.
+   */
+  const FullMatrix<number> &inverse (size_type i) const;
+
+  /**
+   * Read-only access to the inverse diagonal blocks if Inversion is
+   * #householder.
+   */
+  const Householder<number> &inverse_householder (size_type i) const;
+
+  /**
+   * Read-only access to the inverse diagonal blocks if Inversion is #svd.
+   */
+  const LAPACKFullMatrix<number> &inverse_svd (size_type i) const;
+
+  /**
+   * Access to the diagonal blocks.
+   */
+  FullMatrix<number> &diagonal (size_type i);
+
+  /**
+   * Read-only access to the diagonal blocks.
+   */
+  const FullMatrix<number> &diagonal (size_type i) const;
+
+  /**
+   * Print some statistics about the inverses to @p deallog. Output depends on
+   * #Inversion. It is richest for svd, where we obtain statistics on extremal
+   * singular values and condition numbers.
+   */
+  void log_statistics () const;
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  std::size_t memory_consumption () const;
+
+  /**
+   * You are trying to access a diagonal block (not its inverse), but you
+   * decided not to store the diagonal blocks.
+   */
+  DeclException0 (ExcDiagonalsNotStored);
+
+  /**
+   * You are accessing a diagonal block, assuming that it has a certain type.
+   * But, the method used for inverting the diagonal blocks does not use this
+   * type
+   */
+  DeclException0 (ExcInverseNotAvailable);
+
+protected:
+  /**
+   * The method used for inverting blocks.
+   */
+  Inversion inversion;
+
+private:
+  /**
+   * The number of (inverse) diagonal blocks as passed to reinit(). It is
+   * kept separately from the containers below because these hold only a
+   * single entry if #var_same_diagonal is set.
+   */
+  unsigned int n_diagonal_blocks;
+
+  /**
+   * Storage of the inverse matrices of the diagonal blocks matrices as
+   * <tt>FullMatrix<number></tt> matrices, if Inversion #gauss_jordan is used.
+   * Using <tt>number=float</tt> saves memory in comparison with
+   * <tt>number=double</tt>, but may introduce numerical instability.
+   */
+  std::vector<FullMatrix<number> > var_inverse_full;
+
+  /**
+   * Storage of the inverse matrices of the diagonal blocks matrices as
+   * <tt>Householder</tt> matrices if Inversion #householder is used. Using
+   * <tt>number=float</tt> saves memory in comparison with
+   * <tt>number=double</tt>, but may introduce numerical instability.
+   */
+  std::vector<Householder<number> > var_inverse_householder;
+
+  /**
+   * Storage of the inverse matrices of the diagonal blocks matrices as
+   * <tt>LAPACKFullMatrix</tt> matrices if Inversion #svd is used. Using
+   * <tt>number=float</tt> saves memory in comparison with
+   * <tt>number=double</tt>, but may introduce numerical instability.
+   */
+  std::vector<LAPACKFullMatrix<number> > var_inverse_svd;
+
+  /**
+   * Storage of the original diagonal blocks.
+   *
+   * Used by the blocked SSOR method.
+   */
+  std::vector<FullMatrix<number> > var_diagonal;
+
+
+  /**
+   * This is true, if the field #var_diagonal is to be used.
+   */
+  bool var_store_diagonals;
+
+  /**
+   * This is true, if only one inverse is stored.
+   */
+  bool var_same_diagonal;
+
+  /**
+   * The inverse matrices are usable. Set by the parent class via
+   * inverses_computed().
+   */
+  bool var_inverses_ready;
+};
+
+//----------------------------------------------------------------------//
+
+// Constructor: records the inversion method and whether diagonals are to be
+// stored; starts with zero blocks, no shared diagonal and no inverses ready.
+// The storage vectors are left default-constructed.
+template <typename number>
+inline
+PreconditionBlockBase<number>::PreconditionBlockBase(bool      store,
+                                                     Inversion method)
+  : inversion(method),
+    n_diagonal_blocks(0),
+    var_store_diagonals(store),
+    var_same_diagonal(false),
+    var_inverses_ready(false)
+{}
+
+
+// Drop all stored inverse and diagonal blocks and reset the bookkeeping
+// flags and the block count. The members `inversion` and
+// `var_store_diagonals` are not touched.
+template <typename number>
+inline
+void
+PreconditionBlockBase<number>::clear()
+{
+  // Emptying an already empty vector is a no-op, so no size checks are
+  // needed.
+  var_inverse_full.clear();
+  var_inverse_householder.clear();
+  var_inverse_svd.clear();
+  var_diagonal.clear();
+
+  n_diagonal_blocks = 0;
+  var_inverses_ready = false;
+  var_same_diagonal = false;
+}
+
+// Record the new configuration and size the storage for the selected
+// inversion method. With `compress` only a single block is allocated,
+// otherwise one per diagonal block. Diagonal blocks are allocated only if
+// store_diagonals() is true. Containers belonging to inversion methods other
+// than the selected one are left untouched.
+template <typename number>
+inline
+void
+PreconditionBlockBase<number>::reinit(unsigned int n_blocks, size_type block_size, bool compress,
+                                      Inversion method)
+{
+  inversion = method;
+  var_same_diagonal = compress;
+  var_inverses_ready = false;
+  n_diagonal_blocks = n_blocks;
+
+  if (!compress)
+    {
+      // One block per diagonal entry. The new storage is built in a
+      // temporary vector and then swapped into place.
+      if (store_diagonals())
+        {
+          std::vector<FullMatrix<number> >
+          fresh(n_blocks, FullMatrix<number>(block_size));
+          var_diagonal.swap (fresh);
+        }
+
+      switch (inversion)
+        {
+        case gauss_jordan:
+        {
+          std::vector<FullMatrix<number> >
+          fresh(n_blocks, FullMatrix<number>(block_size));
+          var_inverse_full.swap (fresh);
+          break;
+        }
+        case householder:
+          // Householder objects are only resized here, not initialized with
+          // a block size.
+          var_inverse_householder.resize(n_blocks);
+          break;
+        case svd:
+        {
+          std::vector<LAPACKFullMatrix<number> >
+          fresh(n_blocks, LAPACKFullMatrix<number>(block_size));
+          var_inverse_svd.swap (fresh);
+          break;
+        }
+        default:
+          Assert(false, ExcNotImplemented());
+        }
+    }
+  else
+    {
+      // A single block shared by all diagonal entries.
+      switch (inversion)
+        {
+        case gauss_jordan:
+          var_inverse_full.resize(1);
+          var_inverse_full[0].reinit(block_size,block_size);
+          break;
+        case householder:
+          var_inverse_householder.resize(1);
+          break;
+        case svd:
+          var_inverse_svd.resize(1);
+          var_inverse_svd[0].reinit(block_size,block_size);
+          break;
+        default:
+          Assert(false, ExcNotImplemented());
+        }
+
+      if (store_diagonals())
+        {
+          var_diagonal.resize(1);
+          var_diagonal[0].reinit(block_size,block_size);
+        }
+    }
+}
+
+
+// Number of diagonal blocks as recorded by reinit(); this is not the number
+// of blocks physically stored when a single shared block is used.
+template <typename number>
+inline unsigned int
+PreconditionBlockBase<number>::size() const
+{
+  return n_diagonal_blocks;
+}
+
+// Apply the stored inverse of block i to src, writing the result to dst.
+// The container used depends on the current inversion method.
+template <typename number>
+template <typename number2>
+inline
+void
+PreconditionBlockBase<number>::inverse_vmult(
+  size_type i, Vector<number2> &dst, const Vector<number2> &src) const
+{
+  // With a single shared block every index maps to entry 0.
+  size_type block = i;
+  if (same_diagonal())
+    block = 0U;
+
+  if (inversion == gauss_jordan)
+    {
+      AssertIndexRange (block, var_inverse_full.size());
+      var_inverse_full[block].vmult(dst, src);
+    }
+  else if (inversion == householder)
+    {
+      AssertIndexRange (block, var_inverse_householder.size());
+      var_inverse_householder[block].vmult(dst, src);
+    }
+  else if (inversion == svd)
+    {
+      AssertIndexRange (block, var_inverse_svd.size());
+      var_inverse_svd[block].vmult(dst, src);
+    }
+  else
+    {
+      Assert(false, ExcNotImplemented());
+    }
+}
+
+
+// Apply the transpose of the stored inverse of block i to src, writing the
+// result to dst. The container used depends on the current inversion method.
+template <typename number>
+template <typename number2>
+inline
+void
+PreconditionBlockBase<number>::inverse_Tvmult(
+  size_type i, Vector<number2> &dst, const Vector<number2> &src) const
+{
+  // With a single shared block every index maps to entry 0.
+  size_type block = i;
+  if (same_diagonal())
+    block = 0U;
+
+  if (inversion == gauss_jordan)
+    {
+      AssertIndexRange (block, var_inverse_full.size());
+      var_inverse_full[block].Tvmult(dst, src);
+    }
+  else if (inversion == householder)
+    {
+      AssertIndexRange (block, var_inverse_householder.size());
+      var_inverse_householder[block].Tvmult(dst, src);
+    }
+  else if (inversion == svd)
+    {
+      AssertIndexRange (block, var_inverse_svd.size());
+      var_inverse_svd[block].Tvmult(dst, src);
+    }
+  else
+    {
+      Assert(false, ExcNotImplemented());
+    }
+}
+
+
+// Read-only access to the inverse of block i stored as a FullMatrix. If a
+// single shared block is used, that block is returned regardless of i.
+template <typename number>
+inline
+const FullMatrix<number> &
+PreconditionBlockBase<number>::inverse(size_type i) const
+{
+  // Same availability check as in the non-const overload: without it the
+  // shared-diagonal branch below would index an empty vector.
+  Assert(var_inverse_full.size() != 0, ExcInverseNotAvailable());
+
+  if (same_diagonal())
+    return var_inverse_full[0];
+
+  AssertIndexRange (i, var_inverse_full.size());
+  return var_inverse_full[i];
+}
+
+
+// Read-only access to the inverse of block i stored as a Householder
+// object. If a single shared block is used, that block is returned
+// regardless of i.
+template <typename number>
+inline
+const Householder<number> &
+PreconditionBlockBase<number>::inverse_householder(size_type i) const
+{
+  // Same availability check as in the non-const overload: without it the
+  // shared-diagonal branch below would index an empty vector.
+  Assert(var_inverse_householder.size() != 0, ExcInverseNotAvailable());
+
+  if (same_diagonal())
+    return var_inverse_householder[0];
+
+  AssertIndexRange (i, var_inverse_householder.size());
+  return var_inverse_householder[i];
+}
+
+
+// Read-only access to the inverse of block i stored as a LAPACKFullMatrix.
+// If a single shared block is used, that block is returned regardless of i.
+template <typename number>
+inline
+const LAPACKFullMatrix<number> &
+PreconditionBlockBase<number>::inverse_svd(size_type i) const
+{
+  // Same availability check as in the non-const overload: without it the
+  // shared-diagonal branch below would index an empty vector.
+  Assert(var_inverse_svd.size() != 0, ExcInverseNotAvailable());
+
+  if (same_diagonal())
+    return var_inverse_svd[0];
+
+  AssertIndexRange (i, var_inverse_svd.size());
+  return var_inverse_svd[i];
+}
+
+
+// Read-only access to the stored (non-inverted) diagonal block i. Asserts
+// that diagonals are stored at all; with a single shared block, that block
+// is returned regardless of i.
+template <typename number>
+inline
+const FullMatrix<number> &
+PreconditionBlockBase<number>::diagonal(size_type i) const
+{
+  Assert(store_diagonals(), ExcDiagonalsNotStored());
+
+  if (!same_diagonal())
+    {
+      AssertIndexRange (i, var_diagonal.size());
+      return var_diagonal[i];
+    }
+
+  return var_diagonal[0];
+}
+
+
+// Writable access to the inverse of block i stored as a FullMatrix. Asserts
+// that such storage exists; with a single shared block, that block is
+// returned regardless of i.
+template <typename number>
+inline
+FullMatrix<number> &
+PreconditionBlockBase<number>::inverse(size_type i)
+{
+  Assert(var_inverse_full.size() != 0, ExcInverseNotAvailable());
+
+  if (!same_diagonal())
+    {
+      AssertIndexRange (i, var_inverse_full.size());
+      return var_inverse_full[i];
+    }
+
+  return var_inverse_full[0];
+}
+
+
+// Writable access to the inverse of block i stored as a Householder object.
+// Asserts that such storage exists; with a single shared block, that block
+// is returned regardless of i.
+template <typename number>
+inline
+Householder<number> &
+PreconditionBlockBase<number>::inverse_householder(size_type i)
+{
+  Assert(var_inverse_householder.size() != 0, ExcInverseNotAvailable());
+
+  if (!same_diagonal())
+    {
+      AssertIndexRange (i, var_inverse_householder.size());
+      return var_inverse_householder[i];
+    }
+
+  return var_inverse_householder[0];
+}
+
+
+// Writable access to the inverse of block i stored as a LAPACKFullMatrix.
+// Asserts that such storage exists; with a single shared block, that block
+// is returned regardless of i.
+template <typename number>
+inline
+LAPACKFullMatrix<number> &
+PreconditionBlockBase<number>::inverse_svd(size_type i)
+{
+  Assert(var_inverse_svd.size() != 0, ExcInverseNotAvailable());
+
+  if (!same_diagonal())
+    {
+      AssertIndexRange (i, var_inverse_svd.size());
+      return var_inverse_svd[i];
+    }
+
+  return var_inverse_svd[0];
+}
+
+
+// Writable access to the stored (non-inverted) diagonal block i. Asserts
+// that diagonals are stored at all; with a single shared block, that block
+// is returned regardless of i.
+template <typename number>
+inline
+FullMatrix<number> &
+PreconditionBlockBase<number>::diagonal(size_type i)
+{
+  Assert(store_diagonals(), ExcDiagonalsNotStored());
+
+  if (!same_diagonal())
+    {
+      AssertIndexRange (i, var_diagonal.size());
+      return var_diagonal[i];
+    }
+
+  return var_diagonal[0];
+}
+
+
+// True if a single block stands in for all diagonal blocks.
+template <typename number>
+inline
+bool
+PreconditionBlockBase<number>::same_diagonal() const
+{
+  return var_same_diagonal;
+}
+
+
+// True if the original diagonal blocks are kept in addition to the inverses.
+template <typename number>
+inline
+bool
+PreconditionBlockBase<number>::store_diagonals() const
+{
+  return var_store_diagonals;
+}
+
+
+// Record whether the stored inverses are usable; the value is reported back
+// by inverses_ready().
+template <typename number>
+inline
+void
+PreconditionBlockBase<number>::inverses_computed(bool x)
+{
+  var_inverses_ready = x;
+}
+
+
+// Return the flag last set through inverses_computed() (false after
+// construction, clear() and reinit()).
+template <typename number>
+inline
+bool
+PreconditionBlockBase<number>::inverses_ready() const
+{
+  return var_inverses_ready;
+}
+
+
+// Write a summary of the stored inverses to deallog. For the svd method the
+// summary contains, over all blocks, the range of kernel dimensions (number
+// of trailing zero singular values), the smallest first and largest last
+// nonzero stored singular value, and the range of their ratio. For the
+// other two methods only the block count is written.
+template <typename number>
+inline void
+PreconditionBlockBase<number>::log_statistics () const
+{
+  deallog << "PreconditionBlockBase: " << size() << " blocks; ";
+
+  if (inversion == svd)
+    {
+      // Sentinel start values; they are printed unchanged if there are no
+      // blocks.
+      unsigned int kermin = 100000000, kermax = 0;
+      double sigmin = 1.e300, sigmax= -1.e300;
+      double kappamin = 1.e300, kappamax= -1.e300;
+
+      for (size_type b=0; b<size(); ++b)
+        {
+          const LAPACKFullMatrix<number> &matrix = inverse_svd(b);
+          const size_type n = matrix.n_cols();
+
+          // Count zero singular values from the end; afterwards k-1 is the
+          // number of zeros found.
+          size_type k=1;
+          while (k <= n && matrix.singular_value(n-k) == 0)
+            ++k;
+
+          // Compare and store the same quantity. Comparing against k while
+          // storing k-1 would miss a block whose kernel dimension is
+          // exactly one below the current minimum.
+          const unsigned int kernel_dim = static_cast<unsigned int>(k-1);
+          if (kernel_dim < kermin) kermin = kernel_dim;
+          if (kernel_dim > kermax) kermax = kernel_dim;
+
+          // If every singular value is zero (or the block is empty) there
+          // is no nonzero value to report, and n-k would wrap around.
+          if (k > n)
+            continue;
+
+          const double s0 = matrix.singular_value(0);
+          const double sm = matrix.singular_value(n-k);
+          const double co = sm/s0;
+
+          if (s0 < sigmin) sigmin = s0;
+          if (sm > sigmax) sigmax = sm;
+          if (co < kappamin) kappamin = co;
+          if (co > kappamax) kappamax = co;
+        }
+      deallog << "dim ker [" << kermin << ':' << kermax
+              << "] sigma [" << sigmin << ':' << sigmax
+              << "] kappa [" << kappamin << ':' << kappamax << ']' << std::endl;
+
+    }
+  else if (inversion == householder)
+    {
+      // No additional statistics for this method.
+    }
+  else if (inversion == gauss_jordan)
+    {
+      // No additional statistics for this method.
+    }
+  else
+    {
+      Assert(false, ExcNotImplemented());
+    }
+}
+
+
+// Estimate the memory used by this object in bytes: the object itself plus
+// every stored FullMatrix inverse and diagonal block.
+// NOTE(review): the Householder and SVD inverse containers are not included
+// in the sum; confirm whether that is intended.
+template <typename number>
+inline
+std::size_t
+PreconditionBlockBase<number>::memory_consumption () const
+{
+  std::size_t total = sizeof(*this);
+
+  typedef typename std::vector<FullMatrix<number> >::const_iterator block_iterator;
+
+  for (block_iterator m=var_inverse_full.begin(); m!=var_inverse_full.end(); ++m)
+    total += MemoryConsumption::memory_consumption(*m);
+
+  for (block_iterator m=var_diagonal.begin(); m!=var_diagonal.end(); ++m)
+    total += MemoryConsumption::memory_consumption(*m);
+
+  return total;
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/precondition_selector.h b/include/deal.II/lac/precondition_selector.h
new file mode 100644
index 0000000..164018c
--- /dev/null
+++ b/include/deal.II/lac/precondition_selector.h
@@ -0,0 +1,297 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__precondition_selector_h
+#define dealii__precondition_selector_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/smartpointer.h>
+#include <string>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <class number> class Vector;
+template <class number> class SparseMatrix;
+
+
+/*! @addtogroup Preconditioners
+ *@{
+ */
+
+/**
+ * Selects the preconditioner. The constructor of this class takes the name of
+ * the preconditioning and its damping parameter @p omega, and the
+ * @p use_matrix function takes the matrix that is used by the matrix-builtin
+ * precondition functions. Each time the vmult() (or Tvmult()) function is
+ * called, the preselected preconditioner, this matrix and this @p omega are
+ * used for the preconditioning. This class is designed to be used as the
+ * preconditioner argument of the @p solve function of a @p Solver, and it
+ * covers the selection of all matrix-builtin precondition functions. The
+ * selection of other preconditioners, like BlockSOR or ILU, should be handled
+ * in derived classes by the user.
+ *
+ * <h3>Usage</h3> The simplest use of this class is the following:
+ * @code
+ *                                  // generate a @p SolverControl and
+ *                                  // a @p VectorMemory
+ * SolverControl control;
+ * VectorMemory<Vector<double> > memory;
+ *                                  // generate a solver
+ * SolverCG<SparseMatrix<double>, Vector<double> > solver(control, memory);
+ *                                  // generate a @p PreconditionSelector
+ * PreconditionSelector<SparseMatrix<double>, Vector<double> >
+ *   preconditioning("jacobi", 1.);
+ *                                  // give a matrix whose diagonal entries
+ *                                  // are to be used for the preconditioning.
+ *                                  // Generally the matrix of the linear
+ *                                  // equation system Ax=b.
+ * preconditioning.use_matrix(A);
+ *                                  // call the @p solve function with this
+ *                                  // preconditioning as last argument
+ * solver.solve(A,x,b,preconditioning);
+ * @endcode
+ * The same example where also the @p SolverSelector class is used reads
+ * @code
+ *                                  // generate a @p SolverControl and
+ *                                  // a @p VectorMemory
+ * SolverControl control;
+ * VectorMemory<Vector<double> > memory;
+ *                                  // generate a @p SolverSelector that
+ *                                  // calls the @p SolverCG
+ * SolverSelector<SparseMatrix<double>, Vector<double> >
+ *   solver_selector("cg", control, memory);
+ *                                  // generate a @p PreconditionSelector
+ * PreconditionSelector<SparseMatrix<double>, Vector<double> >
+ *   preconditioning("jacobi", 1.);
+ *
+ * preconditioning.use_matrix(A);
+ *
+ * solver_selector.solve(A,x,b,preconditioning);
+ * @endcode
+ * Using the @p SolverSelector in combination with the @p
+ * PreconditionSelector allows the user to select both the solver and the
+ * preconditioner at the beginning of the program; each time the solver is
+ * started (possibly several times, e.g. in a nonlinear iteration), the
+ * preselected solver and preconditioner are called.
+ *
+ * @author Ralf Hartmann, 1999; extension for full compatibility with
+ * LinearOperator class: Jean-Paul Pelteret, 2015
+ */
+template <typename MatrixType = SparseMatrix<double>,
+          typename VectorType = dealii::Vector<double> >
+class PreconditionSelector : public Subscriptor
+{
+public:
+  /**
+   * Declare type for container size.
+   */
+  typedef typename MatrixType::size_type size_type;
+
+  /**
+   * Constructor. @p preconditioning is the name of the preconditioning to
+   * use (see get_precondition_names()), and @p omega denotes the damping
+   * parameter of the preconditioning.
+   */
+  PreconditionSelector (const std::string                     &preconditioning,
+                        const typename VectorType::value_type &omega=1.);
+
+  /**
+   * Destructor.
+   */
+  virtual ~PreconditionSelector();
+
+  /**
+   * Takes the matrix that is needed for preconditionings that involve a
+   * matrix, e.g. for @p precondition_jacobi, <tt>~_sor</tt>, <tt>~_ssor</tt>.
+   */
+  void use_matrix(const MatrixType &M);
+
+  /**
+   * Return the dimension of the codomain (or range) space. To remember: the
+   * matrix is of dimension $m \times n$.
+   */
+  size_type m () const;
+
+  /**
+   * Return the dimension of the domain space. To remember: the matrix is of
+   * dimension $m \times n$.
+   */
+  size_type n () const;
+
+  /**
+   * Precondition procedure. Calls the preconditioning that was specified in
+   * the constructor.
+   */
+  virtual void vmult (VectorType &dst, const VectorType &src) const;
+
+  /**
+   * Transpose precondition procedure. Calls the preconditioning that was
+   * specified in the constructor.
+   */
+  virtual void Tvmult (VectorType &dst, const VectorType &src) const;
+
+  /**
+   * Get the names of all implemented preconditionings.
+   */
+  static std::string get_precondition_names();
+
+  /**
+   * @addtogroup Exceptions
+   * @{
+   */
+
+
+  /**
+   * Exception raised when a function that needs the matrix is called before
+   * a matrix has been set with use_matrix().
+   */
+  DeclException0 (ExcNoMatrixGivenToUse);
+
+  //@}
+protected:
+
+  /**
+   * Stores the name of the preconditioning.
+   */
+  std::string preconditioning;
+
+private:
+  /**
+   * Matrix that is used for the matrix-builtin preconditioning function. cf.
+   * also @p PreconditionUseMatrix.
+   */
+  SmartPointer<const MatrixType,PreconditionSelector<MatrixType,VectorType> > A;
+
+  /**
+   * Stores the damping parameter of the preconditioner.
+   */
+  const typename VectorType::value_type omega;
+};
+
+/*@}*/
+/* --------------------- Inline and template functions ------------------- */
+
+
+// Constructor: stores the preconditioner name and the damping parameter.
+// The matrix pointer A is not initialized here; it is set by use_matrix().
+template <typename MatrixType, typename VectorType>
+PreconditionSelector<MatrixType,VectorType>
+::PreconditionSelector(const std::string                     &preconditioning,
+                       const typename VectorType::value_type &omega)
+  :
+  preconditioning(preconditioning),
+  omega(omega)
+{}
+
+
+// Destructor: resets the matrix pointer so the matrix A is released.
+template <typename MatrixType, typename VectorType>
+PreconditionSelector<MatrixType,VectorType>
+::~PreconditionSelector()
+{
+  A=0;
+}
+
+
+// Remember a pointer to M for use by the matrix-based preconditioners. Only
+// the address is stored, so M must remain valid while it is in use here.
+template <typename MatrixType, typename VectorType>
+void
+PreconditionSelector<MatrixType,VectorType>
+::use_matrix(const MatrixType &M)
+{
+  A=&M;
+}
+
+
+// Forward m() of the matrix set through use_matrix(); asserts that a matrix
+// has been set.
+template <typename MatrixType, typename VectorType>
+inline
+typename PreconditionSelector<MatrixType,VectorType>::size_type
+PreconditionSelector<MatrixType,VectorType>::m () const
+{
+  Assert(A!=0, ExcNoMatrixGivenToUse());
+  return A->m();
+}
+
+
+// Forward n() of the matrix set through use_matrix(); asserts that a matrix
+// has been set.
+template <typename MatrixType, typename VectorType>
+inline
+typename PreconditionSelector<MatrixType,VectorType>::size_type
+PreconditionSelector<MatrixType,VectorType>::n () const
+{
+  Assert(A!=0, ExcNoMatrixGivenToUse());
+  return A->n();
+}
+
+
+
+// Apply the preconditioner selected by name in the constructor. "none"
+// copies src to dst and needs no matrix; "jacobi", "sor" and "ssor" call the
+// corresponding precondition_* function of the matrix with the stored omega.
+// Any other name triggers an ExcNotImplemented assertion.
+template <typename MatrixType, typename VectorType>
+void PreconditionSelector<MatrixType,VectorType>::vmult (VectorType &dst,
+                                                         const VectorType &src) const
+{
+  // The identity is the only choice that works without a matrix.
+  if (preconditioning=="none")
+    {
+      dst=src;
+      return;
+    }
+
+  Assert(A!=0, ExcNoMatrixGivenToUse());
+
+  if (preconditioning=="jacobi")
+    A->precondition_Jacobi(dst,src,omega);
+  else if (preconditioning=="sor")
+    A->precondition_SOR(dst,src,omega);
+  else if (preconditioning=="ssor")
+    A->precondition_SSOR(dst,src,omega);
+  else
+    {
+      Assert(false,ExcNotImplemented());
+    }
+}
+
+
+// Transposed application of the preconditioner selected by name in the
+// constructor. "none" copies src to dst and needs no matrix; "jacobi" and
+// "ssor" call the same matrix functions as vmult(), "sor" calls
+// precondition_TSOR(). Any other name triggers an ExcNotImplemented
+// assertion.
+template <typename MatrixType, typename VectorType>
+void PreconditionSelector<MatrixType,VectorType>::Tvmult (VectorType &dst,
+                                                          const VectorType &src) const
+{
+  // The identity is the only choice that works without a matrix.
+  if (preconditioning=="none")
+    {
+      dst=src;
+      return;
+    }
+
+  Assert(A!=0, ExcNoMatrixGivenToUse());
+
+  if (preconditioning=="jacobi")
+    A->precondition_Jacobi(dst,src,omega); // Symmetric operation
+  else if (preconditioning=="sor")
+    A->precondition_TSOR(dst,src,omega);
+  else if (preconditioning=="ssor")
+    A->precondition_SSOR(dst,src,omega); // Symmetric operation
+  else
+    {
+      Assert(false,ExcNotImplemented());
+    }
+}
+
+
+// Return the names handled by vmult() and Tvmult() as a single string,
+// separated by '|'.
+template <typename MatrixType, typename VectorType>
+std::string
+PreconditionSelector<MatrixType,VectorType>::get_precondition_names()
+{
+  return std::string("none|jacobi|sor|ssor");
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/relaxation_block.h b/include/deal.II/lac/relaxation_block.h
new file mode 100644
index 0000000..ffeb83c
--- /dev/null
+++ b/include/deal.II/lac/relaxation_block.h
@@ -0,0 +1,477 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__relaxation_block_h
+#define dealii__relaxation_block_h
+
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/precondition_block_base.h>
+#include <deal.II/lac/sparsity_pattern.h>
+
+#include <vector>
+#include <set>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * Base class for the implementation of overlapping, multiplicative Schwarz
+ * relaxation methods and smoothers.
+ *
+ * This class uses the infrastructure provided by PreconditionBlockBase. It
+ * adds functions to initialize with a block list and to do the relaxation
+ * step. The actual relaxation method with the interface expected by
+ * SolverRelaxation and MGSmootherRelaxation is in the derived classes.
+ *
+ * This class allows for more general relaxation methods than
+ * PreconditionBlock, since the index sets may be arbitrary and overlapping,
+ * while there only contiguous, disjoint sets of equal size are allowed. As a
+ * drawback, this class cannot be used as a preconditioner, since its
+ * implementation relies on a straightforward implementation of the
+ * Gauss-Seidel process.
+ *
+ * @ingroup Preconditioners
+ * @author Guido Kanschat
+ * @date 2010
+ */
+template <typename MatrixType, typename inverse_type=typename MatrixType::value_type>
+class RelaxationBlock :
+  protected PreconditionBlockBase<inverse_type>
+{
+private:
+  /**
+   * Define number type of matrix.
+   */
+  typedef typename MatrixType::value_type number;
+
+  /**
+   * Value type for inverse matrices.
+   */
+  typedef inverse_type value_type;
+
+public:
+  /**
+   * Declare type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * Parameters for block relaxation methods. In addition to typical control
+   * parameters like #relaxation, this object also contains the block
+   * structure in #block_list and an optional ordering of the blocks in
+   * #order.
+   */
+  class AdditionalData : public Subscriptor
+  {
+  public:
+    /**
+     * Constructor.
+     */
+    AdditionalData (const double relaxation = 1.,
+                    const bool invert_diagonal = true,
+                    const bool same_diagonal = false);
+
+    /**
+     * The mapping from indices to blocks. Each row of this pattern enumerates
+     * the indices constituting a diagonal block to be inverted.
+     */
+    SparsityPattern block_list;
+
+    /**
+     * Relaxation parameter.
+     */
+    double relaxation;
+
+    /**
+     * Invert diagonal during initialization. Alternatively, diagonal blocks
+     * are inverted on the fly, whenever they are used. While inverting blocks
+     * in advance requires more memory, it usually saves a lot of computation.
+     * See #same_diagonal on how you can avoid memory overhead.
+     */
+    bool invert_diagonal;
+
+    /**
+     * Assume all diagonal blocks are equal to save memory. If this flag is
+     * true, then only the first diagonal block of the matrix is inverted and
+     * stored. It is then used for all other blocks.
+     *
+     * \note Avoid setting this true if your blocks are not equal, in
+     * particular if their sizes differ.
+     */
+    bool same_diagonal;
+    /**
+     * Choose the inversion method for the blocks.
+     */
+    typename PreconditionBlockBase<inverse_type>::Inversion inversion;
+
+    /**
+     * If #inversion is SVD, we can compute the Moore-Penrose inverse of the
+     * blocks. In order to do so, we can specify here the threshold below
+     * which a singular value will be considered zero and thus not inverted.
+     * This parameter is used in the call to
+     * LAPACKFullMatrix::compute_inverse_svd().
+     */
+    double threshold;
+
+    /**
+     * The order in which blocks should be traversed. This vector can initiate
+     * several modes of execution:
+     *
+     * <ol>
+     *
+     * <li>If the length of the vector is zero, then the relaxation method
+     * will be executed from first to last block.</li>
+     *
+     * <li> If the length is one, then the inner vector must have the same
+     * size as the number of blocks. The relaxation method is applied in the
+     * order given in this vector.</li>
+     *
+     * <li> If the outer vector has length greater than one, then the relaxation
+     * method is applied several times, each time in the order given by the
+     * inner vector of the corresponding index. This mode can for instance be
+     * used for ADI methods and similar direction sweeps.</li>
+     *
+     * </ol>
+     */
+    std::vector<std::vector<unsigned int> > order;
+    /**
+     * Return the memory allocated in this object.
+     */
+    std::size_t memory_consumption() const;
+  };
+
+  /**
+   * Initialize matrix and additional information. In a second step, the
+   * inverses of the diagonal blocks may be computed.
+   *
+   * Note that AdditionalData, different from other preconditioners, defines
+   * quite large objects, and that therefore the object is not copied, but
+   * rather a pointer is stored. Thus, the lifetime of
+   * <code>parameters</code> has to exceed the lifetime of this object.
+   */
+  void initialize (const MatrixType     &A,
+                   const AdditionalData &parameters);
+
+  /**
+   * Deletes the inverse diagonal block matrices if existent, sets the
+   * blocksize to 0, hence leaves the class in the state that it had directly
+   * after calling the constructor.
+   */
+  void clear();
+
+  /**
+   * Checks whether the object is empty.
+   */
+  bool empty () const;
+
+  /**
+   * Read-only access to entries.  This function is only possible if the
+   * inverse diagonal blocks are stored.
+   */
+  value_type el(size_type i,
+                size_type j) const;
+
+  /**
+   * Stores the inverse of the diagonal blocks in @p inverse. This costs some
+   * additional memory - for DG methods about 1/3 (for double inverses) or 1/6
+   * (for float inverses) of that used for the matrix - but it makes the
+   * preconditioning much faster.
+   *
+   * It is not allowed to call this function twice (will produce an error)
+   * before a call of <tt>clear(...)</tt> because at the second time there
+   * already exist the inverse matrices.
+   *
+   * After this function is called, the lock on the matrix given through the
+   * @p use_matrix function is released, i.e. you may overwrite or delete it.
+   * You may want to do this in case you use this matrix to precondition
+   * another matrix.
+   */
+  void invert_diagblocks();
+
+protected:
+  /**
+   * Perform one block relaxation step.
+   *
+   * Depending on the arguments @p dst and @p prev, this performs an SOR step
+   * (both reference the same vector) or a Jacobi step (both are different
+   * vectors). For the Jacobi step, the calling function must copy @p dst to
+   * @p prev after this.
+   */
+  template <typename number2>
+  void do_step (
+    Vector<number2>       &dst,
+    const Vector<number2> &prev,
+    const Vector<number2> &src,
+    const bool backward) const;
+  /**
+   * Pointer to the matrix. Make sure that the matrix exists as long as this
+   * class needs it, i.e. until calling @p invert_diagblocks, or (if the
+   * inverse matrices should not be stored) until the last call of the
+   * preconditioning @p vmult function of the derived classes.
+   */
+  SmartPointer<const MatrixType,RelaxationBlock<MatrixType,inverse_type> > A;
+
+  /**
+   * Control information.
+   */
+  SmartPointer<const AdditionalData, RelaxationBlock<MatrixType,inverse_type> > additional_data;
+};
+
+
+/**
+ * Block Jacobi (additive Schwarz) method with possibly overlapping blocks.
+ *
+ * This class implements the step() and Tstep() functions expected by the
+ * @ref ConceptRelaxationType "relaxation concept".
+ * They perform an additive Schwarz method on the blocks provided in the block
+ * list of AdditionalData. Differing from PreconditionBlockJacobi, these
+ * blocks may be of varying size, non-contiguous, and overlapping. On the
+ * other hand, this class does not implement the preconditioner interface
+ * expected by Solver objects.
+ *
+ * @ingroup Preconditioners
+ * @author Guido Kanschat
+ * @date 2010
+ */
+template<typename MatrixType, typename inverse_type = typename MatrixType::value_type>
+class RelaxationBlockJacobi : public virtual Subscriptor,
+  protected RelaxationBlock<MatrixType, inverse_type>
+{
+public:
+  /**
+   * Default constructor (its declaration below is commented out).
+   */
+//    RelaxationBlockJacobi();
+
+  /**
+   * Define number type of matrix.
+   */
+  typedef typename MatrixType::value_type number;
+
+  /**
+   * Make type publicly available.
+   */
+  using typename RelaxationBlock<MatrixType,inverse_type>::AdditionalData;
+
+  /**
+   * Make initialization function publicly available.
+   */
+  using RelaxationBlock<MatrixType, inverse_type>::initialize;
+
+  /**
+   * Make function of base class public again.
+   */
+  using RelaxationBlock<MatrixType, inverse_type>::clear;
+
+  /**
+   * Make function of base class public again.
+   */
+  using RelaxationBlock<MatrixType, inverse_type>::empty;
+  /**
+   * Make function of base class public again.
+   */
+  using RelaxationBlock<MatrixType, inverse_type>::size;
+  /**
+   * Make function of base class public again.
+   */
+  using RelaxationBlock<MatrixType, inverse_type>::inverse;
+  /**
+   * Make function of base class public again.
+   */
+  using RelaxationBlock<MatrixType, inverse_type>::inverse_householder;
+  /**
+   * Make function of base class public again.
+   */
+  using RelaxationBlock<MatrixType, inverse_type>::inverse_svd;
+  using PreconditionBlockBase<inverse_type>::log_statistics;
+  /**
+   * Perform one step of the Jacobi iteration.
+   */
+  template <typename number2>
+  void step (Vector<number2> &dst, const Vector<number2> &rhs) const;
+
+  /**
+   * Perform one step of the Jacobi iteration, traversing the blocks backward.
+   */
+  template <typename number2>
+  void Tstep (Vector<number2> &dst, const Vector<number2> &rhs) const;
+
+  /**
+   * Return the memory allocated in this object.
+   */
+  std::size_t memory_consumption() const;
+};
+
+
+/**
+ * Block Gauss-Seidel method with possibly overlapping blocks.
+ *
+ * This class implements the step() and Tstep() functions expected by the
+ * @ref ConceptRelaxationType "relaxation concept".
+ * They perform a multiplicative Schwarz method on the blocks provided in the
+ * block list of AdditionalData. Differing from PreconditionBlockSOR, these
+ * blocks may be of varying size, non-contiguous, and overlapping. On the
+ * other hand, this class does not implement the preconditioner interface
+ * expected by Solver objects.
+ *
+ * @ingroup Preconditioners
+ * @author Guido Kanschat
+ * @date 2010
+ */
+template<typename MatrixType, typename inverse_type = typename MatrixType::value_type>
+class RelaxationBlockSOR : public virtual Subscriptor,
+  protected RelaxationBlock<MatrixType, inverse_type>
+{
+public:
+  /**
+   * Default constructor (its declaration below is commented out).
+   */
+//    RelaxationBlockSOR();
+
+  /**
+   * Define number type of matrix.
+   */
+  typedef typename MatrixType::value_type number;
+
+  /**
+   * Make type publicly available.
+   */
+  using typename RelaxationBlock<MatrixType,inverse_type>::AdditionalData;
+
+  /**
+   * Make initialization function publicly available.
+   */
+  using RelaxationBlock<MatrixType, inverse_type>::initialize;
+
+  /**
+   * Make function of base class public again.
+   */
+  using RelaxationBlock<MatrixType, inverse_type>::clear;
+
+  /**
+   * Make function of base class public again.
+   */
+  using RelaxationBlock<MatrixType, inverse_type>::empty;
+  /**
+   * Make function of base class public again.
+   */
+  using RelaxationBlock<MatrixType, inverse_type>::size;
+  /**
+   * Make function of base class public again.
+   */
+  using RelaxationBlock<MatrixType, inverse_type>::inverse;
+  /**
+   * Make function of base class public again.
+   */
+  using RelaxationBlock<MatrixType, inverse_type>::inverse_householder;
+  /**
+   * Make function of base class public again.
+   */
+  using RelaxationBlock<MatrixType, inverse_type>::inverse_svd;
+  using PreconditionBlockBase<inverse_type>::log_statistics;
+  /**
+   * Perform one step of the SOR iteration (forward sweep over the blocks).
+   */
+  template <typename number2>
+  void step (Vector<number2> &dst, const Vector<number2> &rhs) const;
+
+  /**
+   * Perform one step of the transposed SOR iteration (backward sweep).
+   */
+  template <typename number2>
+  void Tstep (Vector<number2> &dst, const Vector<number2> &rhs) const;
+};
+
+
+/**
+ * Symmetric block Gauss-Seidel method with possibly overlapping blocks.
+ *
+ * This class implements the step() and Tstep() functions expected by the
+ * @ref ConceptRelaxationType "relaxation concept".
+ * They perform a multiplicative Schwarz method on the blocks provided in the
+ * block list of AdditionalData in symmetric fashion. Differing from
+ * PreconditionBlockSSOR, these blocks may be of varying size, non-contiguous,
+ * and overlapping. On the other hand, this class does not implement the
+ * preconditioner interface expected by Solver objects.
+ *
+ * @ingroup Preconditioners
+ * @author Guido Kanschat
+ * @date 2010
+ */
+template<typename MatrixType, typename inverse_type = typename MatrixType::value_type>
+class RelaxationBlockSSOR : public virtual Subscriptor,
+  protected RelaxationBlock<MatrixType, inverse_type>
+{
+public:
+  /**
+   * Define number type of matrix.
+   */
+  typedef typename MatrixType::value_type number;
+
+  /**
+   * Make type publicly available.
+   */
+  using typename RelaxationBlock<MatrixType,inverse_type>::AdditionalData;
+
+  /**
+   * Make initialization function publicly available.
+   */
+  using RelaxationBlock<MatrixType, inverse_type>::initialize;
+
+  /**
+   * Make function of base class public again.
+   */
+  using RelaxationBlock<MatrixType, inverse_type>::clear;
+
+  /**
+   * Make function of base class public again.
+   */
+  using RelaxationBlock<MatrixType, inverse_type>::empty;
+
+  /**
+   * Make function of base class public again.
+   */
+  using RelaxationBlock<MatrixType, inverse_type>::size;
+  /**
+   * Make function of base class public again.
+   */
+  using RelaxationBlock<MatrixType, inverse_type>::inverse;
+  /**
+   * Make function of base class public again.
+   */
+  using RelaxationBlock<MatrixType, inverse_type>::inverse_householder;
+  /**
+   * Make function of base class public again.
+   */
+  using RelaxationBlock<MatrixType, inverse_type>::inverse_svd;
+  using PreconditionBlockBase<inverse_type>::log_statistics;
+  /**
+   * Perform one SSOR step: a forward sweep followed by a backward sweep.
+   */
+  template <typename number2>
+  void step (Vector<number2> &dst, const Vector<number2> &rhs) const;
+
+  /**
+   * Perform one transposed SSOR step: a backward sweep, then a forward one.
+   */
+  template <typename number2>
+  void Tstep (Vector<number2> &dst, const Vector<number2> &rhs) const;
+};
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/relaxation_block.templates.h b/include/deal.II/lac/relaxation_block.templates.h
new file mode 100644
index 0000000..ce4174f
--- /dev/null
+++ b/include/deal.II/lac/relaxation_block.templates.h
@@ -0,0 +1,299 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__relaxation_block_templates_h
+#define dealii__relaxation_block_templates_h
+
+#include <deal.II/lac/relaxation_block.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/vector_memory.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <typename MatrixType, typename inverse_type>
+inline
+RelaxationBlock<MatrixType,inverse_type>::AdditionalData::AdditionalData
+(const double relaxation,
+ const bool   invert_diagonal,
+ const bool   same_diagonal)
+  : // the three arguments are copied into the members of the same name
+  relaxation(relaxation),
+  invert_diagonal(invert_diagonal),
+  same_diagonal(same_diagonal),
+  inversion(PreconditionBlockBase<inverse_type>::gauss_jordan), // fixed default, not a constructor argument
+  threshold(0.) // fixed default, not a constructor argument
+{} // block_list and order are not listed above and are default-constructed
+
+
+template <typename MatrixType, typename inverse_type>
+inline
+std::size_t
+RelaxationBlock<MatrixType,inverse_type>::AdditionalData::memory_consumption() const
+{
+  // block_list is a member: take its sizeof out of sizeof(*this), then add its own report.
+  std::size_t bytes = sizeof(*this) - sizeof(block_list);
+  bytes += block_list.memory_consumption();
+  for (unsigned int perm=0; perm!=order.size(); ++perm)
+    bytes += MemoryConsumption::memory_consumption(order[perm]);
+  return bytes;
+}
+
+// Store pointers to the matrix and the parameters, size the base class, then invert the diagonal blocks.
+template <typename MatrixType, typename inverse_type>
+inline
+void
+RelaxationBlock<MatrixType,inverse_type>::initialize (const MatrixType     &M,
+                                                      const AdditionalData &parameters)
+{
+  Assert (parameters.invert_diagonal, ExcNotImplemented()); // only up-front inversion is supported here
+
+  clear(); // reset both pointers and the base class state before re-initializing
+//  Assert (M.m() == M.n(), ExcNotQuadratic());
+  A = &M; // only addresses are kept: M and parameters must outlive this object
+  additional_data = &parameters;
+  this->inversion = parameters.inversion;
+
+  this->reinit(additional_data->block_list.n_rows(), 0, additional_data->same_diagonal,
+               additional_data->inversion);
+
+  if (additional_data->invert_diagonal)
+    invert_diagblocks();
+}
+
+
+template <typename MatrixType, typename inverse_type>
+inline void
+RelaxationBlock<MatrixType,inverse_type>::clear ()
+{
+  // Drop the matrix and parameter pointers first, then reset the base class.
+  this->A = 0;
+  this->additional_data = 0;
+  this->PreconditionBlockBase<inverse_type>::clear ();
+}
+
+// For every row of block_list, gather the matching dense diagonal block of the matrix and invert it.
+template <typename MatrixType, typename inverse_type>
+inline
+void
+RelaxationBlock<MatrixType,inverse_type>::invert_diagblocks ()
+{
+  const MatrixType &M=*A;
+  FullMatrix<inverse_type> M_cell;
+
+  if (this->same_diagonal())
+    {
+      Assert(false, ExcNotImplemented()); // sharing one inverse among all blocks is not implemented
+    }
+  else
+    {
+      for (size_type block=0; block<additional_data->block_list.n_rows(); ++block)
+        {
+          const size_type bs = additional_data->block_list.row_length(block);
+          M_cell.reinit(bs, bs);
+
+          // For each index listed for this block, walk the matrix row
+          // of that index and copy the entries whose column is also
+          // part of the block; all other entries are skipped.
+          SparsityPattern::iterator row
+            = additional_data->block_list.begin(block);
+          for (size_type row_cell=0; row_cell<bs; ++row_cell, ++row)
+            {
+//TODO:[GK] Optimize here
+              for (typename MatrixType::const_iterator entry = M.begin(row->column());
+                   entry != M.end(row->column()); ++entry)
+                {
+                  const size_type column = entry->column();
+                  const size_type col_cell = additional_data->block_list.row_position(block, column);
+                  if (col_cell != numbers::invalid_size_type)
+                    M_cell(row_cell, col_cell) = entry->value();
+                }
+            }
+          // M_cell now holds the diagonal block.
+          // Keep a copy of it if diagonals are
+          // stored, then build the inverse in the
+          // form selected by this->inversion.
+          if (this->store_diagonals())
+            {
+              this->diagonal(block).reinit(bs, bs);
+              this->diagonal(block) = M_cell;
+            }
+          switch (this->inversion)
+            {
+            case PreconditionBlockBase<inverse_type>::gauss_jordan:
+              this->inverse(block).reinit(bs, bs);
+              this->inverse(block).invert(M_cell);
+              break;
+            case PreconditionBlockBase<inverse_type>::householder:
+              this->inverse_householder(block).initialize(M_cell);
+              break;
+            case PreconditionBlockBase<inverse_type>::svd:
+              this->inverse_svd(block).reinit(bs, bs);
+              this->inverse_svd(block) = M_cell;
+              this->inverse_svd(block).compute_inverse_svd(additional_data->threshold); // threshold: see AdditionalData::threshold
+              break;
+            default:
+              Assert(false, ExcNotImplemented());
+            }
+        }
+    }
+  this->inverses_computed(true);
+}
+
+// One relaxation sweep over all blocks: dst = prev + relaxation * (block inverse) * (src - M*prev), block by block.
+template <typename MatrixType, typename inverse_type>
+template <typename number2>
+inline
+void
+RelaxationBlock<MatrixType,inverse_type>::do_step (Vector<number2>       &dst,
+                                                   const Vector<number2> &prev,
+                                                   const Vector<number2> &src,
+                                                   const bool             backward) const
+{
+  Assert (additional_data->invert_diagonal, ExcNotImplemented());
+
+  const MatrixType &M=*this->A;
+  Vector<number2> b_cell, x_cell;
+
+  const bool permutation_empty = additional_data->order.size() == 0;
+  const unsigned int n_permutations = (permutation_empty)
+                                      ? 1U : additional_data->order.size();
+  const size_type n_blocks = additional_data->block_list.n_rows();
+
+  if (!permutation_empty)
+    for (unsigned int i=0; i<additional_data->order.size(); ++i)
+      AssertDimension(additional_data->order[i].size(), this->size());
+
+  for (unsigned int perm=0; perm<n_permutations; ++perm)
+    {
+      for (unsigned int bi=0; bi<n_blocks; ++bi)
+        {
+          const unsigned int raw_block = backward ? (n_blocks - bi - 1) : bi; // backward sweeps count down
+          const unsigned int block = permutation_empty
+                                     ? raw_block
+                                     : (backward
+                                        ? (additional_data->order[n_permutations-1-perm][raw_block]) // and take the orderings last to first
+                                        : (additional_data->order[perm][raw_block]));
+
+          const size_type bs = additional_data->block_list.row_length(block);
+
+          b_cell.reinit(bs);
+          x_cell.reinit(bs);
+          // Local residual: b_cell = src - M*prev on the rows of this block (full matrix rows are used)
+          SparsityPattern::iterator row = additional_data->block_list.begin(block);
+          for (size_type row_cell=0; row_cell<bs; ++row_cell, ++row)
+            {
+              b_cell(row_cell) = src(row->column());
+              for (typename MatrixType::const_iterator entry = M.begin(row->column());
+                   entry != M.end(row->column()); ++entry)
+                b_cell(row_cell) -= entry->value() * prev(entry->column());
+            }
+          // Apply the inverse of this diagonal block to the local residual
+          this->inverse_vmult(block, x_cell, b_cell);
+#ifdef DEBUG
+          for (unsigned int i=0; i<x_cell.size(); ++i)
+            {
+              AssertIsFinite(x_cell(i));
+            }
+#endif
+          // Relaxed update on the indices of this block; when dst and prev alias, later blocks see it
+          row=additional_data->block_list.begin(block);
+          for (size_type row_cell=0; row_cell<bs; ++row_cell, ++row)
+            dst(row->column()) = prev(row->column()) + additional_data->relaxation * x_cell(row_cell);
+        }
+    }
+}
+
+
+//----------------------------------------------------------------------//
+
+template <typename MatrixType, typename inverse_type>
+template <typename number2>
+void RelaxationBlockJacobi<MatrixType,inverse_type>::step
+(Vector<number2> &dst, const Vector<number2> &src) const
+{
+  // Relax every block against a copy of the incoming iterate, sweeping forward.
+  GrowingVectorMemory<Vector<number2> > vector_pool;
+  typename VectorMemory<Vector<number2> >::Pointer old_iterate = vector_pool;
+  old_iterate->reinit(dst, false);
+  *old_iterate = dst;
+  this->do_step(dst, *old_iterate, src, /*backward=*/false);
+}
+
+
+template <typename MatrixType, typename inverse_type>
+template <typename number2>
+void RelaxationBlockJacobi<MatrixType,inverse_type>::Tstep
+(Vector<number2> &dst, const Vector<number2> &src) const
+{
+  // Relax every block against a copy of the incoming iterate, sweeping backward.
+  GrowingVectorMemory<Vector<number2> > vector_pool;
+  typename VectorMemory<Vector<number2> >::Pointer old_iterate = vector_pool;
+  old_iterate->reinit(dst, false);
+  *old_iterate = dst;
+  this->do_step(dst, *old_iterate, src, /*backward=*/true);
+}
+
+
+//----------------------------------------------------------------------//
+
+template <typename MatrixType, typename inverse_type>
+template <typename number2>
+void RelaxationBlockSOR<MatrixType,inverse_type>::step
+(Vector<number2> &dst, const Vector<number2> &src) const
+{
+  // dst also serves as the previous iterate, so each block sees earlier updates.
+  this->do_step(dst, dst, src, /*backward=*/false);
+}
+
+
+template <typename MatrixType, typename inverse_type>
+template <typename number2>
+void RelaxationBlockSOR<MatrixType,inverse_type>::Tstep
+(Vector<number2> &dst, const Vector<number2> &src) const
+{
+  // Same in-place update as step(), with the blocks visited in reverse.
+  this->do_step(dst, dst, src, /*backward=*/true);
+}
+
+
+//----------------------------------------------------------------------//
+
+template <typename MatrixType, typename inverse_type>
+template <typename number2>
+void RelaxationBlockSSOR<MatrixType,inverse_type>::step
+(Vector<number2> &dst, const Vector<number2> &src) const
+{
+  // Symmetric sweep, in place: forward pass over the blocks, then backward pass.
+  this->do_step(dst, dst, src, /*backward=*/false);
+  this->do_step(dst, dst, src, /*backward=*/true);
+}
+
+
+template <typename MatrixType, typename inverse_type>
+template <typename number2>
+void RelaxationBlockSSOR<MatrixType,inverse_type>::Tstep
+(Vector<number2> &dst, const Vector<number2> &src) const
+{
+  // Symmetric sweep, in place: backward pass over the blocks, then forward pass.
+  this->do_step(dst, dst, src, /*backward=*/true);
+  this->do_step(dst, dst, src, /*backward=*/false);
+}
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+
+#endif
diff --git a/include/deal.II/lac/schur_complement.h b/include/deal.II/lac/schur_complement.h
new file mode 100644
index 0000000..fcdb7c0
--- /dev/null
+++ b/include/deal.II/lac/schur_complement.h
@@ -0,0 +1,503 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2015 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__schur_complement_h
+#define dealii__schur_complement_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/lac/vector_memory.h>
+#include <deal.II/lac/linear_operator.h>
+#include <deal.II/lac/packaged_operation.h>
+
+#ifdef DEAL_II_WITH_CXX11
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * @name Creation of a LinearOperator related to the Schur Complement
+ */
+//@{
+
+/**
+ * @relates LinearOperator
+ *
+ * Returns a LinearOperator that performs the operations associated with the
+ * Schur complement. There are two additional helper functions,
+ * condense_schur_rhs() and postprocess_schur_solution(), that are likely
+ * necessary to be used in order to perform any useful tasks in linear algebra
+ * with this operator.
+ *
+ * We construct the definition of the Schur complement in the following way:
+ *
+ * Consider a general system of linear equations that can be decomposed into
+ * two major sets of equations:
+ * @f{eqnarray*}{
+ * \mathbf{K}\mathbf{d} = \mathbf{f}
+ * \quad \Rightarrow\quad
+ * \left(\begin{array}{cc}
+ *    A & B \\ C & D
+ * \end{array}\right)
+ * \left(\begin{array}{cc}
+ *    x \\ y
+ * \end{array}\right)
+ * =
+ * \left(\begin{array}{cc}
+ *    f \\ g
+ * \end{array}\right),
+ * @f}
+ * where $ A,B,C,D $  represent general subblocks of the matrix $ \mathbf{K} $
+ * and, similarly, general subvectors of $ \mathbf{d},\mathbf{f} $ are given
+ * by $ x,y,f,g $ .
+ *
+ * This is equivalent to the following two statements:
+ * @f{eqnarray*}{
+ *   (1) \quad Ax + By &=& f \\
+ *   (2) \quad Cx + Dy &=& g \quad .
+ * @f}
+ *
+ * Assuming that $ A,D $ are both square and invertible, we could then perform
+ * one of two possible substitutions,
+ * @f{eqnarray*}{
+ *   (3) \quad x &=& A^{-1}(f - By) \quad \text{from} \quad (1) \\
+ *   (4) \quad y &=& D^{-1}(g - Cx) \quad \text{from} \quad (2) ,
+ * @f}
+ * which amount to performing block Gaussian elimination on this system of
+ * equations.
+ *
+ * For the purpose of the current implementation, we choose to substitute (3)
+ * into (2)
+ * @f{eqnarray*}{
+ *   C \: A^{-1}(f - By) + Dy &=& g \\
+ *   -C \: A^{-1} \: By + Dy &=& g - C \: A^{-1} \: f \quad .
+ * @f}
+ * This leads to the result
+ * @f[
+ *   (5) \quad (D - C\: A^{-1} \:B)y  = g - C \: A^{-1} f
+ *       \quad \Rightarrow \quad Sy = g'
+ * @f]
+ * with $ S = (D - C\: A^{-1} \:B) $ being the Schur complement and the
+ * modified right-hand side vector $ g' = g - C \: A^{-1} f $ arising from the
+ * condensation step. Note that for this choice of $ S $, submatrix $ D $ need
+ * not be invertible and may thus be the null matrix. Ideally $ A $ should be
+ * well-conditioned.
+ *
+ * So for any arbitrary vector $ a $, the Schur complement performs the
+ * following operation:
+ * @f[
+ *   (6) \quad Sa = (D - C \: A^{-1} \: B)a
+ * @f]
+ *
+ * A typical set of steps needed to solve a linear system (1),(2) would be:
+ * 1. Define the inverse matrix @p A_inv (using inverse_operator()).
+ * 2. Define the Schur complement $ S $ (using schur_complement()).
+ * 3. Define iterative inverse matrix $ S^{-1} $ such that (6)
+ * holds. It is necessary to use a solver with a preconditioner to compute the
+ * approximate inverse operation of $ S $ since we never compute $ S $
+ * directly, but rather the result of its operation. To achieve this, one may
+ * again use the inverse_operator() in conjunction with the Schur complement
+ * that we've just constructed. Observe that both $ S $ and its
+ * preconditioner operate over the same space as $ D $.
+ * 4. Perform pre-processing step on the RHS of (5) using
+ * condense_schur_rhs():
+ *    @f[
+ *      g' = g - C \: A^{-1} \: f
+ *    @f]
+ * 5. Solve for $ y $ in (5):
+ *    @f[
+ *      y =  S^{-1} g'
+ *    @f]
+ * 6. Perform the post-processing step from (3) using
+ * postprocess_schur_solution():
+ *    @f[
+ *      x =  A^{-1} (f - By)
+ *    @f]
+ *
+ * An illustration of typical usage of this operator for a fully coupled
+ * system is given below.
+ * @code
+ *    #include<deal.II/lac/schur_complement.h>
+ *
+ *    // Given BlockMatrix K and BlockVectors d,F
+ *
+ *    // Decomposition of tangent matrix
+ *    const auto A = linear_operator(K.block(0,0));
+ *    const auto B = linear_operator(K.block(0,1));
+ *    const auto C = linear_operator(K.block(1,0));
+ *    const auto D = linear_operator(K.block(1,1));
+ *
+ *    // Decomposition of solution vector
+ *    auto x = d.block(0);
+ *    auto y = d.block(1);
+ *
+ *    // Decomposition of RHS vector
+ *    auto f = F.block(0);
+ *    auto g = F.block(1);
+ *
+ *    // Construction of inverse of Schur complement
+ *    const auto prec_A = PreconditionSelector<...>(A);
+ *    const auto A_inv = inverse_operator<...>(A,prec_A);
+ *    const auto S = schur_complement(A_inv,B,C,D);
+ *    const auto S_prec = PreconditionSelector<...>(D); // D and S operate on same space
+ *    const auto S_inv = inverse_operator<...>(S,...,S_prec);
+ *
+ *    // Solve reduced block system
+ *    auto rhs = condense_schur_rhs (A_inv,C,f,g); // PackagedOperation that represents the condensed form of g
+ *    y = S_inv * rhs; // Solve for y
+ *    x = postprocess_schur_solution (A_inv,B,y,f); // Compute x using resolved solution y
+ * @endcode
+ *
+ * In the above example, the preconditioner for $ S $ was defined as the
+ * preconditioner for $ D $, which is valid since they operate on the same
+ * space. However, if $ D $ and $ S $ are too dissimilar, then this may lead
+ * to a large number of solver iterations as $ \text{prec}(D) $ is not a good
+ * approximation for $ S^{-1} $.
+ *
+ * A better preconditioner in such a case would be one that provides a more
+ * representative approximation for $ S^{-1} $. One approach is shown in
+ * step-22, where $ D $ is the null matrix and the preconditioner for $ S^{-1}
+ * $ is derived from the mass matrix over this space.
+ *
+ * From another viewpoint, a similar result can be achieved by first
+ * constructing an object that represents an approximation for $ S $ wherein
+ * the expensive operation, namely $ A^{-1} $, is approximated. Thereafter we
+ * construct the approximate inverse operator $ \tilde{S}^{-1} $ which is then
+ * used as the preconditioner for computing $ S^{-1} $.
+ * @code
+ *    // Construction of approximate inverse of Schur complement
+ *    const auto A_inv_approx = linear_operator(preconditioner_A);
+ *    const auto S_approx = schur_complement(A_inv_approx,B,C,D);
+ *    const auto S_approx_prec = PreconditionSelector<...>(D); // D and S_approx operate on same space
+ *    const auto S_inv_approx = inverse_operator(S_approx,...,S_approx_prec); // Inner solver: Typically limited to few iterations using IterationNumberControl
+ *
+ *    // Construction of exact inverse of Schur complement
+ *    const auto S = schur_complement(A_inv,B,C,D);
+ *    const auto S_inv = inverse_operator(S,...,S_inv_approx); // Outer solver
+ *
+ *    // Solve reduced block system
+ *    auto rhs = condense_schur_rhs (A_inv,C,f,g);
+ *    y = S_inv * rhs; // Solve for y
+ *    x = postprocess_schur_solution (A_inv,B,y,f);
+ * @endcode
+ * Note that due to the construction of @c S_inv_approx and subsequently @c
+ * S_inv, there are a pair of nested iterative solvers which could
+ * collectively consume a lot of resources. Therefore care should be taken in
+ * the choices leading to the construction of the iterative inverse_operators.
+ * One might consider the use of an IterationNumberControl (or a similar
+ * mechanism) to limit the number of inner solver iterations. This controls
+ * the accuracy of the approximate inverse operation $ \tilde{S}^{-1} $ which
+ * acts only as the preconditioner for $ S^{-1} $. Furthermore, the
+ * preconditioner to $ \tilde{S}^{-1} $, which in this example is $
+ * \text{prec}(D) $, should ideally be computationally inexpensive.
+ *
+ * However, if an iterative solver based on IterationNumberControl is used as
+ * a preconditioner then the preconditioning operation is not a linear
+ * operation. Here a flexible solver like SolverFGMRES (flexible GMRES) is
+ * best employed as an outer solver in order to deal with the variable
+ * behaviour of the preconditioner. Otherwise the iterative solver can
+ * stagnate somewhere near the tolerance of the preconditioner or generally
+ * behave erratically. Alternatively, using a ReductionControl would ensure
+ * that the preconditioner always solves to the same tolerance, thereby
+ * rendering its behaviour constant.
+ *
+ * Further examples of this functionality can be found in the test-suite, such
+ * as <code>tests/lac/schur_complement_01.cc</code> . The solution of a multi-
+ * component problem (namely step-22) using the schur_complement can be found
+ * in <code>tests/lac/schur_complement_03.cc</code> .
+ *
+ * @see
+ * @ref GlossBlockLA "Block (linear algebra)"
+ * @author Jean-Paul Pelteret, Matthias Maier, Martin Kronbichler, 2015
+ *
+ * @ingroup LAOperators
+ */
+template <typename Range_1, typename Domain_1,
+          typename Range_2, typename Domain_2>
+LinearOperator<Range_2, Domain_2>
+schur_complement(const LinearOperator<Domain_1, Range_1> &A_inv,
+                 const LinearOperator<Range_1, Domain_2> &B,
+                 const LinearOperator<Range_2, Domain_1> &C,
+                 const LinearOperator<Range_2, Domain_2> &D)
+{
+  LinearOperator<Range_2, Domain_2> return_op;
+
+  return_op.reinit_range_vector = D.reinit_range_vector;
+  return_op.reinit_domain_vector = D.reinit_domain_vector;
+
+  // The lambdas below capture A_inv,B,C,D by value, so the returned
+  // operator does not refer back to the arguments of this function.
+
+  return_op.vmult_add = [A_inv,B,C,D](Range_2 &dst_g, const Domain_2 &src_y)
+  {
+    static GrowingVectorMemory<Range_1>  vector_memory_f;
+    static GrowingVectorMemory<Range_2>  vector_memory_g;
+    static GrowingVectorMemory<Domain_1> vector_memory_x;
+
+    typename VectorMemory<Range_1>::Pointer  tmp_f (vector_memory_f);
+    typename VectorMemory<Range_2>::Pointer  tmp_g (vector_memory_g);
+    typename VectorMemory<Domain_1>::Pointer tmp_x (vector_memory_x);
+
+    // Reinitialise in context of how they'll be used
+    B.reinit_range_vector(*tmp_f, /*bool omit_zeroing_entries =*/ true);
+    A_inv.reinit_range_vector(*tmp_x, /*bool omit_zeroing_entries =*/ true);
+    C.reinit_range_vector(*tmp_g, /*bool omit_zeroing_entries =*/ true);
+
+    // Need to form dst_g such that dst_g = S*src_y = (D - C*A_inv*B) src_y
+    if (D.is_null_operator == false)
+      D.vmult_add (dst_g, src_y); // dst_g += D*src_y (length y)
+
+    B.vmult (*tmp_f, src_y); // tmp_f = B*src_y (length x)
+    try
+      {
+        A_inv.vmult (*tmp_x, *tmp_f); // tmp_x = A_inv*B*src_y (length x)
+      }
+    catch (...)
+      {
+        AssertThrow(false,
+                    ExcMessage("No convergence in A_inv vmult operation"));
+      }
+    C.vmult (*tmp_g, *tmp_x); // tmp_g = C*A_inv*B*src_y (length y)
+    dst_g -= *tmp_g; // dst_g += D*src_y - C*A_inv*B*src_y
+
+    // tmp_f, tmp_g and tmp_x are handed back to their pools by the
+    // VectorMemory::Pointer destructors, including when the
+    // AssertThrow above propagates an exception out of this lambda.
+  };
+
+  const auto vmult_add = return_op.vmult_add;
+  return_op.vmult = [vmult_add](Range_2 &dst_g, const Domain_2 &src_y)
+  {
+    dst_g = 0.;
+    vmult_add(dst_g, src_y);
+  };
+
+  return_op.Tvmult_add = [A_inv,B,C,D](Domain_2 &dst_g, const Range_2 &src_y)
+  {
+    static GrowingVectorMemory<Domain_1> vector_memory_f;
+    static GrowingVectorMemory<Domain_2> vector_memory_g;
+    static GrowingVectorMemory<Range_1>  vector_memory_x;
+
+    typename VectorMemory<Domain_1>::Pointer tmp_f (vector_memory_f);
+    typename VectorMemory<Domain_2>::Pointer tmp_g (vector_memory_g);
+    typename VectorMemory<Range_1>::Pointer  tmp_x (vector_memory_x);
+
+    // Reinitialise in context of how they'll be used
+    C.reinit_domain_vector(*tmp_f, /*bool omit_zeroing_entries =*/ true);
+    A_inv.reinit_domain_vector(*tmp_x, /*bool omit_zeroing_entries =*/ true);
+    B.reinit_domain_vector(*tmp_g, /*bool omit_zeroing_entries =*/ true);
+
+    // Need to form dst_g such that dst_g = S^T*src_y = (D^T - B^T*A_inv^T*C^T) src_y
+    if (D.is_null_operator == false)
+      D.Tvmult_add (dst_g, src_y); // dst_g += D^T*src_y (length y)
+
+    C.Tvmult (*tmp_f, src_y); // tmp_f = C^T*src_y (length x)
+    try
+      {
+        A_inv.Tvmult (*tmp_x, *tmp_f); // tmp_x = A_inv^T*C^T*src_y (length x)
+      }
+    catch (...)
+      {
+        AssertThrow(false,
+                    ExcMessage("No convergence in A_inv Tvmult operation"));
+      }
+    B.Tvmult (*tmp_g, *tmp_x); // tmp_g = B^T*A_inv^T*C^T*src_y (length y)
+    dst_g -= *tmp_g; // dst_g += D^T*src_y - B^T*A_inv^T*C^T*src_y
+
+    // tmp_f, tmp_g and tmp_x are handed back to their pools by the
+    // VectorMemory::Pointer destructors, including when the
+    // AssertThrow above propagates an exception out of this lambda.
+  };
+
+  const auto Tvmult_add = return_op.Tvmult_add;
+  return_op.Tvmult = [Tvmult_add](Domain_2 &dst_g, const Range_2 &src_y)
+  {
+    dst_g = 0.;
+    Tvmult_add(dst_g, src_y);
+  };
+
+  return return_op;
+}
+
+//@}
+
+
+/**
+ * @name Creation of PackagedOperation objects related to the Schur Complement
+ */
+//@{
+
+/**
+ * @relates PackagedOperation
+ *
+ * For the system of equations
+ * @f{eqnarray*}{
+ *   Ax + By &=& f \\
+ *   Cx + Dy &=& g \quad ,
+ * @f}
+ * this operation performs the pre-processing (condensation) step on the RHS
+ * subvector @p g so that the Schur complement can be used to solve this
+ * system of equations. More specifically, it produces an object that
+ * represents the condensed form of the subvector @p g, namely
+ * @f[
+ *   g' = g - C \: A^{-1} \: f
+ * @f]
+ *
+ * @see
+ * @ref GlossBlockLA "Block (linear algebra)"
+ * @author Jean-Paul Pelteret, Matthias Maier, 2015
+ *
+ * @ingroup LAOperators
+ */
+template <typename Range_1, typename Domain_1,
+          typename Range_2>
+PackagedOperation<Range_2>
+condense_schur_rhs (const LinearOperator<Range_1, Domain_1> &A_inv,
+                    const LinearOperator<Range_2, Domain_1> &C,
+                    const Range_1                           &f,
+                    const Range_2                           &g)
+{
+  PackagedOperation<Range_2> return_comp;
+
+  return_comp.reinit_vector = C.reinit_range_vector;
+
+  // ensure to have valid computation objects by catching
+  // A_inv,C,f,g by value
+
+  return_comp.apply_add = [A_inv,C,f,g](Range_2 &g_star)
+  {
+    static GrowingVectorMemory<Range_1> vector_memory_f;
+    static GrowingVectorMemory<Range_2> vector_memory_g;
+
+    // The scratch vectors are held through VectorMemory::Pointer so that
+    // they are given back to their pool on every exit path, including the
+    // one where the AssertThrow below throws.
+    typename VectorMemory<Range_1>::Pointer tmp_f1 (vector_memory_f);
+    typename VectorMemory<Range_2>::Pointer tmp_g1 (vector_memory_g);
+
+    // Reinitialise in context of how they'll be used
+    A_inv.reinit_range_vector(*tmp_f1, /*bool omit_zeroing_entries =*/ true);
+    C.reinit_range_vector(*tmp_g1, /*bool omit_zeroing_entries =*/ true);
+
+    // Condensation on RHS of one field
+    // Need to form g* such that g* = g - C*A_inv*f
+    // NOTE(review): f and tmp_f1 (both Range_1) are passed where the
+    // operators are declared with a Domain_1 source, so this presumably
+    // relies on Range_1 and Domain_1 being the same vector type.
+    try
+      {
+        A_inv.vmult(*tmp_f1, f); // tmp_f1 = A_inv * f
+      }
+    catch (...)
+      {
+        AssertThrow(false,
+                    ExcMessage("No convergence in A_inv vmult operation"));
+      }
+    C.vmult(*tmp_g1, *tmp_f1); // tmp_g1 = C * A_inv * f
+
+    g_star += g;
+    g_star -= *tmp_g1; // g_star += g - C * A_inv * f
+  };
+
+  const auto apply_add = return_comp.apply_add;
+  return_comp.apply = [apply_add](Range_2 &g_star)
+  {
+    g_star = 0.;
+    apply_add(g_star);
+  };
+
+  return return_comp;
+}
+
+/**
+ * @relates PackagedOperation
+ *
+ * For the system of equations
+ * @f{eqnarray*}{
+ *   Ax + By &=& f \\
+ *   Cx + Dy &=& g \quad ,
+ * @f}
+ * this operation performs the post-processing step of the Schur complement to
+ * solve for the second subvector @p x once subvector @p y is known, with the
+ * result that
+ * @f[
+ *   x =  A^{-1}(f - By)
+ * @f]
+ *
+ * @see
+ * @ref GlossBlockLA "Block (linear algebra)"
+ * @author Jean-Paul Pelteret, Matthias Maier, 2015
+ *
+ * @ingroup LAOperators
+ */
+template <typename Range_1, typename Domain_1,
+          typename Domain_2>
+PackagedOperation<Domain_1>
+postprocess_schur_solution (const LinearOperator<Range_1, Domain_1> &A_inv,
+                            const LinearOperator<Range_1, Domain_2> &B,
+                            const Domain_2                          &y,
+                            const Range_1                           &f)
+{
+  PackagedOperation<Domain_1> return_comp;
+
+  return_comp.reinit_vector = A_inv.reinit_domain_vector;
+
+  // ensure to have valid computation objects by catching
+  // A_inv,B,y,f by value
+
+  return_comp.apply_add = [A_inv,B,y,f](Domain_1 &x)
+  {
+    static GrowingVectorMemory<Range_1> vector_memory_f;
+
+    typename VectorMemory<Range_1>::Pointer tmp_f1 (vector_memory_f);
+    typename VectorMemory<Range_1>::Pointer tmp_f2 (vector_memory_f);
+
+    // Reinitialise in context of how they'll be used
+    B.reinit_range_vector(*tmp_f1, /*bool omit_zeroing_entries =*/ true);
+
+    // Solve for second field
+    // Need to form x such that x = A_inv*(f - B*y)
+    B.vmult(*tmp_f1, y); // tmp_f1 = B*y
+    *tmp_f2 = f; // tmp_f2 is not reinitialised first; the assignment has to size it
+    *tmp_f2 -= *tmp_f1; // tmp_f2 = f - B*y
+    try
+      {
+        A_inv.vmult_add(x, *tmp_f2); // x += A_inv*(f-B*y)
+      }
+    catch (...)
+      {
+        AssertThrow(false,
+                    ExcMessage("No convergence in A_inv vmult operation"));
+      }
+
+    // tmp_f2 and tmp_f1 are given back to vector_memory_f by their
+    // Pointer destructors, also when the AssertThrow above throws.
+  };
+
+  const auto apply_add = return_comp.apply_add;
+  return_comp.apply = [apply_add](Domain_1 &x)
+  {
+    x = 0.;
+    apply_add(x);
+  };
+
+  return return_comp;
+}
+
+//@}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_CXX11
+#endif
diff --git a/include/deal.II/lac/schur_matrix.h b/include/deal.II/lac/schur_matrix.h
new file mode 100644
index 0000000..8f09828
--- /dev/null
+++ b/include/deal.II/lac/schur_matrix.h
@@ -0,0 +1,344 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__schur_matrix_h
+#define dealii__schur_matrix_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/lac/vector_memory.h>
+#include <deal.II/lac/block_vector.h>
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/*! @addtogroup Matrix2
+ *@{
+ */
+
+/**
+ * Schur complement of a block matrix.
+ *
+ * Given a non-singular matrix @p A (often positive definite) and a positive
+ * semi-definite matrix @p C as well as matrices @p B and @p Dt of full rank,
+ * this class implements a new matrix, the Schur complement of the system of
+ * equations of the structure
+ *
+ * @verbatim
+ * /        \  /   \     /   \
+ * |  A  Dt |  | u |  -  | f |
+ * | -B  C  |  | p |  -  | g |
+ * \        /  \   /     \   /
+ * @endverbatim
+ *
+ * Multiplication with the Schur matrix @p S is the operation
+ * @verbatim
+ * S p = C p + B A-inverse Dt-transpose p,
+ * @endverbatim
+ * which is an operation within the space for @p p.
+ *
+ * The data handed to the Schur matrix are as follows:
+ *
+ * @p A: the inverse of @p A is stored, instead of @p A. This allows the
+ * application to use the most efficient form of inversion, iterative or
+ * direct.
+ *
+ * @p B, @p C: these matrices are stored "as is".
+ *
+ * @p Dt: the computation of the Schur complement involves the function @p
+ * Tvmult of the matrix @p Dt, not @p vmult! This way, it is sufficient to
+ * build only one matrix @p B for the symmetric Schur complement and use it
+ * twice.
+ *
+ * All matrices involved are of arbitrary type and vectors are BlockVectors.
+ * This way, @p SchurMatrix can be coupled with any matrix classes providing
+ * @p vmult and @p Tvmult and can be even nested. Since SmartPointers of
+ * matrices are stored, the matrix blocks should be derived from Subscriptor.
+ *
+ * Since the Schur complement of a matrix corresponds to a Gaussian block
+ * elimination, the right hand side of the condensed system must be
+ * preprocessed. Furthermore, the eliminated variable must be reconstructed
+ * after solving.
+ *
+ * @verbatim
+ *   g = g + B A-inverse f
+ *   u = A-inverse (f - D-transpose p)
+ * @endverbatim
+ *
+ * Applying these transformations, the solution of the system above by a @p
+ * SchurMatrix @p schur is coded as follows:
+ *
+ * @code
+ *   schur.prepare_rhs (g, f);
+ *   solver.solve (schur, p, g, precondition);
+ *   schur.postprocess (u, p, f);
+ * @endcode
+ *
+ * @see
+ * @ref GlossBlockLA "Block (linear algebra)"
+ * @author Guido Kanschat, 2000, 2001, 2002
+ */
+template <class MA_inverse, class MB, class MDt, class MC>
+class SchurMatrix : public Subscriptor
+{
+public:
+
+  /**
+   * Constructor. This constructor receives all the matrices needed.
+   * Furthermore, it gets a reference to a memory structure for obtaining
+   * block vectors.
+   *
+   * Optionally, the @p signature used to size temporary @p u-vectors can be provided.
+   *
+   * For the meaning of the matrices see the class documentation.
+   */
+  SchurMatrix(const MA_inverse &Ainv,
+              const MB &B,
+              const MDt &Dt,
+              const MC &C,
+              VectorMemory<BlockVector<double> > &mem,
+              const std::vector<types::global_dof_index> &signature = std::vector<types::global_dof_index>(0));
+
+  /**
+   * Do block elimination of the right hand side. Given right hand sides for
+   * both components of the block system, this function provides the right
+   * hand side for the Schur complement.
+   *
+   * The result is stored in the first argument, which is also part of the
+   * input data. If it is necessary to conserve the data, @p dst must be
+   * copied before calling this function. This is reasonable, since in many
+   * cases, only the pre-processed right hand side is needed.
+   */
+  void prepare_rhs (BlockVector<double> &dst,
+                    const BlockVector<double> &src) const;
+
+  /**
+   * Multiplication with the Schur complement.
+   */
+  void vmult (BlockVector<double> &dst,
+              const BlockVector<double> &src) const;
+
+//  void Tmult(BlockVector<double>& dst, const BlockVector<double>& src) const;
+
+  /**
+   * Compute the residual <tt>dst = rhs - S*src</tt> and return its l2-norm.
+   */
+  double residual (BlockVector<double> &dst,
+                   const BlockVector<double> &src,
+                   const BlockVector<double> &rhs) const;
+
+  /**
+   * Compute the eliminated variable @p dst from the solution @p src of the
+   * Schur complement problem and the right hand side @p rhs.
+   */
+  void postprocess (BlockVector<double> &dst,
+                    const BlockVector<double> &src,
+                    const BlockVector<double> &rhs) const;
+
+  /**
+   * Select debugging information for log-file.  Debug level 1 is defined and
+   * writes the norm of every vector before and after each operation. Debug
+   * level 0 turns off debugging information.
+   */
+  void debug_level(unsigned int l);
+private:
+  /**
+   * Copy constructor. Private, so users of the class cannot copy it.
+   */
+  SchurMatrix (const SchurMatrix<MA_inverse, MB, MDt, MC> &);
+  /**
+   * Assignment operator. Private, so users of the class cannot assign it.
+   */
+  SchurMatrix &operator = (const SchurMatrix<MA_inverse, MB, MDt, MC> &);
+
+  /**
+   * Pointer to inverse of upper left block.
+   */
+  const SmartPointer<const MA_inverse,SchurMatrix<MA_inverse,MB,MDt,MC> > Ainv;
+  /**
+   * Pointer to lower left block.
+   */
+  const SmartPointer<const MB,SchurMatrix<MA_inverse,MB,MDt,MC> > B;
+  /**
+   * Pointer to transpose of upper right block.
+   */
+  const SmartPointer<const MDt,SchurMatrix<MA_inverse,MB,MDt,MC> > Dt;
+  /**
+   * Pointer to lower right block.
+   */
+  const SmartPointer<const MC,SchurMatrix<MA_inverse,MB,MDt,MC> > C;
+  /**
+   * Auxiliary memory for vectors.
+   */
+  VectorMemory<BlockVector<double> > &mem;
+
+  /**
+   * Optional signature of the @p u-vector.
+   */
+  std::vector<types::global_dof_index> signature;
+
+  /**
+   * Switch for debugging information.
+   */
+  unsigned int debug;
+};
+
+/*@}*/
+//---------------------------------------------------------------------------
+
+template <class MA_inverse, class MB, class MDt, class MC>
+SchurMatrix<MA_inverse, MB, MDt, MC>
+::SchurMatrix(const MA_inverse &Ainv,
+              const MB &B,
+              const MDt &Dt,
+              const MC &C,
+              VectorMemory<BlockVector<double> > &mem,
+              const std::vector<types::global_dof_index> &signature)
+  : Ainv(&Ainv), B(&B), Dt(&Dt), C(&C), // parameters shadow the members; each member stores the address of its argument
+    mem(mem),                           // bound by reference, not copied
+    signature(signature),               // copied into the object
+    debug(0)                            // debug output stays off until debug_level() is called
+{
+}
+
+
+template <class MA_inverse, class MB, class MDt, class MC>
+void
+SchurMatrix<MA_inverse, MB, MDt, MC>
+::debug_level(unsigned int l)
+{
+  debug = l; // any value > 0 enables the deallog norm output in vmult(), prepare_rhs() and postprocess()
+}
+
+
+template <class MA_inverse, class MB, class MDt, class MC>
+void SchurMatrix<MA_inverse, MB, MDt, MC>
+::vmult(BlockVector<double> &dst,
+        const BlockVector<double> &src) const
+{
+  deallog.push("Schur");
+  if (debug > 0)
+    deallog << "src:" << src.l2_norm() << std::endl;
+
+  C->vmult(dst, src); // dst = C*src
+  if (debug > 0)
+    deallog << "C:" << dst.l2_norm() << std::endl;
+
+  VectorMemory<BlockVector<double> >::Pointer h1(mem);
+  if (signature.size()>0)
+    h1->reinit(signature);
+  else
+    h1->reinit(B->n_block_cols(), src.block(0).size());
+  Dt->Tvmult(*h1,src); // h1 = Dt^T*src
+  if (debug > 0)
+    deallog << "Dt:" << h1->l2_norm() << std::endl;
+
+  VectorMemory<BlockVector<double> >::Pointer h2(mem);
+  h2->reinit(*h1);
+  Ainv->vmult(*h2, *h1); // h2 = Ainv*Dt^T*src
+  if (debug > 0)
+    deallog << "Ainverse:" << h2->l2_norm() << std::endl;
+
+  // h1 and h2 go back to mem in their Pointer destructors, also if a call throws.
+  B->vmult_add(dst, *h2); // dst += B*Ainv*Dt^T*src
+  if (debug > 0)
+    deallog << "dst:" << dst.l2_norm() << std::endl;
+
+  // NOTE: deallog.pop() is still skipped if one of the calls above throws.
+  deallog.pop();
+}
+
+
+template <class MA_inverse, class MB, class MDt, class MC>
+double SchurMatrix<MA_inverse, MB, MDt, MC>
+::residual(BlockVector<double> &dst,
+           const BlockVector<double> &src,
+           const BlockVector<double> &rhs) const
+{
+  vmult(dst, src); // dst = S*src
+  // dst = (-1)*dst + rhs = rhs - S*src
+  dst.sadd(-1., rhs);
+  return dst.l2_norm();
+}
+
+
+template <class MA_inverse, class MB, class MDt, class MC>
+void SchurMatrix<MA_inverse, MB, MDt, MC>
+::prepare_rhs(BlockVector<double> &dst,
+              const BlockVector<double> &src) const
+{
+  Assert (src.n_blocks() == B->n_block_cols(),
+          ExcDimensionMismatch(src.n_blocks(), B->n_block_cols()));
+  Assert (dst.n_blocks() == B->n_block_rows(),
+          ExcDimensionMismatch(dst.n_blocks(), B->n_block_rows()));
+
+  deallog.push("Schur-prepare");
+  if (debug > 0)
+    deallog << "src:" << src.l2_norm() << std::endl;
+  VectorMemory<BlockVector<double> >::Pointer h1(mem);
+  if (signature.size()>0)
+    h1->reinit(signature);
+  else
+    h1->reinit(B->n_block_cols(), src.block(0).size());
+  Ainv->vmult(*h1, src); // h1 = Ainv*src
+  if (debug > 0)
+    deallog << "Ainverse:" << h1->l2_norm() << std::endl;
+  B->vmult_add(dst, *h1); // dst += B*Ainv*src
+  if (debug > 0)
+    deallog << "dst:" << dst.l2_norm() << std::endl;
+  // h1 goes back to mem in its Pointer destructor, also if a call above throws.
+  deallog.pop();
+}
+
+
+template <class MA_inverse, class MB, class MDt, class MC>
+void SchurMatrix<MA_inverse, MB, MDt, MC>
+::postprocess(BlockVector<double> &dst,
+              const BlockVector<double> &src,
+              const BlockVector<double> &rhs) const
+{
+  Assert (dst.n_blocks() == B->n_block_cols(),
+          ExcDimensionMismatch(dst.n_blocks(), B->n_block_cols()));
+  Assert (rhs.n_blocks() == B->n_block_cols(),
+          ExcDimensionMismatch(rhs.n_blocks(), B->n_block_cols()));
+  Assert (src.n_blocks() == B->n_block_rows(),
+          ExcDimensionMismatch(src.n_blocks(), B->n_block_rows()));
+
+  deallog.push("Schur-post");
+  if (debug > 0)
+    deallog << "src:" << src.l2_norm() << std::endl;
+  VectorMemory<BlockVector<double> >::Pointer h1(mem);
+  if (signature.size()>0)
+    h1->reinit(signature);
+  else
+    h1->reinit(B->n_block_cols(), src.block(0).size());
+  Dt->Tvmult(*h1, src); // h1 = Dt^T*src
+  if (debug > 0)
+    deallog << "Dt:" << h1->l2_norm() << std::endl;
+  h1->sadd(-1.,rhs); // h1 = rhs - Dt^T*src
+  Ainv->vmult(dst,*h1); // dst = Ainv*(rhs - Dt^T*src)
+  if (debug > 0)
+    deallog << "dst:" << dst.l2_norm() << std::endl;
+  // h1 goes back to mem in its Pointer destructor, also if a call above throws.
+  deallog.pop();
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/shifted_matrix.h b/include/deal.II/lac/shifted_matrix.h
new file mode 100644
index 0000000..6cc0c69
--- /dev/null
+++ b/include/deal.II/lac/shifted_matrix.h
@@ -0,0 +1,279 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__shifted_matrix_h
+#define dealii__shifted_matrix_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/smartpointer.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+/*! @addtogroup Matrix2
+ *@{
+ */
+
+/**
+ * Matrix with shifted diagonal values.
+ *
+ * Given a matrix <tt>A</tt>, this class implements a matrix-vector product
+ * with <i>A+s I</i>, where <i>s</i> is a provided shift parameter.
+ *
+ * The base matrix is not copied: the constructor only stores a pointer to
+ * <tt>A</tt>, so <tt>A</tt> has to stay valid for as long as this object is
+ * used.
+ *
+ * @deprecated If deal.II was configured with C++11 support, use the
+ * LinearOperator class instead, see the module on
+ * @ref LAOperators "linear operators"
+ * for further details.
+ *
+ * @author Guido Kanschat, 2000, 2001
+ */
+template<typename MatrixType>
+class ShiftedMatrix
+{
+public:
+  /**
+   * Constructor.  Provide the base matrix @p A and the initial shift
+   * parameter @p sigma.
+   */
+  ShiftedMatrix (const MatrixType &A, const double sigma);
+
+  /**
+   * Set the shift parameter to @p sigma.
+   */
+  void shift (const double sigma);
+
+  /**
+   * Access to the shift parameter: returns the currently stored value.
+   */
+  double shift () const;
+
+  /**
+   * Matrix-vector-product: writes <i>(A+s I) src</i> into @p dst. The shift
+   * term is skipped entirely when the shift parameter is exactly zero.
+   */
+  template <typename VectorType>
+  void vmult (VectorType &dst, const VectorType &src) const;
+
+  /**
+   * Residual. Computes the shifted product of @p src as in vmult(), then
+   * combines it with @p rhs through <tt>dst.sadd(-1.,1.,rhs)</tt>
+   * (presumably yielding <i>rhs - (A+s I) src</i>; this relies on the
+   * vector class's sadd() semantics). The result is stored in @p dst and
+   * its l2 norm is returned.
+   */
+  template <typename VectorType>
+  double residual (VectorType &dst, const VectorType &src, const VectorType &rhs) const;
+
+private:
+  /**
+   * Storage for base matrix.
+   */
+  SmartPointer<const MatrixType,ShiftedMatrix<MatrixType> > A;
+
+  // Disabled auxiliary vector member, kept here for reference only:
+  //    VectorType aux;
+
+  /**
+   * Shift parameter.
+   */
+  double sigma;
+};
+
+
+
+/**
+ * Matrix with shifted diagonal values with respect to a certain scalar
+ * product.
+ *
+ * Given a matrix <tt>A</tt>, this class implements a matrix-vector product
+ * with <i>A+s M</i>, where <i>s</i> is a provided shift parameter and
+ * <tt>M</tt> is the matrix representing the identity with respect to that
+ * scalar product (the "mass matrix" stored by this class).
+ *
+ * Neither matrix is copied: the constructor only stores pointers to
+ * <tt>A</tt> and <tt>M</tt>, so both have to stay valid for as long as this
+ * object is used.
+ *
+ * @deprecated If deal.II was configured with C++11 support, use the
+ * LinearOperator class instead, see the module on
+ * @ref LAOperators "linear operators"
+ * for further details.
+ *
+ * @author Guido Kanschat, 2001
+ */
+template<typename MatrixType, class MatrixType2, class VectorType>
+class ShiftedMatrixGeneralized
+{
+public:
+  /**
+   * Constructor. Provide the base matrix @p A, the mass matrix @p M and the
+   * initial shift parameter @p sigma.
+   */
+  ShiftedMatrixGeneralized (const MatrixType  &A,
+                            const MatrixType2 &M,
+                            const double       sigma);
+
+  /**
+   * Set the shift parameter to @p sigma.
+   */
+  void shift (const double sigma);
+
+  /**
+   * Access to the shift parameter: returns the currently stored value.
+   */
+  double shift () const;
+
+  /**
+   * Matrix-vector-product: writes <i>(A+s M) src</i> into @p dst. The
+   * product with <tt>M</tt> is skipped entirely when the shift parameter is
+   * exactly zero; otherwise the auxiliary vector is resized to match
+   * @p dst and used to hold <i>M src</i>.
+   */
+  void vmult (VectorType &dst, const VectorType &src) const;
+
+  /**
+   * Residual. Computes the shifted product of @p src as in vmult(), then
+   * combines it with @p rhs through <tt>dst.sadd(-1.,1.,rhs)</tt>
+   * (presumably yielding <i>rhs - (A+s M) src</i>; this relies on the
+   * vector class's sadd() semantics). The result is stored in @p dst and
+   * its l2 norm is returned.
+   */
+  double residual (VectorType &dst, const VectorType &src, const VectorType &rhs) const;
+
+private:
+  /**
+   * Storage for base matrix.
+   */
+  SmartPointer<const MatrixType,ShiftedMatrixGeneralized<MatrixType,MatrixType2,VectorType> > A;
+  /**
+   * Storage for mass matrix.
+   */
+  SmartPointer<const MatrixType2,ShiftedMatrixGeneralized<MatrixType,MatrixType2,VectorType> > M;
+
+  /**
+   * Auxiliary vector. Declared mutable because the const functions vmult()
+   * and residual() use it as scratch space for the product with
+   * <tt>M</tt>.
+   */
+  mutable VectorType aux;
+
+  /**
+   * Shift parameter.
+   */
+  double sigma;
+};
+
+
+/*@}*/
+//---------------------------------------------------------------------------
+
+// Store a pointer to the base matrix (no copy is made) together with the
+// initial shift value.
+template <typename MatrixType>
+inline
+ShiftedMatrix<MatrixType>::ShiftedMatrix (const MatrixType &A,
+                                          const double      sigma)
+  :
+  A(&A),
+  sigma(sigma)
+{}
+
+
+
+// Setter: overwrite the stored shift parameter with s.
+template <typename MatrixType>
+inline void
+ShiftedMatrix<MatrixType>::shift (const double s)
+{
+  this->sigma = s;
+}
+
+
+// Getter: report the shift parameter currently in use.
+template <typename MatrixType>
+inline double
+ShiftedMatrix<MatrixType>::shift () const
+{
+  return this->sigma;
+}
+
+
+
+// dst = A*src + sigma*src; the second term is skipped for a zero shift.
+template <typename MatrixType>
+template <class VectorType>
+inline void
+ShiftedMatrix<MatrixType>::vmult (VectorType &dst, const VectorType &src) const
+{
+  A->vmult(dst, src);
+
+  // Nothing to add when the shift vanishes.
+  if (sigma == 0.)
+    return;
+
+  dst.add(sigma, src);
+}
+
+
+// Form the shifted product of src in dst, combine it with rhs via
+// sadd(-1.,1.,rhs), and return the l2 norm of the result.
+template <typename MatrixType>
+template <class VectorType>
+inline double
+ShiftedMatrix<MatrixType>::residual (VectorType       &dst,
+                                     const VectorType &src,
+                                     const VectorType &rhs) const
+{
+  // dst = (A + sigma I) src, exactly as computed by vmult().
+  this->vmult(dst, src);
+
+  // Negate the product and add the right hand side.
+  dst.sadd(-1.,1.,rhs);
+
+  return dst.l2_norm ();
+}
+
+
+//---------------------------------------------------------------------------
+// Store pointers to the base matrix and the mass matrix (no copies are
+// made) together with the initial shift value.
+template <typename MatrixType, class MatrixType2, class VectorType>
+inline
+ShiftedMatrixGeneralized<MatrixType, MatrixType2, VectorType>
+::ShiftedMatrixGeneralized (const MatrixType  &A,
+                            const MatrixType2 &M,
+                            const double       sigma)
+  :
+  A(&A),
+  M(&M),
+  sigma(sigma)
+{}
+
+
+// Setter: overwrite the stored shift parameter with s.
+template <typename MatrixType, class MatrixType2, class VectorType>
+inline void
+ShiftedMatrixGeneralized<MatrixType, MatrixType2, VectorType>::shift (const double s)
+{
+  this->sigma = s;
+}
+
+// Getter: report the shift parameter currently in use.
+template <typename MatrixType, class MatrixType2, class VectorType>
+inline double
+ShiftedMatrixGeneralized<MatrixType, MatrixType2, VectorType>::shift () const
+{
+  return this->sigma;
+}
+
+
+// dst = A*src + sigma*(M*src); the mass matrix product is skipped for a
+// zero shift.
+template <typename MatrixType, class MatrixType2, class VectorType>
+inline void
+ShiftedMatrixGeneralized<MatrixType, MatrixType2, VectorType>::vmult
+(VectorType       &dst,
+ const VectorType &src) const
+{
+  A->vmult(dst, src);
+
+  // Nothing to add when the shift vanishes.
+  if (sigma == 0.)
+    return;
+
+  // Use the scratch vector, resized to match dst, to hold M*src.
+  aux.reinit(dst);
+  M->vmult(aux, src);
+  dst.add(sigma, aux);
+}
+
+
+// Form the shifted product of src in dst, combine it with rhs via
+// sadd(-1.,1.,rhs), and return the l2 norm of the result.
+template <typename MatrixType, class MatrixType2, class VectorType>
+inline double
+ShiftedMatrixGeneralized<MatrixType, MatrixType2, VectorType>::residual
+(VectorType       &dst,
+ const VectorType &src,
+ const VectorType &rhs) const
+{
+  // dst = (A + sigma M) src, exactly as computed by vmult().
+  this->vmult(dst, src);
+
+  // Negate the product and add the right hand side.
+  dst.sadd(-1.,1.,rhs);
+
+  return dst.l2_norm ();
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/slepc_solver.h b/include/deal.II/lac/slepc_solver.h
new file mode 100644
index 0000000..1120c9f
--- /dev/null
+++ b/include/deal.II/lac/slepc_solver.h
@@ -0,0 +1,825 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__slepc_solver_h
+#define dealii__slepc_solver_h
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_SLEPC
+
+#  include <deal.II/base/std_cxx11/shared_ptr.h>
+#  include <deal.II/lac/exceptions.h>
+#  include <deal.II/lac/solver_control.h>
+#  include <deal.II/lac/petsc_matrix_base.h>
+#  include <deal.II/lac/slepc_spectral_transformation.h>
+
+#  include <petscconf.h>
+#  include <petscksp.h>
+#  include <slepceps.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * Base namespace for solver classes using the SLEPc solvers which are
+ * selected based on flags passed to the eigenvalue problem solver context.
+ * Derived classes set the right flags to set the right solver.
+ *
+ * The SLEPc solvers are intended to be used for solving the generalized
+ * eigenspectrum problem $(A-\lambda B)x=0$, for $x\neq0$; where $A$ is a
+ * system matrix, $B$ is a mass matrix, and $\lambda, x$ are a set of
+ * eigenvalues and eigenvectors respectively. The emphasis is on methods and
+ * techniques appropriate for problems in which the associated matrices are
+ * sparse. Most of the methods offered by the SLEPc library are projection
+ * methods or other methods with similar properties; and wrappers are provided
+ * to interface to SLEPc solvers that handle both of these problem sets.
+ *
+ * SLEPcWrappers can be implemented in application codes in the following way:
+ * @code
+ *  SolverControl solver_control (1000, 1e-9);
+ *  SolverArnoldi system (solver_control, mpi_communicator);
+ *  system.solve (A, B, lambda, x, size_of_spectrum);
+ * @endcode
+ * for the generalized eigenvalue problem $Ax=B\lambda x$, where the variable
+ * <code>const unsigned int size_of_spectrum</code> tells SLEPc the number of
+ * eigenvector/eigenvalue pairs to solve for. Additional options and solver
+ * parameters can be passed to the SLEPc solvers before calling
+ * <code>solve()</code>. For example, if the matrices of the general
+ * eigenspectrum problem are not hermitian and the lower eigenvalues are
+ * wanted only, the following code can be implemented before calling
+ * <code>solve()</code>:
+ * @code
+ *  system.set_problem_type (EPS_NHEP);
+ *  system.set_which_eigenpairs (EPS_SMALLEST_REAL);
+ * @endcode
+ * These options can also be set at the command line.
+ *
+ * See also <code>step-36</code> for a hands-on example.
+ *
+ * When spectral transformations are used in conjunction with Krylov-type
+ * solvers, or when Davidson-type eigensolvers are employed, one can
+ * additionally specify which linear solver and preconditioner to use. This
+ * can be achieved as follows:
+ * @code
+ *   PETScWrappers::PreconditionBoomerAMG::AdditionalData data;
+ *   data.symmetric_operator = true;
+ *   PETScWrappers::PreconditionBoomerAMG preconditioner(mpi_communicator, data);
+ *   SolverControl linear_solver_control (dof_handler.n_dofs(), 1e-12,false,false);
+ *   PETScWrappers::SolverCG  linear_solver(linear_solver_control,mpi_communicator);
+ *   linear_solver.initialize(preconditioner);
+ *   SolverControl solver_control (100, 1e-9,false,false);
+ *   SLEPcWrappers::SolverKrylovSchur eigensolver(solver_control,mpi_communicator);
+ *   SLEPcWrappers::TransformationShift spectral_transformation(mpi_communicator);
+ *   spectral_transformation.set_solver(linear_solver);
+ *   eigensolver.set_transformation(spectral_transformation);
+ *   eigensolver.solve (stiffness_matrix,mass_matrix,eigenvalues,eigenfunctions,eigenfunctions.size());
+ * @endcode
+ *
+ * In order to support this use case, and unlike the PETSc wrappers, the
+ * classes in this namespace are written in such a way that the underlying
+ * SLEPc objects are initialized in constructors. By doing so one also avoids
+ * caching of different settings (such as the target eigenvalue or the type
+ * of the problem); instead those are applied straight away when the
+ * corresponding functions of the wrapper classes are called.
+ *
+ * An alternative implementation to the one above is to use the API internals
+ * directly within the application code. In this way the calling sequence
+ * requires calling several of SolverBase functions rather than just one. This
+ * freedom is intended for use of the SLEPcWrappers that require a greater
+ * handle on the eigenvalue problem solver context. See also the API of, for
+ * example:
+ * @code
+ * template <typename OutputVector>
+ * void
+ * SolverBase::solve (const PETScWrappers::MatrixBase &A,
+ *                   const PETScWrappers::MatrixBase &B,
+ *                   std::vector<PetscScalar>        &eigenvalues,
+ *                   std::vector<OutputVector>       &eigenvectors,
+ *                   const unsigned int               n_eigenpairs)
+ * { ... }
+ * @endcode
+ * as an example on how to do this.
+ *
+ * For further information and explanations on handling the
+ * @ref SLEPcWrappers "SLEPcWrappers",
+ * see also the
+ * @ref PETScWrappers "PETScWrappers",
+ * on which they depend.
+ *
+ * @ingroup SLEPcWrappers
+ *
+ * @author Toby D. Young 2008, 2009, 2010, 2011, 2013; and Rickard Armiento
+ * 2008; and Denis Davydov 2015.
+ *
+ * @note Various tweaks and enhancements contributed by Eloy Romero and Jose
+ * E. Roman 2009, 2010.
+ */
+namespace SLEPcWrappers
+{
+
+  /**
+   * Base class for solver classes using the SLEPc solvers. Since solvers in
+   * SLEPc are selected based on flags passed to a generic solver object,
+   * basically all the actual solver calls happen in this class, and derived
+   * classes simply set the right flags to select one solver or another, or to
+   * set certain parameters for individual solvers.
+   */
+  class SolverBase
+  {
+  public:
+    /**
+     * Constructor. Takes the object @p cn that controls convergence and the
+     * MPI communicator over which parallel computations are to happen.
+     */
+    SolverBase (SolverControl &cn,
+                const MPI_Comm &mpi_communicator);
+
+    /**
+     * Destructor.
+     */
+    virtual ~SolverBase ();
+
+    /**
+     * Composite method that solves the eigensystem $Ax=\lambda x$. The
+     * vector <tt>eigenvectors</tt> sent in has to have at least one element
+     * that we can use as a template when resizing, since we do not know the
+     * parameters of the specific vector class used (i.e. local_dofs for MPI
+     * vectors). However, while copying eigenvectors, at least twice the
+     * memory size of <tt>eigenvectors</tt> is being used (and can be more).
+     * To avoid doing this, the fairly standard calling sequence executed
+     * here is used: Set up matrices for solving; Actually solve the system;
+     * Gather the solution(s).
+     *
+     * The number of requested eigenpairs @p n_eigenpairs has to be positive
+     * and must not exceed the number of rows of @p A; otherwise
+     * ExcSLEPcWrappersUsageError is thrown. The same exception is thrown if
+     * <tt>eigenvectors</tt> is empty. If fewer eigenpairs converge than
+     * were requested, ExcSLEPcEigenvectorConvergenceMismatchError is
+     * thrown.
+     *
+     * @note The number of converged eigenvectors can be larger than the
+     * number of eigenvectors requested; this is due to a round off error
+     * (success) of the eigenproblem solver context. If this is found to be
+     * the case, the extra eigenpairs are simply ignored and only the
+     * requested number is returned. By default one eigenvector/eigenvalue
+     * pair is computed.
+     */
+    template <typename OutputVector>
+    void
+    solve (const PETScWrappers::MatrixBase &A,
+           std::vector<PetscScalar>        &eigenvalues,
+           std::vector<OutputVector>       &eigenvectors,
+           const unsigned int               n_eigenpairs = 1);
+
+    /**
+     * Same as above, but here a composite method for solving the system $A
+     * x=\lambda B x$, for real matrices, vectors, and values $A, B, x,
+     * \lambda$. The matrices @p A and @p B have to have the same number of
+     * rows and columns.
+     */
+    template <typename OutputVector>
+    void
+    solve (const PETScWrappers::MatrixBase &A,
+           const PETScWrappers::MatrixBase &B,
+           std::vector<PetscScalar>        &eigenvalues,
+           std::vector<OutputVector>       &eigenvectors,
+           const unsigned int               n_eigenpairs = 1);
+
+    /**
+     * Same as above, but here a composite method for solving the system $A
+     * x=\lambda B x$ with real matrices $A, B$ and complex eigenpairs $x,
+     * \lambda$, which are returned as separate real and imaginary parts.
+     */
+    template <typename OutputVector>
+    void
+    solve (const PETScWrappers::MatrixBase &A,
+           const PETScWrappers::MatrixBase &B,
+           std::vector<double>             &real_eigenvalues,
+           std::vector<double>             &imag_eigenvalues,
+           std::vector<OutputVector>       &real_eigenvectors,
+           std::vector<OutputVector>       &imag_eigenvectors,
+           const unsigned int               n_eigenpairs = 1);
+
+    /**
+     * Set the initial vector for the solver.
+     *
+     * @deprecated This function is marked as deprecated; see
+     * set_initial_space() for providing an initial vector space instead.
+     */
+    void
+    set_initial_vector
+    (const PETScWrappers::VectorBase &this_initial_vector) DEAL_II_DEPRECATED;
+
+    /**
+     * Set the initial vector space for the solver.
+     *
+     * By default, SLEPc initializes the starting vector or the initial
+     * subspace randomly.
+     */
+    template <typename Vector>
+    void
+    set_initial_space
+    (const std::vector<Vector> &initial_space);
+
+    /**
+     * Set the spectral transformation to be used.
+     */
+    void
+    set_transformation (SLEPcWrappers::TransformationBase &this_transformation);
+
+    /**
+     * Set the target eigenvalue in the spectrum to be computed. By default,
+     * no target is set.
+     */
+    void
+    set_target_eigenvalue (const PetscScalar &this_target);
+
+    /**
+     * Indicate which part of the spectrum is to be computed. By default
+     * largest magnitude eigenvalues are computed.
+     *
+     * @note For other allowed values see the SLEPc documentation.
+     */
+    void
+    set_which_eigenpairs (EPSWhich set_which);
+
+    /**
+     * Specify the type of the eigenspectrum problem. This can be used to
+     * exploit known symmetries of the matrices that make up the
+     * standard/generalized eigenspectrum problem.  By default a non-Hermitian
+     * problem is assumed.
+     *
+     * @note For other allowed values see the SLEPc documentation.
+     */
+    void
+    set_problem_type (EPSProblemType set_problem);
+
+    /**
+     * Take the information provided by SLEPc and check it against
+     * deal.II's own SolverControl objects to see if convergence has been
+     * reached.
+     */
+    void
+    get_solver_state (const SolverControl::State state);
+
+    /**
+     * Exception. Standard exception.
+     */
+    DeclException0 (ExcSLEPcWrappersUsageError);
+
+    /**
+     * Exception. SLEPc error with error number.
+     */
+    DeclException1 (ExcSLEPcError,
+                    int,
+                    << "    An error with error number " << arg1
+                    << " occurred while calling a SLEPc function");
+
+    /**
+     * Exception. Convergence failure on the number of eigenvectors.
+     */
+    DeclException2 (ExcSLEPcEigenvectorConvergenceMismatchError,
+                    int, int,
+                    << "    The number of converged eigenvectors is " << arg1
+                    << " but " << arg2 << " were requested. ");
+
+    /**
+     * Access to the object that controls convergence.
+     */
+    SolverControl &control () const;
+
+  protected:
+
+    /**
+     * Reference to the object that controls convergence of the iterative
+     * solver.
+     */
+    SolverControl &solver_control;
+
+    /**
+     * Copy of the MPI communicator object to be used for the solver.
+     */
+    const MPI_Comm mpi_communicator;
+
+    /**
+     * Solve the eigenproblem for <code>n_eigenpairs</code> eigenstates.
+     * Parameter <code>n_converged</code> contains the actual number of
+     * eigenstates that have converged; this can be either fewer or more than
+     * n_eigenpairs, depending on the SLEPc eigensolver used.
+     */
+    void
+    solve (const unsigned int n_eigenpairs,
+           unsigned int *n_converged);
+
+    /**
+     * Access the real parts of solutions for a solved eigenvector problem,
+     * pair index solutions, $\text{index}\,\in\,0\hdots
+     * \text{n\_converged}-1$.
+     */
+    void
+    get_eigenpair (const unsigned int         index,
+                   PetscScalar               &eigenvalues,
+                   PETScWrappers::VectorBase &eigenvectors);
+
+    /**
+     * Access the real and imaginary parts of solutions for a solved
+     * eigenvector problem, pair index solutions, $\text{index}\,\in\,0\hdots
+     * \text{n\_converged}-1$.
+     */
+    void
+    get_eigenpair (const unsigned int         index,
+                   double                    &real_eigenvalues,
+                   double                    &imag_eigenvalues,
+                   PETScWrappers::VectorBase &real_eigenvectors,
+                   PETScWrappers::VectorBase &imag_eigenvectors);
+
+    /**
+     * Initialize solver for the linear system $Ax=\lambda x$. (Note: this is
+     * required before calling solve ())
+     */
+    void
+    set_matrices (const PETScWrappers::MatrixBase &A);
+
+    /**
+     * Same as above, but here initialize solver for the linear system $A
+     * x=\lambda B x$.
+     */
+    void
+    set_matrices (const PETScWrappers::MatrixBase &A,
+                  const PETScWrappers::MatrixBase &B);
+
+  protected:
+
+    /**
+     * The SLEPc eigenvalue problem solver (EPS) object.
+     */
+    EPS eps;
+
+  private:
+    /**
+     * Convergence reason.
+     */
+    EPSConvergedReason reason;
+
+
+    /**
+     * A function that can be used in SLEPc as a callback to check on
+     * convergence.
+     *
+     * @note This function is not used currently.
+     */
+    static
+    int
+    convergence_test (EPS          eps,
+                      PetscScalar  real_eigenvalue,
+                      PetscScalar  imag_eigenvalue,
+                      PetscReal    residual_norm,
+                      PetscReal   *estimated_error,
+                      void        *solver_control);
+  };
+
+  /**
+   * An implementation of the solver interface using the SLEPc Krylov-Schur
+   * solver. Usage: All spectrum, all problem types, complex.
+   *
+   * @ingroup SLEPcWrappers
+   *
+   * @author Toby D. Young 2008
+   */
+  class SolverKrylovSchur : public SolverBase
+  {
+  public:
+
+    /**
+     * Standardized data struct to pipe additional data to the solver, should
+     * it be needed. It currently has no members.
+     */
+    struct AdditionalData
+    {};
+
+    /**
+     * Constructor. SLEPc solvers will want to have an MPI communicator
+     * context over which computations are parallelized. The default,
+     * PETSC_COMM_SELF, carries the same behaviour as the PETScWrappers, but
+     * you can change that by passing a different communicator.
+     *
+     * @param cn Object that controls convergence.
+     * @param mpi_communicator Communicator to use for the solver.
+     * @param data Additional, solver specific flags.
+     */
+    SolverKrylovSchur (SolverControl        &cn,
+                       const MPI_Comm       &mpi_communicator = PETSC_COMM_SELF,
+                       const AdditionalData &data = AdditionalData());
+
+  protected:
+
+    /**
+     * Store a copy of the flags for this particular solver.
+     */
+    const AdditionalData additional_data;
+  };
+
+  /**
+   * An implementation of the solver interface using the SLEPc Arnoldi solver.
+   * Usage: All spectrum, all problem types, complex.
+   *
+   * @ingroup SLEPcWrappers
+   *
+   * @author Toby D. Young 2008, 2011
+   */
+  class SolverArnoldi : public SolverBase
+  {
+  public:
+    /**
+     * Standardized data struct to pipe additional data to the solver, should
+     * it be needed.
+     */
+    struct AdditionalData
+    {
+      /**
+       * Constructor. By default, set the option of delayed
+       * reorthogonalization to false, i.e. don't do it.
+       */
+      AdditionalData (const bool delayed_reorthogonalization = false);
+
+      /**
+       * Flag for delayed reorthogonalization.
+       */
+      bool delayed_reorthogonalization;
+    };
+
+    /**
+     * Constructor. SLEPc solvers will want to have an MPI communicator
+     * context over which computations are parallelized. The default,
+     * PETSC_COMM_SELF, carries the same behaviour as the PETScWrappers, but
+     * you can change that by passing a different communicator.
+     *
+     * @param cn Object that controls convergence.
+     * @param mpi_communicator Communicator to use for the solver.
+     * @param data Additional, solver specific flags.
+     */
+    SolverArnoldi (SolverControl        &cn,
+                   const MPI_Comm       &mpi_communicator = PETSC_COMM_SELF,
+                   const AdditionalData &data = AdditionalData());
+
+  protected:
+
+    /**
+     * Store a copy of the flags for this particular solver.
+     */
+    const AdditionalData additional_data;
+  };
+
+  /**
+   * An implementation of the solver interface using the SLEPc Lanczos solver.
+   * Usage: All spectrum, all problem types, complex.
+   *
+   * @ingroup SLEPcWrappers
+   *
+   * @author Toby D. Young 2009; and Denis Davydov 2015;
+   */
+  class SolverLanczos : public SolverBase
+  {
+  public:
+    /**
+     * Standardized data struct to pipe additional data to the solver, should
+     * it be needed.
+     */
+    struct AdditionalData
+    {
+      /**
+       * The type of reorthogonalization used during the Lanczos iteration.
+       */
+      EPSLanczosReorthogType reorthog;
+
+      /**
+       * Constructor. By default sets the type of reorthogonalization used
+       * during the Lanczos iteration to full (EPS_LANCZOS_REORTHOG_FULL).
+       */
+      AdditionalData(const EPSLanczosReorthogType r  = EPS_LANCZOS_REORTHOG_FULL);
+    };
+
+    /**
+     * Constructor. SLEPc solvers will want to have an MPI communicator
+     * context over which computations are parallelized. The default,
+     * PETSC_COMM_SELF, carries the same behaviour as the PETScWrappers, but
+     * you can change that by passing a different communicator.
+     *
+     * @param cn Object that controls convergence.
+     * @param mpi_communicator Communicator to use for the solver.
+     * @param data Additional, solver specific flags.
+     */
+    SolverLanczos (SolverControl        &cn,
+                   const MPI_Comm       &mpi_communicator = PETSC_COMM_SELF,
+                   const AdditionalData &data = AdditionalData());
+
+  protected:
+
+    /**
+     * Store a copy of the flags for this particular solver.
+     */
+    const AdditionalData additional_data;
+  };
+
+  /**
+   * An implementation of the solver interface using the SLEPc Power solver.
+   * Usage: Largest values of spectrum only, all problem types, complex.
+   *
+   * @ingroup SLEPcWrappers
+   *
+   * @author Toby D. Young 2010
+   */
+  class SolverPower : public SolverBase
+  {
+  public:
+    /**
+     * Standardized data struct to pipe additional data to the solver, should
+     * it be needed. It currently has no members.
+     */
+    struct AdditionalData
+    {};
+
+    /**
+     * Constructor. SLEPc solvers will want to have an MPI communicator
+     * context over which computations are parallelized. The default,
+     * PETSC_COMM_SELF, carries the same behaviour as the PETScWrappers, but
+     * you can change that by passing a different communicator.
+     *
+     * @param cn Object that controls convergence.
+     * @param mpi_communicator Communicator to use for the solver.
+     * @param data Additional, solver specific flags.
+     */
+    SolverPower (SolverControl        &cn,
+                 const MPI_Comm       &mpi_communicator = PETSC_COMM_SELF,
+                 const AdditionalData &data = AdditionalData());
+
+  protected:
+
+    /**
+     * Store a copy of the flags for this particular solver.
+     */
+    const AdditionalData additional_data;
+  };
+
+  /**
+   * An implementation of the solver interface using the SLEPc Davidson
+   * solver. Usage: All problem types.
+   *
+   * @ingroup SLEPcWrappers
+   *
+   * @author Toby D. Young 2010; Denis Davydov 2015
+   */
+  class SolverGeneralizedDavidson : public SolverBase
+  {
+  public:
+    /**
+     * Standardized data struct to pipe additional data to the solver, should
+     * it be needed.
+     */
+    struct AdditionalData
+    {
+      /**
+       * Use double expansion in search subspace.
+       */
+      bool double_expansion;
+
+      /**
+       * Constructor. By default set double_expansion to false.
+       */
+      AdditionalData(bool double_expansion = false);
+    };
+
+    /**
+     * Constructor. SLEPc solvers will want to have an MPI communicator
+     * context over which computations are parallelized. The default,
+     * PETSC_COMM_SELF, carries the same behaviour as the PETScWrappers, but
+     * you can change that by passing a different communicator.
+     *
+     * @param cn Object that controls convergence.
+     * @param mpi_communicator Communicator to use for the solver.
+     * @param data Additional, solver specific flags.
+     */
+    SolverGeneralizedDavidson (SolverControl        &cn,
+                               const MPI_Comm       &mpi_communicator = PETSC_COMM_SELF,
+                               const AdditionalData &data = AdditionalData());
+
+  protected:
+
+    /**
+     * Store a copy of the flags for this particular solver.
+     */
+    const AdditionalData additional_data;
+  };
+
+  /**
+   * An implementation of the solver interface using the SLEPc Jacobi-Davidson
+   * solver. Usage: All problem types.
+   *
+   * @ingroup SLEPcWrappers
+   *
+   * @author Toby D. Young 2013
+   */
+  class SolverJacobiDavidson : public SolverBase
+  {
+  public:
+    /**
+     * Standardized data struct to pipe additional data to the solver, should
+     * it be needed. It currently has no members.
+     */
+    struct AdditionalData
+    {};
+
+    /**
+     * Constructor. SLEPc solvers will want to have an MPI communicator
+     * context over which computations are parallelized. The default,
+     * PETSC_COMM_SELF, carries the same behaviour as the PETScWrappers, but
+     * you can change that by passing a different communicator.
+     *
+     * @param cn Object that controls convergence.
+     * @param mpi_communicator Communicator to use for the solver.
+     * @param data Additional, solver specific flags.
+     */
+    SolverJacobiDavidson (SolverControl        &cn,
+                          const MPI_Comm       &mpi_communicator = PETSC_COMM_SELF,
+                          const AdditionalData &data = AdditionalData());
+
+  protected:
+
+    /**
+     * Store a copy of the flags for this particular solver.
+     */
+    const AdditionalData additional_data;
+  };
+
+
+  /**
+   * An implementation of the solver interface using the SLEPc LAPACK direct
+   * solver.
+   *
+   * @ingroup SLEPcWrappers
+   *
+   * @author Toby D. Young 2013
+   */
+  class SolverLAPACK : public SolverBase
+  {
+  public:
+
+    /**
+     * Standardized data struct to pipe additional data to the solver, should
+     * it be needed. It currently has no members.
+     */
+    struct AdditionalData
+    {};
+
+    /**
+     * Constructor. SLEPc solvers will want to have an MPI communicator
+     * context over which computations are parallelized. The default,
+     * PETSC_COMM_SELF, carries the same behaviour as the PETScWrappers, but
+     * you can change that by passing a different communicator.
+     *
+     * @param cn Object that controls convergence.
+     * @param mpi_communicator Communicator to use for the solver.
+     * @param data Additional, solver specific flags.
+     */
+    SolverLAPACK (SolverControl        &cn,
+                  const MPI_Comm       &mpi_communicator = PETSC_COMM_SELF,
+                  const AdditionalData &data = AdditionalData());
+
+  protected:
+
+    /**
+     * Store a copy of the flags for this particular solver.
+     */
+    const AdditionalData additional_data;
+  };
+
+  // --------------------------- inline and template functions -----------
+  /**
+   * This is declared here to make it possible to take a std::vector of
+   * different PETScWrappers vector types
+   */
+  // todo: The logic of these functions can be simplified without breaking backward compatibility...
+
+  // Solve the standard eigenproblem A x = lambda x for n_eigenpairs
+  // eigenpairs and copy them into eigenvalues / eigenvectors.
+  //
+  // Throws ExcSLEPcWrappersUsageError if n_eigenpairs is zero or exceeds
+  // A.m(), or if eigenvectors is empty; throws
+  // ExcSLEPcEigenvectorConvergenceMismatchError if fewer eigenpairs than
+  // requested have converged. All usage errors are reported before the
+  // eigensolver is run.
+  template <typename OutputVector>
+  void
+  SolverBase::solve (const PETScWrappers::MatrixBase &A,
+                     std::vector<PetscScalar>        &eigenvalues,
+                     std::vector<OutputVector>       &eigenvectors,
+                     const unsigned int               n_eigenpairs)
+  {
+    // Panic if the number of eigenpairs wanted is out of bounds.
+    AssertThrow ((n_eigenpairs > 0) && (n_eigenpairs <= A.m ()),
+                 ExcSLEPcWrappersUsageError());
+
+    // The first entry of eigenvectors is the template used for resizing
+    // below, so an empty vector is a usage error. Check this before doing
+    // any work rather than after the (expensive) solve.
+    AssertThrow (eigenvectors.size() != 0, ExcSLEPcWrappersUsageError());
+
+    // Set the matrices of the problem
+    set_matrices (A);
+
+    // and solve
+    unsigned int n_converged = 0;
+    solve (n_eigenpairs, &n_converged);
+
+    // More eigenpairs than requested may have converged; ignore the extras.
+    if (n_converged > n_eigenpairs)
+      n_converged = n_eigenpairs;
+    AssertThrow (n_converged == n_eigenpairs,
+                 ExcSLEPcEigenvectorConvergenceMismatchError(n_converged, n_eigenpairs));
+
+    // Size the output containers, using the first eigenvector as the
+    // template for any newly created entries.
+    eigenvectors.resize (n_converged, eigenvectors.front());
+    eigenvalues.resize (n_converged);
+
+    for (unsigned int index=0; index<n_converged; ++index)
+      get_eigenpair (index, eigenvalues[index], eigenvectors[index]);
+  }
+
+  // Solve the generalized eigenproblem A x = lambda B x for n_eigenpairs
+  // eigenpairs and copy them into eigenvalues / eigenvectors.
+  //
+  // Throws ExcDimensionMismatch if A and B differ in size,
+  // ExcSLEPcWrappersUsageError if n_eigenpairs is zero or exceeds A.m(), or
+  // if eigenvectors is empty, and
+  // ExcSLEPcEigenvectorConvergenceMismatchError if fewer eigenpairs than
+  // requested have converged. All usage errors are reported before the
+  // eigensolver is run.
+  template <typename OutputVector>
+  void
+  SolverBase::solve (const PETScWrappers::MatrixBase &A,
+                     const PETScWrappers::MatrixBase &B,
+                     std::vector<PetscScalar>        &eigenvalues,
+                     std::vector<OutputVector>       &eigenvectors,
+                     const unsigned int               n_eigenpairs)
+  {
+    // Guard against incompatible matrix sizes:
+    AssertThrow (A.m() == B.m (), ExcDimensionMismatch(A.m(), B.m()));
+    AssertThrow (A.n() == B.n (), ExcDimensionMismatch(A.n(), B.n()));
+
+    // Panic if the number of eigenpairs wanted is out of bounds.
+    AssertThrow ((n_eigenpairs>0) && (n_eigenpairs<=A.m ()),
+                 ExcSLEPcWrappersUsageError());
+
+    // The first entry of eigenvectors is the template used for resizing
+    // below, so an empty vector is a usage error. Check this before doing
+    // any work rather than after the (expensive) solve.
+    AssertThrow (eigenvectors.size() != 0, ExcSLEPcWrappersUsageError());
+
+    // Set the matrices of the problem
+    set_matrices (A, B);
+
+    // and solve
+    unsigned int n_converged = 0;
+    solve (n_eigenpairs, &n_converged);
+
+    // More eigenpairs than requested may have converged; ignore the extras.
+    if (n_converged>=n_eigenpairs)
+      n_converged = n_eigenpairs;
+
+    AssertThrow (n_converged==n_eigenpairs,
+                 ExcSLEPcEigenvectorConvergenceMismatchError(n_converged, n_eigenpairs));
+
+    // Size the output containers, using the first eigenvector as the
+    // template for any newly created entries.
+    eigenvectors.resize (n_converged, eigenvectors.front());
+    eigenvalues.resize (n_converged);
+
+    for (unsigned int index=0; index<n_converged; ++index)
+      get_eigenpair (index, eigenvalues[index], eigenvectors[index]);
+  }
+
+  template <typename OutputVector>
+  void
+  SolverBase::solve (const PETScWrappers::MatrixBase &A,
+                     const PETScWrappers::MatrixBase &B,
+                     std::vector<double>             &real_eigenvalues,
+                     std::vector<double>             &imag_eigenvalues,
+                     std::vector<OutputVector>       &real_eigenvectors,
+                     std::vector<OutputVector>       &imag_eigenvectors,
+                     const unsigned int                  n_eigenpairs)
+  {
+    // Both matrices must have identical row and column counts.
+    AssertThrow (A.m() == B.m (), ExcDimensionMismatch(A.m(), B.m()));
+    AssertThrow (A.n() == B.n (), ExcDimensionMismatch(A.n(), B.n()));
+
+    // Real and imaginary output containers must come in matching sizes.
+    AssertThrow (real_eigenvalues.size() == imag_eigenvalues.size(),
+                 ExcDimensionMismatch(real_eigenvalues.size(), imag_eigenvalues.size()));
+    AssertThrow (real_eigenvectors.size() == imag_eigenvectors.size (),
+                 ExcDimensionMismatch(real_eigenvectors.size(), imag_eigenvectors.size()));
+
+    // The requested number of eigenpairs must lie in [1, A.m()].
+    AssertThrow ((n_eigenpairs>0) && (n_eigenpairs<=A.m ()),
+                 ExcSLEPcWrappersUsageError());
+
+    // Register both matrices and run the solver; n_converged is handed over
+    // by pointer and read back afterwards as the number of converged pairs.
+    set_matrices (A, B);
+    unsigned int n_converged = 0;
+    solve (n_eigenpairs, &n_converged);
+
+    // Never hand back more eigenpairs than were asked for.
+    n_converged = (n_converged>=n_eigenpairs) ? n_eigenpairs : n_converged;
+
+    AssertThrow (n_converged==n_eigenpairs,
+                 ExcSLEPcEigenvectorConvergenceMismatchError(n_converged, n_eigenpairs));
+    AssertThrow ((real_eigenvectors.size()!=0) && (imag_eigenvectors.size()!=0),
+                 ExcSLEPcWrappersUsageError());
+
+    // New eigenvector entries are copies of the respective first element.
+    real_eigenvectors.resize (n_converged, real_eigenvectors.front());
+    imag_eigenvectors.resize (n_converged, imag_eigenvectors.front());
+    real_eigenvalues.resize (n_converged);
+    imag_eigenvalues.resize (n_converged);
+
+    for (unsigned int i=0; i!=n_converged; ++i)
+      get_eigenpair (i,
+                     real_eigenvalues[i], imag_eigenvalues[i],
+                     real_eigenvectors[i], imag_eigenvectors[i]);
+  }
+
+  template <typename Vector>
+  void
+  SolverBase::set_initial_space(const std::vector<Vector> &this_initial_space)
+  {
+    // Convert every guess to a PETSc Vec; guesses are expected to be nonzero.
+    std::vector<Vec> vecs(this_initial_space.size());
+    for (unsigned int i = 0; i < this_initial_space.size(); ++i)
+      {
+        Assert(this_initial_space[i].l2_norm()>0.0,
+               ExcMessage("Initial vectors should be nonzero."));
+        vecs[i] = this_initial_space[i];
+      }
+
+    // &vecs[0] is undefined for an empty vector, so pass a null pointer then.
+    Vec *const first_vec = (vecs.empty() ? static_cast<Vec *>(0) : &vecs[0]);
+
+    // if the eigensolver supports only a single initial vector, but several
+    // guesses are provided, all except the first one are discarded. One could
+    // instead pass a linear combination of them. (TODO: make function virtual?)
+#if DEAL_II_PETSC_VERSION_LT(3,1,0)
+    const int ierr = EPSSetInitialVector (eps, first_vec);
+#else
+    const int ierr = EPSSetInitialSpace (eps, vecs.size(), first_vec);
+#endif
+    AssertThrow (ierr == 0, ExcSLEPcError(ierr));
+  }
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_SLEPC
+
+/*----------------------------   slepc_solver.h  ---------------------------*/
+
+#endif
+
+/*----------------------------   slepc_solver.h  ---------------------------*/
diff --git a/include/deal.II/lac/slepc_spectral_transformation.h b/include/deal.II/lac/slepc_spectral_transformation.h
new file mode 100644
index 0000000..530fe02
--- /dev/null
+++ b/include/deal.II/lac/slepc_spectral_transformation.h
@@ -0,0 +1,300 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__slepc_spectral_transformation_h
+#define dealii__slepc_spectral_transformation_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_SLEPC
+
+#  include <deal.II/base/std_cxx11/shared_ptr.h>
+#  include <deal.II/lac/exceptions.h>
+#  include <deal.II/lac/petsc_solver.h>
+
+#  include <petscksp.h>
+#  include <slepceps.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace PETScWrappers
+{
+  // forward declaration of the solver type taken by set_solver() below
+  class SolverBase;
+}
+
+namespace SLEPcWrappers
+{
+
+  /**
+   * Base class for spectral transformation classes using the SLEPc solvers
+   * which are selected based on flags passed to the spectral transformation.
+   *
+   * <code>SLEPcWrappers::TransformationXXX</code>, where <code>XXX</code> is
+   * your favourite transformation type, can then be implemented in
+   * application codes in the following way for <code>XXX=Shift</code> with
+   * the solver object <code>eigensolver</code>:
+   * @code
+   *  // Set a transformation, this one shifts the eigenspectrum by 3.142..
+   *  SLEPcWrappers::TransformationShift::AdditionalData additional_data (3.142);
+   *  SLEPcWrappers::TransformationShift shift (mpi_communicator,additional_data);
+   *  eigensolver.set_transformation (shift);
+   * @endcode
+   * and later calling the <code>solve()</code> function as usual:
+   * @code
+   *  SolverControl solver_control (1000, 1e-9);
+   *  SolverArnoldi eigensolver (solver_control, mpi_communicator);
+   *  eigensolver.solve (A, B, lambda, x, size_of_spectrum);
+   * @endcode
+   *
+   * @note These options can also be set at the command line.
+   *
+   * @ingroup SLEPcWrappers
+   * @author Toby D. Young 2009, 2013; and Denis Davydov 2015.
+   */
+  class TransformationBase
+  {
+  protected:
+
+    /**
+     * Constructor. Protected, so accessible to derived classes and friends.
+     */
+    TransformationBase (const MPI_Comm &mpi_communicator);
+
+  public:
+    /**
+     * Destructor. Virtual, since this class serves as a base class.
+     */
+    virtual ~TransformationBase ();
+
+    /**
+     * Set a flag to indicate how the transformed matrices are being stored in
+     * the spectral transformations.
+     *
+     * The possible values are given by the enumerator STMatMode in the SLEPc
+     * library:
+     * http://www.grycap.upv.es/slepc/documentation/current/docs/manualpages/ST/STMatMode.html
+     */
+    void set_matrix_mode(const STMatMode mode);
+
+    /**
+     * Set solver to be used when solving a system of linear algebraic
+     * equations inside the eigensolver.
+     */
+    void
+    set_solver(const PETScWrappers::SolverBase &solver);
+
+  protected:
+    /**
+     * SLEPc spectral transformation object.
+     */
+    ST st;
+
+    /**
+     * Make the SolverBase class of this namespace a friend, since it needs
+     * to set the spectral transformation object.
+     */
+    friend class SolverBase;
+
+  };
+
+  /**
+   * An implementation of the transformation interface using the SLEPc Shift.
+   *
+   * @ingroup SLEPcWrappers
+   * @author Toby D. Young 2009
+   */
+  class TransformationShift : public TransformationBase
+  {
+  public:
+
+    /**
+     * Standardized data struct to pipe additional data to the transformation.
+     */
+    struct AdditionalData
+    {
+
+      /**
+       * Constructor. The shift parameter defaults to zero.
+       */
+      AdditionalData (const double shift_parameter = 0);
+
+      /**
+       * Shift parameter (constant once the object is constructed).
+       */
+      const double shift_parameter;
+    };
+
+
+    /**
+     * Constructor. If no additional data is given, a default one is used.
+     */
+    TransformationShift (const MPI_Comm &mpi_communicator,
+                         const AdditionalData &data = AdditionalData());
+
+
+  protected:
+
+    /**
+     * Store a copy of the flags for this particular transformation.
+     */
+    const AdditionalData additional_data;
+  };
+
+  /**
+   * An implementation of the transformation interface using the SLEPc Shift
+   * and Invert.
+   *
+   * @ingroup SLEPcWrappers
+   * @author Toby D. Young 2009
+   */
+  class TransformationShiftInvert : public TransformationBase
+  {
+  public:
+
+    /**
+     * Standardized data struct to pipe additional data to the transformation.
+     */
+    struct AdditionalData
+    {
+      /**
+       * Constructor. The shift parameter defaults to zero.
+       */
+      AdditionalData (const double shift_parameter = 0);
+
+      /**
+       * Shift parameter (constant once the object is constructed).
+       */
+      const double shift_parameter;
+    };
+
+
+    /**
+     * Constructor. If no additional data is given, a default one is used.
+     */
+    TransformationShiftInvert (const MPI_Comm &mpi_communicator,
+                               const AdditionalData &data = AdditionalData());
+
+  protected:
+
+    /**
+     * Store a copy of the flags for this particular transformation.
+     */
+    const AdditionalData additional_data;
+  };
+
+  /**
+   * An implementation of the transformation interface using the SLEPc
+   * Spectrum Folding. This transformation type has been removed in SLEPc
+   * 3.5.0 and thus cannot be used in the newer versions.
+   *
+   * @ingroup SLEPcWrappers
+   * @author Toby D. Young 2009
+   */
+  class TransformationSpectrumFolding : public TransformationBase
+  {
+  public:
+
+    /**
+     * Standardized data struct to pipe additional data to the transformation.
+     */
+    struct AdditionalData
+    {
+      /**
+       * Constructor. The shift parameter defaults to zero.
+       */
+      AdditionalData (const double shift_parameter = 0);
+
+      /**
+       * Shift parameter (constant once the object is constructed).
+       */
+      const double shift_parameter;
+    };
+
+
+    /**
+     * Constructor. If no additional data is given, a default one is used.
+     */
+    TransformationSpectrumFolding (const MPI_Comm &mpi_communicator,
+                                   const AdditionalData &data = AdditionalData());
+
+  protected:
+
+    /**
+     * Store a copy of the flags for this particular transformation.
+     */
+    const AdditionalData additional_data;
+  };
+
+  /**
+   * An implementation of the transformation interface using the SLEPc Cayley.
+   *
+   * @ingroup SLEPcWrappers
+   * @author Toby D. Young 2009
+   */
+  class TransformationCayley : public TransformationBase
+  {
+  public:
+
+    /**
+     * Standardized data struct to pipe additional data to the transformation.
+     */
+    struct AdditionalData
+    {
+      /**
+       * Constructor. Takes two shift parameters, both defaulting to zero.
+       */
+      AdditionalData (const double shift_parameter     = 0,
+                      const double antishift_parameter = 0);
+
+      /**
+       * Shift parameter (constant once the object is constructed).
+       */
+      const double shift_parameter;
+
+      /**
+       * Antishift parameter (constant once the object is constructed).
+       */
+      const double antishift_parameter;
+    };
+
+
+    /**
+     * Constructor. If no additional data is given, a default one is used.
+     */
+    TransformationCayley (const MPI_Comm &mpi_communicator,
+                          const AdditionalData &data = AdditionalData());
+
+  protected:
+
+    /**
+     * Store a copy of the flags for this particular transformation.
+     */
+    const AdditionalData additional_data;
+  };
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_SLEPC
+
+/*--------------------   slepc_spectral_transformation.h   ------------------*/
+
+#endif
+
+/*--------------------   slepc_spectral_transformation.h   ------------------*/
diff --git a/include/deal.II/lac/solver.h b/include/deal.II/lac/solver.h
new file mode 100644
index 0000000..f0851e8
--- /dev/null
+++ b/include/deal.II/lac/solver.h
@@ -0,0 +1,547 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__solver_h
+#define dealii__solver_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/lac/vector_memory.h>
+#include <deal.II/lac/solver_control.h>
+
+// Ignore deprecation warnings for auto_ptr.
+DEAL_II_DISABLE_EXTRA_DIAGNOSTICS
+#include <boost/signals2.hpp>
+DEAL_II_ENABLE_EXTRA_DIAGNOSTICS
+
+DEAL_II_NAMESPACE_OPEN
+
+template <typename number> class Vector;
+
+/**
+ * A base class for iterative linear solvers. This class provides interfaces
+ * to a memory pool and the objects that determine whether a solver has
+ * converged.
+ *
+ *
+ * <h3>Requirements common to derived solver classes</h3>
+ *
+ * Since iterative solvers do not rely on any special structure of matrices or
+ * the format of storage but only require that matrices and vectors define
+ * certain operations such as matrix-vector products, or scalar products
+ * between vectors, this class as well as the derived classes and their member
+ * functions implementing concrete linear solvers are templated on the types
+ * of matrices and vectors. However, there are some common requirements a
+ * matrix or vector type must fulfill to qualify as an acceptable type for the
+ * solvers in this hierarchy. These requirements are listed below.
+ *
+ * The classes we show below are not any concrete class. Rather, they are
+ * intended to form a `signature' which a concrete class has to conform to.
+ * Note that the matrix and vector classes within this library of course
+ * conform to this interface; therefore, SparseMatrix and Vector are good
+ * examples for these classes as they provide the necessary signatures of
+ * member functions.
+ *
+ * @code
+ * class Matrix
+ * {
+ *   public:
+ *                        // Application of matrix to vector src.
+ *                        // Write result into dst
+ *     void vmult (VectorType       &dst,
+ *                 const VectorType &src) const;
+ *
+ *                        // Application of transpose to a vector.
+ *                        // Only used by some iterative methods.
+ *     void Tvmult (VectorType       &dst,
+ *                  const VectorType &src) const;
+ * };
+ *
+ *
+ * class Vector
+ * {
+ *   public:
+ *                        // Resize the current object to have
+ *                        // the same size and layout as the model_vector
+ *                        // argument provided. The second argument
+ *                        // indicates whether to clear the current
+ *                        // object after resizing.
+ *                        // The second argument must have
+ *                        // a default value equal to false
+ *     void reinit (const Vector &model_vector,
+ *                  const bool  leave_elements_uninitialized = false);
+ *
+ *                        // Inner product between the current object
+ *                        // and the argument
+ *     double operator * (const Vector &v) const;
+ *
+ *                        // Addition of vectors
+ *     void add (const Vector &x);
+ *
+ *                        // Scaled addition of vectors
+ *     void add (const double  a,
+ *               const Vector &x);
+ *
+ *                        // Scaled addition of vectors
+ *     void sadd (const double  a,
+ *                const double  b,
+ *                const Vector &x);
+ *
+ *                        // Scaled assignment of a vector
+ *     void equ (const double  a,
+ *               const Vector &x);
+ *
+ *                        // Combined scaled addition of vector x into
+ *                        // the current object and subsequent inner
+ *                        // product of the current object with v
+ *     double add_and_dot (const double  a,
+ *                         const Vector &x,
+ *                         const Vector &v);
+ *
+ *                        // Multiply the elements of the current
+ *                        // object by a fixed value
+ *     Vector & operator *= (const double a);
+ *
+ *                        // Return the l2 norm of the vector
+ *     double l2_norm () const;
+ * };
+ * @endcode
+ *
+ * In addition, for some solvers there has to be a global function
+ * <tt>swap(VectorType &a, VectorType &b)</tt> that exchanges the values of
+ * the two vectors.
+ *
+ * Finally, the solvers also expect an instantiation of
+ * GrowingVectorMemory@<VectorType@>. These instantiations are provided by the
+ * deal.II library for the built-in vector types, but must be explicitly added
+ * for user-provided vector classes. Otherwise, the linker will complain that
+ * it cannot find the constructors and destructors of GrowingVectorMemory that
+ * happen in the @p Solver class.
+ *
+ * @code
+ * // Definition and implementation of vector class
+ * class UserVector { ... };
+ *
+ * // Create explicit instantiation for the vector class. If your project
+ * // consists of multiple files, including header files, this instantiation
+ * // must be put in a <code>.cc</code> file in order to instantiate only
+ * // once.
+ * #include <deal.II/lac/vector_memory.templates.h>
+ *
+ * template class VectorMemory<UserVector>;
+ * template class GrowingVectorMemory<UserVector>;
+ * @endcode
+ *
+ * The preconditioners used must have the same interface as matrices, i.e. in
+ * particular they have to provide a member function @p vmult which denotes
+ * the application of the preconditioner.
+ *
+ *
+ * <h3>AdditionalData</h3>
+ *
+ * Several solvers need additional data, like the damping parameter @p omega
+ * of the @p SolverRichardson class or the maximum number of temporary vectors
+ * of @p SolverGMRES.  To have a standardized way of constructing solvers,
+ * each solver class has a <tt>struct AdditionalData</tt> as a member, and
+ * constructors of all solver classes take such an argument. Some solvers need
+ * no additional data, or may not at the current time. For these solvers the
+ * struct @p AdditionalData is empty, and the constructor may be called
+ * without passing this structure as an argument, since a default-constructed
+ * @p AdditionalData is then used.
+ *
+ * With this, creating a solver looks like
+ * @code
+ *                               // GMRES with restart every 50 iterations
+ * SolverGMRES solver_gmres (solver_control, vector_memory,
+ *                           SolverGMRES::AdditionalData(50));
+ *
+ *                               // Richardson with omega=0.8
+ * SolverRichardson solver_richardson (solver_control, vector_memory,
+ *                                     SolverRichardson::AdditionalData(0.8));
+ *
+ *                               // CG with default AdditionalData
+ * SolverCG solver_cg (solver_control, vector_memory);
+ * @endcode
+ *
+ * Using a unified constructor parameter list for all solvers supports the @p
+ * SolverSelector class; the unified interface enables us to use this class
+ * unchanged even if the number of types of parameters to a certain solver
+ * changes and it is still possible in a simple way to give these additional
+ * data to the @p SolverSelector object for each solver which it may use.
+ *
+ *
+ * <h3>Observing the progress of linear solver iterations</h3>
+ *
+ * The Solver class, being the base class for all of the iterative solvers
+ * such as SolverCG, SolverGMRES, etc, provides the facilities by which actual
+ * solver implementations determine whether the iteration is converged, not
+ * yet converged, or has failed. Typically, this is done using an object of
+ * type SolverControl that is passed to the solver classes's constructors and
+ * from them down to the constructor of this base class. Every one of the
+ * tutorial programs that solves a linear problem (starting with step-3) uses
+ * this method and it is described in detail there. However, the underlying
+ * mechanism is more general and allows for many other uses to observe how the
+ * linear solver iterations progress.
+ *
+ * The basic approach is that the iterative solvers invoke a <i>signal</i> at
+ * the end of each iteration to determine whether the solution is converged. A
+ * signal is a class that has, conceptually, a list of pointers to functions
+ * and every time the signal is invoked, each of these functions is called.
+ * In the language of signals, the functions called are called <i>slots</i>
+ * and one can attach any number of slots to a signal. (The implementation of
+ * signals and slots we use here is the one from the BOOST.signals2 library.)
+ * A number of details may clarify what is happening underneath:
+ * - In reality, the signal object does not store pointers to functions, but
+ *   function objects as slots. Each slot must conform to a particular
+ *   signature: here, it is an object that can be called with three arguments
+ *   (the number of the current linear iteration, the current residual, and
+ *   the current iterate; more specifics are discussed in the documentation of
+ *   the connect() function). A pointer to a function with this argument list
+ *   satisfies the requirements, but you can also pass a member function whose
+ *   <code>this</code> argument has been bound using the
+ *   <code>std_cxx11::bind</code> mechanism (see the example below).
+ * - Each of the slots will return a value that indicates whether the
+ *   iteration should continue, should stop because it has succeeded, or stop
+ *   because it has failed. The return type of slots is therefore of type
+ *   SolverControl::State. The returned values from all of the slots will then
+ *   have to be combined before they are returned to the iterative solver that
+ *   invoked the signal. The way this works is that if at least one slot
+ *   returned SolverControl::failure, then the combined value is
+ *   SolverControl::failure; otherwise, if at least one slot returned
+ *   SolverControl::iterate, then this is going to be the return value of the
+ *   signal; finally, only if all slots return SolverControl::success will the
+ *   signal's return value be SolverControl::success.
+ * - Of course, a particular slot may have been connected to the signal only
+ *   to observe how the solution or a specific part of it converges, without
+ *   any opinion on whether the iteration should continue or not. In such
+ *   cases, the slot should just return SolverControl::success, which is the
+ *   weakest of all return values according to the rules laid out above.
+ *
+ * Given all this, it should now be obvious how the SolverControl object fits
+ * into this scheme: when a SolverControl object is passed to the constructor
+ * of the current class, we simply connect the SolverControl::check() function
+ * of that object as a slot to the signal we maintain here. In other words,
+ * since a Solver object is always constructed using a SolverControl object,
+ * there is always at least one slot associated with the signal, namely the
+ * one that determines convergence.
+ *
+ * On the other hand, using the connect() member function, it is possible to
+ * connect any number of other slots to the signal to observe whatever it is
+ * you want to observe. The connect() function also returns an object that
+ * describes the connection from the signal to the slot, and the corresponding
+ * BOOST functions then allow you to disconnect the slot if you want.
+ *
+ * An example may illuminate these issues. In the step-3 tutorial program, let
+ * us add a member function as follows to the main class:
+ * @code
+ *  SolverControl::State
+ *  Step3::write_intermediate_solution (const unsigned int    iteration,
+ *                                      const double          , //check_value
+ *                                      const Vector<double> &current_iterate) const
+ *    {
+ *      DataOut<2> data_out;
+ *      data_out.attach_dof_handler (dof_handler);
+ *      data_out.add_data_vector (current_iterate, "solution");
+ *      data_out.build_patches ();
+ *
+ *      std::ofstream output ((std::string("solution-")
+ *                             + Utilities::int_to_string(iteration,4) + ".vtu").c_str());
+ *      data_out.write_vtu (output);
+ *
+ *      return SolverControl::success;
+ *    }
+ * @endcode
+ * The function satisfies the signature necessary to be a slot for the signal
+ * discussed above, with the exception that it is a member function and
+ * consequently requires a <code>this</code> pointer. What the function does
+ * is to take the vector given as last argument and write it into a file in
+ * VTU format with a file name derived from the number of the iteration.
+ *
+ * This function can then be hooked into the CG solver by modifying the
+ * <code>Step3::solve()</code> function as follows:
+ * @code
+ * void Step3::solve ()
+ * {
+ *   SolverControl           solver_control (1000, 1e-12);
+ *   SolverCG<>              solver (solver_control);
+ *
+ *   solver.connect (std_cxx11::bind (&Step3::write_intermediate_solution,
+ *                                    this,
+ *                                    std_cxx11::_1,
+ *                                    std_cxx11::_2,
+ *                                    std_cxx11::_3));
+ *   solver.solve (system_matrix, solution, system_rhs,
+ *                 PreconditionIdentity());
+ * }
+ * @endcode
+ * The use of <code>std_cxx11::bind</code> here ensures that we convert the
+ * member function with its three arguments plus the <code>this</code>
+ * pointer, to a function that only takes three arguments, by fixing the
+ * implicit <code>this</code> argument of the function to the
+ * <code>this</code> pointer in the current function.
+ *
+ * It is well understood that the CG method is a smoothing iteration (in the
+ * same way as the more commonly used Jacobi or SSOR iterations are
+ * smoothers). The code above therefore allows one to observe how the solution
+ * becomes smoother and smoother in every iteration. This is best observed by
+ * initializing the solution vector with randomly distributed numbers in
+ * $[-1,1]$, using code such as
+ * @code
+ *   for (unsigned int i=0; i<solution.size(); ++i)
+ *     solution(i) = 2.*rand()/RAND_MAX-1;
+ * @endcode
+ * Using this, the slot will then generate files that when visualized look
+ * like this over the course of iterations zero to five: <table> <tr> <td>
+ * @image html "cg-monitor-smoothing-0.png"
+ * </td> <td>
+ * @image html "cg-monitor-smoothing-1.png"
+ * </td> <td>
+ * @image html "cg-monitor-smoothing-2.png"
+ * </td> </tr> <tr> <td>
+ * @image html "cg-monitor-smoothing-3.png"
+ * </td> <td>
+ * @image html "cg-monitor-smoothing-4.png"
+ * </td> <td>
+ * @image html "cg-monitor-smoothing-5.png"
+ * </td> </tr> </table>
+ *
+ * @ingroup Solvers
+ * @author Wolfgang Bangerth, Guido Kanschat, Ralf Hartmann, 1997-2001, 2005,
+ * 2014
+ */
+template <class VectorType = Vector<double> >
+class Solver : public Subscriptor
+{
+public:
+  /**
+   * A typedef for the underlying vector type.
+   */
+  typedef VectorType vector_type;
+
+  /**
+   * Constructor. Takes a control object which evaluates the conditions for
+   * convergence, and an object that allows solvers to allocate memory for
+   * temporary objects.
+   *
+   * Of both objects, a reference is stored, so it is the user's
+   * responsibility to guarantee that the lifetime of the two arguments is at
+   * least as long as that of the solver object.
+   */
+  Solver (SolverControl            &solver_control,
+          VectorMemory<VectorType> &vector_memory);
+
+  /**
+   * Constructor. Takes a control object which evaluates the conditions for
+   * convergence. In contrast to the other constructor, this constructor
+   * designates an internal object of type GrowingVectorMemory to allocate
+   * memory.
+   *
+   * A reference to the control object is stored, so it is the user's
+   * responsibility to guarantee that the lifetime of the argument is at least
+   * as long as that of the solver object.
+   */
+  Solver (SolverControl &solver_control);
+
+  /**
+   * Connect a function object that will be called periodically within
+   * iterative solvers. This function is used to attach monitors to iterative
+   * solvers, either to determine when convergence has happened, or simply to
+   * observe the progress of an iteration. See the documentation of this class
+   * for more information.
+   *
+   * @param slot A function object specified here will, with each call,
+   * receive the number of the current iteration, the value that is used to
+   * check for convergence (typically the residual of the current iterate with
+   * respect to the linear system to be solved) and the currently best
+   * available guess for the current iterate. Note that some solvers do not
+   * update the approximate solution in every iteration but only after
+   * convergence or failure has been determined (GMRES is an example); in such
+   * cases, the vector passed as the last argument to the signal is simply the
+   * best approximate at the time the signal is called, but not the vector
+   * that will be returned if the signal's return value indicates that the
+   * iteration should be terminated. The function object must return a
+   * SolverControl::State value that indicates whether the iteration should
+   * continue, has failed, or has succeeded. The results of all connected
+   * functions will then be combined to determine what should happen with the
+   * iteration.
+   *
+   * @return A connection object that represents the connection from the
+   * signal to the function object. It can be used to disconnect the function
+   * object again from the signal. See the documentation of the BOOST Signals2
+   * library for more information on connection management.
+   */
+  boost::signals2::connection
+  connect (const std_cxx11::function<SolverControl::State (const unsigned int iteration,
+                                                           const double       check_value,
+                                                           const VectorType   &current_iterate)> &slot);
+
+
+
+protected:
+  /**
+   * A vector memory object (a regular, mutable member despite its name) to be
+   * used whenever no such object has been given to the constructor.
+   */
+  mutable GrowingVectorMemory<VectorType> static_vector_memory;
+
+  /**
+   * A reference to an object that provides memory for auxiliary vectors.
+   */
+  VectorMemory<VectorType> &memory;
+
+private:
+  /**
+   * A class whose operator() combines two states indicating whether we should
+   * continue iterating or stop, and returns a state that dominates. The rules:
+   * - If one of the two states indicates failure, return failure.
+   * - Otherwise, if one of them indicates to continue iterating, do so.
+   * - Otherwise, return success.
+   */
+  struct StateCombiner
+  {
+    typedef SolverControl::State result_type;
+
+    SolverControl::State operator() (const SolverControl::State state1,
+                                     const SolverControl::State state2) const;
+
+    template <typename Iterator>
+    SolverControl::State operator() (const Iterator begin,
+                                     const Iterator end) const;
+  };
+
+protected:
+  /**
+   * A signal that iterative solvers can execute at the end of every iteration
+   * (or in an otherwise periodic fashion) to find out whether we should
+   * continue iterating or not. The signal may call one or more slots that
+   * each will make this determination by themselves, and the result over all
+   * slots (function calls) will be determined by the StateCombiner object.
+   *
+   * The arguments passed to the signal are (i) the number of the current
+   * iteration; (ii) the value that is used to determine convergence
+   * (oftentimes the residual, but in other cases other quantities may be used
+   * as long as they converge to zero as the iterate approaches the solution
+   * of the linear system); and (iii) a vector that corresponds to the current
+   * best guess for the solution at the point where the signal is called. Note
+   * that some solvers do not update the approximate solution in every
+   * iteration but only after convergence or failure has been determined
+   * (GMRES is an example); in such cases, the vector passed as the last
+   * argument to the signal is simply the best approximate at the time the
+   * signal is called, but not the vector that will be returned if the
+   * signal's return value indicates that the iteration should be terminated.
+   */
+  boost::signals2::signal<SolverControl::State (const unsigned int iteration,
+                                                const double       check_value,
+                                                const VectorType   &current_iterate),
+                                                      StateCombiner> iteration_status;
+};
+
+
+/*-------------------------------- Inline functions ------------------------*/
+
+
+template <class VectorType>
+inline
+SolverControl::State
+Solver<VectorType>::StateCombiner::operator ()(const SolverControl::State state1,
+                                               const SolverControl::State state2) const
+{
+  // a failure reported by either side overrides everything else
+  const bool any_failure = (state1 == SolverControl::failure) ||
+                           (state2 == SolverControl::failure);
+  if (any_failure)
+    return SolverControl::failure;
+
+  // without a failure, one request to keep going is enough to keep iterating
+  const bool keep_going = (state1 == SolverControl::iterate) ||
+                          (state2 == SolverControl::iterate);
+  return keep_going ? SolverControl::iterate : SolverControl::success;
+}
+
+
+template <class VectorType>
+template <typename Iterator>
+inline
+SolverControl::State
+Solver<VectorType>::StateCombiner::operator ()(const Iterator begin,
+                                               const Iterator end) const
+{
+  Assert (begin != end, ExcMessage ("You can't combine iterator states if no state is given."));
+
+  // Seed the result with the first state, then fold every following state
+  // into it with the two-argument operator().
+  SolverControl::State combined = *begin;
+  Iterator next = begin;
+  while (++next != end)
+    combined = (*this)(combined, *next);
+
+  return combined;
+}
+
+
+template<class VectorType>
+inline
+Solver<VectorType>::Solver (SolverControl        &solver_control,
+                            VectorMemory<VectorType> &vector_memory)
+  :
+  memory(vector_memory)
+{
+  // Register solver_control as a slot of the iteration_status signal.
+  // SolverControl::check only takes the iteration number and the check
+  // value, so the bound call drops the third signal argument (the current
+  // iterate). The slot refers to solver_control by reference, not by copy.
+  this->connect (std_cxx11::bind(&SolverControl::check,
+                                 std_cxx11::ref(solver_control),
+                                 std_cxx11::_1,
+                                 std_cxx11::_2));
+}
+
+
+
+template<class VectorType>
+inline
+Solver<VectorType>::Solver (SolverControl &solver_control)
+  :
+  // no vector memory was supplied, so fall back to the object this class owns
+  memory(static_vector_memory)
+{
+  // Register solver_control as a slot of the iteration_status signal.
+  // SolverControl::check only takes the iteration number and the check
+  // value, so the bound call drops the third signal argument (the current
+  // iterate). The slot refers to solver_control by reference, not by copy.
+  this->connect (std_cxx11::bind(&SolverControl::check,
+                                 std_cxx11::ref(solver_control),
+                                 std_cxx11::_1,
+                                 std_cxx11::_2));
+}
+
+
+
+template<class VectorType>
+inline boost::signals2::connection
+Solver<VectorType>::connect
+(const std_cxx11::function<SolverControl::State (const unsigned int iteration,
+                                                 const double       check_value,
+                                                 const VectorType   &current_iterate)> &slot)
+{
+  // callers use the returned handle to disconnect the slot again
+  return iteration_status.connect (slot);
+}
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/solver_bicgstab.h b/include/deal.II/lac/solver_bicgstab.h
new file mode 100644
index 0000000..7b097eb
--- /dev/null
+++ b/include/deal.II/lac/solver_bicgstab.h
@@ -0,0 +1,502 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__solver_bicgstab_h
+#define dealii__solver_bicgstab_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/lac/solver.h>
+#include <deal.II/lac/solver_control.h>
+#include <cmath>
+#include <deal.II/base/subscriptor.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+/*!@addtogroup Solvers */
+/*@{*/
+
+/**
+ * Bicgstab algorithm by van der Vorst.
+ *
+ * For the requirements on matrices and vectors in order to work with this
+ * class, see the documentation of the Solver base class.
+ *
+ * Like all other solver classes, this class has a local structure called @p
+ * AdditionalData which is used to pass additional parameters to the solver,
+ * like damping parameters or the number of temporary vectors. We use this
+ * additional structure instead of passing these values directly to the
+ * constructor because this makes the use of the @p SolverSelector and other
+ * classes much easier and guarantees that these will continue to work even if
+ * number or type of the additional parameters for a certain solver changes.
+ *
+ * The Bicgstab-method has two additional parameters: the first is a boolean,
+ * deciding whether to compute the actual residual in each step (@p true) or
+ * to use the length of the computed orthogonal residual (@p false). Note that
+ * computing the residual causes a third matrix-vector-multiplication, though
+ * no additional preconditioning, in each step. The reason for doing this is
+ * that the size of the orthogonalized residual computed during the iteration
+ * may be larger by orders of magnitude than the true residual. This is due to
+ * numerical instabilities related to badly conditioned matrices. Since this
+ * instability results in a bad stopping criterion, the default for this
+ * parameter is @p true. Whenever the user knows that the estimated residual
+ * works reasonably as well, the flag should be set to @p false in order to
+ * increase the performance of the solver.
+ *
+ * The second parameter is the size of a breakdown criterion. It is difficult
+ * to find a general good criterion, so if things do not work for you, try to
+ * change this value.
+ *
+ *
+ * <h3>Observing the progress of linear solver iterations</h3>
+ *
+ * The solve() function of this class uses the mechanism described in the
+ * Solver base class to determine convergence. This mechanism can also be used
+ * to observe the progress of the iteration.
+ *
+ */
+template <typename VectorType = Vector<double> >
+class SolverBicgstab : public Solver<VectorType>
+{
+public:
+  /**
+   * There are two possibilities to compute the residual: one is an estimate
+   * using the computed value @p tau. The other is exact computation using
+   * another matrix vector multiplication. This increases the costs of the
+   * algorithm, so it should be set to false whenever the problem allows
+   * it.
+   *
+   * Bicgstab is susceptible to breakdowns, so we need a parameter telling us
+   * which numbers are considered zero.
+   */
+  struct AdditionalData
+  {
+    /**
+     * Constructor.
+     *
+     * The default is to perform an exact residual computation and breakdown
+     * parameter 1e-10.
+     */
+    explicit
+    AdditionalData(const bool   exact_residual = true,
+                   const double breakdown      = 1.e-10)
+      : exact_residual(exact_residual),
+        breakdown(breakdown)
+    {}
+    /**
+     * Flag for exact computation of residual.
+     */
+    bool exact_residual;
+    /**
+     * Breakdown threshold.
+     */
+    double breakdown;
+  };
+
+  /**
+   * Constructor. Temporary vectors are obtained from @p mem.
+   */
+  SolverBicgstab (SolverControl            &cn,
+                  VectorMemory<VectorType> &mem,
+                  const AdditionalData     &data=AdditionalData());
+
+  /**
+   * Constructor. Use an object of type GrowingVectorMemory as a default to
+   * allocate memory.
+   */
+  SolverBicgstab (SolverControl        &cn,
+                  const AdditionalData &data=AdditionalData());
+
+  /**
+   * Virtual destructor.
+   */
+  virtual ~SolverBicgstab ();
+
+  /**
+   * Solve primal problem only.
+   */
+  template<typename MatrixType, typename PreconditionerType>
+  void
+  solve (const MatrixType         &A,
+         VectorType               &x,
+         const VectorType         &b,
+         const PreconditionerType &precondition);
+
+protected:
+  /**
+   * Computation of the stopping criterion.
+   */
+  template <typename MatrixType>
+  double criterion (const MatrixType &A, const VectorType &x, const VectorType &b);
+
+  /**
+   * Interface for derived class.  This function gets the current iteration
+   * vector, the residual and the update vector in each step. It can be used
+   * for a graphical output of the convergence history.
+   */
+  virtual void print_vectors(const unsigned int step,
+                             const VectorType   &x,
+                             const VectorType   &r,
+                             const VectorType   &d) const;
+
+  /**
+   * Pointer to the solution vector handed to solve(); not owned.
+   */
+  VectorType *Vx;
+  /**
+   * Auxiliary vector holding the residual.
+   */
+  VectorType *Vr;
+  /**
+   * Auxiliary vector; copy of the residual taken when iterate() starts.
+   */
+  VectorType *Vrbar;
+  /**
+   * Auxiliary vector p, updated from the residual in every step.
+   */
+  VectorType *Vp;
+  /**
+   * Auxiliary vector; result of applying the preconditioner to Vp.
+   */
+  VectorType *Vy;
+  /**
+   * Auxiliary vector; result of applying the preconditioner to Vr.
+   */
+  VectorType *Vz;
+  /**
+   * Auxiliary vector; A times Vz. Also used as scratch by criterion().
+   */
+  VectorType *Vt;
+  /**
+   * Auxiliary vector; A times Vy.
+   */
+  VectorType *Vv;
+  /**
+   * Pointer to the right hand side vector handed to solve(); not owned.
+   */
+  const VectorType *Vb;
+
+  /**
+   * Auxiliary value.
+   */
+  double alpha;
+  /**
+   * Auxiliary value.
+   */
+  double beta;
+  /**
+   * Auxiliary value.
+   */
+  double omega;
+  /**
+   * Auxiliary value.
+   */
+  double rho;
+  /**
+   * Auxiliary value.
+   */
+  double rhobar;
+
+  /**
+   * Current iteration step.
+   */
+  unsigned int step;
+
+  /**
+   * Most recently computed residual norm.
+   */
+  double res;
+
+  /**
+   * Additional parameters.
+   */
+  AdditionalData additional_data;
+
+private:
+  /**
+   * Everything before the iteration loop.
+   */
+  template <typename MatrixType>
+  SolverControl::State start(const MatrixType &A);
+
+  /**
+   * A structure returned by the iterate() function representing what it found
+   * is happening during the iteration.
+   */
+  struct IterationResult
+  {
+    bool                 breakdown;
+    SolverControl::State state;
+    unsigned int         last_step;
+    double               last_residual;
+
+    IterationResult (const bool breakdown,
+                     const SolverControl::State state,
+                     const unsigned int         last_step,
+                     const double               last_residual);
+  };
+
+  /**
+   * The iteration loop itself. The function returns a structure indicating
+   * what happened in this function.
+   */
+  template<typename MatrixType, typename PreconditionerType>
+  IterationResult
+  iterate(const MatrixType         &A,
+          const PreconditionerType &precondition);
+};
+
+/*@}*/
+/*-------------------------Inline functions -------------------------------*/
+
+#ifndef DOXYGEN
+
+
+// Plain record constructor: every argument is copied into the member of the same name.
+template<typename VectorType>
+SolverBicgstab<VectorType>::IterationResult::IterationResult (const bool                 breakdown,
+                                                              const SolverControl::State state,
+                                                              const unsigned int         last_step,
+                                                              const double               last_residual)
+  : breakdown     (breakdown),
+    state         (state),
+    last_step     (last_step),
+    last_residual (last_residual)
+{
+}
+
+
+template<typename VectorType>
+SolverBicgstab<VectorType>::SolverBicgstab
+(SolverControl            &cn,
+ VectorMemory<VectorType> &mem,
+ const AdditionalData     &data)
+  : Solver<VectorType>(cn, mem),   // base class connects cn and remembers mem
+    additional_data(data)          // keep our own copy of the options
+{}
+
+
+
+template<typename VectorType>
+SolverBicgstab<VectorType>::SolverBicgstab
+(SolverControl        &cn,
+ const AdditionalData &data)
+  : Solver<VectorType>(cn),   // base class falls back to its own vector memory object
+    additional_data(data)     // keep our own copy of the options
+{}
+
+
+
+// Intentionally empty: the destructor has nothing of its own to release.
+template<typename VectorType>
+SolverBicgstab<VectorType>::~SolverBicgstab () {}
+
+
+
+template <typename VectorType>
+template <typename MatrixType>
+double
+SolverBicgstab<VectorType>::criterion (const MatrixType &A,
+                                       const VectorType &x,
+                                       const VectorType &b)
+{
+  // Vt serves as scratch space for A*x - b; only its l2 norm is kept
+  VectorType &defect = *Vt;
+  A.vmult(defect, x);
+  defect.add(-1., b);
+  return (res = defect.l2_norm());
+}
+
+
+
+template <typename VectorType >
+template <typename MatrixType>
+SolverControl::State
+SolverBicgstab<VectorType>::start (const MatrixType &A)
+{
+  VectorType &residual = *Vr;   // ends up holding b - A*x
+  A.vmult(residual, *Vx);
+  residual.sadd(-1., 1., *Vb);
+  res = residual.l2_norm();
+  return this->iteration_status(step, res, *Vx);
+}
+
+
+
+template<typename VectorType>
+void
+SolverBicgstab<VectorType>::print_vectors(const unsigned int /*step*/,
+                                          const VectorType & /*x*/,
+                                          const VectorType & /*r*/,
+                                          const VectorType & /*d*/) const
+{ /* default implementation produces no output */ }
+
+
+
+template<typename VectorType>
+template<typename MatrixType, typename PreconditionerType>
+typename SolverBicgstab<VectorType>::IterationResult
+SolverBicgstab<VectorType>::iterate(const MatrixType         &A,
+                                    const PreconditionerType &precondition)
+{
+//TODO:[GK] Implement "use the length of the computed orthogonal residual" in the BiCGStab method.
+  SolverControl::State state = SolverControl::iterate;
+  alpha = omega = rho = 1.;
+  // short local names for the work vectors that solve() has set up
+  VectorType &r = *Vr;
+  VectorType &rbar = *Vrbar;
+  VectorType &p = *Vp;
+  VectorType &y = *Vy;
+  VectorType &z = *Vz;
+  VectorType &t = *Vt;
+  VectorType &v = *Vv;
+  // keep a copy of the residual this (re)start begins with; rbar is not modified afterwards
+  rbar = r;
+  bool startup = true;
+
+  do
+    {
+      ++step;
+      // rhobar first holds the new r*rbar; beta is built from it and the previous rho
+      rhobar = r*rbar;
+      beta   = rhobar * alpha / (rho * omega);
+      rho    = rhobar;
+      if (startup == true)
+        {
+          p = r;
+          startup = false;
+        }
+      else
+        {
+          p.sadd(beta, 1., r);
+          p.add(-beta*omega, v);
+        }
+      // y = preconditioner applied to p, v = A*y; rhobar is then reused for rbar*v
+      precondition.vmult(y,p);
+      A.vmult(v,y);
+      rhobar = rbar * v;
+
+      alpha = rho/rhobar;
+
+//TODO:[?] Find better breakdown criterion
+      // NOTE(review): hard-coded threshold; additional_data.breakdown is not read in this function
+      if (std::fabs(alpha) > 1.e10)
+        return IterationResult(true, state, step, res);
+      // presumably updates r to r - alpha*v and returns its squared norm -- TODO confirm add_and_dot semantics
+      res = std::sqrt(r.add_and_dot(-alpha, v, r));
+
+      // check for early success, see the lac/bicgstab_early testcase as to
+      // why this is necessary
+      //
+      // note: the vector *Vx we pass to the iteration_status signal here is only
+      // the current approximation, not the one we will return with,
+      // which will be x=*Vx + alpha*y
+      if (this->iteration_status(step, res, *Vx) == SolverControl::success)
+        {
+          Vx->add(alpha, y);
+          print_vectors(step, *Vx, r, y);
+          return IterationResult(false, SolverControl::success, step, res);
+        }
+      // z = preconditioner applied to r, t = A*z, omega = (t*r)/(t*t); rhobar is reused for t*r
+      precondition.vmult(z,r);
+      A.vmult(t,z);
+      rhobar = t*r;
+      omega = rhobar/(t*t);
+      Vx->add(alpha, y, omega, z);
+      // with exact_residual, criterion() recomputes the norm from A, x and b (overwriting t)
+      if (additional_data.exact_residual)
+        {
+          r.add(-omega, t);
+          res = criterion(A, *Vx, *Vb);
+        }
+      else
+        res = std::sqrt(r.add_and_dot(-omega, t, r));
+
+      state = this->iteration_status(step, res, *Vx);
+      print_vectors(step, *Vx, r, y);
+    }
+  while (state == SolverControl::iterate);
+  return IterationResult(false, state, step, res);
+}
+
+
+template<typename VectorType>
+template<typename MatrixType, typename PreconditionerType>
+void
+SolverBicgstab<VectorType>::solve(const MatrixType         &A,
+                                  VectorType               &x,
+                                  const VectorType         &b,
+                                  const PreconditionerType &precondition)
+{
+  deallog.push("Bicgstab");
+
+  // The seven work vectors, in the order in which they are obtained from
+  // (and later handed back to) the vector memory object.
+  VectorType **const work[] = {&Vr, &Vrbar, &Vp, &Vy, &Vz, &Vt, &Vv};
+  const unsigned int n_work = sizeof(work)/sizeof(work[0]);
+  for (unsigned int i=0; i<n_work; ++i)
+    {
+      *work[i] = this->memory.alloc();
+      (*work[i])->reinit(x, true);
+    }
+
+  Vx = &x;
+  Vb = &b;
+  step = 0;
+
+  IterationResult state(false,SolverControl::failure,0,0);
+  try
+    {
+      // iterate while the inner iteration returns a breakdown
+      // NOTE(review): a failure returned by start() is ignored -- confirm restarts cannot repeat forever
+      do
+        {
+          if (step != 0)
+            deallog << "Restart step " << step << std::endl;
+          if (start(A) == SolverControl::success)
+            {
+              state.state = SolverControl::success;
+              break;
+            }
+          state = iterate(A, precondition);
+        }
+      while (state.breakdown == true);
+    }
+  catch (...)
+    {
+      // The matrix, the preconditioner or a slot threw: hand the work vectors
+      // back and restore the log prefix before passing the exception on.
+      for (unsigned int i=0; i<n_work; ++i)
+        this->memory.free(*work[i]);
+      deallog.pop();
+      throw;
+    }
+
+  for (unsigned int i=0; i<n_work; ++i)
+    this->memory.free(*work[i]);
+  deallog.pop();
+
+  // in case of failure: throw exception
+  AssertThrow(state.state == SolverControl::success,
+              SolverControl::NoConvergence (state.last_step,
+                                            state.last_residual));
+}
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/solver_cg.h b/include/deal.II/lac/solver_cg.h
new file mode 100644
index 0000000..2cfaeb0
--- /dev/null
+++ b/include/deal.II/lac/solver_cg.h
@@ -0,0 +1,662 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__solver_cg_h
+#define dealii__solver_cg_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/lac/tridiagonal_matrix.h>
+#include <deal.II/lac/solver.h>
+#include <deal.II/lac/solver_control.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/subscriptor.h>
+#include <cmath>
+
+DEAL_II_NAMESPACE_OPEN
+
+// forward declaration
+class PreconditionIdentity;
+
+
+/*!@addtogroup Solvers */
+/*@{*/
+
+/**
+ * Preconditioned cg method for symmetric positive definite matrices. This
+ * class is used first in step-3 and step-4, but is used in many other
+ * tutorial programs as well. Like all other solver classes, it can work on
+ * any kind of vector and matrix as long as they satisfy certain requirements
+ * (for the requirements on matrices and vectors in order to work with this
+ * class, see the documentation of the Solver base class). The type of the
+ * solution vector must be passed as template argument, and defaults to
+ * dealii::Vector<double>.
+ *
+ * Like all other solver classes, this class has a local structure called @p
+ * AdditionalData which is used to pass additional parameters to the solver.
+ * For this class, there is (among other things) a switch allowing for
+ * additional output for the computation of eigenvalues of the matrix.
+ *
+ * @note This version of CG is taken from D. Braess's book "Finite Elements".
+ * It requires a symmetric preconditioner (i.e., for example, SOR is not a
+ * possible choice).
+ *
+ *
+ * <h3>Eigenvalue computation</h3>
+ *
+ * The cg-method performs an orthogonal projection of the original
+ * preconditioned linear system to another system of smaller dimension.
+ * Furthermore, the projected matrix @p T is tri-diagonal. Since the
+ * projection is orthogonal, the eigenvalues of @p T approximate those of the
+ * original preconditioned matrix @p PA. In fact, after @p n steps, where @p n
+ * is the dimension of the original system, the eigenvalues of both matrices
+ * are equal. But, even for small numbers of iteration steps, the condition
+ * number of @p T is a good estimate for the one of @p PA.
+ *
+ * After @p m steps the matrix T_m can be written in terms of the coefficients
+ * @p alpha and @p beta as the tri-diagonal matrix with diagonal elements
+ * <tt>1/alpha_0</tt>, <tt>1/alpha_1 + beta_0/alpha_0</tt>, ...,
+ * <tt>1/alpha_{m-1}+beta_{m-2}/alpha_{m-2}</tt> and off-diagonal elements
+ * <tt>sqrt(beta_0)/alpha_0</tt>, ..., <tt>sqrt(beta_{m-2})/alpha_{m-2}</tt>.
+ * The eigenvalues of this matrix can be computed by postprocessing.
+ *
+ * @see Y. Saad: "Iterative methods for Sparse Linear Systems", section 6.7.3
+ * for details.
+ *
+ * The coefficients, eigenvalues and condition number (computed as the ratio
+ * of the largest over smallest eigenvalue) can be obtained by connecting a
+ * function as a slot to the solver using one of the functions @p
+ * connect_coefficients_slot, @p connect_eigenvalues_slot and @p
+ * connect_condition_number_slot. These slots will then be called from the
+ * solver with the estimates as argument.
+ *
+ * @deprecated Alternatively these estimates can be written to deallog by
+ * setting flags in @p AdditionalData.
+ *
+ * <h3>Observing the progress of linear solver iterations</h3>
+ *
+ * The solve() function of this class uses the mechanism described in the
+ * Solver base class to determine convergence. This mechanism can also be used
+ * to observe the progress of the iteration.
+ *
+ *
+ * @author W. Bangerth, G. Kanschat, R. Becker and F.-T. Suttmeier
+ */
+template <typename VectorType = Vector<double> >
+class SolverCG : public Solver<VectorType>
+{
+public:
+  /**
+   * Declare type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * Standardized data struct to pipe additional data to the solver.
+   */
+  struct AdditionalData
+  {
+    /**
+     * Write coefficients alpha and beta to the log file for later use in
+     * eigenvalue estimates.
+     */
+    bool log_coefficients;
+
+    /**
+     * Compute the condition number of the projected matrix.
+     *
+     * @note Requires LAPACK support.
+     */
+    bool compute_condition_number;
+
+    /**
+     * Compute the condition number of the projected matrix in each step.
+     *
+     * @note Requires LAPACK support.
+     */
+    bool compute_all_condition_numbers;
+
+    /**
+     * Compute all eigenvalues of the projected matrix.
+     *
+     * @note Requires LAPACK support.
+     */
+    bool compute_eigenvalues;
+
+    /**
+     * Constructor. Initialize data fields.  Confer the description of those.
+     * @deprecated Instead use: connect_coefficients_slot,
+     * connect_condition_number_slot, and connect_eigenvalues_slot.
+     */
+    explicit
+    AdditionalData (const bool log_coefficients,
+                    const bool compute_condition_number = false,
+                    const bool compute_all_condition_numbers = false,
+                    const bool compute_eigenvalues = false) DEAL_II_DEPRECATED;
+
+    /**
+     * Constructor. Initializes all data fields to false.
+     */
+    AdditionalData();
+  };
+
+  /**
+   * Constructor.
+   */
+  SolverCG (SolverControl            &cn,
+            VectorMemory<VectorType> &mem,
+            const AdditionalData     &data = AdditionalData());
+
+  /**
+   * Constructor. Use an object of type GrowingVectorMemory as a default to
+   * allocate memory.
+   */
+  SolverCG (SolverControl        &cn,
+            const AdditionalData &data=AdditionalData());
+
+  /**
+   * Virtual destructor.
+   */
+  virtual ~SolverCG ();
+
+  /**
+   * Solve the linear system $Ax=b$ for x.
+   */
+  template <typename MatrixType, typename PreconditionerType>
+  void
+  solve (const MatrixType         &A,
+         VectorType               &x,
+         const VectorType         &b,
+         const PreconditionerType &precondition);
+
+  /**
+   * Connect a slot to retrieve the CG coefficients. The slot will be called
+   * with alpha as the first argument and with beta as the second argument,
+   * where alpha and beta follow the notation in Y. Saad: "Iterative methods
+   * for Sparse Linear Systems", section 6.7. Called once per iteration.
+   */
+  boost::signals2::connection
+  connect_coefficients_slot(
+    const std_cxx11::function<void (double,double)> &slot);
+
+  /**
+   * Connect a slot to retrieve the estimated condition number. Called on each
+   * iteration if every_iteration=true, otherwise called once when iterations
+   * are ended (i.e., either because convergence has been achieved, or because
+   * divergence has been detected).
+   */
+  boost::signals2::connection
+  connect_condition_number_slot(const std_cxx11::function<void (double)> &slot,
+                                const bool every_iteration=false);
+
+  /**
+   * Connect a slot to retrieve the estimated eigenvalues. Called on each
+   * iteration if every_iteration=true, otherwise called once when iterations
+   * are ended (i.e., either because convergence has been achieved, or because
+   * divergence has been detected).
+   */
+  boost::signals2::connection
+  connect_eigenvalues_slot(
+    const std_cxx11::function<void (const std::vector<double> &)> &slot,
+    const bool every_iteration=false);
+
+protected:
+  /**
+   * Implementation of the computation of the norm of the residual. This can
+   * be replaced by a more problem oriented functional in a derived class.
+   */
+  virtual double criterion();
+
+  /**
+   * Interface for derived class. This function gets the current iteration
+   * vector, the residual and the update vector in each step. It can be used
+   * for a graphical output of the convergence history.
+   */
+  virtual void print_vectors(const unsigned int step,
+                             const VectorType   &x,
+                             const VectorType   &r,
+                             const VectorType   &d) const;
+
+  /**
+   * Estimates the eigenvalues from diagonal and offdiagonal. Uses these
+   * estimates to compute the condition number. Calls the signals
+   * eigenvalues_signal and cond_signal with these estimates as arguments.
+   * Outputs the eigenvalues/condition-number to deallog if
+   * log_eigenvalues/log_cond is true.
+   */
+  static void
+  compute_eigs_and_cond(
+    const std::vector<double> &diagonal,
+    const std::vector<double> &offdiagonal,
+    const boost::signals2::signal<void (const std::vector<double> &)> &eigenvalues_signal,
+    const boost::signals2::signal<void (double)> &cond_signal,
+    const bool log_eigenvalues,
+    const bool log_cond);
+
+  /**
+   * Temporary vectors, allocated through the @p VectorMemory object at the
+   * start of the actual solution process and deallocated at the end.
+   */
+  VectorType *Vr;
+  VectorType *Vp;
+  VectorType *Vz;
+
+  /**
+   * Within the iteration loop, the square of the residual vector is stored in
+   * this variable. The function @p criterion uses this variable to compute
+   * the convergence value, which in this class is the norm of the residual
+   * vector and thus the square root of the @p res2 value.
+   */
+  double res2;
+
+  /**
+   * Additional parameters.
+   */
+  AdditionalData additional_data;
+
+  /**
+   * Signal used to retrieve the CG coefficients. Called on each iteration.
+   */
+  boost::signals2::signal<void (double,double)> coefficients_signal;
+
+  /**
+   * Signal used to retrieve the estimated condition number. Called once when
+   * all iterations are ended.
+   */
+  boost::signals2::signal<void (double)> condition_number_signal;
+
+  /**
+   * Signal used to retrieve the estimated condition numbers. Called on each
+   * iteration.
+   */
+  boost::signals2::signal<void (double)> all_condition_numbers_signal;
+
+  /**
+   * Signal used to retrieve the estimated eigenvalues. Called once when all
+   * iterations are ended.
+   */
+  boost::signals2::signal<void (const std::vector<double> &)> eigenvalues_signal;
+
+  /**
+   * Signal used to retrieve the estimated eigenvalues. Called on each
+   * iteration.
+   */
+  boost::signals2::signal<void (const std::vector<double> &)> all_eigenvalues_signal;
+
+private:
+  void cleanup();   // frees Vr, Vp and Vz and pops the deallog prefix
+};
+
+/*@}*/
+
+/*------------------------- Implementation ----------------------------*/
+
+#ifndef DOXYGEN
+
+// Deprecated constructor: stores the four flags exactly as given.
+template <typename VectorType>
+inline
+SolverCG<VectorType>::AdditionalData::AdditionalData
+(const bool log_coefficients,
+ const bool compute_condition_number,
+ const bool compute_all_condition_numbers,
+ const bool compute_eigenvalues)
+  : log_coefficients              (log_coefficients),
+    compute_condition_number      (compute_condition_number),
+    compute_all_condition_numbers (compute_all_condition_numbers),
+    compute_eigenvalues           (compute_eigenvalues)
+{}
+
+
+
+// Default constructor: every logging/estimation flag starts out switched off.
+template <typename VectorType>
+inline
+SolverCG<VectorType>::AdditionalData::AdditionalData ()
+  : log_coefficients              (false),
+    compute_condition_number      (false),
+    compute_all_condition_numbers (false),
+    compute_eigenvalues           (false)
+{
+}
+
+
+
+template <typename VectorType>
+SolverCG<VectorType>::SolverCG
+(SolverControl            &cn,
+ VectorMemory<VectorType> &mem,
+ const AdditionalData     &data)
+  : Solver<VectorType>(cn, mem),   // base class connects cn and remembers mem
+    additional_data(data)          // keep our own copy of the options
+{}
+
+
+
+template <typename VectorType>
+SolverCG<VectorType>::SolverCG
+(SolverControl        &cn,
+ const AdditionalData &data)
+  : Solver<VectorType>(cn),   // base class falls back to its own vector memory object
+    additional_data(data)     // keep our own copy of the options
+{}
+
+
+
+// Intentionally empty: the destructor has nothing of its own to release.
+template <typename VectorType>
+SolverCG<VectorType>::~SolverCG () {}
+
+
+
+template <typename VectorType>
+double SolverCG<VectorType>::criterion()
+{
+  const double residual_norm = std::sqrt(res2);   // res2 is the squared norm
+  return residual_norm;
+}
+
+
+
+template <typename VectorType>
+void SolverCG<VectorType>::cleanup()
+{
+  // give the three temporaries back to the vector memory, then drop the log prefix
+  VectorType *const temporaries[] = {Vr, Vp, Vz};
+  for (unsigned int i=0; i<3; ++i)
+    this->memory.free(temporaries[i]);
+  deallog.pop();
+}
+
+
+
+template <typename VectorType>
+void
+SolverCG<VectorType>::print_vectors(const unsigned int /*step*/,
+                                    const VectorType & /*x*/,
+                                    const VectorType & /*r*/,
+                                    const VectorType & /*d*/) const
+{ /* default implementation produces no output */ }
+
+
+
+template <typename VectorType>
+inline void
+SolverCG<VectorType>::compute_eigs_and_cond
+(const std::vector<double> &diagonal,
+ const std::vector<double> &offdiagonal,
+ const boost::signals2::signal<void (const std::vector<double> &)> &eigenvalues_signal,
+ const boost::signals2::signal<void (double)>                      &cond_signal,
+ const bool                log_eigenvalues,
+ const bool                log_cond)
+{
+  // Nothing to do unless some slot or log flag actually wants the result.
+  const bool nobody_listens = cond_signal.empty() && eigenvalues_signal.empty();
+  if (nobody_listens && !log_cond && !log_eigenvalues)
+    return;
+
+  // Assemble the tridiagonal matrix from the given diagonal and off-diagonal.
+  const std::vector<double>::size_type n = diagonal.size();
+  TridiagonalMatrix<double> T(n, true);
+  for (size_type i=0; i<n; ++i)
+    {
+      T(i,i) = diagonal[i];
+      if (i+1 < n)
+        T(i,i+1) = offdiagonal[i];
+    }
+  T.compute_eigenvalues();
+
+  // Need two eigenvalues to estimate the condition number.
+  if (n > 1)
+    {
+      const double condition_number = T.eigenvalue(T.n()-1) / T.eigenvalue(0);
+      cond_signal(condition_number);
+      if (log_cond)
+        deallog << "Condition number estimate: "
+                << condition_number << std::endl;
+    }
+
+  // Only copy the eigenvalues out of T if a slot is there to receive them.
+  if (!eigenvalues_signal.empty())
+    {
+      std::vector<double> eigenvalues(T.n());
+      for (unsigned int k=0; k<T.n(); ++k)
+        eigenvalues[k] = T.eigenvalue(k);
+      eigenvalues_signal(eigenvalues);
+    }
+
+  // Independently of any slot, the eigenvalues can be written to deallog
+  // as one blank-separated line.
+  if (log_eigenvalues)
+    {
+      for (size_type k=0; k<T.n(); ++k)
+        deallog << ' ' << T.eigenvalue(k);
+      deallog << std::endl;
+    }
+}
+
+
+
+// Solve A*x = b with the (preconditioned) conjugate gradient iteration. The
+// content of @p x on entry is the starting guess; it is overwritten with the
+// iterates.
+//
+// @param A            matrix-like object; only A.vmult(dst,src) is used.
+// @param x            in: start vector, out: last iterate.
+// @param b            right hand side.
+// @param precondition preconditioner; only precondition.vmult(dst,src) is
+//                     used, and it is not called at all when its type is
+//                     PreconditionIdentity.
+//
+// Throws SolverControl::NoConvergence if the iteration loop ends in a state
+// other than SolverControl::success. Any exception raised inside the
+// iteration is rethrown after cleanup() has been called.
+template <typename VectorType>
+template <typename MatrixType, typename PreconditionerType>
+void
+SolverCG<VectorType>::solve (const MatrixType         &A,
+                             VectorType               &x,
+                             const VectorType         &b,
+                             const PreconditionerType &precondition)
+{
+  SolverControl::State conv=SolverControl::iterate;
+
+  // matched by the deallog.pop() in cleanup()
+  deallog.push("cg");
+
+  // Memory allocation
+  Vr = this->memory.alloc();
+  Vz = this->memory.alloc();
+  Vp = this->memory.alloc();
+  // Should we build the matrix for
+  // eigenvalue computations?
+  // (The operands are combined with bitwise | on bool values, so every
+  // operand is evaluated; the result is the same as with ||.)
+  const bool do_eigenvalues = !condition_number_signal.empty()
+                              |!all_condition_numbers_signal.empty()
+                              |!eigenvalues_signal.empty()
+                              |!all_eigenvalues_signal.empty()
+                              | additional_data.compute_condition_number
+                              | additional_data.compute_all_condition_numbers
+                              | additional_data.compute_eigenvalues;
+  // beta/alpha of the previous iteration, added to the next diagonal entry
+  double eigen_beta_alpha = 0;
+
+  // vectors used for eigenvalue
+  // computations
+  std::vector<double> diagonal;
+  std::vector<double> offdiagonal;
+
+  int  it=0;
+  // placeholder until the first residual norm has been computed
+  double res = -std::numeric_limits<double>::max();
+
+  try
+    {
+      // define some aliases for simpler access
+      // g: residual A*x-b, d: search direction, h: scratch vector holding
+      // either A*d or the preconditioned residual
+      VectorType &g = *Vr;
+      VectorType &d = *Vz;
+      VectorType &h = *Vp;
+      // resize the vectors, but do not set
+      // the values since they'd be overwritten
+      // soon anyway.
+      g.reinit(x, true);
+      d.reinit(x, true);
+      h.reinit(x, true);
+
+      double gh,alpha,beta;
+
+      // compute residual. if vector is
+      // zero, then short-circuit the
+      // full computation
+      if (!x.all_zero())
+        {
+          A.vmult(g,x);
+          g.add(-1.,b);
+        }
+      else
+        g.equ(-1.,b);
+      res = g.l2_norm();
+
+      conv = this->iteration_status(0, res, x);
+      // NOTE(review): this early exit skips the NoConvergence check at the
+      // end of the function, also if conv is SolverControl::failure here --
+      // confirm that this is intended.
+      if (conv != SolverControl::iterate)
+        {
+          cleanup();
+          return;
+        }
+
+      // With PreconditionIdentity the product g*h equals the squared
+      // residual norm, so the preconditioner call and the scalar product
+      // are skipped in that case.
+      if (types_are_equal<PreconditionerType,PreconditionIdentity>::value == false)
+        {
+          precondition.vmult(h,g);
+
+          d.equ(-1.,h);
+
+          gh = g*h;
+        }
+      else
+        {
+          d.equ(-1.,g);
+          gh = res*res;
+        }
+
+      while (conv == SolverControl::iterate)
+        {
+          it++;
+          A.vmult(h,d);
+
+          alpha = d*h;
+          Assert(alpha != 0., ExcDivideByZero());
+          alpha = gh/alpha;
+
+          x.add(alpha,d);
+          // presumably updates g by alpha*h and returns the scalar product
+          // of the updated g with itself in one sweep -- see the vector
+          // class documentation of add_and_dot()
+          res = std::sqrt(g.add_and_dot(alpha, h, g));
+
+          print_vectors(it, x, g, d);
+
+          conv = this->iteration_status(it, res, x);
+          // leave before beta, the signals and the eigenvalue data are
+          // updated for this step
+          if (conv != SolverControl::iterate)
+            break;
+
+          if (types_are_equal<PreconditionerType,PreconditionIdentity>::value
+              == false)
+            {
+              precondition.vmult(h,g);
+
+              beta = gh;
+              Assert(beta != 0., ExcDivideByZero());
+              gh   = g*h;
+              beta = gh/beta;
+              d.sadd(beta,-1.,h);
+            }
+          else
+            {
+              // NOTE(review): unlike the branch above, there is no
+              // Assert(beta != 0.) before the division here.
+              beta = gh;
+              gh = res*res;
+              beta = gh/beta;
+              d.sadd(beta,-1.,g);
+            }
+
+          this->coefficients_signal(alpha,beta);
+          if (additional_data.log_coefficients)
+            deallog << "alpha-beta:" << alpha << '\t' << beta << std::endl;
+          // set up the vectors
+          // containing the diagonal
+          // and the off diagonal of
+          // the projected matrix.
+          if (do_eigenvalues)
+            {
+              diagonal.push_back(1./alpha + eigen_beta_alpha);
+              eigen_beta_alpha = beta/alpha;
+              offdiagonal.push_back(std::sqrt(beta)/alpha);
+            }
+          // per-iteration report: eigenvalues are never logged here, the
+          // condition number is logged if compute_all_condition_numbers is
+          // set
+          compute_eigs_and_cond(diagonal,offdiagonal,all_eigenvalues_signal,
+                                all_condition_numbers_signal,false,
+                                additional_data.compute_all_condition_numbers);
+        }
+    }
+  catch (...)
+    {
+      // release the temporary vectors before passing the exception on
+      cleanup();
+      throw;
+    }
+  // final report; the condition number is only logged here if it has not
+  // been logged in every iteration already
+  compute_eigs_and_cond(diagonal,offdiagonal,eigenvalues_signal,
+                        condition_number_signal,
+                        additional_data.compute_eigenvalues,
+                        (additional_data.compute_condition_number &&
+                         !additional_data.compute_all_condition_numbers));
+
+  // Deallocate Memory
+  cleanup();
+  // in case of failure: throw exception
+  if (conv != SolverControl::success)
+    AssertThrow(false, SolverControl::NoConvergence (it, res));
+  // otherwise exit as normal
+}
+
+
+
+// Attach @p slot to coefficients_signal and return the resulting connection
+// object. solve() invokes that signal with the (alpha, beta) pair of an
+// iteration.
+template<typename VectorType>
+boost::signals2::connection
+SolverCG<VectorType>::connect_coefficients_slot
+(const std_cxx11::function<void(double,double)> &slot)
+{
+  const boost::signals2::connection handle = coefficients_signal.connect(slot);
+  return handle;
+}
+
+
+
+// Attach @p slot to one of the two condition number signals and return the
+// connection: all_condition_numbers_signal if @p every_iteration is true,
+// condition_number_signal otherwise.
+template<typename VectorType>
+boost::signals2::connection
+SolverCG<VectorType>::connect_condition_number_slot
+(const std_cxx11::function<void(double)> &slot,
+ const bool                              every_iteration)
+{
+  return (every_iteration
+          ? all_condition_numbers_signal
+          : condition_number_signal).connect(slot);
+}
+
+
+
+// Attach @p slot to one of the two eigenvalue signals and return the
+// connection: all_eigenvalues_signal if @p every_iteration is true,
+// eigenvalues_signal otherwise.
+template<typename VectorType>
+boost::signals2::connection
+SolverCG<VectorType>::connect_eigenvalues_slot
+(const std_cxx11::function<void (const std::vector<double> &)> &slot,
+ const bool                                                    every_iteration)
+{
+  return (every_iteration
+          ? all_eigenvalues_signal
+          : eigenvalues_signal).connect(slot);
+}
+
+
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/solver_control.h b/include/deal.II/lac/solver_control.h
new file mode 100644
index 0000000..77eb325
--- /dev/null
+++ b/include/deal.II/lac/solver_control.h
@@ -0,0 +1,633 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__solver_control_h
+#define dealii__solver_control_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/subscriptor.h>
+
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+class ParameterHandler;
+
+/*!@addtogroup Solvers */
+/*@{*/
+
+/**
+ * Control class to determine convergence of iterative solvers.
+ *
+ * Used by iterative methods to determine whether the iteration should be
+ * continued. To this end, the virtual function <tt>check()</tt> is called in
+ * each iteration with the current iteration step and the value indicating
+ * convergence (usually the residual).
+ *
+ * After the iteration has terminated, the functions last_value() and
+ * last_step() can be used to obtain information about the final state of the
+ * iteration.
+ *
+ * check() can be replaced in derived classes to allow for more sophisticated
+ * tests.
+ *
+ *
+ * <h3>State</h3> The return states of the check function are of type #State,
+ * which is an enum local to this class. It indicates the state the solver is
+ * in.
+ *
+ * The possible values of State are
+ * <ul>
+ * <li> <tt>iterate = 0</tt>: continue the iteration.
+ * <li> @p success: the goal is reached, the iterative method can terminate
+ * successfully.
+ * <li> @p failure: the iterative method should stop because convergence could
+ * not be achieved or at least was not achieved within the given maximal
+ * number of iterations.
+ * </ul>
+ *
+ * @author Guido Kanschat
+ */
+class SolverControl : public Subscriptor
+{
+public:
+
+  /**
+   * Enum denoting the different states a solver can be in. See the general
+   * documentation of this class for more information.
+   */
+  enum State
+  {
+    /// Continue iteration
+    iterate = 0,
+    /// Stop iteration, goal reached
+    success,
+    /// Stop iteration, goal not reached
+    failure
+  };
+
+
+
+  /**
+   * Class to be thrown upon failing convergence of an iterative solver, when
+   * either the number of iterations exceeds the limit or the residual fails
+   * to reach the desired limit, e.g. in the case of a break-down.
+   *
+   * The residual in the last iteration, as well as the iteration number of
+   * the last step are stored in this object and can be recovered upon
+   * catching an exception of this class.
+   */
+
+  class NoConvergence : public dealii::ExceptionBase
+  {
+  public:
+    NoConvergence (const unsigned int last_step,
+                   const double       last_residual)
+      : last_step (last_step), last_residual(last_residual)
+    {}
+
+    virtual ~NoConvergence () throw () {}
+
+    virtual void print_info (std::ostream &out) const
+    {
+      out << "Iterative method reported convergence failure in step "
+          << last_step << ". The residual in the last step was " << last_residual
+          << ".\n\n"
+          << "This error message can indicate that you have simply not allowed "
+          << "a sufficiently large number of iterations for your iterative solver "
+          << "to converge. This often happens when you increase the size of your "
+          << "problem. In such cases, the last residual will likely still be very "
+          << "small, and you can make the error go away by increasing the allowed "
+          << "number of iterations when setting up the SolverControl object that "
+          << "determines the maximal number of iterations you allow."
+          << "\n\n"
+          << "The other situation where this error may occur is when your matrix "
+          << "is not invertible (e.g., your matrix has a null-space), or if you "
+          << "try to apply the wrong solver to a matrix (e.g., using CG for a "
+          << "matrix that is not symmetric or not positive definite). In these "
+          << "cases, the residual in the last iteration is likely going to be large."
+          << std::endl;
+    }
+
+    /**
+     * Iteration number of the last step.
+     */
+    const unsigned int last_step;
+
+    /**
+     * Residual in the last step.
+     */
+    const double       last_residual;
+  };
+
+
+
+  /**
+   * Constructor. The parameters @p n and @p tol are the maximum number of
+   * iteration steps before failure and the tolerance to determine success of
+   * the iteration.
+   *
+   * @p log_history specifies whether the history (i.e. the value to be
+   * checked and the number of the iteration step) shall be printed to @p
+   * deallog stream.  Default is: do not print. Similarly, @p log_result
+   * specifies whether the final result is logged to @p deallog. Default
+   * is yes.
+   */
+  SolverControl (const unsigned int n           = 100,
+                 const double       tol         = 1.e-10,
+                 const bool         log_history = false,
+                 const bool         log_result  = true);
+
+  /**
+   * Virtual destructor is needed as there are virtual functions in this
+   * class.
+   */
+  virtual ~SolverControl();
+
+  /**
+   * Interface to parameter file.
+   */
+  static void declare_parameters (ParameterHandler &param);
+
+  /**
+   * Read parameters from file.
+   */
+  void parse_parameters (ParameterHandler &param);
+
+  /**
+   * Decide about success or failure of an iteration.  This function gets the
+   * current iteration step to determine, whether the allowed number of steps
+   * has been exceeded and returns @p failure in this case. If @p check_value
+   * is below the prescribed tolerance, it returns @p success. In all other
+   * cases @p iterate is returned to suggest continuation of the iterative
+   * procedure.
+   *
+   * The iteration is also aborted if the residual becomes not-a-number
+   * (@p NaN). Note, however, that this check is only performed if the
+   * @p isnan function is provided by the operating system, which is not
+   * always true. CMake checks this with the 'check_01_cxx_features.cmake'
+   * test and sets the flag @p DEAL_II_HAVE_ISNAN in the include file
+   * <tt>deal.II/base/config.h</tt> if this function was found.
+   *
+   * <tt>check()</tt> additionally preserves @p step and @p check_value. These
+   * values are accessible by <tt>last_value()</tt> and <tt>last_step()</tt>.
+   *
+   * Derived classes may override this function, e.g. to log the convergence
+   * indicators (@p check_value) or to do other computations.
+   */
+  virtual State check (const unsigned int step,
+                       const double   check_value);
+
+  /**
+   * Return the result of the last check operation.
+   */
+  State last_check() const;
+
+  /**
+   * Return the initial convergence criterion.
+   */
+  double initial_value() const;
+
+  /**
+   * Return the convergence value of last iteration step for which @p check
+   * was called by the solver.
+   */
+  double last_value() const;
+
+  /**
+   * Number of last iteration step.
+   */
+  unsigned int last_step() const;
+
+  /**
+   * Maximum number of steps.
+   */
+  unsigned int max_steps () const;
+
+  /**
+   * Change maximum number of steps. Returns the previous value.
+   */
+  unsigned int set_max_steps (const unsigned int);
+
+  /**
+   * Enables the failure check. Solving is stopped with @p State @p
+   * failure if <tt>residual>failure_residual</tt> with
+   * <tt>failure_residual:=rel_failure_residual*first_residual</tt>.
+   */
+  void set_failure_criterion (const double rel_failure_residual);
+
+  /**
+   * Disables failure check and resets @p relative_failure_residual and @p
+   * failure_residual to zero.
+   */
+  void clear_failure_criterion ();
+
+  /**
+   * Tolerance.
+   */
+  double tolerance () const;
+
+  /**
+   * Change tolerance. Returns the previous value.
+   */
+  double set_tolerance (const double);
+
+  /**
+   * Enables writing residuals of each step into a vector for later analysis.
+   */
+  void enable_history_data();
+
+  /**
+   * Average error reduction over all steps.
+   *
+   * Requires enable_history_data()
+   */
+  double average_reduction() const;
+  /**
+   * Error reduction of the last step; for stationary iterations, this
+   * approximates the norm of the iteration matrix.
+   *
+   * Requires enable_history_data()
+   */
+  double final_reduction() const;
+
+  /**
+   * Error reduction of any iteration step.
+   *
+   * Requires enable_history_data()
+   */
+  double step_reduction(unsigned int step) const;
+
+  /**
+   * Log each iteration step. Use @p log_frequency for skipping steps.
+   */
+  void log_history (const bool);
+
+  /**
+   * Returns the @p log_history flag.
+   */
+  bool log_history () const;
+
+  /**
+   * Set logging frequency.
+   */
+  unsigned int log_frequency (unsigned int);
+
+  /**
+   * Log start and end step.
+   */
+  void log_result (const bool);
+
+  /**
+   * Returns the @p log_result flag.
+   */
+  bool log_result () const;
+
+  /**
+   * This exception is thrown if a function operating on the vector of history
+   * data of a SolverControl object is called, but storage of history data was
+   * not enabled by enable_history_data().
+   */
+  DeclException0(ExcHistoryDataRequired);
+
+protected:
+  /**
+   * Maximum number of steps.
+   */
+  unsigned int maxsteps;
+
+  /**
+   * Prescribed tolerance to be achieved.
+   */
+  double       tol;
+
+  /**
+   * Result of last check operation.
+   */
+  State        lcheck;
+
+  /**
+   * Initial value.
+   */
+  double       initial_val;
+
+  /**
+   * Last value of the convergence criterion.
+   */
+  double       lvalue;
+
+  /**
+   * Last step.
+   */
+  unsigned int lstep;
+
+  /**
+   * Is set to @p true by @p set_failure_criterion and enables failure
+   * checking.
+   */
+  bool         check_failure;
+
+  /**
+   * Stores the @p rel_failure_residual set by @p set_failure_criterion
+   */
+  double       relative_failure_residual;
+
+  /**
+   * @p failure_residual equals the first residual multiplied by @p
+   * relative_failure_residual set by @p set_failure_criterion (see there).
+   *
+   * Until the first residual is known it is 0.
+   */
+  double       failure_residual;
+
+  /**
+   * Log convergence history to @p deallog.
+   */
+  bool         m_log_history;
+
+  /**
+   * Log only every nth step.
+   */
+  unsigned int m_log_frequency;
+
+  /**
+   * Log iteration result to @p deallog.  If true, after finishing the
+   * iteration, a statement about failure or success together with @p lstep
+   * and @p lvalue are logged.
+   */
+  bool         m_log_result;
+
+  /**
+   * Control over the storage of history data. Set by enable_history_data().
+   */
+  bool         history_data_enabled;
+
+  /**
+   * Vector storing the result after each iteration step for later statistical
+   * analysis.
+   *
+   * Use of this vector is enabled by enable_history_data().
+   */
+  std::vector<double> history_data;
+};
+
+
+/**
+ * Specialization of @p SolverControl which returns @p success if either the
+ * specified tolerance is achieved or if the initial residual (or whatever
+ * criterion was chosen by the solver class) is reduced by a given factor.
+ * This is useful in cases where you don't want to solve exactly, but rather
+ * want to gain two digits or if the maximal number of iterations is achieved.
+ * For example: The maximal number of iterations is 20, the reduction factor
+ * is 1% and the tolerance is 0.1%. The initial residual is 2.5. The process
+ * will break if 20 iterations are completed or the new residual is less than
+ * 2.5*1% or if it is less than 0.1%.
+ *
+ * @author Guido Kanschat
+ */
+class ReductionControl : public SolverControl
+{
+public:
+  /**
+   * Constructor.  Provide the reduction factor in addition to arguments that
+   * have the same meaning as those of the constructor of the SolverControl
+   * constructor.
+   */
+  ReductionControl (const unsigned int maxiter = 100,
+                    const double   tolerance   = 1.e-10,
+                    const double   reduce      = 1.e-2,
+                    const bool     log_history = false,
+                    const bool     log_result  = true);
+
+  /**
+   * Initialize with a SolverControl object. The result will emulate
+   * SolverControl by setting @p reduce to zero.
+   */
+  ReductionControl (const SolverControl &c);
+
+  /**
+   * Assign a SolverControl object to ReductionControl. The result of the
+   * assignment will emulate SolverControl by setting @p reduce to zero.
+   */
+  ReductionControl &operator= (const SolverControl &c);
+
+  /**
+   * Virtual destructor is needed as there are virtual functions in this
+   * class.
+   */
+  virtual ~ReductionControl();
+
+  /**
+   * Interface to parameter file.
+   */
+  static void declare_parameters (ParameterHandler &param);
+
+  /**
+   * Read parameters from file.
+   */
+  void parse_parameters (ParameterHandler &param);
+
+  /**
+   * Decide about success or failure of an iteration.  This function calls the
+   * one in the base class, but sets the tolerance to <tt>reduction * initial
+   * value</tt> upon the first iteration.
+   */
+  virtual State check (const unsigned int step,
+                       const double   check_value);
+
+  /**
+   * Reduction factor.
+   */
+  double reduction () const;
+
+  /**
+   * Change reduction factor. Returns the previous value.
+   */
+  double set_reduction (const double);
+
+protected:
+  /**
+   * Desired reduction factor.
+   */
+  double reduce;
+
+  /**
+   * Reduced tolerance. Stop iterations if either this value is achieved or if
+   * the base class indicates success.
+   */
+  double reduced_tol;
+};
+
+/**
+ * Specialization of @p SolverControl which returns @p success if a given
+ * number of iterations was performed, irrespective of the actual residual.
+ * This is useful in cases where you don't want to solve exactly, but rather
+ * want to perform a fixed number of iterations, e.g. in an inner solver. The
+ * arguments given to this class are exactly the same as for the SolverControl
+ * class and the solver terminates similarly when one of the given tolerance
+ * or the maximum iteration count were reached. The only difference to
+ * SolverControl is that the solver returns success in the latter case.
+ *
+ * @author Martin Kronbichler
+ */
+class IterationNumberControl : public SolverControl
+{
+public:
+  /**
+   * Constructor.  Provide exactly the same arguments as the constructor of
+   * the SolverControl class.
+   */
+  IterationNumberControl (const unsigned int maxiter = 100,
+                          const double       tolerance = 1e-12,
+                          const bool     log_history = false,
+                          const bool     log_result  = true);
+
+  /**
+   * Initialize with a SolverControl object. The result will emulate
+   * SolverControl by setting the reduction target to zero.
+   */
+  IterationNumberControl (const SolverControl &c);
+
+  /**
+   * Assign a SolverControl object to IterationNumberControl. The result of
+   * the assignment will emulate SolverControl by setting the reduction target
+   * to zero.
+   */
+  IterationNumberControl &operator= (const SolverControl &c);
+
+  /**
+   * Virtual destructor is needed as there are virtual functions in this
+   * class.
+   */
+  virtual ~IterationNumberControl();
+
+  /**
+   * Decide about success or failure of an iteration. This function bases
+   * success solely on the fact if a given number of iterations was reached or
+   * the check value reached exactly zero.
+   */
+  virtual State check (const unsigned int step,
+                       const double   check_value);
+};
+
+/*@}*/
+//---------------------------------------------------------------------------
+
+#ifndef DOXYGEN
+
+// Return the maximum number of steps stored in maxsteps.
+inline unsigned int
+SolverControl::max_steps () const
+{
+  return maxsteps;
+}
+
+
+
+// Replace the maximum number of steps by @p newval and return the value that
+// was stored before the call.
+inline unsigned int
+SolverControl::set_max_steps (const unsigned int newval)
+{
+  const unsigned int previous_maxsteps = maxsteps;
+  maxsteps = newval;
+  return previous_maxsteps;
+}
+
+
+
+// Switch the failure check on and remember @p rel_failure_residual as the
+// relative failure residual.
+inline void
+SolverControl::set_failure_criterion (const double rel_failure_residual)
+{
+  check_failure             = true;
+  relative_failure_residual = rel_failure_residual;
+}
+
+
+
+// Switch the failure check off and reset both the relative and the absolute
+// failure residual to zero.
+inline void
+SolverControl::clear_failure_criterion ()
+{
+  check_failure             = false;
+  relative_failure_residual = 0;
+  failure_residual          = 0;
+}
+
+
+
+// Return the tolerance stored in tol.
+inline double
+SolverControl::tolerance () const
+{
+  return tol;
+}
+
+
+
+// Replace the tolerance by @p t and return the value that was stored before
+// the call.
+inline double
+SolverControl::set_tolerance (const double t)
+{
+  const double previous_tol = tol;
+  tol = t;
+  return previous_tol;
+}
+
+
+// Setter: store @p newval in the m_log_history flag.
+inline void
+SolverControl::log_history (const bool newval)
+{
+  m_log_history = newval;
+}
+
+
+
+// Getter: return the m_log_history flag.
+inline bool
+SolverControl::log_history () const
+{
+  return m_log_history;
+}
+
+
+// Setter: store @p newval in the m_log_result flag.
+inline void
+SolverControl::log_result (const bool newval)
+{
+  m_log_result = newval;
+}
+
+
+// Getter: return the m_log_result flag.
+inline bool
+SolverControl::log_result () const
+{
+  return m_log_result;
+}
+
+
+// Return the reduction factor stored in reduce.
+inline double
+ReductionControl::reduction () const
+{
+  return reduce;
+}
+
+
+// Replace the reduction factor by @p t and return the value that was stored
+// before the call.
+inline double
+ReductionControl::set_reduction (const double t)
+{
+  const double previous_reduce = reduce;
+  reduce = t;
+  return previous_reduce;
+}
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/solver_gmres.h b/include/deal.II/lac/solver_gmres.h
new file mode 100644
index 0000000..c3e031f
--- /dev/null
+++ b/include/deal.II/lac/solver_gmres.h
@@ -0,0 +1,1223 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__solver_gmres_h
+#define dealii__solver_gmres_h
+
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/lac/householder.h>
+#include <deal.II/lac/solver.h>
+#include <deal.II/lac/solver_control.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/lapack_full_matrix.h>
+#include <deal.II/lac/vector.h>
+
+#include <vector>
+#include <cmath>
+#include <algorithm>
+
+DEAL_II_NAMESPACE_OPEN
+
+/*!@addtogroup Solvers */
+/*@{*/
+
+namespace internal
+{
+  /**
+   * A namespace for a helper class to the GMRES solver.
+   */
+  namespace SolverGMRES
+  {
+    /**
+     * Class to hold temporary vectors.  This class automatically allocates a
+     * new vector, once it is needed.
+     *
+     * A future version should also be able to shift through vectors
+     * automatically, avoiding restart.
+     */
+
+    template <typename VectorType>
+    class TmpVectors
+    {
+    public:
+      /**
+       * Constructor. Prepares an array of @p VectorType of length @p
+       * max_size.
+       */
+      TmpVectors(const unsigned int       max_size,
+                 VectorMemory<VectorType> &vmem);
+
+      /**
+       * Delete all allocated vectors.
+       */
+      ~TmpVectors();
+
+      /**
+       * Get vector number @p i. If this vector was unused before, an error
+       * occurs.
+       */
+      VectorType &operator[] (const unsigned int i) const;
+
+      /**
+       * Get vector number @p i. Allocate it if necessary.
+       *
+       * If a vector must be allocated, @p temp is used to reinit it to the
+       * proper dimensions.
+       */
+      VectorType &operator() (const unsigned int i,
+                              const VectorType   &temp);
+
+    private:
+      /**
+       * Pool where vectors are obtained from.
+       */
+      VectorMemory<VectorType> &mem;
+
+      /**
+       * Field for storing the vectors.
+       */
+      std::vector<VectorType *> data;
+
+      /**
+       * Offset of the first vector. This is for later when vector rotation
+       * will be implemented.
+       */
+      unsigned int offset;
+    };
+  }
+}
+
+/**
+ * Implementation of the Restarted Preconditioned Direct Generalized Minimal
+ * Residual Method. The stopping criterion is the norm of the residual.
+ *
+ * The AdditionalData structure contains the number of temporary vectors used.
+ * The size of the Arnoldi basis is this number minus two. Additionally, it
+ * allows you to choose between right or left preconditioning. The default is
+ * left preconditioning. Finally it includes a flag indicating whether or not
+ * the default residual is used as stopping criterion.
+ *
+ *
+ * <h3>Left versus right preconditioning</h3>
+ *
+ * @p AdditionalData allows you to choose between left and right
+ * preconditioning. As expected, this switches between solving for the systems
+ * <i>P<sup>-1</sup>A</i> and <i>AP<sup>-1</sup></i>, respectively.
+ *
+ * A second consequence is the type of residual which is used to measure
+ * convergence. With left preconditioning, this is the <b>preconditioned</b>
+ * residual, while with right preconditioning, it is the residual of the
+ * unpreconditioned system.
+ *
+ * Optionally, this behavior can be overridden by using the flag
+ * AdditionalData::use_default_residual. A <tt>true</tt> value refers to the
+ * behavior described in the previous paragraph, while <tt>false</tt> reverts
+ * it. Be aware though that additional residuals have to be computed in this
+ * case, impeding the overall performance of the solver.
+ *
+ *
+ * <h3>The size of the Arnoldi basis</h3>
+ *
+ * The maximal basis size is controlled by AdditionalData::max_n_tmp_vectors,
+ * and it is this number minus 2. If the number of iteration steps exceeds
+ * this number, all basis vectors are discarded and the iteration starts anew
+ * from the approximation obtained so far.
+ *
+ * Note that the minimizing property of GMRes only pertains to the Krylov
+ * space spanned by the Arnoldi basis. Therefore, restarted GMRes is
+ * <b>not</b> minimizing anymore. The choice of the basis length is a trade-
+ * off between memory consumption and convergence speed, since a longer basis
+ * means minimization over a larger space.
+ *
+ * For the requirements on matrices and vectors in order to work with this
+ * class, see the documentation of the Solver base class.
+ *
+ *
+ * <h3>Observing the progress of linear solver iterations</h3>
+ *
+ * The solve() function of this class uses the mechanism described in the
+ * Solver base class to determine convergence. This mechanism can also be used
+ * to observe the progress of the iteration.
+ *
+ *
+ * <h3>Eigenvalue and condition number estimates</h3>
+ *
+ * This class can estimate eigenvalues and condition number during the
+ * solution process. This is done by creating the Hessenberg matrix during the
+ * inner iterations. The eigenvalues are estimated as the eigenvalues of the
+ * Hessenberg matrix and the condition number is estimated as the ratio of the
+ * largest and smallest singular value of the Hessenberg matrix. The estimates
+ * can be obtained by connecting a function as a slot using @p
+ * connect_condition_number_slot and @p connect_eigenvalues_slot. These slots
+ * will then be called from the solver with the estimates as argument.
+ *
+ *
+ * @author Wolfgang Bangerth, Guido Kanschat, Ralf Hartmann.
+ */
+template <class VectorType = Vector<double> >
+class SolverGMRES : public Solver<VectorType>
+{
+public:
+  /**
+   * Standardized data struct to pipe additional data to the solver.
+   */
+  struct AdditionalData
+  {
+    /**
+     * Constructor. By default, set the number of temporary vectors to 30,
+     * i.e. do a restart every 28 iterations. Also set preconditioning from
+     * left, the residual of the stopping criterion to the default residual,
+     * and re-orthogonalization only if necessary. This constructor always
+     * sets #compute_eigenvalues to false.
+     */
+    explicit
+    AdditionalData (const unsigned int max_n_tmp_vectors = 30,
+                    const bool right_preconditioning = false,
+                    const bool use_default_residual = true,
+                    const bool force_re_orthogonalization = false);
+
+    /**
+     * Constructor that, in addition to the arguments of the other
+     * constructor, takes the value of the #compute_eigenvalues flag.
+     * @deprecated To obtain the estimated eigenvalues instead use:
+     * connect_eigenvalues_slot
+     */
+    AdditionalData (const unsigned int max_n_tmp_vectors,
+                    const bool right_preconditioning,
+                    const bool use_default_residual,
+                    const bool force_re_orthogonalization,
+                    const bool compute_eigenvalues) DEAL_II_DEPRECATED;
+
+    /**
+     * Maximum number of temporary vectors. This parameter controls the size
+     * of the Arnoldi basis, which for historical reasons is
+     * #max_n_tmp_vectors-2.
+     */
+    unsigned int    max_n_tmp_vectors;
+
+    /**
+     * Flag for right preconditioning.
+     *
+     * @note Change between left and right preconditioning will also change
+     * the way residuals are evaluated. See the corresponding section in the
+     * SolverGMRES.
+     */
+    bool right_preconditioning;
+
+    /**
+     * Flag for the default residual that is used to measure convergence.
+     */
+    bool use_default_residual;
+
+    /**
+     * Flag to force re-orthogonalization of orthonormal basis in every step.
+     * If set to false, the solver automatically checks for loss of
+     * orthogonality every 5 iterations and enables re-orthogonalization only
+     * if necessary.
+     */
+    bool force_re_orthogonalization;
+
+    /**
+     * Compute all eigenvalues of the Hessenberg matrix generated while
+     * solving, i.e., the projected system matrix. This gives an approximation
+     * of the eigenvalues of the (preconditioned) system matrix. Since the
+     * Hessenberg matrix is thrown away at restart, the eigenvalues are
+     * written to deallog once per restart cycle, i.e. every
+     * #max_n_tmp_vectors-2 iterations.
+     *
+     * @note Requires LAPACK support.
+     */
+    bool compute_eigenvalues;
+  };
+
+  /**
+   * Constructor.
+   */
+  SolverGMRES (SolverControl            &cn,
+               VectorMemory<VectorType> &mem,
+               const AdditionalData     &data=AdditionalData());
+
+  /**
+   * Constructor. Use an object of type GrowingVectorMemory as a default to
+   * allocate memory.
+   */
+  SolverGMRES (SolverControl        &cn,
+               const AdditionalData &data=AdditionalData());
+
+  /**
+   * Solve the linear system $Ax=b$ for x.
+   */
+  template<typename MatrixType, typename PreconditionerType>
+  void
+  solve (const MatrixType         &A,
+         VectorType               &x,
+         const VectorType         &b,
+         const PreconditionerType &precondition);
+
+  /**
+   * Connect a slot to retrieve the estimated condition number. Called on each
+   * outer iteration if every_iteration=true, otherwise called once when
+   * iterations are ended (i.e., either because convergence has been achieved,
+   * or because divergence has been detected). The slot is only called if the
+   * projected matrix has at least two rows and columns.
+   */
+  boost::signals2::connection
+  connect_condition_number_slot(const std_cxx11::function<void (double)> &slot,
+                                const bool every_iteration=false);
+
+  /**
+   * Connect a slot to retrieve the estimated eigenvalues. Called on each
+   * outer iteration if every_iteration=true, otherwise called once when
+   * iterations are ended (i.e., either because convergence has been achieved,
+   * or because divergence has been detected).
+   */
+  boost::signals2::connection
+  connect_eigenvalues_slot(
+    const std_cxx11::function<void (const std::vector<std::complex<double> > &)> &slot,
+    const bool every_iteration=false);
+
+
+  DeclException1 (ExcTooFewTmpVectors,
+                  int,
+                  << "The number of temporary vectors you gave ("
+                  << arg1 << ") is too small. It should be at least 10 for "
+                  << "any results, and much more for reasonable ones.");
+
+protected:
+
+  /**
+   * Additional parameters of this solver, including the maximum number of
+   * tmp vectors.
+   */
+  AdditionalData additional_data;
+
+  /**
+   * Signal used to retrieve the estimated condition number. Called once when
+   * all iterations are ended.
+   */
+  boost::signals2::signal<void (double)> condition_number_signal;
+
+  /**
+   * Signal used to retrieve the estimated condition numbers. Called on each
+   * outer iteration.
+   */
+  boost::signals2::signal<void (double)> all_condition_numbers_signal;
+
+  /**
+   * Signal used to retrieve the estimated eigenvalues. Called once when all
+   * iterations are ended.
+   */
+  boost::signals2::signal<void (const std::vector<std::complex<double> > &)> eigenvalues_signal;
+
+  /**
+   * Signal used to retrieve the estimated eigenvalues. Called on each outer
+   * iteration.
+   */
+  boost::signals2::signal<void (const std::vector<std::complex<double> > &)> all_eigenvalues_signal;
+
+
+  /**
+   * Implementation of the computation of the norm of the residual.
+   */
+  virtual double criterion();
+
+  /**
+   * Transformation of an upper Hessenberg matrix into triangular structure
+   * by Givens rotation of the last column. The rotations stored in @p ci and
+   * @p si for the columns before @p col are first applied to the new column
+   * @p h; then the rotation for column @p col is computed, stored in
+   * <tt>ci(col)</tt> and <tt>si(col)</tt>, and applied to @p h and to the
+   * right hand side @p b.
+   */
+  void givens_rotation (Vector<double> &h,  Vector<double> &b,
+                        Vector<double> &ci, Vector<double> &si,
+                        int col) const;
+
+  /**
+   * Orthogonalize the vector @p vv against the @p dim (orthogonal) vectors
+   * given by the first argument using the modified Gram-Schmidt algorithm.
+   * The factors used for orthogonalization are stored in @p h. The boolean @p
+   * re_orthogonalize specifies whether the modified Gram-Schmidt algorithm
+   * should be applied twice. The algorithm checks loss of orthogonality in
+   * the procedure every fifth step and sets the flag to true in that case.
+   * All subsequent iterations use re-orthogonalization.
+   *
+   * @return The l2 norm of @p vv after orthogonalization. The vector @p vv
+   * itself is not normalized by this function.
+   */
+  static double
+  modified_gram_schmidt (const internal::SolverGMRES::TmpVectors<VectorType> &orthogonal_vectors,
+                         const unsigned int                                  dim,
+                         const unsigned int                                  accumulated_iterations,
+                         VectorType                                          &vv,
+                         Vector<double>                                      &h,
+                         bool                                                &re_orthogonalize);
+
+  /**
+   * Estimates the eigenvalues from the leading @p dim by @p dim block of the
+   * Hessenberg matrix, H_orig, generated during the inner iterations, and
+   * estimates the condition number as the ratio of the largest and smallest
+   * singular value of the same block. Calls the signals eigenvalues_signal
+   * and cond_signal with these estimates as arguments. Outputs the
+   * eigenvalues to deallog if log_eigenvalues is true.
+   */
+  static void
+  compute_eigs_and_cond(
+    const FullMatrix<double> &H_orig ,
+    const unsigned int dim,
+    const boost::signals2::signal<void (const std::vector<std::complex<double> > &)> &eigenvalues_signal,
+    const boost::signals2::signal<void(double)> &cond_signal,
+    const bool log_eigenvalues);
+
+  /**
+   * Projected system matrix
+   */
+  FullMatrix<double> H;
+
+  /**
+   * Auxiliary matrix for inverting @p H
+   */
+  FullMatrix<double> H1;
+
+
+private:
+  /**
+   * No copy constructor. Declared private so that solver objects cannot be
+   * copied from outside the class.
+   */
+  SolverGMRES (const SolverGMRES<VectorType> &);
+};
+
+/**
+ * Implementation of the Generalized minimal residual method with flexible
+ * preconditioning method.
+ *
+ * This version of the GMRES method allows for the use of a different
+ * preconditioner in each iteration step. Therefore, it is also more robust
+ * with respect to inaccurate evaluation of the preconditioner. An important
+ * application is also the use of a Krylov space method inside the
+ * preconditioner. As opposed to SolverGMRES which allows one to choose
+ * between left and right preconditioning, this solver always applies the
+ * preconditioner from the right.
+ *
+ * FGMRES needs two vectors in each iteration step, yielding a total of
+ * <tt>2*SolverFGMRES::AdditionalData::max_basis_size+1</tt> auxiliary
+ * vectors.
+ *
+ * Caveat: Documentation of this class is not up to date. There are also a few
+ * parameters of GMRES we would like to introduce here.
+ *
+ * @author Guido Kanschat, 2003
+ */
+template <class VectorType = Vector<double> >
+class SolverFGMRES : public Solver<VectorType>
+{
+public:
+  /**
+   * Standardized data struct to pipe additional data to the solver.
+   */
+  struct AdditionalData
+  {
+    /**
+     * Constructor. By default, set the maximum basis size to 30. The second
+     * (unnamed) argument is accepted but ignored.
+     */
+    explicit
+    AdditionalData(const unsigned int max_basis_size = 30,
+                   const bool /*use_default_residual*/ = true)
+      :
+      max_basis_size(max_basis_size)
+    {}
+
+    /**
+     * Maximum basis size, i.e. the number of iterations done before a
+     * restart.
+     */
+    unsigned int    max_basis_size;
+  };
+
+  /**
+   * Constructor.
+   */
+  SolverFGMRES (SolverControl            &cn,
+                VectorMemory<VectorType> &mem,
+                const AdditionalData     &data=AdditionalData());
+
+  /**
+   * Constructor. Use an object of type GrowingVectorMemory as a default to
+   * allocate memory.
+   */
+  SolverFGMRES (SolverControl        &cn,
+                const AdditionalData &data=AdditionalData());
+
+  /**
+   * Solve the linear system $Ax=b$ for x.
+   */
+  template<typename MatrixType, typename PreconditionerType>
+  void
+  solve (const MatrixType         &A,
+         VectorType               &x,
+         const VectorType         &b,
+         const PreconditionerType &precondition);
+
+private:
+
+  /**
+   * Additional flags; currently only the maximum basis size.
+   */
+  AdditionalData additional_data;
+
+  /**
+   * Projected system matrix
+   */
+  FullMatrix<double> H;
+
+  /**
+   * Auxiliary matrix for inverting @p H
+   */
+  FullMatrix<double> H1;
+};
+
+/*@}*/
+/* --------------------- Inline and template functions ------------------- */
+
+
+#ifndef DOXYGEN
+namespace internal
+{
+  namespace SolverGMRES
+  {
+    // Set up storage for up to max_size vectors. No vector is allocated
+    // here: every slot starts out as a null pointer.
+    template <class VectorType>
+    inline
+    TmpVectors<VectorType>::TmpVectors (const unsigned int        max_size,
+                                        VectorMemory<VectorType> &vmem)
+      : mem(vmem),
+        data(max_size, 0),
+        offset(0)
+    {
+    }
+
+
+    // Hand every vector that was actually allocated back to the memory pool.
+    template <class VectorType>
+    inline
+    TmpVectors<VectorType>::~TmpVectors ()
+    {
+      for (unsigned int slot=0; slot<data.size(); ++slot)
+        {
+          // slots that were never requested are still null pointers
+          if (data[slot] == 0)
+            continue;
+          mem.free(data[slot]);
+        }
+    }
+
+
+    // Read access to the vector with index i. The vector must have been
+    // created before through operator(); otherwise ExcNotInitialized is
+    // raised in debug mode.
+    template <class VectorType>
+    inline VectorType &
+    TmpVectors<VectorType>::operator[] (const unsigned int i) const
+    {
+      // The range check below and the reported index range both describe
+      // the storage position as i+offset, so the access has to use the same
+      // expression. (The constructor sets offset to zero, in which case
+      // this is simply i.)
+      Assert (i+offset<data.size(),
+              ExcIndexRange(i, -offset, data.size()-offset));
+
+      Assert (data[i+offset] != 0, ExcNotInitialized());
+      return *data[i+offset];
+    }
+
+
+    // Access to the vector with index i. If that vector has not been
+    // requested before, it is obtained from the memory pool and
+    // reinitialized to the layout of temp.
+    template <class VectorType>
+    inline VectorType &
+    TmpVectors<VectorType>::operator() (const unsigned int i,
+                                        const VectorType       &temp)
+    {
+      // The range check below and the reported index range both describe
+      // the storage position as i+offset, so the access has to use the same
+      // expression. (The constructor sets offset to zero, in which case
+      // this is simply i.)
+      Assert (i+offset<data.size(),
+              ExcIndexRange(i,-offset, data.size()-offset));
+      if (data[i+offset] == 0)
+        {
+          data[i+offset] = mem.alloc();
+          data[i+offset]->reinit(temp);
+        }
+      return *data[i+offset];
+    }
+
+    // Ordering used to print eigenvalues in a reproducible sequence: sort by
+    // real part first and use the imaginary part to break ties.
+    inline
+    bool complex_less_pred(const std::complex<double> &x,
+                           const std::complex<double> &y)
+    {
+      if (x.real() != y.real())
+        return x.real() < y.real();
+      return x.imag() < y.imag();
+    }
+  }
+}
+
+
+
+// Regular constructor: copies the four arguments into the members of the
+// same name and switches the (deprecated) eigenvalue logging off.
+template <class VectorType>
+inline
+SolverGMRES<VectorType>::AdditionalData::AdditionalData
+(const unsigned int max_n_tmp_vectors,
+ const bool         right_preconditioning,
+ const bool         use_default_residual,
+ const bool         force_re_orthogonalization)
+  : max_n_tmp_vectors(max_n_tmp_vectors),
+    right_preconditioning(right_preconditioning),
+    use_default_residual(use_default_residual),
+    force_re_orthogonalization(force_re_orthogonalization),
+    compute_eigenvalues(false)
+{
+}
+
+
+
+// Deprecated constructor: like the four-argument one, but the caller also
+// chooses the value of compute_eigenvalues.
+template <class VectorType>
+inline
+SolverGMRES<VectorType>::AdditionalData::AdditionalData
+(const unsigned int max_n_tmp_vectors,
+ const bool         right_preconditioning,
+ const bool         use_default_residual,
+ const bool         force_re_orthogonalization,
+ const bool         compute_eigenvalues)
+  : max_n_tmp_vectors(max_n_tmp_vectors),
+    right_preconditioning(right_preconditioning),
+    use_default_residual(use_default_residual),
+    force_re_orthogonalization(force_re_orthogonalization),
+    compute_eigenvalues(compute_eigenvalues)
+{
+}
+
+
+
+// Constructor taking an explicit vector memory pool; control object and
+// pool are forwarded to the Solver base class, the options are stored.
+template <class VectorType>
+SolverGMRES<VectorType>::SolverGMRES (SolverControl            &cn,
+                                      VectorMemory<VectorType> &mem,
+                                      const AdditionalData     &data)
+  : Solver<VectorType> (cn, mem),
+    additional_data (data)
+{
+}
+
+
+
+// Constructor without a memory pool argument; only the control object is
+// forwarded to the Solver base class, the options are stored.
+template <class VectorType>
+SolverGMRES<VectorType>::SolverGMRES (SolverControl        &cn,
+                                      const AdditionalData &data)
+  : Solver<VectorType> (cn),
+    additional_data (data)
+{
+}
+
+
+
+// Bring the newest Hessenberg column h into triangular form: replay the
+// rotations stored for the earlier columns, then compute the rotation for
+// column col, store it in ci(col)/si(col) and apply it to h and to the
+// right hand side b.
+template <class VectorType>
+inline
+void
+SolverGMRES<VectorType>::givens_rotation (Vector<double> &h,
+                                          Vector<double> &b,
+                                          Vector<double> &ci,
+                                          Vector<double> &si,
+                                          int            col) const
+{
+  // replay the rotations of all previous columns on the new column
+  for (int k=0; k<col; ++k)
+    {
+      const double cosine = ci(k);
+      const double sine   = si(k);
+      const double upper  = h(k);
+      const double lower  = h(k+1);
+      h(k)   =  cosine*upper + sine*lower;
+      h(k+1) = -sine*upper + cosine*lower;
+    }
+
+  // rotation that combines the diagonal and the subdiagonal entry
+  const double diagonal    = h(col);
+  const double subdiagonal = h(col+1);
+  const double inverse_norm = 1./std::sqrt(diagonal*diagonal + subdiagonal*subdiagonal);
+  si(col) = subdiagonal * inverse_norm;
+  ci(col) = diagonal    * inverse_norm;
+
+  // apply the new rotation to the column and to the right hand side
+  h(col)   =  ci(col)*diagonal + si(col)*subdiagonal;
+  b(col+1) = -si(col)*b(col);
+  b(col)  *=  ci(col);
+}
+
+
+
+// Orthogonalize vv against the first dim vectors of orthogonal_vectors with
+// modified Gram-Schmidt, storing the coefficients in h. Returns the l2 norm
+// of vv after orthogonalization; vv itself is not normalized here.
+template <class VectorType>
+inline
+double
+SolverGMRES<VectorType>::modified_gram_schmidt
+(const internal::SolverGMRES::TmpVectors<VectorType> &orthogonal_vectors,
+ const unsigned int                                  dim,
+ const unsigned int                                  accumulated_iterations,
+ VectorType                                          &vv,
+ Vector<double>                                      &h,
+ bool                                                &re_orthogonalize)
+{
+  Assert(dim > 0, ExcInternalError());
+  const unsigned int inner_iteration = dim - 1;
+
+  // As long as re-orthogonalization is not switched on, every fifth step is
+  // used to test for loss of orthogonality. The test needs the norm of vv
+  // before the orthogonalization.
+  const bool test_orthogonality =
+    (re_orthogonalize == false) && (inner_iteration % 5 == 4);
+  const double norm_vv_start = (test_orthogonality ? vv.l2_norm() : 0);
+
+  // First Gram-Schmidt sweep. Each add_and_dot() call subtracts the
+  // previous projection from vv and computes the next coefficient; the last
+  // call yields the squared norm of the result.
+  h(0) = vv * orthogonal_vectors[0];
+  for (unsigned int k=1; k<dim; ++k)
+    h(k) = vv.add_and_dot(-h(k-1), orthogonal_vectors[k-1], orthogonal_vectors[k]);
+  double norm_vv = std::sqrt(vv.add_and_dot(-h(dim-1), orthogonal_vectors[dim-1], vv));
+
+  // Test from C. T. Kelley, Iterative Methods for Linear and Nonlinear
+  // Equations, SIAM, Philadelphia, 1995: if the orthogonalization shrank vv
+  // to less than ten times the square root of the machine precision of its
+  // original norm, vv was almost in the span of the previous vectors and
+  // precision has been lost.
+  if (test_orthogonality)
+    {
+      if (norm_vv > 10. * norm_vv_start *
+          std::sqrt(std::numeric_limits<typename VectorType::value_type>::epsilon()))
+        return norm_vv;
+
+      re_orthogonalize = true;
+      deallog << "Re-orthogonalization enabled at step "
+              << accumulated_iterations << std::endl;
+    }
+
+  if (re_orthogonalize == false)
+    return norm_vv;
+
+  // Second sweep: same procedure, with the corrections accumulated into h.
+  double correction = vv * orthogonal_vectors[0];
+  h(0) += correction;
+  for (unsigned int k=1; k<dim; ++k)
+    {
+      correction = vv.add_and_dot(-correction, orthogonal_vectors[k-1], orthogonal_vectors[k]);
+      h(k) += correction;
+    }
+  norm_vv = std::sqrt(vv.add_and_dot(-correction, orthogonal_vectors[dim-1], vv));
+
+  return norm_vv;
+}
+
+
+
+// Estimate eigenvalues and condition number from the leading dim x dim
+// block of the Hessenberg matrix H_orig and pass them to the given signals.
+// The eigenvalues are additionally written to deallog if log_eigenvalues is
+// true. Nothing is done if dim is zero or if nobody asked for a result.
+template<class VectorType>
+inline void
+SolverGMRES<VectorType>::compute_eigs_and_cond
+(const FullMatrix<double>                     &H_orig,
+ const unsigned int                           dim,
+ const boost::signals2::signal<void (const std::vector<std::complex<double> > &)> &eigenvalues_signal,
+ const boost::signals2::signal<void (double)> &cond_signal,
+ const bool                                   log_eigenvalues)
+{
+  // If the solver stopped before doing a single inner iteration, dim is
+  // zero and there is no projected matrix. Do not hand an empty matrix to
+  // LAPACK in that case.
+  if (dim == 0)
+    return;
+
+  //Avoid copying the Hessenberg matrix if it isn't needed.
+  if (!eigenvalues_signal.empty() || !cond_signal.empty() || log_eigenvalues )
+    {
+      LAPACKFullMatrix<double> mat(dim,dim);
+      for (unsigned int i=0; i<dim; ++i)
+        for (unsigned int j=0; j<dim; ++j)
+          mat(i,j) = H_orig(i,j);
+      //Avoid computing eigenvalues if they are not needed.
+      if (!eigenvalues_signal.empty() || log_eigenvalues )
+        {
+          //Copy mat so that we can compute svd below. Necessary since
+          //compute_eigenvalues will leave mat in state LAPACKSupport::unusable.
+          LAPACKFullMatrix<double> mat_eig(mat);
+          mat_eig.compute_eigenvalues();
+          std::vector<std::complex<double> > eigenvalues(dim);
+          for (unsigned int i=0; i<mat_eig.n(); ++i)
+            eigenvalues[i] = mat_eig.eigenvalue(i);
+          //Sort eigenvalues for nicer output.
+          std::sort(eigenvalues.begin(), eigenvalues.end(),
+                    internal::SolverGMRES::complex_less_pred);
+          eigenvalues_signal(eigenvalues);
+          if (log_eigenvalues)
+            {
+              deallog << "Eigenvalue estimate: ";
+              for (unsigned int i=0; i<mat_eig.n(); ++i)
+                deallog << ' ' << eigenvalues[i];
+              deallog << std::endl;
+            }
+        }
+      //Calculate condition number, avoid calculating the svd if a slot
+      //isn't connected. Need at least a 2-by-2 matrix to do the estimate.
+      if (!cond_signal.empty() && (mat.n()>1))
+        {
+          mat.compute_svd();
+          double condition_number=mat.singular_value(0)/mat.singular_value(mat.n()-1);
+          cond_signal(condition_number);
+        }
+    }
+}
+
+
+
+// Restarted GMRES iteration for A x = b with the given preconditioner,
+// starting from the value of x on entry. Throws
+// SolverControl::NoConvergence if the control object does not report
+// success, and ExcTooFewTmpVectors if fewer than three temporary vectors
+// were requested.
+template<class VectorType>
+template<typename MatrixType, typename PreconditionerType>
+void
+SolverGMRES<VectorType>::solve (const MatrixType         &A,
+                                VectorType               &x,
+                                const VectorType         &b,
+                                const PreconditionerType &precondition)
+{
+  // this code was written a very long time ago by people not associated with
+  // deal.II. we don't make any guarantees to its optimality or that it even
+  // works as expected...
+
+//TODO:[?] Check, why there are two different start residuals.
+//TODO:[GK] Make sure the parameter in the constructor means maximum basis size
+
+  const unsigned int n_tmp_vectors = additional_data.max_n_tmp_vectors;
+
+  // the inner loop below does n_tmp_vectors-2 steps per restart. for fewer
+  // than three temporary vectors this unsigned expression is zero (no
+  // progress is ever made) or wraps around (vectors beyond the allocated
+  // storage would be accessed), so reject such values up front
+  AssertThrow (n_tmp_vectors > 2, ExcTooFewTmpVectors (n_tmp_vectors));
+
+  deallog.push("GMRES");
+
+  // Generate an object where basis vectors are stored.
+  internal::SolverGMRES::TmpVectors<VectorType> tmp_vectors (n_tmp_vectors, this->memory);
+
+  // number of the present iteration; this
+  // number is not reset to zero upon a
+  // restart
+  unsigned int accumulated_iterations = 0;
+
+  // the Hessenberg matrix is only needed if somebody listens to one of the
+  // signals or if the (deprecated) logging flag is set
+  const bool do_eigenvalues=
+    !condition_number_signal.empty()
+    || !all_condition_numbers_signal.empty()
+    || !eigenvalues_signal.empty()
+    || !all_eigenvalues_signal.empty()
+    || additional_data.compute_eigenvalues;
+  // for eigenvalue computation, need to collect the Hessenberg matrix (before
+  // applying Givens rotations)
+  FullMatrix<double> H_orig;
+  if (do_eigenvalues)
+    H_orig.reinit(n_tmp_vectors, n_tmp_vectors-1);
+
+  // matrix used for the orthogonalization process later
+  H.reinit(n_tmp_vectors, n_tmp_vectors-1);
+
+  // some additional vectors, also used in the orthogonalization
+  dealii::Vector<double>
+  gamma(n_tmp_vectors),
+        ci   (n_tmp_vectors-1),
+        si   (n_tmp_vectors-1),
+        h    (n_tmp_vectors-1);
+
+
+  unsigned int dim = 0;
+
+  SolverControl::State iteration_state = SolverControl::iterate;
+  double last_res = -std::numeric_limits<double>::max();
+
+  // switch to determine whether we want a left or a right preconditioner. at
+  // present, left is default, but both ways are implemented
+  const bool left_precondition = !additional_data.right_preconditioning;
+
+  // Per default the left preconditioned GMRes uses the preconditioned
+  // residual and the right preconditioned GMRes uses the unpreconditioned
+  // residual as stopping criterion.
+  const bool use_default_residual = additional_data.use_default_residual;
+
+  // define two aliases
+  VectorType &v = tmp_vectors(0, x);
+  VectorType &p = tmp_vectors(n_tmp_vectors-1, x);
+
+  // Following vectors are needed
+  // when not the default residuals
+  // are used as stopping criterion.
+  // gamma_ is a plain local object, so it needs no manual cleanup
+  VectorType *r=0;
+  VectorType *x_=0;
+  dealii::Vector<double> gamma_;
+  if (!use_default_residual)
+    {
+      r=this->memory.alloc();
+      x_=this->memory.alloc();
+      r->reinit(x);
+      x_->reinit(x);
+
+      gamma_.reinit (gamma.size());
+    }
+
+  bool re_orthogonalize = additional_data.force_re_orthogonalization;
+
+  ///////////////////////////////////////////////////////////////////////////
+  // outer iteration: loop until we either reach convergence or the maximum
+  // number of iterations is exceeded. each cycle of this loop amounts to one
+  // restart
+  do
+    {
+      // reset this vector to the right size
+      h.reinit (n_tmp_vectors-1);
+
+      if (left_precondition)
+        {
+          A.vmult(p,x);
+          p.sadd(-1.,1.,b);
+          precondition.vmult(v,p);
+        }
+      else
+        {
+          A.vmult(v,x);
+          v.sadd(-1.,1.,b);
+        };
+
+      double rho = v.l2_norm();
+
+      // check the residual here as well since it may be that we got the exact
+      // (or an almost exact) solution vector at the outset. if we wouldn't
+      // check here, the next scaling operation would produce garbage
+      if (use_default_residual)
+        {
+          last_res = rho;
+          iteration_state = this->iteration_status (accumulated_iterations, rho, x);
+
+          if (iteration_state != SolverControl::iterate)
+            break;
+        }
+      else
+        {
+          deallog << "default_res=" << rho << std::endl;
+
+          if (left_precondition)
+            {
+              A.vmult(*r,x);
+              r->sadd(-1.,1.,b);
+            }
+          else
+            precondition.vmult(*r,v);
+
+          double res = r->l2_norm();
+          last_res = res;
+          iteration_state = this->iteration_status (accumulated_iterations, res, x);
+
+          if (iteration_state != SolverControl::iterate)
+            break;
+        }
+
+      gamma(0) = rho;
+
+      v *= 1./rho;
+
+      // inner iteration doing at most as many steps as there are temporary
+      // vectors. the number of steps actually been done is propagated outside
+      // through the @p dim variable
+      for (unsigned int inner_iteration=0;
+           ((inner_iteration < n_tmp_vectors-2)
+            &&
+            (iteration_state==SolverControl::iterate));
+           ++inner_iteration)
+        {
+          ++accumulated_iterations;
+          // yet another alias
+          VectorType &vv = tmp_vectors(inner_iteration+1, x);
+
+          if (left_precondition)
+            {
+              A.vmult(p, tmp_vectors[inner_iteration]);
+              precondition.vmult(vv,p);
+            }
+          else
+            {
+              precondition.vmult(p, tmp_vectors[inner_iteration]);
+              A.vmult(vv,p);
+            }
+
+          dim = inner_iteration+1;
+
+          const double s = modified_gram_schmidt(tmp_vectors, dim,
+                                                 accumulated_iterations,
+                                                 vv, h, re_orthogonalize);
+          h(inner_iteration+1) = s;
+
+          //s=0 is a lucky breakdown, the solver will reach convergence,
+          //but we must not divide by zero here.
+          if (s != 0)
+            vv *= 1./s;
+
+          // for eigenvalues, get the resulting coefficients from the
+          // orthogonalization process
+          if (do_eigenvalues)
+            for (unsigned int i=0; i<dim+1; ++i)
+              H_orig(i,inner_iteration) = h(i);
+
+          //  Transformation into triangular structure
+          givens_rotation(h,gamma,ci,si,inner_iteration);
+
+          //  append vector on matrix
+          for (unsigned int i=0; i<dim; ++i)
+            H(i,inner_iteration) = h(i);
+
+          //  default residual
+          rho = std::fabs(gamma(dim));
+
+          if (use_default_residual)
+            {
+              last_res = rho;
+              iteration_state = this->iteration_status (accumulated_iterations, rho, x);
+            }
+          else
+            {
+              deallog << "default_res=" << rho << std::endl;
+
+              // build the current iterate in *x_ without touching x, so
+              // that the requested residual can be evaluated for it
+              dealii::Vector<double> h_(dim);
+              *x_=x;
+              gamma_=gamma;
+              H1.reinit(dim+1,dim);
+
+              for (unsigned int i=0; i<dim+1; ++i)
+                for (unsigned int j=0; j<dim; ++j)
+                  H1(i,j) = H(i,j);
+
+              H1.backward(h_,gamma_);
+
+              if (left_precondition)
+                for (unsigned int i=0 ; i<dim; ++i)
+                  x_->add(h_(i), tmp_vectors[i]);
+              else
+                {
+                  p = 0.;
+                  for (unsigned int i=0; i<dim; ++i)
+                    p.add(h_(i), tmp_vectors[i]);
+                  precondition.vmult(*r,p);
+                  x_->add(1.,*r);
+                };
+              A.vmult(*r,*x_);
+              r->sadd(-1.,1.,b);
+              // Now *r contains the unpreconditioned residual!!
+              if (left_precondition)
+                {
+                  const double res=r->l2_norm();
+                  last_res = res;
+
+                  iteration_state = this->iteration_status (accumulated_iterations, res, x);
+                }
+              else
+                {
+                  precondition.vmult(*x_, *r);
+                  const double preconditioned_res=x_->l2_norm();
+                  last_res = preconditioned_res;
+
+                  iteration_state = this->iteration_status (accumulated_iterations,
+                                                            preconditioned_res, x);
+                }
+            }
+        };
+      // end of inner iteration. now calculate the solution from the temporary
+      // vectors
+      h.reinit(dim);
+      H1.reinit(dim+1,dim);
+
+      for (unsigned int i=0; i<dim+1; ++i)
+        for (unsigned int j=0; j<dim; ++j)
+          H1(i,j) = H(i,j);
+
+      compute_eigs_and_cond(H_orig,dim,all_eigenvalues_signal,
+                            all_condition_numbers_signal,
+                            additional_data.compute_eigenvalues);
+
+      H1.backward(h,gamma);
+
+      if (left_precondition)
+        for (unsigned int i=0 ; i<dim; ++i)
+          x.add(h(i), tmp_vectors[i]);
+      else
+        {
+          p = 0.;
+          for (unsigned int i=0; i<dim; ++i)
+            p.add(h(i), tmp_vectors[i]);
+          precondition.vmult(v,p);
+          x.add(1.,v);
+        };
+      // end of outer iteration. restart if no convergence and the number of
+      // iterations is not exceeded
+    }
+  while (iteration_state == SolverControl::iterate);
+
+  compute_eigs_and_cond(H_orig,dim,eigenvalues_signal,condition_number_signal,
+                        false);
+  if (!use_default_residual)
+    {
+      this->memory.free(r);
+      this->memory.free(x_);
+    }
+
+  deallog.pop();
+
+  // in case of failure: throw exception
+  AssertThrow(iteration_state == SolverControl::success,
+              SolverControl::NoConvergence (accumulated_iterations,
+                                            last_res));
+}
+
+
+
+// Attach slot to the condition number signal that fires on every outer
+// iteration (every_iteration == true) or to the one that fires once at the
+// end, and return the resulting connection.
+template<class VectorType>
+boost::signals2::connection
+SolverGMRES<VectorType>::connect_condition_number_slot
+(const std_cxx11::function<void(double)> &slot,
+ const bool every_iteration)
+{
+  boost::signals2::signal<void (double)> &target_signal =
+    (every_iteration ? all_condition_numbers_signal : condition_number_signal);
+
+  return target_signal.connect(slot);
+}
+
+
+
+// Attach slot to the eigenvalue signal that fires on every outer iteration
+// (every_iteration == true) or to the one that fires once at the end, and
+// return the resulting connection.
+template<class VectorType>
+boost::signals2::connection
+SolverGMRES<VectorType>::connect_eigenvalues_slot
+(const std_cxx11::function<void (const std::vector<std::complex<double> > &)> &slot,
+ const bool every_iteration)
+{
+  boost::signals2::signal<void (const std::vector<std::complex<double> > &)> &target_signal =
+    (every_iteration ? all_eigenvalues_signal : eigenvalues_signal);
+
+  return target_signal.connect(slot);
+}
+
+
+
+// Placeholder only: the GMRES implementation in this file never calls this
+// function, so reaching it is reported as an internal error.
+template<class VectorType>
+double
+SolverGMRES<VectorType>::criterion ()
+{
+  Assert (false, ExcInternalError());
+
+  // value returned when assertions are disabled
+  return 0;
+}
+
+
+//----------------------------------------------------------------------//
+
+// Constructor taking an explicit vector memory pool; control object and
+// pool are forwarded to the Solver base class, the options are stored.
+template <class VectorType>
+SolverFGMRES<VectorType>::SolverFGMRES (SolverControl            &cn,
+                                        VectorMemory<VectorType> &mem,
+                                        const AdditionalData     &data)
+  : Solver<VectorType> (cn, mem),
+    additional_data (data)
+{
+}
+
+
+
+// Constructor without a memory pool argument; only the control object is
+// forwarded to the Solver base class, the options are stored.
+template <class VectorType>
+SolverFGMRES<VectorType>::SolverFGMRES (SolverControl        &cn,
+                                        const AdditionalData &data)
+  : Solver<VectorType> (cn),
+    additional_data (data)
+{
+}
+
+
+
+// Restarted flexible GMRES iteration for A x = b, starting from the value
+// of x on entry; the preconditioner is applied from the right in every
+// step. Throws SolverControl::NoConvergence if the control object does not
+// report success.
+template<class VectorType>
+template<typename MatrixType, typename PreconditionerType>
+void
+SolverFGMRES<VectorType>::solve (const MatrixType         &A,
+                                 VectorType               &x,
+                                 const VectorType         &b,
+                                 const PreconditionerType &precondition)
+{
+  deallog.push("FGMRES");
+
+  const unsigned int basis_size = additional_data.max_basis_size;
+
+  // storage for the basis vectors v[j] and the preconditioned vectors z[j]
+  typename internal::SolverGMRES::TmpVectors<VectorType> v (basis_size, this->memory);
+  typename internal::SolverGMRES::TmpVectors<VectorType> z (basis_size, this->memory);
+
+  // total number of iterations; this counter is not reset upon a restart
+  unsigned int accumulated_iterations = 0;
+
+  // matrix used for the orthogonalization process later
+  H.reinit(basis_size+1, basis_size);
+
+  // right hand side and solution of the projected least squares problem
+  Vector<double> projected_rhs;
+  Vector<double> y;
+
+  SolverControl::State iteration_state = SolverControl::iterate;
+  double res = -std::numeric_limits<double>::max();
+
+  VectorType *aux = this->memory.alloc();
+  aux->reinit(x);
+
+  // each pass through this loop is one restart cycle
+  do
+    {
+      // residual b - A*x of the current iterate
+      A.vmult(*aux, x);
+      aux->sadd(-1., 1., b);
+
+      const double beta = aux->l2_norm();
+      res = beta;
+      iteration_state = this->iteration_status(accumulated_iterations, res, x);
+      if (iteration_state == SolverControl::success)
+        break;
+
+      H.reinit(basis_size+1, basis_size);
+
+      // norm of the vector that is turned into the next basis vector
+      double next_norm = beta;
+
+      for (unsigned int j=0; j<basis_size; ++j)
+        {
+          VectorType &v_j = v(j,x);
+          if (next_norm != 0) // treat lucky breakdown
+            v_j.equ(1./next_norm, *aux);
+          else
+            v_j = 0.;
+
+          precondition.vmult(z(j,x), v[j]);
+          A.vmult(*aux, z[j]);
+
+          // Gram-Schmidt: each add_and_dot() call subtracts the previous
+          // projection from aux and computes the next coefficient
+          H(0,j) = *aux * v[0];
+          for (unsigned int i=1; i<=j; ++i)
+            H(i,j) = aux->add_and_dot(-H(i-1,j), v[i-1], v[i]);
+          next_norm = std::sqrt(aux->add_and_dot(-H(j,j), v[j], *aux));
+          H(j+1,j) = next_norm;
+
+          // Compute projected solution
+          if (j == 0)
+            continue;
+
+          H1.reinit(j+1,j);
+          projected_rhs.reinit(j+1);
+          y.reinit(j);
+          projected_rhs(0) = beta;
+          H1.fill(H);
+
+          // check convergence. note that the vector 'x' we pass to the
+          // criterion is not the final solution we compute if we decide to
+          // jump out of the iteration (we update 'x' again right after the
+          // current loop)
+          Householder<double> house(H1);
+          res = house.least_squares(y, projected_rhs);
+          ++accumulated_iterations;
+          iteration_state = this->iteration_status(accumulated_iterations, res, x);
+          if (iteration_state != SolverControl::iterate)
+            break;
+        }
+
+      // Update solution vector
+      for (unsigned int k=0; k<y.size(); ++k)
+        x.add(y(k), z[k]);
+    }
+  while (iteration_state == SolverControl::iterate);
+
+  this->memory.free(aux);
+
+  deallog.pop();
+
+  // in case of failure: throw exception
+  AssertThrow(iteration_state == SolverControl::success,
+              SolverControl::NoConvergence (accumulated_iterations,
+                                            res));
+}
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/solver_minres.h b/include/deal.II/lac/solver_minres.h
new file mode 100644
index 0000000..23d8258
--- /dev/null
+++ b/include/deal.II/lac/solver_minres.h
@@ -0,0 +1,387 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__solver_minres_h
+#define dealii__solver_minres_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/lac/solver.h>
+#include <deal.II/lac/solver_control.h>
+#include <deal.II/base/logstream.h>
+#include <cmath>
+#include <deal.II/base/subscriptor.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+/*!@addtogroup Solvers */
+/*@{*/
+
+/**
+ * Minimal residual method for symmetric matrices.
+ *
+ * For the requirements on matrices and vectors in order to work with this
+ * class, see the documentation of the Solver base class.
+ *
+ * Like all other solver classes, this class has a local structure called @p
+ * AdditionalData which is used to pass additional parameters to the solver,
+ * like damping parameters or the number of temporary vectors. We use this
+ * additional structure instead of passing these values directly to the
+ * constructor because this makes the use of the @p SolverSelector and other
+ * classes much easier and guarantees that these will continue to work even if
+ * number or type of the additional parameters for a certain solver changes.
+ *
+ * However, since the MinRes method does not need additional data, the
+ * respective structure is empty and does not offer any functionality. The
+ * constructor has a default argument, so you may call it without the
+ * additional parameter.
+ *
+ * The preconditioner has to be symmetric and positive definite.
+ *
+ * The algorithm is taken from the Master thesis of Astrid Battermann with
+ * some changes. The full text can be found at
+ * http://scholar.lib.vt.edu/theses/public/etd-12164379662151/etd-title.html
+ *
+ *
+ * <h3>Observing the progress of linear solver iterations</h3>
+ *
+ * The solve() function of this class uses the mechanism described in the
+ * Solver base class to determine convergence. This mechanism can also be used
+ * to observe the progress of the iteration.
+ *
+ *
+ * @author Thomas Richter, 2000, Luca Heltai, 2006
+ */
+template <class VectorType = Vector<double> >
+class SolverMinRes : public Solver<VectorType>
+{
+public:
+  /**
+   * Standardized data struct to pipe additional data to the solver. This
+   * solver does not need additional data yet.
+   */
+  struct AdditionalData
+  {
+  };
+
+  /**
+   * Constructor. Takes the control object @p cn and the memory object @p mem.
+   */
+  SolverMinRes (SolverControl            &cn,
+                VectorMemory<VectorType> &mem,
+                const AdditionalData     &data=AdditionalData());
+
+  /**
+   * Constructor. Use an object of type GrowingVectorMemory as a default to
+   * allocate memory.
+   */
+  SolverMinRes (SolverControl        &cn,
+                const AdditionalData &data=AdditionalData());
+
+  /**
+   * Virtual destructor.
+   */
+  virtual ~SolverMinRes ();
+
+  /**
+   * Solve the linear system $Ax=b$ for x.
+   */
+  template<typename MatrixType, typename PreconditionerType>
+  void
+  solve (const MatrixType         &A,
+         VectorType               &x,
+         const VectorType         &b,
+         const PreconditionerType &precondition);
+
+  /**
+   * @addtogroup Exceptions
+   * @{
+   */
+
+  /**
+   * Exception used by solve() when the preconditioner turns out not definite.
+   */
+  DeclException0 (ExcPreconditionerNotDefinite);
+  //@}
+
+protected:
+  /**
+   * Convergence value hook; the implementation returns @p res2 as stored.
+   */
+  virtual double criterion();
+  /**
+   * Interface for derived class, intended to receive the current iteration
+   * vector, the residual and the update vector in each step, e.g. for
+   * graphical output. NOTE(review): solve() of this class does not call it.
+   */
+  virtual void print_vectors(const unsigned int step,
+                             const VectorType   &x,
+                             const VectorType   &r,
+                             const VectorType   &d) const;
+
+  /**
+   * Temporary vectors, allocated through the @p VectorMemory object at the
+   * start of the actual solution process and deallocated at the end.
+   */
+  VectorType *Vu0, *Vu1, *Vu2;
+  VectorType *Vm0, *Vm1, *Vm2;
+  VectorType *Vv;
+
+  /**
+   * Meant to hold the square of the residual norm within the iteration loop,
+   * with @p criterion deriving the convergence value from it.
+   * NOTE(review): criterion() returns @p res2 unchanged (no square root is
+   * taken) and solve() never assigns this member -- confirm intended use.
+   */
+  double res2;
+};
+
+/*@}*/
+/*------------------------- Implementation ----------------------------*/
+
+#ifndef DOXYGEN
+
+template<class VectorType>
+SolverMinRes<VectorType>::SolverMinRes (SolverControl            &cn,
+                                        VectorMemory<VectorType> &mem,
+                                        const AdditionalData &)   // unnamed: the additional data is not used
+  :
+  Solver<VectorType>(cn,mem)   // hand control object and vector memory to the base class
+{}   // nothing else to initialise here
+
+
+
+template<class VectorType>
+SolverMinRes<VectorType>::SolverMinRes (SolverControl        &cn,
+                                        const AdditionalData &)   // unnamed: the additional data is not used
+  :
+  Solver<VectorType>(cn)   // base class constructor without an explicit memory object
+{}   // nothing else to initialise here
+
+
+template<class VectorType>
+SolverMinRes<VectorType>::~SolverMinRes ()
+{}   // empty: solve() frees its temporary vectors itself before returning
+
+
+
+template<class VectorType>
+double
+SolverMinRes<VectorType>::criterion()
+{
+  return res2;   // NOTE(review): returned as stored; the member's documentation speaks of a square root -- confirm
+}
+
+
+template<class VectorType>
+void
+SolverMinRes<VectorType>::print_vectors(const unsigned int,   // step (ignored)
+                                        const VectorType &,   // x (ignored)
+                                        const VectorType &,   // r (ignored)
+                                        const VectorType &) const   // d (ignored)
+{}   // default implementation does nothing; the function is virtual so derived classes can override it
+
+
+
+template<class VectorType>
+template<typename MatrixType, typename PreconditionerType>
+void
+SolverMinRes<VectorType>::solve (const MatrixType         &A,
+                                 VectorType               &x,
+                                 const VectorType         &b,
+                                 const PreconditionerType &precondition)
+{
+  SolverControl::State conv=SolverControl::iterate;
+  // Preconditioned MinRes iteration on A x = b; throws NoConvergence unless it ends in 'success'.
+  deallog.push("minres");
+
+  // Memory allocation
+  Vu0  = this->memory.alloc();
+  Vu1  = this->memory.alloc();
+  Vu2  = this->memory.alloc();
+  Vv   = this->memory.alloc();
+  Vm0  = this->memory.alloc();
+  Vm1  = this->memory.alloc();
+  Vm2  = this->memory.alloc();
+  // define some aliases for simpler access
+  typedef VectorType *vecptr;
+  vecptr u[3] = {Vu0, Vu1, Vu2};
+  vecptr m[3] = {Vm0, Vm1, Vm2};
+  VectorType &v   = *Vv;
+  // resize the vectors, but do not set
+  // the values since they'd be overwritten
+  // soon anyway.
+  u[0]->reinit(b,true);
+  u[1]->reinit(b,true);
+  u[2]->reinit(b,true);
+  m[0]->reinit(b,true);
+  m[1]->reinit(b,true);
+  m[2]->reinit(b,true);
+  v.reinit(b,true);
+
+  // scalar recurrence coefficients; index 0 holds the older value
+  double delta[3] = { 0, 0, 0 };
+  double f[2] = { 0, 0 };
+  double e[2] = { 0, 0 };
+
+  double r_l2 = 0;
+  double r0   = 0;
+  double tau = 0;
+  double c    = 0;
+  double gamma = 0;
+  double s = 0;
+  double d_ = 0;
+  double d = 0;
+
+  // The iteration step.
+  unsigned int j = 1;
+
+
+  // Start of the solution process: u[1] = b - A x
+  A.vmult(*m[0],x);
+  *u[1] = b;
+  *u[1] -= *m[0];
+  // Precondition is applied.
+  // The preconditioner has to be
+  // positive definite and symmetric
+
+  // v = precondition applied to u[1]
+  precondition.vmult (v,*u[1]);
+
+  delta[1] = v * (*u[1]);
+  // the preconditioned inner product must not be negative
+  Assert (delta[1]>=0, ExcPreconditionerNotDefinite());
+
+  r0 = std::sqrt(delta[1]);
+  r_l2 = r0;
+
+
+  u[0]->reinit(b);   // no 'true' flag here; presumably this also zeroes the entries -- confirm
+  delta[0] = 1.;
+  m[0]->reinit(b);
+  m[1]->reinit(b);
+  m[2]->reinit(b);
+
+  conv = this->iteration_status(0, r_l2, x);
+
+  while (conv==SolverControl::iterate)
+    {
+      if (delta[1]!=0)
+        v *= 1./std::sqrt(delta[1]);
+      else
+        v.reinit(b);   // avoid dividing by zero when delta[1] vanishes
+
+      A.vmult(*u[2],v);
+      u[2]->add (-std::sqrt(delta[1]/delta[0]), *u[0]);
+
+      gamma = *u[2] * v;
+      u[2]->add (-gamma / std::sqrt(delta[1]), *u[1]);
+      *m[0] = v;
+
+      // precondition: v = precondition applied to u[2].
+      // Preconditioner has to be positive
+      // definite and symmetric.
+      precondition.vmult(v,*u[2]);
+
+      delta[2] = v * (*u[2]);
+
+      Assert (delta[2]>=0, ExcPreconditionerNotDefinite());
+
+      if (j==1)
+        {
+          d_ = gamma;
+          e[1] = std::sqrt(delta[2]);
+        }
+      if (j>1)
+        {
+          d_ = s * e[0] - c * gamma;
+          e[0] = c * e[0] + s * gamma;
+          f[1] = s * std::sqrt(delta[2]);
+          e[1] = -c * std::sqrt(delta[2]);
+        }
+
+      d = std::sqrt (d_*d_ + delta[2]);
+
+      if (j>1)
+        tau *= s / c;
+      c = d_ / d;
+      tau *= c;
+
+      s = std::sqrt(delta[2]) / d;
+
+      if (j==1)
+        tau = r0 * c;
+
+      m[0]->add (-e[0], *m[1]);
+      if (j>1)
+        m[0]->add (-f[0], *m[2]);
+      *m[0] *= 1./d;
+      x.add (tau, *m[0]);
+      r_l2 *= std::fabs(s);   // the tracked residual value is scaled by |s| in every step
+
+      conv = this->iteration_status(j, r_l2, x);
+
+      // next iteration step
+      ++j;
+      // All vectors have to be shifted
+      // one iteration step.
+      // This should be changed some day.
+      //
+      // the previous code was like this:
+      //   m[2] = m[1];
+      //   m[1] = m[0];
+      // but it can be made more efficient,
+      // since the value of m[0] is no longer
+      // needed in the next iteration
+      swap (*m[2], *m[1]);
+      swap (*m[1], *m[0]);
+
+      // likewise, but reverse direction:
+      //   u[0] = u[1];
+      //   u[1] = u[2];
+      swap (*u[0], *u[1]);
+      swap (*u[1], *u[2]);
+
+      // these are scalars, so no need
+      // to bother
+      f[0] = f[1];
+      e[0] = e[1];
+      delta[0] = delta[1];
+      delta[1] = delta[2];
+    }
+
+  // Deallocation of Memory
+  this->memory.free(Vu0);
+  this->memory.free(Vu1);
+  this->memory.free(Vu2);
+  this->memory.free(Vv);
+  this->memory.free(Vm0);
+  this->memory.free(Vm1);
+  this->memory.free(Vm2);
+  // Output
+  deallog.pop ();
+
+  // in case of failure: throw exception (j has already been advanced past the last checked step)
+  AssertThrow(conv == SolverControl::success,
+              SolverControl::NoConvergence (j, r_l2));
+
+  // otherwise exit as normal
+}
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/solver_qmrs.h b/include/deal.II/lac/solver_qmrs.h
new file mode 100644
index 0000000..412d237
--- /dev/null
+++ b/include/deal.II/lac/solver_qmrs.h
@@ -0,0 +1,429 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__solver_qmrs_h
+#define dealii__solver_qmrs_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/lac/solver.h>
+#include <deal.II/lac/solver_control.h>
+#include <deal.II/base/logstream.h>
+#include <cmath>
+#include <deal.II/base/subscriptor.h>
+
+#include <cmath>
+
+DEAL_II_NAMESPACE_OPEN
+
+/*!@addtogroup Solvers */
+/*@{*/
+
+/**
+ * Quasi-minimal residual method for symmetric matrices.
+ *
+ * The QMRS method is supposed to solve symmetric indefinite linear systems
+ * with symmetric, not necessarily definite preconditioners. This version of
+ * QMRS is adapted from Freund/Nachtigal: Software for simplified Lanczos and
+ * QMR algorithms, Appl. Num. Math. 19 (1995), pp. 319-341
+ *
+ * This version is for right preconditioning only, since then only the
+ * preconditioner is used: left preconditioning seems to require the inverse.
+ *
+ * For the requirements on matrices and vectors in order to work with this
+ * class, see the documentation of the Solver base class.
+ *
+ * Like all other solver classes, this class has a local structure called @p
+ * AdditionalData which is used to pass additional parameters to the solver,
+ * like damping parameters or the number of temporary vectors. We use this
+ * additional structure instead of passing these values directly to the
+ * constructor because this makes the use of the @p SolverSelector and other
+ * classes much easier and guarantees that these will continue to work even if
+ * number or type of the additional parameters for a certain solver changes.
+ *
+ * However, since the QMRS method does not need additional data, the
+ * respective structure is empty and does not offer any functionality. The
+ * constructor has a default argument, so you may call it without the
+ * additional parameter.
+ *
+ *
+ * <h3>Observing the progress of linear solver iterations</h3>
+ *
+ * The solve() function of this class uses the mechanism described in the
+ * Solver base class to determine convergence. This mechanism can also be used
+ * to observe the progress of the iteration.
+ *
+ *
+ * @author Guido Kanschat, 1999
+ */
+template <typename VectorType = Vector<double> >
+class SolverQMRS : public Solver<VectorType>
+{
+public:
+  /**
+   * Standardized data struct to pipe additional data to the solver.
+   *
+   * There are two possibilities to compute the residual: one is an estimate
+   * using the computed value @p tau. The other is exact computation using
+   * another matrix vector multiplication.
+   *
+   * QMRS is susceptible to breakdowns, so we need a parameter telling us
+   * which numbers are considered zero. The proper breakdown criterion is very
+   * unclear, so experiments may be necessary here.
+   */
+  struct AdditionalData
+  {
+    /**
+     * Constructor.
+     *
+     * The default is no exact residual computation and breakdown parameter
+     * 1e-16.
+     */
+    explicit
+    AdditionalData(bool exact_residual = false,
+                   double breakdown=1.e-16) :
+      exact_residual(exact_residual),
+      breakdown(breakdown)
+    {}
+
+    /**
+     * Flag for exact computation of residual.
+     */
+    bool exact_residual;
+
+    /**
+     * Breakdown threshold.
+     */
+    double breakdown;
+  };
+
+  /**
+   * Constructor. Stores a copy of @p data in the additional_data member.
+   */
+  SolverQMRS (SolverControl            &cn,
+              VectorMemory<VectorType> &mem,
+              const AdditionalData     &data=AdditionalData());
+
+  /**
+   * Constructor. Use an object of type GrowingVectorMemory as a default to
+   * allocate memory.
+   */
+  SolverQMRS (SolverControl        &cn,
+              const AdditionalData &data=AdditionalData());
+
+  /**
+   * Solve the linear system $Ax=b$ for x.
+   */
+  template<typename MatrixType, typename PreconditionerType>
+  void
+  solve (const MatrixType         &A,
+         VectorType               &x,
+         const VectorType         &b,
+         const PreconditionerType &precondition);
+
+  /**
+   * Interface for derived class. This function gets the current iteration
+   * vector, the residual and the update vector in each step. It can be used
+   * for a graphical output of the convergence history.
+   */
+  virtual void print_vectors (const unsigned int step,
+                              const VectorType   &x,
+                              const VectorType   &r,
+                              const VectorType   &d) const;
+protected:
+  /**
+   * Implementation of the computation of the norm of the residual.
+   */
+  virtual double criterion();
+
+  /**
+   * Temporary vectors, allocated through the @p VectorMemory object at the
+   * start of the actual solution process and deallocated at the end.
+   */
+  VectorType *Vv;
+  VectorType *Vp;
+  VectorType *Vq;
+  VectorType *Vt;
+  VectorType *Vd;
+  /**
+   * Iteration vector.
+   */
+  VectorType *Vx;
+  /**
+   * RHS vector.
+   */
+  const VectorType *Vb;
+
+  /**
+   * Meant to hold the square of the residual norm within the iteration loop;
+   * the function @p criterion returns the square root of this value.
+   * NOTE(review): neither solve() nor iterate() assigns this member (the
+   * iteration keeps its residual in a local variable) -- confirm intended use.
+   */
+  double res2;
+
+  /**
+   * Additional parameters.
+   */
+  AdditionalData additional_data;
+
+private:
+
+  /**
+   * A structure returned by the iterate() function representing what it found
+   * is happening during the iteration.
+   */
+  struct IterationResult
+  {
+    SolverControl::State state;           // state to report; 'iterate' makes solve() restart
+    double               last_residual;   // last residual, or the breakdown indicator value
+
+    IterationResult (const SolverControl::State state,
+                     const double               last_residual);
+  };
+
+
+  /**
+   * The iteration loop itself. The function returns a structure indicating
+   * what happened in this function.
+   */
+  template<typename MatrixType, typename PreconditionerType>
+  IterationResult
+  iterate (const MatrixType         &A,
+           const PreconditionerType &precondition);
+
+  /**
+   * Number of the current iteration (accumulated over restarts)
+   */
+  unsigned int step;
+};
+
+/*@}*/
+/*------------------------- Implementation ----------------------------*/
+
+#ifndef DOXYGEN
+
+template<class VectorType>
+SolverQMRS<VectorType>::IterationResult::IterationResult (const SolverControl::State state,
+                                                          const double               last_residual)
+  :
+  state (state),
+  last_residual (last_residual)
+{}   // plain value holder: only stores the two arguments
+
+
+template<class VectorType>
+SolverQMRS<VectorType>::SolverQMRS (SolverControl            &cn,
+                                    VectorMemory<VectorType> &mem,
+                                    const AdditionalData     &data)
+  :
+  Solver<VectorType>(cn,mem),   // hand control object and vector memory to the base class
+  additional_data(data)         // keep a copy of the exact_residual / breakdown settings
+{}
+
+
+
+template<class VectorType>
+SolverQMRS<VectorType>::SolverQMRS(SolverControl        &cn,
+                                   const AdditionalData &data)
+  :
+  Solver<VectorType>(cn),   // base class constructor without an explicit memory object
+  additional_data(data)     // keep a copy of the exact_residual / breakdown settings
+{}
+
+
+
+template<class VectorType>
+double
+SolverQMRS<VectorType>::criterion()
+{
+  return std::sqrt(res2);   // res2 is documented as a squared quantity, hence the root
+}
+
+
+
+template<class VectorType>
+void
+SolverQMRS<VectorType>::print_vectors(const unsigned int,   // step (ignored)
+                                      const VectorType &,   // x (ignored)
+                                      const VectorType &,   // r (ignored)
+                                      const VectorType &) const   // d (ignored)
+{}   // default implementation does nothing; iterate() calls it once per step
+
+
+
+template<class VectorType>
+template<typename MatrixType, typename PreconditionerType>
+void
+SolverQMRS<VectorType>::solve (const MatrixType         &A,
+                               VectorType               &x,
+                               const VectorType         &b,
+                               const PreconditionerType &precondition)
+{
+  deallog.push("QMRS");
+
+  // Memory allocation
+  Vv  = this->memory.alloc();
+  Vp  = this->memory.alloc();
+  Vq  = this->memory.alloc();
+  Vt  = this->memory.alloc();
+  Vd  = this->memory.alloc();   // not resized below; iterate() calls d.reinit(x) itself
+
+  Vx = &x;   // iterate() reaches the solution and right hand side
+  Vb = &b;   // through these member pointers
+  // resize the vectors, but do not set
+  // the values since they'd be overwritten
+  // soon anyway.
+  Vv->reinit(x, true);
+  Vp->reinit(x, true);
+  Vq->reinit(x, true);
+  Vt->reinit(x, true);
+
+  step = 0;
+
+  IterationResult state (SolverControl::failure,0);
+
+  // iterate() reports 'iterate' when it stopped on a breakdown; in that case start it again
+  do
+    {
+      if (step > 0)
+        deallog << "Restart step " << step << std::endl;
+      state = iterate(A, precondition);
+    }
+  while (state.state == SolverControl::iterate);
+
+  // Deallocate Memory
+  this->memory.free(Vv);
+  this->memory.free(Vp);
+  this->memory.free(Vq);
+  this->memory.free(Vt);
+  this->memory.free(Vd);
+
+  // Output
+  deallog.pop();
+
+  // in case of failure: throw exception
+  AssertThrow(state.state == SolverControl::success,
+              SolverControl::NoConvergence (step,
+                                            state.last_residual));
+  // otherwise exit as normal
+}
+
+
+
+template<class VectorType>
+template<typename MatrixType, typename PreconditionerType>
+typename SolverQMRS<VectorType>::IterationResult
+SolverQMRS<VectorType>::iterate(const MatrixType         &A,
+                                const PreconditionerType &precondition)
+{
+  /* Remark: the matrix A in the article is the preconditioned matrix.
+   * Therefore, we have to precondition x before we compute the first residual.
+   * In step 1 we replace p by q to avoid one preconditioning step.
+   * There are still two steps left, making this algorithm expensive.
+   */
+
+  SolverControl::State state = SolverControl::iterate;
+
+  // define some aliases for simpler access
+  VectorType &v  = *Vv;
+  VectorType &p  = *Vp;
+  VectorType &q  = *Vq;
+  VectorType &t  = *Vt;
+  VectorType &d  = *Vd;
+  VectorType &x  = *Vx;
+  const VectorType &b = *Vb;
+
+  int  it=0;   // steps taken in this call only; the member 'step' keeps counting across restarts
+
+  double tau, rho, theta=0, sigma, alpha, psi, theta_old, rho_old, beta;
+  double res;
+
+  d.reinit(x);
+
+  // Apply right preconditioning to x
+  precondition.vmult(q,x);
+  // Preconditioned residual
+  A.vmult(v,q);
+  v.sadd(-1.,1.,b);
+  res = v.l2_norm();
+
+  if (this->iteration_status(step, res, x) == SolverControl::success)
+    return IterationResult(SolverControl::success, res);
+
+  p = v;
+
+  precondition.vmult(q,p);
+
+  tau = v.norm_sqr();
+  rho = q*v;
+
+  while (state == SolverControl::iterate)
+    {
+      step++;
+      it++;
+      // Step 1
+      A.vmult(t,q);
+      // Step 2
+      sigma = q*t;
+
+//TODO:[?] Find a really good breakdown criterion. The absolute one detects breakdown instead of convergence
+      if (std::fabs(sigma/rho) < additional_data.breakdown)
+        return IterationResult(SolverControl::iterate, std::fabs(sigma/rho));   // breakdown: caller restarts
+      // Step 3
+      alpha = rho/sigma;
+
+      v.add(-alpha,t);
+      // Step 4
+      theta_old = theta;
+      theta = v*v/tau;
+      psi = 1./(1.+theta);
+      tau *= theta*psi;
+
+      d.sadd(psi*theta_old, psi*alpha, p);
+      x.add(d);
+
+      print_vectors(step,x,v,d);
+      // Step 5: residual, either recomputed from A and x or estimated from tau
+      if (additional_data.exact_residual)
+        {
+          A.vmult(q,x);   // q serves as scratch space here; it is recomputed in step 7
+          q.sadd(-1.,1.,b);
+          res = q.l2_norm();
+        }
+      else
+        res = std::sqrt((it+1)*tau);
+      state = this->iteration_status(step,res,x);
+      if ((state == SolverControl::success)
+          || (state == SolverControl::failure))
+        return IterationResult(state, res);
+      // Step 6
+      if (std::fabs(rho) < additional_data.breakdown)
+        return IterationResult(SolverControl::iterate, std::fabs(rho));   // breakdown: caller restarts
+      // Step 7
+      rho_old = rho;
+      precondition.vmult(q,v);
+      rho = q*v;
+
+      beta = rho/rho_old;
+      p.sadd(beta,v);
+      precondition.vmult(q,p);
+    }
+  return IterationResult(SolverControl::success, std::fabs(rho));   // NOTE(review): presumably unreachable, the loop leaves via return
+}
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/solver_relaxation.h b/include/deal.II/lac/solver_relaxation.h
new file mode 100644
index 0000000..fad56a2
--- /dev/null
+++ b/include/deal.II/lac/solver_relaxation.h
@@ -0,0 +1,164 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__solver_relaxation_h
+#define dealii__solver_relaxation_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/lac/solver.h>
+#include <deal.II/lac/solver_control.h>
+#include <deal.II/base/subscriptor.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * Implementation of an iterative solver based on relaxation methods. The
+ * stopping criterion is the norm of the residual.
+ *
+ * For the requirements on matrices and vectors in order to work with this
+ * class, see the documentation of the Solver base class.
+ *
+ * Like all other solver classes, this class has a local structure called @p
+ * AdditionalData which is used to pass additional parameters to the solver,
+ * like damping parameters or the number of temporary vectors. We use this
+ * additional structure instead of passing these values directly to the
+ * constructor because this makes the use of the @p SolverSelector and other
+ * classes much easier and guarantees that these will continue to work even if
+ * number or type of the additional parameters for a certain solver changes.
+ * AdditionalData of this class currently does not contain any data.
+ *
+ *
+ * <h3>Observing the progress of linear solver iterations</h3>
+ *
+ * The solve() function of this class uses the mechanism described in the
+ * Solver base class to determine convergence. This mechanism can also be used
+ * to observe the progress of the iteration.
+ *
+ *
+ * @ingroup Solvers
+ * @author Guido Kanschat
+ * @date 2010
+ */
+template <typename VectorType = Vector<double> >
+class SolverRelaxation : public Solver<VectorType>
+{
+public:
+  /**
+   * Standardized data struct to pipe additional data to the solver. There is
+   * no data in here for relaxation methods.
+   */
+  struct AdditionalData {};
+
+  /**
+   * Constructor. The @p data argument is not used by the implementation.
+   */
+  SolverRelaxation (SolverControl        &cn,
+                    const AdditionalData &data=AdditionalData());
+
+  /**
+   * Virtual destructor.
+   */
+  virtual ~SolverRelaxation ();
+
+  /**
+   * Solve the system $Ax = b$ using the relaxation method $x_{k+1} =
+   * R(x_k,b)$. The matrix <i>A</i> itself is only used to compute the
+   * residual.
+   */
+  template<typename MatrixType, class RelaxationType>
+  void
+  solve (const MatrixType     &A,
+         VectorType           &x,
+         const VectorType     &b,
+         const RelaxationType &R);
+};
+
+//----------------------------------------------------------------------//
+
+template <class VectorType>
+SolverRelaxation<VectorType>::SolverRelaxation (SolverControl        &cn,
+                                                const AdditionalData &)   // unnamed: the additional data is not used
+  :
+  Solver<VectorType> (cn)   // base class constructor without an explicit memory object
+{}
+
+
+
+template <class VectorType>
+SolverRelaxation<VectorType>::~SolverRelaxation()
+{}   // empty: the class declares no data members of its own
+
+
+template <class VectorType>
+template <typename MatrixType, class RelaxationType>
+void
+SolverRelaxation<VectorType>::solve (const MatrixType     &A,
+                                     VectorType           &x,
+                                     const VectorType     &b,
+                                     const RelaxationType &R)
+{
+  GrowingVectorMemory<VectorType> mem;
+  SolverControl::State conv=SolverControl::iterate;
+
+  // Memory allocation
+  typename VectorMemory<VectorType>::Pointer Vr(mem);
+  VectorType &r  = *Vr;
+  r.reinit(x);
+  typename VectorMemory<VectorType>::Pointer Vd(mem);
+  VectorType &d  = *Vd;   // NOTE(review): d is sized below but not used anywhere else in this function
+  d.reinit(x);
+
+  deallog.push("Relaxation");
+
+  int iter=0;
+  try
+    {
+      // Main loop
+      for (; conv==SolverControl::iterate; iter++)
+        {
+          // Compute residual
+          A.vmult(r,x);
+          r.sadd(-1.,1.,b);
+
+          // The l2 norm of the
+          // residual r computed
+          // just above is handed
+          // directly to
+          // iteration_status().
+          conv = this->iteration_status (iter, r.l2_norm(), x);
+          if (conv != SolverControl::iterate)
+            break;
+          R.step(x,b);
+        }
+    }
+  catch (...)
+    {
+      deallog.pop();   // undo the push above before passing the exception on
+      throw;
+    }
+  deallog.pop();
+
+  // in case of failure: throw exception
+  AssertThrow(conv == SolverControl::success,
+              SolverControl::NoConvergence (iter, r.l2_norm()));
+  // otherwise exit as normal
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/solver_richardson.h b/include/deal.II/lac/solver_richardson.h
new file mode 100644
index 0000000..20f07b2
--- /dev/null
+++ b/include/deal.II/lac/solver_richardson.h
@@ -0,0 +1,385 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__solver_richardson_h
+#define dealii__solver_richardson_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/lac/solver.h>
+#include <deal.II/lac/solver_control.h>
+#include <deal.II/base/subscriptor.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+/*!@addtogroup Solvers */
+/*@{*/
+
+/**
+ * Implementation of the preconditioned Richardson iteration method. The
+ * stopping criterion is the norm of the residual.
+ *
+ * For the requirements on matrices and vectors in order to work with this
+ * class, see the documentation of the Solver base class.
+ *
+ * Like all other solver classes, this class has a local structure called @p
+ * AdditionalData which is used to pass additional parameters to the solver,
+ * like damping parameters or the number of temporary vectors. We use this
+ * additional structure instead of passing these values directly to the
+ * constructor because this makes the use of the @p SolverSelector and other
+ * classes much easier and guarantees that these will continue to work even if
+ * number or type of the additional parameters for a certain solver changes.
+ *
+ * For the Richardson method, the additional data consists of the damping
+ * parameter (set to one by default) and a flag selecting whether the stopping
+ * criterion uses the preconditioned residual (off by default).
+ *
+ *
+ * <h3>Observing the progress of linear solver iterations</h3>
+ *
+ * The solve() function of this class uses the mechanism described in the
+ * Solver base class to determine convergence. This mechanism can also be used
+ * to observe the progress of the iteration.
+ *
+ *
+ * @author Ralf Hartmann
+ */
+template <class VectorType = Vector<double> >
+class SolverRichardson : public Solver<VectorType>
+{
+public:
+  /**
+   * Standardized data struct to pipe additional data to the solver.
+   */
+  struct AdditionalData
+  {
+    /**
+     * Constructor. Defaults: damping parameter one, unpreconditioned residual.
+     */
+    explicit
+    AdditionalData (const double omega                       = 1,
+                    const bool   use_preconditioned_residual = false);
+
+    /**
+     * Relaxation (damping) parameter; each update of x is scaled by it.
+     */
+    double omega;
+
+    /**
+     * If true, the stopping criterion uses the preconditioned residual.
+     */
+    bool use_preconditioned_residual;
+
+  };
+
+  /**
+   * Constructor.
+   */
+  SolverRichardson (SolverControl            &cn,
+                    VectorMemory<VectorType> &mem,
+                    const AdditionalData     &data=AdditionalData());
+
+  /**
+   * Constructor. Use an object of type GrowingVectorMemory as a default to
+   * allocate memory.
+   */
+  SolverRichardson (SolverControl        &cn,
+                    const AdditionalData &data=AdditionalData());
+
+  /**
+   * Virtual destructor.
+   */
+  virtual ~SolverRichardson ();
+
+  /**
+   * Solve the linear system $Ax=b$ for x.
+   */
+  template<typename MatrixType, typename PreconditionerType>
+  void
+  solve (const MatrixType         &A,
+         VectorType               &x,
+         const VectorType         &b,
+         const PreconditionerType &precondition);
+
+  /**
+   * Solve $A^Tx=b$ for $x$.
+   */
+  template<typename MatrixType, typename PreconditionerType>
+  void
+  Tsolve (const MatrixType         &A,
+          VectorType               &x,
+          const VectorType         &b,
+          const PreconditionerType &precondition);
+
+  /**
+   * Set the damping-coefficient. Default is 1., i.e. no damping.
+   */
+  void set_omega (const double om=1.);
+
+  /**
+   * Interface for derived class. This function gets the current iteration
+   * vector, the residual and the update vector in each step. It can be used
+   * for a graphical output of the convergence history.
+   */
+  virtual void print_vectors (const unsigned int step,
+                              const VectorType &x,
+                              const VectorType &r,
+                              const VectorType &d) const;
+
+protected:
+  /**
+   * Implementation of the computation of the norm of the residual.
+   */
+  virtual typename VectorType::value_type criterion();
+
+  /**
+   * Residual. Temporary vector allocated through the VectorMemory object at
+   * the start of the actual solution process and deallocated at the end.
+   */
+  VectorType *Vr;
+  /**
+   * Preconditioned residual. Temporary vector allocated through the
+   * VectorMemory object at the start of the actual solution process and
+   * deallocated at the end.
+   */
+  VectorType *Vd;
+
+  /**
+   * Control parameters.
+   */
+  AdditionalData additional_data;
+
+  /**
+   * Within the iteration loop, the norm of the residual is stored in this
+   * variable. The function @p criterion sets and returns it; depending on
+   * AdditionalData::use_preconditioned_residual it is the l2 norm of either
+   * the residual or the preconditioned residual.
+   */
+  typename VectorType::value_type res;
+};
+
+/*@}*/
+/*----------------- Implementation of the Richardson Method ------------------*/
+
+#ifndef DOXYGEN
+
+// Store the damping factor and the choice of stopping-criterion residual.
+template <class VectorType>
+inline
+SolverRichardson<VectorType>::AdditionalData::AdditionalData
+(const double omega,
+ const bool   use_preconditioned_residual)
+  : omega (omega),
+    use_preconditioned_residual (use_preconditioned_residual)
+{}
+
+
+// Vr and Vd start out null; they refer to vectors only during a solve.
+template <class VectorType>
+SolverRichardson<VectorType>::SolverRichardson(SolverControl            &cn,
+                                               VectorMemory<VectorType> &mem,
+                                               const AdditionalData     &data)
+  : Solver<VectorType> (cn,mem),
+    Vr(0), Vd(0), additional_data(data), res()
+{}
+
+
+
+// Vr and Vd start out null; they refer to vectors only during a solve.
+template <class VectorType>
+SolverRichardson<VectorType>::SolverRichardson(SolverControl        &cn,
+                                               const AdditionalData &data)
+  : Solver<VectorType> (cn),
+    Vr(0), Vd(0), additional_data(data), res()
+{}
+
+
+
+template <class VectorType>
+SolverRichardson<VectorType>::~SolverRichardson ()
+{ /* intentionally empty */ }
+
+
+// Richardson iteration: each step forms the residual r, applies the
+// preconditioner to it to obtain d, and adds omega*d to x.
+template <class VectorType>
+template <typename MatrixType, typename PreconditionerType>
+void
+SolverRichardson<VectorType>::solve (const MatrixType         &A,
+                                     VectorType               &x,
+                                     const VectorType         &b,
+                                     const PreconditionerType &precondition)
+{
+  SolverControl::State conv=SolverControl::iterate;
+  double last_criterion = -std::numeric_limits<double>::max();
+  unsigned int iter = 0;
+
+  // Start from null so the handler below frees only what was allocated
+  Vr = Vd = 0;
+  deallog.push("Richardson");
+
+  try
+    {
+      // Memory allocation. This happens inside the try block so that a
+      // failure while obtaining or sizing a vector leaks nothing.
+      Vr  = this->memory.alloc();
+      VectorType &r  = *Vr;
+      r.reinit(x);
+      Vd  = this->memory.alloc();
+      VectorType &d  = *Vd;
+      d.reinit(x);
+
+      // Main loop
+      while (conv==SolverControl::iterate)
+        {
+          // Do not use residual,
+          // but do it in 2 steps
+          A.vmult(r,x);
+          r.sadd(-1.,1.,b);
+          precondition.vmult(d,r);
+
+          // criterion() computes the norm of the (preconditioned)
+          // residual and stores it in res.
+          last_criterion = criterion();
+          conv = this->iteration_status (iter, last_criterion, x);
+          if (conv != SolverControl::iterate)
+            break;
+
+          x.add(additional_data.omega,d);
+          print_vectors(iter,x,r,d);
+
+          ++iter;
+        }
+    }
+  catch (...)
+    {
+      if (Vr != 0) this->memory.free(Vr);
+      if (Vd != 0) this->memory.free(Vd);
+      Vr = Vd = 0;
+      deallog.pop();
+      throw;
+    }
+  // Deallocate memory and make sure the members do not dangle
+  this->memory.free(Vr);
+  this->memory.free(Vd);
+  Vr = Vd = 0;
+  deallog.pop();
+
+  // in case of failure: throw exception
+  AssertThrow(conv == SolverControl::success,
+              SolverControl::NoConvergence (iter, last_criterion));
+}
+
+
+// Richardson iteration using Tvmult of matrix and preconditioner: each step
+// forms the residual r, computes d from it, and adds omega*d to x.
+template <class VectorType>
+template <typename MatrixType, typename PreconditionerType>
+void
+SolverRichardson<VectorType>::Tsolve (const MatrixType         &A,
+                                      VectorType               &x,
+                                      const VectorType         &b,
+                                      const PreconditionerType &precondition)
+{
+  SolverControl::State conv=SolverControl::iterate;
+  double last_criterion = -std::numeric_limits<double>::max();
+  unsigned int iter = 0;
+
+  // Start from null so the handler below frees only what was allocated
+  Vr = Vd = 0;
+  deallog.push("RichardsonT");
+
+  try
+    {
+      // Allocate inside the try block so that a failure leaks nothing
+      Vr  = this->memory.alloc();
+      VectorType &r  = *Vr;
+      r.reinit(x);
+      Vd  = this->memory.alloc();
+      VectorType &d  = *Vd;
+      d.reinit(x);
+      // Main loop
+      while (conv==SolverControl::iterate)
+        {
+          // Do not use Tresidual, but do it in 2 steps
+          A.Tvmult(r,x);
+          r.sadd(-1.,1.,b);
+          precondition.Tvmult(d,r);
+
+          last_criterion = criterion();
+          conv = this->iteration_status (iter, last_criterion, x);
+          if (conv != SolverControl::iterate)
+            break;
+
+          x.add(additional_data.omega,d);
+          print_vectors(iter,x,r,d);
+          ++iter;
+        }
+    }
+  catch (...)
+    {
+      if (Vr != 0) this->memory.free(Vr);
+      if (Vd != 0) this->memory.free(Vd);
+      Vr = Vd = 0;
+      deallog.pop();
+      throw;
+    }
+  // Deallocate memory and make sure the members do not dangle
+  this->memory.free(Vr);
+  this->memory.free(Vd);
+  Vr = Vd = 0;
+  deallog.pop();
+
+  // in case of failure: throw exception
+  AssertThrow(conv == SolverControl::success,
+              SolverControl::NoConvergence (iter, last_criterion));
+}
+
+
+template <class VectorType>
+void SolverRichardson<VectorType>::print_vectors
+(const unsigned int /*step*/,
+ const VectorType & /*x*/,
+ const VectorType & /*r*/,
+ const VectorType & /*d*/) const
+{}
+
+
+
+// Store in res, and return, the l2 norm of the vector that the stopping
+// criterion is based on: Vd if additional_data asks for it, else Vr.
+template <class VectorType>
+inline typename VectorType::value_type
+SolverRichardson<VectorType>::criterion()
+{
+  VectorType *const monitored
+    = (additional_data.use_preconditioned_residual ? Vd : Vr);
+  return res = monitored->l2_norm();
+}
+
+
+// Overwrite the damping factor stored in additional_data.
+template <class VectorType>
+inline void SolverRichardson<VectorType>::set_omega (const double om)
+{
+  this->additional_data.omega = om;
+}
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/solver_selector.h b/include/deal.II/lac/solver_selector.h
new file mode 100644
index 0000000..99591e2
--- /dev/null
+++ b/include/deal.II/lac/solver_selector.h
@@ -0,0 +1,361 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__solver_selector_h
+#define dealii__solver_selector_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/lac/solver.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/vector_memory.h>
+#include <deal.II/lac/solver_control.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/solver_bicgstab.h>
+#include <deal.II/lac/solver_gmres.h>
+#include <deal.II/lac/solver_minres.h>
+#include <deal.II/lac/vector_memory.h>
+#include <deal.II/lac/solver_richardson.h>
+#include <deal.II/lac/precondition.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/*!@addtogroup Solvers */
+/*@{*/
+
+/**
+ * Selects a solver by changing a parameter.
+ *
+ * By calling the @p solve function of this @p SolverSelector, it selects the
+ * @p solve function of that @p Solver that was specified in the constructor
+ * of this class.
+ *
+ * <h3>Usage</h3> The simplest use of this class is the following:
+ * @code
+ *                                  // generate a @p SolverControl
+ * SolverControl control;
+ *                                  // generate a @p SolverSelector and
+ *                                  // attach the @p SolverControl to it
+ * SolverSelector<Vector<double> > solver_selector;
+ * solver_selector.set_control(control);
+ *                                  // Line 3:
+ *                                  //
+ *                                  // select the @p SolverCG
+ * solver_selector.select("cg");
+ *                                  // generate e.g. a @p PreconditionRelaxation
+ * PreconditionRelaxation<SparseMatrix<double>, Vector<double> >
+ *   preconditioning(A, &SparseMatrix<double>
+ *                   ::template precondition_SSOR<double>,0.8);
+ *                                  // call the @p solve function with this
+ *                                  // preconditioning as last argument
+ * solver_selector.solve(A,x,b,preconditioning);
+ * @endcode
+ * But the full usefulness of the @p SolverSelector class only becomes clear
+ * with the following example, which assumes that the user employs the
+ * @p ParameterHandler class and has declared a "solver" entry, e.g. with
+ * @code
+ * ParameterHandler prm;
+ * prm.declare_entry ("solver", "none",
+ *                    Patterns::Selection(SolverSelector<>::get_solver_names()));
+ * ...
+ * @endcode
+ * Assuming that in the user's parameter file there exists the line
+ * @verbatim
+ * set solver = cg
+ * @endverbatim
+ * then `Line 3' of the above example reads
+ * @code
+ *                                  // take the name from the parameter file
+ * solver_selector.select(prm.get("solver"));
+ * @endcode
+ *
+ *
+ * If at some time there exists a new solver "xyz" then the user does not need
+ * to change his program. Only in the implementation of the @p SolverSelector
+ * the calling of this solver has to be added and each user with program lines
+ * quoted above only needs to 'set solver = xyz' in his parameter file to get
+ * access to that new solver.  :-)
+ *
+ * (By the way, thanks to Wolfgang for implementing the @p ParameterHandler.)
+ *
+ * @author Ralf Hartmann, 1999
+ */
+template <typename VectorType = Vector<double> >
+class SolverSelector : public Subscriptor
+{
+public:
+  /**
+   * A typedef for the underlying vector type
+   */
+  typedef VectorType vector_type;
+
+  /**
+   * Constructor, filling in default values
+   */
+  SolverSelector ();
+
+  /**
+   * Destructor
+   */
+  ~SolverSelector();
+
+  /**
+   * Solver procedure. Calls the @p solve function of the solver whose name
+   * was set with select(), using the SolverControl passed to set_control()
+   * and the additional data stored for that solver.
+   */
+  template<class Matrix, class Preconditioner>
+  void solve (const Matrix         &A,
+              VectorType           &x,
+              const VectorType     &b,
+              const Preconditioner &precond) const;
+
+  /**
+   * Select a new solver. Note that the solver names used in this class are
+   * all lower case; see get_solver_names().
+   */
+  void select(const std::string &name);
+
+  /**
+   * Set a new SolverControl. This needs to be set before solving.
+   */
+  void set_control(SolverControl &ctrl);
+
+  /**
+   * Set the additional data for SolverRichardson. See that class for details.
+   */
+  void set_data(const typename SolverRichardson<VectorType>
+                ::AdditionalData &data);
+
+  /**
+   * Set the additional data for SolverCG. See that class for details.
+   */
+  void set_data(const typename SolverCG<VectorType>
+                ::AdditionalData &data);
+
+  /**
+   * Set the additional data for SolverMinRes. See that class for details.
+   */
+  void set_data(const typename SolverMinRes<VectorType>
+                ::AdditionalData &data);
+
+  /**
+   * Set the additional data for SolverBicgstab. See that class for details.
+   */
+  void set_data(const typename SolverBicgstab<VectorType>
+                ::AdditionalData &data);
+
+  /**
+   * Set the additional data for SolverGMRES. See that class for details.
+   */
+  void set_data(const typename SolverGMRES<VectorType>
+                ::AdditionalData &data);
+
+  /**
+   * Set the additional data for SolverFGMRES. See that class for details.
+   */
+  void set_data(const typename SolverFGMRES<VectorType>
+                ::AdditionalData &data);
+
+  /**
+   * Get the names of all implemented solvers.
+   */
+  static std::string get_solver_names ();
+
+  /**
+   * Exception used when the selected solver name is not recognized.
+   */
+  DeclException1 (ExcSolverDoesNotExist,
+                  std::string, << "Solver " << arg1 << " does not exist. Use one of "
+                  << std::endl << get_solver_names());
+
+
+
+protected:
+  /**
+   * Stores the @p SolverControl that is needed in the constructor of each @p
+   * Solver class. This can be changed with @p set_control().
+   */
+  SmartPointer< SolverControl, SolverSelector< VectorType > > control;
+
+  /**
+   * Stores the name of the solver.
+   */
+  std::string solver_name;
+
+private:
+  /**
+   * Stores the additional data for SolverRichardson.
+   */
+  typename SolverRichardson<VectorType>::AdditionalData richardson_data;
+
+  /**
+   * Stores the additional data for SolverCG.
+   */
+  typename SolverCG<VectorType>::AdditionalData cg_data;
+
+  /**
+   * Stores the additional data for SolverMinRes.
+   */
+  typename SolverMinRes<VectorType>::AdditionalData minres_data;
+
+  /**
+   * Stores the additional data for SolverBicgstab.
+   */
+  typename SolverBicgstab<VectorType>::AdditionalData bicgstab_data;
+
+  /**
+   * Stores the additional data for SolverGMRES.
+   */
+  typename SolverGMRES<VectorType>::AdditionalData gmres_data;
+
+  /**
+   * Stores the additional data for SolverFGMRES.
+   */
+  typename SolverFGMRES<VectorType>::AdditionalData fgmres_data;
+};
+
+/*@}*/
+/* --------------------- Inline and template functions ------------------- */
+
+
+template <typename VectorType>
+SolverSelector<VectorType>::SolverSelector ()
+{ /* all members are default-constructed */ }
+
+
+template <typename VectorType>
+SolverSelector<VectorType>::~SolverSelector ()
+{ /* intentionally empty */ }
+
+
+// Remember the name of the solver that solve() should dispatch to.
+template <typename VectorType>
+void SolverSelector<VectorType>::select (const std::string &name)
+{
+  this->solver_name = name;
+}
+
+
+// Run the solver named by select(); an unknown name throws in all builds.
+template <typename VectorType>
+template<class Matrix, class Preconditioner>
+void SolverSelector<VectorType>::solve (const Matrix         &A,
+                                        VectorType           &x,
+                                        const VectorType     &b,
+                                        const Preconditioner &precond) const
+{
+  if (solver_name=="richardson")
+    {
+      SolverRichardson<VectorType> solver(*control, richardson_data);
+      solver.solve(A,x,b,precond);
+    }
+  else if (solver_name=="cg")
+    {
+      SolverCG<VectorType> solver(*control, cg_data);
+      solver.solve(A,x,b,precond);
+    }
+  else if (solver_name=="minres")
+    {
+      SolverMinRes<VectorType> solver(*control, minres_data);
+      solver.solve(A,x,b,precond);
+    }
+  else if (solver_name=="bicgstab")
+    {
+      SolverBicgstab<VectorType> solver(*control, bicgstab_data);
+      solver.solve(A,x,b,precond);
+    }
+  else if (solver_name=="gmres")
+    {
+      SolverGMRES<VectorType> solver(*control, gmres_data);
+      solver.solve(A,x,b,precond);
+    }
+  else if (solver_name=="fgmres")
+    {
+      SolverFGMRES<VectorType> solver(*control, fgmres_data);
+      solver.solve(A,x,b,precond);
+    }
+  else
+    AssertThrow(false,ExcSolverDoesNotExist(solver_name));
+}
+
+
+// Point the selector at the SolverControl object used by later solves.
+template <typename VectorType>
+void
+SolverSelector<VectorType>::set_control (SolverControl &ctrl)
+{ this->control = &ctrl; }
+
+
+// List the accepted solver names, separated by '|'.
+template <typename VectorType>
+std::string
+SolverSelector<VectorType>::get_solver_names ()
+{ return std::string("richardson|cg|bicgstab|gmres|fgmres|minres"); }
+
+
+// Keep a copy of the settings to use whenever "gmres" is selected.
+template <typename VectorType>
+void
+SolverSelector<VectorType>::set_data
+(const typename SolverGMRES<VectorType>::AdditionalData &data)
+{ this->gmres_data = data; }
+
+
+// Keep a copy of the settings to use whenever "fgmres" is selected.
+template <typename VectorType>
+void
+SolverSelector<VectorType>::set_data
+(const typename SolverFGMRES<VectorType>::AdditionalData &data)
+{ this->fgmres_data = data; }
+
+
+// Keep a copy of the settings to use whenever "richardson" is selected.
+template <typename VectorType>
+void
+SolverSelector<VectorType>::set_data
+(const typename SolverRichardson<VectorType>::AdditionalData &data)
+{ this->richardson_data = data; }
+
+
+// Keep a copy of the settings to use whenever "cg" is selected.
+template <typename VectorType>
+void
+SolverSelector<VectorType>::set_data
+(const typename SolverCG<VectorType>::AdditionalData &data)
+{ this->cg_data = data; }
+
+
+// Keep a copy of the settings to use whenever "minres" is selected.
+template <typename VectorType>
+void
+SolverSelector<VectorType>::set_data
+(const typename SolverMinRes<VectorType>::AdditionalData &data)
+{ this->minres_data = data; }
+
+
+// Keep a copy of the settings to use whenever "bicgstab" is selected.
+template <typename VectorType>
+void
+SolverSelector<VectorType>::set_data
+(const typename SolverBicgstab<VectorType>::AdditionalData &data)
+{ this->bicgstab_data = data; }
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/sparse_decomposition.h b/include/deal.II/lac/sparse_decomposition.h
new file mode 100644
index 0000000..2d89dda
--- /dev/null
+++ b/include/deal.II/lac/sparse_decomposition.h
@@ -0,0 +1,424 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2002 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__sparse_decomposition_h
+#define dealii__sparse_decomposition_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/lac/sparse_matrix.h>
+
+#include <cmath>
+
+DEAL_II_NAMESPACE_OPEN
+
+/*! @addtogroup Preconditioners
+ *@{
+ */
+
+/**
+ * Abstract base class for incomplete decompositions of a sparse matrix into
+ * sparse factors. This class can't be used by itself, but only as the base
+ * class of derived classes that actually implement particular decompositions
+ * such as SparseILU or SparseMIC.
+ *
+ * The decomposition is stored as a sparse matrix which is why this class is
+ * derived from the SparseMatrix. Since it is not a matrix in the usual sense
+ * (the stored entries are not those of a matrix, but of the two factors of
+ * the original matrix), the derivation is <tt>protected</tt> rather than
+ * <tt>public</tt>.
+ *
+ *
+ * <h3>Fill-in</h3>
+ *
+ * Sparse decompositions are frequently used with additional fill-in, i.e.,
+ * the sparsity structure of the decomposition is denser than that of the
+ * matrix to be decomposed. The initialize() function of this class allows
+ * this fill-in via the AdditionalData object as long as all entries present
+ * in the original matrix are present in the decomposition also, i.e. the
+ * sparsity pattern of the decomposition is a superset of the sparsity pattern
+ * in the original matrix.
+ *
+ * Such fill-in can be accomplished by various ways, one of which is the copy-
+ * constructor of the SparsityPattern class that allows the addition of side-
+ * diagonals to a given sparsity structure.
+ *
+ *
+ * <h3>Unified use of preconditioners</h3>
+ *
+ * While objects of this class can not be used directly (this class is only a
+ * base class for others implementing actual decompositions), derived classes
+ * such as SparseILU and SparseMIC can be used in the usual form as
+ * preconditioners. For example, this works:
+ * @code
+ * SparseILU<double> ilu;
+ * ilu.initialize(matrix, SparseILU<double>::AdditionalData(...));
+ *
+ * somesolver.solve (A, x, f, ilu);
+ * @endcode
+ *
+ * Through the AdditionalData object it is possible to specify additional
+ * parameters of the LU decomposition.
+ *
+ * 1/ The matrix diagonal can be strengthened by adding
+ * <code>strengthen_diagonal</code> times the sum of the absolute row entries
+ * of each row to the respective diagonal entries. By default no strengthening
+ * is performed.
+ *
+ * 2/ By default, each initialize() function call creates its own sparsity.
+ * For that, it copies the sparsity of <code>matrix</code> and adds a specific
+ * number of extra off diagonal entries specified by
+ * <code>extra_off_diagonals</code>.
+ *
+ * 3/ By setting <code>use_previous_sparsity=true</code> the sparsity is not
+ * recreated but the sparsity of the previous initialize() call is reused
+ * (recycled). This might be useful when several linear problems on the same
+ * sparsity need to be solved, as for example several Newton iteration steps on
+ * the same triangulation. The default is <code>false</code>.
+ *
+ * 4/ It is possible to give a user defined sparsity to
+ * <code>use_this_sparsity</code>. Then, no sparsity is created but
+ * <code>*use_this_sparsity</code> is used to store the decomposed matrix. For
+ * restrictions on the sparsity see section `Fill-in' above.
+ *
+ *
+ * <h3>Particular implementations</h3>
+ *
+ * It is enough to override the initialize() and vmult() methods to implement
+ * particular LU decompositions, like the true LU, or the Cholesky
+ * decomposition. Additionally, if that decomposition needs fine tuned
+ * diagonal strengthening on a per row basis, it may override the
+ * get_strengthen_diagonal() method.
+ *
+ * @author Stephen "Cheffo" Kolaroff, 2002, based on SparseILU implementation
+ * by Wolfgang Bangerth; unified interface: Ralf Hartmann, 2003; extension for
+ * full compatibility with LinearOperator class: Jean-Paul Pelteret, 2015
+ */
+template <typename number>
+class SparseLUDecomposition : protected SparseMatrix<number>,
+  public virtual Subscriptor
+{
+protected:
+  /**
+   * Constructor. Does nothing.
+   *
+   * Call the initialize() function before using this object as preconditioner
+   * (vmult()).
+   */
+  SparseLUDecomposition ();
+
+public:
+  /**
+   * Declare type for container size.
+   */
+  typedef typename SparseMatrix<number>::size_type size_type;
+
+  /**
+   * Destruction. Mark the destructor pure to ensure that this class isn't
+   * used directly, but only its derived classes.
+   */
+  virtual ~SparseLUDecomposition () = 0;
+
+  /**
+   * Deletes all member variables. Leaves the class in the state that it had
+   * directly after calling the constructor
+   */
+  virtual void clear();
+
+  /**
+   * Parameters for SparseDecomposition.
+   */
+  class AdditionalData
+  {
+  public:
+    /**
+     * Constructor. For the parameters' description, see below.
+     */
+    AdditionalData (const double strengthen_diagonal=0,
+                    const unsigned int extra_off_diagonals=0,
+                    const bool use_previous_sparsity=false,
+                    const SparsityPattern *use_this_sparsity=0);
+
+    /**
+     * <code>strengthen_diag</code> times the sum of absolute row entries is
+     * added to the diagonal entries.
+     *
+     * Per default, this value is zero, i.e. the diagonal is not strengthened.
+     */
+    double strengthen_diagonal;
+
+    /**
+     * By default, the <code>initialize(matrix, data)</code> function creates
+     * its own sparsity. This sparsity has the same SparsityPattern as
+     * <code>matrix</code> with some extra off diagonals the number of which
+     * is specified by <code>extra_off_diagonals</code>.
+     *
+     * The user can give a SparsityPattern to <code>use_this_sparsity</code>.
+     * Then this sparsity is used and the <code>extra_off_diagonals</code>
+     * argument is ignored.
+     */
+    unsigned int extra_off_diagonals;
+
+    /**
+     * If this flag is true the initialize() function uses the same sparsity
+     * that was used during the previous initialize() call.
+     *
+     * This might be useful when several linear problems on the same sparsity
+     * need to solved, as for example several Newton iteration steps on the
+     * same triangulation.
+     */
+    bool use_previous_sparsity;
+
+    /**
+     * When a SparsityPattern is given to this argument, the initialize()
+     * function calls <code>reinit(*use_this_sparsity)</code> causing this
+     * sparsity to be used.
+     *
+     * Note that the sparsity structures of <code>*use_this_sparsity</code>
+     * and the matrix passed to the initialize function need not be equal.
+     * Fill-in is allowed, as well as filtering out some elements in the
+     * matrix.
+     */
+    const SparsityPattern *use_this_sparsity;
+  };
+
+  /**
+   * This function needs to be called before an object of this class is used
+   * as preconditioner.
+   *
+   * For more detail about possible parameters, see the class documentation
+   * and the documentation of the SparseLUDecomposition::AdditionalData class.
+   *
+   * According to the <code>parameters</code>, this function creates a new
+   * SparsityPattern or keeps the previous sparsity or takes the sparsity
+   * given by the user to <code>data</code>. Then, this function performs the
+   * LU decomposition.
+   *
+   * After this function is called the preconditioner is ready to be used
+   * (using the <code>vmult</code> function of derived classes).
+   */
+  template <typename somenumber>
+  void initialize (const SparseMatrix<somenumber> &matrix,
+                   const AdditionalData parameters);
+
+  /**
+   * Return whether the object is empty. It calls the inherited
+   * SparseMatrix::empty() function.
+   */
+  bool empty () const;
+
+  /**
+   * Return the dimension of the codomain (or range) space. It calls the
+   * inherited SparseMatrix::m() function. To remember: the matrix is of
+   * dimension $m \times n$.
+   */
+  size_type m () const;
+
+  /**
+   * Return the dimension of the domain space. It calls the  inherited
+   * SparseMatrix::n() function. To remember: the matrix is of dimension $m
+   * \times n$.
+   */
+  size_type n () const;
+
+  /**
+   * Adding Matrix-vector multiplication. Add <i>M*src</i> on <i>dst</i> with
+   * <i>M</i> being this matrix.
+   *
+   * Source and destination must not be the same vector.
+   *
+   */
+  template <class OutVector, class InVector>
+  void vmult_add (OutVector &dst,
+                  const InVector &src) const;
+
+  /**
+   * Adding Matrix-vector multiplication. Add <i>M<sup>T</sup>*src</i> to
+   * <i>dst</i> with <i>M</i> being this matrix. This function does the same
+   * as vmult_add() but takes the transposed matrix.
+   *
+   * Source and destination must not be the same vector.
+   */
+  template <class OutVector, class InVector>
+  void Tvmult_add (OutVector &dst,
+                   const InVector &src) const;
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  virtual std::size_t memory_consumption () const;
+
+  /**
+   * @addtogroup Exceptions
+   * @{
+   */
+
+  /**
+   * Exception
+   */
+  DeclException1 (ExcInvalidStrengthening,
+                  double,
+                  << "The strengthening parameter " << arg1
+                  << " is not greater or equal than zero!");
+  //@}
+protected:
+  /**
+   * Copies the passed SparseMatrix onto this object. This object's sparsity
+   * pattern remains unchanged.
+   */
+  template<typename somenumber>
+  void copy_from (const SparseMatrix<somenumber> &matrix);
+
+  /**
+   * Performs the strengthening loop. For each row, computes the sum of the
+   * absolute values of the entries following the diagonal entry (rowsum),
+   * obtains the strengthening factor sf from get_strengthen_diagonal(), and
+   * adds <code>sf*rowsum</code> to the diagonal entry.
+   */
+  virtual void strengthen_diagonal_impl ();
+
+  /**
+   * In the decomposition phase, computes a strengthening factor for the
+   * diagonal entry in row <code>row</code> with sum of absolute values of its
+   * elements <code>rowsum</code>.
+   *
+   * @note The default implementation in SparseLUDecomposition returns
+   * <code>strengthen_diagonal</code>'s value.
+   */
+  virtual number get_strengthen_diagonal(const number rowsum, const size_type row) const;
+
+  /**
+   * The default strengthening value, returned by get_strengthen_diagonal().
+   */
+  double  strengthen_diagonal;
+
+  /**
+   * For every row in the underlying SparsityPattern, this array contains a
+   * pointer to the row's first afterdiagonal entry. Becomes available after
+   * invocation of decompose().
+   */
+  std::vector<const size_type *> prebuilt_lower_bound;
+
+  /**
+   * Fills the #prebuilt_lower_bound array.
+   */
+  void prebuild_lower_bound ();
+
+private:
+
+  /**
+   * In general this pointer is zero except for the case that no
+   * SparsityPattern is given to this class. Then, a SparsityPattern is
+   * created and is passed down to the SparseMatrix base class.
+   *
+   * Nevertheless, the SparseLUDecomposition needs to keep ownership of this
+   * sparsity. It keeps this pointer to it enabling it to delete this sparsity
+   * at destruction time.
+   */
+  SparsityPattern *own_sparsity;
+};
+
+/*@}*/
+//---------------------------------------------------------------------------
+
+#ifndef DOXYGEN
+
+template <typename number>
+inline number
+SparseLUDecomposition<number>::
+get_strengthen_diagonal(const number /*rowsum*/,
+                        const size_type /*row*/) const
+{
+  return strengthen_diagonal;
+}
+
+
+
+template <typename number>
+inline bool
+SparseLUDecomposition<number>::empty () const
+{
+  return SparseMatrix<number>::empty();
+}
+
+
+template <typename number>
+inline typename SparseLUDecomposition<number>::size_type
+SparseLUDecomposition<number>::m () const
+{
+  return SparseMatrix<number>::m();
+}
+
+
+template <typename number>
+inline typename SparseLUDecomposition<number>::size_type
+SparseLUDecomposition<number>::n () const
+{
+  return SparseMatrix<number>::n();
+}
+
+// Note: This function is required for full compatibility with
+// the LinearOperator class. ::MatrixInterfaceWithVmultAdd
+// picks up the vmult_add function in the protected SparseMatrix
+// base class.
+template <typename number>
+template <class OutVector, class InVector>
+inline void
+SparseLUDecomposition<number>::vmult_add (OutVector &dst,
+                                          const InVector &src) const
+{
+  OutVector tmp;
+  tmp.reinit(dst);
+  this->vmult(tmp, src);
+  dst += tmp;
+}
+
+// Note: This function is required for full compatibility with
+// the LinearOperator class. ::MatrixInterfaceWithVmultAdd
+// picks up the vmult_add function in the protected SparseMatrix
+// base class.
+template <typename number>
+template <class OutVector, class InVector>
+inline void
+SparseLUDecomposition<number>::Tvmult_add (OutVector &dst,
+                                           const InVector &src) const
+{
+  OutVector tmp;
+  tmp.reinit(dst);
+  this->Tvmult(tmp, src);
+  dst += tmp;
+}
+
+//---------------------------------------------------------------------------
+
+
+template <typename number>
+SparseLUDecomposition<number>::AdditionalData::AdditionalData (
+  const double strengthen_diag,
+  const unsigned int extra_off_diag,
+  const bool use_prev_sparsity,
+  const SparsityPattern *use_this_spars):
+  strengthen_diagonal(strengthen_diag),
+  extra_off_diagonals(extra_off_diag),
+  use_previous_sparsity(use_prev_sparsity),
+  use_this_sparsity(use_this_spars)
+{}
+
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // dealii__sparse_decomposition_h
diff --git a/include/deal.II/lac/sparse_decomposition.templates.h b/include/deal.II/lac/sparse_decomposition.templates.h
new file mode 100644
index 0000000..665e9cb
--- /dev/null
+++ b/include/deal.II/lac/sparse_decomposition.templates.h
@@ -0,0 +1,235 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2002 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__sparse_decomposition_templates_h
+#define dealii__sparse_decomposition_templates_h
+
+
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/base/template_constraints.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/lac/sparse_decomposition.h>
+#include <algorithm>
+#include <cstring>
+
+DEAL_II_NAMESPACE_OPEN
+
+template<typename number>
+SparseLUDecomposition<number>::SparseLUDecomposition()
+  :
+  SparseMatrix<number>(),
+  own_sparsity(0)
+{}
+
+
+
+template<typename number>
+SparseLUDecomposition<number>::~SparseLUDecomposition()
+{
+  clear();
+}
+
+
+template<typename number>
+void SparseLUDecomposition<number>::clear()
+{
+  // swapping with an empty temporary drops the cached row pointers
+  std::vector<const size_type *>().swap (prebuilt_lower_bound);
+
+  // let the base class let go of the sparsity pattern first ...
+  SparseMatrix<number>::clear();
+
+  // ... and only then destroy a pattern owned by this object, if there is
+  // one; deleting a null pointer is a no-op, so no explicit check is needed
+  delete own_sparsity;
+  own_sparsity = 0;
+}
+
+
+
+// Choose the sparsity pattern for the decomposition and reinit the base
+// class matrix with it. Order of precedence: data.use_this_sparsity; the
+// pattern used previously (if requested, and this object is non-empty and
+// has as many rows as @p matrix); the pattern of @p matrix (if no extra
+// off-diagonals are requested); else a new pattern owned by this object.
+template<typename number>
+template <typename somenumber>
+void SparseLUDecomposition<number>::initialize (
+  const SparseMatrix<somenumber> &matrix,
+  const AdditionalData data)
+{
+  const SparsityPattern &matrix_sparsity=matrix.get_sparsity_pattern();
+
+  const SparsityPattern *sparsity_pattern_to_use = 0;
+
+  if (data.use_this_sparsity)
+    sparsity_pattern_to_use = data.use_this_sparsity;
+  else if (data.use_previous_sparsity &&
+           !this->empty() &&
+           (this->m()==matrix.m()))
+    {
+      // Use the sparsity that was previously used. This is particularly
+      // useful when matrix entries change but not the sparsity, as for
+      // the case of several Newton iteration steps on an unchanged grid.
+      sparsity_pattern_to_use = &this->get_sparsity_pattern();
+    }
+  else if (data.extra_off_diagonals==0)
+    {
+      // Use same sparsity as matrix
+      sparsity_pattern_to_use = &matrix_sparsity;
+    }
+  else
+    {
+      // Create new sparsity. For the case that own_sparsity wasn't
+      // deleted before (e.g. by clear()), do it here.
+      if (own_sparsity)
+        {
+          // release the sparsity
+          SparseMatrix<number>::clear();
+          // delete it, and reset the pointer right away: if the
+          // allocation below throws, clear() (called from the
+          // destructor) must not delete the old pattern a second time
+          delete own_sparsity;
+          own_sparsity = 0;
+        }
+
+      // and recreate
+      own_sparsity = new SparsityPattern(matrix_sparsity,
+                                         matrix_sparsity.max_entries_per_row()
+                                         +2*data.extra_off_diagonals,
+                                         data.extra_off_diagonals);
+      own_sparsity->compress();
+      sparsity_pattern_to_use = own_sparsity;
+    }
+
+  // now use this sparsity pattern; it has to be square
+  Assert (sparsity_pattern_to_use->n_rows()==sparsity_pattern_to_use->n_cols(),
+          typename SparsityPattern::ExcDiagonalNotOptimized());
+  {
+    // drop the cached row pointers, which referred to the old pattern
+    std::vector<const size_type *> tmp;
+    tmp.swap (prebuilt_lower_bound);
+  }
+  SparseMatrix<number>::reinit (*sparsity_pattern_to_use);
+}
+
+
+
+template<typename number>
+void
+SparseLUDecomposition<number>::prebuild_lower_bound()
+{
+  const SparsityPattern &pattern = this->get_sparsity_pattern();
+  const size_type   *const colnums  = pattern.colnums;
+  const std::size_t *const rowstart = pattern.rowstart;
+  const size_type n_rows = this->m();
+
+  prebuilt_lower_bound.resize (n_rows);
+
+  // For every row, skip its first stored entry (the +1) and cache what
+  // Utilities::lower_bound returns for the row number in the remainder.
+  for (size_type r=0; r<n_rows; ++r)
+    {
+      const size_type *const first = colnums + rowstart[r] + 1;
+      const size_type *const last  = colnums + rowstart[r+1];
+      prebuilt_lower_bound[r] = Utilities::lower_bound (first, last, r);
+    }
+}
+
+// Copy the entries of @p matrix into this object, keeping this object's
+// sparsity pattern; entries without a counterpart are dropped or left zero.
+template <typename number>
+template <typename somenumber>
+void
+SparseLUDecomposition<number>::copy_from (const SparseMatrix<somenumber> &matrix)
+{
+  // same sparsity pattern object: the value arrays correspond one to one
+  if (&this->get_sparsity_pattern() == &matrix.get_sparsity_pattern())
+    {
+      const somenumber *input_ptr = matrix.val;
+      number *this_ptr = this->val;
+      const number *const end_ptr = this_ptr + this->n_nonzero_elements();
+      if (types_are_equal<somenumber, number>::value == true)
+        std::memcpy (this_ptr, input_ptr, this->n_nonzero_elements()*sizeof(number));
+      else
+        for ( ; this_ptr != end_ptr; ++input_ptr, ++this_ptr)
+          *this_ptr = *input_ptr;
+      return;
+    }
+
+  // zero all entries; qualify the call so the base class operator= is found
+  SparseMatrix<number>::operator= (number(0));
+
+  // both allow more and less entries in the new matrix
+  for (size_type row=0; row<this->m(); ++row)
+    {
+      typename SparseMatrix<number>::iterator index = this->begin(row);
+      typename SparseMatrix<somenumber>::const_iterator in_index = matrix.begin(row);
+      index->value() = in_index->value();
+      ++index, ++in_index;
+      while (index < this->end(row) && in_index < matrix.end(row))
+        {
+          const size_type col = index->column(), in_col = in_index->column();
+          // copy only on a column match; advance whichever side is behind
+          if (col == in_col)
+            index->value() = in_index->value();
+          if (col <= in_col)
+            ++index;
+          if (in_col <= col)
+            ++in_index;
+        }
+    }
+}
+
+
+
+template <typename number>
+void
+SparseLUDecomposition<number>::strengthen_diagonal_impl ()
+{
+  typedef typename SparseMatrix<number>::iterator entry_iterator;
+
+  for (size_type r=0; r<this->m(); ++r)
+    {
+      // only square matrices are handled; the row's first entry is
+      // treated as its diagonal entry
+      Assert (this->m() == this->n(),  ExcNotImplemented());
+      entry_iterator diag = this->begin(r);
+      const entry_iterator row_end = this->end(r);
+
+      // sum up the absolute values of the entries following the diagonal
+      number offdiag_sum = 0;
+      for (entry_iterator e = diag + 1; e != row_end; ++e)
+        offdiag_sum += std::fabs(e->value());
+
+      diag->value() += this->get_strengthen_diagonal (offdiag_sum, r) * offdiag_sum;
+    }
+}
+
+
+
+template <typename number>
+std::size_t
+SparseLUDecomposition<number>::memory_consumption () const
+{
+  return (SparseMatrix<number>::memory_consumption () +
+          MemoryConsumption::memory_consumption(prebuilt_lower_bound));
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // dealii__sparse_decomposition_templates_h
diff --git a/include/deal.II/lac/sparse_direct.h b/include/deal.II/lac/sparse_direct.h
new file mode 100644
index 0000000..89b703e
--- /dev/null
+++ b/include/deal.II/lac/sparse_direct.h
@@ -0,0 +1,352 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__sparse_direct_h
+#define dealii__sparse_direct_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/thread_management.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/sparse_matrix_ez.h>
+#include <deal.II/lac/block_sparse_matrix.h>
+
+#ifdef DEAL_II_WITH_UMFPACK
+#  include <umfpack.h>
+#endif
+#ifndef SuiteSparse_long
+#  define SuiteSparse_long long int
+#endif
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * This class provides an interface to the sparse direct solver UMFPACK (see
+ * <a href="http://www.cise.ufl.edu/research/sparse/umfpack">this link</a>).
+ * UMFPACK is a set of routines for solving non-symmetric sparse linear
+ * systems, Ax=b, using the Unsymmetric-pattern MultiFrontal method and direct
+ * sparse LU factorization. Matrices may have symmetric or unsymmetric
+ * sparsity patterns, and may have unsymmetric entries. The use of this class
+ * is explained in the
+ * @ref step_22 "step-22"
+ * and
+ * @ref step_29 "step-29"
+ * tutorial programs.
+ *
+ * This matrix class implements the usual interface of preconditioners, that
+ * is a function initialize(const SparseMatrix<double>&matrix,const
+ * AdditionalData) for initializing and the whole set of vmult() functions
+ * common to all matrices. Implemented here are only vmult() and vmult_add(),
+ * which perform multiplication with the inverse matrix. Furthermore, this
+ * class provides an older interface, consisting of the functions factorize()
+ * and solve(). Both interfaces are interchangeable.
+ *
+ * @note This class exists if the <a
+ * href="http://faculty.cse.tamu.edu/davis/suitesparse.html">UMFPACK</a>
+ * interface was not explicitly disabled during configuration.
+ *
+ * @note UMFPACK has its own license, independent of that of deal.II. If you
+ * want to use the UMFPACK you have to accept that license. It is linked to
+ * from the deal.II ReadMe file. UMFPACK is included courtesy of its author,
+ * <a href="http://www.cise.ufl.edu/~davis/">Timothy A. Davis</a>.
+ *
+ *
+ * <h4>Instantiations</h4>
+ *
+ * There are instantiations of this class for SparseMatrix<double>,
+ * SparseMatrix<float>, SparseMatrixEZ<float>, SparseMatrixEZ<double>,
+ * BlockSparseMatrix<double>, and BlockSparseMatrix<float>.
+ *
+ * @ingroup Solvers Preconditioners
+ *
+ * @author Wolfgang Bangerth, 2004; extension for full compatibility with
+ * LinearOperator class: Jean-Paul Pelteret, 2015
+ */
+class SparseDirectUMFPACK : public Subscriptor
+{
+public:
+  /**
+   * Declare type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * Dummy class needed for the usual initialization interface of
+   * preconditioners.
+   */
+  class AdditionalData
+  {};
+
+
+  /**
+   * Constructor. See the documentation of this class for the meaning of the
+   * parameters to this function.
+   */
+  SparseDirectUMFPACK ();
+
+  /**
+   * Destructor.
+   */
+  ~SparseDirectUMFPACK ();
+
+  /**
+   * @name Setting up a sparse factorization
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * This function does nothing. It is only here to provide an interface
+   * consistent with other sparse direct solvers.
+   */
+  void initialize (const SparsityPattern &sparsity_pattern);
+
+  /**
+   * Factorize the matrix. This function may be called multiple times for
+   * different matrices, after the object of this class has been initialized
+   * for a certain sparsity pattern. You may therefore save some computing
+   * time if you want to invert several matrices with the same sparsity
+   * pattern. However, note that the bulk of the computing time is actually
+   * spent in the factorization, so this functionality may not always be of
+   * large benefit.
+   *
+   * In contrast to the other direct solver classes, the initialize() function
+   * taking a SparsityPattern does nothing. Therefore it is not automatically
+   * called by this function if that initialization step was not performed yet.
+   *
+   * This function copies the contents of the matrix into its own storage; the
+   * matrix can therefore be deleted after this operation, even if subsequent
+   * solves are required.
+   */
+  template <class Matrix>
+  void factorize (const Matrix &matrix);
+
+  /**
+   * Initialize memory and call SparseDirectUMFPACK::factorize.
+   */
+  template <class Matrix>
+  void initialize(const Matrix &matrix,
+                  const AdditionalData additional_data = AdditionalData());
+
+  /**
+   * @}
+   */
+
+  /**
+   * @name Functions that represent the inverse of a matrix
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * Preconditioner interface function. Usually, given the source vector, this
+   * method returns an approximate solution of <i>Ax = b</i>. As this class
+   * provides a wrapper to a direct solver, here it is actually the exact
+   * solution (exact within the range of numerical accuracy of course).
+   *
+   * In other words, this function actually multiplies with the exact inverse
+   * of the matrix, $A^{-1}$.
+   */
+  void vmult (Vector<double> &dst,
+              const Vector<double> &src) const;
+
+  /**
+   * Same as before, but for block vectors.
+   */
+  void vmult (BlockVector<double> &dst,
+              const BlockVector<double> &src) const;
+
+  /**
+   * Same as before, but uses the transpose of the matrix, i.e. this function
+   * multiplies with $A^{-T}$.
+   */
+  void Tvmult (Vector<double> &dst,
+               const Vector<double> &src) const;
+
+  /**
+   * Same as before, but for block vectors
+   */
+  void Tvmult (BlockVector<double> &dst,
+               const BlockVector<double> &src) const;
+
+  /**
+   * Return the dimension of the codomain (or range) space. To remember: the
+   * matrix is of dimension $m \times n$.
+   */
+  size_type m () const;
+
+  /**
+   * Return the dimension of the domain space. To remember: the matrix is of
+   * dimension $m \times n$.
+   */
+  size_type n () const;
+
+  /**
+   * @}
+   */
+
+  /**
+   * @name Functions that solve linear systems
+   */
+  /**
+   * @{
+   */
+
+  /**
+   * Solve for a certain right hand side vector. This function may be called
+   * multiple times for different right hand side vectors after the matrix has
+   * been factorized. This yields a big saving in computing time, since the
+   * actual solution is fast, compared to the factorization of the matrix.
+   *
+   * The solution will be returned in place of the right hand side vector.
+   *
+   * If the factorization has not happened before, strange things will happen.
+   * Note that we can't actually call the factorize() function from here if it
+   * has not yet been called, since we have no access to the actual matrix.
+   *
+   * If @p transpose is set to true this function solves for the transpose of
+   * the matrix, i.e. $x=A^{-T}b$.
+   */
+  void solve (Vector<double> &rhs_and_solution, bool transpose = false) const;
+
+  /**
+   * Same as before, but for block vectors.
+   */
+  void solve (BlockVector<double> &rhs_and_solution, bool transpose = false) const;
+
+  /**
+   * Call the two functions factorize() and solve() in that order, i.e.
+   * perform the whole solution process for the given right hand side vector.
+   *
+   * The solution will be returned in place of the right hand side vector.
+   */
+  template <class Matrix>
+  void solve (const Matrix   &matrix,
+              Vector<double> &rhs_and_solution,
+              bool            transpose = false);
+
+  /**
+   * Same as before, but for block vectors.
+   */
+  template <class Matrix>
+  void solve (const Matrix        &matrix,
+              BlockVector<double> &rhs_and_solution,
+              bool                 transpose = false);
+
+  /**
+   * @}
+   */
+
+  /**
+   * One of the UMFPack routines threw an error. The error code is included in
+   * the output and can be looked up in the UMFPack user manual. The name of
+   * the routine is included for reference.
+   */
+  DeclException2 (ExcUMFPACKError, char *, int,
+                  << "UMFPACK routine " << arg1
+                  << " returned error status " << arg2 << "."
+                  << "\n\n"
+                  << ("A complete list of error codes can be found in the file "
+                      "<bundled/umfpack/UMFPACK/Include/umfpack.h>."
+                      "\n\n"
+                      "That said, the two most common errors that can happen are "
+                      "that your matrix cannot be factorized because it is "
+                      "rank deficient, and that UMFPACK runs out of memory "
+                      "because your problem is too large."
+                      "\n\n"
+                      "The first of these cases most often happens if you "
+                      "forget terms in your bilinear form necessary to ensure "
+                      "that the matrix has full rank, or if your equation has a "
+                      "spatially variable coefficient (or nonlinearity) that is "
+                      "supposed to be strictly positive but, for whatever "
+                      "reasons, is negative or zero. In either case, you probably "
+                      "want to check your assembly procedure. Similarly, a "
+                      "matrix can be rank deficient if you forgot to apply the "
+                      "appropriate boundary conditions. For example, the "
+                      "Laplace equation without boundary conditions has a "
+                      "single zero eigenvalue and its rank is therefore "
+                      "deficient by one."
+                      "\n\n"
+                      "The other common situation is that you run out of memory. "
+                      "On a typical laptop or desktop, it should easily be possible "
+                      "to solve problems with 100,000 unknowns in 2d. If you are "
+                      "solving problems with many more unknowns than that, in "
+                      "particular if you are in 3d, then you may be running out "
+                      "of memory and you will need to consider iterative "
+                      "solvers instead of the direct solver employed by "
+                      "UMFPACK."));
+
+private:
+  /**
+   * The dimension of the range space.
+   */
+  size_type _m;
+
+  /**
+   * The dimension of the domain space.
+   */
+  size_type _n;
+
+  /**
+   * The UMFPACK routines allocate objects in which they store information
+   * about symbolic and numeric values of the decomposition. The actual data
+   * type of these objects is opaque, and only passed around as void pointers.
+   */
+  void *symbolic_decomposition;
+  void *numeric_decomposition;
+
+  /**
+   * Free all memory that hasn't been freed yet.
+   */
+  void clear ();
+
+  /**
+   * Make sure that the arrays Ai and Ap are sorted in each row. UMFPACK wants
+   * it this way. We need to have three versions of this function, one for the
+   * usual SparseMatrix, one for the SparseMatrixEZ, and one for the
+   * BlockSparseMatrix classes
+   */
+  template <typename number>
+  void sort_arrays (const SparseMatrixEZ<number> &);
+
+  template <typename number>
+  void sort_arrays (const SparseMatrix<number> &);
+
+  template <typename number>
+  void sort_arrays (const BlockSparseMatrix<number> &);
+
+  /**
+   * The arrays in which we store the data for the solver. SuiteSparse_long
+   * has to be used here for Windows 64 build, if we used only long int,
+   * compilation would fail.
+   */
+  std::vector<SuiteSparse_long> Ap;
+  std::vector<SuiteSparse_long> Ai;
+  std::vector<double> Ax;
+
+  /**
+   * Control and info arrays for the solver routines.
+   */
+  std::vector<double> control;
+};
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // dealii__sparse_direct_h
diff --git a/include/deal.II/lac/sparse_ilu.h b/include/deal.II/lac/sparse_ilu.h
new file mode 100644
index 0000000..53c342b
--- /dev/null
+++ b/include/deal.II/lac/sparse_ilu.h
@@ -0,0 +1,165 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__sparse_ilu_h
+#define dealii__sparse_ilu_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/sparse_decomposition.h>
+#include <deal.II/lac/exceptions.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+/*! @addtogroup Preconditioners
+ *@{
+ */
+
+/**
+ * This class computes an Incomplete LU (ILU) decomposition of a sparse
+ * matrix, using either the same sparsity pattern or a different one. By
+ * incomplete we mean that unlike the exact decomposition, the incomplete one
+ * is also computed using sparse factors, and entries in the decomposition
+ * that do not fit into the given sparsity structure are discarded.
+ *
+ * The algorithm used by this class is essentially a copy of the algorithm
+ * given in the book Y. Saad: "Iterative methods for sparse linear systems",
+ * second edition, in section 10.3.2.
+ *
+ *
+ * <h3>Usage and state management</h3>
+ *
+ * Refer to SparseLUDecomposition documentation for suggested usage and state
+ * management. This class is used in the
+ * @ref step_22 "step-22"
+ * tutorial program.
+ *
+ * @note Instantiations for this template are provided for <tt>@<float@> and
+ * @<double@></tt>; others can be generated in application programs (see the
+ * section on
+ * @ref Instantiations
+ * in the manual).
+ *
+ * @author Wolfgang Bangerth, 2008, 2009; unified interface: Ralf Hartmann
+ */
+template <typename number>
+class SparseILU : public SparseLUDecomposition<number>
+{
+public:
+  /**
+   * Declare type for container size.
+   */
+  typedef typename SparseLUDecomposition<number>::size_type size_type;
+
+  /**
+   * Constructor. Does nothing.
+   *
+   * Call the @p initialize function before using this object as
+   * preconditioner.
+   */
+  SparseILU ();
+
+  /**
+   * Make SparseLUDecomposition::AdditionalData accessible to this class as
+   * well.
+   */
+  typedef
+  typename SparseLUDecomposition<number>::AdditionalData
+  AdditionalData;
+
+  /**
+   * Perform the incomplete LU factorization of the given matrix.
+   *
+   * This function needs to be called before an object of this class is used
+   * as preconditioner.
+   *
+   * For more details about possible parameters, see the class documentation
+   * of SparseLUDecomposition and the documentation of the @p
+   * SparseLUDecomposition::AdditionalData class.
+   *
+   * According to the @p parameters, this function creates a new
+   * SparsityPattern or keeps the previous sparsity or takes the sparsity
+   * given by the user in @p parameters. Then, this function performs the LU
+   * decomposition.
+   *
+   * After this function is called the preconditioner is ready to be used.
+   */
+  template <typename somenumber>
+  void initialize (const SparseMatrix<somenumber> &matrix,
+                   const AdditionalData &parameters = AdditionalData());
+
+  /**
+   * Apply the incomplete decomposition, i.e. do one forward-backward step
+   * $dst=(LU)^{-1}src$.
+   *
+   * The initialize() function needs to be called before.
+   */
+  template <typename somenumber>
+  void vmult (Vector<somenumber>       &dst,
+              const Vector<somenumber> &src) const;
+
+
+  /**
+   * Apply the transpose of the incomplete decomposition, i.e. do one forward-
+   * backward step $dst=(LU)^{-T}src$.
+   *
+   * The initialize() function needs to be called before.
+   */
+  template <typename somenumber>
+  void Tvmult (Vector<somenumber>       &dst,
+               const Vector<somenumber> &src) const;
+
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  std::size_t memory_consumption () const;
+
+  /**
+   * @addtogroup Exceptions
+   * @{
+   */
+
+  /**
+   * Exception
+   */
+  DeclException1 (ExcInvalidStrengthening,
+                  double,
+                  << "The strengthening parameter " << arg1
+                  << " is not greater or equal than zero!");
+  /**
+   * Exception
+   */
+  DeclException1 (ExcZeroPivot,
+                  size_type,
+                  << "While computing the ILU decomposition, the algorithm "
+                  "found a zero pivot on the diagonal of row "
+                  << arg1
+                  << ". This must stop the ILU algorithm because it means "
+                  "that the matrix for which you try to compute a "
+                  "decomposition is singular.");
+  //@}
+};
+
+/*@}*/
+//---------------------------------------------------------------------------
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // dealii__sparse_ilu_h
diff --git a/include/deal.II/lac/sparse_ilu.templates.h b/include/deal.II/lac/sparse_ilu.templates.h
new file mode 100644
index 0000000..53931fa
--- /dev/null
+++ b/include/deal.II/lac/sparse_ilu.templates.h
@@ -0,0 +1,303 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__sparse_ilu_templates_h
+#define dealii__sparse_ilu_templates_h
+
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/sparse_ilu.h>
+
+#include <algorithm>
+#include <cmath>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+template <typename number>
+SparseILU<number>::SparseILU ()
+{} // nothing to set up here; initialize() has to be called before vmult()/Tvmult()
+
+
+
+/**
+ * Compute the incomplete LU factorization of @p matrix and store it in this
+ * object; the diagonal entries of the result are stored inverted.
+ *
+ * @param matrix Square matrix to decompose; its size has to match m().
+ * @param data Parameters; @p data.strengthen_diagonal must not be negative.
+ *
+ * These conditions, and that no pivot is zero, are checked through Assert.
+ */
+template <typename number>
+template <typename somenumber>
+void SparseILU<number>::initialize (const SparseMatrix<somenumber> &matrix,
+                                    const AdditionalData &data)
+{
+  SparseLUDecomposition<number>::initialize(matrix, data);
+
+  Assert (matrix.m()==matrix.n(), ExcNotQuadratic ());
+  Assert (this->m()==this->n(),   ExcNotQuadratic ());
+  Assert (matrix.m()==this->m(),  ExcDimensionMismatch(matrix.m(), this->m()));
+
+  Assert (data.strengthen_diagonal>=0,
+          ExcInvalidStrengthening (data.strengthen_diagonal));
+
+  this->strengthen_diagonal = data.strengthen_diagonal;
+  this->prebuild_lower_bound ();
+  this->copy_from (matrix);
+
+  if (data.strengthen_diagonal>0)
+    this->strengthen_diagonal_impl();
+
+  // in the following, we implement algorithm 10.4 in the book by Saad by
+  // translating in essence the algorithm given at the end of section 10.3.2,
+  // using the names of variables used there
+  const SparsityPattern     &sparsity = this->get_sparsity_pattern();
+  const std::size_t *const ia    = sparsity.rowstart;
+  const size_type *const ja      = sparsity.colnums;
+
+  number *luval = this->SparseMatrix<number>::val;
+
+  const size_type N = this->m();
+  size_type jrow = 0;
+
+  // iw[c] is the position in ja/luval of column c of the current row, or
+  // invalid_size_type if row k has no such entry; it is reset after each row
+  std::vector<size_type> iw (N, numbers::invalid_size_type);
+
+  for (size_type k=0; k<N; ++k)
+    {
+      const size_type j1 = ia[k],
+                      j2 = ia[k+1]-1;
+
+      for (size_type p=j1; p<=j2; ++p)
+        iw[ja[p]] = p;
+
+      // the algorithm in the book works on the elements of row k left of the
+      // diagonal. however, since we store the diagonal element at the first
+      // position, start at the element after the diagonal and run as long as
+      // we don't walk into the right half. a row holding only the diagonal
+      // entry never enters the loop. j outlives the loop since the assertion
+      // below reads it
+      size_type j = j1+1;
+      for (; j<=j2; ++j)
+        {
+          jrow = ja[j];
+          if (jrow >= k)
+            break;
+
+          // actual computations:
+          const number t1 = luval[j] * luval[ia[jrow]];
+          luval[j] = t1;
+
+          // jj runs from just right of the diagonal to the end of the row
+          size_type jj = ia[jrow]+1;
+          while (ja[jj] < jrow)
+            ++jj;
+          for (; jj<ia[jrow+1]; ++jj)
+            {
+              const size_type jw = iw[ja[jj]];
+              if (jw != numbers::invalid_size_type)
+                luval[jw] -= t1 * luval[jj];
+            }
+        }
+
+      // in the book there is an assertion that we have hit the diagonal
+      // element, i.e. that jrow==k. however, we store the diagonal element at
+      // the front, so jrow must actually be larger than k or j is already in
+      // the next row
+      Assert ((jrow > k) || (j==ia[k+1]), ExcInternalError());
+
+      // now we have to deal with the diagonal element. in the book it is
+      // located at position 'j', but here we use the convention of storing
+      // the diagonal element first, so instead of j we use uptr[k]=ia[k]
+      Assert (luval[ia[k]] != 0, ExcZeroPivot(k));
+
+      luval[ia[k]] = 1./luval[ia[k]];
+
+      for (size_type p=j1; p<=j2; ++p)
+        iw[ja[p]] = numbers::invalid_size_type;
+    }
+}
+
+
+
+/**
+ * Apply the incomplete factorization: one forward and one backward
+ * substitution that together compute $dst=(LU)^{-1}src$.
+ *
+ * @param dst Result vector. Its size has to equal the number of rows m().
+ * @param src Right hand side. Its size has to equal that of @p dst.
+ *
+ * Both size conditions are verified through Assert. Both substitutions work
+ * in place on @p dst, so no temporary vector is needed.
+ *
+ * initialize() needs to have been called before.
+ */
+template <typename number>
+template <typename somenumber>
+void SparseILU<number>::vmult (Vector<somenumber>       &dst,
+                               const Vector<somenumber> &src) const
+{
+  Assert (dst.size() == src.size(), ExcDimensionMismatch(dst.size(), src.size()));
+  Assert (dst.size() == this->m(), ExcDimensionMismatch(dst.size(), this->m()));
+
+  const size_type N=dst.size();
+  const std::size_t *const rowstart_indices
+    = this->get_sparsity_pattern().rowstart;
+  const size_type *const column_numbers
+    = this->get_sparsity_pattern().colnums;
+
+  // solve LUx=b in two steps:
+  //   first Ly = b, then Ux = y.
+  //
+  // forward solve: the diagonal values of L are one, so there holds
+  //   y_i = b_i - sum_{j=0}^{i-1} L_{ij}y_j
+  // the y_i = b_i part is split off and done up front by copying src to dst
+  dst = src;
+  for (size_type row=0; row<N; ++row)
+    {
+      // start of this row, skipping the diagonal element that is stored first
+      const size_type *const rowstart = &column_numbers[rowstart_indices[row]+1];
+      // position where the part right of the diagonal starts
+      const size_type *const first_after_diagonal = this->prebuilt_lower_bound[row];
+
+      somenumber dst_row = dst(row);
+      const number *luval = this->SparseMatrix<number>::val +
+                            (rowstart - column_numbers);
+      for (const size_type *col=rowstart; col!=first_after_diagonal; ++col, ++luval)
+        dst_row -= *luval * dst(*col);
+      dst(row) = dst_row;
+    }
+
+  // backward solve, in place on dst. here we have to scale each row since
+  // the diagonal of U is not equal to one.
+  //
+  // the rows are visited from N-1 down to 0. the counter has type size_type
+  // and is decremented within the loop condition: an 'int' counter cannot
+  // represent every value of size_type, and a test of the form 'row>=0' is
+  // of no use for a counter of unsigned type
+  for (size_type row=N; row-- > 0;)
+    {
+      // end of this row
+      const size_type *const rowend = &column_numbers[rowstart_indices[row+1]];
+      // position where the part right of the diagonal starts
+      const size_type *const first_after_diagonal = this->prebuilt_lower_bound[row];
+
+      somenumber dst_row = dst(row);
+      const number *luval = this->SparseMatrix<number>::val +
+                            (first_after_diagonal - column_numbers);
+      for (const size_type *col=first_after_diagonal; col!=rowend; ++col, ++luval)
+        dst_row -= *luval * dst(*col);
+
+      // scale by the diagonal element, which initialize() stored inverted
+      dst(row) = dst_row * this->diag_element(row);
+    }
+}
+
+
+/**
+ * Apply the transpose of the incomplete factorization: one forward and one
+ * backward substitution that together compute $dst=(LU)^{-T}src$.
+ *
+ * @param dst Result vector. Its size has to equal the number of rows m().
+ * @param src Right hand side. Its size has to equal that of @p dst.
+ *
+ * Both size conditions are verified through Assert. A temporary vector of
+ * size N is allocated in every call. initialize() needs to be called before.
+ */
+template <typename number>
+template <typename somenumber>
+void SparseILU<number>::Tvmult (Vector<somenumber>       &dst,
+                                const Vector<somenumber> &src) const
+{
+  Assert (dst.size() == src.size(), ExcDimensionMismatch(dst.size(), src.size()));
+  Assert (dst.size() == this->m(), ExcDimensionMismatch(dst.size(), this->m()));
+
+  const size_type N=dst.size();
+  const std::size_t *const rowstart_indices
+    = this->get_sparsity_pattern().rowstart;
+  const size_type *const column_numbers
+    = this->get_sparsity_pattern().colnums;
+
+  // solve (LU)'x=b in two steps: first U'y = b, then L'x = y.
+  // forward solve: the columns of U' are the rows of U, so once y_row is
+  // known, its contributions to all later rows are accumulated in tmp. tmp is
+  // expected to start out as zero (presumably Vector(N) guarantees this)
+  Vector<somenumber> tmp (N);
+
+  dst = src;
+  for (size_type row=0; row<N; ++row)
+    {
+      dst(row) -= tmp (row);
+      // scale by the diagonal element, which initialize() stored inverted
+      dst(row) *= this->diag_element(row);
+
+      // end of this row
+      const size_type *const rowend = &column_numbers[rowstart_indices[row+1]];
+      // position where the part right of the diagonal starts
+      const size_type *const first_after_diagonal = this->prebuilt_lower_bound[row];
+
+      const somenumber dst_row = dst (row);
+      const number *luval = this->SparseMatrix<number>::val +
+                            (first_after_diagonal - column_numbers);
+      for (const size_type *col=first_after_diagonal; col!=rowend; ++col, ++luval)
+        tmp(*col) += *luval * dst_row;
+    }
+
+  // backward solve, in place on dst. no scaling is required here since the
+  // diagonal of L is one.
+  //
+  // the rows are visited from N-1 down to 0. the counter has type size_type
+  // and is decremented within the loop condition: an 'int' counter cannot
+  // represent every value of size_type, and a test of the form 'row>=0' is
+  // of no use for a counter of unsigned type
+  tmp = 0;
+  for (size_type row=N; row-- > 0;)
+    {
+      dst(row) -= tmp (row);
+
+      // start of this row, skipping the diagonal element that is stored first
+      const size_type *const rowstart = &column_numbers[rowstart_indices[row]+1];
+      // position where the part right of the diagonal starts
+      const size_type *const first_after_diagonal = this->prebuilt_lower_bound[row];
+
+      const somenumber dst_row = dst (row);
+      const number *luval = this->SparseMatrix<number>::val +
+                            (rowstart - column_numbers);
+      for (const size_type *col=rowstart; col!=first_after_diagonal; ++col, ++luval)
+        tmp(*col) += *luval * dst_row;
+    }
+}
+
+
+template <typename number>
+std::size_t
+SparseILU<number>::memory_consumption () const
+{
+  return SparseLUDecomposition<number>::memory_consumption (); // base-class estimate, passed through unchanged
+}
+
+
+
+/*----------------------------   sparse_ilu.templates.h     ---------------------------*/
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
+/*----------------------------   sparse_ilu.templates.h     ---------------------------*/
diff --git a/include/deal.II/lac/sparse_matrix.h b/include/deal.II/lac/sparse_matrix.h
new file mode 100644
index 0000000..1cde275
--- /dev/null
+++ b/include/deal.II/lac/sparse_matrix.h
@@ -0,0 +1,2425 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__sparse_matrix_h
+#define dealii__sparse_matrix_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/lac/sparsity_pattern.h>
+#include <deal.II/lac/identity_matrix.h>
+#include <deal.II/lac/exceptions.h>
+#include <deal.II/lac/vector.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <typename number> class Vector;
+template <typename number> class FullMatrix;
+template <typename Matrix> class BlockMatrixBase;
+template <typename number> class SparseILU;
+
+#ifdef DEAL_II_WITH_TRILINOS
+namespace TrilinosWrappers
+{
+  class SparseMatrix;
+}
+#endif
+
+/**
+ * @addtogroup Matrix1
+ * @{
+ */
+
+/**
+ * A namespace in which we declare iterators over the elements of sparse
+ * matrices.
+ */
+namespace SparseMatrixIterators
+{
+  /**
+   * Declare type for container size (an alias for types::global_dof_index).
+   */
+  typedef types::global_dof_index size_type;
+
+  // forward declaration; the accessor classes below name Iterator as a friend
+  template <typename number, bool Constness>
+  class Iterator;
+
+  /**
+   * General template for sparse matrix accessors. The first template argument
+   * denotes the underlying numeric type, the second the constness of the
+   * matrix.
+   *
+   * The general template is not implemented, only the specializations for the
+   * two possible values of the second template argument. Therefore, the
+   * interface listed here only serves as a template provided since doxygen
+   * does not link the specializations.
+   */
+  template <typename number, bool Constness>
+  class Accessor : public SparsityPatternIterators::Accessor
+  {
+  public:
+    /**
+     * Value of this matrix entry, returned as a copy.
+     */
+    number value() const;
+
+    /**
+     * Value of this matrix entry, returned as a modifiable reference.
+     */
+    number &value();
+
+    /**
+     * Return a reference to the matrix into which this accessor points. Note
+     * that in the present case, this is a constant reference.
+     */
+    const SparseMatrix<number> &get_matrix () const;
+  };
+
+
+
+  /**
+   * Accessor class for constant matrices, used in the const_iterators. This
+   * class builds on the accessor classes used for sparsity patterns to loop
+   * over all nonzero entries, and only adds the accessor functions to gain
+   * access to the actual value stored at a certain location.
+   */
+  template <typename number>
+  class Accessor<number,true> : public SparsityPatternIterators::Accessor
+  {
+  public:
+    /**
+     * Typedef for the type (including constness) of the matrix to be used
+     * here. For this specialization it is a const SparseMatrix.
+     */
+    typedef const SparseMatrix<number> MatrixType;
+
+    /**
+     * Constructor. Takes the matrix and the index of an entry within it.
+     */
+    Accessor (MatrixType         *matrix,
+              const std::size_t   index_within_matrix);
+
+    /**
+     * Constructor. Construct the end accessor for the given matrix.
+     */
+    Accessor (MatrixType         *matrix);
+
+    /**
+     * Conversion constructor to get from a non-const to a const accessor.
+     */
+    Accessor (const SparseMatrixIterators::Accessor<number,false> &a);
+
+    /**
+     * Value of this matrix entry, returned as a copy.
+     */
+    number value() const;
+
+    /**
+     * Return a reference to the matrix into which this accessor points. Note
+     * that in the present case, this is a constant reference.
+     */
+    const MatrixType &get_matrix () const;
+
+  private:
+    /**
+     * Pointer to the matrix we use. MatrixType is const-qualified here.
+     */
+    MatrixType *matrix;
+
+    /**
+     * Make the advance function of the base class available.
+     */
+    using SparsityPatternIterators::Accessor::advance;
+
+    /**
+     * Make iterator class a friend (for every combination of its arguments).
+     */
+    template <typename, bool>
+    friend class Iterator;
+  };
+
+
+  /**
+   * Accessor class for non-constant matrices, used in the iterators. This
+   * class builds on the accessor classes used for sparsity patterns to loop
+   * over all nonzero entries, and only adds the accessor functions to gain
+   * access to the actual value stored at a certain location.
+   */
+  template <typename number>
+  class Accessor<number,false> : public SparsityPatternIterators::Accessor
+  {
+  private:
+    /**
+     * Reference class. This is what the accessor class returns when you call
+     * the value() function. The reference acts just as if it were a reference
+     * to the actual value of a matrix entry, i.e. you can read and write it,
+     * you can add and multiply to it, etc, but since the matrix does not give
+     * away the address of this matrix entry, we have to go through functions
+     * to do all this.
+     *
+     * The constructor takes a pointer to an accessor object that describes
+     * which element of the matrix it points to. This creates an ambiguity
+     * when one writes code like iterator->value()=0 (instead of
+     * iterator->value()=0.0), since the right hand side is an integer that
+     * can both be converted to a <tt>number</tt> (i.e., most commonly a
+     * double) or to another object of type <tt>Reference</tt>. The compiler
+     * then complains about not knowing which conversion to take.
+     *
+     * For some reason, adding another overload operator=(int) doesn't seem to
+     * cure the problem. We avoid it, however, by adding a second, dummy
+     * argument to the Reference constructor, that is unused, but makes sure
+     * there is no second matching conversion sequence using a one-argument
+     * right hand side.
+     *
+     * The testcase oliver_01 checks that this actually works as intended.
+     */
+    class Reference
+    {
+    public:
+      /**
+       * Constructor. For the second argument, see the general class
+       * documentation.
+       */
+      Reference (const Accessor *accessor,
+                 const bool dummy);
+
+      /**
+       * Conversion operator to the data type of the matrix, i.e. read access.
+       */
+      operator number () const;
+
+      /**
+       * Set the element of the matrix we presently point to to @p n.
+       */
+      const Reference &operator = (const number n) const;
+
+      /**
+       * Add @p n to the element of the matrix we presently point to.
+       */
+      const Reference &operator += (const number n) const;
+
+      /**
+       * Subtract @p n from the element of the matrix we presently point to.
+       */
+      const Reference &operator -= (const number n) const;
+
+      /**
+       * Multiply the element of the matrix we presently point to by @p n.
+       */
+      const Reference &operator *= (const number n) const;
+
+      /**
+       * Divide the element of the matrix we presently point to by @p n.
+       */
+      const Reference &operator /= (const number n) const;
+
+    private:
+      /**
+       * Pointer to the accessor that denotes which element we presently point
+       * to.
+       */
+      const Accessor *accessor;
+    };
+
+  public:
+    /**
+     * Typedef for the type (including constness) of the matrix to be used
+     * here. For this specialization it is a non-const SparseMatrix.
+     */
+    typedef SparseMatrix<number> MatrixType;
+
+    /**
+     * Constructor. Takes the matrix and the index of an entry within it.
+     */
+    Accessor (MatrixType         *matrix,
+              const std::size_t   index);
+
+    /**
+     * Constructor. Construct the end accessor for the given matrix.
+     */
+    Accessor (MatrixType         *matrix);
+
+    /**
+     * Value of this matrix entry, returned as a read- and writable reference.
+     */
+    Reference value() const;
+
+    /**
+     * Return a reference to the matrix into which this accessor points. Note
+     * that in the present case, this is a non-constant reference.
+     */
+    MatrixType &get_matrix () const;
+
+  private:
+    /**
+     * Pointer to the matrix we use. MatrixType is not const-qualified here.
+     */
+    MatrixType *matrix;
+
+    /**
+     * Make the advance function of the base class available.
+     */
+    using SparsityPatternIterators::Accessor::advance;
+
+    /**
+     * Make iterator class a friend (for every combination of its arguments).
+     */
+    template <typename, bool>
+    friend class Iterator;
+
+    /**
+     * Make the SparseMatrix class a friend so that it, in turn, can declare
+     * the Reference class a friend.
+     */
+    template <typename> friend class dealii::SparseMatrix;
+  };
+
+
+
+  /**
+   * Iterator for constant and non-constant matrices.
+   *
+   * The typical use for these iterators is to iterate over the elements of a
+   * sparse matrix or over the elements of individual rows. Note that there is
+   * no guarantee that the elements of a row are actually traversed in an
+   * order in which columns monotonically increase. See the documentation of
+   * the SparsityPattern class for more information.
+   *
+   * The first template argument denotes the underlying numeric type, the
+   * second the constness of the matrix.
+   *
+   * NOTE(review): this text used to state that a specialization for
+   * <tt>Constness=false</tt> exists and that this class therefore only serves
+   * constant matrices. No such specialization is visible here - please confirm.
+   *
+   * @note This class operates directly on the internal data structures of the
+   * SparsityPattern and SparseMatrix classes. As a consequence, some
+   * operations are cheap and some are not. In particular, it is cheap to
+   * access the column index and the value of an entry pointed to. On the
+   * other hand, it is expensive to access the row index (this requires
+   * $O(\log(N))$ operations for a matrix with $N$ rows). As a consequence,
+   * when you design algorithms that use these iterators, it is common
+   * practice to not loop over <i>all</i> elements of a sparse matrix at once,
+   * but to have an outer loop over all rows and within this loop iterate over
+   * the elements of this row. This way, you only ever need to dereference the
+   * iterator to obtain the column indices and values whereas the (expensive)
+   * lookup of the row index can be avoided by using the loop index instead.
+   */
+  template <typename number, bool Constness>
+  class Iterator
+  {
+  public:
+    /**
+     * Typedef for the matrix type (including constness) we are to operate on.
+     */
+    typedef
+    typename Accessor<number,Constness>::MatrixType
+    MatrixType;
+
+    /**
+     * A typedef for the type you get when you dereference an iterator of the
+     * current kind.
+     */
+    typedef
+    const Accessor<number,Constness> &value_type;
+
+    /**
+     * Constructor. Create an iterator into the matrix @p matrix for the given
+     * index in the complete matrix (counting from the zeroth entry).
+     */
+    Iterator (MatrixType        *matrix,
+              const std::size_t  index_within_matrix);
+
+    /**
+     * Constructor. Create the end iterator for the given matrix.
+     */
+    Iterator (MatrixType *matrix);
+
+    /**
+     * Conversion constructor to get from a non-const iterator to a const
+     * iterator.
+     */
+    Iterator (const SparseMatrixIterators::Iterator<number,false> &i);
+
+    /**
+     * Prefix increment.
+     */
+    Iterator &operator++ ();
+
+    /**
+     * Postfix increment.
+     */
+    Iterator operator++ (int);
+
+    /**
+     * Dereferencing operator; yields a constant reference to an accessor.
+     */
+    const Accessor<number,Constness> &operator* () const;
+
+    /**
+     * Dereferencing operator; yields a pointer to a constant accessor.
+     */
+    const Accessor<number,Constness> *operator-> () const;
+
+    /**
+     * Comparison. True, if both iterators point to the same matrix position.
+     */
+    bool operator == (const Iterator &) const;
+
+    /**
+     * Inverse of <tt>==</tt>.
+     */
+    bool operator != (const Iterator &) const;
+
+    /**
+     * Comparison operator. Result is true if either the first row number is
+     * smaller or if the row numbers are equal and the first index is smaller.
+     *
+     * This function is only valid if both iterators point into the same
+     * matrix.
+     */
+    bool operator < (const Iterator &) const;
+
+    /**
+     * Comparison operator. Works in the same way as above operator, just the
+     * other way round.
+     */
+    bool operator > (const Iterator &) const;
+
+    /**
+     * Return the distance between the current iterator and the argument, i.e.
+     * how often operator++ has to be applied to the current iterator to reach
+     * the argument (positive result) or operator-- (negative result). Note
+     * that this class itself declares no operator--.
+     */
+    int operator - (const Iterator &p) const;
+
+    /**
+     * Return an iterator that is @p n ahead of the current one.
+     */
+    Iterator operator + (const size_type n) const;
+
+  private:
+    /**
+     * Store an object of the accessor class.
+     */
+    Accessor<number,Constness> accessor;
+  };
+
+}
+
+/**
+ * @}
+ */
+
+
+//TODO: Add multithreading to the other vmult functions.
+
+/**
+ * Sparse matrix. This class implements the functionality to store matrix
+ * entry values in the locations denoted by a SparsityPattern. See
+ * @ref Sparsity
+ * for a discussion about the separation between sparsity patterns and
+ * matrices.
+ *
+ * The elements of a SparseMatrix are stored in the same order in which the
+ * SparsityPattern class stores its entries. Within each row, elements are
+ * generally stored left-to-right in increasing column index order; the
+ * exception to this rule is that if the matrix is square (m() == n()), then
+ * the diagonal entry is stored as the first element in each row to make
+ * operations like applying a Jacobi or SSOR preconditioner faster. As a
+ * consequence, if you traverse the elements of a row of a SparseMatrix with
+ * the help of iterators into this object (using SparseMatrix::begin and
+ * SparseMatrix::end) you will find that the elements are not sorted by column
+ * index within each row whenever the matrix is square.
+ *
+ * @note Instantiations for this template are provided for <tt>@<float@> and
+ * @<double@></tt>; others can be generated in application programs (see the
+ * section on
+ * @ref Instantiations
+ * in the manual).
+ *
+ * @ingroup Matrix1
+ * @author Essentially everyone who has ever worked on deal.II
+ * @date 1994-2013
+ */
+template <typename number>
+class SparseMatrix : public virtual Subscriptor
+{
+public:
+  /**
+   * Declare type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * Type of the matrix entries. This typedef is analogous to
+   * <tt>value_type</tt> in the standard library containers.
+   */
+  typedef number value_type;
+
+  /**
+   * Declare a type that holds real-valued numbers with the same precision
+   * as the template argument to this class. If the template argument of this
+   * class is a real data type, then real_type equals the template argument.
+   * If the template argument is a std::complex type then real_type equals the
+   * type underlying the complex numbers.
+   *
+   * This typedef is used to represent the return type of norms.
+   */
+  typedef typename numbers::NumberTraits<number>::real_type real_type;
+
+  /**
+   * Typedef of an iterator class walking over all the nonzero entries of this
+   * matrix. This iterator cannot change the values of the matrix.
+   */
+  typedef
+  SparseMatrixIterators::Iterator<number,true>
+  const_iterator;
+
+  /**
+   * Typedef of an iterator class walking over all the nonzero entries of this
+   * matrix. This iterator @em can change the values of the matrix, but of
+   * course can't change the sparsity pattern as this is fixed once a sparse
+   * matrix is attached to it.
+   */
+  typedef
+  SparseMatrixIterators::Iterator<number,false>
+  iterator;
+
+  /**
+   * A structure that describes some of the traits of this class in terms of
+   * its run-time behavior. Some other classes (such as the block matrix
+   * classes) that take one or other of the matrix classes as its template
+   * parameters can tune their behavior based on the variables in this class.
+   */
+  struct Traits
+  {
+    /**
+     * It is safe to elide additions of zeros to individual elements of this
+     * matrix.
+     */
+    static const bool zero_addition_can_be_elided = true;
+  };
+
+  /**
+   * @name Constructors and initialization
+   */
+//@{
+  /**
+   * Constructor; initializes the matrix to be empty, without any structure,
+   * i.e.  the matrix is not usable at all. This constructor is therefore only
+   * useful for matrices which are members of a class. All other matrices
+   * should be created at a point in the data flow where all necessary
+   * information is available.
+   *
+   * You have to initialize the matrix before usage with reinit(const
+   * SparsityPattern&).
+   */
+  SparseMatrix ();
+
+  /**
+   * Copy constructor. This constructor is only allowed to be called if the
+   * matrix to be copied is empty. This is for the same reason as for the
+   * SparsityPattern, see there for the details.
+   *
+   * If you really want to copy a whole matrix, you can do so by using the
+   * copy_from() function.
+   */
+  SparseMatrix (const SparseMatrix &);
+
+  /**
+   * Constructor. Takes the given matrix sparsity structure to represent the
+   * sparsity pattern of this matrix. You can change the sparsity pattern
+   * later on by calling the reinit(const SparsityPattern&) function.
+   *
+   * You have to make sure that the lifetime of the sparsity structure is at
+   * least as long as that of this matrix or as long as reinit(const
+   * SparsityPattern&) is not called with a new sparsity pattern.
+   *
+   * The constructor is marked explicit so as to disallow that someone passes
+   * a sparsity pattern in place of a sparse matrix to some function, where an
+   * empty matrix would be generated then.
+   */
+  explicit SparseMatrix (const SparsityPattern &sparsity);
+
+  /**
+   * Copy constructor: initialize the matrix with the identity matrix. This
+   * constructor will throw an exception if the sizes of the sparsity pattern
+   * and the identity matrix do not coincide, or if the sparsity pattern does
+   * not provide for nonzero entries on the entire diagonal.
+   */
+  SparseMatrix (const SparsityPattern &sparsity,
+                const IdentityMatrix  &id);
+
+  /**
+   * Destructor. Free all memory, but do not release the memory of the
+   * sparsity structure.
+   */
+  virtual ~SparseMatrix ();
+
+  /**
+   * Copy operator. Since copying entire sparse matrices is a very expensive
+   * operation, we disallow doing so except for the special case of empty
+   * matrices of size zero. This doesn't seem particularly useful, but is
+   * exactly what one needs if one wanted to have a
+   * <code>std::vector@<SparseMatrix@<double@> @></code>: in that case, one
+   * can create a vector (which needs the ability to copy objects) of empty
+   * matrices that are then later filled with something useful.
+   */
+  SparseMatrix<number> &operator = (const SparseMatrix<number> &);
+
+  /**
+   * Copy operator: initialize the matrix with the identity matrix. This
+   * operator will throw an exception if the sizes of the sparsity pattern and
+   * the identity matrix do not coincide, or if the sparsity pattern does not
+   * provide for nonzero entries on the entire diagonal.
+   */
+  SparseMatrix<number> &
+  operator= (const IdentityMatrix  &id);
+
+  /**
+   * This operator assigns a scalar to a matrix. Since this does usually not
+   * make much sense (should we set all matrix entries to this value?  Only
+   * the nonzero entries of the sparsity pattern?), this operation is only
+   * allowed if the actual value to be assigned is zero. This operator only
+   * exists to allow for the obvious notation <tt>matrix=0</tt>, which sets
+   * all elements of the matrix to zero, but keep the sparsity pattern
+   * previously used.
+   *
+   * @dealiiOperationIsMultithreaded
+   */
+  SparseMatrix &operator = (const double d);
+
+  /**
+   * Reinitialize the sparse matrix with the given sparsity pattern. The
+   * latter tells the matrix how many nonzero elements there need to be
+   * reserved.
+   *
+   * Regarding memory allocation, the same applies as said above.
+   *
+   * You have to make sure that the lifetime of the sparsity structure is at
+   * least as long as that of this matrix or as long as reinit(const
+   * SparsityPattern &) is not called with a new sparsity structure.
+   *
+   * The elements of the matrix are set to zero by this function.
+   */
+  virtual void reinit (const SparsityPattern &sparsity);
+
+  /**
+   * Release all memory and return to a state just like after having called
+   * the default constructor. It also forgets the sparsity pattern it was
+   * previously tied to.
+   */
+  virtual void clear ();
+//@}
+  /**
+   * @name Information on the matrix
+   */
+//@{
+  /**
+   * Return whether the object is empty. It is empty if either both dimensions
+   * are zero or no SparsityPattern is associated.
+   */
+  bool empty () const;
+
+  /**
+   * Return the dimension of the codomain (or range) space. To remember: the
+   * matrix is of dimension $m \times n$.
+   */
+  size_type m () const;
+
+  /**
+   * Return the dimension of the domain space. To remember: the matrix is of
+   * dimension $m \times n$.
+   */
+  size_type n () const;
+
+  /**
+   * Return the number of entries in a specific row.
+   */
+  size_type get_row_length (const size_type row) const;
+
+  /**
+   * Return the number of nonzero elements of this matrix. Actually, it
+   * returns the number of entries in the sparsity pattern; if any of the
+   * entries should happen to be zero, it is counted anyway.
+   */
+  size_type n_nonzero_elements () const;
+
+  /**
+   * Return the number of actually nonzero elements of this matrix. It is
+   * possible to specify the parameter <tt>threshold</tt> in order to count
+   * only the elements that have absolute value greater than the threshold.
+   *
+   * Note that this function does (in contrast to n_nonzero_elements()) not
+   * count all entries of the sparsity pattern but only the ones that are
+   * nonzero (or whose absolute value is greater than threshold).
+   */
+  size_type n_actually_nonzero_elements (const double threshold = 0.) const;
+
+  /**
+   * Return a (constant) reference to the underlying sparsity pattern of this
+   * matrix.
+   *
+   * Though the return value is declared <tt>const</tt>, you should be aware
+   * that it may change if you call any nonconstant function of objects which
+   * operate on it.
+   */
+  const SparsityPattern &get_sparsity_pattern () const;
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object. See MemoryConsumption.
+   */
+  std::size_t memory_consumption () const;
+
+  /**
+   * Dummy function for compatibility with distributed, parallel matrices.
+   */
+  void compress (::dealii::VectorOperation::values);
+
+//@}
+  /**
+   * @name Modifying entries
+   */
+//@{
+  /**
+   * Set the element (<i>i,j</i>) to <tt>value</tt>. Throws an error if the
+   * entry does not exist or if <tt>value</tt> is not a finite number. Still,
+   * it is allowed to store zero values in non-existent fields.
+   */
+  void set (const size_type i,
+            const size_type j,
+            const number value);
+
+  /**
+   * Set all elements given in a FullMatrix into the sparse matrix locations
+   * given by <tt>indices</tt>. In other words, this function writes the
+   * elements in <tt>full_matrix</tt> into the calling matrix, using the
+   * local-to-global indexing specified by <tt>indices</tt> for both the rows
+   * and the columns of the matrix. This function assumes a quadratic sparse
+   * matrix and a quadratic full_matrix, the usual situation in FE
+   * calculations.
+   *
+   * The optional parameter <tt>elide_zero_values</tt> can be used to specify
+   * whether zero values should be set anyway or they should be filtered away
+   * (and not change the previous content in the respective element if it
+   * exists). The default value is <tt>false</tt>, i.e., even zero values are
+   * treated.
+   */
+  template <typename number2>
+  void set (const std::vector<size_type> &indices,
+            const FullMatrix<number2>       &full_matrix,
+            const bool                       elide_zero_values = false);
+
+  /**
+   * Same function as before, but now including the possibility to use
+   * rectangular full_matrices and different local-to-global indexing on rows
+   * and columns, respectively.
+   */
+  template <typename number2>
+  void set (const std::vector<size_type> &row_indices,
+            const std::vector<size_type> &col_indices,
+            const FullMatrix<number2>    &full_matrix,
+            const bool                    elide_zero_values = false);
+
+  /**
+   * Set several elements in the specified row of the matrix with column
+   * indices as given by <tt>col_indices</tt> to the respective value.
+   *
+   * The optional parameter <tt>elide_zero_values</tt> can be used to specify
+   * whether zero values should be set anyway or they should be filtered away
+   * (and not change the previous content in the respective element if it
+   * exists). The default value is <tt>false</tt>, i.e., even zero values are
+   * treated.
+   */
+  template <typename number2>
+  void set (const size_type               row,
+            const std::vector<size_type> &col_indices,
+            const std::vector<number2>   &values,
+            const bool                    elide_zero_values = false);
+
+  /**
+   * Set several elements to values given by <tt>values</tt> in a given row in
+   * columns given by col_indices into the sparse matrix.
+   *
+   * The optional parameter <tt>elide_zero_values</tt> can be used to specify
+   * whether zero values should be inserted anyway or they should be filtered
+   * away. The default value is <tt>false</tt>, i.e., even zero values are
+   * inserted/replaced.
+   */
+  template <typename number2>
+  void set (const size_type  row,
+            const size_type  n_cols,
+            const size_type *col_indices,
+            const number2   *values,
+            const bool       elide_zero_values = false);
+
+  /**
+   * Add <tt>value</tt> to the element (<i>i,j</i>).  Throws an error if the
+   * entry does not exist or if <tt>value</tt> is not a finite number. Still,
+   * it is allowed to store zero values in non-existent fields.
+   */
+  void add (const size_type i,
+            const size_type j,
+            const number value);
+
+  /**
+   * Add all elements given in a FullMatrix into the sparse matrix
+   * locations given by <tt>indices</tt>. In other words, this function adds
+   * the elements in <tt>full_matrix</tt> to the respective entries in the
+   * calling matrix, using the local-to-global indexing specified by
+   * <tt>indices</tt> for both the rows and the columns of the matrix. This
+   * function assumes a quadratic sparse matrix and a quadratic full_matrix,
+   * the usual situation in FE calculations.
+   *
+   * The optional parameter <tt>elide_zero_values</tt> can be used to specify
+   * whether zero values should be added anyway or these should be filtered
+   * away and only non-zero data is added. The default value is <tt>true</tt>,
+   * i.e., zero values won't be added into the matrix.
+   */
+  template <typename number2>
+  void add (const std::vector<size_type> &indices,
+            const FullMatrix<number2>    &full_matrix,
+            const bool                    elide_zero_values = true);
+
+  /**
+   * Same function as before, but now including the possibility to use
+   * rectangular full_matrices and different local-to-global indexing on rows
+   * and columns, respectively.
+   */
+  template <typename number2>
+  void add (const std::vector<size_type> &row_indices,
+            const std::vector<size_type> &col_indices,
+            const FullMatrix<number2>    &full_matrix,
+            const bool                    elide_zero_values = true);
+
+  /**
+   * Add the given <tt>values</tt> to several elements in the specified row
+   * of the matrix, at the column indices given by <tt>col_indices</tt>.
+   *
+   * The optional parameter <tt>elide_zero_values</tt> can be used to specify
+   * whether zero values should be added anyway or these should be filtered
+   * away and only non-zero data is added. The default value is <tt>true</tt>,
+   * i.e., zero values won't be added into the matrix.
+   */
+  template <typename number2>
+  void add (const size_type               row,
+            const std::vector<size_type> &col_indices,
+            const std::vector<number2>   &values,
+            const bool                    elide_zero_values = true);
+
+  /**
+   * Add an array of values given by <tt>values</tt> in the given global
+   * matrix row at columns specified by col_indices in the sparse matrix.
+   * Both <tt>col_indices</tt> and <tt>values</tt> are passed as raw pointers;
+   * <tt>n_cols</tt> is presumably the number of entries in each of the two
+   * arrays (to be confirmed against the implementation).
+   *
+   * The optional parameter <tt>elide_zero_values</tt> can be used to specify
+   * whether zero values should be added anyway or these should be filtered
+   * away and only non-zero data is added. The default value is <tt>true</tt>,
+   * i.e., zero values won't be added into the matrix.
+   *
+   * NOTE(review): the flag <tt>col_indices_are_sorted</tt> (default
+   * <tt>false</tt>) is not described by the original documentation.
+   * Presumably it lets the caller state that <tt>col_indices</tt> is already
+   * in ascending order; confirm against the implementation before relying on
+   * it.
+   */
+  template <typename number2>
+  void add (const size_type  row,
+            const size_type  n_cols,
+            const size_type *col_indices,
+            const number2   *values,
+            const bool       elide_zero_values = true,
+            const bool       col_indices_are_sorted = false);
+
+  /**
+   * Multiply the entire matrix by a fixed factor.
+   */
+  SparseMatrix &operator *= (const number factor);
+
+  /**
+   * Divide the entire matrix by a fixed factor.
+   */
+  SparseMatrix &operator /= (const number factor);
+
+  /**
+   * Symmetrize the matrix by forming the mean value between the existing
+   * matrix and its transpose, $A = \frac 12(A+A^T)$.
+   *
+   * This operation assumes that the underlying sparsity pattern represents a
+   * symmetric object. If this is not the case, then the result of this
+   * operation will not be a symmetric matrix, since it only explicitly
+   * symmetrizes by looping over the lower left triangular part for efficiency
+   * reasons; if there are entries in the upper right triangle, then these
+   * elements are missed in the symmetrization. Symmetrization of the sparsity
+   * pattern can be obtained by SparsityPattern::symmetrize().
+   */
+  void symmetrize ();
+
+  /**
+   * Copy the matrix given as argument into the current object.
+   *
+   * Copying matrices is an expensive operation that we do not want to happen
+   * by accident through compiler generated code for <code>operator=</code>.
+   * (This would happen, for example, if one accidentally declared a function
+   * argument of the current type <i>by value</i> rather than <i>by
+   * reference</i>.) The functionality of copying matrices is implemented in
+   * this member function instead. All copy operations of objects of this type
+   * therefore require an explicit function call.
+   *
+   * The source matrix may be a matrix of arbitrary type, as long as its data
+   * type is convertible to the data type of this matrix.
+   *
+   * The function returns a reference to <tt>*this</tt>.
+   */
+  template <typename somenumber>
+  SparseMatrix<number> &
+  copy_from (const SparseMatrix<somenumber> &source);
+
+  /**
+   * This function is completely analogous to the
+   * SparsityPattern::copy_from() function in that it allows initializing a
+   * whole matrix in one step. See there for more information on argument
+   * types and their meaning. You can also find a small example on how to use
+   * this function there.
+   *
+   * The only difference to the cited function is that the objects which the
+   * inner iterator points to need to be of type <tt>std::pair<unsigned int,
+   * value></tt>, where <tt>value</tt> needs to be convertible to the element
+   * type of this class, as specified by the <tt>number</tt> template
+   * argument.
+   *
+   * Previous content of the matrix is overwritten. Note that the entries
+   * specified by the input parameters need not necessarily cover all elements
+   * of the matrix. Elements not covered remain untouched.
+   */
+  template <typename ForwardIterator>
+  void copy_from (const ForwardIterator begin,
+                  const ForwardIterator end);
+
+  /**
+   * Copy the nonzero entries of a full matrix into this object. Previous
+   * content is deleted. Note that the underlying sparsity pattern must be
+   * appropriate to hold the nonzero entries of the full matrix.
+   */
+  template <typename somenumber>
+  void copy_from (const FullMatrix<somenumber> &matrix);
+
+#ifdef DEAL_II_WITH_TRILINOS
+  /**
+   * Copy the given Trilinos matrix to this one. The operation triggers an
+   * assertion if the sparsity pattern of the current object does not contain
+   * the location of a non-zero entry of the given argument.
+   *
+   * This function assumes that the two matrices have the same sizes.
+   *
+   * The function returns a reference to <tt>*this</tt>.
+   */
+  SparseMatrix<number> &
+  copy_from (const TrilinosWrappers::SparseMatrix &matrix);
+#endif
+
+  /**
+   * Add <tt>matrix</tt> scaled by <tt>factor</tt> to this matrix, i.e. the
+   * matrix <tt>factor*matrix</tt> is added to <tt>this</tt>. This function
+   * throws an error if the sparsity patterns of the two involved matrices do
+   * not point to the same object; sharing one pattern is required because
+   * the operation is cheaper in that case.
+   *
+   * The source matrix may be a sparse matrix over an arbitrary underlying
+   * scalar type, as long as its data type is convertible to the data type of
+   * this matrix.
+   */
+  template <typename somenumber>
+  void add (const number factor,
+            const SparseMatrix<somenumber> &matrix);
+
+//@}
+  /**
+   * @name Entry Access
+   */
+//@{
+
+  /**
+   * Return the value of the entry (<i>i,j</i>).  This may be an expensive
+   * operation and you should always take care where to call this function. In
+   * order to avoid abuse, this function throws an exception if the required
+   * element does not exist in the matrix.
+   *
+   * In case you want a function that returns zero instead (for entries that
+   * are not in the sparsity pattern of the matrix), use the el() function.
+   *
+   * If you are looping over all elements, consider using one of the iterator
+   * classes instead, since they are tailored better to a sparse matrix
+   * structure.
+   */
+  number operator () (const size_type i,
+                      const size_type j) const;
+
+  /**
+   * This function is mostly like operator()() in that it returns the value of
+   * the matrix entry (<i>i,j</i>). The only difference is that if this entry
+   * does not exist in the sparsity pattern, then instead of raising an
+   * exception, zero is returned. While this may be convenient in some cases,
+   * note that it is simple to write algorithms that are slow compared to an
+   * optimal solution, since the sparsity of the matrix is not used.
+   *
+   * If you are looping over all elements, consider using one of the iterator
+   * classes instead, since they are tailored better to a sparse matrix
+   * structure.
+   */
+  number el (const size_type i,
+             const size_type j) const;
+
+  /**
+   * Return the main diagonal element in the <i>i</i>th row. This function
+   * throws an error if the matrix is not quadratic (see
+   * SparsityPattern::optimize_diagonal()).
+   *
+   * This function is considerably faster than the operator()(), since for
+   * quadratic matrices, the diagonal entry may be the first to be stored in
+   * each row and access therefore does not involve searching for the right
+   * column number.
+   */
+  number diag_element (const size_type i) const;
+
+  /**
+   * Same as above, but return a writeable reference. Are you sure you know
+   * what you are doing?
+   */
+  number &diag_element (const size_type i);
+
+//@}
+  /**
+   * @name Multiplications
+   */
+//@{
+  /**
+   * Matrix-vector multiplication: let <i>dst = M*src</i> with <i>M</i> being
+   * this matrix.
+   *
+   * Note that while this function can operate on all vectors that offer
+   * iterator classes, it is only really effective for objects of type
+   * @ref Vector.
+   * For all classes for which iterating over elements, or random member
+   * access is expensive, this function is not efficient. In particular, if
+   * you want to multiply with BlockVector objects, you should consider using
+   * a BlockSparseMatrix as well.
+   *
+   * Source and destination must not be the same vector.
+   *
+   * @dealiiOperationIsMultithreaded
+   */
+  template <class OutVector, class InVector>
+  void vmult (OutVector &dst,
+              const InVector &src) const;
+
+  /**
+   * Matrix-vector multiplication: let <i>dst = M<sup>T</sup>*src</i> with
+   * <i>M</i> being this matrix. This function does the same as vmult() but
+   * takes the transposed matrix.
+   *
+   * Note that while this function can operate on all vectors that offer
+   * iterator classes, it is only really effective for objects of type
+   * @ref Vector.
+   * For all classes for which iterating over elements, or random member
+   * access is expensive, this function is not efficient. In particular, if
+   * you want to multiply with BlockVector objects, you should consider using
+   * a BlockSparseMatrix as well.
+   *
+   * Source and destination must not be the same vector.
+   */
+  template <class OutVector, class InVector>
+  void Tvmult (OutVector &dst,
+               const InVector &src) const;
+
+  /**
+   * Adding Matrix-vector multiplication. Add <i>M*src</i> to <i>dst</i> with
+   * <i>M</i> being this matrix.
+   *
+   * Note that while this function can operate on all vectors that offer
+   * iterator classes, it is only really effective for objects of type
+   * @ref Vector.
+   * For all classes for which iterating over elements, or random member
+   * access is expensive, this function is not efficient. In particular, if
+   * you want to multiply with BlockVector objects, you should consider using
+   * a BlockSparseMatrix as well.
+   *
+   * Source and destination must not be the same vector.
+   *
+   * @dealiiOperationIsMultithreaded
+   */
+  template <class OutVector, class InVector>
+  void vmult_add (OutVector &dst,
+                  const InVector &src) const;
+
+  /**
+   * Adding Matrix-vector multiplication. Add <i>M<sup>T</sup>*src</i> to
+   * <i>dst</i> with <i>M</i> being this matrix. This function does the same
+   * as vmult_add() but takes the transposed matrix.
+   *
+   * Note that while this function can operate on all vectors that offer
+   * iterator classes, it is only really effective for objects of type
+   * @ref Vector.
+   * For all classes for which iterating over elements, or random member
+   * access is expensive, this function is not efficient. In particular, if
+   * you want to multiply with BlockVector objects, you should consider using
+   * a BlockSparseMatrix as well.
+   *
+   * Source and destination must not be the same vector.
+   */
+  template <class OutVector, class InVector>
+  void Tvmult_add (OutVector &dst,
+                   const InVector &src) const;
+
+  /**
+   * Return the square of the norm of the vector $v$ with respect to the norm
+   * induced by this matrix, i.e. $\left(v,Mv\right)$. This is useful, e.g. in
+   * the finite element context, where the $L_2$ norm of a function equals the
+   * matrix norm with respect to the mass matrix of the vector representing
+   * the nodal values of the finite element function.
+   *
+   * Obviously, the matrix needs to be quadratic for this operation, and for
+   * the result to actually be a norm it also needs to be either real
+   * symmetric or complex hermitian.
+   *
+   * The underlying template types of both this matrix and the given vector
+   * should either both be real or complex-valued, but not mixed, for this
+   * function to make sense.
+   *
+   * @dealiiOperationIsMultithreaded
+   */
+  template <typename somenumber>
+  somenumber matrix_norm_square (const Vector<somenumber> &v) const;
+
+  /**
+   * Compute the matrix scalar product $\left(u,Mv\right)$.
+   *
+   * @dealiiOperationIsMultithreaded
+   */
+  template <typename somenumber>
+  somenumber matrix_scalar_product (const Vector<somenumber> &u,
+                                    const Vector<somenumber> &v) const;
+
+  /**
+   * Compute the residual of an equation <i>Mx=b</i>, where the residual is
+   * defined to be <i>r=b-Mx</i>. Write the residual into <tt>dst</tt>. The
+   * <i>l<sub>2</sub></i> norm of the residual vector is returned.
+   *
+   * Source <i>x</i> and destination <i>dst</i> must not be the same vector.
+   *
+   * @dealiiOperationIsMultithreaded
+   */
+  template <typename somenumber>
+  somenumber residual (Vector<somenumber>       &dst,
+                       const Vector<somenumber> &x,
+                       const Vector<somenumber> &b) const;
+
+  /**
+   * Perform the matrix-matrix multiplication <tt>C = A * B</tt>, or, if an
+   * optional vector argument is given, <tt>C = A * diag(V) * B</tt>, where
+   * <tt>diag(V)</tt> defines a diagonal matrix with the vector entries.
+   *
+   * This function assumes that the calling matrix <tt>A</tt> and <tt>B</tt>
+   * have compatible sizes. The size of <tt>C</tt> will be set within this
+   * function.
+   *
+   * The content as well as the sparsity pattern of the matrix C will be
+   * changed by this function, so make sure that the sparsity pattern is not
+   * used somewhere else in your program. This is an expensive operation, so
+   * think twice before you use this function.
+   *
+   * There is an optional flag <tt>rebuild_sparsity_pattern</tt> that can be
+   * used to bypass the creation of a new sparsity pattern and instead uses
+   * the sparsity pattern stored in <tt>C</tt>. In that case, make sure that
+   * it really fits. The default is to rebuild the sparsity pattern.
+   *
+   * @note Rebuilding the sparsity pattern requires changing it. This means
+   * that all other matrices that are associated with this sparsity pattern
+   * will then have invalid entries.
+   */
+  template <typename numberB, typename numberC>
+  void mmult (SparseMatrix<numberC>       &C,
+              const SparseMatrix<numberB> &B,
+              const Vector<number>        &V = Vector<number>(),
+              const bool                   rebuild_sparsity_pattern = true) const;
+
+  /**
+   * Perform the matrix-matrix multiplication with the transpose of
+   * <tt>this</tt>, i.e., <tt>C = A<sup>T</sup> * B</tt>, or, if an optional
+   * vector argument is given, <tt>C = A<sup>T</sup> * diag(V) * B</tt>, where
+   * <tt>diag(V)</tt> defines a diagonal matrix with the vector entries.
+   *
+   * This function assumes that the calling matrix <tt>A</tt> and <tt>B</tt>
+   * have compatible sizes. The size of <tt>C</tt> will be set within this
+   * function.
+   *
+   * The content as well as the sparsity pattern of the matrix C will be
+   * changed by this function, so make sure that the sparsity pattern is not
+   * used somewhere else in your program. This is an expensive operation, so
+   * think twice before you use this function.
+   *
+   * There is an optional flag <tt>rebuild_sparsity_pattern</tt> that can be
+   * used to bypass the creation of a new sparsity pattern and instead uses
+   * the sparsity pattern stored in <tt>C</tt>. In that case, make sure that
+   * it really fits. The default is to rebuild the sparsity pattern.
+   *
+   * @note Rebuilding the sparsity pattern requires changing it. This means
+   * that all other matrices that are associated with this sparsity pattern
+   * will then have invalid entries.
+   */
+  template <typename numberB, typename numberC>
+  void Tmmult (SparseMatrix<numberC>       &C,
+               const SparseMatrix<numberB> &B,
+               const Vector<number>       &V = Vector<number>(),
+               const bool                   rebuild_sparsity_pattern = true) const;
+
+//@}
+  /**
+   * @name Matrix norms
+   */
+//@{
+
+  /**
+   * Return the $l_1$-norm of the matrix, that is $|M|_1=\max_{\mathrm{all\
+   * columns\ }j}\sum_{\mathrm{all\ rows\ } i} |M_{ij}|$, (max. sum of
+   * columns).  This is the natural matrix norm that is compatible to the
+   * $l_1$-norm for vectors, i.e.  $|Mv|_1\leq |M|_1 |v|_1$. (cf. Haemmerlin-
+   * Hoffmann: Numerische Mathematik)
+   */
+  real_type l1_norm () const;
+
+  /**
+   * Return the $l_\infty$-norm of the matrix, that is
+   * $|M|_\infty=\max_{\mathrm{all\ rows\ }i}\sum_{\mathrm{all\ columns\ }j}
+   * |M_{ij}|$, (max. sum of rows).  This is the natural matrix norm that is
+   * compatible to the $l_\infty$-norm of vectors, i.e.  $|Mv|_\infty \leq
+   * |M|_\infty |v|_\infty$.  (cf. Haemmerlin-Hoffmann: Numerische Mathematik)
+   */
+  real_type linfty_norm () const;
+
+  /**
+   * Return the Frobenius norm of the matrix, i.e. the square root of the sum
+   * of squares of all entries in the matrix.
+   */
+  real_type frobenius_norm () const;
+//@}
+  /**
+   * @name Preconditioning methods
+   */
+//@{
+
+  /**
+   * Apply the Jacobi preconditioner, which multiplies every element of the
+   * <tt>src</tt> vector by the inverse of the respective diagonal element and
+   * multiplies the result with the relaxation factor <tt>omega</tt>.
+   */
+  template <typename somenumber>
+  void precondition_Jacobi (Vector<somenumber>       &dst,
+                            const Vector<somenumber> &src,
+                            const number              omega = 1.) const;
+
+  /**
+   * Apply SSOR preconditioning to <tt>src</tt> with damping <tt>omega</tt>.
+   * The optional argument <tt>pos_right_of_diagonal</tt> is supposed to
+   * provide an array where each entry specifies the position just right of
+   * the diagonal in the global array of nonzeros.
+   */
+  template <typename somenumber>
+  void precondition_SSOR (Vector<somenumber>             &dst,
+                          const Vector<somenumber>       &src,
+                          const number                    omega = 1.,
+                          const std::vector<std::size_t> &pos_right_of_diagonal=std::vector<std::size_t>()) const;
+
+  /**
+   * Apply SOR preconditioning matrix to <tt>src</tt>, writing the result
+   * into <tt>dst</tt>. <tt>om</tt> is the relaxation parameter; it defaults
+   * to 1.
+   */
+  template <typename somenumber>
+  void precondition_SOR (Vector<somenumber>       &dst,
+                         const Vector<somenumber> &src,
+                         const number              om = 1.) const;
+
+  /**
+   * Apply transpose SOR preconditioning matrix to <tt>src</tt>, writing the
+   * result into <tt>dst</tt>. <tt>om</tt> is the relaxation parameter; it
+   * defaults to 1.
+   */
+  template <typename somenumber>
+  void precondition_TSOR (Vector<somenumber>       &dst,
+                          const Vector<somenumber> &src,
+                          const number              om = 1.) const;
+
+  /**
+   * Perform SSOR preconditioning in-place.  Apply the preconditioner matrix
+   * without copying to a second vector.  <tt>omega</tt> is the relaxation
+   * parameter.
+   */
+  template <typename somenumber>
+  void SSOR (Vector<somenumber> &v,
+             const number        omega = 1.) const;
+
+  /**
+   * Perform an SOR preconditioning in-place on the vector <tt>v</tt>.
+   * <tt>om</tt> is the relaxation parameter (omega); it defaults to 1.
+   */
+  template <typename somenumber>
+  void SOR (Vector<somenumber> &v,
+            const number        om = 1.) const;
+
+  /**
+   * Perform a transpose SOR preconditioning in-place on the vector
+   * <tt>v</tt>. <tt>om</tt> is the relaxation parameter (omega); it defaults
+   * to 1.
+   */
+  template <typename somenumber>
+  void TSOR (Vector<somenumber> &v,
+             const number        om = 1.) const;
+
+  /**
+   * Perform a permuted SOR preconditioning in-place on the vector
+   * <tt>v</tt>.
+   *
+   * The standard SOR method is applied in the order prescribed by
+   * <tt>permutation</tt>, that is, first the row <tt>permutation[0]</tt>,
+   * then <tt>permutation[1]</tt> and so on. For efficiency reasons, the
+   * permutation as well as its inverse are required; the caller passes the
+   * inverse as <tt>inverse_permutation</tt>.
+   *
+   * <tt>om</tt> is the relaxation parameter (omega); it defaults to 1.
+   */
+  template <typename somenumber>
+  void PSOR (Vector<somenumber> &v,
+             const std::vector<size_type> &permutation,
+             const std::vector<size_type> &inverse_permutation,
+             const number        om = 1.) const;
+
+  /**
+   * Perform a transposed permuted SOR preconditioning in-place on the vector
+   * <tt>v</tt>.
+   *
+   * The transposed SOR method is applied in the order prescribed by
+   * <tt>permutation</tt>, that is, first the row <tt>permutation[m()-1]</tt>,
+   * then <tt>permutation[m()-2]</tt> and so on. For efficiency reasons, the
+   * permutation as well as its inverse are required; the caller passes the
+   * inverse as <tt>inverse_permutation</tt>.
+   *
+   * <tt>om</tt> is the relaxation parameter (omega); it defaults to 1.
+   */
+  template <typename somenumber>
+  void TPSOR (Vector<somenumber> &v,
+              const std::vector<size_type> &permutation,
+              const std::vector<size_type> &inverse_permutation,
+              const number        om = 1.) const;
+
+  /**
+   * Do one Jacobi step on <tt>v</tt>.  Performs a direct Jacobi step with
+   * right hand side <tt>b</tt>. This function will need an auxiliary vector,
+   * which is acquired from GrowingVectorMemory.
+   */
+  template <typename somenumber>
+  void Jacobi_step (Vector<somenumber> &v,
+                    const Vector<somenumber> &b,
+                    const number        om = 1.) const;
+
+  /**
+   * Do one SOR step on <tt>v</tt>.  Performs a direct SOR step with right
+   * hand side <tt>b</tt>.
+   */
+  template <typename somenumber>
+  void SOR_step (Vector<somenumber> &v,
+                 const Vector<somenumber> &b,
+                 const number        om = 1.) const;
+
+  /**
+   * Do one adjoint SOR step on <tt>v</tt>.  Performs a direct TSOR step with
+   * right hand side <tt>b</tt>.
+   */
+  template <typename somenumber>
+  void TSOR_step (Vector<somenumber> &v,
+                  const Vector<somenumber> &b,
+                  const number        om = 1.) const;
+
+  /**
+   * Do one SSOR step on <tt>v</tt>.  Performs a direct SSOR step with right
+   * hand side <tt>b</tt> by performing TSOR after SOR.
+   */
+  template <typename somenumber>
+  void SSOR_step (Vector<somenumber> &v,
+                  const Vector<somenumber> &b,
+                  const number        om = 1.) const;
+//@}
+  /**
+   * @name Iterators
+   */
+//@{
+
+  /**
+   * Return an iterator pointing to the first element of the matrix.
+   *
+   * Note the discussion in the general documentation of this class about the
+   * order in which elements are accessed.
+   */
+  const_iterator begin () const;
+
+  /**
+   * Like the function above, but for non-const matrices.
+   */
+  iterator begin ();
+
+  /**
+   * Return an iterator pointing to the element past the last one of this
+   * matrix.
+   */
+  const_iterator end () const;
+
+  /**
+   * Like the function above, but for non-const matrices.
+   */
+  iterator end ();
+
+  /**
+   * Return an iterator pointing to the first element of row @p r.
+   *
+   * Note that if the given row is empty, i.e. does not contain any nonzero
+   * entries, then the iterator returned by this function equals
+   * <tt>end(r)</tt>. The returned iterator may not be dereferenceable in that
+   * case if neither row @p r nor any of the following rows contain any
+   * nonzero entries.
+   */
+  const_iterator begin (const size_type r) const;
+
+  /**
+   * Like the function above, but for non-const matrices.
+   */
+  iterator begin (const size_type r);
+
+  /**
+   * Return an iterator pointing to the element past the last one of row
+   * @p r , or past the end of the entire sparsity pattern if none of the
+   * rows after @p r contain any entries at all.
+   *
+   * Note that the end iterator is not necessarily dereferenceable. This is in
+   * particular the case if it is the end iterator for the last row of a
+   * matrix.
+   */
+  const_iterator end (const size_type r) const;
+
+  /**
+   * Like the function above, but for non-const matrices.
+   */
+  iterator end (const size_type r);
+//@}
+  /**
+   * @name Input/Output
+   */
+//@{
+
+  /**
+   * Print the matrix to the given stream, using the format <tt>(row,column)
+   * value</tt>, i.e. one nonzero entry of the matrix per line. If
+   * <tt>across</tt> is true, print all entries on a single line, using the
+   * format row,column:value.
+   *
+   * If the argument <tt>diagonal_first</tt> is true, diagonal elements of
+   * quadratic matrices are printed first in their row, corresponding to the
+   * internal storage scheme. If it is false, the elements in a row are
+   * written in ascending column order.
+   */
+  template <class StreamType>
+  void print (StreamType &out,
+              const bool  across = false,
+              const bool  diagonal_first = true) const;
+
+  /**
+   * Print the matrix in the usual format, i.e. as a matrix and not as a list
+   * of nonzero elements. For better readability, elements not in the matrix
+   * are displayed as empty space, while matrix elements which are explicitly
+   * set to zero are displayed as such.
+   *
+   * The parameters allow for a flexible setting of the output format:
+   * <tt>precision</tt> and <tt>scientific</tt> are used to determine the
+   * number format, where <tt>scientific = false</tt> means fixed point
+   * notation.  A zero entry for <tt>width</tt> makes the function compute a
+   * width, but it may be changed to a positive value, if output is crude.
+   *
+   * Additionally, a character for an empty value may be specified.
+   *
+   * Finally, the whole matrix can be multiplied with a common denominator to
+   * produce more readable output, even integers.
+   *
+   * @attention This function may produce <b>large</b> amounts of output if
+   * applied to a large matrix!
+   */
+  void print_formatted (std::ostream       &out,
+                        const unsigned int  precision   = 3,
+                        const bool          scientific  = true,
+                        const unsigned int  width       = 0,
+                        const char         *zero_string = " ",
+                        const double        denominator = 1.) const;
+
+  /**
+   * Print the actual pattern of the matrix. For each entry with an absolute
+   * value larger than threshold, a '*' is printed, a ':' for every value
+   * smaller and a '.' for every entry not allocated.
+   */
+  void print_pattern(std::ostream &out,
+                     const double threshold = 0.) const;
+
+  /**
+   * Write the data of this object en bloc to a file. This is done in a binary
+   * mode, so the output is neither readable by humans nor (probably) by other
+   * computers using a different operating system or number format.
+   *
+   * The purpose of this function is that you can swap out matrices and
+   * sparsity pattern if you are short of memory, want to communicate between
+   * different programs, or allow objects to be persistent across different
+   * runs of the program.
+   */
+  void block_write (std::ostream &out) const;
+
+  /**
+   * Read data that has previously been written by block_write() from a file.
+   * This is done using the inverse operations to the above function, so it is
+   * reasonably fast because the bitstream is not interpreted except for a few
+   * numbers up front.
+   *
+   * The object is resized on this operation, and all previous contents are
+   * lost. Note, however, that no checks are performed whether new data and
+   * the underlying SparsityPattern object fit together. It is your
+   * responsibility to make sure that the sparsity pattern and the data to be
+   * read match.
+   *
+   * A primitive form of error checking is performed which will recognize the
+   * bluntest attempts to interpret some data as a matrix stored bitwise to a
+   * file that wasn't actually created that way, but not more.
+   */
+  void block_read (std::istream &in);
+  //@}
+  /**
+   * @addtogroup Exceptions
+   * @{
+   */
+
+  /**
+   * Exception
+   */
+  DeclException2 (ExcInvalidIndex,
+                  int, int,
+                  << "You are trying to access the matrix entry with index <"
+                  << arg1 << ',' << arg2
+                  << ">, but this entry does not exist in the sparsity pattern "
+                  "of this matrix."
+                  "\n\n"
+                  "The most common cause for this problem is that you used "
+                  "a method to build the sparsity pattern that did not "
+                  "(completely) take into account all of the entries you "
+                  "will later try to write into. An example would be "
+                  "building a sparsity pattern that does not include "
+                  "the entries you will write into due to constraints "
+                  "on degrees of freedom such as hanging nodes or periodic "
+                  "boundary conditions. In such cases, building the "
+                  "sparsity pattern will succeed, but you will get errors "
+                  "such as the current one at one point or other when "
+                  "trying to write into the entries of the matrix.");
+  /**
+   * Exception
+   */
+  DeclExceptionMsg (ExcDifferentSparsityPatterns,
+                    "When copying one sparse matrix into another, "
+                    "or when adding one sparse matrix to another, "
+                    "both matrices need to refer to the same "
+                    "sparsity pattern.");
+  /**
+   * Exception
+   */
+  DeclException2 (ExcIteratorRange,
+                  int, int,
+                  << "The iterators denote a range of " << arg1
+                  << " elements, but the given number of rows was " << arg2);
+  /**
+   * Exception
+   */
+  DeclException0 (ExcSourceEqualsDestination);
+  //@}
+
+protected:
+  /**
+   * For some matrix storage formats, in particular for the PETSc distributed
+   * blockmatrices, set and add operations on individual elements can not be
+   * freely mixed. Rather, one has to synchronize operations when one wants to
+   * switch from setting elements to adding to elements.  BlockMatrixBase
+   * automatically synchronizes the access by calling this helper function for
+   * each block.  This function ensures that the matrix is in a state that
+   * allows adding elements; if it previously already was in this state, the
+   * function does nothing.
+   */
+  void prepare_add();
+
+  /**
+   * Same as prepare_add() but prepare the matrix for setting elements if the
+   * representation of elements in this class requires such an operation.
+   */
+  void prepare_set();
+
+private:
+  /**
+   * Pointer to the sparsity pattern used for this matrix. In order to
+   * guarantee that it is not deleted while still in use, we subscribe to it
+   * using the SmartPointer class.
+   */
+  SmartPointer<const SparsityPattern,SparseMatrix<number> > cols;
+
+  /**
+   * Array of values for all the nonzero entries. The position within the
+   * matrix, i.e.  the row and column number for a given entry can only be
+   * deduced using the sparsity pattern. The same holds for the more common
+   * operation of finding an entry by its coordinates.
+   */
+  number *val;
+
+  /**
+   * Allocated size of #val. This can be larger than the actually used part if
+   * the size of the matrix was reduced sometime in the past by associating a
+   * sparsity pattern with a smaller size to this object, using the reinit()
+   * function.
+   */
+  std::size_t max_len;
+
+  // make all other sparse matrices friends
+  template <typename somenumber> friend class SparseMatrix;
+  template <typename somenumber> friend class SparseLUDecomposition;
+  template <typename> friend class SparseILU;
+
+  /**
+   * Allow BlockMatrixBase to call the protected prepare_add() and prepare_set().
+   */
+  template <typename> friend class BlockMatrixBase;
+
+  /**
+   * Also give access to internal details to the iterator/accessor classes.
+   */
+  template <typename,bool> friend class SparseMatrixIterators::Iterator;
+  template <typename,bool> friend class SparseMatrixIterators::Accessor;
+
+#ifndef DEAL_II_MSVC
+  // Visual studio is choking on the following friend declaration, probably
+  // because Reference is only defined in a specialization. It looks like
+  // the library is compiling without this line, though.
+  template <typename number2> friend class SparseMatrixIterators::Accessor<number2, false>::Reference;
+#endif
+};
+
+#ifndef DOXYGEN
+/*---------------------- Inline functions -----------------------------------*/
+
+
+// Return the number of rows, read from the attached sparsity pattern.
+template <typename number>
+inline
+typename SparseMatrix<number>::size_type SparseMatrix<number>::m () const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  return cols->rows;
+}
+
+// Return the number of columns, read from the attached sparsity pattern.
+template <typename number>
+inline
+typename SparseMatrix<number>::size_type SparseMatrix<number>::n () const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  return cols->cols;
+}
+
+// Set the single entry (i,j) of the matrix to @p value.
+// Inline the set() and add() functions, since they will be called frequently.
+template <typename number>
+inline
+void
+SparseMatrix<number>::set (const size_type i,
+                           const size_type j,
+                           const number       value)
+{
+  AssertIsFinite(value);
+
+  const size_type index = cols->operator()(i, j);
+  // Entries outside the sparsity pattern cannot be stored. Setting such an
+  // entry to zero is allowed and silently ignored; any other value triggers
+  // the assertion below.
+  if (index == SparsityPattern::invalid_entry)
+    {
+      Assert ((index != SparsityPattern::invalid_entry) ||
+              (value == number()),
+              ExcInvalidIndex(i, j));
+      return;
+    }
+
+  val[index] = value;
+}
+
+
+// Set a block of entries, using @p indices as both row and column numbers.
+template <typename number>
+template <typename number2>
+inline
+void
+SparseMatrix<number>::set (const std::vector<size_type> &indices,
+                           const FullMatrix<number2>    &values,
+                           const bool                    elide_zero_values)
+{
+  Assert (indices.size() == values.m(),
+          ExcDimensionMismatch(indices.size(), values.m()));
+  Assert (values.m() == values.n(), ExcNotQuadratic());
+
+  for (size_type i=0; i<indices.size(); ++i)
+    set (indices[i], indices.size(), &indices[0], &values(i,0),
+         elide_zero_values);
+}
+
+
+// Set a block of entries: row i of @p values goes to matrix row row_indices[i].
+template <typename number>
+template <typename number2>
+inline
+void
+SparseMatrix<number>::set (const std::vector<size_type> &row_indices,
+                           const std::vector<size_type> &col_indices,
+                           const FullMatrix<number2>    &values,
+                           const bool                    elide_zero_values)
+{
+  Assert (row_indices.size() == values.m(),
+          ExcDimensionMismatch(row_indices.size(), values.m()));
+  Assert (col_indices.size() == values.n(),
+          ExcDimensionMismatch(col_indices.size(), values.n()));
+
+  for (size_type i=0; i<row_indices.size(); ++i)
+    set (row_indices[i], col_indices.size(), &col_indices[0], &values(i,0),
+         elide_zero_values);
+}
+
+
+// Set several entries of row @p row by forwarding to the pointer-based set().
+template <typename number>
+template <typename number2>
+inline
+void
+SparseMatrix<number>::set (const size_type               row,
+                           const std::vector<size_type> &col_indices,
+                           const std::vector<number2>   &values,
+                           const bool                    elide_zero_values)
+{
+  Assert (col_indices.size() == values.size(),
+          ExcDimensionMismatch(col_indices.size(), values.size()));
+
+  set (row, col_indices.size(), &col_indices[0], &values[0],
+       elide_zero_values);
+}
+
+
+// Add @p value to the single entry (i,j); adding zero returns immediately.
+template <typename number>
+inline
+void
+SparseMatrix<number>::add (const size_type i,
+                           const size_type j,
+                           const number    value)
+{
+  AssertIsFinite(value);
+
+  if (value == number())
+    return;
+
+  const size_type index = cols->operator()(i, j);
+  // Zero values were already filtered out above, so the (value == number())
+  // alternative in the assertion below can never hold here: adding to an
+  // entry outside the sparsity pattern always triggers ExcInvalidIndex.
+  if (index == SparsityPattern::invalid_entry)
+    {
+      Assert ((index != SparsityPattern::invalid_entry) ||
+              (value == number()),
+              ExcInvalidIndex(i, j));
+      return;
+    }
+
+  val[index] += value;
+}
+
+
+// Add a block of entries, using @p indices as both row and column numbers.
+template <typename number>
+template <typename number2>
+inline
+void
+SparseMatrix<number>::add (const std::vector<size_type> &indices,
+                           const FullMatrix<number2>    &values,
+                           const bool                    elide_zero_values)
+{
+  Assert (indices.size() == values.m(),
+          ExcDimensionMismatch(indices.size(), values.m()));
+  Assert (values.m() == values.n(), ExcNotQuadratic());
+
+  for (size_type i=0; i<indices.size(); ++i)
+    add (indices[i], indices.size(), &indices[0], &values(i,0),
+         elide_zero_values);
+}
+
+
+// Add a block of entries: row i of @p values is added to matrix row row_indices[i].
+template <typename number>
+template <typename number2>
+inline
+void
+SparseMatrix<number>::add (const std::vector<size_type> &row_indices,
+                           const std::vector<size_type> &col_indices,
+                           const FullMatrix<number2>    &values,
+                           const bool                    elide_zero_values)
+{
+  Assert (row_indices.size() == values.m(),
+          ExcDimensionMismatch(row_indices.size(), values.m()));
+  Assert (col_indices.size() == values.n(),
+          ExcDimensionMismatch(col_indices.size(), values.n()));
+
+  for (size_type i=0; i<row_indices.size(); ++i)
+    add (row_indices[i], col_indices.size(), &col_indices[0], &values(i,0),
+         elide_zero_values);
+}
+
+
+// Add to several entries of row @p row by forwarding to the pointer-based add().
+template <typename number>
+template <typename number2>
+inline
+void
+SparseMatrix<number>::add (const size_type               row,
+                           const std::vector<size_type> &col_indices,
+                           const std::vector<number2>   &values,
+                           const bool                    elide_zero_values)
+{
+  Assert (col_indices.size() == values.size(),
+          ExcDimensionMismatch(col_indices.size(), values.size()));
+
+  add (row, col_indices.size(), &col_indices[0], &values[0],
+       elide_zero_values);
+}
+
+
+// Multiply every stored entry by @p factor.
+template <typename number>
+inline
+SparseMatrix<number> &
+SparseMatrix<number>::operator *= (const number factor)
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+
+  number             *val_ptr    = &val[0];
+  const number *const end_ptr    = &val[cols->n_nonzero_elements()];
+
+  while (val_ptr != end_ptr)
+    *val_ptr++ *= factor;
+
+  return *this;
+}
+
+
+// Divide every stored entry by @p factor, by multiplying with its reciprocal.
+template <typename number>
+inline
+SparseMatrix<number> &
+SparseMatrix<number>::operator /= (const number factor)
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  Assert (factor != number(), ExcDivideByZero());
+
+  const number factor_inv = number(1.) / factor;
+
+  number             *val_ptr    = &val[0];
+  const number *const end_ptr    = &val[cols->n_nonzero_elements()];
+
+  while (val_ptr != end_ptr)
+    *val_ptr++ *= factor_inv;
+
+  return *this;
+}
+
+
+// Return the value of entry (i,j), which must be part of the sparsity pattern.
+template <typename number>
+inline
+number SparseMatrix<number>::operator () (const size_type i,
+                                          const size_type j) const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (cols->operator()(i,j) != SparsityPattern::invalid_entry,
+          ExcInvalidIndex(i,j));
+  return val[cols->operator()(i,j)];
+}
+
+
+// Return the value of entry (i,j), or zero if it is not in the sparsity pattern.
+template <typename number>
+inline
+number SparseMatrix<number>::el (const size_type i,
+                                 const size_type j) const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  const size_type index = cols->operator()(i,j);
+
+  if (index != SparsityPattern::invalid_entry)
+    return val[index];
+  else
+    return 0;
+}
+
+
+// Return the value of the i-th diagonal entry of a quadratic matrix.
+template <typename number>
+inline
+number SparseMatrix<number>::diag_element (const size_type i) const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (m() == n(),  ExcNotQuadratic());
+  AssertIndexRange(i, m());
+
+  // Use that the first element in each row of a quadratic matrix is the main
+  // diagonal
+  return val[cols->rowstart[i]];
+}
+
+
+// Return a writable reference to the i-th diagonal entry of a quadratic matrix.
+template <typename number>
+inline
+number &SparseMatrix<number>::diag_element (const size_type i)
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (m() == n(),  ExcNotQuadratic());
+  AssertIndexRange(i, m());
+
+  // Use that the first element in each row of a quadratic matrix is the main
+  // diagonal
+  return val[cols->rowstart[i]];
+}
+
+
+// Copy entries row by row from a range of m() containers of (column, value) pairs.
+template <typename number>
+template <typename ForwardIterator>
+void
+SparseMatrix<number>::copy_from (const ForwardIterator begin,
+                                 const ForwardIterator end)
+{
+  Assert (static_cast<size_type>(std::distance (begin, end)) == m(),
+          ExcIteratorRange (std::distance (begin, end), m()));
+
+  // for use in the inner loop, we define a typedef to the type of the inner
+  // iterators
+  typedef typename std::iterator_traits<ForwardIterator>::value_type::const_iterator inner_iterator;
+  size_type row=0;
+  for (ForwardIterator i=begin; i!=end; ++i, ++row)
+    {
+      const inner_iterator end_of_row = i->end();
+      for (inner_iterator j=i->begin(); j!=end_of_row; ++j)
+        // j->first is the column index, j->second the value to write
+        set (row, j->first, j->second);
+    };
+}
+
+
+//---------------------------------------------------------------------------
+
+
+namespace SparseMatrixIterators
+{
+  template <typename number>
+  inline
+  Accessor<number,true>::
+  Accessor (const MatrixType   *matrix,
+            const std::size_t   index_within_matrix)
+    :   // the base class accessor receives the sparsity pattern and the position
+    SparsityPatternIterators::Accessor (&matrix->get_sparsity_pattern(),
+                                        index_within_matrix),
+    matrix (matrix)
+  {}
+
+
+  // Const accessor built from the matrix alone, without an explicit position.
+  template <typename number>
+  inline
+  Accessor<number,true>::
+  Accessor (const MatrixType *matrix)
+    :
+    SparsityPatternIterators::Accessor (&matrix->get_sparsity_pattern()),
+    matrix (matrix)
+  {}
+
+
+  // Conversion from a non-const accessor; refers to the same matrix as @p a.
+  template <typename number>
+  inline
+  Accessor<number,true>::
+  Accessor (const SparseMatrixIterators::Accessor<number,false> &a)
+    :
+    SparsityPatternIterators::Accessor (a),
+    matrix (&a.get_matrix())
+  {}
+
+
+  // Return (by value) the matrix entry this accessor currently points to.
+  template <typename number>
+  inline
+  number
+  Accessor<number, true>::value () const
+  {
+    AssertIndexRange(index_within_sparsity, matrix->n_nonzero_elements());
+    return matrix->val[index_within_sparsity];
+  }
+
+
+  // Return a const reference to the matrix this accessor works on.
+  template <typename number>
+  inline
+  const typename Accessor<number, true>::MatrixType &
+  Accessor<number, true>::get_matrix () const
+  {
+    return *matrix;
+  }
+
+
+  // Remember the accessor to act on; the unnamed bool argument is not used.
+  template <typename number>
+  inline
+  Accessor<number, false>::Reference::Reference (
+    const Accessor *accessor,
+    const bool)
+    :
+    accessor (accessor)
+  {}
+
+  // Read access: convert to the current value of the referenced matrix entry.
+  template <typename number>
+  inline
+  Accessor<number, false>::Reference::operator number() const
+  {
+    AssertIndexRange(accessor->index_within_sparsity, accessor->matrix->n_nonzero_elements());
+    return accessor->matrix->val[accessor->index_within_sparsity];
+  }
+
+
+  // Write access: overwrite the referenced matrix entry with @p n.
+  template <typename number>
+  inline
+  const typename Accessor<number, false>::Reference &
+  Accessor<number, false>::Reference::operator = (const number n) const
+  {
+    AssertIndexRange(accessor->index_within_sparsity, accessor->matrix->n_nonzero_elements());
+    accessor->matrix->val[accessor->index_within_sparsity] = n;
+    return *this;
+  }
+
+
+  // Write access: add @p n to the referenced matrix entry.
+  template <typename number>
+  inline
+  const typename Accessor<number, false>::Reference &
+  Accessor<number, false>::Reference::operator += (const number n) const
+  {
+    AssertIndexRange(accessor->index_within_sparsity, accessor->matrix->n_nonzero_elements());
+    accessor->matrix->val[accessor->index_within_sparsity] += n;
+    return *this;
+  }
+
+
+  // Write access: subtract @p n from the referenced matrix entry.
+  template <typename number>
+  inline
+  const typename Accessor<number, false>::Reference &
+  Accessor<number, false>::Reference::operator -= (const number n) const
+  {
+    AssertIndexRange(accessor->index_within_sparsity, accessor->matrix->n_nonzero_elements());
+    accessor->matrix->val[accessor->index_within_sparsity] -= n;
+    return *this;
+  }
+
+
+  // Write access: multiply the referenced matrix entry by @p n.
+  template <typename number>
+  inline
+  const typename Accessor<number, false>::Reference &
+  Accessor<number, false>::Reference::operator *= (const number n) const
+  {
+    AssertIndexRange(accessor->index_within_sparsity, accessor->matrix->n_nonzero_elements());
+    accessor->matrix->val[accessor->index_within_sparsity] *= n;
+    return *this;
+  }
+
+
+  // Write access: divide the referenced matrix entry by @p n.
+  template <typename number>
+  inline
+  const typename Accessor<number, false>::Reference &
+  Accessor<number, false>::Reference::operator /= (const number n) const
+  {
+    AssertIndexRange(accessor->index_within_sparsity, accessor->matrix->n_nonzero_elements());
+    accessor->matrix->val[accessor->index_within_sparsity] /= n;
+    return *this;
+  }
+
+
+  // Non-const accessor: pass the sparsity pattern and @p index to the base class.
+  template <typename number>
+  inline
+  Accessor<number,false>::
+  Accessor (MatrixType         *matrix,
+            const std::size_t   index)
+    :
+    SparsityPatternIterators::Accessor (&matrix->get_sparsity_pattern(),
+                                        index),
+    matrix (matrix)
+  {}
+
+
+  // Non-const accessor built from the matrix alone, without an explicit position.
+  template <typename number>
+  inline
+  Accessor<number,false>::
+  Accessor (MatrixType         *matrix)
+    :
+    SparsityPatternIterators::Accessor (&matrix->get_sparsity_pattern()),
+    matrix (matrix)
+  {}
+
+
+  // Return a Reference proxy through which the current entry is read or written.
+  template <typename number>
+  inline
+  typename Accessor<number, false>::Reference
+  Accessor<number, false>::value() const
+  {
+    return Reference(this,true);
+  }
+
+
+
+  // Return a (non-const) reference to the matrix this accessor works on.
+  template <typename number>
+  inline
+  typename Accessor<number, false>::MatrixType &
+  Accessor<number, false>::get_matrix () const
+  {
+    return *matrix;
+  }
+
+
+  // Construct the iterator by building its accessor from @p matrix and @p index.
+  template <typename number, bool Constness>
+  inline
+  Iterator<number, Constness>::
+  Iterator (MatrixType        *matrix,
+            const std::size_t  index)
+    :
+    accessor(matrix, index)
+  {}
+
+
+  // Construct the iterator from @p matrix alone, without an explicit position.
+  template <typename number, bool Constness>
+  inline
+  Iterator<number, Constness>::
+  Iterator (MatrixType *matrix)
+    :
+    accessor(matrix)
+  {}
+
+
+  // Conversion from a non-const iterator, using the accessor it dereferences to.
+  template <typename number, bool Constness>
+  inline
+  Iterator<number, Constness>::
+  Iterator (const SparseMatrixIterators::Iterator<number,false> &i)
+    :
+    accessor(*i)
+  {}
+
+
+  // Prefix increment: advance the accessor and return this iterator.
+  template <typename number, bool Constness>
+  inline
+  Iterator<number, Constness> &
+  Iterator<number,Constness>::operator++ ()
+  {
+    accessor.advance ();
+    return *this;
+  }
+
+  // Postfix increment: advance the accessor, return a copy of the old state.
+  template <typename number, bool Constness>
+  inline
+  Iterator<number,Constness>
+  Iterator<number,Constness>::operator++ (int)
+  {
+    const Iterator iter = *this;
+    accessor.advance ();
+    return iter;
+  }
+
+  // Dereferencing yields the accessor held by this iterator.
+  template <typename number, bool Constness>
+  inline
+  const Accessor<number,Constness> &
+  Iterator<number,Constness>::operator* () const
+  {
+    return accessor;
+  }
+
+  // Member access yields a pointer to the accessor held by this iterator.
+  template <typename number, bool Constness>
+  inline
+  const Accessor<number,Constness> *
+  Iterator<number,Constness>::operator-> () const
+  {
+    return &accessor;
+  }
+
+  // Two iterators are equal if their accessors compare equal.
+  template <typename number, bool Constness>
+  inline
+  bool
+  Iterator<number,Constness>::
+  operator == (const Iterator &other) const
+  {
+    return (accessor == other.accessor);
+  }
+
+  // Inequality is defined as the negation of operator==.
+  template <typename number, bool Constness>
+  inline
+  bool
+  Iterator<number,Constness>::
+  operator != (const Iterator &other) const
+  {
+    return ! (*this == other);
+  }
+
+  // Order two iterators into the same matrix by comparing their accessors.
+  template <typename number, bool Constness>
+  inline
+  bool
+  Iterator<number,Constness>::
+  operator < (const Iterator &other) const
+  {
+    Assert (&accessor.get_matrix() == &other.accessor.get_matrix(),
+            ExcInternalError());
+
+    return (accessor < other.accessor);
+  }
+
+  // Implemented through operator< with the two operands swapped.
+  template <typename number, bool Constness>
+  inline
+  bool
+  Iterator<number,Constness>::
+  operator > (const Iterator &other) const
+  {
+    return (other < *this);
+  }
+
+  // Difference of the positions of two iterators into the same matrix.
+  template <typename number, bool Constness>
+  inline
+  int
+  Iterator<number,Constness>::
+  operator - (const Iterator &other) const
+  {
+    Assert (&accessor.get_matrix() == &other.accessor.get_matrix(),
+            ExcInternalError());
+
+    return (*this)->index_within_sparsity - other->index_within_sparsity;
+  }
+
+
+  // Return a copy of this iterator advanced n times by repeated increment.
+  template <typename number, bool Constness>
+  inline
+  Iterator<number,Constness>
+  Iterator<number,Constness>::
+  operator + (const size_type n) const
+  {
+    Iterator x = *this;
+    for (size_type i=0; i<n; ++i)
+      ++x;
+
+    return x;
+  }
+
+}
+
+
+// Const iterator to the first stored entry of the matrix (position 0).
+template <typename number>
+inline
+typename SparseMatrix<number>::const_iterator
+SparseMatrix<number>::begin () const
+{
+  return const_iterator(this, 0);
+}
+
+// Const end iterator, built by the iterator constructor that takes no position.
+template <typename number>
+inline
+typename SparseMatrix<number>::const_iterator
+SparseMatrix<number>::end () const
+{
+  return const_iterator(this);
+}
+
+// Non-const iterator to the first stored entry of the matrix (position 0).
+template <typename number>
+inline
+typename SparseMatrix<number>::iterator
+SparseMatrix<number>::begin ()
+{
+  return iterator (this, 0);
+}
+
+// Non-const end iterator: position rowstart[rows] of the sparsity pattern.
+template <typename number>
+inline
+typename SparseMatrix<number>::iterator
+SparseMatrix<number>::end ()
+{
+  return iterator(this, cols->rowstart[cols->rows]);
+}
+
+// Const iterator to the start of row r, i.e. to position rowstart[r].
+template <typename number>
+inline
+typename SparseMatrix<number>::const_iterator
+SparseMatrix<number>::begin (const size_type r) const
+{
+  Assert (r<m(), ExcIndexRange(r,0,m()));
+
+  return const_iterator(this, cols->rowstart[r]);
+}
+
+
+// Const iterator past the end of row r, i.e. to position rowstart[r+1].
+template <typename number>
+inline
+typename SparseMatrix<number>::const_iterator
+SparseMatrix<number>::end (const size_type r) const
+{
+  Assert (r<m(), ExcIndexRange(r,0,m()));
+
+  return const_iterator(this, cols->rowstart[r+1]);
+}
+
+
+// Non-const iterator to the start of row r, i.e. to position rowstart[r].
+template <typename number>
+inline
+typename SparseMatrix<number>::iterator
+SparseMatrix<number>::begin (const size_type r)
+{
+  Assert (r<m(), ExcIndexRange(r,0,m()));
+
+  return iterator(this, cols->rowstart[r]);
+}
+
+
+// Non-const iterator past the end of row r, i.e. to position rowstart[r+1].
+template <typename number>
+inline
+typename SparseMatrix<number>::iterator
+SparseMatrix<number>::end (const size_type r)
+{
+  Assert (r<m(), ExcIndexRange(r,0,m()));
+
+  return iterator(this, cols->rowstart[r+1]);
+}
+
+// Write all stored entries to @p out, either one "(i,j) value" per line or, if
+// @p across is set, as " i,j:value" items on a single line.
+template <typename number>
+template <class StreamType>
+inline
+void SparseMatrix<number>::print (StreamType &out,
+                                  const bool  across,
+                                  const bool  diagonal_first) const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  // used if !diagonal_first: the diagonal entry of the current row, not yet printed
+  bool hanging_diagonal = false;
+  number diagonal = number();
+  // a held-back diagonal is printed before the first larger column, or at the row's end
+  for (size_type i=0; i<cols->rows; ++i)
+    {
+      for (size_type j=cols->rowstart[i]; j<cols->rowstart[i+1]; ++j)
+        {
+          if (!diagonal_first && i == cols->colnums[j])
+            {
+              diagonal = val[j];
+              hanging_diagonal = true;
+            }
+          else
+            {
+              if (hanging_diagonal && cols->colnums[j]>i)
+                {
+                  if (across)
+                    out << ' ' << i << ',' << i << ':' << diagonal;
+                  else
+                    out << '(' << i << ',' << i << ") " << diagonal << std::endl;
+                  hanging_diagonal = false;
+                }
+              if (across)
+                out << ' ' << i << ',' << cols->colnums[j] << ':' << val[j];
+              else
+                out << "(" << i << "," << cols->colnums[j] << ") " << val[j] << std::endl;
+            }
+        }
+      if (hanging_diagonal)
+        {
+          if (across)
+            out << ' ' << i << ',' << i << ':' << diagonal;
+          else
+            out << '(' << i << ',' << i << ") " << diagonal << std::endl;
+          hanging_diagonal = false;
+        }
+    }
+  if (across)
+    out << std::endl;
+}
+
+// Helper called before elements are added; this class needs no preparation.
+template <typename number>
+inline
+void
+SparseMatrix<number>::
+prepare_add()
+{
+  //nothing to do here
+}
+
+
+// Helper called before elements are set; this class needs no preparation.
+template <typename number>
+inline
+void
+SparseMatrix<number>::
+prepare_set()
+{
+  //nothing to do here
+}
+
+#endif // DOXYGEN
+
+
+/*----------------------------   sparse_matrix.h     ---------------------------*/
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
+/*----------------------------   sparse_matrix.h     ---------------------------*/
diff --git a/include/deal.II/lac/sparse_matrix.templates.h b/include/deal.II/lac/sparse_matrix.templates.h
new file mode 100644
index 0000000..4f145c5
--- /dev/null
+++ b/include/deal.II/lac/sparse_matrix.templates.h
@@ -0,0 +1,2000 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__sparse_matrix_templates_h
+#define dealii__sparse_matrix_templates_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/template_constraints.h>
+#include <deal.II/base/parallel.h>
+#include <deal.II/base/thread_management.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/trilinos_sparse_matrix.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/lac/vector_memory.h>
+
+#include <ostream>
+#include <iomanip>
+#include <algorithm>
+#include <functional>
+#include <cmath>
+#include <vector>
+#include <numeric>
+#include <deal.II/base/std_cxx11/bind.h>
+
+
+
+DEAL_II_NAMESPACE_OPEN
+
+// Create a matrix with no sparsity pattern attached and no values allocated.
+template <typename number>
+SparseMatrix<number>::SparseMatrix ()
+  :
+  cols(0, "SparseMatrix"),
+  val(0),
+  max_len(0)
+{}
+
+
+// Copy constructor; asserts that @p m is still empty, and copies no data.
+template <typename number>
+SparseMatrix<number>::SparseMatrix (const SparseMatrix &m)
+  :
+  Subscriptor (m),
+  cols(0, "SparseMatrix"),
+  val(0),
+  max_len(0)
+{
+  Assert (m.cols==0, ExcInvalidConstructorCall());
+  Assert (m.val==0, ExcInvalidConstructorCall());
+  Assert (m.max_len==0, ExcInvalidConstructorCall());
+}
+
+
+// Copy assignment; asserts that @p m is empty and leaves this object unchanged.
+template <typename number>
+SparseMatrix<number> &
+SparseMatrix<number>::operator = (const SparseMatrix<number> &m)
+{
+  (void)m;
+  Assert (m.cols==0, ExcInvalidConstructorCall());
+  Assert (m.val==0, ExcInvalidConstructorCall());
+  Assert (m.max_len==0, ExcInvalidConstructorCall());
+
+  return *this;
+}
+
+
+// Create a matrix on the sparsity pattern @p c; all work is done by reinit().
+template <typename number>
+SparseMatrix<number>::SparseMatrix (const SparsityPattern &c)
+  :
+  cols(0, "SparseMatrix"),
+  val(0),
+  max_len(0)
+{
+  reinit (c);
+}
+
+
+// Create a matrix on the sparsity pattern @p c and set its diagonal entries to one.
+template <typename number>
+SparseMatrix<number>::SparseMatrix (const SparsityPattern &c,
+                                    const IdentityMatrix  &id)
+  :
+  cols(0, "SparseMatrix"),
+  val(0),
+  max_len(0)
+{
+  (void)id;
+  Assert (c.n_rows() == id.m(), ExcDimensionMismatch (c.n_rows(), id.m()));
+  Assert (c.n_cols() == id.n(), ExcDimensionMismatch (c.n_cols(), id.n()));
+
+  reinit (c);
+  for (size_type i=0; i<n(); ++i)
+    this->set(i,i,1.);
+}
+
+
+// Detach from the sparsity pattern and free the array of values.
+template <typename number>
+SparseMatrix<number>::~SparseMatrix ()
+{
+  cols = 0;
+
+  if (val != 0)
+    delete[] val;
+}
+
+
+// Helper used by SparseMatrix::operator=(double) when zeroing entries in parallel.
+namespace internal
+{
+  namespace SparseMatrix
+  {
+    typedef types::global_dof_index size_type;
+    // Set dst[begin], ..., dst[end-1] to zero bytes using std::memset.
+    template<typename T>
+    void zero_subrange (const size_type begin,
+                        const size_type end,
+                        T *dst)
+    {
+      std::memset (dst+begin,0,(end-begin)*sizeof(T));
+    }
+  }
+}
+
+// Set all stored entries to zero; the only value allowed for @p d is zero.
+// Large matrices are zeroed in parallel chunks, small ones by a single memset.
+template <typename number>
+SparseMatrix<number> &
+SparseMatrix<number>::operator = (const double d)
+{
+  (void)d;
+  Assert (d==0, ExcScalarAssignmentOnlyForZeroValue());
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (cols->compressed || cols->empty(), SparsityPattern::ExcNotCompressed());
+
+  // An empty pattern (allowed above) has no entries and no rows: return
+  // early so that the grain size below never divides by m()==0.
+  const size_type matrix_size = cols->n_nonzero_elements();
+  if (matrix_size == 0)
+    return *this;
+
+  // Zero in parallel with a layout similar to matrix-vector products: on some
+  // NUMA systems memory is bound to the bank of first access, and operator=
+  // is usually that access. The grain size is minimum_parallel_grain_size
+  // rows, weighted by the average number of nonzero entries per row.
+  const size_type grain_size =
+    internal::SparseMatrix::minimum_parallel_grain_size * (matrix_size+m()) / m();
+  if (matrix_size>grain_size)
+    parallel::apply_to_subranges (0U, matrix_size,
+                                  std_cxx11::bind(&internal::SparseMatrix::template
+                                                  zero_subrange<number>,
+                                                  std_cxx11::_1, std_cxx11::_2,
+                                                  val),
+                                  grain_size);
+  else
+    std::memset (&val[0], 0, matrix_size*sizeof(number));
+
+  return *this;
+}
+
+
+
+/**
+ * Turn this matrix into the identity matrix: all stored entries are zeroed
+ * and the diagonal entries are then set to one. The dimensions of the
+ * sparsity pattern must match those of @p id.
+ *
+ * @return A reference to this matrix.
+ */
+template <typename number>
+SparseMatrix<number> &
+SparseMatrix<number>::operator= (const IdentityMatrix  &id)
+{
+  (void)id;
+  Assert (cols->n_rows() == id.m(),
+          ExcDimensionMismatch (cols->n_rows(), id.m()));
+  Assert (cols->n_cols() == id.n(),
+          ExcDimensionMismatch (cols->n_cols(), id.n()));
+
+  // wipe the old content ...
+  *this = 0;
+
+  // ... and put ones on the diagonal
+  const size_type n_diagonal_entries = n();
+  for (size_type k=0; k<n_diagonal_entries; ++k)
+    this->set(k,k,1.);
+
+  return *this;
+}
+
+
+
+/**
+ * Attach this matrix to the given sparsity pattern and set all entries to
+ * zero. Only the address of @p sparsity is stored. The value array is
+ * reallocated only if the new pattern needs more entries than are currently
+ * allocated (or nothing is allocated yet).
+ *
+ * If the pattern is empty, the value array is released and the function
+ * returns without zeroing anything.
+ *
+ * @param sparsity The sparsity pattern this matrix shall use from now on.
+ */
+template <typename number>
+void
+SparseMatrix<number>::reinit (const SparsityPattern &sparsity)
+{
+  cols = &sparsity;
+
+  if (cols->empty())
+    {
+      // delete[] on a null pointer is a no-op
+      delete[] val;
+      val = 0;
+      max_len = 0;
+      return;
+    }
+
+  const std::size_t N = cols->n_nonzero_elements();
+  if (N > max_len || max_len == 0)
+    {
+      // Release the old array and reset the bookkeeping *before* allocating
+      // the new one: should the allocation throw, the object must not be
+      // left with a dangling pointer that the destructor would delete a
+      // second time.
+      delete[] val;
+      val = 0;
+      max_len = 0;
+
+      val = new number[N];
+      max_len = N;
+    }
+
+  *this = 0.;
+}
+
+
+
+/**
+ * Return the matrix to the uninitialized state: the pointer to the sparsity
+ * pattern is reset and the array of entries is freed.
+ */
+template <typename number>
+void
+SparseMatrix<number>::clear ()
+{
+  cols = 0;
+
+  // deleting a null pointer is harmless, so no check is required
+  delete[] val;
+  val = 0;
+  max_len = 0;
+}
+
+
+
+/**
+ * Return whether the matrix is empty, i.e. whether it either has no sparsity
+ * pattern attached or the attached pattern reports itself as empty.
+ */
+template <typename number>
+bool
+SparseMatrix<number>::empty () const
+{
+  // the pattern is only queried if there is one
+  return (cols == 0) || cols->empty();
+}
+
+
+
+/**
+ * Return the length of row @p row as reported by the sparsity pattern.
+ */
+template <typename number>
+typename SparseMatrix<number>::size_type
+SparseMatrix<number>::get_row_length (const size_type row) const
+{
+  Assert (cols != 0, ExcNotInitialized());
+
+  const SparsityPattern &pattern = *cols;
+  return pattern.row_length(row);
+}
+
+
+
+/**
+ * Return the number of nonzero elements as reported by the sparsity pattern,
+ * i.e. the number of entries for which storage is provided.
+ */
+template <typename number>
+typename SparseMatrix<number>::size_type
+SparseMatrix<number>::n_nonzero_elements () const
+{
+  Assert (cols != 0, ExcNotInitialized());
+
+  const SparsityPattern &pattern = *cols;
+  return pattern.n_nonzero_elements ();
+}
+
+
+
+/**
+ * Count the stored entries whose absolute value is strictly larger than
+ * @p threshold.
+ *
+ * @param threshold Non-negative bound (checked by an assertion).
+ */
+template <typename number>
+typename SparseMatrix<number>::size_type
+SparseMatrix<number>::n_actually_nonzero_elements (const double threshold) const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (threshold >= 0, ExcMessage ("Negative threshold!"));
+
+  const size_type n_stored = n_nonzero_elements();
+  size_type n_above_threshold = 0;
+
+  // walk over all stored entries and count the large ones
+  size_type k = 0;
+  while (k < n_stored)
+    {
+      if (std::abs(val[k]) > threshold)
+        ++n_above_threshold;
+      ++k;
+    }
+
+  return n_above_threshold;
+}
+
+
+
+/**
+ * Symmetrize the matrix: each entry left of the diagonal and its transposed
+ * counterpart are both replaced by their mean value. The sparsity pattern
+ * must be quadratic (checked by an assertion).
+ */
+template <typename number>
+void
+SparseMatrix<number>::symmetrize ()
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (cols->rows == cols->cols, ExcNotQuadratic());
+
+  const size_type n_rows = m();
+  for (size_type row=0; row<n_rows; ++row)
+    {
+      // position behind the first entry of the row, which is the diagonal
+      number *entry = &val[cols->rowstart[row]];
+      if (m() == n())
+        ++entry;
+      const size_type *column = &cols->colnums[cols->rowstart[row]+1];
+      const number *const row_end = &val[cols->rowstart[row+1]];
+
+      // visit the entries of this row that lie left of the diagonal
+      for (; (entry != row_end) && (*column<row); ++entry, ++column)
+        {
+          // average this entry with the transposed one ...
+          const number transposed_value = val[(*cols)(*column,row)];
+          const number mean_value = (*entry + transposed_value) / number(2.0);
+
+          // ... and store the average in both places
+          *entry = mean_value;
+          set (*column, row, mean_value);
+        }
+    }
+}
+
+
+
+/**
+ * Copy the entries of @p matrix into this matrix. Both matrices must use the
+ * very same sparsity pattern object (checked by an assertion); the entries
+ * are converted element by element from somenumber to number.
+ *
+ * @return A reference to this matrix.
+ */
+template <typename number>
+template <typename somenumber>
+SparseMatrix<number> &
+SparseMatrix<number>::copy_from (const SparseMatrix<somenumber> &matrix)
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  Assert (cols == matrix.cols, ExcDifferentSparsityPatterns());
+
+  // source range: all entries stored by the other matrix
+  const somenumber *const source_begin = &matrix.val[0];
+  const somenumber *const source_end   = &matrix.val[cols->n_nonzero_elements()];
+
+  std::copy (source_begin, source_end, &val[0]);
+
+  return *this;
+}
+
+
+
+/**
+ * Copy the nonzero entries of a full matrix into this matrix. All stored
+ * entries are zeroed first; entries of @p matrix that compare equal to
+ * somenumber() are not written.
+ */
+template <typename number>
+template <typename somenumber>
+void
+SparseMatrix<number>::copy_from (const FullMatrix<somenumber> &matrix)
+{
+  // start from an all-zero matrix
+  *this = 0;
+
+  // transfer every nonzero entry of the full matrix
+  for (size_type i=0; i<matrix.m(); ++i)
+    for (size_type j=0; j<matrix.n(); ++j)
+      {
+        if (matrix(i,j) == somenumber())
+          continue;
+
+        set (i, j, number(matrix(i,j)));
+      }
+}
+
+
+
+#ifdef DEAL_II_WITH_TRILINOS
+/**
+ * Copy the entries of a Trilinos matrix into this matrix, one row at a time.
+ * The stored entries of this matrix are zeroed first; afterwards each row of
+ * @p matrix is extracted into temporary arrays and written with set().
+ * Both matrices must have the same number of rows and columns (checked by
+ * assertions).
+ *
+ * NOTE(review): the column index cache holds size_type but is handed to
+ * Trilinos through a reinterpret_cast to TrilinosWrappers::types::int_type*;
+ * this presumably requires both types to have the same size -- confirm.
+ *
+ * @return A reference to this matrix.
+ */
+template <typename number>
+SparseMatrix<number> &
+SparseMatrix<number>::copy_from (const TrilinosWrappers::SparseMatrix &matrix)
+{
+  Assert (m() == matrix.m(), ExcDimensionMismatch(m(), matrix.m()));
+  Assert (n() == matrix.n(), ExcDimensionMismatch(n(), matrix.n()));
+
+  // first delete previous content
+  *this = 0;
+
+  std::vector < TrilinosScalar > value_cache;
+  std::vector<size_type> colnum_cache;
+
+  for (size_type row = 0; row < matrix.m(); ++row)
+    {
+      value_cache.resize(matrix.n()); // room for a completely filled row
+      colnum_cache.resize(matrix.n());
+
+      // copy column indices and values and at the same time enquire about the
+      // length of the row (returned in ncols)
+      int ncols;
+      int ierr
+        = matrix.trilinos_matrix().ExtractGlobalRowCopy
+          (row, matrix.row_length(row), ncols,
+           &(value_cache[0]),
+           reinterpret_cast<TrilinosWrappers::types::int_type *>(&(colnum_cache[0])));
+      (void)ierr; // only used inside the assertion below
+      Assert (ierr==0, ExcTrilinosError(ierr));
+
+      // resize arrays to the size actually used
+      value_cache.resize(ncols);
+      colnum_cache.resize(ncols);
+
+      // then copy everything in one swoop
+      this->set(row,
+                colnum_cache,
+                value_cache);
+    }
+
+  return *this;
+}
+
+#endif
+
+
+/**
+ * Add @p factor times @p matrix to this matrix, entry by entry. Both
+ * matrices must use the very same sparsity pattern object (checked by an
+ * assertion).
+ */
+template <typename number>
+template <typename somenumber>
+void
+SparseMatrix<number>::add (const number factor,
+                           const SparseMatrix<somenumber> &matrix)
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  Assert (cols == matrix.cols, ExcDifferentSparsityPatterns());
+
+  // both value arrays have the same layout, so they can be traversed with a
+  // common index
+  const std::size_t n_entries = cols->n_nonzero_elements();
+  for (std::size_t k=0; k<n_entries; ++k)
+    val[k] += factor * number(matrix.val[k]);
+}
+
+
+
+namespace internal
+{
+  namespace SparseMatrix
+  {
+    /**
+     * Compute the matrix-vector product for the rows begin_row, ...,
+     * end_row-1 only, using the raw arrays of a sparse matrix: @p values
+     * holds the entries, @p rowstart the offset of the first entry of each
+     * row, and @p colnums the column index of each entry.
+     *
+     * If @p add is false, the affected elements of @p dst are overwritten
+     * with the result; otherwise the result is added to them.
+     *
+     * In the sequential case, this function is called on all rows, in the
+     * parallel case it may be called on a subrange, at the discretion of the
+     * task scheduler.
+     */
+    template <typename number,
+              typename InVector,
+              typename OutVector>
+    void vmult_on_subrange (const size_type    begin_row,
+                            const size_type    end_row,
+                            const number      *values,
+                            const std::size_t  *rowstart,
+                            const size_type   *colnums,
+                            const InVector    &src,
+                            OutVector         &dst,
+                            const bool         add)
+    {
+      typedef typename OutVector::value_type value_type;
+
+      // entries and column indices are traversed consecutively over all rows
+      // of the subrange
+      const number    *entry  = &values[rowstart[begin_row]];
+      const size_type *column = &colnums[rowstart[begin_row]];
+      typename OutVector::iterator out = dst.begin() + begin_row;
+
+      if (add)
+        {
+          for (size_type row=begin_row; row<end_row; ++row)
+            {
+              // start from the present content of the destination
+              value_type sum = *out;
+              const number *const row_end = &values[rowstart[row+1]];
+              for (; entry != row_end; ++entry, ++column)
+                sum += value_type(*entry) * value_type(src(*column));
+              *out = sum;
+              ++out;
+            }
+        }
+      else
+        {
+          for (size_type row=begin_row; row<end_row; ++row)
+            {
+              value_type sum = 0.;
+              const number *const row_end = &values[rowstart[row+1]];
+              for (; entry != row_end; ++entry, ++column)
+                sum += value_type(*entry) * value_type(src(*column));
+              *out = sum;
+              ++out;
+            }
+        }
+    }
+  }
+}
+
+/**
+ * Add several values to entries of one row: for j = 0, ..., n_cols-1 the
+ * value values[j] is added to the entry in row @p row and column
+ * col_indices[j].
+ *
+ * Two code paths exist. If @p elide_zero_values is false, @p
+ * col_indices_are_sorted is true and more than three columns are given, the
+ * column indices of the row are traversed linearly alongside the given
+ * indices. Otherwise each entry is located individually, first trying the
+ * position following the previously written entry and falling back to a
+ * lookup in the sparsity pattern.
+ *
+ * Adding to an entry that is not part of the sparsity pattern is flagged by
+ * an assertion unless the value added is zero.
+ *
+ * NOTE(review): unlike set(), this function does not check @p row against
+ * m() -- confirm whether callers rely on that.
+ */
+template <typename number>
+template <typename number2>
+void
+SparseMatrix<number>::add (const size_type  row,
+                           const size_type  n_cols,
+                           const size_type *col_indices,
+                           const number2   *values,
+                           const bool       elide_zero_values,
+                           const bool       col_indices_are_sorted)
+{
+  Assert (cols != 0, ExcNotInitialized());
+
+  // fast path: if we have sufficiently many
+  // columns and sorted indices it is faster
+  // to just go through the column indices
+  // and look whether we found one, rather
+  // than doing many binary searches
+  if (elide_zero_values == false && col_indices_are_sorted == true &&
+      n_cols > 3)
+    {
+      // check whether the given indices are
+      // really sorted (debug mode only)
+#ifdef DEBUG
+      for (size_type i=1; i<n_cols; ++i)
+        Assert (col_indices[i] > col_indices[i-1],
+                ExcMessage("List of indices is unsorted or contains duplicates."));
+#endif
+
+      const size_type *this_cols =
+        &cols->colnums[cols->rowstart[row]];
+      const size_type row_length_1 = cols->row_length(row)-1; // NOTE(review): presumably wraps around for a row without entries -- confirm
+      number *val_ptr = &val[cols->rowstart[row]];
+
+      if (m() == n())
+        {
+
+          // the first entry of the row is expected to be the diagonal (asserted below); find it in col_indices and add it if found
+          Assert (this_cols[0] == row, ExcInternalError());
+          const size_type *diag_pos =
+            Utilities::lower_bound (col_indices,
+                                    col_indices+n_cols,
+                                    row);
+          const size_type diag = diag_pos - col_indices;
+          size_type post_diag = diag;
+          if (diag != n_cols && *diag_pos == row)
+            {
+              val_ptr[0] += *(values+(diag_pos-col_indices));
+              ++post_diag;
+            }
+
+          // add indices before diagonal; counter starts at 1 to skip the diagonal entry
+          size_type counter = 1;
+          for (size_type i=0; i<diag; ++i)
+            {
+              while (this_cols[counter]<col_indices[i] && counter<row_length_1)
+                ++counter;
+
+              Assert ((this_cols[counter] == col_indices[i])
+                      ||
+                      (values[i] == number2()),
+                      ExcInvalidIndex(row,col_indices[i]));
+
+              val_ptr[counter] += values[i];
+            }
+
+          // add indices after diagonal, continuing from the current counter
+          for (size_type i=post_diag; i<n_cols; ++i)
+            {
+              while (this_cols[counter]<col_indices[i] && counter<row_length_1)
+                ++counter;
+
+              Assert ((this_cols[counter] == col_indices[i])
+                      ||
+                      (values[i] == number2()),
+                      ExcInvalidIndex(row,col_indices[i]));
+
+              val_ptr[counter] += values[i];
+            }
+
+          Assert (counter < cols->row_length(row),
+                  ExcMessage("Specified invalid column indices in add function."));
+        }
+      else
+        {
+          size_type counter = 0; // no diagonal entry to skip in this branch
+          for (size_type i=0; i<n_cols; ++i)
+            {
+              while (this_cols[counter]<col_indices[i] && counter<row_length_1)
+                ++counter;
+
+              Assert ((this_cols[counter] == col_indices[i])
+                      ||
+                      (values[i] == number2()),
+                      ExcInvalidIndex(row,col_indices[i]));
+
+              val_ptr[counter] += values[i];
+            }
+          Assert (counter < cols->row_length(row),
+                  ExcMessage("Specified invalid column indices in add function."));
+        }
+      return;
+    }
+
+  // general case: search all the indices
+  // one by one to find out which values we
+  // actually need to add.
+  const size_type *const my_cols = cols->colnums;
+  size_type index = cols->rowstart[row];
+  const size_type next_row_index = cols->rowstart[row+1];
+
+  for (size_type j=0; j<n_cols; ++j)
+    {
+      const number value = number(values[j]);
+      AssertIsFinite(value);
+
+#ifdef DEBUG
+      if (elide_zero_values==true && value == number())
+        continue;
+#else // without DEBUG, zero values are skipped regardless of elide_zero_values
+      if (value == number())
+        continue;
+#endif
+
+      // check whether the next index to add is
+      // the next present index in the sparsity
+      // pattern (otherwise, look the entry up
+      // in the sparsity pattern)
+      if (index < next_row_index && my_cols[index] == col_indices[j])
+        goto add_value;
+
+      index = cols->operator()(row, col_indices[j]);
+
+      // it is allowed to add elements to
+      // the matrix that are not part of
+      // the sparsity pattern, if the
+      // value we add is zero
+      if (index == SparsityPattern::invalid_entry)
+        {
+          Assert (value == number(), ExcInvalidIndex(row,col_indices[j]));
+          continue;
+        }
+
+add_value:
+      val[index] += value;
+      ++index; // the next given column is likely the next stored entry
+    }
+}
+
+
+/**
+ * Set several entries of one row: for j = 0, ..., n_cols-1 the entry in row
+ * @p row and column col_indices[j] is set to values[j].
+ *
+ * If @p elide_zero_values is true, values equal to zero are skipped, and
+ * every remaining value must refer to an entry of the sparsity pattern
+ * (checked by an assertion). If it is false, zero values are written like
+ * any other value, and a value referring to an entry outside the sparsity
+ * pattern is only accepted if it is zero.
+ *
+ * Each entry is located by first trying the position following the
+ * previously written one and, failing that, by a lookup in the sparsity
+ * pattern.
+ */
+template <typename number>
+template <typename number2>
+void
+SparseMatrix<number>::set (const size_type  row,
+                           const size_type  n_cols,
+                           const size_type *col_indices,
+                           const number2   *values,
+                           const bool       elide_zero_values)
+{
+  Assert (cols != 0, ExcNotInitialized());
+  AssertIndexRange(row, m());
+
+  // First, search all the indices to find
+  // out which values we actually need to
+  // set. 'index' is the candidate position.
+  const size_type *my_cols = cols->colnums;
+  std::size_t index = cols->rowstart[row], next_index = index;
+  const std::size_t next_row_index = cols->rowstart[row+1];
+
+  if (elide_zero_values == true)
+    {
+      for (size_type j=0; j<n_cols; ++j)
+        {
+          const number value = number(values[j]);
+          AssertIsFinite(value);
+
+          if (value == number())
+            continue;
+
+          // check whether the next index to set is
+          // the next present index in the sparsity
+          // pattern (otherwise, look the entry up
+          // in the sparsity pattern)
+          if (index != next_row_index && my_cols[index] == col_indices[j])
+            goto set_value;
+
+          next_index = cols->operator()(row, col_indices[j]);
+
+          // zero values were already skipped
+          // above, so a value arriving here for
+          // an entry outside the sparsity
+          // pattern is flagged by the assertion
+          if (next_index == SparsityPattern::invalid_entry)
+            {
+              Assert (false, ExcInvalidIndex(row,col_indices[j]));
+              continue;
+            }
+          index = next_index;
+
+set_value:
+          val[index] = value;
+          ++index;
+        }
+    }
+  else
+    {
+      // same search as above, but zeros are written too; entries outside the pattern are accepted only for zero values
+      for (size_type j=0; j<n_cols; ++j)
+        {
+          const number value = number(values[j]);
+          AssertIsFinite(value);
+
+          if (index != next_row_index && my_cols[index] == col_indices[j])
+            goto set_value_checked;
+
+          next_index = cols->operator()(row, col_indices[j]);
+
+          if (next_index == SparsityPattern::invalid_entry)
+            {
+              Assert (value == number(), ExcInvalidIndex(row,col_indices[j]));
+              continue;
+            }
+          index = next_index;
+
+set_value_checked:
+          val[index] = value;
+          ++index;
+        }
+    }
+}
+
+
+
+/**
+ * Matrix-vector product: dst = M*src, with M being this matrix. The work is
+ * distributed over subranges of rows. Source and destination must be
+ * different objects of matching sizes (checked by assertions).
+ */
+template <typename number>
+template <class OutVector, class InVector>
+void
+SparseMatrix<number>::vmult (OutVector &dst,
+                             const InVector &src) const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  Assert(m() == dst.size(), ExcDimensionMismatch(m(),dst.size()));
+  Assert(n() == src.size(), ExcDimensionMismatch(n(),src.size()));
+
+  Assert (!PointerComparison::equal(&src, &dst), ExcSourceEqualsDestination());
+
+  // the result overwrites the destination rather than being added to it
+  const bool add_to_destination = false;
+
+  parallel::apply_to_subranges (0U, m(),
+                                std_cxx11::bind (&internal::SparseMatrix::vmult_on_subrange
+                                                 <number,InVector,OutVector>,
+                                                 std_cxx11::_1, std_cxx11::_2,
+                                                 val,
+                                                 cols->rowstart,
+                                                 cols->colnums,
+                                                 std_cxx11::cref(src),
+                                                 std_cxx11::ref(dst),
+                                                 add_to_destination),
+                                internal::SparseMatrix::minimum_parallel_grain_size);
+}
+
+
+
+/**
+ * Transposed matrix-vector product: dst = M^T*src, with M being this matrix.
+ * The destination is zeroed first and the contributions are then accumulated
+ * row by row. Source and destination must be different objects of matching
+ * sizes (checked by assertions).
+ */
+template <typename number>
+template <class OutVector, class InVector>
+void
+SparseMatrix<number>::Tvmult (OutVector &dst,
+                              const InVector &src) const
+{
+  Assert (val != 0, ExcNotInitialized());
+  Assert (cols != 0, ExcNotInitialized());
+  Assert(n() == dst.size(), ExcDimensionMismatch(n(),dst.size()));
+  Assert(m() == src.size(), ExcDimensionMismatch(m(),src.size()));
+
+  Assert (!PointerComparison::equal(&src, &dst), ExcSourceEqualsDestination());
+
+  typedef typename OutVector::value_type value_type;
+
+  dst = 0;
+
+  // entry k of row 'row' contributes to the element of dst that belongs to
+  // the column of that entry
+  for (size_type row=0; row<m(); ++row)
+    for (size_type k=cols->rowstart[row]; k<cols->rowstart[row+1]; ++k)
+      {
+        const size_type column = cols->colnums[k];
+        dst(column) += value_type(val[k]) * value_type(src(row));
+      }
+}
+
+
+
+/**
+ * Adding matrix-vector product: dst += M*src, with M being this matrix. The
+ * work is distributed over subranges of rows. Source and destination must be
+ * different objects of matching sizes (checked by assertions).
+ */
+template <typename number>
+template <class OutVector, class InVector>
+void
+SparseMatrix<number>::vmult_add (OutVector &dst,
+                                 const InVector &src) const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  Assert(m() == dst.size(), ExcDimensionMismatch(m(),dst.size()));
+  Assert(n() == src.size(), ExcDimensionMismatch(n(),src.size()));
+
+  Assert (!PointerComparison::equal(&src, &dst), ExcSourceEqualsDestination());
+
+  // the result is added to the present content of the destination
+  const bool add_to_destination = true;
+
+  parallel::apply_to_subranges (0U, m(),
+                                std_cxx11::bind (&internal::SparseMatrix::vmult_on_subrange
+                                                 <number,InVector,OutVector>,
+                                                 std_cxx11::_1, std_cxx11::_2,
+                                                 val,
+                                                 cols->rowstart,
+                                                 cols->colnums,
+                                                 std_cxx11::cref(src),
+                                                 std_cxx11::ref(dst),
+                                                 add_to_destination),
+                                internal::SparseMatrix::minimum_parallel_grain_size);
+}
+
+
+
+/**
+ * Adding transposed matrix-vector product: dst += M^T*src, with M being this
+ * matrix. Source and destination must be different objects of matching sizes
+ * (checked by assertions).
+ */
+template <typename number>
+template <class OutVector, class InVector>
+void
+SparseMatrix<number>::Tvmult_add (OutVector &dst,
+                                  const InVector &src) const
+{
+  Assert (val != 0, ExcNotInitialized());
+  Assert (cols != 0, ExcNotInitialized());
+  Assert(n() == dst.size(), ExcDimensionMismatch(n(),dst.size()));
+  Assert(m() == src.size(), ExcDimensionMismatch(m(),src.size()));
+
+  Assert (!PointerComparison::equal(&src, &dst), ExcSourceEqualsDestination());
+
+  typedef typename OutVector::value_type value_type;
+
+  // entry k of row 'row' contributes to the element of dst that belongs to
+  // the column of that entry
+  for (size_type row=0; row<m(); ++row)
+    {
+      for (size_type k=cols->rowstart[row]; k<cols->rowstart[row+1]; ++k)
+        {
+          const size_type column = cols->colnums[k];
+          dst(column) += value_type(val[k]) * value_type(src(row));
+        }
+    }
+}
+
+
+namespace internal
+{
+  namespace SparseMatrix
+  {
+    /**
+     * Compute the contribution of the rows begin_row, ..., end_row-1 to the
+     * square of the matrix norm of @p v: for each of these rows, the product
+     * of the matrix row with @p v is formed, conjugated, multiplied by
+     * v(row), and the results are summed up. The matrix is given by its raw
+     * arrays @p values, @p rowstart and @p colnums.
+     *
+     * In the sequential case, this function is called on all rows, in the
+     * parallel case it may be called on a subrange, at the discretion of the
+     * task scheduler.
+     */
+    template <typename number,
+              typename InVector>
+    typename InVector::value_type
+    matrix_norm_sqr_on_subrange (const size_type    begin_row,
+                                 const size_type    end_row,
+                                 const number      *values,
+                                 const std::size_t  *rowstart,
+                                 const size_type   *colnums,
+                                 const InVector    &v)
+    {
+      typedef typename InVector::value_type value_type;
+
+      value_type partial_sum = 0.;
+
+      for (size_type row=begin_row; row<end_row; ++row)
+        {
+          // product of this matrix row with v
+          value_type row_times_v = 0;
+          for (size_type k=rowstart[row]; k<rowstart[row+1]; ++k)
+            row_times_v += value_type(values[k]) * v(colnums[k]);
+
+          partial_sum += v(row) * numbers::NumberTraits<value_type>::conjugate(row_times_v);
+        }
+
+      return partial_sum;
+    }
+  }
+}
+
+
+
+/**
+ * Return the square of the norm of @p v with respect to this matrix, i.e.
+ * the sum of the per-row contributions computed by
+ * internal::SparseMatrix::matrix_norm_sqr_on_subrange. The size of @p v must
+ * equal both the number of rows and the number of columns (checked by
+ * assertions).
+ */
+template <typename number>
+template <typename somenumber>
+somenumber
+SparseMatrix<number>::matrix_norm_square (const Vector<somenumber> &v) const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  Assert(m() == v.size(), ExcDimensionMismatch(m(),v.size()));
+  Assert(n() == v.size(), ExcDimensionMismatch(n(),v.size()));
+
+  // accumulate the partial sums over subranges of rows
+  const size_type n_rows = m();
+  return
+    parallel::accumulate_from_subranges<somenumber>
+    (std_cxx11::bind (&internal::SparseMatrix::matrix_norm_sqr_on_subrange
+                      <number,Vector<somenumber> >,
+                      std_cxx11::_1, std_cxx11::_2,
+                      val, cols->rowstart, cols->colnums,
+                      std_cxx11::cref(v)),
+     0, n_rows,
+     internal::SparseMatrix::minimum_parallel_grain_size);
+}
+
+
+
+namespace internal
+{
+  namespace SparseMatrix
+  {
+    /**
+     * Compute the contribution of the rows begin_row, ..., end_row-1 to the
+     * matrix scalar product of @p u and @p v: for each of these rows, the
+     * product of the matrix row with @p v is formed, conjugated, multiplied
+     * by u(row), and the results are summed up. The matrix is given by its
+     * raw arrays @p values, @p rowstart and @p colnums.
+     *
+     * In the sequential case, this function is called on all rows, in the
+     * parallel case it may be called on a subrange, at the discretion of the
+     * task scheduler.
+     */
+    template <typename number,
+              typename InVector>
+    typename InVector::value_type
+    matrix_scalar_product_on_subrange (const size_type    begin_row,
+                                       const size_type    end_row,
+                                       const number      *values,
+                                       const std::size_t  *rowstart,
+                                       const size_type   *colnums,
+                                       const InVector    &u,
+                                       const InVector    &v)
+    {
+      typedef typename InVector::value_type value_type;
+
+      value_type partial_sum = 0.;
+
+      for (size_type row=begin_row; row<end_row; ++row)
+        {
+          // product of this matrix row with v
+          value_type row_times_v = 0;
+          for (size_type k=rowstart[row]; k<rowstart[row+1]; ++k)
+            row_times_v += value_type(values[k]) * v(colnums[k]);
+
+          partial_sum += u(row) * numbers::NumberTraits<value_type>::conjugate(row_times_v);
+        }
+
+      return partial_sum;
+    }
+  }
+}
+
+
+
+/**
+ * Return the matrix scalar product of @p u and @p v, i.e. the sum of the
+ * per-row contributions computed by
+ * internal::SparseMatrix::matrix_scalar_product_on_subrange. The size of
+ * @p u must equal the number of rows and the size of @p v the number of
+ * columns (checked by assertions).
+ */
+template <typename number>
+template <typename somenumber>
+somenumber
+SparseMatrix<number>::matrix_scalar_product (const Vector<somenumber> &u,
+                                             const Vector<somenumber> &v) const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  Assert(m() == u.size(), ExcDimensionMismatch(m(),u.size()));
+  Assert(n() == v.size(), ExcDimensionMismatch(n(),v.size()));
+
+  // accumulate the partial sums over subranges of rows
+  const size_type n_rows = m();
+  return
+    parallel::accumulate_from_subranges<somenumber>
+    (std_cxx11::bind (&internal::SparseMatrix::matrix_scalar_product_on_subrange
+                      <number,Vector<somenumber> >,
+                      std_cxx11::_1, std_cxx11::_2,
+                      val, cols->rowstart, cols->colnums,
+                      std_cxx11::cref(u),
+                      std_cxx11::cref(v)),
+     0, n_rows,
+     internal::SparseMatrix::minimum_parallel_grain_size);
+}
+
+
+/**
+ * Matrix-matrix product. With A denoting this matrix, the products of the
+ * entries of A and B are added into C, i.e. C receives A*B. If the size of
+ * @p V equals the number of columns of A, each product is additionally
+ * multiplied by the element of V belonging to the column of the entry of A,
+ * which amounts to A*diag(V)*B.
+ *
+ * @param C Result matrix; must already be attached to a sparsity pattern.
+ * @param B Right factor; its number of rows must equal n().
+ * @param V Optional scaling vector, used only if V.size() == n().
+ * @param rebuild_sparsity_C If true, the sparsity pattern of C is rebuilt
+ *   (casting away its constness) and C is reinitialized with it; C must then
+ *   not share its pattern with A or B. If false, the products are added to
+ *   whatever C presently contains.
+ */
+template <typename number>
+template <typename numberB, typename numberC>
+void
+SparseMatrix<number>::mmult (SparseMatrix<numberC>       &C,
+                             const SparseMatrix<numberB> &B,
+                             const Vector<number>        &V,
+                             const bool                   rebuild_sparsity_C) const
+{
+  const bool use_vector = V.size() == n() ? true : false; // V is ignored unless its size matches
+  Assert (n() == B.m(), ExcDimensionMismatch(n(), B.m()));
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (B.cols != 0, ExcNotInitialized());
+  Assert (C.cols != 0, ExcNotInitialized());
+
+  const SparsityPattern &sp_A = *cols;
+  const SparsityPattern &sp_B = *B.cols;
+
+  // clear previous content of C
+  if  (rebuild_sparsity_C == true)
+    {
+      // we are about to change the sparsity pattern of C. this can not work
+      // if either A or B use the same sparsity pattern
+      Assert (&C.get_sparsity_pattern() != &this->get_sparsity_pattern(),
+              ExcMessage ("Can't use the same sparsity pattern for "
+                          "different matrices if it is to be rebuilt."));
+      Assert (&C.get_sparsity_pattern() != &B.get_sparsity_pattern(),
+              ExcMessage ("Can't use the same sparsity pattern for "
+                          "different matrices if it is to be rebuilt."));
+
+      // need to change the sparsity pattern of C, so cast away const-ness.
+      SparsityPattern &sp_C =
+        *(const_cast<SparsityPattern *>(&C.get_sparsity_pattern()));
+      C.clear();
+      sp_C.reinit (0,0,0);
+
+      // create a sparsity pattern for the matrix. we will go through all the
+      // rows in the matrix A, and for each column in a row we add the whole
+      // row of matrix B with that row number. This means that we will insert
+      // a lot of entries to each row, which is best handled by the
+      // DynamicSparsityPattern class.
+      {
+        DynamicSparsityPattern dsp (m(), B.n());
+        for (size_type i = 0; i < dsp.n_rows(); ++i)
+          {
+            const size_type *rows = &sp_A.colnums[sp_A.rowstart[i]];
+            const size_type *const end_rows =
+              &sp_A.colnums[sp_A.rowstart[i+1]];
+            for (; rows != end_rows; ++rows)
+              {
+                const size_type col = *rows;
+                size_type *new_cols = const_cast<size_type *>
+                                      (&sp_B.colnums[sp_B.rowstart[col]]);
+                size_type *end_new_cols = const_cast<size_type *>
+                                          (&sp_B.colnums[sp_B.rowstart[col+1]]);
+
+                // if B has a diagonal, need to add that manually. this way,
+                // we maintain sortedness of the remaining column indices.
+                if (sp_B.n_rows() == sp_B.n_cols())
+                  {
+                    ++new_cols;
+                    dsp.add(i, col);
+                  }
+
+                dsp.add_entries (i, new_cols, end_new_cols, true);
+              }
+          }
+        sp_C.copy_from (dsp);
+      }
+
+      // reinit matrix C from that information
+      C.reinit (sp_C);
+    }
+
+  Assert (C.m() == m(), ExcDimensionMismatch(C.m(), m()));
+  Assert (C.n() == B.n(), ExcDimensionMismatch(C.n(), B.n()));
+
+  // create an array that caches some
+  // elements that are going to be written
+  // into the new matrix; sized for the longest row of B.
+  unsigned int max_n_cols_B = 0;
+  for (size_type i=0; i<B.m(); ++i)
+    max_n_cols_B = std::max (max_n_cols_B, sp_B.row_length(i));
+  std::vector<numberC> new_entries(max_n_cols_B);
+
+  // now compute the actual entries: a matrix-matrix product involves three
+  // nested loops. One over the rows of A, for each row we then loop over all
+  // the columns, and then we need to multiply each element with all the
+  // elements in that row in B.
+  for (size_type i=0; i<C.m(); ++i)
+    {
+      const size_type *rows = &sp_A.colnums[sp_A.rowstart[i]];
+      const size_type *const end_rows = &sp_A.colnums[sp_A.rowstart[i+1]];
+      for (; rows != end_rows; ++rows)
+        {
+          const number A_val = val[rows-&sp_A.colnums[sp_A.rowstart[0]]];
+          const size_type col = *rows;
+          const size_type *new_cols =
+            (&sp_B.colnums[sp_B.rowstart[col]]);
+
+          // special treatment for diagonal: for square B the first entry of a row is added on its own
+          if (sp_B.n_rows() == sp_B.n_cols())
+            {
+              C.add (i, *new_cols,
+                     numberC(A_val) * numberC(B.val[new_cols-&sp_B.colnums[sp_B.rowstart[0]]]) *
+                     numberC(use_vector ? V(col) : 1));
+              ++new_cols;
+            }
+
+          // now the innermost loop that goes over all the elements in row
+          // 'col' of matrix B. Cache the elements, and then write them into C
+          // at once
+          numberC *new_ptr = &new_entries[0];
+          const numberB *B_val_ptr =
+            &B.val[new_cols-&sp_B.colnums[sp_B.rowstart[0]]];
+          const numberB *const end_cols = &B.val[sp_B.rowstart[col+1]];
+          for (; B_val_ptr != end_cols; ++B_val_ptr)
+            *new_ptr++ = numberC(A_val) * numberC(*B_val_ptr) * numberC(use_vector ? V(col) : 1);
+
+          C.add (i, new_ptr-&new_entries[0], new_cols, &new_entries[0],
+                 false, true);
+        }
+    }
+}
+
+
+
+
+template <typename number>
+template <typename numberB, typename numberC>
+void
+SparseMatrix<number>::Tmmult (SparseMatrix<numberC>       &C,
+                              const SparseMatrix<numberB> &B,
+                              const Vector<number>        &V,
+                              const bool                   rebuild_sparsity_C) const
+{
+  const bool use_vector = V.size() == m() ? true : false;
+  Assert (m() == B.m(), ExcDimensionMismatch(m(), B.m()));
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (B.cols != 0, ExcNotInitialized());
+  Assert (C.cols != 0, ExcNotInitialized());
+
+  const SparsityPattern &sp_A = *cols;
+  const SparsityPattern &sp_B = *B.cols;
+
+  // clear previous content of C
+  if  (rebuild_sparsity_C == true)
+    {
+      // we are about to change the sparsity pattern of C. this can not work
+      // if either A or B use the same sparsity pattern
+      Assert (&C.get_sparsity_pattern() != &this->get_sparsity_pattern(),
+              ExcMessage ("Can't use the same sparsity pattern for "
+                          "different matrices if it is to be rebuilt."));
+      Assert (&C.get_sparsity_pattern() != &B.get_sparsity_pattern(),
+              ExcMessage ("Can't use the same sparsity pattern for "
+                          "different matrices if it is to be rebuilt."));
+
+      // need to change the sparsity pattern of C, so cast away const-ness.
+      SparsityPattern &sp_C =
+        *(const_cast<SparsityPattern *>(&C.get_sparsity_pattern()));
+      C.clear();
+      sp_C.reinit (0,0,0);
+
+      // create a sparsity pattern for the matrix. we will go through all the
+      // rows in the matrix A, and for each column in a row we add the whole
+      // row of matrix B with that row number. This means that we will insert
+      // a lot of entries to each row, which is best handled by the
+      // DynamicSparsityPattern class.
+      {
+        DynamicSparsityPattern dsp (n(), B.n());
+        for (size_type i = 0; i < sp_A.n_rows(); ++i)
+          {
+            const size_type *rows =
+              &sp_A.colnums[sp_A.rowstart[i]];
+            const size_type *const end_rows =
+              &sp_A.colnums[sp_A.rowstart[i+1]];
+            // cast away constness to conform with dsp.add_entries interface
+            size_type *new_cols = const_cast<size_type *>
+                                  (&sp_B.colnums[sp_B.rowstart[i]]);
+            size_type *end_new_cols = const_cast<size_type *>
+                                      (&sp_B.colnums[sp_B.rowstart[i+1]]);
+
+            if (sp_B.n_rows() == sp_B.n_cols())
+              ++new_cols;
+
+            for (; rows != end_rows; ++rows)
+              {
+                const size_type row = *rows;
+
+                // if B has a diagonal, need to add that manually. this way,
+                // we maintain sortedness.
+                if (sp_B.n_rows() == sp_B.n_cols())
+                  dsp.add(row, i);
+
+                dsp.add_entries (row, new_cols, end_new_cols, true);
+              }
+          }
+        sp_C.copy_from (dsp);
+      }
+
+      // reinit matrix C from that information
+      C.reinit (sp_C);
+    }
+
+  Assert (C.m() == n(), ExcDimensionMismatch(C.m(), n()));
+  Assert (C.n() == B.n(), ExcDimensionMismatch(C.n(), B.n()));
+
+  // create an array that caches some
+  // elements that are going to be written
+  // into the new matrix.
+  unsigned int max_n_cols_B = 0;
+  for (size_type i=0; i<B.m(); ++i)
+    max_n_cols_B = std::max (max_n_cols_B, sp_B.row_length(i));
+  std::vector<numberC> new_entries(max_n_cols_B);
+
+  // now compute the actual entries: a matrix-matrix product involves three
+  // nested loops. One over the rows of A, for each row we then loop over all
+  // the columns, and then we need to multiply each element with all the
+  // elements in that row in B.
+  for (size_type i=0; i<m(); ++i)
+    {
+      const size_type *rows = &sp_A.colnums[sp_A.rowstart[i]];
+      const size_type *const end_rows = &sp_A.colnums[sp_A.rowstart[i+1]];
+      const size_type *new_cols = &sp_B.colnums[sp_B.rowstart[i]];
+      if (sp_B.n_rows() == sp_B.n_cols())
+        ++new_cols;
+
+      const numberB *const end_cols = &B.val[sp_B.rowstart[i+1]];
+
+      for (; rows != end_rows; ++rows)
+        {
+          const size_type row = *rows;
+          const number A_val = val[rows-&sp_A.colnums[sp_A.rowstart[0]]];
+
+          // special treatment for diagonal
+          if (sp_B.n_rows () == sp_B.n_cols())
+            C.add (row, i,
+                   numberC(A_val) * numberC(B.val[new_cols-1-&sp_B.colnums[sp_B.rowstart[0]]]) *
+                   numberC(use_vector ? V(i) : 1));
+
+          // now the innermost loop that goes over all the elements in row
+          // 'col' of matrix B. Cache the elements, and then write them into C
+          // at once
+          numberC *new_ptr = &new_entries[0];
+          const numberB *B_val_ptr =
+            &B.val[new_cols-&sp_B.colnums[sp_B.rowstart[0]]];
+          for (; B_val_ptr != end_cols; ++B_val_ptr)
+            *new_ptr++ = numberC(A_val) * numberC(*B_val_ptr) * numberC(use_vector ? V(i) : 1);
+
+          C.add (row, new_ptr-&new_entries[0], new_cols, &new_entries[0],
+                 false, true);
+        }
+    }
+}
+
+
+
+/**
+ * Sum the absolute values of all stored entries column by column and return
+ * the linfty_norm() of the resulting vector of column sums.
+ */
+template <typename number>
+typename SparseMatrix<number>::real_type
+SparseMatrix<number>::l1_norm () const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+
+  // one accumulator per column of the matrix
+  Vector<real_type> abs_sum_per_column(n());
+
+  for (size_type row=0; row<m(); ++row)
+    {
+      // the entries of this row occupy [rowstart[row], rowstart[row+1]) in
+      // both the column index array and the value array
+      const std::size_t row_end = cols->rowstart[row+1];
+      size_type entry = cols->rowstart[row];
+      while (entry < row_end)
+        {
+          abs_sum_per_column(cols->colnums[entry])
+          += numbers::NumberTraits<number>::abs(val[entry]);
+          ++entry;
+        }
+    }
+
+  return abs_sum_per_column.linfty_norm();
+}
+
+
+
+/**
+ * Return the largest sum of absolute values of the stored entries found in
+ * any single row of the matrix.
+ */
+template <typename number>
+typename SparseMatrix<number>::real_type
+SparseMatrix<number>::linfty_norm () const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+
+  real_type largest_row_sum = 0;
+
+  for (size_type row=0; row<m(); ++row)
+    {
+      // add up |a_ij| over the entries stored for this row
+      const number *entry           = &val[cols->rowstart[row]];
+      const number *const row_limit = &val[cols->rowstart[row+1]];
+
+      real_type row_sum = 0;
+      for (; entry != row_limit; ++entry)
+        row_sum += numbers::NumberTraits<number>::abs(*entry);
+
+      if (row_sum > largest_row_sum)
+        largest_row_sum = row_sum;
+    }
+
+  return largest_row_sum;
+}
+
+
+
+/**
+ * Return the square root of the sum of abs_square() over every entry held in
+ * the value array, from index 0 up to (but excluding) the end of the last
+ * row. Row and column positions play no role in the computation.
+ */
+template <typename number>
+typename SparseMatrix<number>::real_type
+SparseMatrix<number>::frobenius_norm () const
+{
+  // same preconditions as the other norms: the value array is dereferenced
+  // below, so make sure the matrix has been set up
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+
+  // simply add up all entries in the
+  // sparsity pattern, without taking any
+  // reference to rows or columns
+  real_type norm_sqr = 0;
+  const size_type n_rows = m();
+  for (const number *ptr = &val[0];
+       ptr != &val[cols->rowstart[n_rows]]; ++ptr)
+    norm_sqr +=  numbers::NumberTraits<number>::abs_square(*ptr);
+
+  return std::sqrt (norm_sqr);
+}
+
+
+
+namespace internal
+{
+  namespace SparseMatrix
+  {
+    /**
+     * Compute the residual <tt>b - A*u</tt> for the rows in the half-open
+     * range <tt>[begin_row, end_row)</tt>, using the raw SparseMatrix data
+     * arrays (@p values, @p rowstart, @p colnums). The residual of each row
+     * is written into the corresponding entry of @p dst.
+     *
+     * @return The sum over the processed rows of the residual entry times
+     * its complex conjugate, i.e. this subrange's contribution to the
+     * squared norm of the residual.
+     *
+     * In the sequential case, this function is called on all rows, in the
+     * parallel case it may be called on a subrange, at the discretion of the
+     * task scheduler.
+     */
+    template <typename number,
+              typename InVector,
+              typename OutVector>
+    typename OutVector::value_type
+    residual_sqr_on_subrange (const size_type    begin_row,
+                              const size_type    end_row,
+                              const number      *values,
+                              const std::size_t  *rowstart,
+                              const size_type   *colnums,
+                              const InVector    &u,
+                              const InVector    &b,
+                              OutVector         &dst)
+    {
+      typedef typename OutVector::value_type value_type;
+
+      value_type sum_of_squares=0.;
+
+      for (size_type row=begin_row; row<end_row; ++row)
+        {
+          // start from the right hand side and subtract the row of A*u
+          value_type row_residual = b(row);
+          const std::size_t row_end = rowstart[row+1];
+          for (size_type k=rowstart[row]; k<row_end; k++)
+            row_residual -= value_type(values[k]) * u(colnums[k]);
+
+          dst(row) = row_residual;
+          sum_of_squares += row_residual *
+                            numbers::NumberTraits<value_type>::conjugate(row_residual);
+        }
+      return sum_of_squares;
+    }
+  }
+}
+
+
+/**
+ * Compute the residual <tt>dst = b - A*u</tt> and return the square root of
+ * the accumulated per-row squared contributions.
+ *
+ * @p dst and @p b must have m() entries and @p u must have n() entries;
+ * @p u and @p dst must be different objects.
+ */
+template <typename number>
+template <typename somenumber>
+somenumber
+SparseMatrix<number>::residual (Vector<somenumber>       &dst,
+                                const Vector<somenumber> &u,
+                                const Vector<somenumber> &b) const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  Assert(m() == dst.size(), ExcDimensionMismatch(m(),dst.size()));
+  Assert(m() == b.size(), ExcDimensionMismatch(m(),b.size()));
+  Assert(n() == u.size(), ExcDimensionMismatch(n(),u.size()));
+
+  // dst is written while u is still being read, so the two must not alias
+  Assert (&u != &dst, ExcSourceEqualsDestination());
+
+  // bind the matrix arrays and the three vectors to the subrange worker; the
+  // two placeholders receive the begin/end row of each subrange. the partial
+  // results over the range [0, m()) are accumulated and the square root of
+  // the total is returned
+  return
+    std::sqrt (parallel::accumulate_from_subranges<somenumber>
+               (std_cxx11::bind (&internal::SparseMatrix::residual_sqr_on_subrange
+                                 <number,Vector<somenumber>,Vector<somenumber> >,
+                                 std_cxx11::_1, std_cxx11::_2,
+                                 val, cols->rowstart, cols->colnums,
+                                 std_cxx11::cref(u),
+                                 std_cxx11::cref(b),
+                                 std_cxx11::ref(dst)),
+                0, m(),
+                internal::SparseMatrix::minimum_parallel_grain_size));
+}
+
+
+namespace
+{
+  /**
+   * In DEBUG builds, check every row of @p matrix and trigger an assertion
+   * for the first diagonal element that compares equal to a
+   * default-constructed <tt>number</tt>. Preconditioners such as Jacobi or
+   * SSOR divide by the diagonal and cannot work otherwise. Without DEBUG
+   * the function does nothing.
+   */
+  template <typename number>
+  void AssertNoZerosOnDiagonal (const SparseMatrix<number> &matrix)
+  {
+#ifdef DEBUG
+    typedef typename SparseMatrix<number>::size_type size_type;
+
+    const size_type n_rows = matrix.m();
+    for (size_type row=0; row<n_rows; ++row)
+      Assert(matrix.diag_element(row) != number(),
+             ExcMessage("There is a zero on the diagonal of this matrix "
+                        "in row "
+                        +
+                        Utilities::to_string(row)
+                        +
+                        ". The preconditioner you selected cannot work if that "
+                        "is the case because one of its steps requires "
+                        "division by the diagonal elements of the matrix."
+                        "\n\n"
+                        "You should check whether you have correctly "
+                        "assembled the matrix that you use for this "
+                        "preconditioner. If it is correct that there are "
+                        "zeros on the diagonal, then you will have to chose "
+                        "a different preconditioner."));
+#else
+    // nothing to check in release mode; silence the unused-parameter warning
+    (void)matrix;
+#endif
+  }
+}
+
+
+/**
+ * Apply one Jacobi preconditioning step: every entry of @p src is multiplied
+ * by the relaxation factor @p om and divided by the diagonal element of the
+ * corresponding row; the result is stored in @p dst.
+ *
+ * The matrix must be square, and @p dst and @p src must have n() entries.
+ */
+template <typename number>
+template <typename somenumber>
+void
+SparseMatrix<number>::precondition_Jacobi (Vector<somenumber>       &dst,
+                                           const Vector<somenumber> &src,
+                                           const number              om) const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  AssertDimension (m(), n());
+  AssertDimension (dst.size(), n());
+  AssertDimension (src.size(), n());
+
+  AssertNoZerosOnDiagonal(*this);
+
+  const size_type    n_entries = src.size();
+  somenumber        *out       = dst.begin();
+  const somenumber  *in        = src.begin();
+
+  // for square matrices the diagonal entry is the first one stored in each
+  // row, i.e. it sits at index rowstart[i]; squareness was asserted above
+  const std::size_t *diagonal_index = &cols->rowstart[0];
+
+  // treat a relaxation factor of one separately: in that case one floating
+  // point multiplication per row can be saved
+  if (om == number(1.))
+    {
+      for (size_type i=0; i<n_entries; ++i)
+        out[i] = in[i] / somenumber(val[diagonal_index[i]]);
+    }
+  else
+    {
+      for (size_type i=0; i<n_entries; ++i)
+        out[i] = somenumber(om) * in[i] / somenumber(val[diagonal_index[i]]);
+    }
+}
+
+
+
+/**
+ * Apply SSOR preconditioning with relaxation factor @p om to @p src and
+ * store the result in @p dst. The operation consists of three stages:
+ *  - a forward sweep over the rows that uses the entries left of the
+ *    diagonal and divides by the diagonal element,
+ *  - a scaling of every entry by <tt>om*(2-om)</tt> times the diagonal
+ *    element of its row,
+ *  - a backward sweep over the rows that uses the entries right of the
+ *    diagonal and divides by the diagonal element.
+ *
+ * @p pos_right_of_diagonal may be empty. If it is not, it must have one
+ * entry per row giving the index (into the global array of entries) of the
+ * first entry right of the diagonal in that row; otherwise this position is
+ * searched for in each row. Both code paths compute the same result.
+ *
+ * The matrix must be square, and @p dst and @p src must have n() entries.
+ */
+template <typename number>
+template <typename somenumber>
+void
+SparseMatrix<number>::precondition_SSOR (Vector<somenumber>              &dst,
+                                         const Vector<somenumber>        &src,
+                                         const number                     om,
+                                         const std::vector<std::size_t>  &pos_right_of_diagonal) const
+{
+  // to understand how this function works
+  // you may want to take a look at the CVS
+  // archives to see the original version
+  // which is much clearer...
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  AssertDimension (m(), n());
+  AssertDimension (dst.size(), n());
+  AssertDimension (src.size(), n());
+
+  AssertNoZerosOnDiagonal(*this);
+
+  const size_type    n            = src.size();
+  const std::size_t *rowstart_ptr = &cols->rowstart[0];
+  somenumber        *dst_ptr      = &dst(0);
+
+  // case when we have stored the position
+  // just right of the diagonal (then we
+  // don't have to search for it).
+  if (pos_right_of_diagonal.size() != 0)
+    {
+      Assert (pos_right_of_diagonal.size() == dst.size(),
+              ExcDimensionMismatch (pos_right_of_diagonal.size(), dst.size()));
+
+      // forward sweep
+      for (size_type row=0; row<n; ++row, ++dst_ptr, ++rowstart_ptr)
+        {
+          *dst_ptr = src(row);
+          const std::size_t first_right_of_diagonal_index =
+            pos_right_of_diagonal[row];
+          Assert (first_right_of_diagonal_index <= *(rowstart_ptr+1),
+                  ExcInternalError());
+          // the entry at *rowstart_ptr is the diagonal; the ones following
+          // it up to first_right_of_diagonal_index lie left of the diagonal
+          number s = 0;
+          for (size_type j=(*rowstart_ptr)+1; j<first_right_of_diagonal_index; ++j)
+            s += val[j] * number(dst(cols->colnums[j]));
+
+          // divide by diagonal element
+          *dst_ptr -= s * om;
+          *dst_ptr /= val[*rowstart_ptr];
+        }
+
+      // scale by om*(2-om) times the diagonal
+      rowstart_ptr = &cols->rowstart[0];
+      dst_ptr      = &dst(0);
+      for ( ; rowstart_ptr!=&cols->rowstart[n]; ++rowstart_ptr, ++dst_ptr)
+        *dst_ptr *= somenumber(om*(number(2.)-om)) * somenumber(val[*rowstart_ptr]);
+
+      // backward sweep
+      rowstart_ptr = &cols->rowstart[n-1];
+      dst_ptr      = &dst(n-1);
+      for (int row=n-1; row>=0; --row, --rowstart_ptr, --dst_ptr)
+        {
+          const size_type end_row = *(rowstart_ptr+1);
+          const size_type first_right_of_diagonal_index
+            = pos_right_of_diagonal[row];
+          number s = 0;
+          for (size_type j=first_right_of_diagonal_index; j<end_row; ++j)
+            s += val[j] * number(dst(cols->colnums[j]));
+
+          *dst_ptr -= s * om;
+          *dst_ptr /= val[*rowstart_ptr];
+        };
+      return;
+    }
+
+  // case when we need to get the position
+  // of the first element right of the
+  // diagonal manually for each sweep.
+  // forward sweep
+  for (size_type row=0; row<n; ++row, ++dst_ptr, ++rowstart_ptr)
+    {
+      *dst_ptr = src(row);
+      // find the first element in this line
+      // which is on the right of the diagonal.
+      // we need to precondition with the
+      // elements on the left only.
+      // note: the first entry in each
+      // line denotes the diagonal element,
+      // which we need not check.
+      const size_type first_right_of_diagonal_index
+        = (Utilities::lower_bound (&cols->colnums[*rowstart_ptr+1],
+                                   &cols->colnums[*(rowstart_ptr+1)],
+                                   row)
+           -
+           &cols->colnums[0]);
+
+      number s = 0;
+      for (size_type j=(*rowstart_ptr)+1; j<first_right_of_diagonal_index; ++j)
+        s += val[j] * number(dst(cols->colnums[j]));
+
+      // divide by diagonal element
+      *dst_ptr -= s * om;
+      Assert(val[*rowstart_ptr] != number(), ExcDivideByZero());
+      *dst_ptr /= val[*rowstart_ptr];
+    };
+
+  // scale by om*(2-om) times the diagonal. this has to be the same factor
+  // as in the branch above, since the result must not depend on whether
+  // the caller supplied pos_right_of_diagonal
+  rowstart_ptr = &cols->rowstart[0];
+  dst_ptr      = &dst(0);
+  for (size_type row=0; row<n; ++row, ++rowstart_ptr, ++dst_ptr)
+    *dst_ptr *= somenumber(om*(number(2.)-om)) * somenumber(val[*rowstart_ptr]);
+
+  // backward sweep
+  rowstart_ptr = &cols->rowstart[n-1];
+  dst_ptr      = &dst(n-1);
+  for (int row=n-1; row>=0; --row, --rowstart_ptr, --dst_ptr)
+    {
+      const size_type end_row = *(rowstart_ptr+1);
+      const size_type first_right_of_diagonal_index
+        = (Utilities::lower_bound (&cols->colnums[*rowstart_ptr+1],
+                                   &cols->colnums[end_row],
+                                   static_cast<size_type>(row)) -
+           &cols->colnums[0]);
+      number s = 0;
+      for (size_type j=first_right_of_diagonal_index; j<end_row; ++j)
+        s += val[j] * number(dst(cols->colnums[j]));
+      *dst_ptr -= s * om;
+      Assert(val[*rowstart_ptr] != number(), ExcDivideByZero());
+      *dst_ptr /= val[*rowstart_ptr];
+    };
+}
+
+
+/**
+ * Copy @p src into @p dst and then apply the in-place SOR() operation with
+ * relaxation factor @p om to @p dst.
+ */
+template <typename number>
+template <typename somenumber>
+void
+SparseMatrix<number>::precondition_SOR (Vector<somenumber> &dst,
+                                        const Vector<somenumber> &src,
+                                        const number om) const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+
+  // SOR() works in place, so start from a copy of the source vector
+  dst = src;
+  SOR(dst,om);
+}
+
+
+/**
+ * Copy @p src into @p dst and then apply the in-place TSOR() operation with
+ * relaxation factor @p om to @p dst.
+ */
+template <typename number>
+template <typename somenumber>
+void
+SparseMatrix<number>::precondition_TSOR (Vector<somenumber> &dst,
+                                         const Vector<somenumber> &src,
+                                         const number om) const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+
+  // TSOR() works in place, so start from a copy of the source vector
+  dst = src;
+  TSOR(dst,om);
+}
+
+
+/**
+ * Perform an SOR sweep in place on @p dst with relaxation factor @p om. Rows
+ * are processed in ascending order; for each row the contributions of the
+ * entries with a column index smaller than the row index are subtracted
+ * (using the already updated values of @p dst), and the result is multiplied
+ * by @p om and divided by the diagonal element.
+ *
+ * The matrix must be square and @p dst must have n() entries.
+ */
+template <typename number>
+template <typename somenumber>
+void
+SparseMatrix<number>::SOR (Vector<somenumber> &dst,
+                           const number om) const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  AssertDimension (m(), n());
+  AssertDimension (dst.size(), n());
+
+  AssertNoZerosOnDiagonal(*this);
+
+  const size_type n_rows = m();
+  for (size_type row=0; row<n_rows; ++row)
+    {
+      const std::size_t row_begin = cols->rowstart[row];
+      const std::size_t row_end   = cols->rowstart[row+1];
+
+      somenumber accumulated = dst(row);
+      for (size_type k=row_begin; k<row_end; ++k)
+        {
+          // only the part of the row left of the diagonal takes part
+          const size_type col = cols->colnums[k];
+          if (col >= row)
+            continue;
+          accumulated -= somenumber(val[k]) * dst(col);
+        }
+
+      // the diagonal element is the first entry stored for the row
+      dst(row) = accumulated * somenumber(om) / somenumber(val[row_begin]);
+    }
+}
+
+
+/**
+ * Perform a transposed SOR sweep in place on @p dst with relaxation factor
+ * @p om. Rows are processed in descending order; for each row the
+ * contributions of the entries with a column index larger than the row index
+ * are subtracted (using the already updated values of @p dst), and the
+ * result is multiplied by @p om and divided by the diagonal element.
+ *
+ * The matrix must be square and @p dst must have n() entries. For a matrix
+ * without rows the function does nothing.
+ */
+template <typename number>
+template <typename somenumber>
+void
+SparseMatrix<number>::TSOR (Vector<somenumber> &dst,
+                            const number om) const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  AssertDimension (m(), n());
+  AssertDimension (dst.size(), n());
+
+  AssertNoZerosOnDiagonal(*this);
+
+  // walk over the rows from the last to the first. the index is unsigned,
+  // so it is decremented at the top of the loop body: that way it never
+  // wraps around, and a matrix with zero rows is skipped altogether
+  for (size_type row=m(); row != 0;)
+    {
+      --row;
+
+      somenumber s = dst(row);
+      for (size_type j=cols->rowstart[row]; j<cols->rowstart[row+1]; ++j)
+        if (cols->colnums[j] > row)
+          s -= somenumber(val[j]) * dst(cols->colnums[j]);
+
+      // the diagonal element is the first entry stored for the row
+      dst(row) = s * somenumber(om) / somenumber(val[cols->rowstart[row]]);
+    }
+}
+
+
+/**
+ * Perform an SOR sweep in place on @p dst in which the rows are visited in
+ * the order given by @p permutation. When row <tt>permutation[urow]</tt> is
+ * processed, exactly those entries take part whose column @p col satisfies
+ * <tt>inverse_permutation[col] < urow</tt>. The result for the row is
+ * multiplied by @p om and divided by the diagonal element.
+ *
+ * The matrix must be square; @p dst, @p permutation and
+ * @p inverse_permutation must all have m() entries.
+ */
+template <typename number>
+template <typename somenumber>
+void
+SparseMatrix<number>::PSOR (Vector<somenumber> &dst,
+                            const std::vector<size_type> &permutation,
+                            const std::vector<size_type> &inverse_permutation,
+                            const number om) const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  AssertDimension (m(), n());
+
+  Assert (m() == dst.size(), ExcDimensionMismatch(m(),dst.size()));
+  Assert (m() == permutation.size(),
+          ExcDimensionMismatch(m(), permutation.size()));
+  Assert (m() == inverse_permutation.size(),
+          ExcDimensionMismatch(m(), inverse_permutation.size()));
+
+  AssertNoZerosOnDiagonal(*this);
+
+  const size_type n_rows = m();
+  for (size_type urow=0; urow<n_rows; ++urow)
+    {
+      const size_type   row       = permutation[urow];
+      const std::size_t row_begin = cols->rowstart[row];
+      const std::size_t row_end   = cols->rowstart[row+1];
+
+      somenumber accumulated = dst(row);
+      for (size_type k=row_begin; k<row_end; ++k)
+        {
+          // skip columns that come at or after this row in the permuted
+          // ordering
+          const size_type col = cols->colnums[k];
+          if (!(inverse_permutation[col] < urow))
+            continue;
+          accumulated -= somenumber(val[k]) * dst(col);
+        }
+
+      // the diagonal element is the first entry stored for the row
+      dst(row) = accumulated * somenumber(om) / somenumber(val[row_begin]);
+    }
+}
+
+
+/**
+ * Perform a transposed SOR sweep in place on @p dst in which the rows are
+ * visited in the reverse of the order given by @p permutation. When row
+ * <tt>permutation[urow]</tt> is processed, exactly those entries take part
+ * whose column @p col satisfies <tt>inverse_permutation[col] > urow</tt>.
+ * The result for the row is multiplied by @p om and divided by the diagonal
+ * element.
+ *
+ * The matrix must be square; @p dst, @p permutation and
+ * @p inverse_permutation must all have m() entries.
+ */
+template <typename number>
+template <typename somenumber>
+void
+SparseMatrix<number>::TPSOR (Vector<somenumber> &dst,
+                             const std::vector<size_type> &permutation,
+                             const std::vector<size_type> &inverse_permutation,
+                             const number om) const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  AssertDimension (m(), n());
+
+  Assert (m() == dst.size(), ExcDimensionMismatch(m(),dst.size()));
+  Assert (m() == permutation.size(),
+          ExcDimensionMismatch(m(), permutation.size()));
+  Assert (m() == inverse_permutation.size(),
+          ExcDimensionMismatch(m(), inverse_permutation.size()));
+
+  AssertNoZerosOnDiagonal(*this);
+
+  // count down with an unsigned index: decrement first, then use it
+  size_type urow = m();
+  while (urow != 0)
+    {
+      --urow;
+
+      const size_type   row       = permutation[urow];
+      const std::size_t row_begin = cols->rowstart[row];
+      const std::size_t row_end   = cols->rowstart[row+1];
+
+      somenumber accumulated = dst(row);
+      for (size_type k=row_begin; k<row_end; ++k)
+        {
+          // skip columns that come at or before this row in the permuted
+          // ordering
+          const size_type col = cols->colnums[k];
+          if (!(inverse_permutation[col] > urow))
+            continue;
+          accumulated -= somenumber(val[k]) * dst(col);
+        }
+
+      // the diagonal element is the first entry stored for the row
+      dst(row) = accumulated * somenumber(om) / somenumber(val[row_begin]);
+    }
+}
+
+
+
+/**
+ * Perform one Jacobi iteration step in place on @p v for the right hand side
+ * @p b with relaxation factor @p om: the residual-like vector
+ * <tt>A*v - b</tt> is formed in a temporary, passed through
+ * precondition_Jacobi(), and then subtracted from @p v.
+ *
+ * The matrix must be square, and @p v and @p b must have m() entries.
+ */
+template <typename number>
+template <typename somenumber>
+void
+SparseMatrix<number>::Jacobi_step (Vector<somenumber> &v,
+                                   const Vector<somenumber> &b,
+                                   const number        om) const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  AssertDimension (m(), n());
+
+  Assert (m() == v.size(), ExcDimensionMismatch(m(),v.size()));
+  Assert (m() == b.size(), ExcDimensionMismatch(m(),b.size()));
+
+  // obtain a temporary vector shaped like v from the memory pool
+  GrowingVectorMemory<Vector<somenumber> > vector_pool;
+  typename VectorMemory<Vector<somenumber> >::Pointer tmp(vector_pool);
+  tmp->reinit(v);
+
+  if (v.all_zero())
+    {
+      // A*v vanishes, so the matrix-vector product can be skipped
+      tmp->equ (-1.,b);
+    }
+  else
+    {
+      vmult (*tmp, v);
+      *tmp -= b;
+    }
+
+  precondition_Jacobi (*tmp, *tmp, om);
+  v -= *tmp;
+}
+
+
+
+/**
+ * Perform one SOR iteration step in place on @p v for the right hand side
+ * @p b with relaxation factor @p om. Rows are processed in ascending order;
+ * for each row the residual <tt>b(row) - sum_j a(row,j) v(j)</tt> is formed
+ * with the current contents of @p v, multiplied by @p om, divided by the
+ * diagonal element and added to <tt>v(row)</tt>.
+ *
+ * The matrix must be square, and @p v and @p b must have m() entries.
+ */
+template <typename number>
+template <typename somenumber>
+void
+SparseMatrix<number>::SOR_step (Vector<somenumber> &v,
+                                const Vector<somenumber> &b,
+                                const number        om) const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  AssertDimension (m(), n());
+  Assert (m() == v.size(), ExcDimensionMismatch(m(),v.size()));
+  Assert (m() == b.size(), ExcDimensionMismatch(m(),b.size()));
+
+  AssertNoZerosOnDiagonal(*this);
+
+  const size_type n_rows = m();
+  for (size_type row=0; row<n_rows; ++row)
+    {
+      const std::size_t row_begin = cols->rowstart[row];
+      const std::size_t row_end   = cols->rowstart[row+1];
+
+      somenumber row_residual = b(row);
+      for (size_type k=row_begin; k<row_end; ++k)
+        row_residual -= somenumber(val[k]) * v(cols->colnums[k]);
+
+      // the diagonal element is the first entry stored for the row
+      v(row) += row_residual * somenumber(om) / somenumber(val[row_begin]);
+    }
+}
+
+
+
+/**
+ * Perform one transposed SOR iteration step in place on @p v for the right
+ * hand side @p b with relaxation factor @p om. Rows are processed in
+ * descending order; for each row the residual
+ * <tt>b(row) - sum_j a(row,j) v(j)</tt> is formed with the current contents
+ * of @p v, multiplied by @p om, divided by the diagonal element and added to
+ * <tt>v(row)</tt>.
+ *
+ * The matrix must be square, and @p v and @p b must have m() entries.
+ */
+template <typename number>
+template <typename somenumber>
+void
+SparseMatrix<number>::TSOR_step (Vector<somenumber> &v,
+                                 const Vector<somenumber> &b,
+                                 const number        om) const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  AssertDimension (m(), n());
+  Assert (m() == v.size(), ExcDimensionMismatch(m(),v.size()));
+  Assert (m() == b.size(), ExcDimensionMismatch(m(),b.size()));
+
+  AssertNoZerosOnDiagonal(*this);
+
+  // walk over the rows from the last to the first. keep the row index in
+  // size_type rather than int so that it cannot be truncated for matrices
+  // with more rows than an int can hold; since the index is unsigned, it is
+  // decremented at the top of the loop body so that it never wraps around
+  for (size_type row=m(); row != 0;)
+    {
+      --row;
+
+      somenumber s = b(row);
+      for (size_type j=cols->rowstart[row]; j<cols->rowstart[row+1]; ++j)
+        {
+          s -= somenumber(val[j]) * v(cols->colnums[j]);
+        }
+      // the diagonal element is the first entry stored for the row
+      v(row) += s * somenumber(om) / somenumber(val[cols->rowstart[row]]);
+    }
+}
+
+
+
+/**
+ * Perform one SSOR iteration step in place on @p v for the right hand side
+ * @p b with relaxation factor @p om, implemented as one SOR_step() followed
+ * by one TSOR_step().
+ */
+template <typename number>
+template <typename somenumber>
+void
+SparseMatrix<number>::SSOR_step (Vector<somenumber> &v,
+                                 const Vector<somenumber> &b,
+                                 const number        om) const
+{
+  // forward sweep first, then the backward sweep on the updated vector
+  SOR_step(v,b,om);
+  TSOR_step(v,b,om);
+}
+
+
+
+/**
+ * In-place SSOR operation on @p dst with relaxation factor @p om.
+ *
+ * This function is marked as not implemented: it starts with an assertion
+ * that always fails, and the TODO below records that a multiplication step
+ * is missing. The code after the assertion is therefore only reached when
+ * assertions are disabled.
+ */
+template <typename number>
+template <typename somenumber>
+void
+SparseMatrix<number>::SSOR (Vector<somenumber> &dst,
+                            const number om) const
+{
+//TODO: Is this called anywhere? If so, multiplication with om(2-om)D is missing
+  Assert(false, ExcNotImplemented());
+
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+  AssertDimension (m(), n());
+  Assert (m() == dst.size(), ExcDimensionMismatch(m(),dst.size()));
+
+  AssertNoZerosOnDiagonal(*this);
+
+  const size_type n = dst.size();
+  size_type j;
+  somenumber s;
+
+  // first loop: rows in ascending order
+  for (size_type i=0; i<n; i++)
+    {
+      s = 0.;
+      for (j=cols->rowstart[i]; j<cols->rowstart[i+1] ; j++)
+        {
+          const size_type p = cols->colnums[j];
+          if (p != SparsityPattern::invalid_entry)
+            {
+              // NOTE(review): this compares the row index i with the
+              // storage index j, not with the column index p. presumably
+              // "p < i" (entries left of the diagonal) was intended --
+              // confirm before enabling this function.
+              if (i>j)
+                s += somenumber(val[j]) * dst(p);
+            }
+        }
+      dst(i) -= s * somenumber(om);
+      dst(i) /= somenumber(val[cols->rowstart[i]]);
+    }
+
+  // second loop: rows in descending order
+  for (int i=n-1; i>=0; i--)  // i is signed so that the loop can stop once it drops below zero
+    {
+      s = 0.;
+      for (j=cols->rowstart[i]; j<cols->rowstart[i+1] ; j++)
+        {
+          const size_type p = cols->colnums[j];
+          if (p != SparsityPattern::invalid_entry)
+            {
+              // NOTE(review): as above, the row index is compared with the
+              // storage index j rather than the column index p -- confirm.
+              if (static_cast<size_type>(i) < j)
+                s += somenumber(val[j]) * dst(p);
+            }
+        }
+      dst(i) -= s * somenumber(om) / somenumber(val[cols->rowstart[i]]);
+    }
+}
+
+
+
+/**
+ * Return a reference to the sparsity pattern this matrix is associated
+ * with. The matrix must have been initialized with a sparsity pattern.
+ */
+template <typename number>
+const SparsityPattern &
+SparseMatrix<number>::get_sparsity_pattern () const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  return *cols;
+}
+
+
+
+/**
+ * Write the matrix to @p out as a dense table with one line per row.
+ * Entries that are part of the sparsity pattern are printed multiplied by
+ * @p denominator; for all other positions @p zero_string is printed.
+ *
+ * @p precision is the output precision; @p scientific selects scientific
+ * instead of fixed notation. @p width_ is the field width of each column;
+ * if it is zero, <tt>precision+7</tt> is used for scientific and
+ * <tt>precision+2</tt> for fixed notation.
+ *
+ * The flags and the precision of @p out are restored before returning. An
+ * ExcIO exception is thrown if the stream is in a bad state after writing.
+ */
+template <typename number>
+void SparseMatrix<number>::print_formatted (std::ostream &out,
+                                            const unsigned int precision,
+                                            const bool scientific,
+                                            const unsigned int width_,
+                                            const char *zero_string,
+                                            const double denominator) const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+
+  // remember the stream state so that it can be restored at the end
+  const std::ios::fmtflags saved_flags = out.flags();
+  const unsigned int saved_precision = out.precision (precision);
+
+  out.setf (scientific ? std::ios::scientific : std::ios::fixed,
+            std::ios::floatfield);
+
+  // a width of zero means: choose one that fits the selected notation
+  unsigned int width = width_;
+  if (width == 0)
+    width = precision + (scientific ? 7 : 2);
+
+  const size_type n_rows = m();
+  const size_type n_cols = n();
+  for (size_type i=0; i<n_rows; ++i)
+    {
+      for (size_type j=0; j<n_cols; ++j)
+        {
+          const size_type index = (*cols)(i,j);
+          out << std::setw(width);
+          if (index == SparsityPattern::invalid_entry)
+            out << zero_string << ' ';
+          else
+            out << val[index] * number(denominator) << ' ';
+        }
+      out << std::endl;
+    }
+  AssertThrow (out, ExcIO());
+
+  // reset output format
+  out.precision(saved_precision);
+  out.flags (saved_flags);
+}
+
+
+
+/**
+ * Write a character picture of the matrix to @p out, one line per row and
+ * one character per column: <tt>'.'</tt> for positions that are not part of
+ * the sparsity pattern, <tt>'*'</tt> for stored entries whose absolute value
+ * exceeds @p threshold, and <tt>':'</tt> for all other stored entries.
+ *
+ * An ExcIO exception is thrown if the stream is in a bad state after
+ * writing.
+ */
+template <typename number>
+void SparseMatrix<number>::print_pattern (std::ostream &out,
+                                          const double threshold) const
+{
+  Assert (cols != 0, ExcNotInitialized());
+  Assert (val != 0, ExcNotInitialized());
+
+  const size_type n_rows = m();
+  const size_type n_cols = n();
+  for (size_type i=0; i<n_rows; ++i)
+    {
+      for (size_type j=0; j<n_cols; ++j)
+        {
+          const size_type index = (*cols)(i,j);
+          if (index == SparsityPattern::invalid_entry)
+            {
+              out << '.';
+              continue;
+            }
+
+          if (std::abs(val[index]) > threshold)
+            out << '*';
+          else
+            out << ':';
+        }
+      out << std::endl;
+    }
+  AssertThrow (out, ExcIO());
+}
+
+
+
+/**
+ * Write the value array of this matrix to @p out in the form
+ * <tt>[max_len][raw bytes]</tt>: the number of entries as text, followed by
+ * the unformatted memory image of the @p max_len values. The sparsity
+ * pattern is not written.
+ *
+ * An ExcIO exception is thrown if the stream is in a bad state before or
+ * after writing.
+ */
+template <typename number>
+void
+SparseMatrix<number>::block_write (std::ostream &out) const
+{
+  AssertThrow (out, ExcIO());
+
+  // byte range covered by the value array
+  const char *const raw_begin = reinterpret_cast<const char *>(&val[0]);
+  const char *const raw_end   = reinterpret_cast<const char *>(&val[max_len]);
+
+  // first the simple objects, bracketed in [...]
+  out << '[' << max_len << "][";
+  // then write out real data
+  out.write (raw_begin, raw_end - raw_begin);
+  out << ']';
+
+  AssertThrow (out, ExcIO());
+}
+
+
+
+/**
+ * Read the value array of this matrix from @p in, expecting the form
+ * <tt>[max_len][raw bytes]</tt>. The previous value array is released and a
+ * new one with @p max_len entries is allocated and filled with the raw bytes
+ * from the stream. The sparsity pattern is not touched.
+ *
+ * An ExcIO exception is thrown if the stream is in a bad state on entry or
+ * if one of the expected bracket characters is not found.
+ */
+template <typename number>
+void
+SparseMatrix<number>::block_read (std::istream &in)
+{
+  AssertThrow (in, ExcIO());
+
+  // give c a defined value: a failed extraction leaves it untouched, and
+  // the checks below must then fail reliably rather than inspect an
+  // indeterminate value
+  char c = 0;
+
+  // first read in simple data
+  in >> c;
+  AssertThrow (c == '[', ExcIO());
+  in >> max_len;
+
+  in >> c;
+  AssertThrow (c == ']', ExcIO());
+  in >> c;
+  AssertThrow (c == '[', ExcIO());
+
+  // reallocate space. reset the pointer in between so that it does not
+  // keep pointing to freed memory should the allocation throw
+  delete[] val;
+  val  = 0;
+  val  = new number[max_len];
+
+  // then read data
+  in.read (reinterpret_cast<char *>(&val[0]),
+           reinterpret_cast<char *>(&val[max_len])
+           - reinterpret_cast<char *>(&val[0]));
+  in >> c;
+  AssertThrow (c == ']', ExcIO());
+}
+
+
+/**
+ * This function has an empty body and ignores its argument; for this matrix
+ * class there is nothing to do here.
+ */
+template <typename number>
+void
+SparseMatrix<number>::compress (::dealii::VectorOperation::values)
+{}
+
+
+/**
+ * Return the number of bytes occupied by the @p max_len entries of the value
+ * array plus the size of this object itself. Memory of the sparsity pattern
+ * object is not included in the returned number.
+ */
+template <typename number>
+std::size_t
+SparseMatrix<number>::memory_consumption () const
+{
+  return max_len*static_cast<std::size_t>(sizeof(number)) + sizeof(*this);
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/sparse_matrix_ez.h b/include/deal.II/lac/sparse_matrix_ez.h
new file mode 100644
index 0000000..c78c25e
--- /dev/null
+++ b/include/deal.II/lac/sparse_matrix_ez.h
@@ -0,0 +1,1592 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2002 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__sparse_matrix_ez_h
+#define dealii__sparse_matrix_ez_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/lac/exceptions.h>
+
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+template<typename number> class Vector;
+template<typename number> class FullMatrix;
+
+/**
+ * @addtogroup Matrix1
+ * @{
+ */
+
+/**
+ * Sparse matrix without sparsity pattern.
+ *
+ * Instead of using a pre-assembled sparsity pattern, this matrix builds the
+ * pattern on the fly. Filling the matrix may consume more time than with @p
+ * SparseMatrix, since large memory movements may be involved. To help
+ * optimize things, an expected row-length may be provided to the
+ * constructor, as well as an increment size for rows.
+ *
+ * The storage structure: like with the usual sparse matrix, it is attempted
+ * to store only non-zero elements. These are stored in a single data array @p
+ * data. They are ordered by row and inside each row by column number. Each
+ * row is described by its starting point in the data array and its length.
+ * These are stored in the @p row_info array, together with additional useful
+ * information.
+ *
+ * Due to the structure, gaps may occur between rows. Whenever a new entry
+ * must be created, an attempt is made to use the gap in its row. If the gap
+ * is full, the row must be extended and all subsequent rows must be shifted
+ * backwards. This is a very expensive operation and should be avoided as much
+ * as possible.
+ *
+ * This is where the optimization parameters provided to the constructor or
+ * to the function @p reinit come in. @p default_row_length is the number of
+ * entries that will be allocated for each row on initialization (the actual
+ * length of the rows is still zero). This means that @p default_row_length
+ * entries can be added to this row without shifting other rows. If fewer
+ * entries are added, the additional memory will be wasted.
+ *
+ * If the space for a row is not sufficient, then it is enlarged by @p
+ * default_increment entries. This way, the subsequent rows are not shifted by
+ * single entries very often.
+ *
+ * Finally, the @p default_reserve allocates extra space at the end of the
+ * data array. This space is used whenever a row must be enlarged. Since @p
+ * std::vector doubles the capacity every time it must increase it, this value
+ * should allow for all the growth needed.
+ *
+ * Suggested settings: @p default_row_length should be the length of a typical
+ * row, for instance the size of the stencil in regular parts of the grid.
+ * Then, @p default_increment may be the expected amount of entries added to
+ * the row by having one hanging node. This way, a good compromise between
+ * memory consumption and speed should be achieved. @p default_reserve should
+ * then be an estimate for the number of hanging nodes times @p
+ * default_increment.
+ *
+ * Letting @p default_increment zero causes an exception whenever a row
+ * overflows.
+ *
+ * If the rows are expected to be filled more or less from first to last,
+ * using a @p default_row_length of zero may not be such a bad idea.
+ *
+ * The name of this matrix is in reference to a publication of the Internal
+ * Revenue Service of the United States of America. I hope some other aliens
+ * will appreciate it. By the way, the suffix makes sense by pronouncing it
+ * the American way.
+ *
+ * @author Guido Kanschat
+ * @date 2002, 2010
+ */
+template <typename number>
+class SparseMatrixEZ : public Subscriptor
+{
+public:
+  /**
+   * Declare type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * The class for storing the column number of an entry together with its
+   * value.
+   */
+  struct Entry
+  {
+    /**
+     * Standard constructor. Sets @p column to @p invalid and @p value to
+     * zero.
+     */
+    Entry();
+
+    /**
+     * Constructor. Fills column and value.
+     */
+    Entry (const size_type column,
+           const number &value);
+
+    /**
+     * The column number.
+     */
+    size_type column;
+
+    /**
+     * The value there.
+     */
+    number value;
+
+    /**
+     * Non-existent column number.
+     */
+    static const size_type invalid = numbers::invalid_size_type;
+  };
+
+  /**
+   * Structure for storing information on a matrix row. One object for each
+   * row will be stored in the matrix.
+   */
+  struct RowInfo
+  {
+    /**
+     * Constructor. Stores @p start, sets @p length to zero and @p diagonal
+     * to @p invalid_diagonal.
+     */
+    RowInfo (size_type start = Entry::invalid);
+
+    /**
+     * Index of first entry of the row in the data field.
+     */
+    size_type start;
+    /**
+     * Number of entries in this row.
+     */
+    unsigned short length;
+    /**
+     * Position of the diagonal element relative to the start index.
+     */
+    unsigned short diagonal;
+    /**
+     * Value for non-existing diagonal.
+     */
+    static const unsigned short
+    invalid_diagonal = static_cast<unsigned short>(-1);
+  };
+
+public:
+
+  /**
+   * Standard-conforming iterator.
+   */
+  class const_iterator
+  {
+  private:
+    /**
+     * Accessor class for iterators
+     */
+    class Accessor
+    {
+    public:
+      /**
+       * Constructor. Since we use accessors only for read access, a const
+       * matrix pointer is sufficient.
+       */
+      Accessor (const SparseMatrixEZ<number> *matrix,
+                const size_type               row,
+                const unsigned short          index);
+
+      /**
+       * Row number of the element represented by this object.
+       */
+      size_type row() const;
+
+      /**
+       * Index in row of the element represented by this object.
+       */
+      unsigned short index() const;
+
+      /**
+       * Column number of the element represented by this object.
+       */
+      size_type column() const;
+
+      /**
+       * Value of this matrix entry.
+       */
+      number value() const;
+
+    protected:
+      /**
+       * The matrix accessed.
+       */
+      const SparseMatrixEZ<number> *matrix;
+
+      /**
+       * Current row number.
+       */
+      size_type a_row;
+
+      /**
+       * Current index in row.
+       */
+      unsigned short a_index;
+
+      /**
+       * Make enclosing class a friend.
+       */
+      friend class const_iterator;
+    };
+
+  public:
+    /**
+     * Constructor.
+     */
+    const_iterator(const SparseMatrixEZ<number> *matrix,
+                   const size_type               row,
+                   const unsigned short          index);
+
+    /**
+     * Prefix increment. This always returns a valid entry or <tt>end()</tt>.
+     */
+    const_iterator &operator++ ();
+
+    /**
+     * Postfix increment. This always returns a valid entry or <tt>end()</tt>.
+     */
+    const_iterator &operator++ (int);
+
+    /**
+     * Dereferencing operator.
+     */
+    const Accessor &operator* () const;
+
+    /**
+     * Dereferencing operator.
+     */
+    const Accessor *operator-> () const;
+
+    /**
+     * Comparison. True, if both iterators point to the same matrix position.
+     */
+    bool operator == (const const_iterator &) const;
+    /**
+     * Inverse of <tt>==</tt>.
+     */
+    bool operator != (const const_iterator &) const;
+
+    /**
+     * Comparison operator. Result is true if either the first row number is
+     * smaller or if the row numbers are equal and the first index is smaller.
+     */
+    bool operator < (const const_iterator &) const;
+
+  private:
+    /**
+     * Store an object of the accessor class.
+     */
+    Accessor accessor;
+
+    /**
+     * NOTE(review): the text below describes a friend declaration, but no
+     * such declaration follows it in this class; the comment looks stale.
+     *
+     * Make the enclosing class a friend. This is only necessary since icc7
+     * otherwise wouldn't allow us to make const_iterator::Accessor a friend,
+     * stating that it can't access this class -- this is of course bogus,
+     * since granting friendship doesn't need access to the class being
+     * granted friendship...
+     */
+  };
+
+  /**
+   * Type of matrix entries. This typedef is analogous to <tt>value_type</tt>
+   * in the standard library containers.
+   */
+  typedef number value_type;
+
+  /**
+   * @name Constructors and initialization
+   */
+//@{
+  /**
+   * Constructor. Initializes an empty matrix of dimension zero times zero.
+   */
+  SparseMatrixEZ ();
+
+  /**
+   * Dummy copy constructor. This is here for use in containers. It may only
+   * be called for empty objects.
+   *
+   * If you really want to copy a whole matrix, you can do so by using the @p
+   * copy_from function.
+   */
+  SparseMatrixEZ (const SparseMatrixEZ &);
+
+  /**
+   * Constructor. Generates a matrix of the given size, ready to be filled.
+   * The optional parameters @p default_row_length and @p default_increment
+   * allow for preallocating memory. Providing these properly is essential for
+   * an efficient assembling of the matrix.
+   */
+  explicit SparseMatrixEZ (const size_type n_rows,
+                           const size_type n_columns,
+                           const size_type default_row_length = 0,
+                           const unsigned int default_increment = 1);
+
+  /**
+   * Destructor. Free all memory, but do not release the memory of the
+   * sparsity structure.
+   */
+  ~SparseMatrixEZ ();
+
+  /**
+   * Pseudo operator only copying empty objects.
+   */
+  SparseMatrixEZ<number> &operator = (const SparseMatrixEZ<number> &);
+
+  /**
+   * This operator assigns a scalar to a matrix. Since this does usually not
+   * make much sense (should we set all matrix entries to this value? Only the
+   * nonzero entries of the sparsity pattern?), this operation is only allowed
+   * if the actual value to be assigned is zero. This operator only exists to
+   * allow for the obvious notation <tt>matrix=0</tt>, which sets all elements
+   * of the matrix to zero, but keep the sparsity pattern previously used.
+   */
+  SparseMatrixEZ<number> &operator = (const double d);
+
+  /**
+   * Reinitialize the sparse matrix to the dimensions provided. The matrix
+   * will have no entries at this point. The optional parameters @p
+   * default_row_length, @p default_increment and @p reserve allow for
+   * preallocating memory. Providing these properly is essential for an
+   * efficient assembling of the matrix.
+   */
+  void reinit (const size_type n_rows,
+               const size_type n_columns,
+               size_type default_row_length = 0,
+               unsigned int default_increment = 1,
+               size_type reserve = 0);
+
+  /**
+   * Release all memory and return to a state just like after having called
+   * the default constructor. It also forgets its sparsity pattern.
+   */
+  void clear ();
+//@}
+  /**
+   * @name Information on the matrix
+   */
+//@{
+  /**
+   * Return whether the object is empty. It is empty if both dimensions are
+   * zero.
+   */
+  bool empty () const;
+
+  /**
+   * Return the dimension of the codomain (or range) space. To remember: the
+   * matrix is of dimension $m \times n$.
+   */
+  size_type m () const;
+
+  /**
+   * Return the dimension of the domain space. To remember: the matrix is of
+   * dimension $m \times n$.
+   */
+  size_type n () const;
+
+  /**
+   * Return the number of entries in a specific row.
+   */
+  size_type get_row_length (const size_type row) const;
+
+  /**
+   * Return the number of nonzero elements of this matrix.
+   */
+  size_type n_nonzero_elements () const;
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  std::size_t memory_consumption () const;
+
+  /**
+   * Print statistics. If @p full is @p true, prints a histogram of all
+   * existing row lengths and allocated row lengths. Otherwise, just the
+   * relation of allocated and used entries is shown.
+   */
+  template <class StreamType>
+  void print_statistics (StreamType &s, bool full = false);
+
+  /**
+   * Compute numbers of entries.
+   *
+   * In the first three arguments, this function returns the number of entries
+   * used, allocated and reserved by this matrix.
+   *
+   * If the final argument is true, the number of entries in each line is
+   * printed as well.
+   */
+  void compute_statistics (size_type &used,
+                           size_type &allocated,
+                           size_type &reserved,
+                           std::vector<size_type> &used_by_line,
+                           const bool compute_by_line) const;
+//@}
+  /**
+   * @name Modifying entries
+   */
+//@{
+  /**
+   * Set the element <tt>(i,j)</tt> to @p value.
+   *
+   * If <tt>value</tt> is not a finite number an exception is thrown.
+   *
+   * The optional parameter <tt>elide_zero_values</tt> can be used to specify
+   * whether zero values should be added anyway or these should be filtered
+   * away and only non-zero data is added. The default value is <tt>true</tt>,
+   * i.e., zero values won't be added into the matrix.
+   *
+   * If anyway a new element will be inserted and it does not exist, allocates
+   * the entry.
+   *
+   * @note You may need to insert some zero elements to keep a symmetric
+   * sparsity pattern for the matrix.
+   */
+  void set (const size_type i, const size_type j,
+            const number value, const bool elide_zero_values = true);
+
+  /**
+   * Add @p value to the element <tt>(i,j)</tt>. Allocates the entry if it
+   * does not exist. Filters out zeroes automatically. If <tt>value</tt> is
+   * not a finite number an exception is thrown.
+   */
+  void add (const size_type i,
+            const size_type j,
+            const number value);
+
+  /**
+   * Add all elements given in a FullMatrix<double> into sparse matrix
+   * locations given by <tt>indices</tt>. In other words, this function adds
+   * the elements in <tt>full_matrix</tt> to the respective entries in calling
+   * matrix, using the local-to-global indexing specified by <tt>indices</tt>
+   * for both the rows and the columns of the matrix. This function assumes a
+   * quadratic sparse matrix and a quadratic full_matrix, the usual situation
+   * in FE calculations.
+   *
+   * The optional parameter <tt>elide_zero_values</tt> can be used to specify
+   * whether zero values should be added anyway or these should be filtered
+   * away and only non-zero data is added. The default value is <tt>true</tt>,
+   * i.e., zero values won't be added into the matrix.
+   */
+  template <typename number2>
+  void add (const std::vector<size_type> &indices,
+            const FullMatrix<number2>    &full_matrix,
+            const bool                    elide_zero_values = true);
+
+  /**
+   * Same function as before, but now including the possibility to use
+   * rectangular full_matrices and different local-to-global indexing on rows
+   * and columns, respectively.
+   */
+  template <typename number2>
+  void add (const std::vector<size_type> &row_indices,
+            const std::vector<size_type> &col_indices,
+            const FullMatrix<number2>       &full_matrix,
+            const bool                       elide_zero_values = true);
+
+  /**
+   * Set several elements in the specified row of the matrix with column
+   * indices as given by <tt>col_indices</tt> to the respective value.
+   *
+   * NOTE(review): the wording above says "set" although this overload is
+   * named @p add -- confirm which of the two is meant.
+   *
+   * The optional parameter <tt>elide_zero_values</tt> can be used to specify
+   * whether zero values should be added anyway or these should be filtered
+   * away and only non-zero data is added. The default value is <tt>true</tt>,
+   * i.e., zero values won't be added into the matrix.
+   */
+  template <typename number2>
+  void add (const size_type               row,
+            const std::vector<size_type> &col_indices,
+            const std::vector<number2>   &values,
+            const bool                    elide_zero_values = true);
+
+  /**
+   * Add an array of values given by <tt>values</tt> in the given global
+   * matrix row at columns specified by col_indices in the sparse matrix.
+   *
+   * The optional parameter <tt>elide_zero_values</tt> can be used to specify
+   * whether zero values should be added anyway or these should be filtered
+   * away and only non-zero data is added. The default value is <tt>true</tt>,
+   * i.e., zero values won't be added into the matrix.
+   */
+  template <typename number2>
+  void add (const size_type  row,
+            const size_type  n_cols,
+            const size_type *col_indices,
+            const number2   *values,
+            const bool       elide_zero_values = true,
+            const bool       col_indices_are_sorted = false);
+
+  /**
+   * Copy the matrix given as argument into the current object.
+   *
+   * Copying matrices is an expensive operation that we do not want to happen
+   * by accident through compiler generated code for <code>operator=</code>.
+   * (This would happen, for example, if one accidentally declared a function
+   * argument of the current type <i>by value</i> rather than <i>by
+   * reference</i>.) The functionality of copying matrices is implemented in
+   * this member function instead. All copy operations of objects of this type
+   * therefore require an explicit function call.
+   *
+   * The source matrix may be a matrix of arbitrary type, as long as its data
+   * type is convertible to the data type of this matrix.
+   *
+   * The optional parameter <tt>elide_zero_values</tt> can be used to specify
+   * whether zero values should be added anyway or these should be filtered
+   * away and only non-zero data is added. The default value is <tt>true</tt>,
+   * i.e., zero values won't be added into the matrix.
+   *
+   * The function returns a reference to @p this.
+   */
+  template <typename MatrixType>
+  SparseMatrixEZ<number> &
+  copy_from (const MatrixType &source, const bool elide_zero_values = true);
+
+  /**
+   * Add @p matrix scaled by @p factor to this matrix.
+   *
+   * The source matrix may be a matrix of arbitrary type, as long as its data
+   * type is convertible to the data type of this matrix and it has the
+   * standard @p const_iterator.
+   */
+  template <typename MatrixType>
+  void add (const number      factor,
+            const MatrixType &matrix);
+//@}
+  /**
+   * @name Entry Access
+   */
+//@{
+  /**
+   * Return the value of the entry (i,j).  This may be an expensive operation
+   * and you should always take care where to call this function.  In order to
+   * avoid abuse, this function throws an exception if the required element
+   * does not exist in the matrix.
+   *
+   * In case you want a function that returns zero instead (for entries that
+   * are not in the sparsity pattern of the matrix), use the @p el function.
+   */
+  number operator () (const size_type i,
+                      const size_type j) const;
+
+  /**
+   * Return the value of the entry (i,j). Returns zero for all non-existing
+   * entries.
+   */
+  number el (const size_type i,
+             const size_type j) const;
+//@}
+  /**
+   * @name Multiplications
+   */
+//@{
+  /**
+   * Matrix-vector multiplication: let $dst = M*src$ with $M$ being this
+   * matrix.
+   */
+  template <typename somenumber>
+  void vmult (Vector<somenumber>       &dst,
+              const Vector<somenumber> &src) const;
+
+  /**
+   * Matrix-vector multiplication: let $dst = M^T*src$ with $M$ being this
+   * matrix. This function does the same as @p vmult but takes the transposed
+   * matrix.
+   */
+  template <typename somenumber>
+  void Tvmult (Vector<somenumber>       &dst,
+               const Vector<somenumber> &src) const;
+
+  /**
+   * Adding Matrix-vector multiplication. Add $M*src$ on $dst$ with $M$ being
+   * this matrix.
+   */
+  template <typename somenumber>
+  void vmult_add (Vector<somenumber>       &dst,
+                  const Vector<somenumber> &src) const;
+
+  /**
+   * Adding Matrix-vector multiplication. Add $M^T*src$ to $dst$ with $M$
+   * being this matrix. This function does the same as @p vmult_add but takes
+   * the transposed matrix.
+   */
+  template <typename somenumber>
+  void Tvmult_add (Vector<somenumber>       &dst,
+                   const Vector<somenumber> &src) const;
+//@}
+  /**
+   * @name Matrix norms
+   */
+//@{
+  /**
+   * Frobenius-norm of the matrix.
+   */
+  number l2_norm () const;
+//@}
+  /**
+   * @name Preconditioning methods
+   */
+//@{
+  /**
+   * Apply the Jacobi preconditioner, which multiplies every element of the @p
+   * src vector by the inverse of the respective diagonal element and
+   * multiplies the result with the damping factor @p omega.
+   */
+  template <typename somenumber>
+  void precondition_Jacobi (Vector<somenumber>       &dst,
+                            const Vector<somenumber> &src,
+                            const number              omega = 1.) const;
+
+  /**
+   * Apply SSOR preconditioning to @p src.
+   */
+  template <typename somenumber>
+  void precondition_SSOR (Vector<somenumber>       &dst,
+                          const Vector<somenumber> &src,
+                          const number              om = 1.,
+                          const std::vector<std::size_t> &pos_right_of_diagonal = std::vector<std::size_t>()) const;
+
+  /**
+   * Apply SOR preconditioning matrix to @p src. The result of this method is
+   * $dst = (om D - L)^{-1} src$.
+   */
+  template <typename somenumber>
+  void precondition_SOR (Vector<somenumber>       &dst,
+                         const Vector<somenumber> &src,
+                         const number              om = 1.) const;
+
+  /**
+   * Apply transpose SOR preconditioning matrix to @p src. The result of this
+   * method is $dst = (om D - U)^{-1} src$.
+   */
+  template <typename somenumber>
+  void precondition_TSOR (Vector<somenumber>       &dst,
+                          const Vector<somenumber> &src,
+                          const number              om = 1.) const;
+
+  /**
+   * Add the matrix @p A conjugated by @p B, that is, $B A B^T$ to this
+   * object. If the parameter @p transpose is true, compute $B^T A B$.
+   *
+   * This function requires that @p B has a @p const_iterator traversing all
+   * matrix entries and that @p A has a function <tt>el(i,j)</tt> for access
+   * to a specific entry.
+   */
+  template <typename MatrixTypeA, typename MatrixTypeB>
+  void conjugate_add (const MatrixTypeA &A,
+                      const MatrixTypeB &B,
+                      const bool         transpose = false);
+//@}
+  /**
+   * @name Iterators
+   */
+//@{
+  /**
+   * Iterator starting at the first existing entry.
+   */
+  const_iterator begin () const;
+
+  /**
+   * Final iterator.
+   */
+  const_iterator end () const;
+
+  /**
+   * Iterator starting at the first entry of row @p r. If this row is empty,
+   * the result is <tt>end(r)</tt>, which does NOT point into row @p r.
+   */
+  const_iterator begin (const size_type r) const;
+
+  /**
+   * Final iterator of row @p r. The result may be different from
+   * <tt>end()</tt>!
+   */
+  const_iterator end (const size_type r) const;
+//@}
+  /**
+   * @name Input/Output
+   */
+//@{
+  /**
+   * Print the matrix to the given stream, using the format <tt>(line,col)
+   * value</tt>, i.e. one nonzero entry of the matrix per line.
+   */
+  void print (std::ostream &out) const;
+
+  /**
+   * Print the matrix in the usual format, i.e. as a matrix and not as a list
+   * of nonzero elements. For better readability, elements not in the matrix
+   * are displayed as empty space, while matrix elements which are explicitly
+   * set to zero are displayed as such.
+   *
+   * The parameters allow for a flexible setting of the output format: @p
+   * precision and @p scientific are used to determine the number format,
+   * where @p scientific = @p false means fixed point notation.  A zero entry
+   * for @p width makes the function compute a width, but it may be changed to
+   * a positive value, if output is crude.
+   *
+   * Additionally, a character for an empty value may be specified.
+   *
+   * Finally, the whole matrix can be multiplied with a common denominator to
+   * produce more readable output, even integers.
+   *
+   * This function may produce @em large amounts of output if applied to a
+   * large matrix!
+   */
+  void print_formatted (std::ostream       &out,
+                        const unsigned int  precision   = 3,
+                        const bool          scientific  = true,
+                        const unsigned int  width       = 0,
+                        const char         *zero_string = " ",
+                        const double        denominator = 1.) const;
+
+  /**
+   * Write the data of this object in binary mode to a file.
+   *
+   * Note that this binary format is platform dependent.
+   */
+  void block_write (std::ostream &out) const;
+
+  /**
+   * Read data that has previously been written by @p block_write.
+   *
+   * The object is resized on this operation, and all previous contents are
+   * lost.
+   *
+   * A primitive form of error checking is performed which will recognize the
+   * bluntest attempts to interpret some data as a matrix stored bitwise to a
+   * file, but not more.
+   */
+  void block_read (std::istream &in);
+//@}
+
+  /**
+   * @addtogroup Exceptions
+   * @{
+   */
+
+  /**
+   * Exception for missing diagonal entry.
+   */
+  DeclException0(ExcNoDiagonal);
+
+  /**
+   * Exception
+   */
+  DeclException2 (ExcInvalidEntry,
+                  int, int,
+                  << "The entry with index (" << arg1 << ',' << arg2
+                  << ") does not exist.");
+
+  DeclException2(ExcEntryAllocationFailure,
+                 int, int,
+                 << "An entry with index (" << arg1 << ',' << arg2
+                 << ") cannot be allocated.");
+  //@}
+private:
+  /**
+   * Find an entry and return a const pointer. Return a zero-pointer if the
+   * entry does not exist.
+   */
+  const Entry *locate (const size_type row,
+                       const size_type col) const;
+
+  /**
+   * Find an entry and return a writable pointer. Return a zero-pointer if the
+   * entry does not exist.
+   */
+  Entry *locate (const size_type row,
+                 const size_type col);
+
+  /**
+   * Find an entry or generate it.
+   */
+  Entry *allocate (const size_type row,
+                   const size_type col);
+
+  /**
+   * Version of @p vmult which only performs its actions on the region defined
+   * by <tt>[begin_row,end_row)</tt>. This function is called by @p vmult in
+   * the case of enabled multithreading.
+   */
+  template <typename somenumber>
+  void threaded_vmult (Vector<somenumber>       &dst,
+                       const Vector<somenumber> &src,
+                       const size_type           begin_row,
+                       const size_type           end_row) const;
+
+  /**
+   * Version of @p matrix_norm_square which only performs its actions on the
+   * region defined by <tt>[begin_row,end_row)</tt>. This function is called
+   * by @p matrix_norm_square in the case of enabled multithreading.
+   */
+  template <typename somenumber>
+  void threaded_matrix_norm_square (const Vector<somenumber> &v,
+                                    const size_type           begin_row,
+                                    const size_type           end_row,
+                                    somenumber               *partial_sum) const;
+
+  /**
+   * Version of @p matrix_scalar_product which only performs its actions on
+   * the region defined by <tt>[begin_row,end_row)</tt>. This function is
+   * called by @p matrix_scalar_product in the case of enabled multithreading.
+   */
+  template <typename somenumber>
+  void threaded_matrix_scalar_product (const Vector<somenumber> &u,
+                                       const Vector<somenumber> &v,
+                                       const size_type           begin_row,
+                                       const size_type           end_row,
+                                       somenumber               *partial_sum) const;
+
+  /**
+   * Number of columns. This is used to check vector dimensions only.
+   */
+  size_type n_columns;
+
+  /**
+   * Info structure for each row.
+   */
+  std::vector<RowInfo> row_info;
+
+  /**
+   * Data storage.
+   */
+  std::vector<Entry> data;
+
+  /**
+   * Increment when a row grows.
+   */
+  unsigned int increment;
+
+  /**
+   * Remember the user provided default row length.
+   */
+  unsigned int saved_default_row_length;
+};
+
+/**
+ * @}
+ */
+/*---------------------- Inline functions -----------------------------------*/
+
+// Construct an entry from the given column index and value; both are
+// copied into the members of the same name.
+template <typename number>
+inline
+SparseMatrixEZ<number>::Entry::Entry(const size_type column,
+                                     const number &value)
+  :
+  column(column),
+  value(value)
+{}
+
+
+
+// Default constructor: the column is set to the marker value `invalid`
+// and the value to zero.
+template <typename number>
+inline
+SparseMatrixEZ<number>::Entry::Entry()
+  :
+  column(invalid),
+  value(0)
+{}
+
+
+// Construct the description of a row beginning at position `start`. The
+// row initially has length zero, and its diagonal position is set to
+// invalid_diagonal.
+template <typename number>
+inline
+SparseMatrixEZ<number>::RowInfo::RowInfo(size_type start)
+  :
+  start(start),
+  length(0),
+  diagonal(invalid_diagonal)
+{}
+
+
+//---------------------------------------------------------------------------
+// Store the matrix pointer together with the row number `r` and the index
+// `i` within that row. No validity checks are done here.
+template <typename number>
+inline
+SparseMatrixEZ<number>::const_iterator::Accessor::
+Accessor (const SparseMatrixEZ<number> *matrix,
+          const size_type               r,
+          const unsigned short          i)
+  :
+  matrix(matrix),
+  a_row(r),
+  a_index(i)
+{}
+
+
+// Return the stored row number.
+template <typename number>
+inline
+typename SparseMatrixEZ<number>::size_type
+SparseMatrixEZ<number>::const_iterator::Accessor::row() const
+{
+  return a_row;
+}
+
+
+// Return the column number of the entry this accessor refers to. The
+// entry is found in the matrix's data array at the start position of the
+// current row plus the index within the row.
+template <typename number>
+inline
+typename SparseMatrixEZ<number>::size_type
+SparseMatrixEZ<number>::const_iterator::Accessor::column() const
+{
+  const size_type position_in_data = matrix->row_info[a_row].start+a_index;
+
+  return matrix->data[position_in_data].column;
+}
+
+
+// Return the stored index within the current row.
+template <typename number>
+inline
+unsigned short
+SparseMatrixEZ<number>::const_iterator::Accessor::index() const
+{
+  return a_index;
+}
+
+
+
+// Return the value of the entry this accessor refers to. The entry is
+// found in the matrix's data array at the start position of the current
+// row plus the index within the row.
+template <typename number>
+inline
+number
+SparseMatrixEZ<number>::const_iterator::Accessor::value() const
+{
+  const size_type position_in_data = matrix->row_info[a_row].start+a_index;
+
+  return matrix->data[position_in_data].value;
+}
+
+
+// Construct an iterator for the entry with index `i` in row `r` of the
+// given matrix. If `r` equals the number of rows and `i` is zero, the
+// iterator is left as given. Otherwise, if `i` is not smaller than the
+// length of row `r`, the iterator is moved to the first entry of the next
+// non-empty row, or to row number m() if there is none.
+template <typename number>
+inline
+SparseMatrixEZ<number>::const_iterator::
+const_iterator(const SparseMatrixEZ<number> *matrix,
+               const size_type               r,
+               const unsigned short          i)
+  :
+  accessor(matrix, r, i)
+{
+  // Finish if this is the end()
+  if (r==accessor.matrix->m() && i==0) return;
+
+  // Make sure we never construct an
+  // iterator pointing to a
+  // non-existing entry
+
+  // If the index points beyond the
+  // end of the row, try the next
+  // row.
+  if (accessor.a_index >= accessor.matrix->row_info[accessor.a_row].length)
+    {
+      // The index given for the original
+      // row has no meaning in any other
+      // row, so restart at the first
+      // entry, just like operator++
+      // does. Without this reset the
+      // iterator could refer to a
+      // non-existing entry or fail to
+      // compare equal to end().
+      accessor.a_index = 0;
+      do
+        {
+          ++accessor.a_row;
+        }
+      // Beware! If the next row is
+      // empty, iterate until a
+      // non-empty row is found or we
+      // hit the end of the matrix.
+      while (accessor.a_row < accessor.matrix->m()
+             && accessor.matrix->row_info[accessor.a_row].length == 0);
+    }
+}
+
+
+// Prefix increment: move to the next entry of the current row or, if the
+// row is exhausted, to the first entry of the next non-empty row. If no
+// such row exists, the iterator ends up at row number m() with index 0.
+template <typename number>
+inline
+typename SparseMatrixEZ<number>::const_iterator &
+SparseMatrixEZ<number>::const_iterator::operator++ ()
+{
+  Assert (accessor.a_row < accessor.matrix->m(), ExcIteratorPastEnd());
+
+  // Step to the next entry; nothing more to do if it still lies inside
+  // the current row.
+  ++(accessor.a_index);
+  if (accessor.a_index < accessor.matrix->row_info[accessor.a_row].length)
+    return *this;
+
+  // The row is exhausted: restart at index zero in the following row and
+  // keep skipping rows for as long as they are empty.
+  accessor.a_index = 0;
+  ++accessor.a_row;
+  while (accessor.a_row < accessor.matrix->m()
+         && accessor.matrix->row_info[accessor.a_row].length == 0)
+    ++accessor.a_row;
+
+  return *this;
+}
+
+
+// Dereference: read access to the accessor describing the current entry.
+template <typename number>
+inline const typename SparseMatrixEZ<number>::const_iterator::Accessor &
+SparseMatrixEZ<number>::const_iterator::operator* () const
+{
+  return this->accessor;
+}
+
+
+// Member access: pointer to the accessor describing the current entry.
+template <typename number>
+inline const typename SparseMatrixEZ<number>::const_iterator::Accessor *
+SparseMatrixEZ<number>::const_iterator::operator-> () const
+{
+  return &this->accessor;
+}
+
+
+// Equal if both point to the same row and slot; matrices are not compared.
+template <typename number>
+inline bool
+SparseMatrixEZ<number>::const_iterator::
+operator == (const const_iterator &other) const
+{
+  return (accessor.index() == other.accessor.index() &&
+          accessor.row() == other.accessor.row());
+}
+
+
+// Negation of operator ==.
+template <typename number>
+inline bool
+SparseMatrixEZ<number>::const_iterator::
+operator != (const const_iterator &other) const
+{
+  return !this->operator== (other);
+}
+
+
+// Lexicographic ordering by (row, position within the row).
+template <typename number>
+inline bool
+SparseMatrixEZ<number>::const_iterator::
+operator < (const const_iterator &other) const
+{
+  if (accessor.row() != other.accessor.row())
+    return accessor.row() < other.accessor.row();
+  return accessor.index() < other.accessor.index();
+}
+
+
+//---------------------------------------------------------------------------
+// Number of rows; one RowInfo record is kept per row.
+template <typename number>
+inline typename SparseMatrixEZ<number>::size_type SparseMatrixEZ<number>::m () const
+{
+  return this->row_info.size();
+}
+
+
+// Number of columns, as stored in n_columns.
+template <typename number>
+inline typename SparseMatrixEZ<number>::size_type SparseMatrixEZ<number>::n () const
+{
+  return this->n_columns;
+}
+
+
+// Find entry (row,col) by linear search; null pointer if it is not stored.
+template <typename number>
+inline typename SparseMatrixEZ<number>::Entry *
+SparseMatrixEZ<number>::locate (const size_type row,
+                                const size_type col)
+{
+  Assert (row<m(), ExcIndexRange(row,0,m()));
+  Assert (col<n(), ExcIndexRange(col,0,n()));
+
+  const RowInfo &info = row_info[row];
+  const size_type last = info.start + info.length;
+  for (size_type pos = info.start; pos < last; ++pos)
+    {
+      Entry &candidate = data[pos];
+      if (candidate.column == col)
+        return &candidate;
+      if (candidate.column == Entry::invalid)
+        break;  // invalid column: give up, nothing found
+    }
+  return 0;
+}
+
+
+
+// Const overload: forwards to the non-const locate(), which does not modify
+// the matrix, and hands the result back as a pointer to const.
+template <typename number>
+inline const typename SparseMatrixEZ<number>::Entry *
+SparseMatrixEZ<number>::locate (const size_type row,
+                                const size_type col) const
+{
+  return const_cast<SparseMatrixEZ<number>*> (this)->locate(row,col);
+}
+
+
+// Return a pointer to the entry (row,col), creating it with value zero if
+// it does not exist yet. Entries of a row are kept sorted by column; a new
+// entry is inserted at its sorted position and the storage of the row is
+// enlarged first if the row has no free slot left.
+template <typename number>
+inline
+typename SparseMatrixEZ<number>::Entry *
+SparseMatrixEZ<number>::allocate (const size_type row,
+                                  const size_type col)
+{
+  Assert (row<m(), ExcIndexRange(row,0,m()));
+  Assert (col<n(), ExcIndexRange(col,0,n()));
+
+  RowInfo &r = row_info[row];
+  const size_type end = r.start + r.length;
+
+  size_type i = r.start;
+  // If a diagonal entry exists and col is not left of it, start the search there.
+  if (r.diagonal != RowInfo::invalid_diagonal && col >= row)
+    i += r.diagonal;
+  // Find position of entry (first slot whose column is not smaller than col)
+  while (i<end && data[i].column < col) ++i;
+
+  // entry found
+  if (i != end && data[i].column == col)
+    return &data[i];
+
+  // Now, we must insert the new entry at position i and move all
+  // successive entries of this row back by one.
+
+  // If no more space is available for this row, insert new elements into
+  // the data vector.
+//TODO:[GK] We should not extend this row if i<end
+  if (row != row_info.size()-1)
+    {
+      if (end >= row_info[row+1].start)
+        {
+          // Failure if increment 0
+          Assert(increment!=0,ExcEntryAllocationFailure(row,col));
+
+          // Insert `increment` new (default constructed) slots behind this row
+          data.insert(data.begin()+end, increment, Entry());
+          // Update starts of following rows
+          for (size_type rn=row+1; rn<row_info.size(); ++rn)
+            row_info[rn].start += increment;
+        }
+    }
+  else
+    {
+      if (end >= data.size())
+        {
+          // Last row: a single slot is
+          // appended; appending a block
+          // does not increase performance.
+          data.push_back(Entry());
+        }
+    }
+
+  Entry *entry = &data[i];
+  // Save original entry
+  Entry temp = *entry;
+  // Insert new entry here to
+  // make sure all entries
+  // are ordered by column
+  // index
+  entry->column = col;
+  entry->value = 0;
+  // Update row_info (length and position of the diagonal within the row)
+  ++r.length;
+  if (col == row)
+    r.diagonal = i - r.start;
+  else if (col<row && r.diagonal!= RowInfo::invalid_diagonal)
+    ++r.diagonal;
+
+  if (i == end)
+    return entry;
+
+  // Move all entries in this row up by one;
+  // temp always holds the entry displaced last
+  for (size_type j = i+1; j < end; ++j)
+    {
+      // There should be no invalid
+      // entry below end
+      Assert (data[j].column != Entry::invalid, ExcInternalError());
+
+//TODO[GK]: This could be done more efficiently by moving starting at the top rather than swapping starting at the bottom
+      std::swap (data[j], temp);
+    }
+  Assert (data[end].column == Entry::invalid, ExcInternalError());
+
+  // The entry displaced last goes into the slot at the old end of the row
+  data[end] = temp;
+
+  return entry;
+}
+
+
+
+// Set entry (i,j) to `value`. A zero value with elide_zero_values set never
+// creates a new entry; it only zeroes an entry that already exists.
+template <typename number>
+inline void SparseMatrixEZ<number>::set (const size_type i,
+                                         const size_type j,
+                                         const number value,
+                                         const bool elide_zero_values)
+{
+  AssertIsFinite(value);
+
+  Assert (i<m(), ExcIndexRange(i,0,m()));
+  Assert (j<n(), ExcIndexRange(j,0,n()));
+
+  if (!elide_zero_values || !(value == 0.))
+    {
+      // Regular case: create the entry if necessary and store the value
+      allocate(i,j)->value = value;
+      return;
+    }
+  // Elided zero: only touch an entry that is already stored
+  Entry *const existing = locate(i,j);
+  if (existing != 0)
+    existing->value = 0.;
+}
+
+
+
+// Add `value` to entry (i,j), creating the entry if it is not stored yet.
+// Adding zero is a no-op and does not create an entry.
+template <typename number>
+inline void SparseMatrixEZ<number>::add (const size_type i,
+                                         const size_type j,
+                                         const number value)
+{
+  AssertIsFinite(value);
+
+  Assert (i<m(), ExcIndexRange(i,0,m()));
+  Assert (j<n(), ExcIndexRange(j,0,n()));
+
+  // Zero contributions are skipped so they never allocate storage
+  if (value == 0)
+    return;
+
+  Entry *const target = allocate(i,j);
+  target->value += value;
+}
+
+
+template <typename number>
+template <typename number2>
+void SparseMatrixEZ<number>::add (const std::vector<size_type> &indices,
+                                  const FullMatrix<number2>    &full_matrix,
+                                  const bool                    elide_zero_values)
+{
+  // Adds full_matrix(r,c) to entry (indices[r],indices[c]). TODO: make more efficient
+  for (size_type r=0; r<indices.size(); ++r)
+    for (size_type c=0; c<indices.size(); ++c)
+      if (!elide_zero_values || (full_matrix(r,c) != 0))
+        add (indices[r], indices[c], full_matrix(r,c));
+}
+
+
+
+template <typename number>
+template <typename number2>
+void SparseMatrixEZ<number>::add (const std::vector<size_type> &row_indices,
+                                  const std::vector<size_type> &col_indices,
+                                  const FullMatrix<number2>    &full_matrix,
+                                  const bool                    elide_zero_values)
+{
+  // Adds full_matrix(r,c) to entry (row_indices[r],col_indices[c]). TODO: make more efficient
+  for (size_type r=0; r<row_indices.size(); ++r)
+    for (size_type c=0; c<col_indices.size(); ++c)
+      if (!elide_zero_values || (full_matrix(r,c) != 0))
+        add (row_indices[r], col_indices[c], full_matrix(r,c));
+}
+
+
+
+
+template <typename number>
+template <typename number2>
+void SparseMatrixEZ<number>::add (const size_type               row,
+                                  const std::vector<size_type> &col_indices,
+                                  const std::vector<number2>   &values,
+                                  const bool                    elide_zero_values)
+{
+  // Adds values[k] to entry (row,col_indices[k]). TODO: make more efficient
+  for (size_type k=0; k<col_indices.size(); ++k)
+    if (!elide_zero_values || (values[k] != 0))
+      add (row, col_indices[k], values[k]);
+}
+
+
+
+template <typename number>
+template <typename number2>
+void SparseMatrixEZ<number>::add (const size_type  row,
+                                  const size_type  n_cols,
+                                  const size_type *col_indices,
+                                  const number2   *values,
+                                  const bool       elide_zero_values,
+                                  const bool      /*col_indices_are_sorted*/)
+{
+  // Adds values[k] to entry (row,col_indices[k]) for k<n_cols. TODO: make more efficient
+  for (size_type k=0; k<n_cols; ++k)
+    if (!elide_zero_values || (values[k] != 0))
+      add (row, col_indices[k], values[k]);
+}
+
+
+
+
+// Value of entry (i,j), or zero if no such entry is stored.
+template <typename number>
+inline number SparseMatrixEZ<number>::el (const size_type i,
+                                          const size_type j) const
+{
+  const Entry *const found = locate(i,j);
+  if (found == 0)
+    return 0.;
+  return found->value;
+}
+
+
+
+// Value of entry (i,j); unlike el(), a missing entry trips an assertion.
+template <typename number>
+inline number SparseMatrixEZ<number>::operator() (const size_type i,
+                                                  const size_type j) const
+{
+  const Entry *const found = locate(i,j);
+  if (found != 0)
+    return found->value;
+  Assert(false, ExcInvalidEntry(i,j));
+  return 0.;
+}
+
+
+// Iterator to the first stored entry of the matrix; the iterator
+// constructor skips leading empty rows.
+template <typename number>
+inline typename SparseMatrixEZ<number>::const_iterator
+SparseMatrixEZ<number>::begin () const
+{
+  return const_iterator(this, 0, 0);
+}
+
+// Past-the-end iterator: row m(), position 0.
+template <typename number>
+inline typename SparseMatrixEZ<number>::const_iterator
+SparseMatrixEZ<number>::end () const
+{
+  return const_iterator(this, this->m(), 0);
+}
+
+// Iterator to the first entry of row r. If row r is empty, this is the
+// first entry of the next non-empty row, or end().
+template <typename number>
+inline typename SparseMatrixEZ<number>::const_iterator
+SparseMatrixEZ<number>::begin (const size_type r) const
+{
+  Assert (r<m(), ExcIndexRange(r,0,m()));
+  return const_iterator(this, r, 0);
+}
+
+// Iterator marking the end of row r: the first entry of the next non-empty
+// row after r, or end() if there is none.
+template <typename number>
+inline typename SparseMatrixEZ<number>::const_iterator
+SparseMatrixEZ<number>::end (const size_type r) const
+{
+  Assert (r<m(), ExcIndexRange(r,0,m()));
+  return const_iterator(this, r+1, 0);
+}
+
+// Replace the content of this matrix by a copy of M. The matrix is first
+// re-initialized to the size of M with the current row length and increment
+// settings, then filled row by row through the const_iterator interface of M.
+template<typename number>
+template <typename MatrixType>
+inline SparseMatrixEZ<number> &
+SparseMatrixEZ<number>::copy_from (const MatrixType &M, const bool elide_zero_values)
+{
+  reinit(M.m(), M.n(), this->saved_default_row_length, this->increment);
+
+  for (size_type row = 0; row < M.m(); ++row)
+    {
+      // Copy the entries of this row one by one
+      typename MatrixType::const_iterator entry = M.begin(row);
+      const typename MatrixType::const_iterator end_row = M.end(row);
+      while (entry != end_row)
+        {
+          set(row, entry->column(), entry->value(), elide_zero_values);
+          ++entry;
+        }
+    }
+
+  return *this;
+}
+
+// Add factor*M to this matrix. Zero entries of M are skipped, and nothing
+// is done at all if factor is zero.
+template<typename number>
+template <typename MatrixType>
+inline void
+SparseMatrixEZ<number>::add (const number      factor,
+                             const MatrixType &M)
+{
+  Assert (M.m() == m(), ExcDimensionMismatch(M.m(), m()));
+  Assert (M.n() == n(), ExcDimensionMismatch(M.n(), n()));
+
+  if (factor == 0.)
+    return;
+
+  // Walk through M row by row using its const_iterator interface and
+  // add the scaled entries into the current object
+  for (size_type row = 0; row < M.m(); ++row)
+    {
+      typename MatrixType::const_iterator entry = M.begin(row);
+      const typename MatrixType::const_iterator end_row = M.end(row);
+      for (; entry != end_row; ++entry)
+        if (entry->value() != 0)
+          add(row, entry->column(), factor * entry->value());
+    }
+}
+
+
+
+// Add the product B*A*B^T to this matrix, or B^T*A*B if `transpose` is set:
+//   r_ij += \sum_kl b_ik a_kl b_jl     (transpose: \sum_kl b_ki a_kl b_lj)
+// A is read through el()/const_iterator, B through its const_iterator.
+// The matrix dimensions are not checked (the assertions below are disabled).
+template<typename number>
+template <typename MatrixTypeA, typename MatrixTypeB>
+inline void
+SparseMatrixEZ<number>::conjugate_add (const MatrixTypeA &A,
+                                       const MatrixTypeB &B,
+                                       const bool         transpose)
+{
+//    Assert (n() == B.m(), ExcDimensionMismatch(n(), B.m()));
+//    Assert (m() == B.m(), ExcDimensionMismatch(m(), B.m()));
+//    Assert (A.n() == B.n(), ExcDimensionMismatch(A.n(), B.n()));
+//    Assert (A.m() == B.n(), ExcDimensionMismatch(A.m(), B.n()));
+
+  // Somehow, we have to avoid making this an operation of complexity
+  // n^2. In the transpose case, all pairs of stored entries of B are
+  // visited and the matching entry of A is looked up with A.el().
+  // In the non-transpose case, the stored entries of A are visited
+  // and, for each of them, only those rows of B are searched that
+  // can hold an entry in the relevant column (see minrow/maxrow
+  // below).
+  typename MatrixTypeB::const_iterator b1 = B.begin();
+  const typename MatrixTypeB::const_iterator b_final = B.end();
+  if (transpose)
+    while (b1 != b_final)
+      {
+        const size_type i = b1->column();
+        const size_type k = b1->row();
+        typename MatrixTypeB::const_iterator b2 = B.begin();
+        while (b2 != b_final)
+          {
+            const size_type j = b2->column();
+            const size_type l = b2->row();
+
+            const typename MatrixTypeA::value_type a = A.el(k,l);
+
+            if (a != 0.)
+              add (i, j, a * b1->value() * b2->value());
+            ++b2;
+          }
+        ++b1;
+      }
+  else
+    {
+      // Determine minimal and
+      // maximal row for a column in
+      // advance.
+
+      std::vector<size_type> minrow(B.n(), B.m());
+      std::vector<size_type> maxrow(B.n(), 0);
+      while (b1 != b_final)
+        {
+          const size_type r = b1->row();
+          if (r < minrow[b1->column()])
+            minrow[b1->column()] = r;
+          if (r > maxrow[b1->column()])
+            maxrow[b1->column()] = r;
+          ++b1;
+        }
+
+      typename MatrixTypeA::const_iterator ai = A.begin();
+      const typename MatrixTypeA::const_iterator ae = A.end();
+
+      for (; ai != ae; ++ai)
+        {
+          const typename MatrixTypeA::value_type a = ai->value();
+          // Don't do anything if this entry is zero. The iterator
+          // is advanced in the for-header, so skipping an entry cannot
+          // loop forever (a while-loop with 'continue' did).
+          if (a == 0.) continue;
+
+          // Now, loop over all rows
+          // having possibly a
+          // nonzero entry in column
+          // ai->row()
+          b1 = B.begin(minrow[ai->row()]);
+          const typename MatrixTypeB::const_iterator
+          be1 = B.end(maxrow[ai->row()]);
+          const typename MatrixTypeB::const_iterator
+          be2 = B.end(maxrow[ai->column()]);
+
+          while (b1 != be1)
+            {
+              const double b1v = b1->value();
+              // We need the product
+              // of both. If it is
+              // zero, we can save
+              // the work
+              if (b1->column() == ai->row() && (b1v != 0.))
+                {
+                  const size_type i = b1->row();
+
+                  typename MatrixTypeB::const_iterator
+                  b2 = B.begin(minrow[ai->column()]);
+                  while (b2 != be2)
+                    {
+                      if (b2->column() == ai->column())
+                        {
+                          const size_type j = b2->row();
+                          add (i, j, a * b1v * b2->value());
+                        }
+                      ++b2;
+                    }
+                }
+              ++b1;
+            }
+        }
+    }
+}
+
+
+// Write the storage statistics obtained from compute_statistics() to `out`.
+// If `full` is set, also list how many rows have each occurring row length.
+template <typename number>
+template <class StreamType>
+inline void
+SparseMatrixEZ<number>::print_statistics(StreamType &out, bool full)
+{
+  size_type used;
+  size_type allocated;
+  size_type reserved;
+  std::vector<size_type> used_by_line;
+
+  compute_statistics (used, allocated, reserved, used_by_line, full);
+
+  out << "SparseMatrixEZ:used      entries:" << used << std::endl
+      << "SparseMatrixEZ:allocated entries:" << allocated << std::endl
+      << "SparseMatrixEZ:reserved  entries:" << reserved << std::endl;
+
+  if (!full)
+    return;
+
+  for (size_type length=0; length<used_by_line.size(); ++length)
+    if (used_by_line[length] != 0)
+      out << "SparseMatrixEZ:entries\t" << length
+          << "\trows\t" << used_by_line[length]
+          << std::endl;
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
+/*----------------------------   sparse_matrix_ez.h  ---------------------------*/
diff --git a/include/deal.II/lac/sparse_matrix_ez.templates.h b/include/deal.II/lac/sparse_matrix_ez.templates.h
new file mode 100644
index 0000000..db92893
--- /dev/null
+++ b/include/deal.II/lac/sparse_matrix_ez.templates.h
@@ -0,0 +1,615 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2002 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__sparse_matrix_ez_templates_h
+#define dealii__sparse_matrix_ez_templates_h
+
+
+#include <deal.II/lac/sparse_matrix_ez.h>
+#include <deal.II/lac/vector.h>
+
+#include <iostream>
+#include <iomanip>
+#include <algorithm>
+#include <cmath>
+
+DEAL_II_NAMESPACE_OPEN
+
+//---------------------------------------------------------------------------
+
+// Create an empty matrix (no rows, no columns) with increment 1.
+template <typename number>
+SparseMatrixEZ<number>::SparseMatrixEZ()
+  :
+  n_columns (0), increment (1), saved_default_row_length (0)
+{}
+
+
+// Copy an empty matrix, taking over its increment and default row length.
+template <typename number>
+SparseMatrixEZ<number>::SparseMatrixEZ(const SparseMatrixEZ<number> &m)
+  : Subscriptor (m), n_columns (0), increment (m.increment),
+    saved_default_row_length (m.saved_default_row_length) // both were left uninitialized before
+{
+  Assert(m.n() == 0, ExcNotImplemented());  // only empty matrices may be copied
+  Assert(m.m() == 0, ExcNotImplemented());
+}
+
+
+// Create an n_rows x n_cols matrix; all the work is done by reinit().
+template <typename number>
+SparseMatrixEZ<number>::SparseMatrixEZ(const size_type n_rows, const size_type n_cols,
+                                       const size_type    default_row_length,
+                                       const unsigned int default_increment)
+{
+  this->reinit(n_rows, n_cols, default_row_length, default_increment);
+}
+
+
+// Nothing to release explicitly; the vector members clean up after themselves.
+template <typename number>
+SparseMatrixEZ<number>::~SparseMatrixEZ() {}
+
+
+// Assignment is only allowed from an empty matrix and copies nothing.
+template <typename number>
+SparseMatrixEZ<number> &SparseMatrixEZ<number>::operator= (const SparseMatrixEZ<number> &m)
+{
+  (void)m;
+  Assert (m.empty(), ExcInvalidConstructorCall());
+  return *this;
+}
+
+
+// Set the value of every slot of the data array to zero while keeping the
+// column indices and row information. Only d == 0 is allowed.
+template <typename number>
+SparseMatrixEZ<number> &
+SparseMatrixEZ<number>::operator = (const double d)
+{
+  (void)d;
+  Assert (d==0, ExcScalarAssignmentOnlyForZeroValue());
+
+  // The loop runs over the whole data array, so slots that are allocated
+  // but not used by any row are zeroed as well.
+  for (typename std::vector<Entry>::iterator slot = data.begin();
+       slot != data.end(); ++slot)
+    slot->value = 0.;
+
+  return *this;
+}
+
+
+
+// Discard all content and set the matrix up as n_rows x n_cols, giving
+// every row default_row_length consecutive slots in the data array.
+template <typename number>
+void SparseMatrixEZ<number>::reinit(const size_type n_rows,
+                                    const size_type n_cols,
+                                    size_type       default_row_length,
+                                    unsigned int    default_increment,
+                                    size_type       reserve)
+{
+  clear();
+
+  n_columns                = n_cols;
+  increment                = default_increment;
+  saved_default_row_length = default_row_length;
+
+  row_info.resize(n_rows);
+  if (reserve != 0)
+    data.reserve(reserve);
+  data.resize(default_row_length * n_rows);
+  for (size_type row=0; row<n_rows; ++row)
+    row_info[row].start = row * default_row_length;
+}
+
+
+// Reset to an empty 0x0 matrix by removing all rows and all data slots.
+// Emptying the vectors does not necessarily release their memory.
+template <typename number>
+void SparseMatrixEZ<number>::clear()
+{
+  n_columns = 0;
+  row_info.clear();
+  data.clear();
+}
+
+
+// True if the matrix has neither rows nor columns.
+template <typename number>
+bool SparseMatrixEZ<number>::empty() const
+{
+  return row_info.empty() && (n_columns == 0);
+}
+
+
+// dst = A*src. The sum for each row is accumulated in a double before it
+// is stored in dst.
+template <typename number>
+template <typename somenumber>
+void SparseMatrixEZ<number>::vmult (Vector<somenumber> &dst,
+                                    const Vector<somenumber> &src) const
+{
+  Assert(m() == dst.size(), ExcDimensionMismatch(m(),dst.size()));
+  Assert(n() == src.size(), ExcDimensionMismatch(n(),src.size()));
+
+  for (size_type row = 0; row < row_info.size(); ++row)
+    {
+      const RowInfo &ri = row_info[row];
+      typename std::vector<Entry>::const_iterator entry = data.begin() + ri.start;
+      const typename std::vector<Entry>::const_iterator row_end = entry + ri.length;
+      double s = 0.;
+      for (; entry != row_end; ++entry)
+        {
+          Assert (entry->column != Entry::invalid,
+                  ExcInternalError());
+          s += entry->value * src(entry->column);
+        }
+      dst(row) = s;
+    }
+}
+
+
+// Square root of the sum of squares of all stored entries (i.e. the
+// Frobenius norm of the matrix).
+template <typename number>
+number SparseMatrixEZ<number>::l2_norm () const
+{
+  number sum = 0.;
+  const const_iterator final = end();
+
+  for (const_iterator entry = begin(); entry != final; ++entry)
+    {
+      const double value = entry->value();
+      sum += value*value;
+    }
+
+  return std::sqrt(sum);
+}
+
+
+
+// dst = A^T * src, computed by zeroing dst and delegating to Tvmult_add().
+template <typename number>
+template <typename somenumber>
+void SparseMatrixEZ<number>::Tvmult (Vector<somenumber> &dst,
+                                     const Vector<somenumber> &src) const
+{
+  dst = 0.;
+  this->Tvmult_add(dst, src);
+}
+
+
+// dst += A*src. The sum for each row is accumulated in a double before it
+// is added to dst.
+template <typename number>
+template <typename somenumber>
+void SparseMatrixEZ<number>::vmult_add (Vector<somenumber> &dst,
+                                        const Vector<somenumber> &src) const
+{
+  Assert(m() == dst.size(), ExcDimensionMismatch(m(),dst.size()));
+  Assert(n() == src.size(), ExcDimensionMismatch(n(),src.size()));
+
+  for (size_type row = 0; row < row_info.size(); ++row)
+    {
+      const RowInfo &ri = row_info[row];
+      typename std::vector<Entry>::const_iterator entry = data.begin() + ri.start;
+      const typename std::vector<Entry>::const_iterator row_end = entry + ri.length;
+      double s = 0.;
+      for (; entry != row_end; ++entry)
+        {
+          Assert (entry->column != Entry::invalid,
+                  ExcInternalError());
+          s += entry->value * src(entry->column);
+        }
+      dst(row) += s;
+    }
+}
+
+// dst += A^T * src. Every stored entry (row,col) contributes
+// value*src(row) to dst(col).
+template <typename number>
+template <typename somenumber>
+void SparseMatrixEZ<number>::Tvmult_add (Vector<somenumber> &dst,
+                                         const Vector<somenumber> &src) const
+{
+  Assert(n() == dst.size(), ExcDimensionMismatch(n(),dst.size()));
+  Assert(m() == src.size(), ExcDimensionMismatch(m(),src.size()));
+
+  for (size_type row = 0; row < row_info.size(); ++row)
+    {
+      const RowInfo &ri = row_info[row];
+      typename std::vector<Entry>::const_iterator entry = data.begin() + ri.start;
+      const typename std::vector<Entry>::const_iterator row_end = entry + ri.length;
+      for (; entry != row_end; ++entry)
+        {
+          Assert (entry->column != Entry::invalid,
+                  ExcInternalError());
+          dst(entry->column) += entry->value * src(row);
+        }
+    }
+}
+
+
+// dst = om * D^{-1} * src, where D is the diagonal of the matrix. Every row
+// must have a diagonal entry.
+template <typename number>
+template <typename somenumber>
+void SparseMatrixEZ<number>::precondition_Jacobi (Vector<somenumber>       &dst,
+                                                  const Vector<somenumber> &src,
+                                                  const number              om) const
+{
+  Assert (m() == n(), ExcNotQuadratic());
+  Assert (dst.size() == n(), ExcDimensionMismatch (dst.size(), n()));
+  Assert (src.size() == n(), ExcDimensionMismatch (src.size(), n()));
+
+  somenumber       *dst_ptr = dst.begin();
+  const somenumber *src_ptr = src.begin();
+  for (typename std::vector<RowInfo>::const_iterator ri = row_info.begin();
+       ri != row_info.end(); ++ri, ++dst_ptr, ++src_ptr)
+    {
+      Assert (ri->diagonal != RowInfo::invalid_diagonal, ExcNoDiagonal());
+      const number diagonal_value = data[ri->start + ri->diagonal].value;
+      *dst_ptr = om * *src_ptr / diagonal_value;
+    }
+}
+
+
+
+// Forward sweep: for each row in ascending order, subtract from src the
+// contributions of the entries stored before the diagonal (using the already
+// updated dst), then scale by om and divide by the diagonal entry.
+template <typename number>
+template <typename somenumber>
+void SparseMatrixEZ<number>::precondition_SOR (Vector<somenumber>       &dst,
+                                               const Vector<somenumber> &src,
+                                               const number              om) const
+{
+  Assert (m() == n(), ExcNotQuadratic());
+  Assert (dst.size() == n(), ExcDimensionMismatch (dst.size(), n()));
+  Assert (src.size() == n(), ExcDimensionMismatch (src.size(), n()));
+
+  somenumber       *dst_ptr = dst.begin();
+  const somenumber *src_ptr = src.begin();
+  for (typename std::vector<RowInfo>::const_iterator ri = row_info.begin();
+       ri != row_info.end(); ++ri, ++dst_ptr, ++src_ptr)
+    {
+      Assert (ri->diagonal != RowInfo::invalid_diagonal, ExcNoDiagonal());
+      const size_type diag_pos = ri->start + ri->diagonal;
+      number s = *src_ptr;
+      for (size_type i=ri->start; i<diag_pos; ++i)
+        s -= data[i].value * dst(data[i].column);
+
+      *dst_ptr = om * s / data[diag_pos].value;
+    }
+}
+
+
+
+// Backward sweep: for each row in descending order, subtract from src the
+// contributions of the entries stored after the diagonal (using the already
+// updated dst), then scale by om and divide by the diagonal entry.
+template <typename number>
+template <typename somenumber>
+void SparseMatrixEZ<number>::precondition_TSOR (Vector<somenumber>       &dst,
+                                                const Vector<somenumber> &src,
+                                                const number              om) const
+{
+  Assert (m() == n(), ExcNotQuadratic());
+  Assert (dst.size() == n(), ExcDimensionMismatch (dst.size(), n()));
+  Assert (src.size() == n(), ExcDimensionMismatch (src.size(), n()));
+
+  somenumber       *dst_ptr = dst.begin()+dst.size()-1;
+  const somenumber *src_ptr = src.begin()+src.size()-1;
+
+  for (typename std::vector<RowInfo>::const_reverse_iterator ri = row_info.rbegin();
+       ri != row_info.rend(); ++ri, --dst_ptr, --src_ptr)
+    {
+      Assert (ri->diagonal != RowInfo::invalid_diagonal, ExcNoDiagonal());
+      const size_type diag_pos = ri->start + ri->diagonal;
+      const size_type end_row = ri->start + ri->length;
+      number s = *src_ptr;
+      for (size_type i=diag_pos+1; i<end_row; ++i)
+        s -= data[i].value * dst(data[i].column);
+
+      *dst_ptr = om * s / data[diag_pos].value;
+    }
+}
+
+
+
+// SSOR preconditioner, applied as three passes over dst: a forward sweep, a
+// diagonal scaling and a backward sweep. The last argument is not used.
+template <typename number>
+template <typename somenumber>
+void
+SparseMatrixEZ<number>::precondition_SSOR (Vector<somenumber>       &dst,
+                                           const Vector<somenumber> &src,
+                                           const number              om,
+                                           const std::vector<std::size_t> &) const
+{
+  Assert (m() == n(), ExcNotQuadratic());
+  Assert (dst.size() == n(), ExcDimensionMismatch (dst.size(), n()));
+  Assert (src.size() == n(), ExcDimensionMismatch (src.size(), n()));
+
+  somenumber       *dst_ptr = dst.begin();
+  const somenumber *src_ptr = src.begin();
+  typename std::vector<RowInfo>::const_iterator ri;
+  const typename std::vector<RowInfo>::const_iterator end = row_info.end();
+
+  // Forward: dst_i = (src_i - om * sum over entries before the diagonal) / a_ii
+  for (ri = row_info.begin(); ri != end; ++dst_ptr, ++src_ptr, ++ri)
+    {
+      Assert (ri->diagonal != RowInfo::invalid_diagonal, ExcNoDiagonal());
+      number s = 0;
+      const size_type end_row = ri->start + ri->diagonal;
+      for (size_type i=ri->start; i<end_row; ++i)
+        s += data[i].value * dst(data[i].column);
+      *dst_ptr = *src_ptr - s * om;
+      *dst_ptr /= data[ri->start + ri->diagonal].value;
+    }
+  // Diagonal: dst_i *= om*(2-om) * a_ii
+  dst_ptr = dst.begin();
+  for (ri = row_info.begin(); ri != end; ++dst_ptr, ++ri)
+    *dst_ptr *= om*(2.-om) * data[ri->start + ri->diagonal].value;
+
+  // Backward: dst_i = (dst_i - om * sum over entries after the diagonal) / a_ii
+  typename std::vector<RowInfo>::const_reverse_iterator rri;
+  const typename std::vector<RowInfo>::const_reverse_iterator
+  rend = row_info.rend();
+  dst_ptr = dst.begin()+dst.size()-1;
+  for (rri = row_info.rbegin(); rri != rend; --dst_ptr, ++rri)
+    {
+      const size_type end_row = rri->start + rri->length;
+      number s = 0;
+      for (size_type i=rri->start+rri->diagonal+1; i<end_row; ++i)
+        s += data[i].value * dst(data[i].column);
+      *dst_ptr -= s * om;
+      *dst_ptr /= data[rri->start + rri->diagonal].value;
+    }
+}
+
+
+
+// Memory estimate in bytes: this object plus the capacity of both vectors.
+template <typename number>
+std::size_t SparseMatrixEZ<number>::memory_consumption() const
+{
+  // row_info holds RowInfo records; counting sizeof(size_type) per slot,
+  // as was done before, underestimated the memory used by that vector.
+  return sizeof (*this) + sizeof(RowInfo) * row_info.capacity()
+         + sizeof(Entry) * data.capacity();
+}
+
+
+
+// Number of entries currently stored in the given row.
+template <typename number>
+typename SparseMatrixEZ<number>::size_type SparseMatrixEZ<number>::get_row_length (const size_type row) const
+{
+  return this->row_info[row].length;
+}
+
+
+
+// Number of entries stored in the matrix, i.e. the sum of all row lengths.
+// Stored entries whose value is zero are counted as well.
+template <typename number>
+typename SparseMatrixEZ<number>::size_type
+SparseMatrixEZ<number>::n_nonzero_elements() const
+{
+  // Add up entries actually used
+  size_type used = 0;
+  for (size_type row=0; row<row_info.size(); ++row)
+    used += row_info[row].length;
+
+  return used;
+}
+
+
+
+// Collect storage statistics: `used` is the number of entries in use,
+// `allocated` the end of the last row's used slots in the data array,
+// `reserved` the capacity of the data array. If `full` is set,
+// used_by_line[k] receives the number of rows holding exactly k entries.
+template <typename number>
+void SparseMatrixEZ<number>::compute_statistics(
+  size_type &used,
+  size_type &allocated,
+  size_type &reserved,
+  std::vector<size_type> &used_by_line,
+  const bool full) const
+{
+  typename std::vector<RowInfo>::const_iterator row = row_info.begin();
+  const typename std::vector<RowInfo>::const_iterator endrow = row_info.end();
+
+  // Add up entries actually used
+  used = 0;
+  size_type max_length = 0;
+  for (; row != endrow ; ++ row)
+    {
+      used += row->length;
+      if (max_length < row->length)
+        max_length = row->length;
+    }
+
+  // Number of entries allocated is position of last entry used. A matrix
+  // without rows has no last row, so do not step back from begin() there.
+  allocated = (row_info.empty() ? 0 :
+               row_info.back().start + row_info.back().length);
+  reserved = data.capacity();
+
+  if (full)
+    {
+      // assign() also resets counts left over in a vector passed in non-empty
+      used_by_line.assign(max_length+1, 0);
+      for (row = row_info.begin() ; row != endrow; ++row)
+        ++used_by_line[row->length];
+    }
+}
+
+
+// Write all stored entries to `out`, one per line, in the form
+// row<TAB>column<TAB>value.
+template <typename number>
+void
+SparseMatrixEZ<number>::print (std::ostream &out) const
+{
+  AssertThrow (out, ExcIO());
+
+  const const_iterator last = end();
+  for (const_iterator entry = begin(); entry != last; ++entry)
+    {
+      out << entry->row() << '\t'
+          << entry->column() << '\t'
+          << entry->value() << std::endl;
+    }
+}
+
+
+// Print the matrix as a dense m() x n() table. Stored entries are printed
+// multiplied by `denominator`, all other positions show `zero_string`.
+// A width of zero selects a default column width based on the precision.
+template <typename number>
+void
+SparseMatrixEZ<number>::print_formatted ( std::ostream          &out,
+                                          const unsigned int     precision,
+                                          const bool             scientific,
+                                          const unsigned int     width_,
+                                          const char            *zero_string,
+                                          const double           denominator) const
+{
+  AssertThrow (out, ExcIO());
+  Assert (m() != 0, ExcNotInitialized());
+  Assert (n() != 0, ExcNotInitialized());
+
+  // Remember the stream settings so they can be restored at the end
+  std::ios::fmtflags old_flags = out.flags();
+  unsigned int old_precision = out.precision (precision);
+
+  // Select the floating point notation ...
+  out.setf (scientific ? std::ios::scientific : std::ios::fixed,
+            std::ios::floatfield);
+
+  // ... and, unless the caller chose one, the matching column width
+  unsigned int width = width_;
+  if (!width)
+    width = precision + (scientific ? 7 : 2);
+
+  // TODO: Skip nonexistent entries
+  for (size_type i=0; i<m(); ++i)
+    {
+      for (size_type j=0; j<n(); ++j)
+        {
+          // locate() returns a null pointer for positions without an entry
+          const Entry *entry = locate(i,j);
+
+          out << std::setw(width);
+          if (entry)
+            out << entry->value * denominator << ' ';
+          else
+            out << zero_string << ' ';
+        }
+      out << std::endl;
+    }
+
+  // reset output format
+  out.precision(old_precision);
+  out.flags (old_flags);
+}
+
+
+// Write the matrix in the format read by block_read(): four bracketed
+// numbers followed by the raw bytes of the row_info and data arrays.
+template <typename number>
+void SparseMatrixEZ<number>::block_write (std::ostream &out) const
+{
+  AssertThrow (out, ExcIO());
+
+  // first the simple objects, bracketed in [...]
+  out << '[' << row_info.size() << "]["
+      << n_columns << "]["
+      << data.size() << "]["
+      << increment << "][";
+  // then write out real data. Empty arrays are skipped: forming the address
+  // of their first element through begin() is undefined behavior.
+  if (!row_info.empty())
+    out.write(reinterpret_cast<const char *>(&row_info[0]),
+              sizeof(RowInfo) * row_info.size());
+
+  out << "][";
+
+  if (!data.empty())
+    out.write(reinterpret_cast<const char *>(&data[0]),
+              sizeof(Entry) * data.size());
+
+  out << ']';
+  AssertThrow (out, ExcIO());
+}
+
+
+#define DEAL_II_CHECK_INPUT(in,a,c) \
+  {in >> c; AssertThrow(c == a, \
+                        ExcMessage("Unexpected character in input stream"));}
+
+// Read a matrix in the format written by block_write(). The brackets of the
+// header are checked, and the two array sizes are validated before use.
+template <typename number>
+void SparseMatrixEZ<number>::block_read (std::istream &in)
+{
+  AssertThrow (in, ExcIO());
+  char c;
+  int n;
+  // first read in simple data
+  DEAL_II_CHECK_INPUT(in,'[',c);
+  in >> n;
+  AssertThrow (in && n >= 0, ExcIO());   // reject a missing or negative size
+  row_info.resize(n);
+
+  DEAL_II_CHECK_INPUT(in,']',c);
+  DEAL_II_CHECK_INPUT(in,'[',c);
+  in >> n_columns;
+
+  DEAL_II_CHECK_INPUT(in,']',c);
+  DEAL_II_CHECK_INPUT(in,'[',c);
+  in >> n;
+  AssertThrow (in && n >= 0, ExcIO());   // reject a missing or negative size
+  data.resize(n);
+
+  DEAL_II_CHECK_INPUT(in,']',c);
+  DEAL_II_CHECK_INPUT(in,'[',c);
+  in >> increment;
+
+  DEAL_II_CHECK_INPUT(in,']',c);
+  DEAL_II_CHECK_INPUT(in,'[',c);
+  // then read data; an empty array has no first element to read into
+  if (!row_info.empty())
+    in.read(reinterpret_cast<char *>(&row_info[0]),
+            sizeof(RowInfo) * row_info.size());
+  DEAL_II_CHECK_INPUT(in,']',c);
+  DEAL_II_CHECK_INPUT(in,'[',c);
+  if (!data.empty())
+    in.read(reinterpret_cast<char *>(&data[0]),
+            sizeof(Entry) * data.size());
+  DEAL_II_CHECK_INPUT(in,']',c);
+}
+
+#undef DEAL_II_CHECK_INPUT
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // dealii__sparse_matrix_ez_templates_h
diff --git a/include/deal.II/lac/sparse_mic.h b/include/deal.II/lac/sparse_mic.h
new file mode 100644
index 0000000..91b4d8c
--- /dev/null
+++ b/include/deal.II/lac/sparse_mic.h
@@ -0,0 +1,181 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2002 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__sparse_mic_h
+#define dealii__sparse_mic_h
+
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/sparse_decomposition.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+/*! @addtogroup Preconditioners
+ *@{
+ */
+
+/**
+ * Implementation of the Modified Incomplete Cholesky (MIC(0)) preconditioner
+ * for symmetric matrices. This class conforms to the state and usage
+ * specification in SparseLUDecomposition.
+ *
+ *
+ * <h3>The decomposition</h3>
+ *
+ * Let a symmetric, positive-definite, sparse matrix $A$ be in the form $A = D
+ * - L - L^T$, where $D$ is the diagonal part of $A$ and $-L$ is a strictly
+ * lower triangular matrix. The MIC(0) decomposition of the matrix $A$ is
+ * defined by $B = (X-L)X^{-1}(X-L^T)$, where $X$ is a diagonal matrix defined
+ * by the condition $\text{rowsum}(A) = \text{rowsum}(B)$.
+ *
+ * @author Stephen "Cheffo" Kolaroff, 2002, unified interface: Ralf Hartmann
+ * 2003; extension for full compatibility with LinearOperator class: Jean-Paul
+ * Pelteret, 2015.
+ */
+template <typename number>
+class SparseMIC : public SparseLUDecomposition<number>
+{
+public:
+  /**
+   * Declare type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * Constructor. Does nothing, so you have to call initialize() before
+   * using this object as a preconditioner.
+   */
+  SparseMIC ();
+
+  /**
+   * Destructor.
+   */
+  virtual ~SparseMIC();
+
+  /**
+   * Delete all member variables. Leaves the class in the state that it had
+   * directly after calling the constructor.
+   */
+  virtual void clear();
+
+  /**
+   * Make the @p AdditionalData type in the base class accessible to this
+   * class as well.
+   */
+  typedef
+  typename SparseLUDecomposition<number>::AdditionalData
+  AdditionalData;
+
+  /**
+   * Perform the modified incomplete Cholesky (MIC) decomposition of the
+   * given matrix.
+   *
+   * This function needs to be called before an object of this class is used
+   * as preconditioner.
+   *
+   * For more details about possible parameters, see the class documentation
+   * of SparseLUDecomposition and the documentation of the @p
+   * SparseLUDecomposition::AdditionalData class.
+   *
+   * According to the @p parameters, this function creates a new
+   * SparsityPattern or keeps the previous sparsity or takes the sparsity
+   * given by the user in @p parameters. Then, this function performs the MIC
+   * decomposition.
+   *
+   * After this function is called the preconditioner is ready to be used.
+   */
+  template <typename somenumber>
+  void initialize (const SparseMatrix<somenumber> &matrix,
+                   const AdditionalData &parameters = AdditionalData());
+
+  /**
+   * Apply the incomplete decomposition, i.e. do one forward-backward step
+   * $dst=(LU)^{-1}src$.
+   *
+   * Call @p initialize before calling this function.
+   */
+  template <typename somenumber>
+  void vmult (Vector<somenumber>       &dst,
+              const Vector<somenumber> &src) const;
+
+  /**
+   * Apply the transpose of the incomplete decomposition, i.e. do one forward-
+   * backward step $dst=(LU)^{-T}src$.
+   *
+   * Call @p initialize before calling this function.
+   *
+   * @note This function has not yet been implemented.
+   */
+  template <typename somenumber>
+  void Tvmult (Vector<somenumber>       &dst,
+               const Vector<somenumber> &src) const;
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  std::size_t memory_consumption () const;
+
+  /**
+   * @addtogroup Exceptions
+   * @{
+   */
+
+  /**
+   * Exception indicating that a diagonal entry computed for the
+   * decomposition is not positive.
+   */
+  DeclException0 (ExcStrengthenDiagonalTooSmall);
+  /**
+   * Exception indicating that the strengthening parameter, passed as the
+   * argument of the exception, is negative.
+   */
+  DeclException1 (ExcInvalidStrengthening,
+                  double,
+                  << "The strengthening parameter " << arg1
+                  << " is not greater or equal than zero!");
+  /**
+   * Exception reporting a diagonal element that is not positive. The first
+   * argument is the index of the element on the diagonal, the second one its
+   * value.
+   */
+  DeclException2(ExcDecompositionNotStable, int, double,
+                 << "The diagonal element (" <<arg1<<","<<arg1<<") is "
+                 << arg2 <<", but must be positive");
+
+  //@}
+private:
+  /**
+   * Values of the computed diagonal.
+   */
+  std::vector<number> diag;
+
+  /**
+   * Inverses of the diagonal: precomputed for faster vmult.
+   */
+  std::vector<number> inv_diag;
+
+  /**
+   * Values of the computed "inner sums", i.e. per-row sums of the elements
+   * lying to the right of the diagonal.
+   */
+  std::vector<number> inner_sums;
+
+  /**
+   * Compute the "inner sum" of row @p row, i.e. the sum of the elements of
+   * that row lying to the right of the diagonal.
+   */
+  number get_rowsum (const size_type row) const;
+};
+
+/*@}*/
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif  // dealii__sparse_mic_h
diff --git a/include/deal.II/lac/sparse_mic.templates.h b/include/deal.II/lac/sparse_mic.templates.h
new file mode 100644
index 0000000..0a5de1a
--- /dev/null
+++ b/include/deal.II/lac/sparse_mic.templates.h
@@ -0,0 +1,216 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2002 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__sparse_mic_templates_h
+#define dealii__sparse_mic_templates_h
+
+
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/lac/sparse_mic.h>
+#include <deal.II/lac/vector.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+// Default constructor: all three work arrays start out empty. They are
+// sized and filled by initialize().
+template <typename number>
+SparseMIC<number>::SparseMIC ()
+  :
+  diag(),
+  inv_diag(),
+  inner_sums()
+{}
+
+
+
+// Destructor: release everything through clear().
+template <typename number>
+SparseMIC<number>::~SparseMIC()
+{
+  this->clear ();
+}
+
+
+// Bring the object back to its state after default construction: drop the
+// three work arrays, then let the base class clear its own data.
+template <typename number>
+void SparseMIC<number>::clear()
+{
+  // swapping with an empty temporary (rather than calling clear() on the
+  // vector) also gives back the memory the vector had allocated
+  std::vector<number>().swap (diag);
+  std::vector<number>().swap (inv_diag);
+  std::vector<number>().swap (inner_sums);
+
+  SparseLUDecomposition<number>::clear();
+}
+
+
+// Set up the preconditioner for the square matrix @p matrix: call the base
+// class initialize(), copy the entries of @p matrix, strengthen the
+// diagonal if requested in @p data, and finally compute the diagonal of the
+// MIC decomposition together with its inverse.
+template <typename number>
+template <typename somenumber>
+inline
+void SparseMIC<number>::initialize (const SparseMatrix<somenumber> &matrix,
+                                    const AdditionalData &data)
+{
+  Assert (matrix.m()==matrix.n(), ExcNotQuadratic ());
+  Assert (data.strengthen_diagonal>=0, ExcInvalidStrengthening (data.strengthen_diagonal));
+
+  SparseLUDecomposition<number>::initialize(matrix, data);
+  this->strengthen_diagonal = data.strengthen_diagonal;
+  this->prebuild_lower_bound ();
+  this->copy_from (matrix);
+
+  Assert (this->m()==this->n(),   ExcNotQuadratic ());
+  Assert (matrix.m()==this->m(),  ExcDimensionMismatch(matrix.m(), this->m()));
+
+  if (data.strengthen_diagonal > 0)
+    this->strengthen_diagonal_impl ();
+
+  // MIC implementation: (S. Margenov lectures)
+  // x[i] = a[i][i] - sum(k=1, i-1,
+  //              a[i][k]/x[k]*sum(j=k+1, N, a[k][j]))
+
+  // TODO: for the sake of simplicity the auxiliary arrays live in this
+  // class. A better implementation would store these values in the
+  // underlying sparse matrix itself.
+  const size_type n_rows = this->m();
+  diag.resize (n_rows);
+  inv_diag.resize (n_rows);
+  inner_sums.resize (n_rows);
+
+  // precompute sum(j=k+1, N, a[k][j]) for every row k
+  for (size_type k=0; k<n_rows; ++k)
+    inner_sums[k] = get_rowsum(k);
+
+  for (size_type row=0; row<n_rows; ++row)
+    {
+      // a[i][i], taken from the (possibly strengthened) copy
+      const number diagonal_entry = this->begin(row)->value();
+
+      // accumulate the sum over k<i. Only the lower left part of the
+      // matrix is visited; since the matrix is symmetric this is all we
+      // need. The first entry of the row is skipped.
+      number correction = 0;
+      typename SparseMatrix<somenumber>::const_iterator
+      entry = matrix.begin(row)+1;
+      const typename SparseMatrix<somenumber>::const_iterator
+      row_end = matrix.end(row);
+      while ((entry != row_end) && (entry->column() < row))
+        {
+          const size_type k = entry->column();
+          correction += entry->value() / diag[k] * inner_sums[k];
+          ++entry;
+        }
+
+      Assert(diagonal_entry-correction > 0, ExcStrengthenDiagonalTooSmall());
+      diag[row] = diagonal_entry - correction;
+
+      inv_diag[row] = 1.0/diag[row];
+    }
+}
+
+
+
+// Return the sum of all entries of row @p row whose column index is larger
+// than @p row, i.e. of the entries to the right of the diagonal.
+template <typename number>
+inline number
+SparseMIC<number>::get_rowsum (const size_type row) const
+{
+  Assert(this->m()==this->n(), ExcNotQuadratic());
+
+  // start one past the first entry of the row
+  typename SparseMatrix<number>::const_iterator
+  entry = this->begin(row)+1;
+  const typename SparseMatrix<number>::const_iterator
+  row_end = this->end(row);
+
+  number sum_right_of_diagonal = 0;
+  for (; entry != row_end; ++entry)
+    {
+      if (entry->column() > row)
+        sum_right_of_diagonal += entry->value();
+    }
+
+  return sum_right_of_diagonal;
+}
+
+
+
+// Apply the preconditioner: overwrite @p dst with the result of one
+// forward substitution, a scaling with the diagonal and one backward
+// substitution applied to @p src. Both vectors must have as many entries
+// as the matrix has rows; initialize() must have been called before.
+template <typename number>
+template <typename somenumber>
+void
+SparseMIC<number>::vmult (Vector<somenumber>       &dst,
+                          const Vector<somenumber> &src) const
+{
+  Assert (dst.size() == src.size(), ExcDimensionMismatch(dst.size(), src.size()));
+  Assert (dst.size() == this->m(), ExcDimensionMismatch(dst.size(), this->m()));
+
+  const size_type N=dst.size();
+  // We assume the underlying matrix A is: A = X - L - U, where -L and -U are
+  // strictly lower- and upper- diagonal parts of the system.
+  //
+  // Solve (X-L)X^{-1}(X-U) x = b in 3 steps:
+  dst = src;
+  for (size_type row=0; row<N; ++row)
+    {
+      // Now: (X-L)u = b
+
+      // get start of this row. skip
+      // the diagonal element
+      for (typename SparseMatrix<number>::const_iterator
+           p = this->begin(row)+1;
+           (p != this->end(row)) && (p->column() < row);
+           ++p)
+        dst(row) -= p->value() * dst(p->column());
+
+      dst(row) *= inv_diag[row];
+    }
+
+  // Now: v = Xu
+  for (size_type row=0; row<N; ++row)
+    dst(row) *= diag[row];
+
+  // Now: (X-U)x = v, by backward substitution.
+  //
+  // Count down with the unsigned index type itself: a signed int counter
+  // cannot represent every row index once size_type is wider than int. The
+  // test-then-decrement form visits N-1, ..., 0 and terminates without
+  // relying on a negative value.
+  for (size_type row=N; row-- > 0; )
+    {
+      // visit the entries to the right of the diagonal
+      for (typename SparseMatrix<number>::const_iterator
+           p = this->begin(row)+1;
+           p != this->end(row);
+           ++p)
+        if (p->column() > row)
+          dst(row) -= p->value() * dst(p->column());
+
+      dst(row) *= inv_diag[row];
+    }
+}
+
+
+// Exists for full compatibility with the LinearOperator class
+// Transposed application of the preconditioner. This is not implemented;
+// the function only exists for full compatibility with the LinearOperator
+// class, and every call throws ExcNotImplemented.
+template <typename number>
+template <typename somenumber>
+void
+SparseMIC<number>::Tvmult (Vector<somenumber>       &,
+                           const Vector<somenumber> &) const
+{
+  AssertThrow(false, ExcNotImplemented());
+}
+
+
+
+// Estimate the memory used by this object (in bytes): the estimate of the
+// base class plus that of the three arrays stored here.
+template <typename number>
+std::size_t
+SparseMIC<number>::memory_consumption () const
+{
+  std::size_t total = SparseLUDecomposition<number>::memory_consumption ();
+  total += MemoryConsumption::memory_consumption(diag);
+  total += MemoryConsumption::memory_consumption(inv_diag);
+  total += MemoryConsumption::memory_consumption(inner_sums);
+  return total;
+}
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // dealii__sparse_mic_templates_h
diff --git a/include/deal.II/lac/sparse_vanka.h b/include/deal.II/lac/sparse_vanka.h
new file mode 100644
index 0000000..f20a1bb
--- /dev/null
+++ b/include/deal.II/lac/sparse_vanka.h
@@ -0,0 +1,604 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__sparse_vanka_h
+#define dealii__sparse_vanka_h
+
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/base/multithread_info.h>
+
+#include <vector>
+#include <map>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <typename number> class FullMatrix;
+template <typename number> class SparseMatrix;
+template <typename number> class Vector;
+
+template <typename number> class SparseVanka;
+template <typename number> class SparseBlockVanka;
+
+/*! @addtogroup Preconditioners
+ *@{
+ */
+
+/**
+ * Point-wise Vanka preconditioning. This class does Vanka preconditioning  on
+ * a point-wise base. Vanka preconditioners are used for saddle point problems
+ * like Stokes' problem or problems arising in optimization where Lagrange
+ * multipliers occur and the Newton method matrix has a zero block. With these
+ * matrices the application of Jacobi or Gauss-Seidel methods is impossible,
+ * because some diagonal elements are zero in the rows of the Lagrange
+ * multiplier. The approach of Vanka is to solve a small (usually indefinite)
+ * system of equations for each Lagrange multiplier variable (we will also
+ * call the pressure in Stokes' equation a Lagrange multiplier since it can
+ * be interpreted as such).
+ *
+ * Objects of this class are constructed by passing a vector of indices of the
+ * degrees of freedom of the Lagrange multiplier. In the actual
+ * preconditioning method, these rows are traversed in the order in which they
+ * appear in the matrix. Since this is a Gauß-Seidel like procedure, remember
+ * to have a good ordering in advance (for transport dominated problems,
+ * Cuthill-McKee algorithms are a good means for this, if points on the inflow
+ * boundary are chosen as starting points for the renumbering).
+ *
+ * For each selected degree of freedom, a local system of equations is built
+ * by the degree of freedom itself and all other values coupling immediately,
+ * i.e. the set of degrees of freedom considered for the local system of
+ * equations for degree of freedom @p i is @p i itself and all @p j such that
+ * the element <tt>(i,j)</tt> is a nonzero entry in the sparse matrix under
+ * consideration. The elements <tt>(j,i)</tt> are not considered. We now pick
+ * all matrix entries from rows and columns out of the set of degrees of
+ * freedom just described out of the global matrix and put it into a local
+ * matrix, which is subsequently inverted. This system may be of different
+ * size for each degree of freedom, depending for example on the local
+ * neighborhood of the respective node on a computational grid.
+ *
+ * The right hand side is built up in the same way, i.e. by copying all
+ * entries that coupled with the one under present consideration, but it is
+ * augmented by all degrees of freedom coupling with the degrees from the set
+ * described above (i.e. the DoFs coupling second order to the present one).
+ * The reason for this is, that the local problems to be solved shall have
+ * Dirichlet boundary conditions on the second order coupling DoFs, so we have
+ * to take them into account but eliminate them before actually solving; this
+ * elimination is done by the modification of the right hand side, and in the
+ * end these degrees of freedom do not occur in the matrix and solution vector
+ * any more at all.
+ *
+ * This local system is solved and the values are updated into the destination
+ * vector.
+ *
+ * Remark: the Vanka method is a non-symmetric preconditioning method.
+ *
+ *
+ * <h3>Example of Use</h3> This little example is taken from a program doing
+ * parameter optimization. The Lagrange multiplier is the third component of
+ * the finite element used. The system is solved by the GMRES method.
+ * @code
+ *    // tag the Lagrange multiplier variable
+ *    vector<bool> signature(3);
+ *    signature[0] = signature[1] = false;
+ *    signature[2] = true;
+ *
+ *    // tag all dofs belonging to the Lagrange multiplier
+ *    vector<bool> selected_dofs (dof.n_dofs(), false);
+ *    DoFTools::extract_dofs(dof, signature, selected_dofs);
+ *    // create the Vanka object
+ *    SparseVanka<double> vanka (global_matrix, selected_dofs);
+ *
+ *    // create the solver
+ *    SolverGMRES<> gmres(control,memory,504);
+ *
+ *    // solve
+ *    gmres.solve (global_matrix, solution, right_hand_side,
+ *                 vanka);
+ * @endcode
+ *
+ *
+ * <h4>Implementor's remark</h4> At present, the local matrices are built up
+ * such that the degree of freedom associated with the local Lagrange
+ * multiplier is the first one. Thus, usually the upper left entry in the
+ * local matrix is zero. It is not clear to me (W.B.) whether this might pose
+ * some problems in the inversion of the local matrices. Maybe someone would
+ * like to check this.
+ *
+ * @note Instantiations for this template are provided for <tt>@<float@> and
+ * @<double@></tt>; others can be generated in application programs (see the
+ * section on
+ * @ref Instantiations
+ * in the manual).
+ *
+ * @author Guido Kanschat, Wolfgang Bangerth; 1999, 2000; extension for full
+ * compatibility with LinearOperator class: Jean-Paul Pelteret, 2015
+ */
+template<typename number>
+class SparseVanka
+{
+public:
+  /**
+   * Declare type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * Constructor. Does nothing.
+   *
+   * Call the initialize() function before using this object as preconditioner
+   * (vmult()).
+   */
+  SparseVanka ();
+
+  /**
+   * Constructor. Gets the matrix for preconditioning and a bit vector with
+   * entries @p true for all rows to be updated. A reference to this vector
+   * will be stored, so it must persist longer than the Vanka object. The same
+   * is true for the matrix.
+   *
+   * The matrix @p M which is passed here may or may not be the same matrix
+   * for which this object shall act as preconditioner. In particular, it is
+   * conceivable that the preconditioner is built up for one matrix once, but
+   * is used for subsequent steps in a nonlinear process as well, where the
+   * matrix changes in each step slightly.
+   *
+   * If @p conserve_memory is @p false, then the inverses of the local systems
+   * are computed now; if the flag is @p true, then they are computed every
+   * time the preconditioner is applied. This saves some memory, but makes
+   * preconditioning very slow. Note also, that if the flag is @p false, then
+   * the contents of the matrix @p M at the time of calling this constructor
+   * are used, while if the flag is @p true, then the values in @p M at the
+   * time of preconditioning are used. This may lead to different results,
+   * obviously, if @p M changes.
+   *
+   * The parameter @p n_threads determines how many threads shall be used in
+   * parallel when building the inverses of the diagonal blocks. This
+   * parameter is ignored if not in multithreaded mode.
+   */
+  SparseVanka(const SparseMatrix<number> &M,
+              const std::vector<bool>    &selected,
+              const bool                  conserve_memory = false,
+              const unsigned int          n_threads       = MultithreadInfo::n_threads());
+
+  /**
+   * Destructor. Delete all allocated matrices.
+   */
+  ~SparseVanka();
+
+  /**
+   * Parameters for SparseVanka.
+   */
+  class AdditionalData
+  {
+  public:
+    /**
+     * Constructor. For the parameters' description, see below.
+     */
+    AdditionalData (const std::vector<bool> &selected,
+                    const bool               conserve_memory = false,
+                    const unsigned int       n_threads       = MultithreadInfo::n_threads());
+
+    /**
+     * Indices of those degrees of freedom that we shall work on. This is
+     * stored as a reference, so the vector is not copied.
+     */
+    const std::vector<bool> &selected;
+
+    /**
+     * Conserve memory flag.
+     */
+    const bool conserve_mem;
+
+    /**
+     * Number of threads to be used when building the inverses. Only relevant
+     * in multithreaded mode.
+     */
+    const unsigned int n_threads;
+  };
+
+
+  /**
+   * If the default constructor is used then this function needs to be called
+   * before an object of this class is used as preconditioner.
+   *
+   * For more detail about possible parameters, see the class documentation
+   * and the documentation of the SparseVanka::AdditionalData class.
+   *
+   * After this function is called the preconditioner is ready to be used
+   * (using the <code>vmult</code> function of derived classes).
+   */
+  void initialize (const SparseMatrix<number> &M,
+                   const AdditionalData       &additional_data);
+
+  /**
+   * Do the preconditioning. This function takes the residual in @p src and
+   * returns the resulting update vector in @p dst.
+   */
+  template<typename number2>
+  void vmult (Vector<number2>       &dst,
+              const Vector<number2> &src) const;
+
+  /**
+   * Apply transpose preconditioner. This function takes the residual in @p
+   * src and returns the resulting update vector in @p dst.
+   */
+  template<typename number2>
+  void Tvmult (Vector<number2>       &dst,
+               const Vector<number2> &src) const;
+
+  /**
+   * Return the dimension of the codomain (or range) space. To remember: the
+   * matrix is of dimension $m \times n$.
+   *
+   * @note This function should only be called if the preconditioner has been
+   * initialized.
+   */
+  size_type m () const;
+
+  /**
+   * Return the dimension of the domain space. To remember: the matrix is of
+   * dimension $m \times n$.
+   *
+   * @note This function should only be called if the preconditioner has been
+   * initialized.
+   */
+  size_type n () const;
+
+protected:
+  /**
+   * Apply the inverses corresponding to those degrees of freedom that have a
+   * @p true value in @p dof_mask, to the @p src vector and move the result
+   * into @p dst. Actually, only values for allowed indices are written to @p
+   * dst, so the application of this function only does what is announced in
+   * the general documentation if the given mask sets all values to zero.
+   *
+   * The reason for providing the mask anyway is that in derived classes we
+   * may want to apply the preconditioner to parts of the matrix only, in
+   * order to parallelize the application. Then, it is important to only write
+   * to some slices of @p dst, in order to eliminate the dependencies of
+   * threads on each other.
+   *
+   * If a null pointer is passed instead of a pointer to the @p dof_mask (as
+   * is the default value), then it is assumed that we shall work on all
+   * degrees of freedom. This is then equivalent to calling the function with
+   * a <tt>vector<bool>(n_dofs,true)</tt>.
+   *
+   * The @p vmult of this class of course calls this function with a null
+   * pointer.
+   */
+  template<typename number2>
+  void apply_preconditioner (Vector<number2>         &dst,
+                             const Vector<number2>   &src,
+                             const std::vector<bool> *const dof_mask = 0) const;
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  std::size_t memory_consumption () const;
+
+private:
+  /**
+   * Pointer to the matrix.
+   */
+  SmartPointer<const SparseMatrix<number>,SparseVanka<number> > matrix;
+
+  /**
+   * Conserve memory flag.
+   */
+  bool conserve_mem;
+
+  /**
+   * Indices of those degrees of freedom that we shall work on.
+   */
+  const std::vector<bool> *selected;
+
+  /**
+   * Number of threads to be used when building the inverses. Only relevant in
+   * multithreaded mode.
+   */
+  unsigned int n_threads;
+
+  /**
+   * Array of inverse matrices, one for each degree of freedom. Only those
+   * elements will be used that are tagged in @p selected.
+   */
+  mutable std::vector<SmartPointer<FullMatrix<float>,SparseVanka<number> > > inverses;
+
+  /**
+   * The dimension of the range space.
+   */
+  size_type _m;
+
+  /**
+   * The dimension of the domain space.
+   */
+  size_type _n;
+
+  /**
+   * Compute the inverses of all selected diagonal elements.
+   */
+  void compute_inverses ();
+
+  /**
+   * Compute the inverses at positions in the range <tt>[begin,end)</tt>. In
+   * non-multithreaded mode, <tt>compute_inverses()</tt> calls this function
+   * with the whole range, but in multithreaded mode, several copies of this
+   * function are spawned.
+   */
+  void compute_inverses (const size_type begin,
+                         const size_type end);
+
+  /**
+   * Compute the inverse of the block located at position @p row. Since the
+   * vector is used quite often, it is generated only once in the caller of
+   * this function and passed to this function which first clears it. Reusing
+   * the vector makes the process significantly faster than in the case where
+   * this function re-creates it each time.
+   */
+  void compute_inverse (const size_type         row,
+                        std::vector<size_type> &local_indices);
+
+  /**
+   * Make the derived class a friend. This seems silly, but is actually
+   * necessary, since derived classes can only access non-public members
+   * through their @p this pointer, but not access these members as member
+   * functions of other objects of the type of this base class (i.e. like
+   * <tt>x.f()</tt>, where @p x is an object of the base class, and @p f one
+   * of its non-public member functions).
+   *
+   * Now, in the case of the @p SparseBlockVanka class, we would like to take
+   * the address of a function of the base class in order to call it through
+   * the multithreading framework, so the derived class has to be a friend.
+   */
+  template <typename T> friend class SparseBlockVanka;
+};
+
+
+
+/**
+ * Block version of the sparse Vanka preconditioner. This class divides the
+ * matrix into blocks and works on the diagonal blocks only, which of course
+ * reduces the efficiency as preconditioner, but is perfectly parallelizable.
+ * The constructor takes a parameter into how many blocks the matrix shall be
+ * subdivided and then lets the underlying class do the work. Division of the
+ * matrix is done in several ways which are described in detail below.
+ *
+ * This class is probably useless if you don't have a multiprocessor system,
+ * since then the amount of work per preconditioning step is the same as for
+ * the @p SparseVanka class, but preconditioning properties are worse. On the
+ * other hand, if you have a multiprocessor system, the worse preconditioning
+ * quality (leading to more iterations of the linear solver) usually is well
+ * balanced by the increased speed of application due to the parallelization,
+ * leading to an overall decrease in elapsed wall-time for solving your linear
+ * system. It should be noted that the quality as preconditioner reduces with
+ * growing number of blocks, so there may be an optimal value (in terms of
+ * wall-time per linear solve) for the number of blocks.
+ *
+ * To facilitate writing portable code, if the number of blocks into which the
+ * matrix is to be subdivided, is set to one, then this class acts just like
+ * the @p SparseVanka class. You may therefore want to set the number of
+ * blocks equal to the number of processors you have.
+ *
+ * Note that the parallelization is done if <tt>deal.II</tt> was configured
+ * for multithread use and that the number of threads which is spawned equals
+ * the number of blocks. This is reasonable since you will not want to set the
+ * number of blocks unnecessarily large, since, as mentioned, this reduces the
+ * preconditioning properties.
+ *
+ *
+ * <h3>Splitting the matrix into blocks</h3>
+ *
+ * Splitting the matrix into blocks is always done in a way such that the
+ * blocks are not necessarily of equal size, but such that the number of
+ * selected degrees of freedom for which a local system is to be solved is
+ * equal between blocks. The reason for this strategy to subdivision is load-
+ * balancing for multithreading. There are several possibilities to actually
+ * split the matrix into blocks, which are selected by the flag @p
+ * blocking_strategy that is passed to the constructor. By a block, we will in
+ * the sequel denote a list of indices of degrees of freedom; the algorithm
+ * will work on each block separately, i.e. the solutions of the local systems
+ * corresponding to a degree of freedom of one block will only be used to
+ * update the degrees of freedom belonging to the same block, but never to
+ * update degrees of freedoms of other blocks. A block can be a consecutive
+ * list of indices, as in the first alternative below, or a nonconsecutive
+ * list of indices. Of course, we assume that the intersection of each two
+ * blocks is empty and that the union of all blocks equals the interval
+ * <tt>[0,N)</tt>, where @p N is the number of degrees of freedom of the
+ * system of equations.
+ *
+ * <ul>
+ * <li> @p index_intervals: Here, we choose the blocks to be intervals
+ * <tt>[a_i,a_{i+1})</tt>, i.e. consecutive degrees of freedom are usually
+ * also within the same block. This is a reasonable strategy if the degrees
+ * of freedom have, for example, been re-numbered using the Cuthill-McKee
+ * algorithm, in which spatially neighboring degrees of freedom have
+ * neighboring indices. In that case, coupling in the matrix is usually
+ * restricted to the vicinity of the diagonal as well, and we can simply cut
+ * the matrix into blocks.
+ *
+ * The bounds of the intervals, i.e. the @p a_i above, are chosen such that
+ * the number of degrees of freedom on which we shall work (i.e. usually the
+ * degrees of freedom corresponding to Lagrange multipliers) is about the same
+ * in each block; this does not mean, however, that the sizes of the blocks
+ * are equal, since the blocks also comprise the other degrees of freedom for
+ * which no local system is solved. In the extreme case, consider that all
+ * Lagrange multipliers are sorted to the end of the range of DoF indices,
+ * then the first block would be very large, since it comprises all other DoFs
+ * and some Lagrange multipliers, while all other blocks are rather small and
+ * comprise only Lagrange multipliers. This strategy therefore does not only
+ * depend on the order in which the Lagrange DoFs are sorted, but also on the
+ * order in which the other DoFs are sorted. It is therefore necessary to note
+ * that this almost renders the capability as preconditioner useless if the
+ * degrees of freedom are numbered by component, i.e. all Lagrange multipliers
+ * en bloc.
+ *
+ * <li> @p adaptive: This strategy is a bit more clever in cases where the
+ * Lagrange DoFs are clustered, as in the example above. It works as follows:
+ * it first groups the Lagrange DoFs into blocks, using the same strategy as
+ * above. However, instead of grouping the other DoFs into the blocks of
+ * Lagrange DoFs with nearest DoF index, it decides for each non-Lagrange DoF
+ * to put it into the block of Lagrange DoFs which write to this non-Lagrange
+ * DoF most often. This makes it possible to even sort the Lagrange DoFs to
+ * the end and still associate spatially neighboring non-Lagrange DoFs to the
+ * same blocks where the respective Lagrange DoFs are, since they couple to
+ * each other while spatially distant DoFs don't couple.
+ *
+ * The additional computational effort to sorting the non-Lagrange DoFs is not
+ * very large compared with the inversion of the local systems and applying
+ * the preconditioner, so this strategy might be reasonable if you want to
+ * sort your degrees of freedom by component. If the degrees of freedom are
+ * not sorted by component, the results of the two strategies outlined above
+ * do not differ much. However, unlike the first strategy, the performance
+ * of the second strategy does not deteriorate if the DoFs are renumbered by
+ * component.
+ * </ul>
+ *
+ *
+ * <h3>Typical results</h3>
+ *
+ * As a prototypical test case, we use a nonlinear problem from optimization,
+ * which leads to a series of saddle point problems, each of which is solved
+ * using GMRES with Vanka as preconditioner. The equation had approx. 850
+ * degrees of freedom. With the non-blocked version @p SparseVanka (or @p
+ * SparseBlockVanka with <tt>n_blocks==1</tt>), the following numbers of
+ * iterations are needed to solve the linear system in each nonlinear step:
+ * @verbatim
+ *   101 68 64 53 35 21
+ * @endverbatim
+ *
+ * With four blocks, we need the following numbers of iterations
+ * @verbatim
+ *   124 88 83 66 44 28
+ * @endverbatim
+ * As can be seen, more iterations are needed. However, in terms of computing
+ * time, the first version needs 72 seconds wall time (and 79 seconds CPU
+ * time, which is more than wall time since some other parts of the program
+ * were parallelized as well), while the second version needed 53 seconds wall
+ * time (and 110 seconds CPU time) on a four processor machine. The total time
+ * is in both cases dominated by the linear solvers. In this case, it is
+ * therefore worth while using the blocked version of the preconditioner if
+ * wall time is more important than CPU time.
+ *
+ * The results with the block version above were obtained with the first
+ * blocking strategy and the degrees of freedom were not numbered by
+ * component. Using the second strategy does not much change the numbers of
+ * iterations (at most by one in each step) and they also do not change when
+ * the degrees of freedom are sorted by component, while the first strategy
+ * significantly deteriorated.
+ *
+ * @author Wolfgang Bangerth, 2000
+ */
+template<typename number>
+class SparseBlockVanka : public SparseVanka<number>
+{
+public:
+  /**
+   * Declare type for container size. This is the type used for DoF indices
+   * and for counting DoFs in this class.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * Enumeration of the different methods by which the DoFs are distributed to
+   * the blocks on which we are to work.
+   *
+   * - @p index_intervals: every block receives one contiguous interval of DoF
+   *   indices.
+   * - @p adaptive: the selected DoFs are grouped by index interval as above,
+   *   whereas every other DoF is assigned to the block whose selected DoFs
+   *   couple to it most often.
+   */
+  enum BlockingStrategy
+  {
+    index_intervals, adaptive
+  };
+
+  /**
+   * Constructor. Pass all arguments except for @p n_blocks and
+   * @p blocking_strategy to the base class; these two determine how the DoFs
+   * are distributed to the blocks.
+   *
+   * @param M                 Matrix for which the preconditioner is built.
+   * @param selected          Flags, one per row of @p M, marking the DoFs for
+   *                          which local systems are solved.
+   * @param n_blocks          Number of blocks; must be greater than zero.
+   * @param blocking_strategy How DoFs are assigned to the blocks.
+   * @param conserve_memory   Forwarded to the base class.
+   * @param n_threads         Forwarded to the base class.
+   */
+  SparseBlockVanka (const SparseMatrix<number> &M,
+                    const std::vector<bool>    &selected,
+                    const unsigned int          n_blocks,
+                    const BlockingStrategy      blocking_strategy,
+                    const bool                  conserve_memory = false,
+                    const unsigned int          n_threads       = MultithreadInfo::n_threads());
+
+  /**
+   * Apply the preconditioner to @p src and store the result in @p dst. The
+   * previous content of @p dst is overwritten.
+   */
+  template<typename number2>
+  void vmult (Vector<number2>       &dst,
+              const Vector<number2> &src) const;
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object, including the base class and the block masks.
+   */
+  std::size_t memory_consumption () const;
+
+private:
+  /**
+   * Store the number of blocks.
+   */
+  const unsigned int n_blocks;
+
+  /**
+   * In this field, we precompute for each block which degrees of freedom
+   * belong to it. Thus, if <tt>dof_masks[i][j]==true</tt>, then DoF @p j
+   * belongs to block @p i. Of course, no other <tt>dof_masks[l][j]</tt> may
+   * be @p true for <tt>l!=i</tt>. This computation is done in the
+   * constructor, to avoid recomputing each time the preconditioner is called.
+   */
+  std::vector<std::vector<bool> > dof_masks;
+
+  /**
+   * Compute the contents of the field @p dof_masks. This function is called
+   * from the constructor, with the same @p M, @p selected and
+   * @p blocking_strategy that were given to the constructor.
+   */
+  void compute_dof_masks (const SparseMatrix<number> &M,
+                          const std::vector<bool>    &selected,
+                          const BlockingStrategy      blocking_strategy);
+};
+
+/*@}*/
+/* ---------------------------------- Inline functions ------------------- */
+
+#ifndef DOXYGEN
+
+// Return the number of rows of the matrix this preconditioner was set up
+// with. In debug mode, complain if no matrix has been attached yet.
+template<typename number>
+inline
+typename SparseVanka<number>::size_type
+SparseVanka<number>::m () const
+{
+  // the stored row count stays zero until a matrix has been attached
+  Assert(_m != 0, ExcNotInitialized());
+
+  return _m;
+}
+
+// Return the number of columns of the matrix this preconditioner was set up
+// with. In debug mode, complain if no matrix has been attached yet.
+template<typename number>
+inline
+typename SparseVanka<number>::size_type
+SparseVanka<number>::n () const
+{
+  // the stored column count stays zero until a matrix has been attached
+  Assert(_n != 0, ExcNotInitialized());
+
+  return _n;
+}
+
+// Application of the transposed preconditioner. This operation is not
+// implemented; calling it always raises ExcNotImplemented. The arguments
+// are therefore left unnamed.
+template<typename number>
+template<typename number2>
+inline
+void
+SparseVanka<number>::Tvmult (Vector<number2>       &/*dst*/,
+                             const Vector<number2> &/*src*/) const
+{
+  AssertThrow(false, ExcNotImplemented());
+}
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/sparse_vanka.templates.h b/include/deal.II/lac/sparse_vanka.templates.h
new file mode 100644
index 0000000..112ef7e
--- /dev/null
+++ b/include/deal.II/lac/sparse_vanka.templates.h
@@ -0,0 +1,671 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__sparse_vanka_templates_h
+#define dealii__sparse_vanka_templates_h
+
+
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/base/thread_management.h>
+#include <deal.II/lac/sparse_vanka.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/vector.h>
+
+#include <algorithm>
+#include <map>
+
+DEAL_II_NAMESPACE_OPEN
+
+// Default constructor. Creates an object without matrix, without selection
+// of DoFs and without any stored inverses; sizes are zero.
+template<typename number>
+SparseVanka<number>::SparseVanka()
+  :
+  matrix (),
+  conserve_mem (false),
+  selected (),
+  n_threads (0),
+  inverses (),
+  _m (0),
+  _n (0)
+{}
+
+// Constructor. Attaches the square matrix M and the flags of the DoFs for
+// which local systems are to be solved. Unless memory is to be conserved,
+// the local inverses are computed right away.
+template<typename number>
+SparseVanka<number>::SparseVanka(const SparseMatrix<number> &M,
+                                 const std::vector<bool>    &selected_dofs,
+                                 const bool                  conserve_mem,
+                                 const unsigned int          n_threads)
+  :
+  matrix (&M, typeid(*this).name()),
+  conserve_mem (conserve_mem),
+  selected (&selected_dofs),
+  n_threads (n_threads),
+  inverses (M.m(), 0),
+  _m (M.m()),
+  _n (M.n())
+{
+  // the matrix must be square, and there must be one flag per row
+  Assert (M.m() == M.n(), ExcNotQuadratic ());
+  Assert (M.m() == selected->size(), ExcDimensionMismatch(M.m(), selected->size()));
+
+  // in memory-conserving mode the inverses are built on the fly in
+  // apply_preconditioner() instead
+  if (!conserve_mem)
+    compute_inverses ();
+}
+
+
+// Destructor. Releases all local inverse matrices owned by this object.
+template<typename number>
+SparseVanka<number>::~SparseVanka()
+{
+  for (size_type k=0; k<inverses.size(); ++k)
+    {
+      // detach the smart pointer from the matrix before destroying the
+      // matrix, so that the matrix has no subscriber left when it dies
+      FullMatrix<float> *local_inverse = inverses[k];
+      inverses[k] = 0;
+
+      // entries without an inverse are null; deleting null is a no-op
+      delete local_inverse;
+    }
+}
+
+
+// (Re-)initialize the object with the square matrix M and the settings in
+// additional_data. Unless memory is to be conserved, the local inverses
+// are computed right away.
+//
+// Inverses that are still stored from an earlier initialization are
+// released first; otherwise they would be leaked when the entries are
+// overwritten by compute_inverse() or dropped by the resize below.
+template<typename number>
+void
+SparseVanka<number>::initialize(const SparseMatrix<number> &M,
+                                const AdditionalData       &additional_data)
+{
+  // free what a previous call may have left behind. detach each smart
+  // pointer before deleting the matrix it points to, exactly as the
+  // destructor does
+  for (size_type i=0; i<inverses.size(); ++i)
+    {
+      FullMatrix<float> *p = inverses[i];
+      inverses[i] = 0;
+      delete p;
+    }
+
+  matrix = &M;
+  conserve_mem = additional_data.conserve_mem;
+  selected = &(additional_data.selected);
+  n_threads = additional_data.n_threads;
+  inverses.resize(M.m());
+  _m = M.m();
+  _n = M.n();
+
+  // the matrix must be square, and there must be one flag per row
+  Assert (M.m() == M.n(), ExcNotQuadratic ());
+  Assert (M.m() == selected->size(), ExcDimensionMismatch(M.m(), selected->size()));
+
+  if (conserve_mem == false)
+    compute_inverses ();
+}
+
+// Compute the inverses of the local systems of all selected rows. With
+// thread support, the rows are split into n_threads consecutive index
+// ranges holding roughly the same number of selected rows each, and every
+// range is processed on a thread of its own.
+template <typename number>
+void
+SparseVanka<number>::compute_inverses ()
+{
+  Assert(matrix != 0, ExcNotInitialized());
+  Assert(selected != 0, ExcNotInitialized());
+
+#ifndef DEAL_II_WITH_THREADS
+  compute_inverses (0, matrix->m());
+#else
+  // without any thread to distribute the work to, do it right here. this
+  // also avoids the division by zero and the access to an empty vector
+  // below
+  if (n_threads == 0)
+    {
+      compute_inverses (0, matrix->m());
+      return;
+    }
+
+  const size_type n_inverses = std::count (selected->begin(),
+                                           selected->end(),
+                                           true);
+
+  const size_type n_inverses_per_thread = std::max(n_inverses / n_threads,
+                                                   static_cast<size_type> (1U));
+
+  // set up start and end index for each of the threads. note that we have
+  // to work somewhat to get this appropriate, since the indices for which
+  // inverses have to be computed may not be evenly distributed in the
+  // vector. as an extreme example consider numbering of DoFs by component,
+  // then all indices for which we have to do work will be consecutive,
+  // with other consecutive regions where we do not have to do something.
+  //
+  // both ends of a range are row indices and therefore of type size_type
+  std::vector<std::pair<size_type, size_type> > blocking (n_threads);
+
+  size_type    c      = 0;
+  unsigned int thread = 0;
+  blocking[0].first = 0;
+
+  for (size_type i=0; (i<matrix->m()) && (thread+1<n_threads); ++i)
+    {
+      if ((*selected)[i] == true)
+        ++c;
+      if (c == n_inverses_per_thread)
+        {
+          blocking[thread].second  = i;
+          blocking[thread+1].first = i;
+          ++thread;
+
+          c = 0;
+        }
+    }
+
+  // the range we were filling last extends to the end of the matrix. if
+  // there were too few selected rows to give work to every thread, the
+  // remaining threads get empty ranges, so that no row is handled by two
+  // threads at the same time
+  blocking[thread].second = matrix->m();
+  for (unsigned int t=thread+1; t<n_threads; ++t)
+    blocking[t].first = blocking[t].second = matrix->m();
+
+  typedef void (SparseVanka<number>::*FunPtr)(const size_type,
+                                              const size_type);
+  const FunPtr fun_ptr = &SparseVanka<number>::compute_inverses;
+
+  // Now spawn the threads
+  Threads::ThreadGroup<> threads;
+  for (unsigned int i=0; i<n_threads; ++i)
+    threads += Threads::new_thread (fun_ptr, *this,
+                                    blocking[i].first,
+                                    blocking[i].second);
+  threads.join_all ();
+#endif
+}
+
+
+// Compute the inverses of the local systems of all selected rows in the
+// half-open index range [begin,end).
+template <typename number>
+void
+SparseVanka<number>::compute_inverses (const size_type begin,
+                                       const size_type end)
+{
+  // scratch array handed to compute_inverse(); declared once here so it
+  // can be reused for every row
+  std::vector<size_type> scratch_indices;
+
+  for (size_type current=begin; current<end; ++current)
+    {
+      // rows that are not selected have no local system
+      if (!(*selected)[current])
+        continue;
+
+      compute_inverse (current, scratch_indices);
+    }
+}
+
+
+
+// Build and invert the local system belonging to the given row. The local
+// system consists of the matrix entries coupling those DoFs with each
+// other that appear in the sparsity pattern of this row. The array
+// local_indices is scratch space that is overwritten.
+template <typename number>
+void
+SparseVanka<number>::compute_inverse (const size_type         row,
+                                      std::vector<size_type> &local_indices)
+{
+  Assert(matrix != 0, ExcNotInitialized());
+  Assert(selected != 0, ExcNotInitialized());
+
+  const SparsityPattern &sparsity = matrix->get_sparsity_pattern();
+  const size_type n_coupling_dofs = sparsity.row_length(row);
+
+  // hand the new matrix over to the array of inverses right away, then
+  // keep working on it through a reference
+  inverses[row] = new FullMatrix<float> (n_coupling_dofs, n_coupling_dofs);
+  FullMatrix<float> &local_inverse = *inverses[row];
+
+  // gather the column numbers of the entries of this row
+  local_indices.resize (n_coupling_dofs);
+  for (size_type k=0; k<n_coupling_dofs; ++k)
+    local_indices[k] = sparsity.column_number(row, k);
+
+  // copy the corresponding submatrix and invert it in place
+  local_inverse.extract_submatrix_from (*matrix,
+                                        local_indices,
+                                        local_indices);
+  local_inverse.gauss_jordan();
+}
+
+
+// Apply the preconditioner to src and store the result in dst. Whatever
+// dst contained before is discarded.
+template<typename number>
+template<typename number2>
+void
+SparseVanka<number>::vmult (Vector<number2>       &dst,
+                            const Vector<number2> &src) const
+{
+  Assert(matrix != 0, ExcNotInitialized());
+  Assert(selected != 0, ExcNotInitialized());
+
+  // the worker function reads entries of dst while it fills it, hence
+  // start from a zero vector
+  dst = 0;
+
+  this->apply_preconditioner (dst, src);
+}
+
+
+// Apply the preconditioner: for every selected row, solve the local system
+// made up of the DoFs coupling with this row and write the solution into
+// dst.
+//
+// If dof_mask is a null pointer, all selected rows are treated and all
+// entries of dst may be read and written. Otherwise only selected rows
+// with (*dof_mask)[row]==true are treated, and only entries of dst whose
+// mask flag is set are read or written.
+//
+// dst is not cleared here; its current entries enter the right hand sides
+// of the local systems.
+template<typename number>
+template<typename number2>
+void
+SparseVanka<number>::apply_preconditioner (Vector<number2>         &dst,
+                                           const Vector<number2>   &src,
+                                           const std::vector<bool> *const dof_mask) const
+{
+  // this function is also called directly by derived classes, so check
+  // the state of the object here as well
+  Assert (matrix != 0, ExcNotInitialized());
+  Assert (selected != 0, ExcNotInitialized());
+  Assert (dst.size() == src.size(),
+          ExcDimensionMismatch(dst.size(), src.size()));
+  Assert (dst.size() == matrix->m(),
+          ExcDimensionMismatch(dst.size(), matrix->m()));
+
+  // first define an alias to the sparsity pattern of the matrix, since
+  // this will be used quite often
+  const SparsityPattern &structure
+    = matrix->get_sparsity_pattern();
+
+  // store whether we shall work on the whole matrix, or only on blocks.
+  // this variable is used to optimize access to vectors a little bit.
+  const bool range_is_restricted = (dof_mask != 0);
+
+  // space to be used for local systems. allocate as much memory as is the
+  // maximum. this eliminates the need to re-allocate memory inside the
+  // loop.
+  FullMatrix<float> local_matrix (structure.max_entries_per_row(),
+                                  structure.max_entries_per_row());
+  Vector<float> b (structure.max_entries_per_row());
+  Vector<float> x (structure.max_entries_per_row());
+
+  std::map<size_type, size_type> local_index;
+
+  // traverse all rows of the matrix which are selected
+  const size_type n = matrix->m();
+  for (size_type row=0; row<n; ++row)
+    if (((*selected)[row] == true) &&
+        ((range_is_restricted == false) || ((*dof_mask)[row] == true)))
+      {
+        const size_type row_length = structure.row_length(row);
+
+        // if we don't store the inverse matrices, then alias the entry in
+        // the global vector to the local matrix to be used
+        if (conserve_mem == true)
+          {
+            inverses[row] = &local_matrix;
+            inverses[row]->reinit (row_length, row_length);
+          }
+
+        b.reinit (row_length);
+        x.reinit (row_length);
+
+        // mapping between:
+        // 1 column number of all entries in this row, and
+        // 2 the position within this row (as stored in the
+        //   SparsityPattern object)
+        //
+        // since we do not explicitly consider nonsymmetric sparsity
+        // patterns, the first element of each entry simply denotes all
+        // degrees of freedom that couple with @p row.
+        local_index.clear ();
+        for (size_type i=0; i<row_length; ++i)
+          local_index.insert(std::pair<size_type, size_type>
+                             (structure.column_number(row, i), i));
+
+        // Build local matrix and rhs
+        for (std::map<size_type, size_type>::const_iterator is=local_index.begin();
+             is!=local_index.end(); ++is)
+          {
+            // irow loops over all DoFs that couple with the present DoF
+            const size_type irow = is->first;
+            // index of DoF irow in the matrix row corresponding to DoF
+            // @p row. runs between 0 and row_length
+            const size_type i = is->second;
+
+            // copy rhs
+            b(i) = src(irow);
+
+            // for all the DoFs that irow couples with (including irow
+            // itself)
+            for (typename SparseMatrix<number>::const_iterator p=matrix->begin(irow);
+                 p != matrix->end(irow); ++p)
+              {
+                // find out whether this DoF (that couples with @p irow,
+                // which itself couples with @p row) also couples with
+                // @p row.
+                const std::map<size_type, size_type>::const_iterator js
+                  = local_index.find(p->column());
+
+                if (js == local_index.end())
+                  {
+                    // if not, then still use this dof to modify the rhs,
+                    // provided we are allowed to look at it
+                    if (!range_is_restricted ||
+                        ((*dof_mask)[p->column()] == true))
+                      b(i) -= p->value() * dst(p->column());
+                  }
+                else if (conserve_mem == true)
+                  // if so, then build the matrix out of it. this is only
+                  // necessary if the inverse has not been precomputed
+                  (*inverses[row])(i,js->second) = p->value();
+              }
+          }
+
+        // Compute new values
+        if (conserve_mem == true)
+          inverses[row]->gauss_jordan();
+
+        // apply preconditioner
+        inverses[row]->vmult(x,b);
+
+        // Distribute new values
+        for (std::map<size_type, size_type>::const_iterator is=local_index.begin();
+             is!=local_index.end(); ++is)
+          {
+            const size_type irow = is->first;
+            const size_type i = is->second;
+
+            // do nothing if not in the range
+            if (!range_is_restricted ||
+                ((*dof_mask)[irow] == true))
+              dst(irow) = x(i);
+          }
+
+        // if we don't store the inverses, then unalias the local matrix
+        if (conserve_mem == true)
+          inverses[row] = 0;
+      }
+}
+
+
+
+// Determine an estimate for the memory consumption (in bytes) of this
+// object: the object itself, the vector of selection flags, and all local
+// inverses that are currently stored.
+template <typename number>
+std::size_t
+SparseVanka<number>::memory_consumption () const
+{
+  std::size_t mem = sizeof(*this);
+
+  // there are no selection flags before the object has been initialized
+  if (selected != 0)
+    mem += MemoryConsumption::memory_consumption (*selected);
+
+  // only selected rows own an inverse, and none does if inverses are not
+  // stored at all; the remaining entries are null and must not be
+  // dereferenced
+  for (size_type i=0; i<inverses.size(); ++i)
+    if (inverses[i] != 0)
+      mem += MemoryConsumption::memory_consumption (*inverses[i]);
+
+  return mem;
+}
+
+
+// Constructor. Simply stores the given arguments in the members of the
+// same names.
+template <typename number>
+SparseVanka<number>::AdditionalData::
+AdditionalData (const std::vector<bool> &selected,
+                const bool               conserve_mem,
+                const unsigned int       n_threads)
+  :
+  selected(selected),
+  conserve_mem (conserve_mem),
+  n_threads (n_threads)
+{
+}
+
+
+//---------------------------------------------------------------------------
+
+
+// Constructor. The matrix, the selection flags, the memory flag and the
+// number of threads go to the base class. The block masks start out with
+// no DoF assigned to any block and are filled by compute_dof_masks().
+template <typename number>
+SparseBlockVanka<number>::SparseBlockVanka (const SparseMatrix<number> &M,
+                                            const std::vector<bool>    &selected,
+                                            const unsigned int          n_blocks,
+                                            const BlockingStrategy      blocking_strategy,
+                                            const bool                  conserve_memory,
+                                            const unsigned int          n_threads)
+  :
+  SparseVanka<number> (M, selected, conserve_memory, n_threads),
+  n_blocks (n_blocks),
+  dof_masks (n_blocks, std::vector<bool>(M.m(), false))
+{
+  // distribute the DoFs to the blocks according to the chosen strategy
+  compute_dof_masks (M, selected, blocking_strategy);
+}
+
+
+// Fill the field dof_masks, i.e. decide for every DoF which block it
+// belongs to.
+//
+// First the index range [0,M.m()) is cut into n_blocks consecutive
+// intervals holding roughly the same number of selected DoFs each. With
+// the strategy index_intervals, every DoF belongs to the block of the
+// interval it lies in. With the strategy adaptive, this holds for the
+// selected DoFs only, whereas every other DoF is given to the block whose
+// selected DoFs couple with it most often.
+//
+// If there are too few selected DoFs to fill every block, the trailing
+// blocks stay empty.
+template <typename number>
+void
+SparseBlockVanka<number>::compute_dof_masks (const SparseMatrix<number> &M,
+                                             const std::vector<bool>    &selected,
+                                             const BlockingStrategy      blocking_strategy)
+{
+  Assert (n_blocks > 0, ExcInternalError());
+
+  const size_type n_inverses = std::count (selected.begin(),
+                                           selected.end(),
+                                           true);
+
+  const size_type n_inverses_per_block = std::max(n_inverses / n_blocks,
+                                                  static_cast<size_type> (1U));
+
+  // precompute the splitting points
+  std::vector<std::pair<size_type, size_type> > intervals (n_blocks);
+
+  // set up start and end index for each of the blocks. note that we have
+  // to work somewhat to get this appropriate, since the indices for which
+  // inverses have to be computed may not be evenly distributed in the
+  // vector. as an extreme example consider numbering of DoFs by component,
+  // then all indices for which we have to do work will be consecutive,
+  // with other consecutive regions where we do not have to do something
+  {
+    size_type    c     = 0;
+    unsigned int block = 0;
+    intervals[0].first = 0;
+
+    for (size_type i=0; (i<M.m()) && (block+1<n_blocks); ++i)
+      {
+        if (selected[i] == true)
+          ++c;
+        if (c == n_inverses_per_block)
+          {
+            intervals[block].second  = i;
+            intervals[block+1].first = i;
+            ++block;
+
+            c = 0;
+          }
+      }
+
+    // the interval we were filling last extends to the end of the index
+    // range. if there were too few selected DoFs to open every block, the
+    // remaining blocks get empty intervals. this keeps the intervals
+    // disjoint and guarantees first<=second for each of them, which the
+    // code below relies on
+    intervals[block].second = M.m();
+    for (unsigned int b=block+1; b<n_blocks; ++b)
+      intervals[b].first = intervals[b].second = M.m();
+  }
+
+  // now transfer the knowledge on the splitting points into the
+  // vector<bool>s that the base class wants to see. the way how we do
+  // this depends on the requested blocking strategy
+  switch (blocking_strategy)
+    {
+    case index_intervals:
+    {
+      for (unsigned int block=0; block<n_blocks; ++block)
+        std::fill_n (dof_masks[block].begin()+intervals[block].first,
+                     intervals[block].second - intervals[block].first,
+                     true);
+      break;
+    }
+
+    case adaptive:
+    {
+      // the splitting points for the DoF have been computed above
+      // already, but we will only use them to split the Lagrange dofs
+      // into blocks. splitting the remaining dofs will be done now.
+
+      // first count how often the Lagrange dofs of each block access the
+      // different dofs
+      Table<2,size_type> access_count (n_blocks, M.m());
+
+      const SparsityPattern &structure = M.get_sparsity_pattern();
+
+      for (size_type row=0; row<M.m(); ++row)
+        if (selected[row] == true)
+          {
+            // first find out to which block the present row belongs. the
+            // last non-empty interval ends at M.m(), so this search
+            // always terminates at a valid block
+            unsigned int block_number = 0;
+            while (row>=intervals[block_number].second)
+              ++block_number;
+            Assert (block_number < n_blocks, ExcInternalError());
+
+            // now traverse the matrix structure to find out to which
+            // dofs number the present index wants to write
+            const size_type row_length = structure.row_length(row);
+            for (size_type i=0; i<row_length; ++i)
+              ++access_count[block_number][structure.column_number(row, i)];
+          }
+
+      // now we know that block @p i wants to write to DoF @p j as often
+      // as <tt>access_count[i][j]</tt> times. Let @p j be allotted to
+      // the block which accesses it most often.
+      //
+      // if it is a Lagrange dof, then of course we leave it to the block
+      // we put it into in the first place
+      for (size_type row=0; row<M.m(); ++row)
+        if (selected[row] == true)
+          {
+            unsigned int block_number = 0;
+            while (row>=intervals[block_number].second)
+              ++block_number;
+
+            dof_masks[block_number][row] = true;
+          }
+        else
+          {
+            // find out which block accesses this dof the most often. a
+            // dof that no block accesses ends up in block zero
+            size_type max_accesses        = 0;
+            unsigned int max_access_block = 0;
+            for (unsigned int block=0; block<n_blocks; ++block)
+              if (access_count[block][row] > max_accesses)
+                {
+                  max_accesses = access_count[block][row];
+                  max_access_block = block;
+                }
+            dof_masks[max_access_block][row] = true;
+          }
+
+      break;
+    }
+
+    default:
+      Assert (false, ExcInternalError());
+    }
+}
+
+
+
+// Apply the preconditioner to src and store the result in dst. With only
+// one block this is what the base class does; otherwise the base class
+// worker function is run once per block, restricted to the DoFs of that
+// block. With thread support, each block gets a thread of its own.
+template <typename number>
+template <typename number2>
+void SparseBlockVanka<number>::vmult (Vector<number2>       &dst,
+                                      const Vector<number2> &src) const
+{
+  dst = 0;
+
+  // if no blocking is required, pass down to the underlying class and be
+  // done
+  if (n_blocks == 1)
+    {
+      this->apply_preconditioner (dst, src);
+      return;
+    }
+
+  // otherwise: blocking requested
+#ifdef DEAL_II_WITH_THREADS
+  // some compilers have trouble picking the right function if we simply
+  // drop in the address of an overloaded template member function. make
+  // it simpler for the compiler by spelling out the type of the pointer
+  // to member function right away
+  typedef void (SparseVanka<number>::*worker_function_type)
+  (Vector<number2> &,
+   const Vector<number2> &,
+   const std::vector<bool> *const) const;
+  const worker_function_type worker
+    = &SparseVanka<number>::template apply_preconditioner<number2>;
+
+  // one thread per block; wait until all of them have finished
+  Threads::ThreadGroup<> block_threads;
+  for (unsigned int b=0; b<n_blocks; ++b)
+    block_threads += Threads::new_thread (worker,
+                                          *static_cast<const SparseVanka<number>*>(this),
+                                          dst, src,&dof_masks[b]);
+  block_threads.join_all ();
+#else
+  // no threads available: work on the blocks one after the other
+  for (unsigned int b=0; b<n_blocks; ++b)
+    this->apply_preconditioner (dst, src, &dof_masks[b]);
+#endif
+}
+
+
+
+// Determine an estimate for the memory consumption (in bytes) of this
+// object: what the base class reports plus the size of all block masks.
+template <typename number>
+std::size_t
+SparseBlockVanka<number>::memory_consumption () const
+{
+  std::size_t total = SparseVanka<number>::memory_consumption();
+
+  typedef std::vector<std::vector<bool> >::const_iterator mask_iterator;
+  for (mask_iterator mask=dof_masks.begin(); mask!=dof_masks.end(); ++mask)
+    total += MemoryConsumption::memory_consumption (*mask);
+
+  return total;
+}
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/sparsity_pattern.h b/include/deal.II/lac/sparsity_pattern.h
new file mode 100644
index 0000000..3403d76
--- /dev/null
+++ b/include/deal.II/lac/sparsity_pattern.h
@@ -0,0 +1,1569 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__sparsity_pattern_h
+#define dealii__sparsity_pattern_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/subscriptor.h>
+#include <boost/serialization/array.hpp>
+#include <boost/serialization/split_member.hpp>
+
+#include <vector>
+#include <iostream>
+
+DEAL_II_NAMESPACE_OPEN
+
+class SparsityPattern;
+class ChunkSparsityPattern;
+template <typename number> class FullMatrix;
+template <typename number> class SparseMatrix;
+template <typename number> class SparseLUDecomposition;
+template <typename number> class SparseILU;
+template <typename VectorType> class VectorSlice;
+
+namespace ChunkSparsityPatternIterators
+{
+  class Accessor;
+}
+
+
+/*! @addtogroup Sparsity
+ *@{
+ */
+
+namespace internals
+{
+  /**
+   * Helper functions for sparsity patterns. Only declarations appear here.
+   */
+  namespace SparsityPatternTools
+  {
+    /**
+     * Declare type for container size.
+     */
+    typedef types::global_dof_index size_type;
+
+    /**
+     * Helper function to get the column index from a dereferenced iterator in
+     * the copy_from() function, if the inner iterator type points to plain
+     * unsigned integers.
+     *
+     * @param i The dereferenced iterator, i.e. an index of type size_type.
+     */
+    size_type
+    get_column_index_from_iterator (const size_type i);
+
+    /**
+     * Helper function to get the column index from a dereferenced iterator in
+     * the copy_from() function, if the inner iterator type points to pairs of
+     * unsigned integers and some other value.
+     *
+     * @param i The dereferenced iterator, i.e. a pair of an index of type
+     * size_type and an object of type @p value.
+     */
+    template <typename value>
+    size_type
+    get_column_index_from_iterator (const std::pair<size_type, value> &i);
+
+    /**
+     * Likewise, but sometimes needed for certain types of containers that
+     * make the first element of the pair constant (such as
+     * <tt>std::map</tt>).
+     *
+     * @param i The dereferenced iterator, i.e. a pair whose first element is
+     * a constant index of type size_type.
+     */
+    template <typename value>
+    size_type
+    get_column_index_from_iterator (const std::pair<const size_type, value> &i);
+
+  }
+}
+
+
+/**
+ * Iterators on objects of type SparsityPattern.
+ */
+namespace SparsityPatternIterators
+{
+  // forward declaration
+  class Iterator;
+
+  /**
+   * Declare type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * Accessor class for iterators into sparsity patterns. This class is also
+   * the base class for both const and non-const accessor classes into sparse
+   * matrices.
+   *
+   * Note that this class only allows read access to elements, providing their
+   * row and column number (or alternatively the index within the complete
+   * sparsity pattern). It does not allow modifying the sparsity pattern
+   * itself.
+   *
+   * @author Wolfgang Bangerth
+   * @date 2004
+   */
+  class Accessor
+  {
+  public:
+    /**
+     * Constructor. Takes the sparsity pattern to work on and an index into
+     * it (see the member variable index_within_sparsity for the meaning of
+     * this index).
+     */
+    Accessor (const SparsityPattern *matrix,
+              const std::size_t      index_within_sparsity);
+
+    /**
+     * Constructor. Construct the end accessor for the given sparsity pattern.
+     */
+    Accessor (const SparsityPattern *matrix);
+
+    /**
+     * Row number of the element represented by this object. This function can
+     * only be called for entries for which is_valid_entry() is true.
+     */
+    size_type row () const;
+
+    /**
+     * Index within the current row of the element represented by this object.
+     * This function can only be called for entries for which is_valid_entry()
+     * is true.
+     */
+    size_type index () const;
+
+    /**
+     * Column number of the element represented by this object. This function
+     * can only be called for entries for which is_valid_entry() is true.
+     */
+    size_type column () const;
+
+    /**
+     * Return whether the sparsity pattern entry pointed to by this iterator
+     * is valid or not. Note that after compressing the sparsity pattern, all
+     * entries are valid. However, before compression, the sparsity pattern
+     * allocated some memory to be used while still adding new nonzero
+     * entries; if you create iterators in this phase of the sparsity
+     * pattern's lifetime, you will iterate over elements that are not valid.
+     * If this is so, then this function will return false.
+     */
+    bool is_valid_entry () const;
+
+    /**
+     * Comparison. True, if both iterators point to the same matrix position.
+     */
+    bool operator == (const Accessor &) const;
+
+    /**
+     * Comparison operator. Result is true if either the first row number is
+     * smaller or if the row numbers are equal and the first index is smaller.
+     *
+     * This function is only valid if both iterators point into the same
+     * sparsity pattern.
+     */
+    bool operator < (const Accessor &) const;
+
+  protected:
+    /**
+     * The sparsity pattern this accessor operates on.
+     */
+    const SparsityPattern *sparsity_pattern;
+
+    /**
+     * Index in global sparsity pattern. This index represents the location
+     * the iterator/accessor points to within the array of the SparsityPattern
+     * class that stores the column numbers. It is also the index within the
+     * values array of a sparse matrix that stores the corresponding value of
+     * this site.
+     */
+    std::size_t index_within_sparsity;
+
+    /**
+     * Move the accessor to the next nonzero entry in the matrix.
+     */
+    void advance ();
+
+    /**
+     * Grant access to iterator class.
+     */
+    friend class Iterator;
+
+    /**
+     * Grant access to accessor class of ChunkSparsityPattern.
+     */
+    friend class ChunkSparsityPatternIterators::Accessor;
+  };
+
+
+
+  /**
+   * An iterator class for walking over the elements of a sparsity pattern.
+   *
+   * The typical use for these iterators is to iterate over the elements of a
+   * sparsity pattern (or, since they also serve as the basis for iterating
+   * over the elements of an associated matrix, over the elements of a sparse
+   * matrix), or over the elements of individual rows. There is no guarantee
+   * that the elements of a row are actually traversed in an order in which
+   * column numbers monotonically increase. See the documentation of the
+   * SparsityPattern class for more information.
+   *
+   * @note This class operates directly on the internal data structures of the
+   * SparsityPattern class. As a consequence, some operations are cheap and
+   * some are not. In particular, it is cheap to access the column index of
+   * the sparsity pattern entry pointed to. On the other hand, it is expensive
+   * to access the row index (this requires $O(\log(N))$ operations for a
+   * matrix with $N$ rows). As a consequence, when you design algorithms that
+   * use these iterators, it is common practice to not loop over <i>all</i>
+   * elements of a sparsity pattern at once, but to have an outer loop over
+   * all rows and within this loop iterate over the elements of this row. This
+   * way, you only ever need to dereference the iterator to obtain the column
+   * indices whereas the (expensive) lookup of the row index can be avoided by
+   * using the loop index instead.
+   */
+  class Iterator
+  {
+  public:
+    /**
+     * Constructor. Create an iterator into the sparsity pattern @p sp that
+     * points to the entry with global index @p index_within_sparsity (i.e.,
+     * the index of that entry counting from the zeroth row).
+     */
+    Iterator (const SparsityPattern *sp,
+              const std::size_t      index_within_sparsity);
+
+    /**
+     * Prefix increment. Unlike the postfix form, it returns a reference.
+     */
+    Iterator &operator++ ();
+
+    /**
+     * Postfix increment. Returns an Iterator by value.
+     */
+    Iterator operator++ (int);
+
+    /**
+     * Dereferencing operator. Gives read-only access to an Accessor.
+     */
+    const Accessor &operator* () const;
+
+    /**
+     * Dereferencing operator. Gives a pointer to a read-only Accessor.
+     */
+    const Accessor *operator-> () const;
+
+    /**
+     * Comparison. True if both iterators point to the same matrix position.
+     */
+    bool operator == (const Iterator &) const;
+
+    /**
+     * Inverse of <tt>==</tt>.
+     */
+    bool operator != (const Iterator &) const;
+
+    /**
+     * Comparison operator. Result is true if either the first row number is
+     * smaller or if the row numbers are equal and the first index is smaller.
+     *
+     * This function is only valid if both iterators point into the same
+     * sparsity pattern.
+     */
+    bool operator < (const Iterator &) const;
+
+    /**
+     * Return the distance between the current iterator and the argument. The
+     * distance is given by how many times one has to apply operator++ to the
+     * current iterator to get the argument (for a positive return value), or
+     * operator-- (for a negative return value).
+     */
+    int operator - (const Iterator &p) const;
+
+  private:
+    /**
+     * The stored Accessor; presumably what operator* and operator-> expose.
+     */
+    Accessor accessor;
+  };
+}
+
+
+
+/**
+ * A class that can store which elements of a matrix are nonzero (or, in fact,
+ * <i>may</i> be nonzero) and for which we have to allocate memory to store
+ * their values. This class is an example of the "static" type of sparsity
+ * patterns (see
+ * @ref Sparsity).
+ * It uses the <a
+ * href="https://en.wikipedia.org/wiki/Sparse_matrix">compressed row storage
+ * (CSR)</a> format to store data, and is used as the basis for the
+ * SparseMatrix class.
+ *
+ * The elements of a SparsityPattern, corresponding to the places where
+ * SparseMatrix objects can store nonzero entries, are stored row-by-row.
+ * Within each row, elements are generally stored left-to-right in increasing
+ * column index order; the exception to this rule is that if the matrix is
+ * square (n_rows() == n_columns()), then the diagonal entry is stored as the
+ * first element in each row to make operations like applying a Jacobi or SSOR
+ * preconditioner faster. As a consequence, if you traverse the elements of a
+ * row of a SparsityPattern with the help of iterators into this object (using
+ * SparsityPattern::begin and SparsityPattern::end) you will find that the
+ * elements are not sorted by column index within each row whenever the matrix
+ * is square (the first item will be the diagonal, followed by the other
+ * entries sorted by column index).
+ *
+ * @note While this class forms the basis upon which SparseMatrix objects base
+ * their storage format, and thus plays a central role in setting up linear
+ * systems, it is rarely set up directly due to the way it stores its
+ * information. Rather, one typically goes through an intermediate format
+ * first, see for example the step-2 tutorial program as well as the
+ * documentation module
+ * @ref Sparsity.
+ *
+ * @author Wolfgang Bangerth, Guido Kanschat and others
+ */
+class SparsityPattern : public Subscriptor
+{
+public:
+  /**
+   * Declare type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * Typedef an iterator class that allows to walk over all nonzero elements
+   * of a sparsity pattern.
+   */
+  typedef
+  SparsityPatternIterators::Iterator
+  const_iterator;
+
+  /**
+   * Typedef an iterator class that allows to walk over all nonzero elements
+   * of a sparsity pattern.
+   *
+   * Since the iterator does not allow to modify the sparsity pattern, this
+   * type is the same as that for @p const_iterator.
+   */
+  typedef
+  SparsityPatternIterators::Iterator
+  iterator;
+
+
+  /**
+   * Define a value which is used to indicate that a certain value in the
+   * #colnums array is unused, i.e. does not represent a certain column number
+   * index.
+   *
+   * Indices with this invalid value are used to insert new entries to the
+   * sparsity pattern using the add() member function, and are removed when
+   * calling compress().
+   *
+   * You should not assume that the variable declared here has a certain
+   * value. The initialization is given here only to enable the compiler to
+   * perform some optimizations, but the actual value of the variable may
+   * change over time.
+   */
+  static const size_type invalid_entry = numbers::invalid_size_type;
+
+  /**
+   * @name Construction and setup Constructors, destructor; functions
+   * initializing, copying and filling an object.
+   */
+// @{
+  /**
+   * Initialize the matrix empty, that is with no memory allocated. This is
+   * useful if you want such objects as member variables in other classes. You
+   * can make the structure usable by calling the reinit() function.
+   */
+  SparsityPattern ();
+
+  /**
+   * Copy constructor. This constructor is only allowed to be called if the
+   * matrix structure to be copied is empty. This is so in order to prevent
+   * involuntary copies of objects for temporaries, which can use large
+   * amounts of computing time. However, copy constructors are needed if one
+   * wants to place a SparsityPattern in a container, e.g., to write such
+   * statements like <tt>v.push_back (SparsityPattern());</tt>, with
+   * <tt>v</tt> a vector of SparsityPattern objects.
+   *
+   * Usually, it is sufficient to use the explicit keyword to disallow
+   * unwanted temporaries, but this does not work for <tt>std::vector</tt>s.
+   * Since copying a structure like this is not useful anyway because multiple
+   * matrices can use the same sparsity structure, copies are only allowed for
+   * empty objects, as described above.
+   */
+  SparsityPattern (const SparsityPattern &);
+
+  /**
+   * Initialize a rectangular pattern of size <tt>m x n</tt>.
+   *
+   * @param[in] m The number of rows.
+   * @param[in] n The number of columns.
+   * @param[in] max_per_row Maximum number of nonzero entries per row.
+   */
+  SparsityPattern (const size_type m,
+                   const size_type n,
+                   const unsigned int max_per_row);
+
+
+  /**
+   * Initialize a rectangular pattern of size <tt>m x n</tt>.
+   *
+   * @param[in] m The number of rows.
+   * @param[in] n The number of columns.
+   * @param[in] row_lengths Possible number of nonzero entries for each row.
+   * This vector must have one entry for each row.
+   */
+  SparsityPattern (const size_type               m,
+                   const size_type               n,
+                   const std::vector<unsigned int> &row_lengths);
+
+  /**
+   * Initialize a quadratic pattern of dimension <tt>m</tt> with at most
+   * <tt>max_per_row</tt> nonzero entries per row.
+   *
+   * This constructor automatically enables optimized storage of diagonal
+   * elements. To avoid this, use the constructor taking row and column
+   * numbers separately.
+   */
+  SparsityPattern (const size_type m,
+                   const unsigned int max_per_row);
+
+  /**
+   * Initialize a quadratic pattern of size <tt>m x m</tt>.
+   *
+   * @param[in] m The number of rows and columns.
+   * @param[in] row_lengths Maximum number of nonzero entries for each row.
+   * This vector must have one entry for each row.
+   */
+  SparsityPattern (const size_type m,
+                   const std::vector<unsigned int> &row_lengths);
+
+  /**
+   * Make a copy with extra off-diagonals.
+   *
+   * This constructs objects intended for the application of the ILU(n)-method
+   * or other incomplete decompositions.  Therefore, additional to the
+   * original entry structure, space for <tt>extra_off_diagonals</tt> side-
+   * diagonals is provided on both sides of the main diagonal.
+   *
+   * <tt>max_per_row</tt> is the maximum number of nonzero elements per row
+   * which this structure is to hold. It is assumed that this number is
+   * sufficiently large to accommodate both the elements in <tt>original</tt>
+   * as well as the new off-diagonal elements created by this constructor. You
+   * will usually want to give the same number as you gave for
+   * <tt>original</tt> plus the number of side diagonals times two. You may
+   * however give a larger value if you wish to add further nonzero entries
+   * for the decomposition based on other criteria than their being on side-
+   * diagonals.
+   *
+   * This function requires that <tt>original</tt> refers to a quadratic
+   * matrix structure.  It must be compressed. The matrix structure is not
+   * compressed after this function finishes.
+   */
+  SparsityPattern (const SparsityPattern  &original,
+                   const unsigned int        max_per_row,
+                   const size_type        extra_off_diagonals);
+
+  /**
+   * Destructor.
+   */
+  ~SparsityPattern ();
+
+  /**
+   * Copy operator. For this the same holds as for the copy constructor: it is
+   * declared, defined and fine to be called, but the latter only for empty
+   * objects.
+   */
+  SparsityPattern &operator = (const SparsityPattern &);
+
+  /**
+   * Reallocate memory and set up data structures for a new matrix with <tt>m
+   * </tt>rows and <tt>n</tt> columns, with at most <tt>max_per_row</tt>
+   * nonzero entries per row.
+   *
+   * This function simply maps its operations to the other <tt>reinit</tt>
+   * function.
+   */
+  void reinit (const size_type m,
+               const size_type n,
+               const unsigned int max_per_row);
+
+
+  /**
+   * Reallocate memory for a matrix of size <tt>m x n</tt>. The number of
+   * entries for each row is taken from the array <tt>row_lengths</tt> which
+   * has to give this number of each row <tt>i=1...m</tt>.
+   *
+   * If <tt>m*n==0</tt> all memory is freed, resulting in a total
+   * reinitialization of the object. If it is nonzero, new memory is only
+   * allocated if the new size extends the old one. This is done to save time
+   * and to avoid fragmentation of the heap.
+   *
+   * If the number of rows equals the number of columns, the diagonal element
+   * of each row is stored first in that row to allow optimized access in
+   * relaxation methods of SparseMatrix.
+   */
+  void reinit (const size_type               m,
+               const size_type               n,
+               const std::vector<unsigned int> &row_lengths);
+
+
+  /**
+   * Same as above, but with a VectorSlice argument instead.
+   */
+  void reinit (const size_type                                   m,
+               const size_type                                   n,
+               const VectorSlice<const std::vector<unsigned int> > &row_lengths);
+
+  /**
+   * This function compresses the sparsity structure that this object
+   * represents.  It does so by eliminating unused entries and sorting the
+   * remaining ones to allow faster access by usage of binary search
+   * algorithms. A special sorting scheme is used for the diagonal entry of
+   * quadratic matrices, which is always the first entry of each row.
+   *
+   * The memory which is no longer needed is released.
+   *
+   * SparseMatrix objects require the SparsityPattern objects they are
+   * initialized with to be compressed, to reduce memory requirements.
+   */
+  void compress ();
+
+
+  /**
+   * This function can be used as a replacement for reinit(), subsequent calls
+   * to add() and a final call to compress() if you know exactly in advance
+   * the entries that will form the matrix sparsity pattern.
+   *
+   * The first two parameters determine the size of the matrix. For the two
+   * last ones, note that a sparse matrix can be described by a sequence of
+   * rows, each of which is represented by a sequence of pairs of column
+   * indices and values. In the present context, the begin() and end()
+   * parameters designate iterators (of forward iterator type) into a
+   * container, one representing one row. The distance between begin() and
+   * end() should therefore be equal to n_rows(). These iterators may be
+   * iterators of <tt>std::vector</tt>, <tt>std::list</tt>, pointers into a
+   * C-style array, or any other iterator satisfying the requirements of a
+   * forward iterator. The objects pointed to by these iterators (i.e. what we
+   * get after applying <tt>operator*</tt> or <tt>operator-></tt> to one of
+   * these iterators) must be a container itself that provides functions
+   * <tt>begin</tt> and <tt>end</tt> designating a range of iterators that
+   * describe the contents of one line. Dereferencing these inner iterators
+   * must either yield a pair of an unsigned integer as column index and a
+   * value of arbitrary type (such a type would be used if we wanted to
+   * describe a sparse matrix with one such object), or simply an unsigned
+   * integer (if we only wanted to describe a sparsity pattern). The function
+   * is able to determine itself whether an unsigned integer or a pair is what
+   * we get after dereferencing the inner iterators, through some template
+   * magic.
+   *
+   * While the order of the outer iterators denotes the different rows of the
+   * matrix, the order of the inner iterator denoting the columns does not
+   * matter, as they are sorted internal to this function anyway.
+   *
+   * Since that all sounds very complicated, consider the following example
+   * code, which may be used to fill a sparsity pattern:
+   * @code
+   * std::vector<std::vector<unsigned int> > column_indices (n_rows);
+   * for (unsigned int row=0; row<n_rows; ++row)
+   *         // generate necessary columns in this row
+   *   fill_row (column_indices[row]);
+   *
+   * sparsity.copy_from (n_rows, n_cols,
+   *                     column_indices.begin(),
+   *                     column_indices.end());
+   * @endcode
+   *
+   * Note that this example works since the iterators dereferenced yield
+   * containers with functions <tt>begin</tt> and <tt>end</tt> (namely
+   * <tt>std::vector</tt>s), and the inner iterators dereferenced yield
+   * unsigned integers as column indices. Note that we could have replaced
+   * each of the two <tt>std::vector</tt> occurrences by <tt>std::list</tt>,
+   * and the inner one by <tt>std::set</tt> as well.
+   *
+   * Another example would be as follows, where we initialize a whole matrix,
+   * not only a sparsity pattern:
+   * @code
+   * std::vector<std::map<unsigned int,double> > entries (n_rows);
+   * for (unsigned int row=0; row<n_rows; ++row)
+   *         // generate necessary pairs of columns
+   *         // and corresponding values in this row
+   *   fill_row (entries[row]);
+   *
+   * sparsity.copy_from (n_rows, n_cols,
+   *                     entries.begin(),
+   *                     entries.end());
+   * matrix.reinit (sparsity);
+   * matrix.copy_from (entries.begin(),
+   *                   entries.end());
+   * @endcode
+   *
+   * This example works because dereferencing iterators of the inner type
+   * yields a pair of unsigned integers and a value, the first of which we
+   * take as column index. As previously, the outer <tt>std::vector</tt> could
+   * be replaced by <tt>std::list</tt>, and the inner <tt>std::map<unsigned
+   * int,double></tt> could be replaced by <tt>std::vector<std::pair<unsigned
+   * int,double> ></tt>, or a list or set of such pairs, as they all return
+   * iterators that point to such pairs.
+   */
+  template <typename ForwardIterator>
+  void copy_from (const size_type n_rows,
+                  const size_type n_cols,
+                  const ForwardIterator begin,
+                  const ForwardIterator end);
+
+  /**
+   * Copy data from an object of type DynamicSparsityPattern. Although not a
+   * compressed sparsity pattern, this function is also instantiated if the
+   * argument is of type SparsityPattern (i.e., the current class). Previous
+   * content of this object is lost, and the sparsity pattern is in compressed
+   * mode afterwards.
+   */
+  template <typename SparsityPatternType>
+  void copy_from (const SparsityPatternType &dsp);
+
+
+  /**
+   * Take a full matrix and use its nonzero entries to generate a sparse
+   * matrix entry pattern for this object.
+   *
+   * Previous content of this object is lost, and the sparsity pattern is in
+   * compressed mode afterwards.
+   */
+  template <typename number>
+  void copy_from (const FullMatrix<number> &matrix);
+
+  /**
+   * Make the sparsity pattern symmetric by adding the sparsity pattern of the
+   * transpose object.
+   *
+   * This function throws an exception if the sparsity pattern does not
+   * represent a quadratic matrix.
+   */
+  void symmetrize ();
+
+  /**
+   * Add a nonzero entry to the matrix.  This function may only be called for
+   * non-compressed sparsity patterns.
+   *
+   * If the entry already exists, nothing bad happens.
+   */
+  void add (const size_type i,
+            const size_type j);
+
+  /**
+   * Add several nonzero entries to the specified matrix row.  This function
+   * may only be called for non-compressed sparsity patterns.
+   *
+   * If some of the entries already exist, nothing bad happens.
+   */
+  template <typename ForwardIterator>
+  void add_entries (const size_type row,
+                    ForwardIterator begin,
+                    ForwardIterator end,
+                    const bool      indices_are_sorted = false);
+
+// @}
+
+
+
+
+  /**
+   * @name Iterators
+   */
+// @{
+
+  /**
+   * Iterator starting at the first entry of the matrix. The resulting
+   * iterator can be used to walk over all nonzero entries of the sparsity
+   * pattern.
+   *
+   * Note the discussion in the general documentation of this class about the
+   * order in which elements are accessed.
+   */
+  iterator begin () const;
+
+  /**
+   * Final iterator.
+   */
+  iterator end () const;
+
+  /**
+   * Iterator starting at the first entry of row <tt>r</tt>.
+   *
+   * Note that if the given row is empty, i.e. does not contain any nonzero
+   * entries, then the iterator returned by this function equals
+   * <tt>end(r)</tt>. Note also that the iterator may not be dereferencable in
+   * that case.
+   *
+   * Note also the discussion in the general documentation of this class about
+   * the order in which elements are accessed.
+   */
+  iterator begin (const size_type r) const;
+
+  /**
+   * Final iterator of row <tt>r</tt>. It points to the first element past the
+   * end of line @p r, or past the end of the entire sparsity pattern.
+   *
+   * Note that the end iterator is not necessarily dereferencable. This is in
+   * particular the case if it is the end iterator for the last row of a
+   * matrix.
+   */
+  iterator end (const size_type r) const;
+
+
+// @}
+  /**
+   * @name Querying information
+   */
+// @{
+  /**
+   * Test for equality of two SparsityPatterns.
+   */
+  bool operator == (const SparsityPattern &)  const;
+
+  /**
+   * Return whether the object is empty. It is empty if no memory is
+   * allocated, which is the same as that both dimensions are zero.
+   */
+  bool empty () const;
+
+  /**
+   * Return the maximum number of entries per row. Before compression, this
+   * equals the number given to the constructor, while after compression, it
+   * equals the maximum number of entries actually allocated by the user.
+   */
+  size_type max_entries_per_row () const;
+
+  /**
+   * Compute the bandwidth of the matrix represented by this structure. The
+   * bandwidth is the maximum of $|i-j|$ for which the index pair $(i,j)$
+   * represents a nonzero entry of the matrix. Consequently, the maximum
+   * bandwidth a $n\times m$ matrix can have is $\max\{n-1,m-1\}$, a diagonal
+   * matrix has bandwidth 0, and there are at most $2*q+1$ entries per row if
+   * the bandwidth is $q$. The returned quantity is sometimes called "half
+   * bandwidth" in the literature.
+   */
+  size_type bandwidth () const;
+
+  /**
+   * Return the number of nonzero elements of this matrix. Actually, it
+   * returns the number of entries in the sparsity pattern; if any of the
+   * entries should happen to be zero, it is counted anyway.
+   *
+   * This function may only be called if the matrix struct is compressed. It
+   * does not make too much sense otherwise anyway.
+   */
+  size_type n_nonzero_elements () const;
+
+  /**
+   * Return whether the structure is compressed or not.
+   */
+  bool is_compressed () const;
+
+  /**
+   * Return number of rows of this matrix, which equals the dimension of the
+   * image space.
+   */
+  size_type n_rows () const;
+
+  /**
+   * Return number of columns of this matrix, which equals the dimension of
+   * the domain space.
+   */
+  size_type n_cols () const;
+
+  /**
+   * Number of entries in a specific row.
+   */
+  unsigned int row_length (const size_type row) const;
+
+  /**
+   * Return whether this object stores only those entries that have been added
+   * explicitly, or if the sparsity pattern contains elements that have been
+   * added through other means (implicitly) while building it. For the current
+   * class, the result is false if and only if it is square because it then
+   * unconditionally stores the diagonal entries whether they have been added
+   * explicitly or not.
+   *
+   * This function mainly serves the purpose of describing the current class
+   * in cases where several kinds of sparsity patterns can be passed as
+   * template arguments.
+   */
+  bool stores_only_added_elements () const;
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object. See MemoryConsumption.
+   */
+  std::size_t memory_consumption () const;
+
+// @}
+  /**
+   * @name Accessing entries
+   */
+// @{
+  /**
+   * Return the index of the matrix element with row number <tt>i</tt> and
+   * column number <tt>j</tt>. If the matrix element is not a nonzero one,
+   * return SparsityPattern::invalid_entry.
+   *
+   * This function is usually called by the SparseMatrix::operator()(). It may
+   * only be called for compressed sparsity patterns, since in this case
+   * searching whether the entry exists can be done quite fast with a binary
+   * search algorithm because the column numbers are sorted.
+   *
+   * If <tt>m</tt> is the number of entries in <tt>row</tt>, then the
+   * complexity of this function is <i>log(m)</i> if the sparsity pattern is
+   * compressed.
+   *
+   * @note This function is not cheap since it has to search through all of
+   * the elements of the given row <tt>i</tt> to find whether index <tt>j</tt>
+   * exists. Thus, it is more expensive than necessary in cases where you want
+   * to loop over all of the nonzero elements of this sparsity pattern (or of
+   * a sparse matrix associated with it) or of a single row. In such cases, it
+   * is more efficient to use iterators over the elements of the sparsity
+   * pattern or of the sparse matrix.
+   */
+  size_type operator() (const size_type i,
+                        const size_type j) const;
+
+  /**
+   * This is the inverse operation to operator()(): given a global index, find
+   * out row and column of the matrix entry to which it belongs. The returned
+   * value is the pair composed of row and column index.
+   *
+   * This function may only be called if the sparsity pattern is compressed.
+   * The global index must then be between zero and n_nonzero_elements().
+   *
+   * If <tt>N</tt> is the number of rows of this matrix, then the complexity
+   * of this function is <i>log(N)</i>.
+   */
+  std::pair<size_type, size_type>
+  matrix_position (const size_type global_index) const;
+
+  /**
+   * Check if a value at a certain position may be non-zero.
+   */
+  bool exists (const size_type i,
+               const size_type j) const;
+
+  /**
+   * The index of a global matrix entry in its row.
+   *
+   * This function is analogous to operator(), but it computes the index not
+   * with respect to the total field, but only with respect to the row
+   * <tt>i</tt>.
+   */
+  size_type row_position(const size_type i,
+                         const size_type j) const;
+
+  /**
+   * Access to column number field.  Return the column number of the
+   * <tt>index</tt>th entry in <tt>row</tt>. Note that if diagonal elements
+   * are optimized, the first element in each row is the diagonal element,
+   * i.e. <tt>column_number(row,0)==row</tt>.
+   *
+   * If the sparsity pattern is already compressed, then (except for the
+   * diagonal element), the entries are sorted by columns, i.e.
+   * <tt>column_number(row,i)</tt> <tt><</tt> <tt>column_number(row,i+1)</tt>.
+   */
+  size_type column_number (const size_type row,
+                           const unsigned int index) const;
+
+
+// @}
+  /**
+   * @name Input/Output
+   */
+// @{
+  /**
+   * Write the data of this object en bloc to a file. This is done in a binary
+   * mode, so the output is neither readable by humans nor (probably) by other
+   * computers using a different operating system or number format.
+   *
+   * The purpose of this function is that you can swap out matrices and
+   * sparsity pattern if you are short of memory, want to communicate between
+   * different programs, or allow objects to be persistent across different
+   * runs of the program.
+   */
+  void block_write (std::ostream &out) const;
+
+  /**
+   * Read data that has previously been written by block_write() from a file.
+   * This is done using the inverse operations to the above function, so it is
+   * reasonably fast because the bitstream is not interpreted except for a few
+   * numbers up front.
+   *
+   * The object is resized on this operation, and all previous contents are
+   * lost.
+   *
+   * A primitive form of error checking is performed which will recognize the
+   * bluntest attempts to interpret some data as a vector stored bitwise to a
+   * file, but not more.
+   */
+  void block_read (std::istream &in);
+
+  /**
+   * Print the sparsity of the matrix. The output consists of one line per row
+   * of the format <tt>[i,j1,j2,j3,...]</tt>. <i>i</i> is the row number and
+   * <i>jn</i> are the allocated columns in this row.
+   */
+  void print (std::ostream &out) const;
+
+  /**
+   * Print the sparsity of the matrix in a format that <tt>gnuplot</tt>
+   * understands and which can be used to plot the sparsity pattern in a
+   * graphical way. The format consists of pairs <tt>i j</tt> of nonzero
+   * elements, each representing one entry of this matrix, one per line of the
+   * output file. Indices are counted from zero on, as usual. Since sparsity
+   * patterns are printed in the same way as matrices are displayed, we print
+   * the negative of the column index, which means that the <tt>(0,0)</tt>
+   * element is in the top left rather than in the bottom left corner.
+   *
+   * Print the sparsity pattern in gnuplot by setting the data style to dots
+   * or points and use the <tt>plot</tt> command.
+   */
+  void print_gnuplot (std::ostream &out) const;
+
+  /**
+   * Prints the sparsity of the matrix in a .svg file which can be opened in a
+   * web browser. The .svg file contains squares which correspond to the
+   * entries in the matrix. An entry in the matrix which contains a non-zero
+   * value corresponds with a red square while a zero-valued entry in the
+   * matrix corresponds with a white square.
+   */
+  void print_svg (std::ostream &out) const;
+
+
+  /**
+   * Write the data of this object to a stream for the purpose of
+   * serialization
+   */
+  template <class Archive>
+  void save (Archive &ar, const unsigned int version) const;
+
+  /**
+   * Read the data of this object from a stream for the purpose of
+   * serialization
+   */
+  template <class Archive>
+  void load (Archive &ar, const unsigned int version);
+
+  BOOST_SERIALIZATION_SPLIT_MEMBER()
+
+// @}
+
+  /**
+   * @addtogroup Exceptions
+   * @{
+   */
+  /**
+   * You tried to add an element to a row, but there was no space left.
+   */
+  DeclException2 (ExcNotEnoughSpace,
+                  int, int,
+                  << "Upon entering a new entry to row " << arg1
+                  << ": there was no free entry any more. " << std::endl
+                  << "(Maximum number of entries for this row: "
+                  << arg2 << "; maybe the matrix is already compressed?)");
+  /**
+   * The operation is only allowed after the SparsityPattern has been set up
+   * and compress() was called.
+   */
+  DeclException0 (ExcNotCompressed);
+  /**
+   * This operation changes the structure of the SparsityPattern and is not
+   * possible after compress() has been called.
+   */
+  DeclException0 (ExcMatrixIsCompressed);
+  /**
+   * Exception
+   */
+  DeclException0 (ExcInvalidConstructorCall);
+  /**
+   * This exception is thrown if the matrix does not follow the convention of
+   * storing diagonal elements first in row. Refer to
+   * SparsityPattern::optimize_diagonal() for more information.
+   */
+  DeclException0 (ExcDiagonalNotOptimized);
+  /**
+   * Exception
+   */
+  DeclException2 (ExcIteratorRange,
+                  int, int,
+                  << "The iterators denote a range of " << arg1
+                  << " elements, but the given number of rows was " << arg2);
+  /**
+   * Exception
+   */
+  DeclException1 (ExcInvalidNumberOfPartitions,
+                  int,
+                  << "The number of partitions you gave is " << arg1
+                  << ", but must be greater than zero.");
+  //@}
+private:
+  /**
+   * Maximum number of rows that can be stored in the #rowstart array.  Since
+   * reallocation of that array only happens if the present one is too small,
+   * but never when the size of this matrix structure shrinks, #max_dim might
+   * be larger than #rows and in this case #rowstart has more elements than
+   * are used.
+   */
+  size_type max_dim;
+
+  /**
+   * Number of rows that this sparsity structure shall represent.
+   */
+  size_type rows;
+
+  /**
+   * Number of columns that this sparsity structure shall represent.
+   */
+  size_type cols;
+
+  /**
+   * Size of the actually allocated array #colnums. Here, the same applies as
+   * for the #rowstart array, i.e. it may be larger than the actually used
+   * part of the array.
+   */
+  size_type max_vec_len;
+
+  /**
+   * Maximum number of elements per row. This is set to the value given to the
+   * reinit() function (or to the constructor), or to the maximum row length
+   * computed from the vectors in case the more flexible constructors or
+   * reinit versions are called. Its value is more or less meaningless after
+   * compress() has been called.
+   */
+  unsigned int max_row_length;
+
+  /**
+   * Array which holds, for each row, the index of the first element in
+   * #colnums belonging to that row. Note that the size of the array is one
+   * larger than the number of rows, because the last element is used for
+   * <tt>row</tt>=#rows, i.e. the row past the last used one. The value of
+   * #rowstart[#rows] equals the index of the element past the end in
+   * #colnums; this way, we are able to write loops like <tt>for
+   * (i=rowstart[k]; i<rowstart[k+1]; ++i)</tt> also for the last row.
+   *
+   * Note that the actual size of the allocated memory may be larger than the
+   * region that is used. The actual number of elements that was allocated is
+   * stored in #max_dim.
+   */
+  std::size_t *rowstart;
+
+  /**
+   * Array of column numbers. In this array, we store for each non-zero
+   * element its column number. The column numbers for the elements in row
+   * <i>r</i> are stored within the index range
+   * #rowstart[<i>r</i>]...#rowstart[<i>r+1</i>]. Therefore to find out
+   * whether a given element (<i>r,c</i>) exists, we have to check whether the
+   * column number <i>c</i> exists in the above-mentioned range within this
+   * array. If it exists, say at position <i>p</i> within this array, the
+   * value of the respective element in the sparse matrix will also be at
+   * position <i>p</i> of the values array of that class.
+   *
+   * At the beginning, all elements of this array are set to @p -1 indicating
+   * invalid (unused) column numbers (diagonal elements are preset if
+   * optimized storage is requested, though). Now, if nonzero elements are
+   * added, one column number in the row's respective range after the other is
+   * set to the column number of the added element. When compress is called,
+   * unused elements (indicated by column numbers @p -1) are eliminated by
+   * copying the column number of subsequent rows and the column numbers
+   * within each row (with possible exception of the diagonal element) are
+   * sorted, such that finding whether an element exists and determining its
+   * position can be done by a binary search.
+   */
+  size_type *colnums;
+
+  /**
+   * Store whether the compress() function was called for this object.
+   */
+  bool compressed;
+
+  /**
+   * Is special treatment of diagonals enabled?
+   */
+  bool store_diagonal_first_in_row;
+
+  /**
+   * Make all sparse matrices friends of this class.
+   */
+  template <typename number> friend class SparseMatrix;
+  template <typename number> friend class SparseLUDecomposition;
+  template <typename number> friend class SparseILU;
+  template <typename number> friend class ChunkSparseMatrix;
+
+  friend class ChunkSparsityPattern;
+
+  /**
+   * Also give access to internal details to the iterator/accessor classes.
+   */
+  friend class SparsityPatternIterators::Iterator;
+  friend class SparsityPatternIterators::Accessor;
+  friend class ChunkSparsityPatternIterators::Accessor;
+};
+
+
+/*@}*/
+/*---------------------- Inline functions -----------------------------------*/
+
+#ifndef DOXYGEN
+
+
+namespace SparsityPatternIterators
+{
+  // Create an accessor for the entry stored at global position @p i of the
+  // column-number array of the given sparsity pattern.
+  inline
+  Accessor::
+  Accessor (const SparsityPattern *sparsity_pattern,
+            const std::size_t      i)
+    :
+    sparsity_pattern(sparsity_pattern),
+    index_within_sparsity(i)
+  {}
+
+
+  // Create an accessor that points one past the last entry of the given
+  // sparsity pattern, i.e. to the position stored in rowstart[rows].
+  inline
+  Accessor::
+  Accessor (const SparsityPattern *sparsity_pattern)
+    :
+    sparsity_pattern(sparsity_pattern),
+    index_within_sparsity(sparsity_pattern->rowstart[sparsity_pattern->rows])
+  {}
+
+
+  // An entry is valid if it lies before the past-the-end position and if
+  // its column number is not the invalid_entry marker.
+  inline
+  bool
+  Accessor::is_valid_entry () const
+  {
+    const std::size_t past_the_end
+      = sparsity_pattern->rowstart[sparsity_pattern->rows];
+    if (index_within_sparsity >= past_the_end)
+      return false;
+
+    return (sparsity_pattern->colnums[index_within_sparsity]
+            != SparsityPattern::invalid_entry);
+  }
+
+
+  // Row of the current entry: search the rowstart array for the first row
+  // start that is larger than our global position; the row we are in is the
+  // one just before it.
+  inline
+  size_type
+  Accessor::row() const
+  {
+    Assert (is_valid_entry() == true, ExcInvalidIterator());
+
+    const std::size_t *const first_rowstart = sparsity_pattern->rowstart;
+    const std::size_t *const last_rowstart
+      = first_rowstart + sparsity_pattern->rows + 1;
+
+    return (std::upper_bound(first_rowstart,
+                             last_rowstart,
+                             index_within_sparsity)
+            - first_rowstart - 1);
+  }
+
+
+  // Column number stored for the current entry.
+  inline
+  size_type
+  Accessor::column() const
+  {
+    Assert (is_valid_entry() == true, ExcInvalidIterator());
+
+    const size_type *const column_numbers = sparsity_pattern->colnums;
+    return column_numbers[index_within_sparsity];
+  }
+
+
+  // Position of the current entry counted from the start of its row.
+  inline
+  size_type
+  Accessor::index() const
+  {
+    Assert (is_valid_entry() == true, ExcInvalidIterator());
+
+    const std::size_t start_of_row = sparsity_pattern->rowstart[row()];
+    return index_within_sparsity - start_of_row;
+  }
+
+
+
+
+  // Two accessors are equal if they refer to the same sparsity pattern and
+  // to the same global position within it.
+  inline
+  bool
+  Accessor::operator == (const Accessor &other) const
+  {
+    if (sparsity_pattern != other.sparsity_pattern)
+      return false;
+
+    return (index_within_sparsity == other.index_within_sparsity);
+  }
+
+
+
+  // Ordering by global position; only meaningful for accessors into the
+  // same sparsity pattern, which is what the assertion checks.
+  inline
+  bool
+  Accessor::operator < (const Accessor &other) const
+  {
+    Assert (sparsity_pattern == other.sparsity_pattern,
+            ExcInternalError());
+
+    return (other.index_within_sparsity > index_within_sparsity);
+  }
+
+
+  // Move on to the next global position. Must not be called on an accessor
+  // that already sits at the past-the-end position.
+  inline
+  void
+  Accessor::advance ()
+  {
+    Assert (index_within_sparsity < sparsity_pattern->rowstart[sparsity_pattern->rows],
+            ExcIteratorPastEnd());
+    index_within_sparsity += 1;
+  }
+
+
+
+  // Create an iterator to global position @p i of the given pattern.
+  inline
+  Iterator::Iterator (const SparsityPattern *sparsity_pattern,
+                      const std::size_t      i)
+    :
+    accessor(sparsity_pattern, i)
+  {}
+
+
+
+  // Prefix increment: advance and return the iterator itself.
+  inline
+  Iterator &
+  Iterator::operator++ ()
+  {
+    accessor.advance ();
+    return *this;
+  }
+
+
+
+  // Postfix increment: advance, but return a copy of the previous state.
+  inline
+  Iterator
+  Iterator::operator++ (int)
+  {
+    const Iterator previous (*this);
+    ++(*this);
+    return previous;
+  }
+
+
+
+  // Dereferencing yields the accessor describing the current entry.
+  inline
+  const Accessor &
+  Iterator::operator* () const
+  {
+    return accessor;
+  }
+
+
+
+  // Member access is forwarded to the accessor as well.
+  inline
+  const Accessor *
+  Iterator::operator-> () const
+  {
+    return &accessor;
+  }
+
+
+  // Iterators are equal exactly if their accessors are.
+  inline
+  bool
+  Iterator::operator == (const Iterator &other) const
+  {
+    return (accessor == other.accessor);
+  }
+
+
+
+  // Negation of the equality comparison.
+  inline
+  bool
+  Iterator::operator != (const Iterator &other) const
+  {
+    return ! (accessor == other.accessor);
+  }
+
+
+  // Ordering is forwarded to the accessors.
+  inline
+  bool
+  Iterator::operator < (const Iterator &other) const
+  {
+    return (accessor < other.accessor);
+  }
+
+
+  // Distance between two iterators into the same sparsity pattern, measured
+  // in global positions.
+  inline
+  int
+  Iterator::operator - (const Iterator &other) const
+  {
+    Assert (accessor.sparsity_pattern == other.accessor.sparsity_pattern,
+            ExcInternalError());
+
+    return (accessor.index_within_sparsity
+            - other.accessor.index_within_sparsity);
+  }
+}
+
+
+
+inline
+SparsityPattern::iterator
+SparsityPattern::begin () const
+{
+  // the first entry of the pattern is the first entry of row zero
+  const std::size_t first_entry = rowstart[0];
+  return iterator(this, first_entry);
+}
+
+
+inline
+SparsityPattern::iterator
+SparsityPattern::end () const
+{
+  // rowstart[rows] is the position one past the last entry of the last row
+  const std::size_t past_last_entry = rowstart[rows];
+  return iterator(this, past_last_entry);
+}
+
+
+
+inline
+SparsityPattern::iterator
+SparsityPattern::begin (const size_type r) const
+{
+  Assert (r<n_rows(), ExcIndexRangeType<size_type>(r,0,n_rows()));
+
+  // iterator to the first entry stored for row r
+  const std::size_t first_in_row = rowstart[r];
+  return iterator(this, first_in_row);
+}
+
+
+
+inline
+SparsityPattern::iterator
+SparsityPattern::end (const size_type r) const
+{
+  Assert (r<n_rows(), ExcIndexRangeType<size_type>(r,0,n_rows()));
+
+  // the end of row r coincides with the start of row r+1
+  const std::size_t past_end_of_row = rowstart[r+1];
+  return iterator(this, past_end_of_row);
+}
+
+
+
+inline
+SparsityPattern::size_type
+SparsityPattern::n_rows () const
+{
+  // number of rows this pattern represents
+  return this->rows;
+}
+
+
+inline
+SparsityPattern::size_type
+SparsityPattern::n_cols () const
+{
+  // number of columns this pattern represents
+  return this->cols;
+}
+
+
+inline
+bool
+SparsityPattern::is_compressed () const
+{
+  // value of the flag that records whether compress() has been called
+  return this->compressed;
+}
+
+
+inline
+bool
+SparsityPattern::stores_only_added_elements () const
+{
+  // true exactly if the special treatment of diagonal entries is disabled
+  return !store_diagonal_first_in_row;
+}
+
+
+
+inline
+unsigned int
+SparsityPattern::row_length (const size_type row) const
+{
+  Assert(row<rows, ExcIndexRangeType<size_type>(row,0,rows));
+
+  // entries of a row occupy the half-open range between two consecutive
+  // row starts
+  const std::size_t *const start_of_row = rowstart + row;
+  return start_of_row[1] - start_of_row[0];
+}
+
+
+
+inline
+SparsityPattern::size_type
+SparsityPattern::column_number (const size_type row,
+                                const unsigned int index) const
+{
+  Assert(row<rows, ExcIndexRangeType<size_type>(row,0,rows));
+  Assert(index<row_length(row), ExcIndexRange(index,0,row_length(row)));
+
+  // translate the (row, index-within-row) pair into a global position in
+  // the column-number array
+  const std::size_t global_position = rowstart[row]+index;
+  return colnums[global_position];
+}
+
+
+inline
+SparsityPattern::size_type
+SparsityPattern::n_nonzero_elements () const
+{
+  Assert ((rowstart!=0) && (colnums!=0), ExcEmptyObject());
+  Assert (compressed, ExcNotCompressed());
+
+  // the entries span the range from the start of the first row to the
+  // position one past the end of the last row
+  const std::size_t first_position = rowstart[0];
+  const std::size_t end_position   = rowstart[rows];
+  return end_position-first_position;
+}
+
+
+
+template <class Archive>
+inline
+void
+SparsityPattern::save (Archive &ar, const unsigned int) const
+{
+  // the base class part is written first
+  ar &static_cast<const Subscriptor &>(*this);
+
+  // then the scalar members, one per statement, in the fixed order in which
+  // they have to be read back
+  ar &max_dim;
+  ar &rows;
+  ar &cols;
+  ar &max_vec_len;
+  ar &max_row_length;
+  ar &compressed;
+  ar &store_diagonal_first_in_row;
+
+  // finally both arrays, with max_dim+1 and max_vec_len elements
+  // respectively
+  ar &boost::serialization::make_array(rowstart, max_dim + 1);
+  ar &boost::serialization::make_array(colnums, max_vec_len);
+}
+
+
+
+template <class Archive>
+inline
+void
+SparsityPattern::load (Archive &ar, const unsigned int)
+{
+  // forward to serialization function in the base class.
+  ar   &static_cast<Subscriptor &>(*this);
+
+  // read the scalar members in the same order in which save() wrote them
+  ar &max_dim &rows &cols &max_vec_len &max_row_length &compressed &store_diagonal_first_in_row;
+
+  // release whatever arrays this object currently owns before allocating
+  // new ones; otherwise loading into an already initialized pattern leaks
+  // the old storage. deleting a null pointer is a no-op, so this is also
+  // fine for an empty object. the pointers are reset in between so that
+  // they never dangle should one of the allocations below throw.
+  // NOTE(review): relies on every constructor setting both pointers to
+  // zero or to an owned array -- confirm in sparsity_pattern.cc.
+  delete[] rowstart;
+  rowstart = 0;
+  delete[] colnums;
+  colnums = 0;
+
+  rowstart = new std::size_t [max_dim + 1];
+  colnums = new size_type [max_vec_len];
+
+  // now fill the freshly allocated arrays from the archive
+  ar &boost::serialization::make_array(rowstart, max_dim + 1);
+  ar &boost::serialization::make_array(colnums, max_vec_len);
+}
+
+
+
+inline
+bool
+SparsityPattern::operator == (const SparsityPattern &sp2)  const
+{
+  // two patterns are considered equal if their dimensions, their compressed
+  // state and their diagonal storage mode agree, and if the used parts of
+  // the rowstart and colnums arrays have identical content. the remaining
+  // bookkeeping members (allocated sizes, maximal row length) are not
+  // compared.
+  if (rows != sp2.rows)
+    return false;
+  if (cols != sp2.cols)
+    return false;
+  if (compressed != sp2.compressed)
+    return false;
+  if (store_diagonal_first_in_row != sp2.store_diagonal_first_in_row)
+    return false;
+
+  // the row starts, including the past-the-end entry rowstart[rows]
+  if (!std::equal (rowstart, rowstart + rows + 1, sp2.rowstart))
+    return false;
+
+  // the column numbers of all entries in use
+  return std::equal (colnums, colnums + rowstart[rows], sp2.colnums);
+}
+
+
+
+namespace internal
+{
+  namespace SparsityPatternTools
+  {
+    /**
+     * Declare type for container size.
+     */
+    typedef types::global_dof_index size_type;
+
+    /**
+     * Extract the column index from a dereferenced inner iterator. In this
+     * overload the value is the column index itself.
+     */
+    inline
+    size_type
+    get_column_index_from_iterator (const size_type i)
+    {
+      const size_type column = i;
+      return column;
+    }
+
+
+
+    /**
+     * Overload for pairs of column index and value: the column index is
+     * the first element of the pair.
+     */
+    template <typename value>
+    inline
+    size_type
+    get_column_index_from_iterator (const std::pair<size_type, value> &i)
+    {
+      const size_type column = i.first;
+      return column;
+    }
+
+
+
+    /**
+     * Same as the previous overload, but for pairs whose first element is
+     * const, as obtained when dereferencing iterators of a std::map.
+     */
+    template <typename value>
+    inline
+    size_type
+    get_column_index_from_iterator (const std::pair<const size_type, value> &i)
+    {
+      const size_type column = i.first;
+      return column;
+    }
+  }
+}
+
+
+
+template <typename ForwardIterator>
+void
+SparsityPattern::copy_from (const size_type       n_rows,
+                            const size_type       n_cols,
+                            const ForwardIterator begin,
+                            const ForwardIterator end)
+{
+  Assert (static_cast<size_type>(std::distance (begin, end)) == n_rows,
+          ExcIteratorRange (std::distance (begin, end), n_rows));
+
+  // step 1: compute the length of every row. for square matrices one extra
+  // slot per row is reserved for the diagonal entry, regardless of whether
+  // the input already contains it; compress() below removes what remains
+  // unused
+  const bool is_square      = (n_rows == n_cols);
+  const int  diagonal_slots = (is_square ? 1 : 0);
+
+  std::vector<unsigned int> row_lengths;
+  row_lengths.reserve(n_rows);
+  for (ForwardIterator outer=begin; outer!=end; ++outer)
+    row_lengths.push_back (std::distance (outer->begin(), outer->end())
+                           +
+                           diagonal_slots);
+  reinit (n_rows, n_cols, row_lengths);
+
+  // step 2: enter the column numbers. for square matrices writing starts
+  // behind the first slot of each row, and diagonal entries found in the
+  // input are skipped
+  typedef typename std::iterator_traits<ForwardIterator>::value_type::const_iterator inner_iterator;
+
+  size_type row = 0;
+  for (ForwardIterator outer=begin; outer!=end; ++outer, ++row)
+    {
+      size_type *next_slot = colnums + rowstart[row] + diagonal_slots;
+
+      const inner_iterator end_of_row = outer->end();
+      for (inner_iterator entry=outer->begin(); entry!=end_of_row; ++entry)
+        {
+          const size_type col
+            = internal::SparsityPatternTools::get_column_index_from_iterator(*entry);
+          Assert (col < n_cols, ExcIndexRange(col,0,n_cols));
+
+          if (is_square && (col==row))
+            continue;
+
+          *next_slot = col;
+          ++next_slot;
+        }
+    }
+
+  // step 3: compress, which also sorts the entries within each row
+  compress ();
+}
+
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/sparsity_tools.h b/include/deal.II/lac/sparsity_tools.h
new file mode 100644
index 0000000..e5a8204
--- /dev/null
+++ b/include/deal.II/lac/sparsity_tools.h
@@ -0,0 +1,260 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__sparsity_tools_h
+#define dealii__sparsity_tools_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/lac/block_sparsity_pattern.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/lac/sparsity_pattern.h>
+
+#include <vector>
+
+#ifdef DEAL_II_WITH_MPI
+#include <mpi.h>
+#include <deal.II/base/index_set.h>
+#endif
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/*! @addtogroup Sparsity
+ *@{
+ */
+
+/**
+ * A namespace for functions that deal with things that one can do on sparsity
+ * patterns, such as renumbering rows and columns (or degrees of freedom if
+ * you want) according to the connectivity, or partitioning degrees of
+ * freedom.
+ */
+namespace SparsityTools
+{
+  /**
+   * Use the METIS partitioner to generate a partitioning of the degrees of
+   * freedom represented by this sparsity pattern. In effect, we view this
+   * sparsity pattern as a graph of connections between various degrees of
+   * freedom, where each nonzero entry in the sparsity pattern corresponds to
+   * an edge between two nodes in the connection graph. The goal is then to
+   * decompose this graph into groups of nodes so that a minimal number of
+   * edges are cut by the boundaries between node groups. This partitioning is
+   * done by METIS. Note that METIS can only partition symmetric sparsity
+   * patterns, and that of course the sparsity pattern has to be square. We do
+   * not check for symmetry of the sparsity pattern, since this is an
+   * expensive operation, but rather leave this as the responsibility of
+   * caller of this function.
+   *
+   * After calling this function, the output array will have values between
+   * zero and @p n_partitions-1 for each node (i.e. row or column of the
+   * matrix).
+   *
+   * This function will generate an error if METIS is not installed unless @p
+   * n_partitions is one. I.e., you can write a program so that it runs in the
+   * single-processor single-partition case without METIS installed, and only
+   * requires METIS when multiple partitions are required.
+   *
+   * Note that the sparsity pattern itself is not changed by calling this
+   * function. However, you will likely use the information generated by
+   * calling this function to renumber degrees of freedom, after which you
+   * will of course have to regenerate the sparsity pattern.
+   *
+   * This function will rarely be called separately, since in finite element
+   * methods you will want to partition the mesh, not the matrix. This can be
+   * done by calling @p GridTools::partition_triangulation.
+   */
+  void partition (const SparsityPattern     &sparsity_pattern,
+                  const unsigned int         n_partitions,
+                  std::vector<unsigned int> &partition_indices);
+
+  /**
+   * For a given sparsity pattern, compute a re-enumeration of row/column
+   * indices based on the algorithm by Cuthill-McKee.
+   *
+   * This algorithm is a graph renumbering algorithm in which we attempt to
+   * find a new numbering of all nodes of a graph based on their connectivity
+   * to other nodes (i.e. the edges that connect nodes). This connectivity is
+   * here represented by the sparsity pattern. In many cases within the
+   * library, the nodes represent degrees of freedom and edges are nonzero
+   * entries in a matrix, i.e. pairs of degrees of freedom that couple through
+   * the action of a bilinear form.
+   *
+   * The algorithm starts at a node, searches the other nodes for those which
+   * are coupled with the one we started with and numbers these in a certain
+   * way. It then finds the second level of nodes, namely those that couple
+   * with those of the previous level (which were those that coupled with the
+   * initial node) and numbers these. And so on. For the details of the
+   * algorithm, especially the numbering within each level, we refer the
+   * reader to the book of Schwarz (H. R. Schwarz: Methode der finiten
+   * Elemente).
+   *
+   * These algorithms have one major drawback: they require a good starting
+   * node, i.e. node that will have number zero in the output array. A
+   * starting node forming the initial level of nodes can thus be given by the
+   * user, e.g. by exploiting knowledge of the actual topology of the domain.
+   * It is also possible to give several starting indices, which may be used
+   * to simulate a simple upstream numbering (by giving the inflow nodes as
+   * starting values) or to make preconditioning faster (by letting the
+   * Dirichlet boundary indices be starting points).
+   *
+   * If no starting index is given, one is chosen automatically, namely one
+   * with the smallest coordination number (the coordination number is the
+   * number of other nodes this node couples with). This node is usually
+   * located on the boundary of the domain. There is, however, large ambiguity
+   * in this when using the hierarchical meshes used in this library, since in
+   * most cases the computational domain is not approximated by tilting and
+   * deforming elements and by plugging together variable numbers of elements
+   * at vertices, but rather by hierarchical refinement. There is therefore a
+   * large number of nodes with equal coordination numbers. The renumbering
+   * algorithms will therefore not give optimal results.
+   *
+   * If the graph has two or more unconnected components and if no starting
+   * indices are given, the algorithm will number each component
+   * consecutively. However, this requires the determination of a starting
+   * index for each component; as a consequence, the algorithm will produce an
+   * exception if starting indices are given, taking the latter as an
+   * indication that the caller of the function would like to override the
+   * part of the algorithm that chooses starting indices.
+   */
+  void
+  reorder_Cuthill_McKee (const DynamicSparsityPattern &sparsity,
+                         std::vector<DynamicSparsityPattern::size_type> &new_indices,
+                         const std::vector<DynamicSparsityPattern::size_type> &starting_indices = std::vector<DynamicSparsityPattern::size_type>());
+
+  /**
+   * As above, but taking a SparsityPattern object instead.
+   *
+   * @deprecated
+   */
+  void
+  reorder_Cuthill_McKee (const SparsityPattern &sparsity,
+                         std::vector<SparsityPattern::size_type> &new_indices,
+                         const std::vector<SparsityPattern::size_type> &starting_indices = std::vector<SparsityPattern::size_type>()) DEAL_II_DEPRECATED;
+
+  /**
+   * For a given sparsity pattern, compute a re-enumeration of row/column
+   * indices in a hierarchical way, similar to what
+   * DoFRenumbering::hierarchical does for degrees of freedom on
+   * hierarchically refined meshes.
+   *
+   * This algorithm first selects a node with the minimum number of neighbors
+   * and puts that node and its direct neighbors into one chunk. Next, it
+   * selects one of the neighbors of the already selected nodes, adds the node
+   * and its direct neighbors that are not part of one of the previous chunks,
+   * into the next. After this sweep, neighboring nodes are grouped together.
+   * To ensure a similar grouping on a more global level, this grouping is
+   * called recursively on the groups so formed. The recursion stops when no
+   * further grouping is possible. Eventually, the ordering obtained by this
+   * method passes through the indices represented in the sparsity pattern in
+   * a z-like way.
+   *
+   * If the graph has two or more unconnected components, the algorithm will
+   * number each component consecutively, starting with the components with
+   * the lowest number of nodes.
+   */
+  void
+  reorder_hierarchical (const DynamicSparsityPattern                   &sparsity,
+                        std::vector<DynamicSparsityPattern::size_type> &new_indices);
+
+#ifdef DEAL_II_WITH_MPI
+  /**
+   * Communicate rows in a dynamic sparsity pattern over MPI.
+   *
+   * @param dsp A dynamic sparsity pattern that has been built locally and for
+   * which we need to exchange entries with other processors to make sure that
+   * each processor knows all the elements of the rows of a matrix it stores
+   * and that may eventually be written to. This sparsity pattern will be
+   * changed as a result of this function: All entries in rows that belong to
+   * a different processor are sent to them and added there.
+   *
+   * @param rows_per_cpu A vector containing the number of rows per CPU for
+   * determining ownership. This is typically the value returned by
+   * DoFHandler::locally_owned_dofs_per_processor.
+   *
+   * @param mpi_comm The MPI communicator shared between the processors that
+   * participate in this operation.
+   *
+   * @param myrange The range of elements stored locally. This should be the
+   * one used in the constructor of the DynamicSparsityPattern, and should
+   * also be the locally relevant set. Only rows contained in myrange are
+   * checked in dsp for transfer. This function needs to be used with
+   * PETScWrappers::MPI::SparseMatrix for it to work correctly in a parallel
+   * computation.
+   */
+  void distribute_sparsity_pattern
+  (DynamicSparsityPattern                               &dsp,
+   const std::vector<DynamicSparsityPattern::size_type> &rows_per_cpu,
+   const MPI_Comm                                       &mpi_comm,
+   const IndexSet                                       &myrange);
+
+  /**
+   * Similar to the function above, but for BlockDynamicSparsityPattern
+   * instead.
+   *
+   * @param[in,out] dsp The locally built sparsity pattern to be modified.
+   * @param owned_set_per_cpu Typically the value given by
+   * DoFHandler::locally_owned_dofs_per_processor.
+   *
+   * @param mpi_comm The MPI communicator to use.
+   *
+   * @param myrange Typically the locally relevant DoFs.
+   */
+  void distribute_sparsity_pattern
+  (BlockDynamicSparsityPattern &dsp,
+   const std::vector<IndexSet> &owned_set_per_cpu,
+   const MPI_Comm              &mpi_comm,
+   const IndexSet              &myrange);
+
+#endif
+
+
+  /**
+   * Exception
+   */
+  DeclException0 (ExcMETISNotInstalled);
+  /**
+   * Exception
+   */
+  DeclException1 (ExcInvalidNumberOfPartitions,
+                  int,
+                  << "The number of partitions you gave is " << arg1
+                  << ", but must be greater than zero.");
+
+  /**
+   * Exception
+   */
+  DeclException1 (ExcMETISError,
+                  int,
+                  << "    An error with error number " << arg1
+                  << " occurred while calling a METIS function");
+
+  /**
+   * Exception
+   */
+  DeclException2 (ExcInvalidArraySize,
+                  int, int,
+                  << "The array has size " << arg1 << " but should have size "
+                  << arg2);
+}
+
+/**
+ * @}
+ */
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/swappable_vector.h b/include/deal.II/lac/swappable_vector.h
new file mode 100644
index 0000000..b94a271
--- /dev/null
+++ b/include/deal.II/lac/swappable_vector.h
@@ -0,0 +1,218 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__swappable_vector_h
+#define dealii__swappable_vector_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/thread_management.h>
+#include <deal.II/lac/vector.h>
+#include <string>
+
+DEAL_II_NAMESPACE_OPEN
+
+/*! @addtogroup Vectors
+ *@{
+ */
+
+/**
+ * This class is a wrapper to the @p Vector class which allows to swap out the
+ * data to a file and reload it later on. It handles the management of the
+ * name of the file where the data is to be stored temporarily and removes the
+ * file if necessary at the end of the lifetime of the vector.
+ *
+ * There are functions to swap the data to a file, and to reload it. There is
+ * also a function @p alert that can be used to signal to an object of this
+ * class that the data will be needed shortly, so the object can initiate that
+ * the data be loaded already. While in non-multithreading mode, this function
+ * has no effect since @p reload has to be called afterwards anyway. On the
+ * other hand, in multithreading mode, the data is preloaded in the background
+ * using a thread on its own, and may already be available at the time when @p
+ * reload is called. If it is not available, @p reload waits until the
+ * detached thread has loaded the data.
+ *
+ * @note Instantiations for this template are provided for <tt>@<float@> and
+ * @<double@></tt>; others can be generated in application programs (see the
+ * section on
+ * @ref Instantiations
+ * in the manual).
+ *
+ * @author Wolfgang Bangerth, 1999, 2000
+ */
+template <typename number>
+class SwappableVector : public Vector<number>
+{
+public:
+  /**
+   * Constructor. Does nothing apart from calling the constructor of the
+   * underlying class.
+   */
+  SwappableVector ();
+
+  /**
+   * Copy constructor. Copies the data if @p v contains some, but does not do
+   * so if @p v is empty or has its data swapped out. In the latter case, warn
+   * about that. In particular do not take over ownership of any files that @p
+   * v might own.
+   */
+  SwappableVector (const SwappableVector &v);
+
+  /**
+   * Destructor. If this class still owns a file to which temporary data was
+   * stored, then it is deleted.
+   */
+  virtual ~SwappableVector ();
+
+  /**
+   * Copy operator. Do mostly the same as the copy constructor does; if
+   * necessary, delete temporary files owned by this object at first.
+   */
+  SwappableVector &operator = (const SwappableVector &);
+
+  /**
+   * Swap out the data of this vector to the file of which the name is given.
+   * It is assumed that the file can be overwritten if it exists, and
+   * ownership of this file is assumed by this object. The file is deleted
+   * either upon calling @p kill_file, or on destruction of this object.
+   *
+   * The content of the vector is cleared and its size is reset to zero.
+   *
+   * If this object owns another file, for example when @p swap_out but no @p
+   * kill_file has previously been called, then that is deleted first.
+   */
+  void swap_out (const std::string &filename);
+
+  /**
+   * Reload the data of this vector from the file to which it has been stored
+   * previously using @p swap_out. Since the file is not deleted after
+   * reloading, you can call @p reload multiple times, in between you may do
+   * everything with the vector, including changing its size.
+   *
+   * This function resets the size of the vector to the number of elements
+   * there were upon calling @p swap_out before.
+   */
+  void reload ();
+
+  /**
+   * Calling this function can be used to alert this vector that it will need
+   * to reload its data soon. It has no effect if the data of the vector is
+   * already present, and it has no effect in single-thread mode as well, but
+   * in multithread mode, it spawns another thread that reads the data in
+   * parallel to the usual execution of the program, such that when @p reload
+   * is called, the data may eventually be available already. It might
+   * therefore be worthwhile to call this function some time in advance, if
+   * you know that the data will be needed, and loading takes some time, for
+   * instance if the file to which the data was written is not in a local tmp
+   * directory.
+   *
+   * Calling this function multiple times before calling @p reload is allowed
+   * and has no effect for subsequent calls. Calling this function while the
+   * data is still or already in memory is allowed and has no effect.
+   */
+  void alert ();
+
+
+  /**
+   * Remove the file to which the data has been stored the last time. After
+   * this, the object does not own any file any more, so of course you can't
+   * call @p reload any more.
+   *
+   * If this object does not own a file, for example since @p swap_out was not
+   * called, or because @p kill_file has been called previously, then this
+   * function does nothing.
+   */
+  void kill_file ();
+
+  /**
+   * Return the name of the file to which the data was stored the last time
+   * you called @p swap_out. If @p swap_out was not called, or if in between
+   * @p kill_file was called, then the filename is an empty string.
+   */
+  const std::string &get_filename () const;
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  std::size_t memory_consumption () const;
+
+  /**
+   * @addtogroup Exceptions
+   * @{
+   */
+  /**
+   * Exception.
+   */
+  DeclException0 (ExcSizeZero);
+  /**
+   * Exception.
+   */
+  DeclException0 (ExcSizeNonzero);
+  /**
+   * Exception indicating that a file name is not valid in the present
+   * context. The offending file name is passed as argument and shown in the
+   * error message.
+   */
+  DeclException1 (ExcInvalidFilename,
+                  std::string,
+                  << "The filename <" << arg1
+                  << "> is not a valid one here.");
+  /**
+   * Exception used in the assertion of the copy constructor, which requires
+   * that the object to be copied has an empty file name.
+   */
+  DeclException0 (ExcInvalidCopyOperation);
+  //@}
+private:
+  /**
+   * Name of the file to which data was swapped out. If no data is presently
+   * swapped out (i.e. before calling @p swap_out and after @p kill_file), the
+   * string is empty, indicating no ownership of files.
+   */
+  std::string filename;
+
+  /**
+   * If in multithread mode, then the @p alert function has functionality, but
+   * needs to coordinate with the @p reload function. This is done through the
+   * following lock.
+   *
+   * If not in MT mode, then the class used here is empty, and we can as well
+   * get away with it.
+   */
+  Threads::Mutex lock;
+
+  /**
+   * Flag by which the @p alert function signifies that the data has been
+   * preloaded already. This flag is always @p false in non-MT mode.
+   */
+  bool data_is_preloaded;
+
+  /**
+   * Internal function that actually reloads the vector. Called from @p reload
+   * and @p alert.
+   *
+   * The parameter specifies whether the function shall set @p
+   * data_is_preloaded or not. The calling functions sometimes cannot do this
+   * themselves, since they call this function detached, so this function has
+   * to signal success itself if this is required.
+   */
+  void reload_vector (const bool set_flag);
+};
+
+/*@}*/
+/*----------------------------   swappable_vector.h     ---------------------------*/
+/* end of #ifndef dealii__swappable_vector_h */
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
+/*----------------------------   swappable_vector.h     ---------------------------*/
diff --git a/include/deal.II/lac/swappable_vector.templates.h b/include/deal.II/lac/swappable_vector.templates.h
new file mode 100644
index 0000000..de3706e
--- /dev/null
+++ b/include/deal.II/lac/swappable_vector.templates.h
@@ -0,0 +1,258 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__swappable_vector_templates_h
+#define dealii__swappable_vector_templates_h
+
+
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/lac/swappable_vector.h>
+#include <fstream>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// Start out empty: no swap file is owned and nothing has been preloaded.
+template <typename number>
+SwappableVector<number>::SwappableVector ()
+  : Vector<number> (), filename (), data_is_preloaded (false)
+{}
+
+
+
+// Copy the vector data only; the copy never takes over a swap file.
+template <typename number>
+SwappableVector<number>::SwappableVector (const SwappableVector<number> &v)
+  : Vector<number>(v), filename (), data_is_preloaded (false)
+{
+  // a source that currently owns a swap file must not be copied
+  Assert (v.filename == "", ExcInvalidCopyOperation());
+}
+
+
+
+template <typename number>
+SwappableVector<number>::~SwappableVector ()
+{
+  // If the vector is still swapped out, remove the swap file before the
+  // vector itself goes away. kill_file() signals a failed removal through
+  // AssertThrow, but an exception must never leave a destructor (it would
+  // end in std::terminate), so failures are deliberately ignored here.
+  if (filename != "")
+    {
+      try { kill_file (); }
+      catch (...) {}
+    }
+}
+
+
+
+template <typename number>
+SwappableVector<number> &
+SwappableVector<number>::operator= (const SwappableVector<number> &v)
+{
+  // a swap file that is still around holds the old contents: remove it
+  if (!filename.empty())
+    kill_file ();
+
+  // if in MT mode, keep all other
+  // operations out until we are done.
+  // if not in MT mode, this is a no-op
+  Threads::Mutex::ScopedLock guard (lock);
+
+  Vector<number>::operator = (v);
+  data_is_preloaded = false;
+
+  return *this;
+}
+
+
+
+template <typename number>
+void SwappableVector<number>::swap_out (const std::string &name)
+{
+  // Write the vector to the file @p name and resize it to zero
+  // afterwards.
+  //
+  // a file left over from an earlier swap_out() that was never removed
+  // is stale by now: delete it before taking ownership of the new one
+  if (!filename.empty())
+    kill_file ();
+
+  filename = name;
+
+  Assert (this->size() != 0, ExcSizeZero());
+
+  // if in MT mode, keep all other operations on this object out
+  // until the data has been written. if not in MT mode, this is a
+  // no-op
+  Threads::Mutex::ScopedLock guard (lock);
+
+  // an alert() without the matching reload() would have left the
+  // preload flag set; that must not be the case when swapping out
+  // again
+  Assert (data_is_preloaded == false, ExcInternalError());
+
+  std::ofstream swap_file (filename.c_str());
+  this->block_write (swap_file);
+  swap_file.close ();
+
+  this->reinit (0);
+}
+
+
+
+template <typename number>
+void SwappableVector<number>::reload ()
+{
+  // Bring swapped-out data back. if in MT mode: alert() takes this
+  // lock and reload_vector() releases it after reading, so wait
+  // here for such a preload to finish. if not in MT mode, this
+  // is a no-op
+  lock.acquire ();
+
+  // if alert() has preloaded the data
+  // already, there is nothing left to
+  // read from disk
+  if (data_is_preloaded == false)
+    // read the data now. in MT mode
+    // reload_vector() also releases
+    // the lock for us
+    reload_vector (false);
+  else
+    {
+      // the preload has been consumed,
+      // so reset the flag
+      data_is_preloaded = false;
+
+      // release the lock ourselves; in
+      // the other branch reload_vector()
+      // has done that already
+      lock.release ();
+    };
+}
+
+
+
+template <typename number>
+void SwappableVector<number>::alert ()
+{
+#ifndef DEAL_II_WITH_THREADS
+  // without thread support nothing can be
+  // preloaded in the background: do nothing
+  return;
+#else
+
+  // serialize with other calls to this
+  // function and with the other member
+  // functions that take the lock
+  lock.acquire ();
+
+  // nothing to do if the data has been
+  // preloaded by an earlier call ...
+  if ( (data_is_preloaded == true) ||
+       // ... or if the vector has nonzero
+       // size, i.e. is not swapped out at
+       // the moment
+       (this->size() != 0))
+    lock.release ();
+  else
+    // the data is still on disk: read it in
+    // a thread of its own. passing true makes
+    // reload_vector() set the preload flag
+    Threads::new_thread (&SwappableVector<number>::reload_vector, *this, true);
+  // in that case the lock stays held here;
+  // reload_vector() releases it when done
+#endif
+}
+
+
+
+template <typename number>
+void SwappableVector<number>::reload_vector (const bool set_flag)
+{
+  (void)set_flag;   // only read in MT mode; avoids an unused warning otherwise
+
+  Assert (filename != "", ExcInvalidFilename (filename));
+  Assert (this->size() == 0, ExcSizeNonzero());
+  // read the vector back from the swap file
+  std::ifstream tmp_in(filename.c_str());
+  this->block_read (tmp_in);
+  tmp_in.close ();
+
+#ifdef DEAL_II_WITH_THREADS
+  // record the preload if the caller asked for it (alert() does, reload()
+  // does not), then release the lock that the calling function acquired;
+  // the callers rely on this and do not release it themselves
+  if (set_flag)
+    data_is_preloaded = true;
+  lock.release ();
+#endif
+}
+
+
+
+template <typename number>
+void SwappableVector<number>::kill_file ()
+{
+  // Delete the swap file, if there is one. if in MT mode,
+  // wait for other operations on this object to
+  // finish first (there should be none, but who
+  // knows). if not in MT mode,
+  // this is a no-op
+  Threads::Mutex::ScopedLock guard (lock);
+
+  // a set flag means that someone asked for
+  // the data in advance but never fetched it
+  // with reload(); most probably that is an
+  // error in the calling code
+  Assert (data_is_preloaded == false, ExcInternalError());
+
+  if (filename.empty())
+    return;
+  const int status = std::remove (filename.c_str());
+  AssertThrow (status == 0, ExcInternalError());
+
+  // from now on no file is owned any more
+  filename.clear ();
+}
+
+
+
+// Name of the swap file; the string is empty while no file is owned.
+template <typename number> const std::string &
+SwappableVector<number>::get_filename () const
+{
+  return this->filename;
+}
+
+
+
+template <typename number>
+std::size_t
+SwappableVector<number>::memory_consumption () const
+{
+  // the vector data itself plus the three members added by this class
+  std::size_t bytes = Vector<number>::memory_consumption ();
+  bytes += MemoryConsumption::memory_consumption (filename) + sizeof(lock);
+  return bytes + MemoryConsumption::memory_consumption (data_is_preloaded);
+}
+
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // dealii__swappable_vector_templates_h
diff --git a/include/deal.II/lac/transpose_matrix.h b/include/deal.II/lac/transpose_matrix.h
new file mode 100644
index 0000000..bfb4790
--- /dev/null
+++ b/include/deal.II/lac/transpose_matrix.h
@@ -0,0 +1,208 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2002 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__transpose_matrix_h
+#define dealii__transpose_matrix_h
+
+
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/lac/pointer_matrix.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/**
+ * The transpose of a given matrix.  This auxiliary class swaps the effect of
+ * vmult() and Tvmult() as well as vmult_add() and Tvmult_add().
+ *
+ * The implementation is analogous to the class PointerMatrix.
+ *
+ * @note The transposed matrix is never actually assembled. Instead, only the
+ * matrix vector multiplication is performed in a transposed way.
+ *
+ * @deprecated If deal.II was configured with C++11 support, use the
+ * LinearOperator class instead, see the module on
+ * @ref LAOperators "linear operators"
+ * for further details.
+ *
+ * @ingroup Matrix2
+ * @author Guido Kanschat, 2006
+ */
+template<typename MatrixType, typename VectorType>
+class
+  TransposeMatrix : public PointerMatrixBase<VectorType>
+{
+public:
+  /**
+   * Constructor.  The pointer in the argument is stored in this class. As
+   * usual, the lifetime of <tt>*M</tt> must be longer than the one of the
+   * TransposeMatrix.
+   *
+   * If <tt>M</tt> is zero, no matrix is stored.
+   */
+  TransposeMatrix (const MatrixType *M=0);
+
+  /**
+   * Constructor. No matrix is stored. The name argument is used to identify
+   * the SmartPointer for this object.
+   */
+  TransposeMatrix(const char *name);
+
+  /**
+   * Constructor. <tt>M</tt> points to a matrix which must live longer than
+   * the TransposeMatrix. The name argument is used to identify the
+   * SmartPointer for this object.
+   */
+  TransposeMatrix(const MatrixType *M,
+                  const char       *name);
+
+  // Use doc from base class
+  virtual void clear();
+
+  /**
+   * Return whether no matrix is stored or the stored matrix is empty.
+   */
+  bool empty () const;
+
+  /**
+   * Assign a new matrix pointer. Deletes the old pointer and releases its
+   * matrix.
+   * @see SmartPointer
+   */
+  const TransposeMatrix &operator= (const MatrixType *M);
+
+  /**
+   * Matrix-vector product with the transpose: Tvmult() of the stored matrix.
+   */
+  virtual void vmult (VectorType       &dst,
+                      const VectorType &src) const;
+
+  /**
+   * Transposed matrix-vector product: calls vmult() of the stored matrix.
+   */
+  virtual void Tvmult (VectorType       &dst,
+                       const VectorType &src) const;
+
+  /**
+   * Same as vmult(), but adding to <tt>dst</tt>.
+   */
+  virtual void vmult_add (VectorType       &dst,
+                          const VectorType &src) const;
+
+  /**
+   * Same as Tvmult(), but adding to <tt>dst</tt>.
+   */
+  virtual void Tvmult_add (VectorType       &dst,
+                           const VectorType &src) const;
+
+private:
+  /**
+   * The pointer to the actual (untransposed) matrix.
+   */
+  SmartPointer<const MatrixType,TransposeMatrix<MatrixType,VectorType> > m;
+};
+
+
+//----------------------------------------------------------------------//
+
+
+// Store the pointer M (which may be zero); the matrix is not copied.
+template<typename MatrixType, typename VectorType>
+TransposeMatrix<MatrixType, VectorType>::TransposeMatrix (const MatrixType *M)
+  : m(M) {}
+
+
+// Start without a matrix; name is passed on to the SmartPointer member.
+template<typename MatrixType, typename VectorType>
+TransposeMatrix<MatrixType, VectorType>::TransposeMatrix (const char *name)
+  : m(0, name) {}
+
+
+// Store the pointer M and pass name on to the SmartPointer member.
+template<typename MatrixType, typename VectorType>
+TransposeMatrix<MatrixType, VectorType>::TransposeMatrix (const MatrixType *M,
+                                                          const char       *name)
+  : m(M, name) {}
+
+
+// Forget the matrix: afterwards no matrix is stored in this object.
+template<typename MatrixType, typename VectorType> inline void
+TransposeMatrix<MatrixType, VectorType>::clear ()
+{
+  this->m = 0;
+}
+
+
+template<typename MatrixType, typename VectorType>
+inline const TransposeMatrix<MatrixType, VectorType> &
+TransposeMatrix<MatrixType, VectorType>::operator= (const MatrixType *M)
+{
+  this->m = M;     // re-seat the smart pointer; M may be zero
+  return (*this);
+}
+
+
+template<typename MatrixType, typename VectorType>
+inline bool
+TransposeMatrix<MatrixType, VectorType>::empty () const
+{
+  // without a matrix we are empty; otherwise the matrix decides. the
+  // short-circuit keeps us from dereferencing a zero pointer
+  return (m == 0) || m->empty();
+}
+
+// Swapped on purpose: vmult() of the transpose is Tvmult() of the matrix.
+template<typename MatrixType, typename VectorType> inline void
+TransposeMatrix<MatrixType, VectorType>::vmult (VectorType       &dst,
+                                                const VectorType &src) const
+{
+  Assert (m != 0, ExcNotInitialized());
+  m->Tvmult (dst, src);
+}
+
+
+// Swapped on purpose: Tvmult() of the transpose is vmult() of the matrix.
+template<typename MatrixType, typename VectorType> inline void
+TransposeMatrix<MatrixType, VectorType>::Tvmult (VectorType       &dst,
+                                                 const VectorType &src) const
+{
+  Assert (m != 0, ExcNotInitialized());
+  m->vmult (dst, src);
+}
+
+
+// Adding variant of vmult(): forwards to Tvmult_add() of the matrix.
+template<typename MatrixType, typename VectorType> inline void
+TransposeMatrix<MatrixType, VectorType>::vmult_add (VectorType       &dst,
+                                                    const VectorType &src) const
+{
+  Assert (m != 0, ExcNotInitialized());
+  m->Tvmult_add (dst, src);
+}
+
+
+// Adding variant of Tvmult(): forwards to vmult_add() of the matrix.
+template<typename MatrixType, typename VectorType> inline void
+TransposeMatrix<MatrixType, VectorType>::Tvmult_add (VectorType       &dst,
+                                                     const VectorType &src) const
+{
+  Assert (m != 0, ExcNotInitialized());
+  m->vmult_add (dst, src);
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/tridiagonal_matrix.h b/include/deal.II/lac/tridiagonal_matrix.h
new file mode 100644
index 0000000..e928d5b
--- /dev/null
+++ b/include/deal.II/lac/tridiagonal_matrix.h
@@ -0,0 +1,401 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__tridiagonal_matrix_h
+#define dealii__tridiagonal_matrix_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/lac/lapack_support.h>
+
+#include <vector>
+#include <iomanip>
+
+DEAL_II_NAMESPACE_OPEN
+
+// forward declarations
+template<typename number> class Vector;
+
+
+/*! @addtogroup Matrix1
+ *@{
+ */
+
+
+/**
+ * A quadratic tridiagonal matrix. That is, a matrix where all entries are
+ * zero, except the diagonal and the entries left and right of it.
+ *
+ * The matrix has an additional symmetric mode, in which case only the upper
+ * triangle of the matrix is stored and mirrored to the lower one for matrix
+ * vector operations.
+ *
+ * @ingroup Matrix1
+ * @author Guido Kanschat, 2005, 2006
+ */
+template<typename number>
+class TridiagonalMatrix
+{
+public:
+  ///@name Constructors
+  //@{
+  /**
+   * Declare type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * @name Constructors and initialization.
+   */
+  /**
+   * Constructor generating an empty matrix of dimension <tt>n</tt>.
+   */
+  TridiagonalMatrix(size_type    n = 0,
+                    bool symmetric = false);
+
+  /**
+   * Reinitialize the matrix to a new size and reset all entries to zero. The
+   * symmetry properties may be set as well.
+   */
+  void reinit(size_type n,
+              bool symmetric = false);
+
+
+//@}
+///@name Non-modifying operators
+//@{
+
+  /**
+   * Number of rows of this matrix. To remember: this matrix is an <i>m x
+   * m</i>-matrix.
+   */
+  size_type m () const;
+
+  /**
+   * Number of columns of this matrix. To remember: this matrix is an <i>n x
+   * n</i>-matrix.
+   */
+  size_type n () const;
+
+  /**
+   * Return whether the matrix contains only elements with value zero. This
+   * function is mainly for internal consistency checks and should seldom be
+   * used when not in debug mode since it uses quite some time.
+   */
+  bool all_zero () const;
+
+
+
+//@}
+///@name Element access
+//@{
+  /**
+   * Read-only access to a value. This is restricted to the case where
+   * <i>|i-j| <= 1</i>.
+   */
+  number operator()(size_type i, size_type j) const;
+
+  /**
+   * Read-write access to a value. This is restricted to the case where
+   * <i>|i-j| <= 1</i>.
+   *
+   * @note In case of symmetric storage technique, the entries <i>(i,j)</i>
+   * and <i>(j,i)</i> are identified and <b>both</b> exist. This must be taken
+   * into account if adding up is used for matrix assembling in order not to
+   * obtain doubled entries.
+   */
+  number &operator()(size_type i, size_type j);
+
+//@}
+///@name Multiplications with vectors
+//@{
+
+  /**
+   * Matrix-vector-multiplication. Multiplies <tt>v</tt> from the right and
+   * stores the result in <tt>w</tt>.
+   *
+   * If the optional parameter <tt>adding</tt> is <tt>true</tt>, the result is
+   * added to <tt>w</tt>.
+   *
+   * Source and destination must not be the same vector.
+   */
+  void vmult (Vector<number>       &w,
+              const Vector<number> &v,
+              const bool            adding=false) const;
+
+  /**
+   * Adding Matrix-vector-multiplication. Same as vmult() with parameter
+   * <tt>adding=true</tt>, but widely used in <tt>deal.II</tt> classes.
+   *
+   * Source and destination must not be the same vector.
+   */
+  void vmult_add (Vector<number>       &w,
+                  const Vector<number> &v) const;
+
+  /**
+   * Transpose matrix-vector-multiplication. Multiplies <tt>v<sup>T</sup></tt>
+   * from the left and stores the result in <tt>w</tt>.
+   *
+   * If the optional parameter <tt>adding</tt> is <tt>true</tt>, the result is
+   * added to <tt>w</tt>.
+   *
+   * Source and destination must not be the same vector.
+   */
+  void Tvmult (Vector<number>       &w,
+               const Vector<number> &v,
+               const bool            adding=false) const;
+
+  /**
+   * Adding transpose matrix-vector-multiplication. Same as Tvmult() with
+   * parameter <tt>adding=true</tt>, but widely used in <tt>deal.II</tt>
+   * classes.
+   *
+   * Source and destination must not be the same vector.
+   */
+  void Tvmult_add (Vector<number>       &w,
+                   const Vector<number> &v) const;
+
+  /**
+   * Build the matrix scalar product <tt>u^T M v</tt>. This function is mostly
+   * useful when building the cellwise scalar product of two functions in the
+   * finite element context.
+   */
+  number matrix_scalar_product (const Vector<number> &u,
+                                const Vector<number> &v) const;
+
+  /**
+   * Return the square of the norm of the vector <tt>v</tt> with respect to
+   * the norm induced by this matrix, i.e. <i>(v,Mv)</i>. This is useful, e.g.
+   * in the finite element context, where the <i>L<sup>2</sup></i> norm of a
+   * function equals the matrix norm with respect to the mass matrix of the
+   * vector representing the nodal values of the finite element function.
+   *
+   * Obviously, the matrix needs to be quadratic for this operation.
+   */
+  number matrix_norm_square (const Vector<number> &v) const;
+
+//@}
+///@name Matrixnorms
+//@{
+
+  /**
+   * Return the $l_1$-norm of the matrix, i.e. $|M|_1=\max_{all columns
+   * j}\sum_{all rows i} |M_{ij}|$, (max. sum of columns). This is the natural
+   * matrix norm that is compatible to the $l_1$-norm for vectors, i.e.
+   * $|Mv|_1\leq |M|_1 |v|_1$. (cf. Rannacher Numerik0)
+   */
+  number l1_norm () const;
+
+  /**
+   * Return the $l_\infty$-norm of the matrix, i.e. $|M|_\infty=\max_{all rows
+   * i}\sum_{all columns j} |M_{ij}|$, (max. sum of rows). This is the natural
+   * matrix norm that is compatible to the $l_\infty$-norm of vectors, i.e.
+   * $|Mv|_\infty \leq |M|_\infty |v|_\infty$.
+   */
+  number linfty_norm () const;
+
+  /**
+   * The Frobenius norm of the matrix. Return value is the root of the square
+   * sum of all matrix entries.
+   */
+  number frobenius_norm () const;
+
+  /**
+   * Compute the relative norm of the skew-symmetric part. The return value is
+   * the Frobenius norm of the skew-symmetric part of the matrix divided by
+   * that of the matrix.
+   *
+   * Main purpose of this function is to check, if a matrix is symmetric
+   * within a certain accuracy, or not.
+   */
+  number relative_symmetry_norm2 () const;
+//@}
+///@name LAPACK operations
+//@{
+  /**
+   * Compute the eigenvalues of the symmetric tridiagonal matrix.
+   *
+   * @note This function requires configuration of deal.II with LAPACK
+   * support. Additionally, the matrix must use symmetric storage technique.
+   */
+  void compute_eigenvalues();
+  /**
+   * After calling compute_eigenvalues(), you can access each eigenvalue here.
+   */
+  number eigenvalue(const size_type i) const;
+//@}
+///@name Miscellanea
+//@{
+  /**
+   * Output of the matrix, one row per line. @p precision is currently unused.
+   */
+  template <class OutputStream>
+  void print(OutputStream &s,
+             const unsigned int  width=5,
+             const unsigned int  precision=2) const;
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  std::size_t memory_consumption () const;
+  //@}
+
+private:
+  /**
+   * The diagonal entries.
+   */
+  std::vector<number> diagonal;
+  /**
+   * The entries left of the diagonal. The entry with index zero is always
+   * zero, since the first row has no entry left of the diagonal. Therefore,
+   * the length of this vector is the same as that of #diagonal.
+   *
+   * The length of this vector is zero for symmetric storage. In this case,
+   * the second element of #left is identified with the first element of
+   * #right.
+   */
+  std::vector<number> left;
+  /**
+   * The entries right of the diagonal. The last entry is always zero, since
+   * the last row has no entry right of the diagonal. Therefore, the length of
+   * this vector is the same as that of #diagonal.
+   */
+  std::vector<number> right;
+
+  /**
+   * If this flag is true, only the entries to the right of the diagonal are
+   * stored and the matrix is assumed symmetric.
+   */
+  bool is_symmetric;
+
+  /**
+   * The state of the matrix. Normally, the state is LAPACKSupport::matrix,
+   * indicating that the object can be used for regular matrix operations.
+   *
+   * See explanation of this data type for details.
+   */
+  LAPACKSupport::State state;
+};
+
+/**@}*/
+
+//---------------------------------------------------------------------------
+#ifndef DOXYGEN
+
+// The matrix is square: the row count is the length of the diagonal.
+template<typename number> types::global_dof_index
+TridiagonalMatrix<number>::m() const
+{
+  return this->diagonal.size();
+}
+
+
+
+// The matrix is square: the column count is the length of the diagonal.
+template<typename number> types::global_dof_index
+TridiagonalMatrix<number>::n() const
+{
+  return this->diagonal.size();
+}
+
+
+template<typename number>
+inline
+number
+TridiagonalMatrix<number>::operator()(size_type i, size_type j) const
+{
+  Assert(i<n(), ExcIndexRange(i,0,n()));
+  Assert(j<n(), ExcIndexRange(j,0,n()));
+  Assert (i<=j+1, ExcIndexRange(i,j-1,j+2));
+  Assert (j<=i+1, ExcIndexRange(j,i-1,i+2));
+
+  // on the diagonal
+  if (i==j)
+    return diagonal[i];
+
+  // right of the diagonal
+  if (i+1==j)
+    return right[i];
+
+  // left of the diagonal; with symmetric storage this entry is kept as
+  // the one right of the diagonal in the row above
+  if (i==j+1)
+    return (is_symmetric ? right[j] : left[i]);
+
+  Assert (false, ExcInternalError());
+  return 0;
+}
+
+
+template<typename number>
+inline
+number &
+TridiagonalMatrix<number>::operator()(size_type i, size_type j)
+{
+  Assert(i<n(), ExcIndexRange(i,0,n()));
+  Assert(j<n(), ExcIndexRange(j,0,n()));
+  Assert (i<=j+1, ExcIndexRange(i,j-1,j+2));
+  Assert (j<=i+1, ExcIndexRange(j,i-1,i+2));
+
+  // on the diagonal
+  if (i==j)
+    return diagonal[i];
+
+  // right of the diagonal
+  if (i+1==j)
+    return right[i];
+
+  // left of the diagonal; with symmetric storage this entry is kept as
+  // the one right of the diagonal in the row above
+  if (i==j+1)
+    return (is_symmetric ? right[j] : left[i]);
+
+  Assert (false, ExcInternalError());
+  return diagonal[0];
+}
+
+
+template <typename number>
+template <class OutputStream>
+void
+TridiagonalMatrix<number>::print (
+  OutputStream &s,
+  const unsigned int width,
+  const unsigned int /*precision*/) const
+{
+  // one output line per row; the precision argument is accepted but unused
+  for (size_type row=0; row<n(); ++row)
+    {
+      // left neighbor, or blanks of the same width in the first row
+      if (row==0)
+        s << std::setw(width) << "";
+      else
+        s << std::setw(width) << (*this)(row,row-1);
+      s << ' ' << (*this)(row,row) << ' ';
+      // right neighbor, which the last row does not have
+      if (row+1<n())
+        s << std::setw(width) << (*this)(row,row+1);
+      s << std::endl;
+    }
+}
+
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
+
diff --git a/include/deal.II/lac/trilinos_block_sparse_matrix.h b/include/deal.II/lac/trilinos_block_sparse_matrix.h
new file mode 100644
index 0000000..7e7ce17
--- /dev/null
+++ b/include/deal.II/lac/trilinos_block_sparse_matrix.h
@@ -0,0 +1,573 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__trilinos_block_sparse_matrix_h
+#define dealii__trilinos_block_sparse_matrix_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_TRILINOS
+
+#  include <deal.II/base/table.h>
+#  include <deal.II/base/template_constraints.h>
+#  include <deal.II/lac/block_matrix_base.h>
+#  include <deal.II/lac/trilinos_sparse_matrix.h>
+#  include <deal.II/lac/trilinos_block_vector.h>
+#  include <deal.II/lac/full_matrix.h>
+#  include <deal.II/lac/exceptions.h>
+
+#  include <cmath>
+
+#  define TrilinosScalar double
+
+DEAL_II_NAMESPACE_OPEN
+
+// forward declarations
+class BlockSparsityPattern;
+template <typename number> class BlockSparseMatrix;
+
+
+namespace TrilinosWrappers
+{
+
+  /*! @addtogroup TrilinosWrappers
+   *@{
+   */
+
+  /**
+   * Blocked sparse matrix based on the TrilinosWrappers::SparseMatrix class.
+   * This class implements the functions that are specific to the Trilinos
+   * SparseMatrix base objects for a blocked sparse matrix, and leaves the
+   * actual work relaying most of the calls to the individual blocks to the
+   * functions implemented in the base class. See there also for a description
+   * of when this class is useful.
+   *
+   * In contrast to the deal.II-type SparseMatrix class, the Trilinos matrices
+   * do not have external objects for the sparsity patterns. Thus, one does
+   * not determine the size of the individual blocks of a block matrix of this
+   * type by attaching a block sparsity pattern, but by calling reinit() to
+   * set the number of blocks and then by setting the size of each block
+   * separately. In order to fix the data structures of the block matrix, it
+   * is then necessary to let it know that we have changed the sizes of the
+   * underlying matrices. For this, one has to call the collect_sizes()
+   * function, for much the same reason as is documented with the
+   * BlockSparsityPattern class.
+   *
+   * @ingroup Matrix1 @see
+   * @ref GlossBlockLA "Block (linear algebra)"
+   * @author Martin Kronbichler, Wolfgang Bangerth, 2008
+   */
+  class BlockSparseMatrix : public BlockMatrixBase<SparseMatrix>
+  {
+  public:
+    /**
+     * Typedef the base class for simpler access to its own typedefs.
+     */
+    typedef BlockMatrixBase<SparseMatrix> BaseClass;
+
+    /**
+     * Typedef the type of the underlying matrix.
+     */
+    typedef BaseClass::BlockType  BlockType;
+
+    /**
+     * Import the typedefs from the base class.
+     */
+    typedef BaseClass::value_type      value_type;
+    typedef BaseClass::pointer         pointer;
+    typedef BaseClass::const_pointer   const_pointer;
+    typedef BaseClass::reference       reference;
+    typedef BaseClass::const_reference const_reference;
+    typedef BaseClass::size_type       size_type;
+    typedef BaseClass::iterator        iterator;
+    typedef BaseClass::const_iterator  const_iterator;
+
+    /**
+     * Constructor; initializes the matrix to be empty, without any structure,
+     * i.e.  the matrix is not usable at all. This constructor is therefore
+     * only useful for matrices which are members of a class. All other
+     * matrices should be created at a point in the data flow where all
+     * necessary information is available.
+     *
+     * You have to initialize the matrix before usage with
+     * reinit(BlockSparsityPattern). The number of blocks per row and column
+     * are then determined by that function.
+     */
+    BlockSparseMatrix ();
+
+    /**
+     * Destructor.
+     */
+    ~BlockSparseMatrix ();
+
+    /**
+     * Pseudo copy operator only copying empty objects. The sizes of the block
+     * matrices need to be the same.
+     */
+    BlockSparseMatrix &
+    operator = (const BlockSparseMatrix &);
+
+    /**
+     * This operator assigns a scalar to a matrix. Since this does usually not
+     * make much sense (should we set all matrix entries to this value? Only
+     * the nonzero entries of the sparsity pattern?), this operation is only
+     * allowed if the actual value to be assigned is zero. This operator only
+     * exists to allow for the obvious notation <tt>matrix=0</tt>, which sets
+     * all elements of the matrix to zero, but keep the sparsity pattern
+     * previously used.
+     */
+    BlockSparseMatrix &
+    operator = (const double d);
+
+    /**
+     * Resize the matrix, by setting the number of block rows and columns.
+     * This deletes all blocks and replaces them with uninitialized ones, i.e.
+     * ones for which also the sizes are not yet set. You have to do that by
+     * calling the @p reinit functions of the blocks themselves. Do not forget
+     * to call collect_sizes() after that on this object.
+     *
+     * The reason that you have to set sizes of the blocks yourself is that
+     * the sizes may be varying, the maximum number of elements per row may be
+     * varying, etc. It is simpler not to reproduce the interface of the @p
+     * SparsityPattern class here but rather let the user call whatever
+     * function she desires.
+     */
+    void reinit (const size_type n_block_rows,
+                 const size_type n_block_columns);
+
+    /**
+     * Resize the matrix, by using an array of Epetra maps to determine the
+     * %parallel distribution of the individual matrices. This function
+     * assumes that a quadratic block matrix is generated.
+     */
+    template <typename BlockSparsityPatternType>
+    void reinit (const std::vector<Epetra_Map>  &input_maps,
+                 const BlockSparsityPatternType &block_sparsity_pattern,
+                 const bool                      exchange_data = false);
+
+    /**
+     * Resize the matrix, by using an array of index sets to determine the
+     * %parallel distribution of the individual matrices. This function
+     * assumes that a quadratic block matrix is generated.
+     */
+    template <typename BlockSparsityPatternType>
+    void reinit (const std::vector<IndexSet>    &input_maps,
+                 const BlockSparsityPatternType &block_sparsity_pattern,
+                 const MPI_Comm                 &communicator  = MPI_COMM_WORLD,
+                 const bool                      exchange_data = false);
+
+    /**
+     * Resize the matrix and initialize it by the given sparsity pattern.
+     * Since no distribution map is given, the result is a block matrix for
+     * which all elements are stored locally.
+     */
+    template <typename BlockSparsityPatternType>
+    void reinit (const BlockSparsityPatternType &block_sparsity_pattern);
+
+    /**
+     * This function initializes the Trilinos matrix using the deal.II sparse
+     * matrix and the entries stored therein. It uses a threshold to copy only
+     * elements whose modulus is larger than the threshold (so zeros in the
+     * deal.II matrix can be filtered away).
+     *
+     * @deprecated Use the respective method with IndexSet arguments instead.
+     */
+    void reinit (const std::vector<Epetra_Map>             &input_maps,
+                 const ::dealii::BlockSparseMatrix<double> &deal_ii_sparse_matrix,
+                 const double                               drop_tolerance=1e-13) DEAL_II_DEPRECATED;
+
+    /**
+     * This function initializes the Trilinos matrix using the deal.II sparse
+     * matrix and the entries stored therein. It uses a threshold to copy only
+     * elements whose modulus is larger than the threshold (so zeros in the
+     * deal.II matrix can be filtered away). Since no Epetra_Map is given, all
+     * the elements will be locally stored.
+     */
+    void reinit (const ::dealii::BlockSparseMatrix<double> &deal_ii_sparse_matrix,
+                 const double                               drop_tolerance=1e-13);
+
+    /**
+     * Returns the state of the matrix, i.e., whether compress() needs to be
+     * called after an operation requiring data exchange. It can only return
+     * <tt>false</tt> when used in <tt>debug</tt> mode, since it is quite
+     * expensive to keep track of all operations that lead to the need for
+     * compress().
+     */
+    bool is_compressed () const;
+
+    /**
+     * This function collects the sizes of the sub-objects and stores them in
+     * internal arrays, in order to be able to relay global indices into the
+     * matrix to indices into the subobjects. You *must* call this function
+     * each time after you have changed the size of the sub-objects. Note that
+     * this is a collective operation, i.e., it needs to be called on all MPI
+     * processes. This command internally calls the method
+     * <tt>compress()</tt>, so you don't need to call that function in case
+     * you use <tt>collect_sizes()</tt>.
+     */
+    void collect_sizes ();
+
+    /**
+     * Return the number of nonzero elements of this matrix.
+     */
+    size_type n_nonzero_elements () const;
+
+    /**
+     * Return a vector of the underlying Trilinos Epetra_Map that sets the
+     * partitioning of the domain space of this block matrix, i.e., the
+     * partitioning of the individual block vectors this matrix has to be
+     * multiplied with.
+     *
+     * @deprecated Use the methods of the individual matrices based on
+     * IndexSet arguments.
+     */
+    std::vector<Epetra_Map> domain_partitioner () const DEAL_II_DEPRECATED;
+
+    /**
+     * Return a vector of the underlying Trilinos Epetra_Map that sets the
+     * partitioning of the range space of this block matrix, i.e., the
+     * partitioning of the individual block vectors that are the result from
+     * matrix-vector products.
+     *
+     * @deprecated Use the methods of the individual matrices based on
+     * IndexSet arguments.
+     */
+    std::vector<Epetra_Map> range_partitioner () const DEAL_II_DEPRECATED;
+
+
+    /**
+     * Matrix-vector multiplication: let $dst = M*src$ with $M$ being this
+     * matrix. The vector types can be block vectors or non-block vectors
+     * (only if the matrix has only one row or column, respectively), and need
+     * to define TrilinosWrappers::SparseMatrix::vmult.
+     */
+    template <typename VectorType1, typename VectorType2>
+    void vmult (VectorType1       &dst,
+                const VectorType2 &src) const;
+
+    /**
+     * Matrix-vector multiplication: let $dst = M^T*src$ with $M$ being this
+     * matrix. This function does the same as vmult() but takes the transposed
+     * matrix.
+     */
+    template <typename VectorType1, typename VectorType2>
+    void Tvmult (VectorType1       &dst,
+                 const VectorType2 &src) const;
+
+    /**
+     * Compute the residual of an equation <i>Mx=b</i>, where the residual is
+     * defined to be <i>r=b-Mx</i>. Write the residual into @p dst. The
+     * <i>l<sub>2</sub></i> norm of the residual vector is returned.
+     *
+     * Source <i>x</i> and destination <i>dst</i> must not be the same vector.
+     *
+     * Note that both vectors have to be distributed vectors generated using
+     * the same Map as was used for the matrix in case you work on a
+     * distributed memory architecture, using the interface in the
+     * TrilinosWrappers::MPI::BlockVector class.
+     */
+    TrilinosScalar residual (MPI::BlockVector       &dst,
+                             const MPI::BlockVector &x,
+                             const MPI::BlockVector &b) const;
+
+    /**
+     * Compute the residual of an equation <i>Mx=b</i>, where the residual is
+     * defined to be <i>r=b-Mx</i>. Write the residual into @p dst. The
+     * <i>l<sub>2</sub></i> norm of the residual vector is returned.
+     *
+     * Source <i>x</i> and destination <i>dst</i> must not be the same vector.
+     *
+     * Note that both vectors have to be distributed vectors generated using
+     * the same Map as was used for the matrix in case you work on a
+     * distributed memory architecture, using the interface in the
+     * TrilinosWrappers::BlockVector class. Since the block matrix is in
+     * general distributed among processes, this function only works when
+     * running the program on one processor.
+     */
+    TrilinosScalar residual (BlockVector       &dst,
+                             const BlockVector &x,
+                             const BlockVector &b) const;
+
+    /**
+     * Compute the residual of an equation <i>Mx=b</i>, where the residual is
+     * defined to be <i>r=b-Mx</i>. Write the residual into @p dst. The
+     * <i>l<sub>2</sub></i> norm of the residual vector is returned. Just like
+     * the previous function, but only applicable if the matrix only has one
+     * block column.
+     */
+    TrilinosScalar residual (MPI::BlockVector       &dst,
+                             const MPI::Vector      &x,
+                             const MPI::BlockVector &b) const;
+
+    /**
+     * Compute the residual of an equation <i>Mx=b</i>, where the residual is
+     * defined to be <i>r=b-Mx</i>. Write the residual into @p dst. The
+     * <i>l<sub>2</sub></i> norm of the residual vector is returned. Just like
+     * the previous function, but only applicable if the matrix only has one
+     * block column.
+     */
+    TrilinosScalar residual (BlockVector       &dst,
+                             const Vector      &x,
+                             const BlockVector &b) const;
+
+    /**
+     * Compute the residual of an equation <i>Mx=b</i>, where the residual is
+     * defined to be <i>r=b-Mx</i>. Write the residual into @p dst. The
+     * <i>l<sub>2</sub></i> norm of the residual vector is returned. Just like
+     * the previous function, but only applicable if the matrix only has one
+     * block row.
+     */
+    TrilinosScalar residual (MPI::Vector            &dst,
+                             const MPI::BlockVector &x,
+                             const MPI::Vector      &b) const;
+
+    /**
+     * Compute the residual of an equation <i>Mx=b</i>, where the residual is
+     * defined to be <i>r=b-Mx</i>. Write the residual into @p dst. The
+     * <i>l<sub>2</sub></i> norm of the residual vector is returned. Just like
+     * the previous function, but only applicable if the matrix only has one
+     * block row.
+     */
+    TrilinosScalar residual (Vector            &dst,
+                             const BlockVector &x,
+                             const Vector      &b) const;
+
+    /**
+     * Compute the residual of an equation <i>Mx=b</i>, where the residual is
+     * defined to be <i>r=b-Mx</i>. Write the residual into @p dst. The
+     * <i>l<sub>2</sub></i> norm of the residual vector is returned. Just like
+     * the previous function, but only applicable if the matrix only has one
+     * block.
+     */
+    TrilinosScalar residual (VectorBase       &dst,
+                             const VectorBase &x,
+                             const VectorBase &b) const;
+
+    /**
+     * Make the clear() function in the base class visible, though it is
+     * protected.
+     */
+    using BlockMatrixBase<SparseMatrix>::clear;
+
+    /**
+     * @addtogroup Exceptions
+     * @{
+     */
+
+    /**
+     * Exception
+     */
+    DeclException4 (ExcIncompatibleRowNumbers,
+                    int, int, int, int,
+                    << "The blocks [" << arg1 << ',' << arg2 << "] and ["
+                    << arg3 << ',' << arg4 << "] have differing row numbers.");
+
+    /**
+     * Exception
+     */
+    DeclException4 (ExcIncompatibleColNumbers,
+                    int, int, int, int,
+                    << "The blocks [" << arg1 << ',' << arg2 << "] and ["
+                    << arg3 << ',' << arg4 << "] have differing column numbers.");
+    ///@}
+
+  private:
+    /**
+     * Internal version of (T)vmult with two block vectors
+     */
+    template <typename VectorType1, typename VectorType2>
+    void vmult (VectorType1       &dst,
+                const VectorType2 &src,
+                const bool         transpose,
+                const dealii::internal::bool2type<true>,
+                const dealii::internal::bool2type<true>) const;
+
+    /**
+     * Internal version of (T)vmult where the source vector is a block vector
+     * but the destination vector is a non-block vector
+     */
+    template <typename VectorType1, typename VectorType2>
+    void vmult (VectorType1       &dst,
+                const VectorType2 &src,
+                const bool         transpose,
+                const dealii::internal::bool2type<false>,
+                const dealii::internal::bool2type<true>) const;
+
+    /**
+     * Internal version of (T)vmult where the source vector is a non-block
+     * vector but the destination vector is a block vector
+     */
+    template <typename VectorType1, typename VectorType2>
+    void vmult (VectorType1       &dst,
+                const VectorType2 &src,
+                const bool         transpose,
+                const dealii::internal::bool2type<true>,
+                const dealii::internal::bool2type<false>) const;
+
+    /**
+     * Internal version of (T)vmult where both source vector and the
+     * destination vector are non-block vectors (only defined if the matrix
+     * consists of only one block)
+     */
+    template <typename VectorType1, typename VectorType2>
+    void vmult (VectorType1       &dst,
+                const VectorType2 &src,
+                const bool         transpose,
+                const dealii::internal::bool2type<false>,
+                const dealii::internal::bool2type<false>) const;
+  };
+
+
+
+  /*@}*/
+
+// ------------- inline and template functions -----------------
+
+
+
+  inline
+  BlockSparseMatrix &
+  BlockSparseMatrix::operator = (const double d)
+  {
+    // Scalar assignment is only accepted for zero; it is passed to each block.
+    Assert (d==0, ExcScalarAssignmentOnlyForZeroValue());
+    for (size_type block_row=0; block_row<this->n_block_rows(); ++block_row)
+      for (size_type block_col=0; block_col<this->n_block_cols(); ++block_col)
+        this->block(block_row,block_col) = d;
+
+    return *this;
+  }
+
+
+
+  inline
+  bool
+  BlockSparseMatrix::is_compressed () const
+  {
+    // The block matrix counts as compressed only if every block is. Return
+    // as soon as one uncompressed block is found: a plain 'break' would
+    // leave only the inner loop, so the remaining block rows would still
+    // be scanned even though the answer is already known.
+    for (size_type row=0; row<n_block_rows(); ++row)
+      for (size_type col=0; col<n_block_cols(); ++col)
+        if (block(row, col).is_compressed() == false)
+          return false;
+
+    return true;
+  }
+
+
+
+  template <typename VectorType1, typename VectorType2>
+  inline void
+  BlockSparseMatrix::vmult (VectorType1       &dst,
+                            const VectorType2 &src) const
+  {
+    // non-transposed product; the tags pick the (non-)block implementation
+    typedef dealii::internal::bool2type<IsBlockVector<VectorType1>::value> DstTag;
+    typedef dealii::internal::bool2type<IsBlockVector<VectorType2>::value> SrcTag;
+    vmult(dst, src, false, DstTag(), SrcTag());
+  }
+
+
+
+  template <typename VectorType1, typename VectorType2>
+  inline void
+  BlockSparseMatrix::Tvmult (VectorType1       &dst,
+                             const VectorType2 &src) const
+  {
+    // transposed product; the tags pick the (non-)block implementation
+    typedef dealii::internal::bool2type<IsBlockVector<VectorType1>::value> DstTag;
+    typedef dealii::internal::bool2type<IsBlockVector<VectorType2>::value> SrcTag;
+    vmult(dst, src, true, DstTag(), SrcTag());
+  }
+
+
+
+  template <typename VectorType1, typename VectorType2>
+  inline void
+  BlockSparseMatrix::vmult (VectorType1       &dst,
+                            const VectorType2 &src,
+                            const bool         transpose,
+                            dealii::internal::bool2type<true>,
+                            dealii::internal::bool2type<true>) const
+  {
+    // destination and source are both block vectors
+    if (!transpose)
+      BaseClass::vmult_block_block (dst, src);
+    else
+      BaseClass::Tvmult_block_block (dst, src);
+  }
+
+
+
+
+  template <typename VectorType1, typename VectorType2>
+  inline void
+  BlockSparseMatrix::vmult (VectorType1       &dst,
+                            const VectorType2 &src,
+                            const bool         transpose,
+                            dealii::internal::bool2type<false>,
+                            dealii::internal::bool2type<true>) const
+  {
+    // non-block destination, block source
+    if (!transpose)
+      BaseClass::vmult_nonblock_block (dst, src);
+    else
+      BaseClass::Tvmult_nonblock_block (dst, src);
+  }
+
+
+
+  template <typename VectorType1, typename VectorType2>
+  inline void
+  BlockSparseMatrix::vmult (VectorType1       &dst,
+                            const VectorType2 &src,
+                            const bool         transpose,
+                            dealii::internal::bool2type<true>,
+                            dealii::internal::bool2type<false>) const
+  {
+    // block destination, non-block source
+    if (!transpose)
+      BaseClass::vmult_block_nonblock (dst, src);
+    else
+      BaseClass::Tvmult_block_nonblock (dst, src);
+  }
+
+
+
+  template <typename VectorType1, typename VectorType2>
+  inline void
+  BlockSparseMatrix::vmult (VectorType1       &dst,
+                            const VectorType2 &src,
+                            const bool         transpose,
+                            dealii::internal::bool2type<false>,
+                            dealii::internal::bool2type<false>) const
+  {
+    // neither destination nor source is a block vector
+    if (!transpose)
+      BaseClass::vmult_nonblock_nonblock (dst, src);
+    else
+      BaseClass::Tvmult_nonblock_nonblock (dst, src);
+  }
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif    // DEAL_II_WITH_TRILINOS
+
+#endif    // dealii__trilinos_block_sparse_matrix_h
diff --git a/include/deal.II/lac/trilinos_block_vector.h b/include/deal.II/lac/trilinos_block_vector.h
new file mode 100644
index 0000000..2792388
--- /dev/null
+++ b/include/deal.II/lac/trilinos_block_vector.h
@@ -0,0 +1,501 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__trilinos_block_vector_h
+#define dealii__trilinos_block_vector_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_TRILINOS
+
+#  include <deal.II/lac/trilinos_vector.h>
+#  include <deal.II/lac/trilinos_parallel_block_vector.h>
+#  include <deal.II/lac/block_indices.h>
+#  include <deal.II/lac/block_vector_base.h>
+#  include <deal.II/lac/exceptions.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+// forward declaration
+template <typename Number> class BlockVector;
+
+/*! @addtogroup TrilinosWrappers
+ *@{
+ */
+
+namespace TrilinosWrappers
+{
+  // forward declaration
+  namespace MPI
+  {
+    class BlockVector;
+  }
+  class BlockVector;
+  class BlockSparseMatrix;
+
+
+  /**
+   * An implementation of block vectors based on the vector class implemented
+   * in TrilinosWrappers. While the base class provides for most of the
+   * interface, this class handles the actual allocation of vectors and
+   * provides functions that are specific to the underlying vector type.
+   *
+   * In contrast to the class MPI::BlockVector, this class is based on a
+   * localized version of the vectors, which means that the whole vector is
+   * stored on each processor. Note that matrix vector products with this
+   * block vector class do only work in case the program is run on only one
+   * processor, since the Trilinos matrices are inherently parallel.
+   *
+   * This class is deprecated, use TrilinosWrappers::MPI::BlockVector instead.
+   *
+   * @ingroup Vectors
+   * @ingroup TrilinosWrappers
+   * @see @ref GlossBlockLA "Block (linear algebra)"
+   * @author Martin Kronbichler, 2008
+   */
+  class BlockVector : public BlockVectorBase<Vector>
+  {
+  public:
+    /**
+     * Typedef the base class for simpler access to its own typedefs.
+     */
+    typedef BlockVectorBase<Vector> BaseClass;
+
+    /**
+     * Typedef the type of the underlying vector.
+     */
+    typedef BaseClass::BlockType  BlockType;
+
+    /**
+     * Import the typedefs from the base class.
+     */
+    typedef BaseClass::value_type      value_type;
+    typedef BaseClass::pointer         pointer;
+    typedef BaseClass::const_pointer   const_pointer;
+    typedef BaseClass::reference       reference;
+    typedef BaseClass::const_reference const_reference;
+    typedef BaseClass::size_type       size_type;
+    typedef BaseClass::iterator        iterator;
+    typedef BaseClass::const_iterator  const_iterator;
+
+    /**
+     * Default constructor. Generate an empty vector without any blocks.
+     */
+    BlockVector ();
+
+    /**
+     * Constructor. Generate a block vector with as many blocks as there are
+     * entries in @p partitioner. For this non-distributed vector, the %parallel
+     * partitioning is not used, just the global size of the partitioner.
+     */
+    explicit BlockVector (const std::vector<Epetra_Map> &partitioner) DEAL_II_DEPRECATED;
+
+    /**
+     * Constructor. Generate a block vector with as many blocks as there are
+     * entries in @p partitioner. For this non-distributed vector, the %parallel
+     * partitioning is not used, just the global size of the partitioner.
+     */
+    explicit BlockVector (const std::vector<IndexSet> &partitioner,
+                          const MPI_Comm              &communicator = MPI_COMM_WORLD) DEAL_II_DEPRECATED;
+
+    /**
+     * Copy-Constructor. Set all the properties of the non-%parallel vector to
+     * those of the given %parallel vector and import the elements.
+     */
+    BlockVector (const MPI::BlockVector &V) DEAL_II_DEPRECATED;
+
+    /**
+     * Copy-Constructor. Set all the properties of the vector to those of the
+     * given input vector and copy the elements.
+     */
+    BlockVector (const BlockVector  &V) DEAL_II_DEPRECATED;
+
+    /**
+     * Creates a block vector consisting of <tt>num_blocks</tt> components,
+     * but there is no content in the individual components and the user has
+     * to fill appropriate data using a reinit of the blocks.
+     */
+    explicit BlockVector (const size_type num_blocks) DEAL_II_DEPRECATED;
+
+    /**
+     * Constructor. Set the number of blocks to <tt>N.size()</tt> and
+     * initialize each block with <tt>N[i]</tt> zero elements.
+     *
+     * References BlockVector.reinit().
+     */
+    explicit BlockVector (const std::vector<size_type> &N) DEAL_II_DEPRECATED;
+
+    /**
+     * Constructor. Set the number of blocks to <tt>n.size()</tt>. Initialize
+     * the vector with the elements pointed to by the range of iterators given
+     * as second and third argument. Apart from the first argument, this
+     * constructor is in complete analogy to the respective constructor of the
+     * <tt>std::vector</tt> class, but the first argument is needed in order
+     * to know how to subdivide the block vector into different blocks.
+     */
+    template <typename InputIterator>
+    BlockVector (const std::vector<size_type> &n,
+                 const InputIterator           first,
+                 const InputIterator           end) DEAL_II_DEPRECATED;
+
+    /**
+     * Destructor. Clears memory
+     */
+    ~BlockVector ();
+
+    /**
+     * Copy operator: fill all components of the vector that are locally
+     * stored with the given scalar value.
+     */
+    BlockVector &
+    operator = (const value_type s);
+
+    /**
+     * Copy operator for a distributed Trilinos vector to a localized one.
+     */
+    BlockVector &
+    operator = (const MPI::BlockVector &V);
+
+    /**
+     * Copy operator for arguments of the same type.
+     */
+    BlockVector &
+    operator = (const BlockVector &V);
+
+    /**
+     * Another copy function. This one takes a deal.II block vector and copies
+     * it into a TrilinosWrappers block vector. Note that the number of blocks
+     * has to be the same in the vector as in the input vector. Use the
+     * reinit() command for resizing the BlockVector or for changing the
+     * internal structure of the block components.
+     *
+     * Since Trilinos only works on doubles, this function is limited to
+     * accept only one possible number type in the deal.II vector.
+     */
+    template <typename Number>
+    BlockVector &
+    operator = (const ::dealii::BlockVector<Number> &V);
+
+    /**
+     * Reinitialize the BlockVector to contain as many blocks as there are
+     * Epetra_Maps given in the input argument, according to the global size
+     * of the individual components described in the maps. Note that the
+     * resulting vector will be stored completely on each process. The
+     * Epetra_Map is useful when data exchange with a distributed vector based
+     * on the same Epetra_map is intended. In that case, the same communicator
+     * is used for data exchange.
+     *
+     * If <tt>omit_zeroing_entries==false</tt>, the vector is filled with
+     * zeros.
+     */
+    void reinit (const std::vector<Epetra_Map> &partitioning,
+                 const bool                     omit_zeroing_entries = false);
+
+    /**
+     * Reinitialize the BlockVector to contain as many blocks as there are
+     * index sets given in the input argument, according to the global size of
+     * the individual components described in the index set, and using a given
+     * MPI communicator. The MPI communicator is useful when data exchange
+     * with a distributed vector based on the same initialization is intended.
+     * In that case, the same communicator is used for data exchange.
+     *
+     * If <tt>omit_zeroing_entries==false</tt>, the vector is filled with
+     * zeros.
+     */
+    void reinit (const std::vector<IndexSet> &partitioning,
+                 const MPI_Comm              &communicator = MPI_COMM_WORLD,
+                 const bool                   omit_zeroing_entries = false);
+
+    /**
+     * Reinitialize the BlockVector to contain as many blocks as there are
+     * elements in the first argument, and with the respective sizes. Since no
+     * distribution map is given, all vectors are local vectors.
+     *
+     * If <tt>omit_zeroing_entries==false</tt>, the vector is filled with
+     * zeros.
+     */
+    void reinit (const std::vector<size_type> &N,
+                 const bool                    omit_zeroing_entries=false);
+
+    /**
+     * Reinitialize the vector in the same way as the given distributed
+     * block vector. The elements will be copied in this process.
+     */
+    void reinit (const MPI::BlockVector &V);
+
+    /**
+     * Change the dimension to that of the vector <tt>V</tt>. The same applies
+     * as for the other reinit() function.
+     *
+     * The elements of <tt>V</tt> are not copied, i.e.  this function is the
+     * same as calling <tt>reinit (V.size(), omit_zeroing_entries)</tt>.
+     *
+     * Note that you must call this (or the other reinit() functions)
+     * function, rather than calling the reinit() functions of an individual
+     * block, to allow the block vector to update its caches of vector sizes.
+     * If you call reinit() on one of the blocks, then subsequent actions on
+     * this object may yield unpredictable results since they may be routed to
+     * the wrong block.
+     */
+    void reinit (const BlockVector &V,
+                 const bool omit_zeroing_entries = false);
+
+    /**
+     * Change the number of blocks to <tt>num_blocks</tt>. The individual
+     * blocks will get initialized with zero size, so it is assumed that the
+     * user resizes the individual blocks by herself in an appropriate way,
+     * and calls <tt>collect_sizes</tt> afterwards.
+     */
+    void reinit (const size_type num_blocks);
+
+    /**
+     * Swap the contents of this vector and the other vector <tt>v</tt>. One
+     * could do this operation with a temporary variable and copying over the
+     * data elements, but this function is significantly more efficient since
+     * it only swaps the pointers to the data of the two vectors and therefore
+     * does not need to allocate temporary storage and move data around.
+     *
+     * Limitation: right now this function only works if both vectors have the
+     * same number of blocks. If needed, the numbers of blocks should be
+     * exchanged, too.
+     *
+     * This function is analogous to the swap() function of all C++ standard
+     * containers. Also, there is a global function swap(u,v) that simply
+     * calls <tt>u.swap(v)</tt>, again in analogy to standard functions.
+     */
+    void swap (BlockVector &v);
+
+    /**
+     * Print to a stream.
+     */
+    void print (std::ostream       &out,
+                const unsigned int  precision = 3,
+                const bool          scientific = true,
+                const bool          across = true) const;
+
+    /**
+     * Exception
+     */
+    DeclException0 (ExcIteratorRangeDoesNotMatchVectorSize);
+
+    /**
+     * Exception
+     */
+    DeclException0 (ExcNonMatchingBlockVectors);
+
+    /**
+     * Exception for a map that does not assign all elements locally.
+     */
+    DeclException2 (ExcNonLocalizedMap,
+                    int, int,
+                    << "For the generation of a localized vector the map has "
+                    << "to assign all elements to all vectors! "
+                    << "local_size = global_size is a necessary condition, but "
+                    << arg1 << " != " << arg2 << " was given!");
+
+  };
+
+
+
+  /*----------------------- Inline functions ----------------------------------*/
+
+
+
+  inline
+  BlockVector::BlockVector ()
+  {} // nothing to set up: the vector starts out without any blocks
+
+
+
+  inline
+  BlockVector::BlockVector (const std::vector<Epetra_Map> &partitioning)
+  {
+    reinit (partitioning); // one block per map; entries are zeroed by default
+  }
+
+
+
+  inline
+  BlockVector::BlockVector (const std::vector<IndexSet> &partitioning,
+                            const MPI_Comm              &communicator)
+  {
+    reinit (partitioning, communicator); // entries are zeroed by default
+  }
+
+
+
+  inline
+  BlockVector::BlockVector (const std::vector<size_type> &N)
+  {
+    reinit (N); // one zero-filled block per entry of N
+  }
+
+
+
+  template <typename InputIterator>
+  BlockVector::BlockVector (const std::vector<size_type> &n,
+                            const InputIterator           first,
+                            const InputIterator           end)
+  {
+    // 'end' is only read by the Assert at the bottom; the cast presumably
+    // silences unused-parameter warnings when that Assert expands to nothing.
+    (void)end;
+
+    // Size the blocks from n without zeroing them (second argument is
+    // omit_zeroing_entries): every entry is overwritten right below.
+    reinit (n, true);
+
+    // Single pass over the input: block b consumes n[b] elements. No copy
+    // of the iterator is advanced ahead, so single-pass iterators work too.
+    InputIterator start = first;
+    for (size_type b=0; b<n.size(); ++b)
+      for (size_type i=0; i<n[b]; ++i, ++start)
+        this->block(b)(i) = *start;
+    Assert (start == end, ExcIteratorRangeDoesNotMatchVectorSize());
+  }
+
+
+
+  inline
+  BlockVector::BlockVector (const size_type num_blocks)
+  {
+    reinit (num_blocks); // blocks start out empty; the user has to size them
+  }
+
+
+
+  inline
+  BlockVector::~BlockVector()
+  {} // empty body; members and the base class clean up after themselves
+
+
+
+  inline
+  BlockVector::BlockVector (const MPI::BlockVector &v)
+  {
+    reinit (v); // takes over the layout of v and copies its elements
+  }
+
+
+
+  inline
+  BlockVector::BlockVector (const BlockVector &v)
+    :
+    BaseClass ()
+  {
+    // take over the block layout first, then copy block by block
+    this->block_indices = v.block_indices;
+    this->components.resize (v.n_blocks());
+    for (size_type b=0; b<this->n_blocks(); ++b)
+      this->components[b] = v.components[b];
+  }
+
+
+  inline
+  void
+  BlockVector::swap (BlockVector &v)
+  {
+    // swapping is done block by block, so both block counts must agree
+    Assert (this->n_blocks() == v.n_blocks(),
+            ExcDimensionMismatch(this->n_blocks(), v.n_blocks()));
+    for (unsigned int b=0; b<this->n_blocks(); ++b)
+      this->block(b).swap (v.block(b));
+  }
+
+
+  template <typename Number>
+  BlockVector &
+  BlockVector::operator = (const ::dealii::BlockVector<Number> &v)
+  {
+    // On a block-count mismatch, adopt the number of blocks of v using
+    // zero-sized placeholders before the blocks themselves are assigned.
+    if (this->n_blocks() != v.n_blocks())
+      {
+        const std::vector<size_type> empty_sizes (v.n_blocks(), 0);
+        this->block_indices.reinit (empty_sizes);
+        if (this->components.size() != this->n_blocks())
+          this->components.resize (this->n_blocks());
+      }
+    for (size_type b=0; b<this->n_blocks(); ++b)
+      this->components[b] = v.block(b);
+    // re-collect the block sizes now that the blocks were assigned
+    this->collect_sizes();
+    return *this;
+  }
+
+
+  /**
+   * Global swap() overload for block vectors. It forwards to the member
+   * function <tt>u.swap(v)</tt> and thereby avoids the temporary object
+   * that the default implementation of the C++ standard library uses.
+   *
+   * @relates TrilinosWrappers::BlockVector
+   * @author Martin Kronbichler, 2008
+   */
+  inline
+  void swap (BlockVector &u,
+             BlockVector &v)
+  {
+    u.swap (v);
+  }
+
+} /* namespace TrilinosWrappers */
+
+/*@}*/
+
+
+namespace internal
+{
+  namespace LinearOperator
+  {
+    template <typename> class ReinitHelper;
+
+    /**
+     * linear_operator.h helper, specialized for TrilinosWrappers::BlockVector:
+     * reinit() @p v from the matrix's range or domain partitioner.
+     */
+    template<>
+    class ReinitHelper<TrilinosWrappers::BlockVector>
+    {
+    public:
+      template <typename Matrix>
+      static
+      void reinit_range_vector (const Matrix &matrix,
+                                TrilinosWrappers::BlockVector &v,
+                                bool omit_zeroing_entries)
+      {
+        v.reinit(matrix.range_partitioner(), omit_zeroing_entries);
+      }
+
+      template <typename Matrix>
+      static
+      void reinit_domain_vector(const Matrix &matrix,
+                                TrilinosWrappers::BlockVector &v,
+                                bool omit_zeroing_entries)
+      {
+        v.reinit(matrix.domain_partitioner(), omit_zeroing_entries);
+      }
+    };
+
+  } /* namespace LinearOperator */
+} /* namespace internal */
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif  // DEAL_II_WITH_TRILINOS
+
+#endif
diff --git a/include/deal.II/lac/trilinos_parallel_block_vector.h b/include/deal.II/lac/trilinos_parallel_block_vector.h
new file mode 100644
index 0000000..9213f88
--- /dev/null
+++ b/include/deal.II/lac/trilinos_parallel_block_vector.h
@@ -0,0 +1,525 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__trilinos_parallel_block_vector_h
+#define dealii__trilinos_parallel_block_vector_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_TRILINOS
+
+#  include <deal.II/lac/trilinos_vector.h>
+#  include <deal.II/lac/block_indices.h>
+#  include <deal.II/lac/block_vector_base.h>
+#  include <deal.II/lac/exceptions.h>
+
+#  include <functional>
+
+DEAL_II_NAMESPACE_OPEN
+
+// forward declaration
+template <typename Number> class BlockVector;
+
+/*! @addtogroup TrilinosWrappers
+ *@{
+ */
+
+namespace TrilinosWrappers
+{
+  // forward declaration
+  namespace MPI
+  {
+    class BlockVector;
+  }
+  class BlockVector;
+  class BlockSparseMatrix;
+
+
+  namespace MPI
+  {
+    /**
+     * An implementation of block vectors based on the vector class
+     * implemented in TrilinosWrappers. While the base class provides for most
+     * of the interface, this class handles the actual allocation of vectors
+     * and provides functions that are specific to the underlying vector type.
+     *
+     * The model of distribution of data is such that each of the blocks is
+     * distributed across all MPI processes named in the MPI communicator.
+     * I.e. we don't just distribute the whole vector, but each component. In
+     * the constructors and reinit() functions, one therefore not only has to
+     * specify the sizes of the individual blocks, but also the number of
+     * elements of each of these blocks to be stored on the local process.
+     *
+     * @ingroup Vectors
+     * @ingroup TrilinosWrappers
+     * @see @ref GlossBlockLA "Block (linear algebra)"
+     * @author Martin Kronbichler, Wolfgang Bangerth, 2008, 2009
+     */
+    class BlockVector : public BlockVectorBase<Vector>
+    {
+    public:
+      /**
+       * Typedef the base class for simpler access to its own typedefs.
+       */
+      typedef BlockVectorBase<Vector> BaseClass;
+
+      /**
+       * Typedef the type of the underlying vector.
+       */
+      typedef BaseClass::BlockType  BlockType;
+
+      /**
+       * Import the typedefs from the base class.
+       */
+      typedef BaseClass::value_type      value_type;
+      typedef BaseClass::pointer         pointer;
+      typedef BaseClass::const_pointer   const_pointer;
+      typedef BaseClass::reference       reference;
+      typedef BaseClass::const_reference const_reference;
+      typedef BaseClass::size_type       size_type;
+      typedef BaseClass::iterator        iterator;
+      typedef BaseClass::const_iterator  const_iterator;
+
+      /**
+       * Default constructor. Generate an empty vector without any blocks.
+       */
+      BlockVector ();
+
+      /**
+       * Constructor. Generate a block vector with as many blocks as there are
+       * entries in @p parallel_partitioning. Each Epetra_Map contains the
+       * layout of the distribution of data among the MPI processes.
+       *
+       * This function is deprecated.
+       */
+      explicit BlockVector (const std::vector<Epetra_Map> &parallel_partitioning) DEAL_II_DEPRECATED;
+
+      /**
+       * Constructor. Generate a block vector with as many blocks as there are
+       * entries in @p parallel_partitioning. Each IndexSet together with the
+       * MPI communicator contains the layout of the distribution of data
+       * among the MPI processes.
+       */
+      explicit BlockVector (const std::vector<IndexSet> &parallel_partitioning,
+                            const MPI_Comm              &communicator = MPI_COMM_WORLD);
+
+      /**
+       * Creates a BlockVector with ghost elements. See the respective
+       * reinit() method for more details. @p ghost_values may contain any
+       * elements in @p parallel_partitioning; they will be ignored.
+       */
+      BlockVector (const std::vector<IndexSet> &parallel_partitioning,
+                   const std::vector<IndexSet> &ghost_values,
+                   const MPI_Comm              &communicator,
+                   const bool                   vector_writable = false);
+
+      /**
+       * Copy-Constructor. Set all the properties of the parallel vector to
+       * those of the given argument and copy the elements.
+       */
+      BlockVector (const BlockVector  &v);
+
+#ifdef DEAL_II_WITH_CXX11
+      /**
+       * Move constructor. Creates a new vector by stealing the internal data
+       * of the vector @p v.
+       *
+       * @note This constructor is only available if deal.II is configured
+       * with C++11 support.
+       */
+      BlockVector (BlockVector &&v);
+#endif
+
+      /**
+       * Creates a block vector consisting of <tt>num_blocks</tt> components,
+       * but there is no content in the individual components and the user has
+       * to fill appropriate data using a reinit of the blocks.
+       */
+      explicit BlockVector (const size_type num_blocks);
+
+      /**
+       * Destructor. Clears memory.
+       */
+      ~BlockVector ();
+
+      /**
+       * Copy operator: fill all components of the vector that are locally
+       * stored with the given scalar value.
+       */
+      BlockVector &operator= (const value_type s);
+
+      /**
+       * Copy operator for arguments of the same type.
+       */
+      BlockVector &operator= (const BlockVector &v);
+
+#ifdef DEAL_II_WITH_CXX11
+      /**
+       * Move the given vector. This operator replaces the present vector with
+       * @p v by efficiently swapping the internal data structures.
+       *
+       * @note This operator is only available if deal.II is configured with
+       * C++11 support.
+       */
+      BlockVector &operator= (BlockVector &&v);
+#endif
+
+      /**
+       * Copy operator for arguments of the localized Trilinos vector type.
+       */
+      BlockVector &
+      operator= (const ::dealii::TrilinosWrappers::BlockVector &v);
+
+      /**
+       * Another copy function. This one takes a deal.II block vector and
+       * copies it into a TrilinosWrappers block vector. Note that the number
+       * of blocks has to be the same in the vector as in the input vector.
+       * Use the reinit() command for resizing the BlockVector or for changing
+       * the internal structure of the block components.
+       *
+       * Since Trilinos only works on doubles, this function is limited to
+       * accept only one possible number type in the deal.II vector.
+       */
+      template <typename Number>
+      BlockVector &operator= (const ::dealii::BlockVector<Number> &v);
+
+      /**
+       * Reinitialize the BlockVector to contain as many blocks as there are
+       * Epetra_Maps given in the input argument, according to the parallel
+       * distribution of the individual components described in the maps.
+       *
+       * If <tt>omit_zeroing_entries==false</tt>, the vector is filled with
+       * zeros.
+       *
+       * This function is deprecated.
+       */
+      void reinit (const std::vector<Epetra_Map> &parallel_partitioning,
+                   const bool                     omit_zeroing_entries = false) DEAL_II_DEPRECATED;
+
+      /**
+       * Reinitialize the BlockVector to contain as many blocks as there are
+       * index sets given in the input argument, according to the parallel
+       * distribution of the individual components described in the index sets.
+       *
+       * If <tt>omit_zeroing_entries==false</tt>, the vector is filled with
+       * zeros.
+       */
+      void reinit (const std::vector<IndexSet> &parallel_partitioning,
+                   const MPI_Comm              &communicator = MPI_COMM_WORLD,
+                   const bool                   omit_zeroing_entries = false);
+
+      /**
+       * Reinit functionality. This function destroys the old vector content
+       * and generates a new one based on the input partitioning. In addition
+       * to just specifying one index set as in all the other methods above,
+       * this method allows to supply an additional set of ghost entries.
+       * There are two different versions of a vector that can be created. If
+       * the flag @p vector_writable is set to @p false, the vector only
+       * allows read access to the joint set of @p partitioning and
+       * @p ghost_values. The effect of the reinit method is then equivalent
+       * to calling the other reinit method with an index set containing both
+       * the locally owned entries and the ghost entries.
+       *
+       * If the flag @p vector_writable is set to true, this creates an
+       * alternative storage scheme for ghost elements that allows multiple
+       * threads to write into the vector (for the other reinit methods, only
+       * one thread is allowed to write into the ghost entries at a time).
+       */
+      void reinit (const std::vector<IndexSet> &partitioning,
+                   const std::vector<IndexSet> &ghost_values,
+                   const MPI_Comm              &communicator = MPI_COMM_WORLD,
+                   const bool      vector_writable = false);
+
+
+      /**
+       * Change the dimension to that of the vector <tt>V</tt>. The same
+       * applies as for the other reinit() function.
+       *
+       * The elements of <tt>V</tt> are not copied, i.e.  this function is the
+       * same as calling <tt>reinit (V.size(), omit_zeroing_entries)</tt>.
+       *
+       * Note that you must call this (or the other reinit() functions)
+       * function, rather than calling the reinit() functions of an individual
+       * block, to allow the block vector to update its caches of vector
+       * sizes. If you call reinit() on one of the blocks, then subsequent
+       * actions on this object may yield unpredictable results since they may
+       * be routed to the wrong block.
+       */
+      void reinit (const BlockVector &V,
+                   const bool omit_zeroing_entries = false);
+
+      /**
+       * Change the number of blocks to <tt>num_blocks</tt>. The individual
+       * blocks will get initialized with zero size, so it is assumed that the
+       * user resizes the individual blocks by herself in an appropriate way,
+       * and calls <tt>collect_sizes</tt> afterwards.
+       */
+      void reinit (const size_type num_blocks);
+
+      /**
+       * This reinit function is meant to be used for parallel calculations
+       * where some non-local data has to be used. The typical situation where
+       * one needs this function is the call of the
+       * FEValues<dim>::get_function_values function (or of some derivatives)
+       * in parallel. Since it is usually faster to retrieve the data in
+       * advance, this function can be called before the assembly forks out to
+       * the different processors. What this function does is the following:
+       * It takes the information in the columns of the given matrix and
+       * determines which data couples between the different processors. That
+       * data is then queried from the input vector. Note that you should not
+       * write to the resulting vector any more, since some data can be stored
+       * several times on different processors, leading to unpredictable
+       * results. In particular, such a vector cannot be used for matrix-
+       * vector products as for example done during the solution of linear
+       * systems.
+       */
+      void import_nonlocal_data_for_fe (const TrilinosWrappers::BlockSparseMatrix &m,
+                                        const BlockVector                         &v);
+
+      /**
+       * Returns the state of the vector, i.e., whether compress() needs to be
+       * called after an operation requiring data exchange. Only returns
+       * non-true values when used in <tt>debug</tt> mode, since it is quite
+       * expensive to keep track of all operations that lead to the need for
+       * compress().
+       *
+       * This function is deprecated.
+       */
+      bool is_compressed () const DEAL_II_DEPRECATED;
+
+      /**
+       * Returns if this Vector contains ghost elements.
+       *
+       * @see
+       * @ref GlossGhostedVector "vectors with ghost elements"
+       */
+      bool has_ghost_elements() const;
+
+      /**
+       * Swap the contents of this vector and the other vector <tt>v</tt>. One
+       * could do this operation with a temporary variable and copying over
+       * the data elements, but this function is significantly more efficient
+       * since it only swaps the pointers to the data of the two vectors and
+       * therefore does not need to allocate temporary storage and move data
+       * around.
+       *
+       * Limitation: right now this function only works if both vectors have
+       * the same number of blocks. If needed, the numbers of blocks should be
+       * exchanged, too.
+       *
+       * This function is analogous to the swap() function of all C++
+       * standard containers. Also, there is a global function swap(u,v) that
+       * simply calls <tt>u.swap(v)</tt>, again in analogy to standard
+       * functions.
+       */
+      void swap (BlockVector &v);
+
+      /**
+       * Print to a stream.
+       */
+      void print (std::ostream       &out,
+                  const unsigned int  precision = 3,
+                  const bool          scientific = true,
+                  const bool          across = true) const;
+
+      /**
+       * Exception
+       */
+      DeclException0 (ExcIteratorRangeDoesNotMatchVectorSize);
+
+      /**
+       * Exception
+       */
+      DeclException0 (ExcNonMatchingBlockVectors);
+    };
+
+
+
+    /*----------------------- Inline functions ----------------------------------*/
+
+
+    inline
+    BlockVector::BlockVector ()
+    {}
+
+
+
+    inline
+    BlockVector::BlockVector (const std::vector<IndexSet> &parallel_partitioning,
+                              const MPI_Comm              &communicator)
+    {
+      reinit (parallel_partitioning, communicator, false);
+    }
+
+
+
+    inline
+    BlockVector::BlockVector (const std::vector<IndexSet> &parallel_partitioning,
+                              const std::vector<IndexSet> &ghost_values,
+                              const MPI_Comm              &communicator,
+                              const bool                   vector_writable)
+    {
+      reinit(parallel_partitioning, ghost_values, communicator,
+             vector_writable);
+    }
+
+
+
+    inline
+    BlockVector::BlockVector (const size_type num_blocks)
+    {
+      reinit (num_blocks);
+    }
+
+
+
+    inline
+    BlockVector::BlockVector (const BlockVector &v)
+      :
+      BlockVectorBase<Vector > ()
+    {
+      this->components.resize (v.n_blocks());
+      this->block_indices = v.block_indices;
+
+      for (size_type i=0; i<this->n_blocks(); ++i)
+        this->components[i] = v.components[i];
+    }
+
+
+
+#ifdef DEAL_II_WITH_CXX11
+    inline
+    BlockVector::BlockVector (BlockVector &&v)
+    {
+      // initialize a minimal, valid object and swap
+      reinit (0);
+      swap(v);
+    }
+#endif
+
+
+
+    template <typename Number>
+    BlockVector &
+    BlockVector::operator= (const ::dealii::BlockVector<Number> &v)
+    {
+      if (n_blocks() != v.n_blocks())
+        {
+          std::vector<size_type> block_sizes (v.n_blocks(), 0);
+          block_indices.reinit (block_sizes);
+          if (components.size() != n_blocks())
+            components.resize(n_blocks());
+        }
+
+      for (size_type i=0; i<this->n_blocks(); ++i)
+        this->components[i] = v.block(i);
+
+      collect_sizes();
+
+      return *this;
+    }
+
+
+
+    inline
+    bool
+    BlockVector::has_ghost_elements() const
+    {
+      bool ghosted=block(0).has_ghost_elements();
+#ifdef DEBUG
+      for (unsigned int i=0; i<this->n_blocks(); ++i)
+        Assert(block(i).has_ghost_elements()==ghosted, ExcInternalError());
+#endif
+      return ghosted;
+    }
+
+
+
+    inline
+    void
+    BlockVector::swap (BlockVector &v)
+    {
+      std::swap(this->components, v.components);
+
+      dealii::swap(this->block_indices, v.block_indices);
+    }
+
+
+
+    /**
+     * Global function which overloads the default implementation of the C++
+     * standard library which uses a temporary object. The function simply
+     * exchanges the data of the two vectors.
+     *
+     * @relates TrilinosWrappers::MPI::BlockVector
+     * @author Martin Kronbichler, Wolfgang Bangerth, 2008
+     */
+    inline
+    void swap (BlockVector &u,
+               BlockVector &v)
+    {
+      u.swap (v);
+    }
+
+  } /* namespace MPI */
+
+} /* namespace TrilinosWrappers */
+
+/*@}*/
+
+
+namespace internal
+{
+  namespace LinearOperator
+  {
+    template <typename> class ReinitHelper;
+
+    /**
+     * A helper class internally used in linear_operator.h. Specialization for
+     * TrilinosWrappers::MPI::BlockVector.
+     */
+    template<>
+    class ReinitHelper<TrilinosWrappers::MPI::BlockVector>
+    {
+    public:
+      template <typename Matrix>
+      static
+      void reinit_range_vector (const Matrix &matrix,
+                                TrilinosWrappers::MPI::BlockVector &v,
+                                bool omit_zeroing_entries)
+      {
+        v.reinit(matrix.range_partitioner(), omit_zeroing_entries);
+      }
+
+      template <typename Matrix>
+      static
+      void reinit_domain_vector(const Matrix &matrix,
+                                TrilinosWrappers::MPI::BlockVector &v,
+                                bool omit_zeroing_entries)
+      {
+        v.reinit(matrix.domain_partitioner(), omit_zeroing_entries);
+      }
+    };
+
+  } /* namespace LinearOperator */
+} /* namespace internal */
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif  // DEAL_II_WITH_TRILINOS
+
+#endif
diff --git a/include/deal.II/lac/trilinos_precondition.h b/include/deal.II/lac/trilinos_precondition.h
new file mode 100644
index 0000000..303d95a
--- /dev/null
+++ b/include/deal.II/lac/trilinos_precondition.h
@@ -0,0 +1,1998 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__trilinos_precondition_h
+#define dealii__trilinos_precondition_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_TRILINOS
+
+#  include <deal.II/base/subscriptor.h>
+#  include <deal.II/base/std_cxx11/shared_ptr.h>
+
+#  include <deal.II/lac/trilinos_vector_base.h>
+#  include <deal.II/lac/parallel_vector.h>
+
+DEAL_II_DISABLE_EXTRA_DIAGNOSTICS
+#  ifdef DEAL_II_WITH_MPI
+#    include <Epetra_MpiComm.h>
+#  else
+#    include <Epetra_SerialComm.h>
+#  endif
+#  include <Epetra_Map.h>
+
+#  include <Teuchos_ParameterList.hpp>
+#  include <Epetra_RowMatrix.h>
+#  include <Epetra_Vector.h>
+DEAL_II_ENABLE_EXTRA_DIAGNOSTICS
+
+// forward declarations
+class Ifpack_Preconditioner;
+class Ifpack_Chebyshev;
+namespace ML_Epetra
+{
+  class MultiLevelPreconditioner;
+}
+
+
+DEAL_II_NAMESPACE_OPEN
+
+// forward declarations
+template <typename number> class SparseMatrix;
+template <typename number> class Vector;
+class SparsityPattern;
+
+/*! @addtogroup TrilinosWrappers
+ *@{
+ */
+
+namespace TrilinosWrappers
+{
+  // forward declarations
+  class SparseMatrix;
+  class BlockSparseMatrix;
+  class SolverBase;
+
+  /**
+   * The base class for all preconditioners based on Trilinos sparse matrices.
+   *
+   * @ingroup TrilinosWrappers
+   * @ingroup Preconditioners
+   * @author Martin Kronbichler, 2008
+   */
+  class PreconditionBase : public Subscriptor
+  {
+  public:
+    /**
+     * Declare the type for container size.
+     */
+    typedef dealii::types::global_dof_index size_type;
+
+    /**
+     * Standardized data struct to pipe additional flags to the
+     * preconditioner.
+     */
+    struct AdditionalData
+    {};
+
+    /**
+     * Constructor. Does not do anything. The <tt>initialize</tt> function of
+     * the derived classes will have to create the preconditioner from a given
+     * sparse matrix.
+     */
+    PreconditionBase ();
+
+    /**
+     * Copy constructor.
+     */
+    PreconditionBase (const PreconditionBase &);
+
+    /**
+     * Destructor.
+     */
+    ~PreconditionBase ();
+
+    /**
+     * Destroys the preconditioner, leaving an object like just after having
+     * called the constructor.
+     */
+    void clear ();
+
+    /**
+     * Apply the preconditioner.
+     */
+    virtual void vmult (VectorBase       &dst,
+                        const VectorBase &src) const;
+
+    /**
+     * Apply the transpose preconditioner.
+     */
+    virtual void Tvmult (VectorBase       &dst,
+                         const VectorBase &src) const;
+
+    /**
+     * Apply the preconditioner on deal.II data structures instead of the ones
+     * provided in the Trilinos wrapper class.
+     */
+    virtual void vmult (dealii::Vector<double>       &dst,
+                        const dealii::Vector<double> &src) const;
+
+    /**
+     * Apply the transpose preconditioner on deal.II data structures instead
+     * of the ones provided in the Trilinos wrapper class.
+     */
+    virtual void Tvmult (dealii::Vector<double>       &dst,
+                         const dealii::Vector<double> &src) const;
+
+    /**
+     * Apply the preconditioner on deal.II parallel data structures instead of
+     * the ones provided in the Trilinos wrapper class.
+     */
+    virtual void vmult (dealii::parallel::distributed::Vector<double>       &dst,
+                        const dealii::parallel::distributed::Vector<double> &src) const;
+
+    /**
+     * Apply the transpose preconditioner on deal.II parallel data structures
+     * instead of the ones provided in the Trilinos wrapper class.
+     */
+    virtual void Tvmult (dealii::parallel::distributed::Vector<double>       &dst,
+                         const dealii::parallel::distributed::Vector<double> &src) const;
+
+    /**
+     * Return a reference to the underlying Trilinos Epetra_Operator, so you
+     * can use the preconditioner with unwrapped Trilinos solvers.
+     *
+     * Calling this function from an uninitialized object will cause an
+     * exception.
+     */
+    Epetra_Operator &trilinos_operator() const;
+
+    /**
+     * Exception.
+     */
+    DeclException1 (ExcNonMatchingMaps,
+                    std::string,
+                    << "The sparse matrix the preconditioner is based on "
+                    << "uses a map that is not compatible to the one in vector "
+                    << arg1
+                    << ". Check preconditioner and matrix setup.");
+
+    friend class SolverBase;
+
+  protected:
+    /**
+     * This is a pointer to the preconditioner object that is used when
+     * applying the preconditioner.
+     */
+    std_cxx11::shared_ptr<Epetra_Operator> preconditioner;
+
+    /**
+     * Internal communication pattern in case the matrix needs to be copied
+     * from deal.II format.
+     */
+#ifdef DEAL_II_WITH_MPI
+    Epetra_MpiComm     communicator;
+#else
+    Epetra_SerialComm  communicator;
+#endif
+
+    /**
+     * Internal Trilinos map in case the matrix needs to be copied from
+     * deal.II format.
+     */
+    std_cxx11::shared_ptr<Epetra_Map>   vector_distributor;
+  };
+
+
+  /**
+   * A wrapper class for a (pointwise) Jacobi preconditioner for Trilinos
+   * matrices. This preconditioner works both in serial and in parallel,
+   * depending on the matrix it is based on.
+   *
+   * The AdditionalData data structure allows setting preconditioner options.
+   * For the Jacobi preconditioner, these options are the damping parameter
+   * <tt>omega</tt> and a <tt>min_diagonal</tt> argument that can be used to
+   * make the preconditioner work even if the matrix contains some zero
+   * elements on the diagonal. The default settings are 1 for the damping
+   * parameter and zero for the diagonal augmentation.
+   *
+   * @ingroup TrilinosWrappers
+   * @ingroup Preconditioners
+   * @author Martin Kronbichler, 2008
+   */
+  class PreconditionJacobi : public PreconditionBase
+  {
+  public:
+
+    /**
+     * Standardized data struct to pipe additional flags to the
+     * preconditioner. The parameter <tt>omega</tt> specifies the relaxation
+     * parameter in the Jacobi preconditioner. The parameter
+     * <tt>min_diagonal</tt> can be used to make the application of the
+     * preconditioner also possible when some diagonal elements are zero. In a
+     * default application this would mean that we divide by zero, so by
+     * setting the parameter <tt>min_diagonal</tt> to a small nonzero value
+     * the Jacobi preconditioner will work on a matrix that is not too far
+     * away from the one we want to treat.
+     */
+    struct AdditionalData
+    {
+      /**
+       * Constructor. By default, set the damping parameter to one, and do not
+       * modify the diagonal.
+       */
+      AdditionalData (const double       omega = 1,
+                      const double       min_diagonal = 0,
+                      const unsigned int n_sweeps = 1);
+
+      /**
+       * This specifies the relaxation parameter in the Jacobi preconditioner.
+       */
+      double omega;
+
+      /**
+       * This specifies the minimum value the diagonal elements should have.
+       * This might be necessary when the Jacobi preconditioner is used on
+       * matrices with zero diagonal elements. In that case, a straight-
+       * forward application would not be possible since we would divide by
+       * zero.
+       */
+      double min_diagonal;
+
+      /**
+       * Sets how many times the given operation should be applied during the
+       * vmult() operation.
+       */
+      unsigned int n_sweeps;
+    };
+
+    /**
+     * Take the sparse matrix the preconditioner object should be built of,
+     * and additional flags (damping parameter, etc.) if there are any.
+     */
+    void initialize (const SparseMatrix   &matrix,
+                     const AdditionalData &additional_data = AdditionalData());
+  };
+
+
+
+
+  /**
+   * A wrapper class for a (pointwise) SSOR preconditioner for Trilinos
+   * matrices. This preconditioner works both in serial and in parallel,
+   * depending on the matrix it is based on.
+   *
+   * The AdditionalData data structure allows setting preconditioner options.
+   * For the SSOR preconditioner, these options are the damping/relaxation
+   * parameter <tt>omega</tt>, a <tt>min_diagonal</tt> argument that can be
+   * used to make the preconditioner work even if the matrix contains some
+   * zero elements on the diagonal, and a parameter <tt>overlap</tt> that
+   * determines if and how much overlap there should be between the matrix
+   * partitions on the various MPI processes. The default settings are 1 for
+   * the relaxation parameter, 0 for the diagonal augmentation and 0 for the
+   * overlap.
+   *
+   * Note that a parallel application of the SSOR preconditioner is actually a
+   * block-Jacobi preconditioner with block size equal to the local matrix
+   * size. Spoken more technically, this parallel operation is an <a
+   * href="http://en.wikipedia.org/wiki/Additive_Schwarz_method">additive
+   * Schwarz method</a> with an SSOR <em>approximate solve</em> as inner
+   * solver, based on the outer parallel partitioning.
+   *
+   * @ingroup TrilinosWrappers
+   * @ingroup Preconditioners
+   * @author Wolfgang Bangerth, 2008
+   */
+  class PreconditionSSOR : public PreconditionBase
+  {
+  public:
+
+    /**
+     * Standardized data struct to pipe additional flags to the
+     * preconditioner. The parameter <tt>omega</tt> specifies the relaxation
+     * parameter in the SSOR preconditioner. The parameter
+     * <tt>min_diagonal</tt> can be used to make the application of the
+     * preconditioner also possible when some diagonal elements are zero. In a
+     * default application this would mean that we divide by zero, so by
+     * setting the parameter <tt>min_diagonal</tt> to a small nonzero value
+     * the SSOR will work on a matrix that is not too far away from the one we
+     * want to treat. Finally, <tt>overlap</tt> governs the overlap of the
+     * partitions when the preconditioner runs in parallel, forming a so-
+     * called additive Schwarz preconditioner.
+     */
+    struct AdditionalData
+    {
+      /**
+       * Constructor. By default, set the damping parameter to one, we do not
+       * modify the diagonal, and there is no overlap (i.e. in parallel, we
+       * run a BlockJacobi preconditioner, where each block is inverted
+       * approximately by an SSOR).
+       */
+      AdditionalData (const double       omega = 1,
+                      const double       min_diagonal = 0,
+                      const unsigned int overlap = 0,
+                      const unsigned int n_sweeps = 1);
+
+      /**
+       * This specifies the (over-) relaxation parameter in the SSOR
+       * preconditioner.
+       */
+      double omega;
+
+      /**
+       * This specifies the minimum value the diagonal elements should have.
+       * This might be necessary when the SSOR preconditioner is used on
+       * matrices with zero diagonal elements. In that case, a straight-
+       * forward application would not be possible since we divide by the
+       * diagonal element.
+       */
+      double min_diagonal;
+
+      /**
+       * This determines how large the overlap of the local matrix portions on
+       * each processor in a parallel application should be.
+       */
+      unsigned int overlap;
+
+      /**
+       * Sets how many times the given operation should be applied during the
+       * vmult() operation.
+       */
+      unsigned int n_sweeps;
+    };
+
+    /**
+     * Take the sparse matrix the preconditioner object should be built of,
+     * and additional flags (damping parameter, overlap in parallel
+     * computations, etc.) if there are any.
+     */
+    void initialize (const SparseMatrix   &matrix,
+                     const AdditionalData &additional_data = AdditionalData());
+  };
+
+
+
+
+  /**
+   * A wrapper class for a (pointwise) SOR preconditioner for Trilinos
+   * matrices. This preconditioner works both in serial and in parallel,
+   * depending on the matrix it is based on.
+   *
+   * The AdditionalData data structure allows setting preconditioner options.
+   * For the SOR preconditioner, these options are the damping/relaxation
+   * parameter <tt>omega</tt>, a <tt>min_diagonal</tt> argument that can be
+   * used to make the preconditioner work even if the matrix contains some
+   * zero elements on the diagonal, and a parameter <tt>overlap</tt> that
+   * determines if and how much overlap there should be between the matrix
+   * partitions on the various MPI processes. The default settings are 1 for
+   * the relaxation parameter, 0 for the diagonal augmentation and 0 for the
+   * overlap.
+   *
+   * Note that a parallel application of the SOR preconditioner is actually a
+   * block-Jacobi preconditioner with block size equal to the local matrix
+   * size. Spoken more technically, this parallel operation is an <a
+   * href="http://en.wikipedia.org/wiki/Additive_Schwarz_method">additive
+   * Schwarz method</a> with an SOR <em>approximate solve</em> as inner
+   * solver, based on the outer parallel partitioning.
+   *
+   * @ingroup TrilinosWrappers
+   * @ingroup Preconditioners
+   * @author Martin Kronbichler, 2008
+   */
+  class PreconditionSOR : public PreconditionBase
+  {
+  public:
+
+    /**
+     * Standardized data struct to pipe additional flags to the
+     * preconditioner. The parameter <tt>omega</tt> specifies the relaxation
+     * parameter in the SOR preconditioner. The parameter
+     * <tt>min_diagonal</tt> can be used to make the application of the
+     * preconditioner also possible when some diagonal elements are zero. In a
+     * default application this would mean that we divide by zero, so by
+     * setting the parameter <tt>min_diagonal</tt> to a small nonzero value
+     * the SOR will work on a matrix that is not too far away from the one we
+     * want to treat. The parameter <tt>overlap</tt> governs the overlap of
+     * the partitions when the preconditioner runs in parallel, forming a
+     * so-called additive Schwarz preconditioner. Finally, <tt>n_sweeps</tt>
+     * sets how many times the operation is applied during each vmult() call.
+     */
+    struct AdditionalData
+    {
+      /**
+       * Constructor. By default, set the damping parameter to one, do not
+       * modify the diagonal, use no overlap (i.e. in parallel, we run a
+       * BlockJacobi preconditioner, where each block is inverted
+       * approximately by an SOR), and apply a single sweep.
+       */
+      AdditionalData (const double       omega = 1,
+                      const double       min_diagonal = 0,
+                      const unsigned int overlap = 0,
+                      const unsigned int n_sweeps = 1);
+
+      /**
+       * This specifies the (over-) relaxation parameter in the SOR
+       * preconditioner.
+       */
+      double omega;
+
+      /**
+       * This specifies the minimum value the diagonal elements should have.
+       * This might be necessary when the SOR preconditioner is used on
+       * matrices with zero diagonal elements. In that case, a
+       * straightforward application would not be possible since we divide by
+       * the diagonal element.
+       */
+      double min_diagonal;
+
+      /**
+       * This determines how large the overlap of the local matrix portions on
+       * each processor in a parallel application should be.
+       */
+      unsigned int overlap;
+
+      /**
+       * Sets how many times the given operation should be applied during the
+       * vmult() operation.
+       */
+      unsigned int n_sweeps;
+    };
+
+    /**
+     * Take the sparse matrix the preconditioner object should be built from,
+     * and additional flags (damping parameter, overlap in parallel
+     * computations, etc.) if there are any. If no flags are given, a
+     * default-constructed AdditionalData object is used.
+     */
+    void initialize (const SparseMatrix   &matrix,
+                     const AdditionalData &additional_data = AdditionalData());
+  };
+
+
+
+  /**
+   * A wrapper class for a block Jacobi preconditioner for Trilinos matrices.
+   * As opposed to PreconditionSOR where each row is treated separately, this
+   * scheme collects blocks of a given size and inverts a full matrix for all
+   * these rows simultaneously. Trilinos allows to select several strategies
+   * for selecting which rows form a block, including "linear" (i.e., divide
+   * the local range of the matrix in slices of the block size), "greedy" or
+   * "metis". Note that the term <em>block Jacobi</em> does not relate to
+   * possible blocks in the MPI setting, but small blocks of dense matrices
+   * extracted from the sparse matrix local to each processor.
+   *
+   * The AdditionalData data structure allows to set preconditioner options.
+   *
+   * @ingroup TrilinosWrappers
+   * @ingroup Preconditioners
+   * @author Martin Kronbichler, 2014
+   */
+  class PreconditionBlockJacobi : public PreconditionBase
+  {
+  public:
+
+    /**
+     * Standardized data struct to pipe additional flags to the
+     * preconditioner. The parameter <tt>block_size</tt> sets the size of
+     * small blocks. It is recommended to choose this parameter not too large
+     * (a few hundreds at most) since this implementation uses a dense matrix
+     * for the block. The parameter <tt>block_creation_type</tt> allows to
+     * pass the strategy for finding the blocks to Ifpack. The parameter
+     * <tt>omega</tt> specifies the relaxation parameter in the Jacobi
+     * preconditioner. The parameter <tt>min_diagonal</tt> can be used to make
+     * the application of the preconditioner also possible when some diagonal
+     * elements are zero. In a default application this would mean that we
+     * divide by zero, so by setting the parameter <tt>min_diagonal</tt> to a
+     * small nonzero value the preconditioner will work on a matrix that is
+     * not too far away from the one we want to treat. Finally,
+     * <tt>n_sweeps</tt> sets how many times the operation is applied during
+     * each vmult() call.
+     */
+    struct AdditionalData
+    {
+      /**
+       * Constructor. By default, use a block size of 1, use linear
+       * subdivision of the rows, set the damping parameter to one, do not
+       * modify the diagonal, and apply a single sweep.
+       */
+      AdditionalData (const unsigned int block_size = 1,
+                      const std::string  block_creation_type = "linear",
+                      const double       omega = 1,
+                      const double       min_diagonal = 0,
+                      const unsigned int n_sweeps = 1);
+
+      /**
+       * This specifies the size of blocks.
+       */
+      unsigned int block_size;
+
+      /**
+       * Strategy for creation of blocks passed on to Ifpack block relaxation
+       * (variable 'partitioner: type') with this string as the given value.
+       * Available types in Ifpack include "linear" (i.e., divide the local
+       * range of the matrix in slices of the block size), "greedy", or
+       * "metis". For a full list, see the documentation of Ifpack.
+       */
+      std::string block_creation_type;
+
+      /**
+       * This specifies the (over-) relaxation parameter in the Jacobi
+       * preconditioner.
+       */
+      double omega;
+
+      /**
+       * This specifies the minimum value the diagonal elements should have.
+       * This might be necessary when the block Jacobi preconditioner is used
+       * on matrices with zero diagonal elements. In that case, a
+       * straightforward application would not be possible since we divide by
+       * the diagonal element.
+       */
+      double min_diagonal;
+
+      /**
+       * Sets how many times the given operation should be applied during the
+       * vmult() operation.
+       */
+      unsigned int n_sweeps;
+    };
+
+    /**
+     * Take the sparse matrix the preconditioner object should be built from,
+     * and additional flags (damping parameter, etc.) if there are any. If no
+     * flags are given, a default-constructed AdditionalData object is used.
+     */
+    void initialize (const SparseMatrix   &matrix,
+                     const AdditionalData &additional_data = AdditionalData());
+  };
+
+
+
+
+  /**
+   * A wrapper class for a block SSOR preconditioner for Trilinos matrices. As
+   * opposed to PreconditionSSOR where each row is treated separately (point-
+   * wise), this scheme collects blocks of a given size and inverts a full
+   * matrix for all these rows simultaneously. Trilinos allows to select
+   * several strategies for selecting which rows form a block, including
+   * "linear" (i.e., divide the local range of the matrix in slices of the
+   * block size), "greedy" or "metis".
+   *
+   * The AdditionalData data structure allows to set preconditioner options.
+   *
+   * Note that a parallel application of this preconditioner is actually a
+   * block-Jacobi preconditioner with (outer) block size equal to the local
+   * matrix size. Spoken more technically, this parallel operation is an <a
+   * href="http://en.wikipedia.org/wiki/Additive_Schwarz_method">additive
+   * Schwarz method</a> with a block SSOR <em>approximate solve</em> as inner
+   * solver, based on the outer parallel partitioning.
+   *
+   * @ingroup TrilinosWrappers
+   * @ingroup Preconditioners
+   * @author Martin Kronbichler, 2014
+   */
+  class PreconditionBlockSSOR : public PreconditionBase
+  {
+  public:
+
+    /**
+     * Standardized data struct to pipe additional flags to the
+     * preconditioner. The parameter <tt>block_size</tt> sets the size of
+     * small blocks. It is recommended to choose this parameter not too large
+     * (a few hundreds at most) since this implementation uses a dense matrix
+     * for the block. The parameter <tt>block_creation_type</tt> allows to
+     * pass the strategy for finding the blocks to Ifpack. The parameter
+     * <tt>omega</tt> specifies the relaxation parameter in the SSOR
+     * preconditioner. The parameter <tt>min_diagonal</tt> can be used to make
+     * the application of the preconditioner also possible when some diagonal
+     * elements are zero. In a default application this would mean that we
+     * divide by zero, so by setting the parameter <tt>min_diagonal</tt> to a
+     * small nonzero value the SSOR will work on a matrix that is not too far
+     * away from the one we want to treat. The parameter <tt>overlap</tt>
+     * governs the overlap of the partitions when the preconditioner runs in
+     * parallel, forming a so-called additive Schwarz preconditioner.
+     * Finally, <tt>n_sweeps</tt> sets how many times the operation is applied
+     * during each vmult() call.
+     */
+    struct AdditionalData
+    {
+      /**
+       * Constructor. By default, use a block size of 1, use linear
+       * subdivision of the rows, set the damping parameter to one, do not
+       * modify the diagonal, use no overlap (i.e. in parallel, we run a
+       * BlockJacobi preconditioner, where each block is inverted
+       * approximately by a block SSOR), and apply a single sweep.
+       */
+      AdditionalData (const unsigned int block_size = 1,
+                      const std::string  block_creation_type = "linear",
+                      const double       omega = 1,
+                      const double       min_diagonal = 0,
+                      const unsigned int overlap = 0,
+                      const unsigned int n_sweeps = 1);
+
+      /**
+       * This specifies the size of blocks.
+       */
+      unsigned int block_size;
+
+      /**
+       * Strategy for creation of blocks passed on to Ifpack block relaxation
+       * (variable 'partitioner: type') with this string as the given value.
+       * Available types in Ifpack include "linear" (i.e., divide the local
+       * range of the matrix in slices of the block size), "greedy", or
+       * "metis". For a full list, see the documentation of Ifpack.
+       */
+      std::string block_creation_type;
+
+      /**
+       * This specifies the (over-) relaxation parameter in the SSOR
+       * preconditioner.
+       */
+      double omega;
+
+      /**
+       * This specifies the minimum value the diagonal elements should have.
+       * This might be necessary when the SSOR preconditioner is used on
+       * matrices with zero diagonal elements. In that case, a
+       * straightforward application would not be possible since we divide by
+       * the diagonal element.
+       */
+      double min_diagonal;
+
+      /**
+       * This determines how large the overlap of the local matrix portions on
+       * each processor in a parallel application should be.
+       */
+      unsigned int overlap;
+
+      /**
+       * Sets how many times the given operation should be applied during the
+       * vmult() operation.
+       */
+      unsigned int n_sweeps;
+    };
+
+    /**
+     * Take the sparse matrix the preconditioner object should be built from,
+     * and additional flags (damping parameter, overlap in parallel
+     * computations, etc.) if there are any. If no flags are given, a
+     * default-constructed AdditionalData object is used.
+     */
+    void initialize (const SparseMatrix   &matrix,
+                     const AdditionalData &additional_data = AdditionalData());
+  };
+
+
+
+
+  /**
+   * A wrapper class for a block SOR preconditioner for Trilinos matrices. As
+   * opposed to PreconditionSOR where each row is treated separately, this
+   * scheme collects blocks of a given size and inverts a full matrix for all
+   * these rows simultaneously. Trilinos allows to select several strategies
+   * for selecting which rows form a block, including "linear" (i.e., divide
+   * the local range of the matrix in slices of the block size), "greedy" or
+   * "metis".
+   *
+   * The AdditionalData data structure allows to set preconditioner options.
+   *
+   * Note that a parallel application of this preconditioner is actually a
+   * block-Jacobi preconditioner with (outer) block size equal to the local
+   * matrix size. Spoken more technically, this parallel operation is an <a
+   * href="http://en.wikipedia.org/wiki/Additive_Schwarz_method">additive
+   * Schwarz method</a> with a block SOR <em>approximate solve</em> as inner
+   * solver, based on the outer parallel partitioning.
+   *
+   * @ingroup TrilinosWrappers
+   * @ingroup Preconditioners
+   * @author Martin Kronbichler, 2014
+   */
+  class PreconditionBlockSOR : public PreconditionBase
+  {
+  public:
+
+    /**
+     * Standardized data struct to pipe additional flags to the
+     * preconditioner. The parameter <tt>block_size</tt> sets the size of
+     * small blocks. It is recommended to choose this parameter not too large
+     * (a few hundreds at most) since this implementation uses a dense matrix
+     * for the block. The parameter <tt>block_creation_type</tt> allows to
+     * pass the strategy for finding the blocks to Ifpack. The parameter
+     * <tt>omega</tt> specifies the relaxation parameter in the SOR
+     * preconditioner. The parameter <tt>min_diagonal</tt> can be used to make
+     * the application of the preconditioner also possible when some diagonal
+     * elements are zero. In a default application this would mean that we
+     * divide by zero, so by setting the parameter <tt>min_diagonal</tt> to a
+     * small nonzero value the SOR will work on a matrix that is not too far
+     * away from the one we want to treat. The parameter <tt>overlap</tt>
+     * governs the overlap of the partitions when the preconditioner runs in
+     * parallel, forming a so-called additive Schwarz preconditioner.
+     * Finally, <tt>n_sweeps</tt> sets how many times the operation is applied
+     * during each vmult() call.
+     */
+    struct AdditionalData
+    {
+      /**
+       * Constructor. By default, use a block size of 1, use linear
+       * subdivision of the rows, set the damping parameter to one, do not
+       * modify the diagonal, use no overlap (i.e. in parallel, we run a
+       * BlockJacobi preconditioner, where each block is inverted
+       * approximately by a block SOR), and apply a single sweep.
+       */
+      AdditionalData (const unsigned int block_size = 1,
+                      const std::string  block_creation_type = "linear",
+                      const double       omega = 1,
+                      const double       min_diagonal = 0,
+                      const unsigned int overlap = 0,
+                      const unsigned int n_sweeps = 1);
+
+      /**
+       * This specifies the size of blocks.
+       */
+      unsigned int block_size;
+
+      /**
+       * Strategy for creation of blocks passed on to Ifpack block relaxation
+       * (variable 'partitioner: type') with this string as the given value.
+       * Available types in Ifpack include "linear" (i.e., divide the local
+       * range of the matrix in slices of the block size), "greedy", or
+       * "metis". For a full list, see the documentation of Ifpack.
+       */
+      std::string block_creation_type;
+
+      /**
+       * This specifies the (over-) relaxation parameter in the SOR
+       * preconditioner.
+       */
+      double omega;
+
+      /**
+       * This specifies the minimum value the diagonal elements should have.
+       * This might be necessary when the SOR preconditioner is used on
+       * matrices with zero diagonal elements. In that case, a
+       * straightforward application would not be possible since we divide by
+       * the diagonal element.
+       */
+      double min_diagonal;
+
+      /**
+       * This determines how large the overlap of the local matrix portions on
+       * each processor in a parallel application should be.
+       */
+      unsigned int overlap;
+
+      /**
+       * Sets how many times the given operation should be applied during the
+       * vmult() operation.
+       */
+      unsigned int n_sweeps;
+    };
+
+    /**
+     * Take the sparse matrix the preconditioner object should be built from,
+     * and additional flags (damping parameter, overlap in parallel
+     * computations, etc.) if there are any. If no flags are given, a
+     * default-constructed AdditionalData object is used.
+     */
+    void initialize (const SparseMatrix   &matrix,
+                     const AdditionalData &additional_data = AdditionalData());
+  };
+
+
+
+  /**
+   * A wrapper class for an incomplete Cholesky factorization (IC)
+   * preconditioner for @em symmetric Trilinos matrices. This preconditioner
+   * works both in serial and in parallel, depending on the matrix it is based
+   * on. In general, an incomplete factorization does not take all fill-in
+   * elements that would appear in a full factorization (that is the basis for
+   * a direct solve). Trilinos allows to set the amount of fill-in elements,
+   * governed by the additional data argument <tt>ic_fill</tt>, so one can
+   * gradually choose between a factorization on the sparse matrix structure
+   * only (<tt>ic_fill=0</tt>) to a full factorization (<tt>ic_fill</tt> in
+   * the range of 10 to 50, depending on the spatial dimension of the PDE
+   * problem and the degree of the finite element basis functions; generally,
+   * more required fill-in elements require this parameter to be set to a
+   * higher integer value).
+   *
+   * The AdditionalData data structure allows to set preconditioner options.
+   * Besides the fill-in argument, these options are some options for
+   * perturbations (see the documentation of the AdditionalData structure for
+   * details), and a parameter <tt>overlap</tt> that determines if and how
+   * much overlap there should be between the matrix partitions on the various
+   * MPI processes.  The default settings are 0 for the additional fill-in, 0
+   * for the absolute augmentation tolerance, 1 for the relative augmentation
+   * tolerance, 0 for the overlap.
+   *
+   * Note that a parallel application of the IC preconditioner is actually a
+   * block-Jacobi preconditioner with block size equal to the local matrix
+   * size. Spoken more technically, this parallel operation is an <a
+   * href="http://en.wikipedia.org/wiki/Additive_Schwarz_method">additive
+   * Schwarz method</a> with an IC <em>approximate solve</em> as inner solver,
+   * based on the (outer) parallel partitioning.
+   *
+   * @ingroup TrilinosWrappers
+   * @ingroup Preconditioners
+   * @author Martin Kronbichler, 2008
+   */
+  class PreconditionIC : public PreconditionBase
+  {
+  public:
+    /**
+     * Standardized data struct to pipe additional parameters to the
+     * preconditioner. The Trilinos IC decomposition allows for some fill-in,
+     * so it actually is a threshold incomplete Cholesky factorization. The
+     * amount of fill-in, and hence, the amount of memory used by this
+     * preconditioner, is controlled by the parameter <tt>ic_fill</tt>, which
+     * is given as an unsigned integer. When forming the preconditioner, for
+     * certain problems bad conditioning (or just bad luck) can cause the
+     * preconditioner to be very poorly conditioned. Hence it can help to add
+     * diagonal perturbations to the original matrix and form the
+     * preconditioner for this slightly better matrix. <tt>ic_atol</tt> is an
+     * absolute perturbation that is added to the diagonal before forming the
+     * preconditioner, and <tt>ic_rtol</tt> is a scaling factor
+     * $rtol \geq 1$. The last parameter, <tt>overlap</tt>, specifies the
+     * overlap of the partitions when the preconditioner runs in parallel.
+     */
+    struct AdditionalData
+    {
+      /**
+       * Constructor. By default, the level of extra fill-ins is set to be
+       * zero (just use the matrix structure, do not generate any additional
+       * fill-in), the absolute and relative tolerance levels are 0 and 1,
+       * respectively, and the overlap in case of a parallel execution is
+       * zero. This overlap in a block-application of the IC in the parallel
+       * case makes the preconditioner a so-called additive Schwarz
+       * preconditioner.
+       */
+      AdditionalData (const unsigned int ic_fill = 0,
+                      const double       ic_atol = 0.,
+                      const double       ic_rtol = 1.,
+                      const unsigned int overlap = 0);
+
+      /**
+       * This specifies the amount of additional fill-in elements besides the
+       * sparse matrix structure. When <tt>ic_fill</tt> is large, this means
+       * that many fill-ins will be added, so that the IC preconditioner comes
+       * closer to a direct sparse Cholesky decomposition. Note, however, that
+       * this will drastically increase the memory requirement, especially
+       * when the preconditioner is used in 3D.
+       */
+      unsigned int ic_fill;
+
+      /**
+       * This specifies the amount of an absolute perturbation that will be
+       * added to the diagonal of the matrix, which sometimes can help to get
+       * better preconditioners.
+       */
+      double ic_atol;
+
+      /**
+       * This specifies the factor by which the diagonal of the matrix will be
+       * scaled, which sometimes can help to get better preconditioners.
+       */
+      double ic_rtol;
+
+      /**
+       * This determines how large the overlap of the local matrix portions on
+       * each processor in a parallel application should be.
+       */
+      unsigned int overlap;
+    };
+
+    /**
+     * Initialize function. Takes the matrix the preconditioner should be
+     * computed from, and additional flags if there are any. If no flags are
+     * given, a default-constructed AdditionalData object is used.
+     */
+    void initialize (const SparseMatrix   &matrix,
+                     const AdditionalData &additional_data = AdditionalData());
+  };
+
+
+
+  /**
+   * A wrapper class for an incomplete LU factorization (ILU(k))
+   * preconditioner for Trilinos matrices. This preconditioner works both in
+   * serial and in parallel, depending on the matrix it is based on. In
+   * general, an incomplete factorization does not take all fill-in elements
+   * that would appear in a full factorization (that is the basis for a direct
+   * solve). Trilinos allows to set the amount of fill-in elements, governed
+   * by the additional data argument <tt>ilu_fill</tt>, so one can gradually
+   * choose between a factorization on the sparse matrix structure only
+   * (<tt>ilu_fill=0</tt>) to a full factorization (<tt>ilu_fill</tt> in the
+   * range of 10 to 50, depending on the spatial dimension of the PDE problem
+   * and the degree of the finite element basis functions; generally, more
+   * required fill-in elements require this parameter to be set to a higher
+   * integer value).
+   *
+   * The AdditionalData data structure allows to set preconditioner options.
+   * See the documentation of the AdditionalData structure for details.
+   *
+   * Note that a parallel application of the ILU preconditioner is actually a
+   * block-Jacobi preconditioner with block size equal to the local matrix
+   * size. Spoken more technically, this parallel operation is an <a
+   * href="http://en.wikipedia.org/wiki/Additive_Schwarz_method">additive
+   * Schwarz method</a> with an ILU <em>approximate solve</em> as inner
+   * solver, based on the (outer) parallel partitioning.
+   *
+   * @ingroup TrilinosWrappers
+   * @ingroup Preconditioners
+   * @author Martin Kronbichler, 2008
+   */
+  class PreconditionILU : public PreconditionBase
+  {
+  public:
+    /**
+     * Standardized data struct to pipe additional parameters to the
+     * preconditioner:
+     * <ul>
+     *
+     * <li> @p ilu_fill: This specifies the amount of additional fill-in
+     * elements besides the original sparse matrix structure. If $k$ is @p
+     * ilu_fill, the sparsity pattern of $A^{k+1}$ is used for the storage of
+     * the result of the Gaussian elimination. This is known as ILU($k$) in
+     * the literature. When @p ilu_fill is large, the preconditioner comes
+     * closer to a (direct) sparse LU decomposition. Note, however, that this
+     * will drastically increase the memory requirement, especially when the
+     * preconditioner is used in 3D.
+     *
+     * <li> @p ilu_atol and @p ilu_rtol: These two parameters allow
+     * perturbation of the diagonal of the matrix, which sometimes can help to
+     * get better preconditioners especially in the case of bad conditioning.
+     * Before factorization, the diagonal entry $a_{ii}$ is replaced by
+     * $\alpha sign(a_{ii}) + \beta a_{ii}$, where $\alpha\geq 0$ is the
+     * absolute threshold @p ilu_atol and $\beta\geq 1$ is the relative
+     * threshold @p ilu_rtol. The default values ($\alpha = 0$, $\beta = 1$)
+     * therefore use the original, unmodified diagonal entry. Suggested values
+     * are in the order of $10^{-5}$ to $10^{-2}$ for @p ilu_atol and 1.01 for
+     * @p ilu_rtol.
+     *
+     * <li> @p overlap: This determines how large the overlap of the local
+     * matrix portions on each processor in a parallel application should be.
+     * An overlap of 0 corresponds to a block diagonal decomposition on each
+     * processor, an overlap of 1 will additionally include a row j if there
+     * is a nonzero entry in column j in one of the own rows. Higher overlap
+     * numbers work accordingly in a recursive fashion. Increasing @p overlap
+     * will increase communication and storage cost. According to the IFPACK
+     * documentation, an overlap of 1 is often effective and values of more
+     * than 3 are rarely needed.
+     *
+     * </ul>
+     */
+    struct AdditionalData
+    {
+      /**
+       * Constructor with default values for all parameters: no additional
+       * fill-in, an absolute threshold of 0, a relative threshold of 1, and
+       * no overlap.
+       */
+      AdditionalData (const unsigned int ilu_fill = 0,
+                      const double       ilu_atol = 0.,
+                      const double       ilu_rtol = 1.,
+                      const unsigned int overlap  = 0);
+
+      /**
+       * Additional fill-in. See the documentation of this struct above for
+       * details.
+       */
+      unsigned int ilu_fill;
+
+      /**
+       * The amount of perturbation to add to diagonal entries. See the
+       * documentation of this struct above for details.
+       */
+      double ilu_atol;
+
+      /**
+       * Scaling factor for diagonal entries. See the documentation of this
+       * struct above for details.
+       */
+      double ilu_rtol;
+
+      /**
+       * Overlap between processors. See the documentation of this struct
+       * above for details.
+       */
+      unsigned int overlap;
+    };
+
+    /**
+     * Initialize function. Takes the matrix which is used to form the
+     * preconditioner, and additional flags if there are any. If no flags are
+     * given, a default-constructed AdditionalData object is used.
+     */
+    void initialize (const SparseMatrix   &matrix,
+                     const AdditionalData &additional_data = AdditionalData());
+  };
+
+
+
+
+
+
+  /**
+   * A wrapper class for a thresholded incomplete LU factorization (ILU-T)
+   * preconditioner for Trilinos matrices. This preconditioner works both in
+   * serial and in parallel, depending on the matrix it is based on. In
+   * general, an incomplete factorization does not take all fill-in elements
+   * that would appear in a full factorization (that is the basis for a direct
+   * solve). For the ILU-T preconditioner, the parameter <tt>ilut_drop</tt>
+   * lets the user specify which elements should be dropped (i.e., should not
+   * be part of the incomplete decomposition). Trilinos calculates first the
+   * complete factorization for one row, and then skips those elements that
+   * are lower than the threshold. This is the main difference to the non-
+   * thresholded ILU preconditioner, where the parameter <tt>ilu_fill</tt>
+   * governs the incomplete factorization structure. A corresponding
+   * parameter, <tt>ilut_fill</tt>, is available here as well, but provides
+   * only some extra information here.
+   *
+   * The AdditionalData data structure allows to set preconditioner options.
+   * Besides the fill-in arguments, these options are some options for
+   * perturbations (see the documentation of the AdditionalData structure for
+   * details), and a parameter <tt>overlap</tt> that determines if and how
+   * much overlap there should be between the matrix partitions on the various
+   * MPI processes. The default settings are 0 for the additional fill-in, 0
+   * for the absolute augmentation tolerance, 1 for the relative augmentation
+   * tolerance, 0 for the overlap.
+   *
+   * Note that a parallel application of the ILU-T preconditioner is actually
+   * a block-Jacobi preconditioner with block size equal to the local matrix
+   * size. Spoken more technically, this parallel operation is an <a
+   * href="http://en.wikipedia.org/wiki/Additive_Schwarz_method">additive
+   * Schwarz method</a> with an ILU <em>approximate solve</em> as inner
+   * solver, based on the (outer) parallel partitioning.
+   *
+   * @ingroup TrilinosWrappers
+   * @ingroup Preconditioners
+   * @author Martin Kronbichler, 2009
+   */
+  class PreconditionILUT : public PreconditionBase
+  {
+  public:
+    /**
+     * Standardized data struct to pipe additional parameters to the
+     * preconditioner. The Trilinos ILU-T decomposition allows for some fill-
+     * in, so it actually is a threshold incomplete LU factorization. The
+     * amount of fill-in, and hence, the amount of memory used by this
+     * preconditioner, is controlled by the parameters <tt>ilut_drop</tt> and
+     * <tt>ilut_fill</tt>, which specify a threshold about which values
+     * should form the incomplete factorization and the level of additional
+     * fill-in. When forming the preconditioner, for certain problems bad
+     * conditioning (or just bad luck) can cause the preconditioner to be very
+     * poorly conditioned. Hence it can help to add diagonal perturbations to
+     * the original matrix and form the preconditioner for this slightly
+     * better matrix. <tt>ilut_atol</tt> is an absolute perturbation that is
+     * added to the diagonal before factorizing, and <tt>ilut_rtol</tt> is
+     * a scaling factor $rtol \geq 1$. The last parameter specifies the
+     * overlap of the partitions when the preconditioner runs in parallel.
+     */
+    struct AdditionalData
+    {
+      /**
+       * Constructor. By default, no element will be dropped, the level of
+       * extra fill-ins is set to be zero (just use the matrix structure, do
+       * not generate any additional fill-in except the one that results from
+       * non-dropping large elements), the tolerance levels are 0 and 1,
+       * respectively, and the overlap in case of a parallel execution is
+       * zero. This overlap in a block-application of the ILU in the parallel
+       * case makes the preconditioner a so-called additive Schwarz
+       * preconditioner.
+       */
+      AdditionalData (const double       ilut_drop = 0.,
+                      const unsigned int ilut_fill = 0,
+                      const double       ilut_atol = 0.,
+                      const double       ilut_rtol = 1.,
+                      const unsigned int overlap  = 0);
+
+      /**
+       * This specifies the relative size of elements which should be dropped
+       * when forming an incomplete LU decomposition with threshold.
+       */
+      double ilut_drop;
+
+      /**
+       * This specifies the amount of additional fill-in elements besides the
+       * sparse matrix structure. When <tt>ilut_fill</tt> is large, this means
+       * that many fill-ins will be added, so that the ILU preconditioner
+       * comes closer to a (direct) sparse LU decomposition. Note, however,
+       * that this will drastically increase the memory requirement,
+       * especially when the preconditioner is used in 3D.
+       */
+      unsigned int ilut_fill;
+
+      /**
+       * This specifies the amount of an absolute perturbation that will be
+       * added to the diagonal of the matrix, which sometimes can help to get
+       * better preconditioners.
+       */
+      double ilut_atol;
+
+      /**
+       * This specifies the factor by which the diagonal of the matrix will be
+       * scaled, which sometimes can help to get better preconditioners.
+       */
+      double ilut_rtol;
+
+      /**
+       * This determines how large the overlap of the local matrix portions on
+       * each processor in a parallel application should be.
+       */
+      unsigned int overlap;
+    };
+
+    /**
+     * Initialize function. Takes the @p matrix from which the preconditioner
+     * is formed; @p additional_data defaults to AdditionalData().
+     */
+    void initialize (const SparseMatrix   &matrix,
+                     const AdditionalData &additional_data = AdditionalData());
+  };
+
+
+
+  /**
+   * A wrapper class for a sparse direct LU decomposition on parallel blocks
+   * for Trilinos matrices. When run in serial, this corresponds to a direct
+   * solve on the matrix.
+   *
+   * The AdditionalData data structure allows setting preconditioner options.
+   *
+   * Note that a parallel application of the block direct solve preconditioner
+   * is actually a block-Jacobi preconditioner with block size equal to the
+   * local matrix size. More technically speaking, this parallel operation is
+   * an <a href="http://en.wikipedia.org/wiki/Additive_Schwarz_method">additive
+   * Schwarz method</a> with an <em>exact solve</em> as inner solver, based on
+   * the (outer) parallel partitioning.
+   *
+   * @ingroup TrilinosWrappers
+   * @ingroup Preconditioners
+   * @author Martin Kronbichler, 2008
+   */
+  class PreconditionBlockwiseDirect : public PreconditionBase
+  {
+  public:
+    /**
+     * Standardized data struct to pipe additional parameters to the
+     * preconditioner.
+     */
+    struct AdditionalData
+    {
+      /**
+       * Constructor. By default, the overlap is zero.
+       */
+      AdditionalData (const unsigned int overlap  = 0);
+
+
+      /**
+       * This determines how large the overlap of the local matrix portions on
+       * each processor in a parallel application should be.
+       */
+      unsigned int overlap;
+    };
+
+    /**
+     * Initialize function. Takes the @p matrix from which the preconditioner
+     * is formed; @p additional_data defaults to AdditionalData().
+     */
+    void initialize (const SparseMatrix   &matrix,
+                     const AdditionalData &additional_data = AdditionalData());
+  };
+
+
+
+
+
+
+  /**
+   * A wrapper class for a Chebyshev preconditioner for Trilinos matrices.
+   *
+   * The AdditionalData data structure allows setting preconditioner options.
+   *
+   * @ingroup TrilinosWrappers
+   * @ingroup Preconditioners
+   * @author Martin Kronbichler, 2008
+   */
+  class PreconditionChebyshev : public PreconditionBase
+  {
+  public:
+    /**
+     * Standardized data struct to pipe additional parameters to the
+     * preconditioner.
+     */
+    struct AdditionalData
+    {
+      /**
+       * Constructor. Every argument has a default value.
+       */
+      AdditionalData (const unsigned int degree           = 1,
+                      const double       max_eigenvalue   = 10.,
+                      const double       eigenvalue_ratio = 30.,
+                      const double       min_eigenvalue   = 1.,
+                      const double       min_diagonal     = 1e-12,
+                      const bool         nonzero_starting = false);
+
+      /**
+       * This determines the degree of the Chebyshev polynomial. The degree of
+       * the polynomial gives the number of matrix-vector products to be
+       * performed for one application of the vmult() operation.
+       */
+      unsigned int degree;
+
+      /**
+       * This sets the maximum eigenvalue of the matrix, which needs to be set
+       * properly for appropriate performance of the Chebyshev preconditioner.
+       */
+      double max_eigenvalue;
+
+      /**
+       * This sets the ratio between the maximum and the minimum eigenvalue.
+       */
+      double eigenvalue_ratio;
+
+      /**
+       * This sets the minimum eigenvalue, which is an optional parameter only
+       * used internally for checking whether we use an identity matrix.
+       */
+      double min_eigenvalue;
+
+      /**
+       * This sets a threshold below which the diagonal element will not be
+       * inverted in the Chebyshev algorithm.
+       */
+      double min_diagonal;
+
+      /**
+       * When this flag is set to <tt>true</tt>, it enables the method
+       * <tt>vmult(dst, src)</tt> to use non-zero data in the vector
+       * <tt>dst</tt>, appending to it the Chebyshev corrections. This can be
+       * useful in some situations (e.g. when used for high-frequency error
+       * smoothing), but is not the way the solver classes expect a
+       * preconditioner to work (where one ignores the content in <tt>dst</tt>
+       * for the preconditioner application). The user should really know what
+       * she is doing when touching this flag.
+       */
+      bool nonzero_starting;
+    };
+
+    /**
+     * Initialize function. Takes the @p matrix from which the preconditioner
+     * is formed; @p additional_data defaults to AdditionalData().
+     */
+    void initialize (const SparseMatrix   &matrix,
+                     const AdditionalData &additional_data = AdditionalData());
+  };
+
+
+
+  /**
+   * This class implements an algebraic multigrid (AMG) preconditioner based
+   * on the Trilinos ML implementation, which is a black-box preconditioner
+   * that works well for many PDE-based linear problems.  What this class does
+   * is twofold.  When the initialize() function is invoked, an ML
+   * preconditioner object is created based on the matrix that we want the
+   * preconditioner to be based on. A call of the respective
+   * <code>vmult</code> function does call the respective operation in the
+   * Trilinos package, where it is called <code>ApplyInverse</code>. Use of
+   * this class is explained in the step-31 tutorial program.
+   *
+   * Since the Trilinos objects we want to use are heavily dependent on Epetra
+   * objects, we recommend using this class in conjunction with Trilinos
+   * (Epetra) sparse matrices and vectors. There is support for use with
+   * matrices of the deal.II::SparseMatrix class and corresponding vectors,
+   * too, but this requires generating a copy of the matrix, which is slower
+   * and takes (much) more memory. When doing such a copy operation, we can
+   * still profit from the fact that some of the entries in the preconditioner
+   * matrix are zero and hence can be neglected.
+   *
+   * The implementation is able to distinguish between matrices from elliptic
+   * problems and convection dominated problems. We use the standard options
+   * provided by Trilinos ML for elliptic problems, except that we use a
+   * Chebyshev smoother instead of a symmetric Gauss-Seidel smoother.  For
+   * most elliptic problems, Chebyshev provides a better damping of high
+   * frequencies (in the algebraic sense) than Gauss-Seidel (SSOR), and is
+   * faster (Chebyshev requires only some matrix-vector products, whereas SSOR
+   * requires substitutions which are more expensive). Moreover, Chebyshev is
+   * perfectly parallel in the sense that it does not degenerate when used on
+   * many processors. SSOR, on the other hand, gets more Jacobi-like on many
+   * processors.
+   *
+   * For proper functionality of this class we recommend using Trilinos v9.0
+   * and higher. Older versions may have problems with generating the coarse-
+   * matrix structure when using matrices with many nonzero entries per row
+   * (i.e., matrices stemming from higher order finite element
+   * discretizations).
+   *
+   * @ingroup TrilinosWrappers
+   * @ingroup Preconditioners
+   * @author Martin Kronbichler, 2008
+   */
+  class PreconditionAMG : public PreconditionBase
+  {
+  public:
+
+    /**
+     * A data structure that is used to control details of how the algebraic
+     * multigrid is set up. The flags detailed in here are then passed to the
+     * Trilinos ML implementation. A structure of the current type is passed
+     * to the initialize() functions of PreconditionAMG.
+     */
+    struct AdditionalData
+    {
+      /**
+       * Constructor. By default, we pretend to work on elliptic problems with
+       * linear finite elements on a scalar equation.
+       */
+      AdditionalData (const bool                             elliptic = true,
+                      const bool                             higher_order_elements = false,
+                      const unsigned int                     n_cycles = 1,
+                      const bool                             w_cycle = false,
+                      const double                           aggregation_threshold = 1e-4,
+                      const std::vector<std::vector<bool> > &constant_modes = std::vector<std::vector<bool> > (0),
+                      const unsigned int                     smoother_sweeps = 2,
+                      const unsigned int                     smoother_overlap = 0,
+                      const bool                             output_details = false,
+                      const char                            *smoother_type = "Chebyshev",
+                      const char                            *coarse_type = "Amesos-KLU");
+
+      /**
+       * Determines whether the AMG preconditioner should be optimized for
+       * elliptic problems (ML option smoothed aggregation SA, using a
+       * Chebyshev smoother) or for non-elliptic problems (ML option non-
+       * symmetric smoothed aggregation NSSA, smoother is SSOR with
+       * underrelaxation).
+       */
+      bool elliptic;
+
+      /**
+       * Determines whether the matrix that the preconditioner is built upon
+       * is generated from linear or higher-order elements.
+       */
+      bool higher_order_elements;
+
+      /**
+       * Defines how many multigrid cycles should be performed by the
+       * preconditioner.
+       */
+      unsigned int n_cycles;
+
+      /**
+       * Defines whether a w-cycle should be used instead of the standard
+       * setting of a v-cycle.
+       */
+      bool w_cycle;
+
+      /**
+       * This threshold tells the AMG setup how the coarsening should be
+       * performed. In the AMG used by ML, all points that strongly couple
+       * with the tentative coarse-level point form one aggregate. The term
+       * <em>strong coupling</em> is controlled by the variable
+       * <tt>aggregation_threshold</tt>, meaning that all elements that are
+       * not smaller than <tt>aggregation_threshold</tt> times the diagonal
+       * element do couple strongly.
+       */
+      double aggregation_threshold;
+
+      /**
+       * Specifies the constant modes (near null space) of the matrix. This
+       * parameter tells AMG whether we work on a scalar equation (where the
+       * near null space only consists of ones, and default value is OK) or on
+       * a vector-valued equation. For a vector-valued problem with
+       * <tt>n_component</tt> components, the provided @p constant_modes
+       * should fulfill the following requirements:
+       * <ul>
+       * <li>  constant_modes.size() == <tt>n_component</tt> </li>
+       * <li>  constant_modes[*].size() == n_dof_local or
+       * constant_modes[*].size() == n_dof_global </li>
+       * <li>  constant_modes[<tt>ic</tt>][<tt>id</tt>] ==
+       * "<tt>id</tt><em>th</em> DoF corresponds to component <tt>ic</tt>"
+       * </li>
+       * </ul>
+       */
+      std::vector<std::vector<bool> > constant_modes;
+
+      /**
+       * Determines how many sweeps of the smoother should be performed. When
+       * the flag <tt>elliptic</tt> is set to <tt>true</tt>, i.e., for
+       * elliptic or almost elliptic problems, the polynomial degree of the
+       * Chebyshev smoother is set to <tt>smoother_sweeps</tt>. The term
+       * sweeps refers to the number of matrix-vector products performed in
+       * the Chebyshev case. In the non-elliptic case,
+       * <tt>smoother_sweeps</tt> sets the number of SSOR relaxation sweeps
+       * for post-smoothing to be performed.
+       */
+      unsigned int smoother_sweeps;
+
+      /**
+       * Determines the overlap in the SSOR/Chebyshev error smoother when run
+       * in parallel.
+       */
+      unsigned int smoother_overlap;
+
+      /**
+       * If this flag is set to <tt>true</tt>, then internal information from
+       * the ML preconditioner is printed to screen. This can be useful when
+       * debugging the preconditioner.
+       */
+      bool output_details;
+
+      /**
+       * Determines which smoother to use for the AMG cycle. Possibilities for
+       * smoother_type are the following:
+       * <ul>
+       * <li>  "Aztec" </li>
+       * <li>  "IFPACK" </li>
+       * <li>  "Jacobi" </li>
+       * <li>  "ML symmetric Gauss-Seidel" </li>
+       * <li>  "symmetric Gauss-Seidel" </li>
+       * <li>  "ML Gauss-Seidel" </li>
+       * <li>  "Gauss-Seidel" </li>
+       * <li>  "block Gauss-Seidel" </li>
+       * <li>  "symmetric block Gauss-Seidel" </li>
+       * <li>  "Chebyshev" </li>
+       * <li>  "MLS" </li>
+       * <li>  "Hiptmair" </li>
+       * <li>  "Amesos-KLU" </li>
+       * <li>  "Amesos-Superlu" </li>
+       * <li>  "Amesos-UMFPACK" </li>
+       * <li>  "Amesos-Superludist" </li>
+       * <li>  "Amesos-MUMPS" </li>
+       * <li>  "user-defined" </li>
+       * <li>  "SuperLU" </li>
+       * <li>  "IFPACK-Chebyshev" </li>
+       * <li>  "self" </li>
+       * <li>  "do-nothing" </li>
+       * <li>  "IC" </li>
+       * <li>  "ICT" </li>
+       * <li>  "ILU" </li>
+       * <li>  "ILUT" </li>
+       * <li>  "Block Chebyshev" </li>
+       * <li>  "IFPACK-Block Chebyshev" </li>
+       * </ul>
+       */
+      const char *smoother_type;
+
+      /**
+       * Determines which solver to use on the coarsest level. The same
+       * settings as for the smoother type are possible.
+       */
+      const char *coarse_type;
+    };
+
+    /**
+     * Destructor.
+     */
+    ~PreconditionAMG();
+
+
+    /**
+     * Let Trilinos compute a multilevel hierarchy for the solution of a
+     * linear system with the given matrix. The function uses the matrix
+     * format specified in TrilinosWrappers::SparseMatrix.
+     */
+    void initialize (const SparseMatrix   &matrix,
+                     const AdditionalData &additional_data = AdditionalData());
+
+    /**
+     * Let Trilinos compute a multilevel hierarchy for the solution of a
+     * linear system with the given matrix. As opposed to the other initialize
+     * function above, this function uses an abstract interface to an object
+     * of type Epetra_RowMatrix which allows a user to pass quite general
+     * objects to the ML preconditioner.
+     *
+     * This initialization routine is useful in cases where the operator to be
+     * preconditioned is not a TrilinosWrappers::SparseMatrix object but still
+     * allows getting a copy of the entries in each of the locally owned matrix
+     * rows (method ExtractMyRowCopy) and implements a matrix-vector product
+     * (methods Multiply or Apply). Examples are operators which provide
+     * faster matrix-vector multiplications than possible with matrix entries
+     * (matrix-free methods). These implementations can be beneficially
+     * combined with Chebyshev smoothers that only perform matrix-vector
+     * products. The interface class Epetra_RowMatrix is very flexible to
+     * enable this kind of implementation.
+     */
+    void initialize (const Epetra_RowMatrix &matrix,
+                     const AdditionalData   &additional_data = AdditionalData());
+
+    /**
+     * Let Trilinos compute a multilevel hierarchy for the solution of a
+     * linear system with the given matrix. The function uses the matrix
+     * format specified in TrilinosWrappers::SparseMatrix.
+     *
+     * This function is similar to the one above, but allows the user to set
+     * all the options of the Trilinos ML preconditioner. In order to find out
+     * about all the options for ML, we refer to the <a
+     * href="http://trilinos.sandia.gov/packages/ml/mlguide5.pdf">ML user's
+     * guide</a>. In particular, users need to follow the ML instructions in
+     * case a vector-valued problem ought to be solved.
+     */
+    void initialize (const SparseMatrix           &matrix,
+                     const Teuchos::ParameterList &ml_parameters);
+
+    /**
+     * Let Trilinos compute a multilevel hierarchy for the solution of a
+     * linear system with the given matrix. As opposed to the other initialize
+     * function above, this function uses an abstract interface to an object
+     * of type Epetra_RowMatrix which allows a user to pass quite general
+     * objects to the ML preconditioner.
+     */
+    void initialize (const Epetra_RowMatrix       &matrix,
+                     const Teuchos::ParameterList &ml_parameters);
+
+    /**
+     * Let Trilinos compute a multilevel hierarchy for the solution of a
+     * linear system with the given matrix. This function takes a deal.II
+     * matrix and copies the content into a Trilinos matrix, so the function
+     * can be considered rather inefficient.
+     */
+    template <typename number>
+    void initialize (const ::dealii::SparseMatrix<number> &deal_ii_sparse_matrix,
+                     const AdditionalData                 &additional_data = AdditionalData(),
+                     const double                          drop_tolerance = 1e-13,
+                     const ::dealii::SparsityPattern      *use_this_sparsity = 0);
+
+    /**
+     * This function can be used for a faster recalculation of the
+     * preconditioner construction when the matrix entries underlying the
+     * preconditioner have changed, but the matrix sparsity pattern has
+     * remained the same. What this function does is taking the already
+     * generated coarsening structure, computing the AMG prolongation and
+     * restriction according to a smoothed aggregation strategy and then
+     * building the whole multilevel hierarchy. This function can be
+     * considerably faster than the initialize function, since the coarsening
+     * pattern is usually the most difficult thing to do when setting up the
+     * AMG ML preconditioner.
+     */
+    void reinit ();
+
+    /**
+     * Destroys the preconditioner, leaving an object like just after having
+     * called the constructor.
+     */
+    void clear ();
+
+    /**
+     * Returns an estimate of the memory consumption of this class.
+     */
+    size_type memory_consumption () const;
+
+  private:
+    /**
+     * A copy of the deal.II matrix into Trilinos format.
+     */
+    std_cxx11::shared_ptr<SparseMatrix> trilinos_matrix;
+  };
+
+
+
+#if defined(DOXYGEN) || DEAL_II_TRILINOS_VERSION_GTE(11,14,0)
+  /**
+   * This class implements an algebraic multigrid (AMG) preconditioner based
+   * on the Trilinos MueLu implementation, which is a black-box preconditioner
+   * that works well for many PDE-based linear problems. The interface of
+   * PreconditionAMGMueLu is the same as the interface of PreconditionAMG
+   * except for the higher_order_elements parameter which does not exist in
+   * PreconditionAMGMueLu.
+   *
+   * This class requires Trilinos 11.14 or higher.
+   *
+   * This interface should not be considered as stable.
+   *
+   * @ingroup TrilinosWrappers
+   * @ingroup Preconditioners
+   * @author Bruno Turcksin, 2014
+   */
+  class PreconditionAMGMueLu : public PreconditionBase
+  {
+  public:
+
+
+    /**
+     * A data structure that is used to control details of how the algebraic
+     * multigrid is set up. The flags detailed in here are then passed to the
+     * Trilinos MueLu implementation. A structure of the current type is
+     * passed to the initialize() functions of PreconditionAMGMueLu.
+     */
+    struct AdditionalData
+    {
+      /**
+       * Constructor. By default, we pretend to work on elliptic problems with
+       * linear finite elements on a scalar equation.
+       */
+      AdditionalData (const bool                             elliptic = true,
+                      const unsigned int                     n_cycles = 1,
+                      const bool                             w_cycle = false,
+                      const double                           aggregation_threshold = 1e-4,
+                      const std::vector<std::vector<bool> > &constant_modes = std::vector<std::vector<bool> > (0),
+                      const unsigned int                     smoother_sweeps = 2,
+                      const unsigned int                     smoother_overlap = 0,
+                      const bool                             output_details = false,
+                      const char                            *smoother_type = "Chebyshev",
+                      const char                            *coarse_type = "Amesos-KLU");
+
+      /**
+       * Determines whether the AMG preconditioner should be optimized for
+       * elliptic problems (MueLu option smoothed aggregation SA, using a
+       * Chebyshev smoother) or for non-elliptic problems (MueLu option non-
+       * symmetric smoothed aggregation NSSA, smoother is SSOR with
+       * underrelaxation).
+       */
+      bool elliptic;
+
+      /**
+       * Defines how many multigrid cycles should be performed by the
+       * preconditioner.
+       */
+      unsigned int n_cycles;
+
+      /**
+       * Defines whether a w-cycle should be used instead of the standard
+       * setting of a v-cycle.
+       */
+      bool w_cycle;
+
+      /**
+       * This threshold tells the AMG setup how the coarsening should be
+       * performed. In the AMG used by MueLu, all points that strongly couple
+       * with the tentative coarse-level point form one aggregate. The term
+       * <em>strong coupling</em> is controlled by the variable
+       * <tt>aggregation_threshold</tt>, meaning that all elements that are
+       * not smaller than <tt>aggregation_threshold</tt> times the diagonal
+       * element do couple strongly.
+       */
+      double aggregation_threshold;
+
+      /**
+       * Specifies the constant modes (near null space) of the matrix. This
+       * parameter tells AMG whether we work on a scalar equation (where the
+       * near null space only consists of ones) or on a vector-valued
+       * equation.
+       */
+      std::vector<std::vector<bool> > constant_modes;
+
+      /**
+       * Determines how many sweeps of the smoother should be performed. When
+       * the flag <tt>elliptic</tt> is set to <tt>true</tt>, i.e., for
+       * elliptic or almost elliptic problems, the polynomial degree of the
+       * Chebyshev smoother is set to <tt>smoother_sweeps</tt>. The term
+       * sweeps refers to the number of matrix-vector products performed in
+       * the Chebyshev case. In the non-elliptic case,
+       * <tt>smoother_sweeps</tt> sets the number of SSOR relaxation sweeps
+       * for post-smoothing to be performed.
+       */
+      unsigned int smoother_sweeps;
+
+      /**
+       * Determines the overlap in the SSOR/Chebyshev error smoother when run
+       * in parallel.
+       */
+      unsigned int smoother_overlap;
+
+      /**
+       * If this flag is set to <tt>true</tt>, then internal information from
+       * the preconditioner is printed to screen. This can be useful when
+       * debugging the preconditioner.
+       */
+      bool output_details;
+
+      /**
+       * Determines which smoother to use for the AMG cycle. Possibilities for
+       * smoother_type are the following:
+       * <ul>
+       * <li>  "Aztec" </li>
+       * <li>  "IFPACK" </li>
+       * <li>  "Jacobi" </li>
+       * <li>  "ML symmetric Gauss-Seidel" </li>
+       * <li>  "symmetric Gauss-Seidel" </li>
+       * <li>  "ML Gauss-Seidel" </li>
+       * <li>  "Gauss-Seidel" </li>
+       * <li>  "block Gauss-Seidel" </li>
+       * <li>  "symmetric block Gauss-Seidel" </li>
+       * <li>  "Chebyshev" </li>
+       * <li>  "MLS" </li>
+       * <li>  "Hiptmair" </li>
+       * <li>  "Amesos-KLU" </li>
+       * <li>  "Amesos-Superlu" </li>
+       * <li>  "Amesos-UMFPACK" </li>
+       * <li>  "Amesos-Superludist" </li>
+       * <li>  "Amesos-MUMPS" </li>
+       * <li>  "user-defined" </li>
+       * <li>  "SuperLU" </li>
+       * <li>  "IFPACK-Chebyshev" </li>
+       * <li>  "self" </li>
+       * <li>  "do-nothing" </li>
+       * <li>  "IC" </li>
+       * <li>  "ICT" </li>
+       * <li>  "ILU" </li>
+       * <li>  "ILUT" </li>
+       * <li>  "Block Chebyshev" </li>
+       * <li>  "IFPACK-Block Chebyshev" </li>
+       * </ul>
+       */
+      const char *smoother_type;
+
+      /**
+       * Determines which solver to use on the coarsest level. The same
+       * settings as for the smoother type are possible.
+       */
+      const char *coarse_type;
+    };
+
+    /**
+     * Destructor.
+     */
+    ~PreconditionAMGMueLu();
+
+    /**
+     * Let Trilinos compute a multilevel hierarchy for the solution of a
+     * linear system with the given matrix. The function uses the matrix
+     * format specified in TrilinosWrappers::SparseMatrix.
+     */
+    void initialize (const SparseMatrix   &matrix,
+                     const AdditionalData &additional_data = AdditionalData());
+
+    /**
+     * Let Trilinos compute a multilevel hierarchy for the solution of a
+     * linear system with the given matrix. As opposed to the other initialize
+     * function above, this function uses an object of type
+     * Epetra_CrsMatrix.
+     */
+    void initialize (const Epetra_CrsMatrix &matrix,
+                     const AdditionalData   &additional_data = AdditionalData());
+
+    /**
+     * Let Trilinos compute a multilevel hierarchy for the solution of a
+     * linear system with the given matrix. The function uses the matrix
+     * format specified in TrilinosWrappers::SparseMatrix.
+     *
+     * This function is similar to the one above, but allows the user to set
+     * most of the options of the Trilinos ML preconditioner. In order to find
+     * out about all the options for ML, we refer to the <a
+     * href="http://trilinos.sandia.gov/packages/ml/mlguide5.pdf">ML user's
+     * guide</a>. Not all ML options have a corresponding MueLu option.
+     */
+    void initialize (const SparseMatrix   &matrix,
+                     Teuchos::ParameterList &muelu_parameters);
+
+    /**
+     * Let Trilinos compute a multilevel hierarchy for the solution of a
+     * linear system with the given matrix. As opposed to the other initialize
+     * function above, this function uses an object of type Epetra_CrsMatrix.
+     */
+    void initialize (const Epetra_CrsMatrix &matrix,
+                     Teuchos::ParameterList &muelu_parameters);
+
+    /**
+     * Let Trilinos compute a multilevel hierarchy for the solution of a
+     * linear system with the given matrix. This function takes a deal.II
+     * matrix and copies the content into a Trilinos matrix, so the function
+     * can be considered rather inefficient.
+     */
+    template <typename number>
+    void initialize (const ::dealii::SparseMatrix<number> &deal_ii_sparse_matrix,
+                     const AdditionalData                 &additional_data = AdditionalData(),
+                     const double                          drop_tolerance = 1e-13,
+                     const ::dealii::SparsityPattern      *use_this_sparsity = 0);
+
+    /**
+     * Destroys the preconditioner, leaving an object like just after having
+     * called the constructor.
+     */
+    void clear ();
+
+    /**
+     * Returns an estimate of the memory consumption of this class.
+     */
+    size_type memory_consumption () const;
+
+  private:
+    /**
+     * A copy of the deal.II matrix into Trilinos format.
+     */
+    std_cxx11::shared_ptr<SparseMatrix> trilinos_matrix;
+  };
+#endif
+
+
+
+  /**
+   * A wrapper class for an identity preconditioner for Trilinos matrices.
+   *
+   * @ingroup TrilinosWrappers
+   * @ingroup Preconditioners
+   * @author Bruno Turcksin, 2013
+   */
+  class PreconditionIdentity : public PreconditionBase
+  {
+  public:
+
+    /**
+     * Apply the preconditioner, i.e., dst = src.
+     */
+    void vmult (VectorBase       &dst,
+                const VectorBase &src) const;
+
+    /**
+     * Apply the transpose preconditioner, i.e., dst = src.
+     */
+    void Tvmult (VectorBase       &dst,
+                 const VectorBase &src) const;
+
+    /**
+     * Apply the preconditioner on deal.II data structures instead of the ones
+     * provided in the Trilinos wrapper class, i.e., dst = src.
+     */
+    void vmult (dealii::Vector<double>       &dst,
+                const dealii::Vector<double> &src) const;
+
+    /**
+     * Apply the transpose preconditioner on deal.II data structures instead
+     * of the ones provided in the Trilinos wrapper class, i.e., dst = src.
+     */
+    void Tvmult (dealii::Vector<double>       &dst,
+                 const dealii::Vector<double> &src) const;
+
+    /**
+     * Apply the preconditioner on deal.II parallel data structures instead of
+     * the ones provided in the Trilinos wrapper class, i.e., dst = src.
+     */
+    void vmult (parallel::distributed::Vector<double>       &dst,
+                const dealii::parallel::distributed::Vector<double> &src) const;
+
+    /**
+     * Apply the transpose preconditioner on deal.II parallel data structures
+     * instead of the ones provided in the Trilinos wrapper class, i.e., dst =
+     * src.
+     */
+    void Tvmult (parallel::distributed::Vector<double>       &dst,
+                 const dealii::parallel::distributed::Vector<double> &src) const;
+  };
+
+
+
+// -------------------------- inline and template functions ----------------------
+
+
+#ifndef DOXYGEN
+
+  inline
+  void
+  PreconditionBase::vmult (VectorBase       &dst,
+                           const VectorBase &src) const
+  {
+    Assert (dst.vector_partitioner().SameAs(preconditioner->OperatorRangeMap()),
+            ExcNonMatchingMaps("dst"));
+    Assert (src.vector_partitioner().SameAs(preconditioner->OperatorDomainMap()),
+            ExcNonMatchingMaps("src"));
+    // Hand both vectors to Trilinos; a nonzero return code is reported below.
+    const int ierr = preconditioner->ApplyInverse (src.trilinos_vector(),
+                                                   dst.trilinos_vector());
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+  }
+
+  inline
+  void
+  PreconditionBase::Tvmult (VectorBase       &dst,
+                            const VectorBase &src) const
+  {
+    Assert (dst.vector_partitioner().SameAs(preconditioner->OperatorRangeMap()),
+            ExcNonMatchingMaps("dst"));
+    Assert (src.vector_partitioner().SameAs(preconditioner->OperatorDomainMap()),
+            ExcNonMatchingMaps("src"));
+    // Transpose mode is switched on only for this single application.
+    preconditioner->SetUseTranspose(true);
+    const int ierr = preconditioner->ApplyInverse (src.trilinos_vector(),
+                                                   dst.trilinos_vector());
+    preconditioner->SetUseTranspose(false); // undo before AssertThrow can throw
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+  }
+
+
+  // For the implementation of the <code>vmult</code> function with deal.II
+  // data structures we note that invoking a call of the Trilinos
+  // preconditioner requires us to use Epetra vectors as well. We do this by
+  // providing a view, i.e., feed Trilinos with a pointer to the data, so we
+  // avoid copying the content of the vectors during the iteration (this
+  // function is only useful when used in serial anyway). In the declaration
+  // of the right hand side, we need to cast the source vector (that is
+  // <code>const</code> in all deal.II calls) to non-constant value, as this
+  // is the way Trilinos wants to have them.
+  inline void
+  PreconditionBase::vmult (dealii::Vector<double>       &dst,
+                           const dealii::Vector<double> &src) const
+  {
+    AssertDimension (static_cast<TrilinosWrappers::types::int_type>(dst.size()),
+                     preconditioner->OperatorDomainMap().NumMyElements());
+    AssertDimension (static_cast<TrilinosWrappers::types::int_type>(src.size()),
+                     preconditioner->OperatorRangeMap().NumMyElements());
+    // View mode: the Epetra vectors point at the arrays of dst and src.
+    Epetra_Vector dst_view (View, preconditioner->OperatorDomainMap(), dst.begin());
+    Epetra_Vector src_view (View, preconditioner->OperatorRangeMap(),
+                            const_cast<double *>(src.begin()));
+
+    const int ierr = preconditioner->ApplyInverse (src_view, dst_view);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+  }
+
+
+  // Transpose application. The transpose flag is switched back off before the
+  // error check so that a failing ApplyInverse() cannot leave it set.
+  inline void
+  PreconditionBase::Tvmult (dealii::Vector<double>       &dst,
+                            const dealii::Vector<double> &src) const
+  {
+    AssertDimension (static_cast<TrilinosWrappers::types::int_type>(dst.size()),
+                     preconditioner->OperatorDomainMap().NumMyElements());
+    AssertDimension (static_cast<TrilinosWrappers::types::int_type>(src.size()),
+                     preconditioner->OperatorRangeMap().NumMyElements());
+    Epetra_Vector tril_dst (View, preconditioner->OperatorDomainMap(), dst.begin());
+    Epetra_Vector tril_src (View, preconditioner->OperatorRangeMap(),
+                            const_cast<double *>(src.begin()));
+    preconditioner->SetUseTranspose(true);
+    const int ierr = preconditioner->ApplyInverse (tril_src, tril_dst);
+    preconditioner->SetUseTranspose(false);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+  }
+
+
+
+  // Apply the preconditioner to deal.II parallel vectors through Epetra views.
+  inline void
+  PreconditionBase::vmult (parallel::distributed::Vector<double>       &dst,
+                           const parallel::distributed::Vector<double> &src) const
+  {
+    AssertDimension (static_cast<TrilinosWrappers::types::int_type>(dst.local_size()),
+                     preconditioner->OperatorDomainMap().NumMyElements());
+    AssertDimension (static_cast<TrilinosWrappers::types::int_type>(src.local_size()),
+                     preconditioner->OperatorRangeMap().NumMyElements());
+    // View mode: the Epetra vectors point at the arrays of dst and src.
+    Epetra_Vector dst_view (View, preconditioner->OperatorDomainMap(), dst.begin());
+    Epetra_Vector src_view (View, preconditioner->OperatorRangeMap(),
+                            const_cast<double *>(src.begin()));
+
+    const int ierr = preconditioner->ApplyInverse (src_view, dst_view);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+  }
+
+  // Transpose application. The transpose flag is switched back off before the
+  // error check so that a failing ApplyInverse() cannot leave it set.
+  inline void
+  PreconditionBase::Tvmult (parallel::distributed::Vector<double>       &dst,
+                            const parallel::distributed::Vector<double> &src) const
+  {
+    AssertDimension (static_cast<TrilinosWrappers::types::int_type>(dst.local_size()),
+                     preconditioner->OperatorDomainMap().NumMyElements());
+    AssertDimension (static_cast<TrilinosWrappers::types::int_type>(src.local_size()),
+                     preconditioner->OperatorRangeMap().NumMyElements());
+    Epetra_Vector tril_dst (View, preconditioner->OperatorDomainMap(), dst.begin());
+    Epetra_Vector tril_src (View, preconditioner->OperatorRangeMap(),
+                            const_cast<double *>(src.begin()));
+
+    preconditioner->SetUseTranspose(true);
+    const int ierr = preconditioner->ApplyInverse (tril_src, tril_dst);
+    preconditioner->SetUseTranspose(false);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+  }
+
+  // Return the wrapped Epetra operator; throws if the pointer is null.
+  inline Epetra_Operator &
+  PreconditionBase::trilinos_operator () const
+  {
+    AssertThrow (preconditioner, ExcMessage("Trying to dereference a null pointer."));
+    return *preconditioner;
+  }
+
+#endif
+
+}
+
+
+/*@}*/
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_TRILINOS
+
+/*----------------------------   trilinos_precondition.h     ---------------------------*/
+
+#endif
+/*----------------------------   trilinos_precondition.h     ---------------------------*/
diff --git a/include/deal.II/lac/trilinos_solver.h b/include/deal.II/lac/trilinos_solver.h
new file mode 100644
index 0000000..9ab13fc
--- /dev/null
+++ b/include/deal.II/lac/trilinos_solver.h
@@ -0,0 +1,660 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__trilinos_solver_h
+#define dealii__trilinos_solver_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_TRILINOS
+
+#  include <deal.II/base/std_cxx11/shared_ptr.h>
+#  include <deal.II/lac/exceptions.h>
+#  include <deal.II/lac/solver_control.h>
+#  include <deal.II/lac/vector.h>
+#  include <deal.II/lac/parallel_vector.h>
+
+DEAL_II_DISABLE_EXTRA_DIAGNOSTICS
+#  include <Epetra_LinearProblem.h>
+#  include <AztecOO.h>
+#  include <Epetra_Operator.h>
+#  include <Amesos.h>
+DEAL_II_ENABLE_EXTRA_DIAGNOSTICS
+
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace TrilinosWrappers
+{
+  // forward declarations
+  class SparseMatrix;
+  class VectorBase;
+  class PreconditionBase;
+
+
+  /**
+   * Base class for solver classes using the Trilinos solvers. Since solvers
+   * in Trilinos are selected based on flags passed to a generic solver
+   * object, basically all the actual solver calls happen in this class, and
+   * derived classes simply set the right flags to select one solver or
+   * another, or to set certain parameters for individual solvers. For a
+   * general discussion on the Trilinos solver package AztecOO, we refer to
+   * the <a href="http://trilinos.sandia.gov/packages/aztecoo/AztecOOUserGuide.pdf">AztecOO
+   * user guide</a>.
+   *
+   * This solver class can also be used as a standalone class, where the
+   * respective Krylov method is set via the flag <tt>solver_name</tt>. This
+   * can be done at runtime (e.g., when parsing the solver from a
+   * ParameterList) and is similar to the deal.II class SolverSelector.
+   *
+   * @ingroup TrilinosWrappers
+   * @author Martin Kronbichler, 2008, 2009
+   */
+  class SolverBase
+  {
+  public:
+
+    /**
+     * Enumeration object that is set in the constructor of the derived
+     * classes and tells Trilinos which solver to use. This option can also be
+     * set in the user program, so one might use this base class instead of
+     * one of the specialized derived classes when the solver should be set at
+     * runtime. The available options are the enumerators of this type.
+     */
+    enum SolverName {cg, cgs, gmres, bicgstab, tfqmr} solver_name;
+
+    /**
+     * Standardized data struct to pipe additional data to the solver.
+     */
+
+    struct AdditionalData
+    {
+      /**
+       * Sets the additional data field to the desired output format and sets
+       * the restart parameter, which is used if the derived class is GMRES.
+       *
+       * TODO: Find a better way for setting the GMRES restart parameter since
+       * it is quite inelegant to set a specific option of one solver in the
+       * base class for all solvers.
+       */
+      explicit
+      AdditionalData (const bool         output_solver_details   = false,
+                      const unsigned int gmres_restart_parameter = 30);
+
+      /**
+       * Enables/disables the output of solver details (residual in each
+       * iteration, etc.).
+       */
+      const bool output_solver_details;
+
+      /**
+       * Restart parameter for the GMRES solver.
+       */
+      const unsigned int gmres_restart_parameter;
+    };
+
+    /**
+     * Constructor. Takes the solver control object and creates the solver.
+     */
+    SolverBase (SolverControl  &cn);
+
+    /**
+     * Second constructor. This constructor takes an enum object that
+     * specifies the solver name and sets the appropriate Krylov method.
+     */
+    SolverBase (const enum SolverName  solver_name,
+                SolverControl         &cn);
+
+    /**
+     * Destructor.
+     */
+    virtual ~SolverBase ();
+
+    /**
+     * Solve the linear system <tt>Ax=b</tt>. Depending on the information
+     * provided by derived classes and the object passed as a preconditioner,
+     * one of the linear solvers and preconditioners of Trilinos is chosen.
+     */
+    void
+    solve (const SparseMatrix     &A,
+           VectorBase             &x,
+           const VectorBase       &b,
+           const PreconditionBase &preconditioner);
+
+    /**
+     * Solve the linear system <tt>Ax=b</tt> where <tt>A</tt> is an operator.
+     * This function can be used for matrix-free computations. Depending on the
+     * information provided by derived classes and the object passed as a
+     * preconditioner, one of the linear solvers and preconditioners of
+     * Trilinos is chosen.
+     */
+    void
+    solve (Epetra_Operator        &A,
+           VectorBase             &x,
+           const VectorBase       &b,
+           const PreconditionBase &preconditioner);
+
+    /**
+     * Solve the linear system <tt>Ax=b</tt>. Depending on the information
+     * provided by derived classes and the object passed as a preconditioner,
+     * one of the linear solvers and preconditioners of Trilinos is chosen.
+     * This function works with matrices according to the TrilinosWrappers
+     * format, but can take deal.II vectors as argument. Since deal.II vectors
+     * are serial (not distributed), this function does only what you
+     * expect in case the matrix is locally owned. Otherwise, an exception
+     * will be thrown.
+     */
+    void
+    solve (const SparseMatrix           &A,
+           dealii::Vector<double>       &x,
+           const dealii::Vector<double> &b,
+           const PreconditionBase       &preconditioner);
+
+    /**
+     * Solve the linear system <tt>Ax=b</tt> where <tt>A</tt> is an operator.
+     * This function can be used for matrix-free computations. Depending on
+     * the information provided by derived classes and the object passed as a
+     * preconditioner, one of the linear solvers and preconditioners of
+     * Trilinos is chosen. This function works with an Epetra_Operator, but
+     * can take deal.II vectors as argument.
+     * Since deal.II vectors are serial (not distributed), this function does
+     * only what you expect in case the matrix is locally owned. Otherwise, an
+     * exception will be thrown.
+     */
+    void
+    solve (Epetra_Operator              &A,
+           dealii::Vector<double>       &x,
+           const dealii::Vector<double> &b,
+           const PreconditionBase       &preconditioner);
+
+    /**
+     * Solve the linear system <tt>Ax=b</tt> for deal.II's parallel
+     * distributed vectors. Depending on the information provided by derived
+     * classes and the object passed as a preconditioner, one of the linear
+     * solvers and preconditioners of Trilinos is chosen.
+     */
+    void
+    solve (const SparseMatrix                                  &A,
+           dealii::parallel::distributed::Vector<double>       &x,
+           const dealii::parallel::distributed::Vector<double> &b,
+           const PreconditionBase                              &preconditioner);
+
+    /**
+     * Solve the linear system <tt>Ax=b</tt> where <tt>A</tt> is an operator.
+     * This function can be used for matrix-free computations. Depending on the
+     * information provided by derived classes and the object passed as a
+     * preconditioner, one of the linear solvers and preconditioners of
+     * Trilinos is chosen.
+     */
+    void
+    solve (Epetra_Operator                                     &A,
+           dealii::parallel::distributed::Vector<double>       &x,
+           const dealii::parallel::distributed::Vector<double> &b,
+           const PreconditionBase                              &preconditioner);
+
+
+    /**
+     * Access to the object that controls convergence.
+     */
+    SolverControl &control() const;
+
+    /**
+     * Exception carrying the error number of a failed Trilinos function call.
+     */
+    DeclException1 (ExcTrilinosError,
+                    int,
+                    << "An error with error number " << arg1
+                    << " occurred while calling a Trilinos function");
+
+  protected:
+
+    /**
+     * Reference to the object that controls convergence of the iterative
+     * solver. In fact, for these Trilinos wrappers, Trilinos does so itself,
+     * but we copy the data from this object before starting the solution
+     * process, and copy the data back into it afterwards.
+     */
+    SolverControl &solver_control;
+
+  private:
+
+    /**
+     * The solve functions are used to properly set up the
+     * Epetra_LinearProblem; once that is done, this function solves it.
+     */
+    void do_solve(const PreconditionBase &preconditioner);
+
+    /**
+     * A structure that collects the Trilinos sparse matrix, the right hand
+     * side vector and the solution vector, which is passed down to the
+     * Trilinos solver.
+     */
+    std_cxx11::shared_ptr<Epetra_LinearProblem> linear_problem;
+
+    /**
+     * A structure that contains the Trilinos solver and preconditioner
+     * objects.
+     */
+    AztecOO solver;
+
+    /**
+     * Store a copy of the flags for this particular solver.
+     */
+    const AdditionalData additional_data;
+
+  };
+
+
+
+  /**
+   * An implementation of the solver interface using the Trilinos CG solver.
+   *
+   * @ingroup TrilinosWrappers
+   * @author Martin Kronbichler, 2008
+   */
+  class SolverCG : public SolverBase
+  {
+  public:
+    /**
+     * Standardized data struct to pipe additional data to the solver.
+     */
+
+    struct AdditionalData
+    {
+      /**
+       * Constructor. Sets the flag that controls output of solver details.
+       */
+      explicit
+      AdditionalData (const bool output_solver_details = false);
+
+      /**
+       * Enables/disables the output of solver details (residual in each
+       * iteration, etc.).
+       */
+      bool output_solver_details;
+    };
+
+    /**
+     * Constructor. In contrast to deal.II's own solvers, there is no need to
+     * give a vector memory object.
+     *
+     * The last argument takes a structure with additional, solver-dependent
+     * flags for tuning.
+     */
+    SolverCG (SolverControl        &cn,
+              const AdditionalData &data = AdditionalData());
+
+  protected:
+    /**
+     * Store a copy of the flags for this particular solver.
+     */
+    const AdditionalData additional_data;
+  };
+
+
+
+  /**
+   * An implementation of the solver interface using the Trilinos CGS solver.
+   *
+   * @ingroup TrilinosWrappers
+   * @author Martin Kronbichler, 2008
+   */
+  class SolverCGS : public SolverBase
+  {
+  public:
+    /**
+     * Standardized data struct to pipe additional data to the solver.
+     */
+    struct AdditionalData
+    {
+      /**
+       * Constructor. Sets the flag that controls output of solver details.
+       */
+      explicit
+      AdditionalData (const bool output_solver_details = false);
+
+      /**
+       * Enables/disables the output of solver details (residual in each
+       * iteration, etc.).
+       */
+      bool output_solver_details;
+    };
+
+    /**
+     * Constructor. In contrast to deal.II's own solvers, there is no need to
+     * give a vector memory object.
+     *
+     * The last argument takes a structure with additional, solver-dependent
+     * flags for tuning.
+     */
+    SolverCGS (SolverControl        &cn,
+               const AdditionalData &data = AdditionalData());
+
+  protected:
+    /**
+     * Store a copy of the flags for this particular solver.
+     */
+    const AdditionalData additional_data;
+  };
+
+
+
+  /**
+   * An implementation of the solver interface using the Trilinos GMRES solver.
+   *
+   * @ingroup TrilinosWrappers
+   * @author Martin Kronbichler, 2008
+   */
+  class SolverGMRES : public SolverBase
+  {
+  public:
+    /**
+     * Standardized data struct to pipe additional data to the solver.
+     */
+    struct AdditionalData
+    {
+      /**
+       * Constructor. By default, set the number of temporary vectors to 30,
+       * i.e., do a restart every 30 iterations.
+       */
+      explicit
+      AdditionalData (const bool         output_solver_details = false,
+                      const unsigned int restart_parameter = 30);
+
+      /**
+       * Enables/disables the output of solver details (residual in each
+       * iteration, etc.).
+       */
+      bool output_solver_details;
+
+      /**
+       * Maximum number of temporary vectors.
+       */
+      unsigned int restart_parameter;
+    };
+
+    /**
+     * Constructor. In contrast to deal.II's own solvers, there is no need to
+     * give a vector memory object.
+     *
+     * The last argument takes a structure with additional, solver-dependent
+     * flags for tuning.
+     */
+    SolverGMRES (SolverControl        &cn,
+                 const AdditionalData &data = AdditionalData());
+
+  protected:
+    /**
+     * Store a copy of the flags for this particular solver.
+     */
+    const AdditionalData additional_data;
+  };
+
+
+
+  /**
+   * An implementation of the solver interface using the Trilinos BiCGStab
+   * solver.
+   *
+   * @ingroup TrilinosWrappers
+   * @author Martin Kronbichler, 2008
+   */
+  class SolverBicgstab : public SolverBase
+  {
+  public:
+    /**
+     * Standardized data struct to pipe additional data to the solver.
+     */
+    struct AdditionalData
+    {
+      /**
+       * Constructor. Sets the flag that controls output of solver details.
+       */
+      explicit
+      AdditionalData (const bool output_solver_details = false);
+
+      /**
+       * Enables/disables the output of solver details (residual in each
+       * iteration, etc.).
+       */
+      bool output_solver_details;
+    };
+
+    /**
+     * Constructor. In contrast to deal.II's own solvers, there is no need to
+     * give a vector memory object.
+     *
+     * The last argument takes a structure with additional, solver-dependent
+     * flags for tuning.
+     */
+    SolverBicgstab (SolverControl        &cn,
+                    const AdditionalData &data = AdditionalData());
+
+  protected:
+    /**
+     * Store a copy of the flags for this particular solver.
+     */
+    const AdditionalData additional_data;
+  };
+
+
+
+  /**
+   * An implementation of the solver interface using the Trilinos TFQMR
+   * solver.
+   *
+   * @ingroup TrilinosWrappers
+   * @author Martin Kronbichler, 2008
+   */
+  class SolverTFQMR : public SolverBase
+  {
+  public:
+    /**
+     * Standardized data struct to pipe additional data to the solver.
+     */
+    struct AdditionalData
+    {
+      /**
+       * Constructor. Sets the flag that controls output of solver details.
+       */
+      explicit
+      AdditionalData (const bool output_solver_details = false);
+
+      /**
+       * Enables/disables the output of solver details (residual in each
+       * iteration, etc.).
+       */
+      bool output_solver_details;
+    };
+
+    /**
+     * Constructor. In contrast to deal.II's own solvers, there is no need to
+     * give a vector memory object.
+     *
+     * The last argument takes a structure with additional, solver-dependent
+     * flags for tuning.
+     */
+    SolverTFQMR (SolverControl        &cn,
+                 const AdditionalData &data = AdditionalData());
+
+  protected:
+    /**
+     * Store a copy of the flags for this particular solver.
+     */
+    const AdditionalData additional_data;
+  };
+
+
+
+  /**
+   * An implementation of Trilinos direct solvers (using the Amesos package).
+   * The data field AdditionalData::solver_type can be used to specify the
+   * type of solver. It allows the use of built-in solvers Amesos_Klu as well
+   * as third-party solvers Amesos_Superludist or Amesos_Mumps.
+   *
+   * For instructions on how to install Trilinos for use with direct solvers
+   * other than KLU, see the link to the Trilinos installation instructions
+   * linked to from the deal.II ReadMe file.
+   *
+   * @ingroup TrilinosWrappers
+   * @author Martin Kronbichler, 2009, Uwe Köcher, 2014
+   */
+  class SolverDirect
+  {
+  public:
+
+    /**
+     * Standardized data struct to pipe additional data to the solver.
+     */
+
+    struct AdditionalData
+    {
+      /**
+       * Constructor. Sets the output flag and the name of the solver type.
+       */
+      explicit
+      AdditionalData (const bool output_solver_details = false,
+                      const std::string &solver_type = "Amesos_Klu");
+
+      /**
+       * Enables/disables the output of solver details (residual in each
+       * iteration, etc.).
+       */
+      bool output_solver_details;
+
+      /**
+       * Set the solver type (for third party solver support of the Trilinos
+       * Amesos package). Possibilities are:
+       * <ul>
+       * <li>  "Amesos_Lapack" </li>
+       * <li>  "Amesos_Scalapack" </li>
+       * <li>  "Amesos_Klu" </li>
+       * <li>  "Amesos_Umfpack" </li>
+       * <li>  "Amesos_Pardiso" </li>
+       * <li>  "Amesos_Taucs" </li>
+       * <li>  "Amesos_Superlu" </li>
+       * <li>  "Amesos_Superludist" </li>
+       * <li>  "Amesos_Dscpack" </li>
+       * <li>  "Amesos_Mumps" </li>
+       * </ul>
+       * Note that the availability of these solvers in deal.II depends on
+       * which solvers were set when configuring Trilinos.
+       */
+      std::string solver_type;
+    };
+
+    /**
+     * Constructor. Takes the solver control object and creates the solver.
+     */
+    SolverDirect (SolverControl  &cn,
+                  const AdditionalData &data = AdditionalData());
+
+    /**
+     * Destructor.
+     */
+    virtual ~SolverDirect ();
+
+    /**
+     * Solve the linear system <tt>Ax=b</tt>. Creates a factorization of the
+     * matrix with the package chosen from the additional data structure and
+     * performs the solve. Note that there is no need for a preconditioner
+     * here.
+     */
+    void
+    solve (const SparseMatrix     &A,
+           VectorBase             &x,
+           const VectorBase       &b);
+
+    /**
+     * Solve the linear system <tt>Ax=b</tt>. This function works with Trilinos
+     * matrices, but takes deal.II serial vectors as argument. Since these
+     * vectors are not distributed, this function does only what you expect in
+     * case the matrix is serial (i.e., locally owned). Otherwise, an
+     * exception will be thrown.
+     */
+    void
+    solve (const SparseMatrix           &A,
+           dealii::Vector<double>       &x,
+           const dealii::Vector<double> &b);
+
+    /**
+     * Solve the linear system <tt>Ax=b</tt> for deal.II's own parallel
+     * vectors. Creates a factorization of the matrix with the package chosen
+     * from the additional data structure and performs the solve. Note that
+     * there is no need for a preconditioner here.
+     */
+    void
+    solve (const SparseMatrix                                  &A,
+           dealii::parallel::distributed::Vector<double>       &x,
+           const dealii::parallel::distributed::Vector<double> &b);
+
+    /**
+     * Access to the object that controls convergence.
+     */
+    SolverControl &control() const;
+
+    /**
+     * Exception carrying the error number of a failed Trilinos function call.
+     */
+    DeclException1 (ExcTrilinosError,
+                    int,
+                    << "An error with error number " << arg1
+                    << " occurred while calling a Trilinos function");
+
+  private:
+    /**
+     * Actually performs the operations for solving the linear system,
+     * including the factorization and forward and backward substitution.
+     */
+    void do_solve();
+
+    /**
+     * Reference to the object that controls convergence of the iterative
+     * solver. In fact, for these Trilinos wrappers, Trilinos does so itself,
+     * but we copy the data from this object before starting the solution
+     * process, and copy the data back into it afterwards.
+     */
+    SolverControl &solver_control;
+
+    /**
+     * A structure that collects the Trilinos sparse matrix, the right hand
+     * side vector and the solution vector, which is passed down to the
+     * Trilinos solver.
+     */
+    std_cxx11::shared_ptr<Epetra_LinearProblem> linear_problem;
+
+    /**
+     * A shared pointer to the Trilinos direct solver object (of type
+     * Amesos_BaseSolver).
+     */
+    std_cxx11::shared_ptr<Amesos_BaseSolver> solver;
+
+    /**
+     * Store a copy of the flags for this particular solver.
+     */
+    const AdditionalData additional_data;
+  };
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_TRILINOS
+
+/*----------------------------   trilinos_solver.h     ---------------------------*/
+
+#endif
+/*----------------------------   trilinos_solver.h     ---------------------------*/
diff --git a/include/deal.II/lac/trilinos_sparse_matrix.h b/include/deal.II/lac/trilinos_sparse_matrix.h
new file mode 100644
index 0000000..279039c
--- /dev/null
+++ b/include/deal.II/lac/trilinos_sparse_matrix.h
@@ -0,0 +1,2704 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__trilinos_sparse_matrix_h
+#define dealii__trilinos_sparse_matrix_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_TRILINOS
+
+#  include <deal.II/base/std_cxx11/shared_ptr.h>
+#  include <deal.II/base/subscriptor.h>
+#  include <deal.II/base/index_set.h>
+#  include <deal.II/lac/full_matrix.h>
+#  include <deal.II/lac/exceptions.h>
+#  include <deal.II/lac/trilinos_vector.h>
+#  include <deal.II/lac/vector_view.h>
+
+#  include <vector>
+#  include <cmath>
+#  include <memory>
+
+#  define TrilinosScalar double
+
+DEAL_II_DISABLE_EXTRA_DIAGNOSTICS
+#  include <Epetra_FECrsMatrix.h>
+#  include <Epetra_Map.h>
+#  include <Epetra_CrsGraph.h>
+#  include <Epetra_MultiVector.h>
+#  ifdef DEAL_II_WITH_MPI
+#    include <Epetra_MpiComm.h>
+#    include "mpi.h"
+#  else
+#    include "Epetra_SerialComm.h"
+#  endif
+DEAL_II_ENABLE_EXTRA_DIAGNOSTICS
+
+class Epetra_Export;
+
+DEAL_II_NAMESPACE_OPEN
+
+// forward declarations
+template <typename MatrixType> class BlockMatrixBase;
+
+template <typename number> class SparseMatrix;
+class SparsityPattern;
+class DynamicSparsityPattern;
+
+
+namespace TrilinosWrappers
+{
+  // forward declarations
+  class SparseMatrix;
+  class SparsityPattern;
+
+  /**
+   * Iterators for Trilinos matrices
+   */
+  namespace SparseMatrixIterators
+  {
+    // forward declaration
+    template <bool Constness> class Iterator;
+
+    /**
+     * Exception; judging by its name, for positions past the matrix end.
+     */
+    DeclException0 (ExcBeyondEndOfMatrix);
+
+    /**
+     * Exception for access to a row outside the locally stored row range.
+     */
+    DeclException3 (ExcAccessToNonlocalRow,
+                    std::size_t, std::size_t, std::size_t,
+                    << "You tried to access row " << arg1
+                    << " of a distributed sparsity pattern, "
+                    << " but only rows " << arg2 << " through " << arg3
+                    << " are stored locally and can be accessed.");
+
+    /**
+     * Handling of indices for both constant and non constant Accessor objects
+     *
+     * For a regular dealii::SparseMatrix, we would use an accessor for the
+     * sparsity pattern. For Trilinos matrices, this does not seem so simple,
+     * therefore, we write a little base class here.
+     *
+     * @author Guido Kanschat
+     * @date 2012
+     */
+    class AccessorBase
+    {
+    public:
+      /**
+       * Declare the type for container size.
+       */
+      typedef dealii::types::global_dof_index size_type;
+
+      /**
+       * Constructor taking the matrix, a row, and an index within that row.
+       */
+      AccessorBase (SparseMatrix *matrix,
+                    const size_type  row,
+                    const size_type  index);
+
+      /**
+       * Row number of the element represented by this object.
+       */
+      size_type row() const;
+
+      /**
+       * Index in row of the element represented by this object.
+       */
+      size_type index() const;
+
+      /**
+       * Column number of the element represented by this object.
+       */
+      size_type column() const;
+
+    protected:
+      /**
+       * Pointer to the matrix object. Derived classes should treat it as a
+       * pointer to a const or to a non-const matrix, as appropriate. In
+       * order to be able to implement both, it is not const here, so handle
+       * with care!
+       */
+      mutable SparseMatrix *matrix;
+      /**
+       * Current row number.
+       */
+      size_type a_row;
+
+      /**
+       * Current index in row.
+       */
+      size_type a_index;
+
+      /**
+       * Discard the old row caches (they may still be used by other
+       * accessors) and generate new ones for the row pointed to presently by
+       * this accessor.
+       */
+      void visit_present_row ();
+
+      /**
+       * Cache where we store the column indices of the present row. This is
+       * necessary, since Trilinos makes access to the elements of its
+       * matrices rather hard, and it is much more efficient to copy all
+       * column entries of a row once when we enter it than repeatedly asking
+       * Trilinos for individual ones. This also makes some sense since it is
+       * likely that we will access them sequentially anyway.
+       *
+       * In order to keep copying of iterators/accessors acceptably cheap,
+       * we keep a shared pointer to these entries so that more
+       * than one accessor can access this data if necessary.
+       */
+      std_cxx11::shared_ptr<std::vector<size_type> > colnum_cache;
+
+      /**
+       * Cache for the values of this row, also held via a shared pointer.
+       */
+      std_cxx11::shared_ptr<std::vector<TrilinosScalar> > value_cache;
+    };
+
+    /**
+     * General template for sparse matrix accessors. The single template
+     * argument denotes the constness of the matrix that the accessor
+     * refers to.
+     *
+     * The general template is not implemented, only the specializations for
+     * the two possible values of the template argument. Therefore, the
+     * interface listed here only serves as a template provided since doxygen
+     * does not link the specializations.
+     */
+    template <bool Constness>
+    class Accessor : public AccessorBase
+    {
+      /**
+       * Value of this matrix entry.
+       */
+      TrilinosScalar value() const;
+
+      /**
+       * Value of this matrix entry, returned as a reference.
+       */
+      TrilinosScalar &value();
+    };
+
+    /**
+     * The specialization for a const Accessor, i.e., one that operates on a
+     * <tt>const SparseMatrix</tt> and only offers read access to the matrix
+     * entry through value().
+     */
+    template<>
+    class Accessor<true> : public AccessorBase
+    {
+    public:
+      /**
+       * Typedef for the type (including constness) of the matrix to be used
+       * here. For this specialization, the matrix type is const.
+       */
+      typedef const SparseMatrix MatrixType;
+
+      /**
+       * Constructor. Since we use accessors only for read access, a const
+       * matrix pointer is sufficient. The arguments @p row and @p index
+       * denote the row and the index within that row of the element this
+       * accessor represents.
+       */
+      Accessor (MatrixType *matrix,
+                const size_type  row,
+                const size_type  index);
+
+      /**
+       * Copy constructor to get from a const or non-const accessor to a const
+       * accessor. The template argument @p Other is the constness of the
+       * accessor that is copied from.
+       */
+      template <bool Other>
+      Accessor (const Accessor<Other> &a);
+
+      /**
+       * Value of this matrix entry, returned by value (read-only access).
+       */
+      TrilinosScalar value() const;
+
+    private:
+      /**
+       * Make all instantiations of the Iterator class template friends.
+       */
+      template <bool> friend class Iterator;
+    };
+
+    /**
+     * The specialization for a mutable Accessor, i.e., one that operates on
+     * a non-const SparseMatrix. Its value() function returns an object of
+     * the nested Reference class, which acts as a proxy for the matrix
+     * entry: it converts to TrilinosScalar for reading and offers assignment
+     * and compound assignment operators for writing.
+     */
+    template<>
+    class Accessor<false> : public AccessorBase
+    {
+      class Reference
+      {
+      public:
+        /**
+         * Constructor, taking the accessor that denotes the matrix element
+         * this reference object stands for.
+         */
+        Reference (const Accessor<false> &accessor);
+
+        /**
+         * Conversion operator to the data type of the matrix. This is what
+         * provides read access to the element.
+         */
+        operator TrilinosScalar () const;
+
+        /**
+         * Set the element of the matrix we presently point to to @p n.
+         */
+        const Reference &operator = (const TrilinosScalar n) const;
+
+        /**
+         * Add @p n to the element of the matrix we presently point to.
+         */
+        const Reference &operator += (const TrilinosScalar n) const;
+
+        /**
+         * Subtract @p n from the element of the matrix we presently point to.
+         */
+        const Reference &operator -= (const TrilinosScalar n) const;
+
+        /**
+         * Multiply the element of the matrix we presently point to by @p n.
+         */
+        const Reference &operator *= (const TrilinosScalar n) const;
+
+        /**
+         * Divide the element of the matrix we presently point to by @p n.
+         */
+        const Reference &operator /= (const TrilinosScalar n) const;
+
+      private:
+        /**
+         * Reference to the accessor that denotes which element we presently
+         * point to.
+         */
+        Accessor &accessor;
+      };
+
+    public:
+      /**
+       * Typedef for the type (including constness) of the matrix to be used
+       * here. For this specialization, the matrix type is non-const.
+       */
+      typedef SparseMatrix MatrixType;
+
+      /**
+       * Constructor. Since this accessor also allows write access to the
+       * matrix entries, a pointer to a non-const matrix is required. The
+       * arguments @p row and @p index denote the row and the index within
+       * that row of the element this accessor represents.
+       */
+      Accessor (MatrixType *matrix,
+                const size_type  row,
+                const size_type  index);
+
+      /**
+       * Value of this matrix entry, returned as a Reference proxy object
+       * through which the entry can be read and modified.
+       */
+      Reference value() const;
+
+    private:
+      /**
+       * Make all instantiations of the Iterator class template friends.
+       */
+      template <bool> friend class Iterator;
+      /**
+       * Make the nested Reference class a friend.
+       */
+      friend class Reference;
+    };
+
+    /**
+     * This class acts as an iterator walking over the elements of Trilinos
+     * matrices. The implementation of this class is similar to the one for
+     * PETSc matrices. The template argument @p Constness selects whether the
+     * iterator operates on a const or a non-const matrix; it determines the
+     * Accessor specialization that is stored in and handed out by the
+     * iterator.
+     *
+     * Note that Trilinos stores the elements within each row in ascending
+     * order. This is opposed to the deal.II sparse matrix style where the
+     * diagonal element (if it exists) is stored before all other values, and
+     * the PETSc sparse matrices, where one can't guarantee a certain order of
+     * the elements.
+     *
+     * @ingroup TrilinosWrappers
+     * @author Martin Kronbichler, Wolfgang Bangerth, 2008
+     */
+    template <bool Constness>
+    class Iterator
+    {
+    public:
+      /**
+       * Declare type for container size.
+       */
+      typedef dealii::types::global_dof_index size_type;
+
+      /**
+       * Typedef for the matrix type (including constness) we are to operate
+       * on. It is taken from the Accessor specialization that matches
+       * @p Constness.
+       */
+      typedef typename Accessor<Constness>::MatrixType MatrixType;
+
+      /**
+       * Constructor. Create an iterator into the matrix @p matrix for the
+       * given row and the index within it.
+       */
+      Iterator (MatrixType *matrix,
+                const size_type  row,
+                const size_type  index);
+
+      /**
+       * Copy constructor with optional change of constness. The template
+       * argument @p Other is the constness of the iterator copied from.
+       */
+      template <bool Other>
+      Iterator(const Iterator<Other> &other);
+
+      /**
+       * Prefix increment. Returns a reference to this iterator.
+       */
+      Iterator<Constness> &operator++ ();
+
+      /**
+       * Postfix increment. Returns an iterator by value.
+       */
+      Iterator<Constness> operator++ (int);
+
+      /**
+       * Dereferencing operator. Returns a const reference to an accessor
+       * for the matrix element the iterator points to.
+       */
+      const Accessor<Constness> &operator* () const;
+
+      /**
+       * Dereferencing operator. Returns a pointer to a const accessor for
+       * the matrix element the iterator points to.
+       */
+      const Accessor<Constness> *operator-> () const;
+
+      /**
+       * Comparison. True, if both iterators point to the same matrix
+       * position.
+       */
+      bool operator == (const Iterator<Constness> &) const;
+
+      /**
+       * Inverse of <tt>==</tt>.
+       */
+      bool operator != (const Iterator<Constness> &) const;
+
+      /**
+       * Comparison operator. Result is true if either the first row number is
+       * smaller or if the row numbers are equal and the first index is
+       * smaller.
+       */
+      bool operator < (const Iterator<Constness> &) const;
+
+      /**
+       * Comparison operator. The opposite of the previous operator.
+       */
+      bool operator > (const Iterator<Constness> &) const;
+
+      /**
+       * Exception denoting an attempt to access an element of a row with an
+       * index that exceeds the number of elements in that row. The first
+       * argument is the row, the second the index within the row.
+       */
+      DeclException2 (ExcInvalidIndexWithinRow,
+                      size_type, size_type,
+                      << "Attempt to access element " << arg2
+                      << " of row " << arg1
+                      << " which doesn't have that many elements.");
+
+    private:
+      /**
+       * Store an object of the accessor class. Its type depends on
+       * @p Constness.
+       */
+      Accessor<Constness> accessor;
+
+      template <bool Other> friend class Iterator;
+    };
+
+  }
+
+
+  /**
+   * This class implements a wrapper to use the Trilinos distributed sparse
+   * matrix class Epetra_FECrsMatrix. This is precisely the kind of matrix we
+   * deal with all the time - we most likely get it from some assembly
+   * process, where also entries not locally owned might need to be written
+   * and hence need to be forwarded to the owner process.  This class is
+   * designed to be used in a distributed memory architecture with an MPI
+   * compiler on the bottom, but works equally well also for serial processes.
+   * The only requirement for this class to work is that Trilinos has been
+   * installed with the same compiler as is used for generating deal.II.
+   *
+   * The interface of this class is modeled after the existing SparseMatrix
+   * class in deal.II. It has almost the same member functions, and is often
+   * exchangeable. However, since Trilinos only supports a single scalar type
+   * (double), it is not templated, and only works with doubles.
+   *
+   * Note that Trilinos only guarantees that operations do what you expect if
+   * the function @p GlobalAssemble has been called after matrix assembly.
+   * Therefore, you need to call SparseMatrix::compress() before you actually
+   * use the matrix. This also calls @p FillComplete that compresses the
+   * storage format for sparse matrices by discarding unused elements.
+   * Trilinos allows to continue with assembling the matrix after calls to
+   * these functions, though.
+   *
+   * <h3>Thread safety of Trilinos matrices</h3>
+   *
+   * When writing into Trilinos matrices from several threads in shared
+   * memory, several things must be kept in mind as there are no built-in locks
+   * in this class to prevent data races. Simultaneous access to the same
+   * matrix row at the same time can lead to data races and must be explicitly
+   * avoided by the user. However, it is possible to access <b>different</b>
+   * rows of the matrix from several threads simultaneously under the
+   * following three conditions:
+   * <ul>
+   * <li> The matrix uses only one MPI process.
+   * <li> The matrix has been initialized with the reinit() method with a
+   * DynamicSparsityPattern (that includes the set of locally relevant rows,
+   * i.e., the rows that an assembly routine will possibly write into).
+   * <li> The matrix has been initialized from a
+   * TrilinosWrappers::SparsityPattern object that in turn has been
+   * initialized with the reinit function specifying three index sets, one for
+   * the rows, one for the columns and for the larger set of @p
+   * writeable_rows, and the operation is an addition. At some point in the
+   * future, Trilinos support might be complete enough such that initializing
+   * from a TrilinosWrappers::SparsityPattern that has been filled by a
+   * function similar to DoFTools::make_sparsity_pattern always results in a
+   * matrix that allows several processes to write into the same matrix row.
+   * However, Trilinos until version at least 11.12 does not correctly support
+   * this feature.
+   * </ul>
+   *
+   * Note that all other reinit methods and constructors of
+   * TrilinosWrappers::SparsityPattern will result in a matrix that needs to
+   * allocate off-processor entries on demand, which breaks thread-safety. Of
+   * course, using the respective reinit method for the block Trilinos
+   * sparsity pattern and block matrix also results in thread-safety.
+   *
+   * @ingroup TrilinosWrappers
+   * @ingroup Matrix1
+   * @author Martin Kronbichler, Wolfgang Bangerth, 2008, 2009
+   */
+  class SparseMatrix : public Subscriptor
+  {
+  public:
+    /**
+     * Declare the type for container size.
+     */
+    typedef dealii::types::global_dof_index size_type;
+
+    /**
+     * A structure that describes some of the traits of this class in terms of
+     * its run-time behavior. Some other classes (such as the block matrix
+     * classes) that take one or other of the matrix classes as its template
+     * parameters can tune their behavior based on the variables in this
+     * class.
+     */
+    struct Traits
+    {
+      /**
+       * It is safe to elide additions of zeros to individual elements of this
+       * matrix.
+       */
+      static const bool zero_addition_can_be_elided = true;
+    };
+
+    /**
+     * Declare a typedef for the iterator class.
+     */
+    typedef SparseMatrixIterators::Iterator<false> iterator;
+
+    /**
+     * Declare a typedef for the const iterator class.
+     */
+    typedef SparseMatrixIterators::Iterator<true> const_iterator;
+
+    /**
+     * Declare a typedef in analogy to all the other container classes.
+     */
+    typedef TrilinosScalar value_type;
+
+    /**
+     * @name Constructors and initialization.
+     */
+//@{
+    /**
+     * Default constructor. Generates an empty (zero-size) matrix.
+     */
+    SparseMatrix ();
+
+    /**
+     * Generate a matrix that is completely stored locally, having #m rows and
+     * #n columns.
+     *
+     * The number of column entries per row is specified by the
+     * @p n_max_entries_per_row argument.
+     */
+    SparseMatrix (const size_type  m,
+                  const size_type  n,
+                  const unsigned int  n_max_entries_per_row);
+
+    /**
+     * Generate a matrix that is completely stored locally, having #m rows and
+     * #n columns.
+     *
+     * The vector <tt>n_entries_per_row</tt> specifies the number of entries
+     * in each row.
+     */
+    SparseMatrix (const size_type                  m,
+                  const size_type                  n,
+                  const std::vector<unsigned int> &n_entries_per_row);
+
+    /**
+     * Generate a matrix from a Trilinos sparsity pattern object.
+     */
+    SparseMatrix (const SparsityPattern &InputSparsityPattern);
+
+    /**
+     * Destructor. Made virtual so that one can use pointers to this class.
+     */
+    virtual ~SparseMatrix ();
+
+    /**
+     * This function initializes the Trilinos matrix with a deal.II sparsity
+     * pattern, i.e. it makes the Trilinos Epetra matrix know the position of
+     * nonzero entries according to the sparsity pattern. This function is
+     * meant for use in serial programs, where there is no need to specify how
+     * the matrix is going to be distributed among different processors. This
+     * function works in %parallel, too, but it is recommended to manually
+     * specify the %parallel partitioning of the matrix using an Epetra_Map.
+     * When run in %parallel, it is currently necessary that each processor
+     * holds the sparsity_pattern structure because each processor sets its
+     * rows.
+     *
+     * This is a collective operation that needs to be called on all
+     * processors in order to avoid a dead lock.
+     */
+    template<typename SparsityPatternType>
+    void reinit (const SparsityPatternType &sparsity_pattern);
+
+    /**
+     * This function reinitializes the Trilinos sparse matrix from a (possibly
+     * distributed) Trilinos sparsity pattern.
+     *
+     * This is a collective operation that needs to be called on all
+     * processors in order to avoid a dead lock.
+     *
+     * If you want to write to the matrix from several threads and use MPI,
+     * you need to use this reinit method with a sparsity pattern that has
+     * been created with explicitly stating writeable rows. In all other
+     * cases, you cannot mix MPI with multithreaded writing into the matrix.
+     */
+    void reinit (const SparsityPattern &sparsity_pattern);
+
+    /**
+     * This function copies the layout of @p sparse_matrix to the calling
+     * matrix. The values are not copied, but you can use copy_from() for
+     * this.
+     *
+     * This is a collective operation that needs to be called on all
+     * processors in order to avoid a dead lock.
+     */
+    void reinit (const SparseMatrix &sparse_matrix);
+
+    /**
+     * This function initializes the Trilinos matrix using the deal.II sparse
+     * matrix and the entries stored therein. It uses a threshold to copy only
+     * elements with modulus larger than the threshold (so zeros in the
+     * deal.II matrix can be filtered away).
+     *
+     * The optional parameter <tt>copy_values</tt> decides whether only the
+     * sparsity structure of the input matrix should be used or the matrix
+     * entries should be copied, too.
+     *
+     * This is a collective operation that needs to be called on all
+     * processors in order to avoid a deadlock.
+     *
+     * @note If a different sparsity pattern is given in the last argument
+     * (i.e., one that differs from the one used in the sparse matrix given in
+     * the first argument), then the resulting Trilinos matrix will have the
+     * sparsity pattern so given. This of course also means that all entries
+     * in the given matrix that are not part of this separate sparsity pattern
+     * will in fact be dropped.
+     */
+    template <typename number>
+    void reinit (const ::dealii::SparseMatrix<number> &dealii_sparse_matrix,
+                 const double                          drop_tolerance=1e-13,
+                 const bool                            copy_values=true,
+                 const ::dealii::SparsityPattern      *use_this_sparsity=0);
+
+    /**
+     * This reinit function takes as input a Trilinos Epetra_CrsMatrix and
+     * copies its sparsity pattern. If so requested, even the content (values)
+     * will be copied.
+     */
+    void reinit (const Epetra_CrsMatrix &input_matrix,
+                 const bool              copy_values = true);
+//@}
+    /**
+     * @name Constructors and initialization using an Epetra_Map description
+     */
+//@{
+    /**
+     * Constructor using an Epetra_Map to describe the %parallel partitioning.
+     * The parameter @p n_max_entries_per_row sets the number of nonzero
+     * entries in each row that will be allocated. Note that this number does
+     * not need to be exact, and it is even allowed that the actual matrix
+     * structure has more nonzero entries than specified in the constructor.
+     * However it is still advantageous to provide good estimates here since
+     * this will considerably increase the performance of the matrix setup.
+     * However, there is no effect in the performance of matrix-vector
+     * products, since Trilinos reorganizes the matrix memory prior to use (in
+     * the compress() step).
+     *
+     * @deprecated Use the respective method with IndexSet argument instead.
+     */
+    SparseMatrix (const Epetra_Map  &parallel_partitioning,
+                  const size_type    n_max_entries_per_row = 0) DEAL_II_DEPRECATED;
+
+    /**
+     * Same as before, but now set a value of nonzeros for each matrix row.
+     * Since we know the number of elements in the matrix exactly in this
+     * case, we can already allocate the right amount of memory, which makes
+     * the creation process including the insertion of nonzero elements by the
+     * respective SparseMatrix::reinit call considerably faster.
+     *
+     * @deprecated Use the respective method with IndexSet argument instead.
+     */
+    SparseMatrix (const Epetra_Map                &parallel_partitioning,
+                  const std::vector<unsigned int> &n_entries_per_row) DEAL_II_DEPRECATED;
+
+    /**
+     * This constructor is similar to the one above, but it now takes two
+     * different Epetra maps for rows and columns. This interface is meant to
+     * be used for generating rectangular matrices, where one map describes
+     * the %parallel partitioning of the dofs associated with the matrix rows
+     * and the other one the partitioning of dofs in the matrix columns. Note
+     * that there is no real parallelism along the columns – the
+     * processor that owns a certain row always owns all the column elements,
+     * no matter how far they might be spread out. The second Epetra_Map is
+     * only used to specify the number of columns and for internal
+     * arrangements when doing matrix-vector products with vectors based on
+     * that column map.
+     *
+     * The integer input @p n_max_entries_per_row defines the number of
+     * columns entries per row that will be allocated.
+     *
+     * @deprecated Use the respective method with IndexSet argument instead.
+     */
+    SparseMatrix (const Epetra_Map &row_parallel_partitioning,
+                  const Epetra_Map &col_parallel_partitioning,
+                  const size_type   n_max_entries_per_row = 0) DEAL_II_DEPRECATED;
+
+    /**
+     * This constructor is similar to the one above, but it now takes two
+     * different Epetra maps for rows and columns. This interface is meant to
+     * be used for generating rectangular matrices, where one map specifies
+     * the %parallel distribution of degrees of freedom associated with matrix
+     * rows and the second one specifies the %parallel distribution of the dofs
+     * associated with columns in the matrix. The second map also provides
+     * information for the internal arrangement in matrix vector products
+     * (i.e., the distribution of the vector this matrix is to be multiplied
+     * with), but is not used for the distribution of the columns –
+     * rather, all column elements of a row are stored on the same processor
+     * in any case. The vector <tt>n_entries_per_row</tt> specifies the number
+     * of entries in each row of the newly generated matrix.
+     *
+     * @deprecated Use the respective method with IndexSet argument instead.
+     */
+    SparseMatrix (const Epetra_Map                &row_parallel_partitioning,
+                  const Epetra_Map                &col_parallel_partitioning,
+                  const std::vector<unsigned int> &n_entries_per_row) DEAL_II_DEPRECATED;
+
+    /**
+     * This function initializes the Trilinos Epetra matrix according to
+     * the specified sparsity_pattern, and also reassigns the matrix rows to
+     * different processes according to a user-supplied Epetra map. In
+     * programs following the style of the tutorial programs, this function
+     * (and the respective call for a rectangular matrix) are the natural way
+     * to initialize the matrix size, its distribution among the MPI processes
+     * (if run in %parallel) as well as the location of non-zero elements.
+     * Trilinos stores the sparsity pattern internally, so it won't be needed
+     * any more after this call, in contrast to the deal.II own object. The
+     * optional argument @p exchange_data can be used for reinitialization
+     * with a sparsity pattern that is not fully constructed. This feature is
+     * only implemented for input sparsity patterns of type
+     * DynamicSparsityPattern. If the flag is not set, each processor just
+     * sets the elements in the sparsity pattern that belong to its rows.
+     *
+     * If the sparsity pattern given to this function is of type
+     * DynamicSparsityPattern, then a matrix will be created that allows
+     * several threads to write into different rows of the matrix at the same
+     * time, also with MPI, as opposed to most other reinit() methods.
+     *
+     * This is a collective operation that needs to be called on all
+     * processors in order to avoid a dead lock.
+     *
+     * @deprecated Use the respective method with IndexSet argument instead.
+     */
+    template<typename SparsityPatternType>
+    void reinit (const Epetra_Map          &parallel_partitioning,
+                 const SparsityPatternType &sparsity_pattern,
+                 const bool                 exchange_data = false) DEAL_II_DEPRECATED;
+
+    /**
+     * This function is similar to the other initialization function above,
+     * but now also reassigns the matrix rows and columns according to two
+     * user-supplied Epetra maps.  To be used for rectangular matrices. The
+     * optional argument @p exchange_data can be used for reinitialization
+     * with a sparsity pattern that is not fully constructed. This feature is
+     * only implemented for input sparsity patterns of type
+     * DynamicSparsityPattern.
+     *
+     * This is a collective operation that needs to be called on all
+     * processors in order to avoid a dead lock.
+     *
+     * @deprecated Use the respective method with IndexSet argument instead.
+     */
+    template<typename SparsityPatternType>
+    void reinit (const Epetra_Map          &row_parallel_partitioning,
+                 const Epetra_Map          &col_parallel_partitioning,
+                 const SparsityPatternType &sparsity_pattern,
+                 const bool                 exchange_data = false) DEAL_II_DEPRECATED;
+
+    /**
+     * This function initializes the Trilinos matrix using the deal.II sparse
+     * matrix and the entries stored therein. It uses a threshold to copy only
+     * elements with modulus larger than the threshold (so zeros in the
+     * deal.II matrix can be filtered away). In contrast to the other reinit
+     * function with deal.II sparse matrix argument, this function takes a
+     * %parallel partitioning specified by the user instead of internally
+     * generating it.
+     *
+     * The optional parameter <tt>copy_values</tt> decides whether only the
+     * sparsity structure of the input matrix should be used or the matrix
+     * entries should be copied, too.
+     *
+     * This is a collective operation that needs to be called on all
+     * processors in order to avoid a dead lock.
+     *
+     * @deprecated Use the respective method with IndexSet argument instead.
+     */
+    template <typename number>
+    void reinit (const Epetra_Map                     &parallel_partitioning,
+                 const ::dealii::SparseMatrix<number> &dealii_sparse_matrix,
+                 const double                          drop_tolerance=1e-13,
+                 const bool                            copy_values=true,
+                 const ::dealii::SparsityPattern      *use_this_sparsity=0) DEAL_II_DEPRECATED;
+
+    /**
+     * This function is similar to the other initialization function with
+     * deal.II sparse matrix input above, but now takes Epetra maps for both
+     * the rows and the columns of the matrix. Chosen for rectangular
+     * matrices.
+     *
+     * The optional parameter <tt>copy_values</tt> decides whether only the
+     * sparsity structure of the input matrix should be used or the matrix
+     * entries should be copied, too.
+     *
+     * This is a collective operation that needs to be called on all
+     * processors in order to avoid a dead lock.
+     *
+     * @deprecated Use the respective method with IndexSet argument instead.
+     */
+    template <typename number>
+    void reinit (const Epetra_Map                      &row_parallel_partitioning,
+                 const Epetra_Map                      &col_parallel_partitioning,
+                 const ::dealii::SparseMatrix<number>  &dealii_sparse_matrix,
+                 const double                           drop_tolerance=1e-13,
+                 const bool                             copy_values=true,
+                 const ::dealii::SparsityPattern      *use_this_sparsity=0) DEAL_II_DEPRECATED;
+//@}
+    /**
+     * @name Constructors and initialization using an IndexSet description
+     */
+//@{
+    /**
+     * Constructor using an IndexSet and an MPI communicator to describe the
+     * %parallel partitioning. The parameter @p n_max_entries_per_row sets the
+     * number of nonzero entries in each row that will be allocated. Note that
+     * this number does not need to be exact, and it is even allowed that the
+     * actual matrix structure has more nonzero entries than specified in the
+     * constructor. However it is still advantageous to provide good estimates
+     * here since this will considerably increase the performance of the
+     * matrix setup. However, there is no effect in the performance of matrix-
+     * vector products, since Trilinos reorganizes the matrix memory prior to
+     * use (in the compress() step).
+     */
+    SparseMatrix (const IndexSet    &parallel_partitioning,
+                  const MPI_Comm    &communicator = MPI_COMM_WORLD,
+                  const unsigned int n_max_entries_per_row = 0);
+
+    /**
+     * Same as before, but now set the number of nonzeros in each matrix row
+     * separately. Since we know the number of elements in the matrix exactly
+     * in this case, we can already allocate the right amount of memory, which
+     * makes the creation process including the insertion of nonzero elements
+     * by the respective SparseMatrix::reinit call considerably faster.
+     */
+    SparseMatrix (const IndexSet                  &parallel_partitioning,
+                  const MPI_Comm                  &communicator,
+                  const std::vector<unsigned int> &n_entries_per_row);
+
+    /**
+     * This constructor is similar to the one above, but it now takes two
+     * different IndexSet partitions for row and columns. This interface is
+     * meant to be used for generating rectangular matrices, where the first
+     * index set describes the %parallel partitioning of the degrees of
+     * freedom associated with the matrix rows and the second one the
+     * partitioning of the matrix columns. The second index set specifies the
+     * partitioning of the vectors this matrix is to be multiplied with, not
+     * the distribution of the elements that actually appear in the matrix.
+     *
+     * The parameter @p n_max_entries_per_row defines how much memory will be
+     * allocated for each row. This number does not need to be accurate, as
+     * the structure is reorganized in the compress() call.
+     */
+    SparseMatrix (const IndexSet  &row_parallel_partitioning,
+                  const IndexSet  &col_parallel_partitioning,
+                  const MPI_Comm  &communicator = MPI_COMM_WORLD,
+                  const size_type  n_max_entries_per_row = 0);
+
+    /**
+     * This constructor is similar to the one above, but it now takes two
+     * different index sets for rows and columns. This interface is meant to
+     * be used for generating rectangular matrices, where one index set
+     * specifies the %parallel distribution of degrees of freedom associated
+     * with matrix rows and the second one specifies the %parallel
+     * distribution of the dofs associated with columns in the matrix. The
+     * second index set also provides information for the internal
+     * arrangement in matrix vector products (i.e., the distribution of the
+     * vector this matrix is to be multiplied
+     * with), but is not used for the distribution of the columns –
+     * rather, all column elements of a row are stored on the same processor
+     * in any case. The vector <tt>n_entries_per_row</tt> specifies the number
+     * of entries in each row of the newly generated matrix.
+     */
+    SparseMatrix (const IndexSet                  &row_parallel_partitioning,
+                  const IndexSet                  &col_parallel_partitioning,
+                  const MPI_Comm                  &communicator,
+                  const std::vector<unsigned int> &n_entries_per_row);
+
+    /**
+     * This function initializes the Trilinos Epetra matrix according to
+     * the specified sparsity_pattern, and also reassigns the matrix rows to
+     * different processes according to a user-supplied index set and
+     * %parallel communicator. In programs following the style of the tutorial
+     * programs, this function (and the respective call for a rectangular
+     * matrix) are the natural way to initialize the matrix size, its
+     * distribution among the MPI processes (if run in %parallel) as well as
+     * the location of non-zero elements. Trilinos stores the sparsity pattern
+     * internally, so it won't be needed any more after this call, in contrast
+     * to the deal.II own object. The optional argument @p exchange_data can
+     * be used for reinitialization with a sparsity pattern that is not fully
+     * constructed. This feature is only implemented for input sparsity
+     * patterns of type DynamicSparsityPattern. If the flag is not set, each
+     * processor just sets the elements in the sparsity pattern that belong to
+     * its rows.
+     *
+     * This is a collective operation that needs to be called on all
+     * processors in order to avoid a dead lock.
+     */
+    template<typename SparsityPatternType>
+    void reinit (const IndexSet            &parallel_partitioning,
+                 const SparsityPatternType &sparsity_pattern,
+                 const MPI_Comm            &communicator  = MPI_COMM_WORLD,
+                 const bool                 exchange_data = false);
+
+    /**
+     * This function is similar to the other initialization function above,
+     * but now also reassigns the matrix rows and columns according to two
+     * user-supplied index sets.  To be used for rectangular matrices. The
+     * optional argument @p exchange_data can be used for reinitialization
+     * with a sparsity pattern that is not fully constructed. This feature is
+     * only implemented for input sparsity patterns of type
+     * DynamicSparsityPattern.
+     *
+     * This is a collective operation that needs to be called on all
+     * processors in order to avoid a deadlock.
+     */
+    template<typename SparsityPatternType>
+    void reinit (const IndexSet            &row_parallel_partitioning,
+                 const IndexSet            &col_parallel_partitioning,
+                 const SparsityPatternType &sparsity_pattern,
+                 const MPI_Comm            &communicator  = MPI_COMM_WORLD,
+                 const bool                 exchange_data = false);
+
+    /**
+     * This function initializes the Trilinos matrix using the deal.II sparse
+     * matrix and the entries stored therein. It uses a threshold to copy only
+     * elements with modulus larger than the threshold (so zeros in the
+     * deal.II matrix can be filtered away). In contrast to the other reinit
+     * function with deal.II sparse matrix argument, this function takes a
+     * %parallel partitioning specified by the user instead of internally
+     * generating it.
+     *
+     * The optional parameter <tt>copy_values</tt> decides whether only the
+     * sparsity structure of the input matrix should be used or the matrix
+     * entries should be copied, too.
+     *
+     * This is a collective operation that needs to be called on all
+     * processors in order to avoid a deadlock.
+     */
+    template <typename number>
+    void reinit (const IndexSet                       &parallel_partitioning,
+                 const ::dealii::SparseMatrix<number> &dealii_sparse_matrix,
+                 const MPI_Comm                       &communicator = MPI_COMM_WORLD,
+                 const double                          drop_tolerance=1e-13,
+                 const bool                            copy_values=true,
+                 const ::dealii::SparsityPattern      *use_this_sparsity=0);
+
+    /**
+     * This function is similar to the other initialization function with
+     * deal.II sparse matrix input above, but now takes index sets for both
+     * the rows and the columns of the matrix. Chosen for rectangular
+     * matrices.
+     *
+     * The optional parameter <tt>copy_values</tt> decides whether only the
+     * sparsity structure of the input matrix should be used or the matrix
+     * entries should be copied, too.
+     *
+     * This is a collective operation that needs to be called on all
+     * processors in order to avoid a deadlock.
+     */
+    template <typename number>
+    void reinit (const IndexSet                        &row_parallel_partitioning,
+                 const IndexSet                        &col_parallel_partitioning,
+                 const ::dealii::SparseMatrix<number>  &dealii_sparse_matrix,
+                 const MPI_Comm                        &communicator = MPI_COMM_WORLD,
+                 const double                           drop_tolerance=1e-13,
+                 const bool                             copy_values=true,
+                 const ::dealii::SparsityPattern      *use_this_sparsity=0);
+//@}
+    /**
+     * @name Information on the matrix
+     */
+//@{
+
+    /**
+     * Return the number of rows in this matrix.
+     */
+    size_type m () const;
+
+    /**
+     * Return the number of columns in this matrix.
+     */
+    size_type n () const;
+
+    /**
+     * Return the local dimension of the matrix, i.e. the number of rows
+     * stored on the present MPI process. For sequential matrices, this number
+     * is the same as m(), but for %parallel matrices it may be smaller.
+     *
+     * To figure out which elements exactly are stored locally, use
+     * local_range().
+     */
+    unsigned int local_size () const;
+
+    /**
+     * Return a pair of indices indicating which rows of this matrix are
+     * stored locally. The first number is the index of the first row stored,
+     * the second the index of the one past the last one that is stored
+     * locally. If this is a sequential matrix, then the result will be the
+     * pair (0,m()), otherwise it will be a pair (i,i+n), where
+     * <tt>n=local_size()</tt>.
+     */
+    std::pair<size_type, size_type>
+    local_range () const;
+
+    /**
+     * Return whether @p index is in the local range or not, see also
+     * local_range().
+     */
+    bool in_local_range (const size_type index) const;
+
+    /**
+     * Return the number of nonzero elements of this matrix.
+     */
+    size_type n_nonzero_elements () const;
+
+    /**
+     * Number of entries in a specific row.
+     */
+    unsigned int row_length (const size_type row) const;
+
+    /**
+     * Returns the state of the matrix, i.e., whether compress() needs to be
+     * called after an operation requiring data exchange. A call to compress()
+     * is also needed when the method set() has been called (even when working
+     * in serial).
+     */
+    bool is_compressed () const;
+
+    /**
+     * Determine an estimate for the memory consumption (in bytes) of this
+     * object. Note that only the memory reserved on the current processor is
+     * returned in case this is called in an MPI-based program.
+     */
+    size_type memory_consumption () const;
+
+    /**
+     * Return the MPI communicator object in use with this matrix.
+     */
+    MPI_Comm get_mpi_communicator () const;
+
+//@}
+    /**
+     * @name Modifying entries
+     */
+//@{
+
+    /**
+     * This operator assigns a scalar to a matrix. Since this does usually not
+     * make much sense (should we set all matrix entries to this value?  Only
+     * the nonzero entries of the sparsity pattern?), this operation is only
+     * allowed if the actual value to be assigned is zero. This operator only
+     * exists to allow for the obvious notation <tt>matrix=0</tt>, which sets
+     * all elements of the matrix to zero, but keeps the sparsity pattern
+     * previously used.
+     */
+    SparseMatrix &
+    operator = (const double d);
+
+    /**
+     * Release all memory and return to a state just like after having called
+     * the default constructor.
+     *
+     * This is a collective operation that needs to be called on all
+     * processors in order to avoid a deadlock.
+     */
+    void clear ();
+
+    /**
+     * This command does two things:
+     * <ul>
+     * <li> If the matrix was initialized without a sparsity pattern, elements
+     * have been added manually using the set() command. When this process is
+     * completed, a call to compress() reorganizes the internal data
+     * structures (sparsity pattern) so that a fast access to data is possible
+     * in matrix-vector products.
+     * <li> If the matrix structure has already been fixed (either by
+     * initialization with a sparsity pattern or by calling compress() during
+     * the setup phase), this command does the %parallel exchange of data.
+     * This is necessary when we perform assembly on more than one (MPI)
+     * process, because then some non-local row data will accumulate on nodes
+     * that belong to the current processor's elements, but are actually held
+     * by another. This command is usually called after all elements have been
+     * traversed.
+     * </ul>
+     *
+     * In both cases, this function compresses the data structures and allows
+     * the resulting matrix to be used in all other operations like matrix-
+     * vector products. This is a collective operation, i.e., it needs to be
+     * run on all processors when used in %parallel.
+     *
+     * See
+     * @ref GlossCompress "Compressing distributed objects"
+     * for more information.
+     */
+    void compress (::dealii::VectorOperation::values operation);
+
+    /**
+     * Set the element (<i>i,j</i>) to @p value.
+     *
+     * This function is able to insert new elements into the matrix as long as
+     * compress() has not been called, so the sparsity pattern will be
+     * extended. When compress() is called for the first time (or in case the
+     * matrix is initialized from a sparsity pattern), no new elements can be
+     * added and an insertion of elements at positions which have not been
+     * initialized will throw an exception.
+     *
+     * For the case that the matrix is constructed without a sparsity pattern
+     * and new matrix entries are added on demand, please note the following
+     * behavior imposed by the underlying Epetra_FECrsMatrix data structure:
+     * If the same matrix entry is inserted more than once, the matrix entries
+     * will be added upon calling compress() (since Epetra does not track
+     * values to the same entry before the final compress() is called), even
+     * if VectorOperation::insert is specified as argument to compress(). In
+     * the case you cannot make sure that matrix entries are only set once,
+     * initialize the matrix with a sparsity pattern to fix the matrix
+     * structure before inserting elements.
+     */
+    void set (const size_type i,
+              const size_type j,
+              const TrilinosScalar value);
+
+    /**
+     * Set all elements given in a FullMatrix<double> into the sparse matrix
+     * locations given by <tt>indices</tt>. In other words, this function
+     * writes the elements in <tt>full_matrix</tt> into the calling matrix,
+     * using the local-to-global indexing specified by <tt>indices</tt> for
+     * both the rows and the columns of the matrix. This function assumes a
+     * quadratic sparse matrix and a quadratic full_matrix, the usual
+     * situation in FE calculations.
+     *
+     * This function is able to insert new elements into the matrix as long as
+     * compress() has not been called, so the sparsity pattern will be
+     * extended. After compress() has been called for the first time or the
+     * matrix has been initialized from a sparsity pattern, extending the
+     * sparsity pattern is no longer possible and an insertion of elements at
+     * positions which have not been initialized will throw an exception.
+     *
+     * The optional parameter <tt>elide_zero_values</tt> can be used to
+     * specify whether zero values should be inserted anyway or they should be
+     * filtered away. The default value is <tt>false</tt>, i.e., even zero
+     * values are inserted/replaced.
+     *
+     * For the case that the matrix is constructed without a sparsity pattern
+     * and new matrix entries are added on demand, please note the following
+     * behavior imposed by the underlying Epetra_FECrsMatrix data structure:
+     * If the same matrix entry is inserted more than once, the matrix entries
+     * will be added upon calling compress() (since Epetra does not track
+     * values to the same entry before the final compress() is called), even
+     * if VectorOperation::insert is specified as argument to compress(). In
+     * the case you cannot make sure that matrix entries are only set once,
+     * initialize the matrix with a sparsity pattern to fix the matrix
+     * structure before inserting elements.
+     */
+    void set (const std::vector<size_type>     &indices,
+              const FullMatrix<TrilinosScalar> &full_matrix,
+              const bool                        elide_zero_values = false);
+
+    /**
+     * Same function as before, but now including the possibility to use
+     * rectangular full_matrices and different local-to-global indexing on
+     * rows and columns, respectively.
+     */
+    void set (const std::vector<size_type>     &row_indices,
+              const std::vector<size_type>     &col_indices,
+              const FullMatrix<TrilinosScalar> &full_matrix,
+              const bool                        elide_zero_values = false);
+
+    /**
+     * Set several elements in the specified row of the matrix with column
+     * indices as given by <tt>col_indices</tt> to the respective value.
+     *
+     * This function is able to insert new elements into the matrix as long as
+     * compress() has not been called, so the sparsity pattern will be
+     * extended. After compress() has been called for the first time or the
+     * matrix has been initialized from a sparsity pattern, extending the
+     * sparsity pattern is no longer possible and an insertion of elements at
+     * positions which have not been initialized will throw an exception.
+     *
+     * The optional parameter <tt>elide_zero_values</tt> can be used to
+     * specify whether zero values should be inserted anyway or they should be
+     * filtered away. The default value is <tt>false</tt>, i.e., even zero
+     * values are inserted/replaced.
+     *
+     * For the case that the matrix is constructed without a sparsity pattern
+     * and new matrix entries are added on demand, please note the following
+     * behavior imposed by the underlying Epetra_FECrsMatrix data structure:
+     * If the same matrix entry is inserted more than once, the matrix entries
+     * will be added upon calling compress() (since Epetra does not track
+     * values to the same entry before the final compress() is called), even
+     * if VectorOperation::insert is specified as argument to compress(). In
+     * the case you cannot make sure that matrix entries are only set once,
+     * initialize the matrix with a sparsity pattern to fix the matrix
+     * structure before inserting elements.
+     */
+    void set (const size_type                    row,
+              const std::vector<size_type>      &col_indices,
+              const std::vector<TrilinosScalar> &values,
+              const bool                         elide_zero_values = false);
+
+    /**
+     * Set several elements to values given by <tt>values</tt> in a given row
+     * in columns given by col_indices into the sparse matrix.
+     *
+     * This function is able to insert new elements into the matrix as long as
+     * compress() has not been called, so the sparsity pattern will be
+     * extended. After compress() has been called for the first time or the
+     * matrix has been initialized from a sparsity pattern, extending the
+     * sparsity pattern is no longer possible and an insertion of elements at
+     * positions which have not been initialized will throw an exception.
+     *
+     * The optional parameter <tt>elide_zero_values</tt> can be used to
+     * specify whether zero values should be inserted anyway or they should be
+     * filtered away. The default value is <tt>false</tt>, i.e., even zero
+     * values are inserted/replaced.
+     *
+     * For the case that the matrix is constructed without a sparsity pattern
+     * and new matrix entries are added on demand, please note the following
+     * behavior imposed by the underlying Epetra_FECrsMatrix data structure:
+     * If the same matrix entry is inserted more than once, the matrix entries
+     * will be added upon calling compress() (since Epetra does not track
+     * values to the same entry before the final compress() is called), even
+     * if VectorOperation::insert is specified as argument to compress(). In
+     * the case you cannot make sure that matrix entries are only set once,
+     * initialize the matrix with a sparsity pattern to fix the matrix
+     * structure before inserting elements.
+     */
+    void set (const size_type       row,
+              const size_type       n_cols,
+              const size_type      *col_indices,
+              const TrilinosScalar *values,
+              const bool            elide_zero_values = false);
+
+    /**
+     * Add @p value to the element (<i>i,j</i>).
+     *
+     * Just as the respective call in deal.II SparseMatrix<Number> class (but
+     * in contrast to the situation for PETSc based matrices), this function
+     * throws an exception if an entry does not exist in the sparsity pattern.
+     * Moreover, if <tt>value</tt> is not a finite number an exception is
+     * thrown.
+     */
+    void add (const size_type      i,
+              const size_type      j,
+              const TrilinosScalar value);
+
+    /**
+     * Add all elements given in a FullMatrix<double> into sparse matrix
+     * locations given by <tt>indices</tt>. In other words, this function adds
+     * the elements in <tt>full_matrix</tt> to the respective entries in
+     * calling matrix, using the local-to-global indexing specified by
+     * <tt>indices</tt> for both the rows and the columns of the matrix. This
+     * function assumes a quadratic sparse matrix and a quadratic full_matrix,
+     * the usual situation in FE calculations.
+     *
+     * Just as the respective call in deal.II SparseMatrix<Number> class (but
+     * in contrast to the situation for PETSc based matrices), this function
+     * throws an exception if an entry does not exist in the sparsity pattern.
+     *
+     * The optional parameter <tt>elide_zero_values</tt> can be used to
+     * specify whether zero values should be added anyway or these should be
+     * filtered away and only non-zero data is added. The default value is
+     * <tt>true</tt>, i.e., zero values won't be added into the matrix.
+     */
+    void add (const std::vector<size_type>  &indices,
+              const FullMatrix<TrilinosScalar> &full_matrix,
+              const bool                        elide_zero_values = true);
+
+    /**
+     * Same function as before, but now including the possibility to use
+     * rectangular full_matrices and different local-to-global indexing on
+     * rows and columns, respectively.
+     */
+    void add (const std::vector<size_type>     &row_indices,
+              const std::vector<size_type>     &col_indices,
+              const FullMatrix<TrilinosScalar> &full_matrix,
+              const bool                        elide_zero_values = true);
+
+    /**
+     * Add several values to the specified row of the matrix, at the column
+     * indices given by <tt>col_indices</tt>.
+     *
+     * Just as the respective call in deal.II SparseMatrix<Number> class (but
+     * in contrast to the situation for PETSc based matrices), this function
+     * throws an exception if an entry does not exist in the sparsity pattern.
+     *
+     * The optional parameter <tt>elide_zero_values</tt> can be used to
+     * specify whether zero values should be added anyway or these should be
+     * filtered away and only non-zero data is added. The default value is
+     * <tt>true</tt>, i.e., zero values won't be added into the matrix.
+     */
+    void add (const size_type                    row,
+              const std::vector<size_type>      &col_indices,
+              const std::vector<TrilinosScalar> &values,
+              const bool                         elide_zero_values = true);
+
+    /**
+     * Add an array of values given by <tt>values</tt> in the given global
+     * matrix row at columns specified by col_indices in the sparse matrix.
+     *
+     * Just as the respective call in deal.II SparseMatrix<Number> class (but
+     * in contrast to the situation for PETSc based matrices), this function
+     * throws an exception if an entry does not exist in the sparsity pattern.
+     *
+     * The optional parameter <tt>elide_zero_values</tt> can be used to
+     * specify whether zero values should be added anyway or these should be
+     * filtered away and only non-zero data is added. The default value is
+     * <tt>true</tt>, i.e., zero values won't be added into the matrix.
+     */
+    void add (const size_type       row,
+              const size_type       n_cols,
+              const size_type      *col_indices,
+              const TrilinosScalar *values,
+              const bool            elide_zero_values = true,
+              const bool            col_indices_are_sorted = false);
+
+    /**
+     * Multiply the entire matrix by a fixed factor.
+     */
+    SparseMatrix &operator *= (const TrilinosScalar factor);
+
+    /**
+     * Divide the entire matrix by a fixed factor.
+     */
+    SparseMatrix &operator /= (const TrilinosScalar factor);
+
+    /**
+     * Copy the given (Trilinos) matrix (sparsity pattern and entries).
+     */
+    void copy_from (const SparseMatrix &source);
+
+    /**
+     * Add <tt>matrix</tt> scaled by <tt>factor</tt> to this matrix, i.e. the
+     * matrix <tt>factor*matrix</tt> is added to <tt>this</tt>. If the
+     * sparsity pattern of the calling matrix does not contain all the
+     * elements in the sparsity pattern of the input matrix, this function
+     * will throw an exception.
+     */
+    void add (const TrilinosScalar  factor,
+              const SparseMatrix   &matrix);
+
+    /**
+     * Remove all elements from this <tt>row</tt> by setting them to zero. The
+     * function does not modify the number of allocated nonzero entries, it
+     * only sets the entries to zero.
+     *
+     * This operation is used in eliminating constraints (e.g. due to hanging
+     * nodes) and makes sure that we can write this modification to the matrix
+     * without having to read entries (such as the locations of non-zero
+     * elements) from it — without this operation, removing constraints
+     * on %parallel matrices is a rather complicated procedure.
+     *
+     * The second parameter can be used to set the diagonal entry of this row
+     * to a value different from zero. The default is to set it to zero.
+     *
+     * @note If the matrix is stored in parallel across multiple processors
+     * using MPI, this function only touches rows that are locally stored and
+     * simply ignores all other row indices. Further, in the context of
+     * parallel computations, you will get into trouble if you clear a row
+     * while other processors still have pending writes or additions into the
+     * same row. In other words, if another processor still wants to add
+     * something to an element of a row and you call this function to zero out
+     * the row, then the next time you call compress() may add the remote
+     * value to the zero you just created. Consequently, you will want to call
+     * compress() after you made the last modifications to a matrix and before
+     * starting to clear rows.
+     */
+    void clear_row (const size_type      row,
+                    const TrilinosScalar new_diag_value = 0);
+
+    /**
+     * Same as clear_row(), except that it works on a number of rows at once.
+     *
+     * The second parameter can be used to set the diagonal entries of all
+     * cleared rows to something different from zero. Note that all of these
+     * diagonal entries get the same value -- if you want different values for
+     * the diagonal entries, you have to set them by hand.
+     *
+     * @note If the matrix is stored in parallel across multiple processors
+     * using MPI, this function only touches rows that are locally stored and
+     * simply ignores all other row indices. Further, in the context of
+     * parallel computations, you will get into trouble if you clear a row
+     * while other processors still have pending writes or additions into the
+     * same row. In other words, if another processor still wants to add
+     * something to an element of a row and you call this function to zero out
+     * the row, then the next time you call compress() may add the remote
+     * value to the zero you just created. Consequently, you will want to call
+     * compress() after you made the last modifications to a matrix and before
+     * starting to clear rows.
+     */
+    void clear_rows (const std::vector<size_type> &rows,
+                     const TrilinosScalar          new_diag_value = 0);
+
+    /**
+     * Sets an internal flag so that all operations performed by the matrix,
+     * i.e., multiplications, are done in transposed order. However, this does
+     * not reshape the matrix to transposed form directly, so care should be
+     * taken when using this flag.
+     */
+    void transpose ();
+
+//@}
+    /**
+     * @name Entry Access
+     */
+//@{
+
+    /**
+     * Return the value of the entry (<i>i,j</i>).  This may be an expensive
+     * operation and you should always take care where to call this function.
+     * As in the deal.II sparse matrix class, we throw an exception if the
+     * respective entry doesn't exist in the sparsity pattern of this class,
+     * which is requested from Trilinos. Moreover, an exception will be thrown
+     * when the requested element is not saved on the calling process.
+     */
+    TrilinosScalar operator () (const size_type i,
+                                const size_type j) const;
+
+    /**
+     * Return the value of the matrix entry (<i>i,j</i>). If this entry does
+     * not exist in the sparsity pattern, then zero is returned. While this
+     * may be convenient in some cases, note that it is simple to write
+     * algorithms that are slow compared to an optimal solution, since the
+     * sparsity of the matrix is not used.  On the other hand, if you want to
+     * be sure the entry exists, you should use operator() instead.
+     *
+     * The lack of error checking in this function can also yield surprising
+     * results if you have a parallel matrix. In that case, just because you
+     * get a zero result from this function does not mean that either the
+     * entry does not exist in the sparsity pattern or that it does but has a
+     * value of zero. Rather, it could also be that it simply isn't stored on
+     * the current processor; in that case, it may be stored on a different
+     * processor, and possibly so with a nonzero value.
+     */
+    TrilinosScalar el (const size_type i,
+                       const size_type j) const;
+
+    /**
+     * Return the main diagonal element in the <i>i</i>th row. This function
+     * throws an error if the matrix is not quadratic and it also throws an
+     * error if <i>(i,i)</i> is not an element of the local matrix. See also
+     * the comment in trilinos_sparse_matrix.cc.
+     */
+    TrilinosScalar diag_element (const size_type i) const;
+
+//@}
+    /**
+     * @name Multiplications
+     */
+//@{
+
+    /**
+     * Matrix-vector multiplication: let <i>dst = M*src</i> with <i>M</i>
+     * being this matrix.
+     *
+     * Source and destination must not be the same vector.
+     *
+     * This function can be called with several different vector objects,
+     * namely TrilinosWrappers::Vector, TrilinosWrappers::MPI::Vector as well
+     * as deal.II's own vector classes Vector<double> and
+     * parallel::distributed::Vector<double>.
+     *
+     * Note that both vectors have to be distributed vectors generated using
+     * the same Map as was used for the matrix in case you work on a
+     * distributed memory architecture, using the interface in the
+     * TrilinosWrappers::VectorBase class (or one of the two derived classes
+     * Vector and MPI::Vector).
+     *
+     * In case of a localized Vector, this function will only work when
+     * running on one processor, since the matrix object is inherently
+     * distributed. Otherwise, an exception will be thrown.
+     */
+    template<typename VectorType>
+    void vmult (VectorType       &dst,
+                const VectorType &src) const;
+
+    /**
+     * Matrix-vector multiplication: let <i>dst = M<sup>T</sup>*src</i> with
+     * <i>M</i> being this matrix. This function does the same as vmult() but
+     * takes the transposed matrix.
+     *
+     * Source and destination must not be the same vector.
+     *
+     * This function can be called with several different vector objects,
+     * namely TrilinosWrappers::Vector, TrilinosWrappers::MPI::Vector as well
+     * as deal.II's own vector classes Vector<double> and
+     * parallel::distributed::Vector<double>.
+     *
+     * Note that both vectors have to be distributed vectors generated using
+     * the same Map as was used for the matrix in case you work on a
+     * distributed memory architecture, using the interface in the
+     * TrilinosWrappers::VectorBase class (or one of the two derived classes
+     * Vector and MPI::Vector).
+     *
+     * In case of a localized Vector, this function will only work when
+     * running on one processor, since the matrix object is inherently
+     * distributed. Otherwise, an exception will be thrown.
+     */
+    template <typename VectorType>
+    void Tvmult (VectorType       &dst,
+                 const VectorType &src) const;
+
+    /**
+     * Adding matrix-vector multiplication. Add <i>M*src</i> on <i>dst</i>
+     * with <i>M</i> being this matrix.
+     *
+     * Source and destination must not be the same vector.
+     *
+     * This function can be called with several different vector objects,
+     * namely TrilinosWrappers::Vector, TrilinosWrappers::MPI::Vector as well
+     * as deal.II's own vector classes Vector<double> and
+     * parallel::distributed::Vector<double>.
+     *
+     * When using a vector of type TrilinosWrappers::MPI::Vector, both vectors
+     * have to be distributed vectors generated using the same Map as was used
+     * for the matrix rows and columns in case you work on a distributed
+     * memory architecture, using the interface in the
+     * TrilinosWrappers::VectorBase class.
+     *
+     * In case of a localized Vector (i.e., TrilinosWrappers::Vector or
+     * Vector<double>), this function will only work when running on one
+     * processor, since the matrix object is inherently distributed.
+     * Otherwise, an exception will be thrown.
+     *
+     */
+    template<typename VectorType>
+    void vmult_add (VectorType       &dst,
+                    const VectorType &src) const;
+
+    /**
+     * Adding matrix-vector multiplication. Add <i>M<sup>T</sup>*src</i> to
+     * <i>dst</i> with <i>M</i> being this matrix. This function does the same
+     * as vmult_add() but takes the transposed matrix.
+     *
+     * Source and destination must not be the same vector.
+     *
+     * This function can be called with several different vector objects,
+     * namely TrilinosWrappers::Vector, TrilinosWrappers::MPI::Vector as well
+     * as deal.II's own vector classes Vector<double> and
+     * parallel::distributed::Vector<double>.
+     *
+     * When using a vector of type TrilinosWrappers::MPI::Vector, both vectors
+     * have to be distributed vectors generated using the same Map as was used
+     * for the matrix rows and columns in case you work on a distributed
+     * memory architecture, using the interface in the
+     * TrilinosWrappers::VectorBase class.
+     *
+     * In case of a localized Vector (i.e., TrilinosWrappers::Vector or
+     * Vector<double>), this function will only work when running on one
+     * processor, since the matrix object is inherently distributed.
+     * Otherwise, an exception will be thrown.
+     */
+    template <typename VectorType>
+    void Tvmult_add (VectorType       &dst,
+                     const VectorType &src) const;
+
+    /**
+     * Return the square of the norm of the vector $v$ with respect to the
+     * norm induced by this matrix, i.e., $\left(v,Mv\right)$. This is useful,
+     * e.g. in the finite element context, where the $L_2$ norm of a function
+     * equals the matrix norm with respect to the mass matrix of the vector
+     * representing the nodal values of the finite element function.
+     *
+     * Obviously, the matrix needs to be quadratic (i.e., square) for this
+     * operation.
+     *
+     * The implementation of this function is not as efficient as the one in
+     * the @p SparseMatrix class used in deal.II (i.e. the original one, not
+     * the Trilinos wrapper class) since Trilinos doesn't support this
+     * operation and needs a temporary vector.
+     *
+     * Note that the vector has to be a distributed vector generated using
+     * the same Map as was used for the matrix in case you work on a
+     * distributed memory architecture, using the interface in the
+     * TrilinosWrappers::VectorBase class (or one of the two derived classes
+     * Vector and MPI::Vector).
+     *
+     * In case of a localized Vector, this function will only work when
+     * running on one processor, since the matrix object is inherently
+     * distributed. Otherwise, an exception will be thrown.
+     */
+    TrilinosScalar matrix_norm_square (const VectorBase &v) const;
+
+    /**
+     * Compute the matrix scalar product $\left(u,Mv\right)$.
+     *
+     * The implementation of this function is not as efficient as the one in
+     * the @p SparseMatrix class used in deal.II (i.e. the original one, not
+     * the Trilinos wrapper class) since Trilinos doesn't support this
+     * operation and needs a temporary vector.
+     *
+     * Note that both vectors have to be distributed vectors generated using
+     * the same Map as was used for the matrix in case you work on a
+     * distributed memory architecture, using the interface in the
+     * TrilinosWrappers::VectorBase class (or one of the two derived classes
+     * Vector and MPI::Vector).
+     *
+     * In case of a localized Vector, this function will only work when
+     * running on one processor, since the matrix object is inherently
+     * distributed. Otherwise, an exception will be thrown.
+     */
+    TrilinosScalar matrix_scalar_product (const VectorBase &u,
+                                          const VectorBase &v) const;
+
+    /**
+     * Compute the residual of an equation <i>Mx=b</i>, where the residual is
+     * defined to be <i>r=b-Mx</i>. Write the residual into @p dst. The
+     * <i>l<sub>2</sub></i> norm of the residual vector is returned.
+     *
+     * Source <i>x</i> and destination <i>dst</i> must not be the same vector.
+     *
+     * Note that the vectors have to be distributed vectors generated using
+     * the same Map as was used for the matrix in case you work on a
+     * distributed memory architecture, using the interface in the
+     * TrilinosWrappers::VectorBase class (or one of the two derived classes
+     * Vector and MPI::Vector).
+     *
+     * In case of a localized Vector, this function will only work when
+     * running on one processor, since the matrix object is inherently
+     * distributed. Otherwise, an exception will be thrown.
+     */
+    TrilinosScalar residual (VectorBase       &dst,
+                             const VectorBase &x,
+                             const VectorBase &b) const;
+
+    /**
+     * Perform the matrix-matrix multiplication <tt>C = A * B</tt>, or, if an
+     * optional vector argument is given, <tt>C = A * diag(V) * B</tt>, where
+     * <tt>diag(V)</tt> defines a diagonal matrix with the vector entries.
+     * Here, <tt>A</tt> denotes the matrix on which this function is called.
+     *
+     * This function assumes that the calling matrix <tt>A</tt> and <tt>B</tt>
+     * have compatible sizes. The size of <tt>C</tt> will be set within this
+     * function.
+     *
+     * The content as well as the sparsity pattern of the matrix C will be
+     * changed by this function, so make sure that the sparsity pattern is not
+     * used somewhere else in your program. This is an expensive operation, so
+     * think twice before you use this function.
+     *
+     * @param C Matrix that receives the product.
+     * @param B Right factor of the product.
+     * @param V Optional vector of diagonal entries; defaults to a
+     * default-constructed VectorBase.
+     */
+    void mmult (SparseMatrix       &C,
+                const SparseMatrix &B,
+                const VectorBase   &V = VectorBase()) const;
+
+
+    /**
+     * Perform the matrix-matrix multiplication with the transpose of
+     * <tt>this</tt>, i.e., <tt>C = A<sup>T</sup> * B</tt>, or, if an optional
+     * vector argument is given, <tt>C = A<sup>T</sup> * diag(V) * B</tt>,
+     * where <tt>diag(V)</tt> defines a diagonal matrix with the vector
+     * entries. Here, <tt>A</tt> denotes the matrix on which this function is
+     * called.
+     *
+     * This function assumes that the calling matrix <tt>A</tt> and <tt>B</tt>
+     * have compatible sizes. The size of <tt>C</tt> will be set within this
+     * function.
+     *
+     * The content as well as the sparsity pattern of the matrix C will be
+     * changed by this function, so make sure that the sparsity pattern is not
+     * used somewhere else in your program. This is an expensive operation, so
+     * think twice before you use this function.
+     *
+     * @param C Matrix that receives the product.
+     * @param B Right factor of the product.
+     * @param V Optional vector of diagonal entries; defaults to a
+     * default-constructed VectorBase.
+     */
+    void Tmmult (SparseMatrix       &C,
+                 const SparseMatrix &B,
+                 const VectorBase   &V = VectorBase()) const;
+
+//@}
+    /**
+     * @name Matrix norms
+     */
+//@{
+
+    /**
+     * Return the <i>l</i><sub>1</sub>-norm of the matrix, that is $|M|_1=
+     * \max_{\mathrm{all\ columns\ } j} \sum_{\mathrm{all\ rows\ } i}
+     * |M_{ij}|$, (max. sum of columns).  This is the natural matrix norm that
+     * is compatible with the l1-norm for vectors, i.e.  $|Mv|_1 \leq |M|_1
+     * |v|_1$.  (cf. Haemmerlin-Hoffmann: Numerische Mathematik)
+     */
+    TrilinosScalar l1_norm () const;
+
+    /**
+     * Return the linfty-norm of the matrix, that is
+     * $|M|_\infty=\max_{\mathrm{all\ rows\ } i}\sum_{\mathrm{all\ columns\ }
+     * j} |M_{ij}|$, (max. sum of rows).  This is the natural matrix norm that
+     * is compatible with the linfty-norm of vectors, i.e.  $|Mv|_\infty \leq
+     * |M|_\infty |v|_\infty$.  (cf. Haemmerlin-Hoffmann: Numerische
+     * Mathematik)
+     */
+    TrilinosScalar linfty_norm () const;
+
+    /**
+     * Return the Frobenius norm of the matrix, i.e. the square root of the
+     * sum of squares of all entries in the matrix.
+     */
+    TrilinosScalar frobenius_norm () const;
+
+//@}
+    /**
+     * @name Access to underlying Trilinos data
+     */
+//@{
+
+    /**
+     * Return a const reference to the underlying Trilinos Epetra_CrsMatrix
+     * data. The returned reference refers to the matrix object held by this
+     * class, so it is only valid as long as that object exists.
+     */
+    const Epetra_CrsMatrix &trilinos_matrix () const;
+
+    /**
+     * Return a const reference to the underlying Trilinos Epetra_CrsGraph
+     * data that stores the sparsity pattern of the matrix. The graph is the
+     * one reported by the Graph() function of the underlying Trilinos matrix.
+     */
+    const Epetra_CrsGraph &trilinos_sparsity_pattern () const;
+
+    /**
+     * Return a const reference to the underlying Trilinos Epetra_Map that
+     * sets the partitioning of the domain space of this matrix, i.e., the
+     * partitioning of the vectors this matrix has to be multiplied with.
+     *
+     * @deprecated Use locally_owned_domain_indices() instead.
+     */
+    const Epetra_Map &domain_partitioner ()  const DEAL_II_DEPRECATED;
+
+    /**
+     * Return a const reference to the underlying Trilinos Epetra_Map that
+     * sets the partitioning of the range space of this matrix, i.e., the
+     * partitioning of the vectors that result from matrix-vector
+     * products.
+     *
+     * @deprecated Use locally_owned_range_indices() instead.
+     */
+    const Epetra_Map &range_partitioner () const DEAL_II_DEPRECATED;
+
+    /**
+     * Return a const reference to the underlying Trilinos Epetra_Map that
+     * sets the partitioning of the matrix rows. Equal to the partitioning of
+     * the range.
+     *
+     * @deprecated Use locally_owned_range_indices() instead.
+     */
+    const Epetra_Map &row_partitioner () const DEAL_II_DEPRECATED;
+
+    /**
+     * Return a const reference to the underlying Trilinos Epetra_Map that
+     * sets the partitioning of the matrix columns. This is in general not
+     * equal to the partitioner Epetra_Map for the domain because of overlap
+     * in the matrix.
+     *
+     * @deprecated Usually not necessary. If desired, access it via the
+     * Epetra_CrsMatrix.
+     */
+    const Epetra_Map &col_partitioner () const DEAL_II_DEPRECATED;
+//@}
+
+    /**
+     * @name Partitioners
+     */
+//@{
+
+    /**
+     * Return the partitioning of the domain space of this matrix, i.e., the
+     * partitioning of the vectors this matrix has to be multiplied with. The
+     * index set is built from the domain map of the underlying Trilinos
+     * matrix.
+     */
+    IndexSet locally_owned_domain_indices() const;
+
+    /**
+     * Return the partitioning of the range space of this matrix, i.e., the
+     * partitioning of the vectors that result from matrix-vector
+     * products. The index set is built from the range map of the underlying
+     * Trilinos matrix.
+     */
+    IndexSet locally_owned_range_indices() const;
+
+//@}
+
+    /**
+     * @name Iterators
+     */
+//@{
+
+    /**
+     * Return an iterator pointing to the first element of the matrix.
+     *
+     * The elements accessed by iterators within each row are ordered in the
+     * way in which Trilinos stores them, though the implementation guarantees
+     * that all elements of one row are accessed before the elements of the
+     * next row. If your algorithm relies on visiting elements within one row,
+     * you will need to consult with the Trilinos documentation on the order
+     * in which it stores data. It is, however, generally not a good and long-
+     * term stable idea to rely on the order in which you receive elements if
+     * you iterate over them.
+     *
+     * When you iterate over the elements of a parallel matrix, you will only
+     * be able to access the locally owned rows. (You can access the other
+     * rows as well, but they will look empty.) In that case, you probably
+     * want to call the begin() function that takes the row as an argument to
+     * limit the range of elements to loop over.
+     */
+    const_iterator begin () const;
+
+    /**
+     * Return an iterator pointing to the first element of the matrix. Like
+     * the const version of begin(), but for non-const matrices.
+     */
+    iterator begin ();
+
+    /**
+     * Return an iterator pointing to the element past the last one of this
+     * matrix.
+     */
+    const_iterator end () const;
+
+    /**
+     * Return an iterator pointing to the element past the last one of this
+     * matrix. Like the const version of end(), but for non-const matrices.
+     */
+    iterator end ();
+
+    /**
+     * Return an iterator pointing to the first element of row @p r.
+     *
+     * Note that if the given row is empty, i.e. does not contain any nonzero
+     * entries, then the iterator returned by this function equals
+     * <tt>end(r)</tt>. The returned iterator may not be dereferenceable in
+     * that case if neither row @p r nor any of the following rows contain any
+     * nonzero entries.
+     *
+     * The elements accessed by iterators within each row are ordered in the
+     * way in which Trilinos stores them, though the implementation guarantees
+     * that all elements of one row are accessed before the elements of the
+     * next row. If your algorithm relies on visiting elements within one row,
+     * you will need to consult with the Trilinos documentation on the order
+     * in which it stores data. It is, however, generally not a good and long-
+     * term stable idea to rely on the order in which you receive elements if
+     * you iterate over them.
+     *
+     * @note When you access the elements of a parallel matrix, you can only
+     * access the elements of rows that are actually stored locally. (You can
+     * access the other rows as well, but they will look empty.) Even then, if
+     * another processor has since written into, or added to, an element of
+     * the matrix that is stored on the current processor, then you will still
+     * see the old value of this entry unless you have called compress()
+     * between modifying the matrix element on the remote processor and
+     * accessing it on the current processor. See the documentation of the
+     * compress() function for more information.
+     */
+    const_iterator begin (const size_type r) const;
+
+    /**
+     * Return an iterator pointing to the first element of row @p r. Like the
+     * const version of begin(const size_type), but for non-const matrices.
+     */
+    iterator begin (const size_type r);
+
+    /**
+     * Return an iterator pointing to the element past the last one of row
+     * @p r, or past the end of the entire sparsity pattern if none of the
+     * rows after @p r contain any entries at all.
+     *
+     * Note that the end iterator is not necessarily dereferenceable. This is
+     * in particular the case if it is the end iterator for the last row of a
+     * matrix.
+     */
+    const_iterator end (const size_type r) const;
+
+    /**
+     * Return an iterator pointing to the element past the last one of row
+     * @p r. Like the const version of end(const size_type), but for
+     * non-const matrices.
+     */
+    iterator end (const size_type r);
+
+//@}
+    /**
+     * @name Input/Output
+     */
+//@{
+
+    /**
+     * Abstract Trilinos object that helps view in ASCII other Trilinos
+     * objects.
+     *
+     * @todo This function is currently not implemented.
+     */
+    void write_ascii ();
+
+    /**
+     * Print the matrix to the given stream, using the format <tt>(line,col)
+     * value</tt>, i.e. one nonzero entry of the matrix per line. The optional
+     * flag outputs the sparsity pattern in Trilinos style, where the data is
+     * sorted according to the processor number when printed to the stream, as
+     * well as a summary of the matrix like the global size.
+     *
+     * @param out Stream the matrix is written to.
+     * @param write_extended_trilinos_info Select the Trilinos-style output
+     * described above; defaults to <tt>false</tt>.
+     */
+    void print (std::ostream &out,
+                const bool    write_extended_trilinos_info = false) const;
+
+//@}
+    /**
+     * @addtogroup Exceptions
+     *
+     */
+//@{
+    /**
+     * Exception reporting that a call to a Trilinos function failed. The
+     * argument is the error number, which is included in the message.
+     */
+    DeclException1 (ExcTrilinosError,
+                    int,
+                    << "An error with error number " << arg1
+                    << " occurred while calling a Trilinos function");
+
+    /**
+     * Exception reporting that a matrix entry does not exist. The two
+     * arguments are the indices of the entry as printed in the message.
+     */
+    DeclException2 (ExcInvalidIndex,
+                    size_type, size_type,
+                    << "The entry with index <" << arg1 << ',' << arg2
+                    << "> does not exist.");
+
+    /**
+     * Exception without arguments or message text. Judging by its name it
+     * is presumably meant for operations whose source and destination
+     * vectors must differ but do not (to be confirmed at the places where it
+     * is thrown).
+     */
+    DeclException0 (ExcSourceEqualsDestination);
+
+    /**
+     * Exception without arguments or message text. Judging by its name it
+     * is presumably meant for operations that require a compressed matrix
+     * (to be confirmed at the places where it is thrown).
+     */
+    DeclException0 (ExcMatrixNotCompressed);
+
+    /**
+     * Exception reporting access to an element of a distributed matrix in a
+     * row that is not stored locally. The first two arguments identify the
+     * element that was accessed; the last two are the bounds of the range
+     * of locally stored rows as printed in the message.
+     */
+    DeclException4 (ExcAccessToNonLocalElement,
+                    size_type, size_type, size_type, size_type,
+                    << "You tried to access element (" << arg1
+                    << "/" << arg2 << ")"
+                    << " of a distributed matrix, but only rows "
+                    << arg3 << " through " << arg4
+                    << " are stored locally and can be accessed.");
+
+    /**
+     * Exception reporting access to an element that appears not to exist
+     * in the Trilinos sparsity pattern. The two arguments identify the
+     * element that was accessed.
+     */
+    DeclException2 (ExcAccessToNonPresentElement,
+                    size_type, size_type,
+                    << "You tried to access element (" << arg1
+                    << "/" << arg2 << ")"
+                    << " of a sparse matrix, but it appears to not"
+                    << " exist in the Trilinos sparsity pattern.");
+//@}
+
+
+
+  protected:
+
+    /**
+     * For some matrix storage formats, in particular for the PETSc
+     * distributed blockmatrices, set and add operations on individual
+     * elements can not be freely mixed. Rather, one has to synchronize
+     * operations when one wants to switch from setting elements to adding to
+     * elements.  BlockMatrixBase automatically synchronizes the access by
+     * calling this helper function for each block.  This function ensures
+     * that the matrix is in a state that allows adding elements; if it
+     * previously already was in this state, the function does nothing.
+     *
+     * For this class, the inline implementation in this header is empty,
+     * i.e., nothing needs to be done.
+     */
+    void prepare_add();
+
+    /**
+     * Same as prepare_add() but prepare the matrix for setting elements if
+     * the representation of elements in this class requires such an
+     * operation.
+     *
+     * For this class, the inline implementation in this header is empty,
+     * i.e., nothing needs to be done.
+     */
+    void prepare_set();
+
+
+
+  private:
+    /**
+     * Copy constructor is disabled.
+     */
+    SparseMatrix (const SparseMatrix &);
+    /**
+     * operator= is disabled.
+     */
+    SparseMatrix &operator = (const SparseMatrix &);
+
+    /**
+     * Pointer to the user-supplied Epetra Trilinos mapping of the matrix
+     * columns that assigns parts of the matrix to the individual processes.
+     */
+    std_cxx11::shared_ptr<Epetra_Map> column_space_map;
+
+    /**
+     * A sparse matrix object in Trilinos to be used for finite element based
+     * problems which allows for assembling into non-local elements.  The
+     * actual type, a sparse matrix, is set in the constructor.
+     */
+    std_cxx11::shared_ptr<Epetra_FECrsMatrix> matrix;
+
+    /**
+     * A sparse matrix object in Trilinos to be used for collecting the non-
+     * local elements if the matrix was constructed from a Trilinos sparsity
+     * pattern with the respective option.
+     */
+    std_cxx11::shared_ptr<Epetra_CrsMatrix> nonlocal_matrix;
+
+    /**
+     * An export object used to communicate the nonlocal matrix.
+     */
+    std_cxx11::shared_ptr<Epetra_Export>    nonlocal_matrix_exporter;
+
+    /**
+     * Trilinos doesn't allow to mix additions to matrix entries and
+     * overwriting them (to make synchronisation of %parallel computations
+     * simpler). The way we do it is to, for each access operation, store
+     * whether it is an insertion or an addition. If the previous one was of
+     * different type, then we first have to flush the Trilinos buffers;
+     * otherwise, we can simply go on. Luckily, Trilinos has an object for
+     * this which does already all the %parallel communications in such a
+     * case, so we simply use their model, which stores whether the last
+     * operation was an addition or an insertion.
+     */
+    Epetra_CombineMode last_action;
+
+    /**
+     * A boolean variable to hold information on whether the matrix is
+     * compressed or not. This is the value returned by is_compressed().
+     */
+    bool compressed;
+
+    /**
+     * To allow calling protected prepare_add() and prepare_set().
+     */
+    friend class BlockMatrixBase<SparseMatrix>;
+  };
+
+
+
+// -------------------------- inline and template functions ----------------------
+
+
+#ifndef DOXYGEN
+
+  namespace SparseMatrixIterators
+  {
+    // Store the matrix pointer together with the (row, index) position and
+    // then call visit_present_row() for the position just set.
+    inline
+    AccessorBase::AccessorBase(SparseMatrix *matrix, size_type row, size_type index)
+      : matrix(matrix), a_row(row), a_index(index)
+    {
+      this->visit_present_row ();
+    }
+
+
+    // Row of the element this accessor currently points to. Asserts that the
+    // accessor has not run past the last row of the matrix.
+    inline AccessorBase::size_type
+    AccessorBase::row() const
+    {
+      Assert (a_row < matrix->m(), ExcBeyondEndOfMatrix());
+
+      return this->a_row;
+    }
+
+
+    // Column of the element this accessor currently points to, looked up in
+    // the column cache at the present index. Asserts that the accessor has
+    // not run past the last row of the matrix.
+    inline AccessorBase::size_type
+    AccessorBase::column() const
+    {
+      Assert (a_row < matrix->m(), ExcBeyondEndOfMatrix());
+
+      return (*this->colnum_cache)[this->a_index];
+    }
+
+
+    // Index of the present element within its row. Asserts that the accessor
+    // has not run past the last row of the matrix.
+    inline AccessorBase::size_type
+    AccessorBase::index() const
+    {
+      Assert (a_row < matrix->m(), ExcBeyondEndOfMatrix());
+
+      return this->a_index;
+    }
+
+
+    // Const accessor: the base class stores a non-const matrix pointer, so
+    // the constness of the given pointer is cast away before forwarding.
+    inline
+    Accessor<true>::Accessor (MatrixType *matrix,
+                              const size_type  row,
+                              const size_type  index)
+      : AccessorBase(const_cast<SparseMatrix *>(matrix), row, index)
+    {
+    }
+
+
+    // Build a const accessor from an accessor of either constness by copying
+    // its AccessorBase part.
+    template <bool Other>
+    inline
+    Accessor<true>::Accessor(const Accessor<Other> &other)
+      : AccessorBase(other)
+    {
+    }
+
+
+    // Value of the present element, read from the value cache at the present
+    // index. Asserts that the accessor has not run past the last row.
+    inline TrilinosScalar
+    Accessor<true>::value() const
+    {
+      Assert (a_row < matrix->m(), ExcBeyondEndOfMatrix());
+
+      return (*this->value_cache)[this->a_index];
+    }
+
+
+    // Bind the reference object to the given accessor; constness is cast
+    // away so that the assignment operators can write through it.
+    inline
+    Accessor<false>::Reference::Reference (const Accessor<false> &acc)
+      : accessor(const_cast<Accessor<false>&>(acc))
+    {
+    }
+
+
+    // Conversion to the scalar type: yields the cached value of the element
+    // the referenced accessor points to.
+    inline
+    Accessor<false>::Reference::operator TrilinosScalar () const
+    {
+      Accessor<false> &acc = accessor;
+      return (*acc.value_cache)[acc.a_index];
+    }
+
+    // Assign n to the element: update the cached value first, then write
+    // the cached value into the matrix at (row, column).
+    inline
+    const Accessor<false>::Reference &
+    Accessor<false>::Reference::operator = (const TrilinosScalar n) const
+    {
+      (*accessor.value_cache)[accessor.a_index] = n;
+
+      const TrilinosScalar cached = static_cast<TrilinosScalar>(*this);
+      accessor.matrix->set(accessor.row(), accessor.column(), cached);
+
+      return *this;
+    }
+
+
+    // Add n to the element: update the cached value first, then write the
+    // resulting cached value into the matrix at (row, column).
+    inline
+    const Accessor<false>::Reference &
+    Accessor<false>::Reference::operator += (const TrilinosScalar n) const
+    {
+      (*accessor.value_cache)[accessor.a_index] += n;
+
+      const TrilinosScalar cached = static_cast<TrilinosScalar>(*this);
+      accessor.matrix->set(accessor.row(), accessor.column(), cached);
+
+      return *this;
+    }
+
+
+    // Subtract n from the element: update the cached value first, then
+    // write the resulting cached value into the matrix at (row, column).
+    inline
+    const Accessor<false>::Reference &
+    Accessor<false>::Reference::operator -= (const TrilinosScalar n) const
+    {
+      (*accessor.value_cache)[accessor.a_index] -= n;
+
+      const TrilinosScalar cached = static_cast<TrilinosScalar>(*this);
+      accessor.matrix->set(accessor.row(), accessor.column(), cached);
+
+      return *this;
+    }
+
+
+    // Multiply the element by n: update the cached value first, then write
+    // the resulting cached value into the matrix at (row, column).
+    inline
+    const Accessor<false>::Reference &
+    Accessor<false>::Reference::operator *= (const TrilinosScalar n) const
+    {
+      (*accessor.value_cache)[accessor.a_index] *= n;
+
+      const TrilinosScalar cached = static_cast<TrilinosScalar>(*this);
+      accessor.matrix->set(accessor.row(), accessor.column(), cached);
+
+      return *this;
+    }
+
+
+    // Divide the element by n: update the cached value first, then write
+    // the resulting cached value into the matrix at (row, column).
+    inline
+    const Accessor<false>::Reference &
+    Accessor<false>::Reference::operator /= (const TrilinosScalar n) const
+    {
+      (*accessor.value_cache)[accessor.a_index] /= n;
+
+      const TrilinosScalar cached = static_cast<TrilinosScalar>(*this);
+      accessor.matrix->set(accessor.row(), accessor.column(), cached);
+
+      return *this;
+    }
+
+
+    // Non-const accessor: forward matrix pointer and position to the base
+    // class unchanged.
+    inline
+    Accessor<false>::Accessor (MatrixType *matrix,
+                               const size_type  row,
+                               const size_type  index)
+      : AccessorBase(matrix, row, index)
+    {
+    }
+
+
+    // Hand out a Reference object bound to this accessor, through which the
+    // present element can be read and written. Asserts that the accessor has
+    // not run past the last row of the matrix.
+    inline Accessor<false>::Reference
+    Accessor<false>::value() const
+    {
+      Assert (a_row < matrix->m(), ExcBeyondEndOfMatrix());
+
+      const Reference element_reference(*this);
+      return element_reference;
+    }
+
+
+
+    // Construct an iterator whose accessor points to entry `index` of row
+    // `row` of the given matrix.
+    template <bool Constness>
+    inline
+    Iterator<Constness>::Iterator(MatrixType *matrix,
+                                  const size_type  row,
+                                  const size_type  index)
+      : accessor(matrix, row, index)
+    {
+    }
+
+
+    // Construct an iterator from an iterator of either constness by
+    // converting its accessor.
+    template <bool Constness>
+    template <bool Other>
+    inline
+    Iterator<Constness>::Iterator(const Iterator<Other> &other)
+      : accessor(other.accessor)
+    {
+    }
+
+
+    // Prefix increment: move to the next entry of the present row or, if the
+    // row is exhausted, to the first entry of the next locally stored,
+    // non-empty row (or to the end of the matrix if there is none).
+    template <bool Constness>
+    inline
+    Iterator<Constness> &
+    Iterator<Constness>::operator++ ()
+    {
+      Assert (accessor.a_row < accessor.matrix->m(), ExcIteratorPastEnd());
+
+      ++accessor.a_index;
+
+      // Still inside the present row: nothing else to do.
+      if (accessor.a_index < accessor.colnum_cache->size())
+        return *this;
+
+      // End of line reached: restart the in-row index, step to the next row
+      // and keep stepping while the row is not stored locally or is empty.
+      accessor.a_index = 0;
+      for (++accessor.a_row;
+           accessor.a_row < accessor.matrix->m();
+           ++accessor.a_row)
+        {
+          if (accessor.matrix->in_local_range (accessor.a_row)
+              &&
+              !(accessor.matrix->row_length(accessor.a_row) == 0))
+            break;
+        }
+
+      accessor.visit_present_row();
+      return *this;
+    }
+
+
+    // Postfix increment: advance via the prefix operator and hand back a
+    // copy of the iterator as it was before advancing.
+    template <bool Constness>
+    inline
+    Iterator<Constness>
+    Iterator<Constness>::operator++ (int)
+    {
+      const Iterator<Constness> before_increment = *this;
+      this->operator++ ();
+      return before_increment;
+    }
+
+
+
+    // Dereference: give read access to the accessor for the present element.
+    template <bool Constness>
+    inline
+    const Accessor<Constness> &
+    Iterator<Constness>::operator* () const
+    {
+      return this->accessor;
+    }
+
+
+
+    // Member access: give a pointer to the accessor for the present element.
+    template <bool Constness>
+    inline
+    const Accessor<Constness> *
+    Iterator<Constness>::operator-> () const
+    {
+      return &(this->accessor);
+    }
+
+
+
+    // Two iterators compare equal if they store the same row and the same
+    // index within the row. The matrix pointer is not part of the comparison.
+    template <bool Constness>
+    inline
+    bool
+    Iterator<Constness>::operator == (const Iterator<Constness> &other) const
+    {
+      if (accessor.a_row != other.accessor.a_row)
+        return false;
+
+      return accessor.a_index == other.accessor.a_index;
+    }
+
+
+
+    // Negation of operator==.
+    template <bool Constness>
+    inline
+    bool
+    Iterator<Constness>::operator != (const Iterator<Constness> &other) const
+    {
+      const bool same_position = (*this == other);
+      return !same_position;
+    }
+
+
+
+    // Lexicographic ordering by (row, index within row). The positions are
+    // read through row() and index(), which assert that neither iterator is
+    // past the end of its matrix.
+    template <bool Constness>
+    inline
+    bool
+    Iterator<Constness>::operator < (const Iterator<Constness> &other) const
+    {
+      if (accessor.row() < other.accessor.row())
+        return true;
+
+      if (accessor.row() == other.accessor.row())
+        return accessor.index() < other.accessor.index();
+
+      return false;
+    }
+
+
+    // Defined through operator< with the operands exchanged.
+    template <bool Constness>
+    inline
+    bool
+    Iterator<Constness>::operator > (const Iterator<Constness> &other) const
+    {
+      const bool other_is_smaller = (other < *this);
+      return other_is_smaller;
+    }
+
+  }
+
+
+
+  // Iterator to the first element of the matrix: same as begin(0).
+  inline SparseMatrix::const_iterator
+  SparseMatrix::begin() const
+  {
+    return this->begin(0);
+  }
+
+
+
+  // Past-the-end iterator of the matrix: row index m(), in-row index 0.
+  inline SparseMatrix::const_iterator
+  SparseMatrix::end() const
+  {
+    const const_iterator past_the_end(this, m(), 0);
+    return past_the_end;
+  }
+
+
+
+  // Iterator to the first element of row r. If the row is not stored
+  // locally or has no entries, the result is end(r) instead.
+  inline SparseMatrix::const_iterator
+  SparseMatrix::begin(const size_type r) const
+  {
+    Assert (r < m(), ExcIndexRange(r, 0, m()));
+
+    if (!in_local_range (r) || !(row_length(r) > 0))
+      return end (r);
+
+    return const_iterator(this, r, 0);
+  }
+
+
+
+  // Iterator past the last element of row r: the first entry of the next
+  // locally stored, non-empty row, or end() if no such row follows.
+  inline SparseMatrix::const_iterator
+  SparseMatrix::end(const size_type r) const
+  {
+    Assert (r < m(), ExcIndexRange(r, 0, m()));
+
+    // search the rows after r for one that has locally stored entries
+    size_type candidate = r+1;
+    while (candidate < m())
+      {
+        if (in_local_range (candidate) && (row_length(candidate) > 0))
+          return const_iterator(this, candidate, 0);
+        ++candidate;
+      }
+
+    // no such row exists, so fall back to the end iterator of the matrix
+    return end();
+  }
+
+
+
+  // Non-const iterator to the first element of the matrix: same as begin(0).
+  inline SparseMatrix::iterator
+  SparseMatrix::begin()
+  {
+    return this->begin(0);
+  }
+
+
+
+  // Non-const past-the-end iterator of the matrix: row index m(), in-row
+  // index 0.
+  inline SparseMatrix::iterator
+  SparseMatrix::end()
+  {
+    const iterator past_the_end(this, m(), 0);
+    return past_the_end;
+  }
+
+
+
+  // Non-const iterator to the first element of row r. If the row is not
+  // stored locally or has no entries, the result is end(r) instead.
+  inline SparseMatrix::iterator
+  SparseMatrix::begin(const size_type r)
+  {
+    Assert (r < m(), ExcIndexRange(r, 0, m()));
+
+    if (!in_local_range (r) || !(row_length(r) > 0))
+      return end (r);
+
+    return iterator(this, r, 0);
+  }
+
+
+
+  // Non-const iterator past the last element of row r: the first entry of
+  // the next locally stored, non-empty row, or end() if no such row follows.
+  inline SparseMatrix::iterator
+  SparseMatrix::end(const size_type r)
+  {
+    Assert (r < m(), ExcIndexRange(r, 0, m()));
+
+    // search the rows after r for one that has locally stored entries
+    size_type candidate = r+1;
+    while (candidate < m())
+      {
+        if (in_local_range (candidate) && (row_length(candidate) > 0))
+          return iterator(this, candidate, 0);
+        ++candidate;
+      }
+
+    // no such row exists, so fall back to the end iterator of the matrix
+    return end();
+  }
+
+
+
+  // Tell whether `index` lies in the half-open interval spanned by the
+  // smallest and (one past) the largest global row index that the row map
+  // of the underlying matrix reports for this process.
+  inline bool
+  SparseMatrix::in_local_range (const size_type index) const
+  {
+#ifdef DEAL_II_WITH_64BIT_INDICES
+    const TrilinosWrappers::types::int_type first_local
+      = matrix->RowMap().MinMyGID64();
+    const TrilinosWrappers::types::int_type past_last_local
+      = matrix->RowMap().MaxMyGID64()+1;
+#else
+    const TrilinosWrappers::types::int_type first_local
+      = matrix->RowMap().MinMyGID();
+    const TrilinosWrappers::types::int_type past_last_local
+      = matrix->RowMap().MaxMyGID()+1;
+#endif
+
+    if (index < static_cast<size_type>(first_local))
+      return false;
+
+    return index < static_cast<size_type>(past_last_local);
+  }
+
+
+
+  // Report the stored compression flag.
+  inline bool
+  SparseMatrix::is_compressed () const
+  {
+    return this->compressed;
+  }
+
+
+
+  // Inline the set() and add() functions, since they will be called
+  // frequently, and the compiler can optimize away some unnecessary loops
+  // when the sizes are given at compile time.
+  //
+  // Set a single entry (i,j): forwarded to the array version of set() with
+  // one column, one value and without eliding zeros.
+  inline void
+  SparseMatrix::set (const size_type      i,
+                     const size_type      j,
+                     const TrilinosScalar value)
+  {
+    AssertIsFinite(value);
+
+    this->set (i, 1, &j, &value, false);
+  }
+
+
+
+  // Set a quadratic block of entries: row k of `values` is written into
+  // matrix row indices[k], using `indices` as the column indices as well.
+  inline void
+  SparseMatrix::set (const std::vector<size_type>  &indices,
+                     const FullMatrix<TrilinosScalar> &values,
+                     const bool                        elide_zero_values)
+  {
+    Assert (indices.size() == values.m(),
+            ExcDimensionMismatch(indices.size(), values.m()));
+    Assert (values.m() == values.n(), ExcNotQuadratic());
+
+    // one call to the array version of set() per row of the block
+    for (size_type block_row=0; block_row<indices.size(); ++block_row)
+      {
+        set (indices[block_row], indices.size(), &indices[0],
+             &values(block_row,0), elide_zero_values);
+      }
+  }
+
+
+
+  // Add `value` to entry (i,j). A nonzero addend is forwarded to the array
+  // version of add(); a zero addend only performs the bookkeeping for
+  // switching from inserting to adding.
+  inline void
+  SparseMatrix::add (const size_type      i,
+                     const size_type      j,
+                     const TrilinosScalar value)
+  {
+    AssertIsFinite(value);
+
+    if (!(value == 0))
+      {
+        add (i, 1, &j, &value, false);
+        return;
+      }
+
+    // we have to check after Insert/Add in any case to be consistent
+    // with the MPI communication model (see the comments in the
+    // documentation of TrilinosWrappers::Vector), but we can save some
+    // work if the addend is zero. However, these actions are done in case
+    // we pass on to the other function.
+
+    // TODO: fix this (do not run compress here, but fail)
+    if (last_action == Insert)
+      {
+        const int ierr = matrix->GlobalAssemble(*column_space_map,
+                                                matrix->RowMap(), false);
+
+        Assert (ierr == 0, ExcTrilinosError(ierr));
+        (void)ierr; // removes -Wunused-but-set-variable in optimized mode
+      }
+
+    last_action = Add;
+  }
+
+
+
+  // inline "simple" functions that are called frequently and do only involve
+  // a call to some Trilinos function.
+  //
+  // Number of global rows, as reported by the underlying Trilinos matrix.
+  inline SparseMatrix::size_type
+  SparseMatrix::m () const
+  {
+#ifdef DEAL_II_WITH_64BIT_INDICES
+    return matrix->NumGlobalRows64();
+#else
+    return matrix->NumGlobalRows();
+#endif
+  }
+
+
+
+  // Number of global columns. This is always taken from the additional
+  // column space map: if the matrix structure has not been fixed (i.e., we
+  // did not have a sparsity pattern), the matrix itself does not know about
+  // the number of columns.
+  inline SparseMatrix::size_type
+  SparseMatrix::n () const
+  {
+    Assert(column_space_map.get() != 0, ExcInternalError());
+
+#ifdef DEAL_II_WITH_64BIT_INDICES
+    return column_space_map->NumGlobalElements64();
+#else
+    return column_space_map->NumGlobalElements();
+#endif
+  }
+
+
+
+  // Number of rows the underlying Trilinos matrix reports for this process.
+  inline unsigned int
+  SparseMatrix::local_size () const
+  {
+    return matrix->NumMyRows();
+  }
+
+
+
+  // Half-open range [first, last+1) of global row indices, taken from the
+  // smallest and largest global index the row map of the underlying matrix
+  // reports for this process.
+  inline std::pair<SparseMatrix::size_type, SparseMatrix::size_type>
+  SparseMatrix::local_range () const
+  {
+#ifdef DEAL_II_WITH_64BIT_INDICES
+    const size_type first_local     = matrix->RowMap().MinMyGID64();
+    const size_type past_last_local = matrix->RowMap().MaxMyGID64()+1;
+#else
+    const size_type first_local     = matrix->RowMap().MinMyGID();
+    const size_type past_last_local = matrix->RowMap().MaxMyGID()+1;
+#endif
+
+    return std::make_pair (first_local, past_last_local);
+  }
+
+
+
+  // Number of global nonzero entries, as reported by the underlying
+  // Trilinos matrix.
+  inline SparseMatrix::size_type
+  SparseMatrix::n_nonzero_elements () const
+  {
+#ifdef DEAL_II_WITH_64BIT_INDICES
+    return matrix->NumGlobalNonzeros64();
+#else
+    return matrix->NumGlobalNonzeros();
+#endif
+  }
+
+
+
+  // Convenience overload for the case that rows and columns share one
+  // partitioning: the index set is passed on as both the row and the column
+  // partitioning of the two-IndexSet version of reinit().
+  template <typename SparsityPatternType>
+  inline
+  void SparseMatrix::reinit (const IndexSet            &parallel_partitioning,
+                             const SparsityPatternType &sparsity_pattern,
+                             const MPI_Comm            &communicator,
+                             const bool                 exchange_data)
+  {
+    this->reinit (parallel_partitioning,
+                  parallel_partitioning,
+                  sparsity_pattern,
+                  communicator,
+                  exchange_data);
+  }
+
+
+
+  // Convenience overload for the case that rows and columns share one
+  // partitioning: the index set is passed on as both the row and the column
+  // partitioning of the two-IndexSet version of reinit().
+  //
+  // The communicator is forwarded explicitly. Previously it was only used to
+  // build a local Epetra_Map that was never read, and the forwarding call
+  // omitted it, so that drop_tolerance ended up in the argument position in
+  // which the SparsityPatternType overload above passes the communicator.
+  // NOTE(review): this assumes the two-IndexSet overload takes the
+  // communicator as its fourth argument, followed by drop_tolerance,
+  // copy_values and use_this_sparsity -- confirm against its declaration.
+  template <typename number>
+  inline
+  void SparseMatrix::reinit (const IndexSet                       &parallel_partitioning,
+                             const ::dealii::SparseMatrix<number> &sparse_matrix,
+                             const MPI_Comm                       &communicator,
+                             const double                          drop_tolerance,
+                             const bool                            copy_values,
+                             const ::dealii::SparsityPattern      *use_this_sparsity)
+  {
+    reinit (parallel_partitioning, parallel_partitioning, sparse_matrix,
+            communicator, drop_tolerance, copy_values, use_this_sparsity);
+  }
+
+
+
+  // Give read access to the stored Trilinos matrix, viewed as an
+  // Epetra_CrsMatrix.
+  inline const Epetra_CrsMatrix &
+  SparseMatrix::trilinos_matrix () const
+  {
+    const Epetra_CrsMatrix &crs_view
+      = static_cast<const Epetra_CrsMatrix &>(*matrix);
+    return crs_view;
+  }
+
+
+
+  // Give read access to the graph of the stored Trilinos matrix.
+  inline const Epetra_CrsGraph &
+  SparseMatrix::trilinos_sparsity_pattern () const
+  {
+    return (*matrix).Graph();
+  }
+
+
+
+  // Index set constructed from the domain map of the stored Trilinos matrix.
+  inline IndexSet
+  SparseMatrix::locally_owned_domain_indices () const
+  {
+    return IndexSet((*matrix).DomainMap());
+  }
+
+
+
+  // Index set constructed from the range map of the stored Trilinos matrix.
+  inline IndexSet
+  SparseMatrix::locally_owned_range_indices () const
+  {
+    return IndexSet((*matrix).RangeMap());
+  }
+
+
+
+  // Hook called before elements are added; this class needs no
+  // preparation, so the body is intentionally empty.
+  inline void
+  SparseMatrix::prepare_add()
+  {
+    // nothing to do here
+  }
+
+
+
+  // Hook called before elements are set; this class needs no preparation,
+  // so the body is intentionally empty.
+  inline void
+  SparseMatrix::prepare_set()
+  {
+    // nothing to do here
+  }
+
+
+
+#endif // DOXYGEN
+
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+
+#endif // DEAL_II_WITH_TRILINOS
+
+
+/*-----------------------   trilinos_sparse_matrix.h     --------------------*/
+
+#endif
+/*-----------------------   trilinos_sparse_matrix.h     --------------------*/
diff --git a/include/deal.II/lac/trilinos_sparsity_pattern.h b/include/deal.II/lac/trilinos_sparsity_pattern.h
new file mode 100644
index 0000000..ff3a993
--- /dev/null
+++ b/include/deal.II/lac/trilinos_sparsity_pattern.h
@@ -0,0 +1,1529 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__trilinos_sparsity_pattern_h
+#define dealii__trilinos_sparsity_pattern_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_TRILINOS
+
+#  include <deal.II/base/subscriptor.h>
+#  include <deal.II/base/index_set.h>
+#  include <deal.II/lac/exceptions.h>
+
+#  include <vector>
+#  include <cmath>
+#  include <memory>
+
+#  include <deal.II/base/std_cxx11/shared_ptr.h>
+
+DEAL_II_DISABLE_EXTRA_DIAGNOSTICS
+#  include <Epetra_FECrsGraph.h>
+#  include <Epetra_Map.h>
+#  ifdef DEAL_II_WITH_MPI
+#    include <Epetra_MpiComm.h>
+#    include "mpi.h"
+#  else
+#    include "Epetra_SerialComm.h"
+#  endif
+DEAL_II_ENABLE_EXTRA_DIAGNOSTICS
+
+
+DEAL_II_NAMESPACE_OPEN
+
+// forward declarations
+class SparsityPattern;
+class DynamicSparsityPattern;
+
+namespace TrilinosWrappers
+{
+  // forward declarations
+  class SparsityPattern;
+
+  namespace SparsityPatternIterators
+  {
+    // forward declaration
+    class Iterator;
+
+    /**
+     * Accessor class for iterators into sparsity patterns. This class is also
+     * the base class for both const and non-const accessor classes into
+     * sparse matrices.
+     *
+     * Note that this class only allows read access to elements, providing
+     * their row and column number. It does not allow modifying the sparsity
+     * pattern itself.
+     *
+     * An accessor identifies an element by the pair (row, index within that
+     * row); the column number is obtained through column().
+     *
+     * @ingroup TrilinosWrappers
+     * @author Wolfgang Bangerth, Martin Kronbichler, Guido Kanschat
+     * @date 2004, 2008, 2012
+     */
+    class Accessor
+    {
+    public:
+      /**
+       * Declare type for container size.
+       */
+      typedef dealii::types::global_dof_index size_type;
+
+      /**
+       * Constructor. Takes the sparsity pattern this accessor refers to, the
+       * row, and the index of the element within that row.
+       */
+      Accessor (const SparsityPattern *sparsity_pattern,
+                const size_type        row,
+                const size_type        index);
+
+      /**
+       * Copy constructor.
+       */
+      Accessor (const Accessor &a);
+
+      /**
+       * Row number of the element represented by this object.
+       */
+      size_type row() const;
+
+      /**
+       * Index in row of the element represented by this object.
+       */
+      size_type index() const;
+
+      /**
+       * Column number of the element represented by this object.
+       */
+      size_type column() const;
+
+      /**
+       * Exception. Judging from its name, it signals an access past the end
+       * of the sparsity pattern.
+       */
+      DeclException0 (ExcBeyondEndOfSparsityPattern);
+
+      /**
+       * Exception reporting an access to a row of a distributed sparsity
+       * pattern that is not stored locally. The arguments are the requested
+       * row and the first and last row of the range reported as locally
+       * stored.
+       */
+      DeclException3 (ExcAccessToNonlocalRow,
+                      size_type, size_type, size_type,
+                      << "You tried to access row " << arg1
+                      << " of a distributed sparsity pattern, "
+                      << " but only rows " << arg2 << " through " << arg3
+                      << " are stored locally and can be accessed.");
+
+    private:
+      /**
+       * The sparsity pattern accessed.
+       */
+      mutable SparsityPattern *sparsity_pattern;
+
+      /**
+       * Current row number.
+       */
+      size_type a_row;
+
+      /**
+       * Current index in row.
+       */
+      size_type a_index;
+
+      /**
+       * Cache where we store the column indices of the present row. This is
+       * necessary, since Trilinos makes access to the elements of its
+       * matrices rather hard, and it is much more efficient to copy all
+       * column entries of a row once when we enter it than repeatedly asking
+       * Trilinos for individual ones. This also makes some sense since it is
+       * likely that we will access them sequentially anyway.
+       *
+       * In order to make copying of iterators/accessors of acceptable
+       * performance, we keep a shared pointer to these entries so that more
+       * than one accessor can access this data if necessary.
+       */
+      std_cxx11::shared_ptr<const std::vector<size_type> > colnum_cache;
+
+      /**
+       * Discard the old row caches (they may still be used by other
+       * accessors) and generate new ones for the row pointed to presently by
+       * this accessor.
+       */
+      void visit_present_row ();
+
+      /**
+       * Make the iterator class a friend so that it can reach the private
+       * members of this accessor.
+       */
+      friend class Iterator;
+    };
+
+    /**
+     * Iterator class for sparsity patterns of type
+     * TrilinosWrappers::SparsityPattern. Access to individual elements of the
+     * sparsity pattern is handled by the Accessor class in this namespace.
+     */
+    class Iterator
+    {
+    public:
+      /**
+       * Declare type for container size.
+       */
+      typedef dealii::types::global_dof_index size_type;
+
+      /**
+       * Constructor. Create an iterator into the sparsity pattern
+       * @p sparsity_pattern for the given row and the index within it.
+       */
+      Iterator (const SparsityPattern *sparsity_pattern,
+                const size_type        row,
+                const size_type        index);
+
+      /**
+       * Copy constructor.
+       */
+      Iterator (const Iterator &i);
+
+      /**
+       * Prefix increment.
+       */
+      Iterator &operator++ ();
+
+      /**
+       * Postfix increment.
+       */
+      Iterator operator++ (int);
+
+      /**
+       * Dereferencing operator. Returns a reference to an Accessor.
+       */
+      const Accessor &operator* () const;
+
+      /**
+       * Dereferencing operator. Returns a pointer to an Accessor.
+       */
+      const Accessor *operator-> () const;
+
+      /**
+       * Comparison. True, if both iterators point to the same position.
+       */
+      bool operator == (const Iterator &) const;
+
+      /**
+       * Inverse of <tt>==</tt>.
+       */
+      bool operator != (const Iterator &) const;
+
+      /**
+       * Comparison operator. Result is true if either the first row number is
+       * smaller or if the row numbers are equal and the first index is
+       * smaller.
+       */
+      bool operator < (const Iterator &) const;
+
+      /**
+       * Exception reporting an attempt to access an element index within a
+       * row that does not have that many elements. The first argument is the
+       * row, the second the offending index.
+       */
+      DeclException2 (ExcInvalidIndexWithinRow,
+                      size_type, size_type,
+                      << "Attempt to access element " << arg2
+                      << " of row " << arg1
+                      << " which doesn't have that many elements.");
+
+    private:
+      /**
+       * Store an object of the accessor class.
+       */
+      Accessor accessor;
+
+      /**
+       * Give the sparsity pattern class access to the private members of
+       * this iterator.
+       */
+      friend class TrilinosWrappers::SparsityPattern;
+    };
+
+  }
+
+
+  /**
+   * This class implements a wrapper class to use the Trilinos distributed
+   * sparsity pattern class Epetra_FECrsGraph. This class is designed to be
+   * used for construction of %parallel Trilinos matrices. The functionality
+   * of this class is modeled after the existing sparsity pattern classes,
+   * with the difference that this class can work fully in %parallel according
+   * to a partitioning of the sparsity pattern rows.
+   *
+   * This class has many similarities to the  DynamicSparsityPattern, since it
+   * can dynamically add elements to the pattern without any memory being
+   * previously reserved for it. However, it also has a method
+   * SparsityPattern::compress(), that finalizes the pattern and enables its
+   * use with Trilinos sparse matrices.
+   *
+   * @ingroup TrilinosWrappers
+   * @ingroup Sparsity
+   * @author Martin Kronbichler, 2008
+   */
+  class SparsityPattern : public Subscriptor
+  {
+  public:
+
+    /**
+     * Declare type for container size.
+     */
+    typedef dealii::types::global_dof_index size_type;
+
+    /**
+     * Declare a typedef for the iterator class.
+     */
+    typedef SparsityPatternIterators::Iterator const_iterator;
+
+    /**
+     * @name Basic constructors and initialization.
+     */
+//@{
+    /**
+     * Default constructor. Generates an empty (zero-size) sparsity pattern.
+     */
+    SparsityPattern ();
+
+    /**
+     * Generate a sparsity pattern that is completely stored locally, having
+     * $m$ rows and $n$ columns. The resulting matrix will be completely
+     * stored locally, too.
+     *
+     * It is possible to specify the number of column entries per row using
+     * the optional @p n_entries_per_row argument. However, this value does
+     * not need to be accurate or even given at all, since one does usually
+     * not have this kind of information before building the sparsity pattern
+     * (the usual case when the function DoFTools::make_sparsity_pattern() is
+     * called). The entries are allocated dynamically in a similar manner as
+     * for the deal.II DynamicSparsityPattern classes. However, a good
+     * estimate will reduce the setup time of the sparsity pattern.
+     */
+    SparsityPattern (const size_type  m,
+                     const size_type  n,
+                     const size_type  n_entries_per_row = 0);
+
+    /**
+     * Generate a sparsity pattern that is completely stored locally, having
+     * $m$ rows and $n$ columns. The resulting matrix will be completely
+     * stored locally, too.
+     *
+     * The vector <tt>n_entries_per_row</tt> specifies the number of entries
+     * in each row (information that is usually not available, though).
+     */
+    SparsityPattern (const size_type               m,
+                     const size_type               n,
+                     const std::vector<size_type> &n_entries_per_row);
+
+    /**
+     * Copy constructor. Sets the calling sparsity pattern to be the same as
+     * the input sparsity pattern.
+     */
+    SparsityPattern (const SparsityPattern &input_sparsity_pattern);
+
+    /**
+     * Destructor. Made virtual so that one can use pointers to this class.
+     */
+    virtual ~SparsityPattern ();
+
+    /**
+     * Initialize a sparsity pattern that is completely stored locally, having
+     * $m$ rows and $n$ columns. The resulting matrix will be completely
+     * stored locally.
+     *
+     * The number of column entries per row is specified as the maximum
+     * number of entries argument.  This does not need to be an accurate
+     * number since the entries are allocated dynamically in a similar manner
+     * as for the deal.II DynamicSparsityPattern classes, but a good estimate
+     * will reduce the setup time of the sparsity pattern.
+     */
+    void
+    reinit (const size_type  m,
+            const size_type  n,
+            const size_type  n_entries_per_row = 0);
+
+    /**
+     * Initialize a sparsity pattern that is completely stored locally, having
+     * $m$ rows and $n$ columns. The resulting matrix will be completely
+     * stored locally.
+     *
+     * The vector <tt>n_entries_per_row</tt> specifies the number of entries
+     * in each row.
+     */
+    void
+    reinit (const size_type               m,
+            const size_type               n,
+            const std::vector<size_type> &n_entries_per_row);
+
+    /**
+     * Copy function. Sets the calling sparsity pattern to be the same as the
+     * input sparsity pattern.
+     */
+    void
+    copy_from (const SparsityPattern &input_sparsity_pattern);
+
+    /**
+     * Copy function from one of the deal.II sparsity patterns. If used in
+     * parallel, this function uses an ad-hoc partitioning of the rows and
+     * columns.
+     */
+    template<typename SparsityPatternType>
+    void
+    copy_from (const SparsityPatternType &nontrilinos_sparsity_pattern);
+
+    /**
+     * Copy operator. This operation is only allowed for empty objects, to
+     * avoid potentially very costly operations automatically synthesized by
+     * the compiler. Use copy_from() instead if you know that you really want
+     * to copy a sparsity pattern with non-trivial content.
+     */
+    SparsityPattern &operator = (const SparsityPattern &input_sparsity_pattern);
+
+    /**
+     * Release all memory and return to a state just like after having called
+     * the default constructor.
+     *
+     * This is a collective operation that needs to be called on all
+     * processors in order to avoid a dead lock.
+     */
+    void clear ();
+
+    /**
+     * In analogy to our own SparsityPattern class, this function compresses
+     * the sparsity pattern and allows the resulting pattern to be used for
+     * actually generating a (Trilinos-based) matrix. This function also
+     * exchanges non-local data that might have accumulated during the
+     * addition of new elements. This function must therefore be called once
+     * the structure is fixed. This is a collective operation, i.e., it needs
+     * to be run on all processors when used in parallel.
+     */
+    void compress ();
+//@}
+    /**
+     * @name Constructors and initialization using an Epetra_Map description
+     */
+//@{
+
+    /**
+     * Constructor for a square sparsity pattern using an Epetra_Map for the
+     * description of the %parallel partitioning. Moreover, the number of
+     * nonzero entries in the rows of the sparsity pattern can be specified.
+     * Note that this number does not need to be exact, and it is allowed that
+     * the actual sparsity structure has more nonzero entries than specified
+     * in the constructor (the usual case when the function
+     * DoFTools::make_sparsity_pattern() is called). However it is still
+     * advantageous to provide good estimates here since a good value will
+     * avoid repeated allocation of memory, which considerably increases the
+     * performance when creating the sparsity pattern.
+     *
+     * @deprecated Use the respective method with IndexSet argument instead.
+     */
+    SparsityPattern (const Epetra_Map &parallel_partitioning,
+                     const size_type   n_entries_per_row = 0) DEAL_II_DEPRECATED;
+
+    /**
+     * Same as before, but now use the exact number of nonzeros in each row.
+     * Since we know the number of elements in the sparsity pattern exactly in
+     * this case, we can already allocate the right amount of memory, which
+     * makes the creation process by the respective SparsityPattern::reinit
+     * call considerably faster. However, this is a rather unusual situation,
+     * since knowing the number of entries in each row is usually connected to
+     * knowing the indices of nonzero entries, which the sparsity pattern is
+     * designed to describe.
+     *
+     * @deprecated Use the respective method with IndexSet argument instead.
+     */
+    SparsityPattern (const Epetra_Map             &parallel_partitioning,
+                     const std::vector<size_type> &n_entries_per_row) DEAL_II_DEPRECATED;
+
+    /**
+     * This constructor is similar to the one above, but it now takes two
+     * different Epetra maps for rows and columns. This interface is meant to
+     * be used for generating rectangular sparsity pattern, where one map
+     * describes the %parallel partitioning of the dofs associated with the
+     * sparsity pattern rows and the other one of the sparsity pattern
+     * columns. Note that there is no real parallelism along the columns
+     * – the processor that owns a certain row always owns all the
+     * column elements, no matter how far they might be spread out. The second
+     * Epetra_Map is only used to specify the number of columns and for
+     * specifying the correct domain space when performing matrix-vector
+     * products with vectors based on the same column map.
+     *
+     * The number of column entries per row is specified as the maximum
+     * number of entries argument.
+     *
+     * @deprecated Use the respective method with IndexSet argument instead.
+     */
+    SparsityPattern (const Epetra_Map   &row_parallel_partitioning,
+                     const Epetra_Map   &col_parallel_partitioning,
+                     const size_type     n_entries_per_row = 0) DEAL_II_DEPRECATED;
+
+    /**
+     * This constructor is similar to the one above, but it now takes two
+     * different Epetra maps for rows and columns. This interface is meant to
+     * be used for generating rectangular matrices, where one map specifies
+     * the %parallel distribution of rows and the second one specifies the
+     * distribution of degrees of freedom associated with matrix columns. This
+     * second map is however not used for the distribution of the columns
+     * themselves – rather, all column elements of a row are stored on
+     * the same processor. The vector <tt>n_entries_per_row</tt> specifies the
+     * number of entries in each row of the newly generated matrix.
+     *
+     * @deprecated Use the respective method with IndexSet argument instead.
+     */
+    SparsityPattern (const Epetra_Map             &row_parallel_partitioning,
+                     const Epetra_Map             &col_parallel_partitioning,
+                     const std::vector<size_type> &n_entries_per_row) DEAL_II_DEPRECATED;
+
+    /**
+     * Reinitialization function for generating a square sparsity pattern
+     * using an Epetra_Map for the description of the %parallel partitioning
+     * and the number of nonzero entries in the rows of the sparsity pattern.
+     * Note that this number does not need to be exact, and it is even allowed
+     * that the actual sparsity structure has more nonzero entries than
+     * specified in the constructor. However it is still advantageous to
+     * provide good estimates here since this will considerably increase the
+     * performance when creating the sparsity pattern.
+     *
+     * This function does not create any entries by itself, but provides the
+     * correct data structures that can be used by the respective add()
+     * function.
+     *
+     * @deprecated Use the respective method with IndexSet argument instead.
+     */
+    void
+    reinit (const Epetra_Map &parallel_partitioning,
+            const size_type   n_entries_per_row = 0) DEAL_II_DEPRECATED;
+
+    /**
+     * Same as before, but now use the exact number of nonzeros in each row.
+     * Since we know the number of elements in the sparsity pattern exactly in
+     * this case, we can already allocate the right amount of memory, which
+     * makes the process of adding entries to the sparsity pattern considerably
+     * faster. However, this is a rather unusual situation, since knowing the
+     * number of entries in each row is usually connected to knowing the
+     * indices of nonzero entries, which the sparsity pattern is designed to
+     * describe.
+     *
+     * @deprecated Use the respective method with IndexSet argument instead.
+     */
+    void
+    reinit (const Epetra_Map             &parallel_partitioning,
+            const std::vector<size_type> &n_entries_per_row) DEAL_II_DEPRECATED;
+
+    /**
+     * This reinit function is similar to the one above, but it now takes two
+     * different Epetra maps for rows and columns. This interface is meant to
+     * be used for generating rectangular sparsity pattern, where one map
+     * describes the %parallel partitioning of the dofs associated with the
+     * sparsity pattern rows and the other one of the sparsity pattern
+     * columns. Note that there is no real parallelism along the columns
+     * – the processor that owns a certain row always owns all the
+     * column elements, no matter how far they might be spread out. The second
+     * Epetra_Map is only used to specify the number of columns and for
+     * internal arrangements when doing matrix-vector products with vectors
+     * based on that column map.
+     *
+     * The number of column entries per row is specified by the argument
+     * <tt>n_entries_per_row</tt>.
+     *
+     * @deprecated Use the respective method with IndexSet argument instead.
+     */
+    void
+    reinit (const Epetra_Map   &row_parallel_partitioning,
+            const Epetra_Map   &col_parallel_partitioning,
+            const size_type     n_entries_per_row = 0) DEAL_II_DEPRECATED;
+
+    /**
+     * This reinit function is similar to the one above, but it now takes two
+     * different Epetra maps for rows and columns. This interface is meant to
+     * be used for generating rectangular matrices, where one map specifies
+     * the %parallel distribution of rows and the second one specifies the
+     * distribution of degrees of freedom associated with matrix columns. This
+     * second map is however not used for the distribution of the columns
+     * themselves – rather, all column elements of a row are stored on
+     * the same processor. The vector <tt>n_entries_per_row</tt> specifies the
+     * number of entries in each row of the newly generated matrix.
+     *
+     * @deprecated Use the respective method with IndexSet argument instead.
+     */
+    void
+    reinit (const Epetra_Map             &row_parallel_partitioning,
+            const Epetra_Map             &col_parallel_partitioning,
+            const std::vector<size_type> &n_entries_per_row) DEAL_II_DEPRECATED;
+
+    /**
+     * Reinit function. Takes one of the deal.II sparsity patterns and a
+     * %parallel partitioning of the rows and columns for initializing the
+     * current Trilinos sparsity pattern. The optional argument @p
+     * exchange_data can be used for reinitialization with a sparsity pattern
+     * that is not fully constructed. This feature is only implemented for
+     * input sparsity patterns of type DynamicSparsityPattern.
+     *
+     * @deprecated Use the respective method with IndexSet argument instead.
+     */
+    template<typename SparsityPatternType>
+    void
+    reinit (const Epetra_Map          &row_parallel_partitioning,
+            const Epetra_Map          &col_parallel_partitioning,
+            const SparsityPatternType &nontrilinos_sparsity_pattern,
+            const bool                 exchange_data = false) DEAL_II_DEPRECATED;
+
+    /**
+     * Reinit function. Takes one of the deal.II sparsity patterns and a
+     * %parallel partitioning of the rows and columns for initializing the
+     * current Trilinos sparsity pattern. The optional argument @p
+     * exchange_data can be used for reinitialization with a sparsity pattern
+     * that is not fully constructed. This feature is only implemented for
+     * input sparsity patterns of type DynamicSparsityPattern.
+     *
+     * @deprecated Use the respective method with IndexSet argument instead.
+     */
+    template<typename SparsityPatternType>
+    void
+    reinit (const Epetra_Map          &parallel_partitioning,
+            const SparsityPatternType &nontrilinos_sparsity_pattern,
+            const bool                 exchange_data = false) DEAL_II_DEPRECATED;
+//@}
+    /**
+     * @name Constructors and initialization using an IndexSet description
+     */
+//@{
+
+    /**
+     * Constructor for a square sparsity pattern using an IndexSet and an MPI
+     * communicator for the description of the %parallel partitioning.
+     * Moreover, the number of nonzero entries in the rows of the sparsity
+     * pattern can be specified. Note that this number does not need to be
+     * exact, and it is even allowed that the actual sparsity structure has
+     * more nonzero entries than specified in the constructor. However it is
+     * still advantageous to provide good estimates here since a good value
+     * will avoid repeated allocation of memory, which considerably increases
+     * the performance when creating the sparsity pattern.
+     */
+    SparsityPattern (const IndexSet  &parallel_partitioning,
+                     const MPI_Comm  &communicator = MPI_COMM_WORLD,
+                     const size_type  n_entries_per_row = 0);
+
+    /**
+     * Same as before, but now use the exact number of nonzeros in each row.
+     * Since we know the number of elements in the sparsity pattern exactly in
+     * this case, we can already allocate the right amount of memory, which
+     * makes the creation process by the respective SparsityPattern::reinit
+     * call considerably faster. However, this is a rather unusual situation,
+     * since knowing the number of entries in each row is usually connected to
+     * knowing the indices of nonzero entries, which the sparsity pattern is
+     * designed to describe.
+     */
+    SparsityPattern (const IndexSet                  &parallel_partitioning,
+                     const MPI_Comm                  &communicator,
+                     const std::vector<size_type> &n_entries_per_row);
+
+    /**
+     * This constructor is similar to the one above, but it now takes two
+     * different index sets to describe the %parallel partitioning of rows and
+     * columns. This interface is meant to be used for generating rectangular
+     * sparsity pattern. Note that there is no real parallelism along the
+     * columns – the processor that owns a certain row always owns all
+     * the column elements, no matter how far they might be spread out. The
+     * second IndexSet is only used to specify the number of columns and for
+     * internal arrangements when doing matrix-vector products with vectors
+     * based on that column map.
+     *
+     * The number of column entries per row is specified as the maximum
+     * number of entries argument.
+     */
+    SparsityPattern (const IndexSet  &row_parallel_partitioning,
+                     const IndexSet  &col_parallel_partitioning,
+                     const MPI_Comm  &communicator = MPI_COMM_WORLD,
+                     const size_type  n_entries_per_row = 0);
+
+    /**
+     * This constructor is similar to the one above, but it now takes two
+     * different index sets for rows and columns. This interface is meant to
+     * be used for generating rectangular matrices, where one map specifies
+     * the %parallel distribution of rows and the second one specifies the
+     * distribution of degrees of freedom associated with matrix columns. This
+     * second map is however not used for the distribution of the columns
+     * themselves – rather, all column elements of a row are stored on
+     * the same processor. The vector <tt>n_entries_per_row</tt> specifies the
+     * number of entries in each row of the newly generated matrix.
+     */
+    SparsityPattern (const IndexSet               &row_parallel_partitioning,
+                     const IndexSet               &col_parallel_partitioning,
+                     const MPI_Comm               &communicator,
+                     const std::vector<size_type> &n_entries_per_row);
+
+    /**
+     * This constructor constructs general sparsity patterns, possibly
+     * non-square ones. Constructing a sparsity pattern this way allows the user
+     * to explicitly specify the rows into which we are going to add elements.
+     * This set is required to be a superset of the first index set @p
+     * row_parallel_partitioning that includes also rows that are owned by
+     * another processor (ghost rows). Note that elements can only be added to
+     * rows specified by @p writable_rows.
+     *
+     * This method is beneficial when the rows to which a processor is going
+     * to write can be determined before actually inserting elements into the
+     * matrix. For the typical parallel::distributed::Triangulation class used
+     * in deal.II, we know that a processor only will add row elements for
+     * what we call the locally relevant dofs (see
+     * DoFTools::extract_locally_relevant_dofs). The other constructors
+     * use general Trilinos facilities that allow adding elements to
+     * arbitrary rows (as done by all the other reinit functions). However,
+     * this flexibility comes at a cost, the most prominent being that adding
+     * elements into the same matrix from multiple threads in shared memory is
+     * not safe whenever MPI is used. For these settings, the current method
+     * is the one to choose: It will store the off-processor data as an
+     * additional sparsity pattern (that is then passed to the Trilinos matrix
+     * via the reinit method) which can be organized in such a way that
+     * thread-safety can be ensured (as long as the user makes sure to never
+     * write into the same matrix row simultaneously, of course).
+     */
+    SparsityPattern (const IndexSet  &row_parallel_partitioning,
+                     const IndexSet  &col_parallel_partitioning,
+                     const IndexSet  &writable_rows,
+                     const MPI_Comm  &communicator = MPI_COMM_WORLD,
+                     const size_type  n_entries_per_row = 0);
+
+    /**
+     * Reinitialization function for generating a square sparsity pattern
+     * using an IndexSet and an MPI communicator for the description of the
+     * %parallel partitioning and the number of nonzero entries in the rows of
+     * the sparsity pattern. Note that this number does not need to be exact,
+     * and it is even allowed that the actual sparsity structure has more
+     * nonzero entries than specified in the constructor. However it is still
+     * advantageous to provide good estimates here since this will
+     * considerably increase the performance when creating the sparsity
+     * pattern.
+     *
+     * This function does not create any entries by itself, but provides the
+     * correct data structures that can be used by the respective add()
+     * function.
+     */
+    void
+    reinit (const IndexSet  &parallel_partitioning,
+            const MPI_Comm  &communicator = MPI_COMM_WORLD,
+            const size_type  n_entries_per_row = 0);
+
+    /**
+     * Same as before, but now use the exact number of nonzeros in each row.
+     * Since we know the number of elements in the sparsity pattern exactly in
+     * this case, we can already allocate the right amount of memory, which
+     * makes the process of adding entries to the sparsity pattern considerably
+     * faster. However, this is a rather unusual situation, since knowing the
+     * number of entries in each row is usually connected to knowing the
+     * indices of nonzero entries, which the sparsity pattern is designed to
+     * describe.
+     */
+    void
+    reinit (const IndexSet               &parallel_partitioning,
+            const MPI_Comm               &communicator,
+            const std::vector<size_type> &n_entries_per_row);
+
+    /**
+     * This reinit function is similar to the one above, but it now takes two
+     * different index sets for rows and columns. This interface is meant to
+     * be used for generating rectangular sparsity pattern, where one index
+     * set describes the %parallel partitioning of the dofs associated with
+     * the sparsity pattern rows and the other one of the sparsity pattern
+     * columns. Note that there is no real parallelism along the columns
+     * – the processor that owns a certain row always owns all the
+     * column elements, no matter how far they might be spread out. The second
+     * IndexSet is only used to specify the number of columns and for internal
+     * arrangements when doing matrix-vector products with vectors based on an
+     * EpetraMap based on that IndexSet.
+     *
+     * The number of column entries per row is specified by the argument
+     * <tt>n_entries_per_row</tt>.
+     */
+    void
+    reinit (const IndexSet  &row_parallel_partitioning,
+            const IndexSet  &col_parallel_partitioning,
+            const MPI_Comm  &communicator = MPI_COMM_WORLD,
+            const size_type  n_entries_per_row = 0);
+
+    /**
+     * This reinit function is used to specify general matrices, possibly non-
+     * square ones. In addition to the arguments of the other reinit method
+     * above, it allows the user to explicitly specify the rows into which we
+     * are going to add elements. This set is a superset of the first index
+     * set @p row_parallel_partitioning that includes also rows that are owned
+     * by another processor (ghost rows).
+     *
+     * This method is beneficial when the rows to which a processor is going
+     * to write can be determined before actually inserting elements into the
+     * matrix. For the typical parallel::distributed::Triangulation class used
+     * in deal.II, we know that a processor only will add row elements for
+     * what we call the locally relevant dofs (see
+     * DoFTools::extract_locally_relevant_dofs). Trilinos matrices allow
+     * adding elements to arbitrary rows, and this is what all the other
+     * reinit methods permit, too.
+     * However, this flexibility comes at a cost, the most prominent being that
+     * adding elements into the same matrix from multiple threads in shared
+     * memory is not safe whenever MPI is used. For these settings, the
+     * current method is the one to choose: It will store the off-processor
+     * data as an additional sparsity pattern (that is then passed to the
+     * Trilinos matrix via the reinit method) which can be organized in such a
+     * way that thread-safety can be ensured (as long as the user makes sure
+     * to never write into the same matrix row simultaneously, of course).
+     */
+    void
+    reinit (const IndexSet  &row_parallel_partitioning,
+            const IndexSet  &col_parallel_partitioning,
+            const IndexSet  &writeable_rows,
+            const MPI_Comm  &communicator = MPI_COMM_WORLD,
+            const size_type  n_entries_per_row = 0);
+
+    /**
+     * Same as before, but now using a vector <tt>n_entries_per_row</tt> for
+     * specifying the number of entries in each row of the sparsity pattern.
+     */
+    void
+    reinit (const IndexSet               &row_parallel_partitioning,
+            const IndexSet               &col_parallel_partitioning,
+            const MPI_Comm               &communicator,
+            const std::vector<size_type> &n_entries_per_row);
+
+    /**
+     * Reinit function. Takes one of the deal.II sparsity patterns and the
+     * %parallel partitioning of the rows and columns specified by two index
+     * sets and a %parallel communicator for initializing the current Trilinos
+     * sparsity pattern. The optional argument @p exchange_data can be used
+     * for reinitialization with a sparsity pattern that is not fully
+     * constructed. This feature is only implemented for input sparsity
+     * patterns of type DynamicSparsityPattern.
+     */
+    template<typename SparsityPatternType>
+    void
+    reinit (const IndexSet            &row_parallel_partitioning,
+            const IndexSet            &col_parallel_partitioning,
+            const SparsityPatternType &nontrilinos_sparsity_pattern,
+            const MPI_Comm            &communicator  = MPI_COMM_WORLD,
+            const bool                 exchange_data = false);
+
+    /**
+     * Reinit function. Takes one of the deal.II sparsity patterns and a
+     * %parallel partitioning of the rows and columns for initializing the
+     * current Trilinos sparsity pattern. The optional argument @p
+     * exchange_data can be used for reinitialization with a sparsity pattern
+     * that is not fully constructed. This feature is only implemented for
+     * input sparsity patterns of type DynamicSparsityPattern.
+     */
+    template<typename SparsityPatternType>
+    void
+    reinit (const IndexSet            &parallel_partitioning,
+            const SparsityPatternType &nontrilinos_sparsity_pattern,
+            const MPI_Comm            &communicator  = MPI_COMM_WORLD,
+            const bool                 exchange_data = false);
+//@}
+    /**
+     * @name Information on the sparsity pattern
+     */
+//@{
+
+    /**
+     * Returns the state of the sparsity pattern, i.e., whether compress()
+     * needs to be called after an operation requiring data exchange.
+     */
+    bool is_compressed () const;
+
+    /**
+     * Gives the maximum number of entries per row on the current processor.
+     */
+    unsigned int max_entries_per_row () const;
+
+    /**
+     * Return the number of rows in this sparsity pattern.
+     */
+    size_type n_rows () const;
+
+    /**
+     * Return the number of columns in this sparsity pattern.
+     */
+    size_type n_cols () const;
+
+    /**
+     * Return the local dimension of the sparsity pattern, i.e. the number of
+     * rows stored on the present MPI process. In the sequential case, this
+     * number is the same as n_rows(), but for parallel matrices it may be
+     * smaller.
+     *
+     * To figure out which elements exactly are stored locally, use
+     * local_range().
+     */
+    unsigned int local_size () const;
+
+    /**
+     * Return a pair of indices indicating which rows of this sparsity pattern
+     * are stored locally. The first number is the index of the first row
+     * stored, the second the index of the one past the last one that is
+     * stored locally. If this is a sequential matrix, then the result will be
+     * the pair (0,n_rows()), otherwise it will be a pair (i,i+n), where
+     * <tt>n=local_size()</tt>.
+     */
+    std::pair<size_type, size_type>
+    local_range () const;
+
+    /**
+     * Return whether @p index is in the local range or not, see also
+     * local_range().
+     */
+    bool in_local_range (const size_type index) const;
+
+    /**
+     * Return the number of nonzero elements of this sparsity pattern.
+     */
+    size_type n_nonzero_elements () const;
+
+    /**
+     * Number of entries in a specific row.
+     */
+    size_type row_length (const size_type row) const;
+
+    /**
+     * Compute the bandwidth of the matrix represented by this structure. The
+     * bandwidth is the maximum of $|i-j|$ for which the index pair $(i,j)$
+     * represents a nonzero entry of the matrix. Consequently, the maximum
+     * bandwidth a $n\times m$ matrix can have is $\max\{n-1,m-1\}$.
+     */
+    size_type bandwidth () const;
+
+    /**
+     * Return whether the object is empty. It is empty if no memory is
+     * allocated, which is the same as when both dimensions are zero.
+     */
+    bool empty () const;
+
+    /**
+     * Return whether the index (<i>i,j</i>) exists in the sparsity pattern
+     * (i.e., it may be non-zero) or not.
+     */
+    bool exists (const size_type i,
+                 const size_type j) const;
+
+    /**
+     * Determine an estimate for the memory consumption (in bytes) of this
+     * object. Currently not implemented for this class.
+     */
+    std::size_t memory_consumption () const;
+
+//@}
+    /**
+     * @name Adding entries
+     */
+//@{
+    /**
+     * Add the element (<i>i,j</i>) to the sparsity pattern.
+     */
+    void add (const size_type i,
+              const size_type j);
+
+
+    /**
+     * Add several elements in one row to the sparsity pattern.
+     */
+    template <typename ForwardIterator>
+    void add_entries (const size_type  row,
+                      ForwardIterator  begin,
+                      ForwardIterator  end,
+                      const bool       indices_are_sorted = false);
+//@}
+    /**
+     * @name Access of underlying Trilinos data
+     */
+//@{
+
+    /**
+     * Return a const reference to the underlying Trilinos Epetra_FECrsGraph
+     * data that stores the sparsity pattern.
+     */
+    const Epetra_FECrsGraph &trilinos_sparsity_pattern () const;
+
+    /**
+     * Return a const reference to the underlying Trilinos Epetra_Map that
+     * sets the parallel partitioning of the domain space of this sparsity
+     * pattern, i.e., the partitioning of the vectors that matrices based on
+     * this sparsity pattern are multiplied with.
+     *
+     * @deprecated Use locally_owned_domain_indices() instead.
+     */
+    const Epetra_Map &domain_partitioner () const DEAL_II_DEPRECATED;
+
+    /**
+     * Return a const reference to the underlying Trilinos Epetra_Map that
+     * sets the partitioning of the range space of this sparsity pattern,
+     * i.e., the partitioning of the vectors that result from matrix-vector
+     * products.
+     *
+     * @deprecated Use locally_owned_range_indices() instead.
+     */
+    const Epetra_Map &range_partitioner () const DEAL_II_DEPRECATED;
+
+    /**
+     * Return a const reference to the underlying Trilinos Epetra_Map that
+     * sets the partitioning of the sparsity pattern rows. Equal to the
+     * partitioning of the range.
+     *
+     * @deprecated Use locally_owned_range_indices() instead.
+     */
+    const Epetra_Map &row_partitioner () const DEAL_II_DEPRECATED;
+
+    /**
+     * Return a const reference to the underlying Trilinos Epetra_Map that
+     * sets the partitioning of the sparsity pattern columns. This is in
+     * general not equal to the partitioner Epetra_Map for the domain because
+     * of overlap in the matrix.
+     *
+     * @deprecated Usually not necessary. If desired, access via the
+     * Epetra_FECrsGraph.
+     */
+    const Epetra_Map &col_partitioner () const DEAL_II_DEPRECATED;
+
+    /**
+     * Return a const reference to the communicator used for this object.
+     *
+     * @deprecated Use get_mpi_communicator instead.
+     */
+    const Epetra_Comm &trilinos_communicator () const DEAL_II_DEPRECATED;
+
+    /**
+     * Return the MPI communicator object in use with this matrix.
+     */
+    MPI_Comm get_mpi_communicator () const;
+//@}
+
+    /**
+     * @name Partitioners
+     */
+//@{
+
+    /**
+     * Return the partitioning of the domain space of this pattern, i.e., the
+     * partitioning of the vectors a matrix based on this sparsity pattern has
+     * to be multiplied with.
+     */
+    IndexSet locally_owned_domain_indices() const;
+
+    /**
+     * Return the partitioning of the range space of this pattern, i.e., the
+     * partitioning of the vectors that are the result from matrix-vector
+     * products from a matrix based on this pattern.
+     */
+    IndexSet locally_owned_range_indices() const;
+
+//@}
+
+    /**
+     * @name Iterators
+     */
+//@{
+
+    /**
+     * Iterator starting at the first entry.
+     */
+    const_iterator begin () const;
+
+    /**
+     * Final iterator.
+     */
+    const_iterator end () const;
+
+    /**
+     * Iterator starting at the first entry of row @p r.
+     *
+     * Note that if the given row is empty, i.e. does not contain any nonzero
+     * entries, then the iterator returned by this function equals
+     * <tt>end(r)</tt>. Note also that the iterator may not be dereferencable
+     * in that case.
+     */
+    const_iterator begin (const size_type r) const;
+
+    /**
+     * Final iterator of row <tt>r</tt>. It points to the first element past
+     * the end of line @p r, or past the end of the entire sparsity pattern.
+     *
+     * Note that the end iterator is not necessarily dereferencable. This is
+     * in particular the case if it is the end iterator for the last row of a
+     * matrix.
+     */
+    const_iterator end (const size_type r) const;
+
+//@}
+    /**
+     * @name Input/Output
+     */
+//@{
+
+    /**
+     * Abstract Trilinos object that helps view in ASCII other Trilinos
+     * objects. Currently this function is not implemented.  TODO: Not
+     * implemented.
+     */
+    void write_ascii ();
+
+    /**
+     * Print (the locally owned part of) the sparsity pattern to the given
+     * stream, using the format <tt>(line,col)</tt>. The optional flag outputs
+     * the sparsity pattern in Trilinos style, where even the according
+     * processor number is printed to the stream, as well as a summary before
+     * actually writing the entries.
+     */
+    void print (std::ostream &out,
+                const bool    write_extended_trilinos_info = false) const;
+
+    /**
+     * Print the sparsity of the matrix in a format that <tt>gnuplot</tt>
+     * understands and which can be used to plot the sparsity pattern in a
+     * graphical way. The format consists of pairs <tt>i j</tt> of nonzero
+     * elements, each representing one entry of this matrix, one per line of
+     * the output file. Indices are counted from zero on, as usual. Since
+     * sparsity patterns are printed in the same way as matrices are
+     * displayed, we print the negative of the column index, which means that
+     * the <tt>(0,0)</tt> element is in the top left rather than in the bottom
+     * left corner.
+     *
+     * Print the sparsity pattern in gnuplot by setting the data style to dots
+     * or points and use the <tt>plot</tt> command.
+     */
+    void print_gnuplot (std::ostream &out) const;
+
+//@}
+    /**
+     * @addtogroup Exceptions
+     * @{
+     */
+    /**
+     * Exception
+     */
+    DeclException1 (ExcTrilinosError,
+                    int,
+                    << "An error with error number " << arg1
+                    << " occurred while calling a Trilinos function");
+
+    /**
+     * Exception
+     */
+    DeclException2 (ExcInvalidIndex,
+                    size_type, size_type,
+                    << "The entry with index <" << arg1 << ',' << arg2
+                    << "> does not exist.");
+
+    /**
+     * Exception
+     */
+    DeclException0 (ExcSourceEqualsDestination);
+
+    /**
+     * Exception
+     */
+    DeclException4 (ExcAccessToNonLocalElement,
+                    size_type, size_type, size_type, size_type,
+                    << "You tried to access element (" << arg1
+                    << "/" << arg2 << ")"
+                    << " of a distributed matrix, but only rows "
+                    << arg3 << " through " << arg4
+                    << " are stored locally and can be accessed.");
+
+    /**
+     * Exception
+     */
+    DeclException2 (ExcAccessToNonPresentElement,
+                    size_type, size_type,
+                    << "You tried to access element (" << arg1
+                    << "/" << arg2 << ")"
+                    << " of a sparse matrix, but it appears to not"
+                    << " exist in the Trilinos sparsity pattern.");
+    //@}
+  private:
+
+    /**
+     * Pointer to the user-supplied Epetra Trilinos mapping of the matrix
+     * columns that assigns parts of the matrix to the individual processes.
+     */
+    std_cxx11::shared_ptr<Epetra_Map> column_space_map;
+
+    /**
+     * A sparsity pattern object in Trilinos to be used for finite element
+     * based problems which allows for adding non-local elements to the
+     * pattern.
+     */
+    std_cxx11::shared_ptr<Epetra_FECrsGraph> graph;
+
+    /**
+     * A sparsity pattern object for the non-local part of the sparsity
+     * pattern that is going to be sent to the owning processor. Only used
+     * when the particular constructor or reinit method with the
+     * writeable_rows argument is used.
+     */
+    std_cxx11::shared_ptr<Epetra_CrsGraph> nonlocal_graph;
+
+    friend class SparseMatrix;
+    friend class SparsityPatternIterators::Accessor;
+    friend class SparsityPatternIterators::Iterator;
+  };
+
+
+
+// -------------------------- inline and template functions ----------------------
+
+
+#ifndef DOXYGEN
+
+  namespace SparsityPatternIterators
+  {
+
+    // Set up an accessor for entry number @p index within row @p row of the
+    // pattern @p sp, then let visit_present_row() process that row.
+    inline Accessor::Accessor (const SparsityPattern *sp,
+                               const size_type        row,
+                               const size_type        index)
+      : sparsity_pattern (const_cast<SparsityPattern *>(sp)),
+        a_row (row),
+        a_index (index)
+    {
+      this->visit_present_row ();
+    }
+
+
+    // Copy constructor: member-wise copy of @p a, including colnum_cache.
+    inline Accessor::Accessor (const Accessor &a)
+      : sparsity_pattern (a.sparsity_pattern),
+        a_row (a.a_row),
+        a_index (a.a_index),
+        colnum_cache (a.colnum_cache)
+    {
+    }
+
+
+    // Return the row this accessor currently points to. Asserts that the
+    // accessor has not run past the last row of the pattern.
+    inline Accessor::size_type Accessor::row() const
+    {
+      Assert (a_row < sparsity_pattern->n_rows(), ExcBeyondEndOfSparsityPattern());
+      return this->a_row;
+    }
+
+
+
+    // Return the column index of the current entry, looked up at position
+    // a_index of the column cache. Asserts that the row is still in range.
+    inline Accessor::size_type Accessor::column() const
+    {
+      Assert (a_row < sparsity_pattern->n_rows(), ExcBeyondEndOfSparsityPattern());
+      return (*(this->colnum_cache))[this->a_index];
+    }
+
+
+
+    // Return the position of the current entry within its row. Asserts that
+    // the accessor has not run past the last row of the pattern.
+    inline Accessor::size_type Accessor::index() const
+    {
+      Assert (a_row < sparsity_pattern->n_rows(), ExcBeyondEndOfSparsityPattern());
+      return this->a_index;
+    }
+
+
+
+    // Create an iterator whose accessor refers to entry @p index of row
+    // @p row in the pattern @p sp.
+    inline Iterator::Iterator(const SparsityPattern *sp,
+                              const size_type        row,
+                              const size_type        index)
+      : accessor (sp, row, index)
+    {}
+
+
+    // Copy constructor: duplicates the accessor of @p i.
+    inline Iterator::Iterator(const Iterator &i)
+      : accessor (i.accessor)
+    {
+    }
+
+
+
+    // Prefix increment: advance to the next stored entry, skipping over rows
+    // that contain no entries. Asserts that the iterator is not past the end.
+    inline Iterator &
+    Iterator::operator++ ()
+    {
+      Assert (accessor.a_row < accessor.sparsity_pattern->n_rows(),
+              ExcIteratorPastEnd());
+
+      ++accessor.a_index;
+
+      // still inside the cached row: nothing more to do
+      if (accessor.a_index < accessor.colnum_cache->size())
+        return *this;
+
+      // we ran off the end of the row: restart at entry zero of the next row
+      accessor.a_index = 0;
+      ++accessor.a_row;
+
+      // keep moving forward while the row exists but has no entries
+      for (; accessor.a_row < accessor.sparsity_pattern->n_rows(); ++accessor.a_row)
+        if (accessor.sparsity_pattern->row_length(accessor.a_row) != 0)
+          break;
+
+      // let the accessor visit whichever row we ended up on
+      accessor.visit_present_row();
+      return *this;
+    }
+
+
+
+    // Postfix increment: advance this iterator via the prefix operator and
+    // return a copy of the state it had before the call.
+    inline Iterator Iterator::operator++ (int)
+    {
+      const Iterator previous (*this);
+      this->operator++ ();
+      return previous;
+    }
+
+
+
+    // Dereference: hand out the accessor held by this iterator.
+    inline const Accessor &
+    Iterator::operator* () const
+    {
+      return this->accessor;
+    }
+
+
+
+    // Member access: hand out the address of the accessor held here.
+    inline const Accessor *
+    Iterator::operator-> () const
+    {
+      return &(this->accessor);
+    }
+
+
+
+    // Equal means same row and same index; the pattern is not compared.
+    inline bool Iterator::operator == (const Iterator &other) const
+    {
+      if (accessor.a_row != other.accessor.a_row)
+        return false;
+      return accessor.a_index == other.accessor.a_index;
+    }
+
+
+
+    // Inequality is defined as the negation of operator==.
+    inline bool
+    Iterator::operator != (const Iterator &other) const
+    {
+      return !(this->operator == (other));
+    }
+
+
+
+    // Order iterators lexicographically by (row, index) via row()/index().
+    inline bool Iterator::operator < (const Iterator &other) const
+    {
+      if (accessor.row() < other.accessor.row())
+        return true;
+      return (accessor.row() == other.accessor.row() &&
+              accessor.index() < other.accessor.index());
+    }
+
+  }
+
+
+
+  // Iterator positioned at entry 0 of row 0 of this pattern.
+  inline SparsityPattern::const_iterator
+  SparsityPattern::begin() const
+  {
+    return const_iterator(this, 0, 0);
+  }
+
+
+
+  // Past-the-end iterator of the whole pattern: entry 0 of row n_rows().
+  inline SparsityPattern::const_iterator
+  SparsityPattern::end() const
+  {
+    return const_iterator(this, this->n_rows(), 0);
+  }
+
+
+
+  // Return an iterator to the first entry of row @p r. A row without any
+  // entries has no first entry, in which case end(r) is returned instead.
+  inline SparsityPattern::const_iterator
+  SparsityPattern::begin(const size_type r) const
+  {
+    Assert (r < n_rows(), ExcIndexRangeType<size_type>(r, 0, n_rows()));
+
+    const bool row_has_entries = (row_length(r) > 0);
+    return row_has_entries ? const_iterator(this, r, 0) : end (r);
+  }
+
+
+
+  // Iterator one past the last entry of row @p r: the first entry of the
+  // next non-empty row, or end() if no later row has entries.
+  inline SparsityPattern::const_iterator
+  SparsityPattern::end(const size_type r) const
+  {
+    Assert (r < n_rows(), ExcIndexRangeType<size_type>(r, 0, n_rows()));
+    // scan the rows following r for the first one that holds an entry
+    size_type next_row = r+1;
+    while (next_row < n_rows())
+      {
+        if (row_length(next_row) > 0)
+          return const_iterator(this, next_row, 0);
+        ++next_row;
+      }
+    // nothing found: the row's end coincides with the end of the pattern
+    return end();
+  }
+
+
+
+  // Check whether @p index lies in [MinMyGID, MaxMyGID+1) of the graph's
+  // row map, using the 32- or 64-bit Epetra queries as configured.
+  inline bool SparsityPattern::in_local_range (const size_type index) const
+  {
+#ifndef DEAL_II_WITH_64BIT_INDICES
+    const TrilinosWrappers::types::int_type first = graph->RowMap().MinMyGID();
+    const TrilinosWrappers::types::int_type past_last = graph->RowMap().MaxMyGID()+1;
+#else
+    const TrilinosWrappers::types::int_type first = graph->RowMap().MinMyGID64();
+    const TrilinosWrappers::types::int_type past_last = graph->RowMap().MaxMyGID64()+1;
+#endif
+
+    if (index < static_cast<size_type>(first))
+      return false;
+    return index < static_cast<size_type>(past_last);
+  }
+
+
+
+  // The compressed state is whatever the underlying graph's Filled() says.
+  inline bool SparsityPattern::is_compressed () const
+  {
+    const bool graph_is_filled = graph->Filled();
+    return graph_is_filled;
+  }
+
+
+
+  // The pattern is empty exactly when it has zero rows and zero columns.
+  inline bool SparsityPattern::empty () const
+  {
+    const bool no_rows = (n_rows() == 0);
+    return no_rows && (n_cols() == 0);
+  }
+
+
+
+  // Insert the single entry (i,j) by forwarding a one-element column range.
+  inline void SparsityPattern::add (const size_type i,
+                                    const size_type j)
+  {
+    const size_type *const column = &j;
+    add_entries (i, column, column+1);
+  }
+
+
+
+  // Add the column indices in [begin,end) to row @p row. Rows found in the
+  // main graph's row map go into 'graph'; other rows go into nonlocal_graph
+  // if it exists, and otherwise through the InsertGlobalIndices() variant of
+  // 'graph' that takes a row count and a row array. An empty range is a
+  // no-op. Throws ExcTrilinosError if Trilinos returns a negative code.
+  template <typename ForwardIterator>
+  inline void
+  SparsityPattern::add_entries (const size_type row,
+                                ForwardIterator begin,
+                                ForwardIterator end,
+                                const bool      /*indices_are_sorted*/)
+  {
+    if (begin == end)
+      return;
+
+    // The column indices are handed to Trilinos by reinterpreting the memory
+    // 'begin' points to, so the element size has to match the Trilinos index
+    // type. Signedness is allowed to differ; only 32 vs. 64 bit is checked.
+    // Note that we cannot take sizeof(*begin) directly: 'begin' may be an
+    // iterator whose '*begin' is an accessor class, so we force an actual
+    // value by multiplying by 2.
+    Assert (sizeof(TrilinosWrappers::types::int_type) ==
+            sizeof((*begin)*2),
+            ExcNotImplemented());
+
+    typedef TrilinosWrappers::types::int_type trilinos_int;
+    trilinos_int *col_index_ptr = (trilinos_int *)(&*begin);
+    const int n_cols = static_cast<int>(end - begin);
+    int ierr;
+    if ( graph->RowMap().LID(static_cast<TrilinosWrappers::types::int_type>(row)) != -1)
+      ierr = graph->InsertGlobalIndices (row, n_cols, col_index_ptr);
+    else if (nonlocal_graph.get() == 0)
+      ierr = graph->InsertGlobalIndices
+             (1, (TrilinosWrappers::types::int_type *)&row, n_cols, col_index_ptr);
+    else
+      {
+        // this is the case when we have explicitly set the off-processor rows
+        // and want to create a separate matrix object for them (to retain
+        // thread-safety)
+        Assert (nonlocal_graph->RowMap().LID(static_cast<TrilinosWrappers::types::int_type>(row)) != -1,
+                ExcMessage("Attempted to write into off-processor matrix row "
+                           "that has not be specified as being writable upon "
+                           "initialization"));
+        ierr = nonlocal_graph->InsertGlobalIndices (row, n_cols, col_index_ptr);
+      }
+
+    AssertThrow (ierr >= 0, ExcTrilinosError(ierr));
+  }
+
+
+
+  // Expose the wrapped Epetra_FECrsGraph by const reference.
+  inline const Epetra_FECrsGraph &
+  SparsityPattern::trilinos_sparsity_pattern () const
+  {
+    return *(this->graph);
+  }
+
+
+
+  // Build an IndexSet from the graph's domain map, viewed as an Epetra_Map.
+  inline IndexSet SparsityPattern::locally_owned_domain_indices () const
+  {
+    const Epetra_Map &domain_map = static_cast<const Epetra_Map &>(graph->DomainMap());
+    return IndexSet(domain_map);
+  }
+
+
+
+  // Build an IndexSet from the graph's range map, viewed as an Epetra_Map.
+  inline IndexSet SparsityPattern::locally_owned_range_indices () const
+  {
+    const Epetra_Map &range_map = static_cast<const Epetra_Map &>(graph->RangeMap());
+    return IndexSet(range_map);
+  }
+
+#endif // DOXYGEN
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+
+#endif // DEAL_II_WITH_TRILINOS
+
+
+/*--------------------   trilinos_sparsity_pattern.h     --------------------*/
+
+#endif
+/*--------------------   trilinos_sparsity_pattern.h     --------------------*/
diff --git a/include/deal.II/lac/trilinos_vector.h b/include/deal.II/lac/trilinos_vector.h
new file mode 100644
index 0000000..990311d
--- /dev/null
+++ b/include/deal.II/lac/trilinos_vector.h
@@ -0,0 +1,1046 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__trilinos_vector_h
+#define dealii__trilinos_vector_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_TRILINOS
+
+#  include <deal.II/base/std_cxx11/shared_ptr.h>
+#  include <deal.II/base/subscriptor.h>
+#  include <deal.II/base/index_set.h>
+#  include <deal.II/base/utilities.h>
+#  include <deal.II/lac/exceptions.h>
+#  include <deal.II/lac/vector.h>
+#  include <deal.II/lac/trilinos_vector_base.h>
+
+DEAL_II_DISABLE_EXTRA_DIAGNOSTICS
+#  include "Epetra_Map.h"
+#  include "Epetra_LocalMap.h"
+DEAL_II_ENABLE_EXTRA_DIAGNOSTICS
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// forward declaration
+template <typename> class Vector;
+
+/**
+ * @addtogroup TrilinosWrappers
+ * @{
+ */
+namespace TrilinosWrappers
+{
+  class SparseMatrix;
+
+  namespace
+  {
+    // Helper that translates the local index @p i of an Epetra_BlockMap into
+    // the corresponding global index. Depending on whether deal.II is
+    // configured with 64-bit indices, it forwards to the 32-bit GID() or the
+    // 64-bit GID64() query and returns int or long long int accordingly.
+#ifndef DEAL_II_WITH_64BIT_INDICES
+    inline int
+    gid(const Epetra_BlockMap &map, int i)
+    {
+      const int global_index = map.GID(i);
+      return global_index;
+    }
+#else
+    inline long long int
+    gid(const Epetra_BlockMap &map, int i)
+    {
+      const long long int global_index = map.GID64(i);
+      return global_index;
+    }
+#endif
+  }
+
+  /**
+   * Namespace for Trilinos vector classes that work in parallel over MPI.
+   * This namespace is restricted to vectors only, whereas matrices are always
+   * MPI based when run on more than one processor.
+   *
+   * @ingroup TrilinosWrappers
+   * @author Martin Kronbichler, Wolfgang Bangerth, 2008
+   */
+  namespace MPI
+  {
+    class BlockVector;
+
+    /**
+     * This class implements a wrapper to use the Trilinos distributed vector
+     * class Epetra_FEVector. This class is derived from the
+     * TrilinosWrappers::VectorBase class and provides all functionality
+     * included there.
+     *
+     * Note that Trilinos only guarantees that operations do what you expect
+     * if the function @p GlobalAssemble has been called after vector assembly
+     * in order to distribute the data. This is necessary since some processes
+     * might have accumulated data of elements that are not owned by
+     * themselves, but must be sent to the owning process. In order to avoid
+     * using the wrong data, you need to call Vector::compress() before you
+     * actually use the vectors.
+     *
+     * <h3>Parallel communication model</h3>
+     *
+     * The parallel functionality of Trilinos is built on top of the Message
+     * Passing Interface (MPI). MPI's communication model is built on
+     * collective communications: if one process wants something from another,
+     * that other process has to be willing to accept this communication. A
+     * process cannot query data from another process by calling a remote
+     * function, without that other process expecting such a transaction. The
+     * consequence is that most of the operations in the base class of this
+     * class have to be called collectively. For example, if you want to
+     * compute the l2 norm of a parallel vector, @em all processes across
+     * which this vector is shared have to call the @p l2_norm function. If
+     * you don't do this, but instead only call the @p l2_norm function on one
+     * process, then the following happens: This one process will call one of
+     * the collective MPI functions and wait for all the other processes to
+     * join in on this. Since the other processes don't call this function,
+     * you will either get a time-out on the first process, or, worse, by the
+     * time the next call to a Trilinos function generates an MPI message on
+     * the other processes, you will get a cryptic message that only a subset
+     * of processes attempted a communication. These bugs can be very hard to
+     * figure out, unless you are well-acquainted with the communication model
+     * of MPI, and know which functions may generate MPI messages.
+     *
+     * One particular case, where an MPI message may be generated unexpectedly
+     * is discussed below.
+     *
+     *
+     * <h3>Accessing individual elements of a vector</h3>
+     *
+     * Trilinos does of course allow read access to individual elements of a
+     * vector, but in the distributed case only to elements that are stored
+     * locally. We implement this through calls like <tt>d=vec(i)</tt>.
+     * However, if you access an element outside the locally stored range, an
+     * exception is generated.
+     *
+     * In contrast to read access, Trilinos (and the respective deal.II
+     * wrapper classes) allow to write (or add) to individual elements of
+     * vectors, even if they are stored on a different process. You can do
+     * this by writing into or adding to elements using the syntax
+     * <tt>vec(i)=d</tt> or <tt>vec(i)+=d</tt>, or similar operations. There
+     * is one catch, however, that may lead to very confusing error messages:
+     * Trilinos requires application programs to call the compress() function
+     * when they switch from performing a set of operations that add to
+     * elements, to performing a set of operations that write to elements. The
+     * reasoning is that all processes might accumulate addition operations to
+     * elements, even if multiple processes write to the same elements. By the
+     * time we call compress() the next time, all these additions are
+     * executed. However, if one process adds to an element, and another
+     * overwrites to it, the order of execution would yield non-deterministic
+     * behavior if we don't make sure that a synchronization with compress()
+     * happens in between.
+     *
+     * In order to make sure these calls to compress() happen at the
+     * appropriate time, the deal.II wrappers keep a state variable that stores
+     * which is the presently allowed operation: additions or writes. If it
+     * encounters an operation of the opposite kind, it calls compress() and
+     * flips the state. This can sometimes lead to very confusing behavior, in
+     * code that may for example look like this:
+     *
+     * @code
+     * TrilinosWrappers::Vector vector;
+     * // do some write operations on the vector
+     * for (size_type i=0; i<vector.size(); ++i)
+     *   vector(i) = i;
+     *
+     * // do some additions to vector elements, but
+     * // only for some elements
+     * for (size_type i=0; i<vector.size(); ++i)
+     *   if (some_condition(i) == true)
+     *     vector(i) += 1;
+     *
+     * // do another collective operation
+     * const double norm = vector.l2_norm();
+     * @endcode
+     *
+     * This code can run into trouble: by the time we see the first addition
+     * operation, we need to flush the overwrite buffers for the vector, and
+     * the deal.II library will do so by calling compress(). However, it will
+     * only do so for all processes that actually do an addition -- if the
+     * condition is never true for one of the processes, then this one will
+     * not get to the actual compress() call, whereas all the other ones do.
+     * This gets us into trouble, since all the other processes hang in the
+     * call to flush the write buffers, while the one other process advances
+     * to the call to compute the l2 norm. At this time, you will get an error
+     * that some operation was attempted by only a subset of processes. This
+     * behavior may seem surprising, unless you know that write/addition
+     * operations on single elements may trigger this behavior.
+     *
+     * The problem described here may be avoided by placing additional calls
+     * to compress(), or making sure that all processes do the same type of
+     * operations at the same time, for example by placing zero additions if
+     * necessary.
+     *
+     *
+     * <h3>Ghost elements of vectors</h3>
+     *
+     * Parallel vectors come in two kinds: without and with ghost elements.
+     * Vectors without ghost elements uniquely partition the vector elements
+     * between processors: each vector entry has exactly one processor that
+     * owns it. For such vectors, you can read those elements that the
+     * processor you are currently on owns, and you can write into any element
+     * whether you own it or not: if you don't own it, the value written or
+     * added to a vector element will be shipped to the processor that owns
+     * this vector element the next time you call compress(), as described
+     * above.
+     *
+     * What we call a 'ghosted' vector (see
+     * @ref GlossGhostedVector "vectors with ghost elements"
+     * ) is simply a view of the parallel vector where the element
+     * distributions overlap. The 'ghosted' Trilinos vector in itself has no
+     * idea of which entries are ghosted and which are locally owned. In fact,
+     * a ghosted vector may not even store all of the elements a non-ghosted
+     * vector would store on the current processor.  Consequently, for
+     * Trilinos vectors, there is no notion of an 'owner' of vector elements
+     * in the way we have it in the non-ghost case view.
+     *
+     * This explains why we do not allow writing into ghosted vectors on the
+     * Trilinos side: Who would be responsible for taking care of the
+     * duplicated entries, given that there is no such information as locally
+     * owned indices? In other words, since a processor doesn't know which
+     * other processors own an element, who would it send a value to if one
+     * were to write to it? The only possibility would be to send this
+     * information to <i>all</i> other processors, but that is clearly not
+     * practical. Thus, we only allow reading from ghosted vectors, which
+     * however we do very often.
+     *
+     * So how do you fill a ghosted vector if you can't write to it? This only
+     * happens through the assignment with a non-ghosted vector. It can go
+     * both ways (non-ghosted is assigned to a ghosted vector, and a ghosted
+     * vector is assigned to a non-ghosted one; the latter one typically only
+     * requires taking out the locally owned part as most often ghosted
+     * vectors store a superset of elements of non-ghosted ones). In general,
+     * you send data around with that operation and it all depends on the
+     * different views of the two vectors. Trilinos also allows you to get
+     * subvectors out of a big vector that way.
+     *
+     *
+     * <h3>Thread safety of Trilinos vectors</h3>
+     *
+     * When writing into Trilinos vectors from several threads in shared
+     * memory, several things must be kept in mind as there are no built-in
+     * locks in this class to prevent data races. Simultaneous access to the
+     * same vector entry at the same time results in data races and must be
+     * explicitly avoided by the user. However, it is possible to access
+     * <b>different</b> entries of the vector from several threads
+     * simultaneously when only one MPI process is present or the vector has
+     * been constructed with an additional index set for ghost entries in
+     * write mode.
+     *
+     * @ingroup TrilinosWrappers
+     * @ingroup Vectors
+     * @author Martin Kronbichler, Wolfgang Bangerth, 2008, 2009
+     */
+    class Vector : public VectorBase
+    {
+    public:
+      /**
+       * Declare type for container size.
+       */
+      typedef dealii::types::global_dof_index size_type;
+
+      /**
+       * A variable that indicates whether this vector supports distributed
+       * data storage. If true, then this vector also needs an appropriate
+       * compress() function that allows recent set or add operations on
+       * individual elements to be communicated to other
+       * processors.
+       *
+       * For the current class, the variable equals true, since it does
+       * support parallel data storage.
+       */
+      static const bool supports_distributed_data = true;
+
+      /**
+       * @name Basic constructors and initialization.
+       */
+      //@{
+      /**
+       * Default constructor that generates an empty (zero size) vector. The
+       * function <tt>reinit()</tt> will have to give the vector the correct
+       * size and distribution among processes in case of an MPI run.
+       */
+      Vector ();
+
+      /**
+       * Copy constructor using the given vector.
+       */
+      Vector (const Vector &v);
+
+#ifdef DEAL_II_WITH_CXX11
+      /**
+       * Move constructor. Creates a new vector by stealing the internal data
+       * of the vector @p v.
+       *
+       * @note This constructor is only available if deal.II is configured
+       * with C++11 support.
+       */
+      Vector (Vector &&v);
+#endif
+
+      /**
+       * Destructor.
+       */
+      ~Vector ();
+
+      /**
+       * Reinit functionality. This function sets the calling vector to the
+       * dimension and the parallel distribution of the input vector, but does
+       * not copy the elements in <tt>v</tt>. If <tt>omit_zeroing_entries</tt>
+       * is not <tt>true</tt>, the elements in the vector are initialized with
+       * zero, otherwise the content will be left unchanged and the user has
+       * to set all elements.
+       *
+       * This function has a third argument, <tt>allow_different_maps</tt>,
+       * that allows for an exchange of data between two equal-sized vectors
+       * (but being distributed differently among the processors). A trivial
+       * application of this function is to generate a replication of a whole
+       * vector on each machine, when the calling vector is built according to
+       * the localized vector class TrilinosWrappers::Vector, and <tt>v</tt>
+       * is a distributed vector. In this case, the variable
+       * <tt>omit_zeroing_entries</tt> needs to be set to <tt>false</tt>,
+       * since it does not make sense to exchange data between differently
+       * parallelized vectors without touching the elements.
+       */
+      void reinit (const VectorBase &v,
+                   const bool        omit_zeroing_entries = false,
+                   const bool        allow_different_maps = false);
+
+      /**
+       * Create vector by merging components from a block vector.
+       */
+      void reinit (const BlockVector &v,
+                   const bool         import_data = false);
+
+      /**
+       * Set all components of the vector to the given number @p s. Simply
+       * pass this down to the base class, but we still need to declare this
+       * function to make the example given in the discussion about making the
+       * constructor explicit work.
+       */
+      Vector &operator= (const TrilinosScalar s);
+
+      /**
+       * Copy the given vector. Resize the present vector if necessary. In
+       * this case, also the Epetra_Map that describes the parallel
+       * partitioning is taken from the input vector.
+       */
+      Vector &operator= (const Vector &v);
+
+#ifdef DEAL_II_WITH_CXX11
+      /**
+       * Move the given vector. This operator replaces the present vector with
+       * @p v by efficiently swapping the internal data structures.
+       *
+       * @note This operator is only available if deal.II is configured with
+       * C++11 support.
+       */
+      Vector &operator= (Vector &&v);
+#endif
+
+      /**
+       * Copy operator from a given localized vector (present on all
+       * processes) in TrilinosWrappers format to the current distributed
+       * vector. This function assumes that the calling vector (left hand
+       * object) already is of the same size as the right hand side vector.
+       * Otherwise, an exception will be thrown.
+       */
+      Vector &operator= (const ::dealii::TrilinosWrappers::Vector &v);
+
+      /**
+       * Another copy function. This one takes a deal.II vector and copies it
+       * into a TrilinosWrappers vector. Note that since we do not provide any
+       * Epetra_Map that tells about the partitioning of the vector among the
+       * MPI processes, the size of the TrilinosWrappers vector has to be the
+       * same as the size of the input vector. In order to change the map, use
+       * the reinit(const Epetra_Map &input_map) function.
+       */
+      template <typename Number>
+      Vector &operator= (const ::dealii::Vector<Number> &v);
+
+      /**
+       * This reinit function is meant to be used for parallel calculations
+       * where some non-local data has to be used. The typical situation where
+       * one needs this function is the call of the
+       * FEValues<dim>::get_function_values function (or of some derivatives)
+       * in parallel. Since it is usually faster to retrieve the data in
+       * advance, this function can be called before the assembly forks out to
+       * the different processors. What this function does is the following:
+       * It takes the information in the columns of the given matrix and looks
+       * which data couples between the different processors. That data is
+       * then queried from the input vector. Note that you should not write to
+       * the resulting vector any more, since some data can be stored
+       * several times on different processors, leading to unpredictable
+       * results. In particular, such a vector cannot be used for matrix-
+       * vector products as for example done during the solution of linear
+       * systems.
+       */
+      void import_nonlocal_data_for_fe
+      (const dealii::TrilinosWrappers::SparseMatrix &matrix,
+       const Vector                                 &vector);
+//@}
+      /**
+       * @name Initialization with an Epetra_Map
+       */
+//@{
+      /**
+       * This constructor takes an Epetra_Map that already knows how to
+       * distribute the individual components among the MPI processors. Since
+       * it also includes information about the size of the vector, this is
+       * all we need to generate a parallel vector.
+       *
+       * Depending on whether the @p parallel_partitioning argument uniquely
+       * subdivides elements among processors or not, the resulting vector may
+       * or may not have ghost elements. See the general documentation of this
+       * class for more information.
+       *
+       * This function is deprecated.
+       *
+       * @see
+       * @ref GlossGhostedVector "vectors with ghost elements"
+       */
+      explicit Vector (const Epetra_Map &parallel_partitioning)  DEAL_II_DEPRECATED;
+
+      /**
+       * Copy constructor from the TrilinosWrappers vector class. Since a
+       * vector of this class does not necessarily need to be distributed
+       * among processes, the user needs to supply us with an Epetra_Map that
+       * sets the partitioning details.
+       *
+       * Depending on whether the @p parallel_partitioning argument uniquely
+       * subdivides elements among processors or not, the resulting vector may
+       * or may not have ghost elements. See the general documentation of this
+       * class for more information.
+       *
+       * This function is deprecated.
+       *
+       * @see
+       * @ref GlossGhostedVector "vectors with ghost elements"
+       */
+      Vector (const Epetra_Map &parallel_partitioning,
+              const VectorBase &v) DEAL_II_DEPRECATED;
+
+      /**
+       * Reinitialize from a deal.II vector. The Epetra_Map specifies the
+       * %parallel partitioning.
+       *
+       * Depending on whether the @p parallel_partitioner argument uniquely
+       * subdivides elements among processors or not, the resulting vector may
+       * or may not have ghost elements. See the general documentation of this
+       * class for more information.
+       *
+       * This function is deprecated.
+       *
+       * @see
+       * @ref GlossGhostedVector "vectors with ghost elements"
+       */
+      template <typename number>
+      void reinit (const Epetra_Map             &parallel_partitioner,
+                   const dealii::Vector<number> &v) DEAL_II_DEPRECATED;
+
+      /**
+       * Reinit functionality. This function destroys the old vector content
+       * and generates a new one based on the input map.
+       *
+       * Depending on whether the @p parallel_partitioning argument uniquely
+       * subdivides elements among processors or not, the resulting vector may
+       * or may not have ghost elements. See the general documentation of this
+       * class for more information.
+       *
+       * This function is deprecated.
+       *
+       * @see
+       * @ref GlossGhostedVector "vectors with ghost elements"
+       */
+      void reinit (const Epetra_Map &parallel_partitioning,
+                   const bool        omit_zeroing_entries = false) DEAL_II_DEPRECATED;
+
+      /**
+       * Copy-constructor from deal.II vectors. Sets the dimension to that of
+       * the given vector, and copies all elements.
+       *
+       * Depending on whether the @p parallel_partitioning argument uniquely
+       * subdivides elements among processors or not, the resulting vector may
+       * or may not have ghost elements. See the general documentation of this
+       * class for more information.
+       *
+       * This function is deprecated.
+       *
+       * @see
+       * @ref GlossGhostedVector "vectors with ghost elements"
+       */
+      template <typename Number>
+      Vector (const Epetra_Map             &parallel_partitioning,
+              const dealii::Vector<Number> &v) DEAL_II_DEPRECATED;
+//@}
+      /**
+       * @name Initialization with an IndexSet
+       */
+//@{
+      /**
+       * This constructor takes an IndexSet that defines how to distribute the
+       * individual components among the MPI processors. Since it also
+       * includes information about the size of the vector, this is all we
+       * need to generate a %parallel vector.
+       *
+       * Depending on whether the @p parallel_partitioning argument uniquely
+       * subdivides elements among processors or not, the resulting vector may
+       * or may not have ghost elements. See the general documentation of this
+       * class for more information.
+       *
+       * @see
+       * @ref GlossGhostedVector "vectors with ghost elements"
+       */
+      explicit Vector (const IndexSet &parallel_partitioning,
+                       const MPI_Comm &communicator = MPI_COMM_WORLD);
+
+      /**
+       * Creates a ghosted parallel vector.
+       *
+       * Depending on whether the @p ghost argument uniquely subdivides
+       * elements among processors or not, the resulting vector may or may not
+       * have ghost elements. See the general documentation of this class for
+       * more information.
+       *
+       * @see
+       * @ref GlossGhostedVector "vectors with ghost elements"
+       */
+      Vector (const IndexSet &local,
+              const IndexSet &ghost,
+              const MPI_Comm &communicator = MPI_COMM_WORLD);
+
+      /**
+       * Copy constructor from the TrilinosWrappers vector class. Since a
+       * vector of this class does not necessarily need to be distributed
+       * among processes, the user needs to supply us with an IndexSet and an
+       * MPI communicator that set the partitioning details.
+       *
+       * Depending on whether the @p parallel_partitioning argument uniquely
+       * subdivides elements among processors or not, the resulting vector may
+       * or may not have ghost elements. See the general documentation of this
+       * class for more information.
+       *
+       * @see
+       * @ref GlossGhostedVector "vectors with ghost elements"
+       */
+      Vector (const IndexSet   &parallel_partitioning,
+              const VectorBase &v,
+              const MPI_Comm   &communicator = MPI_COMM_WORLD);
+
+      /**
+       * Copy-constructor from deal.II vectors. Sets the dimension to that of
+       * the given vector, and copies all the elements.
+       *
+       * Depending on whether the @p parallel_partitioning argument uniquely
+       * subdivides elements among processors or not, the resulting vector may
+       * or may not have ghost elements. See the general documentation of this
+       * class for more information.
+       *
+       * @see
+       * @ref GlossGhostedVector "vectors with ghost elements"
+       */
+      template <typename Number>
+      Vector (const IndexSet               &parallel_partitioning,
+              const dealii::Vector<Number> &v,
+              const MPI_Comm               &communicator = MPI_COMM_WORLD);
+
+      /**
+       * Reinit functionality. This function destroys the old vector content
+       * and generates a new one based on the input partitioning.  The flag
+       * <tt>omit_zeroing_entries</tt> determines whether the vector should be
+       * filled with zero (false) or left untouched (true).
+       *
+       *
+       * Depending on whether the @p parallel_partitioning argument uniquely
+       * subdivides elements among processors or not, the resulting vector may
+       * or may not have ghost elements. See the general documentation of this
+       * class for more information.
+       *
+       * @see
+       * @ref GlossGhostedVector "vectors with ghost elements"
+       */
+      void reinit (const IndexSet &parallel_partitioning,
+                   const MPI_Comm &communicator = MPI_COMM_WORLD,
+                   const bool      omit_zeroing_entries = false);
+
+      /**
+       * Reinit functionality. This function destroys the old vector content
+       * and generates a new one based on the input partitioning. In addition
+       * to just specifying one index set as in all the other methods above,
+       * this method allows supplying an additional set of ghost entries.
+       * There are two different versions of a vector that can be created. If
+       * the flag @p vector_writable is set to @p false, the vector only
+       * allows read access to the joint set of @p locally_owned_entries and
+       * @p ghost_entries. The effect of the reinit method is then equivalent
+       * to calling the other reinit method with an index set containing both
+       * the locally owned entries and the ghost entries.
+       *
+       * If the flag @p vector_writable is set to true, this creates an
+       * alternative storage scheme for ghost elements that allows multiple
+       * threads to write into the vector (for the other reinit methods, only
+       * one thread is allowed to write into the ghost entries at a time).
+       *
+       * Depending on whether the @p ghost_entries argument uniquely
+       * subdivides elements among processors or not, the resulting vector may
+       * or may not have ghost elements. See the general documentation of this
+       * class for more information.
+       *
+       * @see
+       * @ref GlossGhostedVector "vectors with ghost elements"
+       */
+      void reinit (const IndexSet &locally_owned_entries,
+                   const IndexSet &ghost_entries,
+                   const MPI_Comm &communicator = MPI_COMM_WORLD,
+                   const bool      vector_writable = false);
+//@}
+    };
+
+
+
+
+// ------------------- inline and template functions --------------
+
+
+    /**
+     * Free-function overload of @p swap for two MPI vectors. Instead of going
+     * through a temporary object like the generic C++ standard library
+     * version, it simply forwards to the member function <tt>u.swap(v)</tt>.
+     *
+     * @relates TrilinosWrappers::MPI::Vector
+     * @author Martin Kronbichler, Wolfgang Bangerth, 2008
+     */
+    inline void
+    swap (Vector &u, Vector &v)
+    {
+      u.swap (v);
+    }
+
+
+#ifndef DOXYGEN
+
+    // Build the vector by delegating to reinit() with the given map and data.
+    template <typename number>
+    Vector::Vector (const Epetra_Map &input_map, const dealii::Vector<number> &v)
+    {
+      this->reinit (input_map, v);
+    }
+
+
+
+    // Turn the index set into an Epetra_Map, then assign from a temporary.
+    template <typename number>
+    Vector::Vector (const IndexSet &parallel_partitioner,
+                    const dealii::Vector<number> &v, const MPI_Comm &communicator)
+    {
+      const Epetra_Map trilinos_map (parallel_partitioner.make_trilinos_map (communicator, true));
+      *this = Vector(trilinos_map, v);
+    }
+
+
+
+
+    // Copy a deal.II vector into this one using the layout of the given map.
+    template <typename number>
+    void
+    Vector::reinit (const Epetra_Map &parallel_partitioner, const dealii::Vector<number> &v)
+    {
+      // Keep the current storage only if it already uses the requested map.
+      const bool keep_storage = (vector.get() != 0) && vector->Map().SameAs(parallel_partitioner);
+      if (!keep_storage)
+        vector.reset (new Epetra_FEVector(parallel_partitioner));
+
+      has_ghosts = !vector->Map().UniqueGIDs();
+
+      // Copy entry by entry: v need not store doubles, so no direct access.
+      for (int local = 0, n_local = parallel_partitioner.NumMyElements(); local < n_local; ++local)
+        (*vector)[0][local] = v(gid(parallel_partitioner, local));
+    }
+
+
+    // Forward the scalar assignment to the base class, then return a
+    // reference to this object typed as the derived class.
+    inline Vector &
+    Vector::operator= (const TrilinosScalar s)
+    {
+      this->VectorBase::operator= (s);
+      return *this;
+    }
+
+
+    // Assign from a deal.II vector, rebuilding the storage if sizes differ.
+    template <typename Number>
+    Vector &
+    Vector::operator= (const ::dealii::Vector<Number> &v)
+    {
+      if (size() != v.size())
+        {
+#ifdef DEAL_II_WITH_MPI
+          const Epetra_MpiComm    self_communicator (MPI_COMM_SELF);
+#else
+          const Epetra_SerialComm self_communicator;
+#endif
+          const TrilinosWrappers::types::int_type n_entries = static_cast<TrilinosWrappers::types::int_type>(v.size());
+          vector.reset (new Epetra_FEVector(Epetra_Map(n_entries, 0, self_communicator)));
+        }
+
+      reinit (vector_partitioner(), v);
+      return *this;
+    }
+
+
+#endif /* DOXYGEN */
+
+  } /* end of namespace MPI */
+
+
+
+  /**
+   * This class is a specialization of a Trilinos vector to a localized
+   * version. The purpose of this class is to provide a copy interface from
+   * the possibly parallel Vector class to a local vector on each processor,
+   * in order to be able to access all elements in the vector or to apply
+   * certain deal.II functions.
+   *
+   * This class is deprecated, use TrilinosWrappers::MPI::Vector instead.
+   *
+   * @ingroup TrilinosWrappers
+   * @ingroup Vectors
+   * @author Martin Kronbichler, 2008
+   */
+  class Vector : public VectorBase
+  {
+  public:
+    /**
+     * Declare type for container size.
+     */
+    typedef dealii::types::global_dof_index size_type;
+
+    /**
+     * A variable that indicates whether this vector supports distributed data
+     * storage. If true, then this vector also needs an appropriate compress()
+     * function that allows recent set or add operations on individual
+     * elements to be communicated to other processors.
+     *
+     * For the current class, the variable equals false, since it does not
+     * support parallel data storage.  If you do need parallel data storage,
+     * use TrilinosWrappers::MPI::Vector.
+     */
+    static const bool supports_distributed_data = false;
+
+    /**
+     * Default constructor that generates an empty (zero size) vector. The
+     * function <tt>reinit()</tt> will have to give the vector the correct
+     * size.
+     */
+    Vector () DEAL_II_DEPRECATED;
+
+    /**
+     * This constructor takes as input the number of elements in the vector.
+     */
+    explicit Vector (const size_type n) DEAL_II_DEPRECATED;
+
+    /**
+     * This constructor takes a map that describes the size of the vector.
+     * If the map is not localized, i.e., if there are some elements that are
+     * not present on all processes, only the global size of the map will be
+     * taken and a localized map will be generated internally. In other words,
+     * which elements of the @p partitioning argument are set is in fact
+     * ignored; the only thing that matters is the size of the index space
+     * described by this argument.
+     */
+    explicit Vector (const Epetra_Map &partitioning) DEAL_II_DEPRECATED;
+
+    /**
+     * This constructor takes an index set that describes the vector size.
+     * If the index set is not localized, i.e., if there are some elements
+     * that are not present on all processes, only the global size of the
+     * index set will be taken and a localized version will be generated
+     * internally. In other words, which elements of the @p partitioning
+     * argument are set is in fact ignored; the only thing that matters is the
+     * size of the index space described by this argument.
+     */
+    explicit Vector (const IndexSet &partitioning,
+                     const MPI_Comm &communicator = MPI_COMM_WORLD) DEAL_II_DEPRECATED;
+
+    /**
+     * This constructor takes a (possibly parallel) Trilinos Vector and
+     * generates a localized version of the whole content on each processor.
+     */
+    explicit Vector (const VectorBase &V) DEAL_II_DEPRECATED;
+
+    /**
+     * Copy-constructor from deal.II vectors. Sets the dimension to that of
+     * the given vector, and copies all elements.
+     */
+    template <typename Number>
+    explicit Vector (const dealii::Vector<Number> &v) DEAL_II_DEPRECATED;
+
+    /**
+     * Reinit function that resizes the vector to the size specified by
+     * <tt>n</tt>.
+     */
+    void reinit (const size_type n,
+                 const bool      omit_zeroing_entries = false);
+
+    /**
+     * Initialization with an Epetra_Map. Similar to the call in the other
+     * class MPI::Vector, with the difference that now a copy on all processes
+     * is generated. This initialization function is appropriate when the data
+     * in the localized vector should be imported from a distributed vector
+     * that has been initialized with the same communicator. The variable
+     * <tt>omit_zeroing_entries</tt> determines whether the vector should be
+     * filled with zero or left untouched.
+     *
+     * Which elements of the @p input_map argument are set is in fact ignored;
+     * the only thing that matters is the size of the index space described by
+     * this argument.
+     */
+    void reinit (const Epetra_Map &input_map,
+                 const bool        omit_zeroing_entries = false);
+
+    /**
+     * Initialization with an IndexSet. Similar to the call in the other class
+     * MPI::Vector, with the difference that now a copy on all processes is
+     * generated. This initialization function is appropriate in case the data
+     * in the localized vector should be imported from a distributed vector
+     * that has been initialized with the same communicator. The variable
+     * <tt>omit_zeroing_entries</tt> determines whether the vector should be
+     * filled with zero (false) or left untouched (true).
+     *
+     * Which elements of the @p input_map argument are set is in fact ignored;
+     * the only thing that matters is the size of the index space described by
+     * this argument.
+     */
+    void reinit (const IndexSet   &input_map,
+                 const MPI_Comm   &communicator = MPI_COMM_WORLD,
+                 const bool        omit_zeroing_entries = false);
+
+    /**
+     * Reinit function. Takes the information of a Vector and copies
+     * everything to the calling vector, now also allowing different maps.
+     */
+    void reinit (const VectorBase &V,
+                 const bool        omit_zeroing_entries = false,
+                 const bool        allow_different_maps = false);
+
+    /**
+     * Set all components of the vector to the given number @p s. Simply pass
+     * this down to the base class, but we still need to declare this function
+     * to make the example given in the discussion about making the
+     * constructor explicit work.
+     */
+    Vector &operator= (const TrilinosScalar s);
+
+    /**
+     * Sets the left hand argument to the (parallel) Trilinos Vector.
+     * Equivalent to the @p reinit function.
+     */
+    Vector &operator= (const MPI::Vector &v);
+
+    /**
+     * Sets the left hand argument to the deal.II vector.
+     */
+    template <typename Number>
+    Vector &operator= (const ::dealii::Vector<Number> &v);
+
+    /**
+     * Copy operator. Copies both the dimension and the content in the right
+     * hand argument.
+     */
+    Vector &operator= (const Vector &v);
+
+    /**
+     * This function does nothing but is there for compatibility with the @p
+     * PETScWrappers::Vector class.
+     *
+     * For the PETSc vector wrapper class, this function updates the ghost
+     * values of the PETSc vector. This is necessary after any modification
+     * before reading ghost values.
+     *
+     * However, for the implementation of this class, it is immaterial and
+     * thus an empty function.
+     */
+    void update_ghost_values () const;
+  };
+
+
+
+// ------------------- inline and template functions --------------
+
+
+  /**
+   * Free-function overload of @p swap for two localized vectors. Instead of
+   * going through a temporary object like the generic C++ standard library
+   * version, it simply forwards to the member function <tt>u.swap(v)</tt>.
+   *
+   * @relates TrilinosWrappers::Vector
+   * @author Martin Kronbichler, Wolfgang Bangerth, 2008
+   */
+  inline void
+  swap (Vector &u, Vector &v)
+  {
+    u.swap (v);
+  }
+
+
+#ifndef DOXYGEN
+  // Allocate storage on a local map sized like v, then copy v's entries.
+  template <typename number>
+  Vector::Vector (const dealii::Vector<number> &v)
+  {
+    const TrilinosWrappers::types::int_type n_entries = static_cast<TrilinosWrappers::types::int_type>(v.size());
+    vector.reset (new Epetra_FEVector(Epetra_LocalMap(n_entries, 0, Utilities::Trilinos::comm_self())));
+    *this = v;
+  }
+
+
+
+  // Forward the scalar assignment to the base class, then return a
+  // reference to this object typed as the derived class.
+  inline Vector &
+  Vector::operator= (const TrilinosScalar s)
+  {
+    this->VectorBase::operator= (s);
+    return *this;
+  }
+
+
+
+  // Assign from a deal.II vector, re-creating the local storage first if the
+  // sizes differ, and then copying all entries one by one.
+  template <typename Number>
+  Vector &
+  Vector::operator= (const ::dealii::Vector<Number> &v)
+  {
+    typedef TrilinosWrappers::types::int_type int_type;
+    if (size() != v.size())
+      {
+        vector.reset();
+        const Epetra_LocalMap local_map (static_cast<int_type>(v.size()), 0,
+                                         Utilities::Trilinos::comm_self());
+        vector.reset (new Epetra_FEVector(local_map));
+      }
+
+    const Epetra_Map &map  = vector_partitioner();
+    const int_type    size = map.NumMyElements();
+
+    Assert (map.MaxLID() == size-1,
+            ExcDimensionMismatch(map.MaxLID(), size-1));
+
+    // v need not store doubles, so the entries are converted individually.
+    for (int_type entry = 0; entry < size; ++entry)
+      (*vector)[0][entry] = v(entry);
+
+    return *this;
+  }
+
+
+
+  // Deliberately a no-op for this class.
+  inline void
+  Vector::update_ghost_values () const
+  {}
+
+
+#endif
+
+} /* namespace TrilinosWrappers */
+
+/*@}*/
+
+
+namespace internal
+{
+  namespace LinearOperator
+  {
+    template <typename> class ReinitHelper;
+
+    /**
+     * Helper used internally by linear_operator.h to reinitialize vectors
+     * from a matrix. Specialization for TrilinosWrappers::MPI::Vector.
+     */
+    template<>
+    class ReinitHelper<TrilinosWrappers::MPI::Vector>
+    {
+    public:
+      // Call v.reinit() with the matrix's locally_owned_range_indices().
+      template <typename Matrix>
+      static void reinit_range_vector (const Matrix &matrix, TrilinosWrappers::MPI::Vector &v,
+                                       bool omit_zeroing_entries)
+      {
+        v.reinit(matrix.locally_owned_range_indices(), matrix.get_mpi_communicator(),
+                 omit_zeroing_entries);
+      }
+
+      // Call v.reinit() with the matrix's locally_owned_domain_indices().
+      template <typename Matrix>
+      static void reinit_domain_vector (const Matrix &matrix, TrilinosWrappers::MPI::Vector &v,
+                                        bool omit_zeroing_entries)
+      {
+        v.reinit(matrix.locally_owned_domain_indices(), matrix.get_mpi_communicator(),
+                 omit_zeroing_entries);
+      }
+    };
+
+    /**
+     * Helper used internally by linear_operator.h to reinitialize vectors
+     * from a matrix. Specialization for TrilinosWrappers::Vector.
+     */
+    template<>
+    class ReinitHelper<TrilinosWrappers::Vector>
+    {
+    public:
+      // Call v.reinit() with the matrix's locally_owned_range_indices() and
+      // get_mpi_communicator(); omit_zeroing_entries is passed through
+      // unchanged.
+      template <typename Matrix>
+      static void reinit_range_vector (const Matrix &matrix, TrilinosWrappers::Vector &v,
+                                       bool omit_zeroing_entries)
+      {
+        v.reinit(matrix.locally_owned_range_indices(), matrix.get_mpi_communicator(),
+                 omit_zeroing_entries);
+      }
+
+      // Call v.reinit() with the matrix's locally_owned_domain_indices() and
+      // get_mpi_communicator(); omit_zeroing_entries is passed through
+      // unchanged.
+      template <typename Matrix>
+      static void reinit_domain_vector (const Matrix &matrix, TrilinosWrappers::Vector &v,
+                                        bool omit_zeroing_entries)
+      {
+        v.reinit(matrix.locally_owned_domain_indices(), matrix.get_mpi_communicator(),
+                 omit_zeroing_entries);
+      }
+    };
+
+  } /* namespace LinearOperator */
+} /* namespace internal */
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_TRILINOS
+
+/*----------------------------   trilinos_vector.h     ---------------------------*/
+
+#endif
+/*----------------------------   trilinos_vector.h     ---------------------------*/
diff --git a/include/deal.II/lac/trilinos_vector_base.h b/include/deal.II/lac/trilinos_vector_base.h
new file mode 100644
index 0000000..e2722eb
--- /dev/null
+++ b/include/deal.II/lac/trilinos_vector_base.h
@@ -0,0 +1,2019 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__trilinos_vector_base_h
+#define dealii__trilinos_vector_base_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_TRILINOS
+
+#include <deal.II/base/utilities.h>
+#  include <deal.II/base/std_cxx11/shared_ptr.h>
+#  include <deal.II/base/subscriptor.h>
+#  include <deal.II/lac/exceptions.h>
+#  include <deal.II/lac/vector.h>
+
+#  include <vector>
+#  include <utility>
+#  include <memory>
+
+DEAL_II_DISABLE_EXTRA_DIAGNOSTICS
+#  define TrilinosScalar double
+#  include "Epetra_ConfigDefs.h"
+#  ifdef DEAL_II_WITH_MPI // only if MPI is installed
+#    include "mpi.h"
+#    include "Epetra_MpiComm.h"
+#  else
+#    include "Epetra_SerialComm.h"
+#  endif
+#  include "Epetra_FEVector.h"
+DEAL_II_ENABLE_EXTRA_DIAGNOSTICS
+
+DEAL_II_NAMESPACE_OPEN
+
+// forward declaration
+template <typename number> class Vector;
+
+
+/**
+ * @addtogroup TrilinosWrappers
+ * @{
+ */
+namespace TrilinosWrappers
+{
+  // forward declaration
+  class VectorBase;
+
+  /**
+   * @cond internal
+   */
+
+  /**
+   * A namespace for internal implementation details of the TrilinosWrapper
+   * members.
+   *
+   * @ingroup TrilinosWrappers
+   */
+  namespace internal
+  {
+    /**
+     * Declare type for container size.
+     */
+    typedef dealii::types::global_dof_index size_type;
+
+    /**
+     * This class implements a wrapper for accessing the Trilinos vector in
+     * the same way as we access deal.II objects: it is initialized with a
+     * vector and an element within it, and has a conversion operator to
+     * extract the scalar value of this element. It also has a variety of
+     * assignment operators for writing to this one element.
+     *
+     * @ingroup TrilinosWrappers
+     */
+    class VectorReference
+    {
+    private:
+      /**
+       * Constructor. It is made private so as to only allow the actual vector
+       * class to create it.
+       */
+      VectorReference (VectorBase     &vector,
+                       const size_type  index);
+
+    public:
+
+      /**
+       * This looks like a copy operator, but does something different than
+       * usual. In particular, it does not copy the member variables of this
+       * reference. Rather, it handles the situation where we have two vectors
+       * @p v and @p w, and assign elements like in <tt>v(i)=w(i)</tt>. Here,
+       * both left and right hand side of the assignment have data type
+       * VectorReference, but what we really mean is to assign the vector
+       * elements represented by the two references. This operator implements
+       * this operation. Note also that this allows us to make the assignment
+       * operator const.
+       */
+      const VectorReference &
+      operator = (const VectorReference &r) const;
+
+      /**
+       * Same as above but for non-const reference objects.
+       */
+      const VectorReference &
+      operator = (const VectorReference &r);
+
+      /**
+       * Set the referenced element of the vector to <tt>s</tt>.
+       */
+      const VectorReference &
+      operator = (const TrilinosScalar &s) const;
+
+      /**
+       * Add <tt>s</tt> to the referenced element of the vector.
+       */
+      const VectorReference &
+      operator += (const TrilinosScalar &s) const;
+
+      /**
+       * Subtract <tt>s</tt> from the referenced element of the vector.
+       */
+      const VectorReference &
+      operator -= (const TrilinosScalar &s) const;
+
+      /**
+       * Multiply the referenced element of the vector by <tt>s</tt>.
+       */
+      const VectorReference &
+      operator *= (const TrilinosScalar &s) const;
+
+      /**
+       * Divide the referenced element of the vector by <tt>s</tt>.
+       */
+      const VectorReference &
+      operator /= (const TrilinosScalar &s) const;
+
+      /**
+       * Convert the reference to an actual value, i.e. return the value of
+       * the referenced element of the vector.
+       */
+      operator TrilinosScalar () const;
+
+      /**
+       * Exception
+       */
+      DeclException1 (ExcTrilinosError,
+                      int,
+                      << "An error with error number " << arg1
+                      << " occurred while calling a Trilinos function");
+
+    private:
+      /**
+       * Point to the vector we are referencing.
+       */
+      VectorBase   &vector;
+
+      /**
+       * Index of the referenced element of the vector.
+       */
+      const size_type  index;
+
+      /**
+       * Make the vector class a friend, so that it can create objects of the
+       * present type.
+       */
+      friend class ::dealii::TrilinosWrappers::VectorBase;
+    };
+  }
+  /**
+   * @endcond
+   */
+
+
+  /**
+   * Base class for the two types of Trilinos vectors, the distributed memory
+   * vector MPI::Vector and a localized vector Vector. The latter is designed
+   * for use in either serial implementations or as a localized copy on each
+   * processor.  The implementation of this class is based on the Trilinos
+   * vector class Epetra_FEVector, the (parallel) partitioning of which is
+   * governed by an Epetra_Map. This means that the vector type is generic and
+   * can be done in this base class, while the definition of the partition map
+   * (and hence, the constructor and reinit function) will have to be done in
+   * the derived classes. The Epetra_FEVector is precisely the kind of vector
+   * we deal with all the time - we probably get it from some assembly
+   * process, where also entries not locally owned might need to be written and
+   * hence need to be forwarded to the owner. The only requirement for this
+   * class to work is that Trilinos is installed with the same compiler as is
+   * used for compilation of deal.II.
+   *
+   * The interface of this class is modeled after the existing Vector class in
+   * deal.II. It has almost the same member functions, and is often
+   * exchangeable. However, since Trilinos only supports a single scalar type
+   * (double), it is not templated, and only works with that type.
+   *
+   * Note that Trilinos only guarantees that operations do what you expect if
+   * the function @p GlobalAssemble has been called after vector assembly in
+   * order to distribute the data. Therefore, you need to call
+   * Vector::compress() before you actually use the vectors.
+   *
+   * @ingroup TrilinosWrappers
+   * @ingroup Vectors
+   * @author Martin Kronbichler, 2008
+   */
+  class VectorBase : public Subscriptor
+  {
+  public:
+    /**
+     * Declare some of the standard types used in all containers. These types
+     * parallel those in the <tt>C++</tt> standard library's
+     * <tt>vector<...></tt> class.
+     */
+    typedef TrilinosScalar                  value_type;
+    typedef TrilinosScalar                  real_type;
+    typedef dealii::types::global_dof_index size_type;
+    typedef value_type                     *iterator;
+    typedef const value_type               *const_iterator;
+    typedef internal::VectorReference       reference;
+    typedef const internal::VectorReference const_reference;
+
+    /**
+     * @name 1: Basic Object-handling
+     */
+    //@{
+
+    /**
+     * Default constructor that generates an empty (zero size) vector. The
+     * function <tt>reinit()</tt> will have to give the vector the correct
+     * size and distribution among processes in case of an MPI run.
+     */
+    VectorBase ();
+
+    /**
+     * Copy constructor. Sets the dimension to that of the given vector, and
+     * copies all the elements.
+     */
+    VectorBase (const VectorBase &v);
+
+    /**
+     * Destructor
+     */
+    virtual ~VectorBase ();
+
+    /**
+     * Release all memory and return to a state just like after having called
+     * the default constructor.
+     */
+    void clear ();
+
+    /**
+     * Reinit functionality, sets the dimension and possibly the parallel
+     * partitioning (Epetra_Map) of the calling vector to the settings of the
+     * input vector.
+     */
+    void reinit (const VectorBase &v,
+                 const bool        omit_zeroing_entries = false);
+
+    /**
+     * Compress the underlying representation of the Trilinos object, i.e.
+     * flush the buffers of the vector object if it has any. This function is
+     * necessary after writing into a vector element-by-element and before
+     * anything else can be done on it.
+     *
+     * The argument can be used to specify the compress mode
+     * (<code>Add</code> or <code>Insert</code>) in case the vector has not
+     * been written to since the last time this function was called. The
+     * argument is ignored if the vector has been added or written to since
+     * the last time compress() was called.
+     *
+     * See
+     * @ref GlossCompress "Compressing distributed objects"
+     * for more information.
+     */
+    void compress (::dealii::VectorOperation::values operation);
+
+    /**
+     * Returns the state of the vector, i.e., whether compress() has already
+     * been called after an operation requiring data exchange.
+     *
+     * This function is deprecated.
+     */
+    bool is_compressed () const DEAL_II_DEPRECATED;
+
+    /**
+     * Set all components of the vector to the given number @p s. Simply pass
+     * this down to the Trilinos Epetra object, but we still need to declare
+     * this function to make the example given in the discussion about making
+     * the constructor explicit work.
+     *
+     * Since the semantics of assigning a scalar to a vector are not
+     * immediately clear, this operator should really only be used if you want
+     * to set the entire vector to zero. This allows the intuitive notation
+     * <tt>v=0</tt>. Assigning other values is deprecated and may be
+     * disallowed in the future.
+     */
+    VectorBase &
+    operator = (const TrilinosScalar s);
+
+    /**
+     * Copy function. This function takes a VectorBase vector and copies all
+     * the elements. The target vector will have the same parallel
+     * distribution as the calling vector.
+     */
+    VectorBase &
+    operator = (const VectorBase &v);
+
+    /**
+     * Another copy function. This one takes a deal.II vector and copies it
+     * into a TrilinosWrapper vector. Note that since we do not provide any
+     * Epetra_Map that describes the partitioning of the vector among the
+     * MPI processes, the size of the TrilinosWrapper vector has to be the
+     * same as the size of the input vector. In order to change the map, use
+     * the reinit(const Epetra_Map &input_map) function.
+     */
+    template <typename Number>
+    VectorBase &
+    operator = (const ::dealii::Vector<Number> &v);
+
+    /**
+     * Test for equality. This function assumes that the present vector and
+     * the one to compare with have the same size already, since comparing
+     * vectors of different sizes does not make much sense anyway.
+     */
+    bool operator == (const VectorBase &v) const;
+
+    /**
+     * Test for inequality. This function assumes that the present vector and
+     * the one to compare with have the same size already, since comparing
+     * vectors of different sizes does not make much sense anyway.
+     */
+    bool operator != (const VectorBase &v) const;
+
+    /**
+     * Return the global dimension of the vector.
+     */
+    size_type size () const;
+
+    /**
+     * Return the local dimension of the vector, i.e. the number of elements
+     * stored on the present MPI process. For sequential vectors, this number
+     * is the same as size(), but for parallel vectors it may be smaller.
+     *
+     * To figure out which elements exactly are stored locally, use
+     * local_range().
+     *
+     * If the vector contains ghost elements, they are included in this
+     * number.
+     */
+    size_type local_size () const;
+
+    /**
+     * Return a pair of indices indicating which elements of this vector are
+     * stored locally. The first number is the index of the first element
+     * stored, the second the index of the one past the last one that is
+     * stored locally. If this is a sequential vector, then the result will be
+     * the pair <code>(0,N)</code>, otherwise it will be a pair
+     * <code>(i,i+n)</code>, where <code>n=local_size()</code> and
+     * <code>i</code> is the first element of the vector stored on this
+     * processor, corresponding to the half open interval $[i,i+n)$
+     *
+     * @note The description above is true most of the time, but not always.
+     * In particular, Trilinos vectors need not store contiguous ranges of
+     * elements such as $[i,i+n)$. Rather, it can store vectors where the
+     * elements are distributed in an arbitrary way across all processors and
+     * each processor simply stores a particular subset, not necessarily
+     * contiguous. In this case, this function clearly makes no sense since it
+     * could, at best, return a range that includes all elements that are
+     * stored locally. Thus, the function only succeeds if the locally stored
+     * range is indeed contiguous. It will trigger an assertion if the local
+     * portion of the vector is not contiguous.
+     */
+    std::pair<size_type, size_type> local_range () const;
+
+    /**
+     * Return whether @p index is in the local range or not, see also
+     * local_range().
+     *
+     * @note The same limitation for the applicability of this function
+     * applies as listed in the documentation of local_range().
+     */
+    bool in_local_range (const size_type index) const;
+
+    /**
+     * Return an index set that describes which elements of this vector are
+     * owned by the current processor. Note that this index set does not
+     * include elements this vector may store locally as ghost elements but
+     * that are in fact owned by another processor. As a consequence, the
+     * index sets returned on different processors if this is a distributed
+     * vector will form disjoint sets that add up to the complete index set.
+     * Obviously, if a vector is created on only one processor, then the
+     * result would satisfy
+     * @code
+     *   vec.locally_owned_elements() == complete_index_set (vec.size())
+     * @endcode
+     */
+    IndexSet locally_owned_elements () const;
+
+    /**
+     * Return if the vector contains ghost elements. This answer is true if
+     * there are ghost elements on at least one process.
+     *
+     * @see
+     * @ref GlossGhostedVector "vectors with ghost elements"
+     */
+    bool has_ghost_elements() const;
+
+    /**
+     * Return the scalar (inner) product of two vectors. The vectors must have
+     * the same size.
+     */
+    TrilinosScalar operator * (const VectorBase &vec) const;
+
+    /**
+     * Return square of the $l_2$-norm.
+     */
+    real_type norm_sqr () const;
+
+    /**
+     * Mean value of the elements of this vector.
+     */
+    TrilinosScalar mean_value () const;
+
+    /**
+     * Compute the minimal value of the elements of this vector.
+     *
+     * This function is deprecated; use min() instead.
+     */
+    TrilinosScalar minimal_value () const DEAL_II_DEPRECATED;
+
+    /**
+     * Compute the minimal value of the elements of this vector.
+     */
+    TrilinosScalar min () const;
+
+    /**
+     * Compute the maximal value of the elements of this vector.
+     */
+    TrilinosScalar max () const;
+
+    /**
+     * $l_1$-norm of the vector.  The sum of the absolute values.
+     */
+    real_type l1_norm () const;
+
+    /**
+     * $l_2$-norm of the vector.  The square root of the sum of the squares of
+     * the elements.
+     */
+    real_type l2_norm () const;
+
+    /**
+     * $l_p$-norm of the vector. The <i>p</i>th root of the sum of the
+     * <i>p</i>th powers of the absolute values of the elements.
+     */
+    real_type lp_norm (const TrilinosScalar p) const;
+
+    /**
+     * Maximum absolute value of the elements.
+     */
+    real_type linfty_norm () const;
+
+    /**
+     * Performs a combined operation of a vector addition and a subsequent
+     * inner product, returning the value of the inner product. In other
+     * words, the result of this function is the same as if the user called
+     * @code
+     * this->add(a, V);
+     * return_value = *this * W;
+     * @endcode
+     *
+     * The reason this function exists is for compatibility with deal.II's own
+     * vector classes which can implement this functionality with less memory
+     * transfer. However, for Trilinos vectors such a combined operation is
+     * not natively supported and thus the cost is completely equivalent to
+     * calling the two methods separately.
+     */
+    TrilinosScalar add_and_dot (const TrilinosScalar a,
+                                const VectorBase    &V,
+                                const VectorBase    &W);
+
+    /**
+     * Return whether the vector contains only elements with value zero. This
+     * is a collective operation. This function is expensive, because
+     * potentially all elements have to be checked.
+     */
+    bool all_zero () const;
+
+    /**
+     * Return @p true if the vector has no negative entries, i.e. all entries
+     * are zero or positive. This function is used, for example, to check
+     * whether refinement indicators are really all positive (or zero).
+     */
+    bool is_non_negative () const;
+    //@}
+
+
+    /**
+     * @name 2: Data-Access
+     */
+    //@{
+
+    /**
+     * Provide access to a given element, both read and write.
+     *
+     * When using a vector distributed with MPI, this operation only makes
+     * sense for elements that are actually present on the calling processor.
+     * Otherwise, an exception is thrown. This is different from the
+     * <code>el()</code> function below that always succeeds (but returns zero
+     * on non-local elements).
+     */
+    reference
+    operator () (const size_type index);
+
+    /**
+     * Provide read-only access to an element.
+     *
+     * When using a vector distributed with MPI, this operation only makes
+     * sense for elements that are actually present on the calling processor.
+     * Otherwise, an exception is thrown. This is different from the
+     * <code>el()</code> function below that always succeeds (but returns zero
+     * on non-local elements).
+     */
+    TrilinosScalar
+    operator () (const size_type index) const;
+
+    /**
+     * Provide access to a given element, both read and write.
+     *
+     * Exactly the same as operator().
+     */
+    reference
+    operator [] (const size_type index);
+
+    /**
+     * Provide read-only access to an element.
+     *
+     * Exactly the same as operator().
+     */
+    TrilinosScalar
+    operator [] (const size_type index) const;
+
+    /**
+     * A collective get operation: instead of getting individual elements of a
+     * vector, this function allows to get a whole set of elements at once.
+     * The indices of the elements to be read are stated in the first
+     * argument, the corresponding values are returned in the second.
+     */
+    void extract_subvector_to (const std::vector<size_type> &indices,
+                               std::vector<TrilinosScalar> &values) const;
+
+    /**
+     * Just as the above, but with pointers.  Useful in minimizing copying of
+     * data around.
+     */
+    template <typename ForwardIterator, typename OutputIterator>
+    void extract_subvector_to (ForwardIterator          indices_begin,
+                               const ForwardIterator    indices_end,
+                               OutputIterator           values_begin) const;
+
+    /**
+     * Return the value of the vector entry <i>i</i>. Note that this function
+     * only works properly when we request data stored on the local
+     * processor. In case the element sits on another process, this function
+     * returns 0 which might or might not be appropriate in a given situation.
+     * If you rely on consistent results, use the access functions () or []
+     * that throw an assertion in case a non-local element is used.
+     *
+     * This function is deprecated.
+     */
+    TrilinosScalar el (const size_type index) const DEAL_II_DEPRECATED;
+
+    /**
+     * Make the Vector class a bit like the <tt>vector<></tt> class of the C++
+     * standard library by returning iterators to the start and end of the
+     * locally owned elements of this vector. The ordering of local elements
+     * corresponds to the one given by the global indices in case the vector
+     * is constructed from an IndexSet or other methods in deal.II (note that
+     * an Epetra_Map can contain elements in arbitrary orders, though).
+     *
+     * It holds that end() - begin() == local_size().
+     */
+    iterator begin ();
+
+    /**
+     * Return constant iterator to the start of the locally owned elements of
+     * the vector.
+     */
+    const_iterator begin () const;
+
+    /**
+     * Return an iterator pointing to the element past the end of the array of
+     * locally owned entries.
+     */
+    iterator end ();
+
+    /**
+     * Return a constant iterator pointing to the element past the end of the
+     * array of the locally owned entries.
+     */
+    const_iterator end () const;
+
+    //@}
+
+
+    /**
+     * @name 3: Modification of vectors
+     */
+    //@{
+
+    /**
+     * A collective set operation: instead of setting individual elements of a
+     * vector, this function allows to set a whole set of elements at once.
+     * The indices of the elements to be set are stated in the first argument,
+     * the corresponding values in the second.
+     */
+    void set (const std::vector<size_type>    &indices,
+              const std::vector<TrilinosScalar>  &values);
+
+    /**
+     * This is a second collective set operation. As a difference, this
+     * function takes a deal.II vector of values.
+     */
+    void set (const std::vector<size_type>        &indices,
+              const ::dealii::Vector<TrilinosScalar> &values);
+
+    /**
+     * This collective set operation is of lower level and can handle anything
+     * else — the only thing you have to provide is an address where all
+     * the indices are stored and the number of elements to be set.
+     */
+    void set (const size_type       n_elements,
+              const size_type      *indices,
+              const TrilinosScalar *values);
+
+    /**
+     * A collective add operation: This function adds a whole set of values
+     * stored in @p values to the vector components specified by @p indices.
+     */
+    void add (const std::vector<size_type>      &indices,
+              const std::vector<TrilinosScalar> &values);
+
+    /**
+     * This is a second collective add operation. As a difference, this
+     * function takes a deal.II vector of values.
+     */
+    void add (const std::vector<size_type>           &indices,
+              const ::dealii::Vector<TrilinosScalar> &values);
+
+    /**
+     * Take an address where <tt>n_elements</tt> are stored contiguously and
+     * add them into the vector. Handles all cases which are not covered by
+     * the other two <tt>add()</tt> functions above.
+     */
+    void add (const size_type       n_elements,
+              const size_type      *indices,
+              const TrilinosScalar *values);
+
+    /**
+     * Multiply the entire vector by a fixed factor.
+     */
+    VectorBase &operator *= (const TrilinosScalar factor);
+
+    /**
+     * Divide the entire vector by a fixed factor.
+     */
+    VectorBase &operator /= (const TrilinosScalar factor);
+
+    /**
+     * Add the given vector to the present one.
+     */
+    VectorBase &operator += (const VectorBase &V);
+
+    /**
+     * Subtract the given vector from the present one.
+     */
+    VectorBase &operator -= (const VectorBase &V);
+
+    /**
+     * Addition of @p s to all components. Note that @p s is a scalar and not
+     * a vector.
+     */
+    void add (const TrilinosScalar s);
+
+    /**
+     * Simple vector addition, equal to the <tt>operator +=</tt>.
+     *
+     * Though, if the second argument <tt>allow_different_maps</tt> is set,
+     * then it is possible to add data from a vector that uses a different
+     * map, i.e., a vector whose elements are split across processors
+     * differently. This may include vectors with ghost elements, for example.
+     * In general, however, adding vectors with a different element-to-
+     * processor map requires communicating data among processors and,
+     * consequently, is a slower operation than when using vectors using the
+     * same map.
+     */
+    void add (const VectorBase &V,
+              const bool        allow_different_maps = false);
+
+    /**
+     * Simple addition of a multiple of a vector, i.e. <tt>*this += a*V</tt>.
+     */
+    void add (const TrilinosScalar  a,
+              const VectorBase     &V);
+
+    /**
+     * Multiple addition of scaled vectors, i.e. <tt>*this += a*V + b*W</tt>.
+     */
+    void add (const TrilinosScalar  a,
+              const VectorBase     &V,
+              const TrilinosScalar  b,
+              const VectorBase     &W);
+
+    /**
+     * Scaling and simple vector addition, i.e.  <tt>*this = s*(*this) +
+     * V</tt>.
+     */
+    void sadd (const TrilinosScalar  s,
+               const VectorBase     &V);
+
+    /**
+     * Scaling and simple addition, i.e.  <tt>*this = s*(*this) + a*V</tt>.
+     */
+    void sadd (const TrilinosScalar  s,
+               const TrilinosScalar  a,
+               const VectorBase     &V);
+
+    /**
+     * Scaling and multiple addition.
+     *
+     * This function is deprecated.
+     */
+    void sadd (const TrilinosScalar  s,
+               const TrilinosScalar  a,
+               const VectorBase     &V,
+               const TrilinosScalar  b,
+               const VectorBase     &W) DEAL_II_DEPRECATED;
+
+    /**
+     * Scaling and multiple addition.  <tt>*this = s*(*this) + a*V + b*W +
+     * c*X</tt>.
+     *
+     * This function is deprecated.
+     */
+    void sadd (const TrilinosScalar  s,
+               const TrilinosScalar  a,
+               const VectorBase     &V,
+               const TrilinosScalar  b,
+               const VectorBase     &W,
+               const TrilinosScalar  c,
+               const VectorBase     &X) DEAL_II_DEPRECATED;
+
+    /**
+     * Scale each element of this vector by the corresponding element in the
+     * argument. This function is mostly meant to simulate multiplication (and
+     * immediate re-assignment) by a diagonal scaling matrix.
+     */
+    void scale (const VectorBase &scaling_factors);
+
+    /**
+     * Assignment <tt>*this = a*V</tt>.
+     */
+    void equ (const TrilinosScalar  a,
+              const VectorBase     &V);
+
+    /**
+     * Assignment <tt>*this = a*V + b*W</tt>.
+     *
+     * This function is deprecated.
+     */
+    void equ (const TrilinosScalar  a,
+              const VectorBase     &V,
+              const TrilinosScalar  b,
+              const VectorBase     &W) DEAL_II_DEPRECATED;
+
+    /**
+     * Compute the elementwise ratio of the two given vectors, that is let
+     * <tt>this[i] = a[i]/b[i]</tt>. This is useful for example if you want to
+     * compute the cellwise ratio of true to estimated error.
+     *
+     * This vector is appropriately scaled to hold the result.
+     *
+     * If any of the <tt>b[i]</tt> is zero, the result is undefined. No
+     * attempt is made to catch such situations.
+     */
+    void ratio (const VectorBase &a,
+                const VectorBase &b) DEAL_II_DEPRECATED;
+    //@}
+
+
+    /**
+     * @name 4: Mixed stuff
+     */
+    //@{
+
+    /**
+     * Return a const reference to the underlying Trilinos Epetra_MultiVector
+     * class.
+     */
+    const Epetra_MultiVector &trilinos_vector () const;
+
+    /**
+     * Return a (modifiable) reference to the underlying Trilinos
+     * Epetra_FEVector class.
+     */
+    Epetra_FEVector &trilinos_vector ();
+
+    /**
+     * Return a const reference to the underlying Trilinos Epetra_Map that
+     * sets the parallel partitioning of the vector.
+     */
+    const Epetra_Map &vector_partitioner () const;
+
+    /**
+     * Output of vector in user-defined format in analogy to the
+     * dealii::Vector class.
+     *
+     * This function is deprecated.
+     */
+    void print (const char *format = 0) const DEAL_II_DEPRECATED;
+
+    /**
+     * Print to a stream. @p precision denotes the desired precision with
+     * which values shall be printed, @p scientific whether scientific
+     * notation shall be used. If @p across is @p true then the vector is
+     * printed in a line, while if @p false then the elements are printed on a
+     * separate line each.
+     */
+    void print (std::ostream       &out,
+                const unsigned int  precision  = 3,
+                const bool          scientific = true,
+                const bool          across     = true) const;
+
+    /**
+     * Swap the contents of this vector and the other vector @p v. One could
+     * do this operation with a temporary variable and copying over the data
+     * elements, but this function is significantly more efficient since it
+     * only swaps the pointers to the data of the two vectors and therefore
+     * does not need to allocate temporary storage and move data around. Note
+     * that the vectors need to be of the same size and be based on the same map.
+     *
+     * This function is analogous to the @p swap function of all C++ standard
+     * containers. Also, there is a global function <tt>swap(u,v)</tt> that
+     * simply calls <tt>u.swap(v)</tt>, again in analogy to standard
+     * functions.
+     */
+    void swap (VectorBase &v);
+
+    /**
+     * Estimate for the memory consumption in bytes.
+     */
+    std::size_t memory_consumption () const;
+
+    /**
+     * Return a reference to the MPI communicator object in use with this
+     * object.
+     */
+    const MPI_Comm &get_mpi_communicator () const;
+    //@}
+
+    /**
+     * Exception
+     */
+    DeclException0 (ExcDifferentParallelPartitioning);
+
+    /**
+     * Exception
+     */
+    DeclException1 (ExcTrilinosError,
+                    int,
+                    << "An error with error number " << arg1
+                    << " occurred while calling a Trilinos function");
+
+    /**
+     * Exception
+     */
+    DeclException4 (ExcAccessToNonLocalElement,
+                    size_type, size_type, size_type, size_type,
+                    << "You tried to access element " << arg1
+                    << " of a distributed vector, but this element is not stored "
+                    << "on the current processor. Note: There are "
+                    << arg2 << " elements stored "
+                    << "on the current processor from within the range "
+                    << arg3 << " through " << arg4
+                    << " but Trilinos vectors need not store contiguous "
+                    << "ranges on each processor, and not every element in "
+                    << "this range may in fact be stored locally.");
+
+
+  private:
+    /**
+     * Trilinos doesn't allow mixing additions to vector entries and
+     * overwriting them (to make synchronisation of parallel computations
+     * simpler). The way we do it is to, for each access operation, store
+     * whether it is an insertion or an addition. If the previous one was of
+     * different type, then we first have to flush the Trilinos buffers;
+     * otherwise, we can simply go on.  Luckily, Trilinos has an object for
+     * this which does already all the parallel communications in such a case,
+     * so we simply use their model, which stores whether the last operation
+     * was an addition or an insertion.
+     */
+    Epetra_CombineMode last_action;
+
+    /**
+     * A boolean variable to hold information on whether the vector is
+     * compressed or not.
+     */
+    bool compressed;
+
+    /**
+     * Whether this vector has ghost elements. This is true on all processors
+     * even if only one of them has any ghost elements.
+     */
+    bool has_ghosts;
+
+    /**
+     * Pointer to the actual Epetra vector object. This may represent a vector
+     * that is in fact distributed among multiple processors. The object
+     * requires an existing Epetra_Map for storing data when setting it up.
+     */
+    std_cxx11::shared_ptr<Epetra_FEVector> vector;
+
+    /**
+     * A vector object in Trilinos to be used for collecting the non-local
+     * elements if the vector was constructed with an additional IndexSet
+     * describing ghost elements.
+     */
+    std_cxx11::shared_ptr<Epetra_MultiVector> nonlocal_vector;
+
+    /**
+     * Make the reference class a friend.
+     */
+    friend class internal::VectorReference;
+    friend class Vector;
+    friend class MPI::Vector;
+  };
+
+
+
+
+// ------------------- inline and template functions --------------
+
+  /**
+   * Global function swap which overloads the default implementation of the
+   * C++ standard library, which uses a temporary object. The function simply
+   * forwards to <tt>u.swap(v)</tt> to exchange the data of the two vectors.
+   *
+   * @relates TrilinosWrappers::VectorBase
+   * @author Martin Kronbichler, Wolfgang Bangerth, 2008
+   */
+  inline
+  void swap (VectorBase &u, VectorBase &v)
+  {
+    u.swap (v);
+  }
+
+
+#ifndef DOXYGEN
+
+  namespace internal
+  {
+    // Construct a reference to entry 'index' of 'vector'. Only the vector
+    // reference and the index are stored; no vector element is touched here.
+    inline
+    VectorReference::VectorReference (VectorBase      &vector,
+                                      const size_type  index)
+      :
+      vector (vector),
+      index (index)
+    {}
+
+
+    // Assignment from another reference, usable on const objects. This is
+    // not a copy of the reference itself: the right hand side is converted
+    // to its scalar value, which is then handed to the scalar assignment
+    // operator so that the referenced vector entry gets overwritten.
+    inline
+    const VectorReference &
+    VectorReference::operator = (const VectorReference &r) const
+    {
+      const TrilinosScalar source_value = static_cast<TrilinosScalar> (r);
+      return (*this = source_value);
+    }
+
+
+
+    // Same as the const variant, for non-const reference objects: read the
+    // scalar the right hand side refers to and assign it through the scalar
+    // assignment operator.
+    inline
+    const VectorReference &
+    VectorReference::operator = (const VectorReference &r)
+    {
+      const TrilinosScalar source_value = static_cast<TrilinosScalar> (r);
+      return (*this = source_value);
+    }
+
+
+    // Overwrite the referenced vector entry with 'value' by passing a single
+    // (index, value) pair to VectorBase::set(). Returns this reference.
+    inline
+    const VectorReference &
+    VectorReference::operator = (const TrilinosScalar &value) const
+    {
+      vector.set (1, &index, &value);
+      return *this;
+    }
+
+
+
+    // Add 'value' to the referenced vector entry by passing a single
+    // (index, value) pair to VectorBase::add(). Returns this reference.
+    inline
+    const VectorReference &
+    VectorReference::operator += (const TrilinosScalar &value) const
+    {
+      vector.add (1, &index, &value);
+      return *this;
+    }
+
+
+
+    // Subtract 'value' from the referenced vector entry. Implemented as an
+    // addition of the negated value through VectorBase::add().
+    inline
+    const VectorReference &
+    VectorReference::operator -= (const TrilinosScalar &value) const
+    {
+      const TrilinosScalar negated_value = -value;
+      vector.add (1, &index, &negated_value);
+      return *this;
+    }
+
+
+
+    // Multiply the referenced vector entry by 'value': read the current
+    // entry through the scalar conversion, form the product, and write it
+    // back with VectorBase::set().
+    inline
+    const VectorReference &
+    VectorReference::operator *= (const TrilinosScalar &value) const
+    {
+      const TrilinosScalar product = static_cast<TrilinosScalar>(*this) * value;
+      vector.set (1, &index, &product);
+      return *this;
+    }
+
+
+
+    // Divide the referenced vector entry by 'value': read the current entry
+    // through the scalar conversion, form the quotient, and write it back
+    // with VectorBase::set(). No check for a zero divisor is made here.
+    inline
+    const VectorReference &
+    VectorReference::operator /= (const TrilinosScalar &value) const
+    {
+      const TrilinosScalar quotient = static_cast<TrilinosScalar>(*this) / value;
+      vector.set (1, &index, &quotient);
+      return *this;
+    }
+  }
+
+
+
+  // Return the current value of the 'compressed' flag.
+  inline
+  bool
+  VectorBase::is_compressed () const
+  {
+    return compressed;
+  }
+
+
+
+  // Return whether 'index' lies in the half-open interval [first, second)
+  // reported by local_range().
+  inline
+  bool
+  VectorBase::in_local_range (const size_type index) const
+  {
+    const std::pair<size_type, size_type> owned = local_range();
+
+    if (index < owned.first)
+      return false;
+
+    return index < owned.second;
+  }
+
+
+
+  // Return, as an IndexSet of size size(), the global indices that the
+  // Epetra map of this vector lists for the current process.
+  inline
+  IndexSet
+  VectorBase::locally_owned_elements() const
+  {
+    IndexSet is (size());
+
+    // easy case: local range is contiguous
+    if (vector->Map().LinearMap())
+      {
+        const std::pair<size_type, size_type> x = local_range();
+        is.add_range (x.first, x.second);
+      }
+    // general case: copy the explicit list of global indices, if there is one
+    else if (vector->Map().NumMyElements() > 0)
+      {
+        const size_type n_indices = vector->Map().NumMyElements();
+        // the casts reinterpret the index array returned by Trilinos as the
+        // unsigned index type handed to IndexSet (presumably safe because
+        // global indices are non-negative -- TODO confirm)
+#ifndef DEAL_II_WITH_64BIT_INDICES
+        unsigned int *vector_indices = (unsigned int *)vector->Map().MyGlobalElements();
+#else
+        size_type *vector_indices = (size_type *)vector->Map().MyGlobalElements64();
+#endif
+        is.add_indices(vector_indices, vector_indices+n_indices);
+        is.compress();
+      }
+
+    return is;
+  }
+
+
+
+  // Return the current value of the 'has_ghosts' flag.
+  inline
+  bool
+  VectorBase::has_ghost_elements() const
+  {
+    return has_ghosts;
+  }
+
+
+
+  // Return a proxy object referring to entry 'index' of this vector. No
+  // element is read or written until the proxy is used.
+  inline
+  internal::VectorReference
+  VectorBase::operator () (const size_type index)
+  {
+    return internal::VectorReference (*this, index);
+  }
+
+
+
+  // Bracket access for non-const vectors; forwards to operator().
+  inline
+  internal::VectorReference
+  VectorBase::operator [] (const size_type index)
+  {
+    return operator() (index);
+  }
+
+
+  // Bracket access for const vectors; forwards to operator() and returns
+  // the entry by value.
+  inline
+  TrilinosScalar
+  VectorBase::operator [] (const size_type index) const
+  {
+    return operator() (index);
+  }
+
+
+
+  // Copy the entries named in 'indices' into 'values', position by
+  // position. 'values' is not resized here, so it must already provide at
+  // least indices.size() entries.
+  inline
+  void VectorBase::extract_subvector_to (const std::vector<size_type> &indices,
+                                         std::vector<TrilinosScalar>  &values) const
+  {
+    const std::vector<size_type>::size_type n_requested = indices.size();
+
+    for (size_type i = 0; i < n_requested; ++i)
+      {
+        const size_type global_index = indices[i];
+        values[i] = operator()(global_index);
+      }
+  }
+
+
+
+  // Iterator variant: for every index in [indices_begin, indices_end) read
+  // the corresponding vector entry and write it to the output sequence
+  // starting at 'values_begin'.
+  template <typename ForwardIterator, typename OutputIterator>
+  inline
+  void VectorBase::extract_subvector_to (ForwardIterator          indices_begin,
+                                         const ForwardIterator    indices_end,
+                                         OutputIterator           values_begin) const
+  {
+    for (; indices_begin != indices_end; indices_begin++)
+      {
+        *values_begin = operator()(*indices_begin);
+        values_begin++;
+      }
+  }
+
+
+
+  // Return an iterator to the start of the locally stored entries, namely
+  // what operator[](0) of the underlying Epetra vector yields (presumably
+  // the data pointer of its single column -- TODO confirm).
+  inline
+  VectorBase::iterator
+  VectorBase::begin()
+  {
+    return (*vector)[0];
+  }
+
+
+
+  // Return an iterator local_size() positions past the one that begin()
+  // returns.
+  inline
+  VectorBase::iterator
+  VectorBase::end()
+  {
+    return (*vector)[0]+local_size();
+  }
+
+
+
+  // Const variant of begin(): iterator to the start of the locally stored
+  // entries as given by operator[](0) of the underlying Epetra vector.
+  inline
+  VectorBase::const_iterator
+  VectorBase::begin() const
+  {
+    return (*vector)[0];
+  }
+
+
+
+  // Const variant of end(): iterator local_size() positions past the one
+  // that begin() returns.
+  inline
+  VectorBase::const_iterator
+  VectorBase::end() const
+  {
+    return (*vector)[0]+local_size();
+  }
+
+
+
+  // Re-initialize this vector from the layout of 'v'.
+  //
+  // The Epetra vector is replaced unless the caller asked to omit zeroing
+  // AND both vectors already share the same partitioning; in that one case
+  // the existing Epetra vector is kept untouched.
+  inline
+  void
+  VectorBase::reinit (const VectorBase &v,
+                      const bool        omit_zeroing_entries)
+  {
+    Assert (vector.get() != 0,
+            ExcMessage("Vector has not been constructed properly."));
+
+    // NOTE(review): the replacement is copy-constructed from v's Epetra
+    // vector, so it presumably carries v's values rather than zeros --
+    // confirm this is intended
+    if (omit_zeroing_entries == false ||
+        vector_partitioner().SameAs(v.vector_partitioner())==false)
+      vector.reset (new Epetra_FEVector(*v.vector));
+
+    // mirror v's storage for nonlocal entries with a fresh one-column
+    // vector on the same map. NOTE(review): if v has none, an existing
+    // nonlocal_vector of this object is left in place -- confirm
+    if (v.nonlocal_vector.get() != 0)
+      nonlocal_vector.reset(new Epetra_MultiVector(v.nonlocal_vector->Map(), 1));
+  }
+
+
+
+  // Set all entries of the Epetra vector to the scalar 's' (which must be
+  // finite) and return *this. Throws ExcTrilinosError if PutScalar reports
+  // a nonzero error code.
+  inline
+  VectorBase &
+  VectorBase::operator = (const TrilinosScalar s)
+  {
+    AssertIsFinite(s);
+
+    const int ierr = vector->PutScalar(s);
+
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    // the buffer for nonlocal contributions, if present, is always reset to
+    // zero, independent of 's'; its return code is not checked
+    if (nonlocal_vector.get() != 0)
+      nonlocal_vector->PutScalar(0.);
+
+    return *this;
+  }
+
+
+
+  // Overwrite the entries listed in 'indices' with the corresponding
+  // entries of 'values'; both containers must have the same size. Forwards
+  // to the pointer-based set().
+  inline
+  void
+  VectorBase::set (const std::vector<size_type>      &indices,
+                   const std::vector<TrilinosScalar>  &values)
+  {
+    // if we have ghost values, do not allow
+    // writing to this vector at all.
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+    Assert (indices.size() == values.size(),
+            ExcDimensionMismatch(indices.size(),values.size()));
+
+    // NOTE(review): taking the address of element 0 assumes non-empty
+    // containers
+    set (indices.size(), &indices[0], &values[0]);
+  }
+
+
+
+  // Same as the std::vector overload, but with the values taken from a
+  // deal.II Vector of the same size as 'indices'. Forwards to the
+  // pointer-based set().
+  inline
+  void
+  VectorBase::set (const std::vector<size_type>           &indices,
+                   const ::dealii::Vector<TrilinosScalar> &values)
+  {
+    // if we have ghost values, do not allow
+    // writing to this vector at all.
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+    Assert (indices.size() == values.size(),
+            ExcDimensionMismatch(indices.size(),values.size()));
+
+    // NOTE(review): taking the address of element 0 assumes a non-empty
+    // index container
+    set (indices.size(), &indices[0], values.begin());
+  }
+
+
+
+  // Overwrite 'n_elements' entries: entry indices[i] is set to values[i].
+  //
+  // Entries found in the local map are written directly into the local
+  // storage; all others are handed to Trilinos via ReplaceGlobalValues,
+  // after which the vector is marked as not compressed. Throws
+  // ExcTrilinosError if Trilinos reports a nonzero error code.
+  inline
+  void
+  VectorBase::set (const size_type       n_elements,
+                   const size_type      *indices,
+                   const TrilinosScalar *values)
+  {
+    // if we have ghost values, do not allow
+    // writing to this vector at all.
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+    // pending additions have to be assembled before switching to insertion
+    // mode; the return code of GlobalAssemble is not checked here
+    if (last_action == Add)
+      vector->GlobalAssemble(Add);
+
+    if (last_action != Insert)
+      last_action = Insert;
+
+    for (size_type i=0; i<n_elements; ++i)
+      {
+        const size_type row = indices[i];
+        // LID() yields -1 for indices that are not in the local map
+        const TrilinosWrappers::types::int_type local_row = vector->Map().LID(static_cast<TrilinosWrappers::types::int_type>(row));
+        if (local_row != -1)
+          (*vector)[0][local_row] = values[i];
+        else
+          {
+            const int ierr = vector->ReplaceGlobalValues (1,
+                                                          (const TrilinosWrappers::types::int_type *)(&row),
+                                                          &values[i]);
+            AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+            compressed = false;
+          }
+        // in set operation, do not use the pre-allocated vector for nonlocal
+        // entries even if it exists. This is to ensure that we really only
+        // set the elements touched by the set() method and not all contained
+        // in the nonlocal entries vector (there is no way to distinguish them
+        // on the receiving processor)
+      }
+  }
+
+
+
+  // Add values[i] to the entry indices[i] for every i; both containers
+  // must have the same size. Forwards to the pointer-based add().
+  inline
+  void
+  VectorBase::add (const std::vector<size_type>      &indices,
+                   const std::vector<TrilinosScalar>  &values)
+  {
+    // if we have ghost values, do not allow
+    // writing to this vector at all.
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+    Assert (indices.size() == values.size(),
+            ExcDimensionMismatch(indices.size(),values.size()));
+
+    // NOTE(review): taking the address of element 0 assumes non-empty
+    // containers
+    add (indices.size(), &indices[0], &values[0]);
+  }
+
+
+
+  // Same as the std::vector overload, but with the values taken from a
+  // deal.II Vector of the same size as 'indices'. Forwards to the
+  // pointer-based add().
+  inline
+  void
+  VectorBase::add (const std::vector<size_type>           &indices,
+                   const ::dealii::Vector<TrilinosScalar> &values)
+  {
+    // if we have ghost values, do not allow
+    // writing to this vector at all.
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+    Assert (indices.size() == values.size(),
+            ExcDimensionMismatch(indices.size(),values.size()));
+
+    // NOTE(review): taking the address of element 0 assumes a non-empty
+    // index container
+    add (indices.size(), &indices[0], values.begin());
+  }
+
+
+
+  // Add values[i] to the entry indices[i] for i = 0 ... n_elements-1.
+  //
+  // Three cases per entry: it is in the local map (added in place); it is
+  // not local and no nonlocal buffer exists (handed to Trilinos via
+  // SumIntoGlobalValues); or it is not local and a nonlocal buffer exists
+  // (accumulated there). The latter two mark the vector as not compressed.
+  inline
+  void
+  VectorBase::add (const size_type       n_elements,
+                   const size_type      *indices,
+                   const TrilinosScalar *values)
+  {
+    // if we have ghost values, do not allow
+    // writing to this vector at all.
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+    // pending insertions have to be assembled before switching to addition
+    // mode; the return code of GlobalAssemble is not checked here
+    if (last_action != Add)
+      {
+        if (last_action == Insert)
+          vector->GlobalAssemble(Insert);
+        last_action = Add;
+      }
+
+    for (size_type i=0; i<n_elements; ++i)
+      {
+        const size_type row = indices[i];
+        // LID() yields -1 for indices that are not in the local map
+        const TrilinosWrappers::types::int_type local_row = vector->Map().LID(static_cast<TrilinosWrappers::types::int_type>(row));
+        if (local_row != -1)
+          (*vector)[0][local_row] += values[i];
+        else if (nonlocal_vector.get() == 0)
+          {
+            const int ierr = vector->SumIntoGlobalValues (1,
+                                                          (const TrilinosWrappers::types::int_type *)(&row),
+                                                          &values[i]);
+            AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+            compressed = false;
+          }
+        else
+          {
+            // use pre-allocated vector for non-local entries if it exists for
+            // addition operation
+            const TrilinosWrappers::types::int_type my_row = nonlocal_vector->Map().LID(static_cast<TrilinosWrappers::types::int_type>(row));
+            // this check exists in debug mode only; in optimized builds an
+            // unknown row would be used as index -1 below
+            Assert(my_row != -1,
+                   ExcMessage("Attempted to write into off-processor vector entry "
+                              "that has not be specified as being writable upon "
+                              "initialization"));
+            (*nonlocal_vector)[0][my_row] += values[i];
+            compressed = false;
+          }
+      }
+  }
+
+
+
+  // Return the global size of the vector, computed from the Epetra map as
+  // (largest global index) + 1 - (smallest global index). The 64-bit map
+  // accessors are used when DEAL_II_WITH_64BIT_INDICES is defined.
+  inline
+  VectorBase::size_type
+  VectorBase::size () const
+  {
+#ifndef DEAL_II_WITH_64BIT_INDICES
+    return (size_type) (vector->Map().MaxAllGID() + 1 - vector->Map().MinAllGID());
+#else
+    return (size_type) (vector->Map().MaxAllGID64() + 1 - vector->Map().MinAllGID64());
+#endif
+  }
+
+
+
+  // Return the number of elements the Epetra map reports for this process
+  // (NumMyElements).
+  inline
+  VectorBase::size_type
+  VectorBase::local_size () const
+  {
+    return (size_type) vector->Map().NumMyElements();
+  }
+
+
+
+  // Return the half-open range [begin, end) of global indices stored on
+  // this process, taken from the smallest and largest global index of the
+  // map. In debug mode this asserts that the range is contiguous, i.e. that
+  // its length equals the number of locally stored elements.
+  inline
+  std::pair<VectorBase::size_type, VectorBase::size_type>
+  VectorBase::local_range () const
+  {
+#ifndef DEAL_II_WITH_64BIT_INDICES
+    const TrilinosWrappers::types::int_type begin = vector->Map().MinMyGID();
+    const TrilinosWrappers::types::int_type end = vector->Map().MaxMyGID()+1;
+#else
+    const TrilinosWrappers::types::int_type begin = vector->Map().MinMyGID64();
+    const TrilinosWrappers::types::int_type end = vector->Map().MaxMyGID64()+1;
+#endif
+
+    Assert (end-begin == vector->Map().NumMyElements(),
+            ExcMessage ("This function only makes sense if the elements that this "
+                        "vector stores on the current processor form a contiguous range. "
+                        "This does not appear to be the case for the current vector."));
+
+    // the signed Trilinos indices are converted to size_type by the return
+    // type of this function
+    return std::make_pair (begin, end);
+  }
+
+
+
+  // Return the scalar product of this vector with 'vec', computed by the
+  // Epetra Dot() function. Both vectors must share the same map and this
+  // vector must not have ghost elements (checked in debug mode). Throws
+  // ExcTrilinosError on a nonzero Trilinos error code.
+  inline
+  TrilinosScalar
+  VectorBase::operator * (const VectorBase &vec) const
+  {
+    Assert (vector->Map().SameAs(vec.vector->Map()),
+            ExcDifferentParallelPartitioning());
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+    TrilinosScalar result;
+
+    const int ierr = vector->Dot(*(vec.vector), &result);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    return result;
+  }
+
+
+
+  // Return the square of the l2 norm, obtained by squaring the result of
+  // l2_norm().
+  inline
+  VectorBase::real_type
+  VectorBase::norm_sqr () const
+  {
+    const TrilinosScalar euclidean_norm = l2_norm();
+
+    return euclidean_norm * euclidean_norm;
+  }
+
+
+
+  // Return the mean value of the entries as computed by the Epetra
+  // MeanValue() function. Not allowed on vectors with ghost elements
+  // (checked in debug mode). Throws ExcTrilinosError on a nonzero Trilinos
+  // error code.
+  inline
+  TrilinosScalar
+  VectorBase::mean_value () const
+  {
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+    TrilinosScalar mean;
+    const int ierr = vector->MeanValue (&mean);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    return mean;
+  }
+
+
+
+  // Alternative name for min(); simply forwards to it.
+  inline
+  TrilinosScalar
+  VectorBase::minimal_value () const
+  {
+    return min();
+  }
+
+
+
+  // Return the smallest entry as computed by the Epetra MinValue()
+  // function. Throws ExcTrilinosError on a nonzero Trilinos error code.
+  inline
+  TrilinosScalar
+  VectorBase::min () const
+  {
+    TrilinosScalar min_value;
+    const int ierr = vector->MinValue (&min_value);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    return min_value;
+  }
+
+
+
+  // Return the largest entry as computed by the Epetra MaxValue()
+  // function. Throws ExcTrilinosError on a nonzero Trilinos error code.
+  inline
+  TrilinosScalar
+  VectorBase::max () const
+  {
+    TrilinosScalar max_value;
+    const int ierr = vector->MaxValue (&max_value);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    return max_value;
+  }
+
+
+
+  // Return the l1 norm as computed by the Epetra Norm1() function. Not
+  // allowed on vectors with ghost elements (checked in debug mode). Throws
+  // ExcTrilinosError on a nonzero Trilinos error code.
+  inline
+  VectorBase::real_type
+  VectorBase::l1_norm () const
+  {
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+    TrilinosScalar d;
+    const int ierr = vector->Norm1 (&d);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    return d;
+  }
+
+
+
+  // Return the l2 norm as computed by the Epetra Norm2() function. Not
+  // allowed on vectors with ghost elements (checked in debug mode). Throws
+  // ExcTrilinosError on a nonzero Trilinos error code.
+  inline
+  VectorBase::real_type
+  VectorBase::l2_norm () const
+  {
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+    TrilinosScalar d;
+    const int ierr = vector->Norm2 (&d);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    return d;
+  }
+
+
+
+  // Return (sum_i |x_i|^p)^(1/p) where i runs over the local_size() entries
+  // stored on this process. Not allowed on vectors with ghost elements
+  // (checked in debug mode).
+  //
+  // NOTE(review): no reduction over processes is visible in this function,
+  // so for a vector distributed over several processes the result looks
+  // like a per-process quantity rather than the global norm -- confirm and,
+  // if so, sum the local contributions over the communicator before taking
+  // the root.
+  inline
+  VectorBase::real_type
+  VectorBase::lp_norm (const TrilinosScalar p) const
+  {
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+    TrilinosScalar norm = 0;
+    TrilinosScalar sum=0;
+    const size_type n_local = local_size();
+
+    // loop over all the elements because
+    // Trilinos does not support lp norms
+    for (size_type i=0; i<n_local; i++)
+      sum += std::pow(std::fabs((*vector)[0][i]), p);
+
+    norm = std::pow(sum, static_cast<TrilinosScalar>(1./p));
+
+    return norm;
+  }
+
+
+
+  // Return the maximum norm as computed by the Epetra NormInf() function.
+  // Throws ExcTrilinosError on a nonzero Trilinos error code.
+  inline
+  VectorBase::real_type
+  VectorBase::linfty_norm () const
+  {
+    // while we disallow the other
+    // norm operations on ghosted
+    // vectors, this particular norm
+    // is safe to run even in the
+    // presence of ghost elements
+    TrilinosScalar d;
+    const int ierr = vector->NormInf (&d);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    return d;
+  }
+
+
+
+  // Perform add(a, V) on this vector and then return the scalar product of
+  // the updated vector with W. Implemented as two separate operations.
+  inline
+  TrilinosScalar
+  VectorBase::add_and_dot (const TrilinosScalar a,
+                           const VectorBase &V,
+                           const VectorBase &W)
+  {
+    this->add(a, V);
+    return *this * W;
+  }
+
+
+
+  // inline also scalar products, vector
+  // additions etc. since they are all
+  // representable by a single Trilinos
+  // call. This reduces the overhead of the
+  // wrapper class.
+  // Scale the vector by the finite factor 'a' using the Epetra Scale()
+  // function and return *this. Throws ExcTrilinosError on a nonzero
+  // Trilinos error code.
+  inline
+  VectorBase &
+  VectorBase::operator *= (const TrilinosScalar a)
+  {
+    AssertIsFinite(a);
+
+    const int ierr = vector->Scale(a);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    return *this;
+  }
+
+
+
+  // Divide the vector by 'a', implemented as a scaling with 1/a, and return
+  // *this. Throws ExcTrilinosError on a nonzero Trilinos error code.
+  inline
+  VectorBase &
+  VectorBase::operator /= (const TrilinosScalar a)
+  {
+    AssertIsFinite(a);
+
+    const TrilinosScalar factor = 1./a;
+
+    // catches a == 0 (and other non-finite reciprocals) in debug mode
+    AssertIsFinite(factor);
+
+    const int ierr = vector->Scale(factor);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    return *this;
+  }
+
+
+
+  // Add 'v' to this vector and return *this. Both vectors must have the
+  // same size and the same map (checked in debug mode). Throws
+  // ExcTrilinosError on a nonzero Trilinos error code.
+  inline
+  VectorBase &
+  VectorBase::operator += (const VectorBase &v)
+  {
+    Assert (size() == v.size(),
+            ExcDimensionMismatch(size(), v.size()));
+    Assert (vector->Map().SameAs(v.vector->Map()),
+            ExcDifferentParallelPartitioning());
+
+    // Epetra Update(a, A, s) forms s*this + a*A, here this + v
+    const int ierr = vector->Update (1.0, *(v.vector), 1.0);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    return *this;
+  }
+
+
+
+  // Subtract 'v' from this vector and return *this. Both vectors must have
+  // the same size and the same map (checked in debug mode). Throws
+  // ExcTrilinosError on a nonzero Trilinos error code.
+  inline
+  VectorBase &
+  VectorBase::operator -= (const VectorBase &v)
+  {
+    Assert (size() == v.size(),
+            ExcDimensionMismatch(size(), v.size()));
+    Assert (vector->Map().SameAs(v.vector->Map()),
+            ExcDifferentParallelPartitioning());
+
+    // Epetra Update(a, A, s) forms s*this + a*A, here this - v
+    const int ierr = vector->Update (-1.0, *(v.vector), 1.0);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    return *this;
+  }
+
+
+
+  // Add the finite scalar 's' to each of the local_size() entries stored
+  // on this process.
+  inline
+  void
+  VectorBase::add (const TrilinosScalar s)
+  {
+    // vectors with ghost values are read-only, so refuse to modify them
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+    AssertIsFinite(s);
+
+    const size_type n_owned = local_size();
+    for (size_type entry=0; entry<n_owned; ++entry)
+      (*vector)[0][entry] += s;
+  }
+
+
+
+  // Scaled addition: *this += a*v. The local sizes must match and 'a' must
+  // be finite (checked in debug mode). Throws ExcTrilinosError on a nonzero
+  // Trilinos error code.
+  inline
+  void
+  VectorBase::add (const TrilinosScalar  a,
+                   const VectorBase     &v)
+  {
+    // if we have ghost values, do not allow
+    // writing to this vector at all.
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+    Assert (local_size() == v.local_size(),
+            ExcDimensionMismatch(local_size(), v.local_size()));
+
+    AssertIsFinite(a);
+
+    // Epetra Update(a, A, s) forms s*this + a*A
+    const int ierr = vector->Update(a, *(v.vector), 1.);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+  }
+
+
+
+  // Scaled addition of two vectors: *this += a*v + b*w. The local sizes
+  // must match and both factors must be finite (checked in debug mode).
+  // Throws ExcTrilinosError on a nonzero Trilinos error code.
+  inline
+  void
+  VectorBase::add (const TrilinosScalar  a,
+                   const VectorBase     &v,
+                   const TrilinosScalar  b,
+                   const VectorBase     &w)
+  {
+    // if we have ghost values, do not allow
+    // writing to this vector at all.
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+    Assert (local_size() == v.local_size(),
+            ExcDimensionMismatch(local_size(), v.local_size()));
+    Assert (local_size() == w.local_size(),
+            ExcDimensionMismatch(local_size(), w.local_size()));
+
+    AssertIsFinite(a);
+    AssertIsFinite(b);
+
+    // Epetra Update(a, A, b, B, s) forms s*this + a*A + b*B
+    const int ierr = vector->Update(a, *(v.vector), b, *(w.vector), 1.);
+
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+  }
+
+
+
+  // Scaling and addition: *this = s*(*this) + v. The global sizes must
+  // match and 's' must be finite (checked in debug mode).
+  inline
+  void
+  VectorBase::sadd (const TrilinosScalar  s,
+                    const VectorBase     &v)
+  {
+    // if we have ghost values, do not allow
+    // writing to this vector at all.
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+    Assert (size() == v.size(),
+            ExcDimensionMismatch (size(), v.size()));
+
+    AssertIsFinite(s);
+
+    // We assume that the vectors have the same Map
+    // if the local size is the same and if the vectors are not ghosted
+    if (local_size() == v.local_size() && !v.has_ghost_elements())
+      {
+        Assert (this->vector->Map().SameAs(v.vector->Map())==true,
+                ExcDifferentParallelPartitioning());
+        // Epetra Update(a, A, s) forms s*this + a*A
+        const int ierr = vector->Update(1., *(v.vector), s);
+        AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+      }
+    else
+      {
+        // differing layouts: scale first, then use the add() overload that
+        // takes a flag (presumably allowing different maps -- TODO confirm)
+        (*this) *= s;
+        this->add(v, true);
+      }
+  }
+
+
+
+  // Scaling and scaled addition: *this = s*(*this) + a*v. The global sizes
+  // must match and both factors must be finite (checked in debug mode).
+  inline
+  void
+  VectorBase::sadd (const TrilinosScalar  s,
+                    const TrilinosScalar  a,
+                    const VectorBase     &v)
+  {
+    // if we have ghost values, do not allow
+    // writing to this vector at all.
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+    Assert (size() == v.size(),
+            ExcDimensionMismatch (size(), v.size()));
+    AssertIsFinite(s);
+    AssertIsFinite(a);
+
+    // We assume that the vectors have the same Map
+    // if the local size is the same and if the vectors are not ghosted
+    if (local_size() == v.local_size() && !v.has_ghost_elements())
+      {
+        Assert (this->vector->Map().SameAs(v.vector->Map())==true,
+                ExcDifferentParallelPartitioning());
+        // Epetra Update(a, A, s) forms s*this + a*A
+        const int ierr = vector->Update(a, *(v.vector), s);
+        AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+      }
+    else
+      {
+        // differing layouts: scale this vector, scale a temporary copy of v,
+        // then use the add() overload that takes a flag (presumably
+        // allowing different maps -- TODO confirm)
+        (*this) *= s;
+        VectorBase tmp = v;
+        tmp *= a;
+        this->add(tmp, true);
+      }
+  }
+
+
+
+  // Scaling and scaled addition of two vectors:
+  // *this = s*(*this) + a*v + b*w. The global sizes must match and all
+  // factors must be finite (checked in debug mode).
+  inline
+  void
+  VectorBase::sadd (const TrilinosScalar  s,
+                    const TrilinosScalar  a,
+                    const VectorBase     &v,
+                    const TrilinosScalar  b,
+                    const VectorBase     &w)
+  {
+    // if we have ghost values, do not allow
+    // writing to this vector at all.
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+    Assert (size() == v.size(),
+            ExcDimensionMismatch (size(), v.size()));
+    Assert (size() == w.size(),
+            ExcDimensionMismatch (size(), w.size()));
+    AssertIsFinite(s);
+    AssertIsFinite(a);
+    AssertIsFinite(b);
+
+    // We assume that the vectors have the same Map
+    // if the local size is the same and if the vectors are not ghosted
+    if (local_size() == v.local_size() && !v.has_ghost_elements() &&
+        local_size() == w.local_size() && !w.has_ghost_elements())
+      {
+        Assert (this->vector->Map().SameAs(v.vector->Map())==true,
+                ExcDifferentParallelPartitioning());
+        Assert (this->vector->Map().SameAs(w.vector->Map())==true,
+                ExcDifferentParallelPartitioning());
+        // Epetra Update(a, A, b, B, s) forms s*this + a*A + b*B
+        const int ierr = vector->Update(a, *(v.vector), b, *(w.vector), s);
+        AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+      }
+    else
+      {
+        // fall back to two single-vector updates; only the first one
+        // applies the scaling s
+        this->sadd( s, a, v);
+        this->sadd(1., b, w);
+      }
+  }
+
+
+
+  // Scaling and scaled addition of three vectors:
+  // *this = s*(*this) + a*v + b*w + c*x. The global sizes must match and
+  // all factors must be finite (checked in debug mode). Throws
+  // ExcTrilinosError if any Trilinos call reports a nonzero error code.
+  inline
+  void
+  VectorBase::sadd (const TrilinosScalar  s,
+                    const TrilinosScalar  a,
+                    const VectorBase     &v,
+                    const TrilinosScalar  b,
+                    const VectorBase     &w,
+                    const TrilinosScalar  c,
+                    const VectorBase     &x)
+  {
+    // if we have ghost values, do not allow
+    // writing to this vector at all.
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+    Assert (size() == v.size(),
+            ExcDimensionMismatch (size(), v.size()));
+    Assert (size() == w.size(),
+            ExcDimensionMismatch (size(), w.size()));
+    Assert (size() == x.size(),
+            ExcDimensionMismatch (size(), x.size()));
+    AssertIsFinite(s);
+    AssertIsFinite(a);
+    AssertIsFinite(b);
+    AssertIsFinite(c);
+
+    // We assume that the vectors have the same Map
+    // if the local size is the same and if the vectors are not ghosted
+    if (local_size() == v.local_size() && !v.has_ghost_elements() &&
+        local_size() == w.local_size() && !w.has_ghost_elements() &&
+        local_size() == x.local_size() && !x.has_ghost_elements())
+      {
+        Assert (this->vector->Map().SameAs(v.vector->Map())==true,
+                ExcDifferentParallelPartitioning());
+        Assert (this->vector->Map().SameAs(w.vector->Map())==true,
+                ExcDifferentParallelPartitioning());
+        Assert (this->vector->Map().SameAs(x.vector->Map())==true,
+                ExcDifferentParallelPartitioning());
+
+        // Update member can only
+        // input two other vectors so
+        // do it in two steps
+        const int ierr = vector->Update(a, *(v.vector), b, *(w.vector), s);
+        AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+        // check the second step in optimized builds as well, exactly like
+        // the first one, so that a failure cannot go unnoticed
+        const int jerr = vector->Update(c, *(x.vector), 1.);
+        AssertThrow (jerr == 0, ExcTrilinosError(jerr));
+      }
+    else
+      {
+        // fall back to three single-vector updates; only the first one
+        // applies the scaling s
+        this->sadd( s, a, v);
+        this->sadd(1., b, w);
+        this->sadd(1., c, x);
+      }
+  }
+
+
+
+  // Multiply every entry of this vector by the corresponding entry of
+  // 'factors'. The local sizes must match (checked in debug mode). Throws
+  // ExcTrilinosError on a nonzero Trilinos error code.
+  inline
+  void
+  VectorBase::scale (const VectorBase &factors)
+  {
+    // if we have ghost values, do not allow
+    // writing to this vector at all.
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+    Assert (local_size() == factors.local_size(),
+            ExcDimensionMismatch(local_size(), factors.local_size()));
+
+    // Epetra Multiply(ab, A, B, s) forms s*this + ab*(A .* B) elementwise;
+    // here B is this vector itself and s is zero
+    const int ierr = vector->Multiply (1.0, *(factors.vector), *vector, 0.0);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+  }
+
+
+
+  // Assignment with scaling: *this = a*v, where 'a' must be finite
+  // (checked in debug mode).
+  inline
+  void
+  VectorBase::equ (const TrilinosScalar  a,
+                   const VectorBase     &v)
+  {
+    // if we have ghost values, do not allow
+    // writing to this vector at all.
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+    AssertIsFinite(a);
+
+    // If we don't have the same map, copy.
+    if (vector->Map().SameAs(v.vector->Map())==false)
+      {
+        // sadd() with a zero factor for this vector: 0*this + a*v
+        this->sadd(0., a, v);
+      }
+    else
+      {
+        // Otherwise, just update
+        int ierr = vector->Update(a, *v.vector, 0.0);
+        AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+        // note that last_action is reset in this branch only
+        last_action = Zero;
+      }
+
+  }
+
+
+
+  // Set every entry of this vector to the quotient of the corresponding
+  // entries of 'v' and 'w', i.e. this[i] = v[i] / w[i]. All three local
+  // sizes must match (checked in debug mode). Throws ExcTrilinosError on a
+  // nonzero Trilinos error code.
+  inline
+  void
+  VectorBase::ratio (const VectorBase &v,
+                     const VectorBase &w)
+  {
+    Assert (v.local_size() == w.local_size(),
+            ExcDimensionMismatch (v.local_size(), w.local_size()));
+    Assert (local_size() == w.local_size(),
+            ExcDimensionMismatch (local_size(), w.local_size()));
+
+    // Epetra ReciprocalMultiply(ab, A, B, s) forms s*this + ab*(B ./ A)
+    // elementwise, hence w is passed first and v second
+    const int ierr = vector->ReciprocalMultiply(1.0, *(w.vector),
+                                                *(v.vector), 0.0);
+
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+  }
+
+
+
+  // Give read-only access to the underlying Trilinos object, viewed as an
+  // Epetra_MultiVector.
+  inline
+  const Epetra_MultiVector &
+  VectorBase::trilinos_vector () const
+  {
+    return static_cast<const Epetra_MultiVector &>(*vector);
+  }
+
+
+
+  // Give write access to the underlying Epetra_FEVector object.
+  inline
+  Epetra_FEVector &
+  VectorBase::trilinos_vector ()
+  {
+    return *vector;
+  }
+
+
+
+  // Return the map of the underlying Epetra vector, cast to Epetra_Map.
+  inline
+  const Epetra_Map &
+  VectorBase::vector_partitioner () const
+  {
+    return static_cast<const Epetra_Map &>(vector->Map());
+  }
+
+
+
+  // Return the MPI communicator this vector lives on: the communicator of
+  // the Epetra map when deal.II is configured with MPI, and MPI_COMM_SELF
+  // otherwise.
+  //
+  // The returned reference points to a function-local static variable that
+  // is overwritten by every call of this function, for whichever vector it
+  // is called on. Callers that need the value later should therefore copy
+  // the communicator instead of keeping the reference.
+  inline
+  const MPI_Comm &
+  VectorBase::get_mpi_communicator () const
+  {
+    static MPI_Comm comm;
+
+#ifdef DEAL_II_WITH_MPI
+
+    const Epetra_MpiComm *mpi_comm
+      = dynamic_cast<const Epetra_MpiComm *>(&vector->Map().Comm());
+    // the cast yields a null pointer if the map was built on a different
+    // kind of Epetra communicator; do not dereference it in that case
+    Assert (mpi_comm != 0,
+            ExcMessage("The Epetra map of this vector does not use an "
+                       "Epetra_MpiComm communicator."));
+    comm = mpi_comm->Comm();
+
+#else
+
+    comm = MPI_COMM_SELF;
+
+#endif
+
+    return comm;
+  }
+
+
+
+#endif // DOXYGEN
+
+}
+
+/*@}*/
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_TRILINOS
+
+/*----------------------------   trilinos_vector_base.h     ---------------------------*/
+
+#endif
+/*----------------------------   trilinos_vector_base.h     ---------------------------*/
diff --git a/include/deal.II/lac/vector.h b/include/deal.II/lac/vector.h
new file mode 100644
index 0000000..3754326
--- /dev/null
+++ b/include/deal.II/lac/vector.h
@@ -0,0 +1,1465 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__vector_h
+#define dealii__vector_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/index_set.h>
+#include <boost/serialization/array.hpp>
+#include <boost/serialization/split_member.hpp>
+
+#include <cstdio>
+#include <iostream>
+#include <cstring>
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+#ifdef DEAL_II_WITH_PETSC
+namespace PETScWrappers
+{
+  class Vector;
+  namespace MPI
+  {
+    class Vector;
+  }
+}
+#endif
+
+#ifdef DEAL_II_WITH_TRILINOS
+namespace TrilinosWrappers
+{
+  namespace MPI
+  {
+    class Vector;
+  }
+  class Vector;
+}
+#endif
+
+template<typename number> class LAPACKFullMatrix;
+
+template <typename> class BlockVector;
+
+template <typename> class VectorView;
+
+
+
+
+/*! @addtogroup Vectors
+ *@{
+ */
+
+/**
+ * This enum keeps track of the current operation in parallel linear algebra
+ * objects like Vectors and Matrices.
+ *
+ * It is used in the various compress() functions. They also exist in serial
+ * codes for compatibility and are empty there.
+ *
+ * See
+ * @ref GlossCompress "Compressing distributed objects"
+ * for more information.
+ */
+struct VectorOperation
+{
+  enum values
+  {
+    // No operation specified. This is the default argument of
+    // Vector::compress().
+    unknown,
+    // Elements have been set.
+    insert,
+    // Elements have been added to.
+    add
+  };
+};
+
+
+/**
+ * Numerical vector of data.  For this class there are different types of
+ * functions available. The first type of function initializes the vector,
+ * changes its size, or computes the norm of the vector in order to measure
+ * its length in a suitable norm. The second type helps us to manipulate the
+ * components of the vector. The third type defines the algebraic operations
+ * for vectors, while the last type defines a few input and output functions.
+ * As opposed to the array of the C++ standard library called @p vector (with
+ * a lowercase "v"), this class implements an element of a vector space
+ * suitable for numerical computations.
+ *
+ * @note Instantiations for this template are provided for <tt>@<float@>,
+ * @<double@>, @<long double@>, @<std::complex@<float@>@>,
+ * @<std::complex@<double@>@>, @<std::complex@<long double@>@></tt>; others
+ * can be generated in application programs (see the section on
+ * @ref Instantiations
+ * in the manual).
+ *
+ * @author Guido Kanschat, Franz-Theo Suttmeier, Wolfgang Bangerth
+ */
+template <typename Number>
+class Vector : public Subscriptor
+{
+public:
+  /**
+   * Declare standard types used in all containers. These types parallel those
+   * in the <tt>C++</tt> standard libraries <tt>vector<...></tt> class.
+   */
+  typedef Number                                            value_type;
+  typedef value_type                                       *pointer;
+  typedef const value_type                                 *const_pointer;
+  typedef value_type                                       *iterator;
+  typedef const value_type                                 *const_iterator;
+  typedef value_type                                       &reference;
+  typedef const value_type                                 &const_reference;
+  typedef types::global_dof_index                           size_type;
+
+  /**
+   * Declare a type that has holds real-valued numbers with the same precision
+   * as the template argument to this class. If the template argument of this
+   * class is a real data type, then real_type equals the template argument.
+   * If the template argument is a std::complex type then real_type equals the
+   * type underlying the complex numbers.
+   *
+   * This typedef is used to represent the return type of norms.
+   */
+  typedef typename numbers::NumberTraits<Number>::real_type real_type;
+
+  /**
+   * A variable that indicates whether this vector supports distributed data
+   * storage. If true, then this vector also needs an appropriate compress()
+   * function that allows communicating recent set or add operations to
+   * individual elements to be communicated to other processors.
+   *
+   * For the current class, the variable equals false, since it does not
+   * support parallel data storage.
+   */
+  static const bool supports_distributed_data = false;
+
+public:
+
+  /**
+   * @name Basic object handling
+   */
+  //@{
+  /**
+   * Constructor. Create a vector of dimension zero.
+   */
+  Vector ();
+
+  /**
+   * Copy constructor. Sets the dimension to that of the given vector, and
+   * copies all elements.
+   *
+   * We would like to make this constructor explicit, but standard containers
+   * insist on using it implicitly.
+   */
+  Vector (const Vector<Number> &v);
+
+#ifdef DEAL_II_WITH_CXX11
+  /**
+   * Move constructor. Creates a new vector by stealing the internal data of
+   * the vector @p v.
+   *
+   * @note This constructor is only available if deal.II is configured with
+   * C++11 support.
+   */
+  Vector (Vector<Number> &&v);
+#endif
+
+
+#ifndef DEAL_II_EXPLICIT_CONSTRUCTOR_BUG
+  /**
+   * Copy constructor taking a vector of another data type. This will fail if
+   * there is no conversion path from @p OtherNumber to @p Number. Note that
+   * you may lose accuracy when copying to a vector with data elements with
+   * less accuracy.
+   *
+   * Older versions of gcc did not honor the @p explicit keyword on template
+   * constructors. In such cases, it is easy to accidentally write code that
+   * can be very inefficient, since the compiler starts performing hidden
+   * conversions. To avoid this, this function is disabled if we have detected
+   * a broken compiler during configuration.
+   */
+  template <typename OtherNumber>
+  explicit
+  Vector (const Vector<OtherNumber> &v);
+#endif
+
+#ifdef DEAL_II_WITH_PETSC
+  /**
+   * Another copy constructor: copy the values from a sequential PETSc wrapper
+   * vector class. This copy constructor is only available if PETSc was
+   * detected during configuration time.
+   */
+  Vector (const PETScWrappers::Vector &v);
+
+  /**
+   * Another copy constructor: copy the values from a parallel PETSc wrapper
+   * vector class. This copy constructor is only available if PETSc was
+   * detected during configuration time.
+   *
+   * Note that due to the communication model used in MPI, this operation can
+   * only succeed if all processes do it at the same time. I.e., it is not
+   * possible for only one process to obtain a copy of a parallel vector while
+   * the other jobs do something else.
+   */
+  Vector (const PETScWrappers::MPI::Vector &v);
+#endif
+
+#ifdef DEAL_II_WITH_TRILINOS
+  /**
+   * Another copy constructor: copy the values from a Trilinos wrapper vector.
+   * This copy constructor is only available if Trilinos was detected during
+   * configuration time.
+   *
+   * Note that due to the communication model used in MPI, this operation can
+   * only succeed if all processes do it at the same time. This means that it
+   * is not possible for only one process to obtain a copy of a parallel
+   * vector while the other jobs do something else. This call will rather
+   * result in a copy of the vector on all processors.
+   */
+  Vector (const TrilinosWrappers::MPI::Vector &v);
+
+  /**
+   * Another copy constructor: copy the values from a localized Trilinos
+   * wrapper vector. This copy constructor is only available if Trilinos was
+   * detected during configuration time.
+   */
+  Vector (const TrilinosWrappers::Vector &v);
+#endif
+
+  /**
+   * Constructor. Set dimension to @p n and initialize all elements with zero.
+   *
+   * The constructor is made explicit to avoid accidents like this:
+   * <tt>v=0;</tt>. Presumably, the user wants to set every element of the
+   * vector to zero, but instead, what happens is this call:
+   * <tt>v=Vector@<number@>(0);</tt>, i.e. the vector is replaced by one of
+   * length zero.
+   */
+  explicit Vector (const size_type n);
+
+  /**
+   * Initialize the vector with a given range of values pointed to by the
+   * iterators. This function is there in analogy to the @p std::vector class.
+   */
+  template <typename InputIterator>
+  Vector (const InputIterator first,
+          const InputIterator last);
+
+  /**
+   * Destructor, deallocates memory. Made virtual to allow for derived classes
+   * to behave properly.
+   */
+  virtual ~Vector ();
+
+  /**
+   * This function does nothing but is there for compatibility with the @p
+   * PETScWrappers::Vector class.
+   *
+   * For the PETSc vector wrapper class, this function compresses the
+   * underlying representation of the PETSc object, i.e. flushes the buffers
+   * of the vector object if it has any. This function is necessary after
+   * writing into a vector element-by-element and before anything else can be
+   * done on it.
+   *
+   * However, for the implementation of this class, it is immaterial and thus
+   * an empty function.
+   */
+  void compress (::dealii::VectorOperation::values operation
+                 =::dealii::VectorOperation::unknown) const;
+
+  /**
+   * Change the dimension of the vector to @p N. The reserved memory for this
+   * vector remains unchanged if possible, to make things faster; this may
+   * waste some memory, so keep this in mind.  However, if <tt>N==0</tt> all
+   * memory is freed, i.e. if you want to resize the vector and release the
+   * memory not needed, you have to first call <tt>reinit(0)</tt> and then
+   * <tt>reinit(N)</tt>. This cited behaviour is analogous to that of the
+   * standard library containers.
+   *
+   * If @p omit_zeroing_entries is false, the vector is filled by zeros.
+   * Otherwise, the elements are left an unspecified state.
+   *
+   * This function is virtual in order to allow for derived classes to handle
+   * memory separately.
+   */
+  virtual void reinit (const size_type N,
+                       const bool      omit_zeroing_entries=false);
+
+  /**
+   * Change the dimension to that of the vector @p V. The same applies as for
+   * the other @p reinit function.
+   *
+   * The elements of @p V are not copied, i.e.  this function is the same as
+   * calling <tt>reinit (V.size(), omit_zeroing_entries)</tt>.
+   */
+  template <typename Number2>
+  void reinit (const Vector<Number2> &V,
+               const bool            omit_zeroing_entries=false);
+
+  /**
+   * Swap the contents of this vector and the other vector @p v. One could do
+   * this operation with a temporary variable and copying over the data
+   * elements, but this function is significantly more efficient since it only
+   * swaps the pointers to the data of the two vectors and therefore does not
+   * need to allocate temporary storage and move data around.
+   *
+   * This function is analog to the the @p swap function of all C++ standard
+   * containers. Also, there is a global function <tt>swap(u,v)</tt> that
+   * simply calls <tt>u.swap(v)</tt>, again in analogy to standard functions.
+   *
+   * This function is virtual in order to allow for derived classes to handle
+   * memory separately.
+   */
+  virtual void swap (Vector<Number> &v);
+
+  /**
+   * Set all components of the vector to the given number @p s. Simply pass
+   * this down to the individual block objects, but we still need to declare
+   * this function to make the example given in the discussion about making
+   * the constructor explicit work.
+   *
+   * Since the semantics of assigning a scalar to a vector are not immediately
+   * clear, this operator should really only be used if you want to set the
+   * entire vector to zero. This allows the intuitive notation <tt>v=0</tt>.
+   * Assigning other values is deprecated and may be disallowed in the future.
+   *
+   * @dealiiOperationIsMultithreaded
+   */
+  Vector<Number> &operator= (const Number s);
+
+  /**
+   * Copy the given vector. Resize the present vector if necessary.
+   *
+   * @dealiiOperationIsMultithreaded
+   */
+  Vector<Number> &operator= (const Vector<Number> &v);
+
+#ifdef DEAL_II_WITH_CXX11
+  /**
+   * Move the given vector. This operator replaces the present vector with @p
+   * v by efficiently swapping the internal data structures.
+   *
+   * @note This operator is only available if deal.II is configured with C++11
+   * support.
+   */
+  Vector<Number> &operator= (Vector<Number> &&v);
+#endif
+
+  /**
+   * Copy the given vector. Resize the present vector if necessary.
+   *
+   * @dealiiOperationIsMultithreaded
+   */
+  template <typename Number2>
+  Vector<Number> &operator= (const Vector<Number2> &v);
+
+  /**
+   * Copy operator for assigning a block vector to a regular vector.
+   */
+  Vector<Number> &operator= (const BlockVector<Number> &v);
+
+#ifdef DEAL_II_WITH_PETSC
+  /**
+   * Another copy operator: copy the values from a sequential PETSc wrapper
+   * vector class. This operator is only available if PETSc was detected
+   * during configuration time.
+   */
+  Vector<Number> &
+  operator= (const PETScWrappers::Vector &v);
+
+  /**
+   * Another copy operator: copy the values from a parallel PETSc wrapper
+   * vector class. This operator is only available if PETSc was detected
+   * during configuration time.
+   *
+   * Note that due to the communication model used in MPI, this operation can
+   * only succeed if all processes do it at the same time. I.e., it is not
+   * possible for only one process to obtain a copy of a parallel vector while
+   * the other jobs do something else.
+   */
+  Vector<Number> &
+  operator= (const PETScWrappers::MPI::Vector &v);
+#endif
+
+
+#ifdef DEAL_II_WITH_TRILINOS
+  /**
+   * Another copy operator: copy the values from a (sequential or parallel,
+   * depending on the underlying compiler) Trilinos wrapper vector class. This
+   * operator is only available if Trilinos was detected during configuration
+   * time.
+   *
+   * Note that due to the communication model used in MPI, this operation can
+   * only succeed if all processes do it at the same time. I.e., it is not
+   * possible for only one process to obtain a copy of a parallel vector while
+   * the other jobs do something else.
+   */
+  Vector<Number> &
+  operator= (const TrilinosWrappers::MPI::Vector &v);
+
+  /**
+   * Another copy operator: copy the values from a sequential Trilinos wrapper
+   * vector class. This operator is only available if Trilinos was detected
+   * during configuration time.
+   */
+  Vector<Number> &
+  operator= (const TrilinosWrappers::Vector &v);
+#endif
+
+  /**
+   * Test for equality. This function assumes that the present vector and the
+   * one to compare with have the same size already, since comparing vectors
+   * of different sizes makes not much sense anyway.
+   */
+  template <typename Number2>
+  bool operator== (const Vector<Number2> &v) const;
+
+  /**
+   * Test for inequality. This function assumes that the present vector and
+   * the one to compare with have the same size already, since comparing
+   * vectors of different sizes makes not much sense anyway.
+   */
+  template <typename Number2>
+  bool operator != (const Vector<Number2> &v) const;
+
+  //@}
+
+
+  /**
+   * @name Scalar products, norms and related operations
+   */
+  //@{
+
+  /**
+   * Return the scalar product of two vectors.  The return type is the
+   * underlying type of @p this vector, so the return type and the accuracy
+   * with which it the result is computed depend on the order of the arguments
+   * of this vector.
+   *
+   * For complex vectors, the scalar product is implemented as
+   * $\left<v,w\right>=\sum_i v_i \bar{w_i}$.
+   *
+   * @dealiiOperationIsMultithreaded The algorithm uses pairwise summation
+   * with the same order of summation in every run, which gives fully
+   * repeatable results from one run to another.
+   */
+  template <typename Number2>
+  Number operator * (const Vector<Number2> &V) const;
+
+  /**
+   * Return square of the $l_2$-norm.
+   *
+   * @dealiiOperationIsMultithreaded The algorithm uses pairwise summation
+   * with the same order of summation in every run, which gives fully
+   * repeatable results from one run to another.
+   */
+  real_type norm_sqr () const;
+
+  /**
+   * Mean value of the elements of this vector.
+   *
+   * @dealiiOperationIsMultithreaded The algorithm uses pairwise summation
+   * with the same order of summation in every run, which gives fully
+   * repeatable results from one run to another.
+   */
+  Number mean_value () const;
+
+  /**
+   * $l_1$-norm of the vector. The sum of the absolute values.
+   *
+   * @dealiiOperationIsMultithreaded The algorithm uses pairwise summation
+   * with the same order of summation in every run, which gives fully
+   * repeatable results from one run to another.
+   */
+  real_type l1_norm () const;
+
+  /**
+   * $l_2$-norm of the vector. The square root of the sum of the squares of
+   * the elements.
+   *
+   * @dealiiOperationIsMultithreaded The algorithm uses pairwise summation
+   * with the same order of summation in every run, which gives fully
+   * repeatable results from one run to another.
+   */
+  real_type l2_norm () const;
+
+  /**
+   * $l_p$-norm of the vector. The pth root of the sum of the pth powers of
+   * the absolute values of the elements.
+   *
+   * @dealiiOperationIsMultithreaded The algorithm uses pairwise summation
+   * with the same order of summation in every run, which gives fully
+   * repeatable results from one run to another.
+   */
+  real_type lp_norm (const real_type p) const;
+
+  /**
+   * Maximum absolute value of the elements.
+   */
+  real_type linfty_norm () const;
+
+  /**
+   * Performs a combined operation of a vector addition and a subsequent inner
+   * product, returning the value of the inner product. In other words, the
+   * result of this function is the same as if the user called
+   * @code
+   * this->add(a, V);
+   * return_value = *this * W;
+   * @endcode
+   *
+   * The reason this function exists is that this operation involves less
+   * memory transfer than calling the two functions separately. This method
+   * only needs to load three vectors, @p this, @p V, @p W, whereas calling
+   * separate methods means to load the calling vector @p this twice. Since
+   * most vector operations are memory transfer limited, this reduces the time
+   * by 25\% (or 50\% if @p W equals @p this).
+   *
+   * @dealiiOperationIsMultithreaded The algorithm uses pairwise summation
+   * with the same order of summation in every run, which gives fully
+   * repeatable results from one run to another.
+   */
+  Number add_and_dot (const Number          a,
+                      const Vector<Number> &V,
+                      const Vector<Number> &W);
+
+  //@}
+
+
+  /**
+   * @name Data access
+   */
+  //@{
+
+  /**
+   * Make the @p Vector class a bit like the <tt>vector<></tt> class of the
+   * C++ standard library by returning iterators to the start and end of the
+   * elements of this vector.
+   */
+  iterator begin ();
+
+  /**
+   * Return constant iterator to the start of the vectors.
+   */
+  const_iterator begin () const;
+
+  /**
+   * Return an iterator pointing to the element past the end of the array.
+   */
+  iterator end ();
+
+  /**
+   * Return a constant iterator pointing to the element past the end of the
+   * array.
+   */
+  const_iterator end () const;
+
+  /**
+   * Access the value of the @p ith component.
+   */
+  Number operator() (const size_type i) const;
+
+  /**
+   * Access the @p ith component as a writeable reference.
+   */
+  Number &operator() (const size_type i);
+
+  /**
+   * Access the value of the @p ith component.
+   *
+   * Exactly the same as operator().
+   */
+  Number operator[] (const size_type i) const;
+
+  /**
+   * Access the @p ith component as a writeable reference.
+   *
+   * Exactly the same as operator().
+   */
+  Number &operator[] (const size_type i);
+
+  /**
+   * A collective get operation: instead of getting individual elements of a
+   * vector, this function allows to get a whole set of elements at once. The
+   * indices of the elements to be read are stated in the first argument, the
+   * corresponding values are returned in the second.
+   */
+  template <typename OtherNumber>
+  void extract_subvector_to (const std::vector<size_type> &indices,
+                             std::vector<OtherNumber> &values) const;
+
+  /**
+   * Just as the above, but with pointers. Useful in minimizing copying of
+   * data around.
+   */
+  template <typename ForwardIterator, typename OutputIterator>
+  void extract_subvector_to (ForwardIterator       indices_begin,
+                             const ForwardIterator indices_end,
+                             OutputIterator        values_begin) const;
+  //@}
+
+
+  /**
+   * @name Modification of vectors
+   */
+  //@{
+
+  /**
+   * Add the given vector to the present one.
+   *
+   * @dealiiOperationIsMultithreaded
+   */
+  Vector<Number> &operator += (const Vector<Number> &V);
+
+  /**
+   * Subtract the given vector from the present one.
+   *
+   * @dealiiOperationIsMultithreaded
+   */
+  Vector<Number> &operator -= (const Vector<Number> &V);
+
+  /**
+   * A collective add operation: This function adds a whole set of values
+   * stored in @p values to the vector components specified by @p indices.
+   */
+  template <typename OtherNumber>
+  void add (const std::vector<size_type>   &indices,
+            const std::vector<OtherNumber>  &values);
+
+  /**
+   * This is a second collective add operation. As a difference, this function
+   * takes a deal.II vector of values.
+   */
+  template <typename OtherNumber>
+  void add (const std::vector<size_type> &indices,
+            const Vector<OtherNumber>    &values);
+
+  /**
+   * Take an address where <tt>n_elements</tt> are stored contiguously and add
+   * them into the vector. Handles all cases which are not covered by the
+   * other two <tt>add()</tt> functions above.
+   */
+  template <typename OtherNumber>
+  void add (const size_type    n_elements,
+            const size_type   *indices,
+            const OtherNumber  *values);
+
+  /**
+   * Addition of @p s to all components. Note that @p s is a scalar and not a
+   * vector.
+   *
+   * @dealiiOperationIsMultithreaded
+   */
+  void add (const Number s);
+
+  /**
+   * Simple vector addition, equal to the <tt>operator +=</tt>.
+   *
+   * @deprecated Use the <tt>operator +=</tt> instead.
+   *
+   * @dealiiOperationIsMultithreaded
+   */
+  void add (const Vector<Number> &V) DEAL_II_DEPRECATED;
+
+
+  /**
+   * Multiple addition of scaled vectors, i.e. <tt>*this += a*V+b*W</tt>.
+   *
+   * @dealiiOperationIsMultithreaded
+   */
+  void add (const Number a, const Vector<Number> &V,
+            const Number b, const Vector<Number> &W);
+
+  /**
+   * Simple addition of a multiple of a vector, i.e. <tt>*this += a*V</tt>.
+   *
+   * @dealiiOperationIsMultithreaded
+   */
+  void add (const Number a, const Vector<Number> &V);
+
+  /**
+   * Scaling and simple vector addition, i.e.  <tt>*this = s*(*this)+V</tt>.
+   *
+   * @dealiiOperationIsMultithreaded
+   */
+  void sadd (const Number          s,
+             const Vector<Number> &V);
+
+  /**
+   * Scaling and simple addition, i.e.  <tt>*this = s*(*this)+a*V</tt>.
+   *
+   * @dealiiOperationIsMultithreaded
+   */
+  void sadd (const Number          s,
+             const Number          a,
+             const Vector<Number> &V);
+
+  /**
+   * Scaling and multiple addition.
+   *
+   * This function is deprecated.
+   *
+   * @dealiiOperationIsMultithreaded
+   */
+  void sadd (const Number          s,
+             const Number          a,
+             const Vector<Number> &V,
+             const Number          b,
+             const Vector<Number> &W) DEAL_II_DEPRECATED;
+
+  /**
+   * Scaling and multiple addition.  <tt>*this = s*(*this)+a*V + b*W +
+   * c*X</tt>.
+   *
+   * This function is deprecated.
+   *
+   * @dealiiOperationIsMultithreaded
+   */
+  void sadd (const Number          s,
+             const Number          a,
+             const Vector<Number> &V,
+             const Number          b,
+             const Vector<Number> &W,
+             const Number          c,
+             const Vector<Number> &X) DEAL_II_DEPRECATED;
+
+  /**
+   * Scale each element of the vector by a constant value.
+   *
+   * @dealiiOperationIsMultithreaded
+   */
+  Vector<Number> &operator *= (const Number factor);
+
+  /**
+   * Scale each element of the vector by the inverse of the given value.
+   *
+   * @dealiiOperationIsMultithreaded
+   */
+  Vector<Number> &operator /= (const Number factor);
+
+  /**
+   * Scale each element of this vector by the corresponding element in the
+   * argument. This function is mostly meant to simulate multiplication (and
+   * immediate re-assignment) by a diagonal scaling matrix.
+   *
+   * @dealiiOperationIsMultithreaded
+   */
+  void scale (const Vector<Number> &scaling_factors);
+
+  /**
+   * Scale each element of this vector by the corresponding element in the
+   * argument. This function is mostly meant to simulate multiplication (and
+   * immediate re-assignment) by a diagonal scaling matrix.
+   */
+  template <typename Number2>
+  void scale (const Vector<Number2> &scaling_factors);
+
+  /**
+   * Assignment <tt>*this = a*u</tt>.
+   *
+   * @dealiiOperationIsMultithreaded
+   */
+  void equ (const Number a, const Vector<Number> &u);
+
+  /**
+   * Assignment <tt>*this = a*u</tt>.
+   */
+  template <typename Number2>
+  void equ (const Number a, const Vector<Number2> &u);
+
+  /**
+   * Assignment <tt>*this = a*u + b*v</tt>.
+   *
+   * This function is deprecated.
+   *
+   * @dealiiOperationIsMultithreaded
+   */
+  void equ (const Number a, const Vector<Number> &u,
+            const Number b, const Vector<Number> &v) DEAL_II_DEPRECATED;
+
+  /**
+   * Assignment <tt>*this = a*u + b*v + b*w</tt>.
+   *
+   * This function is deprecated.
+   *
+   * @dealiiOperationIsMultithreaded
+   */
+  void equ (const Number a, const Vector<Number> &u,
+            const Number b, const Vector<Number> &v,
+            const Number c, const Vector<Number> &w) DEAL_II_DEPRECATED;
+
+  /**
+   * Compute the elementwise ratio of the two given vectors, that is let
+   * <tt>this[i] = a[i]/b[i]</tt>. This is useful for example if you want to
+   * compute the cellwise ratio of true to estimated error.
+   *
+   * This vector is appropriately scaled to hold the result.
+   *
+   * If any of the <tt>b[i]</tt> is zero, the result is undefined. No attempt
+   * is made to catch such situations.
+   *
+   * @dealiiOperationIsMultithreaded
+   */
+  void ratio (const Vector<Number> &a,
+              const Vector<Number> &b) DEAL_II_DEPRECATED;
+
+  /**
+   * This function does nothing but is there for compatibility with the @p
+   * PETScWrappers::Vector class.
+   *
+   * For the PETSc vector wrapper class, this function updates the ghost
+   * values of the PETSc vector. This is necessary after any modification
+   * before reading ghost values.
+   *
+   * However, for the implementation of this class, it is immaterial and thus
+   * an empty function.
+   */
+  void update_ghost_values () const;
+  //@}
+
+
+  /**
+   * @name Input and output
+   */
+  //@{
+  /**
+   * Output of vector in user-defined format. For complex-valued vectors, the
+   * format should include specifiers for both the real and imaginary parts.
+   *
+   * This function is deprecated.
+   */
+  void print (const char *format = 0) const DEAL_II_DEPRECATED;
+
+  /**
+   * Print to a stream. @p precision denotes the desired precision with which
+   * values shall be printed, @p scientific whether scientific notation shall
+   * be used. If @p across is @p true then the vector is printed in a line,
+   * while if @p false then the elements are printed on a separate line each.
+   */
+  void print (std::ostream &out,
+              const unsigned int precision  = 3,
+              const bool scientific = true,
+              const bool across     = true) const;
+
+  /**
+   * Print to a LogStream. <tt>width</tt> is used as argument to the std::setw
+   * manipulator, if printing across.  If @p across is @p true then the vector
+   * is printed in a line, while if @p false then the elements are printed on
+   * a separate line each.
+   *
+   * This function is deprecated.
+   */
+  void print (LogStream &out,
+              const unsigned int width = 6,
+              const bool across = true) const DEAL_II_DEPRECATED;
+
+  /**
+   * Write the vector en bloc to a file. This is done in a binary mode, so the
+   * output is neither readable by humans nor (probably) by other computers
+   * using a different operating system or number format.
+   */
+  void block_write (std::ostream &out) const;
+
+  /**
+   * Read a vector en block from a file. This is done using the inverse
+   * operations to the above function, so it is reasonably fast because the
+   * bitstream is not interpreted.
+   *
+   * The vector is resized if necessary.
+   *
+   * A primitive form of error checking is performed which will recognize the
+   * bluntest attempts to interpret some data as a vector stored bitwise to a
+   * file, but not more.
+   */
+  void block_read (std::istream &in);
+
+  /**
+   * Write the data of this object to a stream for the purpose of
+   * serialization.
+   */
+  template <class Archive>
+  void save (Archive &ar, const unsigned int version) const;
+
+  /**
+   * Read the data of this object from a stream for the purpose of
+   * serialization.
+   */
+  template <class Archive>
+  void load (Archive &ar, const unsigned int version);
+
+  BOOST_SERIALIZATION_SPLIT_MEMBER()
+
+  /**
+   * @}
+   */
+
+  /**
+   * @name Information about the object
+   */
+  //@{
+
+  /**
+   * Returns true if the given global index is in the local range of this
+   * processor.  Since this is not a distributed vector the method always
+   * returns true.
+   */
+  bool in_local_range (const size_type global_index) const;
+
+  /**
+   * Return an index set that describes which elements of this vector are
+   * owned by the current processor. Note that this index set does not include
+   * elements this vector may store locally as ghost elements but that are in
+   * fact owned by another processor. As a consequence, the index sets
+   * returned on different processors if this is a distributed vector will
+   * form disjoint sets that add up to the complete index set. Obviously, if a
+   * vector is created on only one processor, then the result would satisfy
+   * @code
+   *   vec.locally_owned_elements() == complete_index_set (vec.size())
+   * @endcode
+   *
+   * Since the current data type does not support parallel data storage across
+   * different processors, the returned index set is the complete index set.
+   */
+  IndexSet locally_owned_elements () const;
+
+  /**
+   * Return dimension of the vector.
+   */
+  std::size_t size () const;
+
+  /**
+   * Return whether the vector contains only elements with value zero. This
+   * function is mainly for internal consistency checks and should seldom be
+   * used when not in debug mode since it uses quite some time.
+   */
+  bool all_zero () const;
+
+  /**
+   * Return @p true if the vector has no negative entries, i.e. all entries
+   * are zero or positive. This function is used, for example, to check
+   * whether refinement indicators are really all positive (or zero).
+   *
+   * The function obviously only makes sense if the template argument of this
+   * class is a real type. If it is a complex type, then an exception is
+   * thrown.
+   */
+  bool is_non_negative () const;
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  std::size_t memory_consumption () const;
+  //@}
+
+protected:
+
+  /**
+   * Dimension. Actual number of components contained in the vector.  Get this
+   * number by calling <tt>size()</tt>.
+   */
+  size_type vec_size;
+
+  /**
+   * Amount of memory actually reserved for this vector. This number may be
+   * greater than @p vec_size if a @p reinit was called with less memory
+   * requirements than the vector needed last time. At present @p reinit does
+   * not free memory when the number of needed elements is reduced.
+   */
+  size_type max_vec_size;
+
+  /**
+   * Pointer to the array of elements of this vector.
+   */
+  Number *val;
+
+  /**
+   * Make all other vector types friends.
+   */
+  template <typename Number2> friend class Vector;
+
+  /**
+   * LAPACK matrices need access to the data.
+   */
+  template <typename Number2> friend class LAPACKFullMatrix;
+
+  /**
+   * VectorView will access the pointer.
+   */
+  friend class VectorView<Number>;
+
+private:
+
+  /**
+   * Allocate and align @p val along 64-byte boundaries. The size of the
+   * allocated memory is determined by @p max_vec_size.
+   */
+  void allocate();
+
+  /**
+   * Deallocate @p val.
+   */
+  void deallocate();
+};
+
+/*@}*/
+/*----------------------- Inline functions ----------------------------------*/
+
+
+#ifndef DOXYGEN
+
+
+// Default constructor: creates an empty vector (size zero) that does not
+// own any memory, i.e. @p val is a null pointer.
+template <typename Number>
+inline
+Vector<Number>::Vector ()
+  : vec_size (0), max_vec_size (0), val (0)
+{}
+
+
+
+// Construct a vector as a copy of the elements in the iterator range
+// [first,last).
+template <typename Number>
+template <typename InputIterator>
+Vector<Number>::Vector (const InputIterator first, const InputIterator last)
+  : vec_size (0), max_vec_size (0), val (0)
+{
+  // size the vector so that it can hold the range. zeroing the entries is
+  // skipped because each of them is overwritten by the copy right below
+  const bool omit_zeroing_entries = true;
+  reinit (std::distance (first, last), omit_zeroing_entries);
+
+  std::copy (first, last, begin());
+}
+
+
+
+// Construct a vector with @p n entries. reinit() is called with
+// omit_zeroing_entries == false, so all entries are set to zero.
+template <typename Number>
+inline
+Vector<Number>::Vector (const size_type n)
+  : vec_size (0), max_vec_size (0), val (0)
+{
+  const bool omit_zeroing_entries = false;
+  reinit (n, omit_zeroing_entries);
+}
+
+
+
+// Destructor: release the element array if one has been allocated.
+template <typename Number>
+inline
+Vector<Number>::~Vector ()
+{
+  // nothing to free for a vector that never allocated memory
+  if (val == 0)
+    return;
+
+  deallocate();
+  val = 0;
+}
+
+
+
+// Resize the vector to @p n entries. Memory is re-allocated only if @p n
+// exceeds the amount currently reserved (@p max_vec_size); for n==0 all
+// memory is released. Unless @p omit_zeroing_entries is true, all entries
+// are set to zero afterwards.
+template <typename Number>
+inline
+void Vector<Number>::reinit (const size_type n,
+                             const bool omit_zeroing_entries)
+{
+  if (n == 0)
+    {
+      // an empty vector does not hold on to any memory
+      if (val != 0)
+        deallocate();
+
+      val          = 0;
+      vec_size     = 0;
+      max_vec_size = 0;
+      return;
+    }
+
+  // grow the storage only if what is reserved is not sufficient; a request
+  // for fewer elements keeps the existing allocation
+  if (max_vec_size < n)
+    {
+      if (val != 0)
+        deallocate();
+
+      max_vec_size = n;
+      allocate();
+    }
+
+  vec_size = n;
+
+  if (!omit_zeroing_entries)
+    *this = static_cast<Number>(0);
+}
+
+
+
+// declare function that is implemented in vector.templates.h
+namespace internal
+{
+  namespace Vector
+  {
+    template <typename T, typename U>
+    void copy_vector (const dealii::Vector<T> &src,
+                      dealii::Vector<U>       &dst);
+  }
+}
+
+
+
+// Copy assignment. The work is done by internal::Vector::copy_vector(),
+// which is implemented in vector.templates.h.
+template <typename Number>
+inline
+Vector<Number> &
+Vector<Number>::operator= (const Vector<Number> &v)
+{
+  Vector<Number> &self = *this;
+  dealii::internal::Vector::copy_vector (v, self);
+  return self;
+}
+
+
+
+#ifdef DEAL_II_WITH_CXX11
+// Move assignment: exchange the contents of this vector with those of
+// @p v, so that @p v holds the previous data of this vector afterwards.
+template <typename Number>
+inline
+Vector<Number> &
+Vector<Number>::operator= (Vector<Number> &&v)
+{
+  this->swap (v);
+  return *this;
+}
+#endif
+
+
+
+// Assignment from a vector with a different element type. The work is done
+// by internal::Vector::copy_vector().
+template <typename Number>
+template <typename Number2>
+inline
+Vector<Number> &
+Vector<Number>::operator= (const Vector<Number2> &v)
+{
+  Vector<Number> &self = *this;
+  internal::Vector::copy_vector (v, self);
+  return self;
+}
+
+
+
+// Return the number of elements of this vector (@p vec_size).
+template <typename Number>
+inline
+std::size_t Vector<Number>::size () const
+{
+  return this->vec_size;
+}
+
+
+// Always returns true, independent of the index passed in.
+template <typename Number>
+inline
+bool Vector<Number>::in_local_range
+(const size_type /*global_index*/) const
+{
+  return true;
+}
+
+
+
+// Return an iterator pointing to the first element of the array @p val.
+template <typename Number>
+inline
+typename Vector<Number>::iterator
+Vector<Number>::begin ()
+{
+  return val;
+}
+
+
+
+// Return a constant iterator pointing to the first element of the array
+// @p val.
+template <typename Number>
+inline
+typename Vector<Number>::const_iterator
+Vector<Number>::begin () const
+{
+  return val;
+}
+
+
+
+// Return an iterator pointing one past the last of the @p vec_size
+// elements.
+template <typename Number>
+inline
+typename Vector<Number>::iterator
+Vector<Number>::end ()
+{
+  return val + vec_size;
+}
+
+
+
+// Return a constant iterator pointing one past the last of the
+// @p vec_size elements.
+template <typename Number>
+inline
+typename Vector<Number>::const_iterator
+Vector<Number>::end () const
+{
+  return val + vec_size;
+}
+
+
+
+// Return the value of the @p i-th entry. The index is checked against
+// the vector size with an Assert.
+template <typename Number>
+inline
+Number Vector<Number>::operator() (const size_type i) const
+{
+  // use the exception that is templated on the index type, as the
+  // non-const overload does, so that the index is reported as a size_type
+  Assert (i<vec_size, ExcIndexRangeType<size_type>(i,0,vec_size));
+  return val[i];
+}
+
+
+
+// Return a writable reference to the @p i-th entry. The index is checked
+// against the vector size with an Assert.
+template <typename Number>
+inline
+Number &Vector<Number>::operator() (const size_type i)
+{
+  Assert (i<vec_size, ExcIndexRangeType<size_type>(i,0,vec_size));
+
+  Number *const entry = val + i;
+  return *entry;
+}
+
+
+
+// Return the value of the @p i-th entry; same as operator()(i).
+template <typename Number>
+inline
+Number Vector<Number>::operator[] (const size_type i) const
+{
+  return (*this)(i);
+}
+
+
+
+// Return a writable reference to the @p i-th entry; same as
+// operator()(i).
+template <typename Number>
+inline
+Number &Vector<Number>::operator[] (const size_type i)
+{
+  return (*this)(i);
+}
+
+
+
+// Copy the entries of this vector listed in @p indices into @p values:
+// values[i] receives the entry with index indices[i]. @p values is not
+// resized and therefore must already provide at least indices.size()
+// entries.
+template <typename Number>
+template <typename OtherNumber>
+inline
+void Vector<Number>::extract_subvector_to (const std::vector<size_type> &indices,
+                                           std::vector<OtherNumber> &values) const
+{
+  // the loop below writes values[0] ... values[indices.size()-1]; make
+  // sure these entries exist rather than writing past the end
+  Assert (values.size() >= indices.size(),
+          ExcMessage ("The output vector must provide at least as many "
+                      "entries as there are indices."));
+
+  for (size_type i = 0; i < indices.size(); ++i)
+    values[i] = operator()(indices[i]);
+}
+
+
+
+// Copy the entries of this vector whose indices are given by the range
+// [indices_begin,indices_end) to consecutive positions starting at
+// @p values_begin.
+template <typename Number>
+template <typename ForwardIterator, typename OutputIterator>
+inline
+void Vector<Number>::extract_subvector_to (ForwardIterator          indices_begin,
+                                           const ForwardIterator    indices_end,
+                                           OutputIterator           values_begin) const
+{
+  for (; indices_begin != indices_end; ++indices_begin)
+    {
+      *values_begin = (*this)(*indices_begin);
+      ++values_begin;
+    }
+}
+
+
+
+// Divide all entries by @p factor. This is implemented as a
+// multiplication with the reciprocal of @p factor, which has to be finite
+// and nonzero.
+template <typename Number>
+inline
+Vector<Number> &
+Vector<Number>::operator /= (const Number factor)
+{
+  AssertIsFinite(factor);
+  Assert (factor != Number(0.), ExcZero() );
+
+  const Number reciprocal = Number(1.)/factor;
+  *this *= reciprocal;
+
+  return *this;
+}
+
+
+
+// Add values[i] to the entry with index indices[i] for all i. Both
+// arguments have to be of the same size.
+template <typename Number>
+template <typename OtherNumber>
+inline
+void
+Vector<Number>::add (const std::vector<size_type> &indices,
+                     const std::vector<OtherNumber>  &values)
+{
+  Assert (indices.size() == values.size(),
+          ExcDimensionMismatch(indices.size(), values.size()));
+
+  // there is nothing to add for empty input. returning here also avoids
+  // evaluating indices[0] and values[0] on empty vectors, which is
+  // undefined behavior
+  if (indices.empty())
+    return;
+
+  add (indices.size(), &indices[0], &values[0]);
+}
+
+
+
+// Add the i-th entry of @p values to the entry of this vector with index
+// indices[i] for all i. Both arguments have to be of the same size.
+template <typename Number>
+template <typename OtherNumber>
+inline
+void
+Vector<Number>::add (const std::vector<size_type> &indices,
+                     const Vector<OtherNumber>    &values)
+{
+  Assert (indices.size() == values.size(),
+          ExcDimensionMismatch(indices.size(), values.size()));
+
+  // there is nothing to add for empty input. returning here also avoids
+  // evaluating indices[0] on an empty vector, which is undefined behavior
+  if (indices.empty())
+    return;
+
+  add (indices.size(), &indices[0], values.val);
+}
+
+
+
+// Add values[i] to the entry with index indices[i] for i=0...n_indices-1.
+// Both arrays have to provide at least @p n_indices elements. Each index
+// is checked against the vector size and each value for finiteness with
+// an Assert.
+template <typename Number>
+template <typename OtherNumber>
+inline
+void
+Vector<Number>::add (const size_type  n_indices,
+                     const size_type *indices,
+                     const OtherNumber  *values)
+{
+  for (size_type i=0; i<n_indices; ++i)
+    {
+      // use the exception that is templated on the index type, as the
+      // non-const operator() does, so that the index is reported as a
+      // size_type
+      Assert (indices[i] < vec_size,
+              ExcIndexRangeType<size_type>(indices[i],0,vec_size));
+      Assert (numbers::is_finite(values[i]),
+              ExcMessage("The given value is not finite but either infinite or Not A Number (NaN)"));
+
+      val[indices[i]] += values[i];
+    }
+}
+
+
+
+// Inequality comparison, defined as the negation of operator==.
+template <typename Number>
+template <typename Number2>
+inline
+bool
+Vector<Number>::operator != (const Vector<Number2> &v) const
+{
+  const bool vectors_are_equal = (*this == v);
+  return !vectors_are_equal;
+}
+
+
+
+// This function does nothing; the argument is ignored.
+template <typename Number>
+inline
+void
+Vector<Number>::compress (::dealii::VectorOperation::values /*operation*/) const
+{
+}
+
+
+
+// This function does nothing.
+template <typename Number>
+inline
+void
+Vector<Number>::update_ghost_values () const
+{
+}
+
+
+
+// Moved from vector.templates.h as an inline function by Luca Heltai
+// on 2009/04/12 to prevent strange compiling errors, after making
+// swap virtual.
+// Exchange the contents of this vector and @p v by swapping the data
+// pointer and the two size fields; no elements are copied.
+template <typename Number>
+inline
+void
+Vector<Number>::swap (Vector<Number> &v)
+{
+  std::swap (val,          v.val);
+  std::swap (max_vec_size, v.max_vec_size);
+  std::swap (vec_size,     v.vec_size);
+}
+
+
+
+// Write this object to the archive @p ar: first the Subscriptor base
+// class, then @p vec_size and @p max_vec_size, and finally the
+// @p max_vec_size elements of the array @p val. The version argument is
+// not used.
+template <typename Number>
+template <class Archive>
+inline
+void
+Vector<Number>::save (Archive &ar, const unsigned int) const
+{
+  // the base class comes first
+  ar &static_cast<const Subscriptor &>(*this);
+
+  // then the two sizes, in this order ...
+  ar &vec_size;
+  ar &max_vec_size;
+
+  // ... and finally all of the reserved storage
+  ar &boost::serialization::make_array(val, max_vec_size);
+}
+
+
+
+// Read the data of this object from the archive @p ar, replacing the
+// current content. The data is read in the order in which save() writes
+// it: the Subscriptor base class, @p vec_size and @p max_vec_size, and
+// then @p max_vec_size elements. The version argument is not used.
+template <typename Number>
+template <class Archive>
+inline
+void
+Vector<Number>::load (Archive &ar, const unsigned int)
+{
+  // get rid of previous content. reset the pointer and the sizes right
+  // away so that this object never holds a dangling pointer (which the
+  // destructor would deallocate a second time) in case reading from the
+  // archive throws an exception below
+  if (val)
+    {
+      deallocate();
+      val = 0;
+    }
+  max_vec_size = vec_size = 0;
+
+  // then load stuff again from the archive
+  ar &static_cast<Subscriptor &>(*this);
+  ar &vec_size &max_vec_size ;
+
+  // an empty vector does not own memory (same convention as reinit(0)),
+  // so only allocate if there is something to store
+  if (max_vec_size != 0)
+    allocate();
+  ar &boost::serialization::make_array(val, max_vec_size);
+}
+
+#endif
+
+
+/*! @addtogroup Vectors
+ *@{
+ */
+
+
+/**
+ * Global function @p swap which overloads the default implementation of the
+ * C++ standard library which uses a temporary object. The function simply
+ * exchanges the data of the two vectors.
+ *
+ * @relates Vector
+ * @author Wolfgang Bangerth, 2000
+ */
+template <typename Number>
+inline
+void swap (Vector<Number> &u, Vector<Number> &v)
+{
+  // forward to the member function, which exchanges the data of the two
+  // vectors
+  u.swap (v);
+}
+
+
+/**
+ * Output operator writing a vector to a stream.
+ */
+template <typename number>
+inline
+std::ostream &
+operator << (std::ostream &os, const Vector<number> &v)
+{
+  // the actual output is done by Vector::print()
+  v.print (os);
+
+  return os;
+}
+
+/**
+ * Output operator writing a vector to a LogStream.
+ */
+template <typename number>
+inline
+LogStream &
+operator << (LogStream &os, const Vector<number> &v)
+{
+  // the actual output is done by Vector::print()
+  v.print (os);
+
+  return os;
+}
+
+
+/*@}*/
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/vector.templates.h b/include/deal.II/lac/vector.templates.h
new file mode 100644
index 0000000..8a3bc24
--- /dev/null
+++ b/include/deal.II/lac/vector.templates.h
@@ -0,0 +1,2078 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__vector_templates_h
+#define dealii__vector_templates_h
+
+
+#include <deal.II/base/template_constraints.h>
+#include <deal.II/base/numbers.h>
+#include <deal.II/base/parallel.h>
+#include <deal.II/base/thread_management.h>
+#include <deal.II/base/multithread_info.h>
+#include <deal.II/base/vectorization.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/block_vector.h>
+
+#ifdef DEAL_II_WITH_PETSC
+#  include <deal.II/lac/petsc_vector.h>
+#  include <deal.II/lac/petsc_parallel_vector.h>
+#endif
+
+#ifdef DEAL_II_WITH_TRILINOS
+#  include <deal.II/lac/trilinos_vector.h>
+#endif
+
+
+#include <cmath>
+#include <cstring>
+#include <algorithm>
+#include <iostream>
+#include <iomanip>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace internal
+{
+  typedef types::global_dof_index size_type;
+
+  // Return whether @p t is greater than or equal to zero.
+  template <typename T>
+  bool is_non_negative (const T &t)
+  {
+    const bool non_negative = (t >= 0);
+    return non_negative;
+  }
+
+
+  template <typename T>
+  bool is_non_negative (const std::complex<T> &)
+  {
+    Assert (false,
+            ExcMessage ("Complex numbers do not have an ordering."));
+
+    return false;
+  }
+
+
+  // Print @p t to stdout using the printf-style @p format. If @p format is
+  // a null pointer, the value is converted to double and printed with the
+  // default format " %5.2f".
+  template <typename T>
+  void print (const T    &t,
+              const char *format)
+  {
+    if (format == 0)
+      {
+        std::printf (" %5.2f", double(t));
+        return;
+      }
+
+    std::printf (format, t);
+  }
+
+
+
+  // Print the complex number @p t to stdout. The printf-style @p format
+  // receives the real and the imaginary part as two separate arguments. If
+  // @p format is a null pointer, both parts are converted to double and
+  // printed with the default format " %5.2f+%5.2fi".
+  template <typename T>
+  void print (const std::complex<T> &t,
+              const char            *format)
+  {
+    if (format == 0)
+      {
+        std::printf (" %5.2f+%5.2fi",
+                     double(t.real()), double(t.imag()));
+        return;
+      }
+
+    std::printf (format, t.real(), t.imag());
+  }
+
+  // call std::copy, except for in
+  // the case where we want to copy
+  // from std::complex to a
+  // non-complex type
+  // General case: copy the elements of [begin,end) to the array starting
+  // at @p dest, converting each of them from T to U.
+  template <typename T, typename U>
+  void copy (const T *begin,
+             const T *end,
+             U       *dest)
+  {
+    for (const T *source = begin; source != end; ++source, ++dest)
+      *dest = *source;
+  }
+
+  // Complex to complex: copy the elements of [begin,end) to the array
+  // starting at @p dest.
+  template <typename T, typename U>
+  void copy (const std::complex<T> *begin,
+             const std::complex<T> *end,
+             std::complex<U>       *dest)
+  {
+    for (const std::complex<T> *source = begin; source != end; ++source, ++dest)
+      *dest = *source;
+  }
+
+  // Complex to non-complex: this conversion is not possible. Nothing is
+  // copied; the Assert below always fails.
+  template <typename T, typename U>
+  void copy (const std::complex<T> * /*begin*/,
+             const std::complex<T> * /*end*/,
+             U * /*dest*/)
+  {
+    Assert (false, ExcMessage ("Can't convert a vector of complex numbers "
+                               "into a vector of reals/doubles"));
+  }
+
+  // Apply @p functor to the index range [0,vec_size). With threads
+  // enabled, ranges larger than
+  // internal::Vector::minimum_parallel_grain_size are split up and
+  // processed through tbb::parallel_for; smaller nonempty ranges are
+  // processed by a single call functor(0,vec_size). Without threads the
+  // functor is always called once with the full range.
+  template <typename Functor>
+  void vectorized_transform(Functor &functor,
+                            size_type vec_size)
+  {
+#ifdef DEAL_II_WITH_THREADS
+    if (vec_size>internal::Vector::minimum_parallel_grain_size)
+      {
+        const tbb::blocked_range<size_type>
+        complete_range (0,
+                        vec_size,
+                        internal::Vector::minimum_parallel_grain_size);
+
+        tbb::parallel_for (complete_range,
+                           functor,
+                           tbb::auto_partitioner());
+      }
+    else if (vec_size > 0)
+      functor(0,vec_size);
+#else
+    functor(0,vec_size);
+#endif
+  }
+
+
+  // Define the functors necessary to use SIMD with TBB.
+  // Functor computing val[i] *= factor for all i in [begin,end).
+  template <typename Number>
+  struct Vectorization_multiply_factor
+  {
+    Number *val;
+    Number factor;
+
+#ifdef DEAL_II_WITH_THREADS
+    // Interface for tbb::parallel_for: unpack the range and forward.
+    void operator() (const tbb::blocked_range<size_type> &range) const
+    {
+      (*this)(range.begin(), range.end());
+    }
+#endif
+
+    void operator() (const size_type begin, const size_type end) const
+    {
+      // both loops do the same; only the second carries the SIMD pragma
+      if (!parallel::internal::EnableOpenMPSimdFor<Number>::value)
+        {
+          for (size_type k=begin; k<end; ++k)
+            val[k] *= factor;
+          return;
+        }
+
+      DEAL_II_OPENMP_SIMD_PRAGMA
+      for (size_type k=begin; k<end; ++k)
+        val[k] *= factor;
+    }
+  };
+
+  // Functor computing val[i] += factor*v_val[i] for all i in [begin,end).
+  template <typename Number>
+  struct Vectorization_add_av
+  {
+    Number *val;
+    Number *v_val;
+    Number factor;
+
+#ifdef DEAL_II_WITH_THREADS
+    // Interface for tbb::parallel_for: unpack the range and forward.
+    void operator() (const tbb::blocked_range<size_type> &range) const
+    {
+      (*this)(range.begin(), range.end());
+    }
+#endif
+
+    void operator() (const size_type begin, const size_type end) const
+    {
+      // both loops do the same; only the second carries the SIMD pragma
+      if (!parallel::internal::EnableOpenMPSimdFor<Number>::value)
+        {
+          for (size_type k=begin; k<end; ++k)
+            val[k] += factor*v_val[k];
+          return;
+        }
+
+      DEAL_II_OPENMP_SIMD_PRAGMA
+      for (size_type k=begin; k<end; ++k)
+        val[k] += factor*v_val[k];
+    }
+  };
+
+  // Functor computing val[i] = x*val[i] + a*v_val[i] for all i in
+  // [begin,end).
+  template <typename Number>
+  struct Vectorization_sadd_xav
+  {
+    Number *val;
+    Number *v_val;
+    Number a;
+    Number x;
+
+#ifdef DEAL_II_WITH_THREADS
+    // Interface for tbb::parallel_for: unpack the range and forward.
+    void operator() (const tbb::blocked_range<size_type> &range) const
+    {
+      (*this)(range.begin(), range.end());
+    }
+#endif
+
+    void operator() (const size_type begin, const size_type end) const
+    {
+      // both loops do the same; only the second carries the SIMD pragma
+      if (!parallel::internal::EnableOpenMPSimdFor<Number>::value)
+        {
+          for (size_type k=begin; k<end; ++k)
+            val[k] = x*val[k] + a*v_val[k];
+          return;
+        }
+
+      DEAL_II_OPENMP_SIMD_PRAGMA
+      for (size_type k=begin; k<end; ++k)
+        val[k] = x*val[k] + a*v_val[k];
+    }
+  };
+
+  // Functor computing val[i] -= v_val[i] for all i in [begin,end).
+  template <typename Number>
+  struct Vectorization_subtract_v
+  {
+    Number *val;
+    Number *v_val;
+
+#ifdef DEAL_II_WITH_THREADS
+    // Interface for tbb::parallel_for: unpack the range and forward.
+    void operator() (const tbb::blocked_range<size_type> &range) const
+    {
+      (*this)(range.begin(), range.end());
+    }
+#endif
+
+    void operator() (const size_type begin, const size_type end) const
+    {
+      // both loops do the same; only the second carries the SIMD pragma
+      if (!parallel::internal::EnableOpenMPSimdFor<Number>::value)
+        {
+          for (size_type k=begin; k<end; ++k)
+            val[k] -= v_val[k];
+          return;
+        }
+
+      DEAL_II_OPENMP_SIMD_PRAGMA
+      for (size_type k=begin; k<end; ++k)
+        val[k] -= v_val[k];
+    }
+  };
+
+  // Functor computing val[i] += factor for all i in [begin,end).
+  template <typename Number>
+  struct Vectorization_add_factor
+  {
+    Number *val;
+    Number factor;
+
+#ifdef DEAL_II_WITH_THREADS
+    // Interface for tbb::parallel_for: unpack the range and forward.
+    void operator() (const tbb::blocked_range<size_type> &range) const
+    {
+      (*this)(range.begin(), range.end());
+    }
+#endif
+
+    void operator() (const size_type begin, const size_type end) const
+    {
+      // both loops do the same; only the second carries the SIMD pragma
+      if (!parallel::internal::EnableOpenMPSimdFor<Number>::value)
+        {
+          for (size_type k=begin; k<end; ++k)
+            val[k] += factor;
+          return;
+        }
+
+      DEAL_II_OPENMP_SIMD_PRAGMA
+      for (size_type k=begin; k<end; ++k)
+        val[k] += factor;
+    }
+  };
+
+  // Functor computing val[i] += v_val[i] for all i in [begin,end).
+  template <typename Number>
+  struct Vectorization_add_v
+  {
+    Number *val;
+    Number *v_val;
+
+#ifdef DEAL_II_WITH_THREADS
+    // Interface for tbb::parallel_for: unpack the range and forward.
+    void operator() (const tbb::blocked_range<size_type> &range) const
+    {
+      (*this)(range.begin(), range.end());
+    }
+#endif
+
+    void operator() (const size_type begin, const size_type end) const
+    {
+      // both loops do the same; only the second carries the SIMD pragma
+      if (!parallel::internal::EnableOpenMPSimdFor<Number>::value)
+        {
+          for (size_type k=begin; k<end; ++k)
+            val[k] += v_val[k];
+          return;
+        }
+
+      DEAL_II_OPENMP_SIMD_PRAGMA
+      for (size_type k=begin; k<end; ++k)
+        val[k] += v_val[k];
+    }
+  };
+
+  // Functor computing val[i] = val[i] + a*v_val[i] + b*w_val[i] for all i
+  // in [begin,end).
+  template <typename Number>
+  struct Vectorization_add_avpbw
+  {
+    Number *val;
+    Number *v_val;
+    Number *w_val;
+    Number a;
+    Number b;
+
+#ifdef DEAL_II_WITH_THREADS
+    // Interface for tbb::parallel_for: unpack the range and forward.
+    void operator() (const tbb::blocked_range<size_type> &range) const
+    {
+      (*this)(range.begin(), range.end());
+    }
+#endif
+
+    void operator() (const size_type begin, const size_type end) const
+    {
+      // both loops do the same; only the second carries the SIMD pragma
+      if (!parallel::internal::EnableOpenMPSimdFor<Number>::value)
+        {
+          for (size_type k=begin; k<end; ++k)
+            val[k] = val[k] + a*v_val[k] + b*w_val[k];
+          return;
+        }
+
+      DEAL_II_OPENMP_SIMD_PRAGMA
+      for (size_type k=begin; k<end; ++k)
+        val[k] = val[k] + a*v_val[k] + b*w_val[k];
+    }
+  };
+
+  // Functor computing val[i] = x*val[i] + v_val[i] for all i in
+  // [begin,end).
+  template <typename Number>
+  struct Vectorization_sadd_xv
+  {
+    Number *val;
+    Number *v_val;
+    Number x;
+
+#ifdef DEAL_II_WITH_THREADS
+    // Interface for tbb::parallel_for: unpack the range and forward.
+    void operator() (const tbb::blocked_range<size_type> &range) const
+    {
+      (*this)(range.begin(), range.end());
+    }
+#endif
+
+    void operator() (const size_type begin, const size_type end) const
+    {
+      // both loops do the same; only the second carries the SIMD pragma
+      if (!parallel::internal::EnableOpenMPSimdFor<Number>::value)
+        {
+          for (size_type k=begin; k<end; ++k)
+            val[k] = x*val[k] + v_val[k];
+          return;
+        }
+
+      DEAL_II_OPENMP_SIMD_PRAGMA
+      for (size_type k=begin; k<end; ++k)
+        val[k] = x*val[k] + v_val[k];
+    }
+  };
+
+  // Functor computing val[i] = x*val[i] + a*v_val[i] + b*w_val[i] for all
+  // i in [begin,end).
+  template <typename Number>
+  struct Vectorization_sadd_xavbw
+  {
+    Number *val;
+    Number *v_val;
+    Number *w_val;
+    Number x;
+    Number a;
+    Number b;
+
+#ifdef DEAL_II_WITH_THREADS
+    // Interface for tbb::parallel_for: unpack the range and forward.
+    void operator() (const tbb::blocked_range<size_type> &range) const
+    {
+      (*this)(range.begin(), range.end());
+    }
+#endif
+
+    void operator() (const size_type begin, const size_type end) const
+    {
+      // both loops do the same; only the second carries the SIMD pragma
+      if (!parallel::internal::EnableOpenMPSimdFor<Number>::value)
+        {
+          for (size_type k=begin; k<end; ++k)
+            val[k] = x*val[k] + a*v_val[k] + b*w_val[k];
+          return;
+        }
+
+      DEAL_II_OPENMP_SIMD_PRAGMA
+      for (size_type k=begin; k<end; ++k)
+        val[k] = x*val[k] + a*v_val[k] + b*w_val[k];
+    }
+  };
+
+  // Functor computing val[i] *= v_val[i] for all i in [begin,end).
+  template <typename Number>
+  struct Vectorization_scale
+  {
+    Number *val;
+    Number *v_val;
+
+#ifdef DEAL_II_WITH_THREADS
+    // Interface for tbb::parallel_for: unpack the range and forward.
+    void operator() (const tbb::blocked_range<size_type> &range) const
+    {
+      (*this)(range.begin(), range.end());
+    }
+#endif
+
+    void operator() (const size_type begin, const size_type end) const
+    {
+      // both loops do the same; only the second carries the SIMD pragma
+      if (!parallel::internal::EnableOpenMPSimdFor<Number>::value)
+        {
+          for (size_type k=begin; k<end; ++k)
+            val[k] *= v_val[k];
+          return;
+        }
+
+      DEAL_II_OPENMP_SIMD_PRAGMA
+      for (size_type k=begin; k<end; ++k)
+        val[k] *= v_val[k];
+    }
+  };
+
+  // Functor computing val[i] = a*u_val[i] for all i in [begin,end).
+  template <typename Number>
+  struct Vectorization_equ_au
+  {
+    Number *val;
+    Number *u_val;
+    Number a;
+
+#ifdef DEAL_II_WITH_THREADS
+    // Interface for tbb::parallel_for: unpack the range and forward.
+    void operator() (const tbb::blocked_range<size_type> &range) const
+    {
+      (*this)(range.begin(), range.end());
+    }
+#endif
+
+    void operator() (const size_type begin, const size_type end) const
+    {
+      // both loops do the same; only the second carries the SIMD pragma
+      if (!parallel::internal::EnableOpenMPSimdFor<Number>::value)
+        {
+          for (size_type k=begin; k<end; ++k)
+            val[k] = a*u_val[k];
+          return;
+        }
+
+      DEAL_II_OPENMP_SIMD_PRAGMA
+      for (size_type k=begin; k<end; ++k)
+        val[k] = a*u_val[k];
+    }
+  };
+
+  // Functor computing val[i] = a*u_val[i] + b*v_val[i] for all i in
+  // [begin,end).
+  template <typename Number>
+  struct Vectorization_equ_aubv
+  {
+    Number *val;
+    Number *u_val;
+    Number *v_val;
+    Number a;
+    Number b;
+
+#ifdef DEAL_II_WITH_THREADS
+    // Interface for tbb::parallel_for: unpack the range and forward.
+    void operator() (const tbb::blocked_range<size_type> &range) const
+    {
+      (*this)(range.begin(), range.end());
+    }
+#endif
+
+    void operator() (const size_type begin, const size_type end) const
+    {
+      // both loops do the same; only the second carries the SIMD pragma
+      if (!parallel::internal::EnableOpenMPSimdFor<Number>::value)
+        {
+          for (size_type k=begin; k<end; ++k)
+            val[k] = a*u_val[k] + b*v_val[k];
+          return;
+        }
+
+      DEAL_II_OPENMP_SIMD_PRAGMA
+      for (size_type k=begin; k<end; ++k)
+        val[k] = a*u_val[k] + b*v_val[k];
+    }
+  };
+
+  // Functor computing val[i] = a*u_val[i] + b*v_val[i] + c*w_val[i] for
+  // all i in [begin,end).
+  template <typename Number>
+  struct Vectorization_equ_aubvcw
+  {
+    Number *val;
+    Number *u_val;
+    Number *v_val;
+    Number *w_val;
+    Number a;
+    Number b;
+    Number c;
+
+#ifdef DEAL_II_WITH_THREADS
+    // Interface for tbb::parallel_for: unpack the range and forward.
+    void operator() (const tbb::blocked_range<size_type> &range) const
+    {
+      (*this)(range.begin(), range.end());
+    }
+#endif
+
+    void operator() (const size_type begin, const size_type end) const
+    {
+      // both loops do the same; only the second carries the SIMD pragma
+      if (!parallel::internal::EnableOpenMPSimdFor<Number>::value)
+        {
+          for (size_type k=begin; k<end; ++k)
+            val[k] = a*u_val[k] + b*v_val[k] + c*w_val[k];
+          return;
+        }
+
+      DEAL_II_OPENMP_SIMD_PRAGMA
+      for (size_type k=begin; k<end; ++k)
+        val[k] = a*u_val[k] + b*v_val[k] + c*w_val[k];
+    }
+  };
+
+  // Functor computing val[i] = a_val[i]/b_val[i] for all i in [begin,end).
+  template <typename Number>
+  struct Vectorization_ratio
+  {
+    Number *val;
+    Number *a_val;
+    Number *b_val;
+
+#ifdef DEAL_II_WITH_THREADS
+    // Interface for tbb::parallel_for: unpack the range and forward.
+    void operator() (const tbb::blocked_range<size_type> &range) const
+    {
+      (*this)(range.begin(), range.end());
+    }
+#endif
+
+    void operator() (const size_type begin, const size_type end) const
+    {
+      // both loops do the same; only the second carries the SIMD pragma
+      if (!parallel::internal::EnableOpenMPSimdFor<Number>::value)
+        {
+          for (size_type k=begin; k<end; ++k)
+            val[k] = a_val[k]/b_val[k];
+          return;
+        }
+
+      DEAL_II_OPENMP_SIMD_PRAGMA
+      for (size_type k=begin; k<end; ++k)
+        val[k] = a_val[k]/b_val[k];
+    }
+  };
+
+}
+
+
+
+// Copy constructor: allocate memory for v.size() elements and copy the
+// content of @p v. No memory is allocated if @p v is empty.
+template <typename Number>
+Vector<Number>::Vector (const Vector<Number> &v)
+  :
+  Subscriptor(),
+  vec_size(v.size()),
+  max_vec_size(v.size()),
+  val(0)
+{
+  // an empty source leaves this object empty as well
+  if (vec_size == 0)
+    return;
+
+  allocate();
+  *this = v;
+}
+
+
+
+#ifdef DEAL_II_WITH_CXX11
+// Move constructor: start out as an empty vector and then exchange the
+// contents with @p v, which is left empty.
+template <typename Number>
+Vector<Number>::Vector (Vector<Number> &&v)
+  :
+  Subscriptor(),
+  vec_size(0),
+  max_vec_size(0),
+  val(0)
+{
+  this->swap (v);
+}
+#endif
+
+
+
+#ifndef DEAL_II_EXPLICIT_CONSTRUCTOR_BUG
+// Copy constructor taking a vector with a different element type:
+// allocate memory for v.size() elements and copy the content of @p v. No
+// memory is allocated if @p v is empty.
+template <typename Number>
+template <typename OtherNumber>
+Vector<Number>::Vector (const Vector<OtherNumber> &v)
+  :
+  Subscriptor(),
+  vec_size(v.size()),
+  max_vec_size(v.size()),
+  val(0)
+{
+  // an empty source leaves this object empty as well
+  if (vec_size == 0)
+    return;
+
+  allocate();
+  *this = v;
+}
+#endif
+
+
+#ifdef DEAL_II_WITH_PETSC
+
+// Copy constructor from a sequential PETSc vector: allocate memory for
+// v.size() elements and copy them from the array obtained through
+// VecGetArray(). No memory is allocated if @p v is empty.
+template <typename Number>
+Vector<Number>::Vector (const PETScWrappers::Vector &v)
+  :
+  Subscriptor(),
+  vec_size(v.size()),
+  max_vec_size(v.size()),
+  val(0)
+{
+  // an empty source leaves this object empty as well
+  if (vec_size == 0)
+    return;
+
+  allocate();
+
+  // get access to the data array of the PETSc vector ...
+  PetscScalar *petsc_data;
+  int ierr = VecGetArray (static_cast<const Vec &>(v), &petsc_data);
+  AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+  // ... copy its elements ...
+  internal::copy (petsc_data, petsc_data+vec_size, begin());
+
+  // ... and hand the array back to PETSc
+  ierr = VecRestoreArray (static_cast<const Vec &>(v), &petsc_data);
+  AssertThrow (ierr == 0, ExcPETScError(ierr));
+}
+
+
+
+// Copy constructor from a parallel PETSc vector. The copy is done in two
+// steps, going through a sequential PETSc vector.
+template <typename Number>
+Vector<Number>::Vector (const PETScWrappers::MPI::Vector &v)
+  :
+  Subscriptor(),
+  vec_size(0),
+  max_vec_size(0),
+  val(0)
+{
+  // an empty source leaves this object empty as well
+  if (v.size() == 0)
+    return;
+
+  // first convert to a sequential PETSc vector, then copy that one
+  PETScWrappers::Vector sequential_copy (v);
+  *this = sequential_copy;
+}
+
+#endif
+
+
+#ifdef DEAL_II_WITH_TRILINOS
+
+// Copy constructor from a parallel Trilinos vector: allocate memory for
+// v.size() elements and copy them from a localized copy of @p v. No memory
+// is allocated if @p v is empty.
+template <typename Number>
+Vector<Number>::Vector (const TrilinosWrappers::MPI::Vector &v)
+  :
+  Subscriptor(),
+  vec_size(v.size()),
+  max_vec_size(v.size()),
+  val(0)
+{
+  // an empty source leaves this object empty as well
+  if (vec_size == 0)
+    return;
+
+  allocate();
+
+  // Copy the distributed vector to a local one at all processors.
+  // TODO: There could be a better solution than this, but it has not yet
+  // been found.
+  TrilinosWrappers::Vector localized_vector (v);
+
+  // get a view of the data of the localized vector ...
+  TrilinosScalar **view;
+  const int ierr = localized_vector.trilinos_vector().ExtractView (&view);
+  AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+  // ... and copy its first vec_size elements
+  std::copy (view[0], view[0]+vec_size, begin());
+}
+
+
+
+// Copy constructor from a localized Trilinos vector: allocate memory for
+// v.size() elements and copy them from the data view of @p v. No memory is
+// allocated if @p v is empty.
+template <typename Number>
+Vector<Number>::Vector (const TrilinosWrappers::Vector &v)
+  :
+  Subscriptor(),
+  vec_size(v.size()),
+  max_vec_size(v.size()),
+  val(0)
+{
+  // an empty source leaves this object empty as well
+  if (vec_size == 0)
+    return;
+
+  allocate();
+
+  // get a view of the data of the Trilinos vector ...
+  TrilinosScalar **view;
+  const int ierr = v.trilinos_vector().ExtractView (&view);
+  AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+  // ... and copy its first vec_size elements
+  std::copy (view[0], view[0]+vec_size, begin());
+}
+
+#endif
+
+
+// Resize this vector to the size of @p v. Only the size of @p v is used,
+// its content is not copied.
+template <typename Number>
+template <typename Number2>
+void Vector<Number>::reinit (const Vector<Number2> &v,
+                             const bool omit_zeroing_entries)
+{
+  const size_type new_size = v.size();
+  reinit (new_size, omit_zeroing_entries);
+}
+
+
+
+// Return whether all entries of the vector are equal to zero. The vector
+// must not be empty.
+template <typename Number>
+bool
+Vector<Number>::all_zero () const
+{
+  Assert (vec_size!=0, ExcEmptyObject());
+
+  // stop at the first nonzero entry
+  const Number *const past_the_end = val + vec_size;
+  for (const Number *entry = val; entry != past_the_end; ++entry)
+    if (*entry != Number(0))
+      return false;
+
+  return true;
+}
+
+
+
+// Return whether internal::is_non_negative() holds for all entries of the
+// vector. The vector must not be empty.
+template <typename Number>
+bool
+Vector<Number>::is_non_negative () const
+{
+  Assert (vec_size!=0, ExcEmptyObject());
+
+  // stop at the first entry that fails the test
+  for (size_type k=0; k<vec_size; ++k)
+    {
+      const bool entry_is_ok = internal::is_non_negative (val[k]);
+      if (!entry_is_ok)
+        return false;
+    }
+
+  return true;
+}
+
+
+
+namespace internal
+{
+  namespace Vector
+  {
+    // Set the entries of @p dst with indices in [begin,end) to @p s. If
+    // @p s compares equal to a value-initialized T, the memory is zeroed
+    // with memset; otherwise the value is written with std::fill.
+    template <typename T>
+    void set_subrange (const T            s,
+                       const typename dealii::Vector<T>::size_type begin,
+                       const typename dealii::Vector<T>::size_type end,
+                       dealii::Vector<T> &dst)
+    {
+      if (!(s == T()))
+        {
+          std::fill (&*(dst.begin()+begin), &*(dst.begin()+end), s);
+          return;
+        }
+
+      std::memset ((dst.begin()+begin),0,(end-begin)*sizeof(T));
+    }
+
+
+    // Copy the entries with indices in [begin,end) from @p src to @p dst.
+    // Source and destination have the same element type, so the data is
+    // copied with memcpy.
+    template <typename T>
+    void copy_subrange (const typename dealii::Vector<T>::size_type         begin,
+                        const typename dealii::Vector<T>::size_type         end,
+                        const dealii::Vector<T> &src,
+                        dealii::Vector<T>       &dst)
+    {
+      const std::size_t n_bytes = (end-begin)*sizeof(T);
+
+      memcpy(&*(dst.begin()+begin), &*(src.begin()+begin), n_bytes);
+    }
+
+
+    // Copy the entries with indices in [begin,end) from @p src to @p dst,
+    // converting each element from T to U by assignment.
+    template <typename T, typename U>
+    void copy_subrange (const typename dealii::Vector<T>::size_type         begin,
+                        const typename dealii::Vector<T>::size_type         end,
+                        const dealii::Vector<T> &src,
+                        dealii::Vector<U>       &dst)
+    {
+      typedef typename dealii::Vector<T>::size_type index_type;
+
+      const T *const source = src.begin();
+      U *const       target = dst.begin();
+
+      DEAL_II_OPENMP_SIMD_PRAGMA
+      for (index_type k=begin; k<end; ++k)
+        target[k] = source[k];
+    }
+
+
+    // Copy the content of @p src to @p dst, resizing @p dst if the sizes
+    // differ. Vectors larger than minimum_parallel_grain_size are copied
+    // in chunks through parallel::apply_to_subranges; smaller nonempty
+    // ones with a single call to copy_subrange. Nothing is done if
+    // @p src and @p dst are the same object.
+    template <typename T, typename U>
+    void copy_vector (const dealii::Vector<T> &src,
+                      dealii::Vector<U>       &dst)
+    {
+      // self-assignment: nothing to do
+      if (PointerComparison::equal(&src, &dst))
+        return;
+
+      const typename dealii::Vector<T>::size_type vec_size = src.size();
+      const typename dealii::Vector<U>::size_type dst_size = dst.size();
+      // resize the destination without zeroing it; all of its entries
+      // are overwritten below
+      if (dst_size != vec_size)
+        dst.reinit (vec_size, true);
+      if (vec_size>internal::Vector::minimum_parallel_grain_size)
+        parallel::apply_to_subranges (0U, vec_size,
+                                      std_cxx11::bind(&internal::Vector::template
+                                                      copy_subrange <T,U>,
+                                                      std_cxx11::_1,
+                                                      std_cxx11::_2,
+                                                      std_cxx11::cref(src),
+                                                      std_cxx11::ref(dst)),
+                                      internal::Vector::minimum_parallel_grain_size);
+      else if (vec_size > 0)
+        copy_subrange (0U, vec_size, src, dst);
+    }
+  }
+}
+
+
+
+// Set all entries of the vector to @p s, which has to be finite. Assigning
+// a nonzero value to an empty vector is an error. Vectors larger than
+// minimum_parallel_grain_size are filled in chunks through
+// parallel::apply_to_subranges.
+template <typename Number>
+Vector<Number> &
+Vector<Number>::operator= (const Number s)
+{
+  AssertIsFinite(s);
+  if (s != Number())
+    Assert (vec_size!=0, ExcEmptyObject());
+
+  // nothing to fill in an empty vector
+  if (vec_size == 0)
+    return *this;
+
+  if (vec_size <= internal::Vector::minimum_parallel_grain_size)
+    internal::Vector::set_subrange<Number>(s, 0U, vec_size, *this);
+  else
+    parallel::apply_to_subranges (0U, vec_size,
+                                  std_cxx11::bind(&internal::Vector::template
+                                                  set_subrange<Number>,
+                                                  s, std_cxx11::_1, std_cxx11::_2, std_cxx11::ref(*this)),
+                                  internal::Vector::minimum_parallel_grain_size);
+
+  return *this;
+}
+
+
+
+#ifdef DEAL_II_BOOST_BIND_COMPILER_BUG
+// Specialization for std::complex<float> that fills the vector with
+// std::fill instead of going through std_cxx11::bind.
+template <>
+Vector<std::complex<float> > &
+Vector<std::complex<float> >::operator= (const std::complex<float> s)
+{
+  AssertIsFinite(s);
+  if (s != std::complex<float>())
+    Assert (vec_size!=0, ExcEmptyObject());
+
+  // nothing to fill in an empty vector
+  if (vec_size == 0)
+    return *this;
+
+  std::fill (begin(), end(), s);
+  return *this;
+}
+#endif
+
+
+
+template <typename Number>
+Vector<Number> &Vector<Number>::operator *= (const Number factor)
+{
+  // In-place multiplication by the scalar 'factor'; returns *this. The
+  // vector must not be empty and the factor must be finite.
+  AssertIsFinite(factor);
+
+  Assert (vec_size!=0, ExcEmptyObject());
+
+  // describe the operation in a functor and let vectorized_transform run it
+  // over all vec_size entries
+  internal::Vectorization_multiply_factor<Number> scaling_op;
+  scaling_op.factor = factor;
+  scaling_op.val = val;
+  internal::vectorized_transform(scaling_op,vec_size);
+
+  return *this;
+}
+
+
+
+template <typename Number>
+void
+Vector<Number>::add (const Number a,
+                     const Vector<Number> &v)
+{
+  // Update this vector with the scaled vector a*v. The scalar must be
+  // finite, this vector non-empty, and v of the same size.
+  AssertIsFinite(a);
+
+  Assert (vec_size!=0, ExcEmptyObject());
+  Assert (vec_size == v.vec_size, ExcDimensionMismatch(vec_size, v.vec_size));
+
+  // collect the operands in a functor and run it over all entries
+  internal::Vectorization_add_av<Number> axpy_op;
+  axpy_op.factor = a;
+  axpy_op.v_val = v.val;
+  axpy_op.val = val;
+  internal::vectorized_transform(axpy_op,vec_size);
+}
+
+
+
+template <typename Number>
+void
+Vector<Number>::sadd (const Number x,
+                      const Number a,
+                      const Vector<Number> &v)
+{
+  // Scale-and-add update involving the scalars x and a and the vector v.
+  // Both scalars must be finite, this vector non-empty, and v of the same
+  // size.
+  AssertIsFinite(x);
+  AssertIsFinite(a);
+
+  Assert (vec_size!=0, ExcEmptyObject());
+  Assert (vec_size == v.vec_size, ExcDimensionMismatch(vec_size, v.vec_size));
+
+  // collect the operands in a functor and run it over all entries
+  internal::Vectorization_sadd_xav<Number> sadd_op;
+  sadd_op.x = x;
+  sadd_op.a = a;
+  sadd_op.v_val = v.val;
+  sadd_op.val = val;
+  internal::vectorized_transform(sadd_op,vec_size);
+}
+
+
+
+namespace internal
+{
+  namespace Vector
+  {
+    // All sums over all the vector entries (l2-norm, inner product, etc.) are
+    // performed with the same code, using a templated operation defined
+    // here. There are always two versions defined, a standard one that covers
+    // most cases and a vectorized one which is only for equal types and float
+    // and double.
+    template <typename Number, typename Number2>
+    struct Dot
+    {
+      // Scalar kernel: returns the product of the current X entry with the
+      // conjugate of the current Y entry (converted to Number) and advances
+      // both pointers by one. The last two arguments are not used.
+      Number
+      operator() (const Number *&X, const Number2 *&Y, const Number &, Number *&) const
+      {
+        const Number term = *X * Number(numbers::NumberTraits<Number2>::conjugate(*Y));
+        ++X;
+        ++Y;
+        return term;
+      }
+
+      // Vectorized kernel: loads one VectorizedArray from each of X and Y,
+      // advances both pointers by the array width and returns the
+      // element-wise product.
+      VectorizedArray<Number>
+      do_vectorized(const Number *&X, const Number *&Y, const Number &, Number *&) const
+      {
+        VectorizedArray<Number> lhs, rhs;
+        lhs.load(X);
+        rhs.load(Y);
+        X += VectorizedArray<Number>::n_array_elements;
+        Y += VectorizedArray<Number>::n_array_elements;
+        return lhs * rhs;
+      }
+    };
+
+    template <typename Number, typename RealType>
+    struct Norm2
+    {
+      // Scalar kernel: returns abs_square of the current X entry and advances
+      // X by one. The remaining arguments are not used.
+      RealType
+      operator() (const Number  *&X, const Number  *&, const RealType &, Number *&) const
+      {
+        const RealType squared = numbers::NumberTraits<Number>::abs_square(*X);
+        ++X;
+        return squared;
+      }
+
+      // Vectorized kernel: loads one VectorizedArray from X, advances X by
+      // the array width and returns the element-wise square.
+      VectorizedArray<Number>
+      do_vectorized(const Number *&X, const Number *&, const Number &, Number *&) const
+      {
+        VectorizedArray<Number> entries;
+        entries.load(X);
+        X += VectorizedArray<Number>::n_array_elements;
+        return entries * entries;
+      }
+    };
+
+    template <typename Number, typename RealType>
+    struct Norm1
+    {
+      // Scalar kernel: returns the absolute value of the current X entry and
+      // advances X by one. The remaining arguments are not used.
+      RealType
+      operator() (const Number  *&X, const Number  *&, const RealType &, Number *&) const
+      {
+        const RealType magnitude = numbers::NumberTraits<Number>::abs(*X);
+        ++X;
+        return magnitude;
+      }
+
+      // Vectorized kernel: loads one VectorizedArray from X, advances X by
+      // the array width and returns the element-wise absolute value.
+      VectorizedArray<Number>
+      do_vectorized(const Number *&X, const Number *&, const Number &, Number *&) const
+      {
+        VectorizedArray<Number> entries;
+        entries.load(X);
+        X += VectorizedArray<Number>::n_array_elements;
+        return std::abs(entries);
+      }
+    };
+
+    template <typename Number, typename RealType>
+    struct NormP
+    {
+      // Scalar kernel: returns the absolute value of the current X entry
+      // raised to the power p and advances X by one. The second and fourth
+      // arguments are not used.
+      RealType
+      operator() (const Number  *&X, const Number  *&, const RealType &p, Number *&) const
+      {
+        const RealType powered = std::pow(numbers::NumberTraits<Number>::abs(*X), p);
+        ++X;
+        return powered;
+      }
+
+      // Vectorized kernel: loads one VectorizedArray from X, advances X by
+      // the array width and returns |x|^p element-wise.
+      VectorizedArray<Number>
+      do_vectorized(const Number *&X, const Number *&, const Number &p, Number *&) const
+      {
+        VectorizedArray<Number> entries;
+        entries.load(X);
+        X += VectorizedArray<Number>::n_array_elements;
+        return std::pow(std::abs(entries),p);
+      }
+    };
+
+    template <typename Number>
+    struct MeanValue
+    {
+      // Scalar kernel: returns the current X entry unchanged and advances X
+      // by one, so that accumulating it yields the plain sum of the entries.
+      // The remaining arguments are not used.
+      Number
+      operator() (const Number  *&X, const Number  *&, const Number &, Number *&) const
+      {
+        const Number entry = *X;
+        ++X;
+        return entry;
+      }
+
+      // Vectorized kernel: loads one VectorizedArray from X, advances X by
+      // the array width and returns the loaded values.
+      VectorizedArray<Number>
+      do_vectorized(const Number *&X, const Number *&, const Number &, Number *&) const
+      {
+        VectorizedArray<Number> entries;
+        entries.load(X);
+        X += VectorizedArray<Number>::n_array_elements;
+        return entries;
+      }
+    };
+
+    template <typename Number>
+    struct AddAndDot
+    {
+      // Scalar kernel: first updates the current X entry by adding a times
+      // the current V entry, then returns the updated X entry times the
+      // conjugate of the current W entry. All three pointers advance by one.
+      Number
+      operator() (const Number *&V, const Number *&W, const Number &a,
+                  Number *&X) const
+      {
+        *X += a * (*V);
+        ++V;
+        // W is read only after X has been written because the two pointers
+        // may refer to the same memory
+        const Number term = *X * Number(numbers::NumberTraits<Number>::conjugate(*W));
+        ++X;
+        ++W;
+        return term;
+      }
+
+      // Vectorized kernel: performs the same update and product on one
+      // VectorizedArray worth of entries and advances all three pointers by
+      // the array width.
+      VectorizedArray<Number>
+      do_vectorized(const Number *&V, const Number *&W, const Number &a,
+                    Number *&X) const
+      {
+        VectorizedArray<Number> updated, direction, weights;
+        updated.load(X);
+        direction.load(V);
+        updated += a * direction;
+        updated.store(X);
+        // the load from W has to come after the store to X because the
+        // pointers might point to the same memory
+        weights.load(W);
+        X += VectorizedArray<Number>::n_array_elements;
+        V += VectorizedArray<Number>::n_array_elements;
+        W += VectorizedArray<Number>::n_array_elements;
+        return updated * weights;
+      }
+    };
+
+
+
+    // this is the inner working routine for the accumulation loops
+    // below. This is the standard case where the loop bounds are known. We
+    // pulled this function out of the regular accumulate routine because we
+    // might do this thing vectorized (see specialized function below)
+    //
+    // For every chunk index i in [start_chunk, n_chunks) the operation 'op'
+    // is invoked 32 times (4 initial calls plus 7 rounds of 4 calls) and the
+    // sum of the 32 return values is stored in outer_results[i]. The
+    // pointers X, Y and Z are passed to 'op' by reference, so whatever
+    // advancing 'op' performs on them is visible to the caller. The order of
+    // the calls and of the additions below determines the floating point
+    // summation order and must not be changed.
+    template <typename Operation, typename Number, typename Number2,
+              typename ResultType, typename size_type>
+    void
+    accumulate_regular(const Operation &op,
+                       const Number   *&X,
+                       const Number2  *&Y,
+                       const ResultType power,
+                       const size_type  n_chunks,
+                       Number         *&Z,
+                       ResultType (&outer_results)[128],
+                       internal::bool2type<false>,
+                       const unsigned int start_chunk=0)
+    {
+      // start_chunk == n_chunks is allowed and results in no work
+      AssertIndexRange(start_chunk, n_chunks+1);
+      for (size_type i=start_chunk; i<n_chunks; ++i)
+        {
+          // four independent partial sums, each fed by every fourth call
+          ResultType r0 = op(X, Y, power, Z);
+          ResultType r1 = op(X, Y, power, Z);
+          ResultType r2 = op(X, Y, power, Z);
+          ResultType r3 = op(X, Y, power, Z);
+          for (size_type j=1; j<8; ++j)
+            {
+              r0 += op(X, Y, power, Z);
+              r1 += op(X, Y, power, Z);
+              r2 += op(X, Y, power, Z);
+              r3 += op(X, Y, power, Z);
+            }
+          // combine the four partial sums pairwise
+          r0 += r1;
+          r2 += r3;
+          outer_results[i] = r0 + r2;
+        }
+    }
+
+
+
+    // this is the inner working routine for the accumulation loops
+    // below. This is the specialized case where the loop bounds are known and
+    // where we can vectorize. In that case, we request the 'do_vectorized'
+    // routine of the operation instead of the regular one which does several
+    // operations at once.
+    //
+    // Each pass of the main loop calls op.do_vectorized 32 times and stores
+    // the resulting VectorizedArray of partial sums into n_array_elements
+    // consecutive entries of outer_results. Chunks that do not fill a whole
+    // group of n_array_elements are handed to the non-vectorized overload.
+    // As there, the order of calls and additions fixes the summation order.
+    template <typename Operation, typename Number, typename size_type>
+    void
+    accumulate_regular(const Operation &op,
+                       const Number   *&X,
+                       const Number   *&Y,
+                       const Number     power,
+                       const size_type  n_chunks,
+                       Number         *&Z,
+                       Number (&outer_results)[128],
+                       internal::bool2type<true>)
+    {
+      for (size_type i=0; i<n_chunks/VectorizedArray<Number>::n_array_elements; ++i)
+        {
+          // four independent vectorized partial sums
+          VectorizedArray<Number> r0 = op.do_vectorized(X, Y, power, Z);
+          VectorizedArray<Number> r1 = op.do_vectorized(X, Y, power, Z);
+          VectorizedArray<Number> r2 = op.do_vectorized(X, Y, power, Z);
+          VectorizedArray<Number> r3 = op.do_vectorized(X, Y, power, Z);
+          for (size_type j=1; j<8; ++j)
+            {
+              r0 += op.do_vectorized(X, Y, power, Z);
+              r1 += op.do_vectorized(X, Y, power, Z);
+              r2 += op.do_vectorized(X, Y, power, Z);
+              r3 += op.do_vectorized(X, Y, power, Z);
+            }
+          // combine pairwise and write all lanes into outer_results
+          r0 += r1;
+          r2 += r3;
+          r0 += r2;
+          r0.store(&outer_results[i*VectorizedArray<Number>::n_array_elements]);
+        }
+
+      // If we are treating a case where the vector length is not divisible by
+      // the vectorization length, need the other routine for the cleanup work
+      // (it starts at the first chunk index not covered by the loop above)
+      if (n_chunks % VectorizedArray<Number>::n_array_elements != 0)
+        accumulate_regular(op, X, Y, power, n_chunks, Z, outer_results,
+                           internal::bool2type<false>(),
+                           (n_chunks/VectorizedArray<Number>::n_array_elements)
+                           * VectorizedArray<Number>::n_array_elements);
+    }
+
+
+
+    // this is the main working loop for all vector sums using the templated
+    // operation above. it accumulates the sums using a block-wise summation
+    // algorithm with post-update. this blocked algorithm has been proposed in
+    // a similar form by Castaldo, Whaley and Chronopoulos (SIAM
+    // J. Sci. Comput. 31, 1156-1174, 2008) and we use the smallest possible
+    // block size, 2. Sometimes it is referred to as pairwise summation. The
+    // worst case error made by this algorithm is on the order O(eps *
+    // log2(vec_size)), whereas a naive summation is O(eps * vec_size). Even
+    // though the Kahan summation is even more accurate with an error O(eps)
+    // by carrying along remainders not captured by the main sum, that involves
+    // additional costs which are not worthwhile. See the Wikipedia article on
+    // the Kahan summation algorithm.
+
+    // The algorithm implemented here has the additional benefit that it is
+    // easily parallelized without changing the order of how the elements are
+    // added (floating point addition is not associative). For the same vector
+    // size and minimum_parallel_grain_size, the blocks are always the
+    // same and added pairwise. At the innermost level, eight values are added
+    // consecutively in order to better balance multiplications and additions.
+
+    // The code returns the result through the reference argument 'result'
+    // (rather than as a return value) in order to make spawning tasks simpler
+    // and use automatic template deduction.
+
+    // Arguments: 'op' is the per-entry operation (it provides operator() and
+    // do_vectorized), X and Y are the two input arrays, 'power' is an extra
+    // scalar forwarded to 'op', vec_size is the number of entries to process,
+    // Z is a writable array forwarded to 'op', and 'result' receives the
+    // accumulated sum. 'depth' limits how many further levels of recursion
+    // may create tasks: -1 means "not yet determined", 0 means "do not create
+    // any more tasks".
+    template <typename Operation, typename Number, typename Number2,
+              typename ResultType, typename size_type>
+    void accumulate (const Operation   &op,
+                     const Number      *X,
+                     const Number2     *Y,
+                     const ResultType   power,
+                     const size_type    vec_size,
+                     Number            *Z,
+                     ResultType        &result,
+                     const int          depth = -1)
+    {
+      (void)depth;
+
+      if (vec_size <= 4096)
+        {
+          // the vector is short enough so we perform the summation. first
+          // work on the regular part. The innermost 32 values are expanded in
+          // order to obtain known loop bounds for most of the work.
+          const Number *X_original = X;
+          (void)X_original;
+          ResultType outer_results [128];
+          size_type n_chunks = vec_size / 32;
+          const size_type remainder = vec_size % 32;
+          // a partial chunk needs one more slot in outer_results
+          Assert (remainder == 0 || n_chunks < 128, ExcInternalError());
+
+          // Select between the regular version and vectorized version based
+          // on the number types we are given. To choose the vectorized
+          // version often enough, we need to have all tasks but the last one
+          // to be divisible by the vectorization length
+          accumulate_regular(op, X, Y, power, n_chunks, Z, outer_results,
+                             internal::bool2type<(types_are_equal<Number,Number2>::value &&
+                                                  (types_are_equal<Number,double>::value ||
+                                                   types_are_equal<Number,float>::value))>());
+
+          // now work on the remainder, i.e., the last up to 32 values. Use
+          // switch statement with fall-through to work on these values.
+          if (remainder > 0)
+            {
+              const size_type inner_chunks = remainder / 8;
+              Assert (inner_chunks <= 3, ExcInternalError());
+              const size_type remainder_inner = remainder % 8;
+              ResultType r0 = ResultType(), r1 = ResultType(),
+                         r2 = ResultType();
+              switch (inner_chunks)
+                {
+                case 3:
+                  r2 = op(X, Y, power, Z);
+                  for (size_type j=1; j<8; ++j)
+                    r2 += op(X, Y, power, Z);
+                // no break
+                case 2:
+                  r1 = op(X, Y, power, Z);
+                  for (size_type j=1; j<8; ++j)
+                    r1 += op(X, Y, power, Z);
+                  r1 += r2;
+                // no break
+                case 1:
+                  r2 = op(X, Y, power, Z);
+                  for (size_type j=1; j<8; ++j)
+                    r2 += op(X, Y, power, Z);
+                // no break
+                default:
+                  for (size_type j=0; j<remainder_inner; ++j)
+                    r0 += op(X, Y, power, Z);
+                  r0 += r2;
+                  r0 += r1;
+                  outer_results[n_chunks] = r0;
+                  break;
+                }
+              n_chunks++;
+            }
+          // every entry must have been consumed exactly once
+          AssertDimension(static_cast<size_type> (X - X_original), vec_size);
+
+          // now sum the results from the chunks
+          // recursively
+          while (n_chunks > 1)
+            {
+              // pad with a zero so that the entries can be added in pairs
+              if (n_chunks % 2 == 1)
+                outer_results[n_chunks++] = ResultType();
+              for (size_type i=0; i<n_chunks; i+=2)
+                outer_results[i/2] = outer_results[i] + outer_results[i+1];
+              n_chunks /= 2;
+            }
+          result = outer_results[0];
+        }
+#ifdef DEAL_II_WITH_THREADS
+      else if (MultithreadInfo::n_threads() > 1 &&
+               vec_size > 4 * internal::Vector::minimum_parallel_grain_size &&
+               depth != 0)
+        {
+          // split the vector into smaller pieces to be worked on recursively
+          // and create tasks for them. Make pieces divisible by 1024.
+          const size_type new_size = (vec_size / 4096) * 1024;
+          ResultType r0, r1, r2, r3;
+
+          // find out how many recursions we should make (avoid too deep
+          // hierarchies of tasks on large vectors), max use 8 *
+          // MultithreadInfo::n_threads()
+          int next_depth = depth;
+          if (depth == -1)
+            next_depth = 8 * MultithreadInfo::n_threads();
+          next_depth /= 4;
+
+          Threads::TaskGroup<> task_group;
+          task_group += Threads::new_task(&accumulate<Operation,Number,Number2,
+                                          ResultType,size_type>,
+                                          op, X, Y, power, new_size, Z, r0, next_depth);
+          task_group += Threads::new_task(&accumulate<Operation,Number,Number2,
+                                          ResultType,size_type>,
+                                          op, X+new_size, Y+new_size, power,
+                                          new_size, Z+new_size, r1, next_depth);
+          task_group += Threads::new_task(&accumulate<Operation,Number,Number2,
+                                          ResultType,size_type>,
+                                          op, X+2*new_size, Y+2*new_size, power,
+                                          new_size, Z+2*new_size, r2, next_depth);
+          // the last piece also takes whatever is left over
+          task_group += Threads::new_task(&accumulate<Operation,Number,Number2,
+                                          ResultType,size_type>,
+                                          op, X+3*new_size, Y+3*new_size, power,
+                                          vec_size-3*new_size, Z+3*new_size, r3,
+                                          next_depth);
+          task_group.join_all();
+          r0 += r1;
+          r2 += r3;
+          result = r0 + r2;
+        }
+#endif
+      else
+        {
+          // split vector into four pieces and work on the pieces
+          // recursively. Make pieces (except last) divisible by 1024.
+          //
+          // 'depth' is forwarded unchanged: if we ended up here because the
+          // task depth limit was reached (depth == 0), the sub-calls must not
+          // start from depth == -1 again, otherwise they could create new
+          // tasks and the limit would have no effect. The pieces and the
+          // order of the additions are the same either way, so the computed
+          // sum does not depend on this.
+          const size_type new_size = (vec_size / 4096) * 1024;
+          ResultType r0, r1, r2, r3;
+          accumulate (op, X, Y, power, new_size, Z, r0, depth);
+          accumulate (op, X+new_size, Y+new_size, power, new_size, Z+new_size, r1,
+                      depth);
+          accumulate (op, X+2*new_size, Y+2*new_size, power, new_size, Z+2*new_size, r2,
+                      depth);
+          accumulate (op, X+3*new_size, Y+3*new_size, power, vec_size-3*new_size,
+                      Z+3*new_size, r3, depth);
+          r0 += r1;
+          r2 += r3;
+          result = r0 + r2;
+        }
+    }
+  }
+}
+
+
+
+template <typename Number>
+template <typename Number2>
+Number Vector<Number>::operator * (const Vector<Number2> &v) const
+{
+  // Scalar product of this vector with v, accumulated with the Dot
+  // operation. Both vectors must be non-empty and of equal size.
+  Assert (vec_size!=0, ExcEmptyObject());
+
+  // the product of a vector with itself is delegated to norm_sqr()
+  if (PointerComparison::equal (this, &v))
+    return norm_sqr();
+
+  Assert (vec_size == v.size(),
+          ExcDimensionMismatch(vec_size, v.size()));
+
+  internal::Vector::Dot<Number,Number2> dot_operation;
+  Number sum;
+  internal::Vector::accumulate (dot_operation,
+                                val, v.val, Number(), vec_size, val, sum);
+  AssertIsFinite(sum);
+
+  return sum;
+}
+
+
+
+template <typename Number>
+typename Vector<Number>::real_type
+Vector<Number>::norm_sqr () const
+{
+  // Sum of abs_square over all entries, accumulated with the Norm2
+  // operation. The vector must not be empty.
+  Assert (vec_size!=0, ExcEmptyObject());
+
+  internal::Vector::Norm2<Number,real_type> square_operation;
+  real_type sum;
+  internal::Vector::accumulate (square_operation,
+                                val, val, real_type(), vec_size, val, sum);
+
+  AssertIsFinite(sum);
+
+  return sum;
+}
+
+
+
+template <typename Number>
+Number Vector<Number>::mean_value () const
+{
+  // Sum of all entries divided by the number of entries. The vector must
+  // not be empty.
+  Assert (vec_size!=0, ExcEmptyObject());
+
+  internal::Vector::MeanValue<Number> sum_operation;
+  Number entry_sum;
+  internal::Vector::accumulate (sum_operation,
+                                val, val, Number(), vec_size, val, entry_sum);
+
+  return entry_sum / real_type(size());
+}
+
+
+
+template <typename Number>
+typename Vector<Number>::real_type
+Vector<Number>::l1_norm () const
+{
+  // Sum of the absolute values of all entries, accumulated with the Norm1
+  // operation. The vector must not be empty.
+  Assert (vec_size!=0, ExcEmptyObject());
+
+  internal::Vector::Norm1<Number,real_type> abs_operation;
+  real_type abs_sum;
+  internal::Vector::accumulate (abs_operation,
+                                val, val, real_type(), vec_size, val, abs_sum);
+
+  return abs_sum;
+}
+
+
+
+template <typename Number>
+typename Vector<Number>::real_type
+Vector<Number>::l2_norm () const
+{
+  // Strategy: compute the sum of squares first. If it is finite and at
+  // least numeric_limits::min(), its square root is the answer. Otherwise
+  // the sum over- or underflowed although the norm itself may be
+  // representable, and the vector is traversed a second time with a running
+  // scale factor (the BLAS approach, see e.g. dnrm2.f). This second pass is
+  // rare, so its cost is acceptable.
+  Assert (vec_size!=0, ExcEmptyObject());
+
+  real_type norm_square;
+  internal::Vector::accumulate (internal::Vector::Norm2<Number,real_type>(),
+                                val, val, real_type(), vec_size, val, norm_square);
+
+  const bool square_is_usable =
+    numbers::is_finite(norm_square) &&
+    norm_square >= std::numeric_limits<real_type>::min();
+  if (square_is_usable)
+    return std::sqrt(norm_square);
+
+  // scaled summation: 'scale' is the largest absolute value seen so far and
+  // 'sum' holds the sum of squares of the entries divided by 'scale'
+  real_type scale = 0.;
+  real_type sum = 1.;
+  for (size_type i=0; i<vec_size; ++i)
+    {
+      // zero entries do not contribute
+      if (val[i] == Number())
+        continue;
+
+      const real_type abs_x =
+        numbers::NumberTraits<Number>::abs(val[i]);
+      if (scale < abs_x)
+        {
+          // new maximum: rescale what has been summed so far
+          const real_type ratio = scale/abs_x;
+          sum = 1. + sum * ratio * ratio;
+          scale = abs_x;
+        }
+      else
+        {
+          const real_type ratio = abs_x/scale;
+          sum += ratio * ratio;
+        }
+    }
+  AssertIsFinite(scale*std::sqrt(sum));
+  return scale * std::sqrt(sum);
+}
+
+
+
+template <typename Number>
+typename Vector<Number>::real_type
+Vector<Number>::lp_norm (const real_type p) const
+{
+  // p-norm of the vector, i.e. the p-th root of the sum of |x_i|^p. The
+  // vector must not be empty.
+  Assert (vec_size!=0, ExcEmptyObject());
+
+  // the exponents 1 and 2 have their own implementations
+  if (p == 1.)
+    return l1_norm();
+  if (p == 2.)
+    return l2_norm();
+
+  // first attempt: sum the powers directly
+  real_type power_sum;
+  internal::Vector::accumulate (internal::Vector::NormP<Number,real_type>(),
+                                val, val, p, vec_size, val, power_sum);
+
+  const bool power_sum_is_usable =
+    numbers::is_finite(power_sum) &&
+    power_sum >= std::numeric_limits<real_type>::min();
+  if (power_sum_is_usable)
+    return std::pow(power_sum, static_cast<real_type>(1./p));
+
+  // the direct sum over- or underflowed: sum again, this time relative to
+  // the largest absolute value seen so far
+  real_type scale = 0.;
+  real_type scaled_sum = 1.;
+  for (size_type i=0; i<vec_size; ++i)
+    {
+      // zero entries do not contribute
+      if (val[i] == Number())
+        continue;
+
+      const real_type abs_x =
+        numbers::NumberTraits<Number>::abs(val[i]);
+      if (scale < abs_x)
+        {
+          // new maximum: rescale what has been summed so far
+          scaled_sum = 1. + scaled_sum * std::pow(scale/abs_x, p);
+          scale = abs_x;
+        }
+      else
+        scaled_sum += std::pow(abs_x/scale, p);
+    }
+  return scale * std::pow(scaled_sum, static_cast<real_type>(1./p));
+}
+
+
+
+template <>
+Vector<int>::real_type
+Vector<int>::lp_norm (const real_type) const
+{
+  // Integer vectors have no lp norm: this always triggers an assertion. The
+  // value -1 is returned only for builds in which the assertion does not
+  // abort.
+  Assert(false, ExcMessage("No lp norm for integer vectors"));
+  const real_type invalid_norm = -1;
+  return invalid_norm;
+}
+
+
+
+template <typename Number>
+typename Vector<Number>::real_type
+Vector<Number>::linfty_norm () const
+{
+  // Largest absolute value among the entries, found by a plain sequential
+  // scan. The vector must not be empty.
+  Assert (vec_size!=0, ExcEmptyObject());
+
+  real_type max = 0.;
+  for (size_type i=0; i<vec_size; ++i)
+    {
+      const real_type abs_entry = numbers::NumberTraits<Number>::abs(val[i]);
+      max = std::max (abs_entry, max);
+    }
+
+  return max;
+}
+
+
+
+template <typename Number>
+Number
+Vector<Number>::add_and_dot (const Number          a,
+                             const Vector<Number> &V,
+                             const Vector<Number> &W)
+{
+  // Combined update and scalar product in one sweep over the data, using
+  // the AddAndDot operation: this vector is updated with a*V and the product
+  // of the updated vector with W is returned. All three vectors must have
+  // the same, non-zero size.
+  Assert (vec_size!=0, ExcEmptyObject());
+  AssertDimension (vec_size, V.size());
+  AssertDimension (vec_size, W.size());
+
+  internal::Vector::AddAndDot<Number> fused_operation;
+  Number sum;
+  internal::Vector::accumulate (fused_operation,
+                                V.val, W.val, a, vec_size, val, sum);
+  AssertIsFinite(sum);
+
+  return sum;
+}
+
+
+
+template <typename Number>
+Vector<Number> &Vector<Number>::operator += (const Vector<Number> &v)
+{
+  // In-place addition of v; the actual work is done by add(v). The vector
+  // must not be empty.
+  Assert (vec_size!=0, ExcEmptyObject());
+
+  this->add (v);
+
+  return *this;
+}
+
+
+
+template <typename Number>
+Vector<Number> &Vector<Number>::operator -= (const Vector<Number> &v)
+{
+  // In-place subtraction of v; returns *this. This vector must be non-empty
+  // and v must have the same size.
+  Assert (vec_size!=0, ExcEmptyObject());
+  Assert (vec_size == v.vec_size, ExcDimensionMismatch(vec_size, v.vec_size));
+
+  // collect the operands in a functor and run it over all entries
+  internal::Vectorization_subtract_v<Number> subtract_op;
+  subtract_op.v_val = v.val;
+  subtract_op.val = val;
+  internal::vectorized_transform(subtract_op,vec_size);
+
+  return *this;
+}
+
+
+
+template <typename Number>
+void Vector<Number>::add (const Number v)
+{
+  // Add the scalar v to the vector in place. The vector must not be empty.
+  Assert (vec_size!=0, ExcEmptyObject());
+
+  // collect the operands in a functor and run it over all entries
+  internal::Vectorization_add_factor<Number> shift_op;
+  shift_op.factor = v;
+  shift_op.val = val;
+  internal::vectorized_transform(shift_op,vec_size);
+}
+
+
+
+template <typename Number>
+void Vector<Number>::add (const Vector<Number> &v)
+{
+  // Add the vector v to this vector in place. This vector must be non-empty
+  // and v must have the same size.
+  Assert (vec_size!=0, ExcEmptyObject());
+  Assert (vec_size == v.vec_size, ExcDimensionMismatch(vec_size, v.vec_size));
+
+  // collect the operands in a functor and run it over all entries
+  internal::Vectorization_add_v<Number> sum_op;
+  sum_op.v_val = v.val;
+  sum_op.val = val;
+  internal::vectorized_transform(sum_op,vec_size);
+}
+
+
+
+template <typename Number>
+void Vector<Number>::add (const Number a, const Vector<Number> &v,
+                          const Number b, const Vector<Number> &w)
+{
+  // Update this vector with the two scaled vectors a*v and b*w. Both
+  // scalars must be finite, this vector non-empty, and v and w of the same
+  // size as this vector.
+  AssertIsFinite(a);
+  AssertIsFinite(b);
+
+  Assert (vec_size!=0, ExcEmptyObject());
+  Assert (vec_size == v.vec_size, ExcDimensionMismatch(vec_size, v.vec_size));
+  Assert (vec_size == w.vec_size, ExcDimensionMismatch(vec_size, w.vec_size));
+
+  // collect the operands in a functor and run it over all entries
+  internal::Vectorization_add_avpbw<Number> combine_op;
+  combine_op.a = a;
+  combine_op.b = b;
+  combine_op.v_val = v.val;
+  combine_op.w_val = w.val;
+  combine_op.val = val;
+  internal::vectorized_transform(combine_op,vec_size);
+}
+
+
+
+template <typename Number>
+void Vector<Number>::sadd (const Number x,
+                           const Vector<Number> &v)
+{
+  // Scale-and-add update involving the scalar x and the vector v. The
+  // scalar must be finite, this vector non-empty, and v of the same size.
+  AssertIsFinite(x);
+
+  Assert (vec_size!=0, ExcEmptyObject());
+  Assert (vec_size == v.vec_size, ExcDimensionMismatch(vec_size, v.vec_size));
+
+  // collect the operands in a functor and run it over all entries
+  internal::Vectorization_sadd_xv<Number> sadd_op;
+  sadd_op.x = x;
+  sadd_op.v_val = v.val;
+  sadd_op.val = val;
+  internal::vectorized_transform(sadd_op,vec_size);
+}
+
+
+
+template <typename Number>
+void Vector<Number>::sadd (const Number x, const Number a,
+                           const Vector<Number> &v, const Number b,
+                           const Vector<Number> &w)
+{
+  // Scale-and-add update involving the scalars x, a, b and the vectors v
+  // and w. All scalars must be finite, this vector non-empty, and v and w of
+  // the same size as this vector.
+  AssertIsFinite(x);
+  AssertIsFinite(a);
+  AssertIsFinite(b);
+
+  Assert (vec_size!=0, ExcEmptyObject());
+  Assert (vec_size == v.vec_size, ExcDimensionMismatch(vec_size, v.vec_size));
+  Assert (vec_size == w.vec_size, ExcDimensionMismatch(vec_size, w.vec_size));
+
+  // collect the operands in a functor and run it over all entries
+  internal::Vectorization_sadd_xavbw<Number> sadd_op;
+  sadd_op.x = x;
+  sadd_op.a = a;
+  sadd_op.b = b;
+  sadd_op.v_val = v.val;
+  sadd_op.w_val = w.val;
+  sadd_op.val = val;
+  internal::vectorized_transform(sadd_op,vec_size);
+}
+
+
+template <typename Number>
+void Vector<Number>::sadd (const Number x, const Number a,
+                           const Vector<Number> &v, const Number b,
+                           const Vector<Number> &w, const Number c,
+                           const Vector<Number> &y)
+{
+  // Built from two simpler updates: the five-argument sadd handles x, a*v
+  // and b*w, then add() puts c*y on top. Argument checking is left to these
+  // two functions.
+  this->sadd (x, a, v, b, w);
+  this->add (c, y);
+}
+
+
+
+template <typename Number>
+void Vector<Number>::scale (const Vector<Number> &s)
+{
+  // Scale this vector in place using the entries of s. This vector must be
+  // non-empty and s must have the same size.
+  Assert (vec_size!=0, ExcEmptyObject());
+  Assert (vec_size == s.vec_size, ExcDimensionMismatch(vec_size, s.vec_size));
+
+  // collect the operands in a functor and run it over all entries
+  internal::Vectorization_scale<Number> scale_op;
+  scale_op.v_val = s.val;
+  scale_op.val = val;
+  internal::vectorized_transform(scale_op,vec_size);
+}
+
+
+
+template <typename Number>
+template <typename Number2>
+void Vector<Number>::scale (const Vector<Number2> &s)
+{
+  // Entry-wise scaling by a vector of a different number type: each entry
+  // of s is converted to Number before the multiplication. This vector must
+  // be non-empty and s must have the same size.
+  Assert (vec_size!=0, ExcEmptyObject());
+  Assert (vec_size == s.vec_size, ExcDimensionMismatch(vec_size, s.vec_size));
+
+  for (size_type i=0; i<vec_size; ++i)
+    {
+      const Number converted_factor = Number(s.val[i]);
+      val[i] *= converted_factor;
+    }
+}
+
+
+
+template <typename Number>
+void Vector<Number>::equ (const Number a,
+                          const Vector<Number> &u)
+{
+  // Assignment of the scaled vector a*u to this vector. The scalar must be
+  // finite, this vector non-empty, and u of the same size.
+  AssertIsFinite(a);
+
+  Assert (vec_size!=0, ExcEmptyObject());
+  Assert (vec_size == u.vec_size, ExcDimensionMismatch(vec_size, u.vec_size));
+
+  // collect the operands in a functor and run it over all entries
+  internal::Vectorization_equ_au<Number> assign_op;
+  assign_op.a = a;
+  assign_op.u_val = u.val;
+  assign_op.val = val;
+  internal::vectorized_transform(assign_op,vec_size);
+}
+
+
+
+template <typename Number>
+template <typename Number2>
+void Vector<Number>::equ (const Number a,
+                          const Vector<Number2> &u)
+{
+  // Assignment of a*u where u has a different number type. The scalar must
+  // be finite, this vector non-empty, and u of the same size.
+  AssertIsFinite(a);
+
+  Assert (vec_size!=0, ExcEmptyObject());
+  Assert (vec_size == u.vec_size, ExcDimensionMismatch(vec_size, u.vec_size));
+
+  // The entries of u are converted to the number type of this vector before
+  // multiplying, because mixed products such as
+  // operator*(complex<float>,complex<double>) are not defined by default.
+  for (size_type i=0; i<vec_size; ++i)
+    {
+      const Number converted_entry = Number(u.val[i]);
+      val[i] = a * converted_entry;
+    }
+}
+
+
+
+template <typename Number>
+void Vector<Number>::equ (const Number a, const Vector<Number> &u,
+                          const Number b, const Vector<Number> &v)
+{
+  // Assignment of the combination a*u + b*v to this vector. Both scalars
+  // must be finite, this vector non-empty, and u and v of the same size as
+  // this vector.
+  AssertIsFinite(a);
+  AssertIsFinite(b);
+
+  Assert (vec_size!=0, ExcEmptyObject());
+  Assert (vec_size == u.vec_size, ExcDimensionMismatch(vec_size, u.vec_size));
+  Assert (vec_size == v.vec_size, ExcDimensionMismatch(vec_size, v.vec_size));
+
+  // collect the operands in a functor and run it over all entries
+  internal::Vectorization_equ_aubv<Number> assign_op;
+  assign_op.a = a;
+  assign_op.b = b;
+  assign_op.u_val = u.val;
+  assign_op.v_val = v.val;
+  assign_op.val = val;
+  internal::vectorized_transform(assign_op,vec_size);
+}
+
+
+template <typename Number>
+void Vector<Number>::equ (const Number a, const Vector<Number> &u,
+                          const Number b, const Vector<Number> &v,
+                          const Number c, const Vector<Number> &w)
+{
+  // Assignment of the combination a*u + b*v + c*w to this vector. All three
+  // scalars must be finite, this vector non-empty, and u, v and w of the
+  // same size as this vector.
+  //
+  // The finiteness checks mirror those of the one- and two-term equ()
+  // overloads, so that a NaN or infinite coefficient is caught here as well.
+  AssertIsFinite(a);
+  AssertIsFinite(b);
+  AssertIsFinite(c);
+
+  Assert (vec_size!=0, ExcEmptyObject());
+  Assert (vec_size == u.vec_size, ExcDimensionMismatch(vec_size, u.vec_size));
+  Assert (vec_size == v.vec_size, ExcDimensionMismatch(vec_size, v.vec_size));
+  Assert (vec_size == w.vec_size, ExcDimensionMismatch(vec_size, w.vec_size));
+
+  // collect the operands in a functor and run it over all entries
+  internal::Vectorization_equ_aubvcw<Number> vector_equ;
+  vector_equ.val = val;
+  vector_equ.u_val = u.val;
+  vector_equ.v_val = v.val;
+  vector_equ.w_val = w.val;
+  vector_equ.a = a;
+  vector_equ.b = b;
+  vector_equ.c = c;
+  internal::vectorized_transform(vector_equ,vec_size);
+}
+
+
+template <typename Number>
+void Vector<Number>::ratio (const Vector<Number> &a,
+                            const Vector<Number> &b)
+{
+  // Fill this vector from the entries of a and b using the
+  // Vectorization_ratio functor. This vector must be non-empty on entry and
+  // a and b must have equal size; this vector is resized to that size.
+  Assert (vec_size!=0, ExcEmptyObject());
+  Assert (a.vec_size == b.vec_size,
+          ExcDimensionMismatch (a.vec_size, b.vec_size));
+
+  // every entry is overwritten below, so the reinit may skip zeroing
+  // (second argument 'true')
+  reinit (a.size(), true);
+
+  // collect the operands in a functor and run it over all entries
+  internal::Vectorization_ratio<Number> ratio_op;
+  ratio_op.a_val = a.val;
+  ratio_op.b_val = b.val;
+  ratio_op.val = val;
+  internal::vectorized_transform(ratio_op,vec_size);
+}
+
+
+
+template <typename Number>
+Vector<Number> &
+Vector<Number>::operator= (const BlockVector<Number> &v)
+{
+  // Copy a block vector into this vector by writing the entries of its
+  // blocks one after the other. This vector is resized if necessary.
+  if (v.size() != vec_size)
+    reinit (v.size(), true);
+
+  // position in this vector at which the current block starts
+  size_type block_offset = 0;
+  for (size_type block_no=0; block_no<v.n_blocks(); ++block_no)
+    {
+      const size_type block_length = v.block(block_no).size();
+      for (size_type i=0; i<block_length; ++i)
+        val[block_offset+i] = v.block(block_no)(i);
+      block_offset += block_length;
+    }
+
+  return *this;
+}
+
+
+
+#ifdef DEAL_II_WITH_PETSC
+
+template <typename Number>
+Vector<Number> &
+Vector<Number>::operator= (const PETScWrappers::Vector &v)
+{
+  // Copy the entries of a sequential PETSc vector into this vector,
+  // resizing this vector if necessary.
+  if (v.size() != vec_size)
+    reinit (v.size(), true);
+
+  // nothing to copy for an empty vector
+  if (vec_size == 0)
+    return *this;
+
+  // ask PETSc for a pointer to the raw data of v ...
+  PetscScalar *petsc_data;
+  int ierr = VecGetArray (static_cast<const Vec &>(v), &petsc_data);
+  AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+  // ... copy the entries ...
+  internal::copy (petsc_data, petsc_data+vec_size, begin());
+
+  // ... and hand the pointer back to PETSc
+  ierr = VecRestoreArray (static_cast<const Vec &>(v), &petsc_data);
+  AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+  return *this;
+}
+
+
+
+template <typename Number>
+Vector<Number> &
+Vector<Number>::operator= (const PETScWrappers::MPI::Vector &v)
+{
+  // Two stages: build a sequential PETSc vector from the parallel one, then
+  // reuse the assignment operator for sequential PETSc vectors.
+  PETScWrappers::Vector sequential_copy (v);
+  return (*this = sequential_copy);
+}
+
+#endif
+
+
+#ifdef DEAL_II_WITH_TRILINOS
+
+template <typename Number>
+Vector<Number> &
+Vector<Number>::operator= (const TrilinosWrappers::MPI::Vector &v)
+{
+  // Two stages: build a localized Trilinos vector from the parallel one,
+  // then reuse the assignment operator for localized Trilinos vectors.
+  TrilinosWrappers::Vector local_copy (v);
+  return (*this = local_copy);
+}
+
+
+
+template <typename Number>
+Vector<Number> &
+Vector<Number>::operator= (const TrilinosWrappers::Vector &v)
+{
+  if (v.size() != vec_size)
+    reinit (v.size(), true); // entries are overwritten below, so skip zeroing
+  if (vec_size != 0)
+    {
+      // get a view of the Trilinos vector's
+      // data; its first array is copied below
+      TrilinosScalar **start_ptr;
+      int ierr = v.trilinos_vector().ExtractView (&start_ptr);
+      AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+      std::copy (start_ptr[0], start_ptr[0]+vec_size, begin());
+    }
+
+  return *this;
+}
+
+#endif
+
+template <typename Number>
+template <typename Number2>
+bool
+Vector<Number>::operator== (const Vector<Number2> &v) const
+{
+  Assert (vec_size!=0, ExcEmptyObject());
+  Assert (vec_size == v.size(), ExcDimensionMismatch(vec_size, v.size()));
+
+  // element-wise comparison. every entry
+  // of v is first converted to Number,
+  // the element type of this vector,
+  // since a mixed comparison such as
+  // operator==(complex<float>,complex<double>)
+  // is not defined by default
+  size_type k = 0;
+  while (k<vec_size && !(val[k] != Number(v.val[k])))
+    ++k;
+
+  return k == vec_size;
+}
+
+
+
+template <typename Number>
+void Vector<Number>::print (const char *format) const
+{
+  Assert (vec_size!=0, ExcEmptyObject());
+  // hand each entry with the given format to internal::print, then end the line on stdout
+  for (size_type j=0; j<size(); ++j)
+    internal::print (val[j], format);
+  std::printf ("\n");
+}
+
+
+
+template <typename Number>
+void Vector<Number>::print (std::ostream      &out,
+                            const unsigned int precision,
+                            const bool         scientific,
+                            const bool         across) const
+{
+  Assert (vec_size!=0, ExcEmptyObject());
+  AssertThrow (out, ExcIO());
+
+  // remember the caller's formatting state; precision() installs the
+  // requested precision and returns the previous one in a single call
+  const std::ios::fmtflags old_flags     = out.flags();
+  const std::streamsize    old_precision = out.precision (precision);
+  if (scientific)
+    out.setf (std::ios::scientific, std::ios::floatfield);
+  else
+    out.setf (std::ios::fixed, std::ios::floatfield);
+
+  if (across)
+    for (size_type i=0; i<size(); ++i)
+      out << val[i] << ' ';
+  else
+    for (size_type i=0; i<size(); ++i)
+      out << val[i] << std::endl;
+  out << std::endl;
+
+  // reset output format first, so it is restored even if the check throws
+  out.flags (old_flags);
+  out.precision(old_precision);
+  AssertThrow (out, ExcIO());
+}
+
+
+
+template <typename Number>
+void
+Vector<Number>::print (LogStream &out, const unsigned int width, const bool across) const
+{
+  Assert (vec_size!=0, ExcEmptyObject());
+  // NOTE(review): width is only applied in the 'across' branch -- confirm this is intended
+  if (across)
+    for (size_type i=0; i<size(); ++i)
+      out << std::setw(width) << val[i] << ' ';
+  else
+    for (size_type i=0; i<size(); ++i)
+      out << val[i] << std::endl;
+  out << std::endl;
+}
+
+
+template <typename Number>
+void Vector<Number>::block_write (std::ostream &out) const
+{
+  AssertThrow (out, ExcIO());
+
+  // other version of the following
+  //  out << size() << std::endl << '[';
+  // reason: operator<< seems to use
+  // some resources that lead to
+  // problems in a multithreaded
+  // environment
+  const size_type sz = size();
+  char buf[32]; // up to 20 digits for a 64 bit size, plus "\n[" and '\0'
+
+#ifdef DEAL_II_WITH_64BIT_INDICES
+  std::sprintf(buf, "%llu", sz);
+#else
+  std::sprintf(buf, "%u", sz);
+#endif
+  std::strcat(buf, "\n[");
+
+  out.write(buf, std::strlen(buf));
+  out.write (reinterpret_cast<const char *>(begin()),
+             reinterpret_cast<const char *>(end())
+             - reinterpret_cast<const char *>(begin()));
+
+  // out << ']';
+  const char outro = ']';
+  out.write (&outro, 1);
+
+  AssertThrow (out, ExcIO());
+}
+
+
+
+template <typename Number>
+void Vector<Number>::block_read (std::istream &in)
+{
+  AssertThrow (in, ExcIO());
+
+  size_type sz;
+
+  // large enough for the up to 20 digits of a 64 bit size written by
+  // block_write(), plus the terminating '\0'
+  char buf[32];
+  in.getline(buf,sizeof(buf),'\n');
+  sz=static_cast<size_type>(std::strtoul(buf, 0, 10)); // unsigned parse; atoi() overflows past INT_MAX
+
+  // fast initialization, since the
+  // data elements are overwritten anyway
+  reinit (sz, true);
+
+  char c = '\0'; // defined value even if the read below fails
+  //  in >> c;
+  in.read (&c, 1);
+  AssertThrow (c=='[', ExcIO());
+
+  in.read (reinterpret_cast<char *>(begin()),
+           reinterpret_cast<const char *>(end())
+           - reinterpret_cast<const char *>(begin()));
+
+  //  in >> c;
+  in.read (&c, 1);
+  AssertThrow (c==']', ExcIO());
+}
+
+
+
+template <typename Number>
+IndexSet
+Vector<Number>::locally_owned_elements() const
+{
+  return complete_index_set(size()); // built from size() alone; presumably the full range [0,size()) -- confirm
+}
+
+
+
+template <typename Number>
+std::size_t
+Vector<Number>::memory_consumption () const
+{
+  return sizeof(*this) + (max_vec_size * sizeof(Number)); // object itself plus max_vec_size entries, the amount allocate() requests
+}
+
+
+
+template <typename Number>
+void
+Vector<Number>::allocate()
+{
+  // allocating over a live pointer would leak the old array
+  Assert (val == 0, ExcInternalError());
+
+  // request max_vec_size entries, aligned to a 64 byte boundary
+  Utilities::System::posix_memalign (reinterpret_cast<void **>(&val), 64, max_vec_size*sizeof(Number));
+}
+
+
+
+template <typename Number>
+void
+Vector<Number>::deallocate()
+{
+  free(val); // the array was obtained through Utilities::System::posix_memalign in allocate()
+  val = 0; // lets the val == 0 check in allocate() pass again
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/vector_memory.h b/include/deal.II/lac/vector_memory.h
new file mode 100644
index 0000000..85473db
--- /dev/null
+++ b/include/deal.II/lac/vector_memory.h
@@ -0,0 +1,396 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__vector_memory_h
+#define dealii__vector_memory_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/thread_management.h>
+#include <deal.II/lac/vector.h>
+
+#include <vector>
+#include <iostream>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/*!@addtogroup VMemory */
+/*@{*/
+
+/**
+ * Memory management base class for vectors. This is an abstract base class
+ * used, among other places, by all iterative methods to allocate space for
+ * auxiliary vectors.
+ *
+ * The purpose of this class is as follows: in iterative solvers and other
+ * places, one needs to allocate temporary storage for vectors, for example
+ * for auxiliary vectors. One could allocate and release them anew every time,
+ * but this may be expensive in some situations if it has to happen very
+ * frequently. A common case for this is when an iterative method is used to
+ * invert a matrix in each iteration of an outer solver, such as when
+ * inverting a matrix block for a Schur complement solver.
+ *
+ * In such situations, allocating and deallocating vectors anew in each call
+ * to the inner solver is expensive and leads to memory fragmentation. The
+ * present class allows to avoid this by offering an interface that other
+ * classes can use to allocate and deallocate vectors. Different derived
+ * classes then implement different strategies to provide temporary storage
+ * vectors to using classes.
+ *
+ * For example, the PrimitiveVectorMemory class simply allocates and
+ * deallocates vectors each time it is asked for a vector. It is an
+ * appropriate implementation to use for iterative solvers that are called
+ * only once, or very infrequently.
+ *
+ * On the other hand, the GrowingVectorMemory class never returns memory space
+ * to the operating system memory management subsystem during its lifetime; it
+ * only marks them as unused and allows them to be reused next time a vector
+ * is requested.
+ *
+ * Yet other classes, when implemented, could provide even other strategies
+ * for memory management.
+ *
+ * @author Guido Kanschat, 1998-2003
+ */
+template<typename VectorType = dealii::Vector<double> >
+class VectorMemory : public Subscriptor
+{
+public:
+
+  /**
+   * Virtual destructor is needed as there are virtual functions in this
+   * class.
+   */
+  virtual ~VectorMemory () {}
+
+  /**
+   * Return a pointer to a new vector. The number of elements or their
+   * subdivision into blocks (if applicable) is unspecified and users of this
+   * function should reset vectors to their proper size. The same holds for
+   * the contents of vectors: they are unspecified.
+   */
+  virtual VectorType *alloc () = 0;
+
+  /**
+   * Return a vector and indicate that it is not going to be used any further
+   * by the instance that called alloc() to get a pointer to it.
+   */
+  virtual void free (const VectorType *const) = 0;
+
+  /**
+   * @addtogroup Exceptions
+   * @{
+   */
+
+  /**
+   * No more available vectors.
+   */
+  DeclException0(ExcNoMoreVectors);
+  /**
+   * Vector was not allocated from this memory pool.
+   */
+  DeclException0(ExcNotAllocatedHere);
+
+  //@}
+  /**
+   * Pointer to vectors allocated from VectorMemory objects. This pointer is
+   * safe in the sense that it automatically calls free() when it is
+   * destroyed, thus relieving the user from using vector management functions
+   * at all.
+   *
+   * @author Guido Kanschat, 2009
+   */
+  class Pointer
+  {
+  public:
+    /**
+     * Constructor, automatically allocating a vector from @p mem.
+     */
+    Pointer(VectorMemory<VectorType> &mem);
+    /**
+     * Destructor, automatically releasing the vector from the memory #pool.
+     */
+    ~Pointer();
+
+    /**
+     * Conversion to regular pointer.
+     */
+    operator VectorType *() const;
+
+    /**
+     * Dereferencing operator.
+     */
+    VectorType &operator * () const;
+
+    /**
+     * Dereferencing operator.
+     */
+    VectorType *operator -> () const;
+  private:
+    /**
+     * The memory pool used.
+     */
+    SmartPointer<VectorMemory<VectorType>,Pointer> pool;
+    /**
+     * The pointer to the vector.
+     */
+    VectorType *v;
+  };
+};
+
+
+
+/**
+ * Simple memory management. See the documentation of the base class for a
+ * description of its purpose.
+ *
+ * This class allocates and deletes vectors as needed from the global heap,
+ * i.e. performs no specially adapted actions for memory management.
+ */
+template<typename VectorType = dealii::Vector<double> >
+class PrimitiveVectorMemory : public VectorMemory<VectorType>
+{
+public:
+  /**
+   * Constructor.
+   */
+  PrimitiveVectorMemory () {}
+
+  /**
+   * Return a pointer to a new vector. The number of elements or their
+   * subdivision into blocks (if applicable) is unspecified and users of this
+   * function should reset vectors to their proper size. The same holds for
+   * the contents of vectors: they are unspecified.
+   *
+   * For the present class, calling this function will allocate a new vector
+   * on the heap and return a pointer to it.
+   */
+  virtual VectorType *alloc ()
+  {
+    return new VectorType();
+  }
+
+  /**
+   * Return a vector and indicate that it is not going to be used any further
+   * by the instance that called alloc() to get a pointer to it.
+   *
+   * For the present class, this means that the vector is deleted, i.e. its
+   * memory is returned to the global heap. The pointer must not be used
+   * afterwards.
+   */
+  virtual void free (const VectorType *const v)
+  {
+    delete v;
+  }
+};
+
+
+
+/**
+ * A pool based memory management class. See the documentation of the base
+ * class for a description of its purpose.
+ *
+ * Each time a vector is requested from this class, it checks if it has one
+ * available and returns its address, or allocates a new one on the heap. If a
+ * vector is returned, through the free() member function, it doesn't return
+ * it to the operating system memory subsystem, but keeps it around unused for
+ * later use if alloc() is called again, or until the object is destroyed. The
+ * class therefore avoids the overhead of repeatedly allocating memory on the
+ * heap if temporary vectors are required and released frequently; on the
+ * other hand, it doesn't release once-allocated memory at the earliest
+ * possible time and may therefore lead to an increased overall memory
+ * consumption.
+ *
+ * All GrowingVectorMemory objects of the same vector type use the same memory
+ * Pool. Therefore, functions can create such a VectorMemory object whenever
+ * needed without performance penalty. A drawback of this policy might be that
+ * vectors once allocated are only released at the end of the program run.
+ * Nevertheless, since they are reused, this should be of no concern.
+ * Additionally, the destructor of the Pool warns about memory leaks.
+ *
+ * @author Guido Kanschat, 1999, 2007
+ */
+template<typename VectorType = dealii::Vector<double> >
+class GrowingVectorMemory : public VectorMemory<VectorType>
+{
+public:
+  /**
+   * Declare type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * Constructor.  The argument allows to preallocate a certain number of
+   * vectors. The default is not to do this.
+   */
+  GrowingVectorMemory (const size_type initial_size = 0,
+                       const bool log_statistics = false);
+
+  /**
+   * Destructor. Release all vectors. This destructor also offers some
+   * statistic on the number of allocated vectors.
+   *
+   * The log file will also contain a warning message, if there are allocated
+   * vectors left.
+   */
+  virtual ~GrowingVectorMemory();
+
+  /**
+   * Return a pointer to a new vector. The number of elements or their
+   * subdivision into blocks (if applicable) is unspecified and users of this
+   * function should reset vectors to their proper size. The same holds for
+   * the contents of vectors: they are unspecified.
+   */
+  virtual VectorType *alloc ();
+
+  /**
+   * Return a vector and indicate that it is not going to be used any further
+   * by the instance that called alloc() to get a pointer to it.
+   *
+   * For the present class, this means retaining the vector for later reuse by
+   * the alloc() method.
+   */
+  virtual void free (const VectorType *const);
+
+  /**
+   * Release all vectors that are not currently in use.
+   */
+  static void release_unused_memory ();
+
+  /**
+   * Memory consumed by this class and all currently allocated vectors.
+   */
+  virtual std::size_t memory_consumption() const;
+
+private:
+  /**
+   * Type to enter into the array. First component will be a flag telling
+   * whether the vector is used, second the vector itself.
+   */
+  typedef std::pair<bool, VectorType *> entry_type;
+
+  /**
+   * The class providing the actual storage for the memory pool.
+   *
+   * This is where the actual storage for GrowingVectorMemory is provided.
+   * Only one of these pools is used for each vector type, thus allocating all
+   * vectors from the same storage.
+   *
+   * @author Guido Kanschat, 2007
+   */
+  struct Pool
+  {
+    /**
+     * Standard constructor creating an empty pool
+     */
+    Pool();
+    /**
+     * Destructor. Frees memory and warns about memory leaks
+     */
+    ~Pool();
+    /**
+     * Create data vector; does nothing after first initialization
+     */
+    void initialize(const size_type size);
+    /**
+     * Pointer to the storage object
+     */
+    std::vector<entry_type> *data;
+  };
+
+  /**
+   * Array of allocated vectors.
+   */
+  static Pool pool;
+
+  /**
+   * Overall number of allocations. Only used for bookkeeping and to generate
+   * output at the end of an object's lifetime.
+   */
+  size_type total_alloc;
+  /**
+   * Number of vectors currently allocated in this object; used for detecting
+   * memory leaks.
+   */
+  size_type current_alloc;
+
+  /**
+   * A flag controlling the logging of statistics by the destructor.
+   */
+  bool log_statistics;
+
+  /**
+   * Mutex to synchronise access to internal data of this object from multiple
+   * threads.
+   */
+  static Threads::Mutex mutex;
+};
+
+/*@}*/
+
+#ifndef DOXYGEN
+/* --------------------- inline functions ---------------------- */
+
+
+template <typename VectorType>
+inline
+VectorMemory<VectorType>::Pointer::Pointer(VectorMemory<VectorType> &mem)
+  :
+  pool(&mem, typeid(*this).name()),
+  v(pool->alloc()) // pool is declared before v, hence already initialized here
+{
+}
+
+
+template <typename VectorType>
+inline
+VectorMemory<VectorType>::Pointer::~Pointer()
+{
+  pool->free(v); // give the vector obtained in the constructor back to the pool
+}
+
+
+template <typename VectorType>
+inline
+VectorMemory<VectorType>::Pointer::operator VectorType *() const
+{
+  return v; // raw pointer only; the destructor of this object still frees the vector
+}
+
+
+template <typename VectorType>
+inline
+VectorType &VectorMemory<VectorType>::Pointer::operator * () const
+{
+  return *v; // reference to the managed vector; no null check is made
+}
+
+
+template <typename VectorType>
+inline
+VectorType *VectorMemory<VectorType>::Pointer::operator -> () const
+{
+  return v; // member access is forwarded to the managed vector
+}
+
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/vector_memory.templates.h b/include/deal.II/lac/vector_memory.templates.h
new file mode 100644
index 0000000..85d433a
--- /dev/null
+++ b/include/deal.II/lac/vector_memory.templates.h
@@ -0,0 +1,210 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2007 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__vector_memory_templates_h
+#define dealii__vector_memory_templates_h
+
+
+#include <deal.II/lac/vector_memory.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+template <typename VectorType>
+typename GrowingVectorMemory<VectorType>::Pool GrowingVectorMemory<VectorType>::pool;
+// locked by the constructor, alloc(), free(), release_unused_memory() and memory_consumption()
+template <typename VectorType>
+Threads::Mutex GrowingVectorMemory<VectorType>::mutex;
+
+template <typename VectorType>
+inline
+GrowingVectorMemory<VectorType>::Pool::Pool()
+  :
+  data(0) // no storage yet; it is created by initialize()
+{}
+
+
+
+template <typename VectorType>
+inline
+GrowingVectorMemory<VectorType>::Pool::~Pool()
+{
+  // A pool that was never initialized
+  // owns no storage.
+  if (data != 0)
+    {
+      // Delete every vector the pool still
+      // holds, then the container itself.
+      typedef typename std::vector<entry_type>::size_type index_type;
+      const index_type n_entries = data->size();
+
+      for (index_type n=0; n<n_entries; ++n)
+        delete (*data)[n].second;
+
+      delete data;
+    }
+}
+
+
+template <typename VectorType>
+inline
+void
+GrowingVectorMemory<VectorType>::Pool::initialize(const size_type size)
+{
+  // only the first call creates the
+  // storage; later calls are no-ops
+  if (data != 0)
+    return;
+
+  data = new std::vector<entry_type>(size);
+  for (size_type n=0; n<size; ++n)
+    {
+      entry_type &slot = (*data)[n];
+      slot.first  = false;     // not handed out yet
+      slot.second = new VectorType;
+    }
+}
+
+
+template <typename VectorType>
+inline
+GrowingVectorMemory<VectorType>::GrowingVectorMemory (const size_type initial_size,
+                                                      const bool log_statistics)
+  // the allocation counters are per object; the pool itself is a static member
+  :
+  total_alloc(0),
+  current_alloc(0),
+  log_statistics(log_statistics)
+{
+  Threads::Mutex::ScopedLock lock(mutex);
+  pool.initialize(initial_size); // only allocates if pool.data is still null
+}
+
+
+template<typename VectorType>
+inline
+GrowingVectorMemory<VectorType>::~GrowingVectorMemory()
+{
+  AssertNothrow(current_alloc == 0,
+                StandardExceptions::ExcMemoryLeak(current_alloc)); // vectors from alloc() not yet given back via free()
+  if (log_statistics)
+    {
+      deallog << "GrowingVectorMemory:Overall allocated vectors: "
+              << total_alloc << std::endl;
+      deallog << "GrowingVectorMemory:Maximum allocated vectors: "
+              << pool.data->size() << std::endl; // number of entries currently in the static pool
+    }
+}
+
+
+
+template<typename VectorType>
+inline
+VectorType *
+GrowingVectorMemory<VectorType>::alloc ()
+{
+  Threads::Mutex::ScopedLock lock(mutex);
+  // bookkeeping for the statistics and
+  // leak check done by the destructor
+  total_alloc   += 1;
+  current_alloc += 1;
+
+  // first try to recycle an entry of the
+  // shared pool that is marked as unused
+  std::vector<entry_type> &entries = *pool.data;
+  for (typename std::vector<entry_type>::size_type n=0; n<entries.size(); ++n)
+    if (!entries[n].first)
+      {
+        entries[n].first = true;
+        return entries[n].second;
+      }
+
+  // nothing to recycle: create a fresh
+  // vector and register it as in use
+  VectorType *fresh = new VectorType;
+  entries.push_back (entry_type(true, fresh));
+  return fresh;
+}
+
+
+
+template<typename VectorType>
+inline
+void
+GrowingVectorMemory<VectorType>::free(const VectorType *const v)
+{
+  Threads::Mutex::ScopedLock lock(mutex);
+  for (typename std::vector<entry_type>::iterator i=pool.data->begin();
+       i != pool.data->end(); ++i)
+    {
+      if (v == (i->second))
+        {
+          i->first = false; // mark as unused; the vector itself is kept for reuse
+          --current_alloc;
+          return;
+        }
+    }
+  Assert(false, typename VectorMemory<VectorType>::ExcNotAllocatedHere()); // v is not an entry of the pool
+}
+
+
+
+template<typename VectorType>
+inline
+void
+GrowingVectorMemory<VectorType>::release_unused_memory ()
+{
+  Threads::Mutex::ScopedLock lock(mutex);
+
+  // nothing to release if the pool was never initialized
+  if (pool.data == 0)
+    return;
+
+  // delete the vectors flagged as unused and
+  // collect the entries that are still handed out
+  std::vector<entry_type> still_in_use;
+  for (typename std::vector<entry_type>::const_iterator
+       entry = pool.data->begin(); entry != pool.data->end(); ++entry)
+    if (entry->first)
+      still_in_use.push_back (*entry);
+    else
+      delete entry->second;
+
+  *pool.data = still_in_use;
+}
+
+
+
+template<typename VectorType>
+inline
+std::size_t
+GrowingVectorMemory<VectorType>::memory_consumption () const
+{
+  Threads::Mutex::ScopedLock lock(mutex);
+  // this object plus, for every pool entry (used or unused), the entry and its vector
+  std::size_t result = sizeof (*this);
+  const typename std::vector<entry_type>::const_iterator
+  end = pool.data->end();
+  for (typename std::vector<entry_type>::const_iterator
+       i = pool.data->begin(); i != end ; ++i)
+    result += sizeof (*i) + i->second->memory_consumption();
+
+  return result;
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/lac/vector_view.h b/include/deal.II/lac/vector_view.h
new file mode 100644
index 0000000..1d3dcc9
--- /dev/null
+++ b/include/deal.II/lac/vector_view.h
@@ -0,0 +1,313 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__vector_view_h
+#define dealii__vector_view_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/lac/vector.h>
+
+#include <cstdio>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/*! @addtogroup Vectors
+ *@{
+ */
+
+/**
+ * View of a numerical vector of data. This class provides an interface
+ * compatible with the Vector<Number> class (from which it inherits), that
+ * allows fast access to locations of memory already allocated with arrays of
+ * type Number.
+ *
+ * This is in the same style of the vector view in the Trilinos library.
+ *
+ * You should consider using the VectorView object ONLY when ALL of the
+ * following requirements are met:
+ *
+ * 1. Your application requires a Vector<Number> object.
+ *
+ * 2. All you have at your disposal is a Number* pointer.
+ *
+ * 3. You are ABSOLUTELY SURE that the above pointer points to a valid area of
+ * memory of the correct size.
+ *
+ * 4. You really believe that making a copy of that memory would be too
+ * expensive.
+ *
+ * 5. You really know what you are doing.
+ *
+ * Notice that NO CHECKS are performed on the actual memory, and if you try to
+ * access illegal areas of memory, your computer will suffer from it. Once
+ * again, use this class ONLY if you know exactly what you are doing.
+ *
+ * Two constructors are provided. One for read-write access, and one for read
+ * only access, and you are allowed to use this class on objects of type const
+ * Number*. However you should be aware of the fact that the constness of the
+ * array pointed to is ignored, which means that you should only use the const
+ * constructor when the actual object you are constructing is itself a
+ * constant object. As a corollary, you will be allowed to call even functions
+ * of the base class that change data of the array; this being a violation of
+ * the C++ type model, you should make sure that this only happens if it is
+ * really valid and, in general, if you know what you are doing.
+ *
+ * Since this class does not own the memory that you are accessing, you have
+ * to make sure that the lifetime of the section of memory you are viewing is
+ * longer than this object. No attempt is made to ensure that this is the
+ * case.
+ *
+ * An example usage of this class is the following:
+ *
+ * @code
+ * // Create an array of length 5;
+ * double * array = new double[5];
+ * // Now create a view of the above array that is compatible with the
+ * // Vector<double> class
+ * VectorView<double> view(5, array);
+ *
+ * view(1) = 4;
+ *
+ * // The following line should output 4.
+ * cout << array[1] << endl;
+ *
+ * // If debug mode is on, then the following triggers an exception:
+ * view(6) = 4;
+ *
+ * // But notice that no checks are performed, so this is legal but WILL
+ * // NOT work
+ * VectorView<double> wrong_view(10, array);
+ *
+ * // Now no assert will be thrown if you type wrong_view(6), but most
+ * // likely a segfault will occur.
+ * wrong_view(6) = 4;
+ *
+ * // Notice that this construction is legal. It will create a copy of
+ * // the array.
+ * const Vector<double> const_copy(view);
+ *
+ * // Now this is the correct way to instantiate a constant view of the
+ * // above vector:
+ * const VectorView<double> correct_const_copy_view(5, const_copy.begin());
+ *
+ * // While this will compile, BUT WILL NOT COMPLAIN if you try to write
+ * // on it!
+ * VectorView<double> wrong_const_copy_view(5, const_copy.begin());
+ *
+ * // Now writing to elements of wrong_const_copy_view is allowed, and
+ * // will change the same memory as the const_copy object.
+ * wrong_const_copy_view(1) = 5;
+ *
+ * if(const_copy(1) == wrong_const_copy_view(1)) cout << "Tautology";
+ *
+ * @endcode
+ *
+ *
+ * @note Instantiations for this template are provided for <tt>@<float@>,
+ * @<double@>, @<long double@>, @<std::complex@<float@>@>,
+ * @<std::complex@<double@>@>, @<std::complex@<long double@>@></tt>; others
+ * can be generated in application programs (see the section on
+ * @ref Instantiations
+ * in the manual).
+ *
+ * @author Luca Heltai, 2009
+ */
+template<typename Number>
+class VectorView : public Vector<Number>
+{
+public:
+
+  /**
+   * Declare type for container size.
+   */
+  typedef types::global_dof_index size_type;
+
+  /**
+   * Read write constructor. Takes the size of the vector, just like the
+   * standard one, but the data is picked starting from the location of the
+   * pointer @p ptr.
+   */
+  VectorView(const size_type new_size, Number *ptr);
+
+  /**
+   * The constant constructor is the same as above, however you will not be
+   * able to access the data for write access.
+   *
+   * You should only use this class by constructing it as a const
+   * VectorView<double>(size, ptr) object.
+   *
+   * Undefined behavior will occur if you construct it as a non const object
+   * or attempt to write on it.
+   */
+  VectorView(const size_type new_size, const Number *ptr);
+
+  /**
+   * This destructor will only reset the internal sizes and the internal
+   * pointers, but it will NOT clear the memory.
+   */
+  ~VectorView();
+
+  /**
+   * The reinit function of this object has a behavior which is different from
+   * the one of the base class. VectorView does not handle memory, and you
+   * should not attempt to resize the memory that is pointed to by this object
+   * using the reinit function. You can, however, resize the view that you
+   * have of the original object. Notice that it is your own responsibility to
+   * ensure that the memory you are pointing to is big enough.
+   *
+   * Similarly to what happens in the base class, if 'omit_zeroing_entries' is
+   * false, then the entire content of the vector is set to 0, otherwise the
+   * content of the memory is left unchanged.
+   *
+   * Notice that the following snippet of code may not produce what you
+   * expect:
+   *
+   * @code
+   * // Create a vector of length 1.
+   * Vector<double> long_vector(1);
+   *
+   * // Make a view of it
+   * VectorView<double> view_of_long_vector(1, long_vector.begin());
+   *
+   * // Resize the original vector to a bigger size
+   * long_vector.reinit(100);
+   *
+   * // And the view, leaving the memory untouched
+   * view_of_long_vector.reinit(100, true);
+   * @endcode
+   *
+   * In the above case, the Vector<double>::reinit method is called, and a NEW
+   * area of memory is reserved, possibly not starting at the same place as
+   * before. However, the VectorView<double> object keeps pointing to the same
+   * old area. After the two reinits, any call to view_of_long_vector(i), with
+   * i>0 might cause an attempt to access invalid areas of memory, or might
+   * function properly, depending on whether or not the system was able to
+   * allocate some memory consecutively after the original allocation.
+   *
+   * In any case, you should not rely on this behavior, and you should only
+   * call this reinit function if you really know what you are doing.
+   */
+  virtual void reinit (const size_type N,
+                       const bool         omit_zeroing_entries=false);
+
+  /**
+   * This reinit function is equivalent to constructing a new object with the
+   * given size, starting from the pointer ptr.
+   */
+  void reinit(const size_type N, Number *ptr);
+
+  /**
+   * This reinit function is equivalent to constructing a new object with the
+   * given size, starting from the pointer ptr. The same considerations made
+   * for the constructor apply here.
+   */
+  void reinit(const size_type N, const Number *ptr);
+
+  /**
+   * This function is here to prevent memory corruption. It should never be
+   * called, and will throw an exception if you try to do so.
+   */
+  virtual void swap (Vector<Number> &v);
+};
+
+
+
+/*@}*/
+/*----------------------- Inline functions ----------------------------------*/
+
+#ifndef DOXYGEN
+
+template<typename Number>
+inline
+VectorView<Number>::VectorView(const size_type new_size, Number *ptr)
+{
+  this->vec_size      = new_size;
+  this->max_vec_size  = new_size;
+  this->val           = ptr; // point at the caller's memory; nothing is allocated or copied here
+}
+
+
+
+template<typename Number>
+inline
+VectorView<Number>::VectorView(const size_type new_size, const Number *ptr)
+{
+  this->vec_size      = new_size;
+  this->max_vec_size  = new_size;
+  this->val           = const_cast<Number *>(ptr); // constness is dropped here; not writing through the view is up to the caller
+}
+
+
+
+template<typename Number>
+inline
+VectorView<Number>::~VectorView()
+{
+  // reset sizes and pointer so that the base
+  // class does not release memory it doesn't own
+  this->vec_size = 0;
+  this->max_vec_size = 0;
+  this->val = 0;
+}
+
+
+template<typename Number>
+inline
+void VectorView<Number>::reinit(const size_type N,
+                                const bool omit_zeroing_entries)
+{
+  // resize the view only; the pointer val is left untouched
+  this->max_vec_size = this->vec_size = N;
+  if (!omit_zeroing_entries)
+    Vector<Number>::operator=(static_cast<Number>(0));
+}
+
+
+template<typename Number>
+inline
+void VectorView<Number>::reinit(const size_type new_size, Number *ptr)
+{
+  this->vec_size      = new_size;
+  this->max_vec_size  = new_size;
+  this->val           = ptr; // re-target the view; the memory viewed before is neither freed nor modified
+}
+
+
+template<typename Number>
+inline
+void VectorView<Number>::reinit(const size_type new_size, const Number *ptr)
+{
+  this->vec_size      = new_size;
+  this->max_vec_size  = new_size;
+  this->val           = const_cast<Number *>(ptr); // constness is dropped here, as in the const constructor
+}
+
+
+template<typename Number>
+inline
+void VectorView<Number>::swap(Vector<Number> &)
+{
+  AssertThrow(false, ExcMessage("Can't swap a VectorView with a Vector!")); // always throws, to prevent memory corruption
+}
+
+#endif
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/matrix_free/dof_info.h b/include/deal.II/matrix_free/dof_info.h
new file mode 100644
index 0000000..49d2599
--- /dev/null
+++ b/include/deal.II/matrix_free/dof_info.h
@@ -0,0 +1,558 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2011 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__matrix_free_dof_info_h
+#define dealii__matrix_free_dof_info_h
+
+
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/vectorization.h>
+#include <deal.II/base/partitioner.h>
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/matrix_free/helper_functions.h>
+
+#include <deal.II/base/std_cxx11/array.h>
+
+#include <memory>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace internal
+{
+  namespace MatrixFreeFunctions
+  {
+    /**
+     * The class that stores the indices of the degrees of freedom for all the
+     * cells. Essentially, this is a smart number cache in the style of a
+     * DoFHandler that also embeds the description of constraints directly on
+     * the cell level without the need to refer to the external
+     * ConstraintMatrix.
+     *
+     * This class only stores index relations. The weights for hanging node
+     * constraints are stored in a different field. This is because a
+     * different field allows for the same compressed weight data on different
+     * DoFHandlers for vector-valued problems. There, the indices might be
+     * constrained differently on different components (e.g. Dirichlet
+     * conditions only on selected components), whereas the weights from
+     * hanging nodes are the same and need to be stored only once. The
+     * combination will be handled in the MatrixFree class.
+     *
+     * @author Katharina Kormann and Martin Kronbichler, 2010, 2011
+     */
+    struct DoFInfo
+    {
+      /**
+       * Default empty constructor.
+       */
+      DoFInfo ();
+
+      /**
+       * Copy constructor.
+       */
+      DoFInfo (const DoFInfo &dof_info);
+
+      /**
+       * Clears all data fields in this class.
+       */
+      void clear ();
+
+
+      /**
+       * Returns a pointer to the first index in the DoF row @p row.
+       */
+      const unsigned int *begin_indices (const unsigned int row) const;
+
+      /**
+       * Returns a pointer to the one past the last DoF index in the row @p
+       * row.
+       */
+      const unsigned int *end_indices (const unsigned int row) const;
+
+      /**
+       * Returns the number of entries in the indices field for the given row.
+       */
+      unsigned int row_length_indices (const unsigned int row) const;
+
+      /**
+       * Returns a pointer to the first constraint indicator in the row @p
+       * row.
+       */
+      const std::pair<unsigned short,unsigned short> *
+      begin_indicators (const unsigned int row) const;
+
+      /**
+       * Returns a pointer to the one past the last constraint indicator in
+       * the row @p row.
+       */
+      const std::pair<unsigned short,unsigned short> *
+      end_indicators (const unsigned int row) const;
+
+      /**
+       * Returns the number of entries in the constraint indicator field for
+       * the given row.
+       */
+      unsigned int row_length_indicators (const unsigned int row) const;
+
+      /**
+       * Returns a pointer to the first index in the DoF row @p row for plain
+       * indices (i.e., the entries where constraints are not embedded).
+       */
+      const unsigned int *begin_indices_plain (const unsigned int row) const;
+
+      /**
+       * Returns a pointer to the one past the last DoF index in the row @p
+       * row (i.e., the entries where constraints are not embedded).
+       */
+      const unsigned int *end_indices_plain (const unsigned int row) const;
+
+      /**
+       * Returns the FE index for a given finite element degree. If not in hp
+       * mode, this function always returns index 0. If an index is not found
+       * in hp mode, it returns the size of fe_index_conversion, i.e., one
+       * index past the last valid one.
+       */
+      unsigned int fe_index_from_degree (const unsigned int fe_degree) const;
+
+
+      /**
+       * Returns the FE index for a given number of degrees of freedom per
+       * cell. If not in hp mode or if the index is not found, this function
+       * always returns index 0. Hence, this function does not check whether
+       * the given number of degrees of freedom is actually present.
+       */
+      unsigned int
+      fe_index_from_dofs_per_cell (const unsigned int dofs_per_cell) const;
+
+      /**
+       * This internal method takes the local indices on a cell and fills them
+       * into this class. It resolves the constraints and distributes the
+       * results. Ghost indices, i.e., indices that are located on another
+       * processor, get a temporary number by this function, and will later be
+       * assigned the correct index after all the ghost indices have been
+       * collected by the call to @p assign_ghosts.
+       */
+      void read_dof_indices (const std::vector<types::global_dof_index> &local_indices,
+                             const std::vector<unsigned int> &lexicographic_inv,
+                             const ConstraintMatrix          &constraints,
+                             const unsigned int               cell_number,
+                             ConstraintValues<double> &constraint_values,
+                             bool                            &cell_at_boundary);
+
+      /**
+       * This method assigns the correct indices to ghost indices from the
+       * temporary numbering employed by the @p read_dof_indices function. The
+       * numbers are localized with respect to the MPI process, and ghosts
+       * start at the end of the locally owned range. This way, we get direct
+       * access to all vector entries.
+       */
+      void assign_ghosts(const std::vector<unsigned int> &boundary_cells);
+
+      /**
+       * Reorganizes cells for the serial (non-thread-parallelized) case such that
+       * boundary cells are placed in the middle. This way, computations and
+       * communication can be overlapped. Should only be called by one DoFInfo
+       * object when used on a system of several DoFHandlers.
+       */
+      void compute_renumber_serial (const std::vector<unsigned int> &boundary_cells,
+                                    const SizeInfo                  &size_info,
+                                    std::vector<unsigned int>       &renumbering);
+
+      /**
+       * Reorganizes cells in the hp case without parallelism such that all
+       * cells with the same FE index are placed consecutively. Should only be
+       * called by one DoFInfo object when used on a system of several
+       * DoFHandlers.
+       */
+      void compute_renumber_hp_serial (SizeInfo                  &size_info,
+                                       std::vector<unsigned int> &renumbering,
+                                       std::vector<unsigned int> &irregular_cells);
+
+      /**
+       * Computes the initial renumbering of cells such that all cells with
+       * ghosts are put first. This is the first step before building the
+       * thread graph and used to overlap computations and communication.
+       */
+      void compute_renumber_parallel (const std::vector<unsigned int> &boundary_cells,
+                                      SizeInfo                        &size_info,
+                                      std::vector<unsigned int>       &renumbering);
+
+      /**
+       * This method reorders the way cells are gone through based on a given
+       * renumbering of the cells. It also takes @p vectorization_length cells
+       * together and interprets them as one cell only, as is needed for
+       * vectorization.
+       */
+      void reorder_cells (const SizeInfo                  &size_info,
+                          const std::vector<unsigned int> &renumbering,
+                          const std::vector<unsigned int> &constraint_pool_row_index,
+                          const std::vector<unsigned int> &irregular_cells,
+                          const unsigned int               vectorization_length);
+
+      /**
+       * This helper function determines a block size if the user decided not
+       * to force a block size through MatrixFree::AdditionalData. This is
+       * computed based on the number of hardware threads on the system and
+       * the number of macro cells that we should work on.
+       */
+      void guess_block_size (const SizeInfo &size_info,
+                             TaskInfo       &task_info);
+
+      /**
+       * This method goes through all cells that have been filled into @p
+       * dof_indices and finds out which cells can be worked on independently
+       * and which ones are neighboring and need to be done at different times
+       * when used in parallel.
+       *
+       * The strategy is based on a two-level approach. The outer level is
+       * subdivided into partitions similar to the type of neighbors in
+       * Cuthill-McKee, and the inner level is subdivided via colors (for
+       * chunks within the same color, can work independently). One task is
+       * represented by a chunk of cells. The cell chunks are formed before
+       * subdivision into partitions and colors.
+       */
+      void
+      make_thread_graph_partition_color (SizeInfo                  &size_info,
+                                         TaskInfo                  &task_info,
+                                         std::vector<unsigned int> &renumbering,
+                                         std::vector<unsigned int> &irregular_cells,
+                                         const bool                 hp_bool);
+
+      /**
+       * This function goes through all cells that have been filled into @p
+       * dof_indices and finds out which cells can be worked on independently
+       * and which ones are neighboring and need to be done at different times
+       * when used in parallel.
+       *
+       * The strategy is based on a two-level approach. The outer level is
+       * subdivided into partitions similar to the type of neighbors in
+       * Cuthill-McKee, and the inner level is again subdivided into Cuthill-
+       * McKee-like partitions (partitions whose level differs by more than 2
+       * can be worked on independently). One task is represented by a chunk
+       * of cells. The cell chunks are formed after subdivision into the two
+       * levels of partitions.
+       */
+      void
+      make_thread_graph_partition_partition (SizeInfo                  &size_info,
+                                             TaskInfo                  &task_info,
+                                             std::vector<unsigned int> &renumbering,
+                                             std::vector<unsigned int> &irregular_cells,
+                                             const bool                 hp_bool);
+
+      /**
+       * This function computes the connectivity of the currently stored
+       * indices and fills the structure into a sparsity pattern. The
+       * parameter do_blocking can be used to specify whether several cells
+       * should be treated as one.
+       */
+      void
+      make_connectivity_graph (const SizeInfo                  &size_info,
+                               const TaskInfo                  &task_info,
+                               const std::vector<unsigned int> &renumbering,
+                               const std::vector<unsigned int> &irregular_cells,
+                               const bool                       do_blocking,
+                               DynamicSparsityPattern &connectivity) const;
+
+      /**
+       * Renumbers the degrees of freedom to give good access for this class.
+       */
+      void renumber_dofs (std::vector<types::global_dof_index> &renumbering);
+
+      /**
+       * Returns the memory consumption in bytes of this class.
+       */
+      std::size_t memory_consumption() const;
+
+      /**
+       * Prints a detailed summary of memory consumption in the different
+       * structures of this class to the given output stream.
+       */
+      template <typename StreamType>
+      void print_memory_consumption(StreamType     &out,
+                                    const SizeInfo &size_info) const;
+
+      /**
+       * Prints a representation of the indices in the class to the given
+       * output stream.
+       */
+      template <typename Number>
+      void print (const std::vector<Number>       &constraint_pool_data,
+                  const std::vector<unsigned int> &constraint_pool_row_index,
+                  std::ostream                    &out) const;
+
+      /**
+       * Stores the rowstart indices of the compressed row storage in the @p
+       * dof_indices and @p constraint_indicator fields. These two fields are
+       * always accessed together, so it is simpler to keep just one variable
+       * for them. This also obviates keeping two rowstart vectors in synch.
+       *
+       * In addition, the third field stores whether a particular cell has a
+       * certain structure in the indices, like indices for vector-valued
+       * problems or for cells where not all vector components are filled.
+       */
+      std::vector<std_cxx11::array<unsigned int, 3> > row_starts;
+
+      /**
+       * Stores the indices of the degrees of freedom for each cell. These
+       * indices are computed in MPI-local index space, i.e., each processor
+       * stores the locally owned indices as numbers between <tt>0</tt> and
+       * <tt>n_locally_owned_dofs-1</tt> and ghost indices in the range
+       * <tt>n_locally_owned_dofs</tt> to
+       * <tt>n_locally_owned_dofs+n_ghost_dofs</tt>. The translation between
+       * this MPI-local index space and the global numbering of degrees of
+       * freedom is stored in the @p vector_partitioner data structure.  This
+       * array also includes the indirect contributions from constraints,
+       * which are described by the @p constraint_indicator field. Because of
+       * variable lengths of rows, this would be a vector of a vector.
+       * However, we use one contiguous memory region and store the rowstart
+       * in the variable @p row_starts.
+       */
+      std::vector<unsigned int> dof_indices;
+
+      /**
+       * This variable describes the position of constraints in terms of the
+       * local numbering of degrees of freedom on a cell. The first number
+       * stores the distance from one constrained degree of freedom to the
+       * next. This allows to identify the position of constrained DoFs as we
+       * loop through the local degrees of freedom of the cell when reading
+       * from or writing to a vector. The second number stores the index of
+       * the constraint weights, stored in the variable constraint_pool_data.
+       */
+      std::vector<std::pair<unsigned short,unsigned short> > constraint_indicator;
+
+      /**
+       * This stores the parallel partitioning that can be used to set up
+       * vectors. The partitioner includes the description of the local range
+       * in the vector, and also includes what the ghosts look like. This
+       * enables initialization of vectors based on the DoFInfo field.
+       */
+      std_cxx11::shared_ptr<const Utilities::MPI::Partitioner> vector_partitioner;
+
+      /**
+       * This stores a (sorted) list of all locally owned degrees of freedom
+       * that are constrained.
+       */
+      std::vector<unsigned int> constrained_dofs;
+
+      /**
+       * Stores the rowstart indices of the compressed row storage in the @p
+       * plain_dof_indices fields.
+       */
+      std::vector<unsigned int> row_starts_plain_indices;
+
+      /**
+       * Stores the indices of the degrees of freedom for each cell. This
+       * array does not include the indirect contributions from constraints,
+       * which are included in @p dof_indices. Because of variable lengths of
+       * rows, this would be a vector of a vector. However, we use one
+       * contiguous memory region and store the rowstart in the variable @p
+       * row_starts_plain_indices.
+       */
+      std::vector<unsigned int> plain_dof_indices;
+
+      /**
+       * Stores the dimension of the underlying DoFHandler. Since the indices
+       * are not templated, this is the variable that makes the dimension
+       * accessible in the (rare) cases it is needed inside this class.
+       */
+      unsigned int dimension;
+
+      /**
+       * Stores the number of components in the DoFHandler where the indices
+       * have been read from.
+       */
+      unsigned int n_components;
+
+      /**
+       * Stores the number of degrees of freedom per cell.
+       */
+      std::vector<unsigned int> dofs_per_cell;
+
+      /**
+       * Stores the number of degrees of freedom per face.
+       */
+      std::vector<unsigned int> dofs_per_face;
+
+      /**
+       * Informs on whether plain indices are cached.
+       */
+      bool store_plain_indices;
+
+      /**
+       * Stores the index of the active finite element in the hp case.
+       */
+      std::vector<unsigned int> cell_active_fe_index;
+
+      /**
+       * Stores the maximum degree of different finite elements for the hp
+       * case.
+       */
+      unsigned int max_fe_index;
+
+      /**
+       * This variable stores the finite element degree (first) and the dofs
+       * per cell (second) associated with each fe index in the underlying
+       * element for easier access to data in the hp case.
+       */
+      std::vector<std::pair<unsigned int,unsigned int> > fe_index_conversion;
+
+      /**
+       * Temporarily stores the numbers of ghosts during setup. Cleared when
+       * calling @p assign_ghosts. Then, all information is collected by the
+       * partitioner.
+       */
+      std::vector<types::global_dof_index> ghost_dofs;
+    };
+
+
+    /*----------------------- Inline functions ----------------------------------*/
+
+#ifndef DOXYGEN
+
+    inline
+    const unsigned int *
+    DoFInfo::begin_indices (const unsigned int row) const
+    {
+      AssertIndexRange (row, row_starts.size()-1);
+      const unsigned int offset = row_starts[row][0];   // start of this row in dof_indices
+      AssertIndexRange (offset, dof_indices.size()+1);
+      return !dof_indices.empty() ? &dof_indices[0] + offset : 0;
+    }
+
+
+
+    inline
+    const unsigned int *
+    DoFInfo::end_indices (const unsigned int row) const
+    {
+      AssertIndexRange (row, row_starts.size()-1);
+      const unsigned int offset = row_starts[row+1][0];   // start of the following row
+      AssertIndexRange (offset, dof_indices.size()+1);
+      return !dof_indices.empty() ? &dof_indices[0] + offset : 0;
+    }
+
+
+
+    inline
+    unsigned int
+    DoFInfo::row_length_indices (const unsigned int row) const
+    {
+      AssertIndexRange (row, row_starts.size()-1);
+      return row_starts[row+1][0] - row_starts[row][0];   // next start minus own start
+    }
+
+
+
+    inline
+    const std::pair<unsigned short,unsigned short> *
+    DoFInfo::begin_indicators (const unsigned int row) const
+    {
+      AssertIndexRange (row, row_starts.size()-1);
+      const unsigned int offset = row_starts[row][1];   // start of this row's indicators
+      AssertIndexRange (offset, constraint_indicator.size()+1);
+      return !constraint_indicator.empty() ? &constraint_indicator[0] + offset : 0;
+    }
+
+
+
+    inline
+    const std::pair<unsigned short,unsigned short> *
+    DoFInfo::end_indicators (const unsigned int row) const
+    {
+      AssertIndexRange (row, row_starts.size()-1);
+      const unsigned int offset = row_starts[row+1][1];   // start of the following row
+      AssertIndexRange (offset, constraint_indicator.size()+1);
+      return !constraint_indicator.empty() ? &constraint_indicator[0] + offset : 0;
+    }
+
+
+
+    inline
+    unsigned int
+    DoFInfo::row_length_indicators (const unsigned int row) const
+    {
+      AssertIndexRange (row, row_starts.size()-1);
+      return row_starts[row+1][1] - row_starts[row][1];   // next start minus own start
+    }
+
+
+
+    inline
+    const unsigned int *
+    DoFInfo::begin_indices_plain (const unsigned int row) const
+    {
+      // rows that carry constraint indicators have a separate copy of their
+      // indices in plain_dof_indices; all other rows are read from dof_indices
+      if (row_length_indicators(row) != 0)
+        {
+          AssertDimension (row_starts.size(), row_starts_plain_indices.size());
+          const unsigned int offset = row_starts_plain_indices[row];
+          AssertIndexRange (offset, plain_dof_indices.size()+1);
+          return !plain_dof_indices.empty() ? &plain_dof_indices[0] + offset : 0;
+        }
+
+      // no constraints on this row: its plain row start must be marked invalid
+      Assert (row_starts_plain_indices[row]==numbers::invalid_unsigned_int,
+              ExcInternalError());
+      return begin_indices(row);
+    }
+
+
+
+    inline
+    const unsigned int *
+    DoFInfo::end_indices_plain (const unsigned int row) const
+    {
+      const unsigned int fe_index =   // slot 0 when no active FE indices are stored
+        cell_active_fe_index.empty() ? 0 : cell_active_fe_index[row];
+      return begin_indices_plain(row) + dofs_per_cell[fe_index];
+    }
+
+
+
+    inline
+    unsigned int
+    DoFInfo::fe_index_from_degree (const unsigned int fe_degree) const
+    {
+      unsigned int fe_index = 0;
+      while (fe_index < fe_index_conversion.size() &&
+             fe_index_conversion[fe_index].first != fe_degree)
+        ++fe_index;
+      return fe_index;   // equals the table size if no entry matched
+    }
+
+
+
+    inline
+    unsigned int
+    DoFInfo::fe_index_from_dofs_per_cell (const unsigned int dofs_per_cell) const
+    {
+      for (unsigned int fe=0; fe!=fe_index_conversion.size(); ++fe)
+        if (dofs_per_cell == fe_index_conversion[fe].second)
+          return fe;
+      return 0;   // no match: fall back to index 0
+    }
+
+#endif  // ifndef DOXYGEN
+
+  } // end of namespace MatrixFreeFunctions
+} // end of namespace internal
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/matrix_free/dof_info.templates.h b/include/deal.II/matrix_free/dof_info.templates.h
new file mode 100644
index 0000000..33789b3
--- /dev/null
+++ b/include/deal.II/matrix_free/dof_info.templates.h
@@ -0,0 +1,2009 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2011 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/base/multithread_info.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/lac/sparsity_pattern.h>
+#include <deal.II/matrix_free/dof_info.h>
+#include <deal.II/matrix_free/helper_functions.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace internal
+{
+  namespace MatrixFreeFunctions
+  {
+
+    struct ConstraintComparator   // orders (index, weight) pairs by weight only
+    {
+      bool operator()(const std::pair<types::global_dof_index,double> &lhs,
+                      const std::pair<types::global_dof_index,double> &rhs) const
+      {
+        return lhs.second < rhs.second;
+      }
+    };
+
+    /**
+     * A struct that takes entries describing a constraint and puts them into
+     * a sorted list where duplicates are filtered out
+     */
+    template <typename Number>
+    struct ConstraintValues
+    {
+      ConstraintValues();
+
+      /**
+       * This function inserts some constrained entries to the collection of
+       * all values. It stores the (reordered) numbering of the dofs
+       * (sorted by ascending constraint weight) in constraint_indices, and
+       * returns the position of the weight array in the pool of distinct
+       * constraints for access later on.
+       */
+      unsigned short
+      insert_entries (const std::vector<std::pair<types::global_dof_index,double> > &entries);
+
+      std::vector<std::pair<types::global_dof_index, double> > constraint_entries;
+      std::vector<types::global_dof_index> constraint_indices;
+
+      std::pair<std::vector<Number>, types::global_dof_index> next_constraint;
+      std::map<std::vector<Number>, types::global_dof_index, FPArrayComparator<Number> > constraints;
+    };
+
+
+    template <typename Number>
+    ConstraintValues<Number>::ConstraintValues ()
+      :
+      constraints(FPArrayComparator<Number>(1.))
+    {}
+
+    template <typename Number>
+    unsigned short
+    ConstraintValues<Number>::
+    insert_entries (const std::vector<std::pair<types::global_dof_index,double> > &entries)
+    {
+      next_constraint.first.resize(entries.size());
+      if (entries.size() > 0)
+        {
+          constraint_indices.resize(entries.size());
+          constraint_entries = entries;
+          std::sort(constraint_entries.begin(), constraint_entries.end(),
+                    ConstraintComparator());
+          for (types::global_dof_index j=0; j<constraint_entries.size(); j++)
+            {
+              // copy the indices of the constraint entries after sorting.
+              constraint_indices[j] = constraint_entries[j].first;
+
+              // next_constraint.first takes the weights of the constraint
+              next_constraint.first[j] = constraint_entries[j].second;
+            }
+        }
+      next_constraint.second = constraints.size();
+
+      // check whether or not constraint is already in pool. the initial
+      // implementation computed a hash value based on the truncated array (to
+      // given accuracy around 1e-13) in order to easily detect different
+      // arrays and then made a fine-grained check when the hash values were
+      // equal. this was quite lengthy and now we use a std::map with a
+      // user-defined comparator to compare floating point arrays to a
+      // tolerance 1e-13.
+      std::pair<typename std::map<std::vector<Number>, types::global_dof_index,
+          FPArrayComparator<Number> >::iterator,
+          bool> it = constraints.insert(next_constraint);
+
+      types::global_dof_index insert_position = numbers::invalid_dof_index;
+      if (it.second == false)
+        insert_position = it.first->second;
+      else
+        insert_position = next_constraint.second;
+
+      // we want to store the result as a short variable, so we have to make
+      // sure that the result does not exceed the limits when casting.
+      Assert(insert_position < (1<<(8*sizeof(unsigned short))),
+             ExcInternalError());
+      return static_cast<unsigned short>(insert_position);
+    }
+
+
+
+    // ----------------- actual DoFInfo functions -----------------------------
+
+    DoFInfo::DoFInfo ()
+    {
+      this->clear();   // start from the state that clear() establishes
+    }
+
+
+    DoFInfo::DoFInfo (const DoFInfo &other)   // member-wise copy of all sixteen fields
+      :
+      row_starts (other.row_starts),
+      dof_indices (other.dof_indices),
+      constraint_indicator (other.constraint_indicator),
+      vector_partitioner (other.vector_partitioner),
+      constrained_dofs (other.constrained_dofs),
+      row_starts_plain_indices (other.row_starts_plain_indices),
+      plain_dof_indices (other.plain_dof_indices),
+      dimension (other.dimension),
+      n_components (other.n_components),
+      dofs_per_cell (other.dofs_per_cell),
+      dofs_per_face (other.dofs_per_face),
+      store_plain_indices (other.store_plain_indices),
+      cell_active_fe_index (other.cell_active_fe_index),
+      max_fe_index (other.max_fe_index),
+      fe_index_conversion (other.fe_index_conversion),
+      ghost_dofs (other.ghost_dofs)
+    {}
+
+
+
+    void
+    DoFInfo::clear ()
+    {
+      row_starts.clear();
+      dof_indices.clear();
+      constraint_indicator.clear();
+      vector_partitioner.reset();
+      constrained_dofs.clear();   // was left out before although it is a data field too
+      ghost_dofs.clear();
+      dofs_per_cell.clear();
+      dofs_per_face.clear();
+      dimension = 2;              // scalar members go back to their defaults
+      n_components = 0;
+      row_starts_plain_indices.clear();
+      plain_dof_indices.clear();
+      store_plain_indices = false;
+      cell_active_fe_index.clear();
+      max_fe_index = 0;
+      fe_index_conversion.clear();
+    }
+
+
+    void
+    DoFInfo::read_dof_indices (const std::vector<types::global_dof_index> &local_indices,
+                               const std::vector<unsigned int> &lexicographic_inv,
+                               const ConstraintMatrix          &constraints,
+                               const unsigned int               cell_number,
+                               ConstraintValues<double> &constraint_values,
+                               bool                            &cell_at_boundary)
+    {
+      Assert (vector_partitioner.get() !=0, ExcInternalError());
+      const unsigned int n_mpi_procs = vector_partitioner->n_mpi_processes();
+      const types::global_dof_index first_owned = vector_partitioner->local_range().first;
+      const types::global_dof_index last_owned  = vector_partitioner->local_range().second;
+      Assert (last_owned-first_owned < std::numeric_limits<unsigned int>::max(),
+              ExcMessage("The size local range of owned indices must not "
+                         "exceed the size of unsigned int"));
+      const unsigned int n_owned     = last_owned - first_owned;
+      std::pair<unsigned short,unsigned short> constraint_iterator (0,0);
+
+      unsigned int dofs_this_cell = (cell_active_fe_index.empty()) ?
+                                    dofs_per_cell[0] : dofs_per_cell[cell_active_fe_index[cell_number]];
+      for (unsigned int i=0; i<dofs_this_cell; i++)
+        {
+          types::global_dof_index current_dof =
+            local_indices[lexicographic_inv[i]];
+          const std::vector<std::pair<types::global_dof_index,double> >
+          *entries_ptr =
+            constraints.get_constraint_entries(current_dof);
+
+          // dof is constrained
+          if (entries_ptr != 0)
+            {
+              // in case we want to access plain indices, we need to know
+              // about the location of constrained indices as well (all the
+              // other indices are collected by the cases below)
+              if (current_dof < first_owned || current_dof >= last_owned)
+                {
+                  ghost_dofs.push_back (current_dof);
+                  cell_at_boundary = true;
+                }
+
+              // check whether this dof is identity constrained to another
+              // dof. then we can simply insert that dof and there is no need
+              // to actually resolve the constraint entries
+              const std::vector<std::pair<types::global_dof_index,double> >
+              &entries = *entries_ptr;
+              const types::global_dof_index n_entries = entries.size();
+              if (n_entries == 1 && std::fabs(entries[0].second-1.)<1e-14)
+                {
+                  current_dof = entries[0].first;
+                  goto no_constraint;
+                }
+
+              // append a new index to the indicators
+              constraint_indicator.push_back (constraint_iterator);
+              constraint_indicator.back().second =
+                constraint_values.insert_entries (entries);
+
+              // reset constraint iterator for next round
+              constraint_iterator.first = 0;
+
+              // add the local_to_global indices computed in the
+              // insert_entries function. transform the index to local index
+              // space or mark it as ghost if necessary
+              if (n_entries > 0)
+                {
+                  const std::vector<types::global_dof_index> &constraint_indices =
+                    constraint_values.constraint_indices;
+                  for (unsigned int j=0; j<n_entries; ++j)
+                    {
+                      if (n_mpi_procs > 1 &&
+                          (constraint_indices[j] < first_owned ||
+                           constraint_indices[j] >= last_owned))
+                        {
+                          dof_indices.push_back (n_owned + ghost_dofs.size());
+
+                          // collect ghosts so that we can later construct an
+                          // IndexSet for them. also store whether the current
+                          // cell is on the boundary
+                          ghost_dofs.push_back(constraint_indices[j]);
+                          cell_at_boundary = true;
+                        }
+                      else
+                        // not ghost, so transform to the local index space
+                        // directly
+                        dof_indices.push_back
+                        (static_cast<unsigned int>(constraint_indices[j] -
+                                                   first_owned));
+                    }
+                }
+            }
+          else
+            {
+no_constraint:
+              // Not constrained, we simply have to add the local index to the
+              // indices_local_to_global list and increment constraint
+              // iterator. transform to local index space/mark as ghost
+              if (n_mpi_procs > 1 &&
+                  (current_dof < first_owned ||
+                   current_dof >= last_owned))
+                {
+                  ghost_dofs.push_back(current_dof);
+                  current_dof = n_owned + ghost_dofs.size()-1;
+                  cell_at_boundary = true;
+                }
+              else
+                current_dof -= first_owned;
+
+              dof_indices.push_back (static_cast<unsigned int>(current_dof));
+
+              // make sure constraint_iterator.first is always within the
+              // bounds of unsigned short
+              Assert (constraint_iterator.first <
+                      (1<<(8*sizeof(unsigned short)))-1,
+                      ExcInternalError());
+              constraint_iterator.first++;
+            }
+        }
+      row_starts[cell_number+1][0] = dof_indices.size();
+      row_starts[cell_number+1][1] = constraint_indicator.size();
+      row_starts[cell_number+1][2] = 0;
+
+      // now to the plain indices: in case we have constraints on this cell,
+      // store the indices without the constraints resolve once again
+      if (store_plain_indices == true)
+        {
+          if (cell_number == 0)
+            row_starts_plain_indices.resize (row_starts.size());
+          row_starts_plain_indices[cell_number] = plain_dof_indices.size();
+          bool cell_has_constraints = (row_starts[cell_number+1][1] >
+                                       row_starts[cell_number][1]);
+          if (cell_has_constraints == true)
+            {
+              for (unsigned int i=0; i<dofs_this_cell; ++i)
+                {
+                  types::global_dof_index current_dof =
+                    local_indices[lexicographic_inv[i]];
+                  if (n_mpi_procs > 1 &&
+                      (current_dof < first_owned ||
+                       current_dof >= last_owned))
+                    {
+                      ghost_dofs.push_back(current_dof);
+                      current_dof = n_owned + ghost_dofs.size()-1;
+                      cell_at_boundary = true;
+                    }
+                  else
+                    current_dof -= first_owned;
+                  plain_dof_indices.push_back (static_cast<unsigned int>
+                                               (current_dof));
+                }
+            }
+        }
+    }
+
+
+
+    // Replaces the provisional numbering of ghost dofs (n_owned plus the
+    // position in the ghost_dofs list) by n_owned plus the position in the
+    // sorted, duplicate-free set of ghost indices, and passes that set on to
+    // the vector partitioner. Only the index rows of the cells listed in
+    // boundary_cells are rewritten. The ghost_dofs list is released at the
+    // end.
+    void
+    DoFInfo::assign_ghosts (const std::vector<unsigned int> &boundary_cells)
+    {
+      Assert (boundary_cells.size() < row_starts.size(), ExcInternalError());
+
+      // length of the locally owned range; stored indices at or beyond this
+      // value refer to ghosts
+      const unsigned int n_owned =
+        vector_partitioner->local_range().second -
+        vector_partitioner->local_range().first;
+      const std::size_t n_ghosts = ghost_dofs.size();
+#ifdef DEBUG
+      for (std::size_t k=0; k<dof_indices.size(); ++k)
+        AssertIndexRange (dof_indices[k], n_owned+n_ghosts);
+#endif
+
+      // ghost_numbering[k] will hold the rank of ghost_dofs[k] among the
+      // distinct ghost indices in ascending order
+      std::vector<unsigned int> ghost_numbering (n_ghosts);
+      IndexSet ghost_indices (vector_partitioner->size());
+      if (n_ghosts > 0)
+        {
+          // pair every ghost index with its provisional number so that the
+          // association survives the sorting step
+          typedef std::pair<types::global_dof_index,unsigned int> GhostEntry;
+          std::vector<GhostEntry> sorted_ghosts;
+          sorted_ghosts.reserve (n_ghosts);
+          for (std::size_t k=0; k<n_ghosts; ++k)
+            sorted_ghosts.push_back (GhostEntry(ghost_dofs[k],
+                                                static_cast<unsigned int>(k)));
+          std::sort (sorted_ghosts.begin(), sorted_ghosts.end());
+
+          // walk through the sorted list: duplicates share a rank, and each
+          // maximal run of consecutive indices is added as one range
+          unsigned int rank = 0;
+          types::global_dof_index range_begin = sorted_ghosts[0].first;
+          ghost_numbering[sorted_ghosts[0].second] = rank;
+          for (std::size_t k=1; k<n_ghosts; ++k)
+            {
+              const types::global_dof_index previous = sorted_ghosts[k-1].first;
+              const types::global_dof_index current  = sorted_ghosts[k].first;
+              if (current > previous+1)
+                {
+                  ghost_indices.add_range (range_begin, previous+1);
+                  range_begin = current;
+                }
+              if (current > previous)
+                ++rank;
+              ghost_numbering[sorted_ghosts[k].second] = rank;
+            }
+          ghost_indices.add_range (range_begin, sorted_ghosts.back().first+1);
+          ghost_indices.compress();
+
+          // consistency check: the ranks computed by hand must agree with
+          // the positions reported by the index set
+          AssertDimension (rank+1, ghost_indices.n_elements());
+          for (std::size_t k=0; k<n_ghosts; ++k)
+            Assert (ghost_numbering[k] ==
+                    ghost_indices.index_within_set(ghost_dofs[k]),
+                    ExcInternalError());
+
+          // translate the provisional ghost numbers stored for the boundary
+          // cells into the final ones; owned indices stay as they are
+          const unsigned int n_boundary_cells = boundary_cells.size();
+          for (unsigned int c=0; c<n_boundary_cells; ++c)
+            {
+              const unsigned int cell = boundary_cells[c];
+
+              unsigned int *entry =
+                const_cast<unsigned int *> (begin_indices(cell));
+              const unsigned int *const entries_end = end_indices(cell);
+              for ( ; entry != entries_end; ++entry)
+                if (!(*entry < n_owned))
+                  *entry = n_owned + ghost_numbering[*entry - n_owned];
+
+              // the same translation for the plain indices, if present
+              if (store_plain_indices == true &&
+                  row_length_indicators(cell) > 0)
+                {
+                  unsigned int *plain_entry =
+                    const_cast<unsigned int *> (begin_indices_plain(cell));
+                  const unsigned int *const plain_end = end_indices_plain(cell);
+                  for ( ; plain_entry != plain_end; ++plain_entry)
+                    if (!(*plain_entry < n_owned))
+                      *plain_entry =
+                        n_owned + ghost_numbering[*plain_entry - n_owned];
+                }
+            }
+        }
+
+      // the temporary list is not needed any more; swapping with an empty
+      // vector also gives back its memory
+      std::vector<types::global_dof_index> released;
+      released.swap (ghost_dofs);
+
+      // set the ghost indices now. constness has to be cast away here, which
+      // is uncritical since the Partitioner is reset in the same initialize
+      // call as this call here.
+      Utilities::MPI::Partitioner *const partitioner =
+        const_cast<Utilities::MPI::Partitioner *>(vector_partitioner.get());
+      partitioner->set_ghost_indices (ghost_indices);
+    }
+
+
+
+    // Computes a cell renumbering in which the cells listed in
+    // boundary_cells occupy the consecutive positions starting at
+    // vectorization_length*boundary_cells_start, while all remaining cells
+    // keep their relative order and fill the positions before and after that
+    // window. The result is written to renumbering as the inverse of the
+    // old-to-new map built here.
+    void
+    DoFInfo::compute_renumber_serial (const std::vector<unsigned int> &boundary_cells,
+                                      const SizeInfo                  &size_info,
+                                      std::vector<unsigned int>       &renumbering)
+    {
+      const unsigned int n_cells          = size_info.n_active_cells;
+      const unsigned int n_boundary_cells = boundary_cells.size();
+      const unsigned int boundary_offset  =
+        size_info.vectorization_length*size_info.boundary_cells_start;
+
+      // new_position[old cell] = new cell; invalid means "not yet assigned"
+      std::vector<unsigned int> new_position (n_cells,
+                                              numbers::invalid_unsigned_int);
+
+      // the boundary cells go into their window first
+      for (unsigned int b=0; b<n_boundary_cells; ++b)
+        new_position[boundary_cells[b]] = boundary_offset + b;
+
+      // fill the positions in front of the window with the first
+      // unassigned cells
+      unsigned int next_slot = 0;
+      unsigned int cell = 0;
+      for ( ; next_slot < n_cells && next_slot < boundary_offset; ++cell)
+        if (new_position[cell] == numbers::invalid_unsigned_int)
+          new_position[cell] = next_slot++;
+
+      // continue behind the window with whatever cells are left
+      next_slot = std::min (boundary_offset + n_boundary_cells, n_cells);
+      if (next_slot < n_cells)
+        while (cell < n_cells)
+          {
+            if (new_position[cell] == numbers::invalid_unsigned_int)
+              new_position[cell] = next_slot++;
+            ++cell;
+          }
+
+      AssertDimension (next_slot, size_info.n_active_cells);
+      renumbering = Utilities::invert_permutation (new_position);
+    }
+
+
+
+    // Helper for DoFInfo::compute_renumber_hp_serial: stably groups the
+    // entries renumbering[range_begin], ..., renumbering[range_end-1] by the
+    // active FE index of the cell they refer to and writes them back into the
+    // same range, one FE index after the other. For every FE index, the group
+    // is cut into macro cells of vectorization_length cells; the number of
+    // cells in a trailing, incompletely filled macro cell is recorded in
+    // irregular_cells (a zero is written when the group fills its macro cells
+    // exactly or is empty). Macro cells are counted starting from
+    // n_macro_cells_before, and the updated count is returned.
+    inline
+    unsigned int
+    group_cell_range_by_fe_index (const std::vector<unsigned int> &cell_active_fe_index,
+                                  const unsigned int               n_fe_indices,
+                                  const unsigned int               range_begin,
+                                  const unsigned int               range_end,
+                                  const unsigned int               vectorization_length,
+                                  const unsigned int               n_macro_cells_before,
+                                  std::vector<unsigned int>       &renumbering,
+                                  std::vector<unsigned int>       &irregular_cells)
+    {
+      // collect the cells of the range, separately for each FE index
+      std::vector<std::vector<unsigned int> > cells_with_fe_index (n_fe_indices);
+      for (unsigned int pos=range_begin; pos<range_end; ++pos)
+        cells_with_fe_index[cell_active_fe_index[renumbering[pos]]].
+        push_back(renumbering[pos]);
+
+      // write the groups back; the range is refilled completely because
+      // every cell read above ended up in exactly one group
+      unsigned int write_pos     = range_begin;
+      unsigned int n_macro_cells = n_macro_cells_before;
+      for (unsigned int fe=0; fe<n_fe_indices; ++fe)
+        {
+          const std::vector<unsigned int> &group = cells_with_fe_index[fe];
+          for (std::size_t k=0; k<group.size(); ++k)
+            renumbering[write_pos++] = group[k];
+
+          // position of the (possibly incomplete) last macro cell of this
+          // group and the number of cells in it
+          irregular_cells[group.size()/vectorization_length + n_macro_cells] =
+            group.size()%vectorization_length;
+          n_macro_cells += (group.size()+vectorization_length-1)/
+                           vectorization_length;
+        }
+      return n_macro_cells;
+    }
+
+
+
+    // For hp problems (max_fe_index >= 2), reorders renumbering so that
+    // within each of the three cell ranges [0, start_bound),
+    // [start_bound, end_bound) and [end_bound, n_active_cells) the cells are
+    // grouped by active FE index. Here start_bound and end_bound are
+    // boundary_cells_start and boundary_cells_end times the vectorization
+    // length, capped at n_active_cells. Since a macro cell only combines
+    // cells of one group, the number of macro cells can grow: irregular_cells
+    // is rebuilt, and n_macro_cells, boundary_cells_start and
+    // boundary_cells_end in size_info are updated to the new macro cell
+    // counts. Does nothing if max_fe_index < 2.
+    void
+    DoFInfo::compute_renumber_hp_serial (SizeInfo                  &size_info,
+                                         std::vector<unsigned int> &renumbering,
+                                         std::vector<unsigned int> &irregular_cells)
+    {
+      if (max_fe_index < 2)
+        return;
+      const unsigned int n_active_cells = size_info.n_active_cells;
+      const unsigned int vectorization_length = size_info.vectorization_length;
+
+      // each of the three ranges can add at most one incomplete macro cell
+      // per FE index, hence the 3*max_fe_index extra entries; all entries
+      // start out as zero
+      irregular_cells.resize (0);
+      irregular_cells.resize (size_info.n_macro_cells+3*max_fe_index);
+
+      const unsigned int
+      start_bound = std::min (size_info.n_active_cells,
+                              size_info.boundary_cells_start*vectorization_length),
+                    end_bound   = std::min (size_info.n_active_cells,
+                                            size_info.boundary_cells_end*vectorization_length);
+
+      // cells in front of the boundary cells
+      const unsigned int new_boundary_start =
+        group_cell_range_by_fe_index (cell_active_fe_index, max_fe_index,
+                                      0, start_bound,
+                                      vectorization_length, 0,
+                                      renumbering, irregular_cells);
+
+      // the boundary cells
+      const unsigned int new_boundary_end =
+        group_cell_range_by_fe_index (cell_active_fe_index, max_fe_index,
+                                      start_bound, end_bound,
+                                      vectorization_length, new_boundary_start,
+                                      renumbering, irregular_cells);
+
+      // cells behind the boundary cells
+      const unsigned int n_macro_cells_new =
+        group_cell_range_by_fe_index (cell_active_fe_index, max_fe_index,
+                                      end_bound, n_active_cells,
+                                      vectorization_length, new_boundary_end,
+                                      renumbering, irregular_cells);
+
+      AssertIndexRange (n_macro_cells_new,
+                        size_info.n_macro_cells + 3*max_fe_index+1);
+      irregular_cells.resize (n_macro_cells_new);
+      size_info.n_macro_cells = n_macro_cells_new;
+      size_info.boundary_cells_start = new_boundary_start;
+      size_info.boundary_cells_end = new_boundary_end;
+    }
+
+
+
+    // Computes a cell renumbering that puts the cells listed in
+    // boundary_cells first (in the order given) and lets all other cells
+    // follow in their original relative order. The boundary window in
+    // size_info is shifted so that it starts at zero while keeping its
+    // length. The result is written to renumbering as the inverse of the
+    // old-to-new map built here.
+    void
+    DoFInfo::compute_renumber_parallel (const std::vector<unsigned int> &boundary_cells,
+                                        SizeInfo                        &size_info,
+                                        std::vector<unsigned int>       &renumbering)
+    {
+      // new_position[old cell] = new cell; invalid means "not yet assigned"
+      std::vector<unsigned int> new_position (size_info.n_active_cells,
+                                              numbers::invalid_unsigned_int);
+
+      // boundary cells take the leading positions
+      const unsigned int n_boundary_cells = boundary_cells.size();
+      for (unsigned int b=0; b<n_boundary_cells; ++b)
+        new_position[boundary_cells[b]] = b;
+
+      // every cell not assigned so far is appended behind them
+      unsigned int next_slot = n_boundary_cells;
+      for (std::vector<unsigned int>::iterator pos = new_position.begin();
+           pos != new_position.end(); ++pos)
+        if (*pos == numbers::invalid_unsigned_int)
+          *pos = next_slot++;
+
+      // move the boundary window to the front
+      size_info.boundary_cells_end  -= size_info.boundary_cells_start;
+      size_info.boundary_cells_start = 0;
+
+      AssertDimension (next_slot, size_info.n_active_cells);
+      renumbering = Utilities::invert_permutation (new_position);
+    }
+
+
+    /**
+     * Rebuild the index storage of this object in the cell order given by
+     * @p renumbering, combining several cells into each macro cell.
+     *
+     * On entry, row_starts, dof_indices, constraint_indicator and (if
+     * store_plain_indices is set) plain_dof_indices are read one row per cell
+     * through begin_indices(), begin_indicators() etc. On exit they hold one
+     * row per macro cell, with the entries of the cells of a macro cell
+     * interleaved: first the data of local dof 0 for all cells, then that of
+     * local dof 1, and so on. cell_active_fe_index, if non-empty, is reduced
+     * to one entry per macro cell.
+     *
+     * @param size_info Provides n_macro_cells; boundary_cells_start and
+     * boundary_cells_end are only read by the sanity checks in debug mode.
+     * @param renumbering renumbering[p] is the old row whose data goes to
+     * position p of the new cell order.
+     * @param constraint_pool_row_index For a constraint c, the difference of
+     * entries c+1 and c is the number of indices copied for a constrained
+     * dof.
+     * @param irregular_cells For each macro cell, the number of cells it
+     * contains, where zero stands for @p vectorization_length cells.
+     * @param vectorization_length Number of cells in a completely filled
+     * macro cell.
+     */
+    void
+    DoFInfo::reorder_cells (const SizeInfo                  &size_info,
+                            const std::vector<unsigned int> &renumbering,
+                            const std::vector<unsigned int> &constraint_pool_row_index,
+                            const std::vector<unsigned int> &irregular_cells,
+                            const unsigned int               vectorization_length)
+    {
+      // first reorder the active fe index (one entry per macro cell afterwards).
+      if (cell_active_fe_index.size() > 0)
+        {
+          std::vector<unsigned int> new_active_fe_index;
+          new_active_fe_index.reserve (size_info.n_macro_cells);
+          std::vector<unsigned int> fe_indices(vectorization_length);
+          unsigned int position_cell = 0;
+          for (unsigned int cell=0; cell<size_info.n_macro_cells; ++cell)
+            {
+              const unsigned int n_comp = (irregular_cells[cell] > 0 ?
+                                           irregular_cells[cell] : vectorization_length);
+              for (unsigned int j=0; j<n_comp; ++j)
+                fe_indices[j]=cell_active_fe_index[renumbering[position_cell+j]];
+
+              // by construction, all cells should have the same fe index.
+              for (unsigned int j=1; j<n_comp; ++j)
+                Assert (fe_indices[j] == fe_indices[0], ExcInternalError());
+
+              new_active_fe_index.push_back(fe_indices[0]);
+              position_cell += n_comp;
+            }
+          std::swap (new_active_fe_index, cell_active_fe_index);
+        }
+
+      std::vector<std_cxx11::array<unsigned int, 3> > new_row_starts;
+      std::vector<unsigned int> new_dof_indices;
+      std::vector<std::pair<unsigned short,unsigned short> >
+      new_constraint_indicator;
+      std::vector<unsigned int> new_plain_indices, new_rowstart_plain;
+      unsigned int position_cell = 0;
+      new_row_starts.resize (size_info.n_macro_cells + 1);
+      new_dof_indices.reserve (dof_indices.size());
+      new_constraint_indicator.reserve (constraint_indicator.size());
+      if (store_plain_indices == true)
+        {
+          new_rowstart_plain.resize (size_info.n_macro_cells + 1,
+                                     numbers::invalid_unsigned_int);
+          new_plain_indices.reserve (plain_dof_indices.size());
+        }
+
+      // copy the indices and the constraint indicators to the new data field:
+      // Store the indices in a way so that adjacent data fields in local
+      // vectors are adjacent, i.e., first dof index 0 for all vectors, then
+      // dof index 1 for all vectors, and so on. This involves some extra
+      // resorting.
+      std::vector<const unsigned int *> glob_indices (vectorization_length);
+      std::vector<const unsigned int *> plain_glob_indices (vectorization_length);
+      std::vector<const std::pair<unsigned short,unsigned short>*>
+      constr_ind(vectorization_length), constr_end(vectorization_length);
+      std::vector<unsigned int> index(vectorization_length);
+      for (unsigned int i=0; i<size_info.n_macro_cells; ++i)
+        {
+          const unsigned int dofs_mcell =
+            dofs_per_cell[cell_active_fe_index.size() == 0 ? 0 :
+                          cell_active_fe_index[i]] * vectorization_length;
+          new_row_starts[i][0] = new_dof_indices.size();
+          new_row_starts[i][1] = new_constraint_indicator.size();
+          new_row_starts[i][2] = irregular_cells[i];
+          // a zero entry in irregular_cells denotes a completely filled macro cell
+          const unsigned int n_comp = (irregular_cells[i]>0 ?
+                                       irregular_cells[i] : vectorization_length);
+          // set up read positions in the old data for each cell of this macro cell
+          for (unsigned int j=0; j<n_comp; ++j)
+            {
+              glob_indices[j] = begin_indices(renumbering[position_cell+j]);
+              constr_ind[j] = begin_indicators(renumbering[position_cell+j]);
+              constr_end[j] = end_indicators(renumbering[position_cell+j]);
+              index[j] = 0;
+            }
+          // plain indices: a cell without constraints reuses its resolved indices
+          bool has_constraints = false;
+          if (store_plain_indices == true)
+            {
+              for (unsigned int j=0; j<n_comp; ++j)
+                if (begin_indicators(renumbering[position_cell+j]) <
+                    end_indicators(renumbering[position_cell+j]))
+                  {
+                    plain_glob_indices[j] =
+                      begin_indices_plain (renumbering[position_cell+j]);
+                    has_constraints = true;
+                  }
+                else
+                  plain_glob_indices[j] =
+                    begin_indices (renumbering[position_cell+j]);
+              if (has_constraints == true)
+                new_rowstart_plain[i] = new_plain_indices.size();
+            }
+          // m_ind_local counts all (dof, cell) slots; m_index counts plain copies since the last constraint
+          unsigned int m_ind_local = 0, m_index = 0;
+          while (m_ind_local < dofs_mcell)
+            for (unsigned int j=0; j<vectorization_length; ++j)
+              {
+                // slot beyond n_comp in an incompletely filled macro cell: nothing to copy
+                if (j >= n_comp)
+                  {
+                    ++m_ind_local;
+                    continue;
+                  }
+
+                // otherwise, check if we are a constrained dof. The dof is
+                // not constrained if we are at the end of the row for the
+                // constraints (constr_ind[j] == constr_end[j]) or if the
+                // local index[j] is smaller than the next position for a
+                // constraint. Then, just copy it. Otherwise, copy all the
+                // entries that come with this dof
+                if (constr_ind[j] == constr_end[j] ||
+                    index[j] < constr_ind[j]->first)
+                  {
+                    new_dof_indices.push_back (*glob_indices[j]);
+                    ++m_index;
+                    ++index[j];
+                    ++glob_indices[j];
+                  }
+                else
+                  {
+                    const unsigned short constraint_loc = constr_ind[j]->second;
+                    new_constraint_indicator.push_back
+                    (std::pair<unsigned short,unsigned short> (m_index, constraint_loc));
+                    for (unsigned int k=constraint_pool_row_index[constraint_loc];
+                         k<constraint_pool_row_index[constraint_loc+1];
+                         ++k, ++glob_indices[j])
+                      new_dof_indices.push_back (*glob_indices[j]);
+                    ++constr_ind[j];
+                    m_index = 0;
+                    index[j] = 0;
+                  }
+                if (store_plain_indices==true && has_constraints==true)
+                  new_plain_indices.push_back (*plain_glob_indices[j]++);
+                ++m_ind_local;
+              }
+
+          for (unsigned int j=0; j<n_comp; ++j)
+            Assert (glob_indices[j]==end_indices(renumbering[position_cell+j]),
+                    ExcInternalError());
+          position_cell += n_comp;
+        }
+      AssertDimension (position_cell+1, row_starts.size());
+
+      new_row_starts[size_info.n_macro_cells][0] = new_dof_indices.size();
+      new_row_starts[size_info.n_macro_cells][1] = new_constraint_indicator.size();
+      new_row_starts[size_info.n_macro_cells][2] = 0;
+
+      AssertDimension(dof_indices.size(), new_dof_indices.size());
+      AssertDimension(constraint_indicator.size(),
+                      new_constraint_indicator.size());
+
+      new_row_starts.swap (row_starts);
+      new_dof_indices.swap (dof_indices);
+      new_constraint_indicator.swap (constraint_indicator);
+      new_plain_indices.swap (plain_dof_indices);
+      new_rowstart_plain.swap (row_starts_plain_indices);
+
+#ifdef DEBUG
+      // sanity check 1: all indices should be smaller than the number of dofs
+      // locally owned plus the number of ghosts
+      const unsigned int index_range = (vector_partitioner->local_range().second-
+                                        vector_partitioner->local_range().first)
+                                       + vector_partitioner->ghost_indices().n_elements();
+      for (std::size_t i=0; i<dof_indices.size(); ++i)
+        AssertIndexRange (dof_indices[i], index_range);
+
+      // sanity check 2: for the constraint indicators, the first index should
+      // be smaller than the number of indices in the row, and the second
+      // index should be smaller than the number of constraints in the
+      // constraint pool.
+      for (unsigned int row=0; row<size_info.n_macro_cells; ++row)
+        {
+          const unsigned int row_length_ind = row_length_indices(row);
+          const std::pair<unsigned short,unsigned short>
+          *con_it = begin_indicators(row), * end_con = end_indicators(row);
+          for ( ; con_it != end_con; ++con_it)
+            {
+              AssertIndexRange (con_it->first, row_length_ind+1);
+              AssertIndexRange (con_it->second,
+                                constraint_pool_row_index.size()-1);
+            }
+        }
+
+      // sanity check 3: all non-boundary cells should have indices that only
+      // refer to the locally owned range
+      const unsigned int local_size = (vector_partitioner->local_range().second-
+                                       vector_partitioner->local_range().first);
+      for (unsigned int row=0; row<size_info.boundary_cells_start; ++row)
+        {
+          const unsigned int *ptr     = begin_indices(row);
+          const unsigned int *end_ptr = end_indices  (row);
+          for ( ; ptr != end_ptr; ++ptr)
+            AssertIndexRange (*ptr, local_size);
+        }
+      for (unsigned int row=size_info.boundary_cells_end;
+           row<size_info.n_macro_cells; ++row)
+        {
+          const unsigned int *ptr     = begin_indices(row);
+          const unsigned int *end_ptr = end_indices  (row);
+          for ( ; ptr != end_ptr; ++ptr)
+            AssertIndexRange (*ptr, local_size);
+        }
+#endif
+    }
+
+
+
+    // Chooses task_info.block_size, the number of macro cells per block,
+    // when the user left it at zero, and in every case limits it to the
+    // number of macro cells.
+    void DoFInfo::guess_block_size (const SizeInfo &size_info,
+                                    TaskInfo       &task_info)
+    {
+      const bool block_size_given_by_user = (task_info.block_size != 0);
+      if (!block_size_given_by_user)
+        {
+          // first guess: enough blocks to get 50 times as many chunks as
+          // there are threads on the system
+          const unsigned int n_desired_chunks = MultithreadInfo::n_threads() * 50;
+          task_info.block_size = size_info.n_macro_cells / n_desired_chunks;
+
+          // a block should carry a minimum amount of work. with few degrees
+          // of freedom per cell the block size is raised accordingly
+          const unsigned int minimum_parallel_grain_size = 500;
+          const bool block_too_small =
+            (dofs_per_cell[0] * task_info.block_size <
+             minimum_parallel_grain_size);
+          if (block_too_small)
+            task_info.block_size =
+              minimum_parallel_grain_size / dofs_per_cell[0] + 1;
+        }
+
+      // a block cannot contain more macro cells than there are
+      if (size_info.n_macro_cells < task_info.block_size)
+        task_info.block_size = size_info.n_macro_cells;
+    }
+
+
+
+    void DoFInfo::make_thread_graph_partition_color
+    (SizeInfo                  &size_info,
+     TaskInfo                  &task_info,
+     std::vector<unsigned int> &renumbering,
+     std::vector<unsigned int> &irregular_cells,
+     const bool                 hp_bool)
+    {
+      if (size_info.n_macro_cells == 0)
+        return;
+
+      const std::size_t vectorization_length = size_info.vectorization_length;
+      Assert (vectorization_length > 0, ExcInternalError());
+
+      guess_block_size (size_info, task_info);
+
+      // set up partitions. if we just use coloring without partitions, do
+      // nothing here, assume all cells to belong to the zero partition (that
+      // we otherwise use for MPI boundary cells)
+      unsigned int start_up = 0,
+                   start_nonboundary = numbers::invalid_unsigned_int;
+      if (task_info.use_coloring_only == false)
+        {
+          start_nonboundary =
+            std::min(((size_info.boundary_cells_end+task_info.block_size-1)/
+                      task_info.block_size)*task_info.block_size,
+                     size_info.n_macro_cells);
+          start_up = start_nonboundary;
+          size_info.boundary_cells_end = start_nonboundary;
+        }
+      else
+        {
+          start_nonboundary = size_info.n_macro_cells;
+          start_up = size_info.n_macro_cells;
+          size_info.boundary_cells_start = 0;
+          size_info.boundary_cells_end = size_info.n_macro_cells;
+        }
+      if (hp_bool == true)
+        {
+          irregular_cells.resize (0);
+          irregular_cells.resize (size_info.n_macro_cells+2*max_fe_index);
+          std::vector<std::vector<unsigned int> > renumbering_fe_index;
+          renumbering_fe_index.resize(max_fe_index);
+          unsigned int counter,n_macro_cells_before = 0;
+          for (counter=0; counter<start_nonboundary*vectorization_length;
+               counter++)
+            {
+              renumbering_fe_index[cell_active_fe_index[renumbering[counter]]].
+              push_back(renumbering[counter]);
+            }
+          counter = 0;
+          for (unsigned int j=0; j<max_fe_index; j++)
+            {
+              for (unsigned int jj=0; jj<renumbering_fe_index[j].size(); jj++)
+                renumbering[counter++] = renumbering_fe_index[j][jj];
+              irregular_cells[renumbering_fe_index[j].size()/vectorization_length+
+                              n_macro_cells_before] =
+                                renumbering_fe_index[j].size()%vectorization_length;
+              n_macro_cells_before += (renumbering_fe_index[j].size()+vectorization_length-1)/
+                                      vectorization_length;
+              renumbering_fe_index[j].resize(0);
+            }
+
+          unsigned int new_boundary_end = n_macro_cells_before;
+          for (counter=start_nonboundary*vectorization_length;
+               counter<size_info.n_active_cells; counter++)
+            {
+              renumbering_fe_index[cell_active_fe_index.empty() ? 0 :
+                                   cell_active_fe_index[renumbering[counter]]].
+              push_back(renumbering[counter]);
+            }
+          counter = start_nonboundary * vectorization_length;
+          for (unsigned int j=0; j<max_fe_index; j++)
+            {
+              for (unsigned int jj=0; jj<renumbering_fe_index[j].size(); jj++)
+                renumbering[counter++] = renumbering_fe_index[j][jj];
+              irregular_cells[renumbering_fe_index[j].size()/vectorization_length+
+                              n_macro_cells_before] =
+                                renumbering_fe_index[j].size()%vectorization_length;
+              n_macro_cells_before += (renumbering_fe_index[j].size()+vectorization_length-1)/
+                                      vectorization_length;
+            }
+          AssertIndexRange (n_macro_cells_before,
+                            size_info.n_macro_cells + 2*max_fe_index+1);
+          irregular_cells.resize (n_macro_cells_before);
+          size_info.n_macro_cells = n_macro_cells_before;
+          size_info.boundary_cells_start = 0;
+          size_info.boundary_cells_end = new_boundary_end;
+          task_info.n_blocks = (size_info.n_macro_cells+task_info.block_size-1)
+                               /task_info.block_size;
+          task_info.block_size_last = size_info.n_macro_cells%task_info.block_size;
+          if (task_info.block_size_last == 0)
+            task_info.block_size_last = task_info.block_size;
+        }
+
+      // assume that all FEs have the same connectivity graph, so take the
+      // zeroth FE
+      task_info.n_blocks = (size_info.n_macro_cells+task_info.block_size-1)/
+                           task_info.block_size;
+      task_info.block_size_last = size_info.n_macro_cells-
+                                  (task_info.block_size*(task_info.n_blocks-1));
+
+      // create the connectivity graph with internal blocking
+      DynamicSparsityPattern connectivity;
+      make_connectivity_graph (size_info, task_info, renumbering,irregular_cells,
+                               true, connectivity);
+
+      // Create cell-block  partitioning.
+      unsigned int partition = 0, counter = 0;
+      bool work = true;
+
+      // For each block of cells, this variable saves to which partitions the
+      // block belongs. Initialize all to n_macro_cells to mark them as not
+      // yet assigned a partition.
+      std::vector<unsigned int> cell_partition(task_info.n_blocks,
+                                               size_info.n_macro_cells);
+      std::vector<unsigned int> neighbor_list;
+      std::vector<unsigned int> neighbor_neighbor_list;
+
+      // In element j of this variable, one puts the old number of the block
+      // that should be the jth block in the new numeration.
+      std::vector<unsigned int> partition_list      (task_info.n_blocks,0);
+      std::vector<unsigned int> partition_color_list(task_info.n_blocks,0);
+
+      // This vector points to the start of each partition.
+      std::vector<unsigned int> partition_blocks (2,0);
+      std::vector<unsigned int> cell_color(task_info.n_blocks,
+                                           size_info.n_macro_cells);
+      std::vector<bool> color_finder;
+
+      // this performs a classical breadth-first search in the connectivity
+      // graph of the cell chunks
+      while (work)
+        {
+          // put all cells up to begin_inner_cells into first partition. if
+          // the numbers do not add up exactly, assign an additional block
+          if (start_nonboundary>0 && start_up == start_nonboundary)
+            {
+              unsigned int n_blocks = ((start_nonboundary+task_info.block_size-1)
+                                       /task_info.block_size);
+              for (unsigned int cell=0; cell<n_blocks; ++cell)
+                {
+                  cell_partition[cell] = partition;
+                  neighbor_list.push_back(cell);
+                  partition_list[counter++] = cell;
+                  partition_blocks.back()++;
+                }
+            }
+          else
+            {
+              // To start up, set the start_up cell to partition and list all
+              // its neighbors.
+              AssertIndexRange(start_up, cell_partition.size());
+              cell_partition[start_up] = partition;
+              neighbor_list.push_back(start_up);
+              partition_list[counter++] = start_up;
+              partition_blocks.back()++;
+            }
+
+          while (neighbor_list.size()>0)
+            {
+              partition++;
+              partition_blocks.push_back(partition_blocks.back());
+              for (unsigned int j=0; j<neighbor_list.size(); ++j)
+                {
+                  Assert(cell_partition[neighbor_list[j]]==partition-1,
+                         ExcInternalError());
+                  DynamicSparsityPattern::iterator neighbor =
+                    connectivity.begin(neighbor_list[j]),
+                    end = connectivity.end(neighbor_list[j]);
+                  for (; neighbor!=end ; ++neighbor)
+                    {
+                      if (cell_partition[neighbor->column()]==size_info.n_macro_cells)
+                        {
+                          partition_blocks.back()++;
+                          cell_partition[neighbor->column()] = partition;
+                          neighbor_neighbor_list.push_back(neighbor->column());
+                          partition_list[counter++] = neighbor->column();
+                        }
+                    }
+                }
+              neighbor_list = neighbor_neighbor_list;
+              neighbor_neighbor_list.resize(0);
+            }
+
+          // One has to check if the graph is not connected so we have to find
+          // another partition.
+          work = false;
+          for (unsigned int j=start_up; j<task_info.n_blocks; ++j)
+            if (cell_partition[j] == size_info.n_macro_cells)
+              {
+                start_up = j;
+                work = true;
+                break;
+              }
+        }
+      AssertDimension (partition_blocks[partition], task_info.n_blocks);
+
+
+      // Color the cells within each partition
+      task_info.partition_color_blocks_row_index.resize(partition+1);
+      unsigned int color_counter = 0, index_counter = 0;
+      for (unsigned int part=0; part<partition; part++)
+        {
+          task_info.partition_color_blocks_row_index[part] = index_counter;
+          unsigned int max_color = 0;
+          for (unsigned int k=partition_blocks[part]; k<partition_blocks[part+1];
+               k++)
+            {
+              unsigned int cell = partition_list[k];
+              unsigned int n_neighbors = connectivity.row_length(cell);
+
+              // In the worst case, each neighbor has a different color. So we
+              // find at least one available color between 0 and n_neighbors.
+              color_finder.resize(n_neighbors+1);
+              for (unsigned int j=0; j<=n_neighbors; ++j)
+                color_finder[j]=true;
+              DynamicSparsityPattern::iterator
+              neighbor = connectivity.begin(cell),
+              end      = connectivity.end(cell);
+              for (; neighbor!=end ; ++neighbor)
+                {
+                  // Mark the color that a neighbor within the partition has
+                  // as taken
+                  if (cell_partition[neighbor->column()] == part &&
+                      cell_color[neighbor->column()] <= n_neighbors)
+                    color_finder[cell_color[neighbor->column()]] = false;
+                }
+              // Choose the smallest color that is not taken for the block
+              cell_color[cell]=0;
+              while (color_finder[cell_color[cell]] == false)
+                cell_color[cell]++;
+              if (cell_color[cell] > max_color)
+                max_color = cell_color[cell];
+            }
+          // Reorder within partition: first, all blocks that belong to color 0,
+          // and so on up to those with color max_color (Note that the smaller
+          // the number the larger the partition)
+          for (unsigned int color=0; color<=max_color; color++)
+            {
+              task_info.partition_color_blocks_data.push_back(color_counter);
+              index_counter++;
+              for (unsigned int k=partition_blocks[part];
+                   k<partition_blocks[part+1]; k++)
+                {
+                  unsigned int cell=partition_list[k];
+                  if (cell_color[cell] == color)
+                    {
+                      partition_color_list[color_counter++] = cell;
+                    }
+                }
+            }
+        }
+      task_info.partition_color_blocks_data.push_back(task_info.n_blocks);
+      task_info.partition_color_blocks_row_index[partition] = index_counter;
+      AssertDimension (color_counter, task_info.n_blocks);
+
+      partition_list = renumbering;
+
+      // in debug mode, check that the partition color list is one-to-one
+#ifdef DEBUG
+      {
+        std::vector<unsigned int> sorted_pc_list (partition_color_list);
+        std::sort(sorted_pc_list.begin(), sorted_pc_list.end());
+        for (unsigned int i=0; i<sorted_pc_list.size(); ++i)
+          Assert(sorted_pc_list[i] == i, ExcInternalError());
+      }
+#endif
+
+      // set the start list for each block and compute the renumbering of
+      // cells
+      std::vector<unsigned int> block_start(size_info.n_macro_cells+1);
+      std::vector<unsigned int> irregular(size_info.n_macro_cells);
+
+      unsigned int mcell_start=0;
+      block_start[0] = 0;
+      for (unsigned int block=0; block<task_info.n_blocks; block++)
+        {
+          block_start[block+1] = block_start[block];
+          for (unsigned int mcell=mcell_start; mcell<
+               std::min(mcell_start+task_info.block_size,
+                        size_info.n_macro_cells);
+               ++mcell)
+            {
+              unsigned int n_comp = (irregular_cells[mcell]>0)
+                                    ?irregular_cells[mcell]:size_info.vectorization_length;
+              block_start[block+1] += n_comp;
+              ++counter;
+            }
+          mcell_start += task_info.block_size;
+        }
+      counter = 0;
+      unsigned int counter_macro = 0;
+      for (unsigned int block=0; block<task_info.n_blocks; block++)
+        {
+          unsigned int present_block = partition_color_list[block];
+          for (unsigned int cell = block_start[present_block];
+               cell<block_start[present_block+1]; ++cell)
+            renumbering[counter++] = partition_list[cell];
+          unsigned int this_block_size = (present_block == task_info.n_blocks-1)?
+                                         task_info.block_size_last:task_info.block_size;
+          for (unsigned int j=0; j<this_block_size; j++)
+            irregular[counter_macro++] =
+              irregular_cells[present_block*task_info.block_size+j];
+          if (present_block == task_info.n_blocks-1)
+            task_info.position_short_block = block;
+        }
+      irregular_cells.swap(irregular);
+      AssertDimension (counter, size_info.n_active_cells);
+      AssertDimension (counter_macro, size_info.n_macro_cells);
+
+      // check that the renumbering is one-to-one
+#ifdef DEBUG
+      {
+        std::vector<unsigned int> sorted_renumbering (renumbering);
+        std::sort(sorted_renumbering.begin(), sorted_renumbering.end());
+        for (unsigned int i=0; i<sorted_renumbering.size(); ++i)
+          Assert(sorted_renumbering[i] == i, ExcInternalError());
+      }
+#endif
+      AssertDimension(counter,size_info.n_active_cells);
+      task_info.evens = (partition+1)/2;
+      task_info.odds  = (partition)/2;
+      task_info.n_blocked_workers = task_info.odds-
+                                    (task_info.odds+task_info.evens+1)%2;
+      task_info.n_workers = task_info.partition_color_blocks_data.size()-1-
+                            task_info.n_blocked_workers;
+    }
+
+
+
+    void
+    DoFInfo::make_thread_graph_partition_partition
+    (SizeInfo                  &size_info,
+     TaskInfo                  &task_info,
+     std::vector<unsigned int> &renumbering,
+     std::vector<unsigned int> &irregular_cells,
+     const bool                 hp_bool)
+    {
+      if (size_info.n_macro_cells == 0)
+        return;
+
+      const std::size_t vectorization_length = size_info.vectorization_length;
+      Assert (vectorization_length > 0, ExcInternalError());
+
+      guess_block_size (size_info, task_info);
+
+      // assume that all FEs have the same connectivity graph, so take the
+      // zeroth FE
+      task_info.n_blocks = (size_info.n_macro_cells+task_info.block_size-1)/
+                           task_info.block_size;
+      task_info.block_size_last = size_info.n_macro_cells-
+                                  (task_info.block_size*(task_info.n_blocks-1));
+      task_info.position_short_block = task_info.n_blocks-1;
+      unsigned int cluster_size = task_info.block_size*vectorization_length;
+
+      // create the connectivity graph without internal blocking
+      DynamicSparsityPattern connectivity;
+      make_connectivity_graph (size_info, task_info, renumbering,irregular_cells,
+                               false, connectivity);
+
+      // Create cell-block  partitioning.
+
+      // For each cell, this variable saves to which partition the cell
+      // belongs. Initialize all to n_active_cells to mark them as not
+      // yet assigned a partition.
+      std::vector<unsigned int> cell_partition (size_info.n_active_cells,
+                                                size_info.n_active_cells);
+      std::vector<unsigned int> neighbor_list;
+      std::vector<unsigned int> neighbor_neighbor_list;
+
+      // In element j of this variable, one puts the old number of the block
+      // that should be the jth block in the new numeration.
+      std::vector<unsigned int> partition_list(size_info.n_active_cells,0);
+      std::vector<unsigned int> partition_partition_list(size_info.n_active_cells,0);
+
+      // This vector points to the start of each partition.
+      std::vector<unsigned int> partition_size(2,0);
+
+      unsigned int partition = 0,start_up=0,counter=0;
+      unsigned int start_nonboundary = vectorization_length * size_info.boundary_cells_end;
+      if (start_nonboundary > size_info.n_active_cells)
+        start_nonboundary = size_info.n_active_cells;
+      bool work = true;
+      unsigned int remainder = cluster_size;
+
+      // this performs a classical breadth-first search in the connectivity
+      // graph of the cells under the restriction that the size of the
+      // partitions should be a multiple of the given block size
+      while (work)
+        {
+          // put the cells with neighbors on remote MPI processes up front
+          if (start_nonboundary>0)
+            {
+              for (unsigned int cell=0; cell<start_nonboundary; ++cell)
+                {
+                  const unsigned int cell_nn = renumbering[cell];
+                  cell_partition[cell_nn] = partition;
+                  neighbor_list.push_back(cell_nn);
+                  partition_list[counter++] = cell_nn;
+                  partition_size.back()++;
+                }
+              remainder -= (start_nonboundary%cluster_size);
+              if (remainder == cluster_size)
+                remainder = 0;
+
+              // adjust end of boundary cells to the remainder
+              size_info.boundary_cells_end += (remainder+vectorization_length-1)/vectorization_length;
+            }
+          else
+            {
+              // To start up, set the start_up cell to partition and list all
+              // its neighbors.
+              cell_partition[start_up] = partition;
+              neighbor_list.push_back(start_up);
+              partition_list[counter++] = start_up;
+              partition_size.back()++;
+              start_up++;
+              remainder--;
+              if (remainder == cluster_size)
+                remainder = 0;
+            }
+          int index_before = neighbor_list.size(), index = index_before,
+              index_stop = 0;
+          while (remainder>0)
+            {
+              if (index==index_stop)
+                {
+                  index = neighbor_list.size();
+                  if (index == index_before)
+                    {
+                      neighbor_list.resize(0);
+                      goto not_connect;
+                    }
+                  index_stop = index_before;
+                  index_before = index;
+                }
+              index--;
+              unsigned int additional = neighbor_list[index];
+              DynamicSparsityPattern::iterator neighbor =
+                connectivity.begin(additional),
+                end = connectivity.end(additional);
+              for (; neighbor!=end ; ++neighbor)
+                {
+                  if (cell_partition[neighbor->column()]==size_info.n_active_cells)
+                    {
+                      partition_size.back()++;
+                      cell_partition[neighbor->column()] = partition;
+                      neighbor_list.push_back(neighbor->column());
+                      partition_list[counter++] = neighbor->column();
+                      remainder--;
+                      if (remainder == 0)
+                        break;
+                    }
+                }
+            }
+
+          while (neighbor_list.size()>0)
+            {
+              partition++;
+              unsigned int partition_counter = 0;
+              partition_size.push_back(partition_size.back());
+
+              for (unsigned int j=0; j<neighbor_list.size(); ++j)
+                {
+                  Assert(cell_partition[neighbor_list[j]]==partition-1,
+                         ExcInternalError());
+                  DynamicSparsityPattern::iterator neighbor =
+                    connectivity.begin(neighbor_list[j]),
+                    end = connectivity.end(neighbor_list[j]);
+                  for (; neighbor!=end ; ++neighbor)
+                    {
+                      if (cell_partition[neighbor->column()]==size_info.n_active_cells)
+                        {
+                          partition_size.back()++;
+                          cell_partition[neighbor->column()] = partition;
+                          neighbor_neighbor_list.push_back(neighbor->column());
+                          partition_list[counter++] = neighbor->column();
+                          partition_counter++;
+                        }
+                    }
+                }
+              remainder = cluster_size-(partition_counter%cluster_size);
+              if (remainder == cluster_size)
+                remainder = 0;
+              int index_stop = 0;
+              int index_before = neighbor_neighbor_list.size(), index = index_before;
+              while (remainder>0)
+                {
+                  if (index==index_stop)
+                    {
+                      index = neighbor_neighbor_list.size();
+                      if (index == index_before)
+                        {
+                          neighbor_neighbor_list.resize(0);
+                          break;
+                        }
+                      index_stop = index_before;
+                      index_before = index;
+                    }
+                  index--;
+                  unsigned int additional = neighbor_neighbor_list[index];
+                  DynamicSparsityPattern::iterator neighbor =
+                    connectivity.begin(additional),
+                    end = connectivity.end(additional);
+                  for (; neighbor!=end ; ++neighbor)
+                    {
+                      if (cell_partition[neighbor->column()]==size_info.n_active_cells)
+                        {
+                          partition_size.back()++;
+                          cell_partition[neighbor->column()] = partition;
+                          neighbor_neighbor_list.push_back(neighbor->column());
+                          partition_list[counter++] = neighbor->column();
+                          remainder--;
+                          if (remainder == 0)
+                            break;
+                        }
+                    }
+                }
+
+              neighbor_list = neighbor_neighbor_list;
+              neighbor_neighbor_list.resize(0);
+            }
+not_connect:
+          // One has to check if the graph is not connected so we have to find
+          // another partition.
+          work = false;
+          for (unsigned int j=start_up; j<size_info.n_active_cells; ++j)
+            if (cell_partition[j] == size_info.n_active_cells)
+              {
+                start_up = j;
+                work = true;
+                if (remainder == 0)
+                  remainder = cluster_size;
+                break;
+              }
+        }
+      if (remainder != 0)
+        partition++;
+
+      for (unsigned int j=0; j<renumbering.size(); j++)
+        renumbering[j] = 0;
+      irregular_cells.back() = 0;
+      irregular_cells.resize(size_info.n_active_cells);
+      unsigned int n_macro_cells_before = 0;
+      {
+        // Create partitioning within partitions.
+
+        // For each cell, this variable saves to which sub-partition (within
+        // its partition) the cell belongs. Initialize all to n_active_cells
+        // to mark them as not yet assigned a sub-partition.
+        std::vector<unsigned int> cell_partition_l2(size_info.n_active_cells,
+                                                    size_info.n_active_cells);
+        task_info.partition_color_blocks_row_index.resize(partition+1,0);
+        task_info.partition_color_blocks_data.resize(1,0);
+
+        start_up = 0;
+        counter = 0;
+        unsigned int missing_macros;
+        for (unsigned int part=0; part<partition; ++part)
+          {
+            neighbor_neighbor_list.resize(0);
+            neighbor_list.resize(0);
+            bool work = true;
+            unsigned int partition_l2 = 0;
+            start_up = partition_size[part];
+            unsigned int partition_counter = 0;
+            while (work)
+              {
+                if (neighbor_list.size()==0)
+                  {
+                    work = false;
+                    partition_counter = 0;
+                    for (unsigned int j=start_up; j<partition_size[part+1]; ++j)
+                      if (cell_partition[partition_list[j]] == part &&
+                          cell_partition_l2[partition_list[j]] == size_info.n_active_cells)
+                        {
+                          start_up = j;
+                          work = true;
+                          partition_counter = 1;
+                          // To start up, set the start_up cell to partition
+                          // and list all its neighbors.
+                          AssertIndexRange (start_up, partition_size[part+1]);
+                          cell_partition_l2[partition_list[start_up]] =
+                            partition_l2;
+                          neighbor_neighbor_list.push_back
+                          (partition_list[start_up]);
+                          partition_partition_list[counter++] =
+                            partition_list[start_up];
+                          start_up++;
+                          break;
+                        }
+                  }
+                else
+                  {
+                    partition_counter = 0;
+                    for (unsigned int j=0; j<neighbor_list.size(); ++j)
+                      {
+                        Assert(cell_partition[neighbor_list[j]]==part,
+                               ExcInternalError());
+                        Assert(cell_partition_l2[neighbor_list[j]]==partition_l2-1,
+                               ExcInternalError());
+                        DynamicSparsityPattern::iterator neighbor =
+                          connectivity.begin(neighbor_list[j]),
+                          end = connectivity.end(neighbor_list[j]);
+                        for (; neighbor!=end ; ++neighbor)
+                          {
+                            if (cell_partition[neighbor->column()] == part &&
+                                cell_partition_l2[neighbor->column()]==
+                                size_info.n_active_cells)
+                              {
+                                cell_partition_l2[neighbor->column()] = partition_l2;
+                                neighbor_neighbor_list.push_back(neighbor->column());
+                                partition_partition_list[counter++] = neighbor->column();
+                                partition_counter++;
+                              }
+                          }
+                      }
+                  }
+                if (partition_counter>0)
+                  {
+                    int index_before = neighbor_neighbor_list.size(),
+                        index = index_before;
+                    {
+                      // put the cells into separate lists for each FE index
+                      // within one partition-partition
+                      missing_macros = 0;
+                      std::vector<unsigned int> remaining_per_macro_cell
+                      (max_fe_index);
+                      std::vector<std::vector<unsigned int> >
+                      renumbering_fe_index;
+                      unsigned int cell;
+                      bool filled = true;
+                      if (hp_bool == true)
+                        {
+                          renumbering_fe_index.resize(max_fe_index);
+                          for (cell=counter-partition_counter; cell<counter; ++cell)
+                            {
+                              renumbering_fe_index
+                              [cell_active_fe_index.empty() ? 0 :
+                               cell_active_fe_index[partition_partition_list
+                                                    [cell]]].
+                              push_back(partition_partition_list[cell]);
+                            }
+                          // check how many more cells are needed in the lists
+                          for (unsigned int j=0; j<max_fe_index; j++)
+                            {
+                              remaining_per_macro_cell[j] =
+                                renumbering_fe_index[j].size()%vectorization_length;
+                              if (remaining_per_macro_cell[j] != 0)
+                                filled = false;
+                              missing_macros += ((renumbering_fe_index[j].size()+
+                                                  vectorization_length-1)/vectorization_length);
+                            }
+                        }
+                      else
+                        {
+                          remaining_per_macro_cell.resize(1);
+                          remaining_per_macro_cell[0] = partition_counter%
+                                                        vectorization_length;
+                          missing_macros = partition_counter/vectorization_length;
+                          if (remaining_per_macro_cell[0] != 0)
+                            {
+                              filled = false;
+                              missing_macros++;
+                            }
+                        }
+                      missing_macros = task_info.block_size -
+                                       (missing_macros%task_info.block_size);
+
+                      // now we realized that there are some cells missing.
+                      while (missing_macros>0 || filled == false)
+                        {
+                          if (index==0)
+                            {
+                              index = neighbor_neighbor_list.size();
+                              if (index == index_before)
+                                {
+                                  if (missing_macros != 0)
+                                    {
+                                      neighbor_neighbor_list.resize(0);
+                                    }
+                                  start_up--;
+                                  break;// not connected - start again
+                                }
+                              index_before = index;
+                            }
+                          index--;
+                          unsigned int additional = neighbor_neighbor_list
+                                                    [index];
+
+                          // go through the neighbors of the last cell in the
+                          // current partition and check if we find some to
+                          // fill up with.
+                          DynamicSparsityPattern::iterator
+                          neighbor = connectivity.begin(additional),
+                          end = connectivity.end(additional);
+                          for (; neighbor!=end ; ++neighbor)
+                            {
+                              if (cell_partition[neighbor->column()] == part &&
+                                  cell_partition_l2[neighbor->column()] ==
+                                  size_info.n_active_cells)
+                                {
+                                  unsigned int this_index = 0;
+                                  if (hp_bool == true)
+                                    this_index = cell_active_fe_index.empty() ? 0 :
+                                                 cell_active_fe_index[neighbor->column()];
+
+                                  // Only add this cell if we need more macro
+                                  // cells in the current block or if there is
+                                  // a macro cell with the FE index that is
+                                  // not yet fully populated
+                                  if (missing_macros > 0 ||
+                                      remaining_per_macro_cell[this_index] > 0)
+                                    {
+                                      cell_partition_l2[neighbor->column()] = partition_l2;
+                                      neighbor_neighbor_list.push_back(neighbor->column());
+                                      if (hp_bool == true)
+                                        renumbering_fe_index[this_index].
+                                        push_back(neighbor->column());
+                                      partition_partition_list[counter] =
+                                        neighbor->column();
+                                      counter++;
+                                      partition_counter++;
+                                      if (remaining_per_macro_cell[this_index]
+                                          == 0 && missing_macros > 0)
+                                        missing_macros--;
+                                      remaining_per_macro_cell[this_index]++;
+                                      if (remaining_per_macro_cell[this_index]
+                                          == vectorization_length)
+                                        {
+                                          remaining_per_macro_cell[this_index] = 0;
+                                        }
+                                      if (missing_macros == 0)
+                                        {
+                                          filled = true;
+                                          for (unsigned int fe_ind=0;
+                                               fe_ind<max_fe_index; ++fe_ind)
+                                            if (remaining_per_macro_cell[fe_ind]!=0)
+                                              filled = false;
+                                        }
+                                      if (filled == true)
+                                        break;
+                                    }
+                                }
+                            }
+                        }
+                      if (hp_bool == true)
+                        {
+                          // set the renumbering according to their active FE
+                          // index within one partition-partition which was
+                          // implicitly assumed above
+                          cell = counter - partition_counter;
+                          for (unsigned int j=0; j<max_fe_index; j++)
+                            {
+                              for (unsigned int jj=0; jj<renumbering_fe_index[j].
+                                   size(); jj++)
+                                renumbering[cell++] =
+                                  renumbering_fe_index[j][jj];
+                              if (renumbering_fe_index[j].size()%vectorization_length != 0)
+                                irregular_cells[renumbering_fe_index[j].size()/
+                                                vectorization_length+
+                                                n_macro_cells_before] =
+                                                  renumbering_fe_index[j].size()%vectorization_length;
+                              n_macro_cells_before += (renumbering_fe_index[j].
+                                                       size()+vectorization_length-1)/
+                                                      vectorization_length;
+                              renumbering_fe_index[j].resize(0);
+                            }
+                        }
+                      else
+                        {
+                          n_macro_cells_before += partition_counter/vectorization_length;
+                          if (partition_counter%vectorization_length != 0)
+                            {
+                              irregular_cells[n_macro_cells_before] =
+                                partition_counter%vectorization_length;
+                              n_macro_cells_before++;
+                            }
+                        }
+                    }
+                    task_info.partition_color_blocks_data.
+                    push_back(n_macro_cells_before);
+                    partition_l2++;
+                  }
+                neighbor_list = neighbor_neighbor_list;
+                neighbor_neighbor_list.resize(0);
+              }
+            task_info.partition_color_blocks_row_index[part+1] =
+              task_info.partition_color_blocks_row_index[part] + partition_l2;
+          }
+      }
+
+      if (size_info.boundary_cells_end>0)
+        size_info.boundary_cells_end = task_info.partition_color_blocks_data
+                                       [task_info.partition_color_blocks_row_index[1]];
+
+      if (hp_bool == false)
+        renumbering.swap(partition_partition_list);
+      irregular_cells.resize(n_macro_cells_before);
+      size_info.n_macro_cells = n_macro_cells_before;
+
+      task_info.evens = (partition+1)/2;
+      task_info.odds  = partition/2;
+      task_info.n_blocked_workers =
+        task_info.odds-(task_info.odds+task_info.evens+1)%2;
+      task_info.n_workers = task_info.evens+task_info.odds-
+                            task_info.n_blocked_workers;
+      task_info.partition_evens.resize(partition);
+      task_info.partition_odds.resize(partition);
+      task_info.partition_n_blocked_workers.resize(partition);
+      task_info.partition_n_workers.resize(partition);
+      for (unsigned int part=0; part<partition; part++)
+        {
+          task_info.partition_evens[part] =
+            (task_info.partition_color_blocks_row_index[part+1]-
+             task_info.partition_color_blocks_row_index[part]+1)/2;
+          task_info.partition_odds[part] =
+            (task_info.partition_color_blocks_row_index[part+1]-
+             task_info.partition_color_blocks_row_index[part])/2;
+          task_info.partition_n_blocked_workers[part] =
+            task_info.partition_odds[part]-(task_info.partition_odds[part]+
+                                            task_info.partition_evens[part]+1)%2;
+          task_info.partition_n_workers[part] =
+            task_info.partition_evens[part]+task_info.partition_odds[part]-
+            task_info.partition_n_blocked_workers[part];
+        }
+    }
+
+
+    namespace internal
+    {
+      // Thin wrapper around std::vector whose insert() puts new entries at
+      // their sorted position and leaves out a value that is already there.
+      class ordered_vector : public std::vector<types::global_dof_index>
+      {
+        typedef std::vector<types::global_dof_index> base_vector;
+      public:
+        ordered_vector ()
+        {
+          reserve (2000);
+        }
+
+        // forwards to the base class, but ignores requests for zero capacity
+        void reserve (const std::size_t size)
+        {
+          if (size == 0)
+            return;
+          base_vector::reserve (size);
+        }
+
+        // Sorted insertion of entry, searching forward from dat (an iterator
+        // into this vector); afterwards dat points just behind entry.
+        void insert (const unsigned int entry, base_vector::iterator &dat)
+        {
+          AssertIndexRange (static_cast<std::size_t>(dat - begin()), size()+1);
+          AssertIndexRange (static_cast<std::size_t>(end() - dat), size()+1);
+          AssertIndexRange (size(), capacity());
+          for ( ; dat != end(); ++dat)
+            if (!(*dat < entry))
+              break;
+
+          if (dat == end())
+            {
+              push_back(entry);
+              dat = end();
+              return;
+            }
+          if (*dat > entry)
+            dat = base_vector::insert (dat, entry);
+          ++dat;
+        }
+      };
+    }
+
+    // Fills connectivity with the symmetric graph of blocks (cells if !do_blocking) sharing a dof.
+    void
+    DoFInfo::make_connectivity_graph
+    (const SizeInfo                  &size_info,
+     const TaskInfo                  &task_info,
+     const std::vector<unsigned int> &renumbering,
+     const std::vector<unsigned int> &irregular_cells,
+     const bool                       do_blocking,
+     DynamicSparsityPattern &connectivity) const
+    {
+      AssertDimension (row_starts.size()-1, size_info.n_active_cells);
+      const unsigned int n_rows =
+        (vector_partitioner->local_range().second-
+         vector_partitioner->local_range().first)
+        + vector_partitioner->ghost_indices().n_elements(); // local range size plus number of ghost indices
+      const unsigned int n_blocks = (do_blocking == true) ?
+                                    task_info.n_blocks : size_info.n_active_cells; // one graph node per block, or per cell
+
+      // first determine row lengths: the number of blocks that touch each dof
+      std::vector<unsigned int> row_lengths(n_rows);
+      unsigned int cell_start = 0, mcell_start = 0;
+      std::vector<unsigned int> scratch;
+      for (unsigned int block = 0; block < n_blocks; ++block)
+        {
+          // if we have the blocking variant (used in the coloring scheme), we
+          // want to build a graph with the blocks with interaction with
+          // remote MPI processes up front. in the non-blocking variant, we do
+          // not do this here. TODO: unify this approach!!!
+          if (do_blocking == true)
+            {
+              scratch.clear();
+              for (unsigned int mcell=mcell_start; mcell<
+                   std::min(mcell_start+task_info.block_size,
+                            size_info.n_macro_cells);
+                   ++mcell)
+                { // irregular_cells[mcell]==0: use the full vectorization_length
+                  unsigned int n_comp = (irregular_cells[mcell]>0)
+                                        ?irregular_cells[mcell]:size_info.vectorization_length;
+                  for (unsigned int cell = cell_start; cell < cell_start+n_comp;
+                       ++cell)
+                    scratch.insert(scratch.end(),
+                                   begin_indices(renumbering[cell]),
+                                   end_indices(renumbering[cell]));
+                  cell_start += n_comp;
+                }
+              std::sort(scratch.begin(), scratch.end()); // sort and deduplicate so each dof counts once per block
+              const unsigned int n_unique =
+                std::unique(scratch.begin(), scratch.end())-scratch.begin();
+              for (unsigned int i=0; i<n_unique; ++i)
+                row_lengths[scratch[i]]++;
+              mcell_start += task_info.block_size;
+            }
+          else
+            {
+              scratch.clear();
+              scratch.insert(scratch.end(),
+                             begin_indices(block), end_indices(block));
+              std::sort(scratch.begin(), scratch.end());
+              const unsigned int n_unique =
+                std::unique(scratch.begin(), scratch.end())-scratch.begin();
+              for (unsigned int i=0; i<n_unique; ++i)
+                row_lengths[scratch[i]]++;
+            }
+        }
+
+      // disregard dofs that only sit on one block (cell): they connect nothing
+      for (unsigned int row=0; row<n_rows; ++row)
+        if (row_lengths[row] == 1)
+          row_lengths[row] = 0;
+
+      SparsityPattern connectivity_dof (n_rows, n_blocks, row_lengths); // rows: dofs, columns: blocks
+      cell_start = 0, mcell_start = 0;
+      for (unsigned int block = 0; block < n_blocks; ++block)
+        { // second pass: enter each shared dof (row) with its block (column)
+          // if we have the blocking variant (used in the coloring scheme), we
+          // want to build a graph with the blocks with interaction with
+          // remote MPI processes up front. in the non-blocking variant, we do
+          // not do this here. TODO: unify this approach!!!
+          if (do_blocking == true)
+            {
+              for (unsigned int mcell=mcell_start; mcell<
+                   std::min(mcell_start+task_info.block_size,
+                            size_info.n_macro_cells);
+                   ++mcell)
+                {
+                  unsigned int n_comp = (irregular_cells[mcell]>0)
+                                        ?irregular_cells[mcell]:size_info.vectorization_length;
+                  for (unsigned int cell = cell_start; cell < cell_start+n_comp;
+                       ++cell)
+                    {
+                      const unsigned int
+                      *it = begin_indices (renumbering[cell]),
+                       *end_cell = end_indices (renumbering[cell]);
+                      for ( ; it != end_cell; ++it)
+                        if (row_lengths[*it]>0)
+                          connectivity_dof.add(*it, block);
+                    }
+                  cell_start += n_comp;
+                }
+              mcell_start += task_info.block_size;
+            }
+          else
+            {
+              const unsigned int
+              *it = begin_indices (block),
+               *end_cell = end_indices (block);
+              for ( ; it != end_cell; ++it)
+                if (row_lengths[*it]>0)
+                  connectivity_dof.add(*it, block);
+            }
+        }
+      connectivity_dof.compress();
+
+      connectivity.reinit (n_blocks, n_blocks);
+      internal::ordered_vector row_entries;
+      cell_start = 0;
+      mcell_start = 0;
+      for (unsigned int block=0;  block < n_blocks; ++block)
+        { // third pass: gather, per block, the lower-numbered blocks sharing a dof
+          row_entries.clear();
+
+          if (do_blocking==true)
+            {
+              for (unsigned int mcell=mcell_start; mcell<
+                   std::min(mcell_start+task_info.block_size,
+                            size_info.n_macro_cells);
+                   ++mcell)
+                {
+                  unsigned int n_comp = (irregular_cells[mcell]>0)
+                                        ?irregular_cells[mcell]:size_info.vectorization_length;
+                  for (unsigned int cell = cell_start; cell < cell_start+n_comp;
+                       ++cell)
+                    {
+                      // apply renumbering when we do blocking
+                      const unsigned int
+                      *it = begin_indices (renumbering[cell]),
+                       *end_cell = end_indices (renumbering[cell]);
+                      for ( ; it != end_cell; ++it)
+                        if (row_lengths[*it] > 0)
+                          {
+                            SparsityPattern::iterator sp = connectivity_dof.begin(*it);
+                            // jump over diagonal for square patterns
+                            if (connectivity_dof.n_rows()==connectivity_dof.n_cols())
+                              ++sp;
+                            row_entries.reserve (row_entries.size() + end_cell - it);
+                            std::vector<types::global_dof_index>::iterator insert_pos = row_entries.begin();
+                            for ( ; sp != connectivity_dof.end(*it); ++sp)
+                              if (sp->column() >= block) // only blocks below this one; symmetrize() adds the rest
+                                break;
+                              else
+                                row_entries.insert (sp->column(), insert_pos);
+                          }
+                    }
+                  cell_start +=n_comp;
+                }
+              mcell_start += task_info.block_size;
+            }
+          else
+            {
+              const unsigned int *it = begin_indices (block),
+                                  * end_cell = end_indices (block);
+              for ( ; it != end_cell; ++it)
+                if (row_lengths[*it] > 0)
+                  {
+                    SparsityPattern::iterator sp = connectivity_dof.begin(*it);
+                    // jump over diagonal for square patterns
+                    if (connectivity_dof.n_rows()==connectivity_dof.n_cols())
+                      ++sp;
+                    row_entries.reserve (row_entries.size() + end_cell - it);
+                    std::vector<types::global_dof_index>::iterator insert_pos = row_entries.begin();
+                    for ( ; sp != connectivity_dof.end(*it); ++sp)
+                      if (sp->column() >= block) // only blocks below this one; symmetrize() adds the rest
+                        break;
+                      else
+                        row_entries.insert (sp->column(), insert_pos);
+                  }
+            }
+          connectivity.add_entries (block, row_entries.begin(), row_entries.end());
+        }
+      connectivity.symmetrize (); // mirror the lower triangle collected above
+    }
+
+
+
+    void DoFInfo::renumber_dofs (std::vector<types::global_dof_index> &renumbering)
+    {
+      // the scheme below assumes that the local size equals the global size
+      AssertDimension (vector_partitioner->local_size(),
+                       vector_partitioner->size());
+      const unsigned int local_size = vector_partitioner->local_size();
+      renumbering.assign (local_size, numbers::invalid_dof_index);
+
+      // number the dofs in order of first appearance in dof_indices
+      types::global_dof_index counter = 0;
+      for (std::size_t pos=0; pos<dof_indices.size(); ++pos)
+        {
+          unsigned int &dof = dof_indices[pos];
+          if (dof >= local_size)
+            continue;
+          types::global_dof_index &new_number = renumbering[dof];
+          if (new_number == numbers::invalid_dof_index)
+            new_number = counter++;
+          dof = new_number;
+        }
+
+      // dofs that do not show up in dof_indices receive the remaining numbers
+      AssertIndexRange (counter, local_size+1);
+      typedef std::vector<types::global_dof_index>::iterator number_iterator;
+      for (number_iterator it=renumbering.begin(); it!=renumbering.end(); ++it)
+        if (*it == numbers::invalid_dof_index)
+          *it = counter++;
+
+      // translate the constrained dofs. they should come out sorted because
+      // they are not contained in dof_indices and got contiguous numbers
+      std::vector<unsigned int> new_constrained_dofs;
+      new_constrained_dofs.reserve (constrained_dofs.size());
+      for (std::size_t c=0; c<constrained_dofs.size(); ++c)
+        new_constrained_dofs.push_back (renumbering[constrained_dofs[c]]);
+#ifdef DEBUG
+      for (std::size_t c=1; c<new_constrained_dofs.size(); ++c)
+        Assert (new_constrained_dofs[c-1] < new_constrained_dofs[c], ExcInternalError());
+#endif
+      constrained_dofs.swap (new_constrained_dofs);
+
+      // transform indices to the global index space
+      for (number_iterator it=renumbering.begin(); it!=renumbering.end(); ++it)
+        *it = vector_partitioner->local_to_global(*it);
+
+      AssertDimension (counter, renumbering.size());
+    }
+
+
+
+    std::size_t
+    DoFInfo::memory_consumption () const
+    {
+      // the object itself plus what is reported for its index data structures
+      return (sizeof(*this) +
+              row_starts.capacity()*sizeof(std_cxx11::array<unsigned int,3>) +
+              MemoryConsumption::memory_consumption (dof_indices) +
+              MemoryConsumption::memory_consumption (row_starts_plain_indices) +
+              MemoryConsumption::memory_consumption (plain_dof_indices) +
+              MemoryConsumption::memory_consumption (constraint_indicator) +
+              MemoryConsumption::memory_consumption (*vector_partitioner));
+    }
+
+
+
+    template <typename StreamType>
+    void
+    DoFInfo::print_memory_consumption (StreamType     &out,
+                                       const SizeInfo &size_info) const
+    {
+      // every entry is a fixed-width label followed by the memory statistics
+      const std::size_t row_starts_memory =
+        row_starts.capacity()*sizeof(std_cxx11::array<unsigned int, 3>);
+      const std::size_t plain_indices_memory =
+        MemoryConsumption::memory_consumption (row_starts_plain_indices) +
+        MemoryConsumption::memory_consumption (plain_dof_indices);
+      out << "       Memory row starts indices:    ";
+      size_info.print_memory_statistics (out, row_starts_memory);
+      out << "       Memory dof indices:           ";
+      size_info.print_memory_statistics (out, MemoryConsumption::memory_consumption (dof_indices));
+      out << "       Memory constraint indicators: ";
+      size_info.print_memory_statistics (out, MemoryConsumption::memory_consumption (constraint_indicator));
+      out << "       Memory plain indices:         ";
+      size_info.print_memory_statistics (out, plain_indices_memory);
+      out << "       Memory vector partitioner:    ";
+      size_info.print_memory_statistics (out, MemoryConsumption::memory_consumption (*vector_partitioner));
+    }
+
+
+
+    template <typename Number>
+    void
+    DoFInfo::print (const std::vector<Number>       &constraint_pool_data,
+                    const std::vector<unsigned int> &constraint_pool_row_index,
+                    std::ostream                    &out) const
+    {
+      // prints one line per row; constraints appear as [ index/pool value ... ]
+      const unsigned int n_rows = row_starts.size() - 1;
+      for (unsigned int row=0; row<n_rows; ++row)
+        {
+          out << "Entries row " << row << ": ";
+          const unsigned int *const row_begin = begin_indices(row);
+          const unsigned int *const row_end = end_indices(row);
+          typedef std::pair<unsigned short,unsigned short> indicator_type;
+          const indicator_type *indicator = begin_indicators(row);
+          const indicator_type *const last_indicator = end_indicators(row);
+          unsigned int pos = 0;
+          for ( ; indicator != last_indicator; ++indicator)
+            {
+              // unconstrained entries in front of the next constraint
+              for ( ; pos<indicator->first; ++pos)
+                {
+                  Assert (row_begin+pos != row_end, ExcInternalError());
+                  out << row_begin[pos] << " ";
+                }
+              const unsigned int pool_begin = constraint_pool_row_index[indicator->second];
+              const unsigned int pool_end = constraint_pool_row_index[indicator->second+1];
+              out << "[ ";
+              for (unsigned int k=pool_begin; k<pool_end; ++k, ++pos)
+                {
+                  Assert (row_begin+pos != row_end, ExcInternalError());
+                  out << row_begin[pos] << "/" << constraint_pool_data[k];
+                  if (k+1 < pool_end)
+                    out << " ";
+                }
+              out << "] ";
+            }
+          // whatever is left of the row after the last constraint
+          for (const unsigned int *rest = row_begin+pos; rest != row_end; ++rest)
+            out << *rest << " ";
+          out << std::endl;
+        }
+    }
+
+
+  } // end of namespace MatrixFreeFunctions
+} // end of namespace internal
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/include/deal.II/matrix_free/fe_evaluation.h b/include/deal.II/matrix_free/fe_evaluation.h
new file mode 100644
index 0000000..b2c52d6
--- /dev/null
+++ b/include/deal.II/matrix_free/fe_evaluation.h
@@ -0,0 +1,6757 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2011 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__matrix_free_fe_evaluation_h
+#define dealii__matrix_free_fe_evaluation_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/template_constraints.h>
+#include <deal.II/base/symmetric_tensor.h>
+#include <deal.II/base/vectorization.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/matrix_free/matrix_free.h>
+#include <deal.II/matrix_free/shape_info.h>
+#include <deal.II/matrix_free/mapping_data_on_the_fly.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+// forward declarations of names that are used before their definition is seen
+namespace parallel
+{
+  namespace distributed
+  {
+    template <typename> class Vector;
+  }
+}
+namespace internal
+{
+  DeclException0 (ExcAccessToUninitializedField);
+}
+// FEEvaluation: by default n_q_points_1d = fe_degree+1, one component, Number = double
+template <int dim, int fe_degree, int n_q_points_1d = fe_degree+1,
+          int n_components_ = 1, typename Number = double > class FEEvaluation;
+
+
+/**
+ * This is the base class for the FEEvaluation classes. It usually does not
+ * need to be used directly in user code and does not have any public
+ * constructor. The usage is through the class FEEvaluation instead. It
+ * implements a reinit method that is used to set pointers so that operations
+ * on quadrature points can be performed quickly, access functions to vectors
+ * for the @p read_dof_values, @p set_dof_values, and @p
+ * distributed_local_to_global functions, as well as methods to access values
+ * and gradients of finite element functions.
+ *
+ * This class has three template arguments:
+ *
+ * @param dim Dimension in which this class is to be used
+ *
+ * @param n_components Number of vector components when solving a system of
+ * PDEs. If the same operation is applied to several components of a PDE (e.g.
+ * a vector Laplace equation), they can be applied simultaneously with one
+ * call (and often more efficiently)
+ *
+ * @param Number Number format, usually @p double or @p float
+ *
+ * @author Katharina Kormann and Martin Kronbichler, 2010, 2011
+ */
+template <int dim, int n_components_, typename Number>
+class FEEvaluationBase
+{
+public:
+  typedef Number                            number_type;
+  typedef Tensor<1,n_components_,VectorizedArray<Number> > value_type;
+  typedef Tensor<1,n_components_,Tensor<1,dim,VectorizedArray<Number> > > gradient_type;
+  static const unsigned int dimension     = dim;
+  static const unsigned int n_components  = n_components_;
+
+  /**
+   * @name 1: General operations
+   */
+  //@{
+  /**
+   * Initializes the operation pointer to the current cell. Unlike the reinit
+   * functions taking a cell iterator as argument below and the
+   * FEValues::reinit() methods, where the information related to a particular
+   * cell is generated in the reinit call, this function is very cheap since
+   * all data is pre-computed in @p matrix_free, and only a few indices have
+   * to be set appropriately.
+   */
+  void reinit (const unsigned int cell);
+
+  /**
+   * Initialize the data to the current cell using a TriaIterator object as
+   * usual in FEValues. The argument is either of type
+   * DoFHandler::active_cell_iterator or DoFHandler::level_cell_iterator. This
+   * option is only available if the FEEvaluation object was created with a
+   * finite element, quadrature formula and correct update flags and
+   * <b>without</b> a MatrixFree object. This initialization method loses the
+   * ability to use vectorization, see also the description of the
+   * FEEvaluation class. When this reinit method is used, FEEvaluation can
+   * also read from vectors (but less efficient than with data coming from
+   * MatrixFree).
+   */
+  template <typename DoFHandlerType, bool level_dof_access>
+  void reinit (const TriaIterator<DoFCellAccessor<DoFHandlerType,level_dof_access> > &cell);
+
+  /**
+   * Initialize the data to the current cell using a TriaIterator object as
+   * usual in FEValues. This option is only available if the FEEvaluation
+   * object was created with a finite element, quadrature formula and correct
+   * update flags and <b>without</b> a MatrixFree object. This initialization
+   * method loses the ability to use vectorization, see also the description
+   * of the FEEvaluation class. When this reinit method is used, FEEvaluation
+   * can <b>not</b> read from vectors because no DoFHandler information is
+   * available.
+   */
+  void reinit (const typename Triangulation<dim>::cell_iterator &cell);
+
+  /**
+   * For the transformation information stored in MappingInfo, this function
+   * returns the index which belongs to the current cell as specified in @p
+   * reinit. Note that MappingInfo has different fields for Cartesian cells,
+   * cells with affine mapping and with general mappings, so in order to
+   * access the correct data, this interface must be used together with
+   * get_cell_type.
+   */
+  unsigned int get_cell_data_number() const;
+
+  /**
+   * Returns the type of the cell the @p reinit function has been called for.
+   * Valid values are @p cartesian for Cartesian cells (which allows for
+   * considerable data compression), @p affine for cells with affine mappings,
+   * and @p general for general cells without any compressed storage applied.
+   */
+  internal::MatrixFreeFunctions::CellType get_cell_type() const;
+
+  /**
+   * Returns a reference to the ShapeInfo object currently in use.
+   */
+  const internal::MatrixFreeFunctions::ShapeInfo<Number> &
+  get_shape_info() const;
+
+  /**
+   * Fills the JxW values currently used.
+   */
+  void
+  fill_JxW_values(AlignedVector<VectorizedArray<Number> > &JxW_values) const;
+
+  //@}
+
+  /**
+   * @name 2: Reading from and writing to vectors
+   */
+  //@{
+  /**
+   * For the vector @p src, read out the values on the degrees of freedom of
+   * the current cell, and store them internally. Similar functionality as the
+   * function DoFAccessor::get_interpolated_dof_values when no constraints are
+   * present, but it also includes constraints from hanging nodes, so one can
+   * see it as a similar function to ConstraintMatrix::read_dof_values as
+   * well. Note that if vectorization is enabled, the DoF values for several
+   * cells are set.
+   *
+   * If some constraints on the vector are inhomogeneous, use the function
+   * read_dof_values_plain instead and provide the vector with useful data
+   * also in constrained positions by calling ConstraintMatrix::distribute.
+   * When accessing vector entries during the solution of linear systems, the
+   * temporary solution should always have homogeneous constraints and this
+   * method is the correct one.
+   *
+   * If this class was constructed without a MatrixFree object and the
+   * information is acquired on the fly through a
+   * DoFHandler<dim>::cell_iterator, only one single cell is used by this
+   * class and this function extracts the values of the underlying components
+   * on the given cell. This call is slower than the ones done through a
+   * MatrixFree object and leads to a structure that does not effectively use
+   * vectorization in the evaluate routines based on these values (instead,
+   * VectorizedArray::n_array_elements identical copies are worked on).
+   */
+  template <typename VectorType>
+  void read_dof_values (const VectorType &src);
+
+  /**
+   * For a collection of several vectors @p src, read out the values on the
+   * degrees of freedom of the current cell for @p n_components (template
+   * argument), starting at @p first_index, and store them internally. Similar
+   * functionality as the function ConstraintMatrix::read_dof_values.  Note
+   * that if vectorization is enabled, the DoF values for several cells are
+   * set.
+   */
+  template <typename VectorType>
+  void read_dof_values (const std::vector<VectorType> &src,
+                        const unsigned int             first_index=0);
+
+  /**
+   * Reads data from several vectors. Same as other function with std::vector,
+   * but accepts a vector of pointers to vectors.
+   */
+  template <typename VectorType>
+  void read_dof_values (const std::vector<VectorType *> &src,
+                        const unsigned int              first_index=0);
+
+  /**
+   * For the vector @p src, read out the values on the degrees of freedom of
+   * the current cell, and store them internally. Similar functionality as the
+   * function DoFAccessor::get_interpolated_dof_values. As opposed to the
+   * read_dof_values function, this function reads out the plain entries from
+   * vectors, without taking stored constraints into account. This way of
+   * access is appropriate when the constraints have been distributed on the
+   * vector by a call to ConstraintMatrix::distribute previously. This
+   * function is also necessary when inhomogeneous constraints are to be used,
+   * as MatrixFree can only handle homogeneous constraints. Note that if
+   * vectorization is enabled, the DoF values for several cells are set.
+   *
+   * If this class was constructed without a MatrixFree object and the
+   * information is acquired on the fly through a
+   * DoFHandler<dim>::cell_iterator, only one single cell is used by this
+   * class and this function extracts the values of the underlying components
+   * on the given cell. This call is slower than the ones done through a
+   * MatrixFree object and leads to a structure that does not effectively use
+   * vectorization in the evaluate routines based on these values (instead,
+   * VectorizedArray::n_array_elements identical copies are worked on).
+   */
+  template <typename VectorType>
+  void read_dof_values_plain (const VectorType &src);
+
+  /**
+   * For a collection of several vectors @p src, read out the values on the
+   * degrees of freedom of the current cell for @p n_components (template
+   * argument), starting at @p first_index, and store them internally. Similar
+   * functionality as the function DoFAccessor::read_dof_values.  Note that if
+   * vectorization is enabled, the DoF values for several cells are set.
+   */
+  template <typename VectorType>
+  void read_dof_values_plain (const std::vector<VectorType> &src,
+                              const unsigned int             first_index=0);
+
+  /**
+   * Reads data from several vectors without resolving constraints. Same as
+   * other function with std::vector, but accepts a vector of pointers to
+   * vectors.
+   */
+  template <typename VectorType>
+  void read_dof_values_plain (const std::vector<VectorType *> &src,
+                              const unsigned int              first_index=0);
+
+  /**
+   * Takes the values stored internally on dof values of the current cell and
+   * sums them into the vector @p dst. The function also applies constraints
+   * during the write operation. The functionality is hence similar to the
+   * function ConstraintMatrix::distribute_local_to_global. If vectorization
+   * is enabled, the DoF values for several cells are used.
+   *
+   * If this class was constructed without a MatrixFree object and the
+   * information is acquired on the fly through a
+   * DoFHandler<dim>::cell_iterator, only one single cell is used by this
+   * class and this function extracts the values of the underlying components
+   * on the given cell. This call is slower than the ones done through a
+   * MatrixFree object and leads to a structure that does not effectively use
+   * vectorization in the evaluate routines based on these values (instead,
+   * VectorizedArray::n_array_elements identical copies are worked on).
+   */
+  template<typename VectorType>
+  void distribute_local_to_global (VectorType &dst) const;
+
+  /**
+   * Takes the values stored internally on dof values of the current cell for
+   * a vector-valued problem consisting of @p n_components (template argument)
+   * and sums them into the collection of vectors @p dst, starting at
+   * index @p first_index. The function also applies constraints during the
+   * write operation. The functionality is hence similar to the function
+   * ConstraintMatrix::distribute_local_to_global. If vectorization is
+   * enabled, the DoF values for several cells are used.
+   */
+  template<typename VectorType>
+  void distribute_local_to_global (std::vector<VectorType> &dst,
+                                   const unsigned int       first_index=0) const;
+
+  /**
+   * Writes data to several vectors. Same as other function with std::vector,
+   * but accepts a vector of pointers to vectors.
+   */
+  template<typename VectorType>
+  void distribute_local_to_global (std::vector<VectorType *> &dst,
+                                   const unsigned int       first_index=0) const;
+
+  /**
+   * Takes the values stored internally on dof values of the current cell and
+   * sums them into the vector @p dst. The function also applies constraints
+   * during the write operation. The functionality is hence similar to the
+   * function ConstraintMatrix::distribute_local_to_global.  Note that if
+   * vectorization is enabled, the DoF values for several cells are used.
+   *
+   * If this class was constructed without a MatrixFree object and the
+   * information is acquired on the fly through a
+   * DoFHandler<dim>::cell_iterator, only one single cell is used by this
+   * class and this function extracts the values of the underlying components
+   * on the given cell. This call is slower than the ones done through a
+   * MatrixFree object and leads to a structure that does not effectively use
+   * vectorization in the evaluate routines based on these values (instead,
+   * VectorizedArray::n_array_elements identical copies are worked on).
+   *
+   * NOTE(review): the wording "sums them into" mirrors the description of
+   * the distribute_local_to_global() overloads; given the name
+   * set_dof_values, this function presumably overwrites the entries of
+   * @p dst instead of accumulating into them -- confirm against the
+   * implementation and adjust the first paragraph accordingly.
+   */
+  template<typename VectorType>
+  void set_dof_values (VectorType &dst) const;
+
+  /**
+   * Takes the values stored internally on dof values of the current cell for
+   * a vector-valued problem consisting of @p n_components (template argument)
+   * and sums them into the collection of vectors @p dst, starting at index
+   * @p first_index. The function also applies constraints during the write
+   * operation. The functionality is hence similar to the function
+   * ConstraintMatrix::distribute_local_to_global.  Note that if vectorization
+   * is enabled, the DoF values for several cells are used.
+   *
+   * NOTE(review): the wording "sums them into" mirrors the description of
+   * the distribute_local_to_global() overloads; given the name
+   * set_dof_values, this function presumably overwrites the entries of the
+   * vectors in @p dst instead of accumulating into them -- confirm against
+   * the implementation and adjust the first paragraph accordingly.
+   */
+  template<typename VectorType>
+  void set_dof_values (std::vector<VectorType> &dst,
+                       const unsigned int       first_index=0) const;
+
+  /**
+   * Writes data to several vectors. Same as other function with std::vector,
+   * but accepts a vector of pointers to vectors.
+   */
+  template<typename VectorType>
+  void set_dof_values (std::vector<VectorType *> &dst,
+                       const unsigned int        first_index=0) const;
+
+  //@}
+
+  /**
+   * @name 3: Data access
+   */
+  //@{
+  /**
+   * Returns the value stored for the local degree of freedom with index @p
+   * dof. If the object is vector-valued, a vector-valued return argument is
+   * given. Note that when vectorization is enabled, values from several cells
+   * are grouped together. If @p set_dof_values was called last, the value
+   * corresponds to the one set there. If @p integrate was called last, it
+   * instead corresponds to the value of the integrated function with the test
+   * function of the given index.
+   *
+   * Note that the derived class FEEvaluationAccess overloads this operation
+   * with specializations for the scalar case (n_components == 1) and for the
+   * vector-valued case (n_components == dim).
+   */
+  value_type get_dof_value (const unsigned int dof) const;
+
+  /**
+   * Write a value to the field containing the degrees of freedom with
+   * component @p dof. Writes to the same field as is accessed through @p
+   * get_dof_value. Therefore, the original data that was read from a vector
+   * is overwritten as soon as a value is submitted.
+   *
+   * Note that the derived class FEEvaluationAccess overloads this operation
+   * with specializations for the scalar case (n_components == 1) and for the
+   * vector-valued case (n_components == dim).
+   */
+  void submit_dof_value (const value_type   val_in,
+                         const unsigned int dof);
+
+  /**
+   * Returns the value of a finite element function at quadrature point number
+   * @p q_point after a call to @p evaluate(true,...), or the value that has
+   * been stored there with a call to @p submit_value. If the object is
+   * vector-valued, a vector-valued return argument is given. Note that when
+   * vectorization is enabled, values from several cells are grouped together.
+   *
+   * Note that the derived class FEEvaluationAccess overloads this operation
+   * with specializations for the scalar case (n_components == 1) and for the
+   * vector-valued case (n_components == dim).
+   */
+  value_type get_value (const unsigned int q_point) const;
+
+  /**
+   * Write a value to the field containing the values on quadrature points
+   * with component @p q_point. Access to the same field as through @p
+   * get_value. If applied before the function @p integrate(true,...) is
+   * called, this specifies the value which is tested by all basis functions
+   * on the current cell and integrated over.
+   *
+   * Note that the derived class FEEvaluationAccess overloads this operation
+   * with specializations for the scalar case (n_components == 1) and for the
+   * vector-valued case (n_components == dim).
+   */
+  void submit_value (const value_type   val_in,
+                     const unsigned int q_point);
+
+  /**
+   * Returns the gradient of a finite element function at quadrature point
+   * number @p q_point after a call to @p evaluate(...,true,...), or the value
+   * that has been stored there with a call to @p submit_gradient.
+   *
+   * Note that the derived class FEEvaluationAccess overloads this operation
+   * with specializations for the scalar case (n_components == 1) and for the
+   * vector-valued case (n_components == dim).
+   */
+  gradient_type get_gradient (const unsigned int q_point) const;
+
+  /**
+   * Write a contribution that is tested by the gradient to the field
+   * containing the values on quadrature points with component @p q_point.
+   * Access to the same field as through @p get_gradient. If applied before
+   * the function @p integrate(...,true) is called, this specifies what is
+   * tested by all basis function gradients on the current cell and integrated
+   * over.
+   *
+   * Note that the derived class FEEvaluationAccess overloads this operation
+   * with specializations for the scalar case (n_components == 1) and for the
+   * vector-valued case (n_components == dim).
+   */
+  void submit_gradient(const gradient_type grad_in,
+                       const unsigned int  q_point);
+
+  /**
+   * Returns the Hessian of a finite element function at quadrature point
+   * number @p q_point after a call to @p evaluate(...,true). If only the
+   * diagonal or even the trace of the Hessian, the Laplacian, is needed, use
+   * the other functions below.
+   *
+   * Note that the derived class FEEvaluationAccess overloads this operation
+   * with specializations for the scalar case (n_components == 1) and for the
+   * vector-valued case (n_components == dim).
+   */
+  Tensor<1,n_components_,Tensor<2,dim,VectorizedArray<Number> > >
+  get_hessian (const unsigned int q_point) const;
+
+  /**
+   * Returns the diagonal of the Hessian of a finite element function at
+   * quadrature point number @p q_point after a call to @p evaluate(...,true).
+   *
+   * Note that the derived class FEEvaluationAccess overloads this operation
+   * with specializations for the scalar case (n_components == 1) and for the
+   * vector-valued case (n_components == dim).
+   */
+  gradient_type get_hessian_diagonal (const unsigned int q_point) const;
+
+  /**
+   * Returns the Laplacian (i.e., the trace of the Hessian) of a finite
+   * element function at quadrature point number @p q_point after a call to @p
+   * evaluate(...,true). Compared to the case when computing the full Hessian,
+   * some operations can be saved when only the Laplacian is requested.
+   *
+   * Note that the derived class FEEvaluationAccess overloads this operation
+   * with specializations for the scalar case (n_components == 1) and for the
+   * vector-valued case (n_components == dim).
+   */
+  value_type get_laplacian (const unsigned int q_point) const;
+
+  /**
+   * Takes values on quadrature points, multiplies by the Jacobian determinant
+   * and quadrature weights (JxW) and sums the values for all quadrature
+   * points on the cell. The result is a scalar, representing the integral
+   * over the function over the cell. If a vector-element is used, the
+   * resulting components are still separated. Moreover, if vectorization is
+   * enabled, the integral values of several cells are represented together.
+   */
+  value_type integrate_value () const;
+
+  //@}
+
+  /**
+   * @name 4: Access to internal data
+   */
+  //@{
+  /**
+   * Returns a read-only pointer to the first field of the dof values. This is
+   * the data field the read_dof_values() functions write into. First come the
+   * dof values for the first component, then all values for the second
+   * component, and so on. This is related to the internal data structures
+   * used in this class. In general, it is safer to use the get_dof_value()
+   * function instead.
+   */
+  const VectorizedArray<Number> *begin_dof_values () const;
+
+  /**
+   * Returns a read and write pointer to the first field of the dof values.
+   * This is the data field the read_dof_values() functions write into. First
+   * come the dof values for the first component, then all values for the
+   * second component, and so on. This is related to the internal data
+   * structures used in this class. In general, it is safer to use the
+   * get_dof_value() function instead.
+   */
+  VectorizedArray<Number> *begin_dof_values ();
+
+  /**
+   * Returns a read-only pointer to the first field of function values on
+   * quadrature points. First come the function values on all quadrature
+   * points for the first component, then all values for the second component,
+   * and so on. This is related to the internal data structures used in this
+   * class. The raw data after a call to @p evaluate only contains unit cell
+   * operations, so possible transformations, quadrature weights etc. must be
+   * applied manually. In general, it is safer to use the get_value() function
+   * instead, which does all the transformation internally.
+   */
+  const VectorizedArray<Number> *begin_values () const;
+
+  /**
+   * Returns a read and write pointer to the first field of function values on
+   * quadrature points. First come the function values on all quadrature
+   * points for the first component, then all values for the second component,
+   * and so on. This is related to the internal data structures used in this
+   * class. The raw data after a call to @p evaluate only contains unit cell
+   * operations, so possible transformations, quadrature weights etc. must be
+   * applied manually. In general, it is safer to use the get_value() function
+   * instead, which does all the transformation internally.
+   */
+  VectorizedArray<Number> *begin_values ();
+
+  /**
+   * Returns a read-only pointer to the first field of function gradients on
+   * quadrature points. First comes the x-component of the gradient for the
+   * first component on all quadrature points, then the y-component, and so
+   * on. Next comes the x-component of the second component, and so on. This
+   * is related to the internal data structures used in this class. The raw
+   * data after a call to @p evaluate only contains unit cell operations, so
+   * possible transformations, quadrature weights etc. must be applied
+   * manually. In general, it is safer to use the get_gradient() function
+   * instead, which does all the transformation internally.
+   */
+  const VectorizedArray<Number> *begin_gradients () const;
+
+  /**
+   * Returns a read and write pointer to the first field of function gradients
+   * on quadrature points. First comes the x-component of the gradient for the
+   * first component on all quadrature points, then the y-component, and so
+   * on. Next comes the x-component of the second component, and so on. This
+   * is related to the internal data structures used in this class. The raw
+   * data after a call to @p evaluate only contains unit cell operations, so
+   * possible transformations, quadrature weights etc. must be applied
+   * manually. In general, it is safer to use the get_gradient() function
+   * instead, which does all the transformation internally.
+   */
+  VectorizedArray<Number> *begin_gradients ();
+
+  /**
+   * Returns a read-only pointer to the first field of function hessians on
+   * quadrature points. First comes the xx-component of the hessian for the
+   * first component on all quadrature points, then the yy-component, the
+   * zz-component (in 3D), then the xy-component, and so on. Next comes the
+   * xx-component of the second component, and so on. This is related to the
+   * internal data structures used in this class. The raw data after a call to
+   * @p evaluate only contains unit cell operations, so possible
+   * transformations, quadrature weights etc. must be applied manually. In
+   * general, it is safer to use the get_laplacian() or get_hessian()
+   * functions instead, which do all the transformation internally.
+   */
+  const VectorizedArray<Number> *begin_hessians () const;
+
+  /**
+   * Returns a read and write pointer to the first field of function hessians
+   * on quadrature points. First comes the xx-component of the hessian for the
+   * first component on all quadrature points, then the yy-component, the
+   * zz-component (in 3D), then the xy-component, and so on. Next comes the
+   * xx-component of the second component, and so on. This is related to the
+   * internal data structures used in this class. The raw data after a call to
+   * @p evaluate only contains unit cell operations, so possible
+   * transformations, quadrature weights etc. must be applied manually. In
+   * general, it is safer to use the get_laplacian() or get_hessian()
+   * functions instead, which do all the transformation internally.
+   */
+  VectorizedArray<Number> *begin_hessians ();
+
+  /**
+   * Returns the numbering of local degrees of freedom within the evaluation
+   * routines of FEEvaluation in terms of the standard numbering on finite
+   * elements.
+   */
+  const std::vector<unsigned int> &
+  get_internal_dof_numbering() const;
+
+  //@}
+
+protected:
+
+  /**
+   * Constructor. Made protected to prevent users from directly using this
+   * class. Takes all data stored in MatrixFree. If applied to problems with
+   * more than one finite element or more than one quadrature formula selected
+   * during construction of @p matrix_free, @p fe_no and @p quad_no allow to
+   * select the appropriate components.
+   */
+  FEEvaluationBase (const MatrixFree<dim,Number> &matrix_free,
+                    const unsigned int            fe_no,
+                    const unsigned int            quad_no,
+                    const unsigned int            fe_degree,
+                    const unsigned int            n_q_points);
+
+  /**
+   * Constructor that comes with reduced functionality and works similarly to
+   * FEValues. The arguments are similar to the ones passed to the constructor
+   * of FEValues, with the notable difference that FEEvaluation expects a one-
+   * dimensional quadrature formula, Quadrature<1>, instead of a @p dim
+   * dimensional one. The finite element can be either scalar or vector
+   * valued, but this method always only selects a scalar base element at a
+   * time (with @p n_components copies as specified by the class template
+   * argument). For vector-valued elements, the optional argument @p
+   * first_selected_component allows specifying the index of the base element
+   * to be used for evaluation. Note that the internal data structures always
+   * assume that the base element is primitive; non-primitive elements are
+   * currently not supported.
+   *
+   * As known from FEValues, a call to the reinit method with a
+   * Triangulation::cell_iterator is necessary to make the geometry and
+   * degrees of freedom of the current class known. If the iterator includes
+   * DoFHandler information (i.e., it is a DoFHandler::cell_iterator or
+   * similar), the initialization also allows reading from or writing to
+   * vectors in the standard way for DoFHandler::active_cell_iterator types
+   * for one cell at a time. However, this approach is much slower than the
+   * path with MatrixFree with MPI since index translation has to be done. As
+   * only one cell at a time is used, this method does not vectorize over
+   * several elements (which is most efficient for vector operations), but
+   * only possibly within the element if the evaluate/integrate routines are
+   * combined inside user code (e.g. for computing cell matrices).
+   *
+   * The optional FEEvaluationBase object allows several FEEvaluation objects
+   * to share the geometry evaluation, i.e., the underlying mapping and
+   * quadrature points only need to be evaluated once. This only works if the
+   * quadrature formulas are the same. Otherwise, a new evaluation object is
+   * created. Make sure not to pass an optional object around when you intend
+   * to use the FEEvaluation object in %parallel with another one because
+   * otherwise the intended sharing may create race conditions.
+   */
+  template <int n_components_other>
+  FEEvaluationBase (const Mapping<dim>       &mapping,
+                    const FiniteElement<dim> &fe,
+                    const Quadrature<1>      &quadrature,
+                    const UpdateFlags         update_flags,
+                    const unsigned int        first_selected_component,
+                    const FEEvaluationBase<dim,n_components_other,Number> *other);
+
+  /**
+   * Copy constructor. If FEEvaluationBase was constructed from a mapping, fe,
+   * quadrature, and update flags, the underlying geometry evaluation based on
+   * FEValues will be deep-copied in order to allow for using in parallel with
+   * threads.
+   */
+  FEEvaluationBase (const FEEvaluationBase &other);
+
+  /**
+   * A unified function to read from and write into vectors based on the given
+   * template operation. It can perform the operation for @p read_dof_values,
+   * @p distribute_local_to_global, and @p set_dof_values. It performs the
+   * operation for several vectors at a time.
+   */
+  template<typename VectorType, typename VectorOperation>
+  void read_write_operation (const VectorOperation &operation,
+                             VectorType            *vectors[]) const;
+
+  /**
+   * For a collection of several vectors @p src_data, read out the values on
+   * the degrees of freedom of the current cell for @p n_components (template
+   * argument), and store them internally. Similar functionality as the
+   * function DoFAccessor::read_dof_values. Note that if vectorization is
+   * enabled, the DoF values for several cells are set.
+   */
+  template<typename VectorType>
+  void read_dof_values_plain (const VectorType *src_data[]);
+
+  /**
+   * Internal data fields that store the values. Derived classes will know the
+   * length of all arrays at compile time and allocate the memory on the
+   * stack. This makes it possible to cheaply set up a FEEvaluation object and
+   * write thread-safe programs by letting each thread own a private object of
+   * this type. In this base class, only pointers to the actual data are
+   * stored.
+   *
+   * This field stores the values for local degrees of freedom (e.g. after
+   * reading out from a vector but before applying unit cell transformations
+   * or before distributing them into a result vector). The methods
+   * get_dof_value() and submit_dof_value() read from or write to this field.
+   */
+  VectorizedArray<Number> *values_dofs[n_components];
+
+  /**
+   * This field stores the values of the finite element function on quadrature
+   * points after applying unit cell transformations or before integrating.
+   * The methods get_value() and submit_value() access this field.
+   */
+  VectorizedArray<Number> *values_quad[n_components];
+
+  /**
+   * This field stores the gradients of the finite element function on
+   * quadrature points after applying unit cell transformations or before
+   * integrating. The methods get_gradient() and submit_gradient() (as well as
+   * some specializations like get_symmetric_gradient() or get_divergence())
+   * access this field.
+   */
+  VectorizedArray<Number> *gradients_quad[n_components][dim];
+
+  /**
+   * This field stores the Hessians of the finite element function on
+   * quadrature points after applying unit cell transformations. The methods
+   * get_hessian(), get_laplacian(), get_hessian_diagonal() access this field.
+   */
+  VectorizedArray<Number> *hessians_quad[n_components][(dim*(dim+1))/2];
+
+  /**
+   * Stores the number of the quadrature formula of the present cell.
+   */
+  const unsigned int quad_no;
+
+  /**
+   * Stores the number of components in the finite element as detected in the
+   * MatrixFree storage class for comparison with the template argument.
+   */
+  const unsigned int n_fe_components;
+
+  /**
+   * Stores the active fe index for this class for efficient indexing in the
+   * hp case.
+   */
+  const unsigned int active_fe_index;
+
+  /**
+   * Stores the active quadrature index for this class for efficient indexing
+   * in the hp case.
+   */
+  const unsigned int active_quad_index;
+
+  /**
+   * Stores a pointer to the underlying data.
+   */
+  const MatrixFree<dim,Number> *matrix_info;
+
+  /**
+   * Stores a pointer to the underlying DoF indices and constraint description
+   * for the component specified at construction. Also contained in
+   * matrix_info, but it simplifies code if we store a reference to it.
+   */
+  const internal::MatrixFreeFunctions::DoFInfo *dof_info;
+
+  /**
+   * Stores a pointer to the underlying transformation data from unit to real
+   * cells for the given quadrature formula specified at construction. Also
+   * contained in matrix_info, but it simplifies code if we store a reference
+   * to it.
+   */
+  const internal::MatrixFreeFunctions::MappingInfo<dim,Number> *mapping_info;
+
+  /**
+   * In case the class is initialized from MappingFEEvaluation instead of
+   * MatrixFree, this data structure holds the evaluated shape data.
+   */
+  std_cxx11::shared_ptr<internal::MatrixFreeFunctions::ShapeInfo<Number> > stored_shape_info;
+
+  /**
+   * Stores a pointer to the unit cell shape data, i.e., values, gradients and
+   * Hessians in 1D at the quadrature points that constitute the tensor
+   * product. Also contained in matrix_info, but it simplifies code if we
+   * store a reference to it.
+   */
+  const internal::MatrixFreeFunctions::ShapeInfo<Number> *data;
+
+  /**
+   * A pointer to the Cartesian Jacobian information of the present cell. Only
+   * set to a useful value if on a Cartesian cell, otherwise zero.
+   */
+  const Tensor<1,dim,VectorizedArray<Number> > *cartesian_data;
+
+  /**
+   * A pointer to the Jacobian information of the present cell. Only set to a
+   * useful value if on a non-Cartesian cell.
+   */
+  const Tensor<2,dim,VectorizedArray<Number> > *jacobian;
+
+  /**
+   * A pointer to the Jacobian determinant of the present cell. If on a
+   * Cartesian cell or on a cell with constant Jacobian, this is just the
+   * Jacobian determinant, otherwise the Jacobian determinant times the
+   * quadrature weight.
+   */
+  const VectorizedArray<Number> *J_value;
+
+  /**
+   * A pointer to the quadrature weights of the underlying quadrature formula.
+   */
+  const VectorizedArray<Number> *quadrature_weights;
+
+  /**
+   * A pointer to the quadrature points on the present cell.
+   */
+  const Point<dim,VectorizedArray<Number> > *quadrature_points;
+
+  /**
+   * A pointer to the diagonal part of the Jacobian gradient on the present
+   * cell. Only set to a useful value if on a general cell with non-constant
+   * Jacobian.
+   */
+  const Tensor<2,dim,VectorizedArray<Number> > *jacobian_grad;
+
+  /**
+   * A pointer to the upper diagonal part of the Jacobian gradient on the
+   * present cell. Only set to a useful value if on a general cell with non-
+   * constant Jacobian.
+   */
+  const Tensor<1,(dim>1?dim*(dim-1)/2:1),Tensor<1,dim,VectorizedArray<Number> > > * jacobian_grad_upper;
+
+  /**
+   * After a call to reinit(), stores the number of the cell we are currently
+   * working with.
+   */
+  unsigned int cell;
+
+  /**
+   * Stores the type of the cell we are currently working with after a call to
+   * reinit(). Valid values are @p cartesian, @p affine and @p general, which
+   * have different implications on how the Jacobian transformations are
+   * stored internally in MappingInfo.
+   */
+  internal::MatrixFreeFunctions::CellType cell_type;
+
+  /**
+   * The stride to access the correct data in MappingInfo.
+   */
+  unsigned int cell_data_number;
+
+  /**
+   * Debug information to track whether dof values have been initialized
+   * before accessed. Used to control exceptions when uninitialized data is
+   * used.
+   */
+  bool dof_values_initialized;
+
+  /**
+   * Debug information to track whether values on quadrature points have been
+   * initialized before accessed. Used to control exceptions when
+   * uninitialized data is used.
+   */
+  bool values_quad_initialized;
+
+  /**
+   * Debug information to track whether gradients on quadrature points have
+   * been initialized before accessed. Used to control exceptions when
+   * uninitialized data is used.
+   */
+  bool gradients_quad_initialized;
+
+  /**
+   * Debug information to track whether Hessians on quadrature points have
+   * been initialized before accessed. Used to control exceptions when
+   * uninitialized data is used.
+   */
+  bool hessians_quad_initialized;
+
+  /**
+   * Debug information to track whether values on quadrature points have been
+   * submitted for integration before the integration is actually started.
+   * Used to control exceptions when uninitialized data is used.
+   */
+  bool values_quad_submitted;
+
+  /**
+   * Debug information to track whether gradients on quadrature points have
+   * been submitted for integration before the integration is actually
+   * started. Used to control exceptions when uninitialized data is used.
+   */
+  bool gradients_quad_submitted;
+
+  /**
+   * Geometry data that can be generated by FEValues on the fly with the
+   * respective constructor.
+   */
+  std_cxx11::shared_ptr<internal::MatrixFreeFunctions::MappingDataOnTheFly<dim,Number> > mapped_geometry;
+
+  /**
+   * For use with on-the-fly evaluation, provide a data structure to store the
+   * global dof indices on the current cell from a reinit call.
+   */
+  std::vector<types::global_dof_index> old_style_dof_indices;
+
+  /**
+   * For a FiniteElement with more than one base element, select at which
+   * component this data structure should start.
+   */
+  const unsigned int first_selected_component;
+
+  /**
+   * A temporary data structure necessary to read degrees of freedom when no
+   * MatrixFree object was given at initialization.
+   */
+  mutable std::vector<types::global_dof_index> local_dof_indices;
+
+  /**
+   * Make other FEEvaluationBase as well as FEEvaluation objects friends.
+   */
+  template <int, int, typename> friend class FEEvaluationBase;
+  template <int, int, int, int, typename> friend class FEEvaluation;
+};
+
+
+
+/**
+ * This class provides access to the data fields of the FEEvaluation classes.
+ * Generic access is achieved through the base class, and specializations for
+ * scalar and vector-valued elements are defined separately.
+ *
+ * This primary template itself only declares the typedefs and constants
+ * below (with @p value_type being a Tensor<1,n_components_> of
+ * VectorizedArray<Number> and @p gradient_type a Tensor<1,n_components_> of
+ * Tensor<1,dim,VectorizedArray<Number> >) as well as protected constructors;
+ * all access functions are the ones inherited from FEEvaluationBase.
+ *
+ * @author Katharina Kormann and Martin Kronbichler, 2010, 2011
+ */
+template <int dim, int n_components_, typename Number>
+class FEEvaluationAccess : public FEEvaluationBase<dim,n_components_,Number>
+{
+public:
+  typedef Number                            number_type;
+  typedef Tensor<1,n_components_,VectorizedArray<Number> > value_type;
+  typedef Tensor<1,n_components_,Tensor<1,dim,VectorizedArray<Number> > > gradient_type;
+  static const unsigned int dimension     = dim;
+  static const unsigned int n_components  = n_components_;
+  typedef FEEvaluationBase<dim,n_components_, Number> BaseClass;
+
+protected:
+  /**
+   * Constructor. Made protected to prevent initialization in user code. Takes
+   * all data stored in MatrixFree. If applied to problems with more than one
+   * finite element or more than one quadrature formula selected during
+   * construction of @p matrix_free, @p fe_no and @p quad_no allow selecting
+   * the appropriate components.
+   */
+  FEEvaluationAccess (const MatrixFree<dim,Number> &matrix_free,
+                      const unsigned int            fe_no,
+                      const unsigned int            quad_no,
+                      const unsigned int            fe_degree,
+                      const unsigned int            n_q_points);
+
+  /**
+   * Constructor with reduced functionality for similar usage of FEEvaluation
+   * as FEValues, including matrix assembly. Takes the same argument list as
+   * the corresponding protected constructor of FEEvaluationBase.
+   */
+  template <int n_components_other>
+  FEEvaluationAccess (const Mapping<dim>       &mapping,
+                      const FiniteElement<dim> &fe,
+                      const Quadrature<1>      &quadrature,
+                      const UpdateFlags         update_flags,
+                      const unsigned int        first_selected_component,
+                      const FEEvaluationBase<dim,n_components_other,Number> *other);
+
+  /**
+   * Copy constructor.
+   */
+  FEEvaluationAccess (const FEEvaluationAccess &other);
+};
+
+
+
+
+/**
+ * This class provides access to the data fields of the FEEvaluation classes.
+ * Partial specialization for scalar fields that defines access with simple
+ * data fields, i.e., scalars for the values and Tensor<1,dim> for the
+ * gradients.
+ *
+ * @author Katharina Kormann and Martin Kronbichler, 2010, 2011
+ */
+template <int dim, typename Number>
+class FEEvaluationAccess<dim,1,Number> : public FEEvaluationBase<dim,1,Number>
+{
+public:
+  typedef Number                                 number_type;
+  typedef VectorizedArray<Number>                value_type;
+  typedef Tensor<1,dim,VectorizedArray<Number> > gradient_type;
+  static const unsigned int dimension          = dim;
+  typedef FEEvaluationBase<dim,1,Number>         BaseClass;
+
+  /**
+   * Returns the value stored for the local degree of freedom with index @p
+   * dof. If the object is vector-valued, a vector-valued return argument is
+   * given. Note that when vectorization is enabled, values from several cells
+   * are grouped together. If @p set_dof_values was called last, the value
+   * corresponds to the one set there. If @p integrate was called last, it
+   * instead corresponds to the value of the integrated function with the test
+   * function of the given index.
+   */
+  value_type get_dof_value (const unsigned int dof) const;
+
+  /**
+   * Write a value to the field containing the degrees of freedom with
+   * component @p dof. Access to the same field as through @p get_dof_value.
+   */
+  void submit_dof_value (const value_type   val_in,
+                         const unsigned int dof);
+
+  /**
+   * Returns the value of a finite element function at quadrature point number
+   * @p q_point after a call to @p evaluate(true,...), or the value that has
+   * been stored there with a call to @p submit_value. If the object is
+   * vector-valued, a vector-valued return argument is given. Note that when
+   * vectorization is enabled, values from several cells are grouped together.
+   */
+  value_type get_value (const unsigned int q_point) const;
+
+  /**
+   * Write a value to the field containing the values on quadrature points
+   * with component @p q_point. Access to the same field as through @p
+   * get_value. If applied before the function @p integrate(true,...) is
+   * called, this specifies the value which is tested by all basis function on
+   * the current cell and integrated over.
+   */
+  void submit_value (const value_type   val_in,
+                     const unsigned int q_point);
+
+  /**
+   * Returns the gradient of a finite element function at quadrature point
+   * number @p q_point after a call to @p evaluate(...,true,...), or the value
+   * that has been stored there with a call to @p submit_gradient.
+   */
+  gradient_type get_gradient (const unsigned int q_point) const;
+
+  /**
+   * Write a contribution that is tested by the gradient to the field
+   * containing the values on quadrature points with component @p q_point.
+   * Access to the same field as through @p get_gradient. If applied before
+   * the function @p integrate(...,true) is called, this specifies what is
+   * tested by all basis function gradients on the current cell and integrated
+   * over.
+   */
+  void submit_gradient(const gradient_type grad_in,
+                       const unsigned int  q_point);
+
+  /**
+   * Returns the Hessian of a finite element function at quadrature point
+   * number @p q_point after a call to @p evaluate(...,true). If only the
+   * diagonal part of the Hessian or its trace, the Laplacian, are needed, use
+   * the respective functions below.
+   */
+  Tensor<2,dim,VectorizedArray<Number> >
+  get_hessian (unsigned int q_point) const;
+
+  /**
+   * Returns the diagonal of the Hessian of a finite element function at
+   * quadrature point number @p q_point after a call to @p evaluate(...,true).
+   */
+  gradient_type get_hessian_diagonal (const unsigned int q_point) const;
+
+  /**
+   * Returns the Laplacian of a finite element function at quadrature point
+   * number @p q_point after a call to @p evaluate(...,true).
+   */
+  value_type get_laplacian (const unsigned int q_point) const;
+
+  /**
+   * Takes values on quadrature points, multiplies by the Jacobian determinant
+   * and quadrature weights (JxW) and sums the values for all quadrature
+   * points on the cell. The result is a scalar, representing the integral
+   * of the function over the cell. If a vector-element is used, the
+   * resulting components are still separated. Moreover, if vectorization is
+   * enabled, the integral values of several cells are represented together.
+   */
+  value_type integrate_value () const;
+
+protected:
+  /**
+   * Constructor. Made protected to avoid initialization in user code. Takes
+   * all data stored in MatrixFree. If applied to problems with more than one
+   * finite element or more than one quadrature formula selected during
+   * construction of @p matrix_free, @p fe_no and @p quad_no allow to select
+   * the appropriate components.
+   */
+  FEEvaluationAccess (const MatrixFree<dim,Number> &matrix_free,
+                      const unsigned int            fe_no,
+                      const unsigned int            quad_no,
+                      const unsigned int            fe_degree,
+                      const unsigned int            n_q_points);
+
+  /**
+   * Constructor with reduced functionality for similar usage of FEEvaluation
+   * as FEValues, including matrix assembly.
+   */
+  template <int n_components_other>
+  FEEvaluationAccess (const Mapping<dim>       &mapping,
+                      const FiniteElement<dim> &fe,
+                      const Quadrature<1>      &quadrature,
+                      const UpdateFlags         update_flags,
+                      const unsigned int        first_selected_component,
+                      const FEEvaluationBase<dim,n_components_other,Number> *other);
+
+  /**
+   * Copy constructor
+   */
+  FEEvaluationAccess (const FEEvaluationAccess &other);
+};
+
+
+
+/**
+ * This class provides access to the data fields of the FEEvaluation classes.
+ * Partial specialization for fields with as many components as the underlying
+ * space dimension, i.e., values are of type Tensor<1,dim> and gradients of
+ * type Tensor<2,dim>. Provides some additional functions for access, like the
+ * symmetric gradient and divergence.
+ *
+ * @author Katharina Kormann and Martin Kronbichler, 2010, 2011
+ */
+template <int dim, typename Number>
+class FEEvaluationAccess<dim,dim,Number> : public FEEvaluationBase<dim,dim,Number>
+{
+public:
+  typedef Number                            number_type;
+  typedef Tensor<1,dim,VectorizedArray<Number> > value_type;
+  typedef Tensor<2,dim,VectorizedArray<Number> > gradient_type;
+  static const unsigned int dimension     = dim;
+  static const unsigned int n_components  = dim;
+  typedef FEEvaluationBase<dim,dim,Number> BaseClass;
+
+  /**
+   * Returns the gradient of a finite element function at quadrature point
+   * number @p q_point after a call to @p evaluate(...,true,...).
+   */
+  gradient_type get_gradient (const unsigned int q_point) const;
+
+  /**
+   * Returns the divergence of a vector-valued finite element at quadrature
+   * point number @p q_point after a call to @p evaluate(...,true,...).
+   */
+  VectorizedArray<Number> get_divergence (const unsigned int q_point) const;
+
+  /**
+   * Returns the symmetric gradient of a vector-valued finite element at
+   * quadrature point number @p q_point after a call to @p
+   * evaluate(...,true,...). It corresponds to <tt>0.5
+   * (grad+grad<sup>T</sup>)</tt>.
+   */
+  SymmetricTensor<2,dim,VectorizedArray<Number> >
+  get_symmetric_gradient (const unsigned int q_point) const;
+
+  /**
+   * Returns the curl of the vector field, $\nabla \times v$, at quadrature
+   * point number @p q_point after a call to @p evaluate(...,true,...).
+   */
+  Tensor<1,(dim==2?1:dim),VectorizedArray<Number> >
+  get_curl (const unsigned int q_point) const;
+
+  /**
+   * Returns the Hessian of a finite element function at quadrature point
+   * number @p q_point after a call to @p evaluate(...,true). If only the
+   * diagonal of the Hessian or its trace, the Laplacian, is needed, use the
+   * respective functions.
+   */
+  Tensor<3,dim,VectorizedArray<Number> >
+  get_hessian (const unsigned int q_point) const;
+
+  /**
+   * Returns the diagonal of the Hessian of a finite element function at
+   * quadrature point number @p q_point after a call to @p evaluate(...,true).
+   */
+  gradient_type get_hessian_diagonal (const unsigned int q_point) const;
+
+  /**
+   * Write a contribution that is tested by the gradient to the field
+   * containing the values on quadrature points with component @p q_point.
+   * Access to the same field as through @p get_gradient. If applied before
+   * the function @p integrate(...,true) is called, this specifies what is
+   * tested by all basis function gradients on the current cell and integrated
+   * over.
+   */
+  void submit_gradient(const gradient_type grad_in,
+                       const unsigned int  q_point);
+
+  /**
+   * Write a contribution that is tested by the gradient to the field
+   * containing the values on quadrature points with component @p q_point.
+   * This function is an alternative to the other submit_gradient function
+   * when using a system with a fixed number of equations which happens to
+   * coincide with the dimension for some dimensions, but not all. To allow
+   * for dimension-independent programming, this function can be used instead.
+   */
+  void submit_gradient(const Tensor<1,dim,Tensor<1,dim,VectorizedArray<Number> > > grad_in,
+                       const unsigned int q_point);
+
+  /**
+   * Write a contribution that is tested by the divergence to the field
+   * containing the values on quadrature points with component @p q_point.
+   * Access to the same field as through @p get_gradient. If applied before
+   * the function @p integrate(...,true) is called, this specifies what is
+   * tested by all basis function gradients on the current cell and integrated
+   * over.
+   */
+  void submit_divergence (const VectorizedArray<Number> div_in,
+                          const unsigned int q_point);
+
+  /**
+   * Write a contribution that is tested by the gradient to the field
+   * containing the values on quadrature points with component @p q_point.
+   * Access to the same field as through @p get_gradient. If applied before
+   * the function @p integrate(...,true) is called, this specifies the
+   * gradient which is tested by all basis function gradients on the current
+   * cell and integrated over.
+   */
+  void submit_symmetric_gradient(const SymmetricTensor<2,dim,VectorizedArray<Number> > grad_in,
+                                 const unsigned int      q_point);
+
+  /**
+   * Write the components of a curl for quadrature point @p q_point. Access to
+   * the same data field as through @p get_gradient.
+   */
+  void submit_curl (const Tensor<1,dim==2?1:dim,VectorizedArray<Number> > curl_in,
+                    const unsigned int q_point);
+
+protected:
+  /**
+   * Constructor. Made protected to avoid initialization in user code. Takes
+   * all data stored in MatrixFree. If applied to problems with more than one
+   * finite element or more than one quadrature formula selected during
+   * construction of @p matrix_free, @p fe_no and @p quad_no allow selecting
+   * the appropriate components.
+   */
+  FEEvaluationAccess (const MatrixFree<dim,Number> &matrix_free,
+                      const unsigned int            fe_no,
+                      const unsigned int            quad_no,
+                      const unsigned int            dofs_per_cell,
+                      const unsigned int            n_q_points);
+
+  /**
+   * Constructor with reduced functionality for similar usage of FEEvaluation
+   * as FEValues, including matrix assembly.
+   */
+  template <int n_components_other>
+  FEEvaluationAccess (const Mapping<dim>       &mapping,
+                      const FiniteElement<dim> &fe,
+                      const Quadrature<1>      &quadrature,
+                      const UpdateFlags         update_flags,
+                      const unsigned int        first_selected_component,
+                      const FEEvaluationBase<dim,n_components_other,Number> *other);
+
+  /**
+   * Copy constructor
+   */
+  FEEvaluationAccess (const FEEvaluationAccess &other);
+};
+
+
+/**
+ * This class provides access to the data fields of the FEEvaluation classes.
+ * Partial specialization for scalar fields in 1d that defines access with
+ * simple data fields, i.e., scalars for the values and Tensor<1,1> for the
+ * gradients.
+ *
+ * @author Katharina Kormann and Martin Kronbichler, 2010, 2011, Shiva
+ * Rudraraju, 2014
+ */
+template <typename Number>
+class FEEvaluationAccess<1,1,Number> : public FEEvaluationBase<1,1,Number>
+{
+public:
+  typedef Number                                 number_type;
+  typedef VectorizedArray<Number>                value_type;
+  typedef Tensor<1,1,VectorizedArray<Number> >   gradient_type;
+  static const unsigned int dimension          = 1;
+  typedef FEEvaluationBase<1,1,Number>           BaseClass;
+
+  /**
+   * Returns the value stored for the local degree of freedom with index @p
+   * dof. If the object is vector-valued, a vector-valued return argument is
+   * given. Note that when vectorization is enabled, values from several cells
+   * are grouped together. If @p set_dof_values was called last, the value
+   * corresponds to the one set there. If @p integrate was called last, it
+   * instead corresponds to the value of the integrated function with the test
+   * function of the given index.
+   */
+  value_type get_dof_value (const unsigned int dof) const;
+
+  /**
+   * Write a value to the field containing the degrees of freedom with
+   * component @p dof. Access to the same field as through @p get_dof_value.
+   */
+  void submit_dof_value (const value_type   val_in,
+                         const unsigned int dof);
+
+  /**
+   * Returns the value of a finite element function at quadrature point number
+   * @p q_point after a call to @p evaluate(true,...), or the value that has
+   * been stored there with a call to @p submit_value. If the object is
+   * vector-valued, a vector-valued return argument is given. Note that when
+   * vectorization is enabled, values from several cells are grouped together.
+   */
+  value_type get_value (const unsigned int q_point) const;
+
+  /**
+   * Write a value to the field containing the values on quadrature points
+   * with component @p q_point. Access to the same field as through @p
+   * get_value. If applied before the function @p integrate(true,...) is
+   * called, this specifies the value which is tested by all basis functions on
+   * the current cell and integrated over.
+   */
+  void submit_value (const value_type   val_in,
+                     const unsigned int q_point);
+
+  /**
+   * Returns the gradient of a finite element function at quadrature point
+   * number @p q_point after a call to @p evaluate(...,true,...), or the value
+   * that has been stored there with a call to @p submit_gradient.
+   */
+  gradient_type get_gradient (const unsigned int q_point) const;
+
+  /**
+   * Write a contribution that is tested by the gradient to the field
+   * containing the values on quadrature points with component @p q_point.
+   * Access to the same field as through @p get_gradient. If applied before
+   * the function @p integrate(...,true) is called, this specifies what is
+   * tested by all basis function gradients on the current cell and integrated
+   * over.
+   */
+  void submit_gradient(const gradient_type grad_in,
+                       const unsigned int  q_point);
+
+  /**
+   * Returns the Hessian of a finite element function at quadrature point
+   * number @p q_point after a call to @p evaluate(...,true). If only the
+   * diagonal part of the Hessian or its trace, the Laplacian, is needed, use
+   * the respective functions below.
+   */
+  Tensor<2,1,VectorizedArray<Number> >
+  get_hessian (unsigned int q_point) const;
+
+  /**
+   * Returns the diagonal of the Hessian of a finite element function at
+   * quadrature point number @p q_point after a call to @p evaluate(...,true).
+   */
+  gradient_type get_hessian_diagonal (const unsigned int q_point) const;
+
+  /**
+   * Returns the Laplacian of a finite element function at quadrature point
+   * number @p q_point after a call to @p evaluate(...,true).
+   */
+  value_type get_laplacian (const unsigned int q_point) const;
+
+  /**
+   * Takes values on quadrature points, multiplies by the Jacobian determinant
+   * and quadrature weights (JxW) and sums the values for all quadrature
+   * points on the cell. The result is a scalar, representing the integral
+   * of the function over the cell. If a vector-element is used, the
+   * resulting components are still separated. Moreover, if vectorization is
+   * enabled, the integral values of several cells are represented together.
+   */
+  value_type integrate_value () const;
+
+protected:
+  /**
+   * Constructor. Made protected to avoid initialization in user code. Takes
+   * all data stored in MatrixFree. If applied to problems with more than one
+   * finite element or more than one quadrature formula selected during
+   * construction of @p matrix_free, @p fe_no and @p quad_no allow selecting
+   * the appropriate components.
+   */
+  FEEvaluationAccess (const MatrixFree<1,Number> &matrix_free,
+                      const unsigned int          fe_no,
+                      const unsigned int          quad_no,
+                      const unsigned int          fe_degree,
+                      const unsigned int          n_q_points);
+
+  /**
+   * Constructor with reduced functionality for similar usage of FEEvaluation
+   * as FEValues, including matrix assembly.
+   */
+  template <int n_components_other>
+  FEEvaluationAccess (const Mapping<1>       &mapping,
+                      const FiniteElement<1> &fe,
+                      const Quadrature<1>    &quadrature,
+                      const UpdateFlags       update_flags,
+                      const unsigned int      first_selected_component,
+                      const FEEvaluationBase<1,n_components_other,Number> *other);
+
+  /**
+   * Copy constructor
+   */
+  FEEvaluationAccess (const FEEvaluationAccess &other);
+};
+
+
+
+/**
+ * The class that provides all functions necessary to evaluate functions at
+ * quadrature points and cell integrations. In functionality, this class is
+ * similar to FEValues<dim>, however, it includes a lot of specialized
+ * functions that make it much faster (by a factor between 5 and 500,
+ * depending on the polynomial order).
+ *
+ * <h3>Usage and initialization</h3>
+ *
+ * <h4>Fast usage in combination with MatrixFree</h4>
+ *
+ * The first and foremost way of usage is to initialize this class from a
+ * MatrixFree object that caches everything related to the degrees of freedom
+ * and the mapping information. This way, it is possible to use vectorization
+ * for applying a vector operation for several cells at once. This setting is
+ * explained in the step-37 and step-48 tutorial programs. For vector-valued
+ * problems, the deal.II test suite includes a few additional examples as
+ * well, e.g. the Stokes operator found at
+ * https://github.com/dealii/dealii/blob/master/tests/matrix_free/matrix_vector_stokes_noflux.cc
+ *
+ * For most operator evaluation tasks, this path provides the most efficient
+ * solution by combining pre-computed data for the mapping (Jacobian
+ * transformations for the geometry description) with on-the-fly evaluation of
+ * basis functions. In other words, the framework provides a trade-off between
+ * memory usage and initialization of objects that is suitable for matrix-free
+ * operator evaluation.
+ *
+ * <h4>Usage without pre-initialized MatrixFree object</h4>
+ *
+ * The second form of usage is to initialize FEEvaluation from geometry
+ * information generated by FEValues. This allows applying the integration
+ * loops on the fly without prior initialization of MatrixFree objects. This
+ * can be useful when the memory and initialization cost of MatrixFree is not
+ * acceptable, e.g. when a different number of quadrature points should be
+ * used for one single evaluation in error computation. Also, when using the
+ * routines of this class to assemble matrices the trade-off implied by the
+ * MatrixFree class may not be desired. In such a case, the cost to initialize
+ * the necessary geometry data on the fly is comparably low and thus avoiding
+ * a global object MatrixFree can be useful. When used in this way, reinit
+ * methods reminiscent of FEValues with a cell iterator are to be used.
+ * However, note that this model results in working on a single cell at a
+ * time, with geometry data duplicated in all components of the vectorized
+ * array. Thus, vectorization is only useful when it can apply the same
+ * operation on different data, e.g. when performing matrix assembly.
+ *
+ * As an example, consider the following code to assemble the contributions to
+ * the Laplace matrix:
+ *
+ * @code
+ * FEEvaluation<dim,fe_degree> fe_eval (mapping, finite_element,
+ *                                      QGauss<1>(fe_degree+1), flags);
+ * for (cell = dof_handler.begin_active();
+ *      cell != dof_handler.end();
+ *      ++cell)
+ *   {
+ *     fe_eval.reinit(cell);
+ *     for (unsigned int i=0; i<dofs_per_cell;
+ *          i += VectorizedArray<double>::n_array_elements)
+ *       {
+ *         const unsigned int n_items =
+ *           i+VectorizedArray<double>::n_array_elements > dofs_per_cell ?
+ *           (dofs_per_cell - i) : VectorizedArray<double>::n_array_elements;
+ *
+ *         // Set n_items unit vectors
+ *         for (unsigned int j=0; j<dofs_per_cell; ++j)
+ *           fe_eval.begin_dof_values()[j]  = VectorizedArray<double>();
+ *         for (unsigned int v=0; v<n_items; ++v)
+ *           fe_eval.begin_dof_values()[i+v][v] = 1.;
+ *
+ *         // Apply operator on unit vector
+ *         fe_eval.evaluate(true, true);
+ *         for (unsigned int q=0; q<n_q_points; ++q)
+ *           {
+ *             fe_eval.submit_value(10.*fe_eval.get_value(q), q);
+ *             fe_eval.submit_gradient(fe_eval.get_gradient(q), q);
+ *           }
+ *         fe_eval.integrate(true, true);
+ *
+ *         // Insert computed entries in matrix
+ *         for (unsigned int v=0; v<n_items; ++v)
+ *           for (unsigned int j=0; j<dofs_per_cell; ++j)
+ *             cell_matrix(fe_eval.get_internal_dof_numbering()[j],
+ *                         fe_eval.get_internal_dof_numbering()[i+v])
+ *               = fe_eval.begin_dof_values()[j][v];
+ *       }
+ *     ...
+ *   }
+ * @endcode
+ *
+ * This code generates the columns of the cell matrix with the loop over @p i
+ * above. The way this is done is the following: FEEvaluation's routines focus
+ * on the evaluation of finite element operators, so the way to get a cell
+ * matrix out of an operator evaluation is to apply it to all the unit vectors
+ * on the cell. This might seem inefficient but the evaluation routines used
+ * here are so quick that they still work much faster than what is possible
+ * with FEValues.
+ *
+ * Due to vectorization, we can actually generate matrix data for several unit
+ * vectors at a time (e.g. 4). The variable @p n_items makes sure that we do
+ * the last iteration where the number of cell dofs is not divisible by the
+ * vectorization length correctly. Also note that we need to get the internal
+ * dof numbering applied by fe_eval because FEEvaluation internally uses a
+ * lexicographic numbering of degrees of freedom. This is necessary to
+ * efficiently work with tensor products where all degrees of freedom along a
+ * dimension must be laid out in a regular way.
+ *
+ * <h3>Description of evaluation routines</h3>
+ *
+ * This class contains specialized evaluation routines for several elements
+ * based on tensor-product quadrature formulas and tensor-product-like shape
+ * functions, including standard FE_Q or FE_DGQ elements and quadrature points
+ * symmetric around 0.5 (like Gauss quadrature), FE_DGP elements based on
+ * truncated tensor products as well as the faster case of Gauss-Lobatto
+ * elements with Gauss-Lobatto quadrature which give diagonal mass matrices
+ * and quicker evaluation internally. The main benefit of this class is the
+ * evaluation of all shape functions in all quadrature points or integration
+ * over all shape functions in <code>dim (fe_degree+1)<sup>dim+1</sup> </code>
+ * operations instead of the slower <code>
+ * (fe_degree+1)<sup>2*dim</sup></code> complexity in the evaluation routines
+ * of FEValues.
+ *
+ * Note that many of the operations available through this class are inherited
+ * from the base class FEEvaluationBase, in particular reading from and
+ * writing to vectors. Also, the class inherits from FEEvaluationAccess that
+ * implements access to values, gradients and Hessians of the finite element
+ * function on quadrature points.
+ *
+ * This class assumes that shape functions of the FiniteElement under
+ * consideration do <em>not</em> depend on the actual shape of the cells in
+ * real space. Currently, other finite elements cannot be treated with the
+ * matrix-free concept.
+ *
+ * This class has five template arguments:
+ *
+ * @param dim Dimension in which this class is to be used
+ *
+ * @param fe_degree Degree of the tensor product finite element with
+ * fe_degree+1 degrees of freedom per coordinate direction
+ *
+ * @param n_q_points_1d Number of points in the quadrature formula in 1D,
+ * defaults to fe_degree+1
+ *
+ * @param n_components Number of vector components when solving a system of
+ * PDEs. If the same operation is applied to several components of a PDE (e.g.
+ * a vector Laplace equation), they can be applied simultaneously with one
+ * call (and often more efficiently). Defaults to 1.
+ *
+ * @param Number Number format, usually @p double or @p float. Defaults to @p
+ * double
+ *
+ * @author Katharina Kormann and Martin Kronbichler, 2010, 2011
+ */
+template <int dim, int fe_degree, int n_q_points_1d, int n_components_,
+          typename Number >
+class FEEvaluation : public FEEvaluationAccess<dim,n_components_,Number>
+{
+public:
+  typedef FEEvaluationAccess<dim,n_components_,Number> BaseClass;
+  typedef Number                            number_type;
+  typedef typename BaseClass::value_type    value_type;
+  typedef typename BaseClass::gradient_type gradient_type;
+  static const unsigned int dimension     = dim;
+  static const unsigned int n_components  = n_components_;
+  static const unsigned int n_q_points    = Utilities::fixed_int_power<n_q_points_1d,dim>::value;
+  static const unsigned int tensor_dofs_per_cell = Utilities::fixed_int_power<fe_degree+1,dim>::value;
+
+  /**
+   * Constructor. Takes all data stored in MatrixFree. If applied to problems
+   * with more than one finite element or more than one quadrature formula
+   * selected during construction of @p matrix_free, @p fe_no and @p quad_no
+   * allow selecting the appropriate components.
+   */
+  FEEvaluation (const MatrixFree<dim,Number> &matrix_free,
+                const unsigned int            fe_no   = 0,
+                const unsigned int            quad_no = 0);
+
+  /**
+   * Constructor that comes with reduced functionality and works similarly to
+   * FEValues. The arguments are similar to the ones passed to the constructor
+   * of FEValues, with the notable difference that FEEvaluation expects a one-
+   * dimensional quadrature formula, Quadrature<1>, instead of a @p dim
+   * dimensional one. The finite element can be both scalar or vector valued,
+   * but this method always only selects a scalar base element at a time (with
+   * @p n_components copies as specified by the class template). For vector-
+   * valued elements, the optional argument @p first_selected_component allows
+   * specifying the index of the base element to be used for evaluation. Note
+   * that the internal data structures always assume that the base element is
+   * primitive; non-primitive elements are not supported currently.
+   *
+   * As known from FEValues, a call to the reinit method with a
+   * Triangulation<dim>::cell_iterator is necessary to make the geometry and
+   * degrees of freedom of the current class known. If the iterator includes
+   * DoFHandler information (i.e., it is a DoFHandler<dim>::cell_iterator or
+   * similar), the initialization also allows reading from or writing to vectors
+   * in the standard way for DoFHandler<dim>::active_cell_iterator types for
+   * one cell at a time. However, this approach is much slower than the path
+   * with MatrixFree with MPI since index translation has to be done. As only
+   * one cell at a time is used, this method does not vectorize over several
+   * elements (which is most efficient for vector operations), but only
+   * possibly within the element if the evaluate/integrate routines are
+   * combined inside user code (e.g. for computing cell matrices).
+   */
+  FEEvaluation (const Mapping<dim>       &mapping,
+                const FiniteElement<dim> &fe,
+                const Quadrature<1>      &quadrature,
+                const UpdateFlags         update_flags,
+                const unsigned int        first_selected_component = 0);
+
+  /**
+   * Constructor for the reduced functionality. This constructor is equivalent
+   * to the other one except that it makes the object use a $Q_1$ mapping
+   * (i.e., an object of type MappingQGeneric(1)) implicitly.
+   */
+  FEEvaluation (const FiniteElement<dim> &fe,
+                const Quadrature<1>      &quadrature,
+                const UpdateFlags         update_flags,
+                const unsigned int        first_selected_component = 0);
+
+  /**
+   * Constructor for the reduced functionality. Similar to the other
+   * constructor with FiniteElement argument but using another
+   * FEEvaluationBase object to provide info about the geometry. This allows
+   * several FEEvaluation objects to share the geometry evaluation, i.e., the
+   * underlying mapping and quadrature points do only need to be evaluated
+   * once. Make sure to not pass an optional object around when you intend to
+   * use the FEEvaluation object in %parallel to the given one because
+   * otherwise the intended sharing may create race conditions.
+   */
+  template <int n_components_other>
+  FEEvaluation (const FiniteElement<dim> &fe,
+                const FEEvaluationBase<dim,n_components_other,Number> &other,
+                const unsigned int        first_selected_component = 0);
+
+  /**
+   * Copy constructor. If FEEvaluationBase was constructed from a mapping, fe,
+   * quadrature, and update flags, the underlying geometry evaluation based on
+   * FEValues will be deep-copied in order to allow for using in parallel with
+   * threads.
+   */
+  FEEvaluation (const FEEvaluation &other);
+
+  /**
+   * Evaluates the function values, the gradients, and the Hessians of the
+   * FE function given at the DoF values in the input vector at the quadrature
+   * points on the unit cell.  The function arguments specify which parts
+   * shall actually be computed. Needs to be called before the functions @p
+   * get_value(), @p get_gradient() or @p get_laplacian() give useful
+   * information (unless these values have been set manually).
+   */
+  void evaluate (const bool evaluate_val,
+                 const bool evaluate_grad,
+                 const bool evaluate_hess = false);
+
+  /**
+   * This function takes the values and/or gradients that are stored on
+   * quadrature points, tests them by all the basis functions/gradients on the
+   * cell and performs the cell integration. The two function arguments @p
+   * integrate_val and @p integrate_grad are used to enable/disable the
+   * integration of values or gradients.
+   */
+  void integrate (const bool integrate_val,
+                  const bool integrate_grad);
+
+  /**
+   * Returns the q-th quadrature point stored in MappingInfo.
+   */
+  Point<dim,VectorizedArray<Number> >
+  quadrature_point (const unsigned int q_point) const;
+
+  /**
+   * The number of scalar degrees of freedom on the cell. Usually close to
+   * tensor_dofs_per_cell, but depends on the actual element selected and is
+   * thus not static.
+   */
+  const unsigned int dofs_per_cell;
+
+private:
+  /**
+   * Internally stored variables for the different data fields.
+   */
+  VectorizedArray<Number> my_data_array[n_components*(tensor_dofs_per_cell+1+(dim*dim+2*dim+1)*n_q_points)];
+
+  /**
+   * Checks if the template arguments regarding degree of the element
+   * correspond to the actual element used at initialization.
+   */
+  void check_template_arguments(const unsigned int fe_no);
+
+  /**
+   * Sets the pointers of the base class to my_data_array.
+   */
+  void set_data_pointers();
+
+  /**
+   * Function pointer for the evaluate function
+   */
+  void (*evaluate_funct) (const internal::MatrixFreeFunctions::ShapeInfo<Number> &,
+                          VectorizedArray<Number> *values_dofs_actual[],
+                          VectorizedArray<Number> *values_quad[],
+                          VectorizedArray<Number> *gradients_quad[][dim],
+                          VectorizedArray<Number> *hessians_quad[][(dim*(dim+1))/2],
+                          const bool               evaluate_val,
+                          const bool               evaluate_grad,
+                          const bool               evaluate_lapl);
+
+  /**
+   * Function pointer for the integrate function
+   */
+  void (*integrate_funct)(const internal::MatrixFreeFunctions::ShapeInfo<Number> &,
+                          VectorizedArray<Number> *values_dofs_actual[],
+                          VectorizedArray<Number> *values_quad[],
+                          VectorizedArray<Number> *gradients_quad[][dim],
+                          const bool               evaluate_val,
+                          const bool               evaluate_grad);
+};
+
+
+
+namespace internal
+{
+  namespace MatrixFreeFunctions
+  {
+    // a helper struct to compute the number of DoFs of a DGP element at compile
+    // time, depending on the dimension and the degree
+    template <int dim, int degree>
+    struct DGP_dofs_per_cell
+    {
+      // equals binomial(degree+dim,dim); the division leaves no remainder
+      static const unsigned int value =
+        (DGP_dofs_per_cell<dim-1,degree>::value * (degree+dim)) / dim;
+    };
+
+    // base specialization: 1d elements have 'degree+1' degrees of freedom
+    template <int degree>
+    struct DGP_dofs_per_cell<1,degree>
+    {
+      static const unsigned int value = degree+1;
+    };
+  }
+}
+
+
+/*----------------------- Inline functions ----------------------------------*/
+
+#ifndef DOXYGEN
+
+
+
+/*----------------------- FEEvaluationBase ----------------------------------*/
+// Constructor for use with a MatrixFree object: selects the dof, mapping and shape data via fe_no_in, quad_no_in, fe_degree and n_q_points.
+template <int dim, int n_components_, typename Number>
+inline
+FEEvaluationBase<dim,n_components_,Number>
+::FEEvaluationBase (const MatrixFree<dim,Number> &data_in,
+                    const unsigned int fe_no_in,
+                    const unsigned int quad_no_in,
+                    const unsigned int fe_degree,
+                    const unsigned int n_q_points)
+  :
+  quad_no            (quad_no_in),
+  n_fe_components    (data_in.get_dof_info(fe_no_in).n_components),
+  active_fe_index    (data_in.get_dof_info(fe_no_in).fe_index_from_degree
+                      (fe_degree)),
+  active_quad_index  (data_in.get_mapping_info().
+                      mapping_data_gen[quad_no_in].
+                      quad_index_from_n_q_points(n_q_points)),
+  matrix_info        (&data_in),
+  dof_info           (&data_in.get_dof_info(fe_no_in)),
+  mapping_info       (&data_in.get_mapping_info()),
+  data               (&data_in.get_shape_info
+                      (fe_no_in, quad_no_in, active_fe_index,
+                       active_quad_index)), // reads the members active_fe_index and active_quad_index, which must be declared before 'data'
+  cartesian_data     (0), // this and the other geometry pointers set to 0 here are filled in by reinit(), depending on the cell type
+  jacobian           (0),
+  J_value            (0),
+  quadrature_weights (mapping_info->mapping_data_gen[quad_no].
+                      quadrature_weights[active_quad_index].begin()), // reads the members mapping_info, quad_no and active_quad_index
+  quadrature_points  (0),
+  jacobian_grad      (0),
+  jacobian_grad_upper(0),
+  cell               (numbers::invalid_unsigned_int), // invalid until reinit() stores a cell index; the getters assert on this value
+  cell_type          (internal::MatrixFreeFunctions::undefined),
+  cell_data_number   (0),
+  first_selected_component (0)
+{
+  for (unsigned int c=0; c<n_components_; ++c) // start with null pointers for all data arrays
+    {
+      values_dofs[c] = 0;
+      values_quad[c] = 0;
+      for (unsigned int d=0; d<dim; ++d)
+        gradients_quad[c][d] = 0;
+      for (unsigned int d=0; d<(dim*dim+dim)/2; ++d)
+        hessians_quad[c][d] = 0;
+    }
+  Assert (matrix_info->mapping_initialized() == true,
+          ExcNotInitialized());
+  AssertDimension (matrix_info->get_size_info().vectorization_length,
+                   VectorizedArray<Number>::n_array_elements);
+  AssertDimension (data->dofs_per_cell,
+                   dof_info->dofs_per_cell[active_fe_index]/n_fe_components);
+  AssertDimension (data->n_q_points,
+                   mapping_info->mapping_data_gen[quad_no].n_q_points[active_quad_index]);
+  Assert (n_fe_components == 1 ||
+          n_components == 1 ||
+          n_components == n_fe_components,
+          ExcMessage ("The underlying FE is vector-valued. In this case, the "
+                      "template argument n_components must be a the same "
+                      "as the number of underlying vector components."));
+  // NOTE(review): the message above contains a typo ("a the same") and does not mention that n_components == 1 is accepted as well
+
+  // do not check for correct dimensions of data fields here, should be done
+  // in derived classes
+}
+
+// Constructor for evaluation without a MatrixFree object: creates its own ShapeInfo for the base element
+// of the selected component and either shares the mapped geometry of 'other' or creates a new one.
+template <int dim, int n_components_, typename Number>
+template <int n_components_other>
+inline
+FEEvaluationBase<dim,n_components_,Number>
+::FEEvaluationBase (const Mapping<dim>       &mapping,
+                    const FiniteElement<dim> &fe,
+                    const Quadrature<1>      &quadrature,
+                    const UpdateFlags         update_flags,
+                    const unsigned int        first_selected_component,
+                    const FEEvaluationBase<dim,n_components_other,Number> *other)
+  :
+  quad_no            (-1), // there is no MatrixFree data: the indices are set to -1 and the pointers to 0
+  n_fe_components    (n_components_),
+  active_fe_index    (-1),
+  active_quad_index  (-1),
+  matrix_info        (0),
+  dof_info           (0),
+  mapping_info       (0),
+  // select the correct base element from the given FE component
+  stored_shape_info  (new internal::MatrixFreeFunctions::ShapeInfo<Number>(quadrature, fe, fe.component_to_base_index(first_selected_component).first)),
+  data               (stored_shape_info.get()),
+  cartesian_data     (0),
+  jacobian           (0),
+  J_value            (0),
+  quadrature_weights (0),
+  quadrature_points  (0),
+  jacobian_grad      (0),
+  jacobian_grad_upper(0),
+  cell               (0),
+  cell_type          (internal::MatrixFreeFunctions::general),
+  cell_data_number   (0),
+  // keep the number of the selected component within the current base element
+  // for reading dof values (the argument of the same name is the one passed to component_to_base_index)
+  first_selected_component (fe.component_to_base_index(first_selected_component).second)
+{
+  const unsigned int base_element_number =
+    fe.component_to_base_index(first_selected_component).first;
+  for (unsigned int c=0; c<n_components_; ++c) // start with null pointers for all data arrays
+    {
+      values_dofs[c] = 0;
+      values_quad[c] = 0;
+      for (unsigned int d=0; d<dim; ++d)
+        gradients_quad[c][d] = 0;
+      for (unsigned int d=0; d<(dim*dim+dim)/2; ++d)
+        hessians_quad[c][d] = 0;
+    }
+  // share the mapped geometry of 'other' if it uses the same quadrature, otherwise create a new object
+  Assert(other == 0 || other->mapped_geometry.get() != 0, ExcInternalError());
+  if (other != 0 &&
+      other->mapped_geometry->get_quadrature() == quadrature)
+    mapped_geometry = other->mapped_geometry;
+  else
+    mapped_geometry.reset(new internal::MatrixFreeFunctions::
+                          MappingDataOnTheFly<dim,Number>(mapping, quadrature,
+                                                          update_flags));
+  jacobian = mapped_geometry->get_inverse_jacobians().begin();
+  J_value = mapped_geometry->get_JxW_values().begin();
+  quadrature_points = mapped_geometry->get_quadrature_points().begin();
+  // NOTE(review): in this body 'first_selected_component' is the constructor argument, which hides the member of the same name
+  Assert(fe.element_multiplicity(base_element_number) == 1 ||
+         fe.element_multiplicity(base_element_number)-first_selected_component >= n_components_,
+         ExcMessage("The underlying element must at least contain as many "
+                    "components as requested by this class"));
+}
+
+// Copy constructor: copies the indices and the pointers into the shared data of 'other', but resets the
+// pointers to the data arrays to 0 and creates a separate mapped geometry object if 'other' has one.
+template <int dim, int n_components_, typename Number>
+inline
+FEEvaluationBase<dim,n_components_,Number>
+::FEEvaluationBase (const FEEvaluationBase<dim,n_components_,Number> &other)
+  :
+  quad_no            (other.quad_no),
+  n_fe_components    (other.n_fe_components),
+  active_fe_index    (other.active_fe_index),
+  active_quad_index  (other.active_quad_index),
+  matrix_info        (other.matrix_info),
+  dof_info           (other.dof_info),
+  mapping_info       (other.mapping_info),
+  stored_shape_info  (other.stored_shape_info),
+  data               (other.data),
+  cartesian_data     (other.cartesian_data),
+  jacobian           (other.jacobian),
+  J_value            (other.J_value),
+  quadrature_weights (other.quadrature_weights),
+  quadrature_points  (other.quadrature_points),
+  jacobian_grad      (other.jacobian_grad),
+  jacobian_grad_upper(other.jacobian_grad_upper),
+  cell               (other.cell),
+  cell_type          (other.cell_type),
+  cell_data_number   (other.cell_data_number),
+  first_selected_component (other.first_selected_component)
+{
+  for (unsigned int c=0; c<n_components_; ++c) // the data array pointers are not taken over from 'other'
+    {
+      values_dofs[c] = 0;
+      values_quad[c] = 0;
+      for (unsigned int d=0; d<dim; ++d)
+        gradients_quad[c][d] = 0;
+      for (unsigned int d=0; d<(dim*dim+dim)/2; ++d)
+        hessians_quad[c][d] = 0;
+    }
+
+  // Create a separate mapped geometry object from the same mapping, quadrature and update flags for use in parallel...
+  if (other.mapped_geometry.get() != 0)
+    {
+      mapped_geometry.reset
+      (new internal::MatrixFreeFunctions::
+       MappingDataOnTheFly<dim,Number>(other.mapped_geometry->get_fe_values().get_mapping(),
+                                       other.mapped_geometry->get_quadrature(),
+                                       other.mapped_geometry->get_fe_values().get_update_flags()));
+      jacobian = mapped_geometry->get_inverse_jacobians().begin(); // redirect the three pointers copied above to the new object
+      J_value = mapped_geometry->get_JxW_values().begin();
+      quadrature_points = mapped_geometry->get_quadrature_points().begin();
+    }
+}
+
+
+
+// Initializes this object for the cell with index cell_in in the MatrixFree
+// data: stores the cell index, its cell type and its geometry data index, and
+// sets the pointers to the quadrature points and to the Jacobian data that
+// belong to this cell. Which pointers are set depends on the cell type
+// (Cartesian, affine, or anything else) and on which data MappingInfo reports
+// as initialized. This variant may only be used if the object was constructed
+// from a MatrixFree object.
+template <int dim, int n_components_, typename Number>
+inline
+void
+FEEvaluationBase<dim,n_components_,Number>::reinit (const unsigned int cell_in)
+{
+  Assert (mapped_geometry == 0,
+          ExcMessage("FEEvaluation was initialized without a matrix-free object."
+                     " Integer indexing is not possible"));
+  // if the assertion above did not stop the program, leave the object
+  // untouched rather than dereferencing the null MatrixFree pointers below
+  if (mapped_geometry != 0)
+    return;
+  Assert (dof_info != 0, ExcNotInitialized());
+  Assert (mapping_info != 0, ExcNotInitialized());
+  AssertIndexRange (cell_in, dof_info->row_starts.size()-1);
+  AssertDimension (((dof_info->cell_active_fe_index.size() > 0) ?
+                    dof_info->cell_active_fe_index[cell_in] : 0),
+                   active_fe_index);
+  cell = cell_in;
+  cell_type = mapping_info->get_cell_type(cell);
+  cell_data_number = mapping_info->get_cell_data_index(cell);
+
+  if (mapping_info->quadrature_points_initialized == true)
+    {
+      // rowstart_q_points is indexed with the cell number below, so this is
+      // the index whose range has to be checked
+      AssertIndexRange (cell, mapping_info->
+                        mapping_data_gen[quad_no].rowstart_q_points.size());
+      const unsigned int index = mapping_info->mapping_data_gen[quad_no].
+                                 rowstart_q_points[cell];
+      AssertIndexRange (index, mapping_info->mapping_data_gen[quad_no].
+                        quadrature_points.size());
+      quadrature_points =
+        &mapping_info->mapping_data_gen[quad_no].quadrature_points[index];
+    }
+
+  if (cell_type == internal::MatrixFreeFunctions::cartesian)
+    {
+      // Cartesian cells: one (cartesian_data, J_value) pair per data index
+      cartesian_data = &mapping_info->cartesian_data[cell_data_number].first;
+      J_value        = &mapping_info->cartesian_data[cell_data_number].second;
+    }
+  else if (cell_type == internal::MatrixFreeFunctions::affine)
+    {
+      // affine cells: one (jacobian, J_value) pair per data index
+      jacobian  = &mapping_info->affine_data[cell_data_number].first;
+      J_value   = &mapping_info->affine_data[cell_data_number].second;
+    }
+  else
+    {
+      // all other cells: the data for this cell starts at position 'rowstart'
+      // in the arrays of the selected quadrature formula
+      const unsigned int rowstart = mapping_info->
+                                    mapping_data_gen[quad_no].rowstart_jacobians[cell_data_number];
+      AssertIndexRange (rowstart, mapping_info->
+                        mapping_data_gen[quad_no].jacobians.size());
+      jacobian =
+        &mapping_info->mapping_data_gen[quad_no].jacobians[rowstart];
+      if (mapping_info->JxW_values_initialized == true)
+        {
+          AssertIndexRange (rowstart, mapping_info->
+                            mapping_data_gen[quad_no].JxW_values.size());
+          J_value = &(mapping_info->mapping_data_gen[quad_no].
+                      JxW_values[rowstart]);
+        }
+      if (mapping_info->second_derivatives_initialized == true)
+        {
+          AssertIndexRange(rowstart, mapping_info->
+                           mapping_data_gen[quad_no].jacobians_grad_diag.size());
+          jacobian_grad = &mapping_info->mapping_data_gen[quad_no].
+                          jacobians_grad_diag[rowstart];
+          AssertIndexRange(rowstart, mapping_info->
+                           mapping_data_gen[quad_no].jacobians_grad_upper.size());
+          jacobian_grad_upper = &mapping_info->mapping_data_gen[quad_no].
+                                jacobians_grad_upper[rowstart];
+        }
+    }
+
+#ifdef DEBUG
+  // no data has been read or computed for the new cell yet
+  dof_values_initialized      = false;
+  values_quad_initialized     = false;
+  gradients_quad_initialized  = false;
+  hessians_quad_initialized   = false;
+#endif
+}
+
+
+
+// Variant of reinit for objects that were set up without a MatrixFree object:
+// updates the mapped geometry for the given cell and stores the cell's dof
+// indices (level indices if level_dof_access is set) in local_dof_indices.
+template <int dim, int n_components_, typename Number>
+template <typename DoFHandlerType, bool level_dof_access>
+inline
+void
+FEEvaluationBase<dim,n_components_,Number>
+::reinit (const TriaIterator<DoFCellAccessor<DoFHandlerType,level_dof_access> > &cell)
+{
+  Assert(matrix_info == 0,
+         ExcMessage("Cannot use initialization from cell iterator if "
+                    "initialized from MatrixFree object. Use variant for "
+                    "on the fly computation with arguments as for FEValues "
+                    "instead"));
+  Assert(mapped_geometry.get() != 0, ExcNotInitialized());
+
+  // the geometry object works on plain triangulation iterators
+  typedef typename Triangulation<dim>::cell_iterator tria_cell_iterator;
+  mapped_geometry->reinit(static_cast<tria_cell_iterator>(cell));
+
+  // make room for all dof indices of the cell before querying them
+  const unsigned int n_dofs_on_cell = cell->get_fe().dofs_per_cell;
+  local_dof_indices.resize(n_dofs_on_cell);
+
+  if (!level_dof_access)
+    cell->get_dof_indices(local_dof_indices);
+  else
+    cell->get_mg_dof_indices(local_dof_indices);
+}
+
+
+// Variant of reinit for objects set up without a MatrixFree object that only updates the mapped geometry; no dof indices are read here.
+template <int dim, int n_components_, typename Number>
+inline
+void
+FEEvaluationBase<dim,n_components_,Number>
+::reinit (const typename Triangulation<dim>::cell_iterator &cell)
+{
+  Assert(matrix_info == 0,
+         ExcMessage("Cannot use initialization from cell iterator if "
+                    "initialized from MatrixFree object. Use variant for "
+                    "on the fly computation with arguments as for FEValues "
+                    "instead"));
+  Assert(mapped_geometry.get() != 0, ExcNotInitialized());
+  mapped_geometry->reinit(cell);
+}
+
+
+// Returns the stored cell_data_number; asserts that a cell index has been set, i.e., that 'cell' is not numbers::invalid_unsigned_int.
+template <int dim, int n_components_, typename Number>
+inline
+unsigned int
+FEEvaluationBase<dim,n_components_,Number>
+::get_cell_data_number () const
+{
+  Assert (cell != numbers::invalid_unsigned_int, ExcNotInitialized());
+  return cell_data_number;
+}
+
+
+// Returns the stored cell type; asserts that a cell index has been set, i.e., that 'cell' is not numbers::invalid_unsigned_int.
+template <int dim, int n_components_, typename Number>
+inline
+internal::MatrixFreeFunctions::CellType
+FEEvaluationBase<dim,n_components_,Number>::get_cell_type () const
+{
+  Assert (cell != numbers::invalid_unsigned_int, ExcNotInitialized());
+  return cell_type;
+}
+
+
+// Returns a reference to the ShapeInfo object that the pointer 'data' refers to; asserts that the pointer is not null.
+template <int dim, int n_components_, typename Number>
+inline
+const internal::MatrixFreeFunctions::ShapeInfo<Number> &
+FEEvaluationBase<dim,n_components_,Number>::get_shape_info() const
+{
+  Assert(data != 0, ExcInternalError());
+  return *data;
+}
+
+
+
+// Fills JxW_values, which must have one entry per quadrature point, with the
+// products of Jacobian determinant and quadrature weight on the current cell.
+template <int dim, int n_components_, typename Number>
+inline
+void
+FEEvaluationBase<dim,n_components_,Number>
+::fill_JxW_values(AlignedVector<VectorizedArray<Number> > &JxW_values) const
+{
+  AssertDimension(JxW_values.size(), data->n_q_points);
+  Assert (this->J_value != 0, ExcNotImplemented());
+
+  const bool single_jacobian_value =
+    (this->cell_type == internal::MatrixFreeFunctions::cartesian ||
+     this->cell_type == internal::MatrixFreeFunctions::affine);
+
+  if (!single_jacobian_value)
+    {
+      // J_value provides one entry per quadrature point, copy them over
+      for (unsigned int q=0; q<data->n_q_points; ++q)
+        JxW_values[q] = this->J_value[q];
+      return;
+    }
+
+  // Cartesian and affine cells: only J_value[0] is used and gets multiplied
+  // by the quadrature weight of each point
+  Assert (this->mapping_info != 0, ExcNotImplemented());
+  const VectorizedArray<Number> jacobian_value = this->J_value[0];
+  for (unsigned int q=0; q<this->data->n_q_points; ++q)
+    JxW_values[q] = jacobian_value * this->quadrature_weights[q];
+}
+
+
+
+namespace internal
+{
+  // write access to generic vectors that have operator (): returns vec(entry) as a reference to VectorType::value_type
+  template <typename VectorType>
+  inline
+  typename VectorType::value_type &
+  vector_access (VectorType         &vec,
+                 const unsigned int  entry)
+  {
+    return vec(entry);
+  }
+
+
+
+  // read access to generic vectors that have operator (): returns a copy of vec(entry)
+  template <typename VectorType>
+  inline
+  typename VectorType::value_type
+  vector_access (const VectorType   &vec,
+                 const unsigned int  entry)
+  {
+    return vec(entry);
+  }
+
+
+
+  // write access to distributed MPI vectors: returns a reference obtained from
+  // the local_element(uint) method, i.e., 'entry' is an index in local index
+  // space, which is what we use in DoFInfo and hence in read_dof_values etc.
+  template <typename Number>
+  inline
+  Number &
+  vector_access (parallel::distributed::Vector<Number> &vec,
+                 const unsigned int                     entry)
+  {
+    return vec.local_element(entry);
+  }
+
+
+
+  // read access to distributed MPI vectors: returns a copy of the value given
+  // by the local_element(uint) method, i.e., 'entry' is an index in local index
+  // space, which is what we use in DoFInfo and hence in read_dof_values etc.
+  template <typename Number>
+  inline
+  Number
+  vector_access (const parallel::distributed::Vector<Number> &vec,
+                 const unsigned int                           entry)
+  {
+    return vec.local_element(entry);
+  }
+
+
+
+  // generic version of the check that a vector fits to the data stored in
+  // MatrixFree: only compares the vector size with the size of the partitioner in dof_info
+  template <typename VectorType>
+  inline
+  void check_vector_compatibility (const VectorType                             &vec,
+                                   const internal::MatrixFreeFunctions::DoFInfo &dof_info)
+  {
+    AssertDimension (vec.size(),
+                     dof_info.vector_partitioner->size());
+  }
+  // version for parallel::distributed::Vector: asserts that the vector's partitioner is compatible with the one stored in dof_info
+  template <typename Number>
+  inline
+  void check_vector_compatibility (const parallel::distributed::Vector<Number>  &vec,
+                                   const internal::MatrixFreeFunctions::DoFInfo &dof_info)
+  {
+    Assert (vec.partitioners_are_compatible(*dof_info.vector_partitioner),
+            ExcMessage("The parallel layout of the given vector is not "
+                       "compatible with the parallel partitioning in MatrixFree. "
+                       "Use MatrixFree::initialize_dof_vector to get a "
+                       "compatible vector."));
+  }
+
+  // Operation object for FEEvaluationBase::read_write_operation that reads
+  // from a vector: unconstrained entries are copied into the local data,
+  // constrained entries are accumulated as a weighted sum of vector entries,
+  // and empty entries are set to zero. The vector is never modified.
+  template <typename Number>
+  struct VectorReader
+  {
+    // local value = vector entry at the given local index
+    template <typename VectorType>
+    void process_dof (const unsigned int  index,
+                      VectorType         &vec,
+                      Number             &res) const
+    {
+      const VectorType &read_only_vec = vec;
+      res = vector_access (read_only_vec, index);
+    }
+
+    // local value = vector entry at the given global index, read through
+    // the const version of operator ()
+    template <typename VectorType>
+    void process_dof_global (const types::global_dof_index index,
+                             VectorType         &vec,
+                             Number             &res) const
+    {
+      const VectorType &read_only_vec = vec;
+      res = read_only_vec(index);
+    }
+
+    // the sum for a constrained entry starts from zero; the current local
+    // value is ignored
+    void pre_constraints (const Number &,
+                          Number       &res) const
+    {
+      res = Number();
+    }
+
+    // add the contribution of one vector entry, scaled by the constraint
+    // weight, to the sum
+    template <typename VectorType>
+    void process_constraint (const unsigned int index,
+                             const Number       weight,
+                             VectorType        &vec,
+                             Number            &res) const
+    {
+      const VectorType &read_only_vec = vec;
+      res += weight * vector_access (read_only_vec, index);
+    }
+
+    // store the accumulated sum in the local data
+    void post_constraints (const Number &sum,
+                           Number       &write_pos) const
+    {
+      write_pos = sum;
+    }
+
+    // entries without a vector counterpart are set to zero
+    void process_empty (Number &res) const
+    {
+      res = Number();
+    }
+  };
+
+  // Operation object for FEEvaluationBase::read_write_operation that adds the local values into a vector, applying the constraint weights
+  template <typename Number>
+  struct VectorDistributorLocalToGlobal
+  {
+    template <typename VectorType>
+    void process_dof (const unsigned int  index,
+                      VectorType         &vec,
+                      Number             &res) const
+    {
+      vector_access (vec, index) += res; // add the local value to the vector entry at the local index
+    }
+
+    template <typename VectorType>
+    void process_dof_global (const types::global_dof_index index,
+                             VectorType         &vec,
+                             Number             &res) const
+    {
+      vec(index) += res; // same as above, but addressing the vector through operator () with a global index
+    }
+
+    void pre_constraints (const Number &input,
+                          Number       &res) const
+    {
+      res = input; // the local value of the constrained entry is what gets distributed
+    }
+
+    template <typename VectorType>
+    void process_constraint (const unsigned int index,
+                             const Number       weight,
+                             VectorType        &vec,
+                             Number            &res) const
+    {
+      vector_access (vec, index) += weight * res; // add the weighted local value to one vector entry of the constraint
+    }
+
+    void post_constraints (const Number &,
+                           Number &) const
+    {
+    } // nothing to do: the local data is left unchanged
+
+    void process_empty (Number &) const
+    {
+    } // nothing to do: empty entries are not written anywhere
+  };
+
+
+  // Operation object for FEEvaluationBase::read_write_operation that overwrites vector entries with the local values; constrained entries are not touched
+  template <typename Number>
+  struct VectorSetter
+  {
+    template <typename VectorType>
+    void process_dof (const unsigned int  index,
+                      VectorType         &vec,
+                      Number             &res) const
+    {
+      vector_access (vec, index) = res; // overwrite the vector entry at the local index with the local value
+    }
+
+    template <typename VectorType>
+    void process_dof_global (const types::global_dof_index index,
+                             VectorType         &vec,
+                             Number             &res) const
+    {
+      vec(index) = res; // same as above, but addressing the vector through operator () with a global index
+    }
+
+    void pre_constraints (const Number &,
+                          Number &) const
+    {
+    } // all three constraint hooks are empty: nothing is written for constrained entries
+
+    template <typename VectorType>
+    void process_constraint (const unsigned int,
+                             const Number,
+                             VectorType &,
+                             Number &) const
+    {
+    }
+
+    void post_constraints (const Number &,
+                           Number &) const
+    {
+    }
+
+    void process_empty (Number &) const
+    {
+    } // nothing to do: empty entries are not written anywhere
+  };
+
+  // selects between block vectors and non-block vectors through the bool
+  // template argument, which gives a unified interface for extracting a block
+  // from block vectors and returning the vector itself for usual vectors
+  template <typename VectorType, bool>
+  struct BlockVectorSelector {}; // the primary template is empty, the functionality is in the two specializations
+
+  template <typename VectorType>
+  struct BlockVectorSelector<VectorType,true> // block vectors
+  {
+    typedef typename VectorType::BlockType BaseVectorType;
+
+    static BaseVectorType *get_vector_component (VectorType &vec,
+                                                 const unsigned int component)
+    {
+      AssertIndexRange (component, vec.n_blocks());
+      return &vec.block(component); // pointer to the requested block
+    }
+  };
+
+  template <typename VectorType>
+  struct BlockVectorSelector<VectorType,false> // non-block vectors
+  {
+    typedef VectorType BaseVectorType;
+
+    static BaseVectorType *get_vector_component (VectorType &vec,
+                                                 const unsigned int)
+    {
+      return &vec; // the component argument is ignored
+    }
+  };
+}
+
+
+
+template <int dim, int n_components_, typename Number>
+template<typename VectorType, typename VectorOperation>
+inline
+void
+FEEvaluationBase<dim,n_components_,Number>
+::read_write_operation (const VectorOperation &operation,
+                        VectorType            *src[]) const
+{
+  // This functions processes all the functions read_dof_values,
+  // distribute_local_to_global, and set_dof_values with the same code. The
+  // distinction between these three cases is made by the input
+  // VectorOperation that either reads values from a vector and puts the data
+  // into the local data field or write local data into the vector. Certain
+  // operations are no-ops for the given use case.
+
+  // Case 1: No MatrixFree object given, simple case because we do not need to
+  // process constraints and need not care about vectorization
+  if (matrix_info == 0)
+    {
+      Assert (!local_dof_indices.empty(), ExcNotInitialized());
+
+      unsigned int index = first_selected_component * this->data->dofs_per_cell;
+      for (unsigned int comp = 0; comp<n_components; ++comp)
+        {
+          for (unsigned int i=0; i<this->data->dofs_per_cell; ++i, ++index)
+            {
+              operation.process_dof_global(local_dof_indices[this->data->lexicographic_numbering[index]],
+                                           *src[0], values_dofs[comp][i][0]);
+              for (unsigned int v=1; v<VectorizedArray<Number>::n_array_elements; ++v)
+                operation.process_empty(values_dofs[comp][i][v]);
+            }
+        }
+      return;
+    }
+
+  Assert (dof_info != 0, ExcNotInitialized());
+  Assert (matrix_info->indices_initialized() == true,
+          ExcNotInitialized());
+  Assert (cell != numbers::invalid_unsigned_int, ExcNotInitialized());
+
+  // loop over all local dofs. ind_local holds local number on cell, index
+  // iterates over the elements of index_local_to_global and dof_indices
+  // points to the global indices stored in index_local_to_global
+  const unsigned int *dof_indices = dof_info->begin_indices(cell);
+  const std::pair<unsigned short,unsigned short> *indicators =
+    dof_info->begin_indicators(cell);
+  const std::pair<unsigned short,unsigned short> *indicators_end =
+    dof_info->end_indicators(cell);
+  unsigned int ind_local = 0;
+  const unsigned int dofs_per_cell = this->data->dofs_per_cell;
+
+  const unsigned int n_irreg_components_filled = dof_info->row_starts[cell][2];
+  const bool at_irregular_cell = n_irreg_components_filled > 0;
+
+  // scalar case (or case when all components have the same degrees of freedom
+  // and sit on a different vector each)
+  if (n_fe_components == 1)
+    {
+      const unsigned int n_local_dofs =
+        VectorizedArray<Number>::n_array_elements * dofs_per_cell;
+      for (unsigned int comp=0; comp<n_components; ++comp)
+        internal::check_vector_compatibility (*src[comp], *dof_info);
+      Number *local_data [n_components];
+      for (unsigned int comp=0; comp<n_components; ++comp)
+        local_data[comp] =
+          const_cast<Number *>(&values_dofs[comp][0][0]);
+
+      // standard case where there are sufficiently many cells to fill all
+      // vectors
+      if (at_irregular_cell == false)
+        {
+          // check whether there is any constraint on the current cell
+          if (indicators != indicators_end)
+            {
+              for ( ; indicators != indicators_end; ++indicators)
+                {
+                  // run through values up to next constraint
+                  for (unsigned int j=0; j<indicators->first; ++j)
+                    for (unsigned int comp=0; comp<n_components; ++comp)
+                      operation.process_dof (dof_indices[j], *src[comp],
+                                             local_data[comp][ind_local+j]);
+
+                  ind_local += indicators->first;
+                  dof_indices   += indicators->first;
+
+                  // constrained case: build the local value as a linear
+                  // combination of the global value according to constraints
+                  Number value [n_components];
+                  for (unsigned int comp=0; comp<n_components; ++comp)
+                    operation.pre_constraints (local_data[comp][ind_local],
+                                               value[comp]);
+
+                  const Number *data_val =
+                    matrix_info->constraint_pool_begin(indicators->second);
+                  const Number *end_pool =
+                    matrix_info->constraint_pool_end(indicators->second);
+                  for ( ; data_val != end_pool; ++data_val, ++dof_indices)
+                    for (unsigned int comp=0; comp<n_components; ++comp)
+                      operation.process_constraint (*dof_indices, *data_val,
+                                                    *src[comp], value[comp]);
+
+                  for (unsigned int comp=0; comp<n_components; ++comp)
+                    operation.post_constraints (value[comp],
+                                                local_data[comp][ind_local]);
+
+                  ind_local++;
+                }
+
+              // get the dof values past the last constraint
+              for (; ind_local < n_local_dofs; ++dof_indices, ++ind_local)
+                {
+                  for (unsigned int comp=0; comp<n_components; ++comp)
+                    operation.process_dof (*dof_indices, *src[comp],
+                                           local_data[comp][ind_local]);
+                }
+            }
+          else
+            {
+              // no constraint at all: compiler can unroll at least the
+              // vectorization loop
+              AssertDimension (dof_info->end_indices(cell)-dof_indices,
+                               static_cast<int>(n_local_dofs));
+              for (unsigned int j=0; j<n_local_dofs; j+=VectorizedArray<Number>::n_array_elements)
+                for (unsigned int v=0; v<VectorizedArray<Number>::n_array_elements; ++v)
+                  for (unsigned int comp=0; comp<n_components; ++comp)
+                    operation.process_dof (dof_indices[j+v], *src[comp],
+                                           local_data[comp][j+v]);
+            }
+        }
+
+      // non-standard case: need to fill in zeros for those components that
+      // are not present (a bit more expensive), but there is not more than
+      // one such cell
+      else
+        {
+          Assert (n_irreg_components_filled > 0, ExcInternalError());
+          for ( ; indicators != indicators_end; ++indicators)
+            {
+              for (unsigned int j=0; j<indicators->first; ++j)
+                {
+                  // non-constrained case: copy the data from the global
+                  // vector, src, to the local one, local_src.
+                  for (unsigned int comp=0; comp<n_components; ++comp)
+                    operation.process_dof (dof_indices[j], *src[comp],
+                                           local_data[comp][ind_local]);
+
+                  // here we jump over all the components that are artificial
+                  ++ind_local;
+                  while (ind_local % VectorizedArray<Number>::n_array_elements
+                         >= n_irreg_components_filled)
+                    {
+                      for (unsigned int comp=0; comp<n_components; ++comp)
+                        operation.process_empty (local_data[comp][ind_local]);
+                      ++ind_local;
+                    }
+                }
+              dof_indices += indicators->first;
+
+              // constrained case: build the local value as a linear
+              // combination of the global value according to constraint
+              Number value [n_components];
+              for (unsigned int comp=0; comp<n_components; ++comp)
+                operation.pre_constraints (local_data[comp][ind_local],
+                                           value[comp]);
+
+              const Number *data_val =
+                matrix_info->constraint_pool_begin(indicators->second);
+              const Number *end_pool =
+                matrix_info->constraint_pool_end(indicators->second);
+
+              for ( ; data_val != end_pool; ++data_val, ++dof_indices)
+                for (unsigned int comp=0; comp<n_components; ++comp)
+                  operation.process_constraint (*dof_indices, *data_val,
+                                                *src[comp], value[comp]);
+
+              for (unsigned int comp=0; comp<n_components; ++comp)
+                operation.post_constraints (value[comp],
+                                            local_data[comp][ind_local]);
+              ind_local++;
+              while (ind_local % VectorizedArray<Number>::n_array_elements
+                     >= n_irreg_components_filled)
+                {
+                  for (unsigned int comp=0; comp<n_components; ++comp)
+                    operation.process_empty (local_data[comp][ind_local]);
+                  ++ind_local;
+                }
+            }
+          for (; ind_local<n_local_dofs; ++dof_indices)
+            {
+              Assert (dof_indices != dof_info->end_indices(cell),
+                      ExcInternalError());
+
+              // non-constrained case: copy the data from the global vector,
+              // src, to the local one, local_dst.
+              for (unsigned int comp=0; comp<n_components; ++comp)
+                operation.process_dof (*dof_indices, *src[comp],
+                                       local_data[comp][ind_local]);
+              ++ind_local;
+              while (ind_local % VectorizedArray<Number>::n_array_elements
+                     >= n_irreg_components_filled)
+                {
+                  for (unsigned int comp=0; comp<n_components; ++comp)
+                    operation.process_empty(local_data[comp][ind_local]);
+                  ++ind_local;
+                }
+            }
+        }
+    }
+  else
+    // case with vector-valued finite elements where all components are
+    // included in one single vector. Assumption: first come all entries to
+    // the first component, then all entries to the second one, and so
+    // on. This is ensured by the way MatrixFree reads out the indices.
+    {
+      internal::check_vector_compatibility (*src[0], *dof_info);
+      Assert (n_fe_components == n_components_, ExcNotImplemented());
+      const unsigned int n_local_dofs =
+        dofs_per_cell*VectorizedArray<Number>::n_array_elements * n_components;
+      Number   *local_data =
+        const_cast<Number *>(&values_dofs[0][0][0]);
+      if (at_irregular_cell == false)
+        {
+          // check whether there is any constraint on the current cell
+          if (indicators != indicators_end)
+            {
+              for ( ; indicators != indicators_end; ++indicators)
+                {
+                  // run through values up to next constraint
+                  for (unsigned int j=0; j<indicators->first; ++j)
+                    operation.process_dof (dof_indices[j], *src[0],
+                                           local_data[ind_local+j]);
+                  ind_local += indicators->first;
+                  dof_indices   += indicators->first;
+
+                  // constrained case: build the local value as a linear
+                  // combination of the global value according to constraints
+                  Number value;
+                  operation.pre_constraints (local_data[ind_local], value);
+
+                  const Number *data_val =
+                    matrix_info->constraint_pool_begin(indicators->second);
+                  const Number *end_pool =
+                    matrix_info->constraint_pool_end(indicators->second);
+
+                  for ( ; data_val != end_pool; ++data_val, ++dof_indices)
+                    operation.process_constraint (*dof_indices, *data_val,
+                                                  *src[0], value);
+
+                  operation.post_constraints (value, local_data[ind_local]);
+                  ind_local++;
+                }
+
+              // get the dof values past the last constraint
+              for (; ind_local<n_local_dofs; ++dof_indices, ++ind_local)
+                operation.process_dof (*dof_indices, *src[0],
+                                       local_data[ind_local]);
+              Assert (dof_indices == dof_info->end_indices(cell),
+                      ExcInternalError());
+            }
+          else
+            {
+              // no constraint at all: compiler can unroll at least the
+              // vectorization loop
+              AssertDimension (dof_info->end_indices(cell)-dof_indices,
+                               static_cast<int>(n_local_dofs));
+              for (unsigned int j=0; j<n_local_dofs; j+=VectorizedArray<Number>::n_array_elements)
+                for (unsigned int v=0; v<VectorizedArray<Number>::n_array_elements; ++v)
+                  operation.process_dof (dof_indices[j+v], *src[0],
+                                         local_data[j+v]);
+            }
+        }
+
+      // non-standard case: need to fill in zeros for those components that
+      // are not present (a bit more expensive), but there is not more than
+      // one such cell
+      else
+        {
+          Assert (n_irreg_components_filled > 0, ExcInternalError());
+          for ( ; indicators != indicators_end; ++indicators)
+            {
+              for (unsigned int j=0; j<indicators->first; ++j)
+                {
+                  // non-constrained case: copy the data from the global
+                  // vector, src, to the local one, local_src.
+                  operation.process_dof (dof_indices[j], *src[0],
+                                         local_data[ind_local]);
+
+                  // here we jump over all the components that are artificial
+                  ++ind_local;
+                  while (ind_local % VectorizedArray<Number>::n_array_elements
+                         >= n_irreg_components_filled)
+                    {
+                      operation.process_empty (local_data[ind_local]);
+                      ++ind_local;
+                    }
+                }
+              dof_indices += indicators->first;
+
+              // constrained case: build the local value as a linear
+              // combination of the global value according to constraint
+              Number value;
+              operation.pre_constraints (local_data[ind_local], value);
+
+              const Number *data_val =
+                matrix_info->constraint_pool_begin(indicators->second);
+              const Number *end_pool =
+                matrix_info->constraint_pool_end(indicators->second);
+
+              for ( ; data_val != end_pool; ++data_val, ++dof_indices)
+                operation.process_constraint (*dof_indices, *data_val,
+                                              *src[0], value);
+
+              operation.post_constraints (value, local_data[ind_local]);
+              ind_local++;
+              while (ind_local % VectorizedArray<Number>::n_array_elements
+                     >= n_irreg_components_filled)
+                {
+                  operation.process_empty (local_data[ind_local]);
+                  ++ind_local;
+                }
+            }
+          for (; ind_local<n_local_dofs; ++dof_indices)
+            {
+              Assert (dof_indices != dof_info->end_indices(cell),
+                      ExcInternalError());
+
+              // non-constrained case: copy the data from the global vector,
+              // src, to the local one, local_dst.
+              operation.process_dof (*dof_indices, *src[0],
+                                     local_data[ind_local]);
+              ++ind_local;
+              while (ind_local % VectorizedArray<Number>::n_array_elements
+                     >= n_irreg_components_filled)
+                {
+                  operation.process_empty (local_data[ind_local]);
+                  ++ind_local;
+                }
+            }
+        }
+    }
+}
+
+
+
+template <int dim, int n_components_, typename Number>
+template<typename VectorType>
+inline
+void
+FEEvaluationBase<dim,n_components_,Number>
+::read_dof_values (const VectorType &src)
+{
+  // Read dof values from src: select between block vectors and non-block
+  // vectors. Note that the number of components is checked in the internal data
+  typename internal::BlockVectorSelector<VectorType,
+           IsBlockVector<VectorType>::value>::BaseVectorType *src_data[n_components]; // one base-vector pointer per component
+  for (unsigned int d=0; d<n_components; ++d)
+    src_data[d] = internal::BlockVectorSelector<VectorType, IsBlockVector<VectorType>::value>::get_vector_component(const_cast<VectorType &>(src), d); // NOTE(review): const is cast away; presumably the reader never writes to src - confirm
+
+  internal::VectorReader<Number> reader;
+  read_write_operation (reader, src_data); // shared read/write loop, here driven by a VectorReader
+
+#ifdef DEBUG
+  dof_values_initialized = true; // debug-only flag; the write-back functions Assert on it
+#endif
+}
+
+
+
+template <int dim, int n_components_, typename Number>
+template<typename VectorType>
+inline
+void
+FEEvaluationBase<dim,n_components_,Number>
+::read_dof_values (const std::vector<VectorType> &src,
+                   const unsigned int             first_index)
+{ // Reads the cell's dof values, taking component comp from src[first_index+comp].
+  AssertIndexRange (first_index, src.size());
+  Assert (n_fe_components == 1, ExcNotImplemented()); // only the one-vector-per-component mode is implemented
+  Assert ((n_fe_components == 1 ?
+           (first_index+n_components <= src.size()) : true),
+          ExcIndexRange (first_index + n_components_, 0, src.size())); // src must hold n_components vectors from first_index on
+
+  VectorType *src_data [n_components];
+  for (unsigned int comp=0; comp<n_components; ++comp)
+    src_data[comp] = const_cast<VectorType *>(&src[comp+first_index]); // const dropped to fill the non-const pointer array
+
+  internal::VectorReader<Number> reader;
+  read_write_operation (reader, src_data); // shared read/write loop, here driven by a VectorReader
+
+#ifdef DEBUG
+  dof_values_initialized = true; // debug-only flag; the write-back functions Assert on it
+#endif
+}
+
+
+
+template <int dim, int n_components_, typename Number>
+template<typename VectorType>
+inline
+void
+FEEvaluationBase<dim,n_components_,Number>
+::read_dof_values (const std::vector<VectorType *> &src,
+                   const unsigned int              first_index)
+{ // Reads the cell's dof values, taking component comp from *src[first_index+comp].
+  AssertIndexRange (first_index, src.size());
+  Assert (n_fe_components == 1, ExcNotImplemented()); // only the one-vector-per-component mode is implemented
+  Assert ((n_fe_components == 1 ?
+           (first_index+n_components <= src.size()) : true),
+          ExcIndexRange (first_index + n_components_, 0, src.size())); // src must hold n_components pointers from first_index on
+
+  const VectorType *src_data [n_components];
+  for (unsigned int comp=0; comp<n_components; ++comp)
+    src_data[comp] = const_cast<VectorType *>(src[comp+first_index]); // NOTE(review): cast looks redundant, src[i] already is a VectorType*
+
+  internal::VectorReader<Number> reader;
+  read_write_operation (reader, src_data); // shared read/write loop, here driven by a VectorReader
+
+#ifdef DEBUG
+  dof_values_initialized = true; // debug-only flag; the write-back functions Assert on it
+#endif
+}
+
+
+
+template <int dim, int n_components_, typename Number>
+template<typename VectorType>
+inline
+void
+FEEvaluationBase<dim,n_components_,Number>
+::read_dof_values_plain (const VectorType &src)
+{
+  // Plain read from src: select between block vectors and non-block vectors.
+  // Note that the number of components is checked in the internal data
+  const typename internal::BlockVectorSelector<VectorType,
+        IsBlockVector<VectorType>::value>::BaseVectorType *src_data[n_components]; // one const base-vector pointer per component
+  for (unsigned int d=0; d<n_components; ++d)
+    src_data[d] = internal::BlockVectorSelector<VectorType, IsBlockVector<VectorType>::value>::get_vector_component(const_cast<VectorType &>(src), d); // const removed only for the selector call; the result is stored as pointer-to-const
+
+  read_dof_values_plain (src_data); // forwards to the pointer-array overload of read_dof_values_plain
+}
+
+
+
+template <int dim, int n_components_, typename Number>
+template<typename VectorType>
+inline
+void
+FEEvaluationBase<dim,n_components_,Number>
+::read_dof_values_plain (const std::vector<VectorType> &src,
+                         const unsigned int             first_index)
+{ // Plain read with component comp taken from src[first_index+comp].
+  AssertIndexRange (first_index, src.size());
+  Assert (n_fe_components == 1, ExcNotImplemented()); // only the one-vector-per-component mode is implemented
+  Assert ((n_fe_components == 1 ?
+           (first_index+n_components <= src.size()) : true),
+          ExcIndexRange (first_index + n_components_, 0, src.size())); // src must hold n_components vectors from first_index on
+  const VectorType *src_data [n_components];
+  for (unsigned int comp=0; comp<n_components; ++comp)
+    src_data[comp] = &src[comp+first_index];
+  read_dof_values_plain (src_data); // forwards to the pointer-array overload of read_dof_values_plain
+}
+
+
+
+template <int dim, int n_components_, typename Number>
+template<typename VectorType>
+inline
+void
+FEEvaluationBase<dim,n_components_,Number>
+::read_dof_values_plain (const std::vector<VectorType *> &src,
+                         const unsigned int              first_index)
+{ // Plain read with component comp taken from *src[first_index+comp].
+  AssertIndexRange (first_index, src.size());
+  Assert (n_fe_components == 1, ExcNotImplemented()); // only the one-vector-per-component mode is implemented
+  Assert ((n_fe_components == 1 ?
+           (first_index+n_components <= src.size()) : true),
+          ExcIndexRange (first_index + n_components_, 0, src.size())); // src must hold n_components pointers from first_index on
+  const VectorType *src_data [n_components];
+  for (unsigned int comp=0; comp<n_components; ++comp)
+    src_data[comp] = src[comp+first_index];
+  read_dof_values_plain (src_data); // forwards to the pointer-array overload of read_dof_values_plain
+}
+
+
+
+template <int dim, int n_components_, typename Number>
+template<typename VectorType>
+inline
+void
+FEEvaluationBase<dim,n_components_,Number>
+::distribute_local_to_global (VectorType &dst) const
+{
+  Assert (dof_values_initialized==true,
+          internal::ExcAccessToUninitializedField()); // local dof values must have been filled before they are written out
+
+  // Write-back to dst: select between block vectors and non-block vectors.
+  // Note that the number of components is checked in the internal data
+  typename internal::BlockVectorSelector<VectorType,
+           IsBlockVector<VectorType>::value>::BaseVectorType *dst_data[n_components]; // one base-vector pointer per component
+  for (unsigned int d=0; d<n_components; ++d)
+    dst_data[d] = internal::BlockVectorSelector<VectorType, IsBlockVector<VectorType>::value>::get_vector_component(dst, d);
+
+  internal::VectorDistributorLocalToGlobal<Number> distributor;
+  read_write_operation (distributor, dst_data); // same loop as the read path, driven by VectorDistributorLocalToGlobal
+}
+
+
+
+template <int dim, int n_components_, typename Number>
+template<typename VectorType>
+inline
+void
+FEEvaluationBase<dim,n_components_,Number>
+::distribute_local_to_global (std::vector<VectorType>  &dst,
+                              const unsigned int        first_index) const
+{ // Write-back with component comp going to dst[first_index+comp].
+  AssertIndexRange (first_index, dst.size());
+  Assert (n_fe_components == 1, ExcNotImplemented()); // only the one-vector-per-component mode is implemented
+  Assert ((n_fe_components == 1 ?
+           (first_index+n_components <= dst.size()) : true),
+          ExcIndexRange (first_index + n_components_, 0, dst.size())); // dst must hold n_components vectors from first_index on
+  Assert (dof_values_initialized==true,
+          internal::ExcAccessToUninitializedField()); // local dof values must have been filled before they are written out
+
+  VectorType *dst_data [n_components];
+  for (unsigned int comp=0; comp<n_components; ++comp)
+    dst_data[comp] = &dst[comp+first_index];
+
+  internal::VectorDistributorLocalToGlobal<Number> distributor;
+  read_write_operation (distributor, dst_data); // same loop as the read path, driven by VectorDistributorLocalToGlobal
+}
+
+
+
+template <int dim, int n_components_, typename Number>
+template<typename VectorType>
+inline
+void
+FEEvaluationBase<dim,n_components_,Number>
+::distribute_local_to_global (std::vector<VectorType *>  &dst,
+                              const unsigned int         first_index) const
+{ // Write-back with component comp going to *dst[first_index+comp].
+  AssertIndexRange (first_index, dst.size());
+  Assert (n_fe_components == 1, ExcNotImplemented()); // only the one-vector-per-component mode is implemented
+  Assert ((n_fe_components == 1 ?
+           (first_index+n_components <= dst.size()) : true),
+          ExcIndexRange (first_index + n_components_, 0, dst.size())); // dst must hold n_components pointers from first_index on
+  Assert (dof_values_initialized==true,
+          internal::ExcAccessToUninitializedField()); // local dof values must have been filled before they are written out
+
+  VectorType *dst_data [n_components];
+  for (unsigned int comp=0; comp<n_components; ++comp)
+    dst_data[comp] = dst[comp+first_index];
+
+  internal::VectorDistributorLocalToGlobal<Number> distributor;
+  read_write_operation (distributor, dst_data); // same loop as the read path, driven by VectorDistributorLocalToGlobal
+}
+
+
+
+template <int dim, int n_components_, typename Number>
+template<typename VectorType>
+inline
+void
+FEEvaluationBase<dim,n_components_,Number>
+::set_dof_values (VectorType &dst) const
+{
+  Assert (dof_values_initialized==true,
+          internal::ExcAccessToUninitializedField()); // local dof values must have been filled before they are written out
+
+  // Write-back to dst: select between block vectors and non-block vectors.
+  // Note that the number of components is checked in the internal data
+  typename internal::BlockVectorSelector<VectorType,
+           IsBlockVector<VectorType>::value>::BaseVectorType *dst_data[n_components]; // one base-vector pointer per component
+  for (unsigned int d=0; d<n_components; ++d)
+    dst_data[d] = internal::BlockVectorSelector<VectorType, IsBlockVector<VectorType>::value>::get_vector_component(dst, d);
+
+  internal::VectorSetter<Number> setter;
+  read_write_operation (setter, dst_data); // same loop as distribute_local_to_global, but driven by VectorSetter
+}
+
+
+
+template <int dim, int n_components_, typename Number>
+template<typename VectorType>
+inline
+void
+FEEvaluationBase<dim,n_components_,Number>
+::set_dof_values (std::vector<VectorType>  &dst,
+                  const unsigned int        first_index) const
+{ // Write-back with component comp going to dst[first_index+comp].
+  AssertIndexRange (first_index, dst.size());
+  Assert (n_fe_components == 1, ExcNotImplemented()); // only the one-vector-per-component mode is implemented
+  Assert ((n_fe_components == 1 ?
+           (first_index+n_components <= dst.size()) : true),
+          ExcIndexRange (first_index + n_components_, 0, dst.size())); // dst must hold n_components vectors from first_index on
+
+  Assert (dof_values_initialized==true,
+          internal::ExcAccessToUninitializedField()); // local dof values must have been filled before they are written out
+
+  VectorType *dst_data [n_components];
+  for (unsigned int comp=0; comp<n_components; ++comp)
+    dst_data[comp] = &dst[comp+first_index];
+
+  internal::VectorSetter<Number> setter;
+  read_write_operation (setter, dst_data); // same loop as distribute_local_to_global, but driven by VectorSetter
+}
+
+
+
+template <int dim, int n_components_, typename Number>
+template<typename VectorType>
+inline
+void
+FEEvaluationBase<dim,n_components_,Number>
+::set_dof_values (std::vector<VectorType *>  &dst,
+                  const unsigned int         first_index) const
+{ // Write-back with component comp going to *dst[first_index+comp].
+  AssertIndexRange (first_index, dst.size());
+  Assert (n_fe_components == 1, ExcNotImplemented()); // only the one-vector-per-component mode is implemented
+  Assert ((n_fe_components == 1 ?
+           (first_index+n_components <= dst.size()) : true),
+          ExcIndexRange (first_index + n_components_, 0, dst.size())); // dst must hold n_components pointers from first_index on
+
+  Assert (dof_values_initialized==true,
+          internal::ExcAccessToUninitializedField()); // local dof values must have been filled before they are written out
+
+  VectorType *dst_data [n_components];
+  for (unsigned int comp=0; comp<n_components; ++comp)
+    dst_data[comp] = dst[comp+first_index];
+
+  internal::VectorSetter<Number> setter;
+  read_write_operation (setter, dst_data); // same loop as distribute_local_to_global, but driven by VectorSetter
+}
+
+
+
+template <int dim, int n_components_, typename Number>
+template<typename VectorType>
+inline
+void
+FEEvaluationBase<dim,n_components_,Number>
+::read_dof_values_plain (const VectorType *src[])
+{
+  // Case without MatrixFree initialization object: use the generic reader path
+  if (matrix_info == 0)
+    {
+      internal::VectorReader<Number> reader;
+      read_write_operation (reader, src);
+      return; // NOTE(review): leaves before the DEBUG-only dof_values_initialized = true at the end - confirm intended
+    }
+
+  // Unlike read_dof_values, distribute_local_to_global and set_dof_values, no
+  // constraints are used here, so the loops are written out in this function.
+  Assert (dof_info != 0, ExcNotInitialized());
+  Assert (matrix_info->indices_initialized() == true,
+          ExcNotInitialized());
+  Assert (cell != numbers::invalid_unsigned_int, ExcNotInitialized());
+  Assert (dof_info->store_plain_indices == true, ExcNotInitialized()); // the plain index list read below must have been stored
+
+  // loop over all local dofs. ind_local (or j) is the position in the local
+  // array of Number entries, and dof_indices points to the plain global
+  // indices of the current cell as returned by begin_indices_plain
+  const unsigned int *dof_indices = dof_info->begin_indices_plain(cell);
+  const unsigned int dofs_per_cell = this->data->dofs_per_cell;
+
+  const unsigned int n_irreg_components_filled = dof_info->row_starts[cell][2]; // 0 marks a regular cell, see next line
+  const bool at_irregular_cell = n_irreg_components_filled > 0;
+
+  // scalar case (or case when all components have the same degrees of freedom
+  // and sit on a different vector each)
+  if (n_fe_components == 1)
+    {
+      const unsigned int n_local_dofs =
+        VectorizedArray<Number>::n_array_elements * dofs_per_cell; // Number entries per component
+      for (unsigned int comp=0; comp<n_components; ++comp)
+        internal::check_vector_compatibility (*src[comp], *dof_info);
+      Number *local_src_number [n_components];
+      for (unsigned int comp=0; comp<n_components; ++comp)
+        local_src_number[comp] = &values_dofs[comp][0][0]; // view each component's values_dofs as a flat Number array
+
+      // standard case where there are sufficiently many cells to fill all
+      // vectors: one global index per local entry, no gaps
+      if (at_irregular_cell == false)
+        {
+          for (unsigned int j=0; j<n_local_dofs; ++j)
+            for (unsigned int comp=0; comp<n_components; ++comp)
+              local_src_number[comp][j] =
+                internal::vector_access (*src[comp], dof_indices[j]);
+        }
+
+      // non-standard case: need to fill in zeros for those components that
+      // are not present (a bit more expensive), but there is not more than
+      // one such cell
+      else
+        {
+          Assert (n_irreg_components_filled > 0, ExcInternalError());
+          for (unsigned int ind_local=0; ind_local<n_local_dofs;
+               ++dof_indices)
+            {
+              // copy the data from the global vectors, src, to the local
+              // arrays, local_src_number (same index for every component).
+              for (unsigned int comp=0; comp<n_components; ++comp)
+                local_src_number[comp][ind_local] =
+                  internal::vector_access (*src[comp], *dof_indices);
+              ++ind_local;
+              while (ind_local % VectorizedArray<Number>::n_array_elements >= n_irreg_components_filled) // unfilled array positions
+                {
+                  for (unsigned int comp=0; comp<n_components; ++comp)
+                    local_src_number[comp][ind_local] = 0.; // padding entry: no global index is consumed
+                  ++ind_local;
+                }
+            }
+        }
+    }
+  else
+    // case with vector-valued finite elements where all components are
+    // included in one single vector. Assumption: first come all entries to
+    // the first component, then all entries to the second one, and so
+    // on. This is ensured by the way MatrixFree reads out the indices.
+    {
+      internal::check_vector_compatibility (*src[0], *dof_info);
+      Assert (n_fe_components == n_components_, ExcNotImplemented());
+      const unsigned int n_local_dofs =
+        dofs_per_cell * VectorizedArray<Number>::n_array_elements * n_components; // all components in one run
+      Number *local_src_number = &values_dofs[0][0][0]; // flat view starting at the first component
+      if (at_irregular_cell == false)
+        {
+          for (unsigned int j=0; j<n_local_dofs; ++j)
+            local_src_number[j] =
+              internal::vector_access (*src[0], dof_indices[j]);
+        }
+
+      // non-standard case: need to fill in zeros for those components that
+      // are not present (a bit more expensive), but there is not more than
+      // one such cell
+      else
+        {
+          Assert (n_irreg_components_filled > 0, ExcInternalError());
+          for (unsigned int ind_local=0; ind_local<n_local_dofs; ++dof_indices)
+            {
+              // copy the data from the single global vector, src[0], to the
+              // local array, local_src_number.
+              local_src_number[ind_local] =
+                internal::vector_access (*src[0], *dof_indices);
+              ++ind_local;
+              while (ind_local % VectorizedArray<Number>::n_array_elements >= n_irreg_components_filled) // unfilled array positions
+                {
+                  local_src_number[ind_local] = 0.; // padding entry: no global index is consumed
+                  ++ind_local;
+                }
+            }
+        }
+    }
+
+#ifdef DEBUG
+  dof_values_initialized = true; // debug-only flag; the write-back functions Assert on it
+#endif
+}
+
+
+
+
+/*------------------------------ access to data fields ----------------------*/
+
+template <int dim, int n_components, typename Number>
+inline
+const std::vector<unsigned int> &
+FEEvaluationBase<dim,n_components,Number>::
+get_internal_dof_numbering() const
+{
+  return data->lexicographic_numbering; // reference to the numbering array held by data, no copy is made
+}
+
+
+
+
+template <int dim, int n_components, typename Number>
+inline
+const VectorizedArray<Number> *
+FEEvaluationBase<dim,n_components,Number>::
+begin_dof_values () const
+{
+  return &values_dofs[0][0]; // read-only pointer to the first dof value of component 0
+}
+
+
+
+template <int dim, int n_components, typename Number>
+inline
+VectorizedArray<Number> *
+FEEvaluationBase<dim,n_components,Number>::
+begin_dof_values ()
+{
+#ifdef DEBUG
+  dof_values_initialized = true; // handing out write access marks the dof values as initialized (debug builds only)
+#endif
+  return &values_dofs[0][0]; // writable pointer to the first dof value of component 0
+}
+
+
+
+template <int dim, int n_components, typename Number>
+inline
+const VectorizedArray<Number> *
+FEEvaluationBase<dim,n_components,Number>::
+begin_values () const
+{
+  Assert (values_quad_initialized || values_quad_submitted,
+          ExcNotInitialized()); // reading requires values that were either initialized or submitted
+  return &values_quad[0][0]; // read-only pointer to the first quadrature-point value of component 0
+}
+
+
+
+template <int dim, int n_components, typename Number>
+inline
+VectorizedArray<Number> *
+FEEvaluationBase<dim,n_components,Number>::
+begin_values ()
+{
+#ifdef DEBUG
+  values_quad_submitted = true; // handing out write access marks the values as submitted (debug builds only)
+#endif
+  return &values_quad[0][0]; // writable pointer to the first quadrature-point value of component 0
+}
+
+
+
+template <int dim, int n_components, typename Number>
+inline
+const VectorizedArray<Number> *
+FEEvaluationBase<dim,n_components,Number>::
+begin_gradients () const
+{
+  Assert (gradients_quad_initialized || gradients_quad_submitted,
+          ExcNotInitialized()); // reading requires gradients that were either initialized or submitted
+  return &gradients_quad[0][0][0]; // read-only pointer to component 0, direction 0, first quadrature point
+}
+
+
+
+template <int dim, int n_components, typename Number>
+inline
+VectorizedArray<Number> *
+FEEvaluationBase<dim,n_components,Number>::
+begin_gradients ()
+{
+#ifdef DEBUG
+  gradients_quad_submitted = true; // handing out write access marks the gradients as submitted (debug builds only)
+#endif
+  return &gradients_quad[0][0][0]; // writable pointer to component 0, direction 0, first quadrature point
+}
+
+
+
+template <int dim, int n_components, typename Number>
+inline
+const VectorizedArray<Number> *
+FEEvaluationBase<dim,n_components,Number>::
+begin_hessians () const
+{
+  Assert (hessians_quad_initialized, ExcNotInitialized()); // reading requires initialized hessians
+  return &hessians_quad[0][0][0]; // read-only pointer to component 0, first stored hessian entry, first quadrature point
+}
+
+
+
+template <int dim, int n_components, typename Number>
+inline
+VectorizedArray<Number> *
+FEEvaluationBase<dim,n_components,Number>::
+begin_hessians ()
+{
+  return &hessians_quad[0][0][0]; // writable pointer; unlike begin_values()/begin_gradients(), no debug flag is set here
+}
+
+
+
+template <int dim, int n_components_, typename Number>
+inline
+Tensor<1,n_components_,VectorizedArray<Number> >
+FEEvaluationBase<dim,n_components_,Number>
+::get_dof_value (const unsigned int dof) const
+{
+  AssertIndexRange (dof, this->data->dofs_per_cell); // dof must be a valid local dof index
+  Tensor<1,n_components_,VectorizedArray<Number> > return_value;
+  for (unsigned int comp=0; comp<n_components; comp++)
+    return_value[comp] = this->values_dofs[comp][dof]; // gather entry 'dof' of every component into one tensor
+  return return_value;
+}
+
+
+
+template <int dim, int n_components_, typename Number>
+inline
+Tensor<1,n_components_,VectorizedArray<Number> >
+FEEvaluationBase<dim,n_components_,Number>
+::get_value (const unsigned int q_point) const
+{
+  Assert (this->values_quad_initialized==true,
+          internal::ExcAccessToUninitializedField()); // values at quadrature points must have been initialized first
+  AssertIndexRange (q_point, this->data->n_q_points); // q_point must be a valid quadrature point index
+  Tensor<1,n_components_,VectorizedArray<Number> > return_value;
+  for (unsigned int comp=0; comp<n_components; comp++)
+    return_value[comp] = this->values_quad[comp][q_point]; // gather entry 'q_point' of every component into one tensor
+  return return_value;
+}
+
+
+
+template <int dim, int n_components_, typename Number>
+inline
+Tensor<1,n_components_,Tensor<1,dim,VectorizedArray<Number> > >
+FEEvaluationBase<dim,n_components_,Number>
+::get_gradient (const unsigned int q_point) const
+{
+  Assert (this->gradients_quad_initialized==true,
+          internal::ExcAccessToUninitializedField()); // gradients at quadrature points must have been initialized first
+  AssertIndexRange (q_point, this->data->n_q_points);
+
+  Tensor<1,n_components_,Tensor<1,dim,VectorizedArray<Number> > > grad_out;
+
+  // Cartesian cell: scale each stored derivative by the factor for its direction
+  if (this->cell_type == internal::MatrixFreeFunctions::cartesian)
+    {
+      for (unsigned int comp=0; comp<n_components; comp++)
+        for (unsigned int d=0; d<dim; ++d)
+          grad_out[comp][d] = (this->gradients_quad[comp][d][q_point] *
+                               cartesian_data[0][d]);
+    }
+  // cell with general/affine Jacobian: grad_out[comp] = jac * stored gradient
+  else
+    {
+      const Tensor<2,dim,VectorizedArray<Number> > &jac =
+        this->cell_type == internal::MatrixFreeFunctions::general ?
+        jacobian[q_point] : jacobian[0]; // per-point entry on general cells, otherwise the single entry 0
+      for (unsigned int comp=0; comp<n_components; comp++)
+        {
+          for (unsigned int d=0; d<dim; ++d)
+            {
+              grad_out[comp][d] = (jac[d][0] *
+                                   this->gradients_quad[comp][0][q_point]); // e=0 term starts the sum
+              for (unsigned int e=1; e<dim; ++e)
+                grad_out[comp][d] += (jac[d][e] *
+                                      this->gradients_quad[comp][e][q_point]);
+            }
+        }
+    }
+  return grad_out;
+}
+
+
+
+namespace internal
+{
+  // compute tmp = hess_unit(u) * J^T, i.e. tmp[i][d] = sum_k H[i][k]*jac[d][k]. done by hand
+  // because only the diagonal and upper entries of the symmetric H are stored
+  template <typename Number>
+  inline
+  void
+  hessian_unit_times_jac (const Tensor<2,1,VectorizedArray<Number> > &jac,
+                          const VectorizedArray<Number> *const hessians_quad[1],
+                          const unsigned int             q_point,
+                          VectorizedArray<Number>       (&tmp)[1][1])
+  {
+    tmp[0][0] = jac[0][0] * hessians_quad[0][q_point]; // 1d: a single stored entry
+  }
+
+  template <typename Number>
+  inline
+  void
+  hessian_unit_times_jac (const Tensor<2,2,VectorizedArray<Number> > &jac,
+                          const VectorizedArray<Number> *const hessians_quad[3],
+                          const unsigned int             q_point,
+                          VectorizedArray<Number>       (&tmp)[2][2])
+  {
+    for (unsigned int d=0; d<2; ++d) // 2d storage as used below: [0]=H00, [1]=H11, [2]=H01
+      {
+        tmp[0][d] = (jac[d][0] * hessians_quad[0][q_point] +
+                     jac[d][1] * hessians_quad[2][q_point]);
+        tmp[1][d] = (jac[d][0] * hessians_quad[2][q_point] +
+                     jac[d][1] * hessians_quad[1][q_point]);
+      }
+  }
+
+  template <typename Number>
+  inline
+  void
+  hessian_unit_times_jac (const Tensor<2,3,VectorizedArray<Number> > &jac,
+                          const VectorizedArray<Number> *const hessians_quad[6],
+                          const unsigned int             q_point,
+                          VectorizedArray<Number>       (&tmp)[3][3])
+  {
+    for (unsigned int d=0; d<3; ++d) // 3d storage as used below: [0..2]=H00,H11,H22, [3]=H01, [4]=H02, [5]=H12
+      {
+        tmp[0][d] = (jac[d][0] * hessians_quad[0][q_point] +
+                     jac[d][1] * hessians_quad[3][q_point] +
+                     jac[d][2] * hessians_quad[4][q_point]);
+        tmp[1][d] = (jac[d][0] * hessians_quad[3][q_point] +
+                     jac[d][1] * hessians_quad[1][q_point] +
+                     jac[d][2] * hessians_quad[5][q_point]);
+        tmp[2][d] = (jac[d][0] * hessians_quad[4][q_point] +
+                     jac[d][1] * hessians_quad[5][q_point] +
+                     jac[d][2] * hessians_quad[2][q_point]);
+      }
+  }
+}
+
+
+
+template <int dim, int n_components_, typename Number>
+inline
+Tensor<1,n_components_,Tensor<2,dim,VectorizedArray<Number> > >
+FEEvaluationBase<dim,n_components_,Number>
+::get_hessian (const unsigned int q_point) const
+{
+  Assert (this->hessians_quad_initialized==true,
+          internal::ExcAccessToUninitializedField());
+  AssertIndexRange (q_point, this->data->n_q_points);
+
+  Tensor<2,dim,VectorizedArray<Number> > hessian_out [n_components];
+
+  // Cartesian cell
+  if (this->cell_type == internal::MatrixFreeFunctions::cartesian)
+    {
+      const Tensor<1,dim,VectorizedArray<Number> > &jac = cartesian_data[0];
+      for (unsigned int comp=0; comp<n_components; comp++)
+        for (unsigned int d=0; d<dim; ++d)
+          {
+            hessian_out[comp][d][d] = (this->hessians_quad[comp][d][q_point] *
+                                       jac[d] * jac[d]);
+            switch (dim)
+              {
+              case 1:
+                break;
+              case 2:
+                hessian_out[comp][0][1] = (this->hessians_quad[comp][2][q_point] *
+                                           jac[0] * jac[1]);
+                break;
+              case 3:
+                hessian_out[comp][0][1] = (this->hessians_quad[comp][3][q_point] *
+                                           jac[0] * jac[1]);
+                hessian_out[comp][0][2] = (this->hessians_quad[comp][4][q_point] *
+                                           jac[0] * jac[2]);
+                hessian_out[comp][1][2] = (this->hessians_quad[comp][5][q_point] *
+                                           jac[1] * jac[2]);
+                break;
+              default:
+                Assert (false, ExcNotImplemented());
+              }
+            for (unsigned int e=d+1; e<dim; ++e)
+              hessian_out[comp][e][d] = hessian_out[comp][d][e];
+          }
+    }
+  // cell with general Jacobian
+  else if (this->cell_type == internal::MatrixFreeFunctions::general)
+    {
+      Assert (this->mapping_info->second_derivatives_initialized == true,
+              ExcNotInitialized());
+      const Tensor<2,dim,VectorizedArray<Number> > &jac = jacobian[q_point];
+      const Tensor<2,dim,VectorizedArray<Number> > &jac_grad = jacobian_grad[q_point];
+      const Tensor<1,(dim>1?dim*(dim-1)/2:1),
+            Tensor<1,dim,VectorizedArray<Number> > >
+            & jac_grad_UT = jacobian_grad_upper[q_point];
+      for (unsigned int comp=0; comp<n_components; comp++)
+        {
+          // compute laplacian before the gradient because it needs to access
+          // unscaled gradient data
+          VectorizedArray<Number> tmp[dim][dim];
+          internal::hessian_unit_times_jac (jac, this->hessians_quad[comp],
+                                            q_point, tmp);
+
+          // compute first part of hessian, J * tmp = J * hess_unit(u) * J^T
+          for (unsigned int d=0; d<dim; ++d)
+            for (unsigned int e=d; e<dim; ++e)
+              {
+                hessian_out[comp][d][e] = jac[d][0] * tmp[0][e];
+                for (unsigned int f=1; f<dim; ++f)
+                  hessian_out[comp][d][e] += jac[d][f] * tmp[f][e];
+              }
+
+          // add diagonal part of J' * grad(u)
+          for (unsigned int d=0; d<dim; ++d)
+            for (unsigned int e=0; e<dim; ++e)
+              hessian_out[comp][d][d] += (jac_grad[d][e] *
+                                          this->gradients_quad[comp][e][q_point]);
+
+          // add off-diagonal part of J' * grad(u)
+          for (unsigned int d=0, count=0; d<dim; ++d)
+            for (unsigned int e=d+1; e<dim; ++e, ++count)
+              for (unsigned int f=0; f<dim; ++f)
+                hessian_out[comp][d][e] += (jac_grad_UT[count][f] *
+                                            this->gradients_quad[comp][f][q_point]);
+
+          // take symmetric part
+          for (unsigned int d=0; d<dim; ++d)
+            for (unsigned int e=d+1; e<dim; ++e)
+              hessian_out[comp][e][d] = hessian_out[comp][d][e];
+        }
+    }
+  // cell with general Jacobian, but constant within the cell
+  else // if (this->cell_type == internal::MatrixFreeFunctions::affine)
+    {
+      const Tensor<2,dim,VectorizedArray<Number> > &jac = jacobian[0];
+      for (unsigned int comp=0; comp<n_components; comp++)
+        {
+          // compute laplacian before the gradient because it needs to access
+          // unscaled gradient data
+          VectorizedArray<Number> tmp[dim][dim];
+          internal::hessian_unit_times_jac (jac, this->hessians_quad[comp],
+                                            q_point, tmp);
+
+          // compute first part of hessian, J * tmp = J * hess_unit(u) * J^T
+          for (unsigned int d=0; d<dim; ++d)
+            for (unsigned int e=d; e<dim; ++e)
+              {
+                hessian_out[comp][d][e] = jac[d][0] * tmp[0][e];
+                for (unsigned int f=1; f<dim; ++f)
+                  hessian_out[comp][d][e] += jac[d][f] * tmp[f][e];
+              }
+
+          // no J' * grad(u) part here because the Jacobian is constant
+          // throughout the cell and hence, its derivative is zero
+
+          // take symmetric part
+          for (unsigned int d=0; d<dim; ++d)
+            for (unsigned int e=d+1; e<dim; ++e)
+              hessian_out[comp][e][d] = hessian_out[comp][d][e];
+        }
+    }
+  return Tensor<1,n_components_,Tensor<2,dim,VectorizedArray<Number> > >(hessian_out);
+}
+
+
+
+// Returns, for every component, the diagonal entries of the Hessian at
+// quadrature point q_point. The unit-cell second derivatives stored in
+// hessians_quad are transformed with the Jacobian data that matches the
+// type of the current cell.
+template <int dim, int n_components_, typename Number>
+inline
+Tensor<1,n_components_,Tensor<1,dim,VectorizedArray<Number> > >
+FEEvaluationBase<dim,n_components_,Number>
+::get_hessian_diagonal (const unsigned int q_point) const
+{
+  Assert (this->hessians_quad_initialized==true,
+          internal::ExcAccessToUninitializedField());
+  AssertIndexRange (q_point, this->data->n_q_points);
+
+  Tensor<1,n_components_,Tensor<1,dim,VectorizedArray<Number> > > diagonal;
+
+  if (this->cell_type == internal::MatrixFreeFunctions::cartesian)
+    {
+      // Cartesian cell: every second derivative is scaled twice by the
+      // matching entry of the per-direction data in cartesian_data
+      const Tensor<1,dim,VectorizedArray<Number> > &scaling = cartesian_data[0];
+      for (unsigned int c=0; c<n_components; ++c)
+        for (unsigned int d=0; d<dim; ++d)
+          diagonal[c][d] = (this->hessians_quad[c][d][q_point] *
+                            scaling[d] * scaling[d]);
+      return diagonal;
+    }
+
+  // remaining cell types: general (Jacobian stored per quadrature point) or
+  // affine (one Jacobian for the whole cell, stored at index 0)
+  const bool general_cell =
+    (this->cell_type == internal::MatrixFreeFunctions::general);
+  if (general_cell)
+    {
+      Assert (this->mapping_info->second_derivatives_initialized == true,
+              ExcNotInitialized());
+    }
+  const Tensor<2,dim,VectorizedArray<Number> > &jac =
+    general_cell ? jacobian[q_point] : jacobian[0];
+
+  for (unsigned int c=0; c<n_components; ++c)
+    {
+      // tmp is filled by the helper from the unit-cell Hessian and jac
+      VectorizedArray<Number> tmp[dim][dim];
+      internal::hessian_unit_times_jac (jac, this->hessians_quad[c],
+                                        q_point, tmp);
+
+      for (unsigned int d=0; d<dim; ++d)
+        {
+          // diagonal entry (d,d) of J * tmp
+          VectorizedArray<Number> entry = jac[d][0] * tmp[0][d];
+          for (unsigned int f=1; f<dim; ++f)
+            entry += jac[d][f] * tmp[f][d];
+
+          // only a non-constant Jacobian contributes the J' * grad(u)
+          // term; on affine cells the derivative of the Jacobian is zero
+          if (general_cell)
+            {
+              const Tensor<2,dim,VectorizedArray<Number> > &jac_grad =
+                jacobian_grad[q_point];
+              for (unsigned int e=0; e<dim; ++e)
+                entry += (jac_grad[d][e] *
+                          this->gradients_quad[c][e][q_point]);
+            }
+          diagonal[c][d] = entry;
+        }
+    }
+  return diagonal;
+}
+
+
+
+// Returns the Laplacian of every component at quadrature point q_point,
+// computed as the sum of the entries delivered by get_hessian_diagonal().
+template <int dim, int n_components_, typename Number>
+inline
+Tensor<1,n_components_,VectorizedArray<Number> >
+FEEvaluationBase<dim,n_components_,Number>
+::get_laplacian (const unsigned int q_point) const
+{
+  Assert (this->hessians_quad_initialized==true,
+          internal::ExcAccessToUninitializedField());
+  AssertIndexRange (q_point, this->data->n_q_points);
+
+  const Tensor<1,n_components_,Tensor<1,dim,VectorizedArray<Number> > > diagonal
+    = get_hessian_diagonal(q_point);
+
+  Tensor<1,n_components_,VectorizedArray<Number> > laplacian;
+  for (unsigned int c=0; c<n_components; ++c)
+    {
+      // start from the first entry so no explicit zero is needed
+      VectorizedArray<Number> trace = diagonal[c][0];
+      for (unsigned int d=1; d<dim; ++d)
+        trace += diagonal[c][d];
+      laplacian[c] = trace;
+    }
+  return laplacian;
+}
+
+
+
+// Writes val_in into the degree-of-freedom storage at index dof, one entry
+// per component. In debug mode the dof values are flagged as initialized.
+template <int dim, int n_components_, typename Number>
+inline
+void
+FEEvaluationBase<dim,n_components_,Number>
+::submit_dof_value (const Tensor<1,n_components_,VectorizedArray<Number> > val_in,
+                    const unsigned int dof)
+{
+#ifdef DEBUG
+  this->dof_values_initialized = true;
+#endif
+  AssertIndexRange (dof, this->data->dofs_per_cell);
+
+  for (unsigned int c=0; c<n_components; ++c)
+    {
+      this->values_dofs[c][dof] = val_in[c];
+    }
+}
+
+
+
+// Stores val_in, multiplied by the JxW factor of quadrature point q_point,
+// in values_quad for all components.
+template <int dim, int n_components_, typename Number>
+inline
+void
+FEEvaluationBase<dim,n_components_,Number>
+::submit_value (const Tensor<1,n_components_,VectorizedArray<Number> > val_in,
+                const unsigned int q_point)
+{
+#ifdef DEBUG
+  Assert (this->cell != numbers::invalid_unsigned_int, ExcNotInitialized());
+  AssertIndexRange (q_point, this->data->n_q_points);
+  this->values_quad_submitted = true;
+#endif
+
+  // general cells read one J_value entry per quadrature point; all other
+  // cell types combine the single entry J_value[0] with the quadrature
+  // weight of this point
+  const VectorizedArray<Number> weight =
+    this->cell_type == internal::MatrixFreeFunctions::general ?
+    J_value[q_point] : J_value[0] * quadrature_weights[q_point];
+
+  for (unsigned int c=0; c<n_components; ++c)
+    this->values_quad[c][q_point] = val_in[c] * weight;
+}
+
+
+
+// Stores the gradient grad_in for quadrature point q_point in
+// gradients_quad: it is multiplied by the Jacobian data of the cell (from
+// the transposed side) and by the JxW factor of the point.
+template <int dim, int n_components_, typename Number>
+inline
+void
+FEEvaluationBase<dim,n_components_,Number>
+::submit_gradient (const Tensor<1,n_components_,
+                   Tensor<1,dim,VectorizedArray<Number> > >grad_in,
+                   const unsigned int q_point)
+{
+#ifdef DEBUG
+  Assert (this->cell != numbers::invalid_unsigned_int, ExcNotInitialized());
+  AssertIndexRange (q_point, this->data->n_q_points);
+  this->gradients_quad_submitted = true;
+#endif
+
+  const bool general_cell =
+    (this->cell_type == internal::MatrixFreeFunctions::general);
+
+  // per-point factor on general cells, otherwise the single stored factor
+  // times the quadrature weight
+  const VectorizedArray<Number> weight =
+    general_cell ? J_value[q_point] : J_value[0] * quadrature_weights[q_point];
+
+  if (this->cell_type == internal::MatrixFreeFunctions::cartesian)
+    {
+      // Cartesian cell: one scaling factor per coordinate direction
+      for (unsigned int c=0; c<n_components; ++c)
+        for (unsigned int d=0; d<dim; ++d)
+          this->gradients_quad[c][d][q_point] = (grad_in[c][d] *
+                                                 cartesian_data[0][d] * weight);
+      return;
+    }
+
+  // general/affine cell: full Jacobian, constant on affine cells
+  const Tensor<2,dim,VectorizedArray<Number> > &jac =
+    general_cell ? jacobian[q_point] : jacobian[0];
+  for (unsigned int c=0; c<n_components; ++c)
+    for (unsigned int d=0; d<dim; ++d)
+      {
+        // column d of jac contracted with the submitted gradient
+        VectorizedArray<Number> sum = jac[0][d] * grad_in[c][0];
+        for (unsigned int e=1; e<dim; ++e)
+          sum += (jac[e][d] * grad_in[c][e]);
+        this->gradients_quad[c][d][q_point] = sum * weight;
+      }
+}
+
+
+
+// Returns, per component, the sum over all quadrature points of the
+// entries currently held in values_quad.
+template <int dim, int n_components_, typename Number>
+inline
+Tensor<1,n_components_,VectorizedArray<Number> >
+FEEvaluationBase<dim,n_components_,Number>
+::integrate_value () const
+{
+#ifdef DEBUG
+  Assert (this->cell != numbers::invalid_unsigned_int, ExcNotInitialized());
+  Assert (this->values_quad_submitted == true,
+          internal::ExcAccessToUninitializedField());
+#endif
+  const unsigned int n_points = this->data->n_q_points;
+
+  Tensor<1,n_components_,VectorizedArray<Number> > integral;
+  for (unsigned int c=0; c<n_components; ++c)
+    {
+      // seed with the first point, then accumulate the remaining ones
+      VectorizedArray<Number> sum = this->values_quad[c][0];
+      for (unsigned int q=1; q<n_points; ++q)
+        sum += this->values_quad[c][q];
+      integral[c] = sum;
+    }
+  return integral;
+}
+
+
+
+/*----------------------- FEEvaluationAccess --------------------------------*/
+
+
+// Constructor taking a MatrixFree object. All arguments are handed on
+// unchanged to the FEEvaluationBase constructor.
+template <int dim, int n_components_, typename Number>
+inline
+FEEvaluationAccess<dim,n_components_,Number>
+::FEEvaluationAccess (const MatrixFree<dim,Number> &data_in,
+                      const unsigned int            fe_no,
+                      const unsigned int            quad_no_in,
+                      const unsigned int            fe_degree,
+                      const unsigned int            n_q_points)
+  :
+  FEEvaluationBase <dim,n_components_,Number> (data_in,
+                                               fe_no,
+                                               quad_no_in,
+                                               fe_degree,
+                                               n_q_points)
+{
+  // nothing to do besides initializing the base class
+}
+
+
+
+// Constructor taking a mapping, finite element and 1d quadrature instead of
+// a MatrixFree object. All arguments are handed on unchanged to the
+// FEEvaluationBase constructor.
+template <int dim, int n_components_, typename Number>
+template <int n_components_other>
+inline
+FEEvaluationAccess<dim,n_components_,Number>
+::FEEvaluationAccess (const Mapping<dim>       &mapping,
+                      const FiniteElement<dim> &fe,
+                      const Quadrature<1>      &quadrature,
+                      const UpdateFlags         update_flags,
+                      const unsigned int        first_selected_component,
+                      const FEEvaluationBase<dim,n_components_other,Number> *other)
+  :
+  FEEvaluationBase <dim,n_components_,Number>(mapping,
+                                              fe,
+                                              quadrature,
+                                              update_flags,
+                                              first_selected_component,
+                                              other)
+{
+  // nothing to do besides initializing the base class
+}
+
+
+
+// Copy constructor; copying is done entirely by the FEEvaluationBase copy
+// constructor.
+template <int dim, int n_components_, typename Number>
+inline
+FEEvaluationAccess<dim,n_components_,Number>
+::FEEvaluationAccess (const FEEvaluationAccess<dim,n_components_,Number> &other)
+  :
+  FEEvaluationBase <dim,n_components_,Number> (other)
+{
+  // nothing to do besides copying the base class
+}
+
+
+
+
+/*-------------------- FEEvaluationAccess scalar ----------------------------*/
+
+
+// Scalar specialization: constructor taking a MatrixFree object. All
+// arguments are handed on unchanged to the FEEvaluationBase constructor.
+template <int dim, typename Number>
+inline
+FEEvaluationAccess<dim,1,Number>
+::FEEvaluationAccess (const MatrixFree<dim,Number> &data_in,
+                      const unsigned int            fe_no,
+                      const unsigned int            quad_no_in,
+                      const unsigned int            fe_degree,
+                      const unsigned int            n_q_points)
+  :
+  FEEvaluationBase <dim,1,Number> (data_in,
+                                   fe_no,
+                                   quad_no_in,
+                                   fe_degree,
+                                   n_q_points)
+{
+  // nothing to do besides initializing the base class
+}
+
+
+
+// Scalar specialization: constructor taking a mapping, finite element and
+// 1d quadrature. All arguments are handed on unchanged to the
+// FEEvaluationBase constructor.
+template <int dim, typename Number>
+template <int n_components_other>
+inline
+FEEvaluationAccess<dim,1,Number>
+::FEEvaluationAccess (const Mapping<dim>       &mapping,
+                      const FiniteElement<dim> &fe,
+                      const Quadrature<1>      &quadrature,
+                      const UpdateFlags         update_flags,
+                      const unsigned int        first_selected_component,
+                      const FEEvaluationBase<dim,n_components_other,Number> *other)
+  :
+  FEEvaluationBase <dim,1,Number> (mapping,
+                                   fe,
+                                   quadrature,
+                                   update_flags,
+                                   first_selected_component,
+                                   other)
+{
+  // nothing to do besides initializing the base class
+}
+
+
+
+// Scalar specialization: copy constructor; copying is done entirely by the
+// FEEvaluationBase copy constructor.
+template <int dim, typename Number>
+inline
+FEEvaluationAccess<dim,1,Number>
+::FEEvaluationAccess (const FEEvaluationAccess<dim,1,Number> &other)
+  :
+  FEEvaluationBase <dim,1,Number> (other)
+{
+  // nothing to do besides copying the base class
+}
+
+
+
+// Scalar specialization: returns the stored value of degree of freedom dof
+// directly as a VectorizedArray (component 0 of values_dofs).
+template <int dim, typename Number>
+inline
+VectorizedArray<Number>
+FEEvaluationAccess<dim,1,Number>
+::get_dof_value (const unsigned int dof) const
+{
+  AssertIndexRange (dof, this->data->dofs_per_cell);
+
+  const VectorizedArray<Number> &stored = this->values_dofs[0][dof];
+  return stored;
+}
+
+
+
+// Scalar specialization: returns the value held in values_quad at
+// quadrature point q_point directly as a VectorizedArray.
+template <int dim, typename Number>
+inline
+VectorizedArray<Number>
+FEEvaluationAccess<dim,1,Number>
+::get_value (const unsigned int q_point) const
+{
+  Assert (this->values_quad_initialized==true,
+          internal::ExcAccessToUninitializedField());
+  AssertIndexRange (q_point, this->data->n_q_points);
+
+  const VectorizedArray<Number> &stored = this->values_quad[0][q_point];
+  return stored;
+}
+
+
+
+// Scalar specialization: returns the gradient at quadrature point q_point.
+// Implemented here rather than through the base class function because, as
+// the original author notes, the latter involves too many inefficient
+// initialization operations on tensors.
+template <int dim, typename Number>
+inline
+Tensor<1,dim,VectorizedArray<Number> >
+FEEvaluationAccess<dim,1,Number>
+::get_gradient (const unsigned int q_point) const
+{
+  Assert (this->gradients_quad_initialized==true,
+          internal::ExcAccessToUninitializedField());
+  AssertIndexRange (q_point, this->data->n_q_points);
+
+  Tensor<1,dim,VectorizedArray<Number> > gradient;
+
+  if (this->cell_type == internal::MatrixFreeFunctions::cartesian)
+    {
+      // Cartesian cell: one scaling factor per coordinate direction
+      for (unsigned int d=0; d<dim; ++d)
+        gradient[d] = (this->gradients_quad[0][d][q_point] *
+                       this->cartesian_data[0][d]);
+      return gradient;
+    }
+
+  // general cells store one Jacobian per quadrature point, otherwise the
+  // single Jacobian at index 0 is used
+  const Tensor<2,dim,VectorizedArray<Number> > &jac =
+    this->cell_type == internal::MatrixFreeFunctions::general ?
+    this->jacobian[q_point] : this->jacobian[0];
+  for (unsigned int d=0; d<dim; ++d)
+    {
+      // row d of jac times the unit-cell gradient
+      VectorizedArray<Number> sum =
+        (jac[d][0] * this->gradients_quad[0][0][q_point]);
+      for (unsigned int e=1; e<dim; ++e)
+        sum += (jac[d][e] * this->gradients_quad[0][e][q_point]);
+      gradient[d] = sum;
+    }
+  return gradient;
+}
+
+
+
+// Scalar specialization: returns the Hessian at q_point as a plain rank-2
+// tensor by taking the single component of the base class result.
+template <int dim, typename Number>
+inline
+Tensor<2,dim,VectorizedArray<Number> >
+FEEvaluationAccess<dim,1,Number>
+::get_hessian (const unsigned int q_point) const
+{
+  const Tensor<1,1,Tensor<2,dim,VectorizedArray<Number> > > per_component
+    = BaseClass::get_hessian(q_point);
+  return per_component[0];
+}
+
+
+
+// Scalar specialization: returns the diagonal of the Hessian at q_point by
+// taking the single component of the base class result.
+template <int dim, typename Number>
+inline
+Tensor<1,dim,VectorizedArray<Number> >
+FEEvaluationAccess<dim,1,Number>
+::get_hessian_diagonal (const unsigned int q_point) const
+{
+  const Tensor<1,1,Tensor<1,dim,VectorizedArray<Number> > > per_component
+    = BaseClass::get_hessian_diagonal(q_point);
+  return per_component[0];
+}
+
+
+
+// Scalar specialization: returns the Laplacian at q_point by taking the
+// single component of the base class result.
+template <int dim, typename Number>
+inline
+VectorizedArray<Number>
+FEEvaluationAccess<dim,1,Number>
+::get_laplacian (const unsigned int q_point) const
+{
+  const Tensor<1,1,VectorizedArray<Number> > per_component
+    = BaseClass::get_laplacian(q_point);
+  return per_component[0];
+}
+
+
+
+// Scalar specialization: writes val_in into the degree-of-freedom storage
+// at index dof. In debug mode the dof values are flagged as initialized
+// and the index is range-checked.
+template <int dim, typename Number>
+inline
+void
+FEEvaluationAccess<dim,1,Number>
+::submit_dof_value (const VectorizedArray<Number> val_in,
+                    const unsigned int dof)
+{
+#ifdef DEBUG
+  this->dof_values_initialized = true;
+  AssertIndexRange (dof, this->data->dofs_per_cell);
+#endif
+
+  VectorizedArray<Number> &target = this->values_dofs[0][dof];
+  target = val_in;
+}
+
+
+
+// Scalar specialization: stores val_in, multiplied by the JxW factor of
+// quadrature point q_point, in values_quad.
+template <int dim, typename Number>
+inline
+void
+FEEvaluationAccess<dim,1,Number>
+::submit_value (const VectorizedArray<Number> val_in,
+                const unsigned int q_point)
+{
+#ifdef DEBUG
+  Assert (this->cell != numbers::invalid_unsigned_int, ExcNotInitialized());
+  AssertIndexRange (q_point, this->data->n_q_points);
+  this->values_quad_submitted = true;
+#endif
+
+  // general cells read one J_value entry per quadrature point; all other
+  // cell types combine J_value[0] with the quadrature weight of the point
+  const VectorizedArray<Number> weight =
+    this->cell_type == internal::MatrixFreeFunctions::general ?
+    this->J_value[q_point] :
+    this->J_value[0] * this->quadrature_weights[q_point];
+
+  this->values_quad[0][q_point] = val_in * weight;
+}
+
+
+
+// Scalar specialization: stores the gradient grad_in for quadrature point
+// q_point in gradients_quad, multiplied by the Jacobian data of the cell
+// (from the transposed side) and by the JxW factor of the point.
+template <int dim, typename Number>
+inline
+void
+FEEvaluationAccess<dim,1,Number>
+::submit_gradient (const Tensor<1,dim,VectorizedArray<Number> > grad_in,
+                   const unsigned int q_point)
+{
+#ifdef DEBUG
+  Assert (this->cell != numbers::invalid_unsigned_int, ExcNotInitialized());
+  AssertIndexRange (q_point, this->data->n_q_points);
+  this->gradients_quad_submitted = true;
+#endif
+
+  const bool general_cell =
+    (this->cell_type == internal::MatrixFreeFunctions::general);
+
+  // per-point factor on general cells, otherwise the single stored factor
+  // times the quadrature weight
+  const VectorizedArray<Number> weight =
+    general_cell ?
+    this->J_value[q_point] :
+    this->J_value[0] * this->quadrature_weights[q_point];
+
+  if (this->cell_type == internal::MatrixFreeFunctions::cartesian)
+    {
+      // Cartesian cell: one scaling factor per coordinate direction
+      for (unsigned int d=0; d<dim; ++d)
+        this->gradients_quad[0][d][q_point] = (grad_in[d] *
+                                               this->cartesian_data[0][d] *
+                                               weight);
+      return;
+    }
+
+  // general/affine cell type
+  const Tensor<2,dim,VectorizedArray<Number> > &jac =
+    general_cell ? this->jacobian[q_point] : this->jacobian[0];
+  for (unsigned int d=0; d<dim; ++d)
+    {
+      // column d of jac contracted with the submitted gradient
+      VectorizedArray<Number> sum = jac[0][d] * grad_in[0];
+      for (unsigned int e=1; e<dim; ++e)
+        sum += jac[e][d] * grad_in[e];
+      this->gradients_quad[0][d][q_point] = sum * weight;
+    }
+}
+
+
+
+// Scalar specialization: returns the single component of the base class
+// integrate_value() result.
+template <int dim, typename Number>
+inline
+VectorizedArray<Number>
+FEEvaluationAccess<dim,1,Number>
+::integrate_value () const
+{
+  const Tensor<1,1,VectorizedArray<Number> > per_component
+    = BaseClass::integrate_value();
+  return per_component[0];
+}
+
+
+
+
+/*----------------- FEEvaluationAccess vector-valued ------------------------*/
+
+
+// Vector-valued specialization (dim components): constructor taking a
+// MatrixFree object. All arguments are handed on unchanged to the
+// FEEvaluationBase constructor.
+template <int dim, typename Number>
+inline
+FEEvaluationAccess<dim,dim,Number>
+::FEEvaluationAccess (const MatrixFree<dim,Number> &data_in,
+                      const unsigned int            fe_no,
+                      const unsigned int            quad_no_in,
+                      const unsigned int            fe_degree,
+                      const unsigned int            n_q_points)
+  :
+  FEEvaluationBase <dim,dim,Number> (data_in,
+                                     fe_no,
+                                     quad_no_in,
+                                     fe_degree,
+                                     n_q_points)
+{
+  // nothing to do besides initializing the base class
+}
+
+
+
+// Vector-valued specialization: constructor taking a mapping, finite
+// element and 1d quadrature. All arguments are handed on unchanged to the
+// FEEvaluationBase constructor.
+template <int dim, typename Number>
+template <int n_components_other>
+inline
+FEEvaluationAccess<dim,dim,Number>
+::FEEvaluationAccess (const Mapping<dim>       &mapping,
+                      const FiniteElement<dim> &fe,
+                      const Quadrature<1>      &quadrature,
+                      const UpdateFlags         update_flags,
+                      const unsigned int        first_selected_component,
+                      const FEEvaluationBase<dim,n_components_other,Number> *other)
+  :
+  FEEvaluationBase <dim,dim,Number> (mapping,
+                                     fe,
+                                     quadrature,
+                                     update_flags,
+                                     first_selected_component,
+                                     other)
+{
+  // nothing to do besides initializing the base class
+}
+
+
+
+// Vector-valued specialization: copy constructor; copying is done entirely
+// by the FEEvaluationBase copy constructor.
+template <int dim, typename Number>
+inline
+FEEvaluationAccess<dim,dim,Number>
+::FEEvaluationAccess (const FEEvaluationAccess<dim,dim,Number> &other)
+  :
+  FEEvaluationBase <dim,dim,Number> (other)
+{
+  // nothing to do besides copying the base class
+}
+
+
+
+// Vector-valued specialization: returns the base class gradient at q_point
+// converted to a rank-2 tensor.
+template <int dim, typename Number>
+inline
+Tensor<2,dim,VectorizedArray<Number> >
+FEEvaluationAccess<dim,dim,Number>
+::get_gradient (const unsigned int q_point) const
+{
+  const Tensor<2,dim,VectorizedArray<Number> > gradient
+    = BaseClass::get_gradient (q_point);
+  return gradient;
+}
+
+
+
+// Vector-valued specialization: returns the divergence at quadrature point
+// q_point, i.e. the sum of the diagonal entries of the transformed
+// gradient, without forming the full gradient tensor.
+template <int dim, typename Number>
+inline
+VectorizedArray<Number>
+FEEvaluationAccess<dim,dim,Number>
+::get_divergence (const unsigned int q_point) const
+{
+  Assert (this->gradients_quad_initialized==true,
+          internal::ExcAccessToUninitializedField());
+  AssertIndexRange (q_point, this->data->n_q_points);
+
+  if (this->cell_type == internal::MatrixFreeFunctions::cartesian)
+    {
+      // Cartesian cell: only the diagonal unit-cell derivatives enter,
+      // each scaled by the factor of its coordinate direction
+      VectorizedArray<Number> sum = (this->gradients_quad[0][0][q_point] *
+                                     this->cartesian_data[0][0]);
+      for (unsigned int d=1; d<dim; ++d)
+        sum += (this->gradients_quad[d][d][q_point] *
+                this->cartesian_data[0][d]);
+      return sum;
+    }
+
+  // cell with general/constant Jacobian
+  const Tensor<2,dim,VectorizedArray<Number> > &jac =
+    this->cell_type == internal::MatrixFreeFunctions::general ?
+    this->jacobian[q_point] : this->jacobian[0];
+
+  // the (0,0) product seeds the sum; all remaining (d,e) products are
+  // added in row-major order
+  VectorizedArray<Number> sum = (jac[0][0] * this->gradients_quad[0][0][q_point]);
+  for (unsigned int d=0; d<dim; ++d)
+    for (unsigned int e=(d==0 ? 1 : 0); e<dim; ++e)
+      sum += (jac[d][e] * this->gradients_quad[d][e][q_point]);
+  return sum;
+}
+
+
+
+// Vector-valued specialization: returns the symmetric part of the gradient
+// at quadrature point q_point. The diagonal entries are copied from the
+// gradient; every off-diagonal entry is half the sum of the two mirrored
+// gradient entries. Only dim = 1, 2, 3 are implemented.
+template <int dim, typename Number>
+inline
+SymmetricTensor<2,dim,VectorizedArray<Number> >
+FEEvaluationAccess<dim,dim,Number>
+::get_symmetric_gradient (const unsigned int q_point) const
+{
+  // copy from generic function into dim-specialization function
+  const Tensor<2,dim,VectorizedArray<Number> > grad = get_gradient(q_point);
+  VectorizedArray<Number> symmetrized [(dim*dim+dim)/2];
+
+  // name the template argument explicitly: the literal 0.5 is a double, so
+  // argument deduction would yield VectorizedArray<double>, which does not
+  // match VectorizedArray<Number> when Number is float
+  const VectorizedArray<Number> half = make_vectorized_array<Number> (0.5);
+
+  // diagonal entries occupy the first dim slots of the raw array
+  for (unsigned int d=0; d<dim; ++d)
+    symmetrized[d] = grad[d][d];
+
+  // off-diagonal entries follow in the order (0,1), (0,2), (1,2)
+  switch (dim)
+    {
+    case 1:
+      break;
+    case 2:
+      symmetrized[2] = grad[0][1] + grad[1][0];
+      symmetrized[2] *= half;
+      break;
+    case 3:
+      symmetrized[3] = grad[0][1] + grad[1][0];
+      symmetrized[3] *= half;
+      symmetrized[4] = grad[0][2] + grad[2][0];
+      symmetrized[4] *= half;
+      symmetrized[5] = grad[1][2] + grad[2][1];
+      symmetrized[5] *= half;
+      break;
+    default:
+      Assert (false, ExcNotImplemented());
+    }
+  return SymmetricTensor<2,dim,VectorizedArray<Number> > (symmetrized);
+}
+
+
+
+// Vector-valued specialization: returns the curl at quadrature point
+// q_point, assembled from the off-diagonal entries of the gradient. The
+// result has one entry in 2d and three entries in 3d; in 1d an assertion
+// is triggered.
+template <int dim, typename Number>
+inline
+Tensor<1,(dim==2?1:dim),VectorizedArray<Number> >
+FEEvaluationAccess<dim,dim,Number>
+::get_curl (const unsigned int q_point) const
+{
+  // copy from generic function into dim-specialization function
+  const Tensor<2,dim,VectorizedArray<Number> > gradient = get_gradient(q_point);
+  Tensor<1,(dim==2?1:dim),VectorizedArray<Number> > result;
+
+  if (dim == 1)
+    {
+      Assert (false,
+              ExcMessage("Computing the curl in 1d is not a useful operation"));
+    }
+  else if (dim == 2)
+    {
+      result[0] = gradient[1][0] - gradient[0][1];
+    }
+  else if (dim == 3)
+    {
+      result[0] = gradient[2][1] - gradient[1][2];
+      result[1] = gradient[0][2] - gradient[2][0];
+      result[2] = gradient[1][0] - gradient[0][1];
+    }
+  else
+    {
+      Assert (false, ExcNotImplemented());
+    }
+  return result;
+}
+
+
+
+// Vector-valued specialization: returns the base class Hessian diagonal at
+// q_point converted to a rank-2 tensor (first index: component).
+template <int dim, typename Number>
+inline
+Tensor<2,dim,VectorizedArray<Number> >
+FEEvaluationAccess<dim,dim,Number>
+::get_hessian_diagonal (const unsigned int q_point) const
+{
+  Assert (this->hessians_quad_initialized==true,
+          internal::ExcAccessToUninitializedField());
+  AssertIndexRange (q_point, this->data->n_q_points);
+
+  const Tensor<2,dim,VectorizedArray<Number> > diagonal
+    = BaseClass::get_hessian_diagonal (q_point);
+  return diagonal;
+}
+
+
+
+// Vector-valued specialization: returns the base class Hessian at q_point
+// converted to a rank-3 tensor (first index: component).
+template <int dim, typename Number>
+inline
+Tensor<3,dim,VectorizedArray<Number> >
+FEEvaluationAccess<dim,dim,Number>
+::get_hessian (const unsigned int q_point) const
+{
+  Assert (this->hessians_quad_initialized==true,
+          internal::ExcAccessToUninitializedField());
+  AssertIndexRange (q_point, this->data->n_q_points);
+
+  const Tensor<3,dim,VectorizedArray<Number> > hessian
+    = BaseClass::get_hessian(q_point);
+  return hessian;
+}
+
+
+
+// Vector-valued specialization: accepts the gradient as a rank-2 tensor,
+// converts it to the nested rank-1 form expected by the base class and
+// forwards it together with q_point.
+template <int dim, typename Number>
+inline
+void
+FEEvaluationAccess<dim,dim,Number>
+::submit_gradient (const Tensor<2,dim,VectorizedArray<Number> > grad_in,
+                   const unsigned int q_point)
+{
+  const Tensor<1,dim,Tensor<1,dim,VectorizedArray<Number> > > nested = grad_in;
+  BaseClass::submit_gradient (nested, q_point);
+}
+
+
+
+// Vector-valued specialization: overload taking the gradient in nested
+// rank-1 form; forwards both arguments unchanged to the base class.
+template <int dim, typename Number>
+inline
+void
+FEEvaluationAccess<dim,dim,Number>
+::submit_gradient (const Tensor<1,dim,Tensor<1,dim,VectorizedArray<Number> > >
+                   grad_in,
+                   const unsigned int q_point)
+{
+  // no transformation needed here, the base class does all the work
+  BaseClass::submit_gradient (grad_in, q_point);
+}
+
+
+
+// Vector-valued specialization: stores div_in for quadrature point q_point
+// in gradients_quad, multiplied by the Jacobian data and the JxW factor.
+// All dim x dim entries of gradients_quad at q_point are overwritten.
+template <int dim, typename Number>
+inline
+void
+FEEvaluationAccess<dim,dim,Number>
+::submit_divergence (const VectorizedArray<Number> div_in,
+                     const unsigned int q_point)
+{
+#ifdef DEBUG
+  Assert (this->cell != numbers::invalid_unsigned_int, ExcNotInitialized());
+  AssertIndexRange (q_point, this->data->n_q_points);
+  this->gradients_quad_submitted = true;
+#endif
+
+  const bool general_cell =
+    (this->cell_type == internal::MatrixFreeFunctions::general);
+
+  // per-point factor on general cells, otherwise the single stored factor
+  // times the quadrature weight; then scale by the submitted divergence
+  const VectorizedArray<Number> weight =
+    general_cell ?
+    this->J_value[q_point] :
+    this->J_value[0] * this->quadrature_weights[q_point];
+  const VectorizedArray<Number> scaled_div = weight * div_in;
+
+  if (this->cell_type == internal::MatrixFreeFunctions::cartesian)
+    {
+      // Cartesian cell: only the diagonal is nonzero, all off-diagonal
+      // entries are explicitly reset to zero
+      for (unsigned int d=0; d<dim; ++d)
+        for (unsigned int e=0; e<dim; ++e)
+          {
+            if (d == e)
+              this->gradients_quad[d][d][q_point] = (scaled_div *
+                                                     this->cartesian_data[0][d]);
+            else
+              this->gradients_quad[d][e][q_point] = VectorizedArray<Number>();
+          }
+      return;
+    }
+
+  // general/affine cell: every entry is the matching Jacobian entry times
+  // the scaled divergence
+  const Tensor<2,dim,VectorizedArray<Number> > &jac =
+    general_cell ? this->jacobian[q_point] : this->jacobian[0];
+  for (unsigned int d=0; d<dim; ++d)
+    for (unsigned int e=0; e<dim; ++e)
+      this->gradients_quad[d][e][q_point] = jac[d][e] * scaled_div;
+}
+
+
+
+// Vector-valued specialization: stores the symmetric gradient sym_grad for
+// quadrature point q_point in gradients_quad. Implemented directly instead
+// of through the base class because, as the original author notes, that
+// would add overhead and the symmetric tensor saves some operations.
+template <int dim, typename Number>
+inline
+void
+FEEvaluationAccess<dim,dim,Number>
+::submit_symmetric_gradient(const SymmetricTensor<2,dim,VectorizedArray<Number> >
+                            sym_grad,
+                            const unsigned int q_point)
+{
+#ifdef DEBUG
+  Assert (this->cell != numbers::invalid_unsigned_int, ExcNotInitialized());
+  AssertIndexRange (q_point, this->data->n_q_points);
+  this->gradients_quad_submitted = true;
+#endif
+
+  const bool general_cell =
+    (this->cell_type == internal::MatrixFreeFunctions::general);
+
+  // per-point factor on general cells, otherwise the single stored factor
+  // times the quadrature weight
+  const VectorizedArray<Number> weight =
+    general_cell ?
+    this->J_value[q_point] :
+    this->J_value[0] * this->quadrature_weights[q_point];
+
+  if (this->cell_type == internal::MatrixFreeFunctions::cartesian)
+    {
+      // raw entries 0..dim-1 are read as the diagonal, the entries from
+      // raw index dim onwards as the off-diagonal pairs (row < column)
+      for (unsigned int row=0, raw_index=dim; row<dim; ++row)
+        {
+          this->gradients_quad[row][row][q_point] =
+            (sym_grad.access_raw_entry(row) *
+             weight *
+             this->cartesian_data[0][row]);
+          for (unsigned int col=row+1; col<dim; ++col, ++raw_index)
+            {
+              const VectorizedArray<Number> off_diagonal =
+                sym_grad.access_raw_entry(raw_index) * weight;
+              this->gradients_quad[row][col][q_point] =
+                (off_diagonal * this->cartesian_data[0][col]);
+              this->gradients_quad[col][row][q_point] =
+                (off_diagonal * this->cartesian_data[0][row]);
+            }
+        }
+      return;
+    }
+
+  // general/affine cell type
+  const Tensor<2,dim,VectorizedArray<Number> > &jac =
+    general_cell ? this->jacobian[q_point] : this->jacobian[0];
+
+  // expand the symmetric tensor, already multiplied by the weight, into a
+  // full dim x dim array
+  VectorizedArray<Number> scaled [dim][dim];
+  for (unsigned int i=0, raw_index=dim; i<dim; ++i)
+    {
+      scaled[i][i] = sym_grad.access_raw_entry(i) * weight;
+      for (unsigned int j=i+1; j<dim; ++j, ++raw_index)
+        {
+          const VectorizedArray<Number> off_diagonal =
+            sym_grad.access_raw_entry(raw_index) * weight;
+          scaled[i][j] = off_diagonal;
+          scaled[j][i] = off_diagonal;
+        }
+    }
+
+  // contract each row of the expanded tensor with column d of jac
+  for (unsigned int c=0; c<dim; ++c)
+    for (unsigned int d=0; d<dim; ++d)
+      {
+        VectorizedArray<Number> sum = jac[0][d] * scaled[c][0];
+        for (unsigned int e=1; e<dim; ++e)
+          sum += jac[e][d] * scaled[c][e];
+        this->gradients_quad[c][d][q_point] = sum;
+      }
+}
+
+
+
+// Vector-valued specialization: builds a gradient tensor whose
+// off-diagonal entries are +/- the entries of curl and hands it to
+// submit_gradient(). In 1d an assertion is triggered.
+template <int dim, typename Number>
+inline
+void
+FEEvaluationAccess<dim,dim,Number>
+::submit_curl (const Tensor<1,dim==2?1:dim,VectorizedArray<Number> > curl,
+               const unsigned int q_point)
+{
+  // entries not assigned below keep the value set by the default
+  // constructor of the tensor
+  Tensor<2,dim,VectorizedArray<Number> > gradient;
+
+  if (dim == 1)
+    {
+      Assert (false,
+              ExcMessage("Testing by the curl in 1d is not a useful operation"));
+    }
+  else if (dim == 2)
+    {
+      gradient[1][0] = curl[0];
+      gradient[0][1] = -curl[0];
+    }
+  else if (dim == 3)
+    {
+      gradient[2][1] = curl[0];
+      gradient[1][2] = -curl[0];
+      gradient[0][2] = curl[1];
+      gradient[2][0] = -curl[1];
+      gradient[1][0] = curl[2];
+      gradient[0][1] = -curl[2];
+    }
+  else
+    {
+      Assert (false, ExcNotImplemented());
+    }
+  submit_gradient (gradient, q_point);
+}
+
+
+/*-------------------- FEEvaluationAccess scalar for 1d ----------------------------*/
+
+
+// Scalar 1d specialization: constructor taking a MatrixFree object. All
+// arguments are handed on unchanged to the FEEvaluationBase constructor.
+template <typename Number>
+inline
+FEEvaluationAccess<1,1,Number>
+::FEEvaluationAccess (const MatrixFree<1,Number> &data_in,
+                      const unsigned int          fe_no,
+                      const unsigned int          quad_no_in,
+                      const unsigned int          fe_degree,
+                      const unsigned int          n_q_points)
+  :
+  FEEvaluationBase <1,1,Number> (data_in,
+                                 fe_no,
+                                 quad_no_in,
+                                 fe_degree,
+                                 n_q_points)
+{
+  // nothing to do besides initializing the base class
+}
+
+
+
+// Scalar 1d specialization: constructor taking a mapping, finite element
+// and 1d quadrature. All arguments are handed on unchanged to the
+// FEEvaluationBase constructor.
+template <typename Number>
+template <int n_components_other>
+inline
+FEEvaluationAccess<1,1,Number>
+::FEEvaluationAccess (const Mapping<1>       &mapping,
+                      const FiniteElement<1> &fe,
+                      const Quadrature<1>    &quadrature,
+                      const UpdateFlags       update_flags,
+                      const unsigned int      first_selected_component,
+                      const FEEvaluationBase<1,n_components_other,Number> *other)
+  :
+  FEEvaluationBase <1,1,Number> (mapping,
+                                 fe,
+                                 quadrature,
+                                 update_flags,
+                                 first_selected_component,
+                                 other)
+{
+  // nothing to do besides initializing the base class
+}
+
+
+
+// Scalar 1d specialization: copy constructor; copying is done entirely by
+// the FEEvaluationBase copy constructor.
+template <typename Number>
+inline
+FEEvaluationAccess<1,1,Number>
+::FEEvaluationAccess (const FEEvaluationAccess<1,1,Number> &other)
+  :
+  FEEvaluationBase <1,1,Number> (other)
+{
+  // nothing to do besides copying the base class
+}
+
+
+
+// Scalar 1d specialization: returns the stored value of degree of freedom
+// dof directly as a VectorizedArray (component 0 of values_dofs).
+template <typename Number>
+inline
+VectorizedArray<Number>
+FEEvaluationAccess<1,1,Number>
+::get_dof_value (const unsigned int dof) const
+{
+  AssertIndexRange (dof, this->data->dofs_per_cell);
+
+  const VectorizedArray<Number> &stored = this->values_dofs[0][dof];
+  return stored;
+}
+
+
+
+// Scalar 1d specialization: returns the value held in values_quad at
+// quadrature point q_point directly as a VectorizedArray.
+template <typename Number>
+inline
+VectorizedArray<Number>
+FEEvaluationAccess<1,1,Number>
+::get_value (const unsigned int q_point) const
+{
+  Assert (this->values_quad_initialized==true,
+          internal::ExcAccessToUninitializedField());
+  AssertIndexRange (q_point, this->data->n_q_points);
+
+  const VectorizedArray<Number> &stored = this->values_quad[0][q_point];
+  return stored;
+}
+
+
+
+// Returns the gradient at quadrature point 'q_point': the entry of
+// gradients_quad is multiplied by cartesian_data[0][0] on Cartesian cells
+// and by the (0,0) entry of the Jacobian otherwise. The base class
+// implementation is deliberately not used because it involves too many
+// inefficient initialization operations on tensors.
+template <typename Number>
+inline
+Tensor<1,1,VectorizedArray<Number> >
+FEEvaluationAccess<1,1,Number>::get_gradient (const unsigned int q_point) const
+{
+  Assert (this->gradients_quad_initialized==true,
+          internal::ExcAccessToUninitializedField());
+  AssertIndexRange (q_point, this->data->n_q_points);
+
+  Tensor<1,1,VectorizedArray<Number> > result;
+  const VectorizedArray<Number> &unit_gradient = this->gradients_quad[0][0][q_point];
+
+  // Cartesian cell: scale by the stored Cartesian factor and be done
+  if (this->cell_type == internal::MatrixFreeFunctions::cartesian)
+    {
+      result[0] = (unit_gradient * this->cartesian_data[0][0]);
+      return result;
+    }
+
+  // cell with general/constant Jacobian: only 'general' cells use a
+  // separate Jacobian per quadrature point, otherwise entry 0 is used
+  const unsigned int jacobian_index =
+    (this->cell_type == internal::MatrixFreeFunctions::general) ? q_point : 0;
+  const Tensor<2,1,VectorizedArray<Number> > &jac = this->jacobian[jacobian_index];
+
+  result[0] = (jac[0][0] * unit_gradient);
+  return result;
+}
+
+
+
+// Returns entry 0 of the result of the base class get_hessian() at
+// quadrature point 'q_point'.
+template <typename Number>
+inline
+Tensor<2,1,VectorizedArray<Number> >
+FEEvaluationAccess<1,1,Number>::get_hessian (const unsigned int q_point) const
+{
+  return this->BaseClass::get_hessian (q_point)[0];
+}
+
+
+
+// Returns entry 0 of the result of the base class get_hessian_diagonal()
+// at quadrature point 'q_point'.
+template <typename Number>
+inline
+Tensor<1,1,VectorizedArray<Number> >
+FEEvaluationAccess<1,1,Number>::get_hessian_diagonal (const unsigned int q_point) const
+{
+  return this->BaseClass::get_hessian_diagonal (q_point)[0];
+}
+
+
+
+// Returns entry 0 of the result of the base class get_laplacian() at
+// quadrature point 'q_point'.
+template <typename Number>
+inline
+VectorizedArray<Number>
+FEEvaluationAccess<1,1,Number>::get_laplacian (const unsigned int q_point) const
+{
+  return this->BaseClass::get_laplacian (q_point)[0];
+}
+
+
+
+// Stores 'val_in' as entry number 'dof' of values_dofs. In debug mode the
+// dof values are additionally flagged as initialized and 'dof' is checked
+// against dofs_per_cell.
+template <typename Number>
+inline
+void
+FEEvaluationAccess<1,1,Number>::submit_dof_value (const VectorizedArray<Number> val_in,
+                                                  const unsigned int dof)
+{
+#ifdef DEBUG
+  this->dof_values_initialized = true;
+  AssertIndexRange (dof, this->data->dofs_per_cell);
+#endif
+
+  VectorizedArray<Number> &destination = this->values_dofs[0][dof];
+  destination = val_in;
+}
+
+
+
+// Writes 'val_in' multiplied by the JxW factor into values_quad at
+// quadrature point 'q_point'. On cells of type 'general' the factor is
+// read from J_value[q_point]; on all other cell types it is formed as
+// J_value[0] times the quadrature weight of the point.
+template <typename Number>
+inline
+void
+FEEvaluationAccess<1,1,Number>::submit_value (const VectorizedArray<Number> val_in,
+                                              const unsigned int q_point)
+{
+#ifdef DEBUG
+  Assert (this->cell != numbers::invalid_unsigned_int, ExcNotInitialized());
+  AssertIndexRange (q_point, this->data->n_q_points);
+  this->values_quad_submitted = true;
+#endif
+
+  const bool is_general_cell =
+    (this->cell_type == internal::MatrixFreeFunctions::general);
+
+  const VectorizedArray<Number> JxW =
+    is_general_cell ?
+    this->J_value[q_point] : this->J_value[0] * this->quadrature_weights[q_point];
+
+  this->values_quad[0][q_point] = val_in * JxW;
+}
+
+
+
+// Writes the gradient contribution 'grad_in' into gradients_quad at
+// quadrature point 'q_point'. The value is multiplied by
+// cartesian_data[0][0] (Cartesian cells) or by the (0,0) entry of the
+// Jacobian (all other cells) and by the JxW factor. Only cells of type
+// 'general' read a per-point Jacobian and a per-point J_value; otherwise
+// entry 0 is used and J_value[0] is multiplied by the quadrature weight.
+template <typename Number>
+inline
+void
+FEEvaluationAccess<1,1,Number>::submit_gradient (const Tensor<1,1,VectorizedArray<Number> > grad_in,
+                                                 const unsigned int q_point)
+{
+#ifdef DEBUG
+  Assert (this->cell != numbers::invalid_unsigned_int, ExcNotInitialized());
+  AssertIndexRange (q_point, this->data->n_q_points);
+  this->gradients_quad_submitted = true;
+#endif
+
+  const bool is_general_cell =
+    (this->cell_type == internal::MatrixFreeFunctions::general);
+
+  // for Cartesian cells this selects the second alternative, which is the
+  // same product the Cartesian case needs
+  const VectorizedArray<Number> JxW =
+    is_general_cell ?
+    this->J_value[q_point] : this->J_value[0] * this->quadrature_weights[q_point];
+
+  if (this->cell_type == internal::MatrixFreeFunctions::cartesian)
+    {
+      this->gradients_quad[0][0][q_point] = (grad_in[0] *
+                                             this->cartesian_data[0][0] *
+                                             JxW);
+      return;
+    }
+
+  // general/affine cell type
+  const Tensor<2,1,VectorizedArray<Number> > &jac =
+    this->jacobian[is_general_cell ? q_point : 0];
+
+  this->gradients_quad[0][0][q_point] = jac[0][0] * grad_in[0] * JxW;
+}
+
+
+
+// Returns entry 0 of the result of the base class integrate_value().
+template <typename Number>
+inline
+VectorizedArray<Number>
+FEEvaluationAccess<1,1,Number>::integrate_value () const
+{
+  return this->BaseClass::integrate_value ()[0];
+}
+
+
+
+namespace internal
+{
+  /**
+   * Selects which specialization of EvaluatorTensorProduct, i.e. which
+   * implementation of the tensor product evaluation routines, is used.
+   */
+  enum EvaluatorVariant
+  {
+    /// plain matrix-matrix product, no assumption on the matrix entries
+    evaluate_general,
+    /// shape functions and quadrature points symmetric about 0.5
+    evaluate_symmetric,
+    /// NOTE(review): presumably an even-odd decomposition of the 1d
+    /// matrices; its specialization is not part of this section -- confirm
+    evaluate_evenodd
+  };
+
+  /**
+   * Generic evaluator framework. The primary template is intentionally
+   * empty; the functionality is provided by the specializations for the
+   * individual EvaluatorVariant values.
+   */
+  template <EvaluatorVariant variant,
+            int              dim,
+            int              fe_degree,
+            int              n_q_points_1d,
+            typename         Number>
+  struct EvaluatorTensorProduct
+  {
+  };
+
+  /**
+   * Specialization for evaluate_general: internal evaluator for 1d-3d shape
+   * functions that uses the tensor product form of the basis functions and
+   * makes no assumption on the entries of the 1d matrices.
+   */
+  template <int dim, int fe_degree, int n_q_points_1d, typename Number>
+  struct EvaluatorTensorProduct<evaluate_general,dim,fe_degree,n_q_points_1d,Number>
+  {
+    static const unsigned int dofs_per_cell = Utilities::fixed_int_power<fe_degree+1,dim>::value;
+    static const unsigned int n_q_points = Utilities::fixed_int_power<n_q_points_1d,dim>::value;
+
+    /**
+     * Default constructor. Leaves all three shape data pointers at zero, so
+     * values(), gradients() and hessians() must not be called on such an
+     * object; only the static apply() function, which receives its matrix
+     * explicitly, can be used.
+     */
+    EvaluatorTensorProduct ()
+      : shape_values (0), shape_gradients (0), shape_hessians (0)
+    {
+    }
+
+    /**
+     * Constructor, taking the data from ShapeInfo. Only pointers to the
+     * first entries of the three arrays are stored.
+     */
+    EvaluatorTensorProduct (const AlignedVector<Number> &shape_values,
+                            const AlignedVector<Number> &shape_gradients,
+                            const AlignedVector<Number> &shape_hessians)
+      : shape_values (shape_values.begin()),
+        shape_gradients (shape_gradients.begin()),
+        shape_hessians (shape_hessians.begin())
+    {
+    }
+
+    /**
+     * Runs apply() with the stored shape value matrix.
+     */
+    template <int direction, bool dof_to_quad, bool add>
+    void values (const Number *in, Number *out) const
+    {
+      const Number *const matrix = shape_values;
+      apply<direction,dof_to_quad,add>(matrix, in, out);
+    }
+
+    /**
+     * Runs apply() with the stored shape gradient matrix.
+     */
+    template <int direction, bool dof_to_quad, bool add>
+    void gradients (const Number *in, Number *out) const
+    {
+      const Number *const matrix = shape_gradients;
+      apply<direction,dof_to_quad,add>(matrix, in, out);
+    }
+
+    /**
+     * Runs apply() with the stored shape hessian matrix.
+     */
+    template <int direction, bool dof_to_quad, bool add>
+    void hessians (const Number *in, Number *out) const
+    {
+      const Number *const matrix = shape_hessians;
+      apply<direction,dof_to_quad,add>(matrix, in, out);
+    }
+
+    /**
+     * Applies the 1d matrix @p shape_data along coordinate @p direction to
+     * the data in @p in and writes (or, for add == true, accumulates) the
+     * result in @p out.
+     */
+    template <int direction, bool dof_to_quad, bool add>
+    static void apply (const Number *shape_data,
+                       const Number in [],
+                       Number       out []);
+
+    // first entries of the 1d shape value, gradient and hessian arrays
+    const Number *shape_values;
+    const Number *shape_gradients;
+    const Number *shape_hessians;
+  };
+
+  // Applies the 1d matrix 'shape_data' along coordinate 'direction' of the
+  // tensor product data in 'in' (1d-3d) and stores (add == false) or
+  // accumulates (add == true) the result in 'out'. No particular layout of
+  // the matrix entries is used, i.e. this is a usual matrix-matrix product.
+  template <int dim, int fe_degree, int n_q_points_1d, typename Number>
+  template <int direction, bool dof_to_quad, bool add>
+  inline
+  void
+  EvaluatorTensorProduct<evaluate_general,dim,fe_degree,n_q_points_1d,Number>
+  ::apply (const Number *shape_data,
+           const Number in [],
+           Number       out [])
+  {
+    AssertIndexRange (direction, dim);
+    const int mm     = dof_to_quad ? (fe_degree+1) : n_q_points_1d,  // entries read per 1d line
+              nn     = dof_to_quad ? n_q_points_1d : (fe_degree+1);  // entries written per 1d line
+
+    const int n_blocks1 = (dim > 1 ? (direction > 0 ? nn : mm) : 1);  // trip count of the i1 loop
+    const int n_blocks2 = (dim > 2 ? (direction > 1 ? nn : mm) : 1);  // trip count of the i2 loop
+    const int stride    = Utilities::fixed_int_power<nn,direction>::value;  // spacing of the entries of one 1d line
+
+    for (int i2=0; i2<n_blocks2; ++i2)
+      {
+        for (int i1=0; i1<n_blocks1; ++i1)
+          {
+            for (int col=0; col<nn; ++col)  // one output entry per pass
+              {
+                Number val0;
+                if (dof_to_quad == true)
+                  val0 = shape_data[col];  // matrix entry (0,col), rows have n_q_points_1d entries
+                else
+                  val0 = shape_data[col*n_q_points_1d];  // matrix entry (col,0): transpose is applied
+                Number res0 = val0 * in[0];
+                for (int ind=1; ind<mm; ++ind)
+                  {
+                    if (dof_to_quad == true)
+                      val0 = shape_data[ind*n_q_points_1d+col];  // matrix entry (ind,col)
+                    else
+                      val0 = shape_data[col*n_q_points_1d+ind];  // matrix entry (col,ind)
+                    res0 += val0 * in[stride*ind];
+                  }
+                if (add == false)
+                  out[stride*col]  = res0;
+                else
+                  out[stride*col] += res0;
+              }
+
+            // increment: for direction 0, skip the line just processed (mm
+            // input and nn output entries); for directions 1 and 2, the
+            // next line starts at the neighboring entry
+            switch (direction)
+              {
+              case 0:
+                in += mm;
+                out += nn;
+                break;
+              case 1:
+              case 2:
+                ++in;
+                ++out;
+                break;
+              default:
+                Assert (false, ExcNotImplemented());
+              }
+          }
+        if (direction == 1)
+          {
+            in += nn*(mm-1);  // the i1 loop advanced by nn; skip the remaining nn*(mm-1) inputs
+            out += nn*(nn-1);  // likewise for the remaining nn*(nn-1) outputs
+          }
+      }
+  }
+
+
+
+  // This method applies the tensor product operation to produce face values
+  // out from cell values (dof_to_quad == true) or the reverse. As opposed to
+  // EvaluatorTensorProduct::apply, this method assumes that the directions
+  // orthogonal to the face have fe_degree+1 degrees of freedom per direction
+  // and not n_q_points_1d for those directions lower than the one applied
+  template <int dim, int fe_degree, typename Number, int face_direction,
+            bool dof_to_quad, bool add>
+  inline
+  void
+  apply_tensor_product_face (const Number *shape_data,
+                             const Number in [],
+                             Number       out [])
+  {
+    const int n_blocks1 = dim > 1 ? (fe_degree+1) : 1;  // trip count of the i1 loop
+    const int n_blocks2 = dim > 2 ? (fe_degree+1) : 1;  // trip count of the i2 loop
+
+    AssertIndexRange (face_direction, dim);
+    const int mm     = dof_to_quad ? (fe_degree+1) : 1,  // entries read per 1d line
+              nn     = dof_to_quad ? 1 : (fe_degree+1);  // entries written per 1d line
+
+    const int stride = Utilities::fixed_int_power<fe_degree+1,face_direction>::value;  // spacing of the entries of one 1d line
+
+    for (int i2=0; i2<n_blocks2; ++i2)
+      {
+        for (int i1=0; i1<n_blocks1; ++i1)
+          {
+            if (dof_to_quad == true)  // contract one line of mm entries to a single value
+              {
+                Number res0 = shape_data[0] * in[0];
+                for (int ind=1; ind<mm; ++ind)
+                  res0 += shape_data[ind] * in[stride*ind];
+                if (add == false)
+                  out[0]  = res0;
+                else
+                  out[0] += res0;
+              }
+            else  // spread a single value over the nn entries of one line
+              {
+                for (int col=0; col<nn; ++col)
+                  if (add == false)
+                    out[col*stride]  = shape_data[col] * in[0];
+                  else
+                    out[col*stride] += shape_data[col] * in[0];
+              }
+
+            // increment: for face_direction 0, skip the line just processed
+            // (mm input and nn output entries); otherwise step to the
+            // neighboring entry, with an extra shift for direction 1 in 3D
+            switch (face_direction)
+              {
+              case 0:
+                in += mm;
+                out += nn;
+                break;
+              case 1:
+                ++in;
+                ++out;
+                // faces 2 and 3 in 3D use local coordinate system zx, which
+                // is the other way around compared to the tensor
+                // product. Need to take that into account.
+                if (dim == 3)
+                  {
+                    if (dof_to_quad)
+                      out += fe_degree;  // together with ++out: advance by fe_degree+1
+                    else
+                      in += fe_degree;  // together with ++in: advance by fe_degree+1
+                  }
+                break;
+              case 2:
+                ++in;
+                ++out;
+                break;
+              default:
+                Assert (false, ExcNotImplemented());
+              }
+          }
+        if (face_direction == 1 && dim == 3)
+          {
+            in += mm*(mm-1);  // evaluates to 0 when mm == 1
+            out += nn*(nn-1);  // evaluates to 0 when nn == 1
+            // adjust for local coordinate system zx
+            if (dof_to_quad)
+              out -= (fe_degree+1)*(fe_degree+1)-1;
+            else
+              in -= (fe_degree+1)*(fe_degree+1)-1;
+          }
+      }
+  }
+
+
+
+  /**
+   * Specialization for evaluate_symmetric: tensor-product evaluator for
+   * "symmetric" finite elements, i.e., elements whose shape functions are
+   * symmetric about 0.5 and that are used with quadrature points which are
+   * symmetric about 0.5, too.
+   */
+  template <int dim, int fe_degree, int n_q_points_1d, typename Number>
+  struct EvaluatorTensorProduct<evaluate_symmetric,dim,fe_degree,n_q_points_1d,Number>
+  {
+    static const unsigned int dofs_per_cell = Utilities::fixed_int_power<fe_degree+1,dim>::value;
+    static const unsigned int n_q_points = Utilities::fixed_int_power<n_q_points_1d,dim>::value;
+
+    /**
+     * Constructor, taking the data from ShapeInfo. Only pointers to the
+     * first entries of the three arrays are stored.
+     */
+    EvaluatorTensorProduct (const AlignedVector<Number> &shape_values,
+                            const AlignedVector<Number> &shape_gradients,
+                            const AlignedVector<Number> &shape_hessians)
+      : shape_values (shape_values.begin()),
+        shape_gradients (shape_gradients.begin()),
+        shape_hessians (shape_hessians.begin())
+    {
+    }
+
+    /**
+     * Applies the 1d shape value matrix along coordinate @p direction.
+     */
+    template <int direction, bool dof_to_quad, bool add>
+    void values (const Number in [],
+                 Number       out[]) const;
+
+    /**
+     * Applies the 1d shape gradient matrix along coordinate @p direction.
+     */
+    template <int direction, bool dof_to_quad, bool add>
+    void gradients (const Number in [],
+                    Number       out[]) const;
+
+    /**
+     * Applies the 1d shape hessian matrix along coordinate @p direction.
+     */
+    template <int direction, bool dof_to_quad, bool add>
+    void hessians (const Number in [],
+                   Number       out[]) const;
+
+    // first entries of the 1d shape value, gradient and hessian arrays
+    const Number *shape_values;
+    const Number *shape_gradients;
+    const Number *shape_hessians;
+  };
+
+
+
+  // In this case, the 1D shape values read (sorted lexicographically, rows
+  // run over 1D dofs, columns over quadrature points):
+  // Q2 --> [ 0.687  0 -0.087 ]
+  //        [ 0.4    1  0.4   ]
+  //        [-0.087  0  0.687 ]
+  // Q3 --> [ 0.66   0.003  0.002  0.049 ]
+  //        [ 0.521  1.005 -0.01  -0.230 ]
+  //        [-0.230 -0.01   1.005  0.521 ]
+  //        [ 0.049  0.002  0.003  0.66  ]
+  // Q4 --> [ 0.658  0.022  0 -0.007 -0.032 ]
+  //        [ 0.608  1.059  0  0.039  0.176 ]
+  //        [-0.409 -0.113  1 -0.113 -0.409 ]
+  //        [ 0.176  0.039  0  1.059  0.608 ]
+  //        [-0.032 -0.007  0  0.022  0.658 ]
+  // In these matrices, we want to avoid computations involving zeros and
+  // ones and in addition use the symmetry in entries (the matrix is
+  // unchanged when the order of both rows and columns is reversed) to
+  // reduce the number of read operations.
+  template <int dim, int fe_degree, int n_q_points_1d, typename Number>
+  template <int direction, bool dof_to_quad, bool add>
+  inline
+  void
+  EvaluatorTensorProduct<evaluate_symmetric,dim,fe_degree,n_q_points_1d,Number>
+  ::values (const Number in [],
+            Number       out []) const
+  {
+    AssertIndexRange (direction, dim);
+    const int mm     = dof_to_quad ? (fe_degree+1) : n_q_points_1d,  // entries read per 1d line
+              nn     = dof_to_quad ? n_q_points_1d : (fe_degree+1);  // entries written per 1d line
+    const int n_cols = nn / 2;  // number of output pairs (col, nn-1-col)
+    const int mid    = mm / 2;  // number of input pairs (ind, mm-1-ind)
+
+    const int n_blocks1 = (dim > 1 ? (direction > 0 ? nn : mm) : 1);  // trip count of the i1 loop
+    const int n_blocks2 = (dim > 2 ? (direction > 1 ? nn : mm) : 1);  // trip count of the i2 loop
+    const int stride    = Utilities::fixed_int_power<nn,direction>::value;  // spacing of the entries of one 1d line
+
+    for (int i2=0; i2<n_blocks2; ++i2)
+      {
+        for (int i1=0; i1<n_blocks1; ++i1)
+          {
+            for (int col=0; col<n_cols; ++col)  // outputs col and nn-1-col are computed together
+              {
+                Number val0, val1, in0, in1, res0, res1;
+                if (dof_to_quad == true)
+                  {
+                    val0 = shape_values[col];  // matrix entry (0,col)
+                    val1 = shape_values[nn-1-col];  // matrix entry (0,nn-1-col)
+                  }
+                else
+                  {
+                    val0 = shape_values[col*n_q_points_1d];  // first entry of matrix row col
+                    val1 = shape_values[(col+1)*n_q_points_1d-1];  // last entry of matrix row col
+                  }
+                if (mid > 0)
+                  {
+                    in0 = in[0];
+                    in1 = in[stride*(mm-1)];
+                    res0 = val0 * in0;
+                    res1 = val1 * in0;
+                    res0 += val1 * in1;  // the mirrored input is weighted with the other entry
+                    res1 += val0 * in1;
+                    for (int ind=1; ind<mid; ++ind)
+                      {
+                        if (dof_to_quad == true)
+                          {
+                            val0 = shape_values[ind*n_q_points_1d+col];
+                            val1 = shape_values[ind*n_q_points_1d+nn-1-col];
+                          }
+                        else
+                          {
+                            val0 = shape_values[col*n_q_points_1d+ind];
+                            val1 = shape_values[(col+1)*n_q_points_1d-1-ind];
+                          }
+                        in0 = in[stride*ind];
+                        in1 = in[stride*(mm-1-ind)];
+                        res0 += val0 * in0;
+                        res1 += val1 * in0;
+                        res0 += val1 * in1;
+                        res1 += val0 * in1;
+                      }
+                  }
+                else
+                  res0 = res1 = Number();
+                if (dof_to_quad == true)
+                  {
+                    if (mm % 2 == 1)  // unpaired middle input contributes equally to both outputs
+                      {
+                        val0 = shape_values[mid*n_q_points_1d+col];
+                        val1 = val0 * in[stride*mid];
+                        res0 += val1;
+                        res1 += val1;
+                      }
+                  }
+                else
+                  {
+                    if (mm % 2 == 1 && nn % 2 == 0)  // for odd nn the middle input is skipped here (cf. the zeros above)
+                      {
+                        val0 = shape_values[col*n_q_points_1d+mid];
+                        val1 = val0 * in[stride*mid];
+                        res0 += val1;
+                        res1 += val1;
+                      }
+                  }
+                if (add == false)
+                  {
+                    out[stride*col]         = res0;
+                    out[stride*(nn-1-col)]  = res1;
+                  }
+                else
+                  {
+                    out[stride*col]        += res0;
+                    out[stride*(nn-1-col)] += res1;
+                  }
+              }
+            if ( dof_to_quad == true && nn%2==1 && mm%2==1 )  // middle output is the middle input (cf. the 0/1 column above)
+              {
+                if (add==false)
+                  out[stride*n_cols]  = in[stride*mid];
+                else
+                  out[stride*n_cols] += in[stride*mid];
+              }
+            else if (dof_to_quad == true && nn%2==1)  // middle output, even number of inputs
+              {
+                Number res0;
+                Number val0  = shape_values[n_cols];
+                if (mid > 0)
+                  {
+                    res0  = in[0] + in[stride*(mm-1)];  // both inputs of a pair share one matrix entry
+                    res0 *= val0;
+                    for (int ind=1; ind<mid; ++ind)
+                      {
+                        val0  = shape_values[ind*n_q_points_1d+n_cols];
+                        Number val1  = in[stride*ind] + in[stride*(mm-1-ind)];
+                        val1 *= val0;
+                        res0 += val1;
+                      }
+                  }
+                else
+                  res0 = Number();
+                if (add == false)
+                  out[stride*n_cols]  = res0;
+                else
+                  out[stride*n_cols] += res0;
+              }
+            else if (dof_to_quad == false && nn%2 == 1)  // middle output in the transposed direction
+              {
+                Number res0;
+                if (mid > 0)
+                  {
+                    Number val0 = shape_values[n_cols*n_q_points_1d];
+                    res0 = in[0] + in[stride*(mm-1)];
+                    res0 *= val0;
+                    for (int ind=1; ind<mid; ++ind)
+                      {
+                        val0  = shape_values[n_cols*n_q_points_1d+ind];
+                        Number val1 = in[stride*ind] + in[stride*(mm-1-ind)];
+                        val1 *= val0;
+                        res0 += val1;
+                      }
+                    if (mm % 2)
+                      res0 += in[stride*mid];  // middle input is added unscaled (entry taken as one)
+                  }
+                else
+                  res0 = in[0];  // mid == 0: the only input is copied unscaled
+                if (add == false)
+                  out[stride*n_cols]  = res0;
+                else
+                  out[stride*n_cols] += res0;
+              }
+
+            // increment: for direction 0, skip the line just processed (mm
+            // input and nn output entries); for directions 1 and 2, the next
+            // line starts at the neighboring entry
+            switch (direction)
+              {
+              case 0:
+                in += mm;
+                out += nn;
+                break;
+              case 1:
+              case 2:
+                ++in;
+                ++out;
+                break;
+              default:
+                Assert (false, ExcNotImplemented());
+              }
+          }
+        if (direction == 1)
+          {
+            in += nn*(mm-1);  // the i1 loop advanced by nn; skip the remaining nn*(mm-1) inputs
+            out += nn*(nn-1);  // likewise for the remaining nn*(nn-1) outputs
+          }
+      }
+  }
+
+
+
+  // For the specialized loop used for the gradient computation in
+  // here, the 1D shape values read (sorted lexicographically, rows
+  // run over 1D dofs, columns over quadrature points):
+  // Q2 --> [-2.549 -1  0.549 ]
+  //        [ 3.098  0 -3.098 ]
+  //        [-0.549  1  2.549 ]
+  // Q3 --> [-4.315 -1.03  0.5  -0.44  ]
+  //        [ 6.07  -1.44 -2.97  2.196 ]
+  //        [-2.196  2.97  1.44 -6.07  ]
+  //        [ 0.44  -0.5   1.03  4.315 ]
+  // Q4 --> [-6.316 -1.3    0.333 -0.353  0.413 ]
+  //        [10.111 -2.76  -2.667  2.066 -2.306 ]
+  //        [-5.688  5.773  0     -5.773  5.688 ]
+  //        [ 2.306 -2.066  2.667  2.76 -10.111 ]
+  //        [-0.413  0.353 -0.333  1.3    6.316 ]
+  // In these matrices, we want to avoid computations involving zeros and
+  // ones and in addition use the symmetry in entries (reversing the
+  // order of both rows and columns flips the sign of the matrix) to
+  // reduce the number of read operations.
+  template <int dim, int fe_degree, int n_q_points_1d, typename Number>
+  template <int direction, bool dof_to_quad, bool add>
+  inline
+  void
+  EvaluatorTensorProduct<evaluate_symmetric,dim,fe_degree,n_q_points_1d,Number>
+  ::gradients (const Number in [],
+               Number       out []) const
+  {
+    AssertIndexRange (direction, dim);
+    const int mm     = dof_to_quad ? (fe_degree+1) : n_q_points_1d,  // entries read per 1d line
+              nn     = dof_to_quad ? n_q_points_1d : (fe_degree+1);  // entries written per 1d line
+    const int n_cols = nn / 2;  // number of output pairs (col, nn-1-col)
+    const int mid    = mm / 2;  // number of input pairs (ind, mm-1-ind)
+
+    const int n_blocks1 = (dim > 1 ? (direction > 0 ? nn : mm) : 1);  // trip count of the i1 loop
+    const int n_blocks2 = (dim > 2 ? (direction > 1 ? nn : mm) : 1);  // trip count of the i2 loop
+    const int stride    = Utilities::fixed_int_power<nn,direction>::value;  // spacing of the entries of one 1d line
+
+    for (int i2=0; i2<n_blocks2; ++i2)
+      {
+        for (int i1=0; i1<n_blocks1; ++i1)
+          {
+            for (int col=0; col<n_cols; ++col)  // outputs col and nn-1-col are computed together
+              {
+                Number val0, val1, in0, in1, res0, res1;
+                if (dof_to_quad == true)
+                  {
+                    val0 = shape_gradients[col];  // matrix entry (0,col)
+                    val1 = shape_gradients[nn-1-col];  // matrix entry (0,nn-1-col)
+                  }
+                else
+                  {
+                    val0 = shape_gradients[col*n_q_points_1d];  // first entry of matrix row col
+                    val1 = shape_gradients[(nn-col-1)*n_q_points_1d];  // first entry of matrix row nn-1-col
+                  }
+                if (mid > 0)
+                  {
+                    in0 = in[0];
+                    in1 = in[stride*(mm-1)];
+                    res0 = val0 * in0;
+                    res1 = val1 * in0;
+                    res0 -= val1 * in1;  // the mirrored input enters with the opposite sign
+                    res1 -= val0 * in1;
+                    for (int ind=1; ind<mid; ++ind)
+                      {
+                        if (dof_to_quad == true)
+                          {
+                            val0 = shape_gradients[ind*n_q_points_1d+col];
+                            val1 = shape_gradients[ind*n_q_points_1d+nn-1-col];
+                          }
+                        else
+                          {
+                            val0 = shape_gradients[col*n_q_points_1d+ind];
+                            val1 = shape_gradients[(nn-col-1)*n_q_points_1d+ind];
+                          }
+                        in0 = in[stride*ind];
+                        in1 = in[stride*(mm-1-ind)];
+                        res0 += val0 * in0;
+                        res1 += val1 * in0;
+                        res0 -= val1 * in1;
+                        res1 -= val0 * in1;
+                      }
+                  }
+                else
+                  res0 = res1 = Number();
+                if (mm % 2 == 1)  // unpaired middle input: opposite signs for the two outputs
+                  {
+                    if (dof_to_quad == true)
+                      val0 = shape_gradients[mid*n_q_points_1d+col];
+                    else
+                      val0 = shape_gradients[col*n_q_points_1d+mid];
+                    val1 = val0 * in[stride*mid];
+                    res0 += val1;
+                    res1 -= val1;
+                  }
+                if (add == false)
+                  {
+                    out[stride*col]         = res0;
+                    out[stride*(nn-1-col)]  = res1;
+                  }
+                else
+                  {
+                    out[stride*col]        += res0;
+                    out[stride*(nn-1-col)] += res1;
+                  }
+              }
+            if ( nn%2 == 1 )  // middle output; a middle input is never added here (cf. the central 0 above)
+              {
+                Number val0, res0;
+                if (dof_to_quad == true)
+                  val0 = shape_gradients[n_cols];
+                else
+                  val0 = shape_gradients[n_cols*n_q_points_1d];
+                res0  = in[0] - in[stride*(mm-1)];  // difference of a pair times one matrix entry
+                res0 *= val0;
+                for (int ind=1; ind<mid; ++ind)
+                  {
+                    if (dof_to_quad == true)
+                      val0 = shape_gradients[ind*n_q_points_1d+n_cols];
+                    else
+                      val0 = shape_gradients[n_cols*n_q_points_1d+ind];
+                    Number val1  = in[stride*ind] - in[stride*(mm-1-ind)];
+                    val1 *= val0;
+                    res0 += val1;
+                  }
+                if (add == false)
+                  out[stride*n_cols]  = res0;
+                else
+                  out[stride*n_cols] += res0;
+              }
+
+            // increment: for direction 0, skip the line just processed (mm
+            // input and nn output entries); for directions 1 and 2, the
+            // next line starts at the neighboring entry (the remainder of a
+            // layer is skipped after the i1 loop when direction is 1)
+            switch (direction)
+              {
+              case 0:
+                in += mm;
+                out += nn;
+                break;
+              case 1:
+              case 2:
+                ++in;
+                ++out;
+                break;
+              default:
+                Assert (false, ExcNotImplemented());
+              }
+          }
+
+        if (direction == 1)
+          {
+            in  += nn * (mm-1);  // the i1 loop advanced by nn; skip the remaining nn*(mm-1) inputs
+            out += nn * (nn-1);  // likewise for the remaining nn*(nn-1) outputs
+          }
+      }
+  }
+
+
+
+  // evaluates the given shape data in 1d-3d using the tensor product
+  // form assuming the symmetries of unit cell shape hessians for
+  // finite elements in FEEvaluation
+  template <int dim, int fe_degree, int n_q_points_1d, typename Number>
+  template <int direction, bool dof_to_quad, bool add>
+  inline
+  void
+  EvaluatorTensorProduct<evaluate_symmetric,dim,fe_degree,n_q_points_1d,Number>
+  ::hessians (const Number in [],
+              Number       out []) const
+  {
+    AssertIndexRange (direction, dim);
+    const int mm     = dof_to_quad ? (fe_degree+1) : n_q_points_1d,
+              nn     = dof_to_quad ? n_q_points_1d : (fe_degree+1);
+    const int n_cols = nn / 2;
+    const int mid    = mm / 2;
+
+    const int n_blocks1 = (dim > 1 ? (direction > 0 ? nn : mm) : 1);
+    const int n_blocks2 = (dim > 2 ? (direction > 1 ? nn : mm) : 1);
+    const int stride    = Utilities::fixed_int_power<nn,direction>::value;
+
+    for (int i2=0; i2<n_blocks2; ++i2)
+      {
+        for (int i1=0; i1<n_blocks1; ++i1)
+          {
+            for (int col=0; col<n_cols; ++col)
+              {
+                Number val0, val1, in0, in1, res0, res1;
+                if (dof_to_quad == true)
+                  {
+                    val0 = shape_hessians[col];
+                    val1 = shape_hessians[nn-1-col];
+                  }
+                else
+                  {
+                    val0 = shape_hessians[col*n_q_points_1d];
+                    val1 = shape_hessians[(col+1)*n_q_points_1d-1];
+                  }
+                if (mid > 0)
+                  {
+                    in0 = in[0];
+                    in1 = in[stride*(mm-1)];
+                    res0 = val0 * in0;
+                    res1 = val1 * in0;
+                    res0 += val1 * in1;
+                    res1 += val0 * in1;
+                    for (int ind=1; ind<mid; ++ind)
+                      {
+                        if (dof_to_quad == true)
+                          {
+                            val0 = shape_hessians[ind*n_q_points_1d+col];
+                            val1 = shape_hessians[ind*n_q_points_1d+nn-1-col];
+                          }
+                        else
+                          {
+                            val0 = shape_hessians[col*n_q_points_1d+ind];
+                            val1 = shape_hessians[(col+1)*n_q_points_1d-1-ind];
+                          }
+                        in0 = in[stride*ind];
+                        in1 = in[stride*(mm-1-ind)];
+                        res0 += val0 * in0;
+                        res1 += val1 * in0;
+                        res0 += val1 * in1;
+                        res1 += val0 * in1;
+                      }
+                  }
+                else
+                  res0 = res1 = Number();
+                if (mm % 2 == 1)
+                  {
+                    if (dof_to_quad == true)
+                      val0 = shape_hessians[mid*n_q_points_1d+col];
+                    else
+                      val0 = shape_hessians[col*n_q_points_1d+mid];
+                    val1 = val0 * in[stride*mid];
+                    res0 += val1;
+                    res1 += val1;
+                  }
+                if (add == false)
+                  {
+                    out[stride*col]         = res0;
+                    out[stride*(nn-1-col)]  = res1;
+                  }
+                else
+                  {
+                    out[stride*col]        += res0;
+                    out[stride*(nn-1-col)] += res1;
+                  }
+              }
+            if ( nn%2 == 1 )
+              {
+                Number val0, res0;
+                if (dof_to_quad == true)
+                  val0 = shape_hessians[n_cols];
+                else
+                  val0 = shape_hessians[n_cols*n_q_points_1d];
+                if (mid > 0)
+                  {
+                    res0  = in[0] + in[stride*(mm-1)];
+                    res0 *= val0;
+                    for (int ind=1; ind<mid; ++ind)
+                      {
+                        if (dof_to_quad == true)
+                          val0 = shape_hessians[ind*n_q_points_1d+n_cols];
+                        else
+                          val0 = shape_hessians[n_cols*n_q_points_1d+ind];
+                        Number val1  = in[stride*ind] + in[stride*(mm-1-ind)];
+                        val1 *= val0;
+                        res0 += val1;
+                      }
+                  }
+                else
+                  res0 = Number();
+                if (mm % 2 == 1)
+                  {
+                    if (dof_to_quad == true)
+                      val0 = shape_hessians[mid*n_q_points_1d+n_cols];
+                    else
+                      val0 = shape_hessians[n_cols*n_q_points_1d+mid];
+                    res0 += val0 * in[stride*mid];
+                  }
+                if (add == false)
+                  out[stride*n_cols]  = res0;
+                else
+                  out[stride*n_cols] += res0;
+              }
+
+            // increment: in regular case, just go to the next point in
+            // x-direction. If we are at the end of one chunk in x-dir, need to
+            // jump over to the next layer in z-direction
+            switch (direction)
+              {
+              case 0:
+                in += mm;
+                out += nn;
+                break;
+              case 1:
+              case 2:
+                ++in;
+                ++out;
+                break;
+              default:
+                Assert (false, ExcNotImplemented());
+              }
+          }
+        if (direction == 1)
+          {
+            in += nn*(mm-1);
+            out += nn*(nn-1);
+          }
+      }
+  }
+
+
+
+  // Even-odd specialization of the tensor product evaluator. It treats the
+  // same symmetric case for values, gradients, and Hessians as the functions
+  // above, but with a different algorithm: the cost per dimension is reduced
+  // from N^2 to N^2/2, where N is the number of 1D dofs (this is plausible
+  // because the shape matrix only contains N^2/2 different entries). The
+  // operator is applied separately to the even and to the odd part of the
+  // input vector, exploiting that the shape functions evaluated at the
+  // quadrature points are symmetric. The method is presented e.g. in David
+  // A. Kopriva, "Implementing Spectral Methods for Partial Differential
+  // Equations", Springer, 2009, section 3.5.3 (Even-Odd-Decomposition). The
+  // experiments in that book say the method is not efficient for N<20; it is
+  // nonetheless more efficient here because the loop bounds are compile-time
+  // constants (templates).
+  template <int dim, int fe_degree, int n_q_points_1d, typename Number>
+  struct EvaluatorTensorProduct<evaluate_evenodd,dim,fe_degree,n_q_points_1d,Number>
+  {
+    // sizes of the dof array and of the quadrature point array of one cell
+    static const unsigned int dofs_per_cell = Utilities::fixed_int_power<fe_degree+1,dim>::value;
+    static const unsigned int n_q_points = Utilities::fixed_int_power<n_q_points_1d,dim>::value;
+
+    /**
+     * Default constructor. All three shape data pointers are set to zero,
+     * so values(), gradients() and hessians() must not be called on such an
+     * object; the static function apply() with an explicitly given data
+     * pointer does not depend on these members.
+     */
+    EvaluatorTensorProduct ()
+      : shape_values (0), shape_gradients (0), shape_hessians (0)
+    {}
+
+    /**
+     * Constructor taking the three shape data arrays (the even-odd variants
+     * stored in ShapeInfo). Only a pointer to the beginning of each array is
+     * kept; no data is copied.
+     */
+    EvaluatorTensorProduct (const AlignedVector<Number> &shape_values,
+                            const AlignedVector<Number> &shape_gradients,
+                            const AlignedVector<Number> &shape_hessians)
+      : shape_values (shape_values.begin()),
+        shape_gradients (shape_gradients.begin()),
+        shape_hessians (shape_hessians.begin())
+    {}
+
+    // Runs apply() with the stored value data and kernel type 0.
+    template <int direction, bool dof_to_quad, bool add>
+    void values (const Number *in, Number *out) const
+    {
+      const int kernel_type = 0;
+      apply<direction,dof_to_quad,add,kernel_type> (shape_values, in, out);
+    }
+
+    // Runs apply() with the stored gradient data and kernel type 1.
+    template <int direction, bool dof_to_quad, bool add>
+    void gradients (const Number *in, Number *out) const
+    {
+      const int kernel_type = 1;
+      apply<direction,dof_to_quad,add,kernel_type> (shape_gradients, in, out);
+    }
+
+    // Runs apply() with the stored Hessian data and kernel type 2.
+    template <int direction, bool dof_to_quad, bool add>
+    void hessians (const Number *in, Number *out) const
+    {
+      const int kernel_type = 2;
+      apply<direction,dof_to_quad,add,kernel_type> (shape_hessians, in, out);
+    }
+
+    // The actual even-odd kernel; 'type' is 0, 1 or 2 as passed by the three
+    // wrappers above.
+    template <int direction, bool dof_to_quad, bool add, int type>
+    static void apply (const Number *shape_data, const Number *in, Number *out);
+
+    // non-owning pointers to the shape data used by the wrappers
+    const Number *shape_values;
+    const Number *shape_gradients;
+    const Number *shape_hessians;
+  };
+
+
+  /**
+   * Even-odd kernel: applies the compressed 1D shape data @p shapes along
+   * coordinate @p direction to every 1D line of the cell data in @p in and
+   * writes the result to @p out.
+   *
+   * Template arguments:
+   * - direction: coordinate direction, asserted to be smaller than dim. The
+   *   entries of one line are 'stride' apart in both arrays, where stride is
+   *   Utilities::fixed_int_power<nn,direction>::value.
+   * - dof_to_quad: if true, a line has mm = fe_degree+1 input entries and
+   *   nn = n_q_points_1d output entries, and the shape data is read as
+   *   shapes[row*offset+col] with the input index as row. If false, mm and
+   *   nn are swapped and the output index is used as row.
+   * - add: if true the results are added to @p out, otherwise they
+   *   overwrite it.
+   * - type: 0, 1 or 2 (asserted); the wrappers values(), gradients() and
+   *   hessians() of this class pass 0, 1 and 2 together with the matching
+   *   shape array.
+   *
+   * For each line, the input entries mirrored about the middle of the line
+   * are first combined into sums and differences (xp, xm). Every pair of
+   * mirrored output entries is then obtained from two half-length sums r0
+   * and r1. An unpaired middle input entry (mm odd) and an unpaired middle
+   * output entry (nn odd) are handled separately.
+   */
+  template <int dim, int fe_degree, int n_q_points_1d, typename Number>
+  template <int direction, bool dof_to_quad, bool add, int type>
+  inline
+  void
+  EvaluatorTensorProduct<evaluate_evenodd,dim,fe_degree,n_q_points_1d,Number>
+  ::apply (const Number *shapes,
+           const Number  in [],
+           Number        out [])
+  {
+    AssertIndexRange (type, 3);
+    AssertIndexRange (direction, dim);
+    const int mm     = dof_to_quad ? (fe_degree+1) : n_q_points_1d,   // input entries per line
+              nn     = dof_to_quad ? n_q_points_1d : (fe_degree+1);   // output entries per line
+    const int n_cols = nn / 2;   // number of mirrored output pairs
+    const int mid    = mm / 2;   // number of mirrored input pairs
+
+    const int n_blocks1 = (dim > 1 ? (direction > 0 ? nn : mm) : 1);   // trip count of the inner line loop
+    const int n_blocks2 = (dim > 2 ? (direction > 1 ? nn : mm) : 1);   // trip count of the outer line loop
+    const int stride    = Utilities::fixed_int_power<nn,direction>::value;
+
+    const int offset = (n_q_points_1d+1)/2;   // row length used to index 'shapes'
+
+    // this code may look very inefficient at first sight due to the many
+    // different cases with if's at the innermost loop part, but all of the
+    // conditionals can be evaluated at compile time because they are
+    // templates, so the compiler should optimize everything away
+    for (int i2=0; i2<n_blocks2; ++i2)
+      {
+        for (int i1=0; i1<n_blocks1; ++i1)
+          {
+            Number xp[mid>0?mid:1], xm[mid>0?mid:1];   // at least one entry, so never zero-sized
+            for (int i=0; i<mid; ++i)
+              {
+                if (dof_to_quad == true && type == 1)   // here xp holds the difference, xm the sum
+                  {
+                    xp[i] = in[stride*i] - in[stride*(mm-1-i)];
+                    xm[i] = in[stride*i] + in[stride*(mm-1-i)];
+                  }
+                else   // all other cases: xp holds the sum, xm the difference
+                  {
+                    xp[i] = in[stride*i] + in[stride*(mm-1-i)];
+                    xm[i] = in[stride*i] - in[stride*(mm-1-i)];
+                  }
+              }
+            for (int col=0; col<n_cols; ++col)
+              {
+                Number r0, r1;   // r0 accumulates the xp terms, r1 the xm terms
+                if (mid > 0)
+                  {
+                    if (dof_to_quad == true)
+                      {
+                        r0 = shapes[col]                    * xp[0];
+                        r1 = shapes[fe_degree*offset + col] * xm[0];
+                      }
+                    else
+                      {
+                        r0 = shapes[col*offset]             * xp[0];
+                        r1 = shapes[(fe_degree-col)*offset] * xm[0];
+                      }
+                    for (int ind=1; ind<mid; ++ind)
+                      {
+                        if (dof_to_quad == true)   // row = input index; r1 reads the mirrored row
+                          {
+                            r0 += shapes[ind*offset+col]             * xp[ind];
+                            r1 += shapes[(fe_degree-ind)*offset+col] * xm[ind];
+                          }
+                        else   // row = output index; r1 reads the mirrored row
+                          {
+                            r0 += shapes[col*offset+ind]             * xp[ind];
+                            r1 += shapes[(fe_degree-col)*offset+ind] * xm[ind];
+                          }
+                      }
+                  }
+                else
+                  r0 = r1 = Number();   // no input pairs: start from a value-initialized Number
+                if (mm % 2 == 1 && dof_to_quad == true)   // unpaired middle input entry
+                  {
+                    if (type == 1)
+                      r1 += shapes[mid*offset+col] * in[stride*mid];
+                    else
+                      r0 += shapes[mid*offset+col] * in[stride*mid];
+                  }
+                else if (mm % 2 == 1 && (nn % 2 == 0 || type > 0))   // middle input entry, !dof_to_quad
+                  r0 += shapes[col*offset+mid] * in[stride*mid];
+
+                if (add == false)
+                  {
+                    out[stride*col]         = r0 + r1;
+                    if (type == 1 && dof_to_quad == false)   // mirrored entry gets the opposite sign
+                      out[stride*(nn-1-col)]  = r1 - r0;
+                    else
+                      out[stride*(nn-1-col)]  = r0 - r1;
+                  }
+                else
+                  {
+                    out[stride*col]        += r0 + r1;
+                    if (type == 1 && dof_to_quad == false)
+                      out[stride*(nn-1-col)] += r1 - r0;
+                    else
+                      out[stride*(nn-1-col)] += r0 - r1;
+                  }
+              }
+            if ( type == 0 && dof_to_quad == true && nn%2==1 && mm%2==1 )   // middle output = middle input
+              {
+                if (add==false)
+                  out[stride*n_cols]  = in[stride*mid];
+                else
+                  out[stride*n_cols] += in[stride*mid];
+              }
+            else if (dof_to_quad == true && nn%2==1)   // unpaired middle output entry, uses xp only
+              {
+                Number r0;
+                if (mid > 0)
+                  {
+                    r0  = shapes[n_cols] * xp[0];
+                    for (int ind=1; ind<mid; ++ind)
+                      r0 += shapes[ind*offset+n_cols] * xp[ind];
+                  }
+                else
+                  r0 = Number();
+                if (type != 1 && mm % 2 == 1)   // no middle input contribution for type 1
+                  r0 += shapes[mid*offset+n_cols] * in[stride*mid];
+
+                if (add == false)
+                  out[stride*n_cols]  = r0;
+                else
+                  out[stride*n_cols] += r0;
+              }
+            else if (dof_to_quad == false && nn%2 == 1)   // unpaired middle output entry, row n_cols
+              {
+                Number r0;
+                if (mid > 0)
+                  {
+                    if (type == 1)   // type 1 uses xm, types 0 and 2 use xp
+                      {
+                        r0 = shapes[n_cols*offset] * xm[0];
+                        for (int ind=1; ind<mid; ++ind)
+                          r0 += shapes[n_cols*offset+ind] * xm[ind];
+                      }
+                    else
+                      {
+                        r0 = shapes[n_cols*offset] * xp[0];
+                        for (int ind=1; ind<mid; ++ind)
+                          r0 += shapes[n_cols*offset+ind] * xp[ind];
+                      }
+                  }
+                else
+                  r0 = Number();
+
+                if (type == 0 && mm % 2 == 1)   // values: middle input entry is added unscaled
+                  r0 += in[stride*mid];
+                else if (type == 2 && mm % 2 == 1)
+                  r0 += shapes[n_cols*offset+mid] * in[stride*mid];
+
+                if (add == false)
+                  out[stride*n_cols]  = r0;
+                else
+                  out[stride*n_cols] += r0;
+              }
+
+            // increment: in regular case, just go to the next point in
+            // x-direction. If we are at the end of one chunk in x-dir, need to
+            // jump over to the next layer in z-direction
+            switch (direction)
+              {
+              case 0:   // lines are contiguous: skip a whole line
+                in += mm;
+                out += nn;
+                break;
+              case 1:
+              case 2:   // strided lines: the next line starts one entry further
+                ++in;
+                ++out;
+                break;
+              default:
+                Assert (false, ExcNotImplemented());
+              }
+          }
+        if (direction == 1)   // after the inner loop, skip the rest of the current layer
+          {
+            in += nn*(mm-1);
+            out += nn*(nn-1);
+          }
+      }
+  }
+
+
+
+  // Compile-time selection of the evaluator variant from the element shape
+  // function type. The flag 'is_long' only makes a difference for the two
+  // symmetric element types, where 'false' selects evaluate_symmetric and
+  // 'true' selects evaluate_evenodd. The primary template is empty and does
+  // not provide 'variant'.
+  template <MatrixFreeFunctions::ElementType element, bool is_long>
+  struct EvaluatorSelector {};
+
+  // element types that always use the general evaluator
+  template <bool is_long>
+  struct EvaluatorSelector<MatrixFreeFunctions::tensor_general,is_long>
+  { static const EvaluatorVariant variant = evaluate_general; };
+
+  template <bool is_long>
+  struct EvaluatorSelector<MatrixFreeFunctions::truncated_tensor,is_long>
+  { static const EvaluatorVariant variant = evaluate_general; };
+
+  // symmetric element types with is_long == false: symmetric evaluator
+  template <>
+  struct EvaluatorSelector<MatrixFreeFunctions::tensor_symmetric,false>
+  { static const EvaluatorVariant variant = evaluate_symmetric; };
+
+  template <>
+  struct EvaluatorSelector<MatrixFreeFunctions::tensor_symmetric_plus_dg0,false>
+  { static const EvaluatorVariant variant = evaluate_symmetric; };
+
+  // symmetric element types with is_long == true: even-odd evaluator
+  template <>
+  struct EvaluatorSelector<MatrixFreeFunctions::tensor_symmetric,true>
+  { static const EvaluatorVariant variant = evaluate_evenodd; };
+
+  template <>
+  struct EvaluatorSelector<MatrixFreeFunctions::tensor_symmetric_plus_dg0,true>
+  { static const EvaluatorVariant variant = evaluate_evenodd; };
+
+  // Gauss-Lobatto elements: always the even-odd evaluator
+  template <bool is_long>
+  struct EvaluatorSelector<MatrixFreeFunctions::tensor_gausslobatto,is_long>
+  { static const EvaluatorVariant variant = evaluate_evenodd; };
+
+
+
+  // This struct performs the evaluation of function values, gradients and
+  // Hessians for tensor-product finite elements, as well as the integration
+  // of values and gradients. The operation is used for both the symmetric and
+  // non-symmetric case, which use different apply functions 'values',
+  // 'gradients' in the individual coordinate directions. The apply functions
+  // for values are provided through one of the template classes
+  // EvaluatorTensorProduct which in turn are selected from the
+  // MatrixFreeFunctions::ElementType template argument.
+  //
+  // There is a specialization made for Gauss-Lobatto elements further down
+  // where the 'values' operation is identity, which allows us to write
+  // shorter code.
+  template <MatrixFreeFunctions::ElementType type, int dim, int fe_degree,
+            int n_q_points_1d, int n_components, typename Number>
+  struct FEEvaluationImpl
+  {
+    /**
+     * Computes data at the quadrature points from the dof values.
+     *
+     * @param shape_info         provides the 1D shape data arrays
+     * @param values_dofs_actual per component, pointer to the dof values
+     *                           (input)
+     * @param values_quad        per component, values at the quadrature
+     *                           points; written if evaluate_val is true
+     * @param gradients_quad     per component and direction, derivatives at
+     *                           the quadrature points; written if
+     *                           evaluate_grad is true
+     * @param hessians_quad      per component, dim*(dim+1)/2 arrays of second
+     *                           derivatives; written if evaluate_lapl is true
+     * @param evaluate_val       request function values
+     * @param evaluate_grad      request gradients
+     * @param evaluate_lapl      request second derivatives
+     */
+    static
+    void evaluate (const MatrixFreeFunctions::ShapeInfo<Number> &shape_info,
+                   VectorizedArray<Number> *values_dofs_actual[],
+                   VectorizedArray<Number> *values_quad[],
+                   VectorizedArray<Number> *gradients_quad[][dim],
+                   VectorizedArray<Number> *hessians_quad[][(dim*(dim+1))/2],
+                   const bool               evaluate_val,
+                   const bool               evaluate_grad,
+                   const bool               evaluate_lapl);
+
+    /**
+     * Transposed operation of evaluate() for values and gradients: reads the
+     * data at the quadrature points and writes the dof values.
+     *
+     * The flags are named integrate_val and integrate_grad, matching the
+     * out-of-class definition of this function and the declaration in the
+     * Gauss-Lobatto specialization.
+     *
+     * @param shape_info         provides the 1D shape data arrays
+     * @param values_dofs_actual per component, pointer to the dof values
+     *                           (output)
+     * @param values_quad        per component, values at the quadrature
+     *                           points; read if integrate_val is true
+     * @param gradients_quad     per component and direction, gradient data at
+     *                           the quadrature points; read if integrate_grad
+     *                           is true
+     * @param integrate_val      include the contribution of values_quad
+     * @param integrate_grad     include the contribution of gradients_quad
+     */
+    static
+    void integrate (const MatrixFreeFunctions::ShapeInfo<Number> &shape_info,
+                    VectorizedArray<Number> *values_dofs_actual[],
+                    VectorizedArray<Number> *values_quad[],
+                    VectorizedArray<Number> *gradients_quad[][dim],
+                    const bool               integrate_val,
+                    const bool               integrate_grad);
+  };
+
+  /**
+   * Computes values, gradients and/or second derivatives at all quadrature
+   * points from the dof values, for each of the n_components components, by
+   * applying 1D kernels one coordinate direction after the other.
+   *
+   * - shape_info: source of the 1D shape data. The even-odd arrays are passed
+   *   to the evaluator if the selected variant is evaluate_evenodd, the plain
+   *   arrays otherwise.
+   * - values_dofs_actual[c]: dof values of component c (input). For
+   *   truncated_tensor they are first expanded into a local full tensor
+   *   product array in which the unused positions are filled with zeros.
+   * - values_quad[c]: written only if evaluate_val is true.
+   * - gradients_quad[c][d]: derivative in direction d, written only if
+   *   evaluate_grad is true.
+   * - hessians_quad[c][k]: written only if evaluate_lapl is true. Indices
+   *   0..dim-1 hold the pure second derivatives (xx, yy, zz); in 2D index 2
+   *   holds xy, in 3D indices 3, 4, 5 hold xy, xz, yz.
+   *
+   * Returns immediately if all three flags are false. For
+   * tensor_symmetric_plus_dg0 with evaluate_val set, the dof stored at index
+   * Eval::dofs_per_cell is added to the value at every quadrature point.
+   */
+  template <MatrixFreeFunctions::ElementType type, int dim, int fe_degree,
+            int n_q_points_1d, int n_components, typename Number>
+  inline
+  void
+  FEEvaluationImpl<type,dim,fe_degree,n_q_points_1d,n_components,Number>
+  ::evaluate (const MatrixFreeFunctions::ShapeInfo<Number> &shape_info,
+              VectorizedArray<Number> *values_dofs_actual[],
+              VectorizedArray<Number> *values_quad[],
+              VectorizedArray<Number> *gradients_quad[][dim],
+              VectorizedArray<Number> *hessians_quad[][(dim*(dim+1))/2],
+              const bool               evaluate_val,
+              const bool               evaluate_grad,
+              const bool               evaluate_lapl)
+  {
+    if (evaluate_val == false && evaluate_grad == false && evaluate_lapl == false)
+      return;
+
+    const EvaluatorVariant variant =
+      EvaluatorSelector<type,(fe_degree+n_q_points_1d>4)>::variant;   // second argument: 'is_long' flag
+    typedef EvaluatorTensorProduct<variant, dim, fe_degree, n_q_points_1d,
+            VectorizedArray<Number> > Eval;
+    Eval eval (variant == evaluate_evenodd ? shape_info.shape_val_evenodd :
+               shape_info.shape_values,
+               variant == evaluate_evenodd ? shape_info.shape_gra_evenodd :
+               shape_info.shape_gradients,
+               variant == evaluate_evenodd ? shape_info.shape_hes_evenodd :
+               shape_info.shape_hessians);
+
+    const unsigned int temp_size = Eval::dofs_per_cell > Eval::n_q_points ?
+                                   Eval::dofs_per_cell : Eval::n_q_points;   // maximum of the two sizes
+
+    VectorizedArray<Number> **values_dofs = values_dofs_actual;   // redirected below for truncated_tensor
+    VectorizedArray<Number> data_array[type!=MatrixFreeFunctions::truncated_tensor ? 1 :
+                                       n_components*Eval::dofs_per_cell];   // size 1 when not needed
+    VectorizedArray<Number> *expanded_dof_values[n_components];
+    if (type == MatrixFreeFunctions::truncated_tensor)
+      {
+        for (unsigned int c=0; c<n_components; ++c)
+          expanded_dof_values[c] = &data_array[c*Eval::dofs_per_cell];
+        values_dofs = expanded_dof_values;
+
+        unsigned int count_p = 0, count_q = 0;   // positions in the truncated / expanded numbering
+        for (unsigned int i=0; i<(dim>2?fe_degree+1:1); ++i)
+          {
+            for (unsigned int j=0; j<(dim>1?fe_degree+1-i:1); ++j)
+              {
+                for (unsigned int k=0; k<fe_degree+1-j-i; ++k, ++count_p, ++count_q)   // copy existing dofs
+                  for (unsigned int c=0; c<n_components; ++c)
+                    expanded_dof_values[c][count_q] = values_dofs_actual[c][count_p];
+                for (unsigned int k=fe_degree+1-j-i; k<fe_degree+1; ++k, ++count_q)   // zero the rest of the row
+                  for (unsigned int c=0; c<n_components; ++c)
+                    expanded_dof_values[c][count_q] = VectorizedArray<Number>();
+              }
+            for (unsigned int j=fe_degree+1-i; j<fe_degree+1; ++j)   // zero the remaining rows of this layer
+              for (unsigned int k=0; k<fe_degree+1; ++k, ++count_q)
+                for (unsigned int c=0; c<n_components; ++c)
+                  expanded_dof_values[c][count_q] = VectorizedArray<Number>();
+          }
+        AssertDimension(count_q, Eval::dofs_per_cell);
+      }
+
+    // These avoid compiler errors; they are only used in sensible context but
+    // compilers typically cannot detect when we access something like
+    // gradients_quad[2] only for dim==3.
+    const unsigned int d1 = dim>1?1:0;
+    const unsigned int d2 = dim>2?2:0;
+    const unsigned int d3 = dim>2?3:0;
+    const unsigned int d4 = dim>2?4:0;
+    const unsigned int d5 = dim>2?5:0;
+
+    switch (dim)
+      {
+      case 1:
+        for (unsigned int c=0; c<n_components; c++)
+          {
+            if (evaluate_val == true)
+              eval.template values<0,true,false> (values_dofs[c], values_quad[c]);
+            if (evaluate_grad == true)
+              eval.template gradients<0,true,false>(values_dofs[c], gradients_quad[c][0]);
+            if (evaluate_lapl == true)
+              eval.template hessians<0,true,false> (values_dofs[c], hessians_quad[c][0]);
+          }
+        break;
+
+      case 2:
+        for (unsigned int c=0; c<n_components; c++)
+          {
+            VectorizedArray<Number> temp1[temp_size];   // scratch: result of the kernel in x
+
+            // grad x
+            if (evaluate_grad == true)
+              {
+                eval.template gradients<0,true,false> (values_dofs[c], temp1);
+                eval.template values<1,true,false> (temp1, gradients_quad[c][0]);
+              }
+            if (evaluate_lapl == true)
+              {
+                // grad xy
+                if (evaluate_grad == false)   // otherwise temp1 already holds the x-gradient
+                  eval.template gradients<0,true,false>(values_dofs[c], temp1);
+                eval.template gradients<1,true,false>  (temp1, hessians_quad[c][d1+d1]);
+
+                // grad xx
+                eval.template hessians<0,true,false>(values_dofs[c], temp1);
+                eval.template values<1,true,false>  (temp1, hessians_quad[c][0]);
+              }
+
+            // grad y
+            eval.template values<0,true,false> (values_dofs[c], temp1);
+            if (evaluate_grad == true)
+              eval.template gradients<1,true,false> (temp1, gradients_quad[c][d1]);
+
+            // grad yy
+            if (evaluate_lapl == true)
+              eval.template hessians<1,true,false> (temp1, hessians_quad[c][d1]);
+
+            // val: can use values applied in x
+            if (evaluate_val == true)
+              eval.template values<1,true,false> (temp1, values_quad[c]);
+          }
+        break;
+
+      case 3:
+        for (unsigned int c=0; c<n_components; c++)
+          {
+            VectorizedArray<Number> temp1[temp_size];   // scratch: after the kernel in x
+            VectorizedArray<Number> temp2[temp_size];   // scratch: after the kernels in x and y
+
+            if (evaluate_grad == true)
+              {
+                // grad x
+                eval.template gradients<0,true,false> (values_dofs[c], temp1);
+                eval.template values<1,true,false> (temp1, temp2);
+                eval.template values<2,true,false> (temp2, gradients_quad[c][0]);
+              }
+
+            if (evaluate_lapl == true)
+              {
+                // grad xz
+                if (evaluate_grad == false)   // otherwise temp1 and temp2 are still valid from above
+                  {
+                    eval.template gradients<0,true,false> (values_dofs[c], temp1);
+                    eval.template values<1,true,false> (temp1, temp2);
+                  }
+                eval.template gradients<2,true,false> (temp2, hessians_quad[c][d4]);
+
+                // grad xy
+                eval.template gradients<1,true,false> (temp1, temp2);
+                eval.template values<2,true,false> (temp2, hessians_quad[c][d3]);
+
+                // grad xx
+                eval.template hessians<0,true,false>(values_dofs[c], temp1);
+                eval.template values<1,true,false>  (temp1, temp2);
+                eval.template values<2,true,false>  (temp2, hessians_quad[c][0]);
+              }
+
+            // grad y
+            eval.template values<0,true,false> (values_dofs[c], temp1);
+            if (evaluate_grad == true)
+              {
+                eval.template gradients<1,true,false>(temp1, temp2);
+                eval.template values<2,true,false>   (temp2, gradients_quad[c][d1]);
+              }
+
+            if (evaluate_lapl == true)
+              {
+                // grad yz
+                if (evaluate_grad == false)   // otherwise temp2 already holds the y-gradient
+                  eval.template gradients<1,true,false>(temp1, temp2);
+                eval.template gradients<2,true,false>  (temp2, hessians_quad[c][d5]);
+
+                // grad yy
+                eval.template hessians<1,true,false> (temp1, temp2);
+                eval.template values<2,true,false> (temp2, hessians_quad[c][d1]);
+              }
+
+            // grad z: can use the values applied in x direction stored in temp1
+            eval.template values<1,true,false> (temp1, temp2);
+            if (evaluate_grad == true)
+              eval.template gradients<2,true,false> (temp2, gradients_quad[c][d2]);
+
+            // grad zz: can use the values applied in x and y direction stored
+            // in temp2
+            if (evaluate_lapl == true)
+              eval.template hessians<2,true,false>(temp2, hessians_quad[c][d2]);
+
+            // val: can use the values applied in x & y direction stored in temp2
+            if (evaluate_val == true)
+              eval.template values<2,true,false> (temp2, values_quad[c]);
+          }
+        break;
+
+      default:
+        AssertThrow(false, ExcNotImplemented());
+      }
+
+    // case additional dof for FE_Q_DG0: add values; gradients and second
+    // derivatives evaluate to zero
+    if (type == MatrixFreeFunctions::tensor_symmetric_plus_dg0 && evaluate_val)
+      for (unsigned int c=0; c<n_components; ++c)
+        for (unsigned int q=0; q<Eval::n_q_points; ++q)
+          values_quad[c][q] += values_dofs[c][Eval::dofs_per_cell];   // extra dof sits after the tensor dofs
+  }
+
+
+  /**
+   * Transposed operation of evaluate() for values and gradients: applies the
+   * 1D kernels with dof_to_quad == false, direction by direction, to the data
+   * given at the quadrature points and writes the result into the dof arrays,
+   * for each of the n_components components.
+   *
+   * - shape_info: source of the 1D shape data. The even-odd arrays are passed
+   *   to the evaluator if the selected variant is evaluate_evenodd, the plain
+   *   arrays otherwise.
+   * - values_dofs_actual[c]: dof values of component c (output). For
+   *   truncated_tensor the kernels write into a local full tensor product
+   *   array, from which the truncated entries are copied back at the end.
+   * - values_quad[c]: read if integrate_val is true.
+   * - gradients_quad[c][d]: read if integrate_grad is true.
+   *
+   * If both flags are set, the two contributions are summed into the same
+   * dof values. If both flags are false, none of the kernels in the switch
+   * below is run. For tensor_symmetric_plus_dg0 the dof at index
+   * Eval::dofs_per_cell is set to the sum of values_quad[c] over all
+   * quadrature points if integrate_val is true and to zero otherwise.
+   */
+  template <MatrixFreeFunctions::ElementType type, int dim, int fe_degree,
+            int n_q_points_1d, int n_components, typename Number>
+  inline
+  void
+  FEEvaluationImpl<type,dim,fe_degree,n_q_points_1d,n_components,Number>
+  ::integrate (const MatrixFreeFunctions::ShapeInfo<Number> &shape_info,
+               VectorizedArray<Number> *values_dofs_actual[],
+               VectorizedArray<Number> *values_quad[],
+               VectorizedArray<Number> *gradients_quad[][dim],
+               const bool               integrate_val,
+               const bool               integrate_grad)
+  {
+    const EvaluatorVariant variant =
+      EvaluatorSelector<type,(fe_degree+n_q_points_1d>4)>::variant;   // second argument: 'is_long' flag
+    typedef EvaluatorTensorProduct<variant, dim, fe_degree, n_q_points_1d,
+            VectorizedArray<Number> > Eval;
+    Eval eval (variant == evaluate_evenodd ? shape_info.shape_val_evenodd :
+               shape_info.shape_values,
+               variant == evaluate_evenodd ? shape_info.shape_gra_evenodd :
+               shape_info.shape_gradients,
+               variant == evaluate_evenodd ? shape_info.shape_hes_evenodd :
+               shape_info.shape_hessians);
+
+    const unsigned int temp_size = Eval::dofs_per_cell > Eval::n_q_points ?
+                                   Eval::dofs_per_cell : Eval::n_q_points;   // maximum of the two sizes
+    VectorizedArray<Number> temp1[temp_size];   // scratch arrays shared by all components
+    VectorizedArray<Number> temp2[temp_size];
+
+    // expand dof_values to tensor product for truncated tensor products
+    VectorizedArray<Number> **values_dofs = values_dofs_actual;
+    VectorizedArray<Number> data_array[type!=MatrixFreeFunctions::truncated_tensor ? 1 :
+                                       n_components*Eval::dofs_per_cell];   // size 1 when not needed
+    VectorizedArray<Number> *expanded_dof_values[n_components];
+    if (type == MatrixFreeFunctions::truncated_tensor)
+      {
+        for (unsigned int c=0; c<n_components; ++c)
+          expanded_dof_values[c] = &data_array[c*Eval::dofs_per_cell];
+        values_dofs = expanded_dof_values;   // kernels below write into the expanded storage
+      }
+
+    // These avoid compiler errors; they are only used in sensible context but
+    // compilers typically cannot detect when we access something like
+    // gradients_quad[2] only for dim==3.
+    const unsigned int d1 = dim>1?1:0;
+    const unsigned int d2 = dim>2?2:0;
+
+    switch (dim)
+      {
+      case 1:
+        for (unsigned int c=0; c<n_components; c++)
+          {
+            if (integrate_val == true)
+              eval.template values<0,false,false> (values_quad[c], values_dofs[c]);
+            if (integrate_grad == true)
+              {
+                if (integrate_val == true)   // add to the value contribution written above
+                  eval.template gradients<0,false,true> (gradients_quad[c][0], values_dofs[c]);
+                else   // nothing written yet: overwrite
+                  eval.template gradients<0,false,false> (gradients_quad[c][0], values_dofs[c]);
+              }
+          }
+        break;
+
+      case 2:
+        for (unsigned int c=0; c<n_components; c++)
+          {
+            if (integrate_val == true)
+              {
+                // val
+                eval.template values<0,false,false> (values_quad[c], temp1);
+                //grad x
+                if (integrate_grad == true)   // accumulate onto the value data in temp1
+                  eval.template gradients<0,false,true> (gradients_quad[c][0], temp1);
+                eval.template values<1,false,false>(temp1, values_dofs[c]);
+              }
+            if (integrate_grad == true)
+              {
+                // grad y
+                eval.template values<0,false,false>  (gradients_quad[c][d1], temp1);
+                if (integrate_val == false)
+                  {
+                    eval.template gradients<1,false,false>(temp1, values_dofs[c]);   // first write: overwrite
+                    //grad x
+                    eval.template gradients<0,false,false> (gradients_quad[c][0], temp1);
+                    eval.template values<1,false,true> (temp1, values_dofs[c]);
+                  }
+                else   // x-gradient was already included in the values branch above
+                  eval.template gradients<1,false,true>(temp1, values_dofs[c]);
+              }
+          }
+        break;
+
+      case 3:
+        for (unsigned int c=0; c<n_components; c++)
+          {
+            if (integrate_val == true)
+              {
+                // val
+                eval.template values<0,false,false> (values_quad[c], temp1);
+                //grad x: can sum to temporary value in temp1
+                if (integrate_grad == true)
+                  eval.template gradients<0,false,true> (gradients_quad[c][0], temp1);
+                eval.template values<1,false,false>(temp1, temp2);
+                if (integrate_grad == true)   // y-gradient is summed into temp2
+                  {
+                    eval.template values<0,false,false> (gradients_quad[c][d1], temp1);
+                    eval.template gradients<1,false,true>(temp1, temp2);
+                  }
+                eval.template values<2,false,false> (temp2, values_dofs[c]);
+              }
+            else if (integrate_grad == true)   // gradients only: x and y parts, first write to the dofs
+              {
+                eval.template gradients<0,false,false>(gradients_quad[c][0], temp1);
+                eval.template values<1,false,false> (temp1, temp2);
+                eval.template values<0,false,false> (gradients_quad[c][d1], temp1);
+                eval.template gradients<1,false,true>(temp1, temp2);
+                eval.template values<2,false,false> (temp2, values_dofs[c]);
+              }
+            if (integrate_grad == true)
+              {
+                // grad z: can sum to temporary x and y value in output
+                eval.template values<0,false,false> (gradients_quad[c][d2], temp1);
+                eval.template values<1,false,false> (temp1, temp2);
+                eval.template gradients<2,false,true> (temp2, values_dofs[c]);
+              }
+          }
+        break;
+
+      default:
+        AssertThrow(false, ExcNotImplemented());
+      }
+
+    // case FE_Q_DG0: add values, gradients and second derivatives are zero
+    if (type == MatrixFreeFunctions::tensor_symmetric_plus_dg0)
+      {
+        if (integrate_val)
+          for (unsigned int c=0; c<n_components; ++c)
+            {
+              values_dofs[c][Eval::dofs_per_cell] = values_quad[c][0];   // extra dof: sum over all points
+              for (unsigned int q=1; q<Eval::n_q_points; ++q)
+                values_dofs[c][Eval::dofs_per_cell] += values_quad[c][q];
+            }
+        else
+          for (unsigned int c=0; c<n_components; ++c)
+            values_dofs[c][Eval::dofs_per_cell] = VectorizedArray<Number>();
+      }
+
+    if (type == MatrixFreeFunctions::truncated_tensor)
+      {
+        unsigned int count_p = 0, count_q = 0;   // positions in the truncated / expanded numbering
+        for (unsigned int i=0; i<(dim>2?fe_degree+1:1); ++i)
+          {
+            for (unsigned int j=0; j<(dim>1?fe_degree+1-i:1); ++j)
+              {
+                for (unsigned int k=0; k<fe_degree+1-j-i; ++k, ++count_p, ++count_q)
+                  {
+                    for (unsigned int c=0; c<n_components; ++c)
+                      values_dofs_actual[c][count_p] = expanded_dof_values[c][count_q];
+                  }
+                count_q += j+i;   // skip the entries of this row not present in the truncated space
+              }
+            count_q += i*(fe_degree+1);   // skip the rows of this layer not present in the truncated space
+          }
+        AssertDimension(count_q, Eval::dofs_per_cell);
+      }
+  }
+
+  // Specialization for Gauss-Lobatto elements. For these elements the
+  // 'values' operation is the identity, which allows us to write shorter code.
+  template <int dim, int fe_degree, int n_q_points_1d, int n_components, typename Number>
+  struct FEEvaluationImpl<MatrixFreeFunctions::tensor_gausslobatto, dim,
+    fe_degree, n_q_points_1d, n_components, Number>
+  {
+    // Same argument list as the generic FEEvaluationImpl::evaluate; the dof
+    // array is called values_dofs here.
+    static void
+    evaluate (const MatrixFreeFunctions::ShapeInfo<Number> &shape_info,
+              VectorizedArray<Number> *values_dofs[],
+              VectorizedArray<Number> *values_quad[],
+              VectorizedArray<Number> *gradients_quad[][dim],
+              VectorizedArray<Number> *hessians_quad[][(dim*(dim+1))/2],
+              const bool               evaluate_val,
+              const bool               evaluate_grad,
+              const bool               evaluate_lapl);
+
+    // Same argument list as the generic FEEvaluationImpl::integrate; the dof
+    // array is called values_dofs here.
+    static void
+    integrate (const MatrixFreeFunctions::ShapeInfo<Number> &shape_info,
+               VectorizedArray<Number> *values_dofs[],
+               VectorizedArray<Number> *values_quad[],
+               VectorizedArray<Number> *gradients_quad[][dim],
+               const bool               integrate_val,
+               const bool               integrate_grad);
+  };
+  // Gauss-Lobatto evaluate(): values are copied unchanged, derivatives go through the evaluate_evenodd evaluator.
+  template <int dim, int fe_degree, int n_q_points_1d, int n_components, typename Number>
+  inline
+  void
+  FEEvaluationImpl<MatrixFreeFunctions::tensor_gausslobatto, dim,
+                   fe_degree, n_q_points_1d, n_components, Number>
+                   ::evaluate (const MatrixFreeFunctions::ShapeInfo<Number> &shape_info,
+                               VectorizedArray<Number> *values_dofs[],
+                               VectorizedArray<Number> *values_quad[],
+                               VectorizedArray<Number> *gradients_quad[][dim],
+                               VectorizedArray<Number> *hessians_quad[][(dim*(dim+1))/2],
+                               const bool               evaluate_val,
+                               const bool               evaluate_grad,
+                               const bool               evaluate_lapl)
+  {
+    typedef EvaluatorTensorProduct<evaluate_evenodd, dim, fe_degree, fe_degree+1,
+            VectorizedArray<Number> > Eval;
+    Eval eval (shape_info.shape_val_evenodd, shape_info.shape_gra_evenodd,
+               shape_info.shape_hes_evenodd);
+
+    // Index helpers that collapse to 0 when dim is too small. Compilers cannot
+    // tell that e.g. gradients_quad[c][2] is only reached for dim==3, so these
+    // keep every array subscript in range for all instantiations.
+    const unsigned int d1 = dim>1?1:0;
+    const unsigned int d2 = dim>2?2:0;
+    const unsigned int d3 = dim>2?3:0;
+    const unsigned int d4 = dim>2?4:0;
+    const unsigned int d5 = dim>2?5:0;
+
+    switch (dim)
+      {
+      case 1:
+        if (evaluate_val == true)
+          std::memcpy (values_quad[0], values_dofs[0],
+                       eval.dofs_per_cell * n_components *
+                       sizeof (values_dofs[0][0])); // one copy of dofs_per_cell*n_components entries; assumes the component arrays are contiguous - TODO confirm
+        for (unsigned int c=0; c<n_components; c++)
+          {
+            if (evaluate_grad == true)
+              eval.template gradients<0,true,false>(values_dofs[c], gradients_quad[c][0]);
+            if (evaluate_lapl == true)
+              eval.template hessians<0,true,false> (values_dofs[c], hessians_quad[c][0]);
+          }
+        break;
+
+      case 2:
+        if (evaluate_val == true)
+          {
+            std::memcpy (values_quad[0], values_dofs[0],
+                         Eval::dofs_per_cell * n_components *
+                         sizeof (values_dofs[0][0]));
+          }
+        if (evaluate_grad == true)
+          for (unsigned int comp=0; comp<n_components; comp++)
+            {
+              // grad x
+              eval.template gradients<0,true,false> (values_dofs[comp],
+                                                     gradients_quad[comp][0]);
+              // grad y
+              eval.template gradients<1,true,false> (values_dofs[comp],
+                                                     gradients_quad[comp][d1]);
+            }
+        if (evaluate_lapl == true)
+          for (unsigned int comp=0; comp<n_components; comp++)
+            {
+              // hess x
+              eval.template hessians<0,true,false> (values_dofs[comp],
+                                                    hessians_quad[comp][0]);
+              // hess y
+              eval.template hessians<1,true,false> (values_dofs[comp],
+                                                    hessians_quad[comp][d1]);
+
+              VectorizedArray<Number> temp1[Eval::dofs_per_cell];
+              // hess xy: gradient in x into temp1, then gradient in y
+              eval.template gradients<0,true,false> (values_dofs[comp], temp1);
+              eval.template gradients<1,true,false> (temp1, hessians_quad[comp][d1+d1]);
+            }
+        break;
+
+      case 3:
+        if (evaluate_val == true)
+          {
+            std::memcpy (values_quad[0], values_dofs[0],
+                         Eval::dofs_per_cell * n_components *
+                         sizeof (values_dofs[0][0]));
+          }
+        if (evaluate_grad == true)
+          for (unsigned int comp=0; comp<n_components; comp++)
+            {
+              // grad x
+              eval.template gradients<0,true,false> (values_dofs[comp],
+                                                     gradients_quad[comp][0]);
+              // grad y
+              eval.template gradients<1,true,false> (values_dofs[comp],
+                                                     gradients_quad[comp][d1]);
+              // grad z
+              eval.template gradients<2,true,false> (values_dofs[comp],
+                                                     gradients_quad[comp][d2]);
+            }
+        if (evaluate_lapl == true)
+          for (unsigned int comp=0; comp<n_components; comp++)
+            {
+              // hess x
+              eval.template hessians<0,true,false> (values_dofs[comp],
+                                                    hessians_quad[comp][0]);
+              // hess y
+              eval.template hessians<1,true,false> (values_dofs[comp],
+                                                    hessians_quad[comp][d1]);
+              // hess z
+              eval.template hessians<2,true,false> (values_dofs[comp],
+                                                    hessians_quad[comp][d2]);
+
+              VectorizedArray<Number> temp1[Eval::dofs_per_cell];
+              // hess xy: gradient in x into temp1, then gradient in y
+              eval.template gradients<0,true,false> (values_dofs[comp], temp1);
+              eval.template gradients<1,true,false> (temp1, hessians_quad[comp][d3]);
+              // hess xz: reuses the x-gradient still held in temp1
+              eval.template gradients<2,true,false> (temp1, hessians_quad[comp][d4]);
+              // hess yz: temp1 is overwritten with the y-gradient first
+              eval.template gradients<1,true,false> (values_dofs[comp], temp1);
+              eval.template gradients<2,true,false> (temp1, hessians_quad[comp][d5]);
+            }
+        break;
+      default:
+        AssertThrow(false, ExcNotImplemented());
+      }
+  }
+  // Gauss-Lobatto integrate(): quadrature values are copied into values_dofs (if requested), then gradient terms are applied.
+  template <int dim, int fe_degree, int n_q_points_1d, int n_components, typename Number>
+  inline
+  void
+  FEEvaluationImpl<MatrixFreeFunctions::tensor_gausslobatto, dim,
+                   fe_degree, n_q_points_1d, n_components, Number>
+                   ::integrate (const MatrixFreeFunctions::ShapeInfo<Number> &shape_info,
+                                VectorizedArray<Number> *values_dofs[],
+                                VectorizedArray<Number> *values_quad[],
+                                VectorizedArray<Number> *gradients_quad[][dim],
+                                const bool               integrate_val,
+                                const bool               integrate_grad)
+  {
+    typedef EvaluatorTensorProduct<evaluate_evenodd, dim, fe_degree, fe_degree+1,
+            VectorizedArray<Number> > Eval;
+    Eval eval (shape_info.shape_val_evenodd, shape_info.shape_gra_evenodd,
+               shape_info.shape_hes_evenodd);
+
+    // Index helpers that collapse to 0 when dim is too small. Compilers cannot
+    // tell that e.g. gradients_quad[c][2] is only reached for dim==3, so these
+    // keep every array subscript in range for all instantiations.
+    const unsigned int d1 = dim>1?1:0;
+    const unsigned int d2 = dim>2?2:0;
+
+    if (integrate_val == true)
+      std::memcpy (values_dofs[0], values_quad[0],
+                   Eval::dofs_per_cell * n_components *
+                   sizeof (values_dofs[0][0])); // one copy of dofs_per_cell*n_components entries; assumes the component arrays are contiguous - TODO confirm
+    switch (dim)
+      {
+      case 1:
+        for (unsigned int c=0; c<n_components; c++)
+          {
+            if (integrate_grad == true)
+              {
+                if (integrate_val == true)
+                  eval.template gradients<0,false,true> (gradients_quad[c][0],
+                                                         values_dofs[c]);
+                else
+                  eval.template gradients<0,false,false> (gradients_quad[c][0],
+                                                          values_dofs[c]);
+              }
+          }
+
+        break;
+      case 2:
+        if (integrate_grad == true)
+          for (unsigned int comp=0; comp<n_components; comp++)
+            {
+              // grad x: If integrate_val == true we have to add to the
+              // previous output
+              if (integrate_val == true)
+                eval.template gradients<0, false, true> (gradients_quad[comp][0],
+                                                         values_dofs[comp]);
+              else
+                eval.template gradients<0, false, false> (gradients_quad[comp][0],
+                                                          values_dofs[comp]);
+
+              // grad y: always added to what the x direction produced
+              eval.template gradients<1, false, true> (gradients_quad[comp][d1],
+                                                       values_dofs[comp]);
+            }
+        break;
+
+      case 3:
+        if (integrate_grad == true)
+          for (unsigned int comp=0; comp<n_components; comp++)
+            {
+              // grad x: If integrate_val == true we have to add to the
+              // previous output
+              if (integrate_val == true)
+                eval.template gradients<0, false, true> (gradients_quad[comp][0],
+                                                         values_dofs[comp]);
+              else
+                eval.template gradients<0, false, false> (gradients_quad[comp][0],
+                                                          values_dofs[comp]);
+
+              // grad y: always added to what the x direction produced
+              eval.template gradients<1, false, true> (gradients_quad[comp][d1],
+                                                       values_dofs[comp]);
+
+              // grad z: always added as well
+              eval.template gradients<2, false, true> (gradients_quad[comp][d2],
+                                                       values_dofs[comp]);
+            }
+        break;
+
+      default:
+        AssertThrow(false, ExcNotImplemented());
+      }
+  }
+
+} // end of namespace internal
+
+
+
+/*-------------------------- FEEvaluation -----------------------------------*/
+
+// Constructor taking a MatrixFree object; checks the template arguments against fe_no and sets up the data pointers.
+template <int dim, int fe_degree,  int n_q_points_1d, int n_components_,
+          typename Number>
+inline
+FEEvaluation<dim,fe_degree,n_q_points_1d,n_components_,Number>
+::FEEvaluation (const MatrixFree<dim,Number> &data_in,
+                const unsigned int fe_no,
+                const unsigned int quad_no)
+  :
+  BaseClass (data_in, fe_no, quad_no, fe_degree, n_q_points),
+  dofs_per_cell (this->data->dofs_per_cell)
+{
+  check_template_arguments(fe_no);
+  set_data_pointers();
+}
+
+// Constructor taking a mapping, a finite element, a 1D quadrature and update
+// flags. The template-argument check is called with invalid_unsigned_int.
+template <int dim, int fe_degree,  int n_q_points_1d, int n_components_,
+          typename Number>
+inline
+FEEvaluation<dim,fe_degree,n_q_points_1d,n_components_,Number>
+::FEEvaluation (const Mapping<dim>       &mapping,
+                const FiniteElement<dim> &fe,
+                const Quadrature<1>      &quadrature,
+                const UpdateFlags         update_flags,
+                const unsigned int        first_selected_component)
+  :
+  BaseClass (mapping, fe, quadrature, update_flags,
+             first_selected_component,
+             static_cast<FEEvaluationBase<dim,1,Number>*>(0)),
+  dofs_per_cell (this->data->dofs_per_cell)
+{
+  check_template_arguments(numbers::invalid_unsigned_int);
+  set_data_pointers();
+}
+
+// Same as the constructor taking a Mapping argument, except that
+// StaticMappingQ1<dim>::mapping is passed to the base class as the mapping.
+template <int dim, int fe_degree,  int n_q_points_1d, int n_components_,
+          typename Number>
+inline
+FEEvaluation<dim,fe_degree,n_q_points_1d,n_components_,Number>
+::FEEvaluation (const FiniteElement<dim> &fe,
+                const Quadrature<1>      &quadrature,
+                const UpdateFlags         update_flags,
+                const unsigned int        first_selected_component)
+  :
+  BaseClass (StaticMappingQ1<dim>::mapping, fe, quadrature, update_flags,
+             first_selected_component,
+             static_cast<FEEvaluationBase<dim,1,Number>*>(0)),
+  dofs_per_cell (this->data->dofs_per_cell)
+{
+  check_template_arguments(numbers::invalid_unsigned_int);
+  set_data_pointers();
+}
+
+// Constructor that takes mapping, quadrature and update flags from the
+// mapped_geometry of another evaluator; 'other' is also handed to the base class.
+template <int dim, int fe_degree,  int n_q_points_1d, int n_components_,
+          typename Number>
+template <int n_components_other>
+inline
+FEEvaluation<dim,fe_degree,n_q_points_1d,n_components_,Number>
+::FEEvaluation (const FiniteElement<dim> &fe,
+                const FEEvaluationBase<dim,n_components_other,Number> &other,
+                const unsigned int        first_selected_component)
+  :
+  BaseClass (other.mapped_geometry->get_fe_values().get_mapping(),
+             fe, other.mapped_geometry->get_quadrature(),
+             other.mapped_geometry->get_fe_values().get_update_flags(),
+             first_selected_component, &other),
+  dofs_per_cell (this->data->dofs_per_cell)
+{
+  check_template_arguments(numbers::invalid_unsigned_int);
+  set_data_pointers();
+}
+
+// Copy constructor. The data pointers are set up again by set_data_pointers()
+// so that they refer to my_data_array of the new object; no argument check is run.
+template <int dim, int fe_degree,  int n_q_points_1d, int n_components_,
+          typename Number>
+inline
+FEEvaluation<dim,fe_degree,n_q_points_1d,n_components_,Number>
+::FEEvaluation (const FEEvaluation &other)
+  :
+  BaseClass (other),
+  dofs_per_cell (this->data->dofs_per_cell)
+{
+  set_data_pointers();
+}
+
+// Points the per-component value/gradient/Hessian pointers into my_data_array
+// and selects the evaluate/integrate kernels matching data->element_type.
+template <int dim, int fe_degree,  int n_q_points_1d, int n_components_,
+          typename Number>
+inline
+void
+FEEvaluation<dim,fe_degree,n_q_points_1d,n_components_,Number>
+::set_data_pointers()
+{
+  AssertIndexRange(this->data->dofs_per_cell, tensor_dofs_per_cell+2);
+  const unsigned int desired_dofs_per_cell = this->data->dofs_per_cell;
+  // set the pointers to the correct position in the data array, which is laid
+  // out as [dof values | values at q-points | gradients | Hessians]
+  for (unsigned int c=0; c<n_components_; ++c)
+    {
+      this->values_dofs[c] = &my_data_array[c*desired_dofs_per_cell];
+      this->values_quad[c] = &my_data_array[n_components*desired_dofs_per_cell+c*n_q_points];
+      for (unsigned int d=0; d<dim; ++d)
+        this->gradients_quad[c][d] = &my_data_array[n_components*(desired_dofs_per_cell+
+                                                                  n_q_points)
+                                                    +
+                                                    (c*dim+d)*n_q_points];
+      for (unsigned int d=0; d<(dim*dim+dim)/2; ++d)
+        this->hessians_quad[c][d] = &my_data_array[n_components*((dim+1)*n_q_points+
+                                                                 desired_dofs_per_cell)
+                                                   +
+                                                   (c*(dim*dim+dim)+d)*n_q_points];
+    }
+  // NOTE(review): Hessian slots advance by dim*dim+dim per component although only (dim*dim+dim)/2 are assigned - confirm intended.
+  switch (this->data->element_type)
+    {
+    case internal::MatrixFreeFunctions::tensor_symmetric:
+      evaluate_funct =
+        internal::FEEvaluationImpl<internal::MatrixFreeFunctions::tensor_symmetric,
+        dim, fe_degree, n_q_points_1d, n_components_,
+        Number>::evaluate;
+      integrate_funct =
+        internal::FEEvaluationImpl<internal::MatrixFreeFunctions::tensor_symmetric,
+        dim, fe_degree, n_q_points_1d, n_components_,
+        Number>::integrate;
+      break;
+
+    case internal::MatrixFreeFunctions::tensor_symmetric_plus_dg0:
+      evaluate_funct =
+        internal::FEEvaluationImpl<internal::MatrixFreeFunctions::tensor_symmetric_plus_dg0,
+        dim, fe_degree, n_q_points_1d, n_components_,
+        Number>::evaluate;
+      integrate_funct =
+        internal::FEEvaluationImpl<internal::MatrixFreeFunctions::tensor_symmetric_plus_dg0,
+        dim, fe_degree, n_q_points_1d, n_components_,
+        Number>::integrate;
+      break;
+
+    case internal::MatrixFreeFunctions::tensor_general:
+      evaluate_funct =
+        internal::FEEvaluationImpl<internal::MatrixFreeFunctions::tensor_general,
+        dim, fe_degree, n_q_points_1d, n_components_,
+        Number>::evaluate;
+      integrate_funct =
+        internal::FEEvaluationImpl<internal::MatrixFreeFunctions::tensor_general,
+        dim, fe_degree, n_q_points_1d, n_components_,
+        Number>::integrate;
+      break;
+
+    case internal::MatrixFreeFunctions::tensor_gausslobatto:
+      evaluate_funct =
+        internal::FEEvaluationImpl<internal::MatrixFreeFunctions::tensor_gausslobatto,
+        dim, fe_degree, n_q_points_1d, n_components_,
+        Number>::evaluate;
+      integrate_funct =
+        internal::FEEvaluationImpl<internal::MatrixFreeFunctions::tensor_gausslobatto,
+        dim, fe_degree, n_q_points_1d, n_components_,
+        Number>::integrate;
+      break;
+
+    case internal::MatrixFreeFunctions::truncated_tensor:
+      evaluate_funct =
+        internal::FEEvaluationImpl<internal::MatrixFreeFunctions::truncated_tensor,
+        dim, fe_degree, n_q_points_1d, n_components_,
+        Number>::evaluate;
+      integrate_funct =
+        internal::FEEvaluationImpl<internal::MatrixFreeFunctions::truncated_tensor,
+        dim, fe_degree, n_q_points_1d, n_components_,
+        Number>::integrate;
+      break;
+
+    default:
+      AssertThrow(false, ExcNotImplemented());
+    }
+
+}
+
+// Debug-only check that fe_degree and n_q_points match the stored data; on a
+// mismatch an error message proposing corrected arguments is assembled.
+template <int dim, int fe_degree,  int n_q_points_1d, int n_components_,
+          typename Number>
+inline
+void
+FEEvaluation<dim,fe_degree,n_q_points_1d,n_components_,Number>
+::check_template_arguments(const unsigned int fe_no)
+{
+#ifdef DEBUG
+  // print error message when the dimensions do not match. Propose a possible
+  // fix
+  if (fe_degree != this->data->fe_degree
+      ||
+      n_q_points != this->data->n_q_points)
+    {
+      std::string message =
+        "-------------------------------------------------------\n";
+      message += "Illegal arguments in constructor/wrong template arguments!\n";
+      message += "    Called -->   FEEvaluation<dim,";
+      message += Utilities::int_to_string(fe_degree) + ",";
+      message += Utilities::int_to_string(n_q_points_1d);
+      message += "," + Utilities::int_to_string(n_components);
+      message += ",Number>(data";
+      if (fe_no != numbers::invalid_unsigned_int)
+        {
+          message += ", " + Utilities::int_to_string(fe_no) + ", ";
+          message += Utilities::int_to_string(this->quad_no);
+        }
+      message += ")\n";
+
+      // check whether some other vector component has the correct number of
+      // points
+      unsigned int proposed_dof_comp = numbers::invalid_unsigned_int,
+                   proposed_quad_comp = numbers::invalid_unsigned_int;
+      if (fe_no != numbers::invalid_unsigned_int)
+        {
+          if (fe_degree == this->data->fe_degree)
+            proposed_dof_comp = fe_no;
+          else
+            for (unsigned int no=0; no<this->matrix_info->n_components(); ++no)
+              if (this->matrix_info->get_shape_info(no,0,this->active_fe_index,0).fe_degree
+                  == fe_degree)
+                {
+                  proposed_dof_comp = no;
+                  break;
+                }
+          if (n_q_points ==
+              this->mapping_info->mapping_data_gen[this->quad_no].n_q_points[this->active_quad_index])
+            proposed_quad_comp = this->quad_no;
+          else
+            for (unsigned int no=0; no<this->mapping_info->mapping_data_gen.size(); ++no)
+              if (this->mapping_info->mapping_data_gen[no].n_q_points[this->active_quad_index]
+                  == n_q_points)
+                {
+                  proposed_quad_comp = no;
+                  break;
+                }
+        }
+      if (proposed_dof_comp  != numbers::invalid_unsigned_int &&
+          proposed_quad_comp != numbers::invalid_unsigned_int)
+        {
+          if (proposed_dof_comp != fe_no)
+            message += "Wrong vector component selection:\n";
+          else
+            message += "Wrong quadrature formula selection:\n";
+          message += "    Did you mean FEEvaluation<dim,";
+          message += Utilities::int_to_string(fe_degree) + ",";
+          message += Utilities::int_to_string(n_q_points_1d);
+          message += "," + Utilities::int_to_string(n_components);
+          message += ",Number>(data";
+          if (fe_no != numbers::invalid_unsigned_int)
+            {
+              message += ", " + Utilities::int_to_string(proposed_dof_comp) + ", ";
+              message += Utilities::int_to_string(proposed_quad_comp);
+            }
+          message += ")?\n";
+          std::string correct_pos;
+          if (proposed_dof_comp != fe_no)
+            correct_pos = " ^ ";
+          else
+            correct_pos = "   ";
+          if (proposed_quad_comp != this->quad_no)
+            correct_pos += " ^\n";
+          else
+            correct_pos += "  \n";
+          message += "                                                     " + correct_pos;
+        }
+      // The message always continues with a suggestion for template arguments
+      // that match the stored data (this part is not inside an else branch).
+      const unsigned int proposed_n_q_points_1d = static_cast<unsigned int>(std::pow(1.001*this->data->n_q_points,1./dim));
+      message += "Wrong template arguments:\n";
+      message += "    Did you mean FEEvaluation<dim,";
+      message += Utilities::int_to_string(this->data->fe_degree) + ",";
+      message += Utilities::int_to_string(proposed_n_q_points_1d);
+      message += "," + Utilities::int_to_string(n_components);
+      message += ",Number>(data";
+      if (fe_no != numbers::invalid_unsigned_int)
+        {
+          message += ", " + Utilities::int_to_string(fe_no) + ", ";
+          message += Utilities::int_to_string(this->quad_no);
+        }
+      message += ")?\n";
+      std::string correct_pos;
+      if (this->data->fe_degree != fe_degree)
+        correct_pos = " ^";
+      else
+        correct_pos = "  ";
+      if (proposed_n_q_points_1d != n_q_points_1d)
+        correct_pos += " ^\n";
+      else
+        correct_pos += "  \n";
+      message += "                                 " + correct_pos;
+
+      Assert (fe_degree == this->data->fe_degree &&
+              n_q_points == this->data->n_q_points,
+              ExcMessage(message));
+    }
+  if (fe_no != numbers::invalid_unsigned_int)
+    {
+      AssertDimension (n_q_points,
+                       this->mapping_info->mapping_data_gen[this->quad_no].
+                       n_q_points[this->active_quad_index]);
+      AssertDimension (this->data->dofs_per_cell * this->n_fe_components,
+                       this->dof_info->dofs_per_cell[this->active_fe_index]);
+    }
+#endif
+}
+
+// Returns the position of quadrature point q. On Cartesian cells the point is
+// assembled from per-direction entries selected by the tensor index of q.
+template <int dim, int fe_degree,  int n_q_points_1d, int n_components_,
+          typename Number>
+inline
+Point<dim,VectorizedArray<Number> >
+FEEvaluation<dim,fe_degree,n_q_points_1d,n_components_,Number>
+::quadrature_point (const unsigned int q) const
+{
+  Assert (this->mapping_info->quadrature_points_initialized == true,
+          ExcNotInitialized());
+  AssertIndexRange (q, n_q_points);
+
+  // Cartesian mesh: not all quadrature points are stored, only the
+  // diagonal. Hence, need to find the tensor product index and retrieve the
+  // value from that
+  if (this->cell_type == internal::MatrixFreeFunctions::cartesian)
+    {
+      Point<dim,VectorizedArray<Number> > point;
+      switch (dim)
+        {
+        case 1:
+          return this->quadrature_points[q];
+        case 2:
+          point[0] = this->quadrature_points[q%n_q_points_1d][0];
+          point[1] = this->quadrature_points[q/n_q_points_1d][1];
+          return point;
+        case 3:
+          point[0] = this->quadrature_points[q%n_q_points_1d][0];
+          point[1] = this->quadrature_points[(q/n_q_points_1d)%n_q_points_1d][1];
+          point[2] = this->quadrature_points[q/(n_q_points_1d*n_q_points_1d)][2];
+          return point;
+        default:
+          Assert (false, ExcNotImplemented());
+          return point;
+        }
+    }
+  // all other cases: just return the respective data as it is fully stored
+  else
+    return this->quadrature_points[q];
+}
+
+// Calls evaluate_funct on the dof values and the quadrature-point arrays. In
+// debug mode, the fields that were requested are marked as initialized.
+template <int dim, int fe_degree,  int n_q_points_1d, int n_components_,
+          typename Number>
+inline
+void
+FEEvaluation<dim,fe_degree,n_q_points_1d,n_components_,Number>
+::evaluate (const bool evaluate_val,
+            const bool evaluate_grad,
+            const bool evaluate_lapl)
+{
+  Assert (this->dof_values_initialized == true,
+          internal::ExcAccessToUninitializedField());
+  Assert(this->matrix_info != 0 ||
+         this->mapped_geometry->is_initialized(), ExcNotInitialized());
+
+  // Select algorithm matching the element type at run time (the function
+  // pointer is easy to predict, so negligible in cost)
+  evaluate_funct (*this->data, &this->values_dofs[0],
+                  this->values_quad, this->gradients_quad, this->hessians_quad,
+                  evaluate_val, evaluate_grad, evaluate_lapl);
+
+#ifdef DEBUG
+  if (evaluate_val == true)
+    this->values_quad_initialized = true;
+  if (evaluate_grad == true)
+    this->gradients_quad_initialized = true;
+  if (evaluate_lapl == true)
+    this->hessians_quad_initialized  = true;
+#endif
+}
+
+// Calls integrate_funct on the quadrature-point data. The requested fields are
+// asserted to have been submitted; in debug mode the dof values are marked initialized.
+template <int dim, int fe_degree,  int n_q_points_1d, int n_components_,
+          typename Number>
+inline
+void
+FEEvaluation<dim,fe_degree,n_q_points_1d,n_components_,Number>
+::integrate (bool integrate_val,bool integrate_grad)
+{
+  if (integrate_val == true)
+    Assert (this->values_quad_submitted == true,
+            internal::ExcAccessToUninitializedField());
+  if (integrate_grad == true)
+    Assert (this->gradients_quad_submitted == true,
+            internal::ExcAccessToUninitializedField());
+  Assert(this->matrix_info != 0 ||
+         this->mapped_geometry->is_initialized(), ExcNotInitialized());
+
+  // Select algorithm matching the element type at run time (the function
+  // pointer is easy to predict, so negligible in cost)
+  integrate_funct (*this->data, this->values_dofs, this->values_quad,
+                   this->gradients_quad, integrate_val, integrate_grad);
+
+#ifdef DEBUG
+  this->dof_values_initialized = true;
+#endif
+}
+
+
+
+#endif  // ifndef DOXYGEN
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/matrix_free/helper_functions.h b/include/deal.II/matrix_free/helper_functions.h
new file mode 100644
index 0000000..f828027
--- /dev/null
+++ b/include/deal.II/matrix_free/helper_functions.h
@@ -0,0 +1,182 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2011 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__matrix_free_helper_functions_h
+#define dealii__matrix_free_helper_functions_h
+
+
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/base/index_set.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/tensor.h>
+#include <deal.II/base/vectorization.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+namespace internal
+{
+  namespace MatrixFreeFunctions
+  {
+    // forward declaration of internal data structure
+    template <typename Number> struct ConstraintValues;
+
+    /**
+     * A struct that collects all information related to parallelization with
+     * threads: The work is subdivided into tasks that can be done
+     * independently.
+     */
+    struct TaskInfo
+    {
+      /**
+       * Constructor.
+       */
+      TaskInfo ();
+
+      /**
+       * Clears all the data fields and resets them to zero.
+       */
+      void clear ();
+
+      /**
+       * Returns the memory consumption of this object (presumably in bytes - TODO confirm).
+       */
+      std::size_t memory_consumption () const;
+      // NOTE(review): the data members below are undocumented in this header; their meaning must be taken from the code that fills them.
+      unsigned int block_size;
+      unsigned int n_blocks;
+      unsigned int block_size_last;
+      unsigned int position_short_block;
+      bool use_multithreading;
+      bool use_partition_partition;
+      bool use_coloring_only;
+
+      std::vector<unsigned int> partition_color_blocks_row_index;
+      std::vector<unsigned int> partition_color_blocks_data;
+      unsigned int evens;
+      unsigned int odds;
+      unsigned int n_blocked_workers;
+      unsigned int n_workers;
+
+      std::vector<unsigned int> partition_evens;
+      std::vector<unsigned int> partition_odds;
+      std::vector<unsigned int> partition_n_blocked_workers;
+      std::vector<unsigned int> partition_n_workers;
+    };
+
+
+
+    /**
+     * A struct that collects all information related to the size of the
+     * problem and MPI parallelization.
+     */
+    struct SizeInfo
+    {
+      /**
+       * Constructor.
+       */
+      SizeInfo ();
+
+      /**
+       * Clears all data fields and resets the sizes to zero.
+       */
+      void clear();
+
+      /**
+       * Prints minimum, average, and maximal memory consumption over the MPI
+       * processes.
+       */
+      template <typename StreamType>
+      void print_memory_statistics (StreamType &out,
+                                    std::size_t data_length) const;
+
+      /**
+       * Determines the position of cells with ghosts for distributed-memory
+       * calculations.
+       */
+      void make_layout (const unsigned int n_active_cells_in,
+                        const unsigned int vectorization_length_in,
+                        std::vector<unsigned int> &boundary_cells,
+                        std::vector<unsigned int> &irregular_cells);
+
+      unsigned int n_active_cells;
+      unsigned int n_macro_cells;
+      unsigned int boundary_cells_start;
+      unsigned int boundary_cells_end;
+      unsigned int vectorization_length;
+
+      /**
+       * Index sets to describe the layout of cells: locally owned cells and
+       * ghost cells.
+       */
+      IndexSet locally_owned_cells;
+      IndexSet ghost_cells;
+
+      /**
+       * MPI communicator.
+       */
+      MPI_Comm communicator;
+      unsigned int my_pid;  // presumably the rank of this process in communicator - TODO confirm
+      unsigned int n_procs; // presumably the number of processes in communicator - TODO confirm
+    };
+
+    /**
+     * Enumeration used to tag the type of a cell; the enumerators are numbered 0 (cartesian) through 3 (undefined).
+     */
+    enum CellType {cartesian=0, affine=1, general=2, undefined=3};
+
+
+    /**
+     * A class that is used to compare floating point arrays (e.g.
+     * std::vector, Tensor<1,dim>, etc.). The idea of this class is to
+     * consider two arrays as equal if they are the same within a given
+     * tolerance. We use this comparator class within a std::map<> of the
+     * given arrays. Note that this comparison operator does not satisfy all
+     * the mathematical properties one usually wants to have (consider e.g.
+     * the numbers a=0, b=0.1, c=0.2 with tolerance 0.15; the operator gives
+     * a<c, but neither a<b nor b<c holds). This is not a problem
+     * in the use cases for this class, but be careful when using it in other
+     * contexts.
+     */
+    template<typename Number>
+    struct FPArrayComparator
+    {
+      FPArrayComparator (const Number scaling);
+
+      bool operator() (const std::vector<Number> &v1,
+                       const std::vector<Number> &v2) const;
+
+      template <int dim>
+      bool operator ()(const Tensor<1,dim,Tensor<1,VectorizedArray<Number>::n_array_elements,Number> > &t1,
+                       const Tensor<1,dim,Tensor<1,VectorizedArray<Number>::n_array_elements,Number> > &t2) const;
+
+      template <int dim>
+      bool operator ()(const Tensor<2,dim,Tensor<1,VectorizedArray<Number>::n_array_elements,Number> > &t1,
+                       const Tensor<2,dim,Tensor<1,VectorizedArray<Number>::n_array_elements,Number> > &t2) const;
+      // presumably derived from the constructor's scaling argument - TODO confirm
+      Number tolerance;
+    };
+
+    // Note: Implementation in matrix_free.templates.h
+
+  } // end of namespace MatrixFreeFunctions
+} // end of namespace internal
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/matrix_free/mapping_data_on_the_fly.h b/include/deal.II/matrix_free/mapping_data_on_the_fly.h
new file mode 100644
index 0000000..ed22268
--- /dev/null
+++ b/include/deal.II/matrix_free/mapping_data_on_the_fly.h
@@ -0,0 +1,340 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2014 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__matrix_free_mapping_data_on_the_fly_h
+#define dealii__matrix_free_mapping_data_on_the_fly_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/vectorization.h>
+#include <deal.II/base/aligned_vector.h>
+#include <deal.II/matrix_free/shape_info.h>
+#include <deal.II/matrix_free/mapping_info.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/fe_nothing.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace internal
+{
+  namespace MatrixFreeFunctions
+  {
+    /**
+     * This class provides evaluated mapping information using standard
+     * deal.II information in a form that FEEvaluation and friends can use for
+     * vectorized access. Since no vectorization over cells is available with
+     * the DoFHandler/Triangulation cell iterators, the interface to
+     * FEEvaluation's vectorization model is to use @p
+     * VectorizedArray::n_array_elements copies of the same element. This
+     * interface is thus primarily useful for evaluating several operators on
+     * the same cell, e.g., when assembling cell matrices.
+     *
+     * As opposed to the Mapping classes in deal.II, this class does not
+     * actually provide a boundary description that can be used to evaluate
+     * the geometry, but it rather provides the evaluated geometry from a
+     * given deal.II mapping (as passed to the constructor of this class) in a
+     * form accessible to FEEvaluation.
+     *
+     * @author Martin Kronbichler, 2014
+     */
+    template <int dim, typename Number=double>
+    class MappingDataOnTheFly
+    {
+    public:
+      /**
+       * Constructor, similar to FEValues. Since this class only evaluates the
+       * geometry, no finite element has to be specified and the simplest
+       * element, FE_Nothing, is used internally for the underlying FEValues
+       * object.
+       */
+      MappingDataOnTheFly (const Mapping<dim> &mapping,
+                           const Quadrature<1> &quadrature,
+                           const UpdateFlags update_flags);
+
+      /**
+       * Constructor. This constructor is equivalent to the other one except
+       * that it makes the object use a $Q_1$ mapping (i.e., an object of type
+       * MappingQGeneric(1)) implicitly.
+       */
+      MappingDataOnTheFly (const Quadrature<1> &quadrature,
+                           const UpdateFlags update_flags);
+
+      /**
+       * Initialize with the given cell iterator.
+       */
+      void reinit(typename dealii::Triangulation<dim>::cell_iterator cell);
+
+      /**
+       * Returns whether reinit() has been called at least once, i.e., a cell
+       * has been set.
+       */
+      bool is_initialized() const;
+
+      /**
+       * Return a triangulation iterator to the current cell.
+       */
+      typename dealii::Triangulation<dim>::cell_iterator get_cell () const;
+
+      /**
+       * Return a reference to the underlying FEValues object that evaluates
+       * certain quantities (only mapping-related ones like Jacobians or
+       * mapped quadrature points are accessible, as no finite element data is
+       * actually used).
+       */
+      const dealii::FEValues<dim> &get_fe_values () const;
+
+      /**
+       * Return a vector of inverse transpose Jacobians. For compatibility
+       * with FEEvaluation, it returns tensors of vectorized arrays, even
+       * though all components are equal.
+       */
+      const AlignedVector<Tensor<2,dim,VectorizedArray<Number> > > &
+      get_inverse_jacobians() const;
+
+      /**
+       * Return a vector of quadrature weights times the Jacobian determinant
+       * (JxW). For compatibility with FEEvaluation, it returns vectorized
+       * arrays, even though all components are equal.
+       */
+      const AlignedVector<VectorizedArray<Number> > &
+      get_JxW_values() const;
+
+      /**
+       * Return a vector of quadrature points in real space on the given cell.
+       * For compatibility with FEEvaluation, it returns tensors of vectorized
+       * arrays, even though all components are equal.
+       */
+      const AlignedVector<Point<dim,VectorizedArray<Number> > > &
+      get_quadrature_points() const;
+
+      /**
+       * Return a vector of normal vectors in real space on the given cell.
+       * For compatibility with FEEvaluation, it returns tensors of vectorized
+       * arrays, even though all components are equal.
+       */
+      const AlignedVector<Tensor<1,dim,VectorizedArray<Number> > > &
+      get_normal_vectors() const;
+
+      /**
+       * Return a reference to 1D quadrature underlying this object.
+       */
+      const Quadrature<1> &
+      get_quadrature () const;
+
+    private:
+      /**
+       * A cell iterator in case we generate the data on the fly to be able to
+       * check if we need to re-generate the information stored in this class.
+       */
+      typename dealii::Triangulation<dim>::cell_iterator present_cell;
+
+      /**
+       * Dummy finite element object necessary for initializing the FEValues
+       * object.
+       */
+      FE_Nothing<dim> fe_dummy;
+
+      /**
+       * An underlying FEValues object that performs the (scalar) evaluation.
+       */
+      dealii::FEValues<dim> fe_values;
+
+      /**
+       * The 1D quadrature formula to be used for reinitializing shape info.
+       */
+      const Quadrature<1> quadrature_1d;
+
+      /**
+       * Inverse Jacobians, stored in vectorized array form.
+       */
+      AlignedVector<Tensor<2,dim,VectorizedArray<Number> > > inverse_jacobians;
+
+      /**
+       * Stored Jacobian determinants and quadrature weights
+       */
+      AlignedVector<VectorizedArray<Number> > jxw_values;
+
+      /**
+       * Stored quadrature points
+       */
+      AlignedVector<Point<dim,VectorizedArray<Number> > > quadrature_points;
+
+      /**
+       * Stored normal vectors (for face integration)
+       */
+      AlignedVector<Tensor<1,dim,VectorizedArray<Number> > > normal_vectors;
+    };
+
+
+    /*----------------------- Inline functions ----------------------------------*/
+
+    template <int dim, typename Number>
+    inline
+    MappingDataOnTheFly<dim,Number>::MappingDataOnTheFly (const Mapping<dim> &mapping,
+                                                          const Quadrature<1> &quadrature,
+                                                          const UpdateFlags update_flags)
+      :
+      fe_values(mapping, fe_dummy, Quadrature<dim>(quadrature),
+                internal::MatrixFreeFunctions::MappingInfo<dim,Number>::compute_update_flags(update_flags)),
+      quadrature_1d(quadrature),
+      inverse_jacobians(fe_values.get_quadrature().size()),
+      jxw_values(fe_values.get_quadrature().size()),
+      quadrature_points(fe_values.get_quadrature().size()),
+      normal_vectors(fe_values.get_quadrature().size())
+    {
+      Assert(!(fe_values.get_update_flags() & update_jacobian_grads),
+             ExcNotImplemented());
+    }
+
+
+
+    template <int dim, typename Number>
+    inline
+    MappingDataOnTheFly<dim,Number>::MappingDataOnTheFly (const Quadrature<1> &quadrature,
+                                                          const UpdateFlags update_flags)
+      :
+      fe_values(fe_dummy, Quadrature<dim>(quadrature),
+                internal::MatrixFreeFunctions::MappingInfo<dim,Number>::compute_update_flags(update_flags)),
+      quadrature_1d(quadrature),
+      inverse_jacobians(fe_values.get_quadrature().size()),
+      jxw_values(fe_values.get_quadrature().size()),
+      quadrature_points(fe_values.get_quadrature().size()),
+      normal_vectors(fe_values.get_quadrature().size())
+    {
+      Assert(!(fe_values.get_update_flags() & update_jacobian_grads),
+             ExcNotImplemented());
+    }
+
+
+
+    template <int dim, typename Number>
+    inline
+    void
+    MappingDataOnTheFly<dim,Number>::reinit(typename dealii::Triangulation<dim>::cell_iterator cell)
+    {
+      if (present_cell == cell)
+        return;
+      present_cell = cell;
+      fe_values.reinit(present_cell);
+      for (unsigned int q=0; q<fe_values.get_quadrature().size(); ++q)
+        {
+          if (fe_values.get_update_flags() & update_inverse_jacobians)
+            for (unsigned int d=0; d<dim; ++d)
+              for (unsigned int e=0; e<dim; ++e)
+                inverse_jacobians[q][d][e] = fe_values.inverse_jacobian(q)[e][d];
+          if (fe_values.get_update_flags() & update_quadrature_points)
+            for (unsigned int d=0; d<dim; ++d)
+              quadrature_points[q][d] = fe_values.quadrature_point(q)[d];
+          if (fe_values.get_update_flags() & update_normal_vectors)
+            for (unsigned int d=0; d<dim; ++d)
+              normal_vectors[q][d] = fe_values.normal_vector(q)[d];
+          if (fe_values.get_update_flags() & update_JxW_values)
+            jxw_values[q] = fe_values.JxW(q);
+        }
+    }
+
+
+
+    template <int dim, typename Number>
+    inline
+    bool
+    MappingDataOnTheFly<dim,Number>::is_initialized() const
+    {
+      return present_cell != typename dealii::Triangulation<dim>::cell_iterator();
+    }
+
+
+
+    template <int dim, typename Number>
+    inline
+    typename dealii::Triangulation<dim>::cell_iterator
+    MappingDataOnTheFly<dim,Number>::get_cell() const
+    {
+      return fe_values.get_cell();
+    }
+
+
+
+    template <int dim, typename Number>
+    inline
+    const dealii::FEValues<dim> &
+    MappingDataOnTheFly<dim,Number>::get_fe_values() const
+    {
+      return fe_values;
+    }
+
+
+
+    template <int dim, typename Number>
+    inline
+    const AlignedVector<Tensor<2,dim,VectorizedArray<Number> > > &
+    MappingDataOnTheFly<dim,Number>::get_inverse_jacobians() const
+    {
+      return inverse_jacobians;
+    }
+
+
+
+    template <int dim, typename Number>
+    inline
+    const AlignedVector<Tensor<1,dim,VectorizedArray<Number> > > &
+    MappingDataOnTheFly<dim,Number>::get_normal_vectors() const
+    {
+      return normal_vectors;
+    }
+
+
+
+    template <int dim, typename Number>
+    inline
+    const AlignedVector<Point<dim,VectorizedArray<Number> > > &
+    MappingDataOnTheFly<dim,Number>::get_quadrature_points() const
+    {
+      return quadrature_points;
+    }
+
+
+
+    template <int dim, typename Number>
+    inline
+    const AlignedVector<VectorizedArray<Number> > &
+    MappingDataOnTheFly<dim,Number>::get_JxW_values() const
+    {
+      return jxw_values;
+    }
+
+
+
+    template <int dim, typename Number>
+    inline
+    const Quadrature<1> &
+    MappingDataOnTheFly<dim,Number>::get_quadrature() const
+    {
+      return quadrature_1d;
+    }
+
+
+  } // end of namespace MatrixFreeFunctions
+} // end of namespace internal
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/matrix_free/mapping_info.h b/include/deal.II/matrix_free/mapping_info.h
new file mode 100644
index 0000000..c47feef
--- /dev/null
+++ b/include/deal.II/matrix_free/mapping_info.h
@@ -0,0 +1,380 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2011 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__matrix_free_mapping_info_h
+#define dealii__matrix_free_mapping_info_h
+
+
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/vectorization.h>
+#include <deal.II/base/aligned_vector.h>
+#include <deal.II/hp/q_collection.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/mapping.h>
+#include <deal.II/matrix_free/helper_functions.h>
+
+#include <memory>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace internal
+{
+  namespace MatrixFreeFunctions
+  {
+    /**
+     * The class that stores all geometry-dependent data related with cell
+     * interiors for use in the matrix-free class.
+     *
+     * @author Katharina Kormann and Martin Kronbichler, 2010, 2011
+     */
+    template <int dim, typename Number>
+    struct MappingInfo
+    {
+      /**
+       * Determines how many bits of an unsigned int are used to distinguish
+       * the cell types (Cartesian, with constant Jacobian, or general)
+       */
+      static const std::size_t  n_cell_type_bits = 2;
+
+      /**
+       * Determines how many types of different cells can be detected at most.
+       * Corresponds to the number of bits we reserved for it.
+       */
+      static const unsigned int n_cell_types = 1U<<n_cell_type_bits;
+
+      /**
+       * An abbreviation for the length of vector lines of the current data
+       * type.
+       */
+      static const unsigned int n_vector_elements = VectorizedArray<Number>::n_array_elements;
+
+      /**
+       * Empty constructor.
+       */
+      MappingInfo();
+
+      /**
+       * Computes the information in the given cells. The cells are specified
+       * by the level and the index within the level (as given by
+       * CellIterator::level() and CellIterator::index(), in order to allow
+       * for different kinds of iterators, e.g. standard DoFHandler,
+       * multigrid, etc.)  on a fixed Triangulation. In addition, a mapping
+       * and several quadrature formulas are given.
+       */
+      void initialize (const dealii::Triangulation<dim>                &tria,
+                       const std::vector<std::pair<unsigned int,unsigned int> > &cells,
+                       const std::vector<unsigned int>         &active_fe_index,
+                       const Mapping<dim>                      &mapping,
+                       const std::vector<dealii::hp::QCollection<1> >  &quad,
+                       const UpdateFlags                        update_flags);
+
+      /**
+       * Helper function to determine which update flags must be set in the
+       * internal functions to initialize all data as requested by the user.
+       */
+      static UpdateFlags
+      compute_update_flags (const UpdateFlags                        update_flags,
+                            const std::vector<dealii::hp::QCollection<1> >  &quad =
+                              std::vector<dealii::hp::QCollection<1> >());
+
+      /**
+       * Returns the type of a given cell as detected during initialization.
+       */
+      CellType get_cell_type (const unsigned int cell_chunk_no) const;
+
+      /**
+       * Returns the index of a given cell in the Jacobian and JxW data fields.
+       */
+      unsigned int get_cell_data_index (const unsigned int cell_chunk_no) const;
+
+      /**
+       * Clears all data fields in this class.
+       */
+      void clear ();
+
+      /**
+       * Returns the memory consumption of this class in bytes.
+       */
+      std::size_t memory_consumption() const;
+
+      /**
+       * Prints a detailed summary of memory consumption in the different
+       * structures of this class to the given output stream.
+       */
+      template <typename StreamType>
+      void print_memory_consumption(StreamType     &out,
+                                    const SizeInfo &size_info) const;
+
+      /**
+       * Stores whether a cell is Cartesian, has constant transform data
+       * (Jacobians) or is general. cell_type % 4 gives this information (0:
+       * Cartesian, 1: constant Jacobian throughout cell, 2: general cell),
+       * and cell_type / 4 gives the index in the data field of where to find
+       * the information in the fields Jacobian and JxW values (except for
+       * quadrature points, for which the index runs as usual).
+       */
+      std::vector<unsigned int> cell_type;
+
+      /**
+       * The first field stores the inverse Jacobian for Cartesian cells:
+       * There, it is a diagonal rank-2 tensor, so we actually just store a
+       * rank-1 tensor. It is the same on all cells, therefore we only store
+       * it once per cell, and use similarities from one cell to another, too
+       * (on structured meshes, there are usually many cells with the same
+       * Jacobian).
+       *
+       * The second field stores the Jacobian determinant for Cartesian cells
+       * (without the quadrature weight, which depends on the quadrature
+       * point, whereas the determinant is the same on each quadrature point).
+       */
+      AlignedVector<std::pair<Tensor<1,dim,VectorizedArray<Number> >,
+                    VectorizedArray<Number> > > cartesian_data;
+
+      /**
+       * The first field stores the Jacobian for non-Cartesian cells where all
+       * the Jacobians on the cell are the same (i.e., constant, which comes
+       * from a linear transformation from unit to real cell). Also use
+       * similarities from one cell to another (on structured meshes, there
+       * are usually many cells with the same Jacobian).
+       *
+       * The second field stores the Jacobian determinant for non-Cartesian
+       * cells with constant Jacobian throughout the cell (without the
+       * quadrature weight, which depends on the quadrature point, whereas the
+       * determinant is the same on each quadrature point).
+       */
+      AlignedVector<std::pair<Tensor<2,dim,VectorizedArray<Number> >,
+                    VectorizedArray<Number> > > affine_data;
+
+      /**
+       * Definition of a structure that stores data that depends on the
+       * quadrature formula (if we have more than one quadrature formula on a
+       * given problem, these fields will be different)
+       */
+      struct MappingInfoDependent
+      {
+        /**
+         * This field stores the row starts for the inverse Jacobian
+         * transformations, quadrature weights and second derivatives.
+         */
+        std::vector<unsigned int> rowstart_jacobians;
+
+        /**
+         * This field stores the inverse Jacobian transformation from unit to
+         * real cell, which is needed for most gradient transformations
+         * (corresponds to FEValues::inverse_jacobian) for general cells.
+         */
+        AlignedVector<Tensor<2,dim,VectorizedArray<Number> > > jacobians;
+
+        /**
+         * This field stores the Jacobian determinant times the quadrature
+         * weights (JxW in deal.II speak) for general cells.
+         */
+        AlignedVector<VectorizedArray<Number> > JxW_values;
+
+        /**
+         * Stores the diagonal part of the gradient of the inverse Jacobian
+         * transformation. The first index runs over the derivatives
+         * $\partial^2/\partial x_i^2$, the second over the space coordinate.
+         * Needed for computing the Laplacian of FE functions on the real
+         * cell. Uses a separate storage from the off-diagonal part
+         * $\partial^2/\partial x_i \partial x_j, i\neq j$ because that is
+         * only needed for computing a full Hessian.
+         */
+        AlignedVector<Tensor<2,dim,VectorizedArray<Number> > > jacobians_grad_diag;
+
+        /**
+         * Stores the off-diagonal part of the gradient of the inverse
+         * Jacobian transformation. Because of symmetry, only the upper
+         * diagonal part is needed. The first index runs through the
+         * derivatives row-wise, i.e., $\partial^2/\partial x_1 \partial x_2$
+         * first, then $\partial^2/\partial x_1 \partial x_3$, and so on. The
+         * second index is the spatial coordinate. Not filled currently.
+         */
+        AlignedVector<Tensor<1,(dim>1?dim*(dim-1)/2:1),
+                      Tensor<1,dim,VectorizedArray<Number> > > > jacobians_grad_upper;
+
+        /**
+         * Stores the row start for quadrature points in real coordinates for
+         * both types of cells. Note that Cartesian cells will have shorter
+         * fields (length is @p n_q_points_1d) than non-Cartesian cells
+         * (length is @p n_q_points).
+         */
+        std::vector<unsigned int> rowstart_q_points;
+
+        /**
+         * Stores the quadrature points in real coordinates for Cartesian
+         * cells (does not need to store the full data on all points)
+         */
+        AlignedVector<Point<dim,VectorizedArray<Number> > > quadrature_points;
+
+        /**
+         * The dim-dimensional quadrature formula underlying the problem
+         * (constructed from a 1D tensor product quadrature formula).
+         */
+        dealii::hp::QCollection<dim>    quadrature;
+
+        /**
+         * The (dim-1)-dimensional quadrature formula corresponding to face
+         * evaluation (constructed from a 1D tensor product quadrature
+         * formula).
+         */
+        dealii::hp::QCollection<dim-1>  face_quadrature;
+
+        /**
+         * The number of quadrature points for the current quadrature formula.
+         */
+        std::vector<unsigned int> n_q_points;
+
+        /**
+         * The number of quadrature points for the current quadrature formula
+         * when applied to a face. Only set if the quadrature formula is
+         * derived from a tensor product, since it is not defined from the
+         * full quadrature formula otherwise.
+         */
+        std::vector<unsigned int> n_q_points_face;
+
+        /**
+         * The quadrature weights (vectorized data format) on the unit cell.
+         */
+        std::vector<AlignedVector<VectorizedArray<Number> > > quadrature_weights;
+
+        /**
+         * This variable stores the number of quadrature points for all
+         * quadrature indices in the underlying element for easier access to
+         * data in the hp case.
+         */
+        std::vector<unsigned int> quad_index_conversion;
+
+        /**
+         * Returns the quadrature index for a given number of quadrature
+         * points. If not in hp mode or if the index is not found, this
+         * function always returns index 0. Hence, this function does not
+         * check whether the given number of points is actually present.
+         */
+        unsigned int
+        quad_index_from_n_q_points (const unsigned int n_q_points) const;
+
+
+        /**
+         * Prints a detailed summary of memory consumption in the different
+         * structures of this class to the given output stream.
+         */
+        template <typename StreamType>
+        void print_memory_consumption(StreamType     &out,
+                                      const SizeInfo &size_info) const;
+
+        /**
+         * Returns the memory consumption in bytes.
+         */
+        std::size_t memory_consumption () const;
+      };
+
+      /**
+       * Contains all the stuff that depends on the quadrature formula
+       */
+      std::vector<MappingInfoDependent> mapping_data_gen;
+
+      /**
+       * Stores whether JxW values have been initialized
+       */
+      bool JxW_values_initialized;
+
+      /**
+       * Stores whether we computed second derivatives.
+       */
+      bool second_derivatives_initialized;
+
+      /**
+       * Stores whether we computed quadrature points.
+       */
+      bool quadrature_points_initialized;
+
+      /**
+       * Internal temporary data used for the initialization.
+       */
+      struct CellData
+      {
+        CellData (const double jac_size);
+        void resize (const unsigned int size);
+
+        AlignedVector<Tensor<1,dim,VectorizedArray<Number> > >  quadrature_points;
+        AlignedVector<Tensor<2,dim,VectorizedArray<Number> > >  general_jac;
+        AlignedVector<Tensor<3,dim,VectorizedArray<Number> > >  general_jac_grad;
+        Tensor<2,dim,VectorizedArray<Number> > const_jac;
+        const double                           jac_size;
+      };
+
+      /**
+       * Helper function called internally during the initialize function.
+       */
+      void evaluate_on_cell (const dealii::Triangulation<dim> &tria,
+                             const std::pair<unsigned int,unsigned int> *cells,
+                             const unsigned int  cell,
+                             const unsigned int  my_q,
+                             CellType (&cell_t_prev)[n_vector_elements],
+                             CellType (&cell_t)[n_vector_elements],
+                             dealii::FEValues<dim,dim> &fe_values,
+                             CellData          &cell_data) const;
+    };
+
+
+
+    /* ------------------- inline functions ----------------------------- */
+
+    template <int dim, typename Number>
+    inline
+    unsigned int
+    MappingInfo<dim,Number>::MappingInfoDependent::
+    quad_index_from_n_q_points (const unsigned int n_q_points) const
+    {
+      unsigned int pos = 0;  // advance to the first entry equal to n_q_points
+      while (pos<quad_index_conversion.size() && quad_index_conversion[pos] != n_q_points)
+        ++pos;
+      return (pos<quad_index_conversion.size()) ? pos : 0;  // no match: fall back to 0
+    }
+
+
+
+    template <int dim, typename Number>
+    inline
+    CellType
+    MappingInfo<dim,Number>::get_cell_type (const unsigned int cell_no) const
+    {
+      AssertIndexRange (cell_no, cell_type.size());
+      const unsigned int type_bits = cell_type[cell_no] % n_cell_types;  // low bits hold the type
+      Assert(static_cast<CellType>(type_bits) != undefined, ExcInternalError());
+      return static_cast<CellType>(type_bits);
+    }
+
+
+
+    template <int dim, typename Number>
+    inline
+    unsigned int
+    MappingInfo<dim,Number>::get_cell_data_index (const unsigned int cell_no) const
+    {
+      AssertIndexRange (cell_no, cell_type.size());
+      return cell_type[cell_no] >> n_cell_type_bits;
+    }
+
+  } // end of namespace MatrixFreeFunctions
+} // end of namespace internal
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/matrix_free/mapping_info.templates.h b/include/deal.II/matrix_free/mapping_info.templates.h
new file mode 100644
index 0000000..6a25421
--- /dev/null
+++ b/include/deal.II/matrix_free/mapping_info.templates.h
@@ -0,0 +1,926 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2011 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/fe/fe_nothing.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/mapping_q1.h>
+
+#include <deal.II/matrix_free/mapping_info.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace internal
+{
+  namespace MatrixFreeFunctions
+  {
+    // ----------------- actual MappingInfo functions -------------------------
+
+    // Default constructor. Marks all optional data fields (JxW values,
+    // second derivatives, quadrature points) as not yet initialized; the
+    // flags are switched on in initialize() depending on the update flags.
+    template <int dim, typename Number>
+    MappingInfo<dim,Number>::MappingInfo()
+      :
+      JxW_values_initialized (false),
+      second_derivatives_initialized (false),
+      quadrature_points_initialized (false)
+    {}
+
+
+
+    // Reset the object: empty all data containers held by this class and
+    // mark the optional data fields as not initialized.
+    template <int dim, typename Number>
+    void
+    MappingInfo<dim,Number>::clear ()
+    {
+      // release the stored mapping data
+      affine_data.clear();
+      cartesian_data.clear();
+      cell_type.clear();
+      mapping_data_gen.clear();
+
+      // nothing is available any more
+      JxW_values_initialized         = false;
+      second_derivatives_initialized = false;
+      quadrature_points_initialized  = false;
+    }
+
+
+
+    // Translate the update flags requested by the user into the flags that
+    // are needed internally for the given quadrature collections.
+    template <int dim, typename Number>
+    UpdateFlags
+    MappingInfo<dim,Number>::
+    compute_update_flags (const UpdateFlags update_flags,
+                          const std::vector<dealii::hp::QCollection<1> > &quad)
+    {
+      // this class is built around the evaluation of inverse gradients, so
+      // inverse Jacobians are computed in any case. This already covers
+      // requests for gradients or for inverse Jacobians themselves.
+      UpdateFlags required_flags = update_inverse_jacobians;
+
+      // JxW values are only computed on request
+      if (update_flags & update_JxW_values)
+        required_flags |= update_JxW_values;
+
+      // Hessian information needs inverse Jacobians and the derivative of
+      // Jacobians (these two together give us the gradients of the inverse
+      // Jacobians, which is what we need)
+      if ((update_flags & update_hessians) ||
+          (update_flags & update_jacobian_grads))
+        required_flags |= update_jacobian_grads;
+
+      if (update_flags & update_quadrature_points)
+        required_flags |= update_quadrature_points;
+
+      // the remaining check inspects the quadrature formulas; without any
+      // formula we are done
+      if (quad.empty())
+        return required_flags;
+
+      // one more thing: if there is a quadrature formula with only one
+      // quadrature point on the first component, but more points on later
+      // components, we need Jacobian gradients anyway in order to determine
+      // whether the Jacobian is constant throughout a cell
+      bool single_point_in_first = false;
+      for (unsigned int i=0; i<quad[0].size() && !single_point_in_first; ++i)
+        single_point_in_first = (quad[0][i].size() == 1);
+
+      if (!single_point_in_first)
+        return required_flags;
+
+      for (unsigned int comp=1; comp<quad.size(); ++comp)
+        for (unsigned int i=0; i<quad[comp].size(); ++i)
+          if (quad[comp][i].size() > 1)
+            {
+              // one formula with several points is enough to decide
+              required_flags |= update_jacobian_grads;
+              return required_flags;
+            }
+
+      return required_flags;
+    }
+
+
+
+    namespace internal
+    {
+      // Return a reference length used to judge the size of Jacobian
+      // entries: the diameter of the first cell of the triangulation, or 1
+      // if the triangulation contains no cells.
+      template <int dim>
+      double get_jacobian_size (const dealii::Triangulation<dim> &tria)
+      {
+        if (tria.n_cells() > 0)
+          return tria.begin()->diameter();
+
+        // empty triangulation: any positive value will do
+        return 1;
+      }
+    }
+
+
+
+    // Compute and store the mapping data for the given cells.
+    //
+    // The function first clears all previously stored data. It then loops
+    // over all quadrature collections in @p quad and, for each of them, sets
+    // up the quadrature information (number of points, weights, tensor
+    // product quadrature formulas) and walks over the cells in batches of
+    // VectorizedArray<Number>::n_array_elements cells ("macro cells"); the
+    // number of entries in @p cells must be a multiple of that batch size.
+    //
+    // For the first quadrature collection (my_q == 0) each macro cell is
+    // classified as Cartesian, affine, or general, and the compressed index
+    // together with the type is stored in cell_type. For general cells the
+    // transposed inverse Jacobians and, depending on the update flags, the
+    // JxW values and the gradients of the inverse Jacobians are stored for
+    // every quadrature point. Quadrature points are stored if requested; on
+    // Cartesian cells only the points along the diagonal are kept. Finally,
+    // the unique Cartesian and affine Jacobians collected along the way are
+    // written to cartesian_data and affine_data.
+    //
+    // @p active_fe_index may be empty, in which case index 0 is used for all
+    // cells. @p update_flags_input is translated by compute_update_flags().
+    template <int dim, typename Number>
+    void
+    MappingInfo<dim,Number>::initialize
+    (const dealii::Triangulation<dim>                         &tria,
+     const std::vector<std::pair<unsigned int,unsigned int> > &cells,
+     const std::vector<unsigned int>                          &active_fe_index,
+     const Mapping<dim>                                       &mapping,
+     const std::vector<dealii::hp::QCollection<1> >           &quad,
+     const UpdateFlags                                         update_flags_input)
+    {
+      clear();
+      const unsigned int n_quads = quad.size();
+      const unsigned int n_cells = cells.size();
+      const unsigned int vectorization_length =
+        VectorizedArray<Number>::n_array_elements;
+      Assert (n_cells%vectorization_length == 0, ExcInternalError());
+      const unsigned int n_macro_cells = n_cells/vectorization_length;
+      mapping_data_gen.resize (n_quads);
+      cell_type.resize (n_macro_cells);
+
+      // dummy FE that is used to set up an FEValues object. Do not need the
+      // actual finite element because we will only evaluate quantities for
+      // the mapping that are independent of the FE
+      FE_Nothing<dim> dummy_fe;
+      UpdateFlags update_flags = compute_update_flags (update_flags_input, quad);
+
+      if (update_flags & update_JxW_values)
+        JxW_values_initialized = true;
+      if (update_flags & update_jacobian_grads)
+        second_derivatives_initialized = true;
+      if (update_flags & update_quadrature_points)
+        quadrature_points_initialized = true;
+
+      // when we make comparisons about the size of Jacobians we need to know
+      // the approximate size of typical entries in Jacobians. We need to fix
+      // the Jacobian size once and for all. We choose the diameter of the
+      // first cell (on level zero, which is the best accuracy we can hope
+      // for, since diameters on finer levels are computed by differences of
+      // nearby cells). If the mesh extends over a certain domain, the
+      // precision of double values is essentially limited by this precision.
+      const double jacobian_size = internal::get_jacobian_size(tria);
+
+      // objects that hold the data for up to vectorization_length cells while
+      // we fill them up. Only after all vectorization_length cells have been
+      // processed, we can insert the data into the data structures of this
+      // class
+      CellData data (jacobian_size);
+
+      for (unsigned int my_q=0; my_q<n_quads; ++my_q)
+        {
+          MappingInfoDependent &current_data = mapping_data_gen[my_q];
+          const unsigned int n_hp_quads = quad[my_q].size();
+          AssertIndexRange (0, n_hp_quads);
+          current_data.n_q_points.reserve (n_hp_quads);
+          current_data.n_q_points_face.reserve (n_hp_quads);
+          current_data.quadrature_weights.resize (n_hp_quads);
+          std::vector<unsigned int> n_q_points_1d (n_hp_quads),
+              step_size_cartesian (n_hp_quads);
+          if (n_hp_quads > 1)
+            current_data.quad_index_conversion.resize(n_hp_quads);
+          for (unsigned int q=0; q<n_hp_quads; ++q)
+            {
+              n_q_points_1d[q] = quad[my_q][q].size();
+              const unsigned int n_q_points =
+                Utilities::fixed_power<dim>(n_q_points_1d[q]);
+              current_data.n_q_points.push_back (n_q_points);
+
+              current_data.n_q_points_face.push_back
+              (dim>1 ? Utilities::fixed_power<dim-1>(n_q_points_1d[q]) : 1);
+              current_data.quadrature.push_back
+              (Quadrature<dim>(quad[my_q][q]));
+              current_data.face_quadrature.push_back
+              (Quadrature<dim-1>(quad[my_q][q]));
+
+              // set quadrature weights in vectorized form
+              current_data.quadrature_weights[q].resize(n_q_points);
+              for (unsigned int i=0; i<n_q_points; ++i)
+                current_data.quadrature_weights[q][i] =
+                  current_data.quadrature[q].get_weights()[i];
+
+              if (n_hp_quads > 1)
+                current_data.quad_index_conversion[q] = n_q_points;
+
+              // To walk on the diagonal for lexicographic ordering, we have
+              // to jump one index ahead in each direction. For direction 0,
+              // this is just the next point, for direction 1, it means adding
+              // n_q_points_1d, and so on.
+              step_size_cartesian[q] = 0;
+              unsigned int factor = 1;
+              for (unsigned int d=0; d<dim; ++d)
+                {
+                  step_size_cartesian[q] += factor;
+                  factor *= n_q_points_1d[q];
+                }
+            }
+
+          // if there are no cells, there is nothing to do
+          if (cells.size() == 0)
+            continue;
+
+          Tensor<3,dim,VectorizedArray<Number> > jac_grad, grad_jac_inv;
+          Tensor<1,dim,VectorizedArray<Number> > tmp;
+
+          // encodes the cell types of the current cell. Since several cells
+          // must be considered together, this variable holds the individual
+          // info of the last chunk of cells
+          CellType cell_t [vectorization_length],
+                   cell_t_prev [vectorization_length];
+          for (unsigned int j=0; j<vectorization_length; ++j)
+            cell_t_prev[j] = undefined;
+
+          // fe_values object that is used to compute the mapping data. for
+          // the hp case there might be more than one finite element. since we
+          // manually select the active FE index and not via a
+          // hp::DoFHandler<dim>::active_cell_iterator, we need to manually
+          // select the correct finite element, so just hold a vector of
+          // FEValues
+          std::vector<std_cxx11::shared_ptr<dealii::FEValues<dim> > >
+          fe_values (current_data.quadrature.size());
+          UpdateFlags update_flags_feval =
+            (update_flags & update_inverse_jacobians ? update_jacobians : update_default) |
+            (update_flags & update_jacobian_grads ? update_jacobian_grads : update_default) |
+            (update_flags & update_quadrature_points ? update_quadrature_points : update_default);
+
+          // resize the fields that have fixed size or for which we know
+          // something from an earlier loop
+          current_data.rowstart_q_points.resize (n_macro_cells+1);
+          if (my_q > 0)
+            {
+              const unsigned int n_cells_var =
+                mapping_data_gen[0].rowstart_jacobians.size()-1;
+              current_data.rowstart_jacobians.reserve (n_cells_var+1);
+              const unsigned int reserve_size = n_cells_var *
+                                                current_data.n_q_points[0];
+              if (mapping_data_gen[0].jacobians.size() > 0)
+                current_data.jacobians.reserve (reserve_size);
+              // reserve the JxW array itself (and not the Jacobian array a
+              // second time) when the first quadrature stored JxW values
+              if (mapping_data_gen[0].JxW_values.size() > 0)
+                current_data.JxW_values.reserve (reserve_size);
+              if (mapping_data_gen[0].jacobians_grad_diag.size() > 0)
+                current_data.jacobians_grad_diag.reserve (reserve_size);
+              if (mapping_data_gen[0].jacobians_grad_upper.size() > 0)
+                current_data.jacobians_grad_upper.reserve (reserve_size);
+            }
+
+          // we would like to put a Tensor<1,dim,VectorizedArray<Number> > as
+          // key into the std::map, but std::map allocation does not align the
+          // allocated memory correctly, so put it into a tensor of the
+          // correct length instead
+          FPArrayComparator<Number> comparator(jacobian_size);
+          typedef Tensor<1,VectorizedArray<Number>::n_array_elements,Number> VEC_ARRAY;
+          std::map<Tensor<1,dim,VEC_ARRAY>, unsigned int,
+              FPArrayComparator<Number> > cartesians(comparator);
+          std::map<Tensor<2,dim,VEC_ARRAY>, unsigned int,
+              FPArrayComparator<Number> > affines(comparator);
+
+          // loop over all cells
+          for (unsigned int cell=0; cell<n_macro_cells; ++cell)
+            {
+              // GENERAL OUTLINE: First generate the data in format "number"
+              // for vectorization_length cells, and then find the most
+              // general type of cell for appropriate vectorized formats. then
+              // fill this data in
+              const unsigned int fe_index = active_fe_index.size() > 0 ?
+                                            active_fe_index[cell] : 0;
+              const unsigned int n_q_points = current_data.n_q_points[fe_index];
+              if (fe_values[fe_index].get() == 0)
+                fe_values[fe_index].reset
+                (new dealii::FEValues<dim> (mapping, dummy_fe,
+                                            current_data.quadrature[fe_index],
+                                            update_flags_feval));
+              dealii::FEValues<dim> &fe_val = *fe_values[fe_index];
+              data.resize (n_q_points);
+
+              // if the fe index has changed from the previous cell, set the
+              // old cell type to invalid (otherwise, we might detect
+              // similarity due to some cells further ahead)
+              if (cell > 0 && active_fe_index.size() > 0 &&
+                  active_fe_index[cell] != active_fe_index[cell-1])
+                cell_t_prev[vectorization_length-1] = undefined;
+              evaluate_on_cell (tria, &cells[cell*vectorization_length],
+                                cell, my_q, cell_t_prev, cell_t, fe_val, data);
+
+              // now reorder the data into vectorized types. if we are here
+              // for the first time, we need to find out whether the Jacobian
+              // allows for some simplification (Cartesian, affine) taking
+              // vectorization_length cell together and we have to insert that
+              // data into the respective fields. Also, we have to compress
+              // different cell indicators into one structure.
+
+              if (my_q == 0)
+                {
+                  // find the most general cell type (most general type is 2
+                  // (general cell))
+                  CellType most_general_type = cartesian;
+                  for (unsigned int j=0; j<vectorization_length; ++j)
+                    if (cell_t[j] > most_general_type)
+                      most_general_type = cell_t[j];
+                  AssertIndexRange (most_general_type, 3);
+                  unsigned int insert_position = numbers::invalid_unsigned_int;
+
+                  // Cartesian cell with diagonal Jacobian: only insert the
+                  // diagonal of the inverse and the Jacobian determinant. We
+                  // do this by using an std::map that collects pointers to
+                  // all Cartesian Jacobians. We need a pointer in the
+                  // std::map because it cannot store data based on
+                  // VectorizedArray (alignment issue). We circumvent the
+                  // problem by temporarily filling the next value into the
+                  // cartesian_data field and, in case we did an insertion,
+                  // the data is already in the correct place.
+                  if (most_general_type == cartesian)
+                    {
+                      std::pair<Tensor<1,dim,VEC_ARRAY>,unsigned int> new_entry;
+                      new_entry.second = cartesians.size();
+                      for (unsigned int d=0; d<dim; ++d)
+                        for (unsigned int v=0; v<VectorizedArray<Number>::n_array_elements; ++v)
+                          new_entry.first[d][v] = data.const_jac[d][d][v];
+
+                      std::pair<typename std::map<Tensor<1,dim,VEC_ARRAY>,
+                          unsigned int, FPArrayComparator<Number> >::iterator,
+                          bool> it = cartesians.insert(new_entry);
+                      if (it.second == false)
+                        insert_position = it.first->second;
+                      else
+                        insert_position = new_entry.second;
+                    }
+
+                  // Constant Jacobian case. same strategy as before, but with
+                  // other data fields
+                  else if (most_general_type == affine)
+                    {
+                      std::pair<Tensor<2,dim,VEC_ARRAY>,unsigned int> new_entry;
+                      new_entry.second = affines.size();
+                      for (unsigned int d=0; d<dim; ++d)
+                        for (unsigned int e=0; e<dim; ++e)
+                          for (unsigned int v=0; v<VectorizedArray<Number>::n_array_elements; ++v)
+                            new_entry.first[d][e][v] = data.const_jac[d][e][v];
+
+                      std::pair<typename std::map<Tensor<2,dim,VEC_ARRAY>,
+                          unsigned int, FPArrayComparator<Number> >::iterator,
+                          bool> it = affines.insert(new_entry);
+                      if (it.second == false)
+                        insert_position = it.first->second;
+                      else
+                        insert_position = new_entry.second;
+                    }
+
+                  // general cell case: first resize the data field to fit the
+                  // new data. if we are here the first time, assume that
+                  // there are many general cells to come, so reserve some
+                  // memory in order to not have too many reallocations and
+                  // memcpy's. The scheme used here involves at most one
+                  // reallocation.
+                  else
+                    {
+                      Assert (most_general_type == general, ExcInternalError());
+                      insert_position = current_data.rowstart_jacobians.size();
+                      if (current_data.rowstart_jacobians.size() == 0)
+                        {
+                          unsigned int reserve_size = (n_macro_cells-cell+1)/2;
+                          current_data.rowstart_jacobians.reserve
+                          (reserve_size);
+                          reserve_size *= n_q_points;
+                          current_data.jacobians.reserve (reserve_size);
+                          if (update_flags & update_JxW_values)
+                            current_data.JxW_values.reserve (reserve_size);
+                          if (update_flags & update_jacobian_grads)
+                            current_data.jacobians_grad_diag.reserve (reserve_size);
+                          if (update_flags & update_jacobian_grads)
+                            current_data.jacobians_grad_upper.reserve (reserve_size);
+                        }
+                    }
+
+                  cell_type[cell] = ((insert_position << n_cell_type_bits) +
+                                     (unsigned int)most_general_type);
+
+                } // end if (my_q == 0)
+
+              // general cell case: now go through all quadrature points and
+              // collect the data. done for all different quadrature formulas,
+              // so do it outside the above loop.
+              if (get_cell_type(cell) == general)
+                {
+                  const unsigned int previous_size =
+                    current_data.jacobians.size();
+                  current_data.rowstart_jacobians.push_back (previous_size);
+                  if (update_flags & update_JxW_values)
+                    {
+                      AssertDimension (previous_size,
+                                       current_data.JxW_values.size());
+                    }
+                  if (update_flags & update_jacobian_grads)
+                    {
+                      AssertDimension (previous_size,
+                                       current_data.jacobians_grad_diag.size());
+                      AssertDimension (previous_size,
+                                       current_data.jacobians_grad_upper.size());
+                    }
+                  for (unsigned int q=0; q<n_q_points; ++q)
+                    {
+                      Tensor<2,dim,VectorizedArray<Number> > &jac = data.general_jac[q];
+                      Tensor<3,dim,VectorizedArray<Number> > &jacobian_grad = data.general_jac_grad[q];
+                      for (unsigned int j=0; j<vectorization_length; ++j)
+                        if (cell_t[j] == cartesian || cell_t[j] == affine)
+                          {
+                            for (unsigned int d=0; d<dim; ++d)
+                              for (unsigned int e=0; e<dim; ++e)
+                                {
+                                  jac[d][e][j] = data.const_jac[d][e][j];
+                                  for (unsigned int f=0; f<dim; ++f)
+                                    jacobian_grad[d][e][f][j] = 0.;
+                                }
+                          }
+
+                      const VectorizedArray<Number> det = determinant (jac);
+                      current_data.jacobians.push_back (transpose(invert(jac)));
+                      const Tensor<2,dim,VectorizedArray<Number> > &inv_jac = current_data.jacobians.back();
+
+                      if (update_flags & update_JxW_values)
+                        current_data.JxW_values.push_back
+                        (det * current_data.quadrature_weights[fe_index][q]);
+
+                      if (update_flags & update_jacobian_grads)
+                        {
+                          // for second derivatives on the real cell, need
+                          // also the gradient of the inverse Jacobian J. This
+                          // involves some calculus and is done
+                          // vectorized. This is very cheap compared to what
+                          // fe_values does (in early 2011). If L is the
+                          // gradient of the jacobian on the unit cell, the
+                          // gradient of the inverse is given by
+                          // (multidimensional calculus) - J * (J * L) * J
+                          // (the third J is because we need to transform the
+                          // gradient L from the unit to the real cell, and
+                          // then apply the inverse Jacobian). Compare this
+                          // with 1D with j(x) = 1/k(phi(x)), where j = phi'
+                          // is the inverse of the jacobian and k is the
+                          // derivative of the jacobian on the unit cell. Then
+                          // j' = phi' k'/k^2 = j k' j^2.
+
+                          // compute: jac_grad = J*grad_unit(J^-1)
+                          for (unsigned int d=0; d<dim; ++d)
+                            for (unsigned int e=0; e<dim; ++e)
+                              for (unsigned int f=0; f<dim; ++f)
+                                {
+                                  jac_grad[f][e][d] = (inv_jac[f][0] *
+                                                       jacobian_grad[d][e][0]);
+                                  for (unsigned int g=1; g<dim; ++g)
+                                    jac_grad[f][e][d] += (inv_jac[f][g] *
+                                                          jacobian_grad[d][e][g]);
+                                }
+
+                          // compute: transpose (-jac * jac_grad[d] * jac)
+                          for (unsigned int d=0; d<dim; ++d)
+                            for (unsigned int e=0; e<dim; ++e)
+                              {
+                                for (unsigned int f=0; f<dim; ++f)
+                                  {
+                                    tmp[f] = VectorizedArray<Number>();
+                                    for (unsigned int g=0; g<dim; ++g)
+                                      tmp[f] -= jac_grad[d][f][g] * inv_jac[g][e];
+                                  }
+
+                                // needed for non-diagonal part of Jacobian
+                                // grad
+                                for (unsigned int f=0; f<dim; ++f)
+                                  {
+                                    grad_jac_inv[f][d][e] = inv_jac[f][0] * tmp[0];
+                                    for (unsigned int g=1; g<dim; ++g)
+                                      grad_jac_inv[f][d][e] += inv_jac[f][g] * tmp[g];
+                                  }
+                              }
+
+                          {
+                            VectorizedArray<Number> grad_diag[dim][dim];
+                            for (unsigned int d=0; d<dim; ++d)
+                              for (unsigned int e=0; e<dim; ++e)
+                                grad_diag[d][e] = grad_jac_inv[d][d][e];
+                            current_data.jacobians_grad_diag.push_back
+                            (Tensor<2,dim,VectorizedArray<Number> >(grad_diag));
+                          }
+
+                          // sets upper-diagonal part of Jacobian
+                          Tensor<1,(dim>1?dim*(dim-1)/2:1),Tensor<1,dim,VectorizedArray<Number> > > grad_upper;
+                          for (unsigned int d=0, count=0; d<dim; ++d)
+                            for (unsigned int e=d+1; e<dim; ++e, ++count)
+                              for (unsigned int f=0; f<dim; ++f)
+                                grad_upper[count][f] = grad_jac_inv[d][e][f];
+                          current_data.jacobians_grad_upper.push_back(grad_upper);
+                        }
+                    }
+                }
+
+              if (update_flags & update_quadrature_points)
+                {
+                  // eventually we turn to the quadrature points that we can
+                  // compress in case we have Cartesian cells. we also need to
+                  // reorder them into arrays of vectorized data types.  first
+                  // go through the cells and find out how much memory we need
+                  // to allocate for the quadrature points. We store
+                  // n_q_points for all cells but Cartesian cells. For
+                  // Cartesian cells, only need to store the values on a
+                  // diagonal through the cell (n_q_points_1d). This will give
+                  // (with some little indexing) the location of all
+                  // quadrature points.
+                  const unsigned int old_size =
+                    current_data.quadrature_points.size();
+                  current_data.rowstart_q_points[cell] = old_size;
+
+                  Tensor<1,dim,VectorizedArray<Number> > quad_point;
+
+                  if (get_cell_type(cell) == cartesian)
+                    {
+                      current_data.quadrature_points.resize (old_size+
+                                                             n_q_points_1d[fe_index]);
+                      for (unsigned int q=0; q<n_q_points_1d[fe_index]; ++q)
+                        for (unsigned int d=0; d<dim; ++d)
+                          current_data.quadrature_points[old_size+q][d] =
+                            data.quadrature_points[q*step_size_cartesian[fe_index]][d];
+                    }
+                  else
+                    {
+                      current_data.quadrature_points.resize (old_size + n_q_points);
+                      for (unsigned int q=0; q<n_q_points; ++q)
+                        for (unsigned int d=0; d<dim; ++d)
+                          current_data.quadrature_points[old_size+q][d] =
+                            data.quadrature_points[q][d];
+                    }
+                }
+            } // end for ( cell < n_macro_cells )
+          current_data.rowstart_jacobians.push_back
+          (current_data.jacobians.size());
+          current_data.rowstart_q_points[n_macro_cells] =
+            current_data.quadrature_points.size();
+
+          // finally, fill the accumulated data for Cartesian and affine cells
+          //  into cartesian_data and affine_data, invert and transpose the
+          //  Jacobians, and compute the JxW value.
+          if (my_q == 0)
+            {
+              cartesian_data.resize(cartesians.size());
+              for (typename std::map<Tensor<1,dim,VEC_ARRAY>,
+                   unsigned int, FPArrayComparator<Number> >::iterator
+                   it = cartesians.begin(); it != cartesians.end(); ++it)
+                {
+                  VectorizedArray<Number> det = make_vectorized_array<Number>(1.);
+                  for (unsigned int d=0; d<dim; ++d)
+                    {
+                      VectorizedArray<Number> jac_d;
+                      for (unsigned int v=0;
+                           v<VectorizedArray<Number>::n_array_elements; ++v)
+                        jac_d[v] = it->first[d][v];
+                      cartesian_data[it->second].first[d] = 1./jac_d;
+                      det *= jac_d;
+                    }
+                  cartesian_data[it->second].second = det;
+                }
+              affine_data.resize(affines.size());
+              for (typename std::map<Tensor<2,dim,VEC_ARRAY>,
+                   unsigned int, FPArrayComparator<Number> >::iterator
+                   it = affines.begin(); it != affines.end(); ++it)
+                {
+                  Tensor<2,dim,VectorizedArray<Number> > jac;
+                  for (unsigned int d=0; d<dim; ++d)
+                    for (unsigned int e=0; e<dim; ++e)
+                      for (unsigned int v=0;
+                           v<VectorizedArray<Number>::n_array_elements; ++v)
+                        jac[d][e][v] = it->first[d][e][v];
+
+                  affine_data[it->second].second = determinant(jac);
+                  affine_data[it->second].first = transpose(invert(jac));
+                }
+            }
+        }
+    }
+
+
+
+    template<int dim, typename Number>
+    void
+    MappingInfo<dim,Number>::evaluate_on_cell (const dealii::Triangulation<dim> &tria,
+                                               const std::pair<unsigned int,unsigned int> *cells,
+                                               const unsigned int  cell,
+                                               const unsigned int  my_q,
+                                               CellType (&cell_t_prev)[n_vector_elements],
+                                               CellType (&cell_t)[n_vector_elements],
+                                               dealii::FEValues<dim,dim> &fe_val,
+                                               CellData          &data) const
+    {
+      const unsigned int n_q_points = fe_val.n_quadrature_points;
+      const UpdateFlags update_flags = fe_val.get_update_flags();
+
+      // this should be the same value as used in HashValue::scaling (but we
+      // do not have that field here)
+      const double zero_tolerance_double = data.jac_size *
+                                           std::numeric_limits<double>::epsilon() * 1024.;
+      for (unsigned int j=0; j<n_vector_elements; ++j)
+        {
+          typename dealii::Triangulation<dim>::cell_iterator
+          cell_it (&tria, cells[j].first, cells[j].second);
+          fe_val.reinit(cell_it);
+          cell_t[j] = undefined;
+
+          // extract quadrature points and store them temporarily. if we have
+          // Cartesian cells, we can compress the indices
+          if (update_flags & update_quadrature_points)
+            for (unsigned int q=0; q<n_q_points; ++q)
+              {
+                const Point<dim> &point = fe_val.quadrature_point(q);
+                for (unsigned int d=0; d<dim; ++d)
+                  data.quadrature_points[q][d][j] = point[d];
+              }
+
+          // if this is not the first quadrature formula and we already have
+          // determined that this cell is either Cartesian or with constant
+          // Jacobian, we have nothing more to do.
+          if (my_q > 0 && (get_cell_type(cell) == cartesian
+                           || get_cell_type(cell) == affine) )
+            continue;
+
+          // first round: if the transformation is detected to be the same as
+          // on the old cell, we only need to copy over the data.
+          if (fe_val.get_cell_similarity() == CellSimilarity::translation
+              &&
+              my_q == 0)
+            {
+              if (j==0)
+                {
+                  Assert (cell>0, ExcInternalError());
+                  cell_t[j] = cell_t_prev[n_vector_elements-1];
+                }
+              else
+                cell_t[j] = cell_t[j-1];
+            }
+
+          const DerivativeForm<1,dim,dim> &jac_0 = fe_val.jacobian(0);
+
+          if (my_q == 0)
+            {
+              // check whether the Jacobian is constant on this cell the first
+              // time we come around here
+              if (cell_t[j] == undefined)
+                {
+                  bool jacobian_constant = true;
+                  for (unsigned int q=1; q<n_q_points; ++q)
+                    {
+                      const DerivativeForm<1,dim,dim> &jac = fe_val.jacobian(q);
+                      for (unsigned int d=0; d<dim; ++d)
+                        for (unsigned int e=0; e<dim; ++e)
+                          if (std::fabs(jac_0[d][e]-jac[d][e]) >
+                              zero_tolerance_double)
+                            jacobian_constant = false;
+                      if (jacobian_constant == false)
+                        break;
+                    }
+
+                  // check whether the Jacobian is diagonal to machine
+                  // accuracy
+                  bool cell_cartesian = jacobian_constant;
+                  for (unsigned int d=0; d<dim; ++d)
+                    for (unsigned int e=0; e<dim; ++e)
+                      if (d!=e)
+                        if (std::fabs(jac_0[d][e]) >
+                            zero_tolerance_double)
+                          {
+                            cell_cartesian=false;
+                            break;
+                          }
+
+                  // in case we have only one quadrature point, we can have
+                  // non-constant Jacobians, but we cannot detect it by
+                  // comparison from one quadrature point to the next: in that
+                  // case, need to look at second derivatives and see whether
+                  // there are some non-zero entries (this is necessary since
+                  // we determine the constness of the Jacobian for the first
+                  // quadrature formula and might not look at them any more
+                  // for the second, third quadrature formula). in any case,
+                  // the flag update_jacobian_grads will be set in that case
+                  if (cell_cartesian == false && n_q_points == 1 &&
+                      update_flags & update_jacobian_grads)
+                    {
+                      const DerivativeForm<1,dim,dim> &jac = fe_val.jacobian(0);
+                      const DerivativeForm<2,dim,dim> &jacobian_grad =
+                        fe_val.jacobian_grad(0);
+                      for (unsigned int d=0; d<dim; ++d)
+                        for (unsigned int e=0; e<dim; ++e)
+                          for (unsigned int f=0; f<dim; ++f)
+                            {
+                              double jac_grad_comp = (jac[f][0] *
+                                                      jacobian_grad[d][e][0]);
+                              for (unsigned int g=1; g<dim; ++g)
+                                jac_grad_comp += (jac[f][g] *
+                                                  jacobian_grad[d][e][g]);
+                              if (std::fabs(jac_grad_comp) >
+                                  zero_tolerance_double)
+                                jacobian_constant = false;
+                            }
+                    }
+                  // set cell type
+                  if (cell_cartesian == true)
+                    cell_t[j] = cartesian;
+                  else if (jacobian_constant == true)
+                    cell_t[j] = affine;
+                  else
+                    cell_t[j] = general;
+                }
+
+              // Cartesian cell
+              if (cell_t[j] == cartesian)
+                {
+                  // set Jacobian into diagonal and clear off-diagonal part
+                  for (unsigned int d=0; d<dim; ++d)
+                    {
+                      data.const_jac[d][d][j] = jac_0[d][d];
+                      for (unsigned int e=d+1; e<dim; ++e)
+                        {
+                          data.const_jac[d][e][j] = 0.;
+                          data.const_jac[e][d][j] = 0.;
+                        }
+                    }
+                  continue;
+                }
+
+              // cell with affine mapping
+              else if (cell_t[j] == affine)
+                {
+                  // compress out very small values
+                  for (unsigned int d=0; d<dim; ++d)
+                    for (unsigned int e=0; e<dim; ++e)
+                      data.const_jac[d][e][j] =
+                        std::fabs(jac_0[d][e]) < zero_tolerance_double ?
+                        0 : jac_0[d][e];
+                  continue;
+                }
+            }
+
+          // general cell case
+
+          // go through all quadrature points and fill in the data into the
+          // temporary data structures with slots for the vectorized data
+          // types
+          for (unsigned int q=0; q<n_q_points; ++q)
+            {
+
+              // compress out very small numbers which are only noise. Then it
+              // is cleaner to use zero straight away (though it does not save
+              // any memory)
+              const DerivativeForm<1,dim,dim> &jac = fe_val.jacobian(q);
+              for (unsigned int d=0; d<dim; ++d)
+                for (unsigned int e=0; e<dim; ++e)
+                  data.general_jac[q][d][e][j] =
+                    std::fabs(jac[d][e]) < zero_tolerance_double ? 0. : jac[d][e];
+
+              // need to do some calculus based on the gradient of the
+              // Jacobian, in order to find the gradient of the inverse
+              // Jacobian which is needed in user code. however, we would like
+              // to perform that on vectorized data types instead of doubles
+              // or floats. to this end, copy the gradients first
+              if (update_flags & update_jacobian_grads)
+                {
+                  const DerivativeForm<2,dim,dim> &jacobian_grad = fe_val.jacobian_grad(q);
+                  for (unsigned int d=0; d<dim; ++d)
+                    for (unsigned int e=0; e<dim; ++e)
+                      for (unsigned int f=0; f<dim; ++f)
+                        data.general_jac_grad[q][d][e][f][j] = jacobian_grad[d][e][f];
+                }
+            }
+        } // end loop over all entries in vectorization (n_vector_elements
+      // cells)
+
+      // set information for next cell
+      for (unsigned int j=0; j<n_vector_elements; ++j)
+        cell_t_prev[j] = cell_t[j];
+    }
+
+
+    // Constructor of the scratch data object: stores the given value
+    // jac_size_in in the member jac_size and leaves all other members
+    // default-initialized.
+    template <int dim, typename Number>
+    MappingInfo<dim,Number>::CellData::CellData (const double jac_size_in)
+      : jac_size (jac_size_in)
+    {
+    }
+
+
+
+    // Brings the arrays quadrature_points, general_jac and general_jac_grad
+    // to the requested size. The current size of general_jac decides whether
+    // any work is necessary.
+    template <int dim, typename Number>
+    void
+    MappingInfo<dim,Number>::CellData::resize (const unsigned int size)
+    {
+      // nothing to do if the arrays already have the requested size
+      if (general_jac.size() == size)
+        return;
+
+      quadrature_points.resize(size);
+      general_jac.resize(size);
+      general_jac_grad.resize(size);
+    }
+
+
+
+    // Returns the sum of MemoryConsumption::memory_consumption() over all
+    // data fields of this structure.
+    template <int dim, typename Number>
+    std::size_t MappingInfo<dim,Number>::MappingInfoDependent::memory_consumption() const
+    {
+      // Jacobians, their determinants times weights, and second derivatives
+      const std::size_t jacobian_memory =
+        MemoryConsumption::memory_consumption (jacobians) +
+        MemoryConsumption::memory_consumption (JxW_values) +
+        MemoryConsumption::memory_consumption (jacobians_grad_diag) +
+        MemoryConsumption::memory_consumption (jacobians_grad_upper);
+
+      // quadrature points in real space and the index into them
+      const std::size_t point_memory =
+        MemoryConsumption::memory_consumption (rowstart_q_points) +
+        MemoryConsumption::memory_consumption (quadrature_points);
+
+      // description of the quadrature formulas
+      const std::size_t quadrature_memory =
+        MemoryConsumption::memory_consumption (quadrature) +
+        MemoryConsumption::memory_consumption (face_quadrature) +
+        MemoryConsumption::memory_consumption (quadrature_weights) +
+        MemoryConsumption::memory_consumption (n_q_points) +
+        MemoryConsumption::memory_consumption (n_q_points_face) +
+        MemoryConsumption::memory_consumption (quad_index_conversion);
+
+      return jacobian_memory + point_memory + quadrature_memory;
+    }
+
+
+
+    // Returns the size of this object itself plus the memory reported by
+    // MemoryConsumption::memory_consumption() for the mapping data of all
+    // quadrature formulas, the compressed affine and Cartesian data, and the
+    // cell type array.
+    template <int dim, typename Number>
+    std::size_t MappingInfo<dim,Number>::memory_consumption() const
+    {
+      return (sizeof (*this) +
+              MemoryConsumption::memory_consumption (mapping_data_gen) +
+              MemoryConsumption::memory_consumption (affine_data) +
+              MemoryConsumption::memory_consumption (cartesian_data) +
+              MemoryConsumption::memory_consumption (cell_type));
+    }
+
+
+
+    // Writes the memory used by the Jacobian data, the second derivative
+    // data and the quadrature points of this structure to the stream out,
+    // delegating the actual output to size_info.print_memory_statistics().
+    template <int dim, typename Number>
+    template <typename StreamType>
+    void MappingInfo<dim,Number>::MappingInfoDependent::print_memory_consumption
+    (StreamType     &out,
+     const SizeInfo &size_info) const
+    {
+      // print_memory_statistics involves global communication, so a section
+      // may only be skipped if no processor has any such data. with MPI, the
+      // decision is therefore based on the maximum of the local sizes over
+      // all processors
+      unsigned int general_size_glob = jacobians.size();
+#ifdef DEAL_II_WITH_MPI
+      {
+        unsigned int general_size_loc = general_size_glob;
+        MPI_Allreduce (&general_size_loc, &general_size_glob, 1, MPI_UNSIGNED,
+                       MPI_MAX, size_info.communicator);
+      }
+#endif
+      if (general_size_glob > 0)
+        {
+          const std::size_t jacobian_memory =
+            MemoryConsumption::memory_consumption (jacobians) +
+            MemoryConsumption::memory_consumption (JxW_values);
+          out << "      Memory Jacobian data:          ";
+          size_info.print_memory_statistics (out, jacobian_memory);
+
+          const std::size_t jacobian_grad_memory =
+            MemoryConsumption::memory_consumption (jacobians_grad_diag) +
+            MemoryConsumption::memory_consumption (jacobians_grad_upper);
+          out << "      Memory second derivative data: ";
+          size_info.print_memory_statistics (out, jacobian_grad_memory);
+        }
+
+      // same procedure for the quadrature points
+      unsigned int quad_size_glob = quadrature_points.size();
+#ifdef DEAL_II_WITH_MPI
+      {
+        unsigned int quad_size_loc = quad_size_glob;
+        MPI_Allreduce (&quad_size_loc, &quad_size_glob, 1, MPI_UNSIGNED,
+                       MPI_MAX, size_info.communicator);
+      }
+#endif
+      if (quad_size_glob > 0)
+        {
+          const std::size_t point_memory =
+            MemoryConsumption::memory_consumption (rowstart_q_points) +
+            MemoryConsumption::memory_consumption (quadrature_points);
+          out << "      Memory quadrature points:      ";
+          size_info.print_memory_statistics (out, point_memory);
+        }
+    }
+
+
+
+    // Writes the memory used by the cell type array and by the compressed
+    // affine and Cartesian transformation data to the stream out, followed by
+    // the statistics of each entry of mapping_data_gen.
+    template <int dim, typename Number>
+    template <typename StreamType>
+    void MappingInfo<dim,Number>::print_memory_consumption(StreamType     &out,
+                                                           const SizeInfo &size_info) const
+    {
+      const std::size_t cell_type_memory =
+        MemoryConsumption::memory_consumption (cell_type);
+      out << "    Cell types:                      ";
+      size_info.print_memory_statistics (out, cell_type_memory);
+
+      const std::size_t compressed_memory =
+        MemoryConsumption::memory_consumption (affine_data) +
+        MemoryConsumption::memory_consumption (cartesian_data);
+      out << "    Memory transformations compr:    ";
+      size_info.print_memory_statistics (out, compressed_memory);
+
+      // one block of output per entry of mapping_data_gen
+      const unsigned int n_components = mapping_data_gen.size();
+      for (unsigned int component=0; component<n_components; ++component)
+        {
+          out << "    Data component " << component << std::endl;
+          mapping_data_gen[component].print_memory_consumption(out, size_info);
+        }
+    }
+
+  } // end of namespace MatrixFreeFunctions
+} // end of namespace internal
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/include/deal.II/matrix_free/matrix_free.h b/include/deal.II/matrix_free/matrix_free.h
new file mode 100644
index 0000000..eec524e
--- /dev/null
+++ b/include/deal.II/matrix_free/matrix_free.h
@@ -0,0 +1,2551 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2011 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__matrix_free_h
+#define dealii__matrix_free_h
+
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/parallel.h>
+#include <deal.II/base/quadrature.h>
+#include <deal.II/base/vectorization.h>
+#include <deal.II/base/template_constraints.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/mapping.h>
+#include <deal.II/fe/mapping_q1.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/parallel_vector.h>
+#include <deal.II/lac/block_vector_base.h>
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/hp/dof_handler.h>
+#include <deal.II/hp/q_collection.h>
+#include <deal.II/matrix_free/helper_functions.h>
+#include <deal.II/matrix_free/shape_info.h>
+#include <deal.II/matrix_free/dof_info.h>
+#include <deal.II/matrix_free/mapping_info.h>
+
+#ifdef DEAL_II_WITH_THREADS
+#include <tbb/task.h>
+#include <tbb/task_scheduler_init.h>
+#include <tbb/parallel_for.h>
+#include <tbb/blocked_range.h>
+#endif
+
+#include <stdlib.h>
+#include <memory>
+#include <limits>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+/**
+ * This class collects all the data that is stored for the matrix free
+ * implementation. The storage scheme is tailored towards several loops
+ * performed with the same data, i.e., typically doing many matrix-vector
+ * products or residual computations on the same mesh. The class is used in
+ * step-37 and step-48.
+ *
+ * This class does not implement any operations involving finite element basis
+ * functions, i.e., regarding the operation performed on the cells. For these
+ * operations, the class FEEvaluation is designed to use the data collected in
+ * this class.
+ *
+ * The stored data can be subdivided into three main components:
+ *
+ * - DoFInfo: It stores how local degrees of freedom relate to global degrees
+ * of freedom. It includes a description of constraints that are evaluated as
+ * going through all local degrees of freedom on a cell.
+ *
+ * - MappingInfo: It stores the transformations from real to unit cells that
+ * are necessary in order to build derivatives of finite element functions and
+ * find location of quadrature weights in physical space.
+ *
+ * - ShapeInfo: It contains the shape functions of the finite element,
+ * evaluated on the unit cell.
+ *
+ * Besides the initialization routines, this class implements only a single
+ * operation, namely a loop over all cells (cell_loop()). This loop is
+ * scheduled in such a way that cells that share degrees of freedom are not
+ * worked on simultaneously, which implies that it is possible to write to
+ * vectors (or matrices) in parallel without having to explicitly synchronize
+ * access to these vectors and matrices. This class does not implement any
+ * shape values, all it does is to cache the respective data. To implement
+ * finite element operations, use the class FEEvaluation (or some of the
+ * related classes).
+ *
+ * This class traverses the cells in a different order than the usual
+ * Triangulation class in deal.II, in order to be flexible with respect to
+ * parallelization in shared memory and vectorization.
+ *
+ * Vectorization is implemented by merging several topological cells into one
+ * so-called macro cell. This enables the application of all cell-related
+ * operations for several cells with one CPU instruction and is one of the
+ * main features of this framework.
+ *
+ * @author Katharina Kormann, Martin Kronbichler, 2010, 2011
+ */
+
+template <int dim, typename Number=double>
+class MatrixFree
+{
+public:
+
+  /**
+   * Collects the options for initialization of the MatrixFree class. The
+   * first parameter specifies the MPI communicator to be used, the second the
+   * parallelization options in shared memory (task-based parallelism, where
+   * one can choose between no parallelism and three schemes that avoid that
+   * cells with access to the same vector entries are accessed
+   * simultaneously), the third with the block size for task parallel
+   * scheduling, the fourth the update flags that should be stored by this
+   * class.
+   *
+   * The fifth parameter specifies the level in the triangulation from which
+   * the indices are to be used. If the level is set to
+   * numbers::invalid_unsigned_int, the active cells are traversed, and
+   * otherwise the cells in the given level. This option has no effect in case
+   * a DoFHandler or hp::DoFHandler is given.
+   *
+   * The parameter @p store_plain_indices indicates whether the DoFInfo
+   * class should also allow for access to vectors without resolving
+   * constraints.
+   *
+   * The last two parameters allow the user to disable some of the
+   * initialization processes. For example, if only the scheduling that avoids
+   * touching the same vector/matrix indices simultaneously is to be found,
+   * the mapping need not be initialized. Likewise, if the mapping has
+   * changed from one iteration to the next but the topology has not (like
+   * when using a deforming mesh with MappingQEulerian), it suffices to
+   * initialize the mapping only.
+   */
+  struct AdditionalData
+  {
+    /**
+     * Collects options for task parallelism: @p none disables shared-memory
+     * parallelism, while @p partition_partition, @p partition_color and
+     * @p color select one of the three schemes described at the member
+     * variable tasks_parallel_scheme.
+     */
+    enum TasksParallelScheme {none, partition_partition, partition_color, color};
+
+    /**
+     * Constructor for AdditionalData. Each argument is copied into the
+     * member variable of the same name; see the documentation of the
+     * individual members for their meaning.
+     */
+    AdditionalData (const MPI_Comm            mpi_communicator   = MPI_COMM_SELF,
+                    const TasksParallelScheme tasks_parallel_scheme = partition_partition,
+                    const unsigned int        tasks_block_size   = 0,
+                    const UpdateFlags         mapping_update_flags  = update_gradients | update_JxW_values,
+                    const unsigned int level_mg_handler = numbers::invalid_unsigned_int,
+                    const bool                store_plain_indices = true,
+                    const bool                initialize_indices = true,
+                    const bool                initialize_mapping = true)
+      :
+      mpi_communicator      (mpi_communicator),
+      tasks_parallel_scheme (tasks_parallel_scheme),
+      tasks_block_size      (tasks_block_size),
+      mapping_update_flags  (mapping_update_flags),
+      level_mg_handler      (level_mg_handler),
+      store_plain_indices   (store_plain_indices),
+      initialize_indices    (initialize_indices),
+      initialize_mapping    (initialize_mapping)
+    {};
+
+    /**
+     * Sets the MPI communicator that the parallel layout of the operator
+     * should be based upon. Defaults to MPI_COMM_SELF, but should be set to a
+     * communicator similar to the one used for a distributed triangulation in
+     * order to inform this class about all cells that are present.
+     */
+    MPI_Comm            mpi_communicator;
+
+    /**
+     * Sets the scheme for task parallelism. There are four options available.
+     * If set to @p none, the operator application is done in serial without
+     * shared memory parallelism. If this class is used together with MPI and
+     * MPI is also used for parallelism within the nodes, this flag should be
+     * set to @p none. The default value is @p partition_partition, i.e. we
+     * actually use multithreading with the first option described below.
+     *
+     * The first option @p partition_partition is to partition the cells on
+     * two levels in onion-skin-like partitions and forming chunks of
+     * tasks_block_size after the partitioning. The partitioning finds sets of
+     * independent cells that enable working in parallel without accessing the
+     * same vector entries at the same time.
+     *
+     * The second option @p partition_color is to use a partition on the
+     * global level and color cells within the partitions (where all chunks
+     * within a color are independent). Here, the subdivision into chunks of
+     * cells is done before the partitioning, which might give worse
+     * partitions but better cache performance if degrees of freedom are not
+     * renumbered.
+     *
+     * The third option @p color is to use a traditional algorithm of coloring
+     * on the global level. This scheme is a special case of the second option
+     * where only one partition is present. Note that for problems with
+     * hanging nodes, there are quite many colors (50 or more in 3D), which
+     * might degrade parallel performance (bad cache behavior, many
+     * synchronization points).
+     */
+    TasksParallelScheme tasks_parallel_scheme;
+
+    /**
+     * Sets the number of so-called macro cells that should form one
+     * partition. If zero size is given, the class tries to find a good size
+     * for the blocks based on MultithreadInfo::n_threads() and the number of
+     * cells present. Otherwise, the given number is used. If the given number
+     * is larger than one third of the number of total cells, this means no
+     * parallelism. Note that in the case vectorization is used, a macro cell
+     * consists of more than one physical cell.
+     */
+    unsigned int        tasks_block_size;
+
+    /**
+     * This flag is used to determine which quantities should be cached. This
+     * class can cache data needed for gradient computations (inverse
+     * Jacobians), Jacobian determinants (JxW), quadrature points as well as
+     * data for Hessians (derivative of Jacobians). By default, only data for
+     * gradients and Jacobian determinants times quadrature weights, JxW, are
+     * cached. If quadrature points or second derivatives are needed, they
+     * must be specified by this field (even though second derivatives might
+     * still be evaluated on Cartesian cells without this option set here,
+     * since there the Jacobian describes the mapping completely).
+     */
+    UpdateFlags         mapping_update_flags;
+
+    /**
+     * This option can be used to define whether we work on a certain level of
+     * the mesh, and not the active cells. If set to invalid_unsigned_int
+     * (which is the default value), the active cells are gone through,
+     * otherwise the level given by this parameter. Note that if you specify
+     * to work on a level, its dofs must be distributed by using
+     * <code>dof_handler.distribute_mg_dofs(fe);</code>.
+     */
+    unsigned int        level_mg_handler;
+
+    /**
+     * Controls whether to allow reading from vectors without resolving
+     * constraints, i.e., just read the local values of the vector. By
+     * default, this option is enabled (the constructor default is true). It
+     * must be set if you want to use
+     * FEEvaluationBase::read_dof_values_plain.
+     */
+    bool                store_plain_indices;
+
+    /**
+     * Option to control whether the indices stored in the DoFHandler should
+     * be read and the pattern for task parallelism should be set up in the
+     * initialize method of MatrixFree. Defaults to true. Can be disabled in
+     * case the mapping should be recomputed (e.g. when using a deforming mesh
+     * described through MappingEulerian) but the topology of cells has
+     * remained the same.
+     */
+    bool                initialize_indices;
+
+    /**
+     * Option to control whether the mapping information should be computed in
+     * the initialize method of MatrixFree. Defaults to true. Can be disabled
+     * when only some indices should be set up (e.g. when only a set of
+     * independent cells should be computed).
+     */
+    bool                initialize_mapping;
+  };
+
+  /**
+   * @name 1: Construction and initialization
+   */
+  //@{
+  /**
+   * Default empty constructor. Does nothing.
+   */
+  MatrixFree ();
+
+  /**
+   * Destructor.
+   */
+  ~MatrixFree();
+
+  /**
+   * Extracts the information needed to perform loops over cells. The
+   * DoFHandler and ConstraintMatrix describe the layout of degrees of
+   * freedom, the DoFHandler and the mapping describe the transformations from
+   * unit to real cell, and the finite element underlying the DoFHandler
+   * together with the quadrature formula describe the local operations. Note
+   * that the finite element underlying the DoFHandler must either be scalar
+   * or contain several copies of the same element. Mixing several different
+   * elements into one FESystem is not allowed. In that case, use the
+   * initialization function with several DoFHandler arguments.
+   *
+   * The @p IndexSet @p locally_owned_dofs is used to specify the parallel
+   * partitioning with MPI. Usually, this need not be specified, and the
+   * other initialization function without an @p IndexSet description can be
+   * used, which gets the partitioning information built into the
+   * DoFHandler.
+   */
+  template <typename DoFHandlerType, typename QuadratureType>
+  void reinit (const Mapping<dim>     &mapping,
+               const DoFHandlerType   &dof_handler,
+               const ConstraintMatrix &constraint,
+               const IndexSet         &locally_owned_dofs,
+               const QuadratureType   &quad,
+               const AdditionalData    additional_data = AdditionalData());
+
+  /**
+   * Initializes the data structures. Same as above, but with index set stored
+   * in the DoFHandler for describing the locally owned degrees of freedom.
+   */
+  template <typename DoFHandlerType, typename QuadratureType>
+  void reinit (const Mapping<dim>     &mapping,
+               const DoFHandlerType   &dof_handler,
+               const ConstraintMatrix &constraint,
+               const QuadratureType   &quad,
+               const AdditionalData    additional_data = AdditionalData());
+
+  /**
+   * Initializes the data structures. Same as above, but using a $Q_1$
+   * mapping.
+   */
+  template <typename DoFHandlerType, typename QuadratureType>
+  void reinit (const DoFHandlerType   &dof_handler,
+               const ConstraintMatrix &constraint,
+               const QuadratureType   &quad,
+               const AdditionalData    additional_data = AdditionalData());
+
+  /**
+   * Extracts the information needed to perform loops over cells. The
+   * DoFHandler and ConstraintMatrix describe the layout of degrees of
+   * freedom, the DoFHandler and the mapping describe the transformations from
+   * unit to real cell, and the finite element underlying the DoFHandler
+   * together with the quadrature formula describe the local operations. As
+   * opposed to the scalar case treated with the other initialization
+   * functions, this function allows for problems with two or more different
+   * finite elements. The DoFHandlers to each element must be passed as
+   * pointers to the initialization function. Note that the finite element
+   * underlying a DoFHandler must either be scalar or contain several copies
+   * of the same element. Mixing several different elements into one
+   * FESystem is not allowed.
+   *
+   * This function also allows for using several quadrature formulas, e.g.
+   * when the description contains independent integrations of elements of
+   * different degrees. However, the number of different quadrature formulas
+   * can be set independently of the number of DoFHandlers, when several
+   * elements are always integrated with the same quadrature formula.
+   *
+   * The @p IndexSet @p locally_owned_dofs is used to specify the parallel
+   * partitioning with MPI. Usually, this need not be specified, and the
+   * other initialization function without an @p IndexSet description can be
+   * used, which gets the partitioning information from the DoFHandler. This
+   * is the most general initialization function.
+   */
+  template <typename DoFHandlerType, typename QuadratureType>
+  void reinit (const Mapping<dim>                          &mapping,
+               const std::vector<const DoFHandlerType *>   &dof_handler,
+               const std::vector<const ConstraintMatrix *> &constraint,
+               const std::vector<IndexSet>                 &locally_owned_set,
+               const std::vector<QuadratureType>           &quad,
+               const AdditionalData                        additional_data = AdditionalData());
+
+  /**
+   * Initializes the data structures. Same as before, but now the index set
+   * description of the locally owned range of degrees of freedom is taken
+   * from the DoFHandler.
+   */
+  template <typename DoFHandlerType, typename QuadratureType>
+  void reinit (const Mapping<dim>                          &mapping,
+               const std::vector<const DoFHandlerType *>   &dof_handler,
+               const std::vector<const ConstraintMatrix *> &constraint,
+               const std::vector<QuadratureType>           &quad,
+               const AdditionalData                        additional_data = AdditionalData());
+
+  /**
+   * Initializes the data structures. Same as above, but using a $Q_1$
+   * mapping.
+   */
+  template <typename DoFHandlerType, typename QuadratureType>
+  void reinit (const std::vector<const DoFHandlerType *>   &dof_handler,
+               const std::vector<const ConstraintMatrix *> &constraint,
+               const std::vector<QuadratureType>           &quad,
+               const AdditionalData                        additional_data = AdditionalData());
+
+  /**
+   * Initializes the data structures. Same as before, but now the index set
+   * description of the locally owned range of degrees of freedom is taken
+   * from the DoFHandler. Moreover, only a single quadrature formula is used,
+   * as might be necessary when several components in a vector-valued problem
+   * are integrated together based on the same quadrature formula.
+   */
+  template <typename DoFHandlerType, typename QuadratureType>
+  void reinit (const Mapping<dim>                          &mapping,
+               const std::vector<const DoFHandlerType *>   &dof_handler,
+               const std::vector<const ConstraintMatrix *> &constraint,
+               const QuadratureType                        &quad,
+               const AdditionalData                        additional_data = AdditionalData());
+
+  /**
+   * Initializes the data structures. Same as above, but using a $Q_1$
+   * mapping.
+   */
+  template <typename DoFHandlerType, typename QuadratureType>
+  void reinit (const std::vector<const DoFHandlerType *>   &dof_handler,
+               const std::vector<const ConstraintMatrix *> &constraint,
+               const QuadratureType                        &quad,
+               const AdditionalData                        additional_data = AdditionalData());
+
+  /**
+   * Copy function. Creates a deep copy of all data structures. It is usually
+   * enough to keep the data for different operations once, so this function
+   * should not be needed very often.
+   */
+  void copy_from (const MatrixFree<dim,Number> &matrix_free_base);
+
+  /**
+   * Clears all data fields and brings the class into a condition similar to
+   * after having called the default constructor.
+   */
+  void clear();
+
+  //@}
+
+  /**
+   * @name 2: Loop over cells
+   */
+  //@{
+  /**
+   * This method runs the loop over all cells (in parallel) and performs the
+   * MPI data exchange on the source vector and destination vector. The first
+   * argument indicates a function object that has the following signature:
+   * <code>cell_operation (const MatrixFree<dim,Number> &, OutVector &,
+   * const InVector &, const std::pair<unsigned int,unsigned int>
+   * &)</code>, where the
+   * first argument passes the data of the calling class and the last argument
+   * defines the range of cells which should be worked on (typically more than
+   * one cell should be worked on in order to reduce overheads).  One can pass
+   * a pointer to an object in this place if it has an <code>operator()</code>
+   * with the correct set of arguments since such a pointer can be converted
+   * to the function object.
+   */
+  template <typename OutVector, typename InVector>
+  void cell_loop (const std_cxx11::function<void (const MatrixFree<dim,Number> &,
+                                                  OutVector &,
+                                                  const InVector &,
+                                                  const std::pair<unsigned int,
+                                                  unsigned int> &)> &cell_operation,
+                  OutVector      &dst,
+                  const InVector &src) const;
+
+  /**
+   * This is the second variant to run the loop over all cells, now providing
+   * a function pointer to a member function of class @p CLASS with the
+   * signature <code>cell_operation (const MatrixFree<dim,Number> &, OutVector
+   * &, const InVector &, const std::pair<unsigned int,unsigned int>
+   * &)const</code>. This
+   * method obviates the need to call std_cxx11::bind to bind the class into
+   * the given function in case the local function needs to access data in the
+   * class (i.e., it is a non-static member function).
+   */
+  template <typename CLASS, typename OutVector, typename InVector>
+  void cell_loop (void (CLASS::*function_pointer)(const MatrixFree &,
+                                                  OutVector &,
+                                                  const InVector &,
+                                                  const std::pair<unsigned int,
+                                                  unsigned int> &)const,
+                  const CLASS    *owning_class,
+                  OutVector      &dst,
+                  const InVector &src) const;
+
+  /**
+   * Same as above, but for class member functions which are non-const.
+   */
+  template <typename CLASS, typename OutVector, typename InVector>
+  void cell_loop (void (CLASS::*function_pointer)(const MatrixFree &,
+                                                  OutVector &,
+                                                  const InVector &,
+                                                  const std::pair<unsigned int,
+                                                  unsigned int> &),
+                  CLASS          *owning_class,
+                  OutVector      &dst,
+                  const InVector &src) const;
+
+  /**
+   * In the hp adaptive case, a subrange of cells as computed during the cell
+   * loop might contain elements of different degrees. Use this function to
+   * compute what the subrange for an individual finite element degree is. The
+   * finite element degree is associated to the vector component given in the
+   * function call.
+   */
+  std::pair<unsigned int,unsigned int>
+  create_cell_subrange_hp (const std::pair<unsigned int,unsigned int> &range,
+                           const unsigned int fe_degree,
+                           const unsigned int vector_component = 0) const;
+
+  /**
+   * In the hp adaptive case, a subrange of cells as computed during the cell
+   * loop might contain elements of different degrees. Use this function to
+   * compute what the subrange for a given index of the hp finite element is,
+   * as opposed to the finite element degree in the other function.
+   */
+  std::pair<unsigned int,unsigned int>
+  create_cell_subrange_hp_by_index (const std::pair<unsigned int,unsigned int> &range,
+                                    const unsigned int fe_index,
+                                    const unsigned int vector_component = 0) const;
+
+  //@}
+
+  /**
+   * @name 3: Initialization of vectors
+   */
+  //@{
+  /**
+   * Initialize function for a general vector. The length of the vector is
+   * equal to the total number of degrees in the DoFHandler. If the vector is
+   * of class parallel::distributed::Vector@<Number@>, the ghost entries are
+   * set accordingly. For vector-valued problems with several DoFHandlers
+   * underlying this class, the parameter @p vector_component defines which
+   * component is to be used.
+   */
+  template <typename VectorType>
+  void initialize_dof_vector(VectorType &vec,
+                             const unsigned int vector_component=0) const;
+
+  /**
+   * Initialize function for a distributed vector. The length of the vector is
+   * equal to the total number of degrees in the DoFHandler. If the vector is
+   * of class parallel::distributed::Vector@<Number@>, the ghost entries are
+   * set accordingly. For vector-valued problems with several DoFHandlers
+   * underlying this class, the parameter @p vector_component defines which
+   * component is to be used.
+   */
+  template <typename Number2>
+  void initialize_dof_vector(parallel::distributed::Vector<Number2> &vec,
+                             const unsigned int vector_component=0) const;
+
+  /**
+   * Returns the partitioner that represents the locally owned data and the
+   * ghost indices to which access is needed for the cell loop. The
+   * partitioner is constructed from the locally owned dofs and ghost dofs
+   * given by the respective fields. If you want to have specific information
+   * about these objects, you can query them with the respective access
+   * functions. If you just want to initialize a (parallel) vector, you should
+   * usually prefer this data structure as the data exchange information can
+   * be reused from one vector to another.
+   */
+  const std_cxx11::shared_ptr<const Utilities::MPI::Partitioner> &
+  get_vector_partitioner (const unsigned int vector_component=0) const;
+
+  /**
+   * Returns the set of degrees of freedom that are owned by the processor.
+   */
+  const IndexSet &
+  get_locally_owned_set (const unsigned int fe_component = 0) const;
+
+  /**
+   * Returns the set of ghost degrees of freedom needed but not owned by the
+   * processor.
+   */
+  const IndexSet &
+  get_ghost_set (const unsigned int fe_component = 0) const;
+
+  /**
+   * Returns a list of all degrees of freedom that are constrained. The list
+   * is returned in MPI-local index space for the locally owned range of the
+   * vector, not in global MPI index space that spans all MPI processors. To
+   * get numbers in global index space, call
+   * <tt>get_vector_partitioner()->local_to_global</tt> on an entry of the
+   * vector. In addition, it only returns the indices for degrees of freedom
+   * that are owned locally, not for ghosts.
+   */
+  const std::vector<unsigned int> &
+  get_constrained_dofs (const unsigned int fe_component = 0) const;
+
+  /**
+   * Calls renumber_dofs function in dof_info which renumbers the degrees of
+   * freedom according to the ordering for parallelization.
+   */
+  void renumber_dofs (std::vector<types::global_dof_index> &renumbering,
+                      const unsigned int vector_component = 0);
+
+  //@}
+
+  /**
+   * @name 4: General information
+   */
+  //@{
+  /**
+   * Returns the number of different DoFHandlers specified at initialization.
+   */
+  unsigned int n_components () const;
+
+  /**
+   * Returns the number of cells this structure is based on. If you are using
+   * a usual DoFHandler, it corresponds to the number of (locally owned)
+   * active cells. Note that most data structures in this class do not
+   * directly act on this number but rather on n_macro_cells() which gives the
+   * number of cells as seen when lumping several cells together with
+   * vectorization.
+   */
+  unsigned int n_physical_cells () const;
+
+  /**
+   * Returns the number of macro cells that this structure works on, i.e., the
+   * number of cell chunks that are worked on after the application of
+   * vectorization which in general works on several cells at once. The cell
+   * range in @p cell_loop runs from zero to n_macro_cells() (exclusive), so
+   * this is the appropriate size if you want to store arrays of data for all
+   * cells to be worked on. This number is approximately
+   * n_physical_cells()/VectorizedArray@<Number@>::n_array_elements
+   * (depending on how many cell chunks do not get filled up completely).
+   */
+  unsigned int n_macro_cells () const;
+
+  /**
+   * In case this structure was built based on a DoFHandler, this returns the
+   * DoFHandler.
+   */
+  const DoFHandler<dim> &
+  get_dof_handler (const unsigned int fe_component = 0) const;
+
+  /**
+   * This returns the cell iterator in deal.II speak to a given cell in the
+   * renumbering of this structure.
+   *
+   * Note that the cell iterators in deal.II go through cells differently to
+   * what the cell loop of this class does. This is because several cells are
+   * worked on together (vectorization), and since cells with neighbors on
+   * different MPI processors need to be accessed at a certain time when
+   * accessing remote data and overlapping communication with computation.
+   */
+  typename DoFHandler<dim>::cell_iterator
+  get_cell_iterator (const unsigned int macro_cell_number,
+                     const unsigned int vector_number,
+                     const unsigned int fe_component = 0) const;
+
+  /**
+   * This returns the cell iterator in deal.II speak to a given cell in the
+   * renumbering of this structure. This function returns an exception in case
+   * the structure was not constructed based on an hp::DoFHandler.
+   *
+   * Note that the cell iterators in deal.II go through cells differently to
+   * what the cell loop of this class does. This is because several cells are
+   * worked on together (vectorization), and since cells with neighbors on
+   * different MPI processors need to be accessed at a certain time when
+   * accessing remote data and overlapping communication with computation.
+   */
+  typename hp::DoFHandler<dim>::active_cell_iterator
+  get_hp_cell_iterator (const unsigned int macro_cell_number,
+                        const unsigned int vector_number,
+                        const unsigned int fe_component = 0) const;
+
+  /**
+   * Since this class uses vectorized data types with usually more than one
+   * value in the data field, a situation might occur when some components of
+   * the vector type do not correspond to an actual cell in the mesh. When
+   * using only this class, one usually does not need to bother about that
+   * fact since the values are padded with zeros. However, when this class is
+   * mixed with deal.II access to cells, care needs to be taken. This function
+   * returns @p true if not all @p vectorization_length cells for the given @p
+   * macro_cell are real cells. To find out how many cells are actually used,
+   * use the function @p n_components_filled.
+   */
+  bool
+  at_irregular_cell (const unsigned int macro_cell_number) const;
+
+  /**
+   * Use this function to find out how many cells over the length of
+   * vectorization data types correspond to real cells in the mesh. For most
+   * given @p macro_cells, this is just @p vectorization_length many, but
+   * there might be one or a few meshes (where the numbers do not add up)
+   * where there are less such components filled, indicated by the function @p
+   * at_irregular_cell.
+   */
+  unsigned int
+  n_components_filled (const unsigned int macro_cell_number) const;
+
+  /**
+   * Returns the number of degrees of freedom per cell for a given hp index.
+   */
+  unsigned int
+  get_dofs_per_cell (const unsigned int fe_component = 0,
+                     const unsigned int hp_active_fe_index = 0) const;
+
+  /**
+   * Returns the number of quadrature points per cell for a given hp index.
+   */
+  unsigned int
+  get_n_q_points (const unsigned int quad_index = 0,
+                  const unsigned int hp_active_fe_index = 0) const;
+
+  /**
+   * Returns the number of degrees of freedom on each face of the cell for
+   * given hp index.
+   */
+  unsigned int
+  get_dofs_per_face (const unsigned int fe_component = 0,
+                     const unsigned int hp_active_fe_index = 0) const;
+
+  /**
+   * Returns the number of quadrature points on each face of the cell for
+   * given hp index.
+   */
+  unsigned int
+  get_n_q_points_face (const unsigned int quad_index = 0,
+                       const unsigned int hp_active_fe_index = 0) const;
+
+  /**
+   * Returns the quadrature rule for given hp index.
+   */
+  const Quadrature<dim> &
+  get_quadrature (const unsigned int quad_index = 0,
+                  const unsigned int hp_active_fe_index = 0) const;
+
+  /**
+   * Returns the face quadrature rule for given hp index.
+   */
+  const Quadrature<dim-1> &
+  get_face_quadrature (const unsigned int quad_index = 0,
+                       const unsigned int hp_active_fe_index = 0) const;
+
+  /**
+   * Queries whether or not the indexation has been set.
+   */
+  bool indices_initialized () const;
+
+  /**
+   * Queries whether or not the geometry-related information for the cells has
+   * been set.
+   */
+
+  bool mapping_initialized () const;
+
+  /**
+   * Returns an approximation of the memory consumption of this class in
+   * bytes.
+   */
+  std::size_t memory_consumption() const;
+
+  /**
+   * Prints a detailed summary of memory consumption in the different
+   * structures of this class to the given output stream.
+   */
+  template <typename StreamType>
+  void print_memory_consumption(StreamType &out) const;
+
+  /**
+   * Prints a summary of this class to the given output stream. It is focused
+   * on the indices, and does not print all the data stored.
+   */
+  void print (std::ostream &out) const;
+
+  //@}
+
+  /**
+   * @name 5: Access of internal data structure (expert mode)
+   */
+  //@{
+  /**
+   * Returns information on task graph.
+   */
+  const internal::MatrixFreeFunctions::TaskInfo &
+  get_task_info () const;
+
+  /**
+   * Returns information on system size.
+   */
+  const internal::MatrixFreeFunctions::SizeInfo &
+  get_size_info () const;
+
+  /**
+   * Returns geometry-dependent information on the cells.
+   */
+  const internal::MatrixFreeFunctions::MappingInfo<dim,Number> &
+  get_mapping_info () const;
+
+  /**
+   * Returns information on indexation degrees of freedom.
+   */
+  const internal::MatrixFreeFunctions::DoFInfo &
+  get_dof_info (const unsigned int fe_component = 0) const;
+
+  /**
+   * Returns the number of weights in the constraint pool.
+   */
+  unsigned int n_constraint_pool_entries() const;
+
+  /**
+   * Returns a pointer to the first number in the constraint pool data with
+   * index @p pool_index (to be used together with @p constraint_pool_end()).
+   */
+  const Number *
+  constraint_pool_begin (const unsigned int pool_index) const;
+
+  /**
+   * Returns a pointer to one past the last number in the constraint pool data
+   * with index @p pool_index (to be used together with @p
+   * constraint_pool_begin()).
+   */
+  const Number *
+  constraint_pool_end (const unsigned int pool_index) const;
+
+  /**
+   * Returns the unit cell information for given hp index.
+   */
+  const internal::MatrixFreeFunctions::ShapeInfo<Number> &
+  get_shape_info (const unsigned int fe_component = 0,
+                  const unsigned int quad_index   = 0,
+                  const unsigned int hp_active_fe_index = 0,
+                  const unsigned int hp_active_quad_index = 0) const;
+
+  //@}
+
+private:
+
+  /**
+   * This is the actual reinit function that sets up the indices for the
+   * DoFHandler case.
+   */
+  void internal_reinit (const Mapping<dim>                &mapping,
+                        const std::vector<const DoFHandler<dim> *> &dof_handler,
+                        const std::vector<const ConstraintMatrix *> &constraint,
+                        const std::vector<IndexSet>       &locally_owned_set,
+                        const std::vector<hp::QCollection<1> > &quad,
+                        const AdditionalData               additional_data);
+
+  /**
+   * Same as before but for hp::DoFHandler instead of generic DoFHandler type.
+   */
+  void internal_reinit (const Mapping<dim>               &mapping,
+                        const std::vector<const hp::DoFHandler<dim>*> &dof_handler,
+                        const std::vector<const ConstraintMatrix *> &constraint,
+                        const std::vector<IndexSet>      &locally_owned_set,
+                        const std::vector<hp::QCollection<1> > &quad,
+                        const AdditionalData              additional_data);
+
+  /**
+   * Initializes the fields in DoFInfo together with the constraint pool that
+   * holds all different weights in the constraints (not part of DoFInfo
+   * because several DoFInfo classes can have the same weights which
+   * consequently only need to be stored once).
+   */
+  void
+  initialize_indices (const std::vector<const ConstraintMatrix *> &constraint,
+                      const std::vector<IndexSet> &locally_owned_set);
+
+  /**
+   * Initializes the DoFHandlers based on a DoFHandler<dim> argument.
+   */
+  void initialize_dof_handlers (const std::vector<const DoFHandler<dim>*> &dof_handlers,
+                                const unsigned int                         level);
+
+  /**
+   * Initializes the DoFHandlers based on a hp::DoFHandler<dim> argument.
+   */
+  void initialize_dof_handlers (const std::vector<const hp::DoFHandler<dim>*> &dof_handlers,
+                                const unsigned int                             level);
+
+  /**
+   * This struct defines which DoFHandler has actually been given at
+   * construction, in order to define the correct behavior when querying the
+   * underlying DoFHandler.
+   */
+  struct DoFHandlers
+  {
+    /**
+     * Constructor. Starts with no DoFHandler attached and an invalid level.
+     * The active handler type is set to @p usual so that the enum never
+     * holds an indeterminate value when it is queried before a reinit.
+     */
+    DoFHandlers ()
+      :
+      active_dof_handler (usual),
+      n_dof_handlers (0),
+      level (numbers::invalid_unsigned_int)
+    {}
+
+    /**
+     * Pointers to the handlers; accessed when active_dof_handler is
+     * @p usual.
+     */
+    std::vector<SmartPointer<const DoFHandler<dim> > >   dof_handler;
+
+    /**
+     * Pointers to the hp handlers; accessed when active_dof_handler is
+     * @p hp.
+     */
+    std::vector<SmartPointer<const hp::DoFHandler<dim> > > hp_dof_handler;
+
+    /**
+     * Selects which of the two pointer arrays above is in use.
+     */
+    enum ActiveDoFHandler { usual, hp } active_dof_handler;
+
+    /**
+     * Number of DoFHandlers; this is the value reported by
+     * MatrixFree::n_components().
+     */
+    unsigned int n_dof_handlers;
+
+    /**
+     * Level associated with the handlers; numbers::invalid_unsigned_int
+     * until it is set (presumably by initialize_dof_handlers() -- confirm).
+     */
+    unsigned int level;
+  };
+
+  /**
+   * Pointers to the DoFHandlers underlying the current problem.
+   */
+  DoFHandlers dof_handlers;
+
+  /**
+   * Contains the information about degrees of freedom on the individual cells
+   * and constraints.
+   */
+  std::vector<internal::MatrixFreeFunctions::DoFInfo> dof_info;
+
+  /**
+   * Contains the weights for constraints stored in DoFInfo. Filled into a
+   * separate field since several vector components might share similar
+   * weights, which reduces memory consumption. Moreover, it obviates template
+   * arguments on DoFInfo and keeps it a plain field of indices only.
+   */
+  std::vector<Number> constraint_pool_data;
+
+  /**
+   * Contains an indicator to the start of the ith index in the constraint
+   * pool data.
+   */
+  std::vector<unsigned int> constraint_pool_row_index;
+
+  /**
+   * Holds information on transformation of cells from reference cell to real
+   * cell that is needed for evaluating integrals.
+   */
+  internal::MatrixFreeFunctions::MappingInfo<dim,Number> mapping_info;
+
+  /**
+   * Contains shape value information on the unit cell.
+   */
+  Table<4,internal::MatrixFreeFunctions::ShapeInfo<Number> > shape_info;
+
+  /**
+   * Describes how the cells are gone through. With the cell level (first
+   * index in this field) and the index within the level, one can reconstruct
+   * a deal.II cell iterator and use all the traditional things deal.II offers
+   * to do with cell iterators.
+   */
+  std::vector<std::pair<unsigned int,unsigned int> > cell_level_index;
+
+  /**
+   * Stores how many cells we have, how many cells that we see after applying
+   * vectorization (i.e., the number of macro cells), and MPI-related stuff.
+   */
+  internal::MatrixFreeFunctions::SizeInfo size_info;
+
+  /**
+   * Information regarding the shared memory parallelization.
+   */
+  internal::MatrixFreeFunctions::TaskInfo task_info;
+
+  /**
+   * Stores whether indices have been initialized.
+   */
+  bool                               indices_are_initialized;
+
+  /**
+   * Stores whether the geometry-related (mapping) information has been
+   * initialized.
+   */
+  bool                               mapping_is_initialized;
+};
+
+
+
+/*----------------------- Inline functions ----------------------------------*/
+
+#ifndef DOXYGEN
+
+
+// Resizes a generic vector to the size reported by the vector partitioner
+// of the component @p comp.
+template <int dim, typename Number>
+template <typename VectorType>
+inline
+void
+MatrixFree<dim,Number>::initialize_dof_vector(VectorType &vec,
+                                              const unsigned int comp) const
+{
+  AssertIndexRange (comp, n_components());
+  const internal::MatrixFreeFunctions::DoFInfo &component_info = dof_info[comp];
+  vec.reinit(component_info.vector_partitioner->size());
+}
+
+
+
+// Overload for parallel::distributed::Vector: hands the vector partitioner
+// of the component @p comp directly to the vector's reinit().
+template <int dim, typename Number>
+template <typename Number2>
+inline
+void
+MatrixFree<dim,Number>::initialize_dof_vector(parallel::distributed::Vector<Number2> &vec,
+                                              const unsigned int comp) const
+{
+  AssertIndexRange (comp, n_components());
+  const internal::MatrixFreeFunctions::DoFInfo &component_info = dof_info[comp];
+  vec.reinit(component_info.vector_partitioner);
+}
+
+
+
+// Gives read access to the partitioner stored in the DoFInfo object of the
+// component @p comp. The returned reference points into dof_info.
+template <int dim, typename Number>
+inline
+const std_cxx11::shared_ptr<const Utilities::MPI::Partitioner> &
+MatrixFree<dim,Number>::get_vector_partitioner (const unsigned int comp) const
+{
+  AssertIndexRange (comp, n_components());
+  const internal::MatrixFreeFunctions::DoFInfo &component_info = dof_info[comp];
+  return component_info.vector_partitioner;
+}
+
+
+
+// Gives read access to the list of constrained dofs stored in the DoFInfo
+// object of the component @p comp.
+template <int dim, typename Number>
+inline
+const std::vector<unsigned int> &
+MatrixFree<dim,Number>::get_constrained_dofs (const unsigned int comp) const
+{
+  AssertIndexRange (comp, n_components());
+  const internal::MatrixFreeFunctions::DoFInfo &component_info = dof_info[comp];
+  return component_info.constrained_dofs;
+}
+
+
+
+// Number of DoFHandlers stored in dof_handlers. In debug mode this also
+// checks that dof_info holds one entry per handler.
+template <int dim, typename Number>
+inline
+unsigned int
+MatrixFree<dim,Number>::n_components () const
+{
+  AssertDimension (dof_handlers.n_dof_handlers, dof_info.size());
+  const unsigned int n_handlers = dof_handlers.n_dof_handlers;
+  return n_handlers;
+}
+
+
+
+// Read access to the task_info member.
+template <int dim, typename Number>
+inline
+const internal::MatrixFreeFunctions::TaskInfo &
+MatrixFree<dim,Number>::get_task_info () const
+{
+  return this->task_info;
+}
+
+
+
+// Read access to the size_info member.
+template <int dim, typename Number>
+inline
+const internal::MatrixFreeFunctions::SizeInfo &
+MatrixFree<dim,Number>::get_size_info () const
+{
+  return this->size_info;
+}
+
+
+
+// Number of macro cells as recorded in size_info.
+template <int dim, typename Number>
+inline
+unsigned int
+MatrixFree<dim,Number>::n_macro_cells () const
+{
+  return this->size_info.n_macro_cells;
+}
+
+
+
+// Number of physical cells; this is size_info's n_active_cells field.
+template <int dim, typename Number>
+inline
+unsigned int
+MatrixFree<dim,Number>::n_physical_cells () const
+{
+  return this->size_info.n_active_cells;
+}
+
+
+
+// Read access to the mapping_info member.
+template <int dim, typename Number>
+inline
+const internal::MatrixFreeFunctions::MappingInfo<dim,Number> &
+MatrixFree<dim,Number>::get_mapping_info () const
+{
+  return this->mapping_info;
+}
+
+
+
+// Read access to the DoFInfo entry of the component @p dof_index.
+template <int dim, typename Number>
+inline
+const internal::MatrixFreeFunctions::DoFInfo &
+MatrixFree<dim,Number>::get_dof_info (unsigned int dof_index) const
+{
+  AssertIndexRange (dof_index, n_components());
+  const internal::MatrixFreeFunctions::DoFInfo &selected_info = dof_info[dof_index];
+  return selected_info;
+}
+
+
+
+// Number of rows in the constraint pool. The row index array holds one
+// more entry than there are rows (row i spans the entries from index i to
+// index i+1), hence the "-1".
+template <int dim, typename Number>
+inline
+unsigned int
+MatrixFree<dim,Number>::n_constraint_pool_entries() const
+{
+  // an empty row index array means that no pool has been set up; without
+  // this guard the unsigned subtraction below would wrap around to a huge
+  // value instead of reporting zero entries
+  if (constraint_pool_row_index.empty())
+    return 0;
+  return constraint_pool_row_index.size()-1;
+}
+
+
+
+// Pointer to the first weight of pool row @p row, or a null pointer if the
+// pool holds no data at all.
+template <int dim, typename Number>
+inline
+const Number *
+MatrixFree<dim,Number>::constraint_pool_begin (const unsigned int row) const
+{
+  AssertIndexRange (row, constraint_pool_row_index.size()-1);
+  if (constraint_pool_data.empty())
+    return 0;
+
+  const Number *const pool_start = &constraint_pool_data[0];
+  return pool_start + constraint_pool_row_index[row];
+}
+
+
+
+// Pointer one past the last weight of pool row @p row (i.e., the start of
+// row @p row + 1), or a null pointer if the pool holds no data at all.
+template <int dim, typename Number>
+inline
+const Number *
+MatrixFree<dim,Number>::constraint_pool_end (const unsigned int row) const
+{
+  AssertIndexRange (row, constraint_pool_row_index.size()-1);
+  if (constraint_pool_data.empty())
+    return 0;
+
+  const Number *const pool_start = &constraint_pool_data[0];
+  return pool_start + constraint_pool_row_index[row+1];
+}
+
+
+
+// Restricts @p range to the cells whose finite element has the given
+// @p degree. An empty result is expressed as (range.second, range.second).
+template <int dim, typename Number>
+inline
+std::pair<unsigned int,unsigned int>
+MatrixFree<dim,Number>::create_cell_subrange_hp
+(const std::pair<unsigned int,unsigned int> &range,
+ const unsigned int degree,
+ const unsigned int vector_component) const
+{
+  AssertIndexRange (vector_component, dof_info.size());
+  const internal::MatrixFreeFunctions::DoFInfo &info = dof_info[vector_component];
+  const std::pair<unsigned int,unsigned int> empty_range (range.second,range.second);
+
+  // no per-cell active fe indices are stored: exactly one conversion entry
+  // is expected, and the range is either taken as a whole or not at all
+  if (info.cell_active_fe_index.empty())
+    {
+      AssertDimension (dof_info[vector_component].fe_index_conversion.size(),1);
+      if (info.fe_index_conversion[0].first == degree)
+        return range;
+      return empty_range;
+    }
+
+  // translate the degree into an fe index; an index at or beyond
+  // max_fe_index yields the empty range
+  const unsigned int fe_index = info.fe_index_from_degree(degree);
+  if (fe_index >= info.max_fe_index)
+    return empty_range;
+  return create_cell_subrange_hp_by_index (range, fe_index, vector_component);
+}
+
+
+
+// Restricts @p range to the cells whose active fe index equals @p fe_index.
+// If no per-cell active fe indices are stored, @p range is returned
+// unchanged. Otherwise the fe indices inside @p range must be sorted, and
+// the result is found by two binary searches.
+template <int dim, typename Number>
+inline
+std::pair<unsigned int,unsigned int>
+MatrixFree<dim,Number>::create_cell_subrange_hp_by_index
+(const std::pair<unsigned int,unsigned int> &range,
+ const unsigned int fe_index,
+ const unsigned int vector_component) const
+{
+  // validate the component before it is used to index dof_info, as
+  // create_cell_subrange_hp() does
+  AssertIndexRange (vector_component, dof_info.size());
+  AssertIndexRange (fe_index, dof_info[vector_component].max_fe_index);
+  const std::vector<unsigned int> &fe_indices =
+    dof_info[vector_component].cell_active_fe_index;
+  if (fe_indices.size() == 0)
+    return range;
+  else
+    {
+#ifdef DEBUG
+      // check the bounds of the range first, so that the sortedness loop
+      // below never reads past the end of fe_indices
+      AssertIndexRange(range.first,fe_indices.size()+1);
+      AssertIndexRange(range.second,fe_indices.size()+1);
+      // the range over which we are searching must be ordered, otherwise we
+      // got a range that spans over too many cells
+      for (unsigned int i=range.first+1; i<range.second; ++i)
+        Assert (fe_indices[i] >= fe_indices[i-1],
+                ExcMessage ("Cell range must be over sorted range of fe indices in hp case!"));
+#endif
+      std::pair<unsigned int,unsigned int> return_range;
+      // first cell in the range whose index is not smaller than fe_index
+      return_range.first =
+        std::lower_bound(&fe_indices[0] + range.first,
+                         &fe_indices[0] + range.second, fe_index)
+        -&fe_indices[0] ;
+      // first cell after that whose index is larger than fe_index
+      return_range.second =
+        std::lower_bound(&fe_indices[0] + return_range.first,
+                         &fe_indices[0] + range.second,
+                         fe_index + 1)-&fe_indices[0];
+      Assert(return_range.first >= range.first &&
+             return_range.second <= range.second, ExcInternalError());
+      return return_range;
+    }
+}
+
+
+
+// Forwards the renumbering request to the DoFInfo object of the component
+// @p vector_component.
+template <int dim, typename Number>
+inline
+void
+MatrixFree<dim,Number>::renumber_dofs (std::vector<types::global_dof_index> &renumbering,
+                                       const unsigned int vector_component)
+{
+  AssertIndexRange(vector_component, dof_info.size());
+  internal::MatrixFreeFunctions::DoFInfo &component_info = dof_info[vector_component];
+  component_info.renumber_dofs (renumbering);
+}
+
+
+
+// Returns the DoFHandler of the component @p dof_index. Only available if
+// the object was set up with DoFHandler objects of the "usual" kind.
+template <int dim, typename Number>
+inline
+const DoFHandler<dim> &
+MatrixFree<dim,Number>::get_dof_handler (const unsigned int dof_index) const
+{
+  AssertIndexRange (dof_index, n_components());
+  if (dof_handlers.active_dof_handler != DoFHandlers::usual)
+    {
+      Assert (false, ExcNotImplemented());
+      // a return statement is needed to keep the compiler quiet; the
+      // invalid index is chosen on purpose so that optimized builds (where
+      // the assertion above is inactive) crash here instead of silently
+      // returning something
+      return *dof_handlers.dof_handler[numbers::invalid_unsigned_int];
+    }
+
+  AssertDimension (dof_handlers.dof_handler.size(),
+                   dof_handlers.n_dof_handlers);
+  return *dof_handlers.dof_handler[dof_index];
+}
+
+
+
+// Builds a deal.II cell iterator for the cell stored in lane
+// @p vector_number of macro cell @p macro_cell_number, using the
+// (level, index) pair recorded in cell_level_index.
+template <int dim, typename Number>
+inline
+typename DoFHandler<dim>::cell_iterator
+MatrixFree<dim,Number>::get_cell_iterator(const unsigned int macro_cell_number,
+                                          const unsigned int vector_number,
+                                          const unsigned int dof_index) const
+{
+  const unsigned int vectorization_length=VectorizedArray<Number>::n_array_elements;
+#ifdef DEBUG
+  AssertIndexRange (dof_index, dof_handlers.n_dof_handlers);
+  AssertIndexRange (macro_cell_number, size_info.n_macro_cells);
+  AssertIndexRange (vector_number, vectorization_length);
+  // a nonzero third entry is the number of filled lanes of an incompletely
+  // filled macro cell; the requested lane must be one of them
+  const unsigned int irreg_filled = dof_info[dof_index].row_starts[macro_cell_number][2];
+  if (irreg_filled > 0)
+    AssertIndexRange (vector_number, irreg_filled);
+#endif
+
+  const DoFHandler<dim> *dofh = 0;
+  if (dof_handlers.active_dof_handler != DoFHandlers::usual)
+    {
+      Assert (false, ExcMessage ("Cannot return DoFHandler<dim>::cell_iterator "
+                                 "for underlying DoFHandler!"));
+    }
+  else
+    {
+      AssertDimension (dof_handlers.dof_handler.size(),
+                       dof_handlers.n_dof_handlers);
+      dofh = dof_handlers.dof_handler[dof_index];
+    }
+
+  // cells are stored lane by lane within each macro cell
+  const std::pair<unsigned int,unsigned int> level_and_index =
+    cell_level_index[macro_cell_number*vectorization_length+vector_number];
+  return typename DoFHandler<dim>::cell_iterator
+         (&dofh->get_triangulation(),
+          level_and_index.first,
+          level_and_index.second,
+          dofh);
+}
+
+
+
+// Counterpart of get_cell_iterator() for objects set up with
+// hp::DoFHandler; asserts that the hp handlers are the active ones.
+template <int dim, typename Number>
+inline
+typename hp::DoFHandler<dim>::active_cell_iterator
+MatrixFree<dim,Number>::get_hp_cell_iterator(const unsigned int macro_cell_number,
+                                             const unsigned int vector_number,
+                                             const unsigned int dof_index) const
+{
+  const unsigned int vectorization_length=VectorizedArray<Number>::n_array_elements;
+#ifdef DEBUG
+  AssertIndexRange (dof_index, dof_handlers.n_dof_handlers);
+  AssertIndexRange (macro_cell_number, size_info.n_macro_cells);
+  AssertIndexRange (vector_number, vectorization_length);
+  // a nonzero third entry is the number of filled lanes of an incompletely
+  // filled macro cell; the requested lane must be one of them
+  const unsigned int irreg_filled = dof_info[dof_index].row_starts[macro_cell_number][2];
+  if (irreg_filled > 0)
+    AssertIndexRange (vector_number, irreg_filled);
+#endif
+
+  Assert (dof_handlers.active_dof_handler == DoFHandlers::hp,
+          ExcNotImplemented());
+  const hp::DoFHandler<dim> *hp_handler = dof_handlers.hp_dof_handler[dof_index];
+
+  // cells are stored lane by lane within each macro cell
+  const std::pair<unsigned int,unsigned int> level_and_index =
+    cell_level_index[macro_cell_number*vectorization_length+vector_number];
+  return typename hp::DoFHandler<dim>::cell_iterator
+         (&hp_handler->get_triangulation(),
+          level_and_index.first,
+          level_and_index.second,
+          hp_handler);
+}
+
+
+
+// True if the third row_starts entry of the macro cell is nonzero. The
+// information is always read from the first component's DoFInfo.
+template <int dim, typename Number>
+inline
+bool
+MatrixFree<dim,Number>::at_irregular_cell (const unsigned int macro_cell) const
+{
+  AssertIndexRange (macro_cell, size_info.n_macro_cells);
+  const internal::MatrixFreeFunctions::DoFInfo &first_component = dof_info[0];
+  return first_component.row_starts[macro_cell][2] > 0;
+}
+
+
+
+// Number of lanes of the macro cell that correspond to real cells. A stored
+// value of zero stands for a completely filled macro cell.
+template <int dim, typename Number>
+inline
+unsigned int
+MatrixFree<dim,Number>::n_components_filled (const unsigned int macro_cell) const
+{
+  AssertIndexRange (macro_cell, size_info.n_macro_cells);
+  const unsigned int n_filled = dof_info[0].row_starts[macro_cell][2];
+  if (n_filled != 0)
+    return n_filled;
+  return VectorizedArray<Number>::n_array_elements;
+}
+
+
+
+// Looks up the dofs per cell for the given component and active fe index.
+template <int dim, typename Number>
+inline
+unsigned int
+MatrixFree<dim,Number>::get_dofs_per_cell(const unsigned int dof_index,
+                                          const unsigned int active_fe_index) const
+{
+  AssertIndexRange (dof_index, dof_info.size());
+  const unsigned int n_dofs =
+    dof_info[dof_index].dofs_per_cell[active_fe_index];
+  return n_dofs;
+}
+
+
+
+// Returns the n_q_points entry that mapping_info.mapping_data_gen[quad_index]
+// stores for the given active_fe_index. Only quad_index is range-checked
+// here; active_fe_index is used for the lookup as-is.
+template <int dim, typename Number>
+inline
+unsigned int
+MatrixFree<dim,Number>::get_n_q_points(const unsigned int quad_index,
+                                       const unsigned int active_fe_index) const
+{
+  AssertIndexRange (quad_index,
+                    mapping_info.mapping_data_gen.size());
+  return mapping_info.mapping_data_gen[quad_index].n_q_points[active_fe_index];
+}
+
+
+
+// Returns the dofs_per_face entry that entry dof_index of dof_info stores
+// for the given active_fe_index. Only dof_index is range-checked here;
+// active_fe_index is used for the lookup as-is.
+template <int dim, typename Number>
+inline
+unsigned int
+MatrixFree<dim,Number>::get_dofs_per_face(const unsigned int dof_index,
+                                          const unsigned int active_fe_index) const
+{
+  AssertIndexRange (dof_index, dof_info.size());
+  return dof_info[dof_index].dofs_per_face[active_fe_index];
+}
+
+
+
+// Returns the n_q_points_face entry that
+// mapping_info.mapping_data_gen[quad_index] stores for the given
+// active_fe_index. Only quad_index is range-checked here; active_fe_index is
+// used for the lookup as-is.
+template <int dim, typename Number>
+inline
+unsigned int
+MatrixFree<dim,Number>::get_n_q_points_face(const unsigned int quad_index,
+                                            const unsigned int active_fe_index) const
+{
+  AssertIndexRange (quad_index,
+                    mapping_info.mapping_data_gen.size());
+  return mapping_info.mapping_data_gen[quad_index].n_q_points_face[active_fe_index];
+}
+
+
+
+// Returns a reference to the locally owned range reported by the
+// vector_partitioner of entry dof_index of dof_info. The reference points
+// into the partitioner object, not to a copy.
+template <int dim, typename Number>
+inline
+const IndexSet &
+MatrixFree<dim,Number>::get_locally_owned_set(const unsigned int dof_index) const
+{
+  AssertIndexRange (dof_index, dof_info.size());
+  return dof_info[dof_index].vector_partitioner->locally_owned_range();
+}
+
+
+
+// Returns a reference to the ghost indices reported by the
+// vector_partitioner of entry dof_index of dof_info. The reference points
+// into the partitioner object, not to a copy.
+template <int dim, typename Number>
+inline
+const IndexSet &
+MatrixFree<dim,Number>::get_ghost_set(const unsigned int dof_index) const
+{
+  AssertIndexRange (dof_index, dof_info.size());
+  return dof_info[dof_index].vector_partitioner->ghost_indices();
+}
+
+
+
+// Returns the ShapeInfo object stored in the four-dimensional shape_info
+// table at position (index_fe, index_quad, active_fe_index,
+// active_quad_index). Each of the four indices is range-checked against the
+// corresponding table dimension.
+template <int dim, typename Number>
+inline
+const internal::MatrixFreeFunctions::ShapeInfo<Number> &
+MatrixFree<dim,Number>::get_shape_info (const unsigned int index_fe,
+                                        const unsigned int index_quad,
+                                        const unsigned int active_fe_index,
+                                        const unsigned int active_quad_index) const
+{
+  AssertIndexRange (index_fe, shape_info.size(0));
+  AssertIndexRange (index_quad, shape_info.size(1));
+  AssertIndexRange (active_fe_index, shape_info.size(2));
+  AssertIndexRange (active_quad_index, shape_info.size(3));
+  return shape_info(index_fe, index_quad,
+                    active_fe_index, active_quad_index);
+}
+
+
+
+// Returns a reference to the dim-dimensional quadrature that
+// mapping_info.mapping_data_gen[quad_index] stores for the given
+// active_fe_index. Only quad_index is range-checked here.
+template <int dim, typename Number>
+inline
+const Quadrature<dim> &
+MatrixFree<dim,Number>::get_quadrature (const unsigned int quad_index,
+                                        const unsigned int active_fe_index) const
+{
+  AssertIndexRange (quad_index, mapping_info.mapping_data_gen.size());
+  return mapping_info.mapping_data_gen[quad_index].
+         quadrature[active_fe_index];
+}
+
+
+
+// Returns a reference to the (dim-1)-dimensional face quadrature that
+// mapping_info.mapping_data_gen[quad_index] stores for the given
+// active_fe_index. Only quad_index is range-checked here.
+template <int dim, typename Number>
+inline
+const Quadrature<dim-1> &
+MatrixFree<dim,Number>::get_face_quadrature (const unsigned int quad_index,
+                                             const unsigned int active_fe_index) const
+{
+  AssertIndexRange (quad_index, mapping_info.mapping_data_gen.size());
+  return mapping_info.mapping_data_gen[quad_index].
+         face_quadrature[active_fe_index];
+}
+
+
+
+// Returns the current value of the indices_are_initialized flag.
+template <int dim, typename Number>
+inline
+bool
+MatrixFree<dim,Number>::indices_initialized () const
+{
+  return indices_are_initialized;
+}
+
+
+
+// Returns the current value of the mapping_is_initialized flag.
+template <int dim, typename Number>
+inline
+bool
+MatrixFree<dim,Number>::mapping_initialized () const
+{
+  return mapping_is_initialized;
+}
+
+
+
+// ------------------------------ reinit functions ---------------------------
+
+namespace internal
+{
+  namespace MatrixFree
+  {
+    // Gathers the locally owned index set of every handler in dofh, in the
+    // order of the input. For generic handler types only the request
+    // level == numbers::invalid_unsigned_int is implemented; any other level
+    // triggers ExcNotImplemented as soon as a handler is visited.
+    template <typename DoFHandlerType>
+    inline
+    std::vector<IndexSet>
+    extract_locally_owned_index_sets (const std::vector<const DoFHandlerType *> &dofh,
+                                      const unsigned int level)
+    {
+      const bool use_active_dofs = (level == numbers::invalid_unsigned_int);
+
+      std::vector<IndexSet> owned_sets;
+      owned_sets.reserve (dofh.size());
+      for (typename std::vector<const DoFHandlerType *>::const_iterator
+           handler = dofh.begin(); handler != dofh.end(); ++handler)
+        {
+          if (use_active_dofs)
+            owned_sets.push_back((*handler)->locally_owned_dofs());
+          else
+            AssertThrow(false, ExcNotImplemented());
+        }
+      return owned_sets;
+    }
+
+    // Overload for ::dealii::DoFHandler, which additionally supports
+    // multigrid levels: with level == numbers::invalid_unsigned_int the
+    // result of locally_owned_dofs() is collected, otherwise the result of
+    // locally_owned_mg_dofs(level).
+    template <int dim, int spacedim>
+    inline
+    std::vector<IndexSet>
+    extract_locally_owned_index_sets (const std::vector<const ::dealii::DoFHandler<dim,spacedim> *> &dofh,
+                                      const unsigned int level)
+    {
+      const bool use_active_dofs = (level == numbers::invalid_unsigned_int);
+
+      std::vector<IndexSet> owned_sets;
+      owned_sets.reserve (dofh.size());
+      for (typename std::vector<const ::dealii::DoFHandler<dim,spacedim> *>::const_iterator
+           handler = dofh.begin(); handler != dofh.end(); ++handler)
+        {
+          if (use_active_dofs)
+            owned_sets.push_back((*handler)->locally_owned_dofs());
+          else
+            owned_sets.push_back((*handler)->locally_owned_mg_dofs(level));
+        }
+      return owned_sets;
+    }
+  }
+}
+
+
+
+template <int dim, typename Number>
+template <typename DoFHandlerType, typename QuadratureType>
+void MatrixFree<dim,Number>::
+reinit(const DoFHandlerType                                  &dof_handler,
+       const ConstraintMatrix                                &constraints_in,
+       const QuadratureType                                  &quad,
+       const typename MatrixFree<dim,Number>::AdditionalData additional_data)
+{
+  // Convenience overload for a single handler/constraint/quadrature without
+  // an explicit mapping: wrap each argument into a one-element vector and
+  // forward to the general reinit with StaticMappingQ1<dim>::mapping.
+  const std::vector<const DoFHandlerType *>   handler_list (1, &dof_handler);
+  const std::vector<const ConstraintMatrix *> constraint_list (1, &constraints_in);
+  const std::vector<QuadratureType>           quadrature_list (1, quad);
+
+  const std::vector<IndexSet> owned_sets =
+    internal::MatrixFree::extract_locally_owned_index_sets
+    (handler_list, additional_data.level_mg_handler);
+
+  reinit(StaticMappingQ1<dim>::mapping,
+         handler_list,
+         constraint_list,
+         owned_sets,
+         quadrature_list,
+         additional_data);
+}
+
+
+
+template <int dim, typename Number>
+template <typename DoFHandlerType, typename QuadratureType>
+void MatrixFree<dim,Number>::
+reinit(const Mapping<dim>                                    &mapping,
+       const DoFHandlerType                                  &dof_handler,
+       const ConstraintMatrix                                &constraints_in,
+       const QuadratureType                                  &quad,
+       const typename MatrixFree<dim,Number>::AdditionalData additional_data)
+{
+  // Convenience overload for a single handler/constraint/quadrature with a
+  // user-supplied mapping: wrap each argument into a one-element vector and
+  // forward to the general reinit.
+  const std::vector<const DoFHandlerType *>   handler_list (1, &dof_handler);
+  const std::vector<const ConstraintMatrix *> constraint_list (1, &constraints_in);
+  const std::vector<QuadratureType>           quadrature_list (1, quad);
+
+  const std::vector<IndexSet> owned_sets =
+    internal::MatrixFree::extract_locally_owned_index_sets
+    (handler_list, additional_data.level_mg_handler);
+
+  reinit(mapping,
+         handler_list,
+         constraint_list,
+         owned_sets,
+         quadrature_list,
+         additional_data);
+}
+
+
+
+template <int dim, typename Number>
+template <typename DoFHandlerType, typename QuadratureType>
+void MatrixFree<dim,Number>::
+reinit(const std::vector<const DoFHandlerType *>   &dof_handler,
+       const std::vector<const ConstraintMatrix *> &constraint,
+       const std::vector<QuadratureType>           &quad,
+       const typename MatrixFree<dim,Number>::AdditionalData additional_data)
+{
+  // Overload for several handlers/constraints/quadratures without an
+  // explicit mapping: determine the locally owned index sets and forward to
+  // the general reinit with StaticMappingQ1<dim>::mapping.
+  std::vector<IndexSet> locally_owned_set =
+    internal::MatrixFree::extract_locally_owned_index_sets
+    (dof_handler, additional_data.level_mg_handler);
+
+  // The quadrature vector is passed on with its own element type. Casting
+  // it to std::vector<Quadrature<1> > would only compile for
+  // QuadratureType == Quadrature<1>, since vectors of different element
+  // types are unrelated classes.
+  reinit(StaticMappingQ1<dim>::mapping, dof_handler,constraint,locally_owned_set,
+         quad, additional_data);
+}
+
+
+
+template <int dim, typename Number>
+template <typename DoFHandlerType, typename QuadratureType>
+void MatrixFree<dim,Number>::
+reinit(const std::vector<const DoFHandlerType *>             &dof_handler,
+       const std::vector<const ConstraintMatrix *>           &constraint,
+       const QuadratureType                                  &quad,
+       const typename MatrixFree<dim,Number>::AdditionalData additional_data)
+{
+  // Overload for several handlers sharing one quadrature, without an
+  // explicit mapping: the quadrature is wrapped into a one-element vector
+  // and StaticMappingQ1<dim>::mapping is used.
+  const std::vector<QuadratureType> quadrature_list (1, quad);
+
+  const std::vector<IndexSet> owned_sets =
+    internal::MatrixFree::extract_locally_owned_index_sets
+    (dof_handler, additional_data.level_mg_handler);
+
+  reinit(StaticMappingQ1<dim>::mapping,
+         dof_handler,
+         constraint,
+         owned_sets,
+         quadrature_list,
+         additional_data);
+}
+
+
+
+template <int dim, typename Number>
+template <typename DoFHandlerType, typename QuadratureType>
+void MatrixFree<dim,Number>::
+reinit(const Mapping<dim>                                    &mapping,
+       const std::vector<const DoFHandlerType *>             &dof_handler,
+       const std::vector<const ConstraintMatrix *>           &constraint,
+       const QuadratureType                                  &quad,
+       const typename MatrixFree<dim,Number>::AdditionalData additional_data)
+{
+  // Overload for several handlers sharing one quadrature, with a
+  // user-supplied mapping: the quadrature is wrapped into a one-element
+  // vector before forwarding to the general reinit.
+  const std::vector<QuadratureType> quadrature_list (1, quad);
+
+  const std::vector<IndexSet> owned_sets =
+    internal::MatrixFree::extract_locally_owned_index_sets
+    (dof_handler, additional_data.level_mg_handler);
+
+  reinit(mapping,
+         dof_handler,
+         constraint,
+         owned_sets,
+         quadrature_list,
+         additional_data);
+}
+
+
+
+template <int dim, typename Number>
+template <typename DoFHandlerType, typename QuadratureType>
+void MatrixFree<dim,Number>::
+reinit(const Mapping<dim>                                   &mapping,
+       const std::vector<const DoFHandlerType *>            &dof_handler,
+       const std::vector<const ConstraintMatrix *>          &constraint,
+       const std::vector<QuadratureType>                    &quad,
+       const typename MatrixFree<dim,Number>::AdditionalData additional_data)
+{
+  // Overload for several handlers/constraints/quadratures with a
+  // user-supplied mapping: only the locally owned index sets are missing
+  // for the general reinit, so compute them and forward everything else.
+  const std::vector<IndexSet> owned_sets =
+    internal::MatrixFree::extract_locally_owned_index_sets
+    (dof_handler, additional_data.level_mg_handler);
+
+  reinit(mapping,
+         dof_handler,
+         constraint,
+         owned_sets,
+         quad,
+         additional_data);
+}
+
+
+
+template <int dim, typename Number>
+template <typename DoFHandlerType, typename QuadratureType>
+void MatrixFree<dim,Number>::
+reinit(const Mapping<dim>                                    &mapping,
+       const std::vector<const DoFHandlerType *>             &dof_handler,
+       const std::vector<const ConstraintMatrix *>           &constraint,
+       const std::vector<IndexSet>                           &locally_owned_set,
+       const std::vector<QuadratureType>                     &quad,
+       const typename MatrixFree<dim,Number>::AdditionalData additional_data)
+{
+  // internal_reinit is called with hp::QCollection<1> objects, so every
+  // entry of quad is wrapped into its own collection, keeping the order of
+  // the input
+  std::vector<hp::QCollection<1> > quad_collections;
+  for (typename std::vector<QuadratureType>::const_iterator
+       q = quad.begin(); q != quad.end(); ++q)
+    quad_collections.push_back (hp::QCollection<1>(*q));
+
+  internal_reinit (mapping, dof_handler, constraint, locally_owned_set,
+                   quad_collections, additional_data);
+}
+
+
+
+// ------------------------------ implementation of cell_loop ---------------
+
+// internal helper functions that define how to call MPI data exchange
+// functions: for generic vectors, do nothing at all. For distributed vectors,
+// call update_ghost_values_start function and so on. If we have collections
+// of vectors, just do the individual functions of the components. The start
+// functions also report whether the vector was without ghost values before,
+// in order to keep ghost values consistent (whether we are in read or write
+// mode). The whole situation is a bit complicated by the fact that we need
+// to treat block vectors differently, which use some additional helper
+// functions to select the blocks and template magic.
+namespace internal
+{
+  // Forward declarations of the block-vector variants (selected through
+  // internal::bool2type<true>); they are defined further down, after the
+  // functions they call for the individual blocks. The two compress
+  // variants take the vector by non-const reference so that these
+  // declarations name the same templates as the definitions below.
+  template<typename VectorStruct>
+  bool update_ghost_values_start_block (const VectorStruct &vec,
+                                        const unsigned int channel,
+                                        internal::bool2type<true>);
+  template<typename VectorStruct>
+  void reset_ghost_values_block (const VectorStruct &vec,
+                                 const bool          zero_out_ghosts,
+                                 internal::bool2type<true>);
+  template<typename VectorStruct>
+  void update_ghost_values_finish_block (const VectorStruct &vec,
+                                         internal::bool2type<true>);
+  template<typename VectorStruct>
+  void compress_start_block (VectorStruct      &vec,
+                             const unsigned int channel,
+                             internal::bool2type<true>);
+  template<typename VectorStruct>
+  void compress_finish_block (VectorStruct &vec,
+                              internal::bool2type<true>);
+
+  // Variant selected through internal::bool2type<false>: nothing is
+  // exchanged, and false is returned.
+  template<typename VectorStruct>
+  bool update_ghost_values_start_block (const VectorStruct &,
+                                        const unsigned int,
+                                        internal::bool2type<false>)
+  {
+    return false;
+  }
+  // Variant selected through internal::bool2type<false>: does nothing.
+  template<typename VectorStruct>
+  void reset_ghost_values_block (const VectorStruct &,
+                                 const bool,
+                                 internal::bool2type<false>)
+  {}
+  // Variant selected through internal::bool2type<false>: does nothing.
+  template<typename VectorStruct>
+  void update_ghost_values_finish_block (const VectorStruct &,
+                                         internal::bool2type<false>)
+  {}
+  // Variant selected through internal::bool2type<false>: does nothing.
+  template<typename VectorStruct>
+  void compress_start_block (const VectorStruct &,
+                             const unsigned int,
+                             internal::bool2type<false>)
+  {}
+  // Variant selected through internal::bool2type<false>: does nothing.
+  template<typename VectorStruct>
+  void compress_finish_block (const VectorStruct &,
+                              internal::bool2type<false>)
+  {}
+
+
+
+  // Entry point for arbitrary vector types. The work is handed to the
+  // *_block helper picked at compile time by IsBlockVector<VectorStruct>.
+  // The result is true if the vector was in a state without ghost values
+  // before, i.e., if ghosts need to be zeroed out in the very end.
+  template<typename VectorStruct>
+  inline
+  bool update_ghost_values_start (const VectorStruct &vec,
+                                  const unsigned int channel = 0)
+  {
+    typedef internal::bool2type<IsBlockVector<VectorStruct>::value> BlockTag;
+    return update_ghost_values_start_block(vec, channel, BlockTag());
+  }
+
+
+
+  // Distributed vector: start the ghost import on the given channel and
+  // report whether the vector had no ghost elements before the call.
+  template<typename Number>
+  inline
+  bool update_ghost_values_start (const parallel::distributed::Vector<Number> &vec,
+                                  const unsigned int                  channel = 0)
+  {
+    // query the ghost state first, the import below may change it
+    const bool ghosts_were_absent = !vec.has_ghost_elements();
+    vec.update_ghost_values_start(channel);
+    return ghosts_were_absent;
+  }
+
+
+
+  // Collection of vectors: start the import for every component, using the
+  // component number as channel. Each component overwrites the flag, so the
+  // value returned is the one of the last component (false for an empty
+  // collection).
+  template <typename VectorStruct>
+  inline
+  bool update_ghost_values_start (const std::vector<VectorStruct> &vec)
+  {
+    bool last_flag = false;
+    for (unsigned int c=0; c<vec.size(); ++c)
+      {
+        last_flag = update_ghost_values_start(vec[c], c);
+      }
+    return last_flag;
+  }
+
+
+
+  // Collection of vector pointers: start the import for every pointed-to
+  // component, using the component number as channel. Each component
+  // overwrites the flag, so the value returned is the one of the last
+  // component (false for an empty collection).
+  template <typename VectorStruct>
+  inline
+  bool update_ghost_values_start (const std::vector<VectorStruct *> &vec)
+  {
+    bool last_flag = false;
+    for (unsigned int c=0; c<vec.size(); ++c)
+      {
+        last_flag = update_ghost_values_start(*vec[c], c);
+      }
+    return last_flag;
+  }
+
+
+
+  // Block vector: start the import for every block. Block b uses the
+  // channel number channel+509*b. Each block overwrites the flag, so the
+  // value returned is the one of the last block (false without blocks).
+  template<typename VectorStruct>
+  inline
+  bool update_ghost_values_start_block (const VectorStruct &vec,
+                                        const unsigned int channel,
+                                        internal::bool2type<true>)
+  {
+    bool last_flag = false;
+    for (unsigned int b=0; b<vec.n_blocks(); ++b)
+      {
+        last_flag = update_ghost_values_start(vec.block(b), channel+509*b);
+      }
+    return last_flag;
+  }
+
+
+
+  // Entry point for arbitrary vector types. If the input vector did not
+  // have ghosts imported, they are cleared here again so that subsequent
+  // operations, e.g. in linear solvers, do not work with ghosts all the
+  // time. The work is handed to the *_block helper picked at compile time
+  // by IsBlockVector<VectorStruct>.
+  template<typename VectorStruct>
+  inline
+  void reset_ghost_values (const VectorStruct &vec,
+                           const bool          zero_out_ghosts)
+  {
+    typedef internal::bool2type<IsBlockVector<VectorStruct>::value> BlockTag;
+    reset_ghost_values_block(vec, zero_out_ghosts, BlockTag());
+  }
+
+
+
+  // Distributed vector: zero the ghost entries if requested, otherwise
+  // leave the vector untouched.
+  template<typename Number>
+  inline
+  void reset_ghost_values (const parallel::distributed::Vector<Number> &vec,
+                           const bool zero_out_ghosts)
+  {
+    if (!zero_out_ghosts)
+      return;
+
+    // the vector arrives as const, but zero_out_ghosts() is called on a
+    // non-const reference to it
+    parallel::distributed::Vector<Number> &writable_vec =
+      const_cast<parallel::distributed::Vector<Number>&>(vec);
+    writable_vec.zero_out_ghosts();
+  }
+
+
+
+  // Collection of vectors: apply reset_ghost_values with the same flag to
+  // every component.
+  template <typename VectorStruct>
+  inline
+  void reset_ghost_values (const std::vector<VectorStruct> &vec,
+                           const bool zero_out_ghosts)
+  {
+    for (unsigned int c=0; c<vec.size(); ++c)
+      {
+        reset_ghost_values(vec[c], zero_out_ghosts);
+      }
+  }
+
+
+
+  // Collection of vector pointers: apply reset_ghost_values with the same
+  // flag to every pointed-to component.
+  template <typename VectorStruct>
+  inline
+  void reset_ghost_values (const std::vector<VectorStruct *> &vec,
+                           const bool zero_out_ghosts)
+  {
+    for (unsigned int c=0; c<vec.size(); ++c)
+      {
+        reset_ghost_values(*vec[c], zero_out_ghosts);
+      }
+  }
+
+
+
+  // Block vector: apply reset_ghost_values with the same flag to every
+  // block.
+  template<typename VectorStruct>
+  inline
+  void reset_ghost_values_block (const VectorStruct &vec,
+                                 const bool          zero_out_ghosts,
+                                 internal::bool2type<true>)
+  {
+    for (unsigned int b=0; b<vec.n_blocks(); ++b)
+      {
+        reset_ghost_values(vec.block(b), zero_out_ghosts);
+      }
+  }
+
+
+
+  // Entry point for arbitrary vector types: hands the work to the *_block
+  // helper picked at compile time by IsBlockVector<VectorStruct>.
+  template <typename VectorStruct>
+  inline
+  void update_ghost_values_finish (const VectorStruct &vec)
+  {
+    typedef internal::bool2type<IsBlockVector<VectorStruct>::value> BlockTag;
+    update_ghost_values_finish_block(vec, BlockTag());
+  }
+
+
+
+  // Distributed vector: forwards to the vector's own
+  // update_ghost_values_finish().
+  template <typename Number>
+  inline
+  void update_ghost_values_finish (const parallel::distributed::Vector<Number> &vec)
+  {
+    vec.update_ghost_values_finish();
+  }
+
+
+
+  // Collection of vectors: finish the ghost import of every component.
+  template <typename VectorStruct>
+  inline
+  void update_ghost_values_finish (const std::vector<VectorStruct> &vec)
+  {
+    for (unsigned int c=0; c<vec.size(); ++c)
+      {
+        update_ghost_values_finish(vec[c]);
+      }
+  }
+
+
+
+  // Collection of vector pointers: finish the ghost import of every
+  // pointed-to component.
+  template <typename VectorStruct>
+  inline
+  void update_ghost_values_finish (const std::vector<VectorStruct *> &vec)
+  {
+    for (unsigned int c=0; c<vec.size(); ++c)
+      {
+        update_ghost_values_finish(*vec[c]);
+      }
+  }
+
+
+
+  // Block vector: finish the ghost import of every block.
+  template <typename VectorStruct>
+  inline
+  void update_ghost_values_finish_block (const VectorStruct &vec,
+                                         internal::bool2type<true>)
+  {
+    for (unsigned int b=0; b<vec.n_blocks(); ++b)
+      {
+        update_ghost_values_finish(vec.block(b));
+      }
+  }
+
+
+
+  // Entry point for arbitrary vector types: hands the work to the *_block
+  // helper picked at compile time by IsBlockVector<VectorStruct>.
+  template <typename VectorStruct>
+  inline
+  void compress_start (VectorStruct &vec,
+                       const unsigned int channel = 0)
+  {
+    typedef internal::bool2type<IsBlockVector<VectorStruct>::value> BlockTag;
+    compress_start_block (vec, channel, BlockTag());
+  }
+
+
+
+  // Distributed vector: forwards to the vector's own compress_start() with
+  // the given channel.
+  template <typename Number>
+  inline
+  void compress_start (parallel::distributed::Vector<Number> &vec,
+                       const unsigned int           channel = 0)
+  {
+    vec.compress_start(channel);
+  }
+
+
+
+  // Collection of vectors: start the compress operation of every
+  // component, using the component number as channel.
+  template <typename VectorStruct>
+  inline
+  void compress_start (std::vector<VectorStruct> &vec)
+  {
+    for (unsigned int c=0; c<vec.size(); ++c)
+      {
+        compress_start (vec[c], c);
+      }
+  }
+
+
+
+  // Collection of vector pointers: start the compress operation of every
+  // pointed-to component, using the component number as channel.
+  template <typename VectorStruct>
+  inline
+  void compress_start (std::vector<VectorStruct *> &vec)
+  {
+    for (unsigned int c=0; c<vec.size(); ++c)
+      {
+        compress_start (*vec[c], c);
+      }
+  }
+
+
+
+  // Block vector: start the compress operation of every block. Block b
+  // uses the channel number channel + 500*b.
+  template <typename VectorStruct>
+  inline
+  void compress_start_block (VectorStruct      &vec,
+                             const unsigned int channel,
+                             internal::bool2type<true>)
+  {
+    for (unsigned int b=0; b<vec.n_blocks(); ++b)
+      {
+        compress_start(vec.block(b), channel + 500*b);
+      }
+  }
+
+
+
+  // Entry point for arbitrary vector types: hands the work to the *_block
+  // helper picked at compile time by IsBlockVector<VectorStruct>.
+  template <typename VectorStruct>
+  inline
+  void compress_finish (VectorStruct &vec)
+  {
+    typedef internal::bool2type<IsBlockVector<VectorStruct>::value> BlockTag;
+    compress_finish_block(vec, BlockTag());
+  }
+
+
+
+  // Distributed vector: forwards to the vector's own compress_finish(),
+  // always with the operation ::dealii::VectorOperation::add.
+  template <typename Number>
+  inline
+  void compress_finish (parallel::distributed::Vector<Number> &vec)
+  {
+    vec.compress_finish(::dealii::VectorOperation::add);
+  }
+
+
+
+  // Collection of vectors: finish the compress operation of every
+  // component.
+  template <typename VectorStruct>
+  inline
+  void compress_finish (std::vector<VectorStruct> &vec)
+  {
+    for (unsigned int c=0; c<vec.size(); ++c)
+      {
+        compress_finish(vec[c]);
+      }
+  }
+
+
+
+  // Collection of vector pointers: finish the compress operation of every
+  // pointed-to component.
+  template <typename VectorStruct>
+  inline
+  void compress_finish (std::vector<VectorStruct *> &vec)
+  {
+    for (unsigned int c=0; c<vec.size(); ++c)
+      {
+        compress_finish(*vec[c]);
+      }
+  }
+
+
+
+  // Block vector: finish the compress operation of every block.
+  template <typename VectorStruct>
+  inline
+  void compress_finish_block (VectorStruct &vec,
+                              internal::bool2type<true>)
+  {
+    for (unsigned int b=0; b<vec.n_blocks(); ++b)
+      {
+        compress_finish(vec.block(b));
+      }
+  }
+
+
+
+#ifdef DEAL_II_WITH_THREADS
+
+  // This defines the TBB data structures that are needed to schedule the
+  // partition-partition variant
+
+  namespace partition
+  {
+    // TBB task that runs the worker on one entry of
+    // task_info.partition_color_blocks_data. The worker and the task info
+    // are held by reference, so both must outlive the task.
+    template<typename Worker>
+    class CellWork : public tbb::task
+    {
+    public:
+      // Stores the arguments; the public member dummy is not set here and
+      // has to be assigned from outside before a blocked task is executed.
+      CellWork (const Worker &worker_in,
+                const unsigned int partition_in,
+                const internal::MatrixFreeFunctions::TaskInfo &task_info_in,
+                const bool is_blocked_in)
+        :
+        worker (worker_in),
+        partition (partition_in),
+        task_info (task_info_in),
+        is_blocked (is_blocked_in)
+      {};
+      // Calls the worker on the range
+      // [partition_color_blocks_data[partition],
+      //  partition_color_blocks_data[partition+1]) and, if this task was
+      // created as blocked, spawns the dummy task afterwards.
+      tbb::task *execute ()
+      {
+        std::pair<unsigned int, unsigned int> cell_range
+        (task_info.partition_color_blocks_data[partition],
+         task_info.partition_color_blocks_data[partition+1]);
+        worker(cell_range);
+        if (is_blocked==true)
+          dummy->spawn (*dummy);
+        return NULL;
+      }
+
+      // empty task spawned at the end of execute() when is_blocked is true;
+      // assigned by the code that builds the task graph
+      tbb::empty_task *dummy;
+
+    private:
+      const Worker      &worker;
+      const unsigned int partition;
+      const internal::MatrixFreeFunctions::TaskInfo &task_info;
+      const bool         is_blocked;
+    };
+
+
+
+    // TBB task that processes one partition by building a nested graph of
+    // CellWork tasks below a local root task and waiting for it. The worker
+    // function and the task info are held by reference, so both must
+    // outlive the task.
+    template<typename Worker>
+    class PartitionWork : public tbb::task
+    {
+    public:
+      // Stores the arguments; the public member dummy is not set here and
+      // has to be assigned from outside before a blocked task is executed.
+      PartitionWork (const Worker &function_in,
+                     const unsigned int partition_in,
+                     const internal::MatrixFreeFunctions::TaskInfo &task_info_in,
+                     const bool    is_blocked_in = false)
+        :
+        function (function_in),
+        partition (partition_in),
+        task_info (task_info_in),
+        is_blocked (is_blocked_in)
+      {};
+      // Builds and runs the task graph of this partition, then spawns the
+      // dummy task if this task was created as blocked.
+      tbb::task *execute ()
+      {
+        tbb::empty_task *root = new( tbb::task::allocate_root() )
+        tbb::empty_task;
+        unsigned int evens = task_info.partition_evens[partition];
+        unsigned int odds  = task_info.partition_odds[partition];
+        unsigned int n_blocked_workers =
+          task_info.partition_n_blocked_workers[partition];
+        unsigned int n_workers = task_info.partition_n_workers[partition];
+        std::vector<CellWork<Worker>*> worker(n_workers);
+        std::vector<CellWork<Worker>*> blocked_worker(n_blocked_workers);
+
+        // the root waits for the 'evens' workers allocated as its children,
+        // plus one for the wait_for_all() call below
+        root->set_ref_count(evens+1);
+        for (unsigned int j=0; j<evens; j++)
+          {
+            // worker j handles the block at offset 2*j from the first block
+            // of this partition
+            worker[j] = new(root->allocate_child())
+            CellWork<Worker>(function, task_info.
+                             partition_color_blocks_row_index[partition]+2*j,
+                             task_info, false);
+            if (j>0)
+              {
+                // worker j has two children: the dummy of the preceding
+                // blocked worker (allocated here) and the child allocated
+                // further down in this iteration
+                worker[j]->set_ref_count(2);
+                blocked_worker[j-1]->dummy = new(worker[j]->allocate_child())
+                tbb::empty_task;
+                // only now that its dummy is set, the preceding blocked
+                // worker is spawned
+                worker[j-1]->spawn(*blocked_worker[j-1]);
+              }
+            else
+              worker[j]->set_ref_count(1);
+            if (j<evens-1)
+              {
+                // blocked worker j handles the block at offset 2*j+1; it is
+                // a child of worker j and is spawned in the next iteration
+                blocked_worker[j] = new(worker[j]->allocate_child())
+                CellWork<Worker>(function, task_info.
+                                 partition_color_blocks_row_index
+                                 [partition] + 2*j+1, task_info, true);
+              }
+            else
+              {
+                // last even worker: its child is either one more
+                // (non-blocked) worker for the block at offset 2*j+1, or an
+                // empty task
+                if (odds==evens)
+                  {
+                    worker[evens] = new(worker[j]->allocate_child())
+                    CellWork<Worker>(function, task_info.
+                                     partition_color_blocks_row_index[partition]+2*j+1,
+                                     task_info, false);
+                    worker[j]->spawn(*worker[evens]);
+                  }
+                else
+                  {
+                    tbb::empty_task *child = new(worker[j]->allocate_child())
+                    tbb::empty_task();
+                    worker[j]->spawn(*child);
+                  }
+              }
+          }
+
+        root->wait_for_all();
+        root->destroy(*root);
+        if (is_blocked==true)
+          dummy->spawn (*dummy);
+        return NULL;
+      }
+
+      // empty task spawned at the end of execute() when is_blocked is true;
+      // assigned by the code that builds the task graph
+      tbb::empty_task *dummy;
+
+    private:
+      const Worker  &function;
+      const unsigned int partition;
+      const internal::MatrixFreeFunctions::TaskInfo &task_info;
+      const bool     is_blocked;
+    };
+
+  } // end of namespace partition
+
+
+
+  namespace color
+  {
+    // Function object for tbb::parallel_for: translates every block number
+    // of the given range into a cell range and calls the worker on it. The
+    // worker and the task info are held by reference, so both must outlive
+    // this object.
+    template <typename Worker>
+    class CellWork
+    {
+    public:
+      CellWork (const Worker                   &worker_in,
+                const internal::MatrixFreeFunctions::TaskInfo &task_info_in)
+        :
+        worker (worker_in),
+        task_info (task_info_in)
+      {}
+
+      void operator()(const tbb::blocked_range<unsigned int> &r) const
+      {
+        for (unsigned int block=r.begin(); block<r.end(); ++block)
+          {
+            std::pair<unsigned int,unsigned int> cell_range;
+            if (block <= task_info.position_short_block)
+              {
+                // blocks up to and including the short one start at a
+                // multiple of the regular block size; only the short block
+                // itself has the length block_size_last
+                cell_range.first = block*task_info.block_size;
+                if (block == task_info.position_short_block)
+                  cell_range.second = cell_range.first +
+                                      task_info.block_size_last;
+                else
+                  cell_range.second = cell_range.first +
+                                      task_info.block_size;
+              }
+            else
+              {
+                // blocks behind the short one: one of the preceding blocks
+                // has the length block_size_last instead of block_size
+                cell_range.first = (block-1)*task_info.block_size+
+                                   task_info.block_size_last;
+                cell_range.second = cell_range.first + task_info.block_size;
+              }
+            worker (cell_range);
+          }
+      }
+
+    private:
+      const Worker   &worker;
+      const internal::MatrixFreeFunctions::TaskInfo &task_info;
+    };
+
+
+    // TBB task that processes one partition by running a parallel_for with
+    // a CellWork function object over the blocks of that partition. The
+    // worker and the task info are held by reference, so both must outlive
+    // the task.
+    template<typename Worker>
+    class PartitionWork : public tbb::task
+    {
+    public:
+      // Stores the arguments; the public member dummy is not set here and
+      // has to be assigned from outside before a blocked task is executed.
+      PartitionWork (const Worker &worker_in,
+                     const unsigned int partition_in,
+                     const internal::MatrixFreeFunctions::TaskInfo &task_info_in,
+                     const bool    is_blocked_in)
+        :
+        worker (worker_in),
+        partition (partition_in),
+        task_info (task_info_in),
+        is_blocked (is_blocked_in)
+      {};
+      // Runs the blocks [partition_color_blocks_data[partition],
+      // partition_color_blocks_data[partition+1]) with grain size 1 and, if
+      // this task was created as blocked, spawns the dummy task afterwards.
+      tbb::task *execute ()
+      {
+        unsigned int lower = task_info.partition_color_blocks_data[partition],
+                     upper = task_info.partition_color_blocks_data[partition+1];
+        parallel_for(tbb::blocked_range<unsigned int>(lower,upper,1),
+                     CellWork<Worker> (worker,task_info));
+        if (is_blocked==true)
+          dummy->spawn (*dummy);
+        return NULL;
+      }
+
+      // empty task spawned at the end of execute() when is_blocked is true;
+      // assigned by the code that builds the task graph
+      tbb::empty_task *dummy;
+
+    private:
+      const Worker &worker;
+      const unsigned int partition;
+      const internal::MatrixFreeFunctions::TaskInfo &task_info;
+      const bool is_blocked;
+    };
+
+  } // end of namespace color
+
+
+  // TBB task that finishes the ghost value import of the source vector.
+  // The vector is held by reference and must outlive the task.
+  template<typename VectorStruct>
+  class MPIComDistribute : public tbb::task
+  {
+  public:
+    MPIComDistribute (const VectorStruct  &src_in)
+      :
+      src(src_in)
+    {}
+
+    // Calls internal::update_ghost_values_finish on the stored vector; no
+    // follow-up task is handed back to the scheduler.
+    tbb::task *execute ()
+    {
+      internal::update_ghost_values_finish(src);
+      return NULL;
+    }
+
+  private:
+    const VectorStruct &src;
+  };
+
+
+
+  // TBB task that starts the compress operation of the destination vector.
+  // The vector is held by reference and must outlive the task.
+  template<typename VectorStruct>
+  class MPIComCompress : public tbb::task
+  {
+  public:
+    MPIComCompress (VectorStruct        &dst_in)
+      :
+      dst(dst_in)
+    {}
+
+    // Calls internal::compress_start on the stored vector; no follow-up
+    // task is handed back to the scheduler.
+    tbb::task *execute ()
+    {
+      internal::compress_start(dst);
+      return NULL;
+    }
+
+  private:
+    VectorStruct &dst;
+  };
+
+#endif // DEAL_II_WITH_THREADS
+
+} // end of namespace internal
+
+
+
+template <int dim, typename Number>
+template <typename OutVector, typename InVector>
+inline
+void
+MatrixFree<dim, Number>::cell_loop
+(const std_cxx11::function<void (const MatrixFree<dim,Number> &,
+                                 OutVector &,
+                                 const InVector &,
+                                 const std::pair<unsigned int,
+                                 unsigned int> &)> &cell_operation,
+ OutVector       &dst,
+ const InVector  &src) const
+{
+  // in any case, need to start the ghost import at the beginning
+  bool ghosts_were_not_set = internal::update_ghost_values_start (src);
+
+#ifdef DEAL_II_WITH_THREADS
+
+  // Use multithreading if so requested and if there is enough work to do in
+  // parallel (the code might hang if there are less than two chunks!)
+  if (task_info.use_multithreading == true && task_info.n_blocks > 3)
+    {
+      // to simplify the function calls, bind away all arguments except the
+      // cell range
+      typedef
+      std_cxx11::function<void (const std::pair<unsigned int,unsigned int> &range)>
+      Worker;
+
+      const Worker func = std_cxx11::bind (std_cxx11::ref(cell_operation),
+                                           std_cxx11::cref(*this),
+                                           std_cxx11::ref(dst),
+                                           std_cxx11::cref(src),
+                                           std_cxx11::_1);
+
+      if (task_info.use_partition_partition == true)
+        {
+          tbb::empty_task *root = new( tbb::task::allocate_root() )
+          tbb::empty_task;
+          unsigned int evens = task_info.evens;
+          unsigned int odds  = task_info.odds;
+          root->set_ref_count(evens+1);
+          unsigned int n_blocked_workers = task_info.n_blocked_workers;
+          unsigned int n_workers = task_info.n_workers;
+          std::vector<internal::partition::PartitionWork<Worker>*>
+          worker(n_workers);
+          std::vector<internal::partition::PartitionWork<Worker>*>
+          blocked_worker(n_blocked_workers);
+          internal::MPIComCompress<OutVector> *worker_compr =
+            new(root->allocate_child())
+          internal::MPIComCompress<OutVector>(dst);
+          worker_compr->set_ref_count(1);
+          for (unsigned int j=0; j<evens; j++)
+            {
+              if (j>0)
+                {
+                  worker[j] = new(root->allocate_child())
+                  internal::partition::PartitionWork<Worker>
+                  (func,2*j,task_info,false);
+                  worker[j]->set_ref_count(2);
+                  blocked_worker[j-1]->dummy = new(worker[j]->allocate_child())
+                  tbb::empty_task;
+                  if (j>1)
+                    worker[j-1]->spawn(*blocked_worker[j-1]);
+                  else
+                    worker_compr->spawn(*blocked_worker[j-1]);
+                }
+              else
+                {
+                  worker[j] = new(worker_compr->allocate_child())
+                  internal::partition::PartitionWork<Worker>
+                  (func,2*j,task_info,false);
+                  worker[j]->set_ref_count(2);
+                  internal::MPIComDistribute<InVector> *worker_dist =
+                    new (worker[j]->allocate_child())
+                  internal::MPIComDistribute<InVector>(src);
+                  worker_dist->spawn(*worker_dist);
+                }
+              if (j<evens-1)
+                {
+                  blocked_worker[j] = new(worker[j]->allocate_child())
+                  internal::partition::PartitionWork<Worker>
+                  (func,2*j+1,task_info,true);
+                }
+              else
+                {
+                  if (odds==evens)
+                    {
+                      worker[evens] = new(worker[j]->allocate_child())
+                      internal::partition::PartitionWork<Worker>
+                      (func,2*j+1,task_info,false);
+                      worker[j]->spawn(*worker[evens]);
+                    }
+                  else
+                    {
+                      tbb::empty_task *child = new(worker[j]->allocate_child())
+                      tbb::empty_task();
+                      worker[j]->spawn(*child);
+                    }
+                }
+            }
+
+          root->wait_for_all();
+          root->destroy(*root);
+        }
+      else // end of partition-partition, start of partition-color
+        {
+          unsigned int evens = task_info.evens;
+          unsigned int odds  = task_info.odds;
+
+          // check whether there is only one partition. if not, build up the
+          // tree of partitions
+          if (odds > 0)
+            {
+              tbb::empty_task *root = new( tbb::task::allocate_root() ) tbb::empty_task;
+              root->set_ref_count(evens+1);
+              unsigned int n_blocked_workers = odds-(odds+evens+1)%2;
+              unsigned int n_workers = task_info.partition_color_blocks_data.size()-1-
+                                       n_blocked_workers;
+              std::vector<internal::color::PartitionWork<Worker>*> worker(n_workers);
+              std::vector<internal::color::PartitionWork<Worker>*> blocked_worker(n_blocked_workers);
+              unsigned int worker_index = 0, slice_index = 0;
+              unsigned int spawn_index =  0, spawn_index_new = 0;
+              int spawn_index_child = -2;
+              internal::MPIComCompress<OutVector> *worker_compr = new(root->allocate_child())
+              internal::MPIComCompress<OutVector>(dst);
+              worker_compr->set_ref_count(1);
+              for (unsigned int part=0;
+                   part<task_info.partition_color_blocks_row_index.size()-1; part++)
+                {
+                  spawn_index_new = worker_index;
+                  if (part == 0)
+                    worker[worker_index] = new(worker_compr->allocate_child())
+                    internal::color::PartitionWork<Worker>(func,slice_index,task_info,false);
+                  else
+                    worker[worker_index] = new(root->allocate_child())
+                    internal::color::PartitionWork<Worker>(func,slice_index,task_info,false);
+                  slice_index++;
+                  for (; slice_index<task_info.partition_color_blocks_row_index[part+1];
+                       slice_index++)
+                    {
+                      worker[worker_index]->set_ref_count(1);
+                      worker_index++;
+                      worker[worker_index] = new (worker[worker_index-1]->allocate_child())
+                      internal::color::PartitionWork<Worker>(func,slice_index,task_info,false);
+                    }
+                  worker[worker_index]->set_ref_count(2);
+                  if (part>0)
+                    {
+                      blocked_worker[(part-1)/2]->dummy =
+                        new (worker[worker_index]->allocate_child()) tbb::empty_task;
+                      worker_index++;
+                      if (spawn_index_child == -1)
+                        worker[spawn_index]->spawn(*blocked_worker[(part-1)/2]);
+                      else
+                        worker[spawn_index]->spawn(*worker[spawn_index_child]);
+                      spawn_index = spawn_index_new;
+                      spawn_index_child = -2;
+                    }
+                  else
+                    {
+                      internal::MPIComDistribute<InVector> *worker_dist =
+                        new (worker[worker_index]->allocate_child())
+                      internal::MPIComDistribute<InVector>(src);
+                      worker_dist->spawn(*worker_dist);
+                      worker_index++;
+                    }
+                  part += 1;
+                  if (part<task_info.partition_color_blocks_row_index.size()-1)
+                    {
+                      if (part<task_info.partition_color_blocks_row_index.size()-2)
+                        {
+                          blocked_worker[part/2] = new(worker[worker_index-1]->allocate_child())
+                          internal::color::PartitionWork<Worker>(func,slice_index,task_info,true);
+                          slice_index++;
+                          if (slice_index<
+                              task_info.partition_color_blocks_row_index[part+1])
+                            {
+                              blocked_worker[part/2]->set_ref_count(1);
+                              worker[worker_index] = new(blocked_worker[part/2]->allocate_child())
+                              internal::color::PartitionWork<Worker>(func,slice_index,task_info,false);
+                              slice_index++;
+                            }
+                          else
+                            {
+                              spawn_index_child = -1;
+                              continue;
+                            }
+                        }
+                      for (; slice_index<task_info.partition_color_blocks_row_index[part+1];
+                           slice_index++)
+                        {
+                          if (slice_index>
+                              task_info.partition_color_blocks_row_index[part])
+                            {
+                              worker[worker_index]->set_ref_count(1);
+                              worker_index++;
+                            }
+                          worker[worker_index] = new (worker[worker_index-1]->allocate_child())
+                          internal::color::PartitionWork<Worker>(func,slice_index,task_info,false);
+                        }
+                      spawn_index_child = worker_index;
+                      worker_index++;
+                    }
+                  else
+                    {
+                      tbb::empty_task *final = new (worker[worker_index-1]->allocate_child())
+                      tbb::empty_task;
+                      worker[spawn_index]->spawn(*final);
+                      spawn_index_child = worker_index-1;
+                    }
+                }
+              if (evens==odds)
+                worker[spawn_index]->spawn(*worker[spawn_index_child]);
+              root->wait_for_all();
+              root->destroy(*root);
+            }
+          // case when we only have one partition: this is the usual coloring
+          // scheme, and we just schedule a parallel for loop for each color
+          else
+            {
+              Assert(evens==1,ExcInternalError());
+              internal::update_ghost_values_finish(src);
+
+              for (unsigned int color=0;
+                   color < task_info.partition_color_blocks_row_index[1];
+                   ++color)
+                {
+                  unsigned int lower = task_info.partition_color_blocks_data[color],
+                               upper = task_info.partition_color_blocks_data[color+1];
+                  parallel_for(tbb::blocked_range<unsigned int>(lower,upper,1),
+                               internal::color::CellWork<Worker>
+                               (func,task_info));
+                }
+
+              internal::compress_start(dst);
+            }
+        }
+    }
+  else
+#endif
+    // serial loop
+    {
+      std::pair<unsigned int,unsigned int> cell_range;
+
+      // First operate on cells where no ghost data is needed (inner cells)
+      {
+        cell_range.first = 0;
+        cell_range.second = size_info.boundary_cells_start;
+        cell_operation (*this, dst, src, cell_range);
+      }
+
+      // before starting operations on cells that contain ghost nodes (outer
+      // cells), wait for the MPI commands to finish
+      internal::update_ghost_values_finish(src);
+
+      // For the outer cells, do the same procedure as for inner cells.
+      if (size_info.boundary_cells_end > size_info.boundary_cells_start)
+        {
+          cell_range.first = size_info.boundary_cells_start;
+          cell_range.second = size_info.boundary_cells_end;
+          cell_operation (*this, dst, src, cell_range);
+        }
+
+      internal::compress_start(dst);
+
+      // Finally operate on cells where no ghost data is needed (inner cells)
+      if (size_info.n_macro_cells > size_info.boundary_cells_end)
+        {
+          cell_range.first = size_info.boundary_cells_end;
+          cell_range.second = size_info.n_macro_cells;
+          cell_operation (*this, dst, src, cell_range);
+        }
+    }
+
+  // In every case, we need to finish transfers at the very end
+  internal::compress_finish(dst);
+  internal::reset_ghost_values(src, ghosts_were_not_set);
+}
+
+
+
+template <int dim, typename Number>
+template <typename CLASS, typename OutVector, typename InVector>
+inline
+void
+MatrixFree<dim,Number>::cell_loop
+(void (CLASS::*function_pointer)(const MatrixFree<dim,Number> &,
+                                 OutVector &,
+                                 const InVector &,
+                                 const std::pair<unsigned int,
+                                 unsigned int> &)const,
+ const CLASS    *owning_class,
+ OutVector      &dst,
+ const InVector &src) const
+{
+  // Type of the callable that is handed to the three-argument cell_loop
+  // call at the end of this function.
+  typedef std_cxx11::function<void (const MatrixFree<dim,Number> &,
+                                    OutVector &,
+                                    const InVector &,
+                                    const std::pair<unsigned int,
+                                    unsigned int> &)>
+  CellOperation;
+  // Tie the const member function to owning_class; the placeholders pass
+  // the four call arguments through in their original order.
+  CellOperation bound_operation
+    = std_cxx11::bind<void>(function_pointer, owning_class, std_cxx11::_1,
+                            std_cxx11::_2, std_cxx11::_3, std_cxx11::_4);
+  cell_loop (bound_operation, dst, src);
+}
+
+
+
+template <int dim, typename Number>
+template <typename CLASS, typename OutVector, typename InVector>
+inline
+void
+MatrixFree<dim,Number>::cell_loop
+(void(CLASS::*function_pointer)(const MatrixFree<dim,Number> &,
+                                OutVector &,
+                                const InVector &,
+                                const std::pair<unsigned int,
+                                unsigned int> &),
+ CLASS          *owning_class,
+ OutVector      &dst,
+ const InVector &src) const
+{
+  // Type of the callable that is handed to the three-argument cell_loop
+  // call at the end of this function.
+  typedef std_cxx11::function<void (const MatrixFree<dim,Number> &,
+                                    OutVector &,
+                                    const InVector &,
+                                    const std::pair<unsigned int,
+                                    unsigned int> &)>
+  CellOperation;
+  // Tie the non-const member function to owning_class; the placeholders
+  // pass the four call arguments through in their original order.
+  CellOperation bound_operation
+    = std_cxx11::bind<void>(function_pointer, owning_class, std_cxx11::_1,
+                            std_cxx11::_2, std_cxx11::_3, std_cxx11::_4);
+  cell_loop (bound_operation, dst, src);
+}
+
+
+#endif  // ifndef DOXYGEN
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/matrix_free/matrix_free.templates.h b/include/deal.II/matrix_free/matrix_free.templates.h
new file mode 100644
index 0000000..cad7355
--- /dev/null
+++ b/include/deal.II/matrix_free/matrix_free.templates.h
@@ -0,0 +1,1078 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2011 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/base/tensor_product_polynomials.h>
+#include <deal.II/base/polynomials_piecewise.h>
+#include <deal.II/base/mpi.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/fe/fe_poly.h>
+#include <deal.II/hp/q_collection.h>
+#include <deal.II/distributed/tria.h>
+
+#include <deal.II/matrix_free/matrix_free.h>
+#include <deal.II/matrix_free/shape_info.templates.h>
+#include <deal.II/matrix_free/mapping_info.templates.h>
+#include <deal.II/matrix_free/dof_info.templates.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// --------------------- MatrixFree -----------------------------------
+
+template <int dim, typename Number>
+MatrixFree<dim, Number>::MatrixFree()
+  :
+  indices_are_initialized (false), // counterpart flag for the index data
+  mapping_is_initialized  (false)  // set to true in internal_reinit once mapping_info is filled
+{}
+
+
+
+template <int dim, typename Number>
+MatrixFree<dim,Number>::~MatrixFree()
+{} // intentionally empty: no explicit cleanup is performed here
+
+
+
+template <int dim, typename Number>
+void MatrixFree<dim,Number>::copy_from (const MatrixFree<dim,Number> &v)
+{
+  // Discard the current state, then take over every data member of v.
+  clear ();
+  size_info                 = v.size_info;
+  task_info                 = v.task_info;
+  cell_level_index          = v.cell_level_index;
+  dof_handlers              = v.dof_handlers;
+  dof_info                  = v.dof_info;
+  constraint_pool_data      = v.constraint_pool_data;
+  constraint_pool_row_index = v.constraint_pool_row_index;
+  shape_info                = v.shape_info;
+  mapping_info              = v.mapping_info;
+  indices_are_initialized   = v.indices_are_initialized;
+  mapping_is_initialized    = v.mapping_is_initialized;
+}
+
+
+
+namespace internal
+{
+  // Checks via Assert that @p comm_mf matches the communicator of @p tria.
+  template <int dim>
+  void assert_communicator_equality (const dealii::Triangulation<dim> &tria,
+                                     const MPI_Comm                  &comm_mf)
+  {
+#ifdef DEAL_II_WITH_MPI
+    typedef parallel::distributed::Triangulation<dim> DistributedTria;
+    const DistributedTria *distributed = dynamic_cast<const DistributedTria *>(&tria);
+    if (distributed != 0 && Utilities::MPI::job_supports_mpi())
+      {
+        int comparison = 0;
+        MPI_Comm_compare (distributed->get_communicator(), comm_mf,
+                          &comparison);
+        // both an identical handle and a congruent communicator are accepted
+        Assert (comparison == MPI_IDENT ||
+                comparison == MPI_CONGRUENT,
+                ExcMessage ("MPI communicator in parallel::distributed::Triangulation "
+                            "and matrix free class must be the same!"));
+      }
+#else
+    // nothing to compare without MPI; only silence unused-parameter warnings
+    (void)tria;
+    (void)comm_mf;
+#endif
+  }
+}
+
+
+
+template <int dim, typename Number>
+void MatrixFree<dim,Number>::
+internal_reinit(const Mapping<dim>                          &mapping,
+                const std::vector<const DoFHandler<dim> *>  &dof_handler,
+                const std::vector<const ConstraintMatrix *> &constraint,
+                const std::vector<IndexSet>                 &locally_owned_set,
+                const std::vector<hp::QCollection<1> >      &quad,
+                const typename MatrixFree<dim,Number>::AdditionalData additional_data)
+{
+  // Fill one shape_info entry per (DoFHandler, quadrature) pair: reads out
+  // the FE information and stores the shape function values, gradients and
+  // Hessians for quadrature points. Each quad[nq] must hold one formula.
+  {
+    const unsigned int n_fe   = dof_handler.size();
+    const unsigned int n_quad = quad.size();
+    shape_info.reinit (TableIndices<4>(n_fe, n_quad, 1, 1));
+    for (unsigned int no=0; no<n_fe; no++)
+      for (unsigned int nq =0; nq<n_quad; nq++)
+        {
+          AssertDimension (quad[nq].size(), 1);
+          shape_info(no,nq,0,0).reinit(quad[nq][0], dof_handler[no]->get_fe());
+        }
+  }
+
+  if (additional_data.initialize_indices == true)
+    {
+      clear();
+      Assert (dof_handler.size() > 0, ExcMessage("No DoFHandler is given."));
+      AssertDimension (dof_handler.size(), constraint.size());
+      AssertDimension (dof_handler.size(), locally_owned_set.size());
+
+      // set variables that are independent of FE
+      internal::assert_communicator_equality (dof_handler[0]->get_triangulation(),
+                                              additional_data.mpi_communicator);
+      size_info.communicator = additional_data.mpi_communicator;
+      if (Utilities::MPI::job_supports_mpi() == true)
+        {
+          size_info.my_pid  =
+            Utilities::MPI::this_mpi_process(size_info.communicator);
+          size_info.n_procs =
+            Utilities::MPI::n_mpi_processes(size_info.communicator);
+        }
+      else
+        {
+          size_info.my_pid = 0;
+          size_info.n_procs = 1;
+        }
+
+      initialize_dof_handlers (dof_handler, additional_data.level_mg_handler);
+      for (unsigned int no=0; no<dof_handler.size(); ++no)
+        dof_info[no].store_plain_indices = additional_data.store_plain_indices;
+
+      // initialize the basic multithreading information that needs to be
+      // passed to the DoFInfo structure
+#ifdef DEAL_II_WITH_THREADS
+      if (additional_data.tasks_parallel_scheme != AdditionalData::none &&
+          MultithreadInfo::n_threads() > 1)
+        {
+          task_info.use_multithreading = true;
+          task_info.block_size = additional_data.tasks_block_size;
+          task_info.use_partition_partition =
+            (additional_data.tasks_parallel_scheme ==
+             AdditionalData::partition_partition ? true : false);
+          task_info.use_coloring_only =
+            (additional_data.tasks_parallel_scheme ==
+             AdditionalData::color ? true : false);
+        }
+      else
+#endif
+        task_info.use_multithreading = false;
+
+      // set dof_indices together with constraint_indicator and
+      // constraint_pool_data. It also reorders the way cells are gone through
+      // (to separate cells with overlap to other processors from others
+      // without).
+      initialize_indices (constraint, locally_owned_set);
+    }
+
+  // indices not requested: initialize bare structures unless dof_info already has one entry per handler
+  else if (dof_info.size() != dof_handler.size())
+    {
+      initialize_dof_handlers(dof_handler, additional_data.level_mg_handler);
+      std::vector<unsigned int> dummy;
+      size_info.make_layout (cell_level_index.size(),
+                             VectorizedArray<Number>::n_array_elements,
+                             dummy, dummy);
+      for (unsigned int i=0; i<dof_info.size(); ++i)
+        {
+          dof_info[i].dimension    = dim;
+          dof_info[i].n_components = dof_handler[i]->get_fe().element_multiplicity(0);
+          dof_info[i].dofs_per_cell.push_back(dof_handler[i]->get_fe().dofs_per_cell);
+          dof_info[i].row_starts.resize(size_info.n_macro_cells+1);
+          dof_info[i].row_starts.back()[2] =
+            cell_level_index.size() % VectorizedArray<Number>::n_array_elements;
+
+          // cell_level_index must be a multiple of the vectorization length
+          // for mapping_info, so pad with copies of the last cell. NOTE(review):
+          // once padded (first pass), later i store remainder 0 above - intended?
+          while (cell_level_index.size() % VectorizedArray<Number>::n_array_elements
+                 != 0)
+            cell_level_index.push_back(cell_level_index.back());
+        }
+    }
+
+  // Evaluates transformations from unit to real cell, Jacobian determinants,
+  // quadrature points in real space, based on the ordering of the cells in
+  // cell_level_index. The triangulation and the active FE indices passed to
+  // mapping_info are taken from the zeroth DoFHandler only, i.e. the
+  // algorithm assumes they apply to all handlers. TODO: how do things look
+  // like in the more general case?
+  if (additional_data.initialize_mapping == true)
+    {
+      mapping_info.initialize (dof_handler[0]->get_triangulation(), cell_level_index,
+                               dof_info[0].cell_active_fe_index, mapping, quad,
+                               additional_data.mapping_update_flags);
+
+      mapping_is_initialized = true;
+    }
+}
+
+
+
+template <int dim, typename Number>
+void MatrixFree<dim,Number>::
+internal_reinit(const Mapping<dim>                            &mapping,
+                const std::vector<const hp::DoFHandler<dim>*> &dof_handler,
+                const std::vector<const ConstraintMatrix *>    &constraint,
+                const std::vector<IndexSet>                   &locally_owned_set,
+                const std::vector<hp::QCollection<1> >        &quad,
+                const typename MatrixFree<dim,Number>::AdditionalData additional_data)
+{
+  // Size shape_info by the largest FE and quadrature collections, then fill
+  // one entry per (handler, quadrature set, FE index, quadrature index).
+  {
+    const unsigned int n_components = dof_handler.size();
+    const unsigned int n_quad       = quad.size();
+    unsigned int n_fe_in_collection = 0;
+    for (unsigned int i=0; i<n_components; ++i)
+      n_fe_in_collection = std::max (n_fe_in_collection,
+                                     dof_handler[i]->get_fe().size());
+    unsigned int n_quad_in_collection = 0;
+    for (unsigned int q=0; q<n_quad; ++q)
+      n_quad_in_collection = std::max (n_quad_in_collection, quad[q].size());
+    shape_info.reinit (TableIndices<4>(n_components, n_quad,
+                                       n_fe_in_collection,
+                                       n_quad_in_collection));
+    for (unsigned int no=0; no<n_components; no++)
+      for (unsigned int fe_no=0; fe_no<dof_handler[no]->get_fe().size(); ++fe_no)
+        for (unsigned int nq =0; nq<n_quad; nq++)
+          for (unsigned int q_no=0; q_no<quad[nq].size(); ++q_no)
+            shape_info(no,nq,fe_no,q_no).reinit (quad[nq][q_no],
+                                                 dof_handler[no]->get_fe()[fe_no]);
+  }
+
+  if (additional_data.initialize_indices == true)
+    {
+      clear();
+      Assert (dof_handler.size() > 0, ExcMessage("No DoFHandler is given."));
+      AssertDimension (dof_handler.size(), constraint.size());
+      AssertDimension (dof_handler.size(), locally_owned_set.size());
+
+      // set variables that are independent of FE
+      internal::assert_communicator_equality (dof_handler[0]->get_triangulation(),
+                                              additional_data.mpi_communicator);
+      size_info.communicator = additional_data.mpi_communicator;
+      if (Utilities::MPI::job_supports_mpi() == true)
+        {
+          size_info.my_pid  =
+            Utilities::MPI::this_mpi_process(size_info.communicator);
+          size_info.n_procs =
+            Utilities::MPI::n_mpi_processes(size_info.communicator);
+        }
+      else
+        {
+          size_info.my_pid = 0;
+          size_info.n_procs = 1;
+        }
+
+      initialize_dof_handlers (dof_handler, additional_data.level_mg_handler);
+      for (unsigned int no=0; no<dof_handler.size(); ++no)
+        dof_info[no].store_plain_indices = additional_data.store_plain_indices;
+
+      // initialize the basic multithreading information that needs to be
+      // passed to the DoFInfo structure
+#ifdef DEAL_II_WITH_THREADS
+      if (additional_data.tasks_parallel_scheme != AdditionalData::none &&
+          MultithreadInfo::n_threads() > 1)
+        {
+          task_info.use_multithreading = true;
+          task_info.block_size = additional_data.tasks_block_size;
+          task_info.use_partition_partition =
+            (additional_data.tasks_parallel_scheme ==
+             AdditionalData::partition_partition ? true : false);
+          task_info.use_coloring_only =
+            (additional_data.tasks_parallel_scheme ==
+             AdditionalData::color ? true : false);
+        }
+      else
+#endif
+        task_info.use_multithreading = false;
+
+      // set dof_indices together with constraint_indicator and
+      // constraint_pool_data. It also reorders the way cells are gone through
+      // (to separate cells with overlap to other processors from others
+      // without).
+      initialize_indices (constraint, locally_owned_set);
+    }
+
+  // indices not requested: initialize bare structures unless dof_info already has one entry per handler
+  else if (dof_info.size() != dof_handler.size())
+    {
+      initialize_dof_handlers(dof_handler, additional_data.level_mg_handler);
+      std::vector<unsigned int> dummy;
+      size_info.make_layout (cell_level_index.size(),
+                             VectorizedArray<Number>::n_array_elements,
+                             dummy, dummy);
+      for (unsigned int i=0; i<dof_info.size(); ++i)
+        {
+          Assert(dof_handler[i]->get_fe().size() == 1, ExcNotImplemented());
+          dof_info[i].dimension    = dim;
+          dof_info[i].n_components = dof_handler[i]->get_fe()[0].element_multiplicity(0);
+          dof_info[i].dofs_per_cell.push_back(dof_handler[i]->get_fe()[0].dofs_per_cell);
+          dof_info[i].row_starts.resize(size_info.n_macro_cells+1);
+          dof_info[i].row_starts.back()[2] =
+            cell_level_index.size() % VectorizedArray<Number>::n_array_elements;
+
+          // cell_level_index must be a multiple of the vectorization length
+          // for mapping_info, so pad with copies of the last cell. NOTE(review):
+          // once padded (first pass), later i store remainder 0 above - intended?
+          while (cell_level_index.size() % VectorizedArray<Number>::n_array_elements
+                 != 0)
+            cell_level_index.push_back(cell_level_index.back());
+        }
+    }
+
+  // Evaluates transformations from unit to real cell, Jacobian determinants,
+  // quadrature points in real space, based on the ordering of the cells in
+  // cell_level_index; active FE indices come from the zeroth DoFHandler.
+  if (additional_data.initialize_mapping == true)
+    {
+      mapping_info.initialize (dof_handler[0]->get_triangulation(), cell_level_index,
+                               dof_info[0].cell_active_fe_index, mapping, quad,
+                               additional_data.mapping_update_flags);
+
+      mapping_is_initialized = true;
+    }
+}
+
+
+
+namespace internal
+{
+  // Depth-first walk below @p cell: recurses into all children and appends
+  // (level, index) of every childless cell owned by @p subdomain_id.
+  template <typename InIterator>
+  void resolve_cell (const InIterator   &cell,
+                     std::vector<std::pair<unsigned int,unsigned int> > &cell_its,
+                     const unsigned int  subdomain_id)
+  {
+    if (cell->has_children())
+      {
+        for (unsigned int c=0; c<cell->n_children(); ++c)
+          resolve_cell (cell->child(c), cell_its, subdomain_id);
+        return;
+      }
+    if (cell->subdomain_id() != subdomain_id)
+      return;
+    Assert (cell->active(), ExcInternalError());
+    const std::pair<unsigned int,unsigned int> level_and_index (cell->level(), cell->index());
+    cell_its.push_back (level_and_index);
+  }
+}
+
+
+
+template <int dim, typename Number>
+void MatrixFree<dim,Number>::
+initialize_dof_handlers (const std::vector<const DoFHandler<dim>*> &dof_handler,
+                         const unsigned int level)
+{
+  // store the (non-hp) handlers together with the requested level, and give
+  // dof_info one entry per handler
+  dof_handlers.active_dof_handler = DoFHandlers::usual;
+  dof_handlers.level = level;
+  dof_handlers.n_dof_handlers = dof_handler.size();
+  dof_handlers.dof_handler.resize (dof_handlers.n_dof_handlers);
+  dof_info.resize (dof_handlers.n_dof_handlers);
+  for (unsigned int h=0; h<dof_handlers.n_dof_handlers; ++h)
+    dof_handlers.dof_handler[h] = dof_handler[h];
+
+  typedef typename Triangulation<dim>::cell_iterator CellIterator;
+  const Triangulation<dim> &tria = dof_handlers.dof_handler[0]->get_triangulation();
+  const unsigned int my_pid = size_info.my_pid;
+
+  if (level == numbers::invalid_unsigned_int)
+    {
+      // no level given: go through cells on zeroth level and successively
+      // step down into children. This gives a z-ordering of the cells, which
+      // is beneficial for neighboring relations in thread parallelization
+      if (size_info.n_procs == 1)
+        cell_level_index.reserve (tria.n_active_cells());
+      for (CellIterator cell = tria.begin(0), end_cell = tria.end(0);
+           cell != end_cell; ++cell)
+        internal::resolve_cell (cell, cell_level_index, my_pid);
+      return;
+    }
+
+  // level given: collect the cells of that level whose level_subdomain_id
+  // equals my_pid, provided the triangulation has that level
+  AssertIndexRange (level, tria.n_global_levels());
+  if (level >= tria.n_levels())
+    return;
+
+  cell_level_index.reserve (tria.n_cells(level));
+  for (CellIterator cell = tria.begin(level), end_cell = tria.end(level);
+       cell != end_cell; ++cell)
+    if (cell->level_subdomain_id() == my_pid)
+      cell_level_index.push_back (std::pair<unsigned int,unsigned int>
+                                  (cell->level(), cell->index()));
+}
+
+
+
+template <int dim, typename Number>
+void MatrixFree<dim,Number>::
+initialize_dof_handlers (const std::vector<const hp::DoFHandler<dim>*> &dof_handler,
+                         const unsigned int)
+{
+  // store the hp handlers and give dof_info one entry per handler; the
+  // unnamed level argument is not used by this overload
+  dof_handlers.active_dof_handler = DoFHandlers::hp;
+  dof_handlers.n_dof_handlers = dof_handler.size();
+  dof_handlers.hp_dof_handler.resize (dof_handlers.n_dof_handlers);
+  dof_info.resize (dof_handlers.n_dof_handlers);
+  for (unsigned int h=0; h<dof_handlers.n_dof_handlers; ++h)
+    dof_handlers.hp_dof_handler[h] = dof_handler[h];
+
+  // with a single MPI process, reserve room for all active cells of the
+  // triangulation up front
+  const Triangulation<dim> &tria = dof_handler[0]->get_triangulation();
+  if (size_info.n_procs == 1)
+    {
+      cell_level_index.reserve (tria.n_active_cells());
+    }
+
+  // go through cells on zeroth level and then successively step down into
+  // children. This gives a z-ordering of the cells, which is beneficial when
+  // setting up neighboring relations between cells for thread parallelization
+  typedef typename hp::DoFHandler<dim>::cell_iterator CellIterator;
+  const unsigned int my_pid = size_info.my_pid;
+
+  for (CellIterator cell = dof_handler[0]->begin(0),
+       end_cell = dof_handler[0]->end(0);
+       cell != end_cell; ++cell)
+    internal::resolve_cell (cell, cell_level_index, my_pid);
+}
+
+// Reads the DoF indices of every cell in cell_level_index for each DoFHandler, lays the cells out
+// in macro cells of vectorization_length entries, renumbers them and builds the constraint pool.
+template <int dim, typename Number>
+void MatrixFree<dim,Number>::initialize_indices
+(const std::vector<const ConstraintMatrix *> &constraint,         // one entry per DoFHandler (asserted below)
+ const std::vector<IndexSet>                 &locally_owned_set)  // one entry per DoFHandler (asserted below)
+{
+  const unsigned int n_fe = dof_handlers.n_dof_handlers;
+  const unsigned int n_active_cells = cell_level_index.size();
+
+  AssertDimension (n_active_cells, cell_level_index.size()); // NOTE(review): trivially true, n_active_cells was just read from this size
+  AssertDimension (n_fe, locally_owned_set.size());
+  AssertDimension (n_fe, constraint.size());
+
+  std::vector<types::global_dof_index> local_dof_indices;
+  std::vector<std::vector<std::vector<unsigned int> > > lexicographic_inv(n_fe); // NOTE(review): not referenced again in this function
+
+  internal::MatrixFreeFunctions::ConstraintValues<double> constraint_values;
+  std::vector<unsigned int> constraint_indices; // NOTE(review): not referenced again in this function
+
+  for (unsigned int no=0; no<n_fe; ++no)
+    {
+      std::vector<const FiniteElement<dim>*> fes;
+      if (dof_handlers.active_dof_handler == DoFHandlers::hp)
+        {
+          const hp::DoFHandler<dim> *hpdof = dof_handlers.hp_dof_handler[no];
+          const hp::FECollection<dim> &fe = hpdof->get_fe();
+          for (unsigned int f=0; f<fe.size(); ++f)
+            fes.push_back (&fe[f]);
+
+          dof_info[no].max_fe_index = fe.size();
+          dof_info[no].fe_index_conversion.resize (fe.size());
+          for (unsigned int ind=0; ind<hpdof->get_fe().size(); ++ind)
+            dof_info[no].fe_index_conversion[ind] =
+              std::pair<unsigned int,unsigned int>(fe[ind].degree,
+                                                   fe[ind].dofs_per_cell);
+          if (fe.size() > 1) // the per-cell active fe index is only stored when the collection has several elements
+            dof_info[no].cell_active_fe_index.resize(n_active_cells,
+                                                     numbers::invalid_unsigned_int);
+        }
+      else
+        {
+          const DoFHandler<dim> *dofh =&*dof_handlers.dof_handler[no];
+          fes.push_back (&dofh->get_fe());
+          dof_info[no].max_fe_index = 1;
+          dof_info[no].fe_index_conversion.resize (1);
+          dof_info[no].fe_index_conversion[0] =
+            std::pair<unsigned int,unsigned int>(fes.back()->degree,
+                                                 fes.back()->dofs_per_cell);
+        }
+
+      for (unsigned int fe_index = 0; fe_index<fes.size(); ++fe_index)
+        {
+          const FiniteElement<dim> &fe = *fes[fe_index];
+          Assert (fe.n_base_elements() == 1,
+                  ExcMessage ("MatrixFree currently only works for DoFHandler with one base element"));
+          const unsigned int n_fe_components = fe.element_multiplicity (0);
+
+          // cache number of finite elements and dofs_per_cell
+          dof_info[no].dofs_per_cell.push_back (fe.dofs_per_cell);
+          dof_info[no].dofs_per_face.push_back (fe.dofs_per_face);
+          dof_info[no].dimension    = dim;
+          dof_info[no].n_components = n_fe_components;
+
+          AssertDimension (shape_info(no,0,fe_index,0).lexicographic_numbering.size(),
+                           dof_info[no].dofs_per_cell[fe_index]);
+        }
+
+      // set locally owned range for each component
+      Assert (locally_owned_set[no].is_contiguous(), ExcNotImplemented());
+      dof_info[no].vector_partitioner.reset
+      (new Utilities::MPI::Partitioner(locally_owned_set[no], size_info.communicator));
+
+      // initialize the arrays for indices
+      dof_info[no].row_starts.resize (n_active_cells+1);
+      dof_info[no].row_starts[0][0] = 0;
+      dof_info[no].row_starts[0][1] = 0;
+      dof_info[no].row_starts[0][2] = 0;
+      dof_info[no].dof_indices.reserve
+      ((n_active_cells*dof_info[no].dofs_per_cell[0]*3)/2); // capacity guess: 1.5 x (cells x dofs per cell of the first element)
+
+      // cache the constrained indices for use in matrix-vector products
+      {
+        const types::global_dof_index
+        start_index = dof_info[no].vector_partitioner->local_range().first,
+        end_index   = dof_info[no].vector_partitioner->local_range().second;
+        for (types::global_dof_index i=start_index; i<end_index; ++i)
+          if (constraint[no]->is_constrained(i)==true)
+            dof_info[no].constrained_dofs.
+            push_back(static_cast<unsigned int>(i-start_index)); // stored relative to the start of the local range
+      }
+    }
+
+  // extract all the global indices associated with the computation, and form
+  // the ghost indices
+  std::vector<unsigned int> boundary_cells;
+  for (unsigned int counter = 0 ; counter < n_active_cells ; ++counter)
+    {
+      bool cell_at_boundary = false;
+      for (unsigned int no=0; no<n_fe; ++no)
+        {
+          // OK, read indices from standard DoFHandler in the usual way
+          if (dof_handlers.active_dof_handler == DoFHandlers::usual &&
+              dof_handlers.level == numbers::invalid_unsigned_int)
+            {
+              const DoFHandler<dim> *dofh = &*dof_handlers.dof_handler[no];
+              typename DoFHandler<dim>::active_cell_iterator
+              cell_it (&dofh->get_triangulation(),
+                       cell_level_index[counter].first,
+                       cell_level_index[counter].second,
+                       dofh);
+              local_dof_indices.resize (dof_info[no].dofs_per_cell[0]);
+              cell_it->get_dof_indices(local_dof_indices);
+              dof_info[no].read_dof_indices (local_dof_indices,
+                                             shape_info(no,0,0,0).lexicographic_numbering,
+                                             *constraint[no], counter,
+                                             constraint_values,
+                                             cell_at_boundary);
+            }
+          // ok, now we are requested to use a level in a MG DoFHandler
+          else if (dof_handlers.active_dof_handler == DoFHandlers::usual &&
+                   dof_handlers.level != numbers::invalid_unsigned_int)
+            {
+              const DoFHandler<dim> *dofh = dof_handlers.dof_handler[no];
+              AssertIndexRange (dof_handlers.level, dofh->get_triangulation().n_levels());
+              typename DoFHandler<dim>::cell_iterator
+              cell_it (&dofh->get_triangulation(),
+                       cell_level_index[counter].first,
+                       cell_level_index[counter].second,
+                       dofh);
+              local_dof_indices.resize (dof_info[no].dofs_per_cell[0]);
+              cell_it->get_mg_dof_indices(local_dof_indices);
+              dof_info[no].read_dof_indices (local_dof_indices,
+                                             shape_info(no,0,0,0).lexicographic_numbering,
+                                             *constraint[no], counter,
+                                             constraint_values,
+                                             cell_at_boundary);
+            }
+          else if (dof_handlers.active_dof_handler == DoFHandlers::hp)
+            {
+              const hp::DoFHandler<dim> *dofh =
+                dof_handlers.hp_dof_handler[no];
+              typename hp::DoFHandler<dim>::active_cell_iterator
+              cell_it (&dofh->get_triangulation(),
+                       cell_level_index[counter].first,
+                       cell_level_index[counter].second,
+                       dofh);
+              if (dofh->get_fe().size() > 1)
+                dof_info[no].cell_active_fe_index[counter] =
+                  cell_it->active_fe_index();
+              local_dof_indices.resize (cell_it->get_fe().dofs_per_cell);
+              cell_it->get_dof_indices(local_dof_indices);
+              dof_info[no].read_dof_indices (local_dof_indices,
+                                             shape_info(no,0,cell_it->active_fe_index(),0).lexicographic_numbering,
+                                             *constraint[no], counter,
+                                             constraint_values,
+                                             cell_at_boundary);
+            }
+          else
+            {
+              Assert (false, ExcNotImplemented());
+            }
+        }
+
+      // if we found dofs on some FE component that belong to other
+      // processors, the cell is added to the boundary cells.
+      if (cell_at_boundary == true)
+        boundary_cells.push_back(counter);
+    }
+
+  const unsigned int vectorization_length =
+    VectorizedArray<Number>::n_array_elements;
+  std::vector<unsigned int> irregular_cells;
+  size_info.make_layout (n_active_cells, vectorization_length, boundary_cells,
+                         irregular_cells);
+
+  for (unsigned int no=0; no<n_fe; ++no)
+    dof_info[no].assign_ghosts (boundary_cells);
+
+  // reorganize the indices in order to overlap communication in MPI with
+  // computations: Place all cells with ghost indices into one chunk. Also
+  // reorder cells so that we can parallelize by threads
+  std::vector<unsigned int> renumbering;
+  if (task_info.use_multithreading == true)
+    {
+      dof_info[0].compute_renumber_parallel (boundary_cells, size_info,
+                                             renumbering);
+      if (task_info.use_partition_partition == true)
+        dof_info[0].make_thread_graph_partition_partition
+        (size_info, task_info, renumbering, irregular_cells,
+         dof_handlers.active_dof_handler == DoFHandlers::hp);
+      else
+        dof_info[0].make_thread_graph_partition_color
+        (size_info, task_info, renumbering, irregular_cells,
+         dof_handlers.active_dof_handler == DoFHandlers::hp);
+    }
+  else
+    {
+      // In case, we have an hp-dofhandler, we have to reorder the cell
+      // according to the polynomial degree on the cell.
+      dof_info[0].compute_renumber_serial (boundary_cells, size_info,
+                                           renumbering);
+      if (dof_handlers.active_dof_handler == DoFHandlers::hp)
+        dof_info[0].compute_renumber_hp_serial (size_info, renumbering,
+                                                irregular_cells);
+    }
+
+  // Finally perform the renumbering. We also want to group several cells
+  // together to one "macro-cell" for vectorization (where the arithmetic
+  // operations will then be done simultaneously).
+#ifdef DEBUG
+  {
+    std::vector<unsigned int> sorted_renumbering (renumbering);
+    std::sort (sorted_renumbering.begin(), sorted_renumbering.end());
+    for (unsigned int i=0; i<sorted_renumbering.size(); ++i)
+      Assert (sorted_renumbering[i] == i, ExcInternalError()); // renumbering must be a permutation of 0..size-1
+  }
+#endif
+  {
+    std::vector<std::pair<unsigned int,unsigned int> >
+    cell_level_index_old;
+    cell_level_index.swap (cell_level_index_old);
+    cell_level_index.reserve(size_info.n_macro_cells*vectorization_length);
+    unsigned int position_cell=0;
+    for (unsigned int i=0; i<size_info.n_macro_cells; ++i)
+      {
+        unsigned int n_comp = (irregular_cells[i]>0)? // a nonzero entry is the number of cells actually held by macro cell i
+                              irregular_cells[i] : vectorization_length;
+        for (unsigned int j=0; j<n_comp; ++j)
+          cell_level_index.push_back
+          (cell_level_index_old[renumbering[position_cell+j]]);
+
+        // generate a cell and level index also when we have not filled up
+        // vectorization_length cells. This is needed for MappingInfo when the
+        // transformation data is initialized. We just set the value to the
+        // last valid cell in that case.
+        for (unsigned int j=n_comp; j<vectorization_length; ++j)
+          cell_level_index.push_back
+          (cell_level_index_old[renumbering[position_cell+n_comp-1]]);
+        position_cell += n_comp;
+      }
+    AssertDimension (position_cell, size_info.n_active_cells);
+    AssertDimension (cell_level_index.size(),size_info.n_macro_cells*vectorization_length);
+  }
+
+  // set constraint pool from the std::map and reorder the indices
+  typename std::map<std::vector<double>, types::global_dof_index,
+           internal::MatrixFreeFunctions::FPArrayComparator<double> >::iterator
+           it = constraint_values.constraints.begin(),
+           end = constraint_values.constraints.end();
+  std::vector<const std::vector<double>*>
+  constraints (constraint_values.constraints.size());
+  unsigned int length = 0;
+  for ( ; it != end; ++it)
+    {
+      AssertIndexRange(it->second, constraints.size());
+      constraints[it->second] = &it->first; // invert the map: slot = mapped index, value = pointer to the weight array
+      length += it->first.size();
+    }
+  constraint_pool_data.clear();
+  constraint_pool_data.reserve (length);
+  constraint_pool_row_index.reserve(constraint_values.constraints.size()+1);
+  constraint_pool_row_index.resize(1, 0); // NOTE(review): only yields {0} if the vector was empty or already began with 0 - confirm on re-initialization
+  for (unsigned int i=0; i<constraints.size(); ++i)
+    {
+      Assert(constraints[i] != 0, ExcInternalError());
+      constraint_pool_data.insert(constraint_pool_data.end(),
+                                  constraints[i]->begin(),
+                                  constraints[i]->end());
+      constraint_pool_row_index.push_back(constraint_pool_data.size()); // end offset of row i in the flattened pool
+    }
+  AssertDimension(constraint_pool_data.size(), length);
+  for (unsigned int no=0; no<n_fe; ++no)
+    dof_info[no].reorder_cells(size_info, renumbering,
+                               constraint_pool_row_index,
+                               irregular_cells, vectorization_length);
+
+  indices_are_initialized = true;
+}
+
+
+
+template <int dim, typename Number>
+void MatrixFree<dim,Number>::clear() // empties the index/mapping data and marks both as uninitialized
+{
+  dof_info.clear();
+  mapping_info.clear();
+  cell_level_index.clear();
+  size_info.clear();
+  task_info.clear();
+  dof_handlers.dof_handler.clear();
+  dof_handlers.hp_dof_handler.clear();
+  indices_are_initialized = false; // NOTE(review): shape_info and the constraint_pool_* arrays are not reset here - confirm intended
+  mapping_is_initialized  = false;
+}
+
+
+
+template <int dim, typename Number>
+std::size_t MatrixFree<dim,Number>::memory_consumption () const
+{
+  // the object itself plus whatever each member reports for its own storage
+  return (sizeof(*this)
+          + MemoryConsumption::memory_consumption (dof_info)
+          + MemoryConsumption::memory_consumption (cell_level_index)
+          + MemoryConsumption::memory_consumption (shape_info)
+          + MemoryConsumption::memory_consumption (constraint_pool_data)
+          + MemoryConsumption::memory_consumption (constraint_pool_row_index)
+          + MemoryConsumption::memory_consumption (task_info)
+          + mapping_info.memory_consumption());
+}
+
+
+template <int dim, typename Number>
+template <typename StreamType>
+void MatrixFree<dim,Number>::print_memory_consumption (StreamType &out) const
+{
+  // overall total first, followed by the individual contributions
+  out << "  Memory cell FE operator total: --> ";
+  size_info.print_memory_statistics (out, memory_consumption());
+  const std::size_t index_bytes = MemoryConsumption::memory_consumption (cell_level_index);
+  out << "   Memory cell index:                ";
+  size_info.print_memory_statistics (out, index_bytes);
+  for (unsigned int comp=0; comp!=dof_info.size(); ++comp)
+    {
+      out << "   Memory DoFInfo component "<< comp << std::endl;
+      dof_info[comp].print_memory_consumption(out, size_info);
+    }
+
+  out << "   Memory mapping info" << std::endl;
+  mapping_info.print_memory_consumption(out, size_info);
+
+  const std::size_t shape_bytes = MemoryConsumption::memory_consumption (shape_info);
+  out << "   Memory unit cell shape data:      ";
+  size_info.print_memory_statistics (out, shape_bytes);
+  if (task_info.use_multithreading)
+    {
+      out << "   Memory task partitioning info:    ";
+      size_info.print_memory_statistics (out, MemoryConsumption::memory_consumption (task_info));
+    }
+}
+
+
+
+template <int dim, typename Number>
+void MatrixFree<dim,Number>::print (std::ostream &out) const
+{
+  // dump the index data held by every DoFInfo entry, one section per entry
+  for (unsigned int comp=0; comp!=dof_info.size(); ++comp)
+    {
+      out << "\n-- Index data for component " << comp << " --" << std::endl;
+      dof_info[comp].print (constraint_pool_data, constraint_pool_row_index, out);
+      out << std::endl;
+    }
+}
+
+
+
+/*-------------------- Implementation of helper functions ------------------*/
+
+namespace internal
+{
+  namespace MatrixFreeFunctions
+  {
+
+    TaskInfo::TaskInfo ()
+    {
+      clear(); // a freshly constructed object starts in the state clear() produces
+    }
+
+
+
+    void TaskInfo::clear () // resets all counters to 0, all flags to false and empties all arrays
+    {
+      block_size = 0;
+      n_blocks = 0;
+      block_size_last = 0;
+      position_short_block = 0;
+      use_multithreading = false; // after clear() the object describes the serial case
+      use_partition_partition = false;
+      use_coloring_only = false;
+      partition_color_blocks_row_index.clear();
+      partition_color_blocks_data.clear();
+      evens = 0;
+      odds = 0;
+      n_blocked_workers = 0;
+      n_workers = 0;
+      partition_evens.clear();
+      partition_odds.clear();
+      partition_n_blocked_workers.clear();
+      partition_n_workers.clear();
+    }
+
+
+
+    std::size_t
+    TaskInfo::memory_consumption () const
+    {
+      // the struct itself plus what MemoryConsumption reports for each array
+      std::size_t bytes = sizeof(*this) + MemoryConsumption::memory_consumption (partition_color_blocks_row_index);
+      bytes += MemoryConsumption::memory_consumption (partition_color_blocks_data);
+      bytes += MemoryConsumption::memory_consumption (partition_evens);
+      bytes += MemoryConsumption::memory_consumption (partition_odds);
+      bytes += MemoryConsumption::memory_consumption (partition_n_blocked_workers);
+      return bytes + MemoryConsumption::memory_consumption (partition_n_workers);
+    }
+
+
+
+    SizeInfo::SizeInfo ()
+    {
+      clear(); // a freshly constructed object starts in the state clear() produces
+    }
+
+
+
+    void SizeInfo::clear() // resets all counts to 0, empties the index sets, falls back to MPI_COMM_SELF
+    {
+      n_active_cells = 0;
+      n_macro_cells  = 0;
+      boundary_cells_start = 0;
+      boundary_cells_end   = 0;
+      vectorization_length = 0;
+      locally_owned_cells  = IndexSet();
+      ghost_cells = IndexSet();
+      communicator = MPI_COMM_SELF;
+      my_pid = 0;
+      n_procs = 0; // NOTE(review): 0 rather than 1 although the communicator is MPI_COMM_SELF - confirm intended
+    }
+
+
+
+    template <typename StreamType>
+    void SizeInfo::print_memory_statistics (StreamType &out,
+                                            std::size_t data_length) const
+    {
+      // scale the byte count by 1e-6 (MB) and collect min/avg/max over the communicator
+      const Utilities::MPI::MinMaxAvg stats
+        = Utilities::MPI::min_max_avg (1e-6*data_length, communicator);
+      out << stats.min;
+      if (n_procs >= 2)
+        out << "/" << stats.avg << "/" << stats.max;
+      out << " MB" << std::endl;
+    }
+
+    // Stores the cell counts, pads 'boundary_cells' with further cell indices until its size is a multiple of
+    // vectorization_length (or it holds all cells), and sets irregular_cells and boundary_cells_start/_end.
+    inline
+    void SizeInfo::make_layout (const unsigned int n_active_cells_in,          // number of cells to lay out
+                                const unsigned int vectorization_length_in,    // cells per macro cell
+                                std::vector<unsigned int> &boundary_cells,     // in/out: padded and sorted on return
+                                std::vector<unsigned int> &irregular_cells)    // out: one entry per macro cell
+    {
+      vectorization_length = vectorization_length_in;
+      n_active_cells = n_active_cells_in;
+
+      unsigned int n_max_boundary_cells = boundary_cells.size();
+      unsigned int n_boundary_cells = n_max_boundary_cells;
+
+      // try to make the number of boundary cells divisible by the number of
+      // vectors in vectorization
+
+      /*
+      // try to balance the number of cells before and after the boundary part
+      // on each processor. probably not worth it!
+      #ifdef DEAL_II_WITH_MPI
+      MPI_Allreduce (&n_boundary_cells, &n_max_boundary_cells, 1, MPI_UNSIGNED,
+                     MPI_MAX, size_info.communicator);
+      #endif
+      if (n_max_boundary_cells > n_active_cells)
+        n_max_boundary_cells = n_active_cells;
+      */
+
+      unsigned int fillup_needed =
+        (vectorization_length - n_boundary_cells%vectorization_length)%vectorization_length;
+      if (fillup_needed > 0 && n_boundary_cells < n_active_cells)
+        {
+          // fill additional cells into the list of boundary cells to get a
+          // balanced number. Go through the indices successively until we
+          // found enough indices
+          std::vector<unsigned int> new_boundary_cells;
+          new_boundary_cells.reserve (n_max_boundary_cells + fillup_needed);
+
+          unsigned int next_free_slot = 0, bound_index = 0;
+          while (fillup_needed > 0 && bound_index < boundary_cells.size())
+            {
+              if (next_free_slot < boundary_cells[bound_index])
+                {
+                  // check if there are enough cells to fill with in the
+                  // current slot
+                  if (next_free_slot + fillup_needed <= boundary_cells[bound_index])
+                    {
+                      for (unsigned int j=boundary_cells[bound_index]-fillup_needed;
+                           j < boundary_cells[bound_index]; ++j)
+                        new_boundary_cells.push_back(j);
+                      fillup_needed = 0;
+                    }
+                  // ok, not enough indices, so just take them all up to the
+                  // next boundary cell
+                  else
+                    {
+                      for (unsigned int j=next_free_slot;
+                           j<boundary_cells[bound_index]; ++j)
+                        new_boundary_cells.push_back(j);
+                      fillup_needed -= boundary_cells[bound_index]-next_free_slot;
+                    }
+                }
+              new_boundary_cells.push_back(boundary_cells[bound_index]);
+              next_free_slot = boundary_cells[bound_index]+1;
+              ++bound_index;
+            }
+          // still short: append the cells after the last entry, exactly one per missing slot
+          for ( ; fillup_needed > 0 && !new_boundary_cells.empty() &&
+                new_boundary_cells.back()<n_active_cells-1; --fillup_needed)
+            new_boundary_cells.push_back(new_boundary_cells.back()+1);
+          while (bound_index<boundary_cells.size())
+            new_boundary_cells.push_back(boundary_cells[bound_index++]);
+          boundary_cells.swap(new_boundary_cells);
+        }
+
+      // set the number of cells
+      std::sort (boundary_cells.begin(), boundary_cells.end());
+      n_boundary_cells = boundary_cells.size();
+
+      // check that number of boundary cells is divisible by
+      // vectorization_length or that it contains all cells
+      Assert (n_boundary_cells % vectorization_length == 0 ||
+              n_boundary_cells == n_active_cells, ExcInternalError());
+      n_macro_cells = (n_active_cells+vectorization_length-1)/vectorization_length;
+      irregular_cells.resize (n_macro_cells);
+      if (n_macro_cells*vectorization_length > n_active_cells)
+        {
+          irregular_cells[n_macro_cells-1] = // the last macro cell is only partially filled: record how many cells it holds
+            vectorization_length - (n_macro_cells*vectorization_length - n_active_cells);
+        }
+      if (n_procs > 1)
+        {
+          const unsigned int n_macro_boundary_cells =
+            (n_boundary_cells+vectorization_length-1)/vectorization_length;
+          boundary_cells_start = (n_macro_cells-n_macro_boundary_cells)/2; // center the boundary macro cells in the range
+          boundary_cells_end   = boundary_cells_start + n_macro_boundary_cells;
+        }
+      else
+        boundary_cells_start = boundary_cells_end = n_macro_cells;
+    }
+
+
+
+    /* ------------------------------------------------------------------ */
+
+    template <typename Number>
+    FPArrayComparator<Number>::FPArrayComparator (const Number scaling)
+      :
+      tolerance (scaling * std::numeric_limits<double>::epsilon() * 1024.) // NOTE(review): always the epsilon of double, whatever Number is - confirm intended
+    {}
+
+
+
+    template <typename Number>
+    bool
+    FPArrayComparator<Number>::operator() (const std::vector<Number> &v1,
+                                           const std::vector<Number> &v2) const
+    {
+      // "less than" for arrays: shorter arrays sort first; arrays of equal
+      // length are compared entry by entry, and entries that differ by no
+      // more than 'tolerance' are treated as equal
+      const unsigned int len1 = v1.size(), len2 = v2.size();
+      if (len1 != len2)
+        return len1 < len2;
+      for (unsigned int i=0; i<len1; ++i)
+        if (v1[i] < v2[i] - tolerance)
+          return true;
+        else if (v1[i] > v2[i] + tolerance)
+          return false;
+      return false;
+    }
+
+
+
+    template <typename Number>
+    template <int dim>
+    bool
+    FPArrayComparator<Number>::
+    operator ()(const Tensor<1,dim,Tensor<1,VectorizedArray<Number>::n_array_elements,Number> > &t1,
+                const Tensor<1,dim,Tensor<1,VectorizedArray<Number>::n_array_elements,Number> > &t2) const
+    {
+      // the first entry pair that differs by more than 'tolerance' decides
+      // the order; if there is none, t1 is not less than t2
+      for (unsigned int d=0; d<dim; ++d)
+        for (unsigned int lane=0; lane<VectorizedArray<Number>::n_array_elements; ++lane)
+          if (t1[d][lane] < t2[d][lane] - tolerance || t1[d][lane] > t2[d][lane] + tolerance)
+            return t1[d][lane] < t2[d][lane] - tolerance;
+      return false;
+    }
+
+
+
+    template <typename Number>
+    template <int dim>
+    bool
+    FPArrayComparator<Number>::
+    operator ()(const Tensor<2,dim,Tensor<1,VectorizedArray<Number>::n_array_elements,Number> > &t1,
+                const Tensor<2,dim,Tensor<1,VectorizedArray<Number>::n_array_elements,Number> > &t2) const
+    {
+      // the first entry pair that differs by more than 'tolerance' decides
+      // the order; if there is none, t1 is not less than t2
+      for (unsigned int row=0; row<dim; ++row)
+        for (unsigned int col=0; col<dim; ++col)
+          for (unsigned int lane=0; lane<VectorizedArray<Number>::n_array_elements; ++lane)
+            if (t1[row][col][lane] < t2[row][col][lane] - tolerance || t1[row][col][lane] > t2[row][col][lane] + tolerance)
+              return t1[row][col][lane] < t2[row][col][lane] - tolerance;
+      return false;
+    }
+  }
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/include/deal.II/matrix_free/operators.h b/include/deal.II/matrix_free/operators.h
new file mode 100644
index 0000000..f1c2e02
--- /dev/null
+++ b/include/deal.II/matrix_free/operators.h
@@ -0,0 +1,213 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2011 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__matrix_free_operators_h
+#define dealii__matrix_free_operators_h
+
+
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/vectorization.h>
+
+#include <deal.II/matrix_free/fe_evaluation.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace MatrixFreeOperators
+{
+  /**
+   * This class implements the operation of the action of the inverse of a
+   * mass matrix on an element for the special case of an evaluation object
+   * with as many quadrature points as there are cell degrees of freedom. It
+   * uses algorithms from FEEvaluation and produces the exact mass matrix for
+   * DGQ elements. This algorithm uses tensor products of inverse 1D shape
+   * matrices over quadrature points, so the inverse operation is exactly as
+   * expensive as applying the forward operator on each cell. Of course, for
+   * continuous finite elements this operation does not produce the inverse of
+   * a mass operation as the coupling between the elements cannot be
+   * considered by this operation.
+   *
+   * The equation may contain variable coefficients, so the user is required
+   * to provide an array for the inverse of the local coefficient (this class
+   * provides a helper method 'fill_inverse_JxW_values' to get the inverse of
+   * a constant-coefficient operator).
+   *
+   * @author Martin Kronbichler, 2014
+   */
+  template <int dim, int fe_degree, int n_components = 1, typename Number = double>
+  class CellwiseInverseMassMatrix
+  {
+  public:
+    /**
+     * Constructor. Initializes the inverse 1D shape data from the ShapeInfo
+     * of the given evaluation object and keeps a reference to that object.
+     */
+    CellwiseInverseMassMatrix (const FEEvaluationBase<dim,n_components,Number> &fe_eval);
+
+    /**
+     * Applies the inverse mass matrix operation on an input array. It is
+     * assumed that the passed input and output arrays are of correct size,
+     * namely (fe_degree+1)^dim * n_actual_components entries. The inverse of
+     * the local coefficient (also containing the inverse JxW values) must be
+     * passed as first argument; it holds either one set of (fe_degree+1)^dim
+     * values shared by all components or one set per component.
+     */
+    void apply(const AlignedVector<VectorizedArray<Number> > &inverse_coefficient,
+               const unsigned int             n_actual_components,
+               const VectorizedArray<Number> *in_array,
+               VectorizedArray<Number>       *out_array) const;
+
+    /**
+     * Fills the given array with the inverse of the JxW values, i.e., a mass
+     * matrix with coefficient 1. Non-unit coefficients must be multiplied (in
+     * inverse form) to this array. The array must be sized by the caller.
+     */
+    void fill_inverse_JxW_values(AlignedVector<VectorizedArray<Number> > &inverse_jxw) const;
+
+  private:
+    /**
+     * A reference to the FEEvaluation object for getting the JxW_values.
+     */
+    const FEEvaluationBase<dim,n_components,Number> &fe_eval;
+
+    /**
+     * The inverse 1D shape matrix, stored in the even-odd packed layout.
+     */
+    AlignedVector<VectorizedArray<Number> > inverse_shape;
+  };
+
+
+
+  // ------------------------------------ inline functions ---------------------
+
+  template <int dim, int fe_degree, int n_components, typename Number>
+  inline
+  CellwiseInverseMassMatrix<dim,fe_degree,n_components,Number>
+  ::CellwiseInverseMassMatrix (const FEEvaluationBase<dim,n_components,Number> &fe_eval)
+    :
+    fe_eval (fe_eval)
+  {
+    FullMatrix<double> shapes_1d(fe_degree+1, fe_degree+1);
+    for (unsigned int i=0, c=0; i<shapes_1d.m(); ++i)
+      for (unsigned int j=0; j<shapes_1d.n(); ++j, ++c)
+        shapes_1d(i,j) = fe_eval.get_shape_info().shape_values_number[c]; // row-by-row copy of the 1D shape values
+    shapes_1d.gauss_jordan(); // in-place inversion of the 1D shape matrix
+    const unsigned int stride = (fe_degree+2)/2; // row length of the packed storage: half the columns, rounded up
+    inverse_shape.resize(stride*(fe_degree+1));
+    for (unsigned int i=0; i<stride; ++i)
+      for (unsigned int q=0; q<(fe_degree+2)/2; ++q)
+        {
+          inverse_shape[i*stride+q] = // row i: half-sum of column q and its mirror column fe_degree-q
+            0.5 * (shapes_1d(i,q) + shapes_1d(i,fe_degree-q));
+          inverse_shape[(fe_degree-i)*stride+q] = // row fe_degree-i: half-difference of the same two entries of row i
+            0.5 * (shapes_1d(i,q) - shapes_1d(i,fe_degree-q));
+        }
+    if (fe_degree % 2 == 0) // even degree: the middle row is stored unmodified, replacing what the loop wrote for i==fe_degree/2
+      for (unsigned int q=0; q<(fe_degree+2)/2; ++q)
+        inverse_shape[fe_degree/2*stride+q] = shapes_1d(fe_degree/2,q);
+  }
+
+
+
+  template <int dim, int fe_degree, int n_components, typename Number>
+  inline
+  void
+  CellwiseInverseMassMatrix<dim,fe_degree,n_components,Number>
+  ::fill_inverse_JxW_values(AlignedVector<VectorizedArray<Number> > &inverse_jxw) const
+  {
+    const unsigned int dofs_per_cell = Utilities::fixed_int_power<fe_degree+1,dim>::value; // (fe_degree+1)^dim
+    Assert(inverse_jxw.size() > 0 &&
+           inverse_jxw.size() % dofs_per_cell == 0,
+           ExcMessage("Expected diagonal to be a multiple of scalar dof per cells"));
+
+    // temporarily reduce size of inverse_jxw to dofs_per_cell to get JxW values
+    // from fe_eval (will not reallocate any memory)
+    const unsigned int previous_size = inverse_jxw.size();
+    inverse_jxw.resize(dofs_per_cell);
+    fe_eval.fill_JxW_values(inverse_jxw);
+
+    // invert
+    inverse_jxw.resize_fast(previous_size); // restore the caller's size before touching entries past the first block
+    for (unsigned int q=0; q<dofs_per_cell; ++q)
+      inverse_jxw[q] = 1./inverse_jxw[q];
+    // copy values to rest of vector
+    for (unsigned int q=dofs_per_cell; q<previous_size; ) // q is advanced by the inner loop, one full block per pass
+      for (unsigned int i=0; i<dofs_per_cell; ++i, ++q)
+        inverse_jxw[q] = inverse_jxw[i];
+  }
+
+
+
+  template <int dim, int fe_degree, int n_components, typename Number>
+  inline
+  void
+  CellwiseInverseMassMatrix<dim,fe_degree,n_components,Number>
+  ::apply(const AlignedVector<VectorizedArray<Number> > &inverse_coefficients,
+          const unsigned int             n_actual_components,
+          const VectorizedArray<Number> *in_array,
+          VectorizedArray<Number>       *out_array) const
+  {
+    const unsigned int dofs_per_cell = Utilities::fixed_int_power<fe_degree+1,dim>::value; // (fe_degree+1)^dim
+    Assert(inverse_coefficients.size() > 0 &&
+           inverse_coefficients.size() % dofs_per_cell == 0,
+           ExcMessage("Expected diagonal to be a multiple of scalar dof per cells"));
+    if (inverse_coefficients.size() != dofs_per_cell) // more than one block given: must be exactly one block per component
+      AssertDimension(n_actual_components * dofs_per_cell, inverse_coefficients.size());
+
+    Assert(dim == 2 || dim == 3, ExcNotImplemented());
+
+    internal::EvaluatorTensorProduct<internal::evaluate_evenodd,dim,fe_degree,
+             fe_degree+1, VectorizedArray<Number> >
+             evaluator(inverse_shape, inverse_shape, inverse_shape); // the inverse shape data fills all three slots
+
+    const unsigned int shift_coefficient =
+      inverse_coefficients.size() > dofs_per_cell ? dofs_per_cell : 0; // 0: every component reuses the same coefficient block
+    const VectorizedArray<Number> *inv_coefficient = &inverse_coefficients[0];
+    VectorizedArray<Number> temp_data_field[dofs_per_cell]; // scratch buffer for the ping-pong between passes
+    for (unsigned int d=0; d<n_actual_components; ++d)
+      {
+        const VectorizedArray<Number> *in = in_array+d*dofs_per_cell;
+        VectorizedArray<Number> *out = out_array+d*dofs_per_cell;
+        // Need to select 'apply' method with hessian slot because values
+        // assume symmetries that do not exist in the inverse shapes
+        evaluator.template hessians<0,false,false> (in, temp_data_field);
+        evaluator.template hessians<1,false,false> (temp_data_field, out);
+
+        if (dim == 3)
+          {
+            evaluator.template hessians<2,false,false> (out, temp_data_field);
+            for (unsigned int q=0; q<dofs_per_cell; ++q)
+              temp_data_field[q] *= inv_coefficient[q]; // pointwise scaling between the two sweeps
+            evaluator.template hessians<2,true,false> (temp_data_field, out); // presumably the transposed pass - confirm against EvaluatorTensorProduct
+          }
+        else if (dim == 2)
+          for (unsigned int q=0; q<dofs_per_cell; ++q)
+            out[q] *= inv_coefficient[q]; // pointwise scaling between the two sweeps
+
+        evaluator.template hessians<1,true,false>(out, temp_data_field);
+        evaluator.template hessians<0,true,false>(temp_data_field, out);
+
+        inv_coefficient += shift_coefficient; // next component's coefficient block (no-op when the block is shared)
+      }
+  }
+
+} // end of namespace MatrixFreeOperators
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/matrix_free/shape_info.h b/include/deal.II/matrix_free/shape_info.h
new file mode 100644
index 0000000..f134183
--- /dev/null
+++ b/include/deal.II/matrix_free/shape_info.h
@@ -0,0 +1,257 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2011 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__matrix_free_shape_info_h
+#define dealii__matrix_free_shape_info_h
+
+
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/vectorization.h>
+#include <deal.II/base/aligned_vector.h>
+#include <deal.II/fe/fe.h>
+
+#include <deal.II/matrix_free/helper_functions.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace internal
+{
+  namespace MatrixFreeFunctions
+  {
+    /**
+     * An enum that encodes the type of element detected during
+     * initialization. FEEvaluation will select the most efficient algorithm
+     * based on the given element type.
+     */
+    enum ElementType
+    {
+      tensor_general, // initial value; kept by ShapeInfo::reinit if nothing below applies
+      tensor_symmetric, // 1D shapes passed check_1d_shapes_symmetric but not the Gauss-Lobatto check
+      truncated_tensor, // set by ShapeInfo::reinit when the element is an FE_DGP
+      tensor_symmetric_plus_dg0, // set by ShapeInfo::reinit when the element is an FE_Q_DG0
+      tensor_gausslobatto // 1D shapes passed both the symmetry and the Gauss-Lobatto check
+    };
+
+    /**
+     * The class that stores the shape functions, gradients and Hessians
+     * evaluated for a tensor product finite element and tensor product
+     * quadrature formula on the unit cell. Because of this structure, only
+     * one-dimensional data is stored.
+     *
+     * @author Katharina Kormann and Martin Kronbichler, 2010, 2011
+     */
+    template <typename Number>
+    struct ShapeInfo
+    {
+      /**
+       * Empty constructor. Does not evaluate any shape data; see reinit().
+       */
+      ShapeInfo ();
+
+      /**
+       * Constructor that initializes the data fields using the reinit method.
+       */
+      template <int dim>
+      ShapeInfo (const Quadrature<1> &quad,
+                 const FiniteElement<dim> &fe,
+                 const unsigned int base_element = 0);
+
+      /**
+       * Initializes the data fields. Takes a one-dimensional quadrature
+       * formula and a finite element as arguments and evaluates the shape
+       * functions, gradients and Hessians on the one-dimensional unit cell.
+       * This function assumes that the finite element is derived from a
+       * one-dimensional element by a tensor product and that the zeroth shape
+       * function in zero evaluates to one.
+       */
+      template <int dim>
+      void reinit (const Quadrature<1> &quad,
+                   const FiniteElement<dim> &fe_dim,
+                   const unsigned int base_element = 0);
+
+      /**
+       * Returns the memory consumption of this class in bytes.
+       */
+      std::size_t memory_consumption () const;
+
+      /**
+       * Encodes the type of element detected at construction. FEEvaluation
+       * will select the most efficient algorithm based on the given element
+       * type.
+       */
+      ElementType element_type;
+
+      /**
+       * Stores the shape values of the 1D finite element evaluated on all 1D
+       * quadrature points in vectorized format, i.e., as an array of
+       * VectorizedArray<Number>::n_array_elements equal elements. The length of
+       * this array is <tt>n_dofs_1d * n_q_points_1d</tt> and quadrature
+       * points are the index running fastest.
+       */
+      AlignedVector<VectorizedArray<Number> > shape_values;
+
+      /**
+       * Stores the shape gradients of the 1D finite element evaluated on all
+       * 1D quadrature points in vectorized format, i.e., as an array of
+       * VectorizedArray<Number>::n_array_elements equal elements. The length of
+       * this array is <tt>n_dofs_1d * n_q_points_1d</tt> and quadrature
+       * points are the index running fastest.
+       */
+      AlignedVector<VectorizedArray<Number> > shape_gradients;
+
+      /**
+       * Stores the shape Hessians of the 1D finite element evaluated on all
+       * 1D quadrature points in vectorized format, i.e., as an array of
+       * VectorizedArray<Number>::n_array_elements equal elements. The length of
+       * this array is <tt>n_dofs_1d * n_q_points_1d</tt> and quadrature
+       * points are the index running fastest.
+       */
+      AlignedVector<VectorizedArray<Number> > shape_hessians;
+
+      /**
+       * Stores the shape values in a different format, namely the so-called
+       * even-odd scheme where the symmetries in shape_values are used for
+       * faster evaluation.
+       */
+      AlignedVector<VectorizedArray<Number> > shape_val_evenodd;
+
+      /**
+       * Stores the shape gradients in a different format, namely the
+       * so-called even-odd scheme where the symmetries in shape_gradients are
+       * used for faster evaluation.
+       */
+      AlignedVector<VectorizedArray<Number> > shape_gra_evenodd;
+
+      /**
+       * Stores the shape second derivatives in a different format, namely the
+       * so-called even-odd scheme where the symmetries in shape_hessians are
+       * used for faster evaluation.
+       */
+      AlignedVector<VectorizedArray<Number> > shape_hes_evenodd;
+
+      /**
+       * Stores the indices from cell DoFs to face DoFs. The rows go through
+       * the <tt>2*dim</tt> faces, and the columns the DoFs on the faces.
+       */
+      dealii::Table<2,unsigned int>  face_indices;
+
+      /**
+       * Stores one-dimensional values of shape functions evaluated in zero
+       * and one, i.e., on the one-dimensional faces. Not vectorized.
+       */
+      std::vector<Number>    face_value[2];
+
+      /**
+       * Stores one-dimensional gradients of shape functions evaluated in zero
+       * and one, i.e., on the one-dimensional faces. Not vectorized.
+       */
+      std::vector<Number>    face_gradient[2];
+
+      /**
+       * Stores one-dimensional values of shape functions on subface. Since
+       * there are two subfaces, store two variants. Not vectorized.
+       */
+      std::vector<Number>    subface_value[2];
+
+      /**
+       * Non-vectorized version of shape values. Needed when evaluating face
+       * info.
+       */
+      std::vector<Number>    shape_values_number;
+
+      /**
+       * Non-vectorized version of shape gradients. Needed when evaluating
+       * face info.
+       */
+      std::vector<Number>    shape_gradient_number;
+
+      /**
+       * Renumbering from deal.II's numbering of cell degrees of freedom to
+       * lexicographic numbering used inside the FEEvaluation schemes of the
+       * underlying element in the DoFHandler. For vector-valued elements, the
+       * renumbering starts with a lexicographic numbering of the first
+       * component, then everything of the second component, and so on.
+       */
+      std::vector<unsigned int> lexicographic_numbering;
+
+      /**
+       * Stores the degree of the element.
+       */
+      unsigned int fe_degree;
+
+      /**
+       * Stores the number of quadrature points in @p dim dimensions for a
+       * cell.
+       */
+      unsigned int n_q_points;
+
+      /**
+       * Stores the number of DoFs per cell in @p dim dimensions.
+       */
+      unsigned int dofs_per_cell;
+
+      /**
+       * Stores the number of quadrature points per face in @p dim dimensions.
+       */
+      unsigned int n_q_points_face;
+
+      /**
+       * Stores the number of DoFs per face in @p dim dimensions.
+       */
+      unsigned int dofs_per_face;
+
+      /**
+       * Checks whether the 1D shape values, gradients and Hessians are
+       * symmetric. In that case, also fill the shape_???_evenodd fields.
+       */
+      bool check_1d_shapes_symmetric(const unsigned int n_q_points_1d);
+
+      /**
+       * Checks whether symmetric 1D basis functions are such that the shape
+       * values form a diagonal matrix, which allows to use specialized
+       * algorithms that save some operations.
+       */
+      bool check_1d_shapes_gausslobatto();
+    };
+
+
+
+    // ------------------------------------------ inline functions
+
+    // Initializing constructor: starts with fe_degree, n_q_points and
+    // dofs_per_cell set to zero and then hands the quadrature formula, the
+    // element and the base element number over to reinit().
+    template <typename Number>
+    template <int dim>
+    inline
+    ShapeInfo<Number>::ShapeInfo (const Quadrature<1> &quad,
+                                  const FiniteElement<dim> &fe_in,
+                                  const unsigned int base_element_number)
+      : fe_degree (0), n_q_points (0), dofs_per_cell (0)
+    {
+      this->reinit (quad, fe_in, base_element_number);
+    }
+
+
+
+  } // end of namespace MatrixFreeFunctions
+} // end of namespace internal
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/matrix_free/shape_info.templates.h b/include/deal.II/matrix_free/shape_info.templates.h
new file mode 100644
index 0000000..42af09e
--- /dev/null
+++ b/include/deal.II/matrix_free/shape_info.templates.h
@@ -0,0 +1,419 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2011 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/base/polynomial.h>
+#include <deal.II/base/tensor_product_polynomials.h>
+#include <deal.II/base/polynomials_piecewise.h>
+#include <deal.II/fe/fe_poly.h>
+#include <deal.II/fe/fe_dgp.h>
+#include <deal.II/fe/fe_q_dg0.h>
+
+#include <deal.II/matrix_free/shape_info.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace internal
+{
+  namespace MatrixFreeFunctions
+  {
+
+    // ----------------- actual ShapeInfo functions --------------------
+
+    // Default constructor: gives every scalar member a defined value (listed
+    // in declaration order) so that no field is read uninitialized before reinit.
+    template <typename Number>
+    ShapeInfo<Number>::ShapeInfo ()
+      : element_type (tensor_general), fe_degree (0), n_q_points (0),
+        dofs_per_cell (0), n_q_points_face (0), dofs_per_face (0)
+    {}
+
+
+
+    // Fills all data fields from the 1D quadrature formula @p quad and base
+    // element @p base_element_number of @p fe_in: detects the element type, sets
+    // up the lexicographic numbering, and tabulates 1D shape data and face indices.
+    template <typename Number>
+    template <int dim>
+    void
+    ShapeInfo<Number>::reinit (const Quadrature<1> &quad,
+                               const FiniteElement<dim> &fe_in,
+                               const unsigned int base_element_number)
+    {
+      const FiniteElement<dim> *fe = &fe_in.base_element(base_element_number);
+
+      Assert (fe->n_components() == 1,
+              ExcMessage("FEEvaluation only works for scalar finite elements."));
+
+      fe_degree = fe->degree;
+
+      const unsigned int n_dofs_1d = fe_degree+1,
+                         n_q_points_1d = quad.size();
+
+      // renumber (this is necessary for FE_Q, for example, since there the
+      // vertex DoFs come first, which is incompatible with the lexicographic
+      // ordering necessary to apply tensor products efficiently)
+      std::vector<unsigned int> scalar_lexicographic;
+      {
+        // find numbering to lexicographic
+        const FE_Poly<TensorProductPolynomials<dim>,dim,dim> *fe_poly =
+          dynamic_cast<const FE_Poly<TensorProductPolynomials<dim>,dim,dim>*>(fe);
+
+        const FE_Poly<TensorProductPolynomials<dim,Polynomials::
+        PiecewisePolynomial<double> >,dim,dim> *fe_poly_piece =
+          dynamic_cast<const FE_Poly<TensorProductPolynomials<dim,
+          Polynomials::PiecewisePolynomial<double> >,dim,dim>*> (fe);
+        const FE_DGP<dim> *fe_dgp = dynamic_cast<const FE_DGP<dim>*>(fe);
+        const FE_Q_DG0<dim> *fe_q_dg0 = dynamic_cast<const FE_Q_DG0<dim>*>(fe);
+
+        element_type = tensor_general;
+        if (fe_poly != 0)
+          scalar_lexicographic = fe_poly->get_poly_space_numbering_inverse();
+        else if (fe_poly_piece != 0)
+          scalar_lexicographic = fe_poly_piece->get_poly_space_numbering_inverse();
+        else if (fe_dgp != 0)
+          {
+            scalar_lexicographic.resize(fe_dgp->dofs_per_cell);
+            for (unsigned int i=0; i<fe_dgp->dofs_per_cell; ++i)
+              scalar_lexicographic[i] = i;
+            element_type = truncated_tensor;
+          }
+        else if (fe_q_dg0 != 0)
+          {
+            scalar_lexicographic = fe_q_dg0->get_poly_space_numbering_inverse();
+            element_type = tensor_symmetric_plus_dg0;
+          }
+        else
+          Assert(false, ExcNotImplemented());
+
+        // Finally store the renumbering into the member variable of this
+        // class
+        if (fe_in.n_components() == 1)
+          lexicographic_numbering = scalar_lexicographic;
+        else
+          {
+            // have more than one component, get the inverse
+            // permutation, invert it, sort the components one after one,
+            // and invert back
+            std::vector<unsigned int> scalar_inv =
+              Utilities::invert_permutation(scalar_lexicographic);
+            std::vector<unsigned int> lexicographic(fe_in.dofs_per_cell,
+                                                    numbers::invalid_unsigned_int);
+            unsigned int components_before = 0;
+            for (unsigned int e=0; e<base_element_number; ++e)
+              components_before += fe_in.element_multiplicity(e);
+            for (unsigned int comp=0;
+                 comp<fe_in.element_multiplicity(base_element_number); ++comp)
+              for (unsigned int i=0; i<scalar_inv.size(); ++i)
+                lexicographic[fe_in.component_to_system_index(comp+components_before,i)]
+                  = scalar_inv.size () * comp + scalar_inv[i];
+
+            // invert numbering again. Need to do it manually because we might
+            // have undefined blocks
+            lexicographic_numbering.resize(fe_in.element_multiplicity(base_element_number)*fe->dofs_per_cell);
+            for (unsigned int i=0; i<lexicographic.size(); ++i)
+              if (lexicographic[i] != numbers::invalid_unsigned_int)
+                {
+                  AssertIndexRange(lexicographic[i],
+                                   lexicographic_numbering.size());
+                  lexicographic_numbering[lexicographic[i]] = i;
+                }
+          }
+
+        // to evaluate 1D polynomials, evaluate along the line where y=z=0,
+        // assuming that shape_value(0,Point<dim>()) == 1. otherwise, need
+        // other entry point (e.g. generating a 1D element by reading the
+        // name, as done before r29356)
+        Assert(std::fabs(fe->shape_value(scalar_lexicographic[0],
+                                         Point<dim>())-1) < 1e-13,
+               ExcInternalError());
+      }
+
+      n_q_points      = Utilities::fixed_power<dim>(n_q_points_1d);
+      dofs_per_cell   = fe->dofs_per_cell;
+      n_q_points_face = dim>1?Utilities::fixed_power<dim-1>(n_q_points_1d):1;
+      dofs_per_face   = fe->dofs_per_face;
+
+      const unsigned int array_size = n_dofs_1d*n_q_points_1d;
+      this->shape_gradients.resize_fast (array_size);
+      this->shape_values.resize_fast (array_size);
+      this->shape_hessians.resize_fast (array_size);
+
+      this->face_value[0].resize(n_dofs_1d);
+      this->face_gradient[0].resize(n_dofs_1d);
+      this->subface_value[0].resize(array_size);
+      this->face_value[1].resize(n_dofs_1d);
+      this->face_gradient[1].resize(n_dofs_1d);
+      this->subface_value[1].resize(array_size);
+      this->shape_values_number.resize (array_size);
+      this->shape_gradient_number.resize (array_size);
+
+      for (unsigned int i=0; i<n_dofs_1d; ++i)
+        {
+          // need to reorder from hierarchical to lexicographic to get the
+          // DoFs correct
+          const unsigned int my_i = scalar_lexicographic[i];
+          for (unsigned int q=0; q<n_q_points_1d; ++q)
+            {
+              // fill both vectors with
+              // VectorizedArray<Number>::n_array_elements
+              // copies for the shape information and
+              // non-vectorized fields
+              Point<dim> q_point;
+              q_point[0] = quad.get_points()[q][0];
+              shape_values_number[i*n_q_points_1d+q]   = fe->shape_value(my_i,q_point);
+              shape_gradient_number[i*n_q_points_1d+q] = fe->shape_grad (my_i,q_point)[0];
+              shape_values   [i*n_q_points_1d+q] =
+                shape_values_number  [i*n_q_points_1d+q];
+              shape_gradients[i*n_q_points_1d+q] =
+                shape_gradient_number[i*n_q_points_1d+q];
+              shape_hessians[i*n_q_points_1d+q] =
+                fe->shape_grad_grad(my_i,q_point)[0][0];
+              q_point[0] *= 0.5;
+              subface_value[0][i*n_q_points_1d+q] = fe->shape_value(my_i,q_point);
+              q_point[0] += 0.5;
+              subface_value[1][i*n_q_points_1d+q] = fe->shape_value(my_i,q_point);
+            }
+          Point<dim> q_point;
+          this->face_value[0][i] = fe->shape_value(my_i,q_point);
+          this->face_gradient[0][i] = fe->shape_grad(my_i,q_point)[0];
+          q_point[0] = 1;
+          this->face_value[1][i] = fe->shape_value(my_i,q_point);
+          this->face_gradient[1][i] = fe->shape_grad(my_i,q_point)[0];
+        }
+
+      if (element_type == tensor_general &&
+          check_1d_shapes_symmetric(n_q_points_1d))
+        {
+          if (check_1d_shapes_gausslobatto())
+            element_type = tensor_gausslobatto;
+          else
+            element_type = tensor_symmetric;
+        }
+      else if (element_type == tensor_symmetric_plus_dg0)
+        check_1d_shapes_symmetric(n_q_points_1d);
+
+      // face information
+      const unsigned int n_faces = GeometryInfo<dim>::faces_per_cell;
+      this->face_indices.reinit(n_faces, this->dofs_per_face);
+      switch (dim)
+        {
+        case 3:
+        {
+          for (unsigned int i=0; i<this->dofs_per_face; i++)
+            {
+              const unsigned int jump_term =
+                this->dofs_per_face*((i*n_dofs_1d)/this->dofs_per_face);
+              this->face_indices(0,i) = i*n_dofs_1d;
+              this->face_indices(1,i) = i*n_dofs_1d + n_dofs_1d-1;
+              this->face_indices(2,i) = i%n_dofs_1d + jump_term;
+              this->face_indices(3,i) = (i%n_dofs_1d + jump_term +
+                                         (n_dofs_1d-1)*n_dofs_1d);
+              this->face_indices(4,i) = i;
+              this->face_indices(5,i) = (n_dofs_1d-1)*this->dofs_per_face+i;
+            }
+          break;
+        }
+        case 2:
+        {
+          for (unsigned int i=0; i<this->dofs_per_face; i++)
+            {
+              this->face_indices(0,i) = n_dofs_1d*i;
+              this->face_indices(1,i) = n_dofs_1d*i + n_dofs_1d-1;
+              this->face_indices(2,i) = i;
+              this->face_indices(3,i) = (n_dofs_1d-1)*n_dofs_1d+i;
+            }
+          break;
+        }
+        case 1:
+        {
+          // the table has no columns if dofs_per_face is zero: nothing to store
+          if (this->dofs_per_face == 0)
+            break;
+          this->face_indices(0,0) = 0;
+          this->face_indices(1,0) = n_dofs_1d-1;
+          break;
+        }
+        default:
+          Assert (false, ExcNotImplemented());
+        }
+    }
+
+
+
+    // Tests whether the tabulated 1D shape values, gradients and Hessians have
+    // the symmetries needed for the even-odd scheme. Returns false at the first
+    // violation; otherwise fills the shape_???_evenodd arrays and returns true.
+    template <typename Number>
+    bool
+    ShapeInfo<Number>::check_1d_shapes_symmetric(const unsigned int n_q_points_1d)
+    {
+      const double zero_tol =
+        types_are_equal<Number,double>::value ? 1e-10 : 1e-7;
+      const unsigned int n_dofs_1d = fe_degree + 1;
+      const unsigned int n_half    = (n_dofs_1d+1)/2;
+      const unsigned int mid_dof   = n_dofs_1d/2;
+      const unsigned int mid_q     = n_q_points_1d/2;
+      const bool         both_odd  = (n_q_points_1d%2 == 1) && (n_dofs_1d%2 == 1);
+
+      // compare each entry of the first half of the basis functions with its
+      // mirror image: values and second derivatives must be symmetric,
+      // gradients skew-symmetric
+      for (unsigned int dof=0; dof<n_half; ++dof)
+        for (unsigned int q=0; q<n_q_points_1d; ++q)
+          {
+            const unsigned int here   = dof*n_q_points_1d+q;
+            const unsigned int mirror = (n_dofs_1d-dof)*n_q_points_1d-q-1;
+            if (std::fabs(shape_values[here][0] -
+                          shape_values[mirror][0]) > zero_tol ||
+                std::fabs(shape_gradients[here][0] +
+                          shape_gradients[mirror][0]) > zero_tol ||
+                std::fabs(shape_hessians[here][0] -
+                          shape_hessians[mirror][0]) > zero_tol)
+              return false;
+          }
+
+      // if both counts are odd, look at the middle quadrature point: the basis
+      // functions before the middle one must vanish there, the middle one must
+      // have value one and zero gradient
+      if (both_odd)
+        {
+          for (unsigned int dof=0; dof<mid_dof; ++dof)
+            if (std::fabs(shape_values[dof*n_q_points_1d+mid_q][0]) > zero_tol)
+              return false;
+          const unsigned int center = mid_dof*n_q_points_1d+mid_q;
+          if (std::fabs(shape_values[center][0]-1.) > zero_tol ||
+              std::fabs(shape_gradients[center][0]) > zero_tol)
+            return false;
+        }
+
+      // all checks passed: entry (i,q) of the even-odd arrays gets half the sum
+      // of entry (i,q) and its mirrored point, entry (fe_degree-i,q) half the
+      // difference
+      const unsigned int stride    = (n_q_points_1d+1)/2;
+      const unsigned int n_entries = (fe_degree+1)*stride;
+      shape_val_evenodd.resize(n_entries);
+      shape_gra_evenodd.resize(n_entries);
+      shape_hes_evenodd.resize(n_entries);
+      for (unsigned int dof=0; dof<(fe_degree+1)/2; ++dof)
+        for (unsigned int q=0; q<stride; ++q)
+          {
+            const unsigned int front    = dof*n_q_points_1d+q;
+            const unsigned int back     = dof*n_q_points_1d+n_q_points_1d-1-q;
+            const unsigned int sum_pos  = dof*stride+q;
+            const unsigned int diff_pos = (fe_degree-dof)*stride+q;
+
+            shape_val_evenodd[sum_pos] =
+              Number(0.5) * (shape_values[front] + shape_values[back]);
+            shape_val_evenodd[diff_pos] =
+              Number(0.5) * (shape_values[front] - shape_values[back]);
+
+            shape_gra_evenodd[sum_pos] =
+              Number(0.5) * (shape_gradients[front] + shape_gradients[back]);
+            shape_gra_evenodd[diff_pos] =
+              Number(0.5) * (shape_gradients[front] - shape_gradients[back]);
+
+            shape_hes_evenodd[sum_pos] =
+              Number(0.5) * (shape_hessians[front] + shape_hessians[back]);
+            shape_hes_evenodd[diff_pos] =
+              Number(0.5) * (shape_hessians[front] - shape_hessians[back]);
+          }
+
+      // for even degree, the middle basis function is copied unchanged
+      if (fe_degree % 2 == 0)
+        for (unsigned int q=0; q<stride; ++q)
+          {
+            const unsigned int dst = fe_degree/2*stride+q;
+            const unsigned int src = (fe_degree/2)*n_q_points_1d+q;
+            shape_val_evenodd[dst] = shape_values[src];
+            shape_gra_evenodd[dst] = shape_gradients[src];
+            shape_hes_evenodd[dst] = shape_hessians[src];
+          }
+      return true;
+    }
+
+
+
+    // Tests whether the 1D shape values form the identity matrix and whether the
+    // gradients satisfy the additional conditions checked below; fails right
+    // away unless there are as many DoFs as quadrature points per cell.
+    template <typename Number>
+    bool
+    ShapeInfo<Number>::check_1d_shapes_gausslobatto()
+    {
+      if (dofs_per_cell != n_q_points)
+        return false;
+
+      const double zero_tol =
+        types_are_equal<Number,double>::value ? 1e-10 : 1e-7;
+      const unsigned int n_points_1d = fe_degree+1;
+
+      // shape values: one on the diagonal, zero everywhere else
+      for (unsigned int row=0; row<n_points_1d; ++row)
+        for (unsigned int col=0; col<n_points_1d; ++col)
+          {
+            const unsigned int pos = row*n_points_1d+col;
+            const bool violated = (row == col) ?
+                                  (std::fabs(shape_values[pos][0]-1.) > zero_tol) :
+                                  (std::fabs(shape_values[pos][0]) > zero_tol);
+            if (violated)
+              return false;
+          }
+
+      // gradients: zero on the diagonal for interior points
+      for (unsigned int k=1; k<n_points_1d-1; ++k)
+        if (std::fabs(shape_gradients[k*n_points_1d+k][0]) > zero_tol)
+          return false;
+
+      // gradient of the first function at the last point: -1 or +1 by parity
+      const double corner = (n_points_1d%2 == 0) ? -1. : 1.;
+      return !(std::fabs(shape_gradients[n_points_1d-1][0]-corner) > zero_tol);
+    }
+
+
+
+    // Estimates the memory used in bytes: the object itself plus the contents
+    // of every container member (including subface values and the numbering).
+    template <typename Number>
+    std::size_t
+    ShapeInfo<Number>::memory_consumption () const
+    {
+      std::size_t memory = sizeof(*this) + face_indices.memory_consumption();
+      memory += MemoryConsumption::memory_consumption(shape_values);
+      memory += MemoryConsumption::memory_consumption(shape_gradients);
+      memory += MemoryConsumption::memory_consumption(shape_hessians);
+      memory += MemoryConsumption::memory_consumption(shape_val_evenodd);
+      memory += MemoryConsumption::memory_consumption(shape_gra_evenodd);
+      memory += MemoryConsumption::memory_consumption(shape_hes_evenodd);
+      for (unsigned int i=0; i<2; ++i)
+        memory += (MemoryConsumption::memory_consumption(face_value[i]) +
+                   MemoryConsumption::memory_consumption(face_gradient[i]) +
+                   MemoryConsumption::memory_consumption(subface_value[i]));
+      memory += MemoryConsumption::memory_consumption(shape_values_number);
+      memory += MemoryConsumption::memory_consumption(shape_gradient_number);
+      memory += MemoryConsumption::memory_consumption(lexicographic_numbering);
+      return memory;
+    }
+
+    // end of functions for ShapeInfo
+
+  } // end of namespace MatrixFreeFunctions
+} // end of namespace internal
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/include/deal.II/meshworker/assembler.h b/include/deal.II/meshworker/assembler.h
new file mode 100644
index 0000000..b791416
--- /dev/null
+++ b/include/deal.II/meshworker/assembler.h
@@ -0,0 +1,1237 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__mesh_worker_assembler_h
+#define dealii__mesh_worker_assembler_h
+
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/base/mg_level_object.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/meshworker/dof_info.h>
+#include <deal.II/meshworker/functional.h>
+#include <deal.II/meshworker/simple.h>
+#include <deal.II/multigrid/mg_constrained_dofs.h>
+#include <deal.II/algorithms/any_data.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace MeshWorker
+{
+  /**
+   * The namespace containing objects that can be used to assemble data
+   * computed on cells and faces into global objects. This can range from
+   * collecting the total error estimate from cell and face contributions to
+   * assembling matrices and multilevel matrices.
+   *
+   * <h3>Data models</h3>
+   *
+   * The class chosen from this namespace determines which data model is used.
+   * For the local as well as the global objects, we have the choice between
+   * two models:
+   *
+   * <h4>The comprehensive data model</h4>
+   *
+   * This is the structure set up by the FESystem class. Globally, this means
+   * that data is assembled into one residual vector and into one matrix. These
+   * objects may be block vectors and block matrices, but the process of
+   * assembling ignores this fact.
+   *
+   * Similarly, there is only a single cell vector and cell matrix,
+   * respectively, which is indexed by all degrees of freedom of the FESystem.
+   * When building the cell matrix, it is necessary to distinguish between the
+   * different components of the system and select the right operator for each
+   * pair.
+   *
+   * <h4>The blocked data model</h4>
+   *
+   * Here, all the blocks are treated separately (in spite of using FESystem
+   * for its convenience in other places). For instance, no block matrix is
+   * assembled, but a list of blocks, which can be combined later by
+   * BlockMatrixArray. Locally, this means that each matrix block of a system
+   * is generated separately and assembled into the corresponding global
+   * block.
+   *
+   * This approach is advantageous, if the number of matrices for each block
+   * position in the global system is different. For instance, block
+   * preconditioners for the Oseen problem require 3 pressure matrices, but
+   * only one divergence and one advection-diffusion operator for velocities.
+   *
+   * Additionally, this approach enables the construction of a system of
+   * equations from building blocks for each equation and coupling operator.
+   *
+   * Nevertheless, since a separate FEValues object must be created for each
+   * base element, it is not quite clear a priori, which data model is more
+   * efficient.
+   *
+   * @ingroup MeshWorker
+   * @author Guido Kanschat, 2009
+   */
+  namespace Assembler
+  {
+    /**
+     * Assemble local residuals into global residuals.
+     *
+     * The global residuals are expected as an FEVectors object. The local
+     * residuals are block vectors.
+     *
+     * Depending on whether the BlockInfo object was initialized with
+     * BlockInfo::initialize_local(), the comprehensive or block data model is
+     * used locally.
+     *
+     * In the block model, each of the blocks of the local vectors corresponds
+     * to the restriction of a single block of the system to this cell (see
+     * @ref GlossBlock).
+     * Thus, the size of this local block is the number of degrees of freedom
+     * of the corresponding base element of the FESystem.
+     *
+     * @todo Comprehensive model currently not implemented.
+     *
+     * @ingroup MeshWorker
+     * @author Guido Kanschat, 2009
+     */
+    template <typename VectorType>
+    class ResidualLocalBlocksToGlobalBlocks
+    {
+    public:
+      /**
+       * Store the BlockInfo pointer and a copy of the AnyData of residuals.
+       */
+      void initialize(const BlockInfo *block_info,
+                      AnyData &residuals);
+      /**
+       * Store a pointer to the constraints to be used in assembling.
+       */
+      void initialize(const ConstraintMatrix &constraints);
+      /**
+       * Initialize the local data in the DoFInfo object used later for
+       * assembling, passing it the number of entries of #residuals.
+       *
+       * The second argument, which tells cells from faces, is not used by
+       * this class.
+       */
+      template <class DOFINFO>
+      void initialize_info(DOFINFO &info, bool face) const;
+
+
+      /**
+       * Assemble the local residuals into the global residuals.
+       */
+      template<class DOFINFO>
+      void assemble(const DOFINFO &info);
+
+      /**
+       * Assemble both local residuals into the global residuals.
+       */
+      template<class DOFINFO>
+      void assemble(const DOFINFO &info1,
+                    const DOFINFO &info2);
+    private:
+      /**
+       * Add one local residual to a global vector, using #constraints if set.
+       */
+      void assemble(VectorType                                 &global,
+                    const BlockVector<double>                  &local,
+                    const std::vector<types::global_dof_index> &dof);
+
+      /**
+       * The global vectors, stored as an AnyData container of pointers.
+       */
+      AnyData residuals;
+
+      /**
+       * A pointer to the object containing the block structure.
+       */
+      SmartPointer<const BlockInfo,
+                   ResidualLocalBlocksToGlobalBlocks<VectorType> > block_info;
+      /**
+       * A pointer to the object containing constraints.
+       */
+      SmartPointer<const ConstraintMatrix,
+                   ResidualLocalBlocksToGlobalBlocks<VectorType> > constraints;
+    };
+
+
+    /**
+     * A helper class assembling local matrices into global matrices.
+     *
+     * The global matrices are expected as a vector of MatrixBlock objects,
+     * each containing a matrix object with a function corresponding to
+     * SparseMatrix::add() and information on the block row and column this
+     * matrix represents in a block system.
+     *
+     * The local matrices are expected as a similar vector of MatrixBlock
+     * objects, but containing a FullMatrix.
+     *
+     * Like with ResidualLocalBlocksToGlobalBlocks, the initialization of the
+     * BlockInfo object decides whether the comprehensive data model or the
+     * block model is used.
+     *
+     * In the comprehensive model, each of the LocalMatrixBlocks has
+     * coordinates (0,0) and dimensions equal to the number of degrees of
+     * freedom of the FESystem.
+     *
+     * In the block model, each block has its own block coordinates
+     * and the size depends on the associated FESystem::base_element(). These
+     * blocks can be generated separately and will be assembled into the
+     * correct matrix block by this object.
+     *
+     * @ingroup MeshWorker
+     * @author Guido Kanschat, 2009
+     */
+    template <typename MatrixType, typename number = double>
+    class MatrixLocalBlocksToGlobalBlocks
+    {
+    public:
+      /**
+       * Constructor, initializing the #threshold, which limits how small
+       * numbers may be to be entered into the matrix.
+       */
+      MatrixLocalBlocksToGlobalBlocks(double threshold = 1.e-12);
+
+      /**
+       * Store pointers to the BlockInfo object and to the vector of global
+       * matrices. Neither of the two objects is copied.
+       */
+      void initialize(const BlockInfo *block_info,
+                      MatrixBlockVector<MatrixType> &matrices);
+
+      /**
+       * Store a pointer to the constraints to be used in assembling.
+       */
+      void initialize(const ConstraintMatrix &constraints);
+      /**
+       * Initialize the local data in the DoFInfo object used later for
+       * assembling.
+       *
+       * The info object refers to a cell if <code>!face</code>, or else to an
+       * interior or boundary face.
+       */
+      template <class DOFINFO>
+      void initialize_info(DOFINFO &info, bool face) const;
+
+
+      /**
+       * Assemble the local matrices into the global matrices.
+       */
+      template<class DOFINFO>
+      void assemble(const DOFINFO &info);
+
+      /**
+       * Assemble all local matrices of both objects into the global matrices.
+       */
+      template<class DOFINFO>
+      void assemble(const DOFINFO &info1,
+                    const DOFINFO &info2);
+
+    private:
+      /**
+       * Add one local matrix to a global one, using #constraints if set.
+       */
+      void assemble(MatrixBlock<MatrixType>                    &global,
+                    const FullMatrix<number>                   &local,
+                    const unsigned int                          block_row,
+                    const unsigned int                          block_col,
+                    const std::vector<types::global_dof_index> &dof1,
+                    const std::vector<types::global_dof_index> &dof2);
+
+      /**
+       * A pointer to the vector of global matrix blocks.
+       */
+      SmartPointer<MatrixBlockVector<MatrixType>,
+                   MatrixLocalBlocksToGlobalBlocks<MatrixType, number> > matrices;
+
+      /**
+       * A pointer to the object containing the block structure.
+       */
+      SmartPointer<const BlockInfo,
+                   MatrixLocalBlocksToGlobalBlocks<MatrixType, number> > block_info;
+      /**
+       * A pointer to the object containing constraints.
+       */
+      SmartPointer<const ConstraintMatrix,
+                   MatrixLocalBlocksToGlobalBlocks<MatrixType,number> > constraints;
+
+      /**
+       * The smallest absolute value that will be entered into the global
+       * matrix if no constraints are set. All smaller absolute values are
+       * then treated as zero and not assembled.
+       */
+      const double threshold;
+
+    };
+
+    /**
+     * A helper class assembling local matrices into global multilevel
+     * matrices. This class is the multilevel equivalent of
+     * MatrixLocalBlocksToGlobalBlocks and documentation of that class applies
+     * here to a large extent.
+     *
+     * The global matrices are expected as a vector of pointers to MatrixBlock
+     * objects, each containing a MGLevelObject with matrices with a function
+     * corresponding to SparseMatrix::add() and information on the block row
+     * and column this matrix represents in a block system.
+     *
+     * The local matrices are a similar vector of MatrixBlock objects, but
+     * containing a FullMatrix.
+     *
+     * If local refinement occurs, the Multigrid method needs more matrices,
+     * two for continuous elements and another two if numerical fluxes are
+     * computed on interfaces. The second set can be added using
+     * initialize_edge_flux(). Once added, the contributions in all
+     * participating matrices will be assembled from the cell and face
+     * matrices automatically.
+     *
+     * @ingroup MeshWorker
+     * @author Guido Kanschat, 2009
+     */
+    template <typename MatrixType, typename number = double>
+    class MGMatrixLocalBlocksToGlobalBlocks
+    {
+    public:
+      typedef MGMatrixBlockVector<MatrixType> MatrixPtrVector;
+      typedef SmartPointer<MatrixPtrVector, MGMatrixLocalBlocksToGlobalBlocks<MatrixType,number> >
+      MatrixPtrVectorPtr;
+
+      /**
+       * Constructor, initializing the #threshold, which limits how small
+       * numbers may be to be entered into the matrix.
+       */
+      MGMatrixLocalBlocksToGlobalBlocks(double threshold = 1.e-12);
+
+      /**
+       * Store pointers to the BlockInfo object and to the vector of level
+       * matrices. Neither of the two objects is copied.
+       */
+      void initialize(const BlockInfo *block_info,
+                      MatrixPtrVector &matrices);
+
+      /**
+       * Initialize the multilevel constraints.
+       */
+      void initialize(const MGConstrainedDoFs &mg_constrained_dofs);
+
+      /**
+       * Multigrid methods on locally refined meshes need additional matrices.
+       * For discontinuous Galerkin methods, these are two flux matrices
+       * across the refinement edge, which are set by this method.
+       */
+      void initialize_edge_flux(MatrixPtrVector &up, MatrixPtrVector &down);
+
+      /**
+       * Multigrid methods on locally refined meshes need additional matrices.
+       * This method sets the two interface matrices at refinement edges,
+       * #interface_in and #interface_out.
+       */
+      void initialize_interfaces (MatrixPtrVector &interface_in, MatrixPtrVector &interface_out);
+      /**
+       * Initialize the local data in the DoFInfo object used later for
+       * assembling.
+       *
+       * The info object refers to a cell if <code>!face</code>, or else to an
+       * interior or boundary face.
+       */
+      template <class DOFINFO>
+      void initialize_info(DOFINFO &info, bool face) const;
+
+
+      /**
+       * Assemble the local matrices into the global matrices.
+       */
+      template<class DOFINFO>
+      void assemble(const DOFINFO &info);
+
+      /**
+       * Assemble all local matrices into the global matrices.
+       */
+      template<class DOFINFO>
+      void assemble(const DOFINFO &info1,
+                    const DOFINFO &info2);
+
+    private:
+      /**
+       * Add one local matrix to a global one, transposed if requested.
+       */
+      void assemble(MatrixType                                 &global,
+                    const FullMatrix<number>                   &local,
+                    const unsigned int                          block_row,
+                    const unsigned int                          block_col,
+                    const std::vector<types::global_dof_index> &dof1,
+                    const std::vector<types::global_dof_index> &dof2,
+                    const unsigned int                          level1,
+                    const unsigned int                          level2,
+                    bool                                        transpose = false);
+
+      /**
+       * Assemble a single local matrix into a global one.
+       */
+      void assemble_fluxes(MatrixType                                 &global,
+                           const FullMatrix<number>                   &local,
+                           const unsigned int                          block_row,
+                           const unsigned int                          block_col,
+                           const std::vector<types::global_dof_index> &dof1,
+                           const std::vector<types::global_dof_index> &dof2,
+                           const unsigned int                          level1,
+                           const unsigned int                          level2);
+
+      /**
+       * Assemble a single local matrix into a global one.
+       */
+      void assemble_up(MatrixType                                 &global,
+                       const FullMatrix<number>                   &local,
+                       const unsigned int                          block_row,
+                       const unsigned int                          block_col,
+                       const std::vector<types::global_dof_index> &dof1,
+                       const std::vector<types::global_dof_index> &dof2,
+                       const unsigned int                          level1,
+                       const unsigned int                          level2);
+
+      /**
+       * Assemble a single local matrix into a global one.
+       */
+      void assemble_down(MatrixType                                 &global,
+                         const FullMatrix<number>                   &local,
+                         const unsigned int                          block_row,
+                         const unsigned int                          block_col,
+                         const std::vector<types::global_dof_index> &dof1,
+                         const std::vector<types::global_dof_index> &dof2,
+                         const unsigned int                          level1,
+                         const unsigned int                          level2);
+
+      /**
+       * Assemble a single local matrix into a global one.
+       */
+      void assemble_in(MatrixType                                 &global,
+                       const FullMatrix<number>                   &local,
+                       const unsigned int                          block_row,
+                       const unsigned int                          block_col,
+                       const std::vector<types::global_dof_index> &dof1,
+                       const std::vector<types::global_dof_index> &dof2,
+                       const unsigned int                          level1,
+                       const unsigned int                          level2);
+
+      /**
+       * Assemble a single local matrix into a global one.
+       */
+      void assemble_out(MatrixType                                 &global,
+                        const FullMatrix<number>                   &local,
+                        const unsigned int                          block_row,
+                        const unsigned int                          block_col,
+                        const std::vector<types::global_dof_index> &dof1,
+                        const std::vector<types::global_dof_index> &dof2,
+                        const unsigned int                          level1,
+                        const unsigned int                          level2);
+
+      /**
+       * A pointer to the vector of level matrices.
+       */
+      MatrixPtrVectorPtr matrices;
+
+      /**
+       * The flux matrix between the fine and the coarse level at refinement
+       * edges.
+       */
+      MatrixPtrVectorPtr flux_down;
+
+      /**
+       * The flux matrix between the coarse and the fine level at refinement
+       * edges.
+       */
+      MatrixPtrVectorPtr flux_up;
+
+      /**
+       * The interface matrix between the fine and the coarse level at
+       * refinement edges.
+       */
+      MatrixPtrVectorPtr interface_out;
+
+      /**
+       * The interface matrix between the coarse and the fine level at
+       * refinement edges.
+       */
+      MatrixPtrVectorPtr interface_in;
+
+      /**
+       * A pointer to the object containing the block structure.
+       */
+      SmartPointer<const BlockInfo, MGMatrixLocalBlocksToGlobalBlocks<MatrixType, number> > block_info;
+
+      /**
+       * A pointer to the object containing constraints.
+       */
+      SmartPointer<const MGConstrainedDoFs,MGMatrixLocalBlocksToGlobalBlocks<MatrixType, number> > mg_constrained_dofs;
+
+
+      /**
+       * The smallest positive number that will be entered into the global
+       * matrix. All smaller absolute values will be treated as zero and will
+       * not be assembled.
+       */
+      const double threshold;
+
+    };
+
+//----------------------------------------------------------------------//
+
+    template <typename VectorType>
+    inline void
+    ResidualLocalBlocksToGlobalBlocks<VectorType>::initialize
+    (const BlockInfo *info_on_blocks, AnyData &global_residuals)
+    {
+      block_info = info_on_blocks;    // only the pointer is kept
+      residuals  = global_residuals;  // AnyData member, assigned by value
+    }
+
+    template <typename VectorType>
+    inline void
+    ResidualLocalBlocksToGlobalBlocks<VectorType>::initialize
+    (const ConstraintMatrix &constraint_matrix)
+    {
+      constraints = &constraint_matrix; // pointer only, no copy
+    }
+
+
+    template <typename VectorType>
+    template <class DOFINFO>
+    inline void
+    ResidualLocalBlocksToGlobalBlocks<VectorType>::initialize_info
+    (DOFINFO &dof_info, bool /*face*/) const
+    {
+      dof_info.initialize_vectors(this->residuals.size());
+    }
+
+    template <typename VectorType>
+    inline void
+    ResidualLocalBlocksToGlobalBlocks<VectorType>::assemble
+    (VectorType                                 &global,
+     const BlockVector<double>                  &local,
+     const std::vector<types::global_dof_index> &dof)
+    {
+      // If constraints were set, they take care of the complete transfer.
+      if (constraints != 0)
+        {
+          constraints->distribute_local_to_global(local, dof, global);
+          return;
+        }
+
+      // Otherwise add the entries one by one. The local vector is numbered
+      // block by block, while dof is addressed by the position that the
+      // local BlockIndices object assigns to the pair (block, index), so
+      // each pair is translated first.
+      for (unsigned int block=0; block<local.n_blocks(); ++block)
+        for (unsigned int i=0; i<local.block(block).size(); ++i)
+          {
+            const unsigned int cell_index
+              = this->block_info->local().local_to_global(block, i);
+
+            global(dof[cell_index]) += local.block(block)(i);
+          }
+    }
+
+
+    template <typename VectorType>
+    template <class DOFINFO>
+    inline void
+    ResidualLocalBlocksToGlobalBlocks<VectorType>::assemble (const DOFINFO &info)
+    {
+      // Local vector number r goes into global residual number r.
+      for (unsigned int r=0; r!=residuals.size(); ++r)
+        this->assemble(*residuals(r), info.vector(r), info.indices);
+    }
+
+
+    template <typename VectorType>
+    template <class DOFINFO>
+    inline void
+    ResidualLocalBlocksToGlobalBlocks<VectorType>::assemble (const DOFINFO &info1,
+                                                            const DOFINFO &info2)
+    {
+      // Per residual: the contribution of info1 first, then that of info2.
+      for (unsigned int r=0; r!=residuals.size(); ++r)
+        {
+          this->assemble(*residuals(r), info1.vector(r), info1.indices);
+          this->assemble(*residuals(r), info2.vector(r), info2.indices);
+        }
+    }
+
+
+//----------------------------------------------------------------------//
+
+    template <typename MatrixType, typename number>
+    inline
+    MatrixLocalBlocksToGlobalBlocks<MatrixType, number>::
+    MatrixLocalBlocksToGlobalBlocks (double min_entry)
+      : threshold(min_entry)   // lower bound tested in assemble()
+    {
+    }
+
+
+    template <typename MatrixType, typename number>
+    inline void
+    MatrixLocalBlocksToGlobalBlocks<MatrixType, number>::initialize
+    (const BlockInfo *info_on_blocks, MatrixBlockVector<MatrixType> &global_matrices)
+    {
+      // Neither object is copied; only pointers to them are stored.
+      block_info = info_on_blocks;
+      matrices = &global_matrices;
+    }
+
+
+
+    template <typename MatrixType, typename number>
+    inline void
+    MatrixLocalBlocksToGlobalBlocks<MatrixType, number>::initialize
+    (const ConstraintMatrix &constraint_matrix)
+    {
+      constraints = &constraint_matrix; // pointer only, no copy
+    }
+
+
+
+    template <typename MatrixType, typename number>
+    template <class DOFINFO>
+    inline void
+    MatrixLocalBlocksToGlobalBlocks<MatrixType, number>::initialize_info
+    (DOFINFO &dof_info, bool on_face) const
+    {
+      // The info object is handed the global matrices and the face flag.
+      dof_info.initialize_matrices(*matrices, on_face);
+    }
+
+
+
+    template <typename MatrixType, typename number>
+    inline void
+    MatrixLocalBlocksToGlobalBlocks<MatrixType, number>::assemble
+    (MatrixBlock<MatrixType>                    &global,
+     const FullMatrix<number>                   &local,
+     const unsigned int                          block_row,
+     const unsigned int                          block_col,
+     const std::vector<types::global_dof_index> &dof1,
+     const std::vector<types::global_dof_index> &dof2)
+    {
+      if (constraints != 0)
+        {
+          // The constraints object is given only the global indices that
+          // belong to the row and column block of this matrix, so the
+          // matching ranges are cut out of dof1 and dof2 first.
+          const BlockIndices &local_blocks = this->block_info->local();
+
+          std::vector<types::global_dof_index> row_dofs (local_blocks.block_size(block_row));
+          for (unsigned int r=0; r<row_dofs.size(); ++r)
+            row_dofs[r] = dof1[local_blocks.block_start(block_row)+r];
+
+          std::vector<types::global_dof_index> col_dofs (local_blocks.block_size(block_col));
+          for (unsigned int c=0; c<col_dofs.size(); ++c)
+            col_dofs[c] = dof2[local_blocks.block_start(block_col)+c];
+
+          constraints->distribute_local_to_global(local, row_dofs, col_dofs,
+                                                  global);
+          return;
+        }
+
+      // Without constraints, add all entries that reach the threshold one
+      // by one. Row and column numbers of the local matrix count within
+      // their block, so they are first mapped to positions in dof1 and dof2.
+      for (unsigned int row=0; row<local.n_rows(); ++row)
+        for (unsigned int col=0; col<local.n_cols(); ++col)
+          if (std::fabs(local(row,col)) >= threshold)
+            {
+              const unsigned int cell_row
+                = this->block_info->local().local_to_global(block_row, row);
+              const unsigned int cell_col
+                = this->block_info->local().local_to_global(block_col, col);
+
+              global.add(dof1[cell_row], dof2[cell_col], local(row,col));
+            }
+    }
+
+
+    template <typename MatrixType, typename number>
+    template <class DOFINFO>
+    inline void
+    MatrixLocalBlocksToGlobalBlocks<MatrixType, number>::assemble (const DOFINFO &info)
+    {
+      for (unsigned int m=0; m<matrices->size(); ++m)
+        {
+          // Block coordinates stored with the current global matrix
+          const types::global_dof_index block_row = matrices->block(m).row;
+          const types::global_dof_index block_col = matrices->block(m).column;
+          // The one info object provides both row and column indices.
+          this->assemble(matrices->block(m), info.matrix(m,false).matrix,
+                         block_row, block_col, info.indices, info.indices);
+        }
+    }
+
+
+    template <typename MatrixType, typename number>
+    template <class DOFINFO>
+    inline void
+    MatrixLocalBlocksToGlobalBlocks<MatrixType, number>::assemble
+    (const DOFINFO &info1, const DOFINFO &info2)
+    {
+      for (unsigned int m=0; m<matrices->size(); ++m)
+        {
+          // Block coordinates stored with the current global matrix
+          const types::global_dof_index block_row = matrices->block(m).row;
+          const types::global_dof_index block_col = matrices->block(m).column;
+          // matrix(m,false) pairs the indices of an info object with
+          // themselves, matrix(m,true) pairs them with those of the other.
+          this->assemble(matrices->block(m), info1.matrix(m,false).matrix, block_row, block_col, info1.indices, info1.indices);
+          this->assemble(matrices->block(m), info1.matrix(m,true).matrix, block_row, block_col, info1.indices, info2.indices);
+          this->assemble(matrices->block(m), info2.matrix(m,false).matrix, block_row, block_col, info2.indices, info2.indices);
+          this->assemble(matrices->block(m), info2.matrix(m,true).matrix, block_row, block_col, info2.indices, info1.indices);
+        }
+    }
+
+
+// ----------------------------------------------------------------------//
+
+    template <typename MatrixType, typename number>
+    inline
+    MGMatrixLocalBlocksToGlobalBlocks<MatrixType, number>::
+    MGMatrixLocalBlocksToGlobalBlocks (double min_entry)
+      : threshold(min_entry)   // lower bound tested in assemble()
+    {
+    }
+
+
+    template <typename MatrixType, typename number>
+    inline void
+    MGMatrixLocalBlocksToGlobalBlocks<MatrixType, number>::initialize
+    (const BlockInfo *info_on_blocks, MatrixPtrVector &level_matrices)
+    {
+      block_info = info_on_blocks;
+      // The local and the global block structure must agree in size().
+      AssertDimension(block_info->local().size(), block_info->global().size());
+      matrices = &level_matrices;
+    }
+
+
+    template <typename MatrixType, typename number>
+    inline void
+    MGMatrixLocalBlocksToGlobalBlocks<MatrixType, number>::initialize
+    (const MGConstrainedDoFs &level_constraints)
+    {
+      mg_constrained_dofs = &level_constraints; // pointer only, no copy
+    }
+
+
+    template <typename MatrixType, typename number>
+    template <class DOFINFO>
+    inline void
+    MGMatrixLocalBlocksToGlobalBlocks<MatrixType, number>::initialize_info
+    (DOFINFO &dof_info, bool on_face) const
+    {
+      // The info object is handed the level matrices and the face flag.
+      dof_info.initialize_matrices(*matrices, on_face);
+    }
+
+
+
+    template <typename MatrixType, typename number>
+    inline void
+    MGMatrixLocalBlocksToGlobalBlocks<MatrixType, number>::initialize_edge_flux
+    (MatrixPtrVector &up, MatrixPtrVector &down)
+    {
+      // Remember (without copying) the two flux matrices at refinement edges.
+      flux_up = &up;     // pointer member: store the address, as initialize() does
+      flux_down = &down;
+    }
+
+
+    template <typename MatrixType, typename number>
+    inline void
+    MGMatrixLocalBlocksToGlobalBlocks<MatrixType, number>::initialize_interfaces
+    (MatrixPtrVector &in, MatrixPtrVector &out)
+    {
+      // Remember (without copying) the two interface matrices.
+      interface_in = &in;   // pointer member: store the address, as initialize() does
+      interface_out = &out;
+    }
+
+
+    template <typename MatrixType, typename number>
+    inline void
+    MGMatrixLocalBlocksToGlobalBlocks<MatrixType, number>::assemble
+    (MatrixType                                 &global,
+     const FullMatrix<number>                   &local,
+     const unsigned int                          block_row,
+     const unsigned int                          block_col,
+     const std::vector<types::global_dof_index> &dof1,
+     const std::vector<types::global_dof_index> &dof2,
+     const unsigned int                          level1,
+     const unsigned int                          level2,
+     bool                                        transpose)
+    {
+      // Visit all entries of the local matrix. For each of them, find the
+      // row and column it is added to, check whether the multigrid
+      // constraints (if any) admit it, and enter it into the global matrix,
+      // with row and column swapped if transpose is set.
+
+      for (unsigned int row=0; row<local.n_rows(); ++row)
+        for (unsigned int col=0; col<local.n_cols(); ++col)
+          {
+            // Entries that do not reach the threshold are skipped.
+            if (!(std::fabs(local(row,col)) >= threshold))
+              continue;
+
+            // Row and column count within their block of the local
+            // matrix. Translate them into the positions at which the
+            // corresponding indices are stored in dof1 and dof2.
+            const unsigned int cell_row
+              = this->block_info->local().local_to_global(block_row, row);
+            const unsigned int cell_col
+              = this->block_info->local().local_to_global(block_col, col);
+
+            // Of the pair that global_to_local() returns for a level dof
+            // index, the second component addresses the level matrix. Two
+            // index sets and two levels are passed in because face
+            // matrices may couple two different cells.
+            const unsigned int level_row
+              = this->block_info->level(level1).global_to_local(dof1[cell_row]).second;
+            const unsigned int level_col
+              = this->block_info->level(level2).global_to_local(dof2[cell_col]).second;
+
+            // Without multigrid constraints every entry is accepted. With
+            // them, an entry is dropped if its row or column index is at
+            // the refinement edge. If in addition boundary values are set,
+            // only entries between two non-boundary indices, or between two
+            // boundary indices with equal row and column, are kept.
+            bool enter_entry = true;
+            if (mg_constrained_dofs != 0)
+              {
+                enter_entry =
+                  !mg_constrained_dofs->at_refinement_edge(level1, level_row) &&
+                  !mg_constrained_dofs->at_refinement_edge(level2, level_col);
+
+                if (enter_entry && mg_constrained_dofs->set_boundary_values())
+                  {
+                    // Both indices must have the same boundary status.
+                    const bool row_at_boundary
+                      = mg_constrained_dofs->is_boundary_index(level1, level_row);
+                    const bool col_at_boundary
+                      = mg_constrained_dofs->is_boundary_index(level2, level_col);
+
+                    enter_entry = (row_at_boundary == col_at_boundary) &&
+                                  (!row_at_boundary || level_row == level_col);
+                  }
+              }
+
+            if (!enter_entry)
+              continue;
+
+            // Swap row and column when the transpose is requested.
+            if (transpose)
+              global.add(level_col, level_row, local(row,col));
+            else
+              global.add(level_row, level_col, local(row,col));
+          }
+    }
+
+
+    // Add the entries of the block-local matrix @p local to @p global.
+    // Entries that do not reach #threshold in absolute value are skipped.
+    // @p block_row and @p block_col select the blocks of the local numbering;
+    // @p dof1 with @p level1 yields the row index, @p dof2 with @p level2 the
+    // column index. If #mg_constrained_dofs is set, an entry is only added if
+    // neither of its indices is reported by non_refinement_edge_index() or
+    // at_refinement_edge().
+    template <typename MatrixType, typename number>
+    inline void
+    MGMatrixLocalBlocksToGlobalBlocks<MatrixType, number>::assemble_fluxes
+    (MatrixType                                 &global,
+     const FullMatrix<number>                   &local,
+     const unsigned int                          block_row,
+     const unsigned int                          block_col,
+     const std::vector<types::global_dof_index> &dof1,
+     const std::vector<types::global_dof_index> &dof2,
+     const unsigned int                          level1,
+     const unsigned int                          level2)
+    {
+      for (unsigned int r=0; r<local.n_rows(); ++r)
+        for (unsigned int c=0; c<local.n_cols(); ++c)
+          {
+            // Nothing to do for entries that do not reach the threshold
+            if (!(std::fabs(local(r,c)) >= threshold))
+              continue;
+
+            // Position of the entry in the DoFHandler numbering of the
+            // cell, which differs from the block-wise numbering used in
+            // the local matrices
+            const unsigned int r_cell = this->block_info->local().local_to_global(block_row, r);
+            const unsigned int c_cell = this->block_info->local().local_to_global(block_col, c);
+
+            // Indices to assemble into. Face matrices may couple two
+            // different cells, hence the two sets of dof indices.
+            const unsigned int r_level = this->block_info->level(level1).global_to_local(dof1[r_cell]).second;
+            const unsigned int c_level = this->block_info->level(level2).global_to_local(dof2[c_cell]).second;
+
+            // Without constraints every entry is added; with constraints
+            // all four checks below have to fail
+            const bool add_entry =
+              (mg_constrained_dofs == 0)
+              ||
+              (!mg_constrained_dofs->non_refinement_edge_index(level1, r_level) &&
+               !mg_constrained_dofs->non_refinement_edge_index(level2, c_level) &&
+               !mg_constrained_dofs->at_refinement_edge(level1, r_level) &&
+               !mg_constrained_dofs->at_refinement_edge(level2, c_level));
+
+            if (add_entry)
+              global.add(r_level, c_level, local(r,c));
+          }
+    }
+
+    // Add the entries of the block-local matrix @p local to @p global, using
+    // @p dof1 / @p level1 for the row and @p dof2 / @p level2 for the column
+    // index. Entries that do not reach #threshold are skipped, and so are,
+    // if #mg_constrained_dofs is set, entries with an index reported by
+    // non_refinement_edge_index() or at_refinement_edge().
+    template <typename MatrixType, typename number>
+    inline void
+    MGMatrixLocalBlocksToGlobalBlocks<MatrixType, number>::assemble_up
+    (MatrixType                                 &global,
+     const FullMatrix<number>                   &local,
+     const unsigned int                          block_row,
+     const unsigned int                          block_col,
+     const std::vector<types::global_dof_index> &dof1,
+     const std::vector<types::global_dof_index> &dof2,
+     const unsigned int                          level1,
+     const unsigned int                          level2)
+    {
+      for (unsigned int row=0; row<local.n_rows(); ++row)
+        for (unsigned int column=0; column<local.n_cols(); ++column)
+          {
+            if (!(std::fabs(local(row,column)) >= threshold))
+              continue;
+
+            // Translate the block-wise local position into the DoFHandler
+            // numbering of the cell
+            const unsigned int cell_row    = this->block_info->local().local_to_global(block_row, row);
+            const unsigned int cell_column = this->block_info->local().local_to_global(block_col, column);
+
+            // Target indices; two index sets are needed since a face
+            // matrix may couple two different cells
+            const unsigned int target_row    = this->block_info->level(level1).global_to_local(dof1[cell_row]).second;
+            const unsigned int target_column = this->block_info->level(level2).global_to_local(dof2[cell_column]).second;
+
+            if (mg_constrained_dofs == 0)
+              {
+                global.add(target_row, target_column, local(row,column));
+                continue;
+              }
+
+            // With constraints, drop the entry as soon as one of the
+            // checks flags one of its indices
+            if (!mg_constrained_dofs->non_refinement_edge_index(level1, target_row) &&
+                !mg_constrained_dofs->non_refinement_edge_index(level2, target_column) &&
+                !mg_constrained_dofs->at_refinement_edge(level1, target_row) &&
+                !mg_constrained_dofs->at_refinement_edge(level2, target_column))
+              global.add(target_row, target_column, local(row,column));
+          }
+    }
+
+    // Add the transpose of the block-local matrix @p local to @p global: the
+    // value written to the position derived from (j,k) is local(k,j). Row
+    // indices come from @p dof1 / @p level1, column indices from @p dof2 /
+    // @p level2. Entries that do not reach #threshold are skipped, and so
+    // are, if #mg_constrained_dofs is set, entries with an index reported by
+    // non_refinement_edge_index() or at_refinement_edge().
+    template <typename MatrixType, typename number>
+    inline void
+    MGMatrixLocalBlocksToGlobalBlocks<MatrixType, number>::assemble_down
+    (MatrixType                                 &global,
+     const FullMatrix<number>                   &local,
+     const unsigned int                          block_row,
+     const unsigned int                          block_col,
+     const std::vector<types::global_dof_index> &dof1,
+     const std::vector<types::global_dof_index> &dof2,
+     const unsigned int                          level1,
+     const unsigned int                          level2)
+    {
+      for (unsigned int r=0; r<local.n_rows(); ++r)
+        for (unsigned int c=0; c<local.n_cols(); ++c)
+          {
+            // Note the swapped indices: the local matrix is read transposed
+            if (!(std::fabs(local(c,r)) >= threshold))
+              continue;
+
+            // Position in the DoFHandler numbering of the cell, as opposed
+            // to the block-wise numbering of the local matrices
+            const unsigned int r_cell = this->block_info->local().local_to_global(block_row, r);
+            const unsigned int c_cell = this->block_info->local().local_to_global(block_col, c);
+
+            // Indices to assemble into; face matrices may couple two
+            // different cells, so there are two sets of dof indices
+            const unsigned int r_level = this->block_info->level(level1).global_to_local(dof1[r_cell]).second;
+            const unsigned int c_level = this->block_info->level(level2).global_to_local(dof2[c_cell]).second;
+
+            const bool add_entry =
+              (mg_constrained_dofs == 0)
+              ||
+              (!mg_constrained_dofs->non_refinement_edge_index(level1, r_level) &&
+               !mg_constrained_dofs->non_refinement_edge_index(level2, c_level) &&
+               !mg_constrained_dofs->at_refinement_edge(level1, r_level) &&
+               !mg_constrained_dofs->at_refinement_edge(level2, c_level));
+
+            if (add_entry)
+              global.add(r_level, c_level, local(c,r));
+          }
+    }
+
+    // Add entries of the block-local matrix @p local to @p global. Without
+    // #mg_constrained_dofs every entry reaching #threshold is added. With
+    // constraints, only entries whose row index is at the refinement edge
+    // and whose column index is not are considered; if set_boundary_values()
+    // holds, they are further restricted to pairs of non-boundary indices
+    // and to diagonal entries of boundary indices.
+    template <typename MatrixType, typename number>
+    inline void
+    MGMatrixLocalBlocksToGlobalBlocks<MatrixType, number>::assemble_in
+    (MatrixType                                 &global,
+     const FullMatrix<number>                   &local,
+     const unsigned int                          block_row,
+     const unsigned int                          block_col,
+     const std::vector<types::global_dof_index> &dof1,
+     const std::vector<types::global_dof_index> &dof2,
+     const unsigned int                          level1,
+     const unsigned int                          level2)
+    {
+//      AssertDimension(local.n(), dof1.size());
+//      AssertDimension(local.m(), dof2.size());
+
+      for (unsigned int r=0; r<local.n_rows(); ++r)
+        for (unsigned int c=0; c<local.n_cols(); ++c)
+          {
+            if (!(std::fabs(local(r,c)) >= threshold))
+              continue;
+
+            // Position in the DoFHandler numbering of the cell, as opposed
+            // to the block-wise numbering of the local matrices
+            const unsigned int r_cell = this->block_info->local().local_to_global(block_row, r);
+            const unsigned int c_cell = this->block_info->local().local_to_global(block_col, c);
+
+            // Indices to assemble into; face matrices may couple two
+            // different cells, so there are two sets of dof indices
+            const unsigned int r_level = this->block_info->level(level1).global_to_local(dof1[r_cell]).second;
+            const unsigned int c_level = this->block_info->level(level2).global_to_local(dof2[c_cell]).second;
+
+            if (mg_constrained_dofs == 0)
+              {
+                global.add(r_level, c_level, local(r,c));
+                continue;
+              }
+
+            // Keep only couplings from a row at the refinement edge to a
+            // column that is not at the refinement edge
+            if (!mg_constrained_dofs->at_refinement_edge(level1, r_level) ||
+                mg_constrained_dofs->at_refinement_edge(level2, c_level))
+              continue;
+
+            if (mg_constrained_dofs->set_boundary_values())
+              {
+                const bool r_on_boundary = mg_constrained_dofs->is_boundary_index(level1, r_level);
+                const bool c_on_boundary = mg_constrained_dofs->is_boundary_index(level2, c_level);
+
+                // Either both indices are off the boundary, or this is the
+                // diagonal entry of a boundary index
+                if ((!r_on_boundary && !c_on_boundary)
+                    ||
+                    (r_on_boundary && c_on_boundary && r_level == c_level))
+                  global.add(r_level, c_level, local(r,c));
+              }
+            else
+              global.add(r_level, c_level, local(r,c));
+          }
+    }
+
+    // Transposed counterpart of the "in" assembly: the value written to the
+    // position derived from (j,k) is local(k,j). Without
+    // #mg_constrained_dofs every entry reaching #threshold is added. With
+    // constraints, only entries whose row index is at the refinement edge
+    // and whose column index is not are considered; if set_boundary_values()
+    // holds, they are further restricted to pairs of non-boundary indices
+    // and to diagonal entries of boundary indices.
+    template <typename MatrixType, typename number>
+    inline void
+    MGMatrixLocalBlocksToGlobalBlocks<MatrixType, number>::assemble_out
+    (MatrixType                                 &global,
+     const FullMatrix<number>                   &local,
+     const unsigned int                          block_row,
+     const unsigned int                          block_col,
+     const std::vector<types::global_dof_index> &dof1,
+     const std::vector<types::global_dof_index> &dof2,
+     const unsigned int                          level1,
+     const unsigned int                          level2)
+    {
+//      AssertDimension(local.n(), dof1.size());
+//      AssertDimension(local.m(), dof2.size());
+
+      for (unsigned int r=0; r<local.n_rows(); ++r)
+        for (unsigned int c=0; c<local.n_cols(); ++c)
+          {
+            // Note the swapped indices: the local matrix is read transposed
+            if (!(std::fabs(local(c,r)) >= threshold))
+              continue;
+
+            // Position in the DoFHandler numbering of the cell, as opposed
+            // to the block-wise numbering of the local matrices
+            const unsigned int r_cell = this->block_info->local().local_to_global(block_row, r);
+            const unsigned int c_cell = this->block_info->local().local_to_global(block_col, c);
+
+            // Indices to assemble into; face matrices may couple two
+            // different cells, so there are two sets of dof indices
+            const unsigned int r_level = this->block_info->level(level1).global_to_local(dof1[r_cell]).second;
+            const unsigned int c_level = this->block_info->level(level2).global_to_local(dof2[c_cell]).second;
+
+            if (mg_constrained_dofs == 0)
+              {
+                global.add(r_level, c_level, local(c,r));
+                continue;
+              }
+
+            // Keep only couplings from a row at the refinement edge to a
+            // column that is not at the refinement edge
+            if (!mg_constrained_dofs->at_refinement_edge(level1, r_level) ||
+                mg_constrained_dofs->at_refinement_edge(level2, c_level))
+              continue;
+
+            if (mg_constrained_dofs->set_boundary_values())
+              {
+                const bool r_on_boundary = mg_constrained_dofs->is_boundary_index(level1, r_level);
+                const bool c_on_boundary = mg_constrained_dofs->is_boundary_index(level2, c_level);
+
+                // Either both indices are off the boundary, or this is the
+                // diagonal entry of a boundary index
+                if ((!r_on_boundary && !c_on_boundary)
+                    ||
+                    (r_on_boundary && c_on_boundary && r_level == c_level))
+                  global.add(r_level, c_level, local(c,r));
+              }
+            else
+              global.add(r_level, c_level, local(c,r));
+          }
+    }
+
+
+    // Assemble the cell matrices stored in @p info. For every matrix block,
+    // the local matrix is added to the matrix of the level of the cell. If
+    // #mg_constrained_dofs is set, the refinement edge couplings are
+    // additionally written to the interface matrices (if the respective
+    // pointer is set) and to the block_in/block_out matrices.
+    template <typename MatrixType, typename number>
+    template <class DOFINFO>
+    inline void
+    MGMatrixLocalBlocksToGlobalBlocks<MatrixType, number>::assemble(const DOFINFO &info)
+    {
+      const unsigned int level = info.cell->level();
+
+      for (unsigned int i=0; i<matrices->size(); ++i)
+        {
+          // Row and column index of
+          // the block we are dealing with
+          const unsigned int row = matrices->block(i)[level].row;
+          const unsigned int col = matrices->block(i)[level].column;
+
+          assemble(matrices->block(i)[level].matrix, info.matrix(i,false).matrix, row, col,
+                   info.indices, info.indices, level, level);
+          if (mg_constrained_dofs != 0)
+            {
+              if (interface_in != 0)
+                assemble_in(interface_in->block(i)[level], info.matrix(i,false).matrix, row, col,
+                            info.indices, info.indices, level, level);
+              // The "out" matrix takes the transposed entries, i.e. it has
+              // to be filled by assemble_out(), in the same way as the
+              // block_in/block_out pair below.
+              if (interface_out != 0)
+                assemble_out(interface_out->block(i)[level], info.matrix(i,false).matrix, row, col,
+                             info.indices, info.indices, level, level);
+
+              assemble_in(matrices->block_in(i)[level], info.matrix(i,false).matrix, row, col,
+                          info.indices, info.indices, level, level);
+              assemble_out(matrices->block_out(i)[level], info.matrix(i,false).matrix, row, col,
+                           info.indices, info.indices, level, level);
+            }
+        }
+    }
+
+
+    // Assemble the matrices of the two sides @p info1 and @p info2 of an
+    // interior face. If both cells are on the same level, all four couplings
+    // go into the level matrix, through assemble() if there are no
+    // constraints and through assemble_fluxes() otherwise. If the levels
+    // differ, @p info1 has to belong to the finer cell; provided #flux_up is
+    // not empty, its own coupling is added to the level matrix and the two
+    // cross couplings go to #flux_up and #flux_down.
+    template <typename MatrixType, typename number>
+    template <class DOFINFO>
+    inline void
+    MGMatrixLocalBlocksToGlobalBlocks<MatrixType, number>::assemble
+    (const DOFINFO &info1,
+     const DOFINFO &info2)
+    {
+      const unsigned int level1 = info1.cell->level();
+      const unsigned int level2 = info2.cell->level();
+
+      for (unsigned int b=0; b<matrices->size(); ++b)
+        {
+          MGLevelObject<MatrixBlock<MatrixType> > &level_blocks = matrices->block(b);
+
+          // Row and column index of the block we are dealing with
+          const unsigned int row = level_blocks[level1].row;
+          const unsigned int col = level_blocks[level1].column;
+
+          if (level1 != level2)
+            {
+              Assert(level1 > level2, ExcNotImplemented());
+              if (flux_up->size() != 0)
+                {
+                  // Do not add M22, which is done by the coarser cell
+                  assemble_fluxes(level_blocks[level1].matrix, info1.matrix(b,false).matrix, row, col,
+                                  info1.indices, info1.indices, level1, level1);
+                  assemble_up(flux_up->block(b)[level1].matrix, info1.matrix(b,true).matrix, row, col,
+                              info1.indices, info2.indices, level1, level2);
+                  assemble_down(flux_down->block(b)[level1].matrix, info2.matrix(b,true).matrix, row, col,
+                                info2.indices, info1.indices, level2, level1);
+                }
+              continue;
+            }
+
+          // Both cells are on the same level: add the two diagonal and
+          // the two off-diagonal couplings to the level matrix
+          if (mg_constrained_dofs != 0)
+            {
+              assemble_fluxes(level_blocks[level1].matrix, info1.matrix(b,false).matrix, row, col,
+                              info1.indices, info1.indices, level1, level1);
+              assemble_fluxes(level_blocks[level1].matrix, info1.matrix(b,true).matrix, row, col,
+                              info1.indices, info2.indices, level1, level2);
+              assemble_fluxes(level_blocks[level1].matrix, info2.matrix(b,false).matrix, row, col,
+                              info2.indices, info2.indices, level2, level2);
+              assemble_fluxes(level_blocks[level1].matrix, info2.matrix(b,true).matrix, row, col,
+                              info2.indices, info1.indices, level2, level1);
+            }
+          else
+            {
+              assemble(level_blocks[level1].matrix, info1.matrix(b,false).matrix, row, col,
+                       info1.indices, info1.indices, level1, level1);
+              assemble(level_blocks[level1].matrix, info1.matrix(b,true).matrix, row, col,
+                       info1.indices, info2.indices, level1, level2);
+              assemble(level_blocks[level1].matrix, info2.matrix(b,false).matrix, row, col,
+                       info2.indices, info2.indices, level2, level2);
+              assemble(level_blocks[level1].matrix, info2.matrix(b,true).matrix, row, col,
+                       info2.indices, info1.indices, level2, level1);
+            }
+        }
+    }
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/meshworker/dof_info.h b/include/deal.II/meshworker/dof_info.h
new file mode 100644
index 0000000..cdfad1e
--- /dev/null
+++ b/include/deal.II/meshworker/dof_info.h
@@ -0,0 +1,478 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__mesh_worker_dof_info_h
+#define dealii__mesh_worker_dof_info_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/std_cxx11/shared_ptr.h>
+#include <deal.II/dofs/block_info.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/meshworker/local_results.h>
+#include <deal.II/meshworker/vector_selector.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace MeshWorker
+{
+  template <int dim, class DOFINFO> class DoFInfoBox;
+
+
+  /**
+   * A class containing information on geometry and degrees of freedom of a
+   * mesh object.
+   *
+   * The information in these objects is usually used by one of the Assembler
+   * classes. It is also the kind of information which is needed in mesh based
+   * matrices (often referred to as matrix free methods).
+   *
+   * In addition to the information on degrees of freedom stored in this
+   * class, it also provides the local computation space for the worker object
+   * operating on it in LocalResults. This base class will automatically be
+   * reinitialized on each cell, but initial setup is up to the user and
+   * should be done when initialize() for this class is called.
+   *
+   * This class operates in two different modes, corresponding to the data
+   * models discussed in the Assembler namespace documentation.
+   *
+   * The choice of the local data model is triggered by the vector
+   * BlockInfo::local_renumbering, which in turn is usually filled by
+   * BlockInfo::initialize_local(). If this function has been used, or the
+   * vector has been changed from zero-length, then local dof indices stored
+   * in this object will automatically be renumbered to reflect local block
+   * structure. This means, the first entries in @p indices will refer to the
+   * first block of the system, then comes the second block and so on.
+   *
+   * The BlockInfo object is stored as a pointer. Therefore, if the block
+   * structure changes, for instance because of mesh refinement, the DoFInfo
+   * class will automatically use the new structures.
+   *
+   * @ingroup MeshWorker
+   * @author Guido Kanschat, 2009
+   */
+  template<int dim, int spacedim = dim, typename number = double>
+  class DoFInfo : public LocalResults<number>
+  {
+  public:
+    /// The current cell
+    typename Triangulation<dim, spacedim>::cell_iterator cell;
+
+    /// The current face
+    typename Triangulation<dim, spacedim>::face_iterator face;
+
+    /**
+     * The number of the current face on the current cell.
+     *
+     * This number is numbers::invalid_unsigned_int if the info object was
+     * initialized with a cell.
+     */
+    unsigned int face_number;
+
+    /**
+     * The number of the current subface on the current face
+     *
+     * This number is numbers::invalid_unsigned_int if the info object was not
+     * initialized with a subface.
+     */
+    unsigned int sub_number;
+
+    /**
+     * The DoF indices of the current cell. If local blocks are used, the
+     * indices are renumbered to reflect the local block structure.
+     */
+    std::vector<types::global_dof_index> indices;
+
+    /**
+     * The DoF indices on the current cell, organized by local blocks. The
+     * size of this vector is zero, unless local blocks are used.
+     */
+    std::vector<std::vector<types::global_dof_index> > indices_by_block;
+
+    /**
+     * Constructor setting the #block_info pointer.
+     */
+    DoFInfo(const BlockInfo &block_info);
+
+    /**
+     * Constructor leaving the #block_info pointer empty, but setting the
+     * #aux_local_indices to a single block of the size of the number of
+     * degrees of freedom per cell of the finite element of @p dof_handler.
+     */
+    DoFInfo (const DoFHandler<dim, spacedim> &dof_handler);
+
+    /**
+     * Set the current cell and fill @p indices. The face and subface
+     * numbers are set to numbers::invalid_unsigned_int.
+     */
+    template <class DHCellIterator>
+    void reinit(const DHCellIterator &c);
+
+    /**
+     * Set the current face and fill @p indices if the #cell changed.
+     */
+    template <class DHCellIterator, class DHFaceIterator>
+    void reinit(const DHCellIterator &c,
+                const DHFaceIterator &f,
+                const unsigned int face_no);
+
+    /**
+     * Set the current subface and fill @p indices if the #cell changed.
+     */
+    template <class DHCellIterator, class DHFaceIterator>
+    void reinit(const DHCellIterator &c,
+                const DHFaceIterator &f,
+                const unsigned int face_no,
+                const unsigned int subface_no);
+
+    /**
+     * Switch to a new face of the same cell. Does not change @p indices and
+     * does not reset data in LocalResults.
+     */
+    template <class DHFaceIterator>
+    void set_face (const DHFaceIterator &f,
+                   const unsigned int face_no);
+
+    /**
+     * Switch to a new subface of the same cell. Does not change @p indices
+     * and does not reset data in LocalResults.
+     */
+    template <class DHFaceIterator>
+    void set_subface (const DHFaceIterator &f,
+                      const unsigned int face_no,
+                      const unsigned int subface_no);
+
+    /**
+     * The local block structure: the one stored in #block_info if this
+     * pointer is set, #aux_local_indices otherwise.
+     */
+    const BlockIndices &local_indices() const;
+
+
+    /// The block structure of the system
+    SmartPointer<const BlockInfo,DoFInfo<dim,spacedim> > block_info;
+
+    /**
+     * The structure refers to a cell with level data instead of active data.
+     */
+    bool level_cell;
+
+  private:
+    /**
+     * Standard constructor, not setting any block indices. Use of this
+     * constructor is not recommended, but it is needed for the arrays in
+     * DoFInfoBox.
+     */
+    DoFInfo ();
+
+    /**
+     * Fill #indices_by_block and the renumbered #indices from the indices
+     * stored in #indices_org.
+     */
+    void set_block_indices ();
+
+    /**
+     * Fill #indices with the result of get_active_or_mg_dof_indices() for
+     * the cell @p c. If #block_info has local blocks, the result is stored
+     * in #indices_org first and renumbered by set_block_indices().
+     */
+    template <class DHCellIterator>
+    void get_indices(const DHCellIterator &c);
+
+    /**
+     * Auxiliary vector holding the dof indices of the cell in their
+     * original order, before renumbering by local blocks.
+     */
+    std::vector<types::global_dof_index> indices_org;
+
+    /**
+     * An auxiliary local BlockIndices object created if #block_info is not
+     * set. It contains just a single block of the size of degrees of freedom
+     * per cell.
+     */
+    BlockIndices aux_local_indices;
+
+    friend class DoFInfoBox<dim, DoFInfo<dim, spacedim, number> >;
+  };
+
+
+  /**
+   * A class bundling the MeshWorker::DoFInfo objects used on a cell.
+   *
+   * @todo Currently, we are storing an object for the cells and two for each
+   * face. We could gather all face data pertaining to the cell itself in one
+   * object, saving a bit of memory and a few operations, but sacrificing some
+   * cleanliness.
+   *
+   * @ingroup MeshWorker
+   * @author Guido Kanschat, 2010
+   */
+  template <int dim, class DOFINFO>
+  class DoFInfoBox
+  {
+  public:
+    /**
+     * Constructor copying the seed into all other objects. The box is
+     * marked as valid and no face data is marked as available.
+     */
+    DoFInfoBox(const DOFINFO &seed);
+
+    /**
+     * Copy constructor. Copies #cell, #interior, #exterior and #cell_valid
+     * from the other box, but marks no face data as available.
+     */
+    DoFInfoBox(const DoFInfoBox<dim, DOFINFO> &);
+
+    /**
+     * Reset all the availability flags, including #cell_valid.
+     */
+    void reset();
+
+    /**
+     * After all info objects have been filled appropriately, use the
+     * ASSEMBLER object to assemble them into the global data. See
+     * MeshWorker::Assembler for available classes.
+     *
+     * Nothing is done if #cell_valid is false. Otherwise the cell is
+     * assembled first. Then, for each face with interior data available,
+     * the two-argument assemble() of the assembler is called if exterior
+     * data is available as well, and the one-argument version otherwise.
+     */
+    template <class ASSEMBLER>
+    void assemble(ASSEMBLER &ass) const;
+
+    /**
+     * The memory used by this object.
+     */
+    std::size_t memory_consumption () const;
+
+
+    /**
+     * The data for the cell.
+     */
+    DOFINFO cell;
+    /**
+     * The data for the faces from inside.
+     */
+    DOFINFO interior[GeometryInfo<dim>::faces_per_cell];
+    /**
+     * The data for the faces from outside.
+     */
+    DOFINFO exterior[GeometryInfo<dim>::faces_per_cell];
+
+    /**
+     * A set of flags, indicating whether data on an interior face is
+     * available.
+     */
+    bool interior_face_available[GeometryInfo<dim>::faces_per_cell];
+
+    /**
+     * A set of flags, indicating whether data on an exterior face is
+     * available.
+     */
+    bool exterior_face_available[GeometryInfo<dim>::faces_per_cell];
+
+    /**
+     * A flag to specify if the current object has been set to a valid cell.
+     * If it is false, assemble() does nothing.
+     */
+    bool cell_valid;
+  };
+
+//----------------------------------------------------------------------//
+
+  // Constructor for use without a BlockInfo object: #block_info stays
+  // empty and #aux_local_indices is set to a single block of the size of
+  // the number of degrees of freedom per cell.
+  template <int dim, int spacedim, typename number>
+  DoFInfo<dim,spacedim,number>::DoFInfo(const DoFHandler<dim,spacedim> &dof_handler)
+    :
+    // Give the face and subface numbers a defined value, since objects of
+    // this class are copied (for instance by DoFInfoBox) before any call
+    // to reinit().
+    face_number (numbers::invalid_unsigned_int),
+    sub_number (numbers::invalid_unsigned_int),
+    level_cell (false)
+  {
+    std::vector<types::global_dof_index> aux(1);
+    aux[0] = dof_handler.get_fe().dofs_per_cell;
+    aux_local_indices.reinit(aux);
+  }
+
+
+  // Fetch the dof indices of the cell @p c. Without local blocks they are
+  // written to #indices directly; with local blocks they are first stored
+  // in #indices_org and then renumbered by set_block_indices().
+  template <int dim, int spacedim, typename number>
+  template <class DHCellIterator>
+  inline void
+  DoFInfo<dim,spacedim,number>::get_indices(const DHCellIterator &c)
+  {
+    indices.resize(c->get_fe().dofs_per_cell);
+
+    const bool has_local_blocks =
+      (block_info != 0) && (block_info->local().size() != 0);
+
+    if (!has_local_blocks)
+      {
+        c->get_active_or_mg_dof_indices(indices);
+        return;
+      }
+
+    indices_org.resize(c->get_fe().dofs_per_cell);
+    c->get_active_or_mg_dof_indices(indices_org);
+    set_block_indices();
+  }
+
+
+  // Reinitialize for the cell @p c: fetch its dof indices, invalidate the
+  // face and subface numbers and reinitialize the LocalResults base class
+  // with the local block structure.
+  template <int dim, int spacedim, typename number>
+  template <class DHCellIterator>
+  inline void
+  DoFInfo<dim,spacedim,number>::reinit(const DHCellIterator &c)
+  {
+    get_indices(c);
+    level_cell = c->is_level_cell();
+    cell = typename Triangulation<dim,spacedim>::cell_iterator(*c);
+
+    // A cell has neither a face nor a subface number
+    sub_number = numbers::invalid_unsigned_int;
+    face_number = numbers::invalid_unsigned_int;
+
+    // local_indices() selects the blocks of #block_info if set, and
+    // #aux_local_indices otherwise
+    LocalResults<number>::reinit(local_indices());
+  }
+
+
+  // Switch to the face @p f with number @p face_no on the current cell and
+  // invalidate the subface number.
+  template<int dim, int spacedim, typename number>
+  template <class DHFaceIterator>
+  inline void
+  DoFInfo<dim,spacedim,number>::set_face(
+    const DHFaceIterator &f,
+    const unsigned int face_no)
+  {
+    // The member #face is an iterator of Triangulation<dim,spacedim>, so
+    // the cast must name both template arguments to work for
+    // spacedim != dim.
+    face = static_cast<typename Triangulation<dim,spacedim>::face_iterator> (f);
+    face_number = face_no;
+    sub_number = numbers::invalid_unsigned_int;
+  }
+
+
+  // Reinitialize for face @p f with number @p face_no of the cell @p c. The
+  // dof indices are only fetched again if the stored cell is not valid or
+  // differs from @p c. The LocalResults base class is reinitialized with
+  // the local block structure.
+  template<int dim, int spacedim, typename number>
+  template <class DHCellIterator, class DHFaceIterator>
+  inline void
+  DoFInfo<dim,spacedim,number>::reinit(
+    const DHCellIterator &c,
+    const DHFaceIterator &f,
+    const unsigned int face_no)
+  {
+    // The member #cell is an iterator of Triangulation<dim,spacedim>, so
+    // comparison and assignment must name both template arguments to work
+    // for spacedim != dim.
+    if ((cell.state() != IteratorState::valid)
+        ||  cell != typename Triangulation<dim,spacedim>::cell_iterator(*c))
+      get_indices(c);
+    level_cell = c->is_level_cell();
+
+    cell = typename Triangulation<dim,spacedim>::cell_iterator(*c);
+    set_face(f,face_no);
+
+    if (block_info)
+      LocalResults<number>::reinit(block_info->local());
+    else
+      LocalResults<number>::reinit(aux_local_indices);
+  }
+
+
+  // Switch to subface @p subface_no of the face @p f with number
+  // @p face_no on the current cell.
+  template<int dim, int spacedim, typename number>
+  template <class DHFaceIterator>
+  inline void
+  DoFInfo<dim,spacedim,number>::set_subface(
+    const DHFaceIterator &f,
+    const unsigned int face_no,
+    const unsigned int subface_no)
+  {
+    // The member #face is an iterator of Triangulation<dim,spacedim>, so
+    // the cast must name both template arguments to work for
+    // spacedim != dim.
+    face = static_cast<typename Triangulation<dim,spacedim>::face_iterator> (f);
+    face_number = face_no;
+    sub_number = subface_no;
+  }
+
+
+  // Reinitialize for subface @p subface_no of face @p f with number
+  // @p face_no of the cell @p c. The dof indices are only fetched again if
+  // the stored cell is not valid or differs from @p c. The LocalResults
+  // base class is reinitialized with the local block structure.
+  template<int dim, int spacedim, typename number>
+  template <class DHCellIterator, class DHFaceIterator>
+  inline void
+  DoFInfo<dim,spacedim,number>::reinit(
+    const DHCellIterator &c,
+    const DHFaceIterator &f,
+    const unsigned int face_no,
+    const unsigned int subface_no)
+  {
+    // The member #cell is an iterator of Triangulation<dim,spacedim>, so
+    // the casts must name both template arguments to work for
+    // spacedim != dim.
+    if (cell.state() != IteratorState::valid
+        || cell != static_cast<typename Triangulation<dim,spacedim>::cell_iterator> (c))
+      get_indices(c);
+    level_cell = c->is_level_cell();
+
+    cell = static_cast<typename Triangulation<dim,spacedim>::cell_iterator> (c);
+    set_subface(f, face_no, subface_no);
+
+    if (block_info)
+      LocalResults<number>::reinit(block_info->local());
+    else
+      LocalResults<number>::reinit(aux_local_indices);
+  }
+
+
+  // Return the local block structure: the one of #block_info if this
+  // pointer is set, and the auxiliary single block object otherwise.
+  template<int dim, int spacedim, typename number>
+  inline const BlockIndices &
+  DoFInfo<dim,spacedim,number>::local_indices() const
+  {
+    if (!block_info)
+      return aux_local_indices;
+
+    return block_info->local();
+  }
+
+//----------------------------------------------------------------------//
+
+  // Copy @p seed into the cell object and into the interior and exterior
+  // object of every face. The box starts out valid, with no face data
+  // marked as available.
+  template <int dim, class DOFINFO>
+  inline
+  DoFInfoBox<dim, DOFINFO>::DoFInfoBox(const DOFINFO &seed)
+    :
+    cell(seed), cell_valid(true)
+  {
+    for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+      {
+        exterior[f] = seed;
+        interior[f] = seed;
+        interior_face_available[f] = exterior_face_available[f] = false;
+      }
+  }
+
+
+  // Copy the cell object, the validity flag and all face objects of
+  // @p other. The availability flags are not copied: no face data is
+  // marked as available in the new box.
+  template <int dim, class DOFINFO>
+  inline
+  DoFInfoBox<dim, DOFINFO>::DoFInfoBox(const DoFInfoBox<dim, DOFINFO> &other)
+    :
+    cell(other.cell), cell_valid(other.cell_valid)
+  {
+    for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+      {
+        exterior[f] = other.exterior[f];
+        interior[f] = other.interior[f];
+        interior_face_available[f] = exterior_face_available[f] = false;
+      }
+  }
+
+
+  // Mark the cell as not valid and the data of all faces as not available.
+  template <int dim, class DOFINFO>
+  inline void
+  DoFInfoBox<dim, DOFINFO>::reset ()
+  {
+    for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+      interior_face_available[f] = exterior_face_available[f] = false;
+
+    cell_valid = false;
+  }
+
+
+  // Hand the collected data to @p assembler: first the cell, then every
+  // face for which interior data is available. Nothing happens if the box
+  // does not hold a valid cell.
+  template <int dim, class DOFINFO>
+  template <class ASSEMBLER>
+  inline void
+  DoFInfoBox<dim, DOFINFO>::assemble (ASSEMBLER &assembler) const
+  {
+    if (cell_valid)
+      {
+        assembler.assemble(cell);
+
+        for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+          {
+            // Faces without interior data are skipped
+            if (!interior_face_available[f])
+              continue;
+
+            // With data from both sides this is an interior face;
+            // otherwise only the interior side is assembled
+            if (exterior_face_available[f])
+              assembler.assemble(interior[f], exterior[f]);
+            else
+              assembler.assemble(interior[f]);
+          }
+      }
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/meshworker/dof_info.templates.h b/include/deal.II/meshworker/dof_info.templates.h
new file mode 100644
index 0000000..e2ea7d8
--- /dev/null
+++ b/include/deal.II/meshworker/dof_info.templates.h
@@ -0,0 +1,60 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/meshworker/dof_info.h>
+#include <deal.II/base/quadrature_lib.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace MeshWorker
+{
+  // Constructor from a BlockInfo object. Stores a pointer to 'info', sets
+  // level_cell to false and sizes indices_by_block such that there is one
+  // index vector per local block, each of the size of that block.
+  template <int dim, int spacedim, typename number>
+  DoFInfo<dim,spacedim,number>::DoFInfo(const BlockInfo &info)
+    :
+    block_info(&info, typeid(*this).name()),
+    level_cell (false)
+  {
+    indices_by_block.resize(info.local().size());
+
+    unsigned int block = 0;
+    while (block < indices_by_block.size())
+      {
+        indices_by_block[block].resize(info.local().block_size(block));
+        ++block;
+      }
+  }
+
+
+  // Default constructor. Does not set up any block information. The flag
+  // level_cell is set to false, as in the constructor taking a BlockInfo
+  // object, so that it never holds an indeterminate value.
+  template <int dim, int spacedim, typename number>
+  DoFInfo<dim,spacedim,number>::DoFInfo()
+    :
+    level_cell (false)
+  {}
+
+
+  // Distribute the entries of indices_org according to the renumbering
+  // stored in block_info: once into the per-block vectors indices_by_block
+  // and once into the flat vector indices.
+  template <int dim, int spacedim, typename number>
+  void
+  DoFInfo<dim,spacedim,number>::set_block_indices()
+  {
+    // Per-block storage: the renumbered position is split into a pair
+    // (block, index within block) by the local block structure.
+    for (unsigned int k=0; k<indices.size(); ++k)
+      {
+        const std::pair<unsigned int, unsigned int>
+        block_and_offset = this->block_info->local().global_to_local(this->block_info->renumber(k));
+        indices_by_block[block_and_offset.first][block_and_offset.second] = indices_org[k];
+      }
+
+    // Remove this after
+    // changing block codes
+    for (unsigned int k=0; k<indices.size(); ++k)
+      {
+        indices[this->block_info->renumber(k)] = indices_org[k];
+      }
+  }
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
diff --git a/include/deal.II/meshworker/functional.h b/include/deal.II/meshworker/functional.h
new file mode 100644
index 0000000..9e0b9b6
--- /dev/null
+++ b/include/deal.II/meshworker/functional.h
@@ -0,0 +1,272 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__mesh_worker_functional_h
+#define dealii__mesh_worker_functional_h
+
+#include <deal.II/algorithms/any_data.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/base/mg_level_object.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/meshworker/dof_info.h>
+#include <deal.II/multigrid/mg_constrained_dofs.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace MeshWorker
+{
+  namespace Assembler
+  {
+    /**
+     * The class assembling local contributions to a functional into global
+     * functionals.
+     *
+     * The accumulated values are stored as <tt>double</tt>, independent of
+     * the template argument <tt>number</tt>, which is only used as the return
+     * type of operator().
+     *
+     * @ingroup MeshWorker
+     * @author Guido Kanschat, 2009
+     */
+    template <typename number = double>
+    class Functional
+    {
+    public:
+      /**
+       * Initialize local data to store functionals. The number <tt>n</tt> is
+       * the number of functionals to be computed. All <tt>n</tt> accumulated
+       * values are set to zero.
+       */
+      void initialize(const unsigned int n);
+      /**
+       * Initialize the local data in the DoFInfo object used later for
+       * assembling.
+       *
+       * The info object refers to a cell if <code>!face</code>, or else to an
+       * interior or boundary face. In this class the flag is ignored: the
+       * info object is always asked to provide as many numbers as there are
+       * functionals.
+       */
+      template <class DOFINFO>
+      void initialize_info(DOFINFO &info, bool face);
+
+      /**
+       * Assemble the local values into the global vectors, i.e. add
+       * <tt>info.value(i)</tt> to the <tt>i</tt>th accumulated result.
+       */
+      template<class DOFINFO>
+      void assemble(const DOFINFO &info);
+
+      /**
+       * Assemble both local values into the global vectors, i.e. add
+       * <tt>info1.value(i)</tt> and <tt>info2.value(i)</tt> to the
+       * <tt>i</tt>th accumulated result.
+       */
+      template<class DOFINFO>
+      void assemble(const DOFINFO &info1,
+                    const DOFINFO &info2);
+
+      /**
+       * The value of the ith entry in #results.
+       */
+      number operator() (const unsigned int i) const;
+    private:
+      /**
+       * The values into which the results are added.
+       */
+      std::vector<double> results;
+    };
+
+    /**
+     * Compute cell and face contributions of one or several functionals,
+     * typically for error estimates.
+     *
+     * @ingroup MeshWorker
+     * @author Guido Kanschat, 2009
+     */
+    template <typename number = double>
+    class CellsAndFaces
+    {
+    public:
+      /**
+       * The initialization function, specifying the @p results vectors and
+       * whether face data should be collected separately.
+       *
+       * @p results should contain two block vectors named "cells" and "faces"
+       * (the latter only if @p separate_faces is true). In each of the two,
+       * each block should have equal size and be large enough to accommodate
+       * all user indices set in the cells and faces covered by the loop it is
+       * used in. Typically, for estimators, this is
+       * Triangulation::n_active_cells() and Triangulation::n_faces(),
+       * respectively.
+       *
+       * The use of BlockVector may seem cumbersome, but it allows us to
+       * assemble several functionals at the same time, one in each block. The
+       * typical situation for error estimate is just having a single block in
+       * each vector.
+       *
+       * The names of the entries, and, if @p separate_faces is true, the
+       * equality of the number of blocks in both vectors, are checked by
+       * assertions.
+       */
+      void initialize(AnyData &results, bool separate_faces = true);
+
+      /**
+       * Initialize the local data in the DoFInfo object used later for
+       * assembling.
+       *
+       * The info object refers to a cell if <code>!face</code>, or else to an
+       * interior or boundary face. In this class the flag is ignored: the
+       * info object is always asked to provide as many numbers as the
+       * "cells" vector has blocks.
+       */
+      template <class DOFINFO>
+      void initialize_info(DOFINFO &info, bool face) const;
+
+      /**
+       * Assemble the local values into the global vectors.
+       *
+       * The values are added to the "faces" vector if faces are collected
+       * separately and the info object has a valid face number, and to the
+       * "cells" vector otherwise.
+       */
+      template<class DOFINFO>
+      void assemble(const DOFINFO &info);
+
+      /**
+       * Assemble both local values into the global vectors.
+       *
+       * If faces are collected separately, the sum of both local values is
+       * added to the entry of the "faces" vector belonging to the face of
+       * @p info1 and, if it is a different face, also to the one belonging
+       * to the face of @p info2. Otherwise, half of each local value is
+       * added to the entry of the "cells" vector belonging to the respective
+       * cell.
+       */
+      template<class DOFINFO>
+      void assemble(const DOFINFO &info1,
+                    const DOFINFO &info2);
+
+      /**
+       * The value of the ith entry in @p results.
+       */
+      number operator() (const unsigned int i) const;
+    private:
+      /**
+       * The data object handed to initialize(); its entries are accessed as
+       * pointers to BlockVector<double>, entry 0 for cells and entry 1 for
+       * faces.
+       */
+      AnyData results;
+      /**
+       * Whether face contributions are added to the separate "faces" vector.
+       */
+      bool separate_faces;
+    };
+//----------------------------------------------------------------------//
+
+    // Provide storage for n functionals, all of them starting at zero.
+    template <typename number>
+    inline void
+    Functional<number>::initialize(const unsigned int n)
+    {
+      results.assign(n, 0.);
+    }
+
+
+    // Let the info object provide one number per functional. The flag
+    // distinguishing cells from faces is not used here.
+    template <typename number>
+    template <class DOFINFO>
+    inline void
+    Functional<number>::initialize_info(DOFINFO &info, bool /*face*/)
+    {
+      info.initialize_numbers(results.size());
+    }
+
+
+    // Add the local values of a single info object to the accumulated
+    // results, one value per functional.
+    template <typename number>
+    template <class DOFINFO>
+    inline void
+    Functional<number>::assemble(const DOFINFO &info)
+    {
+      unsigned int k = 0;
+      while (k < results.size())
+        {
+          results[k] += info.value(k);
+          ++k;
+        }
+    }
+
+
+    // Add the local values of both info objects to the accumulated results.
+    // The two contributions are added one after the other, first the one of
+    // info1, then the one of info2.
+    template <typename number>
+    template <class DOFINFO>
+    inline void
+    Functional<number>::assemble(const DOFINFO &info1,
+                                 const DOFINFO &info2)
+    {
+      for (unsigned int k=0; k<results.size(); ++k)
+        {
+          double &accumulated = results[k];
+          accumulated += info1.value(k);
+          accumulated += info2.value(k);
+        }
+    }
+
+
+    // Read access to the accumulated value of functional number i. The
+    // index is checked against the number of functionals by an assertion.
+    template <typename number>
+    inline number
+    Functional<number>::operator() (const unsigned int i) const
+    {
+      AssertIndexRange(i, results.size());
+      const double &stored = results[i];
+      return stored;
+    }
+
+//----------------------------------------------------------------------//
+
+    // Store the result vectors and the flag deciding whether faces are
+    // collected separately. Assertions check that entry 0 is named "cells"
+    // and, if faces are separate, that entry 1 is named "faces" and has as
+    // many blocks as entry 0.
+    template <typename number>
+    inline void
+    CellsAndFaces<number>::initialize(AnyData &r, bool sep)
+    {
+      typedef BlockVector<double> *VectorPointer;
+
+      Assert(r.name(0) == "cells", AnyData::ExcNameMismatch(0, "cells"));
+      if (sep)
+        {
+          Assert(r.name(1) == "faces", AnyData::ExcNameMismatch(1, "faces"));
+          AssertDimension(r.entry<VectorPointer>(0)->n_blocks(),
+                          r.entry<VectorPointer>(1)->n_blocks());
+        }
+
+      results = r;
+      separate_faces = sep;
+    }
+
+    // Let the info object provide one number per block of the "cells"
+    // vector (entry 0 of the results). The flag distinguishing cells from
+    // faces is not used here.
+    template <typename number>
+    template <class DOFINFO>
+    inline void
+    CellsAndFaces<number>::initialize_info(DOFINFO &info, bool /*face*/) const
+    {
+      typedef BlockVector<double> *VectorPointer;
+      info.initialize_numbers(results.entry<VectorPointer>(0)->n_blocks());
+    }
+
+
+    // Add the local values of a single info object to the global vectors.
+    // The "faces" vector (entry 1) is the target if faces are collected
+    // separately and the info object carries a valid face number; in all
+    // other cases the "cells" vector (entry 0) is used.
+    //
+    // NOTE(review): the entry written to is always selected by the user
+    // index of info.cell, also when the target is the "faces" vector. The
+    // two-argument assemble() uses the user index of the face there --
+    // confirm that this difference is intended.
+    template <typename number>
+    template <class DOFINFO>
+    inline void
+    CellsAndFaces<number>::assemble(const DOFINFO &info)
+    {
+      const bool use_face_vector =
+        separate_faces && (info.face_number != numbers::invalid_unsigned_int);
+
+      BlockVector<double> *target;
+      if (!use_face_vector)
+        target = results.entry<BlockVector<double>*>(0);
+      else
+        target = results.entry<BlockVector<double>*>(1);
+
+      for (unsigned int k=0; k<info.n_values(); ++k)
+        target->block(k)(info.cell->user_index()) += info.value(k);
+    }
+
+
+    // Add the local values of the two info objects of an interior face to
+    // the global vectors.
+    //
+    // Without separate face data, half of each local value is added to the
+    // entry of the "cells" vector selected by the user index of the
+    // respective cell. With separate face data, the sum of both local
+    // values is added to the entry of the "faces" vector selected by the
+    // user index of the face of info1 and, if info2 refers to a different
+    // face, to the one selected by the face of info2 as well.
+    template <typename number>
+    template <class DOFINFO>
+    inline void
+    CellsAndFaces<number>::assemble(const DOFINFO &info1,
+                                    const DOFINFO &info2)
+    {
+      for (unsigned int k=0; k<info1.n_values(); ++k)
+        {
+          if (!separate_faces)
+            {
+              BlockVector<double> *cell_vector = results.entry<BlockVector<double>*>(0);
+              cell_vector->block(k)(info1.cell->user_index()) += .5*info1.value(k);
+              cell_vector->block(k)(info2.cell->user_index()) += .5*info2.value(k);
+              continue;
+            }
+
+          BlockVector<double> *face_vector = results.entry<BlockVector<double>*>(1);
+          const double jump = info1.value(k) + info2.value(k);
+          face_vector->block(k)(info1.face->user_index()) += jump;
+          if (info2.face != info1.face)
+            face_vector->block(k)(info2.face->user_index()) += jump;
+        }
+    }
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/meshworker/integration_info.h b/include/deal.II/meshworker/integration_info.h
new file mode 100644
index 0000000..996a2d4
--- /dev/null
+++ b/include/deal.II/meshworker/integration_info.h
@@ -0,0 +1,867 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__mesh_worker_integration_info_h
+#define dealii__mesh_worker_integration_info_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/std_cxx11/shared_ptr.h>
+#include <deal.II/dofs/block_info.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/meshworker/local_results.h>
+#include <deal.II/meshworker/dof_info.h>
+#include <deal.II/meshworker/vector_selector.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace MeshWorker
+{
+  /**
+   * Class for objects handed to local integration functions.
+   *
+   * Objects of this class contain one or more objects of type FEValues,
+   * FEFaceValues or FESubfaceValues to be used in local integration. They are
+   * stored in an array of pointers to the base classes FEValuesBase. The
+   * template parameter VectorType allows the use of different data types for
+   * the global system.
+   *
+   * Additionally, this class contains space to store the values of finite
+   * element functions stored in #global_data in the quadrature points. These
+   * vectors are initialized automatically on each cell or face. In order to
+   * avoid initializing unused vectors, you can use initialize_selector() to
+   * select the vectors by name that you actually want to use.
+   *
+   * <h3>Integration models</h3>
+   *
+   * This class supports two local integration models, corresponding to the
+   * data models in the documentation of the Assembler namespace. One is the
+   * standard model suggested by the use of FESystem. Namely, there is one
+   * FEValuesBase object in this class, containing all shape functions of the
+   * whole system, and having as many components as the system. Using this
+   * model involves loops over all system shape functions. It requires to
+   * identify the system components for each shape function and to select the
+   * correct bilinear form, usually in an @p if or @p switch statement.
+   *
+   * The second integration model builds one FEValuesBase object per base
+   * element of the system. The degrees of freedom on each cell are renumbered
+   * by block, such that they represent the same block structure as the global
+   * system. Objects performing the integration can then process each block
+   * separately, which improves reusability of code considerably.
+   *
+   * @note As described in DoFInfo, the use of the local block model is
+   * triggered by calling BlockInfo::initialize_local() before using
+   * initialize() in this class.
+   *
+   * @ingroup MeshWorker
+   * @author Guido Kanschat, 2009
+   */
+  template<int dim, int spacedim = dim>
+  class IntegrationInfo
+  {
+  private:
+    /// vector of FEValues objects
+    std::vector<std_cxx11::shared_ptr<FEValuesBase<dim, spacedim> > > fevalv;
+  public:
+    /// The template argument <tt>dim</tt>, made available as a constant.
+    static const unsigned int dimension = dim;
+    /// The template argument <tt>spacedim</tt>, made available as a constant.
+    static const unsigned int space_dimension = spacedim;
+
+    /**
+     * Constructor.
+     */
+    IntegrationInfo();
+
+    /**
+     * Copy constructor, creating a clone to be used by WorkStream::run().
+     */
+    IntegrationInfo(const IntegrationInfo<dim, spacedim> &other);
+
+    /**
+     * Build all internal structures, in particular the FEValuesBase objects
+     * and allocate space for data vectors.
+     *
+     * @param el is the finite element of the DoFHandler.
+     *
+     * @param mapping is the Mapping object used to map the mesh cells.
+     *
+     * @param quadrature is a Quadrature formula used in the constructor of
+     * the FEVALUES objects.
+     *
+     * @param flags are the UpdateFlags used in the constructor of the
+     * FEVALUES objects.
+     *
+     * @param local_block_info is an optional parameter for systems of PDE. If
+     * it is provided with reasonable data, then the degrees of freedom on the
+     * cells will be re-ordered to reflect the block structure of the system.
+     */
+    template <class FEVALUES>
+    void initialize(const FiniteElement<dim,spacedim> &el,
+                    const Mapping<dim,spacedim> &mapping,
+                    const Quadrature<FEVALUES::integral_dimension> &quadrature,
+                    const UpdateFlags flags,
+                    const BlockInfo *local_block_info = 0);
+
+    /**
+     * Initialize the data vector and cache the selector.
+     */
+    void initialize_data(const std_cxx11::shared_ptr<VectorDataBase<dim,spacedim> > &data);
+
+    /**
+     * Delete the data created by initialize().
+     */
+    void clear();
+
+    /**
+     * Return a reference to the FiniteElement that was used to initialize
+     * this object.
+     */
+    const FiniteElement<dim, spacedim> &finite_element() const;
+
+    /// This is true if we are assembling for multigrid
+    bool multigrid;
+    /// Access to finite element
+    /**
+     * This is the access function being used, if initialize() for a single
+     * element was used (without the BlockInfo argument). It throws an
+     * exception, if applied to a vector of elements.
+     */
+    const FEValuesBase<dim, spacedim> &fe_values () const;
+
+    /// Access to finite elements
+    /**
+     * This access function must be used if the initialize() for a group of
+     * elements was used (with a valid BlockInfo object).
+     */
+    const FEValuesBase<dim, spacedim> &fe_values (const unsigned int i) const;
+
+    /**
+     * The vector containing the values of finite element functions in the
+     * quadrature points.
+     *
+     * There is one vector per selected finite element function, containing
+     * one vector for each component, containing vectors with values for each
+     * quadrature point.
+     */
+    std::vector<std::vector<std::vector<double> > > values;
+
+    /**
+     * The vector containing the derivatives of finite element functions in
+     * the quadrature points.
+     *
+     * There is one vector per selected finite element function, containing
+     * one vector for each component, containing vectors with values for each
+     * quadrature point.
+     */
+    std::vector<std::vector<std::vector<Tensor<1,dim> > > > gradients;
+
+    /**
+     * The vector containing the second derivatives of finite element
+     * functions in the quadrature points.
+     *
+     * There is one vector per selected finite element function, containing
+     * one vector for each component, containing vectors with values for each
+     * quadrature point.
+     */
+    std::vector<std::vector<std::vector<Tensor<2,dim> > > > hessians;
+
+    /**
+     * Reinitialize internal data structures for use on a cell.
+     */
+    template <typename number>
+    void reinit(const DoFInfo<dim, spacedim, number> &i);
+
+    /**
+     * Use the finite element functions in #global_data and fill the vectors
+     * #values, #gradients and #hessians.
+     */
+    template<typename number>
+    void fill_local_data(const DoFInfo<dim, spacedim, number> &info, bool split_fevalues);
+
+    /**
+     * The global data vector used to compute function values in quadrature
+     * points.
+     */
+    std_cxx11::shared_ptr<VectorDataBase<dim, spacedim> > global_data;
+
+    /**
+     * The memory used by this object.
+     */
+    std::size_t memory_consumption () const;
+
+  private:
+    /**
+     * The pointer to the (system) element used for initialization.
+     */
+    SmartPointer<const FiniteElement<dim, spacedim>, IntegrationInfo<dim, spacedim> > fe_pointer;
+
+    /**
+     * Use the finite element functions in #global_data and fill the vectors
+     * #values, #gradients and #hessians with values according to the
+     * selector.
+     */
+    template <typename TYPE>
+    void fill_local_data(
+      std::vector<std::vector<std::vector<TYPE> > > &data,
+      VectorSelector &selector,
+      bool split_fevalues) const;
+    /**
+     * Cache the number of components of the system element.
+     */
+    unsigned int n_components;
+  };
+
+  /**
+   * The object holding the scratch data for integrating over cells and faces.
+   * IntegrationInfoBox serves four main purposes:
+   *
+   * <ol>
+   * <li> It provides the interface needed by MeshWorker::loop(), namely the
+   * two functions post_cell() and post_faces(), as well as the data members
+   * #cell, #boundary, #face, #subface, and #neighbor.
+   *
+   * <li> It contains all information needed to initialize the FEValues and
+   * FEFaceValues objects in the IntegrationInfo data members.
+   *
+   * <li> It stores information on finite element vectors and whether their
+   * data should be used to compute values or derivatives of functions at
+   * quadrature points.
+   *
+   * <li> It makes educated guesses on quadrature rules and update flags, so
+   * that minimal code has to be written when default parameters are
+   * sufficient.
+   * </ol>
+   *
+   * In order to allow for sufficient generality, a few steps have to be
+   * undertaken to use this class.
+   *
+   * First, you should consider if you need values from any vectors in a
+   * AnyData object. If so, fill the VectorSelector objects #cell_selector,
+   * #boundary_selector and #face_selector with their names and the data type
+   * (value, gradient, Hessian) to be extracted.
+   *
+   * Afterwards, you will need to consider UpdateFlags for FEValues objects. A
+   * good start is initialize_update_flags(), which looks at the selectors
+   * filled before and adds all the flags needed to get the selection.
+   * Additional flags can be set with add_update_flags().
+   *
+   * Finally, we need to choose quadrature formulas. In the simplest case, you
+   * might be happy with the default settings, which are <i>n</i>-point Gauss
+   * formulas. If only derivatives of the shape functions are used
+   * (#update_values is not set) <i>n</i> equals the highest polynomial degree
+   * in the FiniteElement, if #update_values is set, <i>n</i> is one higher
+   * than this degree.  If you choose to use Gauss formulas of other size, use
+   * initialize_gauss_quadrature() with appropriate values. Otherwise, you can
+   * fill the variables #cell_quadrature, #boundary_quadrature and
+   * #face_quadrature directly.
+   *
+   * In order to save time, you can set the variables boundary_fluxes and
+   * interior_fluxes of the base class to false, thus telling the
+   * MeshWorker::loop() not to loop over those faces.
+   *
+   * All the information in here is used to set up IntegrationInfo objects
+   * correctly, typically in an IntegrationInfoBox.
+   *
+   * @ingroup MeshWorker
+   * @author Guido Kanschat, 2009
+   */
+  template <int dim, int spacedim=dim>
+  class IntegrationInfoBox
+  {
+  public:
+
+    /**
+     * The type of the info object for cells.
+     */
+    typedef IntegrationInfo<dim, spacedim> CellInfo;
+
+    /**
+     * Default constructor.
+     */
+    IntegrationInfoBox ();
+
+    /**
+     * Initialize the IntegrationInfo objects contained.
+     *
+     * Before doing so, add update flags necessary to produce the data needed
+     * and also set uninitialized quadrature rules to Gauss formulas, which
+     * integrate polynomial bilinear forms exactly.
+     */
+    void initialize(const FiniteElement<dim, spacedim> &el,
+                    const Mapping<dim, spacedim> &mapping,
+                    const BlockInfo *block_info = 0);
+
+    /**
+     * Initialize the IntegrationInfo objects contained.
+     *
+     * Before doing so, add update flags necessary to produce the data needed
+     * and also set uninitialized quadrature rules to Gauss formulas, which
+     * integrate polynomial bilinear forms exactly.
+     */
+    template <typename VectorType>
+    void initialize(const FiniteElement<dim, spacedim> &el,
+                    const Mapping<dim, spacedim>       &mapping,
+                    const AnyData                      &data,
+                    const VectorType                   &dummy,
+                    const BlockInfo                    *block_info = 0);
+    /**
+     * Initialize the IntegrationInfo objects contained.
+     *
+     * Before doing so, add update flags necessary to produce the data needed
+     * and also set uninitialized quadrature rules to Gauss formulas, which
+     * integrate polynomial bilinear forms exactly.
+     */
+    template <typename VectorType>
+    void initialize(const FiniteElement<dim, spacedim> &el,
+                    const Mapping<dim, spacedim>       &mapping,
+                    const AnyData                      &data,
+                    const MGLevelObject<VectorType>    &dummy,
+                    const BlockInfo                    *block_info = 0);
+    /**
+     * @name FEValues setup
+     */
+    /* @{ */
+
+    /**
+     * Call this function before initialize() in order to guess the update
+     * flags needed, based on the data selected.
+     *
+     * When computing face fluxes, we normally can use the geometry
+     * (integration weights and normal vectors) from the original cell and
+     * thus can avoid updating these values on the neighboring cell. Set
+     * <tt>neighbor_geometry</tt> to true in order to initialize these values
+     * as well.
+     */
+    void initialize_update_flags(bool neighbor_geometry = false);
+
+    /**
+     * Add FEValues UpdateFlags for integration on all objects (cells,
+     * boundary faces and all interior faces).
+     */
+    void add_update_flags_all (const UpdateFlags flags);
+
+    /**
+     * Add FEValues UpdateFlags for integration on cells.
+     */
+    void add_update_flags_cell(const UpdateFlags flags);
+
+    /**
+     * Add FEValues UpdateFlags for integration on boundary faces.
+     */
+    void add_update_flags_boundary(const UpdateFlags flags);
+
+    /**
+     * Add FEValues UpdateFlags for integration on interior faces.
+     */
+    void add_update_flags_face(const UpdateFlags flags);
+
+    /**
+     * Add additional update flags to the ones already set in this program.
+     * The four boolean flags indicate whether the additional flags should be
+     * set for cell, boundary, interelement face for the cell itself or
+     * neighbor cell, or any combination thereof.
+     */
+    void add_update_flags(const UpdateFlags flags,
+                          const bool cell = true,
+                          const bool boundary = true,
+                          const bool face = true,
+                          const bool neighbor = true);
+
+    /**
+     * Assign n-point Gauss quadratures to each of the quadrature rules. Here,
+     * a size of zero points means that no loop over these grid entities
+     * should be performed.
+     *
+     * If the parameter <tt>force</tt> is true, then all quadrature sets are
+     * filled with new quadrature rules. If it is false, then only empty rules
+     * are changed.
+     */
+    void initialize_gauss_quadrature(unsigned int n_cell_points,
+                                     unsigned int n_boundary_points,
+                                     unsigned int n_face_points,
+                                     const bool force = true);
+
+    /**
+     * The memory used by this object.
+     */
+    std::size_t memory_consumption () const;
+
+    /**
+     * The set of update flags for cell integration.
+     *
+     * Defaults to #update_JxW_values.
+     */
+    UpdateFlags cell_flags;
+    /**
+     * The set of update flags for boundary face integration.
+     *
+     * Defaults to #update_JxW_values and #update_normal_vectors.
+     */
+    UpdateFlags boundary_flags;
+
+    /**
+     * The set of update flags for interior face integration.
+     *
+     * Defaults to #update_JxW_values and #update_normal_vectors.
+     */
+    UpdateFlags face_flags;
+
+    /**
+     * The set of update flags for interior face integration as seen from
+     * the neighboring cell.
+     *
+     * Defaults to #update_default, since quadrature weights are taken from
+     * the other cell.
+     */
+    UpdateFlags neighbor_flags;
+
+    /**
+     * The quadrature rule used on cells.
+     */
+    Quadrature<dim> cell_quadrature;
+
+    /**
+     * The quadrature rule used on boundary faces.
+     */
+    Quadrature<dim-1> boundary_quadrature;
+
+    /**
+     * The quadrature rule used on interior faces.
+     */
+    Quadrature<dim-1> face_quadrature;
+    /* @} */
+
+    /**
+     * @name Data vectors
+     */
+    /* @{ */
+
+    /**
+     * Initialize the VectorSelector objects #cell_selector,
+     * #boundary_selector and #face_selector in order to save computational
+     * effort. If no selectors are used, then values for all named vectors in
+     * DoFInfo::global_data will be computed in all quadrature points.
+     *
+     * This function will also add UpdateFlags to the flags stored in this
+     * class.
+     */
+    /**
+     * Select the vectors from DoFInfo::global_data that should be computed in
+     * the quadrature points on cells.
+     */
+    MeshWorker::VectorSelector cell_selector;
+
+    /**
+     * Select the vectors from DoFInfo::global_data that should be computed in
+     * the quadrature points on boundary faces.
+     */
+    MeshWorker::VectorSelector boundary_selector;
+
+    /**
+     * Select the vectors from DoFInfo::global_data that should be computed in
+     * the quadrature points on interior faces.
+     */
+    MeshWorker::VectorSelector face_selector;
+
+    // Shared pointers to the vector data objects for cells, boundary faces
+    // and interior faces, respectively.
+    // NOTE(review): presumably these are set up by the initialize()
+    // overloads taking an AnyData argument -- confirm in the implementation.
+    std_cxx11::shared_ptr<MeshWorker::VectorDataBase<dim, spacedim> > cell_data;
+    std_cxx11::shared_ptr<MeshWorker::VectorDataBase<dim, spacedim> > boundary_data;
+    std_cxx11::shared_ptr<MeshWorker::VectorDataBase<dim, spacedim> > face_data;
+    /* @} */
+
+    /**
+     * @name Interface for MeshWorker::loop()
+     */
+    /* @{ */
+    /**
+     * A callback function which is called in the loop over all cells, after
+     * the action on a cell has been performed and before the faces are dealt
+     * with.
+     *
+     * In order for this function to have this effect, at least either of the
+     * arguments <tt>boundary_worker</tt> or <tt>face_worker</tt> arguments of
+     * loop() should be nonzero. Additionally, <tt>cells_first</tt> should be
+     * true. If <tt>cells_first</tt> is false, this function is called before
+     * any action on a cell is taken.
+     *
+     * An empty function in this class, but can be replaced in other classes
+     * given to loop() instead.
+     *
+     * See loop() and cell_action() for more details of how this function can
+     * be used.
+     */
+    template <class DOFINFO>
+    void post_cell(const DoFInfoBox<dim, DOFINFO> &);
+
+    /**
+     * A callback function which is called in the loop over all cells, after
+     * the action on the faces of a cell has been performed and before the
+     * cell itself is dealt with (assumes <tt>cells_first</tt> is false).
+     *
+     * In order for this function to have a reasonable effect, at least either
+     * of the arguments <tt>boundary_worker</tt> or <tt>face_worker</tt>
+     * arguments of loop() should be nonzero. Additionally,
+     * <tt>cells_first</tt> should be false.
+     *
+     * An empty function in this class, but can be replaced in other classes
+     * given to loop() instead.
+     *
+     * See loop() and cell_action() for more details of how this function can
+     * be used.
+     */
+    template <class DOFINFO>
+    void post_faces(const DoFInfoBox<dim, DOFINFO> &);
+
+    /**
+     * The info object for a cell.
+     */
+    CellInfo cell;
+    /**
+     * The info object for a boundary face.
+     */
+    CellInfo boundary;
+    /**
+     * The info object for a regular interior face, seen from the first cell.
+     */
+    CellInfo face;
+    /**
+     * The info object for the refined side of an interior face seen from the
+     * first cell.
+     */
+    CellInfo subface;
+    /**
+     * The info object for an interior face, seen from the other cell.
+     */
+    CellInfo neighbor;
+
+    /* @} */
+  };
+
+
+//----------------------------------------------------------------------//
+
+  // Default constructor. Creates an object without any FEValues objects,
+  // not assembling for multigrid, and with a freshly allocated, empty
+  // VectorDataBase as global data.
+  //
+  // The cached number of components is set to an invalid value rather than
+  // being left uninitialized: the copy constructor reads it, and copying an
+  // object before initialize() was called would otherwise read an
+  // indeterminate value.
+  template<int dim, int sdim>
+  inline
+  IntegrationInfo<dim,sdim>::IntegrationInfo()
+    :
+    fevalv(0),
+    multigrid(false),
+    global_data(std_cxx11::shared_ptr<VectorDataBase<dim, sdim> >(new VectorDataBase<dim, sdim>)),
+    n_components(numbers::invalid_unsigned_int)
+  {}
+
+
+  // Copy constructor. The data vectors, the multigrid flag, the element
+  // pointer, the cached number of components and the (shared) global data
+  // pointer are copied from 'other'. The FEValues objects are not shared:
+  // for each of them a new object of the same concrete type (FEValues,
+  // FEFaceValues or FESubfaceValues) is created from the mapping, finite
+  // element, quadrature and update flags of the original. Any other
+  // concrete type triggers an internal-error assertion.
+  template<int dim, int sdim>
+  inline
+  IntegrationInfo<dim,sdim>::IntegrationInfo(const IntegrationInfo<dim,sdim> &other)
+    :
+    multigrid(other.multigrid),
+    values(other.values),
+    gradients(other.gradients),
+    hessians(other.hessians),
+    global_data(other.global_data),
+    fe_pointer(other.fe_pointer),
+    n_components(other.n_components)
+  {
+    typedef std_cxx11::shared_ptr<FEValuesBase<dim,sdim> > FEValuesPointer;
+
+    fevalv.resize(other.fevalv.size());
+    for (unsigned int k=0; k<other.fevalv.size(); ++k)
+      {
+        const FEValuesBase<dim,sdim> &source = *other.fevalv[k];
+
+        if (const FEValues<dim,sdim> *on_cell
+            = dynamic_cast<const FEValues<dim,sdim>*>(&source))
+          fevalv[k] = FEValuesPointer (
+                        new FEValues<dim,sdim> (on_cell->get_mapping(),
+                                                on_cell->get_fe(),
+                                                on_cell->get_quadrature(),
+                                                on_cell->get_update_flags()));
+        else if (const FEFaceValues<dim,sdim> *on_face
+                 = dynamic_cast<const FEFaceValues<dim,sdim>*>(&source))
+          fevalv[k] = FEValuesPointer (
+                        new FEFaceValues<dim,sdim> (on_face->get_mapping(),
+                                                    on_face->get_fe(),
+                                                    on_face->get_quadrature(),
+                                                    on_face->get_update_flags()));
+        else if (const FESubfaceValues<dim,sdim> *on_subface
+                 = dynamic_cast<const FESubfaceValues<dim,sdim>*>(&source))
+          fevalv[k] = FEValuesPointer (
+                        new FESubfaceValues<dim,sdim> (on_subface->get_mapping(),
+                                                       on_subface->get_fe(),
+                                                       on_subface->get_quadrature(),
+                                                       on_subface->get_update_flags()));
+        else
+          Assert(false, ExcInternalError());
+      }
+  }
+
+
+
+  template<int dim, int sdim>
+  template <class FEVALUES>
+  inline void
+  IntegrationInfo<dim,sdim>::initialize(
+    const FiniteElement<dim,sdim> &el,
+    const Mapping<dim,sdim> &mapping,
+    const Quadrature<FEVALUES::integral_dimension> &quadrature,
+    const UpdateFlags flags,
+    const BlockInfo *block_info)
+  {
+    fe_pointer = &el;  // kept so that finite_element() can hand the element back later
+    if (block_info == 0 || block_info->local().size() == 0)
+      {
+        fevalv.resize(1);  // no local block structure: a single FEVALUES object for all of el
+        fevalv[0] = std_cxx11::shared_ptr<FEValuesBase<dim,sdim> > (
+                      new FEVALUES (mapping, el, quadrature, flags));
+      }
+    else
+      {
+        fevalv.resize(el.n_base_elements());  // local blocks present: one FEVALUES object per base element
+        for (unsigned int i=0; i<fevalv.size(); ++i)
+          {
+            fevalv[i] = std_cxx11::shared_ptr<FEValuesBase<dim,sdim> > (
+                          new FEVALUES (mapping, el.base_element(i), quadrature, flags));
+          }
+      }
+    n_components = el.n_components();  // component count of the full element, also in the split case
+  }
+
+
+  template <int dim, int spacedim>
+  inline const FiniteElement<dim, spacedim> &
+  IntegrationInfo<dim,spacedim>::finite_element() const
+  {
+    Assert (fe_pointer !=0, ExcNotInitialized());
+    return *fe_pointer;
+  }
+
+  template <int dim, int spacedim>
+  inline const FEValuesBase<dim, spacedim> &
+  IntegrationInfo<dim,spacedim>::fe_values() const
+  {
+    AssertDimension(fevalv.size(), 1);
+    return *fevalv[0];
+  }
+
+
+  template <int dim, int spacedim>
+  inline const FEValuesBase<dim, spacedim> &
+  IntegrationInfo<dim,spacedim>::fe_values(unsigned int i) const
+  {
+    Assert (i<fevalv.size(), ExcIndexRange(i,0,fevalv.size()));
+    return *fevalv[i];
+  }
+
+
+  template <int dim, int spacedim>
+  template <typename number>
+  inline void
+  IntegrationInfo<dim,spacedim>::reinit(const DoFInfo<dim, spacedim, number> &info)
+  {
+    for (unsigned int i=0; i<fevalv.size(); ++i)
+      {
+        FEValuesBase<dim, spacedim> &febase = *fevalv[i];
+        if (info.sub_number != numbers::invalid_unsigned_int)
+          {
+            // This is a subface (sub_number is set); cast keeps spacedim so that codimension-one cases match febase
+            FESubfaceValues<dim,spacedim> &fe = dynamic_cast<FESubfaceValues<dim,spacedim>&> (febase);
+            fe.reinit(info.cell, info.face_number, info.sub_number);
+          }
+        else if (info.face_number != numbers::invalid_unsigned_int)
+          {
+            // This is a face (face_number is set, sub_number is not); cast keeps spacedim as above
+            FEFaceValues<dim,spacedim> &fe = dynamic_cast<FEFaceValues<dim,spacedim>&> (febase);
+            fe.reinit(info.cell, info.face_number);
+          }
+        else
+          {
+            // This is a cell (neither face_number nor sub_number is set)
+            FEValues<dim,spacedim> &fe = dynamic_cast<FEValues<dim,spacedim>&> (febase);
+            fe.reinit(info.cell);
+          }
+      }
+
+    const bool split_fevalues = info.block_info != 0;  // block information present: fill block by block
+    if (!global_data->empty())  // nothing to evaluate if no global vectors were selected
+      fill_local_data(info, split_fevalues);
+  }
+
+
+
+
+//----------------------------------------------------------------------//
+
+  template <int dim, int sdim>
+  inline
+  void
+  IntegrationInfoBox<dim,sdim>::initialize_gauss_quadrature(
+    unsigned int cp,
+    unsigned int bp,
+    unsigned int fp,
+    bool force)
+  {
+    if (force || cell_quadrature.size() == 0)
+      cell_quadrature = QGauss<dim>(cp);
+    if (force || boundary_quadrature.size() == 0)
+      boundary_quadrature = QGauss<dim-1>(bp);
+    if (force || face_quadrature.size() == 0)
+      face_quadrature = QGauss<dim-1>(fp);
+  }
+
+
+  template <int dim, int sdim>
+  inline
+  void
+  IntegrationInfoBox<dim,sdim>::add_update_flags_all (const UpdateFlags flags)
+  {
+    add_update_flags(flags, true, true, true, true);
+  }
+
+
+  template <int dim, int sdim>
+  inline
+  void
+  IntegrationInfoBox<dim,sdim>::add_update_flags_cell (const UpdateFlags flags)
+  {
+    add_update_flags(flags, true, false, false, false);
+  }
+
+
+  template <int dim, int sdim>
+  inline
+  void
+  IntegrationInfoBox<dim,sdim>::add_update_flags_boundary (const UpdateFlags flags)
+  {
+    add_update_flags(flags, false, true, false, false);
+  }
+
+
+  template <int dim, int sdim>
+  inline
+  void
+  IntegrationInfoBox<dim,sdim>::add_update_flags_face (const UpdateFlags flags)
+  {
+    add_update_flags(flags, false, false, true, true);
+  }
+
+
+  template <int dim, int sdim>
+  inline
+  void
+  IntegrationInfoBox<dim,sdim>::initialize(
+    const FiniteElement<dim,sdim> &el,
+    const Mapping<dim,sdim> &mapping,
+    const BlockInfo *block_info)
+  {
+    initialize_update_flags();  // must run first: the flags decide the quadrature sizes chosen below
+    initialize_gauss_quadrature(  // degree+1 points where update_values is set, degree points otherwise
+      (cell_flags & update_values) ? (el.tensor_degree()+1) : el.tensor_degree(),
+      (boundary_flags & update_values) ? (el.tensor_degree()+1) : el.tensor_degree(),
+      (face_flags & update_values) ? (el.tensor_degree()+1) : el.tensor_degree(), false);
+    // force=false above: quadrature formulas that already have points are kept. Subface and neighbor reuse face_quadrature.
+    cell.template initialize<FEValues<dim,sdim> >(el, mapping, cell_quadrature,
+                                                  cell_flags, block_info);
+    boundary.template initialize<FEFaceValues<dim,sdim> >(el, mapping, boundary_quadrature,
+                                                          boundary_flags, block_info);
+    face.template initialize<FEFaceValues<dim,sdim> >(el, mapping, face_quadrature,
+                                                      face_flags, block_info);
+    subface.template initialize<FESubfaceValues<dim,sdim> >(el, mapping, face_quadrature,
+                                                            face_flags, block_info);
+    neighbor.template initialize<FEFaceValues<dim,sdim> >(el, mapping, face_quadrature,
+                                                          neighbor_flags, block_info);
+  }
+
+
+  template <int dim, int sdim>
+  template <typename VectorType>
+  void
+  IntegrationInfoBox<dim,sdim>::initialize
+  (const FiniteElement<dim,sdim> &el,
+   const Mapping<dim,sdim>       &mapping,
+   const AnyData                 &data,
+   const VectorType &,
+   const BlockInfo               *block_info)
+  {
+    initialize(el, mapping, block_info);
+    std_cxx11::shared_ptr<VectorData<VectorType, dim, sdim> > p;
+    VectorDataBase<dim,sdim> *pp;
+
+    p = std_cxx11::shared_ptr<VectorData<VectorType, dim, sdim> >(new VectorData<VectorType, dim, sdim> (cell_selector));
+    // Public member function of parent class was not found without
+    // explicit cast
+    pp = &*p;
+    pp->initialize(data);
+    cell_data = p;
+    cell.initialize_data(p);
+
+    p = std_cxx11::shared_ptr<VectorData<VectorType, dim, sdim> >(new VectorData<VectorType, dim, sdim> (boundary_selector));
+    pp = &*p;
+    pp->initialize(data);
+    boundary_data = p;
+    boundary.initialize_data(p);
+
+    p = std_cxx11::shared_ptr<VectorData<VectorType, dim, sdim> >(new VectorData<VectorType, dim, sdim> (face_selector));
+    pp = &*p;
+    pp->initialize(data);
+    face_data = p;
+    face.initialize_data(p);
+    subface.initialize_data(p);
+    neighbor.initialize_data(p);
+  }
+
+  template <int dim, int sdim>
+  template <typename VectorType>
+  void
+  IntegrationInfoBox<dim,sdim>::initialize
+  (const FiniteElement<dim,sdim>   &el,
+   const Mapping<dim,sdim>         &mapping,
+   const AnyData                   &data,
+   const MGLevelObject<VectorType> &,
+   const BlockInfo                 *block_info)
+  {
+    initialize(el, mapping, block_info);
+    std_cxx11::shared_ptr<MGVectorData<VectorType, dim, sdim> > p;
+    VectorDataBase<dim,sdim> *pp;
+
+    p = std_cxx11::shared_ptr<MGVectorData<VectorType, dim, sdim> >(new MGVectorData<VectorType, dim, sdim> (cell_selector));
+    // Public member function of parent class was not found without
+    // explicit cast
+    pp = &*p;
+    pp->initialize(data);
+    cell_data = p;
+    cell.initialize_data(p);
+
+    p = std_cxx11::shared_ptr<MGVectorData<VectorType, dim, sdim> >(new MGVectorData<VectorType, dim, sdim> (boundary_selector));
+    pp = &*p;
+    pp->initialize(data);
+    boundary_data = p;
+    boundary.initialize_data(p);
+
+    p = std_cxx11::shared_ptr<MGVectorData<VectorType, dim, sdim> >(new MGVectorData<VectorType, dim, sdim> (face_selector));
+    pp = &*p;
+    pp->initialize(data);
+    face_data = p;
+    face.initialize_data(p);
+    subface.initialize_data(p);
+    neighbor.initialize_data(p);
+  }
+
+  template <int dim, int sdim>
+  template <class DOFINFO>
+  void
+  IntegrationInfoBox<dim,sdim>::post_cell(const DoFInfoBox<dim, DOFINFO> &)
+  {}
+
+
+  template <int dim, int sdim>
+  template <class DOFINFO>
+  void
+  IntegrationInfoBox<dim,sdim>::post_faces(const DoFInfoBox<dim, DOFINFO> &)
+  {}
+
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/meshworker/integration_info.templates.h b/include/deal.II/meshworker/integration_info.templates.h
new file mode 100644
index 0000000..ed25ee5
--- /dev/null
+++ b/include/deal.II/meshworker/integration_info.templates.h
@@ -0,0 +1,231 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/meshworker/dof_info.h>
+#include <deal.II/meshworker/integration_info.h>
+#include <deal.II/base/quadrature_lib.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace MeshWorker
+{
+  template<int dim, int sdim>
+  void
+  IntegrationInfo<dim,sdim>::initialize_data(
+    const std_cxx11::shared_ptr<VectorDataBase<dim,sdim> > &data)
+  {
+    global_data = data;
+    const unsigned int nqp = fevalv[0]->n_quadrature_points;  // requires a non-empty fevalv, i.e. initialize() must have run
+    // The three arrays below are indexed [selected function][component][quadrature point]
+    values.resize(global_data->n_values());
+    // For all finite element functions
+    // selected for value evaluation
+    for (unsigned int i=0; i<values.size(); ++i)
+      {
+        values[i].resize(n_components);
+        // For all components
+        for (unsigned int j=0; j<values[i].size(); ++j)
+          {
+            values[i][j].resize(nqp);
+          }
+      }
+
+    gradients.resize(global_data->n_gradients());
+    // For all finite element functions
+    // selected for gradient evaluation
+    for (unsigned int i=0; i<gradients.size(); ++i)
+      {
+        gradients[i].resize(n_components);
+        // For all components
+        for (unsigned int j=0; j<gradients[i].size(); ++j)
+          {
+            gradients[i][j].resize(nqp);
+          }
+      }
+
+    hessians.resize(global_data->n_hessians());
+    // For all finite element functions
+    // selected for Hessian evaluation
+    for (unsigned int i=0; i<hessians.size(); ++i)
+      {
+        hessians[i].resize(n_components);
+        // For all components
+        for (unsigned int j=0; j<hessians[i].size(); ++j)
+          {
+            hessians[i][j].resize(nqp);
+          }
+      }
+  }
+
+
+  template<int dim, int sdim>
+  void
+  IntegrationInfo<dim,sdim>::clear()
+  {
+    fevalv.resize(0);
+  }
+
+
+
+  template<int dim, int sdim>
+  template <typename number>
+  void
+  IntegrationInfo<dim,sdim>::fill_local_data(const DoFInfo<dim, sdim, number> &info, bool split_fevalues)
+  {
+    if (split_fevalues)
+      {
+        unsigned int comp = 0;  // running component offset handed to fill()/mg_fill()
+        // Loop over all local blocks, each evaluated with the FEValues object of its base element
+        for (unsigned int b=0; b<info.block_info->local().size(); ++b)
+          {
+            const unsigned int fe_no = info.block_info->base_element(b);
+            const FEValuesBase<dim,sdim> &fe = this->fe_values(fe_no);
+            const unsigned int n_comp = fe.get_fe().n_components();
+            const unsigned int block_start = info.block_info->local().block_start(b);
+            const unsigned int block_size = info.block_info->local().block_size(b);
+
+            if (info.level_cell)  // level cells read from multigrid data, using the level of the cell
+              this->global_data->mg_fill(values, gradients, hessians, fe, info.cell->level(), info.indices,
+                                         comp, n_comp, block_start, block_size);
+            else
+              this->global_data->fill(values, gradients, hessians, fe, info.indices,
+                                      comp, n_comp, block_start, block_size);
+            comp += n_comp;  // advance past the components of this block
+          }
+      }
+    else
+      {
+        const FEValuesBase<dim,sdim> &fe = this->fe_values(0);  // unsplit case: one object covers all indices
+        const unsigned int n_comp = fe.get_fe().n_components();
+        if (info.level_cell)
+          this->global_data->mg_fill(values, gradients, hessians, fe, info.cell->level(), info.indices,
+                                     0, n_comp, 0, info.indices.size());
+        else
+          this->global_data->fill(values, gradients, hessians, fe, info.indices,
+                                  0, n_comp, 0, info.indices.size());
+      }
+  }
+
+
+  template<int dim, int sdim>
+  std::size_t
+  IntegrationInfo<dim,sdim>::memory_consumption () const
+  {
+    std::size_t mem = sizeof(*this)
+                      + MemoryConsumption::memory_consumption(fevalv)
+                      - sizeof (fevalv);
+    for (unsigned int i=0; i<fevalv.size(); ++i)
+      mem += fevalv[i]->memory_consumption();
+    return mem;
+  }
+
+//----------------------------------------------------------------------//
+
+  template<int dim, int sdim>
+  IntegrationInfoBox<dim,sdim>::IntegrationInfoBox()
+  {
+    cell_flags = update_default;
+    boundary_flags = update_default;
+    face_flags = update_default;
+    neighbor_flags = update_default;
+  }
+
+
+  template<int dim, int sdim>
+  void
+  IntegrationInfoBox<dim,sdim>::initialize_update_flags (bool neighbor_geometry)
+  {
+    cell_flags |= update_JxW_values;  // geometry that is always requested on cells
+    boundary_flags |= UpdateFlags(update_JxW_values | update_normal_vectors);
+    face_flags |= boundary_flags;  // faces receive every flag set on the boundary so far
+    neighbor_flags |= neighbor_geometry
+                      ? boundary_flags
+                      : update_default;
+    // Below: each selector adds the flags for the kinds of data (values, gradients, Hessians) it has selected
+    if (cell_selector.has_values() != 0) cell_flags |= update_values;
+    if (cell_selector.has_gradients() != 0) cell_flags |= update_gradients;
+    if (cell_selector.has_hessians() != 0) cell_flags |= update_hessians;
+
+    if (boundary_selector.has_values() != 0) boundary_flags |= update_values;
+    if (boundary_selector.has_gradients() != 0) boundary_flags |= update_gradients;
+    if (boundary_selector.has_hessians() != 0) boundary_flags |= update_hessians;
+
+    if (face_selector.has_values() != 0) face_flags |= update_values;
+    if (face_selector.has_gradients() != 0) face_flags |= update_gradients;
+    if (face_selector.has_hessians() != 0) face_flags |= update_hessians;
+    // The neighbor side is driven by face_selector as well; no separate neighbor selector is consulted
+    if (face_selector.has_values() != 0) neighbor_flags |= update_values;
+    if (face_selector.has_gradients() != 0) neighbor_flags |= update_gradients;
+    if (face_selector.has_hessians() != 0) neighbor_flags |= update_hessians;
+  }
+
+
+  template <int dim, int sdim>
+  void
+  IntegrationInfoBox<dim,sdim>::add_update_flags(
+    const UpdateFlags flags,
+    bool cell,
+    bool boundary,
+    bool face,
+    bool neighbor)
+  {
+    if (cell) cell_flags |= flags;
+    if (boundary) boundary_flags |= flags;
+    if (face) face_flags |= flags;
+    if (neighbor) neighbor_flags |= flags;
+  }
+
+
+  template<int dim, int sdim>
+  std::size_t
+  IntegrationInfoBox<dim,sdim>::memory_consumption () const
+  {
+    std::size_t mem = sizeof(*this)
+                      + MemoryConsumption::memory_consumption(cell_quadrature)
+                      - sizeof (cell_quadrature)
+                      + MemoryConsumption::memory_consumption(boundary_quadrature)
+                      - sizeof (boundary_quadrature)
+                      + MemoryConsumption::memory_consumption(face_quadrature)
+                      - sizeof (face_quadrature)
+                      + MemoryConsumption::memory_consumption(cell_selector)
+                      -sizeof (cell_selector)
+                      + MemoryConsumption::memory_consumption(boundary_selector)
+                      -sizeof (boundary_selector)
+                      + MemoryConsumption::memory_consumption(face_selector)
+                      -sizeof (face_selector)
+                      + MemoryConsumption::memory_consumption(cell)
+                      - sizeof(cell)
+                      + MemoryConsumption::memory_consumption(boundary)
+                      - sizeof(boundary)
+                      + MemoryConsumption::memory_consumption(face)
+                      - sizeof(face)
+                      + MemoryConsumption::memory_consumption(subface)
+                      - sizeof(subface)
+                      + MemoryConsumption::memory_consumption(neighbor)
+                      - sizeof(neighbor);
+//   if (cell_data != 0)
+//     mem += MemoryConsumption::memory_consumption(*cell_data);
+//   if (boundary_data != 0)
+//     mem += MemoryConsumption::memory_consumption(*boundary_data);
+//   if (face_data != 0)
+//     mem += MemoryConsumption::memory_consumption(*face_data);
+
+    return mem;
+  }
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/include/deal.II/meshworker/local_integrator.h b/include/deal.II/meshworker/local_integrator.h
new file mode 100644
index 0000000..fcb0b5a
--- /dev/null
+++ b/include/deal.II/meshworker/local_integrator.h
@@ -0,0 +1,146 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__mesh_worker_local_integrator_h
+#define dealii__mesh_worker_local_integrator_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/std_cxx11/function.h>
+
+#include <vector>
+#include <string>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace MeshWorker
+{
+  template <int dim, int spacedim, typename number> class DoFInfo;
+  template <int dim, int spacedim> class IntegrationInfo;
+
+  /**
+   * A local integrator object, which can be used to simplify the call of
+   * loop(). Instead of providing the three local integration functions
+   * separately, we bundle them as virtual functions in this class.
+   *
+   * Additionally, since we cannot have a virtual null function, we provide
+   * flags which allow us to indicate whether we want to integrate on cells,
+   * boundary faces and interior faces. These flags are true by default, but
+   * can be modified by applications to speed up the loop.
+   *
+   * If a function is not overridden in a derived class, but its usage flag is
+   * true, the function will cause an exception ExcPureFunction.
+   *
+   * @ingroup MeshWorker
+   * @author Guido Kanschat
+   * @date 2012
+   */
+  template <int dim, int spacedim=dim, typename number=double>
+  class LocalIntegrator : public Subscriptor
+  {
+  public:
+    /**
+     * The constructor setting default values, namely all integration flags to
+     * true.
+     */
+    LocalIntegrator();
+
+    /**
+     * The constructor setting integration flags to specified values.
+     */
+    LocalIntegrator(bool use_cell, bool use_boundary, bool use_face);
+
+    /**
+     * The empty virtual destructor.
+     */
+    ~LocalIntegrator();
+
+    /**
+     * Virtual function for integrating on cells. Throws exception
+     * ExcPureFunction if not overridden by a derived class.
+     */
+    virtual void cell(DoFInfo<dim, spacedim, number> &dinfo,
+                      IntegrationInfo<dim, spacedim> &info) const;
+    /**
+     * Virtual function for integrating on boundary faces. Throws exception
+     * ExcPureFunction if not overridden by a derived class.
+     */
+    virtual void boundary(DoFInfo<dim, spacedim, number> &dinfo,
+                          IntegrationInfo<dim, spacedim> &info) const;
+    /**
+     * Virtual function for integrating on interior faces. Throws exception
+     * ExcPureFunction if not overridden by a derived class.
+     */
+    virtual void face(DoFInfo<dim, spacedim, number> &dinfo1,
+                      DoFInfo<dim, spacedim, number> &dinfo2,
+                      IntegrationInfo<dim, spacedim> &info1,
+                      IntegrationInfo<dim, spacedim> &info2) const;
+
+    /**
+     * The flag indicating whether the cell integrator cell() is to be used in
+     * the loop. Defaults to <tt>true</tt>.
+     */
+    bool use_cell;
+
+    /**
+     * The flag indicating whether the boundary integrator boundary() is to be
+     * used in the loop. Defaults to <tt>true</tt>.
+     */
+    bool use_boundary;
+
+    /**
+     * The flag indicating whether the interior face integrator face() is to
+     * be used in the loop. Defaults to <tt>true</tt>.
+     */
+    bool use_face;
+
+    /**
+     * The names of the input vectors. If this vector is nonempty, it can be
+     * used by application programs to automatically select and verify the
+     * input vectors used for integration.
+     *
+     * @note This variable is currently not used by the library, but it is
+     * provided to help develop application programs.
+     */
+    std::vector<std::string> input_vector_names;
+
+    /**
+     * The names of the results produced. If this vector is nonempty, it can
+     * be used by application programs to automatically assign names to output
+     * values and/or verify the names of vectors.
+     *
+     * @note This variable is currently not used by the library, but it is
+     * provided to help develop application programs.
+     */
+    std::vector<std::string> output_names;
+
+    /**
+     * This error is thrown if one of the virtual functions cell(),
+     * boundary(), or face() is called without being overridden in a derived
+     * class. Consider setting #use_cell, #use_boundary, and #use_face to
+     * false, respectively.
+     *
+     * @ingroup Exceptions
+     */
+    DeclException0(ExcPureFunction);
+  };
+}
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/meshworker/local_results.h b/include/deal.II/meshworker/local_results.h
new file mode 100644
index 0000000..d9427c0
--- /dev/null
+++ b/include/deal.II/meshworker/local_results.h
@@ -0,0 +1,673 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__mesh_worker_local_results_h
+#define dealii__mesh_worker_local_results_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/std_cxx11/function.h>
+#include <deal.II/base/geometry_info.h>
+#include <deal.II/lac/matrix_block.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/meshworker/vector_selector.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+class BlockIndices;
+template<int,int> class DoFHandler;
+
+
+/**
+ * A collection of functions and classes for the mesh loops that are a
+ * ubiquitous part of each finite element program.
+ *
+ * The workhorse of this namespace is the loop() function, which implements a
+ * completely generic loop over all mesh cells. Since the calls to loop() are
+ * error-prone due to its generality, for many applications it is advisable to
+ * derive a class from MeshWorker::LocalIntegrator and use the less general
+ * integration_loop() instead.
+ *
+ * The loop() depends on certain objects handed to it as arguments. These
+ * objects are of two types, info objects like DoFInfo and IntegrationInfo and
+ * worker objects like LocalWorker and IntegrationWorker.
+ *
+ * Worker objects usually do two different jobs: first, they compute the local
+ * contribution of a cell or face to the global operation. Second, they
+ * assemble this local contribution into the global result, whether a
+ * functional, a form or a bilinear form. While the first job is particular to
+ * the problem being solved, the second is generic and only depends on the
+ * data structures. Therefore, base classes for workers assembling into global
+ * data are provided in the namespace Assembler.
+ *
+ * <h3>Template argument types</h3>
+ *
+ * The functions loop() and cell_action() take some arguments which are
+ * template parameters. Let us list the minimum requirements for these classes
+ * here and describe their properties.
+ *
+ * <h4>ITERATOR</h4>
+ *
+ * Any object that has an <tt>operator++()</tt> and points to a
+ * TriaObjectAccessor.
+ *
+ * <h4>DOFINFO</h4>
+ *
+ * For an example implementation, refer to the class template DoFInfo. In
+ * order to work with cell_action() and loop(), DOFINFO needs to follow the
+ * following interface.
+ * @code
+ * class DOFINFO
+ * {
+ *   private:
+ *     DOFINFO();
+ *     DOFINFO(const DOFINFO&);
+ *     DOFINFO& operator=(const DOFINFO&);
+ *
+ *   public:
+ *     template <class CellIt>
+ *     void reinit(const CellIt& c);
+ *
+ *     template <class CellIt, class FaceIt>
+ *     void reinit(const CellIt& c, const FaceIt& f, unsigned int n);
+ *
+ *     template <class CellIt, class FaceIt>
+ *     void reinit(const CellIt& c, const FaceIt& f, unsigned int n,
+ *     unsigned int s);
+ *
+ *   friend template class DoFInfoBox<int dim, DOFINFO>;
+ * };
+ * @endcode
+ *
+ * The three private functions are called by DoFInfoBox and should not be
+ * needed elsewhere. Obviously, they can be made public and then the friend
+ * declaration at the end may be missing.
+ *
+ * Additionally, you will need at least one public constructor. Furthermore
+ * DOFINFO is pretty useless yet: functions to interface with INTEGRATIONINFO
+ * and ASSEMBLER are needed.
+ *
+ * DOFINFO objects are gathered in a DoFInfoBox. In those objects, we store
+ * the results of local operations on each cell and its faces. Once all this
+ * information has been gathered, an ASSEMBLER is used to assemble it into
+ * global data.
+ *
+ * <h4>INFOBOX</h4>
+ *
+ * This type is exemplified in IntegrationInfoBox. It collects the input data
+ * for actions on cells and faces in INFO objects (see below). It provides the
+ * following interface to loop() and cell_action():
+ *
+ * @code
+ * class INFOBOX
+ * {
+ *   public:
+ *     template <int dim, class DOFINFO>
+ *     void post_cell(const DoFInfoBox<dim, DOFINFO>&);
+ *
+ *     template <int dim, class DOFINFO>
+ *     void post_faces(const DoFInfoBox<dim, DOFINFO>&);
+ *
+ *     INFO cell;
+ *     INFO boundary;
+ *     INFO face;
+ *     INFO subface;
+ *     INFO neighbor;
+ * };
+ * @endcode
+ *
+ * The main purpose of this class is gathering the five INFO objects, which
+ * contain the temporary data used on each cell or face. The requirements on
+ * these objects are listed below. Here, we only note that there need to be
+ * these 5 objects with the names listed above.
+ *
+ * The two function templates are call back functions called in cell_action().
+ * The first is called before the faces are worked on, the second after the
+ * faces.
+ *
+ * <h4>INFO</h4>
+ *
+ * See IntegrationInfo for an example of these objects. They contain the
+ * temporary data needed on each cell or face to compute the result. The
+ * MeshWorker only uses the interface
+ *
+ * @code
+ * class INFO
+ * {
+ *   public:
+ *     void reinit(const DOFINFO& i);
+ * };
+ * @endcode
+ *
+ * <h3>Simplified interfaces</h3>
+ *
+ * Since the loop() is fairly general, a specialization integration_loop() is
+ * available, which is a wrapper around loop() with a simplified interface.
+ *
+ * The integration_loop() function takes most of the information that it
+ * needs to pass to loop() from an IntegrationInfoBox object. Its use is
+ * explained in step-12, but in short it requires functions that do the local
+ * integration on a cell, interior or boundary face, and it needs an object
+ * (called "assembler") that copies these local contributions into the global
+ * matrix and right hand side objects.
+ *
+ * Before we can run the integration loop, we have to initialize several data
+ * structures in our IntegrationWorker and assembler objects. For instance, we
+ * have to decide on the quadrature rule or we may need more than the default
+ * update flags.
+ *
+ * @ingroup MeshWorker
+ * @ingroup Integrators
+ * @author Guido Kanschat
+ * @date 2009
+ */
+namespace MeshWorker
+{
+  /**
+   * The class providing the scrapbook to fill with results of local
+   * integration. Depending on the task the mesh worker loop is performing,
+   * local results can be of different types. They have in common that they
+   * are the result of local integration over a cell or face. Their actual
+   * type is determined by the Assembler using them. It is also the assembler
+   * setting the arrays of local results to the sizes needed. Here is a list
+   * of the provided data types and the assemblers using them:
+   *
+   * <ol>
+   * <li> n_values() numbers accessed with value(), and stored in the data
+   * member #J.
+   *
+   * <li> n_vectors() vectors of the length of dofs on this cell, accessed by
+   * vector(), and stored in #R.
+   * <li> n_matrices() matrices of dimension dofs per cell in each direction,
+   * accessed by matrix() with second argument <tt>false</tt>. These are
+   * stored in #M1, and they are the matrices coupling degrees of freedom in
+   * the same cell. For fluxes across faces, there is an additional set #M2 of
+   * matrices of the same size, but the dimension of the matrices being
+   * according to the degrees of freedom on both cells. These are accessed
+   * with matrix(), using the second argument <tt>true</tt>.
+   * </ol>
+   *
+   * The local matrices are initialized by reinit() of the info object and
+   * then assembled into the global system by Assembler classes.
+   *
+   * @ingroup MeshWorker
+   * @author Guido Kanschat, 2009
+   */
+  template <typename number>
+  class LocalResults
+  {
+  public:
+    /**
+     * The number of scalar values.
+     *
+     * This number is set to a nonzero value by Assember::CellsAndFaces
+     *
+     */
+    unsigned int n_values () const;
+
+    /**
+     * The number of vectors.
+     *
+     * This number is set to a nonzero value by Assember::ResidualSimple and
+     * Assember::ResidualLocalBlocksToGlobalBlocks.
+     */
+    unsigned int n_vectors () const;
+
+    /**
+     * The number of matrices.
+     */
+    unsigned int n_matrices () const;
+
+    /**
+     * The number of quadrature points in quadrature_values().
+     */
+    unsigned int n_quadrature_points() const;
+
+    /**
+     * The number of values in each quadrature point in quadrature_values().
+     */
+    unsigned int n_quadrature_values() const;
+
+    /**
+     * Access scalar value at index @p i.
+     */
+    number &value(unsigned int i);
+
+    /**
+     * Read scalar value at index @p i.
+     */
+    number value(unsigned int i) const;
+
+    /**
+     * Access vector at index @p i.
+     */
+    BlockVector<number> &vector(unsigned int i);
+
+    /**
+     * Read vector at index @p i.
+     */
+    const BlockVector<number> &vector(unsigned int i) const;
+
+    /**
+     * Access matrix at index @p i. For results on internal faces, a true
+     * value for @p external refers to the flux between cells, while false
+     * refers to entries coupling inside the cell.
+     */
+    MatrixBlock<FullMatrix<number> > &matrix(unsigned int i, bool external = false);
+
+    /**
+     * Read matrix at index @p i. For results on internal faces, a true value
+     * for @p external refers to the flux between cells, while false refers to
+     * entries coupling inside the cell.
+     */
+    const MatrixBlock<FullMatrix<number> > &matrix(unsigned int i, bool external = false) const;
+
+    /**
+     * Access to the vector #quadrature_data of data in quadrature points,
+     * organized such that there is a vector for each point, containing one
+     * entry for each component.
+     */
+    Table<2, number> &quadrature_values();
+
+    /**
+     * Access the <i>i</i>th value at quadrature point <i>k</i>
+     */
+    number &quadrature_value(unsigned int k, unsigned int i);
+
+    /**
+     * Read the <i>i</i>th value at quadrature point <i>k</i>
+     */
+    number quadrature_value(unsigned int k, unsigned int i) const;
+
+    /**
+     * Initialize the vector with scalar values.
+     *
+     * @note This function is usually only called by the assembler.
+     */
+    void initialize_numbers(const unsigned int n);
+
+    /**
+     * Initialize the vector with vector values.
+     *
+     * @note This function is usually only called by the assembler.
+     */
+    void initialize_vectors(const unsigned int n);
+
+    /**
+     * Allocate @p n local matrices. Additionally, set their block row and
+     * column coordinates to zero. The matrices themselves are resized by
+     * reinit().
+     *
+     * @note This function is usually only called by the assembler.
+     */
+    void initialize_matrices(unsigned int n, bool both);
+
+    /**
+     * Allocate a local matrix for each of the global ones in @p matrices.
+     * Additionally, set their block row and column coordinates. The matrices
+     * themselves are resized by reinit().
+     *
+     * @note This function is usually only called by the assembler.
+     */
+    template <typename MatrixType>
+    void initialize_matrices(const MatrixBlockVector<MatrixType> &matrices,
+                             bool both);
+
+    /**
+     * Allocate a local matrix for each of the global level objects in @p
+     * matrices. Additionally, set their block row and column coordinates. The
+     * matrices themselves are resized by reinit().
+     *
+     * @note This function is usually only called by the assembler.
+     */
+    template <typename MatrixType>
+    void initialize_matrices(const MGMatrixBlockVector<MatrixType> &matrices,
+                             bool both);
+
+    /**
+     * Initialize quadrature values to <tt>nv</tt> values in <tt>np</tt>
+     * quadrature points.
+     */
+    void initialize_quadrature(unsigned int np, unsigned int nv);
+
+    /**
+     * Reinitialize matrices for new cell. Does not resize any of the data
+     * vectors stored in this object, but resizes the vectors in #R and the
+     * matrices in #M1 and #M2 for hp and sets them to zero.
+     */
+    void reinit(const BlockIndices &local_sizes);
+
+    template <class StreamType>
+    void print_debug(StreamType &os) const;
+
+    /**
+     * The memory used by this object.
+     */
+    std::size_t memory_consumption () const;
+
+  private:
+    /**
+     * Initialize a single local matrix block. A helper function for
+     * initialize()
+     */
+    void initialize_local(MatrixBlock<FullMatrix<number> > &M,
+                          const unsigned int row,
+                          const unsigned int col);
+
+    /**
+     * The local numbers, computed on a cell or on a face.
+     */
+    std::vector<number> J;
+
+    /**
+     * The local vectors. This field is public, so that local integrators can
+     * write to it.
+     */
+    std::vector<BlockVector<number> > R;
+
+    /**
+     * The local matrices coupling degrees of freedom in the cell itself or
+     * within the first cell on a face.
+     */
+    std::vector<MatrixBlock<FullMatrix<number> > > M1;
+
+    /**
+     * The local matrices coupling test functions on the cell with trial
+     * functions on the other cell.
+     *
+     * Only used on interior faces.
+     */
+    std::vector<MatrixBlock<FullMatrix<number> > > M2;
+
+    /**
+     * Values in quadrature points for writing into patch data.
+     */
+    Table<2, number> quadrature_data;
+  };
+
+//----------------------------------------------------------------------//
+
+  template <typename number>
+  inline void LocalResults<number>::initialize_numbers(unsigned int n)
+  {
+    // Provide storage for n scalar results.
+    this->J.resize(n);
+  }
+
+
+  template <typename number>
+  inline void LocalResults<number>::initialize_vectors(const unsigned int n)
+  {
+    // Provide slots for n local vectors.
+    this->R.resize(n);
+  }
+
+
+  template <typename number>
+  template <typename MatrixType>
+  inline void
+  LocalResults<number>::initialize_matrices
+  (const MatrixBlockVector<MatrixType> &matrices,
+   bool                                 both)
+  {
+    const unsigned int n_blocks = matrices.size();
+    M1.resize(n_blocks);
+    if (both)
+      M2.resize(n_blocks);
+    for (unsigned int b=0; b<n_blocks; ++b)
+      {
+        // Take over the block coordinates of the b-th global matrix.
+        const unsigned int block_row    = matrices.block(b).row;
+        const unsigned int block_column = matrices.block(b).column;
+        M1[b].row    = block_row;
+        M1[b].column = block_column;
+        if (!both)
+          continue;
+        M2[b].row    = block_row;
+        M2[b].column = block_column;
+      }
+  }
+
+
+  template <typename number>
+  template <typename MatrixType>
+  inline void
+  LocalResults<number>::initialize_matrices
+  (const MGMatrixBlockVector<MatrixType> &matrices,
+   bool                                   both)
+  {
+    const unsigned int n_blocks = matrices.size();
+    M1.resize(n_blocks);
+    if (both)
+      M2.resize(n_blocks);
+    for (unsigned int b=0; b<n_blocks; ++b)
+      {
+        // Block coordinates are read from the matrix on the minimal level.
+        const MGLevelObject<MatrixBlock<MatrixType> > &levels = matrices.block(b);
+        const unsigned int block_row    = levels[levels.min_level()].row;
+        const unsigned int block_column = levels[levels.min_level()].column;
+        M1[b].row    = block_row;
+        M1[b].column = block_column;
+        if (!both)
+          continue;
+        M2[b].row    = block_row;
+        M2[b].column = block_column;
+      }
+  }
+
+
+  template <typename number>
+  inline void
+  LocalResults<number>::initialize_matrices(const unsigned int n,
+                                            const bool both)
+  {
+    M1.resize(n);
+    if (both)
+      M2.resize(n);
+    // All local matrices start out at block coordinates (0,0).
+    for (unsigned int k=0; k<n; ++k)
+      {
+        M1[k].row    = 0;
+        M1[k].column = 0;
+        if (!both)
+          continue;
+        M2[k].row    = 0;
+        M2[k].column = 0;
+      }
+  }
+
+
+  template <typename number>
+  inline void LocalResults<number>::initialize_quadrature(unsigned int np, unsigned int nv)
+  {
+    // One table row per quadrature point, one column per value.
+    this->quadrature_data.reinit(np, nv);
+  }
+
+
+  // Number of scalar results, i.e. the current size of J.
+  template <typename number>
+  inline unsigned int
+  LocalResults<number>::n_values() const
+  {
+    return this->J.size();
+  }
+
+
+  // Number of local vectors, i.e. the current size of R.
+  template <typename number>
+  inline unsigned int
+  LocalResults<number>::n_vectors() const
+  {
+    return this->R.size();
+  }
+
+
+  // Number of local matrices, taken from M1 (M2 is not consulted).
+  template <typename number>
+  inline unsigned int
+  LocalResults<number>::n_matrices() const
+  {
+    return this->M1.size();
+  }
+
+
+  // Number of quadrature points: the row count of quadrature_data.
+  template <typename number>
+  inline unsigned int
+  LocalResults<number>::n_quadrature_points() const
+  {
+    return this->quadrature_data.n_rows();
+  }
+
+
+  // Number of values per quadrature point: the column count of the table.
+  template <typename number>
+  inline unsigned int
+  LocalResults<number>::n_quadrature_values() const
+  {
+    return this->quadrature_data.n_cols();
+  }
+
+
+  // Writable access to the i-th scalar result; i is checked first.
+  template <typename number>
+  inline number &
+  LocalResults<number>::value(unsigned int i)
+  {
+    AssertIndexRange(i,J.size());
+    return this->J[i];
+  }
+
+
+  // Writable access to the i-th local vector; i is checked first.
+  template <typename number>
+  inline BlockVector<number> &
+  LocalResults<number>::vector(unsigned int i)
+  {
+    AssertIndexRange(i,R.size());
+    return this->R[i];
+  }
+
+
+  // Writable access to the i-th matrix: from M1, or from M2 if external.
+  template <typename number>
+  inline MatrixBlock<FullMatrix<number> > &
+  LocalResults<number>::matrix(unsigned int i, bool external)
+  {
+    if (!external)
+      {
+        AssertIndexRange(i,M1.size());
+        return M1[i];
+      }
+    AssertIndexRange(i,M2.size());
+    return M2[i];
+  }
+
+
+  // Writable access to value i at quadrature point k.
+  template <typename number>
+  inline number &
+  LocalResults<number>::quadrature_value(unsigned int k, unsigned int i)
+  {
+    return this->quadrature_data(k,i);
+  }
+
+
+  // Writable access to the whole table of quadrature point data.
+  template <typename number>
+  inline Table<2, number> &
+  LocalResults<number>::quadrature_values()
+  {
+    return this->quadrature_data;
+  }
+
+
+  // Copy of the i-th scalar result; i is checked first.
+  template <typename number>
+  inline number
+  LocalResults<number>::value(unsigned int i) const
+  {
+    AssertIndexRange(i,J.size());
+    return this->J[i];
+  }
+
+
+  // Read-only access to the i-th local vector; i is checked first.
+  template <typename number>
+  inline const BlockVector<number> &
+  LocalResults<number>::vector(unsigned int i) const
+  {
+    AssertIndexRange(i,R.size());
+    return this->R[i];
+  }
+
+
+  // Read-only access to the i-th matrix: from M1, or from M2 if external.
+  template <typename number>
+  inline const MatrixBlock<FullMatrix<number> > &
+  LocalResults<number>::matrix(unsigned int i, bool external) const
+  {
+    if (!external)
+      {
+        AssertIndexRange(i,M1.size());
+        return M1[i];
+      }
+    AssertIndexRange(i,M2.size());
+    return M2[i];
+  }
+
+
+  // Copy of value i at quadrature point k.
+  template <typename number>
+  inline number
+  LocalResults<number>::quadrature_value(unsigned int k, unsigned int i) const
+  {
+    return this->quadrature_data(k,i);
+  }
+
+
+  // Debugging aid: writes container sizes and per-entry block layout to os.
+  template <typename number>
+  template <class StreamType>
+  void LocalResults<number>::print_debug(StreamType &os) const
+  {
+    os << "J: " << J.size() << std::endl;
+    os << "R: " << R.size() << std::endl;
+    for (unsigned int v=0; v<R.size(); ++v)
+      {
+        os << "  " << R[v].n_blocks() << " -";
+        for (unsigned int b=0; b<R[v].n_blocks(); ++b)
+          os << ' ' << R[v].block(b).size();
+        os << std::endl;
+      }
+    os << "M: " << M1.size() << " face " << M2.size() << std::endl;
+    for (unsigned int m=0; m<M1.size(); ++m)
+      {
+        os << "  " << M1[m].row << "," << M1[m].column
+           << " " << M1[m].matrix.m() << 'x' << M1[m].matrix.n();
+        if (m < M2.size())
+          os << " face " << M2[m].row << "," << M2[m].column
+             << " " << M2[m].matrix.m() << 'x' << M2[m].matrix.n();
+        os << std::endl;
+      }
+  }
+
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/meshworker/loop.h b/include/deal.II/meshworker/loop.h
new file mode 100644
index 0000000..f05cc9f
--- /dev/null
+++ b/include/deal.II/meshworker/loop.h
@@ -0,0 +1,455 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__mesh_worker_loop_h
+#define dealii__mesh_worker_loop_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/std_cxx11/function.h>
+#include <deal.II/base/work_stream.h>
+#include <deal.II/base/template_constraints.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/meshworker/local_integrator.h>
+#include <deal.II/meshworker/dof_info.h>
+#include <deal.II/meshworker/integration_info.h>
+
+
+#define DEAL_II_MESHWORKER_PARALLEL 1
+
+DEAL_II_NAMESPACE_OPEN
+
+template <typename> class TriaActiveIterator;
+template <typename> class FilteredIterator;
+
+namespace internal
+{
+  /**
+   * Tell whether an iterator is an active iterator. This generic overload
+   * is the fallback for every type not matched by the overloads below.
+   */
+  template <class DI>
+  inline bool is_active_iterator(const DI & /*iterator*/)
+  { return false; }
+
+  /** Overload selected for TriaActiveIterator arguments. */
+  template <class ACCESSOR>
+  inline bool is_active_iterator(const TriaActiveIterator<ACCESSOR> & /*iterator*/)
+  { return true; }
+
+  /** Overload selected for a FilteredIterator around a TriaActiveIterator. */
+  template <class ACCESSOR>
+  inline bool is_active_iterator(const FilteredIterator<TriaActiveIterator<ACCESSOR> > & /*iterator*/)
+  { return true; }
+
+  /**
+   * Hand the local results collected in @p dinfo to the assembler. The
+   * assembler is taken by pointer and dereferenced for
+   * DoFInfoBox::assemble().
+   */
+  template<int dim, class DOFINFO, class A>
+  void assemble(const MeshWorker::DoFInfoBox<dim, DOFINFO> &dinfo, A *assembler)
+  { dinfo.assemble(*assembler); }
+}
+
+
+
+namespace MeshWorker
+{
+  /**
+   * Collection of parameters for execution of MeshWorker loops.
+   */
+  class LoopControl
+  {
+  public:
+    /**
+     * Constructor. Sets the defaults documented at the individual members.
+     */
+    LoopControl()
+      : own_cells(true), ghost_cells(false),
+        faces_to_ghost(LoopControl::one), own_faces(LoopControl::one),
+        cells_first(true)
+    {
+    }
+
+    /**
+     * Loop over cells owned by this process. Defaults to <code>true</code>.
+     */
+    bool own_cells;
+    /**
+     * Loop over cells not owned by this process. Defaults to
+     * <code>false</code>.
+     */
+    bool ghost_cells;
+
+    /** Choices for how often a face is assembled; see the members below. */
+    enum FaceOption
+    {
+      never,
+      one,
+      both
+    };
+
+    /**
+     * Loop over faces between a locally owned cell and a ghost cell
+     * (default: one):
+     * - never: do not assemble these faces
+     * - one: only one of the processes will assemble these faces (from the
+     *   finer side or the process with the lower MPI rank)
+     * - both: both processes will assemble these faces; they are never
+     *   assembled from both sides on a single process
+     */
+    FaceOption faces_to_ghost;
+
+    /**
+     * Loop over faces between two locally owned cells (default: one):
+     * - never: do not assemble face terms
+     * - one: assemble once (always coming from the finer side)
+     * - both: assemble each face twice (not implemented for hanging nodes!)
+     */
+    FaceOption own_faces;
+
+    /**
+     * Flag to determine if cell integrals should be done before or after
+     * face integrals. Default is <code>true</code>, i.e. cells first.
+     */
+    bool cells_first;
+  };
+
+
+
+  /**
+   * The function called by loop() to perform the required actions on a cell
+   * and its faces. The three functions <tt>cell_worker</tt>,
+   * <tt>boundary_worker</tt> and <tt>face_worker</tt> are the same ones
+   * handed to loop(). While there we only run the loop over all cells, here,
+   * we do a single cell and, if necessary, its faces, interior and boundary.
+   *
+   * Upon return, the DoFInfo objects in the DoFInfoBox are filled with the
+   * data computed on the cell and each of the faces. Thus, after the
+   * execution of this function, we are ready to call DoFInfoBox::assemble()
+   * to distribute the local data into global data.
+   *
+   * @param cell is the cell we work on
+   * @param dof_info is the object into which local results are entered. It is
+   * expected to have been set up for the right types of data.
+   * @param info is the object containing additional data only needed for
+   * internal processing.
+   * @param cell_worker defines the local action on each cell.
+   * @param boundary_worker defines the local action on boundary faces
+   * @param face_worker defines the local action on interior faces.
+   * @param loop_control control structure to specify what actions should be
+   * performed.
+   *
+   * @ingroup MeshWorker
+   * @author Guido Kanschat
+   * @date 2010
+   */
+  template<class INFOBOX, class DOFINFO, int dim, int spacedim, class ITERATOR>
+  void cell_action(
+    ITERATOR cell,
+    DoFInfoBox<dim, DOFINFO> &dof_info,
+    INFOBOX &info,
+    const std_cxx11::function<void (DOFINFO &, typename INFOBOX::CellInfo &)> &cell_worker,
+    const std_cxx11::function<void (DOFINFO &, typename INFOBOX::CellInfo &)> &boundary_worker,
+    const std_cxx11::function<void (DOFINFO &, DOFINFO &,
+                                    typename INFOBOX::CellInfo &,
+                                    typename INFOBOX::CellInfo &)> &face_worker,
+    const LoopControl &loop_control)
+  {
+    // A triangulation whose locally_owned_subdomain() is invalid_subdomain_id
+    // makes no ownership distinction: every cell then counts as our own.
+    const bool ignore_subdomain = (cell->get_triangulation().locally_owned_subdomain()
+                                   == numbers::invalid_subdomain_id);
+
+    // Level cells carry a level subdomain id, all others the regular one.
+    types::subdomain_id csid = (cell->is_level_cell())
+                               ? cell->level_subdomain_id()
+                               : cell->subdomain_id();
+
+    const bool own_cell = ignore_subdomain || (csid == cell->get_triangulation().locally_owned_subdomain());
+
+    dof_info.reset();
+
+    // Nothing is computed on artificial cells; dof_info stays as reset.
+    if ((!ignore_subdomain) && (csid == numbers::artificial_subdomain_id))
+      return;
+
+    dof_info.cell.reinit(cell);
+    dof_info.cell_valid = true;
+
+    // A worker that compares equal to 0 (an empty function object) switches
+    // the corresponding kind of integration off.
+    const bool integrate_cell          = (cell_worker != 0);
+    const bool integrate_boundary      = (boundary_worker != 0);
+    const bool integrate_interior_face = (face_worker != 0);
+
+    if (integrate_cell)
+      info.cell.reinit(dof_info.cell);
+    // Execute the cell worker here if cells have to be dealt with before
+    // faces and this cell is selected by own_cells/ghost_cells.
+    if (integrate_cell && loop_control.cells_first &&
+        ((loop_control.own_cells && own_cell) || (loop_control.ghost_cells && !own_cell)))
+      cell_worker(dof_info.cell, info.cell);
+
+    // Call the callback function in the info box to do computations between
+    // cell and face action.
+    info.post_cell(dof_info);
+
+    if (integrate_interior_face || integrate_boundary)
+      for (unsigned int face_no=0; face_no < GeometryInfo<ITERATOR::AccessorType::Container::dimension>::faces_per_cell; ++face_no)
+        {
+          typename ITERATOR::AccessorType::Container::face_iterator face = cell->face(face_no);
+          if (cell->at_boundary(face_no))
+            {
+              // only integrate boundary faces of own cells
+              if (integrate_boundary && own_cell)
+                {
+                  dof_info.interior_face_available[face_no] = true;
+                  dof_info.interior[face_no].reinit(cell, face, face_no);
+                  info.boundary.reinit(dof_info.interior[face_no]);
+                  boundary_worker(dof_info.interior[face_no], info.boundary);
+                }
+            }
+          else if (integrate_interior_face)
+            {
+              // Interior face
+              TriaIterator<typename ITERATOR::AccessorType> neighbor = cell->neighbor(face_no);
+
+              // The neighbor's id stays artificial_subdomain_id if it is
+              // neither a level cell nor active.
+              types::subdomain_id neighbid = numbers::artificial_subdomain_id;
+              if (neighbor->is_level_cell())
+                neighbid = neighbor->level_subdomain_id();
+              // subdomain id is only valid for active cells
+              else if (neighbor->active())
+                neighbid = neighbor->subdomain_id();
+
+              const bool own_neighbor = ignore_subdomain ||
+                                        (neighbid == cell->get_triangulation().locally_owned_subdomain());
+
+              // skip all faces between two ghost cells
+              if (!own_cell && !own_neighbor)
+                continue;
+
+              // skip if the user doesn't want faces between own cells
+              if (own_cell && own_neighbor && loop_control.own_faces==LoopControl::never)
+                continue;
+
+              // skip face to ghost
+              if (own_cell != own_neighbor && loop_control.faces_to_ghost==LoopControl::never)
+                continue;
+
+              // Deal with refinement edges from the refined side. Assuming
+              // one-irregular meshes, this situation should only occur if
+              // both cells are active.
+              if (cell->neighbor_is_coarser(face_no))
+                {
+                  Assert(!cell->has_children(), ExcInternalError());
+                  Assert(!neighbor->has_children(), ExcInternalError());
+
+                  // skip if only one processor needs to assemble the face
+                  // to a ghost cell and the fine cell is not ours.
+                  if (!own_cell
+                      && loop_control.faces_to_ghost == LoopControl::one)
+                    continue;
+
+                  // The first entry is used as the face number on the
+                  // neighbor; the second is passed to reinit() as well
+                  // (presumably the subface number).
+                  const std::pair<unsigned int, unsigned int> neighbor_face_no
+                    = cell->neighbor_of_coarser_neighbor(face_no);
+                  const typename ITERATOR::AccessorType::Container::face_iterator nface
+                    = neighbor->face(neighbor_face_no.first);
+
+                  dof_info.interior_face_available[face_no] = true;
+                  dof_info.exterior_face_available[face_no] = true;
+                  dof_info.interior[face_no].reinit(cell, face, face_no);
+                  info.face.reinit(dof_info.interior[face_no]);
+                  dof_info.exterior[face_no].reinit(
+                    neighbor, nface, neighbor_face_no.first, neighbor_face_no.second);
+                  // The coarse neighbor side is set up in info.subface.
+                  info.subface.reinit(dof_info.exterior[face_no]);
+
+                  face_worker(dof_info.interior[face_no], dof_info.exterior[face_no],
+                              info.face, info.subface);
+                }
+              else
+                {
+                  // If iterator is active and neighbor is refined, skip
+                  // internal face.
+                  if (internal::is_active_iterator(cell) && neighbor->has_children())
+                    {
+                      Assert(loop_control.own_faces != LoopControl::both, ExcMessage(
+                               "Assembling from both sides for own_faces is not "
+                               "supported with hanging nodes!"));
+                      continue;
+                    }
+
+                  // Now neighbor is on same level, double-check this:
+                  Assert(cell->level()==neighbor->level(), ExcInternalError());
+
+                  // If we own both cells only do faces from one side (unless
+                  // LoopControl says otherwise). Here, we rely on cell comparison
+                  // that will look at cell->index().
+                  if (own_cell && own_neighbor
+                      && loop_control.own_faces == LoopControl::one
+                      && (neighbor < cell))
+                    continue;
+
+                  // independent of loop_control.faces_to_ghost,
+                  // we only look at faces to ghost on the same level once
+                  // (only where own_cell=true and own_neighbor=false)
+                  if (!own_cell)
+                    continue;
+
+                  // now only one processor assembles faces_to_ghost. We let the
+                  // processor with the smaller (level-)subdomain id assemble the
+                  // face.
+                  if (own_cell && !own_neighbor
+                      && loop_control.faces_to_ghost == LoopControl::one
+                      && (neighbid < csid))
+                    continue;
+
+                  const unsigned int neighbor_face_no = cell->neighbor_face_no(face_no);
+                  Assert (neighbor->face(neighbor_face_no) == face, ExcInternalError());
+                  // Regular interior face
+                  dof_info.interior_face_available[face_no] = true;
+                  dof_info.exterior_face_available[face_no] = true;
+                  dof_info.interior[face_no].reinit(cell, face, face_no);
+                  info.face.reinit(dof_info.interior[face_no]);
+                  dof_info.exterior[face_no].reinit(
+                    neighbor, neighbor->face(neighbor_face_no), neighbor_face_no);
+                  info.neighbor.reinit(dof_info.exterior[face_no]);
+
+                  face_worker(dof_info.interior[face_no], dof_info.exterior[face_no],
+                              info.face, info.neighbor);
+                }
+            }
+        } // faces
+    // Call the callback function in the info box to do computations between
+    // face and cell action.
+    info.post_faces(dof_info);
+
+    // Execute the cell worker here if faces have to be handled first and
+    // this cell is selected by own_cells/ghost_cells.
+    if (integrate_cell && !loop_control.cells_first &&
+        ((loop_control.own_cells && own_cell) || (loop_control.ghost_cells && !own_cell)))
+      cell_worker(dof_info.cell, info.cell);
+  }
+
+
+  /**
+   * The main work function of this namespace. It is a loop over all cells in
+   * an iterator range, in which cell_action() is called for each cell.
+   * Unilaterally refined interior faces are handled automatically by the
+   * loop. Most of the work in this loop is done in cell_action(), which also
+   * receives most of the parameters of this function. See the documentation
+   * there for more details.
+   *
+   * If you don't want anything to be done on cells, interior faces or
+   * boundary faces, simply pass a null pointer as the corresponding worker
+   * function argument.
+   *
+   * @ingroup MeshWorker
+   * @author Guido Kanschat, 2009
+   */
+  template<int dim, int spacedim, class DOFINFO, class INFOBOX, class ASSEMBLER, class ITERATOR>
+  void loop(ITERATOR begin,
+            typename identity<ITERATOR>::type end,
+            DOFINFO &dinfo,
+            INFOBOX &info,
+            const std_cxx11::function<void (DOFINFO &, typename INFOBOX::CellInfo &)> &cell_worker,
+            const std_cxx11::function<void (DOFINFO &, typename INFOBOX::CellInfo &)> &boundary_worker,
+            const std_cxx11::function<void (DOFINFO &, DOFINFO &,
+                                            typename INFOBOX::CellInfo &,
+                                            typename INFOBOX::CellInfo &)> &face_worker,
+            ASSEMBLER &assembler,
+            const LoopControl &lctrl = LoopControl())
+  {
+    DoFInfoBox<dim, DOFINFO> dof_info(dinfo);
+
+    assembler.initialize_info(dof_info.cell, false);
+    for (unsigned int i=0; i<GeometryInfo<dim>::faces_per_cell; ++i)
+      {
+        assembler.initialize_info(dof_info.interior[i], true);
+        assembler.initialize_info(dof_info.exterior[i], true);
+      }
+
+    // Loop over all cells: cell_action() fills a box, assemble() copies it out
+#ifdef DEAL_II_MESHWORKER_PARALLEL
+    WorkStream::run(begin, end,
+                    std_cxx11::bind(&cell_action<INFOBOX, DOFINFO, dim, spacedim, ITERATOR>,
+                                    std_cxx11::_1, std_cxx11::_3, std_cxx11::_2, // cell, dof_info, info
+                                    cell_worker, boundary_worker, face_worker, lctrl),
+                    std_cxx11::bind(&internal::assemble<dim,DOFINFO,ASSEMBLER>, std_cxx11::_1, &assembler),
+                    info, dof_info);
+#else
+    for (ITERATOR cell = begin; cell != end; ++cell)
+      {
+        cell_action<INFOBOX,DOFINFO,dim,spacedim>(cell, dof_info,
+                                                  info, cell_worker,
+                                                  boundary_worker, face_worker,
+                                                  lctrl);
+        dof_info.assemble(assembler);
+      }
+#endif
+  }
+
+
+  /**
+   * Simplified interface for loop() if specialized for integration, using the
+   * virtual functions in LocalIntegrator.
+   *
+   * A worker is bound only if the corresponding flag of @p integrator
+   * (use_cell, use_boundary, use_face) is set; the others are passed to
+   * loop() as empty function objects.
+   *
+   * @ingroup MeshWorker
+   * @author Guido Kanschat, 2009
+   */
+  template<int dim, int spacedim, class ITERATOR, class ASSEMBLER>
+  void integration_loop(ITERATOR begin,
+                        typename identity<ITERATOR>::type end,
+                        DoFInfo<dim, spacedim> &dof_info,
+                        IntegrationInfoBox<dim, spacedim> &box,
+                        const LocalIntegrator<dim, spacedim> &integrator,
+                        ASSEMBLER &assembler,
+                        const LoopControl &lctrl = LoopControl())
+  {
+    // The worker signatures name DoFInfo<dim, spacedim>, the type of
+    // dof_info, so that loop() deduces one DOFINFO also for spacedim != dim.
+    std_cxx11::function<void (DoFInfo<dim, spacedim>&, IntegrationInfo<dim, spacedim>&)> cell_worker;
+    std_cxx11::function<void (DoFInfo<dim, spacedim>&, IntegrationInfo<dim, spacedim>&)> boundary_worker;
+    std_cxx11::function<void (DoFInfo<dim, spacedim> &, DoFInfo<dim, spacedim> &,
+                              IntegrationInfo<dim, spacedim> &,
+                              IntegrationInfo<dim, spacedim> &)> face_worker;
+    if (integrator.use_cell)
+      cell_worker = std_cxx11::bind(&LocalIntegrator<dim, spacedim>::cell, &integrator, std_cxx11::_1, std_cxx11::_2);
+    if (integrator.use_boundary)
+      boundary_worker = std_cxx11::bind(&LocalIntegrator<dim, spacedim>::boundary, &integrator, std_cxx11::_1, std_cxx11::_2);
+    if (integrator.use_face)
+      face_worker = std_cxx11::bind(&LocalIntegrator<dim, spacedim>::face, &integrator, std_cxx11::_1, std_cxx11::_2, std_cxx11::_3, std_cxx11::_4);
+
+    loop<dim, spacedim>(begin, end, dof_info, box,
+                        cell_worker, boundary_worker, face_worker,
+                        assembler, lctrl);
+  }
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/meshworker/output.h b/include/deal.II/meshworker/output.h
new file mode 100644
index 0000000..fd947fd
--- /dev/null
+++ b/include/deal.II/meshworker/output.h
@@ -0,0 +1,240 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__mesh_worker_output_h
+#define dealii__mesh_worker_output_h
+
+#include <deal.II/meshworker/dof_info.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/base/mg_level_object.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace MeshWorker
+{
+  namespace Assembler
+  {
+
+    /**
+     * A class that, instead of assembling into a matrix or vector, outputs
+     * the results on a cell to a gnuplot patch.
+     *
+     * This assembler expects that LocalResults contains quadrature values set
+     * with LocalResults::quadrature_value(). When it is initialized with the
+     * number of quadrature points in a single (!) space direction and the
+     * number of data fields to be displayed, it initializes LocalResults
+     * automatically. The number of data fields in local results will be
+     * increased by dim in order to accommodate for the coordinates of the
+     * data points.
+     *
+     * While data slots for the space coordinates are allocated automatically,
+     * these coordinates are not entered. It is up to the user to enter the
+     * coordinates in the first dim data entries at every point. This adds the
+     * flexibility to output transformed coordinates or even something
+     * completely different.
+     *
+     * @note In the current implementation, only cell data can be written.
+     *
+     * @author Guido Kanschat
+     * @date 2011, 2012
+     */
+    class GnuplotPatch
+    {
+    public:
+      /**
+       * Constructor. No stream is selected, so data goes to @p deallog.
+       */
+      GnuplotPatch();
+
+      /**
+       * Initialize for writing <i>n</i> data vectors. The number of points is
+       * the number of quadrature points in a single direction in a tensor
+       * product formula. It must match the number in the actual Quadrature
+       * used to create the patches. The total number of data vectors produced
+       * is <tt>n+dim</tt> and the first dim should be the space coordinates
+       * of the points. Nevertheless, it is up to the user to set these values
+       * to whatever is desired.
+       */
+      void initialize (const unsigned int n_points,
+                       const unsigned int n_vectors);
+
+      /**
+       * Set the stream #os to which data is written. If no stream is selected
+       * with this function, data goes to @p deallog.
+       */
+      void initialize_stream (std::ostream &stream);
+
+      /**
+       * Initialize the local data in the DoFInfo object used later for
+       * assembling.
+       *
+       * The info object refers to a cell if <code>!face</code>, or else to an
+       * interior or boundary face.
+       */
+      template <int dim>
+      void initialize_info(DoFInfo<dim> &info, bool face);
+
+      /**
+       * Write the patch to the output stream.
+       */
+      template<int dim>
+      void assemble(const DoFInfo<dim> &info);
+
+      /**
+       * Currently just calls the one-argument assemble() for each of the two objects.
+       */
+      template<int dim>
+      void assemble(const DoFInfo<dim> &info1,
+                    const DoFInfo<dim> &info2);
+
+    private:
+      /**
+       * Write the object @p t either to the stream #os, if initialize_stream()
+       * has been called, or to @p deallog if no pointer has been set.
+       */
+      template<typename T>
+      void write(const T &t) const;
+
+      /**
+       * Write an end-of-line marker either to the stream #os, if
+       * initialize_stream has been called, or to @p deallog if no pointer has
+       * been set.
+       */
+      void write_endl () const;
+
+      /**
+       * The number of output components in each point, set by initialize().
+       */
+      unsigned int n_vectors;
+      /**
+       * The number of points in one direction, set by initialize().
+       */
+      unsigned int n_points;
+
+      /**
+       * Stream to which output is to be written. Set by initialize_stream().
+       */
+      std::ostream *os;
+    };
+
+//----------------------------------------------------------------------//
+
+    // Send a single value to the selected stream, or to deallog if none is set.
+    template <typename T>
+    inline void GnuplotPatch::write(const T &value) const
+    {
+      if (os != 0)
+        *os << value;
+      else
+        deallog << value;
+    }
+
+
+    // Terminate the current line on the selected stream, or on deallog if none is set.
+    inline void GnuplotPatch::write_endl() const
+    {
+      if (os != 0)
+        *os << std::endl;
+      else
+        deallog << std::endl;
+    }
+
+
+    // Sizes hold an invalid marker until initialize() is called; a null stream selects deallog.
+    inline GnuplotPatch::GnuplotPatch()
+      :
+      n_vectors(numbers::invalid_unsigned_int), n_points(numbers::invalid_unsigned_int), os(0)
+    {}
+
+
+    // Record the number of points per direction (np) and of data vectors (nv).
+    inline void GnuplotPatch::initialize (const unsigned int np,
+                                          const unsigned int nv)
+    {
+      n_points = np;
+      n_vectors = nv;
+    }
+
+
+    // Remember the address of the stream that write() and write_endl() will use.
+    inline void GnuplotPatch::initialize_stream (std::ostream &stream)
+    {
+      os = &stream;
+    }
+
+
+    // Call initialize_quadrature() for n_points^(dim-1) points on a face or n_points^dim on a cell.
+    template <int dim>
+    inline void GnuplotPatch::initialize_info(DoFInfo<dim> &info, bool face)
+    {
+      const unsigned int n_values = n_vectors+dim;
+      const unsigned int n_q_points = face ? Utilities::fixed_power<dim-1>(n_points)
+                                      : Utilities::fixed_power<dim>(n_points);
+      info.initialize_quadrature(n_q_points, n_values);
+    }
+
+
+    // Write each quadrature point of info as one line of tab-separated values. An extra end-of-line
+    // precedes every row of n_points points and, once more, every complete patch block.
+    template <int dim>
+    inline void GnuplotPatch::assemble(const DoFInfo<dim> &info)
+    {
+      const unsigned int np = info.n_quadrature_points();
+      const unsigned int nv = info.n_quadrature_values();
+      AssertDimension(nv, n_vectors+dim);
+
+      // An invalid face number is taken to mean a cell of dimension dim, anything else a face.
+      const bool is_cell = (info.face_number == numbers::invalid_unsigned_int);
+      const unsigned int patch_dim = is_cell ? dim : (dim-1);
+      // A 1D patch block ends after a single row, any other after a square of rows.
+      const unsigned int row_length = n_points;
+      unsigned int block_size = row_length*row_length;
+      if (patch_dim == 1)
+        block_size = row_length;
+
+      for (unsigned int point=0; point<np; ++point)
+        {
+          if (point % row_length == 0)
+            write_endl();
+          if (point % block_size == 0)
+            write_endl();
+
+          for (unsigned int component=0; component<nv; ++component)
+            {
+              write(info.quadrature_value(point,component));
+              write('\t');
+            }
+          write_endl();
+        }
+    }
+
+
+    // Write the patch of info1 followed by the patch of info2.
+    template <int dim>
+    inline void GnuplotPatch::assemble(const DoFInfo<dim> &info1, const DoFInfo<dim> &info2)
+    {
+      this->assemble(info1);
+      this->assemble(info2);
+    }
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/meshworker/simple.h b/include/deal.II/meshworker/simple.h
new file mode 100644
index 0000000..bf8993b
--- /dev/null
+++ b/include/deal.II/meshworker/simple.h
@@ -0,0 +1,1163 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__mesh_worker_simple_h
+#define dealii__mesh_worker_simple_h
+
+#include <deal.II/algorithms/any_data.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/base/mg_level_object.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/meshworker/dof_info.h>
+#include <deal.II/meshworker/functional.h>
+#include <deal.II/multigrid/mg_constrained_dofs.h>
+
+/*
+ * The header containing the classes MeshWorker::Assembler::MatrixSimple,
+ * MeshWorker::Assembler::MGMatrixSimple, MeshWorker::Assembler::ResidualSimple,
+ * and MeshWorker::Assembler::SystemSimple.
+ */
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace MeshWorker
+{
+  namespace Assembler
+  {
+    /**
+     * Assemble residuals without block structure.
+     *
+     * The data structure for this Assembler class is a simple vector on each
+     * cell with entries from zero to FiniteElementData::dofs_per_cell and a
+     * simple global vector with entries numbered from zero to
+     * DoFHandler::n_dofs(). No BlockInfo is required and the global vector
+     * may be any type of vector having element access through <tt>operator()
+     * (unsigned int)</tt>
+     *
+     * @ingroup MeshWorker
+     * @author Guido Kanschat, 2009
+     */
+    template <typename VectorType>
+    class ResidualSimple
+    {
+    public:
+      /**
+       * Initialize with an AnyData object holding the result of assembling.
+       *
+       * Local vector number k is assembled into entry number k of
+       * <tt>results</tt>, read as a pointer to VectorType.
+       */
+      void initialize(AnyData &results);
+
+      /**
+       * Initialize the constraints.
+       */
+      void initialize(const ConstraintMatrix &constraints);
+
+      /**
+       * @deprecated This function is of no effect. Only the block info
+       * structure in DoFInfo is being used.
+       *
+       * Store information on the local block structure. If the assembler is
+       * initialized with this function, initialize_info() will generate one
+       * local matrix for each block row and column, which will be numbered
+       * lexicographically, row by row.
+       *
+       * In spite of using local block structure, all blocks will be entered
+       * into the same global matrix, disregarding any global block structure.
+       */
+
+      void initialize_local_blocks(const BlockIndices &);
+
+      /**
+       * Initialize the local data in the DoFInfo object used later for
+       * assembling.
+       *
+       * The info object refers to a cell if <code>!face</code>, or else to an
+       * interior or boundary face.
+       */
+      template <class DOFINFO>
+      void initialize_info(DOFINFO &info, bool face) const;
+
+      /**
+       * Assemble the local residuals into the global residuals.
+       *
+       * Values are added to the previous contents. If constraints are active,
+       * ConstraintMatrix::distribute_local_to_global() is used.
+       */
+      template <class DOFINFO>
+      void assemble(const DOFINFO &info);
+
+      /**
+       * Assemble both local residuals into the global residuals.
+       */
+      template<class DOFINFO>
+      void assemble(const DOFINFO &info1,
+                    const DOFINFO &info2);
+    private:
+      /**
+       * The global residual vectors filled by assemble().
+       */
+      AnyData residuals;
+      /**
+       * A pointer to the object containing constraints.
+       */
+      SmartPointer<const ConstraintMatrix,ResidualSimple<VectorType> > constraints;
+    };
+
+
+    /**
+     * Assemble local matrices into a single global matrix or several global
+     * matrices associated with the same DoFHandler. If these global matrices
+     * have a block structure, this structure is not used, but rather the
+     * global numbering of degrees of freedom.
+     *
+     * After being initialized with a SparseMatrix object (or another matrix
+     * offering the same functionality as SparseMatrix::add()) or a vector of
+     * such, this class can be used in a MeshWorker::loop() to assemble the
+     * cell and face matrices into the global matrix.
+     *
+     * If a ConstraintMatrix has been provided during initialization, this
+     * matrix will be used (ConstraintMatrix::distribute_local_to_global(), to
+     * be precise) to enter the local matrix into the global sparse matrix.
+     *
+     * The assembler can handle two different types of local data. First, by
+     * default, the obvious choice of taking a single local matrix with
+     * dimensions equal to the number of degrees of freedom of the cell.
+     * Alternatively, a local block structure can be initialized in DoFInfo.
+     * After this, the local data will be arranged as an array of n by n
+     * FullMatrix blocks (n being the number of blocks in the FESystem used by
+     * the DoFHandler in DoFInfo), which are ordered lexicographically with
+     * column index fastest in DoFInfo. If the matrix was initialized with a
+     * vector of several matrices and local block structure is used, then the
+     * first n<sup>2</sup> matrices in LocalResults will be used for the first
+     * matrix in this vector, the second set of n<sup>2</sup> for the second,
+     * and so on.
+     *
+     * @ingroup MeshWorker
+     * @author Guido Kanschat, 2009
+     */
+    template <typename MatrixType>
+    class MatrixSimple
+    {
+    public:
+      /**
+       * Constructor, initializing the #threshold, which limits how small
+       * numbers may be to be entered into the matrix.
+       */
+      MatrixSimple(double threshold = 1.e-12);
+
+      /**
+       * Store the result matrix for later assembling.
+       */
+      void initialize(MatrixType &m);
+
+      /**
+       * Store several result matrices for later assembling.
+       */
+      void initialize(std::vector<MatrixType> &m);
+
+      /**
+       * Initialize the constraints. After this function has been called with
+       * a valid ConstraintMatrix, the function
+       * ConstraintMatrix::distribute_local_to_global() will be used by
+       * assemble() to distribute the cell and face matrices into a global
+       * sparse matrix.
+       */
+      void initialize(const ConstraintMatrix &constraints);
+
+      /**
+       * Initialize the local data in the DoFInfo object used later for
+       * assembling.
+       *
+       * The info object refers to a cell if <code>!face</code>, or else to an
+       * interior or boundary face.
+       */
+      template <class DOFINFO>
+      void initialize_info(DOFINFO &info, bool face) const;
+
+      /**
+       * Assemble the local matrices associated with a single cell into the
+       * global matrix.
+       */
+      template<class DOFINFO>
+      void assemble(const DOFINFO &info);
+
+      /**
+       * Assemble all local matrices associated with an interior face in the
+       * info objects into the global matrix.
+       */
+      template<class DOFINFO>
+      void assemble(const DOFINFO &info1,
+                    const DOFINFO &info2);
+    private:
+      /**
+       * Assemble a single matrix <code>M</code> into the element at <code>index</code>
+       * in the vector #matrix, with global rows <code>i1</code> and columns <code>i2</code>.
+       */
+      void assemble(const FullMatrix<double> &M,
+                    const unsigned int index,
+                    const std::vector<types::global_dof_index> &i1,
+                    const std::vector<types::global_dof_index> &i2);
+
+      /**
+       * The vector of global matrices being assembled.
+       */
+      std::vector<SmartPointer<MatrixType,MatrixSimple<MatrixType> > > matrix;
+      /**
+       * A pointer to the object containing constraints.
+       */
+      SmartPointer<const ConstraintMatrix,MatrixSimple<MatrixType> > constraints;
+
+      /**
+       * The smallest absolute value that will be entered into the global
+       * matrix. Smaller entries are skipped, but only if no constraints were
+       * set; otherwise the whole local matrix is handed to the constraints.
+       */
+      const double threshold;
+
+    };
+
+
+    /**
+     * Assemble local matrices into level matrices without using block
+     * structure.
+     *
+     * @todo The matrix structures needed for assembling level matrices with
+     * local refinement and continuous elements are missing.
+     *
+     * @ingroup MeshWorker
+     * @author Guido Kanschat, 2009
+     */
+    template <typename MatrixType>
+    class MGMatrixSimple
+    {
+    public:
+      /**
+       * Constructor, initializing the #threshold, which limits how small
+       * numbers may be to be entered into the matrix.
+       */
+      MGMatrixSimple(double threshold = 1.e-12);
+
+      /**
+       * Store the result matrix for later assembling.
+       */
+      void initialize(MGLevelObject<MatrixType> &m);
+
+      /**
+       * Initialize the multilevel constraints.
+       */
+      void initialize(const MGConstrainedDoFs &mg_constrained_dofs);
+
+      /**
+       * Initialize the matrices #flux_up and #flux_down used for local
+       * refinement with discontinuous Galerkin methods.
+       */
+      void initialize_fluxes(MGLevelObject<MatrixType> &flux_up,
+                             MGLevelObject<MatrixType> &flux_down);
+
+      /**
+       * Initialize the matrices #interface_in and #interface_out used for
+       * local refinement with continuous Galerkin methods.
+       */
+
+      void initialize_interfaces(MGLevelObject<MatrixType> &interface_in,
+                                 MGLevelObject<MatrixType> &interface_out);
+      /**
+       * Initialize the local data in the DoFInfo object used later for
+       * assembling.
+       *
+       * The info object refers to a cell if <code>!face</code>, or else to an
+       * interior or boundary face.
+       */
+      template <class DOFINFO>
+      void initialize_info(DOFINFO &info, bool face) const;
+
+      /**
+       * Assemble the matrix DoFInfo::M1[0] into the global matrix.
+       */
+      template<class DOFINFO>
+      void assemble(const DOFINFO &info);
+
+      /**
+       * Assemble both local matrices in the info objects into the global
+       * matrices.
+       */
+      template<class DOFINFO>
+      void assemble(const DOFINFO &info1,
+                    const DOFINFO &info2);
+    private:
+      /**
+       * Assemble a single matrix into a global matrix.
+       */
+      void assemble(MatrixType &G,
+                    const FullMatrix<double> &M,
+                    const std::vector<types::global_dof_index> &i1,
+                    const std::vector<types::global_dof_index> &i2);
+
+      /**
+       * Assemble a single matrix into a global matrix.
+       */
+      void assemble(MatrixType &G,
+                    const FullMatrix<double> &M,
+                    const std::vector<types::global_dof_index> &i1,
+                    const std::vector<types::global_dof_index> &i2,
+                    const unsigned int level);
+
+      /**
+       * Assemble a single matrix into a global matrix.
+       */
+
+      void assemble_up(MatrixType &G,
+                       const FullMatrix<double> &M,
+                       const std::vector<types::global_dof_index> &i1,
+                       const std::vector<types::global_dof_index> &i2,
+                       const unsigned int level = numbers::invalid_unsigned_int);
+      /**
+       * Assemble a single matrix into a global matrix.
+       */
+
+      void assemble_down(MatrixType &G,
+                         const FullMatrix<double> &M,
+                         const std::vector<types::global_dof_index> &i1,
+                         const std::vector<types::global_dof_index> &i2,
+                         const unsigned int level = numbers::invalid_unsigned_int);
+
+      /**
+       * Assemble a single matrix into a global matrix.
+       */
+
+      void assemble_in(MatrixType &G,
+                       const FullMatrix<double> &M,
+                       const std::vector<types::global_dof_index> &i1,
+                       const std::vector<types::global_dof_index> &i2,
+                       const unsigned int level = numbers::invalid_unsigned_int);
+
+      /**
+       * Assemble a single matrix into a global matrix.
+       */
+
+      void assemble_out(MatrixType &G,
+                        const FullMatrix<double> &M,
+                        const std::vector<types::global_dof_index> &i1,
+                        const std::vector<types::global_dof_index> &i2,
+                        const unsigned int level = numbers::invalid_unsigned_int);
+
+      /**
+       * The global matrix being assembled.
+       */
+      SmartPointer<MGLevelObject<MatrixType>,MGMatrixSimple<MatrixType> > matrix;
+
+      /**
+       * The matrix used for face flux terms across the refinement edge,
+       * coupling coarse to fine.
+       */
+      SmartPointer<MGLevelObject<MatrixType>,MGMatrixSimple<MatrixType> > flux_up;
+
+      /**
+       * The matrix used for face flux terms across the refinement edge,
+       * coupling fine to coarse.
+       */
+      SmartPointer<MGLevelObject<MatrixType>,MGMatrixSimple<MatrixType> > flux_down;
+
+      /**
+       * The matrix used for face contributions for continuous elements across
+       * the refinement edge, coupling coarse to fine.
+       */
+      SmartPointer<MGLevelObject<MatrixType>,MGMatrixSimple<MatrixType> > interface_in;
+
+      /**
+       * The matrix used for face contributions for continuous elements across
+       * the refinement edge, coupling fine to coarse.
+       */
+      SmartPointer<MGLevelObject<MatrixType>,MGMatrixSimple<MatrixType> > interface_out;
+      /**
+       * A pointer to the object containing constraints.
+       */
+      SmartPointer<const MGConstrainedDoFs,MGMatrixSimple<MatrixType> > mg_constrained_dofs;
+
+      /**
+       * The smallest positive number that will be entered into the global
+       * matrix. All smaller absolute values will be treated as zero and will
+       * not be assembled.
+       */
+      const double threshold;
+
+    };
+
+
+    /**
+     * Assemble a simple matrix and a simple right hand side at once. We use a
+     * combination of MatrixSimple and ResidualSimple to achieve this. Cell
+     * and face operators should fill the matrix and vector objects in
+     * LocalResults and this class will assemble them into matrix and vector
+     * objects.
+     *
+     * @ingroup MeshWorker
+     * @author Guido Kanschat, 2009
+     */
+    template <typename MatrixType, typename VectorType>
+    class SystemSimple :
+      private MatrixSimple<MatrixType>,
+      private ResidualSimple<VectorType>
+    {
+    public:
+      /**
+       * Constructor setting the threshold value in MatrixSimple.
+       */
+      SystemSimple(double threshold = 1.e-12);
+
+      /**
+       * Store the two objects data is assembled into.
+       */
+      void initialize(MatrixType &m, VectorType &rhs);
+
+      /**
+       * Initialize the constraints. After this function has been called with
+       * a valid ConstraintMatrix, the function
+       * ConstraintMatrix::distribute_local_to_global() will be used by
+       * assemble() to distribute the cell and face matrices into a global
+       * sparse matrix.
+       */
+      void initialize(const ConstraintMatrix &constraints);
+
+      /**
+       * Initialize the local data in the DoFInfo object used later for
+       * assembling.
+       *
+       * The info object refers to a cell if <code>!face</code>, or else to an
+       * interior or boundary face.
+       */
+      template <class DOFINFO>
+      void initialize_info(DOFINFO &info, bool face) const;
+
+      /**
+       * Assemble the matrix DoFInfo::M1[0] into the global matrix.
+       */
+      template<class DOFINFO>
+      void assemble(const DOFINFO &info);
+
+      /**
+       * Assemble both local matrices in the info objects into the global
+       * matrix.
+       */
+      template<class DOFINFO>
+      void assemble(const DOFINFO &info1,
+                    const DOFINFO &info2);
+    };
+
+
+//----------------------------------------------------------------------//
+
+    // Keep a copy of the AnyData object naming the global vectors to assemble into.
+    template <typename VectorType>
+    inline void ResidualSimple<VectorType>::initialize(AnyData &results)
+    {
+      residuals = results;
+    }
+
+    // Remember the constraints; assemble() then goes through distribute_local_to_global().
+    template <typename VectorType>
+    inline void ResidualSimple<VectorType>::initialize(const ConstraintMatrix &c)
+    {
+      constraints = &c;
+    }
+
+
+    // Deliberately empty: the argument is ignored.
+    template <typename VectorType>
+    inline void ResidualSimple<VectorType>::initialize_local_blocks(const BlockIndices &)
+    {}
+
+
+    // Ask info for as many local vectors as there are stored residuals; the face flag is unused.
+    template <typename VectorType>
+    template <class DOFINFO>
+    inline void ResidualSimple<VectorType>::initialize_info(DOFINFO &info, bool) const
+    {
+      info.initialize_vectors(residuals.size());
+    }
+
+
+    // Add the local vectors of info to the global residuals, through the
+    // constraints object if one has been set.
+    template <typename VectorType>
+    template <class DOFINFO>
+    inline void ResidualSimple<VectorType>::assemble(const DOFINFO &info)
+    {
+      for (unsigned int k=0; k<residuals.size(); ++k)
+        {
+          VectorType *const global = residuals.entry<VectorType *>(k);
+          if (constraints == 0)
+            {
+              // No constraints: add block 0 entry by entry at the global indices.
+              for (unsigned int i=0; i<info.vector(k).block(0).size(); ++i)
+                (*global)(info.indices[i]) += info.vector(k).block(0)(i);
+            }
+          else if (info.indices_by_block.size() == 0)
+            constraints->distribute_local_to_global(info.vector(k).block(0), info.indices, *global);
+          else
+            // One call per local block, each with the index set of that block.
+            for (unsigned int b=0; b != info.vector(k).n_blocks(); ++b)
+              constraints->distribute_local_to_global(info.vector(k).block(b), info.indices_by_block[b], *global);
+        }
+    }
+
+    // Add the local vectors of info1 and info2 to the global residuals,
+    // through the constraints object if one has been set.
+    template <typename VectorType>
+    template <class DOFINFO>
+    inline void ResidualSimple<VectorType>::assemble(const DOFINFO &info1,
+                                                     const DOFINFO &info2)
+    {
+      for (unsigned int k=0; k<residuals.size(); ++k)
+        {
+          VectorType *const global = residuals.entry<VectorType *>(k);
+          if (constraints == 0)
+            {
+              // No constraints: add block 0 of either side entry by entry.
+              for (unsigned int i=0; i<info1.vector(k).block(0).size(); ++i)
+                (*global)(info1.indices[i]) += info1.vector(k).block(0)(i);
+              for (unsigned int i=0; i<info2.vector(k).block(0).size(); ++i)
+                (*global)(info2.indices[i]) += info2.vector(k).block(0)(i);
+              continue;
+            }
+
+          const bool blocked1 = (info1.indices_by_block.size() != 0);
+          const bool blocked2 = (info2.indices_by_block.size() != 0);
+          if (!blocked1 && !blocked2)
+            {
+              constraints->distribute_local_to_global(info1.vector(k).block(0), info1.indices, *global);
+              constraints->distribute_local_to_global(info2.vector(k).block(0), info2.indices, *global);
+            }
+          else if (blocked1 && blocked2)
+            {
+              // Alternate between the two sides, block by block.
+              for (unsigned int b=0; b<info1.vector(k).n_blocks(); ++b)
+                {
+                  constraints->distribute_local_to_global(info1.vector(k).block(b), info1.indices_by_block[b], *global);
+                  constraints->distribute_local_to_global(info2.vector(k).block(b), info2.indices_by_block[b], *global);
+                }
+            }
+          else
+            {
+              Assert(false, ExcNotImplemented());
+            }
+        }
+    }
+
+
+//----------------------------------------------------------------------//
+
+    // Store the cut-off below which entries are not added when no constraints are set.
+    template <typename MatrixType>
+    inline MatrixSimple<MatrixType>::MatrixSimple(double threshold)
+      :
+      threshold(threshold)
+    {}
+
+
+    // Assemble into the single matrix m: keep its address as the only entry of #matrix.
+    template <typename MatrixType>
+    inline void MatrixSimple<MatrixType>::initialize(MatrixType &m)
+    {
+      matrix.resize(1);
+      matrix[0] = &m;
+    }
+
+
+    // Store the address of every element of m, in order.
+    template <typename MatrixType>
+    inline void MatrixSimple<MatrixType>::initialize(std::vector<MatrixType> &m)
+    {
+      matrix.resize(m.size());
+      for (unsigned int index=0; index<m.size(); ++index)
+        matrix[index] = &m[index];
+    }
+
+
+    // Remember the constraints; assemble() then goes through distribute_local_to_global().
+    template <typename MatrixType>
+    inline void MatrixSimple<MatrixType>::initialize(const ConstraintMatrix &c)
+    {
+      constraints = &c;
+    }
+
+
+    // Size the local matrices in info: one per global matrix without local block
+    // structure, otherwise n*n per global matrix, labelled with block row and column.
+    template <typename MatrixType>
+    template <class DOFINFO>
+    inline void MatrixSimple<MatrixType>::initialize_info(DOFINFO &info, bool face) const
+    {
+      Assert(matrix.size() != 0, ExcNotInitialized());
+      const unsigned int n_blocks = info.indices_by_block.size();
+      if (n_blocks == 0)
+        {
+          info.initialize_matrices(matrix.size(), face);
+          return;
+        }
+      info.initialize_matrices(matrix.size()*n_blocks*n_blocks, face);
+      for (unsigned int m=0; m<matrix.size(); ++m)
+        for (unsigned int row=0; row<n_blocks; ++row)
+          for (unsigned int col=0; col<n_blocks; ++col)
+            {
+              // Lexicographic position with the column index running fastest.
+              const unsigned int k = (m*n_blocks + row)*n_blocks + col;
+              info.matrix(k,false).row = row;
+              info.matrix(k,false).column = col;
+              if (face)
+                {
+                  info.matrix(k,true).row = row;
+                  info.matrix(k,true).column = col;
+                }
+            }
+    }
+
+
+
+    // Add M to global matrix number index: through the constraints if they are
+    // set, otherwise entry by entry while skipping entries below the threshold.
+    template <typename MatrixType>
+    inline void MatrixSimple<MatrixType>::assemble(const FullMatrix<double> &M,
+                                                   const unsigned int index,
+                                                   const std::vector<types::global_dof_index> &i1,
+                                                   const std::vector<types::global_dof_index> &i2)
+    {
+      AssertDimension(M.m(), i1.size());
+      AssertDimension(M.n(), i2.size());
+      if (constraints != 0)
+        {
+          constraints->distribute_local_to_global(M, i1, i2, *matrix[index]);
+          return;
+        }
+      for (unsigned int row=0; row<i1.size(); ++row)
+        for (unsigned int col=0; col<i2.size(); ++col)
+          if (std::fabs(M(row,col)) >= threshold)
+            matrix[index]->add(i1[row], i2[col], M(row,col));
+    }
+
+
+    // Add all local cell matrices of info to the global matrices, using the
+    // stored block row and column when local blocks are present.
+    template <typename MatrixType>
+    template <class DOFINFO>
+    inline void MatrixSimple<MatrixType>::assemble(const DOFINFO &info)
+    {
+      Assert(!info.level_cell, ExcMessage("Cell may not access level dofs"));
+      const unsigned int n_blocks = info.indices_by_block.size();
+
+      for (unsigned int m=0; m<matrix.size(); ++m)
+        {
+          if (n_blocks == 0)
+            {
+              assemble(info.matrix(m,false).matrix, m, info.indices, info.indices);
+              continue;
+            }
+          for (unsigned int k=m*n_blocks*n_blocks; k<(m+1)*n_blocks*n_blocks; ++k)
+            assemble(info.matrix(k,false).matrix, m,
+                     info.indices_by_block[info.matrix(k,false).row],
+                     info.indices_by_block[info.matrix(k,false).column]);
+        }
+    }
+
+
+    // For every global matrix: matrices with flag false couple an info object
+    // with itself, those with flag true couple it with the other info object.
+    template <typename MatrixType>
+    template <class DOFINFO>
+    inline void MatrixSimple<MatrixType>::assemble(const DOFINFO &info1, const DOFINFO &info2)
+    {
+      Assert(!info1.level_cell, ExcMessage("Cell may not access level dofs"));
+      Assert(!info2.level_cell, ExcMessage("Cell may not access level dofs"));
+      AssertDimension(info1.indices_by_block.size(),info2.indices_by_block.size());
+
+      const unsigned int n_blocks = info1.indices_by_block.size();
+
+      for (unsigned int m=0; m<matrix.size(); ++m)
+        {
+          if (n_blocks == 0)
+            {
+              assemble(info1.matrix(m,false).matrix, m, info1.indices, info1.indices);
+              assemble(info1.matrix(m,true).matrix, m, info1.indices, info2.indices);
+              assemble(info2.matrix(m,false).matrix, m, info2.indices, info2.indices);
+              assemble(info2.matrix(m,true).matrix, m, info2.indices, info1.indices);
+              continue;
+            }
+
+          for (unsigned int k=m*n_blocks*n_blocks; k<(m+1)*n_blocks*n_blocks; ++k)
+            {
+              // Block position as recorded on the flag-false matrix of info1.
+              const unsigned int row = info1.matrix(k,false).row;
+              const unsigned int column = info1.matrix(k,false).column;
+
+              assemble(info1.matrix(k,false).matrix, m,
+                       info1.indices_by_block[row], info1.indices_by_block[column]);
+              assemble(info1.matrix(k,true).matrix, m,
+                       info1.indices_by_block[row], info2.indices_by_block[column]);
+              assemble(info2.matrix(k,false).matrix, m,
+                       info2.indices_by_block[row], info2.indices_by_block[column]);
+              assemble(info2.matrix(k,true).matrix, m,
+                       info2.indices_by_block[row], info1.indices_by_block[column]);
+            }
+        }
+    }
+
+
+//----------------------------------------------------------------------//
+
+    // Remember the threshold that the assemble functions compare entries against.
+    template <typename MatrixType>
+    inline
+    MGMatrixSimple<MatrixType>::MGMatrixSimple(double threshold)
+      : threshold(threshold)
+    {}
+
+
+    template <typename MatrixType>
+    inline void
+    MGMatrixSimple<MatrixType>::initialize(MGLevelObject<MatrixType> &m)
+    {
+      matrix = &m;   // address only, no copy; assemble() writes into (*matrix)[level]
+    }
+
+    template <typename MatrixType>
+    inline void
+    MGMatrixSimple<MatrixType>::initialize(const MGConstrainedDoFs &c)
+    {
+      mg_constrained_dofs = &c;   // once non-null, the assemble functions take their constrained branches
+    }
+
+
+    template <typename MatrixType>
+    inline void
+    MGMatrixSimple<MatrixType>::initialize_fluxes(MGLevelObject<MatrixType> &up,
+                                                  MGLevelObject<MatrixType> &down)
+    {
+      flux_down = &down;   // level matrices written through assemble_down()
+      flux_up   = &up;     // level matrices written through assemble_up()
+    }
+
+
+    template <typename MatrixType>
+    inline void
+    MGMatrixSimple<MatrixType>::initialize_interfaces
+    (MGLevelObject<MatrixType> &in, MGLevelObject<MatrixType> &out)
+    {
+      interface_in = &in;     // level matrices filled through assemble_in()
+      interface_out = &out;   // level matrices filled through assemble_out()
+    }
+
+
+    // Let @p info set up its local matrices and tag each with its block row/column.
+    template <typename MatrixType >
+    template <class DOFINFO>
+    inline void
+    MGMatrixSimple<MatrixType>::initialize_info(DOFINFO &info, bool face) const
+    {
+      const unsigned int n_blocks = info.indices_by_block.size();
+
+      if (n_blocks == 0)
+        {
+          info.initialize_matrices(1, face);
+          return;
+        }
+
+      info.initialize_matrices(n_blocks*n_blocks, face);
+      // Local matrix k belongs to block (k / n_blocks, k % n_blocks)
+      for (unsigned int k=0; k<n_blocks*n_blocks; ++k)
+        {
+          info.matrix(k,false).row    = k / n_blocks;
+          info.matrix(k,false).column = k % n_blocks;
+          if (!face)
+            continue;
+          info.matrix(k,true).row    = k / n_blocks;
+          info.matrix(k,true).column = k % n_blocks;
+        }
+    }
+
+
+    template <typename MatrixType>
+    inline void
+    MGMatrixSimple<MatrixType>::assemble
+    (MatrixType                                 &G,
+     const FullMatrix<double>                   &M,
+     const std::vector<types::global_dof_index> &i1,
+     const std::vector<types::global_dof_index> &i2)
+    {
+      AssertDimension(M.m(), i1.size());
+      AssertDimension(M.n(), i2.size());
+      Assert(mg_constrained_dofs == 0, ExcInternalError());
+//TODO: Possibly remove this function altogether
+      // Add each local entry whose magnitude reaches the threshold to G
+      for (unsigned int row=0; row<i1.size(); ++row)
+        for (unsigned int col=0; col<i2.size(); ++col)
+          if (std::fabs(M(row,col)) >= threshold)
+            G.add(i1[row], i2[col], M(row,col));
+    }
+
+
+    template <typename MatrixType>
+    inline void
+    MGMatrixSimple<MatrixType>::assemble
+    (MatrixType                                 &G,
+     const FullMatrix<double>                   &M,
+     const std::vector<types::global_dof_index> &i1,
+     const std::vector<types::global_dof_index> &i2,
+     const unsigned int                          level)
+    {
+      AssertDimension(M.m(), i1.size());
+      AssertDimension(M.n(), i2.size());
+
+      if (mg_constrained_dofs == 0)   // no level constraints: thresholded add only
+        {
+          for (unsigned int j=0; j<i1.size(); ++j)
+            for (unsigned int k=0; k<i2.size(); ++k)
+              if (std::fabs(M(j,k)) >= threshold)
+                G.add(i1[j], i2[k], M(j,k));
+        }
+      else
+        {
+          for (unsigned int j=0; j<i1.size(); ++j)
+            for (unsigned int k=0; k<i2.size(); ++k)
+              {
+                // Skip local values whose absolute value is below the
+                // threshold; all others may enter the global matrix
+                if (std::fabs(M(j,k)) < threshold)
+                  continue;
+
+                // Do not enter, if either the row or the column
+                // corresponds to an index on the refinement edge. The
+                // level problems are solved with homogeneous
+                // Dirichlet boundary conditions, therefore we
+                // eliminate these rows and columns. The corresponding
+                // matrix entries are entered by assemble_in() and
+                // assemble_out().
+                if (mg_constrained_dofs->at_refinement_edge(level, i1[j]) ||
+                    mg_constrained_dofs->at_refinement_edge(level, i2[k]))
+                  continue;
+
+                // At the boundary, only enter the term on the
+                // diagonal, but not the coupling terms
+                if ((mg_constrained_dofs->is_boundary_index(level, i1[j]) ||
+                     mg_constrained_dofs->is_boundary_index(level, i2[k])) &&
+                    (i1[j] != i2[k]))
+                  continue;
+
+                G.add(i1[j], i2[k], M(j,k));
+              }
+        }
+    }
+
+
+    // Add the transpose of @p M to @p G: entry M(k,j) goes to (i1[j], i2[k]).
+    template <typename MatrixType>
+    inline void
+    MGMatrixSimple<MatrixType>::assemble_up
+    (MatrixType                                 &G,
+     const FullMatrix<double>                   &M,
+     const std::vector<types::global_dof_index> &i1,
+     const std::vector<types::global_dof_index> &i2,
+     const unsigned int                          level)
+    {
+      AssertDimension(M.n(), i1.size());
+      AssertDimension(M.m(), i2.size());
+
+      for (unsigned int j=0; j<i1.size(); ++j)
+        for (unsigned int k=0; k<i2.size(); ++k)
+          {
+            // Drop entries whose magnitude does not reach the threshold
+            if (!(std::fabs(M(k,j)) >= threshold))
+              continue;
+
+            // With constraints set, skip columns on the refinement edge
+            if (mg_constrained_dofs != 0 &&
+                mg_constrained_dofs->at_refinement_edge(level, i2[k]))
+              continue;
+
+            G.add(i1[j], i2[k], M(k,j));
+          }
+    }
+
+    // Add @p M to @p G: entry M(j,k) goes to position (i1[j], i2[k]).
+    template <typename MatrixType>
+    inline void
+    MGMatrixSimple<MatrixType>::assemble_down
+    (MatrixType                                 &G,
+     const FullMatrix<double>                   &M,
+     const std::vector<types::global_dof_index> &i1,
+     const std::vector<types::global_dof_index> &i2,
+     const unsigned int                          level)
+    {
+      AssertDimension(M.m(), i1.size());
+      AssertDimension(M.n(), i2.size());
+
+      for (unsigned int j=0; j<i1.size(); ++j)
+        for (unsigned int k=0; k<i2.size(); ++k)
+          {
+            // Drop entries whose magnitude does not reach the threshold
+            if (!(std::fabs(M(j,k)) >= threshold))
+              continue;
+
+            // With constraints set, skip columns on the refinement edge
+            if (mg_constrained_dofs != 0 &&
+                mg_constrained_dofs->at_refinement_edge(level, i2[k]))
+              continue;
+
+            G.add(i1[j], i2[k], M(j,k));
+          }
+    }
+
+    template <typename MatrixType>
+    inline void
+    MGMatrixSimple<MatrixType>::assemble_in
+    (MatrixType                                 &G,
+     const FullMatrix<double>                   &M,
+     const std::vector<types::global_dof_index> &i1,
+     const std::vector<types::global_dof_index> &i2,
+     const unsigned int                          level)
+    {
+      AssertDimension(M.m(), i1.size());
+      AssertDimension(M.n(), i2.size());
+      Assert(mg_constrained_dofs != 0, ExcInternalError());
+
+      for (unsigned int j=0; j<i1.size(); ++j)
+        for (unsigned int k=0; k<i2.size(); ++k)
+          if (std::fabs(M(j,k)) >= threshold)
+            // Enter values into matrix only if j corresponds to a
+            // degree of freedom on the refinement edge, k does
+            // not, and both are not on the boundary. This is part
+            // of the difference between the complete matrix with no
+            // boundary condition at the refinement edge and
+            // the matrix assembled above by assemble().
+
+            // Thus the logic is: enter the row if it is
+            // constrained by hanging node constraints (actually,
+            // the whole refinement edge), but not if it is
+            // constrained by a boundary constraint.
+            if (mg_constrained_dofs->at_refinement_edge(level, i1[j]) &&
+                !mg_constrained_dofs->at_refinement_edge(level, i2[k]))
+              {
+                if ((!mg_constrained_dofs->is_boundary_index(level, i1[j]) &&
+                     !mg_constrained_dofs->is_boundary_index(level, i2[k]))
+                    ||
+                    (mg_constrained_dofs->is_boundary_index(level, i1[j]) &&
+                     mg_constrained_dofs->is_boundary_index(level, i2[k]) &&
+                     i1[j] == i2[k]))   // NOTE(review): looks unreachable, row is on the edge and column is not -- confirm
+                  G.add(i1[j], i2[k], M(j,k));
+              }
+    }
+
+
+    template <typename MatrixType>
+    inline void
+    MGMatrixSimple<MatrixType>::assemble_out
+    (MatrixType                                 &G,
+     const FullMatrix<double>                   &M,
+     const std::vector<types::global_dof_index> &i1,
+     const std::vector<types::global_dof_index> &i2,
+     const unsigned int                          level)
+    {
+      AssertDimension(M.n(), i1.size());
+      AssertDimension(M.m(), i2.size());
+      Assert(mg_constrained_dofs != 0, ExcInternalError());
+
+      for (unsigned int j=0; j<i1.size(); ++j)
+        for (unsigned int k=0; k<i2.size(); ++k)
+          {
+            // Transposed entry M(k,j): row on the refinement edge, column off it
+            if (!(std::fabs(M(k,j)) >= threshold) ||
+                !mg_constrained_dofs->at_refinement_edge(level, i1[j]) ||
+                mg_constrained_dofs->at_refinement_edge(level, i2[k]))
+              continue;
+            // Accept if no boundary dof is involved, or on a boundary diagonal
+            const bool row_bdry = mg_constrained_dofs->is_boundary_index(level, i1[j]);
+            const bool col_bdry = mg_constrained_dofs->is_boundary_index(level, i2[k]);
+            if ((!row_bdry && !col_bdry) || (row_bdry && col_bdry && i1[j] == i2[k]))
+              G.add(i1[j], i2[k], M(k,j));
+          }
+    }
+
+
+    // Cell contribution on one level: fill the level matrix and, if constraints
+    // were set, the interface matrices via assemble_in() and assemble_out().
+    template <typename MatrixType>
+    template <class DOFINFO>
+    inline void
+    MGMatrixSimple<MatrixType>::assemble(const DOFINFO &info)
+    {
+      Assert(info.level_cell, ExcMessage("Cell must access level dofs"));
+      const unsigned int level = info.cell->level();
+      const bool with_interfaces = (mg_constrained_dofs != 0);
+
+      if (info.indices_by_block.size() == 0)
+        {
+          assemble((*matrix)[level], info.matrix(0,false).matrix, info.indices, info.indices, level);
+          if (with_interfaces)
+            {
+              assemble_in((*interface_in)[level], info.matrix(0,false).matrix, info.indices, info.indices, level);
+              assemble_out((*interface_out)[level],info.matrix(0,false).matrix, info.indices, info.indices, level);
+            }
+          return;
+        }
+
+      for (unsigned int k=0; k<info.n_matrices(); ++k)
+        {
+          const unsigned int row = info.matrix(k,false).row;
+          const unsigned int column = info.matrix(k,false).column;
+
+          assemble((*matrix)[level], info.matrix(k,false).matrix,
+                   info.indices_by_block[row], info.indices_by_block[column], level);
+          if (!with_interfaces)
+            continue;
+          // assemble_out() enters the transpose, hence the swapped index sets
+          assemble_in((*interface_in)[level], info.matrix(k,false).matrix,
+                      info.indices_by_block[row], info.indices_by_block[column], level);
+          assemble_out((*interface_out)[level],info.matrix(k,false).matrix,
+                       info.indices_by_block[column], info.indices_by_block[row], level);
+        }
+    }
+
+
+    template <typename MatrixType>
+    template <class DOFINFO>
+    inline void
+    MGMatrixSimple<MatrixType>::assemble(const DOFINFO &info1,
+                                         const DOFINFO &info2)
+    {
+      Assert(info1.level_cell, ExcMessage("Cell must access level dofs"));
+      Assert(info2.level_cell, ExcMessage("Cell must access level dofs"));
+      const unsigned int level1 = info1.cell->level();
+      const unsigned int level2 = info2.cell->level();
+
+      if (info1.indices_by_block.size() == 0)   // no block structure: one local matrix set
+        {
+          if (level1 == level2)   // same level: all four couplings go into the level matrix
+            {
+              assemble((*matrix)[level1], info1.matrix(0,false).matrix, info1.indices, info1.indices, level1);
+              assemble((*matrix)[level1], info1.matrix(0,true).matrix, info1.indices, info2.indices, level1);
+              assemble((*matrix)[level1], info2.matrix(0,false).matrix, info2.indices, info2.indices, level1);
+              assemble((*matrix)[level1], info2.matrix(0,true).matrix, info2.indices, info1.indices, level1);
+            }
+          else
+            {
+              Assert(level1 > level2, ExcInternalError());
+              // Do not add the cell matrix
+              // of info2 here; the coarser
+              // cell takes care of that
+              assemble((*matrix)[level1], info1.matrix(0,false).matrix, info1.indices, info1.indices, level1);
+              if (level1>0)   // NOTE(review): implied by level1 > level2 asserted above
+                {
+                  assemble_up((*flux_up)[level1],info1.matrix(0,true).matrix, info2.indices, info1.indices, level1);
+                  assemble_down((*flux_down)[level1], info2.matrix(0,true).matrix, info2.indices, info1.indices, level1);
+                }
+            }
+        }
+      else
+        for (unsigned int k=0; k<info1.n_matrices(); ++k)
+          {
+            const unsigned int row = info1.matrix(k,false).row;
+            const unsigned int column = info1.matrix(k,false).column;
+
+            if (level1 == level2)   // same level: all four couplings go into the level matrix
+              {
+                assemble((*matrix)[level1], info1.matrix(k,false).matrix, info1.indices_by_block[row], info1.indices_by_block[column], level1);
+                assemble((*matrix)[level1], info1.matrix(k,true).matrix, info1.indices_by_block[row], info2.indices_by_block[column], level1);
+                assemble((*matrix)[level1], info2.matrix(k,false).matrix, info2.indices_by_block[row], info2.indices_by_block[column], level1);
+                assemble((*matrix)[level1], info2.matrix(k,true).matrix, info2.indices_by_block[row], info1.indices_by_block[column], level1);
+              }
+            else
+              {
+                Assert(level1 > level2, ExcInternalError());
+                // Do not add the cell matrix
+                // of info2 here; the coarser
+                // cell takes care of that
+                assemble((*matrix)[level1], info1.matrix(k,false).matrix, info1.indices_by_block[row], info1.indices_by_block[column], level1);
+                if (level1>0)   // NOTE(review): implied by level1 > level2 asserted above
+                  {
+                    assemble_up((*flux_up)[level1],info1.matrix(k,true).matrix, info2.indices_by_block[row], info1.indices_by_block[column], level1);
+                    assemble_down((*flux_down)[level1], info2.matrix(k,true).matrix, info2.indices_by_block[row], info1.indices_by_block[column], level1);
+                  }
+              }
+          }
+    }
+
+//----------------------------------------------------------------------//
+
+    // Pass @p t on to the constructor of the MatrixSimple base class.
+    template <typename MatrixType, typename VectorType>
+    SystemSimple<MatrixType,VectorType>::SystemSimple(double t)
+      : MatrixSimple<MatrixType>(t)
+    {}
+
+
+    // Hand @p m to the matrix base and @p rhs, named "right hand side", to the residual base.
+    template <typename MatrixType, typename VectorType>
+    inline void
+    SystemSimple<MatrixType,VectorType>::initialize(MatrixType &m, VectorType &rhs)
+    {
+      MatrixSimple<MatrixType>::initialize(m);
+      VectorType *rhs_ptr = &rhs;
+      AnyData residuals;
+      residuals.add(rhs_ptr, "right hand side");
+      ResidualSimple<VectorType>::initialize(residuals);
+    }
+
+    template <typename MatrixType, typename VectorType>
+    inline void
+    SystemSimple<MatrixType,VectorType>::initialize(const ConstraintMatrix &c)
+    {
+      MatrixSimple<MatrixType>::initialize(c);      // constraints for the matrix part
+      ResidualSimple<VectorType>::initialize(c);    // the same constraints for the vector part
+    }
+
+
+    template <typename MatrixType, typename VectorType>
+    template <class DOFINFO>
+    inline void
+    SystemSimple<MatrixType,VectorType>::initialize_info(DOFINFO &info,
+                                                         bool    face) const
+    {
+      MatrixSimple<MatrixType>::initialize_info(info, face);     // matrix base prepares info first
+      ResidualSimple<VectorType>::initialize_info(info, face);   // then the residual base
+    }
+
+
+    template <typename MatrixType, typename VectorType>
+    template <class DOFINFO>
+    inline void
+    SystemSimple<MatrixType,VectorType>::assemble(const DOFINFO &info)
+    {
+      MatrixSimple<MatrixType>::assemble(info);      // matrix contribution of this cell
+      ResidualSimple<VectorType>::assemble(info);    // vector contribution of this cell
+    }
+
+
+    template <typename MatrixType, typename VectorType>
+    template <class DOFINFO>
+    inline void
+    SystemSimple<MatrixType,VectorType>::assemble(const DOFINFO &info1,
+                                                  const DOFINFO &info2)
+    {
+      MatrixSimple<MatrixType>::assemble(info1, info2);      // matrix contribution of the cell pair
+      ResidualSimple<VectorType>::assemble(info1, info2);    // vector contribution of the cell pair
+    }
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/meshworker/vector_selector.h b/include/deal.II/meshworker/vector_selector.h
new file mode 100644
index 0000000..5e06d00
--- /dev/null
+++ b/include/deal.II/meshworker/vector_selector.h
@@ -0,0 +1,538 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__mesh_worker_vector_selector_h
+#define dealii__mesh_worker_vector_selector_h
+
+#include <deal.II/algorithms/any_data.h>
+#include <deal.II/algorithms/named_selection.h>
+#include <deal.II/base/tensor.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/base/mg_level_object.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+template<int,int> class FEValuesBase;
+
+namespace MeshWorker
+{
+
+  /**
+   * A class that selects vectors from a list of named vectors.
+   *
+   * Since the number of vectors in an AnyData object may grow with every
+   * nesting of applications or loops, it is important to be able to select
+   * those, which are actually used in computing residuals etc. This class
+   * organizes the selection.
+   *
+   * It is used for instance in IntegrationWorker to determine which values,
+   * derivatives or second derivatives are actually computed.
+   *
+   * @ingroup MeshWorker
+   * @author Guido Kanschat 2009
+   */
+  class VectorSelector :
+    public Subscriptor
+  {
+  public:
+    /**
+     * Add a vector to the selection of finite element functions. The
+     * arguments are the name of the vector and indicators, which information
+     * is to be extracted from the vector. The name refers to an entry in an
+     * AnyData object, which will be identified by initialize().  The three
+     * bool parameters indicate, whether values, gradients and Hessians of the
+     * finite element function are to be computed on each cell or face.
+     */
+    void add(const std::string &name,
+             const bool values = true,
+             const bool gradients = false,
+             const bool hessians = false);
+
+    /**
+     * Currently disabled: same as the function above, but additionally
+     * selects a single block of the global vector.
+     */
+//      void add(const std::string& name,
+//               const unsigned int selected_block,
+//             bool values = true,
+//             bool gradients = false,
+//             bool hessians = false);
+
+    /**
+     * Initialize the selection field with a data vector. While add() only
+     * enters the names of vectors, which will be used in the integration loop
+     * over cells and faces, this function links the names to actual vectors in
+     * an AnyData object.
+     *
+     * @note This function caches the index associated with a name. Therefore,
+     * it must be called every time after the AnyData object has changed.
+     */
+    void initialize(const AnyData &);
+
+    /**
+     * Check whether any vector is selected.
+     */
+    bool empty () const;
+
+    /**
+     * Returns true if values are selected for any vector.
+     */
+    bool has_values () const;
+
+    /**
+     * Returns true if gradients are selected for any vector.
+     */
+    bool has_gradients () const;
+
+    /**
+     * Returns true if hessians are selected for any vector.
+     */
+    bool has_hessians () const;
+
+    /**
+     * Number of vectors for values
+     */
+    unsigned int n_values () const;
+
+    /**
+     * Number of vectors for gradients
+     */
+    unsigned int n_gradients () const;
+
+    /**
+     * Number of vectors for Hessians
+     */
+    unsigned int n_hessians () const;
+
+    /**
+     * The vector index for the ith value
+     */
+    unsigned int value_index (const unsigned int i) const;
+
+    /**
+     * The vector index for the ith gradient
+     */
+    unsigned int gradient_index (const unsigned int i) const;
+
+    /**
+     * The vector index for the ith Hessian
+     */
+    unsigned int hessian_index (const unsigned int i) const;
+
+    /**
+     * Print the selected vector names to @p s; DATA must be given explicitly.
+     */
+    template <class StreamType, typename DATA>
+    void print (StreamType &s, const AnyData &v) const;
+
+    /**
+     * Print the number of selections to the stream.
+     */
+    template <class StreamType>
+    void print (StreamType &s) const;
+
+    /**
+     * The memory used by this object.
+     */
+    std::size_t memory_consumption () const;
+
+  protected:
+    /**
+     * Selection of the vectors used to compute values.
+     */
+    NamedSelection value_selection;
+
+    /**
+     * Selection of the vectors used to compute gradients.
+     */
+    NamedSelection gradient_selection;
+
+    /**
+     * Selection of the vectors used to compute hessians.
+     */
+    NamedSelection hessian_selection;
+  };
+
+  /**
+   * Based on VectorSelector, this is the class used by IntegrationInfo to
+   * compute values of source vectors in quadrature points.
+   *
+   * @ingroup MeshWorker
+   * @author Guido Kanschat, 2009
+   */
+  template <int dim, int spacedim = dim, typename Number=double>
+  class VectorDataBase :
+    public VectorSelector
+  {
+  public:
+    /**
+     * Constructor
+     */
+    VectorDataBase();
+
+    /**
+     * Constructor from a base class object
+     */
+    VectorDataBase(const VectorSelector &);
+
+    /**
+     * Initialize with an AnyData object and cache the indices in the
+     * VectorSelector base class.
+     *
+     * @note Make sure the VectorSelector base class was filled with
+     * reasonable data before calling this function.
+     */
+    void initialize(const AnyData &);
+
+    /**
+     * Virtual, but empty destructor.
+     */
+    virtual ~VectorDataBase();
+
+    /**
+     * The main function added to VectorSelector is a virtual function
+     * that is implemented in the derived class template and called by
+     * IntegrationInfo.
+     *
+     * Depending on the selections made in our base class, this fills the
+     * first three arguments with the local data of the finite element
+     * functions. It is usually called either for the whole FESystem, or for
+     * each base element separately.
+     *
+     * @param values is the vector filled with the values of the finite
+     * element function in the quadrature points.
+     *
+     * @param gradients is the vector filled with the derivatives of the
+     * finite element function in the quadrature points.
+     *
+     * @param hessians is the vector filled with the second derivatives of the
+     * finite element function in the quadrature points.
+     *
+     * @param fe is the FEValuesBase object which is used to compute the
+     * function values. Its UpdateFlags must have been set appropriately.
+     *
+     * @param index is the local index vector. If @p fe refers to base
+     * elements of the system, this vector should be sorted by block and the
+     * arguments @p start and @p size below specify the subset of @p index
+     * used.
+     *
+     * @param component is the first index in @p values, @p gradients and @p
+     * hessians entered in this function.
+     *
+     * @param n_comp is the number of components to be filled.
+     *
+     * @param start is the first index of this block in @p index, or zero if
+     * no base elements are used.
+     *
+     * @param size is the number of dofs per cell of the current element or
+     * base element.
+     */
+    virtual void fill(
+      std::vector<std::vector<std::vector<Number> > > &values,
+      std::vector<std::vector<std::vector<Tensor<1,dim,Number> > > > &gradients,
+      std::vector<std::vector<std::vector<Tensor<2,dim,Number> > > > &hessians,
+      const FEValuesBase<dim,spacedim> &fe,
+      const std::vector<types::global_dof_index> &index,
+      const unsigned int component,
+      const unsigned int n_comp,
+      const unsigned int start,
+      const unsigned int size) const;
+
+    /**
+     * Fill the local data vector from level vectors. Performs exactly what
+     * the other fill() does, but uses the cell level to access a single level
+     * out of a hierarchy of level vectors, instead of a global data vector on
+     * the active cells.
+     */
+    virtual void mg_fill(
+      std::vector<std::vector<std::vector<Number> > > &values,
+      std::vector<std::vector<std::vector<Tensor<1,dim,Number> > > > &gradients,
+      std::vector<std::vector<std::vector<Tensor<2,dim,Number> > > > &hessians,
+      const FEValuesBase<dim,spacedim> &fe,
+      const unsigned int level,
+      const std::vector<types::global_dof_index> &index,
+      const unsigned int component,
+      const unsigned int n_comp,
+      const unsigned int start,
+      const unsigned int size) const;
+
+    /**
+     * The memory used by this object.
+     */
+    std::size_t memory_consumption () const;
+  protected:
+    AnyData data;   // copy of the AnyData object handed to initialize()
+  };
+
+
+  /**
+   * Based on VectorSelector, this is the class that implements the function
+   * VectorDataBase::fill() for a certain type of vector, using AnyData to
+   * identify vectors by name.
+   *
+   * @ingroup MeshWorker
+   * @author Guido Kanschat, 2009
+   */
+  template <typename VectorType, int dim, int spacedim = dim>
+  class VectorData :
+    public VectorDataBase<dim, spacedim,typename VectorType::value_type>
+  {
+  public:
+    /**
+     * Constructor.
+     */
+    VectorData();
+    /**
+     * Constructor using a prefilled VectorSelector.
+     */
+    VectorData(const VectorSelector &);
+
+    /**
+     * Initialize with an object of named vectors.
+     */
+    void initialize(const AnyData &);
+
+    /**
+     * Initialize with a single vector and cache the indices in the
+     * VectorSelector base class.
+     *
+     * @note Make sure the VectorSelector base class was filled with
+     * reasonable data before calling this function.
+     */
+    void initialize(const VectorType *, const std::string &name);
+
+    virtual void fill(   // same parameter list as VectorDataBase::fill(), with Number = VectorType::value_type
+      std::vector<std::vector<std::vector<typename VectorType::value_type> > > &values,
+      std::vector<std::vector<std::vector<Tensor<1,dim,typename VectorType::value_type> > > > &gradients,
+      std::vector<std::vector<std::vector<Tensor<2,dim,typename VectorType::value_type> > > > &hessians,
+      const FEValuesBase<dim,spacedim> &fe,
+      const std::vector<types::global_dof_index> &index,
+      const unsigned int component,
+      const unsigned int n_comp,
+      const unsigned int start,
+      const unsigned int size) const;
+
+    virtual void mg_fill(   // same parameter list as VectorDataBase::mg_fill(), with Number = VectorType::value_type
+      std::vector<std::vector<std::vector<typename VectorType::value_type> > > &values,
+      std::vector<std::vector<std::vector<Tensor<1,dim,typename VectorType::value_type> > > > &gradients,
+      std::vector<std::vector<std::vector<Tensor<2,dim,typename VectorType::value_type> > > > &hessians,
+      const FEValuesBase<dim,spacedim> &fe,
+      const unsigned int level,
+      const std::vector<types::global_dof_index> &index,
+      const unsigned int component,
+      const unsigned int n_comp,
+      const unsigned int start,
+      const unsigned int size) const;
+
+    /**
+     * The memory used by this object.
+     */
+    std::size_t memory_consumption () const;
+  };
+
+
+  /**
+   * Based on VectorSelector, this is the class that implements the function
+   * VectorDataBase::fill() for a certain type of multilevel vectors, using
+   * AnyData to identify vectors by name.
+   *
+   * @ingroup MeshWorker
+   * @author Guido Kanschat, 2010
+   */
+  template <typename VectorType, int dim, int spacedim = dim>
+  class MGVectorData :
+    public VectorData<VectorType, dim, spacedim>
+  {
+  public:
+    /**
+     * Constructor.
+     */
+    MGVectorData();
+
+    /**
+     * Constructor using a prefilled VectorSelector.
+     */
+    MGVectorData(const VectorSelector &);
+
+    /**
+     * Initialize with an object of named vectors.
+     */
+    void initialize(const AnyData &);
+
+    /**
+     * Initialize with a single hierarchy of level vectors and cache the
+     * indices in the VectorSelector base class.
+     *
+     * @note Make sure the VectorSelector base class was filled with
+     * reasonable data before calling this function.
+     */
+    void initialize(const MGLevelObject<VectorType> *, const std::string &name);
+
+
+    /**
+     * The memory used by this object.
+     */
+    std::size_t memory_consumption () const;
+  };
+
+
+//----------------------------------------------------------------------//
+
+  // Enter @p name into the selections for values, gradients and
+  // Hessians; a selection is only touched if its flag is true. The
+  // name is linked to an AnyData entry later, in initialize().
+  inline void
+  VectorSelector::add(const std::string &name,
+                      const bool values,
+                      const bool gradients,
+                      const bool hessians)
+  {
+    if (values)    value_selection.add(name);
+    if (gradients) gradient_selection.add(name);
+    if (hessians)  hessian_selection.add(name);
+  }
+
+
+  //inline void
+  //VectorSelector::add(const std::string& name,
+  //   const unsigned int block,
+  //   bool values, bool gradients, bool hessians)
+  //{
+  //  if (values) value_selection.add(name, block);
+  //  if (gradients) gradient_selection.add(name, block);
+  //  if (hessians) hessian_selection.add(name, block);
+  //}
+
+
+  inline void
+  VectorSelector::initialize(const AnyData &src)
+  {
+    value_selection.initialize(src);      // every selection is initialized
+    gradient_selection.initialize(src);   // with the same AnyData object,
+    hessian_selection.initialize(src);    // so call again whenever src changes
+  }
+
+  // True if no vector has been selected for values, gradients or
+  // Hessians.
+  inline bool
+  VectorSelector::empty() const
+  {
+    return !(has_values() || has_gradients() || has_hessians());
+  }
+
+
+  inline bool
+  VectorSelector::has_values() const
+  {
+    return value_selection.size() != 0;   // at least one vector selected for values
+  }
+
+
+  inline bool
+  VectorSelector::has_gradients() const
+  {
+    return gradient_selection.size() != 0;   // at least one vector selected for gradients
+  }
+
+
+  inline bool
+  VectorSelector::has_hessians() const
+  {
+    return hessian_selection.size() != 0;   // at least one vector selected for Hessians
+  }
+
+
+  inline unsigned int
+  VectorSelector::n_values() const
+  {
+    return value_selection.size();   // number of vectors selected for values
+  }
+
+
+  inline unsigned int
+  VectorSelector::n_gradients() const
+  {
+    return gradient_selection.size();   // number of vectors selected for gradients
+  }
+
+
+  inline unsigned int
+  VectorSelector::n_hessians() const
+  {
+    return hessian_selection.size();   // number of vectors selected for Hessians
+  }
+
+
+  inline unsigned int
+  VectorSelector::value_index(const unsigned int i) const
+  {
+    return value_selection(i);   // i-th entry of the value selection
+  }
+
+
+  inline unsigned int
+  VectorSelector::gradient_index(const unsigned int i) const
+  {
+    return gradient_selection(i);   // i-th entry of the gradient selection
+  }
+
+
+  inline unsigned int
+  VectorSelector::hessian_index(const unsigned int i) const
+  {
+    return hessian_selection(i);   // i-th entry of the Hessian selection
+  }
+
+
+  // Write the number of selected vectors of each kind to @p s.
+  template <class StreamType>
+  inline void
+  VectorSelector::print(StreamType &s) const
+  {
+    s << "values: " << n_values();
+    s << " gradients: " << n_gradients();
+    s << " hessians: " << n_hessians() << std::endl;
+  }
+
+
+  template <class StreamType, typename DATA>
+  inline void
+  VectorSelector::print(StreamType &s, const AnyData &v) const
+  {
+    s << "values:   ";   // one line per kind: quoted names looked up in v
+    for (unsigned int i=0; i<n_values(); ++i)
+      s << " '" << v.name(value_selection(i)) << '\'';
+    s << std::endl << "gradients:";
+    for (unsigned int i=0; i<n_gradients(); ++i)
+      s << " '" << v.name(gradient_selection(i)) << '\'';
+    s << std::endl << "hessians: ";
+    for (unsigned int i=0; i<n_hessians(); ++i)
+      s << " '" << v.name(hessian_selection(i)) << '\'';
+    s << std::endl;
+  }
+
+
+  inline
+  std::size_t
+  VectorSelector::memory_consumption () const
+  {
+    return sizeof(*this);   // NOTE(review): static size only; memory held by the selections may not be counted -- confirm
+  }
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/meshworker/vector_selector.templates.h b/include/deal.II/meshworker/vector_selector.templates.h
new file mode 100644
index 0000000..805bced
--- /dev/null
+++ b/include/deal.II/meshworker/vector_selector.templates.h
@@ -0,0 +1,246 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/meshworker/vector_selector.h>
+#include <deal.II/base/vector_slice.h>
+#include <deal.II/fe/fe_values.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace MeshWorker
+{
+  // Nothing to release explicitly.
+  template <int dim, int spacedim, typename Number>
+  VectorDataBase<dim, spacedim, Number>::~VectorDataBase() {}
+
+
+  // Copy the selection from v.
+  template <int dim, int spacedim, typename Number>
+  VectorDataBase<dim, spacedim, Number>::VectorDataBase(const VectorSelector &v)
+    : VectorSelector(v)
+  {}
+
+
+  // Default construction of base class and members.
+  template <int dim, int spacedim, typename Number>
+  VectorDataBase<dim, spacedim, Number>::VectorDataBase() {}
+
+
+  // Keep a copy of d, then forward d to VectorSelector::initialize().
+  template <int dim, int spacedim, typename Number>
+  void VectorDataBase<dim, spacedim, Number>::initialize(const AnyData &d)
+  {
+    this->data = d;
+    this->VectorSelector::initialize(d);
+  }
+
+
+  // No implementation in the base class: a call fails the Assert below.
+  template <int dim, int spacedim, typename Number>
+  void VectorDataBase<dim, spacedim, Number>::fill(
+    std::vector<std::vector<std::vector<Number> > > &,
+    std::vector<std::vector<std::vector<Tensor<1,dim,Number> > > > &,
+    std::vector<std::vector<std::vector<Tensor<2,dim,Number> > > > &,
+    const FEValuesBase<dim,spacedim> &,
+    const std::vector<types::global_dof_index> &,
+    const unsigned int,
+    const unsigned int,
+    const unsigned int,
+    const unsigned int) const
+  {
+    Assert(false, ExcNotImplemented());
+  }
+
+
+  // No implementation in the base class: a call fails the Assert below.
+  template <int dim, int spacedim, typename Number>
+  void VectorDataBase<dim, spacedim, Number>::mg_fill(
+    std::vector<std::vector<std::vector<Number> > > &,
+    std::vector<std::vector<std::vector<Tensor<1,dim,Number> > > > &,
+    std::vector<std::vector<std::vector<Tensor<2,dim,Number> > > > &,
+    const FEValuesBase<dim,spacedim> &,
+    const unsigned int,
+    const std::vector<types::global_dof_index> &,
+    const unsigned int,
+    const unsigned int,
+    const unsigned int,
+    const unsigned int) const
+  {
+    Assert(false, ExcNotImplemented());
+  }
+
+
+//----------------------------------------------------------------------//
+
+  // Default construction of base class and members.
+  template <typename VectorType, int dim, int spacedim>
+  VectorData<VectorType, dim, spacedim>::VectorData() {}
+
+
+  // Pass the selection s on to the base class.
+  template <typename VectorType, int dim, int spacedim>
+  VectorData<VectorType, dim, spacedim>::VectorData(const VectorSelector &s)
+    : VectorDataBase<dim, spacedim, typename VectorType::value_type>(s)
+  {}
+
+
+  // Keep a copy of d, then forward d to VectorSelector::initialize().
+  template <typename VectorType, int dim, int spacedim>
+  void VectorData<VectorType, dim, spacedim>::initialize(const AnyData &d)
+  {
+    this->data = d;
+    this->VectorSelector::initialize(d);
+  }
+
+
+  template <typename VectorType, int dim, int spacedim>
+  void VectorData<VectorType, dim, spacedim>::initialize(const VectorType *v, const std::string &name)
+  {
+    typedef SmartPointer<const VectorType,VectorData<VectorType, dim, spacedim> > pointer_type;
+    pointer_type p = v;                             // wrap the raw pointer
+    this->data.add(p, name);                        // store it under the given name
+    this->VectorSelector::initialize(this->data);   // initialize the selector with the updated data
+  }
+
+
+  // For every selected vector, let fe compute values, gradients and hessians into the output slices.
+  template <typename VectorType, int dim, int spacedim>
+  void VectorData<VectorType, dim, spacedim>::fill(
+    std::vector<std::vector<std::vector<typename VectorType::value_type> > > &values,
+    std::vector<std::vector<std::vector<Tensor<1,dim,typename VectorType::value_type> > > > &gradients,
+    std::vector<std::vector<std::vector<Tensor<2,dim,typename VectorType::value_type> > > > &hessians,
+    const FEValuesBase<dim,spacedim> &fe,
+    const std::vector<types::global_dof_index> &index,
+    const unsigned int component,
+    const unsigned int n_comp,
+    const unsigned int start,
+    const unsigned int size) const
+  {
+    AssertDimension(values.size(), this->n_values());
+    AssertDimension(gradients.size(), this->n_gradients());
+    AssertDimension(hessians.size(), this->n_hessians());
+
+    const AnyData &vectors = this->data;
+    for (unsigned int k=0; k<this->n_values(); ++k)
+      {
+        const VectorType *source = vectors.read_ptr<VectorType>(this->value_index(k));
+        VectorSlice<std::vector<std::vector<typename VectorType::value_type> > > target(values[k], component, n_comp);
+        fe.get_function_values(*source, make_slice(index, start, size), target, true);
+      }
+
+    for (unsigned int k=0; k<this->n_gradients(); ++k)
+      {
+        const VectorType *source = vectors.read_ptr<VectorType>(this->gradient_index(k));
+        VectorSlice<std::vector<std::vector<Tensor<1,dim,typename VectorType::value_type> > > > target(gradients[k], component, n_comp);
+        fe.get_function_gradients(*source, make_slice(index, start, size), target, true);
+      }
+
+    for (unsigned int k=0; k<this->n_hessians(); ++k)
+      {
+        const VectorType *source = vectors.read_ptr<VectorType>(this->hessian_index(k));
+        VectorSlice<std::vector<std::vector<Tensor<2,dim,typename VectorType::value_type> > > > target(hessians[k], component, n_comp);
+        fe.get_function_hessians(*source, make_slice(index, start, size), target, true);
+      }
+  }
+
+
+  // Value reported by VectorSelector plus the static size of the data member.
+  template <typename VectorType, int dim, int spacedim>
+  std::size_t VectorData<VectorType, dim, spacedim>::memory_consumption () const
+  {
+    const std::size_t selector_size = VectorSelector::memory_consumption();
+    const std::size_t data_size     = sizeof (this->data);
+    return selector_size + data_size;
+  }
+
+//----------------------------------------------------------------------//
+
+  // Default construction of base class and members.
+  template <typename VectorType, int dim, int spacedim>
+  MGVectorData<VectorType, dim, spacedim>::MGVectorData() {}
+
+
+  // Pass the selection s on to the base class.
+  template <typename VectorType, int dim, int spacedim>
+  MGVectorData<VectorType, dim, spacedim>::MGVectorData(const VectorSelector &s)
+    : VectorData<VectorType, dim, spacedim>(s)
+  {}
+
+
+  // Keep a copy of d, then forward d to VectorSelector::initialize().
+  template <typename VectorType, int dim, int spacedim>
+  void MGVectorData<VectorType, dim, spacedim>::initialize(const AnyData &d)
+  {
+    this->data = d;
+    this->VectorSelector::initialize(d);
+  }
+
+
+  // Store v in the data object under the given name, then initialize the selector with it.
+  template <typename VectorType, int dim, int spacedim>
+  void MGVectorData<VectorType, dim, spacedim>::initialize(const MGLevelObject<VectorType> *v, const std::string &name)
+  {
+    typedef SmartPointer<const MGLevelObject<VectorType>, MGVectorData<VectorType, dim, spacedim> > pointer_type;
+    pointer_type p = v;
+    this->data.add(p, name);
+    this->VectorSelector::initialize(this->data);
+  }
+
+
+  // Multilevel counterpart of fill(): the selected entries of the data object are read as
+  // MGLevelObject<VectorType> and their vector on the given level is evaluated through fe.
+  template <typename VectorType, int dim, int spacedim>
+  void VectorData<VectorType, dim, spacedim>::mg_fill
+  (std::vector<std::vector<std::vector<typename VectorType::value_type> > >                &values,
+   std::vector<std::vector<std::vector<Tensor<1,dim,typename VectorType::value_type> > > > &gradients,
+   std::vector<std::vector<std::vector<Tensor<2,dim,typename VectorType::value_type> > > > &hessians,
+   const FEValuesBase<dim,spacedim>           &fe,
+   const unsigned int                         level,
+   const std::vector<types::global_dof_index> &index,
+   const unsigned int                         component,
+   const unsigned int                         n_comp,
+   const unsigned int                         start,
+   const unsigned int                         size)
+  const
+  {
+    AssertDimension(values.size(), this->n_values());
+    AssertDimension(gradients.size(), this->n_gradients());
+    AssertDimension(hessians.size(), this->n_hessians());
+
+    const AnyData &data = this->data;
+    for (unsigned int i=0; i<this->n_values(); ++i)
+      {
+        const MGLevelObject<VectorType> *src = data.read_ptr<MGLevelObject<VectorType> >(this->value_index(i));
+        VectorSlice<std::vector<std::vector<typename VectorType::value_type> > > dst(values[i], component, n_comp);
+        fe.get_function_values((*src)[level], make_slice(index, start, size), dst, true);
+      }
+
+    for (unsigned int i=0; i<this->n_gradients(); ++i) // gradients are looked up through the gradient selection, as in fill()
+      {
+        const MGLevelObject<VectorType> *src = data.read_ptr<MGLevelObject<VectorType> >(this->gradient_index(i));
+        VectorSlice<std::vector<std::vector<Tensor<1,dim,typename VectorType::value_type> > > > dst(gradients[i], component, n_comp);
+        fe.get_function_gradients((*src)[level], make_slice(index, start, size), dst, true);
+      }
+
+    for (unsigned int i=0; i<this->n_hessians(); ++i) // hessians are looked up through the hessian selection, as in fill()
+      {
+        const MGLevelObject<VectorType> *src = data.read_ptr<MGLevelObject<VectorType> >(this->hessian_index(i));
+        VectorSlice<std::vector<std::vector<Tensor<2,dim,typename VectorType::value_type> > > > dst(hessians[i], component, n_comp);
+        fe.get_function_hessians((*src)[level], make_slice(index, start, size), dst, true);
+      }
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/include/deal.II/multigrid/mg_base.h b/include/deal.II/multigrid/mg_base.h
new file mode 100644
index 0000000..04e10af
--- /dev/null
+++ b/include/deal.II/multigrid/mg_base.h
@@ -0,0 +1,235 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__mg_base_h
+#define dealii__mg_base_h
+
+/*
+ * This file contains MGLevelObject and some abstract base classes
+ * used by Multigrid.
+ */
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/lac/vector.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+template <typename> class MGLevelObject;
+
+/*!@addtogroup mg */
+/*@{*/
+
+
+/**
+ * Multilevel matrix base. This class sets up the interface needed by
+ * multilevel algorithms. It has no relation to the actual matrix type and
+ * takes the vector class as only template argument.
+ *
+ * Usually, the derived class MGMatrix, operating on an MGLevelObject of
+ * matrices will be sufficient for applications.
+ *
+ * @author Guido Kanschat, 2002
+ */
+template <typename VectorType>
+class MGMatrixBase : public Subscriptor
+{
+public:
+  /**
+   * Virtual destructor.
+   */
+  virtual ~MGMatrixBase();
+
+  /**
+   * Matrix-vector-multiplication on a certain level.
+   */
+  virtual void vmult (const unsigned int level,
+                      VectorType         &dst,
+                      const VectorType   &src) const = 0;
+
+  /**
+   * Adding matrix-vector-multiplication on a certain level.
+   */
+  virtual void vmult_add (const unsigned int level,
+                          VectorType         &dst,
+                          const VectorType   &src) const = 0;
+
+  /**
+   * Transpose matrix-vector-multiplication on a certain level.
+   */
+  virtual void Tvmult (const unsigned int level,
+                       VectorType         &dst,
+                       const VectorType   &src) const = 0;
+
+  /**
+   * Adding transpose matrix-vector-multiplication on a certain level.
+   */
+  virtual void Tvmult_add (const unsigned int level,
+                           VectorType         &dst,
+                           const VectorType   &src) const = 0;
+};
+
+
+/**
+ * Base class for coarse grid solvers.  This defines the virtual parenthesis
+ * operator, being the interface used by multigrid methods. Any implementation
+ * will be done by derived classes.
+ *
+ * @author Guido Kanschat, 2002
+ */
+template <typename VectorType>
+class MGCoarseGridBase : public Subscriptor
+{
+public:
+  /**
+   * Virtual destructor.
+   */
+  virtual ~MGCoarseGridBase ();
+
+  /**
+   * Solution operator, to be implemented by derived classes.
+   */
+  virtual void operator() (const unsigned int level,
+                           VectorType         &dst,
+                           const VectorType   &src) const = 0;
+};
+
+
+/**
+ * Base class used to declare the operations needed by a concrete class
+ * implementing prolongation and restriction of vectors in the multigrid
+ * context. This class is abstract and has no implementation of these
+ * operations.
+ *
+ * There are several derived classes, reflecting the fact that vector types
+ * and numbering of the fine-grid discretization and of the multi-level
+ * implementation are independent.
+ *
+ * If you use multigrid for a single PDE or for your complete system of
+ * equations, you will use MGTransferPrebuilt together with Multigrid. The
+ * vector types used on the fine grid as well as for the multilevel operations
+ * may be Vector or BlockVector. In both cases, MGTransferPrebuilt will
+ * operate on all components of the solution.
+ *
+ * @note For the following, it is important to realize the difference between
+ * a solution
+ * @ref GlossComponent "component"
+ * and a solution
+ * @ref GlossBlock "block".
+ * The distinction only applies if vector valued elements are used, but is
+ * quite important then. This is reflected in the fact that it is not possible
+ * right now to use transfer classes based on MGTransferComponentBase for
+ * genuine vector valued elements, but descendants of MGTransferBlockBase
+ * would have to be applied. In the following text, we will use the term
+ * <em>block</em>, but remark that it might refer to components as well.
+ *
+ * @todo update the following documentation, since it does not reflect the
+ * latest changes in structure.
+ *
+ * For mixed systems, it may be required to do multigrid only for a single
+ * component or for some components. The classes MGTransferSelect and
+ * MGTransferBlock handle these cases.
+ *
+ * MGTransferSelect is used if you use multigrid (on Vector objects) for a
+ * single component, possibly grouped using <tt>mg_target_component</tt>.
+ *
+ * The class MGTransferBlock handles the case where your multigrid method
+ * operates on BlockVector objects. These can contain all or a consecutive set
+ * of the blocks of the complete system. Since most smoothers cannot operate
+ * on block structures, it is not clear whether this case is really useful.
+ * Therefore, a tested implementation of this case will be supplied when
+ * needed.
+ *
+ * @author Wolfgang Bangerth, Guido Kanschat, 1999, 2002, 2007
+ */
+template <typename VectorType>
+class MGTransferBase : public Subscriptor
+{
+public:
+  /**
+   * Destructor. Does nothing here, but needs to be declared virtual anyway.
+   */
+  virtual ~MGTransferBase();
+
+  /**
+   * Prolongate a vector from level <tt>to_level-1</tt> to level
+   * <tt>to_level</tt>. The previous content of <tt>dst</tt> is overwritten.
+   *
+   * @arg src is a vector with as many elements as there are degrees of
+   * freedom on the coarser level involved.
+   *
+   * @arg dst has as many elements as there are degrees of freedom on the
+   * finer level.
+   */
+  virtual void prolongate (const unsigned int to_level,
+                           VectorType         &dst,
+                           const VectorType   &src) const = 0;
+
+  /**
+   * Restrict a vector from level <tt>from_level</tt> to level
+   * <tt>from_level-1</tt> and add this restriction to <tt>dst</tt>. If the
+   * region covered by cells on level <tt>from_level</tt> is smaller than that
+   * of level <tt>from_level-1</tt> (local refinement), then some degrees of
+   * freedom in <tt>dst</tt> are active and will not be altered. For the other
+   * degrees of freedom, the result of the restriction is added.
+   *
+   * @arg src is a vector with as many elements as there are degrees of
+   * freedom on the finer level.
+   *
+   * @arg dst has as many elements as there are degrees of freedom on the
+   * coarser level.
+   *
+   */
+  virtual void restrict_and_add (const unsigned int from_level,
+                                 VectorType         &dst,
+                                 const VectorType   &src) const = 0;
+};
+
+
+
+/**
+ * Base class for multigrid smoothers. Does nothing but defining the interface
+ * used by multigrid methods.
+ *
+ * @author Guido Kanschat, 2002
+ */
+template <typename VectorType>
+class MGSmootherBase : public Subscriptor
+{
+public:
+  /**
+   * Virtual destructor.
+   */
+  virtual ~MGSmootherBase();
+  /**
+   * Release matrices. To be implemented by derived classes.
+   */
+  virtual void clear() = 0;
+
+  /**
+   * Smoothing function. This is the function used in multigrid methods.
+   */
+  virtual void smooth (const unsigned int level,
+                       VectorType         &u,
+                       const VectorType   &rhs) const = 0;
+};
+
+/*@}*/
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/multigrid/mg_block_smoother.h b/include/deal.II/multigrid/mg_block_smoother.h
new file mode 100644
index 0000000..1408e90
--- /dev/null
+++ b/include/deal.II/multigrid/mg_block_smoother.h
@@ -0,0 +1,313 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__mg_block_smoother_h
+#define dealii__mg_block_smoother_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/lac/pointer_matrix.h>
+#include <deal.II/lac/vector_memory.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/multigrid/mg_base.h>
+#include <deal.II/base/mg_level_object.h>
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+/*
+ * MGSmootherBase is defined in mg_base.h
+ */
+
+/*!@addtogroup mg */
+/*@{*/
+
+/**
+ * General smoother class for block vectors. This class gives complete freedom
+ * to the choice of a block smoother by being initialized with a matrix and a
+ * smoother object. Therefore, the smoother object for each level must be
+ * constructed by hand.
+ *
+ * @author Guido Kanschat, 2005
+ */
+template <typename MatrixType, class RelaxationType, typename number>
+class MGSmootherBlock
+  : public MGSmootherBase<BlockVector<number> >
+{
+public:
+  /**
+   * Constructor. Sets memory and smoothing parameters.
+   */
+  MGSmootherBlock (VectorMemory<BlockVector<number> > &mem,
+                   const unsigned int                   steps     = 1,
+                   const bool                           variable  = false,
+                   const bool                           symmetric = false,
+                   const bool                           transpose = false,
+                   const bool                           reverse   = false);
+
+  /**
+   * Initialize for matrices. The parameter <tt>matrices</tt> can be any
+   * object having functions <tt>min_level()</tt> and
+   * <tt>max_level()</tt> as well as an <tt>operator[]</tt> returning a
+   * reference to @p MatrixType.
+   *
+   * The same convention is used for the parameter <tt>smoothers</tt>, such
+   * that <tt>operator[]</tt> returns the object doing the block-smoothing on
+   * a single level.
+   *
+   * This function stores pointers to the level matrices and smoothing
+   * operator for each level.
+   */
+  template <class MGMatrixType, class MGRelaxationType>
+  void initialize (const MGMatrixType     &matrices,
+                   const MGRelaxationType &smoothers);
+
+  /**
+   * Empty all vectors.
+   */
+  void clear ();
+
+  /**
+   * Modify the number of smoothing steps on finest level.
+   */
+  void set_steps (const unsigned int);
+
+  /**
+   * Switch on/off variable smoothing.
+   */
+  void set_variable (const bool);
+
+  /**
+   * Switch on/off symmetric smoothing.
+   */
+  void set_symmetric (const bool);
+
+  /**
+   * Switch on/off transposed. This is mutually exclusive with reverse().
+   */
+  void set_transpose (const bool);
+
+  /**
+   * Switch on/off reversed. This is mutually exclusive with transpose().
+   */
+  void set_reverse (const bool);
+
+  /**
+   * Implementation of the interface for @p Multigrid. Performs the smoothing
+   * steps on the given level: the residual of <tt>u</tt> is computed and the
+   * result of applying the level smoother to it is added to <tt>u</tt>.
+   */
+  virtual void smooth (const unsigned int         level,
+                       BlockVector<number>       &u,
+                       const BlockVector<number> &rhs) const;
+private:
+  /**
+   * Pointer to the matrices.
+   */
+  MGLevelObject<PointerMatrix<MatrixType, BlockVector<number> > > matrices;
+
+  /**
+   * Pointer to the smoothers.
+   */
+  MGLevelObject<PointerMatrix<RelaxationType, BlockVector<number> > > smoothers;
+
+  /**
+   * Number of smoothing steps.
+   */
+  unsigned int steps;
+
+  /**
+   * Variable smoothing?
+   */
+  bool variable;
+
+  /**
+   * Symmetric smoothing?
+   */
+  bool symmetric;
+
+  /**
+   * Transposed?
+   */
+  bool transpose;
+
+  /**
+   * Reverse?
+   */
+  bool reverse;
+
+  /**
+   * Memory for auxiliary vectors.
+   */
+  VectorMemory<BlockVector<number> > &mem;
+
+};
+
+/**@}*/
+
+//---------------------------------------------------------------------------
+
+#ifndef DOXYGEN
+
+// Store the parameters; level matrices and smoothers are set by initialize().
+template <typename MatrixType, class RelaxationType, typename number>
+inline
+MGSmootherBlock<MatrixType, RelaxationType, number>::MGSmootherBlock
+(VectorMemory<BlockVector<number> > &mem,
+ const unsigned int                  steps,
+ const bool                          variable,
+ const bool                          symmetric,
+ const bool                          transpose,
+ const bool                          reverse)
+  : steps(steps),
+    variable(variable),
+    symmetric(symmetric),
+    transpose(transpose),
+    reverse(reverse),
+    mem(mem)
+{}
+
+
+// Assign 0 to the matrix and smoother entry of every level.
+template <typename MatrixType, class RelaxationType, typename number>
+inline void MGSmootherBlock<MatrixType, RelaxationType, number>::clear ()
+{
+  const unsigned int coarsest = matrices.min_level();
+  const unsigned int finest   = matrices.max_level();
+  for (unsigned int level=coarsest; level<=finest; ++level)
+    {
+      smoothers[level] = 0;
+      matrices[level] = 0;
+    }
+}
+
+
+// Resize both level objects to the level range of m and store the addresses of the level entries.
+template <typename MatrixType, class RelaxationType, typename number>
+template <class MGMatrixType, class MGRelaxationType>
+inline void MGSmootherBlock<MatrixType, RelaxationType, number>::initialize (const MGMatrixType &m,
+    const MGRelaxationType &s)
+{
+  const unsigned int coarsest = m.min_level();
+  const unsigned int finest   = m.max_level();
+
+  matrices.resize(coarsest, finest);
+  smoothers.resize(coarsest, finest);
+
+  for (unsigned int level=coarsest; level<=finest; ++level)
+    {
+      matrices[level] = &m[level];
+      smoothers[level] = &s[level];
+    }
+}
+
+// Set the base number of smoothing steps used by smooth().
+template <typename MatrixType, class RelaxationType, typename number>
+inline void
+MGSmootherBlock<MatrixType, RelaxationType, number>::set_steps (const unsigned int s)
+{
+  this->steps = s;
+}
+
+
+// Switch the level-dependent scaling of the step count in smooth() on or off.
+template <typename MatrixType, class RelaxationType, typename number>
+inline void
+MGSmootherBlock<MatrixType, RelaxationType, number>::set_variable (const bool flag)
+{
+  this->variable = flag;
+}
+
+
+// Switch the alternation between smoother and transposed smoother on or off.
+template <typename MatrixType, class RelaxationType, typename number>
+inline void
+MGSmootherBlock<MatrixType, RelaxationType, number>::set_symmetric (const bool flag)
+{
+  this->symmetric = flag;
+}
+
+
+// Select whether smooth() starts with the transposed smoother.
+template <typename MatrixType, class RelaxationType, typename number>
+inline void
+MGSmootherBlock<MatrixType, RelaxationType, number>::set_transpose (const bool flag)
+{
+  this->transpose = flag;
+}
+
+
+// Store the reverse flag.
+template <typename MatrixType, class RelaxationType, typename number>
+inline void
+MGSmootherBlock<MatrixType, RelaxationType, number>::set_reverse (const bool flag)
+{
+  this->reverse = flag;
+}
+
+
+// Perform the smoothing sweeps u += S(rhs - A u) on one level, where A is
+// the level matrix and S the level smoother passed to initialize().
+// NOTE(review): the reverse flag is not consulted in this function.
+template <typename MatrixType, class RelaxationType, typename number>
+inline void
+MGSmootherBlock<MatrixType, RelaxationType, number>::smooth(const unsigned int         level,
+                                                            BlockVector<number>       &u,
+                                                            const BlockVector<number> &rhs) const
+{
+  deallog.push("Smooth");
+
+  // with variable smoothing the sweep count is scaled by 2^(maxlevel-level)
+  const unsigned int maxlevel = matrices.max_level();
+  const unsigned int n_sweeps = variable ? steps * (1<<(maxlevel-level)) : steps;
+
+  // auxiliary vectors taken from the memory pool, returned at the end
+  BlockVector<number> *residual   = mem.alloc();
+  BlockVector<number> *correction = mem.alloc();
+  residual->reinit(u);
+  correction->reinit(u);
+
+  // symmetric smoothing alternates S and its transpose; an even number of
+  // sweeps always starts with the non-transposed smoother
+  bool use_transpose = transpose && !(symmetric && (n_sweeps % 2 == 0));
+
+  for (unsigned int sweep=0; sweep<n_sweeps; ++sweep)
+    {
+      // residual = rhs - A u
+      matrices[level].vmult(*residual, u);
+      residual->sadd(-1., 1., rhs);
+
+      // correction = S residual, or S^T residual
+      if (use_transpose)
+        smoothers[level].Tvmult(*correction, *residual);
+      else
+        smoothers[level].vmult(*correction, *residual);
+      u += *correction;
+
+      if (symmetric)
+        use_transpose = !use_transpose;
+    }
+
+  mem.free(residual);
+  mem.free(correction);
+  deallog.pop();
+}
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/multigrid/mg_coarse.h b/include/deal.II/multigrid/mg_coarse.h
new file mode 100644
index 0000000..db6b57a
--- /dev/null
+++ b/include/deal.II/multigrid/mg_coarse.h
@@ -0,0 +1,357 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2002 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__mg_coarse_h
+#define dealii__mg_coarse_h
+
+
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/matrix_lib.h>
+#include <deal.II/lac/householder.h>
+#include <deal.II/multigrid/mg_base.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+/*!@addtogroup mg */
+/*@{*/
+
+/**
+ * Coarse grid solver using LAC iterative methods. This is a little wrapper,
+ * transforming a triplet of iterative solver, matrix and preconditioner into
+ * a coarse grid solver.
+ *
+ * The type of the matrix (i.e. the template parameter @p MatrixType) should
+ * be derived from @p Subscriptor to allow for the use of a smart pointer to
+ * it.
+ *
+ * @author Guido Kanschat, 1999, Ralf Hartmann, 2002.
+ */
+template<typename SolverType, class VectorType = Vector<double> >
+class MGCoarseGridLACIteration :  public MGCoarseGridBase<VectorType>
+{
+public:
+  /**
+   * Default constructor.
+   */
+  MGCoarseGridLACIteration ();
+
+  /**
+   * Constructor. Store solver, matrix and preconditioning method for later
+   * use.
+   */
+  template<typename MatrixType, typename PreconditionerType>
+  MGCoarseGridLACIteration (SolverType &,
+                            const MatrixType &,
+                            const PreconditionerType &);
+
+  /**
+   * Destructor freeing the pointers.
+   */
+  ~MGCoarseGridLACIteration ();
+
+  /**
+   * Initialize new data.
+   */
+  template<typename MatrixType, typename PreconditionerType>
+  void initialize (SolverType &,
+                   const MatrixType &,
+                   const PreconditionerType &);
+
+  /**
+   * Clear all pointers.
+   */
+  void clear ();
+
+  /**
+   * Implementation of the abstract function. Calls the solver method with
+   * matrix, vectors and preconditioner.
+   */
+  void operator() (const unsigned int level,
+                   VectorType         &dst,
+                   const VectorType   &src) const;
+
+  /**
+   * Sets the matrix. This gives the possibility to replace the matrix that
+   * was given to the constructor by a new matrix.
+   */
+  template <typename MatrixType>
+  void set_matrix (const MatrixType &);
+
+private:
+  /**
+   * Pointer to the solver.
+   */
+  SmartPointer<SolverType,MGCoarseGridLACIteration<SolverType,VectorType> > solver;
+
+  /**
+   * Pointer to the matrix wrapper; allocated with new and deleted in clear().
+   */
+  PointerMatrixBase<VectorType> *matrix;
+
+  /**
+   * Pointer to the preconditioner wrapper; allocated with new and deleted in clear().
+   */
+  PointerMatrixBase<VectorType> *precondition;
+};
+
+
+
+/**
+ * Coarse grid solver by QR factorization implemented in the class
+ * Householder.
+ *
+ * Upon initialization, the QR decomposition of the matrix is computed. Then,
+ * the operator() uses Householder::least_squares() to compute the action of
+ * the inverse.
+ *
+ * @author Guido Kanschat, 2003, 2012
+ */
+template<typename number = double, class VectorType = Vector<number> >
+class MGCoarseGridHouseholder : public MGCoarseGridBase<VectorType>
+{
+public:
+  /**
+   * Constructor, taking the coarse grid matrix. A null pointer is allowed.
+   */
+  MGCoarseGridHouseholder (const FullMatrix<number> *A = 0);
+
+  /**
+   * Initialize for a new matrix.
+   */
+  void initialize (const FullMatrix<number> &A);
+
+  void operator() (const unsigned int level,
+                   VectorType         &dst,
+                   const VectorType   &src) const; // calls Householder::least_squares(dst, src)
+
+private:
+  /**
+   * Matrix for QR-factorization.
+   */
+  Householder<number> householder;
+};
+
+/**
+ * Coarse grid solver using singular value decomposition of LAPACK matrices.
+ *
+ * Upon initialization, the inverse of the matrix is computed by singular
+ * value decomposition. Then, operator() applies this inverse through vmult().
+ *
+ * @author Guido Kanschat, 2003, 2012
+ */
+template<typename number = double, class VectorType = Vector<number> >
+class MGCoarseGridSVD : public MGCoarseGridBase<VectorType>
+{
+public:
+  /**
+   * Constructor leaving an uninitialized object.
+   */
+  MGCoarseGridSVD ();
+
+  /**
+   * Initialize for a new matrix. This resets the dimensions to those of @p A.
+   */
+  void initialize (const FullMatrix<number> &A, const double threshold = 0);
+
+  void operator() (const unsigned int level,
+                   VectorType         &dst,
+                   const VectorType   &src) const; // calls vmult(dst, src) of the stored matrix
+
+  /**
+   * Write the singular values to @p deallog.
+   */
+  void log () const;
+
+private:
+
+  /**
+   * Matrix for singular value decomposition.
+   */
+  LAPACKFullMatrix<number> matrix;
+};
+
+/*@}*/
+
+#ifndef DOXYGEN
+/* ------------------ Functions for MGCoarseGridLACIteration ------------ */
+
+
+// Create an object without solver, matrix or preconditioner.
+template<typename SolverType, class VectorType>
+MGCoarseGridLACIteration<SolverType, VectorType>
+::MGCoarseGridLACIteration()
+  : solver(0, typeid(*this).name()),
+    matrix(0),
+    precondition(0)
+{}
+
+
+// Keep a pointer to the solver and wrap matrix and preconditioner in
+// PointerMatrix objects allocated with new.
+template<typename SolverType, class VectorType>
+template<typename MatrixType, typename PreconditionerType>
+MGCoarseGridLACIteration<SolverType, VectorType>
+::MGCoarseGridLACIteration (SolverType               &s,
+                            const MatrixType         &m,
+                            const PreconditionerType &p)
+  : solver(&s, typeid(*this).name()),
+    matrix(new PointerMatrix<MatrixType, VectorType>(&m)),
+    precondition(new PointerMatrix<PreconditionerType, VectorType>(&p))
+{}
+
+
+// Release everything through clear().
+template<typename SolverType, class VectorType>
+MGCoarseGridLACIteration<SolverType, VectorType>::~MGCoarseGridLACIteration()
+{
+  this->clear();
+}
+
+
+// Replace solver, matrix and preconditioner; wrappers allocated earlier
+// are deleted before the new ones are created.
+template<typename SolverType, class VectorType>
+template<typename MatrixType, typename PreconditionerType>
+void
+MGCoarseGridLACIteration<SolverType, VectorType>
+::initialize (SolverType               &s,
+              const MatrixType         &m,
+              const PreconditionerType &p)
+{
+  solver = &s;
+  delete matrix;        // deleting a null pointer is a no-op
+  matrix = new PointerMatrix<MatrixType, VectorType>(&m);
+  delete precondition;
+  precondition = new PointerMatrix<PreconditionerType, VectorType>(&p);
+}
+
+
+// Drop the solver pointer and delete the matrix and preconditioner
+// wrappers, leaving all three members null.
+template<typename SolverType, class VectorType>
+void
+MGCoarseGridLACIteration<SolverType, VectorType>
+::clear()
+{
+  solver = 0;
+  delete matrix;        // no-op if already null
+  matrix = 0;
+  delete precondition;
+  precondition = 0;
+}
+
+
+// Solve with the stored solver, matrix and preconditioner; the level argument is not used.
+template<typename SolverType, class VectorType>
+void
+MGCoarseGridLACIteration<SolverType, VectorType>::operator() (const unsigned int /* level */,
+    VectorType         &dst,
+    const VectorType   &src) const
+{
+  Assert(solver!=0, ExcNotInitialized());
+  Assert(matrix!=0, ExcNotInitialized());
+  Assert(precondition!=0, ExcNotInitialized());
+  solver->solve(*matrix, dst, src, *precondition);
+}
+
+
+// Replace the matrix wrapper by one pointing to m; solver and
+// preconditioner are left unchanged.
+template<typename SolverType, class VectorType>
+template<typename MatrixType>
+void
+MGCoarseGridLACIteration<SolverType, VectorType>::set_matrix(const MatrixType &m)
+{
+  delete matrix;        // no-op for a null pointer
+  matrix = new PointerMatrix<MatrixType, VectorType>(&m);
+}
+
+//---------------------------------------------------------------------------
+
+template<typename number, class VectorType>
+MGCoarseGridHouseholder<number, VectorType>::MGCoarseGridHouseholder (const FullMatrix<number> *A)
+{
+  if (A == 0) return; // no matrix given: householder stays default-constructed
+  householder.initialize(*A);
+}
+
+
+
+// Forward A to the Householder member.
+template<typename number, class VectorType>
+void MGCoarseGridHouseholder<number, VectorType>::initialize(const FullMatrix<number> &A)
+{
+  this->householder.initialize(A);
+}
+
+
+
+// Compute dst from src with Householder::least_squares(); the level argument is not used.
+template<typename number, class VectorType>
+void MGCoarseGridHouseholder<number, VectorType>::operator() (const unsigned int /*level*/,
+    VectorType         &dst,
+    const VectorType   &src) const
+{
+  this->householder.least_squares(dst, src);
+}
+
+//---------------------------------------------------------------------------
+
+// The matrix member is default-constructed; initialize() fills it.
+template<typename number, class VectorType>
+inline MGCoarseGridSVD<number, VectorType>::MGCoarseGridSVD()
+{}
+
+
+
+// Resize the LAPACK matrix like A, copy A into it and call compute_inverse_svd() on it.
+template<typename number, class VectorType>
+void MGCoarseGridSVD<number, VectorType>::initialize (const FullMatrix<number> &A,
+                                                      double                    threshold)
+{
+  this->matrix.reinit(A.n_rows(), A.n_cols());
+  this->matrix = A;
+  this->matrix.compute_inverse_svd(threshold);
+}
+
+
+// Multiply src with the stored matrix and write the result to dst;
+// the level argument is not used.
+template<typename number, class VectorType>
+void MGCoarseGridSVD<number, VectorType>::operator() (const unsigned int /*level*/,
+                                                      VectorType         &dst,
+                                                      const VectorType   &src) const
+{
+  this->matrix.vmult(dst, src);
+}
+
+
+// Write the singular values, each preceded by a blank, as one line to deallog.
+template<typename number, class VectorType>
+void
+MGCoarseGridSVD<number, VectorType>::log() const
+{
+  const unsigned int n_singular_values = std::min(matrix.n_rows(), matrix.n_cols());
+  for (unsigned int k=0; k<n_singular_values; ++k)
+    deallog << ' ' << matrix.singular_value(k);
+  deallog << std::endl;
+}
+
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/multigrid/mg_constrained_dofs.h b/include/deal.II/multigrid/mg_constrained_dofs.h
new file mode 100644
index 0000000..6a839a7
--- /dev/null
+++ b/include/deal.II/multigrid/mg_constrained_dofs.h
@@ -0,0 +1,228 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__mg_constrained_dofs_h
+#define dealii__mg_constrained_dofs_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/subscriptor.h>
+
+#include <deal.II/multigrid/mg_tools.h>
+
+#include <vector>
+#include <set>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int dim, int spacedim> class DoFHandler;
+template <int dim, typename Number> struct FunctionMap;
+
+
+/**
+ * Collection of boundary constraints and refinement edge constraints for
+ * level vectors.
+ *
+ * @ingroup mg
+ */
+class MGConstrainedDoFs : public Subscriptor
+{
+public:
+
+  /**
+   * Size type of a vector of sets of global dof indices.
+   */
+  typedef std::vector<std::set<types::global_dof_index> >::size_type size_dof;
+  /**
+   * Fill the internal data structures with hanging node constraints extracted
+   * from the dof handler object. Works with natural boundary conditions only.
+   * There exists a sister function setting up boundary constraints as well.
+   *
+   * This function ensures that on every level, degrees of freedom at interior
+   * edges of a refinement level are treated correctly but leaves degrees of
+   * freedom at the boundary of the domain untouched assuming that no
+   * Dirichlet boundary conditions for them exist.
+   *
+   * Any boundary indices stored by an earlier call are discarded.
+   */
+  template <int dim, int spacedim>
+  void initialize (const DoFHandler<dim,spacedim> &dof);
+
+  /**
+   * Fill the internal data structures with values extracted from the dof
+   * handler object and apply the boundary values provided.
+   *
+   * This function internally calls the initialize() function above and then
+   * constrains degrees of freedom on the external boundary of the domain by
+   * calling MGTools::make_boundary_list() with the given second and third
+   * argument.
+   */
+  template <int dim, int spacedim>
+  void initialize(const DoFHandler<dim,spacedim> &dof,
+                  const typename FunctionMap<dim>::type &function_map,
+                  const ComponentMask &component_mask = ComponentMask());
+
+  /**
+   * Reset the data structures.
+   */
+  void clear();
+
+  /**
+   * Determine whether a dof index is subject to a boundary constraint.
+   * Returns @p false for every index if no boundary indices have been set.
+   */
+  bool is_boundary_index (const unsigned int level,
+                          const types::global_dof_index index) const;
+
+  /**
+   * Determine whether a dof index is at the refinement edge.
+   */
+  bool at_refinement_edge (const unsigned int level,
+                           const types::global_dof_index index) const;
+
+  /**
+   * Return the indices of level dofs on the given level that are subject to
+   * Dirichlet boundary conditions (as set by the @p function_map parameter in
+   * initialize()).  The indices are restricted to the set of locally relevant
+   * level dofs.
+   */
+  const IndexSet &
+  get_boundary_indices (const unsigned int level) const;
+
+
+  /**
+   * Return the indices of dofs on the given level that lie on a refinement
+   * edge (dofs on faces to neighbors that are coarser).
+   */
+  const IndexSet &
+  get_refinement_edge_indices (unsigned int level) const;
+
+
+  /**
+   * Return whether Dirichlet boundary indices have been set in initialize(),
+   * i.e. whether the list of boundary index sets is non-empty.
+   */
+  bool have_boundary_indices () const;
+
+private:
+
+  /**
+   * The indices of boundary dofs for each level.
+   */
+  std::vector<IndexSet> boundary_indices;
+
+  /**
+   * The degrees of freedom on a given level that live on the refinement edge
+   * between the level and cells on a coarser level.
+   */
+  std::vector<IndexSet> refinement_edge_indices;
+};
+
+
+// Set up the refinement edge indices for all global levels of @p dof and
+// discard any boundary indices stored previously.
+template <int dim, int spacedim>
+inline void
+MGConstrainedDoFs::initialize(const DoFHandler<dim,spacedim> &dof)
+{
+  const unsigned int n_levels = dof.get_triangulation().n_global_levels();
+
+  // This overload does not set boundary constraints; drop old ones.
+  boundary_indices.clear();
+
+  // One empty index set per level, sized by the number of level dofs.
+  refinement_edge_indices.resize(n_levels);
+  for (unsigned int level=0; level<n_levels; ++level)
+    refinement_edge_indices[level] = IndexSet(dof.n_dofs(level));
+
+  MGTools::extract_inner_interface_dofs (dof, refinement_edge_indices);
+}
+
+
+// Set up the refinement edge indices as the one-argument initialize() does
+// and additionally fill the boundary indices via
+// MGTools::make_boundary_list().
+template <int dim, int spacedim>
+inline void
+MGConstrainedDoFs::initialize(const DoFHandler<dim,spacedim> &dof,
+                              const typename FunctionMap<dim>::type &function_map,
+                              const ComponentMask &component_mask)
+{
+  initialize (dof);
+
+  // Provide one IndexSet per global level; make_boundary_list overwrites
+  // their contents.
+  boundary_indices.resize(dof.get_triangulation().n_global_levels());
+
+  MGTools::make_boundary_list (dof, function_map, boundary_indices, component_mask);
+}
+
+
+// Empty both per-level lists of index sets.
+inline void
+MGConstrainedDoFs::clear()
+{
+  refinement_edge_indices.clear();
+  boundary_indices.clear();
+}
+
+
+// Return whether @p index is in the boundary index set of @p level. If no
+// boundary indices have been set at all, the answer is always false.
+inline bool
+MGConstrainedDoFs::is_boundary_index (const unsigned int level,
+                                      const types::global_dof_index index) const
+{
+  // No boundary constraints were set up, so nothing is constrained.
+  if (boundary_indices.empty())
+    return false;
+
+  AssertIndexRange(level, boundary_indices.size());
+
+  const IndexSet &level_boundary = boundary_indices[level];
+  return level_boundary.is_element(index);
+}
+
+// Return whether @p index is in the refinement edge index set of @p level.
+inline bool
+MGConstrainedDoFs::at_refinement_edge (const unsigned int level,
+                                       const types::global_dof_index index) const
+{
+  AssertIndexRange(level, refinement_edge_indices.size());
+
+  const IndexSet &level_edge = refinement_edge_indices[level];
+  return level_edge.is_element(index);
+}
+
+
+
+
+// Return the boundary index set stored for @p level; the level is checked
+// against the number of stored sets by an assertion.
+inline const IndexSet &
+MGConstrainedDoFs::get_boundary_indices (const unsigned int level) const
+{
+  AssertIndexRange(level, boundary_indices.size());
+
+  const IndexSet &level_boundary = boundary_indices[level];
+  return level_boundary;
+}
+
+
+
+// Return the refinement edge index set stored for @p level; the level is
+// checked against the number of stored sets by an assertion.
+inline const IndexSet &
+MGConstrainedDoFs::get_refinement_edge_indices (unsigned int level) const
+{
+  AssertIndexRange(level, refinement_edge_indices.size());
+
+  const IndexSet &level_edge = refinement_edge_indices[level];
+  return level_edge;
+}
+
+
+
+
+// Return true if the list of boundary index sets is non-empty.
+inline bool
+MGConstrainedDoFs::have_boundary_indices () const
+{
+  return !boundary_indices.empty();
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/multigrid/mg_dof_accessor.h b/include/deal.II/multigrid/mg_dof_accessor.h
new file mode 100644
index 0000000..97e7552
--- /dev/null
+++ b/include/deal.II/multigrid/mg_dof_accessor.h
@@ -0,0 +1,22 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__mg_dof_accessor_h
+#define dealii__mg_dof_accessor_h
+
+#warning This file is deprecated. Use <deal.II/dofs/dof_accessor.h> instead.
+#include <deal.II/dofs/dof_accessor.h>
+
+#endif
diff --git a/include/deal.II/multigrid/mg_matrix.h b/include/deal.II/multigrid/mg_matrix.h
new file mode 100644
index 0000000..66d1e8a
--- /dev/null
+++ b/include/deal.II/multigrid/mg_matrix.h
@@ -0,0 +1,349 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__mg_matrix_h
+#define dealii__mg_matrix_h
+
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/pointer_matrix.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/multigrid/mg_base.h>
+#include <deal.II/base/mg_level_object.h>
+#include <deal.II/base/std_cxx11/shared_ptr.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+/*!@addtogroup mg */
+/*@{*/
+
+namespace mg
+{
+  /**
+   * Multilevel matrix. This matrix stores an MGLevelObject of
+   * PointerMatrixBase objects. It implements the interface defined in
+   * MGMatrixBase, so that it can be used as a matrix in Multigrid.
+   *
+   * @author Guido Kanschat
+   * @date 2002, 2010
+   */
+  template <typename VectorType = Vector<double> >
+  class Matrix
+    : public MGMatrixBase<VectorType>
+  {
+  public:
+    /**
+     * Default constructor for an empty object.
+     */
+    Matrix();
+
+    /**
+     * Constructor setting up pointers to the matrices in <tt>M</tt> by
+     * calling initialize().
+     */
+    template <typename MatrixType>
+    Matrix(const MGLevelObject<MatrixType> &M);
+
+    /**
+     * Initialize the object such that the level multiplication uses the
+     * matrices in <tt>M</tt>. The level range of this object is set to the
+     * range from <tt>M.min_level()</tt> to <tt>M.max_level()</tt>.
+     */
+    template <typename MatrixType>
+    void
+    initialize(const MGLevelObject<MatrixType> &M);
+
+    /**
+     * Access matrix on a level.
+     */
+    const PointerMatrixBase<VectorType> &operator[] (unsigned int level) const;
+
+    /**
+     * Matrix-vector multiplication on the given level; forwards to
+     * <tt>vmult</tt> of the matrix stored for that level.
+     */
+    virtual void vmult (const unsigned int level, VectorType &dst, const VectorType &src) const;
+    /**
+     * Adding matrix-vector multiplication on the given level; forwards to
+     * <tt>vmult_add</tt> of the matrix stored for that level.
+     */
+    virtual void vmult_add (const unsigned int level, VectorType &dst, const VectorType &src) const;
+    /**
+     * Transpose matrix-vector multiplication on the given level; forwards to
+     * <tt>Tvmult</tt> of the matrix stored for that level.
+     */
+    virtual void Tvmult (const unsigned int level, VectorType &dst, const VectorType &src) const;
+    /**
+     * Adding transpose matrix-vector multiplication on the given level;
+     * forwards to <tt>Tvmult_add</tt> of the matrix stored for that level.
+     */
+    virtual void Tvmult_add (const unsigned int level, VectorType &dst, const VectorType &src) const;
+
+    /**
+     * Memory used by this object.
+     */
+    std::size_t memory_consumption () const;
+  private:
+    /**
+     * For each level, a shared pointer to the PointerMatrixBase object
+     * through which the multiplications on that level are performed.
+     */
+    MGLevelObject<std_cxx11::shared_ptr<PointerMatrixBase<VectorType> > > matrices;
+  };
+
+}
+
+
+/**
+ * Multilevel matrix selecting from block matrices. This class implements the
+ * interface defined by MGMatrixBase.  The template parameter @p MatrixType
+ * should be a block matrix class like BlockSparseMatrix or @p
+ * BlockSparseMatrixEZ. Then, this class stores a pointer to a MGLevelObject
+ * of this matrix class. In each @p vmult, the block selected on
+ * initialization will be multiplied with the vector provided.
+ *
+ * @author Guido Kanschat, 2002
+ */
+template <typename MatrixType, typename number>
+class MGMatrixSelect : public MGMatrixBase<Vector<number> >
+{
+public:
+  /**
+   * Constructor. @p row and @p col are the coordinates of the selected
+   * block. The other argument is handed over to the @p SmartPointer
+   * constructor.
+   */
+  MGMatrixSelect (const unsigned int         row    = 0,
+                  const unsigned int         col    = 0,
+                  MGLevelObject<MatrixType> *matrix = 0);
+
+  /**
+   * Set the matrix object to be used. The matrix object must exist longer
+   * than the @p MGMatrixSelect object, since only a pointer is stored.
+   */
+  void set_matrix (MGLevelObject<MatrixType> *M);
+
+  /**
+   * Select the block for multiplication.
+   */
+  void select_block (const unsigned int row,
+                     const unsigned int col);
+
+  /**
+   * Matrix-vector-multiplication on a certain level.
+   */
+  virtual void vmult (const unsigned int level,
+                      Vector<number> &dst,
+                      const Vector<number> &src) const;
+
+  /**
+   * Adding matrix-vector-multiplication on a certain level.
+   */
+  virtual void vmult_add (const unsigned int level,
+                          Vector<number> &dst,
+                          const Vector<number> &src) const;
+
+  /**
+   * Transpose matrix-vector-multiplication on a certain level.
+   */
+  virtual void Tvmult (const unsigned int level,
+                       Vector<number> &dst,
+                       const Vector<number> &src) const;
+
+  /**
+   * Adding transpose matrix-vector-multiplication on a certain level.
+   */
+  virtual void Tvmult_add (const unsigned int level,
+                           Vector<number> &dst,
+                           const Vector<number> &src) const;
+
+private:
+  /**
+   * Pointer to the matrix objects on each level.
+   */
+  SmartPointer<MGLevelObject<MatrixType>,MGMatrixSelect<MatrixType,number> > matrix;
+  /**
+   * Row coordinate of selected block.
+   */
+  unsigned int row;
+  /**
+   * Column coordinate of selected block.
+   */
+  unsigned int col;
+
+};
+
+/*@}*/
+
+/*----------------------------------------------------------------------*/
+
+namespace mg
+{
+  // Resize the internal level object to the level range of @p p and store,
+  // for every level, a shared pointer to the object returned by
+  // new_pointer_matrix_base() for the matrix on that level.
+  template <typename VectorType>
+  template <typename MatrixType>
+  inline void
+  Matrix<VectorType>::initialize (const MGLevelObject<MatrixType> &p)
+  {
+    typedef std_cxx11::shared_ptr<PointerMatrixBase<VectorType> > pointer_type;
+
+    const unsigned int first_level = p.min_level();
+    const unsigned int last_level  = p.max_level();
+
+    matrices.resize(first_level, last_level);
+    for (unsigned int lvl = first_level; lvl <= last_level; ++lvl)
+      matrices[lvl] = pointer_type(new_pointer_matrix_base(p[lvl], VectorType()));
+  }
+
+
+  // Construct the object and immediately initialize() it from @p p.
+  template <typename VectorType>
+  template <typename MatrixType>
+  inline Matrix<VectorType>::Matrix (const MGLevelObject<MatrixType> &p)
+  {
+    this->initialize(p);
+  }
+
+
+  // Default constructor. The body is empty; members are
+  // default-initialized.
+  template <typename VectorType>
+  inline Matrix<VectorType>::Matrix ()
+  {
+  }
+
+
+  // Return a reference to the matrix object stored for @p level by
+  // dereferencing the pointer held for that level.
+  template <typename VectorType>
+  inline const PointerMatrixBase<VectorType> &
+  Matrix<VectorType>::operator[] (unsigned int level) const
+  {
+    return *(matrices[level]);
+  }
+
+
+  // Forward to vmult() of the matrix stored for @p level.
+  template <typename VectorType>
+  void Matrix<VectorType>::vmult (const unsigned int level,
+                                  VectorType         &dst,
+                                  const VectorType   &src) const
+  {
+    (*matrices[level]).vmult(dst, src);
+  }
+
+
+  // Forward to vmult_add() of the matrix stored for @p level.
+  template <typename VectorType>
+  void Matrix<VectorType>::vmult_add (const unsigned int level,
+                                      VectorType         &dst,
+                                      const VectorType   &src) const
+  {
+    (*matrices[level]).vmult_add(dst, src);
+  }
+
+
+  // Forward to Tvmult() of the matrix stored for @p level.
+  template <typename VectorType>
+  void Matrix<VectorType>::Tvmult (const unsigned int level,
+                                   VectorType         &dst,
+                                   const VectorType   &src) const
+  {
+    (*matrices[level]).Tvmult(dst, src);
+  }
+
+
+  // Forward to Tvmult_add() of the matrix stored for @p level.
+  template <typename VectorType>
+  void Matrix<VectorType>::Tvmult_add (const unsigned int level,
+                                       VectorType         &dst,
+                                       const VectorType   &src) const
+  {
+    (*matrices[level]).Tvmult_add(dst, src);
+  }
+
+
+  // Estimate of the memory used by this object: the size of the object
+  // itself plus whatever memory_consumption() reports for the level
+  // matrices.
+  //
+  // NOTE(review): the member 'matrices' is declared in the class as an
+  // MGLevelObject held by value, yet it is accessed with '->' here. Unless
+  // MGLevelObject provides an operator->, this will presumably fail to
+  // compile as soon as the function is instantiated -- confirm and fix.
+  template <typename VectorType>
+  inline
+  std::size_t
+  Matrix<VectorType>::memory_consumption () const
+  {
+    return sizeof(*this) + matrices->memory_consumption();
+  }
+}
+
+
+/*----------------------------------------------------------------------*/
+
+// Store the pointer to the level matrices together with the coordinates of
+// the selected block. The type name of this object is passed as second
+// argument to the SmartPointer constructor.
+template <typename MatrixType, typename number>
+MGMatrixSelect<MatrixType, number>::MGMatrixSelect
+(const unsigned int         block_row,
+ const unsigned int         block_col,
+ MGLevelObject<MatrixType> *level_matrices)
+  :
+  matrix (level_matrices, typeid(*this).name()),
+  row (block_row),
+  col (block_col)
+{
+}
+
+
+
+// Replace the stored pointer to the level matrices by @p p. Only the
+// pointer is stored; the matrices themselves are not copied.
+template <typename MatrixType, typename number>
+void MGMatrixSelect<MatrixType, number>::set_matrix
+(MGLevelObject<MatrixType> *p)
+{
+  matrix = p;
+}
+
+
+// Remember the coordinates of the block used by the multiplication
+// functions.
+template <typename MatrixType, typename number>
+void MGMatrixSelect<MatrixType, number>::select_block
+(const unsigned int brow,
+ const unsigned int bcol)
+{
+  col = bcol;
+  row = brow;
+}
+
+
+// Multiply @p src with the selected block of the matrix on @p level and
+// store the result in @p dst. Asserts that a matrix object has been set.
+template <typename MatrixType, typename number>
+void MGMatrixSelect<MatrixType, number>::vmult
+(const unsigned int    level,
+ Vector<number>       &dst,
+ const Vector<number> &src) const
+{
+  Assert(matrix != 0, ExcNotInitialized());
+
+  const MGLevelObject<MatrixType> &level_matrices = *matrix;
+  const MatrixType &level_matrix = level_matrices[level];
+  level_matrix.block(row, col).vmult(dst, src);
+}
+
+
+// Call vmult_add() of the selected block of the matrix on @p level with
+// @p dst and @p src. Asserts that a matrix object has been set.
+template <typename MatrixType, typename number>
+void MGMatrixSelect<MatrixType, number>::vmult_add
+(const unsigned int    level,
+ Vector<number>       &dst,
+ const Vector<number> &src) const
+{
+  Assert(matrix != 0, ExcNotInitialized());
+
+  const MGLevelObject<MatrixType> &level_matrices = *matrix;
+  const MatrixType &level_matrix = level_matrices[level];
+  level_matrix.block(row, col).vmult_add(dst, src);
+}
+
+
+// Call Tvmult() of the selected block of the matrix on @p level with
+// @p dst and @p src. Asserts that a matrix object has been set.
+template <typename MatrixType, typename number>
+void MGMatrixSelect<MatrixType, number>::Tvmult
+(const unsigned int    level,
+ Vector<number>       &dst,
+ const Vector<number> &src) const
+{
+  Assert(matrix != 0, ExcNotInitialized());
+
+  const MGLevelObject<MatrixType> &level_matrices = *matrix;
+  const MatrixType &level_matrix = level_matrices[level];
+  level_matrix.block(row, col).Tvmult(dst, src);
+}
+
+
+// Call Tvmult_add() of the selected block of the matrix on @p level with
+// @p dst and @p src. Asserts that a matrix object has been set.
+template <typename MatrixType, typename number>
+void MGMatrixSelect<MatrixType, number>::Tvmult_add
+(const unsigned int    level,
+ Vector<number>       &dst,
+ const Vector<number> &src) const
+{
+  Assert(matrix != 0, ExcNotInitialized());
+
+  const MGLevelObject<MatrixType> &level_matrices = *matrix;
+  const MatrixType &level_matrix = level_matrices[level];
+  level_matrix.block(row, col).Tvmult_add(dst, src);
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/multigrid/mg_smoother.h b/include/deal.II/multigrid/mg_smoother.h
new file mode 100644
index 0000000..1c007aa
--- /dev/null
+++ b/include/deal.II/multigrid/mg_smoother.h
@@ -0,0 +1,1057 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__mg_smoother_h
+#define dealii__mg_smoother_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/lac/pointer_matrix.h>
+#include <deal.II/lac/vector_memory.h>
+#include <deal.II/multigrid/mg_base.h>
+#include <deal.II/base/mg_level_object.h>
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+/*
+ * MGSmootherBase is defined in mg_base.h
+ */
+
+/*!@addtogroup mg */
+/*@{*/
+
+/**
+ * A base class for smoother handling information on smoothing. While not
+ * adding to the abstract interface in MGSmootherBase, this class stores
+ * information on the number and type of smoothing steps, which in turn can be
+ * used by a derived class.
+ *
+ * @author Guido Kanschat 2009
+ */
+template <typename VectorType>
+class MGSmoother : public MGSmootherBase<VectorType>
+{
+public:
+  /**
+   * Constructor. The arguments carry the same names as the member variables
+   * #steps, #variable, #symmetric and #transpose documented below; the
+   * constructor presumably stores them there (its definition is not part of
+   * this section of the header).
+   */
+  MGSmoother(const unsigned int steps = 1,
+             const bool variable = false,
+             const bool symmetric = false,
+             const bool transpose = false);
+
+  /**
+   * Modify the number of smoothing steps on finest level.
+   */
+  void set_steps (const unsigned int);
+
+  /**
+   * Switch on/off variable smoothing.
+   */
+  void set_variable (const bool);
+
+  /**
+   * Switch on/off symmetric smoothing.
+   */
+  void set_symmetric (const bool);
+
+  /**
+   * Switch on/off transposed smoothing. The effect is overridden by
+   * set_symmetric().
+   */
+  void set_transpose (const bool);
+
+  /**
+   * Set @p debug to a nonzero value to get debug information logged to @p
+   * deallog. Increase the value to get more information.
+   */
+  void set_debug (const unsigned int level);
+
+protected:
+  /**
+   * A memory object to be used for temporary vectors.
+   *
+   * The object is marked as mutable since we will need to use it to allocate
+   * temporary vectors also in functions that are const.
+   */
+  mutable GrowingVectorMemory<VectorType> vector_memory;
+
+  /**
+   * Number of smoothing steps on the finest level. If no #variable smoothing
+   * is chosen, this is the number of steps on all levels.
+   */
+  unsigned int steps;
+
+  /**
+   * Variable smoothing: double the number of smoothing steps whenever going
+   * to the next coarser level.
+   */
+  bool variable;
+
+  /**
+   * Symmetric smoothing: in the smoothing iteration, alternate between the
+   * relaxation method and its transpose.
+   */
+  bool symmetric;
+
+  /**
+   * Use the transpose of the relaxation method instead of the method itself.
+   * This has no effect if #symmetric smoothing is chosen.
+   */
+  bool transpose;
+
+  /**
+   * Output debugging information to @p deallog if this is nonzero.
+   */
+  unsigned int debug;
+};
+
+
+/**
+ * Smoother doing nothing. This class is not useful for many applications
+ * other than for testing some multigrid procedures. Also some applications
+ * might get convergence without smoothing and then this class brings you the
+ * cheapest possible multigrid.
+ *
+ * @author Guido Kanschat, 1999, 2002
+ */
+template <typename VectorType>
+class MGSmootherIdentity : public MGSmootherBase<VectorType>
+{
+public:
+  /**
+   * Implementation of the interface for @p Multigrid. This function does
+   * nothing, which by comparison with the definition of this function means
+   * that the smoothing operator equals the null operator.
+   */
+  virtual void smooth (const unsigned int level,
+                       VectorType         &u,
+                       const VectorType   &rhs) const;
+  /**
+   * Virtual clear function. NOTE(review): the definition is not part of this
+   * section of the header; since this class declares no data members it
+   * presumably does nothing -- confirm.
+   */
+  virtual void clear ();
+};
+
+
+namespace mg
+{
+  /**
+   * Smoother using relaxation classes.
+   *
+   * A relaxation class is an object that satisfies the
+   * @ref ConceptRelaxationType "relaxation concept".
+   *
+   * This class performs smoothing on each level. The operation can be
+   * controlled by several parameters. First, the relaxation parameter @p
+   * omega is used in the underlying relaxation method. @p steps is the number
+   * of relaxation steps on the finest level (on all levels if @p variable is
+   * off). If @p variable is @p true, the number of smoothing steps is doubled
+   * on each coarser level. This results in a method having the complexity of
+   * the W-cycle, but saving grid transfers. This is the method proposed by
+   * Bramble et al.
+   *
+   * The option @p symmetric switches on alternating between the smoother and
+   * its transpose in each step as proposed by Bramble.
+   *
+   * @p transpose uses the transposed smoothing operation using <tt>Tstep</tt>
+   * instead of the regular <tt>step</tt> of the relaxation scheme.
+   *
+   * If you are using block matrices, the second @p initialize function offers
+   * the possibility to extract a single block for smoothing. In this case,
+   * the multigrid method must be used only with the vector associated to that
+   * single block.
+   *
+   * @author Guido Kanschat,
+   * @date 2003, 2009, 2010
+   */
+  template<class RelaxationType, typename VectorType>
+  class SmootherRelaxation : public MGLevelObject<RelaxationType>, public MGSmoother<VectorType>
+  {
+  public:
+    /**
+     * Constructor. Sets smoothing parameters. The arguments carry the same
+     * names as the parameters of the MGSmoother constructor.
+     */
+    SmootherRelaxation(const unsigned int steps = 1,
+                       const bool variable = false,
+                       const bool symmetric = false,
+                       const bool transpose = false);
+
+    /**
+     * Initialize for matrices. This function initializes the smoothing
+     * operator with the same smoother for each level.
+     *
+     * @p additional_data is an object of type @p
+     * RelaxationType::AdditionalData and is handed to the initialization
+     * function of the relaxation method. If it is omitted, a
+     * default-constructed object of that type is used.
+     */
+    template <typename MatrixType2>
+    void initialize (const MGLevelObject<MatrixType2>     &matrices,
+                     const typename RelaxationType::AdditionalData &additional_data
+                     = typename RelaxationType::AdditionalData());
+
+    /**
+     * Initialize matrices and additional data for each level.
+     *
+     * If minimal or maximal level of the two objects differ, the greatest
+     * common range is utilized. This way, smoothing can be restricted to
+     * certain levels even if the matrix was generated for all levels.
+     */
+    template <typename MatrixType2, class DATA>
+    void initialize (const MGLevelObject<MatrixType2> &matrices,
+                     const MGLevelObject<DATA>        &additional_data);
+
+    /**
+     * Empty all vectors.
+     */
+    void clear ();
+
+    /**
+     * The actual smoothing method. @p level is the level to smooth on, @p u
+     * the vector to be smoothed and @p rhs the right hand side.
+     */
+    virtual void smooth (const unsigned int level,
+                         VectorType         &u,
+                         const VectorType   &rhs) const;
+
+    /**
+     * Memory used by this object.
+     */
+    std::size_t memory_consumption () const;
+  };
+}
+
+/**
+ * Smoother using a solver that satisfies the
+ * @ref ConceptRelaxationType "relaxation concept".
+ *
+ * This class performs smoothing on each level. The operation can be
+ * controlled by several parameters. First, the relaxation parameter @p omega
+ * is used in the underlying relaxation method. @p steps is the number of
+ * relaxation steps on the finest level (on all levels if @p variable is off).
+ * If @p variable is @p true, the number of smoothing steps is doubled on each
+ * coarser level. This results in a method having the complexity of the
+ * W-cycle, but saving grid transfers. This is the method proposed by Bramble
+ * et al.
+ *
+ * The option @p symmetric switches on alternating between the smoother and
+ * its transpose in each step as proposed by Bramble.
+ *
+ * @p transpose uses the transposed smoothing operation using <tt>Tstep</tt>
+ * instead of the regular <tt>step</tt> of the relaxation scheme.
+ *
+ * If you are using block matrices, the second @p initialize function offers
+ * the possibility to extract a single block for smoothing. In this case, the
+ * multigrid method must be used only with the vector associated to that
+ * single block.
+ *
+ * The library contains instantiation for <tt>SparseMatrix<.></tt> and
+ * <tt>Vector<.></tt>, where the template arguments are all combinations of @p
+ * float and @p double. Additional instantiations may be created by including
+ * the file mg_smoother.templates.h.
+ *
+ * @author Guido Kanschat, 2003
+ */
+template<typename MatrixType, class RelaxationType, typename VectorType>
+class MGSmootherRelaxation : public MGSmoother<VectorType>
+{
+public:
+  /**
+   * Constructor. Sets smoothing parameters. The arguments carry the same
+   * names as the parameters of the MGSmoother constructor.
+   */
+  MGSmootherRelaxation(const unsigned int steps = 1,
+                       const bool variable = false,
+                       const bool symmetric = false,
+                       const bool transpose = false);
+
+  /**
+   * Initialize for matrices. This function stores pointers to the level
+   * matrices and initializes the smoothing operator with the same smoother
+   * for each level.
+   *
+   * @p additional_data is an object of type @p RelaxationType::AdditionalData
+   * and is handed to the initialization function of the relaxation method.
+   * If it is omitted, a default-constructed object of that type is used.
+   */
+  template <typename MatrixType2>
+  void initialize (const MGLevelObject<MatrixType2>     &matrices,
+                   const typename RelaxationType::AdditionalData &additional_data
+                   = typename RelaxationType::AdditionalData());
+
+  /**
+   * Initialize for matrices. This function stores pointers to the level
+   * matrices and initializes the smoothing operator with the according
+   * smoother for each level.
+   *
+   * @p additional_data is a level object with entries of type @p DATA, which
+   * are handed to the initialization function of the relaxation method.
+   */
+  template <typename MatrixType2, class DATA>
+  void initialize (const MGLevelObject<MatrixType2> &matrices,
+                   const MGLevelObject<DATA>        &additional_data);
+
+  /**
+   * Initialize for single blocks of matrices. Of this block matrix, the block
+   * indicated by @p block_row and @p block_col is selected on each level.
+   * This function stores pointers to the level matrices and initializes the
+   * smoothing operator with the same smoother for each level.
+   *
+   * @p additional_data is an object of type @p DATA and is handed to the
+   * initialization function of the relaxation method.
+   */
+  template <typename MatrixType2, class DATA>
+  void initialize (const MGLevelObject<MatrixType2> &matrices,
+                   const DATA                       &additional_data,
+                   const unsigned int                block_row,
+                   const unsigned int                block_col);
+
+  /**
+   * Initialize for single blocks of matrices. Of this block matrix, the block
+   * indicated by @p block_row and @p block_col is selected on each level.
+   * This function stores pointers to the level matrices and initializes the
+   * smoothing operator with the according smoother for each level.
+   *
+   * @p additional_data is a level object with entries of type @p DATA, which
+   * are handed to the initialization function of the relaxation method.
+   */
+  template <typename MatrixType2, class DATA>
+  void initialize (const MGLevelObject<MatrixType2> &matrices,
+                   const MGLevelObject<DATA>        &additional_data,
+                   const unsigned int                block_row,
+                   const unsigned int                block_col);
+
+  /**
+   * Empty all vectors.
+   */
+  void clear ();
+
+  /**
+   * The actual smoothing method. @p level is the level to smooth on, @p u
+   * the vector to be smoothed and @p rhs the right hand side.
+   */
+  virtual void smooth (const unsigned int level,
+                       VectorType         &u,
+                       const VectorType   &rhs) const;
+
+  /**
+   * Object containing relaxation methods. Note that this member is public.
+   */
+  MGLevelObject<RelaxationType> smoothers;
+
+  /**
+   * Memory used by this object.
+   */
+  std::size_t memory_consumption () const;
+
+
+private:
+  /**
+   * Pointer to the matrices, one PointerMatrix object per level.
+   */
+  MGLevelObject<PointerMatrix<MatrixType, VectorType> > matrices;
+
+};
+
+
+
+/**
+ * Smoother using preconditioner classes.
+ *
+ * This class performs smoothing on each level. The operation can be
+ * controlled by several parameters. First, the relaxation parameter @p omega
+ * is used in the underlying relaxation method. @p steps is the number of
+ * relaxation steps on the finest level (on all levels if @p variable is off).
+ * If @p variable is @p true, the number of smoothing steps is doubled on each
+ * coarser level. This results in a method having the complexity of the
+ * W-cycle, but saving grid transfers. This is the method proposed by Bramble
+ * et al.
+ *
+ * The option @p symmetric switches on alternating between the smoother and
+ * its transpose in each step as proposed by Bramble.
+ *
+ * @p transpose uses the transposed smoothing operation using <tt>Tvmult</tt>
+ * instead of the regular <tt>vmult</tt> of the relaxation scheme.
+ *
+ * If you are using block matrices, the second @p initialize function offers
+ * the possibility to extract a single block for smoothing. In this case, the
+ * multigrid method must be used only with the vector associated to that
+ * single block.
+ *
+ * The library contains instantiation for <tt>SparseMatrix<.></tt> and
+ * <tt>Vector<.></tt>, where the template arguments are all combinations of @p
+ * float and @p double. Additional instantiations may be created by including
+ * the file mg_smoother.templates.h.
+ *
+ * @author Guido Kanschat, 2009
+ */
+template<typename MatrixType, typename PreconditionerType, typename VectorType>
+class MGSmootherPrecondition : public MGSmoother<VectorType>
+{
+public:
+  /**
+   * Constructor. Sets smoothing parameters.
+   */
+  MGSmootherPrecondition(const unsigned int steps = 1,
+                         const bool variable = false,
+                         const bool symmetric = false,
+                         const bool transpose = false);
+
+  /**
+   * Initialize for matrices. This function stores pointers to the level
+   * matrices and initializes the smoothing operator with the same smoother
+   * for each level.
+   *
+   * @p additional_data is an object of type @p
+   * PreconditionerType::AdditionalData and is handed to the initialization
+   * function of the preconditioner on every level.
+   */
+  template <typename MatrixType2>
+  void initialize (const MGLevelObject<MatrixType2> &matrices,
+                   const typename PreconditionerType::AdditionalData &additional_data = typename PreconditionerType::AdditionalData());
+
+  /**
+   * Initialize for matrices. This function stores pointers to the level
+   * matrices and initializes the smoothing operator with the according
+   * smoother for each level.
+   *
+   * @p additional_data must cover the same levels as @p matrices; the entry
+   * stored for a level is handed to the initialization function of the
+   * preconditioner on that level.
+   */
+  template <typename MatrixType2, class DATA>
+  void initialize (const MGLevelObject<MatrixType2> &matrices,
+                   const MGLevelObject<DATA>        &additional_data);
+
+  /**
+   * Initialize for single blocks of matrices. Of this block matrix, the block
+   * indicated by @p block_row and @p block_col is selected on each level.
+   * This function stores pointers to the level matrices and initializes the
+   * smoothing operator with the same smoother for each level.
+   *
+   * @p additional_data may be of any type accepted by the initialization
+   * function of the preconditioner, to which it is handed unchanged on
+   * every level together with the selected block.
+   */
+  template <typename MatrixType2, class DATA>
+  void initialize (const MGLevelObject<MatrixType2> &matrices,
+                   const DATA                       &additional_data,
+                   const unsigned int                block_row,
+                   const unsigned int                block_col);
+
+  /**
+   * Initialize for single blocks of matrices. Of this block matrix, the block
+   * indicated by @p block_row and @p block_col is selected on each level.
+   * This function stores pointers to the level matrices and initializes the
+   * smoothing operator with the according smoother for each level.
+   *
+   * @p additional_data must cover the same levels as @p matrices; the entry
+   * stored for a level is handed, together with the selected block, to the
+   * initialization function of the preconditioner on that level.
+   */
+  template <typename MatrixType2, class DATA>
+  void initialize (const MGLevelObject<MatrixType2> &matrices,
+                   const MGLevelObject<DATA>        &additional_data,
+                   const unsigned int                block_row,
+                   const unsigned int                block_col);
+
+  /**
+   * Call clear() on the smoothers and reset the stored matrix pointers.
+   */
+  void clear ();
+
+  /**
+   * The actual smoothing method.
+   */
+  virtual void smooth (const unsigned int level,
+                       VectorType         &u,
+                       const VectorType   &rhs) const;
+
+  /**
+   * Object containing the preconditioner of each level.
+   */
+  MGLevelObject<PreconditionerType> smoothers;
+
+  /**
+   * Memory used by this object.
+   */
+  std::size_t memory_consumption () const;
+
+
+private:
+  /**
+   * Pointers to the level matrices.
+   */
+  MGLevelObject<PointerMatrix<MatrixType, VectorType> > matrices;
+
+};
+
+/*@}*/
+
+/* ------------------------------- Inline functions -------------------------- */
+
+#ifndef DOXYGEN
+
+// The identity smoother leaves the iterate untouched; all arguments are ignored.
+template <typename VectorType>
+inline void
+MGSmootherIdentity<VectorType>::smooth
+(const unsigned int, VectorType &, const VectorType &) const
+{}
+
+// Intentionally a no-op.
+template <typename VectorType>
+inline void MGSmootherIdentity<VectorType>::clear ()
+{}
+
+//---------------------------------------------------------------------------
+
+// Store the smoothing parameters; the debug level starts at zero.
+template <typename VectorType>
+inline MGSmoother<VectorType>::MGSmoother
+(const unsigned int steps,
+ const bool         variable,
+ const bool         symmetric,
+ const bool         transpose)
+  : steps(steps),
+    variable(variable),
+    symmetric(symmetric),
+    transpose(transpose),
+    debug(0)
+{}
+
+
+// Overwrite the stored number of smoothing steps.
+template <typename VectorType>
+inline void MGSmoother<VectorType>::set_steps (const unsigned int s)
+{
+  this->steps = s;
+}
+
+
+// Overwrite the stored debug level.
+template <typename VectorType>
+inline void MGSmoother<VectorType>::set_debug (const unsigned int s)
+{
+  this->debug = s;
+}
+
+
+// Store the flag that switches on level-dependent step counts.
+template <typename VectorType>
+inline void MGSmoother<VectorType>::set_variable (const bool flag)
+{
+  this->variable = flag;
+}
+
+
+// Store the flag for alternating between the smoother and its transpose.
+template <typename VectorType>
+inline void MGSmoother<VectorType>::set_symmetric (const bool flag)
+{
+  this->symmetric = flag;
+}
+
+
+// Store the flag that selects the transposed smoothing operation.
+template <typename VectorType>
+inline void MGSmoother<VectorType>::set_transpose (const bool flag)
+{
+  this->transpose = flag;
+}
+
+//----------------------------------------------------------------------//
+
+namespace mg
+{
+  // Forward all smoothing parameters unchanged to the MGSmoother base.
+  template <class RelaxationType, typename VectorType>
+  inline SmootherRelaxation<RelaxationType, VectorType>::SmootherRelaxation
+  (const unsigned int steps,
+   const bool         variable,
+   const bool         symmetric,
+   const bool         transpose)
+    : MGSmoother<VectorType>(steps, variable, symmetric, transpose)
+  {}
+
+
+  // Delegate to the clear() function of the MGLevelObject base class.
+  template <class RelaxationType, typename VectorType>
+  inline void SmootherRelaxation<RelaxationType, VectorType>::clear ()
+  {
+    MGLevelObject<RelaxationType>::clear();
+  }
+
+
+  // Resize to the level range of @p m, then initialize the relaxation object
+  // of every level with that level's matrix and the common @p data.
+  template <class RelaxationType, typename VectorType>
+  template <typename MatrixType2>
+  inline void
+  SmootherRelaxation<RelaxationType, VectorType>::initialize
+  (const MGLevelObject<MatrixType2>              &m,
+   const typename RelaxationType::AdditionalData &data)
+  {
+    const unsigned int coarsest = m.min_level();
+    const unsigned int finest   = m.max_level();
+    this->resize(coarsest, finest);
+    for (unsigned int level=coarsest; level<=finest; ++level)
+      (*this)[level].initialize(m[level], data);
+  }
+
+
+  // Work on the levels present in both @p m and @p data: resize to that
+  // range and initialize each level with its own matrix and data entry.
+  template <class RelaxationType, typename VectorType>
+  template <typename MatrixType2, class DATA>
+  inline void
+  SmootherRelaxation<RelaxationType, VectorType>::initialize
+  (const MGLevelObject<MatrixType2> &m,
+   const MGLevelObject<DATA>        &data)
+  {
+    const unsigned int first = std::max(m.min_level(), data.min_level());
+    const unsigned int last  = std::min(m.max_level(), data.max_level());
+    this->resize(first, last);
+    for (unsigned int level=first; level<=last; ++level)
+      (*this)[level].initialize(m[level], data[level]);
+  }
+
+
+  // Apply the relaxation of @p level repeatedly to @p u. With symmetric set,
+  // step() and Tstep() alternate and an even count starts with step().
+  template <class RelaxationType, typename VectorType>
+  inline void
+  SmootherRelaxation<RelaxationType, VectorType>::smooth
+  (const unsigned int  level,
+   VectorType         &u,
+   const VectorType   &rhs) const
+  {
+    const unsigned int maxlevel = this->max_level();
+    unsigned int n_sweeps = this->steps;
+    if (this->variable)
+      n_sweeps *= (1<<(maxlevel-level));
+
+    const bool even_symmetric = this->symmetric && (n_sweeps % 2 == 0);
+    bool use_transpose = this->transpose && !even_symmetric;
+    if (this->debug > 0)
+      deallog << 'S' << level << ' ';
+    for (unsigned int sweep=0; sweep<n_sweeps; ++sweep)
+      {
+        if (use_transpose)
+          (*this)[level].Tstep(u, rhs);
+        else
+          (*this)[level].step(u, rhs);
+        if (this->symmetric)
+          use_transpose = !use_transpose;
+      }
+  }
+
+
+  template <class RelaxationType, typename VectorType>
+  inline std::size_t
+  SmootherRelaxation<RelaxationType, VectorType>::memory_consumption () const
+  {
+    // The size of the MGLevelObject base is subtracted and replaced by what
+    // the base reports itself, presumably to avoid counting it twice.
+    std::size_t total = sizeof(*this) - sizeof(MGLevelObject<RelaxationType>);
+    total += MGLevelObject<RelaxationType>::memory_consumption();
+    total += this->vector_memory.memory_consumption();
+    return total;
+  }
+}
+
+
+//----------------------------------------------------------------------//
+
+// All smoothing parameters are passed on unchanged to the MGSmoother base.
+template <typename MatrixType, class RelaxationType, typename VectorType>
+inline
+MGSmootherRelaxation<MatrixType, RelaxationType, VectorType>::MGSmootherRelaxation
+(const unsigned int steps,
+ const bool         variable,
+ const bool         symmetric,
+ const bool         transpose)
+  : MGSmoother<VectorType>(steps, variable, symmetric, transpose)
+{}
+
+
+
+// Forward clear() to the smoothers, then assign 0 to the matrix entry of
+// every level.
+template <typename MatrixType, class RelaxationType, typename VectorType>
+inline void
+MGSmootherRelaxation<MatrixType, RelaxationType, VectorType>::clear ()
+{
+  smoothers.clear();
+  const unsigned int last_level = matrices.max_level();
+  for (unsigned int level=matrices.min_level(); level<=last_level; ++level)
+    matrices[level]=0;
+}
+
+
+// Size both level objects to the level range of @p m, then on each level
+// store the matrix address and set up the smoother with the shared @p data.
+template <typename MatrixType, class RelaxationType, typename VectorType>
+template <typename MatrixType2>
+inline void
+MGSmootherRelaxation<MatrixType, RelaxationType, VectorType>::initialize
+(const MGLevelObject<MatrixType2>              &m,
+ const typename RelaxationType::AdditionalData &data)
+{
+  const unsigned int coarsest = m.min_level();
+  const unsigned int finest   = m.max_level();
+  matrices.resize(coarsest, finest);
+  smoothers.resize(coarsest, finest);
+  for (unsigned int level=coarsest; level<=finest; ++level)
+    {
+      matrices[level] = &m[level];
+      smoothers[level].initialize(m[level], data);
+    }
+}
+
+// Per-level variant: @p data must span exactly the levels of @p m (checked
+// by the assertions); each level's smoother is set up with its own entry.
+template <typename MatrixType, class RelaxationType, typename VectorType>
+template <typename MatrixType2, class DATA>
+inline void
+MGSmootherRelaxation<MatrixType, RelaxationType, VectorType>::initialize
+(const MGLevelObject<MatrixType2> &m,
+ const MGLevelObject<DATA>        &data)
+{
+  const unsigned int min = m.min_level();
+  const unsigned int max = m.max_level();
+  Assert (data.min_level() == min,
+          ExcDimensionMismatch(data.min_level(), min));
+  Assert (data.max_level() == max,
+          ExcDimensionMismatch(data.max_level(), max));
+
+  matrices.resize(min, max);
+  smoothers.resize(min, max);
+  for (unsigned int level=min; level<=max; ++level)
+    {
+      matrices[level] = &m[level];
+      smoothers[level].initialize(m[level], data[level]);
+    }
+}
+
+// Block variant: on each level only block (row, col) of the level matrix is
+// stored and handed, with the shared @p data, to that level's smoother.
+template <typename MatrixType, class RelaxationType, typename VectorType>
+template <typename MatrixType2, class DATA>
+inline void
+MGSmootherRelaxation<MatrixType, RelaxationType, VectorType>::initialize
+(const MGLevelObject<MatrixType2> &m,
+ const DATA                       &data,
+ const unsigned int                row,
+ const unsigned int                col)
+{
+  const unsigned int coarsest = m.min_level();
+  const unsigned int finest   = m.max_level();
+  matrices.resize(coarsest, finest);
+  smoothers.resize(coarsest, finest);
+  for (unsigned int level=coarsest; level<=finest; ++level)
+    {
+      matrices[level] = &(m[level].block(row, col));
+      smoothers[level].initialize(m[level].block(row, col), data);
+    }
+}
+
+// Block variant with per-level data: @p data must span exactly the levels of
+// @p m; level i uses block (row, col) of m[i] together with data[i].
+template <typename MatrixType, class RelaxationType, typename VectorType>
+template <typename MatrixType2, class DATA>
+inline void
+MGSmootherRelaxation<MatrixType, RelaxationType, VectorType>::initialize
+(const MGLevelObject<MatrixType2> &m,
+ const MGLevelObject<DATA>        &data,
+ const unsigned int                row,
+ const unsigned int                col)
+{
+  const unsigned int min = m.min_level();
+  const unsigned int max = m.max_level();
+  Assert (data.min_level() == min,
+          ExcDimensionMismatch(data.min_level(), min));
+  Assert (data.max_level() == max,
+          ExcDimensionMismatch(data.max_level(), max));
+
+  matrices.resize(min, max);
+  smoothers.resize(min, max);
+  for (unsigned int level=min; level<=max; ++level)
+    {
+      matrices[level] = &(m[level].block(row, col));
+      smoothers[level].initialize(m[level].block(row, col), data[level]);
+    }
+}
+
+
+// Apply the smoother of @p level repeatedly to @p u. With symmetric set,
+// step() and Tstep() alternate and an even count starts with step().
+template <typename MatrixType, class RelaxationType, typename VectorType>
+inline void
+MGSmootherRelaxation<MatrixType, RelaxationType, VectorType>::smooth
+(const unsigned int  level,
+ VectorType         &u,
+ const VectorType   &rhs) const
+{
+  const unsigned int maxlevel = smoothers.max_level();
+  unsigned int n_sweeps = this->steps;
+  if (this->variable)
+    n_sweeps *= (1<<(maxlevel-level));
+
+  const bool even_symmetric = this->symmetric && (n_sweeps % 2 == 0);
+  bool use_transpose = this->transpose && !even_symmetric;
+  if (this->debug > 0)
+    deallog << 'S' << level << ' ';
+  for (unsigned int sweep=0; sweep<n_sweeps; ++sweep)
+    {
+      if (use_transpose)
+        smoothers[level].Tstep(u, rhs);
+      else
+        smoothers[level].step(u, rhs);
+      if (this->symmetric)
+        use_transpose = !use_transpose;
+    }
+}
+
+
+
+// Own size plus what matrices, smoothers and vector_memory report.
+template <typename MatrixType, class RelaxationType, typename VectorType>
+inline std::size_t
+MGSmootherRelaxation<MatrixType, RelaxationType, VectorType>::
+memory_consumption () const
+{
+  std::size_t total = sizeof(*this) + matrices.memory_consumption();
+  total += smoothers.memory_consumption();
+  total += this->vector_memory.memory_consumption();
+  return total;
+}
+
+
+//----------------------------------------------------------------------//
+
+// All smoothing parameters are passed on unchanged to the MGSmoother base.
+template <typename MatrixType, typename PreconditionerType, typename VectorType>
+inline
+MGSmootherPrecondition<MatrixType, PreconditionerType, VectorType>::MGSmootherPrecondition
+(const unsigned int steps,
+ const bool         variable,
+ const bool         symmetric,
+ const bool         transpose)
+  : MGSmoother<VectorType>(steps, variable, symmetric, transpose)
+{}
+
+
+
+// Forward clear() to the smoothers, then assign 0 to the matrix entry of
+// every level.
+template <typename MatrixType, typename PreconditionerType, typename VectorType>
+inline void
+MGSmootherPrecondition<MatrixType, PreconditionerType, VectorType>::clear ()
+{
+  smoothers.clear();
+  const unsigned int last_level = matrices.max_level();
+  for (unsigned int level=matrices.min_level(); level<=last_level; ++level)
+    matrices[level]=0;
+}
+
+
+
+// Size both level objects to the level range of @p m, then on each level
+// store the matrix address and set up the preconditioner with @p data.
+template <typename MatrixType, typename PreconditionerType, typename VectorType>
+template <typename MatrixType2>
+inline void
+MGSmootherPrecondition<MatrixType, PreconditionerType, VectorType>::initialize
+(const MGLevelObject<MatrixType2>                  &m,
+ const typename PreconditionerType::AdditionalData &data)
+{
+  const unsigned int coarsest = m.min_level();
+  const unsigned int finest   = m.max_level();
+  matrices.resize(coarsest, finest);
+  smoothers.resize(coarsest, finest);
+  for (unsigned int level=coarsest; level<=finest; ++level)
+    {
+      matrices[level] = &m[level];
+      smoothers[level].initialize(m[level], data);
+    }
+}
+
+
+
+// Per-level variant: @p data must span exactly the levels of @p m (checked
+// by the assertions); each level's preconditioner gets its own entry.
+template <typename MatrixType, typename PreconditionerType, typename VectorType>
+template <typename MatrixType2, class DATA>
+inline void
+MGSmootherPrecondition<MatrixType, PreconditionerType, VectorType>::initialize
+(const MGLevelObject<MatrixType2> &m,
+ const MGLevelObject<DATA>        &data)
+{
+  const unsigned int min = m.min_level();
+  const unsigned int max = m.max_level();
+  Assert (data.min_level() == min,
+          ExcDimensionMismatch(data.min_level(), min));
+  Assert (data.max_level() == max,
+          ExcDimensionMismatch(data.max_level(), max));
+
+  matrices.resize(min, max);
+  smoothers.resize(min, max);
+  for (unsigned int level=min; level<=max; ++level)
+    {
+      matrices[level] = &m[level];
+      smoothers[level].initialize(m[level], data[level]);
+    }
+}
+
+
+
+// Block variant: on each level only block (row, col) of the level matrix is
+// stored and handed, with the shared @p data, to that level's preconditioner.
+template <typename MatrixType, typename PreconditionerType, typename VectorType>
+template <typename MatrixType2, class DATA>
+inline void
+MGSmootherPrecondition<MatrixType, PreconditionerType, VectorType>::initialize
+(const MGLevelObject<MatrixType2> &m,
+ const DATA                       &data,
+ const unsigned int                row,
+ const unsigned int                col)
+{
+  const unsigned int coarsest = m.min_level();
+  const unsigned int finest   = m.max_level();
+  matrices.resize(coarsest, finest);
+  smoothers.resize(coarsest, finest);
+  for (unsigned int level=coarsest; level<=finest; ++level)
+    {
+      matrices[level] = &(m[level].block(row, col));
+      smoothers[level].initialize(m[level].block(row, col), data);
+    }
+}
+
+
+
+// Block variant with per-level data: @p data must span exactly the levels of
+// @p m; level i uses block (row, col) of m[i] together with data[i].
+template <typename MatrixType, typename PreconditionerType, typename VectorType>
+template <typename MatrixType2, class DATA>
+inline void
+MGSmootherPrecondition<MatrixType, PreconditionerType, VectorType>::initialize
+(const MGLevelObject<MatrixType2> &m,
+ const MGLevelObject<DATA>        &data,
+ const unsigned int                row,
+ const unsigned int                col)
+{
+  const unsigned int min = m.min_level();
+  const unsigned int max = m.max_level();
+  Assert (data.min_level() == min,
+          ExcDimensionMismatch(data.min_level(), min));
+  Assert (data.max_level() == max,
+          ExcDimensionMismatch(data.max_level(), max));
+
+  matrices.resize(min, max);
+  smoothers.resize(min, max);
+  for (unsigned int level=min; level<=max; ++level)
+    {
+      matrices[level] = &(m[level].block(row, col));
+      smoothers[level].initialize(m[level].block(row, col), data[level]);
+    }
+}
+
+
+
+// Perform repeated sweeps of the preconditioned defect correction
+//   r = rhs - A u,  d = P r,  u += d
+// on @p level, with A the stored level matrix and P the level's
+// preconditioner (assuming the usual deal.II vmult/sadd semantics). A
+// transposed sweep applies both through Tvmult instead of vmult. With
+// symmetric set, plain and transposed sweeps alternate and an even sweep
+// count always starts with a plain one.
+template <typename MatrixType, typename PreconditionerType, typename VectorType>
+inline void
+MGSmootherPrecondition<MatrixType, PreconditionerType, VectorType>::smooth
+(const unsigned int level,
+ VectorType         &u,
+ const VectorType   &rhs) const
+{
+  const unsigned int maxlevel = matrices.max_level();
+  unsigned int n_sweeps = this->steps;
+  if (this->variable)
+    n_sweeps *= (1<<(maxlevel-level));
+
+  // Two work vectors obtained from vector_memory and reinit'ed from u: one
+  // for the residual, one for the correction.
+  typename VectorMemory<VectorType>::Pointer residual(this->vector_memory);
+  typename VectorMemory<VectorType>::Pointer correction(this->vector_memory);
+  residual->reinit(u,true);
+  correction->reinit(u,true);
+
+  const bool even_symmetric = this->symmetric && (n_sweeps % 2 == 0);
+  bool use_transpose = this->transpose && !even_symmetric;
+  if (this->debug > 0)
+    deallog << 'S' << level << ' ';
+
+  for (unsigned int sweep=0; sweep<n_sweeps; ++sweep)
+    {
+      if (this->debug > 0)
+        deallog << (use_transpose ? 'T' : 'N');
+
+      // On the very first sweep a zero iterate makes the residual equal to
+      // rhs, which saves one matrix-vector product.
+      if (sweep == 0 && u.all_zero())
+        *residual = rhs;
+      else if (use_transpose)
+        {
+          matrices[level].Tvmult(*residual,u);
+          residual->sadd(-1.,1.,rhs);
+        }
+      else
+        {
+          matrices[level].vmult(*residual,u);
+          residual->sadd(-1.,rhs);
+        }
+      if (this->debug > 2)
+        deallog << ' ' << residual->l2_norm() << ' ';
+
+      if (use_transpose)
+        smoothers[level].Tvmult(*correction, *residual);
+      else
+        smoothers[level].vmult(*correction, *residual);
+      if (this->debug > 1)
+        deallog << ' ' << correction->l2_norm() << ' ';
+
+      u += *correction;
+      if (this->symmetric)
+        use_transpose = !use_transpose;
+    }
+
+  if (this->debug > 0)
+    deallog << std::endl;
+}
+
+
+
+// Own size plus what matrices, smoothers and vector_memory report.
+template <typename MatrixType, typename PreconditionerType, typename VectorType>
+inline std::size_t
+MGSmootherPrecondition<MatrixType, PreconditionerType, VectorType>::
+memory_consumption () const
+{
+  std::size_t total = sizeof(*this) + matrices.memory_consumption();
+  total += smoothers.memory_consumption();
+  total += this->vector_memory.memory_consumption();
+  return total;
+}
+
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/multigrid/mg_tools.h b/include/deal.II/multigrid/mg_tools.h
new file mode 100644
index 0000000..63d1de5
--- /dev/null
+++ b/include/deal.II/multigrid/mg_tools.h
@@ -0,0 +1,228 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__mg_tools_h
+#define dealii__mg_tools_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/index_set.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/dofs/dof_handler.h>
+
+#include <vector>
+#include <set>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+// Forward declarations; each class template is declared exactly once.
+template <class Object> class MGLevelObject;
+template <int dim, int spacedim> class DoFHandler;
+template <typename number> class Vector;
+template <typename number> class SparseMatrix;
+template <typename number> class BlockVector;
+template <typename number> class BlockSparseMatrix;
+template <typename number> class FullMatrix;
+
+/* !@addtogroup mg */
+/* @{ */
+
+/**
+ * This is a collection of functions operating on, and manipulating the
+ * numbers of degrees of freedom in a multilevel triangulation. It is similar
+ * in purpose and function to the @p DoFTools namespace, but operates on
+ * levels of DoFHandler objects. See there and the documentation of the member
+ * functions for more information.
+ *
+ * @author Wolfgang Bangerth, Guido Kanschat, 1999 - 2005, 2012
+ */
+namespace MGTools
+{
+  /**
+   * Compute row length vector for multilevel methods.
+   */
+  template <int dim, int spacedim>
+  void
+  compute_row_length_vector(const DoFHandler<dim,spacedim> &dofs,
+                            const unsigned int level,
+                            std::vector<unsigned int> &row_lengths,
+                            const DoFTools::Coupling flux_couplings = DoFTools::none);
+
+  /**
+   * Compute row length vector for multilevel methods with optimization for
+   * block couplings.
+   */
+  template <int dim, int spacedim>
+  void
+  compute_row_length_vector(const DoFHandler<dim,spacedim> &dofs,
+                            const unsigned int level,
+                            std::vector<unsigned int> &row_lengths,
+                            const Table<2,DoFTools::Coupling> &couplings,
+                            const Table<2,DoFTools::Coupling> &flux_couplings);
+
+  /**
+   * Write the sparsity structure of the matrix belonging to the specified @p
+   * level. The sparsity pattern is not compressed, so before creating the
+   * actual matrix you have to compress the matrix yourself, using
+   * <tt>SparseMatrixStruct::compress()</tt>.
+   *
+   * There is no need to consider hanging nodes here, since only one level is
+   * considered.
+   */
+  template <typename DoFHandlerType, typename SparsityPatternType>
+  void
+  make_sparsity_pattern (const DoFHandlerType &dof_handler,
+                         SparsityPatternType  &sparsity,
+                         const unsigned int    level);
+
+  /**
+   * Make a sparsity pattern including fluxes of discontinuous Galerkin
+   * methods.
+   * @see
+   * @ref make_sparsity_pattern
+   * and
+   * @ref DoFTools
+   */
+  template <int dim, typename SparsityPatternType, int spacedim>
+  void
+  make_flux_sparsity_pattern (const DoFHandler<dim,spacedim> &dof_handler,
+                              SparsityPatternType            &sparsity,
+                              const unsigned int              level);
+
+  /**
+   * Create sparsity pattern for the fluxes at refinement edges. The matrix
+   * maps a function of the fine level space @p level to the coarser space.
+   *
+   * @see make_flux_sparsity_pattern()
+   */
+  template <int dim, typename SparsityPatternType, int spacedim>
+  void
+  make_flux_sparsity_pattern_edge (const DoFHandler<dim,spacedim> &dof_handler,
+                                   SparsityPatternType            &sparsity,
+                                   const unsigned int              level);
+  /**
+   * This function does the same as the other with the same name, but it gets
+   * two additional coefficient matrices. A matrix entry will only be
+   * generated for two basis functions, if there is a non-zero entry linking
+   * their associated components in the coefficient matrix.
+   *
+   * There is one matrix for couplings in a cell and one for the couplings
+   * occurring in fluxes.
+   */
+  template <int dim, typename SparsityPatternType, int spacedim>
+  void
+  make_flux_sparsity_pattern (const DoFHandler<dim,spacedim>    &dof,
+                              SparsityPatternType               &sparsity,
+                              const unsigned int                 level,
+                              const Table<2,DoFTools::Coupling> &int_mask,
+                              const Table<2,DoFTools::Coupling> &flux_mask);
+
+  /**
+   * Create sparsity pattern for the fluxes at refinement edges. The matrix
+   * maps a function of the fine level space @p level to the coarser space.
+   * This is the version restricting the pattern to the elements actually
+   * needed.
+   *
+   * @see make_flux_sparsity_pattern()
+   */
+  template <int dim, typename SparsityPatternType, int spacedim>
+  void
+  make_flux_sparsity_pattern_edge (const DoFHandler<dim,spacedim>    &dof_handler,
+                                   SparsityPatternType               &sparsity,
+                                   const unsigned int                 level,
+                                   const Table<2,DoFTools::Coupling> &flux_mask);
+
+  /**
+   * Count the dofs block-wise on each level.
+   *
+   * Result is a vector containing for each level a vector containing the
+   * number of dofs for each block (access is <tt>result[level][block]</tt>).
+   */
+  template <typename DoFHandlerType>
+  void
+  count_dofs_per_block (const DoFHandlerType                               &dof_handler,
+                        std::vector<std::vector<types::global_dof_index> > &dofs_per_block,
+                        std::vector<unsigned int> target_block = std::vector<unsigned int>());
+
+  /**
+   * Count the dofs component-wise on each level.
+   *
+   * Result is a vector containing for each level a vector containing the
+   * number of dofs for each component (access is
+   * <tt>result[level][component]</tt>).
+   */
+  template <int dim, int spacedim>
+  void
+  count_dofs_per_component (const DoFHandler<dim,spacedim> &mg_dof,
+                            std::vector<std::vector<types::global_dof_index> > &result,
+                            const bool only_once = false,
+                            std::vector<unsigned int> target_component = std::vector<unsigned int>());
+
+  /**
+   * Generate a list of those degrees of freedom at the boundary of the domain
+   * that should be eliminated from the matrix because they will be
+   * constrained by Dirichlet boundary conditions.
+   *
+   * This is the multilevel equivalent of
+   * VectorTools::interpolate_boundary_values, but since the multilevel method
+   * does not have its own right hand side, the function values returned by
+   * the function object that is part of the function_map argument are
+   * ignored.
+   *
+   * @arg <tt>boundary_indices</tt> is a vector which on return contains all
+   * indices of degrees of freedom for each level that are at the part of the
+   * boundary identified by the function_map argument. Its length has to match
+   * the number of levels in the dof handler object.
+   */
+  template <int dim, int spacedim>
+  void
+  make_boundary_list (const DoFHandler<dim,spacedim>      &mg_dof,
+                      const typename FunctionMap<dim>::type &function_map,
+                      std::vector<std::set<types::global_dof_index> > &boundary_indices,
+                      const ComponentMask                   &component_mask = ComponentMask());
+
+  /**
+   * The same function as above, but return an IndexSet rather than a
+   * std::set<types::global_dof_index> on each level.
+   */
+  template <int dim, int spacedim>
+  void
+  make_boundary_list (const DoFHandler<dim,spacedim>      &mg_dof,
+                      const typename FunctionMap<dim>::type &function_map,
+                      std::vector<IndexSet>                 &boundary_indices,
+                      const ComponentMask               &component_mask = ComponentMask());
+
+  /**
+   * For each level in a multigrid hierarchy, produce an IndexSet that
+   * indicates which of the degrees of freedom are along interfaces of this
+   * level to cells that only exist on coarser levels.
+   */
+  template <int dim, int spacedim>
+  void
+  extract_inner_interface_dofs (const DoFHandler<dim,spacedim> &mg_dof_handler,
+                                std::vector<IndexSet>  &interface_dofs);
+
+  // NOTE(review): undocumented; presumably the per-level dofs NOT on the interfaces above -- confirm.
+  template <int dim, int spacedim>
+  void
+  extract_non_interface_dofs (const DoFHandler<dim,spacedim> &mg_dof_handler,
+                              std::vector<std::set<types::global_dof_index> > &non_interface_dofs);
+}
+
+/* @} */
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/multigrid/mg_transfer.h b/include/deal.II/multigrid/mg_transfer.h
new file mode 100644
index 0000000..4f44a5c
--- /dev/null
+++ b/include/deal.II/multigrid/mg_transfer.h
@@ -0,0 +1,557 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__mg_transfer_h
+#define dealii__mg_transfer_h
+
+#include <deal.II/base/config.h>
+
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/block_sparsity_pattern.h>
+#include <deal.II/lac/trilinos_sparse_matrix.h>
+#include <deal.II/lac/parallel_vector.h>
+
+#include <deal.II/lac/vector_memory.h>
+
+#include <deal.II/multigrid/mg_base.h>
+#include <deal.II/multigrid/mg_constrained_dofs.h>
+#include <deal.II/base/mg_level_object.h>
+
+#include <deal.II/dofs/dof_handler.h>
+
+#include <deal.II/base/std_cxx11/shared_ptr.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace internal
+{
+  template <typename VectorType>
+  struct MatrixSelector
+  {
+    typedef ::dealii::SparsityPattern Sparsity;
+    typedef ::dealii::SparseMatrix<typename VectorType::value_type> Matrix;
+
+    // Copy @p sp into @p sparsity, then reinit @p matrix with it; level and DoF handler are unused.
+    template <typename SparsityPatternType, typename DoFHandlerType>
+    static void reinit(Matrix &matrix, Sparsity &sparsity, int /*level*/, const SparsityPatternType &sp, const DoFHandlerType &)
+    {
+      sparsity.copy_from (sp);
+      matrix.reinit (sparsity);
+    }
+  };
+
+#ifdef DEAL_II_WITH_TRILINOS
+  template <typename Number>
+  struct MatrixSelector<parallel::distributed::Vector<Number> >
+  {
+    typedef ::dealii::TrilinosWrappers::SparsityPattern Sparsity;
+    typedef ::dealii::TrilinosWrappers::SparseMatrix Matrix;
+
+    // The owned level DoFs of level+1 and of @p level are handed over as the two
+    // index sets. NOTE(review): the communicator is hard-wired to MPI_COMM_WORLD.
+    template <typename SparsityPatternType, typename DoFHandlerType>
+    static void reinit(Matrix &matrix, Sparsity &, int level, const SparsityPatternType &sp, DoFHandlerType &dh)
+    {
+      matrix.reinit(dh.locally_owned_mg_dofs(level+1), dh.locally_owned_mg_dofs(level),
+                    sp, MPI_COMM_WORLD, true);
+    }
+  };
+  template <>
+  struct MatrixSelector<dealii::TrilinosWrappers::MPI::Vector>
+  {
+    typedef ::dealii::TrilinosWrappers::SparsityPattern Sparsity;
+    typedef ::dealii::TrilinosWrappers::SparseMatrix Matrix;
+
+    // The owned level DoFs of level+1 and of @p level are handed over as the two
+    // index sets. NOTE(review): the communicator is hard-wired to MPI_COMM_WORLD.
+    template <typename SparsityPatternType, typename DoFHandlerType>
+    static void reinit(Matrix &matrix, Sparsity &, int level, const SparsityPatternType &sp, DoFHandlerType &dh)
+    {
+      matrix.reinit(dh.locally_owned_mg_dofs(level+1), dh.locally_owned_mg_dofs(level),
+                    sp, MPI_COMM_WORLD, true);
+    }
+  };
+
+  template <>
+  struct MatrixSelector<dealii::TrilinosWrappers::Vector>
+  {
+    typedef ::dealii::TrilinosWrappers::SparsityPattern Sparsity;
+    typedef ::dealii::TrilinosWrappers::SparseMatrix Matrix;
+
+    template <typename SparsityPatternType, typename DoFHandlerType>
+    static void reinit(Matrix &, Sparsity &, int /*level*/, const SparsityPatternType &, DoFHandlerType &)
+    { // intentionally empty: every argument is ignored and nothing is set up
+    }
+  };
+#endif
+}
+
+/*
+ * MGTransferBase is defined in mg_base.h
+ */
+
+/*!@addtogroup mg */
+/*@{*/
+
+
+
+/**
+ * Implementation of transfer between the global vectors and the multigrid
+ * levels for use in the derived class MGTransferPrebuilt and other classes.
+ *
+ * @author Wolfgang Bangerth, Guido Kanschat, Timo Heister, Martin Kronbichler
+ * @date 1999, 2000, 2001, 2002, 2003, 2004, 2012, 2015
+ */
+template <typename VectorType>
+class MGLevelGlobalTransfer : public MGTransferBase<VectorType>
+{
+public:
+
+  /**
+   * Reset the object to the state it had right after the default constructor.
+   */
+  void clear ();
+
+  /**
+   * Transfer from a vector on the global grid to vectors defined on each of
+   * the levels separately, i.e. an @p MGVector.
+   */
+  template <int dim, class InVector, int spacedim>
+  void
+  copy_to_mg (const DoFHandler<dim,spacedim> &mg_dof,
+              MGLevelObject<VectorType>      &dst,
+              const InVector                 &src) const;
+
+  /**
+   * Transfer from multi-level vector to normal vector.
+   *
+   * Copies data from active portions of an MGVector into the respective
+   * positions of a <tt>Vector<number></tt>. In order to keep the result
+   * consistent, constrained degrees of freedom are set to zero.
+   */
+  template <int dim, class OutVector, int spacedim>
+  void
+  copy_from_mg (const DoFHandler<dim,spacedim>  &mg_dof,
+                OutVector                       &dst,
+                const MGLevelObject<VectorType> &src) const;
+
+  /**
+   * Add a multi-level vector to a normal vector.
+   *
+   * Works as the previous function, but probably not for continuous elements.
+   */
+  template <int dim, class OutVector, int spacedim>
+  void
+  copy_from_mg_add (const DoFHandler<dim,spacedim>  &mg_dof,
+                    OutVector                       &dst,
+                    const MGLevelObject<VectorType> &src) const;
+
+  /**
+   * If this object operates on BlockVector objects, we need to describe how
+   * the individual vector components are mapped to the blocks of a vector.
+   * For example, for a Stokes system, we have dim+1 vector components for
+   * velocity and pressure, but we may want to use block vectors with only two
+   * blocks for all velocities in one block, and the pressure variables in the
+   * other.
+   *
+   * By default, if this function is not called, block vectors have as many
+   * blocks as the finite element has vector components. However, this can be
+   * changed by calling this function with an array that describes how vector
+   * components are to be grouped into blocks. The meaning of the argument is
+   * the same as the one given to the DoFTools::count_dofs_per_component
+   * function.
+   */
+  void
+  set_component_to_block_map (const std::vector<unsigned int> &map);
+
+  /**
+   * Memory used by this object.
+   */
+  std::size_t memory_consumption () const;
+
+  /**
+   * Print the copy index fields for debugging purposes.
+   */
+  void print_indices(std::ostream &os) const;
+
+protected:
+
+  /**
+   * Internal function to @p fill copy_indices*. Called by derived classes.
+   */
+  template <int dim, int spacedim>
+  void fill_and_communicate_copy_indices(const DoFHandler<dim,spacedim> &mg_dof);
+
+  /**
+   * Sizes of the multi-level vectors.
+   */
+  std::vector<types::global_dof_index> sizes;
+
+  /**
+   * Mapping for the copy_to_mg() and copy_from_mg() functions. Here only
+   * index pairs locally owned are stored.
+   *
+   * The data is organized as follows: one vector per level. Each element of
+   * these vectors contains first the global index, then the level index.
+   */
+  std::vector<std::vector<std::pair<types::global_dof_index, types::global_dof_index> > >
+  copy_indices;
+
+  /**
+   * Additional degrees of freedom for the copy_to_mg() function. These are
+   * the ones where the global degree of freedom is locally owned and the
+   * level degree of freedom is not.
+   *
+   * Organization of the data is like for @p copy_indices.
+   */
+  std::vector<std::vector<std::pair<types::global_dof_index, types::global_dof_index> > >
+  copy_indices_global_mine;
+
+  /**
+   * Additional degrees of freedom for the copy_from_mg() function. These are
+   * the ones where the level degree of freedom is locally owned and the
+   * global degree of freedom is not.
+   *
+   * Organization of the data is like for @p copy_indices.
+   */
+  std::vector<std::vector<std::pair<types::global_dof_index, types::global_dof_index> > >
+  copy_indices_level_mine;
+
+  /**
+   * Stores whether the copy operation from the global to the level vector is
+   * actually a plain copy to the finest level. This means that the grid has
+   * no adaptive refinement and the numbering on the finest multigrid level is
+   * the same as in the global case.
+   */
+  bool perform_plain_copy;
+
+  /**
+   * The vector that stores what has been given to the
+   * set_component_to_block_map() function.
+   */
+  std::vector<unsigned int> component_to_block_map;
+
+  /**
+   * The mg_constrained_dofs of the level systems.
+   */
+  SmartPointer<const MGConstrainedDoFs, MGLevelGlobalTransfer<VectorType> > mg_constrained_dofs;
+};
+
+
+
+/**
+ * Implementation of transfer between the global vectors and the multigrid
+ * levels for use in the derived class MGTransferPrebuilt and other classes.
+ * This class is a specialization for the case of
+ * parallel::distributed::Vector that requires a few different calling
+ * routines as compared to the %parallel vectors in the PETScWrappers and
+ * TrilinosWrappers namespaces.
+ *
+ * @author Martin Kronbichler
+ * @date 2016
+ */
+template <typename Number>
+class MGLevelGlobalTransfer<parallel::distributed::Vector<Number> > : public MGTransferBase<parallel::distributed::Vector<Number> >
+{
+public:
+
+  /**
+   * Reset the object to the state it had right after the default constructor.
+   */
+  void clear ();
+
+  /**
+   * Transfer from a vector on the global grid to vectors defined on each of
+   * the levels separately, i.e. an @p MGVector.
+   */
+  template <int dim, typename Number2, int spacedim>
+  void
+  copy_to_mg (const DoFHandler<dim,spacedim>                        &mg_dof,
+              MGLevelObject<parallel::distributed::Vector<Number> > &dst,
+              const parallel::distributed::Vector<Number2>          &src) const;
+
+  /**
+   * Transfer from multi-level vector to normal vector.
+   *
+   * Copies data from active portions of an MGVector into the respective
+   * positions of a <tt>Vector<number></tt>. In order to keep the result
+   * consistent, constrained degrees of freedom are set to zero.
+   */
+  template <int dim, typename Number2, int spacedim>
+  void
+  copy_from_mg (const DoFHandler<dim,spacedim>                              &mg_dof,
+                parallel::distributed::Vector<Number2>                      &dst,
+                const MGLevelObject<parallel::distributed::Vector<Number> > &src) const;
+
+  /**
+   * Add a multi-level vector to a normal vector.
+   *
+   * Works as the previous function, but probably not for continuous elements.
+   */
+  template <int dim, typename Number2, int spacedim>
+  void
+  copy_from_mg_add (const DoFHandler<dim,spacedim>                              &mg_dof,
+                    parallel::distributed::Vector<Number2>                      &dst,
+                    const MGLevelObject<parallel::distributed::Vector<Number> > &src) const;
+
+  /**
+   * If this object operates on BlockVector objects, we need to describe how
+   * the individual vector components are mapped to the blocks of a vector.
+   * For example, for a Stokes system, we have dim+1 vector components for
+   * velocity and pressure, but we may want to use block vectors with only two
+   * blocks for all velocities in one block, and the pressure variables in the
+   * other.
+   *
+   * By default, if this function is not called, block vectors have as many
+   * blocks as the finite element has vector components. However, this can be
+   * changed by calling this function with an array that describes how vector
+   * components are to be grouped into blocks. The meaning of the argument is
+   * the same as the one given to the DoFTools::count_dofs_per_component
+   * function.
+   */
+  void
+  set_component_to_block_map (const std::vector<unsigned int> &map);
+
+  /**
+   * Memory used by this object.
+   */
+  std::size_t memory_consumption () const;
+
+  /**
+   * Print the copy index fields for debugging purposes.
+   */
+  void print_indices(std::ostream &os) const;
+
+protected:
+
+  /**
+   * Internal function to @p fill copy_indices*. Called by derived classes.
+   */
+  template <int dim, int spacedim>
+  void fill_and_communicate_copy_indices(const DoFHandler<dim,spacedim> &mg_dof);
+
+  /**
+   * Sizes of the multi-level vectors.
+   */
+  std::vector<types::global_dof_index> sizes;
+
+  /**
+   * Mapping for the copy_to_mg() and copy_from_mg() functions. Here only
+   * index pairs locally owned are stored.
+   *
+   * The data is organized as follows: one vector per level. Each element of
+   * these vectors contains first the global index, then the level index.
+   */
+  std::vector<std::vector<std::pair<unsigned int, unsigned int> > >
+  copy_indices;
+
+  /**
+   * Additional degrees of freedom for the copy_to_mg() function. These are
+   * the ones where the global degree of freedom is locally owned and the
+   * level degree of freedom is not.
+   *
+   * Organization of the data is like for @p copy_indices.
+   */
+  std::vector<std::vector<std::pair<unsigned int, unsigned int> > >
+  copy_indices_global_mine;
+
+  /**
+   * Additional degrees of freedom for the copy_from_mg() function. These are
+   * the ones where the level degree of freedom is locally owned and the
+   * global degree of freedom is not.
+   *
+   * Organization of the data is like for @p copy_indices.
+   */
+  std::vector<std::vector<std::pair<unsigned int, unsigned int> > >
+  copy_indices_level_mine;
+
+  /**
+   * Stores whether the copy operation from the global to the level vector is
+   * actually a plain copy to the finest level. This means that the grid has
+   * no adaptive refinement and the numbering on the finest multigrid level is
+   * the same as in the global case.
+   */
+  bool perform_plain_copy;
+
+  /**
+   * The vector that stores what has been given to the
+   * set_component_to_block_map() function.
+   */
+  std::vector<unsigned int> component_to_block_map;
+
+  /**
+   * The mg_constrained_dofs of the level systems.
+   */
+  SmartPointer<const MGConstrainedDoFs, MGLevelGlobalTransfer<parallel::distributed::Vector<Number> > > mg_constrained_dofs;
+
+  /**
+   * In the function copy_to_mg, we need to access ghosted entries of the
+   * global vector for inserting into the level vectors. This vector is
+   * populated with those entries.
+   */
+  mutable parallel::distributed::Vector<Number> ghosted_global_vector;
+
+  /**
+   * In the function copy_from_mg, we access all level vectors with certain
+   * ghost entries for inserting the result into a global vector.
+   */
+  mutable MGLevelObject<parallel::distributed::Vector<Number> > ghosted_level_vector;
+};
+
+
+
+/**
+ * Implementation of the MGTransferBase interface for which the transfer
+ * operations are prebuilt upon construction of the object of this class as
+ * matrices. This is the fast way, since it only needs to build the operation
+ * once by looping over all cells and storing the result in a matrix for each
+ * level, but requires additional memory.
+ *
+ * See MGTransferBase to find out which of the transfer classes is best for
+ * your needs.
+ *
+ * @author Wolfgang Bangerth, Guido Kanschat, Timo Heister, Martin Kronbichler
+ * @date 1999, 2000, 2001, 2002, 2003, 2004, 2012, 2015
+ */
+template <typename VectorType>
+class MGTransferPrebuilt : public MGLevelGlobalTransfer<VectorType>
+{
+public:
+  /**
+   * Constructor without constraint matrices. Use this constructor only with
+   * discontinuous finite elements or with no local refinement.
+   */
+  MGTransferPrebuilt ();
+
+  /**
+   * Constructor with constraints. Equivalent to the default constructor
+   * followed by initialize_constraints().
+   */
+  MGTransferPrebuilt (const ConstraintMatrix &constraints,
+                      const MGConstrainedDoFs &mg_constrained_dofs);
+
+  /**
+   * Destructor.
+   */
+  virtual ~MGTransferPrebuilt ();
+
+  /**
+   * Initialize the constraints to be used in build_matrices().
+   */
+  void initialize_constraints (const ConstraintMatrix &constraints,
+                               const MGConstrainedDoFs &mg_constrained_dofs);
+
+  /**
+   * Reset the object to the state it had right after the default constructor.
+   */
+  void clear ();
+
+  /**
+   * Actually build the prolongation matrices for each level.
+   */
+  template <int dim, int spacedim>
+  void build_matrices (const DoFHandler<dim,spacedim> &mg_dof);
+
+  /**
+   * Prolongate a vector from level <tt>to_level-1</tt> to level
+   * <tt>to_level</tt> using the embedding matrices of the underlying finite
+   * element. The previous content of <tt>dst</tt> is overwritten.
+   *
+   * @arg src is a vector with as many elements as there are degrees of
+   * freedom on the coarser level involved.
+   *
+   * @arg dst has as many elements as there are degrees of freedom on the
+   * finer level.
+   */
+  virtual void prolongate (const unsigned int to_level,
+                           VectorType         &dst,
+                           const VectorType   &src) const;
+
+  /**
+   * Restrict a vector from level <tt>from_level</tt> to level
+   * <tt>from_level-1</tt> using the transpose operation of the @p prolongate
+   * method. If the region covered by cells on level <tt>from_level</tt> is
+   * smaller than that of level <tt>from_level-1</tt> (local refinement), then
+   * some degrees of freedom in <tt>dst</tt> are active and will not be
+   * altered. For the other degrees of freedom, the result of the restriction
+   * is added.
+   *
+   * @arg src is a vector with as many elements as there are degrees of
+   * freedom on the finer level involved.
+   *
+   * @arg dst has as many elements as there are degrees of freedom on the
+   * coarser level.
+   */
+  virtual void restrict_and_add (const unsigned int from_level,
+                                 VectorType         &dst,
+                                 const VectorType   &src) const;
+
+  /**
+   * Finite element does not provide prolongation matrices.
+   */
+  DeclException0(ExcNoProlongation);
+
+  /**
+   * You have to call build_matrices() before using this object.
+   */
+  DeclException0(ExcMatricesNotBuilt);
+
+  /**
+   * Memory used by this object.
+   */
+  std::size_t memory_consumption () const;
+
+  /**
+   * Print all the matrices for debugging purposes.
+   */
+  void print_matrices(std::ostream &os) const;
+
+private:
+
+  /**
+   * Sparsity patterns for transfer matrices.
+   */
+  std::vector<std_cxx11::shared_ptr<typename internal::MatrixSelector<VectorType>::Sparsity> > prolongation_sparsities;
+
+  /**
+   * The actual prolongation matrix. Column indices belong to the dof indices
+   * of the mother cell, i.e. the coarse level, while row indices belong to
+   * the child cell, i.e. the fine level.
+   */
+  std::vector<std_cxx11::shared_ptr<typename internal::MatrixSelector<VectorType>::Matrix> > prolongation_matrices;
+
+  /**
+   * Degrees of freedom on the refinement edge excluding those on the
+   * boundary.
+   */
+  std::vector<std::vector<bool> > interface_dofs;
+
+  /**
+   * The constraints of the global system.
+   */
+  SmartPointer<const ConstraintMatrix, MGTransferPrebuilt<VectorType> > constraints;
+};
+
+
+/*@}*/
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/multigrid/mg_transfer.templates.h b/include/deal.II/multigrid/mg_transfer.templates.h
new file mode 100644
index 0000000..76225e3
--- /dev/null
+++ b/include/deal.II/multigrid/mg_transfer.templates.h
@@ -0,0 +1,551 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__mg_transfer_templates_h
+#define dealii__mg_transfer_templates_h
+
+#include <deal.II/lac/trilinos_vector.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/multigrid/mg_base.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/multigrid/mg_tools.h>
+#include <deal.II/multigrid/mg_transfer.h>
+#include <deal.II/distributed/tria.h>
+
+#include <algorithm>
+
+// Here you can turn on some cout statements and MPI Barriers for debugging:
+//#define DEBUG_OUTPUT
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace
+{
+  /**
+   * Resize the vectors on all levels of @p v: each level vector is
+   * reinitialized to the number of degrees of freedom @p mg_dof reports for
+   * that level. The unnamed second argument (the target component map) is
+   * ignored; it only exists so that the call matches the BlockVector overload.
+   */
+  template <int dim, typename number, int spacedim>
+  void
+  reinit_vector (const dealii::DoFHandler<dim,spacedim> &mg_dof,
+                 const std::vector<unsigned int> &,
+                 MGLevelObject<dealii::Vector<number> > &v)
+  {
+    for (unsigned int level=v.min_level(); level<=v.max_level(); ++level)
+      {
+        // keep the count in the global index type: storing it in an
+        // unsigned int would truncate it when 64 bit indices are enabled
+        const types::global_dof_index n_level_dofs = mg_dof.n_dofs (level);
+        v[level].reinit(n_level_dofs);
+      }
+  }
+
+  /**
+   * Resize the block vectors on all levels of @p v. @p target_component maps
+   * each block of the finite element to a block of the level vectors; an
+   * empty map stands for the identity. The per-level, per-block sizes are
+   * obtained from MGTools::count_dofs_per_block.
+   */
+  template <int dim, typename number, int spacedim>
+  void
+  reinit_vector (const dealii::DoFHandler<dim,spacedim> &mg_dof,
+                 std::vector<unsigned int> target_component,
+                 MGLevelObject<BlockVector<number> > &v)
+  {
+    const unsigned int n_fe_blocks = mg_dof.get_fe().n_blocks();
+    // no map given: block i of the element goes to block i of the vector
+    if (target_component.empty())
+      {
+        target_component.resize(n_fe_blocks);
+        for (unsigned int block=0; block<n_fe_blocks; ++block)
+          target_component[block] = block;
+      }
+    Assert(target_component.size()==n_fe_blocks,
+           ExcDimensionMismatch(target_component.size(),n_fe_blocks));
+    // the largest target index determines how many blocks we need
+    const unsigned int n_target_blocks
+      = 1 + *std::max_element (target_component.begin(), target_component.end());
+
+    typedef std::vector<types::global_dof_index> block_sizes_type;
+    std::vector<block_sizes_type> dofs_per_block (mg_dof.get_triangulation().n_levels(),
+                                                  block_sizes_type(n_target_blocks));
+    MGTools::count_dofs_per_block (mg_dof, dofs_per_block, target_component);
+
+    for (unsigned int level=v.min_level(); level<=v.max_level(); ++level)
+      {
+        BlockVector<number> &level_vector = v[level];
+        level_vector.reinit(n_target_blocks);
+        for (unsigned int b=0; b<n_target_blocks; ++b)
+          level_vector.block(b).reinit(dofs_per_block[level][b]);
+        level_vector.collect_sizes();
+      }
+  }
+
+  /**
+   * Give each level vector of @p v the layout of the locally owned level
+   * DoFs of @p mg_dof; vectors whose sizes already match are only zeroed.
+   * For a non-parallel triangulation MPI_COMM_SELF is the communicator.
+   */
+  template <int dim, typename number, int spacedim>
+  void
+  reinit_vector (const dealii::DoFHandler<dim,spacedim> &mg_dof,
+                 const std::vector<unsigned int> &,
+                 MGLevelObject<parallel::distributed::Vector<number> > &v)
+  {
+    const parallel::Triangulation<dim,spacedim> *const tria =
+      dynamic_cast<const parallel::Triangulation<dim,spacedim>*>(&mg_dof.get_triangulation());
+    for (unsigned int level=v.min_level(); level<=v.max_level(); ++level)
+      {
+        const bool same_layout =
+          (v[level].size() == mg_dof.locally_owned_mg_dofs(level).size() &&
+           v[level].local_size() == mg_dof.locally_owned_mg_dofs(level).n_elements());
+        if (same_layout)
+          v[level] = 0.;
+        else
+          v[level].reinit(mg_dof.locally_owned_mg_dofs(level),
+                          tria != 0 ? tria->get_communicator() : MPI_COMM_SELF);
+      }
+  }
+
+
+#ifdef DEAL_II_WITH_TRILINOS
+  /**
+   * Give each level vector of @p v the layout of the locally owned level
+   * DoFs of @p mg_dof, using the communicator of the distributed
+   * triangulation. Throws if the triangulation is not a distributed one.
+   */
+  template <int dim, int spacedim>
+  void
+  reinit_vector (const dealii::DoFHandler<dim,spacedim> &mg_dof,
+                 const std::vector<unsigned int> &,
+                 MGLevelObject<TrilinosWrappers::MPI::Vector> &v)
+  {
+    typedef dealii::parallel::distributed::Triangulation<dim,spacedim> tria_type;
+    const tria_type *tria =
+      dynamic_cast<const tria_type *>(&mg_dof.get_triangulation());
+    AssertThrow(tria!=NULL, ExcMessage("multigrid with Trilinos vectors only works with distributed Triangulation!"));
+
+#ifdef DEAL_II_WITH_P4EST
+    for (unsigned int level=v.min_level(); level<=v.max_level(); ++level)
+      {
+        v[level].reinit(mg_dof.locally_owned_mg_dofs(level), tria->get_communicator());
+      }
+#else
+    // no p4est: the level vectors are left untouched
+    (void)v;
+#endif
+  }
+#endif
+}
+
+
+
+/* ------------------ MGLevelGlobalTransfer<VectorType> ----------------- */
+
+
+namespace internal
+{
+  // Copy between two vectors of possibly different type by accessing the
+  // entries listed in @p copy_indices one at a time.
+  template <typename T, typename V>
+  void copy_vector (const std::vector<std::pair<types::global_dof_index,types::global_dof_index> > &copy_indices,
+                    const T &src,
+                    V &dst)
+  {
+    typedef std::vector<std::pair<types::global_dof_index, types::global_dof_index> >::const_iterator dof_pair_iterator;
+    // both entries of a pair are expected to agree here, so only 'first' is
+    // read; this lets copy_to_mg and copy_from_mg share the function
+    for (dof_pair_iterator pair = copy_indices.begin(); pair != copy_indices.end(); ++pair)
+      dst(pair->first) = src(pair->first);
+    dst.compress(VectorOperation::insert);
+  }
+
+  // overload for the case that source and destination have the same type
+  template <typename T>
+  void copy_vector (const std::vector<std::pair<types::global_dof_index,types::global_dof_index> > &,
+                    const T &src,
+                    T &dst)
+  {
+    dst = src; // plain assignment; the index list is not consulted
+  }
+}
+
+
+template <typename VectorType>
+template <int dim, class InVector, int spacedim>
+void
+MGLevelGlobalTransfer<VectorType>::copy_to_mg
+(const DoFHandler<dim,spacedim> &mg_dof_handler,
+ MGLevelObject<VectorType>      &dst,
+ const InVector                 &src) const
+{
+  typedef std::vector<std::pair<types::global_dof_index, types::global_dof_index> > index_pairs_type;
+  typedef index_pairs_type::const_iterator dof_pair_iterator;
+
+  // size all level vectors before anything is written to them
+  reinit_vector(mg_dof_handler, component_to_block_map, dst);
+  const unsigned int n_levels = mg_dof_handler.get_triangulation().n_global_levels();
+#ifdef DEBUG_OUTPUT
+  std::cout << "copy_to_mg src " << src.l2_norm() << std::endl;
+  MPI_Barrier(MPI_COMM_WORLD);
+#endif
+
+  if (perform_plain_copy)
+    {
+      // No adaptive refinement and identical numbering of global and
+      // finest-level DoFs: the finest level is a plain copy of src.
+      const unsigned int finest = dst.max_level();
+      AssertDimension(dst[finest].size(), src.size());
+      internal::copy_vector(copy_indices[finest], src, dst[finest]);
+
+      // restrict level by level down to the coarsest one
+      for (unsigned int fine=n_levels-1; fine != 0; --fine)
+        this->restrict_and_add (fine, dst[fine-1], dst[fine]);
+      return;
+    }
+
+  // general case: walk from the finest level to the coarsest
+  for (unsigned int level=n_levels; level != 0;)
+    {
+      --level;
+#ifdef DEBUG_OUTPUT
+      MPI_Barrier(MPI_COMM_WORLD);
+#endif
+
+      VectorType &dst_level = dst[level];
+      const index_pairs_type &owned_pairs  = copy_indices[level];
+      const index_pairs_type &global_pairs = copy_indices_global_mine[level];
+
+      // entries stored in copy_indices (locally owned index pairs)
+      for (dof_pair_iterator p = owned_pairs.begin(); p != owned_pairs.end(); ++p)
+        dst_level(p->second) = src(p->first);
+
+      // entries whose global index is local while the level index is not
+      for (dof_pair_iterator p = global_pairs.begin(); p != global_pairs.end(); ++p)
+        dst_level(p->second) = src(p->first);
+
+      dst_level.compress(VectorOperation::insert);
+
+#ifdef DEBUG_OUTPUT
+      MPI_Barrier(MPI_COMM_WORLD);
+      std::cout << "copy_to_mg dst " << level << " " << dst_level.l2_norm() << std::endl;
+#endif
+
+      // every level but the finest (the first one visited) also gets the
+      // restriction of the next finer level added
+      if (level+1 != n_levels)
+        {
+          this->restrict_and_add (level+1, dst[level], dst[level+1]);
+#ifdef DEBUG_OUTPUT
+          std::cout << "copy_to_mg restr&add " << level << " " << dst_level.l2_norm() << std::endl;
+#endif
+        }
+    }
+}
+
+
+
+template <typename VectorType>
+template <int dim, class OutVector, int spacedim>
+void
+MGLevelGlobalTransfer<VectorType>::copy_from_mg
+(const DoFHandler<dim,spacedim>  &mg_dof_handler,
+ OutVector                       &dst,
+ const MGLevelObject<VectorType> &src) const
+{
+  typedef std::vector<std::pair<types::global_dof_index, types::global_dof_index> > index_pairs_type;
+  typedef index_pairs_type::const_iterator dof_pair_iterator;
+  if (perform_plain_copy)
+    {
+      const unsigned int finest = src.max_level();
+      AssertDimension(dst.size(), src[finest].size());
+      internal::copy_vector(copy_indices[finest], src[finest], dst);
+      return;
+    }
+
+  // For non-DG elements the degrees of freedom on a refinement face may need
+  // special attention: they belong to the coarse level, but have fine level
+  // basis functions. Levels are visited from coarse to fine.
+  dst = 0;
+  for (unsigned int level=0; level<mg_dof_handler.get_triangulation().n_global_levels(); ++level)
+    {
+#ifdef DEBUG_OUTPUT
+      MPI_Barrier(MPI_COMM_WORLD);
+      std::cout << "copy_from_mg src " << level << " " << src[level].l2_norm() << std::endl;
+      MPI_Barrier(MPI_COMM_WORLD);
+#endif
+      const VectorType       &src_level   = src[level];
+      const index_pairs_type &owned_pairs = copy_indices[level];
+      const index_pairs_type &level_pairs = copy_indices_level_mine[level];
+
+      // entries stored in copy_indices (locally owned index pairs)
+      for (dof_pair_iterator p = owned_pairs.begin(); p != owned_pairs.end(); ++p)
+        dst(p->first) = src_level(p->second);
+
+      // entries whose level index is local while the global index is not
+      for (dof_pair_iterator p = level_pairs.begin(); p != level_pairs.end(); ++p)
+        dst(p->first) = src_level(p->second);
+
+#ifdef DEBUG_OUTPUT
+      {
+        dst.compress(VectorOperation::insert);
+        MPI_Barrier(MPI_COMM_WORLD);
+        std::cout << "copy_from_mg level=" << level << " " << dst.l2_norm() << std::endl;
+      }
+#endif
+    }
+  dst.compress(VectorOperation::insert);
+#ifdef DEBUG_OUTPUT
+  MPI_Barrier(MPI_COMM_WORLD);
+  std::cout << "copy_from_mg " << dst.l2_norm() << std::endl;
+#endif
+}
+
+
+
+template <typename VectorType>
+template <int dim, class OutVector, int spacedim>
+void
+MGLevelGlobalTransfer<VectorType>::copy_from_mg_add
+(const DoFHandler<dim,spacedim>  &mg_dof_handler,
+ OutVector                       &dst,
+ const MGLevelObject<VectorType> &src) const
+{
+  typedef std::vector<std::pair<types::global_dof_index, types::global_dof_index> > index_pairs_type;
+  typedef index_pairs_type::const_iterator dof_pair_iterator;
+
+  // For non-DG elements the degrees of freedom on a refinement face may need
+  // special attention: they belong to the coarse level, but have fine level
+  // basis functions. Unlike copy_from_mg(), dst is not cleared beforehand.
+  for (unsigned int level=0; level<mg_dof_handler.get_triangulation().n_global_levels(); ++level)
+    {
+      const VectorType       &src_level   = src[level];
+      const index_pairs_type &owned_pairs = copy_indices[level];
+      const index_pairs_type &level_pairs = copy_indices_level_mine[level];
+      // entries stored in copy_indices (locally owned index pairs)
+      for (dof_pair_iterator p = owned_pairs.begin(); p != owned_pairs.end(); ++p)
+        dst(p->first) += src_level(p->second);
+
+      // entries whose level index is local while the global index is not
+      for (dof_pair_iterator p = level_pairs.begin(); p != level_pairs.end(); ++p)
+        dst(p->first) += src_level(p->second);
+    }
+  dst.compress(VectorOperation::add);
+}
+
+
+
+template <typename VectorType>
+void
+MGLevelGlobalTransfer<VectorType>::
+set_component_to_block_map (const std::vector<unsigned int> &map)
+{
+  this->component_to_block_map = map; // store a copy of the caller's map
+}
+
+
+
+/* --------- MGLevelGlobalTransfer<parallel::distributed::Vector> ------- */
+// Fills the level vectors dst from the global vector src and restricts the result down to level 0.
+template <typename Number>
+template <int dim, typename Number2, int spacedim>
+void
+MGLevelGlobalTransfer<parallel::distributed::Vector<Number> >::copy_to_mg
+(const DoFHandler<dim,spacedim>                        &mg_dof_handler,
+ MGLevelObject<parallel::distributed::Vector<Number> > &dst,
+ const parallel::distributed::Vector<Number2>          &src) const
+{
+  reinit_vector(mg_dof_handler, component_to_block_map, dst);
+  bool first = true; // true only while the highest level is being processed
+
+  if (perform_plain_copy)
+    {
+      // In this case, we can simply copy the local range of src into the
+      // highest level vector (in parallel by VectorView)
+      AssertDimension(dst[dst.max_level()].local_size(), src.local_size());
+      VectorView<Number>  dst_view (src.local_size(), dst[dst.max_level()].begin());
+      VectorView<Number2> src_view (src.local_size(), src.begin());
+      static_cast<Vector<Number> &>(dst_view) = static_cast<Vector<Number2> &>(src_view);
+
+      // do the initial restriction, from the highest level down to level 0
+      for (unsigned int level=mg_dof_handler.get_triangulation().n_global_levels()-1; level != 0; )
+        {
+          --level;
+          this->restrict_and_add (level+1, dst[level], dst[level+1]);
+        }
+      return;
+    }
+
+  // the ghosted vector should already have the correct local size (but
+  // different parallel layout)
+  AssertDimension(ghosted_global_vector.local_size(), src.local_size());
+
+  // copy the source vector to the temporary vector that we hold for the
+  // purpose of data exchange, then update its ghost values
+  ghosted_global_vector = src;
+  ghosted_global_vector.update_ghost_values();
+
+  for (unsigned int level=mg_dof_handler.get_triangulation().n_global_levels(); level != 0;)
+    {
+      --level;
+
+      typedef std::vector<std::pair<unsigned int, unsigned int> >::const_iterator dof_pair_iterator;
+      parallel::distributed::Vector<Number> &dst_level = dst[level];
+
+      // first copy local unknowns (pair = global local index, level local index)
+      for (dof_pair_iterator i = copy_indices[level].begin();
+           i != copy_indices[level].end(); ++i)
+        dst_level.local_element(i->second) = ghosted_global_vector.local_element(i->first);
+
+      // Do the same for the indices where the level index is local, but the
+      // global index is not
+      for (dof_pair_iterator i = copy_indices_level_mine[level].begin();
+           i != copy_indices_level_mine[level].end(); ++i)
+        dst_level.local_element(i->second) = ghosted_global_vector.local_element(i->first);
+
+      dst_level.compress(VectorOperation::insert);
+
+      if (!first) // skip the highest level: there is no level+1 to restrict from
+        {
+          this->restrict_and_add (level+1, dst_level, dst[level+1]);
+        }
+
+      first = false;
+    }
+}
+
+// Writes the level vectors src into the global vector dst, either as a plain
+// copy of the highest level or entry by entry through the copy index lists.
+template <typename Number>
+template <int dim, typename Number2, int spacedim>
+void
+MGLevelGlobalTransfer<parallel::distributed::Vector<Number> >::copy_from_mg
+(const DoFHandler<dim,spacedim>                              &mg_dof_handler,
+ parallel::distributed::Vector<Number2>                      &dst,
+ const MGLevelObject<parallel::distributed::Vector<Number> > &src) const
+{
+  if (perform_plain_copy)
+    {
+      // In this case, we can simply copy the local range (in parallel by
+      // VectorView). To avoid having stray data in ghost entries of the
+      // destination, make sure to clear them here.
+      dst.zero_out_ghosts();
+      AssertDimension(dst.local_size(), src[src.max_level()].local_size());
+      VectorView<Number2> dst_view (dst.local_size(), dst.begin());
+      VectorView<Number>  src_view (dst.local_size(), src[src.max_level()].begin());
+      static_cast<Vector<Number2> &>(dst_view) = static_cast<Vector<Number> &>(src_view);
+      return;
+    }
+
+  // For non-DG: degrees of freedom in the refinement face may need special
+  // attention, since they belong to the coarse level, but have fine level
+  // basis functions
+
+  dst = 0;
+  for (unsigned int level=0; level<mg_dof_handler.get_triangulation().n_global_levels(); ++level)
+    {
+      typedef std::vector<std::pair<unsigned int, unsigned int> >::const_iterator dof_pair_iterator;
+
+      // the ghosted vector should already have the correct local size (but
+      // different parallel layout)
+      AssertDimension(ghosted_level_vector[level].local_size(),
+                      src[level].local_size());
+
+      // copy this level's source vector to the temporary vector that we hold
+      // for the purpose of data exchange, then update its ghost values
+      parallel::distributed::Vector<Number> &ghosted_vector =
+        ghosted_level_vector[level];
+      ghosted_vector = src[level];
+      ghosted_vector.update_ghost_values();
+
+      // first copy local unknowns
+      for (dof_pair_iterator i = copy_indices[level].begin();
+           i != copy_indices[level].end(); ++i)
+        dst.local_element(i->first) = ghosted_vector.local_element(i->second);
+
+      // Do the same for the index pairs in copy_indices_global_mine.
+      // NOTE(review): by its name, global index local but level index not -- confirm
+      for (dof_pair_iterator i = copy_indices_global_mine[level].begin();
+           i != copy_indices_global_mine[level].end(); ++i)
+        dst.local_element(i->first) = ghosted_vector.local_element(i->second);
+    }
+  dst.compress(VectorOperation::insert);
+}
+
+
+// Adds the level vectors src onto the global vector dst, one level after the other.
+template <typename Number>
+template <int dim, typename Number2, int spacedim>
+void
+MGLevelGlobalTransfer<parallel::distributed::Vector<Number> >::copy_from_mg_add
+(const DoFHandler<dim,spacedim>                              &mg_dof_handler,
+ parallel::distributed::Vector<Number2>                      &dst,
+ const MGLevelObject<parallel::distributed::Vector<Number> > &src) const
+{
+  // For non-DG: degrees of freedom in the refinement face may need special
+  // attention, since they belong to the coarse level, but have fine level
+  // basis functions
+
+  dst.zero_out_ghosts();
+  for (unsigned int level=0; level<mg_dof_handler.get_triangulation().n_global_levels(); ++level)
+    {
+      typedef std::vector<std::pair<unsigned int, unsigned int> >::const_iterator dof_pair_iterator;
+
+      // the ghosted vector should already have the correct local size (but
+      // different parallel layout)
+      AssertDimension(ghosted_level_vector[level].local_size(),
+                      src[level].local_size());
+
+      // copy this level's source vector to the temporary vector that we hold
+      // for the purpose of data exchange, then update its ghost values
+      parallel::distributed::Vector<Number> &ghosted_vector =
+        ghosted_level_vector[level];
+      ghosted_vector = src[level];
+      ghosted_vector.update_ghost_values();
+
+      // first add local unknowns
+      for (dof_pair_iterator i= copy_indices[level].begin();
+           i != copy_indices[level].end(); ++i)
+        dst.local_element(i->first) += ghosted_vector.local_element(i->second);
+
+      // Do the same for the index pairs in copy_indices_global_mine.
+      // NOTE(review): by its name, global index local but level index not -- confirm
+      for (dof_pair_iterator i= copy_indices_global_mine[level].begin();
+           i != copy_indices_global_mine[level].end(); ++i)
+        dst.local_element(i->first) += ghosted_vector.local_element(i->second);
+    }
+  dst.compress(VectorOperation::add);
+}
+
+
+
+template <typename Number>
+void
+MGLevelGlobalTransfer<parallel::distributed::Vector<Number> >::
+set_component_to_block_map (const std::vector<unsigned int> &map)
+{
+  this->component_to_block_map = map; // store a copy of the caller's map
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/multigrid/mg_transfer_block.h b/include/deal.II/multigrid/mg_transfer_block.h
new file mode 100644
index 0000000..b992cf9
--- /dev/null
+++ b/include/deal.II/multigrid/mg_transfer_block.h
@@ -0,0 +1,461 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__mg_transfer_block_h
+#define dealii__mg_transfer_block_h
+
+#include <deal.II/base/config.h>
+
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/sparsity_pattern.h>
+#include <deal.II/lac/block_sparsity_pattern.h>
+#include <deal.II/lac/vector_memory.h>
+#include <deal.II/lac/block_matrix_array.h>
+
+#include <deal.II/multigrid/mg_base.h>
+#include <deal.II/multigrid/mg_constrained_dofs.h>
+#include <deal.II/base/mg_level_object.h>
+
+
+
+#include <deal.II/dofs/dof_handler.h>
+
+#include <deal.II/base/std_cxx11/shared_ptr.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+template <int dim, int spacedim> class DoFHandler;
+
+/*
+ * MGTransferBase is defined in mg_base.h
+ */
+
+/*!@addtogroup mg */
+/*@{*/
+
+/**
+ * Implementation of matrix generation for MGTransferBlock.
+ *
+ * This is the base class for MGTransfer objects for systems of equations
+ * where multigrid is applied only to one or some blocks, where a
+ * @ref GlossBlock
+ * comprises all degrees of freedom generated by one base element.
+ *
+ * @author Guido Kanschat, 2001-2003
+ */
+class MGTransferBlockBase
+{
+public:
+  /**
+   * Constructor without constraint matrices. Use this constructor only with
+   * discontinuous finite elements or with no local refinement.
+   */
+  MGTransferBlockBase ();
+  /**
+   * Constructor with constraint matrices as well as mg_constrained_dofs.
+   */
+  MGTransferBlockBase (const ConstraintMatrix &constraints,
+                       const MGConstrainedDoFs &mg_constrained_dofs);
+  /**
+   * Memory used by this object.
+   */
+  std::size_t memory_consumption () const;
+
+protected:
+  /**
+   * Actually build the prolongation matrices for each level.
+   *
+   * This function is only called by derived classes. These can also set the
+   * member variables #selected and others to restrict the transfer matrices
+   * to certain blocks.
+   */
+  template <int dim, int spacedim>
+  void build_matrices (const DoFHandler<dim,spacedim> &dof,
+                       const DoFHandler<dim,spacedim> &mg_dof);
+
+  /**
+   * Flag of selected blocks.
+   *
+   * The transfer operators only act on the blocks having a <tt>true</tt>
+   * entry here.
+   */
+//TODO: rename this to block_mask, in the same way as has already been done in MGTransferComponent, and give it type BlockMask
+  std::vector<bool> selected;
+
+  /**
+   * Number of blocks of multigrid vector.
+   */
+  unsigned int n_mg_blocks;
+
+  /**
+   * For each block of the whole block vector, list to what block of the
+   * multigrid vector it is mapped. Since depending on #selected, there may be
+   * fewer multilevel blocks than original blocks, some of the entries may be
+   * invalid unsigned integers.
+   */
+//TODO: rename this to mg_block_mask, in the same way as has already been done in MGTransferComponent, and give it type BlockMask
+  std::vector<unsigned int> mg_block;
+
+  /**
+   * Sizes of the multi-level vectors.
+   */
+  mutable std::vector<std::vector<types::global_dof_index> > sizes;
+
+  /**
+   * Start index of each block.
+   */
+  std::vector<types::global_dof_index> block_start;
+
+  /**
+   * Start index of each block on all levels.
+   */
+  std::vector<std::vector<types::global_dof_index> > mg_block_start;
+
+  /**
+   * Exception indicating that build_matrices() has to be called first.
+   */
+  DeclException0(ExcMatricesNotBuilt);
+
+private:
+  std::vector<std_cxx11::shared_ptr<BlockSparsityPattern> >   prolongation_sparsities; // NOTE(review): presumably the sparsity patterns of #prolongation_matrices -- confirm
+
+protected:
+
+  /**
+   * The actual prolongation matrices. Column indices belong to the dof
+   * indices of the mother cell, i.e. the coarse level, while row indices
+   * belong to the child cell, i.e. the fine level.
+   */
+  std::vector<std_cxx11::shared_ptr<BlockSparseMatrix<double> > > prolongation_matrices;
+
+  /**
+   * Mapping for the <tt>copy_to/from_mg</tt>-functions. The indices into this
+   * vector are (in this order): global block number, level number. The data
+   * is first the global index inside the block, then the level index inside
+   * the block.
+   */
+  std::vector<std::vector<std::vector<std::pair<unsigned int, unsigned int> > > >
+  copy_indices;
+  /**
+   * The constraints of the global system.
+   */
+  SmartPointer<const ConstraintMatrix, MGTransferBlockBase> constraints;
+  /**
+   * The mg_constrained_dofs of the level systems.
+   */
+
+  SmartPointer<const MGConstrainedDoFs, MGTransferBlockBase> mg_constrained_dofs;
+};
+
+/**
+ * Implementation of the MGTransferBase interface for block matrices and block
+ * vectors.
+ *
+ * @warning This class is in an untested state. If you use it and you
+ * encounter problems, please contact Guido Kanschat.
+ *
+ * In addition to the functionality of MGTransferPrebuilt, the operation may
+ * be restricted to certain blocks of the vector.
+ *
+ * If the restricted mode is chosen, block vectors used in the transfer
+ * routines may only have as many blocks as there are <tt>true</tt> entries
+ * in the selected-field.
+ *
+ * See MGTransferBase to find out which of the transfer classes is best for
+ * your needs.
+ *
+ * @author Guido Kanschat, 2001, 2002
+ */
+template <typename number>
+class MGTransferBlock : public MGTransferBase<BlockVector<number> >,
+  private MGTransferBlockBase
+{
+public:
+  /**
+   * Default constructor.
+   */
+  MGTransferBlock();
+
+  /**
+   * Destructor.
+   */
+  virtual ~MGTransferBlock ();
+
+  /**
+   * Initialize additional #factors and #memory if the restriction of the
+   * blocks is to be weighted differently.
+   */
+  void initialize (const std::vector<number> &factors,
+                   VectorMemory<Vector<number> > &memory);
+
+  /**
+   * Build the prolongation matrices for each level.
+   *
+   * This function is a front-end for the same function in
+   * MGTransferBlockBase; it additionally takes a block mask @p selected.
+   */
+  template<int dim, int spacedim>
+  void build_matrices (const DoFHandler<dim,spacedim> &dof,
+                       const DoFHandler<dim,spacedim> &mg_dof,
+                       const std::vector<bool> &selected);
+
+  virtual void prolongate (const unsigned int    to_level,
+                           BlockVector<number>       &dst,
+                           const BlockVector<number> &src) const;
+
+  virtual void restrict_and_add (const unsigned int    from_level,
+                                 BlockVector<number>       &dst,
+                                 const BlockVector<number> &src) const;
+
+  /**
+   * Transfer from a vector on the global grid to a multilevel vector.
+   *
+   * The action for discontinuous elements is as follows: on an active mesh
+   * cell, the global vector entries are simply copied to the corresponding
+   * entries of the level vector. Then, these values are restricted down to
+   * the coarsest level.
+   */
+  template <int dim, typename number2, int spacedim>
+  void
+  copy_to_mg (const DoFHandler<dim,spacedim> &mg_dof,
+              MGLevelObject<BlockVector<number> > &dst,
+              const BlockVector<number2> &src) const;
+
+  /**
+   * Transfer from multi-level vector to normal vector.
+   *
+   * Copies data from active portions of a multilevel vector into the
+   * respective positions of a global vector.
+   */
+  template <int dim, typename number2, int spacedim>
+  void
+  copy_from_mg (const DoFHandler<dim,spacedim> &mg_dof,
+                BlockVector<number2> &dst,
+                const MGLevelObject<BlockVector<number> > &src) const;
+
+  /**
+   * Add a multi-level vector to a normal vector.
+   *
+   * Works as the previous function, but probably not for continuous elements.
+   */
+  template <int dim, typename number2, int spacedim>
+  void
+  copy_from_mg_add (const DoFHandler<dim,spacedim> &mg_dof,
+                    BlockVector<number2> &dst,
+                    const MGLevelObject<BlockVector<number> > &src) const;
+
+  using MGTransferBlockBase::memory_consumption; // make the privately inherited function public again
+
+private:
+  /**
+   * Optional multiplication factors for each block. Requires initialization
+   * of #memory.
+   */
+  std::vector<number> factors;
+
+  /**
+   * Memory pool required if additional multiplication using #factors is
+   * desired.
+   */
+  SmartPointer<VectorMemory<Vector<number> >,MGTransferBlock<number> > memory;
+};
+
+
+//TODO:[GK] Update documentation for copy_* functions
+
+/**
+ * Implementation of the MGTransferBase interface for block matrices and
+ * simple vectors. This class uses MGTransferBlockBase selecting a single
+ * block. The intergrid transfer operators are implemented for Vector objects,
+ * the copy functions between regular and multigrid vectors for Vector and
+ * BlockVector.
+ *
+ * See MGTransferBase to find out which of the transfer classes is best for
+ * your needs.
+ *
+ * @author Guido Kanschat, 2001, 2002, 2003
+ */
+template <typename number>
+class MGTransferBlockSelect : public MGTransferBase<Vector<number> >,
+  private MGTransferBlockBase
+{
+public:
+  /**
+   * Constructor without constraint matrices. Use this constructor only with
+   * discontinuous finite elements or with no local refinement.
+   */
+  MGTransferBlockSelect ();
+  /**
+   * Constructor with constraint matrices as well as mg_constrained_dofs.
+   */
+  MGTransferBlockSelect (const ConstraintMatrix &constraints,
+                         const MGConstrainedDoFs &mg_constrained_dofs);
+  /**
+   * Destructor.
+   */
+  virtual ~MGTransferBlockSelect ();
+
+  /**
+   * Actually build the prolongation matrices for grouped blocks.
+   *
+   * This function is a front-end for the same function in
+   * MGTransferBlockBase.
+   *
+   * @arg selected: Number of the block of the global vector to be copied from
+   * and to the multilevel vector.
+   *
+   * @note Earlier documentation also listed <tt>mg_selected</tt> (component for which the
+   * transfer matrices should be built); the signature below has no such argument.
+   */
+  template<int dim, int spacedim>
+  void build_matrices (const DoFHandler<dim,spacedim> &dof,
+                       const DoFHandler<dim,spacedim> &mg_dof,
+                       unsigned int selected);
+
+  /**
+   * Change selected block. Handle with care!
+   */
+  void select (const unsigned int block);
+
+  virtual void prolongate (const unsigned int    to_level,
+                           Vector<number>       &dst,
+                           const Vector<number> &src) const;
+
+  virtual void restrict_and_add (const unsigned int    from_level,
+                                 Vector<number>       &dst,
+                                 const Vector<number> &src) const;
+
+  /**
+   * Transfer a single block from a vector on the global grid to a multilevel
+   * vector.
+   */
+  template <int dim, typename number2, int spacedim>
+  void
+  copy_to_mg (const DoFHandler<dim,spacedim>        &mg_dof,
+              MGLevelObject<Vector<number> > &dst,
+              const Vector<number2>          &src) const;
+
+  /**
+   * Transfer from multilevel vector to normal vector.
+   *
+   * Copies data from active portions of a multilevel vector into the
+   * respective positions of a Vector.
+   */
+  template <int dim, typename number2, int spacedim>
+  void
+  copy_from_mg (const DoFHandler<dim,spacedim>              &mg_dof,
+                Vector<number2>                      &dst,
+                const MGLevelObject<Vector<number> > &src) const;
+
+  /**
+   * Add a multi-level vector to a normal vector.
+   *
+   * Works as the previous function, but probably not for continuous elements.
+   */
+  template <int dim, typename number2, int spacedim>
+  void
+  copy_from_mg_add (const DoFHandler<dim,spacedim>              &mg_dof,
+                    Vector<number2>                      &dst,
+                    const MGLevelObject<Vector<number> > &src) const;
+
+  /**
+   * Transfer a block from a vector on the global grid to multilevel vectors.
+   * Only the block selected is transferred.
+   */
+  template <int dim, typename number2, int spacedim>
+  void
+  copy_to_mg (const DoFHandler<dim,spacedim>        &mg_dof,
+              MGLevelObject<Vector<number> > &dst,
+              const BlockVector<number2>     &src) const;
+
+  /**
+   * Transfer from multilevel vector to normal vector.
+   *
+   * Copies data from active portions of a multilevel vector into the
+   * respective positions of a global BlockVector.
+   */
+  template <int dim, typename number2, int spacedim>
+  void
+  copy_from_mg (const DoFHandler<dim,spacedim>              &mg_dof,
+                BlockVector<number2>                 &dst,
+                const MGLevelObject<Vector<number> > &src) const;
+
+  /**
+   * Add a multi-level vector to a normal vector.
+   *
+   * Works as the previous function, but probably not for continuous elements.
+   */
+  template <int dim, typename number2, int spacedim>
+  void
+  copy_from_mg_add (const DoFHandler<dim,spacedim>              &mg_dof,
+                    BlockVector<number2>                 &dst,
+                    const MGLevelObject<Vector<number> > &src) const;
+
+  /**
+   * Memory used by this object.
+   */
+  std::size_t memory_consumption () const;
+
+private:
+  /**
+   * Implementation of the public function copy_from_mg().
+   */
+  template <int dim, class OutVector, int spacedim>
+  void
+  do_copy_from_mg (const DoFHandler<dim,spacedim>              &mg_dof,
+                   OutVector                            &dst,
+                   const MGLevelObject<Vector<number> > &src,
+                   const unsigned int offset) const;
+
+  /**
+   * Implementation of the public function copy_from_mg_add().
+   */
+  template <int dim, class OutVector, int spacedim>
+  void
+  do_copy_from_mg_add (const DoFHandler<dim,spacedim>              &mg_dof,
+                       OutVector                            &dst,
+                       const MGLevelObject<Vector<number> > &src,
+                       const unsigned int offset) const;
+
+  /**
+   * Actual implementation of copy_to_mg().
+   */
+  template <int dim, class InVector, int spacedim>
+  void
+  do_copy_to_mg (const DoFHandler<dim,spacedim>        &mg_dof,
+                 MGLevelObject<Vector<number> > &dst,
+                 const InVector                 &src,
+                 const unsigned int              offset) const;
+  /**
+   * Selected block.
+   */
+  unsigned int selected_block;
+};
+
+/*@}*/
+
+//----------inline function definition--------------------------------------------------
+template <typename number>
+inline void
+MGTransferBlockSelect<number>::select(const unsigned int block)
+{
+  this->selected_block = block; // only the stored block number changes here
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/multigrid/mg_transfer_block.templates.h b/include/deal.II/multigrid/mg_transfer_block.templates.h
new file mode 100644
index 0000000..2745fc9
--- /dev/null
+++ b/include/deal.II/multigrid/mg_transfer_block.templates.h
@@ -0,0 +1,151 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__mg_transfer_block_templates_h
+#define dealii__mg_transfer_block_templates_h
+
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/multigrid/mg_base.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/multigrid/mg_tools.h>
+#include <deal.II/multigrid/mg_transfer_block.h>
+
+#include <algorithm>
+
+DEAL_II_NAMESPACE_OPEN
+
+/* --------------------- MGTransferBlockSelect -------------- */
+
+// Shorthand for the iterator over the (global index, level index) pairs stored in copy_indices
+typedef std::vector<std::pair<unsigned int, unsigned int> >::const_iterator IT;
+
+
+template <typename number>
+template <int dim, typename number2, int spacedim>
+void
+MGTransferBlockSelect<number>::copy_from_mg (
+  const DoFHandler<dim,spacedim>              &mg_dof_handler,
+  BlockVector<number2>                 &dst,
+  const MGLevelObject<Vector<number> > &src) const
+{
+  // Overwrite entries of the selected block of dst with level data, level 0 first.
+  for (unsigned int l=0; l!=mg_dof_handler.get_triangulation().n_levels(); ++l)
+    for (IT p=copy_indices[selected_block][l].begin(), last=copy_indices[selected_block][l].end(); p!=last; ++p)
+      dst.block(selected_block)(p->first) = src[l](p->second);
+}
+
+
+
+template <typename number>
+template <int dim, typename number2, int spacedim>
+void
+MGTransferBlockSelect<number>::copy_from_mg (
+  const DoFHandler<dim,spacedim>              &mg_dof_handler,
+  Vector<number2>                      &dst,
+  const MGLevelObject<Vector<number> > &src) const
+{
+  // Overwrite entries of dst with level data of the selected block, level 0 first.
+  for (unsigned int l=0; l!=mg_dof_handler.get_triangulation().n_levels(); ++l)
+    for (IT p=copy_indices[selected_block][l].begin(), last=copy_indices[selected_block][l].end(); p!=last; ++p)
+      dst(p->first) = src[l](p->second);
+}
+
+
+
+template <typename number>
+template <int dim, typename number2, int spacedim>
+void
+MGTransferBlockSelect<number>::copy_from_mg_add (
+  const DoFHandler<dim,spacedim>              &mg_dof_handler,
+  BlockVector<number2>                 &dst,
+  const MGLevelObject<Vector<number> > &src) const
+{
+  // Add level data onto entries of the selected block of dst, level 0 first.
+  for (unsigned int l=0; l!=mg_dof_handler.get_triangulation().n_levels(); ++l)
+    for (IT p=copy_indices[selected_block][l].begin(), last=copy_indices[selected_block][l].end(); p!=last; ++p)
+      dst.block(selected_block)(p->first) += src[l](p->second);
+}
+
+
+
+template <typename number>
+template <int dim, typename number2, int spacedim>
+void
+MGTransferBlockSelect<number>::copy_from_mg_add (
+  const DoFHandler<dim,spacedim>              &mg_dof_handler,
+  Vector<number2>                      &dst,
+  const MGLevelObject<Vector<number> > &src) const
+{
+  // Add level data of the selected block onto entries of dst, level 0 first.
+  for (unsigned int l=0; l!=mg_dof_handler.get_triangulation().n_levels(); ++l)
+    for (IT p=copy_indices[selected_block][l].begin(), last=copy_indices[selected_block][l].end(); p!=last; ++p)
+      dst(p->first) += src[l](p->second);
+}
+
+
+
+template <typename number>
+std::size_t
+MGTransferBlockSelect<number>::memory_consumption () const
+{
+  return MGTransferBlockBase::memory_consumption() + sizeof(int); // base class plus one int-sized term
+}
+
+
+/* --------------------- MGTransferBlock -------------- */
+
+
+
+template <typename number>
+template <int dim, typename number2, int spacedim>
+void
+MGTransferBlock<number>::copy_from_mg (
+  const DoFHandler<dim,spacedim> &mg_dof_handler,
+  BlockVector<number2> &dst,
+  const MGLevelObject<BlockVector<number> > &src) const
+{
+  // Only selected blocks are written; block b of dst is fed from level block mg_block[b].
+  for (unsigned int b=0; b!=selected.size(); ++b)
+    if (selected[b])
+      for (unsigned int l=0; l!=mg_dof_handler.get_triangulation().n_levels(); ++l)
+        for (IT p=copy_indices[b][l].begin(), last=copy_indices[b][l].end(); p!=last; ++p)
+          dst.block(b)(p->first) = src[l].block(mg_block[b])(p->second);
+}
+
+
+
+template <typename number>
+template <int dim, typename number2, int spacedim>
+void
+MGTransferBlock<number>::copy_from_mg_add (
+  const DoFHandler<dim,spacedim> &mg_dof_handler,
+  BlockVector<number2> &dst,
+  const MGLevelObject<BlockVector<number> > &src) const
+{
+  // Only selected blocks are touched; level block mg_block[b] is added onto block b of dst.
+  for (unsigned int b=0; b!=selected.size(); ++b)
+    if (selected[b])
+      for (unsigned int l=0; l!=mg_dof_handler.get_triangulation().n_levels(); ++l)
+        for (IT p=copy_indices[b][l].begin(), last=copy_indices[b][l].end(); p!=last; ++p)
+          dst.block(b)(p->first) += src[l].block(mg_block[b])(p->second);
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/multigrid/mg_transfer_component.h b/include/deal.II/multigrid/mg_transfer_component.h
new file mode 100644
index 0000000..195bfe7
--- /dev/null
+++ b/include/deal.II/multigrid/mg_transfer_component.h
@@ -0,0 +1,378 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__mg_transfer_component_h
+#define dealii__mg_transfer_component_h
+
+#include <deal.II/base/config.h>
+
+#include <deal.II/base/std_cxx11/shared_ptr.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/lac/sparsity_pattern.h>
+#include <deal.II/lac/block_sparsity_pattern.h>
+#include <deal.II/lac/vector_memory.h>
+
+#include <deal.II/base/mg_level_object.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/fe/component_mask.h>
+#include <deal.II/multigrid/mg_base.h>
+
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+template <int dim, int spacedim> class DoFHandler;
+
+/*
+ * MGTransferBase is defined in mg_base.h
+ */
+
+/*!@addtogroup mg */
+/*@{*/
+
+/**
+ * Implementation of matrix generation for component-wise multigrid transfer.
+ *
+ * @note MGTransferBlockBase is probably the more logical class. Still,
+ * eventually a class should be developed that allows selecting multiple
+ * components.
+ *
+ * @author Guido Kanschat, 2001-2003
+ */
+class MGTransferComponentBase
+{
+public:
+  /**
+   * Memory used by this object.
+   */
+  std::size_t memory_consumption () const;
+
+
+protected:
+  /**
+   * Actually build the prolongation matrices for each level.
+   *
+   * This function is only called by derived classes. These can also set the
+   * member variables <code>selected_component</code> and
+   * <code>mg_selected_component</code> to restrict the
+   * transfer matrices to certain components. Furthermore, they use
+   * <code>target_component</code> and <code>mg_target_component</code> for
+   * re-ordering and grouping of components.
+   */
+  template <int dim, int spacedim>
+  void build_matrices (const DoFHandler<dim,spacedim> &dof,
+                       const DoFHandler<dim,spacedim> &mg_dof);
+
+  /**
+   * Flag of selected components.
+   *
+   * The transfer operators only act on the components having a <tt>true</tt>
+   * entry here. If renumbering by #target_component is used, this refers to
+   * the <b>renumbered</b> components.
+   */
+  ComponentMask component_mask;
+
+  /**
+   * Flag of selected components.
+   *
+   * The transfer operators only act on the components having a <tt>true</tt>
+   * entry here. If renumbering by #mg_target_component is used, this refers
+   * to the <b>renumbered</b> components.
+   */
+  ComponentMask mg_component_mask;
+
+  /**
+   * Target component of the fine-level vector if renumbering is required.
+   */
+  std::vector<unsigned int> target_component;
+
+  /**
+   * Target component if renumbering of level vectors is required.
+   */
+  std::vector<unsigned int> mg_target_component;
+
+  /**
+   * Sizes of the multi-level vectors.
+   */
+  mutable std::vector<std::vector<types::global_dof_index> > sizes;
+
+  /**
+   * Start index of each component.
+   */
+  std::vector<types::global_dof_index> component_start;
+
+  /**
+   * Start index of each component on all levels.
+   */
+  std::vector<std::vector<types::global_dof_index> > mg_component_start;
+
+  /**
+   * Exception: the build_matrices() function has to be called first.
+   */
+  DeclException0(ExcMatricesNotBuilt);
+
+private:
+  std::vector<std_cxx11::shared_ptr<BlockSparsityPattern> >   prolongation_sparsities;
+
+protected:
+
+  /**
+   * The actual prolongation matrix. Column indices belong to the dof indices
+   * of the mother cell, i.e. the coarse level, while row indices belong to
+   * the child cell, i.e. the fine level.
+   */
+  std::vector<std_cxx11::shared_ptr<BlockSparseMatrix<double> > > prolongation_matrices;
+
+  /**
+   * Holds the mapping for the <tt>copy_to/from_mg</tt>-functions, one list per
+   * level. Each pair is first the global index, then the level index.
+   */
+  std::vector<std::vector<std::pair<types::global_dof_index, unsigned int> > >
+  copy_to_and_from_indices;
+
+  /**
+   * Store the boundary_indices. These are needed for the boundary values in
+   * the restriction matrix.
+   */
+  std::vector<std::set<types::global_dof_index> > boundary_indices;
+};
+
+//TODO:[GK] Update documentation for copy_* functions
+
+//TODO: Use same kind of template argument as MGTransferSelect
+
+/**
+ * Implementation of the MGTransferBase interface for block matrices and
+ * simple vectors. This class uses MGTransferComponentBase selecting a single
+ * component or grouping several components into a single block. The transfer
+ * operators themselves are implemented for Vector and BlockVector objects.
+ *
+ * See MGTransferBase to find out which of the transfer classes is best for
+ * your needs.
+ *
+ * @author Guido Kanschat, 2001, 2002, 2003
+ */
+template <typename number>
+class MGTransferSelect : public MGTransferBase<Vector<number> >,
+  private MGTransferComponentBase
+{
+public:
+  /**
+   * Constructor without constraint matrices. Use this constructor only with
+   * discontinuous finite elements or with no local refinement.
+   */
+  MGTransferSelect ();
+
+  /**
+   * Constructor with constraint matrices.
+   */
+  MGTransferSelect (const ConstraintMatrix &constraints);
+
+  /**
+   * Destructor.
+   */
+  virtual ~MGTransferSelect ();
+
+//TODO: rewrite docs; make sure defaulted args are actually allowed
+  /**
+   * Actually build the prolongation matrices for grouped components.
+   *
+   * This function is a front-end for the same function in
+   * MGTransferComponentBase.
+   *
+   * @arg selected: Number of the block of the global vector to be copied from
+   * and to the multilevel vector. This number refers to the renumbering by
+   * <tt>target_component</tt>.
+   *
+   * @arg mg_selected: Number of the block for which the transfer matrices
+   * should be built.
+   *
+   * If <tt>mg_target_component</tt> is present, this refers to the renumbered
+   * components.
+   *
+   * @arg target_component: this argument allows grouping and renumbering of
+   * components in the fine-level vector (see DoFRenumbering::component_wise).
+   *
+   * @arg mg_target_component: this argument allows grouping and renumbering
+   * of components in the level vectors (see DoFRenumbering::component_wise).
+   * It also affects the behavior of the <tt>selected</tt> argument.
+   *
+   * @arg boundary_indices: holds the boundary indices on each level.
+   */
+  template <int dim, int spacedim>
+  void build_matrices (const DoFHandler<dim,spacedim> &dof,
+                       const DoFHandler<dim,spacedim> &mg_dof,
+                       unsigned int selected,
+                       unsigned int mg_selected,
+                       const std::vector<unsigned int> &target_component
+                       = std::vector<unsigned int>(),
+                       const std::vector<unsigned int> &mg_target_component
+                       = std::vector<unsigned int>(),
+                       const std::vector<std::set<types::global_dof_index> > &boundary_indices
+                       = std::vector<std::set<types::global_dof_index> >()
+                      );
+
+  /**
+   * Change selected component. Handle with care!
+   */
+  void select (const unsigned int component,
+               const unsigned int mg_component = numbers::invalid_unsigned_int);
+
+  virtual void prolongate (const unsigned int    to_level,
+                           Vector<number>       &dst,
+                           const Vector<number> &src) const;
+
+  virtual void restrict_and_add (const unsigned int    from_level,
+                                 Vector<number>       &dst,
+                                 const Vector<number> &src) const;
+
+  /**
+   * Transfer from a vector on the global grid to a multilevel vector.
+   */
+  template <int dim, typename number2, int spacedim>
+  void
+  copy_to_mg (const DoFHandler<dim,spacedim>        &mg_dof,
+              MGLevelObject<Vector<number> > &dst,
+              const Vector<number2>          &src) const;
+
+  /**
+   * Transfer from multilevel vector to normal vector.
+   *
+   * Copies data from active portions of a multilevel vector into the
+   * respective positions of a Vector.
+   */
+  template <int dim, typename number2, int spacedim>
+  void
+  copy_from_mg (const DoFHandler<dim,spacedim>              &mg_dof,
+                Vector<number2>                      &dst,
+                const MGLevelObject<Vector<number> > &src) const;
+
+  /**
+   * Add a multi-level vector to a normal vector.
+   *
+   * Works as the previous function, but probably not for continuous elements.
+   */
+  template <int dim, typename number2, int spacedim>
+  void
+  copy_from_mg_add (const DoFHandler<dim,spacedim>              &mg_dof,
+                    Vector<number2>                      &dst,
+                    const MGLevelObject<Vector<number> > &src) const;
+
+  /**
+   * Transfer from a vector on the global grid to multilevel vectors.
+   */
+  template <int dim, typename number2, int spacedim>
+  void
+  copy_to_mg (const DoFHandler<dim,spacedim>        &mg_dof,
+              MGLevelObject<Vector<number> > &dst,
+              const BlockVector<number2>     &src) const;
+
+  /**
+   * Transfer from multilevel vector to normal vector.
+   *
+   * Copies data from active portions of a multilevel vector into the
+   * respective positions of a global BlockVector.
+   */
+  template <int dim, typename number2, int spacedim>
+  void
+  copy_from_mg (const DoFHandler<dim,spacedim>              &mg_dof,
+                BlockVector<number2>                 &dst,
+                const MGLevelObject<Vector<number> > &src) const;
+
+  /**
+   * Add a multi-level vector to a normal vector.
+   *
+   * Works as the previous function, but probably not for continuous elements.
+   */
+  template <int dim, typename number2, int spacedim>
+  void
+  copy_from_mg_add (const DoFHandler<dim,spacedim>              &mg_dof,
+                    BlockVector<number2>                 &dst,
+                    const MGLevelObject<Vector<number> > &src) const;
+
+  /**
+   * Memory used by this object.
+   */
+  std::size_t memory_consumption () const;
+
+private:
+  /**
+   * Implementation of the public copy_from_mg() functions.
+   */
+  template <int dim, class OutVector, int spacedim>
+  void
+  do_copy_from_mg (const DoFHandler<dim,spacedim>              &mg_dof,
+                   OutVector                            &dst,
+                   const MGLevelObject<Vector<number> > &src) const;
+
+  /**
+   * Implementation of the public copy_from_mg_add() functions.
+   */
+  template <int dim, class OutVector, int spacedim>
+  void
+  do_copy_from_mg_add (const DoFHandler<dim,spacedim>              &mg_dof,
+                       OutVector                            &dst,
+                       const MGLevelObject<Vector<number> > &src) const;
+
+  /**
+   * Actual implementation of copy_to_mg().
+   */
+  template <int dim, class InVector, int spacedim>
+  void
+  do_copy_to_mg (const DoFHandler<dim,spacedim>        &mg_dof,
+                 MGLevelObject<Vector<number> > &dst,
+                 const InVector                 &src) const;
+  /**
+   * Selected component of global vector.
+   */
+  unsigned int selected_component;
+  /**
+   * Selected component inside multigrid.
+   */
+  unsigned int mg_selected_component;
+
+  /**
+   * The degrees of freedom on the refinement edges. For each level the index
+   * set denotes which level degrees of freedom are on the refinement edge
+   * towards the lower level, excluding boundary dofs.
+   */
+  std::vector<IndexSet> interface_dofs;
+
+  /**
+   * The constraints of the global system.
+   */
+public:
+  SmartPointer<const ConstraintMatrix> constraints;
+};
+
+/*@}*/
+
+//---------------------------------------------------------------------------
+template <typename number>
+inline void
+MGTransferSelect<number>::select(const unsigned int component,
+                                 const unsigned int mg_component)
+{
+  selected_component = component;
+  mg_selected_component = mg_component;
+  if (mg_component == numbers::invalid_unsigned_int)
+    mg_selected_component = component;
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/multigrid/mg_transfer_component.templates.h b/include/deal.II/multigrid/mg_transfer_component.templates.h
new file mode 100644
index 0000000..6bbfd46
--- /dev/null
+++ b/include/deal.II/multigrid/mg_transfer_component.templates.h
@@ -0,0 +1,223 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__mg_transfer_component_templates_h
+#define dealii__mg_transfer_component_templates_h
+
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/multigrid/mg_base.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/multigrid/mg_tools.h>
+#include <deal.II/multigrid/mg_transfer_component.h>
+#include <deal.II/numerics/data_out.h>
+
+#include <algorithm>
+#include <sstream>
+#include <fstream>
+
+DEAL_II_NAMESPACE_OPEN
+
+/* --------------------- MGTransferSelect -------------- */
+
+
+
+template <typename number>
+template <int dim, typename number2, int spacedim>
+void MGTransferSelect<number>::copy_to_mg (
+  const DoFHandler<dim,spacedim> &mg_dof_handler,
+  MGLevelObject<Vector<number> > &dst,
+  const BlockVector<number2> &src) const
+{
+  const unsigned int source_block = target_component[selected_component];
+  do_copy_to_mg (mg_dof_handler, dst, src.block(source_block));
+}
+
+
+
+template <typename number>
+template <int dim, typename number2, int spacedim>
+void MGTransferSelect<number>::copy_to_mg (
+  const DoFHandler<dim,spacedim> &mg_dof_handler,
+  MGLevelObject<Vector<number> > &dst,
+  const Vector<number2> &src) const
+{
+  // A plain vector needs no block selection; forward it unchanged.
+  do_copy_to_mg (mg_dof_handler, dst, src);
+}
+
+
+
+template <typename number>
+template <int dim, typename number2, int spacedim>
+void MGTransferSelect<number>::copy_from_mg (
+  const DoFHandler<dim,spacedim> &mg_dof_handler,
+  BlockVector<number2> &dst,
+  const MGLevelObject<Vector<number> > &src) const
+{
+  // Clear all blocks, fill the selected one, then condense if requested.
+  dst = 0;
+  const unsigned int target_block = target_component[selected_component];
+  do_copy_from_mg (mg_dof_handler, dst.block(target_block), src);
+  if (constraints != 0)
+    constraints->condense(dst);
+}
+
+
+
+template <typename number>
+template <int dim, typename number2, int spacedim>
+void
+MGTransferSelect<number>::copy_from_mg (
+  const DoFHandler<dim,spacedim>              &mg_dof_handler,
+  Vector<number2>                      &dst,
+  const MGLevelObject<Vector<number> > &src) const
+{
+  dst = 0;
+  do_copy_from_mg (mg_dof_handler, dst, src);
+
+  // Without constraints the copied data is already the final result.
+  if (constraints == 0)
+    return;
+
+  // The constraints are defined on the global system, whereas dst only
+  // holds the selected block. Hence embed dst into a temporary global
+  // block vector, condense that one, and copy the selected block back
+  // into the vector that is returned to the linear solver.
+  const unsigned int selected_block = target_component[selected_component];
+  const unsigned int n_blocks =
+    *std::max_element(target_component.begin(), target_component.end()) + 1;
+
+  std::vector<types::global_dof_index> block_sizes (n_blocks);
+  DoFTools::count_dofs_per_block (mg_dof_handler, block_sizes, target_component);
+  BlockVector<number> global_vector;
+  global_vector.reinit(n_blocks);
+  for (unsigned int b=0; b<n_blocks; ++b)
+    global_vector.block(b).reinit(block_sizes[b]);
+  global_vector.collect_sizes ();
+  global_vector.block(selected_block) = dst;
+  constraints->condense(global_vector);
+  dst = global_vector.block(selected_block);
+}
+
+
+
+template <typename number>
+template <int dim, typename number2, int spacedim>
+void MGTransferSelect<number>::copy_from_mg_add (
+  const DoFHandler<dim,spacedim> &mg_dof_handler,
+  BlockVector<number2> &dst,
+  const MGLevelObject<Vector<number> > &src) const
+{
+  // The copy indices are relative to the selected block (cf. copy_from_mg).
+  do_copy_from_mg_add (mg_dof_handler, dst.block(target_component[selected_component]), src);
+}
+
+
+
+template <typename number>
+template <int dim, typename number2, int spacedim>
+void MGTransferSelect<number>::copy_from_mg_add (
+  const DoFHandler<dim,spacedim> &mg_dof_handler,
+  Vector<number2> &dst,
+  const MGLevelObject<Vector<number> > &src) const
+{
+  // A plain vector needs no block selection; forward it unchanged.
+  do_copy_from_mg_add (mg_dof_handler, dst, src);
+}
+
+
+
+template <typename number>
+template <int dim, class OutVector, int spacedim>
+void
+MGTransferSelect<number>::do_copy_from_mg (
+  const DoFHandler<dim,spacedim>              &mg_dof_handler,
+  OutVector                            &dst,
+  const MGLevelObject<Vector<number> > &src) const
+{
+  // Overwrite dst with the level data. A level's index list covers the whole
+  // level, so apply it once per run of same-level cells, not once per cell.
+  typedef std::vector<std::pair<types::global_dof_index, unsigned int> >::const_iterator IT;
+  typename DoFHandler<dim,spacedim>::active_cell_iterator
+  level_cell = mg_dof_handler.begin_active();
+  const typename DoFHandler<dim,spacedim>::active_cell_iterator
+  endc = mg_dof_handler.end();
+  dst = 0;
+  unsigned int previous_level = numbers::invalid_unsigned_int;
+  for (; level_cell != endc; ++level_cell)
+    {
+      const unsigned int level = level_cell->level();
+      if (level != previous_level)
+        for (IT i=copy_to_and_from_indices[level].begin();
+             i != copy_to_and_from_indices[level].end(); ++i)
+          dst(i->first) = src[level](i->second);
+      previous_level = level;
+    }
+}
+
+
+template <typename number>
+template <int dim, class OutVector, int spacedim>
+void
+MGTransferSelect<number>::do_copy_from_mg_add (
+  const DoFHandler<dim,spacedim>              &mg_dof_handler,
+  OutVector                            &dst,
+  const MGLevelObject<Vector<number> > &src) const
+{
+  // Add the level data in src to the matching entries of dst. The
+  // previous content of dst is kept, since it is what the level data is
+  // added to; callers that want a plain copy use do_copy_from_mg().
+
+  typedef std::vector<std::pair<types::global_dof_index, unsigned int> >::const_iterator IT;
+
+  typename DoFHandler<dim,spacedim>::active_cell_iterator
+  level_cell = mg_dof_handler.begin_active();
+  const typename DoFHandler<dim,spacedim>::active_cell_iterator
+  endc = mg_dof_handler.end();
+
+  // The index list of a level covers the whole level and has to be added
+  // exactly once. Active cells are traversed with monotonically
+  // increasing level, so comparing with the level of the previous cell
+  // visits every level that owns active cells exactly once.
+  unsigned int previous_level = numbers::invalid_unsigned_int;
+  for (; level_cell != endc; ++level_cell)
+    {
+      const unsigned int level = level_cell->level();
+      if (level != previous_level)
+        for (IT i=copy_to_and_from_indices[level].begin();
+             i != copy_to_and_from_indices[level].end(); ++i)
+          dst(i->first) += src[level](i->second);
+      previous_level = level;
+    }
+}
+
+
+template <typename number>
+std::size_t MGTransferSelect<number>::memory_consumption () const
+{
+  // Memory reported by the base class plus a fixed sizeof(int).
+  return MGTransferComponentBase::memory_consumption() + sizeof(int);
+}
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/multigrid/mg_transfer_matrix_free.h b/include/deal.II/multigrid/mg_transfer_matrix_free.h
new file mode 100644
index 0000000..a56fd06
--- /dev/null
+++ b/include/deal.II/multigrid/mg_transfer_matrix_free.h
@@ -0,0 +1,237 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2016 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__mg_transfer_matrix_free_h
+#define dealii__mg_transfer_matrix_free_h
+
+#include <deal.II/base/config.h>
+
+#include <deal.II/lac/parallel_vector.h>
+#include <deal.II/multigrid/mg_base.h>
+#include <deal.II/multigrid/mg_constrained_dofs.h>
+#include <deal.II/base/mg_level_object.h>
+#include <deal.II/multigrid/mg_transfer.h>
+#include <deal.II/matrix_free/shape_info.h>
+
+#include <deal.II/dofs/dof_handler.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/*!@addtogroup mg */
+/*@{*/
+
+/**
+ * Implementation of the MGTransferBase interface for which the transfer
+ * operations are implemented in a matrix-free way based on the interpolation
+ * matrices of the underlying finite element. This requires considerably less
+ * memory than MGTransferPrebuilt and can also be considerably faster than
+ * that variant.
+ *
+ * This class currently only works for tensor-product finite elements based on
+ * FE_Q and FE_DGQ elements, including systems involving multiple components
+ * of one of these elements. Systems with different elements or other elements
+ * are currently not implemented.
+ *
+ * @author Martin Kronbichler
+ * @date 2016
+ */
+template <int dim, typename Number>
+class MGTransferMatrixFree : public MGLevelGlobalTransfer<parallel::distributed::Vector<Number> >
+{
+public:
+  /**
+   * Constructor without constraint matrices. Use this constructor only with
+   * discontinuous finite elements or with no local refinement.
+   */
+  MGTransferMatrixFree ();
+
+  /**
+   * Constructor with constraints. Equivalent to the default constructor
+   * followed by initialize_constraints().
+   */
+  MGTransferMatrixFree (const MGConstrainedDoFs &mg_constrained_dofs);
+
+  /**
+   * Destructor.
+   */
+  virtual ~MGTransferMatrixFree ();
+
+  /**
+   * Initialize the constraints to be used in build().
+   */
+  void initialize_constraints (const MGConstrainedDoFs &mg_constrained_dofs);
+
+  /**
+   * Reset the object to the state it had right after the default constructor.
+   */
+  void clear ();
+
+  /**
+   * Actually build the information for the prolongation for each level.
+   */
+  void build (const DoFHandler<dim,dim> &mg_dof);
+
+  /**
+   * Prolongate a vector from level <tt>to_level-1</tt> to level
+   * <tt>to_level</tt> using the embedding matrices of the underlying finite
+   * element. The previous content of <tt>dst</tt> is overwritten.
+   *
+   * @param src is a vector with as many elements as there are degrees of
+   * freedom on the coarser level involved.
+   *
+   * @param dst has as many elements as there are degrees of freedom on the
+   * finer level.
+   */
+  virtual void prolongate (const unsigned int                           to_level,
+                           parallel::distributed::Vector<Number>       &dst,
+                           const parallel::distributed::Vector<Number> &src) const;
+
+  /**
+   * Restrict a vector from level <tt>from_level</tt> to level
+   * <tt>from_level-1</tt> using the transpose operation of the prolongate()
+   * method. If the region covered by cells on level <tt>from_level</tt> is
+   * smaller than that of level <tt>from_level-1</tt> (local refinement), then
+   * some degrees of freedom in <tt>dst</tt> are active and will not be
+   * altered. For the other degrees of freedom, the result of the restriction
+   * is added.
+   *
+   * @param src is a vector with as many elements as there are degrees of
+   * freedom on the finer level involved.
+   *
+   * @param dst has as many elements as there are degrees of freedom on the
+   * coarser level.
+   */
+  virtual void restrict_and_add (const unsigned int from_level,
+                                 parallel::distributed::Vector<Number>       &dst,
+                                 const parallel::distributed::Vector<Number> &src) const;
+
+  /**
+   * Finite element does not provide prolongation matrices.
+   */
+  DeclException0(ExcNoProlongation);
+
+  /**
+   * Memory used by this object.
+   */
+  std::size_t memory_consumption () const;
+
+private:
+
+  /**
+   * Stores the degree of the finite element contained in the DoFHandler
+   * passed to build(). The selection of the computational kernel is based on
+   * this number.
+   */
+  unsigned int fe_degree;
+
+  /**
+   * Stores whether the element is continuous and there is a joint degree of
+   * freedom in the center of the 1D line.
+   */
+  bool element_is_continuous;
+
+  /**
+   * Stores the number of components in the finite element contained in the
+   * DoFHandler passed to build().
+   */
+  unsigned int n_components;
+
+  /**
+   * Stores the number of degrees of freedom on all child cells. It is
+   * <tt>2<sup>dim</sup>*fe.dofs_per_cell</tt> for DG elements and somewhat
+   * less for continuous elements.
+   */
+  unsigned int n_child_cell_dofs;
+
+  /**
+   * Holds the indices for cells on a given level, extracted from DoFHandler
+   * for fast access. All DoF indices on a given level are stored as a plain
+   * array (since this class assumes constant DoFs per cell). To index into
+   * this array, use the cell number times dofs_per_cell.
+   *
+   * This array is arranged such that all locally owned level cells come
+   * first (found in the variable n_owned_level_cells) and then other cells
+   * necessary for the transfer to the next level.
+   */
+  std::vector<std::vector<unsigned int> > level_dof_indices;
+
+  /**
+   * Stores the connectivity from parent to child cell numbers for each level.
+   */
+  std::vector<std::vector<std::pair<unsigned int,unsigned int> > > parent_child_connect;
+
+  /**
+   * Stores the number of cells owned on a given process (sets the bounds for
+   * the worker loops) for each level.
+   */
+  std::vector<unsigned int> n_owned_level_cells;
+
+  /**
+   * Holds the one-dimensional embedding (prolongation) matrix from mother
+   * element to the children.
+   */
+  internal::MatrixFreeFunctions::ShapeInfo<Number> shape_info;
+
+  /**
+   * Holds the temporary values for the tensor evaluation.
+   */
+  mutable AlignedVector<VectorizedArray<Number> > evaluation_data;
+
+  /**
+   * For continuous elements, restriction is not additive and we need to
+   * weight the result at the end of prolongation (and at the start of
+   * restriction) by the valence of the degrees of freedom, i.e., on how many
+   * elements they appear. We store the data in vectorized form to allow for
+   * cheap access. Moreover, we utilize the fact that we only need to store
+   * <tt>3<sup>dim</sup></tt> indices.
+   *
+   * Data is organized in terms of each level (outer vector) and the cells on
+   * each level (inner vector).
+   */
+  std::vector<AlignedVector<VectorizedArray<Number> > > weights_on_refined;
+
+  /**
+   * Stores the local indices of Dirichlet boundary conditions on cells for
+   * all levels (outer index), the cells within the levels (second index), and
+   * the indices on the cell (inner index).
+   */
+  std::vector<std::vector<std::vector<unsigned short> > > dirichlet_indices;
+
+  /**
+   * Performs the templated prolongation operation.
+   */
+  template <int degree>
+  void do_prolongate_add(const unsigned int                           to_level,
+                         parallel::distributed::Vector<Number>       &dst,
+                         const parallel::distributed::Vector<Number> &src) const;
+
+  /**
+   * Performs the templated restriction operation.
+   */
+  template <int degree>
+  void do_restrict_add(const unsigned int                           from_level,
+                       parallel::distributed::Vector<Number>       &dst,
+                       const parallel::distributed::Vector<Number> &src) const;
+};
+
+
+/*@}*/
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/multigrid/multigrid.h b/include/deal.II/multigrid/multigrid.h
new file mode 100644
index 0000000..7f81b93
--- /dev/null
+++ b/include/deal.II/multigrid/multigrid.h
@@ -0,0 +1,554 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__multigrid_h
+#define dealii__multigrid_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/multigrid/mg_base.h>
+#include <deal.II/base/mg_level_object.h>
+
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+/*!@addtogroup mg */
+/*@{*/
+
+/**
+ * Implementation of the multigrid method.
+ *
+ * @warning multigrid on locally refined meshes only works with
+ * <b>discontinuous finite elements</b> right now. It is not clear whether
+ * the paradigm of local smoothing we use is applicable to continuous elements
+ * with hanging nodes; in fact, most people you meet at conferences seem to
+ * deny this.
+ *
+ * The function which starts a multigrid cycle on the finest level is cycle().
+ * Depending on the cycle type chosen with the constructor (see enum Cycle),
+ * this function triggers one of the cycles level_v_step() or level_step(),
+ * where the latter one can do different types of cycles.
+ *
+ * Using this class, it is expected that the right hand side has been
+ * converted from a vector living on the locally finest level to a multilevel
+ * vector. This is a nontrivial operation, usually initiated automatically by
+ * the class PreconditionMG and performed by the classes derived from
+ * MGTransferBase.
+ *
+ * @note The interface of this class is still very clumsy. In particular, you
+ * will have to set up quite a few auxiliary objects before you can use it.
+ * Unfortunately, it seems that this can be avoided only by restricting the
+ * flexibility of this class in an unacceptable way.
+ *
+ * @author Guido Kanschat, 1999 - 2005
+ */
+template <typename VectorType>
+class Multigrid : public Subscriptor
+{
+public:
+  /**
+   * List of implemented cycle types.
+   */
+  enum Cycle
+  {
+    /// The V-cycle
+    v_cycle,
+    /// The W-cycle
+    w_cycle,
+    /// The F-cycle
+    f_cycle
+  };
+
+  /**
+   * The vector type used on each level.
+   */
+  typedef VectorType vector_type;
+
+  /**
+   * The const-qualified version of the level vector type.
+   */
+  typedef const VectorType const_vector_type;
+
+  /**
+   * Constructor. The DoFHandler is used to determine the highest possible
+   * level. <tt>transfer</tt> is an object performing prolongation and
+   * restriction.
+   *
+   * This function already initializes the vectors which will be used later in
+   * the course of the computations. You should therefore create objects of
+   * this type as late as possible.
+   */
+  template <int dim>
+  Multigrid(const DoFHandler<dim>              &mg_dof_handler,
+            const MGMatrixBase<VectorType>     &matrix,
+            const MGCoarseGridBase<VectorType> &coarse,
+            const MGTransferBase<VectorType>   &transfer,
+            const MGSmootherBase<VectorType>   &pre_smooth,
+            const MGSmootherBase<VectorType>   &post_smooth,
+            Cycle                              cycle = v_cycle);
+
+  /**
+   * Experimental constructor for cases in which no DoFHandler is available.
+   * The level range is given explicitly by <tt>minlevel</tt> and
+   * <tt>maxlevel</tt>.
+   *
+   * @warning Not intended for general use.
+   */
+  Multigrid(const unsigned int                 minlevel,
+            const unsigned int                 maxlevel,
+            const MGMatrixBase<VectorType>     &matrix,
+            const MGCoarseGridBase<VectorType> &coarse,
+            const MGTransferBase<VectorType>   &transfer,
+            const MGSmootherBase<VectorType>   &pre_smooth,
+            const MGSmootherBase<VectorType>   &post_smooth,
+            Cycle                              cycle = v_cycle);
+
+  /**
+   * Reset the level range to the given <tt>minlevel</tt> and
+   * <tt>maxlevel</tt> and resize the multilevel vector #defect accordingly.
+   * The remaining multilevel vectors are resized at the beginning of cycle()
+   * and vcycle().
+   */
+  void reinit (const unsigned int minlevel,
+               const unsigned int maxlevel);
+
+  /**
+   * Execute one multigrid cycle. The type of cycle is selected by the
+   * constructor argument cycle or by set_cycle(). See the enum Cycle for
+   * available types.
+   */
+  void cycle ();
+
+  /**
+   * Execute one step of the V-cycle algorithm.  This function assumes that
+   * the multilevel vector #defect is filled with the residual of an outer
+   * defect correction scheme (this is usually taken care of by
+   * PreconditionMG). After vcycle(), the result is in the multilevel vector
+   * #solution. See <tt>copy_*_mg</tt> in the MGTools namespace if you want to
+   * use these vectors yourself.
+   *
+   * The actual work for this function is done in level_v_step().
+   */
+  void vcycle ();
+
+  /**
+   * Set additional matrices to correct residual computation at refinement
+   * edges. Since we only smoothen in the interior of the refined part of the
+   * mesh, the coupling across the refinement edge is missing. This coupling
+   * is provided by these two matrices.
+   *
+   * @note While <tt>edge_out.vmult</tt> is used, for the second argument, we
+   * use <tt>edge_in.Tvmult</tt>. Thus, <tt>edge_in</tt> should be assembled
+   * in transposed form. This saves a second sparsity pattern for
+   * <tt>edge_in</tt>. In particular, for symmetric operators, both arguments
+   * can refer to the same matrix, saving assembling of one of them.
+   */
+  void set_edge_matrices (const MGMatrixBase<VectorType> &edge_out,
+                          const MGMatrixBase<VectorType> &edge_in);
+
+  /**
+   * Set additional matrices to correct residual computation at refinement
+   * edges. These matrices originate from discontinuous Galerkin methods (see
+   * FE_DGQ etc.), where they correspond to the edge fluxes at the refinement
+   * edge between two levels.
+   *
+   * @note While <tt>edge_down.vmult</tt> is used, for the second argument, we
+   * use <tt>edge_up.Tvmult</tt>. Thus, <tt>edge_up</tt> should be assembled
+   * in transposed form. This saves a second sparsity pattern for
+   * <tt>edge_up</tt>. In particular, for symmetric operators, both arguments
+   * can refer to the same matrix, saving assembling of one of them.
+   */
+  void set_edge_flux_matrices (const MGMatrixBase<VectorType> &edge_down,
+                               const MGMatrixBase<VectorType> &edge_up);
+
+  /**
+   * Return the finest level for multigrid.
+   */
+  unsigned int get_maxlevel() const;
+
+  /**
+   * Return the coarsest level for multigrid.
+   */
+  unsigned int get_minlevel() const;
+
+  /**
+   * Set the highest level for which the multilevel method is performed. By
+   * default, this is the finest level of the Triangulation; therefore, this
+   * function will only accept arguments not larger than the current
+   * #maxlevel and not smaller than the current #minlevel.
+   */
+  void set_maxlevel (const unsigned int);
+
+  /**
+   * Set the coarsest level for which the multilevel method is performed. By
+   * default, this is zero. Accepted are non-negative values not larger than
+   * the current #maxlevel.
+   *
+   * If <tt>relative</tt> is <tt>true</tt>, then this function determines the
+   * number of levels used, that is, it sets #minlevel to
+   * #maxlevel-<tt>level</tt>.
+   *
+   * @note The mesh on the coarsest level must cover the whole domain. There
+   * may not be hanging nodes on #minlevel.
+   *
+   * @note If #minlevel is set to a nonzero value, do not forget to adjust
+   * your coarse grid solver!
+   */
+  void set_minlevel (const unsigned int level,
+                     bool relative = false);
+
+  /**
+   * Change the #cycle_type used in cycle().
+   */
+  void set_cycle(Cycle);
+
+  /**
+   * Set the debug level. Higher values will create more debugging output
+   * during the multigrid cycles.
+   */
+  void set_debug (const unsigned int);
+
+private:
+
+  /**
+   * The V-cycle multigrid method. <tt>level</tt> is the level the function
+   * starts on. It will usually be called for the highest level from outside,
+   * but will then call itself recursively for <tt>level-1</tt>, unless we are
+   * on #minlevel where the coarse grid solver solves the problem exactly.
+   */
+  void level_v_step (const unsigned int level);
+
+  /**
+   * The actual W-cycle or F-cycle multigrid method. <tt>level</tt> is the
+   * level the function starts on. It will usually be called for the highest
+   * level from outside, but will then call itself recursively for
+   * <tt>level-1</tt>, unless we are on #minlevel where the coarse grid solver
+   * solves the problem exactly.
+   */
+  void level_step (const unsigned int level, Cycle cycle);
+
+  /**
+   * Cycle type performed by the method cycle().
+   */
+  Cycle cycle_type;
+
+  /**
+   * Level for coarse grid solution.
+   */
+  unsigned int minlevel;
+
+  /**
+   * Highest level of cells.
+   */
+  unsigned int maxlevel;
+
+public:
+  /**
+   * Input vector for the cycle. Contains the defect of the outer method
+   * projected to the multilevel vectors.
+   */
+  MGLevelObject<VectorType> defect;
+
+  /**
+   * The solution update after the multigrid step.
+   */
+  MGLevelObject<VectorType> solution;
+
+private:
+  /**
+   * Auxiliary vector.
+   */
+  MGLevelObject<VectorType> t;
+
+  /**
+   * Auxiliary vector for W- and F-cycles. Left uninitialized in V-cycle.
+   */
+  MGLevelObject<VectorType> defect2;
+
+
+  /**
+   * The matrix for each level.
+   */
+  SmartPointer<const MGMatrixBase<VectorType>,Multigrid<VectorType> > matrix;
+
+  /**
+   * The coarse grid solver, applied on #minlevel.
+   */
+  SmartPointer<const MGCoarseGridBase<VectorType>,Multigrid<VectorType> > coarse;
+
+  /**
+   * Object for grid transfer.
+   */
+  SmartPointer<const MGTransferBase<VectorType>,Multigrid<VectorType> > transfer;
+
+  /**
+   * The pre-smoothing object.
+   */
+  SmartPointer<const MGSmootherBase<VectorType>,Multigrid<VectorType> > pre_smooth;
+
+  /**
+   * The post-smoothing object.
+   */
+  SmartPointer<const MGSmootherBase<VectorType>,Multigrid<VectorType> > post_smooth;
+
+  /**
+   * Edge matrix from the interior of the refined part to the refinement edge.
+   *
+   * @note Only <tt>vmult</tt> is used for these matrices.
+   */
+  SmartPointer<const MGMatrixBase<VectorType> > edge_out;
+
+  /**
+   * Transpose edge matrix from the refinement edge to the interior of the
+   * refined part.
+   *
+   * @note Only <tt>Tvmult</tt> is used for these matrices.
+   */
+  SmartPointer<const MGMatrixBase<VectorType> > edge_in;
+
+  /**
+   * Edge matrix from fine to coarse.
+   *
+   * @note Only <tt>vmult</tt> is used for these matrices.
+   */
+  SmartPointer<const MGMatrixBase<VectorType>,Multigrid<VectorType> > edge_down;
+
+  /**
+   * Transpose edge matrix from coarse to fine.
+   *
+   * @note Only <tt>Tvmult</tt> is used for these matrices.
+   */
+  SmartPointer<const MGMatrixBase<VectorType>,Multigrid<VectorType> > edge_up;
+
+  /**
+   * Level for debug output. Defaults to zero and can be set by set_debug().
+   */
+  unsigned int debug;
+
+  template<int dim, class OtherVectorType, class TRANSFER> friend class PreconditionMG;
+};
+
+
+/**
+ * Multi-level preconditioner. Here, we collect all information needed for
+ * multi-level preconditioning and provide the standard interface for LAC
+ * iterative methods.
+ *
+ * The <tt>TRANSFER</tt> class needs to provide a function
+ * <tt>copy_to_mg</tt> to store @p src in the right hand side of the
+ * multi-level method, as well as functions <tt>copy_from_mg</tt> and
+ * <tt>copy_from_mg_add</tt> to store the result of the multigrid cycle in
+ * (respectively add it to) @p dst. All three are called with the DoFHandler
+ * as first argument.
+ *
+ * @author Guido Kanschat, 1999, 2000, 2001, 2002
+ */
+template<int dim, typename VectorType, class TRANSFER>
+class PreconditionMG : public Subscriptor
+{
+public:
+  /**
+   * Constructor. Arguments are the DoFHandler, the multigrid object and the
+   * transfer object used to copy vectors to and from the multilevel
+   * vectors. Only pointers to the arguments are stored.
+   */
+  PreconditionMG(const DoFHandler<dim> &dof_handler,
+                 Multigrid<VectorType> &mg,
+                 const TRANSFER        &transfer);
+
+  /**
+   * Dummy function needed by other classes. Always returns <tt>false</tt>.
+   */
+  bool empty () const;
+
+  /**
+   * Preconditioning operator. Copies @p src to the multilevel defect, calls
+   * the @p cycle function of the Multigrid object passed to the constructor
+   * and copies the multilevel solution to @p dst.
+   *
+   * This is the operator used by LAC iterative solvers.
+   */
+  template<class OtherVectorType>
+  void vmult (OtherVectorType       &dst,
+              const OtherVectorType &src) const;
+
+  /**
+   * Preconditioning operator. Same as vmult(), but the result of the
+   * multigrid cycle is added to @p dst via <tt>copy_from_mg_add</tt>.
+   */
+  template<class OtherVectorType>
+  void vmult_add (OtherVectorType       &dst,
+                  const OtherVectorType &src) const;
+
+  /**
+   * Transposed preconditioning operator.
+   *
+   * Not implemented, but the definition may be needed.
+   */
+  template<class OtherVectorType>
+  void Tvmult (OtherVectorType       &dst,
+               const OtherVectorType &src) const;
+
+  /**
+   * Transposed preconditioning operator.
+   *
+   * Not implemented, but the definition may be needed.
+   */
+  template<class OtherVectorType>
+  void Tvmult_add (OtherVectorType       &dst,
+                   const OtherVectorType &src) const;
+
+private:
+  /**
+   * Associated @p DoFHandler.
+   */
+  SmartPointer<const DoFHandler<dim>,PreconditionMG<dim,VectorType,TRANSFER> > dof_handler;
+
+  /**
+   * The multigrid object.
+   */
+  SmartPointer<Multigrid<VectorType>,PreconditionMG<dim,VectorType,TRANSFER> > multigrid;
+
+  /**
+   * Object for grid transfer.
+   */
+  SmartPointer<const TRANSFER,PreconditionMG<dim,VectorType,TRANSFER> > transfer;
+};
+
+/*@}*/
+
+#ifndef DOXYGEN
+/* --------------------------- inline functions --------------------- */
+
+
+// Constructor deriving the level range from a DoFHandler: the coarsest
+// level is zero and the finest level is the last global level of the
+// triangulation attached to mg_dof_handler. All four multilevel vectors are
+// created for that range. The matrix, coarse solver, transfer and smoother
+// objects are referenced, not copied.
+//
+// All four edge matrix pointers start out as null pointers and carry the
+// same owner id, exactly as in the constructor taking an explicit level
+// range.
+template <typename VectorType>
+template <int dim>
+Multigrid<VectorType>::Multigrid (const DoFHandler<dim>              &mg_dof_handler,
+                                  const MGMatrixBase<VectorType>     &matrix,
+                                  const MGCoarseGridBase<VectorType> &coarse,
+                                  const MGTransferBase<VectorType>   &transfer,
+                                  const MGSmootherBase<VectorType>   &pre_smooth,
+                                  const MGSmootherBase<VectorType>   &post_smooth,
+                                  Cycle                              cycle)
+  :
+  cycle_type(cycle),
+  minlevel(0),
+  maxlevel(mg_dof_handler.get_triangulation().n_global_levels()-1),
+  defect(minlevel,maxlevel),
+  solution(minlevel,maxlevel),
+  t(minlevel,maxlevel),
+  defect2(minlevel,maxlevel),
+  matrix(&matrix, typeid(*this).name()),
+  coarse(&coarse, typeid(*this).name()),
+  transfer(&transfer, typeid(*this).name()),
+  pre_smooth(&pre_smooth, typeid(*this).name()),
+  post_smooth(&post_smooth, typeid(*this).name()),
+  edge_out(0, typeid(*this).name()),
+  edge_in(0, typeid(*this).name()),
+  edge_down(0, typeid(*this).name()),
+  edge_up(0, typeid(*this).name()),
+  debug(0)
+{}
+
+
+
+// Accessor for the finest level currently used by the multigrid cycle.
+template <typename VectorType>
+inline unsigned int
+Multigrid<VectorType>::get_maxlevel () const
+{
+  return this->maxlevel;
+}
+
+
+
+// Accessor for the coarsest level currently used by the multigrid cycle.
+template <typename VectorType>
+inline unsigned int
+Multigrid<VectorType>::get_minlevel () const
+{
+  return this->minlevel;
+}
+
+
+/* --------------------------- inline functions --------------------- */
+
+
+// Remember the addresses of the DoFHandler, the multigrid object and the
+// transfer object; none of them is copied.
+template<int dim, typename VectorType, class TRANSFER>
+PreconditionMG<dim, VectorType, TRANSFER>::PreconditionMG
+(const DoFHandler<dim>  &dof_handler,
+ Multigrid<VectorType>  &mg,
+ const TRANSFER         &transfer)
+  : dof_handler(&dof_handler),
+    multigrid(&mg),
+    transfer(&transfer)
+{}
+
+// Dummy query required by other classes; this preconditioner is never
+// reported as empty.
+template<int dim, typename VectorType, class TRANSFER>
+inline
+bool
+PreconditionMG<dim, VectorType, TRANSFER>::empty () const
+{
+  return false;
+}
+
+// Apply the preconditioner: dst receives the result of one multigrid cycle
+// applied to src.
+template<int dim, typename VectorType, class TRANSFER>
+template<class OtherVectorType>
+void
+PreconditionMG<dim, VectorType, TRANSFER>::vmult
+(OtherVectorType       &dst,
+ const OtherVectorType &src) const
+{
+  Multigrid<VectorType> &mg = *multigrid;
+
+  // distribute src to the multilevel defect vector
+  transfer->copy_to_mg(*dof_handler, mg.defect, src);
+
+  // run the cycle selected in the multigrid object
+  mg.cycle();
+
+  // collect the multilevel solution into dst
+  transfer->copy_from_mg(*dof_handler, dst, mg.solution);
+}
+
+
+// Apply the preconditioner and add the result to dst: same as vmult(), but
+// the multilevel solution is handed to copy_from_mg_add().
+template<int dim, typename VectorType, class TRANSFER>
+template<class OtherVectorType>
+void
+PreconditionMG<dim, VectorType, TRANSFER>::vmult_add
+(OtherVectorType       &dst,
+ const OtherVectorType &src) const
+{
+  Multigrid<VectorType> &mg = *multigrid;
+
+  // distribute src to the multilevel defect vector
+  transfer->copy_to_mg(*dof_handler, mg.defect, src);
+
+  // run the cycle selected in the multigrid object
+  mg.cycle();
+
+  // add the multilevel solution to dst
+  transfer->copy_from_mg_add(*dof_handler, dst, mg.solution);
+}
+
+
+// Transposed application of the preconditioner. Not implemented: the
+// arguments are ignored and the ExcNotImplemented assertion is raised.
+template<int dim, typename VectorType, class TRANSFER>
+template<class OtherVectorType>
+void
+PreconditionMG<dim, VectorType, TRANSFER>::Tvmult
+(OtherVectorType &,
+ const OtherVectorType &) const
+{
+  Assert(false, ExcNotImplemented());
+}
+
+
+// Transposed application of the preconditioner, adding to the destination.
+// Not implemented: the arguments are ignored and the ExcNotImplemented
+// assertion is raised.
+template<int dim, typename VectorType, class TRANSFER>
+template<class OtherVectorType>
+void
+PreconditionMG<dim, VectorType, TRANSFER>::Tvmult_add
+(OtherVectorType &,
+ const OtherVectorType &) const
+{
+  Assert(false, ExcNotImplemented());
+}
+
+#endif // DOXYGEN
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/multigrid/multigrid.templates.h b/include/deal.II/multigrid/multigrid.templates.h
new file mode 100644
index 0000000..e4f3bef
--- /dev/null
+++ b/include/deal.II/multigrid/multigrid.templates.h
@@ -0,0 +1,424 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__multigrid_templates_h
+#define dealii__multigrid_templates_h
+#include <deal.II/multigrid/multigrid.h>
+
+#include <deal.II/base/logstream.h>
+
+#include <iostream>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// Constructor for the case in which no DoFHandler is available: the level
+// range is passed in explicitly. The multilevel vectors defect, solution and
+// t are created for that range; defect2 is default-constructed here and
+// resized in cycle() when a W- or F-cycle is requested. The matrix, coarse
+// solver, transfer and smoother objects are referenced, not copied, and all
+// edge matrix pointers start out as null pointers.
+//
+// The level arguments are called min_level/max_level (as in reinit()) so
+// that they do not shadow the members minlevel/maxlevel.
+template <typename VectorType>
+Multigrid<VectorType>::Multigrid (const unsigned int                    min_level,
+                                  const unsigned int                    max_level,
+                                  const MGMatrixBase<VectorType>        &matrix,
+                                  const MGCoarseGridBase<VectorType>    &coarse,
+                                  const MGTransferBase<VectorType>      &transfer,
+                                  const MGSmootherBase<VectorType>      &pre_smooth,
+                                  const MGSmootherBase<VectorType>      &post_smooth,
+                                  typename Multigrid<VectorType>::Cycle cycle)
+  :
+  cycle_type(cycle),
+  minlevel(min_level),
+  maxlevel(max_level),
+  defect(min_level,max_level),
+  solution(min_level,max_level),
+  t(min_level,max_level),
+  matrix(&matrix, typeid(*this).name()),
+  coarse(&coarse, typeid(*this).name()),
+  transfer(&transfer, typeid(*this).name()),
+  pre_smooth(&pre_smooth, typeid(*this).name()),
+  post_smooth(&post_smooth, typeid(*this).name()),
+  edge_out(0, typeid(*this).name()),
+  edge_in(0, typeid(*this).name()),
+  edge_down(0, typeid(*this).name()),
+  edge_up(0, typeid(*this).name()),
+  debug(0)
+{}
+
+
+
+// Adopt a new level range and resize the multilevel defect vector to it.
+// The other multilevel vectors are resized when a cycle is started.
+template <typename VectorType>
+void
+Multigrid<VectorType>::reinit (const unsigned int min_level,
+                               const unsigned int max_level)
+{
+  this->minlevel = min_level;
+  this->maxlevel = max_level;
+
+  defect.resize(min_level, max_level);
+}
+
+
+// Set the finest level used by the cycle. The assertions require
+// minlevel <= l <= maxlevel, i.e. the finest level can only be lowered.
+template <typename VectorType>
+void
+Multigrid<VectorType>::set_maxlevel (const unsigned int l)
+{
+  Assert (l <= maxlevel, ExcIndexRange(l,minlevel,maxlevel+1));
+  Assert (l >= minlevel, ExcIndexRange(l,minlevel,maxlevel+1));
+  maxlevel = l;
+}
+
+
+// Set the coarsest level used by the cycle. If relative is false, l is the
+// new coarsest level itself; if relative is true, l is the distance from
+// the finest level and the coarsest level becomes maxlevel-l. In both cases
+// the assertion requires l <= maxlevel, which also keeps maxlevel-l from
+// wrapping around.
+template <typename VectorType>
+void
+Multigrid<VectorType>::set_minlevel (const unsigned int l,
+                                     const bool relative)
+{
+  Assert (l <= maxlevel, ExcIndexRange(l,minlevel,maxlevel+1));
+  minlevel = (relative)
+             ? (maxlevel-l)
+             : l;
+}
+
+
+// Select the cycle type that the next call to cycle() will perform.
+template <typename VectorType>
+void
+Multigrid<VectorType>::set_cycle(typename Multigrid<VectorType>::Cycle c)
+{
+  this->cycle_type = c;
+}
+
+
+// Select how much diagnostic output the level steps write to deallog;
+// zero disables it.
+template <typename VectorType>
+void
+Multigrid<VectorType>::set_debug (const unsigned int d)
+{
+  this->debug = d;
+}
+
+
+// Register the two edge matrices used at refinement edges: the first
+// argument is stored as edge_out, the second one as edge_in. Only their
+// addresses are kept.
+template <typename VectorType>
+void
+Multigrid<VectorType>::set_edge_matrices (const MGMatrixBase<VectorType> &down,
+                                          const MGMatrixBase<VectorType> &up)
+{
+  this->edge_out = &down;
+  this->edge_in  = &up;
+}
+
+
+// Register the two edge flux matrices: the first argument is stored as
+// edge_down, the second one as edge_up. Only their addresses are kept.
+template <typename VectorType>
+void
+Multigrid<VectorType>::set_edge_flux_matrices (const MGMatrixBase<VectorType> &down,
+                                               const MGMatrixBase<VectorType> &up)
+{
+  this->edge_down = &down;
+  this->edge_up   = &up;
+}
+
+
+// One V-cycle step starting on the given level.
+//
+// On minlevel the coarse grid solver is applied to defect[level] and the
+// function returns. On any finer level the sequence is: pre-smoothing,
+// computation of t = A*solution, restriction of t through all coarser
+// levels while subtracting it from their defects, recursion on level-1,
+// prolongation of the coarse correction, update of defect[level] by the
+// edge matrices (if set) and post-smoothing.
+//
+// The amount of output written to deallog is controlled by the member debug.
+template <typename VectorType>
+void
+Multigrid<VectorType>::level_v_step (const unsigned int level)
+{
+  if (debug>0)
+    deallog << "V-cycle entering level " << level << std::endl;
+  if (debug>2)
+    deallog << "V-cycle  Defect norm   " << defect[level].l2_norm()
+            << std::endl;
+
+  // coarsest level: hand the defect to the coarse grid solver and stop the
+  // recursion
+  if (level == minlevel)
+    {
+      if (debug>0)
+        deallog << "Coarse level           " << level << std::endl;
+      (*coarse)(level, solution[level], defect[level]);
+      return;
+    }
+  if (debug>1)
+    deallog << "Smoothing on     level " << level << std::endl;
+  // pre-smoothing: update solution[level] using defect[level] as right
+  // hand side
+//  defect[level].print(std::cout, 2,false);
+//  std::cout<<std::endl;
+  pre_smooth->smooth(level, solution[level], defect[level]);
+//  solution[level].print(std::cout, 2,false);
+
+  if (debug>2)
+    deallog << "Solution norm          " << solution[level].l2_norm()
+            << std::endl;
+
+  if (debug>1)
+    deallog << "Residual on      level " << level << std::endl;
+  // t = A*solution[level]
+  matrix->vmult(level, t[level], solution[level]);
+
+  if (debug>2)
+    deallog << "Residual norm          " << t[level].l2_norm()
+            << std::endl;
+//  std::cout<<std::endl;
+//  t[level].print(std::cout, 2,false);
+
+  // make t rhs of lower level. The
+  // non-refined parts of the
+  // coarse-level defect already
+  // contain the global defect, the
+  // refined parts its restriction.
+  //
+  // The loop walks from this level down to minlevel+1: t[l] is restricted
+  // into t[l-1], which is then subtracted from defect[l-1]. The edge
+  // matrices only contribute in the first iteration (l==level).
+  for (unsigned int l = level; l>minlevel; --l)
+    {
+      t[l-1] = 0.;
+      if (l==level && edge_out != 0)
+        {
+          edge_out->vmult_add(level, t[level], solution[level]);
+          if (debug>2)
+            deallog << "Norm     t[" << level << "] " << t[level].l2_norm() << std::endl;
+        }
+
+      if (l==level && edge_down != 0)
+        edge_down->vmult(level, t[level-1], solution[level]);
+
+      transfer->restrict_and_add (l, t[l-1], t[l]);
+      if (debug>3)
+        deallog << "restrict t[" << l-1 << "] " << t[l-1].l2_norm() << std::endl;
+      defect[l-1] -= t[l-1];
+      if (debug>3)
+        deallog << "defect   d[" << l-1 << "] " << defect[l-1].l2_norm() << std::endl;
+    }
+
+  // do recursion, starting from a zero initial guess on the coarser level
+  solution[level-1] = 0.;
+  level_v_step(level-1);
+
+  // zero the auxiliary vector before it receives the prolongated
+  // coarse-level correction
+  t[level] = 0.;
+
+  // do coarse grid correction
+  transfer->prolongate(level, t[level], solution[level-1]);
+  if (debug>2)
+    deallog << "Prolongate norm        " << t[level].l2_norm() << std::endl;
+
+  solution[level] += t[level];
+
+  // subtract the contributions of the transposed edge matrices (if set)
+  // from the defect used for post-smoothing; t[level] is only scratch
+  // space here
+  if (edge_in != 0)
+    {
+      edge_in->Tvmult(level, t[level], solution[level]);
+      defect[level] -= t[level];
+    }
+
+  if (edge_up != 0)
+    {
+      edge_up->Tvmult(level, t[level], solution[level-1]);
+      defect[level] -= t[level];
+    }
+
+  if (debug>2)
+    deallog << "V-cycle  Defect norm   " << defect[level].l2_norm()
+            << std::endl;
+
+  if (debug>1)
+    deallog << "Smoothing on     level " << level << std::endl;
+  // post-smoothing
+
+//  std::cout<<std::endl;
+//  defect[level].print(std::cout, 2,false);
+  post_smooth->smooth(level, solution[level], defect[level]);
+//  solution[level].print(std::cout, 2,false);
+//  std::cout<<std::endl;
+
+  if (debug>2)
+    deallog << "Solution norm          " << solution[level].l2_norm()
+            << std::endl;
+
+  if (debug>1)
+    deallog << "V-cycle leaving  level " << level << std::endl;
+}
+
+
+
+// One step of the cycle type given by the argument cycle, starting on the
+// given level. Used by cycle() for W- and F-cycles; the V-cycle is handled
+// here as well because the F-cycle needs it for its second coarse grid
+// visit.
+//
+// In contrast to level_v_step(), defect[level] is not modified: the
+// corrections coming from finer levels are accumulated in defect2 and the
+// right hand side actually used on this level is assembled in t[level].
+//
+// The amount of output written to deallog is controlled by the member debug.
+template <typename VectorType>
+void
+Multigrid<VectorType>::level_step(const unsigned int level,
+                                  Cycle cycle)
+{
+  // letter identifying the cycle type in the log output
+  char cychar = '?';
+  switch (cycle)
+    {
+    case v_cycle:
+      cychar = 'V';
+      break;
+    case f_cycle:
+      cychar = 'F';
+      break;
+    case w_cycle:
+      cychar = 'W';
+      break;
+    }
+
+  if (debug>0)
+    deallog << cychar << "-cycle entering level " << level << std::endl;
+
+  // Not actually the defect yet, but
+  // we do not want to spend yet
+  // another vector. Pass the
+  // correction accumulated in
+  // defect2[level] on to the next
+  // coarser level.
+  if (level>minlevel)
+    {
+      defect2[level-1] = 0.;
+      transfer->restrict_and_add (level, defect2[level-1], defect2[level]);
+    }
+
+  // We get an update of the defect
+  // from the previous level in t and
+  // from two levels above in
+  // defect2. This is subtracted from
+  // the original defect.
+  t[level].equ(-1.,defect2[level],1.,defect[level]);
+
+  if (debug>2)
+    deallog << cychar << "-cycle defect norm    " << t[level].l2_norm()
+            << std::endl;
+
+  // coarsest level: hand the corrected defect to the coarse grid solver
+  // and stop the recursion
+  if (level == minlevel)
+    {
+      if (debug>0)
+        deallog << cychar << "-cycle coarse level   " << level << std::endl;
+
+      (*coarse)(level, solution[level], t[level]);
+      return;
+    }
+  if (debug>1)
+    deallog << cychar << "-cycle smoothing level " << level << std::endl;
+  // pre-smoothing: update solution[level] using t[level] as right hand
+  // side
+  pre_smooth->smooth(level, solution[level], t[level]);
+
+  if (debug>2)
+    deallog << cychar << "-cycle solution norm    "
+            << solution[level].l2_norm() << std::endl;
+
+  if (debug>1)
+    deallog << cychar << "-cycle residual level   " << level << std::endl;
+  // t = A*solution[level]
+  matrix->vmult(level, t[level], solution[level]);
+  // make t rhs of lower level. The
+  // non-refined parts of the
+  // coarse-level defect already
+  // contain the global defect, the
+  // refined parts its restriction.
+  if (edge_out != 0)
+    edge_out->vmult_add(level, t[level], solution[level]);
+
+  if (edge_down != 0)
+    edge_down->vmult_add(level, defect2[level-1], solution[level]);
+
+  transfer->restrict_and_add (level, defect2[level-1], t[level]);
+  // do recursion, starting from a zero initial guess on the coarser level
+  solution[level-1] = 0.;
+  // Every cycle needs one recursion;
+  // the V-cycle, which is included
+  // here for the sake of the
+  // F-cycle, needs only this one.
+  level_step(level-1, cycle);
+  // If we solve exactly, then we do
+  // not need a second coarse grid
+  // step.
+  if (level>minlevel+1)
+    {
+      // while the W-cycle repeats itself
+      if (cycle == w_cycle)
+        level_step(level-1, cycle);
+      // and the F-cycle does a V-cycle
+      // after an F-cycle.
+      else if (cycle == f_cycle)
+        level_step(level-1, v_cycle);
+    }
+
+  // zero the auxiliary vector, which
+  // was used as right hand side and
+  // residual above, before it receives
+  // the prolongated correction from
+  // the level_step calls directly above
+  t[level] = 0.;
+  // do coarse grid correction
+  transfer->prolongate(level, t[level], solution[level-1]);
+
+  solution[level] += t[level];
+
+
+  // NOTE(review): if both edge_in and edge_up are set, the second Tvmult
+  // overwrites the result of the first one, and if neither is set t[level]
+  // still holds the prolongated correction when it enters the sadd() below.
+  // level_v_step() instead subtracts each contribution from the defect
+  // separately -- confirm which behavior is intended.
+  if (edge_in != 0)
+    edge_in->Tvmult(level, t[level], solution[level]);
+
+  if (edge_up != 0)
+    edge_up->Tvmult(level, t[level], solution[level-1]);
+
+  // presumably t = -t - defect2 + defect, i.e. the right hand side for
+  // post-smoothing -- verify against the sadd() documentation of the
+  // vector class
+  t[level].sadd(-1.,-1.,defect2[level],1.,defect[level]);
+
+  if (debug>2)
+    deallog << cychar << "-cycle  Defect norm    " << t[level].l2_norm()
+            << std::endl;
+
+  if (debug>1)
+    deallog << cychar << "-cycle smoothing level " << level << std::endl;
+  // post-smoothing
+  post_smooth->smooth(level, solution[level], t[level]);
+
+  if (debug>1)
+    deallog << cychar << "-cycle leaving level   " << level << std::endl;
+}
+
+
+// Run one multigrid cycle of the type stored in cycle_type, starting on
+// the finest level. The defect vector is expected to have been filled by
+// copy_to_mg beforehand; all other level vectors are set up here.
+template <typename VectorType>
+void
+Multigrid<VectorType>::cycle()
+{
+  // defect2 is only needed by the W- and F-cycle
+  const bool needs_defect2 = (cycle_type != v_cycle);
+
+  // bring the work vectors to the current level range ...
+  solution.resize(minlevel, maxlevel);
+  t.resize(minlevel, maxlevel);
+  if (needs_defect2)
+    defect2.resize(minlevel, maxlevel);
+
+  // ... and give them the layout of the defect on each level
+  for (unsigned int lvl=minlevel; lvl<=maxlevel; ++lvl)
+    {
+      solution[lvl].reinit(defect[lvl]);
+      t[lvl].reinit(defect[lvl]);
+      if (needs_defect2)
+        defect2[lvl].reinit(defect[lvl]);
+    }
+
+  // dispatch to the implementation of the requested cycle
+  if (needs_defect2)
+    level_step (maxlevel, cycle_type);
+  else
+    level_v_step (maxlevel);
+}
+
+
+// Run one V-cycle starting on the finest level, regardless of the stored
+// cycle type. The defect vector is expected to have been filled by
+// copy_to_mg beforehand; solution and t are set up here.
+template <typename VectorType>
+void
+Multigrid<VectorType>::vcycle()
+{
+  // bring the work vectors to the current level range ...
+  solution.resize(minlevel, maxlevel);
+  t.resize(minlevel, maxlevel);
+
+  // ... and give them the layout of the defect on each level
+  for (unsigned int lvl=minlevel; lvl<=maxlevel; ++lvl)
+    {
+      solution[lvl].reinit(defect[lvl]);
+      t[lvl].reinit(defect[lvl]);
+    }
+
+  level_v_step (maxlevel);
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/multigrid/sparse_matrix_collection.h b/include/deal.II/multigrid/sparse_matrix_collection.h
new file mode 100644
index 0000000..7980de5
--- /dev/null
+++ b/include/deal.II/multigrid/sparse_matrix_collection.h
@@ -0,0 +1,123 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__mg_sparse_matrix_collection_h
+#define dealii__mg_sparse_matrix_collection_h
+
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/pointer_matrix.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/multigrid/mg_base.h>
+#include <deal.II/multigrid/mg_tools.h>
+#include <deal.II/base/mg_level_object.h>
+#include <deal.II/base/std_cxx11/shared_ptr.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace mg
+{
+  /**
+   * Handler and storage for all five SparseMatrix objects involved in using
+   * multigrid with local refinement. reinit() works on the level range that
+   * was set by resize(); set_zero() assigns zero to all five matrix objects.
+   * @author Baerbel Janssen, Guido Kanschat
+   * @date 2013
+   */
+  template <typename number>
+  class SparseMatrixCollection : public Subscriptor
+  {
+  public:
+    void resize(const unsigned int minlevel, const unsigned  int maxlevel);
+
+    template <typename DoFHandlerType>
+    void reinit(const DoFHandlerType &dof_handler);
+
+    void set_zero();
+    // Sparsity patterns per level; reinit() fills them and the matrices use them.
+    MGLevelObject<SparsityPattern> sparsity;
+    MGLevelObject<SparsityPattern> sparsity_edge;
+    // matrix, matrix_in, matrix_out use sparsity; matrix_up, matrix_down use sparsity_edge.
+    MGLevelObject<SparseMatrix<number> > matrix;
+    MGLevelObject<SparseMatrix<number> > matrix_down;
+    MGLevelObject<SparseMatrix<number> > matrix_up;
+    MGLevelObject<SparseMatrix<number> > matrix_in;
+    MGLevelObject<SparseMatrix<number> > matrix_out;
+  };
+
+
+  template <typename number>
+  void
+  SparseMatrixCollection<number>::resize(const unsigned int minlevel, const unsigned  int maxlevel)
+  {
+    matrix.resize(minlevel, maxlevel);         // each matrix object is resized, then cleared
+    matrix.clear();
+    matrix_up.resize(minlevel+1, maxlevel);    // up/down start one level above minlevel
+    matrix_up.clear();
+    matrix_down.resize(minlevel+1, maxlevel);
+    matrix_down.clear();
+    matrix_in.resize(minlevel, maxlevel);
+    matrix_in.clear();
+    matrix_out.resize(minlevel, maxlevel);
+    matrix_out.clear();
+    sparsity.resize(minlevel, maxlevel);       // the sparsity patterns are only resized
+    sparsity_edge.resize(minlevel, maxlevel);  // NOTE(review): patterns come after the matrices; confirm before reordering
+  }
+
+
+  template <typename number>
+  template <typename DoFHandlerType>
+  void
+  SparseMatrixCollection<number>::reinit(const DoFHandlerType &dof_handler)
+  {
+    // Set up the sparsity patterns and matrices on all levels stored in sparsity.
+    AssertIndexRange(sparsity.max_level(), dof_handler.get_triangulation().n_levels());
+    for (unsigned int level=sparsity.min_level(); level<=sparsity.max_level(); ++level)
+      {
+        DynamicSparsityPattern dsp(dof_handler.n_dofs(level));
+        MGTools::make_flux_sparsity_pattern(dof_handler, dsp, level);
+        sparsity[level].copy_from(dsp);
+        matrix[level].reinit(sparsity[level]);
+        matrix_in[level].reinit(sparsity[level]);
+        matrix_out[level].reinit(sparsity[level]);
+        // matrix_up/matrix_down start at min_level()+1, see resize()
+        if (level>sparsity.min_level())
+          {
+            DynamicSparsityPattern ci_sparsity;
+            ci_sparsity.reinit(dof_handler.n_dofs(level-1), dof_handler.n_dofs(level));
+            MGTools::make_flux_sparsity_pattern_edge(dof_handler, ci_sparsity, level);
+            sparsity_edge[level].copy_from(ci_sparsity);
+            matrix_up[level].reinit(sparsity_edge[level]);
+            matrix_down[level].reinit(sparsity_edge[level]);
+          }
+      }
+  }
+
+  template <typename number>
+  void
+  SparseMatrixCollection<number>::set_zero()
+  {
+    matrix = 0.;       // assign zero to all five matrix level objects,
+    matrix_down = 0.;  // in the order in which the class declares them
+    matrix_up = 0.;
+    matrix_in = 0.;
+    matrix_out = 0.;
+  }
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/numerics/data_component_interpretation.h b/include/deal.II/numerics/data_component_interpretation.h
new file mode 100644
index 0000000..5c20a30
--- /dev/null
+++ b/include/deal.II/numerics/data_component_interpretation.h
@@ -0,0 +1,68 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2007 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__data_component_interpretation_h
+#define dealii__data_component_interpretation_h
+
+
+
+#include <deal.II/base/config.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * A namespace solely for the declaration of the
+ * DataComponentInterpretation::DataComponentInterpretation enum.
+ */
+namespace DataComponentInterpretation
+{
+  /**
+   * The members of this enum are used to describe the logical interpretation
+   * of what the various components of a vector-valued data set mean. For
+   * example, if one has a finite element for the Stokes equations in 2d,
+   * representing components $(u,v,p)$, one would like to indicate that the
+   * first two, $u$ and $v$, represent a logical vector so that later on when
+   * we generate graphical output we can hand them off to a visualization
+   * program that will automatically know to render them as a vector field,
+   * rather than as two separate and independent scalar fields.
+   *
+   * This is achieved by passing a list of enums of the current kind to the
+   * DataOut_DoFData::add_data_vector functions.
+   *
+   * See the step-22 tutorial program for an example of how this information
+   * can be used in practice.
+   *
+   * @author Wolfgang Bangerth, 2007
+   */
+  enum DataComponentInterpretation
+  {
+    /**
+     * Indicates that a component of a data set corresponds to a scalar field
+     * independent of the others (like $p$ in the example above).
+     */
+    component_is_scalar,
+
+    /**
+     * Indicates that a component of a data set is part of a vector-valued
+     * quantity (like $u$ and $v$ in the example above).
+     */
+    component_is_part_of_vector
+  };
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/numerics/data_out.h b/include/deal.II/numerics/data_out.h
new file mode 100644
index 0000000..e85bb7a
--- /dev/null
+++ b/include/deal.II/numerics/data_out.h
@@ -0,0 +1,309 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__data_out_h
+#define dealii__data_out_h
+
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/numerics/data_out_dof_data.h>
+
+#include <deal.II/base/std_cxx11/shared_ptr.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int, int> class FEValuesBase;
+
+namespace internal
+{
+  namespace DataOut
+  {
+    /**
+     * A derived class for use in the DataOut class. This is a class for the
+     * AdditionalData kind of data structure discussed in the documentation of
+     * the WorkStream context; DataOut::build_one_patch() takes it as scratch data.
+     */
+    template <int dim, int spacedim>
+    struct ParallelData : public ParallelDataBase<dim,spacedim>
+    {
+      ParallelData (const unsigned int n_datasets,
+                    const unsigned int n_subdivisions,
+                    const std::vector<unsigned int> &n_postprocessor_outputs,
+                    const Mapping<dim,spacedim> &mapping,
+                    const std::vector<std_cxx11::shared_ptr<dealii::hp::FECollection<dim,spacedim> > > &finite_elements,
+                    const UpdateFlags update_flags,
+                    const std::vector<std::vector<unsigned int> > &cell_to_patch_index_map);
+      // NOTE(review): presumably scratch storage for the points of one patch -- confirm.
+      std::vector<Point<spacedim> > patch_evaluation_points;
+      // NOTE(review): presumably points to the map given to the constructor -- confirm.
+      const std::vector<std::vector<unsigned int> > *cell_to_patch_index_map;
+    };
+  }
+}
+
+
+
+/**
+ * This class is the main class to provide output of data described by finite
+ * element fields defined on a collection of cells.
+ *
+ * This class is an actual implementation of the functionality proposed by the
+ * DataOut_DoFData class. It offers a function build_patches() that generates
+ * the data to be written in some graphics format. Most of the interface and
+ * an example of its use is described in the documentation of this base class.
+ *
+ * The only thing this class offers is the function build_patches() which
+ * loops over all cells of the triangulation stored by the
+ * attach_dof_handler() function of the base class (with the exception of
+ * cells of parallel::distributed::Triangulation objects that are not owned by
+ * the current processor) and converts the data on these to actual patches
+ * which are the objects that are later output by the functions of the base
+ * classes. You can give a parameter to the function which determines how many
+ * subdivisions in each coordinate direction are to be performed, i.e. of how
+ * many subcells each patch shall consist. Default is one, but you may want to
+ * choose a higher number for higher order elements, for example two for
+ * quadratic elements, three for cubic elements, and so on. This parameter
+ * exists because most graphics programs do not allow specifying
+ * higher order polynomial functions in the file formats: only data at
+ * vertices can be plotted and is then shown as a bilinear interpolation
+ * within the interior of cells. This may be insufficient if you have higher
+ * order finite elements, and the only way to achieve better output is to
+ * subdivide each cell of the mesh into several cells for graphical output. Of
+ * course, what you get to see is still a bilinear interpolation on each cell
+ * of the output (where these cells are not subdivisions of the cells of the
+ * triangulation in use) due to the same limitations in output formats, but at
+ * least a bilinear interpolation of a higher order polynomial on a finer
+ * mesh.
+ *
+ * Note that after having called build_patches() once, you can call one or
+ * more of the write() functions of DataOutInterface. You can therefore output
+ * the same data in more than one format without having to rebuild the
+ * patches.
+ *
+ *
+ * <h3>User interface information</h3>
+ *
+ * The base classes of this class, DataOutBase, DataOutInterface and
+ * DataOut_DoFData offer several interfaces of their own. Refer to the
+ * DataOutBase class's documentation for a discussion of the different output
+ * formats presently supported, DataOutInterface for ways of selecting which
+ * format to use upon output at run-time and without the need to adapt your
+ * program when new formats become available, as well as for flags to
+ * determine aspects of output. The DataOut_DoFData class's documentation
+ * has an example of using nodal data to generate output.
+ *
+ *
+ * <h3>Extensions</h3>
+ *
+ * By default, this class produces patches for all active cells. Sometimes,
+ * this is not what you want, maybe because they are simply too many (and too
+ * small to be seen individually) or because you only want to see a certain
+ * region of the domain (for example in parallel programs such as the step-18
+ * example program), or for some other reason.
+ *
+ * For this, internally build_patches() does not generate the sequence of
+ * cells to be converted into patches itself, but relies on the two functions
+ * first_cell() and next_cell(). By default, they return the first active
+ * cell, and the next active cell, respectively. Since they are @p virtual
+ * functions, you can write your own class derived from DataOut in which you
+ * override these two functions to select other cells for output. This may,
+ * for example, include only cells that are in parts of a domain (e.g., if you
+ * don't care about the solution elsewhere, think for example a buffer region
+ * in which you attenuate outgoing waves in the Perfectly Matched Layer
+ * method) or if you don't want output to be generated at all levels of an
+ * adaptively refined mesh because this creates too much data (in this case,
+ * the set of cells returned by your implementations of first_cell() and
+ * next_cell() will include non-active cells, and DataOut::build_patches()
+ * will simply take interpolated values of the solution instead of the exact
+ * values on these cells' children for output). Once you derive your own class,
+ * you would just create an object of this type instead of an object of type
+ * DataOut, and everything else will remain the same.
+ *
+ * The two functions are not constant, so you may store information within
+ * your derived class about the last accessed cell. This is useful if the
+ * information of the last cell which was accessed is not sufficient to
+ * determine the next one.
+ *
+ * There is one caveat, however: if you have cell data (in contrast to nodal,
+ * or dof, data) such as error indicators, then you must make sure that
+ * first_cell() and next_cell() only walk over active cells, since cell data
+ * cannot be interpolated to a coarser cell. If you do have cell data and use
+ * this pair of functions and they return a non-active cell, then an exception
+ * will be thrown.
+ *
+ * @pre This class only makes sense if the first template argument,
+ * <code>dim</code> equals the dimension of the DoFHandler type given as the
+ * second template argument, i.e., if <code>dim ==
+ * DoFHandlerType::dimension</code>. This redundancy is a historical relic
+ * from the time where the library had only a single DoFHandler class and this
+ * class consequently only a single template argument.
+ *
+ * @ingroup output
+ * @author Wolfgang Bangerth, 1999
+ */
+template <int dim, typename DoFHandlerType=DoFHandler<dim> >
+class DataOut : public DataOut_DoFData<DoFHandlerType, DoFHandlerType::dimension, DoFHandlerType::space_dimension>
+{
+public:
+  /**
+   * Typedefs to the cell and active cell iterator types of the dof handler
+   * class under consideration.
+   */
+  typedef typename DataOut_DoFData<DoFHandlerType, DoFHandlerType::dimension, DoFHandlerType::space_dimension>::cell_iterator
+  cell_iterator;
+  typedef typename DataOut_DoFData<DoFHandlerType, DoFHandlerType::dimension, DoFHandlerType::space_dimension>::active_cell_iterator
+  active_cell_iterator;
+
+  /**
+   * Enumeration describing the region of the domain in which curved cells
+   * shall be created: nowhere, at the boundary only, or in the whole domain.
+   */
+  enum CurvedCellRegion
+  {
+    no_curved_cells,
+    curved_boundary,
+    curved_inner_cells
+  };
+
+  /**
+   * This is the central function of this class since it builds the list of
+   * patches to be written by the low-level functions of the base class. A
+   * patch is, in essence, some intermediate representation of the data on
+   * each cell of a triangulation and DoFHandler object that can then be used
+   * to write files in some format that is readable by visualization programs.
+   *
+   * You can find an overview of the use of this function in the general
+   * documentation of this class. An example is also provided in the
+   * documentation of this class's base class DataOut_DoFData.
+   *
+   * @param n_subdivisions A parameter that determines how many "patches" this
+   * function will build out of every cell. If you do not specify this value
+   * in calling, or provide the default value zero, then this is interpreted
+   * as DataOutInterface::default_subdivisions which most of the time will be
+   * equal to one (unless you have set it to something else). The purpose of
+   * this parameter is to subdivide each cell of the mesh into $2\times 2,
+   * 3\times 3, \ldots$ "patches" in 2d, and $2\times 2\times 2, 3\times
+   * 3\times 3, \ldots$ in 3d (if passed the value 2, 3, etc) where each patch
+   * represents the data from a regular subdivision of the cell into equal
+   * parts. Most of the time, this is not necessary and outputting one patch
+   * per cell is exactly what you want to plot the solution. That said, the
+   * data we write into files for visualization can only represent (bi-,
+   * tri)linear data on each cell, and most visualization programs can in fact
+   * only visualize this kind of data. That's good enough if you work with
+   * (bi-, tri)linear finite elements, in which case what you get to see is
+   * exactly what has been computed. On the other hand, if you work with (bi-,
+   * tri)quadratic elements, then what is written into the output file is just
+   * a (bi-, tri)linear interpolation onto the current mesh, i.e., only the
+   * values at the vertices. If this is not good enough, you can, for example,
+   * specify @p n_subdivisions equal to 2 to plot the solution on a once-
+   * refined mesh, or if set to 3, on a mesh where each cell is represented by
+   * 3-by-3 patches. On each of these smaller patches, given the limitations
+   * of output formats, the data is still linearly interpolated, but a linear
+   * interpolation of quadratic data on a finer mesh is still a better
+   * representation of the actual quadratic surface than on the original mesh.
+   * In other words, using this parameter can not help you plot the solution
+   * exactly, but it can get you closer if you use finite elements of higher
+   * polynomial degree.
+   */
+  virtual void build_patches (const unsigned int n_subdivisions = 0);
+
+  /**
+   * Same as above, except that the additional first parameter defines a
+   * mapping that is to be used in the generation of output. If
+   * <tt>n_subdivisions>1</tt>, the points interior of subdivided patches
+   * which originate from cells at the boundary of the domain can be computed
+   * using the mapping, i.e. a higher order mapping leads to a representation
+   * of a curved boundary by using more subdivisions. Some mappings like
+   * MappingQEulerian result in curved cells in the interior of the domain.
+   * However, there is no easy way to get this information from the Mapping.
+   * Thus the last argument @p curved_region takes one of three values
+   * resulting in no curved cells at all, curved cells at the boundary
+   * (default) or curved cells in the whole domain.
+   *
+   * Even for non-curved cells the mapping argument can be used for the
+   * Eulerian mappings (see class MappingQ1Eulerian) where a mapping is used
+   * not only to determine the position of points interior to a cell, but also
+   * of the vertices.  It offers an opportunity to watch the solution on a
+   * deformed triangulation on which the computation was actually carried out,
+   * even if the mesh is internally stored in its undeformed configuration and
+   * the deformation is only tracked by an additional vector that holds the
+   * deformation of each vertex.
+   *
+   * @todo The @p mapping argument should be replaced by a
+   * hp::MappingCollection in case of a hp::DoFHandler.
+   */
+  virtual void build_patches (const Mapping<DoFHandlerType::dimension, DoFHandlerType::space_dimension> &mapping,
+                              const unsigned int n_subdivisions = 0,
+                              const CurvedCellRegion curved_region = curved_boundary);
+
+  /**
+   * Return the first cell which we want output for. The default
+   * implementation returns the first active cell, but you might want to
+   * return other cells in a derived class.
+   */
+  virtual cell_iterator first_cell ();
+
+  /**
+   * Return the next cell after @p cell which we want output for.  If there
+   * are no more cells, <tt>#dofs->end()</tt> shall be returned.
+   *
+   * The default implementation returns the next active cell, but you might
+   * want to return other cells in a derived class. Note that the default
+   * implementation assumes that the given @p cell is active, which is
+   * guaranteed as long as first_cell() is also used from the default
+   * implementation. Overriding only one of the two functions might not be a
+   * good idea.
+   */
+  virtual cell_iterator next_cell (const cell_iterator &cell);
+
+private:
+
+  /**
+   * Return the first cell produced by the first_cell()/next_cell() function
+   * pair that is locally owned. If this object operates on a non-distributed
+   * triangulation, the result equals what first_cell() returns.
+   */
+  virtual cell_iterator first_locally_owned_cell ();
+
+  /**
+   * Return the next cell produced by the next_cell() function that is locally
+   * owned. If this object operates on a non-distributed triangulation, the
+   * result equals what next_cell() returns.
+   */
+  virtual cell_iterator next_locally_owned_cell (const cell_iterator &cell);
+
+  /**
+   * Build one patch. This function is called in a WorkStream context.
+   *
+   * The first argument here is the iterator, the second the scratch data
+   * object. All following are tied to particular values when calling
+   * WorkStream::run(). The function does not take a CopyData object but
+   * rather allocates one on its own stack for memory access efficiency
+   * reasons.
+   */
+  void build_one_patch
+  (const std::pair<cell_iterator, unsigned int>                 *cell_and_index,
+   internal::DataOut::ParallelData<DoFHandlerType::dimension, DoFHandlerType::space_dimension>  &scratch_data,
+   const unsigned int                                            n_subdivisions,
+   const CurvedCellRegion                                        curved_cell_region,
+   std::vector<DataOutBase::Patch<DoFHandlerType::dimension, DoFHandlerType::space_dimension> > &patches);
+};
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/numerics/data_out_dof_data.h b/include/deal.II/numerics/data_out_dof_data.h
new file mode 100644
index 0000000..1c0e7e6
--- /dev/null
+++ b/include/deal.II/numerics/data_out_dof_data.h
@@ -0,0 +1,971 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__data_out_dof_data_h
+#define dealii__data_out_dof_data_h
+
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/base/data_out_base.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/fe/mapping.h>
+#include <deal.II/hp/q_collection.h>
+#include <deal.II/hp/fe_collection.h>
+#include <deal.II/hp/mapping_collection.h>
+#include <deal.II/hp/fe_values.h>
+#include <deal.II/numerics/data_postprocessor.h>
+#include <deal.II/numerics/data_component_interpretation.h>
+
+#include <deal.II/base/std_cxx11/shared_ptr.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int, int> class FEValuesBase;
+
+
+namespace Exceptions
+{
+  /**
+   * A namespace for exceptions that are used throughout the DataOut*
+   * collection of classes.
+   */
+  namespace DataOut
+  {
+    /**
+     * Exception for a number of subdivisions per patch that is not valid.
+     */
+    DeclException1 (ExcInvalidNumberOfSubdivisions,
+                    int,
+                    << "The number of subdivisions per patch, " << arg1
+                    << ", is not valid. It needs to be greater or equal to "
+                    "one, or zero if you want it to be determined "
+                    "automatically.");
+
+    /**
+     * Exception for operations that first need a DoFHandler or triangulation.
+     */
+    DeclExceptionMsg (ExcNoTriangulationSelected,
+                      "For the operation you are attempting, you first need to "
+                      "tell the DataOut or related object which DoFHandler or "
+                      "triangulation you would like to work on.");
+
+    /**
+     * Exception for operations that first need a DoFHandler to be selected.
+     */
+    DeclExceptionMsg (ExcNoDoFHandlerSelected,
+                      "For the operation you are attempting, you first need to "
+                      "tell the DataOut or related object which DoFHandler "
+                      "you would like to work on.");
+
+    /**
+     * Exception for a vector whose size matches neither dofs nor active cells.
+     */
+    DeclException3 (ExcInvalidVectorSize,
+                    int, int, int,
+                    << "The vector has size " << arg1
+                    << " but the DoFHandler object says that there are " << arg2
+                    << " degrees of freedom and there are " << arg3
+                    << " active cells. The size of your vector needs to be"
+                    << " either equal to the number of degrees of freedom, or"
+                    << " equal to the number of active cells.");
+    /**
+     * Exception for a description string that contains an invalid character.
+     */
+    DeclException2 (ExcInvalidCharacter,
+                    std::string, size_t,
+                    << "Please use only the characters [a-zA-Z0-9_<>()] for" << std::endl
+                    << "description strings since some graphics formats will only accept these."
+                    << std::endl
+                    << "The string you gave was <" << arg1
+                    << ">, within which the invalid character is <" << arg1[arg2]
+                    << ">." << std::endl);
+    /**
+     * Exception for attaching an object while old data vectors are referenced.
+     */
+    DeclExceptionMsg (ExcOldDataStillPresent,
+                      "When attaching a triangulation or DoFHandler object, it is "
+                      "not allowed if old data vectors are still referenced. If "
+                      "you want to reuse an object of the current type, you first "
+                      "need to call the 'clear_data_vector()' function.");
+    /**
+     * Exception for a number of names that differs from the component count.
+     */
+    DeclException2 (ExcInvalidNumberOfNames,
+                    int, int,
+                    << "You have to give one name per component in your "
+                    << "data vector. The number you gave was " << arg1
+                    << ", but the number of components is " << arg2
+                    << ".");
+    /**
+     * Exception for merging patch sets whose variable names do not agree.
+     */
+    DeclExceptionMsg (ExcIncompatibleDatasetNames,
+                      "While merging sets of patches, the two sets to be merged "
+                      "need to refer to data that agrees on the names of the "
+                      "various variables represented. In other words, you "
+                      "cannot merge sets of patches that originate from "
+                      "entirely unrelated simulations.");
+    /**
+     * Exception for merging patch sets that differ in subdivisions or other properties.
+     */
+    DeclExceptionMsg (ExcIncompatiblePatchLists,
+                      "While merging sets of patches, the two sets to be merged "
+                      "need to refer to data that agrees on the number of "
+                      "subdivisions and other properties. In other words, you "
+                      "cannot merge sets of patches that originate from "
+                      "entirely unrelated simulations.");
+    /** Exception for a vector declaration that is incomplete or not of size dim. */
+    DeclException2 (ExcInvalidVectorDeclaration,
+                    int, std::string,
+                    << "When declaring that a number of components in a data "
+                    << "set to be output logically form a vector instead of "
+                    << "simply a set of scalar fields, you need to specify "
+                    << "this for all relevant components. Furthermore, "
+                    << "vectors must always consist of exactly <dim> "
+                    << "components. However, the vector component at "
+                    << "position " << arg1 << " with name <" << arg2
+                    << "> does not satisfy these conditions.");
+  }
+}
+
+
+namespace internal
+{
+  namespace DataOut
+  {
+    /**
+     * For each vector that has been added through the add_data_vector()
+     * functions, we need to keep track of a pointer to it, and allow data
+     * extraction from it when we generate patches. Unfortunately, we need to
+     * do this for a number of different vector types. Fortunately, they all
+     * have the same interface. So the way we go is to have a base class that
+     * provides the functions to access the vector's information, and to have
+     * a derived template class that can be instantiated for each vector type.
+     * Since the vectors all have the same interface, this is no big problem,
+     * as they can all use the same general templatized code.
+     *
+     * @author Wolfgang Bangerth, 2004
+     */
+    template <typename DoFHandlerType>
+    class DataEntryBase
+    {
+    public:
+      /**
+       * Constructor. Give a list of names for the individual components of
+       * the vector and their interpretation as scalar or vector data. This
+       * constructor assumes that no postprocessor is going to be used.
+       */
+      DataEntryBase (const DoFHandlerType           *dofs,
+                     const std::vector<std::string> &names,
+                     const std::vector<DataComponentInterpretation::DataComponentInterpretation> &data_component_interpretation);
+
+      /**
+       * Constructor when a data postprocessor is going to be used. In that
+       * case, the names and vector declarations are going to be acquired from
+       * the postprocessor.
+       */
+      DataEntryBase (const DoFHandlerType *dofs,
+                     const DataPostprocessor<DoFHandlerType::space_dimension> *data_postprocessor);
+
+      /**
+       * Destructor made virtual.
+       */
+      virtual ~DataEntryBase ();
+
+      /**
+       * Assuming that the stored vector is a cell vector, extract the given
+       * element from it.
+       */
+      virtual
+      double
+      get_cell_data_value (const unsigned int cell_number) const = 0;
+
+      /**
+       * Given a FEValuesBase object, extract the values on the present cell
+       * from the vector we actually store.
+       */
+      virtual
+      void
+      get_function_values
+      (const FEValuesBase<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &fe_patch_values,
+       std::vector<double> &patch_values) const = 0;
+
+      /**
+       * Given a FEValuesBase object, extract the values on the present cell
+       * from the vector we actually store. This function does the same as the
+       * one above but for vector-valued finite elements.
+       */
+      virtual
+      void
+      get_function_values
+      (const FEValuesBase<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &fe_patch_values,
+       std::vector<dealii::Vector<double> > &patch_values_system) const = 0;
+
+      /**
+       * Given a FEValuesBase object, extract the gradients on the present
+       * cell from the vector we actually store.
+       */
+      virtual
+      void
+      get_function_gradients
+      (const FEValuesBase<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &fe_patch_values,
+       std::vector<Tensor<1,DoFHandlerType::space_dimension> > &patch_gradients) const = 0;
+
+      /**
+       * Given a FEValuesBase object, extract the gradients on the present
+       * cell from the vector we actually store. This function does the same
+       * as the one above but for vector-valued finite elements.
+       */
+      virtual
+      void
+      get_function_gradients
+      (const FEValuesBase<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &fe_patch_values,
+       std::vector<std::vector<Tensor<1,DoFHandlerType::space_dimension> > > &patch_gradients_system) const = 0;
+
+      /**
+       * Given a FEValuesBase object, extract the second derivatives on the
+       * present cell from the vector we actually store.
+       */
+      virtual
+      void
+      get_function_hessians
+      (const FEValuesBase<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &fe_patch_values,
+       std::vector<Tensor<2,DoFHandlerType::space_dimension> > &patch_hessians) const = 0;
+
+      /**
+       * Given a FEValuesBase object, extract the second derivatives on the
+       * present cell from the vector we actually store. This function does
+       * the same as the one above but for vector-valued finite elements.
+       */
+      virtual
+      void
+      get_function_hessians
+      (const FEValuesBase<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &fe_patch_values,
+       std::vector<std::vector< Tensor<2,DoFHandlerType::space_dimension> > > &patch_hessians_system) const = 0;
+
+      /**
+       * Clear all references to the vectors.
+       */
+      virtual void clear () = 0;
+
+      /**
+       * Determine an estimate for the memory consumption (in bytes) of this
+       * object.
+       */
+      virtual std::size_t memory_consumption () const = 0;
+
+      /**
+       * Pointer to the DoFHandler object that the vector is based on.
+       */
+      SmartPointer<const DoFHandlerType> dof_handler;
+
+      /**
+       * Names of the components of this data vector.
+       */
+      const std::vector<std::string> names;
+
+      /**
+       * A vector that for each of the n_output_variables variables of the
+       * current data set indicates whether they are scalar fields, parts of a
+       * vector-field, or any of the other supported kinds of data.
+       */
+      const std::vector<DataComponentInterpretation::DataComponentInterpretation>
+      data_component_interpretation;
+
+      /**
+       * Pointer to a DataPostprocessor object which shall be applied to this
+       * data vector.
+       */
+      SmartPointer<const dealii::DataPostprocessor<DoFHandlerType::space_dimension> > postprocessor;
+
+      /**
+       * Number of output variables this dataset provides (either number of
+       * components in vector valued function / data vector or number of
+       * computed quantities, if DataPostprocessor is applied). This variable
+       * is determined via and thus equivalent to <tt>names.size()</tt>.
+       */
+      unsigned int n_output_variables;
+    };
+
+
+    /**
+     * A data structure that holds all data needed in one thread when building
+     * patches in parallel. These data structures are created globally rather
+     * than on each cell to avoid allocation of memory in the threads. This is
+     * a base class for the AdditionalData kind of data structure discussed in
+     * the documentation of the WorkStream class.
+     *
+     * The <code>cell_to_patch_index_map</code> is an array that stores for
+     * index <tt>[i][j]</tt> the number of the patch that is associated with
+     * the cell with index @p j on level @p i. This information is set up prior
+     * to generation of the patches, and is needed to generate neighborship
+     * information.
+     *
+     * This structure is used by several of the DataOut* classes, which
+     * derive their own ParallelData classes from it for additional fields.
+     */
+    template <int dim, int spacedim>
+    struct ParallelDataBase
+    {
+      ParallelDataBase (const unsigned int n_datasets,
+                        const unsigned int n_subdivisions,
+                        const std::vector<unsigned int> &n_postprocessor_outputs,
+                        const Mapping<dim,spacedim> &mapping,
+                        const std::vector<std_cxx11::shared_ptr<dealii::hp::FECollection<dim,spacedim> > > &finite_elements,
+                        const UpdateFlags update_flags,
+                        const bool        use_face_values);
+
+      ParallelDataBase (const ParallelDataBase &data);
+
+      template <typename DoFHandlerType>
+      void reinit_all_fe_values(std::vector<std_cxx11::shared_ptr<DataEntryBase<DoFHandlerType> > > &dof_data,
+                                const typename dealii::Triangulation<dim,spacedim>::cell_iterator &cell,
+                                const unsigned int face = numbers::invalid_unsigned_int);
+
+      const FEValuesBase<dim,spacedim> &
+      get_present_fe_values(const unsigned int dataset) const;
+
+      void resize_system_vectors(const unsigned int n_components);
+
+      const unsigned int n_datasets;
+      const unsigned int n_subdivisions;
+
+      std::vector<double>                                patch_values;
+      std::vector<dealii::Vector<double> >               patch_values_system;
+      std::vector<Tensor<1,spacedim> >                   patch_gradients;
+      std::vector<std::vector<Tensor<1,spacedim> > >     patch_gradients_system;
+      std::vector<Tensor<2,spacedim> >                   patch_hessians;
+      std::vector<std::vector<Tensor<2,spacedim> > >     patch_hessians_system;
+      std::vector<std::vector<dealii::Vector<double> > > postprocessed_values;
+
+      const dealii::hp::MappingCollection<dim,spacedim> mapping_collection;
+      const std::vector<std_cxx11::shared_ptr<dealii::hp::FECollection<dim,spacedim> > > finite_elements;
+      const UpdateFlags update_flags;
+
+      std::vector<std_cxx11::shared_ptr<dealii::hp::FEValues<dim,spacedim> > > x_fe_values;
+      std::vector<std_cxx11::shared_ptr<dealii::hp::FEFaceValues<dim,spacedim> > > x_fe_face_values;
+    };
+  }
+}
+
+
+//TODO: Most of the documentation of DataOut_DoFData applies to DataOut.
+
+/**
+ * This is an abstract class which provides the functionality to generate
+ * patches for output by base classes from data vectors on a grid. It allows
+ * to attach one or more pointers to a DoFHandler and attached node and cell
+ * data denoting functions on the grid which shall later be written in any of
+ * the implemented data formats.
+ *
+ *
+ * <h3>User visible interface</h3>
+ *
+ * The user visible interface of this class allows the user to specify data in
+ * two different ways. One is to make a DoFHandler object known to this class
+ * and to add data vectors that all correspond to this DoFHandler or the grid
+ * cells which will later be written to a file in some format. The second
+ * approach is to pass a DoFHandler object along with the vector. This allows
+ * setting data from different DoFHandlers in a neat way (of course, they both
+ * need to be based on the same triangulation). Instead of pondering about the
+ * different functions, an example for the first kind is probably the best
+ * explanation:
+ * @code
+ *   ...
+ *   ...   // compute solution, which contains nodal values
+ *   ...
+ *   ...   // compute error_estimator, which contains one value per cell
+ *
+ *   std::vector<std::string> solution_names;
+ *   solution_names.push_back ("x-displacement");
+ *   solution_names.push_back ("y-displacement");
+ *
+ *   DataOut<dim> data_out;
+ *   data_out.attach_dof_handler (dof_handler);
+ *   data_out.add_data_vector (solution, solution_names);
+ *   data_out.add_data_vector (error_estimator, "estimated_error");
+ *
+ *   data_out.build_patches ();
+ *
+ *   ofstream output_file ("output");
+ *   data_out.write_xxx (output_file);
+ *
+ *   data_out.clear();
+ * @endcode
+ *
+ * attach_dof_handler() tells this class that all future operations are to
+ * take place with the DoFHandler object and the triangulation it lives on. We
+ * then add the solution vector and the error estimator; note that they have
+ * different dimensions, because the solution is a nodal vector, here
+ * consisting of two components ("x-displacement" and "y-displacement") while
+ * the error estimator probably is a vector holding cell data. When attaching
+ * a data vector, you have to give a name to each component of the vector,
+ * which is done through an object of type <tt>vector<string></tt> as second
+ * argument; if only one component is in the vector, for example if we are
+ * adding cell data as in the second case, or if the finite element used by
+ * the DoFHandler has only one component, then you can use the second
+ * add_data_vector() function which takes a @p string instead of the
+ * <tt>vector<string></tt>.
+ *
+ * The add_data_vector() functions have additional arguments (with default
+ * values) that can be used to specify certain transformations. In particular,
+ * it allows to attach DataPostprocessor arguments to compute derived
+ * information from a data vector at each point at which the field will be
+ * evaluated so that it can be written to a file (for example, the Mach number
+ * in hypersonic flow can be computed from density and velocities; step-29
+ * also shows an example); another piece of information specified through
+ * arguments with default values is how certain output components should be
+ * interpreted, i.e. whether each component of the data is logically an
+ * independent scalar field, or whether some of them together form logically a
+ * vector-field (see the
+ * DataComponentInterpretation::DataComponentInterpretation enum, and the
+ * @ref step_22 "step-22"
+ * tutorial program).
+ *
+ * This class does not copy the vector given to it through the
+ * add_data_vector() functions, for memory consumption reasons. It only stores
+ * a reference to it, so it is in your responsibility to make sure that the
+ * data vectors exist long enough.
+ *
+ * After adding all data vectors, you need to call a function which generates
+ * the patches (i.e., some intermediate data representation) for output from
+ * the stored data. Derived classes name this function build_patches().
+ * Finally, you write() the data in one format or other, to a file.
+ *
+ * In the example above, an object of type DataOut was used, i.e. an object of
+ * a derived class. This is necessary since the current class does not provide
+ * means to actually generate the patches, only aids to store and access data.
+ * Any real functionality is implemented in derived classes such as DataOut.
+ *
+ * Note that the base class of this class, DataOutInterface offers several
+ * functions to ease programming with run-time determinable output formats
+ * (i.e. you need not use a fixed format by calling
+ * DataOutInterface::write_xxx in the above example, but you can select it by
+ * a run-time parameter without having to write the <tt>if () ... else
+ * ...</tt> clauses yourself), and also functions and classes offering ways to
+ * control the appearance of the output by setting flags for each output
+ * format.
+ *
+ *
+ * <h3>Information for derived classes</h3>
+ *
+ * What this class lacks is a way to produce the patches for output itself,
+ * from the stored data and degree of freedom information. Since this task is
+ * often application dependent it is left to derived classes. For example, in
+ * many applications, it might be wanted to limit the depth of output to a
+ * certain number of refinement levels and write data from finer cells only in
+ * a way interpolated to coarser cells, to reduce the amount of output. Also,
+ * it might be wanted to use different numbers of subdivisions on different
+ * cells when forming a patch, for example to account for different
+ * polynomial degrees of the trial space on different cells. Also, the output
+ * need not necessarily consist of a patch for each cell, but might be made up
+ * of patches for faces, or other things. Take a look at derived classes to
+ * see what is possible in this respect.
+ *
+ * For this reason, it is left to a derived class to provide a function, named
+ * usually build_patches() or the like, which fills the #patches array of this
+ * class.
+ *
+ * Regarding the templates of this class, it needs three values: first the
+ * DoF handler type (which fixes the dimension the DoF handler operates in),
+ * second the dimension of the objects which the patches represent.  Although
+ * in most cases they are equal, there are also classes for which this does
+ * not hold, for example if one outputs the result of a computation exploiting
+ * rotational symmetry in the original domain (in which the space dimension of
+ * the output would be one higher than that of the DoF handler, see the
+ * DataOut_Rotation() class), or one might conceive that one could write a
+ * class that only outputs the solution on a cut through the domain, in which
+ * case the space dimension of the output is less than that of the DoF
+ * handler. The last template argument denotes the dimension of the space into
+ * which the patches are embedded; usually, this dimension is the same as the
+ * dimension of the patches themselves (which is also the default value of the
+ * template parameter), but there might be cases where this is not so. For
+ * example, in the DataOut_Faces() class, patches are generated from faces of
+ * the triangulation. Thus, the dimension of the patch is one less than the
+ * dimension of the embedding space, which is, in this case, equal to the
+ * dimension of the triangulation and DoF handler. However, for the cut
+ * through the domain mentioned above, if the cut is a straight one, then the
+ * cut can be embedded into a space of one dimension lower than the dimension
+ * of the triangulation, so that the last template parameter has the same
+ * value as the second one.
+ *
+ * @ingroup output
+ * @author Wolfgang Bangerth, 1999
+ */
+template <typename DoFHandlerType, int patch_dim, int patch_space_dim=patch_dim>
+class DataOut_DoFData : public DataOutInterface<patch_dim,patch_space_dim>
+{
+public:
+
+  /**
+   * Typedefs to the cell iterator types of the Triangulation class whose
+   * dimensions match those of the dof handler class under consideration.
+   */
+  typedef typename Triangulation<DoFHandlerType::dimension,DoFHandlerType::space_dimension>::cell_iterator cell_iterator;
+  typedef typename Triangulation<DoFHandlerType::dimension,DoFHandlerType::space_dimension>::active_cell_iterator active_cell_iterator;
+
+public:
+
+  /**
+   * Type describing what the vector given to add_data_vector() is: a vector
+   * that has one entry per degree of freedom in a DoFHandler object (such as
+   * solution vectors), or one entry per cell in the triangulation underlying
+   * the DoFHandler object (such as error per cell data). The value
+   * #type_automatic tells add_data_vector() to find out itself (see the
+   * documentation of add_data_vector() for the method used).
+   */
+  enum DataVectorType
+  {
+    /**
+     * Data vector entries are associated to degrees of freedom
+     */
+    type_dof_data,
+
+    /**
+     * Data vector entries are one per grid cell
+     */
+    type_cell_data,
+
+    /**
+     * Find out automatically
+     */
+    type_automatic
+  };
+
+  /**
+   * Constructor
+   */
+  DataOut_DoFData ();
+
+  /**
+   * Destructor.
+   */
+  virtual ~DataOut_DoFData ();
+
+  /**
+   * Designate a dof handler to be used to extract geometry data and the
+   * mapping between nodes and node values. This call is not necessary if all
+   * added data vectors are supplemented with a DoFHandler argument.
+   *
+   * This call is optional: If you add data vectors with specified DoFHandler
+   * object, then that contains all information needed to generate the output.
+   */
+  void attach_dof_handler (const DoFHandlerType &);
+
+  /**
+   * Designate a triangulation to be used to extract geometry data and the
+   * mapping between nodes and node values.
+   *
+   * This call is optional: If you add data vectors with specified DoFHandler
+   * object, then that contains all information needed to generate the output.
+   * This call is useful when you only output cell vectors and no DoFHandler
+   * at all, in which case it provides the geometry.
+   */
+  void attach_triangulation (const Triangulation<DoFHandlerType::dimension,
+                             DoFHandlerType::space_dimension> &);
+
+  /**
+   * Add a data vector together with its name.
+   *
+   * A pointer to the vector is stored, so you have to make sure the vector
+   * exists at that address at least as long as you call the <tt>write_*</tt>
+   * functions.
+   *
+   * It is assumed that the vector has the same number of components as there
+   * are degrees of freedom in the dof handler, in which case it is assumed to
+   * be a vector storing nodal data; or the size may be the number of active
+   * cells on the present grid, in which case it is assumed to be a cell data
+   * vector. As the number of degrees of freedom and of cells is usually not
+   * equal, the function can determine itself which type of vector it is
+   * given. However, there are corner cases where this automatic determination
+   * does not work.  One example is if you compute with piecewise constant
+   * elements and have a scalar solution, then there are as many cells as
+   * there are degrees of freedom (though they may be numbered differently).
+   * Another possibility is if you have a 1d mesh embedded in 2d space and the
+   * mesh consists of a closed curve of cells; in this case, there are as many
+   * nodes as there are cells, and when using a Q1 element you will have as
+   * many degrees of freedom as there are cells.  In these cases, you can
+   * change the last argument of the function from its default value
+   * #type_automatic to either #type_dof_data or #type_cell_data, depending on
+   * what the vector represents. Apart from such corner cases, you can leave
+   * the argument at its default value and let the function determine the type
+   * of the vector itself.
+   *
+   * If it is a vector holding DoF data, the names given shall be one for each
+   * component of the underlying finite element.  If it is a finite element
+   * composed of only one subelement, then there is another function following
+   * which takes a single name instead of a vector of names.
+   *
+   * The data_component_interpretation argument contains information about how
+   * the individual components of output files that consist of more than one
+   * data set are to be interpreted.
+   *
+   * For example, if one has a finite element for the Stokes equations in 2d,
+   * representing components (u,v,p), one would like to indicate that the
+   * first two, u and v, represent a logical vector so that later on when we
+   * generate graphical output we can hand them off to a visualization program
+   * that will automatically know to render them as a vector field, rather
+   * than as two separate and independent scalar fields.
+   *
+   * The default value of this argument (i.e. an empty vector) is
+   * equivalent to a vector of values
+   * DataComponentInterpretation::component_is_scalar, indicating that all
+   * output components are independent scalar fields. However, if the given
+   * data vector represents logical vectors, you may pass a vector that
+   * contains values DataComponentInterpretation::component_is_part_of_vector.
+   * In the example above, one would pass in a vector with components
+   * (DataComponentInterpretation::component_is_part_of_vector,
+   * DataComponentInterpretation::component_is_part_of_vector,
+   * DataComponentInterpretation::component_is_scalar) for (u,v,p).
+   *
+   * The names of a data vector shall only contain characters which are
+   * letters, underscore and a few other ones. Refer to the
+   * ExcInvalidCharacter exception declared in this class to see which
+   * characters are valid and which are not.
+   *
+   * @note The actual type for the vector argument may be any vector type from
+   * which FEValues can extract values on a cell using the
+   * FEValuesBase::get_function_values() function. In particular, this
+   * includes all of the usual vector types, but also IndexSet (see step-41
+   * for a use of this).
+   */
+  template <class VectorType>
+  void add_data_vector (const VectorType               &data,
+                        const std::vector<std::string> &names,
+                        const DataVectorType           type = type_automatic,
+                        const std::vector<DataComponentInterpretation::DataComponentInterpretation> &data_component_interpretation
+                        = std::vector<DataComponentInterpretation::DataComponentInterpretation>());
+
+  /**
+   * This function is an abbreviation to the above one (see there for a
+   * discussion of the various arguments), intended for use with finite
+   * elements that are not composed of subelements. In this case, only one
+   * name per data vector needs to be given, which is what this function
+   * takes. It simply relays its arguments after a conversion of the @p name
+   * to a vector of strings, to the other add_data_vector() function above.
+   *
+   * If @p data is a vector with multiple components this function will
+   * generate distinct names for all components by appending an underscore and
+   * the number of each component to @p name.
+   *
+   * The actual type for the template argument may be any vector type from
+   * which FEValues can extract values on a cell using the
+   * FEValuesBase::get_function_values() function.
+   */
+  template <class VectorType>
+  void add_data_vector (const VectorType     &data,
+                        const std::string    &name,
+                        const DataVectorType type = type_automatic,
+                        const std::vector<DataComponentInterpretation::DataComponentInterpretation> &data_component_interpretation
+                        = std::vector<DataComponentInterpretation::DataComponentInterpretation>());
+
+  /**
+   * This function is an extension of the above one (see there for a
+   * discussion of the arguments except the first one) and allows to set a
+   * vector with its own DoFHandler object. This DoFHandler needs to be
+   * compatible with the other DoFHandler objects assigned with calls to @p
+   * add_data_vector or @p attach_dof_handler, in the sense that the
+   * DoFHandler objects need to be based on the same triangulation. This
+   * function allows you to export data from multiple DoFHandlers that
+   * describe different solution components.
+   *
+   * Since this function takes a DoFHandler object and hence naturally
+   * represents dof data, the data vector type argument present in the other
+   * methods above is skipped.
+   */
+  template <class VectorType>
+  void add_data_vector (const DoFHandlerType           &dof_handler,
+                        const VectorType               &data,
+                        const std::vector<std::string> &names,
+                        const std::vector<DataComponentInterpretation::DataComponentInterpretation> &data_component_interpretation
+                        = std::vector<DataComponentInterpretation::DataComponentInterpretation>());
+
+
+  /**
+   * This function is an abbreviation of the function above with only a scalar
+   * @p dof_handler given and a single data name.
+   */
+  template <class VectorType>
+  void add_data_vector (const DoFHandlerType          &dof_handler,
+                        const VectorType  &data,
+                        const std::string &name,
+                        const std::vector<DataComponentInterpretation::DataComponentInterpretation> &data_component_interpretation
+                        = std::vector<DataComponentInterpretation::DataComponentInterpretation>());
+
+  /**
+   * This function is an alternative to the above ones, allowing the output of
+   * derived quantities instead of the given data. This conversion has to be
+   * done in a class derived from DataPostprocessor. This function is used in
+   * step-29. Other uses are shown in step-32 and step-33.
+   *
+   * The names for these derived quantities are provided by the @p
+   * data_postprocessor argument. Likewise, the data_component_interpretation
+   * argument of the other add_data_vector() functions is provided by the
+   * data_postprocessor argument. As only data of type @p type_dof_data can be
+   * transformed, this type is also known implicitly and does not have to be
+   * given.
+   *
+   * @note The actual type for the vector argument may be any vector type from
+   * which FEValues can extract values on a cell using the
+   * FEValuesBase::get_function_values() function. In particular, this
+   * includes all of the usual vector types, but also IndexSet (see step-41
+   * for a use of this).
+   *
+   * @note The DataPostprocessor object (i.e., in reality the object of your
+   * derived class) has to live until the DataOut object is destroyed as the
+   * latter keeps a pointer to the former and will complain if the object
+   * pointed to is destroyed while the latter still has a pointer to it. If
+   * both the data postprocessor and DataOut objects are local variables of a
+   * function (as they are, for example, in step-29), then you can avoid this
+   * error by declaring the data postprocessor variable before the DataOut
+   * variable as objects are destroyed in reverse order of declaration.
+   */
+  template <class VectorType>
+  void add_data_vector (const VectorType                             &data,
+                        const DataPostprocessor<DoFHandlerType::space_dimension> &data_postprocessor);
+
+  /**
+   * Same function as above, but with a DoFHandler object that does not need
+   * to coincide with the DoFHandler initially set. Note that the
+   * postprocessor can only read data from the given DoFHandler and solution
+   * vector, not other solution vectors or DoFHandlers.
+   */
+  template <class VectorType>
+  void add_data_vector (const DoFHandlerType                         &dof_handler,
+                        const VectorType                             &data,
+                        const DataPostprocessor<DoFHandlerType::space_dimension> &data_postprocessor);
+
+  /**
+   * Release the pointers to the data vectors. This allows output of a new set
+   * of vectors without supplying the DoF handler again. Therefore, the
+   * DataOut object can be used in an algebraic context. Note that besides the
+   * data vectors also the patches already computed are deleted.
+   */
+  void clear_data_vectors ();
+
+  /**
+   * Release pointers to all input data elements, i.e. pointers to data
+   * vectors and to the DoF handler object. This function may be useful when
+   * you have called the @p build_patches function of derived class, since
+   * then the patches are built and the input data is no longer needed, nor is
+   * there a need to reference it. You can then output the patches detached
+   * from the main thread and need not make sure anymore that the DoF handler
+   * object and vectors must not be deleted before the output thread is
+   * finished.
+   */
+  void clear_input_data_references ();
+
+  /**
+   * This function can be used to merge the patches that were created using
+   * the @p build_patches function of the object given as argument into the
+   * list of patches created by this object. This is sometimes handy if one
+   * has, for example, a domain decomposition algorithm where each block is
+   * represented by a DoFHandler of its own, but one wants to output the
+   * solution on all the blocks at the same time.
+   *
+   * For this to work, the given argument and this object need to have the
+   * same number of output vectors, and they need to use the same number of
+   * subdivisions per patch. The output will probably look rather funny if
+   * patches in both objects overlap in space.
+   *
+   * If you call build_patches() for this object after merging in patches, the
+   * previous state is overwritten, and the merged-in patches are lost.
+   *
+   * The second parameter allows to shift each node of the patches in the
+   * object passed in as the first parameter by a certain amount. This is
+   * sometimes useful to generate "exploded" views of a collection of blocks.
+   *
+   * This function will fail if either this or the other object did not yet
+   * set up any patches.
+   */
+  template <typename DoFHandlerType2>
+  void merge_patches (const DataOut_DoFData<DoFHandlerType2,patch_dim,patch_space_dim> &source,
+                      const Point<patch_space_dim> &shift = Point<patch_space_dim>());
+
+  /**
+   * Release the pointers to the data vectors and the DoF handler. You have to
+   * set all data entries again using the add_data_vector() function. The
+   * pointer to the dof handler is cleared as well, along with all other data.
+   * In effect, this function resets everything to a virgin state.
+   */
+  virtual void clear ();
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  std::size_t memory_consumption () const;
+
+protected:
+  /**
+   * Abbreviate the somewhat lengthy name for the Patch class.
+   */
+  typedef dealii::DataOutBase::Patch<patch_dim,patch_space_dim> Patch;
+
+  /**
+   * Pointer to the triangulation object.
+   */
+  SmartPointer<const Triangulation<DoFHandlerType::dimension,DoFHandlerType::space_dimension> > triangulation;
+
+  /**
+   * Pointer to the optional handler object.
+   */
+  SmartPointer<const DoFHandlerType> dofs;
+
+  /**
+   * List of data elements with vectors of values for each degree of freedom.
+   */
+  std::vector<std_cxx11::shared_ptr<internal::DataOut::DataEntryBase<DoFHandlerType> > >  dof_data;
+
+  /**
+   * List of data elements with vectors of values for each cell.
+   */
+  std::vector<std_cxx11::shared_ptr<internal::DataOut::DataEntryBase<DoFHandlerType> > >  cell_data;
+
+  /**
+   * This is a list of patches that is created each time build_patches() is
+   * called. These patches are used in the output routines of the base
+   * classes.
+   */
+  std::vector<Patch> patches;
+
+  /**
+   * Function by which the base class's functions get to know what patches
+   * they shall write to a file.
+   */
+  virtual
+  const std::vector<Patch> &get_patches () const;
+
+  /**
+   * Virtual function through which the names of data sets are obtained by the
+   * output functions of the base class.
+   */
+  virtual
+  std::vector<std::string> get_dataset_names () const;
+
+  /**
+   * Extracts the finite elements stored in the dof_data object, including a
+   * dummy object of FE_DGQ<dim>(0) in case only the triangulation is used.
+   */
+  std::vector<std_cxx11::shared_ptr<dealii::hp::FECollection<DoFHandlerType::dimension,DoFHandlerType::space_dimension> > >
+  get_finite_elements() const;
+
+  /**
+   * Overload of the respective DataOutInterface::get_vector_data_ranges()
+   * function. See there for a more extensive documentation.
+   */
+  virtual
+  std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> >
+  get_vector_data_ranges () const;
+
+  /**
+   * Make all template siblings friends. Needed for the merge_patches()
+   * function.
+   */
+  template <class, int, int>
+  friend class DataOut_DoFData;
+};
+
+
+
+// -------------------- template and inline functions ------------------------
+
+
+template <typename DoFHandlerType, int patch_dim, int patch_space_dim>
+template <typename DoFHandlerType2>
+void
+DataOut_DoFData<DoFHandlerType,patch_dim,patch_space_dim>::
+merge_patches (const DataOut_DoFData<DoFHandlerType2,patch_dim,patch_space_dim> &source,
+               const Point<patch_space_dim> &shift)
+{
+  const std::vector<Patch> source_patches = source.get_patches ();
+  Assert ((patches.size () != 0) &&
+          (source_patches.size () != 0),
+          ExcMessage ("When calling this function, both the current "
+                      "object and the one being merged need to have a "
+                      "nonzero number of patches associated with it. "
+                      "Either you called this function on objects that "
+                      "are empty, or you may have forgotten to call "
+                      "the 'build_patches()' function."));
+  // both objects must provide the same
+  // data set names, in the same order
+  Assert (get_dataset_names() == source.get_dataset_names(),
+          Exceptions::DataOut::ExcIncompatibleDatasetNames());
+  // make sure patches are compatible. only the
+  // first patch of each list is compared (its
+  // subdivisions and the shape of its data); all
+  // other patches are assumed to match it
+  Assert (patches[0].n_subdivisions == source_patches[0].n_subdivisions,
+          Exceptions::DataOut::ExcIncompatiblePatchLists());
+  Assert (patches[0].data.n_rows() == source_patches[0].data.n_rows(),
+          Exceptions::DataOut::ExcIncompatiblePatchLists());
+  Assert (patches[0].data.n_cols() == source_patches[0].data.n_cols(),
+          Exceptions::DataOut::ExcIncompatiblePatchLists());
+
+  // check equality of the vector data specifications. NOTE(review):
+  // get_vector_data_ranges() returns a vector by value on every call below
+  Assert (get_vector_data_ranges().size() ==
+          source.get_vector_data_ranges().size(),
+          ExcMessage ("Both sources need to declare the same components "
+                      "as vectors."));
+  for (unsigned int i=0; i<get_vector_data_ranges().size(); ++i)
+    {
+      Assert (std_cxx11::get<0>(get_vector_data_ranges()[i]) ==
+              std_cxx11::get<0>(source.get_vector_data_ranges()[i]),
+              ExcMessage ("Both sources need to declare the same components "
+                          "as vectors."));
+      Assert (std_cxx11::get<1>(get_vector_data_ranges()[i]) ==
+              std_cxx11::get<1>(source.get_vector_data_ranges()[i]),
+              ExcMessage ("Both sources need to declare the same components "
+                          "as vectors."));
+      Assert (std_cxx11::get<2>(get_vector_data_ranges()[i]) ==
+              std_cxx11::get<2>(source.get_vector_data_ranges()[i]),
+              ExcMessage ("Both sources need to declare the same components "
+                          "as vectors."));
+    }
+
+  // append the source patches. remember
+  // the old number of patches, since it is
+  // the offset by which patch indices and
+  // neighbor numbers are adjusted below
+  const unsigned int old_n_patches = patches.size();
+  patches.insert (patches.end(),
+                  source_patches.begin(),
+                  source_patches.end());
+
+  // move the appended patches' vertices if shift is not the default Point
+  if (shift != Point<patch_space_dim>())
+    for (unsigned int i=old_n_patches; i<patches.size(); ++i)
+      for (unsigned int v=0; v<GeometryInfo<patch_dim>::vertices_per_cell; ++v)
+        patches[i].vertices[v] += shift;
+
+  // renumber the appended patches so that they follow the existing ones
+  for (unsigned int i=old_n_patches; i<patches.size(); ++i)
+    patches[i].patch_index += old_n_patches;
+
+  // offset valid neighbor numbers likewise; no_neighbor markers are kept
+  for (unsigned int i=old_n_patches; i<patches.size(); ++i)
+    for (unsigned int n=0; n<GeometryInfo<patch_dim>::faces_per_cell; ++n)
+      if (patches[i].neighbors[n] != Patch::no_neighbor)
+        patches[i].neighbors[n] += old_n_patches;
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/numerics/data_out_faces.h b/include/deal.II/numerics/data_out_faces.h
new file mode 100644
index 0000000..a82feb8
--- /dev/null
+++ b/include/deal.II/numerics/data_out_faces.h
@@ -0,0 +1,238 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__data_out_faces_h
+#define dealii__data_out_faces_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/numerics/data_out.h>
+
+#include <string>
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace internal
+{
+  namespace DataOutFaces
+  {
+    /**
+     * Data structure derived from internal::DataOut::ParallelDataBase for use
+     * in the DataOutFaces class. It is of the AdditionalData kind of data
+     * structure discussed in the documentation of the WorkStream context.
+     */
+    template <int dim, int spacedim>
+    struct ParallelData : public internal::DataOut::ParallelDataBase<dim,spacedim>
+    {
+      ParallelData (const unsigned int n_datasets,
+                    const unsigned int n_subdivisions,
+                    const std::vector<unsigned int> &n_postprocessor_outputs,
+                    const Mapping<dim,spacedim> &mapping,
+                    const std::vector<std_cxx11::shared_ptr<dealii::hp::FECollection<dim,spacedim> > > &finite_elements,
+                    const UpdateFlags update_flags);
+
+      std::vector<Point<dim> > patch_normals;
+      std::vector<Point<spacedim> > patch_evaluation_points;
+    };
+  }
+}
+
+
+/**
+ * This class generates output from faces of a triangulation. It might be used
+ * to generate output only for the surface of the triangulation (this is the
+ * default of this class), or for all faces of active cells, as specified in
+ * the constructor. The output of this class is a set of patches (as defined
+ * by the class DataOutBase::Patch()), one for each face for which output is
+ * to be generated. These patches can then be written in several graphical
+ * data formats by the functions of the underlying classes.
+ *
+ * <h3>Interface</h3>
+ *
+ * The interface of this class is copied from the DataOut class. Furthermore,
+ * they share the common parent class DataOut_DoFData. See the reference of
+ * these two classes for a discussion of the interface.
+ *
+ *
+ * <h3>Extending this class</h3>
+ *
+ * The sequence of faces to generate patches from is generated in the same way
+ * as in the DataOut class; see there for a description of the respective
+ * interface. The functions generating the sequence of faces which shall be
+ * used to generate output, are called first_face() and next_face().
+ *
+ * Since we need to initialize objects of type FEValues with the faces
+ * generated from these functions, it is not sufficient that they only return
+ * face iterators. Rather, we need a pair of cell and the number of the face,
+ * as the values of finite element fields need not necessarily be unique on a
+ * face (think of discontinuous finite elements, where the value of the finite
+ * element field depends on the direction from which you approach a face, thus
+ * it is necessary to use a pair of cell and face, rather than only a face
+ * iterator). Therefore, this class defines a @p typedef which creates a type
+ * @p FaceDescriptor that is an abbreviation for a pair of cell iterator and
+ * face number. The functions @p first_face and @p next_face operate on
+ * objects of this type.
+ *
+ * Extending this class might, for example, be useful if you only want output
+ * from certain portions of the boundary, e.g. as indicated by the boundary
+ * indicator of the respective faces. However, it is also conceivable that one
+ * generates patches not from boundary faces, but from interior faces that are
+ * selected due to other criteria; one application might be to use only those
+ * faces where one component of the solution attains a certain value, in order
+ * to display the values of other solution components on these faces. Other
+ * applications certainly exist, for which the author is not imaginative
+ * enough.
+ *
+ * @pre This class only makes sense if the first template argument,
+ * <code>dim</code> equals the dimension of the DoFHandler type given as the
+ * second template argument, i.e., if <code>dim ==
+ * DoFHandlerType::dimension</code>. This redundancy is a historical relic
+ * from the time where the library had only a single DoFHandler class and this
+ * class consequently only a single template argument.
+ *
+ * @todo Reimplement this whole class using actual FEFaceValues and
+ * MeshWorker.
+ *
+ * @ingroup output
+ * @author Wolfgang Bangerth, Guido Kanschat, 2000, 2011
+ */
+template <int dim, typename DoFHandlerType=DoFHandler<dim> >
+class DataOutFaces : public DataOut_DoFData<DoFHandlerType,DoFHandlerType::dimension-1,
+  DoFHandlerType::dimension>
+{
+public:
+  /**
+   * An abbreviation for the dimension of the DoFHandler object we work with.
+   * Faces are then <code>dimension-1</code> dimensional objects.
+   */
+  static const unsigned int dimension = DoFHandlerType::dimension;
+
+  /**
+   * An abbreviation for the spatial dimension within which the triangulation
+   * and DoFHandler are embedded in.
+   */
+  static const unsigned int space_dimension = DoFHandlerType::space_dimension;
+
+  /**
+   * Typedef to the iterator type of the dof handler class under
+   * consideration.
+   */
+  typedef typename DataOut_DoFData<DoFHandlerType,dimension-1,
+          dimension>::cell_iterator cell_iterator;
+
+  /**
+   * Constructor determining whether a surface mesh (default) or the whole
+   * wire basket is written.
+   */
+  DataOutFaces (const bool surface_only = true);
+
+  /**
+   * This is the central function of this class since it builds the list of
+   * patches to be written by the low-level functions of the base class. A
+   * patch is, in essence, some intermediate representation of the data on
+   * each face of a triangulation and DoFHandler object that can then be used
+   * to write files in some format that is readable by visualization programs.
+   *
+   * You can find an overview of the use of this function in the general
+   * documentation of this class. An example is also provided in the
+   * documentation of this class's base class DataOut_DoFData.
+   *
+   * @param n_subdivisions See DataOut::build_patches() for an extensive
+   * description of this parameter.
+   */
+  virtual void
+  build_patches (const unsigned int n_subdivisions = 0);
+
+  /**
+   * Same as above, except that the additional first parameter defines a
+   * mapping that is to be used in the generation of output. If
+   * <tt>n_subdivisions>1</tt>, the points interior of subdivided patches
+   * which originate from cells at the boundary of the domain can be computed
+   * using the mapping, i.e. a higher order mapping leads to a representation
+   * of a curved boundary by using more subdivisions.
+   *
+   * Even for non-curved cells the mapping argument can be used for the
+   * Eulerian mappings (see class MappingQ1Eulerian) where a mapping is used
+   * not only to determine the position of points interior to a cell, but also
+   * of the vertices.  It offers an opportunity to watch the solution on a
+   * deformed triangulation on which the computation was actually carried out,
+   * even if the mesh is internally stored in its undeformed configuration and
+   * the deformation is only tracked by an additional vector that holds the
+   * deformation of each vertex.
+   *
+   * @todo The @p mapping argument should be replaced by a
+   * hp::MappingCollection in case of a hp::DoFHandler.
+   */
+  virtual void build_patches (const Mapping<dimension> &mapping,
+                              const unsigned int n_subdivisions = 0);
+
+  /**
+   * Declare a way to describe a face which we would like to generate output
+   * for. The usual way would, of course, be to use an object of type
+   * <tt>DoFHandler<dim>::face_iterator</tt>, but since we have to describe
+   * faces to objects of type FEValues, we can only represent faces by pairs
+   * of a cell and the number of the face. This pair is here aliased to a name
+   * that is better to type.
+   */
+  typedef typename std::pair<cell_iterator,unsigned int> FaceDescriptor;
+
+
+  /**
+   * Return the first face which we want output for. The default
+   * implementation returns the first face of an active cell or the first such
+   * on the boundary.
+   *
+   * For more general sets, override this function in a derived class.
+   */
+  virtual FaceDescriptor first_face ();
+
+  /**
+   * Return the next face after @p face which we want output for. If there are
+   * no more faces, <tt>dofs->end()</tt> is returned as the first component of
+   * the return value.
+   *
+   * The default implementation returns the next face of an active cell, or
+   * the next such on the boundary.
+   *
+   * This function traverses the mesh cell by cell (active only), and then
+   * through all faces of the cell. As a result, interior faces are output
+   * twice.  This function can be overridden in a derived class to select a
+   * different set of faces. Note that the default implementation assumes that
+   * the given @p face is active, which is guaranteed as long as first_face()
+   * is also used from the default implementation. Overriding only one of the
+   * two functions should be done with care.
+   */
+  virtual FaceDescriptor next_face (const FaceDescriptor &face);
+
+private:
+  /**
+   * Parameter deciding between surface meshes and full wire basket.
+   */
+  const bool surface_only;
+
+  /**
+   * Build one patch. This function is called in a WorkStream context.
+   */
+  void build_one_patch (const FaceDescriptor *cell_and_face,
+                        internal::DataOutFaces::ParallelData<dimension, dimension> &data,
+                        DataOutBase::Patch<dimension-1,space_dimension> &patch);
+};
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/numerics/data_out_rotation.h b/include/deal.II/numerics/data_out_rotation.h
new file mode 100644
index 0000000..eeb10e8
--- /dev/null
+++ b/include/deal.II/numerics/data_out_rotation.h
@@ -0,0 +1,209 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__data_out_rotation_h
+#define dealii__data_out_rotation_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/numerics/data_out_dof_data.h>
+
+#include <string>
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace internal
+{
+  namespace DataOutRotation
+  {
+    /**
+     * A derived class for use in the DataOutRotation class. This is a class
+     * for the AdditionalData kind of data structure discussed in the
+     * documentation of the WorkStream class.
+     */
+    template <int dim, int spacedim>
+    struct ParallelData : public internal::DataOut::ParallelDataBase<dim,spacedim>
+    {
+      ParallelData (const unsigned int n_datasets,
+                    const unsigned int n_subdivisions,
+                    const unsigned int n_patches_per_circle,
+                    const std::vector<unsigned int> &n_postprocessor_outputs,
+                    const Mapping<dim,spacedim> &mapping,
+                    const std::vector<std_cxx11::shared_ptr<dealii::hp::FECollection<dim,spacedim> > > &finite_elements,
+                    const UpdateFlags update_flags);
+
+      const unsigned int n_patches_per_circle;
+
+      std::vector<Point<spacedim> > patch_evaluation_points;
+    };
+  }
+}
+
+
+
+/**
+ * This class generates output in the full domain of computations that were
+ * done using rotational symmetry of domain and solution. In particular, if a
+ * computation of a three dimensional problem with rotational symmetry around
+ * the @p z-axis (i.e. in the @p r-z-plane) was done, then this class can be
+ * used to generate the output in the original @p x-y-z space. In order to do
+ * so, it generates from each cell in the computational mesh a cell in the
+ * space with dimension one greater than that of the DoFHandler object. The
+ * resulting output will then consist of hexahedra forming an object that has
+ * rotational symmetry around the z-axis. As most graphical programs can not
+ * represent ring-like structures, the angular (rotation) variable is
+ * discretized into a finite number of intervals as well; the number of these
+ * intervals must be given to the @p build_patches function. It is noted,
+ * however, that while this function generates nice pictures of the whole
+ * domain, it often produces <em>very</em> large output files.
+ *
+ *
+ * <h3>Interface</h3>
+ *
+ * The interface of this class is copied from the DataOut class. Furthermore,
+ * they share the common parent class DataOut_DoFData(). See the reference of
+ * these two classes for a discussion of the interface and how to extend it by
+ * deriving further classes from this class.
+ *
+ *
+ * <h3>Details for 1d computations</h3>
+ *
+ * The one coordinate in the triangulation used by the DoFHandler object
+ * passed to this class is taken as the radial variable, and the output will
+ * then be either a circle or a ring domain. It is in the user's
+ * responsibility to assure that the radial coordinate only attains non-
+ * negative values.
+ *
+ *
+ * <h3>Details for 2d computations</h3>
+ *
+ * We consider the computation (represented by the DoFHandler object that is
+ * attached to this class) to have happened in the @p r-z-plane, where @p r is
+ * the radial variable and @p z denotes the axis of revolution around which
+ * the solution is symmetric. The output is in @p x-y-z space, where the
+ * radial dependence is transformed to the @p x-y plane. At present, it is not
+ * possible to exchange the meaning of the first and second variable of the
+ * plane in which the simulation was made, i.e. generate output from a
+ * simulation where the first variable denoted the symmetry axis, and the
+ * second denoted the radial variable. You have to take that into account when
+ * first programming your application.
+ *
+ * It is in the responsibility of the user to make sure that the radial
+ * variable attains only non-negative values.
+ *
+ * @pre This class only makes sense if the first template argument,
+ * <code>dim</code> equals the dimension of the DoFHandler type given as the
+ * second template argument, i.e., if <code>dim ==
+ * DoFHandlerType::dimension</code>. This redundancy is a historical relic
+ * from the time where the library had only a single DoFHandler class and this
+ * class consequently only a single template argument.
+ *
+ * @ingroup output
+ * @author Wolfgang Bangerth, 2000
+ */
+template <int dim, typename DoFHandlerType=DoFHandler<dim> >
+class DataOutRotation : public DataOut_DoFData<DoFHandlerType,DoFHandlerType::dimension+1>
+{
+public:
+  /**
+   * An abbreviation for the dimension of the DoFHandler object we work with.
+   * Patches are then <code>dimension+1</code> dimensional objects.
+   */
+  static const unsigned int dimension = DoFHandlerType::dimension;
+
+  /**
+   * An abbreviation for the spatial dimension within which the triangulation
+   * and DoFHandler are embedded in.
+   */
+  static const unsigned int space_dimension = DoFHandlerType::space_dimension;
+
+  /**
+   * Typedef to the iterator type of the dof handler class under
+   * consideration.
+   */
+  typedef typename DataOut_DoFData<DoFHandlerType,dimension+1>::cell_iterator cell_iterator;
+
+  /**
+   * This is the central function of this class since it builds the list of
+   * patches to be written by the low-level functions of the base class. A
+   * patch is, in essence, some intermediate representation of the data on
+   * each cell of a triangulation and DoFHandler object that can then be used
+   * to write files in some format that is readable by visualization programs.
+   *
+   * You can find an overview of the use of this function in the general
+   * documentation of this class. An example is also provided in the
+   * documentation of this class's base class DataOut_DoFData.
+   *
+   * @param n_patches_per_circle Denotes into how many intervals the angular
+   * (rotation) variable is to be subdivided.
+   *
+   * @param n_subdivisions See DataOut::build_patches() for an extensive
+   * description of this parameter.
+   */
+  virtual void build_patches (const unsigned int n_patches_per_circle,
+                              const unsigned int n_subdivisions = 0);
+
+  /**
+   * Return the first cell which we want output for. The default
+   * implementation returns the first
+   * @ref GlossActive "active cell",
+   * but you might want to return other cells in a derived class.
+   */
+  virtual cell_iterator first_cell ();
+
+  /**
+   * Return the next cell after @p cell which we want output for. If there are
+   * no more cells, <tt>dofs->end()</tt> shall be returned.
+   *
+   * The default implementation returns the next active cell, but you might
+   * want to return other cells in a derived class. Note that the default
+   * implementation assumes that the given @p cell is active, which is
+   * guaranteed as long as @p first_cell is also used from the default
+   * implementation. Overriding only one of the two functions might not be a
+   * good idea.
+   */
+  virtual cell_iterator next_cell (const cell_iterator &cell);
+
+  /**
+   * Exception: a vertex has a negative radial coordinate, given as argument.
+   */
+  DeclException1 (ExcRadialVariableHasNegativeValues,
+                  double,
+                  << "You are attempting to use this class on a triangulation "
+                  "in which some vertices have a negative radial coordinate "
+                  "value of "
+                  << arg1
+                  << ". If you rotate such a triangulation around an "
+                  "axis, you will get (dim+1)-dimensional meshes "
+                  "that are not likely what you hoped to see.");
+
+private:
+  /**
+   * Build the patches belonging to the single cell @p cell and write them
+   * to @p patches. Presumably there is one patch per angular interval and
+   * the function is run from a WorkStream context -- to be confirmed.
+   */
+  void
+  build_one_patch (const cell_iterator *cell,
+                   internal::DataOutRotation::ParallelData<dimension, space_dimension> &data,
+                   std::vector<DataOutBase::Patch<dimension+1,space_dimension+1> > &patches);
+};
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/numerics/data_out_stack.h b/include/deal.II/numerics/data_out_stack.h
new file mode 100644
index 0000000..4a1e071
--- /dev/null
+++ b/include/deal.II/numerics/data_out_stack.h
@@ -0,0 +1,350 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__data_out_stack_h
+#define dealii__data_out_stack_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/data_out_base.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/numerics/data_out_dof_data.h>
+
+#include <string>
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int dim, int spacedim> class DoFHandler;
+
+/**
+ * This class is used to stack the output from several computations into one
+ * output file by stacking the data sets in another co-ordinate direction
+ * orthogonal to the space directions. The most common use is to stack the
+ * results of several time steps into one space-time output file, or for
+ * example to connect the results of solutions of a parameter dependent
+ * equation for several parameter values together into one. The interface is
+ * mostly modelled after the DataOut class, see there for some more
+ * documentation.
+ *
+ * We will explain the concept for a time dependent problem, but instead of
+ * the time any parameter can be substituted. In our example, a solution of an
+ * equation is computed for each discrete time level. This is then added to an
+ * object of the present class and after all time levels are added, a space-
+ * time plot will be written in any of the output formats supported by the
+ * base class. Upon output, the (spatial) solution on each time level is
+ * extended into the time direction by writing it twice, once for the time
+ * level itself and once for a time equal to the time level minus a given time
+ * step. These two copies are connected, to form a space-time slab, with
+ * constant values in time.
+ *
+ * Due to the piecewise constant output in time, the written solution will in
+ * general be discontinuous at discrete time levels, but the output is still
+ * sufficient in most cases. More sophisticated interpolations in time may be
+ * added in the future.
+ *
+ *
+ * <h3>Example of Use</h3>
+ *
+ * The following little example shall illustrate the different steps of use of
+ * this class. It is assumed that the finite element used is composed of two
+ * components, @p u and @p v, that the solution vector is named @p solution
+ * and that a vector @p error is computed which contains an error indicator
+ * for each spatial cell.
+ *
+ * Note that unlike for the DataOut class it is necessary to first declare
+ * data vectors and the names of the components before first use. This is
+ * because on all time levels the same data should be present to produce
+ * reasonable time-space output. The output is generated with two subdivisions
+ * in each space and time direction, which is suitable for quadratic finite
+ * elements in space, for example.
+ *
+ * @code
+ *   DataOutStack<dim> data_out_stack;
+ *
+ *                                  // first declare the vectors
+ *                                  // to be used later
+ *   std::vector<std::string> solution_names;
+ *   solution_names.push_back ("u");
+ *   solution_names.push_back ("v");
+ *   data_out_stack.declare_data_vector (solution_names,
+ *                                       DataOutStack<dim>::dof_vector);
+ *   data_out_stack.declare_data_vector ("error",
+ *                                       DataOutStack<dim>::cell_vector);
+ *
+ *                                  // now do computations
+ *   for (double parameter=0; ...)
+ *     {
+ *       DoFHandler<dim,spacedim> dof_handler;
+ *       ...                        // compute something
+ *
+ *                                  // now for output
+ *       data_out_stack.new_parameter_value (parameter,
+ *                                           delta_parameter);
+ *       data_out_stack.attach_dof_handler (dof_handler);
+ *       data_out_stack.add_data_vector (solution, solution_names);
+ *       data_out_stack.add_data_vector (error, "error");
+ *       data_out_stack.build_patches (2);
+ *       data_out_stack.finish_parameter_value ();
+ *     };
+ * @endcode
+ *
+ * @ingroup output
+ * @author Wolfgang Bangerth, 1999
+ */
+template <int dim, int spacedim=dim, typename DoFHandlerType = DoFHandler<dim,spacedim> >
+class DataOutStack : public DataOutInterface<dim+1>
+{
+public:
+  /**
+   * Data type declaring the two types of vectors which are used in this
+   * class.
+   */
+  enum VectorType { cell_vector, dof_vector };
+
+  /**
+   * Destructor. Only declared to make it @p virtual.
+   */
+  virtual ~DataOutStack ();
+
+  /**
+   * Start the next set of data for a specific parameter value. The argument
+   * @p parameter_step denotes the interval (in backward direction, counted
+   * from @p parameter_value) with which the output will be extended in
+   * parameter direction, i.e. orthogonal to the space directions.
+   */
+  void new_parameter_value (const double parameter_value,
+                            const double parameter_step);
+
+  /**
+   * Attach the DoF handler for the grid and data associated with the
+   * parameter previously set by @p new_parameter_value.
+   *
+   * This has to happen before adding data vectors for the present parameter
+   * value.
+   */
+  void attach_dof_handler (const DoFHandlerType &dof_handler);
+
+  /**
+   * Declare a data vector. The @p vector_type argument determines whether the
+   * data vector will be considered as DoF or cell data.
+   *
+   * This version may be called if the finite element presently used by the
+   * DoFHandler (and previously attached to this object) has only one
+   * component and therefore only one name needs to be given.
+   */
+  void declare_data_vector (const std::string &name,
+                            const VectorType   vector_type);
+
+  /**
+   * Declare a data vector. The @p vector_type argument determines whether the
+   * data vector will be considered as DoF or cell data.
+   *
+   * This version must be called if the finite element presently used by the
+   * DoFHandler (and previously attached to this object) has more than one
+   * component and therefore more than one name needs to be given. However,
+   * you can also call this function with a
+   * <tt>std::vector@<std::string@></tt> containing only one element if the
+   * finite element has only one component.
+   */
+  void declare_data_vector (const std::vector<std::string> &name,
+                            const VectorType                vector_type);
+
+
+  /**
+   * Add a data vector for the presently set value of the parameter.
+   *
+   * This version may be called if the finite element presently used by the
+   * DoFHandler (and previously attached to this object) has only one
+   * component and therefore only one name needs to be given.
+   *
+   * If @p vec is a vector with multiple components this function will
+   * generate distinct names for all components by appending an underscore and
+   * the number of each component to @p name.
+   *
+   * The data vector must have been registered using the @p
+   * declare_data_vector function before actually using it the first time.
+   *
+   * Note that a copy of this vector is stored until @p finish_parameter_value
+   * is called the next time, so if you are short of memory you may want to
+   * call this function only after all computations involving large matrices
+   * are already done.
+   */
+  template <typename number>
+  void add_data_vector (const Vector<number> &vec,
+                        const std::string    &name);
+
+  /**
+   * Add a data vector for the presently set value of the parameter.
+   *
+   * This version must be called if the finite element presently used by the
+   * DoFHandler (and previously attached to this object) has more than one
+   * component and therefore more than one name needs to be given. However,
+   * you can also call this function with a
+   * <tt>std::vector@<std::string@></tt> containing only one element if the
+   * finite element has only one component.
+   *
+   * The data vector must have been registered using the @p
+   * declare_data_vector function before actually using it the first time.
+   *
+   * Note that a copy of this vector is stored until @p finish_parameter_value
+   * is called the next time, so if you are short of memory you may want to
+   * call this function only after all computations involving large matrices
+   * are already done.
+   */
+  template <typename number>
+  void add_data_vector (const Vector<number>           &vec,
+                        const std::vector<std::string> &names);
+
+  /**
+   * This is the central function of this class since it builds the list of
+   * patches to be written by the low-level functions of the base class. A
+   * patch is, in essence, some intermediate representation of the data on
+   * each cell of a triangulation and DoFHandler object that can then be used
+   * to write files in some format that is readable by visualization programs.
+   *
+   * You can find an overview of the use of this function in the general
+   * documentation of this class, which also provides an example (see the
+   * section "Example of Use" there).
+   *
+   * @param n_subdivisions See DataOut::build_patches() for an extensive
+   * description of this parameter. The number of subdivisions is always one
+   * in the direction of the time-like parameter used by this class.
+   */
+  void build_patches (const unsigned int n_subdivisions = 0);
+
+  /**
+   * Release all data that is no longer needed once @p build_patches was called
+   * and all other transactions for a given parameter value are done.
+   *
+   * Counterpart of @p new_parameter_value.
+   */
+  void finish_parameter_value ();
+
+  /**
+   * Clear all data presently stored in this object.
+   */
+  void clear ();
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  std::size_t memory_consumption () const;
+
+  /**
+   * Exception: no vector with this first component name has been added.
+   */
+  DeclException1 (ExcVectorNotDeclared,
+                  std::string,
+                  << "The data vector for which the first component has the name "
+                  << arg1 << " has not been added before.");
+  /**
+   * Exception: a new step was started before finishing the previous one.
+   */
+  DeclExceptionMsg (ExcDataNotCleared,
+                    "You cannot start a new time/parameter step before calling "
+                    "finish_parameter_value() on the previous step.");
+  /**
+   * Exception: a vector was declared after build_patches() was called.
+   */
+  DeclExceptionMsg (ExcDataAlreadyAdded,
+                    "You cannot declare additional vectors after already calling "
+                    "build_patches(). All data vectors need to be declared "
+                    "before you call this function the first time.");
+  /**
+   * Exception: the name given for a vector component is already in use.
+   */
+  DeclException1 (ExcNameAlreadyUsed,
+                  std::string,
+                  << "You tried to declare a component of a data vector with "
+                  << "the name <" << arg1 << ">, but that name is already used.");
+
+private:
+  /**
+   * Present parameter value.
+   */
+  double                               parameter;
+
+  /**
+   * Present parameter step, i.e. length of the parameter interval to be
+   * written next.
+   */
+  double                               parameter_step;
+
+  /**
+   * DoF handler to be used for the data corresponding to the present
+   * parameter value.
+   */
+  SmartPointer<const DoFHandlerType,DataOutStack<dim,spacedim,DoFHandlerType> > dof_handler;
+
+  /**
+   * List of patches of all past and present parameter value data sets.
+   */
+  std::vector< dealii::DataOutBase::Patch<dim+1,dim+1> >   patches;
+
+  /**
+   * Structure holding data vectors (cell and dof data) for the present
+   * parameter value.
+   */
+  struct DataVector
+  {
+    /**
+     * Data vector.
+     */
+    Vector<double> data;
+
+    /**
+     * Names of the different components within each such data set.
+     */
+    std::vector<std::string> names;
+
+    /**
+     * Determine an estimate for the memory consumption (in bytes) of this
+     * object.
+     */
+    std::size_t memory_consumption () const;
+  };
+
+  /**
+   * List of DoF data vectors.
+   */
+  std::vector<DataVector> dof_data;
+
+  /**
+   * List of cell data vectors.
+   */
+  std::vector<DataVector> cell_data;
+
+  /**
+   * This is the function through which derived classes propagate preprocessed
+   * data in the form of Patch structures (declared in the base class
+   * DataOutBase) to the actual output function.
+   */
+  virtual const std::vector< dealii::DataOutBase::Patch<dim+1,dim+1> > & get_patches () const;
+
+
+  /**
+   * Virtual function through which the names of data sets are obtained by the
+   * output functions of the base class.
+   */
+  virtual std::vector<std::string> get_dataset_names () const;
+};
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/numerics/data_postprocessor.h b/include/deal.II/numerics/data_postprocessor.h
new file mode 100644
index 0000000..0baa696
--- /dev/null
+++ b/include/deal.II/numerics/data_postprocessor.h
@@ -0,0 +1,354 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2007 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__data_postprocessor_h
+#define dealii__data_postprocessor_h
+
+
+
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/tensor.h>
+#include <deal.II/base/point.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/fe/fe_update_flags.h>
+#include <deal.II/numerics/data_component_interpretation.h>
+
+#include <vector>
+#include <string>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/**
+ * This class provides an interface to compute derived quantities from a
+ * solution that can then be output in graphical formats for visualization,
+ * using facilities such as the DataOut class.
+ *
+ * For the (graphical) output of a FE solution one frequently wants to include
+ * derived quantities, which are calculated from the values of the solution
+ * and possibly the first and second derivatives of the solution. Examples are
+ * the calculation of Mach numbers from velocity and density in supersonic
+ * flow computations, or the computation of the magnitude of a complex-valued
+ * solution as demonstrated in step-29. Other uses are shown in step-32 and
+ * step-33. This class offers the interface to perform such postprocessing.
+ * Given the values and derivatives of the solution at those points where we
+ * want to generate output, the functions of this class can be overloaded to
+ * compute new quantities.
+ *
+ * A data vector and an object of a class derived from the current one can be
+ * given to the DataOut::add_data_vector() function (and similarly for
+ * DataOutRotation and DataOutFaces). This will cause DataOut::build_patches()
+ * to compute the derived quantities instead of using the data provided by the
+ * data vector (typically the solution vector). Note that the
+ * DataPostprocessor object (i.e., in reality the object of your derived
+ * class) has to live until the DataOut object is destroyed as the latter
+ * keeps a pointer to the former and will complain if the object pointed to is
+ * destroyed while the latter still has a pointer to it. If both the data
+ * postprocessor and DataOut objects are local variables of a function (as
+ * they are, for example, in step-29), then you can avoid this error by
+ * declaring the data postprocessor variable before the DataOut variable as
+ * objects are destroyed in reverse order of declaration.
+ *
+ * In order not to perform needless calculations, DataPostprocessor has to
+ * provide information which input data is needed for the calculation of the
+ * derived quantities, i.e. whether it needs the values, the first derivative
+ * and/or the second derivative of the provided data. DataPostprocessor
+ * objects which are used in combination with a DataOutFaces object can also
+ * ask for the normal vectors at each point. The information which data is
+ * needed has to be provided via the UpdateFlags returned by the virtual
+ * function get_needed_update_flags(). It is your responsibility to use only
+ * those values which were updated in the calculation of derived quantities.
+ * The DataOut object will provide references to the requested data in the
+ * call to compute_derived_quantities_scalar() or
+ * compute_derived_quantities_vector() (DataOut decides which of the two
+ * functions to call depending on whether the finite element in use has only a
+ * single, or multiple vector components; note that this is only determined by
+ * the number of components in the finite element in use, and not by whether
+ * the data computed by a class derived from the current one is scalar or
+ * vector valued).
+ *
+ * Furthermore, derived classes have to implement the get_names() function,
+ * where the number of output variables returned by the latter function has to
+ * match the size of the vector returned by the former. Furthermore, this
+ * number has to match the number of computed quantities, of course.
+ *
+ *
+ * <h3>Use in simpler cases</h3>
+ *
+ * Deriving from the current class allows to implement very general
+ * postprocessors. For example, in the step-32 program, we implement a
+ * postprocessor that takes a solution that consists of velocity, pressure and
+ * temperature (dim+2 components) and computes a variety of output quantities,
+ * some of which are vector valued and some of which are scalar. On the other
+ * hand, in step-29 we implement a postprocessor that only computes the
+ * magnitude of a complex number given by a two-component finite element. It
+ * seems silly to have to implement four virtual functions for this
+ * (compute_derived_quantities_scalar() or
+ * compute_derived_quantities_vector(), get_names(),
+ * get_needed_update_flags() and get_data_component_interpretation()).
+ *
+ * To this end there are two classes DataPostprocessorScalar and
+ * DataPostprocessorVector that are meant to be used if the output quantity is
+ * either a single scalar or a single vector (here used meaning to have
+ * exactly dim components). When using these classes, one only has to write a
+ * constructor that passes the name of the output variable and the update
+ * flags to the constructor of the base class and overload the function that
+ * actually computes the results.
+ *
+ * @ingroup output
+ * @author Tobias Leicht, 2007
+ */
+template <int dim>
+class DataPostprocessor: public Subscriptor
+{
+public:
+  /**
+   * Destructor. This function doesn't actually do anything but is marked as
+   * virtual to ensure that data postprocessors can be destroyed through
+   * pointers to the base class.
+   */
+  virtual ~DataPostprocessor ();
+
+  /**
+   * This is the main function which actually performs the postprocessing. The
+   * last argument is a reference to the postprocessed data which has correct
+   * size already and must be filled by this function. @p uh is a reference to
+   * a vector of data values at all points, @p duh the same for gradients, @p
+   * dduh for second derivatives and @p normals is a reference to the normal
+   * vectors. Note that these four references will only contain valid
+   * data, if the respective flags are returned by @p get_needed_update_flags,
+   * otherwise those vectors will be in an unspecified state. @p normals will
+   * always be an empty vector when working on cells, not on faces.
+   *
+   * This function is called when the original data vector represents scalar
+   * data, i.e. the finite element in use has only a single vector component.
+   */
+  virtual
+  void
+  compute_derived_quantities_scalar (const std::vector<double>         &uh,
+                                     const std::vector<Tensor<1,dim> > &duh,
+                                     const std::vector<Tensor<2,dim> > &dduh,
+                                     const std::vector<Point<dim> >    &normals,
+                                     const std::vector<Point<dim> >    &evaluation_points,
+                                     std::vector<Vector<double> >      &computed_quantities) const;
+
+  /**
+   * Same as the compute_derived_quantities_scalar() function, but this
+   * function is called when the original data vector represents vector data,
+   * i.e. the finite element in use has multiple vector components.
+   */
+  virtual
+  void
+  compute_derived_quantities_vector (const std::vector<Vector<double> >              &uh,
+                                     const std::vector<std::vector<Tensor<1,dim> > > &duh,
+                                     const std::vector<std::vector<Tensor<2,dim> > > &dduh,
+                                     const std::vector<Point<dim> >                  &normals,
+                                     const std::vector<Point<dim> >                  &evaluation_points,
+                                     std::vector<Vector<double> >                    &computed_quantities) const;
+
+  /**
+   * Return the vector of strings describing the names of the computed
+   * quantities.
+   */
+  virtual std::vector<std::string> get_names () const = 0;
+
+  /**
+   * This function returns information about how the individual components of
+   * output files that consist of more than one data set are to be
+   * interpreted.
+   *
+   * For example, if one has a finite element for the Stokes equations in 2d,
+   * representing components (u,v,p), one would like to indicate that the
+   * first two, u and v, represent a logical vector so that later on when we
+   * generate graphical output we can hand them off to a visualization program
+   * that will automatically know to render them as a vector field, rather
+   * than as two separate and independent scalar fields.
+   *
+   * The default implementation of this function returns a vector of values
+   * DataComponentInterpretation::component_is_scalar, indicating that all
+   * output components are independent scalar fields. However, if a derived
+   * class produces data that represents vectors, it may return a vector that
+   * contains values DataComponentInterpretation::component_is_part_of_vector.
+   * In the example above, one would return a vector with components
+   * (DataComponentInterpretation::component_is_part_of_vector,
+   * DataComponentInterpretation::component_is_part_of_vector,
+   * DataComponentInterpretation::component_is_scalar) for (u,v,p).
+   */
+  virtual
+  std::vector<DataComponentInterpretation::DataComponentInterpretation>
+  get_data_component_interpretation () const;
+
+  /**
+   * Return which data has to be provided to compute the derived quantities.
+   * This has to be a combination of @p update_values, @p update_gradients and
+   * @p update_hessians. If the DataPostprocessor is to be used in combination
+   * with DataOutFaces, you may also ask for an update of normals via the @p
+   * update_normal_vectors flag.
+   */
+  virtual UpdateFlags get_needed_update_flags () const = 0;
+};
+
+
+
+/**
+ * This class provides a simpler interface to the functionality offered by the
+ * DataPostprocessor class in case one wants to compute only a single scalar
+ * quantity from the finite element field passed to the DataOut class. For
+ * this particular case, it is clear what the returned value of
+ * DataPostprocessor::get_data_component_interpretation() should be and we
+ * pass the values returned by get_names() and get_needed_update_flags() to
+ * the constructor so that derived classes do not have to implement these
+ * functions by hand.
+ *
+ * All derived classes have to do is implement a constructor and overload
+ * either DataPostprocessor::compute_derived_quantities_scalar() or
+ * DataPostprocessor::compute_derived_quantities_vector().
+ *
+ * An example of how this class can be used can be found in step-29.
+ *
+ * @ingroup output
+ * @author Wolfgang Bangerth, 2011
+ */
+template <int dim>
+class DataPostprocessorScalar : public DataPostprocessor<dim>
+{
+public:
+  /**
+   * Constructor. Take the name of the single scalar variable computed by
+   * classes derived from the current one, as well as the update flags
+   * necessary to compute this quantity.
+   *
+   * @param name The name by which the scalar variable computed by this class
+   * should be made available in graphical output files.
+   * @param update_flags This has to be a combination of @p update_values, @p
+   * update_gradients and @p update_hessians. If the DataPostprocessor is to
+   * be used in combination with DataOutFaces, you may also ask for an update
+   * of normals via the @p update_normal_vectors flag.
+   */
+  DataPostprocessorScalar (const std::string &name,
+                           const UpdateFlags  update_flags);
+
+  /**
+   * Return the vector of strings describing the names of the computed
+   * quantities. Given the purpose of this class, this is a vector with a
+   * single entry equal to the name given to the constructor.
+   */
+  virtual std::vector<std::string> get_names () const;
+
+  /**
+   * This function returns information about how the individual components of
+   * output files that consist of more than one data set are to be
+   * interpreted. Since the current class is meant to be used for a single
+   * scalar result variable, the returned value is obviously
+   * DataComponentInterpretation::component_is_scalar.
+   */
+  virtual
+  std::vector<DataComponentInterpretation::DataComponentInterpretation>
+  get_data_component_interpretation () const;
+
+  /**
+   * Return which data has to be provided to compute the derived quantities.
+   * The flags returned here are the ones passed to the constructor of this
+   * class.
+   */
+  virtual UpdateFlags get_needed_update_flags () const;
+
+private:
+  /**
+   * Copies of the two arguments given to the constructor of this class.
+   */
+  const std::string name;
+  const UpdateFlags update_flags;
+};
+
+
+
+/**
+ * This class provides a simpler interface to the functionality offered by the
+ * DataPostprocessor class in case one wants to compute only a single vector
+ * quantity (defined as having exactly dim components) from the finite element
+ * field passed to the DataOut class. For this particular case, it is clear
+ * what the returned value of
+ * DataPostprocessor::get_data_component_interpretation() should be and we
+ * pass the values returned by get_names() and get_needed_update_flags() to
+ * the constructor so that derived classes do not have to implement these
+ * functions by hand.
+ *
+ * All derived classes have to do is implement a constructor and overload
+ * either DataPostprocessor::compute_derived_quantities_scalar() or
+ * DataPostprocessor::compute_derived_quantities_vector().
+ *
+ * An example of how the closely related class DataPostprocessorScalar is used
+ * can be found in step-29.
+ *
+ * @ingroup output
+ * @author Wolfgang Bangerth, 2011
+ */
+template <int dim>
+class DataPostprocessorVector : public DataPostprocessor<dim>
+{
+public:
+  /**
+   * Constructor. Take the name of the single vector variable computed by
+   * classes derived from the current one, as well as the update flags
+   * necessary to compute this quantity.
+   *
+   * @param name The name by which the vector variable computed by this class
+   * should be made available in graphical output files.
+   * @param update_flags This has to be a combination of @p update_values, @p
+   * update_gradients and @p update_hessians. If the DataPostprocessor is to
+   * be used in combination with DataOutFaces, you may also ask for an update
+   * of normals via the @p update_normal_vectors flag.
+   */
+  DataPostprocessorVector (const std::string &name,
+                           const UpdateFlags  update_flags);
+
+  /**
+   * Return the vector of strings describing the names of the computed
+   * quantities. Given the purpose of this class, this is a vector with dim
+   * entries all equal to the name given to the constructor.
+   */
+  virtual std::vector<std::string> get_names () const;
+
+  /**
+   * This function returns information about how the individual components of
+   * output files that consist of more than one data set are to be
+   * interpreted. Since the current class is meant to be used for a single
+   * vector result variable, the returned value consists of dim copies of
+   * DataComponentInterpretation::component_is_part_of_vector.
+   */
+  virtual
+  std::vector<DataComponentInterpretation::DataComponentInterpretation>
+  get_data_component_interpretation () const;
+
+  /**
+   * Return which data has to be provided to compute the derived quantities.
+   * The flags returned here are the ones passed to the constructor of this
+   * class.
+   */
+  virtual UpdateFlags get_needed_update_flags () const;
+
+private:
+  /**
+   * Copies of the two arguments given to the constructor of this class.
+   */
+  const std::string name;
+  const UpdateFlags update_flags;
+};
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/numerics/derivative_approximation.h b/include/deal.II/numerics/derivative_approximation.h
new file mode 100644
index 0000000..5f232ac
--- /dev/null
+++ b/include/deal.II/numerics/derivative_approximation.h
@@ -0,0 +1,310 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__derivative_approximation_h
+#define dealii__derivative_approximation_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/std_cxx11/tuple.h>
+#include <deal.II/base/synchronous_iterator.h>
+#include <deal.II/fe/fe_update_flags.h>
+#include <deal.II/fe/mapping.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/grid/filtered_iterator.h>
+#ifdef _MSC_VER
+#  include <deal.II/dofs/dof_accessor.h>
+#endif
+#include <utility>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int dim, int spacedim> class DoFHandler;
+namespace hp
+{
+  template <int dim, int spacedim> class DoFHandler;
+}
+
+
+
+/**
+ * This namespace provides functions that compute a cell-wise approximation of
+ * the norm of a derivative of a finite element field by taking difference
+ * quotients between neighboring cells. This is a rather simple but efficient
+ * form to get an error indicator, since it can be computed with relatively
+ * little numerical effort and yet gives a reasonable approximation.
+ *
+ * The way the difference quotients are computed on cell $K$ is the following
+ * (here described for the approximation of the gradient of a finite element
+ * field, but see below for higher derivatives): let $K'$ be a neighboring
+ * cell, and let $y_{K'}=x_{K'}-x_K$ be the distance vector between the
+ * centers of the two cells, then $ \frac{u_h(x_{K'}) - u_h(x_K)}{ \|y_{K'}\|
+ * }$ is an approximation of the directional derivative $ \nabla u(x_K) \cdot
+ * \frac{y_{K'}}{ \|y_{K'}\| }.$ By multiplying both terms by $\frac{y_{K'}}{
+ * \|y_{K'}\| }$ from the left and summing over all neighbors $K'$, we obtain
+ * $ \sum_{K'} \left( \frac{y_{K'}}{ \|y_{K'}\|} \frac{y_{K'}^T}{ \|y_{K'}\| }
+ * \right) \nabla u(x_K) \approx \sum_{K'} \left( \frac{y_{K'}}{ \|y_{K'}\|}
+ * \frac{u_h(x_{K'}) - u_h(x_K)}{ \|y_{K'}\| }  \right).$
+ *
+ * Thus, if the matrix $ Y =  \sum_{K'} \left( \frac{y_{K'}}{\|y_{K'}\|}
+ * \frac{y_{K'}^T}{ \|y_{K'}\| } \right)$ is regular (which is the case when
+ * the vectors $y_{K'}$ to all neighbors span the whole space), we can obtain
+ * an approximation to the true gradient by $ \nabla u(x_K) \approx Y^{-1}
+ * \sum_{K'} \left( \frac{y_{K'}}{\|y_{K'}\|} \frac{u_h(x_{K'}) - u_h(x_K)}{
+ * \|y_{K'}\| } \right).$ This is a quantity that is easily computed. The
+ * value returned for each cell when calling the @p approximate_gradient
+ * function of this class is the $l_2$ norm of this approximation to the
+ * gradient. To make this a useful quantity, you may want to scale each
+ * element by the correct power of the respective cell size.
+ *
+ * The computation of this quantity must fail if a cell has only neighbors for
+ * which the direction vectors $y_K$ do not span the whole space, since then
+ * the matrix $Y$ is no longer invertible. If this happens, you will get an
+ * error similar to this one:
+ * @code
+ * --------------------------------------------------------
+ * An error occurred in line <749> of file <source/numerics/derivative_approximation.cc> in function
+ *     void DerivativeApproximation::approximate(const Mapping<dim,spacedim>&, const DoFHandlerType<dim,spacedim>&, const InputVector&, unsigned int, const
+ *  std::pair<unsigned int, unsigned int>&, Vector<float>&) [with DerivativeDescription = DerivativeApproximation::Gradient<3>, int
+ * dim = 3, DoFHandlerType = DoFHandler, InputVector = Vector<double>]
+ * The violated condition was:
+ *     determinant(Y) != 0
+ * The name and call sequence of the exception was:
+ *     ExcInsufficientDirections()
+ * Additional Information:
+ * (none)
+ * --------------------------------------------------------
+ * @endcode
+ * As can easily be verified, this can only happen on very coarse grids, when
+ * some cells and all their neighbors have not been refined even once. You
+ * should therefore only call the functions of this class if all cells are at
+ * least once refined. In practice this is not much of a restriction.
+ *
+ *
+ * <h3>Approximation of higher derivatives</h3>
+ *
+ * Similar to the reasoning above, approximations to higher derivatives can be
+ * computed in a similar fashion. For example, the tensor of second
+ * derivatives is approximated by the formula $ \nabla^2 u(x_K) \approx Y^{-1}
+ * \sum_{K'} \left( \frac{y_{K'}}{\|y_{K'}\|} \otimes \frac{\nabla u_h(x_{K'})
+ * - \nabla u_h(x_K)}{ \|y_{K'}\| } \right), $ where $\otimes$ denotes the
+ * outer product of two vectors. Note that unlike the true tensor of second
+ * derivatives, its approximation is not necessarily symmetric. This is due to
+ * the fact that in the derivation, it is not clear whether we shall consider
+ * as projected second derivative the term $\nabla^2 u y_{KK'}$ or $y_{KK'}^T
+ * \nabla^2 u$. Depending on which choice we take, we obtain one approximation
+ * of the tensor of second derivatives or its transpose. To avoid this
+ * ambiguity, as result we take the symmetrized form, which is the mean value
+ * of the approximation and its transpose.
+ *
+ * The returned value on each cell is the spectral norm of the approximated
+ * tensor of second derivatives, i.e. the largest eigenvalue by absolute
+ * value. This equals the largest curvature of the finite element field at
+ * each cell, and the spectral norm is the matrix norm associated to the $l_2$
+ * vector norm.
+ *
+ * Even higher than the second derivative can be obtained along the same lines
+ * as exposed above.
+ *
+ *
+ * <h3>Refinement indicators based on the derivatives</h3>
+ *
+ * If you would like to base a refinement criterion upon these approximations
+ * of the derivatives, you will have to scale the results of this class by an
+ * appropriate power of the mesh width. For example, since $\|u-u_h\|^2_{L_2}
+ * \le C h^2 \|\nabla u\|^2_{L_2}$, it might be the right thing to scale the
+ * indicators as $\eta_K = h \|\nabla u\|_K$, i.e. $\eta_K = h^{1+d/2}
+ * \|\nabla u\|_{\infty;K}$, i.e. the right power is $1+d/2$.
+ *
+ * Likewise, for the second derivative, one should choose a power of the mesh
+ * size $h$ one higher than for the gradient.
+ *
+ *
+ * <h3>Implementation</h3>
+ *
+ * The formulae for the computation of approximations to the gradient and to
+ * the tensor of second derivatives shown above are very much alike. The basic
+ * difference is that in one case the finite difference quotient is a scalar,
+ * while in the other case it is a vector. For higher derivatives, this would
+ * be a tensor of even higher rank. We then have to form the outer product of
+ * this difference quotient with the distance vector $y_{KK'}$, symmetrize it,
+ * contract it with the matrix $Y^{-1}$ and compute its norm. To make the
+ * implementation simpler and to allow for code reuse, all these operations
+ * that are dependent on the actual order of the derivatives to be
+ * approximated, as well as the computation of the quantities entering the
+ * difference quotient, have been separated into auxiliary nested classes
+ * (named @p Gradient and @p SecondDerivative) and the main algorithm is
+ * simply passed one or the other data types and asks them to perform the
+ * order dependent operations. The main framework that is independent of this,
+ * such as finding all active neighbors, or setting up the matrix $Y$ is done
+ * in the main function @p approximate.
+ *
+ * Due to this way of operation, the class may be easily extended for higher
+ * order derivatives than are presently implemented. Basically, only an
+ * additional class along the lines of the derivative descriptor classes @p
+ * Gradient and @p SecondDerivative has to be implemented, with the respective
+ * typedefs and functions replaced by the appropriate analogues for the
+ * derivative that is to be approximated.
+ *
+ * @ingroup numerics
+ * @author Wolfgang Bangerth, 2000
+ */
+namespace DerivativeApproximation
+{
+  /**
+   * This function is used to obtain an approximation of the gradient. Pass it
+   * the DoF handler object that describes the finite element field, a nodal
+   * value vector, and receive the cell-wise Euclidean norm of the
+   * approximated gradient.
+   *
+   * The last parameter denotes the solution component, for which the gradient
+   * is to be computed. It defaults to the first component. For scalar
+   * elements, this is the only valid choice; for vector-valued ones, any
+   * component between zero and the number of vector components can be given
+   * here.
+   *
+   * In a parallel computation the @p solution vector needs to contain the
+   * locally relevant unknowns.
+   */
+  template <int dim, template <int, int> class DoFHandlerType, class InputVector, int spacedim>
+  void
+  approximate_gradient (const Mapping<dim,spacedim>        &mapping,
+                        const DoFHandlerType<dim,spacedim> &dof,
+                        const InputVector                  &solution,
+                        Vector<float>                      &derivative_norm,
+                        const unsigned int                  component = 0);
+
+  /**
+   * Calls the @p approximate_gradient function, see above, with
+   * <tt>mapping=MappingQGeneric@<dim@>(1)</tt>.
+   */
+  template <int dim, template <int, int> class DoFHandlerType, class InputVector, int spacedim>
+  void
+  approximate_gradient (const DoFHandlerType<dim,spacedim> &dof,
+                        const InputVector                  &solution,
+                        Vector<float>                      &derivative_norm,
+                        const unsigned int                  component = 0);
+
+  /**
+   * This function is the analogue to the one above, computing finite
+   * difference approximations of the tensor of second derivatives. Pass it
+   * the DoF handler object that describes the finite element field, a nodal
+   * value vector, and receive the cell-wise spectral norm of the approximated
+   * tensor of second derivatives. The spectral norm is the matrix norm
+   * associated to the $l_2$ vector norm.
+   *
+   * The last parameter denotes the solution component for which the second
+   * derivatives are to be computed. It defaults to the first component. For
+   * scalar elements, this is the only valid choice; for vector-valued ones,
+   * any component between zero and the number of vector components can be
+   * given here.
+   *
+   * In a parallel computation the @p solution vector needs to contain the
+   * locally relevant unknowns.
+   */
+  template <int dim, template <int, int> class DoFHandlerType, class InputVector, int spacedim>
+  void
+  approximate_second_derivative (const Mapping<dim,spacedim>        &mapping,
+                                 const DoFHandlerType<dim,spacedim> &dof,
+                                 const InputVector                  &solution,
+                                 Vector<float>                      &derivative_norm,
+                                 const unsigned int                  component = 0);
+
+  /**
+   * Calls the @p approximate_second_derivative function, see above, with
+   * <tt>mapping=MappingQGeneric@<dim@>(1)</tt>.
+   */
+  template <int dim, template <int, int> class DoFHandlerType, class InputVector, int spacedim>
+  void
+  approximate_second_derivative (const DoFHandlerType<dim,spacedim> &dof,
+                                 const InputVector                  &solution,
+                                 Vector<float>                      &derivative_norm,
+                                 const unsigned int                  component = 0);
+
+  /**
+   * This function calculates the <tt>order</tt>-th order approximate
+   * derivative and returns the full tensor for a single cell.
+   *
+   * The last parameter denotes the solution component for which the
+   * derivative is to be computed. It defaults to the first component. For
+   * scalar elements, this is the only valid choice; for vector-valued ones,
+   * any component between zero and the number of vector components can be
+   * given here.
+   *
+   * In a parallel computation the @p solution vector needs to contain the
+   * locally relevant unknowns.
+   */
+  template <typename DoFHandlerType, int dim, int spacedim, class InputVector, int order>
+  void
+  approximate_derivative_tensor
+  (const Mapping<dim, spacedim>                        &mapping,
+   const DoFHandlerType                                &dof,
+   const InputVector                                   &solution,
+#ifndef _MSC_VER
+   const typename DoFHandlerType::active_cell_iterator &cell,
+#else
+   const TriaActiveIterator <dealii::DoFCellAccessor<DoFHandlerType, false> > &cell,
+#endif
+   Tensor<order, dim>                                  &derivative,
+   const unsigned int                                   component = 0);
+
+  /**
+   * Same as above, with <tt>mapping=MappingQGeneric@<dim@>(1)</tt>.
+   */
+  template <typename DoFHandlerType, int dim, int spacedim, class InputVector, int order>
+  void
+  approximate_derivative_tensor
+  (const DoFHandlerType                                &dof,
+   const InputVector                                   &solution,
+#ifndef _MSC_VER
+   const typename DoFHandlerType::active_cell_iterator &cell,
+#else
+   const TriaActiveIterator<dealii::DoFCellAccessor<DoFHandlerType, false> > &cell,
+#endif
+   Tensor<order, dim>                                  &derivative,
+   const unsigned int                                   component = 0);
+
+  /**
+   * Return the norm of the derivative.
+   */
+  template <int dim, int order>
+  double
+  derivative_norm (const Tensor<order,dim> &derivative);
+
+  /**
+   * Exception: output vector size does not match the number of active cells.
+   */
+  DeclException2 (ExcVectorLengthVsNActiveCells,
+                  int, int,
+                  << "The output vector needs to have a size equal "
+                  "to the number of active cells of your triangulation "
+                  "but has length " << arg1 << ". There are "
+                  << arg2 << " active cells in your triangulation.");
+  /**
+   * Exception: the matrix Y is rank deficient and cannot be inverted.
+   */
+  DeclExceptionMsg (ExcInsufficientDirections,
+                    "We have encountered a cell on which the number of linearly "
+                    "independent directions that span the matrix Y (discussed "
+                    "in the documentation of the DerivativeApproximation "
+                    "class) is not equal to dim. The matrix Y then is "
+                    "rank deficient and can not be inverted.");
+}
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/numerics/dof_output_operator.h b/include/deal.II/numerics/dof_output_operator.h
new file mode 100644
index 0000000..c10475d
--- /dev/null
+++ b/include/deal.II/numerics/dof_output_operator.h
@@ -0,0 +1,80 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__dof_output_operator_h
+#define dealii__dof_output_operator_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/parameter_handler.h>
+#include <deal.II/algorithms/any_data.h>
+#include <deal.II/base/event.h>
+#include <deal.II/algorithms/operator.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/numerics/data_out.h>
+
+#include <fstream>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace Algorithms
+{
+  /**
+   * An output operator writing a separate file in each step and writing the
+   * vectors as finite element functions with respect to a given DoFHandler.
+   */
+  template <typename VectorType, int dim, int spacedim=dim>
+  class DoFOutputOperator : public OutputOperator<VectorType>
+  {
+  public:
+    /**
+     * Constructor. The <tt>filename_base</tt> is the common base name of
+     * all files and the argument <tt>digits</tt> should be the number
+     * of digits of the highest number in the sequence. File names by
+     * default have the form "outputNNN" plus the DataOut default suffix,
+     * with NNN the number set by the last step command. Numbers with
+     * fewer digits are filled with zeros from the left.
+     */
+    DoFOutputOperator (const std::string filename_base = std::string("output"),
+                       const unsigned int digits = 3);
+
+    void parse_parameters(ParameterHandler &param); // handed on to the DataOut member
+    void initialize (const DoFHandler<dim, spacedim> &dof_handler); // sets dof
+
+    virtual OutputOperator<VectorType> &
+    operator << (const AnyData &vectors); // writes one file per call
+
+  private:
+    SmartPointer<const DoFHandler<dim, spacedim>,
+                 DoFOutputOperator<VectorType, dim, spacedim> > dof; // set by initialize()
+
+    const std::string filename_base; // prefix of every output file name
+    const unsigned int digits; // field width of the zero-padded step number
+
+    DataOut<dim> out; // NOTE(review): no spacedim here although dof is DoFHandler<dim, spacedim> -- confirm
+  };
+
+  template <typename VectorType, int dim, int spacedim>
+  inline void
+  DoFOutputOperator<VectorType, dim, spacedim>::initialize(const DoFHandler<dim, spacedim> &dof_handler)
+  {
+    dof = &dof_handler; // only the address is stored; operator<< asserts that it is set
+  }
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/numerics/dof_output_operator.templates.h b/include/deal.II/numerics/dof_output_operator.templates.h
new file mode 100644
index 0000000..6d2df70
--- /dev/null
+++ b/include/deal.II/numerics/dof_output_operator.templates.h
@@ -0,0 +1,69 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/numerics/dof_output_operator.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace Algorithms
+{
+  template <typename VectorType, int dim, int spacedim>
+  DoFOutputOperator<VectorType, dim, spacedim>::DoFOutputOperator (
+    const std::string filename_base,
+    const unsigned int digits)
+    :
+    filename_base(filename_base), // member initialized from the parameter of the same name
+    digits(digits)
+  {
+    out.set_default_format(DataOutBase::gnuplot); // gnuplot is the initial default format
+  }
+
+
+  template <typename VectorType, int dim, int spacedim>
+  void
+  DoFOutputOperator<VectorType, dim, spacedim>::parse_parameters(ParameterHandler &param)
+  {
+    out.parse_parameters(param); // the DataOut member does all the parsing
+  }
+
+  template <typename VectorType, int dim, int spacedim>
+  OutputOperator<VectorType> &
+  DoFOutputOperator<VectorType, dim, spacedim>::operator<<(
+    const AnyData &data)
+  {
+    Assert ((dof!=0), ExcNotInitialized()); // initialize() must have been called
+    out.attach_dof_handler (*dof);
+    for (unsigned int i=0; i<data.size(); ++i)
+      {
+        const VectorType *p = data.try_read_ptr<VectorType>(i);
+        if (p!=0) // entries for which no VectorType pointer is returned are skipped
+          {
+            out.add_data_vector (*p, data.name(i));
+          }
+      }
+    std::ostringstream streamOut; // NOTE(review): <sstream>/<iomanip> are not included directly here
+    streamOut << filename_base // name = base + zero-padded step + format suffix
+              << std::setw(digits) << std::setfill('0') << this->step
+              << out.default_suffix();
+    std::ofstream out_filename (streamOut.str().c_str()); // NOTE(review): open failure is not checked
+    out.build_patches ();
+    out.write (out_filename);
+    out.clear (); // presumably resets out so the next call starts empty -- confirm
+    return *this;
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/include/deal.II/numerics/dof_print_solver_step.h b/include/deal.II/numerics/dof_print_solver_step.h
new file mode 100644
index 0000000..3ed3c43
--- /dev/null
+++ b/include/deal.II/numerics/dof_print_solver_step.h
@@ -0,0 +1,131 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__dof_print_solver_step_h
+#define dealii__dof_print_solver_step_h
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/lac/solver_control.h>
+#include <deal.II/lac/vector_memory.h>
+#include <deal.II/numerics/data_out.h>
+
+#include <sstream>
+#include <iomanip>
+#include <fstream>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/**
+ * Print intermediate solutions in solvers.  This is derived from a solver
+ * class provided as template argument.  It implements the @p print_vectors
+ * function of the solver using a DoFHandler. This way, the intermediate
+ * vectors can be viewed as finite element functions. This class might be used
+ * first to understand how solvers work (for example to visualize the
+ * smoothing properties of various solvers, e.g. in a multigrid context), and
+ * second to investigate why and how a solver fails to solve certain classes
+ * of problems.
+ *
+ * Objects of this class are provided with a solver class through a template
+ * argument, and with a file name (as a string), with which a new file is
+ * constructed in each iteration (named <tt>[basename][step][suffix]</tt>, the
+ * step number zero-padded to three digits) and into which the solution is
+ * written as a finite element field using the DataOut class. Please note that
+ * this class may produce enormous amounts of data!
+ *
+ * @ingroup output
+ * @author Guido Kanschat, 2000
+ */
+template<int dim, typename SolverType, class VectorType = Vector<double> >
+class DoFPrintSolverStep : public SolverType
+{
+public:
+  /**
+   * Constructor.  First, we take the arguments needed for the solver. @p
+   * data_out is the object doing the output as a finite element function.
+   *
+   * One output file with the name <tt>[basename][step][suffix]</tt> will be
+   * produced for each iteration step.
+   */
+  DoFPrintSolverStep (SolverControl &control,
+                      VectorMemory<VectorType> &mem,
+                      DataOut<dim>             &data_out,
+                      const std::string        &basename);
+
+  /**
+   * Call-back of the iterative method; writes @p x, @p r and @p d to a new file.
+   */
+  virtual void print_vectors (const unsigned int step,
+                              const VectorType   &x,
+                              const VectorType   &r,
+                              const VectorType   &d) const;
+private:
+  /**
+   * Output object.
+   */
+  DataOut<dim> &out;
+
+  /**
+   * Base of filenames.
+   */
+  const std::string basename;
+};
+
+
+/* ----------------------- template functions --------------- */
+
+template<int dim, typename SolverType, class VectorType>
+DoFPrintSolverStep<dim, SolverType, VectorType>::DoFPrintSolverStep
+(SolverControl            &control,
+ VectorMemory<VectorType> &mem,
+ DataOut<dim>             &data_out,
+ const std::string        &basename)
+  : SolverType (control, mem),
+    out (data_out),     // reference member: data_out itself is used, not a copy
+    basename (basename) // member initialized from the parameter of the same name
+{}
+
+
+template<int dim, typename SolverType, class VectorType>
+void
+DoFPrintSolverStep<dim, SolverType, VectorType>::print_vectors
+(const unsigned int  step,
+ const VectorType   &x,
+ const VectorType   &r,
+ const VectorType   &d) const
+{
+  out.clear_data_vectors(); // allowed in a const function: out is a reference member
+  out.add_data_vector(x, "solution");
+  out.add_data_vector(r, "residual");
+  out.add_data_vector(d, "update");
+
+  std::ostringstream filename;
+  filename << basename // name = basename + step padded to 3 digits + format suffix
+           << std::setw(3) << std::setfill('0') << step
+           << out.default_suffix();
+
+  const std::string fname = filename.str();
+
+  deallog << "Writing file:" << fname << std::endl;
+
+  out.build_patches();
+  std::ofstream of (fname.c_str()); // NOTE(review): open failure is not checked
+  out.write (of);
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/numerics/error_estimator.h b/include/deal.II/numerics/error_estimator.h
new file mode 100644
index 0000000..52407af
--- /dev/null
+++ b/include/deal.II/numerics/error_estimator.h
@@ -0,0 +1,777 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__error_estimator_h
+#define dealii__error_estimator_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/function.h>
+#include <deal.II/dofs/function_map.h>
+#include <deal.II/fe/component_mask.h>
+#include <map>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+template <int, int> class DoFHandler;
+template <int, int> class Mapping;
+template <int> class Quadrature;
+
+namespace hp
+{
+  template <int, int> class DoFHandler;
+  template <int> class QCollection;
+}
+
+
+
+/**
+ * Implementation of the error indicator by Kelly, De S. R. Gago, Zienkiewicz
+ * and Babuska and its modification for the hp-FEM. This error indicator tries
+ * to approximate the error per cell by integration of the jump of the
+ * gradient of the solution along the faces of each cell.  It can be
+ * understood as a gradient recovery estimator; see the survey of Ainsworth
+ * and Oden, "A Posteriori Error Estimation in Finite Element Analysis"
+ * (Wiley, 2000) for a complete discussion.
+ *
+ * In the original Kelly error estimator, the contribution of each face to the
+ * cell error is scaled with the cell diagonal. In the modified version,
+ * however, we employ a scaling factor which depends on the face diagonal and
+ * polynomial degrees of the adjacent elements. The choice between the two is
+ * done by means of the enumerator, defined within the class.
+ *
+ * @note In spite of the name, Kelly estimator is not truly an a posteriori
+ * error estimator, even if applied to the Poisson problem only. It gives good
+ * hints for mesh refinement, but the estimate is not to be trusted. For
+ * higher order trial spaces the integrals computed here tend to zero faster
+ * than the error itself, thus ruling out the values as error estimators.
+ * However, the modified version discussed below can be utilised to obtain the
+ * reliable error estimator by adding the residual (volume) part.
+ *
+ * The error estimator really only estimates the error for the generalized
+ * Poisson equation $-\nabla\cdot a(x) \nabla u = f$ with either Dirichlet
+ * boundary conditions or generalized Neumann boundary conditions involving
+ * the conormal derivative $a\frac{du}{dn} = g$.
+ *
+ * The error estimator returns a vector of estimated errors per cell which can
+ * be used to feed the GridRefinement::refine_fixed_fraction,
+ * GridRefinement::refine_fixed_number, and similar functions. This vector
+ * contains elements of data type @p float, rather than @p double, since
+ * accuracy is not important in the current context.
+ *
+ * The full reference for the paper in which this error estimator is defined
+ * is as follows:
+ * @code{.bib}
+ * @Article{KGZB83,
+ *   author =       {Kelly, D. W. and {De S. R. Gago}, J. P. and Zienkiewicz, O. C.
+ *                   and Babu\v{s}ka, I.},
+ *   title =        {A posteriori error analysis and adaptive processes
+ *                   in the finite element method: Part {I}--Error Analysis},
+ *   journal =      {Int. J. Num. Meth. Engrg.},
+ *   year =         {1983},
+ *   volume =       {19},
+ *   pages =        {1593--1619}
+ * }
+ * @endcode
+ *
+ *
+ * <h3>Implementation</h3>
+ *
+ * In principle, the implementation of the error estimation is simple: let \f[
+ * \eta_K^2 = \sum_{F\in\partial K} c_F \int_{\partial K_F} \left[a
+ * \frac{\partial u_h}{\partial n}\right]^2 do \f] be the error estimator for
+ * cell $K$. $[\cdot]$ denotes the jump of the argument at the face. In the
+ * paper of Ainsworth $ c_F=\frac h{24} $, but this factor is a bit esoteric,
+ * stemming from interpolation estimates and stability constants which may
+ * hold for the Poisson problem, but may not hold for more general situations.
+ * Alternatively, we consider the case when $ c_F=\frac {h_F}{2p_F} $, where $
+ * h_F $ is face diagonal and $ p_F=max(p^+,p^-) $ is the maximum polynomial
+ * degree of adjacent elements. The choice between the two is done by means of
+ * the enumerator, provided as the last argument in all functions.
+ *
+ * To perform the integration, use is made of the FEFaceValues and
+ * FESubfaceValues classes. The integration is performed by looping over all
+ * cells and integrating over faces that are not yet treated. This way we
+ * avoid integration on faces twice, once for each time we visit one of the
+ * adjacent cells. In a second loop over all cells, we sum up the
+ * contributions of the faces (which are the integrated square of the jumps
+ * times some factor) of each cell and take the square root.
+ *
+ * The integration is done using a quadrature formula on the face. For linear
+ * trial functions (FEQ1), the QGauss2 or even the QMidpoint rule will
+ * suffice. For higher order elements, it is necessary to utilize higher order
+ * quadrature formulae as well.
+ *
+ * We store the contribution of each face in a @p map, as provided by the C++
+ * standard library, with the iterator pointing to that face being the key
+ * into the map. When looping the second time over all cells, we have to sum
+ * up the contributions of the faces and take the square root. For the Kelly
+ * estimator, the multiplication with $\frac h{24}$ is done in the second
+ * loop. By doing so we avoid problems to decide with which $h$ to multiply,
+ * that of the cell on the one or that of the cell on the other side of the
+ * face. Whereas for the hp-estimator the @p map stores integrals multiplied
+ * by $\frac {h_F}{2p_F}$, which are then summed in the second loop.
+ *
+ * $h$ ($h_F$) is taken to be the greatest length of the diagonals of the cell
+ * (face). For more or less uniform cells (faces) without deformed angles,
+ * this coincides with the diameter of the cell (face).
+ *
+ *
+ * <h3>Vector-valued functions</h3>
+ *
+ * If the finite element field for which the error is to be estimated is
+ * vector-valued, i.e. the finite element has more than one component, then
+ * all the above can be applied to all or only some components at the same
+ * time. The main function of this class takes a list of flags denoting the
+ * components to which the error estimator is to be applied; by
+ * default, it is a list of only @p trues, meaning that all variables shall be
+ * treated.
+ *
+ * In case the different components of a field have different physical meaning
+ * (for example velocity and pressure in the Stokes equations), it would be
+ * senseless to use the same coefficient for all components. In that case, you
+ * can pass a function with as many components as there are components in the
+ * finite element field and each component of the error estimator will then be
+ * weighted by the respective component in this coefficient function. In the
+ * other case, when all components have the same meaning (for example the
+ * displacements in Lame's equations of elasticity), you can specify a scalar
+ * coefficient which will then be used for all components.
+ *
+ *
+ * <h3>%Boundary values</h3>
+ *
+ * If the face is at the boundary, i.e. there is no neighboring cell to which
+ * the jump in the gradient could be computed, there are two possibilities:
+ * <ul>
+ * <li> The face belongs to a Dirichlet boundary. Then the face is not
+ * considered, which can be justified looking at a dual problem technique and
+ * should hold exactly if the boundary can be approximated exactly by the
+ * finite element used (i.e. it is a linear boundary for linear finite
+ * elements, quadratic for isoparametric quadratic elements, etc). For
+ * boundaries which can not be exactly approximated, one should consider the
+ * difference $z-z_h$ on the face, $z$ being a dual problem's solution which
+ * is zero at the true boundary and $z_h$ being an approximation, which in
+ * most cases will be zero on the numerical boundary. Since on the numerical
+ * boundary $z$ will not be zero in general, we would get another term here,
+ * but this one is neglected for practical reasons, in the hope that the error
+ * made here will tend to zero faster than the energy error we wish to
+ * estimate.
+ *
+ * Though no integration is necessary, in the list of face contributions we
+ * store a zero for this face, which makes summing up the contributions of the
+ * different faces to the cells easier.
+ *
+ * <li> The face belongs to a Neumann boundary.  In this case, the
+ * contribution of the face $F\in\partial K$ looks like \f[ n_F\int_F
+ * \left|g-a\frac{\partial u_h}{\partial n}\right|^2 ds \f] where $g$ is the
+ * Neumann boundary function, $n_F=\frac {h}{24}$ and $n_F=\frac {h_F}{p}$ for
+ * the Kelly and hp-estimator, respectively. If the finite element is vector-
+ * valued, then obviously the function denoting the Neumann boundary
+ * conditions needs to be vector-valued as well.
+ *
+ * <li> No other boundary conditions are considered.
+ * </ul>
+ *
+ * In practice, if you have Robin boundary conditions or are too lazy to
+ * accurately describe Neumann values, then this is rarely an issue: if you
+ * don't say anything in the map about a particular part of the boundary then
+ * the Kelly indicator will simply assume that the solution is correct on that
+ * part of the boundary and not touch it. Of course, if you have a
+ * Neumann or Robin boundary, that isn't quite true, there is going to be a
+ * difference between the normal derivative of the numerical solution and the
+ * Neumann values these normal derivatives should equal. So if we simply
+ * ignore those parts of the boundary, we'll underestimate the error. In
+ * practice, this rarely appears to be a problem -- you may not refine the
+ * cell this time around but you'll probably refine it in the next refinement
+ * step and everything is good again. After all, for all problems but the
+ * Laplace equation, the Kelly indicator is only an indicator, not an
+ * estimator, and so the values it computes are not exact error
+ * representations anyway.
+ *
+ *
+ * <h3>Handling of hanging nodes</h3>
+ *
+ * The integration along faces with hanging nodes is quite tricky, since one
+ * of the elements has to be shifted one level up or down. See the
+ * documentation for the FESubfaceValues class for more information about
+ * technical issues regarding this topic.
+ *
+ * In praxi, since we integrate over each face only once, we do this when we
+ * are on the coarser one of the two cells adjacent to a subface (a subface is
+ * defined to be the child of a face; seen from the coarse cell, it is a
+ * subface, while seen from the refined cell it is one of its faces). The
+ * reason is that finding neighborship information is a bit easier then, but
+ * that's all practical reasoning, nothing fundamental.
+ *
+ * Since we integrate from the coarse side of the face, we have the mother
+ * face readily at hand and store the result of the integration over that
+ * mother face (being the sum of the integrals along the subfaces) in the
+ * abovementioned map of integrals as well. This consumes some memory more
+ * than needed, but makes the summing up of the face contributions to the
+ * cells easier, since then we have the information from all faces of all
+ * cells at hand and need not think about explicitly determining whether a
+ * face was refined or not. The same applies for boundary faces, see above.
+ *
+ *
+ * <h3>Multiple solution vectors</h3>
+ *
+ * In some cases, for example in time-dependent problems, one would like to
+ * compute the error estimates for several solution vectors on the same grid
+ * at once, with the same coefficients, boundary condition object, etc, e.g.
+ * for the solutions on several successive time steps. One could then call the
+ * functions of this class several times for each solution. However, the
+ * largest factor in the computation of the error estimates (in terms of
+ * computing time) is initialization of FEFaceValues and FESubfaceValues
+ * objects, and iterating through all faces and subfaces. If the solution
+ * vectors live on the same grid, this effort can be reduced significantly by
+ * treating all solution vectors at the same time, initializing the
+ * FEFaceValues objects only once per cell and for all solution vectors at
+ * once, and also looping through the triangulation only once. For this
+ * reason, besides the @p estimate function in this class that takes a single
+ * input vector and returns a single output vector, there is also a function
+ * that accepts several in- and output vectors at the same time.
+ *
+ * @ingroup numerics
+ * @author Wolfgang Bangerth, 1998, 1999, 2000, 2004, 2006, Denis Davydov,
+ * 2015; parallelization by Thomas Richter, 2000
+ */
+template <int dim, int spacedim=dim>
+class KellyErrorEstimator
+{
+public:
+  /**
+   * The enum type given to the class functions to decide on the scaling
+   * factors of the facial integrals.
+   */
+  enum Strategy
+  {
+    //! Kelly error estimator with the factor $\frac {h}{24}$ (default @p strategy of estimate()).
+    cell_diameter_over_24 = 0,
+    //! The boundary residual estimator with the factor $\frac {h_F}{2 max(p^+,p^-)}$.
+    face_diameter_over_twice_max_degree
+  };
+
+  /**
+   * Implementation of the error estimator described above. You may give a
+   * coefficient, but there is a default value which denotes the constant
+   * coefficient with value one. The coefficient function may either be a
+   * scalar one, in which case it is used for all components of the finite
+   * element, or a vector-valued one with as many components as there are in
+   * the finite element; in the latter case, each component is weighted by the
+   * respective component in the coefficient.
+   *
+   * You might give a list of components you want to evaluate, in case the
+   * finite element used by the DoFHandler object is vector-valued. You then
+   * have to set those entries to true in the bit-vector @p component_mask
+   * (see
+   * @ref GlossComponentMask
+   * ) for which the respective component is to be used in the error
+   * estimator. The default is to use all components, which is done by either
+   * providing a bit-vector with all-set entries, or an empty bit-vector.
+   *
+   * The @p subdomain_id parameter indicates whether we shall compute
+   * indicators for all cells (in case its value is the default,
+   * <tt>numbers::invalid_subdomain_id</tt>), or only for the cells belonging
+   * to a certain subdomain with the given indicator. The latter case is used
+   * for parallel computations where all processor nodes have the global grid
+   * stored, and could well compute all the indicators for all cells
+   * themselves, but where it is more efficient to have each process compute
+   * only indicators for the cells it owns, and have them exchange the
+   * resulting information afterwards. This is in particular true for the case
+   * where meshes are very large and computing indicators for @em every cell
+   * is too expensive, while computing indicators for only local cells is
+   * acceptable. Note that if you only ask for the indicators of a certain
+   * subdomain to be computed, you must nevertheless make sure that this
+   * function has access to the correct node values of @em all degrees of
+   * freedom. This is since the function needs to access DoF values on
+   * neighboring cells as well, even if they belong to a different subdomain.
+   *
+   * The @p material_id parameter has a similar meaning: if not set to its
+   * default value (which is numbers::invalid_material_id), it means that
+   * indicators will only be computed for cells with this particular material
+   * id.
+   *
+   * The @p n_threads parameter used to indicate the number of threads to be
+   * used to compute the error estimator. This parameter is now ignored, with
+   * the number of threads determined automatically. The parameter is retained
+   * for compatibility with old versions of the library.
+   *
+   * The @p strategy parameter is used to choose the scaling factor for the
+   * integral over cell's faces.
+   *
+   * @note If the DoFHandler object given as an argument to this function
+   * builds on a parallel::distributed::Triangulation, this function skips
+   * computations on all cells that are not locally owned. In that case, the
+   * only valid value for the subdomain_id argument (besides the invalid
+   * value) is the subdomain id that is associated with the current
+   * processor, as reported by
+   * parallel::distributed::Triangulation::locally_owned_subdomain(). Even
+   * though nothing is computed on cells that we don't locally own, the error
+   * indicator vector must still have a length equal to the number of active
+   * cells in the mesh as reported by
+   * parallel::distributed::Triangulation::n_locally_owned_active_cells().
+   */
+  template <typename InputVector, typename DoFHandlerType>
+  static void estimate
+  (const Mapping<dim, spacedim>               &mapping,
+   const DoFHandlerType                       &dof,
+   const Quadrature<dim-1>                    &quadrature,
+   const typename FunctionMap<spacedim>::type &neumann_bc,
+   const InputVector                          &solution,
+   Vector<float>                              &error,
+   const ComponentMask                        &component_mask = ComponentMask(),
+   const Function<spacedim>                   *coefficients   = 0,
+   const unsigned int                          n_threads      = numbers::invalid_unsigned_int,
+   const types::subdomain_id                   subdomain_id   = numbers::invalid_subdomain_id,
+   const types::material_id                    material_id    = numbers::invalid_material_id,
+   const Strategy                              strategy       = cell_diameter_over_24);
+
+  /**
+   * Overload without a mapping argument: calls the @p estimate function
+   * above with <tt>mapping=MappingQGeneric@<dim@>(1)</tt>.
+   */
+  template <typename InputVector, typename DoFHandlerType>
+  static void estimate
+  (const DoFHandlerType                       &dof,
+   const Quadrature<dim-1>                    &quadrature,
+   const typename FunctionMap<spacedim>::type &neumann_bc,
+   const InputVector                          &solution,
+   Vector<float>                              &error,
+   const ComponentMask                        &component_mask = ComponentMask(),
+   const Function<spacedim>                   *coefficients   = 0,
+   const unsigned int                          n_threads      = numbers::invalid_unsigned_int,
+   const types::subdomain_id                   subdomain_id   = numbers::invalid_subdomain_id,
+   const types::material_id                    material_id    = numbers::invalid_material_id,
+   const Strategy                              strategy       = cell_diameter_over_24);
+
+  /**
+   * Same function as above, but accepts more than one solution vector and
+   * returns one error vector for each solution vector. For the reason of
+   * existence of this function, see the general documentation of this class.
+   *
+   * Since we do not want to force the user of this function to copy around
+   * their solution vectors, the vector of solution vectors takes pointers to
+   * the solutions, rather than being a vector of vectors. This makes it
+   * simpler to have the solution vectors somewhere in memory, rather than to
+   * have them collected somewhere special. (Note that it is not possible to
+   * construct a vector of references, so we had to use a vector of
+   * pointers.)
+   */
+  template <typename InputVector, typename DoFHandlerType>
+  static void estimate
+  (const Mapping<dim, spacedim>               &mapping,
+   const DoFHandlerType                       &dof,
+   const Quadrature<dim-1>                    &quadrature,
+   const typename FunctionMap<spacedim>::type &neumann_bc,
+   const std::vector<const InputVector *>     &solutions,
+   std::vector<Vector<float>*>                &errors,
+   const ComponentMask                        &component_mask = ComponentMask(),
+   const Function<spacedim>                   *coefficients   = 0,
+   const unsigned int                          n_threads      = numbers::invalid_unsigned_int,
+   const types::subdomain_id                   subdomain_id   = numbers::invalid_subdomain_id,
+   const types::material_id                    material_id    = numbers::invalid_material_id,
+   const Strategy                              strategy       = cell_diameter_over_24);
+
+  /**
+   * Multi-vector overload without a mapping argument: calls the @p estimate
+   * function above with <tt>mapping=MappingQGeneric@<dim@>(1)</tt>.
+   */
+  template <typename InputVector, typename DoFHandlerType>
+  static void estimate
+  (const DoFHandlerType                       &dof,
+   const Quadrature<dim-1>                    &quadrature,
+   const typename FunctionMap<spacedim>::type &neumann_bc,
+   const std::vector<const InputVector *>     &solutions,
+   std::vector<Vector<float>*>                &errors,
+   const ComponentMask                        &component_mask = ComponentMask(),
+   const Function<spacedim>                   *coefficients   = 0,
+   const unsigned int                          n_threads      = numbers::invalid_unsigned_int,
+   const types::subdomain_id                   subdomain_id   = numbers::invalid_subdomain_id,
+   const types::material_id                    material_id    = numbers::invalid_material_id,
+   const Strategy                              strategy       = cell_diameter_over_24);
+
+
+  /**
+   * Single-vector @p estimate with an explicit @p mapping, as above, but taking
+   * a quadrature collection for hp finite element dof handlers.
+   */
+  template <typename InputVector, typename DoFHandlerType>
+  static void estimate
+  (const Mapping<dim, spacedim>               &mapping,
+   const DoFHandlerType                       &dof,
+   const hp::QCollection<dim-1>               &quadrature,
+   const typename FunctionMap<spacedim>::type &neumann_bc,
+   const InputVector                          &solution,
+   Vector<float>                              &error,
+   const ComponentMask                        &component_mask = ComponentMask(),
+   const Function<spacedim>                   *coefficients   = 0,
+   const unsigned int                          n_threads      = numbers::invalid_unsigned_int,
+   const types::subdomain_id                   subdomain_id   = numbers::invalid_subdomain_id,
+   const types::material_id                    material_id    = numbers::invalid_material_id,
+   const Strategy                              strategy       = cell_diameter_over_24);
+
+
+  /**
+   * Equivalent to the set of functions above, except that this one takes a
+   * quadrature collection for hp finite element dof handlers.
+   *
+   * This is the overload without a mapping argument that works on a single
+   * @p solution vector together with a single @p error vector.
+   */
+  template <typename InputVector, typename DoFHandlerType>
+  static void estimate
+  (const DoFHandlerType                       &dof,
+   const hp::QCollection<dim-1>               &quadrature,
+   const typename FunctionMap<spacedim>::type &neumann_bc,
+   const InputVector                          &solution,
+   Vector<float>                              &error,
+   const ComponentMask                        &component_mask = ComponentMask(),
+   const Function<spacedim>                   *coefficients   = 0,
+   const unsigned int                          n_threads      = numbers::invalid_unsigned_int,
+   const types::subdomain_id                   subdomain_id   = numbers::invalid_subdomain_id,
+   const types::material_id                    material_id    = numbers::invalid_material_id,
+   const Strategy                              strategy       = cell_diameter_over_24);
+
+
+  /**
+   * Equivalent to the set of functions above, except that this one takes a
+   * quadrature collection for hp finite element dof handlers.
+   *
+   * This is the overload that takes an explicit @p mapping and works on a
+   * list of pointers to solution vectors (@p solutions) together with a
+   * list of pointers to error vectors (@p errors).
+   */
+  template <typename InputVector, typename DoFHandlerType>
+  static void estimate
+  (const Mapping<dim, spacedim>               &mapping,
+   const DoFHandlerType                       &dof,
+   const hp::QCollection<dim-1>               &quadrature,
+   const typename FunctionMap<spacedim>::type &neumann_bc,
+   const std::vector<const InputVector *>     &solutions,
+   std::vector<Vector<float>*>                &errors,
+   const ComponentMask                        &component_mask = ComponentMask(),
+   const Function<spacedim>                   *coefficients   = 0,
+   const unsigned int                          n_threads      = numbers::invalid_unsigned_int,
+   const types::subdomain_id                   subdomain_id   = numbers::invalid_subdomain_id,
+   const types::material_id                    material_id    = numbers::invalid_material_id,
+   const Strategy                              strategy       = cell_diameter_over_24);
+
+
+  /**
+   * Equivalent to the set of functions above, except that this one takes a
+   * quadrature collection for hp finite element dof handlers.
+   *
+   * This is the overload without a mapping argument that works on a list of
+   * pointers to solution vectors (@p solutions) together with a list of
+   * pointers to error vectors (@p errors).
+   */
+  template <typename InputVector, typename DoFHandlerType>
+  static void estimate
+  (const DoFHandlerType                       &dof,
+   const hp::QCollection<dim-1>               &quadrature,
+   const typename FunctionMap<spacedim>::type &neumann_bc,
+   const std::vector<const InputVector *>     &solutions,
+   std::vector<Vector<float>*>                &errors,
+   const ComponentMask                        &component_mask = ComponentMask(),
+   const Function<spacedim>                   *coefficients   = 0,
+   const unsigned int                          n_threads      = numbers::invalid_unsigned_int,
+   const types::subdomain_id                   subdomain_id   = numbers::invalid_subdomain_id,
+   const types::material_id                    material_id    = numbers::invalid_material_id,
+   const Strategy                              strategy       = cell_diameter_over_24);
+
+  /**
+   * Exception indicating that an invalid ComponentMask argument was
+   * provided. The message text spells out what counts as valid: a default
+   * constructed mask, or one whose length equals the number of vector
+   * components of the finite element and that selects at least one
+   * component.
+   */
+  DeclExceptionMsg (ExcInvalidComponentMask,
+                    "You provided a ComponentMask argument that is invalid. "
+                    "Component masks need to be either default constructed "
+                    "(in which case they indicate that every component is "
+                    "selected) or need to have a length equal to the number "
+                    "of vector components of the finite element in use "
+                    "by the DoFHandler object. In the latter case, at "
+                    "least one component needs to be selected.");
+  /**
+   * Exception indicating that the coefficient function has an unsuitable
+   * number of vector components. According to the message text, it has to
+   * be either scalar or have as many components as the finite element used
+   * by the DoFHandler argument.
+   */
+  DeclExceptionMsg (ExcInvalidCoefficient,
+                    "If you do specify the argument for a (possibly "
+                    "spatially variable) coefficient function for this function, "
+                    "then it needs to refer to a coefficient that is either "
+                    "scalar (has one vector component) or has as many vector "
+                    "components as there are in the finite element used by "
+                    "the DoFHandler argument.");
+  /**
+   * Exception indicating that a function in the boundary function map has
+   * a number of vector components that does not match the finite element.
+   *
+   * The three arguments printed in the message are, in this order: the
+   * boundary indicator the offending function is attached to (@p arg1), the
+   * number of vector components of that function (@p arg2), and the number
+   * of components of the finite element in use (@p arg3).
+   */
+  DeclException3 (ExcInvalidBoundaryFunction,
+                  types::boundary_id,
+                  int,
+                  int,
+                  << "You provided a function map that for boundary indicator "
+                  << arg1 << " specifies a function with "
+                  << arg2 << " vector components. However, the finite "
+                  "element in use has "
+                  << arg3 << " components, and these two numbers need to match.");
+  /**
+   * Exception indicating that the number of input vectors (@p arg1) differs
+   * from the number of output vectors (@p arg2).
+   */
+  DeclException2 (ExcIncompatibleNumberOfElements,
+                  int, int,
+                  << "The number of input vectors, " << arg1
+                  << " needs to be equal to the number of output vectors, "
+                  << arg2
+                  << ". This is not the case in your call of this function.");
+  /**
+   * Exception indicating that no solution vector at all was given as input.
+   */
+  DeclExceptionMsg (ExcNoSolutions,
+                    "You need to specify at least one solution vector as "
+                    "input.");
+};
+
+
+
+/**
+ * This is a specialization of the general template for 1d. The implementation
+ * is sufficiently different for 1d to justify this specialization. The basic
+ * difference between 1d and all other space dimensions is that in 1d, there
+ * are no faces of cells, just the vertices between line segments, so we have
+ * to compute the jump terms differently. However, this class offers the
+ * same set of public functions as the general template, so that a user will
+ * not see any difference.
+ *
+ * Note that, unlike the declarations in the general template, the
+ * estimate() functions declared here have no trailing @p strategy argument.
+ *
+ * @author Wolfgang Bangerth, 1998, 2004.
+ */
+template <int spacedim>
+class KellyErrorEstimator<1,spacedim>
+{
+public:
+  /**
+   * Implementation of the error estimator described above. You may give a
+   * coefficient, but there is a default value which denotes the constant
+   * coefficient with value one. The coefficient function may either be a
+   * scalar one, in which case it is used for all components of the finite
+   * element, or a vector-valued one with as many components as there are in
+   * the finite element; in the latter case, each component is weighted by the
+   * respective component in the coefficient.
+   *
+   * You might give a list of components you want to evaluate, in case the
+   * finite element used by the DoFHandler object is vector-valued. You then
+   * have to set those entries to true in the bit-vector @p component_mask for
+   * which the respective component is to be used in the error estimator. The
+   * default is to use all components, which is done by either providing a
+   * bit-vector with all-set entries, or an empty bit-vector. All the other
+   * parameters are as in the general case used for 2d and higher.
+   *
+   * The parameter n_threads is no longer used and will be ignored.
+   * Multithreading is not presently implemented for 1d, but we retain the
+   * respective parameter for compatibility with the function signature in the
+   * general case.
+   */
+  template <typename InputVector, typename DoFHandlerType>
+  static void estimate
+  (const Mapping<1,spacedim>                  &mapping,
+   const DoFHandlerType                       &dof,
+   const Quadrature<0>                        &quadrature,
+   const typename FunctionMap<spacedim>::type &neumann_bc,
+   const InputVector                          &solution,
+   Vector<float>                              &error,
+   const ComponentMask                        &component_mask = ComponentMask(),
+   const Function<spacedim>                   *coefficients   = 0,
+   const unsigned int                          n_threads      = numbers::invalid_unsigned_int,
+   const types::subdomain_id                   subdomain_id   = numbers::invalid_subdomain_id,
+   const types::material_id                    material_id    = numbers::invalid_material_id);
+
+  /**
+   * Convenience overload of the @p estimate function declared directly
+   * above that takes no mapping argument. All other arguments, including
+   * their default values, are the same.
+   *
+   * NOTE(review): this comment used to name the default mapping as
+   * <tt>MappingQGeneric1@<1@>()</tt>; presumably
+   * <tt>MappingQGeneric@<1,spacedim@>(1)</tt> is meant, in analogy to the
+   * general template -- confirm against the implementation.
+   */
+  template <typename InputVector, typename DoFHandlerType>
+  static void estimate
+  (const DoFHandlerType                       &dof,
+   const Quadrature<0>                        &quadrature,
+   const typename FunctionMap<spacedim>::type &neumann_bc,
+   const InputVector                          &solution,
+   Vector<float>                              &error,
+   const ComponentMask                        &component_mask = ComponentMask(),
+   const Function<spacedim>                   *coefficients   = 0,
+   const unsigned int                          n_threads      = numbers::invalid_unsigned_int,
+   const types::subdomain_id                   subdomain_id   = numbers::invalid_subdomain_id,
+   const types::material_id                    material_id    = numbers::invalid_material_id);
+
+  /**
+   * Same function as above, but accepts more than one solution vector and
+   * returns one error vector for each solution vector. For the reason of
+   * existence of this function, see the general documentation of this class.
+   *
+   * Since we do not want to force the user of this function to copy around
+   * their solution vectors, the vector of solution vectors takes pointers to
+   * the solutions, rather than being a vector of vectors. This makes it
+   * simpler to have the solution vectors somewhere in memory, rather than to
+   * have them collected somewhere special. (Note that it is not possible to
+   * construct a vector of references, so we had to use a vector of
+   * pointers.)
+   */
+  template <typename InputVector, typename DoFHandlerType>
+  static void estimate
+  (const Mapping<1,spacedim>                  &mapping,
+   const DoFHandlerType                       &dof,
+   const Quadrature<0>                        &quadrature,
+   const typename FunctionMap<spacedim>::type &neumann_bc,
+   const std::vector<const InputVector *>     &solutions,
+   std::vector<Vector<float>*>                &errors,
+   const ComponentMask                        &component_mask = ComponentMask(),
+   const Function<spacedim>                   *coefficients   = 0,
+   const unsigned int                          n_threads      = numbers::invalid_unsigned_int,
+   const types::subdomain_id                   subdomain_id   = numbers::invalid_subdomain_id,
+   const types::material_id                    material_id    = numbers::invalid_material_id);
+
+  /**
+   * Convenience overload of the multi-vector @p estimate function declared
+   * directly above that takes no mapping argument. All other arguments,
+   * including their default values, are the same.
+   *
+   * NOTE(review): this comment used to name the default mapping as
+   * <tt>MappingQGeneric1@<1@>()</tt>; presumably
+   * <tt>MappingQGeneric@<1,spacedim@>(1)</tt> is meant, in analogy to the
+   * general template -- confirm against the implementation.
+   */
+  template <typename InputVector, typename DoFHandlerType>
+  static void estimate
+  (const DoFHandlerType                       &dof,
+   const Quadrature<0>                        &quadrature,
+   const typename FunctionMap<spacedim>::type &neumann_bc,
+   const std::vector<const InputVector *>     &solutions,
+   std::vector<Vector<float>*>                &errors,
+   const ComponentMask                        &component_mask = ComponentMask(),
+   const Function<spacedim>                   *coefficients   = 0,
+   const unsigned int                          n_threads      = numbers::invalid_unsigned_int,
+   const types::subdomain_id                   subdomain_id   = numbers::invalid_subdomain_id,
+   const types::material_id                    material_id    = numbers::invalid_material_id);
+
+
+  /**
+   * Equivalent to the set of functions above, except that this one takes a
+   * quadrature collection for hp finite element dof handlers.
+   *
+   * This is the overload that takes an explicit @p mapping and works on a
+   * single @p solution vector together with a single @p error vector.
+   */
+  template <typename InputVector, typename DoFHandlerType>
+  static void estimate
+  (const Mapping<1,spacedim>                  &mapping,
+   const DoFHandlerType                       &dof,
+   const hp::QCollection<0>                   &quadrature,
+   const typename FunctionMap<spacedim>::type &neumann_bc,
+   const InputVector                          &solution,
+   Vector<float>                              &error,
+   const ComponentMask                        &component_mask = ComponentMask(),
+   const Function<spacedim>                   *coefficients   = 0,
+   const unsigned int                          n_threads      = numbers::invalid_unsigned_int,
+   const types::subdomain_id                   subdomain_id   = numbers::invalid_subdomain_id,
+   const types::material_id                    material_id    = numbers::invalid_material_id);
+
+
+  /**
+   * Equivalent to the set of functions above, except that this one takes a
+   * quadrature collection for hp finite element dof handlers.
+   *
+   * This is the overload without a mapping argument that works on a single
+   * @p solution vector together with a single @p error vector.
+   */
+  template <typename InputVector, typename DoFHandlerType>
+  static void estimate
+  (const DoFHandlerType                       &dof,
+   const hp::QCollection<0>                   &quadrature,
+   const typename FunctionMap<spacedim>::type &neumann_bc,
+   const InputVector                          &solution,
+   Vector<float>                              &error,
+   const ComponentMask                        &component_mask = ComponentMask(),
+   const Function<spacedim>                   *coefficients   = 0,
+   const unsigned int                          n_threads      = numbers::invalid_unsigned_int,
+   const types::subdomain_id                   subdomain_id   = numbers::invalid_subdomain_id,
+   const types::material_id                    material_id    = numbers::invalid_material_id);
+
+
+  /**
+   * Equivalent to the set of functions above, except that this one takes a
+   * quadrature collection for hp finite element dof handlers.
+   *
+   * This is the overload that takes an explicit @p mapping and works on a
+   * list of pointers to solution vectors (@p solutions) together with a
+   * list of pointers to error vectors (@p errors).
+   */
+  template <typename InputVector, typename DoFHandlerType>
+  static void estimate
+  (const Mapping<1,spacedim>                  &mapping,
+   const DoFHandlerType                       &dof,
+   const hp::QCollection<0>                   &quadrature,
+   const typename FunctionMap<spacedim>::type &neumann_bc,
+   const std::vector<const InputVector *>     &solutions,
+   std::vector<Vector<float>*>                &errors,
+   const ComponentMask                        &component_mask = ComponentMask(),
+   const Function<spacedim>                   *coefficients   = 0,
+   const unsigned int                          n_threads      = numbers::invalid_unsigned_int,
+   const types::subdomain_id                   subdomain_id   = numbers::invalid_subdomain_id,
+   const types::material_id                    material_id    = numbers::invalid_material_id);
+
+
+  /**
+   * Equivalent to the set of functions above, except that this one takes a
+   * quadrature collection for hp finite element dof handlers.
+   *
+   * This is the overload without a mapping argument that works on a list of
+   * pointers to solution vectors (@p solutions) together with a list of
+   * pointers to error vectors (@p errors).
+   */
+  template <typename InputVector, typename DoFHandlerType>
+  static void estimate
+  (const DoFHandlerType                       &dof,
+   const hp::QCollection<0>                   &quadrature,
+   const typename FunctionMap<spacedim>::type &neumann_bc,
+   const std::vector<const InputVector *>     &solutions,
+   std::vector<Vector<float>*>                &errors,
+   const ComponentMask                        &component_mask = ComponentMask(),
+   const Function<spacedim>                   *coefficients   = 0,
+   const unsigned int                          n_threads      = numbers::invalid_unsigned_int,
+   const types::subdomain_id                   subdomain_id   = numbers::invalid_subdomain_id,
+   const types::material_id                    material_id    = numbers::invalid_material_id);
+
+  /**
+   * Exception indicating that an invalid ComponentMask argument was
+   * provided. The message text spells out what counts as valid: a default
+   * constructed mask, or one whose length equals the number of vector
+   * components of the finite element and that selects at least one
+   * component.
+   */
+  DeclExceptionMsg (ExcInvalidComponentMask,
+                    "You provided a ComponentMask argument that is invalid. "
+                    "Component masks need to be either default constructed "
+                    "(in which case they indicate that every component is "
+                    "selected) or need to have a length equal to the number "
+                    "of vector components of the finite element in use "
+                    "by the DoFHandler object. In the latter case, at "
+                    "least one component needs to be selected.");
+  /**
+   * Exception indicating that the coefficient function has an unsuitable
+   * number of vector components. According to the message text, it has to
+   * be either scalar or have as many components as the finite element used
+   * by the DoFHandler argument.
+   */
+  DeclExceptionMsg (ExcInvalidCoefficient,
+                    "If you do specify the argument for a (possibly "
+                    "spatially variable) coefficient function for this function, "
+                    "then it needs to refer to a coefficient that is either "
+                    "scalar (has one vector component) or has as many vector "
+                    "components as there are in the finite element used by "
+                    "the DoFHandler argument.");
+  /**
+   * Exception indicating that a function in the boundary function map has
+   * a number of vector components that does not match the finite element.
+   *
+   * The three arguments printed in the message are, in this order: the
+   * boundary indicator the offending function is attached to (@p arg1), the
+   * number of vector components of that function (@p arg2), and the number
+   * of components of the finite element in use (@p arg3).
+   */
+  DeclException3 (ExcInvalidBoundaryFunction,
+                  types::boundary_id,
+                  int,
+                  int,
+                  << "You provided a function map that for boundary indicator "
+                  << arg1 << " specifies a function with "
+                  << arg2 << " vector components. However, the finite "
+                  "element in use has "
+                  << arg3 << " components, and these two numbers need to match.");
+  /**
+   * Exception indicating that the number of input vectors (@p arg1) differs
+   * from the number of output vectors (@p arg2).
+   */
+  DeclException2 (ExcIncompatibleNumberOfElements,
+                  int, int,
+                  << "The number of input vectors, " << arg1
+                  << " needs to be equal to the number of output vectors, "
+                  << arg2
+                  << ". This is not the case in your call of this function.");
+  /**
+   * Exception indicating that no solution vector at all was given as input.
+   */
+  DeclExceptionMsg (ExcNoSolutions,
+                    "You need to specify at least one solution vector as "
+                    "input.");
+};
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/numerics/error_estimator.templates.h b/include/deal.II/numerics/error_estimator.templates.h
new file mode 100644
index 0000000..e4f8c4b
--- /dev/null
+++ b/include/deal.II/numerics/error_estimator.templates.h
@@ -0,0 +1,1292 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/thread_management.h>
+#include <deal.II/base/quadrature.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/work_stream.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/parallel_vector.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/parallel_block_vector.h>
+#include <deal.II/lac/petsc_vector.h>
+#include <deal.II/lac/petsc_block_vector.h>
+#include <deal.II/lac/trilinos_vector.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/base/geometry_info.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/hp/fe_values.h>
+#include <deal.II/fe/fe_update_flags.h>
+#include <deal.II/fe/mapping_q1.h>
+#include <deal.II/hp/q_collection.h>
+#include <deal.II/hp/mapping_collection.h>
+#include <deal.II/numerics/error_estimator.h>
+#include <deal.II/distributed/tria.h>
+
+#include <deal.II/base/std_cxx11/bind.h>
+
+#include <numeric>
+#include <algorithm>
+#include <cmath>
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace
+{
+  /**
+   * Move @p cell forward by up to @p n positions, stopping early as soon
+   * as it equals the end iterator of the DoF handler it belongs to.
+   */
+  template <typename CellIterator>
+  inline
+  void advance_by_n (CellIterator &cell,
+                     const unsigned int n)
+  {
+    // The end iterator has to be fetched up front: once cell itself has
+    // become the end iterator, dereferencing it (cell->) triggers an
+    // assertion, so the DoF handler can no longer be reached through it.
+    const CellIterator past_the_end = cell->get_dof_handler().end();
+
+    unsigned int steps_done = 0;
+    while ((steps_done<n) && (cell!=past_the_end))
+      {
+        ++steps_done;
+        ++cell;
+      }
+  }
+}
+
+
+namespace internal
+{
+  namespace
+  {
+    /**
+     * All small temporary data objects that are needed once per thread by the
+     * several functions of the error estimator are gathered in this struct.
+     * The reason for this structure is mainly that we have a number of
+     * functions that operate on cells or faces and need a number of small
+     * temporary data objects. Since these functions may run in parallel, we
+     * cannot make these objects member variables of the enclosing class. On
+     * the other hand, declaring them locally in each of these functions would
+     * require reallocating them every time we visit the next cell or face,
+     * which we found can take a significant amount of time if it happens
+     * often even in the single threaded case (10-20 per cent in our
+     * measurements); however, most importantly, memory allocation requires
+     * synchronisation in multithreaded mode. While that is done by the C++
+     * library and does not have to be hand-coded, it nevertheless seriously
+     * damages the ability to efficiently run the functions of this class in
+     * parallel, since they are quite often blocked by these synchronisation
+     * points, slowing everything down by a factor of two or three.
+     *
+     * Thus, every thread gets an instance of this class to work with and
+     * need not allocate memory itself, or synchronise with other threads.
+     *
+     * The sizes of the arrays are initialized with the maximal number of
+     * entries necessary for the hp case. Within the loop over individual
+     * cells, we then resize the arrays as necessary. Since for std::vector
+     * resizing to a smaller size doesn't imply memory allocation, this is
+     * fast.
+     *
+     * Note that the constructor initializes several members from other
+     * members, so the order in which the members are declared below matters.
+     */
+    template <typename DoFHandlerType,typename number>
+    struct ParallelData
+    {
+      /**
+       * Dimension and space dimension, taken over from the DoF handler type.
+       */
+      static const unsigned int dim      = DoFHandlerType::dimension;
+      static const unsigned int spacedim = DoFHandlerType::space_dimension;
+
+      /**
+       * The finite element to be used, stored as an hp collection.
+       */
+      const dealii::hp::FECollection<dim,spacedim> finite_element;
+
+      /**
+       * The quadrature formulas to be used for the faces.
+       */
+      const dealii::hp::QCollection<dim-1> face_quadratures;
+
+      /**
+       * FEFaceValues objects to integrate over the faces of the current and
+       * potentially of neighbor cells.
+       */
+      dealii::hp::FEFaceValues<dim,spacedim>    fe_face_values_cell;
+      dealii::hp::FEFaceValues<dim,spacedim>    fe_face_values_neighbor;
+      dealii::hp::FESubfaceValues<dim,spacedim> fe_subface_values;
+
+      /**
+       * A vector to store the jump of the normal vectors in the quadrature
+       * points for each of the solution vectors (i.e. a temporary value).
+       * This vector is not allocated inside the functions that use it, but
+       * rather globally, since memory allocation is slow, in particular in
+       * presence of multiple threads where synchronisation makes things even
+       * slower.
+       *
+       * The first index is the index of the solution vector, the second the
+       * number of the quadrature point, the third the component of the
+       * finite element.
+       */
+      std::vector<std::vector<std::vector<number> > > phi;
+
+      /**
+       * A vector for the gradients of the finite element function on one cell
+       *
+       * Let psi be a short name for <tt>a grad u_h</tt>, where the third
+       * index is the component of the finite element, and the second index
+       * the number of the quadrature point. The first index denotes the index
+       * of the solution vector.
+       */
+      std::vector<std::vector<std::vector<Tensor<1,spacedim,number> > > > psi;
+
+      /**
+       * The same vector for a neighbor cell
+       */
+      std::vector<std::vector<std::vector<Tensor<1,spacedim,number> > > > neighbor_psi;
+
+      /**
+       * The normal vectors at the quadrature points of one face
+       */
+      std::vector<Tensor<1,spacedim> > normal_vectors;
+
+      /**
+       * Normal vectors of the opposing face.
+       */
+      std::vector<Tensor<1,spacedim> > neighbor_normal_vectors;
+
+      /**
+       * Two arrays needed for the values of coefficients in the jumps, if
+       * they are given. The first one holds one scalar per quadrature point,
+       * the second one a vector with one entry per finite element component
+       * per quadrature point.
+       */
+      std::vector<double>                  coefficient_values1;
+      std::vector<dealii::Vector<double> > coefficient_values;
+
+      /**
+       * Array for the products of Jacobian determinants and weights of
+       * quadrature points.
+       */
+      std::vector<double>          JxW_values;
+
+      /**
+       * The subdomain id we are to care for.
+       */
+      const types::subdomain_id subdomain_id;
+      /**
+       * The material id we are to care for.
+       */
+      const types::material_id material_id;
+
+      /**
+       * Some more input data to the KellyErrorEstimator::estimate()
+       * function: the boundary function map and the coefficient function are
+       * held as pointers, the component mask is stored as a copy.
+       */
+      const typename FunctionMap<spacedim>::type *neumann_bc;
+      const ComponentMask                component_mask;
+      const Function<spacedim>                   *coefficients;
+
+      /**
+       * Constructor. Stores the given input data and sizes all temporary
+       * arrays for the largest number of quadrature points in
+       * @p face_quadratures and for @p n_solution_vectors solution vectors.
+       */
+      template <class FE>
+      ParallelData (const FE                                           &fe,
+                    const dealii::hp::QCollection<dim-1>               &face_quadratures,
+                    const dealii::hp::MappingCollection<dim,spacedim> &mapping,
+                    const bool         need_quadrature_points,
+                    const unsigned int n_solution_vectors,
+                    const types::subdomain_id subdomain_id,
+                    const types::material_id material_id,
+                    const typename FunctionMap<spacedim>::type *neumann_bc,
+                    const ComponentMask                component_mask,
+                    const Function<spacedim>                   *coefficients);
+
+      /**
+       * Resize the arrays so that they fit the number of quadrature points
+       * associated with the given finite element index into the hp
+       * collections.
+       */
+      void resize (const unsigned int active_fe_index);
+    };
+
+
+    /**
+     * Constructor of ParallelData. The body is empty; all work happens in
+     * the member initializer list, which relies on the declaration order of
+     * the members (finite_element is initialized before the FE face values
+     * objects that are built from it).
+     *
+     * Where a constructor parameter has the same name as a member (e.g.
+     * face_quadratures, subdomain_id), the name used inside an initializer
+     * expression refers to the parameter.
+     */
+    template <typename DoFHandlerType,typename number>
+    template <class FE>
+    ParallelData<DoFHandlerType,number>::
+    ParallelData
+    (const FE                                           &fe,
+     const dealii::hp::QCollection<dim-1>               &face_quadratures,
+     const dealii::hp::MappingCollection<dim, spacedim> &mapping,
+     const bool                                          need_quadrature_points,
+     const unsigned int                                  n_solution_vectors,
+     const types::subdomain_id                           subdomain_id,
+     const types::material_id                            material_id,
+     const typename FunctionMap<spacedim>::type         *neumann_bc,
+     const ComponentMask                                 component_mask,
+     const Function<spacedim>                           *coefficients)
+      :
+      finite_element (fe),
+      face_quadratures (face_quadratures),
+      // the FE face values objects are built from the member finite_element,
+      // not from the argument fe. Only the object for the current cell asks
+      // for JxW values and, if requested, quadrature points.
+      fe_face_values_cell (mapping,
+                           finite_element,
+                           face_quadratures,
+                           update_gradients      |
+                           update_JxW_values     |
+                           (need_quadrature_points  ?
+                            update_quadrature_points :
+                            UpdateFlags()) |
+                           update_normal_vectors),
+      fe_face_values_neighbor (mapping,
+                               finite_element,
+                               face_quadratures,
+                               update_gradients|
+                               update_normal_vectors),
+      fe_subface_values (mapping,
+                         finite_element,
+                         face_quadratures,
+                         update_gradients|
+                         update_normal_vectors),
+      // temporary arrays: sized for the largest quadrature formula in the
+      // collection; resize() later adjusts them to the formula in use
+      phi (n_solution_vectors,
+           std::vector<std::vector<number> >
+           (face_quadratures.max_n_quadrature_points(),
+            std::vector<number> (fe.n_components()))),
+      psi (n_solution_vectors,
+           std::vector<std::vector<Tensor<1,spacedim,number> > >
+           (face_quadratures.max_n_quadrature_points(),
+            std::vector<Tensor<1,spacedim,number> > (fe.n_components()))),
+      neighbor_psi (n_solution_vectors,
+                    std::vector<std::vector<Tensor<1,spacedim,number> > >
+                    (face_quadratures.max_n_quadrature_points(),
+                     std::vector<Tensor<1,spacedim,number> > (fe.n_components()))),
+      normal_vectors (face_quadratures.max_n_quadrature_points()),
+      neighbor_normal_vectors (face_quadratures.max_n_quadrature_points()),
+      coefficient_values1 (face_quadratures.max_n_quadrature_points()),
+      coefficient_values (face_quadratures.max_n_quadrature_points(),
+                          dealii::Vector<double> (fe.n_components())),
+      JxW_values (face_quadratures.max_n_quadrature_points()),
+      // remaining input data is simply stored
+      subdomain_id (subdomain_id),
+      material_id (material_id),
+      neumann_bc (neumann_bc),
+      component_mask (component_mask),
+      coefficients (coefficients)
+    {}
+
+
+
+    /**
+     * Adjust the sizes of all temporary arrays to the face quadrature
+     * formula that belongs to @p active_fe_index: every array gets one
+     * entry per quadrature point of that formula, and every per-point
+     * entry of the component-wise arrays gets one entry per component of
+     * the finite element.
+     */
+    template <typename DoFHandlerType, typename number>
+    void
+    ParallelData<DoFHandlerType,number>::resize (const unsigned int active_fe_index)
+    {
+      const unsigned int n_points = face_quadratures[active_fe_index].size();
+      const unsigned int n_comps  = finite_element.n_components();
+
+      // arrays that are indexed by the quadrature point only
+      JxW_values.resize(n_points);
+      normal_vectors.resize(n_points);
+      neighbor_normal_vectors.resize(n_points);
+      coefficient_values1.resize(n_points);
+
+      // one coefficient vector with n_comps entries per quadrature point
+      coefficient_values.resize(n_points);
+      for (unsigned int q=0; q<n_points; ++q)
+        coefficient_values[q].reinit(n_comps);
+
+      // arrays indexed by solution vector, quadrature point and component
+      const unsigned int n_solutions = phi.size();
+      for (unsigned int s=0; s<n_solutions; ++s)
+        {
+          phi[s].resize(n_points);
+          psi[s].resize(n_points);
+          neighbor_psi[s].resize(n_points);
+
+          for (unsigned int q=0; q<n_points; ++q)
+            {
+              phi[s][q].resize(n_comps);
+              psi[s][q].resize(n_comps);
+              neighbor_psi[s][q].resize(n_comps);
+            }
+        }
+    }
+
+
+
+    /**
+     * Copier stage of a WorkStream pipeline: transfer every entry of the
+     * locally computed map @p local_face_integrals into the global map
+     * @p face_integrals.
+     *
+     * For every entry, assertions check that the face is not yet present
+     * in the global map and that all of its values are finite and
+     * non-negative.
+     */
+    template <typename DoFHandlerType>
+    void
+    copy_local_to_global
+    (const std::map<typename DoFHandlerType::face_iterator,std::vector<double> > &local_face_integrals,
+     std::map<typename DoFHandlerType::face_iterator,std::vector<double> > &face_integrals)
+    {
+      typedef
+      std::map<typename DoFHandlerType::face_iterator,std::vector<double> >
+      FaceIntegralMap;
+
+      const typename FaceIntegralMap::const_iterator
+      last_entry = local_face_integrals.end();
+
+      for (typename FaceIntegralMap::const_iterator
+           entry = local_face_integrals.begin();
+           entry != last_entry;
+           ++entry)
+        {
+          const typename DoFHandlerType::face_iterator &face   = entry->first;
+          const std::vector<double>                    &values = entry->second;
+
+          // a face must not be handed over twice
+          Assert (face_integrals.find (face) == face_integrals.end(),
+                  ExcInternalError());
+
+          // sanity check of the values about to be stored
+          for (std::vector<double>::const_iterator value = values.begin();
+               value != values.end(); ++value)
+            {
+              Assert (numbers::is_finite(*value), ExcInternalError());
+              Assert (*value >= 0, ExcInternalError());
+            }
+
+          face_integrals[face] = values;
+        }
+    }
+
+
+    /**
+     * Evaluate the face term of the estimator from the gradients stored in
+     * @p parallel_data and return one integral per solution vector.
+     *
+     * On entry, <code>parallel_data.psi</code> holds the gradients of the
+     * solutions on one side of the face; for interior faces
+     * <code>parallel_data.neighbor_psi</code> and
+     * <code>parallel_data.neighbor_normal_vectors</code> hold the data of the
+     * other side. Quadrature points, normal vectors and JxW values are read
+     * from the present FEFaceValues object of @p fe_face_values_cell.
+     */
+    template <typename DoFHandlerType, typename number>
+    std::vector<double>
+    integrate_over_face
+    (ParallelData<DoFHandlerType,number>                 &parallel_data,
+     const typename DoFHandlerType::face_iterator        &face,
+     dealii::hp::FEFaceValues<DoFHandlerType::dimension, DoFHandlerType::space_dimension> &fe_face_values_cell)
+    {
+      const unsigned int n_solution_vectors = parallel_data.psi.size(),
+                         n_components       = parallel_data.finite_element.n_components(),
+                         n_q_points         = parallel_data.psi[0].size();
+      const bool         is_boundary_face   = face->at_boundary();
+
+      // at this point psi contains, at every quadrature point and for every
+      // component,
+      // - for an internal face, the gradient on this side of the face
+      // - for a neumann boundary face, grad u
+
+      // multiply with the (outward) normal vector of this side. for internal
+      // faces the contribution of the other side is added below with the
+      // normal vector of that side, which yields the jump
+      parallel_data.normal_vectors =
+        fe_face_values_cell.get_present_fe_values().get_all_normal_vectors();
+
+      for (unsigned int s=0; s<n_solution_vectors; ++s)
+        for (unsigned int q=0; q<n_q_points; ++q)
+          for (unsigned int c=0; c<n_components; ++c)
+            parallel_data.phi[s][q][c]
+              = (parallel_data.psi[s][q][c] *
+                 parallel_data.normal_vectors[q]);
+
+      // internal face: add the neighbor's part to obtain the jump in the
+      // normal derivative
+      if (!is_boundary_face)
+        {
+          for (unsigned int s=0; s<n_solution_vectors; ++s)
+            for (unsigned int q=0; q<n_q_points; ++q)
+              for (unsigned int c=0; c<n_components; ++c)
+                parallel_data.phi[s][q][c]
+                += (parallel_data.neighbor_psi[s][q][c] *
+                    parallel_data.neighbor_normal_vectors[q]);
+        }
+
+      // optional coefficient: scale the (jump of the) normal derivative
+      if (parallel_data.coefficients != 0)
+        {
+          if (parallel_data.coefficients->n_components == 1)
+            {
+              // scalar coefficient: the same factor for all components
+              parallel_data.coefficients
+              ->value_list (fe_face_values_cell.get_present_fe_values()
+                            .get_quadrature_points(),
+                            parallel_data.coefficient_values1);
+              for (unsigned int s=0; s<n_solution_vectors; ++s)
+                for (unsigned int q=0; q<n_q_points; ++q)
+                  for (unsigned int c=0; c<n_components; ++c)
+                    parallel_data.phi[s][q][c] *=
+                      parallel_data.coefficient_values1[q];
+            }
+          else
+            {
+              // vector-valued coefficient: one factor per component
+              parallel_data.coefficients
+              ->vector_value_list (fe_face_values_cell.get_present_fe_values()
+                                   .get_quadrature_points(),
+                                   parallel_data.coefficient_values);
+              for (unsigned int s=0; s<n_solution_vectors; ++s)
+                for (unsigned int q=0; q<n_q_points; ++q)
+                  for (unsigned int c=0; c<n_components; ++c)
+                    parallel_data.phi[s][q][c] *=
+                      parallel_data.coefficient_values[q](c);
+            }
+        }
+
+      // neumann boundary face: subtract the prescribed boundary function
+      // from the normal derivative
+      if (is_boundary_face)
+        {
+          const types::boundary_id boundary_id = face->boundary_id();
+
+          Assert (parallel_data.neumann_bc->find(boundary_id) !=
+                  parallel_data.neumann_bc->end(),
+                  ExcInternalError ());
+
+          if (n_components == 1)
+            {
+              // scalar problem: values of the boundary function at the
+              // quadrature points
+              std::vector<double> g(n_q_points);
+              parallel_data.neumann_bc->find(boundary_id)->second
+              ->value_list (fe_face_values_cell.get_present_fe_values()
+                            .get_quadrature_points(), g);
+
+              for (unsigned int s=0; s<n_solution_vectors; ++s)
+                for (unsigned int q=0; q<n_q_points; ++q)
+                  parallel_data.phi[s][q][0] -= g[q];
+            }
+          else
+            {
+              // vector-valued problem: one boundary value per component
+              std::vector<dealii::Vector<double> >
+              g(n_q_points, dealii::Vector<double>(n_components));
+              parallel_data.neumann_bc->find(boundary_id)->second
+              ->vector_value_list (fe_face_values_cell.get_present_fe_values()
+                                   .get_quadrature_points(),
+                                   g);
+
+              for (unsigned int s=0; s<n_solution_vectors; ++s)
+                for (unsigned int q=0; q<n_q_points; ++q)
+                  for (unsigned int c=0; c<n_components; ++c)
+                    parallel_data.phi[s][q][c] -= g[q](c);
+            }
+        }
+
+      // now phi contains, at every quadrature point and for every component,
+      // - for an internal face, phi=[a du/dn]
+      // - for a neumann boundary face, phi=a du/dn-g
+
+      parallel_data.JxW_values
+        = fe_face_values_cell.get_present_fe_values().get_JxW_values();
+
+      // integrate the square of phi over the face, taking into account only
+      // the components selected by the component mask
+      std::vector<double> face_integral (n_solution_vectors, 0);
+      for (unsigned int s=0; s<n_solution_vectors; ++s)
+        for (unsigned int c=0; c<n_components; ++c)
+          {
+            if (!parallel_data.component_mask[c])
+              continue;
+
+            for (unsigned int q=0; q<n_q_points; ++q)
+              face_integral[s] += Utilities::fixed_power<2>(parallel_data.phi[s][q][c]) *
+                                  parallel_data.JxW_values[q];
+          }
+
+      return face_integral;
+    }
+
+    /**
+     * Scaling factor for the integral over a face at the boundary (used for
+     * Neumann boundary conditions).
+     *
+     * For the strategy cell_diameter_over_24 the factor is one. For
+     * face_diameter_over_twice_max_degree it is the diameter of the face
+     * divided by the polynomial degree of the element active on @p cell.
+     */
+    template <typename DoFHandlerType>
+    double boundary_face_factor(const typename DoFHandlerType::active_cell_iterator &cell,
+                                const unsigned int                       face_no,
+                                const dealii::hp::FEFaceValues<DoFHandlerType::dimension, DoFHandlerType::space_dimension> &fe_face_values_cell,
+                                const typename KellyErrorEstimator<DoFHandlerType::dimension,DoFHandlerType::space_dimension>::Strategy strategy)
+    {
+      typedef KellyErrorEstimator<DoFHandlerType::dimension,DoFHandlerType::space_dimension> Estimator;
+
+      if (strategy == Estimator::cell_diameter_over_24)
+        return 1.0;
+
+      if (strategy == Estimator::face_diameter_over_twice_max_degree)
+        {
+          const double degree = fe_face_values_cell.get_fe_collection()[cell->active_fe_index()].degree;
+          return cell->face(face_no)->diameter() / degree;
+        }
+
+      // unknown strategy
+      Assert (false, ExcNotImplemented());
+      return -std::numeric_limits<double>::max();
+    }
+
+
+    /**
+     * Scaling factor for the integral over a regular interior face.
+     *
+     * For the strategy cell_diameter_over_24 the factor is one. For
+     * face_diameter_over_twice_max_degree it is the diameter of the face
+     * divided by twice the larger of the polynomial degrees of the elements
+     * active on @p cell and on its neighbor behind face @p face_no.
+     */
+    template <typename DoFHandlerType>
+    double regular_face_factor(const typename DoFHandlerType::active_cell_iterator &cell,
+                               const unsigned int                       face_no,
+                               const dealii::hp::FEFaceValues<DoFHandlerType::dimension, DoFHandlerType::space_dimension> &fe_face_values_cell,
+                               const dealii::hp::FEFaceValues<DoFHandlerType::dimension, DoFHandlerType::space_dimension> &fe_face_values_neighbor,
+                               const typename KellyErrorEstimator<DoFHandlerType::dimension,DoFHandlerType::space_dimension>::Strategy strategy)
+    {
+      typedef KellyErrorEstimator<DoFHandlerType::dimension,DoFHandlerType::space_dimension> Estimator;
+
+      if (strategy == Estimator::cell_diameter_over_24)
+        return 1.0;
+
+      if (strategy == Estimator::face_diameter_over_twice_max_degree)
+        {
+          const double cell_degree     = fe_face_values_cell.get_fe_collection()[cell->active_fe_index()].degree;
+          const double neighbor_degree = fe_face_values_neighbor.get_fe_collection()[cell->neighbor(face_no)->active_fe_index()].degree;
+          const double max_degree      = std::max(cell_degree,neighbor_degree);
+          return cell->face(face_no)->diameter() / max_degree / 2.0;
+        }
+
+      // unknown strategy
+      Assert (false, ExcNotImplemented());
+      return -std::numeric_limits<double>::max();
+    }
+
+    /**
+     * Scaling factor for the integral over one subface of an irregular face.
+     *
+     * For the strategy cell_diameter_over_24 the factor is one. For
+     * face_diameter_over_twice_max_degree it is the diameter of child
+     * @p subface_no of face @p face_no of @p cell, divided by twice the larger
+     * of the polynomial degrees of the elements active on @p cell and on
+     * @p neighbor_child.
+     */
+    template <typename DoFHandlerType>
+    double irregular_face_factor(const typename DoFHandlerType::active_cell_iterator &cell,
+                                 const typename DoFHandlerType::active_cell_iterator &neighbor_child,
+                                 const unsigned int                       face_no,
+                                 const unsigned int                       subface_no,
+                                 const dealii::hp::FEFaceValues<DoFHandlerType::dimension, DoFHandlerType::space_dimension> &fe_face_values,
+                                 dealii::hp::FESubfaceValues<DoFHandlerType::dimension, DoFHandlerType::space_dimension>    &fe_subface_values,
+                                 const typename KellyErrorEstimator<DoFHandlerType::dimension,DoFHandlerType::space_dimension>::Strategy strategy)
+    {
+      typedef KellyErrorEstimator<DoFHandlerType::dimension,DoFHandlerType::space_dimension> Estimator;
+
+      if (strategy == Estimator::cell_diameter_over_24)
+        return 1.0;
+
+      if (strategy == Estimator::face_diameter_over_twice_max_degree)
+        {
+          const double cell_degree = fe_face_values.get_fe_collection()[cell->active_fe_index()].degree;
+          const double neighbor_child_degree = fe_subface_values.get_fe_collection()[neighbor_child->active_fe_index()].degree;
+          const double max_degree = std::max(neighbor_child_degree,cell_degree);
+          return cell->face(face_no)->child(subface_no)->diameter()/max_degree/2.0;
+        }
+
+      // unknown strategy
+      Assert (false, ExcNotImplemented());
+      return -std::numeric_limits<double>::max();
+    }
+
+    /**
+     * Factor applied when the contributions of the faces of a cell are
+     * summed up.
+     *
+     * For the strategy cell_diameter_over_24 this is the diameter of @p cell
+     * divided by 24; for face_diameter_over_twice_max_degree it is one. The
+     * face number and the DoF handler arguments are not used.
+     */
+    template <typename DoFHandlerType>
+    double cell_factor(const typename DoFHandlerType::active_cell_iterator &cell,
+                       const unsigned int                       /*face_no*/,
+                       const DoFHandlerType                    &/*dof_handler*/,
+                       const typename KellyErrorEstimator<DoFHandlerType::dimension,DoFHandlerType::space_dimension>::Strategy strategy)
+    {
+      typedef KellyErrorEstimator<DoFHandlerType::dimension,DoFHandlerType::space_dimension> Estimator;
+
+      if (strategy == Estimator::cell_diameter_over_24)
+        return cell->diameter()/24;
+
+      if (strategy == Estimator::face_diameter_over_twice_max_degree)
+        return 1.0;
+
+      // unknown strategy
+      Assert (false, ExcNotImplemented());
+      return -std::numeric_limits<double>::max();
+    }
+
+
+
+    /**
+     * Integrate over face @p face_no of @p cell in the case that the face has
+     * no hanging nodes, i.e. it is either at the boundary or the cell behind
+     * it is refined to the same level. Both cases are handled here; the
+     * result, scaled by the strategy-dependent face factor, is stored in
+     * @p local_face_integrals under the key of the face.
+     */
+    template <typename InputVector, typename DoFHandlerType>
+    void
+    integrate_over_regular_face (const std::vector<const InputVector *>   &solutions,
+                                 ParallelData<DoFHandlerType, typename InputVector::value_type> &parallel_data,
+                                 std::map<typename DoFHandlerType::face_iterator,std::vector<double> > &local_face_integrals,
+                                 const typename DoFHandlerType::active_cell_iterator &cell,
+                                 const unsigned int                       face_no,
+                                 dealii::hp::FEFaceValues<DoFHandlerType::dimension, DoFHandlerType::space_dimension> &fe_face_values_cell,
+                                 dealii::hp::FEFaceValues<DoFHandlerType::dimension, DoFHandlerType::space_dimension> &fe_face_values_neighbor,
+                                 const typename KellyErrorEstimator<DoFHandlerType::dimension,DoFHandlerType::space_dimension>::Strategy strategy)
+    {
+      const unsigned int dim = DoFHandlerType::dimension;
+      (void)dim;
+
+      const unsigned int n_solution_vectors = solutions.size();
+      const typename DoFHandlerType::face_iterator face = cell->face(face_no);
+
+      // restrict the finite element of this cell to the present face and get
+      // the gradients of all solution vectors there
+      fe_face_values_cell.reinit (cell, face_no,
+                                  cell->active_fe_index());
+
+      for (unsigned int s=0; s<n_solution_vectors; ++s)
+        fe_face_values_cell.get_present_fe_values()
+        .get_function_gradients (*solutions[s], parallel_data.psi[s]);
+
+      // now deal with the other side of the face
+      double factor;
+      if (face->at_boundary())
+        {
+          // nothing on the other side, only the scaling factor is needed
+          factor = boundary_face_factor<DoFHandlerType>(cell,face_no,
+                                                        fe_face_values_cell,
+                                                        strategy);
+        }
+      else
+        {
+          // internal face; the jump of the gradient across this face will be
+          // integrated
+          Assert (cell->neighbor(face_no).state() == IteratorState::valid,
+                  ExcInternalError());
+
+          const typename DoFHandlerType::active_cell_iterator neighbor = cell->neighbor(face_no);
+
+          // number of the current face as seen from the neighboring cell
+          const unsigned int neighbor_neighbor
+            = cell->neighbor_of_neighbor (face_no);
+          Assert (neighbor_neighbor<GeometryInfo<dim>::faces_per_cell,
+                  ExcInternalError());
+
+          // restrict the finite element function of @p{neighbor} to the
+          // common face. in the hp case, use the quadrature formula that
+          // matches the one used for the present cell
+          fe_face_values_neighbor.reinit (neighbor, neighbor_neighbor,
+                                          cell->active_fe_index());
+
+          factor = regular_face_factor<DoFHandlerType>(cell,face_no,
+                                                       fe_face_values_cell,fe_face_values_neighbor,
+                                                       strategy);
+
+          // gradients and normal vectors on the neighbor cell
+          for (unsigned int s=0; s<n_solution_vectors; ++s)
+            fe_face_values_neighbor.get_present_fe_values()
+            .get_function_gradients (*solutions[s],
+                                     parallel_data.neighbor_psi[s]);
+
+          parallel_data.neighbor_normal_vectors =
+            fe_face_values_neighbor.get_present_fe_values().get_all_normal_vectors();
+        }
+
+      // let the generic function do all the other things, then scale the
+      // result
+      std::vector<double> &integrals = local_face_integrals[face];
+      integrals = integrate_over_face (parallel_data, face,
+                                       fe_face_values_cell);
+
+      for (unsigned int i = 0; i < integrals.size(); i++)
+        integrals[i] *= factor;
+    }
+
+
+
+
+    /**
+     * Integrate over face @p face_no of @p cell in the case that the face has
+     * children, i.e. the neighbor behind it is refined. The integral is
+     * computed separately on every subface and stored in
+     * @p local_face_integrals under the key of that subface; the sum over
+     * all subfaces is stored under the key of the mother face.
+     */
+    template <typename InputVector, typename DoFHandlerType>
+    void
+    integrate_over_irregular_face (const std::vector<const InputVector *>   &solutions,
+                                   ParallelData<DoFHandlerType, typename InputVector::value_type> &parallel_data,
+                                   std::map<typename DoFHandlerType::face_iterator,std::vector<double> > &local_face_integrals,
+                                   const typename DoFHandlerType::active_cell_iterator    &cell,
+                                   const unsigned int                          face_no,
+                                   dealii::hp::FEFaceValues<DoFHandlerType::dimension,DoFHandlerType::space_dimension>    &fe_face_values,
+                                   dealii::hp::FESubfaceValues<DoFHandlerType::dimension, DoFHandlerType::space_dimension> &fe_subface_values,
+                                   const typename KellyErrorEstimator<DoFHandlerType::dimension,DoFHandlerType::space_dimension>::Strategy strategy)
+    {
+      const unsigned int dim = DoFHandlerType::dimension;
+      (void)dim;
+
+      const unsigned int n_solution_vectors = solutions.size();
+      const typename DoFHandlerType::face_iterator face = cell->face(face_no);
+
+      const typename DoFHandlerType::cell_iterator neighbor = cell->neighbor(face_no);
+      (void)neighbor;
+
+      Assert (neighbor.state() == IteratorState::valid, ExcInternalError());
+      Assert (face->has_children(), ExcInternalError());
+
+      // in the following, psi is indexed by the number of the quadrature
+      // point first and by the component of the finite element second
+
+      // number that @p{cell} has in the list of neighbors of @p{neighbor}
+      const unsigned int neighbor_neighbor
+        = cell->neighbor_of_neighbor (face_no);
+      Assert (neighbor_neighbor<GeometryInfo<dim>::faces_per_cell,
+              ExcInternalError());
+
+      const unsigned int n_subfaces = face->n_children();
+
+      // first pass: compute the integral on every subface
+      for (unsigned int subface_no=0; subface_no<n_subfaces; ++subface_no)
+        {
+          // the cell behind the present subface
+          const typename DoFHandlerType::active_cell_iterator neighbor_child
+            = cell->neighbor_child_on_subface (face_no, subface_no);
+          Assert (!neighbor_child->has_children(),
+                  ExcInternalError());
+
+          // restrict the finite element on the present cell to the subface,
+          // and the one on the neighbor cell to the common @p{subface}
+          fe_subface_values.reinit (cell, face_no, subface_no,
+                                    cell->active_fe_index());
+          fe_face_values.reinit (neighbor_child, neighbor_neighbor,
+                                 cell->active_fe_index());
+
+          const double factor = irregular_face_factor<DoFHandlerType>(cell,
+                                                                      neighbor_child,
+                                                                      face_no,
+                                                                      subface_no,
+                                                                      fe_face_values,
+                                                                      fe_subface_values,
+                                                                      strategy);
+
+          // gradients of the solutions: this cell's side goes into psi, the
+          // neighbor's side into @p{neighbor_psi}
+          for (unsigned int s=0; s<n_solution_vectors; ++s)
+            {
+              fe_subface_values.get_present_fe_values()
+              .get_function_gradients (*solutions[s], parallel_data.psi[s]);
+              fe_face_values.get_present_fe_values()
+              .get_function_gradients (*solutions[s], parallel_data.neighbor_psi[s]);
+            }
+
+          parallel_data.neighbor_normal_vectors =
+            fe_subface_values.get_present_fe_values().get_all_normal_vectors();
+
+          // call the generic evaluation function and scale its result; the
+          // value is stored under the face of the neighbor's child
+          const typename DoFHandlerType::face_iterator
+          subface = neighbor_child->face(neighbor_neighbor);
+
+          std::vector<double> &subface_integrals = local_face_integrals[subface];
+          subface_integrals = integrate_over_face (parallel_data, face, fe_face_values);
+          for (unsigned int i = 0; i < subface_integrals.size(); i++)
+            subface_integrals[i] *= factor;
+        }
+
+      // second pass: collect the contributions of the subfaces and store
+      // them with the mother face
+      std::vector<double> sum (n_solution_vectors, 0);
+      for (unsigned int subface_no=0; subface_no<n_subfaces; ++subface_no)
+        {
+          const typename DoFHandlerType::face_iterator
+          child = face->child(subface_no);
+
+          // the lookup check has to come before the first use of operator[]
+          Assert (local_face_integrals.find(child) !=
+                  local_face_integrals.end(),
+                  ExcInternalError());
+          Assert (local_face_integrals[child][0] >= 0,
+                  ExcInternalError());
+
+          const std::vector<double> &child_integrals = local_face_integrals[child];
+          for (unsigned int s=0; s<n_solution_vectors; ++s)
+            sum[s] += child_integrals[s];
+        }
+
+      local_face_integrals[face] = sum;
+    }
+
+
+    /**
+     * Compute the error on the faces of a single cell.
+     *
+     * The map @p local_face_integrals is cleared and then filled with one
+     * entry for every face of @p cell this function is responsible for:
+     * boundary faces that are not part of the Neumann boundary get zero
+     * entries, all other faces are integrated over, provided that the cell
+     * or one of the cells behind the face passes the subdomain and material
+     * id filters stored in @p parallel_data.
+     *
+     * This function is only needed in two or three dimensions.  The error
+     * estimator in one dimension is implemented separately.
+     */
+    template <typename InputVector, typename DoFHandlerType>
+    void
+    estimate_one_cell (const typename DoFHandlerType::active_cell_iterator &cell,
+                       ParallelData<DoFHandlerType, typename InputVector::value_type> &parallel_data,
+                       std::map<typename DoFHandlerType::face_iterator,std::vector<double> > &local_face_integrals,
+                       const std::vector<const InputVector *> &solutions,
+                       const typename KellyErrorEstimator<DoFHandlerType::dimension,DoFHandlerType::space_dimension>::Strategy strategy)
+    {
+      const unsigned int dim = DoFHandlerType::dimension;
+      const unsigned int n_solution_vectors = solutions.size();
+
+      const types::subdomain_id subdomain_id = parallel_data.subdomain_id;
+      const types::material_id  material_id  = parallel_data.material_id;
+
+      // empty our own copy of the local face integrals
+      local_face_integrals.clear();
+
+      // loop over all faces of this cell
+      for (unsigned int face_no=0;
+           face_no<GeometryInfo<dim>::faces_per_cell; ++face_no)
+        {
+          const typename DoFHandlerType::face_iterator
+          face=cell->face(face_no);
+
+          // make sure we do work only once: this face may either be regular
+          // or irregular. if it is regular and has a neighbor, then we visit
+          // the face twice, once from every side. let the one with the lower
+          // index do the work. if it is at the boundary, or if the face is
+          // irregular, then do the work below
+          if ((face->has_children() == false) &&
+              !cell->at_boundary(face_no) &&
+              (!cell->neighbor_is_coarser(face_no) &&
+               (cell->neighbor(face_no)->index() < cell->index() ||
+                (cell->neighbor(face_no)->index() == cell->index() &&
+                 cell->neighbor(face_no)->level() < cell->level()))))
+            continue;
+
+          // if the neighboring cell is less refined than the present one,
+          // then do nothing since we integrate over the subfaces when we
+          // visit the coarse cells.
+          if (face->at_boundary() == false)
+            if (cell->neighbor_is_coarser(face_no))
+              continue;
+
+          // if this face is part of the boundary but not of the neumann
+          // boundary -> nothing to do. However, to make things easier when
+          // summing up the contributions of the faces of cells, we enter this
+          // face into the list of faces with contribution zero.
+          if (face->at_boundary()
+              &&
+              (parallel_data.neumann_bc->find(face->boundary_id()) ==
+               parallel_data.neumann_bc->end()))
+            {
+              local_face_integrals[face]
+                = std::vector<double> (n_solution_vectors, 0.);
+              continue;
+            }
+
+          // finally: note that we only have to do something if either the
+          // present cell is on the subdomain we care for (and the same for
+          // material_id), or if one of the neighbors behind the face is on
+          // the subdomain we care for
+          if ( ! ( ((subdomain_id == numbers::invalid_subdomain_id)
+                    ||
+                    (cell->subdomain_id() == subdomain_id))
+                   &&
+                   ((material_id == numbers::invalid_material_id)
+                    ||
+                    (cell->material_id() == material_id))) )
+            {
+              // ok, cell is unwanted, but maybe its neighbor behind the face
+              // we presently work on? oh is there a face at all?
+              if (face->at_boundary())
+                continue;
+
+              bool care_for_cell = false;
+              if (face->has_children() == false)
+                care_for_cell |= ((cell->neighbor(face_no)->subdomain_id()
+                                   == subdomain_id) ||
+                                  (subdomain_id == numbers::invalid_subdomain_id))
+                                 &&
+                                 ((cell->neighbor(face_no)->material_id()
+                                   == material_id) ||
+                                  (material_id == numbers::invalid_material_id));
+              else
+                {
+                  // refined face: we care if at least one of the cells
+                  // behind the subfaces passes both filters. the test has to
+                  // be the same as the one for a regular face above: a
+                  // filter is passed if it is either not set or if it
+                  // matches. in particular, a child matching both a given
+                  // subdomain id and a given material id has to be accepted
+                  for (unsigned int sf=0; sf<face->n_children(); ++sf)
+                    if (((cell->neighbor_child_on_subface(face_no,sf)
+                          ->subdomain_id() == subdomain_id)
+                         ||
+                         (subdomain_id ==
+                          numbers::invalid_subdomain_id))
+                        &&
+                        ((cell->neighbor_child_on_subface(face_no,sf)
+                          ->material_id() == material_id)
+                         ||
+                         (material_id ==
+                          numbers::invalid_material_id)))
+                      {
+                        care_for_cell = true;
+                        break;
+                      }
+                }
+
+              // so if none of the neighbors cares for this subdomain or
+              // material either, then try next face
+              if (care_for_cell == false)
+                continue;
+            }
+
+          // so now we know that we care for this face, let's do something
+          // about it. first re-size the arrays we may use to the correct
+          // size:
+          parallel_data.resize (cell->active_fe_index());
+
+
+          // then do the actual integration
+          if (face->has_children() == false)
+            // if the face is a regular one, i.e.  either on the other side
+            // there is nirvana (face is at boundary), or the other side's
+            // refinement level is the same as that of this side, then handle
+            // the integration of these both cases together
+            integrate_over_regular_face (solutions,
+                                         parallel_data,
+                                         local_face_integrals,
+                                         cell, face_no,
+                                         parallel_data.fe_face_values_cell,
+                                         parallel_data.fe_face_values_neighbor,
+                                         strategy);
+
+          else
+            // otherwise we need to do some special computations which do not
+            // fit into the framework of the above function
+            integrate_over_irregular_face (solutions,
+                                           parallel_data,
+                                           local_face_integrals,
+                                           cell, face_no,
+                                           parallel_data.fe_face_values_cell,
+                                           parallel_data.fe_subface_values,
+                                           strategy);
+        }
+    }
+  }
+}
+
+
+
+
+
+// convenience variant for a single solution vector. it does not depend on
+// the dimension itself, but it calls dimension dependent functions
+template <int dim, int spacedim>
+template <typename InputVector, typename DoFHandlerType>
+void
+KellyErrorEstimator<dim, spacedim>::
+estimate (const Mapping<dim, spacedim>               &mapping,
+          const DoFHandlerType                       &dof_handler,
+          const Quadrature<dim-1>                    &quadrature,
+          const typename FunctionMap<spacedim>::type &neumann_bc,
+          const InputVector                          &solution,
+          Vector<float>                              &error,
+          const ComponentMask                        &component_mask,
+          const Function<spacedim>                   *coefficients,
+          const unsigned int                          n_threads,
+          const types::subdomain_id                   subdomain_id,
+          const types::material_id                    material_id,
+          const Strategy                              strategy)
+{
+  // wrap the solution and the error vector into lists with one element
+  // each and pass them on to the other function
+  std::vector<Vector<float>*>              error_list (1, &error);
+  const std::vector<const InputVector *> solution_list (1, &solution);
+
+  estimate (mapping,
+            dof_handler,
+            quadrature,
+            neumann_bc,
+            solution_list,
+            error_list,
+            component_mask,
+            coefficients,
+            n_threads,
+            subdomain_id,
+            material_id,
+            strategy);
+}
+
+
+template <int dim, int spacedim>
+template <typename InputVector, typename DoFHandlerType>
+void
+KellyErrorEstimator<dim,spacedim>::
+estimate (const DoFHandlerType                       &dof_handler,
+          const Quadrature<dim-1>                    &quadrature,
+          const typename FunctionMap<spacedim>::type &neumann_bc,
+          const InputVector                          &solution,
+          Vector<float>                              &error,
+          const ComponentMask                        &component_mask,
+          const Function<spacedim>                   *coefficients,
+          const unsigned int                          n_threads,
+          const types::subdomain_id                   subdomain_id,
+          const types::material_id                    material_id,
+          const Strategy                              strategy)
+{
+  const Mapping<dim,spacedim> &q1_mapping = StaticMappingQ1<dim,spacedim>::mapping; // default mapping
+  estimate (q1_mapping, dof_handler, quadrature, neumann_bc, solution, error,
+            component_mask, coefficients, n_threads, subdomain_id, material_id, strategy);
+}
+
+
+template <int dim, int spacedim>
+template <typename InputVector, typename DoFHandlerType>
+void
+KellyErrorEstimator<dim, spacedim>::
+estimate (const Mapping<dim, spacedim>               &mapping,
+          const DoFHandlerType                       &dof_handler,
+          const hp::QCollection<dim-1>               &quadrature,
+          const typename FunctionMap<spacedim>::type &neumann_bc,
+          const InputVector                          &solution,
+          Vector<float>                              &error,
+          const ComponentMask                        &component_mask,
+          const Function<spacedim>                   *coefficients,
+          const unsigned int                          n_threads,
+          const types::subdomain_id                   subdomain_id,
+          const types::material_id                    material_id,
+          const Strategy                              strategy)
+{
+  // hand the one solution and its error vector on as lists of length one
+  std::vector<Vector<float>*>            error_list (1, &error);
+  const std::vector<const InputVector *> solution_list (1, &solution);
+  estimate (mapping, dof_handler, quadrature, neumann_bc, solution_list, error_list,
+            component_mask, coefficients, n_threads, subdomain_id, material_id, strategy);
+}
+
+
+template <int dim, int spacedim>
+template <typename InputVector, typename DoFHandlerType>
+void
+KellyErrorEstimator<dim, spacedim>::
+estimate (const DoFHandlerType                       &dof_handler,
+          const hp::QCollection<dim-1>               &quadrature,
+          const typename FunctionMap<spacedim>::type &neumann_bc,
+          const InputVector                          &solution,
+          Vector<float>                              &error,
+          const ComponentMask                        &component_mask,
+          const Function<spacedim>                   *coefficients,
+          const unsigned int                          n_threads,
+          const types::subdomain_id                   subdomain_id,
+          const types::material_id                    material_id,
+          const Strategy                              strategy)
+{
+  const Mapping<dim, spacedim> &q1_mapping = StaticMappingQ1<dim, spacedim>::mapping; // default mapping
+  estimate (q1_mapping, dof_handler, quadrature, neumann_bc, solution, error,
+            component_mask, coefficients, n_threads, subdomain_id, material_id, strategy);
+}
+
+
+// Workhorse that the other estimate() overloads defined around it forward to:
+// fills *errors[n] with one indicator per active cell for each solutions[n].
+template <int dim, int spacedim>
+template <typename InputVector, typename DoFHandlerType>
+void
+KellyErrorEstimator<dim, spacedim>::
+estimate (const Mapping<dim, spacedim>               &mapping,
+          const DoFHandlerType                       &dof_handler,
+          const hp::QCollection<dim-1>               &face_quadratures,
+          const typename FunctionMap<spacedim>::type &neumann_bc,
+          const std::vector<const InputVector *>     &solutions,
+          std::vector<Vector<float>*>                &errors,
+          const ComponentMask                        &component_mask,
+          const Function<spacedim>                   *coefficients,
+          const unsigned int                          /*n_threads*/,
+          const types::subdomain_id                   subdomain_id_,
+          const types::material_id                    material_id,
+          const Strategy                              strategy)
+{
+#ifdef DEAL_II_WITH_P4EST
+  if (dynamic_cast<const parallel::distributed::Triangulation<dim,spacedim>*>
+      (&dof_handler.get_triangulation())
+      != 0)
+    Assert ((subdomain_id_ == numbers::invalid_subdomain_id)
+            ||
+            (subdomain_id_ ==
+             dynamic_cast<const parallel::distributed::Triangulation<dim,spacedim>&>
+             (dof_handler.get_triangulation()).locally_owned_subdomain()),
+            ExcMessage ("For parallel distributed triangulations, the only "
+                        "valid subdomain_id that can be passed here is the "
+                        "one that corresponds to the locally owned subdomain id."));
+  // on a parallel distributed triangulation always use the locally owned subdomain
+  const types::subdomain_id subdomain_id
+    = ((dynamic_cast<const parallel::distributed::Triangulation<dim,spacedim>*>
+        (&dof_handler.get_triangulation())
+        != 0)
+       ?
+       dynamic_cast<const parallel::distributed::Triangulation<dim,spacedim>&>
+       (dof_handler.get_triangulation()).locally_owned_subdomain()
+       :
+       subdomain_id_);
+#else
+  const types::subdomain_id subdomain_id
+    = subdomain_id_;
+#endif
+
+  const unsigned int n_components = dof_handler.get_fe().n_components();
+  (void)n_components; // only referenced inside Assert() below
+
+  // sanity checks on the number and sizes of the arguments
+  Assert (solutions.size() > 0,
+          ExcNoSolutions());
+  Assert (solutions.size() == errors.size(),
+          ExcIncompatibleNumberOfElements(solutions.size(), errors.size()));
+
+  for (typename FunctionMap<spacedim>::type::const_iterator i=neumann_bc.begin();
+       i!=neumann_bc.end(); ++i)
+    Assert (i->second->n_components == n_components,
+            ExcInvalidBoundaryFunction(i->first,
+                                       i->second->n_components,
+                                       n_components));
+
+  Assert (component_mask.represents_n_components(n_components),
+          ExcInvalidComponentMask());
+  Assert (component_mask.n_selected_components(n_components) > 0,
+          ExcInvalidComponentMask());
+
+  Assert ((coefficients == 0) ||
+          (coefficients->n_components == n_components) ||
+          (coefficients->n_components == 1),
+          ExcInvalidCoefficient());
+
+  for (unsigned int n=0; n<solutions.size(); ++n)
+    Assert (solutions[n]->size() == dof_handler.n_dofs(),
+            ExcDimensionMismatch(solutions[n]->size(),
+                                 dof_handler.n_dofs()));
+
+  const unsigned int n_solution_vectors = solutions.size();
+
+  // Map of integrals indexed by the corresponding face. In this map we store
+  // the integrated jump of the gradient for each face.  At the end of the
+  // function, we again loop over the cells and collect the contributions of
+  // the different faces of the cell.
+  std::map<typename DoFHandlerType::face_iterator,std::vector<double> > face_integrals;
+
+  // all the data needed in the error estimator by each of the threads is
+  // gathered in the following structures
+  const hp::MappingCollection<dim,spacedim> mapping_collection(mapping);
+  const internal::ParallelData<DoFHandlerType,typename InputVector::value_type>
+  parallel_data (dof_handler.get_fe(),
+                 face_quadratures,
+                 mapping_collection,
+                 (!neumann_bc.empty() || (coefficients != 0)),
+                 solutions.size(),
+                 subdomain_id,
+                 material_id,
+                 &neumann_bc,
+                 component_mask,
+                 coefficients);
+  std::map<typename DoFHandlerType::face_iterator,std::vector<double> > sample_local_face_integrals;
+
+  // now let's work on all those cells:
+  WorkStream::run (dof_handler.begin_active(),
+                   static_cast<typename DoFHandlerType::active_cell_iterator>(dof_handler.end()),
+                   std_cxx11::bind (&internal::estimate_one_cell<InputVector,DoFHandlerType>,
+                                    std_cxx11::_1, std_cxx11::_2, std_cxx11::_3, std_cxx11::ref(solutions),strategy),
+                   std_cxx11::bind (&internal::copy_local_to_global<DoFHandlerType>,
+                                    std_cxx11::_1, std_cxx11::ref(face_integrals)),
+                   parallel_data,
+                   sample_local_face_integrals);
+
+  // finally add up the contributions of the faces for each cell
+
+  // reserve one slot for each cell and set it to zero
+  for (unsigned int n=0; n<n_solution_vectors; ++n)
+    {
+      (*errors[n]).reinit (dof_handler.get_triangulation().n_active_cells());
+      for (unsigned int i=0; i<dof_handler.get_triangulation().n_active_cells(); ++i)
+        (*errors[n])(i)=0;
+    }
+
+  // now walk over all cells and collect information from the faces. only do
+  // something if this is a cell we care for based on the subdomain id
+  unsigned int present_cell=0;
+  for (typename DoFHandlerType::active_cell_iterator cell=dof_handler.begin_active();
+       cell!=dof_handler.end();
+       ++cell, ++present_cell)
+    if ( ((subdomain_id == numbers::invalid_subdomain_id)
+          ||
+          (cell->subdomain_id() == subdomain_id))
+         &&
+         ((material_id == numbers::invalid_material_id)
+          ||
+          (cell->material_id() == material_id)))
+      {
+        // loop over all faces of this cell
+        for (unsigned int face_no=0; face_no<GeometryInfo<dim>::faces_per_cell;
+             ++face_no)
+          {
+            Assert(face_integrals.find(cell->face(face_no))
+                   != face_integrals.end(),
+                   ExcInternalError());
+            const double factor = internal::cell_factor<DoFHandlerType>(cell,
+                                                                        face_no,
+                                                                        dof_handler,
+                                                                        strategy);
+
+            for (unsigned int n=0; n<n_solution_vectors; ++n)
+              {
+                // make sure that we have written a meaningful value into this
+                // slot
+                Assert (face_integrals[cell->face(face_no)][n] >= 0,
+                        ExcInternalError());
+
+                (*errors[n])(present_cell)
+                += (face_integrals[cell->face(face_no)][n] * factor);
+              }
+          }
+        // the indicator is the square root of the sum accumulated above
+        for (unsigned int n=0; n<n_solution_vectors; ++n)
+          (*errors[n])(present_cell) = std::sqrt((*errors[n])(present_cell));
+      }
+}
+
+
+
+template <int dim, int spacedim>
+template <typename InputVector, typename DoFHandlerType>
+void
+KellyErrorEstimator<dim, spacedim>::
+estimate (const Mapping<dim, spacedim>               &mapping,
+          const DoFHandlerType                       &dof_handler,
+          const Quadrature<dim-1>                    &quadrature,
+          const typename FunctionMap<spacedim>::type &neumann_bc,
+          const std::vector<const InputVector *>     &solutions,
+          std::vector<Vector<float>*>                &errors,
+          const ComponentMask                        &component_mask,
+          const Function<spacedim>                   *coefficients,
+          const unsigned int                          n_threads,
+          const types::subdomain_id                   subdomain_id,
+          const types::material_id                    material_id,
+          const Strategy                              strategy)
+{
+  // wrap the single quadrature formula in an hp::QCollection and let the
+  // overload taking a collection do the actual work
+  const hp::QCollection<dim-1> face_quadratures (quadrature);
+  estimate (mapping, dof_handler, face_quadratures, neumann_bc, solutions,
+            errors, component_mask, coefficients,
+            n_threads, subdomain_id, material_id, strategy);
+}
+
+
+template <int dim, int spacedim>
+template <typename InputVector, typename DoFHandlerType>
+void KellyErrorEstimator<dim, spacedim>::estimate
+(const DoFHandlerType                       &dof_handler,
+ const Quadrature<dim-1>                    &quadrature,
+ const typename FunctionMap<spacedim>::type &neumann_bc,
+ const std::vector<const InputVector *>     &solutions,
+ std::vector<Vector<float>*>                &errors,
+ const ComponentMask                        &component_mask,
+ const Function<spacedim>                   *coefficients,
+ const unsigned int                          n_threads,
+ const types::subdomain_id                   subdomain_id,
+ const types::material_id                    material_id,
+ const Strategy                              strategy)
+{
+  const Mapping<dim, spacedim> &q1_mapping = StaticMappingQ1<dim, spacedim>::mapping; // default mapping
+  estimate (q1_mapping, dof_handler, quadrature, neumann_bc, solutions, errors,
+            component_mask, coefficients, n_threads, subdomain_id, material_id, strategy);
+}
+
+
+
+template <int dim, int spacedim>
+template <typename InputVector, typename DoFHandlerType>
+void KellyErrorEstimator<dim, spacedim>::estimate
+(const DoFHandlerType                       &dof_handler,
+ const hp::QCollection<dim-1>               &quadrature,
+ const typename FunctionMap<spacedim>::type &neumann_bc,
+ const std::vector<const InputVector *>     &solutions,
+ std::vector<Vector<float>*>                &errors,
+ const ComponentMask                        &component_mask,
+ const Function<spacedim>                   *coefficients,
+ const unsigned int                          n_threads,
+ const types::subdomain_id                   subdomain_id,
+ const types::material_id                    material_id,
+ const Strategy                              strategy)
+{
+  const Mapping<dim, spacedim> &q1_mapping = StaticMappingQ1<dim, spacedim>::mapping; // default mapping
+  estimate (q1_mapping, dof_handler, quadrature, neumann_bc, solutions, errors,
+            component_mask, coefficients, n_threads, subdomain_id, material_id, strategy);
+}
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/include/deal.II/numerics/fe_field_function.h b/include/deal.II/numerics/fe_field_function.h
new file mode 100644
index 0000000..65e04ef
--- /dev/null
+++ b/include/deal.II/numerics/fe_field_function.h
@@ -0,0 +1,468 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2007 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__fe_function_h
+#define dealii__fe_function_h
+
+#include <deal.II/base/function.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/fe/mapping_q1.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/point.h>
+#include <deal.II/base/tensor.h>
+#include <deal.II/base/thread_local_storage.h>
+
+#include <deal.II/lac/vector.h>
+
+#include <boost/optional.hpp>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace VectorTools
+{
+  class ExcPointNotAvailableHere;
+}
+
+namespace Functions
+{
+
+  /**
+   * This is an interpolation function for the given dof handler and the given
+   * solution vector. The points at which this function can be evaluated MUST
+   * be inside the domain of the dof handler, but apart from this, no other
+   * requirement is imposed. This function is rather slow, as it needs to
+   * construct a quadrature object for the point (or set of points) where you
+   * want to evaluate your finite element function. In order to do so, it
+   * needs to find out where the points lie.
+   *
+   * If you know in advance in which cell your points lie, you can accelerate
+   * things a bit, by calling set_active_cell before asking for values or
+   * gradients of the function. If you don't do this, and your points don't
+   * lie in the cell that is currently stored, the function
+   * GridTools::find_active_cell_around_point is called to find out where the
+   * point is. You can specify an optional mapping to use when looking for
+   * points in the grid. If you don't do so, this function uses a Q1 mapping.
+   *
+   * Once the FEFieldFunction knows where the points lie, it creates a
+   * quadrature formula for those points, and calls
+   * FEValues::get_function_values or FEValues::get_function_gradients with
+   * the given quadrature points.
+   *
+   * If you only need the quadrature points but not the values of the finite
+   * element function (you might want this for the adjoint interpolation), you
+   * can also use the function @p compute_point_locations alone.
+   *
+   * An example of how to use this function is the following:
+   *
+   * @code
+   *
+   * // Generate two triangulations
+   * Triangulation<dim> tria_1;
+   * Triangulation<dim> tria_2;
+   *
+   * // Read the triangulations from files, or build them up, or get them
+   * // from some place. Assume that tria_2 is *entirely* included in tria_1.
+   *
+   * // Associate a dof handler and a solution to the first triangulation
+   * DoFHandler<dim> dh_1 (tria_1);
+   * Vector<double> solution_1;
+   *
+   * // Do the same with the second
+   * DoFHandler<dim> dh_2 (tria_2);
+   * Vector<double> solution_2;
+   *
+   * // Set up the system, assemble matrices, solve problems and get the
+   * // Nobel prize on the first domain...
+   *
+   * // Now project it to the second domain
+   * FEFieldFunction<dim> fe_function_1 (dh_1, solution_1);
+   * VectorTools::project (dh_2, constraints_2, quad, fe_function_1, solution_2);
+   *
+   * // Or interpolate it...
+   * Vector<double> solution_3;
+   * VectorTools::interpolate (dh_2, fe_function_1, solution_3);
+   *
+   * @endcode
+   *
+   * The snippet of code above will work assuming that the second
+   * triangulation is entirely included in the first one.
+   *
+   * FEFieldFunction is designed to be an easy way to get the results of your
+   * computations across different, possibly non matching, grids. No knowledge
+   * of the location of the points is assumed in this class, which makes it
+   * rely entirely on the GridTools::find_active_cell_around_point utility for
+   * its job. However the class can be fed an "educated guess" of where the
+   * points that will be computed actually are by using the
+   * FEFieldFunction::set_active_cell method, so if you have a smart way to
+   * tell where your points are, you will save a lot of computational time by
+   * letting this class know.
+   *
+   *
+   * <h3>Using FEFieldFunction with parallel::distributed::Triangulation</h3>
+   *
+   * When using this class with a parallel distributed triangulation object
+   * and evaluating the solution at a particular point, not every processor
+   * will own the cell at which the solution is evaluated. Rather, it may be
+   * that the cell in which this point is found is in fact a ghost or
+   * artificial cell (see
+   * @ref GlossArtificialCell
+   * and
+   * @ref GlossGhostCell).
+   * If the cell is artificial, we have no access to the solution there and
+   * functions that evaluate the solution at such a point will trigger an
+   * exception of type VectorTools::ExcPointNotAvailableHere. The same kind of
+   * exception will also be produced if the cell is a ghost cell: On such
+   * cells, one could in principle evaluate the solution, but it becomes
+   * easier if we do not allow this because then there is exactly one
+   * processor in a parallel distributed computation that can indeed evaluate
+   * the solution. Consequently, it is clear which processor is responsible
+   * for producing output if the point evaluation is done as a postprocessing
+   * step.
+   *
+   * To deal with this situation, you will want to use code as follows when,
+   * for example, evaluating the solution at the origin (here using a parallel
+   * TrilinosWrappers vector to hold the solution):
+   * @code
+   *   Functions::FEFieldFunction<dim,DoFHandler<dim>,TrilinosWrappers::MPI::Vector>
+   *     solution_function (dof_handler, solution);
+   *   Point<dim> origin = Point<dim>();
+   *
+   *   double solution_at_origin;
+   *   bool   point_found = true;
+   *   try
+   *     {
+   *       solution_at_origin = solution_function.value (origin);
+   *     }
+   *   catch (const VectorTools::ExcPointNotAvailableHere &)
+   *     {
+   *       point_found = false;
+   *     }
+   *
+   *   if (point_found == true)
+   *     ...do something...;
+   * @endcode
+   *
+   * @ingroup functions
+   * @author Luca Heltai, 2006, Markus Buerg, 2012, Wolfgang Bangerth, 2013
+   */
+  template <int dim,
+            typename DoFHandlerType=DoFHandler<dim>,
+            typename VectorType=Vector<double> >
+  class FEFieldFunction :  public Function<dim>
+  {
+  public:
+    /**
+     * Construct a vector function. A smart pointer to the dof handler is
+     * stored, so you have to make sure that the handler remains valid for the
+     * entire lifetime of this object. The number of components of this
+     * function is equal to the number of components of the finite element
+     * object. If a mapping is specified, that is what is used to find out where
+     * the points lie. Otherwise the standard Q1 mapping is used.
+     */
+    FEFieldFunction (const DoFHandlerType &dh,
+                     const VectorType     &data_vector,
+                     const Mapping<dim>   &mapping = StaticMappingQ1<dim>::mapping);
+
+    /**
+     * Set the current cell. If you know in advance where your points lie, you
+     * can tell this object by calling this function. This will speed things
+     * up a little.
+     */
+    void set_active_cell (const typename DoFHandlerType::active_cell_iterator &newcell);
+
+    /**
+     * Get one vector value at the given point. It is inefficient to use
+     * single points. If you need more than one at a time, use the
+     * vector_value_list() function. For efficiency reasons, it is better if
+     * all the points lie on the same cell. This is not mandatory, however it
+     * does speed things up.
+     *
+     * @note When using this function on a
+     * parallel::distributed::Triangulation you may get an exception when
+     * trying to evaluate the solution at a point that does not lie in a
+     * locally owned cell (see
+     * @ref GlossLocallyOwnedCell).
+     * See the section in the general documentation of this class for more
+     * information.
+     */
+    virtual void vector_value (const Point<dim> &p,
+                               Vector<double>   &values) const;
+
+    /**
+     * Return the value of the function at the given point. Unless there is
+     * only one component (i.e. the function is scalar), you should state the
+     * component you want to have evaluated; it defaults to zero, i.e. the
+     * first component. It is inefficient to use single points. If you need
+     * more than one at a time, use the vector_value_list function. For
+     * efficiency reasons, it is better if all the points lie on the same
+     * cell. This is not mandatory, however it does speed things up.
+     *
+     * @note When using this function on a
+     * parallel::distributed::Triangulation you may get an exception when
+     * trying to evaluate the solution at a point that does not lie in a
+     * locally owned cell (see
+     * @ref GlossLocallyOwnedCell).
+     * See the section in the general documentation of this class for more
+     * information.
+     */
+    virtual double value (const Point< dim > &p,
+                          const unsigned int  component = 0)    const;
+
+    /**
+     * Set @p values to the point values of the specified component of the
+     * function at the @p points. It is assumed that @p values already has the
+     * right size, i.e. the same size as the points array. This is rather
+     * efficient if all the points lie on the same cell. If this is not the
+     * case, things may slow down a bit.
+     *
+     * @note When using this function on a
+     * parallel::distributed::Triangulation you may get an exception when
+     * trying to evaluate the solution at a point that does not lie in a
+     * locally owned cell (see
+     * @ref GlossLocallyOwnedCell).
+     * See the section in the general documentation of this class for more
+     * information.
+     */
+    virtual void value_list (const std::vector<Point< dim > >     &points,
+                             std::vector< double > &values,
+                             const unsigned int  component = 0)    const;
+
+
+    /**
+     * Set @p values to the point values of the function at the @p points. It
+     * is assumed that @p values already has the right size, i.e. the same
+     * size as the points array. This is rather efficient if all the points
+     * lie on the same cell. If this is not the case, things may slow down a
+     * bit.
+     *
+     * @note When using this function on a
+     * parallel::distributed::Triangulation you may get an exception when
+     * trying to evaluate the solution at a point that does not lie in a
+     * locally owned cell (see
+     * @ref GlossLocallyOwnedCell).
+     * See the section in the general documentation of this class for more
+     * information.
+     */
+    virtual void vector_value_list (const std::vector<Point< dim > >     &points,
+                                    std::vector< Vector<double> > &values) const;
+
+    /**
+     * Return the gradient of all components of the function at the given
+     * point.  It is inefficient to use single points. If you need more than
+     * one at a time, use the vector_gradient_list() function. For efficiency
+     * reasons, it is better if all the points lie on the same cell. This is
+     * not mandatory, however it does speed things up.
+     *
+     * @note When using this function on a
+     * parallel::distributed::Triangulation you may get an exception when
+     * trying to evaluate the solution at a point that does not lie in a
+     * locally owned cell (see
+     * @ref GlossLocallyOwnedCell).
+     * See the section in the general documentation of this class for more
+     * information.
+     */
+    virtual void
+    vector_gradient (const Point< dim > &p,
+                     std::vector< Tensor< 1, dim > > &gradients) const;
+
+    /**
+     * Return the gradient of the specified component of the function at the
+     * given point. It is inefficient to use single points. If you need more
+     * than one at a time, use the gradient_list() function. For efficiency
+     * reasons, it is better if all the points lie on the same cell. This is
+     * not mandatory, however it does speed things up.
+     *
+     * @note When using this function on a
+     * parallel::distributed::Triangulation you may get an exception when
+     * trying to evaluate the solution at a point that does not lie in a
+     * locally owned cell (see
+     * @ref GlossLocallyOwnedCell).
+     * See the section in the general documentation of this class for more
+     * information.
+     */
+    virtual Tensor<1,dim> gradient(const Point< dim > &p,
+                                   const unsigned int component = 0)const;
+
+    /**
+     * Return the gradient of all components of the function at all the given
+     * points. This is rather efficient if all the points lie on the same
+     * cell. If this is not the case, things may slow down a bit.
+     *
+     * @note When using this function on a
+     * parallel::distributed::Triangulation you may get an exception when
+     * trying to evaluate the solution at a point that does not lie in a
+     * locally owned cell (see
+     * @ref GlossLocallyOwnedCell).
+     * See the section in the general documentation of this class for more
+     * information.
+     */
+    virtual void
+    vector_gradient_list (const std::vector< Point< dim > > &p,
+                          std::vector<
+                          std::vector< Tensor< 1, dim > > > &gradients) const;
+
+    /**
+     * Return the gradient of the specified component of the function at all
+     * the given points.  This is rather efficient if all the points lie on
+     * the same cell. If this is not the case, things may slow down a bit.
+     *
+     * @note When using this function on a
+     * parallel::distributed::Triangulation you may get an exception when
+     * trying to evaluate the solution at a point that does not lie in a
+     * locally owned cell (see
+     * @ref GlossLocallyOwnedCell).
+     * See the section in the general documentation of this class for more
+     * information.
+     */
+    virtual void
+    gradient_list (const std::vector< Point< dim > > &p,
+                   std::vector< Tensor< 1, dim > > &gradients,
+                   const unsigned int component=0) const;
+
+
+    /**
+     * Compute the Laplacian of a given component at point <tt>p</tt>.
+     *
+     * @note When using this function on a
+     * parallel::distributed::Triangulation you may get an exception when
+     * trying to evaluate the solution at a point that does not lie in a
+     * locally owned cell (see
+     * @ref GlossLocallyOwnedCell).
+     * See the section in the general documentation of this class for more
+     * information.
+     */
+    virtual double
+    laplacian (const Point<dim>   &p,
+               const unsigned int  component = 0) const;
+
+    /**
+     * Compute the Laplacian of all components at point <tt>p</tt> and store
+     * them in <tt>values</tt>.
+     *
+     * @note When using this function on a
+     * parallel::distributed::Triangulation you may get an exception when
+     * trying to evaluate the solution at a point that does not lie in a
+     * locally owned cell (see
+     * @ref GlossLocallyOwnedCell).
+     * See the section in the general documentation of this class for more
+     * information.
+     */
+    virtual void
+    vector_laplacian (const Point<dim>   &p,
+                      Vector<double>     &values) const;
+
+    /**
+     * Compute the Laplacian of one component at a set of points.
+     *
+     * @note When using this function on a
+     * parallel::distributed::Triangulation you may get an exception when
+     * trying to evaluate the solution at a point that does not lie in a
+     * locally owned cell (see
+     * @ref GlossLocallyOwnedCell).
+     * See the section in the general documentation of this class for more
+     * information.
+     */
+    virtual void
+    laplacian_list (const std::vector<Point<dim> > &points,
+                    std::vector<double>            &values,
+                    const unsigned int              component = 0) const;
+
+    /**
+     * Compute the Laplacians of all components at a set of points.
+     *
+     * @note When using this function on a
+     * parallel::distributed::Triangulation you may get an exception when
+     * trying to evaluate the solution at a point that does not lie in a
+     * locally owned cell (see
+     * @ref GlossLocallyOwnedCell).
+     * See the section in the general documentation of this class for more
+     * information.
+     */
+    virtual void
+    vector_laplacian_list (const std::vector<Point<dim> > &points,
+                           std::vector<Vector<double> >   &values) const;
+
+    /**
+     * Create quadrature rules. This function groups the points into blocks
+     * that live in the same cell, and fills up three vectors: @p cells, @p
+     * qpoints and @p maps. The first is a list of the cells that contain the
+     * points, the second is a list of quadrature points matching each cell of
+     * the first list, and the third contains the index of the given
+     * quadrature points, i.e., @p points[maps[3][4]] ends up as the 5th
+     * quadrature point in the 4th cell. This is where optimization would
+     * help. This function returns the number of cells that contain the given
+     * set of points.
+     */
+    unsigned int
+    compute_point_locations
+    (const std::vector<Point<dim> >                              &points,
+     std::vector<typename DoFHandlerType::active_cell_iterator > &cells,
+     std::vector<std::vector<Point<dim> > >                      &qpoints,
+     std::vector<std::vector<unsigned int> >                     &maps) const;
+
+    /**
+     * @deprecated Use VectorTools::ExcPointNotAvailableHere instead.
+     */
+    typedef VectorTools::ExcPointNotAvailableHere ExcPointNotAvailableHere DEAL_II_DEPRECATED;
+
+  private:
+    /**
+     * Typedef holding the local cell_hint.
+     */
+    typedef
+    Threads::ThreadLocalStorage <typename DoFHandlerType::active_cell_iterator >
+    cell_hint_t;
+
+    /**
+     * Pointer to the dof handler.
+     */
+    SmartPointer<const DoFHandlerType,FEFieldFunction<dim, DoFHandlerType, VectorType> > dh;
+
+    /**
+     * A reference to the actual data vector.
+     */
+    const VectorType &data_vector;
+
+    /**
+     * A reference to the mapping being used.
+     */
+    const Mapping<dim> &mapping;
+
+    /**
+     * The latest cell hint.
+     */
+    mutable cell_hint_t cell_hint;
+
+    /**
+     * Store the number of components of this function.
+     */
+    const unsigned int n_components;
+
+    /**
+     * Given a cell, return the reference coordinates of the given point
+     * within this cell if it indeed lies within the cell. Otherwise return an
+     * uninitialized boost::optional object.
+     */
+    boost::optional<Point<dim> >
+    get_reference_coordinates (const typename DoFHandlerType::active_cell_iterator &cell,
+                               const Point<dim>                                    &point) const;
+  };
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/numerics/fe_field_function.templates.h b/include/deal.II/numerics/fe_field_function.templates.h
new file mode 100644
index 0000000..df5bbd4
--- /dev/null
+++ b/include/deal.II/numerics/fe_field_function.templates.h
@@ -0,0 +1,595 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2007 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/grid/grid_tools.h>
+#include <deal.II/hp/fe_collection.h>
+#include <deal.II/hp/fe_values.h>
+#include <deal.II/hp/mapping_collection.h>
+#include <deal.II/hp/q_collection.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/numerics/fe_field_function.h>
+#include <deal.II/numerics/vector_tools.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace Functions
+{
+
+  // Build a function object that evaluates the finite element field described
+  // by the degrees of freedom of @p mydh and the coefficient vector @p myv,
+  // using @p mymapping to go between real and reference cells.
+  //
+  // The vector and the mapping are stored as references and the DoF handler
+  // through a SmartPointer, so all three have to stay alive for as long as this
+  // object is in use. The cell hint starts out as the past-the-end iterator,
+  // which the evaluation functions interpret as "no hint recorded yet".
+  template <int dim, typename DoFHandlerType, typename VectorType>
+  FEFieldFunction<dim, DoFHandlerType, VectorType>::
+  FEFieldFunction (const DoFHandlerType &mydh,
+                   const VectorType     &myv,
+                   const Mapping<dim>   &mymapping)
+    : Function<dim>(mydh.get_fe().n_components()),
+      dh(&mydh, "FEFieldFunction"),
+      data_vector(myv),
+      mapping(mymapping),
+      cell_hint(dh->end()),
+      n_components(mydh.get_fe().n_components())
+  {}
+
+
+
+  // Record @p newcell as the cell in which the next point evaluation should
+  // look first. The hint lives in thread-local storage, so this only affects
+  // evaluations done on the calling thread.
+  template <int dim, typename DoFHandlerType, typename VectorType>
+  void
+  FEFieldFunction<dim, DoFHandlerType, VectorType>::
+  set_active_cell (const typename DoFHandlerType::active_cell_iterator &newcell)
+  {
+    typename DoFHandlerType::active_cell_iterator &hint = cell_hint.get();
+    hint = newcell;
+  }
+
+
+
+  // Evaluate all components of the finite element field at the point @p p and
+  // store them in @p values, which must already have n_components entries.
+  //
+  // The cell remembered from the previous evaluation on this thread is tried
+  // first; only if @p p does not lie inside it is the mesh searched. If the
+  // cell found that way is not locally owned, ExcPointNotAvailableHere is
+  // thrown. On success the cell is stored as the new hint.
+  template <int dim, typename DoFHandlerType, typename VectorType>
+  void
+  FEFieldFunction<dim, DoFHandlerType, VectorType>::
+  vector_value (const Point<dim> &p,
+                Vector<double>   &values) const
+  {
+    typedef typename DoFHandlerType::active_cell_iterator cell_iterator_type;
+    typedef typename VectorType::value_type               number_type;
+
+    Assert (values.size() == n_components,
+            ExcDimensionMismatch(values.size(), n_components));
+
+    // Begin with the hinted cell, or with the first active cell if no hint has
+    // been recorded so far.
+    cell_iterator_type current_cell = cell_hint.get();
+    if (current_cell == dh->end())
+      current_cell = dh->begin_active();
+
+    boost::optional<Point<dim> >
+    unit_point = get_reference_coordinates (current_cell, p);
+    if (!unit_point)
+      {
+        // The hint did not contain the point: search the whole mesh.
+        const std::pair<typename dealii::internal::ActiveCellIterator<dim, dim, DoFHandlerType>::type, Point<dim> >
+        located = GridTools::find_active_cell_around_point (mapping, *dh, p);
+        AssertThrow (located.first->is_locally_owned(),
+                     VectorTools::ExcPointNotAvailableHere());
+
+        current_cell = located.first;
+        unit_point   = located.second;
+      }
+
+    cell_hint.get() = current_cell;
+
+    // Evaluate through a one-point quadrature formula that sits at the
+    // reference coordinates of p.
+    Quadrature<dim> point_quadrature (unit_point.get());
+    FEValues<dim> fe_values (mapping, current_cell->get_fe(), point_quadrature,
+                             update_values);
+    fe_values.reinit (current_cell);
+
+    std::vector<Vector<number_type> >
+    point_values (1, Vector<number_type>(values.size()));
+    fe_values.get_function_values (data_vector, point_values);
+    values = point_values[0];
+  }
+
+
+
+  // Return component @p comp of the field at the point @p p. All components
+  // are evaluated through vector_value() and the requested one is picked out.
+  template <int dim, typename DoFHandlerType, typename VectorType>
+  double
+  FEFieldFunction<dim, DoFHandlerType, VectorType>::
+  value (const Point<dim>   &p,
+         const unsigned int  comp) const
+  {
+    Vector<double> all_components (n_components);
+    vector_value (p, all_components);
+    return all_components(comp);
+  }
+
+
+
+  // Evaluate the gradients of all components of the finite element field at
+  // the point @p p and store them in @p gradients, which must already have
+  // n_components entries.
+  //
+  // The cell lookup works as in vector_value(): the thread-local cell hint is
+  // tried first, then the mesh is searched; ExcPointNotAvailableHere is thrown
+  // if the cell found by the search is not locally owned.
+  template <int dim, typename DoFHandlerType, typename VectorType>
+  void
+  FEFieldFunction<dim, DoFHandlerType, VectorType>::
+  vector_gradient (const Point<dim>            &p,
+                   std::vector<Tensor<1,dim> > &gradients) const
+  {
+    typedef typename DoFHandlerType::active_cell_iterator cell_iterator_type;
+    typedef Tensor<1,dim,typename VectorType::value_type> gradient_type;
+
+    Assert (gradients.size() == n_components,
+            ExcDimensionMismatch(gradients.size(), n_components));
+
+    // Begin with the hinted cell, or with the first active cell if no hint has
+    // been recorded so far.
+    cell_iterator_type current_cell = cell_hint.get();
+    if (current_cell == dh->end())
+      current_cell = dh->begin_active();
+
+    boost::optional<Point<dim> >
+    unit_point = get_reference_coordinates (current_cell, p);
+    if (!unit_point)
+      {
+        // The hint did not contain the point: search the whole mesh.
+        const std::pair<typename dealii::internal::ActiveCellIterator<dim, dim, DoFHandlerType>::type, Point<dim> >
+        located = GridTools::find_active_cell_around_point (mapping, *dh, p);
+        AssertThrow (located.first->is_locally_owned(),
+                     VectorTools::ExcPointNotAvailableHere());
+
+        current_cell = located.first;
+        unit_point   = located.second;
+      }
+
+    cell_hint.get() = current_cell;
+
+    // Evaluate through a one-point quadrature formula that sits at the
+    // reference coordinates of p.
+    Quadrature<dim> point_quadrature (unit_point.get());
+    FEValues<dim> fe_values (mapping, current_cell->get_fe(), point_quadrature,
+                             update_gradients);
+    fe_values.reinit (current_cell);
+
+    // FIXME: a temporary is needed because get_function_gradients() writes
+    // into tensors that use the scalar type of the data vector, whereas this
+    // function hands its result back as Tensor<1,dim> objects.
+    std::vector<std::vector<gradient_type> >
+    point_gradients (1, std::vector<gradient_type>(n_components));
+    fe_values.get_function_gradients (data_vector, point_gradients);
+    gradients = std::vector<Tensor<1,dim> >(point_gradients[0].begin(),
+                                            point_gradients[0].end());
+  }
+
+
+
+  // Return the gradient of component @p comp of the field at the point @p p.
+  // The gradients of all components are computed through vector_gradient()
+  // and the requested one is picked out.
+  template <int dim, typename DoFHandlerType, typename VectorType>
+  Tensor<1,dim>
+  FEFieldFunction<dim, DoFHandlerType, VectorType>::
+  gradient (const Point<dim>   &p,
+            const unsigned int  comp) const
+  {
+    std::vector<Tensor<1,dim> > all_gradients (n_components);
+    vector_gradient (p, all_gradients);
+    return all_gradients[comp];
+  }
+
+
+
+  // Evaluate the Laplacians of all components of the finite element field at
+  // the point @p p and store them in @p values, which must already have
+  // n_components entries.
+  //
+  // The cell lookup works as in vector_value(): the thread-local cell hint is
+  // tried first, then the mesh is searched; ExcPointNotAvailableHere is thrown
+  // if the cell found by the search is not locally owned.
+  template <int dim, typename DoFHandlerType, typename VectorType>
+  void
+  FEFieldFunction<dim, DoFHandlerType, VectorType>::
+  vector_laplacian (const Point<dim> &p,
+                    Vector<double>   &values) const
+  {
+    typedef typename DoFHandlerType::active_cell_iterator cell_iterator_type;
+    typedef typename VectorType::value_type               number_type;
+
+    Assert (values.size() == n_components,
+            ExcDimensionMismatch(values.size(), n_components));
+
+    // Begin with the hinted cell, or with the first active cell if no hint has
+    // been recorded so far.
+    cell_iterator_type current_cell = cell_hint.get();
+    if (current_cell == dh->end())
+      current_cell = dh->begin_active();
+
+    boost::optional<Point<dim> >
+    unit_point = get_reference_coordinates (current_cell, p);
+    if (!unit_point)
+      {
+        // The hint did not contain the point: search the whole mesh.
+        const std::pair<typename dealii::internal::ActiveCellIterator<dim, dim, DoFHandlerType>::type, Point<dim> >
+        located = GridTools::find_active_cell_around_point (mapping, *dh, p);
+        AssertThrow (located.first->is_locally_owned(),
+                     VectorTools::ExcPointNotAvailableHere());
+
+        current_cell = located.first;
+        unit_point   = located.second;
+      }
+
+    cell_hint.get() = current_cell;
+
+    // Evaluate through a one-point quadrature formula that sits at the
+    // reference coordinates of p. Laplacians are obtained from the Hessians.
+    Quadrature<dim> point_quadrature (unit_point.get());
+    FEValues<dim> fe_values (mapping, current_cell->get_fe(), point_quadrature,
+                             update_hessians);
+    fe_values.reinit (current_cell);
+
+    std::vector<Vector<number_type> >
+    point_laplacians (1, Vector<number_type>(values.size()));
+    fe_values.get_function_laplacians (data_vector, point_laplacians);
+    values = point_laplacians[0];
+  }
+
+
+
+  // Return the Laplacian of component @p comp of the field at the point @p p.
+  // The Laplacians of all components are computed through vector_laplacian()
+  // and the requested one is picked out.
+  template <int dim, typename DoFHandlerType, typename VectorType>
+  double
+  FEFieldFunction<dim, DoFHandlerType, VectorType>::
+  laplacian (const Point<dim>   &p,
+             const unsigned int  comp) const
+  {
+    Vector<double> all_laplacians (n_components);
+    vector_laplacian (p, all_laplacians);
+    return all_laplacians[comp];
+  }
+
+
+  // Now the list versions
+  // ==============================
+
+  // Evaluate all components of the field at every point in @p points and
+  // store the result for points[k] in values[k]. Both arrays must have the
+  // same length.
+  //
+  // The points are first grouped by the cell that contains them; each group
+  // is then evaluated in one go with a quadrature formula made up of the
+  // reference coordinates of the points in that cell.
+  template <int dim, typename DoFHandlerType, typename VectorType>
+  void
+  FEFieldFunction<dim, DoFHandlerType, VectorType>::
+  vector_value_list (const std::vector<Point< dim > > &points,
+                     std::vector< Vector<double> >    &values) const
+  {
+    typedef typename VectorType::value_type number_type;
+
+    Assert(points.size() == values.size(),
+           ExcDimensionMismatch(points.size(), values.size()));
+
+    // Sort the points into cells.
+    std::vector<typename DoFHandlerType::active_cell_iterator> cells;
+    std::vector<std::vector<Point<dim> > >                     qpoints;
+    std::vector<std::vector<unsigned int> >                    maps;
+    const unsigned int n_cells = compute_point_locations (points, cells, qpoints, maps);
+
+    hp::MappingCollection<dim> mapping_collection (mapping);
+    hp::FECollection<dim>      fe_collection (dh->get_fe ());
+    hp::QCollection<dim>       quadrature_collection;
+
+    // One quadrature formula per cell; the weights are never used for point
+    // evaluation and are simply chosen to be equal.
+    for (unsigned int c=0; c<n_cells; ++c)
+      {
+        const unsigned int n_q_points = qpoints[c].size();
+        const std::vector<double> weights (n_q_points,
+                                           1./static_cast<double>(n_q_points));
+        quadrature_collection.push_back (Quadrature<dim> (qpoints[c], weights));
+      }
+
+    hp::FEValues<dim> hp_fe_values (mapping_collection, fe_collection,
+                                    quadrature_collection, update_values);
+
+    for (unsigned int c=0; c<n_cells; ++c)
+      {
+        // Quadrature formula number c belongs to cell number c; mapping
+        // index 0 is the only mapping in the collection.
+        hp_fe_values.reinit (cells[c], c, 0);
+
+        const unsigned int n_q_points = qpoints[c].size();
+        std::vector<Vector<number_type> >
+        cell_values (n_q_points, Vector<number_type>(n_components));
+        hp_fe_values.get_present_fe_values ().get_function_values (data_vector, cell_values);
+
+        // Scatter the results back into the caller's ordering of the points.
+        for (unsigned int q=0; q<n_q_points; ++q)
+          values[maps[c][q]] = cell_values[q];
+      }
+  }
+
+
+
+  // Evaluate component @p component of the field at every point in @p points
+  // and store the result for points[k] in values[k]. Both arrays must have
+  // the same length. All components are computed through vector_value_list()
+  // and the requested one is copied out.
+  template <int dim, typename DoFHandlerType, typename VectorType>
+  void
+  FEFieldFunction<dim, DoFHandlerType, VectorType>::
+  value_list (const std::vector<Point< dim > > &points,
+              std::vector< double >            &values,
+              const unsigned int                component) const
+  {
+    Assert(points.size() == values.size(),
+           ExcDimensionMismatch(points.size(), values.size()));
+
+    std::vector<Vector<double> >
+    all_components (points.size(), Vector<double>(n_components));
+    vector_value_list (points, all_components);
+
+    for (unsigned int k=0; k<points.size(); ++k)
+      values[k] = all_components[k](component);
+  }
+
+
+
+  // Evaluate the gradients of all components of the field at every point in
+  // @p points and store the result for points[k] in values[k]. Both arrays
+  // must have the same length; each values[k] is resized to the number of
+  // gradients computed for that point.
+  //
+  // The points are first grouped by the cell that contains them; each group
+  // is then evaluated in one go with a quadrature formula made up of the
+  // reference coordinates of the points in that cell.
+  template <int dim, typename DoFHandlerType, typename VectorType>
+  void
+  FEFieldFunction<dim, DoFHandlerType, VectorType>::
+  vector_gradient_list (const std::vector<Point< dim > >           &points,
+                        std::vector<std::vector< Tensor<1,dim> > > &values) const
+  {
+    typedef Tensor<1,dim,typename VectorType::value_type> gradient_type;
+
+    Assert(points.size() == values.size(),
+           ExcDimensionMismatch(points.size(), values.size()));
+
+    // Sort the points into cells.
+    std::vector<typename DoFHandlerType::active_cell_iterator> cells;
+    std::vector<std::vector<Point<dim> > >                     qpoints;
+    std::vector<std::vector<unsigned int> >                    maps;
+    const unsigned int n_cells = compute_point_locations (points, cells, qpoints, maps);
+
+    hp::MappingCollection<dim> mapping_collection (mapping);
+    hp::FECollection<dim>      fe_collection (dh->get_fe ());
+    hp::QCollection<dim>       quadrature_collection;
+
+    // One quadrature formula per cell; the weights are never used for point
+    // evaluation and are simply chosen to be equal.
+    for (unsigned int c=0; c<n_cells; ++c)
+      {
+        const unsigned int n_q_points = qpoints[c].size();
+        const std::vector<double> weights (n_q_points,
+                                           1./static_cast<double>(n_q_points));
+        quadrature_collection.push_back (Quadrature<dim> (qpoints[c], weights));
+      }
+
+    hp::FEValues<dim> hp_fe_values (mapping_collection, fe_collection,
+                                    quadrature_collection, update_gradients);
+
+    for (unsigned int c=0; c<n_cells; ++c)
+      {
+        // Quadrature formula number c belongs to cell number c; mapping
+        // index 0 is the only mapping in the collection.
+        hp_fe_values.reinit (cells[c], c, 0);
+
+        const unsigned int n_q_points = qpoints[c].size();
+        std::vector<std::vector<gradient_type> >
+        cell_gradients (n_q_points, std::vector<gradient_type>(n_components));
+        hp_fe_values.get_present_fe_values ().get_function_gradients (data_vector, cell_gradients);
+
+        // Scatter the results back into the caller's ordering of the points,
+        // converting each tensor to Tensor<1,dim> on the way.
+        for (unsigned int q=0; q<n_q_points; ++q)
+          {
+            std::vector<Tensor<1,dim> > &target = values[maps[c][q]];
+            const unsigned int n_entries = cell_gradients[q].size();
+            target.resize (n_entries);
+            for (unsigned int k=0; k<n_entries; ++k)
+              target[k] = cell_gradients[q][k];
+          }
+      }
+  }
+
+  // Evaluate the gradient of component @p component of the field at every
+  // point in @p points and store the result for points[k] in values[k]. Both
+  // arrays must have the same length. The gradients of all components are
+  // computed through vector_gradient_list() and the requested one is copied
+  // out.
+  template <int dim, typename DoFHandlerType, typename VectorType>
+  void
+  FEFieldFunction<dim, DoFHandlerType, VectorType>::
+  gradient_list (const std::vector<Point< dim > > &points,
+                 std::vector< Tensor<1,dim> >     &values,
+                 const unsigned int                component) const
+  {
+    Assert(points.size() == values.size(),
+           ExcDimensionMismatch(points.size(), values.size()));
+
+    std::vector<std::vector<Tensor<1,dim> > >
+    all_gradients (points.size(), std::vector<Tensor<1,dim> >(n_components));
+    vector_gradient_list (points, all_gradients);
+
+    for (unsigned int k=0; k<points.size(); ++k)
+      values[k] = all_gradients[k][component];
+  }
+
+
+  // Evaluate the Laplacians of all components of the field at every point in
+  // @p points and store the result for points[k] in values[k]. Both arrays
+  // must have the same length.
+  //
+  // The points are first grouped by the cell that contains them; each group
+  // is then evaluated in one go with a quadrature formula made up of the
+  // reference coordinates of the points in that cell.
+  template <int dim, typename DoFHandlerType, typename VectorType>
+  void
+  FEFieldFunction<dim, DoFHandlerType, VectorType>::
+  vector_laplacian_list (const std::vector<Point< dim > > &points,
+                         std::vector< Vector<double> >    &values) const
+  {
+    typedef typename VectorType::value_type number_type;
+
+    Assert(points.size() == values.size(),
+           ExcDimensionMismatch(points.size(), values.size()));
+
+    // Sort the points into cells.
+    std::vector<typename DoFHandlerType::active_cell_iterator> cells;
+    std::vector<std::vector<Point<dim> > >                     qpoints;
+    std::vector<std::vector<unsigned int> >                    maps;
+    const unsigned int n_cells = compute_point_locations (points, cells, qpoints, maps);
+
+    hp::MappingCollection<dim> mapping_collection (mapping);
+    hp::FECollection<dim>      fe_collection (dh->get_fe ());
+    hp::QCollection<dim>       quadrature_collection;
+
+    // One quadrature formula per cell; the weights are never used for point
+    // evaluation and are simply chosen to be equal.
+    for (unsigned int c=0; c<n_cells; ++c)
+      {
+        const unsigned int n_q_points = qpoints[c].size();
+        const std::vector<double> weights (n_q_points,
+                                           1./static_cast<double>(n_q_points));
+        quadrature_collection.push_back (Quadrature<dim> (qpoints[c], weights));
+      }
+
+    hp::FEValues<dim> hp_fe_values (mapping_collection, fe_collection,
+                                    quadrature_collection, update_hessians);
+
+    for (unsigned int c=0; c<n_cells; ++c)
+      {
+        // Quadrature formula number c belongs to cell number c; mapping
+        // index 0 is the only mapping in the collection.
+        hp_fe_values.reinit (cells[c], c, 0);
+
+        const unsigned int n_q_points = qpoints[c].size();
+        std::vector<Vector<number_type> >
+        cell_laplacians (n_q_points, Vector<number_type>(n_components));
+        hp_fe_values.get_present_fe_values ().get_function_laplacians (data_vector, cell_laplacians);
+
+        // Scatter the results back into the caller's ordering of the points.
+        for (unsigned int q=0; q<n_q_points; ++q)
+          values[maps[c][q]] = cell_laplacians[q];
+      }
+  }
+
+  // Evaluate the Laplacian of component @p component of the field at every
+  // point in @p points and store the result for points[k] in values[k]. Both
+  // arrays must have the same length. The Laplacians of all components are
+  // computed through vector_laplacian_list() and the requested one is copied
+  // out.
+  template <int dim, typename DoFHandlerType, typename VectorType>
+  void
+  FEFieldFunction<dim, DoFHandlerType, VectorType>::
+  laplacian_list (const std::vector<Point<dim> > &points,
+                  std::vector<double>            &values,
+                  const unsigned int              component) const
+  {
+    Assert(points.size() == values.size(),
+           ExcDimensionMismatch(points.size(), values.size()));
+
+    std::vector<Vector<double> >
+    all_laplacians (points.size(), Vector<double>(n_components));
+    vector_laplacian_list (points, all_laplacians);
+
+    for (unsigned int k=0; k<points.size(); ++k)
+      values[k] = all_laplacians[k](component);
+  }
+
+
+
+  // Sort the given @p points into the cells that contain them.
+  //
+  // On return, @p cells lists the cells that were visited, @p qpoints[i]
+  // holds the reference coordinates of all points assigned to cells[i], and
+  // @p maps[i][q] is the index into @p points of the point whose reference
+  // coordinates are qpoints[i][q]. The return value is the number of cells,
+  // i.e. the common size of the three output arrays. All three output arrays
+  // are cleared first; for an empty list of points the function returns 0.
+  //
+  // Throws VectorTools::ExcPointNotAvailableHere if a mesh search ends up in
+  // a cell that is not locally owned.
+  template <int dim, typename DoFHandlerType, typename VectorType>
+  unsigned int FEFieldFunction<dim, DoFHandlerType, VectorType>::
+  compute_point_locations
+  (const std::vector<Point<dim> >                              &points,
+   std::vector<typename DoFHandlerType::active_cell_iterator > &cells,
+   std::vector<std::vector<Point<dim> > >                      &qpoints,
+   std::vector<std::vector<unsigned int> >                     &maps) const
+  {
+    // Number of points to classify.
+    const unsigned int np = points.size();
+
+    // Reset the output arguments.
+    cells.clear();
+    qpoints.clear();
+    maps.clear();
+
+    // The easy case: nothing to do.
+    if (np==0) return 0;
+
+    // point_flags[p] records whether point p has already been assigned to a
+    // cell.
+    std::vector<bool> point_flags(np, false);
+
+    // Stays true for as long as there are points that have not been
+    // classified yet.
+    bool left_over = true;
+
+    // Cell in which to look for the first point: the hinted cell, or the
+    // first active cell if no hint has been recorded so far.
+    typename DoFHandlerType::active_cell_iterator cell = cell_hint.get();
+    if (cell == dh->end())
+      cell = dh->begin_active();
+
+    {
+      // See whether the first point lies inside this cell.
+      // get_reference_coordinates() returns an empty optional both when the
+      // transformed coordinates lie outside the reference cell and when the
+      // transformation throws; in either case fall back to searching the
+      // whole mesh.
+      //
+      // Note that point_flags[0] is only set in the fallback branch. The
+      // loop below starts at index 1 and first_outside never decreases, so
+      // the flag of point 0 is never read afterwards.
+      boost::optional<Point<dim> >
+      qp = get_reference_coordinates (cell, points[0]);
+      if (!qp)
+        {
+          const std::pair<typename dealii::internal::ActiveCellIterator<dim, dim, DoFHandlerType>::type, Point<dim> >
+          my_pair  = GridTools::find_active_cell_around_point
+                     (mapping, *dh, points[0]);
+          AssertThrow (my_pair.first->is_locally_owned(),
+                       VectorTools::ExcPointNotAvailableHere());
+
+          cell = my_pair.first;
+          qp = my_pair.second;
+          point_flags[0] = true;
+        }
+
+      // Put in the first point.
+      cells.push_back(cell);
+      qpoints.push_back(std::vector<Point<dim> >(1, qp.get()));
+      maps.push_back(std::vector<unsigned int> (1, 0));
+    }
+
+
+    // Check if we need to do anything else
+    if (points.size() > 1)
+      left_over = true;
+    else
+      left_over = false;
+
+
+    // This is the first index of a non processed point
+    unsigned int first_outside = 1;
+
+    // And this is the index of the current cell
+    unsigned int c = 0;
+
+    while (left_over == true)
+      {
+        // Assume this is the last round until a point turns out not to lie
+        // in the current cell.
+        left_over = false;
+        Assert(first_outside < np,
+               ExcIndexRange(first_outside, 0, np));
+
+        // Try to place every still unassigned point into the current cell.
+        for (unsigned int p=first_outside; p<np; ++p)
+          if (point_flags[p] == false)
+            {
+              // same logic as above
+              const boost::optional<Point<dim> >
+              qp = get_reference_coordinates (cells[c], points[p]);
+              if (qp)
+                {
+                  point_flags[p] = true;
+                  qpoints[c].push_back(qp.get());
+                  maps[c].push_back(p);
+                }
+              else
+                {
+                  // Set things up for the next round: remember the first
+                  // point that did not fit into the current cell.
+                  if (left_over == false)
+                    first_outside = p;
+                  left_over = true;
+                }
+            }
+        // If we got here and there is nothing left over, we are done.
+        // Otherwise find the cell around the first unassigned point and
+        // make it the current cell for the next round.
+        if (left_over == true)
+          {
+            const std::pair<typename dealii::internal::ActiveCellIterator<dim, dim, DoFHandlerType>::type, Point<dim> > my_pair
+              = GridTools::find_active_cell_around_point (mapping, *dh, points[first_outside]);
+            AssertThrow (my_pair.first->is_locally_owned(),
+                         VectorTools::ExcPointNotAvailableHere());
+
+            cells.push_back(my_pair.first);
+            qpoints.push_back(std::vector<Point<dim> >(1, my_pair.second));
+            maps.push_back(std::vector<unsigned int>(1, first_outside));
+            c++;
+            point_flags[first_outside] = true;
+            // And check if we can exit the loop now: if the point just
+            // placed was the last one in the list, nothing can be left.
+            if (first_outside == np-1)
+              left_over = false;
+          }
+      }
+
+    // Up to here c was the index of the last cell; increment it so that it
+    // becomes the number of cells.
+    ++c;
+    // Debug Checking
+    Assert(c == cells.size(), ExcInternalError());
+
+    Assert(c == maps.size(),
+           ExcDimensionMismatch(c, maps.size()));
+
+    Assert(c == qpoints.size(),
+           ExcDimensionMismatch(c, qpoints.size()));
+
+#ifdef DEBUG
+    unsigned int qps = 0;
+    // The number of points in all the cells must be the same as the number
+    // of points we started off from.
+    for (unsigned int n=0; n<c; ++n)
+      {
+        Assert(qpoints[n].size() == maps[n].size(),
+               ExcDimensionMismatch(qpoints[n].size(), maps[n].size()));
+        qps += qpoints[n].size();
+      }
+    Assert(qps == np,
+           ExcDimensionMismatch(qps, np));
+#endif
+
+    return c;
+  }
+
+
+  // Return the reference coordinates of @p point with respect to @p cell if
+  // the point lies inside that cell, and an empty boost::optional otherwise.
+  //
+  // The mapping can signal "outside" in two ways: by returning coordinates
+  // that lie outside the reference cell, or by throwing
+  // ExcTransformationFailed. Both are turned into an empty result here.
+  template <int dim, typename DoFHandlerType, typename VectorType>
+  boost::optional<Point<dim> >
+  FEFieldFunction<dim, DoFHandlerType, VectorType>::
+  get_reference_coordinates (const typename DoFHandlerType::active_cell_iterator &cell,
+                             const Point<dim>                                    &point) const
+  {
+    // Empty unless the point turns out to lie inside the cell.
+    boost::optional<Point<dim> > reference_point;
+
+    try
+      {
+        const Point<dim> unit_point
+          = mapping.transform_real_to_unit_cell (cell, point);
+        if (GeometryInfo<dim>::is_inside_unit_cell (unit_point))
+          reference_point = unit_point;
+      }
+    catch (const typename Mapping<dim>::ExcTransformationFailed &)
+      {
+        // The transformation failed, so assume that the point is outside
+        // and leave the result empty.
+      }
+
+    return reference_point;
+  }
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/include/deal.II/numerics/histogram.h b/include/deal.II/numerics/histogram.h
new file mode 100644
index 0000000..fb85886
--- /dev/null
+++ b/include/deal.II/numerics/histogram.h
@@ -0,0 +1,231 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__histogram_h
+#define dealii__histogram_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/lac/vector.h>
+#include <vector>
+#include <string>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/**
+ * This class provides some facilities to generate 2d and 3d histograms. It is
+ * used by giving it one or several data sets and a rule for how to break the
+ * range of values therein into intervals (e.g. linear spacing or logarithmic
+ * spacing of intervals). The values are then sorted into the different
+ * intervals and the number of values in each interval is stored for output
+ * later. In case only one data set was given, the resulting histogram will be
+ * a 2d one, while it will be a 3d one if more than one data set was given.
+ * For more than one data set, the same intervals are used for each of them
+ * anyway, to make comparison easier.
+ *
+ *
+ * <h3>Ways to generate the intervals</h3>
+ *
+ * At present, the following schemes for interval spacing are implemented:
+ * <ul>
+ * <li> Linear spacing: The intervals are distributed in constant steps
+ * between the minimum and maximum values of the data.
+ * <li> Logarithmic spacing: The intervals are distributed in constant steps
+ * between the minimum and maximum values of the logs of the values. This
+ * scheme is only useful if the data has only positive values. Negative and
+ * zero values are sorted into the leftmost interval.
+ * </ul>
+ *
+ * To keep programs extensible, you can use the two functions @p
+ * get_interval_spacing_names and @p parse_interval_spacing, which always give
+ * you a complete list of spacing formats presently supported and are able to
+ * generate the respective value of the @p enum. If you use them, you can
+ * write your program in a way such that it only needs to be recompiled to
+ * take advantage of newly added formats, without changing your code.
+ *
+ *
+ * <h3>Output formats</h3>
+ *
+ * At present, only GNUPLOT output is supported.
+ *
+ *
+ * @ingroup textoutput
+ * @author Wolfgang Bangerth, 1999
+ */
+class Histogram
+{
+public:
+  /**
+   * Definition of several ways to arrange the spacing of intervals.
+   */
+  enum IntervalSpacing
+  {
+    linear, logarithmic
+  };
+
+
+  /**
+   * Take several lists of values, each of which produces one histogram; the
+   * histograms will then be arranged one behind the other.
+   *
+   * Using several data sets at once allows one to compare them more easily,
+   * since the intervals into which the data is sorted are the same for all
+   * data sets.
+   *
+   * The histograms will be arranged such that the computed intervals of the
+   * <tt>values[i][j]</tt> form the x-range, and the number of values in each
+   * interval will be the y-range (for 2d plots) or the z-range (for 3d
+   * plots). For 3d plots, the @p y_values parameter is used to assign each
+   * data set a value in the y direction, which is the depth coordinate in the
+   * resulting plot. For 2d plots, the @p y_values are ignored.
+   *
+   * If you give only one data set, i.e. <tt>values.size()==1</tt>, then the
+   * resulting histogram will be a 2d one.
+   *
+   * @p n_intervals denotes the number of intervals into which the data will
+   * be sorted; @p interval_spacing denotes the way the bounds of the
+   * intervals are computed. Refer to the general documentation for more
+   * information on this.
+   */
+  template <typename number>
+  void evaluate (const std::vector<Vector<number> > &values,
+                 const std::vector<double>                   &y_values,
+                 const unsigned int                           n_intervals,
+                 const IntervalSpacing                        interval_spacing = linear);
+
+  /**
+   * This function is only a wrapper to the above one in case you have only
+   * one data set.
+   */
+  template <typename number>
+  void evaluate (const Vector<number>          &values,
+                 const unsigned int             n_intervals,
+                 const IntervalSpacing          interval_spacing = linear);
+
+  /**
+   * Write the histogram computed by the @p evaluate function to a stream in a
+   * format suitable for the GNUPLOT program. The function generates 2d or 3d
+   * histograms.
+   */
+  void write_gnuplot (std::ostream &out) const;
+
+  /**
+   * Return the allowed names for the interval spacing as a string. At present
+   * this is "linear|logarithmic".
+   */
+  static std::string get_interval_spacing_names ();
+
+  /**
+   * Take a string containing one of the names returned by the above function
+   * and return the respective value of @p IntervalSpacing. Throw an error if
+   * the string is not a valid one.
+   */
+  static IntervalSpacing parse_interval_spacing (const std::string &name);
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  std::size_t memory_consumption () const;
+
+  /**
+   * Exception indicating that an input argument does not contain any data.
+   */
+  DeclExceptionMsg (ExcEmptyData,
+                    "Your input argument to this function does not appear to "
+                    "have any data in it.");
+  /**
+   * Exception indicating that two array sizes that must match do not. The
+   * two arguments are the offending sizes.
+   */
+  DeclException2 (ExcIncompatibleArraySize,
+                  int, int,
+                  << "The two array sizes " << arg1 << " and " << arg2
+                  << " must match, but don't.");
+  /**
+   * Exception indicating that the given name does not match any of the known
+   * formats. The argument is the offending name.
+   */
+  DeclException1 (ExcInvalidName,
+                  std::string,
+                  << "The given name <" << arg1
+                  << "> does not match any of the known formats.");
+
+private:
+  /**
+   * Structure denoting one interval.
+   */
+  struct Interval
+  {
+    /**
+     * Constructor. Sets the bounds and sets the number of values in this
+     * interval to zero.
+     */
+    Interval (const double left_point,
+              const double right_point);
+
+    /**
+     * Determine an estimate for the memory consumption (in bytes) of this
+     * object.
+     */
+    std::size_t memory_consumption () const;
+
+    /**
+     * Left bound of the interval.
+     */
+    double       left_point;
+
+    /**
+     * Right bound of the interval.
+     */
+    double       right_point;
+
+    /**
+     * Number of values in this interval.
+     */
+    unsigned int content;
+  };
+
+  /**
+   * "Less-than" operation which finds the minimal positive value by sorting
+   * zero and negative values to be larger than the largest positive number.
+   * Used to find the lower bound of the leftmost interval in the logarithmic
+   * interval spacing scheme.
+   *
+   * Return @p true, if (<tt>n1<n2</tt>, and (<tt>n1>0</tt> or
+   * <tt>n2<0</tt>)), or (n2<n1 and n1>0 and n2<=0). This in effect sorts all
+   * negative numbers to be larger than the largest positive number.
+   */
+  template <typename number>
+  static bool logarithmic_less (const number n1,
+                                const number n2);
+
+  /**
+   * Vector holding one set of intervals for each data set given to the @p
+   * evaluate function.
+   */
+  std::vector<std::vector<Interval> > intervals;
+
+  /**
+   * Values for the depth axis of 3d histograms. Stored in the @p evaluate
+   * function.
+   */
+  std::vector<double>            y_values;
+};
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/numerics/matrix_creator.templates.h b/include/deal.II/numerics/matrix_creator.templates.h
new file mode 100644
index 0000000..30953c1
--- /dev/null
+++ b/include/deal.II/numerics/matrix_creator.templates.h
@@ -0,0 +1,1894 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__matrix_creator_templates_h
+#define dealii__matrix_creator_templates_h
+
+#include <deal.II/base/function.h>
+#include <deal.II/base/quadrature.h>
+#include <deal.II/base/work_stream.h>
+#include <deal.II/base/geometry_info.h>
+#include <deal.II/base/quadrature.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/mapping_q1.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/hp/fe_values.h>
+#include <deal.II/hp/mapping_collection.h>
+#include <deal.II/numerics/matrix_tools.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/block_sparse_matrix.h>
+
+#ifdef DEAL_II_WITH_PETSC
+#  include <deal.II/lac/petsc_parallel_sparse_matrix.h>
+#  include <deal.II/lac/petsc_sparse_matrix.h>
+#  include <deal.II/lac/petsc_parallel_vector.h>
+#  include <deal.II/lac/petsc_vector.h>
+#  include <deal.II/lac/petsc_parallel_block_sparse_matrix.h>
+#endif
+
+#ifdef DEAL_II_WITH_TRILINOS
+#  include <deal.II/lac/trilinos_sparse_matrix.h>
+#  include <deal.II/lac/trilinos_vector.h>
+#  include <deal.II/lac/trilinos_block_sparse_matrix.h>
+#  include <deal.II/lac/trilinos_block_vector.h>
+#endif
+
+#include <algorithm>
+
+
+#include <algorithm>
+#include <set>
+#include <cmath>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace MatrixCreator
+{
+  namespace internal
+  {
+    /**
+     * Convenience abbreviation for a pair of active cell iterators of a
+     * DoF handler, denoting a half-open range of cells.
+     *
+     * This type works just like a
+     * <tt>std::pair<iterator,iterator></tt>
+     * but is templatized on the DoF handler type whose
+     * <tt>active_cell_iterator</tt> should be used.
+     */
+    template <typename DoFHandlerType>
+    struct IteratorRange
+    {
+      /**
+       * Typedef for the iterator type (the handler's active cell iterator).
+       */
+      typedef typename DoFHandlerType::active_cell_iterator active_cell_iterator;
+
+      /**
+       * Abbreviation for a pair of
+       * such iterators.
+       */
+      typedef std::pair<active_cell_iterator,active_cell_iterator> iterator_pair;
+
+      /**
+       * Constructor. Initialize
+       * the two members from the
+       * two given iterators.
+       */
+      IteratorRange (const active_cell_iterator &first,
+                     const active_cell_iterator &second);
+
+      /**
+       * Constructor taking a pair
+       * of iterators for
+       * initialization.
+       */
+      IteratorRange (const iterator_pair &ip);
+
+      /**
+       * Pair of iterators denoting
+       * a half-open range.
+       */
+      active_cell_iterator first, second;
+    };
+
+
+
+
+    template <typename DoFHandlerType>
+    inline
+    IteratorRange<DoFHandlerType>::
+    IteratorRange (const active_cell_iterator &first,
+                   const active_cell_iterator &second)
+      :
+      first (first),   // member 'first' is initialized from the parameter of the same name
+      second (second)  // likewise for 'second'
+    {}
+
+
+
+    template <typename DoFHandlerType>
+    inline
+    IteratorRange<DoFHandlerType>::IteratorRange (const iterator_pair &ip)
+      :
+      first (ip.first),   // first element of the pair becomes the start of the range
+      second (ip.second)  // second element of the pair becomes the end of the range
+    {}
+
+
+
+    namespace AssemblerData
+    {
+      template <int dim,
+                int spacedim>
+      struct Scratch // scratch data of mass_assembler()/laplace_assembler(): an hp::FEValues object plus value buffers
+      {
+        Scratch (const ::dealii::hp::FECollection<dim,spacedim> &fe,
+                 const UpdateFlags         update_flags,
+                 const Function<spacedim> *coefficient,
+                 const Function<spacedim> *rhs_function,
+                 const ::dealii::hp::QCollection<dim> &quadrature,
+                 const ::dealii::hp::MappingCollection<dim,spacedim> &mapping)
+          :
+          fe_collection (fe),
+          quadrature_collection (quadrature),
+          mapping_collection (mapping),
+          x_fe_values (mapping_collection,
+                       fe_collection,
+                       quadrature_collection,
+                       update_flags),
+          coefficient_values(quadrature_collection.max_n_quadrature_points()),
+          coefficient_vector_values (quadrature_collection.max_n_quadrature_points(),
+                                     dealii::Vector<double> (fe_collection.n_components())),
+          rhs_values(quadrature_collection.max_n_quadrature_points()),
+          rhs_vector_values(quadrature_collection.max_n_quadrature_points(),
+                            dealii::Vector<double> (fe_collection.n_components())),
+          coefficient (coefficient),
+          rhs_function (rhs_function),
+          update_flags (update_flags)
+        {}
+        // Copy constructor: x_fe_values is not copied but built anew from the referenced collections and stored flags.
+        Scratch (const Scratch &data)
+          :
+          fe_collection (data.fe_collection),
+          quadrature_collection (data.quadrature_collection),
+          mapping_collection (data.mapping_collection),
+          x_fe_values (mapping_collection,
+                       fe_collection,
+                       quadrature_collection,
+                       data.update_flags),
+          coefficient_values (data.coefficient_values),
+          coefficient_vector_values (data.coefficient_vector_values),
+          rhs_values (data.rhs_values),
+          rhs_vector_values (data.rhs_vector_values),
+          coefficient (data.coefficient),
+          rhs_function (data.rhs_function),
+          update_flags (data.update_flags)
+        {}
+        // The collections are held by reference, so the objects passed to the constructor must outlive this object.
+        const ::dealii::hp::FECollection<dim,spacedim>      &fe_collection;
+        const ::dealii::hp::QCollection<dim>                &quadrature_collection;
+        const ::dealii::hp::MappingCollection<dim,spacedim> &mapping_collection;
+        // FEValues object that the assembler functions reinit() on each cell.
+        ::dealii::hp::FEValues<dim,spacedim> x_fe_values;
+        // Buffers for coefficient / right hand side values at the quadrature points (scalar and vector-valued).
+        std::vector<double>                  coefficient_values;
+        std::vector<dealii::Vector<double> > coefficient_vector_values;
+        std::vector<double>                  rhs_values;
+        std::vector<dealii::Vector<double> > rhs_vector_values;
+        // Optional functions; the assemblers compare these against 0 and skip the corresponding terms if null.
+        const Function<spacedim>   *coefficient;
+        const Function<spacedim>   *rhs_function;
+        // Kept so that the copy constructor can build its own x_fe_values with the same flags.
+        const UpdateFlags update_flags;
+      };
+
+      // Cell-local results that copy_local_to_global() transfers into the global matrix / right hand side.
+      template <typename number>
+      struct CopyData
+      {
+        std::vector<types::global_dof_index> dof_indices; // global indices of the DoFs of the cell
+        FullMatrix<number>        cell_matrix;             // local matrix of the cell
+        dealii::Vector<number>    cell_rhs;                // local right hand side of the cell
+        const ConstraintMatrix   *constraints;             // set by the caller; used for distribute_local_to_global()
+      };
+    }
+    /** Cell worker: computes the mass matrix of @p cell (weighted by data.coefficient if set) and, if
+     *  data.rhs_function is set, the cell's right hand side vector; both are stored in @p copy_data. */
+    template <int dim,
+              int spacedim,
+              typename CellIterator,
+              typename number>
+    void mass_assembler (const CellIterator &cell,
+                         MatrixCreator::internal::AssemblerData::Scratch<dim,spacedim> &data,
+                         MatrixCreator::internal::AssemblerData::CopyData<number>      &copy_data)
+    {
+      data.x_fe_values.reinit (cell);
+      const FEValues<dim,spacedim> &fe_values = data.x_fe_values.get_present_fe_values ();
+      // sizes and finite element of the FEValues object selected for this cell
+      const unsigned int dofs_per_cell = fe_values.dofs_per_cell,
+                         n_q_points    = fe_values.n_quadrature_points;
+      const FiniteElement<dim,spacedim> &fe  = fe_values.get_fe();
+      const unsigned int n_components  = fe.n_components();
+      // each function must either be scalar or have as many components as the element
+      Assert(data.rhs_function == 0 ||
+             data.rhs_function->n_components==1 ||
+             data.rhs_function->n_components==n_components,
+             ::dealii::MatrixCreator::ExcComponentMismatch());
+      Assert(data.coefficient == 0 ||
+             data.coefficient->n_components==1 ||
+             data.coefficient->n_components==n_components,
+             ::dealii::MatrixCreator::ExcComponentMismatch());
+      // size the local objects for this cell and fetch the cell's global DoF indices
+      copy_data.cell_matrix.reinit (dofs_per_cell, dofs_per_cell);
+      copy_data.cell_rhs.reinit (dofs_per_cell);
+
+      copy_data.dof_indices.resize (dofs_per_cell);
+      cell->get_dof_indices (copy_data.dof_indices);
+      // evaluate the right hand side function (if any) at the quadrature points
+      const bool use_rhs_function = data.rhs_function != 0;
+      if (use_rhs_function)
+        {
+          if (data.rhs_function->n_components==1)
+            {
+              data.rhs_values.resize (n_q_points);
+              data.rhs_function->value_list (fe_values.get_quadrature_points(),
+                                             data.rhs_values);
+            }
+          else
+            {
+              data.rhs_vector_values.resize (n_q_points,
+                                             dealii::Vector<double>(n_components));
+              data.rhs_function->vector_value_list (fe_values.get_quadrature_points(),
+                                                    data.rhs_vector_values);
+            }
+        }
+      // likewise evaluate the coefficient (if any) at the quadrature points
+      const bool use_coefficient = data.coefficient != 0;
+      if (use_coefficient)
+        {
+          if (data.coefficient->n_components==1)
+            {
+              data.coefficient_values.resize (n_q_points);
+              data.coefficient->value_list (fe_values.get_quadrature_points(),
+                                            data.coefficient_values);
+            }
+          else
+            {
+              data.coefficient_vector_values.resize (n_q_points,
+                                                     dealii::Vector<double>(n_components));
+              data.coefficient->vector_value_list (fe_values.get_quadrature_points(),
+                                                   data.coefficient_vector_values);
+            }
+        }
+
+      // add_data accumulates the quadrature sum of the matrix or rhs entry currently being computed
+      double add_data;
+      const std::vector<double> &JxW = fe_values.get_JxW_values();
+      for (unsigned int i=0; i<dofs_per_cell; ++i)
+        if (fe.is_primitive ())
+          {
+            const unsigned int component_i =
+              fe.system_to_component_index(i).first;
+            const double *phi_i = &fe_values.shape_value(i,0); // NOTE(review): indexed by point below, which assumes the values of one shape function are contiguous - confirm
+            add_data = 0; // reset again inside the j loop before it is used
+
+            // use symmetry in the mass matrix here:
+            // just need to calculate the diagonal
+            // and half of the elements above the
+            // diagonal
+            for (unsigned int j=i; j<dofs_per_cell; ++j)
+              if ((n_components==1) ||
+                  (fe.system_to_component_index(j).first ==
+                   component_i))
+                {
+                  const double *phi_j = &fe_values.shape_value(j,0);
+                  add_data = 0;
+                  if (use_coefficient)
+                    {
+                      if (data.coefficient->n_components==1)
+                        for (unsigned int point=0; point<n_q_points; ++point)
+                          add_data += (phi_i[point] * phi_j[point] * JxW[point] *
+                                       data.coefficient_values[point]);
+                      else
+                        for (unsigned int point=0; point<n_q_points; ++point)
+                          add_data += (phi_i[point] * phi_j[point] * JxW[point] *
+                                       data.coefficient_vector_values[point](component_i));
+                    }
+                  else
+                    for (unsigned int point=0; point<n_q_points; ++point)
+                      add_data += phi_i[point] * phi_j[point] * JxW[point];
+
+                  // this is even ok for i==j, since then
+                  // we just write the same value twice.
+                  copy_data.cell_matrix(i,j) = add_data;
+                  copy_data.cell_matrix(j,i) = add_data;
+                }
+            // right hand side entry belonging to shape function i
+            if (use_rhs_function)
+              {
+                add_data = 0;
+                if (data.rhs_function->n_components==1)
+                  for (unsigned int point=0; point<n_q_points; ++point)
+                    add_data += phi_i[point] * JxW[point] *
+                                data.rhs_values[point];
+                else
+                  for (unsigned int point=0; point<n_q_points; ++point)
+                    add_data += phi_i[point] * JxW[point] *
+                                data.rhs_vector_values[point](component_i);
+                copy_data.cell_rhs(i) = add_data;
+              }
+          }
+        else
+          {
+            // non-primitive vector-valued FE, using
+            // symmetry again
+            for (unsigned int j=i; j<dofs_per_cell; ++j)
+              {
+                add_data = 0;
+                for (unsigned int comp_i = 0; comp_i < n_components; ++comp_i)
+                  if (fe.get_nonzero_components(i)[comp_i] &&
+                      fe.get_nonzero_components(j)[comp_i])
+                    {
+                      if (use_coefficient)
+                        {
+                          if (data.coefficient->n_components==1)
+                            for (unsigned int point=0; point<n_q_points; ++point)
+                              add_data += (fe_values.shape_value_component(i,point,comp_i) *
+                                           fe_values.shape_value_component(j,point,comp_i) *
+                                           JxW[point] *
+                                           data.coefficient_values[point]);
+                          else
+                            for (unsigned int point=0; point<n_q_points; ++point)
+                              add_data += (fe_values.shape_value_component(i,point,comp_i) *
+                                           fe_values.shape_value_component(j,point,comp_i) *
+                                           JxW[point] *
+                                           data.coefficient_vector_values[point](comp_i));
+                        }
+                      else
+                        for (unsigned int point=0; point<n_q_points; ++point)
+                          add_data += fe_values.shape_value_component(i,point,comp_i) *
+                                      fe_values.shape_value_component(j,point,comp_i) * JxW[point];
+                    }
+                // write the entry and its mirror image (symmetry)
+                copy_data.cell_matrix(i,j) = add_data;
+                copy_data.cell_matrix(j,i) = add_data;
+              }
+            // right hand side entry: sum over all components in which shape function i is nonzero
+            if (use_rhs_function)
+              {
+                add_data = 0;
+                for (unsigned int comp_i = 0; comp_i < n_components; ++comp_i)
+                  if (fe.get_nonzero_components(i)[comp_i])
+                    {
+                      if (data.rhs_function->n_components==1)
+                        for (unsigned int point=0; point<n_q_points; ++point)
+                          add_data += fe_values.shape_value_component(i,point,comp_i) *
+                                      JxW[point] * data.rhs_values[point];
+                      else
+                        for (unsigned int point=0; point<n_q_points; ++point)
+                          add_data += fe_values.shape_value_component(i,point,comp_i) *
+                                      JxW[point] * data.rhs_vector_values[point](comp_i);
+                    }
+                copy_data.cell_rhs(i) = add_data;
+              }
+          }
+    }
+
+    /** Cell worker: computes the cell matrix built from products of shape function gradients (weighted by
+     *  data.coefficient if set) and, if data.rhs_function is set, the cell's right hand side; stored in @p copy_data. */
+    template <int dim,
+              int spacedim,
+              typename CellIterator>
+    void laplace_assembler (const CellIterator &cell,
+                            MatrixCreator::internal::AssemblerData::Scratch<dim,spacedim> &data,
+                            MatrixCreator::internal::AssemblerData::CopyData<double>      &copy_data)
+    {
+      data.x_fe_values.reinit (cell);
+      const FEValues<dim,spacedim> &fe_values = data.x_fe_values.get_present_fe_values ();
+      // sizes and finite element of the FEValues object selected for this cell
+      const unsigned int dofs_per_cell = fe_values.dofs_per_cell,
+                         n_q_points    = fe_values.n_quadrature_points;
+      const FiniteElement<dim,spacedim>    &fe  = fe_values.get_fe();
+      const unsigned int n_components  = fe.n_components();
+      // each function must either be scalar or have as many components as the element
+      Assert(data.rhs_function == 0 ||
+             data.rhs_function->n_components==1 ||
+             data.rhs_function->n_components==n_components,
+             ::dealii::MatrixCreator::ExcComponentMismatch());
+      Assert(data.coefficient == 0 ||
+             data.coefficient->n_components==1 ||
+             data.coefficient->n_components==n_components,
+             ::dealii::MatrixCreator::ExcComponentMismatch());
+      // size the local objects for this cell and fetch the cell's global DoF indices
+      copy_data.cell_matrix.reinit (dofs_per_cell, dofs_per_cell);
+      copy_data.cell_rhs.reinit (dofs_per_cell);
+      copy_data.dof_indices.resize (dofs_per_cell);
+      cell->get_dof_indices (copy_data.dof_indices);
+
+      // evaluate the right hand side function (if any) at the quadrature points
+      const bool use_rhs_function = data.rhs_function != 0;
+      if (use_rhs_function)
+        {
+          if (data.rhs_function->n_components==1)
+            {
+              data.rhs_values.resize (n_q_points);
+              data.rhs_function->value_list (fe_values.get_quadrature_points(),
+                                             data.rhs_values);
+            }
+          else
+            {
+              data.rhs_vector_values.resize (n_q_points,
+                                             dealii::Vector<double>(n_components));
+              data.rhs_function->vector_value_list (fe_values.get_quadrature_points(),
+                                                    data.rhs_vector_values);
+            }
+        }
+      // likewise evaluate the coefficient (if any) at the quadrature points
+      const bool use_coefficient = data.coefficient != 0;
+      if (use_coefficient)
+        {
+          if (data.coefficient->n_components==1)
+            {
+              data.coefficient_values.resize (n_q_points);
+              data.coefficient->value_list (fe_values.get_quadrature_points(),
+                                            data.coefficient_values);
+            }
+          else
+            {
+              data.coefficient_vector_values.resize (n_q_points,
+                                                     dealii::Vector<double>(n_components));
+              data.coefficient->vector_value_list (fe_values.get_quadrature_points(),
+                                                   data.coefficient_vector_values);
+            }
+        }
+
+      // add_data accumulates the quadrature sum of the matrix or rhs entry currently being computed
+      const std::vector<double> &JxW = fe_values.get_JxW_values();
+      double add_data;
+      for (unsigned int i=0; i<dofs_per_cell; ++i)
+        if (fe.is_primitive ())
+          {
+            const unsigned int component_i =
+              fe.system_to_component_index(i).first;
+            const Tensor<1,spacedim> *grad_phi_i =
+              &fe_values.shape_grad(i,0); // NOTE(review): indexed by point below, which assumes the gradients of one shape function are contiguous - confirm
+
+            // can use symmetry
+            for (unsigned int j=i; j<dofs_per_cell; ++j)
+              if ((n_components==1) ||
+                  (fe.system_to_component_index(j).first ==
+                   component_i))
+                {
+                  const Tensor<1,spacedim> *grad_phi_j =
+                    & fe_values.shape_grad(j,0);
+                  add_data = 0;
+                  if (use_coefficient)
+                    {
+                      if (data.coefficient->n_components==1)
+                        for (unsigned int point=0; point<n_q_points; ++point)
+                          add_data += ((grad_phi_i[point]*grad_phi_j[point]) *
+                                       JxW[point] *
+                                       data.coefficient_values[point]);
+                      else
+                        for (unsigned int point=0; point<n_q_points; ++point)
+                          add_data += ((grad_phi_i[point]*grad_phi_j[point]) *
+                                       JxW[point] *
+                                       data.coefficient_vector_values[point](component_i));
+                    }
+                  else
+                    for (unsigned int point=0; point<n_q_points; ++point)
+                      add_data += (grad_phi_i[point]*grad_phi_j[point]) *
+                                  JxW[point];
+                  // write the entry and its mirror image (symmetry)
+                  copy_data.cell_matrix(i,j) = add_data;
+                  copy_data.cell_matrix(j,i) = add_data;
+                }
+            // right hand side entry: uses shape function values, not gradients
+            if (use_rhs_function)
+              {
+                const double *phi_i = &fe_values.shape_value(i,0);
+                add_data = 0;
+                if (data.rhs_function->n_components==1)
+                  for (unsigned int point=0; point<n_q_points; ++point)
+                    add_data += phi_i[point] * JxW[point] *
+                                data.rhs_values[point];
+                else
+                  for (unsigned int point=0; point<n_q_points; ++point)
+                    add_data += phi_i[point] * JxW[point] *
+                                data.rhs_vector_values[point](component_i);
+                copy_data.cell_rhs(i) = add_data;
+              }
+          }
+        else
+          {
+            // non-primitive vector-valued FE
+            for (unsigned int j=i; j<dofs_per_cell; ++j)
+              {
+                add_data = 0;
+                for (unsigned int comp_i = 0; comp_i < n_components; ++comp_i)
+                  if (fe.get_nonzero_components(i)[comp_i] &&
+                      fe.get_nonzero_components(j)[comp_i])
+                    {
+                      if (use_coefficient)
+                        {
+                          if (data.coefficient->n_components==1)
+                            for (unsigned int point=0; point<n_q_points; ++point)
+                              add_data += ((fe_values.shape_grad_component(i,point,comp_i) *
+                                            fe_values.shape_grad_component(j,point,comp_i)) *
+                                           JxW[point] *
+                                           data.coefficient_values[point]);
+                          else
+                            for (unsigned int point=0; point<n_q_points; ++point)
+                              add_data += ((fe_values.shape_grad_component(i,point,comp_i) *
+                                            fe_values.shape_grad_component(j,point,comp_i)) *
+                                           JxW[point] *
+                                           data.coefficient_vector_values[point](comp_i));
+                        }
+                      else
+                        for (unsigned int point=0; point<n_q_points; ++point)
+                          add_data += (fe_values.shape_grad_component(i,point,comp_i) *
+                                       fe_values.shape_grad_component(j,point,comp_i)) *
+                                      JxW[point];
+                    }
+                // write the entry and its mirror image (symmetry)
+                copy_data.cell_matrix(i,j) = add_data;
+                copy_data.cell_matrix(j,i) = add_data;
+              }
+            // right hand side entry: sum over all components in which shape function i is nonzero
+            if (use_rhs_function)
+              {
+                add_data = 0;
+                for (unsigned int comp_i = 0; comp_i < n_components; ++comp_i)
+                  if (fe.get_nonzero_components(i)[comp_i])
+                    {
+                      if (data.rhs_function->n_components==1)
+                        for (unsigned int point=0; point<n_q_points; ++point)
+                          add_data += fe_values.shape_value_component(i,point,comp_i) *
+                                      JxW[point] * data.rhs_values[point];
+                      else
+                        for (unsigned int point=0; point<n_q_points; ++point)
+                          add_data += fe_values.shape_value_component(i,point,comp_i) *
+                                      JxW[point] * data.rhs_vector_values[point](comp_i);
+                    }
+                copy_data.cell_rhs(i) = add_data;
+              }
+          }
+    }
+
+    /** Copier: adds the cell matrix in @p data (and, if @p right_hand_side is non-null, the cell right hand side) to
+     *  @p matrix / @p right_hand_side by calling distribute_local_to_global() on data.constraints. */
+    template <typename number,
+              typename MatrixType,
+              typename VectorType>
+    void copy_local_to_global (const AssemblerData::CopyData<number> &data,
+                               MatrixType *matrix,
+                               VectorType *right_hand_side)
+    {
+      // only used by the consistency checks; the (void) cast presumably silences an unused-variable warning
+      const unsigned int dofs_per_cell = data.dof_indices.size();
+      (void)dofs_per_cell;
+      Assert (data.cell_matrix.m() == dofs_per_cell,
+              ExcInternalError());
+      Assert (data.cell_matrix.n() == dofs_per_cell,
+              ExcInternalError());
+      Assert ((right_hand_side == 0)
+              ||
+              (data.cell_rhs.size() == dofs_per_cell),
+              ExcInternalError());
+
+      const ConstraintMatrix &constraints = *data.constraints;
+      if (right_hand_side == 0)
+        {
+          // no vector requested: transfer the matrix contribution only
+          constraints.distribute_local_to_global(data.cell_matrix, data.dof_indices, *matrix);
+          return;
+        }
+      constraints.distribute_local_to_global(data.cell_matrix, data.cell_rhs, data.dof_indices,
+                                             *matrix, *right_hand_side);
+    }
+
+
+
+    namespace AssemblerBoundary
+    {
+      struct Scratch // empty placeholder type: it has no data members
+      {
+        Scratch() {}
+      };
+
+      template <typename DoFHandlerType>
+      struct CopyData // per-cell data of the boundary assembly; all containers start out empty
+      {
+        CopyData() : dofs_per_cell(0) {} // give the counter a defined value: the copy constructor reads it
+        // Member-wise copy; defined out of class right after this struct.
+        CopyData(CopyData const &data);
+        // NOTE(review): the members below are presumably filled by the boundary cell worker - confirm against caller
+        unsigned int dofs_per_cell;
+        std::vector<types::global_dof_index> dofs;
+        std::vector<std::vector<bool> > dof_is_on_face;
+        typename DoFHandlerType::active_cell_iterator cell;
+        std::vector<FullMatrix<double> > cell_matrix;
+        std::vector<Vector<double> > cell_vector;
+      };
+
+      template <typename DoFHandlerType>
+      CopyData<DoFHandlerType>::CopyData(CopyData const &data) : // copy constructor: copies every member of 'data'
+        dofs_per_cell(data.dofs_per_cell),
+        dofs(data.dofs),
+        dof_is_on_face(data.dof_is_on_face),
+        cell(data.cell),
+        cell_matrix(data.cell_matrix),
+        cell_vector(data.cell_vector)
+      {}
+    }
+  }
+}
+
+
+namespace MatrixCreator
+{
+  /** Assemble the mass matrix for @p dof (weighted by @p coefficient if non-null) into @p matrix, using @p constraints. */
+  template <int dim, typename number, int spacedim>
+  void create_mass_matrix (const Mapping<dim,spacedim>       &mapping,
+                           const DoFHandler<dim,spacedim>    &dof,
+                           const Quadrature<dim>    &q,
+                           SparseMatrix<number>     &matrix,
+                           const Function<spacedim> *const coefficient,
+                           const ConstraintMatrix   &constraints)
+  {
+    Assert (matrix.m() == dof.n_dofs(),
+            ExcDimensionMismatch (matrix.m(), dof.n_dofs()));
+    Assert (matrix.n() == dof.n_dofs(),
+            ExcDimensionMismatch (matrix.n(), dof.n_dofs()));
+
+    typedef typename DoFHandler<dim,spacedim>::active_cell_iterator       cell_iterator;
+    typedef MatrixCreator::internal::AssemblerData::Scratch<dim,spacedim> ScratchData;
+    typedef MatrixCreator::internal::AssemblerData::CopyData<number>      PerCellData;
+
+    // quadrature points are only requested when a coefficient has to be evaluated
+    const UpdateFlags flags = update_values | update_JxW_values |
+                              (coefficient != 0 ? update_quadrature_points : UpdateFlags(0));
+    hp::FECollection<dim,spacedim>      fe_collection (dof.get_fe());
+    hp::QCollection<dim>                q_collection (q);
+    hp::MappingCollection<dim,spacedim> mapping_collection (mapping);
+    ScratchData assembler_data (fe_collection, flags, coefficient, /*rhs_function=*/0,
+                                q_collection, mapping_collection);
+
+    const unsigned int max_dofs = fe_collection.max_dofs_per_cell();
+    PerCellData copy_data;
+    copy_data.cell_matrix.reinit (max_dofs, max_dofs);
+    copy_data.cell_rhs.reinit (max_dofs);
+    copy_data.dof_indices.resize (max_dofs);
+    copy_data.constraints = &constraints;
+
+    WorkStream::run (dof.begin_active(), static_cast<cell_iterator>(dof.end()),
+                     &MatrixCreator::internal::mass_assembler<dim, spacedim, cell_iterator, number>,
+                     std_cxx11::bind (&MatrixCreator::internal::copy_local_to_global<number,SparseMatrix<number>, Vector<number> >,
+                                      std_cxx11::_1, &matrix, (Vector<number> *)0),
+                     assembler_data, copy_data);
+  }
+
+
+  /** Convenience overload: forwards to the variant taking a mapping, passing StaticMappingQ1<dim,spacedim>::mapping. */
+  template <int dim, typename number, int spacedim>
+  void create_mass_matrix (const DoFHandler<dim,spacedim>    &dof,
+                           const Quadrature<dim>    &q,
+                           SparseMatrix<number>     &matrix,
+                           const Function<spacedim> *const coefficient,
+                           const ConstraintMatrix   &constraints)
+  {
+    const Mapping<dim,spacedim> &q1_mapping = StaticMappingQ1<dim,spacedim>::mapping;
+    create_mass_matrix(q1_mapping, dof, q, matrix, coefficient, constraints);
+  }
+
+
+  /** As the matrix-only variant, but also assembles the right hand side for @p rhs into @p rhs_vector. */
+  template <int dim, typename number, int spacedim>
+  void create_mass_matrix (const Mapping<dim,spacedim>       &mapping,
+                           const DoFHandler<dim,spacedim>    &dof,
+                           const Quadrature<dim>    &q,
+                           SparseMatrix<number>     &matrix,
+                           const Function<spacedim>      &rhs,
+                           Vector<number>           &rhs_vector,
+                           const Function<spacedim> *const coefficient,
+                           const ConstraintMatrix   &constraints)
+  {
+    Assert (matrix.m() == dof.n_dofs(),
+            ExcDimensionMismatch (matrix.m(), dof.n_dofs()));
+    Assert (matrix.n() == dof.n_dofs(),
+            ExcDimensionMismatch (matrix.n(), dof.n_dofs()));
+
+    typedef typename DoFHandler<dim,spacedim>::active_cell_iterator       cell_iterator;
+    typedef MatrixCreator::internal::AssemblerData::Scratch<dim,spacedim> ScratchData;
+    typedef MatrixCreator::internal::AssemblerData::CopyData<number>      PerCellData;
+
+    // the right hand side function is always evaluated, so quadrature points are always requested
+    const UpdateFlags flags = update_values | update_JxW_values | update_quadrature_points;
+    hp::FECollection<dim,spacedim>      fe_collection (dof.get_fe());
+    hp::QCollection<dim>                q_collection (q);
+    hp::MappingCollection<dim,spacedim> mapping_collection (mapping);
+    ScratchData assembler_data (fe_collection, flags, coefficient, &rhs,
+                                q_collection, mapping_collection);
+
+    const unsigned int max_dofs = fe_collection.max_dofs_per_cell();
+    PerCellData copy_data;
+    copy_data.cell_matrix.reinit (max_dofs, max_dofs);
+    copy_data.cell_rhs.reinit (max_dofs);
+    copy_data.dof_indices.resize (max_dofs);
+    copy_data.constraints = &constraints;
+
+    WorkStream::run (dof.begin_active(), static_cast<cell_iterator>(dof.end()),
+                     &MatrixCreator::internal::mass_assembler<dim, spacedim, cell_iterator, number>,
+                     std_cxx11::bind(&MatrixCreator::internal::copy_local_to_global<number,SparseMatrix<number>, Vector<number> >,
+                                     std_cxx11::_1, &matrix, &rhs_vector),
+                     assembler_data, copy_data);
+  }
+
+
+  /** Convenience overload: forwards to the variant taking a mapping, passing StaticMappingQ1<dim,spacedim>::mapping. */
+  template <int dim, typename number, int spacedim>
+  void create_mass_matrix (const DoFHandler<dim,spacedim>    &dof,
+                           const Quadrature<dim>    &q,
+                           SparseMatrix<number>     &matrix,
+                           const Function<spacedim>      &rhs,
+                           Vector<number>           &rhs_vector,
+                           const Function<spacedim> *const coefficient,
+                           const ConstraintMatrix   &constraints)
+  {
+    const Mapping<dim,spacedim> &q1_mapping = StaticMappingQ1<dim,spacedim>::mapping;
+    create_mass_matrix(q1_mapping, dof, q, matrix,
+                       rhs, rhs_vector, coefficient, constraints);
+  }
+
+
+
+  /**
+   * Assemble the mass matrix for an hp::DoFHandler using the given
+   * collections of mappings and quadrature formulas. No right hand side
+   * vector is assembled by this overload.
+   *
+   * @p matrix must be square with as many rows as @p dof has degrees of
+   * freedom (checked by assertions). @p coefficient may be a null pointer;
+   * quadrature point locations are only requested when it is not.
+   */
+  template <int dim, typename number, int spacedim>
+  void create_mass_matrix (const hp::MappingCollection<dim,spacedim> &mapping,
+                           const hp::DoFHandler<dim,spacedim>    &dof,
+                           const hp::QCollection<dim>    &q,
+                           SparseMatrix<number>     &matrix,
+                           const Function<spacedim> *const coefficient,
+                           const ConstraintMatrix   &constraints)
+  {
+    typedef typename hp::DoFHandler<dim,spacedim>::active_cell_iterator active_iterator;
+
+    Assert (matrix.m() == dof.n_dofs(),
+            ExcDimensionMismatch (matrix.m(), dof.n_dofs()));
+    Assert (matrix.n() == dof.n_dofs(),
+            ExcDimensionMismatch (matrix.n(), dof.n_dofs()));
+
+    // quadrature points are only needed to evaluate the coefficient
+    const UpdateFlags quadrature_point_flag
+      = (coefficient != 0 ? update_quadrature_points : UpdateFlags(0));
+    const UpdateFlags flags
+      = update_values | update_JxW_values | quadrature_point_flag;
+
+    MatrixCreator::internal::AssemblerData::Scratch<dim, spacedim>
+    assembler_data (dof.get_fe(),
+                    flags,
+                    coefficient, /*rhs_function=*/0,
+                    q, mapping);
+
+    // size the per-cell buffers for the largest element of the collection
+    const unsigned int max_dofs = assembler_data.fe_collection.max_dofs_per_cell();
+
+    MatrixCreator::internal::AssemblerData::CopyData<number> copy_data;
+    copy_data.cell_matrix.reinit (max_dofs, max_dofs);
+    copy_data.cell_rhs.reinit (max_dofs);
+    copy_data.dof_indices.resize (max_dofs);
+    copy_data.constraints = &constraints;
+
+    // the copier receives a null pointer for the right hand side vector
+    // because this overload only fills the matrix
+    WorkStream::run (dof.begin_active(),
+                     static_cast<active_iterator>(dof.end()),
+                     &MatrixCreator::internal::mass_assembler<dim, spacedim, active_iterator, number>,
+                     std_cxx11::bind (&MatrixCreator::internal::
+                                      copy_local_to_global<number,SparseMatrix<number>, Vector<number> >,
+                                      std_cxx11::_1,
+                                      &matrix,
+                                      static_cast<Vector<number> *>(0)),
+                     assembler_data,
+                     copy_data);
+  }
+
+
+
+  /**
+   * Convenience overload for an hp::DoFHandler that assembles only the mass
+   * matrix and takes no mapping argument. It forwards to the overload with
+   * an hp::MappingCollection as first argument, supplying
+   * hp::StaticMappingQ1<dim,spacedim>::mapping_collection for it.
+   */
+  template <int dim, typename number, int spacedim>
+  void create_mass_matrix (const hp::DoFHandler<dim,spacedim> &dof,
+                           const hp::QCollection<dim> &q,
+                           SparseMatrix<number>     &matrix,
+                           const Function<spacedim> *const coefficient,
+                           const ConstraintMatrix   &constraints)
+  {
+    // the mapping collection used whenever the caller does not provide one
+    const hp::MappingCollection<dim,spacedim> &default_mappings
+      = hp::StaticMappingQ1<dim,spacedim>::mapping_collection;
+
+    create_mass_matrix(default_mappings,
+                       dof,
+                       q,
+                       matrix,
+                       coefficient,
+                       constraints);
+  }
+
+
+
+  /**
+   * Assemble the mass matrix together with a right hand side vector for an
+   * hp::DoFHandler using the given collections of mappings and quadrature
+   * formulas.
+   *
+   * @p matrix must be square with as many rows as @p dof has degrees of
+   * freedom (checked by assertions). @p coefficient may be a null pointer.
+   * Quadrature point locations are always requested here.
+   */
+  template <int dim, typename number, int spacedim>
+  void create_mass_matrix (const hp::MappingCollection<dim,spacedim> &mapping,
+                           const hp::DoFHandler<dim,spacedim> &dof,
+                           const hp::QCollection<dim>    &q,
+                           SparseMatrix<number>     &matrix,
+                           const Function<spacedim>      &rhs,
+                           Vector<number>           &rhs_vector,
+                           const Function<spacedim> *const coefficient,
+                           const ConstraintMatrix   &constraints)
+  {
+    typedef typename hp::DoFHandler<dim,spacedim>::active_cell_iterator active_iterator;
+
+    Assert (matrix.m() == dof.n_dofs(),
+            ExcDimensionMismatch (matrix.m(), dof.n_dofs()));
+    Assert (matrix.n() == dof.n_dofs(),
+            ExcDimensionMismatch (matrix.n(), dof.n_dofs()));
+
+    const UpdateFlags flags
+      = update_values | update_JxW_values | update_quadrature_points;
+
+    MatrixCreator::internal::AssemblerData::Scratch<dim, spacedim>
+    assembler_data (dof.get_fe(),
+                    flags,
+                    coefficient, &rhs,
+                    q, mapping);
+
+    // size the per-cell buffers for the largest element of the collection
+    const unsigned int max_dofs = assembler_data.fe_collection.max_dofs_per_cell();
+
+    MatrixCreator::internal::AssemblerData::CopyData<number> copy_data;
+    copy_data.cell_matrix.reinit (max_dofs, max_dofs);
+    copy_data.cell_rhs.reinit (max_dofs);
+    copy_data.dof_indices.resize (max_dofs);
+    copy_data.constraints = &constraints;
+
+    WorkStream::run (dof.begin_active(),
+                     static_cast<active_iterator>(dof.end()),
+                     &MatrixCreator::internal::mass_assembler<dim, spacedim, active_iterator, number>,
+                     std_cxx11::bind (&MatrixCreator::internal::
+                                      copy_local_to_global<number,SparseMatrix<number>, Vector<number> >,
+                                      std_cxx11::_1,
+                                      &matrix,
+                                      &rhs_vector),
+                     assembler_data,
+                     copy_data);
+  }
+
+
+
+  /**
+   * Convenience overload for an hp::DoFHandler that assembles the mass
+   * matrix and a right hand side vector and takes no mapping argument. It
+   * forwards to the overload with an hp::MappingCollection as first
+   * argument, supplying
+   * hp::StaticMappingQ1<dim,spacedim>::mapping_collection for it.
+   */
+  template <int dim, typename number, int spacedim>
+  void create_mass_matrix (const hp::DoFHandler<dim,spacedim> &dof,
+                           const hp::QCollection<dim> &q,
+                           SparseMatrix<number>     &matrix,
+                           const Function<spacedim> &rhs,
+                           Vector<number>           &rhs_vector,
+                           const Function<spacedim> *const coefficient,
+                           const ConstraintMatrix   &constraints)
+  {
+    // the mapping collection used whenever the caller does not provide one
+    const hp::MappingCollection<dim,spacedim> &default_mappings
+      = hp::StaticMappingQ1<dim,spacedim>::mapping_collection;
+
+    create_mass_matrix(default_mappings,
+                       dof,
+                       q,
+                       matrix,
+                       rhs,
+                       rhs_vector,
+                       coefficient,
+                       constraints);
+  }
+
+
+
+  namespace internal
+  {
+    /**
+     * Per-cell worker used when assembling the boundary mass matrix.
+     *
+     * For every face of @p cell whose boundary indicator is a key of
+     * @p boundary_functions, this function appends to @p copy_data
+     *  - a dofs_per_cell x dofs_per_cell local matrix (cell_matrix),
+     *  - a local right hand side vector of length dofs_per_cell
+     *    (cell_vector), and
+     *  - a vector of flags telling for each cell dof whether it is among
+     *    the dofs of that face (dof_is_on_face).
+     * The three containers are cleared at the start of every call, so after
+     * the call they hold one entry per selected face, in face order.
+     *
+     * The scratch argument is unnamed and not used; all temporary objects,
+     * including the FEFaceValues object, are created locally.
+     *
+     * @p coefficient may be a null pointer, in which case a weight of 1 is
+     * used. @p component_mapping maps a finite element component to the
+     * component of the boundary function used for the right hand side.
+     *
+     * The number of components of the boundary functions is read from the
+     * first entry of @p boundary_functions, so the map must not be empty.
+     */
+    template <int dim, int spacedim>
+    void
+    static inline
+    create_boundary_mass_matrix_1 (typename DoFHandler<dim,spacedim>::active_cell_iterator const &cell,
+                                   MatrixCreator::internal::AssemblerBoundary::Scratch const &,
+                                   MatrixCreator::internal::AssemblerBoundary::CopyData<DoFHandler<dim,
+                                   spacedim> > &copy_data,
+                                   Mapping<dim, spacedim> const &mapping,
+                                   FiniteElement<dim,spacedim> const &fe,
+                                   Quadrature<dim-1> const &q,
+                                   typename FunctionMap<spacedim>::type const &boundary_functions,
+                                   Function<spacedim> const *const coefficient,
+                                   std::vector<unsigned int> const &component_mapping)
+
+    {
+      // All assertions for this function
+      // are in the calling function
+      // before creating threads.
+      const unsigned int n_components = fe.n_components();
+      // taken from the first boundary function in the map
+      const unsigned int n_function_components = boundary_functions.begin()->second->n_components;
+      const bool         fe_is_system = (n_components != 1);
+      const bool         fe_is_primitive = fe.is_primitive();
+
+      const unsigned int dofs_per_face = fe.dofs_per_face;
+
+      copy_data.cell = cell;
+      copy_data.dofs_per_cell = fe.dofs_per_cell;
+
+      UpdateFlags update_flags = UpdateFlags (update_values     |
+                                              update_JxW_values |
+                                              update_normal_vectors |
+                                              update_quadrature_points);
+      FEFaceValues<dim,spacedim> fe_values (mapping, fe, q, update_flags);
+
+      // two variables for the coefficient,
+      // one for the two cases indicated in
+      // the name
+      std::vector<double>          coefficient_values (fe_values.n_quadrature_points, 1.);
+      std::vector<Vector<double> > coefficient_vector_values (fe_values.n_quadrature_points,
+                                                              Vector<double>(n_components));
+      const bool coefficient_is_vector = (coefficient != 0 && coefficient->n_components != 1)
+                                         ? true : false;
+
+      // values of the boundary function at the quadrature points, again
+      // one variable for the scalar and one for the vector valued case
+      std::vector<double>          rhs_values_scalar (fe_values.n_quadrature_points);
+      std::vector<Vector<double> > rhs_values_system (fe_values.n_quadrature_points,
+                                                      Vector<double>(n_function_components));
+
+      copy_data.dofs.resize(copy_data.dofs_per_cell);
+      cell->get_dof_indices (copy_data.dofs);
+
+      std::vector<types::global_dof_index> dofs_on_face_vector (dofs_per_face);
+
+      // Because CopyData objects are reused and that push_back is used,
+      // dof_is_on_face, cell_matrix, and cell_vector must be cleared before
+      // they are reused
+      copy_data.dof_is_on_face.clear();
+      copy_data.cell_matrix.clear();
+      copy_data.cell_vector.clear();
+
+      for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+        // check if this face is on that part of
+        // the boundary we are interested in
+        if (boundary_functions.find(cell->face(face)->boundary_id()) !=
+            boundary_functions.end())
+          {
+            // start a new local matrix and vector for this face; the
+            // loops below accumulate into the last element via back()
+            copy_data.cell_matrix.push_back(FullMatrix<double> (copy_data.dofs_per_cell,
+                                                                copy_data.dofs_per_cell));
+            copy_data.cell_vector.push_back(Vector<double> (copy_data.dofs_per_cell));
+            fe_values.reinit (cell, face);
+
+            if (fe_is_system)
+              // FE has several components
+              {
+                boundary_functions.find(cell->face(face)->boundary_id())
+                ->second->vector_value_list (fe_values.get_quadrature_points(),
+                                             rhs_values_system);
+
+                if (coefficient_is_vector)
+                  // If coefficient is
+                  // vector valued, fill
+                  // all components
+                  coefficient->vector_value_list (fe_values.get_quadrature_points(),
+                                                  coefficient_vector_values);
+                else
+                  {
+                    // If a scalar
+                    // function is
+                    // given, update
+                    // the values, if
+                    // not, use the
+                    // default one set
+                    // in the
+                    // constructor above
+                    if (coefficient != 0)
+                      coefficient->value_list (fe_values.get_quadrature_points(),
+                                               coefficient_values);
+                    // Copy scalar
+                    // values into vector
+                    for (unsigned int point=0; point<fe_values.n_quadrature_points; ++point)
+                      coefficient_vector_values[point] = coefficient_values[point];
+                  }
+
+                // Special treatment
+                // for Hdiv and Hcurl
+                // elements, where only
+                // the normal or
+                // tangential component
+                // should be projected.
+                //
+                // The factor defaults to 1 and is only changed below for
+                // components whose base element conforms to Hdiv but not
+                // to H1; there it is the square of the matching component
+                // of the face normal.
+                std::vector<std::vector<double> > normal_adjustment(fe_values.n_quadrature_points,
+                                                                    std::vector<double>(n_components, 1.));
+
+                for (unsigned int comp = 0; comp<n_components; ++comp)
+                  {
+                    const FiniteElement<dim,spacedim> &base = fe.base_element(fe.component_to_base_index(comp).first);
+                    const unsigned int bcomp = fe.component_to_base_index(comp).second;
+
+                    if (!base.conforms(FiniteElementData<dim>::H1) &&
+                        base.conforms(FiniteElementData<dim>::Hdiv))
+                      for (unsigned int point=0; point<fe_values.n_quadrature_points; ++point)
+                        normal_adjustment[point][comp] = fe_values.normal_vector(point)[bcomp]
+                                                         * fe_values.normal_vector(point)[bcomp];
+                  }
+
+                for (unsigned int point=0; point<fe_values.n_quadrature_points; ++point)
+                  {
+                    const double weight = fe_values.JxW(point);
+                    for (unsigned int i=0; i<fe_values.dofs_per_cell; ++i)
+                      if (fe_is_primitive)
+                        {
+                          // primitive element: shape functions i and j
+                          // only couple if they belong to the same
+                          // component. Note that normal_adjustment is not
+                          // applied in this branch.
+                          for (unsigned int j=0; j<fe_values.dofs_per_cell; ++j)
+                            {
+                              if (fe.system_to_component_index(j).first
+                                  == fe.system_to_component_index(i).first)
+                                {
+                                  copy_data.cell_matrix.back()(i,j)
+                                  += weight
+                                     * fe_values.shape_value(j,point)
+                                     * fe_values.shape_value(i,point)
+                                     * coefficient_vector_values[point](fe.system_to_component_index(i).first);
+                                }
+                            }
+                          copy_data.cell_vector.back()(i) += fe_values.shape_value(i,point)
+                                                             * rhs_values_system[point](component_mapping[fe.system_to_component_index(i).first])
+                                                             * weight;
+                        }
+                      else
+                        {
+                          // non-primitive element: sum the contributions
+                          // of all vector components, each scaled by its
+                          // normal_adjustment factor
+                          for (unsigned int comp=0; comp<n_components; ++comp)
+                            {
+                              for (unsigned int j=0; j<fe_values.dofs_per_cell; ++j)
+                                copy_data.cell_matrix.back()(i,j)
+                                += fe_values.shape_value_component(j,point,comp)
+                                   * fe_values.shape_value_component(i,point,comp)
+                                   * normal_adjustment[point][comp]
+                                   * weight * coefficient_vector_values[point](comp);
+                              copy_data.cell_vector.back()(i) += fe_values.shape_value_component(i,point,comp) *
+                                                                 rhs_values_system[point](component_mapping[comp])
+                                                                 * normal_adjustment[point][comp]
+                                                                 * weight;
+                            }
+                        }
+                  }
+              }
+            else
+              // FE is a scalar one
+              {
+                boundary_functions.find(cell->face(face)->boundary_id())
+                ->second->value_list (fe_values.get_quadrature_points(), rhs_values_scalar);
+
+                // without a coefficient the values keep their initial
+                // value of 1
+                if (coefficient != 0)
+                  coefficient->value_list (fe_values.get_quadrature_points(),
+                                           coefficient_values);
+                for (unsigned int point=0; point<fe_values.n_quadrature_points; ++point)
+                  {
+                    const double weight = fe_values.JxW(point);
+                    for (unsigned int i=0; i<fe_values.dofs_per_cell; ++i)
+                      {
+                        const double v = fe_values.shape_value(i,point);
+                        for (unsigned int j=0; j<fe_values.dofs_per_cell; ++j)
+                          {
+                            const double u = fe_values.shape_value(j,point);
+                            copy_data.cell_matrix.back()(i,j) += (u*v*weight*coefficient_values[point]);
+                          }
+                        copy_data.cell_vector.back()(i) += v * rhs_values_scalar[point] *weight;
+                      }
+                  }
+              }
+
+
+            cell->face(face)->get_dof_indices (dofs_on_face_vector);
+            // for each dof on the cell, have a
+            // flag whether it is on the face
+            copy_data.dof_is_on_face.push_back(std::vector<bool> (copy_data.dofs_per_cell));
+            // check for each of the dofs on this cell
+            // whether it is on the face
+            for (unsigned int i=0; i<copy_data.dofs_per_cell; ++i)
+              copy_data.dof_is_on_face.back()[i] = (std::find(dofs_on_face_vector.begin(),
+                                                              dofs_on_face_vector.end(),
+                                                              copy_data.dofs[i])
+                                                    !=
+                                                    dofs_on_face_vector.end());
+          }
+    }
+
+    /**
+     * Copier used when assembling the boundary mass matrix: transfers the
+     * per-face local matrices and vectors stored in @p copy_data into the
+     * global boundary @p matrix and @p rhs_vector.
+     *
+     * @p copy_data holds one local matrix, one local vector and one vector
+     * of "dof is on this face" flags per face of the cell whose boundary
+     * indicator is a key of @p boundary_functions. The faces are visited
+     * here in the same order and with the same selection criterion, so a
+     * running counter indexes these containers.
+     *
+     * A cell dof contributes only if it is flagged as lying on the face
+     * under consideration and if @p dof_to_boundary_mapping maps its global
+     * index to something other than numbers::invalid_dof_index. The flag
+     * is needed in addition to the mapping because a dof may lie on the
+     * boundary through another face of the same cell. Entries belonging to
+     * all other dofs are not transferred.
+     */
+    template <int dim,int spacedim>
+    void copy_boundary_mass_matrix_1(MatrixCreator::internal::AssemblerBoundary::CopyData<DoFHandler<dim,
+                                     spacedim> > const &copy_data,
+                                     typename FunctionMap<spacedim>::type const &boundary_functions,
+                                     std::vector<types::global_dof_index> const &dof_to_boundary_mapping,
+                                     SparseMatrix<double> &matrix,
+                                     Vector<double> &rhs_vector)
+    {
+      // position of the current selected face within the per-face
+      // containers of copy_data
+      unsigned int selected_face = 0;
+
+      for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+        {
+          // skip faces that are not on the part of the boundary we are
+          // interested in
+          if (boundary_functions.find(copy_data.cell->face(face)->boundary_id()) ==
+              boundary_functions.end())
+            continue;
+
+          for (unsigned int i=0; i<copy_data.dofs_per_cell; ++i)
+            {
+              if (!copy_data.dof_is_on_face[selected_face][i])
+                continue;
+
+              const types::global_dof_index boundary_row
+                = dof_to_boundary_mapping[copy_data.dofs[i]];
+              if (boundary_row == numbers::invalid_dof_index)
+                continue;
+
+              for (unsigned int j=0; j<copy_data.dofs_per_cell; ++j)
+                {
+                  if (!copy_data.dof_is_on_face[selected_face][j])
+                    continue;
+
+                  const types::global_dof_index boundary_column
+                    = dof_to_boundary_mapping[copy_data.dofs[j]];
+                  if (boundary_column == numbers::invalid_dof_index)
+                    continue;
+
+                  AssertIsFinite(copy_data.cell_matrix[selected_face](i,j));
+                  matrix.add(boundary_row,
+                             boundary_column,
+                             copy_data.cell_matrix[selected_face](i,j));
+                }
+
+              AssertIsFinite(copy_data.cell_vector[selected_face](i));
+              rhs_vector(boundary_row) += copy_data.cell_vector[selected_face](i);
+            }
+
+          ++selected_face;
+        }
+    }
+
+
+    /**
+     * Explicit specialization of the boundary mass matrix worker for
+     * dim=1, spacedim=3. This case is not implemented: all arguments are
+     * ignored and the body consists solely of an ExcNotImplemented
+     * assertion.
+     */
+    template <>
+    inline
+    void
+    create_boundary_mass_matrix_1<1,3> (DoFHandler<1,3>::active_cell_iterator const &/*cell*/,
+                                        MatrixCreator::internal::AssemblerBoundary::Scratch const &/*scratch*/,
+                                        MatrixCreator::internal::AssemblerBoundary::CopyData<DoFHandler<1,
+                                        3> > &/*copy_data*/,
+                                        Mapping<1,3> const &/*mapping*/,
+                                        FiniteElement<1,3> const &/*fe*/,
+                                        Quadrature<0> const &/*q*/,
+                                        FunctionMap<3>::type const &/*boundary_functions*/,
+                                        Function<3> const *const /*coefficient*/,
+                                        std::vector<unsigned int> const &/*component_mapping*/)
+    {
+      Assert (false, ExcNotImplemented());
+    }
+  }
+
+
+
+
+  /**
+   * Assemble the mass matrix and a right hand side vector on the part of
+   * the boundary whose boundary indicators are keys of
+   * @p boundary_functions.
+   *
+   * The work is distributed with WorkStream::run over all active cells:
+   * internal::create_boundary_mass_matrix_1 computes the local
+   * contributions of a cell and internal::copy_boundary_mass_matrix_1 adds
+   * them to @p matrix and @p rhs_vector, using @p dof_to_boundary_mapping
+   * to translate global dof indices.
+   *
+   * @p coefficient may be a null pointer; otherwise it must have either one
+   * component or as many as the finite element. If @p component_mapping is
+   * empty it is replaced by the identity mapping, which requires that the
+   * boundary functions have as many components as the finite element;
+   * otherwise it must have one entry per finite element component. These
+   * and the size requirements on the other arguments are checked by
+   * assertions.
+   *
+   * For dim==1 the function is not implemented and returns after an
+   * ExcNotImplemented assertion.
+   */
+  template <int dim, int spacedim>
+  void
+  create_boundary_mass_matrix (const Mapping<dim, spacedim>  &mapping,
+                               const DoFHandler<dim,spacedim> &dof,
+                               const Quadrature<dim-1>  &q,
+                               SparseMatrix<double>  &matrix,
+                               const typename FunctionMap<spacedim>::type  &boundary_functions,
+                               Vector<double>            &rhs_vector,
+                               std::vector<types::global_dof_index> &dof_to_boundary_mapping,
+                               const Function<spacedim> *const coefficient,
+                               std::vector<unsigned int> component_mapping)
+  {
+    // what would that be in 1d? the identity matrix on the boundary dofs?
+    if (dim == 1)
+      {
+        Assert (false, ExcNotImplemented());
+        return;
+      }
+
+    typedef typename DoFHandler<dim,spacedim>::active_cell_iterator active_iterator;
+    typedef MatrixCreator::internal::AssemblerBoundary::Scratch BoundaryScratch;
+    typedef MatrixCreator::internal::AssemblerBoundary::CopyData<DoFHandler<dim,spacedim> > BoundaryCopyData;
+
+    const FiniteElement<dim,spacedim> &fe = dof.get_fe();
+    const unsigned int n_components  = fe.n_components();
+
+    // consistency checks on the sizes of all arguments
+    Assert (matrix.n() == dof.n_boundary_dofs(boundary_functions),
+            ExcInternalError());
+    Assert (matrix.n() == matrix.m(), ExcInternalError());
+    Assert (matrix.n() == rhs_vector.size(), ExcInternalError());
+    Assert (boundary_functions.size() != 0, ExcInternalError());
+    Assert (dof_to_boundary_mapping.size() == dof.n_dofs(),
+            ExcInternalError());
+    Assert (coefficient ==0 ||
+            coefficient->n_components==1 ||
+            coefficient->n_components==n_components, ExcComponentMismatch());
+
+    if (component_mapping.empty())
+      {
+        // no mapping given: component i of the element is matched with
+        // component i of the boundary functions
+        AssertDimension (n_components, boundary_functions.begin()->second->n_components);
+        for (unsigned int c=0; c<n_components; ++c)
+          component_mapping.push_back(c);
+      }
+    else
+      AssertDimension (n_components, component_mapping.size());
+
+    // worker: computes the local contributions of one cell
+    std_cxx11::function<void (active_iterator const &,
+                              BoundaryScratch const &,
+                              BoundaryCopyData &)>
+    worker (std_cxx11::bind(&internal::create_boundary_mass_matrix_1<dim,spacedim>,
+                            std_cxx11::_1,
+                            std_cxx11::_2,
+                            std_cxx11::_3,
+                            std_cxx11::cref(mapping),
+                            std_cxx11::cref(fe),
+                            std_cxx11::cref(q),
+                            std_cxx11::cref(boundary_functions),
+                            coefficient,
+                            std_cxx11::cref(component_mapping)));
+
+    // copier: adds the local contributions to the global objects
+    std_cxx11::function<void (BoundaryCopyData const &)>
+    copier (std_cxx11::bind(&internal::copy_boundary_mass_matrix_1<dim,spacedim>,
+                            std_cxx11::_1,
+                            std_cxx11::cref(boundary_functions),
+                            std_cxx11::cref(dof_to_boundary_mapping),
+                            std_cxx11::ref(matrix),
+                            std_cxx11::ref(rhs_vector)));
+
+    BoundaryScratch  scratch;
+    BoundaryCopyData copy_data;
+
+    WorkStream::run(dof.begin_active(),dof.end(),
+                    worker,
+                    copier,
+                    scratch,
+                    copy_data);
+  }
+
+
+
+  namespace
+  {
+
+    template <int dim, int spacedim>
+    void
+    create_hp_boundary_mass_matrix_1 (typename hp::DoFHandler<dim,spacedim>::active_cell_iterator const
+                                      &cell,
+                                      MatrixCreator::internal::AssemblerBoundary::Scratch const &,
+                                      MatrixCreator::internal::AssemblerBoundary
+                                      ::CopyData<hp::DoFHandler<dim,spacedim> > &copy_data,
+                                      hp::MappingCollection<dim,spacedim> const &mapping,
+                                      hp::FECollection<dim,spacedim> const &fe_collection,
+                                      hp::QCollection<dim-1> const &q,
+                                      const typename FunctionMap<spacedim>::type &boundary_functions,
+                                      Function<spacedim> const *const coefficient,
+                                      std::vector<unsigned int> const &component_mapping)
+    {
+      const unsigned int n_components  = fe_collection.n_components();
+      const unsigned int n_function_components = boundary_functions.begin()->second->n_components;
+      const bool         fe_is_system  = (n_components != 1);
+      const FiniteElement<dim,spacedim> &fe = cell->get_fe();
+      const unsigned int dofs_per_face = fe.dofs_per_face;
+
+      copy_data.cell = cell;
+      copy_data.dofs_per_cell = fe.dofs_per_cell;
+      copy_data.dofs.resize(copy_data.dofs_per_cell);
+      cell->get_dof_indices (copy_data.dofs);
+
+
+      UpdateFlags update_flags = UpdateFlags (update_values     |
+                                              update_JxW_values |
+                                              update_quadrature_points);
+      hp::FEFaceValues<dim,spacedim> x_fe_values (mapping, fe_collection, q, update_flags);
+
+      // two variables for the coefficient,
+      // one for the two cases indicated in
+      // the name
+      std::vector<double>          coefficient_values;
+      std::vector<Vector<double> > coefficient_vector_values;
+
+      std::vector<double>          rhs_values_scalar;
+      std::vector<Vector<double> > rhs_values_system;
+
+      std::vector<types::global_dof_index> dofs_on_face_vector (dofs_per_face);
+
+      copy_data.dofs.resize(copy_data.dofs_per_cell);
+      cell->get_dof_indices (copy_data.dofs);
+
+      // Because CopyData objects are reused and that push_back is used,
+      // dof_is_on_face, cell_matrix, and cell_vector must be cleared before
+      // they are reused
+      copy_data.dof_is_on_face.clear();
+      copy_data.cell_matrix.clear();
+      copy_data.cell_vector.clear();
+
+
+      for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+        // check if this face is on that part of
+        // the boundary we are interested in
+        if (boundary_functions.find(cell->face(face)->boundary_id()) !=
+            boundary_functions.end())
+          {
+            x_fe_values.reinit (cell, face);
+
+            const FEFaceValues<dim,spacedim> &fe_values = x_fe_values.get_present_fe_values ();
+
+            copy_data.cell_matrix.push_back(FullMatrix<double> (copy_data.dofs_per_cell,
+                                                                copy_data.dofs_per_cell));
+            copy_data.cell_vector.push_back(Vector<double> (copy_data.dofs_per_cell));
+
+            if (fe_is_system)
+              // FE has several components
+              {
+                rhs_values_system.resize (fe_values.n_quadrature_points,
+                                          Vector<double>(n_function_components));
+                boundary_functions.find(cell->face(face)->boundary_id())
+                ->second->vector_value_list (fe_values.get_quadrature_points(),
+                                             rhs_values_system);
+
+                if (coefficient != 0)
+                  {
+                    if (coefficient->n_components==1)
+                      {
+                        coefficient_values.resize (fe_values.n_quadrature_points);
+                        coefficient->value_list (fe_values.get_quadrature_points(),
+                                                 coefficient_values);
+                        for (unsigned int point=0; point<fe_values.n_quadrature_points; ++point)
+                          {
+                            const double weight = fe_values.JxW(point);
+                            for (unsigned int i=0; i<fe_values.dofs_per_cell; ++i)
+                              {
+                                const double v = fe_values.shape_value(i,point);
+                                for (unsigned int j=0; j<fe_values.dofs_per_cell; ++j)
+                                  if (fe.system_to_component_index(i).first ==
+                                      fe.system_to_component_index(j).first)
+                                    {
+                                      const double u = fe_values.shape_value(j,point);
+                                      copy_data.cell_matrix.back()(i,j)
+                                      += (u * v * weight * coefficient_values[point]);
+                                    }
+
+                                copy_data.cell_vector.back()(i) += v *
+                                                                   rhs_values_system[point](
+                                                                     component_mapping[fe.system_to_component_index(i).first]) * weight;
+                              }
+                          }
+                      }
+                    else
+                      {
+                        coefficient_vector_values.resize (fe_values.n_quadrature_points,
+                                                          Vector<double>(n_components));
+                        coefficient->vector_value_list (fe_values.get_quadrature_points(),
+                                                        coefficient_vector_values);
+                        for (unsigned int point=0; point<fe_values.n_quadrature_points; ++point)
+                          {
+                            const double weight = fe_values.JxW(point);
+                            for (unsigned int i=0; i<fe_values.dofs_per_cell; ++i)
+                              {
+                                const double v = fe_values.shape_value(i,point);
+                                const unsigned int component_i=
+                                  fe.system_to_component_index(i).first;
+                                for (unsigned int j=0; j<fe_values.dofs_per_cell; ++j)
+                                  if (fe.system_to_component_index(j).first ==
+                                      component_i)
+                                    {
+                                      const double u = fe_values.shape_value(j,point);
+                                      copy_data.cell_matrix.back()(i,j) +=
+                                        (u * v * weight * coefficient_vector_values[point](component_i));
+                                    }
+                                copy_data.cell_vector.back()(i) += v *
+                                                                   rhs_values_system[point](component_mapping[component_i]) * weight;
+                              }
+                          }
+                      }
+                  }
+                else  //      if (coefficient == 0)
+                  for (unsigned int point=0; point<fe_values.n_quadrature_points; ++point)
+                    {
+                      const double weight = fe_values.JxW(point);
+                      for (unsigned int i=0; i<fe_values.dofs_per_cell; ++i)
+                        {
+                          const double v = fe_values.shape_value(i,point);
+                          for (unsigned int j=0; j<fe_values.dofs_per_cell; ++j)
+                            if (fe.system_to_component_index(i).first ==
+                                fe.system_to_component_index(j).first)
+                              {
+                                const double u = fe_values.shape_value(j,point);
+                                copy_data.cell_matrix.back()(i,j) += (u * v * weight);
+                              }
+                          copy_data.cell_vector.back()(i) += v *
+                                                             rhs_values_system[point](
+                                                               fe.system_to_component_index(i).first) *
+                                                             weight;
+                        }
+                    }
+              }
+            else
+              // FE is a scalar one
+              {
+                rhs_values_scalar.resize (fe_values.n_quadrature_points);
+                boundary_functions.find(cell->face(face)->boundary_id())
+                ->second->value_list (fe_values.get_quadrature_points(), rhs_values_scalar);
+
+                if (coefficient != 0)
+                  {
+                    coefficient_values.resize (fe_values.n_quadrature_points);
+                    coefficient->value_list (fe_values.get_quadrature_points(),
+                                             coefficient_values);
+                    for (unsigned int point=0; point<fe_values.n_quadrature_points; ++point)
+                      {
+                        const double weight = fe_values.JxW(point);
+                        for (unsigned int i=0; i<fe_values.dofs_per_cell; ++i)
+                          {
+                            const double v = fe_values.shape_value(i,point);
+                            for (unsigned int j=0; j<fe_values.dofs_per_cell; ++j)
+                              {
+                                const double u = fe_values.shape_value(j,point);
+                                copy_data.cell_matrix.back()(i,j) += (u * v * weight *
+                                                                      coefficient_values[point]);
+                              }
+                            copy_data.cell_vector.back()(i) += v * rhs_values_scalar[point] *weight;
+                          }
+                      }
+                  }
+                else
+                  for (unsigned int point=0; point<fe_values.n_quadrature_points; ++point)
+                    {
+                      const double weight = fe_values.JxW(point);
+                      for (unsigned int i=0; i<fe_values.dofs_per_cell; ++i)
+                        {
+                          const double v = fe_values.shape_value(i,point);
+                          for (unsigned int j=0; j<fe_values.dofs_per_cell; ++j)
+                            {
+                              const double u = fe_values.shape_value(j,point);
+                              copy_data.cell_matrix.back()(i,j) += (u * v * weight);
+                            }
+                          copy_data.cell_vector.back()(i) += v * rhs_values_scalar[point] * weight;
+                        }
+                    }
+              }
+
+            cell->face(face)->get_dof_indices (dofs_on_face_vector,
+                                               cell->active_fe_index());
+            // for each dof on the cell, have a
+            // flag whether it is on the face
+            copy_data.dof_is_on_face.push_back(std::vector<bool> (copy_data.dofs_per_cell));
+            // check for each of the dofs on this cell
+            // whether it is on the face
+            for (unsigned int i=0; i<copy_data.dofs_per_cell; ++i)
+              copy_data.dof_is_on_face.back()[i] = (std::find(dofs_on_face_vector.begin(),
+                                                              dofs_on_face_vector.end(),
+                                                              copy_data.dofs[i])
+                                                    !=
+                                                    dofs_on_face_vector.end());
+          }
+    }
+
+
+
+    template <int dim,int spacedim>
+    void copy_hp_boundary_mass_matrix_1(MatrixCreator::internal::AssemblerBoundary
+                                        ::CopyData<hp::DoFHandler<dim,spacedim> > const &copy_data,
+                                        typename FunctionMap<spacedim>::type const &boundary_functions,
+                                        std::vector<types::global_dof_index> const &dof_to_boundary_mapping,
+                                        SparseMatrix<double> &matrix,
+                                        Vector<double> &rhs_vector)
+    {
+      // Transfer the per-face cell matrices and vectors stored in copy_data
+      // into the global boundary matrix and right hand side vector.
+      //
+      // In the following, copy_data.dofs[i] holds the global index of the
+      // i-th degree of freedom on the present cell. If it is a dof on the
+      // boundary, dof_to_boundary_mapping[dofs[i]] is its boundary index;
+      // entries equal to invalid_dof_index do not denote a boundary index.
+      //
+      // If dofs[i] is not on the boundary, its shape function should be zero
+      // on the boundary, therefore at all quadrature points, and finally all
+      // of its entries in the cell matrix and vector should be zero. If not,
+      // we throw an error (note: because shape functions are evaluated only
+      // up to machine precision, "must be zero" really means "should be
+      // very small"; since this is only an assertion and not part of the
+      // computation, we may choose "very small" quite arbitrarily).
+      //
+      // The main problem here is that the matrix or vector entry should also
+      // be zero if the degree of freedom dofs[i] is on the boundary, but not
+      // on the present face, i.e. on another face of the same cell that is
+      // also on the boundary. We can therefore not rely on
+      // dof_to_boundary_mapping[dofs[i]] being valid; we really have to know
+      // whether dofs[i] is a dof on the present face. That information is
+      // read from copy_data.dof_is_on_face[pos], one flag vector per face
+      // that is listed in boundary_functions.
+      // pos counts the faces handled so far and indexes cell_matrix,
+      // cell_vector and dof_is_on_face.
+      unsigned int pos(0);
+      for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+        {
+          // check whether the boundary id of this face has an
+          // entry in boundary_functions; other faces are skipped
+          if (boundary_functions.find(copy_data.cell->face(face)->boundary_id()) !=
+              boundary_functions.end())
+            {
+#ifdef DEBUG
+              // In debug mode: compute an element in the matrix which is
+              // guaranteed to belong to a boundary dof. We do this to check that the
+              // entries in the cell matrix are (nearly) zero if the
+              // respective dof is not on the present face. Because of
+              // round-off, the actual value of the matrix entry may be
+              // only close to zero, so we assert that it is small relative to an
+              // element which is guaranteed to be nonzero (absolute smallness does
+              // not suffice since the size of the domain scales in here).
+              //
+              // For this purpose we search the diagonal of the cell matrix, where
+              // there must be an element belonging to the boundary, and take the
+              // entry of largest magnitude.
+              types::global_dof_index max_element = static_cast<types::global_dof_index>(0);
+              for (std::vector<types::global_dof_index>::const_iterator i=dof_to_boundary_mapping.begin();
+                   i!=dof_to_boundary_mapping.end(); ++i)
+                if ((*i != hp::DoFHandler<dim,spacedim>::invalid_dof_index) &&
+                    (*i > max_element))
+                  max_element = *i;
+              Assert (max_element  == matrix.n()-1, ExcInternalError());  // largest valid boundary index must be the last matrix column
+
+              double max_diag_entry = 0;
+              for (unsigned int i=0; i<copy_data.dofs_per_cell; ++i)
+                if (std::fabs(copy_data.cell_matrix[pos](i,i)) > max_diag_entry)
+                  max_diag_entry = std::fabs(copy_data.cell_matrix[pos](i,i));
+#endif
+
+              for (unsigned int i=0; i<copy_data.dofs_per_cell; ++i)
+                for (unsigned int j=0; j<copy_data.dofs_per_cell; ++j)
+                  {
+                    if (copy_data.dof_is_on_face[pos][i] && copy_data.dof_is_on_face[pos][j])  // both dofs lie on this face
+                      matrix.add(dof_to_boundary_mapping[copy_data.dofs[i]],
+                                 dof_to_boundary_mapping[copy_data.dofs[j]],
+                                 copy_data.cell_matrix[pos](i,j));
+                    else
+                      {
+                        // assume that all shape functions that are nonzero on the boundary
+                        // are also listed in the @p{dof_to_boundary} mapping. if that
+                        // is not the case, then the boundary mass matrix does not
+                        // make that much sense anyway, as it only contains entries for
+                        // parts of the functions living on the boundary
+                        //
+                        // thus, we may compare here for relative smallness of all
+                        // entries in the local matrix which are not taken over to
+                        // the global one
+                        Assert (std::fabs(copy_data.cell_matrix[pos](i,j)) <= 1e-10 * max_diag_entry,
+                                ExcInternalError ());
+                      }
+                  }
+
+              for (unsigned int j=0; j<copy_data.dofs_per_cell; ++j)
+                if (copy_data.dof_is_on_face[pos][j])
+                  rhs_vector(dof_to_boundary_mapping[copy_data.dofs[j]]) += copy_data.cell_vector[pos](j);
+                else
+                  {
+                    // entry is not transferred: check that it is
+                    // small relative to the largest diagonal entry
+                    Assert (std::fabs(copy_data.cell_vector[pos](j)) <= 1e-10 * max_diag_entry,
+                            ExcInternalError());
+                  }
+              ++pos;  // advance to the data stored for the next matching face
+            }
+        }
+    }
+  }
+
+
+
+  template <int dim, int spacedim>
+  void create_boundary_mass_matrix (const DoFHandler<dim,spacedim>     &dof,
+                                    const Quadrature<dim-1>   &q,
+                                    SparseMatrix<double>      &matrix,
+                                    const typename FunctionMap<spacedim>::type &rhs,
+                                    Vector<double>            &rhs_vector,
+                                    std::vector<types::global_dof_index> &dof_to_boundary_mapping,
+                                    const Function<spacedim> *const a,
+                                    std::vector<unsigned int> component_mapping)
+  {  // convenience overload: forwards all arguments to the variant taking a mapping, passing StaticMappingQ1<dim,spacedim>::mapping
+    create_boundary_mass_matrix(StaticMappingQ1<dim,spacedim>::mapping, dof, q,
+                                matrix,rhs, rhs_vector, dof_to_boundary_mapping, a, component_mapping);
+  }
+
+
+
+  template <int dim, int spacedim>
+  void
+  create_boundary_mass_matrix (const hp::MappingCollection<dim,spacedim>        &mapping,
+                               const hp::DoFHandler<dim,spacedim>     &dof,
+                               const hp::QCollection<dim-1>   &q,
+                               SparseMatrix<double>      &matrix,
+                               const typename FunctionMap<spacedim>::type         &boundary_functions,
+                               Vector<double>            &rhs_vector,
+                               std::vector<types::global_dof_index> &dof_to_boundary_mapping,
+                               const Function<spacedim> *const coefficient,
+                               std::vector<unsigned int> component_mapping)
+  {
+    // Not implemented for dim == 1: what would that be
+    // in 1d? The identity matrix on the boundary
+    // dofs?
+    if (dim == 1)
+      {
+        Assert (false, ExcNotImplemented());
+        return;
+      }
+
+    const hp::FECollection<dim,spacedim> &fe_collection = dof.get_fe();
+    const unsigned int n_components  = fe_collection.n_components();
+
+    Assert (matrix.n() == dof.n_boundary_dofs(boundary_functions),  // matrix must be sized for the selected boundary dofs
+            ExcInternalError());
+    Assert (matrix.n() == matrix.m(), ExcInternalError());
+    Assert (matrix.n() == rhs_vector.size(), ExcInternalError());
+    Assert (boundary_functions.size() != 0, ExcInternalError());
+    Assert (dof_to_boundary_mapping.size() == dof.n_dofs(),
+            ExcInternalError());
+    Assert (coefficient ==0 ||  // coefficient is optional; if given it must be scalar or have one component per FE component
+            coefficient->n_components==1 ||
+            coefficient->n_components==n_components, ExcComponentMismatch());
+
+    if (component_mapping.size() == 0)  // no mapping supplied: fall back to the identity mapping 0..n_components-1
+      {
+        AssertDimension (n_components, boundary_functions.begin()->second->n_components);
+        for (unsigned int i=0; i<n_components; ++i)
+          component_mapping.push_back(i);
+      }
+    else
+      AssertDimension (n_components, component_mapping.size());
+
+    MatrixCreator::internal::AssemblerBoundary::Scratch scratch;
+    MatrixCreator::internal::AssemblerBoundary::CopyData<hp::DoFHandler<dim,spacedim> > copy_data;
+
+    WorkStream::run(dof.begin_active(),dof.end(),  // worker: create_hp_boundary_mass_matrix_1; copier: copy_hp_boundary_mass_matrix_1
+                    static_cast<std_cxx11::function<void (typename hp::DoFHandler<dim,spacedim>::active_cell_iterator
+                                                          const &,MatrixCreator::internal::AssemblerBoundary::Scratch const &,
+                                                          MatrixCreator::internal::AssemblerBoundary::CopyData<hp::DoFHandler<dim,spacedim> > &)> >
+                    (std_cxx11::bind( &create_hp_boundary_mass_matrix_1<dim,spacedim>,std_cxx11::_1,std_cxx11::_2,
+                                      std_cxx11::_3,
+                                      std_cxx11::cref(mapping),std_cxx11::cref(fe_collection),std_cxx11::cref(q),
+                                      std_cxx11::cref(boundary_functions),coefficient,
+                                      std_cxx11::cref(component_mapping))),
+                    static_cast<std_cxx11::function<void (MatrixCreator::internal::AssemblerBoundary
+                                                          ::CopyData<hp::DoFHandler<dim,spacedim> > const &)> > (
+                      std_cxx11::bind( &copy_hp_boundary_mass_matrix_1<dim,spacedim>,
+                                       std_cxx11::_1,
+                                       std_cxx11::cref(boundary_functions),
+                                       std_cxx11::cref(dof_to_boundary_mapping),
+                                       std_cxx11::ref(matrix),  // matrix and rhs_vector are bound by non-const reference: the copier writes into them
+                                       std_cxx11::ref(rhs_vector))),
+                    scratch,
+                    copy_data);
+  }
+
+
+
+
+  template <int dim, int spacedim>
+  void create_boundary_mass_matrix (const hp::DoFHandler<dim,spacedim>     &dof,
+                                    const hp::QCollection<dim-1>   &q,
+                                    SparseMatrix<double>      &matrix,
+                                    const typename FunctionMap<spacedim>::type &rhs,
+                                    Vector<double>            &rhs_vector,
+                                    std::vector<types::global_dof_index> &dof_to_boundary_mapping,
+                                    const Function<spacedim> *const a,
+                                    std::vector<unsigned int> component_mapping)
+  {  // convenience overload for hp: forwards to the variant taking a mapping collection, passing hp::StaticMappingQ1<dim,spacedim>::mapping_collection
+    create_boundary_mass_matrix(hp::StaticMappingQ1<dim,spacedim>::mapping_collection, dof, q,
+                                matrix,rhs, rhs_vector, dof_to_boundary_mapping, a, component_mapping);
+  }
+
+
+
+  template <int dim, int spacedim>
+  void create_laplace_matrix (const Mapping<dim, spacedim>       &mapping,
+                              const DoFHandler<dim,spacedim>    &dof,
+                              const Quadrature<dim>    &q,
+                              SparseMatrix<double>     &matrix,
+                              const Function<spacedim> *const coefficient,
+                              const ConstraintMatrix   &constraints)
+  {  // matrix-only variant: runs laplace_assembler over all active cells via WorkStream; no right hand side function or vector is passed
+    Assert (matrix.m() == dof.n_dofs(),
+            ExcDimensionMismatch (matrix.m(), dof.n_dofs()));
+    Assert (matrix.n() == dof.n_dofs(),
+            ExcDimensionMismatch (matrix.n(), dof.n_dofs()));
+
+    hp::FECollection<dim,spacedim>      fe_collection (dof.get_fe());  // wrap the single FE, quadrature and mapping into hp collections for the Scratch object
+    hp::QCollection<dim>                q_collection (q);
+    hp::MappingCollection<dim,spacedim> mapping_collection (mapping);
+    MatrixCreator::internal::AssemblerData::Scratch<dim, spacedim>
+    assembler_data (fe_collection,
+                    update_gradients  | update_JxW_values |
+                    (coefficient != 0 ? update_quadrature_points : UpdateFlags(0)),  // quadrature points are requested only when a coefficient is given
+                    coefficient, /*rhs_function=*/0,
+                    q_collection, mapping_collection);
+    MatrixCreator::internal::AssemblerData::CopyData<double> copy_data;
+    copy_data.cell_matrix.reinit (assembler_data.fe_collection.max_dofs_per_cell(),  // size local storage for the largest element in the collection
+                                  assembler_data.fe_collection.max_dofs_per_cell());
+    copy_data.cell_rhs.reinit (assembler_data.fe_collection.max_dofs_per_cell());
+    copy_data.dof_indices.resize (assembler_data.fe_collection.max_dofs_per_cell());
+    copy_data.constraints = &constraints;
+
+    WorkStream::run (dof.begin_active(),
+                     static_cast<typename DoFHandler<dim,spacedim>::active_cell_iterator>(dof.end()),
+                     &MatrixCreator::internal::laplace_assembler<dim, spacedim, typename DoFHandler<dim,spacedim>::active_cell_iterator>,
+                     std_cxx11::bind (&MatrixCreator::internal::
+                                      copy_local_to_global<double,SparseMatrix<double>, Vector<double> >,
+                                      std_cxx11::_1,
+                                      &matrix,
+                                      (Vector<double> *)NULL),  // null pointer in place of a right hand side vector
+                     assembler_data,
+                     copy_data);
+  }
+
+
+
+  template <int dim, int spacedim>
+  void create_laplace_matrix (const DoFHandler<dim,spacedim>    &dof,
+                              const Quadrature<dim>    &q,
+                              SparseMatrix<double>     &matrix,
+                              const Function<spacedim> *const coefficient,
+                              const ConstraintMatrix &constraints)
+  {  // convenience overload: forwards to the variant taking a mapping, passing StaticMappingQ1<dim,spacedim>::mapping
+    create_laplace_matrix(StaticMappingQ1<dim,spacedim>::mapping,
+                          dof, q, matrix, coefficient, constraints);
+  }
+
+
+
+  template <int dim, int spacedim>
+  void create_laplace_matrix (const Mapping<dim, spacedim>       &mapping,
+                              const DoFHandler<dim,spacedim>    &dof,
+                              const Quadrature<dim>    &q,
+                              SparseMatrix<double>     &matrix,
+                              const Function<spacedim>      &rhs,
+                              Vector<double>           &rhs_vector,
+                              const Function<spacedim> *const coefficient,
+                              const ConstraintMatrix   &constraints)
+  {  // matrix-and-rhs variant: runs laplace_assembler over all active cells via WorkStream, passing rhs to the scratch data and rhs_vector to the copier
+    Assert (matrix.m() == dof.n_dofs(),
+            ExcDimensionMismatch (matrix.m(), dof.n_dofs()));
+    Assert (matrix.n() == dof.n_dofs(),
+            ExcDimensionMismatch (matrix.n(), dof.n_dofs()));
+
+    hp::FECollection<dim,spacedim>      fe_collection (dof.get_fe());  // wrap the single FE, quadrature and mapping into hp collections for the Scratch object
+    hp::QCollection<dim>                q_collection (q);
+    hp::MappingCollection<dim,spacedim> mapping_collection (mapping);
+    MatrixCreator::internal::AssemblerData::Scratch<dim, spacedim>
+    assembler_data (fe_collection,
+                    update_gradients  | update_values |
+                    update_JxW_values | update_quadrature_points,  // shape values and quadrature points are requested unconditionally in this variant
+                    coefficient, &rhs,
+                    q_collection, mapping_collection);
+    MatrixCreator::internal::AssemblerData::CopyData<double> copy_data;
+    copy_data.cell_matrix.reinit (assembler_data.fe_collection.max_dofs_per_cell(),  // size local storage for the largest element in the collection
+                                  assembler_data.fe_collection.max_dofs_per_cell());
+    copy_data.cell_rhs.reinit (assembler_data.fe_collection.max_dofs_per_cell());
+    copy_data.dof_indices.resize (assembler_data.fe_collection.max_dofs_per_cell());
+    copy_data.constraints = &constraints;
+
+    WorkStream::run (dof.begin_active(),
+                     static_cast<typename DoFHandler<dim,spacedim>::active_cell_iterator>(dof.end()),
+                     &MatrixCreator::internal::laplace_assembler<dim, spacedim, typename DoFHandler<dim,spacedim>::active_cell_iterator>,
+                     std_cxx11::bind (&MatrixCreator::internal::
+                                      copy_local_to_global<double,SparseMatrix<double>, Vector<double> >,
+                                      std_cxx11::_1,
+                                      &matrix,
+                                      &rhs_vector),  // the copier receives pointers to both output objects
+                     assembler_data,
+                     copy_data);
+  }
+
+
+
+  template <int dim, int spacedim>
+  void create_laplace_matrix (const DoFHandler<dim,spacedim>    &dof,
+                              const Quadrature<dim>    &q,
+                              SparseMatrix<double>     &matrix,
+                              const Function<spacedim>      &rhs,
+                              Vector<double>           &rhs_vector,
+                              const Function<spacedim> *const coefficient,
+                              const ConstraintMatrix &constraints)
+  {  // convenience overload: forwards to the matrix-and-rhs variant taking a mapping, passing StaticMappingQ1<dim,spacedim>::mapping
+    create_laplace_matrix(StaticMappingQ1<dim,spacedim>::mapping, dof, q,
+                          matrix, rhs, rhs_vector, coefficient, constraints);
+  }
+
+
+
+  template <int dim, int spacedim>
+  void create_laplace_matrix (const hp::MappingCollection<dim,spacedim>       &mapping,
+                              const hp::DoFHandler<dim,spacedim>    &dof,
+                              const hp::QCollection<dim>    &q,
+                              SparseMatrix<double>     &matrix,
+                              const Function<spacedim> *const coefficient,
+                              const ConstraintMatrix   &constraints)
+  {  // hp matrix-only variant: the given collections are handed to the Scratch object directly; no right hand side function or vector is passed
+    Assert (matrix.m() == dof.n_dofs(),
+            ExcDimensionMismatch (matrix.m(), dof.n_dofs()));
+    Assert (matrix.n() == dof.n_dofs(),
+            ExcDimensionMismatch (matrix.n(), dof.n_dofs()));
+
+    MatrixCreator::internal::AssemblerData::Scratch<dim, spacedim>
+    assembler_data (dof.get_fe(),
+                    update_gradients  | update_JxW_values |
+                    (coefficient != 0 ? update_quadrature_points : UpdateFlags(0)),  // quadrature points are requested only when a coefficient is given
+                    coefficient, /*rhs_function=*/0,
+                    q, mapping);
+    MatrixCreator::internal::AssemblerData::CopyData<double> copy_data;
+    copy_data.cell_matrix.reinit (assembler_data.fe_collection.max_dofs_per_cell(),  // size local storage for the largest element in the collection
+                                  assembler_data.fe_collection.max_dofs_per_cell());
+    copy_data.cell_rhs.reinit (assembler_data.fe_collection.max_dofs_per_cell());
+    copy_data.dof_indices.resize (assembler_data.fe_collection.max_dofs_per_cell());
+    copy_data.constraints = &constraints;
+
+    WorkStream::run (dof.begin_active(),
+                     static_cast<typename hp::DoFHandler<dim,spacedim>::active_cell_iterator>(dof.end()),
+                     &MatrixCreator::internal::laplace_assembler<dim, spacedim, typename hp::DoFHandler<dim,spacedim>::active_cell_iterator>,
+                     std_cxx11::bind (&MatrixCreator::internal::
+                                      copy_local_to_global<double,SparseMatrix<double>, Vector<double> >,
+                                      std_cxx11::_1,
+                                      &matrix,
+                                      (Vector<double> *)0),  // null pointer in place of a right hand side vector
+                     assembler_data,
+                     copy_data);
+  }
+
+
+
+  template <int dim, int spacedim>
+  void create_laplace_matrix (const hp::DoFHandler<dim,spacedim>    &dof,
+                              const hp::QCollection<dim>    &q,
+                              SparseMatrix<double>     &matrix,
+                              const Function<spacedim> *const coefficient,
+                              const ConstraintMatrix &constraints)
+  {  // convenience overload for hp: forwards to the variant taking a mapping collection, passing hp::StaticMappingQ1<dim,spacedim>::mapping_collection
+    create_laplace_matrix(hp::StaticMappingQ1<dim,spacedim>::mapping_collection, dof, q,
+                          matrix, coefficient, constraints);
+  }
+
+
+
+  template <int dim, int spacedim>
+  void create_laplace_matrix (const hp::MappingCollection<dim,spacedim>       &mapping,
+                              const hp::DoFHandler<dim,spacedim>    &dof,
+                              const hp::QCollection<dim>    &q,
+                              SparseMatrix<double>     &matrix,
+                              const Function<spacedim>      &rhs,
+                              Vector<double>           &rhs_vector,
+                              const Function<spacedim> *const coefficient,
+                              const ConstraintMatrix   &constraints)
+  {  // hp matrix-and-rhs variant: the given collections are handed to the Scratch object directly; rhs goes to the scratch data, rhs_vector to the copier
+    Assert (matrix.m() == dof.n_dofs(),
+            ExcDimensionMismatch (matrix.m(), dof.n_dofs()));
+    Assert (matrix.n() == dof.n_dofs(),
+            ExcDimensionMismatch (matrix.n(), dof.n_dofs()));
+
+    MatrixCreator::internal::AssemblerData::Scratch<dim, spacedim>
+    assembler_data (dof.get_fe(),
+                    update_gradients  | update_values |
+                    update_JxW_values | update_quadrature_points,  // shape values and quadrature points are requested unconditionally in this variant
+                    coefficient, &rhs,
+                    q, mapping);
+    MatrixCreator::internal::AssemblerData::CopyData<double> copy_data;
+    copy_data.cell_matrix.reinit (assembler_data.fe_collection.max_dofs_per_cell(),  // size local storage for the largest element in the collection
+                                  assembler_data.fe_collection.max_dofs_per_cell());
+    copy_data.cell_rhs.reinit (assembler_data.fe_collection.max_dofs_per_cell());
+    copy_data.dof_indices.resize (assembler_data.fe_collection.max_dofs_per_cell());
+    copy_data.constraints = &constraints;
+
+    WorkStream::run (dof.begin_active(),
+                     static_cast<typename hp::DoFHandler<dim,spacedim>::active_cell_iterator>(dof.end()),
+                     &MatrixCreator::internal::laplace_assembler<dim, spacedim, typename hp::DoFHandler<dim,spacedim>::active_cell_iterator>,
+                     std_cxx11::bind (&MatrixCreator::internal::
+                                      copy_local_to_global<double,SparseMatrix<double>, Vector<double> >,
+                                      std_cxx11::_1,
+                                      &matrix,
+                                      &rhs_vector),  // the copier receives pointers to both output objects
+                     assembler_data,
+                     copy_data);
+  }
+
+
+
+  template <int dim, int spacedim>
+  void create_laplace_matrix (const hp::DoFHandler<dim,spacedim>    &dof,
+                              const hp::QCollection<dim>    &q,
+                              SparseMatrix<double>     &matrix,
+                              const Function<spacedim>      &rhs,
+                              Vector<double>           &rhs_vector,
+                              const Function<spacedim> *const coefficient,
+                              const ConstraintMatrix &constraints)
+  {  // convenience overload for hp: forwards to the matrix-and-rhs variant taking a mapping collection, passing hp::StaticMappingQ1<dim,spacedim>::mapping_collection
+    create_laplace_matrix(hp::StaticMappingQ1<dim,spacedim>::mapping_collection, dof, q,
+                          matrix, rhs, rhs_vector, coefficient, constraints);
+  }
+
+}  // namespace MatrixCreator
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/numerics/matrix_tools.h b/include/deal.II/numerics/matrix_tools.h
new file mode 100644
index 0000000..fa2c85f
--- /dev/null
+++ b/include/deal.II/numerics/matrix_tools.h
@@ -0,0 +1,957 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__matrix_tools_h
+#define dealii__matrix_tools_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/thread_management.h>
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/dofs/function_map.h>
+
+#include <map>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// forward declarations
+template <int dim> class Quadrature;
+
+
+template<typename number> class Vector;
+template<typename number> class FullMatrix;
+template<typename number> class SparseMatrix;
+
+template <typename number> class BlockSparseMatrix;
+template <typename Number> class BlockVector;
+
+template <int dim, int spacedim> class Mapping;
+template <int dim, int spacedim> class DoFHandler;
+template <int dim, int spacedim> class FEValues;
+
+namespace hp
+{
+  template <int> class QCollection;
+  template <int, int> class MappingCollection;
+  template <int, int> class DoFHandler;
+}
+
+
+#ifdef DEAL_II_WITH_PETSC
+namespace PETScWrappers
+{
+  class SparseMatrix;
+  class Vector;
+  namespace MPI
+  {
+    class SparseMatrix;
+    class BlockSparseMatrix;
+    class Vector;
+    class BlockVector;
+  }
+}
+#endif
+
+#ifdef DEAL_II_WITH_TRILINOS
+namespace TrilinosWrappers
+{
+  class SparseMatrix;
+  class Vector;
+  class BlockSparseMatrix;
+  class BlockVector;
+  namespace MPI
+  {
+    class Vector;
+    class BlockVector;
+  }
+}
+#endif
+
+
+/**
+ * This namespace provides functions that assemble certain standard matrices
+ * for a given triangulation, using a given finite element, a given mapping
+ * and a quadrature formula.
+ *
+ *
+ * <h3>Conventions for all functions</h3>
+ *
+ * There exist two versions of almost all functions, one that takes an
+ * explicit Mapping argument and one that does not. The second one generally
+ * calls the first with an implicit $Q_1$ argument (i.e., with an argument of
+ * kind MappingQGeneric(1)). If you intend your code to use a different
+ * mapping than a (bi-/tri-)linear one, then you need to call the functions
+ * <b>with</b> the mapping argument.
+ *
+ * All functions take a sparse matrix object to hold the matrix to be created.
+ * The functions assume that the matrix is initialized with a sparsity pattern
+ * (SparsityPattern) corresponding to the given degree of freedom handler,
+ * i.e. the sparsity structure is already as needed. You can do this by
+ * calling the DoFTools::make_sparsity_pattern() function.
+ *
+ * Furthermore it is assumed that no relevant data is in the matrix. Some
+ * entries will be overwritten and some others will contain invalid data if
+ * the matrix wasn't empty before. Therefore you may want to clear the matrix
+ * before assemblage.
+ *
+ * By default, all created matrices are `raw': they are not condensed, i.e.
+ * hanging nodes are not eliminated. The reason is that you may want to add
+ * several matrices and could then condense afterwards only once, instead of
+ * for every matrix. To actually do computations with these matrices, you have
+ * to condense the matrix using the ConstraintMatrix::condense function; you
+ * also have to condense the right hand side accordingly and distribute the
+ * solution afterwards. Alternatively, you can give an optional argument
+ * ConstraintMatrix that writes cell matrix (and vector) entries with
+ * distribute_local_to_global into the global matrix and vector. This way,
+ * adding several matrices from different sources is more complicated and you
+ * should make sure that you do not mix different ways of applying
+ * constraints. Particular caution is necessary when the given constraint
+ * matrix contains inhomogeneous constraints: In that case, the matrix
+ * assembled this way must be the only matrix (or you need to assemble the
+ * <b>same</b> right hand side for <b>every</b> matrix you generate and add
+ * together).
+ *
+ * If you want to use boundary conditions with the matrices generated by the
+ * functions of this namespace in addition to the ones in a possible
+ * constraint matrix, you have to use a function like
+ * MatrixTools::apply_boundary_values() on the matrix and right hand side.
+ *
+ *
+ * <h3>Supported matrices</h3>
+ *
+ * At present there are functions to create the following matrices:
+ * <ul>
+ * <li> @p create_mass_matrix: create the matrix with entries $m_{ij} =
+ * \int_\Omega \phi_i(x) \phi_j(x) dx$ by numerical quadrature. Here, the
+ * $\phi_i$ are the basis functions of the finite element space given.
+ *
+ * A coefficient may be given to evaluate $m_{ij} = \int_\Omega a(x) \phi_i(x)
+ * \phi_j(x) dx$ instead.
+ *
+ * <li> @p create_laplace_matrix: create the matrix with entries $a_{ij} =
+ * \int_\Omega \nabla\phi_i(x) \nabla\phi_j(x) dx$ by numerical quadrature.
+ *
+ * Again, a coefficient may be given to evaluate $a_{ij} = \int_\Omega a(x)
+ * \nabla\phi_i(x) \nabla\phi_j(x) dx$ instead.
+ * </ul>
+ *
+ * Make sure that the order of the Quadrature formula given to these functions
+ * is sufficiently high to compute the matrices with the required accuracy.
+ * For the choice of this quadrature rule you need to take into account the
+ * polynomial degree of the FiniteElement basis functions, the roughness of
+ * the coefficient @p a, as well as the degree of the given @p Mapping (if
+ * any).
+ *
+ * Note that for vector-valued elements the mass matrix and the Laplace
+ * matrix are implemented in such a way that each component couples only with
+ * itself, i.e. there is no coupling of shape functions belonging to different
+ * components. If the degrees of freedom have been sorted according to their
+ * vector component (e.g., using DoFRenumbering::component_wise()), then the
+ * resulting matrices will be block diagonal.
+ *
+ * If the finite element for which the mass matrix or the Laplace matrix is to
+ * be built has more than one component, the functions accept a single
+ * coefficient as well as a vector valued coefficient function. For the latter
+ * case, the number of components must coincide with the number of components
+ * of the system finite element.
+ *
+ *
+ * <h3>Matrices on the boundary</h3>
+ *
+ * The create_boundary_mass_matrix() creates the matrix with entries $m_{ij} =
+ * \int_{\Gamma} \phi_i \phi_j dx$, where $\Gamma$ is the union of boundary
+ * parts with indicators contained in a FunctionMap passed to the function
+ * (i.e. if you want to set up the mass matrix for the parts of the boundary
+ * with indicators zero and 2, you pass the function a map of <tt>unsigned
+ * char</tt>s as parameter @p boundary_functions containing the keys zero and
+ * 2). The size of the matrix is equal to the number of degrees of freedom
+ * that have support on the boundary, i.e. it is <em>not</em> a matrix on all
+ * degrees of freedom, but only a subset. (The $\phi_i$ in the formula are the
+ * subset of basis functions which have at least part of their support on
+ * $\Gamma$.) In order to determine which shape functions are to be
+ * considered, and in order to determine in which order, the function takes a
+ * @p dof_to_boundary_mapping; this object maps global DoF numbers to a
+ * numbering of the degrees of freedom located on the boundary, and can be
+ * obtained using the function DoFTools::map_dof_to_boundary_indices().
+ *
+ * In order to work, the function needs a matrix of the correct size, built on
+ * top of a corresponding sparsity pattern. Since we only work on a subset of
+ * the degrees of freedom, we can't use the matrices and sparsity patterns
+ * that are created for the entire set of degrees of freedom. Rather, you
+ * should use the DoFHandler::make_boundary_sparsity_pattern() function to
+ * create the correct sparsity pattern, and build a matrix on top of it.
+ *
+ * Note that at present there is no function that computes the mass matrix for
+ * <em>all</em> shape functions, though such a function would be trivial to
+ * implement.
+ *
+ *
+ * <h3>Right hand sides</h3>
+ *
+ * In many cases, you will not only want to build the matrix, but also a right
+ * hand side, which will give a vector with $f_i = \int_\Omega f(x) \phi_i(x)
+ * dx$. For this purpose, each function exists in two versions, one only
+ * building the matrix and one also building the right hand side vector. If
+ * you want to create a right hand side vector without creating a matrix, you
+ * can use the VectorTools::create_right_hand_side() function. The use of the
+ * latter may be useful if you want to create many right hand side vectors.
+ *
+ * @ingroup numerics
+ * @author Wolfgang Bangerth, 1998, Ralf Hartmann, 2001
+ */
+namespace MatrixCreator
+{
+  /**
+   * Assemble the mass matrix. If no coefficient is given (i.e., if the
+   * pointer to a function object is zero as it is by default), the
+   * coefficient is taken as being constant and equal to one.
+   *
+   * If the library is configured to use multithreading, this function works
+   * in parallel.
+   *
+   * The optional argument @p constraints allows applying constraints to the
+   * resulting matrix directly. Note, however, that this becomes difficult
+   * when you have inhomogeneous constraints and later want to add several
+   * such matrices, for example in time dependent settings such as the main
+   * loop of step-26.
+   *
+   * See the general documentation of this namespace for more information.
+   */
+  template <int dim, typename number, int spacedim>
+  void create_mass_matrix (const Mapping<dim, spacedim>       &mapping,
+                           const DoFHandler<dim,spacedim>    &dof,
+                           const Quadrature<dim>    &q,
+                           SparseMatrix<number>     &matrix,
+                           const Function<spacedim> *const a = 0,
+                           const ConstraintMatrix   &constraints = ConstraintMatrix());
+
+  /**
+   * Calls the create_mass_matrix() function, see above, with
+   * <tt>mapping=MappingQGeneric@<dim@>(1)</tt>.
+   */
+  template <int dim, typename number, int spacedim>
+  void create_mass_matrix (const DoFHandler<dim,spacedim>    &dof,
+                           const Quadrature<dim>    &q,
+                           SparseMatrix<number>     &matrix,
+                           const Function<spacedim> *const a = 0,
+                           const ConstraintMatrix   &constraints = ConstraintMatrix());
+
+  /**
+   * Assemble the mass matrix and a right hand side vector. If no coefficient
+   * is given (i.e., if the pointer to a function object is zero as it is by
+   * default), the coefficient is taken as being constant and equal to one.
+   *
+   * If the library is configured to use multithreading, this function works
+   * in parallel.
+   *
+   * The optional argument @p constraints allows applying constraints to the
+   * resulting matrix directly. Note, however, that this becomes difficult
+   * when you have inhomogeneous constraints and later want to add several
+   * such matrices, for example in time dependent settings such as the main
+   * loop of step-26.
+   *
+   * See the general documentation of this namespace for more information.
+   */
+  template <int dim, typename number, int spacedim>
+  void create_mass_matrix (const Mapping<dim, spacedim>   &mapping,
+                           const DoFHandler<dim,spacedim> &dof,
+                           const Quadrature<dim>    &q,
+                           SparseMatrix<number>     &matrix,
+                           const Function<spacedim> &rhs,
+                           Vector<number>           &rhs_vector,
+                           const Function<spacedim> *const a = 0,
+                           const ConstraintMatrix   &constraints = ConstraintMatrix());
+
+  /**
+   * Calls the create_mass_matrix() function, see above, with
+   * <tt>mapping=MappingQGeneric@<dim@>(1)</tt>.
+   */
+  template <int dim, typename number, int spacedim>
+  void create_mass_matrix (const DoFHandler<dim,spacedim> &dof,
+                           const Quadrature<dim>    &q,
+                           SparseMatrix<number>     &matrix,
+                           const Function<spacedim> &rhs,
+                           Vector<number>           &rhs_vector,
+                           const Function<spacedim> *const a = 0,
+                           const ConstraintMatrix   &constraints = ConstraintMatrix());
+
+  /**
+   * Same function as above, but for hp objects.
+   */
+  template <int dim, typename number, int spacedim>
+  void create_mass_matrix (const hp::MappingCollection<dim,spacedim>       &mapping,
+                           const hp::DoFHandler<dim,spacedim>    &dof,
+                           const hp::QCollection<dim>    &q,
+                           SparseMatrix<number>     &matrix,
+                           const Function<spacedim> *const a = 0,
+                           const ConstraintMatrix   &constraints = ConstraintMatrix());
+
+  /**
+   * Same function as above, but for hp objects.
+   */
+  template <int dim, typename number, int spacedim>
+  void create_mass_matrix (const hp::DoFHandler<dim,spacedim>    &dof,
+                           const hp::QCollection<dim>    &q,
+                           SparseMatrix<number>     &matrix,
+                           const Function<spacedim> *const a = 0,
+                           const ConstraintMatrix   &constraints = ConstraintMatrix());
+
+  /**
+   * Same function as above, but for hp objects.
+   */
+  template <int dim, typename number, int spacedim>
+  void create_mass_matrix (const hp::MappingCollection<dim,spacedim> &mapping,
+                           const hp::DoFHandler<dim,spacedim> &dof,
+                           const hp::QCollection<dim> &q,
+                           SparseMatrix<number>     &matrix,
+                           const Function<spacedim> &rhs,
+                           Vector<number>           &rhs_vector,
+                           const Function<spacedim> *const a = 0,
+                           const ConstraintMatrix   &constraints = ConstraintMatrix());
+
+  /**
+   * Same function as above, but for hp objects.
+   */
+  template <int dim, typename number, int spacedim>
+  void create_mass_matrix (const hp::DoFHandler<dim,spacedim> &dof,
+                           const hp::QCollection<dim> &q,
+                           SparseMatrix<number>     &matrix,
+                           const Function<spacedim> &rhs,
+                           Vector<number>           &rhs_vector,
+                           const Function<spacedim> *const a = 0,
+                           const ConstraintMatrix   &constraints = ConstraintMatrix());
+
+
+  /**
+   * Assemble the mass matrix and a right hand side vector along the boundary.
+   *
+   * The matrix is assumed to already be initialized with a suitable sparsity
+   * pattern (the DoFHandler provides an appropriate function).
+   *
+   * If the library is configured to use multithreading, this function works
+   * in parallel.
+   *
+   * @arg @p weight: an optional weight for the computation of the mass
+   * matrix. If no weight is given, it is set to one.
+   *
+   * @arg @p component_mapping: if the components in @p boundary_functions and
+   * @p dof do not coincide, this vector allows them to be remapped. If the
+   * vector is not empty, it has to have one entry for each component in @p
+   * dof. This entry is the component number in @p boundary_functions that
+   * should be used for this component in @p dof. By default, no remapping is
+   * applied.
+   *
+   * @todo This function does not work for finite elements with cell-dependent
+   * shape functions.
+   */
+  template <int dim, int spacedim>
+  void create_boundary_mass_matrix (const Mapping<dim, spacedim>       &mapping,
+                                    const DoFHandler<dim,spacedim>    &dof,
+                                    const Quadrature<dim-1>  &q,
+                                    SparseMatrix<double>     &matrix,
+                                    const typename FunctionMap<spacedim>::type &boundary_functions,
+                                    Vector<double>           &rhs_vector,
+                                    std::vector<types::global_dof_index> &dof_to_boundary_mapping,
+                                    const Function<spacedim> *const weight = 0,
+                                    std::vector<unsigned int> component_mapping = std::vector<unsigned int>());
+
+
+  /**
+   * Calls the create_boundary_mass_matrix() function, see above, with
+   * <tt>mapping=MappingQGeneric@<dim@>(1)</tt>.
+   */
+  template <int dim, int spacedim>
+  void create_boundary_mass_matrix (const DoFHandler<dim,spacedim>    &dof,
+                                    const Quadrature<dim-1>  &q,
+                                    SparseMatrix<double>     &matrix,
+                                    const typename FunctionMap<spacedim>::type        &boundary_functions,
+                                    Vector<double>           &rhs_vector,
+                                    std::vector<types::global_dof_index> &dof_to_boundary_mapping,
+                                    const Function<spacedim> *const a = 0,
+                                    std::vector<unsigned int> component_mapping = std::vector<unsigned int>());
+
+  /**
+   * Same function as above, but for hp objects.
+   */
+  template <int dim, int spacedim>
+  void create_boundary_mass_matrix (const hp::MappingCollection<dim,spacedim>       &mapping,
+                                    const hp::DoFHandler<dim,spacedim>    &dof,
+                                    const hp::QCollection<dim-1>  &q,
+                                    SparseMatrix<double>     &matrix,
+                                    const typename FunctionMap<spacedim>::type &boundary_functions,
+                                    Vector<double>           &rhs_vector,
+                                    std::vector<types::global_dof_index> &dof_to_boundary_mapping,
+                                    const Function<spacedim> *const a = 0,
+                                    std::vector<unsigned int> component_mapping = std::vector<unsigned int>());
+
+  /**
+   * Same function as above, but for hp objects.
+   */
+  template <int dim, int spacedim>
+  void create_boundary_mass_matrix (const hp::DoFHandler<dim,spacedim>    &dof,
+                                    const hp::QCollection<dim-1>  &q,
+                                    SparseMatrix<double>     &matrix,
+                                    const typename FunctionMap<spacedim>::type        &boundary_functions,
+                                    Vector<double>           &rhs_vector,
+                                    std::vector<types::global_dof_index> &dof_to_boundary_mapping,
+                                    const Function<spacedim> *const a = 0,
+                                    std::vector<unsigned int> component_mapping = std::vector<unsigned int>());
+
+  /**
+   * Assemble the Laplace matrix. If no coefficient is given (i.e., if the
+   * pointer to a function object is zero as it is by default), the
+   * coefficient is taken as being constant and equal to one.
+   *
+   * If the library is configured to use multithreading, this function works
+   * in parallel.
+   *
+   * The optional argument @p constraints allows applying constraints to the
+   * resulting matrix directly. Note, however, that this becomes difficult
+   * when you have inhomogeneous constraints and later want to add several
+   * such matrices, for example in time dependent settings such as the main
+   * loop of step-26.
+   *
+   * See the general documentation of this namespace for more information.
+   */
+  template <int dim, int spacedim>
+  void create_laplace_matrix (const Mapping<dim, spacedim>   &mapping,
+                              const DoFHandler<dim,spacedim> &dof,
+                              const Quadrature<dim>    &q,
+                              SparseMatrix<double>     &matrix,
+                              const Function<spacedim> *const a = 0,
+                              const ConstraintMatrix   &constraints = ConstraintMatrix());
+
+  /**
+   * Calls the create_laplace_matrix() function, see above, with
+   * <tt>mapping=MappingQGeneric@<dim@>(1)</tt>.
+   */
+  template <int dim, int spacedim>
+  void create_laplace_matrix (const DoFHandler<dim,spacedim> &dof,
+                              const Quadrature<dim>    &q,
+                              SparseMatrix<double>     &matrix,
+                              const Function<spacedim> *const a = 0,
+                              const ConstraintMatrix   &constraints = ConstraintMatrix());
+
+  /**
+   * Assemble the Laplace matrix and a right hand side vector. If no
+   * coefficient is given, it is assumed to be constant one.
+   *
+   * If the library is configured to use multithreading, this function works
+   * in parallel.
+   *
+   * The optional argument @p constraints allows applying constraints to the
+   * resulting matrix directly. Note, however, that this becomes difficult
+   * when you have inhomogeneous constraints and later want to add several
+   * such matrices, for example in time dependent settings such as the main
+   * loop of step-26.
+   *
+   * See the general documentation of this namespace for more information.
+   */
+  template <int dim, int spacedim>
+  void create_laplace_matrix (const Mapping<dim, spacedim>   &mapping,
+                              const DoFHandler<dim,spacedim> &dof,
+                              const Quadrature<dim>    &q,
+                              SparseMatrix<double>     &matrix,
+                              const Function<spacedim> &rhs,
+                              Vector<double>           &rhs_vector,
+                              const Function<spacedim> *const a = 0,
+                              const ConstraintMatrix   &constraints = ConstraintMatrix());
+
+  /**
+   * Calls the create_laplace_matrix() function, see above, with
+   * <tt>mapping=MappingQGeneric@<dim@>(1)</tt>.
+   */
+  template <int dim, int spacedim>
+  void create_laplace_matrix (const DoFHandler<dim,spacedim> &dof,
+                              const Quadrature<dim>    &q,
+                              SparseMatrix<double>     &matrix,
+                              const Function<spacedim> &rhs,
+                              Vector<double>           &rhs_vector,
+                              const Function<spacedim> *const a = 0,
+                              const ConstraintMatrix   &constraints = ConstraintMatrix());
+
+  /**
+   * Like the functions above, but for hp dof handlers, mappings, and
+   * quadrature collections.
+   */
+  template <int dim, int spacedim>
+  void create_laplace_matrix (const hp::MappingCollection<dim,spacedim> &mapping,
+                              const hp::DoFHandler<dim,spacedim> &dof,
+                              const hp::QCollection<dim>    &q,
+                              SparseMatrix<double>     &matrix,
+                              const Function<spacedim> *const a = 0,
+                              const ConstraintMatrix   &constraints = ConstraintMatrix());
+
+  /**
+   * Like the functions above, but for hp dof handlers, mappings, and
+   * quadrature collections.
+   */
+  template <int dim, int spacedim>
+  void create_laplace_matrix (const hp::DoFHandler<dim,spacedim> &dof,
+                              const hp::QCollection<dim>    &q,
+                              SparseMatrix<double>     &matrix,
+                              const Function<spacedim> *const a = 0,
+                              const ConstraintMatrix   &constraints = ConstraintMatrix());
+
+  /**
+   * Like the functions above, but for hp dof handlers, mappings, and
+   * quadrature collections.
+   */
+  template <int dim, int spacedim>
+  void create_laplace_matrix (const hp::MappingCollection<dim,spacedim> &mapping,
+                              const hp::DoFHandler<dim,spacedim> &dof,
+                              const hp::QCollection<dim>    &q,
+                              SparseMatrix<double>     &matrix,
+                              const Function<spacedim>      &rhs,
+                              Vector<double>           &rhs_vector,
+                              const Function<spacedim> *const a = 0,
+                              const ConstraintMatrix   &constraints = ConstraintMatrix());
+
+  /**
+   * Like the functions above, but for hp dof handlers, mappings, and
+   * quadrature collections.
+   */
+  template <int dim, int spacedim>
+  void create_laplace_matrix (const hp::DoFHandler<dim,spacedim> &dof,
+                              const hp::QCollection<dim>    &q,
+                              SparseMatrix<double>     &matrix,
+                              const Function<spacedim>      &rhs,
+                              Vector<double>           &rhs_vector,
+                              const Function<spacedim> *const a = 0,
+                              const ConstraintMatrix   &constraints = ConstraintMatrix());
+
+  /**
+   * Exception
+   */
+  DeclExceptionMsg (ExcComponentMismatch,
+                    "You are providing either a right hand side function or a "
+                    "coefficient with a number of vector components that is "
+                    "inconsistent with the rest of the arguments. If you do "
+                    "provide a coefficient or right hand side function, then "
+                    "it either needs to have as many components as the finite "
+                    "element in use, or only a single vector component. In "
+                    "the latter case, the same value will be taken for "
+                    "each vector component of the finite element.");
+}
+
+
+
+/**
+ * Provide a collection of functions operating on matrices. These include the
+ * application of boundary conditions to a linear system of equations and
+ * others.
+ *
+ *
+ * <h3>Boundary conditions</h3>
+ *
+ * The apply_boundary_values() function inserts boundary conditions into a
+ * system of equations.  To actually do this you have to specify a list of
+ * degree of freedom indices along with the values these degrees of freedom
+ * shall assume. To see how to get such a list, see the discussion of the
+ * VectorTools::interpolate_boundary_values function.
+ *
+ * There are two ways to incorporate fixed degrees of freedom such as boundary
+ * nodes into a linear system, as discussed below.
+ *
+ * @dealiiVideoLecture{21.6,21.65}
+ *
+ *
+ *
+ * <h3>Global elimination</h3>
+ *
+ * In the first method, we first assemble the global linear system without
+ * respect for fixed degrees of freedom, and in a second step eliminate them
+ * again from the linear system. The inclusion into the assembly process is as
+ * follows: when the matrix and vectors are set up, a list of nodes subject to
+ * Dirichlet bc is made and matrix and vectors are modified accordingly. This
+ * is done by deleting all entries in the matrix in the line of this degree of
+ * freedom, setting the main diagonal entry to a suitable positive value and
+ * the right hand side element to a value so that the solution of the linear
+ * system will have the boundary value at this node. To decouple the remaining
+ * linear system of equations and to make the system symmetric again (at least
+ * if it was before), one Gauss elimination step is performed with this line,
+ * by adding this (now almost empty) line to all other lines which couple with
+ * the given degree of freedom and thus eliminating all coupling between this
+ * degree of freedom and others. Now the respective column also consists only
+ * of zeroes, apart from the main diagonal entry. Alternatively, the functions
+ * in this namespace take a boolean parameter that allows omitting this last
+ * step, if symmetry of the resulting linear system is not required. Note that
+ * usually even CG can cope with a non-symmetric linear system with this
+ * particular structure.
+ *
+ * Finding which rows contain an entry in the column for which we are
+ * presently performing a Gauss elimination step is either difficult or very
+ * simple, depending on the circumstances. If the sparsity pattern is
+ * symmetric (whether the matrix is symmetric is irrelevant here), then we can
+ * infer the rows which have a nonzero entry in the present column by looking
+ * at which columns in the present row are nonempty. In this case, we only
+ * need to look into a fixed number of rows and need not search all rows. On
+ * the other hand, if the sparsity pattern is nonsymmetric, then we need to
+ * use an iterative solver which can handle nonsymmetric matrices in any case,
+ * so there may be no need to do the Gauss elimination anyway. In fact, this
+ * is the way the function works: it takes a parameter (@p eliminate_columns)
+ * that specifies whether the sparsity pattern is symmetric; if so, then the
+ * column is eliminated and the right hand side is also modified accordingly.
+ * If not, then only the row is deleted and the column is not touched at all,
+ * and all right hand side values apart from the one corresponding to the
+ * present row remain unchanged.
+ *
+ * If the sparsity pattern for your matrix is non-symmetric, you must set the
+ * value of this parameter to @p false in any case, since then we can't
+ * eliminate the column without searching all rows, which would be too
+ * expensive (if @p N be the number of rows, and @p m the number of nonzero
+ * elements per row, then eliminating one column is an <tt>O(N*log(m))</tt>
+ * operation, since searching in each row takes <tt>log(m)</tt> operations).
+ * If your sparsity pattern is symmetric, but your matrix is not, then you
+ * might specify @p false as well. If your sparsity pattern and matrix are
+ * both symmetric, you might want to specify @p true (the complexity of
+ * eliminating one row is then <tt>O(m*log(m))</tt>, since we only have to
+ * search @p m rows for the respective element of the column). Given the fact
+ * that @p m is roughly constant, irrespective of the discretization, and that
+ * the number of boundary nodes is <tt>sqrt(N)</tt> in 2d, the algorithm for
+ * symmetric sparsity patterns is <tt>O(sqrt(N)*m*log(m))</tt>, while it would
+ * be <tt>O(N*sqrt(N)*log(m))</tt> for the general case; the latter is too
+ * expensive to be performed.
+ *
+ * It seems as if we had to make sure not to overwrite the lines of other
+ * boundary nodes when doing the Gauss elimination step. However, since we
+ * reset the right hand side when passing such a node, it is not a problem to
+ * change the right hand side values of other boundary nodes not yet
+ * processed. It would be a problem to change those entries of nodes already
+ * processed, but since the matrix entry of the present column on the row of
+ * an already processed node is zero, the Gauss step does not change the right
+ * hand side. We need therefore not take special care of other boundary nodes.
+ *
+ * To make solving faster, we preset the solution vector with the right
+ * boundary values (as to why this is necessary, see the discussion below in
+ * the description of local elimination). It is not clear whether the deletion
+ * of coupling between the boundary degree of freedom and other dofs really
+ * forces the corresponding entry in the solution vector to have the right
+ * value when using iterative solvers, since their search directions may
+ * contain components in the direction of the boundary node. For this reason,
+ * we perform a very simple line balancing by not setting the main diagonal
+ * entry to unity, but rather to the value it had before deleting this line,
+ * or to the first nonzero main diagonal entry if it is zero for some reason.
+ * Of course we have to change the right hand side appropriately. This is not
+ * a very good strategy, but it at least should give the main diagonal entry a
+ * value in the right order of magnitude, which makes the solution process a
+ * bit more stable. A refined algorithm would set the entry to the mean of the
+ * other diagonal entries, but this seems to be too expensive.
+ *
+ * In some cases, it might be interesting to solve several times with the same
+ * matrix, but for different right hand sides or boundary values. However,
+ * since the modification for boundary values of the right hand side vector
+ * depends on the original matrix, this is not possible without storing the
+ * original matrix somewhere and applying the @p apply_boundary_conditions
+ * function to a copy of it each time we want to solve. In that case, you can
+ * use the FilteredMatrix class in the @p LAC sublibrary. There you can also
+ * find a formal (mathematical) description of the process of modifying the
+ * matrix and right hand side vectors for boundary values.
+ *
+ *
+ * <h3>Local elimination</h3>
+ *
+ * The second way of handling boundary values is to modify the local matrix
+ * and vector contributions appropriately before transferring them into the
+ * global sparse matrix and vector. This is what local_apply_boundary_values()
+ * does. The advantage is that we save the call to the apply_boundary_values
+ * function (which is expensive because it has to work on sparse data
+ * structures). On the other hand, the local_apply_boundary_values() function
+ * is called many times, even if we only have a very small number of fixed
+ * boundary nodes, and the main drawback is that this function doesn't work as
+ * expected if there are hanging nodes that also need to be treated. The
+ * reason that this function doesn't work is that it is meant to be run before
+ * distribution into the global matrix, i.e. before hanging nodes are
+ * distributed; since hanging nodes can be constrained to a boundary node, the
+ * treatment of hanging nodes can add entries again to rows and columns
+ * corresponding to boundary values and that we have already vacated in the
+ * local elimination step. To make things worse, in 3d constrained nodes can
+ * even lie on the boundary. Thus, it is imperative that boundary node
+ * elimination happens @em after hanging node elimination, but this can't be
+ * achieved with local elimination of boundary nodes unless there are no
+ * hanging node constraints at all.
+ *
+ * Local elimination has one additional drawback: we don't have access to the
+ * solution vector, only to the local contributions to the matrix and right
+ * hand side. The problem with this is subtle, but can lead to very hard to
+ * find difficulties: when we eliminate a degree of freedom, we delete the row
+ * and column of this unknown, and set the diagonal entry to some positive
+ * value. To make the problem more or less well-conditioned, we set this
+ * diagonal entry to the absolute value of its prior value if that was non-
+ * zero, or to the average magnitude of all other nonzero diagonal elements.
+ * Then we set the right hand side value such that the resulting solution
+ * entry has the right value as given by the boundary values. Since we add
+ * these contributions up over all local contributions, the diagonal entry and
+ * the respective value in the right hand side are added up correspondingly,
+ * so that the entry in the solution of the linear system is still valid.
+ *
+ * A problem arises, however, if the diagonal entries so chosen are not
+ * appropriate for the linear system. Consider, for example, a mixed Laplace
+ * problem with matrix <tt>[[A B][C^T 0]]</tt>, where we only specify boundary
+ * values for the second component of the solution. In the mixed formulation,
+ * the stress-strain tensor only appears in either the matrix @p B or @p C, so
+ * one of them may be significantly larger or smaller than the other one. Now,
+ * if we eliminate boundary values, we delete some rows and columns, but we
+ * also introduce a few entries on the diagonal of the lower right block, so
+ * that we get the system <tt>[[A' B'][C'^T X]]</tt>. The diagonal entries in
+ * the matrix @p X will be of the same order of magnitude as those in @p A.
+ * Now, if we solve this system in the Schur complement formulation, we have
+ * to invert the matrix <tt>X-C'^TA'^{-1}B'</tt>. Deleting rows and columns
+ * above makes sure that boundary nodes indeed have empty rows and columns in
+ * the Schur complement as well, except for the entries in @p X. However, the
+ * entries in @p X may be of significantly different orders of magnitude than
+ * those in <tt>C'^TA'^{-1}B'</tt>! If this is the case, we may run into
+ * trouble with iterative solvers. For example, assume that we start with zero
+ * entries in the solution vector and that the entries in @p X are several
+ * orders of magnitude too small; in this case, iterative solvers will compute
+ * the residual vector in each step and form correction vectors, but since the
+ * entries in @p X are so small, the residual contributions for boundary nodes
+ * are really small, despite the fact that the boundary nodes are still at
+ * values close to zero and not in accordance with the prescribed boundary
+ * values. Since the residual is so small, the corrections the iterative
+ * solver computes are very small, and in the end the solver will indicate
+ * convergence to a small total residual with the boundary values still being
+ * significantly wrong.
+ *
+ * We avoid this problem in the global elimination process described above by
+ * 'priming' the solution vector with the correct values for boundary nodes.
+ * However, we can't do this for the local elimination process. Therefore, if
+ * you experience a problem like the one above, you need to either increase
+ * the diagonal entries in @p X to a size that matches those in the other part
+ * of the Schur complement, or, simpler, prime the solution vector before you
+ * start the solver.
+ *
+ * In conclusion, local elimination of boundary nodes only works if there are
+ * no hanging nodes and even then doesn't always work fully satisfactorily.
+ *
+ * @ingroup numerics
+ * @author Wolfgang Bangerth, 1998, 2000, 2004, 2005
+ */
+namespace MatrixTools
+{
+  /**
+   * Make all names of namespace MatrixCreator visible in MatrixTools. This
+   * keeps code written for older versions of deal.II working, in which both
+   * were classes and MatrixTools was publicly derived from MatrixCreator.
+   */
+  using namespace MatrixCreator;
+
+  /**
+   * Apply the Dirichlet boundary values given in @p boundary_values (keyed by
+   * global dof index) to the system as described in the general documentation.
+   */
+  template <typename number>
+  void
+  apply_boundary_values (const std::map<types::global_dof_index,double> &boundary_values,
+                         SparseMatrix<number>  &matrix,
+                         Vector<number>        &solution,
+                         Vector<number>        &right_hand_side,
+                         const bool             eliminate_columns = true);
+
+  /**
+   * Apply Dirichlet boundary conditions to the system matrix and vectors as
+   * described in the general documentation. This overload works on block
+   * sparse matrices and block vectors.
+   */
+  template <typename number>
+  void
+  apply_boundary_values (const std::map<types::global_dof_index,double> &boundary_values,
+                         BlockSparseMatrix<number>           &matrix,
+                         BlockVector<number>                 &solution,
+                         BlockVector<number>                 &right_hand_side,
+                         const bool           eliminate_columns = true);
+
+#ifdef DEAL_II_WITH_PETSC
+  /**
+   * Apply Dirichlet boundary conditions to the system matrix and vectors as
+   * described in the general documentation. This function works on the
+   * classes that are used to wrap PETSc objects.
+   *
+   * <b>Important:</b> This function is not very efficient: it needs to
+   * alternately read from and write into the matrix, a situation that PETSc
+   * does not handle well. In addition, we only get rid of rows corresponding
+   * to boundary nodes; deleting the respective columns as well (i.e. the case
+   * where @p eliminate_columns is @p true) is not presently implemented, and
+   * probably never will be because it is too expensive without direct access
+   * to the PETSc data structures. (This leads to the situation where the
+   * action indicated by the default value of the last argument is actually
+   * not implemented; that argument has <code>true</code> as its default
+   * value to stay consistent with the other functions of the same name in
+   * this namespace.)
+   *
+   * This function is used in step-17 and step-18.
+   */
+  void
+  apply_boundary_values (const std::map<types::global_dof_index,double> &boundary_values,
+                         PETScWrappers::SparseMatrix  &matrix,
+                         PETScWrappers::Vector  &solution,
+                         PETScWrappers::Vector  &right_hand_side,
+                         const bool             eliminate_columns = true);
+
+  /**
+   * Same function as above, but for parallel PETSc matrices and vectors.
+   *
+   * @note If the matrix is stored in parallel across multiple processors
+   * using MPI, this function only touches rows that are locally stored and
+   * simply ignores all other rows. In other words, each processor is
+   * responsible for its own rows, and the @p boundary_values argument needs
+   * to contain all locally owned rows of the matrix that you want to have
+   * treated. (But it can also contain entries for degrees of freedom not
+   * owned locally; these will simply be ignored.) Further, in the context of
+   * parallel computations, you will get into trouble if you treat a row while
+   * other processors still have pending writes or additions into the same
+   * row. In other words, if another processor still wants to add something to
+   * an element of a row and you call this function to zero out the row, then
+   * the next call to compress() may add the remote value to the zero you
+   * just created. Consequently, you will want to call compress() after you
+   * have made the last modifications to a matrix and before starting to
+   * clear rows.
+   */
+  void
+  apply_boundary_values (const std::map<types::global_dof_index,double> &boundary_values,
+                         PETScWrappers::MPI::SparseMatrix  &matrix,
+                         PETScWrappers::MPI::Vector  &solution,
+                         PETScWrappers::MPI::Vector  &right_hand_side,
+                         const bool             eliminate_columns = true);
+
+  /**
+   * Same as above, but for parallel PETSc block matrices and block vectors.
+   */
+  void
+  apply_boundary_values (const std::map<types::global_dof_index,double>  &boundary_values,
+                         PETScWrappers::MPI::BlockSparseMatrix &matrix,
+                         PETScWrappers::MPI::BlockVector        &solution,
+                         PETScWrappers::MPI::BlockVector        &right_hand_side,
+                         const bool       eliminate_columns = true);
+
+#endif
+
+#ifdef DEAL_II_WITH_TRILINOS
+  /**
+   * Apply Dirichlet boundary conditions to the system matrix and vectors as
+   * described in the general documentation. This function works on the
+   * classes that are used to wrap Trilinos objects.
+   *
+   * <b>Important:</b> This function is not very efficient: it needs to
+   * alternately read from and write into the matrix, a situation that
+   * Trilinos does not handle well. In addition, we only get rid of rows
+   * corresponding to boundary nodes; deleting the respective columns as well
+   * (i.e. the case where @p eliminate_columns is @p true) is not presently
+   * implemented, and probably never will be because it is too expensive
+   * without direct access to the Trilinos data structures. (This leads to the
+   * situation where the action indicated by the default value of the last
+   * argument is actually not implemented; that argument has <code>true</code>
+   * as its default value to stay consistent with the other functions of the
+   * same name in this namespace.)
+   */
+  void
+  apply_boundary_values (const std::map<types::global_dof_index,double> &boundary_values,
+                         TrilinosWrappers::SparseMatrix  &matrix,
+                         TrilinosWrappers::Vector        &solution,
+                         TrilinosWrappers::Vector        &right_hand_side,
+                         const bool             eliminate_columns = true);
+
+  /**
+   * This function does the same as the one above, but works on Trilinos block
+   * matrices and block vectors.
+   */
+  void
+  apply_boundary_values (const std::map<types::global_dof_index,double> &boundary_values,
+                         TrilinosWrappers::BlockSparseMatrix  &matrix,
+                         TrilinosWrappers::BlockVector        &solution,
+                         TrilinosWrappers::BlockVector        &right_hand_side,
+                         const bool                eliminate_columns = true);
+
+  /**
+   * Same as the non-block function above, but for parallel matrices and vectors.
+   *
+   * @note If the matrix is stored in parallel across multiple processors
+   * using MPI, this function only touches rows that are locally stored and
+   * simply ignores all other rows. In other words, each processor is
+   * responsible for its own rows, and the @p boundary_values argument needs
+   * to contain all locally owned rows of the matrix that you want to have
+   * treated. (But it can also contain entries for degrees of freedom not
+   * owned locally; these will simply be ignored.) Further, in the context of
+   * parallel computations, you will get into trouble if you treat a row while
+   * other processors still have pending writes or additions into the same
+   * row. In other words, if another processor still wants to add something to
+   * an element of a row and you call this function to zero out the row, then
+   * the next call to compress() may add the remote value to the zero you
+   * just created. Consequently, you will want to call compress() after you
+   * have made the last modifications to a matrix and before starting to
+   * clear rows.
+   */
+  void
+  apply_boundary_values (const std::map<types::global_dof_index,double> &boundary_values,
+                         TrilinosWrappers::SparseMatrix  &matrix,
+                         TrilinosWrappers::MPI::Vector   &solution,
+                         TrilinosWrappers::MPI::Vector   &right_hand_side,
+                         const bool             eliminate_columns = true);
+
+  /**
+   * This function does the same as the one above, but works on Trilinos block
+   * matrices and parallel (MPI) block vectors.
+   */
+  void
+  apply_boundary_values (const std::map<types::global_dof_index,double> &boundary_values,
+                         TrilinosWrappers::BlockSparseMatrix  &matrix,
+                         TrilinosWrappers::MPI::BlockVector   &solution,
+                         TrilinosWrappers::MPI::BlockVector   &right_hand_side,
+                         const bool                eliminate_columns = true);
+#endif
+
+  /**
+   * Rather than applying boundary values to the global matrix and vector
+   * after creating the global matrix, this function does so during assembly,
+   * by modifying the local matrix and vector contributions. If you call this
+   * function on all local contributions, the resulting matrix will have the
+   * same entries, and the final call to apply_boundary_values() on the global
+   * system will not be necessary.
+   *
+   * Because this function does not work on sparse matrix data structures,
+   * it is relatively cheap. It may therefore be a win if you have many fixed
+   * degrees of freedom (e.g. boundary nodes), or if access to the sparse
+   * matrix is expensive (e.g. for block sparse, PETSc or Trilinos matrices).
+   * However, it doesn't work as expected if there are also hanging nodes;
+   * more caveats are listed in the general documentation of this namespace.
+   * Note that @p eliminate_columns has no default value in this function.
+   *
+   * @dealiiVideoLecture{21.6,21.65}
+   */
+  void
+  local_apply_boundary_values (const std::map<types::global_dof_index,double> &boundary_values,
+                               const std::vector<types::global_dof_index> &local_dof_indices,
+                               FullMatrix<double> &local_matrix,
+                               Vector<double>     &local_rhs,
+                               const bool          eliminate_columns);
+
+  /**
+   * Exception: block structure of the matrix and of the vectors do not match.
+   */
+  DeclExceptionMsg (ExcBlocksDontMatch,
+                    "You are providing a matrix whose subdivision into "
+                    "blocks in either row or column direction does not use "
+                    "the same blocks sizes as the solution vector or "
+                    "right hand side vectors, respectively.");
+}
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/numerics/point_value_history.h b/include/deal.II/numerics/point_value_history.h
new file mode 100644
index 0000000..2a75666
--- /dev/null
+++ b/include/deal.II/numerics/point_value_history.h
@@ -0,0 +1,661 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__point_value_history_h
+#define dealii__point_value_history_h
+
+#include <deal.II/base/point.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/grid/grid_tools.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/mapping.h>
+#include <deal.II/fe/mapping_q1.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/component_mask.h>
+#include <deal.II/numerics/data_postprocessor.h>
+
+#include <vector>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <map>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace internal
+{
+  namespace PointValueHistory
+  {
+    template <int dim> class PointGeometryData;
+  }
+}
+
+
+
+namespace internal
+{
+  namespace PointValueHistory
+  {
+    /**
+     * Plain data holder for one requested point: it stores what is needed to
+     * reference the support points closest to that point.
+     */
+    template <int dim>
+    class PointGeometryData
+    {
+    public:
+      PointGeometryData(const Point <dim> &new_requested_location, const std::vector <Point <dim> > &new_locations,
+                        const std::vector <types::global_dof_index> &new_sol_indices); // presumably initializes the three members below; definition not shown here
+      Point <dim> requested_location; // the point that was requested
+      std::vector <Point <dim> > support_point_locations; // support points closest to the requested point
+      std::vector <types::global_dof_index> solution_indices; // global dof indices; presumably those of the support points above -- confirm
+    };
+  }
+}
+
+
+
+/**
+ * PointValueHistory tackles the overhead of plotting time (or any other
+ * iterative process) graphs of solution values at specific points on the
+ * mesh. The user specifies the points which the solution should be monitored
+ * at ahead of time, as well as giving each solution vector that they want to
+ * record a mnemonic name. Then, for each step the user calls one of the three
+ * available "evaluate field" methods to store the data from each time step,
+ * and the class extracts data for the requested points to store it. Finally,
+ * once the computation is finished, the user can request output files to be
+ * generated; these files are in Gnuplot format but are basically just regular
+ * text and can easily be imported into other programs as well, for example into
+ * spreadsheets.
+ *
+ * The user can store extra variables which do not relate to mesh location by
+ * specifying n_independent_variables. The class then expects a std::vector of
+ * size n_independent_variables to be added during each step using the method
+ * @p push_back_independent. This may be used for example for recording
+ * external input, logging solver performance data such as time taken to solve
+ * the step and solver steps before convergence, saving norms calculated, or
+ * simply saving the time, number of time step, or number of nonlinear
+ * iteration along with the data evaluated from the mesh.
+ *
+ * The three "evaluate field" methods each have different strengths and
+ * weaknesses making each suitable for different contexts:
+ * <ol>
+ * <li>Firstly, the @p evaluate_field version that does not take a @p
+ * DataPostprocessor object selects the nearest support point (see
+ * @ref GlossSupport "this entry in the glossary"
+ * ) to a given point to extract data from. This makes the code that needs to
+ * be run at each time step very short, since looping over the mesh to extract
+ * the needed dof_index can be done just once at the start. However, this
+ * method is not suitable for FiniteElement objects that do not assign dofs to
+ * actual mesh locations (i.e. FEs without
+ * @ref GlossSupport "support points"
+ * ) or if adaptive mesh refinement is used. The reason for the latter
+ * restriction is that the location of the closest support point to a given
+ * point may change upon mesh refinement. The class will throw an exception if
+ * any change to the triangulation is made. (Although the nearest support point
+ * could be recomputed upon mesh refinement, the location of the support
+ * point will most likely change slightly, making the interpretation of the
+ * data difficult, hence this is not implemented currently.)
+ *
+ * <li> Secondly, @p evaluate_field_at_requested_location calls @p
+ * VectorTools::point_value to compute values at the specific point requested.
+ * This method is valid for any FE that is supported by @p
+ * VectorTools::point_value. Specifically, this method can be called by codes
+ * using adaptive mesh refinement.
+ *
+ * <li>Finally, the class offers a function @p evaluate_field that takes a @p
+ * DataPostprocessor object. This method allows the deal.II data postprocessor
+ * to be used to compute new quantities from the solution on the fly. The
+ * values are located at the nearest quadrature point to the requested point.
+ * If the mesh is refined between calls, this point will change, so care must
+ * be taken when using this method in code using adaptive refinement, but as
+ * the output will be meaningful (in the sense that the quadrature point
+ * selected is guaranteed to remain in the same vicinity), the class does not
+ * prevent the use of this method in adaptive codes. The class provides
+ * warnings in the output files if the mesh has changed. Note that one can
+ * reduce the error this procedure introduces by providing a quadrature
+ * formula that has more points, at the expense of performing more work since
+ * then the closest quadrature point is nearer to the point at which the
+ * evaluation is really supposed to happen. (As a sidenote: Why not do the
+ * evaluation at the requested point right away? The reason for this is that
+ * it would require setting up a new quadrature point object on each cell that
+ * has only a single point corresponding to the reference coordinates of the
+ * point you really want; then initializing a FEValues object with it; then
+ * evaluating the solution at this point; then handing the result to the
+ * DataPostprocessor object. This sequence of things is expensive -- which is
+ * the reason why VectorTools::point_value() is expensive. Using the same
+ * quadrature formula on each cell on which we want to evaluate the solution
+ * and only having to initialize a FEValues object once is a much cheaper
+ * alternative, albeit of course at the expense of getting only an approximate
+ * result.)
+ * </ol>
+ *
+ * When recording a new mnemonic name, the user must supply a component_mask
+ * (see
+ * @ref GlossComponentMask "this glossary entry"
+ * ) to indicate the
+ * @ref GlossComponent "(vector) components"
+ * to be extracted from the given input. If the user simply wants to extract
+ * all the components, the mask need not be explicitly supplied to the @p
+ * add_field_name method and the default value of the parameter is sufficient.
+ * If the @p evaluate_field with a @p DataPostprocessor object is used, the
+ * component_mask is interpreted as the mask of the @p DataPostprocessor
+ * return vector. The size of this mask can be different to that of the FE
+ * space, but must be provided when the @p add_field_name method is called.
+ * One variant of the @p add_field_name method allows an unsigned int input to
+ * construct a suitable mask, if all values from the @p DataPostprocessor are
+ * desired.
+ *
+ * The class automatically generates names for the data stored based on the
+ * mnemonics supplied. The methods @p add_component_names and @p
+ * add_independent_names allow the user to provide lists of names to use
+ * instead if desired.
+ *
+ * Following is a little code snippet that shows a common usage of this class:
+ *
+ * @code
+ * #include <deal.II/numerics/point_value_history.h>
+ * //....
+ *
+ * //... code to setup Triangulation, perform any refinement necessary
+ * // and setup DoFHandler, sizing solution Vectors etc
+ *
+ * // call the constructor
+ * unsigned int n_inputs = 1; // just one independent value, which happens to be an input
+ * PointValueHistory<dim> node_monitor(dof_handler, n_inputs);
+ *
+ * // setup fields and points required
+ * node_monitor.add_field_name("Solution");
+ * std::vector <Point <dim> > point_vector(2);
+ * point_vector[0] = Point <dim>(0, 0);
+ * point_vector[1] = Point <dim>(0.25, 0);
+ * node_monitor.add_points(point_vector); // multiple points at once
+ * node_monitor.add_point(Point<dim>(1, 0.2)); // add a single point
+ * node_monitor.close(); // close the class once the setup is complete
+ * node_monitor.status(std::cout); // print out status to check if desired
+ *
+ * // ... more code ...
+ *
+ * // ... in an iterative loop ...
+ * // double time, vector <double> with size 1 input_value,
+ * // and Vector <double> solution calculated in the loop
+ * node_monitor.start_new_dataset(time);
+ * node_monitor.push_back_independent(input_value);
+ * node_monitor.evaluate_field("Solution", solution);
+ *
+ * // ... end of iterative loop ...
+ *
+ * node_monitor.write_gnuplot("node"); // write out data files
+ *
+ * @endcode
+ */
+template <int dim>
+class PointValueHistory
+{
+public:
+  /**
+   * Provide a stripped down instance of the class which does not support
+   * adding points or mesh data.  This may be used for example for recording
+   * external input or logging solver performance data.
+   */
+  PointValueHistory (const unsigned int n_independent_variables = 0);
+
+  /**
+   * Constructor linking the class to a specific @p DoFHandler. Since this class
+   * reads specific data from the @p DoFHandler and stores it internally for
+   * quick access (in particular dof indices of closest neighbors to requested
+   * points), the class is fairly intolerant of changes to the @p DoFHandler if
+   * data at support points is required. Mesh refinement and @p DoFRenumbering
+   * methods should be performed before the @p add_points method is called and
+   * adaptive grid refinement is only supported by some methods.
+   *
+   * The user can store extra variables which do not relate to mesh location
+   * by specifying the number required using n_independent_variables and
+   * making calls to @p push_back_independent as needed.  This may be used for
+   * example for recording external input or logging solver performance data.
+   */
+  PointValueHistory (const DoFHandler<dim> &dof_handler,
+                     const unsigned int n_independent_variables = 0);
+
+  /**
+   * Copy constructor. This constructor can be safely called with a @p
+   * PointValueHistory object that contains data, but this could be expensive
+   * and should be avoided.
+   */
+  PointValueHistory (const PointValueHistory &point_value_history);
+
+  /**
+   * Assignment operator. This assignment operator can be safely called once
+   * the class is closed and data added, but this is provided primarily to
+   * allow a @p PointValueHistory object declared in a class to be
+   * reinitialized later in the class. Using the assignment operator when the
+   * object contains data could be expensive.
+   */
+  PointValueHistory &operator=(const PointValueHistory &point_value_history);
+
+  /**
+   * Destructor.
+   */
+  ~PointValueHistory ();
+
+  /**
+   * Add a single point to the class. The support points (one per component)
+   * in the mesh that are closest to that point are found and their details
+   * stored for use when @p evaluate_field is called. If more than one point
+   * is required rather use the @p add_points method since this minimizes
+   * iterations over the mesh.
+   */
+  void add_point(const Point <dim> &location);
+
+  /**
+   * Add multiple points to the class. The support points (one per component)
+   * in the mesh that are closest to each point are found and their details
+   * stored for use when @p evaluate_field is called. If more than one point
+   * is required, rather call this method as it is more efficient than the
+   * add_point method since it minimizes iterations over the mesh. The points
+   * are added to the internal database in the order they appear in the list
+   * and there is always a one to one correspondence between the requested
+   * point and the added point, even if a point is requested multiple times.
+   */
+  void add_points (const std::vector <Point <dim> > &locations);
+
+
+
+  /**
+   * Put another mnemonic string (and hence @p VectorType) into the class.
+   * This method adds storage space for variables equal to the number of true
+   * values in component_mask. This also adds extra entries for points that
+   * are already in the class, so @p add_field_name and @p add_points can be
+   * called in any order.
+   */
+  void add_field_name(const std::string &vector_name,
+                      const ComponentMask &component_mask = ComponentMask());
+
+  /**
+   * Put another mnemonic string (and hence @p VectorType) into the class.
+   * This method adds storage space for n_components variables. This also adds
+   * extra entries for points that are already in the class, so @p
+   * add_field_name and @p add_points can be called in any order. This method
+   * generates a std::vector 0, ..., n_components-1 and calls the previous
+   * function.
+   */
+  void add_field_name(const std::string &vector_name,
+                      const unsigned int n_components);
+
+  /**
+   * Provide optional names for each component of a field. These names will be
+   * used instead of names generated from the field name, if supplied.
+   */
+  void add_component_names(const std::string &vector_name,
+                           const std::vector <std::string> &component_names);
+
+  /**
+   * Provide optional names for the independent values. These names will be
+   * used instead of "Indep_...", if supplied.
+   */
+  void add_independent_names(const std::vector <std::string> &independent_names);
+
+
+
+  /**
+   * Extract values at the stored points from the VectorType supplied and add
+   * them to the new dataset in vector_name. The component mask supplied when
+   * the field was added is used to select components to extract. If a @p
+   * DoFHandler is used, one (and only one) evaluate_field method must be
+   * called for each dataset (time step, iteration, etc) for each vector_name,
+   * otherwise a @p ExcDataLostSync error can occur.
+   */
+  template <class VectorType>
+  void evaluate_field(const std::string &name,
+                      const VectorType  &solution);
+
+
+  /**
+   * Compute values using a @p DataPostprocessor object with the @p VectorType
+   * supplied and add them to the new dataset in vector_name. The
+   * component_mask supplied when the field was added is used to select
+   * components to extract from the @p DataPostprocessor return vector. This
+   * method takes a vector of field names to process and is preferred if many
+   * fields use the same @p DataPostprocessor object as each cell is only
+   * located once. The quadrature object supplied is used for all components
+   * of a vector field. This method will not throw an exception if the mesh
+   * has changed (no internal data structures are invalidated, as the
+   * quadrature points are repicked each time the function is called).
+   * Nevertheless, the user must be aware that if the mesh changes the point
+   * selected will also vary slightly, making interpretation of the data more
+   * difficult. If a @p DoFHandler is used, one (and only one) evaluate_field
+   * method must be called for each dataset (time step, iteration, etc) for
+   * each vector_name, otherwise a @p ExcDataLostSync error can occur.
+   */
+  template <class VectorType>
+  void evaluate_field(const std::vector <std::string> &names,
+                      const VectorType                &solution,
+                      const DataPostprocessor<dim>    &data_postprocessor,
+                      const Quadrature<dim>           &quadrature);
+
+  /**
+   * Construct a std::vector <std::string> containing only vector_name and
+   * call the above function. The above function is more efficient if multiple
+   * fields use the same @p DataPostprocessor object.
+   */
+  template <class VectorType>
+  void evaluate_field(const std::string            &name,
+                      const VectorType             &solution,
+                      const DataPostprocessor<dim> &data_postprocessor,
+                      const Quadrature<dim>        &quadrature);
+
+
+  /**
+   * Extract values at the points actually requested from the VectorType
+   * supplied and add them to the new dataset in vector_name. Unlike the other
+   * evaluate_field methods this method does not care if the dof_handler has
+   * been modified because it uses calls to @p VectorTools::point_value to
+   * extract the data. Therefore, if only this method is used, the class is
+   * fully compatible with adaptive refinement. The component_mask supplied
+   * when the field was added is used to select components to extract. If a @p
+   * DoFHandler is used, one (and only one) evaluate_field method must be
+   * called for each dataset (time step, iteration, etc) for each vector_name,
+   * otherwise a @p ExcDataLostSync error can occur.
+   */
+  template <class VectorType>
+  void evaluate_field_at_requested_location(const std::string &name,
+                                            const VectorType  &solution);
+
+
+  /**
+   * Add the key for the current dataset to the dataset. Although calling this
+   * method first is sensible, the order in which this method, @p
+   * evaluate_field and @p push_back_independent are called is not important.
+   * It is however important that all the data for a given dataset is added to
+   * each dataset and that it is added before a new data set is started. This
+   * prevents a @p ExcDataLostSync.
+   */
+  void start_new_dataset (const double key);
+
+  /**
+   * If independent values have been set up, this method stores these values.
+   * This should only be called once per dataset, and if independent values
+   * are used it must be called for every dataset. A @p ExcDataLostSync
+   * exception can be thrown if this method is not called.
+   */
+  void push_back_independent (const std::vector <double> &independent_values);
+
+
+  /**
+   * Write out a series of .gpl files named base_name + "-00.gpl", base_name +
+   * "-01.gpl" etc. The data file gives info about the support points
+   * selected and about interpreting the data. If @p n_indep != 0 an additional
+   * file base_name + "_indep.gpl" containing key and independent data is
+   * written. The file name agrees with the order the points were added to the
+   * class. The names of the data columns can be supplied using the functions
+   * @p add_component_names and @p add_independent_names. The support point
+   * information is only meaningful if the dof_handler has not been changed.
+   * Therefore, if adaptive mesh refinement has been used the support point
+   * data should not be used. The optional parameter postprocessor_locations
+   * is used to add the postprocessor locations to the output files. If this
+   * is desired, the data should be obtained from a call to
+   * get_postprocessor_locations while the dof_handler is usable. The default
+   * parameter is an empty vector of points, and will suppress postprocessor
+   * locations output.
+   */
+  void write_gnuplot (const std::string &base_name,
+                      const std::vector <Point <dim> > postprocessor_locations = std::vector <Point <dim> > ());
+
+
+  /**
+   * Return a @p Vector with the indices of selected points flagged with a 1.
+   * This method is mainly for testing and verifying that the class is working
+   * correctly. By passing this vector to a DataOut object, the user can
+   * verify that the positions returned by @p get_points agree with the
+   * positions that @p DataOut interprets from the @p Vector returned. The
+   * code snippet below demonstrates how this could be done:
+   * @code
+   * // Make a DataOut object and attach the dof_handler
+   * DataOut<dim> data_out;
+   * data_out.attach_dof_handler(dof_handler);
+   *
+   * // Call the mark_support_locations method to get the vector with indices flagged
+   * Vector<double> support_point_locations = node_monitor.mark_support_locations();
+   *
+   * // Add the vector to the data_out object and write out a file in the usual way
+   * data_out.add_data_vector(support_point_locations, "Monitor_Locations");
+   * data_out.build_patches(2);
+   * std::ofstream output("locations.gpl");
+   * data_out.write_gnuplot(output);
+   * @endcode
+   */
+  Vector<double> mark_support_locations();
+
+  /**
+   * Stores the actual location of each support point selected by the @p
+   * add_point(s) method.  This can be used to compare with the point
+   * requested, for example by using the @p Point<dim>::distance function. For
+   * convenience, location is resized to the correct number of points by the
+   * method.
+   */
+  void get_support_locations (std::vector <std::vector<Point <dim> > > &locations);
+
+  /**
+   * @deprecated
+   *
+   * This function only exists for backward compatibility as this is the
+   * interface provided by previous versions of the library. The function
+   * get_support_locations replaces it and reflects the fact that the points
+   * returned are actually the support points.
+   */
+  void get_points (std::vector <std::vector<Point <dim> > > &locations);
+
+  /**
+   * Stores the actual location of the points used by the data_postprocessor.
+   * This can be used to compare with the points requested, for example by
+   * using the @p Point<dim>::distance function. Unlike the support_locations,
+   * these locations are computed every time the evaluate_field method is
+   * called with a postprocessor. This method uses the same algorithm so it
+   * will find the same points. For convenience, location is resized to the
+   * correct number of points by the method.
+   */
+  void get_postprocessor_locations (const Quadrature<dim> &quadrature,
+                                    std::vector<Point <dim> > &locations);
+
+  /**
+   * Once datasets have been added to the class, requests to add additional
+   * points will make the data interpretation unclear. The boolean @p closed
+   * defines a state of the class and ensures this does not happen. Additional
+   * points or vectors can only be added while the class is not closed, and
+   * the class must be closed before datasets can be added or written to file.
+   * @p PointValueHistory::get_points and @p PointValueHistory::status do not
+   * require the class to be closed. If a method that requires a class to be
+   * open or closed is called while in the wrong state a @p ExcInvalidState
+   * exception is thrown.
+   */
+  void close();
+
+
+  /**
+   * Delete the lock this object has to the @p DoFHandler used the last time
+   * the class was created.  This method should not normally need to be
+   * called, but can be useful to ensure that the @p DoFHandler is released
+   * before it goes out of scope if the @p PointValueHistory class might live
+   * longer than it. Once this method has been called, the majority of methods
+   * will throw a @p ExcInvalidState exception, so if used this method should
+   * be the last call to the class.
+   */
+  void clear();
+
+  /**
+   * Print useful debugging information about the class, including details
+   * about which support points were selected for each point and sizes of the
+   * data stored.
+   */
+  void status(std::ostream &out);
+
+
+  /**
+   * Check the internal data sizes to test for a loss of data sync. This is
+   * often used in @p Assert statements with the @p ExcDataLostSync exception.
+   * If @p strict is @p false this method returns @p true if all sizes are
+   * within 1 of each other (needed to allow data to be added), with @p strict
+   * = @p true they must be exactly equal.
+   */
+
+  bool deep_check (const bool strict);
+
+  /**
+   * Exception
+   */
+  DeclExceptionMsg(ExcNoIndependent,
+                   "A call has been made to push_back_independent() when "
+                   "no independent values were requested.");
+
+  /**
+   * Exception
+   */
+  DeclExceptionMsg(ExcDataLostSync,
+                   "This error is thrown to indicate that the data sets appear to be out of "
+                   "sync. The class requires that the number of dataset keys is the same as "
+                   "the number of independent values sets and mesh linked value sets. The "
+                   "number of each of these is allowed to differ by one to allow new values "
+                   "to be added with out restricting the order the user choses to do so. "
+                   "Special cases of no FHandler and no independent values should not "
+                   "trigger this error.");
+
+
+  /**
+   * Exception
+   */
+  DeclExceptionMsg(ExcDoFHandlerRequired,
+                   "A method which requires access to a @p DoFHandler to be meaningful has "
+                   "been called when have_dof_handler is false (most likely due to default "
+                   "constructor being called). Only independent variables may be logged with "
+                   "no DoFHandler.");
+
+  /**
+   * Exception
+   */
+  DeclExceptionMsg(ExcDoFHandlerChanged,
+                   "The triangulation has been refined or coarsened in some way. This "
+                   "suggests that the internal DoF indices stored by the current "
+                   "object are no longer meaningful.");
+
+private:
+  /**
+   * Stores keys, values on the abscissa. This will often be time, but
+   * possibly time step, iteration etc.
+   */
+  std::vector <double> dataset_key;
+
+  /**
+   * Values that do not depend on grid location.
+   */
+  std::vector <std::vector <double> > independent_values;
+
+  /**
+   * Saves a vector listing component names associated with
+   * independent_values. This will be an empty vector if the user does not
+   * supply names.
+   */
+  std::vector<std::string> indep_names;
+
+  /**
+   * Saves data for each mnemonic entry. data_store: mnemonic ->
+   * [point_0_components point_1_components ... point_n-1_components][key]
+   * This format facilitates scalar mnemonics in a vector space, because
+   * scalar mnemonics will only have one component per point. Vector
+   * components are strictly FE.n_components () long.
+   */
+  std::map <std::string, std::vector <std::vector <double> > > data_store;
+
+  /**
+   * Saves a component mask for each mnemonic.
+   */
+  std::map <std::string, ComponentMask> component_mask;
+
+
+  /**
+   * Saves a vector listing component names associated with a mnemonic. This
+   * will be an empty vector if the user does not supply names.
+   */
+  std::map <std::string, std::vector<std::string> > component_names_map;
+
+  /**
+   * Saves the location and other mesh info about support points.
+   */
+  std::vector <internal::PointValueHistory::PointGeometryData <dim> >
+  point_geometry_data;
+
+
+  /**
+   * Used to enforce @p closed state for some methods.
+   */
+  bool closed;
+
+  /**
+   * Used to enforce @p !cleared state for some methods.
+   */
+  bool cleared;
+
+
+  /**
+   * A smart pointer to the dof_handler supplied to the constructor. This can
+   * be released by calling @p clear().
+   */
+  SmartPointer<const DoFHandler<dim>,PointValueHistory<dim> > dof_handler;
+
+
+  /**
+   * Variable to check if the triangulation has changed. If it has changed,
+   * certain data is out of date (especially the
+   * PointGeometryData::solution_indices).
+   */
+  bool triangulation_changed;
+
+  /**
+   * A boolean to record whether the class was initialized with a DoFHandler
+   * or not.
+   */
+  bool have_dof_handler;
+
+  /**
+   * Used to detect signals from the Triangulation.
+   */
+  boost::signals2::connection tria_listener;
+
+  /**
+   * Stores the number of independent variables requested.
+   */
+  unsigned int n_indep;
+
+
+  /**
+   * A function that will be triggered through signals whenever the
+   * triangulation is modified.
+   *
+   * It is currently used to check if the triangulation has changed,
+   * invalidating precomputed values.
+   */
+  void tria_change_listener ();
+};
+
+
+DEAL_II_NAMESPACE_CLOSE
+#endif /* dealii__point_value_history_h */
diff --git a/include/deal.II/numerics/solution_transfer.h b/include/deal.II/numerics/solution_transfer.h
new file mode 100644
index 0000000..524e164
--- /dev/null
+++ b/include/deal.II/numerics/solution_transfer.h
@@ -0,0 +1,505 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__solution_transfer_h
+#define dealii__solution_transfer_h
+
+
+/*----------------------------   solutiontransfer.h     ----------------------*/
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/dofs/dof_handler.h>
+
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * This class implements the transfer of a discrete FE function (e.g. a
+ * solution vector) from one mesh to another that is obtained from the first
+ * by a single refinement and/or coarsening step. During interpolation the
+ * vector is reinitialized to the new size and filled with the interpolated
+ * values. This class is used in the step-15, step-26, step-31, and step-33
+ * tutorial programs. A version of this class that works on parallel
+ * triangulations is available as parallel::distributed::SolutionTransfer.
+ *
+ * <h3>Usage</h3>
+ *
+ * This class implements the algorithms in two different ways:
+ * <ul>
+ * <li> If the grid will only be refined (i.e. no cells are coarsened) then
+ * use @p SolutionTransfer as follows:
+ * @code
+ * SolutionTransfer<dim, Vector<double> > soltrans(*dof_handler);
+ *                                     // flag some cells for refinement, e.g.
+ * GridRefinement::refine_and_coarsen_fixed_fraction(
+ *   *tria, error_indicators, 0.3, 0);
+ *                                     // prepare the triangulation
+ *                                     // for refinement,
+ * tria->prepare_coarsening_and_refinement();
+ *                                     // tell the SolutionTransfer object
+ *                                     // that we intend to do pure refinement,
+ * soltrans.prepare_for_pure_refinement();
+ *                                     // actually execute the refinement,
+ * tria->execute_coarsening_and_refinement();
+ *                                     // and redistribute dofs.
+ * dof_handler->distribute_dofs (fe);
+ * @endcode
+ *
+ * Then to proceed do
+ * @code
+ *                                     // take a copy of the solution vector
+ * Vector<double> solution_old(solution);
+ *                                     // resize solution vector to the correct
+ *                                     // size, as the @p refine_interpolate
+ *                                     // function requires the vectors to be
+ *                                     // of right sizes
+ * solution.reinit(dof_handler->n_dofs());
+ *                                     // and finally interpolate
+ * soltrans.refine_interpolate(solution_old, solution);
+ * @endcode
+ *
+ * Although the @p refine_interpolate functions are allowed to be called
+ * multiple times, e.g. for interpolating several solution vectors, there is
+ * also the possibility of interpolating several functions simultaneously.
+ * @code
+ * vector<Vector<double> > solutions_old(n_vectors, Vector<double> (n));
+ * ...
+ * vector<Vector<double> > solutions(n_vectors, Vector<double> (n));
+ * soltrans.refine_interpolate(solutions_old, solutions);
+ * @endcode
+ * This is used in several of the tutorial programs, for example step-31.
+ *
+ * <li> If the grid has cells that will be coarsened, then use @p
+ * SolutionTransfer as follows:
+ * @code
+ * SolutionTransfer<dim, Vector<double> > soltrans(*dof_handler);
+ *                                     // flag some cells for refinement
+ *                                     // and coarsening, e.g.
+ * GridRefinement::refine_and_coarsen_fixed_fraction(
+ *   *tria, error_indicators, 0.3, 0.05);
+ *                                     // prepare the triangulation,
+ * tria->prepare_coarsening_and_refinement();
+ *                                     // prepare the SolutionTransfer object
+ *                                     // for coarsening and refinement and give
+ *                                     // the solution vector that we intend to
+ *                                     // interpolate later,
+ * soltrans.prepare_for_coarsening_and_refinement(solution);
+ *                                     // actually execute the refinement,
+ * tria->execute_coarsening_and_refinement ();
+ *                                     // redistribute dofs,
+ * dof_handler->distribute_dofs (fe);
+ *                                     // and interpolate the solution
+ * Vector<double> interpolated_solution(dof_handler->n_dofs());
+ * soltrans.interpolate(solution, interpolated_solution);
+ * @endcode
+ *
+ * Multiple calls to the function <code>interpolate (const Vector<number> &in,
+ * Vector<number> &out)</code> are NOT allowed. Interpolating several
+ * functions can be performed in one step by using <tt>void interpolate (const
+ * vector<Vector<number> >&all_in, vector<Vector<number> >&all_out)
+ * const</tt>, and using the respective @p
+ * prepare_for_coarsening_and_refinement function taking several vectors as
+ * input before actually refining and coarsening the triangulation (see
+ * there).
+ * </ul>
+ *
+ * For deleting all stored data in @p SolutionTransfer and reinitializing it
+ * use the <tt>clear()</tt> function.
+ *
+ * The template argument @p VectorType denotes the type of the vectors you
+ * want to transfer.
+ *
+ *
+ * <h3>Interpolating in the presence of hanging nodes and boundary values</h3>
+ *
+ * The interpolation onto the new mesh is a local operation, i.e., it
+ * interpolates onto the new mesh only. If that new mesh has hanging nodes,
+ * you will therefore get a solution that does not satisfy hanging node
+ * constraints. The same is true with boundary values: the interpolated
+ * solution will just be the interpolation of the old solution at the
+ * boundary, and this may or may not satisfy boundary values at newly
+ * introduced boundary nodes.
+ *
+ * Consequently, you may have to apply hanging node or boundary value
+ * constraints after interpolation. step-15 and step-26 have examples of
+ * dealing with this.
+ *
+ *
+ * <h3>Implementation</h3>
+ *
+ * <ul>
+ * <li> Solution transfer with only refinement. Assume that we have got a
+ * solution vector on the current (original) grid. Each entry of this vector
+ * belongs to one of the DoFs of the discretization. If we now refine the grid
+ * then the calling of DoFHandler::distribute_dofs() will change at least some
+ * of the DoF indices. Hence we need to store the DoF indices of all active
+ * cells before the refinement. A pointer for each active cell is used to
+ * point to the vector of these DoF indices of that cell. This is done by
+ * prepare_for_pure_refinement().
+ *
+ * In the function <tt>refine_interpolate(in,out)</tt> and on each cell where
+ * the pointer is set (i.e. the cells that were active in the original grid)
+ * we can now access the local values of the solution vector @p in on that
+ * cell by using the stored DoF indices. These local values are interpolated
+ * and set into the vector @p out that is at the end the discrete function @p
+ * in interpolated on the refined mesh.
+ *
+ * The <tt>refine_interpolate(in,out)</tt> function can be called multiple
+ * times for arbitrary many discrete functions (solution vectors) on the
+ * original grid.
+ *
+ * <li> Solution transfer with coarsening and refinement. After calling
+ * Triangulation::prepare_coarsening_and_refinement the coarsen flags of
+ * either all or none of the children of a (father-)cell are set. While
+ * coarsening (Triangulation::execute_coarsening_and_refinement) the cells
+ * that are not needed any more will be deleted from the Triangulation.
+ *
+ * For the interpolation from the (to be coarsened) children to their father
+ * the children cells are needed. Hence this interpolation and the storing of
+ * the interpolated values of each of the discrete functions that we want to
+ * interpolate need to take place before these children cells are coarsened
+ * (and deleted!!). Again a pointer for each relevant cell is set to point to
+ * these values (see below). Additionally the DoF indices of the cells that
+ * will not be coarsened need to be stored as in the solution transfer with
+ * pure refinement (cf. there). All this is performed by
+ * <tt>prepare_for_coarsening_and_refinement(all_in)</tt> where the
+ * <tt>vector<Vector<number> > all_in</tt> includes all discrete
+ * functions to be interpolated onto the new grid.
+ *
+ * As we need two different kinds of pointers (<tt>vector<unsigned int> *</tt>
+ * for the Dof indices and <tt>vector<Vector<number> > *</tt> for the
+ * interpolated DoF values) we use the @p Pointerstruct that includes both of
+ * these pointers and the pointer for each cell points to these @p
+ * Pointerstructs. On each cell only one of the two different pointers is used
+ * at one time hence we could use a <tt>void * pointer</tt> as
+ * <tt>vector<unsigned int> *</tt> at one time and as
+ * <tt>vector<Vector<number> > *</tt> at the other but using this @p
+ * Pointerstruct in between makes the use of these pointers more safe and
+ * gives better possibility to expand their usage.
+ *
+ * In <tt>interpolate(all_in, all_out)</tt> the refined cells are treated
+ * as in the solution transfer with pure refinement. Additionally, on
+ * each cell that is coarsened (hence previously was a father cell), the
+ * values of the discrete functions in @p all_out are set to the stored local
+ * interpolated values that are accessible due to the 'vector<Vector<number> >
+ * *' pointer in @p Pointerstruct that is pointed to by the pointer of that
+ * cell. It is clear that <tt>interpolate(all_in, all_out)</tt> only can be
+ * called with the <tt>vector<Vector<number> > all_in</tt> that previously was
+ * the parameter of the <tt>prepare_for_coarsening_and_refinement(all_in)</tt>
+ * function. Hence <tt>interpolate(all_in, all_out)</tt> can (in contrast to
+ * <tt>refine_interpolate(in, out)</tt>) only be called once.
+ * </ul>
+ *
+ *
+ * <h3>Interaction with hanging nodes</h3>
+ *
+ * This class does its best to represent on the new mesh the finite element
+ * function that existed on the old mesh, but this may lead to situations
+ * where the function on the new mesh is no longer conforming at hanging
+ * nodes. To this end, consider a situation of a twice refined mesh that
+ * started with a single square cell (i.e., we now have 16 cells). Consider
+ * also that we coarsen 4 of the cells back to the first refinement level. In
+ * this case, we end up with a mesh that will look as follows if we were to
+ * use a $Q_1$ element:
+ *
+ * @image html hanging_nodes.png ""
+ *
+ * The process of interpolating from the old to the new mesh would imply that
+ * the values of the finite element function will not change on all of the
+ * cells that remained as they are (i.e., the fine cells) but that on the
+ * coarse cell at the top right, the four values at the vertices are obtained
+ * by interpolating down from its former children.  If the original function
+ * was not linear, this implies that the marked hanging nodes will retain
+ * their old values which, in general, will not lead to a continuous function
+ * along the corresponding edges. In other words, the solution vector obtained
+ * after SolutionTransfer::interpolate() does not satisfy hanging node
+ * constraints: it corresponds to the pointwise interpolation, but not to the
+ * interpolation <i>onto the new finite element space that contains
+ * constraints from hanging nodes</i>.
+ *
+ * Whether this is a problem you need to worry about or not depends on your
+ * application. The situation is easily corrected, of course, by applying
+ * ConstraintMatrix::distribute() to your solution vector after transfer,
+ * using a constraint matrix object computed on the new DoFHandler object (you
+ * probably need to create this object anyway if you have hanging nodes). This
+ * is also what is done, for example, in step-15.
+ *
+ * @note This situation can only happen if you do coarsening. If all cells
+ * remain as they are or are refined, then SolutionTransfer::interpolate()
+ * computes a new vector of nodal values, but the function represented is of
+ * course exactly the same because the old finite element space is a subspace
+ * of the new one. Thus, if the old function was conforming (i.e., satisfied
+ * hanging node constraints), then so does the new one, and it is not
+ * necessary to call ConstraintMatrix::distribute().
+ *
+ *
+ * <h3>Implementation in the context of hp finite elements</h3>
+ *
+ * In the case of hp::DoFHandlers, nothing defines which of the finite
+ * elements that are part of the hp::FECollection associated with the DoF
+ * handler, should be considered on cells that are not active (i.e., that have
+ * children). This is because degrees of freedom are only allocated for active
+ * cells and, in fact, it is not allowed to set an active_fe_index on non-
+ * active cells using DoFAccessor::set_active_fe_index().
+ *
+ * It is, thus, not entirely natural what should happen if, for example, a few
+ * cells are coarsened away. This class then implements the following
+ * algorithm: - If a cell is refined, then the values of the solution
+ * vector(s) are saved before refinement on the to-be-refined cell and in the
+ * space associated with this cell. These values are then interpolated to the
+ * finite element spaces of the children post-refinement. This may lose
+ * information if, for example, the old cell used a Q2 space and the children
+ * use Q1 spaces, or the information may be prolonged if the mother cell used
+ * a Q1 space and the children are Q2s. - If cells are to be coarsened, then
+ * the values from the child cells are interpolated to the mother cell using
+ * the largest of the child cell spaces. For example, if the children of a
+ * cell use Q1, Q2 and Q3 spaces, then the values from the children are
+ * interpolated into a Q3 space on the mother cell. After refinement, this Q3
+ * function on the mother cell is then interpolated into the space the user
+ * has selected for this cell (which may be different from Q3, in this
+ * example, if the user has set the active_fe_index for a different space
+ * post-refinement and before calling hp::DoFHandler::distribute_dofs()).
+ *
+ * @note In the context of hp refinement, if cells are coarsened or the
+ * polynomial degree is lowered on some cells, then the old finite element
+ * space is not a subspace of the new space and you may run into the same
+ * situation as discussed above with hanging nodes. You may want to consider
+ * calling ConstraintMatrix::distribute() on the vector obtained by
+ * transferring the solution.
+ *
+ * @ingroup numerics
+ * @author Ralf Hartmann, 1999, Oliver Kayser-Herold and Wolfgang Bangerth,
+ * 2006, Wolfgang Bangerth 2014
+ */
+template<int dim, typename VectorType=Vector<double>,
+         typename DoFHandlerType=DoFHandler<dim> >
+class SolutionTransfer
+{
+public:
+
+  /**
+   * Constructor, takes the current DoFHandler as argument.
+   */
+  SolutionTransfer(const DoFHandlerType &dof);
+
+  /**
+   * Destructor
+   */
+  ~SolutionTransfer();
+
+  /**
+   * Reinit this class to the state that it has directly after calling the
+   * Constructor
+   */
+  void clear();
+
+  /**
+   * Prepares the @p SolutionTransfer for pure refinement. It stores the dof
+   * indices of each cell. After calling this function only calling the @p
+   * refine_interpolate functions is allowed.
+   */
+  void prepare_for_pure_refinement();
+
+  /**
+   * Prepares the @p SolutionTransfer for coarsening and refinement. It stores
+   * the dof indices of each cell and stores the dof values of the vectors in
+   * @p all_in in each cell that'll be coarsened. @p all_in includes all
+   * vectors that are to be interpolated onto the new (refined and/or
+   * coarsened) grid.
+   */
+  void prepare_for_coarsening_and_refinement (const std::vector<VectorType> &all_in);
+
+  /**
+   * Same as previous function but for only one discrete function to be
+   * interpolated.
+   */
+  void prepare_for_coarsening_and_refinement (const VectorType &in);
+
+  /**
+   * This function interpolates the discrete function @p in, which is a vector
+   * on the grid before the refinement, to the function @p out which then is a
+   * vector on the refined grid. It assumes the vectors having the right sizes
+   * (i.e. <tt>in.size()==n_dofs_old</tt>,
+   * <tt>out.size()==n_dofs_refined</tt>)
+   *
+   * Calling this function is allowed only if @p prepare_for_pure_refinement
+   * is called and the refinement is executed before. Multiple calling of this
+   * function is allowed. e.g. for interpolating several functions.
+   */
+  void refine_interpolate (const VectorType &in,
+                           VectorType       &out) const;
+
+  /**
+   * This function interpolates the discrete functions that are stored in @p
+   * all_in onto the refined and/or coarsenend grid. It assumes the vectors in
+   * @p all_in denote the same vectors as in @p all_in as parameter of
+   * <tt>prepare_for_refinement_and_coarsening(all_in)</tt>. However, there is
+   * no way of verifying this internally, so be careful here.
+   *
+   * Calling this function is allowed only if first
+   * Triangulation::prepare_coarsening_and_refinement, second @p
+   * SolutionTransfer::prepare_for_coarsening_and_refinement, an then third
+   * Triangulation::execute_coarsening_and_refinement are called before.
+   * Multiple calling of this function is NOT allowed. Interpolating several
+   * functions can be performed in one step.
+   *
+   * The number of output vectors is assumed to be the same as the number of
+   * input vectors. Also, the sizes of the output vectors are assumed to be of
+   * the right size (@p n_dofs_refined). Otherwise an assertion will be
+   * thrown.
+   */
+  void interpolate (const std::vector<VectorType> &all_in,
+                    std::vector<VectorType>       &all_out) const;
+
+  /**
+   * Same as the previous function. It interpolates only one function. It
+   * assumes the vectors having the right sizes (i.e.
+   * <tt>in.size()==n_dofs_old</tt>, <tt>out.size()==n_dofs_refined</tt>)
+   *
+   * Multiple calling of this function is NOT allowed. Interpolating several
+   * functions can be performed in one step by using <tt>interpolate (all_in,
+   * all_out)</tt>
+   */
+  void interpolate (const VectorType &in,
+                    VectorType       &out) const;
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  std::size_t memory_consumption () const;
+
+  /**
+   * Exception
+   */
+  DeclExceptionMsg(ExcNotPrepared,
+                   "You are attempting an operation for which this object is "
+                   "not prepared. This may be because you either did not call "
+                   "one of the prepare_*() functions at all, or because you "
+                   "called the wrong one for the operation you are currently "
+                   "attempting.");
+
+  /**
+   * Exception
+   */
+  DeclExceptionMsg(ExcAlreadyPrepForRef,
+                   "You are attempting to call one of the prepare_*() functions "
+                   "of this object to prepare it for an operation for which it "
+                   "is already prepared. Specifically, the object was "
+                   "previously prepared for pure refinement.");
+
+  /**
+   * Exception
+   */
+  DeclExceptionMsg(ExcAlreadyPrepForCoarseAndRef,
+                   "You are attempting to call one of the prepare_*() functions "
+                   "of this object to prepare it for an operation for which it "
+                   "is already prepared. Specifically, the object was "
+                   "previously prepared for both coarsening and refinement.");
+
+private:
+
+  /**
+   * Pointer to the degree of freedom handler to work with.
+   */
+  SmartPointer<const DoFHandlerType, SolutionTransfer<dim,VectorType, DoFHandlerType> > dof_handler;
+
+  /**
+   * Stores the number of DoFs before the refinement and/or coarsening.
+   */
+  types::global_dof_index n_dofs_old;
+
+  /**
+   * Declaration of @p PreparationState that denotes the three possible states
+   * of the @p SolutionTransfer: being prepared for 'pure refinement',
+   * prepared for 'coarsening and refinement' or not prepared.
+   */
+  enum PreparationState
+  {
+    none, pure_refinement, coarsening_and_refinement
+  };
+
+  /**
+   * Definition of the respective variable.
+   */
+  PreparationState prepared_for;
+
+
+  /**
+   * Is used for @p prepare_for_refining (of course also for @p
+   * repare_for_refining_and_coarsening) and stores all dof indices of the
+   * cells that'll be refined
+   */
+  std::vector<std::vector<types::global_dof_index> > indices_on_cell;
+
+  /**
+   * All cell data (the dof indices and the dof values) should be accessible
+   * from each cell. As each cell has got only one @p user_pointer, multiple
+   * pointers to the data need to be packetized in a structure. Note that in
+   * our case on each cell either the <tt>vector<unsigned int> indices</tt>
+   * (if the cell will be refined) or the <tt>vector<double> dof_values</tt>
+   * (if the children of this cell will be deleted) is needed, hence one @p
+   * user_pointer should be sufficient, but to allow some error checks and to
+   * preserve the user from making user errors the @p user_pointer will be
+   * 'multiplied' by this structure.
+   */
+  struct Pointerstruct
+  {
+    Pointerstruct() : indices_ptr(0), dof_values_ptr(0), active_fe_index(0) {};
+    Pointerstruct(std::vector<types::global_dof_index> *indices_ptr_in,
+                  const unsigned int active_fe_index_in = 0)
+      :
+      indices_ptr(indices_ptr_in),
+      dof_values_ptr (0),
+      active_fe_index(active_fe_index_in) {};
+    Pointerstruct(std::vector<Vector<typename VectorType::value_type> > *dof_values_ptr_in,
+                  const unsigned int active_fe_index_in = 0) :
+      indices_ptr (0),
+      dof_values_ptr(dof_values_ptr_in),
+      active_fe_index(active_fe_index_in) {};
+    std::size_t memory_consumption () const;
+
+    std::vector<types::global_dof_index>    *indices_ptr;
+    std::vector<Vector<typename VectorType::value_type> > *dof_values_ptr;
+    unsigned int active_fe_index;
+  };
+
+  /**
+   * Map mapping from level and index of cell to the @p Pointerstructs (cf.
+   * there). This map makes it possible to keep all the information needed to
+   * transfer the solution inside this object rather than using user pointers
+   * of the Triangulation for this purpose.
+   */
+  std::map<std::pair<unsigned int, unsigned int>, Pointerstruct> cell_map;
+
+  /**
+   * Is used for @p prepare_for_refining_and_coarsening The interpolated dof
+   * values of all cells that'll be coarsened will be stored in this vector.
+   */
+  std::vector<std::vector<Vector<typename VectorType::value_type> > > dof_values_on_cell;
+};
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+
+/*----------------------------   solutiontransfer.h     ---------------------------*/
+#endif
+/*----------------------------   solutiontransfer.h     ---------------------------*/
diff --git a/include/deal.II/numerics/time_dependent.h b/include/deal.II/numerics/time_dependent.h
new file mode 100644
index 0000000..79dae77
--- /dev/null
+++ b/include/deal.II/numerics/time_dependent.h
@@ -0,0 +1,1586 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__time_dependent_h
+#define dealii__time_dependent_h
+
+
+/*----------------------------   time-dependent.h     ---------------------------*/
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/smartpointer.h>
+
+#include <vector>
+#include <utility>
+
+DEAL_II_NAMESPACE_OPEN
+
+// forward declarations
+class TimeStepBase;
+template <typename number> class Vector;
+template <int dim, int spacedim> class Triangulation;
+
+/**
+ * This class provides an abstract interface to time dependent problems in
+ * that it addresses some of the most annoying aspects of this class of
+ * problems: data management. These problems frequently need large amounts of
+ * computer resources, most notably computing time, main memory and disk
+ * space. Main memory reduction is often the most pressing need, methods to
+ * implement it are almost always quite messy, though, quickly leading to code
+ * that stores and reloads data at places scattered all over the program, and
+ * which becomes unmaintainable sometimes. The present class tries to offer a
+ * more structured interface, albeit simple, which emerged in my mind after
+ * messing with my wave equation simulation for several months.
+ *
+ * The design of this class is mostly tailored for the solution of time
+ * dependent partial differential equations where the computational meshes may
+ * differ between each two timesteps and where the computations on each time
+ * step take a rather long time compared with the overhead of this class.
+ * Since no reference to the class of problems is made within this class, it
+ * is not restricted to PDEs, though, and it seems likely that a solver for
+ * large ordinary matrix differential equations may successfully use the same
+ * setup and therefore this class.
+ *
+ *
+ * <h3>Overview</h3>
+ *
+ * The general structure of a time dependent problem solver using a
+ * timestepping scheme is about the following: we have a collection of time
+ * step objects on which we solve our problem subsequently. In order to do so,
+ * we need knowledge of the data on zero or several previous timesteps (when
+ * using single or multiple step methods, that is) and maybe also some data of
+ * time steps ahead (for example the computational grid on these). Depending
+ * on the problem in question, a second loop over all timesteps may be done
+ * solving a dual problem, where the loop may run forward (one dual problem
+ * for each time step) or backward (using a global dual problem). Within one
+ * of these loops or using a separate loop, error estimators may be computed
+ * and the grids may be refined. Each of these loops is initiated by a call
+ * preparing each timestep object for the next loop, before actually starting
+ * the loop itself.
+ *
+ * We will denote a complete set of all these loops with the term "sweep".
+ * Since this library is mostly about adaptive methods, it is likely that the
+ * last loop within a sweep will generate refined meshes and that we will
+ * perform another sweep on these refined meshes. A total run will therefore
+ * often be a sequence of several sweeps. The global setup therefore looks
+ * like this:
+ * @verbatim
+ *    for sweep=0 to n_sweeps-1
+ *    {
+ *      for i=0 to n_timesteps-1
+ *        initialize timestep i for this sweep, e.g. for setting up
+ *        data structures, creating temporary files, etc.
+ *
+ *      for i=0 to n_timesteps-1
+ *        prepare timestep i for loop 0
+ *      for i=0 to n_timesteps-1
+ *        perform loop 0 on timestep i   (e.g. solve primal problem)
+ *
+ *      for i=0 to n_timesteps-1
+ *        prepare timestep i for loop 1
+ *      for i=0 to n_timesteps-1
+ *        perform loop 1 on timestep i   (e.g. solve dual problem)
+ *
+ *      for i=0 to n_timesteps-1
+ *        prepare timestep i for loop 2
+ *      for i=0 to n_timesteps-1
+ *        perform loop 2 on timestep i   (e.g. compute error information)
+ *
+ *      ...
+ *
+ *      for i=0 to n_timesteps-1
+ *        notify timestep i of the end of the sweep, e.g. for cleanups,
+ *        deletion of temporary files, etc.
+ *    }
+ * @endverbatim
+ * The user may specify that a loop shall run forward or backward (the latter
+ * being needed for the solution of global dual problems, for example).
+ *
+ * Going from the global overview to a more local viewpoint, we note that when
+ * a loop visits one timestep (e.g. to solve the primal or dual problem, or to
+ * compute error information), we need information on this, one or more
+ * previous time steps and zero or more timesteps in the future. However,
+ * often it is not needed to know all information from these timesteps and it
+ * is often a computational requirement to delete data at the first possible
+ * time when it is no more needed. Likewise, data should be reloaded at the
+ * latest time possible.
+ *
+ * In order to facilitate these principles, the concept of waking up and
+ * letting sleep a time step object was developed. Assume we have a time
+ * stepping scheme which needs to look ahead one time step and needs the data
+ * of the last two time steps, the following pseudocode describes what the
+ * central loop function of this class will do when we move from timestep @p
+ * n-1 to timestep @p n:
+ * @verbatim
+ *   wake up timestep n+1 with signal 1
+ *   wake up timestep n with signal 0
+ *   do computation on timestep n
+ *   let timestep n sleep with signal 0
+ *   let timestep n-1 sleep with signal 1
+ *   let timestep n-2 sleep with signal 2
+ *
+ *   move from n to n+1
+ * @endverbatim
+ * The signal number here denotes the distance of the timestep being sent the
+ * signal to the timestep where computations are done on. The calls to the @p
+ * wake_up and @p sleep functions with signal 0 could in principle be absorbed
+ * into the function doing the computation; we use these redundant signals,
+ * however, in order to separate computations and data management from each
+ * other, allowing to put all stuff around grid management, data reload and
+ * storage into one set of functions and computations into another.
+ *
+ * In the example above, possible actions might be: timestep <tt>n+1</tt>
+ * rebuilds the computational grid (there is a specialized class which can do
+ * this for you); timestep @p n builds matrices, sets solution vectors to the
+ * right size, maybe using an initial guess; then it does the computations;
+ * then it deletes the matrices since they are not needed by subsequent
+ * timesteps; timestep @p n-1 deletes those data vectors which are only needed
+ * by one timestep ahead; timestep @p n-2 deletes the remaining vectors and
+ * deletes the computational grid, somewhere storing information how to
+ * rebuild it eventually.
+ *
+ * From the given sketch above, it is clear that each time step object sees
+ * the following sequence of events:
+ * @verbatim
+ *   wake up with signal 1
+ *   wake up with signal 0
+ *   do computation
+ *   sleep with signal 0
+ *   sleep with signal 1
+ *   sleep with signal 2
+ * @endverbatim
+ * This pattern is repeated for each loop in each sweep.
+ *
+ * For the different loops within each sweep, the numbers of timesteps to look
+ * ahead (i.e. the maximum signal number to the @p wake_up function) and the
+ * look-behind (i.e. the maximum signal number to the @p sleep function) can
+ * be chosen separately. For example, it is usually only needed to look one
+ * time step behind when computing error estimation (in some cases, it may
+ * even be possible to not look ahead or back at all, in which case only
+ * signals zero will be sent), while one needs a look back of at least one for
+ * a timestepping method.
+ *
+ * Finally, a note on the direction of look-ahead and look-back is in place:
+ * look-ahead always refers to the direction the loop is running in, i.e. for
+ * loops running forward, @p wake_up is called for timestep objects with a
+ * greater time value than the one previously computed on, while @p sleep is
+ * called for timesteps with a lower time. If the loop runs in the opposite
+ * direction, e.g. when solving a global dual problem, this order is reversed.
+ *
+ *
+ * <h3>Implementation</h3>
+ *
+ * The main loop of a program using this class will usually look like the
+ * following one, taken modified from an application program that isn't
+ * distributed as part of the library:
+ * @code
+ *   template <int dim>
+ *   void TimeDependent_Wave<dim>::run_sweep (const unsigned int sweep_no)
+ *   {
+ *     start_sweep (sweep_no);
+ *
+ *     solve_primal_problem ();
+ *
+ *     if (compute_dual_problem)
+ *       solve_dual_problem ();
+ *
+ *     postprocess ();
+ *
+ *     if (sweep_no != number_of_sweeps-1)
+ *       refine_grids ();
+ *
+ *     write_statistics ();
+ *
+ *     end_sweep ();
+ *   };
+ *
+ *
+ *
+ *   template <int dim>
+ *   void WaveProblem<dim>::run ()
+ *   {
+ *     for (unsigned int sweep=0; sweep<number_of_sweeps; ++sweep)
+ *       timestep_manager.run_sweep (sweep);
+ *   };
+ * @endcode
+ * Here, @p timestep_manager is an object of type TimeDependent_Wave(), which
+ * is a class derived from TimeDependent. @p start_sweep, @p
+ * solve_primal_problem, @p solve_dual_problem, @p postprocess and @p
+ * end_sweep are functions inherited from this class. They all do a loop over
+ * all timesteps within this object and call the respective function on each
+ * of these objects. For example, here are two of the functions as they are
+ * implemented by the library:
+ * @code
+ *   void TimeDependent::start_sweep (const unsigned int s)
+ *   {
+ *     sweep_no = s;
+ *
+ *                                 // reset the number each
+ *                                 // time step has, since some time
+ *                                 // steps might have been added since
+ *                                 // the last time we visited them
+ *                                 //
+ *                                 // also set the sweep we will
+ *                                 // process in the sequel
+ *     for (unsigned int step=0; step<timesteps.size(); ++step)
+ *       {
+ *         timesteps[step]->set_timestep_no (step);
+ *         timesteps[step]->set_sweep_no (sweep_no);
+ *       };
+ *
+ *     for (unsigned int step=0; step<timesteps.size(); ++step)
+ *       timesteps[step]->start_sweep ();
+ *   };
+ *
+ *
+ *   void
+ *   TimeDependent::solve_primal_problem ()
+ *   {
+ *     do_loop (mem_fun(&TimeStepBase::init_for_primal_problem),
+ *              mem_fun(&TimeStepBase::solve_primal_problem),
+ *              timestepping_data_primal,
+ *              forward);
+ *   };
+ * @endcode
+ * The latter function shows rather clearly how most of the loops are invoked
+ * (@p solve_primal_problem, @p solve_dual_problem, @p postprocess, @p
+ * refine_grids and @p write_statistics all have this form, where the latter
+ * two give functions of the derived timestep class, rather than from the base
+ * class). The function TimeStepBase::init_for_primal_problem and the
+ * respective ones for the other operations defined by that class are only
+ * used to store the type of operation which the loop presently performed will
+ * do.
+ *
+ * As can be seen, most of the work is done by the @p do_loop function of this
+ * class, which takes the addresses of two functions which are used to
+ * initialize all timestep objects for the loop and to actually perform some
+ * action. The next parameter gives some information on the look-ahead and
+ * look-back and the last one denotes in which direction the loop is to be
+ * run.
+ *
+ * Using function pointers through the @p mem_fun functions provided by the
+ * <tt>C++</tt> standard library, it is possible to do neat tricks, like the
+ * following, also taken from the wave program, in this case from the function
+ * @p refine_grids:
+ * @code
+ *   ...
+ *   compute the thresholds for refinement
+ *   ...
+ *
+ *   do_loop (mem_fun (&TimeStepBase_Tria<dim>::init_for_refinement),
+ *            bind2nd (mem_fun1 (&TimeStepBase_Wave<dim>::refine_grid),
+ *                     TimeStepBase_Tria<dim>::RefinementData (top_threshold,
+ *                                                             bottom_threshold)),
+ *            TimeDependent::TimeSteppingData (0,1),
+ *            TimeDependent::forward);
+ * @endcode
+ * TimeStepBase_Wave::refine_grid is a function taking an argument, unlike all
+ * the other functions used above within the loops. However, in this special
+ * case the parameter was the same for all timesteps and known before the loop
+ * was started, so we fixed it and made a function object which to the outside
+ * world does not take parameters.
+ *
+ * Since it is the central function of this class, we finally present a
+ * stripped down version of the @p do_loop method, which is shown in order to
+ * provide a better understanding of the internals of this class. For brevity
+ * we have omitted the parts that deal with backward running loops as well as
+ * the checks whether wake-up and sleep operations act on timesteps outside
+ * <tt>0..n_timesteps-1</tt>.
+ * @code
+ *   template <typename InitFunctionObject, typename LoopFunctionObject>
+ *   void TimeDependent::do_loop (InitFunctionObject      init_function,
+ *                           LoopFunctionObject      loop_function,
+ *                           const TimeSteppingData &timestepping_data,
+ *                           const Direction         direction)
+ *   {
+ *                                 // initialize the time steps for
+ *                                 // a round of this loop
+ *     for (unsigned int step=0; step<n_timesteps; ++step)
+ *       init_function (static_cast<typename InitFunctionObject::argument_type>
+ *                 (timesteps[step]));
+ *
+ *                                 // wake up the first few time levels
+ *     for (int step=-timestepping_data.look_ahead; step<0; ++step)
+ *       for (int look_ahead=0; look_ahead<=timestepping_data.look_ahead; ++look_ahead)
+ *         timesteps[step+look_ahead]->wake_up(look_ahead);
+ *
+ *
+ *     for (unsigned int step=0; step<n_timesteps; ++step)
+ *       {
+ *                                     // first thing: wake up the
+ *                                     // timesteps ahead as necessary
+ *         for (unsigned int look_ahead=0;
+ *         look_ahead<=timestepping_data.look_ahead; ++look_ahead)
+ *      timesteps[step+look_ahead]->wake_up(look_ahead);
+ *
+ *
+ *                                     // actually do the work
+ *         loop_function (static_cast<typename LoopFunctionObject::argument_type>
+ *                   (timesteps[step]));
+ *
+ *                                     // let the timesteps behind sleep
+ *         for (unsigned int look_back=0; look_back<=timestepping_data.look_back; ++look_back)
+ *      timesteps[step-look_back]->sleep(look_back);
+ *       };
+ *
+ *                                 // make the last few timesteps sleep
+ *     for (int step=n_timesteps; step<n_timesteps+timestepping_data.look_back; ++step)
+ *       for (int look_back=0; look_back<=timestepping_data.look_back; ++look_back)
+ *         timesteps[step-look_back]->sleep(look_back);
+ *   };
+ * @endcode
+ *
+ *
+ * @author Wolfgang Bangerth, 1999
+ */
+class TimeDependent
+{
+public:
+  /**
+   * Structure holding the two basic entities that control a loop over all
+   * time steps: how many time steps ahead of the present one we shall start
+   * waking up timestep objects and how many timesteps behind we shall call
+   * their @p sleep method.
+   */
+  struct TimeSteppingData
+  {
+    /**
+     * Constructor; see the different fields for a description of the meaning
+     * of the parameters.
+     */
+    TimeSteppingData (const unsigned int look_ahead,
+                      const unsigned int look_back);
+
+    /**
+     * This denotes the number of timesteps the timestepping algorithm needs
+     * to look ahead. Usually, this number will be zero, since algorithms
+     * looking ahead can't act as timestepping schemes since they can't
+     * compute their data from knowledge of the past only and are therefore
+     * global in time.
+     *
+     * However, it may be necessary to look ahead in other circumstances, when
+     * not wanting to access the data of the next time step(s), but for
+     * example to know the next grid, the solution of a dual problem on the
+     * next time level, etc.
+     *
+     * Note that for a dual problem walking back in time, "looking ahead"
+     * means looking towards smaller time values.
+     *
+     * The value of this number determines how many time steps ahead the time
+     * step manager starts to call the @p wake_up function for each time step.
+     */
+    const unsigned int look_ahead;
+
+    /**
+     * This is the opposite variable to the above one. It denotes the number
+     * of time steps behind the present one for which we need to keep all data
+     * in order to do the computations on the present time level.
+     *
+     * For one step schemes (e.g. the Euler schemes, or the Crank-Nicolson
+     * scheme), this value will be one.
+     *
+     * The value of this number determines, how many time steps after having
+     * done computations on a time level the time step manager will call the @p
+     * sleep function for each time step.
+     */
+    const unsigned int look_back;
+  };
+
+  /**
+   * Enum offering the different directions in which a loop executed by @p
+   * do_loop may be run.
+   */
+  enum Direction
+  {
+    forward, backward
+  };
+
+  /**
+   * Constructor.
+   */
+  TimeDependent (const TimeSteppingData &data_primal,
+                 const TimeSteppingData &data_dual,
+                 const TimeSteppingData &data_postprocess);
+
+
+  /**
+   * Destructor. This will delete the objects pointed to by the pointers given
+   * to the <tt>insert_*</tt> and @p add_timestep functions, i.e. it will
+   * delete the objects doing the computations on each time step.
+   */
+  virtual ~TimeDependent ();
+
+  /**
+   * Add a timestep at any position. The position is a pointer to an existing
+   * time step object, or a null pointer denoting the end of the timestep
+   * sequence. If @p position is non-null, the new time step will be inserted
+   * before the respective element.
+   *
+   * Note that by giving an object to this function, the TimeDependent object
+   * assumes ownership of the object; it will therefore also take care of
+   * deletion of the objects it manages.
+   *
+   * There is another function, @p add_timestep, which inserts a time step at
+   * the end of the list.
+   *
+   * Note that this function does not change the timestep numbers stored
+   * within the other timestep objects, nor does it set the timestep number of
+   * this new timestep. This is only done upon calling the @p start_sweep
+   * function. In not changing the timestep numbers, it is simpler to operate
+   * on a space-time triangulation since one can always use the timestep
+   * numbers that were used in the previous sweep.
+   */
+  void insert_timestep (const TimeStepBase *position,
+                        TimeStepBase       *new_timestep);
+
+  /**
+   * Just like @p insert_timestep, but insert at the end.
+   *
+   * This mechanism usually will result in a set-up loop like this
+   * @code
+   * for (i=0; i<N; ++i)
+   *   manager.add_timestep(new MyTimeStep());
+   * @endcode
+   */
+  void add_timestep (TimeStepBase *new_timestep);
+
+  /**
+   * Delete a timestep. This is only necessary to call, if you want to delete
+   * it between two sweeps; at the end of the lifetime of this object, care is
+   * taken automatically of deletion of the time step objects. Deletion of the
+   * object by the destructor is done through this function also.
+   *
+   * Note that this function does not change the timestep numbers stored
+   * within the other timestep objects. This is only done upon calling the @p
+   * start_sweep function. In not changing the timestep numbers, it is simpler
+   * to operate on a space-time triangulation since one can always use the
+   * timestep numbers that were used in the previous sweep.
+   */
+  void delete_timestep (const unsigned int position);
+
+  /**
+   * Solve the primal problem; uses the functions @p init_for_primal_problem
+   * and @p solve_primal_problem of the TimeStepBase class through the @p
+   * do_loop function of this class.
+   *
+   * Look ahead and look back are determined by the @p
+   * timestepping_data_primal object given to the constructor.
+   */
+  void solve_primal_problem ();
+
+  /**
+   * Solve the dual problem; uses the functions @p init_for_dual_problem and
+   * @p solve_dual_problem of the TimeStepBase class through the @p do_loop
+   * function of this class.
+   *
+   * Look ahead and look back are determined by the @p timestepping_data_dual
+   * object given to the constructor.
+   */
+  void solve_dual_problem ();
+
+  /**
+   * Do a postprocessing round; uses the functions @p init_for_postprocessing
+   * and @p postprocess of the TimeStepBase class through the @p do_loop
+   * function of this class.
+   *
+   * Look ahead and look back are determined by the @p
+   * timestepping_data_postprocess object given to the constructor.
+   */
+  void postprocess ();
+
+  /**
+   * Do a loop over all timesteps, call the @p init_function at the beginning
+   * and the @p loop_function of each time step. The @p timestepping_data
+   * determine how many timesteps in front and behind the present one the @p
+   * wake_up and @p sleep functions are called.
+   *
+   * To see how this function works, note that the function @p
+   * solve_primal_problem only consists of a call to <tt>do_loop
+   * (mem_fun(&TimeStepBase::init_for_primal_problem),
+   * mem_fun(&TimeStepBase::solve_primal_problem), timestepping_data_primal,
+   * forward);</tt>.
+   *
+   * Note also, that the given class from which the two functions are taken
+   * needs not necessarily be TimeStepBase, but it could also be a derived
+   * class, that is @p static_castable from a TimeStepBase. The function may
+   * be a virtual function (even a pure one) of that class, which should help
+   * if the actual class where it is implemented is one which is derived
+   * through virtual base classes and thus unreachable by @p static_cast from
+   * the TimeStepBase class.
+   *
+   * Instead of using the above form, you can equally well use
+   * <tt>bind2nd(mem_fun1(&X::unary_function), arg)</tt> which lets the @p
+   * do_loop function call the given function with the specified parameter.
+   * Note that you need to bind the second parameter since the first one
+   * implicitly contains the object which the function is to be called for.
+   */
+  template <typename InitFunctionObject, typename LoopFunctionObject>
+  void do_loop (InitFunctionObject      init_function,
+                LoopFunctionObject      loop_function,
+                const TimeSteppingData &timestepping_data,
+                const Direction         direction);
+
+
+  /**
+   * Initialize the objects for the next sweep. This function specifically
+   * does the following: assign each time level the number it presently has
+   * within the array (which may change, if time levels are inserted or
+   * deleted) and transmit the number of the present sweep to these objects.
+   *
+   * It also calls the @p start_sweep function of each time step object, after
+   * the numbers above are set.
+   *
+   * This function is virtual, so you may override it. You should, however,
+   * not forget to call this function as well from your overriding version, at
+   * best at the beginning of your function since this is some kind of
+   * "constructor-like" function, which should be called bottom-up.
+   *
+   * The default implementation of this function calls @p start_sweep on all
+   * time step objects.
+   */
+  virtual void start_sweep (const unsigned int sweep_no);
+
+  /**
+   * Analogon to the above function, calling @p end_sweep of each time step
+   * object. The same applies with respect to the @p virtualness of this
+   * function as for the previous one.
+   *
+   * @note This function does not guarantee that @p end_sweep is called for
+   * successive time steps successively, rather the order of time step objects
+   * for which the function is called is arbitrary. You should therefore not
+   * assume that that function has been called for previous time steps
+   * already. If in multithread mode, the @p end_sweep function of several
+   * time steps may be called at once, so you should use synchronization
+   * mechanisms if your program requires so.
+   */
+  virtual void end_sweep ();
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   */
+  std::size_t memory_consumption () const;
+
+  /**
+   * Exception.
+   */
+  DeclExceptionMsg (ExcInvalidPosition,
+                    "You cannot insert a time step at the specified position.");
+
+protected:
+  /**
+   * Vector holding pointers to the time level objects. This is the main data
+   * this object operates on. Note that this object takes possession of the
+   * objects pointed to by the pointers in this collection.
+   */
+  std::vector<SmartPointer<TimeStepBase,TimeDependent> > timesteps;
+
+  /**
+   * Number of the present sweep. This is reset by the @p start_sweep function
+   * called at the outset of each sweep.
+   */
+  unsigned int sweep_no;
+
+  /**
+   * Some flags telling the @p solve_primal_problem function what to do. See
+   * the documentation of this struct for more information.
+   */
+  const TimeSteppingData timestepping_data_primal;
+
+  /**
+   * Some flags telling the @p solve_dual_problem function what to do. See the
+   * documentation of this struct for more information.
+   */
+  const TimeSteppingData timestepping_data_dual;
+
+  /**
+   * Some flags telling the @p postprocess function what to do. See the
+   * documentation of this struct for more information.
+   */
+  const TimeSteppingData timestepping_data_postprocess;
+
+private:
+
+  /**
+   * Do the work of <tt>end_sweep()</tt> for some timesteps only. This is
+   * useful in multithread mode.
+   */
+  void end_sweep (const unsigned int begin_timestep,
+                  const unsigned int end_timestep);
+};
+
+
+
+/**
+ * Base class for a time step in time dependent problems. This class provides
+ * barely more than the basic framework, defining the necessary virtual
+ * functions (namely @p sleep and @p wake_up), the interface to previous and
+ * following grids, and some functions to be called before a new loop over all
+ * time steps is started.
+ *
+ * @author Wolfgang Bangerth, 1999
+ */
+class TimeStepBase : public Subscriptor
+{
+public:
+  /**
+   * Enum denoting the type of problem which will have to be solved next.
+   */
+  enum SolutionState
+  {
+    primal_problem = 0x0,
+    dual_problem   = 0x1,
+    postprocess    = 0x2
+  };
+
+  /**
+   * Constructor. Does nothing here apart from setting the time.
+   */
+  TimeStepBase (const double time);
+
+  /**
+   * Destructor. At present, this does nothing.
+   */
+  virtual ~TimeStepBase ();
+
+  /**
+   * Reconstruct all the data that is needed for this time level to work. This
+   * function serves to get all the variables and data structures to work
+   * again after they have been sent to sleep some time before, or at the
+   * first time we visit this time level. In particular, it is used to
+   * reconstruct the triangulation, degree of freedom handlers, to reload data
+   * vectors in case they have been stored to disk, etc.
+   *
+   * The actual implementation of this function does nothing.
+   *
+   * Since this is an important task, you should call this function from your
+   * own function, should you choose to overload it in your own class (which
+   * likely is the case), preferably at the beginning so that your function
+   * can make use of the triangulation already existing.
+   */
+  virtual void wake_up (const unsigned int);
+
+  /**
+   * This is the opposite function to @p wake_up. It is used to delete data or
+   * save it to disk after it is no longer needed for the present sweep.
+   * Typical kinds of data for this are data vectors, degree of freedom
+   * handlers, triangulation objects, etc. which occupy large amounts of
+   * memory and may therefore be externalized.
+   *
+   * By default, this function does nothing.
+   */
+  virtual void sleep (const unsigned int);
+
+  /**
+   * This function is called each time before a new sweep is started. You may
+   * want to set up some fields needed in the course of the computations, and
+   * so on. You should take good care, however, not to install large objects,
+   * which should be deferred until the @p wake_up function is called.
+   *
+   * A typical action of this function would be sorting out names of temporary
+   * files needed in the process of solving, etc.
+   *
+   * At the time this function is called, the values of @p timestep_no, @p
+   * sweep_no and the pointer to the previous and next time step object
+   * already have their correct value.
+   *
+   * The default implementation of this function does nothing.
+   */
+  virtual void start_sweep ();
+
+  /**
+   * This is the analogue of the above function, but it is called at the end
+   * of a sweep. You will usually want to do clean-ups in this function, such
+   * as deleting temporary files and the like.
+   */
+  virtual void end_sweep ();
+
+  /**
+   * Before the primal problem is solved on each time level, this function is
+   * called (i.e. before the solution takes place on the first time level). By
+   * default, this function sets the @p next_action variable of this class.
+   * You may overload this function, but you should call this function within
+   * your own one.
+   */
+  virtual void init_for_primal_problem ();
+
+  /**
+   * Same as above, but called before a round of dual problem solves.
+   */
+  virtual void init_for_dual_problem ();
+
+  /**
+   * Same as above, but called before a round of postprocessing steps.
+   */
+  virtual void init_for_postprocessing ();
+
+  /**
+   * This function is called by the manager object when solving the primal
+   * problem on this time level is needed. It is called after the @p wake_up
+   * function was called and before the @p sleep function will be called.
+   * There is no default implementation for obvious reasons, so you have to
+   * overload this function.
+   */
+  virtual void solve_primal_problem () = 0;
+
+  /**
+   * This function is called by the manager object when solving the dual
+   * problem on this time level is needed. It is called after the @p wake_up
+   * function was called and before the @p sleep function will be called.
+   * There is a default implementation doing plain nothing since some problems
+   * may not need solving a dual problem. However, it will abort the program
+   * when being called anyway, since then you should really overload the
+   * function.
+   */
+  virtual void solve_dual_problem ();
+
+  /**
+   * This function is called by the manager object when postprocessing this
+   * time level is needed. It is called after the @p wake_up function was
+   * called and before the @p sleep function will be called. There is a
+   * default implementation doing plain nothing since some problems may not
+   * need doing a postprocess step, e.g. if everything was already done when
+   * solving the primal problem. However, it will abort the program when being
+   * called anyway, since then you should really overload the function.
+   */
+  virtual void postprocess_timestep ();
+
+  /**
+   * Return the time value of this time step.
+   */
+  double get_time () const;
+
+  /**
+   * Return the number of this time step. Note that this number may vary
+   * between different sweeps, if timesteps are added or deleted.
+   */
+  unsigned int get_timestep_no () const;
+
+  /**
+   * Compute the time difference to the last time step. If this timestep is
+   * the first one, this function will result in an exception. Though this
+   * behaviour seems a bit drastic, it is appropriate in most cases since if
+   * there is no previous time step you will need special treatment anyway and
+   * this way no invalid value is returned which could lead to wrong but
+   * unnoticed results of your computation. (The only sensible value to return
+   * in that case would not be zero, since valid computation can be done with
+   * that, but would be an invalid value such as @p NaN. However, there is
+   * not much difference in finding that the results of a computation are all
+   * invalid values or in getting an exception; in the latter case you at
+   * least get the exact place where your problem lies.)
+   */
+  double get_backward_timestep () const;
+
+  /**
+   * Return the time difference to the next time step. With regard to the case
+   * that there is no next time step, the same applies as for the function
+   * above.
+   */
+  double get_forward_timestep () const;
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   *
+   * You will want to overload this function in derived classes to compute the
+   * amount memory used by the derived class, and add the result of this
+   * function to your result.
+   */
+  virtual std::size_t memory_consumption () const;
+
+protected:
+  /**
+   * Pointer to the previous time step object in the list.
+   */
+  const TimeStepBase *previous_timestep;
+
+  /**
+   * Pointer to the next time step object in the list.
+   */
+  const TimeStepBase *next_timestep;
+
+  /**
+   * Number of the sweep we are presently in. This number is reset by the time
+   * level manager before a sweep is started.
+   */
+  unsigned int sweep_no;
+
+  /**
+   * Number of the time step, counted from zero onwards. This number is reset
+   * at the start of each sweep by the time level manager, since some time
+   * steps may have been inserted or deleted after the previous sweep.
+   */
+  unsigned int timestep_no;
+
+  /**
+   * Discrete time this level operates on.
+   */
+  const double time;
+
+  /**
+   * Variable storing whether the solution of a primal or a dual problem is
+   * actual, or any of the other actions specified. This variable is set by
+   * the <tt>init_for_*</tt> functions.
+   */
+  unsigned int next_action;
+
+private:
+  /**
+   * Reset the pointer to the previous time step; shall only be called by the
+   * time level manager object.
+   *
+   * This function is called at the set-up of the manager object and whenever
+   * a timestep is inserted or deleted.
+   */
+  void set_previous_timestep (const TimeStepBase *previous);
+
+  /**
+   * Reset the pointer to the next time step; shall only be called by the time
+   * level manager object.
+   *
+   * This function is called at the set-up of the manager object and whenever
+   * a timestep is inserted or deleted.
+   */
+  void set_next_timestep (const TimeStepBase *next);
+
+  /**
+   * Set the number this time step has in the list of timesteps. This function
+   * is called by the time step management object at the beginning of each
+   * sweep, to update information which may have changed due to addition or
+   * deletion of time levels.
+   */
+  void set_timestep_no (const unsigned int step_no);
+
+  /**
+   * Set the number of the sweep we are presently in. This function is called
+   * by the time level management object at start-up time of each sweep.
+   */
+  void set_sweep_no (const unsigned int sweep_no);
+
+
+  /**
+   * Copy constructor. I can see no reason why someone might want to use it,
+   * so I don't provide it. Since this class has pointer members, making it
+   * private prevents the compiler from providing its own, incorrect one if
+   * anyone chose to copy such an object.
+   */
+  TimeStepBase (const TimeStepBase &);
+
+  /**
+   * Copy operator. I can see no reason why someone might want to use it, so I
+   * don't provide it. Since this class has pointer members, making it private
+   * prevents the compiler from providing its own, incorrect one if anyone chose
+   * to copy such an object.
+   */
+  TimeStepBase &operator = (const TimeStepBase &);
+
+  // make the manager object a friend
+  friend class TimeDependent;
+};
+
+
+
+
+/**
+ * Namespace in which some classes are declared that encapsulate flags for the
+ * TimeStepBase_Tria() class. These used to be local data types of that class,
+ * but some compilers choked on some aspects, so we put them into a namespace
+ * of their own.
+ *
+ * @author Wolfgang Bangerth, 2001
+ */
+namespace TimeStepBase_Tria_Flags
+{
+  /**
+   * This structure is used to tell the TimeStepBase_Tria() class how grids
+   * should be handled. It has flags defining the moments where grids shall be
+   * re-made and when they may be deleted. Also, one variable states whether
+   * grids should be kept in memory or should be deleted between two uses to
+   * save memory.
+   */
+  template <int dim>
+  struct Flags
+  {
+    /**
+     * Default constructor; yields an exception, so is not really usable.
+     */
+    Flags ();
+
+    /**
+     * Constructor; see the different fields for a description of the meaning
+     * of the parameters.
+     */
+    Flags (const bool         delete_and_rebuild_tria,
+           const unsigned int wakeup_level_to_build_grid,
+           const unsigned int sleep_level_to_delete_grid);
+
+    /**
+     * This flag determines whether the @p sleep and @p wake_up functions
+     * shall delete and rebuild the triangulation.  While for small problems,
+     * this is not necessary, for large problems it is indispensable to save
+     * memory.  The reason for this is that there may be several hundred time
+     * levels in memory, each with its own triangulation, which may require
+     * large amounts if there are many cells on each. Having a total of
+     * 100.000.000 cells on all time levels taken together is not uncommon,
+     * which makes this flag understandable.
+     */
+    const bool delete_and_rebuild_tria;
+
+    /**
+     * This number denotes the parameter to the @p wake_up function at which
+     * it shall rebuild the grid. Obviously, it shall be less than or equal to
+     * the @p look_ahead number passed to the time step management object; if
+     * it is equal, then the grid is rebuilt the first time the @p wake_up
+     * function is called. If @p delete_and_rebuild_tria is @p false, this
+     * number has no meaning.
+     */
+    const unsigned int wakeup_level_to_build_grid;
+
+    /**
+     * This is the opposite flag to the one above: it determines at which call
+     * to @p sleep the grid shall be deleted.
+     */
+    const unsigned int sleep_level_to_delete_grid;
+  };
+
+
+
+  /**
+   * This structure is used to tell the TimeStepBase_Tria() class how grids
+   * should be refined. Before we explain all the different variables, first
+   * some terminology:
+   * <ul>
+   * <li> Correction: after having flagged some cells of the triangulation for
+   * following some given criterion, we may want to change the number of
+   * flagged cells on this grid according to another criterion that the number
+   * of cells may be only a certain fraction more or less than the number of
+   * cells on the previous grid. This change of refinement flags will be
+   * called "correction" in the sequel.
+   * <li> Adaption: in order to make the change between one grid and the next
+   * not too large, we may want to flag some additional cells on one of the two
+   * grids such that there are not too grave differences. This process will be
+   * called "adaption".
+   * </ul>
+   *
+   *
+   * <h3>Description of flags</h3>
+   *
+   * <ul>
+   * <li> @p max_refinement_level: Cut the refinement of cells at a given
+   * level. This flag does not influence the flagging of cells, so not more
+   * cells on the coarser levels are flagged than usual. Rather, the flags are
+   * all set, but when it comes to the actual refinement, the maximum
+   * refinement level is truncated.
+   *
+   * This option is only really useful when you want to compare global
+   * refinement with adaptive refinement when you don't want the latter to
+   * refine more than the global refinement.
+   *
+   * <li> @p first_sweep_with_correction: When using cell number correction as
+   * defined above, it may be worth while to start with this only in later
+   * sweeps, not already in the first one. If this variable is zero, then
+   * start with the first sweep, else with a higher one. The rationale for
+   * only starting later is that we do not want to block the development of
+   * grids at the beginning and only impose restrictions in the sweeps where
+   * we start to be interested in the actual results of the computations.
+   *
+   * <li> @p min_cells_for_correction: If we want a more free process of grid
+   * development, we may want to impose fewer rules for grids with few cells
+   * also. This variable sets a lower bound for the cell number of grids where
+   * corrections are to be performed.
+   *
+   * <li> @p cell_number_corridor_top: Fraction of the number of cells by
+   * which the number of cells of one grid may be higher than that on the
+   * previous grid. Common values are 10 per cent (i.e. 0.1). The naming of
+   * the variable results from the goal to define a target corridor for the
+   * number of cells after refinement has taken place.
+   *
+   * <li> @p cell_number_corridor_bottom: Fraction of the number of cells by
+   * which the number of cells of one grid may be lower than that on the
+   * previous grid. Common values are 5 per cent (i.e. 0.05). Usually this
+   * number will be smaller than @p cell_number_corridor_top since an increase
+   * of the number of cells is not harmful (though it increases the numerical
+   * amount of work needed to solve the problem) while a sharp decrease may
+   * reduce the accuracy of the final result even if the time steps computed
+   * before the decrease were computed to high accuracy.
+   *
+   * Note however, that if you compute the dual problem as well, then the time
+   * direction is reversed, so the two values defining the cell number
+   * corridor should be about equal.
+   *
+   * <li> @p correction_relaxations: This is a list of pairs of numbers with
+   * the following meaning: just as for @p min_cells_for_correction, it may be
+   * worth while to reduce the requirements upon grids if they have few cells.
+   * The present variable stores a list of cell numbers along with some values
+   * which tell us that the cell number corridor should be enlarged by a
+   * certain factor. For example, if this list was <tt>((100 5) (200 3) (500
+   * 2))</tt>, this would mean that for grids with a cell number below 100,
+   * the <tt>cell_number_corridor_*</tt> variables are to be multiplied by 5
+   * before they are applied, for cell numbers below 200 they are to be
+   * multiplied by 3, and so on.
+   *
+   * @p correction_relaxations is actually a vector of such lists. Each entry
+   * in this vector denotes the relaxation rules for one sweep. The last entry
+   * defines the relaxation rules for all following sweeps. This scheme is
+   * adopted to allow for stricter corrections in later sweeps while the
+   * relaxations may be more generous in the first sweeps.
+   *
+   * There is a static variable @p default_correction_relaxations which you
+   * can use as a default value. It is an empty list and thus defines no
+   * relaxations.
+   *
+   * <li> @p cell_number_correction_steps: Usually, if you want the number of
+   * cells to be corrected, the target corridor for the cell number is
+   * computed and some additional cells are flagged or flags are removed. But
+   * since the cell number resulting after flagging and deflagging can not be
+   * easily computed, it will usually not be within the corridor. We therefore
+   * need to iteratively get to our goal. Usually, three or four iterations
+   * are needed, but using this variable, you can reduce the allowed number of
+   * iterations; breaking the loop after two iterations yields good results
+   * regularly. Setting the variable to zero will result in no correction
+   * steps at all.
+   *
+   * <li> @p mirror_flags_to_previous_grid: If a cell on the present grid is
+   * flagged for refinement, also flag the corresponding cell on the previous
+   * grid. This is useful if, for example, error indicators are computed for
+   * space-time cells, but are stored for the second grid only. Now, since the
+   * first grid has the same contributions to the indicators as the second, it
+   * may be useful to flag both if necessary. This is done if the present
+   * variable is set.
+   *
+   * <li> @p adapt_grids: adapt the present grid to the previous one in the
+   * sense defined above. What is actually done here is the following: if
+   * going from the previous to the present grid would result in double
+   * refinement or double coarsening of some cells, then we try to flag these
+   * cells for refinement or coarsening such as to avoid the double step.
+   * Obviously, more than double refinement or coarsening is also caught.
+   *
+   * Grid adaption can try to avoid such changes between two grids, but it can
+   * never promise that they don't occur. This is because the next grid may
+   * change the present one, but then again there may be jumps in refinement
+   * level between the present and the previous one; this could only be
+   * avoided by looping iteratively through all grids, back and forth, until
+   * nothing changes anymore, which is obviously impossible if there are many
+   * time steps with very large grids.
+   * </ul>
+   */
+  template <int dim>
+  struct RefinementFlags
+  {
+    /**
+     * Typedef of a data type describing some relaxations of the correction
+     * process. See the general description of this class for more
+     * information.
+     */
+    typedef std::vector<std::vector<std::pair<unsigned int, double> > >   CorrectionRelaxations;
+
+    /**
+     * Default values for the relaxations: no relaxations.
+     */
+    static CorrectionRelaxations default_correction_relaxations;
+
+    /**
+     * Constructor. The default values are chosen such that almost no
+     * restriction on the mesh refinement is imposed.
+     */
+    RefinementFlags (const unsigned int max_refinement_level         = 0,
+                     const unsigned int first_sweep_with_correction  = 0,
+                     const unsigned int min_cells_for_correction     = 0,
+                     const double cell_number_corridor_top           = (1<<dim),
+                     const double cell_number_corridor_bottom        = 1,
+                     const CorrectionRelaxations &correction_relaxations = CorrectionRelaxations(),
+                     const unsigned int cell_number_correction_steps = 0,
+                     const bool mirror_flags_to_previous_grid        = false,
+                     const bool adapt_grids                          = false);
+
+    /**
+     * Maximum level of a cell in the triangulation of a time level. If it is
+     * set to zero, then no limit is imposed on the number of refinements a
+     * coarse grid cell may undergo. Usually, this field is used, if for some
+     * reason you want to limit refinement in an adaptive process, for example
+     * to avoid overly large numbers of cells or to compare with grids which
+     * have a certain number of refinements.
+     */
+    const unsigned int  max_refinement_level;
+
+    /**
+     * First sweep to perform cell number correction steps on; for sweeps
+     * before, cells are only flagged and no number-correction to previous
+     * grids is performed.
+     */
+    const unsigned int  first_sweep_with_correction;
+
+
+    /**
+     * Apply cell number correction with the previous time level only if there
+     * are more than this number of cells.
+     */
+    const unsigned int  min_cells_for_correction;
+
+    /**
+     * Fraction by which the number of cells on a time level may differ from
+     * the number on the previous time level (first: top deviation, second:
+     * bottom deviation).
+     */
+    const double        cell_number_corridor_top;
+
+    /**
+     * @ref cell_number_corridor_top
+     */
+    const double        cell_number_corridor_bottom;
+
+    /**
+     * List of relaxations to the correction step.
+     */
+    const std::vector<std::vector<std::pair<unsigned int,double> > > correction_relaxations;
+
+    /**
+     * Number of iterations to be performed to adjust the number of cells on a
+     * time level to those on the previous one. Zero means: do no such
+     * iteration.
+     */
+    const unsigned int  cell_number_correction_steps;
+
+    /**
+     * Flag all cells which are flagged on this timestep for refinement on the
+     * previous one also. This is useful in case the error indicator was
+     * computed by integration over time-space cells, but is now associated
+     * with a grid on a discrete time level. Since the error contribution comes
+     * from both grids, however, it is appropriate to refine both grids.
+     *
+     * Since the previous grid does not mirror the flags to the one before it,
+     * this does not lead to an almost infinite growth of cell numbers. You
+     * should use this flag with cell number correction switched on only,
+     * however.
+     *
+     * Mirroring is done after cell number correction is done, but before grid
+     * adaption, so the cell number on this grid is not noticeably influenced
+     * by the cells flagged additionally on the previous grid.
+     */
+    const bool          mirror_flags_to_previous_grid;
+
+    /**
+     * Adapt this grid to the previous one.
+     */
+    const bool          adapt_grids;
+
+    /**
+     * Exception
+     */
+    DeclException1 (ExcInvalidValue,
+                    double,
+                    << "The value " << arg1
+                    << " for the cell number corridor does not fulfill "
+                    "its natural requirements.");
+  };
+
+
+
+  /**
+   * Structure given to the actual refinement function, telling it which
+   * thresholds to take for coarsening and refinement. The actual refinement
+   * criteria are loaded by calling the virtual function @p
+   * get_tria_refinement_criteria.
+   */
+  template <int dim>
+  struct RefinementData
+  {
+    /**
+     * Constructor
+     */
+    RefinementData (const double         refinement_threshold,
+                    const double         coarsening_threshold=0);
+
+    /**
+     * Threshold for refinement: cells having a larger value will be refined
+     * (at least in the first round; subsequent steps of the refinement
+     * process may flag other cells as well or remove the flag from cells with
+     * a criterion higher than this threshold).
+     */
+    const double         refinement_threshold;
+
+    /**
+     * Same threshold for coarsening: cells with a smaller value will be
+     * coarsened if possible.
+     */
+    const double         coarsening_threshold;
+
+    /**
+     * Exception
+     */
+    DeclException1 (ExcInvalidValue,
+                    double,
+                    << "The value " << arg1
+                    << " for the cell refinement thresholds does not fulfill "
+                    "its natural requirements.");
+  };
+}
+
+
+
+
+/**
+ * Specialisation of TimeStepBase which addresses some aspects of grid
+ * handling. In particular, this class is thought to make handling of grids
+ * available that are adaptively refined on each time step separately or with
+ * a loose coupling between time steps. It also takes care of deleting and
+ * rebuilding grids when memory resources are a concern, through the @p sleep
+ * and @p wake_up functions declared in the base class.
+ *
+ * In addition to that, it offers functions which do some rather hairy
+ * refinement rules for time dependent problems, trying to avoid too much
+ * change in the grids between subsequent time levels, while also trying to
+ * retain the freedom of refining each grid separately. There are lots of
+ * flags and numbers controlling this function, which might drastically change
+ * the behaviour of the function -- see the description of the flags for
+ * further information.
+ *
+ * @author Wolfgang Bangerth, 1999; large parts taken from the wave program,
+ * by Wolfgang Bangerth 1998
+ */
+template <int dim>
+class TimeStepBase_Tria : public TimeStepBase
+{
+public:
+  /**
+   * Typedef the data types of the TimeStepBase_Tria_Flags() namespace into
+   * local scope.
+   */
+  typedef typename TimeStepBase_Tria_Flags::Flags<dim>           Flags;
+  typedef typename TimeStepBase_Tria_Flags::RefinementFlags<dim> RefinementFlags;
+  typedef typename TimeStepBase_Tria_Flags::RefinementData<dim>  RefinementData;
+
+
+  /**
+   * Extension of the enum in the base class denoting the next action to be
+   * done.
+   */
+  enum SolutionState
+  {
+    grid_refinement = 0x1000
+  };
+
+
+  /**
+   * Default constructor. Does nothing but throws an exception. We need to
+   * have such a constructor in order to satisfy the needs of derived classes,
+   * which take this class as a virtual base class and don't need to call this
+   * constructor if they are not terminal classes. The compiler would like to
+   * know a constructor to call anyway since it can't know that the class is
+   * not terminal.
+   */
+  TimeStepBase_Tria ();
+
+  /**
+   * Constructor. Takes a coarse grid from which the grids on this time level
+   * will be derived and some flags steering the behaviour of this object.
+   *
+   * The ownership of the coarse grid stays with the creator of this object.
+   * However, it is locked from destruction to guarantee the lifetime of the
+   * coarse grid is longer than it is needed by this object.
+   *
+   * You need to give the general flags structure to this function since it is
+   * needed anyway; the refinement flags can be omitted if you do not intend
+   * to call the refinement function of this class.
+   */
+  TimeStepBase_Tria (const double              time,
+                     const Triangulation<dim, dim> &coarse_grid,
+                     const Flags              &flags,
+                     const RefinementFlags    &refinement_flags = RefinementFlags());
+
+  /**
+   * Destructor. At present, this does nothing more than release the lock on the
+   * coarse grid triangulation given to the constructor.
+   */
+  virtual ~TimeStepBase_Tria ();
+
+  /**
+   * Reconstruct all the data that is needed for this time level to work. This
+   * function serves to get all the variables and data structures to work
+   * again after they have been sent to sleep some time before, or at the
+   * first time we visit this time level. In particular, it is used to
+   * reconstruct the triangulation, degree of freedom handlers, to reload data
+   * vectors in case they have been stored to disk, etc. By default, this
+   * function rebuilds the triangulation if the respective flag has been set
+   * to destroy it in the @p sleep function. It does so also the first time we
+   * hit this function and @p wakeup_level equals
+   * <tt>flags.wakeup_level_to_build_grid</tt>, independently of the value of
+   * the mentioned flag. (Actually, it does so whenever the triangulation
+   * pointer equals the Null pointer and the value of @p wakeup_level is
+   * right.)
+   *
+   * Since this is an important task, you should call this function from your
+   * own function, should you choose to overload it in your own class (which
+   * likely is the case), preferably at the beginning so that your function
+   * can make use of the triangulation already existing.
+   */
+  virtual void wake_up (const unsigned int wakeup_level);
+
+  /**
+   * This is the opposite function to @p wake_up. It is used to delete data or
+   * save it to disk after it is no longer needed for the present sweep.
+   * Typical kinds of data for this are data vectors, degree of freedom
+   * handlers, triangulation objects, etc. which occupy large amounts of
+   * memory and may therefore be externalized.
+   *
+   * By default, if the user specified so in the flags for this object, the
+   * triangulation is deleted and the refinement history saved such that the
+   * respective @p wake_up function can rebuild it. You should therefore call
+   * this function from your overloaded version, preferably at the end so that
+   * your function can use the triangulation as long as you need it.
+   */
+  virtual void sleep (const unsigned int);
+
+  /**
+   * Do the refinement according to the flags passed to the constructor of
+   * this object and the data passed to this function. For a description of
+   * the working of this function refer to the general documentation of this
+   * class.
+   *
+   * In fact, this function does not actually refine or coarsen the
+   * triangulation, but only sets the respective flags. This is done because
+   * usually you will not need the grid immediately afterwards but only in the
+   * next sweep, so it suffices to store the flags and rebuild it the next
+   * time we need it. Also, it may be that the next time step would like to
+   * add or delete some flags, so we have to wait anyway with the use of this
+   * grid.
+   */
+  void refine_grid (const RefinementData data);
+
+  /**
+   * Respective init function for the refinement loop; does nothing in the
+   * default implementation, apart from setting @p next_action to @p
+   * grid_refinement but can be overloaded.
+   */
+  virtual void init_for_refinement ();
+
+  /**
+   * Virtual function that should fill the vector with the refinement criteria
+   * for the present triangulation. It is used within the @p refine_grid
+   * function to get the criteria for the present time step, since they can't
+   * be passed through its argument when using the loop of the time step
+   * management object.
+   */
+  virtual void get_tria_refinement_criteria (Vector<float> &criteria) const = 0;
+
+  /**
+   * The refinement flags of the triangulation are stored in a local variable
+   * thus allowing a restoration. The coarsening flags are also stored.
+   */
+  void save_refine_flags ();
+
+  /**
+   * Determine an estimate for the memory consumption (in bytes) of this
+   * object.
+   *
+   * You will want to overload this function in derived classes to compute the
+   * amount of memory used by the derived class, and add the result of this
+   * function to your result.
+   */
+  virtual std::size_t memory_consumption () const;
+
+  /**
+   * Exception
+   */
+  DeclExceptionMsg (ExcGridNotDeleted,
+                    "When calling restore_grid(), you must have previously "
+                    "deleted the triangulation.");
+
+protected:
+
+  /**
+   * Triangulation used at this time level. Since this is something that every
+   * time stepping scheme needs to have, we can safely put it into the base
+   * class. Note that the triangulation is frequently deleted and rebuilt by
+   * the functions @p sleep and @p wake_up to save memory, if such a behaviour
+   * is specified in the @p flags structure.
+   */
+  SmartPointer<Triangulation<dim, dim>,TimeStepBase_Tria<dim> > tria;
+
+  /**
+   * Pointer to a grid which is to be used as the coarse grid for this time
+   * level.  This pointer is set through the constructor; ownership remains
+   * with the owner of this management object.
+   */
+  SmartPointer<const Triangulation<dim, dim>,TimeStepBase_Tria<dim> > coarse_grid;
+
+  /**
+   * Some flags about how this time level shall behave. See the documentation
+   * of this struct to find out more about that.
+   */
+  const Flags flags;
+
+  /**
+   * Flags controlling the refinement process; see the documentation of the
+   * respective structure for more information.
+   */
+  const RefinementFlags refinement_flags;
+
+private:
+  /**
+   * Vectors holding the refinement and coarsening flags of the different
+   * sweeps on this time level. The vectors therefore hold the history of the
+   * grid.
+   */
+  std::vector<std::vector<bool> >   refine_flags;
+
+  /**
+   * @ref refine_flags
+   */
+  std::vector<std::vector<bool> >   coarsen_flags;
+
+  /**
+   * Restore the grid according to the saved data. For this, the coarse grid
+   * is copied and the grid is stepwise rebuilt using the saved flags.
+   */
+  void restore_grid ();
+};
+
+
+
+
+
+/*----------------------------- template functions ------------------------------*/
+
+template <typename InitFunctionObject, typename LoopFunctionObject>
+void TimeDependent::do_loop (InitFunctionObject      init_function,
+                             LoopFunctionObject      loop_function,
+                             const TimeSteppingData &timestepping_data,
+                             const Direction         direction)
+{
+  // Run one sweep over all time steps: call init_function on every step,
+  // then visit the steps one by one in the requested direction. For each
+  // visited step, wake_up() is called on the steps up to look_ahead ahead
+  // of it, loop_function on the step itself, and sleep() on the steps up
+  // to look_back behind it.
+  //
+  // The forward/backward switch is repeated at every place that depends on
+  // the direction, so that both variants of each piece of index arithmetic
+  // sit next to each other and are easy to keep in sync.
+
+  const unsigned int n_timesteps = timesteps.size();
+
+  // give every time step the chance to set itself up for this sweep,
+  // in the order in which the sweep will visit them
+  for (unsigned int step=0; step<n_timesteps; ++step)
+    switch (direction)
+      {
+      case forward:
+        init_function (static_cast<typename InitFunctionObject::argument_type>
+                       (&*timesteps[step]));
+        break;
+      case backward:
+        init_function (static_cast<typename InitFunctionObject::argument_type>
+                       (&*timesteps[n_timesteps-step-1]));
+        break;
+      }
+
+  // wake up the first few time levels: replay the wake-up calls of the
+  // virtual steps -look_ahead...-1 that precede the first real step.
+  // look_ahead is cast to int before negating it so that the start value
+  // does not depend on unsigned wrap-around.
+  for (int step=-static_cast<int>(timestepping_data.look_ahead); step<0; ++step)
+    for (int look_ahead=0;
+         look_ahead<=static_cast<int>(timestepping_data.look_ahead); ++look_ahead)
+      // only real time steps are woken; the backward index mirrors the forward one
+      if ((step+look_ahead >= 0)
+          &&
+          (step+look_ahead < static_cast<int>(n_timesteps)))
+        switch (direction)
+          {
+          case forward:
+            timesteps[step+look_ahead]->wake_up(look_ahead);
+            break;
+          case backward:
+            timesteps[n_timesteps-(step+look_ahead)-1]->wake_up(look_ahead);
+            break;
+          }
+
+  for (unsigned int step=0; step<n_timesteps; ++step)
+    {
+      // first thing: wake up the timesteps ahead as necessary
+      for (unsigned int look_ahead=0;
+           look_ahead<=timestepping_data.look_ahead; ++look_ahead)
+        switch (direction)
+          {
+          case forward:
+            if (step+look_ahead < n_timesteps)
+              timesteps[step+look_ahead]->wake_up(look_ahead);
+            break;
+          case backward:
+            if (n_timesteps > (step+look_ahead))
+              timesteps[n_timesteps-(step+look_ahead)-1]->wake_up(look_ahead);
+            break;
+          }
+
+      // actually do the work
+      switch (direction)
+        {
+        case forward:
+          loop_function (static_cast<typename LoopFunctionObject::argument_type>
+                         (&*timesteps[step]));
+          break;
+        case backward:
+          loop_function (static_cast<typename LoopFunctionObject::argument_type>
+                         (&*timesteps[n_timesteps-step-1]));
+          break;
+        }
+
+      // let the timesteps behind sleep
+      for (unsigned int look_back=0;
+           look_back<=timestepping_data.look_back; ++look_back)
+        switch (direction)
+          {
+          case forward:
+            if (step>=look_back)
+              timesteps[step-look_back]->sleep(look_back);
+            break;
+          case backward:
+            if (step>=look_back)
+              timesteps[n_timesteps-(step-look_back)-1]->sleep(look_back);
+            break;
+          }
+    }
+
+  // make the last few timesteps sleep
+  for (int step=static_cast<int>(n_timesteps);
+       step<static_cast<int>(n_timesteps+timestepping_data.look_back); ++step)
+    for (int look_back=0;
+         look_back<=static_cast<int>(timestepping_data.look_back); ++look_back)
+      switch (direction)
+        {
+        case forward:
+          if ((step-look_back >= 0)
+              &&
+              (step-look_back < static_cast<int>(n_timesteps)))
+            timesteps[step-look_back]->sleep(look_back);
+          break;
+        case backward:
+          if ((step-look_back >= 0)
+              &&
+              (step-look_back < static_cast<int>(n_timesteps)))
+            timesteps[n_timesteps-(step-look_back)-1]->sleep(look_back);
+          break;
+        }
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+/*----------------------------   time-dependent.h     ---------------------------*/
+#endif
+/*----------------------------   time-dependent.h     ---------------------------*/
diff --git a/include/deal.II/numerics/vector_tools.h b/include/deal.II/numerics/vector_tools.h
new file mode 100644
index 0000000..635ee35
--- /dev/null
+++ b/include/deal.II/numerics/vector_tools.h
@@ -0,0 +1,2423 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#ifndef dealii__vector_tools_h
+#define dealii__vector_tools_h
+
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/point.h>
+#include <deal.II/dofs/function_map.h>
+#include <deal.II/fe/mapping_q.h>
+#include <deal.II/hp/mapping_collection.h>
+
+#include <map>
+#include <vector>
+#include <set>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int dim, typename Number> class Function;
+template <int dim, typename Number> struct FunctionMap;
+template <int dim> class Quadrature;
+template <int dim> class QGauss;
+
+template <typename number> class Vector;
+template <typename number> class FullMatrix;
+template <int dim, int spacedim> class Mapping;
+template <int dim, int spacedim> class DoFHandler;
+template <typename gridtype> class InterGridMap;
+namespace hp
+{
+  template <int dim, int spacedim> class DoFHandler;
+  template <int dim, int spacedim> class MappingCollection;
+  template <int dim> class QCollection;
+}
+class ConstraintMatrix;
+
+
+//TODO: Move documentation of functions to the functions!
+
+/**
+ * Provide a namespace which offers some operations on vectors. Among these
+ * are assembling of standard vectors, integration of the difference of a
+ * finite element solution and a continuous function, interpolations and
+ * projections of continuous functions to the finite element space and other
+ * operations.
+ *
+ * @note There exist two versions of almost all functions, one that takes an
+ * explicit Mapping argument and one that does not. The second one generally
+ * calls the first with an implicit $Q_1$ argument (i.e., with an argument of
+ * kind MappingQGeneric(1)). If you intend your code to use a different
+ * mapping than a (bi-/tri-)linear one, then you need to call the functions
+ * <b>with</b> a mapping argument.
+ *
+ *
+ * <h3>Description of operations</h3>
+ *
+ * This collection of methods offers the following operations:
+ * <ul>
+ * <li> Interpolation: assign each degree of freedom in the vector to be the
+ * value of the function given as argument. This is identical to saying that
+ * the resulting finite element function (which is isomorphic to the output
+ * vector) has exact function values in all support points of trial functions.
+ * The support point of a trial function is the point where its value equals
+ * one, e.g. for linear trial functions the support points are four corners of
+ * an element. This function therefore relies on the assumption that a finite
+ * element is used for which the degrees of freedom are function values
+ * (Lagrange elements) rather than gradients, normal derivatives, second
+ * derivatives, etc (Hermite elements, quintic Argyris element, etc.).
+ *
+ * It seems inevitable that some values of the vector to be created are set
+ * twice or even more than that. The reason is that we have to loop over all
+ * cells and get the function values for each of the trial functions located
+ * thereon. This applies also to the functions located on faces and corners
+ * which we thus visit more than once. While setting the value in the vector
+ * is not an expensive operation, the evaluation of the given function may be,
+ * taking into account that a virtual function has to be called.
+ *
+ * <li> Projection: compute the <i>L</i><sup>2</sup>-projection of the given
+ * function onto the finite element space, i.e. if <i>f</i> is the function to
+ * be projected, compute <i>f<sub>h</sub></i> in <i>V<sub>h</sub></i> such
+ * that
+ * (<i>f<sub>h</sub></i>,<i>v<sub>h</sub></i>)=(<i>f</i>,<i>v<sub>h</sub></i>)
+ * for all discrete test functions <i>v<sub>h</sub></i>. This is done through
+ * the solution of the linear system of equations <i> M v = f</i> where
+ * <i>M</i> is the mass matrix $m_{ij} = \int_\Omega \phi_i(x) \phi_j(x) dx$
+ * and $f_i = \int_\Omega f(x) \phi_i(x) dx$. The solution vector $v$ then is
+ * the nodal representation of the projection <i>f<sub>h</sub></i>. The
+ * project() functions are used in the step-21 and step-23 tutorial programs.
+ *
+ * In order to get proper results, it may be necessary to treat boundary
+ * conditions right. Below are listed some cases where this may be needed.  If
+ * needed, this is done by <i>L</i><sup>2</sup>-projection of the trace of the
+ * given function onto the finite element space restricted to the boundary of
+ * the domain, then taking this information and using it to eliminate the
+ * boundary nodes from the mass matrix of the whole domain, using the
+ * MatrixTools::apply_boundary_values() function. The projection of the trace
+ * of the function to the boundary is done with the
+ * VectorTools::project_boundary_values() (see below) function, which is
+ * called with a map of boundary functions FunctionMap in which all boundary
+ * indicators from zero to numbers::internal_face_boundary_id-1
+ * (numbers::internal_face_boundary_id is used for other purposes, see the
+ * Triangulation class documentation) point to the function to be projected.
+ * The projection to the boundary takes place using a second quadrature
+ * formula on the boundary given to the project() function. The first
+ * quadrature formula is used to compute the right hand side and for numerical
+ * quadrature of the mass matrix.
+ *
+ * The projection of the boundary values first, then eliminating them from the
+ * global system of equations is not needed usually. It may be necessary if
+ * you want to enforce special restrictions on the boundary values of the
+ * projected function, for example in time dependent problems: you may want to
+ * project the initial values but need consistency with the boundary values
+ * for later times. Since the latter are projected onto the boundary in each
+ * time step, it is necessary that we also project the boundary values of the
+ * initial values, before projecting them to the whole domain.
+ *
+ * Obviously, the results of the two schemes for projection are different.
+ * Usually, when projecting to the boundary first, the
+ * <i>L</i><sup>2</sup>-norm of the difference between original function and
+ * projection over the whole domain will be larger (factors of five have been
+ * observed) while the <i>L</i><sup>2</sup>-norm of the error integrated over
+ * the boundary should of course be less. The reverse should also hold if no
+ * projection to the boundary is performed.
+ *
+ * The selection whether the projection to the boundary first is needed is
+ * done with the <tt>project_to_boundary_first</tt> flag passed to the
+ * function.  If @p false is given, the additional quadrature formula for
+ * faces is ignored.
+ *
+ * You should be aware of the fact that if no projection to the boundary is
+ * requested, a function with zero boundary values may not have zero boundary
+ * values after projection. There is a flag for this especially important
+ * case, which tells the function to enforce zero boundary values on the
+ * respective boundary parts. Since enforced zero boundary values could also
+ * have been reached through projection, but are more economically obtained
+ * using other methods, the @p project_to_boundary_first flag is ignored if
+ * the @p enforce_zero_boundary flag is set.
+ *
+ * The solution of the linear system is presently done using a simple CG
+ * method without preconditioning and without multigrid. This is clearly not
+ * too efficient, but sufficient in many cases and simple to implement. This
+ * detail may change in the future.
+ *
+ * <li> Creation of right hand side vectors: The create_right_hand_side()
+ * function computes the vector $f_i = \int_\Omega f(x) \phi_i(x) dx$. This is
+ * the same as what the <tt>MatrixCreator::create_*</tt> functions which take
+ * a right hand side do, but without assembling a matrix.
+ *
+ * <li> Creation of right hand side vectors for point sources: The
+ * create_point_source_vector() function computes the vector $f_i =
+ * \int_\Omega \delta(x-x_0) \phi_i(x) dx$.
+ *
+ * <li> Creation of boundary right hand side vectors: The
+ * create_boundary_right_hand_side() function computes the vector $f_i =
+ * \int_{\partial\Omega} g(x) \phi_i(x) dx$. This is the right hand side
+ * contribution of boundary forces when having inhomogeneous Neumann boundary
+ * values in Laplace's equation or other second order operators. This function
+ * also takes an optional argument denoting over which parts of the boundary
+ * the integration shall extend. If the default argument is used, it is
+ * applied to all boundaries.
+ *
+ * <li> Interpolation of boundary values: The
+ * MatrixTools::apply_boundary_values() function takes a list of boundary
+ * nodes and their values. You can get such a list by interpolation of a
+ * boundary function using the interpolate_boundary_values() function. To use
+ * it, you have to specify a list of pairs of boundary indicators (of type
+ * <tt>types::boundary_id</tt>; see the section in the documentation of the
+ * Triangulation class for more details) and the according functions denoting
+ * the Dirichlet boundary values of the nodes on boundary faces with this
+ * boundary indicator.
+ *
+ * Usually, all other boundary conditions, such as inhomogeneous Neumann
+ * values or mixed boundary conditions are handled in the weak formulation. No
+ * attempt is made to include these into the process of matrix and vector
+ * assembly therefore.
+ *
+ * Within this function, boundary values are interpolated, i.e. a node is
+ * given the point value of the boundary function. In some cases, it may be
+ * necessary to use the L2-projection of the boundary function or any other
+ * method. For this purpose we refer to the project_boundary_values() function
+ * below.
+ *
+ * You should be aware that the boundary function may be evaluated at nodes on
+ * the interior of faces. These, however, need not be on the true boundary,
+ * but rather are on the approximation of the boundary represented by the
+ * mapping of the unit cell to the real cell. Since this mapping will in most
+ * cases not be the exact one at the face, the boundary function is evaluated
+ * at points which are not on the boundary and you should make sure that the
+ * returned values are reasonable in some sense anyway.
+ *
+ * In 1d the situation is a bit different since there faces (i.e. vertices)
+ * have no boundary indicator. It is assumed that if the boundary indicator
+ * zero is given in the list of boundary functions, the left boundary point is
+ * to be interpolated while the right boundary point is associated with the
+ * boundary index 1 in the map. The respective boundary functions are then
+ * evaluated at the place of the respective boundary point.
+ *
+ * <li> Projection of boundary values: The project_boundary_values() function
+ * acts similar to the interpolate_boundary_values() function, apart from the
+ * fact that it does not get the nodal values of boundary nodes by
+ * interpolation but rather through the <i>L</i><sup>2</sup>-projection of the
+ * trace of the function to the boundary.
+ *
+ * The projection takes place on all boundary parts with boundary indicators
+ * listed in the map (FunctionMap::type) of boundary functions. These
+ * boundary parts may or may not be continuous. For these boundary parts, the
+ * mass matrix is assembled using the
+ * MatrixTools::create_boundary_mass_matrix() function, as well as the
+ * appropriate right hand side. Then the resulting system of equations is
+ * solved using a simple CG method (without preconditioning), which is in most
+ * cases sufficient for the present purpose.
+ *
+ * <li> Computing errors: The function integrate_difference() performs the
+ * calculation of the error between a given (continuous) reference function
+ * and the finite element solution in different norms. The integration is
+ * performed using a given quadrature formula and assumes that the given
+ * finite element object equals that used for the computation of the
+ * solution.
+ *
+ * The result is stored in a vector (named @p difference), where each entry
+ * equals the given norm of the difference on a cell. The order of entries is
+ * the same as a @p cell_iterator takes when started with @p begin_active and
+ * promoted with the <tt>++</tt> operator.
+ *
+ * This data, one number per active cell, can be used to generate graphical
+ * output by directly passing it to the DataOut class through the
+ * DataOut::add_data_vector function. Alternatively, it can be interpolated to
+ * the nodal points of a finite element field using the
+ * DoFTools::distribute_cell_to_dof_vector function.
+ *
+ * Presently, there is the possibility to compute the following values from
+ * the difference, on each cell: @p mean, @p L1_norm, @p L2_norm, @p
+ * Linfty_norm, @p H1_seminorm and @p H1_norm, see VectorTools::NormType. For
+ * the mean difference value, the reference function minus the numerical
+ * solution is computed, not the other way round.
+ *
+ * The infinity norm of the difference on a given cell returns the maximum
+ * absolute value of the difference at the quadrature points given by the
+ * quadrature formula parameter. This will in some cases not be too good an
+ * approximation, since for example the Gauss quadrature formulae do not
+ * evaluate the difference at the end or corner points of the cells. You may
+ * want to choose a quadrature formula with more quadrature points or one with
+ * another distribution of the quadrature points in this case. You should also
+ * take into account the superconvergence properties of finite elements in
+ * some points: for example in 1D, the standard finite element method is a
+ * collocation method and should return the exact value at nodal points.
+ * Therefore, the trapezoidal rule should always return a vanishing L-infinity
+ * error. Conversely, in 2D the maximum L-infinity error should be located at
+ * the vertices or at the center of the cell, which would make it plausible to
+ * use the Simpson quadrature rule. On the other hand, there may be
+ * superconvergence at Gauss integration points. These examples are not
+ * intended as a rule of thumb, rather they are thought to illustrate that the
+ * use of the wrong quadrature formula may show a significantly wrong result
+ * and care should be taken to choose the right formula.
+ *
+ * The <i>H</i><sup>1</sup> seminorm is the <i>L</i><sup>2</sup> norm of the
+ * gradient of the difference. The square of the full <i>H</i><sup>1</sup>
+ * norm is the sum of the square of seminorm and the square of the
+ * <i>L</i><sup>2</sup> norm.
+ *
+ * To get the global <i>L<sup>1</sup></i> error, you have to sum up the
+ * entries in @p difference, e.g. using Vector::l1_norm() function.  For the
+ * global <i>L</i><sup>2</sup> difference, you have to sum up the squares of
+ * the entries and take the root of the sum, e.g. using Vector::l2_norm().
+ * These two operations represent the <i>l</i><sub>1</sub> and
+ * <i>l</i><sub>2</sub> norms of the vectors, but you need not take the
+ * absolute value of each entry, since the cellwise norms are already
+ * positive.
+ *
+ * To get the global mean difference, simply sum up the elements as above. To
+ * get the $L_\infty$ norm, take the maximum of the vector elements, e.g.
+ * using the Vector::linfty_norm() function.
+ *
+ * For the global <i>H</i><sup>1</sup> norm and seminorm, the same rule
+ * applies as for the <i>L</i><sup>2</sup> norm: compute the
+ * <i>l</i><sub>2</sub> norm of the cell error vector.
+ *
+ * Note that, in the codimension one case, if you ask for a norm that requires
+ * the computation of a gradient, then the provided function is automatically
+ * projected along the curve, and the difference is only computed on the
+ * tangential part of the gradient, since no information is available on the
+ * normal component of the gradient anyway.
+ * </ul>
+ *
+ * All functions use the finite element given to the DoFHandler object the
+ * last time that the degrees of freedom were distributed over the
+ * triangulation. Also, if access to an object describing the exact form of
+ * the boundary is needed, the pointer stored within the triangulation object
+ * is accessed.
+ *
+ * @note Instantiations for this template are provided for some vector types,
+ * in particular <code>Vector<float>, Vector<double>,
+ * BlockVector<float>, BlockVector<double></code>; others can be
+ * generated in application code (see the section on
+ * @ref Instantiations
+ * in the manual).
+ *
+ * @ingroup numerics
+ * @author Wolfgang Bangerth, Ralf Hartmann, Guido Kanschat, 1998, 1999, 2000,
+ * 2001
+ */
+namespace VectorTools
+{
+  /**
+   * Selects which norm or integral the integrate_difference() function of
+   * this namespace computes on each cell. The following possibilities are
+   * implemented:
+   */
+  enum NormType
+  {
+    /**
+     * The function or difference of functions is integrated on each cell.
+     */
+    mean,
+    /**
+     * The absolute value of the function is integrated.
+     */
+    L1_norm,
+    /**
+     * The square of the function is integrated and the square root of the
+     * result is computed on each cell.
+     */
+    L2_norm,
+    /**
+     * The absolute value to the <i>p</i>th power is integrated and the
+     * <i>p</i>th root is computed on each cell. The exponent <i>p</i> is the
+     * last parameter of the function.
+     */
+    Lp_norm,
+    /**
+     * The maximum absolute value of the function.
+     */
+    Linfty_norm,
+    /**
+     * #L2_norm of the gradient.
+     */
+    H1_seminorm,
+    /**
+     * #L2_norm of the divergence of a vector field.
+     */
+    Hdiv_seminorm,
+    /**
+     * The square of this norm is the square of the #L2_norm plus the square
+     * of the #H1_seminorm.
+     */
+    H1_norm,
+    /**
+     * #Lp_norm of the gradient.
+     */
+    W1p_seminorm,
+    /**
+     * Same as #H1_norm, but for <i>L<sup>p</sup></i>.
+     */
+    W1p_norm,
+    /**
+     * #Linfty_norm of the gradient.
+     */
+    W1infty_seminorm,
+    /**
+     * Same as #H1_norm, but for <i>L<sup>infty</sup></i>.
+     */
+    W1infty_norm
+
+  };
+  /**
+   * @name Interpolation and projection
+   */
+  //@{
+  /**
+   * Compute the interpolation of @p function at the support points to the
+   * finite element space described by the Triangulation and FiniteElement
+   * object with which the given DoFHandler argument is initialized. It is
+   * assumed that the number of components of @p function matches that of the
+   * finite element used by @p dof.
+   *
+   * Note that you may have to call <tt>hanging_nodes.distribute(vec)</tt>
+   * with the hanging nodes from space @p dof afterwards, to make the result
+   * continuous again.
+   *
+   * The template argument <code>DoFHandlerType</code> may either be of type
+   * DoFHandler or hp::DoFHandler.
+   *
+   * See the general documentation of this namespace for further information.
+   *
+   * @todo The @p mapping argument should be replaced by a
+   * hp::MappingCollection in case of a hp::DoFHandler.
+   */
+  template <typename VectorType, int dim, int spacedim, template <int, int> class DoFHandlerType>
+  void interpolate (const Mapping<dim,spacedim>        &mapping,
+                    const DoFHandlerType<dim,spacedim> &dof,
+                    const Function<spacedim,double>    &function,
+                    VectorType                         &vec);
+
+  /**
+   * Calls the @p interpolate() function above with
+   * <tt>mapping=MappingQGeneric@<dim,spacedim>@(1)</tt>.
+   */
+  template <typename VectorType, typename DoFHandlerType>
+  void interpolate (const DoFHandlerType                                   &dof,
+                    const Function<DoFHandlerType::space_dimension,double> &function,
+                    VectorType                                             &vec);
+
+  /**
+   * Interpolate different finite element spaces. The interpolation of vector
+   * @p data_1 is executed from the FE space represented by @p dof_1 to the
+   * vector @p data_2 on FE space @p dof_2. The interpolation on each cell is
+   * represented by the matrix @p transfer. Curved boundaries are neglected so
+   * far.
+   *
+   * Note that you may have to call <tt>hanging_nodes.distribute(data_2)</tt>
+   * with the hanging nodes from space @p dof_2 afterwards, to make the result
+   * continuous again.
+   *
+   * @note Instantiations for this template are provided for some vector types
+   * (see the general documentation of the namespace), but only the same
+   * vector for InVector and OutVector. Other combinations must be
+   * instantiated by hand.
+   */
+  template <int dim, class InVector, class OutVector, int spacedim>
+  void interpolate (const DoFHandler<dim,spacedim> &dof_1,
+                    const DoFHandler<dim,spacedim> &dof_2,
+                    const FullMatrix<double>       &transfer,
+                    const InVector                 &data_1,
+                    OutVector                      &data_2);
+
+  /**
+   * This function is a kind of generalization or modification of the very
+   * first interpolate() function in the series. It interpolates a set of
+   * functions onto the finite element space given by the DoFHandler argument
+   * where the determination which function to use is made based on the
+   * material id (see
+   * @ref GlossMaterialId)
+   * of each cell.
+   *
+   * @param mapping        - The mapping to use to determine the location of
+   * support points at which the functions are to be evaluated.
+   * @param dof_handler    - DoFHandler initialized with Triangulation and
+   * FiniteElement objects,
+   * @param function_map   - std::map reflecting the correspondence between
+   * material ids and functions,
+   * @param dst            - global FE vector at the support points,
+   * @param component_mask - mask of components that shall be interpolated
+   *
+   * @note If a material id of some group of cells is missed in @p
+   * function_map, then @p dst will not be updated in the respective degrees
+   * of freedom of the output vector. For example, if @p dst was successfully
+   * initialized to capture the degrees of freedom of the @p dof_handler of
+   * the problem with all zeros in it, then those zeros which correspond to
+   * the missed material ids will still remain in @p dst even after calling
+   * this function.
+   *
+   * @note Degrees of freedom located on faces between cells of different
+   * material ids will get their value by that cell which was called last in
+   * the respective loop over cells implemented in this function. Since this
+   * process is kind of arbitrary, you cannot control it. However, if you want
+   * to have control over the order in which cells are visited, let us take a
+   * look at the following example: Let @p u be a variable of interest which
+   * is approximated by some CG finite element. Let @p 0, @p 1 and @p 2 be
+   * material ids of cells on the triangulation. Let 0: 0.0, 1: 1.0, 2: 2.0 be
+   * the whole @p function_map that you want to pass to this function, where
+   * @p key is a material id and @p value is a value of @p u. By using the
+   * whole @p function_map you do not really know which values will be
+   * assigned to the face DoFs. On the other hand, if you split the whole @p
+   * function_map into three smaller independent objects 0: 0.0 and 1: 1.0 and
+   * 2: 2.0 and make three distinct calls of this function passing each of
+   * these objects separately (the order depends on what you want to get
+   * between cells), then each subsequent call will rewrite the intercell @p
+   * dofs of the previous one.
+   *
+   * @author Valentin Zingan, 2013
+   */
+  template <typename VectorType, typename DoFHandlerType>
+  void
+  interpolate_based_on_material_id
+  (const Mapping<DoFHandlerType::dimension, DoFHandlerType::space_dimension> &mapping,
+   const DoFHandlerType                                                  &dof_handler,
+   const std::map<types::material_id, const Function<DoFHandlerType::space_dimension, double> *> &function_map,
+   VectorType                                                            &dst,
+   const ComponentMask                                                   &component_mask = ComponentMask());
+
+  /**
+   * Gives the interpolation of a @p dof1-function @p u1 to a @p dof2-function
+   * @p u2, where @p dof1 and @p dof2 represent different triangulations with
+   * a common coarse grid.
+   *
+   * dof1 and dof2 need to have the same finite element discretization.
+   *
+   * Note that for continuous elements on grids with hanging nodes (i.e.
+   * locally refined grids) this function does not give the expected output.
+   * Indeed, the resulting output vector does not necessarily respect
+   * continuity requirements at hanging nodes, due to local cellwise
+   * interpolation.
+   *
+   * For this case (continuous elements on grids with hanging nodes), please
+   * use the interpolate_to_different_mesh function with an additional
+   * ConstraintMatrix argument, see below, or make the field conforming
+   * yourself by calling the @p ConstraintMatrix::distribute function of your
+   * hanging node constraints object.
+   *
+   * @note This function works with parallel::distributed::Triangulation, but
+   * only if the parallel partitioning is the same for both meshes (see the
+   * parallel::distributed::Triangulation<dim>::no_automatic_repartitioning
+   * flag).
+   */
+  template <int dim, int spacedim,
+            template <int, int> class DoFHandlerType,
+            typename VectorType>
+  void
+  interpolate_to_different_mesh (const DoFHandlerType<dim, spacedim> &dof1,
+                                 const VectorType                    &u1,
+                                 const DoFHandlerType<dim, spacedim> &dof2,
+                                 VectorType                          &u2);
+
+  /**
+   * Gives the interpolation of a @p dof1-function @p u1 to a @p dof2-function
+   * @p u2, where @p dof1 and @p dof2 represent different triangulations with
+   * a common coarse grid.
+   *
+   * dof1 and dof2 need to have the same finite element discretization.
+   *
+   * @p constraints is a hanging node constraints object corresponding to @p
+   * dof2. This object is particularly important when interpolating onto
+   * continuous elements on grids with hanging nodes (locally refined grids):
+   * Without it - due to cellwise interpolation - the resulting output vector
+   * does not necessarily respect continuity requirements at hanging nodes.
+   */
+  template <int dim, int spacedim,
+            template <int, int> class DoFHandlerType,
+            typename VectorType>
+  void
+  interpolate_to_different_mesh (const DoFHandlerType<dim, spacedim> &dof1,
+                                 const VectorType                    &u1,
+                                 const DoFHandlerType<dim, spacedim> &dof2,
+                                 const ConstraintMatrix              &constraints,
+                                 VectorType                          &u2);
+
+
+  /**
+   * The same function as above, but takes an InterGridMap object directly as
+   * a parameter. Useful for interpolating several vectors at the same time.
+   *
+   * @p intergridmap has to be initialized via InterGridMap::make_mapping
+   * pointing from a source DoFHandler to a destination DoFHandler.
+   */
+  template <int dim, int spacedim,
+            template <int, int> class DoFHandlerType,
+            typename VectorType>
+  void
+  interpolate_to_different_mesh
+  (const InterGridMap<DoFHandlerType<dim, spacedim> > &intergridmap,
+   const VectorType                                   &u1,
+   const ConstraintMatrix                             &constraints,
+   VectorType                                         &u2);
+
+  /**
+   * Compute the projection of @p function to the finite element space.
+   *
+   * By default, projection to the boundary and enforcement of zero boundary
+   * values are disabled. The ordering of arguments to this function is such
+   * that you need not give a second quadrature formula if you don't want to
+   * project to the boundary first, but that you must if you want to do so.
+   *
+   * This function needs the mass matrix of the finite element space on the
+   * present grid. To this end, the mass matrix is assembled exactly using
+   * MatrixCreator::create_mass_matrix. This function performs numerical
+   * quadrature using the given quadrature rule; you should therefore make
+   * sure that the given quadrature formula is also sufficient for the
+   * integration of the mass matrix.
+   *
+   * See the general documentation of this namespace for further information.
+   *
+   * In 1d, the default value of the boundary quadrature formula is an invalid
+   * object since integration on the boundary doesn't happen in 1d.
+   */
+  template <int dim, typename VectorType, int spacedim>
+  void project (const Mapping<dim, spacedim>    &mapping,
+                const DoFHandler<dim,spacedim>  &dof,
+                const ConstraintMatrix          &constraints,
+                const Quadrature<dim>           &quadrature,
+                const Function<spacedim,double> &function,
+                VectorType                      &vec,
+                const bool                      enforce_zero_boundary = false,
+                const Quadrature<dim-1>         &q_boundary = (dim > 1 ?
+                                                              QGauss<dim-1>(2) :
+                                                              Quadrature<dim-1>(0)),
+                const bool                      project_to_boundary_first = false);
+
+  /**
+   * Calls the project() function above, with
+   * <tt>mapping=MappingQGeneric@<dim@>(1)</tt>.
+   */
+  template <int dim, typename VectorType, int spacedim>
+  void project (const DoFHandler<dim,spacedim>  &dof,
+                const ConstraintMatrix          &constraints,
+                const Quadrature<dim>           &quadrature,
+                const Function<spacedim,double> &function,
+                VectorType                      &vec,
+                const bool                      enforce_zero_boundary = false,
+                const Quadrature<dim-1>         &q_boundary = (dim > 1 ?
+                                                              QGauss<dim-1>(2) :
+                                                              Quadrature<dim-1>(0)),
+                const bool                      project_to_boundary_first = false);
+
+  /**
+   * Same as above, but for arguments of type hp::DoFHandler,
+   * hp::QCollection, hp::MappingCollection.
+   */
+  template <int dim, typename VectorType, int spacedim>
+  void project (const hp::MappingCollection<dim, spacedim> &mapping,
+                const hp::DoFHandler<dim,spacedim>         &dof,
+                const ConstraintMatrix                     &constraints,
+                const hp::QCollection<dim>                 &quadrature,
+                const Function<spacedim,double>            &function,
+                VectorType                                 &vec,
+                const bool                                 enforce_zero_boundary = false,
+                const hp::QCollection<dim-1> &q_boundary = hp::QCollection<dim-1>(dim > 1 ?
+                                                           QGauss<dim-1>(2) :
+                                                           Quadrature<dim-1>(0)),
+                const bool                                 project_to_boundary_first = false);
+
+  /**
+   * Calls the project() function above, with a collection of $Q_1$ mapping
+   * objects, i.e., with hp::StaticMappingQ1::mapping_collection.
+   */
+  template <int dim, typename VectorType, int spacedim>
+  void project (const hp::DoFHandler<dim,spacedim> &dof,
+                const ConstraintMatrix             &constraints,
+                const hp::QCollection<dim>         &quadrature,
+                const Function<spacedim,double>    &function,
+                VectorType                         &vec,
+                const bool                         enforce_zero_boundary = false,
+                const hp::QCollection<dim-1>       &q_boundary = hp::QCollection<dim-1>(dim > 1 ?
+                    QGauss<dim-1>(2) :
+                    Quadrature<dim-1>(0)),
+                const bool                         project_to_boundary_first = false);
+
+  /**
+   * Compute Dirichlet boundary conditions.  This function makes up a map of
+   * degrees of freedom subject to Dirichlet boundary conditions and the
+   * corresponding values to be assigned to them, by interpolation around the
+   * boundary. For each degree of freedom at the boundary, if its index
+   * already exists in @p boundary_values then its boundary value will be
+   * overwritten, otherwise a new entry with proper index and boundary value
+   * for this degree of freedom will be inserted into @p boundary_values.
+   *
+   * The parameter @p function_map provides a list of boundary indicators to
+   * be handled by this function and corresponding boundary value functions.
+   * The keys of this map correspond to the number @p boundary_id of the face.
+   * numbers::internal_face_boundary_id is an illegal value for this key since
+   * it is reserved for interior faces.
+   *
+   * The flags in the last parameter, @p component_mask denote which
+   * components of the finite element space shall be interpolated. If it is
+   * left as specified by the default value (i.e. an empty array), all
+   * components are interpolated. If it is different from the default value,
+   * it is assumed that the number of entries equals the number of components
+   * in the boundary functions and the finite element, and those components in
+   * the given boundary function will be used for which the respective flag
+   * was set in the component mask. See also
+   * @ref GlossComponentMask.
+   * As an example, assume that you are solving the Stokes equations in 2d,
+   * with variables $(u,v,p)$ and that you only want to interpolate boundary
+   * values for the velocity, then the component mask should correspond to
+   * <code>(true,true,false)</code>.
+   *
+   * @note Whether a component mask has been specified or not, the number of
+   * components of the functions in @p function_map must match that of the
+   * finite element used by @p dof. In other words, for the example above, you
+   * need to provide a Function object that has 3 components (the two
+   * velocities and the pressure), even though you are only interested in the
+   * first two of them. interpolate_boundary_values() will then call this
+   * function to obtain a vector of 3 values at each interpolation point but
+   * only take the first two and discard the third. In other words, you are
+   * free to return whatever you like in the third component of the vector
+   * returned by Function::vector_value, but the Function object must state
+   * that it has 3 components.
+   *
+   * If the finite element used has shape functions that are non-zero in more
+   * than one component (in deal.II speak: they are non-primitive), then these
+   * components can presently not be used for interpolating boundary values.
+   * Thus, the elements in the component mask corresponding to the components
+   * of these non-primitive shape functions must be @p false.
+   *
+   * See the general documentation of this namespace for more information.
+   */
+  template <typename DoFHandlerType>
+  void
+  interpolate_boundary_values
+  (const Mapping<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &mapping,
+   const DoFHandlerType                                                     &dof,
+   const typename FunctionMap<DoFHandlerType::space_dimension>::type        &function_map,
+   std::map<types::global_dof_index,double>                                 &boundary_values,
+   const ComponentMask                                                      &component_mask = ComponentMask());
+
+  /**
+   * Like the previous function, but take a mapping collection to go with the
+   * hp::DoFHandler object.
+   */
+  template <int dim, int spacedim>
+  void
+  interpolate_boundary_values
+  (const hp::MappingCollection<dim,spacedim>  &mapping,
+   const hp::DoFHandler<dim,spacedim>         &dof,
+   const typename FunctionMap<spacedim>::type &function_map,
+   std::map<types::global_dof_index,double>   &boundary_values,
+   const ComponentMask                        &component_mask = ComponentMask());
+
+  /**
+   * Same function as above, but taking only one pair of boundary indicator
+   * and corresponding boundary function. The same comments apply as for the
+   * previous function, in particular about the use of the component mask and
+   * the required size of the function object.
+   *
+   * @see
+   * @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+   */
+  template <typename DoFHandlerType>
+  void
+  interpolate_boundary_values
+  (const Mapping<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &mapping,
+   const DoFHandlerType                                                     &dof,
+   const types::boundary_id                                                  boundary_component,
+   const Function<DoFHandlerType::space_dimension,double>                   &boundary_function,
+   std::map<types::global_dof_index,double>                                 &boundary_values,
+   const ComponentMask                                                      &component_mask = ComponentMask());
+
+  /**
+   * Calls the other interpolate_boundary_values() function, see above, with
+   * <tt>mapping=MappingQGeneric@<dim,spacedim@>(1)</tt>. The same comments
+   * apply as for the previous function, in particular about the use of the
+   * component mask and the required size of the function object.
+   *
+   * @see
+   * @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+   */
+  template <typename DoFHandlerType>
+  void
+  interpolate_boundary_values
+  (const DoFHandlerType                                   &dof,
+   const types::boundary_id                                boundary_component,
+   const Function<DoFHandlerType::space_dimension,double> &boundary_function,
+   std::map<types::global_dof_index,double>               &boundary_values,
+   const ComponentMask                                    &component_mask = ComponentMask());
+
+
+  /**
+   * Calls the other interpolate_boundary_values() function, see above, with
+   * <tt>mapping=MappingQGeneric@<dim,spacedim@>(1)</tt>. The same comments
+   * apply as for the previous function, in particular about the use of the
+   * component mask and the required size of the function object.
+   */
+  template <typename DoFHandlerType>
+  void
+  interpolate_boundary_values
+  (const DoFHandlerType                                              &dof,
+   const typename FunctionMap<DoFHandlerType::space_dimension>::type &function_map,
+   std::map<types::global_dof_index,double>                          &boundary_values,
+   const ComponentMask                                               &component_mask = ComponentMask());
+
+
+  /**
+   * Insert the (algebraic) constraints due to Dirichlet boundary conditions
+   * into a ConstraintMatrix @p constraints. This function identifies the
+   * degrees of freedom subject to Dirichlet boundary conditions, adds them to
+   * the list of constrained DoFs in @p constraints and sets the respective
+   * inhomogeneity to the value interpolated around the boundary. If this
+   * routine encounters a DoF that already is constrained (for instance by a
+   * hanging node constraint, see below, or any other type of constraint, e.g.
+   * from periodic boundary conditions), the old setting of the constraint
+   * (dofs the entry is constrained to, inhomogeneities) is kept and nothing
+   * happens.
+   *
+   * @note When combining adaptively refined meshes with hanging node
+   * constraints and boundary conditions like from the current function within
+   * one ConstraintMatrix object, the hanging node constraints should always
+   * be set first, and then the boundary conditions since boundary conditions
+   * are not set in the second operation on degrees of freedom that are
+   * already constrained. This makes sure that the discretization remains
+   * conforming as is needed. See the discussion on conflicting constraints in
+   * the module on
+   * @ref constraints.
+   *
+   * The parameter @p boundary_component corresponds to the number @p
+   * boundary_id of the face.
+   *
+   * The flags in the last parameter, @p component_mask denote which
+   * components of the finite element space shall be interpolated. If it is
+   * left as specified by the default value (i.e. an empty array), all
+   * components are interpolated. If it is different from the default value,
+   * it is assumed that the number of entries equals the number of components
+   * in the boundary functions and the finite element, and those components in
+   * the given boundary function will be used for which the respective flag
+   * was set in the component mask. See also
+   * @ref GlossComponentMask.
+   * As an example, assume that you are solving the Stokes equations in 2d,
+   * with variables $(u,v,p)$ and that you only want to interpolate boundary
+   * values for the velocity, then the component mask should correspond to
+   * <code>(true,true,false)</code>.
+   *
+   * @note Whether a component mask has been specified or not, the number of
+   * components of the functions in @p function_map must match that of the
+   * finite element used by @p dof. In other words, for the example above, you
+   * need to provide a Function object that has 3 components (the two
+   * velocities and the pressure), even though you are only interested in the
+   * first two of them. interpolate_boundary_values() will then call this
+   * function to obtain a vector of 3 values at each interpolation point but
+   * only take the first two and discard the third. In other words, you are
+   * free to return whatever you like in the third component of the vector
+   * returned by Function::vector_value, but the Function object must state
+   * that it has 3 components.
+   *
+   * If the finite element used has shape functions that are non-zero in more
+   * than one component (in deal.II speak: they are non-primitive), then these
+   * components can presently not be used for interpolating boundary values.
+   * Thus, the elements in the component mask corresponding to the components
+   * of these non-primitive shape functions must be @p false.
+   *
+   * See the general documentation of this namespace for more information.
+   *
+   * @ingroup constraints
+   */
+  template <typename DoFHandlerType>
+  void
+  interpolate_boundary_values
+  (const Mapping<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &mapping,
+   const DoFHandlerType                                                     &dof,
+   const typename FunctionMap<DoFHandlerType::space_dimension>::type        &function_map,
+   ConstraintMatrix                                                         &constraints,
+   const ComponentMask                                                      &component_mask = ComponentMask());
+
+  /**
+   * Same function as above, but taking only one pair of boundary indicator
+   * and corresponding boundary function. The same comments apply as for the
+   * previous function, in particular about the use of the component mask and
+   * the required size of the function object.
+   *
+   * @ingroup constraints
+   *
+   * @see
+   * @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+   */
+  template <typename DoFHandlerType>
+  void
+  interpolate_boundary_values
+  (const Mapping<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &mapping,
+   const DoFHandlerType                                                     &dof,
+   const types::boundary_id                                                  boundary_component,
+   const Function<DoFHandlerType::space_dimension,double>                   &boundary_function,
+   ConstraintMatrix                                                         &constraints,
+   const ComponentMask                                                      &component_mask = ComponentMask());
+
+  /**
+   * Calls the other interpolate_boundary_values() function, see above, with
+   * <tt>mapping=MappingQGeneric@<dim,spacedim@>(1)</tt>. The same comments
+   * apply as for the previous function, in particular about the use of the
+   * component mask and the required size of the function object.
+   *
+   * @ingroup constraints
+   *
+   * @see
+   * @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+   */
+  template <typename DoFHandlerType>
+  void
+  interpolate_boundary_values
+  (const DoFHandlerType                                   &dof,
+   const types::boundary_id                                boundary_component,
+   const Function<DoFHandlerType::space_dimension,double> &boundary_function,
+   ConstraintMatrix                                       &constraints,
+   const ComponentMask                                    &component_mask = ComponentMask());
+
+
+  /**
+   * Calls the other interpolate_boundary_values() function, see above, with
+   * <tt>mapping=MappingQGeneric@<dim,spacedim@>(1)</tt>. The same comments
+   * apply as for the previous function, in particular about the use of the
+   * component mask and the required size of the function object.
+   *
+   * @ingroup constraints
+   */
+  template <typename DoFHandlerType>
+  void
+  interpolate_boundary_values
+  (const DoFHandlerType                                              &dof,
+   const typename FunctionMap<DoFHandlerType::space_dimension>::type &function_map,
+   ConstraintMatrix                                                  &constraints,
+   const ComponentMask                                               &component_mask = ComponentMask());
+
+
+  /**
+   * Project a function or a set of functions to the boundary of the domain.
+   * In other words, compute the solution of the following problem: Find $u_h
+   * \in V_h$ (where $V_h$ is the finite element space represented by the
+   * DoFHandler argument of this function) so that
+   * @f{align*}{
+   * \int_{\Gamma} \varphi_i u_h
+   * = \sum_{k \in {\cal K}} \int_{\Gamma_k} \varphi_i f_k,
+   * \qquad \forall \varphi_i \in V_h
+   * @f}
+   * where $\Gamma = \bigcup_{k \in {\cal K}} \Gamma_k$, $\Gamma_k \subset
+   * \partial\Omega$, $\cal K$ is the set of indices and $f_k$ the
+   * corresponding boundary functions represented in the function map argument
+   * @p boundary_functions to this function, and the integrals are evaluated by
+   * quadrature. This problem has a non-unique solution in the interior, but
+   * it is well defined for the degrees of freedom on the part of the
+   * boundary, $\Gamma$, for which we do the integration. The values of
+   * $u_h|_\Gamma$, i.e., the nodal values of the degrees of freedom of this
+   * function along the boundary, are then what is computed by this function.
+   *
+   * @param[in] mapping The mapping that will be used in the transformations
+   * necessary to integrate along the boundary.
+   * @param[in] dof The DoFHandler that describes the finite element space and
+   * the numbering of degrees of freedom.
+   * @param[in] boundary_functions A map from boundary indicators to pointers
+   * to functions that describe the desired values on those parts of the
+   * boundary marked with this boundary indicator (see
+   * @ref GlossBoundaryIndicator "Boundary indicator").
+   * The projection happens on only those parts of the boundary whose
+   * indicators are represented in this map.
+   * @param[in] q The face quadrature used in the integration necessary to
+   * compute the mass matrix and right hand side of the projection.
+   * @param[out] boundary_values The result of this function. It is a map
+   * containing all indices of degrees of freedom at the boundary (as covered
+   * by the boundary parts in @p boundary_functions) and the computed dof
+   * value for this degree of freedom. For each degree of freedom at the
+   * boundary, if its index already exists in @p boundary_values then its
+   * boundary value will be overwritten, otherwise a new entry with proper
+   * index and boundary value for this degree of freedom will be inserted into
+   * @p boundary_values.
+   * @param[in] component_mapping It is sometimes convenient to project a
+   * vector-valued function onto only parts of a finite element space (for
+   * example, to project a function with <code>dim</code> components onto the
+   * velocity components of a <code>dim+1</code> component DoFHandler for a
+   * Stokes problem). To allow for this, this argument allows components to be
+   * remapped. If the vector is not empty, it has to have one entry for each
+   * vector component of the finite element used in @p dof. This entry is the
+   * component number in @p boundary_functions that should be used for this
+   * component in @p dof. By default, no remapping is applied.
+   */
+  template <int dim, int spacedim>
+  void project_boundary_values (const Mapping<dim, spacedim>       &mapping,
+                                const DoFHandler<dim,spacedim>    &dof,
+                                const typename FunctionMap<spacedim>::type &boundary_functions,
+                                const Quadrature<dim-1>  &q,
+                                std::map<types::global_dof_index,double> &boundary_values,
+                                std::vector<unsigned int> component_mapping = std::vector<unsigned int>());
+
+  /**
+   * Calls the project_boundary_values() function, see above, with
+   * <tt>mapping=MappingQGeneric@<dim,spacedim@>(1)</tt>.
+   */
+  template <int dim, int spacedim>
+  void project_boundary_values (const DoFHandler<dim,spacedim>    &dof,
+                                const typename FunctionMap<spacedim>::type &boundary_function,
+                                const Quadrature<dim-1>  &q,
+                                std::map<types::global_dof_index,double> &boundary_values,
+                                std::vector<unsigned int> component_mapping = std::vector<unsigned int>());
+
+  /**
+   * Same as above, but for objects of type hp::DoFHandler
+   */
+  template <int dim, int spacedim>
+  void project_boundary_values (const hp::MappingCollection<dim, spacedim>       &mapping,
+                                const hp::DoFHandler<dim,spacedim>    &dof,
+                                const typename FunctionMap<spacedim>::type &boundary_functions,
+                                const hp::QCollection<dim-1>  &q,
+                                std::map<types::global_dof_index,double> &boundary_values,
+                                std::vector<unsigned int> component_mapping = std::vector<unsigned int>());
+
+  /**
+   * Calls the project_boundary_values() function, see above, with
+   * <tt>mapping=MappingQGeneric@<dim,spacedim@>(1)</tt>.
+   */
+  template <int dim, int spacedim>
+  void project_boundary_values (const hp::DoFHandler<dim,spacedim>    &dof,
+                                const typename FunctionMap<spacedim>::type &boundary_function,
+                                const hp::QCollection<dim-1>  &q,
+                                std::map<types::global_dof_index,double> &boundary_values,
+                                std::vector<unsigned int> component_mapping = std::vector<unsigned int>());
+
+  /**
+   * Project a function to the boundary of the domain, using the given
+   * quadrature formula for the faces. This function identifies the degrees of
+   * freedom subject to Dirichlet boundary conditions, adds them to the list
+   * of constrained DoFs in @p constraints and sets the respective
+   * inhomogeneity to the value resulting from the projection operation. If
+   * this routine encounters a DoF that already is constrained (for instance
+   * by a hanging node constraint, see below, or any other type of constraint,
+   * e.g. from periodic boundary conditions), the old setting of the
+   * constraint (dofs the entry is constrained to, inhomogeneities) is kept
+   * and nothing happens.
+   *
+   * @note When combining adaptively refined meshes with hanging node
+   * constraints and boundary conditions like from the current function within
+   * one ConstraintMatrix object, the hanging node constraints should always
+   * be set first, and then the boundary conditions since boundary conditions
+   * are not set in the second operation on degrees of freedom that are
+   * already constrained. This makes sure that the discretization remains
+   * conforming as is needed. See the discussion on conflicting constraints in
+   * the module on
+   * @ref constraints.
+   *
+   * If @p component_mapping is empty, it is assumed that the number of
+   * components of @p boundary_functions matches that of the finite element
+   * used by @p dof.
+   *
+   * In 1d, projection equals interpolation. Therefore,
+   * interpolate_boundary_values is called.
+   *
+   * @arg @p component_mapping: if the components in @p boundary_functions and
+   * @p dof do not coincide, this vector allows them to be remapped. If the
+   * vector is not empty, it has to have one entry for each component in @p
+   * dof. This entry is the component number in @p boundary_functions that
+   * should be used for this component in @p dof. By default, no remapping is
+   * applied.
+   *
+   * @ingroup constraints
+   */
+  template <int dim, int spacedim>
+  void project_boundary_values (const Mapping<dim, spacedim>   &mapping,
+                                const DoFHandler<dim,spacedim> &dof,
+                                const typename FunctionMap<spacedim>::type &boundary_functions,
+                                const Quadrature<dim-1>        &q,
+                                ConstraintMatrix               &constraints,
+                                std::vector<unsigned int>       component_mapping = std::vector<unsigned int>());
+
+  /**
+   * Convenience overload that takes no mapping argument: calls the
+   * project_boundary_values() function, see above, with
+   * <tt>mapping=MappingQGeneric@<dim,spacedim@>(1)</tt>.
+   *
+   * @ingroup constraints
+   */
+  template <int dim, int spacedim>
+  void project_boundary_values (const DoFHandler<dim,spacedim> &dof,
+                                const typename FunctionMap<spacedim>::type &boundary_function,
+                                const Quadrature<dim-1>        &q,
+                                ConstraintMatrix               &constraints,
+                                std::vector<unsigned int>       component_mapping = std::vector<unsigned int>());
+
+
+  /**
+   * Compute constraints that correspond to boundary conditions of the form
+   * $\vec{n}\times\vec{u}=\vec{n}\times\vec{f}$, i.e. the tangential
+   * components of $u$ and $f$ shall coincide.
+   *
+   * If the ConstraintMatrix @p constraints contained values or other
+   * constraints before, the new ones are added or the old ones overwritten,
+   * if a node of the boundary part to be used was already in the list of
+   * constraints. This is handled by using inhomogeneous constraints. Please
+   * note that when combining adaptive meshes and this kind of constraints,
+   * the Dirichlet conditions should be set first, and then completed by
+   * hanging node constraints, in order to make sure that the discretization
+   * remains consistent. See the discussion on conflicting constraints in the
+   * module on
+   * @ref constraints.
+   *
+   * This function is explicitly written to use with the FE_Nedelec elements.
+   * Thus it throws an exception, if it is called with other finite elements.
+   *
+   * The second argument of this function denotes the first vector component
+   * in the finite element that corresponds to the vector function that you
+   * want to constrain. For example, if we want to solve Maxwell's equations
+   * in 3d and the finite element has components $(E_x,E_y,E_z,B_x,B_y,B_z)$
+   * and we want the boundary conditions
+   * $\vec{n}\times\vec{B}=\vec{n}\times\vec{f}$, then @p
+   * first_vector_component would be 3. Vectors are implicitly assumed to have
+   * exactly <code>dim</code> components that are ordered in the same way as
+   * we usually order the coordinate directions, i.e. $x$-, $y$-, and finally
+   * $z$-component.
+   *
+   * The parameter @p boundary_component corresponds to the number @p
+   * boundary_id of the face. numbers::internal_face_boundary_id is an illegal
+   * value, since it is reserved for interior faces.
+   *
+   * The last argument is used to compute the normal vector $\vec{n}$ at
+   * the boundary points.
+   *
+   * <h4>Computing constraints</h4>
+   *
+   * To compute the constraints we use projection-based interpolation as
+   * proposed in Solin, Segeth and Dolezel (Higher order finite elements,
+   * Chapman&Hall, 2004) on every face located at the boundary.
+   *
+   * First one projects $\vec{f}$ on the lowest-order edge shape functions.
+   * Then the remaining part $(I-P_0)\vec{f}$ of the function is projected on
+   * the remaining higher-order edge shape functions. In the last step we
+   * project $(I-P_0-P_e)\vec{f}$ on the bubble shape functions defined on the
+   * face.
+   *
+   * @ingroup constraints
+   *
+   * @see
+   * @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+   */
+  template <int dim>
+  void project_boundary_values_curl_conforming (const DoFHandler<dim> &dof_handler,
+                                                const unsigned int first_vector_component,
+                                                const Function<dim,double> &boundary_function,
+                                                const types::boundary_id boundary_component,
+                                                ConstraintMatrix &constraints,
+                                                const Mapping<dim> &mapping = StaticMappingQ1<dim>::mapping);
+
+  /**
+   * Same as above for the hp-namespace: this overload takes an
+   * hp::DoFHandler and, in place of a single Mapping, an
+   * hp::MappingCollection that defaults to
+   * hp::StaticMappingQ1@<dim@>::mapping_collection.
+   *
+   * @ingroup constraints
+   *
+   * @see
+   * @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+   */
+  template <int dim>
+  void project_boundary_values_curl_conforming (const hp::DoFHandler<dim> &dof_handler,
+                                                const unsigned int first_vector_component,
+                                                const Function<dim,double> &boundary_function,
+                                                const types::boundary_id boundary_component,
+                                                ConstraintMatrix &constraints,
+                                                const hp::MappingCollection<dim, dim> &mapping_collection = hp::StaticMappingQ1<dim>::mapping_collection);
+
+  /**
+   * This function is an updated version of the
+   * project_boundary_values_curl_conforming function. The intention is to fix
+   * a problem when using the previous function in conjunction with non-
+   * rectangular geometries (i.e. elements with non-rectangular faces). The
+   * L2-projection method used has been taken from the paper "Electromagnetic
+   * scattering simulation using an H (curl) conforming hp finite element
+   * method in three dimensions" by PD Ledger, K Morgan and O Hassan ( Int. J.
+   * Num. Meth. Fluids, Volume 53, Issue 8, pages 1267–1296).
+   *
+   * This function will compute constraints that correspond to Dirichlet
+   * boundary conditions of the form
+   * $\vec{n}\times\vec{E}=\vec{n}\times\vec{F}$, i.e. the tangential
+   * components of $\vec{E}$ and $\vec{F}$ shall coincide.
+   *
+   * <h4>Computing constraints</h4>
+   *
+   * To compute the constraints we use a projection method based upon the
+   * paper mentioned above. In 2D this is done in a single stage for the edge-
+   * based shape functions, regardless of the order of the finite element. In
+   * 3D this is done in two stages, edges first and then faces.
+   *
+   * For each cell, each edge, $e$, is projected by solving the linear system
+   * $Ax=b$ where $x$ is the vector of constraints on degrees of freedom on the
+   * edge and
+   *
+   * $A_{ij} = \int_{e} (\vec{s}_{i}\cdot\vec{t})(\vec{s}_{j}\cdot\vec{t}) dS$
+   *
+   * $b_{i} = \int_{e} (\vec{s}_{i}\cdot\vec{t})(\vec{F}\cdot\vec{t}) dS$
+   *
+   * with $\vec{s}_{i}$ the $i^{th}$ shape function and $\vec{t}$ the tangent
+   * vector.
+   *
+   * Once all edge constraints, $x$, have been computed, we may compute the
+   * face constraints in a similar fashion, taking into account the residuals
+   * from the edges.
+   *
+   * For each face on the cell, $f$, we solve the linear system $By=c$ where
+   * $y$ is the vector of constraints on degrees of freedom on the face and
+   *
+   * $B_{ij} = \int_{f} (\vec{n} \times \vec{s}_{i}) \cdot (\vec{n} \times
+   * \vec{s}_{j}) dS$
+   *
+   * $c_{i} = \int_{f} (\vec{n} \times \vec{r}) \cdot (\vec{n} \times
+   * \vec{s}_i) dS$
+   *
+   * and $\vec{r} = \vec{F} - \sum_{e \in f} \sum_{i \in e} x_{i}\vec{s}_i$,
+   * the edge residual.
+   *
+   * The resulting constraints are then given in the solutions $x$ and $y$.
+   *
+   * If the ConstraintMatrix @p constraints contained values or other
+   * constraints before, the new ones are added or the old ones overwritten,
+   * if a node of the boundary part to be used was already in the list of
+   * constraints. This is handled by using inhomogeneous constraints. Please
+   * note that when combining adaptive meshes and this kind of constraints,
+   * the Dirichlet conditions should be set first, and then completed by
+   * hanging node constraints, in order to make sure that the discretization
+   * remains consistent. See the discussion on conflicting constraints in the
+   * module on
+   * @ref constraints.
+   *
+   * <h4>Arguments to this function</h4>
+   *
+   * This function is explicitly for use with FE_Nedelec elements, or with
+   * FESystem elements which contain FE_Nedelec elements. It will throw an
+   * exception if called with any other finite element. The user must ensure
+   * that FESystem elements are correctly set up when using this function as
+   * this check is not possible in this case.
+   *
+   * The second argument of this function denotes the first vector component
+   * of the finite element which corresponds to the vector function that you
+   * wish to constrain. For example, if we are solving Maxwell's equations in
+   * 3D and have components $(E_x,E_y,E_z,B_x,B_y,B_z)$ and we want the
+   * boundary conditions $\vec{n}\times\vec{B}=\vec{n}\times\vec{f}$, then @p
+   * first_vector_component would be 3. The @p boundary_function must return 6
+   * components in this example, with the first 3 corresponding to $\vec{E}$
+   * and the second 3 corresponding to $\vec{B}$. Vectors are implicitly
+   * assumed to have exactly <code>dim</code> components that are ordered in
+   * the same way as we usually order the coordinate directions, i.e. $x$-,
+   * $y$-, and finally $z$-component.
+   *
+   * The parameter @p boundary_component corresponds to the number @p
+   * boundary_id of the face. numbers::internal_face_boundary_id is an illegal
+   * value, since it is reserved for interior faces.
+   *
+   * The last argument is used to compute the normal vector $\vec{n}$ at
+   * the boundary points.
+   *
+   *
+   * @ingroup constraints
+   *
+   * @see
+   * @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+   */
+  template <int dim>
+  void project_boundary_values_curl_conforming_l2 (const DoFHandler<dim> &dof_handler,
+                                                   const unsigned int first_vector_component,
+                                                   const Function<dim,double> &boundary_function,
+                                                   const types::boundary_id boundary_component,
+                                                   ConstraintMatrix &constraints,
+                                                   const Mapping<dim> &mapping = StaticMappingQ1<dim>::mapping);
+
+
+  /**
+   * hp-namespace version of project_boundary_values_curl_conforming_l2
+   * (above): this overload takes an hp::DoFHandler and, in place of a single
+   * Mapping, an hp::MappingCollection that defaults to
+   * hp::StaticMappingQ1@<dim@>::mapping_collection.
+   *
+   * @ingroup constraints
+   */
+  template <int dim>
+  void project_boundary_values_curl_conforming_l2 (const hp::DoFHandler<dim> &dof_handler,
+                                                   const unsigned int first_vector_component,
+                                                   const Function<dim,double> &boundary_function,
+                                                   const types::boundary_id boundary_component,
+                                                   ConstraintMatrix &constraints,
+                                                   const hp::MappingCollection<dim, dim> &mapping_collection = hp::StaticMappingQ1<dim>::mapping_collection);
+
+
+  /**
+   * Compute constraints that correspond to boundary conditions of the form
+   * $\vec{n}^T\vec{u}=\vec{n}^T\vec{f}$, i.e. the normal components of the
+   * solution $u$ and a given $f$ shall coincide. The function $f$ is given by
+   * @p boundary_function and the resulting constraints are added to @p
+   * constraints for faces with boundary indicator @p boundary_component.
+   *
+   * This function is explicitly written to use with the FE_RaviartThomas
+   * elements. Thus it throws an exception, if it is called with other finite
+   * elements.
+   *
+   * If the ConstraintMatrix @p constraints contained values or other
+   * constraints before, the new ones are added or the old ones overwritten,
+   * if a node of the boundary part to be used was already in the list of
+   * constraints. This is handled by using inhomogeneous constraints. Please
+   * note that when combining adaptive meshes and this kind of constraints,
+   * the Dirichlet conditions should be set first, and then completed by
+   * hanging node constraints, in order to make sure that the discretization
+   * remains consistent. See the discussion on conflicting constraints in the
+   * module on
+   * @ref constraints.
+   *
+   * The argument @p first_vector_component denotes the first vector component
+   * in the finite element that corresponds to the vector function $\vec{u}$
+   * that you want to constrain. Vectors are implicitly assumed to have
+   * exactly <code>dim</code> components that are ordered in the same way as
+   * we usually order the coordinate directions, i.e., $x$-, $y$-, and finally
+   * $z$-component.
+   *
+   * The parameter @p boundary_component corresponds to the @p boundary_id of
+   * the faces where the boundary conditions are applied.
+   * numbers::internal_face_boundary_id is an illegal value, since it is
+   * reserved for interior faces. The @p mapping is used to compute the normal
+   * vector $\vec{n}$ at the boundary points.
+   *
+   * <h4>Computing constraints</h4>
+   *
+   * To compute the constraints we use the interpolation operator proposed in
+   * Brezzi, Fortin (Mixed and Hybrid Finite Element Methods, Springer, 1991)
+   * on every face located at the boundary.
+   *
+   * @ingroup constraints
+   *
+   * @see
+   * @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+   */
+  template<int dim>
+  void project_boundary_values_div_conforming (const DoFHandler<dim> &dof_handler,
+                                               const unsigned int first_vector_component,
+                                               const Function<dim,double> &boundary_function,
+                                               const types::boundary_id boundary_component,
+                                               ConstraintMatrix &constraints,
+                                               const Mapping<dim> &mapping = StaticMappingQ1<dim>::mapping);
+
+  /**
+   * Same as above for the hp-namespace: this overload takes an
+   * hp::DoFHandler and, in place of a single Mapping, an
+   * hp::MappingCollection that defaults to
+   * hp::StaticMappingQ1@<dim@>::mapping_collection.
+   *
+   * @ingroup constraints
+   *
+   * @see
+   * @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+   */
+  template<int dim>
+  void project_boundary_values_div_conforming (const hp::DoFHandler<dim> &dof_handler,
+                                               const unsigned int first_vector_component,
+                                               const Function<dim,double> &boundary_function,
+                                               const types::boundary_id boundary_component,
+                                               ConstraintMatrix &constraints,
+                                               const hp::MappingCollection<dim, dim> &mapping_collection = hp::StaticMappingQ1<dim>::mapping_collection);
+
+
+  /**
+   * This function computes the constraints that correspond to boundary
+   * conditions of the form $\vec u \cdot \vec n=\vec u_\Gamma \cdot \vec n$,
+   * i.e. normal flux constraints if $\vec u$ is a vector-valued quantity.
+   * These conditions have exactly the form handled by the ConstraintMatrix
+   * class, so instead of creating a map between boundary degrees of freedom
+   * and corresponding value, we here create a list of constraints that are
+   * written into a ConstraintMatrix. This object may already have some
+   * content, for example from hanging node constraints, that remains
+   * untouched. These constraints have to be applied to the linear system like
+   * any other such constraints, i.e. you have to condense the linear system
+   * with the constraints before solving, and you have to distribute the
+   * solution vector afterwards.
+   *
+   * The use of this function is explained in more detail in step-31. It
+   * doesn't make much sense in 1d, so the function throws an exception in
+   * that case.
+   *
+   * The second argument of this function denotes the first vector component
+   * in the finite element that corresponds to the vector function that you
+   * want to constrain. For example, if we were solving a Stokes equation in
+   * 2d and the finite element had components $(u,v,p)$, then @p
+   * first_vector_component would be zero. On the other hand, if we solved the
+   * Maxwell equations in 3d and the finite element has components
+   * $(E_x,E_y,E_z,B_x,B_y,B_z)$ and we want the boundary condition $\vec
+   * B\cdot \vec n=\vec B_\Gamma\cdot \vec n$, then @p first_vector_component
+   * would be 3. Vectors are implicitly assumed to have exactly
+   * <code>dim</code> components that are ordered in the same way as we
+   * usually order the coordinate directions, i.e. $x$-, $y$-, and finally
+   * $z$-component. The function assumes, but can't check, that the vector
+   * components in the range
+   * <code>[first_vector_component,first_vector_component+dim)</code> come
+   * from the same base finite element. For example, in the Stokes example
+   * above, it would not make sense to use a
+   * <code>FESystem@<dim@>(FE_Q@<dim@>(2), 1, FE_Q@<dim@>(1), dim)</code>
+   * (note that the first velocity vector component is a $Q_2$ element,
+   * whereas all the other ones are $Q_1$ elements) as there would be points
+   * on the boundary where the $x$-velocity is defined but no corresponding
+   * $y$- or $z$-velocities.
+   *
+   * The third argument denotes the set of boundary indicators on which the
+   * boundary condition is to be enforced. Note that, as explained below, this
+   * is one of the few functions where it makes a difference whether we call the
+   * function multiple times with only one boundary indicator, or whether we
+   * call the function once with the whole set of boundary indicators at once.
+   *
+   * The fourth parameter describes the boundary function that is used for
+   * computing these constraints.
+   *
+   * The mapping argument is used to compute the boundary points where the
+   * function needs to request the normal vector $\vec n$ from the boundary
+   * description.
+   *
+   * @note When combining adaptively refined meshes with hanging node
+   * constraints and boundary conditions like from the current function within
+   * one ConstraintMatrix object, the hanging node constraints should always
+   * be set first, and then the boundary conditions since boundary conditions
+   * are not set in the second operation on degrees of freedom that are
+   * already constrained. This makes sure that the discretization remains
+   * conforming as is needed. See the discussion on conflicting constraints in
+   * the module on
+   * @ref constraints.
+   *
+   *
+   * <h4>Computing constraints in 2d</h4>
+   *
+   * Computing these constraints requires some smarts. The main question is
+   * what the normal vector at a given boundary point is. Consider the
+   * following situation:
+   *
+   * <p ALIGN="center">
+   * @image html no_normal_flux_1.png
+   * </p>
+   *
+   * Here, we have two cells that use a bilinear mapping (i.e.,
+   * MappingQGeneric(1)). Consequently, for each of the cells, the normal
+   * vector is perpendicular to the straight edge. If the two edges at the top
+   * and right are meant to approximate a curved boundary (as indicated by the
+   * dashed line), then neither of the two computed normal vectors are equal
+   * to the exact normal vector (though they approximate it as the mesh is
+   * refined further). What is worse, if we constrain $\vec u \cdot \vec n=
+   * \vec u_\Gamma \cdot \vec n$ at the common vertex with the normal vector
+   * from both cells, then we constrain the vector $\vec u$ with respect to
+   * two linearly independent vectors; consequently, the constraint would be
+   * $\vec u=\vec u_\Gamma$ at this point (i.e. <i>all</i> components of the
+   * vector), which is not what we wanted.
+   *
+   * To deal with this situation, the algorithm works in the following way: at
+   * each point where we want to constrain $\vec u$, we first collect all
+   * normal vectors that adjacent cells might compute at this point. We then
+   * do not constrain $\vec u \cdot \vec n=\vec u_\Gamma \cdot \vec n$ for
+   * <i>each</i> of these normal vectors but only for the <i>average</i> of
+   * the normal vectors. In the example above, we therefore record only a
+   * single constraint $\vec u \cdot \vec {\bar n}=\vec u_\Gamma \cdot \vec
+   * {\bar n}$, where $\vec {\bar n}$ is the average of the two indicated
+   * normal vectors.
+   *
+   * Unfortunately, this is not quite enough. Consider the situation here:
+   *
+   * <p ALIGN="center">
+   * @image html no_normal_flux_2.png
+   * </p>
+   *
+   * If again the top and right edges approximate a curved boundary, and the
+   * left boundary a separate boundary (for example straight) so that the
+   * exact boundary has indeed a corner at the top left vertex, then the above
+   * construction would not work: here, we indeed want the constraint that
+   * $\vec u$ is zero at this point (because the normal velocities with
+   * respect to both the left normal as well as the top normal vector should
+   * be zero), not that the velocity in the direction of the average normal
+   * vector is zero.
+   *
+   * Consequently, we use the following heuristic to determine whether all
+   * normal vectors computed at one point are to be averaged: if two normal
+   * vectors for the same point are computed on <i>different</i> cells, then
+   * they are to be averaged. This covers the first example above. If they are
+   * computed from the same cell, then the fact that they are different is
+   * considered indication that they come from different parts of the boundary
+   * that might be joined by a real corner, and must not be averaged.
+   *
+   * There is one problem with this scheme. If, for example, the same domain
+   * we have considered above, is discretized with the following mesh, then we
+   * get into trouble:
+   *
+   * <p ALIGN="center">
+   * @image html no_normal_flux_3.png
+   * </p>
+   *
+   * Here, the algorithm assumes that the boundary does not have a corner at
+   * the point where faces $F1$ and $F2$ join because at that point there are
+   * two different normal vectors computed from different cells. If you intend
+   * for there to be a corner of the exact boundary at this point, the only
+   * way to deal with this is to assign the two parts of the boundary
+   * different boundary indicators and call this function twice, once for each
+   * boundary indicator; doing so will yield only one normal vector at this
+   * point per invocation (because we consider only one boundary part at a
+   * time), with the result that the normal vectors will not be averaged. This
+   * situation also needs to be taken into account when using this function
+   * around reentrant corners on Cartesian meshes. If normal-flux boundary
+   * conditions are to be enforced on non-Cartesian meshes around reentrant
+   * corners, one may even get cycles in the constraints as one will in
+   * general constrain different components from the two sides. In that case,
+   * set a no-slip constraint on the reentrant vertex first.
+   *
+   *
+   * <h4>Computing constraints in 3d</h4>
+   *
+   * The situation is more complicated in 3d. Consider the following case
+   * where we want to compute the constraints at the marked vertex:
+   *
+   * <p ALIGN="center">
+   * @image html no_normal_flux_4.png
+   * </p>
+   *
+   * Here, we get four different normal vectors, one from each of the four
+   * faces that meet at the vertex. Even though they may form a complete set
+   * of vectors, it is not our intent to constrain all components of the
+   * vector field at this point. Rather, we would like to still allow
+   * tangential flow, where the term "tangential" has to be suitably defined.
+   *
+   * In a case like this, the algorithm proceeds as follows: for each cell
+   * that has computed two tangential vectors at this point, we compute the
+   * unconstrained direction as the outer product of the two tangential
+   * vectors (if necessary multiplied by minus one). We then average these
+   * tangential vectors. Finally, we compute constraints for the two
+   * directions perpendicular to this averaged tangential direction.
+   *
+   * There are cases where one cell contributes two tangential directions and
+   * another one only one; for example, this would happen if both top and
+   * front faces of the left cell belong to the boundary selected whereas only
+   * the top face of the right cell belongs to it, maybe indicating that the
+   * entire front part of the domain is a smooth manifold whereas the top
+   * really forms two separate manifolds that meet in a ridge, and that
+   * normal-flux boundary conditions are only desired on the front manifold
+   * and the right one on top. In cases like these, it's difficult to define
+   * what should happen. The current implementation simply ignores the one
+   * contribution from the cell that only contributes one normal vector. In
+   * the example shown, this is acceptable because the normal vector for the
+   * front face of the left cell is the same as the normal vector provided by
+   * the front face of the right cell (the surface is planar) but it would be
+   * a problem if the front manifold would be curved. Regardless, it is
+   * unclear how one would proceed in this case and ignoring the single cell
+   * is likely the best one can do.
+   *
+   *
+   * <h4>Results</h4>
+   *
+   * Because it makes for good pictures, here are two images of vector fields
+   * on a circle and on a sphere to which the constraints computed by this
+   * function have been applied:
+   *
+   * <p ALIGN="center">
+   * @image html no_normal_flux_5.png
+   * @image html no_normal_flux_6.png
+   * </p>
+   *
+   * The vector fields are not physically reasonable but the tangentiality
+   * constraint is clearly enforced. The fact that the vector fields are zero
+   * at some points on the boundary is an artifact of the way they are created;
+   * they are not constrained to be zero at these points.
+   *
+   * @ingroup constraints
+   *
+   * @see
+   * @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+   */
+  template <int dim, template <int, int> class DoFHandlerType, int spacedim>
+  void
+  compute_nonzero_normal_flux_constraints
+  (const DoFHandlerType<dim,spacedim>   &dof_handler,
+   const unsigned int                    first_vector_component,
+   const std::set<types::boundary_id>   &boundary_ids,
+   typename FunctionMap<spacedim>::type &function_map,
+   ConstraintMatrix                     &constraints,
+   const Mapping<dim, spacedim>         &mapping = StaticMappingQ1<dim>::mapping);
+
+  /**
+   * Same as above for homogeneous normal-flux constraints, i.e. for
+   * conditions of the form $\vec u \cdot \vec n=0$. Consequently, this
+   * function takes no function map argument; the remaining parameters match
+   * those of compute_nonzero_normal_flux_constraints().
+   *
+   * @ingroup constraints
+   *
+   * @see
+   * @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+   */
+  template <int dim, template <int, int> class DoFHandlerType, int spacedim>
+  void
+  compute_no_normal_flux_constraints
+  (const DoFHandlerType<dim,spacedim> &dof_handler,
+   const unsigned int                  first_vector_component,
+   const std::set<types::boundary_id> &boundary_ids,
+   ConstraintMatrix                   &constraints,
+   const Mapping<dim, spacedim>       &mapping = StaticMappingQ1<dim>::mapping);
+
+  /**
+   * Compute the constraints that correspond to boundary conditions of the
+   * form $\vec u \times \vec n=\vec u_\Gamma \times \vec n$, i.e. tangential
+   * flow constraints if $\vec u$ is a vector-valued quantity. This function
+   * constrains exactly those vector-valued components that are left
+   * unconstrained by compute_no_normal_flux_constraints, and leaves the one
+   * component unconstrained that is constrained by
+   * compute_no_normal_flux_constraints.
+   *
+   * The parameter list is the same as that of
+   * compute_nonzero_normal_flux_constraints().
+   *
+   * @ingroup constraints
+   *
+   * @see
+   * @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+   */
+  template <int dim, template <int, int> class DoFHandlerType, int spacedim>
+  void
+  compute_nonzero_tangential_flux_constraints
+  (const DoFHandlerType<dim,spacedim>   &dof_handler,
+   const unsigned int                    first_vector_component,
+   const std::set<types::boundary_id>   &boundary_ids,
+   typename FunctionMap<spacedim>::type &function_map,
+   ConstraintMatrix                     &constraints,
+   const Mapping<dim, spacedim>         &mapping = StaticMappingQ1<dim>::mapping);
+
+  /**
+   * Same as above for homogeneous tangential-flux constraints. This function
+   * takes no function map argument; the remaining parameters match those of
+   * compute_nonzero_tangential_flux_constraints().
+   *
+   * @note The name of this function does not contain the word "tangential"
+   * even though, as stated in the first sentence, it is the homogeneous
+   * counterpart of compute_nonzero_tangential_flux_constraints() and not of
+   * compute_nonzero_normal_flux_constraints().
+   *
+   * @ingroup constraints
+   *
+   * @see
+   * @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+   */
+  template <int dim, template <int, int> class DoFHandlerType, int spacedim>
+  void
+  compute_normal_flux_constraints
+  (const DoFHandlerType<dim,spacedim> &dof_handler,
+   const unsigned int                  first_vector_component,
+   const std::set<types::boundary_id> &boundary_ids,
+   ConstraintMatrix                   &constraints,
+   const Mapping<dim, spacedim>       &mapping = StaticMappingQ1<dim>::mapping);
+
+
+  //@}
+  /**
+   * @name Assembling of right hand sides
+   */
+  //@{
+
+  /**
+   * Create a right hand side vector. Prior content of the given
+   * @p rhs_vector is deleted.
+   *
+   * See the general documentation of this namespace for further information.
+   */
+  template <int dim, int spacedim>
+  void create_right_hand_side (const Mapping<dim, spacedim>    &mapping,
+                               const DoFHandler<dim,spacedim> &dof,
+                               const Quadrature<dim> &q,
+                               const Function<spacedim,double>   &rhs,
+                               Vector<double>        &rhs_vector);
+
+  /**
+   * Convenience overload that takes no mapping argument: calls the
+   * create_right_hand_side() function, see above, with
+   * <tt>mapping=MappingQGeneric@<dim@>(1)</tt>.
+   */
+  template <int dim, int spacedim>
+  void create_right_hand_side (const DoFHandler<dim,spacedim> &dof,
+                               const Quadrature<dim> &q,
+                               const Function<spacedim,double>   &rhs,
+                               Vector<double>        &rhs_vector);
+
+  /**
+   * Like the previous set of functions, but for hp objects: this overload
+   * takes an hp::MappingCollection, an hp::DoFHandler and an hp::QCollection
+   * in place of a Mapping, a DoFHandler and a Quadrature.
+   */
+  template <int dim, int spacedim>
+  void create_right_hand_side (const hp::MappingCollection<dim,spacedim>    &mapping,
+                               const hp::DoFHandler<dim,spacedim> &dof,
+                               const hp::QCollection<dim> &q,
+                               const Function<spacedim,double>   &rhs,
+                               Vector<double>        &rhs_vector);
+
+  /**
+   * Like the previous function, but without a mapping collection argument:
+   * only an hp::DoFHandler and an hp::QCollection are passed in addition to
+   * @p rhs and @p rhs_vector.
+   */
+  template <int dim, int spacedim>
+  void create_right_hand_side (const hp::DoFHandler<dim,spacedim> &dof,
+                               const hp::QCollection<dim> &q,
+                               const Function<spacedim,double>   &rhs,
+                               Vector<double>        &rhs_vector);
+
+  /**
+   * Create a right hand side vector for a point source at point @p p. In
+   * other words, it creates a vector $F$ so that $F_i = \int_\Omega
+   * \delta(x-p) \phi_i(x) dx$. Prior content of the given @p rhs_vector is
+   * deleted.
+   *
+   * See the general documentation of this namespace for further information.
+   */
+  template <int dim, int spacedim>
+  void create_point_source_vector(const Mapping<dim,spacedim>    &mapping,
+                                  const DoFHandler<dim,spacedim> &dof,
+                                  const Point<spacedim>      &p,
+                                  Vector<double>        &rhs_vector);
+
+  /**
+   * Convenience overload without a mapping argument: calls the
+   * create_point_source_vector() function, see above, with
+   * <tt>mapping=MappingQGeneric@<dim@>(1)</tt>.
+   */
+  template <int dim, int spacedim>
+  void create_point_source_vector(const DoFHandler<dim,spacedim> &dof,
+                                  const Point<spacedim>      &p,
+                                  Vector<double>        &rhs_vector);
+
+  /**
+   * Like the previous set of functions, but for hp objects, i.e. taking an
+   * hp::MappingCollection and an hp::DoFHandler.
+   */
+  template <int dim, int spacedim>
+  void create_point_source_vector(const hp::MappingCollection<dim,spacedim>    &mapping,
+                                  const hp::DoFHandler<dim,spacedim> &dof,
+                                  const Point<spacedim>      &p,
+                                  Vector<double>        &rhs_vector);
+
+  /**
+   * Like the previous set of functions, but for hp objects. This overload
+   * takes no mapping argument and uses the default Q1 mapping object. Note
+   * that if your hp::DoFHandler uses any active fe index other than zero,
+   * then you need to call the function above that provides a mapping object
+   * for each active fe index.
+   */
+  template <int dim, int spacedim>
+  void create_point_source_vector(const hp::DoFHandler<dim,spacedim> &dof,
+                                  const Point<spacedim>      &p,
+                                  Vector<double>        &rhs_vector);
+
+  /**
+   * Create a right hand side vector for a point source at point @p p. This
+   * variation of the function is meant for vector-valued problems with
+   * exactly dim components (it will also work for problems with more than dim
+   * components, and in this case simply considers only the first dim
+   * components of the shape functions). It computes a right hand side that
+   * corresponds to a forcing function that is equal to a delta function times
+   * a given direction. In other words, it creates a vector $F$ so that $F_i =
+   * \int_\Omega [\mathbf d \delta(x-p)] \cdot \phi_i(x) dx$. Note here that
+   * $\phi_i$ is a vector-valued function. $\mathbf d$ is the given direction
+   * of the source term $\mathbf d \delta(x-p)$ and corresponds to the @p
+   * direction argument to be passed to this function.
+   *
+   * Prior content of the given @p rhs_vector is deleted.
+   *
+   * See the general documentation of this namespace for further information.
+   */
+  template <int dim, int spacedim>
+  void create_point_source_vector(const Mapping<dim,spacedim>    &mapping,
+                                  const DoFHandler<dim,spacedim> &dof,
+                                  const Point<spacedim>          &p,
+                                  const Point<dim>               &direction,
+                                  Vector<double>                 &rhs_vector);
+
+  /**
+   * Convenience overload without a mapping argument: calls the
+   * create_point_source_vector() function for vector-valued finite elements,
+   * see above, with <tt>mapping=MappingQGeneric@<dim@>(1)</tt>.
+   */
+  template <int dim, int spacedim>
+  void create_point_source_vector(const DoFHandler<dim,spacedim> &dof,
+                                  const Point<spacedim>          &p,
+                                  const Point<dim>               &direction,
+                                  Vector<double>                 &rhs_vector);
+
+  /**
+   * Like the previous set of functions, but for hp objects, i.e. taking an
+   * hp::MappingCollection and an hp::DoFHandler in addition to the @p
+   * direction of the point source.
+   */
+  template <int dim, int spacedim>
+  void create_point_source_vector(const hp::MappingCollection<dim,spacedim> &mapping,
+                                  const hp::DoFHandler<dim,spacedim>        &dof,
+                                  const Point<spacedim>                     &p,
+                                  const Point<dim>                          &direction,
+                                  Vector<double>                            &rhs_vector);
+
+  /**
+   * Like the previous set of functions, but for hp objects. This overload
+   * takes no mapping argument and uses the default Q1 mapping object. Note
+   * that if your hp::DoFHandler uses any active fe index other than zero,
+   * then you need to call the function above that provides a mapping object
+   * for each active fe index.
+   */
+  template <int dim, int spacedim>
+  void create_point_source_vector(const hp::DoFHandler<dim,spacedim> &dof,
+                                  const Point<spacedim>              &p,
+                                  const Point<dim>                   &direction,
+                                  Vector<double>                     &rhs_vector);
+
+  /**
+   * Create a right hand side vector from boundary forces. Prior content of
+   * the given @p rhs_vector is deleted.
+   *
+   * See the general documentation of this namespace for further information.
+   *
+   * @see
+   * @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+   */
+  template <int dim, int spacedim>
+  void create_boundary_right_hand_side (const Mapping<dim,spacedim>      &mapping,
+                                        const DoFHandler<dim,spacedim>   &dof,
+                                        const Quadrature<dim-1> &q,
+                                        const Function<spacedim,double>     &rhs,
+                                        Vector<double>          &rhs_vector,
+                                        const std::set<types::boundary_id> &boundary_ids = std::set<types::boundary_id>());
+
+  /**
+   * Convenience overload without a mapping argument: calls the
+   * create_boundary_right_hand_side() function, see above, with
+   * <tt>mapping=MappingQGeneric@<dim@>(1)</tt>.
+   *
+   * @see
+   * @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+   */
+  template <int dim, int spacedim>
+  void create_boundary_right_hand_side (const DoFHandler<dim,spacedim>   &dof,
+                                        const Quadrature<dim-1> &q,
+                                        const Function<spacedim,double>     &rhs,
+                                        Vector<double>          &rhs_vector,
+                                        const std::set<types::boundary_id> &boundary_ids = std::set<types::boundary_id>());
+
+  /**
+   * Same as the set of functions above, but for hp objects, i.e. taking an
+   * hp::MappingCollection, an hp::DoFHandler and an hp::QCollection.
+   *
+   * @see
+   * @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+   */
+  template <int dim, int spacedim>
+  void create_boundary_right_hand_side (const hp::MappingCollection<dim,spacedim>      &mapping,
+                                        const hp::DoFHandler<dim,spacedim>   &dof,
+                                        const hp::QCollection<dim-1> &q,
+                                        const Function<spacedim,double>     &rhs,
+                                        Vector<double>          &rhs_vector,
+                                        const std::set<types::boundary_id> &boundary_ids = std::set<types::boundary_id>());
+
+  /**
+   * Calls the create_boundary_right_hand_side() function for hp objects, see
+   * above, with a single Q1 mapping as collection. This function therefore
+   * will only work if the only active fe index in use is zero.
+   *
+   * @see
+   * @ref GlossBoundaryIndicator "Glossary entry on boundary indicators"
+   */
+  template <int dim, int spacedim>
+  void create_boundary_right_hand_side (const hp::DoFHandler<dim,spacedim>   &dof,
+                                        const hp::QCollection<dim-1> &q,
+                                        const Function<spacedim,double>     &rhs,
+                                        Vector<double>          &rhs_vector,
+                                        const std::set<types::boundary_id> &boundary_ids = std::set<types::boundary_id>());
+
+  //@}
+  /**
+   * @name Evaluation of functions and errors
+   */
+  //@{
+
+  /**
+   * Compute the error of the finite element solution.  Integrate the
+   * difference between a reference function which is given as a continuous
+   * function object, and a finite element function. The result of this
+   * function is the vector @p difference that contains one value per active
+   * cell $K$ of the triangulation. Each of the values of this vector $d$
+   * equals
+   * @f{align*}{
+   * d_K = \| u-u_h \|_X
+   * @f}
+   * where $X$ denotes the norm chosen and $u$ represents the exact solution.
+   *
+   * It is assumed that the number of components of the function @p
+   * exact_solution matches that of the finite element used by @p dof.
+   *
+   * @param[in] mapping The mapping that is used when integrating the
+   * difference $u-u_h$.
+   * @param[in] dof The DoFHandler object that describes the finite element
+   * space in which the solution vector lives.
+   * @param[in] fe_function A vector with nodal values representing the
+   * numerical approximation $u_h$. This vector needs to correspond to the
+   * finite element space represented by @p dof.
+   * @param[in] exact_solution The exact solution that is used to compute the
+   * error.
+   * @param[out] difference The vector of values $d_K$ computed as above.
+   * @param[in] q The quadrature formula used to approximate the integral
+   * shown above. Note that some quadrature formulas are more useful than
+   * others in integrating $u-u_h$. For example, it is known that the $Q_1$
+   * approximation $u_h$ to the exact solution $u$ of a Laplace equation is
+   * particularly accurate (in fact, superconvergent, i.e. accurate to higher
+   * order) at the 4 Gauss points of a cell in 2d (or 8 points in 3d) that
+   * correspond to a QGauss(2) object. Consequently, because a QGauss(2)
+   * formula only evaluates the two solutions at these particular points,
+   * choosing this quadrature formula may indicate an error far smaller than
+   * it actually is.
+   * @param[in] norm The norm $X$ shown above that should be computed. If the
+   * norm is NormType::Hdiv_seminorm, then the finite element on which this
+   * function is called needs to have at least dim vector components, and the
+   * divergence will be computed on the first dim components. This works, for
+   * example, on the finite elements used for the mixed Laplace (step-20) and
+   * the Stokes equations (step-22).
+   * @param[in] weight The additional argument @p weight allows evaluating
+   * weighted norms.  The weight function may be scalar, establishing a
+   * spatially variable weight in the domain for all components equally. This
+   * may be used, for instance, to only integrate over parts of the domain.
+   * The weight function may also be vector-valued, with as many components as
+   * the finite element: Then, different components get different weights. A
+   * typical application is when the error with respect to only one or a
+   * subset of the solution variables is to be computed, in which case the
+   * other components would have weight values equal to zero. The
+   * ComponentSelectFunction class is particularly useful for this purpose as
+   * it provides such a "mask" weight. The weight function is expected to be
+   * positive, but negative values are not filtered. The default value of this
+   * function, a null pointer, is interpreted as "no weighting function",
+   * i.e., weight=1 in the whole domain for all vector components uniformly.
+   * @param[in] exponent This value denotes the $p$ used in computing
+   * $L^p$-norms and $W^{1,p}$-norms. The value is ignored if a @p norm other
+   * than NormType::Lp_norm or NormType::W1p_norm is chosen.
+   *
+   *
+   * See the general documentation of this namespace for more information.
+   *
+   * @note If the integration here happens over the cells of a
+   * parallel::distributed::Triangulation object, then this function computes
+   * the vector elements $d_K$ for an output vector with as many cells as
+   * there are active cells of the triangulation object of the current
+   * processor. However, not all active cells are in fact locally owned: some
+   * may be ghost or artificial cells (see
+   * @ref GlossGhostCell "here"
+   * and
+   * @ref GlossArtificialCell "here").
+   * The vector computed will, in the case of a distributed triangulation,
+   * contain zeros for cells that are not locally owned. As a consequence, in
+   * order to compute the <i>global</i> $L_2$ error (for example), the errors
+   * from different processors need to be combined, but this is simple because
+   * every processor only computes contributions for those cells of the global
+   * triangulation it locally owns (and these sets are, by definition,
+   * mutually disjoint). Consequently, the following piece of code computes
+   * the global $L_2$ error across multiple processors sharing a
+   * parallel::distributed::Triangulation:
+   * @code
+   *    Vector<double> local_errors (tria.n_active_cells());
+   *    VectorTools::integrate_difference (mapping, dof,
+   *                                       solution, exact_solution,
+   *                                       local_errors,
+   *                                       QGauss<dim>(fe.degree+2),
+   *                                       VectorTools::L2_norm);
+   *    const double total_local_error = local_errors.l2_norm();
+   *    const double total_global_error
+   *      = std::sqrt (Utilities::MPI::sum (total_local_error * total_local_error, MPI_COMM_WORLD));
+   * @endcode
+   * The squaring and taking the square root is necessary in order to compute
+   * the sum of squares of norms over all cells in the definition of the
+   * $L_2$ norm:
+   * @f{align*}{
+   * \textrm{error} = \sqrt{\sum_K \|u-u_h\|_{L_2(K)}^2}
+   * @f}
+   * Obviously, if you are interested in computing the $L_1$ norm of the
+   * error, the correct form of the last two lines would have been
+   * @code
+   *    const double total_local_error = local_errors.l1_norm();
+   *    const double total_global_error
+   *      = Utilities::MPI::sum (total_local_error, MPI_COMM_WORLD);
+   * @endcode
+   * instead, and similar considerations hold when computing the $L_\infty$
+   * norm of the error.
+   *
+   * Instantiations for this template are provided for some vector types (see
+   * the general documentation of the namespace): InVector may be any of the
+   * types listed in the documentation of the namespace, whereas OutVector
+   * may only be Vector<double> or Vector<float>.
+   */
+  template <int dim, class InVector, class OutVector, int spacedim>
+  void integrate_difference (const Mapping<dim,spacedim>    &mapping,
+                             const DoFHandler<dim,spacedim> &dof,
+                             const InVector                 &fe_function,
+                             const Function<spacedim,double>       &exact_solution,
+                             OutVector                      &difference,
+                             const Quadrature<dim>          &q,
+                             const NormType                 &norm,
+                             const Function<spacedim,double>       *weight = 0,
+                             const double exponent = 2.);
+
+  /**
+   * Convenience overload without a mapping argument: calls the
+   * integrate_difference() function, see above, with
+   * <tt>mapping=MappingQGeneric@<dim@>(1)</tt>.
+   */
+  template <int dim, class InVector, class OutVector, int spacedim>
+  void integrate_difference (const DoFHandler<dim,spacedim> &dof,
+                             const InVector                 &fe_function,
+                             const Function<spacedim,double>       &exact_solution,
+                             OutVector                      &difference,
+                             const Quadrature<dim>          &q,
+                             const NormType                 &norm,
+                             const Function<spacedim,double>       *weight = 0,
+                             const double exponent = 2.);
+
+  /**
+   * Same as above for hp, i.e. taking an hp::MappingCollection, an
+   * hp::DoFHandler and an hp::QCollection.
+   */
+  template <int dim, class InVector, class OutVector, int spacedim>
+  void integrate_difference (const hp::MappingCollection<dim,spacedim> &mapping,
+                             const hp::DoFHandler<dim,spacedim>        &dof,
+                             const InVector                            &fe_function,
+                             const Function<spacedim,double>                  &exact_solution,
+                             OutVector                                 &difference,
+                             const hp::QCollection<dim>                &q,
+                             const NormType                            &norm,
+                             const Function<spacedim,double>                  *weight = 0,
+                             const double exponent = 2.);
+
+  /**
+   * Convenience overload for hp objects without a mapping argument: calls
+   * the integrate_difference() function, see above, with
+   * <tt>mapping=MappingQGeneric@<dim@>(1)</tt>.
+   */
+  template <int dim, class InVector, class OutVector, int spacedim>
+  void integrate_difference (const hp::DoFHandler<dim,spacedim> &dof,
+                             const InVector                     &fe_function,
+                             const Function<spacedim,double>           &exact_solution,
+                             OutVector                          &difference,
+                             const hp::QCollection<dim>         &q,
+                             const NormType                     &norm,
+                             const Function<spacedim,double>           *weight = 0,
+                             const double exponent = 2.);
+
+  /**
+   * Point error evaluation. Find the first cell containing the given point
+   * and compute the difference of a (possibly vector-valued) finite element
+   * function and a continuous function (with as many vector components as the
+   * finite element) at this point.
+   *
+   * This is a wrapper function using a Q1-mapping for cell boundaries to call
+   * the other point_difference() function, i.e. the one that takes a Mapping
+   * argument.
+   *
+   * @note If the cell in which the point is found is not locally owned, an
+   * exception of type VectorTools::ExcPointNotAvailableHere is thrown.
+   */
+  template <int dim, typename VectorType, int spacedim>
+  void point_difference (const DoFHandler<dim,spacedim>  &dof,
+                         const VectorType                &fe_function,
+                         const Function<spacedim,double> &exact_solution,
+                         Vector<double>                  &difference,
+                         const Point<spacedim>           &point);
+
+  /**
+   * Point error evaluation. Find the first cell containing the given point
+   * and compute the difference of a (possibly vector-valued) finite element
+   * function and a continuous function (with as many vector components as the
+   * finite element) at this point.
+   *
+   * Compared with the other function of the same name, this function uses an
+   * arbitrary mapping, given by @p mapping, to evaluate the difference.
+   *
+   * @note If the cell in which the point is found is not locally owned, an
+   * exception of type VectorTools::ExcPointNotAvailableHere is thrown.
+   */
+  template <int dim, typename VectorType, int spacedim>
+  void point_difference (const Mapping<dim, spacedim>    &mapping,
+                         const DoFHandler<dim,spacedim>  &dof,
+                         const VectorType                &fe_function,
+                         const Function<spacedim,double> &exact_solution,
+                         Vector<double>                  &difference,
+                         const Point<spacedim>           &point);
+
+  /**
+   * Evaluate a possibly vector-valued finite element function defined by the
+   * given DoFHandler and nodal vector at the given point, and return the
+   * (vector) value of this function through the last argument, @p value.
+   *
+   * This is a wrapper function using a Q1-mapping for cell boundaries to call
+   * the other point_value() function.
+   *
+   * @note If the cell in which the point is found is not locally owned, an
+   * exception of type VectorTools::ExcPointNotAvailableHere is thrown.
+   */
+  template <int dim, typename VectorType, int spacedim>
+  void
+  point_value (const DoFHandler<dim,spacedim> &dof,
+               const VectorType               &fe_function,
+               const Point<spacedim>          &point,
+               Vector<double>                 &value);
+
+  /**
+   * Same as above for hp, i.e. for an hp::DoFHandler.
+   *
+   * @note If the cell in which the point is found is not locally owned, an
+   * exception of type VectorTools::ExcPointNotAvailableHere is thrown.
+   */
+  template <int dim, typename VectorType, int spacedim>
+  void
+  point_value (const hp::DoFHandler<dim,spacedim> &dof,
+               const VectorType                   &fe_function,
+               const Point<spacedim>              &point,
+               Vector<double>                     &value);
+
+  /**
+   * Evaluate a scalar finite element function defined by the given DoFHandler
+   * and nodal vector at the given point, and return the value of this
+   * function.
+   *
+   * Compared with the other function of the same name, this is a wrapper
+   * function using a Q1-mapping for cells.
+   *
+   * This function is used in the "Possibilities for extensions" part of the
+   * results section of
+   * @ref step_3 "step-3".
+   *
+   * @return The value of the finite element function at @p point.
+   *
+   * @note If the cell in which the point is found is not locally owned, an
+   * exception of type VectorTools::ExcPointNotAvailableHere is thrown.
+   */
+  template <int dim, typename VectorType, int spacedim>
+  double
+  point_value (const DoFHandler<dim,spacedim> &dof,
+               const VectorType               &fe_function,
+               const Point<spacedim>          &point);
+
+  /**
+   * Same as above for hp, i.e. for an hp::DoFHandler.
+   *
+   * @note If the cell in which the point is found is not locally owned, an
+   * exception of type VectorTools::ExcPointNotAvailableHere is thrown.
+   */
+  template <int dim, typename VectorType, int spacedim>
+  double
+  point_value (const hp::DoFHandler<dim,spacedim> &dof,
+               const VectorType                   &fe_function,
+               const Point<spacedim>              &point);
+
+  /**
+   * Evaluate a possibly vector-valued finite element function defined by the
+   * given DoFHandler and nodal vector at the given point, and return the
+   * (vector) value of this function through the last argument, @p value.
+   *
+   * Compared with the other function of the same name, this function uses an
+   * arbitrary mapping for evaluation.
+   *
+   * @note If the cell in which the point is found is not locally owned, an
+   * exception of type VectorTools::ExcPointNotAvailableHere is thrown.
+   */
+  template <int dim, typename VectorType, int spacedim>
+  void
+  point_value (const Mapping<dim, spacedim>   &mapping,
+               const DoFHandler<dim,spacedim> &dof,
+               const VectorType               &fe_function,
+               const Point<spacedim>          &point,
+               Vector<double>                 &value);
+
+  /**
+   * Same as above for hp, i.e. for an hp::MappingCollection and an
+   * hp::DoFHandler.
+   *
+   * @note If the cell in which the point is found is not locally owned, an
+   * exception of type VectorTools::ExcPointNotAvailableHere is thrown.
+   */
+  template <int dim, typename VectorType, int spacedim>
+  void
+  point_value (const hp::MappingCollection<dim, spacedim> &mapping,
+               const hp::DoFHandler<dim,spacedim>         &dof,
+               const VectorType                           &fe_function,
+               const Point<spacedim>                      &point,
+               Vector<double>                             &value);
+
+  /**
+   * Evaluate a scalar finite element function defined by the given DoFHandler
+   * and nodal vector at the given point, and return the value of this
+   * function.
+   *
+   * Compared with the other function of the same name, this function uses an
+   * arbitrary mapping for evaluation.
+   *
+   * @return The value of the finite element function at @p point.
+   *
+   * @note If the cell in which the point is found is not locally owned, an
+   * exception of type VectorTools::ExcPointNotAvailableHere is thrown.
+   */
+  template <int dim, typename VectorType, int spacedim>
+  double
+  point_value (const Mapping<dim,spacedim>    &mapping,
+               const DoFHandler<dim,spacedim> &dof,
+               const VectorType               &fe_function,
+               const Point<spacedim>          &point);
+
+  /**
+   * Same as above for hp, i.e. for an hp::MappingCollection and an
+   * hp::DoFHandler.
+   *
+   * @note If the cell in which the point is found is not locally owned, an
+   * exception of type VectorTools::ExcPointNotAvailableHere is thrown.
+   */
+  template <int dim, typename VectorType, int spacedim>
+  double
+  point_value (const hp::MappingCollection<dim,spacedim> &mapping,
+               const hp::DoFHandler<dim,spacedim>        &dof,
+               const VectorType                          &fe_function,
+               const Point<spacedim>                     &point);
+
+  /**
+   * Evaluate a possibly vector-valued finite element function defined by the
+   * given DoFHandler and nodal vector at the given point, and return the
+   * (vector) gradient of this function through the last argument, @p value.
+   *
+   * This is a wrapper function using a Q1-mapping for cell boundaries to call
+   * the other point_gradient() function.
+   *
+   * @note If the cell in which the point is found is not locally owned, an
+   * exception of type VectorTools::ExcPointNotAvailableHere is thrown.
+   */
+  template <int dim, typename VectorType, int spacedim>
+  void
+  point_gradient (const DoFHandler<dim,spacedim>    &dof,
+                  const VectorType                  &fe_function,
+                  const Point<spacedim>             &point,
+                  std::vector<Tensor<1, spacedim, typename VectorType::value_type> > &value);
+
+  /**
+   * Same as above for hp, i.e. for an hp::DoFHandler.
+   *
+   * @note If the cell in which the point is found is not locally owned, an
+   * exception of type VectorTools::ExcPointNotAvailableHere is thrown.
+   */
+  template <int dim, typename VectorType, int spacedim>
+  void
+  point_gradient (const hp::DoFHandler<dim,spacedim> &dof,
+                  const VectorType                   &fe_function,
+                  const Point<spacedim>              &point,
+                  std::vector<Tensor<1, spacedim, typename VectorType::value_type> > &value);
+
+  /**
+   * Evaluate a scalar finite element function defined by the given DoFHandler
+   * and nodal vector at the given point, and return the gradient of this
+   * function.
+   *
+   * Compared with the other function of the same name, this is a wrapper
+   * function using a Q1-mapping for cells.
+   *
+   * @return The gradient of the finite element function at @p point.
+   *
+   * @note If the cell in which the point is found is not locally owned, an
+   * exception of type VectorTools::ExcPointNotAvailableHere is thrown.
+   */
+  template <int dim, typename VectorType, int spacedim>
+  Tensor<1, spacedim, typename VectorType::value_type>
+  point_gradient (const DoFHandler<dim,spacedim> &dof,
+                  const VectorType               &fe_function,
+                  const Point<spacedim>          &point);
+
+  /**
+   * Same as above for hp, i.e. for an hp::DoFHandler.
+   *
+   * @note If the cell in which the point is found is not locally owned, an
+   * exception of type VectorTools::ExcPointNotAvailableHere is thrown.
+   */
+  template <int dim, typename VectorType, int spacedim>
+  Tensor<1, spacedim, typename VectorType::value_type>
+  point_gradient (const hp::DoFHandler<dim,spacedim> &dof,
+                  const VectorType                   &fe_function,
+                  const Point<spacedim>              &point);
+
+  /**
+   * Evaluate a possibly vector-valued finite element function defined by the
+   * given DoFHandler and nodal vector at the given point, and return the
+   * gradients of this function through the last argument, @p value.
+   *
+   * Compared with the other function of the same name, this function uses an
+   * arbitrary mapping for evaluation.
+   *
+   * @note If the cell in which the point is found is not locally owned, an
+   * exception of type VectorTools::ExcPointNotAvailableHere is thrown.
+   */
+  template <int dim, typename VectorType, int spacedim>
+  void
+  point_gradient (const Mapping<dim, spacedim>      &mapping,
+                  const DoFHandler<dim,spacedim>    &dof,
+                  const VectorType                  &fe_function,
+                  const Point<spacedim>             &point,
+                  std::vector<Tensor<1, spacedim, typename VectorType::value_type> > &value);
+
+  /**
+   * Same as above for hp, i.e. for an hp::MappingCollection and an
+   * hp::DoFHandler.
+   *
+   * @note If the cell in which the point is found is not locally owned, an
+   * exception of type VectorTools::ExcPointNotAvailableHere is thrown.
+   */
+  template <int dim, typename VectorType, int spacedim>
+  void
+  point_gradient (const hp::MappingCollection<dim, spacedim> &mapping,
+                  const hp::DoFHandler<dim,spacedim>         &dof,
+                  const VectorType                           &fe_function,
+                  const Point<spacedim>                      &point,
+                  std::vector<Tensor<1, spacedim, typename VectorType::value_type> > &value);
+
+  /**
+   * Evaluate a scalar finite element function defined by the given DoFHandler
+   * and nodal vector at the given point, and return the gradient of this
+   * function.
+   *
+   * Compared with the other function of the same name, this function uses an
+   * arbitrary mapping for evaluation.
+   *
+   * @return The gradient of the finite element function at @p point.
+   *
+   * @note If the cell in which the point is found is not locally owned, an
+   * exception of type VectorTools::ExcPointNotAvailableHere is thrown.
+   */
+  template <int dim, typename VectorType, int spacedim>
+  Tensor<1, spacedim, typename VectorType::value_type>
+  point_gradient (const Mapping<dim,spacedim>    &mapping,
+                  const DoFHandler<dim,spacedim> &dof,
+                  const VectorType               &fe_function,
+                  const Point<spacedim>          &point);
+
+  /**
+   * Same as above for hp, i.e. for an hp::MappingCollection and an
+   * hp::DoFHandler.
+   *
+   * @note If the cell in which the point is found is not locally owned, an
+   * exception of type VectorTools::ExcPointNotAvailableHere is thrown.
+   */
+  template <int dim, typename VectorType, int spacedim>
+  Tensor<1, spacedim, typename VectorType::value_type>
+  point_gradient (const hp::MappingCollection<dim,spacedim> &mapping,
+                  const hp::DoFHandler<dim,spacedim>        &dof,
+                  const VectorType                          &fe_function,
+                  const Point<spacedim>                     &point);
+
+  //@}
+  /**
+   * @name Mean value operations
+   */
+  //@{
+
+  /**
+   * Subtract the (algebraic) mean value from a vector.
+   *
+   * This function is most frequently used as a mean-value filter for Stokes:
+   * The pressure in Stokes' equations with only Dirichlet boundaries for the
+   * velocities is only determined up to a constant. This function allows
+   * subtracting the mean value of the pressure. It is usually called in a
+   * preconditioner and generates updates with mean value zero. The mean value
+   * is computed as the mean value of the degrees of freedom values as given
+   * by the input vector; they are not weighted by the area of cells, i.e. the
+   * mean is computed as $\sum_i v_i$, rather than as $\int_\Omega v(x) =
+   * \int_\Omega \sum_i v_i \phi_i(x)$. The latter can be obtained from the
+   * VectorTools::compute_mean_value() function, however.
+   *
+   * Apart from the vector @p v to operate on, this function takes a boolean
+   * mask @p p_select that has a true entry for every element of the vector
+   * for which the mean value shall be computed and later subtracted. The
+   * argument is used to denote which components of the solution vector
+   * correspond to the pressure, and avoid touching all other components of
+   * the vector, such as the velocity components. (Note, however, that the
+   * mask is not a
+   * @ref GlossComponentMask
+   * operating on the vector components of the finite element the solution
+   * vector @p v may be associated with; rather, it is a mask on the entire
+   * vector, without reference to what the vector elements mean.)
+   *
+   * The boolean mask @p p_select has an empty vector as default value, which
+   * will be interpreted as selecting all vector elements, hence, subtracting
+   * the algebraic mean value on the whole vector. This allows calling this
+   * function without a boolean mask if the whole vector should be processed.
+   *
+   * @note In the context of using this function to filter out the kernel of
+   * an operator (such as the null space of the Stokes operator that consists
+   * of the constant pressures), this function only makes sense for finite
+   * elements for which the null space indeed consists of the vector
+   * $(1,1,\ldots,1)^T$. This is the case for example for the usual Lagrange
+   * elements where the sum of all shape functions equals the function that is
+   * constant one. However, it is not true for some other functions: for
+   * example, for the FE_DGP element (another valid choice for the pressure in
+   * Stokes discretizations), the first shape function on each cell is
+   * constant while further elements are $L_2$ orthogonal to it (on the
+   * reference cell); consequently, the sum of all shape functions is not
+   * equal to one, and the vector that is associated with the constant mode is
+   * not equal to $(1,1,\ldots,1)^T$. For such elements, a different procedure
+   * has to be used when subtracting the mean value.
+   */
+  template <typename VectorType>
+  void subtract_mean_value(VectorType              &v,
+                           const std::vector<bool> &p_select = std::vector<bool>());
+
+
+  /**
+   * Compute the mean value of one component of the solution.
+   *
+   * This function integrates the chosen component over the whole domain and
+   * returns the result, i.e. it computes $\frac{1}{|\Omega|}\int_\Omega
+   * [u_h(x)]_c \; dx$ where $c$ is the vector component and $u_h$ is the
+   * function representation of the nodal vector given as fourth argument. The
+   * integral is evaluated numerically using the quadrature formula given as
+   * third argument.
+   *
+   * This function is used in the "Possibilities for extensions" part of the
+   * results section of
+   * @ref step_3 "step-3".
+   *
+   * @note The function is most often used when solving a problem whose
+   * solution is only defined up to a constant, for example a pure Neumann
+   * problem or the pressure in a Stokes or Navier-Stokes problem. In both
+   * cases, subtracting the mean value as computed by the current function,
+   * from the nodal vector does not generally yield the desired result of a
+   * finite element function with mean value zero. In fact, it only works for
+   * Lagrangian elements. For all other elements, you will need to compute the
+   * mean value and subtract it right inside the evaluation routine.
+   *
+   * @param[in] mapping The mapping to use for the cells of the domain.
+   * @param[in] dof The DoFHandler the nodal vector @p v belongs to.
+   * @param[in] quadrature The quadrature formula used to evaluate the
+   * integral numerically.
+   * @param[in] v The nodal vector whose function representation $u_h$ is
+   * integrated.
+   * @param[in] component The vector component $c$ of $u_h$ whose mean value
+   * is computed.
+   * @return The mean value of the selected component over the domain.
+   */
+  template <int dim, typename VectorType, int spacedim>
+  double compute_mean_value (const Mapping<dim, spacedim>   &mapping,
+                             const DoFHandler<dim,spacedim> &dof,
+                             const Quadrature<dim>          &quadrature,
+                             const VectorType               &v,
+                             const unsigned int             component);
+
+  /**
+   * Convenience overload without a mapping argument: calls the other
+   * compute_mean_value() function, see above, with
+   * <tt>mapping=MappingQGeneric@<dim@>(1)</tt>. The remaining arguments have
+   * the same meaning as in that function.
+   */
+  template <int dim, typename VectorType, int spacedim>
+  double compute_mean_value (const DoFHandler<dim,spacedim> &dof,
+                             const Quadrature<dim>          &quadrature,
+                             const VectorType               &v,
+                             const unsigned int             component);
+  //@}
+  /**
+   * Geometrical interpolation
+   */
+  //@{
+  /**
+   * Given a DoFHandler containing at least a spacedim vector field, this
+   * function interpolates the Triangulation at the support points of a FE_Q()
+   * finite element of the same degree as the degree of the required
+   * components.
+   *
+   * Curved manifolds are respected, and the resulting VectorType will be
+   * geometrically consistent. The resulting map is guaranteed to be
+   * interpolatory at the support points of a FE_Q() finite element of the
+   * same degree as the degree of the required components.
+   *
+   * If the underlying finite element is an FE_Q(1)^spacedim, then the
+   * resulting @p VectorType is a finite element field representation of the
+   * vertices of the Triangulation.
+   *
+   * The optional ComponentMask argument can be used to specify what
+   * components of the FiniteElement to use to describe the geometry. If no
+   * mask is specified at construction time, then a default one is used, i.e.,
+   * the first spacedim components of the FiniteElement are assumed to
+   * represent the geometry of the problem.
+   *
+   * This function is only implemented for FiniteElements where the specified
+   * components are primitive.
+   *
+   * @param[in] dh The DoFHandler whose finite element contains the vector
+   * field that describes the geometry.
+   * @param[out] vector The vector that receives the interpolated positions.
+   * @param[in] mask The components of the finite element that describe the
+   * geometry; the default-constructed mask selects the first spacedim
+   * components, as described above.
+   *
+   * @author Luca Heltai, 2015
+   */
+  template<typename DoFHandlerType, typename VectorType>
+  void get_position_vector(const DoFHandlerType &dh,
+                           VectorType           &vector,
+                           const ComponentMask  &mask = ComponentMask());
+
+  //@}
+
+  /**
+   * Exception raised when an operation requires an 'interpolating' finite
+   * element, i.e., one that has support points, but the finite element in
+   * use does not appear to provide any.
+   */
+  DeclExceptionMsg (ExcNonInterpolatingFE,
+                    "You are attempting an operation that requires the "
+                    "finite element involved to be 'interpolating', i.e., "
+                    "it needs to have support points. The finite element "
+                    "you are using here does not appear to have those.");
+
+  /**
+   * Exception raised when a given point lies inside a cell of a
+   * parallel::distributed::Triangulation that is not locally owned by the
+   * current processor.
+   */
+  DeclExceptionMsg (ExcPointNotAvailableHere,
+                    "The given point is inside a cell of a "
+                    "parallel::distributed::Triangulation that is not "
+                    "locally owned by this processor.");
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/numerics/vector_tools.templates.h b/include/deal.II/numerics/vector_tools.templates.h
new file mode 100644
index 0000000..e6889a3
--- /dev/null
+++ b/include/deal.II/numerics/vector_tools.templates.h
@@ -0,0 +1,7276 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__vector_tools_templates_h
+#define dealii__vector_tools_templates_h
+
+#include <deal.II/base/derivative_form.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/quadrature.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/parallel_vector.h>
+#include <deal.II/lac/parallel_block_vector.h>
+#include <deal.II/lac/petsc_vector.h>
+#include <deal.II/lac/petsc_block_vector.h>
+#include <deal.II/lac/trilinos_vector.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/lac/precondition.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/vector_memory.h>
+#include <deal.II/lac/filtered_matrix.h>
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_boundary.h>
+#include <deal.II/grid/grid_tools.h>
+#include <deal.II/grid/intergrid_map.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/fe_nedelec.h>
+#include <deal.II/fe/fe_raviart_thomas.h>
+#include <deal.II/fe/fe_system.h>
+#include <deal.II/fe/fe_tools.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/fe_nothing.h>
+#include <deal.II/fe/mapping_q1.h>
+#include <deal.II/hp/dof_handler.h>
+#include <deal.II/hp/fe_values.h>
+#include <deal.II/hp/mapping_collection.h>
+#include <deal.II/hp/q_collection.h>
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/matrix_tools.h>
+
+#include <deal.II/base/std_cxx11/array.h>
+#include <numeric>
+#include <algorithm>
+#include <vector>
+#include <cmath>
+#include <limits>
+#include <set>
+#include <list>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace VectorTools
+{
+
+  /**
+   * Interpolate @p function at the support points of the finite element(s)
+   * of @p dof and store the resulting values in @p vec.
+   *
+   * The element of @p dof is wrapped into an hp::FECollection so that the
+   * same code serves regular and hp DoFHandlers. For every element in the
+   * collection the unit support points are collected, duplicates are
+   * collapsed into "representative" points (so that a vector-valued function
+   * is evaluated only once per distinct point), and the function values are
+   * then written into @p vec on every locally owned cell that has degrees of
+   * freedom. Finally, @p vec is compressed with VectorOperation::insert.
+   *
+   * In debug mode the function asserts that @p vec has dof.n_dofs() entries,
+   * that @p function has as many components as the finite element, and that
+   * every element with degrees of freedom has unit support points
+   * (ExcNonInterpolatingFE otherwise).
+   *
+   * @param[in] mapping Mapping used to transform the unit support points to
+   * real space.
+   * @param[in] dof The DoFHandler describing the finite element space.
+   * @param[in] function The function to interpolate.
+   * @param[out] vec The vector receiving the interpolated nodal values.
+   */
+  template <typename VectorType, int dim, int spacedim,
+            template <int, int> class DoFHandlerType>
+  void interpolate (const Mapping<dim,spacedim>        &mapping,
+                    const DoFHandlerType<dim,spacedim> &dof,
+                    const Function<spacedim>           &function,
+                    VectorType                         &vec)
+  {
+    Assert (vec.size() == dof.n_dofs(),
+            ExcDimensionMismatch (vec.size(), dof.n_dofs()));
+    Assert (dof.get_fe().n_components() == function.n_components,
+            ExcDimensionMismatch(dof.get_fe().n_components(),
+                                 function.n_components));
+
+    const hp::FECollection<dim,spacedim> fe (dof.get_fe());
+    const unsigned int          n_components = fe.n_components();
+    const bool                  fe_is_system = (n_components != 1);
+
+    typename DoFHandlerType<dim,spacedim>::active_cell_iterator
+    cell = dof.begin_active(),
+    endc = dof.end();
+
+    // For FESystems many of the
+    // unit_support_points will appear
+    // multiple times, as a point may be
+    // unit_support_point for several of the
+    // components of the system.  The following
+    // is rather complicated, but at least
+    // attempts to avoid evaluating the
+    // vectorfunction multiple times at the
+    // same point on a cell.
+    //
+    // note that we have to set up all of the
+    // following arrays for each of the
+    // elements in the FECollection (which
+    // means only once if this is for a regular
+    // DoFHandler)
+    std::vector<std::vector<Point<dim> > > unit_support_points (fe.size());
+    for (unsigned int fe_index=0; fe_index<fe.size(); ++fe_index)
+      {
+        unit_support_points[fe_index] = fe[fe_index].get_unit_support_points();
+        // elements without any degrees of freedom are allowed to have
+        // no support points; all others must be interpolating
+        Assert ((unit_support_points[fe_index].size() != 0)||(fe[fe_index].dofs_per_cell == 0),
+                ExcNonInterpolatingFE());
+      }
+
+
+    // Find the support points on a cell that
+    // are mentioned multiple times in
+    // unit_support_points.  Mark the first
+    // representative of each support point
+    // mentioned multiple times by appending
+    // its dof index to dofs_of_rep_points.
+    // Each multiple point gets to know the
+    // index of its representative point
+    // through dof_to_rep_index_table.
+
+    // the following vector collects all dofs i,
+    // 0<=i<fe.dofs_per_cell, for which
+    // unit_support_points[i]
+    // is a representative one, i.e.
+    // the following vector collects all rep dofs.
+    // the position of a rep dof within this vector
+    // is called rep index.
+    std::vector<std::vector<types::global_dof_index> > dofs_of_rep_points(fe.size());
+    // the following table converts a dof i
+    // to the rep index.
+    std::vector<std::vector<types::global_dof_index> > dof_to_rep_index_table(fe.size());
+
+    std::vector<unsigned int> n_rep_points (fe.size(), 0);
+
+    for (unsigned int fe_index=0; fe_index<fe.size(); ++fe_index)
+      {
+        for (unsigned int i=0; i<fe[fe_index].dofs_per_cell; ++i)
+          {
+            bool representative=true;
+            // the following loop runs backwards
+            // over the representatives found so
+            // far, to get the minimal effort of
+            // O(fe.dofs_per_cell) for multiple
+            // support points that are placed
+            // one after the other.
+            for (unsigned int j=dofs_of_rep_points[fe_index].size(); j>0; --j)
+              if (unit_support_points[fe_index][i]
+                  == unit_support_points[fe_index][dofs_of_rep_points[fe_index][j-1]])
+                {
+                  dof_to_rep_index_table[fe_index].push_back(j-1);
+                  representative=false;
+                  break;
+                }
+
+            if (representative)
+              {
+                // dof i is the first one at this point: it becomes a
+                // new representative and maps to its own rep index
+                dof_to_rep_index_table[fe_index].push_back(dofs_of_rep_points[fe_index].size());
+                dofs_of_rep_points[fe_index].push_back(i);
+                ++n_rep_points[fe_index];
+              }
+          }
+
+        Assert(dofs_of_rep_points[fe_index].size()==n_rep_points[fe_index],
+               ExcInternalError());
+        Assert(dof_to_rep_index_table[fe_index].size()==fe[fe_index].dofs_per_cell,
+               ExcInternalError());
+      }
+
+    // scratch arrays, sized for the largest element in the collection
+    // and resized per cell below
+    const unsigned int max_rep_points = *std::max_element (n_rep_points.begin(),
+                                                           n_rep_points.end());
+    std::vector<types::global_dof_index> dofs_on_cell (fe.max_dofs_per_cell());
+    std::vector<Point<spacedim> >  rep_points (max_rep_points);
+
+    // get space for the values of the
+    // function at the rep support points.
+    //
+    // have two versions, one for system fe
+    // and one for scalar ones, to take the
+    // more efficient one respectively
+    std::vector<std::vector<double> >         function_values_scalar(fe.size());
+    std::vector<std::vector<Vector<double> > > function_values_system(fe.size());
+
+    // Make a quadrature rule from support points
+    // to feed it into FEValues
+    hp::QCollection<dim> support_quadrature;
+    for (unsigned int fe_index=0; fe_index<fe.size(); ++fe_index)
+      support_quadrature.push_back (Quadrature<dim>(unit_support_points[fe_index]));
+
+    // Transformed support points are computed by
+    // FEValues
+    hp::MappingCollection<dim,spacedim> mapping_collection (mapping);
+
+    hp::FEValues<dim,spacedim> fe_values (mapping_collection,
+                                          fe, support_quadrature, update_quadrature_points);
+
+    // only locally owned cells are written to
+    for (; cell!=endc; ++cell)
+      if (cell->is_locally_owned())
+        {
+          const unsigned int fe_index = cell->active_fe_index();
+          // nothing to interpolate on cells without degrees of freedom
+          if (fe[fe_index].dofs_per_cell != 0)
+            {
+              // for each cell:
+              // get location of finite element
+              // support_points
+              fe_values.reinit(cell);
+              const std::vector<Point<spacedim> > &support_points =
+                fe_values.get_present_fe_values().get_quadrature_points();
+
+              // pick out the representative
+              // support points
+              rep_points.resize (dofs_of_rep_points[fe_index].size());
+              for (unsigned int j=0; j<dofs_of_rep_points[fe_index].size(); ++j)
+                rep_points[j] = support_points[dofs_of_rep_points[fe_index][j]];
+
+              // get indices of the dofs on this cell
+              dofs_on_cell.resize (fe[fe_index].dofs_per_cell);
+              cell->get_dof_indices (dofs_on_cell);
+
+
+              if (fe_is_system)
+                {
+                  // get function values at
+                  // these points. Here: get
+                  // all components
+                  function_values_system[fe_index]
+                  .resize (n_rep_points[fe_index],
+                           Vector<double> (fe[fe_index].n_components()));
+                  function.vector_value_list (rep_points,
+                                              function_values_system[fe_index]);
+                  // distribute the function
+                  // values to the global
+                  // vector: dof i takes the
+                  // value of its own component
+                  // at its representative point
+                  for (unsigned int i=0; i<fe[fe_index].dofs_per_cell; ++i)
+                    {
+                      const unsigned int component
+                        = fe[fe_index].system_to_component_index(i).first;
+                      const unsigned int rep_dof=dof_to_rep_index_table[fe_index][i];
+                      vec(dofs_on_cell[i])
+                        = function_values_system[fe_index][rep_dof](component);
+                    }
+                }
+              else
+                {
+                  // get first component only,
+                  // which is the only component
+                  // in the function anyway
+                  function_values_scalar[fe_index].resize (n_rep_points[fe_index]);
+                  function.value_list (rep_points,
+                                       function_values_scalar[fe_index],
+                                       0);
+                  // distribute the function
+                  // values to the global
+                  // vector
+                  for (unsigned int i=0; i<fe[fe_index].dofs_per_cell; ++i)
+                    vec(dofs_on_cell[i])
+                      = function_values_scalar[fe_index][dof_to_rep_index_table[fe_index][i]];
+                }
+            }
+        }
+    vec.compress(VectorOperation::insert);
+  }
+
+
+  /**
+   * Convenience overload of interpolate() without a mapping argument: the
+   * call is forwarded to the mapping-based interpolate() using the static
+   * StaticMappingQ1 mapping object that matches the dimensions of @p dof.
+   */
+  template <typename VectorType, typename DoFHandlerType>
+  void interpolate (const DoFHandlerType                            &dof,
+                    const Function<DoFHandlerType::space_dimension> &function,
+                    VectorType                                      &vec)
+  {
+    // holder of the default mapping for this dimension pair
+    typedef StaticMappingQ1<DoFHandlerType::dimension,
+            DoFHandlerType::space_dimension> default_mapping_type;
+
+    interpolate (default_mapping_type::mapping, dof, function, vec);
+  }
+
+
+
+
+  /**
+   * Transfer the nodal vector @p data_1 living on @p dof_1 to the nodal
+   * vector @p data_2 living on @p dof_2 by applying the cell-local matrix
+   * @p transfer on every cell.
+   *
+   * The active cells of both DoFHandlers are traversed in lock step. On each
+   * cell the local values of @p data_1 are multiplied by @p transfer and the
+   * result is added to the entries of @p data_2 that belong to the cell.
+   * Afterwards every entry of @p data_2 is divided by the number of cells
+   * that contributed to it, i.e. the contributions are averaged.
+   *
+   * @note Because contributions are accumulated with <tt>+=</tt> and then
+   * divided by the contribution count, @p data_2 has to be zero on entry for
+   * the result to be the average of the cell contributions.
+   *
+   * @param[in] dof_1 DoFHandler the source vector belongs to.
+   * @param[in] dof_2 DoFHandler the destination vector belongs to; it is
+   * traversed in lock step with @p dof_1 and therefore needs at least as
+   * many active cells.
+   * @param[in] transfer Cell-local matrix mapping the values on a cell of
+   * @p dof_1 to the values on the corresponding cell of @p dof_2.
+   * @param[in] data_1 Source nodal vector.
+   * @param[in,out] data_2 Destination nodal vector.
+   */
+  template <int dim, class InVector, class OutVector, int spacedim>
+  void
+  interpolate (const DoFHandler<dim,spacedim>           &dof_1,
+               const DoFHandler<dim,spacedim>           &dof_2,
+               const FullMatrix<double>        &transfer,
+               const InVector                  &data_1,
+               OutVector                       &data_2)
+  {
+    const unsigned int dofs_per_cell_1 = dof_1.get_fe().dofs_per_cell;
+    const unsigned int dofs_per_cell_2 = dof_2.get_fe().dofs_per_cell;
+    const types::global_dof_index n_dofs_2 = dof_2.n_dofs();
+
+    Vector<double> cell_data_1(dofs_per_cell_1);
+    Vector<double> cell_data_2(dofs_per_cell_2);
+
+    // number of cells that have contributed to each dof of dof_2
+    typedef unsigned short touch_counter_type;
+    std::vector<touch_counter_type>      touch_count (n_dofs_2, 0);
+    std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell_2);
+
+    typename DoFHandler<dim,spacedim>::active_cell_iterator h = dof_1.begin_active();
+    typename DoFHandler<dim,spacedim>::active_cell_iterator l = dof_2.begin_active();
+    const typename DoFHandler<dim,spacedim>::cell_iterator endh = dof_1.end();
+    const typename DoFHandler<dim,spacedim>::cell_iterator end_2 = dof_2.end();
+    // only used inside Assert, which expands to nothing in release mode
+    (void)end_2;
+
+    for (; h != endh; ++h, ++l)
+      {
+        // both handlers are walked in lock step, so the second one must
+        // not run out of cells before the first one does
+        Assert (l != end_2,
+                ExcMessage ("The second DoFHandler has fewer active cells "
+                            "than the first one."));
+
+        h->get_dof_values(data_1, cell_data_1);
+        transfer.vmult(cell_data_2, cell_data_1);
+
+        l->get_dof_indices (local_dof_indices);
+
+        // distribute cell vector
+        for (unsigned int j=0; j<dofs_per_cell_2; ++j)
+          {
+            const types::global_dof_index dof_index = local_dof_indices[j];
+            data_2(dof_index) += cell_data_2(j);
+
+            // count how often we have added to this dof, making sure
+            // that the counter does not wrap around
+            Assert (touch_count[dof_index] < std::numeric_limits<touch_counter_type>::max(),
+                    ExcInternalError());
+            ++touch_count[dof_index];
+          }
+      }
+
+    // compute the mean value of the
+    // sum which we have placed in each
+    // entry of the output vector. use the
+    // global dof index type for the loop so
+    // that large vectors are fully covered
+    for (types::global_dof_index i=0; i<n_dofs_2; ++i)
+      {
+        Assert (touch_count[i] != 0,
+                ExcInternalError());
+
+        data_2(i) /= touch_count[i];
+      }
+  }
+
+
+  /**
+   * Interpolate, cell by cell, the function that @p function_map associates
+   * with the material id of the cell, and store the values in @p dst.
+   *
+   * Only locally owned cells whose material id appears in @p function_map
+   * are touched; all other entries of @p dst are left unchanged. If
+   * @p function_map is empty the function returns without doing anything.
+   * For vector-valued elements only the components selected by
+   * @p component_mask are written. Cells whose finite element has no
+   * degrees of freedom are skipped, in the same way as in interpolate().
+   * At the end @p dst is compressed with VectorOperation::insert.
+   *
+   * In debug mode the function asserts that @p component_mask matches the
+   * number of components of the finite element, that
+   * numbers::invalid_material_id is not used as a key, that every function
+   * has as many components as the finite element, and that every element
+   * with degrees of freedom has unit support points
+   * (ExcNonInterpolatingFE otherwise).
+   *
+   * @param[in] mapping Mapping used to transform the unit support points to
+   * real space.
+   * @param[in] dof The DoFHandler describing the finite element space.
+   * @param[in] function_map Map from material ids to the functions to be
+   * interpolated on cells with that id.
+   * @param[in,out] dst The vector receiving the interpolated nodal values.
+   * @param[in] component_mask Components of the finite element to write.
+   */
+  template<typename VectorType, typename DoFHandlerType>
+  void
+  interpolate_based_on_material_id
+  (const Mapping<DoFHandlerType::dimension, DoFHandlerType::space_dimension> &mapping,
+   const DoFHandlerType                                                      &dof,
+   const std::map<types::material_id, const Function<DoFHandlerType::space_dimension> *> &function_map,
+   VectorType                                                                &dst,
+   const ComponentMask                                                       &component_mask)
+  {
+    const unsigned int dim = DoFHandlerType::dimension;
+
+    typedef typename std::map<types::material_id, const Function<DoFHandlerType::space_dimension> *>
+    ::const_iterator function_iterator;
+
+    Assert( component_mask.represents_n_components(dof.get_fe().n_components()),
+            ExcMessage("The number of components in the mask has to be either "
+                       "zero or equal to the number of components in the finite "
+                       "element.") );
+
+    if ( function_map.size() == 0 )
+      return;
+
+    Assert( function_map.find(numbers::invalid_material_id) == function_map.end(),
+            ExcMessage("You cannot specify the invalid material indicator "
+                       "in your function map."));
+
+    // every function must provide as many components as the element has
+    for (function_iterator iter = function_map.begin();
+         iter != function_map.end();
+         ++iter )
+      {
+        Assert( dof.get_fe().n_components() == iter->second->n_components,
+                ExcDimensionMismatch(dof.get_fe().n_components(), iter->second->n_components) );
+      }
+
+    const hp::FECollection<DoFHandlerType::dimension, DoFHandlerType::space_dimension>
+    fe(dof.get_fe());
+    const unsigned int n_components =  fe.n_components();
+    const bool         fe_is_system = (n_components != 1);
+
+    typename DoFHandlerType::active_cell_iterator cell = dof.begin_active(),
+                                                  endc = dof.end();
+
+    // unit support points of every element in the collection. elements
+    // without degrees of freedom may legitimately have none
+    std::vector< std::vector< Point<dim> > > unit_support_points(fe.size());
+    for (unsigned int fe_index = 0; fe_index < fe.size(); ++fe_index)
+      {
+        unit_support_points[fe_index] = fe[fe_index].get_unit_support_points();
+        Assert( (unit_support_points[fe_index].size() != 0) || (fe[fe_index].dofs_per_cell == 0),
+                ExcNonInterpolatingFE() );
+      }
+
+    // several dofs of a system element may share a support point. the
+    // first dof found at a point becomes its "representative"
+    // (dofs_of_rep_points), and dof_to_rep_index_table maps every dof to
+    // the position of its representative within dofs_of_rep_points, so
+    // that each function is evaluated only once per distinct point
+    std::vector< std::vector<unsigned int> > dofs_of_rep_points(fe.size());
+    std::vector< std::vector<unsigned int> > dof_to_rep_index_table(fe.size());
+    std::vector<unsigned int>                n_rep_points(fe.size(), 0);
+
+    for (unsigned int fe_index = 0; fe_index < fe.size(); ++fe_index)
+      {
+        for (unsigned int i = 0; i < fe[fe_index].dofs_per_cell; ++i)
+          {
+            bool representative = true;
+
+            // search backwards: dofs sharing a point usually follow each other
+            for (unsigned int j = dofs_of_rep_points[fe_index].size(); j > 0; --j)
+              if ( unit_support_points[fe_index][i] == unit_support_points[fe_index][dofs_of_rep_points[fe_index][j-1]] )
+                {
+                  dof_to_rep_index_table[fe_index].push_back(j-1);
+                  representative = false;
+                  break;
+                }
+
+            if (representative)
+              {
+                dof_to_rep_index_table[fe_index].push_back(dofs_of_rep_points[fe_index].size());
+                dofs_of_rep_points[fe_index].push_back(i);
+                ++n_rep_points[fe_index];
+              }
+          }
+
+        Assert( dofs_of_rep_points[fe_index].size() == n_rep_points[fe_index],
+                ExcInternalError() );
+        Assert( dof_to_rep_index_table[fe_index].size() == fe[fe_index].dofs_per_cell,
+                ExcInternalError() );
+      }
+
+    // scratch arrays, sized for the largest element and resized per cell
+    const unsigned int max_rep_points = *std::max_element(n_rep_points.begin(),
+                                                          n_rep_points.end());
+    std::vector< types::global_dof_index> dofs_on_cell(fe.max_dofs_per_cell());
+    std::vector< Point<DoFHandlerType::space_dimension> > rep_points(max_rep_points);
+
+    std::vector< std::vector<double> >           function_values_scalar(fe.size());
+    std::vector< std::vector< Vector<double> > > function_values_system(fe.size());
+
+    // a quadrature rule made of the support points lets FEValues compute
+    // their locations in real space
+    hp::QCollection<dim> support_quadrature;
+    for (unsigned int fe_index = 0; fe_index < fe.size(); ++fe_index)
+      support_quadrature.push_back( Quadrature<dim>(unit_support_points[fe_index]) );
+
+    hp::MappingCollection<dim, DoFHandlerType::space_dimension> mapping_collection(mapping);
+    hp::FEValues<dim, DoFHandlerType::space_dimension> fe_values(mapping_collection,
+        fe,
+        support_quadrature,
+        update_quadrature_points);
+
+    for ( ; cell != endc; ++cell)
+      {
+        if ( !cell->is_locally_owned() )
+          continue;
+
+        // look the material id up once and reuse the result below
+        const function_iterator function_it = function_map.find(cell->material_id());
+        if ( function_it == function_map.end() )
+          continue;
+
+        const unsigned int fe_index = cell->active_fe_index();
+
+        // nothing to interpolate on cells without degrees of freedom
+        if ( fe[fe_index].dofs_per_cell == 0 )
+          continue;
+
+        fe_values.reinit(cell);
+
+        const std::vector< Point<DoFHandlerType::space_dimension> > &support_points
+          = fe_values.get_present_fe_values().get_quadrature_points();
+
+        // pick out the representative support points
+        rep_points.resize( dofs_of_rep_points[fe_index].size() );
+        for (unsigned int i = 0; i < dofs_of_rep_points[fe_index].size(); ++i)
+          rep_points[i] = support_points[dofs_of_rep_points[fe_index][i]];
+
+        dofs_on_cell.resize( fe[fe_index].dofs_per_cell );
+        cell->get_dof_indices(dofs_on_cell);
+
+        if (fe_is_system)
+          {
+            function_values_system[fe_index].resize( n_rep_points[fe_index],
+                                                     Vector<double>(fe[fe_index].n_components()) );
+
+            function_it->second->vector_value_list(rep_points,
+                                                   function_values_system[fe_index]);
+
+            // write only the components selected by the mask
+            for (unsigned int i = 0; i < fe[fe_index].dofs_per_cell; ++i)
+              {
+                const unsigned int component = fe[fe_index].system_to_component_index(i).first;
+
+                if ( component_mask[component] )
+                  {
+                    const unsigned int rep_dof = dof_to_rep_index_table[fe_index][i];
+                    dst(dofs_on_cell[i])       = function_values_system[fe_index][rep_dof](component);
+                  }
+              }
+          }
+        else
+          {
+            function_values_scalar[fe_index].resize(n_rep_points[fe_index]);
+
+            function_it->second->value_list(rep_points,
+                                            function_values_scalar[fe_index],
+                                            0);
+
+            for (unsigned int i = 0; i < fe[fe_index].dofs_per_cell; ++i)
+              dst(dofs_on_cell[i]) = function_values_scalar[fe_index][dof_to_rep_index_table[fe_index][i]];
+          }
+      }
+
+    dst.compress (VectorOperation::insert);
+  }
+
+
+  namespace internal
+  {
+    /**
+     * Enter a zero boundary value for every degree of freedom that is
+     * located on a boundary face of @p dof_handler. No mapping is needed
+     * for this: the function that is interpolated is zero, both at the
+     * mapped and at the original, unmapped locations.
+     *
+     * Only faces that lie on the boundary are visited. In 3d a face may be
+     * interior while one of its lines is at the boundary, and in 2d and 3d
+     * a cell may touch the boundary with a single vertex only. Since
+     * boundaries of dimension dim-2 or less are not supported, every such
+     * degree of freedom also belongs to some other face that lies wholly
+     * on the boundary, and is therefore found from there.
+     */
+    template <typename DoFHandlerType>
+    void
+    interpolate_zero_boundary_values (const DoFHandlerType                     &dof_handler,
+                                      std::map<types::global_dof_index,double> &boundary_values)
+    {
+      const unsigned int faces_per_cell
+        = GeometryInfo<DoFHandlerType::dimension>::faces_per_cell;
+
+      // scratch array for the dof indices on the current face
+      std::vector<types::global_dof_index> dofs_on_face;
+
+      const typename DoFHandlerType::active_cell_iterator end_cell = dof_handler.end();
+      for (typename DoFHandlerType::active_cell_iterator cell = dof_handler.begin_active();
+           cell != end_cell; ++cell)
+        for (unsigned int face_no=0; face_no<faces_per_cell; ++face_no)
+          {
+            // interior faces contribute nothing
+            if (!cell->at_boundary(face_no))
+              continue;
+
+            dofs_on_face.resize (cell->get_fe().dofs_per_face);
+            cell->face(face_no)->get_dof_indices (dofs_on_face,
+                                                  cell->active_fe_index());
+
+            // all components are set to zero, so vector-valued elements
+            // need no special treatment
+            for (std::vector<types::global_dof_index>::const_iterator
+                 dof_index = dofs_on_face.begin();
+                 dof_index != dofs_on_face.end(); ++dof_index)
+              boundary_values[*dof_index] = 0.;
+          }
+    }
+  }
+
+
+
+  /**
+   * Interpolate @p u1, defined on @p dof1, to @p u2 on @p dof2 without
+   * applying any constraints. Both DoF handlers must be based on
+   * triangulations with the same coarse mesh. The work is delegated to the
+   * overload that takes an InterGridMap and a ConstraintMatrix.
+   */
+  template <int dim, int spacedim, template <int, int> class DoFHandlerType, typename VectorType>
+  void
+  interpolate_to_different_mesh (const DoFHandlerType<dim, spacedim> &dof1,
+                                 const VectorType                    &u1,
+                                 const DoFHandlerType<dim, spacedim> &dof2,
+                                 VectorType                          &u2)
+  {
+    Assert(GridTools::have_same_coarse_mesh(dof1, dof2),
+           ExcMessage ("The two DoF handlers must represent triangulations that "
+                       "have the same coarse meshes"));
+
+    // an empty, closed constraint object stands in for "no constraints"
+    ConstraintMatrix no_constraints;
+    no_constraints.close();
+
+    // relate the cells of the source grid to those of the destination grid
+    InterGridMap<DoFHandlerType<dim, spacedim> > grid_map;
+    grid_map.make_mapping(dof1, dof2);
+
+    interpolate_to_different_mesh(grid_map, u1, no_constraints, u2);
+  }
+
+
+
+  /**
+   * Interpolate @p u1, defined on @p dof1, to @p u2 on @p dof2, passing
+   * @p constraints on to the overload that takes an InterGridMap, which
+   * does the actual work. Both DoF handlers must be based on triangulations
+   * with the same coarse mesh.
+   */
+  template <int dim, int spacedim, template <int, int> class DoFHandlerType, typename VectorType>
+  void
+  interpolate_to_different_mesh (const DoFHandlerType<dim, spacedim> &dof1,
+                                 const VectorType                    &u1,
+                                 const DoFHandlerType<dim, spacedim> &dof2,
+                                 const ConstraintMatrix              &constraints,
+                                 VectorType                          &u2)
+  {
+    Assert(GridTools::have_same_coarse_mesh(dof1, dof2),
+           ExcMessage ("The two DoF handlers must represent triangulations that "
+                       "have the same coarse meshes"));
+
+    // relate the cells of the source grid to those of the destination grid
+    InterGridMap<DoFHandlerType<dim, spacedim> > grid_map;
+    grid_map.make_mapping(dof1, dof2);
+
+    interpolate_to_different_mesh(grid_map, u1, constraints, u2);
+  }
+
+  namespace internal
+  {
+    /**
+     * Return whether @p cell is locally owned. For an active cell this is
+     * the cell's own is_locally_owned() flag; a refined cell counts as
+     * locally owned only if all of its descendants are locally owned.
+     */
+    template <typename cell_iterator>
+    bool is_locally_owned(const cell_iterator &cell)
+    {
+      if (!cell->active())
+        {
+          // refined cell: a single foreign descendant makes it foreign
+          const unsigned int n_children = cell->n_children();
+          for (unsigned int child_no=0; child_no<n_children; ++child_no)
+            {
+              const bool child_is_owned = is_locally_owned(cell->child(child_no));
+              if (child_is_owned == false)
+                return false;
+            }
+
+          return true;
+        }
+
+      return cell->is_locally_owned();
+    }
+  }
+
+
+  /**
+   * Transfer @p u1, living on the source grid of @p intergridmap, to @p u2 on
+   * the destination grid. The loop visits all cells (active or not) of the
+   * source DoF handler and works on those pairs of source/destination cells
+   * that are on the same level and of which at least one is active. For such
+   * a pair the values are read with get_interpolated_dof_values() and written
+   * with set_dof_values_by_interpolation(). Afterwards @p u2 is compressed
+   * and @p constraints are distributed to it.
+   */
+  template <int dim, int spacedim, template <int, int> class DoFHandlerType, typename VectorType>
+  void
+  interpolate_to_different_mesh
+  (const InterGridMap<DoFHandlerType<dim, spacedim> > &intergridmap,
+   const VectorType       &u1,
+   const ConstraintMatrix &constraints,
+   VectorType             &u2)
+  {
+    typedef typename DoFHandlerType<dim,spacedim>::cell_iterator CellIterator;
+
+    const DoFHandlerType<dim, spacedim> &dof1 = intergridmap.get_source_grid();
+    const DoFHandlerType<dim, spacedim> &dof2 = intergridmap.get_destination_grid();
+    // dof2 is only referenced inside the Assert below; the cast presumably
+    // silences unused-variable warnings when assertions are compiled out.
+    (void)dof2;
+
+    Assert(u1.size()==dof1.n_dofs(),
+           ExcDimensionMismatch(u1.size(), dof1.n_dofs()));
+    Assert(u2.size()==dof2.n_dofs(),
+           ExcDimensionMismatch(u2.size(),dof2.n_dofs()));
+
+    // Scratch vector holding the cell-local values that are handed from
+    // the source cell to the destination cell.
+    Vector<typename VectorType::value_type> local_values;
+
+    const CellIterator end_source = dof1.end();
+    for (CellIterator source = dof1.begin(); source != end_source; ++source)
+      {
+        const CellIterator destination = intergridmap[source];
+
+        // Only work on the finest common mesh: both cells have to be on
+        // the same level and at least one of them has to be active.
+        if (source->level() != destination->level()
+            ||
+            !(source->active() || destination->active()))
+          continue;
+
+        Assert(internal::is_locally_owned(source) == internal::is_locally_owned(destination),
+               ExcMessage("The two Triangulations are required to have the same parallel partitioning."));
+
+        // Skip foreign cells.
+        if ((source->active() && !source->is_locally_owned())
+            ||
+            (destination->active() && !destination->is_locally_owned()))
+          continue;
+
+        Assert(source->get_fe().get_name() ==
+               destination->get_fe().get_name(),
+               ExcMessage ("Source and destination cells need to use the same finite element"));
+
+        local_values.reinit(source->get_fe().dofs_per_cell);
+
+        // Read the values from the source cell and write them to the
+        // destination cell, interpolating where necessary.
+        source->get_interpolated_dof_values(u1, local_values);
+        destination->set_dof_values_by_interpolation(local_values, u2);
+      }
+
+    // Finish the work on parallel vectors, then apply the constraints
+    // (e.g. hanging node constraints) to the result.
+    u2.compress (VectorOperation::insert);
+    constraints.distribute(u2);
+  }
+
+
+  namespace
+  {
+    /**
+     * Compute the boundary values to be used in the project() functions.
+     *
+     * - If @p enforce_zero_boundary is set, @p boundary_values is filled via
+     *   internal::interpolate_zero_boundary_values().
+     * - Otherwise, if @p project_to_boundary_first is set, @p function is
+     *   registered for every boundary id reported by the triangulation and
+     *   project_boundary_values() is called with quadrature @p q_boundary.
+     * - Otherwise @p boundary_values is left untouched.
+     */
+    template <int dim, int spacedim, template <int, int> class DoFHandlerType,
+              template <int,int> class M_or_MC, template <int> class Q_or_QC>
+    void project_compute_b_v
+    (const M_or_MC<dim, spacedim>             &mapping,
+     const DoFHandlerType<dim,spacedim>       &dof,
+     const Function<spacedim>                 &function,
+     const bool                                enforce_zero_boundary,
+     const Q_or_QC<dim-1>                     &q_boundary,
+     const bool                                project_to_boundary_first,
+     std::map<types::global_dof_index,double> &boundary_values)
+    {
+      if (enforce_zero_boundary)
+        {
+          // No projection of boundary values needed; homogeneous boundary
+          // values are enforced instead.
+          internal::
+          interpolate_zero_boundary_values (dof, boundary_values);
+          return;
+        }
+
+      // No homogeneous boundary values requested; nothing to do unless a
+      // projection to the boundary was asked for.
+      if (!project_to_boundary_first)
+        return;
+
+      // The function is supposed to hold on all parts of the boundary, so
+      // attach it to every boundary id in use.
+      const std::vector<types::boundary_id>
+      boundary_ids_in_use = dof.get_triangulation().get_boundary_ids();
+
+      typename FunctionMap<spacedim>::type function_on_boundary;
+      for (std::vector<types::boundary_id>::const_iterator
+           id = boundary_ids_in_use.begin();
+           id != boundary_ids_in_use.end(); ++id)
+        function_on_boundary[*id] = &function;
+
+      project_boundary_values (mapping, dof, function_on_boundary, q_boundary,
+                               boundary_values);
+    }
+
+
+    /**
+     * Return whether the boundary values try to constrain a degree of freedom
+     * that is already constrained to something else.
+     *
+     * As implemented, the function returns false as soon as it finds a
+     * boundary degree of freedom that is constrained in @p constraints, has
+     * no constraint entries, and whose inhomogeneity differs from the
+     * prescribed boundary value. In all other cases it returns true.
+     */
+    inline
+    bool constraints_and_b_v_are_compatible (const ConstraintMatrix   &constraints,
+                                             std::map<types::global_dof_index,double> &boundary_values)
+    {
+      typedef std::map<types::global_dof_index,double>::iterator map_iterator;
+
+      for (map_iterator bv = boundary_values.begin();
+           bv != boundary_values.end(); ++bv)
+        {
+          const types::global_dof_index dof_index = bv->first;
+
+          // Unconstrained degrees of freedom can never conflict.
+          if (!constraints.is_constrained(dof_index))
+            continue;
+
+          //TODO: This looks wrong -- shouldn't it be ==0 in the first
+          // condition and && ? (Original condition, rewritten with
+          // De Morgan: !(size>0 || inhom==value).)
+          if (!(constraints.get_constraint_entries(dof_index)->size() > 0)
+              &&
+              !(constraints.get_inhomogeneity(dof_index) == bv->second))
+            return false;
+        }
+
+      return true;
+    }
+
+
+    /**
+     * Generic implementation of the project() function.
+     *
+     * Steps performed, in this order:
+     * -# compute boundary values with project_compute_b_v();
+     * -# check them against @p constraints with
+     *    constraints_and_b_v_are_compatible();
+     * -# build a sparsity pattern and assemble mass matrix and right hand
+     *    side with MatrixCreator::create_mass_matrix();
+     * -# apply the boundary values (and, in the incompatible case, condense
+     *    with @p constraints);
+     * -# solve with CG preconditioned by SSOR;
+     * -# call constraints.distribute() and copy the result entry by entry
+     *    into @p vec_result.
+     *
+     * The template-template parameters M_or_MC and Q_or_QC let the same code
+     * be called with a mapping/quadrature or with the corresponding hp
+     * collections, as the project() overloads of this file do.
+     */
+    template <int dim, int spacedim, typename VectorType,
+              template <int, int> class DoFHandlerType,
+              template <int,int> class M_or_MC, template <int> class Q_or_QC>
+    void do_project (const M_or_MC<dim, spacedim>       &mapping,
+                     const DoFHandlerType<dim,spacedim> &dof,
+                     const ConstraintMatrix             &constraints,
+                     const Q_or_QC<dim>                 &quadrature,
+                     const Function<spacedim>           &function,
+                     VectorType                         &vec_result,
+                     const bool                          enforce_zero_boundary,
+                     const Q_or_QC<dim-1>               &q_boundary,
+                     const bool                          project_to_boundary_first)
+    {
+      Assert (dof.get_fe().n_components() == function.n_components,
+              ExcDimensionMismatch(dof.get_fe().n_components(),
+                                   function.n_components));
+      Assert (vec_result.size() == dof.n_dofs(),
+              ExcDimensionMismatch (vec_result.size(), dof.n_dofs()));
+
+      // make up boundary values (the map may stay empty, see project_compute_b_v)
+      std::map<types::global_dof_index,double> boundary_values;
+      project_compute_b_v(mapping, dof, function, enforce_zero_boundary,
+                          q_boundary, project_to_boundary_first, boundary_values);
+
+      // check if constraints are compatible (see the comment before the if-statement below)
+      const bool constraints_are_compatible =
+        constraints_and_b_v_are_compatible(constraints, boundary_values);
+
+      // set up mass matrix and right hand side
+      Vector<double> vec (dof.n_dofs()); // solution of the linear system, copied to vec_result at the end
+      SparsityPattern sparsity;
+      {
+        DynamicSparsityPattern dsp (dof.n_dofs(), dof.n_dofs());
+        DoFTools::make_sparsity_pattern (dof, dsp, constraints,
+                                         !constraints_are_compatible); // NOTE(review): presumably the keep_constrained_dofs flag -- confirm against DoFTools
+
+        sparsity.copy_from (dsp);
+      }
+      SparseMatrix<double> mass_matrix (sparsity);
+      Vector<double> tmp (mass_matrix.n()); // right hand side of the linear system
+
+      // If the constraint matrix does not conflict with the given boundary
+      // values (i.e., it either does not contain boundary values or it contains
+      // the same as boundary_values), we can let it call
+      // distribute_local_to_global straight away, otherwise we need to first
+      // interpolate the boundary values and then condense the matrix and vector
+      if (constraints_are_compatible)
+        {
+          const Function<spacedim> *dummy = 0; // null pointer passed as the argument preceding the constraints
+          MatrixCreator::create_mass_matrix (mapping, dof, quadrature,
+                                             mass_matrix, function, tmp,
+                                             dummy, constraints);
+          if (boundary_values.size() > 0)
+            MatrixTools::apply_boundary_values (boundary_values,
+                                                mass_matrix, vec, tmp,
+                                                true);
+        }
+      else
+        {
+          // create mass matrix and rhs at once, which is faster.
+          MatrixCreator::create_mass_matrix (mapping, dof, quadrature,
+                                             mass_matrix, function, tmp);
+          MatrixTools::apply_boundary_values (boundary_values,
+                                              mass_matrix, vec, tmp,
+                                              true);
+          constraints.condense(mass_matrix, tmp);
+        }
+
+      // Allow for a maximum of 5*n steps to reduce the residual by 10^-12. n
+      // steps may not be sufficient, since roundoff errors may accumulate for
+      // badly conditioned matrices
+      ReductionControl      control(5*tmp.size(), 0., 1e-12, false, false);
+      GrowingVectorMemory<> memory;
+      SolverCG<>            cg(control,memory);
+
+      PreconditionSSOR<> prec;
+      prec.initialize(mass_matrix, 1.2); // NOTE(review): 1.2 is presumably the SSOR relaxation parameter -- confirm
+
+      cg.solve (mass_matrix, vec, tmp, prec);
+
+      constraints.distribute (vec);
+
+      // copy vec into vec_result. we can't use vec_result itself above, since
+      // it may be of another type than Vector<double> and that wouldn't
+      // necessarily go together with the matrix and other functions
+      for (unsigned int i=0; i<vec.size(); ++i)
+        vec_result(i) = vec(i);
+    }
+  }
+
+
+  /**
+   * Projection with an explicitly given @p mapping for a plain DoFHandler.
+   * All arguments are handed on unchanged to do_project().
+   */
+  template <int dim, typename VectorType, int spacedim>
+  void project (const Mapping<dim, spacedim>   &mapping,
+                const DoFHandler<dim,spacedim> &dof,
+                const ConstraintMatrix         &constraints,
+                const Quadrature<dim>          &quadrature,
+                const Function<spacedim>       &function,
+                VectorType                     &vec_result,
+                const bool                     enforce_zero_boundary,
+                const Quadrature<dim-1>        &q_boundary,
+                const bool                     project_to_boundary_first)
+  {
+    do_project (mapping,
+                dof,
+                constraints,
+                quadrature,
+                function,
+                vec_result,
+                enforce_zero_boundary,
+                q_boundary,
+                project_to_boundary_first);
+  }
+
+
+  /**
+   * Same as the project() overload taking a mapping, but using
+   * StaticMappingQ1<dim,spacedim>::mapping as the mapping.
+   */
+  template <int dim, typename VectorType, int spacedim>
+  void project (const DoFHandler<dim,spacedim> &dof,
+                const ConstraintMatrix         &constraints,
+                const Quadrature<dim>          &quadrature,
+                const Function<spacedim>       &function,
+                VectorType                     &vec,
+                const bool                     enforce_zero_boundary,
+                const Quadrature<dim-1>        &q_boundary,
+                const bool                     project_to_boundary_first)
+  {
+    project(StaticMappingQ1<dim,spacedim>::mapping,
+            dof,
+            constraints,
+            quadrature,
+            function,
+            vec,
+            enforce_zero_boundary,
+            q_boundary,
+            project_to_boundary_first);
+  }
+
+
+
+  /**
+   * Projection for hp::DoFHandler objects with an explicitly given mapping
+   * collection. All arguments are handed on unchanged to do_project().
+   */
+  template <int dim, typename VectorType, int spacedim>
+  void project (const hp::MappingCollection<dim, spacedim> &mapping,
+                const hp::DoFHandler<dim,spacedim>         &dof,
+                const ConstraintMatrix                     &constraints,
+                const hp::QCollection<dim>                 &quadrature,
+                const Function<spacedim>                   &function,
+                VectorType                                 &vec_result,
+                const bool                                 enforce_zero_boundary,
+                const hp::QCollection<dim-1>               &q_boundary,
+                const bool                                 project_to_boundary_first)
+  {
+    do_project (mapping,
+                dof,
+                constraints,
+                quadrature,
+                function,
+                vec_result,
+                enforce_zero_boundary,
+                q_boundary,
+                project_to_boundary_first);
+  }
+
+
+  /**
+   * Same as the hp project() overload taking a mapping collection, but using
+   * hp::StaticMappingQ1<dim,spacedim>::mapping_collection.
+   */
+  template <int dim, typename VectorType, int spacedim>
+  void project (const hp::DoFHandler<dim,spacedim> &dof,
+                const ConstraintMatrix             &constraints,
+                const hp::QCollection<dim>         &quadrature,
+                const Function<spacedim>           &function,
+                VectorType                         &vec,
+                const bool                         enforce_zero_boundary,
+                const hp::QCollection<dim-1>       &q_boundary,
+                const bool                         project_to_boundary_first)
+  {
+    project(hp::StaticMappingQ1<dim,spacedim>::mapping_collection,
+            dof,
+            constraints,
+            quadrature,
+            function,
+            vec,
+            enforce_zero_boundary,
+            q_boundary,
+            project_to_boundary_first);
+  }
+
+
+  /**
+   * Assemble a right hand side vector: after zeroing @p rhs_vector, every
+   * active cell adds, for each local shape function i, the quadrature sum of
+   * rhs_function(x_q) * phi_i(x_q) * JxW_q to the entry of the corresponding
+   * global degree of freedom.
+   *
+   * Scalar elements use Function::value_list(); vector-valued elements use
+   * Function::vector_value_list(), with a shortcut for primitive elements.
+   */
+  template <int dim, int spacedim>
+  void create_right_hand_side (const Mapping<dim, spacedim>    &mapping,
+                               const DoFHandler<dim,spacedim> &dof_handler,
+                               const Quadrature<dim> &quadrature,
+                               const Function<spacedim>   &rhs_function,
+                               Vector<double>        &rhs_vector)
+  {
+    typedef typename DoFHandler<dim,spacedim>::active_cell_iterator active_cell_iterator;
+
+    const FiniteElement<dim,spacedim> &fe  = dof_handler.get_fe();
+    Assert (fe.n_components() == rhs_function.n_components,
+            ExcDimensionMismatch(fe.n_components(), rhs_function.n_components));
+    Assert (rhs_vector.size() == dof_handler.n_dofs(),
+            ExcDimensionMismatch(rhs_vector.size(), dof_handler.n_dofs()));
+    rhs_vector = 0;
+
+    FEValues<dim,spacedim> fe_values (mapping, fe, quadrature,
+                                      UpdateFlags(update_values   |
+                                                  update_quadrature_points |
+                                                  update_JxW_values));
+
+    const unsigned int dofs_per_cell = fe_values.dofs_per_cell;
+    const unsigned int n_q_points    = fe_values.n_quadrature_points;
+    const unsigned int n_components  = fe.n_components();
+
+    std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+    Vector<double> local_rhs (dofs_per_cell);
+
+    const active_cell_iterator endc = dof_handler.end();
+
+    if (n_components==1)
+      {
+        // Scalar case.
+        std::vector<double> f_values(n_q_points);
+
+        for (active_cell_iterator cell = dof_handler.begin_active();
+             cell!=endc; ++cell)
+          {
+            fe_values.reinit(cell);
+
+            const std::vector<double> &JxW = fe_values.get_JxW_values ();
+            rhs_function.value_list (fe_values.get_quadrature_points(),
+                                     f_values);
+
+            local_rhs = 0;
+            for (unsigned int q=0; q<n_q_points; ++q)
+              for (unsigned int i=0; i<dofs_per_cell; ++i)
+                local_rhs(i) += f_values[q] *
+                                fe_values.shape_value(i,q) *
+                                JxW[q];
+
+            cell->get_dof_indices (local_dof_indices);
+
+            for (unsigned int i=0; i<dofs_per_cell; ++i)
+              rhs_vector(local_dof_indices[i]) += local_rhs(i);
+          }
+
+        return;
+      }
+
+    // Vector-valued case.
+    std::vector<Vector<double> > f_values(n_q_points,
+                                          Vector<double>(n_components));
+
+    for (active_cell_iterator cell = dof_handler.begin_active();
+         cell!=endc; ++cell)
+      {
+        fe_values.reinit(cell);
+
+        const std::vector<double> &JxW = fe_values.get_JxW_values ();
+        rhs_function.vector_value_list (fe_values.get_quadrature_points(),
+                                        f_values);
+
+        local_rhs = 0;
+        if (fe.is_primitive ())
+          {
+            // Primitive element: each shape function belongs to exactly
+            // one component, so only that component contributes.
+            for (unsigned int q=0; q<n_q_points; ++q)
+              for (unsigned int i=0; i<dofs_per_cell; ++i)
+                {
+                  const unsigned int comp
+                    = fe.system_to_component_index(i).first;
+
+                  local_rhs(i) += f_values[q](comp) *
+                                  fe_values.shape_value(i,q) *
+                                  JxW[q];
+                }
+          }
+        else
+          {
+            // General case: sum over all nonzero components of each
+            // shape function.
+            for (unsigned int q=0; q<n_q_points; ++q)
+              for (unsigned int i=0; i<dofs_per_cell; ++i)
+                for (unsigned int c=0; c<n_components; ++c)
+                  if (fe.get_nonzero_components(i)[c])
+                    local_rhs(i) += f_values[q](c) *
+                                    fe_values.shape_value_component(i,q,c) *
+                                    JxW[q];
+          }
+
+        cell->get_dof_indices (local_dof_indices);
+
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          rhs_vector(local_dof_indices[i]) += local_rhs(i);
+      }
+  }
+
+
+
+  /**
+   * Same as the create_right_hand_side() overload taking a mapping, but
+   * using StaticMappingQ1<dim,spacedim>::mapping.
+   */
+  template <int dim, int spacedim>
+  void create_right_hand_side (const DoFHandler<dim,spacedim>    &dof_handler,
+                               const Quadrature<dim>    &quadrature,
+                               const Function<spacedim>      &rhs_function,
+                               Vector<double>           &rhs_vector)
+  {
+    create_right_hand_side(StaticMappingQ1<dim,spacedim>::mapping,
+                           dof_handler,
+                           quadrature,
+                           rhs_function,
+                           rhs_vector);
+  }
+
+
+
+
+  /**
+   * hp version of create_right_hand_side(): after zeroing @p rhs_vector,
+   * every active cell adds, for each local shape function i of the element
+   * active on that cell, the quadrature sum of
+   * rhs_function(x_q) * phi_i(x_q) * JxW_q to the entry of the corresponding
+   * global degree of freedom. The per-cell scratch arrays are resized on
+   * each cell because the number of degrees of freedom and quadrature
+   * points may differ from cell to cell.
+   */
+  template <int dim, int spacedim>
+  void create_right_hand_side (const hp::MappingCollection<dim,spacedim>    &mapping,
+                               const hp::DoFHandler<dim,spacedim> &dof_handler,
+                               const hp::QCollection<dim> &quadrature,
+                               const Function<spacedim>   &rhs_function,
+                               Vector<double>        &rhs_vector)
+  {
+    typedef typename hp::DoFHandler<dim,spacedim>::active_cell_iterator active_cell_iterator;
+
+    const hp::FECollection<dim,spacedim> &fe  = dof_handler.get_fe();
+    Assert (fe.n_components() == rhs_function.n_components,
+            ExcDimensionMismatch(fe.n_components(), rhs_function.n_components));
+    Assert (rhs_vector.size() == dof_handler.n_dofs(),
+            ExcDimensionMismatch(rhs_vector.size(), dof_handler.n_dofs()));
+    rhs_vector = 0;
+
+    hp::FEValues<dim,spacedim> hp_fe_values (mapping, fe, quadrature,
+                                             UpdateFlags(update_values   |
+                                                         update_quadrature_points |
+                                                         update_JxW_values));
+
+    const unsigned int n_components  = fe.n_components();
+
+    std::vector<types::global_dof_index> local_dof_indices (fe.max_dofs_per_cell());
+    Vector<double> local_rhs (fe.max_dofs_per_cell());
+
+    const active_cell_iterator endc = dof_handler.end();
+
+    if (n_components==1)
+      {
+        // Scalar case.
+        std::vector<double> f_values;
+
+        for (active_cell_iterator cell = dof_handler.begin_active();
+             cell!=endc; ++cell)
+          {
+            hp_fe_values.reinit(cell);
+
+            const FEValues<dim,spacedim> &fe_values = hp_fe_values.get_present_fe_values();
+
+            const unsigned int dofs_per_cell = fe_values.dofs_per_cell;
+            const unsigned int n_q_points    = fe_values.n_quadrature_points;
+            f_values.resize (n_q_points);
+            local_dof_indices.resize (dofs_per_cell);
+            local_rhs.reinit (dofs_per_cell);
+
+            const std::vector<double> &JxW = fe_values.get_JxW_values ();
+            rhs_function.value_list (fe_values.get_quadrature_points(),
+                                     f_values);
+
+            local_rhs = 0;
+            for (unsigned int q=0; q<n_q_points; ++q)
+              for (unsigned int i=0; i<dofs_per_cell; ++i)
+                local_rhs(i) += f_values[q] *
+                                fe_values.shape_value(i,q) *
+                                JxW[q];
+
+            cell->get_dof_indices (local_dof_indices);
+
+            for (unsigned int i=0; i<dofs_per_cell; ++i)
+              rhs_vector(local_dof_indices[i]) += local_rhs(i);
+          }
+
+        return;
+      }
+
+    // Vector-valued case.
+    std::vector<Vector<double> > f_values;
+
+    for (active_cell_iterator cell = dof_handler.begin_active();
+         cell!=endc; ++cell)
+      {
+        hp_fe_values.reinit(cell);
+
+        const FEValues<dim,spacedim> &fe_values = hp_fe_values.get_present_fe_values();
+
+        const unsigned int dofs_per_cell = fe_values.dofs_per_cell;
+        const unsigned int n_q_points    = fe_values.n_quadrature_points;
+        f_values.resize (n_q_points,
+                         Vector<double>(n_components));
+        local_dof_indices.resize (dofs_per_cell);
+        local_rhs.reinit (dofs_per_cell);
+
+        const std::vector<double> &JxW = fe_values.get_JxW_values ();
+        rhs_function.vector_value_list (fe_values.get_quadrature_points(),
+                                        f_values);
+
+        local_rhs = 0;
+
+        if (cell->get_fe().is_primitive ())
+          {
+            // Primitive element on this cell: only the single component
+            // a shape function belongs to contributes.
+            for (unsigned int q=0; q<n_q_points; ++q)
+              for (unsigned int i=0; i<dofs_per_cell; ++i)
+                {
+                  const unsigned int comp
+                    = cell->get_fe().system_to_component_index(i).first;
+
+                  local_rhs(i) += f_values[q](comp) *
+                                  fe_values.shape_value(i,q) *
+                                  JxW[q];
+                }
+          }
+        else
+          {
+            // General case: sum over all nonzero components of each
+            // shape function.
+            for (unsigned int q=0; q<n_q_points; ++q)
+              for (unsigned int i=0; i<dofs_per_cell; ++i)
+                for (unsigned int c=0; c<n_components; ++c)
+                  if (cell->get_fe().get_nonzero_components(i)[c])
+                    local_rhs(i) += f_values[q](c) *
+                                    fe_values.shape_value_component(i,q,c) *
+                                    JxW[q];
+          }
+
+        cell->get_dof_indices (local_dof_indices);
+
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          rhs_vector(local_dof_indices[i]) += local_rhs(i);
+      }
+  }
+
+
+
+  /**
+   * Same as the hp create_right_hand_side() overload taking a mapping
+   * collection, but using
+   * hp::StaticMappingQ1<dim,spacedim>::mapping_collection.
+   */
+  template <int dim, int spacedim>
+  void create_right_hand_side (const hp::DoFHandler<dim,spacedim>    &dof_handler,
+                               const hp::QCollection<dim>    &quadrature,
+                               const Function<spacedim>      &rhs_function,
+                               Vector<double>           &rhs_vector)
+  {
+    create_right_hand_side(hp::StaticMappingQ1<dim,spacedim>::mapping_collection,
+                           dof_handler,
+                           quadrature,
+                           rhs_function,
+                           rhs_vector);
+  }
+
+
+
+
+  /**
+   * Build the vector for a point source at @p p for a scalar element:
+   * @p rhs_vector is zeroed, the active cell around @p p is located with
+   * GridTools::find_active_cell_around_point(), and the value of each shape
+   * function of that cell at the (unit-cell projected) reference location of
+   * @p p is written to the entry of the corresponding degree of freedom.
+   */
+  template <int dim, int spacedim>
+  void create_point_source_vector (const Mapping<dim, spacedim>       &mapping,
+                                   const DoFHandler<dim,spacedim>    &dof_handler,
+                                   const Point<spacedim>         &p,
+                                   Vector<double>           &rhs_vector)
+  {
+    typedef typename DoFHandler<dim,spacedim>::active_cell_iterator active_cell_iterator;
+
+    Assert (rhs_vector.size() == dof_handler.n_dofs(),
+            ExcDimensionMismatch(rhs_vector.size(), dof_handler.n_dofs()));
+    Assert (dof_handler.get_fe().n_components() == 1,
+            ExcMessage ("This function only works for scalar finite elements"));
+
+    rhs_vector = 0;
+
+    // first: the cell containing p, second: p in unit cell coordinates
+    std::pair<active_cell_iterator, Point<spacedim> >
+    located = GridTools::find_active_cell_around_point (mapping, dof_handler, p);
+
+    // One-point "quadrature" sitting exactly at the source location.
+    Quadrature<dim> point_quadrature(GeometryInfo<dim>::project_to_unit_cell(located.second));
+
+    FEValues<dim,spacedim> fe_values(mapping,
+                                     dof_handler.get_fe(),
+                                     point_quadrature,
+                                     UpdateFlags(update_values));
+    fe_values.reinit(located.first);
+
+    const unsigned int n_local_dofs = dof_handler.get_fe().dofs_per_cell;
+
+    std::vector<types::global_dof_index> global_indices (n_local_dofs);
+    located.first->get_dof_indices (global_indices);
+
+    for (unsigned int k=0; k<n_local_dofs; ++k)
+      rhs_vector(global_indices[k]) = fe_values.shape_value(k,0);
+  }
+
+
+
+  /**
+   * Same as the scalar create_point_source_vector() overload taking a
+   * mapping, but using StaticMappingQ1<dim,spacedim>::mapping.
+   */
+  template <int dim, int spacedim>
+  void create_point_source_vector (const DoFHandler<dim,spacedim>    &dof_handler,
+                                   const Point<spacedim>         &p,
+                                   Vector<double>           &rhs_vector)
+  {
+    create_point_source_vector(StaticMappingQ1<dim,spacedim>::mapping,
+                               dof_handler,
+                               p,
+                               rhs_vector);
+  }
+
+
+  /**
+   * hp version of the scalar point source vector: @p rhs_vector is zeroed,
+   * the active cell around @p p is located with
+   * GridTools::find_active_cell_around_point(), and the value of each shape
+   * function of the element active on that cell, evaluated at the
+   * (unit-cell projected) reference location of @p p, is written to the
+   * entry of the corresponding degree of freedom.
+   */
+  template <int dim, int spacedim>
+  void create_point_source_vector (const hp::MappingCollection<dim,spacedim>       &mapping,
+                                   const hp::DoFHandler<dim,spacedim>    &dof_handler,
+                                   const Point<spacedim>         &p,
+                                   Vector<double>           &rhs_vector)
+  {
+    Assert (rhs_vector.size() == dof_handler.n_dofs(),
+            ExcDimensionMismatch(rhs_vector.size(), dof_handler.n_dofs()));
+    Assert (dof_handler.get_fe().n_components() == 1,
+            ExcMessage ("This function only works for scalar finite elements"));
+
+    rhs_vector = 0;
+
+    std::pair<typename hp::DoFHandler<dim,spacedim>::active_cell_iterator, Point<spacedim> >
+    cell_point =
+      GridTools::find_active_cell_around_point (mapping, dof_handler, p);
+
+    // One-point "quadrature" sitting exactly at the source location.
+    Quadrature<dim> q(GeometryInfo<dim>::project_to_unit_cell(cell_point.second));
+
+    // The mapping and the element are <dim,spacedim> objects, so FEValues
+    // has to carry both template arguments as well (as the non-hp overload
+    // does); FEValues<dim> would only match for spacedim == dim.
+    FEValues<dim,spacedim> fe_values(mapping[cell_point.first->active_fe_index()],
+                                     cell_point.first->get_fe(), q, UpdateFlags(update_values));
+    fe_values.reinit(cell_point.first);
+
+    const unsigned int dofs_per_cell = cell_point.first->get_fe().dofs_per_cell;
+
+    std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+    cell_point.first->get_dof_indices (local_dof_indices);
+
+    for (unsigned int i=0; i<dofs_per_cell; i++)
+      rhs_vector(local_dof_indices[i]) =  fe_values.shape_value(i,0);
+  }
+
+
+
+  /**
+   * Same as the scalar hp create_point_source_vector() overload taking a
+   * mapping collection, but using the default Q1 mapping collection.
+   *
+   * The collection is selected with both template arguments,
+   * hp::StaticMappingQ1<dim,spacedim>, so that its type matches the
+   * hp::MappingCollection<dim,spacedim> parameter of the overload being
+   * called (as the other hp wrappers in this file do).
+   */
+  template <int dim, int spacedim>
+  void create_point_source_vector (const hp::DoFHandler<dim,spacedim>    &dof_handler,
+                                   const Point<spacedim>         &p,
+                                   Vector<double>           &rhs_vector)
+  {
+    create_point_source_vector(hp::StaticMappingQ1<dim,spacedim>::mapping_collection,
+                               dof_handler,
+                               p, rhs_vector);
+  }
+
+
+
+
+  /**
+   * Build the vector for a point source at @p p for a vector-valued element
+   * with dim components: @p rhs_vector is zeroed, the active cell around
+   * @p p is located, and for each shape function of that cell the product of
+   * @p orientation with the shape function's vector value (extracted
+   * starting at component 0) at the reference location of @p p is written to
+   * the entry of the corresponding degree of freedom.
+   */
+  template <int dim, int spacedim>
+  void create_point_source_vector (const Mapping<dim, spacedim>       &mapping,
+                                   const DoFHandler<dim,spacedim>     &dof_handler,
+                                   const Point<spacedim>              &p,
+                                   const Point<dim>                   &orientation,
+                                   Vector<double>                     &rhs_vector)
+  {
+    typedef typename DoFHandler<dim,spacedim>::active_cell_iterator active_cell_iterator;
+
+    Assert (rhs_vector.size() == dof_handler.n_dofs(),
+            ExcDimensionMismatch(rhs_vector.size(), dof_handler.n_dofs()));
+    Assert (dof_handler.get_fe().n_components() == dim,
+            ExcMessage ("This function only works for vector-valued finite elements."));
+
+    rhs_vector = 0;
+
+    // first: the cell containing p, second: p in unit cell coordinates
+    const std::pair<active_cell_iterator, Point<spacedim> >
+    located = GridTools::find_active_cell_around_point (mapping, dof_handler, p);
+
+    // One-point "quadrature" sitting exactly at the source location.
+    const Quadrature<dim> point_quadrature(GeometryInfo<dim>::project_to_unit_cell(located.second));
+
+    const FEValuesExtractors::Vector vector_part (0);
+    FEValues<dim,spacedim> fe_values(mapping,
+                                     dof_handler.get_fe(),
+                                     point_quadrature,
+                                     UpdateFlags(update_values));
+    fe_values.reinit(located.first);
+
+    const unsigned int n_local_dofs = dof_handler.get_fe().dofs_per_cell;
+
+    std::vector<types::global_dof_index> global_indices (n_local_dofs);
+    located.first->get_dof_indices (global_indices);
+
+    for (unsigned int k=0; k<n_local_dofs; ++k)
+      rhs_vector(global_indices[k]) = orientation * fe_values[vector_part].value(k,0);
+  }
+
+
+
+  /**
+   * Same as the create_point_source_vector() overload taking a mapping and
+   * an orientation, but using StaticMappingQ1<dim,spacedim>::mapping.
+   */
+  template <int dim, int spacedim>
+  void create_point_source_vector (const DoFHandler<dim,spacedim>    &dof_handler,
+                                   const Point<spacedim>             &p,
+                                   const Point<dim>                  &orientation,
+                                   Vector<double>                    &rhs_vector)
+  {
+    create_point_source_vector(StaticMappingQ1<dim,spacedim>::mapping,
+                               dof_handler,
+                               p,
+                               orientation,
+                               rhs_vector);
+  }
+
+
+  /**
+   * hp version of the vector-valued point source vector: @p rhs_vector is
+   * zeroed, the active cell around @p p is located, and for each shape
+   * function of the element active on that cell the product of
+   * @p orientation with the shape function's vector value (extracted
+   * starting at component 0) at the reference location of @p p is written to
+   * the entry of the corresponding degree of freedom.
+   */
+  template <int dim, int spacedim>
+  void create_point_source_vector (const hp::MappingCollection<dim,spacedim> &mapping,
+                                   const hp::DoFHandler<dim,spacedim>        &dof_handler,
+                                   const Point<spacedim>                     &p,
+                                   const Point<dim>                          &orientation,
+                                   Vector<double>                            &rhs_vector)
+  {
+    Assert (rhs_vector.size() == dof_handler.n_dofs(),
+            ExcDimensionMismatch(rhs_vector.size(), dof_handler.n_dofs()));
+    Assert (dof_handler.get_fe().n_components() == dim,
+            ExcMessage ("This function only works for vector-valued finite elements."));
+
+    rhs_vector = 0;
+
+    std::pair<typename hp::DoFHandler<dim,spacedim>::active_cell_iterator, Point<spacedim> >
+    cell_point =
+      GridTools::find_active_cell_around_point (mapping, dof_handler, p);
+
+    // One-point "quadrature" sitting exactly at the source location.
+    Quadrature<dim> q(GeometryInfo<dim>::project_to_unit_cell(cell_point.second));
+
+    const FEValuesExtractors::Vector vec (0);
+    // The mapping and the element are <dim,spacedim> objects, so FEValues
+    // has to carry both template arguments as well (as the non-hp overload
+    // does); FEValues<dim> would only match for spacedim == dim.
+    FEValues<dim,spacedim> fe_values(mapping[cell_point.first->active_fe_index()],
+                                     cell_point.first->get_fe(), q, UpdateFlags(update_values));
+    fe_values.reinit(cell_point.first);
+
+    const unsigned int dofs_per_cell = cell_point.first->get_fe().dofs_per_cell;
+
+    std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+    cell_point.first->get_dof_indices (local_dof_indices);
+
+    for (unsigned int i=0; i<dofs_per_cell; i++)
+      rhs_vector(local_dof_indices[i]) =  orientation * fe_values[vec].value(i,0);
+  }
+
+
+
+  /**
+   * Same as the hp create_point_source_vector() overload taking a mapping
+   * collection and an orientation, but using the default Q1 mapping
+   * collection.
+   *
+   * The collection is selected with both template arguments,
+   * hp::StaticMappingQ1<dim,spacedim>, so that its type matches the
+   * hp::MappingCollection<dim,spacedim> parameter of the overload being
+   * called (as the other hp wrappers in this file do).
+   */
+  template <int dim, int spacedim>
+  void create_point_source_vector (const hp::DoFHandler<dim,spacedim>   &dof_handler,
+                                   const Point<spacedim>                &p,
+                                   const Point<dim>                     &orientation,
+                                   Vector<double>                       &rhs_vector)
+  {
+    create_point_source_vector(hp::StaticMappingQ1<dim,spacedim>::mapping_collection,
+                               dof_handler,
+                               p, orientation, rhs_vector);
+  }
+
+  // Assemble the boundary right hand side: rhs_vector is zeroed, then for each selected
+  // boundary face the quadrature sum of rhs_function times every shape function is added.
+  template <int dim, int spacedim>
+  void
+  create_boundary_right_hand_side (const Mapping<dim, spacedim>      &mapping,
+                                   const DoFHandler<dim,spacedim>   &dof_handler,
+                                   const Quadrature<dim-1> &quadrature,
+                                   const Function<spacedim>     &rhs_function,
+                                   Vector<double>          &rhs_vector,
+                                   const std::set<types::boundary_id> &boundary_ids)
+  {
+    const FiniteElement<dim,spacedim> &fe  = dof_handler.get_fe();
+    Assert (fe.n_components() == rhs_function.n_components,
+            ExcDimensionMismatch(fe.n_components(), rhs_function.n_components));
+    Assert (rhs_vector.size() == dof_handler.n_dofs(),
+            ExcDimensionMismatch(rhs_vector.size(), dof_handler.n_dofs()));
+    // contributions are added with += below, so start from a zero vector
+    rhs_vector = 0;
+
+    UpdateFlags update_flags = UpdateFlags(update_values   |
+                                           update_quadrature_points |
+                                           update_JxW_values);
+    FEFaceValues<dim,spacedim> fe_values (mapping, fe, quadrature, update_flags);
+
+    const unsigned int dofs_per_cell = fe_values.dofs_per_cell,
+                       n_q_points    = fe_values.n_quadrature_points,
+                       n_components  = fe.n_components();
+
+    std::vector<types::global_dof_index> dofs (dofs_per_cell);
+    Vector<double> cell_vector (dofs_per_cell);
+
+    typename DoFHandler<dim,spacedim>::active_cell_iterator cell = dof_handler.begin_active(),
+                                                            endc = dof_handler.end();
+    // scalar elements query rhs_function via value_list, all others via vector_value_list
+    if (n_components==1)
+      {
+        std::vector<double> rhs_values(n_q_points);
+        // visit boundary faces; an empty boundary_ids set selects all of them
+        for (; cell!=endc; ++cell)
+          for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+            if (cell->face(face)->at_boundary () &&
+                (boundary_ids.empty() ||
+                 (boundary_ids.find (cell->face(face)->boundary_id())
+                  !=
+                  boundary_ids.end())))
+              {
+                fe_values.reinit(cell, face);
+
+                const std::vector<double> &weights   = fe_values.get_JxW_values ();
+                rhs_function.value_list (fe_values.get_quadrature_points(), rhs_values);
+
+                cell_vector = 0;
+                for (unsigned int point=0; point<n_q_points; ++point)
+                  for (unsigned int i=0; i<dofs_per_cell; ++i)
+                    cell_vector(i) += rhs_values[point] *
+                                      fe_values.shape_value(i,point) *
+                                      weights[point];
+
+                cell->get_dof_indices (dofs);
+
+                for (unsigned int i=0; i<dofs_per_cell; ++i)
+                  rhs_vector(dofs[i]) += cell_vector(i);
+              }
+      }
+    else
+      {
+        std::vector<Vector<double> > rhs_values(n_q_points, Vector<double>(n_components));
+        // visit boundary faces; an empty boundary_ids set selects all of them
+        for (; cell!=endc; ++cell)
+          for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+            if (cell->face(face)->at_boundary () &&
+                (boundary_ids.empty() ||
+                 (boundary_ids.find (cell->face(face)->boundary_id())
+                  !=
+                  boundary_ids.end())))
+              {
+                fe_values.reinit(cell, face);
+
+                const std::vector<double> &weights   = fe_values.get_JxW_values ();
+                rhs_function.vector_value_list (fe_values.get_quadrature_points(), rhs_values);
+
+                cell_vector = 0;
+
+                // Use the faster code if the
+                // FiniteElement is primitive
+                if (fe.is_primitive ())
+                  {
+                    for (unsigned int point=0; point<n_q_points; ++point)
+                      for (unsigned int i=0; i<dofs_per_cell; ++i)
+                        {
+                          const unsigned int component
+                            = fe.system_to_component_index(i).first;
+
+                          cell_vector(i) += rhs_values[point](component) *
+                                            fe_values.shape_value(i,point) *
+                                            weights[point];
+                        }
+                  }
+                else
+                  {
+                    // And the full featured
+                    // code, if vector valued
+                    // FEs are used
+                    for (unsigned int point=0; point<n_q_points; ++point)
+                      for (unsigned int i=0; i<dofs_per_cell; ++i)
+                        for (unsigned int comp_i = 0; comp_i < n_components; ++comp_i)
+                          if (fe.get_nonzero_components(i)[comp_i])
+                            {
+                              cell_vector(i)
+                              += rhs_values[point](comp_i) *
+                                 fe_values.shape_value_component(i,point,comp_i) *
+                                 weights[point];
+                            }
+                  }
+
+                cell->get_dof_indices (dofs);
+
+                for (unsigned int i=0; i<dofs_per_cell; ++i)
+                  rhs_vector(dofs[i]) += cell_vector(i);
+              }
+      }
+  }
+
+
+
+  template <int dim, int spacedim>
+  void
+  create_boundary_right_hand_side (const DoFHandler<dim,spacedim>   &dof_handler,
+                                   const Quadrature<dim-1> &quadrature,
+                                   const Function<spacedim>     &rhs_function,
+                                   Vector<double>          &rhs_vector,
+                                   const std::set<types::boundary_id> &boundary_ids)
+  {
+    // No mapping given: forward with the static Q1 mapping for the same <dim,spacedim> pair.
+    create_boundary_right_hand_side(StaticMappingQ1<dim,spacedim>::mapping, dof_handler,
+                                    quadrature,
+                                    rhs_function, rhs_vector, boundary_ids);
+  }
+
+  // hp version: zero rhs_vector, then for each selected boundary face add the quadrature sum of
+  // rhs_function times every shape function of the finite element active on that cell.
+  template <int dim, int spacedim>
+  void
+  create_boundary_right_hand_side (const hp::MappingCollection<dim,spacedim> &mapping,
+                                   const hp::DoFHandler<dim,spacedim> &dof_handler,
+                                   const hp::QCollection<dim-1>  &quadrature,
+                                   const Function<spacedim>      &rhs_function,
+                                   Vector<double>                &rhs_vector,
+                                   const std::set<types::boundary_id> &boundary_ids)
+  {
+    const hp::FECollection<dim,spacedim> &fe  = dof_handler.get_fe();
+    Assert (fe.n_components() == rhs_function.n_components,
+            ExcDimensionMismatch(fe.n_components(), rhs_function.n_components));
+    Assert (rhs_vector.size() == dof_handler.n_dofs(),
+            ExcDimensionMismatch(rhs_vector.size(), dof_handler.n_dofs()));
+    // contributions are added with += below, so start from a zero vector
+    rhs_vector = 0;
+
+    UpdateFlags update_flags = UpdateFlags(update_values   |
+                                           update_quadrature_points |
+                                           update_JxW_values);
+    hp::FEFaceValues<dim,spacedim> x_fe_values (mapping, fe, quadrature, update_flags);
+
+    const unsigned int n_components  = fe.n_components();
+
+    std::vector<types::global_dof_index> dofs (fe.max_dofs_per_cell());
+    Vector<double> cell_vector (fe.max_dofs_per_cell());
+    // cell_vector is sized for the largest element; only its first dofs_per_cell entries are used
+    typename hp::DoFHandler<dim,spacedim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+
+    if (n_components==1)
+      {
+        std::vector<double> rhs_values;
+        // visit boundary faces; an empty boundary_ids set selects all of them
+        for (; cell!=endc; ++cell)
+          for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+            if (cell->face(face)->at_boundary () &&
+                (boundary_ids.empty() ||
+                 (boundary_ids.find (cell->face(face)->boundary_id())
+                  !=
+                  boundary_ids.end())))
+              {
+                x_fe_values.reinit(cell, face);
+
+                const FEFaceValues<dim,spacedim> &fe_values = x_fe_values.get_present_fe_values();
+
+                const unsigned int dofs_per_cell = fe_values.dofs_per_cell,
+                                   n_q_points    = fe_values.n_quadrature_points;
+                rhs_values.resize (n_q_points);
+
+                const std::vector<double> &weights   = fe_values.get_JxW_values ();
+                rhs_function.value_list (fe_values.get_quadrature_points(), rhs_values);
+
+                cell_vector = 0;
+                for (unsigned int point=0; point<n_q_points; ++point)
+                  for (unsigned int i=0; i<dofs_per_cell; ++i)
+                    cell_vector(i) += rhs_values[point] *
+                                      fe_values.shape_value(i,point) *
+                                      weights[point];
+
+                dofs.resize(dofs_per_cell);
+                cell->get_dof_indices (dofs);
+
+                for (unsigned int i=0; i<dofs_per_cell; ++i)
+                  rhs_vector(dofs[i]) += cell_vector(i);
+              }
+      }
+    else
+      {
+        std::vector<Vector<double> > rhs_values;
+        // visit boundary faces; an empty boundary_ids set selects all of them
+        for (; cell!=endc; ++cell)
+          for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+            if (cell->face(face)->at_boundary () &&
+                (boundary_ids.empty() ||
+                 (boundary_ids.find (cell->face(face)->boundary_id())
+                  !=
+                  boundary_ids.end())))
+              {
+                x_fe_values.reinit(cell, face);
+
+                const FEFaceValues<dim,spacedim> &fe_values = x_fe_values.get_present_fe_values();
+
+                const unsigned int dofs_per_cell = fe_values.dofs_per_cell,
+                                   n_q_points    = fe_values.n_quadrature_points;
+                rhs_values.resize (n_q_points, Vector<double>(n_components));
+
+                const std::vector<double> &weights   = fe_values.get_JxW_values ();
+                rhs_function.vector_value_list (fe_values.get_quadrature_points(), rhs_values);
+
+                cell_vector = 0;
+
+                // Use the faster code if the
+                // FiniteElement is primitive
+                if (cell->get_fe().is_primitive ())
+                  {
+                    for (unsigned int point=0; point<n_q_points; ++point)
+                      for (unsigned int i=0; i<dofs_per_cell; ++i)
+                        {
+                          const unsigned int component
+                            = cell->get_fe().system_to_component_index(i).first;
+
+                          cell_vector(i) += rhs_values[point](component) *
+                                            fe_values.shape_value(i,point) *
+                                            weights[point];
+                        }
+                  }
+                else
+                  {
+                    // And the full featured
+                    // code, if vector valued
+                    // FEs are used
+                    for (unsigned int point=0; point<n_q_points; ++point)
+                      for (unsigned int i=0; i<dofs_per_cell; ++i)
+                        for (unsigned int comp_i = 0; comp_i < n_components; ++comp_i)
+                          if (cell->get_fe().get_nonzero_components(i)[comp_i])
+                            {
+                              cell_vector(i)
+                              += rhs_values[point](comp_i) *
+                                 fe_values.shape_value_component(i,point,comp_i) *
+                                 weights[point];
+                            }
+                  }
+                dofs.resize(dofs_per_cell);
+                cell->get_dof_indices (dofs);
+
+                for (unsigned int i=0; i<dofs_per_cell; ++i)
+                  rhs_vector(dofs[i]) += cell_vector(i);
+              }
+      }
+  }
+
+
+
+  template <int dim, int spacedim>
+  void
+  create_boundary_right_hand_side (const hp::DoFHandler<dim,spacedim> &dof_handler,
+                                   const hp::QCollection<dim-1>  &quadrature,
+                                   const Function<spacedim>      &rhs_function,
+                                   Vector<double>                &rhs_vector,
+                                   const std::set<types::boundary_id> &boundary_ids)
+  {
+    // No mapping collection given: forward with the static Q1 collection for <dim,spacedim>.
+    create_boundary_right_hand_side(hp::StaticMappingQ1<dim,spacedim>::mapping_collection,
+                                    dof_handler, quadrature,
+                                    rhs_function, rhs_vector, boundary_ids);
+  }
+
+
+
+// ----------- interpolate_boundary_values for std::map --------------------
+
+  namespace
+  {
+    // Boundary value interpolation in 1d. In higher dimensions FEValues is
+    // used to figure out what to do on faces, but in 1d the faces are just
+    // points, so it is far simpler to work on the individual vertices
+    // directly. The mapping argument is therefore unused, and the last
+    // (tag) argument only serves to select this overload when the
+    // dimension is one. Entries are written into boundary_values for the
+    // vertex dofs whose component is selected in component_mask.
+    template <typename DoFHandlerType, template <int,int> class M_or_MC>
+    static inline
+    void do_interpolate_boundary_values
+    (const M_or_MC<DoFHandlerType::dimension, DoFHandlerType::space_dimension> &,
+     const DoFHandlerType                                              &dof,
+     const typename FunctionMap<DoFHandlerType::space_dimension>::type &function_map,
+     std::map<types::global_dof_index,double>                          &boundary_values,
+     const ComponentMask                                               &component_mask,
+     const dealii::internal::int2type<1>)
+    {
+      const unsigned int dim = DoFHandlerType::dimension;
+      const unsigned int spacedim = DoFHandlerType::space_dimension;
+      typedef typename FunctionMap<DoFHandlerType::space_dimension>::type::const_iterator function_iterator;
+
+      Assert (component_mask.represents_n_components(dof.get_fe().n_components()),
+              ExcMessage ("The number of components in the mask has to be either "
+                          "zero or equal to the number of components in the finite "
+                          "element."));
+
+      // an empty map means there is nothing to interpolate
+      if (function_map.empty())
+        return;
+
+      typename DoFHandlerType::active_cell_iterator cell = dof.begin_active();
+      const typename DoFHandlerType::active_cell_iterator end_cell = dof.end();
+      for (; cell != end_cell; ++cell)
+        for (unsigned int vertex_no=0;
+             vertex_no<GeometryInfo<dim>::faces_per_cell; ++vertex_no)
+          {
+            // skip vertices that are not at the boundary ...
+            if (!cell->at_boundary(vertex_no))
+              continue;
+
+            // ... as well as those whose boundary indicator has no function
+            // assigned to it in the map
+            const function_iterator entry
+              = function_map.find(cell->face(vertex_no)->boundary_id());
+            if (entry == function_map.end())
+              continue;
+
+            const Function<DoFHandlerType::space_dimension> &boundary_function
+              = *(entry->second);
+
+            // the finite element that is active on this cell
+            const FiniteElement<dim,spacedim> &fe = cell->get_fe();
+            const unsigned int n_fe_components = fe.n_components();
+            Assert (n_fe_components == boundary_function.n_components,
+                    ExcDimensionMismatch(n_fe_components,
+                                         boundary_function.n_components));
+            Assert (component_mask.n_selected_components(n_fe_components) > 0,
+                    ComponentMask::ExcNoComponentSelected());
+
+            // evaluate the boundary function only once at this vertex, no
+            // matter how many components it has; scalar functions are
+            // queried through value(), all others through vector_value()
+            Vector<double> function_values (n_fe_components);
+            if (n_fe_components == 1)
+              function_values(0) = boundary_function.value (cell->vertex(vertex_no));
+            else
+              boundary_function.vector_value (cell->vertex(vertex_no),
+                                              function_values);
+
+            // store the value for every vertex dof whose component is
+            // selected in the mask. writing through operator[] also
+            // creates the map entry if it did not exist beforehand
+            for (unsigned int i=0; i<fe.dofs_per_vertex; ++i)
+              {
+                const unsigned int component
+                  = fe.face_system_to_component_index(i).first;
+                if (component_mask[component])
+                  boundary_values[cell->vertex_dof_index(vertex_no, i,
+                                                         cell->active_fe_index())]
+                    = function_values(component);
+              }
+          }
+    }
+
+
+
+    // template for the case dim!=1: for each boundary face whose indicator is a key of
+    // function_map, evaluate that function at the face support points and store the values
+    // for the components selected in component_mask. Due to the template argument dim_ this
+    // is less specialized than the 1d function above, which is thus preferred if dim==1
+    template <typename DoFHandlerType, template <int,int> class M_or_MC, int dim_>
+    static inline
+    void
+    do_interpolate_boundary_values
+    (const M_or_MC<DoFHandlerType::dimension, DoFHandlerType::space_dimension> &mapping,
+     const DoFHandlerType                                                      &dof,
+     const typename FunctionMap<DoFHandlerType::space_dimension>::type         &function_map,
+     std::map<types::global_dof_index,double>                                  &boundary_values,
+     const ComponentMask                                                       &component_mask,
+     const dealii::internal::int2type<dim_>)
+    {
+      const unsigned int dim = DoFHandlerType::dimension;
+      const unsigned int spacedim=DoFHandlerType::space_dimension;
+
+      Assert (component_mask.represents_n_components(dof.get_fe().n_components()),
+              ExcMessage ("The number of components in the mask has to be either "
+                          "zero or equal to the number of components in the finite "
+                          "element."));
+
+
+      // if for whatever reason we were passed an empty map, return
+      // immediately
+      if (function_map.size() == 0)
+        return;
+
+      Assert (function_map.find(numbers::internal_face_boundary_id) == function_map.end(),
+              ExcMessage("You cannot specify the special boundary indicator "
+                         "for interior faces in your function map."));
+
+      const unsigned int        n_components = DoFTools::n_components(dof);
+      const bool                fe_is_system = (n_components != 1);
+
+      for (typename FunctionMap<spacedim>::type::const_iterator i=function_map.begin();
+           i!=function_map.end(); ++i)
+        Assert (n_components == i->second->n_components,
+                ExcDimensionMismatch(n_components, i->second->n_components));
+
+      // field to store the indices
+      std::vector<types::global_dof_index> face_dofs;
+      face_dofs.reserve (DoFTools::max_dofs_per_face(dof));
+
+      // (the locations of the dofs on a face are not stored here: they are
+      // read directly from the FEFaceValues object inside the cell loop)
+
+      // array to store the values of the boundary function at the boundary
+      // points. have two arrays for scalar and vector functions to use the
+      // more efficient one respectively
+      std::vector<double>          dof_values_scalar;
+      std::vector<Vector<double> > dof_values_system;
+      dof_values_scalar.reserve (DoFTools::max_dofs_per_face (dof));
+      dof_values_system.reserve (DoFTools::max_dofs_per_face (dof));
+
+      // before we start with the loop over all cells create an hp::FEValues
+      // object that holds the interpolation points of all finite elements
+      // that may ever be in use
+      dealii::hp::FECollection<dim,spacedim> finite_elements (dof.get_fe());
+      dealii::hp::QCollection<dim-1>  q_collection;
+      for (unsigned int f=0; f<finite_elements.size(); ++f)
+        {
+          const FiniteElement<dim,spacedim> &fe = finite_elements[f];
+
+          // generate a quadrature rule on the face from the unit support
+          // points. this will be used to obtain the quadrature points on the
+          // real cell's face
+          //
+          // to do this, we check whether the FE has support points on the
+          // face at all:
+          if (fe.has_face_support_points())
+            q_collection.push_back (Quadrature<dim-1>(fe.get_unit_face_support_points()));
+          else
+            {
+              // if not, then we should try a more clever way. the idea is
+              // that a finite element may not offer support points for all
+              // its shape functions, but maybe only some. if it offers
+              // support points for the components we are interested in in
+              // this function, then that's fine. if not, the function we call
+              // in the finite element will raise an exception. the support
+              // points for the other shape functions are left uninitialized
+              // (well, initialized by the default constructor), since we
+              // don't need them anyway.
+              //
+              // As a detour, we must make sure we only query
+              // face_system_to_component_index if the index corresponds to a
+              // primitive shape function. since all the components we are
+              // interested in must be primitive (this is asserted in the loop
+              // over cells below), we can safely put such a check in front
+              std::vector<Point<dim-1> > unit_support_points (fe.dofs_per_face);
+
+              for (unsigned int i=0; i<fe.dofs_per_face; ++i)
+                if (fe.is_primitive (fe.face_to_cell_index(i,0)))
+                  if (component_mask[fe.face_system_to_component_index(i).first]
+                      == true)
+                    unit_support_points[i] = fe.unit_face_support_point(i);
+
+              q_collection.push_back (Quadrature<dim-1>(unit_support_points));
+            }
+        }
+      // now that we have a q_collection object with all the right quadrature
+      // points, create an hp::FEFaceValues object that we can use to evaluate
+      // the boundary values at
+      dealii::hp::MappingCollection<dim,spacedim> mapping_collection (mapping);
+      dealii::hp::FEFaceValues<dim,spacedim> x_fe_values (mapping_collection, finite_elements, q_collection,
+                                                          update_quadrature_points);
+
+      typename DoFHandlerType::active_cell_iterator cell = dof.begin_active(),
+                                                    endc = dof.end();
+      for (; cell!=endc; ++cell)
+        if (!cell->is_artificial())
+          for (unsigned int face_no = 0; face_no < GeometryInfo<dim>::faces_per_cell;
+               ++face_no)
+            {
+              const FiniteElement<dim,spacedim> &fe = cell->get_fe();
+
+              // we can presently deal only with primitive elements for
+              // boundary values. this does not preclude us using
+              // non-primitive elements in components that we aren't
+              // interested in, however. make sure that all shape functions
+              // that are non-zero for the components we are interested in,
+              // are in fact primitive
+              for (unsigned int i=0; i<cell->get_fe().dofs_per_cell; ++i)
+                {
+                  const ComponentMask &nonzero_component_array
+                    = cell->get_fe().get_nonzero_components (i);
+                  for (unsigned int c=0; c<n_components; ++c)
+                    if ((nonzero_component_array[c] == true)
+                        &&
+                        (component_mask[c] == true))
+                      Assert (cell->get_fe().is_primitive (i),
+                              ExcMessage ("This function can only deal with requested boundary "
+                                          "values that correspond to primitive (scalar) base "
+                                          "elements"));
+                }
+
+              const typename DoFHandlerType::face_iterator face = cell->face(face_no);
+              const types::boundary_id boundary_component = face->boundary_id();
+
+              // see if this face is part of the boundaries for which we are
+              // supposed to do something, and also see if the finite element
+              // in use here has DoFs on the face at all
+              if ((function_map.find(boundary_component) != function_map.end())
+                  &&
+                  (cell->get_fe().dofs_per_face > 0))
+                {
+                  // face is of the right component
+                  x_fe_values.reinit(cell, face_no);
+                  const dealii::FEFaceValues<dim,spacedim> &fe_values =
+                    x_fe_values.get_present_fe_values();
+
+                  // get indices, physical location and boundary values of
+                  // dofs on this face
+                  face_dofs.resize (fe.dofs_per_face);
+                  face->get_dof_indices (face_dofs, cell->active_fe_index());
+                  const std::vector<Point<spacedim> > &dof_locations
+                    = fe_values.get_quadrature_points ();
+
+                  if (fe_is_system)
+                    {
+                      // resize array. avoid construction of a memory
+                      // allocating temporary if possible
+                      if (dof_values_system.size() < fe.dofs_per_face)
+                        dof_values_system.resize (fe.dofs_per_face,
+                                                  Vector<double>(fe.n_components()));
+                      else
+                        dof_values_system.resize (fe.dofs_per_face);
+
+                      function_map.find(boundary_component)->second
+                      ->vector_value_list (dof_locations, dof_values_system);
+
+                      // enter those dofs into the list that match the
+                      // component signature. avoid the usual complication
+                      // that we can't just use *_system_to_component_index
+                      // for non-primitive FEs
+                      for (unsigned int i=0; i<face_dofs.size(); ++i)
+                        {
+                          unsigned int component;
+                          if (fe.is_primitive())
+                            component = fe.face_system_to_component_index(i).first;
+                          else
+                            {
+                              // non-primitive case. make sure that this
+                              // particular shape function _is_ primitive, and
+                              // get at its component. use usual trick to
+                              // transfer face dof index to cell dof index
+                              const unsigned int cell_i
+                                = (dim == 1 ?
+                                   i
+                                   :
+                                   (dim == 2 ?
+                                    (i<2*fe.dofs_per_vertex ? i : i+2*fe.dofs_per_vertex)
+                                    :
+                                    (dim == 3 ?
+                                     (i<4*fe.dofs_per_vertex ?
+                                      i
+                                      :
+                                      (i<4*fe.dofs_per_vertex+4*fe.dofs_per_line ?
+                                       i+4*fe.dofs_per_vertex
+                                       :
+                                       i+4*fe.dofs_per_vertex+8*fe.dofs_per_line))
+                                     :
+                                     numbers::invalid_unsigned_int)));
+                              Assert (cell_i < fe.dofs_per_cell, ExcInternalError());
+
+                              // make sure that if this is not a primitive
+                              // shape function, then all the corresponding
+                              // components in the mask are not set
+                              if (!fe.is_primitive(cell_i))
+                                for (unsigned int c=0; c<n_components; ++c)
+                                  if (fe.get_nonzero_components(cell_i)[c])
+                                    Assert (component_mask[c] == false,
+                                            FETools::ExcFENotPrimitive());
+
+                              // let's pick the first of possibly more than
+                              // one non-zero components. if shape function is
+                              // non-primitive, then we will ignore the result
+                              // in the following anyway, otherwise there's
+                              // only one non-zero component which we will use
+                              component = fe.get_nonzero_components(cell_i).first_selected_component();
+                            }
+
+                          if (component_mask[component] == true)
+                            boundary_values[face_dofs[i]] = dof_values_system[i](component);
+                        }
+                    }
+                  else
+                    // fe has only one component, so save some computations
+                    {
+                      // get only the one component that this function has
+                      dof_values_scalar.resize (fe.dofs_per_face);
+                      function_map.find(boundary_component)->second
+                      ->value_list (dof_locations, dof_values_scalar, 0);
+
+                      // enter into list
+
+                      for (unsigned int i=0; i<face_dofs.size(); ++i)
+                        boundary_values[face_dofs[i]] = dof_values_scalar[i];
+                    }
+                }
+            }
+    } // end of do_interpolate_boundary_values
+  } // end of namespace internal
+
+
+
+  template <typename DoFHandlerType>
+  void
+  interpolate_boundary_values
+  (const Mapping<DoFHandlerType::dimension, DoFHandlerType::space_dimension> &mapping,
+   const DoFHandlerType                                                      &dof,
+   const typename FunctionMap<DoFHandlerType::space_dimension>::type         &function_map,
+   std::map<types::global_dof_index,double>                                  &boundary_values,
+   const ComponentMask                                                       &component_mask_)
+  {
+    // The dimension tag chooses between the 1d and the general implementation
+    // of do_interpolate_boundary_values, which does the actual work.
+    typedef dealii::internal::int2type<DoFHandlerType::dimension> dimension_tag;
+    do_interpolate_boundary_values (mapping, dof, function_map, boundary_values, component_mask_, dimension_tag());
+  }
+
+
+
+  template <typename DoFHandlerType>
+  void
+  interpolate_boundary_values
+  (const Mapping<DoFHandlerType::dimension, DoFHandlerType::space_dimension> &mapping,
+   const DoFHandlerType                                                      &dof,
+   const types::boundary_id                                                   boundary_component,
+   const Function<DoFHandlerType::space_dimension>                           &boundary_function,
+   std::map<types::global_dof_index,double>                                  &boundary_values,
+   const ComponentMask                                                       &component_mask)
+  {
+    // wrap the single (indicator, function) pair into a map and reuse the map-based overload
+    typename FunctionMap<DoFHandlerType::space_dimension>::type single_function;
+    single_function[boundary_component] = &boundary_function;
+    interpolate_boundary_values (mapping, dof, single_function, boundary_values, component_mask);
+  }
+
+
+  template <int dim, int spacedim>
+  void
+  interpolate_boundary_values
+  (const hp::MappingCollection<dim,spacedim>  &mapping,
+   const hp::DoFHandler<dim,spacedim>         &dof,
+   const typename FunctionMap<spacedim>::type &function_map,
+   std::map<types::global_dof_index,double>   &boundary_values,
+   const ComponentMask                        &component_mask_)
+  {
+    // hp case: the dimension tag selects the 1d or the general implementation
+    typedef dealii::internal::int2type<dim> dimension_tag;
+    do_interpolate_boundary_values (mapping, dof, function_map, boundary_values, component_mask_, dimension_tag());
+  }
+
+
+
+  template <typename DoFHandlerType>
+  void
+  interpolate_boundary_values
+  (const DoFHandlerType                            &dof,
+   const types::boundary_id                         boundary_component,
+   const Function<DoFHandlerType::space_dimension> &boundary_function,
+   std::map<types::global_dof_index,double>        &boundary_values,
+   const ComponentMask                             &component_mask)
+  {
+    // no mapping was given: pass the static Q1 mapping to the mapping-based overload
+    typedef StaticMappingQ1<DoFHandlerType::dimension,DoFHandlerType::space_dimension> Q1;
+    interpolate_boundary_values(Q1::mapping, dof, boundary_component, boundary_function, boundary_values, component_mask);
+  }
+
+
+
+  // Function-map overload without an explicit mapping: forwards to the
+  // overload taking a mapping, passing the StaticMappingQ1 mapping object.
+  template <typename DoFHandlerType>
+  void
+  interpolate_boundary_values
+  (const DoFHandlerType                                              &dof,
+   const typename FunctionMap<DoFHandlerType::space_dimension>::type &function_map,
+   std::map<types::global_dof_index,double>                          &boundary_values,
+   const ComponentMask                                               &component_mask)
+  {
+    typedef StaticMappingQ1<DoFHandlerType::dimension,DoFHandlerType::space_dimension>
+    default_mapping_type;
+
+    interpolate_boundary_values (default_mapping_type::mapping,
+                                 dof,
+                                 function_map,
+                                 boundary_values,
+                                 component_mask);
+  }
+
+
+
+
+// ----------- interpolate_boundary_values for ConstraintMatrix --------------
+
+
+
+  // ConstraintMatrix variant: computes the boundary values into a temporary
+  // map and then turns every entry into an inhomogeneous constraint, unless
+  // the constraints object cannot store that line or the degree of freedom
+  // is already constrained.
+  template <typename DoFHandlerType>
+  void
+  interpolate_boundary_values
+  (const Mapping<DoFHandlerType::dimension, DoFHandlerType::space_dimension> &mapping,
+   const DoFHandlerType                                                      &dof,
+   const typename FunctionMap<DoFHandlerType::space_dimension>::type         &function_map,
+   ConstraintMatrix                                                          &constraints,
+   const ComponentMask                                                       &component_mask_)
+  {
+    typedef std::map<types::global_dof_index,double> value_map_type;
+
+    value_map_type boundary_values;
+    interpolate_boundary_values (mapping, dof, function_map,
+                                 boundary_values, component_mask_);
+
+    for (value_map_type::const_iterator entry = boundary_values.begin();
+         entry != boundary_values.end(); ++entry)
+      {
+        const types::global_dof_index dof_index = entry->first;
+
+        // skip lines we cannot store as well as existing constraints
+        if (!constraints.can_store_line (dof_index)
+            ||
+            constraints.is_constrained(dof_index))
+          continue;
+
+        constraints.add_line (dof_index);
+        constraints.set_inhomogeneity (dof_index, entry->second);
+      }
+  }
+
+
+
+  // Single-boundary convenience overload for ConstraintMatrix: wraps the
+  // (boundary id, function) pair into a one-entry function map and forwards
+  // to the overload that takes such a map.
+  template <typename DoFHandlerType>
+  void
+  interpolate_boundary_values
+  (const Mapping<DoFHandlerType::dimension, DoFHandlerType::space_dimension> &mapping,
+   const DoFHandlerType                                                      &dof,
+   const types::boundary_id                                                   boundary_component,
+   const Function<DoFHandlerType::space_dimension>                           &boundary_function,
+   ConstraintMatrix                                                          &constraints,
+   const ComponentMask                                                       &component_mask)
+  {
+    typedef typename FunctionMap<DoFHandlerType::space_dimension>::type function_map_type;
+
+    function_map_type single_boundary_map;
+    single_boundary_map[boundary_component] = &boundary_function;
+
+    interpolate_boundary_values (mapping, dof, single_boundary_map,
+                                 constraints, component_mask);
+  }
+
+
+
+  // Single-boundary ConstraintMatrix overload without an explicit mapping:
+  // forwards with the StaticMappingQ1 mapping object.
+  template <typename DoFHandlerType>
+  void
+  interpolate_boundary_values
+  (const DoFHandlerType                            &dof,
+   const types::boundary_id                         boundary_component,
+   const Function<DoFHandlerType::space_dimension> &boundary_function,
+   ConstraintMatrix                                &constraints,
+   const ComponentMask                             &component_mask)
+  {
+    typedef StaticMappingQ1<DoFHandlerType::dimension,DoFHandlerType::space_dimension>
+    default_mapping_type;
+
+    interpolate_boundary_values (default_mapping_type::mapping,
+                                 dof,
+                                 boundary_component,
+                                 boundary_function,
+                                 constraints,
+                                 component_mask);
+  }
+
+
+
+  // Function-map ConstraintMatrix overload without an explicit mapping:
+  // forwards with the StaticMappingQ1 mapping object.
+  template <typename DoFHandlerType>
+  void
+  interpolate_boundary_values
+  (const DoFHandlerType                                              &dof,
+   const typename FunctionMap<DoFHandlerType::space_dimension>::type &function_map,
+   ConstraintMatrix                                                  &constraints,
+   const ComponentMask                                               &component_mask)
+  {
+    typedef StaticMappingQ1<DoFHandlerType::dimension,DoFHandlerType::space_dimension>
+    default_mapping_type;
+
+    interpolate_boundary_values (default_mapping_type::mapping,
+                                 dof,
+                                 function_map,
+                                 constraints,
+                                 component_mask);
+  }
+
+
+
+
+// -------- implementation for project_boundary_values with std::map --------
+
+
+  namespace
+  {
+    /**
+     * Common implementation behind the project_boundary_values() overloads
+     * for the non-hp and the hp classes.
+     *
+     * In 1d the call is forwarded to interpolate_boundary_values().
+     * Otherwise a boundary mass matrix and right hand side are assembled
+     * with MatrixCreator::create_boundary_mass_matrix(), the system is
+     * solved with an SSOR-preconditioned CG solver, and the solution is
+     * written into @p boundary_values, keyed by global DoF index.
+     *
+     * Boundary degrees of freedom whose diagonal mass matrix entry is
+     * smaller than 1e-8 times the largest diagonal entry are excluded from
+     * the solve and are not written to @p boundary_values.
+     *
+     * @param mapping            Mapping or mapping collection handed to the
+     *                           assembly routine.
+     * @param dof                DoF handler describing the finite element space.
+     * @param boundary_functions Map from boundary indicator to the function
+     *                           to be projected on that part of the boundary.
+     * @param q                  Face quadrature (or quadrature collection).
+     * @param boundary_values    Output; entries for the projected boundary
+     *                           DoFs are added or overwritten.
+     * @param component_mapping  If empty, the identity mapping over the
+     *                           components of the finite element is used;
+     *                           otherwise its size must equal the number of
+     *                           components of the finite element.
+     */
+    template <int dim, int spacedim, template <int, int> class DoFHandlerType,
+              template <int,int> class M_or_MC, template <int> class Q_or_QC>
+    void
+    do_project_boundary_values (const M_or_MC<dim, spacedim>               &mapping,
+                                const DoFHandlerType<dim, spacedim>        &dof,
+                                const typename FunctionMap<spacedim>::type &boundary_functions,
+                                const Q_or_QC<dim-1>                       &q,
+                                std::map<types::global_dof_index,double>   &boundary_values,
+                                std::vector<unsigned int>                   component_mapping)
+    {
+      // in 1d, projection onto the 0d end points == interpolation
+      if (dim == 1)
+        {
+          Assert (component_mapping.size() == 0, ExcNotImplemented());
+          interpolate_boundary_values (mapping, dof, boundary_functions,
+                                       boundary_values, ComponentMask());
+          return;
+        }
+
+      //TODO:[?] In project_boundary_values, no condensation of sparsity
+      //    structures, matrices and right hand sides or distribution of
+      //    solution vectors is performed. This is ok for dim<3 because then
+      //    there are no constrained nodes on the boundary, but is not
+      //    acceptable for higher dimensions. Fix this.
+
+      if (component_mapping.size() == 0)
+        {
+          // only inspect the first boundary function if there is one:
+          // dereferencing begin() of an empty map is undefined behavior
+          if (!boundary_functions.empty())
+            {
+              AssertDimension (dof.get_fe().n_components(), boundary_functions.begin()->second->n_components);
+            }
+
+          // no mapping was given, so use the identity: component i -> i
+          component_mapping.resize(dof.get_fe().n_components());
+          for (unsigned int i= 0 ; i < component_mapping.size() ; ++i)
+            component_mapping[i] = i;
+        }
+      else
+        AssertDimension (dof.get_fe().n_components(), component_mapping.size());
+
+      // collect the boundary indicators we were given functions for and
+      // number the degrees of freedom located on those boundary parts
+      std::vector<types::global_dof_index> dof_to_boundary_mapping;
+      std::set<types::boundary_id> selected_boundary_components;
+      for (typename FunctionMap<spacedim>::type::const_iterator i=boundary_functions.begin();
+           i!=boundary_functions.end(); ++i)
+        selected_boundary_components.insert (i->first);
+
+      DoFTools::map_dof_to_boundary_indices (dof, selected_boundary_components,
+                                             dof_to_boundary_mapping);
+
+      // Done if no degrees of freedom on the boundary. Query the count only
+      // once and reuse it for the sparsity pattern below.
+      const types::global_dof_index n_selected_boundary_dofs
+        = dof.n_boundary_dofs (boundary_functions);
+      if (n_selected_boundary_dofs == 0)
+        return;
+
+      // set up sparsity structure
+      DynamicSparsityPattern dsp(n_selected_boundary_dofs,
+                                 n_selected_boundary_dofs);
+      DoFTools::make_boundary_sparsity_pattern (dof,
+                                                boundary_functions,
+                                                dof_to_boundary_mapping,
+                                                dsp);
+      SparsityPattern sparsity;
+      sparsity.copy_from(dsp);
+
+
+
+      // note: for three or more dimensions, there
+      // may be constrained nodes on the boundary
+      // in this case the boundary mass matrix has
+      // to be condensed and the solution is to
+      // be distributed afterwards, which is not
+      // yet implemented. The reason for this is
+      // that we cannot simply use the condense
+      // family of functions, since the matrices
+      // and vectors do not use the global
+      // numbering but rather the boundary
+      // numbering, i.e. the condense function
+      // needs to use another indirection. There
+      // should be not many technical problems,
+      // but it needs to be implemented
+      if (dim>=3)
+        {
+#ifdef DEBUG
+          // Assert that there are no hanging nodes at the boundary: all
+          // active cells with a boundary face must be on the same level
+          int level = -1;
+          for (typename DoFHandlerType<dim,spacedim>::active_cell_iterator cell = dof.begin_active();
+               cell != dof.end(); ++cell)
+            for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+              {
+                if (cell->at_boundary(f))
+                  {
+                    if (level == -1)
+                      level = cell->level();
+                    else
+                      {
+                        Assert (level == cell->level(), ExcNotImplemented());
+                      }
+                  }
+              }
+#endif
+        }
+      sparsity.compress();
+
+
+      // make mass matrix and right hand side
+      SparseMatrix<double> mass_matrix(sparsity);
+      Vector<double>       rhs(sparsity.n_rows());
+
+
+      // no weight function is used, hence the null pointer argument
+      MatrixCreator::create_boundary_mass_matrix (mapping, dof, q,
+                                                  mass_matrix, boundary_functions,
+                                                  rhs, dof_to_boundary_mapping,
+                                                  static_cast<const Function<spacedim> *>(0),
+                                                  component_mapping);
+
+      // For certain weird elements,
+      // there might be degrees of
+      // freedom on the boundary, but
+      // their shape functions do not
+      // have support there. Let's
+      // eliminate them here.
+
+      // The Bogner-Fox-Schmidt element
+      // is an example for those.
+
+//TODO: Maybe we should figure out if the element really needs this
+
+      FilteredMatrix<Vector<double> > filtered_mass_matrix(mass_matrix, true);
+      FilteredMatrix<Vector<double> > filtered_precondition;
+      std::vector<bool> excluded_dofs(mass_matrix.m(), false);
+
+      // find the largest diagonal entry; it serves as the reference scale
+      // for deciding which rows are (numerically) empty
+      double max_element = 0.;
+      for (types::global_dof_index i=0; i<mass_matrix.m(); ++i)
+        if (mass_matrix.diag_element(i) > max_element)
+          max_element = mass_matrix.diag_element(i);
+
+      // constrain rows with a negligible diagonal entry to zero and remember
+      // them so that they are not reported in boundary_values
+      for (types::global_dof_index i=0; i<mass_matrix.m(); ++i)
+        if (mass_matrix.diag_element(i) < 1.e-8 * max_element)
+          {
+            filtered_mass_matrix.add_constraint(i, 0.);
+            filtered_precondition.add_constraint(i, 0.);
+            mass_matrix.diag_element(i) = 1.;
+            excluded_dofs[i] = true;
+          }
+
+      Vector<double> boundary_projection (rhs.size());
+
+      // cannot reduce residual in a useful way if we are close to the square
+      // root of the minimal double value
+      if (rhs.norm_sqr() < 1e28 * std::numeric_limits<double>::min())
+        boundary_projection = 0;
+      else
+        {
+          // Allow for a maximum of 5*n steps to reduce the residual by 10^-12. n
+          // steps may not be sufficient, since roundoff errors may accumulate for
+          // badly conditioned matrices
+          ReductionControl        control(5*rhs.size(), 0., 1.e-12, false, false);
+          GrowingVectorMemory<> memory;
+          SolverCG<>              cg(control,memory);
+
+          PreconditionSSOR<> prec;
+          prec.initialize(mass_matrix, 1.2);
+          filtered_precondition.initialize(prec, true);
+          // solve
+          cg.solve (filtered_mass_matrix, boundary_projection, rhs, filtered_precondition);
+          filtered_precondition.apply_constraints(boundary_projection, true);
+          filtered_precondition.clear();
+        }
+      // fill in boundary values
+      for (types::global_dof_index i=0; i<dof_to_boundary_mapping.size(); ++i)
+        if (dof_to_boundary_mapping[i] != DoFHandler<dim,spacedim>::invalid_dof_index
+            && ! excluded_dofs[dof_to_boundary_mapping[i]])
+          {
+            AssertIsFinite(boundary_projection(dof_to_boundary_mapping[i]));
+
+            // this dof is on one of the
+            // interesting boundary parts
+            //
+            // remember: i is the global dof
+            // number, dof_to_boundary_mapping[i]
+            // is the number on the boundary and
+            // thus in the solution vector
+            boundary_values[i] = boundary_projection(dof_to_boundary_mapping[i]);
+          }
+    }
+  }
+
+  // Non-hp entry point with an explicit mapping; the actual work is done
+  // by do_project_boundary_values().
+  template <int dim, int spacedim>
+  void
+  project_boundary_values (const Mapping<dim, spacedim>   &mapping,
+                           const DoFHandler<dim, spacedim> &dof,
+                           const typename FunctionMap<spacedim>::type &boundary_functions,
+                           const Quadrature<dim-1>        &q,
+                           std::map<types::global_dof_index,double>  &boundary_values,
+                           std::vector<unsigned int>       component_mapping)
+  {
+    do_project_boundary_values (mapping,
+                                dof,
+                                boundary_functions,
+                                q,
+                                boundary_values,
+                                component_mapping);
+  }
+
+
+
+  // Non-hp entry point without an explicit mapping: forwards with the
+  // StaticMappingQ1 mapping object.
+  template <int dim, int spacedim>
+  void
+  project_boundary_values (const DoFHandler<dim,spacedim>    &dof,
+                           const typename FunctionMap<spacedim>::type &boundary_functions,
+                           const Quadrature<dim-1>  &q,
+                           std::map<types::global_dof_index,double> &boundary_values,
+                           std::vector<unsigned int> component_mapping)
+  {
+    typedef StaticMappingQ1<dim,spacedim> default_mapping_type;
+
+    project_boundary_values (default_mapping_type::mapping,
+                             dof,
+                             boundary_functions,
+                             q,
+                             boundary_values,
+                             component_mapping);
+  }
+
+
+
+  // hp entry point with an explicit mapping collection; the actual work is
+  // done by do_project_boundary_values().
+  template <int dim, int spacedim>
+  void project_boundary_values (const hp::MappingCollection<dim, spacedim>       &mapping,
+                                const hp::DoFHandler<dim,spacedim>    &dof,
+                                const typename FunctionMap<spacedim>::type &boundary_functions,
+                                const hp::QCollection<dim-1>  &q,
+                                std::map<types::global_dof_index,double> &boundary_values,
+                                std::vector<unsigned int> component_mapping)
+  {
+    do_project_boundary_values (mapping, dof, boundary_functions, q,
+                                boundary_values, component_mapping);
+  }
+
+
+
+  // hp entry point without an explicit mapping collection: forwards with
+  // the hp::StaticMappingQ1 mapping collection object.
+  template <int dim, int spacedim>
+  void project_boundary_values (const hp::DoFHandler<dim,spacedim>    &dof,
+                                const typename FunctionMap<spacedim>::type &boundary_function,
+                                const hp::QCollection<dim-1>  &q,
+                                std::map<types::global_dof_index,double> &boundary_values,
+                                std::vector<unsigned int> component_mapping)
+  {
+    typedef hp::StaticMappingQ1<dim,spacedim> default_mapping_type;
+
+    project_boundary_values (default_mapping_type::mapping_collection,
+                             dof,
+                             boundary_function,
+                             q,
+                             boundary_values,
+                             component_mapping);
+  }
+
+
+  // ----- implementation for project_boundary_values with ConstraintMatrix -----
+
+
+
+  // ConstraintMatrix variant: projects into a temporary map and then turns
+  // every entry whose degree of freedom is not yet constrained into an
+  // inhomogeneous constraint.
+  //
+  // NOTE(review): unlike the ConstraintMatrix variant of
+  // interpolate_boundary_values(), can_store_line() is not checked here
+  // before add_line() -- confirm that this is intended.
+  template <int dim, int spacedim>
+  void
+  project_boundary_values (const Mapping<dim, spacedim>       &mapping,
+                           const DoFHandler<dim,spacedim>    &dof,
+                           const typename FunctionMap<spacedim>::type &boundary_functions,
+                           const Quadrature<dim-1>  &q,
+                           ConstraintMatrix &constraints,
+                           std::vector<unsigned int> component_mapping)
+  {
+    typedef std::map<types::global_dof_index,double> value_map_type;
+
+    value_map_type boundary_values;
+    project_boundary_values (mapping, dof, boundary_functions, q,
+                             boundary_values, component_mapping);
+
+    for (value_map_type::const_iterator entry = boundary_values.begin();
+         entry != boundary_values.end(); ++entry)
+      {
+        const types::global_dof_index dof_index = entry->first;
+
+        // leave existing constraints untouched
+        if (constraints.is_constrained(dof_index))
+          continue;
+
+        constraints.add_line (dof_index);
+        constraints.set_inhomogeneity (dof_index, entry->second);
+      }
+  }
+
+
+
+  // ConstraintMatrix variant without an explicit mapping: forwards with the
+  // StaticMappingQ1 mapping object.
+  template <int dim, int spacedim>
+  void
+  project_boundary_values (const DoFHandler<dim,spacedim>    &dof,
+                           const typename FunctionMap<spacedim>::type &boundary_functions,
+                           const Quadrature<dim-1>  &q,
+                           ConstraintMatrix &constraints,
+                           std::vector<unsigned int> component_mapping)
+  {
+    typedef StaticMappingQ1<dim,spacedim> default_mapping_type;
+
+    project_boundary_values (default_mapping_type::mapping,
+                             dof,
+                             boundary_functions,
+                             q,
+                             constraints,
+                             component_mapping);
+  }
+
+
+
+
+  namespace internal
+  {
+    /**
+     * The dim DoF indices that together make up one vector-valued quantity
+     * at a single support point. A default-constructed object has all of its
+     * indices set to numbers::invalid_dof_index. Objects are ordered
+     * lexicographically by their indices.
+     */
+    template <int dim>
+    struct VectorDoFTuple
+    {
+      types::global_dof_index dof_indices[dim];
+
+      VectorDoFTuple ()
+      {
+        for (unsigned int component=0; component<dim; ++component)
+          dof_indices[component] = numbers::invalid_dof_index;
+      }
+
+
+      // lexicographic comparison: the first differing index decides
+      bool operator < (const VectorDoFTuple<dim> &other) const
+      {
+        for (unsigned int component=0; component<dim; ++component)
+          if (dof_indices[component] != other.dof_indices[component])
+            return (dof_indices[component] < other.dof_indices[component]);
+
+        // all indices are equal
+        return false;
+      }
+
+      bool operator == (const VectorDoFTuple<dim> &other) const
+      {
+        return ! (*this != other);
+      }
+
+      bool operator != (const VectorDoFTuple<dim> &other) const
+      {
+        for (unsigned int component=0; component<dim; ++component)
+          if (dof_indices[component] != other.dof_indices[component])
+            return true;
+
+        return false;
+      }
+    };
+
+
+    // Write the indices of a VectorDoFTuple to a stream, separated by single
+    // spaces and without a trailing space.
+    template <int dim>
+    std::ostream &operator << (std::ostream &out,
+                               const VectorDoFTuple<dim> &vdt)
+    {
+      for (unsigned int d=0; d<dim; ++d)
+        {
+          const char *const separator = (d+1 < dim ? " " : "");
+          out << vdt.dof_indices[d] << separator;
+        }
+      return out;
+    }
+
+
+
+    /**
+     * Helper for add_constraint(): constrain the degree of freedom that
+     * belongs to vector component @p constrained_component in terms of the
+     * remaining ones, i.e. add
+     *   x_c = inhom/n_c - sum_{d != c} n_d/n_c * x_d .
+     *
+     * Nothing is added if that degree of freedom is already constrained or
+     * if its line cannot be stored in @p constraints. Weights and
+     * inhomogeneities whose magnitude does not exceed the machine epsilon
+     * are skipped.
+     */
+    template <int dim>
+    void
+    add_constraint_for_component (const VectorDoFTuple<dim> &dof_indices,
+                                  const Tensor<1,dim>       &constraining_vector,
+                                  ConstraintMatrix          &constraints,
+                                  const double               inhomogeneity,
+                                  const unsigned int         constrained_component)
+    {
+      const types::global_dof_index constrained_dof
+        = dof_indices.dof_indices[constrained_component];
+
+      if (constraints.is_constrained(constrained_dof)
+          ||
+          !constraints.can_store_line(constrained_dof))
+        return;
+
+      constraints.add_line (constrained_dof);
+
+      // entries are added in ascending order of the other components
+      for (unsigned int d=0; d<dim; ++d)
+        if (d != constrained_component)
+          if (std::fabs (constraining_vector[d]/constraining_vector[constrained_component])
+              > std::numeric_limits<double>::epsilon())
+            constraints.add_entry (constrained_dof,
+                                   dof_indices.dof_indices[d],
+                                   -constraining_vector[d]/constraining_vector[constrained_component]);
+
+      if (std::fabs (inhomogeneity/constraining_vector[constrained_component])
+          > std::numeric_limits<double>::epsilon())
+        constraints.set_inhomogeneity(constrained_dof,
+                                      inhomogeneity/constraining_vector[constrained_component]);
+    }
+
+
+
+    /**
+     * Add the constraint $\vec n \cdot \vec u = inhom$ to the list of
+     * constraints.
+     *
+     * Here, $\vec u$ is represented by the set of given DoF indices, and
+     * $\vec n$ by the vector specified as the second argument.
+     *
+     * The function does not add constraints if a degree of freedom is already
+     * constrained in the constraints object.
+     *
+     * The DoF that gets constrained is the one with the (essentially)
+     * largest component in the constraining vector; this keeps the process
+     * stable for vectors such as n=(1,0) or n=(0,1). The resulting
+     * constraint has the form x0 = a_1*x1 + a_2*x2 + ..., where weights that
+     * are essentially zero are left out.
+     *
+     * The comparisons that pick the largest component are deliberately
+     * shifted by 1e-10 (and 2e-10). For vectors of the form (a,a,small) or
+     * (a,a,a), which occur frequently on diagonals of symmetric, regularly
+     * refined bodies, a plain a>b test would let round-off decide which DoF
+     * is constrained, and different processors of a parallel run could then
+     * disagree (this caused a hard to find bug in step-32 with 9 or more
+     * processors). The shift moves the ambiguity away from the diagonal to
+     * a line that vertices hit far less often; it does not remove it.
+     */
+    template <int dim>
+    void
+    add_constraint (const VectorDoFTuple<dim> &dof_indices,
+                    const Tensor<1,dim>       &constraining_vector,
+                    ConstraintMatrix          &constraints,
+                    const double              inhomogeneity=0)
+    {
+      switch (dim)
+        {
+        case 2:
+        {
+          const unsigned int constrained_component
+            = ((std::fabs(constraining_vector[0]) > std::fabs(constraining_vector[1]) + 1e-10)
+               ?
+               0 : 1);
+
+          add_constraint_for_component (dof_indices, constraining_vector,
+                                        constraints, inhomogeneity,
+                                        constrained_component);
+          break;
+        }
+
+        case 3:
+        {
+          unsigned int constrained_component;
+
+          if ((std::fabs(constraining_vector[0]) >= std::fabs(constraining_vector[1])+1e-10)
+              &&
+              (std::fabs(constraining_vector[0]) >= std::fabs(constraining_vector[2])+2e-10))
+            constrained_component = 0;
+          else if ((std::fabs(constraining_vector[1])+1e-10 >= std::fabs(constraining_vector[0]))
+                   &&
+                   (std::fabs(constraining_vector[1]) >= std::fabs(constraining_vector[2])+1e-10))
+            constrained_component = 1;
+          else
+            constrained_component = 2;
+
+          add_constraint_for_component (dof_indices, constraining_vector,
+                                        constraints, inhomogeneity,
+                                        constrained_component);
+          break;
+        }
+
+        default:
+          Assert (false, ExcNotImplemented());
+        }
+    }
+
+
+    /**
+     * Add the constraint $(\vec u-\vec u_\Gamma) \| \vec t$ to the list of
+     * constraints. In 2d, this is a single constraint, in 3d these are two
+     * constraints
+     *
+     * Here, $\vec u$ is represented by the set of given DoF indices, and
+     * $\vec t$ by the vector specified as the second argument. The values
+     * @p b_values enter the inhomogeneities of the constraints; they default
+     * to a zero vector of size dim.
+     *
+     * The function does not add constraints if a degree of freedom is already
+     * constrained in the constraints object.
+     */
+    template <int dim>
+    void
+    add_tangentiality_constraints
+    (const VectorDoFTuple<dim> &dof_indices,
+     const Tensor<1,dim>       &tangent_vector,
+     ConstraintMatrix          &constraints,
+     const Vector<double>      &b_values = Vector<double>(dim))
+    {
+      // The component of the tangent vector that is largest by magnitude
+      // (up to a shift of 1e-10) belongs to the independent DoF; all other
+      // DoFs are constrained to it. If, say, component 0 is the largest one,
+      // then x1=(b[1]*t[0]-b[0]*t[1])/t[0]+t[1]/t[0]*x_0, etc.
+      unsigned int independent_component = 0;
+      for (unsigned int d=1; d<dim; ++d)
+        if (std::fabs(tangent_vector[d])
+            > std::fabs(tangent_vector[independent_component]) + 1e-10)
+          independent_component = d;
+
+      for (unsigned int d=0; d<dim; ++d)
+        {
+          if (d == independent_component)
+            continue;
+
+          const types::global_dof_index constrained_dof = dof_indices.dof_indices[d];
+
+          // skip existing constraints and lines that cannot be stored
+          if (constraints.is_constrained(constrained_dof)
+              ||
+              !constraints.can_store_line(constrained_dof))
+            continue;
+
+          constraints.add_line (constrained_dof);
+
+          // weights that are essentially zero are not entered
+          if (std::fabs (tangent_vector[d]/tangent_vector[independent_component])
+              > std::numeric_limits<double>::epsilon())
+            constraints.add_entry (constrained_dof,
+                                   dof_indices.dof_indices[independent_component],
+                                   tangent_vector[d]/tangent_vector[independent_component]);
+
+          const double inhomogeneity
+            = (b_values(d)*tangent_vector[independent_component]
+               -b_values(independent_component)*tangent_vector[d])
+              /tangent_vector[independent_component];
+
+          if (std::fabs(inhomogeneity)
+              > std::numeric_limits<double>::epsilon())
+            constraints.set_inhomogeneity(constrained_dof,
+                                          inhomogeneity);
+        }
+    }
+
+
+
+    /**
+     * For dim==3, fill orthonormals[] with two vectors orthogonal to @p vector
+     * and to each other; any other dim triggers an ExcNotImplemented assertion.
+     */
+    template <int dim>
+    void
+    compute_orthonormal_vectors (const Tensor<1,dim> &vector,
+                                 Tensor<1,dim> (&orthonormals)[dim-1])
+    {
+      switch (dim)
+        {
+        case 3:
+        {
+          // Strategy in 3d: build an auxiliary vector 'tmp' that is not
+          // parallel to 'vector', then take two cross products:
+          //
+          //   orthonormals[0] = vector x tmp
+          //   orthonormals[1] = vector x orthonormals[0]
+          //
+          // The first result is perpendicular to 'vector'; the second is
+          // perpendicular to both 'vector' and the first result.
+          //
+          // NOTE(review): neither result is divided by its norm in this
+          // function, so despite the function name the outputs are
+          // orthogonal but not unit length unless the caller rescales
+          // them -- confirm whether callers rely on this.
+          //
+          // 'tmp' starts as a copy of 'vector' and is then modified
+          // in place below.
+
+          // Construct 'tmp': find the entry of largest absolute value,
+          // swap it with the larger (in absolute value) of the other two
+          // entries, and then flip the sign of the slot that originally
+          // held the largest entry. The existing assertion further down
+          // checks that the result is not collinear with 'vector'.
+          //
+          // The comparisons are strict, so if neither entry 0 nor
+          // entry 1 is strictly larger than both others (ties
+          // included), the final 'else' branch treats entry 2 as the
+          // largest.
+          Tensor<1,dim> tmp = vector;
+          if ((std::fabs(tmp[0]) > std::fabs(tmp[1]))
+              &&
+              (std::fabs(tmp[0]) > std::fabs(tmp[2])))
+            {
+              // entry zero is strictly the largest in magnitude:
+              // swap it with the larger of entries one and two,
+              // then negate slot zero
+              if ((std::fabs(tmp[1]) > std::fabs(tmp[2])))
+                std::swap (tmp[0], tmp[1]);
+              else
+                std::swap (tmp[0], tmp[2]);
+
+              tmp[0] *= -1;
+            }
+          else if ((std::fabs(tmp[1]) > std::fabs(tmp[0]))
+                   &&
+                   (std::fabs(tmp[1]) > std::fabs(tmp[2])))
+            {
+              // entry one is strictly the largest in magnitude:
+              // swap it with the larger of entries zero and two,
+              // then negate slot one
+              if ((std::fabs(tmp[0]) > std::fabs(tmp[2])))
+                std::swap (tmp[1], tmp[0]);
+              else
+                std::swap (tmp[1], tmp[2]);
+
+              tmp[1] *= -1;
+            }
+          else
+            {
+              // otherwise treat entry two as the largest (ties end
+              // up here): swap it with the larger of entries zero
+              // and one, then negate slot two
+              if ((std::fabs(tmp[0]) > std::fabs(tmp[1])))
+                std::swap (tmp[2], tmp[0]);
+              else
+                std::swap (tmp[2], tmp[1]);
+
+              tmp[2] *= -1;
+            }
+
+          // sanity check: |cos| of the angle between 'vector' and 'tmp'
+          // must stay below 1-1e-12, i.e. the two are not collinear
+          Assert (std::fabs(vector * tmp / vector.norm() / tmp.norm())
+                  <
+                  (1-1e-12),
+                  ExcInternalError());
+
+          // the two output vectors, both perpendicular to 'vector';
+          // see the note above about their lengths
+          orthonormals[0] = cross_product_3d (vector, tmp);
+          orthonormals[1] = cross_product_3d (vector, orthonormals[0]);
+
+          break;
+        }
+
+        default:
+          Assert (false, ExcNotImplemented());
+        }
+    }
+  }
+
+
+  namespace internals
+  {
+    // 3d only: accumulate in dof_values the tangential projection of
+    // boundary_function onto edge `line` of face `face` of `cell`, and
+    // flag the face dofs that were visited in dofs_processed.
+    template<typename cell_iterator>
+    void
+    compute_edge_projection (const cell_iterator &cell,
+                             const unsigned int face,
+                             const unsigned int line,
+                             hp::FEValues<3> &hp_fe_values,
+                             const Function<3> &boundary_function,
+                             const unsigned int first_vector_component,
+                             std::vector<double> &dof_values,
+                             std::vector<bool> &dofs_processed)
+    {
+      const double tol = 0.5 * cell->face (face)->line (line)->diameter () / cell->get_fe ().degree;
+      const unsigned int dim = 3;
+      const unsigned int spacedim = 3;
+
+      hp_fe_values.reinit
+      (cell,
+       (cell->active_fe_index () * GeometryInfo<dim>::faces_per_cell + face)
+       * GeometryInfo<dim>::lines_per_face + line);
+
+      // Shorthands for the FEValues object selected by reinit() above,
+      // the cell's finite element, and the data at the quadrature points.
+      const FEValues<dim> &
+      fe_values = hp_fe_values.get_present_fe_values ();
+      const FiniteElement<dim> &fe = cell->get_fe ();
+      const std::vector< DerivativeForm<1,dim,spacedim> > &
+      jacobians = fe_values.get_jacobians ();
+      const std::vector<Point<dim> > &
+      quadrature_points = fe_values.get_quadrature_points ();
+
+      std::vector<Tensor<1,dim> > tangentials (fe_values.n_quadrature_points);
+      std::vector<Vector<double> > values (fe_values.n_quadrature_points,
+                                           Vector<double> (fe.n_components ()));
+
+      // Evaluate all components of the boundary function at the
+      // real-space quadrature points.
+      boundary_function.vector_value_list (quadrature_points, values);
+
+      const std::vector<Point<dim> > &
+      reference_quadrature_points = fe_values.get_quadrature ().get_points ();
+      std::pair<unsigned int, unsigned int> base_indices (0, 0);
+
+      if (dynamic_cast<const FESystem<dim>*> (&cell->get_fe ()) != 0)
+        {
+          unsigned int fe_index = 0;
+          unsigned int fe_index_old = 0;
+          unsigned int i = 0;
+
+          for (; i < fe.n_base_elements (); ++i)
+            {
+              fe_index_old = fe_index;
+              fe_index += fe.element_multiplicity (i) * fe.base_element (i).n_components ();
+
+              if (fe_index > first_vector_component)
+                break;
+            }
+
+          base_indices.first = i;
+          base_indices.second = (first_vector_component - fe_index_old) / fe.base_element (i).n_components ();
+        }
+
+      // edge_coordinate_direction[f][l]: reference coordinate direction
+      // (0, 1 or 2) along which line l of face f runs.
+      const unsigned int
+      edge_coordinate_direction
+      [GeometryInfo<dim>::faces_per_cell]
+      [GeometryInfo<dim>::lines_per_face]
+      = { { 2, 2, 1, 1 },
+        { 2, 2, 1, 1 },
+        { 0, 0, 2, 2 },
+        { 0, 0, 2, 2 },
+        { 1, 1, 0, 0 },
+        { 1, 1, 0, 0 }
+      };
+      const FEValuesExtractors::Vector vec (first_vector_component);
+
+      // The interpolation for the lowest order edge shape functions is
+      // just the mean value of the tangential components of the boundary
+      // function on the edge. Loop over the quadrature points and
+      // accumulate, for every selected face dof i,
+      //   JxW * (g . t) * (phi_i . t) / |Jacobian column along the edge|
+      // where t is the unit tangent and g the boundary function value.
+      for (unsigned int q_point = 0; q_point < fe_values.n_quadrature_points;
+           ++q_point)
+        {
+          // Approximate the edge tangent by a central difference: map the
+          // reference point shifted by +tol and -tol along the edge
+          // direction to real space, take the difference and normalize it.
+          Point<dim> shifted_reference_point_1 = reference_quadrature_points[q_point];
+          Point<dim> shifted_reference_point_2 = reference_quadrature_points[q_point];
+
+          shifted_reference_point_1 (edge_coordinate_direction[face][line]) += tol;
+          shifted_reference_point_2 (edge_coordinate_direction[face][line]) -= tol;
+          tangentials[q_point]
+            = (0.5 *
+               (fe_values.get_mapping ()
+                .transform_unit_to_real_cell (cell,
+                                              shifted_reference_point_1)
+                -
+                fe_values.get_mapping ()
+                .transform_unit_to_real_cell (cell,
+                                              shifted_reference_point_2))
+               / tol);
+          tangentials[q_point] /= tangentials[q_point].norm();
+
+          // Face dofs of this edge: for FE_Nedelec, line*degree <= i < (line+1)*degree;
+          // for FESystem, the same range within the selected base element.
+          for (unsigned int i = 0; i < fe.dofs_per_face; ++i)
+            if (((dynamic_cast<const FESystem<dim>*> (&fe) != 0)
+                 && (fe.system_to_base_index (fe.face_to_cell_index (i, face)).first == base_indices)
+                 && (fe.base_element (base_indices.first).face_to_cell_index (line * fe.degree, face)
+                     <= fe.system_to_base_index (fe.face_to_cell_index (i, face)).second)
+                 && (fe.system_to_base_index (fe.face_to_cell_index (i, face)).second
+                     <= fe.base_element (base_indices.first).face_to_cell_index
+                     ((line + 1) * fe.degree - 1, face)))
+                || ((dynamic_cast<const FE_Nedelec<dim>*> (&fe) != 0) && (line * fe.degree <= i)
+                    && (i < (line + 1) * fe.degree)))
+              {
+                const double tangential_solution_component
+                  = (values[q_point] (first_vector_component) * tangentials[q_point][0]
+                     + values[q_point] (first_vector_component + 1) * tangentials[q_point][1]
+                     + values[q_point] (first_vector_component + 2) * tangentials[q_point][2]);
+                dof_values[i]
+                += (fe_values.JxW (q_point)
+                    * tangential_solution_component
+                    * (fe_values[vec].value (fe.face_to_cell_index (i, face), q_point) *
+                       tangentials[q_point])
+                    / std::sqrt (jacobians[q_point][0][edge_coordinate_direction[face][line]]
+                                 * jacobians[q_point][0][edge_coordinate_direction[face][line]]
+                                 + jacobians[q_point][1][edge_coordinate_direction[face][line]]
+                                 * jacobians[q_point][1][edge_coordinate_direction[face][line]]
+                                 + jacobians[q_point][2][edge_coordinate_direction[face][line]]
+                                 * jacobians[q_point][2][edge_coordinate_direction[face][line]]));
+
+                if (q_point == 0)
+                  dofs_processed[i] = true;
+              }
+        }
+    }
+
+    // Generic-dimension counterpart of the 3d function above: it takes the
+    // same argument list with hp::FEValues<dim> / Function<dim>, ignores
+    // every argument and only fails Assert(false, ExcInternalError()).
+    template<int dim, typename cell_iterator>
+    void
+    compute_edge_projection (const cell_iterator &,
+                             const unsigned int,
+                             const unsigned int,
+                             hp::FEValues<dim> &,
+                             const Function<dim> &,
+                             const unsigned int,
+                             std::vector<double> &,
+                             std::vector<bool> &)
+    {
+      Assert (false, ExcInternalError ());
+    }
+
+    // Projects the tangential part of boundary_function onto the dofs of
+    // face `face` of `cell`: in 2d onto all selected face dofs, in 3d onto
+    // the face-interior dofs by solving two small dense linear systems.
+    // Results go to dof_values; handled dofs are flagged in dofs_processed.
+    template<int dim, typename cell_iterator>
+    void
+    compute_face_projection_curl_conforming (const cell_iterator &cell,
+                                             const unsigned int face,
+                                             hp::FEValues<dim> &hp_fe_values,
+                                             const Function<dim> &boundary_function,
+                                             const unsigned int first_vector_component,
+                                             std::vector<double> &dof_values,
+                                             std::vector<bool> &dofs_processed)
+    {
+      const unsigned int spacedim = dim;
+      hp_fe_values.reinit (cell, cell->active_fe_index ()
+                           * GeometryInfo<dim>::faces_per_cell + face);
+      // Shorthands for the FEValues object selected by reinit() above,
+      // the cell's finite element, and the data at the quadrature points.
+      const FEValues<dim> &
+      fe_values = hp_fe_values.get_present_fe_values ();
+      const FiniteElement<dim> &fe = cell->get_fe ();
+      const std::vector< DerivativeForm<1,dim,spacedim> > &
+      jacobians = fe_values.get_jacobians ();
+      const std::vector<Point<dim> > &
+      quadrature_points = fe_values.get_quadrature_points ();
+      const unsigned int degree = fe.degree - 1;
+      std::pair<unsigned int, unsigned int> base_indices (0, 0);
+
+      if (dynamic_cast<const FESystem<dim>*> (&cell->get_fe ()) != 0)
+        {
+          unsigned int fe_index = 0;
+          unsigned int fe_index_old = 0;
+          unsigned int i = 0;
+
+          for (; i < fe.n_base_elements (); ++i)
+            {
+              fe_index_old = fe_index;
+              fe_index += fe.element_multiplicity (i) * fe.base_element (i).n_components ();
+
+              if (fe_index > first_vector_component)
+                break;
+            }
+
+          base_indices.first = i;
+          base_indices.second = (first_vector_component - fe_index_old) / fe.base_element (i).n_components ();
+        }
+
+      std::vector<Vector<double> >
+      values (fe_values.n_quadrature_points, Vector<double> (fe.n_components ()));
+
+      // Evaluate all components of the boundary function at the
+      // real-space quadrature points; values[q] has fe.n_components()
+      // entries.
+      boundary_function.vector_value_list (quadrature_points, values);
+
+      switch (dim)
+        {
+        case 2:
+        {
+          const double tol = 0.5 * cell->face (face)->diameter () / cell->get_fe ().degree;
+          std::vector<Tensor<1,dim> > tangentials (fe_values.n_quadrature_points);
+
+          const std::vector<Point<dim> > &
+          reference_quadrature_points = fe_values.get_quadrature ().get_points ();
+
+          // face_coordinate_direction[f]: reference coordinate direction
+          // (0 or 1) along which face f runs.
+          const unsigned int
+          face_coordinate_direction[GeometryInfo<dim>::faces_per_cell]
+            = { 1, 1, 0, 0 };
+          const FEValuesExtractors::Vector vec (first_vector_component);
+
+          // The interpolation for the lowest order face shape functions
+          // is just the mean value of the tangential components of the
+          // boundary function on the edge (in 2d a face is an edge).
+          // Loop over the quadrature points and accumulate, for every
+          // selected face dof i,
+          //   JxW * (g . t) * (phi_i . t)
+          // with t the unit tangent and g the boundary function value.
+          for (unsigned int q_point = 0;
+               q_point < fe_values.n_quadrature_points; ++q_point)
+            {
+              // Approximate the face tangent by a central difference:
+              // map the reference point shifted by +tol and -tol along
+              // the face direction to real space and normalize the
+              // difference (so the scaling by 1/tol drops out).
+              Point<dim> shifted_reference_point_1
+                = reference_quadrature_points[q_point];
+              Point<dim> shifted_reference_point_2
+                = reference_quadrature_points[q_point];
+
+              shifted_reference_point_1 (face_coordinate_direction[face])
+              += tol;
+              shifted_reference_point_2 (face_coordinate_direction[face])
+              -= tol;
+              tangentials[q_point]
+                = (fe_values.get_mapping ()
+                   .transform_unit_to_real_cell (cell,
+                                                 shifted_reference_point_1)
+                   -
+                   fe_values.get_mapping ()
+                   .transform_unit_to_real_cell (cell,
+                                                 shifted_reference_point_2))
+                  / tol;
+              tangentials[q_point] /= tangentials[q_point].norm();
+
+              // Selected dofs: every face dof for FE_Nedelec; for an
+              // FESystem only those of the chosen base element.
+              for (unsigned int i = 0; i < fe.dofs_per_face; ++i)
+                if (((dynamic_cast<const FESystem<dim>*> (&fe) != 0)
+                     && (fe.system_to_base_index (fe.face_to_cell_index (i, face)).first == base_indices))
+                    || (dynamic_cast<const FE_Nedelec<dim>*> (&fe) != 0))
+                  {
+                    dof_values[i]
+                    += fe_values.JxW (q_point)
+                       * (values[q_point] (first_vector_component)
+                          * tangentials[q_point][0]
+                          + values[q_point] (first_vector_component + 1)
+                          * tangentials[q_point][1])
+                       * (fe_values[vec].value (fe.face_to_cell_index (i, face), q_point)
+                          * tangentials[q_point]);
+
+                    if (q_point == 0)
+                      dofs_processed[i] = true;
+                  }
+            }
+
+          break;
+        }
+
+        case 3:
+        {
+          const FEValuesExtractors::Vector vec (first_vector_component);
+          FullMatrix<double>
+          assembling_matrix (degree * fe.degree,
+                             dim * fe_values.n_quadrature_points);
+          Vector<double> assembling_vector (assembling_matrix.n ());
+          Vector<double> cell_rhs (assembling_matrix.m ());
+          FullMatrix<double> cell_matrix (assembling_matrix.m (),
+                                          assembling_matrix.m ());
+          FullMatrix<double> cell_matrix_inv (assembling_matrix.m (),
+                                              assembling_matrix.m ());
+          Vector<double> solution (cell_matrix.m ());
+
+          // global_face_coordinate_directions[f]: the two reference
+          // coordinate directions spanning face f.
+          const unsigned int
+          global_face_coordinate_directions[GeometryInfo<3>::faces_per_cell][2]
+          = { { 1, 2 },
+            { 1, 2 },
+            { 2, 0 },
+            { 2, 0 },
+            { 0, 1 },
+            { 0, 1 }
+          };
+
+          // The projection is divided into two steps.  In the first step we
+          // project the boundary function on the horizontal shape functions.
+          // Then the boundary function is projected on the vertical shape
+          // functions.  We begin with the horizontal shape functions and set
+          // up a linear system of equations to get the values for degrees of
+          // freedom associated with the interior of the face.
+          for (unsigned int q_point = 0;
+               q_point < fe_values.n_quadrature_points; ++q_point)
+            {
+              // The right hand side of the corresponding problem is the
+              // residual of the boundary function and the already
+              // interpolated part on the edges: start from the boundary
+              // function value at this quadrature point and subtract
+              // dof_values[i] times the shape function for the face dofs
+              // in the index range [2*fe.degree, 4*fe.degree) (for an
+              // FESystem: the corresponding range inside the selected
+              // base element). Presumably those entries were filled by
+              // the edge projection beforehand -- caller not visible here.
+              Tensor<1, dim> tmp;
+
+              for (unsigned int d = 0; d < dim; ++d)
+                tmp[d] = values[q_point] (first_vector_component + d);
+
+              for (unsigned int i = 0; i < fe.dofs_per_face; ++i)
+                if (((dynamic_cast<const FESystem<dim>*> (&fe) != 0)
+                     && (fe.system_to_base_index (fe.face_to_cell_index (i, face)).first == base_indices)
+                     && (fe.base_element (base_indices.first).face_to_cell_index (2 * fe.degree, face)
+                         <= fe.system_to_base_index (fe.face_to_cell_index (i, face)).second)
+                     && (fe.system_to_base_index (fe.face_to_cell_index (i, face)).second
+                         <= fe.base_element (base_indices.first).face_to_cell_index (4 * fe.degree - 1, face)))
+                    || ((dynamic_cast<const FE_Nedelec<dim>*> (&fe) != 0) && (2 * fe.degree <= i)
+                        && (i < 4 * fe.degree)))
+                  tmp -= dof_values[i] * fe_values[vec].value (fe.face_to_cell_index (i, face), q_point);
+
+              const double JxW
+                = std::sqrt (fe_values.JxW (q_point)
+                             / ((jacobians[q_point][0][global_face_coordinate_directions[face][0]]
+                                 * jacobians[q_point][0][global_face_coordinate_directions[face][0]]
+                                 +
+                                 jacobians[q_point][1][global_face_coordinate_directions[face][0]]
+                                 * jacobians[q_point][1][global_face_coordinate_directions[face][0]]
+                                 +
+                                 jacobians[q_point][2][global_face_coordinate_directions[face][0]]
+                                 * jacobians[q_point][2][global_face_coordinate_directions[face][0]])
+                                *
+                                (jacobians[q_point][0][global_face_coordinate_directions[face][1]]
+                                 * jacobians[q_point][0][global_face_coordinate_directions[face][1]]
+                                 +
+                                 jacobians[q_point][1][global_face_coordinate_directions[face][1]]
+                                 * jacobians[q_point][1][global_face_coordinate_directions[face][1]]
+                                 +
+                                 jacobians[q_point][2][global_face_coordinate_directions[face][1]]
+                                 * jacobians[q_point][2][global_face_coordinate_directions[face][1]])));
+
+              // In the weak form the right hand side function is
+              // multiplied by the horizontal shape functions defined in
+              // the interior of the face. Here JxW is the square root of
+              // the quadrature weight divided by the squared lengths of
+              // the two Jacobian columns belonging to the face
+              // directions; both the residual (stored in
+              // assembling_vector) and the shape values (stored in
+              // assembling_matrix) are scaled by it, so the products
+              // formed further down carry the full weight once.
+              for (unsigned int d = 0; d < dim; ++d)
+                assembling_vector (dim * q_point + d) = JxW * tmp[d];
+
+              unsigned int index = 0;
+
+              for (unsigned int i = 0; i < fe.dofs_per_face; ++i)
+                if (((dynamic_cast<const FESystem<dim>*> (&fe) != 0)
+                     && (fe.system_to_base_index (fe.face_to_cell_index (i, face)).first == base_indices)
+                     && (fe.base_element (base_indices.first).face_to_cell_index
+                         (GeometryInfo<dim>::lines_per_face * fe.degree, face)
+                         <= fe.system_to_base_index (fe.face_to_cell_index (i, face)).second)
+                     && (fe.system_to_base_index (fe.face_to_cell_index (i, face)).second
+                         < fe.base_element (base_indices.first).face_to_cell_index
+                         ((degree + GeometryInfo<dim>::lines_per_face) * fe.degree, face)))
+                    || ((dynamic_cast<const FE_Nedelec<dim>*> (&fe) != 0)
+                        && (GeometryInfo<dim>::lines_per_face * fe.degree <= i)
+                        && (i < (degree + GeometryInfo<dim>::lines_per_face) * fe.degree)))
+                  {
+                    const Tensor<1, dim> shape_value
+                      = (JxW * fe_values[vec].value (fe.face_to_cell_index (i, face),
+                                                     q_point));
+
+                    for (unsigned int d = 0; d < dim; ++d)
+                      assembling_matrix (index, dim * q_point + d) = shape_value[d];
+
+                    ++index;
+                  }
+            }
+
+          // Create the system matrix by multiplying the assembling matrix
+          // with its transpose and the right hand side vector by multiplying
+          // the assembling matrix with the assembling vector.  Invert the
+          // system matrix.
+          assembling_matrix.mTmult (cell_matrix, assembling_matrix);
+          cell_matrix_inv.invert (cell_matrix);
+          assembling_matrix.vmult (cell_rhs, assembling_vector);
+          cell_matrix_inv.vmult (solution, cell_rhs);
+
+          // Store the computed values in the same dof order in which the
+          // matrix rows were assembled, and flag those dofs as processed.
+          {
+            unsigned int index = 0;
+
+            for (unsigned int i = 0; i < fe.dofs_per_face; ++i)
+              if (((dynamic_cast<const FESystem<dim>*> (&fe) != 0)
+                   && (fe.system_to_base_index (fe.face_to_cell_index (i, face)).first == base_indices)
+                   && (fe.base_element (base_indices.first).face_to_cell_index
+                       (GeometryInfo<dim>::lines_per_face * fe.degree, face)
+                       <= fe.system_to_base_index (fe.face_to_cell_index (i, face)).second)
+                   && (fe.system_to_base_index (fe.face_to_cell_index (i, face)).second
+                       < fe.base_element (base_indices.first).face_to_cell_index
+                       ((degree + GeometryInfo<dim>::lines_per_face) * fe.degree, face)))
+                  || ((dynamic_cast<const FE_Nedelec<dim>*> (&fe) != 0)
+                      && (GeometryInfo<dim>::lines_per_face * fe.degree <= i)
+                      && (i < (degree + GeometryInfo<dim>::lines_per_face) * fe.degree)))
+                {
+                  dof_values[i] = solution (index);
+                  dofs_processed[i] = true;
+                  ++index;
+                }
+          }
+
+          // Same procedure for the vertical shape functions; the residual
+          // now subtracts the face dofs with index below 2*fe.degree.
+          for (unsigned int q_point = 0;
+               q_point < fe_values.n_quadrature_points; ++q_point)
+            {
+              Tensor<1, dim> tmp;
+
+              for (unsigned int d = 0; d < dim; ++d)
+                tmp[d] = values[q_point] (first_vector_component + d);
+
+              for (unsigned int i = 0; i < fe.dofs_per_face; ++i)
+                if (((dynamic_cast<const FESystem<dim>*> (&fe) != 0)
+                     && (fe.system_to_base_index (fe.face_to_cell_index (i, face)).first == base_indices)
+                     && (fe.system_to_base_index (fe.face_to_cell_index (i, face)).second
+                         <= fe.base_element (base_indices.first).face_to_cell_index (2 * fe.degree - 1, face))
+                     && (fe.system_to_base_index (fe.face_to_cell_index (i, face)).second
+                         >= fe.base_element (base_indices.first).face_to_cell_index (0, face)))
+                    || ((dynamic_cast<const FE_Nedelec<dim>*> (&fe) != 0) && (i < 2 * fe.degree)))
+                  tmp -= dof_values[i] * fe_values[vec].value (fe.face_to_cell_index (i, face), q_point);
+
+              const double JxW
+                = std::sqrt (fe_values.JxW (q_point)
+                             / ((jacobians[q_point][0][global_face_coordinate_directions[face][0]]
+                                 * jacobians[q_point][0][global_face_coordinate_directions[face][0]]
+                                 +
+                                 jacobians[q_point][1][global_face_coordinate_directions[face][0]]
+                                 * jacobians[q_point][1][global_face_coordinate_directions[face][0]]
+                                 +
+                                 jacobians[q_point][2][global_face_coordinate_directions[face][0]]
+                                 * jacobians[q_point][2][global_face_coordinate_directions[face][0]])
+                                *
+                                (jacobians[q_point][0][global_face_coordinate_directions[face][1]]
+                                 * jacobians[q_point][0][global_face_coordinate_directions[face][1]]
+                                 +
+                                 jacobians[q_point][1][global_face_coordinate_directions[face][1]]
+                                 * jacobians[q_point][1][global_face_coordinate_directions[face][1]]
+                                 +
+                                 jacobians[q_point][2][global_face_coordinate_directions[face][1]]
+                                 * jacobians[q_point][2][global_face_coordinate_directions[face][1]])));
+
+              for (unsigned int d = 0; d < dim; ++d)
+                assembling_vector (dim * q_point + d) = JxW * tmp[d];
+
+              unsigned int index = 0;
+
+              for (unsigned int i = 0; i < fe.dofs_per_face; ++i)
+                if (((dynamic_cast<const FESystem<dim>*> (&fe) != 0)
+                     && (fe.system_to_base_index (fe.face_to_cell_index (i, face)).first == base_indices)
+                     && (fe.base_element (base_indices.first).face_to_cell_index
+                         ((degree + GeometryInfo<dim>::lines_per_face) * fe.degree, face)
+                         <= fe.system_to_base_index (fe.face_to_cell_index (i, face)).second))
+                    || ((dynamic_cast<const FE_Nedelec<dim>*> (&fe) != 0)
+                        && ((degree + GeometryInfo<dim>::lines_per_face) * fe.degree <= i)))
+                  {
+                    const Tensor<1, dim> shape_value
+                      = JxW * fe_values[vec].value (fe.face_to_cell_index (i, face),
+                                                    q_point);
+
+                    for (unsigned int d = 0; d < dim; ++d)
+                      assembling_matrix (index, dim * q_point + d) = shape_value[d];
+
+                    ++index;
+                  }
+            }
+
+          assembling_matrix.mTmult (cell_matrix, assembling_matrix);
+          cell_matrix_inv.invert (cell_matrix);
+          assembling_matrix.vmult (cell_rhs, assembling_vector);
+          cell_matrix_inv.vmult (solution, cell_rhs);
+
+          unsigned int index = 0;
+
+          for (unsigned int i = 0; i < fe.dofs_per_face; ++i)
+            if (((dynamic_cast<const FESystem<dim>*> (&fe) != 0)
+                 && (fe.system_to_base_index (fe.face_to_cell_index (i, face)).first == base_indices)
+                 && (fe.base_element (base_indices.first).face_to_cell_index
+                     ((degree + GeometryInfo<dim>::lines_per_face) * fe.degree, face)
+                     <= fe.system_to_base_index (fe.face_to_cell_index (i, face)).second))
+                || ((dynamic_cast<const FE_Nedelec<dim>*> (&fe) != 0)
+                    && ((degree + GeometryInfo<dim>::lines_per_face) * fe.degree <= i)))
+              {
+                dof_values[i] = solution (index);
+                dofs_processed[i] = true;
+                ++index;
+              }
+
+          break;
+        }
+
+        default:
+          Assert (false, ExcNotImplemented ());
+        }
+    }
+  }
+
+
+
+
+  template <int dim>
+  void
+
+  project_boundary_values_curl_conforming (const DoFHandler<dim> &dof_handler,
+                                           const unsigned int first_vector_component,
+                                           const Function<dim> &boundary_function,
+                                           const types::boundary_id boundary_component,
+                                           ConstraintMatrix &constraints,
+                                           const Mapping<dim> &mapping)
+  {
+    // Projection-based interpolation is performed in two (in 2D) respectively
+    // three (in 3D) steps. First the tangential component of the function is
+    // interpolated on each edge.  This gives the values for the degrees of
+    // freedom corresponding to the edge shape functions. Now we are done for
+    // 2D, but in 3D we possibly have also degrees of freedom, which are
+    // located in the interior of the faces. Therefore we compute the residual
+    // of the function describing the boundary values and the interpolated
+    // part, which we have computed in the last step. On the faces there are
+    // two kinds of shape functions, the horizontal and the vertical
+    // ones. Thus we have to solve two linear systems of equations of size
+    // <tt>degree * (degree + 1)<tt> to obtain the values for the
+    // corresponding degrees of freedom.
+    const unsigned int superdegree = dof_handler.get_fe ().degree;
+    const QGauss<dim - 1> reference_face_quadrature (2 * superdegree);
+    const unsigned int dofs_per_face = dof_handler.get_fe ().dofs_per_face;
+    hp::FECollection<dim> fe_collection (dof_handler.get_fe ());
+    hp::MappingCollection<dim> mapping_collection (mapping);
+    hp::QCollection<dim> face_quadrature_collection;
+
+    for (unsigned int face = 0; face < GeometryInfo<dim>::faces_per_cell; ++face)
+      face_quadrature_collection.push_back
+      (QProjector<dim>::project_to_face (reference_face_quadrature, face));
+
+    hp::FEValues<dim> fe_face_values (mapping_collection, fe_collection,
+                                      face_quadrature_collection,
+                                      update_jacobians |
+                                      update_JxW_values |
+                                      update_quadrature_points |
+                                      update_values);
+
+    std::vector<bool> dofs_processed (dofs_per_face);
+    std::vector<double> dof_values (dofs_per_face);
+    std::vector<types::global_dof_index> face_dof_indices (dofs_per_face);
+    typename DoFHandler<dim>::active_cell_iterator cell = dof_handler.begin_active ();
+
+    switch (dim)
+      {
+      case 2:
+      {
+        for (; cell != dof_handler.end (); ++cell)
+          if (cell->at_boundary () && cell->is_locally_owned ())
+            for (unsigned int face = 0; face < GeometryInfo<dim>::faces_per_cell; ++face)
+              if (cell->face (face)->boundary_id () == boundary_component)
+                {
+                  // if the FE is a
+                  // FE_Nothing object
+                  // there is no work to
+                  // do
+                  if (dynamic_cast<const FE_Nothing<dim>*> (&cell->get_fe ()) != 0)
+                    return;
+
+                  // This is only
+                  // implemented, if the
+                  // FE is a Nedelec
+                  // element. If the FE
+                  // is a FESystem, we
+                  // cannot check this.
+                  if (dynamic_cast<const FESystem<dim>*> (&cell->get_fe ()) == 0)
+                    {
+                      typedef FiniteElement<dim> FEL;
+                      AssertThrow (dynamic_cast<const FE_Nedelec<dim>*> (&cell->get_fe ()) != 0,
+
+                                   typename FEL::ExcInterpolationNotImplemented ());
+                    }
+
+                  for (unsigned int dof = 0; dof < dofs_per_face; ++dof)
+                    {
+                      dof_values[dof] = 0.0;
+                      dofs_processed[dof] = false;
+                    }
+
+                  // Compute the
+                  // projection of the
+                  // boundary function on
+                  // the edge.
+                  internals
+                  ::compute_face_projection_curl_conforming (cell, face, fe_face_values,
+                                                             boundary_function,
+                                                             first_vector_component,
+                                                             dof_values, dofs_processed);
+                  cell->face (face)->get_dof_indices (face_dof_indices,
+                                                      cell->active_fe_index ());
+
+                  // Add the computed
+                  // constraints to the
+                  // constraint matrix,
+                  // if the degree of
+                  // freedom is not
+                  // already constrained.
+                  for (unsigned int dof = 0; dof < dofs_per_face; ++dof)
+                    if (dofs_processed[dof] && constraints.can_store_line (face_dof_indices[dof])
+                        && !(constraints.is_constrained (face_dof_indices[dof])))
+                      {
+                        constraints.add_line (face_dof_indices[dof]);
+
+                        if (std::abs (dof_values[dof]) > 1e-13)
+                          constraints.set_inhomogeneity (face_dof_indices[dof], dof_values[dof]);
+                      }
+                }
+
+        break;
+      }
+
+      case 3:
+      {
+        const QGauss<dim - 2> reference_edge_quadrature (2 * superdegree);
+        const unsigned int degree = superdegree - 1;
+        hp::QCollection<dim> edge_quadrature_collection;
+
+        for (unsigned int face = 0; face < GeometryInfo<dim>::faces_per_cell; ++face)
+          for (unsigned int line = 0; line < GeometryInfo<dim>::lines_per_face; ++line)
+            edge_quadrature_collection.push_back
+            (QProjector<dim>::project_to_face
+             (QProjector<dim - 1>::project_to_face
+              (reference_edge_quadrature, line), face));
+
+        hp::FEValues<dim> fe_edge_values (mapping_collection, fe_collection,
+                                          edge_quadrature_collection,
+                                          update_jacobians |
+                                          update_JxW_values |
+                                          update_quadrature_points |
+                                          update_values);
+
+        for (; cell != dof_handler.end (); ++cell)
+          if (cell->at_boundary () && cell->is_locally_owned ())
+            for (unsigned int face = 0; face < GeometryInfo<dim>::faces_per_cell; ++face)
+              if (cell->face (face)->boundary_id () == boundary_component)
+                {
+                  // if the FE is a
+                  // FE_Nothing object
+                  // there is no work to
+                  // do
+                  if (dynamic_cast<const FE_Nothing<dim>*> (&cell->get_fe ()) != 0)
+                    return;
+
+                  // This is only
+                  // implemented, if the
+                  // FE is a Nedelec
+                  // element. If the FE is
+                  // a FESystem we cannot
+                  // check this.
+                  if (dynamic_cast<const FESystem<dim>*> (&cell->get_fe ()) == 0)
+                    {
+                      typedef FiniteElement<dim> FEL;
+
+                      AssertThrow (dynamic_cast<const FE_Nedelec<dim>*> (&cell->get_fe ()) != 0,
+                                   typename FEL::ExcInterpolationNotImplemented ());
+                    }
+
+                  for (unsigned int dof = 0; dof < dofs_per_face; ++dof)
+                    {
+                      dof_values[dof] = 0.0;
+                      dofs_processed[dof] = false;
+                    }
+
+                  // First we compute the
+                  // projection on the
+                  // edges.
+                  for (unsigned int line = 0;
+                       line < GeometryInfo<3>::lines_per_face; ++line)
+                    internals
+                    ::compute_edge_projection (cell, face, line, fe_edge_values,
+                                               boundary_function,
+                                               first_vector_component,
+                                               dof_values, dofs_processed);
+
+                  // If there are higher
+                  // order shape
+                  // functions, there is
+                  // still some work
+                  // left.
+                  if (degree > 0)
+                    internals
+                    ::compute_face_projection_curl_conforming (cell, face, fe_face_values,
+                                                               boundary_function,
+                                                               first_vector_component,
+                                                               dof_values,
+                                                               dofs_processed);
+
+                  // Store the computed
+                  // values in the global
+                  // vector.
+                  cell->face (face)->get_dof_indices (face_dof_indices,
+                                                      cell->active_fe_index ());
+
+                  for (unsigned int dof = 0; dof < dofs_per_face; ++dof)
+                    if (dofs_processed[dof] && constraints.can_store_line (face_dof_indices[dof])
+                        && !(constraints.is_constrained (face_dof_indices[dof])))
+                      {
+                        constraints.add_line (face_dof_indices[dof]);
+
+                        if (std::abs (dof_values[dof]) > 1e-13)
+                          constraints.set_inhomogeneity (face_dof_indices[dof], dof_values[dof]);
+                      }
+                }
+
+        break;
+      }
+
+      default:
+        Assert (false, ExcNotImplemented ());
+      }
+  }
+
+  // hp::DoFHandler variant. For every locally owned boundary cell, projects
+  // boundary_function onto each face whose boundary id is boundary_component
+  // and adds the resulting face DoF values to constraints (dim 2 and 3 only).
+  template <int dim>
+  void
+  project_boundary_values_curl_conforming (const hp::DoFHandler<dim> &dof_handler,
+                                           const unsigned int first_vector_component,
+                                           const Function<dim> &boundary_function,
+                                           const types::boundary_id boundary_component,
+                                           ConstraintMatrix &constraints,
+                                           const hp::MappingCollection<dim> &mapping_collection)
+  {
+    hp::FECollection<dim> fe_collection (dof_handler.get_fe ());
+    hp::QCollection<dim> face_quadrature_collection;
+    // One projected face quadrature per (FE, face) pair, ordered FE-major.
+    for (unsigned int i = 0; i < fe_collection.size (); ++i)
+      {
+        const QGauss<dim - 1>
+        reference_face_quadrature (2 * fe_collection[i].degree);
+
+        for (unsigned int face = 0; face < GeometryInfo<dim>::faces_per_cell; ++face)
+          face_quadrature_collection.push_back
+          (QProjector<dim>::project_to_face (reference_face_quadrature, face));
+      }
+
+    hp::FEValues<dim> fe_face_values (mapping_collection, fe_collection,
+                                      face_quadrature_collection,
+                                      update_jacobians |
+                                      update_JxW_values |
+                                      update_quadrature_points |
+                                      update_values);
+    std::vector<bool> dofs_processed;
+    std::vector<double> dof_values;
+    std::vector<types::global_dof_index> face_dof_indices;
+    typename hp::DoFHandler<dim>::active_cell_iterator cell = dof_handler.begin_active ();
+    // dim is a template constant, so exactly one branch below is taken.
+    switch (dim)
+      {
+      case 2:
+      {
+        for (; cell != dof_handler.end (); ++cell)
+          if (cell->at_boundary () && cell->is_locally_owned ())
+            for (unsigned int face = 0; face < GeometryInfo<dim>::faces_per_cell; ++face)
+              if (cell->face (face)->boundary_id () == boundary_component)
+                {
+                  // Nothing to do on an FE_Nothing cell: skip this face only, other cells may differ.
+                  if (dynamic_cast<const FE_Nothing<dim>*> (&cell->get_fe ()) != 0)
+                    continue;
+
+                  // This is only implemented, if the FE is a Nedelec
+                  // element. If the FE is a FESystem we cannot check this.
+                  if (dynamic_cast<const FESystem<dim>*> (&cell->get_fe ()) == 0)
+                    {
+                      typedef FiniteElement<dim> FEL;
+
+                      AssertThrow (dynamic_cast<const FE_Nedelec<dim>*> (&cell->get_fe ()) != 0,
+                                   typename FEL::ExcInterpolationNotImplemented ());
+                    }
+                  // Size the scratch vectors for the FE active on this cell and reset them.
+                  const unsigned int dofs_per_face = cell->get_fe ().dofs_per_face;
+
+                  dofs_processed.resize (dofs_per_face);
+                  dof_values.resize (dofs_per_face);
+
+                  for (unsigned int dof = 0; dof < dofs_per_face; ++dof)
+                    {
+                      dof_values[dof] = 0.0;
+                      dofs_processed[dof] = false;
+                    }
+                  // Project the boundary function onto this face (an edge in 2D).
+                  internals
+                  ::compute_face_projection_curl_conforming (cell, face, fe_face_values,
+                                                             boundary_function,
+                                                             first_vector_component,
+                                                             dof_values, dofs_processed);
+                  face_dof_indices.resize (dofs_per_face);
+                  cell->face (face)->get_dof_indices (face_dof_indices,
+                                                      cell->active_fe_index ());
+                  // Constrain each processed DoF that can be stored and is not yet constrained.
+                  for (unsigned int dof = 0; dof < dofs_per_face; ++dof)
+                    if (dofs_processed[dof] && constraints.can_store_line (face_dof_indices[dof])
+                        && !(constraints.is_constrained (face_dof_indices[dof])))
+                      {
+                        constraints.add_line (face_dof_indices[dof]);
+
+                        if (std::abs (dof_values[dof]) > 1e-13)
+                          constraints.set_inhomogeneity (face_dof_indices[dof], dof_values[dof]);
+                      }
+                }
+
+        break;
+      }
+
+      case 3:
+      {
+        hp::QCollection<dim> edge_quadrature_collection;
+        // One projected edge quadrature per (FE, face, line) triple, ordered FE-major.
+        for (unsigned int i = 0; i < fe_collection.size (); ++i)
+          {
+            const QGauss<dim - 2>
+            reference_edge_quadrature (2 * fe_collection[i].degree);
+
+            for (unsigned int face = 0; face < GeometryInfo<dim>::faces_per_cell; ++face)
+              for (unsigned int line = 0; line < GeometryInfo<dim>::lines_per_face; ++line)
+                edge_quadrature_collection.push_back
+                (QProjector<dim>::project_to_face
+                 (QProjector<dim - 1>::project_to_face (reference_edge_quadrature, line),
+                  face));
+          }
+
+        hp::FEValues<dim> fe_edge_values (mapping_collection, fe_collection,
+                                          edge_quadrature_collection,
+                                          update_jacobians |
+                                          update_JxW_values |
+                                          update_quadrature_points |
+                                          update_values);
+
+        for (; cell != dof_handler.end (); ++cell)
+          if (cell->at_boundary () && cell->is_locally_owned ())
+            for (unsigned int face = 0; face < GeometryInfo<dim>::faces_per_cell; ++face)
+              if (cell->face (face)->boundary_id () == boundary_component)
+                {
+                  // Nothing to do on an FE_Nothing cell: skip this face only, other cells may differ.
+                  if (dynamic_cast<const FE_Nothing<dim>*> (&cell->get_fe ()) != 0)
+                    continue;
+
+                  // This is only implemented, if the FE is a Nedelec
+                  // element. If the FE is a FESystem we cannot check this.
+                  if (dynamic_cast<const FESystem<dim>*> (&cell->get_fe ()) == 0)
+                    {
+                      typedef FiniteElement<dim> FEL;
+
+                      AssertThrow (dynamic_cast<const FE_Nedelec<dim>*> (&cell->get_fe ()) != 0,
+                                   typename FEL::ExcInterpolationNotImplemented ());
+                    }
+
+                  const unsigned int superdegree = cell->get_fe ().degree;
+                  const unsigned int degree = superdegree - 1;
+                  const unsigned int dofs_per_face = cell->get_fe ().dofs_per_face;
+
+                  dofs_processed.resize (dofs_per_face);
+                  dof_values.resize (dofs_per_face);
+
+                  for (unsigned int dof = 0; dof < dofs_per_face; ++dof)
+                    {
+                      dof_values[dof] = 0.0;
+                      dofs_processed[dof] = false;
+                    }
+                  // First compute the projection on each edge of this face.
+                  for (unsigned int line = 0;
+                       line < GeometryInfo<dim>::lines_per_face; ++line)
+                    internals
+                    ::compute_edge_projection (cell, face, line, fe_edge_values,
+                                               boundary_function,
+                                               first_vector_component,
+                                               dof_values, dofs_processed);
+
+                  // If there are higher order shape functions, there is still
+                  // some work left.
+                  if (degree > 0)
+                    internals
+                    ::compute_face_projection_curl_conforming (cell, face, fe_face_values,
+                                                               boundary_function,
+                                                               first_vector_component,
+                                                               dof_values, dofs_processed);
+
+                  // Store the computed values as constraints on the global face DoFs.
+                  face_dof_indices.resize (dofs_per_face);
+                  cell->face (face)->get_dof_indices (face_dof_indices,
+                                                      cell->active_fe_index ());
+
+                  for (unsigned int dof = 0; dof < dofs_per_face; ++dof)
+                    if (dofs_processed[dof] && constraints.can_store_line (face_dof_indices[dof])
+                        && !(constraints.is_constrained (face_dof_indices[dof])))
+                      {
+                        constraints.add_line (face_dof_indices[dof]);
+
+                        if (std::abs (dof_values[dof]) > 1e-13)
+                          constraints.set_inhomogeneity (face_dof_indices[dof], dof_values[dof]);
+                      }
+                }
+
+        break;
+      }
+
+      default:
+        Assert (false, ExcNotImplemented ());
+      }
+  }
+
+
+  namespace internals
+  {
+    template<typename cell_iterator>
+    void
+    compute_edge_projection_l2 (const cell_iterator &cell,
+                                const unsigned int face,
+                                const unsigned int line,
+                                hp::FEValues<3> &hp_fe_values,
+                                const Function<3> &boundary_function,
+                                const unsigned int first_vector_component,
+                                std::vector<double> &dof_values,
+                                std::vector<bool> &dofs_processed)
+    {
+      // Computes the L2-projection of the tangential component of the given
+      // boundary function onto the shape functions of one edge (line) of one
+      // face of the given 3D cell. The results are written to dof_values, indexed
+      // by face DoF number, and the matching dofs_processed entries are set to true.
+      // In the context of this function, by associated DoFs we mean:
+      // the DoFs corresponding to the group of components making up the vector
+      // with first component first_vector_component (length dim).
+      const unsigned int dim = 3;
+      const FiniteElement<dim> &fe = cell->get_fe ();
+
+      // reinit for this cell, face and line.
+      hp_fe_values.reinit
+      (cell,
+       (cell->active_fe_index () * GeometryInfo<dim>::faces_per_cell + face)
+       * GeometryInfo<dim>::lines_per_face + line);
+
+      // Initialize the required objects.
+      const FEValues<dim> &
+      fe_values = hp_fe_values.get_present_fe_values ();
+      // Store degree as fe.degree-1
+      // For nedelec elements FE_Nedelec<dim> (0) returns fe.degree = 1.
+      const unsigned int degree = fe.degree - 1;
+
+      const std::vector<Point<dim> > &
+      quadrature_points = fe_values.get_quadrature_points ();
+      std::vector<Vector<double> > values (fe_values.n_quadrature_points,
+                                           Vector<double> (fe.n_components ()));
+
+      // Get boundary function values
+      // at quadrature points.
+      boundary_function.vector_value_list (quadrature_points, values);
+
+      // Find where the group of vector components (dim of them,
+      // starting at first_vector_component) is within an FESystem.
+      //
+      // If not using FESystem then must be using FE_Nedelec,
+      // which has one base element and one copy of it (with 3 components).
+      std::pair<unsigned int, unsigned int> base_indices (0, 0);
+      if (dynamic_cast<const FESystem<dim>*> (&cell->get_fe ()) != 0)
+        {
+          unsigned int fe_index = 0;
+          unsigned int fe_index_old = 0;
+          unsigned int i = 0;
+
+          // Find base element:
+          // base_indices.first
+          //
+          // Then select which copy of that base element
+          // [ each copy is of length fe.base_element(base_indices.first).n_components() ]
+          // corresponds to first_vector_component:
+          // base_indices.second
+          for (; i < fe.n_base_elements (); ++i)
+            {
+              fe_index_old = fe_index;
+              fe_index += fe.element_multiplicity (i) * fe.base_element (i).n_components ();
+
+              if (fe_index > first_vector_component)
+                break;
+            }
+
+          base_indices.first = i;
+          base_indices.second = (first_vector_component - fe_index_old) / fe.base_element (i).n_components ();
+        }
+
+      // Find DoFs we want to constrain:
+      // There are fe.dofs_per_line DoFs associated with the
+      // given line on the given face on the given cell.
+      //
+      // Want to know which of these DoFs (there are degree+1 of interest)
+      // are associated with the components given by first_vector_component.
+      // Then we can make a map from the associated line DoFs to the face DoFs.
+      //
+      // For a single FE_Nedelec<3> element this is simple:
+      //    We know the ordering of local DoFs goes
+      //    lines -> faces -> cells
+      //
+      // For a set of FESystem<3> elements we need to pick out the matching base element and
+      // the index within this ordering.
+      //
+      // We call the map associated_edge_dof_to_face_dof
+      std::vector<unsigned int> associated_edge_dof_to_face_dof (degree + 1);
+
+      // Lowest DoF in the base element allowed for this edge:
+      const unsigned int lower_bound =
+        fe.base_element(base_indices.first).face_to_cell_index(line * (degree + 1), face);
+      // Highest DoF in the base element allowed for this edge:
+      const unsigned int upper_bound =
+        fe.base_element(base_indices.first).face_to_cell_index((line + 1) * (degree + 1) - 1, face);
+
+      unsigned int associated_edge_dof_index = 0;
+      // Loop over the DoFs of this line only (not over all fe.dofs_per_face face DoFs).
+      for (unsigned int line_idx = 0; line_idx < fe.dofs_per_line; ++line_idx)
+        {
+          // Assuming DoFs on a face are numbered in order by lines then faces.
+          // i.e. line 0 has degree+1 dofs numbered 0,..,degree
+          //      line 1 has degree+1 dofs numbered (degree+1),..,2*(degree+1)-1 and so on.
+          const unsigned int face_idx = line*fe.dofs_per_line + line_idx;
+          // Note, assuming that the edge orientations are "standard"
+          //       i.e. cell->line_orientation(line) = true.
+          const unsigned int cell_idx = fe.face_to_cell_index (face_idx, face);
+
+          // Check this cell_idx belongs to the correct base_element, component and line:
+          if (((dynamic_cast<const FESystem<dim>*> (&fe) != 0)
+               && (fe.system_to_base_index (cell_idx).first == base_indices)
+               && (lower_bound <= fe.system_to_base_index (cell_idx).second)
+               && (fe.system_to_base_index (cell_idx).second <= upper_bound))
+              || ((dynamic_cast<const FE_Nedelec<dim>*> (&fe) != 0)
+                  && (line * (degree + 1) <= face_idx)
+                  && (face_idx <= (line + 1) * (degree + 1) - 1)))
+            {
+              associated_edge_dof_to_face_dof[associated_edge_dof_index] = face_idx;
+              ++associated_edge_dof_index;
+            }
+        }
+      // Sanity check: exactly degree+1 DoFs must have been selected for this edge.
+      const unsigned int associated_edge_dofs = associated_edge_dof_index;
+      Assert (associated_edge_dofs == degree + 1,
+              ExcMessage ("Error: Unexpected number of 3D edge DoFs"));
+
+      // Matrix and RHS vectors to store linear system:
+      // We have (degree+1) basis functions for an edge
+      FullMatrix<double> edge_matrix (degree + 1,degree + 1);
+      FullMatrix<double> edge_matrix_inv (degree + 1,degree + 1);
+      Vector<double> edge_rhs (degree + 1);
+      Vector<double> edge_solution (degree + 1);
+
+      const FEValuesExtractors::Vector vec (first_vector_component);
+
+      // Coordinate direction (0, 1 or 2) along which each line of each
+      // face runs; indexed as [face][line].
+      const unsigned int
+      edge_coordinate_direction
+      [GeometryInfo<dim>::faces_per_cell]
+      [GeometryInfo<dim>::lines_per_face]
+      = { { 2, 2, 1, 1 },
+        { 2, 2, 1, 1 },
+        { 0, 0, 2, 2 },
+        { 0, 0, 2, 2 },
+        { 1, 1, 0, 0 },
+        { 1, 1, 0, 0 }
+      };
+      // Finite-difference step for the tangent; NOTE(review): real length used in unit coordinates, confirm.
+      const double tol = 0.5 * cell->face (face)->line (line)->diameter () / fe.degree;
+      const std::vector<Point<dim> > &
+      reference_quadrature_points = fe_values.get_quadrature ().get_points ();
+
+      // Project the boundary function onto the shape functions for this edge
+      // and set up a linear system of equations to get the values for the DoFs
+      // associated with this edge.
+      for (unsigned int q_point = 0; q_point < fe_values.n_quadrature_points; ++q_point)
+        {
+          // Approximate the edge tangent at the quadrature point by a central
+          // difference of the mapping along the edge direction (step tol),
+          // then normalize it.
+          Point<dim> shifted_reference_point_1 = reference_quadrature_points[q_point];
+          Point<dim> shifted_reference_point_2 = reference_quadrature_points[q_point];
+
+          shifted_reference_point_1 (edge_coordinate_direction[face][line]) += tol;
+          shifted_reference_point_2 (edge_coordinate_direction[face][line]) -= tol;
+          Tensor<1,dim> tangential
+            = (0.5 *
+               (fe_values.get_mapping ()
+                .transform_unit_to_real_cell (cell,
+                                              shifted_reference_point_1)
+                -
+                fe_values.get_mapping ()
+                .transform_unit_to_real_cell (cell,
+                                              shifted_reference_point_2))
+               / tol);
+          tangential
+          /= tangential.norm ();
+
+          // Compute the entries of the linear system
+          // Note the system is symmetric so we could only compute the lower/upper triangle.
+          //
+          // The matrix entries are
+          // \int_{edge} (tangential*edge_shape_function_i)*(tangential*edge_shape_function_j) dS
+          //
+          // The RHS entries are:
+          // \int_{edge} (tangential*boundary_value)*(tangential*edge_shape_function_i) dS.
+          for (unsigned int j = 0; j < associated_edge_dofs; ++j)
+            {
+              const unsigned int j_face_idx = associated_edge_dof_to_face_dof[j];
+              const unsigned int j_cell_idx = fe.face_to_cell_index (j_face_idx, face);
+              for (unsigned int i = 0; i < associated_edge_dofs; ++i)
+                {
+                  const unsigned int i_face_idx = associated_edge_dof_to_face_dof[i];
+                  const unsigned int i_cell_idx = fe.face_to_cell_index (i_face_idx, face);
+
+                  edge_matrix(i,j)
+                  += fe_values.JxW (q_point)
+                     * (fe_values[vec].value (i_cell_idx, q_point) * tangential)
+                     * (fe_values[vec].value (j_cell_idx, q_point) * tangential);
+                }
+              // Compute the RHS entries:
+              edge_rhs(j) += fe_values.JxW (q_point)
+                             * (values[q_point] (first_vector_component) * tangential [0]
+                                + values[q_point] (first_vector_component + 1) * tangential [1]
+                                + values[q_point] (first_vector_component + 2) * tangential [2])
+                             * (fe_values[vec].value (j_cell_idx, q_point) * tangential);
+            }
+        }
+
+      // Solve the small dense system by explicitly inverting the edge matrix.
+      edge_matrix_inv.invert(edge_matrix);
+      edge_matrix_inv.vmult(edge_solution,edge_rhs);
+
+      // Store the computed DoF values by face DoF index and mark them as processed.
+      for (unsigned int i = 0; i < associated_edge_dofs; ++i)
+        {
+          dof_values[associated_edge_dof_to_face_dof[i]] = edge_solution(i);
+          dofs_processed[associated_edge_dof_to_face_dof[i]] = true;
+        }
+    }
+
+
+    template <int dim, class CellIterator>
+    void
+    compute_edge_projection_l2 (const CellIterator &    /*cell*/,
+                                const unsigned int      /*face*/,
+                                const unsigned int      /*line*/,
+                                hp::FEValues<dim> &     /*hp_fe_values*/,
+                                const Function<dim> &   /*boundary_function*/,
+                                const unsigned int      /*first_vector_component*/,
+                                std::vector<double> &   /*dof_values*/,
+                                std::vector<bool> &     /*dofs_processed*/)
+    {
+      // Placeholder for every dimension not handled by the 3D overload
+      // above: all arguments are ignored and the assertion below is tripped.
+      Assert (false, ExcInternalError ());
+    }
+
+    template<int dim, typename cell_iterator>
+    void
+    compute_face_projection_curl_conforming_l2 (const cell_iterator &cell,
+                                                const unsigned int face,
+                                                hp::FEFaceValues<dim> &hp_fe_face_values,
+                                                const Function<dim> &boundary_function,
+                                                const unsigned int first_vector_component,
+                                                std::vector<double> &dof_values,
+                                                std::vector<bool> &dofs_processed)
+    {
+      // This function computes the L2-projection of the boundary
+      // function on the interior of faces only. In 3D, this should only be
+      // called after first calling compute_edge_projection_l2, as it relies on
+      // edge constraints which are found.
+
+      // In the context of this function, by associated DoFs we mean:
+      // the DoFs corresponding to the group of components making up the vector
+      // with first component first_vector_component (with total components dim).
+
+      // Copy to the standard FEFaceValues object:
+      hp_fe_face_values.reinit (cell, face);
+      const FEFaceValues<dim> &fe_face_values = hp_fe_face_values.get_present_fe_values();
+
+      // Initialize the required objects.
+      const FiniteElement<dim> &fe = cell->get_fe ();
+      const std::vector<Point<dim> > &
+      quadrature_points = fe_face_values.get_quadrature_points ();
+      const unsigned int degree = fe.degree - 1;
+
+      std::vector<Vector<double> >
+      values (fe_face_values.n_quadrature_points, Vector<double> (fe.n_components ()));
+
+      // Get boundary function values at quadrature points.
+      boundary_function.vector_value_list (quadrature_points, values);
+
+      // Find where the group of vector components (dim of them,
+      // starting at first_vector_component) are within an FESystem.
+      //
+      // If not using FESystem then must be using FE_Nedelec,
+      // which has one base element and one copy of it (with 3 components).
+      std::pair<unsigned int, unsigned int> base_indices (0, 0);
+      if (dynamic_cast<const FESystem<dim>*> (&cell->get_fe ()) != 0)
+        {
+          unsigned int fe_index = 0;
+          unsigned int fe_index_old = 0;
+          unsigned int i = 0;
+
+          // Find base element:
+          // base_indices.first
+          //
+          // Then select which copy of that base element
+          // [ each copy is of length fe.base_element(base_indices.first).n_components() ]
+          // corresponds to first_vector_component:
+          // base_index.second
+          for (; i < fe.n_base_elements (); ++i)
+            {
+              fe_index_old = fe_index;
+              fe_index += fe.element_multiplicity (i) * fe.base_element (i).n_components ();
+
+              if (fe_index > first_vector_component)
+                break;
+            }
+          base_indices.first = i;
+          base_indices.second = (first_vector_component - fe_index_old) / fe.base_element (i).n_components ();
+        }
+
+      switch (dim)
+        {
+        case 2:
+          // NOTE: This is very similar to compute_edge_projection as used in 3D,
+          //       and contains a lot of overlap with that function.
+        {
+
+          // Find the DoFs we want to constrain. There are degree+1 in total.
+          // Create a map from these to the face index
+          // Note:
+          //    - for a single FE_Nedelec<2> element this is
+          //      simply 0 to fe.dofs_per_face
+          //    - for FESystem<2> this just requires matching the
+          //      base element, fe.system_to_base_index.first.first
+          //      and the copy of the base element we're interested
+          //      in, fe.system_to_base_index.first.second
+          std::vector<unsigned int> associated_edge_dof_to_face_dof (degree + 1);
+
+          unsigned int associated_edge_dof_index = 0;
+          for (unsigned int face_idx = 0; face_idx < fe.dofs_per_face; ++face_idx)
+            {
+              const unsigned int cell_idx = fe.face_to_cell_index (face_idx, face);
+              if (((dynamic_cast<const FESystem<dim>*> (&fe) != 0)
+                   && (fe.system_to_base_index (cell_idx).first == base_indices))
+                  || (dynamic_cast<const FE_Nedelec<dim>*> (&fe) != 0))
+                {
+                  associated_edge_dof_to_face_dof[associated_edge_dof_index] = face_idx;
+                  ++associated_edge_dof_index;
+                }
+            }
+          // Sanity check:
+          const unsigned int associated_edge_dofs = associated_edge_dof_index;
+          Assert (associated_edge_dofs == degree + 1,
+                  ExcMessage ("Error: Unexpected number of 2D edge DoFs"));
+
+          // Matrix and RHS vectors to store:
+          // We have (degree+1) edge basis functions
+          FullMatrix<double> edge_matrix (degree + 1,degree + 1);
+          FullMatrix<double> edge_matrix_inv (degree + 1,degree + 1);
+          Vector<double> edge_rhs (degree + 1);
+          Vector<double> edge_solution (degree + 1);
+
+          const FEValuesExtractors::Vector vec (first_vector_component);
+
+          // Project the boundary function onto the shape functions for this edge
+          // and set up a linear system of equations to get the values for the DoFs
+          // associated with this edge.
+          for (unsigned int q_point = 0;
+               q_point < fe_face_values.n_quadrature_points; ++q_point)
+            {
+              // Compute the entries of the linear system
+              // Note the system is symmetric so we could only compute the lower/upper triangle.
+              //
+              // The matrix entries are
+              // \int_{edge} (tangential * edge_shape_function_i) * (tangential * edge_shape_function_j) dS
+              //
+              // The RHS entries are:
+              // \int_{edge} (tangential* boundary_value) * (tangential * edge_shape_function_i) dS.
+              //
+              // In 2D, tangential*vector is equivalent to cross_product_3d(normal, vector), so we use this instead.
+              // This avoids possible issues with the computation of the tangent.
+
+              // Store the normal at this quad point:
+              Tensor<1,dim> normal_at_q_point = fe_face_values.normal_vector(q_point);
+              for (unsigned int j = 0; j < associated_edge_dofs; ++j)
+                {
+                  const unsigned int j_face_idx = associated_edge_dof_to_face_dof[j];
+                  const unsigned int j_cell_idx = fe.face_to_cell_index (j_face_idx, face);
+
+                  Tensor<1,dim> phi_j = fe_face_values[vec].value (j_cell_idx, q_point);
+                  for (unsigned int i = 0; i < associated_edge_dofs; ++i)
+                    {
+                      const unsigned int i_face_idx = associated_edge_dof_to_face_dof[i];
+                      const unsigned int i_cell_idx = fe.face_to_cell_index (i_face_idx, face);
+
+                      Tensor<1,dim> phi_i = fe_face_values[vec].value (i_cell_idx, q_point);
+
+                      // Using n cross phi
+                      edge_matrix(i,j)
+                      += fe_face_values.JxW (q_point)
+                         * ((phi_i[1]*normal_at_q_point[0] - phi_i[0]*normal_at_q_point[1])
+                            * (phi_j[1]*normal_at_q_point[0] - phi_j[0]*normal_at_q_point[1]));
+                    }
+                  // Using n cross phi
+                  edge_rhs(j)
+                  += fe_face_values.JxW (q_point)
+                     * ((values[q_point] (first_vector_component+1) * normal_at_q_point[0]
+                         - values[q_point] (first_vector_component) * normal_at_q_point[1])
+                        * (phi_j[1]*normal_at_q_point[0] - phi_j[0]*normal_at_q_point[1]));
+                }
+            }
+
+          // Invert linear system
+          edge_matrix_inv.invert(edge_matrix);
+          edge_matrix_inv.vmult(edge_solution,edge_rhs);
+
+          // Store computed DoFs
+          for (unsigned int associated_edge_dof_index = 0; associated_edge_dof_index < associated_edge_dofs; ++associated_edge_dof_index)
+            {
+              dof_values[associated_edge_dof_to_face_dof[associated_edge_dof_index]] = edge_solution (associated_edge_dof_index);
+              dofs_processed[associated_edge_dof_to_face_dof[associated_edge_dof_index]] = true;
+            }
+          break;
+        }
+
+        case 3:
+        {
+          const FEValuesExtractors::Vector vec (first_vector_component);
+
+          // First group DoFs associated with edges which we already know.
+          // Sort these into groups of dofs (0 -> degree+1 of them) by each edge.
+          // This will help when computing the residual for the face projections.
+          //
+          // This matches with the search done in compute_edge_projection.
+          const unsigned int lines_per_face = GeometryInfo<dim>::lines_per_face;
+          std::vector<std::vector<unsigned int> >
+          associated_edge_dof_to_face_dof(lines_per_face, std::vector<unsigned int> (degree + 1));
+          std::vector<unsigned int> associated_edge_dofs (lines_per_face);
+
+          for (unsigned int line = 0; line < lines_per_face; ++line)
+            {
+              // Lowest DoF in the base element allowed for this edge:
+              const unsigned int lower_bound =
+                fe.base_element(base_indices.first).face_to_cell_index(line * (degree + 1), face);
+              // Highest DoF in the base element allowed for this edge:
+              const unsigned int upper_bound =
+                fe.base_element(base_indices.first).face_to_cell_index((line + 1) * (degree + 1) - 1, face);
+              unsigned int associated_edge_dof_index = 0;
+              for (unsigned int line_idx = 0; line_idx < fe.dofs_per_line; ++line_idx)
+                {
+                  const unsigned int face_idx = line*fe.dofs_per_line + line_idx;
+                  const unsigned int cell_idx = fe.face_to_cell_index(face_idx, face);
+                  // Check this cell_idx belongs to the correct base_element, component and line:
+                  if (((dynamic_cast<const FESystem<dim>*> (&fe) != 0)
+                       && (fe.system_to_base_index (cell_idx).first == base_indices)
+                       && (lower_bound <= fe.system_to_base_index (cell_idx).second)
+                       && (fe.system_to_base_index (cell_idx).second <= upper_bound))
+                      || ((dynamic_cast<const FE_Nedelec<dim>*> (&fe) != 0)
+                          && (line * (degree + 1) <= face_idx)
+                          && (face_idx <= (line + 1) * (degree + 1) - 1)))
+                    {
+                      associated_edge_dof_to_face_dof[line][associated_edge_dof_index] = face_idx;
+                      ++associated_edge_dof_index;
+                    }
+                }
+              // Sanity check:
+              associated_edge_dofs[line] = associated_edge_dof_index;
+              Assert (associated_edge_dofs[line] == degree + 1,
+                      ExcMessage ("Error: Unexpected number of 3D edge DoFs"));
+            }
+
+          // Next find the face DoFs associated with the vector components
+          // we're interested in. There are 2*degree*(degree+1) DoFs associated
+          // with each face (not including edges!).
+          //
+          // Create a map mapping from the consecutively numbered associated_dofs
+          // to the face DoF (which can be transferred to a local cell index).
+          //
+          // For FE_Nedelec<3> we just need to have a face numbering greater than
+          // the number of edge DoFs (=lines_per_face*(degree+1)).
+          //
+          // For FESystem<3> we need to match the base_indices (base element and
+          // copy within that base element) and ensure we're above the number of
+          // edge DoFs within that base element.
+          std::vector<unsigned int> associated_face_dof_to_face_dof (2*degree*(degree + 1));
+
+          // Skip the edge DoFs, so we start at lines_per_face*(fe.dofs_per_line).
+          unsigned int associated_face_dof_index = 0;
+          for (unsigned int face_idx = lines_per_face*(fe.dofs_per_line); face_idx < fe.dofs_per_face; ++face_idx)
+            {
+              const unsigned int cell_idx = fe.face_to_cell_index (face_idx, face);
+              if (((dynamic_cast<const FESystem<dim>*> (&fe) != 0)
+                   && (fe.system_to_base_index (cell_idx).first == base_indices))
+                  || ((dynamic_cast<const FE_Nedelec<dim>*> (&fe) != 0)))
+                {
+                  associated_face_dof_to_face_dof[associated_face_dof_index] = face_idx;
+                  ++associated_face_dof_index;
+                }
+            }
+          // Sanity check:
+          const unsigned int associated_face_dofs = associated_face_dof_index;
+          Assert (associated_face_dofs == 2*degree*(degree + 1),
+                  ExcMessage ("Error: Unexpected number of 3D face DoFs"));
+
+          // Storage for the linear system.
+          // There are 2*degree*(degree+1) DoFs associated with a face in 3D.
+          // Note this doesn't include the DoFs associated with edges on that face.
+          FullMatrix<double> face_matrix (2*degree*(degree + 1));
+          FullMatrix<double> face_matrix_inv (2*degree*(degree + 1));
+          Vector<double> face_rhs (2*degree*(degree + 1));
+          Vector<double> face_solution (2*degree*(degree + 1));
+
+          // Project the boundary function onto the shape functions for this face
+          // and set up a linear system of equations to get the values for the DoFs
+          // associated with this face. We also must include the residuals from the
+          // shape functions associated with edges.
+          Tensor<1, dim> tmp;
+          Tensor<1, dim> cross_product_i;
+          Tensor<1, dim> cross_product_j;
+          Tensor<1, dim> cross_product_rhs;
+
+          // Loop to construct face linear system.
+          for (unsigned int q_point = 0;
+               q_point < fe_face_values.n_quadrature_points; ++q_point)
+            {
+              // First calculate the residual from the edge functions
+              // store the result in tmp.
+              //
+              // Edge_residual =
+              //        boundary_value - (
+              //            \sum_(edges on face)
+              //                 \sum_(DoFs on edge) edge_dof_value*edge_shape_function
+              //                   )
+              for (unsigned int d = 0; d < dim; ++d)
+                {
+                  tmp[d]=0.0;
+                }
+              for (unsigned int line = 0; line < lines_per_face; ++line)
+                {
+                  for (unsigned int associated_edge_dof = 0; associated_edge_dof < associated_edge_dofs[line]; ++associated_edge_dof)
+                    {
+                      const unsigned int face_idx = associated_edge_dof_to_face_dof[line][associated_edge_dof];
+                      const unsigned int cell_idx = fe.face_to_cell_index(face_idx, face);
+                      tmp -= dof_values[face_idx]*fe_face_values[vec].value(cell_idx, q_point);
+                    }
+                }
+
+              for (unsigned int d=0; d<dim; ++d)
+                {
+                  tmp[d] += values[q_point] (first_vector_component + d);
+                }
+
+              // Tensor of normal vector on the face at q_point;
+              const Tensor<1,dim> normal_vector = fe_face_values.normal_vector(q_point);
+
+              // Now compute the linear system:
+              // On a face:
+              // The matrix entries are:
+              // \int_{face} (n x face_shape_function_i) \cdot ( n x face_shape_function_j) dS
+              //
+              // The RHS entries are:
+              // \int_{face} (n x Edge_residual) \cdot (n x face_shape_function_i) dS
+
+              for (unsigned int j = 0; j < associated_face_dofs; ++j)
+                {
+                  const unsigned int j_face_idx = associated_face_dof_to_face_dof[j];
+                  const unsigned int cell_j = fe.face_to_cell_index (j_face_idx, face);
+
+                  cross_product_j =
+                    cross_product_3d(normal_vector,
+                                     fe_face_values[vec].value(cell_j, q_point));
+
+                  for (unsigned int i = 0; i < associated_face_dofs; ++i)
+                    {
+                      const unsigned int i_face_idx = associated_face_dof_to_face_dof[i];
+                      const unsigned int cell_i = fe.face_to_cell_index (i_face_idx, face);
+                      cross_product_i =
+                        cross_product_3d(normal_vector,
+                                         fe_face_values[vec].value(cell_i, q_point));
+
+                      face_matrix(i, j) += fe_face_values.JxW(q_point) *
+                                           cross_product_i * cross_product_j;
+                    }
+                  // compute rhs
+                  cross_product_rhs = cross_product_3d(normal_vector, tmp);
+                  face_rhs(j) += fe_face_values.JxW(q_point) *
+                                 cross_product_rhs * cross_product_j;
+                }
+            }
+
+          // Solve linear system:
+          face_matrix_inv.invert(face_matrix);
+          face_matrix_inv.vmult(face_solution, face_rhs);
+
+
+          // Store computed DoFs:
+          for (unsigned int associated_face_dof = 0; associated_face_dof < associated_face_dofs; ++associated_face_dof)
+            {
+              dof_values[associated_face_dof_to_face_dof[associated_face_dof]] = face_solution(associated_face_dof);
+              dofs_processed[associated_face_dof_to_face_dof[associated_face_dof]] = true;
+            }
+          break;
+        }
+        default:
+          Assert (false, ExcNotImplemented ());
+        }
+    }
+
+    template <int dim, typename DoFHandlerType>
+    void
+    compute_project_boundary_values_curl_conforming_l2
+    (const DoFHandlerType                  &dof_handler,
+     const unsigned int                     first_vector_component,
+     const Function<dim>                   &boundary_function,
+     const types::boundary_id               boundary_component,
+     ConstraintMatrix                      &constraints,
+     const hp::MappingCollection<dim, dim> &mapping_collection)
+    {
+      // L2-projection of boundary_function onto the DoFs of all faces (of locally owned
+      // cells) whose boundary id is boundary_component; results are added to constraints.
+      // In 2D (one step) we only need to constrain edge DoFs.
+      //
+      // In 3D we need to constrain both edge and face DoFs. This is done in two steps.
+      //
+      // For edges, since the face shape functions are zero here ("bubble functions"),
+      // we project the tangential component of the boundary function and compute
+      // the L2-projection. This returns the values for the DoFs associated with each edge
+      // shape function. In 3D, this is computed by compute_edge_projection_l2; in 2D,
+      // it is handled by compute_face_projection_curl_conforming_l2.
+      //
+      // For faces we compute the residual of the boundary function, i.e. the part that is
+      // not already represented by the edge shape functions. This residual is then used to
+      // calculate the remaining face DoF values via a projection, which leads to a linear
+      // system to solve. This is handled by compute_face_projection_curl_conforming_l2.
+      //
+      // For details see (for example) section 4.2:
+      // Electromagnetic scattering simulation using an H (curl) conforming hp finite element
+      // method in three dimensions, PD Ledger, K Morgan, O Hassan,
+      // Int. J.  Num. Meth. Fluids, Volume 53, Issue 8, pages 1267–1296, 20 March 2007:
+      // http://onlinelibrary.wiley.com/doi/10.1002/fld.1223/abstract
+
+      // Create hp FECollection; dof_handler can be of either hp or standard type.
+      // From here on we can treat it like an hp-namespace object.
+      const hp::FECollection<dim> fe_collection (dof_handler.get_fe ());
+
+      // Create face quadrature collection: one QGauss<dim-1>(2*degree+1) rule per element
+      hp::QCollection<dim - 1> face_quadrature_collection;
+      for (unsigned int i = 0; i < fe_collection.size (); ++i)
+        {
+          const QGauss<dim - 1>  reference_face_quadrature (2 * fe_collection[i].degree + 1);
+          face_quadrature_collection.push_back(reference_face_quadrature);
+        }
+
+      hp::FEFaceValues<dim> fe_face_values (mapping_collection, fe_collection,
+                                            face_quadrature_collection,
+                                            update_values |
+                                            update_quadrature_points |
+                                            update_normal_vectors |
+                                            update_JxW_values);
+
+      // Per-face scratch storage (resized and reset for every face handled below):
+      std::vector<bool> dofs_processed;
+      std::vector<double> dof_values;
+      std::vector<types::global_dof_index> face_dof_indices;
+      typename DoFHandlerType::active_cell_iterator cell = dof_handler.begin_active ();
+
+      switch (dim)
+        {
+        case 2:
+        {
+          for (; cell != dof_handler.end (); ++cell)
+            {
+              if (cell->at_boundary () && cell->is_locally_owned ())
+                {
+                  for (unsigned int face = 0; face < GeometryInfo<dim>::faces_per_cell; ++face)
+                    {
+                      if (cell->face (face)->boundary_id () == boundary_component)
+                        {
+                          // FE_Nothing: no work to do. NOTE(review): 'return' leaves the whole function, so later cells are skipped.
+                          if (dynamic_cast<const FE_Nothing<dim>*> (&cell->get_fe ()) != 0)
+                            {
+                              return;
+                            }
+
+                          // This is only implemented for FE_Nedelec elements.
+                          // If the FE is a FESystem we cannot check this.
+                          if (dynamic_cast<const FESystem<dim>*> (&cell->get_fe ()) == 0)
+                            {
+                              typedef FiniteElement<dim> FEL;
+                              AssertThrow (dynamic_cast<const FE_Nedelec<dim>*> (&cell->get_fe ()) != 0,
+                                           typename FEL::ExcInterpolationNotImplemented ());
+
+                            }
+
+                          const unsigned int dofs_per_face = cell->get_fe ().dofs_per_face;
+
+                          dofs_processed.resize (dofs_per_face);
+                          dof_values.resize (dofs_per_face);
+
+                          for (unsigned int dof = 0; dof < dofs_per_face; ++dof)
+                            {
+                              dof_values[dof] = 0.0;
+                              dofs_processed[dof] = false;
+                            }
+
+                          // Compute the projection of the boundary function on the edge.
+                          // In 2D this is all that's required.
+                          compute_face_projection_curl_conforming_l2 (cell, face, fe_face_values,
+                                                                      boundary_function,
+                                                                      first_vector_component,
+                                                                      dof_values, dofs_processed);
+
+                          // Get the global DoF indices of this face (the local->global map):
+                          face_dof_indices.resize(dofs_per_face);
+                          cell->face (face)->get_dof_indices (face_dof_indices,
+                                                              cell->active_fe_index ());
+
+                          // Add the computed constraints to the constraint matrix,
+                          // assuming the degree of freedom is not already constrained.
+                          for (unsigned int dof = 0; dof < dofs_per_face; ++dof)
+                            {
+                              if (dofs_processed[dof] && constraints.can_store_line (face_dof_indices[dof])
+                                  && !(constraints.is_constrained (face_dof_indices[dof])))
+                                {
+                                  constraints.add_line (face_dof_indices[dof]);
+                                  if (std::abs (dof_values[dof]) > 1e-13)
+                                    {
+                                      constraints.set_inhomogeneity (face_dof_indices[dof], dof_values[dof]);
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+          break;
+        }
+
+        case 3:
+        {
+          hp::QCollection<dim> edge_quadrature_collection;
+
+          // Create equivalent of FEEdgeValues: per element, a 1D Gauss rule projected onto each line of each face.
+          for (unsigned int i = 0; i < fe_collection.size (); ++i)
+            {
+              const QGauss<dim-2> reference_edge_quadrature (2 * fe_collection[i].degree + 1);
+              for (unsigned int face = 0; face < GeometryInfo<dim>::faces_per_cell; ++face)
+                {
+                  for (unsigned int line = 0; line < GeometryInfo<dim>::lines_per_face; ++line)
+                    {
+                      edge_quadrature_collection.push_back
+                      (QProjector<dim>::project_to_face
+                       (QProjector<dim - 1>::project_to_face
+                        (reference_edge_quadrature, line), face));
+                    }
+                }
+            }
+
+          hp::FEValues<dim> fe_edge_values (mapping_collection, fe_collection,
+                                            edge_quadrature_collection,
+                                            update_jacobians |
+                                            update_JxW_values |
+                                            update_quadrature_points |
+                                            update_values);
+
+          for (; cell != dof_handler.end (); ++cell)
+            {
+              if (cell->at_boundary () && cell->is_locally_owned ())
+                {
+                  for (unsigned int face = 0; face < GeometryInfo<dim>::faces_per_cell; ++face)
+                    {
+                      if (cell->face (face)->boundary_id () == boundary_component)
+                        {
+                          // FE_Nothing: no work to do. NOTE(review): 'return' leaves the whole function, so later cells are skipped.
+                          if (dynamic_cast<const FE_Nothing<dim>*> (&cell->get_fe ()) != 0)
+                            {
+                              return;
+                            }
+
+                          // This is only implemented for FE_Nedelec elements.
+                          // If the FE is a FESystem we cannot check this.
+                          if (dynamic_cast<const FESystem<dim>*> (&cell->get_fe ()) == 0)
+                            {
+                              typedef FiniteElement<dim> FEL;
+
+                              AssertThrow (dynamic_cast<const FE_Nedelec<dim>*> (&cell->get_fe ()) != 0,
+                                           typename FEL::ExcInterpolationNotImplemented ());
+                            }
+
+                          const unsigned int superdegree = cell->get_fe ().degree;
+                          const unsigned int degree = superdegree - 1;
+                          const unsigned int dofs_per_face = cell->get_fe ().dofs_per_face;
+
+                          dofs_processed.resize (dofs_per_face);
+                          dof_values.resize (dofs_per_face);
+                          for (unsigned int dof = 0; dof < dofs_per_face; ++dof)
+                            {
+                              dof_values[dof] = 0.0;
+                              dofs_processed[dof] = false;
+                            }
+
+                          // First compute the projection on the edges.
+                          for (unsigned int line = 0; line < GeometryInfo<3>::lines_per_face; ++line)
+                            {
+                              compute_edge_projection_l2 (cell, face, line, fe_edge_values,
+                                                          boundary_function,
+                                                          first_vector_component,
+                                                          dof_values, dofs_processed);
+                            }
+
+                          // If there are higher order shape functions, then we
+                          // still need to compute the face projection
+                          if (degree > 0)
+                            {
+                              compute_face_projection_curl_conforming_l2 (cell, face, fe_face_values,
+                                                                          boundary_function,
+                                                                          first_vector_component,
+                                                                          dof_values,
+                                                                          dofs_processed);
+                            }
+
+                          // Get the global DoF indices of this face and add the computed values as constraints.
+                          face_dof_indices.resize(dofs_per_face);
+                          cell->face (face)->get_dof_indices (face_dof_indices,
+                                                              cell->active_fe_index ());
+
+                          for (unsigned int dof = 0; dof < dofs_per_face; ++dof)
+                            {
+                              if (dofs_processed[dof] && constraints.can_store_line (face_dof_indices[dof])
+                                  && !(constraints.is_constrained (face_dof_indices[dof])))
+                                {
+                                  constraints.add_line (face_dof_indices[dof]);
+
+                                  if (std::abs (dof_values[dof]) > 1e-13)
+                                    {
+                                      constraints.set_inhomogeneity (face_dof_indices[dof], dof_values[dof]);
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+          break;
+        }
+        default:
+          Assert (false, ExcNotImplemented ());
+        }
+    }
+
+  }
+
+
+  template <int dim>
+  void
+  project_boundary_values_curl_conforming_l2 (const DoFHandler<dim> &dof_handler,
+                                              const unsigned int first_vector_component,
+                                              const Function<dim> &boundary_function,
+                                              const types::boundary_id boundary_component,
+                                              ConstraintMatrix &constraints,
+                                              const Mapping<dim> &mapping)
+  {
+    // Non-hp version: wraps the single mapping in an hp::MappingCollection
+    // and forwards all other arguments unchanged to
+    // internals::compute_project_boundary_values_curl_conforming_l2() above.
+
+    hp::MappingCollection<dim> mapping_collection (mapping);
+    internals::
+    compute_project_boundary_values_curl_conforming_l2(dof_handler,
+                                                       first_vector_component,
+                                                       boundary_function,
+                                                       boundary_component,
+                                                       constraints,
+                                                       mapping_collection);
+  }
+
+  template <int dim>
+  void
+  project_boundary_values_curl_conforming_l2 (const hp::DoFHandler<dim> &dof_handler,
+                                              const unsigned int first_vector_component,
+                                              const Function<dim> &boundary_function,
+                                              const types::boundary_id boundary_component,
+                                              ConstraintMatrix &constraints,
+                                              const hp::MappingCollection<dim, dim> &mapping_collection)
+  {
+    // hp version: forwards all arguments unchanged to the internal
+    // compute_project_boundary_values_curl_conforming_l2() function above.
+    internals::
+    compute_project_boundary_values_curl_conforming_l2(dof_handler,
+                                                       first_vector_component,
+                                                       boundary_function,
+                                                       boundary_component,
+                                                       constraints,
+                                                       mapping_collection);
+  }
+
+
+
+  namespace internals
+  {
+    // 2d: integrates the normal component of the boundary function against the normal
+    // components of the face shape functions and adds the results to constraints.
+    template <typename cell_iterator>
+    void
+    compute_face_projection_div_conforming (const cell_iterator &cell,
+                                            const unsigned int face,
+                                            const FEFaceValues<2> &fe_values,
+                                            const unsigned int first_vector_component,
+                                            const Function<2> &boundary_function,
+                                            const std::vector<DerivativeForm<1,2,2> > &jacobians,
+                                            ConstraintMatrix &constraints)
+    {
+      // Compute the integral over the product of the normal components of
+      // the boundary function times the normal components of the shape
+      // functions supported on the boundary.
+      const FEValuesExtractors::Vector vec (first_vector_component);
+      const FiniteElement<2> &fe = cell->get_fe ();
+      const std::vector<Tensor<1,2> > &normals = fe_values.get_all_normal_vectors ();
+      const unsigned int
+      face_coordinate_direction[GeometryInfo<2>::faces_per_cell] = {1, 1, 0, 0};
+      std::vector<Vector<double> >
+      values (fe_values.n_quadrature_points, Vector<double> (2));
+      Vector<double> dof_values (fe.dofs_per_face);
+
+      // Get the values of the boundary function at the quadrature points.
+      {
+        const std::vector<Point<2> > &
+        quadrature_points = fe_values.get_quadrature_points ();
+
+        boundary_function.vector_value_list (quadrature_points, values);
+      }
+
+      for (unsigned int q_point = 0; q_point < fe_values.n_quadrature_points; ++q_point)
+        {
+          double tmp = 0.0;
+          // n . g; note that components 0 and 1 of the boundary values are read here:
+          for (unsigned int d = 0; d < 2; ++d)
+            tmp += normals[q_point][d] * values[q_point] (d);
+          // Scale by JxW and the Euclidean norm of (J[0][c], J[1][c]), c = face_coordinate_direction[face]:
+          tmp *= fe_values.JxW (q_point)
+                 * std::sqrt (jacobians[q_point][0][face_coordinate_direction[face]]
+                              * jacobians[q_point][0][face_coordinate_direction[face]]
+                              + jacobians[q_point][1][face_coordinate_direction[face]]
+                              * jacobians[q_point][1][face_coordinate_direction[face]]);
+
+          for (unsigned int i = 0; i < fe.dofs_per_face; ++i)
+            dof_values (i) += tmp * (normals[q_point]
+                                     * fe_values[vec].value (fe.face_to_cell_index (i, face), q_point));
+        }
+
+      std::vector<types::global_dof_index> face_dof_indices (fe.dofs_per_face);
+
+      cell->face (face)->get_dof_indices (face_dof_indices, cell->active_fe_index ());
+
+      // Copy the computed values into the ConstraintMatrix, but only if the degree
+      // of freedom is not already constrained.
+      for (unsigned int i = 0; i < fe.dofs_per_face; ++i)
+        if (!(constraints.is_constrained (face_dof_indices[i])))
+          {
+            constraints.add_line (face_dof_indices[i]);
+
+            if (std::abs (dof_values (i)) > 1e-14)
+              constraints.set_inhomogeneity (face_dof_indices[i], dof_values (i));
+          }
+    }
+
+    // Dummy implementation of the above function for all other dimensions; it only raises Assert (false, ExcNotImplemented ()).
+    template<int dim, typename cell_iterator>
+    void
+    compute_face_projection_div_conforming (const cell_iterator &,
+                                            const unsigned int,
+                                            const FEFaceValues<dim> &,
+                                            const unsigned int,
+                                            const Function<dim> &,
+                                            const std::vector<DerivativeForm<1,dim,dim> > &,
+                                            ConstraintMatrix &)
+    {
+      Assert (false, ExcNotImplemented ());
+    }
+
+    // Three-dimensional face projection for div-conforming elements: the
+    // normal component of the boundary function is integrated against the
+    // normal components of the shape functions supported on the face, and
+    // the results are stored per global degree of freedom in dof_values.
+    template<typename cell_iterator>
+    void
+    compute_face_projection_div_conforming (const cell_iterator &cell,
+                                            const unsigned int face,
+                                            const FEFaceValues<3> &fe_values,
+                                            const unsigned int first_vector_component,
+                                            const Function<3> &boundary_function,
+                                            const std::vector<DerivativeForm<1,3,3> > &jacobians,
+                                            std::vector<double> &dof_values,
+                                            std::vector<types::global_dof_index> &projected_dofs)
+    {
+      const FiniteElement<3> &fe = cell->get_fe ();
+      const FEValuesExtractors::Vector vec (first_vector_component);
+      const unsigned int n_q_points = fe_values.n_quadrature_points;
+      const std::vector<Tensor<1,3> > &normals = fe_values.get_all_normal_vectors ();
+
+      // For each face, the two coordinate directions used to pick the
+      // Jacobian columns entering the scaling factor below.
+      const unsigned int
+      tangential_directions[GeometryInfo<3>::faces_per_cell][2] = {{1, 2}, {1, 2},
+        {2, 0}, {2, 0},
+        {0, 1}, {0, 1}
+      };
+
+      // Evaluate the boundary function at all quadrature points of the face.
+      std::vector<Vector<double> > boundary_values (n_q_points, Vector<double> (3));
+      boundary_function.vector_value_list (fe_values.get_quadrature_points (),
+                                           boundary_values);
+
+      // Integral of the normal component of the boundary function times the
+      // normal component of each face shape function.
+      Vector<double> local_values (fe.dofs_per_face);
+
+      for (unsigned int q = 0; q < n_q_points; ++q)
+        {
+          double weighted_flux = 0.0;
+          for (unsigned int d = 0; d < 3; ++d)
+            weighted_flux += normals[q][d] * boundary_values[q] (d);
+
+          // squared lengths of the two selected Jacobian columns
+          double column_norm_sqr[2] = {0.0, 0.0};
+          for (unsigned int t = 0; t < 2; ++t)
+            {
+              const unsigned int column = tangential_directions[face][t];
+              for (unsigned int row = 0; row < 3; ++row)
+                column_norm_sqr[t] += jacobians[q][row][column] * jacobians[q][row][column];
+            }
+
+          weighted_flux *= fe_values.JxW (q)
+                           * std::sqrt (column_norm_sqr[0] * column_norm_sqr[1]);
+
+          for (unsigned int i = 0; i < fe.dofs_per_face; ++i)
+            local_values (i) += weighted_flux * (normals[q]
+                                                 * fe_values[vec].value (fe.face_to_cell_index (i, face), q));
+        }
+
+      std::vector<types::global_dof_index> face_dof_indices (fe.dofs_per_face);
+      cell->face (face)->get_dof_indices (face_dof_indices, cell->active_fe_index ());
+
+      // Store a value only if the dof has so far been recorded with a lower
+      // degree than that of the present element; then record this degree.
+      for (unsigned int i = 0; i < fe.dofs_per_face; ++i)
+        {
+          const types::global_dof_index dof = face_dof_indices[i];
+          if (projected_dofs[dof] >= fe.degree)
+            continue;
+
+          dof_values[dof] = local_values (i);
+          projected_dofs[dof] = fe.degree;
+        }
+    }
+
+    // Fallback overload (dof_values / projected_dofs variant) for dimensions
+    // without a dedicated implementation; it only evaluates an Assert.
+    template <int dim, typename cell_iterator>
+    void
+    compute_face_projection_div_conforming
+    (const cell_iterator                           &/*cell*/,
+     const unsigned int                             /*face*/,
+     const FEFaceValues<dim>                       &/*fe_values*/,
+     const unsigned int                             /*first_vector_component*/,
+     const Function<dim>                           &/*boundary_function*/,
+     const std::vector<DerivativeForm<1,dim,dim> > &/*jacobians*/,
+     std::vector<double>                           &/*dof_values*/,
+     std::vector<types::global_dof_index>          &/*projected_dofs*/)
+    {
+      Assert (false, ExcNotImplemented ());
+    }
+  }
+
+
+  // Compute constraints for the normal component of a div-conforming
+  // (Raviart-Thomas) discretization on a DoFHandler: on every face whose
+  // boundary indicator equals boundary_component, the face degrees of
+  // freedom are constrained to values obtained by projecting
+  // boundary_function onto the face.
+  //
+  // dof_handler             DoFHandler whose active cells are traversed.
+  // first_vector_component  First component of the vector-valued quantity;
+  //                         forwarded to the per-face projection helper.
+  // boundary_function       Function supplying the boundary values.
+  // boundary_component      Boundary indicator selecting the faces.
+  // constraints             Receives a line (plus an inhomogeneity if its
+  //                         magnitude exceeds 1e-14) for each projected dof
+  //                         that is not constrained already.
+  // mapping                 Mapping used to set up the FEValues objects.
+  //
+  // Only dim == 2 and dim == 3 are implemented (otherwise an Assert fires).
+  // Elements other than FESystem or FE_RaviartThomas raise
+  // ExcInterpolationNotImplemented, except that in 2d an FE_Nothing element
+  // makes the function return early.
+  template <int dim>
+  void
+  project_boundary_values_div_conforming (const DoFHandler<dim> &dof_handler,
+                                          const unsigned int first_vector_component,
+                                          const Function<dim> &boundary_function,
+                                          const types::boundary_id boundary_component,
+                                          ConstraintMatrix &constraints,
+                                          const Mapping<dim> &mapping)
+  {
+    const unsigned int spacedim = dim;
+    // Interpolate the normal components of the boundary functions. Since the
+    // Raviart-Thomas elements are constructed from a Lagrangian basis, it
+    // suffices to compute the integral over the product of the normal
+    // components of the boundary function times the normal components of the
+    // shape functions supported on the boundary.
+    const FiniteElement<dim> &fe = dof_handler.get_fe ();
+    QGauss<dim - 1> face_quadrature (fe.degree + 1);
+    FEFaceValues<dim> fe_face_values (mapping, fe, face_quadrature, update_JxW_values |
+                                      update_normal_vectors |
+                                      update_quadrature_points |
+                                      update_values);
+    hp::FECollection<dim> fe_collection (fe);
+    hp::MappingCollection<dim> mapping_collection (mapping);
+    hp::QCollection<dim> quadrature_collection;
+
+    // Project the face quadrature onto every face of the reference cell so
+    // that the Jacobians can be evaluated at the face quadrature points.
+    for (unsigned int face = 0; face < GeometryInfo<dim>::faces_per_cell; ++face)
+      quadrature_collection.push_back (QProjector<dim>::project_to_face (face_quadrature,
+                                       face));
+
+    hp::FEValues<dim> fe_values (mapping_collection, fe_collection, quadrature_collection,
+                                 update_jacobians);
+
+    switch (dim)
+      {
+      case 2:
+      {
+        for (typename DoFHandler<dim>::active_cell_iterator cell = dof_handler.begin_active ();
+             cell != dof_handler.end (); ++cell)
+          if (cell->at_boundary ())
+            for (unsigned int face = 0; face < GeometryInfo<dim>::faces_per_cell; ++face)
+              if (cell->face (face)->boundary_id () == boundary_component)
+                {
+                  // if the FE is a FE_Nothing object there is no work to do
+                  if (dynamic_cast<const FE_Nothing<dim>*> (&cell->get_fe ()) != 0)
+                    return;
+
+                  // This is only implemented, if the FE is a Raviart-Thomas
+                  // element. If the FE is a FESystem we cannot check this.
+                  if (dynamic_cast<const FESystem<dim>*> (&cell->get_fe ()) == 0)
+                    {
+                      typedef FiniteElement<dim> FEL;
+
+                      AssertThrow (dynamic_cast<const FE_RaviartThomas<dim>*> (&cell->get_fe ()) != 0,
+                                   typename FEL::ExcInterpolationNotImplemented ());
+                    }
+
+                  // select the quadrature formula projected onto this face
+                  fe_values.reinit (cell, face + cell->active_fe_index ()
+                                    * GeometryInfo<dim>::faces_per_cell);
+
+                  const std::vector<DerivativeForm<1,dim,spacedim> > &
+                  jacobians = fe_values.get_present_fe_values ().get_jacobians ();
+
+                  fe_face_values.reinit (cell, face);
+                  internals::compute_face_projection_div_conforming (cell, face,
+                                                                     fe_face_values,
+                                                                     first_vector_component,
+                                                                     boundary_function,
+                                                                     jacobians,
+                                                                     constraints);
+                }
+
+        break;
+      }
+
+      case 3:
+      {
+        // In three dimensions the edges between two faces are treated twice.
+        // Therefore we store the computed values in a vector and copy them
+        // over in the ConstraintMatrix after all values have been computed.
+        // If we have two values for one edge, we choose the one, which was
+        // computed with the higher order element. If both elements are of
+        // the same order, we just keep the first value and do not compute a
+        // second one.
+        //
+        // Hold the dof count in the global index type so that it is not
+        // truncated when 64-bit dof indices are enabled.
+        const types::global_dof_index n_dofs = dof_handler.n_dofs ();
+        std::vector<double> dof_values (n_dofs);
+        // value-initialized to zero, which marks a dof as not yet projected
+        std::vector<types::global_dof_index> projected_dofs (n_dofs);
+
+        for (typename DoFHandler<dim>::active_cell_iterator cell = dof_handler.begin_active ();
+             cell != dof_handler.end (); ++cell)
+          if (cell->at_boundary ())
+            for (unsigned int face = 0; face < GeometryInfo<dim>::faces_per_cell; ++face)
+              if (cell->face (face)->boundary_id () == boundary_component)
+                {
+                  // This is only implemented, if the FE is a Raviart-Thomas
+                  // element. If the FE is a FESystem we cannot check this.
+                  if (dynamic_cast<const FESystem<dim>*> (&cell->get_fe ()) == 0)
+                    {
+                      typedef FiniteElement<dim> FEL;
+
+                      AssertThrow (dynamic_cast<const FE_RaviartThomas<dim>*> (&cell->get_fe ()) != 0,
+                                   typename FEL::ExcInterpolationNotImplemented ());
+                    }
+
+                  // select the quadrature formula projected onto this face
+                  fe_values.reinit (cell, face + cell->active_fe_index ()
+                                    * GeometryInfo<dim>::faces_per_cell);
+
+                  const std::vector<DerivativeForm<1,dim,spacedim> > &
+                  jacobians = fe_values.get_present_fe_values ().get_jacobians ();
+
+                  fe_face_values.reinit (cell, face);
+                  internals::compute_face_projection_div_conforming (cell, face,
+                                                                     fe_face_values,
+                                                                     first_vector_component,
+                                                                     boundary_function,
+                                                                     jacobians, dof_values,
+                                                                     projected_dofs);
+                }
+
+        // Copy the collected values into the ConstraintMatrix, skipping
+        // degrees of freedom that are constrained already.
+        for (types::global_dof_index dof = 0; dof < n_dofs; ++dof)
+          if ((projected_dofs[dof] != 0) && !(constraints.is_constrained (dof)))
+            {
+              constraints.add_line (dof);
+
+              if (std::abs (dof_values[dof]) > 1e-14)
+                constraints.set_inhomogeneity (dof, dof_values[dof]);
+            }
+
+        break;
+      }
+
+      default:
+        Assert (false, ExcNotImplemented ());
+      }
+  }
+
+
+  // hp variant of project_boundary_values_div_conforming: on every face
+  // whose boundary indicator equals boundary_component, the face degrees of
+  // freedom are constrained to the projection of boundary_function. One
+  // Gauss face rule is created per element of the collection, and the rule
+  // matching a cell's active_fe_index is used for the Jacobians. Only
+  // dim == 2 and dim == 3 are implemented; elements other than FESystem or
+  // FE_RaviartThomas raise ExcInterpolationNotImplemented.
+  //
+  // dof_handler: cells to traverse; first_vector_component: forwarded to
+  // the per-face helper; boundary_function: boundary values; constraints:
+  // receives a line (and an inhomogeneity if its magnitude exceeds 1e-14)
+  // per projected, not yet constrained dof; mapping_collection: mappings
+  // passed to the hp::FEValues and hp::FEFaceValues objects.
+  template <int dim>
+  void
+  project_boundary_values_div_conforming (const hp::DoFHandler<dim> &dof_handler,
+                                          const unsigned int first_vector_component,
+                                          const Function<dim> &boundary_function,
+                                          const types::boundary_id boundary_component,
+                                          ConstraintMatrix &constraints,
+                                          const hp::MappingCollection<dim, dim> &mapping_collection)
+  {
+    const unsigned int spacedim = dim;
+    const hp::FECollection<dim> &fe_collection = dof_handler.get_fe ();
+    hp::QCollection<dim - 1> face_quadrature_collection;
+    hp::QCollection<dim> quadrature_collection;
+
+    for (unsigned int i = 0; i < fe_collection.size (); ++i)
+      {
+        const QGauss<dim - 1> quadrature (fe_collection[i].degree + 1);
+
+        face_quadrature_collection.push_back (quadrature);
+        for (unsigned int face = 0; face < GeometryInfo<dim>::faces_per_cell; ++face)
+          quadrature_collection.push_back (QProjector<dim>::project_to_face (quadrature,
+                                           face));
+      }
+
+    hp::FEFaceValues<dim> fe_face_values (mapping_collection, fe_collection,
+                                          face_quadrature_collection, update_JxW_values |
+                                          update_normal_vectors |
+                                          update_quadrature_points |
+                                          update_values);
+    hp::FEValues<dim> fe_values (mapping_collection, fe_collection, quadrature_collection,
+                                 update_jacobians);
+
+    switch (dim)
+      {
+      case 2:
+      {
+        for (typename hp::DoFHandler<dim>::active_cell_iterator cell = dof_handler.begin_active ();
+             cell != dof_handler.end (); ++cell)
+          if (cell->at_boundary ())
+            for (unsigned int face = 0; face < GeometryInfo<dim>::faces_per_cell; ++face)
+              if (cell->face (face)->boundary_id () == boundary_component)
+                {
+                  // This is only implemented, if the FE is a Raviart-Thomas
+                  // element. If the FE is a FESystem we cannot check this.
+                  if (dynamic_cast<const FESystem<dim>*> (&cell->get_fe ()) == 0)
+                    {
+                      typedef FiniteElement<dim> FEL;
+                      AssertThrow (dynamic_cast<const FE_RaviartThomas<dim>*> (&cell->get_fe ()) != 0,
+                                   typename FEL::ExcInterpolationNotImplemented ());
+                    }
+
+                  fe_values.reinit (cell, face + cell->active_fe_index ()
+                                    * GeometryInfo<dim>::faces_per_cell);
+
+                  const std::vector<DerivativeForm<1,dim,spacedim> > &
+                  jacobians = fe_values.get_present_fe_values ().get_jacobians ();
+                  fe_face_values.reinit (cell, face);
+                  internals::compute_face_projection_div_conforming (cell, face,
+                                                                     fe_face_values.get_present_fe_values (),
+                                                                     first_vector_component,
+                                                                     boundary_function,
+                                                                     jacobians,
+                                                                     constraints);
+                }
+
+        break;
+      }
+
+      case 3:
+      {
+        // Hold the dof count in the global index type so that it is not
+        // truncated when 64-bit dof indices are enabled.
+        const types::global_dof_index n_dofs = dof_handler.n_dofs ();
+        std::vector<double> dof_values (n_dofs);
+        // value-initialized to zero, which marks a dof as not yet projected
+        std::vector<types::global_dof_index> projected_dofs (n_dofs);
+
+        for (typename hp::DoFHandler<dim>::active_cell_iterator cell = dof_handler.begin_active ();
+             cell != dof_handler.end (); ++cell)
+          if (cell->at_boundary ())
+            for (unsigned int face = 0; face < GeometryInfo<dim>::faces_per_cell; ++face)
+              if (cell->face (face)->boundary_id () == boundary_component)
+                {
+                  // This is only implemented, if the FE is a Raviart-Thomas
+                  // element. If the FE is a FESystem we cannot check this.
+                  if (dynamic_cast<const FESystem<dim>*> (&cell->get_fe ()) == 0)
+                    {
+                      typedef FiniteElement<dim> FEL;
+                      AssertThrow (dynamic_cast<const FE_RaviartThomas<dim>*> (&cell->get_fe ()) != 0,
+                                   typename FEL::ExcInterpolationNotImplemented ());
+                    }
+
+                  fe_values.reinit (cell, face + cell->active_fe_index ()
+                                    * GeometryInfo<dim>::faces_per_cell);
+
+                  const std::vector<DerivativeForm<1,dim,spacedim> > &
+                  jacobians = fe_values.get_present_fe_values ().get_jacobians ();
+                  fe_face_values.reinit (cell, face);
+                  internals::compute_face_projection_div_conforming (cell, face,
+                                                                     fe_face_values.get_present_fe_values (),
+                                                                     first_vector_component,
+                                                                     boundary_function,
+                                                                     jacobians, dof_values,
+                                                                     projected_dofs);
+                }
+
+        for (types::global_dof_index dof = 0; dof < n_dofs; ++dof)
+          if ((projected_dofs[dof] != 0) && !(constraints.is_constrained (dof)))
+            {
+              constraints.add_line (dof);
+
+              if (std::abs (dof_values[dof]) > 1e-14)
+                constraints.set_inhomogeneity (dof, dof_values[dof]);
+            }
+
+        break;
+      }
+
+      default:
+        Assert (false, ExcNotImplemented ());
+      }
+  }
+
+
+
+  // Impose zero normal flux on the boundaries in boundary_ids by calling
+  // compute_nonzero_normal_flux_constraints() with a zero function per id.
+  template <int dim, template <int, int> class DoFHandlerType, int spacedim>
+  void
+  compute_no_normal_flux_constraints (const DoFHandlerType<dim,spacedim> &dof_handler,
+                                      const unsigned int                  first_vector_component,
+                                      const std::set<types::boundary_id> &boundary_ids,
+                                      ConstraintMatrix                   &constraints,
+                                      const Mapping<dim, spacedim>       &mapping)
+  {
+    // function_map is a FunctionMap<spacedim>, so the zero function has to
+    // be a function on spacedim (not dim) dimensions to be stored in it.
+    ZeroFunction<spacedim> zero_function (dim);
+    typename FunctionMap<spacedim>::type function_map;
+    for (std::set<types::boundary_id>::const_iterator id = boundary_ids.begin();
+         id != boundary_ids.end(); ++id)
+      function_map[*id] = &zero_function;
+    compute_nonzero_normal_flux_constraints (dof_handler, first_vector_component,
+                                             boundary_ids, function_map,
+                                             constraints, mapping);
+  }
+
+  template <int dim, template <int, int> class DoFHandlerType, int spacedim>
+  void
+  compute_nonzero_normal_flux_constraints
+  (const DoFHandlerType<dim,spacedim>   &dof_handler,
+   const unsigned int                    first_vector_component,
+   const std::set<types::boundary_id>   &boundary_ids,
+   typename FunctionMap<spacedim>::type &function_map,
+   ConstraintMatrix                     &constraints,
+   const Mapping<dim, spacedim>         &mapping)
+  {
+    Assert (dim > 1,
+            ExcMessage ("This function is not useful in 1d because it amounts "
+                        "to imposing Dirichlet values on the vector-valued "
+                        "quantity."));
+
+    std::vector<types::global_dof_index> face_dofs;
+
+    // create FE and mapping collections for all elements in use by this
+    // DoFHandler
+    hp::FECollection<dim,spacedim>      fe_collection (dof_handler.get_fe());
+    hp::MappingCollection<dim,spacedim> mapping_collection;
+    for (unsigned int i=0; i<fe_collection.size(); ++i)
+      mapping_collection.push_back (mapping);
+
+    // now also create a quadrature collection for the faces of a cell. fill
+    // it with a quadrature formula with the support points on faces for each
+    // FE
+    hp::QCollection<dim-1> face_quadrature_collection;
+    for (unsigned int i=0; i<fe_collection.size(); ++i)
+      {
+        const std::vector<Point<dim-1> > &
+        unit_support_points = fe_collection[i].get_unit_face_support_points();
+
+        Assert (unit_support_points.size() == fe_collection[i].dofs_per_face,
+                ExcInternalError());
+
+        face_quadrature_collection.push_back (Quadrature<dim-1> (unit_support_points));
+      }
+
+    // now create the object with which we will generate the normal vectors
+    hp::FEFaceValues<dim,spacedim> x_fe_face_values (mapping_collection,
+                                                     fe_collection,
+                                                     face_quadrature_collection,
+                                                     update_q_points |
+                                                     update_normal_vectors);
+
+    // have a map that stores normal vectors for each vector-dof tuple we want
+    // to constrain. since we can get at the same vector dof tuple more than
+    // once (for example if it is located at a vertex that we visit from all
+    // adjacent cells), we will want to average later on the normal vectors
+    // computed on different cells as described in the documentation of this
+    // function. however, we can only average if the contributions came from
+    // different cells, whereas we want to constrain twice or more in case the
+    // contributions came from different faces of the same cell
+    // (i.e. constrain not just the *average normal direction* but *all normal
+    // directions* we find). consequently, we also have to store which cell a
+    // normal vector was computed on
+    typedef
+    std::multimap<internal::VectorDoFTuple<dim>,
+        std::pair<Tensor<1,dim>, typename DoFHandlerType<dim,spacedim>::active_cell_iterator> >
+        DoFToNormalsMap;
+    std::map<internal::VectorDoFTuple<dim>, Vector<double> >
+    dof_vector_to_b_values;
+
+    DoFToNormalsMap dof_to_normals_map;
+
+    // now loop over all cells and all faces
+    typename DoFHandlerType<dim,spacedim>::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+    std::set<types::boundary_id>::iterator b_id;
+    for (; cell!=endc; ++cell)
+      if (!cell->is_artificial())
+        for (unsigned int face_no=0; face_no < GeometryInfo<dim>::faces_per_cell;
+             ++face_no)
+          if ((b_id=boundary_ids.find(cell->face(face_no)->boundary_id()))
+              != boundary_ids.end())
+            {
+              const FiniteElement<dim> &fe = cell->get_fe ();
+              typename DoFHandlerType<dim,spacedim>::face_iterator face = cell->face(face_no);
+
+              // get the indices of the dofs on this cell...
+              face_dofs.resize (fe.dofs_per_face);
+              face->get_dof_indices (face_dofs, cell->active_fe_index());
+
+              x_fe_face_values.reinit (cell, face_no);
+              const FEFaceValues<dim> &fe_values = x_fe_face_values.get_present_fe_values();
+
+              // then identify which of them correspond to the selected set of
+              // vector components
+              for (unsigned int i=0; i<face_dofs.size(); ++i)
+                if (fe.face_system_to_component_index(i).first ==
+                    first_vector_component)
+                  {
+                    // find corresponding other components of vector
+                    internal::VectorDoFTuple<dim> vector_dofs;
+                    vector_dofs.dof_indices[0] = face_dofs[i];
+
+                    Assert(first_vector_component+dim<=fe.n_components(),
+                           ExcMessage("Error: the finite element does not have enough components "
+                                      "to define a normal direction."));
+
+                    for (unsigned int k=0; k<fe.dofs_per_face; ++k)
+                      if ((k != i)
+                          &&
+                          (face_quadrature_collection[cell->active_fe_index()].point(k) ==
+                           face_quadrature_collection[cell->active_fe_index()].point(i))
+                          &&
+                          (fe.face_system_to_component_index(k).first >=
+                           first_vector_component)
+                          &&
+                          (fe.face_system_to_component_index(k).first <
+                           first_vector_component + dim))
+                        vector_dofs.dof_indices[fe.face_system_to_component_index(k).first -
+                                                first_vector_component]
+                          = face_dofs[k];
+
+                    for (unsigned int d=0; d<dim; ++d)
+                      Assert (vector_dofs.dof_indices[d] < dof_handler.n_dofs(),
+                              ExcInternalError());
+
+                    // we need the normal vector on this face. we know that it
+                    // is a vector of length 1 but at least with higher order
+                    // mappings it isn't always possible to guarantee that
+                    // each component is exact up to zero tolerance. in
+                    // particular, as shown in the deal.II/no_flux_06 test, if
+                    // we just take the normal vector as given by the
+                    // fe_values object, we can get entries in the normal
+                    // vectors of the unit cube that have entries up to
+                    // several times 1e-14.
+                    //
+                    // the problem with this is that this later yields
+                    // constraints that are circular (e.g., in the testcase,
+                    // we get constraints of the form
+                    //
+                    // x22 =  2.93099e-14*x21 + 2.93099e-14*x23
+                    // x21 = -2.93099e-14*x22 + 2.93099e-14*x21
+                    //
+                    // in both of these constraints, the small numbers should
+                    // be zero and the constraints should simply be
+                    // x22 = x21 = 0
+                    //
+                    // to achieve this, we utilize that we know that the
+                    // normal vector has (or should have) length 1 and that we
+                    // can simply set small elements to zero (without having
+                    // to check that they are small *relative to something
+                    // else*). we do this and then normalize the length of the
+                    // vector back to one, just to be on the safe side
+                    //
+                    // one more point: we would like to use the "real" normal
+                    // vector here, as provided by the boundary description
+                    // and as opposed to what we get from the FEValues object.
+                    // we do this in the immediately next line, but as is
+                    // obvious, the boundary only has a vague idea which side
+                    // of a cell it is on -- indicated by the face number. in
+                    // other words, it may provide the inner or outer normal.
+                    // by and large, there is no harm from this, since the
+                    // tangential vector we compute is still the same. however,
+                    // we do average over normal vectors from adjacent cells
+                    // and if they have recorded normal vectors from the inside
+                    // once and from the outside the other time, then this
+                    // averaging is going to run into trouble. as a consequence
+                    // we ask the mapping after all for its normal vector,
+                    // but we only ask it so that we can possibly correct the
+                    // sign of the normal vector provided by the boundary
+                    // if they should point in different directions. this is the
+                    // case in tests/deal.II/no_flux_11.
+                    Tensor<1,dim> normal_vector
+                      = (cell->face(face_no)->get_boundary().normal_vector
+                         (cell->face(face_no),
+                          fe_values.quadrature_point(i)));
+                    if (normal_vector * fe_values.normal_vector(i) < 0)
+                      normal_vector *= -1;
+                    Assert (std::fabs(normal_vector.norm() - 1) < 1e-14,
+                            ExcInternalError());
+                    for (unsigned int d=0; d<dim; ++d)
+                      if (std::fabs(normal_vector[d]) < 1e-13)
+                        normal_vector[d] = 0;
+                    normal_vector /= normal_vector.norm();
+
+                    const Point<dim> point
+                      = fe_values.quadrature_point(i);
+                    Vector<double> b_values(dim);
+                    function_map[*b_id]->vector_value(point, b_values);
+
+                    // now enter the (dofs,(normal_vector,cell)) entry into
+                    // the map
+                    dof_to_normals_map.insert
+                    (std::make_pair (vector_dofs,
+                                     std::make_pair (normal_vector,cell)));
+                    dof_vector_to_b_values.insert
+                    (std::make_pair(vector_dofs, b_values));
+
+#ifdef DEBUG_NO_NORMAL_FLUX
+                    std::cout << "Adding normal vector:" << std::endl
+                              << "   dofs=" << vector_dofs << std::endl
+                              << "   cell=" << cell << " at " << cell->center() << std::endl
+                              << "   normal=" << normal_vector << std::endl;
+#endif
+                  }
+            }
+
+    // Now do something with the collected information. To this end, loop
+    // through all sets of pairs (dofs,normal_vector) and identify which
+    // entries belong to the same set of dofs and then do as described in the
+    // documentation, i.e. either average the normal vector or don't for this
+    // particular set of dofs
+    typename DoFToNormalsMap::const_iterator
+    p = dof_to_normals_map.begin();
+
+    while (p != dof_to_normals_map.end())
+      {
+        // first find the range of entries in the multimap that corresponds to
+        // the same vector-dof tuple. as usual, we define the range
+        // half-open. the first entry of course is 'p'
+        typename DoFToNormalsMap::const_iterator same_dof_range[2] = { p };
+        for (++p; p != dof_to_normals_map.end(); ++p)
+          if (p->first != same_dof_range[0]->first)
+            {
+              same_dof_range[1] = p;
+              break;
+            }
+        if (p == dof_to_normals_map.end())
+          same_dof_range[1] = dof_to_normals_map.end();
+
+#ifdef DEBUG_NO_NORMAL_FLUX
+        std::cout << "For dof indices <" << p->first << ">, found the following normals"
+                  << std::endl;
+        for (typename DoFToNormalsMap::const_iterator
+             q = same_dof_range[0];
+             q != same_dof_range[1]; ++q)
+          std::cout << "   " << q->second.first
+                    << " from cell " << q->second.second
+                    << std::endl;
+#endif
+
+
+        // now compute the reverse mapping: for each of the cells that
+        // contributed to the current set of vector dofs, add up the normal
+        // vectors. the values of the map are pairs of normal vectors and
+        // number of cells that have contributed
+        typedef std::map<typename DoFHandlerType<dim,spacedim>::active_cell_iterator,
+                std::pair<Tensor<1,dim>, unsigned int> >
+                CellToNormalsMap;
+
+        CellToNormalsMap cell_to_normals_map;
+        for (typename DoFToNormalsMap::const_iterator
+             q = same_dof_range[0];
+             q != same_dof_range[1]; ++q)
+          if (cell_to_normals_map.find (q->second.second)
+              == cell_to_normals_map.end())
+            cell_to_normals_map[q->second.second]
+              = std::make_pair (q->second.first, 1U);
+          else
+            {
+              const Tensor<1,dim> old_normal
+                = cell_to_normals_map[q->second.second].first;
+              const unsigned int old_count
+                = cell_to_normals_map[q->second.second].second;
+
+              Assert (old_count > 0, ExcInternalError());
+
+              // in the same entry, store again the now averaged normal vector
+              // and the new count
+              cell_to_normals_map[q->second.second]
+                = std::make_pair ((old_normal * old_count + q->second.first) / (old_count + 1),
+                                  old_count + 1);
+            }
+        Assert (cell_to_normals_map.size() >= 1, ExcInternalError());
+
+#ifdef DEBUG_NO_NORMAL_FLUX
+        std::cout << "   cell_to_normals_map:" << std::endl;
+        for (typename CellToNormalsMap::const_iterator
+             x = cell_to_normals_map.begin();
+             x != cell_to_normals_map.end(); ++x)
+          std::cout << "      " << x->first << " -> ("
+                    << x->second.first << ',' << x->second.second << ')'
+                    << std::endl;
+#endif
+
+        // count the maximum number of contributions from each cell
+        unsigned int max_n_contributions_per_cell = 1;
+        for (typename CellToNormalsMap::const_iterator
+             x = cell_to_normals_map.begin();
+             x != cell_to_normals_map.end(); ++x)
+          max_n_contributions_per_cell
+            = std::max (max_n_contributions_per_cell,
+                        x->second.second);
+
+        // verify that each cell can have only contributed at most dim times,
+        // since that is the maximum number of faces that come together at a
+        // single place
+        Assert (max_n_contributions_per_cell <= dim, ExcInternalError());
+
+        switch (max_n_contributions_per_cell)
+          {
+          // first deal with the case that a number of cells all have
+          // registered that they have a normal vector defined at the
+          // location of a given vector dof, and that each of them have
+          // encountered this vector dof exactly once while looping over all
+          // their faces. as stated in the documentation, this is the case
+          // where we want to simply average over all normal vectors
+          //
+          // the typical case is in 2d where multiple cells meet at one
+          // vertex sitting on the boundary. same in 3d for a vertex that
+          // is associated with only one of the boundary indicators passed
+          // to this function
+          case 1:
+          {
+            // compute the average normal vector from all the ones that have
+            // the same set of dofs. we could add them up and divide them by
+            // the number of additions, or simply normalize them right away
+            // since we want them to have unit length anyway
+            Tensor<1,dim> normal;
+            for (typename CellToNormalsMap::const_iterator
+                 x = cell_to_normals_map.begin();
+                 x != cell_to_normals_map.end(); ++x)
+              normal += x->second.first;
+            normal /= normal.norm();
+
+            // normalize again
+            for (unsigned int d=0; d<dim; ++d)
+              if (std::fabs(normal[d]) < 1e-13)
+                normal[d] = 0;
+            normal /= normal.norm();
+
+            // then construct constraints from this:
+            const internal::VectorDoFTuple<dim> &
+            dof_indices = same_dof_range[0]->first;
+            double normal_value = 0.;
+            const Vector<double> b_values = dof_vector_to_b_values[dof_indices];
+            for (unsigned int i=0; i<dim; ++i)
+              normal_value += b_values[i]*normal[i];
+            internal::add_constraint (dof_indices, normal,
+                                      constraints, normal_value);
+
+            break;
+          }
+
+          // this is the slightly more complicated case that a single cell has
+          // contributed with exactly DIM normal vectors to the same set of
+          // vector dofs. this is what happens in a corner in 2d and 3d (but
+          // not on an edge in 3d, where we have only 2, i.e. <DIM,
+          // contributions). Here we do not want to average the normal
+          // vectors. Since we have DIM contributions, let's assume (and
+          // verify) that they are in fact all linearly independent; in that
+          // case, all vector components are constrained and we need to set all
+          // of them to the corresponding boundary values
+          case dim:
+          {
+            // assert that indeed only a single cell has contributed
+            Assert (cell_to_normals_map.size() == 1,
+                    ExcInternalError());
+
+            // check linear independence by computing the determinant of the
+            // matrix created from all the normal vectors. if they are
+            // linearly independent, then the determinant is nonzero. if they
+            // are orthogonal, then its absolute value is in fact equal to 1
+            // (since they are all unit vectors); make sure it is larger
+            // than 1e-3 to avoid cases where cells are degenerate
+            {
+              Tensor<2,dim> t;
+
+              typename DoFToNormalsMap::const_iterator x = same_dof_range[0];
+              for (unsigned int i=0; i<dim; ++i, ++x)
+                for (unsigned int j=0; j<dim; ++j)
+                  t[i][j] = x->second.first[j];
+
+              Assert (std::fabs(determinant (t)) > 1e-3,
+                      ExcMessage("Found a set of normal vectors that are nearly collinear."));
+            }
+
+            // so all components of this vector dof are constrained. enter
+            // this into the constraint matrix
+            //
+            // ignore dofs already constrained
+            const internal::VectorDoFTuple<dim> &
+            dof_indices = same_dof_range[0]->first;
+            const Vector<double> b_values = dof_vector_to_b_values[dof_indices];
+            for (unsigned int i=0; i<dim; ++i)
+              if (!constraints.is_constrained(same_dof_range[0]->first.dof_indices[i])
+                  &&
+                  constraints.can_store_line(same_dof_range[0]->first.dof_indices[i]))
+                {
+                  const types::global_dof_index line
+                    = dof_indices.dof_indices[i];
+                  constraints.add_line (line);
+                  if (std::fabs(b_values[i])
+                      > std::numeric_limits<double>::epsilon())
+                    constraints.set_inhomogeneity(line, b_values[i]);
+                  // no add_entries here
+                }
+
+            break;
+          }
+
+          // this is the case of an edge contribution in 3d, i.e. the vector
+          // is constrained in two directions but not the third.
+          default:
+          {
+            Assert (dim >= 3, ExcNotImplemented());
+            Assert (max_n_contributions_per_cell == 2, ExcInternalError());
+
+            // as described in the documentation, let us first collect what
+            // each of the cells contributed at the current point. we use a
+            // std::list instead of a std::set (which would be more natural)
+            // because std::set requires that the stored elements are
+            // comparable with operator<
+            typedef std::map<typename DoFHandlerType<dim,spacedim>::active_cell_iterator,
+                    std::list<Tensor<1,dim> > >
+                    CellContributions;
+            CellContributions cell_contributions;
+
+            for (typename DoFToNormalsMap::const_iterator
+                 q = same_dof_range[0];
+                 q != same_dof_range[1]; ++q)
+              cell_contributions[q->second.second].push_back (q->second.first);
+            Assert (cell_contributions.size() >= 1, ExcInternalError());
+
+            // now for each cell that has contributed determine the number of
+            // normal vectors it has contributed. we currently only implement
+            // if this is dim-1 for all cells (if a single cell has
+            // contributed dim, or if all adjacent cells have contributed 1
+            // normal vector, this is already handled above).
+            //
+            // we only implement the case that all cells contribute
+            // dim-1 because we assume that we are following an edge
+            // of the domain (think: we are looking at a vertex
+            // located on one of the edges of a refined cube where the
+            // boundary indicators of the two adjacent faces of the
+            // cube are both listed in the set of boundary indicators
+            // passed to this function). in that case, all cells along
+            // that edge of the domain are assumed to have contributed
+            // dim-1 normal vectors. however, there are cases where
+            // this assumption is not justified (see the lengthy
+            // explanation in test no_flux_12.cc) and in those cases
+            // we simply ignore the cell that contributes only
+            // once. this is also discussed at length in the
+            // documentation of this function.
+            //
+            // for each contributing cell compute the tangential vector that
+            // remains unconstrained
+            std::list<Tensor<1,dim> > tangential_vectors;
+            for (typename CellContributions::const_iterator
+                 contribution = cell_contributions.begin();
+                 contribution != cell_contributions.end();
+                 ++contribution)
+              {
+#ifdef DEBUG_NO_NORMAL_FLUX
+                std::cout << "   Treating edge case with dim-1 contributions." << std::endl
+                          << "   Looking at cell " << contribution->first
+                          << " which has contributed these normal vectors:"
+                          << std::endl;
+                for (typename std::list<Tensor<1,dim> >::const_iterator
+                     t = contribution->second.begin();
+                     t != contribution->second.end();
+                     ++t)
+                  std::cout << "      " << *t << std::endl;
+#endif
+
+                // as mentioned above, simply ignore cells that only
+                // contribute once
+                if (contribution->second.size() < dim-1)
+                  continue;
+
+                Tensor<1,dim> normals[dim-1];
+                {
+                  unsigned int index=0;
+                  for (typename std::list<Tensor<1,dim> >::const_iterator
+                       t = contribution->second.begin();
+                       t != contribution->second.end();
+                       ++t, ++index)
+                    normals[index] = *t;
+                  Assert (index == dim-1, ExcInternalError());
+                }
+
+                // calculate the tangent as the outer product of the normal
+                // vectors. since these vectors do not need to be orthogonal
+                // (think, for example, the case of the deal.II/no_flux_07
+                // test: a sheared cube in 3d, with Q2 elements, where we have
+                // constraints from the two normal vectors of two faces of the
+                // sheared cube that are not perpendicular to each other), we
+                // have to normalize the outer product
+                Tensor<1,dim> tangent;
+                switch (dim)
+                  {
+                  case 3:
+                    // take cross product between normals[0] and
+                    // normals[1]. write it in the current form (with [dim-2])
+                    // to make sure that compilers don't warn about
+                    // out-of-bounds accesses -- the warnings are bogus since
+                    // we get here only for dim==3, but at least one isn't
+                    // quite smart enough to notice this and warns when
+                    // compiling the function in 2d
+                    tangent = cross_product_3d (normals[0], normals[dim-2]);
+                    break;
+                  default:
+                    Assert (false, ExcNotImplemented());
+                  }
+
+                Assert (std::fabs (tangent.norm()) > 1e-12,
+                        ExcMessage("Two normal vectors from adjacent faces are almost "
+                                   "parallel."));
+                tangent /= tangent.norm();
+
+                tangential_vectors.push_back (tangent);
+              }
+
+            // go through the list of tangents and make sure that they all
+            // roughly point in the same direction as the first one (i.e. have
+            // an angle less than 90 degrees); if they don't then flip their
+            // sign
+            {
+              const Tensor<1,dim> first_tangent = tangential_vectors.front();
+              typename std::list<Tensor<1,dim> >::iterator
+              t = tangential_vectors.begin();
+              ++t;
+              for (; t != tangential_vectors.end(); ++t)
+                if (*t * first_tangent < 0)
+                  *t *= -1;
+            }
+
+            // now compute the average tangent and normalize it
+            Tensor<1,dim> average_tangent;
+            for (typename std::list<Tensor<1,dim> >::const_iterator
+                 t = tangential_vectors.begin();
+                 t != tangential_vectors.end();
+                 ++t)
+              average_tangent += *t;
+            average_tangent /= average_tangent.norm();
+
+            // now all that is left is that we add the constraints that the
+            // vector is parallel to the tangent
+            const internal::VectorDoFTuple<dim> &
+            dof_indices = same_dof_range[0]->first;
+            const Vector<double> b_values = dof_vector_to_b_values[dof_indices];
+            internal::add_tangentiality_constraints (dof_indices,
+                                                     average_tangent,
+                                                     constraints,
+                                                     b_values);
+          }
+          }
+      }
+  }
+
+
+
+  namespace
+  {
+    /**
+     * Comparison object for tuples of @p dim global dof indices. It is used
+     * as the ordering of the std::set that collects the vector-valued dofs
+     * found on the boundary.
+     *
+     * The tuples are compared lexicographically: the first component in
+     * which the two tuples differ decides the outcome, and equal tuples
+     * compare as "not less". Merely returning true as soon as *any*
+     * component of @p p1 is smaller than the corresponding one of @p p2 is
+     * not a strict weak ordering (for example, (1,5) and (2,3) would each
+     * compare less than the other), and handing such a comparator to
+     * std::set results in undefined behavior.
+     */
+    template <int dim>
+    struct PointComparator
+    {
+      bool operator ()(const std_cxx11::array<types::global_dof_index,dim> &p1,
+                       const std_cxx11::array<types::global_dof_index,dim> &p2) const
+      {
+        for (unsigned int d=0; d<dim; ++d)
+          if (p1[d] < p2[d])
+            return true;
+          else if (p2[d] < p1[d])
+            return false;
+
+        // all components are equal, so p1 is not less than p2
+        return false;
+      }
+    };
+  }
+
+
+
+  /**
+   * Convenience wrapper around compute_nonzero_tangential_flux_constraints()
+   * for homogeneous boundary values: every boundary id in @p boundary_ids is
+   * associated with a zero function with @p dim components, and the
+   * resulting function map is passed on together with all other arguments.
+   */
+  template <int dim, template <int, int> class DoFHandlerType, int spacedim>
+  void
+  compute_normal_flux_constraints (const DoFHandlerType<dim,spacedim> &dof_handler,
+                                   const unsigned int                  first_vector_component,
+                                   const std::set<types::boundary_id> &boundary_ids,
+                                   ConstraintMatrix                   &constraints,
+                                   const Mapping<dim, spacedim>       &mapping)
+  {
+    // a single zero function object is shared by all boundary ids; it has
+    // to stay alive until the call below has returned because the map only
+    // stores a pointer to it
+    ZeroFunction<dim> homogeneous_values (dim);
+
+    typename FunctionMap<spacedim>::type zero_function_map;
+    for (std::set<types::boundary_id>::const_iterator
+         id = boundary_ids.begin();
+         id != boundary_ids.end(); ++id)
+      zero_function_map[*id] = &homogeneous_values;
+
+    compute_nonzero_tangential_flux_constraints (dof_handler,
+                                                 first_vector_component,
+                                                 boundary_ids,
+                                                 zero_function_map,
+                                                 constraints,
+                                                 mapping);
+  }
+
+  /**
+   * Add to @p constraints the constraints that prescribe the tangential
+   * part of a vector-valued field on the boundaries listed in
+   * @p boundary_ids.
+   *
+   * The function proceeds in three steps:
+   *  -# It calls compute_nonzero_normal_flux_constraints() with the same
+   *     arguments, but writing into a temporary ConstraintMatrix. That
+   *     object is only used to find out which vector component of a dof
+   *     tuple is constrained and with which weights.
+   *  -# It loops over all faces of non-artificial active cells whose
+   *     boundary id is in @p boundary_ids and collects, for every scalar
+   *     face index, the tuple of @p dim global dof indices of the vector
+   *     components <code>first_vector_component ...
+   *     first_vector_component+dim-1</code>, together with the values of
+   *     the boundary function at the corresponding face support point.
+   *  -# For every such tuple: if more than one component is constrained in
+   *     the temporary object, all components are fixed to the boundary
+   *     values. If exactly one component is constrained, every other
+   *     component that is not yet constrained in @p constraints is tied to
+   *     the constrained one with weight <code>-normal[d]</code> plus the
+   *     boundary value as inhomogeneity. Tuples without constrained
+   *     components are left alone.
+   *
+   * @param dof_handler            The DoFHandler whose dofs are constrained.
+   * @param first_vector_component Index of the first of the @p dim vector
+   *                               components that form the vector field.
+   * @param boundary_ids           Boundary indicators on which constraints
+   *                               are generated.
+   * @param function_map           Maps each boundary id to the function
+   *                               providing the boundary values.
+   * @param constraints            Object to which the constraints are added.
+   * @param mapping                Mapping used to evaluate the face support
+   *                               points (and, in the first step, passed on
+   *                               to compute_nonzero_normal_flux_constraints()).
+   */
+  template <int dim, template <int, int> class DoFHandlerType, int spacedim>
+  void
+  compute_nonzero_tangential_flux_constraints
+  (const DoFHandlerType<dim,spacedim>   &dof_handler,
+   const unsigned int                    first_vector_component,
+   const std::set<types::boundary_id>   &boundary_ids,
+   typename FunctionMap<spacedim>::type &function_map,
+   ConstraintMatrix                     &constraints,
+   const Mapping<dim, spacedim>         &mapping)
+  {
+    // step 1: compute the normal flux constraints into a temporary object
+    // that covers the same local lines as the output object
+    ConstraintMatrix no_normal_flux_constraints(constraints.get_local_lines());
+    compute_nonzero_normal_flux_constraints (dof_handler,
+                                             first_vector_component,
+                                             boundary_ids,
+                                             function_map,
+                                             no_normal_flux_constraints,
+                                             mapping);
+
+    // use the same mapping for every element of the FE collection
+    hp::FECollection<dim,spacedim>      fe_collection (dof_handler.get_fe());
+    hp::MappingCollection<dim,spacedim> mapping_collection;
+    for (unsigned int i=0; i<fe_collection.size(); ++i)
+      mapping_collection.push_back (mapping);
+
+    // now also create a quadrature collection for the faces of a cell. fill
+    // it with a quadrature formula with the support points on faces for each
+    // FE
+    hp::QCollection<dim-1> face_quadrature_collection;
+    for (unsigned int i=0; i<fe_collection.size(); ++i)
+      {
+        const std::vector<Point<dim-1> > &
+        unit_support_points = fe_collection[i].get_unit_face_support_points();
+
+        Assert (unit_support_points.size() == fe_collection[i].dofs_per_face,
+                ExcInternalError());
+
+        face_quadrature_collection.push_back (Quadrature<dim-1> (unit_support_points));
+      }
+
+    // now create the object with which we will generate the normal vectors
+    hp::FEFaceValues<dim,spacedim> x_fe_face_values (mapping_collection,
+                                                     fe_collection,
+                                                     face_quadrature_collection,
+                                                     update_q_points |
+                                                     update_normal_vectors);
+
+    // Extract a list that collects all vector components that belong to the
+    // same node (scalar basis function). When creating that list, we use an
+    // array of dim components that stores the global degree of freedom.
+    std::set<std_cxx11::array<types::global_dof_index,dim>, PointComparator<dim> > vector_dofs;
+    std::vector<types::global_dof_index> face_dofs;
+
+    // boundary values belonging to each tuple of vector dofs
+    std::map<std_cxx11::array<types::global_dof_index,dim>, Vector<double> >
+    dof_vector_to_b_values;
+
+    // step 2: visit all faces on the selected boundaries
+    std::set<types::boundary_id>::iterator b_id;
+    std::vector<std_cxx11::array<types::global_dof_index,dim> > cell_vector_dofs;
+    for (typename DoFHandlerType<dim,spacedim>::active_cell_iterator cell =
+           dof_handler.begin_active(); cell != dof_handler.end(); ++cell)
+      if (!cell->is_artificial())
+        for (unsigned int face_no=0; face_no < GeometryInfo<dim>::faces_per_cell;
+             ++face_no)
+          if ((b_id=boundary_ids.find(cell->face(face_no)->boundary_id()))
+              != boundary_ids.end())
+            {
+              const FiniteElement<dim> &fe = cell->get_fe();
+              typename DoFHandlerType<dim,spacedim>::face_iterator face=cell->face(face_no);
+
+              // get the indices of the dofs on this face...
+              face_dofs.resize (fe.dofs_per_face);
+              face->get_dof_indices (face_dofs, cell->active_fe_index());
+
+              x_fe_face_values.reinit (cell, face_no);
+              const FEFaceValues<dim> &fe_values = x_fe_face_values.get_present_fe_values();
+
+              // boundary value for each individual dof of the vector field
+              // on this face
+              std::map<types::global_dof_index, double> dof_to_b_value;
+
+              // one plus the largest scalar face index seen among the
+              // selected vector components
+              unsigned int n_scalar_indices = 0;
+              cell_vector_dofs.resize(fe.dofs_per_face);
+              for (unsigned int i=0; i<fe.dofs_per_face; ++i)
+                {
+                  // only look at shape functions that belong to one of the
+                  // dim components of the vector field
+                  if (fe.face_system_to_component_index(i).first >= first_vector_component &&
+                      fe.face_system_to_component_index(i).first < first_vector_component + dim)
+                    {
+                      const unsigned int component
+                        = fe.face_system_to_component_index(i).first
+                          -first_vector_component;
+                      n_scalar_indices =
+                        std::max(n_scalar_indices,
+                                 fe.face_system_to_component_index(i).second+1);
+                      cell_vector_dofs[fe.face_system_to_component_index(i).second]
+                      [component]
+                        = face_dofs[i];
+
+                      // evaluate the boundary function for this component at
+                      // the i-th point of the face quadrature (which was
+                      // built from the unit face support points above)
+                      const Point<dim> point
+                        = fe_values.quadrature_point(i);
+                      const double b_value
+                        = function_map[*b_id]->value(point, component);
+                      dof_to_b_value.insert
+                      (std::make_pair(face_dofs[i], b_value));
+                    }
+                }
+
+              // now we identified the vector indices on the cell, so next
+              // insert them into the set (it would be expensive to directly
+              // insert incomplete points into the set)
+              for (unsigned int i=0; i<n_scalar_indices; ++i)
+                {
+                  vector_dofs.insert(cell_vector_dofs[i]);
+                  Vector<double> b_values(dim);
+                  for (unsigned int j=0; j<dim; ++j)
+                    b_values[j]=dof_to_b_value[cell_vector_dofs[i][j]];
+                  // insert() keeps an already existing entry, i.e. the
+                  // values recorded by the first face that saw this tuple
+                  dof_vector_to_b_values.insert
+                  (std::make_pair(cell_vector_dofs[i], b_values));
+                }
+
+            }
+
+    // iterate over the list of all vector components we found and see if we
+    // can find constrained ones
+    unsigned int n_total_constraints_found = 0;
+    for (typename std::set<std_cxx11::array<types::global_dof_index,dim>,
+         PointComparator<dim> >::const_iterator it=vector_dofs.begin();
+         it!=vector_dofs.end(); ++it)
+      {
+        // record which components of this tuple carry a normal flux
+        // constraint
+        unsigned int n_constraints = 0;
+        bool is_constrained[dim];
+        for (unsigned int d=0; d<dim; ++d)
+          if (no_normal_flux_constraints.is_constrained((*it)[d]))
+            {
+              is_constrained[d] = true;
+              ++n_constraints;
+              ++n_total_constraints_found;
+            }
+          else
+            is_constrained[d] = false;
+        if (n_constraints > 0)
+          {
+            // if more than one no-flux constraint is present, we need to
+            // constrain all vector degrees of freedom (we are in a corner
+            // where several faces meet and to get a continuous FE solution we
+            // need to set all conditions corresponding to the boundary function.).
+            if (n_constraints > 1)
+              {
+                const Vector<double> b_value = dof_vector_to_b_values[*it];
+                for (unsigned int d=0; d<dim; ++d)
+                  {
+                    constraints.add_line((*it)[d]);
+                    constraints.set_inhomogeneity((*it)[d], b_value(d));
+                  }
+                continue;
+              }
+
+            // ok, this is a no-flux constraint, so get the index of the dof
+            // that is currently constrained and make it unconstrained. The
+            // constraint indices will get the normal that contain the other
+            // indices.
+            Tensor<1,dim> normal;
+            // -1 wraps around to the largest unsigned value, so the index
+            // range assertion below triggers if no component was found
+            unsigned constrained_index = -1;
+            for (unsigned int d=0; d<dim; ++d)
+              if (is_constrained[d])
+                {
+                  constrained_index = d;
+                  normal[d] = 1.;
+                }
+            AssertIndexRange(constrained_index, dim);
+            const std::vector<std::pair<types::global_dof_index, double> > *constrained
+              = no_normal_flux_constraints.get_constraint_entries((*it)[constrained_index]);
+            // find components to which this index is constrained to
+            Assert(constrained != 0, ExcInternalError());
+            Assert(constrained->size() < dim, ExcInternalError());
+            for (unsigned int c=0; c<constrained->size(); ++c)
+              {
+                // locate the component of the tuple that this constraint
+                // entry refers to and store its weight in the normal
+                int index = -1;
+                for (unsigned int d=0; d<dim; ++d)
+                  if ((*constrained)[c].first == (*it)[d])
+                    index = d;
+                Assert (index != -1, ExcInternalError());
+                normal[index] = (*constrained)[c].second;
+              }
+            Vector<double> boundary_value = dof_vector_to_b_values[*it];
+            for (unsigned int d=0; d<dim; ++d)
+              {
+                if (is_constrained[d])
+                  continue;
+                // tie every other component to the one that was constrained
+                // by the normal flux condition, unless the output object
+                // already holds a constraint for it
+                const unsigned int new_index = (*it)[d];
+                if (!constraints.is_constrained(new_index))
+                  {
+                    constraints.add_line(new_index);
+                    if (std::abs(normal[d]) > 1e-13)
+                      constraints.add_entry(new_index, (*it)[constrained_index],
+                                            -normal[d]);
+                    constraints.set_inhomogeneity(new_index, boundary_value[d]);
+                  }
+              }
+          }
+      }
+    // every constrained line of the temporary object must have been seen
+    // exactly once in the loop above
+    AssertDimension(n_total_constraints_found,
+                    no_normal_flux_constraints.n_constraints());
+  }
+
+
+
+  namespace internal
+  {
+    /**
+     * Scratch data passed to integrate_difference_inner(): an hp::FEValues
+     * object plus a set of arrays, one entry per quadrature point, that are
+     * sized by resize_vectors().
+     */
+    template <int dim, int spacedim, typename Number>
+    struct IDScratchData
+    {
+      // Initialize the hp::FEValues member from the given collections and
+      // update flags. All vectors start out empty.
+      IDScratchData (const dealii::hp::MappingCollection<dim,spacedim> &mapping,
+                     const dealii::hp::FECollection<dim,spacedim> &fe,
+                     const dealii::hp::QCollection<dim> &q,
+                     const UpdateFlags update_flags);
+
+      // Copy constructor: builds a new hp::FEValues object with the same
+      // collections and flags as that of @p data; the vectors are not copied.
+      IDScratchData (const IDScratchData &data);
+
+      // Resize every array below to n_q_points entries; vector-valued
+      // entries that are newly created get n_components components.
+      void resize_vectors (const unsigned int n_q_points,
+                           const unsigned int n_components);
+
+      // values of the finite element function at the quadrature points
+      // (subtracted from psi_values in integrate_difference_inner())
+      std::vector<Vector<Number> > function_values;
+      // presumably the gradients of the finite element function -- the use
+      // is not visible in this part of the file, TODO confirm
+      std::vector<std::vector<Tensor<1,spacedim,Number> > > function_grads;
+      // values of the weight function: scalar form and per-component form
+      std::vector<double> weight_values;
+      std::vector<Vector<double> > weight_vectors;
+
+      // exact solution minus finite element function at the quadrature
+      // points (values), and the analogous storage for gradients
+      std::vector<Vector<Number> > psi_values;
+      std::vector<std::vector<Tensor<1,spacedim,Number> > > psi_grads;
+      // one scalar per quadrature point; NOTE(review): its use is not
+      // visible in this part of the file
+      std::vector<Number> psi_scalar;
+
+      // temporaries into which the exact solution is evaluated (it only
+      // provides double-valued results) before being copied into the psi_*
+      // arrays
+      std::vector<double>         tmp_values;
+      std::vector<Vector<double> > tmp_vector_values;
+      std::vector<Tensor<1,spacedim> > tmp_gradients;
+      std::vector<std::vector<Tensor<1,spacedim> > > tmp_vector_gradients;
+
+      // provides the FEValues object for the current cell via
+      // get_present_fe_values()
+      dealii::hp::FEValues<dim,spacedim> x_fe_values;
+    };
+
+
+    /**
+     * Constructor. Only the hp::FEValues member is initialized from the
+     * arguments; all array members are default-constructed, i.e. empty,
+     * until resize_vectors() is called.
+     */
+    template <int dim, int spacedim, typename Number>
+    IDScratchData<dim,spacedim,Number>
+    ::IDScratchData(const dealii::hp::MappingCollection<dim,spacedim> &mapping,
+                    const dealii::hp::FECollection<dim,spacedim> &fe,
+                    const dealii::hp::QCollection<dim> &q,
+                    const UpdateFlags update_flags)
+      :
+      x_fe_values(mapping, fe, q, update_flags)
+    {}
+
+    /**
+     * Copy constructor. Rather than copying the hp::FEValues object of
+     * @p data, a new one is created from the same mapping, finite element
+     * and quadrature collections and the same update flags. None of the
+     * array members are copied; they are default-constructed, i.e. empty.
+     */
+    template <int dim, int spacedim, typename Number>
+    IDScratchData<dim,spacedim,Number>::IDScratchData (const IDScratchData &data)
+      :
+      x_fe_values(data.x_fe_values.get_mapping_collection(),
+                  data.x_fe_values.get_fe_collection(),
+                  data.x_fe_values.get_quadrature_collection(),
+                  data.x_fe_values.get_update_flags())
+    {}
+
+    /**
+     * Resize all per-quadrature-point arrays of this object to
+     * @p n_q_points entries. Entries of the vector-valued arrays that are
+     * newly created by the resize operation have @p n_components
+     * components; entries that already exist are left untouched.
+     */
+    template <int dim, int spacedim, typename Number>
+    void
+    IDScratchData<dim,spacedim,Number>::resize_vectors (const unsigned int n_q_points,
+                                                        const unsigned int n_components)
+    {
+      // prototypes with which newly created entries are filled
+      const Vector<Number>                          number_vector (n_components);
+      const Vector<double>                          double_vector (n_components);
+      const std::vector<Tensor<1,spacedim,Number> > number_gradients (n_components);
+      const std::vector<Tensor<1,spacedim> >        double_gradients (n_components);
+
+      // scalar quantities, one per quadrature point
+      weight_values.resize (n_q_points);
+      psi_scalar.resize (n_q_points);
+      tmp_values.resize (n_q_points);
+      tmp_gradients.resize (n_q_points);
+
+      // vector-valued quantities stored with the type Number
+      function_values.resize (n_q_points, number_vector);
+      psi_values.resize (n_q_points, number_vector);
+      function_grads.resize (n_q_points, number_gradients);
+      psi_grads.resize (n_q_points, number_gradients);
+
+      // vector-valued quantities stored as double
+      weight_vectors.resize (n_q_points, double_vector);
+      tmp_vector_values.resize (n_q_points, double_vector);
+      tmp_vector_gradients.resize (n_q_points, double_gradients);
+    }
+
+
+    // Compute the contribution of a single cell to the requested norm of the
+    // difference between exact_solution and a finite element function.
+    //
+    // The main work is put into this helper function, which only depends on
+    // the scalar type of the vector, to avoid compiling the same code for
+    // many vector types.
+    //
+    // Arguments:
+    //  - exact_solution: the function the finite element field is compared to
+    //  - norm:           which norm of the difference to compute
+    //  - weight:         optional weight function; a null pointer means a
+    //                    weight of one for every component. A scalar weight
+    //                    is applied to all components
+    //  - update_flags:   update_values / update_gradients decide whether the
+    //                    value and/or gradient differences are evaluated;
+    //                    update_normal_vectors selects the codimension one
+    //                    treatment of the gradients
+    //  - exponent:       the exponent p used by the Lp and W1p type norms
+    //  - n_components:   number of vector components of the finite element
+    //  - data:           scratch object. data.x_fe_values has to be
+    //                    initialized on the cell, the temporary vectors have
+    //                    to be sized for it, and data.function_values /
+    //                    data.function_grads have to hold the finite element
+    //                    field for the quantities requested in update_flags
+    //
+    // Returns the value computed for this cell.
+    template <int dim, int spacedim, typename Number>
+    double
+    integrate_difference_inner (const Function<spacedim>   &exact_solution,
+                                const NormType              &norm,
+                                const Function<spacedim>    *weight,
+                                const UpdateFlags            update_flags,
+                                const double                 exponent,
+                                const unsigned int           n_components,
+                                IDScratchData<dim,spacedim,Number> &data)
+    {
+      const bool fe_is_system = (n_components != 1);
+      const dealii::FEValues<dim, spacedim> &fe_values  = data.x_fe_values.get_present_fe_values ();
+      const unsigned int n_q_points = fe_values.n_quadrature_points;
+
+      // evaluate the weight at all quadrature points; afterwards
+      // data.weight_vectors[q](k) is the weight of component k at point q
+      if (weight!=0)
+        {
+          if (weight->n_components>1)
+            weight->vector_value_list (fe_values.get_quadrature_points(),
+                                       data.weight_vectors);
+          else
+            {
+              // scalar weight: use the same value for every component
+              weight->value_list (fe_values.get_quadrature_points(),
+                                  data.weight_values);
+              for (unsigned int k=0; k<n_q_points; ++k)
+                data.weight_vectors[k] = data.weight_values[k];
+            }
+        }
+      else
+        {
+          // no weight given: weigh everything with one
+          for (unsigned int k=0; k<n_q_points; ++k)
+            data.weight_vectors[k] = 1.;
+        }
+
+
+      if (update_flags & update_values)
+        {
+          // first compute the exact solution (vectors) at the quadrature
+          // points. try to do this as efficiently as possible by avoiding a
+          // second virtual function call in case the function really has only
+          // one component
+          //
+          // TODO: we have to work a bit here because the Function<dim,double>
+          //   interface of the argument denoting the exact function only
+          //   provides us with double/Tensor<1,dim> values, rather than
+          //   with the correct data type. so evaluate into a temp
+          //   object, then copy around
+          if (fe_is_system)
+            {
+              exact_solution.vector_value_list (fe_values.get_quadrature_points(),
+                                                data.tmp_vector_values);
+              for (unsigned int i=0; i<n_q_points; ++i)
+                data.psi_values[i] = data.tmp_vector_values[i];
+            }
+          else
+            {
+              exact_solution.value_list (fe_values.get_quadrature_points(),
+                                         data.tmp_values);
+              for (unsigned int i=0; i<n_q_points; ++i)
+                data.psi_values[i](0) = data.tmp_values[i];
+            }
+
+          // then subtract finite element fe_function
+          for (unsigned int q=0; q<n_q_points; ++q)
+            for (unsigned int i=0; i<data.psi_values[q].size(); ++i)
+              data.psi_values[q][i] -= data.function_values[q][i];
+        }
+
+      // Do the same for gradients, if required
+      if (update_flags & update_gradients)
+        {
+          // try to be a little clever to avoid recursive virtual function
+          // calls when calling gradient_list for functions that are really
+          // scalar functions
+          if (fe_is_system)
+            {
+              exact_solution.vector_gradient_list (fe_values.get_quadrature_points(),
+                                                   data.tmp_vector_gradients);
+              for (unsigned int i=0; i<n_q_points; ++i)
+                for (unsigned int comp=0; comp<data.psi_grads[i].size(); ++comp)
+                  data.psi_grads[i][comp] = data.tmp_vector_gradients[i][comp];
+            }
+          else
+            {
+              exact_solution.gradient_list (fe_values.get_quadrature_points(),
+                                            data.tmp_gradients);
+              for (unsigned int i=0; i<n_q_points; ++i)
+                data.psi_grads[i][0] = data.tmp_gradients[i];
+            }
+
+          // then subtract finite element function_grads. We need to be
+          // careful in the codimension one case, since there we only have
+          // tangential gradients in the finite element function, not the full
+          // gradient. This is taken care of, by subtracting the normal
+          // component of the gradient from the exact function.
+          if (update_flags & update_normal_vectors)
+            for (unsigned int k=0; k<n_components; ++k)
+              for (unsigned int q=0; q<n_q_points; ++q)
+                {
+                  // compute (f.n) n
+                  const Number f_dot_n
+                    = (data.psi_grads[q][k] * fe_values.normal_vector(q));
+                  const Tensor<1,spacedim,Number> f_dot_n_times_n (f_dot_n * fe_values.normal_vector(q));
+
+                  data.psi_grads[q][k] -= (data.function_grads[q][k] + f_dot_n_times_n);
+                }
+          else
+            for (unsigned int k=0; k<n_components; ++k)
+              for (unsigned int q=0; q<n_q_points; ++q)
+                for (unsigned int d=0; d<spacedim; ++d)
+                  data.psi_grads[q][k][d] -= data.function_grads[q][k][d];
+        }
+
+      double diff = 0;
+
+      // First work on function values:
+      switch (norm)
+        {
+        case mean:
+          // Compute values in quadrature points and integrate
+          for (unsigned int q=0; q<n_q_points; ++q)
+            {
+              double sum = 0;
+              for (unsigned int k=0; k<n_components; ++k)
+                sum += data.psi_values[q](k) * data.weight_vectors[q](k);
+              diff += sum * fe_values.JxW(q);
+            }
+          break;
+
+        case Lp_norm:
+        case L1_norm:
+        case W1p_norm:
+          // Compute values in quadrature points and integrate
+          for (unsigned int q=0; q<n_q_points; ++q)
+            {
+              double sum = 0;
+              for (unsigned int k=0; k<n_components; ++k)
+                sum += std::pow(
+                         static_cast<double>(data.psi_values[q](k)*data.psi_values[q](k)),
+                         exponent/2.) * data.weight_vectors[q](k);
+              diff += sum * fe_values.JxW(q);
+            }
+
+          // Compute the root only if no derivative values are added later
+          if (!(update_flags & update_gradients))
+            diff = std::pow(diff, 1./exponent);
+          break;
+
+        case L2_norm:
+        case H1_norm:
+          // Compute values in quadrature points and integrate
+          for (unsigned int q=0; q<n_q_points; ++q)
+            {
+              double sum = 0;
+              for (unsigned int k=0; k<n_components; ++k)
+                sum += data.psi_values[q](k) * data.psi_values[q](k) *
+                       data.weight_vectors[q](k);
+              diff += sum * fe_values.JxW(q);
+            }
+          // Compute the root only, if no derivative values are added later
+          if (norm == L2_norm)
+            diff=std::sqrt(diff);
+          break;
+
+        case Linfty_norm:
+        case W1infty_norm:
+          for (unsigned int q=0; q<n_q_points; ++q)
+            for (unsigned int k=0; k<n_components; ++k)
+              diff = std::max (diff, double(std::abs(data.psi_values[q](k)*
+                                                     data.weight_vectors[q](k))));
+          break;
+
+        case H1_seminorm:
+        case Hdiv_seminorm:
+        case W1p_seminorm:
+        case W1infty_seminorm:
+          // function values are not used for these norms
+          break;
+
+        default:
+          Assert (false, ExcNotImplemented());
+          break;
+        }
+
+      // Now compute terms depending on derivatives:
+      switch (norm)
+        {
+        case W1p_seminorm:
+        case W1p_norm:
+          for (unsigned int q=0; q<n_q_points; ++q)
+            {
+              double sum = 0;
+              for (unsigned int k=0; k<n_components; ++k)
+                sum += std::pow(
+                         static_cast<double>(data.psi_grads[q][k]*data.psi_grads[q][k]),
+                         exponent/2.) * data.weight_vectors[q](k);
+              diff += sum * fe_values.JxW(q);
+            }
+          diff = std::pow(diff, 1./exponent);
+          break;
+
+        case H1_seminorm:
+        case H1_norm:
+          for (unsigned int q=0; q<n_q_points; ++q)
+            {
+              double sum = 0;
+              for (unsigned int k=0; k<n_components; ++k)
+                sum += (data.psi_grads[q][k] * data.psi_grads[q][k]) *
+                       data.weight_vectors[q](k);
+              diff += sum * fe_values.JxW(q);
+            }
+          diff = std::sqrt(diff);
+          break;
+
+        case Hdiv_seminorm:
+          // this requirement does not depend on the quadrature point, so
+          // check it once instead of in every loop iteration
+          Assert (n_components >= dim,
+                  ExcMessage ("You can only ask for the Hdiv norm for a finite element "
+                              "with at least 'dim' components. In that case, this function "
+                              "will take the divergence of the first 'dim' components."));
+          for (unsigned int q=0; q<n_q_points; ++q)
+            {
+              double sum = 0;
+              // take the trace of the derivatives scaled by the weight and square it
+              for (unsigned int k=0; k<dim; ++k)
+                sum += data.psi_grads[q][k][k] * std::sqrt(data.weight_vectors[q](k));
+              diff += sum * sum * fe_values.JxW(q);
+            }
+          diff = std::sqrt(diff);
+          break;
+
+        case W1infty_seminorm:
+        case W1infty_norm:
+        {
+          double t = 0;
+          // the gradients are tensors with spacedim entries, so take the
+          // maximum over all of them (this matters in codimension one)
+          for (unsigned int q=0; q<n_q_points; ++q)
+            for (unsigned int k=0; k<n_components; ++k)
+              for (unsigned int d=0; d<spacedim; ++d)
+                t = std::max(t,
+                             double(std::abs(data.psi_grads[q][k][d]) *
+                                    data.weight_vectors[q](k)));
+
+          // then add seminorm to norm if that had previously been computed
+          diff += t;
+        }
+        break;
+        default:
+          break;
+        }
+
+      // make sure the result is a finite number and hand it to the caller
+      AssertIsFinite(diff);
+      return diff;
+    }
+
+
+
+    // Common implementation behind all integrate_difference() variants: fill
+    // 'difference' with one entry per active cell that holds the requested
+    // norm of (exact_solution - fe_function) on that cell. Entries of cells
+    // that are not locally owned are set to zero.
+    template <int dim, class InVector, class OutVector, typename DoFHandlerType, int spacedim>
+    static
+    void
+    do_integrate_difference (const dealii::hp::MappingCollection<dim,spacedim> &mapping,
+                             const DoFHandlerType                              &dof,
+                             const InVector                                    &fe_function,
+                             const Function<spacedim>                          &exact_solution,
+                             OutVector                                         &difference,
+                             const dealii::hp::QCollection<dim>                &q,
+                             const NormType                                    &norm,
+                             const Function<spacedim>                          *weight,
+                             const double                                       exponent_1)
+    {
+      typedef typename InVector::value_type Number;
+
+      const unsigned int n_components = dof.get_fe().n_components();
+
+      // a weight function has to be scalar or has to provide one component
+      // per component of the finite element
+      if (weight!=0)
+        {
+          Assert ((weight->n_components==1) || (weight->n_components==n_components),
+                  ExcDimensionMismatch(weight->n_components, n_components));
+        }
+
+      difference.reinit (dof.get_triangulation().n_active_cells());
+
+      // the incoming exponent is only used for the norms that are
+      // parametrized by it; for the others it is implied by the norm
+      double exponent = exponent_1;
+      if ((norm == L2_norm) || (norm == H1_seminorm) ||
+          (norm == H1_norm) || (norm == Hdiv_seminorm))
+        exponent = 2.;
+      else if (norm == L1_norm)
+        exponent = 1.;
+
+      // classify the norm: seminorms only need gradients, the full Sobolev
+      // norms need gradients and values, everything else only values
+      const bool is_seminorm
+        = ((norm == H1_seminorm) || (norm == Hdiv_seminorm) ||
+           (norm == W1p_seminorm) || (norm == W1infty_seminorm));
+      const bool is_full_sobolev_norm
+        = ((norm == H1_norm) || (norm == W1p_norm) || (norm == W1infty_norm));
+
+      UpdateFlags update_flags = UpdateFlags (update_quadrature_points  |
+                                              update_JxW_values);
+      if (is_seminorm || is_full_sobolev_norm)
+        {
+          update_flags |= UpdateFlags (update_gradients);
+          // in codimension one the normal vectors are needed as well
+          if (spacedim == dim+1)
+            update_flags |= UpdateFlags (update_normal_vectors);
+        }
+      if (!is_seminorm)
+        update_flags |= UpdateFlags (update_values);
+
+      dealii::hp::FECollection<dim,spacedim> fe_collection (dof.get_fe());
+      IDScratchData<dim,spacedim, Number> data(mapping, fe_collection, q, update_flags);
+
+      // loop over all cells
+      typename DoFHandlerType::active_cell_iterator cell = dof.begin_active();
+      for (; cell != dof.end(); ++cell)
+        {
+          if (!cell->is_locally_owned())
+            {
+              // the cell is a ghost cell or is artificial. write a zero into
+              // the corresponding value of the returned vector
+              difference(cell->active_cell_index()) = 0;
+              continue;
+            }
+
+          // initialize the scratch data for this cell
+          data.x_fe_values.reinit (cell);
+          const dealii::FEValues<dim, spacedim> &fe_values
+            = data.x_fe_values.get_present_fe_values ();
+          data.resize_vectors (fe_values.n_quadrature_points, n_components);
+
+          // evaluate the finite element field for whatever is needed
+          if (update_flags & update_values)
+            fe_values.get_function_values (fe_function, data.function_values);
+          if (update_flags & update_gradients)
+            fe_values.get_function_gradients (fe_function, data.function_grads);
+
+          difference(cell->active_cell_index())
+            = integrate_difference_inner<dim,spacedim, Number> (exact_solution, norm, weight,
+                                                                update_flags, exponent,
+                                                                n_components, data);
+        }
+    }
+
+  } // namespace internal
+
+
+
+
+  // Variant for a single mapping and a single quadrature formula on a
+  // DoFHandler: wrap both into hp collections and forward to the common
+  // implementation.
+  template <int dim, class InVector, class OutVector, int spacedim>
+  void
+  integrate_difference (const Mapping<dim, spacedim>    &mapping,
+                        const DoFHandler<dim,spacedim> &dof,
+                        const InVector        &fe_function,
+                        const Function<spacedim>   &exact_solution,
+                        OutVector             &difference,
+                        const Quadrature<dim> &q,
+                        const NormType        &norm,
+                        const Function<spacedim>   *weight,
+                        const double           exponent)
+  {
+    const hp::MappingCollection<dim,spacedim> mapping_collection (mapping);
+    const hp::QCollection<dim>                q_collection (q);
+
+    internal::do_integrate_difference (mapping_collection,
+                                       dof, fe_function, exact_solution,
+                                       difference, q_collection,
+                                       norm, weight, exponent);
+  }
+
+
+  // Variant without a mapping argument: uses the mapping collection of
+  // hp::StaticMappingQ1 and wraps the quadrature formula into a collection.
+  template <int dim, class InVector, class OutVector, int spacedim>
+  void
+  integrate_difference (const DoFHandler<dim,spacedim>    &dof,
+                        const InVector           &fe_function,
+                        const Function<spacedim>      &exact_solution,
+                        OutVector                &difference,
+                        const Quadrature<dim>    &q,
+                        const NormType           &norm,
+                        const Function<spacedim>      *weight,
+                        const double              exponent)
+  {
+    const hp::QCollection<dim> q_collection (q);
+
+    internal::do_integrate_difference (hp::StaticMappingQ1<dim,spacedim>::mapping_collection,
+                                       dof, fe_function, exact_solution,
+                                       difference, q_collection,
+                                       norm, weight, exponent);
+  }
+
+
+
+  // Variant for hp::DoFHandler with explicitly given mapping and quadrature
+  // collections. Both collections are forwarded by reference to the common
+  // implementation; no copy of the mapping collection is made since the
+  // implementation only takes a const reference to it.
+  template <int dim, class InVector, class OutVector, int spacedim>
+  void
+  integrate_difference (const dealii::hp::MappingCollection<dim,spacedim>    &mapping,
+                        const dealii::hp::DoFHandler<dim,spacedim> &dof,
+                        const InVector        &fe_function,
+                        const Function<spacedim>   &exact_solution,
+                        OutVector             &difference,
+                        const dealii::hp::QCollection<dim> &q,
+                        const NormType        &norm,
+                        const Function<spacedim>   *weight,
+                        const double           exponent)
+  {
+    internal
+    ::do_integrate_difference (mapping,
+                               dof, fe_function, exact_solution,
+                               difference, q,
+                               norm, weight, exponent);
+  }
+
+
+  // Variant for hp::DoFHandler without a mapping argument: uses the mapping
+  // collection of hp::StaticMappingQ1.
+  template <int dim, class InVector, class OutVector, int spacedim>
+  void
+  integrate_difference (const dealii::hp::DoFHandler<dim,spacedim>    &dof,
+                        const InVector           &fe_function,
+                        const Function<spacedim>      &exact_solution,
+                        OutVector                &difference,
+                        const dealii::hp::QCollection<dim>    &q,
+                        const NormType           &norm,
+                        const Function<spacedim>      *weight,
+                        const double              exponent)
+  {
+    const hp::MappingCollection<dim,spacedim> &q1_mappings
+      = hp::StaticMappingQ1<dim,spacedim>::mapping_collection;
+
+    internal::do_integrate_difference (q1_mappings,
+                                       dof, fe_function, exact_solution,
+                                       difference, q,
+                                       norm, weight, exponent);
+  }
+
+
+
+  // Variant without a mapping argument: forwards to the overload taking a
+  // mapping, using the StaticMappingQ1 object for <dim,spacedim> (like the
+  // other wrappers in this file do), so that the mapping type matches the
+  // DoFHandler<dim,spacedim> that is passed along.
+  template <int dim, typename VectorType, int spacedim>
+  void
+  point_difference (const DoFHandler<dim,spacedim> &dof,
+                    const VectorType               &fe_function,
+                    const Function<spacedim>       &exact_function,
+                    Vector<double>                 &difference,
+                    const Point<spacedim>          &point)
+  {
+    point_difference(StaticMappingQ1<dim,spacedim>::mapping,
+                     dof,
+                     fe_function,
+                     exact_function,
+                     difference,
+                     point);
+  }
+
+
+  // Compute the difference between exact_function and the finite element
+  // function given by dof/fe_function at a single point and store it, one
+  // entry per vector component, in 'difference'.
+  //
+  // 'difference' has to have as many entries as the finite element has
+  // components. ExcPointNotAvailableHere is thrown if the cell found around
+  // the point is not locally owned.
+  template <int dim, typename VectorType, int spacedim>
+  void
+  point_difference (const Mapping<dim, spacedim>   &mapping,
+                    const DoFHandler<dim,spacedim> &dof,
+                    const VectorType               &fe_function,
+                    const Function<spacedim>       &exact_function,
+                    Vector<double>                 &difference,
+                    const Point<spacedim>          &point)
+  {
+    typedef typename VectorType::value_type Number;
+    // use the <dim,spacedim> variants of the classes so that they match the
+    // mapping and DoFHandler arguments
+    const FiniteElement<dim,spacedim> &fe = dof.get_fe();
+
+    Assert(difference.size() == fe.n_components(),
+           ExcDimensionMismatch(difference.size(), fe.n_components()));
+
+    // first find the cell in which this point
+    // is, initialize a quadrature rule with
+    // it, and then a FEValues object
+    const std::pair<typename DoFHandler<dim,spacedim>::active_cell_iterator, Point<spacedim> >
+    cell_point = GridTools::find_active_cell_around_point (mapping, dof, point);
+
+    AssertThrow(cell_point.first->is_locally_owned(),
+                ExcPointNotAvailableHere());
+    Assert(GeometryInfo<dim>::distance_to_unit_cell(cell_point.second) < 1e-10,
+           ExcInternalError());
+
+    // one-point quadrature formula located at the (projected) reference
+    // coordinates of the point
+    const Quadrature<dim>
+    quadrature (GeometryInfo<dim>::project_to_unit_cell(cell_point.second));
+    FEValues<dim,spacedim> fe_values(mapping, fe, quadrature, update_values);
+    fe_values.reinit(cell_point.first);
+
+    // then use this to get at the values of
+    // the given fe_function at this point
+    std::vector<Vector<Number> > u_value(1, Vector<Number> (fe.n_components()));
+    fe_values.get_function_values(fe_function, u_value);
+
+    // evaluate the exact function; use the scalar interface if there is
+    // only one component
+    if (fe.n_components() == 1)
+      difference(0) = exact_function.value(point);
+    else
+      exact_function.vector_value(point, difference);
+
+    // finally subtract the finite element values
+    for (unsigned int i=0; i<difference.size(); ++i)
+      difference(i) -= u_value[0](i);
+  }
+
+
+  // Variant without a mapping argument: forwards to the overload taking a
+  // mapping, using the StaticMappingQ1 mapping object.
+  template <int dim, typename VectorType, int spacedim>
+  void
+  point_value (const DoFHandler<dim,spacedim> &dof,
+               const VectorType               &fe_function,
+               const Point<spacedim>          &point,
+               Vector<double>                 &value)
+  {
+    const Mapping<dim,spacedim> &q1_mapping = StaticMappingQ1<dim,spacedim>::mapping;
+
+    point_value (q1_mapping, dof, fe_function, point, value);
+  }
+
+
+  // hp variant without a mapping argument: forwards to the overload taking
+  // a mapping collection, using the one of hp::StaticMappingQ1.
+  template <int dim, typename VectorType, int spacedim>
+  void
+  point_value (const hp::DoFHandler<dim,spacedim> &dof,
+               const VectorType                   &fe_function,
+               const Point<spacedim>              &point,
+               Vector<double>                     &value)
+  {
+    const hp::MappingCollection<dim,spacedim> &q1_mappings
+      = hp::StaticMappingQ1<dim,spacedim>::mapping_collection;
+
+    point_value (q1_mappings, dof, fe_function, point, value);
+  }
+
+
+  // Scalar variant without a mapping argument: returns what the overload
+  // taking a mapping returns for the StaticMappingQ1 mapping object.
+  template <int dim, typename VectorType, int spacedim>
+  double
+  point_value (const DoFHandler<dim,spacedim> &dof,
+               const VectorType               &fe_function,
+               const Point<spacedim>          &point)
+  {
+    const Mapping<dim,spacedim> &q1_mapping = StaticMappingQ1<dim,spacedim>::mapping;
+
+    return point_value (q1_mapping, dof, fe_function, point);
+  }
+
+
+  // Scalar hp variant without a mapping argument: returns what the overload
+  // taking a mapping collection returns for the one of hp::StaticMappingQ1.
+  template <int dim, typename VectorType, int spacedim>
+  double
+  point_value (const hp::DoFHandler<dim,spacedim> &dof,
+               const VectorType                   &fe_function,
+               const Point<spacedim>              &point)
+  {
+    const hp::MappingCollection<dim,spacedim> &q1_mappings
+      = hp::StaticMappingQ1<dim,spacedim>::mapping_collection;
+
+    return point_value (q1_mappings, dof, fe_function, point);
+  }
+
+
+  // Evaluate the finite element function given by dof/fe_function at a
+  // single point and store the result, one entry per vector component, in
+  // 'value'.
+  //
+  // 'value' has to have as many entries as the finite element has
+  // components. ExcPointNotAvailableHere is thrown if the cell found around
+  // the point is not locally owned.
+  template <int dim, typename VectorType, int spacedim>
+  void
+  point_value (const Mapping<dim, spacedim>   &mapping,
+               const DoFHandler<dim,spacedim> &dof,
+               const VectorType               &fe_function,
+               const Point<spacedim>          &point,
+               Vector<double>                 &value)
+  {
+    typedef typename VectorType::value_type Number;
+    // use the <dim,spacedim> variants of the classes so that they match the
+    // mapping and DoFHandler arguments
+    const FiniteElement<dim,spacedim> &fe = dof.get_fe();
+
+    Assert(value.size() == fe.n_components(),
+           ExcDimensionMismatch(value.size(), fe.n_components()));
+
+    // first find the cell in which this point
+    // is, initialize a quadrature rule with
+    // it, and then a FEValues object
+    const std::pair<typename DoFHandler<dim,spacedim>::active_cell_iterator, Point<spacedim> >
+    cell_point
+      = GridTools::find_active_cell_around_point (mapping, dof, point);
+
+    AssertThrow(cell_point.first->is_locally_owned(),
+                ExcPointNotAvailableHere());
+    Assert(GeometryInfo<dim>::distance_to_unit_cell(cell_point.second) < 1e-10,
+           ExcInternalError());
+
+    // one-point quadrature formula located at the (projected) reference
+    // coordinates of the point
+    const Quadrature<dim>
+    quadrature (GeometryInfo<dim>::project_to_unit_cell(cell_point.second));
+
+    FEValues<dim,spacedim> fe_values(mapping, fe, quadrature, update_values);
+    fe_values.reinit(cell_point.first);
+
+    // then use this to get at the values of
+    // the given fe_function at this point
+    std::vector<Vector<Number> > u_value(1, Vector<Number> (fe.n_components()));
+    fe_values.get_function_values(fe_function, u_value);
+
+    value = u_value[0];
+  }
+
+
+  // hp variant: evaluate the finite element function given by
+  // dof/fe_function at a single point and store the result, one entry per
+  // vector component, in 'value'. ExcPointNotAvailableHere is thrown if the
+  // cell found around the point is not locally owned.
+  template <int dim, typename VectorType, int spacedim>
+  void
+  point_value (const hp::MappingCollection<dim, spacedim> &mapping,
+               const hp::DoFHandler<dim,spacedim>         &dof,
+               const VectorType                           &fe_function,
+               const Point<spacedim>                      &point,
+               Vector<double>                             &value)
+  {
+    typedef typename VectorType::value_type Number;
+    typedef std::pair<typename hp::DoFHandler<dim,spacedim>::active_cell_iterator,
+            Point<spacedim> > CellAndPoint;
+
+    const hp::FECollection<dim, spacedim> &fe = dof.get_fe();
+
+    Assert(value.size() == fe.n_components(),
+           ExcDimensionMismatch(value.size(), fe.n_components()));
+
+    // locate the cell around the point together with the point's
+    // coordinates on the reference cell
+    const CellAndPoint cell_point
+      = GridTools::find_active_cell_around_point (mapping, dof, point);
+
+    AssertThrow(cell_point.first->is_locally_owned(),
+                ExcPointNotAvailableHere());
+    Assert(GeometryInfo<dim>::distance_to_unit_cell(cell_point.second) < 1e-10,
+           ExcInternalError());
+
+    // set up an hp::FEValues object for a one-point quadrature formula at
+    // that location and initialize it on the cell
+    const Quadrature<dim>
+    quadrature (GeometryInfo<dim>::project_to_unit_cell(cell_point.second));
+    const hp::QCollection<dim> quadrature_collection (quadrature);
+
+    hp::FEValues<dim, spacedim> hp_fe_values(mapping, fe, quadrature_collection, update_values);
+    hp_fe_values.reinit(cell_point.first);
+    const FEValues<dim, spacedim> &fe_values = hp_fe_values.get_present_fe_values();
+
+    // evaluate the finite element function at the single quadrature point
+    // and hand the values back
+    std::vector<Vector<Number> > u_value(1, Vector<Number> (fe.n_components()));
+    fe_values.get_function_values(fe_function, u_value);
+
+    value = u_value.front();
+  }
+
+
+  // Scalar variant: returns the value of a one-component finite element
+  // function at the given point, computed by the vector-valued overload.
+  template <int dim, typename VectorType, int spacedim>
+  double
+  point_value (const Mapping<dim, spacedim>   &mapping,
+               const DoFHandler<dim,spacedim> &dof,
+               const VectorType               &fe_function,
+               const Point<spacedim>          &point)
+  {
+    Assert(dof.get_fe().n_components() == 1,
+           ExcMessage ("Finite element is not scalar as is necessary for this function"));
+
+    // let the vector-valued variant fill a vector with a single entry
+    Vector<double> single_value(1);
+    point_value(mapping, dof, fe_function, point, single_value);
+
+    const double result = single_value(0);
+    return result;
+  }
+
+
+  // Scalar hp variant: returns the value of a one-component finite element
+  // function at the given point, computed by the vector-valued overload.
+  template <int dim, typename VectorType, int spacedim>
+  double
+  point_value (const hp::MappingCollection<dim, spacedim> &mapping,
+               const hp::DoFHandler<dim,spacedim>         &dof,
+               const VectorType                           &fe_function,
+               const Point<spacedim>                      &point)
+  {
+    Assert(dof.get_fe().n_components() == 1,
+           ExcMessage ("Finite element is not scalar as is necessary for this function"));
+
+    // let the vector-valued variant fill a vector with a single entry
+    Vector<double> single_value(1);
+    point_value(mapping, dof, fe_function, point, single_value);
+
+    const double result = single_value(0);
+    return result;
+  }
+
+
+
+  // Variant without a mapping argument: forwards to the overload taking a
+  // mapping, using the StaticMappingQ1 mapping object.
+  template <int dim, typename VectorType, int spacedim>
+  void
+  point_gradient (const DoFHandler<dim,spacedim> &dof,
+                  const VectorType               &fe_function,
+                  const Point<spacedim>          &point,
+                  std::vector<Tensor<1, spacedim, typename VectorType::value_type> > &gradients)
+  {
+    const Mapping<dim,spacedim> &q1_mapping = StaticMappingQ1<dim,spacedim>::mapping;
+
+    point_gradient (q1_mapping, dof, fe_function, point, gradients);
+  }
+
+
+  // hp variant without a mapping argument: forwards to the overload taking
+  // a mapping collection, using the one of hp::StaticMappingQ1.
+  template <int dim, typename VectorType, int spacedim>
+  void
+  point_gradient (const hp::DoFHandler<dim,spacedim> &dof,
+                  const VectorType                   &fe_function,
+                  const Point<spacedim>              &point,
+                  std::vector<Tensor<1, spacedim, typename VectorType::value_type> > &gradients)
+  {
+    const hp::MappingCollection<dim,spacedim> &q1_mappings
+      = hp::StaticMappingQ1<dim,spacedim>::mapping_collection;
+
+    point_gradient (q1_mappings, dof, fe_function, point, gradients);
+  }
+
+
+  // Scalar variant without a mapping argument: returns what the overload
+  // taking a mapping returns for the StaticMappingQ1 mapping object.
+  template <int dim, typename VectorType, int spacedim>
+  Tensor<1, spacedim, typename VectorType::value_type>
+  point_gradient (const DoFHandler<dim,spacedim> &dof,
+                  const VectorType               &fe_function,
+                  const Point<spacedim>          &point)
+  {
+    const Mapping<dim,spacedim> &q1_mapping = StaticMappingQ1<dim,spacedim>::mapping;
+
+    return point_gradient (q1_mapping, dof, fe_function, point);
+  }
+
+
+  // Scalar hp variant without a mapping argument: returns what the overload
+  // taking a mapping collection returns for the one of hp::StaticMappingQ1.
+  template <int dim, typename VectorType, int spacedim>
+  Tensor<1, spacedim, typename VectorType::value_type>
+  point_gradient (const hp::DoFHandler<dim,spacedim> &dof,
+                  const VectorType                   &fe_function,
+                  const Point<spacedim>              &point)
+  {
+    const hp::MappingCollection<dim,spacedim> &q1_mappings
+      = hp::StaticMappingQ1<dim,spacedim>::mapping_collection;
+
+    return point_gradient (q1_mappings, dof, fe_function, point);
+  }
+
+
+  // Evaluate the gradient of the finite element function given by
+  // dof/fe_function at a single point and store the result, one tensor per
+  // vector component, in 'gradient'.
+  //
+  // 'gradient' has to have as many entries as the finite element has
+  // components. ExcPointNotAvailableHere is thrown if the cell found around
+  // the point is not locally owned.
+  template <int dim, typename VectorType, int spacedim>
+  void
+  point_gradient (const Mapping<dim, spacedim>   &mapping,
+                  const DoFHandler<dim,spacedim> &dof,
+                  const VectorType               &fe_function,
+                  const Point<spacedim>          &point,
+                  std::vector<Tensor<1, spacedim, typename VectorType::value_type> > &gradient)
+  {
+    // use the <dim,spacedim> variants of the classes so that they match the
+    // mapping and DoFHandler arguments
+    const FiniteElement<dim,spacedim> &fe = dof.get_fe();
+
+    Assert(gradient.size() == fe.n_components(),
+           ExcDimensionMismatch(gradient.size(), fe.n_components()));
+
+    // first find the cell in which this point
+    // is, initialize a quadrature rule with
+    // it, and then a FEValues object
+    const std::pair<typename DoFHandler<dim,spacedim>::active_cell_iterator, Point<spacedim> >
+    cell_point
+      = GridTools::find_active_cell_around_point (mapping, dof, point);
+
+    AssertThrow(cell_point.first->is_locally_owned(),
+                ExcPointNotAvailableHere());
+    Assert(GeometryInfo<dim>::distance_to_unit_cell(cell_point.second) < 1e-10,
+           ExcInternalError());
+
+    // one-point quadrature formula located at the (projected) reference
+    // coordinates of the point
+    const Quadrature<dim>
+    quadrature (GeometryInfo<dim>::project_to_unit_cell(cell_point.second));
+
+    FEValues<dim,spacedim> fe_values(mapping, fe, quadrature, update_gradients);
+    fe_values.reinit(cell_point.first);
+
+    // then use this to get the gradients of
+    // the given fe_function at this point. the gradients are tensors with
+    // spacedim entries, just like the entries of the output argument
+    typedef typename VectorType::value_type Number;
+    std::vector<std::vector<Tensor<1, spacedim, Number> > >
+    u_gradient(1, std::vector<Tensor<1, spacedim, Number> > (fe.n_components()));
+    fe_values.get_function_gradients(fe_function, u_gradient);
+
+    gradient = u_gradient[0];
+  }
+
+
+  // hp variant: evaluate the gradient of the finite element function given
+  // by dof/fe_function at a single point and store the result, one tensor
+  // per vector component, in 'gradient'.
+  //
+  // 'gradient' has to have as many entries as the finite element has
+  // components. ExcPointNotAvailableHere is thrown if the cell found around
+  // the point is not locally owned.
+  template <int dim, typename VectorType, int spacedim>
+  void
+  point_gradient (const hp::MappingCollection<dim, spacedim> &mapping,
+                  const hp::DoFHandler<dim,spacedim>         &dof,
+                  const VectorType                           &fe_function,
+                  const Point<spacedim>                      &point,
+                  std::vector<Tensor<1, spacedim, typename VectorType::value_type> > &gradient)
+  {
+    typedef typename VectorType::value_type Number;
+    const hp::FECollection<dim, spacedim> &fe = dof.get_fe();
+
+    Assert(gradient.size() == fe.n_components(),
+           ExcDimensionMismatch(gradient.size(), fe.n_components()));
+
+    // first find the cell in which this point
+    // is, initialize a quadrature rule with
+    // it, and then a FEValues object
+    const std::pair<typename hp::DoFHandler<dim,spacedim>::active_cell_iterator, Point<spacedim> >
+    cell_point =  GridTools::find_active_cell_around_point (mapping, dof, point);
+
+    AssertThrow(cell_point.first->is_locally_owned(),
+                ExcPointNotAvailableHere());
+    Assert(GeometryInfo<dim>::distance_to_unit_cell(cell_point.second) < 1e-10,
+           ExcInternalError());
+
+    const Quadrature<dim>
+    quadrature (GeometryInfo<dim>::project_to_unit_cell(cell_point.second));
+    hp::FEValues<dim, spacedim> hp_fe_values(mapping, fe, hp::QCollection<dim>(quadrature), update_gradients);
+    hp_fe_values.reinit(cell_point.first);
+    const FEValues<dim, spacedim> &fe_values = hp_fe_values.get_present_fe_values();
+
+    // the gradients are tensors with spacedim entries, just like the
+    // entries of the output argument
+    std::vector<std::vector<Tensor<1, spacedim, Number> > >
+    u_gradient(1, std::vector<Tensor<1, spacedim, Number> > (fe.n_components()));
+    fe_values.get_function_gradients(fe_function, u_gradient);
+
+    gradient = u_gradient[0];
+  }
+
+
+  // Scalar variant: returns the gradient of a one-component finite element
+  // function at the given point, computed by the vector-valued overload.
+  template <int dim, typename VectorType, int spacedim>
+  Tensor<1, spacedim, typename VectorType::value_type>
+  point_gradient (const Mapping<dim, spacedim>   &mapping,
+                  const DoFHandler<dim,spacedim> &dof,
+                  const VectorType               &fe_function,
+                  const Point<spacedim>          &point)
+  {
+    Assert(dof.get_fe().n_components() == 1,
+           ExcMessage ("Finite element is not scalar as is necessary for this function"));
+
+    // the tensors have spacedim entries, matching both the argument type of
+    // the vector-valued overload and the return type of this function
+    std::vector<Tensor<1, spacedim, typename VectorType::value_type> > gradient(1);
+    point_gradient (mapping, dof, fe_function, point, gradient);
+
+    return gradient[0];
+  }
+
+
+
+  // Scalar hp variant: returns the gradient of a one-component finite
+  // element function at the given point, computed by the vector-valued
+  // overload.
+  template <int dim, typename VectorType, int spacedim>
+  Tensor<1, spacedim, typename VectorType::value_type>
+  point_gradient (const hp::MappingCollection<dim, spacedim> &mapping,
+                  const hp::DoFHandler<dim,spacedim>         &dof,
+                  const VectorType                           &fe_function,
+                  const Point<spacedim>                      &point)
+  {
+    Assert(dof.get_fe().n_components() == 1,
+           ExcMessage ("Finite element is not scalar as is necessary for this function"));
+
+    // the tensors have spacedim entries, matching both the argument type of
+    // the vector-valued overload and the return type of this function
+    std::vector<Tensor<1, spacedim, typename VectorType::value_type> > gradient(1);
+    point_gradient (mapping, dof, fe_function, point, gradient);
+
+    return gradient[0];
+  }
+
+
+
+  template <typename VectorType>
+  void
+  subtract_mean_value(VectorType              &v,
+                      const std::vector<bool> &p_select)
+  {
+    if (p_select.size() == 0)
+      {
+        // In case of an empty boolean mask operate on the whole vector:
+        v.add( - v.mean_value() );
+      }
+    else
+      {
+        // This function is not implemented for distributed vectors, so
+        // error out if v is not a plain Vector or BlockVector:
+        Assert(   dynamic_cast<Vector<double> *>(& v)
+                  || dynamic_cast<Vector<float> *>(& v)
+                  || dynamic_cast<Vector<long double> *>(& v)
+                  || dynamic_cast<BlockVector<double> *>(& v)
+                  || dynamic_cast<BlockVector<float> *>(& v)
+                  || dynamic_cast<BlockVector<long double> *>(& v),
+                  ExcNotImplemented());
+
+        const unsigned int n = v.size();
+
+        Assert(p_select.size() == n,
+               ExcDimensionMismatch(p_select.size(), n));
+
+        typename VectorType::value_type s = 0.;
+        unsigned int counter = 0;
+        for (unsigned int i=0; i<n; ++i)
+          if (p_select[i])
+            {
+              s += v(i);
+              ++counter;
+            }
+        // Error out if we have not constrained anything. Note that in this
+        // case the vector v is always nonempty.
+        Assert (n == 0 || counter > 0, ComponentMask::ExcNoComponentSelected());
+
+        s /= counter;
+
+        for (unsigned int i=0; i<n; ++i)
+          if (p_select[i])
+            v(i) -= s;
+      }
+  }
+
+  namespace
+  {
+    template <typename Number>
+    void set_possibly_complex_number(const double &r,
+                                     const double &,
+                                     Number &n)
+    {
+      n = r;
+    }
+
+
+
+    template <typename Type>
+    void set_possibly_complex_number(const double &r,
+                                     const double &i,
+                                     std::complex<Type> &n)
+    {
+      n = std::complex<Type>(r,i);
+    }
+  }
+
+
+  template <int dim, typename VectorType, int spacedim>
+  double
+  compute_mean_value (const Mapping<dim, spacedim>   &mapping,
+                      const DoFHandler<dim,spacedim> &dof,
+                      const Quadrature<dim>          &quadrature,
+                      const VectorType               &v,
+                      const unsigned int             component)
+  {
+    typedef typename VectorType::value_type Number;
+    Assert (v.size() == dof.n_dofs(),
+            ExcDimensionMismatch (v.size(), dof.n_dofs()));
+    Assert (component < dof.get_fe().n_components(),
+            ExcIndexRange(component, 0, dof.get_fe().n_components()));
+
+    FEValues<dim,spacedim> fe(mapping, dof.get_fe(), quadrature,
+                              UpdateFlags(update_JxW_values
+                                          | update_values));
+
+    typename DoFHandler<dim,spacedim>::active_cell_iterator cell;
+    std::vector<Vector<Number> > values(quadrature.size(),
+                                        Vector<Number> (dof.get_fe().n_components()));
+
+    Number mean = Number();
+    double area = 0.;
+    // Compute mean value
+    for (cell = dof.begin_active(); cell != dof.end(); ++cell)
+      if (cell->is_locally_owned())
+        {
+          fe.reinit (cell);
+          fe.get_function_values(v, values);
+          for (unsigned int k=0; k< quadrature.size(); ++k)
+            {
+              mean += fe.JxW(k) * values[k](component);
+              area += fe.JxW(k);
+            }
+        }
+
+#ifdef DEAL_II_WITH_MPI
+    // if this was a distributed DoFHandler, we need to do the reduction
+    // over the entire domain
+    if (const parallel::Triangulation<dim,spacedim> *
+        p_triangulation
+        = dynamic_cast<const parallel::Triangulation<dim,spacedim> *>(&dof.get_triangulation()))
+      {
+        // The type used to store the elements of the global vector may be a
+        // real or a complex number. Do the global reduction always with real
+        // and imaginary types so that we don't have to distinguish, and to this
+        // end just copy everything into a complex number and, later, back into
+        // the original data type.
+        std::complex<double> mean_double = mean;
+        double my_values[3] = { mean_double.real(), mean_double.imag(), area };
+        double global_values[3];
+
+        MPI_Allreduce (my_values, global_values, 3, MPI_DOUBLE,
+                       MPI_SUM,
+                       p_triangulation->get_communicator());
+
+        set_possibly_complex_number(global_values[0], global_values[1],
+                                    mean);
+        area = global_values[2];
+      }
+#endif
+
+    return (mean/area);
+  }
+
+
+  template <int dim, typename VectorType, int spacedim>
+  double
+  compute_mean_value (const DoFHandler<dim,spacedim> &dof,
+                      const Quadrature<dim>          &quadrature,
+                      const VectorType               &v,
+                      const unsigned int             component)
+  {
+    return compute_mean_value(StaticMappingQ1<dim,spacedim>::mapping, dof, quadrature, v, component);
+  }
+
+
+  template<typename DoFHandlerType, typename VectorType>
+  void get_position_vector(const DoFHandlerType &dh,
+                           VectorType           &vector,
+                           const ComponentMask  &mask)
+  {
+    AssertDimension(vector.size(), dh.n_dofs());
+
+    const unsigned int dim=DoFHandlerType::dimension;
+    const unsigned int spacedim=DoFHandlerType::space_dimension;
+    const FiniteElement<dim, spacedim> &fe = dh.get_fe();
+
+
+    // Construct default fe_mask;
+    const ComponentMask fe_mask(mask.size() ? mask :
+                                ComponentMask(fe.get_nonzero_components(0).size(), true));
+
+    AssertDimension(fe_mask.size(), fe.get_nonzero_components(0).size());
+
+    std::vector<unsigned int> fe_to_real(fe_mask.size(), numbers::invalid_unsigned_int);
+    unsigned int size = 0;
+    for (unsigned int i=0; i<fe_mask.size(); ++i)
+      {
+        if (fe_mask[i])
+          fe_to_real[i] = size++;
+      }
+    Assert(size == spacedim,
+           ExcMessage("The Component Mask you provided is invalid. It has to select exactly spacedim entries."));
+
+
+    if ( fe.has_support_points() )
+      {
+        typename DoFHandlerType::active_cell_iterator cell;
+        const Quadrature<dim> quad(fe.get_unit_support_points());
+
+        MappingQ<dim,spacedim> map_q(fe.degree);
+        FEValues<dim,spacedim> fe_v(map_q, fe, quad, update_quadrature_points);
+        std::vector<types::global_dof_index> dofs(fe.dofs_per_cell);
+
+        AssertDimension(fe.dofs_per_cell, fe.get_unit_support_points().size());
+        Assert(fe.is_primitive(), ExcMessage("FE is not Primitive! This won't work."));
+
+        for (cell = dh.begin_active(); cell != dh.end(); ++cell)
+          {
+            fe_v.reinit(cell);
+            cell->get_dof_indices(dofs);
+            const std::vector<Point<spacedim> > &points = fe_v.get_quadrature_points();
+            for (unsigned int q = 0; q < points.size(); ++q)
+              {
+                unsigned int comp = fe.system_to_component_index(q).first;
+                if (fe_mask[comp])
+                  vector(dofs[q]) = points[q][fe_to_real[comp]];
+              }
+          }
+      }
+    else
+      {
+        // Construct a FiniteElement with FE_Q^spacedim, and call this
+        // function again.
+        //
+        // Once we have this, interpolate with the given finite element
+        // to get a Mapping which is interpolatory at the support points
+        // of FE_Q(fe.degree())
+        const FESystem<dim,spacedim> *fe_system = dynamic_cast<const FESystem<dim, spacedim> *>(&fe);
+        Assert(fe_system, ExcNotImplemented());
+        unsigned int degree = numbers::invalid_unsigned_int;
+
+        // Get information about the blocks
+        for (unsigned int i=0; i<fe_mask.size(); ++i)
+          if (fe_mask[i])
+            {
+              const unsigned int base_i = fe_system->component_to_base_index(i).first;
+              Assert(degree == numbers::invalid_unsigned_int ||
+                     degree == fe_system->base_element(base_i).degree,
+                     ExcNotImplemented());
+              Assert(fe_system->base_element(base_i).is_primitive(),
+                     ExcNotImplemented());
+              degree = fe_system->base_element(base_i).degree;
+            }
+
+        // We create an intermediate FE_Q vector space, and then
+        // interpolate from that vector space to this one, by
+        // carefully selecting the right components.
+
+        FESystem<dim,spacedim> feq(FE_Q<dim,spacedim>(degree), spacedim);
+        DoFHandlerType dhq(dh.get_triangulation());
+        dhq.distribute_dofs(feq);
+        Vector<double> eulerq(dhq.n_dofs());
+        const ComponentMask maskq(spacedim, true);
+        get_position_vector(dhq, eulerq);
+
+        FullMatrix<double> transfer(fe.dofs_per_cell, feq.dofs_per_cell);
+        FullMatrix<double> local_transfer(feq.dofs_per_cell);
+        const std::vector<Point<dim> > &points = feq.get_unit_support_points();
+
+        // Here we construct the interpolation matrix from
+        // FE_Q^spacedim to the FiniteElement used by
+        // euler_dof_handler.
+        //
+        // In order to construct such interpolation matrix, we have to
+        // solve the following system:
+        //
+        // v_j phi_j(q_i) = w_k psi_k(q_i) = w_k delta_ki = w_i
+        //
+        // where psi_k are the basis functions for fe_q, and phi_i are
+        // the basis functions of the target space while q_i are the
+        // support points for the fe_q space. With this choice of
+        // interpolation points, one of the matrices is the identity
+        // matrix, and we have to invert only one matrix. The
+        // resulting vector will be interpolatory at the support
+        // points of fe_q, even if the finite element does not have
+        // support points.
+        //
+        // Morally, we should invert the matrix T_ij = phi_i(q_j),
+        // however in general this matrix is not invertible, since
+        // there may be components which do not contribute to the
+        // displacement vector. Since we are not interested in those
+        // components, we construct a square matrix with the same
+        // number of components of the FE_Q system. The FE_Q system
+        // was constructed above in such a way that the polynomial
+        // degree of the FE_Q system and that of the given FE are the
+        // same on the cell, which should guarantee that, for the
+        // displacement components only, the interpolation matrix is
+        // invertible. We construct a mapping between indices first,
+        // and check that this is the case. If not, we bail out, not
+        // knowing what to do in this case.
+
+        std::vector<unsigned int> fe_to_feq(fe.dofs_per_cell, numbers::invalid_unsigned_int);
+        unsigned int index=0;
+        for (unsigned int i=0; i<fe.dofs_per_cell; ++i)
+          if (fe_mask[fe.system_to_component_index(i).first])
+            fe_to_feq[i] = index++;
+
+        // If index is not the same as feq.dofs_per_cell, we won't
+        // know how to invert the resulting matrix. Bail out.
+        Assert(index == feq.dofs_per_cell, ExcNotImplemented());
+
+        for (unsigned int j=0; j<fe.dofs_per_cell; ++j)
+          {
+            const unsigned int comp_j = fe.system_to_component_index(j).first;
+            if (fe_mask[comp_j])
+              for (unsigned int i=0; i<points.size(); ++i)
+                {
+                  if ( fe_to_real[comp_j] == feq.system_to_component_index(i).first)
+                    local_transfer(i, fe_to_feq[j]) = fe.shape_value(j, points[i]);
+                }
+          }
+
+        // Now we construct the rectangular interpolation matrix. This
+        // one is filled only with the information from the components
+        // of the displacement. The rest is set to zero.
+        local_transfer.invert(local_transfer);
+        for (unsigned int i=0; i<fe.dofs_per_cell; ++i)
+          if (fe_to_feq[i] != numbers::invalid_unsigned_int)
+            for (unsigned int j=0; j<feq.dofs_per_cell; ++j)
+              transfer(i, j) = local_transfer(fe_to_feq[i], j);
+
+        // The interpolation matrix is then passed to the
+        // VectorTools::interpolate() function to generate the correct
+        // interpolation.
+        interpolate(dhq, dh, transfer, eulerq, vector);
+      }
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/include/deal.II/opencascade/boundary_lib.h b/include/deal.II/opencascade/boundary_lib.h
new file mode 100644
index 0000000..ed69a17
--- /dev/null
+++ b/include/deal.II/opencascade/boundary_lib.h
@@ -0,0 +1,333 @@
+// ---------------------------------------------------------------------
+// $Id$
+//
+// Copyright (C) 2014 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__occ_boundary_lib_h
+#define dealii__occ_boundary_lib_h
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_OPENCASCADE
+
+#include <deal.II/opencascade/utilities.h>
+#include <deal.II/grid/tria_boundary.h>
+
+// opencascade needs "HAVE_CONFIG_H" to be exported...
+#define HAVE_CONFIG_H
+#include <BRepAdaptor_Curve.hxx>
+#include <Adaptor3d_Curve.hxx>
+#undef HAVE_CONFIG_H
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * @addtogroup OpenCASCADE
+ * @{
+ */
+
+namespace OpenCASCADE
+{
+  /**
+   * A Boundary object based on OpenCASCADE TopoDS_Shape where new
+   * points are first computed by averaging the surrounding points in the same
+   * way as FlatManifold does, and are then projected in the normal direction
+   * using OpenCASCADE utilities.
+   *
+   * This class makes no assumptions on the shape you pass to it, and the
+   * topological dimension of the Manifold is inferred from the TopoDS_Shape
+   * itself. In debug mode there is a sanity check to make sure that the
+   * surrounding points (the ones used in project_to_manifold()) actually live
+   * on the Manifold, i.e., calling OpenCASCADE::closest_point() on those
+   * points leaves them untouched. If this is not the case, an
+   * ExcPointNotOnManifold is thrown.
+   *
+   * This could happen, for example, if you are trying to use a shape of type
+   * TopoDS_Edge when projecting on a face. In this case, the vertices of the
+   * face would be collapsed to the edge, and your surrounding points would
+   * not be lying on the given shape, raising an exception.
+   *
+   * @author Luca Heltai, Andrea Mola, 2011--2014.
+   */
+  template <int dim, int spacedim>
+  class NormalProjectionBoundary : public Boundary<dim,spacedim>
+  {
+  public:
+
+    /**
+     * The standard constructor takes a generic TopoDS_Shape @p sh, and a
+     * tolerance used to compute distances internally.
+     *
+     * The TopoDS_Shape can be arbitrary, i.e., a collection of shapes, faces,
+     * edges or a single face or edge.
+     */
+    NormalProjectionBoundary(const TopoDS_Shape &sh,
+                             const double tolerance=1e-7);
+
+    /**
+     * Perform the actual projection onto the manifold. This function, in
+     * debug mode, checks that each of the @p surrounding_points is within
+     * tolerance from the given TopoDS_Shape. If this is not the case, an
+     * exception is thrown.
+     *
+     * The projected point is computed using OpenCASCADE normal projection
+     * algorithms.
+     */
+    virtual Point<spacedim>
+    project_to_manifold (const std::vector<Point<spacedim> > &surrounding_points,
+                         const Point<spacedim> &candidate) const;
+
+
+  private:
+    /**
+     * The topological shape which is used internally to project points. You
+     * can construct such a shape by calling the OpenCASCADE::read_IGES()
+     * function, which will create a TopoDS_Shape with the geometry contained
+     * in the IGES file.
+     */
+    const TopoDS_Shape sh;
+
+    /**
+     * Relative tolerance used by this class to compute distances.
+     */
+    const double tolerance;
+  };
+
+  /**
+   * A Boundary object based on OpenCASCADE TopoDS_Shape where new points are
+   * first computed by averaging the surrounding points in the same way as
+   * FlatManifold does, and then projecting them onto the manifold along the
+   * direction specified at construction time using OpenCASCADE utilities.
+   *
+   * This class makes no assumptions on the shape you pass to it, and the
+   * topological dimension of the Manifold is inferred from the TopoDS_Shape
+   * itself. In debug mode there is a sanity check to make sure that the
+   * surrounding points (the ones used in project_to_manifold()) actually live
+   * on the Manifold, i.e., calling OpenCASCADE::closest_point() on those
+   * points leaves them untouched. If this is not the case, an
+   * ExcPointNotOnManifold is thrown.
+   *
+   * Notice that this type of Boundary descriptor may fail to give results if
+   * the triangulation to be refined is close to the boundary of the given
+   * TopoDS_Shape, or when the direction you use at construction time does not
+   * intersect the shape. An exception is thrown when this happens.
+   *
+   * @author Luca Heltai, Andrea Mola, 2011--2014.
+   */
+  template <int dim, int spacedim>
+  class DirectionalProjectionBoundary : public Boundary<dim,spacedim>
+  {
+  public:
+    /**
+     * Construct a Boundary object which will project points on the
+     * TopoDS_Shape @p sh, along the given @p direction.
+     */
+    DirectionalProjectionBoundary(const TopoDS_Shape &sh,
+                                  const Tensor<1,spacedim> &direction,
+                                  const double tolerance=1e-7);
+
+    /**
+     * Perform the actual projection onto the manifold. This function, in
+     * debug mode, checks that each of the @p surrounding_points is within
+     * tolerance from the given TopoDS_Shape. If this is not the case, an
+     * exception is thrown.
+     *
+     * The projected point is computed using OpenCASCADE directional
+     * projection algorithms.
+     */
+    virtual Point<spacedim>
+    project_to_manifold (const std::vector<Point<spacedim> > &surrounding_points,
+                         const Point<spacedim> &candidate) const;
+
+  private:
+    /**
+     * The topological shape which is used internally to project points. You
+     * can construct such a shape by calling the OpenCASCADE::read_IGES()
+     * function, which will create a TopoDS_Shape with the geometry contained
+     * in the IGES file.
+     */
+    const TopoDS_Shape sh;
+
+    /**
+     * Direction used to project new points on the shape.
+     */
+    const Point<3> direction;
+
+    /**
+     * Relative tolerance used by this class to compute distances.
+     */
+    const double tolerance;
+  };
+
+
+  /**
+   * A Boundary object based on OpenCASCADE TopoDS_Shape where new points are
+   * first computed by averaging the surrounding points in the same way as
+   * FlatManifold does, and then projecting them using OpenCASCADE utilities
+   * onto the manifold along a direction which is an estimation of the
+   * surrounding points (hence mesh cell) normal.
+   *
+   * The direction normal to the mesh is particularly useful because it is the
+   * direction in which the mesh is missing nodes. For instance, during the
+   * refinement of a cell a new node is initially created around the
+   * barycenter of the cell. This location somehow ensures a uniform distance
+   * from the nodes of the old cell. Projecting such cell barycenter onto the
+   * CAD surface in the direction normal to the original cell will then retain
+   * uniform distance from the points of the original cell. Of course, at the
+   * stage of mesh generation, no dof handler nor finite element are defined,
+   * and such direction has to be estimated. For the case in which 8
+   * surrounding points are present, 4 different triangles are identified with
+   * the points assigned, and the normals of such triangles are averaged to
+   * obtain the approximation of the normal to the cell.
+   *
+   * The case in which 2 surrounding points are present (i.e., a cell edge is
+   * being refined) is of course trickier. The average of the CAD surface
+   * normals at the 2 surrounding points is first computed, and then projected
+   * onto the plane normal to the segment linking the surrounding points. This
+   * again is an attempt to place the new point at equal distance from
+   * the surrounding points.
+   *
+   * This class only operates with CAD faces and makes the assumption that the
+   * shape you pass to it contains at least one face. If that is not the case,
+   * an Exception is thrown. In debug mode there is a sanity check to make
+   * sure that the surrounding points (the ones used in project_to_manifold())
+   * actually live on the Manifold, i.e., calling OpenCASCADE::closest_point()
+   * on those points leaves them untouched. If this is not the case, an
+   * ExcPointNotOnManifold is thrown.
+   *
+   *
+   * Notice that this type of Boundary descriptor may fail to give results if
+   * the triangulation to be refined is close to the boundary of the given
+   * TopoDS_Shape, or when the normal direction estimated from the surrounding
+   * points does not intersect the shape.  An exception is thrown when this
+   * happens.
+   *
+   * @author Luca Heltai, Andrea Mola, 2011--2014.
+   */
+  template <int dim, int spacedim>
+  class NormalToMeshProjectionBoundary : public Boundary<dim,spacedim>
+  {
+  public:
+    /**
+     * Construct a Boundary object which will project points on the
+     * TopoDS_Shape @p sh, along a direction which is approximately normal to
+     * the mesh cell.
+     */
+    NormalToMeshProjectionBoundary(const TopoDS_Shape &sh,
+                                   const double tolerance=1e-7);
+
+    /**
+     * Perform the actual projection onto the manifold. This function, in
+     * debug mode, checks that each of the @p surrounding_points is within
+     * tolerance from the given TopoDS_Shape. If this is not the case, an
+     * exception is thrown.
+     */
+    virtual Point<spacedim>
+    project_to_manifold (const std::vector<Point<spacedim> > &surrounding_points,
+                         const Point<spacedim> &candidate) const;
+
+  private:
+    /**
+     * The topological shape which is used internally to project points. You
+     * can construct such a shape by calling the OpenCASCADE::read_IGES()
+     * function, which will create a TopoDS_Shape with the geometry contained
+     * in the IGES file.
+     */
+    const TopoDS_Shape sh;
+
+    /**
+     * Direction used to project new points on the shape.
+     */
+    const Point<3> direction;
+
+    /**
+     * Relative tolerance used by this class to compute distances.
+     */
+    const double tolerance;
+  };
+
+  /**
+   * A Boundary object based on OpenCASCADE TopoDS_Shape objects which have
+   * topological dimension equal to one (TopoDS_Edge or TopoDS_Wire) where new
+   * points are located at the arclength average of the surrounding points. If
+   * the given TopoDS_Shape can be cast to a periodic (closed) curve, then
+   * this information is used internally to set the periodicity of the base
+   * ChartManifold class.
+   *
+   * This class can only work on TopoDS_Edge or TopoDS_Wire objects, and it
+   * only makes sense when spacedim is three. If you use an object of
+   * topological dimension different from one, an exception is thrown.
+   *
+   * In debug mode there is an additional sanity check to make sure that the
+   * surrounding points actually live on the Manifold, i.e., calling
+   * OpenCASCADE::closest_point() on those points leaves them untouched. If
+   * this is not the case, an ExcPointNotOnManifold is thrown.
+   *
+   * @author Luca Heltai, Andrea Mola, 2011--2014.
+   */
+  template <int dim, int spacedim>
+  class ArclengthProjectionLineManifold : public  ChartManifold<dim,spacedim,1>
+  {
+  public:
+    /**
+     * Default constructor with a TopoDS_Edge.
+     */
+    ArclengthProjectionLineManifold(const TopoDS_Shape &sh,
+                                    const double tolerance=1e-7);
+
+    /**
+     * Given a point on real space, find its arclength parameter. Throws an
+     * error in debug mode, if the point is not on the TopoDS_Edge given at
+     * construction time.
+     */
+    virtual Point<1>
+    pull_back(const Point<spacedim> &space_point) const;
+
+    /**
+     * Given an arclength parameter, find its image in real space.
+     */
+    virtual Point<spacedim>
+    push_forward(const Point<1> &chart_point) const;
+
+  private:
+    /**
+     * A Curve adaptor. This is the one which is used in the computations, and
+     * it points to the right one above.
+     */
+    Handle_Adaptor3d_HCurve curve;
+
+    /**
+     * Relative tolerance used in all internal computations.
+     */
+    const double tolerance;
+
+    /**
+     * The total length of the curve. This is also used as a period if the
+     * edge is periodic.
+     */
+    const double length;
+  };
+}
+
+/*@}*/
+
+DEAL_II_NAMESPACE_CLOSE
+
+
+#endif // DEAL_II_WITH_OPENCASCADE
+
+/*------------------------------ occ_boundary_lib.h ------------------------------*/
+#endif
+/*------------------------------ occ_boundary_lib.h ------------------------------*/
diff --git a/include/deal.II/opencascade/utilities.h b/include/deal.II/opencascade/utilities.h
new file mode 100644
index 0000000..67fbc5b
--- /dev/null
+++ b/include/deal.II/opencascade/utilities.h
@@ -0,0 +1,379 @@
+// ---------------------------------------------------------------------
+// $Id$
+//
+// Copyright (C) 2014 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dealii__occ_utilities_h
+#define dealii__occ_utilities_h
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_OPENCASCADE
+
+#include <deal.II/grid/tria.h>
+#include <deal.II/base/point.h>
+
+#include <string>
+
+// opencascade needs "HAVE_CONFIG_H" to be exported...
+#define HAVE_CONFIG_H
+#include <TopoDS_Shape.hxx>
+#include <TopoDS_Face.hxx>
+#include <TopoDS_Edge.hxx>
+#include <TopoDS_Vertex.hxx>
+#include <TopoDS_Compound.hxx>
+#include <TopoDS_CompSolid.hxx>
+#include <TopoDS_Solid.hxx>
+#include <TopoDS_Shell.hxx>
+#include <TopoDS_Wire.hxx>
+#include <IFSelect_ReturnStatus.hxx>
+#include <gp_Pnt.hxx>
+#undef HAVE_CONFIG_H
+
+
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * We collect in this namespace all utilities which operate on OpenCASCADE
+ * entities. OpenCASCADE splits every object into a topological description
+ * and a geometrical entity. The basic topological description is a
+ * TopoDS_Shape. TopoDS_Shapes are light objects, and can be copied around.
+ * The closest deal.II analog is a TriaIterator.
+ *
+ * The OpenCASCADE topology is designed with reference to the STEP standard
+ * ISO-10303-42.  The structure is an oriented one-way graph, where parents
+ * refer to their children, and there are no back references. Abstract
+ * structure is implemented as C++ classes from the TopoDS package. A
+ * TopoDS_Shape is manipulated by value and contains 3 fields: location,
+ * orientation and a myTShape handle (of the TopoDS_TShape type). According to
+ * OpenCASCADE documentation, myTShape and Location are used to share data
+ * between various shapes to save memory. For example, an edge belonging to
+ * two faces has equal Locations and myTShape fields but different
+ * Orientations (Forward in context of one face and Reversed in one of the
+ * other).
+ *
+ * Valid shapes include collections of other shapes, solids, faces, edges,
+ * vertices, etc.
+ *
+ * Once a topological description is available, if a concrete geometrical
+ * object can be created, the BRep classes allow one to extract the actual
+ * geometrical information from a shape.
+ *
+ * This is done by inheriting abstract topology classes from the TopoDS
+ * package by those implementing a boundary representation model (from the
+ * BRep package). Only 3 types of topological objects have geometric
+ * representations – vertex, edge, and face.
+ *
+ * Every TopoDS_Shape can be queried to figure out what type of shape it is,
+ * and actual geometrical objects, like surfaces, curves or points, can be
+ * extracted using BRepTools.
+ *
+ * In this namespace we provide readers and writers that read standard CAD
+ * files, and return a TopoDS_Shape, or that write a CAD file, given a
+ * TopoDS_Shape. Most of the functions in the OpenCASCADE namespace deal with
+ * TopoDS_Shapes of one type or another, and provide interfaces to common
+ * deal.II objects, like Triangulation, Manifold, and so on.
+ *
+ * Notice that these tools are only useful when spacedim is equal to three,
+ * since OpenCASCADE only operates in three-dimensional mode.
+ *
+ * @author Luca Heltai, Andrea Mola, 2011--2014.
+ */
+namespace OpenCASCADE
+{
+  /**
+   * Count the subobjects of a shape. This function is useful to gather
+   * information about the TopoDS_Shape passed as argument. It returns the
+   * number of faces, edges and vertices (the only topological entities
+   * associated with actual geometries) which are contained in the given
+   * shape.
+   */
+  std_cxx11::tuple<unsigned int, unsigned int, unsigned int>
+  count_elements(const TopoDS_Shape &shape);
+
+  /**
+   * Read IGES files and translate their content into openCascade topological
+   * entities. The option scale_factor is used to compensate for different
+   * units being used in the IGES files and in the target application. The
+   * standard unit for IGES files is millimeters. The return object is a
+   * TopoDS_Shape which contains all objects from the file.
+   */
+  TopoDS_Shape read_IGES(const std::string &filename,
+                         const double scale_factor=1e-3);
+
+  /**
+   * Write the given topological shape into an IGES file.
+   */
+  void write_IGES(const TopoDS_Shape &shape,
+                  const std::string &filename);
+
+  /**
+   * Read STEP files and translate their content into openCascade topological
+   * entities. The option scale_factor is used to compensate for different
+   * units being used in the STEP files and in the target application. The
+   * standard unit for STEP files is millimeters. The return object is a
+   * TopoDS_Shape which contains all objects from the file.
+   */
+  TopoDS_Shape read_STEP(const std::string &filename,
+                         const double scale_factor=1e-3);
+
+
+  /**
+   * Write the given topological shape into a STEP file.
+   */
+  void write_STEP(const TopoDS_Shape &shape,
+                  const std::string &filename);
+
+  /**
+   * This function returns the tolerance associated with the shape. Each CAD
+   * geometrical object is defined along with a tolerance, which indicates
+   * possible inaccuracy of its placement. For instance, the tolerance of a
+   * vertex indicates that it can be located in any point contained in a
+   * sphere centered in the nominal position and having radius tol. While
+   * carrying out an operation such as projecting a point onto a surface
+   * (which will in turn have its tolerance) we must keep in mind that the
+   * precision of the projection will be limited by the tolerance with which
+   * the surface is built.  The tolerance is computed taking the maximum
+   * tolerance among the subshapes composing the shape.
+   */
+  double get_shape_tolerance(const TopoDS_Shape &shape);
+
+  /**
+   * Perform the intersection of the given topological shape with the plane
+   * $c_x x + c_y y + c_z z +c = 0$. The returned topological shape will
+   * contain as few bsplines as possible. An exception is thrown if the
+   * intersection produces an empty shape.
+   */
+  TopoDS_Shape  intersect_plane(const TopoDS_Shape &in_shape,
+                                const double c_x,
+                                const double c_y,
+                                const double c_z,
+                                const double c,
+                                const double tolerance=1e-7);
+
+  /**
+   * Try to join all edges contained in the given TopoDS_Shape into a single
+   * TopoDS_Edge, containing as few BSPlines as possible. If the input shape
+   * contains faces, they will be ignored by this function. If the contained
+   * edges cannot be joined into a single one, i.e., they form disconnected
+   * curves, an exception will be thrown.
+   */
+  TopoDS_Edge join_edges(const TopoDS_Shape &in_shape,
+                         const double tolerance=1e-7);
+
+  /**
+   * Creates a 3D smooth BSpline curve passing through the points in the
+   * assigned vector, and store it in the returned TopoDS_Shape (which is of
+   * type TopoDS_Edge). The points are reordered internally according to their
+   * scalar product with the direction, if direction is different from zero,
+   * otherwise they are used as passed. Notice that this function changes the
+   * input points if required by the algorithm.
+   *
+   * This function interpolates a BSpline curve passing through an array of
+   * points, with C2 continuity. If the optional parameter @p closed is set
+   * to true, then the curve will be C2 at all points except the first (where
+   * only C1 continuity will be given), and it will be a closed curve.
+   *
+   * The curve is guaranteed to be at distance @p tolerance from the input
+   * points. If the algorithm fails in generating such a curve, an exception
+   * is thrown.
+   */
+  TopoDS_Edge interpolation_curve(std::vector<Point<3> >  &curve_points,
+                                  const Tensor<1,3> &direction=Tensor<1,3>(),
+                                  const bool closed=false,
+                                  const double tolerance=1e-7);
+
+  /**
+   * Extract all subshapes from a TopoDS_Shape, and store the results into
+   * standard containers. If the shape does not contain a certain type of
+   * shape, the respective container will be empty.
+   */
+  void extract_geometrical_shapes(const TopoDS_Shape &shape,
+                                  std::vector<TopoDS_Face> &faces,
+                                  std::vector<TopoDS_Edge> &edges,
+                                  std::vector<TopoDS_Vertex> &vertices);
+
+  /**
+   * Create a triangulation from a single face. This function extracts the
+   * first u and v parameter of the parametric surface making up this face,
+   * and creates a Triangulation<2,3> containing a single coarse cell
+   * reflecting this face. If the surface is not a trimmed surface, the
+   * vertices of this cell will coincide with the TopoDS_Vertex vertices of
+   * the original TopoDS_Face. This, however, is often not the case, and the
+   * user should be careful on how this mesh is used.
+   */
+  void create_triangulation(const TopoDS_Face &face,
+                            Triangulation<2,3> &tria);
+
+  /**
+   * Extract all compound shapes from a TopoDS_Shape, and store the results
+   * into standard containers. If the shape does not contain a certain type of
+   * compound, the respective container will be empty.
+   */
+  void extract_compound_shapes(const TopoDS_Shape &shape,
+                               std::vector<TopoDS_Compound> &compounds,
+                               std::vector<TopoDS_CompSolid> &compsolids,
+                               std::vector<TopoDS_Solid> &solids,
+                               std::vector<TopoDS_Shell> &shells,
+                               std::vector<TopoDS_Wire> &wires);
+
+  /**
+   * Project the point @p origin on the topological shape given by @p
+   * in_shape, and returns the projected point, the subshape which contains
+   * the point and the parametric u and v coordinates of the point within the
+   * resulting shape. If the shape is not elementary, all its subshapes are
+   * iterated, faces first, then edges, and the returned shape is the closest
+   * one to the point @p origin. If the returned shape is an edge, then only
+   * the u coordinate is filled with sensible information, and the v
+   * coordinate is set to zero.
+   *
+   * This function returns a tuple containing the projected point, the shape,
+   * the u coordinate and the v coordinate (which is different from zero only
+   * if the resulting shape is a face).
+   */
+  std_cxx11::tuple<Point<3>, TopoDS_Shape, double, double>
+  project_point_and_pull_back(const TopoDS_Shape &in_shape,
+                              const Point<3> &origin,
+                              const double tolerance=1e-7);
+
+  /**
+   * Return the projection of the point @p origin on the topological shape
+   * given by @p in_shape. If the shape is not elementary, all its subshapes
+   * are iterated, faces first, then edges, and the returned point is the
+   * closest one to the @p in_shape, regardless of its type.
+   */
+  Point<3> closest_point(const TopoDS_Shape &in_shape,
+                         const Point<3> &origin,
+                         const double tolerance=1e-7);
+
+  /**
+   * Given an elementary shape @p in_shape and the reference coordinates
+   * within the shape, returns the corresponding point in real space. If the
+   * shape is a TopoDS_Edge, the @p v coordinate is ignored. Only edges or
+   * faces, as returned by the function project_point_and_pull_back(), can be
+   * used as input to this function. If this is not the case, an Exception is
+   * thrown.
+   */
+  Point<3> push_forward(const TopoDS_Shape &in_shape,
+                        const double u,
+                        const double v);
+
+
+  /**
+   * Given a TopoDS_Face @p face and the reference coordinates within this
+   * face, returns the corresponding point in real space, the normal to the
+   * surface at that point and the mean curvature as a tuple.
+   */
+  std_cxx11::tuple<Point<3>, Point<3>, double >
+  push_forward_and_differential_forms(const TopoDS_Face &face,
+                                      const double u,
+                                      const double v,
+                                      const double tolerance=1e-7);
+
+
+  /**
+   * Get the closest point to the given topological shape, together with the
+   * normal and the mean curvature at that point. If the shape is not
+   * elementary, all its sub-faces (only the faces) are iterated, faces first,
+   * and only the closest point is returned. This function will throw an
+   * exception if the @p in_shape does not contain at least one face.
+   */
+  std_cxx11::tuple<Point<3>, Point<3>, double>
+  closest_point_and_differential_forms(const TopoDS_Shape &in_shape,
+                                       const Point<3> &origin,
+                                       const double tolerance=1e-7);
+
+
+  /**
+   * Intersect a line passing through the given @p origin point along @p
+   * direction and the given topological shape. If there is more than one
+   * intersection, it will return the closest one.
+   *
+   * The optional @p tolerance parameter is used to compute distances.
+   */
+  Point<3> line_intersection(const TopoDS_Shape &in_shape,
+                             const Point<3> &origin,
+                             const Tensor<1,3> &direction,
+                             const double tolerance=1e-7);
+
+
+  /**
+   * Convert OpenCASCADE point into a Point<3>.
+   */
+  Point<3> point(const gp_Pnt &p);
+
+
+  /**
+   * Convert Point<3> into OpenCASCADE point.
+   */
+  gp_Pnt point(const Point<3> &p);
+
+
+  /**
+   * Sort two points according to their scalar product with direction. If the
+   * norm of the direction is zero, then use lexicographical ordering. The
+   * optional parameter is used as a relative tolerance when comparing
+   * objects.
+   */
+  bool point_compare(const Point<3> &p1, const Point<3> &p2,
+                     const Tensor<1,3> &direction=Tensor<1,3>(),
+                     const double tolerance=1e-10);
+
+
+  /**
+   * Exception thrown when the point specified as argument does not lie
+   * within @p tolerance of the given TopoDS_Shape.
+   */
+  DeclException1 (ExcPointNotOnManifold,
+                  Point<3>,
+                  <<"The point [ "<<arg1<<" ] is not on the manifold.");
+
+  /**
+   * Exception thrown when the point specified as argument cannot be projected
+   * to the manifold.
+   */
+  DeclException1 (ExcProjectionFailed,
+                  Point<3>,
+                  <<"Projection of point [ "<< arg1
+                  << " ] failed.");
+
+  /**
+   * Thrown when internal OpenCASCADE utilities fail to return the OK status.
+   */
+  DeclException1 (ExcOCCError,
+                  IFSelect_ReturnStatus,
+                  <<"An OpenCASCADE routine failed with return status "
+                  <<arg1);
+
+  /**
+   * Trying to make curve operations on a degenerate edge.
+   */
+  DeclException0(ExcEdgeIsDegenerate);
+
+  /**
+   * Trying to make operations on the wrong type of shapes.
+   */
+  DeclException0(ExcUnsupportedShape);
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_OPENCASCADE
+
+/*------------------------------ occ_utilities.h ------------------------------*/
+#endif
+/*------------------------------ occ_utilities.h ------------------------------*/
diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
new file mode 100644
index 0000000..b7bb501
--- /dev/null
+++ b/source/CMakeLists.txt
@@ -0,0 +1,144 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2016 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+MESSAGE(STATUS "Setting up library")
+
+#
+# Compile the deal.II library
+#
+
+INCLUDE_DIRECTORIES(
+  ${CMAKE_BINARY_DIR}/include/
+  ${CMAKE_SOURCE_DIR}/include/
+  ${DEAL_II_BUNDLED_INCLUDE_DIRS}
+  ${DEAL_II_INCLUDE_DIRS}
+  )
+
+#
+# List the directories where we have source files. The ones with the longest
+# compile jobs come first so that 'make -j N' saturates many processors also
+# towards the end of compiling rather than having to wait for one long
+# compilation that, because it has been listed last, is started towards the
+# end of everything (e.g. numerics/vectors.cc takes several minutes to
+# compile...)
+#
+ADD_SUBDIRECTORY(numerics)
+ADD_SUBDIRECTORY(fe)
+ADD_SUBDIRECTORY(dofs)
+ADD_SUBDIRECTORY(lac)
+ADD_SUBDIRECTORY(base)
+ADD_SUBDIRECTORY(grid)
+ADD_SUBDIRECTORY(hp)
+ADD_SUBDIRECTORY(multigrid)
+ADD_SUBDIRECTORY(distributed)
+ADD_SUBDIRECTORY(algorithms)
+ADD_SUBDIRECTORY(integrators)
+ADD_SUBDIRECTORY(matrix_free)
+ADD_SUBDIRECTORY(meshworker)
+ADD_SUBDIRECTORY(opencascade)
+
+FOREACH(build ${DEAL_II_BUILD_TYPES})
+  STRING(TOLOWER ${build} build_lowercase)
+
+  #
+  # Combine all ${build} OBJECT targets to a ${build} library:
+  #
+
+  GET_PROPERTY(_objects GLOBAL PROPERTY DEAL_II_OBJECTS_${build})
+  ADD_LIBRARY(${DEAL_II_BASE_NAME}${DEAL_II_${build}_SUFFIX}
+    dummy.cc # Workaround for a bug in the Xcode generator
+    ${_objects}
+    )
+  ADD_DEPENDENCIES(library ${DEAL_II_BASE_NAME}${DEAL_II_${build}_SUFFIX})
+
+  SET_TARGET_PROPERTIES(${DEAL_II_BASE_NAME}${DEAL_II_${build}_SUFFIX}
+    PROPERTIES
+    VERSION "${DEAL_II_PACKAGE_VERSION}"
+    #
+    # Sonaming: Well... we just use the version number.
+    # No point to wrack one's brain over the question whether a new version of
+    # a C++ library is still ABI backwards compatible :-]
+    #
+    SOVERSION "${DEAL_II_PACKAGE_VERSION}"
+    LINK_FLAGS "${DEAL_II_LINKER_FLAGS} ${DEAL_II_LINKER_FLAGS_${build}}"
+    LINKER_LANGUAGE "CXX"
+    COMPILE_DEFINITIONS "${DEAL_II_DEFINITIONS};${DEAL_II_DEFINITIONS_${build}}"
+    COMPILE_FLAGS "${DEAL_II_CXX_FLAGS} ${DEAL_II_CXX_FLAGS_${build}}"
+    ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/${DEAL_II_LIBRARY_RELDIR}"
+    LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/${DEAL_II_LIBRARY_RELDIR}"
+    RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/${DEAL_II_EXECUTABLE_RELDIR}"
+    )
+
+  IF(CMAKE_SYSTEM_NAME MATCHES "Darwin")
+    SET_TARGET_PROPERTIES(${DEAL_II_BASE_NAME}${DEAL_II_${build}_SUFFIX}
+      PROPERTIES
+      MACOSX_RPATH OFF
+      BUILD_WITH_INSTALL_RPATH OFF
+      INSTALL_NAME_DIR "${CMAKE_INSTALL_PREFIX}/${DEAL_II_LIBRARY_RELDIR}"
+      )
+  ENDIF()
+
+  # Under Windows (MSVC) cmake will always generate multi-configuration
+  # projects. When building on the command line with 'cmake --build .',
+  # release and debug builds of the library are done with the default 'Debug'
+  # configuration. This causes the debug and release .lib to be built inside
+  # ./lib/Debug/. This is not very pretty and confuses example/test projects,
+  # so we just hard-wire the location here. We only really need to set static
+  # lib locations for _DEBUG (no support for dynamic linking, _RELEASE will be
+  # ignored), but we do it anyhow.
+  IF (DEAL_II_MSVC)
+    SET_PROPERTY(TARGET ${DEAL_II_BASE_NAME}${DEAL_II_${build}_SUFFIX} PROPERTY
+      LIBRARY_OUTPUT_DIRECTORY_DEBUG "${CMAKE_BINARY_DIR}/${DEAL_II_LIBRARY_RELDIR}"
+      )
+    SET_PROPERTY(TARGET ${DEAL_II_BASE_NAME}${DEAL_II_${build}_SUFFIX} PROPERTY
+      ARCHIVE_OUTPUT_DIRECTORY_DEBUG "${CMAKE_BINARY_DIR}/${DEAL_II_LIBRARY_RELDIR}"
+      )
+    SET_PROPERTY(TARGET ${DEAL_II_BASE_NAME}${DEAL_II_${build}_SUFFIX} PROPERTY
+      LIBRARY_OUTPUT_DIRECTORY_RELEASE "${CMAKE_BINARY_DIR}/${DEAL_II_LIBRARY_RELDIR}"
+      )
+    SET_PROPERTY(TARGET ${DEAL_II_BASE_NAME}${DEAL_II_${build}_SUFFIX} PROPERTY
+      ARCHIVE_OUTPUT_DIRECTORY_RELEASE "${CMAKE_BINARY_DIR}/${DEAL_II_LIBRARY_RELDIR}"
+      )
+  ENDIF()
+
+
+  TARGET_LINK_LIBRARIES(${DEAL_II_BASE_NAME}${DEAL_II_${build}_SUFFIX}
+    ${DEAL_II_LIBRARIES_${build}}
+    ${DEAL_II_LIBRARIES}
+    )
+
+  FILE(MAKE_DIRECTORY
+    ${CMAKE_BINARY_DIR}/${DEAL_II_PROJECT_CONFIG_RELDIR}
+    )
+  EXPORT(TARGETS ${DEAL_II_BASE_NAME}${DEAL_II_${build}_SUFFIX}
+    FILE ${CMAKE_BINARY_DIR}/${DEAL_II_PROJECT_CONFIG_RELDIR}/${DEAL_II_PROJECT_CONFIG_NAME}Targets.cmake
+    APPEND
+    )
+
+  INSTALL(TARGETS ${DEAL_II_BASE_NAME}${DEAL_II_${build}_SUFFIX}
+    COMPONENT library
+    EXPORT ${DEAL_II_PROJECT_CONFIG_NAME}Targets
+    RUNTIME DESTINATION ${DEAL_II_EXECUTABLE_RELDIR}
+    LIBRARY DESTINATION ${DEAL_II_LIBRARY_RELDIR}
+    ARCHIVE DESTINATION ${DEAL_II_LIBRARY_RELDIR}
+    )
+ENDFOREACH()
+
+INSTALL(EXPORT ${DEAL_II_PROJECT_CONFIG_NAME}Targets
+  DESTINATION ${DEAL_II_PROJECT_CONFIG_RELDIR}
+  COMPONENT library
+  )
+
+MESSAGE(STATUS "Setting up library - Done")
diff --git a/source/algorithms/CMakeLists.txt b/source/algorithms/CMakeLists.txt
new file mode 100644
index 0000000..8e5c633
--- /dev/null
+++ b/source/algorithms/CMakeLists.txt
@@ -0,0 +1,32 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2013 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_BINARY_DIR})
+
+SET(_src
+  operator.cc
+  timestep_control.cc
+  )
+
+SET(_inst
+  operator.inst.in
+  )
+
+FILE(GLOB _header
+  ${CMAKE_SOURCE_DIR}/include/deal.II/algorithms/*.h
+  )
+
+DEAL_II_ADD_LIBRARY(obj_algorithms OBJECT ${_src} ${_header} ${_inst})
+EXPAND_INSTANTIATIONS(obj_algorithms "${_inst}")
diff --git a/source/algorithms/operator.cc b/source/algorithms/operator.cc
new file mode 100644
index 0000000..475f6ea
--- /dev/null
+++ b/source/algorithms/operator.cc
@@ -0,0 +1,57 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/parameter_handler.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/lac/vector_memory.h>
+
+#include <deal.II/algorithms/operator.templates.h>
+#include <deal.II/algorithms/newton.templates.h>
+#include <deal.II/algorithms/theta_timestepping.templates.h>
+
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/parallel_vector.h>
+#include <deal.II/lac/parallel_block_vector.h>
+#include <deal.II/lac/petsc_vector.h>
+#include <deal.II/lac/petsc_block_vector.h>
+#include <deal.II/lac/trilinos_vector.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace Algorithms
+{
+  OperatorBase::~OperatorBase()
+  {}
+  // Merge the event @p e into the stored notifications via operator+=.
+  void OperatorBase::notify(const Event &e)
+  {
+    notifications += e;
+  }
+  // Reset the stored notifications by calling clear() on them.
+  void
+  OperatorBase::clear_events ()
+  {
+    notifications.clear();
+  }
+
+  // NOTE(review): operator.inst is presumably generated from operator.inst.in
+#include "operator.inst"
+}
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/algorithms/operator.inst.in b/source/algorithms/operator.inst.in
new file mode 100644
index 0000000..d036004
--- /dev/null
+++ b/source/algorithms/operator.inst.in
@@ -0,0 +1,22 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+for (VEC : SERIAL_VECTORS)
+{  
+  template class OutputOperator<VEC>;
+  template class Newton<VEC>;
+  template class ThetaTimestepping<VEC>;
+}
diff --git a/source/algorithms/timestep_control.cc b/source/algorithms/timestep_control.cc
new file mode 100644
index 0000000..7f261b6
--- /dev/null
+++ b/source/algorithms/timestep_control.cc
@@ -0,0 +1,144 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/algorithms/timestep_control.h>
+#include <deal.II/base/parameter_handler.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+using namespace Algorithms;
+
+// Set up time stepping from start to final: the nominal and the current
+// step both begin at start_step, the strategy is uniform, the current time
+// is the start time, and the format string is set to "T.%06.3f".
+TimestepControl::TimestepControl (double start, double final,
+                                  double tolerance, double start_step,
+                                  double print_step, double max_step)
+  : start_val(start),
+    final_val(final),
+    tolerance_val(tolerance),
+    strategy_val(uniform),
+    start_step_val(start_step),
+    max_step_val(max_step),
+    min_step_val(0),
+    current_step_val(start_step),
+    step_val(start_step),
+    print_step(print_step)
+{
+  now_val = start_val;
+  // print() reads next_print_val when print_step > 0; don't leave it unset
+  next_print_val = (print_step > 0.) ? now_val + print_step : now_val - 1.;
+  strcpy(format, "T.%06.3f");
+  (void)min_step_val; // avoid compiler warning
+}
+
+// Declare the entries "Start", "Final", "First step", "Max step",
+// "Tolerance", "Print step" and "Strategy" (uniform|doubling) with defaults.
+void TimestepControl::declare_parameters (ParameterHandler &param)
+{
+  param.declare_entry ("Start", "0.", Patterns::Double());
+  param.declare_entry ("Final", "1.", Patterns::Double());
+  param.declare_entry ("First step", "1.e-2", Patterns::Double());
+  param.declare_entry ("Max step", "1.", Patterns::Double());
+  param.declare_entry ("Tolerance", "1.e-2", Patterns::Double());
+  param.declare_entry ("Print step", "-1.", Patterns::Double());
+  param.declare_entry ("Strategy", "uniform",
+                       Patterns::Selection("uniform|doubling"));
+}
+
+
+// Read the entries declared in declare_parameters() from param and apply
+// them; the "Strategy" text selects between uniform and doubling.
+void TimestepControl::parse_parameters (ParameterHandler &param)
+{
+  start (param.get_double ("Start"));
+  start_step (param.get_double ("First step"));
+  max_step (param.get_double ("Max step"));
+  final (param.get_double ("Final"));
+  tolerance (param.get_double ("Tolerance"));
+  print_step = param.get_double ("Print step");
+  const std::string strategy_name = param.get("Strategy");
+  if (strategy_name == "uniform")
+    strategy_val = uniform;
+  else if (strategy_name == "doubling")
+    strategy_val = doubling;
+}
+
+
+
+
+// Advance the current time by one step. Returns true if the step size used
+// differs from the previous nominal step (either because the strategy
+// changed it or because the step was adjusted to hit the final time), and
+// false otherwise.
+bool
+TimestepControl::advance ()
+{
+  // Candidate for the new nominal step size, starting from the current one.
+  double new_step = step_val;
+
+  // Step size control is skipped while we still sit at the start time.
+  if (now_val != start())
+    {
+      if (strategy_val == doubling && 2*new_step <= tolerance_val)
+        new_step *= 2;
+      if (new_step > max_step_val)
+        new_step = max_step_val;
+    }
+
+  // Tentative new time, and whether the nominal step size was modified.
+  double next_time = now_val + new_step;
+  bool step_changed = (new_step != step_val);
+
+  step_val = new_step;
+  current_step_val = new_step;
+
+  // If the tentative time lands within 1% of a step before the final time,
+  // or beyond it, stretch or shrink this step so that the final time is hit
+  // exactly. This avoids a very small last step.
+  const double slack = .01*new_step;
+  if (next_time > final_val-slack)
+    {
+      current_step_val = final_val - now_val;
+      next_time = final_val;
+      step_changed = true;
+    }
+
+  now_val = next_time;
+  return step_changed;
+}
+
+
+// Decide whether output is due at the current time: never if print_step is
+// zero, always if it is negative, otherwise once next_print_val is reached.
+bool TimestepControl::print ()
+{
+  if (print_step == 0.)
+    return false;
+  if (print_step < 0.)
+    return true;
+  if (!(now_val >= next_print_val))
+    return false;
+
+  // Output is due: schedule the next one, capped at the final time.
+  next_print_val += print_step;
+  if (next_print_val > final_val)
+    next_print_val = final_val;
+  return true;
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
diff --git a/source/base/CMakeLists.txt b/source/base/CMakeLists.txt
new file mode 100644
index 0000000..d780f23
--- /dev/null
+++ b/source/base/CMakeLists.txt
@@ -0,0 +1,88 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_BINARY_DIR})
+
+SET(_src
+  auto_derivative_function.cc
+  conditional_ostream.cc
+  config.cc
+  convergence_table.cc
+  data_out_base.cc
+  event.cc
+  exceptions.cc
+  flow_function.cc
+  function.cc
+  function_derivative.cc
+  function_lib.cc
+  function_lib_cutoff.cc
+  function_parser.cc
+  function_time.cc
+  geometry_info.cc
+  index_set.cc
+  job_identifier.cc
+  logstream.cc
+  mpi.cc
+  multithread_info.cc
+  named_selection.cc
+  parallel.cc
+  parameter_handler.cc
+  parsed_function.cc
+  partitioner.cc
+  path_search.cc
+  polynomial.cc
+  polynomials_abf.cc
+  polynomials_adini.cc
+  polynomials_bernstein.cc
+  polynomials_bdm.cc
+  polynomials_nedelec.cc
+  polynomial_space.cc
+  polynomials_p.cc
+  polynomials_piecewise.cc
+  polynomials_rannacher_turek.cc
+  polynomials_raviart_thomas.cc
+  quadrature.cc
+  quadrature_lib.cc
+  quadrature_selector.cc
+  subscriptor.cc
+  symmetric_tensor.cc
+  table_handler.cc
+  tensor_function.cc
+  tensor_product_polynomials.cc
+  tensor_product_polynomials_bubbles.cc
+  tensor_product_polynomials_const.cc
+  thread_management.cc
+  timer.cc
+  time_stepping.cc
+  utilities.cc
+  )
+
+SET(_inst
+  data_out_base.inst.in
+  function.inst.in
+  function_time.inst.in
+  polynomials_rannacher_turek.inst.in
+  tensor_function.inst.in
+  time_stepping.inst.in
+  )
+
+FILE(GLOB _header
+  ${CMAKE_SOURCE_DIR}/include/deal.II/base/*.h
+  )
+
+DEAL_II_ADD_LIBRARY(obj_base OBJECT ${_src} ${_header} ${_inst}
+  ${CMAKE_BINARY_DIR}/include/deal.II/base/config.h
+  )
+EXPAND_INSTANTIATIONS(obj_base "${_inst}")
diff --git a/source/base/auto_derivative_function.cc b/source/base/auto_derivative_function.cc
new file mode 100644
index 0000000..5b26745
--- /dev/null
+++ b/source/base/auto_derivative_function.cc
@@ -0,0 +1,347 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/point.h>
+#include <deal.II/base/auto_derivative_function.h>
+#include <deal.II/lac/vector.h>
+
+#include <cmath>
+
+DEAL_II_NAMESPACE_OPEN
+/* Constructor: forwards n_components and initial_time to the Function base
+   class, installs the step width hh through set_h() and finally calls
+   set_formula() without arguments. */
+template <int dim>
+AutoDerivativeFunction<dim>::
+AutoDerivativeFunction (const double hh,
+                        const unsigned int n_components,
+                        const double       initial_time)
+  :
+  Function<dim>(n_components, initial_time),
+  h(1),      // provisional value; set_h(hh) below stores the requested width
+  ht(dim),   // set_h(hh) below writes ht[i][i] for all i<dim
+  formula(Euler)
+{
+  set_h(hh);
+  set_formula();  // no argument given: the default from the declaration is used
+}
+
+// Destructor; the body is empty.
+template <int dim>
+AutoDerivativeFunction<dim>::~AutoDerivativeFunction ()
+{}
+
+
+// Store @p form as the difference formula that the gradient functions of this class switch on.
+template <int dim>
+void
+AutoDerivativeFunction<dim>::set_formula (const DifferenceFormula form)
+{
+  formula = form;
+}
+
+
+// Store @p hh as the step width and refresh the per-direction offsets:
+// for every coordinate d, the d-th component of ht[d] is set to the new
+// width.
+template <int dim>
+void
+AutoDerivativeFunction<dim>::set_h (const double hh)
+{
+  h = hh;
+
+  unsigned int d = 0;
+  while (d < dim)
+    {
+      ht[d][d] = h;
+      ++d;
+    }
+}
+
+
+// Approximate the gradient of component @p comp at the point @p p by
+// finite differences of value(). With f = value(., comp) and the offsets
+// ht[i] set up by set_h(), the i-th entry of the result is
+//   UpwindEuler:  (f(p) - f(p-ht[i])) / h
+//   Euler:        (f(p+ht[i]) - f(p-ht[i])) / (2h)
+//   FourthOrder:  (-f(p+2ht[i]) + 8f(p+ht[i]) - 8f(p-ht[i]) + f(p-2ht[i])) / (12h)
+// For any other value of the formula member, ExcInvalidFormula is raised
+// through Assert and the default-constructed tensor is returned.
+template <int dim>
+Tensor<1,dim>
+AutoDerivativeFunction<dim>::gradient (const Point<dim>   &p,
+                                       const unsigned int  comp) const
+{
+  Tensor<1,dim> grad;
+  switch (formula)
+    {
+    case UpwindEuler:
+    {
+      // the value at p itself is the same for every direction, so
+      // evaluate it once instead of once per coordinate
+      const double value_at_p = this->value(p, comp);
+      Point<dim> q1;
+      for (unsigned int i=0; i<dim; ++i)
+        {
+          q1=p-ht[i];
+          grad[i]=(value_at_p-this->value(q1, comp))/h;
+        }
+      break;
+    }
+    case Euler:
+    {
+      Point<dim> q1, q2;
+      for (unsigned int i=0; i<dim; ++i)
+        {
+          q1=p+ht[i];
+          q2=p-ht[i];
+          grad[i]=(this->value(q1, comp)-this->value(q2, comp))/(2*h);
+        }
+      break;
+    }
+    case FourthOrder:
+    {
+      // q1, q2 lie two and one step ahead of p, q3, q4 one and two
+      // steps behind
+      Point<dim> q1, q2, q3, q4;
+      for (unsigned int i=0; i<dim; ++i)
+        {
+          q2=p+ht[i];
+          q1=q2+ht[i];
+          q3=p-ht[i];
+          q4=q3-ht[i];
+          grad[i]=(-  this->value(q1, comp)
+                   +8*this->value(q2, comp)
+                   -8*this->value(q3, comp)
+                   +  this->value(q4, comp))/(12*h);
+
+        }
+      break;
+    }
+    default:
+      Assert(false, ExcInvalidFormula());
+    }
+  return grad;
+}
+
+
+// Approximate the gradients of all components at the point @p p by finite
+// differences of vector_value(), using the currently selected formula and
+// step width h. @p gradients must already have n_components entries;
+// gradients[comp][i] receives the difference quotient of component comp
+// in coordinate direction i. For an unknown formula, ExcInvalidFormula
+// is raised through Assert and @p gradients is left untouched.
+template <int dim>
+void
+AutoDerivativeFunction<dim>::
+vector_gradient (const Point<dim>            &p,
+                 std::vector<Tensor<1,dim> > &gradients) const
+{
+  Assert (gradients.size() == this->n_components,
+          ExcDimensionMismatch(gradients.size(), this->n_components));
+
+  switch (formula)
+    {
+    case UpwindEuler:
+    {
+      Point<dim> q1;
+      Vector<double> v(this->n_components), v1(this->n_components);
+      const double h_inv=1./h;
+
+      // the values at p itself do not depend on the direction, so
+      // evaluate them once instead of once per coordinate
+      this->vector_value(p, v);
+      for (unsigned int i=0; i<dim; ++i)
+        {
+          q1=p-ht[i];
+          this->vector_value(q1, v1);
+
+          for (unsigned int comp=0; comp<this->n_components; ++comp)
+            gradients[comp][i]=(v(comp)-v1(comp))*h_inv;
+        }
+      break;
+    }
+
+    case Euler:
+    {
+      Point<dim> q1, q2;
+      Vector<double> v1(this->n_components), v2(this->n_components);
+      const double h_inv_2=1./(2*h);
+      for (unsigned int i=0; i<dim; ++i)
+        {
+          q1=p+ht[i];
+          q2=p-ht[i];
+          this->vector_value(q1, v1);
+          this->vector_value(q2, v2);
+
+          for (unsigned int comp=0; comp<this->n_components; ++comp)
+            gradients[comp][i]=(v1(comp)-v2(comp))*h_inv_2;
+        }
+      break;
+    }
+
+    case FourthOrder:
+    {
+      // q1, q2 lie two and one step ahead of p, q3, q4 one and two
+      // steps behind
+      Point<dim> q1, q2, q3, q4;
+      Vector<double>
+      v1(this->n_components), v2(this->n_components),
+      v3(this->n_components), v4(this->n_components);
+      const double h_inv_12=1./(12*h);
+      for (unsigned int i=0; i<dim; ++i)
+        {
+          q2=p+ht[i];
+          q1=q2+ht[i];
+          q3=p-ht[i];
+          q4=q3-ht[i];
+          this->vector_value(q1, v1);
+          this->vector_value(q2, v2);
+          this->vector_value(q3, v3);
+          this->vector_value(q4, v4);
+
+          for (unsigned int comp=0; comp<this->n_components; ++comp)
+            gradients[comp][i]=(-v1(comp)+8*v2(comp)-8*v3(comp)+v4(comp))*h_inv_12;
+        }
+      break;
+    }
+
+    default:
+      Assert(false, ExcInvalidFormula());
+    }
+}
+
+
+// Approximate the gradient of component @p comp at each of the given
+// @p points by finite differences of value(), using the currently
+// selected formula and step width h. @p gradients must have the same
+// size as @p points; gradients[p][i] receives the difference quotient at
+// points[p] in coordinate direction i. For an unknown formula,
+// ExcInvalidFormula is raised through Assert and @p gradients is left
+// untouched.
+template <int dim>
+void
+AutoDerivativeFunction<dim>::
+gradient_list (const std::vector<Point<dim> > &points,
+               std::vector<Tensor<1,dim> >    &gradients,
+               const unsigned int              comp) const
+{
+  Assert (gradients.size() == points.size(),
+          ExcDimensionMismatch(gradients.size(), points.size()));
+
+  switch (formula)
+    {
+    case UpwindEuler:
+    {
+      Point<dim> q1;
+      for (unsigned int p=0; p<points.size(); ++p)
+        {
+          // the value at the point itself is the same for every
+          // direction, so evaluate it once per point
+          const double value_at_point = this->value(points[p], comp);
+          for (unsigned int i=0; i<dim; ++i)
+            {
+              q1=points[p]-ht[i];
+              gradients[p][i]=(value_at_point-this->value(q1, comp))/h;
+            }
+        }
+      break;
+    }
+
+    case Euler:
+    {
+      Point<dim> q1, q2;
+      for (unsigned int p=0; p<points.size(); ++p)
+        for (unsigned int i=0; i<dim; ++i)
+          {
+            q1=points[p]+ht[i];
+            q2=points[p]-ht[i];
+            gradients[p][i]=(this->value(q1, comp)-this->value(q2, comp))/(2*h);
+          }
+      break;
+    }
+
+    case FourthOrder:
+    {
+      // q1, q2 lie two and one step ahead of the point, q3, q4 one and
+      // two steps behind
+      Point<dim> q1, q2, q3, q4;
+      for (unsigned int p=0; p<points.size(); ++p)
+        for (unsigned int i=0; i<dim; ++i)
+          {
+            q2=points[p]+ht[i];
+            q1=q2+ht[i];
+            q3=points[p]-ht[i];
+            q4=q3-ht[i];
+            gradients[p][i]=(-  this->value(q1, comp)
+                             +8*this->value(q2, comp)
+                             -8*this->value(q3, comp)
+                             +  this->value(q4, comp))/(12*h);
+          }
+      break;
+    }
+
+    default:
+      Assert(false, ExcInvalidFormula());
+    }
+}
+
+
+
+// Approximate the gradients of all components at each of the given
+// @p points by finite differences of value(), using the currently
+// selected formula and step width h. @p gradients must have one entry per
+// point, and each entry must have n_components elements;
+// gradients[p][comp][i] receives the difference quotient of component
+// comp at points[p] in coordinate direction i. For an unknown formula,
+// ExcInvalidFormula is raised through Assert and @p gradients is left
+// untouched.
+template <int dim>
+void
+AutoDerivativeFunction<dim>::
+vector_gradient_list (const std::vector<Point<dim> >            &points,
+                      std::vector<std::vector<Tensor<1,dim> > > &gradients) const
+{
+  Assert (gradients.size() == points.size(),
+          ExcDimensionMismatch(gradients.size(), points.size()));
+  // report the size of the offending inner vector, not that of the
+  // outer one
+  for (unsigned int p=0; p<points.size(); ++p)
+    Assert (gradients[p].size() == this->n_components,
+            ExcDimensionMismatch(gradients[p].size(), this->n_components));
+
+  switch (formula)
+    {
+    case UpwindEuler:
+    {
+      Point<dim> q1;
+      for (unsigned int p=0; p<points.size(); ++p)
+        for (unsigned int i=0; i<dim; ++i)
+          {
+            q1=points[p]-ht[i];
+            for (unsigned int comp=0; comp<this->n_components; ++comp)
+              gradients[p][comp][i]=(this->value(points[p], comp)-this->value(q1, comp))/h;
+          }
+      break;
+    }
+
+    case Euler:
+    {
+      Point<dim> q1, q2;
+      for (unsigned int p=0; p<points.size(); ++p)
+        for (unsigned int i=0; i<dim; ++i)
+          {
+            q1=points[p]+ht[i];
+            q2=points[p]-ht[i];
+            for (unsigned int comp=0; comp<this->n_components; ++comp)
+              gradients[p][comp][i]=(this->value(q1, comp) -
+                                     this->value(q2, comp))/(2*h);
+          }
+      break;
+    }
+
+    case FourthOrder:
+    {
+      // q1, q2 lie two and one step ahead of the point, q3, q4 one and
+      // two steps behind
+      Point<dim> q1, q2, q3, q4;
+      for (unsigned int p=0; p<points.size(); ++p)
+        for (unsigned int i=0; i<dim; ++i)
+          {
+            q2=points[p]+ht[i];
+            q1=q2+ht[i];
+            q3=points[p]-ht[i];
+            q4=q3-ht[i];
+            for (unsigned int comp=0; comp<this->n_components; ++comp)
+              gradients[p][comp][i]=(-  this->value(q1, comp)
+                                     +8*this->value(q2, comp)
+                                     -8*this->value(q3, comp)
+                                     +  this->value(q4, comp))/(12*h);
+          }
+      break;
+    }
+
+    default:
+      Assert(false, ExcInvalidFormula());
+    }
+}
+
+
+// Map a requested order @p ord to a difference formula: 0 and 1 give
+// UpwindEuler, 2 gives Euler, 3 and 4 give FourthOrder. Any larger value
+// raises ExcNotImplemented through Assert and yields Euler.
+template <int dim>
+typename AutoDerivativeFunction<dim>::DifferenceFormula
+AutoDerivativeFunction<dim>::get_formula_of_order(const unsigned int ord)
+{
+  if (ord <= 1)
+    return UpwindEuler;
+  if (ord == 2)
+    return Euler;
+  if (ord <= 4)
+    return FourthOrder;
+
+  Assert(false, ExcNotImplemented());
+  return Euler;
+}
+
+// Explicit instantiations of the class template for dim = 1, 2 and 3.
+template class AutoDerivativeFunction<1>;
+template class AutoDerivativeFunction<2>;
+template class AutoDerivativeFunction<3>;
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/conditional_ostream.cc b/source/base/conditional_ostream.cc
new file mode 100644
index 0000000..1afb6d9
--- /dev/null
+++ b/source/base/conditional_ostream.cc
@@ -0,0 +1,40 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/conditional_ostream.h>
+
+DEAL_II_NAMESPACE_OPEN
+// Constructor: initializes output_stream from @p stream and active_flag from @p active.
+ConditionalOStream::ConditionalOStream(std::ostream &stream,
+                                       const bool    active)
+  :
+  output_stream (stream),
+  active_flag(active)
+{}
+
+// Replace the stored condition flag by @p flag.
+void ConditionalOStream::set_condition(bool flag)
+{
+  active_flag = flag;
+}
+
+// Return the condition flag as set by the constructor or the last call to set_condition().
+bool ConditionalOStream::is_active() const
+{
+  return active_flag;
+}
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/config.cc b/source/base/config.cc
new file mode 100644
index 0000000..983eef3
--- /dev/null
+++ b/source/base/config.cc
@@ -0,0 +1,41 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/config.h>
+#include <cmath>
+#include <complex>
+#include <limits>
+
+DEAL_II_NAMESPACE_OPEN
+// Definitions and explicit instantiations belonging to numbers::NumberTraits.
+namespace numbers
+{
+  template <typename number>
+  const bool NumberTraits<number>::is_complex;  // out-of-class definition of the static member, general template
+
+  template <typename number>
+  const bool NumberTraits<std::complex<number> >::is_complex;  // the same for the std::complex partial specialization
+
+// explicit instantiations
+  template struct NumberTraits<double>;
+  template struct NumberTraits<float>;
+  template struct NumberTraits<long double>;
+
+  template struct NumberTraits<std::complex<double> >;
+  template struct NumberTraits<std::complex<float> >;
+  template struct NumberTraits<std::complex<long double> >;
+}
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/convergence_table.cc b/source/base/convergence_table.cc
new file mode 100644
index 0000000..b61b24b
--- /dev/null
+++ b/source/base/convergence_table.cc
@@ -0,0 +1,247 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/convergence_table.h>
+#include <cmath>
+
+DEAL_II_NAMESPACE_OPEN
+// Default constructor; the body is empty.
+ConvergenceTable::ConvergenceTable()
+{}
+
+
+// Append a column with convergence rates of the column @p data_column_key,
+// measured against the column @p reference_column_key. With e_i and k_i
+// the numeric entries of the data and the reference column in row i, the
+// new entry in row i>0 is
+//   reduction_rate:       e_{i-1}/e_i * k_i/k_{i-1}
+//   reduction_rate_log2:  dim * log|e_{i-1}/e_i| / log|k_i/k_{i-1}|
+// and the first row gets the string "-". Rows that already exist in the
+// rate column are kept; only missing ones are appended. The rate column
+// is named <data_column_key>"...red.rate" resp. "...red.rate.log2", gets
+// its flag set and a precision of 2, and is merged with the data column
+// into a supercolumn named like the data column. For rate_mode none the
+// function returns without doing anything; any other unknown mode throws
+// ExcNotImplemented.
+void ConvergenceTable::evaluate_convergence_rates(const std::string &data_column_key,
+                                                  const std::string &reference_column_key,
+                                                  const RateMode     rate_mode,
+                                                  const unsigned int dim)
+{
+  Assert(columns.count(data_column_key),
+         ExcColumnNotExistent(data_column_key));
+  Assert(columns.count(reference_column_key),
+         ExcColumnNotExistent(reference_column_key));
+
+  if (rate_mode == none)
+    return;
+
+  // the rate column is filled from the top although it may not exist
+  // yet, so the auto fill mode has to be switched off
+  set_auto_fill_mode (false);
+
+  std::vector<internal::TableEntry> &data_entries = columns[data_column_key].entries;
+  std::vector<internal::TableEntry> &reference_entries = columns[reference_column_key].entries;
+
+  const unsigned int n = data_entries.size();
+  const unsigned int n_ref = reference_entries.size();
+  Assert(n == n_ref, ExcDimensionMismatch(n, n_ref));
+
+  // numeric copies of both columns
+  std::vector<double> values(n);
+  std::vector<double> ref_values(n_ref);
+  for (unsigned int row=0; row<n; ++row)
+    {
+      values[row]     = data_entries[row].get_numeric_value();
+      ref_values[row] = reference_entries[row].get_numeric_value();
+    }
+
+  std::string rate_key = data_column_key+"...";
+  unsigned int no_rate_entries = 0;
+
+  switch (rate_mode)
+    {
+    case none:
+      break;
+
+    case reduction_rate:
+    case reduction_rate_log2:
+    {
+      const bool plain_ratio = (rate_mode == reduction_rate);
+      rate_key += (plain_ratio ? "red.rate" : "red.rate.log2");
+
+      // Calculate all missing rate values:
+      no_rate_entries = columns[rate_key].entries.size();
+      for (unsigned int row = no_rate_entries; row<n; ++row)
+        {
+          if (row == 0)
+            // no value available for the first row
+            add_value(rate_key, std::string("-"));
+          else if (plain_ratio)
+            add_value(rate_key, values[row-1]/values[row] *
+                      ref_values[row]/ref_values[row-1]);
+          else
+            add_value(rate_key, dim*std::log(std::fabs(values[row-1]/values[row])) /
+                      std::log(std::fabs(ref_values[row]/ref_values[row-1])));
+        }
+      break;
+    }
+
+    default:
+      AssertThrow(false, ExcNotImplemented());
+    }
+
+  Assert(columns.count(rate_key), ExcInternalError());
+  columns[rate_key].flag = 1;
+  set_precision(rate_key, 2);
+
+  // the supercolumn carries the name and the tex caption of the data
+  // column; it is created only if it does not exist yet
+  const std::string superkey = data_column_key;
+  if (supercolumns.count(superkey) == 0)
+    {
+      add_column_to_supercolumn(data_column_key, superkey);
+      set_tex_supercaption(superkey, columns[data_column_key].tex_caption);
+    }
+
+  // only add rate_key to the supercolumn once
+  if (no_rate_entries == 0)
+    add_column_to_supercolumn(rate_key, superkey);
+}
+
+
+
+// Append a column with convergence rates of the column @p data_column_key,
+// computed from consecutive rows of that column alone. With e_i the
+// numeric entry in row i, the new entry in row i>0 is
+//   reduction_rate:       e_{i-1}/e_i
+//   reduction_rate_log2:  log|e_{i-1}/e_i| / log(2)
+// and the first row gets the string "-". Rows that already exist in the
+// rate column are kept; only missing ones are appended. The rate column
+// is named <data_column_key>"...red.rate" resp. "...red.rate.log2", gets
+// its flag set and a precision of 2, and is merged with the data column
+// into a supercolumn named like the data column. For rate_mode none the
+// function returns without modifying the table, like the overload taking
+// a reference column does; any other unknown mode throws
+// ExcNotImplemented.
+void
+ConvergenceTable::evaluate_convergence_rates(const std::string &data_column_key,
+                                             const RateMode     rate_mode)
+{
+  Assert(columns.count(data_column_key), ExcColumnNotExistent(data_column_key));
+
+  // nothing to evaluate. without this early exit no rate column would be
+  // filled below, so the existence check on it would fail in debug mode
+  // and an empty column named "<key>..." would be created otherwise
+  if (rate_mode == none)
+    return;
+
+  // reset the auto fill mode flag since we are going to fill columns from
+  // the top that don't yet exist
+  set_auto_fill_mode (false);
+
+  std::vector<internal::TableEntry> &entries = columns[data_column_key].entries;
+  std::string rate_key=data_column_key+"...";
+
+  const unsigned int n=entries.size();
+
+  std::vector<double> values(n);
+  for (unsigned int i=0; i<n; ++i)
+    values[i] = entries[i].get_numeric_value();
+
+  unsigned int no_rate_entries = 0;
+
+  switch (rate_mode)
+    {
+    case none:
+      // already handled above
+      break;
+
+    case reduction_rate:
+      rate_key+="red.rate";
+      no_rate_entries = columns[rate_key].entries.size();
+      // Calculate all missing rate values:
+      for (unsigned int i = no_rate_entries; i<n; ++i)
+        {
+          if (i == 0)
+            {
+              // no value available for the first row
+              add_value(rate_key, std::string("-"));
+            }
+          else
+            {
+              add_value(rate_key, values[i-1]/values[i]);
+            }
+        }
+      break;
+
+    case reduction_rate_log2:
+      rate_key+="red.rate.log2";
+      no_rate_entries = columns[rate_key].entries.size();
+      // Calculate all missing rate values:
+      for (unsigned int i = no_rate_entries; i<n; ++i)
+        {
+          if (i == 0)
+            {
+              // no value available for the first row
+              add_value(rate_key, std::string("-"));
+            }
+          else
+            {
+              add_value(rate_key, std::log(std::fabs(values[i-1]/values[i]))/std::log(2.0));
+            }
+        }
+      break;
+
+    default:
+      AssertThrow(false, ExcNotImplemented());
+    }
+
+  Assert(columns.count(rate_key), ExcInternalError());
+  columns[rate_key].flag=1;
+  set_precision(rate_key, 2);
+
+  // set the superkey equal to the key
+  std::string superkey=data_column_key;
+  // and set the tex caption of the supercolumn to the tex caption of the
+  // data_column.
+  if (!supercolumns.count(superkey))
+    {
+      add_column_to_supercolumn(data_column_key, superkey);
+      set_tex_supercaption(superkey, columns[data_column_key].tex_caption);
+    }
+
+  // only add rate_key to the supercolumn once
+  if (no_rate_entries == 0)
+    {
+      add_column_to_supercolumn(rate_key, superkey);
+    }
+}
+
+
+
+// Set the flag of the column @p key to 1. Columns whose flag is set are
+// skipped by the evaluate_all_convergence_rates() functions.
+void
+ConvergenceTable::omit_column_from_convergence_rate_evaluation(const std::string &key)
+{
+  Assert(columns.count(key), ExcColumnNotExistent(key));
+
+  columns.find(key)->second.flag = 1;
+}
+
+
+
+// Call evaluate_convergence_rates() with the given reference column and
+// rate mode for every column whose flag is not set.
+void
+ConvergenceTable::evaluate_all_convergence_rates(const std::string &reference_column_key,
+                                                 const RateMode rate_mode)
+{
+  typedef std::map<std::string, Column>::const_iterator column_iterator;
+
+  for (column_iterator column=columns.begin(); column!=columns.end(); ++column)
+    {
+      // flagged columns are left out
+      if (column->second.flag)
+        continue;
+
+      evaluate_convergence_rates(column->first, reference_column_key, rate_mode);
+    }
+}
+
+
+
+// Call evaluate_convergence_rates() with the given rate mode for every
+// column whose flag is not set.
+void
+ConvergenceTable::evaluate_all_convergence_rates(const RateMode rate_mode)
+{
+  typedef std::map<std::string, Column>::const_iterator column_iterator;
+
+  for (column_iterator column=columns.begin(); column!=columns.end(); ++column)
+    {
+      // flagged columns are left out
+      if (column->second.flag)
+        continue;
+
+      evaluate_convergence_rates(column->first, rate_mode);
+    }
+}
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/data_out_base.cc b/source/base/data_out_base.cc
new file mode 100644
index 0000000..8e5a78b
--- /dev/null
+++ b/source/base/data_out_base.cc
@@ -0,0 +1,7512 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+//TODO: Do neighbors for dx and povray smooth triangles
+
+//////////////////////////////////////////////////////////////////////
+// Remarks on the implementations
+//
+// Variable names: in most functions, variable names have been
+// standardized in the following way:
+//
+// n1, n2, ni Number of points in coordinate direction 1, 2, i
+//    will be 1 if i>=dim
+//
+// i1, i2, ii Loop variable running up to ni
+//
+// d1, d2, di Multipliers for ii to find positions in the
+//    array of nodes.
+//////////////////////////////////////////////////////////////////////
+
+#include <deal.II/base/data_out_base.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/parameter_handler.h>
+#include <deal.II/base/thread_management.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/base/std_cxx11/shared_ptr.h>
+#include <deal.II/base/mpi.h>
+
+#include <cstring>
+#include <algorithm>
+#include <iomanip>
+#include <ctime>
+#include <cmath>
+#include <set>
+#include <sstream>
+#include <fstream>
+
+// we use uint32_t and uint8_t below, which are declared here:
+#include <stdint.h>
+
+#ifdef DEAL_II_WITH_ZLIB
+#  include <zlib.h>
+#endif
+
+#ifdef DEAL_II_WITH_HDF5
+#include <hdf5.h>
+#endif
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// we need the following exception from a global function, so can't declare it
+// in the usual way inside a class
+namespace
+{
+  DeclException2 (ExcUnexpectedInput,  // carries two strings: the expected line (arg1) and the line found instead (arg2)
+                  std::string, std::string,
+                  << "Unexpected input: expected line\n  <"
+                  << arg1
+                  << ">\nbut got\n  <"
+                  << arg2 << ">");
+}
+
+
+namespace
+{
+#ifdef DEAL_II_WITH_ZLIB
+  // the functions in this namespace are
+  // taken from the libb64 project, see
+  // http://sourceforge.net/projects/libb64
+  //
+  // libb64 has been placed in the public
+  // domain
+  namespace base64
+  {
+    typedef enum
+    {
+      step_A, step_B, step_C   // position inside the current group of three input bytes
+    } base64_encodestep;
+
+    typedef struct
+    {
+      base64_encodestep step;  // step at which the next call has to resume
+      char result;             // bits of the last input byte that have not been emitted yet
+    } base64_encodestate;
+
+    void base64_init_encodestate(base64_encodestate *state_in)   // reset to step_A with no pending bits
+    {
+      state_in->step = step_A;
+      state_in->result = 0;
+    }
+
+    inline
+    char base64_encode_value(char value_in)   // character for a 6-bit value; anything above 63 yields '='
+    {
+      static const char *encoding
+        = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+      if (value_in > 63) return '=';
+      return encoding[(int)value_in];
+    }
+    /*
+     * Encode length_in bytes starting at plaintext_in and write the
+     * resulting characters to code_out; no terminator or padding is
+     * written here. Every three input bytes give four output characters.
+     * If the input ends inside a group, the pending bits and the step
+     * reached are stored in state_in. The return value is the number of
+     * characters written to code_out.
+     */
+    int base64_encode_block(const char *plaintext_in,
+                            int length_in,
+                            char *code_out,
+                            base64_encodestate *state_in)
+    {
+      const char *plainchar = plaintext_in;
+      const char *const plaintextend = plaintext_in + length_in;
+      char *codechar = code_out;
+      char result;
+      char fragment;
+
+      result = state_in->result;
+
+      switch (state_in->step)   // the case labels sit inside the loop: execution resumes at the stored step
+        {
+          while (1)
+            {
+            case step_A:
+              if (plainchar == plaintextend)
+                {
+                  state_in->result = result;
+                  state_in->step = step_A;
+                  return codechar - code_out;
+                }
+              fragment = *plainchar++;
+              result = (fragment & 0x0fc) >> 2;   // upper six bits of the first byte
+              *codechar++ = base64_encode_value(result);
+              result = (fragment & 0x003) << 4;   // lower two bits are kept; falls through to step_B
+            case step_B:
+              if (plainchar == plaintextend)
+                {
+                  state_in->result = result;
+                  state_in->step = step_B;
+                  return codechar - code_out;
+                }
+              fragment = *plainchar++;
+              result |= (fragment & 0x0f0) >> 4;  // add upper four bits of the second byte
+              *codechar++ = base64_encode_value(result);
+              result = (fragment & 0x00f) << 2;   // lower four bits are kept; falls through to step_C
+            case step_C:
+              if (plainchar == plaintextend)
+                {
+                  state_in->result = result;
+                  state_in->step = step_C;
+                  return codechar - code_out;
+                }
+              fragment = *plainchar++;
+              result |= (fragment & 0x0c0) >> 6;  // add upper two bits of the third byte
+              *codechar++ = base64_encode_value(result);
+              result  = (fragment & 0x03f) >> 0;  // lower six bits form the fourth character
+              *codechar++ = base64_encode_value(result);
+            }
+        }
+      /* control should not reach here */
+      return codechar - code_out;
+    }
+    /*
+     * Finish an encoding: emit the bits still pending in state_in (if the
+     * stored step is step_B or step_C), pad with '=' and append a
+     * terminating '\0'. The return value is the number of characters
+     * written to code_out, including the terminator.
+     */
+    int base64_encode_blockend(char *code_out, base64_encodestate *state_in)
+    {
+      char *codechar = code_out;
+
+      switch (state_in->step)
+        {
+        case step_B:
+          *codechar++ = base64_encode_value(state_in->result);
+          *codechar++ = '=';
+          *codechar++ = '=';
+          break;
+        case step_C:
+          *codechar++ = base64_encode_value(state_in->result);
+          *codechar++ = '=';
+          break;
+        case step_A:
+          break;
+        }
+      *codechar++ = '\0';
+
+      return codechar - code_out;
+    }
+  }
+
+
+  /**
+   * Do a base64 encoding of the given data.
+   *
+   * @param data      pointer to the bytes to be encoded
+   * @param data_size number of bytes at @p data
+   * @return a newly allocated, zero-terminated character array holding
+   * the encoded text. The array is allocated with <tt>new[]</tt>; the
+   * calling function must release it again with <tt>delete[]</tt>.
+   */
+  char *
+  encode_block (const char *data,
+                const int   data_size)
+  {
+    base64::base64_encodestate state;
+    base64::base64_init_encodestate(&state);
+
+    // every started group of three input bytes produces four output
+    // characters (including the '=' padding), and one more byte is
+    // needed for the terminating '\0'. a buffer of 2*data_size+1 bytes
+    // is too small for data_size==1, where five bytes are written
+    char *encoded_data = new char[4*((data_size+2)/3) + 1];
+
+    const int encoded_length_data
+      = base64::base64_encode_block (data, data_size,
+                                     encoded_data, &state);
+    base64::base64_encode_blockend (encoded_data + encoded_length_data,
+                                    &state);
+
+    return encoded_data;
+  }
+
+
+  /**
+   * Translate a value of the VtkFlags::ZlibCompressionLevel enum into the
+   * corresponding preprocessor constant defined by zlib. A value that is
+   * not known here raises ExcNotImplemented through Assert and is mapped
+   * to Z_NO_COMPRESSION.
+   */
+  int get_zlib_compression_level(const DataOutBase::VtkFlags::ZlibCompressionLevel level)
+  {
+    if (level == DataOutBase::VtkFlags::no_compression)
+      return Z_NO_COMPRESSION;
+    if (level == DataOutBase::VtkFlags::best_speed)
+      return Z_BEST_SPEED;
+    if (level == DataOutBase::VtkFlags::best_compression)
+      return Z_BEST_COMPRESSION;
+    if (level == DataOutBase::VtkFlags::default_compression)
+      return Z_DEFAULT_COMPRESSION;
+
+    Assert(false, ExcNotImplemented());
+    return Z_NO_COMPRESSION;
+  }
+
+  /**
+   * Compress the raw bytes of @p data with zlib, using the compression
+   * level stored in @p flags, and write the result to @p output_stream
+   * as two base64 encoded pieces: first a header of four 32 bit integers
+   * (number of blocks, size of block, size of last block, compressed
+   * size), then the compressed data. Nothing is written for an empty
+   * vector.
+   */
+  template <typename T>
+  void write_compressed_block (const std::vector<T>        &data,
+                               const DataOutBase::VtkFlags &flags,
+                               std::ostream                &output_stream)
+  {
+    if (data.size() == 0)
+      return;
+
+    // compress into a buffer of the size zlib reports as upper bound;
+    // compress2() replaces the length by the number of bytes it
+    // actually produced
+    uLongf compressed_data_length
+      = compressBound (data.size() * sizeof(T));
+    char *compressed_data = new char[compressed_data_length];
+    int err = compress2 (reinterpret_cast<Bytef *>(compressed_data),
+                         &compressed_data_length,
+                         reinterpret_cast<const Bytef *>(&data[0]),
+                         data.size() * sizeof(T),
+                         get_zlib_compression_level(flags.compression_level));
+    (void)err;
+    Assert (err == Z_OK, ExcInternalError());
+
+    // the header describes a single block
+    const uint32_t compression_header[4]
+      = { 1,                                              /* number of blocks */
+          static_cast<uint32_t>(data.size() * sizeof(T)), /* size of block */
+          static_cast<uint32_t>(data.size() * sizeof(T)), /* size of last block */
+          static_cast<uint32_t>(compressed_data_length)   /* list of compressed sizes of blocks */
+        };
+
+    // encode and write the header first ...
+    char *encoded_header
+      = encode_block (reinterpret_cast<const char *>(&compression_header[0]),
+                      sizeof(compression_header));
+    output_stream << encoded_header;
+    delete[] encoded_header;
+
+    // ... then the compressed data; the compression buffer is no longer
+    // needed once it has been encoded
+    char *encoded_data = encode_block (compressed_data,
+                                       compressed_data_length);
+    delete[] compressed_data;
+
+    output_stream << encoded_data;
+    delete[] encoded_data;
+  }
+#endif
+}
+
+
+// some declarations of functions and locally used classes
+namespace DataOutBase
+{
+  namespace
+  {
+    /**
+     * Class holding the data of one cell of a patch in two space
+     * dimensions for output. It is the projection of a cell in
+     * three-dimensional space (two coordinates, one height value) to
+     * the direction of sight.
+     */
+    class SvgCell
+    {
+    public:
+
+      // Center of the cell (three-dimensional)
+      Point<3> center;
+
+      /**
+       * Vector of vertices of this cell (three-dimensional)
+       */
+      Point<3> vertices[4];
+
+      /**
+       * Depth into the picture, which is defined as the distance from
+       * an observer at the origin in direction of the line of sight.
+       */
+      float depth;
+
+      /**
+       * Vector of vertices of this cell (projected, two-dimensional).
+       */
+      Point<2> projected_vertices[4];
+
+      // Center of the cell (projected, two-dimensional)
+      Point<2> projected_center;
+
+      /**
+       * Comparison operator for sorting. The definition following this
+       * class compares the depth members, with the larger depth
+       * ordered first.
+       */
+      bool operator < (const SvgCell &) const;
+    };
+
+    // Order cells by decreasing depth: this cell compares "less" than
+    // @p e exactly if its depth is the larger one of the two. Note that
+    // this is deliberately the reverse of the usual order.
+    bool SvgCell::operator < (const SvgCell &e) const
+    {
+      return e.depth < depth;
+    }
+
+
+
+    /**
+     * Class holding the data of one cell of a patch in two space
+     * dimensions for output. It is the projection of a cell in
+     * three-dimensional space (two coordinates, one height value) to
+     * the direction of sight.
+     */
+    class EpsCell2d
+    {
+    public:
+
+      /**
+       * Vector of vertices of this cell.
+       */
+      Point<2> vertices[4];
+
+      /**
+       * Data value from which the actual colors will be computed by the
+       * colorization function stated in the <tt>EpsFlags</tt> class.
+       */
+      float color_value;
+
+      /**
+       * Depth into the picture, which is defined as the distance from
+       * an observer at the origin in direction of the line of sight.
+       */
+      float depth;
+
+      /**
+       * Comparison operator for sorting.
+       */
+      bool operator < (const EpsCell2d &) const;
+    };
+
+
+    /**
+     * This is a helper function for the write_gmv() function. There,
+     * the data in the patches needs to be copied around as output is
+     * one variable globally at a time, rather than all data on each
+     * vertex at a time. This copying around can be done detached from
+     * the main thread, and is thus moved into this separate function.
+     *
+     * Note that because of the similarity of the formats, this function
+     * is also used by the Vtk and Tecplot output functions.
+     *
+     * @param patches The patches whose data tables are to be reordered.
+     * If this list is empty, the function returns without touching
+     * @p data_vectors.
+     * @param data_vectors Output table. It must already have one row per
+     * data set and enough columns for all points of all patches; entry
+     * (data_set, k) receives the value of that data set at the k-th
+     * point, where points are counted consecutively over all patches.
+     */
+    template <int dim, int spacedim>
+    void
+    write_gmv_reorder_data_vectors (const std::vector<Patch<dim,spacedim> > &patches,
+                                    Table<2,double>                         &data_vectors)
+    {
+      // without any patches there is nothing to copy. returning here
+      // also keeps us from looking at patches[0] of an empty vector
+      // below
+      if (patches.empty())
+        return;
+
+      // unlike in the main function, we
+      // don't have here the data_names
+      // field, so we initialize it with
+      // the number of data sets in the
+      // first patch. the equivalence of
+      // these two definitions is checked
+      // in the main function.
+
+      // we have to take care, however, whether the
+      // points are appended to the end of the
+      // patch->data table
+      const unsigned int n_data_sets
+        =patches[0].points_are_available ? (patches[0].data.n_rows() - spacedim) : patches[0].data.n_rows();
+
+      Assert (data_vectors.size()[0] == n_data_sets,
+              ExcInternalError());
+
+      // loop over all patches
+      unsigned int next_value = 0;
+      for (typename std::vector<Patch<dim,spacedim> >::const_iterator patch=patches.begin();
+           patch != patches.end(); ++patch)
+        {
+          // only used inside the assertions below; the cast avoids an
+          // unused-variable warning when assertions are compiled out
+          const unsigned int n_subdivisions = patch->n_subdivisions;
+          (void)n_subdivisions;
+
+          Assert ((patch->data.n_rows() == n_data_sets && !patch->points_are_available) ||
+                  (patch->data.n_rows() == n_data_sets+spacedim && patch->points_are_available),
+                  ExcDimensionMismatch (patch->points_are_available
+                                        ?
+                                        (n_data_sets + spacedim)
+                                        :
+                                        n_data_sets,
+                                        patch->data.n_rows()));
+          Assert ((n_data_sets == 0)
+                  ||
+                  (patch->data.n_cols() == Utilities::fixed_power<dim>(n_subdivisions+1)),
+                  ExcInvalidDatasetSize (patch->data.n_cols(), n_subdivisions+1));
+
+          // copy column i of this patch into column next_value of the
+          // global table; rows holding point coordinates (if any) are
+          // not copied because data_set only runs up to n_data_sets
+          for (unsigned int i=0; i<patch->data.n_cols(); ++i, ++next_value)
+            for (unsigned int data_set=0; data_set<n_data_sets; ++data_set)
+              data_vectors[data_set][next_value] = patch->data(data_set,i);
+        }
+
+      // every row of the output must have been filled completely
+      for (unsigned int data_set=0; data_set<n_data_sets; ++data_set)
+        Assert (data_vectors[data_set].size() == next_value,
+                ExcInternalError());
+    }
+  }
+}
+
+//----------------------------------------------------------------------//
+// DataOutFilter class member functions
+//----------------------------------------------------------------------//
+
+/**
+ * Register the point @p p under the running number @p index. The point is
+ * padded with zeros to three components before it is looked up in (and,
+ * if necessary, added to) the collection of points seen so far.
+ */
+template<int dim>
+void DataOutBase::DataOutFilter::write_point(const unsigned int &index, const Point<dim> &p)
+{
+  node_dim = dim;
+
+  // embed the point into three dimensions
+  Point<3> padded;
+  for (int c=0; c<3; ++c)
+    padded(c) = (c < dim ? p(c) : 0);
+
+  const Map3DPoint::const_iterator known = existing_points.find(padded);
+
+  unsigned int assigned;
+  if (flags.filter_duplicate_vertices && known != existing_points.end())
+    {
+      // duplicate of an earlier point, and duplicates are filtered:
+      // reuse the number stored for it
+      assigned = known->second;
+    }
+  else
+    {
+      // new point, or no filtering requested: give it the next free number
+      assigned = existing_points.size();
+      existing_points.insert(std::make_pair(padded, assigned));
+    }
+
+  // remember which internal number this output index refers to
+  filtered_points[index] = assigned;
+}
+
+/**
+ * Store, for the cell vertex slot @p cell_index, the internal point number
+ * recorded for the output point @p pt_index.
+ */
+void DataOutBase::DataOutFilter::internal_add_cell(const unsigned int &cell_index, const unsigned int &pt_index)
+{
+  const unsigned int internal_point = filtered_points[pt_index];
+  filtered_cells[cell_index] = internal_point;
+}
+
+/**
+ * Resize @p node_data to hold node_dim coordinates for every stored point
+ * and write the first node_dim coordinates of each point at the position
+ * given by the number stored with that point.
+ */
+void DataOutBase::DataOutFilter::fill_node_data(std::vector<double> &node_data) const
+{
+  node_data.resize(existing_points.size()*node_dim);
+
+  Map3DPoint::const_iterator entry = existing_points.begin();
+  while (entry != existing_points.end())
+    {
+      for (int d=0; d<node_dim; ++d)
+        node_data[node_dim*entry->second+d] = entry->first(d);
+      ++entry;
+    }
+}
+
+/**
+ * Resize @p cell_data to the number of stored cell vertex slots and fill
+ * each slot with the stored point number shifted by @p local_node_offset.
+ */
+void DataOutBase::DataOutFilter::fill_cell_data(const unsigned int &local_node_offset, std::vector<unsigned int> &cell_data) const
+{
+  typedef std::map<unsigned int, unsigned int>::const_iterator slot_iterator;
+
+  cell_data.resize(filtered_cells.size());
+
+  for (slot_iterator slot=filtered_cells.begin(); slot!=filtered_cells.end(); ++slot)
+    cell_data[slot->first] = slot->second+local_node_offset;
+}
+
+/**
+ * Record the vertices of cell number @p index. The first vertex is
+ * @p start; the remaining ones are reached by adding the offsets @p d1,
+ * @p d2 and @p d3. Offsets belonging to directions beyond @p dim are
+ * not used.
+ */
+template<int dim>
+void
+DataOutBase::DataOutFilter::write_cell(
+  unsigned int index,
+  unsigned int start,
+  unsigned int d1,
+  unsigned int d2,
+  unsigned int d3)
+{
+  const unsigned int first_slot = index * GeometryInfo<dim>::vertices_per_cell;
+  n_cell_verts = GeometryInfo<dim>::vertices_per_cell;
+
+  // the line in x direction exists in every dimension
+  internal_add_cell(first_slot+0, start);
+  internal_add_cell(first_slot+1, start+d1);
+  if (dim<2)
+    return;
+
+  // second line of the quad, traversed backwards
+  internal_add_cell(first_slot+2, start+d2+d1);
+  internal_add_cell(first_slot+3, start+d2);
+  if (dim<3)
+    return;
+
+  // the same quad once more, shifted by d3
+  internal_add_cell(first_slot+4, start+d3);
+  internal_add_cell(first_slot+5, start+d3+d1);
+  internal_add_cell(first_slot+6, start+d3+d2+d1);
+  internal_add_cell(first_slot+7, start+d3+d2);
+}
+
+/**
+ * Append a data set called @p name with @p dimension components. The
+ * values are read from rows @p set_num onwards of @p data_vectors and
+ * written to the position of the internal point each output point was
+ * mapped to.
+ */
+void DataOutBase::DataOutFilter::write_data_set(const std::string &name, const unsigned int &dimension, const unsigned int &set_num, const Table<2,double> &data_vectors)
+{
+  const unsigned int n_stored_points = existing_points.size();
+
+  // HDF5/XDMF output only supports 1D or 3D output, so everything that is
+  // not scalar gets stored with three components in that mode
+  const unsigned int stored_dim
+    = (flags.xdmf_hdf5_output && dimension != 1) ? 3 : dimension;
+
+  // Record the data set name, dimension, and allocate space for it
+  data_set_names.push_back(name);
+  data_set_dims.push_back(stored_dim);
+  data_sets.push_back(std::vector<double>(stored_dim*n_stored_points));
+
+  // TODO: averaging, min/max, etc for merged vertices
+  for (unsigned int pt=0; pt<filtered_points.size(); ++pt)
+    for (unsigned int comp=0; comp<stored_dim; ++comp)
+      {
+        const unsigned int target = filtered_points[pt];
+        // components beyond the original dimension are padded with zero
+        data_sets.back()[target*stored_dim+comp]
+          = (comp < dimension ? data_vectors(set_num+comp, pt) : 0);
+      }
+}
+
+
+//----------------------------------------------------------------------//
+//Auxiliary data
+//----------------------------------------------------------------------//
+
+namespace
+{
+  // Cell type names used in GMV output. The table is indexed by the
+  // dimension of the cell; entry 0 is an empty placeholder.
+  const char *gmv_cell_type[4] =
+  {
+    "", "line 2", "quad 4", "hex 8"
+  };
+
+  // Cell type names used in UCD output, indexed by the dimension of the
+  // cell; entry 0 is an empty placeholder.
+  const char *ucd_cell_type[4] =
+  {
+    "", "line", "quad", "hex"
+  };
+
+  // Cell type names for Tecplot output; presumably indexed by dimension
+  // like the tables above (the use is not visible in this part of the
+  // file).
+  const char *tecplot_cell_type[4] =
+  {
+    "", "lineseg", "quadrilateral", "brick"
+  };
+
+#ifdef DEAL_II_HAVE_TECPLOT
+  // Numeric cell type codes for binary Tecplot output; only compiled if
+  // DEAL_II_HAVE_TECPLOT is defined.
+  const unsigned int tecplot_binary_cell_type[4] =
+  {
+    0, 0, 1, 3
+  };
+#endif
+
+  // NOTE: The dimension of the array is chosen to 5 to allow the choice
+  // DataOutBase<deal_II_dimension,deal_II_dimension+1> in general
+  // Wolfgang supposed that we don't need it in general, but however this
+  // choice avoids a -Warray-bounds check warning
+  //
+  // The last entry is static_cast<unsigned int>(-1), i.e. the largest
+  // value an unsigned int can hold.
+  const unsigned int vtk_cell_type[5] =
+  {
+    0, 3, 9, 12, static_cast<unsigned int>(-1)
+  };
+
+//----------------------------------------------------------------------//
+//Auxiliary functions
+//----------------------------------------------------------------------//
+// For a given patch, compute the node that interpolates the corner
+// nodes linearly at the point (xstep, ystep, zstep)*1./n_subdivisions.
+// If the points are saved in the patch->data member, return the
+// saved point instead.
+//
+// node            [out] receives the computed point
+// patch           the patch to take vertices or stored points from
+// xstep, ystep,
+// zstep           index of the node in each coordinate direction; steps
+//                 for directions beyond dim are not used
+// n_subdivisions  number of subdivisions of the patch per direction
+
+//TODO: Make this function return its value, rather than using a reference
+// as first argument; take a reference for 'patch', not a pointer
+  template <int dim, int spacedim>
+  inline
+  void
+  compute_node(
+    Point<spacedim> &node,
+    const DataOutBase::Patch<dim,spacedim> *patch,
+    const unsigned int xstep,
+    const unsigned int ystep,
+    const unsigned int zstep,
+    const unsigned int n_subdivisions)
+  {
+    if (patch->points_are_available)
+      {
+        // compute the column of the patch->data table that belongs to
+        // this node: xstep + (n_subdivisions+1)*ystep
+        // + (n_subdivisions+1)^2*zstep, where only the terms for the
+        // first dim directions are added
+        unsigned int point_no=0;
+        // note: switch without break !
+        // (the cases deliberately fall through, so that e.g. dim==3 also
+        // adds the contributions of the dim==2 and dim==1 cases)
+        switch (dim)
+          {
+          case 3:
+            Assert (zstep<n_subdivisions+1, ExcIndexRange(zstep,0,n_subdivisions+1));
+            point_no+=(n_subdivisions+1)*(n_subdivisions+1)*zstep;
+          case 2:
+            Assert (ystep<n_subdivisions+1, ExcIndexRange(ystep,0,n_subdivisions+1));
+            point_no+=(n_subdivisions+1)*ystep;
+          case 1:
+            Assert (xstep<n_subdivisions+1, ExcIndexRange(xstep,0,n_subdivisions+1));
+            point_no+=xstep;
+
+            // break here for dim<=3
+            break;
+
+          default:
+            Assert (false, ExcNotImplemented());
+          }
+        // the coordinates are stored in the last spacedim rows of the
+        // data table
+        for (unsigned int d=0; d<spacedim; ++d)
+          node[d]=patch->data(patch->data.size(0)-spacedim+d,point_no);
+      }
+    else
+      {
+        // perform a dim-linear interpolation
+        const double stepsize=1./n_subdivisions,
+                     xfrac=xstep*stepsize;
+
+        // interpolate along the line from vertex 0 to vertex 1
+        node = (patch->vertices[1] * xfrac) + (patch->vertices[0] * (1-xfrac));
+        if (dim>1)
+          {
+            // blend with the same interpolation along the line from
+            // vertex 2 to vertex 3
+            const double yfrac=ystep*stepsize;
+            node*= 1-yfrac;
+            node += ((patch->vertices[3] * xfrac) + (patch->vertices[2] * (1-xfrac))) * yfrac;
+            if (dim>2)
+              {
+                // blend with the bilinear interpolation on the quad
+                // formed by vertices 4 to 7
+                const double zfrac=zstep*stepsize;
+                node *= (1-zfrac);
+                node += (((patch->vertices[5] * xfrac) + (patch->vertices[4] * (1-xfrac)))
+                         * (1-yfrac) +
+                         ((patch->vertices[7] * xfrac) + (patch->vertices[6] * (1-xfrac)))
+                         * yfrac) * zfrac;
+              }
+          }
+      }
+  }
+
+
+
+  /**
+   * Count the nodes and cells of all @p patches. A patch with n
+   * subdivisions contributes (n+1)^dim nodes and n^dim cells. The totals
+   * are returned in @p n_nodes and @p n_cells, which are reset to zero
+   * first.
+   */
+  template<int dim, int spacedim>
+  static
+  void
+  compute_sizes(const std::vector<DataOutBase::Patch<dim, spacedim> > &patches,
+                unsigned int &n_nodes,
+                unsigned int &n_cells)
+  {
+    n_nodes = 0;
+    n_cells = 0;
+
+    typedef typename std::vector<DataOutBase::Patch<dim,spacedim> >::const_iterator patch_iterator;
+    patch_iterator current = patches.begin();
+    while (current != patches.end())
+      {
+        const unsigned int subdivisions = current->n_subdivisions;
+        n_nodes += Utilities::fixed_power<dim>(subdivisions+1);
+        n_cells += Utilities::fixed_power<dim>(subdivisions);
+        ++current;
+      }
+  }
+
+  /**
+   * Class describing common functionality between different output streams.
+   *
+   * The functions write_point(), flush_points(), write_cell() and
+   * flush_cells() are not virtual; derived classes provide their own
+   * versions with the same names.
+   *
+   * @ingroup output
+   */
+  template<typename FlagsType>
+  class StreamBase
+  {
+  public:
+    /**
+     * Constructor. Stores a reference to the output stream for immediate use
+     * and a copy of the flags. The selected component starts out as
+     * numbers::invalid_unsigned_int.
+     */
+    StreamBase (std::ostream &stream,
+                const FlagsType &flags)
+      :
+      selected_component (numbers::invalid_unsigned_int),
+      stream (stream),
+      flags (flags)
+    {}
+
+    /**
+     * Output operator for points. All inheriting classes should implement this
+     * function. This default implementation only triggers an assertion.
+     */
+    template <int dim>
+    void write_point (const unsigned int,
+                      const Point<dim> &)
+    {
+      Assert (false, ExcMessage ("The derived class you are using needs to "
+                                 "reimplement this function if you want to call "
+                                 "it."));
+    }
+
+    /**
+     * Do whatever is necessary to terminate the list of points. The default
+     * implementation does nothing; derived classes that do not require any
+     * action do not need to reimplement this.
+     */
+    void flush_points () {}
+
+    /**
+     * Write dim-dimensional cell with first vertex at number start and further
+     * vertices offset by the specified values. Values not needed are
+     * ignored. All inheriting classes should implement this function. This
+     * default implementation only triggers an assertion.
+     */
+    template <int dim>
+    void write_cell (const unsigned int /*index*/,
+                     const unsigned int /*start*/,
+                     const unsigned int /*x_offset*/,
+                     const unsigned int /*y_offset*/,
+                     const unsigned int /*z_offset*/)
+    {
+      Assert (false, ExcMessage ("The derived class you are using needs to "
+                                 "reimplement this function if you want to call "
+                                 "it."));
+    }
+
+    /**
+     * Do whatever is necessary to terminate the list of cells. This function is
+     * usually only reimplemented if deal.II is compiled with zlib. The default
+     * implementation does nothing; derived classes that do not require any
+     * action do not need to reimplement this.
+     */
+    void flush_cells () {}
+
+    /**
+     * Forwarding of an output stream. This function is usually only
+     * reimplemented if inheriting classes use zlib.
+     *
+     * The return value is the underlying std::ostream, not this object, so
+     * further items chained with << go directly to that stream.
+     */
+    template <typename T>
+    std::ostream &operator<< (const T &t)
+    {
+      stream << t;
+      return stream;
+    }
+
+    /**
+     * Since the GMV and Tecplot formats read the x, y and z coordinates in
+     * separate fields, we enable write() to output only a single selected
+     * component at once and do this dim times for the whole set of nodes. This
+     * integer can be used to select the component written.
+     */
+    unsigned int selected_component;
+
+  protected:
+    /**
+     * The ostream to use. Since the life span of these objects is small, we use
+     * a very simple storage technique.
+     */
+    std::ostream &stream;
+
+    /**
+     * The flags controlling the output. This is a copy of the object given
+     * to the constructor.
+     */
+    const FlagsType flags;
+  };
+
+  /**
+   * Class for writing basic
+   * entities in @ref
+   * SoftwareOpenDX format,
+   * depending on the flags.
+   */
+  class DXStream : public StreamBase<DataOutBase::DXFlags>
+  {
+  public:
+    /**
+     * Constructor, taking the stream to write to and the flags
+     * controlling the output.
+     */
+    DXStream (std::ostream &stream,
+              const DataOutBase::DXFlags &flags);
+
+    /**
+     * Write the coordinates of a single point.
+     */
+    template <int dim>
+    void write_point (const unsigned int index,
+                      const Point<dim> &);
+
+    /**
+     * The order of vertices for
+     * these cells in different
+     * dimensions is
+     * <ol>
+     * <li> [0,1]
+     * <li> [0,2,1,3]
+     * <li> [0,4,2,6,1,5,3,7]
+     * </ol>
+     */
+    template <int dim>
+    void write_cell (const unsigned int index,
+                     const unsigned int start,
+                     const unsigned int x_offset,
+                     const unsigned int y_offset,
+                     const unsigned int z_offset);
+
+    /**
+     * Write a complete set of
+     * data for a single node.
+     *
+     * The index given as first
+     * argument indicates the
+     * number of a data set, as
+     * some output formats require
+     * this number to be printed.
+     */
+    template<typename data>
+    void write_dataset (const unsigned int       index,
+                        const std::vector<data> &values);
+  };
+
+  /**
+   * Class for writing basic
+   * entities in @ref SoftwareGMV
+   * format, depending on the
+   * flags.
+   */
+  class GmvStream : public StreamBase<DataOutBase::GmvFlags>
+  {
+  public:
+    /**
+     * Constructor, taking the stream to write to and the flags
+     * controlling the output.
+     */
+    GmvStream (std::ostream &stream,
+               const DataOutBase::GmvFlags &flags);
+
+    /**
+     * Write the coordinate of a point that is selected by the
+     * selected_component member of the base class.
+     */
+    template <int dim>
+    void write_point (const unsigned int index,
+                      const Point<dim> &);
+
+    /**
+     * The order of vertices for
+     * these cells in different
+     * dimensions is
+     * <ol>
+     * <li> [0,1]
+     * <li> [0,1,3,2]
+     * <li> [0,1,3,2,4,5,7,6]
+     * </ol>
+     */
+    template <int dim>
+    void write_cell (const unsigned int index,
+                     const unsigned int start,
+                     const unsigned int x_offset,
+                     const unsigned int y_offset,
+                     const unsigned int z_offset);
+  };
+
+  /**
+   * Class for writing basic
+   * entities in @ref
+   * SoftwareTecplot format,
+   * depending on the flags.
+   */
+  class TecplotStream : public StreamBase<DataOutBase::TecplotFlags>
+  {
+  public:
+    /**
+     * Constructor, taking the stream to write to and the flags
+     * controlling the output.
+     */
+    TecplotStream (std::ostream &stream,
+                   const DataOutBase::TecplotFlags &flags);
+
+    /**
+     * Write the coordinate of a point that is selected by the
+     * selected_component member of the base class.
+     */
+    template <int dim>
+    void write_point (const unsigned int index,
+                      const Point<dim> &);
+
+    /**
+     * The order of vertices for
+     * these cells in different
+     * dimensions is
+     * <ol>
+     * <li> [0,1]
+     * <li> [0,1,3,2]
+     * <li> [0,1,3,2,4,5,7,6]
+     * </ol>
+     */
+    template <int dim>
+    void write_cell (const unsigned int index,
+                     const unsigned int start,
+                     const unsigned int x_offset,
+                     const unsigned int y_offset,
+                     const unsigned int z_offset);
+  };
+
+  /**
+   * Class for writing basic
+   * entities in UCD format for
+   * @ref SoftwareAVS, depending on
+   * the flags.
+   */
+  class UcdStream : public StreamBase<DataOutBase::UcdFlags>
+  {
+  public:
+    /**
+     * Constructor, taking the stream to write to and the flags
+     * controlling the output.
+     */
+    UcdStream (std::ostream &stream,
+               const DataOutBase::UcdFlags &flags);
+
+    /**
+     * Write one point as a line consisting of its number (increased by
+     * one) and three coordinates; missing coordinates are written as
+     * zero.
+     */
+    template <int dim>
+    void write_point (const unsigned int index,
+                      const Point<dim> &);
+
+    /**
+     * The additional offset 1 is
+     * added inside this
+     * function.
+     *
+     * The order of vertices for
+     * these cells in different
+     * dimensions is
+     * <ol>
+     * <li> [0,1]
+     * <li> [0,1,3,2]
+     * <li> [0,1,5,4,2,3,7,6]
+     * </ol>
+     */
+    template <int dim>
+    void write_cell (const unsigned int index,
+                     const unsigned int start,
+                     const unsigned int x_offset,
+                     const unsigned int y_offset,
+                     const unsigned int z_offset);
+
+    /**
+     * Write a complete set of
+     * data for a single node.
+     *
+     * The index given as first
+     * argument indicates the
+     * number of a data set, as
+     * some output formats require
+     * this number to be printed.
+     */
+    template<typename data>
+    void write_dataset (const unsigned int       index,
+                        const std::vector<data> &values);
+  };
+
+  /**
+   * Class for writing basic
+   * entities in @ref SoftwareVTK
+   * format, depending on the
+   * flags.
+   */
+  class VtkStream : public StreamBase<DataOutBase::VtkFlags>
+  {
+  public:
+    /**
+     * Constructor, taking the stream to write to and the flags
+     * controlling the output.
+     */
+    VtkStream (std::ostream &stream,
+               const DataOutBase::VtkFlags &flags);
+
+    /**
+     * Write one point, padded with zeros to three coordinates.
+     */
+    template <int dim>
+    void write_point (const unsigned int index,
+                      const Point<dim> &);
+
+    /**
+     * The order of vertices for
+     * these cells in different
+     * dimensions is
+     * <ol>
+     * <li> [0,1]
+     * <li> [0,1,3,2]
+     * <li> [0,1,3,2,4,5,7,6]
+     * </ol>
+     */
+    template <int dim>
+    void write_cell (const unsigned int index,
+                     const unsigned int start,
+                     const unsigned int x_offset,
+                     const unsigned int y_offset,
+                     const unsigned int z_offset);
+  };
+
+
+  /**
+   * Class for writing basic entities of VTU files, depending on the
+   * flags. If DEAL_II_WITH_ZLIB is defined, points and cells are first
+   * collected in memory and only written by flush_points() and
+   * flush_cells(); otherwise they are written to the stream directly.
+   */
+  class VtuStream : public StreamBase<DataOutBase::VtkFlags>
+  {
+  public:
+    /**
+     * Constructor, taking the stream to write to and the flags
+     * controlling the output.
+     */
+    VtuStream (std::ostream &stream,
+               const DataOutBase::VtkFlags &flags);
+
+    /**
+     * Write (or, with zlib, store) one point, padded with zeros to three
+     * coordinates.
+     */
+    template <int dim>
+    void write_point (const unsigned int index,
+                      const Point<dim> &);
+
+    /**
+     * With zlib, write all points collected so far and forget them;
+     * otherwise do nothing.
+     */
+    void flush_points ();
+
+    /**
+     * The order of vertices for
+     * these cells in different
+     * dimensions is
+     * <ol>
+     * <li> [0,1]
+     * <li> [0,1,3,2]
+     * <li> [0,1,3,2,4,5,7,6]
+     * </ol>
+     */
+    template <int dim>
+    void write_cell (const unsigned int index,
+                     const unsigned int start,
+                     const unsigned int x_offset,
+                     const unsigned int y_offset,
+                     const unsigned int z_offset);
+
+    /**
+     * With zlib, write all cells collected so far and forget them;
+     * otherwise do nothing.
+     */
+    void flush_cells ();
+
+    /**
+     * Output operator for objects that are not vectors.
+     */
+    template <typename T>
+    std::ostream &operator<< (const T &);
+
+    /**
+     * Forwarding of output stream.
+     *
+     * If libz was found during
+     * configuration, this operator
+     * compresses and encodes the
+     * entire data
+     * block. Otherwise, it simply
+     * writes it element by
+     * element.
+     */
+    template <typename T>
+    std::ostream &operator<< (const std::vector<T> &);
+
+  private:
+    /**
+     * A list of vertices and
+     * cells, to be used in case we
+     * want to compress the data.
+     *
+     * The data types of these
+     * arrays needs to match what
+     * we print in the XML-preamble
+     * to the respective parts of
+     * VTU files (e.g. Float64 and
+     * Int32)
+     */
+    std::vector<double>  vertices;
+    std::vector<int32_t> cells;
+  };
+
+
+//----------------------------------------------------------------------//
+
+  // Constructor: hands the output stream and the flags on to the
+  // StreamBase base class.
+  DXStream::DXStream (std::ostream &out,
+                      const DataOutBase::DXFlags &f)
+    :
+    StreamBase<DataOutBase::DXFlags> (out, f)
+  {}
+
+
+  /**
+   * Write the coordinates of @p p. Depending on the coordinates_binary
+   * flag they are written either as raw floats or as text, separated by
+   * tabs and terminated by a newline.
+   */
+  template<int dim>
+  void
+  DXStream::write_point (const unsigned int,
+                         const Point<dim> &p)
+  {
+    if (!flags.coordinates_binary)
+      {
+        // text output: every coordinate is followed by a tab
+        for (unsigned int c=0; c<dim; ++c)
+          stream << p(c) << '\t';
+        stream << '\n';
+        return;
+      }
+
+    // binary output: convert to float and dump the raw bytes
+    float coordinates[dim];
+    for (unsigned int c=0; c<dim; ++c)
+      coordinates[c] = p(c);
+    stream.write(reinterpret_cast<const char *>(coordinates),
+                 dim * sizeof(float));
+  }
+
+
+
+  /**
+   * Write the vertex numbers of one cell. The vertices are computed from
+   * @p start and the offsets @p d1, @p d2, @p d3 and are put into the
+   * positions given by GeometryInfo<dim>::dx_to_deal. Output is binary
+   * or text depending on the int_binary flag.
+   */
+  template<int dim>
+  void
+  DXStream::write_cell (unsigned int,
+                        unsigned int start,
+                        unsigned int d1,
+                        unsigned int d2,
+                        unsigned int d3)
+  {
+    const unsigned int n_vertices = (1<<dim);
+    int nodes[1<<dim];
+
+    nodes[GeometryInfo<dim>::dx_to_deal[0]] = start;
+    nodes[GeometryInfo<dim>::dx_to_deal[1]] = start+d1;
+    if (dim>=2)
+      {
+        // Add shifted line in y direction
+        nodes[GeometryInfo<dim>::dx_to_deal[2]] = start+d2;
+        nodes[GeometryInfo<dim>::dx_to_deal[3]] = start+d2+d1;
+      }
+    if (dim>=3)
+      {
+        // Add shifted quad in z direction
+        nodes[GeometryInfo<dim>::dx_to_deal[4]] = start+d3;
+        nodes[GeometryInfo<dim>::dx_to_deal[5]] = start+d3+d1;
+        nodes[GeometryInfo<dim>::dx_to_deal[6]] = start+d3+d2;
+        nodes[GeometryInfo<dim>::dx_to_deal[7]] = start+d3+d2+d1;
+      }
+
+    if (flags.int_binary)
+      {
+        stream.write(reinterpret_cast<const char *>(nodes),
+                     n_vertices * sizeof(int));
+        return;
+      }
+
+    // text output: tab between the numbers, newline after the last one
+    for (unsigned int v=0; v<n_vertices; ++v)
+      stream << nodes[v] << (v+1 < n_vertices ? '\t' : '\n');
+  }
+
+
+
+  /**
+   * Write the values of one node. Depending on the data_binary flag the
+   * values are written either as raw bytes or as text, where each value
+   * is preceded by a tab and the line is terminated by a newline.
+   *
+   * The first (unnamed) argument, the data set index, is not used for
+   * this format. An empty @p values vector produces no binary output.
+   */
+  template<typename data>
+  inline
+  void
+  DXStream::write_dataset (const unsigned int,
+                           const std::vector<data> &values)
+  {
+    if (flags.data_binary)
+      {
+        // taking the address of values[0] is only allowed if the vector
+        // has at least one element
+        if (!values.empty())
+          stream.write(reinterpret_cast<const char *>(&values[0]),
+                       values.size()*sizeof(data));
+      }
+    else
+      {
+        for (unsigned int i=0; i<values.size(); ++i)
+          stream << '\t' << values[i];
+        stream << '\n';
+      }
+  }
+
+
+
+//----------------------------------------------------------------------//
+
+  // Constructor: hands the output stream and the flags on to the
+  // StreamBase base class.
+  GmvStream::GmvStream (std::ostream &out,
+                        const DataOutBase::GmvFlags &f)
+    :
+    StreamBase<DataOutBase::GmvFlags> (out, f)
+  {}
+
+
+  /**
+   * Write the coordinate of @p p chosen by selected_component, followed
+   * by a space. selected_component must have been set before.
+   */
+  template<int dim>
+  void
+  GmvStream::write_point (const unsigned int,
+                          const Point<dim> &p)
+  {
+    Assert(selected_component != numbers::invalid_unsigned_int,
+           ExcNotInitialized());
+
+    const double coordinate = p(selected_component);
+    stream << coordinate << ' ';
+  }
+
+
+
+  /**
+   * Write one cell: a line with the cell type, followed by a line with
+   * the tab separated, one-based vertex numbers.
+   */
+  template<int dim>
+  void
+  GmvStream::write_cell (unsigned int,
+                         unsigned int s,
+                         unsigned int d1,
+                         unsigned int d2,
+                         unsigned int d3)
+  {
+    // Vertices are numbered starting with one.
+    const unsigned int first = s+1;
+
+    stream << gmv_cell_type[dim] << '\n';
+
+    // line in x direction
+    stream << first << '\t' << first+d1;
+
+    // second line of the quad, traversed backwards
+    if (dim>=2)
+      stream << '\t' << first+d2+d1 << '\t' << first+d2;
+
+    // the same quad, shifted by d3
+    if (dim>=3)
+      stream << '\t' << first+d3
+             << '\t' << first+d3+d1
+             << '\t' << first+d3+d2+d1
+             << '\t' << first+d3+d2;
+
+    stream << '\n';
+  }
+
+
+
+  // Constructor: hands the output stream and the flags on to the
+  // StreamBase base class.
+  TecplotStream::TecplotStream (std::ostream &out,
+                                const DataOutBase::TecplotFlags &f)
+    :
+    StreamBase<DataOutBase::TecplotFlags> (out, f)
+  {}
+
+
+  /**
+   * Write the coordinate of @p p chosen by selected_component on a line
+   * of its own. selected_component must have been set before.
+   */
+  template<int dim>
+  void
+  TecplotStream::write_point (const unsigned int,
+                              const Point<dim> &p)
+  {
+    Assert(selected_component != numbers::invalid_unsigned_int,
+           ExcNotInitialized());
+
+    const double coordinate = p(selected_component);
+    stream << coordinate << '\n';
+  }
+
+
+
+  /**
+   * Write one cell as a line of tab separated, one-based vertex numbers.
+   */
+  template<int dim>
+  void
+  TecplotStream::write_cell (unsigned int,
+                             unsigned int s,
+                             unsigned int d1,
+                             unsigned int d2,
+                             unsigned int d3)
+  {
+    // vertex numbers in the output start at one
+    const unsigned int first = s+1;
+
+    // line in x direction
+    stream << first << '\t' << first+d1;
+
+    // second line of the quad, traversed backwards
+    if (dim>=2)
+      stream << '\t' << first+d2+d1 << '\t' << first+d2;
+
+    // the same quad, shifted by d3
+    if (dim>=3)
+      stream << '\t' << first+d3
+             << '\t' << first+d3+d1
+             << '\t' << first+d3+d2+d1
+             << '\t' << first+d3+d2;
+
+    stream << '\n';
+  }
+
+
+
+  // Constructor: hands the output stream and the flags on to the
+  // StreamBase base class.
+  UcdStream::UcdStream (std::ostream &out,
+                        const DataOutBase::UcdFlags &f)
+    :
+    StreamBase<DataOutBase::UcdFlags> (out, f)
+  {}
+
+
+  /**
+   * Write one point as a line: the one-based point number, then the
+   * coordinates of @p p, padded with zeros up to three entries.
+   */
+  template<int dim>
+  void
+  UcdStream::write_point (const unsigned int index,
+                          const Point<dim> &p)
+  {
+    // point numbers in the output start at one
+    stream << index+1 << "   ";
+
+    // the coordinates the point actually has ...
+    unsigned int c = 0;
+    for (; c<dim; ++c)
+      stream << p(c) << ' ';
+    // ... and zeros for the remaining ones
+    for (; c<3; ++c)
+      stream << "0 ";
+
+    stream << '\n';
+  }
+
+
+
+  /**
+   * Write one cell as a line: the one-based cell number, a material
+   * column "0", the cell type and the one-based vertex numbers. The
+   * vertices are put into the positions given by
+   * GeometryInfo<dim>::ucd_to_deal.
+   */
+  template<int dim>
+  void
+  UcdStream::write_cell (unsigned int index,
+                         unsigned int start,
+                         unsigned int d1,
+                         unsigned int d2,
+                         unsigned int d3)
+  {
+    const unsigned int n_vertices = (1<<dim);
+    int nodes[1<<dim];
+
+    nodes[GeometryInfo<dim>::ucd_to_deal[0]] = start;
+    nodes[GeometryInfo<dim>::ucd_to_deal[1]] = start+d1;
+    if (dim>=2)
+      {
+        // Add shifted line in y direction
+        nodes[GeometryInfo<dim>::ucd_to_deal[2]] = start+d2;
+        nodes[GeometryInfo<dim>::ucd_to_deal[3]] = start+d2+d1;
+      }
+    if (dim>=3)
+      {
+        // Add shifted quad in z direction
+        nodes[GeometryInfo<dim>::ucd_to_deal[4]] = start+d3;
+        nodes[GeometryInfo<dim>::ucd_to_deal[5]] = start+d3+d1;
+        nodes[GeometryInfo<dim>::ucd_to_deal[6]] = start+d3+d2;
+        nodes[GeometryInfo<dim>::ucd_to_deal[7]] = start+d3+d2+d1;
+      }
+
+    // All indices in the output are shifted by one.
+    stream << index+1 << "\t0 " << ucd_cell_type[dim];
+    for (unsigned int v=0; v<n_vertices; ++v)
+      stream << '\t' << nodes[v]+1;
+    stream << '\n';
+  }
+
+
+
+  /**
+   * Write the values of one node as a line: the one-based @p index
+   * first, then every entry of @p values preceded by a tab.
+   */
+  template<typename data>
+  inline
+  void
+  UcdStream::write_dataset (const unsigned int index,
+                            const std::vector<data> &values)
+  {
+    stream << index+1;
+
+    typedef typename std::vector<data>::const_iterator value_iterator;
+    for (value_iterator v=values.begin(); v!=values.end(); ++v)
+      stream << '\t' << *v;
+
+    stream << '\n';
+  }
+
+
+
+//----------------------------------------------------------------------//
+
+  // Constructor: hands the output stream and the flags on to the
+  // StreamBase base class.
+  VtkStream::VtkStream (std::ostream &out,
+                        const DataOutBase::VtkFlags &f)
+    :
+    StreamBase<DataOutBase::VtkFlags> (out, f)
+  {}
+
+
+  /**
+   * Write the point @p p on one line, followed by " 0" for every
+   * coordinate that is missing to make it three.
+   */
+  template<int dim>
+  void
+  VtkStream::write_point (const unsigned int,
+                          const Point<dim> &p)
+  {
+    // the coordinates the point actually has
+    stream << p;
+
+    // zeros for the remaining ones
+    unsigned int c = dim;
+    while (c<3)
+      {
+        stream << " 0";
+        ++c;
+      }
+
+    stream << '\n';
+  }
+
+
+
+  /**
+   * Write one cell as a line: the number of vertices per cell, then the
+   * tab separated vertex numbers.
+   */
+  template<int dim>
+  void
+  VtkStream::write_cell (unsigned int,
+                         unsigned int start,
+                         unsigned int d1,
+                         unsigned int d2,
+                         unsigned int d3)
+  {
+    stream << GeometryInfo<dim>::vertices_per_cell;
+
+    // line in x direction
+    stream << '\t' << start << '\t' << start+d1;
+
+    // second line of the quad, traversed backwards
+    if (dim>=2)
+      stream << '\t' << start+d2+d1 << '\t' << start+d2;
+
+    // the same quad, shifted by d3
+    if (dim>=3)
+      stream << '\t' << start+d3
+             << '\t' << start+d3+d1
+             << '\t' << start+d3+d2+d1
+             << '\t' << start+d3+d2;
+
+    stream << '\n';
+  }
+
+
+
+  // Constructor: hands the output stream and the flags on to the
+  // StreamBase base class.
+  VtuStream::VtuStream (std::ostream &out,
+                        const DataOutBase::VtkFlags &f)
+    :
+    StreamBase<DataOutBase::VtkFlags> (out, f)
+  {}
+
+
+  /**
+   * Handle one point, padded with zeros to three coordinates. With zlib
+   * the coordinates are only appended to the vertices array; without
+   * zlib they are written to the stream right away.
+   */
+  template<int dim>
+  void
+  VtuStream::write_point (const unsigned int,
+                          const Point<dim> &p)
+  {
+#ifdef DEAL_II_WITH_ZLIB
+    // if we want to compress, then first collect all the data in an
+    // array
+    unsigned int c = 0;
+    for (; c<dim; ++c)
+      vertices.push_back(p[c]);
+    for (; c<3; ++c)
+      vertices.push_back(0);
+#else
+    // write out coordinates
+    stream << p;
+    // fill with zeroes
+    for (unsigned int c=dim; c<3; ++c)
+      stream << " 0";
+    stream << '\n';
+#endif
+  }
+
+
+  /**
+   * With zlib, write the collected vertices followed by a newline and
+   * empty the array. Without zlib this function does nothing.
+   */
+  void
+  VtuStream::flush_points ()
+  {
+#ifdef DEAL_II_WITH_ZLIB
+    // write the data we have in memory as one block to the stream ...
+    std::ostream &out = (*this << vertices);
+    out << '\n';
+    // ... then release it
+    vertices.clear ();
+#endif
+  }
+
+
+  /**
+   * Handle the vertex numbers of one cell. With zlib they are only
+   * appended to the cells array; without zlib they are written to the
+   * stream as one tab separated line.
+   */
+  template<int dim>
+  void
+  VtuStream::write_cell (unsigned int,
+                         unsigned int start,
+                         unsigned int d1,
+                         unsigned int d2,
+                         unsigned int d3)
+  {
+#ifdef DEAL_II_WITH_ZLIB
+    // line in x direction
+    cells.push_back (start);
+    cells.push_back (start+d1);
+    // second line of the quad, traversed backwards
+    if (dim>=2)
+      {
+        cells.push_back (start+d2+d1);
+        cells.push_back (start+d2);
+      }
+    // the same quad, shifted by d3
+    if (dim>=3)
+      {
+        cells.push_back (start+d3);
+        cells.push_back (start+d3+d1);
+        cells.push_back (start+d3+d2+d1);
+        cells.push_back (start+d3+d2);
+      }
+#else
+    // line in x direction
+    stream << start << '\t' << start+d1;
+    // second line of the quad, traversed backwards
+    if (dim>=2)
+      stream << '\t' << start+d2+d1 << '\t' << start+d2;
+    // the same quad, shifted by d3
+    if (dim>=3)
+      stream << '\t' << start+d3
+             << '\t' << start+d3+d1
+             << '\t' << start+d3+d2+d1
+             << '\t' << start+d3+d2;
+    stream << '\n';
+#endif
+  }
+
+
+
+  /**
+   * With zlib, write the collected cells followed by a newline and
+   * empty the array. Without zlib this function does nothing.
+   */
+  void
+  VtuStream::flush_cells ()
+  {
+#ifdef DEAL_II_WITH_ZLIB
+    // write the data we have in memory as one block to the stream ...
+    std::ostream &out = (*this << cells);
+    out << '\n';
+    // ... then release it
+    cells.clear ();
+#endif
+  }
+
+
+  /**
+   * Write a whole vector. With zlib the vector is handed to
+   * write_compressed_block(); without zlib the entries are written one
+   * after the other, each followed by a space. The vector itself is not
+   * modified. The return value is the underlying stream.
+   */
+  template <typename T>
+  std::ostream &
+  VtuStream::operator<< (const std::vector<T> &data)
+  {
+#ifndef DEAL_II_WITH_ZLIB
+    // no compression available: plain element-by-element output
+    for (unsigned int entry=0; entry<data.size(); ++entry)
+      stream << data[entry] << ' ';
+#else
+    // write the whole vector as one compressed block
+    write_compressed_block (data, flags, stream);
+#endif
+
+    return stream;
+  }
+}
+
+
+
+namespace DataOutBase
+{
+  // Definitions of static constant members. No values are given here;
+  // presumably they are set where the members are declared (not visible
+  // in this part of the file).
+  template <int dim, int spacedim>
+  const unsigned int Patch<dim,spacedim>::space_dim;
+
+  const unsigned int Deal_II_IntermediateFlags::format_version;
+
+
+
+  template <int dim, int spacedim>
+  const unsigned int Patch<dim,spacedim>::no_neighbor;
+
+
+  /**
+   * Default constructor. Creates a patch with one subdivision, an
+   * invalid patch index, invalid neighbor indices and
+   * <code>points_are_available</code> set to false.
+   */
+  template <int dim, int spacedim>
+  Patch<dim,spacedim>::Patch ()
+    :
+    patch_index(no_neighbor),
+    n_subdivisions (1),
+    points_are_available(false)
+    // all the other data has a
+    // constructor of its own, except
+    // for the "neighbors" field, which
+    // we set to invalid values.
+  {
+    for (unsigned int i=0; i<GeometryInfo<dim>::faces_per_cell; ++i)
+      neighbors[i] = no_neighbor;
+
+    // sanity checks on the template arguments
+    Assert (dim<=spacedim, ExcIndexRange(dim,0,spacedim));
+    Assert (spacedim<=3, ExcNotImplemented());
+  }
+
+
+
+  /**
+   * Compare this patch with @p patch. Vertices are considered equal
+   * if their distance does not exceed a fixed absolute tolerance; all
+   * other members, including every entry of the data table, have to
+   * match exactly.
+   */
+  template <int dim, int spacedim>
+  bool
+  Patch<dim,spacedim>::operator == (const Patch &patch) const
+  {
+//TODO: make tolerance relative
+    const double epsilon=3e-16;
+    for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+      if (vertices[v].distance(patch.vertices[v]) > epsilon)
+        return false;
+
+    for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+      if (neighbors[f] != patch.neighbors[f])
+        return false;
+
+    // scalar members and the shape of the data table. the shape has
+    // to agree before we may compare the table entry by entry
+    if ((patch_index != patch.patch_index)
+        ||
+        (n_subdivisions != patch.n_subdivisions)
+        ||
+        (points_are_available != patch.points_are_available)
+        ||
+        (data.n_rows() != patch.data.n_rows())
+        ||
+        (data.n_cols() != patch.data.n_cols()))
+      return false;
+
+    for (unsigned int row=0; row<data.n_rows(); ++row)
+      for (unsigned int col=0; col<data.n_cols(); ++col)
+        if (data[row][col] != patch.data[row][col])
+          return false;
+
+    // no difference found
+    return true;
+  }
+
+
+
+  /**
+   * Return an estimate of the memory used by this object, obtained
+   * by adding up the estimates for all of its member variables.
+   */
+  template <int dim, int spacedim>
+  std::size_t
+  Patch<dim,spacedim>::memory_consumption () const
+  {
+    return (sizeof(vertices) / sizeof(vertices[0]) *
+            MemoryConsumption::memory_consumption(vertices[0])
+            +
+            // the neighbors array and the patch index are members as
+            // well (they are set in the constructor and exchanged in
+            // swap()), so they have to be counted too
+            sizeof(neighbors) / sizeof(neighbors[0]) *
+            MemoryConsumption::memory_consumption(neighbors[0])
+            +
+            MemoryConsumption::memory_consumption(patch_index)
+            +
+            MemoryConsumption::memory_consumption(n_subdivisions)
+            +
+            MemoryConsumption::memory_consumption(data)
+            +
+            MemoryConsumption::memory_consumption(points_are_available));
+  }
+
+
+
+  /**
+   * Exchange the contents of this patch with those of @p other_patch,
+   * member by member.
+   */
+  template <int dim, int spacedim>
+  void
+  Patch<dim,spacedim>::swap (Patch<dim,spacedim> &other_patch)
+  {
+    std::swap (vertices, other_patch.vertices);
+    std::swap (neighbors, other_patch.neighbors);
+    std::swap (patch_index, other_patch.patch_index);
+    std::swap (n_subdivisions, other_patch.n_subdivisions);
+    // the data table provides its own swap function
+    data.swap (other_patch.data);
+    std::swap (points_are_available, other_patch.points_are_available);
+  }
+
+
+
+  /**
+   * Constructor. Stores the given value of the @p write_preamble flag.
+   */
+  UcdFlags::UcdFlags (const bool write_preamble)
+    :
+    write_preamble (write_preamble)
+  {}
+
+
+
+  /**
+   * Constructor. Copies each argument into the member variable of
+   * the same name.
+   */
+  PovrayFlags::PovrayFlags (const bool smooth,
+                            const bool bicubic_patch,
+                            const bool external_data)
+    :
+    smooth (smooth),
+    bicubic_patch(bicubic_patch),
+    external_data(external_data)
+  {}
+
+
+  /**
+   * Constructor. Copies each argument into the member variable of
+   * the same name.
+   */
+  DataOutFilterFlags::DataOutFilterFlags (const bool filter_duplicate_vertices,
+                                          const bool xdmf_hdf5_output) :
+    filter_duplicate_vertices(filter_duplicate_vertices),
+    xdmf_hdf5_output(xdmf_hdf5_output)
+  {}
+
+
+  /**
+   * Declare the two boolean parameters of this class in @p prm. Both
+   * default to "false". The entry names used here are the ones read
+   * back in parse_parameters().
+   */
+  void DataOutFilterFlags::declare_parameters (ParameterHandler &prm)
+  {
+    prm.declare_entry ("Filter duplicate vertices", "false",
+                       Patterns::Bool(),
+                       "Whether to remove duplicate vertex values.");
+    prm.declare_entry ("XDMF HDF5 output", "false",
+                       Patterns::Bool(),
+                       "Whether the data will be used in an XDMF/HDF5 combination.");
+  }
+
+
+
+  /**
+   * Set the member variables from the entries of @p prm that were
+   * declared in declare_parameters().
+   */
+  void DataOutFilterFlags::parse_parameters (const ParameterHandler &prm)
+  {
+    filter_duplicate_vertices = prm.get_bool ("Filter duplicate vertices");
+    xdmf_hdf5_output = prm.get_bool ("XDMF HDF5 output");
+  }
+
+
+
+  /**
+   * Constructor. Copies each argument into the member variable of
+   * the same name. There is no argument for <code>data_double</code>;
+   * it is always initialized to false.
+   */
+  DXFlags::DXFlags (const bool write_neighbors,
+                    const bool int_binary,
+                    const bool coordinates_binary,
+                    const bool data_binary)
+    :
+    write_neighbors(write_neighbors),
+    int_binary(int_binary),
+    coordinates_binary(coordinates_binary),
+    data_binary(data_binary),
+    data_double(false)
+  {}
+
+
+  /**
+   * Declare the OpenDX related parameters in @p prm: one boolean
+   * entry and three entries that select between "ascii", "32" and
+   * "64", each defaulting to "ascii".
+   */
+  void DXFlags::declare_parameters (ParameterHandler &prm)
+  {
+    prm.declare_entry ("Write neighbors", "true",
+                       Patterns::Bool(),
+                       "A boolean field indicating whether neighborship "
+                       "information between cells is to be written to the "
+                       "OpenDX output file");
+    prm.declare_entry ("Integer format", "ascii",
+                       Patterns::Selection("ascii|32|64"),
+                       "Output format of integer numbers, which is "
+                       "either a text representation (ascii) or binary integer "
+                       "values of 32 or 64 bits length");
+    prm.declare_entry ("Coordinates format", "ascii",
+                       Patterns::Selection("ascii|32|64"),
+                       "Output format of vertex coordinates, which is "
+                       "either a text representation (ascii) or binary "
+                       "floating point values of 32 or 64 bits length");
+    prm.declare_entry ("Data format", "ascii",
+                       Patterns::Selection("ascii|32|64"),
+                       "Output format of data values, which is "
+                       "either a text representation (ascii) or binary "
+                       "floating point values of 32 or 64 bits length");
+  }
+
+
+
+  /**
+   * Set the member variables from @p prm. At present only the
+   * "Write neighbors" entry is read; the three format entries
+   * declared in declare_parameters() are not evaluated here, so the
+   * corresponding members keep their previous values.
+   */
+  void DXFlags::parse_parameters (const ParameterHandler &prm)
+  {
+    write_neighbors = prm.get_bool ("Write neighbors");
+//TODO:[GK] Read the new  parameters
+  }
+
+
+
+  /**
+   * Declare the single boolean parameter "Write preamble" in @p prm;
+   * its default value is "true".
+   */
+  void UcdFlags::declare_parameters (ParameterHandler &prm)
+  {
+    prm.declare_entry ("Write preamble", "true",
+                       Patterns::Bool(),
+                       "A flag indicating whether a comment should be "
+                       "written to the beginning of the output file "
+                       "indicating date and time of creation as well "
+                       "as the creating program");
+  }
+
+
+
+  /**
+   * Set <code>write_preamble</code> from the "Write preamble" entry
+   * of @p prm.
+   */
+  void UcdFlags::parse_parameters (const ParameterHandler &prm)
+  {
+    write_preamble = prm.get_bool ("Write preamble");
+  }
+
+
+
+  /**
+   * Constructor. Copies each argument into the member variable of
+   * the same name. The image size cannot be chosen through this
+   * constructor: <code>height</code> is always initialized to 4000
+   * and <code>width</code> to 0.
+   */
+  SvgFlags::SvgFlags (const unsigned int height_vector,
+                      const int azimuth_angle,
+                      const int polar_angle,
+                      const unsigned int line_thickness,
+                      const bool margin,
+                      const bool draw_colorbar)
+    :
+    height(4000),
+    width(0),
+    height_vector(height_vector),
+    azimuth_angle(azimuth_angle),
+    polar_angle(polar_angle),
+    line_thickness(line_thickness),
+    margin(margin),
+    draw_colorbar(draw_colorbar)
+  {}
+
+
+
+  /**
+   * Declare the three boolean POVRAY parameters in @p prm. The entry
+   * names used here are the ones read back in parse_parameters().
+   */
+  void PovrayFlags::declare_parameters (ParameterHandler &prm)
+  {
+    prm.declare_entry ("Use smooth triangles", "false",
+                       Patterns::Bool(),
+                       "A flag indicating whether POVRAY should use smoothed "
+                       "triangles instead of the usual ones");
+    prm.declare_entry ("Use bicubic patches", "false",
+                       Patterns::Bool(),
+                       "Whether POVRAY should use bicubic patches");
+    prm.declare_entry ("Include external file", "true",
+                       Patterns::Bool (),
+                       "Whether camera and lighting information should "
+                       "be put into an external file \"data.inc\" or into "
+                       "the POVRAY input file");
+  }
+
+
+
+  /**
+   * Set the three member flags from the entries of @p prm that were
+   * declared in declare_parameters().
+   */
+  void PovrayFlags::parse_parameters (const ParameterHandler &prm)
+  {
+    smooth        = prm.get_bool ("Use smooth triangles");
+    bicubic_patch = prm.get_bool ("Use bicubic patches");
+    external_data = prm.get_bool ("Include external file");
+  }
+
+
+
+  /**
+   * Constructor. Copies each argument into the member variable of
+   * the same name.
+   */
+  EpsFlags::EpsFlags (const unsigned int  height_vector,
+                      const unsigned int  color_vector,
+                      const SizeType      size_type,
+                      const unsigned int  size,
+                      const double        line_width,
+                      const double        azimut_angle,
+                      const double        turn_angle,
+                      const double        z_scaling,
+                      const bool          draw_mesh,
+                      const bool          draw_cells,
+                      const bool          shade_cells,
+                      const ColorFunction color_function)
+    :
+    height_vector(height_vector),
+    color_vector(color_vector),
+    size_type(size_type),
+    size(size),
+    line_width(line_width),
+    azimut_angle(azimut_angle),
+    turn_angle(turn_angle),
+    z_scaling(z_scaling),
+    draw_mesh(draw_mesh),
+    draw_cells(draw_cells),
+    shade_cells(shade_cells),
+    color_function(color_function)
+  {}
+
+
+
+  /**
+   * Map the value @p x from the range [@p xmin, @p xmax] to a color
+   * on a black - blue - green - red - white scale, see the table
+   * below. If @p xmin and @p xmax are equal, white is returned.
+   */
+  EpsFlags::RgbValues
+  EpsFlags::default_color_function (const double x,
+                                    const double xmin,
+                                    const double xmax)
+  {
+    RgbValues rgb_values = { 0,0,0 };
+
+// A difficult color scale:
+//     xmin          = black  (1)
+// 3/4*xmin+1/4*xmax = blue   (2)
+// 1/2*xmin+1/2*xmax = green  (3)
+// 1/4*xmin+3/4*xmax = red    (4)
+//              xmax = white  (5)
+// Makes the following color functions:
+//
+// red      green    blue
+//       __
+//      /      /\  /  /\    /
+// ____/    __/  \/  /  \__/
+
+//     { 0                                (1) - (3)
+// r = { ( 4*x-2*xmin-2*xmax)/(xmax-xmin) (3) - (4)
+//     { 1                                (4) - (5)
+//
+//     { 0                                (1) - (2)
+// g = { ( 4*x-3*xmin-  xmax)/(xmax-xmin) (2) - (3)
+//     { (-4*x+  xmin+3*xmax)/(xmax-xmin) (3) - (4)
+//     { ( 4*x-  xmin-3*xmax)/(xmax-xmin) (4) - (5)
+//
+//     { ( 4*x-4*xmin       )/(xmax-xmin) (1) - (2)
+// b = { (-4*x+2*xmin+2*xmax)/(xmax-xmin) (2) - (3)
+//     { 0                                (3) - (4)
+//     { ( 4*x-  xmin-3*xmax)/(xmax-xmin) (4) - (5)
+
+    const double sum   =   xmax+  xmin;
+    const double sum13 =   xmin+3*xmax;
+    const double sum22 = 2*xmin+2*xmax;
+    const double sum31 = 3*xmin+  xmax;
+    const double dif = xmax-xmin;
+
+    // A range of zero width cannot be scaled: return white. This
+    // has to be checked before the reciprocal is formed below.
+    if (dif == 0)
+      {
+        rgb_values.red = rgb_values.green = rgb_values.blue = 1;
+        return rgb_values;
+      }
+
+    const double rezdif = 1.0/dif;
+
+    if (x<(sum31)/4)
+      {
+        // between (1) and (2)
+        rgb_values.red   = 0;
+        rgb_values.green = 0;
+        rgb_values.blue  = (x-xmin)*4.*rezdif;
+      }
+    else if (x<(sum22)/4)
+      {
+        // between (2) and (3)
+        rgb_values.red   = 0;
+        rgb_values.green = (4*x-3*xmin-xmax)*rezdif;
+        rgb_values.blue  = (sum22-4.*x)*rezdif;
+      }
+    else if (x<(sum13)/4)
+      {
+        // between (3) and (4)
+        rgb_values.red   = (4*x-2*sum)*rezdif;
+        rgb_values.green = (xmin+3*xmax-4*x)*rezdif;
+        rgb_values.blue  = 0;
+      }
+    else
+      {
+        // between (4) and (5), and everything that did not satisfy
+        // any of the comparisons above
+        rgb_values.red   = 1;
+        rgb_values.green = (4*x-xmin-3*xmax)*rezdif;
+        rgb_values.blue  = (4.*x-sum13)*rezdif;
+      }
+
+    return rgb_values;
+  }
+
+
+
+  /**
+   * Map @p x linearly to a grey value: all three color components
+   * are set to (x-xmin)/(xmax-xmin). There is no special treatment
+   * of the case that @p xmin equals @p xmax.
+   */
+  EpsFlags::RgbValues
+  EpsFlags::grey_scale_color_function (const double x,
+                                       const double xmin,
+                                       const double xmax)
+  {
+    const double grey = (x-xmin)/(xmax-xmin);
+
+    EpsFlags::RgbValues rgb_values;
+    rgb_values.green = grey;
+    rgb_values.blue  = grey;
+    rgb_values.red   = grey;
+    return rgb_values;
+  }
+
+
+
+  /**
+   * Map @p x linearly to a grey value with the scale reversed: all
+   * three color components are set to 1-(x-xmin)/(xmax-xmin). There
+   * is no special treatment of the case that @p xmin equals @p xmax.
+   */
+  EpsFlags::RgbValues
+  EpsFlags::reverse_grey_scale_color_function (const double x,
+                                               const double xmin,
+                                               const double xmax)
+  {
+    const double grey = 1-(x-xmin)/(xmax-xmin);
+
+    EpsFlags::RgbValues rgb_values;
+    rgb_values.green = grey;
+    rgb_values.blue  = grey;
+    rgb_values.red   = grey;
+    return rgb_values;
+  }
+
+
+
+  /**
+   * Comparison operator used for sorting cells. The order is
+   * deliberately inverted: a cell compares "less" than @p e if its
+   * depth is the larger one.
+   */
+  bool EpsCell2d::operator < (const EpsCell2d &e) const
+  {
+    return e.depth < depth;
+  }
+
+
+
+  /**
+   * Declare all parameters of the EPS output format in @p prm,
+   * together with their default values and admissible patterns. The
+   * entry names used here are the ones read back in
+   * parse_parameters().
+   */
+  void EpsFlags::declare_parameters (ParameterHandler &prm)
+  {
+    prm.declare_entry ("Index of vector for height", "0",
+                       Patterns::Integer(),
+                       "Number of the input vector that is to be used to "
+                       "generate height information");
+    prm.declare_entry ("Index of vector for color", "0",
+                       Patterns::Integer(),
+                       "Number of the input vector that is to be used to "
+                       "generate color information");
+    prm.declare_entry ("Scale to width or height", "width",
+                       Patterns::Selection ("width|height"),
+                       "Whether width or height should be scaled to match "
+                       "the given size");
+    prm.declare_entry ("Size (width or height) in eps units", "300",
+                       Patterns::Integer(),
+                       "The size (width or height) to which the eps output "
+                       "file is to be scaled");
+    prm.declare_entry ("Line widths in eps units", "0.5",
+                       Patterns::Double(),
+                       "The width in which the postscript renderer is to "
+                       "plot lines");
+    prm.declare_entry ("Azimut angle", "60",
+                       Patterns::Double(0,180),
+                       "Angle of the viewing position against the vertical "
+                       "axis");
+    prm.declare_entry ("Turn angle", "30",
+                       Patterns::Double(0,360),
+                       "Angle of the viewing direction against the y-axis");
+    prm.declare_entry ("Scaling for z-axis", "1",
+                       Patterns::Double (),
+                       "Scaling for the z-direction relative to the scaling "
+                       "used in x- and y-directions");
+    prm.declare_entry ("Draw mesh lines", "true",
+                       Patterns::Bool(),
+                       "Whether the mesh lines, or only the surface should be "
+                       "drawn");
+    prm.declare_entry ("Fill interior of cells", "true",
+                       Patterns::Bool(),
+                       "Whether only the mesh lines, or also the interior of "
+                       "cells should be plotted. If this flag is false, then "
+                       "one can see through the mesh");
+    prm.declare_entry ("Color shading of interior of cells", "true",
+                       Patterns::Bool(),
+                       "Whether the interior of cells shall be shaded");
+    // the three names offered here are the ones parse_parameters()
+    // maps to the color functions of this class
+    prm.declare_entry ("Color function", "default",
+                       Patterns::Selection ("default|grey scale|reverse grey scale"),
+                       "Name of a color function used to colorize mesh lines "
+                       "and/or cell interiors");
+  }
+
+
+
+  /**
+   * Set all member variables from the entries of @p prm that were
+   * declared in declare_parameters().
+   */
+  void EpsFlags::parse_parameters (const ParameterHandler &prm)
+  {
+    height_vector = prm.get_integer ("Index of vector for height");
+    color_vector  = prm.get_integer ("Index of vector for color");
+
+    // anything other than "width" selects scaling by height
+    const bool scale_to_width
+      = (prm.get ("Scale to width or height") == "width");
+    size_type     = (scale_to_width ? width : height);
+
+    size          = prm.get_integer ("Size (width or height) in eps units");
+    line_width    = prm.get_double ("Line widths in eps units");
+    azimut_angle  = prm.get_double ("Azimut angle");
+    turn_angle    = prm.get_double ("Turn angle");
+    z_scaling     = prm.get_double ("Scaling for z-axis");
+    draw_mesh     = prm.get_bool ("Draw mesh lines");
+    draw_cells    = prm.get_bool ("Fill interior of cells");
+    shade_cells   = prm.get_bool ("Color shading of interior of cells");
+
+    // translate the name of the color function into a pointer to
+    // the function itself
+    const std::string color_function_name = prm.get("Color function");
+    if (color_function_name == "default")
+      color_function = &default_color_function;
+    else if (color_function_name == "grey scale")
+      color_function = &grey_scale_color_function;
+    else if (color_function_name == "reverse grey scale")
+      color_function = &reverse_grey_scale_color_function;
+    else
+      // we shouldn't get here, since
+      // the parameter object should
+      // already have checked that the
+      // given value is valid
+      Assert (false, ExcInternalError());
+  }
+
+
+
+  /**
+   * Constructor. Stores the two given pointers and the solution
+   * time in the member variables of the same names; the strings
+   * pointed to are not copied here.
+   */
+  TecplotFlags::
+  TecplotFlags (const char *tecplot_binary_file_name,
+                const char *zone_name,
+                const double solution_time)
+    :
+    tecplot_binary_file_name(tecplot_binary_file_name),
+    zone_name(zone_name),
+    solution_time (solution_time)
+  {}
+
+
+
+  /**
+   * Return an estimate of the memory used by this object: the size
+   * of the object itself plus the estimates for the two strings it
+   * refers to.
+   */
+  std::size_t
+  TecplotFlags::memory_consumption () const
+  {
+    return sizeof(*this)
+           + MemoryConsumption::memory_consumption(tecplot_binary_file_name)
+           + MemoryConsumption::memory_consumption(zone_name);
+  }
+
+
+
+  /**
+   * Constructor. Copies each argument into the member variable of
+   * the same name.
+   */
+  VtkFlags::VtkFlags (const double time,
+                      const unsigned int cycle,
+                      const bool print_date_and_time,
+                      const VtkFlags::ZlibCompressionLevel compression_level)
+    :
+    time (time),
+    cycle (cycle),
+    print_date_and_time (print_date_and_time),
+    compression_level (compression_level)
+  {}
+
+
+
+  /**
+   * Return the OutputFormat value that corresponds to the name
+   * @p format_name. Every name listed by get_output_format_names()
+   * is recognized. For any other name an exception is thrown via
+   * AssertThrow.
+   */
+  OutputFormat
+  parse_output_format (const std::string &format_name)
+  {
+    if (format_name == "none")
+      return none;
+
+    if (format_name == "dx")
+      return dx;
+
+    if (format_name == "ucd")
+      return ucd;
+
+    if (format_name == "gnuplot")
+      return gnuplot;
+
+    if (format_name == "povray")
+      return povray;
+
+    if (format_name == "eps")
+      return eps;
+
+    if (format_name == "gmv")
+      return gmv;
+
+    if (format_name == "tecplot")
+      return tecplot;
+
+    if (format_name == "tecplot_binary")
+      return tecplot_binary;
+
+    if (format_name == "vtk")
+      return vtk;
+
+    if (format_name == "vtu")
+      return vtu;
+
+    // "svg" is advertised by get_output_format_names() and handled
+    // by default_suffix(), so it has to be accepted here as well
+    if (format_name == "svg")
+      return svg;
+
+    if (format_name == "deal.II intermediate")
+      return deal_II_intermediate;
+
+    if (format_name == "hdf5")
+      return hdf5;
+
+    AssertThrow (false,
+                 ExcMessage ("The given file format name is not recognized: <"
+                             + format_name + ">"));
+
+    // return something invalid
+    return OutputFormat(-1);
+  }
+
+
+
+  /**
+   * Return the names of the output formats as a single string in
+   * which the individual names are separated by <code>|</code>.
+   */
+  std::string
+  get_output_format_names ()
+  {
+    return "none|dx|ucd|gnuplot|povray|eps|gmv|tecplot|tecplot_binary|vtk|vtu|hdf5|svg|deal.II intermediate";
+  }
+
+
+
+  /**
+   * Return the file name suffix, including the leading dot, that is
+   * used for files of format @p output_format. For <code>none</code>
+   * the empty string is returned. For a value not listed below an
+   * assertion is triggered and the empty string is returned.
+   */
+  std::string
+  default_suffix (const OutputFormat output_format)
+  {
+    switch (output_format)
+      {
+      case none:
+        return "";
+      case dx:
+        return ".dx";
+      case ucd:
+        return ".inp";
+      case gnuplot:
+        return ".gnuplot";
+      case povray:
+        return ".pov";
+      case eps:
+        return ".eps";
+      case gmv:
+        return ".gmv";
+      case tecplot:
+        return ".dat";
+      case tecplot_binary:
+        return ".plt";
+      case vtk:
+        return ".vtk";
+      case vtu:
+        return ".vtu";
+      case deal_II_intermediate:
+        return ".d2";
+      case hdf5:
+        return ".h5";
+      case svg:
+        return ".svg";
+      default:
+        Assert (false, ExcNotImplemented());
+        return "";
+      }
+  }
+
+
+//----------------------------------------------------------------------//
+
+  /**
+   * Compute the location of every point of every patch in
+   * @p patches and pass it to <code>out.write_point()</code> along
+   * with a running point number. The points of one patch are visited
+   * with the first coordinate direction running fastest. At the end
+   * <code>out.flush_points()</code> is called.
+   */
+  template <int dim, int spacedim, typename StreamType>
+  void
+  write_nodes (const std::vector<Patch<dim,spacedim> > &patches,
+               StreamType                              &out)
+  {
+    Assert (dim<=3, ExcNotImplemented());
+
+    typedef
+    typename std::vector<Patch<dim,spacedim> >::const_iterator
+    PatchIterator;
+
+    // running number of the point over all patches
+    unsigned int point_index = 0;
+    // storage for the location of the current point
+    Point<spacedim> node;
+
+    for (PatchIterator patch=patches.begin(); patch!=patches.end(); ++patch)
+      {
+        const unsigned int n_subdivisions = patch->n_subdivisions;
+        const unsigned int n_points_1d = n_subdivisions+1;
+
+        // number of points per direction; directions beyond dim
+        // are covered by a loop of length one
+        const unsigned int n_x = (dim>0) ? n_points_1d : 1;
+        const unsigned int n_y = (dim>1) ? n_points_1d : 1;
+        const unsigned int n_z = (dim>2) ? n_points_1d : 1;
+
+        for (unsigned int z=0; z<n_z; ++z)
+          for (unsigned int y=0; y<n_y; ++y)
+            for (unsigned int x=0; x<n_x; ++x)
+              {
+                compute_node(node, &*patch, x, y, z, n_subdivisions);
+                out.write_point(point_index, node);
+                ++point_index;
+              }
+      }
+    out.flush_points ();
+  }
+
+  /**
+   * Pass every sub-cell of every patch in @p patches to
+   * <code>out.write_cell()</code>. Each call receives a running cell
+   * number, the number of the first vertex of the sub-cell and the
+   * vertex number offsets in the three directions. The vertex
+   * numbers assume a lexicographic numbering of the points of a
+   * patch, with consecutive patches numbered one after the other.
+   * At the end <code>out.flush_cells()</code> is called.
+   */
+  template <int dim, int spacedim, typename StreamType>
+  void
+  write_cells (const std::vector<Patch<dim,spacedim> > &patches,
+               StreamType                              &out)
+  {
+    Assert (dim<=3, ExcNotImplemented());
+
+    typedef
+    typename std::vector<Patch<dim,spacedim> >::const_iterator
+    PatchIterator;
+
+    // running number of the cell over all patches
+    unsigned int cell_index = 0;
+    // number of the first vertex of the current patch
+    unsigned int first_vertex_of_patch = 0;
+
+    for (PatchIterator patch=patches.begin(); patch!=patches.end(); ++patch)
+      {
+        const unsigned int n_subdivisions = patch->n_subdivisions;
+        const unsigned int n_points_1d = n_subdivisions+1;
+
+        // number of sub-cells per direction; directions beyond dim
+        // are covered by a loop of length one
+        const unsigned int n_x = (dim>0) ? n_subdivisions : 1;
+        const unsigned int n_y = (dim>1) ? n_subdivisions : 1;
+        const unsigned int n_z = (dim>2) ? n_subdivisions : 1;
+
+        // distance, in vertex numbers, between neighboring vertices
+        // in each of the three directions
+        unsigned int d1 = 1;
+        unsigned int d2 = n_points_1d;
+        unsigned int d3 = n_points_1d*n_points_1d;
+
+        for (unsigned int z=0; z<n_z; ++z)
+          for (unsigned int y=0; y<n_y; ++y)
+            for (unsigned int x=0; x<n_x; ++x)
+              {
+                const unsigned int offset
+                  = first_vertex_of_patch + z*d3 + y*d2 + x*d1;
+                out.template write_cell<dim>(cell_index, offset, d1, d2, d3);
+                ++cell_index;
+              }
+
+        // the next patch starts after all the vertices of this one
+        first_vertex_of_patch += Utilities::fixed_power<dim>(n_points_1d);
+      }
+
+    out.flush_cells ();
+  }
+
+
+  /**
+   * For every point of every patch in @p patches, collect the values
+   * of the first @p n_data_sets data sets at that point in a vector
+   * and pass it to <code>out.write_dataset()</code> together with a
+   * running point number. The vector holds doubles if
+   * @p double_precision is true and floats otherwise.
+   */
+  template <int dim, int spacedim, class StreamType>
+  void
+  write_data
+  (const std::vector<Patch<dim,spacedim> > &patches,
+   unsigned int                             n_data_sets,
+   const bool                               double_precision,
+   StreamType                              &out)
+  {
+    Assert (dim<=3, ExcNotImplemented());
+
+    typedef
+    typename std::vector<Patch<dim,spacedim> >::const_iterator
+    PatchIterator;
+
+    // running number of the point over all patches
+    unsigned int point_index = 0;
+
+    for (PatchIterator patch=patches.begin(); patch!=patches.end(); ++patch)
+      {
+        const unsigned int n_subdivisions = patch->n_subdivisions;
+        const unsigned int n = n_subdivisions+1;
+
+        // the data table must have one row per data set, plus
+        // spacedim more rows if the patch carries its own points,
+        // and one column per point of the patch
+        Assert ((patch->data.n_rows() == n_data_sets && !patch->points_are_available) ||
+                (patch->data.n_rows() == n_data_sets+spacedim && patch->points_are_available),
+                ExcDimensionMismatch (patch->points_are_available
+                                      ?
+                                      (n_data_sets + spacedim)
+                                      :
+                                      n_data_sets,
+                                      patch->data.n_rows()));
+        Assert (patch->data.n_cols() == Utilities::fixed_power<dim>(n),
+                ExcInvalidDatasetSize (patch->data.n_cols(), n));
+
+        const unsigned int n_points = Utilities::fixed_power<dim>(n);
+
+        // Data is already in lexicographic ordering, so the columns
+        // can be written in the order in which they are stored
+        if (double_precision)
+          {
+            std::vector<double> values(n_data_sets);
+            for (unsigned int point=0; point<n_points; ++point, ++point_index)
+              {
+                for (unsigned int data_set=0; data_set<n_data_sets; ++data_set)
+                  values[data_set] = patch->data(data_set, point);
+                out.write_dataset(point_index, values);
+              }
+          }
+        else
+          {
+            std::vector<float> values(n_data_sets);
+            for (unsigned int point=0; point<n_points; ++point, ++point_index)
+              {
+                for (unsigned int data_set=0; data_set<n_data_sets; ++data_set)
+                  values[data_set] = patch->data(data_set, point);
+                out.write_dataset(point_index, values);
+              }
+          }
+      }
+  }
+
+
+
+  namespace
+  {
+    /**
+     * This function projects a three-dimensional point (Point<3> point)
+     * onto a two-dimensional image plane, specified by the position of
+     * the camera viewing system (Point<3> camera_position), camera
+     * direction (Point<3> camera_direction), camera horizontal (Point<3>
+     * camera_horizontal, necessary for the correct alignment of the
+     * later images), and the focus of the camera (float camera_focus).
+     *
+     * The returned point holds the coordinates of the projection with
+     * respect to the camera horizontal (first entry) and the camera
+     * vertical (second entry). No check is made for a point whose
+     * offset from the camera is perpendicular to the camera direction,
+     * in which case the divisor below is zero.
+     */
+    Point<2> svg_project_point(Point<3> point, Point<3> camera_position, Point<3> camera_direction, Point<3> camera_horizontal, float camera_focus)
+    {
+      // the camera vertical is the cross product of the camera
+      // horizontal and the camera direction
+      Point<3> camera_vertical;
+      camera_vertical[0] = camera_horizontal[1] * camera_direction[2] - camera_horizontal[2] * camera_direction[1];
+      camera_vertical[1] = camera_horizontal[2] * camera_direction[0] - camera_horizontal[0] * camera_direction[2];
+      camera_vertical[2] = camera_horizontal[0] * camera_direction[1] - camera_horizontal[1] * camera_direction[0];
+
+      // phi is the camera focus divided by the scalar product of
+      // (point - camera_position) with the camera direction
+      float phi;
+      phi  = camera_focus;
+      phi /= (point[0] - camera_position[0]) * camera_direction[0] + (point[1] - camera_position[1]) * camera_direction[1] + (point[2] - camera_position[2]) * camera_direction[2];
+
+      // scale the vector from the camera to the point by phi
+      Point<3> projection;
+      projection[0] = camera_position[0] + phi * (point[0] - camera_position[0]);
+      projection[1] = camera_position[1] + phi * (point[1] - camera_position[1]);
+      projection[2] = camera_position[2] + phi * (point[2] - camera_position[2]);
+
+      // express the projected point, relative to the point
+      // camera_position + camera_focus * camera_direction, through
+      // its components along the camera horizontal and vertical
+      Point<2> projection_decomposition;
+      projection_decomposition[0]  = (projection[0] - camera_position[0] - camera_focus * camera_direction[0]) * camera_horizontal[0];
+      projection_decomposition[0] += (projection[1] - camera_position[1] - camera_focus * camera_direction[1]) * camera_horizontal[1];
+      projection_decomposition[0] += (projection[2] - camera_position[2] - camera_focus * camera_direction[2]) * camera_horizontal[2];
+
+      projection_decomposition[1]  = (projection[0] - camera_position[0] - camera_focus * camera_direction[0]) * camera_vertical[0];
+      projection_decomposition[1] += (projection[1] - camera_position[1] - camera_focus * camera_direction[1]) * camera_vertical[1];
+      projection_decomposition[1] += (projection[2] - camera_position[2] - camera_focus * camera_direction[2]) * camera_vertical[2];
+
+      return projection_decomposition;
+    }
+
+
+    /**
+     * Solve the 2x2 linear system
+     * @code
+     *   a00*s0 + a01*s1 = rhs[0]
+     *   a10*s0 + a11*s1 = rhs[1]
+     * @endcode
+     * by Gaussian elimination and return (s0,s1). If a00 is zero the
+     * two columns are exchanged first and the two unknowns are
+     * exchanged back at the end. No other pivoting is done and
+     * singular systems are not detected.
+     */
+    Point<2> svg_solve_2x2 (double a00, double a01,
+                            double a10, double a11,
+                            Point<2> rhs)
+    {
+      bool col_change = false;
+
+      if (a00 == 0)
+        {
+          col_change = true;
+
+          // a00 is zero, so exchanging the first row's entries
+          // leaves a zero in the second column
+          a00 = a01;
+          a01 = 0;
+
+          const double temp = a10;
+          a10 = a11;
+          a11 = temp;
+        }
+
+      // eliminate the first unknown from the second equation
+      const double x = a10 / a00;
+      a11 = a11 - a01 * x;
+      rhs[1] = rhs[1] - rhs[0] * x;
+
+      // back substitution
+      rhs[1] = rhs[1] / a11;
+      rhs[0] = (rhs[0] - a01 * rhs[1]) / a00;
+
+      if (col_change)
+        {
+          const double temp = rhs[0];
+          rhs[0] = rhs[1];
+          rhs[1] = temp;
+        }
+
+      return rhs;
+    }
+
+
+    /**
+     * Function to compute the gradient parameters for a triangle with
+     * given values for the vertices.
+     *
+     * @p points has to hold three points whose first two coordinates
+     * are a position and whose third coordinate is the value there.
+     * The array is sorted in place by ascending third coordinate.
+     *
+     * The six entries of the result are, in this order: the position
+     * of the vertex with the smallest value (2 entries), that
+     * position shifted along the normalized gradient by the
+     * projection onto the gradient of the vector to the vertex with
+     * the largest value (2 entries), the smallest value, and the
+     * largest value. A vanishing gradient is not treated specially.
+     */
+    Point<6> svg_get_gradient_parameters(Point<3> points[])
+    {
+      // Use the Bubblesort algorithm to sort the points with respect to the third coordinate
+      for (int i = 0; i < 2; ++i)
+        {
+          for (int j = 0; j < 2-i; ++j)
+            {
+              if (points[j][2] > points[j + 1][2])
+                {
+                  Point<3> temp = points[j];
+                  points[j] = points[j+1];
+                  points[j+1] = temp;
+                }
+            }
+        }
+
+      // save the related three-dimensional vectors v_min, v_inter, and v_max
+      const Point<3> v_min   = points[0];
+      const Point<3> v_inter = points[1];
+      const Point<3> v_max   = points[2];
+
+      // The columns of the matrix are the edges from v_min to v_max
+      // and from v_min to v_inter. Solving with the right hand side
+      // (target - v_min) yields the coefficients of these two edges
+      // that lead from v_min to the target position.
+      const double a00 = v_max[0] - v_min[0];
+      const double a01 = v_inter[0] - v_min[0];
+      const double a10 = v_max[1] - v_min[1];
+      const double a11 = v_inter[1] - v_min[1];
+
+      Point<2> rhs, b, gradient;
+
+      // determine the plane offset c, i.e. the value at (0,0)
+      rhs[0] = - v_min[0];
+      rhs[1] = - v_min[1];
+      b = svg_solve_2x2 (a00, a01, a10, a11, rhs);
+
+      double c = b[0] * (v_max[2] - v_min[2]) + b[1] * (v_inter[2] - v_min[2]) + v_min[2];
+
+      // Determine the first entry of the gradient (phi, cf. documentation):
+      // the value at (1,0) minus the value at (0,0)
+      rhs[0] = 1.0 - v_min[0];
+      rhs[1] = - v_min[1];
+      b = svg_solve_2x2 (a00, a01, a10, a11, rhs);
+
+      gradient[0] = b[0] * (v_max[2] - v_min[2]) + b[1] * (v_inter[2] - v_min[2]) - c + v_min[2];
+
+      // determine the second entry of the gradient: the value at
+      // (0,1) minus the value at (0,0)
+      rhs[0] = - v_min[0];
+      rhs[1] = 1.0 - v_min[1];
+      b = svg_solve_2x2 (a00, a01, a10, a11, rhs);
+
+      gradient[1] = b[0] * (v_max[2] - v_min[2]) + b[1] * (v_inter[2] - v_min[2]) - c + v_min[2];
+
+      // normalize the gradient
+      double gradient_norm = sqrt(pow(gradient[0], 2.0) + pow(gradient[1], 2.0));
+      gradient[0] /= gradient_norm;
+      gradient[1] /= gradient_norm;
+
+      // projection of the vector from v_min to v_max onto the
+      // normalized gradient
+      double lambda = - gradient[0] * (v_min[0] - v_max[0]) - gradient[1] * (v_min[1] - v_max[1]);
+
+      Point<6> gradient_parameters;
+
+      gradient_parameters[0] = v_min[0];
+      gradient_parameters[1] = v_min[1];
+
+      gradient_parameters[2] = v_min[0] + lambda * gradient[0];
+      gradient_parameters[3] = v_min[1] + lambda * gradient[1];
+
+      gradient_parameters[4] = v_min[2];
+      gradient_parameters[5] = v_max[2];
+
+      return gradient_parameters;
+    }
+  }
+
+
+
+  // Write the given patches to 'out' in the AVS UCD format: an optional
+  // comment preamble, the size header line, all nodes, all cells and, if
+  // there is at least one data set, the data attached to the nodes. The
+  // third (unnamed) argument is ignored. ExcIO is thrown if the stream
+  // is not in a good state on entry or after the final flush.
+  template <int dim, int spacedim>
+  void write_ucd (const std::vector<Patch<dim,spacedim> > &patches,
+                  const std::vector<std::string>          &data_names,
+                  const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &,
+                  const UcdFlags                          &flags,
+                  std::ostream                            &out)
+  {
+    AssertThrow (out, ExcIO());
+
+#ifdef DEAL_II_WITH_MPI
+    // with MPI support a processor may legitimately own no cells (for
+    // instance on the coarsest mesh) and therefore have no patches, in
+    // which case there is nothing to write
+    if (patches.size() == 0)
+      return;
+#else
+    // without MPI an empty patch list usually means that the caller
+    // forgot to call build_patches, so complain about it
+    Assert (patches.size() > 0, ExcNoPatches());
+#endif
+
+    const unsigned int data_set_count = data_names.size();
+
+    UcdStream ucd_stream(out, flags);
+
+    // count the nodes and cells of all patches for later use
+    unsigned int node_count;
+    unsigned int cell_count;
+    compute_sizes<dim,spacedim> (patches, node_count, cell_count);
+
+    ///////////////////////
+    // preamble
+    if (flags.write_preamble)
+      {
+        out << "# This file was generated by the deal.II library." << '\n';
+        out << "# Date =  " << Utilities::System::get_date() << "\n";
+        out << "# Time =  " << Utilities::System::get_time() << "\n";
+        out << "#" << '\n';
+        out << "# For a description of the UCD format see the AVS Developer's guide."
+            << '\n';
+        out << "#" << '\n';
+      }
+
+    // header line: number of nodes, cells and data sets, followed by
+    // two zeros since there is no cell data at present and no model
+    // data
+    out << node_count << ' ';
+    out << cell_count << ' ';
+    out << data_set_count << ' ';
+    out << 0 << ' ';
+    out << 0 << '\n';
+
+    write_nodes(patches, ucd_stream);
+    out << '\n';
+
+    write_cells(patches, ucd_stream);
+    out << '\n';
+
+    /////////////////////////////
+    // data section, only present if there is something to write
+    if (data_set_count > 0)
+      {
+        // number of data vectors, then the number of components of
+        // each of them (only 1 is supported presently)
+        out << data_set_count << "    ";
+        for (unsigned int k=0; k<data_set_count; ++k)
+          out << 1 << ' ';
+        out << '\n';
+
+        // one label line per data set; units are not supported at
+        // present, so every data set is declared dimensionless
+        for (unsigned int k=0; k<data_set_count; ++k)
+          out << data_names[k] << ",dimensionless" << '\n';
+
+        write_data(patches, data_set_count, true, ucd_stream);
+      }
+
+    // make sure everything now gets to disk
+    out.flush ();
+
+    // the stream has to be usable still after all this writing
+    AssertThrow (out, ExcIO());
+  }
+
+
+  // Write the given patches to 'out' in the OpenDX format. The output
+  // consists of a "vertices" array, a "cells" array, an optional
+  // "neighbors" array, a "data" array and the "deal data" field object
+  // that ties them together, terminated by an "end" line. Depending on
+  // 'flags', the vertices, cells and data arrays are either written
+  // inline as text or only announced with their byte offset, in which
+  // case their contents are appended after the "end" line. The third
+  // (unnamed) argument is ignored. ExcIO is thrown if the stream is not
+  // in a good state on entry or after the final flush.
+  template <int dim, int spacedim>
+  void write_dx (const std::vector<Patch<dim,spacedim> > &patches,
+                 const std::vector<std::string>          &data_names,
+                 const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &,
+                 const DXFlags                           &flags,
+                 std::ostream                            &out)
+  {
+    AssertThrow (out, ExcIO());
+
+#ifndef DEAL_II_WITH_MPI
+    // An empty patch list usually means that the caller forgot to call
+    // build_patches, so complain about it. With MPI support the check
+    // is replaced by a silent return because a processor may
+    // legitimately own no cells (for instance on the coarsest mesh)
+    // and then has no patches.
+    Assert (patches.size() > 0, ExcNoPatches());
+#else
+    if (patches.size() == 0)
+      return;
+#endif
+    // Stream with special features for dx output
+    DXStream dx_out(out, flags);
+
+    // Running byte offset into the binary section after the "end"
+    // line; advanced by the size of every array stored in binary form.
+    unsigned int offset = 0;
+
+    const unsigned int n_data_sets = data_names.size();
+
+    // first count the number of nodes
+    // and cells for later use
+    unsigned int n_nodes;
+    unsigned int n_cells;
+    compute_sizes<dim,spacedim>(patches, n_nodes, n_cells);
+
+    ///////////////////////////////
+    // write the coordinates of all vertices; the order is
+    // lexicographical, x varying fastest
+    out << "object \"vertices\" class array type float rank 1 shape " << spacedim
+        << " items " << n_nodes;
+
+    if (flags.coordinates_binary)
+      {
+        out << " lsb ieee data 0" << '\n';
+        offset += n_nodes * spacedim *  sizeof(float);
+      }
+    else
+      {
+        out << " data follows" << '\n';
+        write_nodes(patches, dx_out);
+      }
+
+    /////////////////////////////////////////
+    // write cells
+    out << "object \"cells\" class array type int rank 1 shape "
+        << GeometryInfo<dim>::vertices_per_cell
+        << " items " << n_cells;
+
+    if (flags.int_binary)
+      {
+        out << " lsb binary data " << offset << '\n';
+        // every one of the n_cells items has vertices_per_cell integer
+        // entries (see the shape announced above), so the binary block
+        // is that many times larger than n_cells integers; counting
+        // only n_cells integers would misplace every later binary array
+        offset += n_cells * GeometryInfo<dim>::vertices_per_cell * sizeof (int);
+      }
+    else
+      {
+        out << " data follows" << '\n';
+        write_cells(patches, dx_out);
+        out << '\n';
+      }
+
+
+    out << "attribute \"element type\" string \"";
+    if (dim==1) out << "lines";
+    if (dim==2) out << "quads";
+    if (dim==3) out << "cubes";
+    out << "\"" << '\n'
+        << "attribute \"ref\" string \"positions\"" << '\n';
+
+//TODO:[GK] Patches must be of same size!
+    /////////////////////////////
+    // write neighbor information: for every cell one entry per face,
+    // holding the index of the cell on the other side or -1 if the
+    // patch has no neighbor there. Cells are numbered patch by patch
+    // using the n_subdivisions of the current patch, which is why all
+    // patches have to be of the same size (see the TODO above).
+    if (flags.write_neighbors)
+      {
+        out << "object \"neighbors\" class array type int rank 1 shape "
+            << GeometryInfo<dim>::faces_per_cell
+            << " items " << n_cells
+            << " data follows";
+
+        for (typename std::vector<Patch<dim,spacedim> >::const_iterator
+             patch=patches.begin();
+             patch!=patches.end(); ++patch)
+          {
+            const unsigned int n = patch->n_subdivisions;
+            const unsigned int n1 = (dim>0) ? n : 1;
+            const unsigned int n2 = (dim>1) ? n : 1;
+            const unsigned int n3 = (dim>2) ? n : 1;
+            const unsigned int cells_per_patch = Utilities::fixed_power<dim>(n);
+            // index increments for a step of one cell in x, y and z
+            const unsigned int dx = 1;
+            const unsigned int dy = n;
+            const unsigned int dz = n*n;
+
+            // index of the first cell of this patch
+            const unsigned int patch_start = patch->patch_index * cells_per_patch;
+
+            for (unsigned int i3=0; i3<n3; ++i3)
+              for (unsigned int i2=0; i2<n2; ++i2)
+                for (unsigned int i1=0; i1<n1; ++i1)
+                  {
+                    const unsigned int nx = i1*dx;
+                    const unsigned int ny = i2*dy;
+                    const unsigned int nz = i3*dz;
+
+                    out << '\n';
+                    // Direction -x
+                    // Last cell in row
+                    // of other patch
+                    if (i1==0)
+                      {
+                        const unsigned int nn = patch->neighbors[0];
+                        out << '\t';
+                        if (nn != patch->no_neighbor)
+                          out << (nn*cells_per_patch+ny+nz+dx*(n-1));
+                        else
+                          out << "-1";
+                      }
+                    else
+                      {
+                        out << '\t'
+                            << patch_start+nx-dx+ny+nz;
+                      }
+                    // Direction +x
+                    // First cell in row
+                    // of other patch
+                    if (i1 == n-1)
+                      {
+                        const unsigned int nn = patch->neighbors[1];
+                        out << '\t';
+                        if (nn != patch->no_neighbor)
+                          out << (nn*cells_per_patch+ny+nz);
+                        else
+                          out << "-1";
+                      }
+                    else
+                      {
+                        out << '\t'
+                            << patch_start+nx+dx+ny+nz;
+                      }
+                    // cells in 1d only have the two x neighbors
+                    if (dim<2)
+                      continue;
+                    // Direction -y
+                    if (i2==0)
+                      {
+                        const unsigned int nn = patch->neighbors[2];
+                        out << '\t';
+                        if (nn != patch->no_neighbor)
+                          out << (nn*cells_per_patch+nx+nz+dy*(n-1));
+                        else
+                          out << "-1";
+                      }
+                    else
+                      {
+                        out << '\t'
+                            << patch_start+nx+ny-dy+nz;
+                      }
+                    // Direction +y
+                    if (i2 == n-1)
+                      {
+                        const unsigned int nn = patch->neighbors[3];
+                        out << '\t';
+                        if (nn != patch->no_neighbor)
+                          out << (nn*cells_per_patch+nx+nz);
+                        else
+                          out << "-1";
+                      }
+                    else
+                      {
+                        out << '\t'
+                            << patch_start+nx+ny+dy+nz;
+                      }
+                    // cells in 2d have no z neighbors
+                    if (dim<3)
+                      continue;
+
+                    // Direction -z
+                    if (i3==0)
+                      {
+                        const unsigned int nn = patch->neighbors[4];
+                        out << '\t';
+                        if (nn != patch->no_neighbor)
+                          out << (nn*cells_per_patch+nx+ny+dz*(n-1));
+                        else
+                          out << "-1";
+                      }
+                    else
+                      {
+                        out << '\t'
+                            << patch_start+nx+ny+nz-dz;
+                      }
+                    // Direction +z
+                    if (i3 == n-1)
+                      {
+                        const unsigned int nn = patch->neighbors[5];
+                        out << '\t';
+                        if (nn != patch->no_neighbor)
+                          out << (nn*cells_per_patch+nx+ny);
+                        else
+                          out << "-1";
+                      }
+                    else
+                      {
+                        out << '\t'
+                            << patch_start+nx+ny+nz+dz;
+                      }
+                  }
+            out << '\n';
+          }
+      }
+    /////////////////////////////
+    // now write data
+    if (n_data_sets != 0)
+      {
+        // NOTE(review): the array is announced as "type float" even if
+        // flags.data_double is set, although the binary block is then
+        // sized with sizeof(double) below -- confirm that this is what
+        // readers of the file expect
+        out << "object \"data\" class array type float rank 1 shape "
+            << n_data_sets
+            << " items " << n_nodes;
+
+        if (flags.data_binary)
+          {
+            out << " lsb ieee data " << offset << '\n';
+            offset += n_data_sets * n_nodes * ((flags.data_double)
+                                               ? sizeof(double)
+                                               : sizeof(float));
+          }
+        else
+          {
+            out << " data follows" << '\n';
+            write_data(patches, n_data_sets, flags.data_double, dx_out);
+          }
+
+        // declare the data as depending on the "positions" component
+        out << "attribute \"dep\" string \"positions\"" << '\n';
+      }
+    else
+      {
+        // without any data set, write a constant array holding the
+        // single value 0 so that the "data" component referenced below
+        // exists nevertheless
+        out << "object \"data\" class constantarray type float rank 0 items " << n_nodes << " data follows"
+            << '\n' << '0' << '\n';
+      }
+
+    // no model data
+
+    out << "object \"deal data\" class field" << '\n'
+        << "component \"positions\" value \"vertices\"" << '\n'
+        << "component \"connections\" value \"cells\"" << '\n'
+        << "component \"data\" value \"data\"" << '\n';
+
+    if (flags.write_neighbors)
+      out << "component \"neighbors\" value \"neighbors\"" << '\n';
+
+    {
+      out << "attribute \"created\" string \""
+          << Utilities::System::get_date()
+          << ' '
+          << Utilities::System::get_time() << '"' << '\n';
+    }
+
+    out << "end" << '\n';
+    // Write all binary data now, in the same order in which the
+    // offsets were handed out above
+    if (flags.coordinates_binary)
+      write_nodes(patches, dx_out);
+    if (flags.int_binary)
+      write_cells(patches, dx_out);
+    if (flags.data_binary)
+      write_data(patches, n_data_sets, flags.data_double, dx_out);
+
+    // make sure everything now gets to
+    // disk
+    out.flush ();
+
+    // assert the stream is still ok
+    AssertThrow (out, ExcIO());
+  }
+
+
+
+  // Write the given patches to 'out' as gnuplot data. After a comment
+  // preamble naming the columns, every patch point is written as one
+  // line holding its coordinates followed by the values of all data
+  // sets. For dim<3 the points are written row by row; for dim==3 each
+  // grid point is written together with its neighbor in every positive
+  // coordinate direction, so that each such pair forms one line
+  // segment. The third (unnamed) argument and the flags are ignored.
+  // ExcIO is thrown if the stream is not in a good state on entry or
+  // after the final flush.
+  template <int dim, int spacedim>
+  void write_gnuplot (const std::vector<Patch<dim,spacedim> > &patches,
+                      const std::vector<std::string>          &data_names,
+                      const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &,
+                      const GnuplotFlags                      &/*flags*/,
+                      std::ostream                            &out)
+  {
+    AssertThrow (out, ExcIO());
+
+#ifdef DEAL_II_WITH_MPI
+    // with MPI support a processor may legitimately own no cells (for
+    // instance on the coarsest mesh) and therefore have no patches, in
+    // which case there is nothing to write
+    if (patches.size() == 0)
+      return;
+#else
+    // without MPI an empty patch list usually means that the caller
+    // forgot to call build_patches, so complain about it
+    Assert (patches.size() > 0, ExcNoPatches());
+#endif
+
+    const unsigned int n_data_sets = data_names.size();
+
+    // preamble: general remarks first, then the names of all columns
+    out << "# This file was generated by the deal.II library." << '\n';
+    out << "# Date =  " << Utilities::System::get_date() << '\n';
+    out << "# Time =  " << Utilities::System::get_time() << '\n';
+    out << "#" << '\n';
+    out << "# For a description of the GNUPLOT format see the GNUPLOT manual."
+        << '\n';
+    out << "#" << '\n';
+    out << "# ";
+
+    // coordinate columns
+    if (spacedim == 1)
+      out << "<x> ";
+    else if (spacedim == 2)
+      out << "<x> <y> ";
+    else if (spacedim == 3)
+      out << "<x> <y> <z> ";
+    else
+      {
+        Assert (false, ExcNotImplemented());
+      }
+
+    // data columns
+    for (std::vector<std::string>::const_iterator name=data_names.begin();
+         name != data_names.end(); ++name)
+      out << '<' << *name << "> ";
+    out << '\n';
+
+
+    // now the patches, one after the other
+    for (typename std::vector<Patch<dim,spacedim> >::const_iterator patch=patches.begin();
+         patch != patches.end(); ++patch)
+      {
+        const unsigned int n_subdivisions = patch->n_subdivisions;
+        const unsigned int n = n_subdivisions+1;
+
+        // number of points to visit in each coordinate direction;
+        // directions beyond dim consist of a single point only
+        const unsigned int n_points[3] = { (dim>0) ? n : 1,
+                                           (dim>1) ? n : 1,
+                                           (dim>2) ? n : 1
+                                         };
+        // distance in the data table between two points that are
+        // neighbors in the respective coordinate direction
+        const unsigned int stride[3] = { 1, n, n*n };
+
+        Assert ((patch->data.n_rows() == n_data_sets && !patch->points_are_available) ||
+                (patch->data.n_rows() == n_data_sets+spacedim && patch->points_are_available),
+                ExcDimensionMismatch (patch->points_are_available
+                                      ?
+                                      (n_data_sets + spacedim)
+                                      :
+                                      n_data_sets,
+                                      patch->data.n_rows()));
+        Assert (patch->data.n_cols() == Utilities::fixed_power<dim>(n),
+                ExcInvalidDatasetSize (patch->data.n_cols(), n_subdivisions+1));
+
+        Point<spacedim> base_point;
+        Point<spacedim> point;
+        if (dim<3)
+          {
+            for (unsigned int iy=0; iy<n_points[1]; ++iy)
+              {
+                for (unsigned int ix=0; ix<n_points[0]; ++ix)
+                  {
+                    // coordinates of this patch point ...
+                    compute_node(point, &*patch, ix, iy, 0, n_subdivisions);
+                    out << point << ' ';
+
+                    // ... followed by all data values at this point
+                    const unsigned int column = ix*stride[0]+iy*stride[1];
+                    for (unsigned int s=0; s<n_data_sets; ++s)
+                      out << patch->data(s,column) << ' ';
+                    out << '\n';
+                  }
+                // in 2d, separate the rows of a patch by an empty line
+                if (dim>1)
+                  out << '\n';
+              }
+            // end of patch; in 1d no row separator has been written
+            // so far, which is made up for here
+            if (dim==1)
+              out << '\n';
+            out << '\n';
+          }
+        else if (dim==3)
+          {
+            // for all grid points: draw lines into all positive
+            // coordinate directions if there is another grid point
+            // there
+            for (unsigned int iz=0; iz<n_points[2]; ++iz)
+              for (unsigned int iy=0; iy<n_points[1]; ++iy)
+                for (unsigned int ix=0; ix<n_points[0]; ++ix)
+                  {
+                    compute_node(base_point, &*patch, ix, iy, iz, n_subdivisions);
+
+                    const unsigned int index[3] = { ix, iy, iz };
+                    const unsigned int base_column
+                      = ix*stride[0]+iy*stride[1]+iz*stride[2];
+
+                    // directions are visited in the order x, y, z
+                    for (unsigned int dir=0; dir<3; ++dir)
+                      {
+                        // no further grid point in this direction
+                        if (index[dir] >= n_subdivisions)
+                          continue;
+
+                        // start point of the line and its data
+                        out << base_point;
+                        for (unsigned int s=0; s<n_data_sets; ++s)
+                          out << ' '
+                              << patch->data(s,base_column);
+                        out << '\n';
+
+                        // end point of the line, one step further in
+                        // the current direction, and its data
+                        unsigned int next[3] = { ix, iy, iz };
+                        ++next[dir];
+                        compute_node(point, &*patch, next[0], next[1], next[2], n_subdivisions);
+                        out << point;
+                        for (unsigned int s=0; s<n_data_sets; ++s)
+                          out << ' '
+                              << patch->data(s,base_column+stride[dir]);
+                        out << '\n';
+
+                        // end of line
+                        out << '\n'
+                            << '\n';
+                      }
+                  }
+          }
+        else
+          Assert (false, ExcNotImplemented());
+      }
+
+    // make sure everything now gets to disk
+    out.flush ();
+
+    // the stream has to be usable still after all this writing
+    AssertThrow (out, ExcIO());
+  }
+
+
+
+  template <int dim, int spacedim>
+  void write_povray (const std::vector<Patch<dim,spacedim> > &patches,
+                     const std::vector<std::string>          &data_names,
+                     const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &,
+                     const PovrayFlags                       &flags,
+                     std::ostream                            &out)
+  {
+    AssertThrow (out, ExcIO());
+
+#ifndef DEAL_II_WITH_MPI
+    // verify that there are indeed
+    // patches to be written out. most
+    // of the times, people just forget
+    // to call build_patches when there
+    // are no patches, so a warning is
+    // in order. that said, the
+    // assertion is disabled if we
+    // support MPI since then it can
+    // happen that on the coarsest
+    // mesh, a processor simply has no
+    // cells it actually owns, and in
+    // that case it is legit if there
+    // are no patches
+    Assert (patches.size() > 0, ExcNoPatches());
+#else
+    if (patches.size() == 0)
+      return;
+#endif
+    Assert (dim==2, ExcNotImplemented());        // only for 2-D surfaces on a 2-D plane
+    Assert (spacedim==2, ExcNotImplemented());
+
+    const unsigned int n_data_sets = data_names.size();
+    (void)n_data_sets;
+
+    // write preamble
+    {
+      out << "/* This file was generated by the deal.II library." << '\n'
+          << "   Date =  " << Utilities::System::get_date() << '\n'
+          << "   Time =  " << Utilities::System::get_time() << '\n'
+          << '\n'
+          << "   For a description of the POVRAY format see the POVRAY manual."
+          << '\n'
+          << "*/ " << '\n';
+
+      // include files
+      out << "#include \"colors.inc\" " << '\n'
+          << "#include \"textures.inc\" " << '\n';
+
+
+      // use external include file for textures,
+      // camera and light
+      if (flags.external_data)
+        out << "#include \"data.inc\" " << '\n';
+      else                          // all definitions in data file
+        {
+          // camera
+          out << '\n' << '\n'
+              << "camera {"            << '\n'
+              << "  location <1,4,-7>" << '\n'
+              << "  look_at <0,0,0>"   << '\n'
+              << "  angle 30"          << '\n'
+              << "}"                   << '\n';
+
+          // light
+          out << '\n'
+              << "light_source {"      << '\n'
+              << "  <1,4,-7>"      << '\n'
+              << "  color Grey"        << '\n'
+              << "}"                   << '\n';
+          out << '\n'
+              << "light_source {"      << '\n'
+              << "  <0,20,0>"      << '\n'
+              << "  color White"       << '\n'
+              << "}"                   << '\n';
+        }
+    }
+
+    // max. and min. heigth of solution
+    Assert(patches.size()>0, ExcInternalError());
+    double hmin=patches[0].data(0,0);
+    double hmax=patches[0].data(0,0);
+
+    for (typename std::vector<Patch<dim,spacedim> >::const_iterator patch=patches.begin();
+         patch != patches.end(); ++patch)
+      {
+        const unsigned int n_subdivisions = patch->n_subdivisions;
+
+        Assert ((patch->data.n_rows() == n_data_sets && !patch->points_are_available) ||
+                (patch->data.n_rows() == n_data_sets+spacedim && patch->points_are_available),
+                ExcDimensionMismatch (patch->points_are_available
+                                      ?
+                                      (n_data_sets + spacedim)
+                                      :
+                                      n_data_sets,
+                                      patch->data.n_rows()));
+        Assert (patch->data.n_cols() == Utilities::fixed_power<dim>(n_subdivisions+1),
+                ExcInvalidDatasetSize (patch->data.n_cols(), n_subdivisions+1));
+
+        for (unsigned int i=0; i<n_subdivisions+1; ++i)
+          for (unsigned int j=0; j<n_subdivisions+1; ++j)
+            {
+              const int dl = i*(n_subdivisions+1)+j;
+              if (patch->data(0,dl)<hmin)
+                hmin=patch->data(0,dl);
+              if (patch->data(0,dl)>hmax)
+                hmax=patch->data(0,dl);
+            }
+      }
+
+    out << "#declare HMIN=" << hmin << ";" << '\n'
+        << "#declare HMAX=" << hmax << ";" << '\n' << '\n';
+
+    if (!flags.external_data)
+      {
+        // texture with scaled niveau lines
+        // 10 lines in the surface
+        out << "#declare Tex=texture{" << '\n'
+            << "  pigment {" << '\n'
+            << "    gradient y" << '\n'
+            << "    scale y*(HMAX-HMIN)*" << 0.1 << '\n'
+            << "    color_map {" << '\n'
+            << "      [0.00 color Light_Purple] " << '\n'
+            << "      [0.95 color Light_Purple] " << '\n'
+            << "      [1.00 color White]    " << '\n'
+            << "} } }" << '\n' << '\n';
+      }
+
+    if (!flags.bicubic_patch)
+      {
+        // start of mesh header
+        out << '\n'
+            << "mesh {" << '\n';
+      }
+
+    // loop over all patches
+    for (typename std::vector<Patch<dim,spacedim> >::const_iterator patch=patches.begin();
+         patch != patches.end(); ++patch)
+      {
+        const unsigned int n_subdivisions = patch->n_subdivisions;
+        const unsigned int n = n_subdivisions+1;
+        const unsigned int d1=1;
+        const unsigned int d2=n;
+
+        Assert ((patch->data.n_rows() == n_data_sets && !patch->points_are_available) ||
+                (patch->data.n_rows() == n_data_sets+spacedim && patch->points_are_available),
+                ExcDimensionMismatch (patch->points_are_available
+                                      ?
+                                      (n_data_sets + spacedim)
+                                      :
+                                      n_data_sets,
+                                      patch->data.n_rows()));
+        Assert (patch->data.n_cols() == Utilities::fixed_power<dim>(n),
+                ExcInvalidDatasetSize (patch->data.n_cols(), n_subdivisions+1));
+
+
+        std::vector<Point<spacedim> > ver(n*n);
+
+        for (unsigned int i2=0; i2<n; ++i2)
+          for (unsigned int i1=0; i1<n; ++i1)
+            {
+              // compute coordinates for
+              // this patch point, storing in ver
+              compute_node(ver[i1*d1+i2*d2], &*patch, i1, i2, 0, n_subdivisions);
+            }
+
+
+        if (!flags.bicubic_patch)
+          {
+            // approximate normal
+            // vectors in patch
+            std::vector<Point<3> > nrml;
+            // only if smooth triangles are used
+            if (flags.smooth)
+              {
+                nrml.resize(n*n);
+                // These are
+                // difference
+                // quotients of
+                // the surface
+                // mapping. We
+                // take them
+                // symmetric
+                // inside the
+                // patch and
+                // one-sided at
+                // the edges
+                Point<3> h1,h2;
+                // Now compute normals in every point
+                for (unsigned int i=0; i<n; ++i)
+                  for (unsigned int j=0; j<n; ++j)
+                    {
+                      const unsigned int il = (i==0) ? i : (i-1);
+                      const unsigned int ir = (i==n_subdivisions) ? i : (i+1);
+                      const unsigned int jl = (j==0) ? j : (j-1);
+                      const unsigned int jr = (j==n_subdivisions) ? j : (j+1);
+
+                      h1(0)=ver[ir*d1+j*d2](0) - ver[il*d1+j*d2](0);
+                      h1(1)=patch->data(0,ir*d1+j*d2)-
+                            patch->data(0,il*d1+j*d2);
+                      h1(2)=ver[ir*d1+j*d2](1) - ver[il*d1+j*d2](1);
+
+                      h2(0)=ver[i*d1+jr*d2](0) - ver[i*d1+jl*d2](0);
+                      h2(1)=patch->data(0,i*d1+jr*d2)-
+                            patch->data(0,i*d1+jl*d2);
+                      h2(2)=ver[i*d1+jr*d2](1) - ver[i*d1+jl*d2](1);
+
+                      nrml[i*d1+j*d2](0)=h1(1)*h2(2)-h1(2)*h2(1);
+                      nrml[i*d1+j*d2](1)=h1(2)*h2(0)-h1(0)*h2(2);
+                      nrml[i*d1+j*d2](2)=h1(0)*h2(1)-h1(1)*h2(0);
+
+                      // normalize Vector
+                      double norm=std::sqrt(
+                                    std::pow(nrml[i*d1+j*d2](0),2.)+
+                                    std::pow(nrml[i*d1+j*d2](1),2.)+
+                                    std::pow(nrml[i*d1+j*d2](2),2.));
+
+                      if (nrml[i*d1+j*d2](1)<0)
+                        norm*=-1.;
+
+                      for (unsigned int k=0; k<3; ++k)
+                        nrml[i*d1+j*d2](k)/=norm;
+                    }
+              }
+
+            // setting up triangles
+            for (unsigned int i=0; i<n_subdivisions; ++i)
+              for (unsigned int j=0; j<n_subdivisions; ++j)
+                {
+                  // down/left vertex of triangle
+                  const int dl = i*d1+j*d2;
+                  if (flags.smooth)
+                    {
+                      // writing smooth_triangles
+
+                      // down/right triangle
+                      out << "smooth_triangle {" << '\n' << "\t<"
+                          << ver[dl](0) << ","
+                          << patch->data(0,dl) << ","
+                          << ver[dl](1) << ">, <"
+                          << nrml[dl](0) << ", "
+                          << nrml[dl](1) << ", "
+                          << nrml[dl](2)
+                          << ">," << '\n';
+                      out << " \t<"
+                          << ver[dl+d1](0) << ","
+                          << patch->data(0,dl+d1)  << ","
+                          << ver[dl+d1](1) << ">, <"
+                          << nrml[dl+d1](0) << ", "
+                          << nrml[dl+d1](1) << ", "
+                          << nrml[dl+d1](2)
+                          << ">," << '\n';
+                      out << "\t<"
+                          << ver[dl+d1+d2](0) << ","
+                          << patch->data(0,dl+d1+d2)  << ","
+                          << ver[dl+d1+d2](1) << ">, <"
+                          << nrml[dl+d1+d2](0) << ", "
+                          << nrml[dl+d1+d2](1) << ", "
+                          << nrml[dl+d1+d2](2)
+                          << ">}" << '\n';
+
+                      // upper/left triangle
+                      out << "smooth_triangle {" << '\n' << "\t<"
+                          << ver[dl](0) << ","
+                          << patch->data(0,dl) << ","
+                          << ver[dl](1) << ">, <"
+                          << nrml[dl](0) << ", "
+                          << nrml[dl](1) << ", "
+                          << nrml[dl](2)
+                          << ">," << '\n';
+                      out << "\t<"
+                          << ver[dl+d1+d2](0) << ","
+                          << patch->data(0,dl+d1+d2)  << ","
+                          << ver[dl+d1+d2](1) << ">, <"
+                          << nrml[dl+d1+d2](0) << ", "
+                          << nrml[dl+d1+d2](1) << ", "
+                          << nrml[dl+d1+d2](2)
+                          << ">," << '\n';
+                      out << "\t<"
+                          << ver[dl+d2](0) << ","
+                          << patch->data(0,dl+d2)  << ","
+                          << ver[dl+d2](1) << ">, <"
+                          << nrml[dl+d2](0) << ", "
+                          << nrml[dl+d2](1) << ", "
+                          << nrml[dl+d2](2)
+                          << ">}" << '\n';
+                    }
+                  else
+                    {
+                      // writing standard triangles
+                      // down/right triangle
+                      out << "triangle {" << '\n' << "\t<"
+                          << ver[dl](0) << ","
+                          << patch->data(0,dl) << ","
+                          << ver[dl](1) << ">," << '\n';
+                      out << "\t<"
+                          << ver[dl+d1](0) << ","
+                          << patch->data(0,dl+d1)  << ","
+                          << ver[dl+d1](1) << ">," << '\n';
+                      out << "\t<"
+                          << ver[dl+d1+d2](0) << ","
+                          << patch->data(0,dl+d1+d2)  << ","
+                          << ver[dl+d1+d2](1) << ">}" << '\n';
+
+                      // upper/left triangle
+                      out << "triangle {" << '\n' << "\t<"
+                          << ver[dl](0) << ","
+                          << patch->data(0,dl) << ","
+                          << ver[dl](1) << ">," << '\n';
+                      out << "\t<"
+                          << ver[dl+d1+d2](0) << ","
+                          << patch->data(0,dl+d1+d2)  << ","
+                          << ver[dl+d1+d2](1) << ">," << '\n';
+                      out << "\t<"
+                          << ver[dl+d2](0) << ","
+                          << patch->data(0,dl+d2)  << ","
+                          << ver[dl+d2](1) << ">}" << '\n';
+                    }
+                }
+          }
+        else
+          {
+            // writing bicubic_patch
+            Assert (n_subdivisions==3, ExcDimensionMismatch(n_subdivisions,3));
+            out << '\n'
+                << "bicubic_patch {" << '\n'
+                << "  type 0" << '\n'
+                << "  flatness 0" << '\n'
+                << "  u_steps 0" << '\n'
+                << "  v_steps 0" << '\n';
+            for (int i=0; i<16; ++i)
+              {
+                out << "\t<" << ver[i](0) << "," << patch->data(0,i) << "," << ver[i](1) << ">";
+                if (i!=15) out << ",";
+                out << '\n';
+              }
+            out << "  texture {Tex}" <<  '\n'
+                << "}" << '\n';
+          }
+      }
+
+    if (!flags.bicubic_patch)
+      {
+        // the end of the mesh
+        out << "  texture {Tex}" << '\n'
+            << "}" << '\n'
+            << '\n';
+      }
+
+    // make sure everything now gets to
+    // disk
+    out.flush ();
+
+    AssertThrow (out, ExcIO());
+  }
+
+
+
+  // Fallback overload of write_eps for arbitrary dim. It writes nothing:
+  // all arguments are ignored (their names are commented out) and the only
+  // statement is an AssertThrow on dim==2, so every instantiation with
+  // dim!=2 throws ExcNotImplemented.
+  // NOTE(review): presumably calls with dim==2 are resolved to the
+  // Patch<2,spacedim> overload of write_eps rather than to this
+  // function -- confirm.
+  template <int dim, int spacedim>
+  void write_eps (const std::vector<Patch<dim,spacedim> > &/*patches*/,
+                  const std::vector<std::string>          &/*data_names*/,
+                  const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &,
+                  const EpsFlags                          &/*flags*/,
+                  std::ostream                            &/*out*/)
+  {
+    // not implemented, see the documentation of the function
+    AssertThrow (dim==2, ExcNotImplemented());
+  }
+
+
+  // Write the patches of a two-dimensional mesh as an Encapsulated
+  // PostScript picture to @p out.
+  //
+  // Every patch is split into its n_subdivisions^2 sub-cells. Each
+  // sub-cell gets a height at its four vertices (data row
+  // flags.height_vector times flags.z_scaling if spacedim==2, the
+  // z-coordinate of the vertex if spacedim==3), is rotated according to
+  // flags.azimut_angle and flags.turn_angle, and is projected onto the
+  // drawing plane. The projected cells are collected in a multiset and
+  // written in the order of that set. If flags.draw_cells is set, cells are
+  // filled (colored through flags.color_function if flags.shade_cells is
+  // set, white otherwise); if flags.draw_mesh is set, their outlines are
+  // drawn in black.
+  //
+  // The data names and the vector data ranges are not used by this format.
+  //
+  // Throws ExcIO if the stream is not in a good state on entry or after
+  // writing. The precision of @p out is restored before returning from the
+  // regular path.
+  template <int spacedim>
+  void write_eps (const std::vector<Patch<2,spacedim> > &patches,
+                  const std::vector<std::string>          &/*data_names*/,
+                  const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &,
+                  const EpsFlags                          &flags,
+                  std::ostream                            &out)
+  {
+    AssertThrow (out, ExcIO());
+
+#ifndef DEAL_II_WITH_MPI
+    // verify that there are indeed
+    // patches to be written out. most
+    // of the times, people just forget
+    // to call build_patches when there
+    // are no patches, so a warning is
+    // in order. that said, the
+    // assertion is disabled if we
+    // support MPI since then it can
+    // happen that on the coarsest
+    // mesh, a processor simply has no
+    // cells it actually owns, and in
+    // that case it is legit if there
+    // are no patches
+    Assert (patches.size() > 0, ExcNoPatches());
+#else
+    if (patches.size() == 0)
+      return;
+#endif
+
+    const unsigned int old_precision = out.precision();
+
+    // set up an array of cells to be
+    // written later. this array holds the
+    // cells of all the patches as
+    // projected to the plane perpendicular
+    // to the line of sight.
+    //
+    // note that they are kept sorted by
+    // the set, where we chose the value
+    // of the center point of the cell
+    // along the line of sight as value
+    // for sorting
+    std::multiset<EpsCell2d> cells;
+
+    // two variables in which we
+    // will store the minimum and
+    // maximum values of the field
+    // to be used for colorization
+    //
+    // preset them by 0 to calm down the
+    // compiler; they are initialized
+    // by the first cell that is shaded
+    double min_color_value=0, max_color_value=0;
+
+    // Array for z-coordinates of points.
+    // The elevation determined by a function if spacedim=2
+    // or the z-coordinate of the grid point if spacedim=3
+    double heights[4] = { 0, 0, 0, 0 };
+
+    // the viewing angles are the same for
+    // all cells, so evaluate the
+    // trigonometric functions only once.
+    //
+    // this computation was first written
+    // by Stefan Nauber. please no-one
+    // ask me why it works that way (or
+    // may be not), especially not about
+    // the angles and the sign of
+    // the height field, I don't know
+    // it.
+    const double pi = numbers::PI;
+    const double cx = -std::cos(pi-flags.azimut_angle * 2*pi / 360.),
+                 cz = -std::cos(flags.turn_angle * 2*pi / 360.),
+                 sx = std::sin(pi-flags.azimut_angle * 2*pi / 360.),
+                 sz = std::sin(flags.turn_angle * 2*pi / 360.);
+
+    // compute the cells for output and
+    // enter them into the set above
+    // note that since dim==2, we
+    // have exactly four vertices per
+    // patch and per cell
+    for (typename std::vector<Patch<2,spacedim> >::const_iterator patch=patches.begin();
+         patch!=patches.end(); ++patch)
+      {
+        const unsigned int n_subdivisions = patch->n_subdivisions;
+        const unsigned int n = n_subdivisions+1;
+        // strides of the data points in
+        // the two coordinate directions
+        const unsigned int d1 = 1;
+        const unsigned int d2 = n;
+
+        // patches without any data rows
+        // are drawn flat and with a
+        // constant color value
+        const bool patch_has_data = (patch->data.n_rows() != 0);
+
+        for (unsigned int i2=0; i2<n_subdivisions; ++i2)
+          for (unsigned int i1=0; i1<n_subdivisions; ++i1)
+            {
+              Point<spacedim> points[4];
+              compute_node(points[0], &*patch, i1, i2, 0, n_subdivisions);
+              compute_node(points[1], &*patch, i1+1, i2, 0, n_subdivisions);
+              compute_node(points[2], &*patch, i1, i2+1, 0, n_subdivisions);
+              compute_node(points[3], &*patch, i1+1, i2+1, 0, n_subdivisions);
+
+              // indices of the four vertices
+              // of this sub-cell in the data
+              // table of the patch, in the
+              // same order as points[]
+              const unsigned int data_index[4]
+                = { i1*d1 + i2*d2,
+                    (i1+1)*d1 + i2*d2,
+                    i1*d1 + (i2+1)*d2,
+                    (i1+1)*d1 + (i2+1)*d2
+                  };
+
+              switch (spacedim)
+                {
+                case 2:
+                  Assert ((flags.height_vector < patch->data.n_rows()) ||
+                          patch->data.n_rows() == 0,
+                          ExcIndexRange (flags.height_vector, 0,
+                                         patch->data.n_rows()));
+                  for (unsigned int i=0; i<4; ++i)
+                    heights[i] = patch_has_data ?
+                                 patch->data(flags.height_vector,data_index[i]) * flags.z_scaling
+                                 : 0;
+
+                  break;
+                case 3:
+                  // Copy z-coordinates into the height vector
+                  for (unsigned int i=0; i<4; ++i)
+                    heights[i] = points[i](2);
+                  break;
+                default:
+                  Assert(false, ExcNotImplemented());
+                }
+
+
+              // now compute the projection of
+              // the bilinear cell given by the
+              // four vertices and their heights
+              // and write them to a proper
+              // cell object. note that we only
+              // need the first two components
+              // of the projected position for
+              // output, but we need the value
+              // along the line of sight for
+              // sorting the cells for back-to-
+              // front-output
+              //
+              // for reference, the rotation
+              // matrices involved are
+              //
+              //      ( 1 0    0 )
+              // D1 = ( 0 cx -sx )
+              //      ( 0 sx  cx )
+              //
+              //      ( cy 0 sy )
+              // Dy = (  0 1  0 )
+              //      (-sy 0 cy )
+              //
+              //      ( cz -sz 0 )
+              // Dz = ( sz  cz 0 )
+              //      (  0   0 1 )
+              //
+              //       ( cz -sz 0 )( 1 0    0 )(x)   ( cz*x-sz*(cx*y-sx*z)+0*(sx*y+cx*z) )
+              // Dxz = ( sz  cz 0 )( 0 cx -sx )(y) = ( sz*x+cz*(cx*y-sx*z)+0*(sx*y+cx*z) )
+              //       (  0   0 1 )( 0 sx  cx )(z)   (  0*x+ 0*(cx*y-sx*z)+1*(sx*y+cx*z) )
+              EpsCell2d eps_cell;
+              for (unsigned int vertex=0; vertex<4; ++vertex)
+                {
+                  const double x = points[vertex](0),
+                               y = points[vertex](1),
+                               z = -heights[vertex];
+
+                  eps_cell.vertices[vertex](0) = -   cz*x+   sz*y;
+                  eps_cell.vertices[vertex](1) = -cx*sz*x-cx*cz*y-sx*z;
+                }
+
+              // compute coordinates of
+              // center of cell
+              const Point<spacedim> center_point
+                = (points[0] + points[1] + points[2] + points[3]) / 4;
+              const double center_height
+                = -(heights[0] + heights[1] + heights[2] + heights[3]) / 4;
+
+              // compute the depth into
+              // the picture
+              eps_cell.depth = -sx*sz*center_point(0)
+                               -sx*cz*center_point(1)
+                               +cx*center_height;
+
+              if (flags.draw_cells && flags.shade_cells)
+                {
+                  Assert ((flags.color_vector < patch->data.n_rows()) ||
+                          patch->data.n_rows() == 0,
+                          ExcIndexRange (flags.color_vector, 0,
+                                         patch->data.n_rows()));
+                  const double color_values[4]
+                    = { patch_has_data ?
+                        patch->data(flags.color_vector,data_index[0]) : 1,
+
+                        patch_has_data ?
+                        patch->data(flags.color_vector,data_index[1]) : 1,
+
+                        patch_has_data ?
+                        patch->data(flags.color_vector,data_index[2]) : 1,
+
+                        patch_has_data ?
+                        patch->data(flags.color_vector,data_index[3]) : 1
+                      };
+
+                  // set color value to average of the value
+                  // at the vertices
+                  eps_cell.color_value = (color_values[0] +
+                                          color_values[1] +
+                                          color_values[3] +
+                                          color_values[2]) / 4;
+
+                  // update bounds of color
+                  // field. only the very first
+                  // cell may initialize the
+                  // bounds: testing for the
+                  // first *patch* instead would
+                  // reset them for each of its
+                  // sub-cells and thereby
+                  // forget the values of the
+                  // earlier ones whenever
+                  // n_subdivisions>1. since
+                  // cells are inserted at the
+                  // end of this loop body, the
+                  // set is empty exactly for
+                  // the first cell
+                  if (cells.empty())
+                    min_color_value = max_color_value = eps_cell.color_value;
+                  else
+                    {
+                      min_color_value = (min_color_value < eps_cell.color_value ?
+                                         min_color_value : eps_cell.color_value);
+                      max_color_value = (max_color_value > eps_cell.color_value ?
+                                         max_color_value : eps_cell.color_value);
+                    }
+                }
+
+              // finally add this cell
+              cells.insert (eps_cell);
+            }
+      }
+
+    // if no cell was generated at all
+    // (for example in optimized mode
+    // without MPI, where the assertion
+    // above is compiled out) there is
+    // nothing to draw, and we must not
+    // dereference cells.begin() below
+    if (cells.empty())
+      return;
+
+    // find out minimum and maximum x and
+    // y coordinates to compute offsets
+    // and scaling factors
+    double x_min = cells.begin()->vertices[0](0);
+    double x_max = x_min;
+    double y_min = cells.begin()->vertices[0](1);
+    double y_max = y_min;
+
+    for (typename std::multiset<EpsCell2d>::const_iterator
+         cell=cells.begin();
+         cell!=cells.end(); ++cell)
+      for (unsigned int vertex=0; vertex<4; ++vertex)
+        {
+          x_min = std::min (x_min, cell->vertices[vertex](0));
+          x_max = std::max (x_max, cell->vertices[vertex](0));
+          y_min = std::min (y_min, cell->vertices[vertex](1));
+          y_max = std::max (y_max, cell->vertices[vertex](1));
+        }
+
+    // choose the scaling factor such that
+    // the picture is flags.size units
+    // wide or high, depending on
+    // flags.size_type. the same factor is
+    // used in both directions to
+    // preserve the shape of the
+    // triangulation. note that the
+    // extent has to be max-min in either
+    // case: a negative factor would
+    // mirror the picture out of the
+    // bounding box written below
+    const double scale = (flags.size /
+                          (flags.size_type==EpsFlags::width ?
+                           x_max - x_min :
+                           y_max - y_min));
+
+    const Point<2> offset(x_min, y_min);
+
+
+    // now write preamble
+    {
+      out << "%!PS-Adobe-2.0 EPSF-1.2" << '\n'
+          << "%%Title: deal.II Output" << '\n'
+          << "%%Creator: the deal.II library" << '\n'
+          << "%%Creation Date: "
+          << Utilities::System::get_date()
+          << " - "
+          << Utilities::System::get_time() << '\n'
+          << "%%BoundingBox: "
+          // lower left corner
+          << "0 0 "
+          // upper right corner
+          << static_cast<unsigned int>( (x_max-x_min) * scale + 0.5)
+          << ' '
+          << static_cast<unsigned int>( (y_max-y_min) * scale + 0.5)
+          << '\n';
+
+      // define some abbreviations to keep
+      // the output small:
+      // m=move turtle to
+      // l=define a line
+      // s=set rgb color
+      // sg=set gray value
+      // lx=close the line and plot the line
+      // lf=close the line and fill the interior
+      out << "/m {moveto} bind def"      << '\n'
+          << "/l {lineto} bind def"      << '\n'
+          << "/s {setrgbcolor} bind def" << '\n'
+          << "/sg {setgray} bind def"    << '\n'
+          << "/lx {lineto closepath stroke} bind def" << '\n'
+          << "/lf {lineto closepath fill} bind def"   << '\n';
+
+      out << "%%EndProlog" << '\n'
+          << '\n';
+      // set fine lines
+      out << flags.line_width << " setlinewidth" << '\n';
+      // allow only five digits
+      // for output (instead of the
+      // default six); this should suffice
+      // even for fine grids, but reduces
+      // the file size significantly
+      out << std::setprecision (5);
+    }
+
+    // check if min and max
+    // values for the color are
+    // actually different. If
+    // that is not the case (such
+    // things happen, for
+    // example, in the very first
+    // time step of a time
+    // dependent problem, if the
+    // initial values are zero),
+    // all values are equal, and
+    // then we can draw
+    // everything in an arbitrary
+    // color. Thus, change one of
+    // the two values arbitrarily
+    if (max_color_value == min_color_value)
+      max_color_value = min_color_value+1;
+
+    // now we've got all the information
+    // we need. write the cells.
+    // note: due to the ordering, we
+    // traverse the list of cells
+    // back-to-front
+    for (typename std::multiset<EpsCell2d>::const_iterator
+         cell=cells.begin();
+         cell!=cells.end(); ++cell)
+      {
+        if (flags.draw_cells)
+          {
+            if (flags.shade_cells)
+              {
+                const EpsFlags::RgbValues rgb_values
+                  = (*flags.color_function) (cell->color_value,
+                                             min_color_value,
+                                             max_color_value);
+
+                // write out color
+                if (rgb_values.is_grey())
+                  out << rgb_values.red << " sg ";
+                else
+                  out << rgb_values.red   << ' '
+                      << rgb_values.green << ' '
+                      << rgb_values.blue  << " s ";
+              }
+            else
+              out << "1 sg ";
+
+            // vertices 0-1-3-2 run once
+            // around the cell
+            out << (cell->vertices[0]-offset) * scale << " m "
+                << (cell->vertices[1]-offset) * scale << " l "
+                << (cell->vertices[3]-offset) * scale << " l "
+                << (cell->vertices[2]-offset) * scale << " lf"
+                << '\n';
+          }
+
+        if (flags.draw_mesh)
+          out << "0 sg "      // draw lines in black
+              << (cell->vertices[0]-offset) * scale << " m "
+              << (cell->vertices[1]-offset) * scale << " l "
+              << (cell->vertices[3]-offset) * scale << " l "
+              << (cell->vertices[2]-offset) * scale << " lx"
+              << '\n';
+      }
+    out << "showpage" << '\n';
+    // restore the precision the stream
+    // had on entry and make sure
+    // everything now gets to disk
+    out << std::setprecision(old_precision);
+    out.flush ();
+
+    AssertThrow (out, ExcIO());
+  }
+
+
+
+  // Write the given patches to @p out in the "gmvinput ascii" format.
+  //
+  // The file consists of the node coordinates (one line per coordinate
+  // direction, padded with zeros up to three directions), the cell
+  // connectivity, and one "variable" section with one node-based entry
+  // per name in @p data_names. The vector data ranges are not used.
+  //
+  // Throws ExcIO if the stream is not in a good state on entry or after
+  // writing.
+  template <int dim, int spacedim>
+  void write_gmv (const std::vector<Patch<dim,spacedim> > &patches,
+                  const std::vector<std::string>          &data_names,
+                  const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &,
+                  const GmvFlags                          &flags,
+                  std::ostream                            &out)
+  {
+    Assert(dim<=3, ExcNotImplemented());
+    AssertThrow (out, ExcIO());
+
+#ifndef DEAL_II_WITH_MPI
+    // An empty list of patches almost always means that the caller forgot
+    // to call build_patches, so complain about it. With MPI, however, a
+    // processor may legitimately own no cells at all (e.g. on the coarsest
+    // mesh); in that case there is simply nothing to write.
+    Assert (patches.size() > 0, ExcNoPatches());
+#else
+    if (patches.empty())
+      return;
+#endif
+
+    GmvStream gmv_out(out, flags);
+    const unsigned int n_data_sets = data_names.size();
+    // Only the first patch is checked against the number of data sets
+    // here; all other patches are checked in
+    // write_gmv_reorder_data_vectors.
+    Assert ((patches[0].data.n_rows() == n_data_sets && !patches[0].points_are_available) ||
+            (patches[0].data.n_rows() == n_data_sets+spacedim && patches[0].points_are_available),
+            ExcDimensionMismatch (patches[0].points_are_available
+                                  ?
+                                  (n_data_sets + spacedim)
+                                  :
+                                  n_data_sets,
+                                  patches[0].data.n_rows()));
+
+    // ---- preamble ----
+    out << "gmvinput ascii"
+        << '\n'
+        << '\n';
+
+    // Total number of nodes and cells over all patches, needed below.
+    unsigned int n_nodes;
+    unsigned int n_cells;
+    compute_sizes<dim,spacedim>(patches, n_nodes, n_cells);
+
+    // GMV wants the data variable by variable (all values of variable 1,
+    // then all values of variable 2, ...), and likewise all x coordinates
+    // before all y coordinates. The patches store things the other way
+    // around, so the data is copied into a table with one row per data
+    // set. This copying is independent of writing the vertices, hence it
+    // is started as a separate task here and only waited for right before
+    // the data is written.
+    Table<2,double> data_vectors (n_data_sets, n_nodes);
+    typedef void (*ReorderFunction) (const std::vector<Patch<dim,spacedim> > &,
+                                     Table<2,double> &);
+    ReorderFunction reorder_function
+      = &write_gmv_reorder_data_vectors<dim,spacedim>;
+    Threads::Task<> reordering
+      = Threads::new_task (reorder_function, patches, data_vectors);
+
+    // ---- nodes ----
+    // One line per coordinate direction. GMV always expects three
+    // directions, so the ones beyond spacedim are filled with zeros.
+    out << "nodes " << n_nodes << '\n';
+    for (unsigned int component=0; component<spacedim; ++component)
+      {
+        gmv_out.selected_component = component;
+        write_nodes(patches, gmv_out);
+        out << '\n';
+      }
+    gmv_out.selected_component = numbers::invalid_unsigned_int;
+
+    for (unsigned int component=spacedim; component<3; ++component)
+      {
+        for (unsigned int node=0; node<n_nodes; ++node)
+          out << "0 ";
+        out << '\n';
+      }
+
+    // ---- cells ----
+    // note that vertices are counted from 1 onwards
+    out << "cells " << n_cells << '\n';
+    write_cells(patches, gmv_out);
+
+    // ---- data ----
+    out << "variable" << '\n';
+
+    // The reordered data is needed from here on, so wait for the task.
+    reordering.join ();
+
+    // The '1' after the name marks node data (as opposed to cell data,
+    // which is not supported explicitly here).
+    for (unsigned int set=0; set<n_data_sets; ++set)
+      {
+        out << data_names[set] << " 1" << '\n';
+        std::copy (data_vectors[set].begin(),
+                   data_vectors[set].end(),
+                   std::ostream_iterator<double>(out, " "));
+        out << '\n'
+            << '\n';
+      }
+
+
+
+    // close the variable section and the file
+    out << "endvars" << '\n';
+
+    out << "endgmv"
+        << '\n';
+
+    // push everything to disk and verify that the stream survived
+    out.flush ();
+
+    AssertThrow (out, ExcIO());
+  }
+
+
+
+  // Write the given patches to @p out in Tecplot ASCII (FEBLOCK) format.
+  //
+  // The output consists of a comment header, the list of variable names
+  // (coordinates first, then @p data_names), a zone header, the node
+  // coordinates one direction at a time, the data one variable at a time,
+  // and finally the cell connectivity. The vector data ranges are not
+  // used.
+  //
+  // Throws ExcIO if the stream is not in a good state on entry or after
+  // writing.
+  template <int dim, int spacedim>
+  void write_tecplot (const std::vector<Patch<dim,spacedim> > &patches,
+                      const std::vector<std::string>          &data_names,
+                      const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &,
+                      const TecplotFlags                      &flags,
+                      std::ostream                            &out)
+  {
+    AssertThrow (out, ExcIO());
+
+#ifndef DEAL_II_WITH_MPI
+    // An empty list of patches almost always means that the caller forgot
+    // to call build_patches, so complain about it. With MPI, however, a
+    // processor may legitimately own no cells at all (e.g. on the coarsest
+    // mesh); in that case there is simply nothing to write.
+    Assert (patches.size() > 0, ExcNoPatches());
+#else
+    if (patches.empty())
+      return;
+#endif
+
+    TecplotStream tecplot_out(out, flags);
+
+    const unsigned int n_data_sets = data_names.size();
+    // Only the first patch is checked against the number of data sets
+    // here; all other patches are checked in
+    // write_gmv_reorder_data_vectors.
+    Assert ((patches[0].data.n_rows() == n_data_sets && !patches[0].points_are_available) ||
+            (patches[0].data.n_rows() == n_data_sets+spacedim && patches[0].points_are_available),
+            ExcDimensionMismatch (patches[0].points_are_available
+                                  ?
+                                  (n_data_sets + spacedim)
+                                  :
+                                  n_data_sets,
+                                  patches[0].data.n_rows()));
+
+    // Total number of nodes and cells over all patches, needed below.
+    unsigned int n_nodes;
+    unsigned int n_cells;
+    compute_sizes<dim,spacedim>(patches, n_nodes, n_cells);
+
+    // ---- preamble: comment header ----
+    out << "# This file was generated by the deal.II library." << '\n'
+        << "# Date =  " << Utilities::System::get_date() << '\n'
+        << "# Time =  " << Utilities::System::get_time() << '\n'
+        << "#" << '\n'
+        << "# For a description of the Tecplot format see the Tecplot documentation."
+        << '\n'
+        << "#" << '\n';
+
+
+    // ---- preamble: variable names, coordinates first ----
+    out << "Variables=";
+
+    if (spacedim == 1)
+      out << "\"x\"";
+    else if (spacedim == 2)
+      out << "\"x\", \"y\"";
+    else if (spacedim == 3)
+      out << "\"x\", \"y\", \"z\"";
+    else
+      {
+        Assert (false, ExcNotImplemented());
+      }
+
+    for (unsigned int set=0; set<n_data_sets; ++set)
+      out << ", \"" << data_names[set] << "\"";
+
+    out << '\n';
+
+    // ---- preamble: zone header ----
+    // The zone title is written only if a name was given, and the
+    // solution time only if it is non-negative.
+    out << "zone ";
+    if (flags.zone_name)
+      out << "t=\"" << flags.zone_name << "\" ";
+
+    if (flags.solution_time >= 0.0)
+      out << "strandid=1, solutiontime=" << flags.solution_time <<", ";
+
+    out << "f=feblock, n=" << n_nodes << ", e=" << n_cells
+        << ", et=" << tecplot_cell_type[dim] << '\n';
+
+
+    // The FEBLOCK format wants the data variable by variable (all values
+    // of variable 1, then all values of variable 2, ...), and likewise all
+    // x coordinates before all y coordinates. The patches store things the
+    // other way around, so the data is copied into a table with one row
+    // per data set. This copying is independent of writing the vertices,
+    // hence it is started as a separate task here and only waited for
+    // right before the data is written.
+
+    Table<2,double> data_vectors (n_data_sets, n_nodes);
+
+    typedef void (*ReorderFunction) (const std::vector<Patch<dim,spacedim> > &,
+                                     Table<2,double> &);
+    ReorderFunction reorder_function
+      = &write_gmv_reorder_data_vectors<dim,spacedim>;
+    Threads::Task<> reordering
+      = Threads::new_task (reorder_function, patches, data_vectors);
+
+    // ---- nodes: one block per coordinate direction ----
+
+
+    for (unsigned int component=0; component<spacedim; ++component)
+      {
+        tecplot_out.selected_component = component;
+        write_nodes(patches, tecplot_out);
+        out << '\n';
+      }
+
+
+    // ---- data ----
+    // The reordered data is needed from here on, so wait for the task.
+    reordering.join ();
+
+    // one value per line, and an empty line after each variable
+    for (unsigned int set=0; set<n_data_sets; ++set)
+      {
+        std::copy (data_vectors[set].begin(),
+                   data_vectors[set].end(),
+                   std::ostream_iterator<double>(out, "\n"));
+        out << '\n';
+      }
+
+    // ---- cells ----
+    write_cells(patches, tecplot_out);
+
+    // push everything to disk and verify that the stream survived
+    out.flush ();
+
+    AssertThrow (out, ExcIO());
+  }
+
+
+
+//---------------------------------------------------------------------------
+// Macros for handling Tecplot API data
+
+#ifdef DEAL_II_HAVE_TECPLOT
+
+  namespace
+  {
+    // Holds the two flat arrays handed to the Tecplot API: nodalData with
+    // n_nodes*n_vars floats and connData with n_cells*n_vert ints, plus
+    // two-index accessors into them.
+    class TecplotMacros
+    {
+    public:
+      // Remember the four sizes and allocate both arrays with
+      // value-initialized entries.
+      TecplotMacros(const unsigned int n_nodes = 0,
+                    const unsigned int n_vars = 0,
+                    const unsigned int n_cells = 0,
+                    const unsigned int n_vert = 0)
+        :
+        nodalData(n_nodes*n_vars),
+        connData(n_cells*n_vert),
+        n_nodes(n_nodes),
+        n_vars(n_vars),
+        n_cells(n_cells),
+        n_vert(n_vert)
+      {}
+
+      ~TecplotMacros()
+      {}
+
+      // Entry j of block i in nodalData, where each block is n_nodes
+      // entries long. No bounds checking is done here.
+      float &nd(const unsigned int i, const unsigned int j)
+      {
+        const unsigned int position = i*n_nodes+j;
+        return nodalData[position];
+      }
+
+      // Entry i of block j in connData, where each block is n_vert
+      // entries long. No bounds checking is done here.
+      int    &cd(const unsigned int i, const unsigned int j)
+      {
+        const unsigned int position = i+j*n_vert;
+        return connData[position];
+      }
+
+      std::vector<float> nodalData;
+      std::vector<int>   connData;
+    private:
+      unsigned int n_nodes;
+      unsigned int n_vars;
+      unsigned int n_cells;
+      unsigned int n_vert;
+    };
+
+  }
+
+
+#endif
+//---------------------------------------------------------------------------
+
+
+
+  template <int dim, int spacedim>
+  void write_tecplot_binary (const std::vector<Patch<dim,spacedim> > &patches,
+                             const std::vector<std::string>          &data_names,
+                             const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &vector_data_ranges,
+                             const TecplotFlags                      &flags,
+                             std::ostream                            &out)
+  {
+    // Write @p patches through the Tecplot API into the file named by flags.tecplot_binary_file_name; in several cases below fall back to ASCII write_tecplot() on @p out.
+#ifndef DEAL_II_HAVE_TECPLOT
+
+    // without the Tecplot API there is
+    // nothing binary we can write: simply
+    // delegate to the ASCII output function
+    write_tecplot (patches, data_names, vector_data_ranges, flags, out);
+    return;
+
+#else
+
+    // the binary path below only handles dim==2 and
+    // dim==3; for dim==1 use the ASCII writer instead
+    if (dim == 1)
+      {
+        write_tecplot (patches, data_names, vector_data_ranges, flags, out);
+        return;
+      }
+
+    // the Tecplot API writes to a named
+    // file, not to a stream. if the user
+    // hasn't specified a file name we call
+    // the ASCII function and use the
+    // ostream @p out instead
+    char *file_name = (char *) flags.tecplot_binary_file_name;
+
+    if (file_name == NULL)
+      {
+        // At least in debug mode we
+        // should tell users why they
+        // don't get tecplot binary
+        // output
+        Assert(false, ExcMessage("Specify the name of the tecplot_binary"
+                                 " file through the TecplotFlags interface."));
+        write_tecplot (patches, data_names, vector_data_ranges, flags, out);
+        return;
+      }
+
+
+    AssertThrow (out, ExcIO());
+
+#ifndef DEAL_II_WITH_MPI
+    // verify that there are indeed
+    // patches to be written out. most
+    // of the time, people just forget
+    // to call build_patches when there
+    // are no patches, so a warning is
+    // in order. that said, the
+    // assertion is disabled if we
+    // support MPI since then it can
+    // happen that on the coarsest
+    // mesh, a processor simply has no
+    // cells it actually owns, and in
+    // that case it is legitimate if
+    // there are no patches
+    Assert (patches.size() > 0, ExcNoPatches());
+#else
+    if (patches.size() == 0)
+      return;
+#endif
+
+    const unsigned int n_data_sets = data_names.size();
+    // check against # of data sets in
+    // first patch. checks against all
+    // other patches are made in
+    // write_gmv_reorder_data_vectors
+    Assert ((patches[0].data.n_rows() == n_data_sets && !patches[0].points_are_available) ||
+            (patches[0].data.n_rows() == n_data_sets+spacedim && patches[0].points_are_available),
+            ExcDimensionMismatch (patches[0].points_are_available
+                                  ?
+                                  (n_data_sets + spacedim)
+                                  :
+                                  n_data_sets,
+                                  patches[0].data.n_rows()));
+
+    // first count the number of nodes
+    // and cells for later use
+    unsigned int n_nodes;
+    unsigned int n_cells;
+    compute_sizes<dim,spacedim>(patches, n_nodes, n_cells);
+    // sizes of the buffers handed to the Tecplot API:
+    // spacedim coordinates plus one value per data set at each node
+    const unsigned int vars_per_node  = (spacedim+n_data_sets),
+                       nodes_per_cell = GeometryInfo<dim>::vertices_per_cell;
+
+    TecplotMacros tm(n_nodes, vars_per_node, n_cells, nodes_per_cell);
+
+    int is_double = 0,
+        tec_debug = 0,
+        cell_type = tecplot_binary_cell_type[dim];
+
+    std::string tec_var_names;
+    switch (spacedim)
+      {
+      case 2:
+        tec_var_names  = "x y";
+        break;
+      case 3:
+        tec_var_names  = "x y z";
+        break;
+      default:
+        Assert(false, ExcNotImplemented());
+      }
+
+    for (unsigned int data_set=0; data_set<n_data_sets; ++data_set)
+      {
+        tec_var_names += " ";
+        tec_var_names += data_names[data_set];
+      }
+    // in Tecplot FEBLOCK format the vertex
+    // coordinates and the data have an
+    // order that is a bit unpleasant
+    // (first all x coordinates, then
+    // all y coordinates, ...; first all
+    // data of variable 1, then
+    // variable 2, etc), so we have to
+    // copy the data vectors a bit around
+    //
+    // note that we copy vectors when
+    // looping over the patches since we
+    // have to write them one variable
+    // at a time and don't want to use
+    // more than one loop
+    //
+    // this copying of data vectors can
+    // be done while we already set up
+    // the vertices, so do this on a
+    // separate task and when wanting
+    // to copy the data into tm, we wait
+    // for that task to finish
+    Table<2,double> data_vectors (n_data_sets, n_nodes);
+
+    void (*fun_ptr) (const std::vector<Patch<dim,spacedim> > &,
+                     Table<2,double> &)
+      = &write_gmv_reorder_data_vectors<dim,spacedim>;
+    Threads::Task<> reorder_task = Threads::new_task (fun_ptr, patches, data_vectors);
+
+    ///////////////////////////////
+    // first fill in the coordinates: for each
+    // coordinate direction d, variable d-1 of tm
+    // gets one value per node, over all patches
+    for (unsigned int d=1; d<=spacedim; ++d)
+      {
+        unsigned int entry=0;
+
+        for (typename std::vector<Patch<dim,spacedim> >::const_iterator patch=patches.begin();
+             patch!=patches.end(); ++patch)
+          {
+            const unsigned int n_subdivisions = patch->n_subdivisions;
+
+            switch (dim)
+              {
+              case 2:
+              {
+                for (unsigned int j=0; j<n_subdivisions+1; ++j)
+                  for (unsigned int i=0; i<n_subdivisions+1; ++i)
+                    {
+                      const double x_frac = i * 1./n_subdivisions,
+                                   y_frac = j * 1./n_subdivisions;
+
+                      tm.nd((d-1),entry) = static_cast<float>(
+                                             (((patch->vertices[1](d-1) * x_frac) +
+                                               (patch->vertices[0](d-1) * (1-x_frac))) * (1-y_frac) +
+                                              ((patch->vertices[3](d-1) * x_frac) +
+                                               (patch->vertices[2](d-1) * (1-x_frac))) * y_frac)
+                                           );
+                      entry++;
+                    }
+                break;
+              }
+
+              case 3:
+              {
+                for (unsigned int j=0; j<n_subdivisions+1; ++j)
+                  for (unsigned int k=0; k<n_subdivisions+1; ++k)
+                    for (unsigned int i=0; i<n_subdivisions+1; ++i)
+                      {
+                        const double x_frac = i * 1./n_subdivisions,
+                                     y_frac = k * 1./n_subdivisions,
+                                     z_frac = j * 1./n_subdivisions;
+
+                        // trilinear interpolation of the eight patch
+                        // vertices at (x_frac, y_frac, z_frac)
+                        tm.nd((d-1),entry) = static_cast<float>(
+                                               ((((patch->vertices[1](d-1) * x_frac) +
+                                                  (patch->vertices[0](d-1) * (1-x_frac))) * (1-y_frac) +
+                                                 ((patch->vertices[3](d-1) * x_frac) +
+                                                  (patch->vertices[2](d-1) * (1-x_frac))) * y_frac)   * (1-z_frac) +
+                                                (((patch->vertices[5](d-1) * x_frac) +
+                                                  (patch->vertices[4](d-1) * (1-x_frac))) * (1-y_frac) +
+                                                 ((patch->vertices[7](d-1) * x_frac) +
+                                                  (patch->vertices[6](d-1) * (1-x_frac))) * y_frac)   * z_frac)
+                                             );
+                        entry++;
+                      }
+                break;
+              }
+
+              default:
+                Assert (false, ExcNotImplemented());
+              }
+          }
+      }
+
+
+    ///////////////////////////////////////
+    // data output. wait until the reorder
+    // task has filled data_vectors
+    reorder_task.join ();
+
+    // then copy the data (converted to float) into the variables following the coordinates.
+    for (unsigned int data_set=0; data_set<n_data_sets; ++data_set)
+      for (unsigned int entry=0; entry<data_vectors[data_set].size(); entry++)
+        tm.nd((spacedim+data_set),entry) = static_cast<float>(data_vectors[data_set][entry]);
+
+
+
+
+    /////////////////////////////////
+    // now for the cells. note that
+    // vertices are counted from 1 onwards
+    unsigned int first_vertex_of_patch = 0;
+    unsigned int elem=0;
+
+    for (typename std::vector<Patch<dim,spacedim> >::const_iterator patch=patches.begin();
+         patch!=patches.end(); ++patch)
+      {
+        const unsigned int n_subdivisions = patch->n_subdivisions;
+        const unsigned int n = n_subdivisions+1;
+        const unsigned int d1=1;
+        const unsigned int d2=n;
+        const unsigned int d3=n*n;
+        // write out the cells making up this patch; d1, d2, d3 are
+        // the node index strides in the three lattice directions
+        switch (dim)
+          {
+          case 2:
+          {
+            for (unsigned int i2=0; i2<n_subdivisions; ++i2)
+              for (unsigned int i1=0; i1<n_subdivisions; ++i1)
+                {
+                  tm.cd(0,elem) = first_vertex_of_patch+(i1  )*d1+(i2  )*d2+1;
+                  tm.cd(1,elem) = first_vertex_of_patch+(i1+1)*d1+(i2  )*d2+1;
+                  tm.cd(2,elem) = first_vertex_of_patch+(i1+1)*d1+(i2+1)*d2+1;
+                  tm.cd(3,elem) = first_vertex_of_patch+(i1  )*d1+(i2+1)*d2+1;
+
+                  elem++;
+                }
+            break;
+          }
+
+          case 3:
+          {
+            for (unsigned int i3=0; i3<n_subdivisions; ++i3)
+              for (unsigned int i2=0; i2<n_subdivisions; ++i2)
+                for (unsigned int i1=0; i1<n_subdivisions; ++i1)
+                  {
+                    // note: vertex indices start with 1!
+
+
+                    tm.cd(0,elem) = first_vertex_of_patch+(i1  )*d1+(i2  )*d2+(i3  )*d3+1;
+                    tm.cd(1,elem) = first_vertex_of_patch+(i1+1)*d1+(i2  )*d2+(i3  )*d3+1;
+                    tm.cd(2,elem) = first_vertex_of_patch+(i1+1)*d1+(i2+1)*d2+(i3  )*d3+1;
+                    tm.cd(3,elem) = first_vertex_of_patch+(i1  )*d1+(i2+1)*d2+(i3  )*d3+1;
+                    tm.cd(4,elem) = first_vertex_of_patch+(i1  )*d1+(i2  )*d2+(i3+1)*d3+1;
+                    tm.cd(5,elem) = first_vertex_of_patch+(i1+1)*d1+(i2  )*d2+(i3+1)*d3+1;
+                    tm.cd(6,elem) = first_vertex_of_patch+(i1+1)*d1+(i2+1)*d2+(i3+1)*d3+1;
+                    tm.cd(7,elem) = first_vertex_of_patch+(i1  )*d1+(i2+1)*d2+(i3+1)*d3+1;
+
+                    elem++;
+                  }
+            break;
+          }
+
+          default:
+            Assert (false, ExcNotImplemented());
+          }
+
+
+        // finally advance the vertex offset by the
+        // n^dim nodes of the patch just processed
+        first_vertex_of_patch += Utilities::fixed_power<dim>(n);
+      }
+
+
+    {
+      int ierr      = 0,
+          num_nodes = static_cast<int>(n_nodes),
+          num_cells = static_cast<int>(n_cells);
+
+      char dot[2] = {'.', 0};
+      // Unfortunately, TECINI takes a
+      // char *, but c_str() gives a
+      // const char *.  As we don't do
+      // anything else with
+      // tec_var_names, the following
+      // const_cast is ok
+      char *var_names=const_cast<char *> (tec_var_names.c_str());
+      ierr = TECINI (NULL,
+                     var_names,
+                     file_name,
+                     dot,
+                     &tec_debug,
+                     &is_double);
+      // NOTE(review): the checks on ierr use Assert, not AssertThrow; presumably they are debug-only, so API errors may go unnoticed in release builds -- confirm
+      Assert (ierr == 0, ExcErrorOpeningTecplotFile(file_name));
+
+      char FEBLOCK[] = {'F','E','B','L','O','C','K',0};
+      ierr = TECZNE (NULL,
+                     &num_nodes,
+                     &num_cells,
+                     &cell_type,
+                     FEBLOCK,
+                     NULL);
+
+      Assert (ierr == 0, ExcTecplotAPIError());
+
+      int total = (vars_per_node*num_nodes);
+
+      ierr = TECDAT (&total,
+                     &tm.nodalData[0],
+                     &is_double);
+
+      Assert (ierr == 0, ExcTecplotAPIError());
+
+      ierr = TECNOD (&tm.connData[0]);
+
+      Assert (ierr == 0, ExcTecplotAPIError());
+
+      ierr = TECEND ();
+
+      Assert (ierr == 0, ExcTecplotAPIError());
+    }
+#endif
+  }
+
+
+
+  template <int dim, int spacedim>
+  void
+  write_vtk (const std::vector<Patch<dim,spacedim> > &patches,
+             const std::vector<std::string>          &data_names,
+             const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &vector_data_ranges,
+             const VtkFlags                          &flags,
+             std::ostream                            &out)
+  {  // Write @p patches and their point data to @p out as an ASCII VTK "UNSTRUCTURED_GRID" file.
+    AssertThrow (out, ExcIO());
+
+#ifndef DEAL_II_WITH_MPI
+    // verify that there are indeed
+    // patches to be written out. most
+    // of the time, people just forget
+    // to call build_patches when there
+    // are no patches, so a warning is
+    // in order. that said, the
+    // assertion is disabled if we
+    // support MPI since then it can
+    // happen that on the coarsest
+    // mesh, a processor simply has no
+    // cells it actually owns, and in
+    // that case it is legitimate if
+    // there are no patches
+    Assert (patches.size() > 0, ExcNoPatches());
+#else
+    if (patches.size() == 0)
+      return;
+#endif
+
+    VtkStream vtk_out(out, flags);
+
+    const unsigned int n_data_sets = data_names.size();
+    // check against # of data sets in the first patch; if the patch
+    // carries its own points, it has spacedim additional rows.
+    if (patches[0].points_are_available)
+      {
+        AssertDimension(n_data_sets + spacedim, patches[0].data.n_rows())
+      }
+    else
+      {
+        AssertDimension(n_data_sets, patches[0].data.n_rows())
+      }
+
+    ///////////////////////
+    // preamble: version line, title line (optionally with date and time), "ASCII", dataset type
+    {
+      out << "# vtk DataFile Version 3.0"
+          << '\n'
+          << "#This file was generated by the deal.II library";
+      if (flags.print_date_and_time)
+        {
+          out << " on " << Utilities::System::get_date()
+              << " at " << Utilities::System::get_time();
+        }
+      else
+        out << ".";
+      out << '\n'
+          << "ASCII"
+          << '\n';
+      // now output the data header
+      out << "DATASET UNSTRUCTURED_GRID\n"
+          << '\n';
+    }
+
+    // if desired, output time and cycle of the simulation, following
+    // the instructions at (a flag equal to numeric_limits<...>::min() is not written)
+    // http://www.visitusers.org/index.php?title=Time_and_Cycle_in_VTK_files
+    {
+      const unsigned int
+      n_metadata = ((flags.cycle != std::numeric_limits<unsigned int>::min() ? 1 : 0)
+                    +
+                    (flags.time != std::numeric_limits<double>::min() ? 1 : 0));
+      if (n_metadata > 0)
+        out << "FIELD FieldData " << n_metadata << "\n";
+
+      if (flags.cycle != std::numeric_limits<unsigned int>::min())
+        {
+          out << "CYCLE 1 1 int\n"
+              << flags.cycle << "\n";
+        }
+      if (flags.time != std::numeric_limits<double>::min())
+        {
+          out << "TIME 1 1 double\n"
+              << flags.time << "\n";
+        }
+    }
+
+    // first count the number of nodes
+    // and cells for later use
+    unsigned int n_nodes;
+    unsigned int n_cells;
+    compute_sizes<dim,spacedim>(patches, n_nodes, n_cells);
+    // the point data below is written
+    // one data set at a time (first
+    // all values of variable 1, then
+    // all values of variable 2, etc),
+    // whereas the patches store the
+    // values patch by patch, so we
+    // have to copy the data vectors a
+    // bit around
+    //
+    // note that we copy vectors when
+    // looping over the patches since we
+    // have to write them one variable
+    // at a time and don't want to use
+    // more than one loop
+    //
+    // this copying of data vectors can
+    // be done while we already output
+    // the vertices, so do this on a
+    // separate task and when wanting
+    // to write out the data, we wait
+    // for that task to finish
+    Table<2,double> data_vectors (n_data_sets, n_nodes);
+
+    void (*fun_ptr) (const std::vector<Patch<dim,spacedim> > &,
+                     Table<2,double> &)
+      = &write_gmv_reorder_data_vectors<dim,spacedim>;
+    Threads::Task<> reorder_task = Threads::new_task (fun_ptr, patches, data_vectors);
+
+    ///////////////////////////////
+    // first make up a list of used
+    // vertices along with their
+    // coordinates
+    //
+    // note that we have to print
+    // d=1..3 dimensions
+    out << "POINTS " << n_nodes << " double" << '\n';
+    write_nodes(patches, vtk_out);
+    out << '\n';
+    /////////////////////////////////
+    // now for the cells: the header gives the cell count and the list size (vertices plus one count per cell)
+    out << "CELLS " << n_cells << ' '
+        << n_cells*(GeometryInfo<dim>::vertices_per_cell+1)
+        << '\n';
+    write_cells(patches, vtk_out);
+    out << '\n';
+    // next output the types of the
+    // cells. since all cells are
+    // the same, this is simple
+    out << "CELL_TYPES " << n_cells << '\n';
+    for (unsigned int i=0; i<n_cells; ++i)
+      out << ' ' << vtk_cell_type[dim];
+    out << '\n';
+    ///////////////////////////////////////
+    // data output.
+
+    // now write the data vectors to
+    // @p out. first make sure that all
+    // data is in place
+    reorder_task.join ();
+
+    // then write data.  the
+    // 'POINT_DATA' means: node data
+    // (as opposed to cell data, which
+    // we do not support explicitly
+    // here). all following data sets
+    // are point data
+    out << "POINT_DATA " << n_nodes
+        << '\n';
+
+    // when writing, first write out
+    // all vector data, then handle the
+    // scalar data sets that have been
+    // left over
+    std::vector<bool> data_set_written (n_data_sets, false);
+    for (unsigned int n_th_vector=0; n_th_vector<vector_data_ranges.size(); ++n_th_vector)
+      {
+        AssertThrow (std_cxx11::get<1>(vector_data_ranges[n_th_vector]) >=
+                     std_cxx11::get<0>(vector_data_ranges[n_th_vector]),
+                     ExcLowerRange (std_cxx11::get<1>(vector_data_ranges[n_th_vector]),
+                                    std_cxx11::get<0>(vector_data_ranges[n_th_vector])));
+        AssertThrow (std_cxx11::get<1>(vector_data_ranges[n_th_vector]) < n_data_sets,
+                     ExcIndexRange (std_cxx11::get<1>(vector_data_ranges[n_th_vector]),
+                                    0, n_data_sets));
+        AssertThrow (std_cxx11::get<1>(vector_data_ranges[n_th_vector]) + 1
+                     - std_cxx11::get<0>(vector_data_ranges[n_th_vector]) <= 3,
+                     ExcMessage ("Can't declare a vector with more than 3 components "
+                                 "in VTK"));
+
+        // mark these components as already written, so the scalar loop below skips them:
+        for (unsigned int i=std_cxx11::get<0>(vector_data_ranges[n_th_vector]);
+             i<=std_cxx11::get<1>(vector_data_ranges[n_th_vector]);
+             ++i)
+          data_set_written[i] = true;
+
+        // write the
+        // header. concatenate all the
+        // component names with double
+        // underscores unless a vector
+        // name has been specified
+        out << "VECTORS ";
+
+        if (std_cxx11::get<2>(vector_data_ranges[n_th_vector]) != "")
+          out << std_cxx11::get<2>(vector_data_ranges[n_th_vector]);
+        else
+          {
+            for (unsigned int i=std_cxx11::get<0>(vector_data_ranges[n_th_vector]);
+                 i<std_cxx11::get<1>(vector_data_ranges[n_th_vector]);
+                 ++i)
+              out << data_names[i] << "__";
+            out << data_names[std_cxx11::get<1>(vector_data_ranges[n_th_vector])];
+          }
+
+        out << " double"
+            << '\n';
+
+        // now write data. pad all
+        // vectors with zeros to have
+        // three components
+        for (unsigned int n=0; n<n_nodes; ++n)
+          {
+            switch (std_cxx11::get<1>(vector_data_ranges[n_th_vector]) -
+                    std_cxx11::get<0>(vector_data_ranges[n_th_vector]))
+              {
+              case 0:
+                out << data_vectors(std_cxx11::get<0>(vector_data_ranges[n_th_vector]), n) << " 0 0"
+                    << '\n';
+                break;
+
+              case 1:
+                out << data_vectors(std_cxx11::get<0>(vector_data_ranges[n_th_vector]),   n) << ' '<< data_vectors(std_cxx11::get<0>(vector_data_ranges[n_th_vector])+1, n) << " 0"
+                    << '\n';
+                break;
+              case 2:
+                out << data_vectors(std_cxx11::get<0>(vector_data_ranges[n_th_vector]),   n) << ' '<< data_vectors(std_cxx11::get<0>(vector_data_ranges[n_th_vector])+2-1, n) << ' '<< data_vectors(std_cxx11::get<0>(vector_data_ranges[n_th_vector])+2, n)
+                    << '\n';
+                break;
+
+              default:
+                // VTK doesn't
+                // support
+                // anything else
+                // than vectors
+                // with 1, 2, or
+                // 3 components
+                Assert (false, ExcInternalError());
+              }
+          }
+      }
+
+    // now do the left over scalar data sets, i.e. those not covered by any vector range
+    for (unsigned int data_set=0; data_set<n_data_sets; ++data_set)
+      if (data_set_written[data_set] == false)
+        {
+          out << "SCALARS "
+              << data_names[data_set]
+              << " double 1"
+              << '\n'
+              << "LOOKUP_TABLE default"
+              << '\n';
+          std::copy (data_vectors[data_set].begin(),
+                     data_vectors[data_set].end(),
+                     std::ostream_iterator<double>(out, " "));
+          out << '\n';
+        }
+
+    // make sure everything now gets to
+    // disk
+    out.flush ();
+
+    // assert the stream is still ok
+    AssertThrow (out, ExcIO());
+  }
+
+
+  // Write the opening part of a VTU file to @p out: the XML
+  // declaration, an XML comment naming the generator (with date and
+  // time if flags.print_date_and_time is set, and a plain "."
+  // otherwise), and the opening <VTKFile> and <UnstructuredGrid>
+  // tags. The <VTKFile> attributes depend on the DEAL_II_WITH_ZLIB
+  // and DEAL_II_WORDS_BIGENDIAN configuration macros.
+  //
+  // Throws ExcIO if @p out is not in a good state on entry.
+  void write_vtu_header (std::ostream &out,
+                         const VtkFlags &flags)
+  {
+    AssertThrow (out, ExcIO());
+    out << "<?xml version=\"1.0\" ?> \n";
+    out << "<!-- \n";
+    out << "# vtk DataFile Version 3.0"
+        << '\n'
+        << "#This file was generated by the deal.II library";
+    if (flags.print_date_and_time)
+      {
+        // "on <date> at <time>", the same order write_vtk() uses
+        out << " on " << Utilities::System::get_date()
+            << " at " << Utilities::System::get_time();
+      }
+    else
+      out << ".";
+    out << "\n-->\n";
+    out << "<VTKFile type=\"UnstructuredGrid\" version=\"0.1\"";
+#ifdef DEAL_II_WITH_ZLIB
+    out << " compressor=\"vtkZLibDataCompressor\"";
+#endif
+#ifdef DEAL_II_WORDS_BIGENDIAN
+    out << " byte_order=\"BigEndian\"";
+#else
+    out << " byte_order=\"LittleEndian\"";
+#endif
+    out << ">";
+    out << '\n';
+    out << "<UnstructuredGrid>";
+    out << '\n';
+  }
+
+
+
+  // Close the <UnstructuredGrid> and <VTKFile> elements that
+  // write_vtu_header() opened. Throws ExcIO if @p out is not in a
+  // good state on entry.
+  void write_vtu_footer (std::ostream &out)
+  {
+    AssertThrow (out, ExcIO());
+    out << " </UnstructuredGrid>\n"
+        << "</VTKFile>\n";
+  }
+
+
+
+  // Write a complete VTU file to @p out: the header, then the piece
+  // produced by write_vtu_main() for the given patches, then the
+  // footer. The stream is flushed at the end.
+  template <int dim, int spacedim>
+  void
+  write_vtu (const std::vector<Patch<dim,spacedim> > &patches,
+             const std::vector<std::string>          &data_names,
+             const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &vector_data_ranges,
+             const VtkFlags                          &flags,
+             std::ostream                            &out)
+  {
+    // opening tags
+    write_vtu_header(out, flags);
+
+    // the actual points, cells and point data
+    write_vtu_main (patches, data_names, vector_data_ranges, flags, out);
+
+    // closing tags
+    write_vtu_footer(out);
+
+    // make sure everything gets to disk
+    out.flush ();
+  }
+
+
+  template <int dim, int spacedim>
+  void write_vtu_main (const std::vector<Patch<dim,spacedim> > &patches,
+                       const std::vector<std::string>          &data_names,
+                       const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &vector_data_ranges,
+                       const VtkFlags                          &flags,
+                       std::ostream                            &out)
+  {
+    AssertThrow (out, ExcIO());
+
+#ifndef DEAL_II_WITH_MPI
+    // verify that there are indeed
+    // patches to be written out. most
+    // of the times, people just forget
+    // to call build_patches when there
+    // are no patches, so a warning is
+    // in order. that said, the
+    // assertion is disabled if we
+    // support MPI since then it can
+    // happen that on the coarsest
+    // mesh, a processor simply has no
+    // cells it actually owns, and in
+    // that case it is legit if there
+    // are no patches
+    Assert (patches.size() > 0, ExcNoPatches());
+#else
+    if (patches.size() == 0)
+      {
+        // we still need to output a valid vtu file, because other CPUs
+        // might output data. This is the minimal file that is accepted by paraview and visit.
+        // if we remove the field definitions, visit is complaining.
+        out << "<Piece NumberOfPoints=\"0\" NumberOfCells=\"0\" >\n"
+            << "<Cells>\n"
+            << "<DataArray type=\"UInt8\" Name=\"types\"></DataArray>\n"
+            << "</Cells>\n"
+            << "  <PointData Scalars=\"scalars\">\n";
+        std::vector<bool> data_set_written (data_names.size(), false);
+        for (unsigned int n_th_vector=0; n_th_vector<vector_data_ranges.size(); ++n_th_vector)
+          {
+            // mark these components as already
+            // written:
+            for (unsigned int i=std_cxx11::get<0>(vector_data_ranges[n_th_vector]);
+                 i<=std_cxx11::get<1>(vector_data_ranges[n_th_vector]);
+                 ++i)
+              data_set_written[i] = true;
+
+            // write the
+            // header. concatenate all the
+            // component names with double
+            // underscores unless a vector
+            // name has been specified
+            out << "    <DataArray type=\"Float64\" Name=\"";
+
+            if (std_cxx11::get<2>(vector_data_ranges[n_th_vector]) != "")
+              out << std_cxx11::get<2>(vector_data_ranges[n_th_vector]);
+            else
+              {
+                for (unsigned int i=std_cxx11::get<0>(vector_data_ranges[n_th_vector]);
+                     i<std_cxx11::get<1>(vector_data_ranges[n_th_vector]);
+                     ++i)
+                  out << data_names[i] << "__";
+                out << data_names[std_cxx11::get<1>(vector_data_ranges[n_th_vector])];
+              }
+
+            out << "\" NumberOfComponents=\"3\"></DataArray>\n";
+          }
+
+        for (unsigned int data_set=0; data_set<data_names.size(); ++data_set)
+          if (data_set_written[data_set] == false)
+            {
+              out << "    <DataArray type=\"Float64\" Name=\""
+                  << data_names[data_set]
+                  << "\"></DataArray>\n";
+            }
+
+        out << "  </PointData>\n";
+        out << "</Piece>\n";
+
+        out << std::flush;
+
+        return;
+      }
+#endif
+
+    // first up: metadata
+    //
+    // if desired, output time and cycle of the simulation, following
+    // the instructions at
+    // http://www.visitusers.org/index.php?title=Time_and_Cycle_in_VTK_files
+    {
+      const unsigned int
+      n_metadata = ((flags.cycle != std::numeric_limits<unsigned int>::min() ? 1 : 0)
+                    +
+                    (flags.time != std::numeric_limits<double>::min() ? 1 : 0));
+      if (n_metadata > 0)
+        out << "<FieldData>\n";
+
+      if (flags.cycle != std::numeric_limits<unsigned int>::min())
+        {
+          out << "<DataArray type=\"Float32\" Name=\"CYCLE\" NumberOfTuples=\"1\" format=\"ascii\">"
+              << flags.cycle
+              << "</DataArray>\n";
+        }
+      if (flags.time != std::numeric_limits<double>::min())
+        {
+          out << "<DataArray type=\"Float32\" Name=\"TIME\" NumberOfTuples=\"1\" format=\"ascii\">"
+              << flags.time
+              << "</DataArray>\n";
+        }
+
+      if (n_metadata > 0)
+        out << "</FieldData>\n";
+    }
+
+
+    VtuStream vtu_out(out, flags);
+
+    const unsigned int n_data_sets = data_names.size();
+    // check against # of data sets in
+    // first patch. checks against all
+    // other patches are made in
+    // write_gmv_reorder_data_vectors
+    if (patches[0].points_are_available)
+      {
+        AssertDimension(n_data_sets + spacedim, patches[0].data.n_rows())
+      }
+    else
+      {
+        AssertDimension(n_data_sets, patches[0].data.n_rows())
+      }
+
+#ifdef DEAL_II_WITH_ZLIB
+    const char *ascii_or_binary = "binary";
+#else
+    const char *ascii_or_binary = "ascii";
+#endif
+
+
+    // first count the number of cells
+    // and cells for later use
+    unsigned int n_nodes;
+    unsigned int n_cells;
+    compute_sizes<dim,spacedim>(patches, n_nodes, n_cells);
+    // in gmv format the vertex
+    // coordinates and the data have an
+    // order that is a bit unpleasant
+    // (first all x coordinates, then
+    // all y coordinate, ...; first all
+    // data of variable 1, then
+    // variable 2, etc), so we have to
+    // copy the data vectors a bit around
+    //
+    // note that we copy vectors when
+    // looping over the patches since we
+    // have to write them one variable
+    // at a time and don't want to use
+    // more than one loop
+    //
+    // this copying of data vectors can
+    // be done while we already output
+    // the vertices, so do this on a
+    // separate task and when wanting
+    // to write out the data, we wait
+    // for that task to finish
+    Table<2,double> data_vectors (n_data_sets, n_nodes);
+
+    void (*fun_ptr) (const std::vector<Patch<dim,spacedim> > &,
+                     Table<2,double> &)
+      = &write_gmv_reorder_data_vectors<dim,spacedim>;
+    Threads::Task<> reorder_task = Threads::new_task (fun_ptr, patches,
+                                                      data_vectors);
+
+    ///////////////////////////////
+    // first make up a list of used
+    // vertices along with their
+    // coordinates
+    //
+    // note that according to the standard, we
+    // have to print d=1..3 dimensions, even if
+    // we are in reality in 2d, for example
+    out << "<Piece NumberOfPoints=\"" << n_nodes
+        <<"\" NumberOfCells=\"" << n_cells << "\" >\n";
+    out << "  <Points>\n";
+    out << "    <DataArray type=\"Float64\" NumberOfComponents=\"3\" format=\""
+        << ascii_or_binary << "\">\n";
+    write_nodes(patches, vtu_out);
+    out << "    </DataArray>\n";
+    out << "  </Points>\n\n";
+    /////////////////////////////////
+    // now for the cells
+    out << "  <Cells>\n";
+    out << "    <DataArray type=\"Int32\" Name=\"connectivity\" format=\""
+        << ascii_or_binary << "\">\n";
+    write_cells(patches, vtu_out);
+    out << "    </DataArray>\n";
+
+    // XML VTU format uses offsets; this is
+    // different than the VTK format, which
+    // puts the number of nodes per cell in
+    // front of the connectivity list.
+    out << "    <DataArray type=\"Int32\" Name=\"offsets\" format=\""
+        << ascii_or_binary << "\">\n";
+
+    std::vector<int32_t> offsets (n_cells);
+    for (unsigned int i=0; i<n_cells; ++i)
+      offsets[i] = (i+1)*GeometryInfo<dim>::vertices_per_cell;
+    vtu_out << offsets;
+    out << "\n";
+    out << "    </DataArray>\n";
+
+    // next output the types of the
+    // cells. since all cells are
+    // the same, this is simple
+    out << "    <DataArray type=\"UInt8\" Name=\"types\" format=\""
+        << ascii_or_binary << "\">\n";
+
+    {
+      // uint8_t might be a typedef to unsigned
+      // char which is then not printed as
+      // ascii integers
+#ifdef DEAL_II_WITH_ZLIB
+      std::vector<uint8_t> cell_types (n_cells,
+                                       static_cast<uint8_t>(vtk_cell_type[dim]));
+#else
+      std::vector<unsigned int> cell_types (n_cells,
+                                            vtk_cell_type[dim]);
+#endif
+      // this should compress well :-)
+      vtu_out << cell_types;
+    }
+    out << "\n";
+    out << "    </DataArray>\n";
+    out << "  </Cells>\n";
+
+
+    ///////////////////////////////////////
+    // data output.
+
+    // now write the data vectors to
+    // @p{out} first make sure that all
+    // data is in place
+    reorder_task.join ();
+
+    // then write data.  the
+    // 'POINT_DATA' means: node data
+    // (as opposed to cell data, which
+    // we do not support explicitly
+    // here). all following data sets
+    // are point data
+    out << "  <PointData Scalars=\"scalars\">\n";
+
+    // when writing, first write out
+    // all vector data, then handle the
+    // scalar data sets that have been
+    // left over
+    std::vector<bool> data_set_written (n_data_sets, false);
+    for (unsigned int n_th_vector=0; n_th_vector<vector_data_ranges.size(); ++n_th_vector)
+      {
+        AssertThrow (std_cxx11::get<1>(vector_data_ranges[n_th_vector]) >=
+                     std_cxx11::get<0>(vector_data_ranges[n_th_vector]),
+                     ExcLowerRange (std_cxx11::get<1>(vector_data_ranges[n_th_vector]),
+                                    std_cxx11::get<0>(vector_data_ranges[n_th_vector])));
+        AssertThrow (std_cxx11::get<1>(vector_data_ranges[n_th_vector]) < n_data_sets,
+                     ExcIndexRange (std_cxx11::get<1>(vector_data_ranges[n_th_vector]),
+                                    0, n_data_sets));
+        AssertThrow (std_cxx11::get<1>(vector_data_ranges[n_th_vector]) + 1
+                     - std_cxx11::get<0>(vector_data_ranges[n_th_vector]) <= 3,
+                     ExcMessage ("Can't declare a vector with more than 3 components "
+                                 "in VTK"));
+
+        // mark these components as already
+        // written:
+        for (unsigned int i=std_cxx11::get<0>(vector_data_ranges[n_th_vector]);
+             i<=std_cxx11::get<1>(vector_data_ranges[n_th_vector]);
+             ++i)
+          data_set_written[i] = true;
+
+        // write the
+        // header. concatenate all the
+        // component names with double
+        // underscores unless a vector
+        // name has been specified
+        out << "    <DataArray type=\"Float64\" Name=\"";
+
+        if (std_cxx11::get<2>(vector_data_ranges[n_th_vector]) != "")
+          out << std_cxx11::get<2>(vector_data_ranges[n_th_vector]);
+        else
+          {
+            for (unsigned int i=std_cxx11::get<0>(vector_data_ranges[n_th_vector]);
+                 i<std_cxx11::get<1>(vector_data_ranges[n_th_vector]);
+                 ++i)
+              out << data_names[i] << "__";
+            out << data_names[std_cxx11::get<1>(vector_data_ranges[n_th_vector])];
+          }
+
+        out << "\" NumberOfComponents=\"3\" format=\""
+            << ascii_or_binary << "\">\n";
+
+        // now write data. pad all
+        // vectors to have three
+        // components
+        std::vector<double> data;
+        data.reserve (n_nodes*dim);
+
+        for (unsigned int n=0; n<n_nodes; ++n)
+          {
+            switch (std_cxx11::get<1>(vector_data_ranges[n_th_vector]) -
+                    std_cxx11::get<0>(vector_data_ranges[n_th_vector]))
+              {
+              case 0:
+                data.push_back (data_vectors(std_cxx11::get<0>(vector_data_ranges[n_th_vector]), n));
+                data.push_back (0);
+                data.push_back (0);
+                break;
+
+              case 1:
+                data.push_back (data_vectors(std_cxx11::get<0>(vector_data_ranges[n_th_vector]),   n));
+                data.push_back (data_vectors(std_cxx11::get<0>(vector_data_ranges[n_th_vector])+1, n));
+                data.push_back (0);
+                break;
+              case 2:
+                data.push_back (data_vectors(std_cxx11::get<0>(vector_data_ranges[n_th_vector]),   n));
+                data.push_back (data_vectors(std_cxx11::get<0>(vector_data_ranges[n_th_vector])+1, n));
+                data.push_back (data_vectors(std_cxx11::get<0>(vector_data_ranges[n_th_vector])+2, n));
+                break;
+
+              default:
+                // VTK doesn't
+                // support
+                // anything else
+                // than vectors
+                // with 1, 2, or
+                // 3 components
+                Assert (false, ExcInternalError());
+              }
+          }
+        vtu_out << data;
+        out << "    </DataArray>\n";
+      }
+
+    // now do the left over scalar data sets
+    for (unsigned int data_set=0; data_set<n_data_sets; ++data_set)
+      if (data_set_written[data_set] == false)
+        {
+          out << "    <DataArray type=\"Float64\" Name=\""
+              << data_names[data_set]
+              << "\" format=\""
+              << ascii_or_binary << "\">\n";
+
+          std::vector<double> data (data_vectors[data_set].begin(),
+                                    data_vectors[data_set].end());
+          vtu_out << data;
+          out << "    </DataArray>\n";
+        }
+
+    out << "  </PointData>\n";
+
+    // Finish up writing a valid XML file
+    out << " </Piece>\n";
+
+    // make sure everything now gets to
+    // disk
+    out.flush ();
+
+    // assert the stream is still ok
+    AssertThrow (out, ExcIO());
+  }
+
+
+  template <int dim, int spacedim>  // generic overload; the Patch<2,spacedim> overload that follows contains the actual SVG writer
+  void write_svg (const std::vector<Patch<dim,spacedim> > &/*patches*/,
+                  const std::vector<std::string> &/*data_names*/,
+                  const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &/*vector_data_ranges*/,
+                  const SvgFlags &/*flags*/,
+                  std::ostream &/*out*/)
+  {  // every argument is left unnamed because none of them is used here
+    Assert (false, ExcNotImplemented());  // nothing is written to the stream; this overload only flags the case as not implemented
+  }
+
+  template <int spacedim>
+  void write_svg (const std::vector<Patch<2,spacedim> > &patches,
+                  const std::vector<std::string> &/*data_names*/,
+                  const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &/*vector_data_ranges*/,
+                  const SvgFlags &flags,
+                  std::ostream &out)
+  {
+    const int dim = 2;
+    const unsigned int height = flags.height;
+    unsigned int width = flags.width;
+
+    // margin around the plotted area
+    unsigned int margin_in_percent = 0;
+    if (flags.margin) margin_in_percent = 5;
+
+
+    // determine the bounding box in the model space
+    double x_dimension, y_dimension, z_dimension;
+
+    typename std::vector<Patch<dim,spacedim> >::const_iterator patch = patches.begin();
+
+    unsigned int n_subdivisions = patch->n_subdivisions;
+    unsigned int n = n_subdivisions + 1;
+    const unsigned int d1 = 1;
+    const unsigned int d2 = n;
+
+    Point<spacedim> projected_point;
+    Point<spacedim> projected_points[4];
+
+    Point<2> projection_decomposition;
+    Point<2> projection_decompositions[4];
+
+    compute_node(projected_point, &*patch, 0, 0, 0, n_subdivisions);
+
+    Assert ((flags.height_vector < patch->data.n_rows()) ||
+            patch->data.n_rows() == 0,
+            ExcIndexRange (flags.height_vector, 0, patch->data.n_rows()));
+
+    double x_min = projected_point[0];
+    double x_max = x_min;
+    double y_min = projected_point[1];
+    double y_max = y_min;
+    double z_min = patch->data.n_rows() != 0 ? patch->data(flags.height_vector,0) : 0;
+    double z_max = z_min;
+
+    // iterate over the patches
+    for (; patch != patches.end(); ++patch)
+      {
+        n_subdivisions = patch->n_subdivisions;
+        n = n_subdivisions + 1;
+
+        for (unsigned int i2 = 0; i2 < n_subdivisions; ++i2)
+          {
+            for (unsigned int i1 = 0; i1 < n_subdivisions; ++i1)
+              {
+                compute_node(projected_points[0], &*patch, i1, i2, 0, n_subdivisions);
+                compute_node(projected_points[1], &*patch, i1+1, i2, 0, n_subdivisions);
+                compute_node(projected_points[2], &*patch, i1, i2+1, 0, n_subdivisions);
+                compute_node(projected_points[3], &*patch, i1+1, i2+1, 0, n_subdivisions);
+
+                x_min = std::min(x_min, (double)projected_points[0][0]);
+                x_min = std::min(x_min, (double)projected_points[1][0]);
+                x_min = std::min(x_min, (double)projected_points[2][0]);
+                x_min = std::min(x_min, (double)projected_points[3][0]);
+
+                x_max = std::max(x_max, (double)projected_points[0][0]);
+                x_max = std::max(x_max, (double)projected_points[1][0]);
+                x_max = std::max(x_max, (double)projected_points[2][0]);
+                x_max = std::max(x_max, (double)projected_points[3][0]);
+
+                y_min = std::min(y_min, (double)projected_points[0][1]);
+                y_min = std::min(y_min, (double)projected_points[1][1]);
+                y_min = std::min(y_min, (double)projected_points[2][1]);
+                y_min = std::min(y_min, (double)projected_points[3][1]);
+
+                y_max = std::max(y_max, (double)projected_points[0][1]);
+                y_max = std::max(y_max, (double)projected_points[1][1]);
+                y_max = std::max(y_max, (double)projected_points[2][1]);
+                y_max = std::max(y_max, (double)projected_points[3][1]);
+
+                Assert ((flags.height_vector < patch->data.n_rows()) ||
+                        patch->data.n_rows() == 0,
+                        ExcIndexRange (flags.height_vector, 0, patch->data.n_rows()));
+
+                z_min = std::min(z_min, (double)patch->data(flags.height_vector, i1*d1 + i2*d2));
+                z_min = std::min(z_min, (double)patch->data(flags.height_vector, (i1+1)*d1 + i2*d2));
+                z_min = std::min(z_min, (double)patch->data(flags.height_vector, i1*d1 + (i2+1)*d2));
+                z_min = std::min(z_min, (double)patch->data(flags.height_vector, (i1+1)*d1 + (i2+1)*d2));
+
+                z_max = std::max(z_max, (double)patch->data(flags.height_vector, i1*d1 + i2*d2));
+                z_max = std::max(z_max, (double)patch->data(flags.height_vector, (i1+1)*d1 + i2*d2));
+                z_max = std::max(z_max, (double)patch->data(flags.height_vector, i1*d1 + (i2+1)*d2));
+                z_max = std::max(z_max, (double)patch->data(flags.height_vector, (i1+1)*d1 + (i2+1)*d2));
+              }
+          }
+      }
+
+    x_dimension = x_max - x_min;
+    y_dimension = y_max - y_min;
+    z_dimension = z_max - z_min;
+
+
+// set initial camera position
+    Point<3> camera_position;
+    Point<3> camera_direction;
+    Point<3> camera_horizontal;
+    float camera_focus = 0;
+
+    // translate camera from the origin to the initial position
+    camera_position[0] = 0.;
+    camera_position[1] = 0.;
+    camera_position[2] = z_min + 2. * z_dimension;
+
+    camera_direction[0] = 0.;
+    camera_direction[1] = 0.;
+    camera_direction[2] = - 1.;
+
+    camera_horizontal[0] = 1.;
+    camera_horizontal[1] = 0.;
+    camera_horizontal[2] = 0.;
+
+    camera_focus = .5 * z_dimension;
+
+    Point<3> camera_position_temp;
+    Point<3> camera_direction_temp;
+    Point<3> camera_horizontal_temp;
+
+    const float angle_factor = 3.14159265f / 180.f;
+
+    // (I) rotate the camera to the chosen polar angle
+    camera_position_temp[1] = cos(angle_factor * flags.polar_angle) * camera_position[1] - sin(angle_factor * flags.polar_angle) * camera_position[2];
+    camera_position_temp[2] = sin(angle_factor * flags.polar_angle) * camera_position[1] + cos(angle_factor * flags.polar_angle) * camera_position[2];
+
+    camera_direction_temp[1] = cos(angle_factor * flags.polar_angle) * camera_direction[1] - sin(angle_factor * flags.polar_angle) * camera_direction[2];
+    camera_direction_temp[2] = sin(angle_factor * flags.polar_angle) * camera_direction[1] + cos(angle_factor * flags.polar_angle) * camera_direction[2];
+
+    camera_horizontal_temp[1] = cos(angle_factor * flags.polar_angle) * camera_horizontal[1] - sin(angle_factor * flags.polar_angle) * camera_horizontal[2];
+    camera_horizontal_temp[2] = sin(angle_factor * flags.polar_angle) * camera_horizontal[1] + cos(angle_factor * flags.polar_angle) * camera_horizontal[2];
+
+    camera_position[1] = camera_position_temp[1];
+    camera_position[2] = camera_position_temp[2];
+
+    camera_direction[1] = camera_direction_temp[1];
+    camera_direction[2] = camera_direction_temp[2];
+
+    camera_horizontal[1] = camera_horizontal_temp[1];
+    camera_horizontal[2] = camera_horizontal_temp[2];
+
+    // (II) rotate the camera to the chosen azimuth angle
+    camera_position_temp[0] = cos(angle_factor * flags.azimuth_angle) * camera_position[0] - sin(angle_factor * flags.azimuth_angle) * camera_position[1];
+    camera_position_temp[1] = sin(angle_factor * flags.azimuth_angle) * camera_position[0] + cos(angle_factor * flags.azimuth_angle) * camera_position[1];
+
+    camera_direction_temp[0] = cos(angle_factor * flags.azimuth_angle) * camera_direction[0] - sin(angle_factor * flags.azimuth_angle) * camera_direction[1];
+    camera_direction_temp[1] = sin(angle_factor * flags.azimuth_angle) * camera_direction[0] + cos(angle_factor * flags.azimuth_angle) * camera_direction[1];
+
+    camera_horizontal_temp[0] = cos(angle_factor * flags.azimuth_angle) * camera_horizontal[0] - sin(angle_factor * flags.azimuth_angle) * camera_horizontal[1];
+    camera_horizontal_temp[1] = sin(angle_factor * flags.azimuth_angle) * camera_horizontal[0] + cos(angle_factor * flags.azimuth_angle) * camera_horizontal[1];
+
+    camera_position[0] = camera_position_temp[0];
+    camera_position[1] = camera_position_temp[1];
+
+    camera_direction[0] = camera_direction_temp[0];
+    camera_direction[1] = camera_direction_temp[1];
+
+    camera_horizontal[0] = camera_horizontal_temp[0];
+    camera_horizontal[1] = camera_horizontal_temp[1];
+
+    // (III) translate the camera
+    camera_position[0] = x_min + .5 * x_dimension;
+    camera_position[1] = y_min + .5 * y_dimension;
+
+    camera_position[0] += (z_min + 2. * z_dimension) * sin(angle_factor * flags.polar_angle) * sin(angle_factor * flags.azimuth_angle);
+    camera_position[1] -= (z_min + 2. * z_dimension) * sin(angle_factor * flags.polar_angle) * cos(angle_factor * flags.azimuth_angle);
+
+
+// determine the bounding box on the projection plane
+    double x_min_perspective, y_min_perspective;
+    double x_max_perspective, y_max_perspective;
+    double x_dimension_perspective, y_dimension_perspective;
+
+    patch = patches.begin();
+
+    n_subdivisions = patch->n_subdivisions;
+    n = n_subdivisions + 1;
+
+    Point<3> point;
+
+    compute_node(projected_point, &*patch, 0, 0, 0, n_subdivisions);
+
+    Assert ((flags.height_vector < patch->data.n_rows()) ||
+            patch->data.n_rows() == 0,
+            ExcIndexRange (flags.height_vector, 0, patch->data.n_rows()));
+
+    point[0] = projected_point[0];
+    point[1] = projected_point[1];
+    point[2] = patch->data.n_rows() != 0 ? patch->data(flags.height_vector, 0) : 0;
+
+    projection_decomposition = svg_project_point(point, camera_position, camera_direction, camera_horizontal, camera_focus);
+
+    x_min_perspective = projection_decomposition[0];
+    x_max_perspective = projection_decomposition[0];
+    y_min_perspective = projection_decomposition[1];
+    y_max_perspective = projection_decomposition[1];
+
+    // iterate over the patches
+    for (; patch != patches.end(); ++patch)
+      {
+        n_subdivisions = patch->n_subdivisions;
+        n = n_subdivisions + 1;
+
+        for (unsigned int i2 = 0; i2 < n_subdivisions; ++i2)
+          {
+            for (unsigned int i1 = 0; i1 < n_subdivisions; ++i1)
+              {
+                Point<spacedim> projected_vertices[4];
+                Point<3> vertices[4];
+
+                compute_node(projected_vertices[0], &*patch, i1, i2, 0, n_subdivisions);
+                compute_node(projected_vertices[1], &*patch, i1+1, i2, 0, n_subdivisions);
+                compute_node(projected_vertices[2], &*patch, i1, i2+1, 0, n_subdivisions);
+                compute_node(projected_vertices[3], &*patch, i1+1, i2+1, 0, n_subdivisions);
+
+                Assert ((flags.height_vector < patch->data.n_rows()) ||
+                        patch->data.n_rows() == 0,
+                        ExcIndexRange (flags.height_vector, 0, patch->data.n_rows()));
+
+                vertices[0][0] = projected_vertices[0][0];
+                vertices[0][1] = projected_vertices[0][1];
+                vertices[0][2] = patch->data.n_rows() != 0 ? patch->data(0,i1*d1 + i2*d2) : 0;
+
+                vertices[1][0] = projected_vertices[1][0];
+                vertices[1][1] = projected_vertices[1][1];
+                vertices[1][2] = patch->data.n_rows() != 0 ? patch->data(0,(i1+1)*d1 + i2*d2) : 0;
+
+                vertices[2][0] = projected_vertices[2][0];
+                vertices[2][1] = projected_vertices[2][1];
+                vertices[2][2] = patch->data.n_rows() != 0 ? patch->data(0,i1*d1 + (i2+1)*d2) : 0;
+
+                vertices[3][0] = projected_vertices[3][0];
+                vertices[3][1] = projected_vertices[3][1];
+                vertices[3][2] = patch->data.n_rows() != 0 ? patch->data(0,(i1+1)*d1 + (i2+1)*d2) : 0;
+
+                projection_decompositions[0] = svg_project_point(vertices[0], camera_position, camera_direction, camera_horizontal, camera_focus);
+                projection_decompositions[1] = svg_project_point(vertices[1], camera_position, camera_direction, camera_horizontal, camera_focus);
+                projection_decompositions[2] = svg_project_point(vertices[2], camera_position, camera_direction, camera_horizontal, camera_focus);
+                projection_decompositions[3] = svg_project_point(vertices[3], camera_position, camera_direction, camera_horizontal, camera_focus);
+
+                x_min_perspective = std::min(x_min_perspective, (double)projection_decompositions[0][0]);
+                x_min_perspective = std::min(x_min_perspective, (double)projection_decompositions[1][0]);
+                x_min_perspective = std::min(x_min_perspective, (double)projection_decompositions[2][0]);
+                x_min_perspective = std::min(x_min_perspective, (double)projection_decompositions[3][0]);
+
+                x_max_perspective = std::max(x_max_perspective, (double)projection_decompositions[0][0]);
+                x_max_perspective = std::max(x_max_perspective, (double)projection_decompositions[1][0]);
+                x_max_perspective = std::max(x_max_perspective, (double)projection_decompositions[2][0]);
+                x_max_perspective = std::max(x_max_perspective, (double)projection_decompositions[3][0]);
+
+                y_min_perspective = std::min(y_min_perspective, (double)projection_decompositions[0][1]);
+                y_min_perspective = std::min(y_min_perspective, (double)projection_decompositions[1][1]);
+                y_min_perspective = std::min(y_min_perspective, (double)projection_decompositions[2][1]);
+                y_min_perspective = std::min(y_min_perspective, (double)projection_decompositions[3][1]);
+
+                y_max_perspective = std::max(y_max_perspective, (double)projection_decompositions[0][1]);
+                y_max_perspective = std::max(y_max_perspective, (double)projection_decompositions[1][1]);
+                y_max_perspective = std::max(y_max_perspective, (double)projection_decompositions[2][1]);
+                y_max_perspective = std::max(y_max_perspective, (double)projection_decompositions[3][1]);
+              }
+          }
+      }
+
+    x_dimension_perspective = x_max_perspective - x_min_perspective;
+    y_dimension_perspective = y_max_perspective - y_min_perspective;
+
+    std::multiset<SvgCell> cells;
+
+    // iterate over the patches
+    for (patch = patches.begin(); patch != patches.end(); ++patch)
+      {
+        n_subdivisions = patch->n_subdivisions;
+        n = n_subdivisions + 1;
+
+        for (unsigned int i2 = 0; i2 < n_subdivisions; ++i2)
+          {
+            for (unsigned int i1 = 0; i1 < n_subdivisions; ++i1)
+              {
+                Point<spacedim> projected_vertices[4];
+                SvgCell cell;
+
+                compute_node(projected_vertices[0], &*patch, i1, i2, 0, n_subdivisions);
+                compute_node(projected_vertices[1], &*patch, i1+1, i2, 0, n_subdivisions);
+                compute_node(projected_vertices[2], &*patch, i1, i2+1, 0, n_subdivisions);
+                compute_node(projected_vertices[3], &*patch, i1+1, i2+1, 0, n_subdivisions);
+
+                Assert ((flags.height_vector < patch->data.n_rows()) ||
+                        patch->data.n_rows() == 0,
+                        ExcIndexRange (flags.height_vector, 0, patch->data.n_rows()));
+
+                cell.vertices[0][0] = projected_vertices[0][0];
+                cell.vertices[0][1] = projected_vertices[0][1];
+                cell.vertices[0][2] = patch->data.n_rows() != 0 ? patch->data(0,i1*d1 + i2*d2) : 0;
+
+                cell.vertices[1][0] = projected_vertices[1][0];
+                cell.vertices[1][1] = projected_vertices[1][1];
+                cell.vertices[1][2] = patch->data.n_rows() != 0 ? patch->data(0,(i1+1)*d1 + i2*d2) : 0;
+
+                cell.vertices[2][0] = projected_vertices[2][0];
+                cell.vertices[2][1] = projected_vertices[2][1];
+                cell.vertices[2][2] = patch->data.n_rows() != 0 ? patch->data(0,i1*d1 + (i2+1)*d2) : 0;
+
+                cell.vertices[3][0] = projected_vertices[3][0];
+                cell.vertices[3][1] = projected_vertices[3][1];
+                cell.vertices[3][2] = patch->data.n_rows() != 0 ? patch->data(0,(i1+1)*d1 + (i2+1)*d2) : 0;
+
+                cell.projected_vertices[0] = svg_project_point(cell.vertices[0], camera_position, camera_direction, camera_horizontal, camera_focus);
+                cell.projected_vertices[1] = svg_project_point(cell.vertices[1], camera_position, camera_direction, camera_horizontal, camera_focus);
+                cell.projected_vertices[2] = svg_project_point(cell.vertices[2], camera_position, camera_direction, camera_horizontal, camera_focus);
+                cell.projected_vertices[3] = svg_project_point(cell.vertices[3], camera_position, camera_direction, camera_horizontal, camera_focus);
+
+                cell.center = .25 * (cell.vertices[0] + cell.vertices[1] + cell.vertices[2] + cell.vertices[3]);
+                cell.projected_center = svg_project_point(cell.center, camera_position, camera_direction, camera_horizontal, camera_focus);
+
+                cell.depth = cell.center.distance(camera_position);
+
+                cells.insert(cell);
+              }
+          }
+      }
+
+
+    // write the svg file
+    if (width==0)
+      width = static_cast<unsigned int>(.5 + height * (x_dimension_perspective / y_dimension_perspective));
+    unsigned int additional_width = 0;
+
+    if (flags.draw_colorbar) additional_width = static_cast<unsigned int>(.5 + height * .3); // additional width for colorbar
+
+    // basic svg header and background rectangle
+    out << "<svg width=\"" << width + additional_width << "\" height=\"" << height << "\" xmlns=\"http://www.w3.org/2000/svg\" version=\"1.1\">" << '\n'
+        << " <rect width=\"" << width + additional_width << "\" height=\"" << height << "\" style=\"fill:white\"/>" << '\n' << '\n';
+
+    unsigned int triangle_counter = 0;
+
+    // write the cells in the correct order
+    for (typename std::multiset<SvgCell>::const_iterator cell = cells.begin(); cell != cells.end(); ++cell)
+      {
+        Point<3> points3d_triangle[3];
+
+        for (unsigned int triangle_index = 0; triangle_index < 4; triangle_index++)
+          {
+            switch (triangle_index)
+              {
+              case 0:
+                points3d_triangle[0] = cell->vertices[0], points3d_triangle[1] = cell->vertices[1], points3d_triangle[2] = cell->center;
+                break;
+              case 1:
+                points3d_triangle[0] = cell->vertices[1], points3d_triangle[1] = cell->vertices[3], points3d_triangle[2] = cell->center;
+                break;
+              case 2:
+                points3d_triangle[0] = cell->vertices[3], points3d_triangle[1] = cell->vertices[2], points3d_triangle[2] = cell->center;
+                break;
+              case 3:
+                points3d_triangle[0] = cell->vertices[2], points3d_triangle[1] = cell->vertices[0], points3d_triangle[2] = cell->center;
+                break;
+              default:
+                break;
+              }
+
+            Point<6> gradient_param = svg_get_gradient_parameters(points3d_triangle);
+
+            double start_h = .667 - ((gradient_param[4] - z_min) / z_dimension) * .667;
+            double stop_h = .667 - ((gradient_param[5] - z_min) / z_dimension) * .667;
+
+            unsigned int start_r = 0;
+            unsigned int start_g = 0;
+            unsigned int start_b = 0;
+
+            unsigned int stop_r = 0;
+            unsigned int stop_g = 0;
+            unsigned int stop_b = 0;
+
+            unsigned int start_i = static_cast<unsigned int>(start_h * 6.);
+            unsigned int stop_i = static_cast<unsigned int>(stop_h * 6.);
+
+            double start_f = start_h * 6. - start_i;
+            double start_q = 1. - start_f;
+
+            double stop_f = stop_h * 6. - stop_i;
+            double stop_q = 1. - stop_f;
+
+            switch (start_i % 6)
+              {
+              case 0:
+                start_r = 255, start_g = static_cast<unsigned int>(.5 + 255. * start_f);
+                break;
+              case 1:
+                start_r = static_cast<unsigned int>(.5 + 255. * start_q), start_g = 255;
+                break;
+              case 2:
+                start_g = 255, start_b = static_cast<unsigned int>(.5 + 255. * start_f);
+                break;
+              case 3:
+                start_g = static_cast<unsigned int>(.5 + 255. * start_q), start_b = 255;
+                break;
+              case 4:
+                start_r = static_cast<unsigned int>(.5 + 255. * start_f), start_b = 255;
+                break;
+              case 5:
+                start_r = 255, start_b = static_cast<unsigned int>(.5 + 255. * start_q);
+                break;
+              default:
+                break;
+              }
+
+            switch (stop_i % 6)
+              {
+              case 0:
+                stop_r = 255, stop_g = static_cast<unsigned int>(.5 + 255. * stop_f);
+                break;
+              case 1:
+                stop_r = static_cast<unsigned int>(.5 + 255. * stop_q), stop_g = 255;
+                break;
+              case 2:
+                stop_g = 255, stop_b = static_cast<unsigned int>(.5 + 255. * stop_f);
+                break;
+              case 3:
+                stop_g = static_cast<unsigned int>(.5 + 255. * stop_q), stop_b = 255;
+                break;
+              case 4:
+                stop_r = static_cast<unsigned int>(.5 + 255. * stop_f), stop_b = 255;
+                break;
+              case 5:
+                stop_r = 255, stop_b = static_cast<unsigned int>(.5 + 255. * stop_q);
+                break;
+              default:
+                break;
+              }
+
+            Point<3> gradient_start_point_3d, gradient_stop_point_3d;
+
+            gradient_start_point_3d[0] = gradient_param[0];
+            gradient_start_point_3d[1] = gradient_param[1];
+            gradient_start_point_3d[2] = gradient_param[4];
+
+            gradient_stop_point_3d[0] = gradient_param[2];
+            gradient_stop_point_3d[1] = gradient_param[3];
+            gradient_stop_point_3d[2] = gradient_param[5];
+
+            Point<2> gradient_start_point = svg_project_point(gradient_start_point_3d, camera_position, camera_direction, camera_horizontal, camera_focus);
+            Point<2> gradient_stop_point = svg_project_point(gradient_stop_point_3d, camera_position, camera_direction, camera_horizontal, camera_focus);
+
+            // define linear gradient
+            out << "  <linearGradient id=\"" << triangle_counter << "\" gradientUnits=\"userSpaceOnUse\" "
+                << "x1=\""
+                << static_cast<unsigned int>(.5 + ((gradient_start_point[0] - x_min_perspective) / x_dimension_perspective) * (width - (width/100.) * 2. * margin_in_percent) + ((width/100.) * margin_in_percent))
+                << "\" "
+                << "y1=\""
+                << static_cast<unsigned int>(.5 + height - (height/100.) * margin_in_percent - ((gradient_start_point[1] - y_min_perspective) / y_dimension_perspective) * (height - (height/100.) * 2. * margin_in_percent))
+                << "\" "
+                << "x2=\""
+                << static_cast<unsigned int>(.5 + ((gradient_stop_point[0] - x_min_perspective) / x_dimension_perspective) * (width - (width/100.) * 2. * margin_in_percent) + ((width/100.) * margin_in_percent))
+                << "\" "
+                << "y2=\""
+                << static_cast<unsigned int>(.5 + height - (height/100.) * margin_in_percent - ((gradient_stop_point[1] - y_min_perspective) / y_dimension_perspective) * (height - (height/100.) * 2. * margin_in_percent))
+                << "\""
+                << ">" << '\n'
+                << "   <stop offset=\"0\" style=\"stop-color:rgb(" << start_r << "," << start_g << "," << start_b << ")\"/>" << '\n'
+                << "   <stop offset=\"1\" style=\"stop-color:rgb(" << stop_r << "," << stop_g << "," << stop_b << ")\"/>" << '\n'
+                << "  </linearGradient>" << '\n';
+
+            // draw current triangle
+            double x1 = 0, y1 = 0, x2 = 0, y2 = 0;
+            double x3 = cell->projected_center[0];
+            double y3 = cell->projected_center[1];
+
+            switch (triangle_index)
+              {
+              case 0:
+                x1 = cell->projected_vertices[0][0], y1 = cell->projected_vertices[0][1], x2 = cell->projected_vertices[1][0], y2 = cell->projected_vertices[1][1];
+                break;
+              case 1:
+                x1 = cell->projected_vertices[1][0], y1 = cell->projected_vertices[1][1], x2 = cell->projected_vertices[3][0], y2 = cell->projected_vertices[3][1];
+                break;
+              case 2:
+                x1 = cell->projected_vertices[3][0], y1 = cell->projected_vertices[3][1], x2 = cell->projected_vertices[2][0], y2 = cell->projected_vertices[2][1];
+                break;
+              case 3:
+                x1 = cell->projected_vertices[2][0], y1 = cell->projected_vertices[2][1], x2 = cell->projected_vertices[0][0], y2 = cell->projected_vertices[0][1];
+                break;
+              default:
+                break;
+              }
+
+            out << "  <path d=\"M "
+                << static_cast<unsigned int>(.5 + ((x1 - x_min_perspective) / x_dimension_perspective) * (width - (width/100.) * 2. * margin_in_percent) + ((width/100.) * margin_in_percent))
+                << ' '
+                << static_cast<unsigned int>(.5 + height - (height/100.) * margin_in_percent - ((y1 - y_min_perspective) / y_dimension_perspective) * (height - (height/100.) * 2. * margin_in_percent))
+                << " L "
+                << static_cast<unsigned int>(.5 + ((x2 - x_min_perspective) / x_dimension_perspective) * (width - (width/100.) * 2. * margin_in_percent) + ((width/100.) * margin_in_percent))
+                << ' '
+                << static_cast<unsigned int>(.5 + height - (height/100.) * margin_in_percent - ((y2 - y_min_perspective) / y_dimension_perspective) * (height - (height/100.) * 2. * margin_in_percent))
+                << " L "
+                << static_cast<unsigned int>(.5 + ((x3 - x_min_perspective) / x_dimension_perspective) * (width - (width/100.) * 2. * margin_in_percent) + ((width/100.) * margin_in_percent))
+                << ' '
+                << static_cast<unsigned int>(.5 + height - (height/100.) * margin_in_percent - ((y3 - y_min_perspective) / y_dimension_perspective) * (height - (height/100.) * 2. * margin_in_percent))
+                << " L "
+                << static_cast<unsigned int>(.5 + ((x1 - x_min_perspective) / x_dimension_perspective) * (width - (width/100.) * 2. * margin_in_percent) + ((width/100.) * margin_in_percent))
+                << ' '
+                << static_cast<unsigned int>(.5 + height - (height/100.) * margin_in_percent - ((y1 - y_min_perspective) / y_dimension_perspective) * (height - (height/100.) * 2. * margin_in_percent))
+                << "\" style=\"stroke:black; fill:url(#" << triangle_counter << "); stroke-width:" << flags.line_thickness << "\"/>" << '\n';
+
+            triangle_counter++;
+          }
+      }
+
+
+// draw the colorbar
+    if (flags.draw_colorbar)
+      {
+        out << '\n' << " <!-- colorbar -->" << '\n';
+
+        unsigned int element_height = static_cast<unsigned int>(((height/100.) * (71. - 2.*margin_in_percent)) / 4);
+        unsigned int element_width = static_cast<unsigned int>(.5 + (height/100.) * 2.5);
+
+        additional_width = 0;
+        if (!flags.margin) additional_width = static_cast<unsigned int>(.5 + (height/100.) * 2.5);
+
+        for (unsigned int index = 0; index < 4; index++)
+          {
+            double start_h = .667 - ((index+1) / 4.) * .667;
+            double stop_h = .667 - (index / 4.) * .667;
+
+            unsigned int start_r = 0;
+            unsigned int start_g = 0;
+            unsigned int start_b = 0;
+
+            unsigned int stop_r = 0;
+            unsigned int stop_g = 0;
+            unsigned int stop_b = 0;
+
+            unsigned int start_i = static_cast<unsigned int>(start_h * 6.);
+            unsigned int stop_i = static_cast<unsigned int>(stop_h * 6.);
+
+            double start_f = start_h * 6. - start_i;
+            double start_q = 1. - start_f;
+
+            double stop_f = stop_h * 6. - stop_i;
+            double stop_q = 1. - stop_f;
+
+            switch (start_i % 6)
+              {
+              case 0:
+                start_r = 255, start_g = static_cast<unsigned int>(.5 + 255. * start_f);
+                break;
+              case 1:
+                start_r = static_cast<unsigned int>(.5 + 255. * start_q), start_g = 255;
+                break;
+              case 2:
+                start_g = 255, start_b = static_cast<unsigned int>(.5 + 255. * start_f);
+                break;
+              case 3:
+                start_g = static_cast<unsigned int>(.5 + 255. * start_q), start_b = 255;
+                break;
+              case 4:
+                start_r = static_cast<unsigned int>(.5 + 255. * start_f), start_b = 255;
+                break;
+              case 5:
+                start_r = 255, start_b = static_cast<unsigned int>(.5 + 255. * start_q);
+                break;
+              default:
+                break;
+              }
+
+            switch (stop_i % 6)
+              {
+              case 0:
+                stop_r = 255, stop_g = static_cast<unsigned int>(.5 + 255. * stop_f);
+                break;
+              case 1:
+                stop_r = static_cast<unsigned int>(.5 + 255. * stop_q), stop_g = 255;
+                break;
+              case 2:
+                stop_g = 255, stop_b = static_cast<unsigned int>(.5 + 255. * stop_f);
+                break;
+              case 3:
+                stop_g = static_cast<unsigned int>(.5 + 255. * stop_q), stop_b = 255;
+                break;
+              case 4:
+                stop_r = static_cast<unsigned int>(.5 + 255. * stop_f), stop_b = 255;
+                break;
+              case 5:
+                stop_r = 255, stop_b = static_cast<unsigned int>(.5 + 255. * stop_q);
+                break;
+              default:
+                break;
+              }
+
+            // define gradient
+            out << "  <linearGradient id=\"colorbar_" << index << "\" gradientUnits=\"userSpaceOnUse\" "
+                << "x1=\"" << width + additional_width << "\" "
+                << "y1=\"" << static_cast<unsigned int>(.5 + (height/100.) * (margin_in_percent + 29)) + (3-index) * element_height << "\" "
+                << "x2=\"" << width + additional_width << "\" "
+                << "y2=\"" << static_cast<unsigned int>(.5 + (height/100.) * (margin_in_percent + 29)) + (4-index) * element_height << "\""
+                << ">" << '\n'
+                << "   <stop offset=\"0\" style=\"stop-color:rgb(" << start_r << "," << start_g << "," << start_b << ")\"/>" << '\n'
+                << "   <stop offset=\"1\" style=\"stop-color:rgb(" << stop_r << "," << stop_g << "," << stop_b << ")\"/>" << '\n'
+                << "  </linearGradient>" << '\n';
+
+            // draw box corresponding to the gradient above
+            out << "  <rect"
+                << " x=\"" << width + additional_width
+                << "\" y=\"" << static_cast<unsigned int>(.5 + (height/100.) * (margin_in_percent + 29)) + (3-index) * element_height
+                << "\" width=\"" << element_width
+                << "\" height=\"" << element_height
+                << "\" style=\"stroke:black; stroke-width:2; fill:url(#colorbar_" << index << ")\"/>" << '\n';
+          }
+
+        for (unsigned int index = 0; index < 5; index++)
+          {
+            out << "  <text x=\"" << width + additional_width + static_cast<unsigned int>(1.5 * element_width)
+                << "\" y=\"" << static_cast<unsigned int>(.5 + (height/100.) * (margin_in_percent + 29) + (4.-index) * element_height + 30.) << "\""
+                << " style=\"text-anchor:start; font-size:80; font-family:Helvetica";
+
+            if (index == 0 || index == 4) out << "; font-weight:bold";
+
+            out << "\">" << (float)(((int)((z_min + index * (z_dimension / 4.))*10000))/10000.);
+
+            if (index == 4) out << " max";
+            if (index == 0) out << " min";
+
+            out << "</text>" << '\n';
+          }
+      }
+
+    // finalize the svg file
+    out << '\n' << "</svg>";
+    out.flush();
+
+  }
+
+
+
+  // Dump the given patches, data set names and vector ranges to @p out in
+  // the deal.II intermediate text format. The flags argument is not used.
+  // Throws ExcIO if the stream is not in a good state on entry.
+  template <int dim, int spacedim>
+  void
+  write_deal_II_intermediate (const std::vector<Patch<dim,spacedim> > &patches,
+                              const std::vector<std::string>          &data_names,
+                              const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &vector_data_ranges,
+                              const Deal_II_IntermediateFlags         &/*flags*/,
+                              std::ostream                            &out)
+  {
+    typedef std::vector<std::string>::const_iterator
+    name_iterator;
+    typedef typename std::vector<Patch<dim,spacedim> >::const_iterator
+    patch_iterator;
+    typedef std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> >::const_iterator
+    range_iterator;
+
+    AssertThrow (out, ExcIO());
+
+    // the very first line holds the two template parameters, so that a
+    // reader can find out which instantiation it needs before parsing
+    // the rest of the file (see step-19)
+    out << dim << ' ' << spacedim << '\n';
+
+    // three header lines
+    out << "[deal.II intermediate format graphics data]" << '\n';
+    out << "[written by " << DEAL_II_PACKAGE_NAME << " " << DEAL_II_PACKAGE_VERSION << "]" << '\n';
+    out << "[Version: " << Deal_II_IntermediateFlags::format_version << "]" << '\n';
+
+    // every section below is preceded by its number of entries
+    out << data_names.size() << '\n';
+    for (name_iterator name = data_names.begin(); name != data_names.end(); ++name)
+      out << *name << '\n';
+
+    out << patches.size() << '\n';
+    for (patch_iterator patch = patches.begin(); patch != patches.end(); ++patch)
+      out << *patch << '\n';
+
+    // a vector range is written as "first last" on one line followed by
+    // its name on the next
+    out << vector_data_ranges.size() << '\n';
+    for (range_iterator range = vector_data_ranges.begin(); range != vector_data_ranges.end(); ++range)
+      {
+        out << std_cxx11::get<0>(*range) << ' '
+            << std_cxx11::get<1>(*range) << '\n';
+        out << std_cxx11::get<2>(*range) << '\n';
+      }
+
+    out << '\n';
+
+    // push everything out of the stream buffer
+    out.flush ();
+  }
+
+
+
+  // Read the two leading unsigned integers from @p input and return them
+  // as a (dim, spacedim) pair. These are the first tokens that
+  // write_deal_II_intermediate() emits.
+  //
+  // Throws ExcIO if the stream is not usable on entry or if the two
+  // numbers could not be extracted.
+  std::pair<unsigned int, unsigned int>
+  determine_intermediate_format_dimensions (std::istream &input)
+  {
+    AssertThrow (input, ExcIO());
+
+    // initialize both values so that nothing indeterminate is ever read,
+    // even if an extraction below fails
+    unsigned int dim      = 0;
+    unsigned int spacedim = 0;
+    input >> dim >> spacedim;
+
+    // a failed extraction sets failbit; report it instead of handing
+    // meaningless dimensions back to the caller
+    AssertThrow (input, ExcIO());
+
+    return std::make_pair (dim, spacedim);
+  }
+} // namespace DataOutBase
+
+
+
+
+/* --------------------------- class DataOutInterface ---------------------- */
+
+
+// Constructor. Sets the default number of subdivisions to one.
+template <int dim, int spacedim>
+DataOutInterface<dim,spacedim>::DataOutInterface ()
+  :
+  default_subdivisions (1)
+{
+}
+
+
+// Destructor. Performs no work of its own.
+template <int dim, int spacedim>
+DataOutInterface<dim,spacedim>::~DataOutInterface ()
+{
+}
+
+
+
+
+// Forward the patches, data set names and vector ranges of this object,
+// together with the stored dx_flags, to DataOutBase::write_dx().
+template <int dim, int spacedim>
+void DataOutInterface<dim,spacedim>::write_dx (std::ostream &out) const
+{
+  DataOutBase::write_dx (get_patches(),
+                         get_dataset_names(),
+                         get_vector_data_ranges(),
+                         dx_flags,
+                         out);
+}
+
+
+
+// Forward the patches, data set names and vector ranges of this object,
+// together with the stored ucd_flags, to DataOutBase::write_ucd().
+template <int dim, int spacedim>
+void DataOutInterface<dim,spacedim>::write_ucd (std::ostream &out) const
+{
+  DataOutBase::write_ucd (get_patches(),
+                          get_dataset_names(),
+                          get_vector_data_ranges(),
+                          ucd_flags,
+                          out);
+}
+
+
+
+// Forward the patches, data set names and vector ranges of this object,
+// together with the stored gnuplot_flags, to DataOutBase::write_gnuplot().
+template <int dim, int spacedim>
+void DataOutInterface<dim,spacedim>::write_gnuplot (std::ostream &out) const
+{
+  DataOutBase::write_gnuplot (get_patches(),
+                              get_dataset_names(),
+                              get_vector_data_ranges(),
+                              gnuplot_flags,
+                              out);
+}
+
+
+
+// Forward the patches, data set names and vector ranges of this object,
+// together with the stored povray_flags, to DataOutBase::write_povray().
+template <int dim, int spacedim>
+void DataOutInterface<dim,spacedim>::write_povray (std::ostream &out) const
+{
+  DataOutBase::write_povray (get_patches(),
+                             get_dataset_names(),
+                             get_vector_data_ranges(),
+                             povray_flags,
+                             out);
+}
+
+
+
+// Forward the patches, data set names and vector ranges of this object,
+// together with the stored eps_flags, to DataOutBase::write_eps().
+template <int dim, int spacedim>
+void DataOutInterface<dim,spacedim>::write_eps (std::ostream &out) const
+{
+  DataOutBase::write_eps (get_patches(),
+                          get_dataset_names(),
+                          get_vector_data_ranges(),
+                          eps_flags,
+                          out);
+}
+
+
+
+// Forward the patches, data set names and vector ranges of this object,
+// together with the stored gmv_flags, to DataOutBase::write_gmv().
+template <int dim, int spacedim>
+void DataOutInterface<dim,spacedim>::write_gmv (std::ostream &out) const
+{
+  DataOutBase::write_gmv (get_patches(),
+                          get_dataset_names(),
+                          get_vector_data_ranges(),
+                          gmv_flags,
+                          out);
+}
+
+
+
+// Forward the patches, data set names and vector ranges of this object,
+// together with the stored tecplot_flags, to DataOutBase::write_tecplot().
+template <int dim, int spacedim>
+void DataOutInterface<dim,spacedim>::write_tecplot (std::ostream &out) const
+{
+  DataOutBase::write_tecplot (get_patches(),
+                              get_dataset_names(),
+                              get_vector_data_ranges(),
+                              tecplot_flags,
+                              out);
+}
+
+
+
+// Forward the patches, data set names and vector ranges of this object,
+// together with the stored tecplot_flags, to
+// DataOutBase::write_tecplot_binary().
+template <int dim, int spacedim>
+void DataOutInterface<dim,spacedim>::write_tecplot_binary (std::ostream &out) const
+{
+  DataOutBase::write_tecplot_binary (get_patches(),
+                                     get_dataset_names(),
+                                     get_vector_data_ranges(),
+                                     tecplot_flags,
+                                     out);
+}
+
+
+
+// Forward the patches, data set names and vector ranges of this object,
+// together with the stored vtk_flags, to DataOutBase::write_vtk().
+template <int dim, int spacedim>
+void DataOutInterface<dim,spacedim>::write_vtk (std::ostream &out) const
+{
+  DataOutBase::write_vtk (get_patches(),
+                          get_dataset_names(),
+                          get_vector_data_ranges(),
+                          vtk_flags,
+                          out);
+}
+
+// Forward the patches, data set names and vector ranges of this object,
+// together with the stored vtk_flags, to DataOutBase::write_vtu().
+template <int dim, int spacedim>
+void DataOutInterface<dim,spacedim>::write_vtu (std::ostream &out) const
+{
+  DataOutBase::write_vtu (get_patches(),
+                          get_dataset_names(),
+                          get_vector_data_ranges(),
+                          vtk_flags,
+                          out);
+}
+
+// Forward the patches, data set names and vector ranges of this object,
+// together with the stored svg_flags, to DataOutBase::write_svg().
+template <int dim, int spacedim>
+void DataOutInterface<dim,spacedim>::write_svg (std::ostream &out) const
+{
+  DataOutBase::write_svg (get_patches(),
+                          get_dataset_names(),
+                          get_vector_data_ranges(),
+                          svg_flags,
+                          out);
+}
+
+// Write the data of all processes in @p comm into the single file
+// @p filename in VTU format using MPI I/O: rank 0 writes the header, all
+// ranks then append their main section in rank order, and rank 0 finally
+// appends the footer.
+//
+// If deal.II is configured without MPI, @p comm is ignored and the file
+// is written through write_vtu() instead.
+//
+// Throws ExcMessage if the file cannot be opened with MPI_File_open.
+template <int dim, int spacedim>
+void DataOutInterface<dim,spacedim>::write_vtu_in_parallel (const char *filename, MPI_Comm comm) const
+{
+#ifndef DEAL_II_WITH_MPI
+  //without MPI fall back to the normal way to write a vtu file:
+  (void)comm;
+
+  std::ofstream f(filename);
+  write_vtu (f);
+#else
+
+  int myrank;
+  MPI_Comm_rank(comm, &myrank);
+
+  MPI_Info info;
+  MPI_Info_create(&info);
+  MPI_File fh;
+  const int err = MPI_File_open(comm, const_cast<char *>(filename),
+                                MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &fh);
+  // the info object is only needed for the open call itself; release it
+  // before checking the result so that it is not leaked if we throw
+  MPI_Info_free(&info);
+  AssertThrow(err==MPI_SUCCESS, ExcMessage("Unable to open file with MPI_File_open!"));
+
+
+  MPI_File_set_size(fh, 0); // delete the file contents
+  // this barrier is necessary, because otherwise others might already
+  // write while one core is still setting the size to zero.
+  MPI_Barrier(comm);
+
+  // only rank 0 knows the real value; everybody else receives it through
+  // the broadcast below
+  unsigned int header_size = 0;
+
+  //write header
+  if (myrank==0)
+    {
+      std::stringstream ss;
+      DataOutBase::write_vtu_header(ss, vtk_flags);
+      // take a single copy of the buffer instead of one per str() call
+      const std::string header = ss.str();
+      header_size = header.size();
+      MPI_File_write(fh, const_cast<char *>(header.c_str()), header_size, MPI_CHAR, MPI_STATUS_IGNORE);
+    }
+
+  MPI_Bcast(&header_size, 1, MPI_UNSIGNED, 0, comm);
+
+  // position the shared file pointer right behind the header
+  MPI_File_seek_shared( fh, header_size, MPI_SEEK_SET );
+  {
+    std::stringstream ss;
+    DataOutBase::write_vtu_main (get_patches(), get_dataset_names(),
+                                 get_vector_data_ranges(),
+                                 vtk_flags, ss);
+    const std::string main_part = ss.str();
+    MPI_File_write_ordered(fh, const_cast<char *>(main_part.c_str()), main_part.size(), MPI_CHAR, MPI_STATUS_IGNORE);
+  }
+
+  //write footer
+  if (myrank==0)
+    {
+      std::stringstream ss;
+      DataOutBase::write_vtu_footer(ss);
+      const std::string footer = ss.str();
+      const unsigned int footer_size = footer.size();
+      MPI_File_write_shared(fh, const_cast<char *>(footer.c_str()), footer_size, MPI_CHAR, MPI_STATUS_IGNORE);
+    }
+  MPI_File_close( &fh );
+#endif
+}
+
+
+// Write a "Collection" VTKFile record to @p out that contains one DataSet
+// element for each (time, file name) pair in @p times_and_names.
+// Throws ExcIO if the stream is not in a good state before or after
+// writing.
+template <int dim, int spacedim>
+void
+DataOutInterface<dim,spacedim>::
+write_pvd_record (std::ostream &out,
+                  const std::vector<std::pair<double,std::string> >  &times_and_names) const
+{
+  typedef std::vector<std::pair<double,std::string> >::const_iterator
+  record_iterator;
+
+  AssertThrow (out, ExcIO());
+
+  // XML declaration followed by a comment recording when the file was
+  // generated
+  out << "<?xml version=\"1.0\"?>\n"
+      << "<!--\n"
+      << "#This file was generated by the deal.II library"
+      << " on " << Utilities::System::get_date()
+      << " at " << Utilities::System::get_time()
+      << "\n-->\n";
+
+  out << "<VTKFile type=\"Collection\" version=\"0.1\" ByteOrder=\"LittleEndian\">\n"
+      << "  <Collection>\n";
+
+  // one entry per time step
+  for (record_iterator record = times_and_names.begin();
+       record != times_and_names.end();
+       ++record)
+    {
+      out << "    <DataSet timestep=\"" << record->first
+          << "\" group=\"\" part=\"0\" file=\"" << record->second
+          << "\"/>\n";
+    }
+
+  out << "  </Collection>\n"
+      << "</VTKFile>\n";
+
+  out.flush();
+
+  // make sure nothing went wrong while writing
+  AssertThrow (out, ExcIO());
+}
+
+
+// Write a "PUnstructuredGrid" VTKFile record to @p out. It declares the
+// data arrays of this object (vector-valued ranges first, then all
+// remaining scalar data sets) and lists every entry of @p piece_names as
+// a Piece element.
+//
+// Throws ExcIO if the stream is not in a good state before or after
+// writing, and ExcLowerRange, ExcIndexRange or ExcMessage if a vector
+// data range is inconsistent or spans more than three components.
+template <int dim, int spacedim>
+void
+DataOutInterface<dim,spacedim>::write_pvtu_record (std::ostream &out,
+                                                   const std::vector<std::string> &piece_names) const
+{
+  typedef std_cxx11::tuple<unsigned int, unsigned int, std::string> range_type;
+
+  AssertThrow (out, ExcIO());
+
+  const std::vector<std::string> data_names         = get_dataset_names();
+  const std::vector<range_type>  vector_data_ranges = get_vector_data_ranges();
+  const unsigned int             n_data_sets        = data_names.size();
+
+  // XML declaration followed by a comment recording when the file was
+  // generated
+  out << "<?xml version=\"1.0\"?>\n"
+      << "<!--\n"
+      << "#This file was generated by the deal.II library"
+      << " on " << Utilities::System::get_date()
+      << " at " << Utilities::System::get_time()
+      << "\n-->\n";
+
+  out << "<VTKFile type=\"PUnstructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n"
+      << "  <PUnstructuredGrid GhostLevel=\"0\">\n"
+      << "    <PPointData Scalars=\"scalars\">\n";
+
+  // The order has to be the same as in write_vtu: vector-valued data
+  // first, then whatever scalar data sets remain. Keep track of which
+  // components have been covered by a vector.
+  std::vector<bool> data_set_written (n_data_sets, false);
+  for (std::vector<range_type>::const_iterator range = vector_data_ranges.begin();
+       range != vector_data_ranges.end();
+       ++range)
+    {
+      const unsigned int first_component = std_cxx11::get<0>(*range);
+      const unsigned int last_component  = std_cxx11::get<1>(*range);
+      const std::string &vector_name     = std_cxx11::get<2>(*range);
+
+      AssertThrow (last_component >= first_component,
+                   ExcLowerRange (last_component, first_component));
+      AssertThrow (last_component < n_data_sets,
+                   ExcIndexRange (last_component, 0, n_data_sets));
+      AssertThrow (last_component + 1 - first_component <= 3,
+                   ExcMessage ("Can't declare a vector with more than 3 components "
+                               "in VTK"));
+
+      // these components are taken care of here
+      for (unsigned int c=first_component; c<=last_component; ++c)
+        data_set_written[c] = true;
+
+      out << "    <PDataArray type=\"Float64\" Name=\"";
+
+      // use the explicitly given vector name if there is one; otherwise
+      // join the component names with double underscores
+      if (vector_name.empty())
+        {
+          for (unsigned int c=first_component; c<last_component; ++c)
+            out << data_names[c] << "__";
+          out << data_names[last_component];
+        }
+      else
+        out << vector_name;
+
+      out << "\" NumberOfComponents=\"3\" format=\"ascii\"/>\n";
+    }
+
+  // now the data sets that were not part of any vector
+  for (unsigned int data_set=0; data_set<n_data_sets; ++data_set)
+    {
+      if (data_set_written[data_set])
+        continue;
+
+      out << "    <PDataArray type=\"Float64\" Name=\""
+          << data_names[data_set]
+          << "\" format=\"ascii\"/>\n";
+    }
+
+  out << "    </PPointData>\n";
+
+  out << "    <PPoints>\n"
+      << "      <PDataArray type=\"Float64\" NumberOfComponents=\"3\"/>\n"
+      << "    </PPoints>\n";
+
+  for (std::vector<std::string>::const_iterator piece = piece_names.begin();
+       piece != piece_names.end();
+       ++piece)
+    out << "    <Piece Source=\"" << *piece << "\"/>\n";
+
+  out << "  </PUnstructuredGrid>\n"
+      << "</VTKFile>\n";
+
+  out.flush();
+
+  // make sure nothing went wrong while writing
+  AssertThrow (out, ExcIO());
+}
+
+
+
+// Write a "!NBLOCKS <count>" line to @p out, followed by each entry of
+// @p piece_names on a line of its own, and flush the stream.
+template <int dim, int spacedim>
+void
+DataOutInterface<dim,spacedim>::write_visit_record (std::ostream &out,
+                                                    const std::vector<std::string> &piece_names) const
+{
+  typedef std::vector<std::string>::const_iterator piece_iterator;
+
+  out << "!NBLOCKS " << piece_names.size() << '\n';
+
+  for (piece_iterator piece = piece_names.begin(); piece != piece_names.end(); ++piece)
+    {
+      out << *piece << '\n';
+    }
+
+  out.flush();
+}
+
+
+
+// Write a "!NBLOCKS <count>" line to @p out, where the count is the size
+// of the first inner vector of @p piece_names, followed by every string
+// of every inner vector on a line of its own. Nothing is written if
+// @p piece_names is empty.
+//
+// Throws ExcIO if the stream is not in a good state on entry. In debug
+// mode, asserts that all inner vectors are nonempty and of equal size.
+template <int dim, int spacedim>
+void
+DataOutInterface<dim,spacedim>::write_visit_record (std::ostream &out,
+                                                    const std::vector<std::vector<std::string> > &piece_names) const
+{
+  AssertThrow (out, ExcIO());
+
+  if (piece_names.size() == 0)
+    return;
+
+  // the block count is an integer: keep it in an integral type so that
+  // it is always printed as a plain integer, independent of the
+  // floating point formatting of the stream and of its magnitude
+  const std::vector<std::string>::size_type nblocks = piece_names[0].size();
+  Assert(nblocks > 0, ExcMessage("piece_names should be a vector of nonempty vectors.") );
+
+  out << "!NBLOCKS " << nblocks << '\n';
+  for (std::vector<std::vector<std::string> >::const_iterator domain = piece_names.begin(); domain != piece_names.end(); ++domain)
+    {
+      Assert(domain->size() == nblocks, ExcMessage("piece_names should be a vector of equal sized vectors.") );
+      for (std::vector<std::string>::const_iterator subdomain = domain->begin(); subdomain != domain->end(); ++subdomain)
+        out << *subdomain << '\n';
+    }
+
+  out << std::flush;
+}
+
+
+
+// Forward the patches, data set names and vector ranges of this object,
+// together with the stored deal_II_intermediate_flags, to
+// DataOutBase::write_deal_II_intermediate().
+template <int dim, int spacedim>
+void DataOutInterface<dim,spacedim>::
+write_deal_II_intermediate (std::ostream &out) const
+{
+  DataOutBase::write_deal_II_intermediate (get_patches(),
+                                           get_dataset_names(),
+                                           get_vector_data_ranges(),
+                                           deal_II_intermediate_flags,
+                                           out);
+}
+
+
+// Convenience overload: uses @p h5_filename both as the mesh file name
+// and as the solution file name of the general create_xdmf_entry().
+template <int dim, int spacedim>
+XDMFEntry DataOutInterface<dim,spacedim>::
+create_xdmf_entry (const DataOutBase::DataOutFilter &data_filter,
+                   const std::string &h5_filename, const double cur_time, MPI_Comm comm) const
+{
+  return create_xdmf_entry (data_filter,
+                            h5_filename,
+                            h5_filename,
+                            cur_time,
+                            comm);
+}
+
+
+
+// Build the XDMFEntry describing the data in @p data_filter. The node
+// and cell counts are summed over all processes of @p comm (or taken
+// as-is without MPI). Only rank 0 returns a filled entry; every other
+// rank returns a default-constructed XDMFEntry.
+//
+// Throws ExcMessage if HDF5 support is not available or if dim is
+// neither 2 nor 3.
+template <int dim, int spacedim>
+XDMFEntry DataOutInterface<dim,spacedim>::
+create_xdmf_entry (const DataOutBase::DataOutFilter &data_filter,
+                   const std::string &h5_mesh_filename,
+                   const std::string &h5_solution_filename,
+                   const double cur_time,
+                   MPI_Comm comm) const
+{
+#ifndef DEAL_II_WITH_HDF5
+  // the arguments are unused on this path; silence the corresponding
+  // compiler warnings before bailing out
+  (void)data_filter;
+  (void)h5_mesh_filename;
+  (void)h5_solution_filename;
+  (void)cur_time;
+  (void)comm;
+  AssertThrow(false, ExcMessage ("XDMF support requires HDF5 to be turned on."));
+#endif
+  AssertThrow(dim == 2 || dim == 3, ExcMessage ("XDMF only supports 2 or 3 dimensions."));
+
+  // slot 0 holds the number of nodes, slot 1 the number of cells
+  unsigned int local_counts[2];
+  unsigned int global_counts[2];
+  local_counts[0] = data_filter.n_nodes();
+  local_counts[1] = data_filter.n_cells();
+
+  // accumulate the totals over all processes
+  int myrank;
+#ifdef DEAL_II_WITH_MPI
+  MPI_Comm_rank(comm, &myrank);
+  MPI_Allreduce(local_counts, global_counts, 2, MPI_UNSIGNED, MPI_SUM, comm);
+#else
+  myrank = 0;
+  global_counts[0] = local_counts[0];
+  global_counts[1] = local_counts[1];
+#endif
+
+  // only the root process produces a filled entry
+  if (myrank != 0)
+    return XDMFEntry();
+
+  XDMFEntry entry(h5_mesh_filename, h5_solution_filename, cur_time, global_counts[0], global_counts[1], dim);
+
+  // The attribute names added here must match those generated in the HDF5 file
+  const unsigned int n_data_sets = data_filter.n_data_sets();
+  for (unsigned int data_set=0; data_set<n_data_sets; ++data_set)
+    entry.add_attribute(data_filter.get_data_set_name(data_set),
+                        data_filter.get_data_set_dim(data_set));
+
+  return entry;
+}
+
+// Write an XDMF file named @p filename that wraps the content of all
+// @p entries in a temporal grid collection. Only the process with rank 0
+// in @p comm writes anything; without MPI the calling process does.
+template <int dim, int spacedim>
+void DataOutInterface<dim,spacedim>::
+write_xdmf_file (const std::vector<XDMFEntry> &entries,
+                 const std::string &filename,
+                 MPI_Comm comm) const
+{
+  int myrank;
+
+#ifdef DEAL_II_WITH_MPI
+  MPI_Comm_rank(comm, &myrank);
+#else
+  (void)comm;
+  myrank = 0;
+#endif
+
+  // all processes other than the root have nothing to do
+  if (myrank != 0)
+    return;
+
+  std::ofstream xdmf_file(filename.c_str());
+
+  xdmf_file << "<?xml version=\"1.0\" ?>\n"
+            << "<!DOCTYPE Xdmf SYSTEM \"Xdmf.dtd\" []>\n"
+            << "<Xdmf Version=\"2.0\">\n"
+            << "  <Domain>\n"
+            << "    <Grid Name=\"CellTime\" GridType=\"Collection\" CollectionType=\"Temporal\">\n";
+
+  // each entry supplies its own content, indented by three levels
+  for (std::vector<XDMFEntry>::const_iterator entry = entries.begin();
+       entry != entries.end();
+       ++entry)
+    {
+      xdmf_file << entry->get_xdmf_content(3);
+    }
+
+  xdmf_file << "    </Grid>\n"
+            << "  </Domain>\n"
+            << "</Xdmf>\n";
+
+  xdmf_file.close();
+}
+
+
+/*
+ * Return the XDMF description of this entry as a string: a uniform grid
+ * with its time value, geometry, topology and one attribute element per
+ * registered attribute. The outermost element is indented by
+ * indent_level levels and nested elements by correspondingly more.
+ * An entry that is not valid yields an empty string.
+ */
+std::string XDMFEntry::get_xdmf_content(const unsigned int indent_level) const
+{
+  typedef std::map<std::string, unsigned int>::const_iterator attribute_iterator;
+
+  if (!valid)
+    return "";
+
+  const bool        is_2d         = (dimension == 2);
+  const char *const geometry_type = (is_2d ? "XY" : "XYZ");
+  const char *const cell_type     = (is_2d ? "Quadrilateral" : "Hexahedron");
+
+  std::stringstream content;
+
+  content << indent(indent_level+0) << "<Grid Name=\"mesh\" GridType=\"Uniform\">\n";
+  content << indent(indent_level+1) << "<Time Value=\"" << entry_time << "\"/>\n";
+
+  // node coordinates
+  content << indent(indent_level+1) << "<Geometry GeometryType=\"" << geometry_type << "\">\n";
+  content << indent(indent_level+2) << "<DataItem Dimensions=\"" << num_nodes << " " << dimension << "\" NumberType=\"Float\" Precision=\"8\" Format=\"HDF\">\n";
+  content << indent(indent_level+3) << h5_mesh_filename << ":/nodes\n";
+  content << indent(indent_level+2) << "</DataItem>\n";
+  content << indent(indent_level+1) << "</Geometry>\n";
+
+  if (num_cells == 0)
+    {
+      // no cells at all: treat the nodes as isolated points and use a
+      // Polyvertex topology
+      content << indent(indent_level+1) << "<Topology TopologyType=\"Polyvertex\" NumberOfElements=\"" << num_nodes << "\">\n";
+      content << indent(indent_level+1) << "</Topology>\n";
+    }
+  else
+    {
+      // cells are quadrilaterals (2D) or hexahedra (3D); the second
+      // entry of Dimensions is 4 for dimension 2 and 8 for dimension 3
+      content << indent(indent_level+1) << "<Topology TopologyType=\"" << cell_type << "\" NumberOfElements=\"" << num_cells << "\">\n";
+      content << indent(indent_level+2) << "<DataItem Dimensions=\"" << num_cells << " " << (2 << (dimension-1)) << "\" NumberType=\"UInt\" Format=\"HDF\">\n";
+      content << indent(indent_level+3) << h5_mesh_filename << ":/cells\n";
+      content << indent(indent_level+2) << "</DataItem>\n";
+      content << indent(indent_level+1) << "</Topology>\n";
+    }
+
+  for (attribute_iterator attribute = attribute_dims.begin();
+       attribute != attribute_dims.end();
+       ++attribute)
+    {
+      const bool is_vector = (attribute->second > 1);
+
+      content << indent(indent_level+1) << "<Attribute Name=\"" << attribute->first << "\" AttributeType=\"" << (is_vector ? "Vector" : "Scalar") << "\" Center=\"Node\">\n";
+      // Vectors must have 3 elements even for 2D models
+      content << indent(indent_level+2) << "<DataItem Dimensions=\"" << num_nodes << " " << (is_vector ? 3 : 1) << "\" NumberType=\"Float\" Precision=\"8\" Format=\"HDF\">\n";
+      content << indent(indent_level+3) << h5_sol_filename << ":/" << attribute->first << "\n";
+      content << indent(indent_level+2) << "</DataItem>\n";
+      content << indent(indent_level+1) << "</Attribute>\n";
+    }
+
+  content << indent(indent_level+0) << "</Grid>\n";
+
+  return content.str();
+}
+
+/*
+ * Hand the patches, data set names and vector ranges of this object to
+ * DataOutBase::write_filtered_data(), which stores them in the given
+ * DataOutFilter object.
+ */
+template <int dim, int spacedim>
+void DataOutInterface<dim,spacedim>::
+write_filtered_data (DataOutBase::DataOutFilter &filtered_data) const
+{
+  DataOutBase::write_filtered_data (get_patches(),
+                                    get_dataset_names(),
+                                    get_vector_data_ranges(),
+                                    filtered_data);
+}
+
+// Store nodes, cells and data sets of the given patches in
+// @p filtered_data. Data sets that start a range in
+// @p vector_data_ranges are written as one multi-component set (named
+// after the range, or after its components joined by "__" if the range
+// has no name); all others are written as single-component sets under
+// their own name.
+//
+// Throws ExcLowerRange or ExcIndexRange if a vector data range is
+// inconsistent. Without MPI, asserts in debug mode that there are
+// patches at all.
+template <int dim, int spacedim>
+void DataOutBase::write_filtered_data (const std::vector<Patch<dim,spacedim> > &patches,
+                                       const std::vector<std::string>          &data_names,
+                                       const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &vector_data_ranges,
+                                       DataOutBase::DataOutFilter &filtered_data)
+{
+#ifndef DEAL_II_WITH_MPI
+  // An empty list of patches usually means that build_patches was never
+  // called, so complain about it. With MPI the check is dropped: on a
+  // coarse mesh a processor may legitimately own no cells and thus have
+  // no patches.
+  Assert (patches.size() > 0, ExcNoPatches());
+#endif
+
+  const unsigned int n_data_sets = data_names.size();
+
+  unsigned int n_node, n_cell;
+  compute_sizes<dim,spacedim>(patches, n_node, n_cell);
+
+  // reorder the patch data into one row per data set on a separate task
+  // while nodes and cells are being written
+  Table<2,double> data_vectors (n_data_sets, n_node);
+  void (*reorder_function) (const std::vector<Patch<dim,spacedim> > &, Table<2,double> &)
+    = &DataOutBase::template write_gmv_reorder_data_vectors<dim,spacedim>;
+  Threads::Task<> reorder_task = Threads::new_task (reorder_function, patches, data_vectors);
+
+  // Write the nodes/cells to the DataOutFilter object.
+  write_nodes(patches, filtered_data);
+  write_cells(patches, filtered_data);
+
+  // the data set values are needed from here on
+  reorder_task.join ();
+
+  // walk over all data sets; a vector range advances by several data
+  // sets at once, a scalar by one
+  unsigned int n_th_vector = 0;
+  unsigned int data_set    = 0;
+  while (data_set < n_data_sets)
+    {
+      // skip the ranges that start before the current data set
+      while (n_th_vector < vector_data_ranges.size() &&
+             std_cxx11::get<0>(vector_data_ranges[n_th_vector]) < data_set)
+        ++n_th_vector;
+
+      const bool starts_vector_range
+        = (n_th_vector < vector_data_ranges.size() &&
+           std_cxx11::get<0>(vector_data_ranges[n_th_vector]) == data_set);
+
+      unsigned int pt_data_vector_dim;
+      std::string  vector_name;
+
+      if (!starts_vector_range)
+        {
+          // plain scalar data set
+          pt_data_vector_dim = 1;
+          vector_name        = data_names[data_set];
+        }
+      else
+        {
+          const unsigned int first_component = std_cxx11::get<0>(vector_data_ranges[n_th_vector]);
+          const unsigned int last_component  = std_cxx11::get<1>(vector_data_ranges[n_th_vector]);
+          const std::string &range_name      = std_cxx11::get<2>(vector_data_ranges[n_th_vector]);
+
+          // number of components of this vector
+          pt_data_vector_dim = last_component - first_component + 1;
+
+          // the range must be ordered and lie within the data sets
+          AssertThrow (last_component >= first_component,
+                       ExcLowerRange (last_component, first_component));
+          AssertThrow (last_component < n_data_sets,
+                       ExcIndexRange (last_component, 0, n_data_sets));
+
+          // use the given vector name, or else join the component names
+          // with double underscores
+          if (range_name != "")
+            vector_name = range_name;
+          else
+            {
+              vector_name = "";
+              for (unsigned int c=first_component; c<last_component; ++c)
+                vector_name += data_names[c] + "__";
+              vector_name += data_names[last_component];
+            }
+        }
+
+      // Write data to the filter object
+      filtered_data.write_data_set(vector_name, pt_data_vector_dim, data_set, data_vectors);
+
+      // continue behind the data sets just written
+      data_set += pt_data_vector_dim;
+    }
+}
+
+
+
+// Forward the patches of this object together with the given filter,
+// file name and communicator to DataOutBase::write_hdf5_parallel().
+template <int dim, int spacedim>
+void DataOutInterface<dim,spacedim>::
+write_hdf5_parallel (const DataOutBase::DataOutFilter &data_filter,
+                     const std::string &filename, MPI_Comm comm) const
+{
+  DataOutBase::write_hdf5_parallel (get_patches(),
+                                    data_filter,
+                                    filename,
+                                    comm);
+}
+
+
+
+// Forward the patches of this object together with the given filter,
+// mesh-file switch, mesh and solution file names and communicator to
+// DataOutBase::write_hdf5_parallel().
+template <int dim, int spacedim>
+void DataOutInterface<dim,spacedim>::
+write_hdf5_parallel (const DataOutBase::DataOutFilter &data_filter,
+                     const bool write_mesh_file, const std::string &mesh_filename, const std::string &solution_filename, MPI_Comm comm) const
+{
+  DataOutBase::write_hdf5_parallel (get_patches(),
+                                    data_filter,
+                                    write_mesh_file,
+                                    mesh_filename,
+                                    solution_filename,
+                                    comm);
+}
+
+
+
+// Convenience overload: requests that the mesh be written and uses
+// @p filename both as the mesh file name and as the solution file name
+// of the general write_hdf5_parallel().
+template <int dim, int spacedim>
+void DataOutBase::write_hdf5_parallel (const std::vector<Patch<dim,spacedim> > &patches,
+                                       const DataOutBase::DataOutFilter &data_filter,
+                                       const std::string &filename,
+                                       MPI_Comm comm)
+{
+  write_hdf5_parallel (patches,
+                       data_filter,
+                       true,
+                       filename,
+                       filename,
+                       comm);
+}
+
+
+
+// Write the node coordinates, the cell connectivity and all point data sets
+// held by @p data_filter in HDF5 format.
+//
+// If @p write_mesh_file is true, the "nodes" and "cells" data sets are written
+// to @p mesh_filename. The point data sets are always written to
+// @p solution_filename, which may name the same file as @p mesh_filename.
+// Files that already exist are truncated. Every process writes its own slice
+// of each data set; the slice starts at the sum of the node (or cell) counts
+// of all lower-ranked processes in @p comm.
+//
+// The patches argument is unused; all data is taken from @p data_filter.
+//
+// Throws ExcMessage if the library was configured without HDF5, or if HDF5 is
+// not parallel but @p comm contains more than one process. Throws ExcIO if
+// any HDF5 call reports a failure.
+template <int dim, int spacedim>
+void DataOutBase::write_hdf5_parallel (const std::vector<Patch<dim,spacedim> > &/*patches*/,
+                                       const DataOutBase::DataOutFilter &data_filter,
+                                       const bool write_mesh_file,
+                                       const std::string &mesh_filename,
+                                       const std::string &solution_filename,
+                                       MPI_Comm comm)
+{
+#ifndef DEAL_II_WITH_HDF5
+  // throw an exception, but first make
+  // sure the compiler does not warn about
+  // the now unused function arguments
+  (void)data_filter;
+  (void)write_mesh_file;
+  (void)mesh_filename;
+  (void)solution_filename;
+  (void)comm;
+  AssertThrow(false, ExcMessage ("HDF5 support is disabled."));
+#else
+#ifndef DEAL_II_WITH_MPI
+  // verify that there are indeed
+  // patches to be written out. most
+  // of the times, people just forget
+  // to call build_patches when there
+  // are no patches, so a warning is
+  // in order. that said, the
+  // assertion is disabled if we
+  // support MPI since then it can
+  // happen that on the coarsest
+  // mesh, a processor simply has no
+  // cells it actually owns, and in
+  // that case it is legit if there
+  // are no patches
+  Assert (data_filter.n_nodes() > 0, ExcNoPatches());
+
+  // the communicator is only used in the MPI code paths below
+  (void)comm;
+#endif
+
+  // Note that everything from here on has to be compiled both with and
+  // without MPI: the serial case is handled by the non-MPI branches of the
+  // preprocessor conditionals further down.
+  hid_t           h5_mesh_file_id=-1, h5_solution_file_id, file_plist_id, plist_id;
+  hid_t           node_dataspace, node_dataset, node_file_dataspace, node_memory_dataspace;
+  hid_t           cell_dataspace, cell_dataset, cell_file_dataspace, cell_memory_dataspace;
+  hid_t           pt_data_dataspace, pt_data_dataset, pt_data_file_dataspace, pt_data_memory_dataspace;
+  herr_t          status;
+  unsigned int    local_node_cell_count[2], global_node_cell_count[2], global_node_cell_offsets[2];
+  hsize_t         count[2], offset[2], node_ds_dim[2], cell_ds_dim[2];
+  std::vector<double>          node_data_vec;
+  std::vector<unsigned int>    cell_data_vec;
+
+  // If HDF5 is not parallel and we're using multiple processes, abort
+#ifndef H5_HAVE_PARALLEL
+#  ifdef DEAL_II_WITH_MPI
+  int world_size;
+  MPI_Comm_size(comm, &world_size);
+  AssertThrow (world_size <= 1,
+               ExcMessage ("Serial HDF5 output on multiple processes is not yet supported."));
+#  endif
+#endif
+
+  // index 0 holds the node count, index 1 the cell count
+  local_node_cell_count[0] = data_filter.n_nodes();
+  local_node_cell_count[1] = data_filter.n_cells();
+
+  // Create file access properties
+  file_plist_id = H5Pcreate(H5P_FILE_ACCESS);
+  AssertThrow(file_plist_id != -1, ExcIO());
+  // If MPI is enabled *and* HDF5 is parallel, we can do parallel output
+#ifdef DEAL_II_WITH_MPI
+#ifdef H5_HAVE_PARALLEL
+  // Set the access to use the specified MPI_Comm object
+  status = H5Pset_fapl_mpio(file_plist_id, comm, MPI_INFO_NULL);
+  AssertThrow(status >= 0, ExcIO());
+#endif
+#endif
+
+  // Compute the global total number of nodes/cells
+  // And determine the offset of the data for this process
+#ifdef DEAL_II_WITH_MPI
+  MPI_Allreduce(local_node_cell_count, global_node_cell_count, 2, MPI_UNSIGNED, MPI_SUM, comm);
+  // MPI_Scan yields the inclusive prefix sum, so subtract the local
+  // contribution to obtain the offset at which this process starts
+  MPI_Scan(local_node_cell_count, global_node_cell_offsets, 2, MPI_UNSIGNED, MPI_SUM, comm);
+  global_node_cell_offsets[0] -= local_node_cell_count[0];
+  global_node_cell_offsets[1] -= local_node_cell_count[1];
+#else
+  // without MPI there is only one process: the global counts are the local
+  // ones and the data starts at the beginning of each data set
+  global_node_cell_count[0] = local_node_cell_count[0];
+  global_node_cell_count[1] = local_node_cell_count[1];
+  global_node_cell_offsets[0] = global_node_cell_offsets[1] = 0;
+#endif
+
+  // Create the property list for a collective write
+  plist_id = H5Pcreate(H5P_DATASET_XFER);
+  AssertThrow(plist_id >= 0, ExcIO());
+#ifdef DEAL_II_WITH_MPI
+#ifdef H5_HAVE_PARALLEL
+  status = H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE);
+  AssertThrow(status >= 0, ExcIO());
+#endif
+#endif
+
+  if (write_mesh_file)
+    {
+      // Overwrite any existing files (change this to an option?)
+      h5_mesh_file_id = H5Fcreate(mesh_filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, file_plist_id);
+      AssertThrow(h5_mesh_file_id >= 0, ExcIO());
+
+      // Create the dataspace for the nodes and cells
+      node_ds_dim[0] = global_node_cell_count[0];
+      node_ds_dim[1] = dim;
+      node_dataspace = H5Screate_simple(2, node_ds_dim, NULL);
+      AssertThrow(node_dataspace >= 0, ExcIO());
+
+      cell_ds_dim[0] = global_node_cell_count[1];
+      cell_ds_dim[1] = GeometryInfo<dim>::vertices_per_cell;
+      cell_dataspace = H5Screate_simple(2, cell_ds_dim, NULL);
+      AssertThrow(cell_dataspace >= 0, ExcIO());
+
+      // Create the dataset for the nodes and cells
+#if H5Gcreate_vers == 1
+      node_dataset = H5Dcreate(h5_mesh_file_id, "nodes", H5T_NATIVE_DOUBLE, node_dataspace, H5P_DEFAULT);
+#else
+      node_dataset = H5Dcreate(h5_mesh_file_id, "nodes", H5T_NATIVE_DOUBLE, node_dataspace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+#endif
+      AssertThrow(node_dataset >= 0, ExcIO());
+#if H5Gcreate_vers == 1
+      cell_dataset = H5Dcreate(h5_mesh_file_id, "cells", H5T_NATIVE_UINT, cell_dataspace, H5P_DEFAULT);
+#else
+      cell_dataset = H5Dcreate(h5_mesh_file_id, "cells", H5T_NATIVE_UINT, cell_dataspace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+#endif
+      AssertThrow(cell_dataset >= 0, ExcIO());
+
+      // Close the node and cell dataspaces since we're done with them
+      status = H5Sclose(node_dataspace);
+      AssertThrow(status >= 0, ExcIO());
+      status = H5Sclose(cell_dataspace);
+      AssertThrow(status >= 0, ExcIO());
+
+      // Create the data subset we'll use to read from memory
+      count[0] = local_node_cell_count[0];
+      count[1] = dim;
+      offset[0] = global_node_cell_offsets[0];
+      offset[1] = 0;
+      node_memory_dataspace = H5Screate_simple(2, count, NULL);
+      AssertThrow(node_memory_dataspace >= 0, ExcIO());
+
+      // Select the hyperslab in the file
+      node_file_dataspace = H5Dget_space(node_dataset);
+      AssertThrow(node_file_dataspace >= 0, ExcIO());
+      status = H5Sselect_hyperslab(node_file_dataspace, H5S_SELECT_SET, offset, NULL, count, NULL);
+      AssertThrow(status >= 0, ExcIO());
+
+      // And repeat for cells
+      count[0] = local_node_cell_count[1];
+      count[1] = GeometryInfo<dim>::vertices_per_cell;
+      offset[0] = global_node_cell_offsets[1];
+      offset[1] = 0;
+      cell_memory_dataspace = H5Screate_simple(2, count, NULL);
+      AssertThrow(cell_memory_dataspace >= 0, ExcIO());
+
+      cell_file_dataspace = H5Dget_space(cell_dataset);
+      AssertThrow(cell_file_dataspace >= 0, ExcIO());
+      status = H5Sselect_hyperslab(cell_file_dataspace, H5S_SELECT_SET, offset, NULL, count, NULL);
+      AssertThrow(status >= 0, ExcIO());
+
+      // And finally, write the node data
+      data_filter.fill_node_data(node_data_vec);
+      status = H5Dwrite(node_dataset, H5T_NATIVE_DOUBLE, node_memory_dataspace, node_file_dataspace, plist_id, &node_data_vec[0]);
+      AssertThrow(status >= 0, ExcIO());
+      node_data_vec.clear();
+
+      // And the cell data
+      data_filter.fill_cell_data(global_node_cell_offsets[0], cell_data_vec);
+      status = H5Dwrite(cell_dataset, H5T_NATIVE_UINT, cell_memory_dataspace, cell_file_dataspace, plist_id, &cell_data_vec[0]);
+      AssertThrow(status >= 0, ExcIO());
+      cell_data_vec.clear();
+
+      // Close the file dataspaces
+      status = H5Sclose(node_file_dataspace);
+      AssertThrow(status >= 0, ExcIO());
+      status = H5Sclose(cell_file_dataspace);
+      AssertThrow(status >= 0, ExcIO());
+
+      // Close the memory dataspaces
+      status = H5Sclose(node_memory_dataspace);
+      AssertThrow(status >= 0, ExcIO());
+      status = H5Sclose(cell_memory_dataspace);
+      AssertThrow(status >= 0, ExcIO());
+
+      // Close the datasets
+      status = H5Dclose(node_dataset);
+      AssertThrow(status >= 0, ExcIO());
+      status = H5Dclose(cell_dataset);
+      AssertThrow(status >= 0, ExcIO());
+
+      // If the filenames are different, we need to close the mesh file
+      if (mesh_filename != solution_filename)
+        {
+          status = H5Fclose(h5_mesh_file_id);
+          AssertThrow(status >= 0, ExcIO());
+        }
+    }
+
+  // If the filenames are identical, continue with the same file
+  if (mesh_filename == solution_filename && write_mesh_file)
+    {
+      h5_solution_file_id = h5_mesh_file_id;
+    }
+  else
+    {
+      // Otherwise we need to open a new file
+      h5_solution_file_id = H5Fcreate(solution_filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, file_plist_id);
+      AssertThrow(h5_solution_file_id >= 0, ExcIO());
+    }
+
+  // when writing, first write out
+  // all vector data, then handle the
+  // scalar data sets that have been
+  // left over
+  unsigned int    i, pt_data_vector_dim;
+  std::string     vector_name;
+  for (i=0; i<data_filter.n_data_sets(); ++i)
+    {
+      // Allocate space for the point data
+      // Must be either 1D or 3D
+      pt_data_vector_dim = data_filter.get_data_set_dim(i);
+      vector_name = data_filter.get_data_set_name(i);
+
+      // Create the dataspace for the point data
+      node_ds_dim[0] = global_node_cell_count[0];
+      node_ds_dim[1] = pt_data_vector_dim;
+      pt_data_dataspace = H5Screate_simple(2, node_ds_dim, NULL);
+      AssertThrow(pt_data_dataspace >= 0, ExcIO());
+
+#if H5Gcreate_vers == 1
+      pt_data_dataset = H5Dcreate(h5_solution_file_id, vector_name.c_str(), H5T_NATIVE_DOUBLE, pt_data_dataspace, H5P_DEFAULT);
+#else
+      pt_data_dataset = H5Dcreate(h5_solution_file_id, vector_name.c_str(), H5T_NATIVE_DOUBLE, pt_data_dataspace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+#endif
+      AssertThrow(pt_data_dataset >= 0, ExcIO());
+
+      // Create the data subset we'll use to read from memory
+      count[0] = local_node_cell_count[0];
+      count[1] = pt_data_vector_dim;
+      offset[0] = global_node_cell_offsets[0];
+      offset[1] = 0;
+      pt_data_memory_dataspace = H5Screate_simple(2, count, NULL);
+      AssertThrow(pt_data_memory_dataspace >= 0, ExcIO());
+
+      // Select the hyperslab in the file
+      pt_data_file_dataspace = H5Dget_space(pt_data_dataset);
+      AssertThrow(pt_data_file_dataspace >= 0, ExcIO());
+      status = H5Sselect_hyperslab(pt_data_file_dataspace, H5S_SELECT_SET, offset, NULL, count, NULL);
+      AssertThrow(status >= 0, ExcIO());
+
+      // And finally, write the data
+      status = H5Dwrite(pt_data_dataset, H5T_NATIVE_DOUBLE, pt_data_memory_dataspace, pt_data_file_dataspace, plist_id, data_filter.get_data_set(i));
+      AssertThrow(status >= 0, ExcIO());
+
+      // Close the dataspaces
+      status = H5Sclose(pt_data_dataspace);
+      AssertThrow(status >= 0, ExcIO());
+      status = H5Sclose(pt_data_memory_dataspace);
+      AssertThrow(status >= 0, ExcIO());
+      status = H5Sclose(pt_data_file_dataspace);
+      AssertThrow(status >= 0, ExcIO());
+      // Close the dataset
+      status = H5Dclose(pt_data_dataset);
+      AssertThrow(status >= 0, ExcIO());
+    }
+
+  // Close the file property list
+  status = H5Pclose(file_plist_id);
+  AssertThrow(status >= 0, ExcIO());
+
+  // Close the parallel access
+  status = H5Pclose(plist_id);
+  AssertThrow(status >= 0, ExcIO());
+
+  // Close the file
+  status = H5Fclose(h5_solution_file_id);
+  AssertThrow(status >= 0, ExcIO());
+#endif
+}
+
+
+
+// Write the data to @p out in the requested format by dispatching to the
+// format specific write_* member function. If the format is default_format,
+// the format stored in this object is used instead. The format "none"
+// produces no output at all.
+template <int dim, int spacedim>
+void
+DataOutInterface<dim,spacedim>::write (std::ostream &out,
+                                       const DataOutBase::OutputFormat output_format_) const
+{
+  // replace the placeholder by the format stored in this object
+  const DataOutBase::OutputFormat selected_format
+    = (output_format_ == DataOutBase::default_format
+       ?
+       default_fmt
+       :
+       output_format_);
+
+  switch (selected_format)
+    {
+    case DataOutBase::none:
+      // nothing to write
+      return;
+
+    case DataOutBase::dx:
+      write_dx (out);
+      return;
+
+    case DataOutBase::ucd:
+      write_ucd (out);
+      return;
+
+    case DataOutBase::gnuplot:
+      write_gnuplot (out);
+      return;
+
+    case DataOutBase::povray:
+      write_povray (out);
+      return;
+
+    case DataOutBase::eps:
+      write_eps (out);
+      return;
+
+    case DataOutBase::gmv:
+      write_gmv (out);
+      return;
+
+    case DataOutBase::tecplot:
+      write_tecplot (out);
+      return;
+
+    case DataOutBase::tecplot_binary:
+      write_tecplot_binary (out);
+      return;
+
+    case DataOutBase::vtk:
+      write_vtk (out);
+      return;
+
+    case DataOutBase::vtu:
+      write_vtu (out);
+      return;
+
+    case DataOutBase::svg:
+      write_svg (out);
+      return;
+
+    case DataOutBase::deal_II_intermediate:
+      write_deal_II_intermediate (out);
+      return;
+
+    default:
+      // a format this function does not know how to write
+      Assert (false, ExcNotImplemented());
+    }
+}
+
+
+
+// Store @p fmt as the format that is used whenever default_format is
+// requested. The placeholder default_format itself is rejected by an
+// assertion.
+template <int dim, int spacedim>
+void
+DataOutInterface<dim,spacedim>::set_default_format(const DataOutBase::OutputFormat fmt)
+{
+  Assert (fmt != DataOutBase::default_format, ExcNotImplemented());
+
+  this->default_fmt = fmt;
+}
+
+// Copy @p flags into the flags member of this object that has the same type.
+// The matching member is found by comparing run-time type information; an
+// assertion is triggered if @p flags is of none of the known flag types.
+template <int dim, int spacedim>
+template <typename FlagType>
+void
+DataOutInterface<dim, spacedim>::set_flags (const FlagType &flags)
+{
+  // The price for not writing ten duplicates of this function is some loss in
+  // type safety.
+  if (typeid(flags) == typeid(dx_flags))
+    dx_flags = *reinterpret_cast<const DataOutBase::DXFlags *>(&flags);
+  else if (typeid(flags) == typeid(ucd_flags))
+    ucd_flags = *reinterpret_cast<const DataOutBase::UcdFlags *>(&flags);
+  else if (typeid(flags) == typeid(gnuplot_flags))
+    // gnuplot flags have to be handled here as well, otherwise they would
+    // end up in the "not implemented" branch below
+    gnuplot_flags = *reinterpret_cast<const DataOutBase::GnuplotFlags *>(&flags);
+  else if (typeid(flags) == typeid(povray_flags))
+    povray_flags = *reinterpret_cast<const DataOutBase::PovrayFlags *>(&flags);
+  else if (typeid(flags) == typeid(eps_flags))
+    eps_flags = *reinterpret_cast<const DataOutBase::EpsFlags *>(&flags);
+  else if (typeid(flags) == typeid(gmv_flags))
+    gmv_flags = *reinterpret_cast<const DataOutBase::GmvFlags *>(&flags);
+  else if (typeid(flags) == typeid(tecplot_flags))
+    tecplot_flags = *reinterpret_cast<const DataOutBase::TecplotFlags *>(&flags);
+  else if (typeid(flags) == typeid(vtk_flags))
+    vtk_flags = *reinterpret_cast<const DataOutBase::VtkFlags *>(&flags);
+  else if (typeid(flags) == typeid(svg_flags))
+    svg_flags = *reinterpret_cast<const DataOutBase::SvgFlags *>(&flags);
+  else if (typeid(flags) == typeid(deal_II_intermediate_flags))
+    deal_II_intermediate_flags = *reinterpret_cast<const DataOutBase::Deal_II_IntermediateFlags *>(&flags);
+  else
+    Assert(false, ExcNotImplemented());
+}
+
+
+
+// Return the file name suffix DataOutBase associates with @p output_format.
+// If default_format is given, the suffix of the format stored in this object
+// is returned.
+template <int dim, int spacedim>
+std::string
+DataOutInterface<dim,spacedim>::
+default_suffix (const DataOutBase::OutputFormat output_format) const
+{
+  // an explicitly named format is looked up directly
+  if (output_format != DataOutBase::default_format)
+    return DataOutBase::default_suffix (output_format);
+
+  return DataOutBase::default_suffix (default_fmt);
+}
+
+
+
+// Declare the entries "Output format" and "Subdivisions" in @p prm, followed
+// by one subsection per flag class in which that class declares its own
+// parameters.
+template <int dim, int spacedim>
+void
+DataOutInterface<dim,spacedim>::declare_parameters (ParameterHandler &prm)
+{
+  // entries that do not depend on the output format
+  prm.declare_entry ("Output format",
+                     "gnuplot",
+                     Patterns::Selection (DataOutBase::get_output_format_names ()),
+                     "A name for the output format to be used");
+  prm.declare_entry ("Subdivisions",
+                     "1",
+                     Patterns::Integer(),
+                     "Number of subdivisions of each mesh cell");
+
+  // format specific flags, one subsection each
+  prm.enter_subsection ("DX output parameters");
+  {
+    DataOutBase::DXFlags::declare_parameters (prm);
+  }
+  prm.leave_subsection ();
+
+  prm.enter_subsection ("UCD output parameters");
+  {
+    DataOutBase::UcdFlags::declare_parameters (prm);
+  }
+  prm.leave_subsection ();
+
+  prm.enter_subsection ("Gnuplot output parameters");
+  {
+    DataOutBase::GnuplotFlags::declare_parameters (prm);
+  }
+  prm.leave_subsection ();
+
+  prm.enter_subsection ("Povray output parameters");
+  {
+    DataOutBase::PovrayFlags::declare_parameters (prm);
+  }
+  prm.leave_subsection ();
+
+  prm.enter_subsection ("Eps output parameters");
+  {
+    DataOutBase::EpsFlags::declare_parameters (prm);
+  }
+  prm.leave_subsection ();
+
+  prm.enter_subsection ("Gmv output parameters");
+  {
+    DataOutBase::GmvFlags::declare_parameters (prm);
+  }
+  prm.leave_subsection ();
+
+  prm.enter_subsection ("Tecplot output parameters");
+  {
+    DataOutBase::TecplotFlags::declare_parameters (prm);
+  }
+  prm.leave_subsection ();
+
+  prm.enter_subsection ("Vtk output parameters");
+  {
+    DataOutBase::VtkFlags::declare_parameters (prm);
+  }
+  prm.leave_subsection ();
+
+  prm.enter_subsection ("deal.II intermediate output parameters");
+  {
+    DataOutBase::Deal_II_IntermediateFlags::declare_parameters (prm);
+  }
+  prm.leave_subsection ();
+}
+
+
+
+// Read the default output format and the number of subdivisions from @p prm,
+// then let each flags member read its values from its own subsection.
+template <int dim, int spacedim>
+void
+DataOutInterface<dim,spacedim>::parse_parameters (ParameterHandler &prm)
+{
+  // entries that do not depend on the output format
+  default_fmt = DataOutBase::parse_output_format (prm.get ("Output format"));
+  default_subdivisions = prm.get_integer ("Subdivisions");
+
+  // format specific flags, one subsection each
+  prm.enter_subsection ("DX output parameters");
+  {
+    dx_flags.parse_parameters (prm);
+  }
+  prm.leave_subsection ();
+
+  prm.enter_subsection ("UCD output parameters");
+  {
+    ucd_flags.parse_parameters (prm);
+  }
+  prm.leave_subsection ();
+
+  prm.enter_subsection ("Gnuplot output parameters");
+  {
+    gnuplot_flags.parse_parameters (prm);
+  }
+  prm.leave_subsection ();
+
+  prm.enter_subsection ("Povray output parameters");
+  {
+    povray_flags.parse_parameters (prm);
+  }
+  prm.leave_subsection ();
+
+  prm.enter_subsection ("Eps output parameters");
+  {
+    eps_flags.parse_parameters (prm);
+  }
+  prm.leave_subsection ();
+
+  prm.enter_subsection ("Gmv output parameters");
+  {
+    gmv_flags.parse_parameters (prm);
+  }
+  prm.leave_subsection ();
+
+  prm.enter_subsection ("Tecplot output parameters");
+  {
+    tecplot_flags.parse_parameters (prm);
+  }
+  prm.leave_subsection ();
+
+  prm.enter_subsection ("Vtk output parameters");
+  {
+    vtk_flags.parse_parameters (prm);
+  }
+  prm.leave_subsection ();
+
+  prm.enter_subsection ("deal.II intermediate output parameters");
+  {
+    deal_II_intermediate_flags.parse_parameters (prm);
+  }
+  prm.leave_subsection ();
+}
+
+
+
+// Return the sum of the size of the default format member and the memory
+// consumption reported for each of the flags members.
+template <int dim, int spacedim>
+std::size_t
+DataOutInterface<dim,spacedim>::memory_consumption () const
+{
+  std::size_t total = sizeof (default_fmt);
+
+  total += MemoryConsumption::memory_consumption (dx_flags);
+  total += MemoryConsumption::memory_consumption (ucd_flags);
+  total += MemoryConsumption::memory_consumption (gnuplot_flags);
+  total += MemoryConsumption::memory_consumption (povray_flags);
+  total += MemoryConsumption::memory_consumption (eps_flags);
+  total += MemoryConsumption::memory_consumption (gmv_flags);
+  total += MemoryConsumption::memory_consumption (tecplot_flags);
+  total += MemoryConsumption::memory_consumption (vtk_flags);
+  total += MemoryConsumption::memory_consumption (svg_flags);
+  total += MemoryConsumption::memory_consumption (deal_II_intermediate_flags);
+
+  return total;
+}
+
+
+
+// This implementation returns an empty list, i.e. it declares no data
+// components as belonging to a vector.
+template <int dim, int spacedim>
+std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> >
+DataOutInterface<dim,spacedim>::get_vector_data_ranges () const
+{
+  typedef std_cxx11::tuple<unsigned int, unsigned int, std::string> VectorDataRange;
+
+  std::vector<VectorDataRange> no_ranges;
+  return no_ranges;
+}
+
+
+
+
+// ---------------------------------------------- DataOutReader ----------
+
+// Replace the content of this object by the data read from @p in, which is
+// expected to be in deal.II intermediate format: the dimension information,
+// three header lines, the data set names, the patches and finally the vector
+// data ranges. Throws ExcIO if the stream is not in a good state before or
+// after reading, and ExcIncompatibleDimensions if the file was written for
+// different dimensions than those of this object.
+template <int dim, int spacedim>
+void
+DataOutReader<dim,spacedim>::read (std::istream &in)
+{
+  AssertThrow (in, ExcIO());
+
+  // throw away whatever is currently stored by swapping each member with an
+  // empty temporary that is destroyed right away
+  std::vector<typename dealii::DataOutBase::Patch<dim,spacedim> >().swap (patches);
+  std::vector<std::string>().swap (dataset_names);
+  std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> >().swap (vector_data_ranges);
+
+  // the file starts with the dimension information, which has to agree with
+  // the template arguments of this class
+  {
+    const std::pair<unsigned int, unsigned int> dimension_info
+      = DataOutBase::determine_intermediate_format_dimensions (in);
+    AssertThrow ((dimension_info.first  == dim) &&
+                 (dimension_info.second == spacedim),
+                 ExcIncompatibleDimensions (dimension_info.first, dim,
+                                            dimension_info.second, spacedim));
+
+    // skip the remainder of that line
+    std::string rest_of_line;
+    std::getline (in, rest_of_line);
+  }
+
+  // first header line: the format identification
+  {
+    std::string header;
+    std::getline (in, header);
+
+    std::ostringstream s;
+    s << "[deal.II intermediate format graphics data]";
+
+    Assert (header == s.str(), ExcUnexpectedInput(s.str(),header));
+  }
+
+  // second header line: name and version of the package that wrote the file
+  {
+    std::string header;
+    std::getline (in, header);
+
+    std::ostringstream s;
+    s << "[written by " << DEAL_II_PACKAGE_NAME << " " << DEAL_II_PACKAGE_VERSION << "]";
+
+    Assert (header == s.str(), ExcUnexpectedInput(s.str(),header));
+  }
+
+  // third header line: the version of the intermediate format
+  {
+    std::string header;
+    std::getline (in, header);
+
+    std::ostringstream s;
+    s << "[Version: " << dealii::DataOutBase::Deal_II_IntermediateFlags::format_version << "]";
+
+    Assert (header == s.str(),
+            ExcMessage("Invalid or incompatible file format. Intermediate format "
+                       "files can only be read by the same deal.II version as they "
+                       "are written by."));
+  }
+
+  // names of the data sets
+  unsigned int n_datasets;
+  in >> n_datasets;
+  dataset_names.resize (n_datasets);
+  for (std::vector<std::string>::iterator name = dataset_names.begin();
+       name != dataset_names.end(); ++name)
+    in >> *name;
+
+  // the patches themselves
+  unsigned int n_patches;
+  in >> n_patches;
+  patches.resize (n_patches);
+  for (unsigned int p=0; p<n_patches; ++p)
+    in >> patches[p];
+
+  // which data sets form vectors
+  unsigned int n_vector_data_ranges;
+  in >> n_vector_data_ranges;
+  vector_data_ranges.resize (n_vector_data_ranges);
+  for (unsigned int r=0; r<n_vector_data_ranges; ++r)
+    {
+      std_cxx11::tuple<unsigned int, unsigned int, std::string> &range
+        = vector_data_ranges[r];
+
+      // first and last component of this range
+      in >> std_cxx11::get<0>(range);
+      in >> std_cxx11::get<1>(range);
+
+      // the name of the range is on a line of its own. the first getline
+      // only consumes what is left of the line holding the two integers,
+      // the second one reads the whole line with the name
+      std::string name;
+      std::getline (in, name);
+      std::getline (in, name);
+      std_cxx11::get<2>(range) = name;
+    }
+
+  AssertThrow (in, ExcIO());
+}
+
+
+
+// Append the patches of @p source to the patches of this object. In debug
+// mode, assertions check that both objects hold patches, use the same data
+// set names and vector data ranges, and that their first patches agree in
+// the number of subdivisions and the size of the data table. The patch
+// indices and neighbor indices of the appended patches are shifted by the
+// number of patches this object held before.
+template <int dim, int spacedim>
+void
+DataOutReader<dim,spacedim>::
+merge (const DataOutReader<dim,spacedim> &source)
+{
+  typedef typename dealii::DataOutBase::Patch<dim,spacedim> Patch;
+
+  // work on a copy of the other object's patches
+  const std::vector<Patch> source_patches (source.get_patches ());
+
+  // neither side may be empty
+  Assert (patches.size () != 0,        ExcNoPatches ());
+  Assert (source_patches.size () != 0, ExcNoPatches ());
+
+  // the names of the data sets have to agree
+  Assert (get_dataset_names() == source.get_dataset_names(),
+          ExcIncompatibleDatasetNames());
+
+  // and so do the vector data ranges: first their number, then each
+  // of their three entries
+  Assert (get_vector_data_ranges().size() ==
+          source.get_vector_data_ranges().size(),
+          ExcMessage ("Both sources need to declare the same components "
+                      "as vectors."));
+  for (unsigned int i=0; i<get_vector_data_ranges().size(); ++i)
+    {
+      Assert (std_cxx11::get<0>(get_vector_data_ranges()[i]) ==
+              std_cxx11::get<0>(source.get_vector_data_ranges()[i]),
+              ExcMessage ("Both sources need to declare the same components "
+                          "as vectors."));
+      Assert (std_cxx11::get<1>(get_vector_data_ranges()[i]) ==
+              std_cxx11::get<1>(source.get_vector_data_ranges()[i]),
+              ExcMessage ("Both sources need to declare the same components "
+                          "as vectors."));
+      Assert (std_cxx11::get<2>(get_vector_data_ranges()[i]) ==
+              std_cxx11::get<2>(source.get_vector_data_ranges()[i]),
+              ExcMessage ("Both sources need to declare the same components "
+                          "as vectors."));
+    }
+
+  // compare the layout of the first patch on either side
+  Assert (patches[0].n_subdivisions == source_patches[0].n_subdivisions,
+          ExcIncompatiblePatchLists());
+  Assert (patches[0].data.n_rows() == source_patches[0].data.n_rows(),
+          ExcIncompatiblePatchLists());
+  Assert (patches[0].data.n_cols() == source_patches[0].data.n_cols(),
+          ExcIncompatiblePatchLists());
+
+  // append the new patches, remembering how many there were before
+  const unsigned int old_n_patches = patches.size();
+  patches.insert (patches.end(),
+                  source_patches.begin(), source_patches.end());
+
+  // the appended patches still carry the numbering of the source
+  // object: shift their own index and the indices of all existing
+  // neighbors
+  for (typename std::vector<Patch>::iterator patch = patches.begin() + old_n_patches;
+       patch != patches.end(); ++patch)
+    {
+      patch->patch_index += old_n_patches;
+
+      for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+        if (patch->neighbors[face] != Patch::no_neighbor)
+          patch->neighbors[face] += old_n_patches;
+    }
+}
+
+
+
+// Give read access to the patches stored in this object.
+template <int dim, int spacedim>
+const std::vector<typename dealii::DataOutBase::Patch<dim,spacedim> > &
+DataOutReader<dim,spacedim>::get_patches () const
+{
+  return this->patches;
+}
+
+
+
+// Return a copy of the data set names stored in this object.
+template <int dim, int spacedim>
+std::vector<std::string>
+DataOutReader<dim,spacedim>::get_dataset_names () const
+{
+  return this->dataset_names;
+}
+
+
+
+// Return a copy of the vector data ranges stored in this object.
+template <int dim, int spacedim>
+std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> >
+DataOutReader<dim,spacedim>::get_vector_data_ranges () const
+{
+  return this->vector_data_ranges;
+}
+
+
+
+namespace DataOutBase
+{
+  // Write @p patch to @p out in text form: a header line, the vertices, the
+  // neighbor indices, the patch index and number of subdivisions, the
+  // points_are_available flag, the size of the data table and its entries,
+  // followed by an empty line.
+  template <int dim, int spacedim>
+  std::ostream &
+  operator << (std::ostream                           &out,
+               const Patch<dim,spacedim> &patch)
+  {
+    // header line identifying the kind of patch
+    out << "[deal.II intermediate Patch<" << dim << ',' << spacedim << ">]"
+        << '\n';
+
+    // vertices, reordered through the ucd_to_deal table
+    for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+      out << patch.vertices[GeometryInfo<dim>::ucd_to_deal[v]] << ' ';
+    out << '\n';
+
+    // one neighbor per face
+    for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+      out << patch.neighbors[f] << ' ';
+    out << '\n';
+
+    out << patch.patch_index << ' ' << patch.n_subdivisions
+        << '\n';
+
+    out << patch.points_are_available<<'\n';
+
+    // size of the data table, then its entries row by row
+    const unsigned int n_rows = patch.data.n_rows();
+    const unsigned int n_cols = patch.data.n_cols();
+    out << n_rows << ' ' << n_cols << '\n';
+    for (unsigned int row=0; row<n_rows; ++row)
+      for (unsigned int col=0; col<n_cols; ++col)
+        out << patch.data[row][col] << ' ';
+    out << '\n';
+    out << '\n';
+
+    return out;
+  }
+
+
+  // Read @p patch from @p in, expecting the layout produced by the output
+  // operator above. Throws ExcIO if the stream is not in a good state before
+  // or after reading.
+  template <int dim, int spacedim>
+  std::istream &
+  operator >> (std::istream                     &in,
+               Patch<dim,spacedim> &patch)
+  {
+    AssertThrow (in, ExcIO());
+
+    // look for the header line, skipping lines that are empty or consist
+    // of blanks only
+    {
+      std::string header;
+      do
+        {
+          std::getline (in, header);
+
+          // remove trailing blanks; for a line without any non-blank
+          // character this erases the whole line
+          header.erase (header.find_last_not_of (' ') + 1);
+        }
+      while (header.empty() && in);
+
+      std::ostringstream s;
+      s << "[deal.II intermediate Patch<" << dim << ',' << spacedim << ">]";
+
+      Assert (header == s.str(), ExcUnexpectedInput(s.str(),header));
+    }
+
+
+    // vertices, in the same order in which they were written
+    for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+      in >> patch.vertices[GeometryInfo<dim>::ucd_to_deal[v]];
+
+    // one neighbor per face
+    for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+      in >> patch.neighbors[f];
+
+    in >> patch.patch_index;
+    in >> patch.n_subdivisions;
+
+    in >> patch.points_are_available;
+
+    // size of the data table, then its entries row by row
+    unsigned int n_rows, n_cols;
+    in >> n_rows >> n_cols;
+    patch.data.reinit (n_rows, n_cols);
+    for (unsigned int row=0; row<patch.data.n_rows(); ++row)
+      for (unsigned int col=0; col<patch.data.n_cols(); ++col)
+        in >> patch.data[row][col];
+
+    AssertThrow (in, ExcIO());
+
+    return in;
+  }
+}
+
+
+
+// explicit instantiations
+#include "data_out_base.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/data_out_base.inst.in b/source/base/data_out_base.inst.in
new file mode 100644
index 0000000..b301850
--- /dev/null
+++ b/source/base/data_out_base.inst.in
@@ -0,0 +1,142 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+{
+#if deal_II_dimension <= deal_II_space_dimension  // only combinations with dim not exceeding spacedim
+  template class DataOutInterface<deal_II_dimension, deal_II_space_dimension>;
+  template class DataOutReader<deal_II_dimension, deal_II_space_dimension>;
+
+  namespace DataOutBase
+  \{
+  template struct Patch<deal_II_dimension, deal_II_space_dimension>;
+
+  template
+  std::ostream &
+  operator << (std::ostream                           &out,
+               const Patch<deal_II_dimension, deal_II_space_dimension> &patch);
+
+  template
+  std::istream &
+  operator >> (std::istream                     &in,
+               Patch<deal_II_dimension, deal_II_space_dimension> &patch);
+
+  template
+  void
+  write_vtk (const std::vector<Patch<deal_II_dimension,deal_II_space_dimension> > &patches,
+                        const std::vector<std::string>          &data_names,
+                        const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &vector_data_ranges,
+                        const VtkFlags                          &flags,
+                        std::ostream                            &out);
+
+  template
+  void
+  write_vtu (const std::vector<Patch<deal_II_dimension,deal_II_space_dimension> > &patches,
+                        const std::vector<std::string>          &data_names,
+                        const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &vector_data_ranges,
+                        const VtkFlags                          &flags,
+                        std::ostream                            &out);
+
+  template
+  void
+  write_ucd (const std::vector<Patch<deal_II_dimension,deal_II_space_dimension> > &patches,
+                        const std::vector<std::string>          &data_names,
+                        const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &vector_data_ranges,
+                        const UcdFlags                          &flags,
+                        std::ostream                            &out);
+
+  template
+  void
+  write_dx (const std::vector<Patch<deal_II_dimension,deal_II_space_dimension> > &patches,
+                        const std::vector<std::string>          &data_names,
+                        const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &vector_data_ranges,
+                        const DXFlags                          &flags,
+                        std::ostream                            &out);
+
+  template
+  void
+  write_gnuplot (const std::vector<Patch<deal_II_dimension,deal_II_space_dimension> > &patches,
+                        const std::vector<std::string>          &data_names,
+                        const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &vector_data_ranges,
+                        const GnuplotFlags                          &flags,
+                        std::ostream                            &out);
+
+  template
+  void
+  write_povray (const std::vector<Patch<deal_II_dimension,deal_II_space_dimension> > &patches,
+                        const std::vector<std::string>          &data_names,
+                        const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &vector_data_ranges,
+                        const PovrayFlags                          &flags,
+                        std::ostream                            &out);
+
+  template
+  void
+  write_eps (const std::vector<Patch<deal_II_dimension,deal_II_space_dimension> > &patches,
+                        const std::vector<std::string>          &data_names,
+                        const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &vector_data_ranges,
+                        const EpsFlags                          &flags,
+                        std::ostream                            &out);
+
+  template
+  void
+  write_gmv (const std::vector<Patch<deal_II_dimension,deal_II_space_dimension> > &patches,
+                        const std::vector<std::string>          &data_names,
+                        const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &vector_data_ranges,
+                        const GmvFlags                          &flags,
+                        std::ostream                            &out);
+
+  template
+  void
+  write_tecplot (const std::vector<Patch<deal_II_dimension,deal_II_space_dimension> > &patches,
+                        const std::vector<std::string>          &data_names,
+                        const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &vector_data_ranges,
+                        const TecplotFlags                          &flags,
+                        std::ostream                            &out);
+
+  template
+  void
+  write_tecplot_binary (const std::vector<Patch<deal_II_dimension,deal_II_space_dimension> > &patches,
+                        const std::vector<std::string>          &data_names,
+                        const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &vector_data_ranges,
+                        const TecplotFlags                          &flags,
+                        std::ostream                            &out);
+
+#if deal_II_space_dimension >1  // write_svg is not instantiated for spacedim 1
+  template
+  void
+  write_svg (const std::vector<Patch<deal_II_dimension,deal_II_space_dimension> > &patches,
+                        const std::vector<std::string>          &data_names,
+                        const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &vector_data_ranges,
+                        const SvgFlags                          &flags,
+                        std::ostream                            &out);
+#endif  // spacedim > 1
+  template
+  void
+  write_deal_II_intermediate (const std::vector<Patch<deal_II_dimension,deal_II_space_dimension> > &patches,
+                        const std::vector<std::string>          &data_names,
+                        const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> > &vector_data_ranges,
+                        const Deal_II_IntermediateFlags                          &flags,
+                        std::ostream                            &out);
+  \}
+#endif  // dim <= spacedim
+}
+
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS; flag_type : OUTPUT_FLAG_TYPES)
+{ // NOTE(review): unlike the loop above there is no dim <= spacedim guard here, confirm this is intended
+  template
+  void
+  DataOutInterface<deal_II_dimension, deal_II_space_dimension>::set_flags (const DataOutBase::flag_type &flags); // one instantiation per flag type
+}
diff --git a/source/base/event.cc b/source/base/event.cc
new file mode 100644
index 0000000..330e77f
--- /dev/null
+++ b/source/base/event.cc
@@ -0,0 +1,74 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/event.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+//TODO: Thread safety
+
+namespace Algorithms
+{
+  // Names of all events registered through assign(), in registration order.
+  std::vector<std::string> Event::names;
+
+  // Register @p name and return an Event in which only the flag belonging
+  // to this new name is set.
+  Event
+  Event::assign(const char *name)
+  {
+    const unsigned int new_slot = names.size();
+    names.push_back(name);
+
+    // The default constructor sizes the flags by the number of names, so
+    // constructing after the push_back gives a cleared flag for new_slot.
+    Event fresh;
+    fresh.flags[new_slot] = true;
+    return fresh;
+  }
+
+  // Start with no flag set; one flag per name known at construction time.
+  Event::Event ()
+    :
+    all_true(false),
+    flags(names.size(), false)
+  {}
+
+  // Clear all_true as well as every individual flag.
+  void
+  Event::clear ()
+  {
+    std::fill(flags.begin(), flags.end(), false);
+    all_true = false;
+  }
+
+  void
+  Event::all ()
+  {
+    all_true = true;
+  }
+
+  namespace Events
+  {
+    const Event initial = Event::assign("Initial");
+    const Event remesh = Event::assign("Remesh");
+    const Event bad_derivative = Event::assign("Bad Derivative");
+    const Event new_time = Event::assign("New Time");
+    const Event new_timestep_size = Event::assign("New Time Step Size");
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/exceptions.cc b/source/base/exceptions.cc
new file mode 100644
index 0000000..d8d1b20
--- /dev/null
+++ b/source/base/exceptions.cc
@@ -0,0 +1,375 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/logstream.h>
+#include <string>
+#include <cstdlib>
+#include <iostream>
+#include <sstream>
+
+#ifdef DEAL_II_HAVE_GLIBC_STACKTRACE
+#  include <execinfo.h>
+#endif
+
+#ifdef DEAL_II_HAVE_LIBSTDCXX_DEMANGLER
+#  include <cxxabi.h>
+#endif
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace deal_II_exceptions
+{
+  std::string additional_assert_output; // appended to messages by generate_message()
+
+  // A null @p p clears the text: assigning NULL to a std::string is undefined.
+  void set_additional_assert_output (const char *const p)
+  {
+    additional_assert_output = (p != NULL ? p : "");
+  }
+
+  // Consulted by ExceptionBase::print_stack_trace(); on by default.
+  bool show_stacktrace = true;
+
+  void suppress_stacktrace_in_exceptions ()
+  {
+    show_stacktrace = false;
+  }
+
+  bool abort_on_exception = true; // consulted by internals::abort()
+
+  void disable_abort_on_exception ()
+  {
+    abort_on_exception = false;
+  }
+}
+
+
+
+ExceptionBase::ExceptionBase ()
+  :
+  file(""),
+  line(0),
+  function(""),
+  cond(""),
+  exc(""),
+  stacktrace (NULL), // symbol strings are only created on demand in what()
+  n_stacktrace_frames (0), // no frames until set_fields() records them
+  what_str("") // empty means the message has not been generated yet
+{}
+
+
+
+ExceptionBase::ExceptionBase (const ExceptionBase &exc)
+  :
+  file(exc.file),
+  line(exc.line),
+  function(exc.function),
+  cond(exc.cond),
+  exc(exc.exc),
+  stacktrace (NULL), // don't copy stacktrace to avoid double de-allocation problem
+  n_stacktrace_frames (0), // frame count is reset; with zero frames print_stack_trace() returns early
+  what_str("") // don't copy the error message, it gets generated dynamically by what()
+{}
+
+
+
+ExceptionBase::~ExceptionBase () throw ()
+{
+  free (stacktrace); // releases the array what() got from backtrace_symbols(); free(NULL) is allowed
+  stacktrace = NULL;
+}
+
+
+
+void ExceptionBase::set_fields (const char *f,
+                                const int  l,
+                                const char *func,
+                                const char *c,
+                                const char *e)
+{
+  // Remember where the exception was raised and what was violated.
+  this->file     = f;
+  this->line     = l;
+  this->function = func;
+  this->cond     = c;
+  this->exc      = e;
+
+  // Where supported, record the raw return addresses of the current call
+  // stack. Translating them into symbol names with backtrace_symbols() is
+  // postponed until what() needs them: the lookup has to load libraries
+  // and can take on the order of seconds on some machines.
+#ifdef DEAL_II_HAVE_GLIBC_STACKTRACE
+  n_stacktrace_frames = backtrace(raw_stacktrace, 25);
+#endif
+}
+
+const char *ExceptionBase::what() const throw()
+{
+  // If no error c_string was generated so far, do it now:
+  if (what_str == "")
+    {
+#ifdef DEAL_II_HAVE_GLIBC_STACKTRACE
+      // We have deferred the symbol lookup to this point to avoid costly
+      // runtime penalties due to linkage of external libraries by
+      // backtrace_symbols.
+
+      // first delete old stacktrace if necessary
+      free (stacktrace); // free(NULL) is allowed
+      stacktrace = backtrace_symbols(raw_stacktrace, n_stacktrace_frames);
+#endif
+
+      generate_message();
+    }
+
+  return what_str.c_str();
+}
+
+
+const char *ExceptionBase::get_exc_name () const
+{
+  return exc; // the string stored by set_fields() or copied from another exception
+}
+
+
+
+void ExceptionBase::print_exc_data (std::ostream &out) const
+{
+  // where it happened, what was violated, and which exception reports it
+  out << "An error occurred in line <" << line << "> of file <" << file
+      << "> in function" << std::endl;
+  out << "    " << function << std::endl;
+  out << "The violated condition was: "<< std::endl;
+  out << "    " << cond << std::endl;
+  out << "The name and call sequence of the exception was:" << std::endl;
+  out << "    " << exc  << std::endl;
+  out << "Additional Information: " << std::endl;
+}
+
+
+
+void ExceptionBase::print_info (std::ostream &out) const
+{
+  out << "(none)" << std::endl; // text that follows the "Additional Information: " line in generate_message()
+}
+
+
+
+// Write the recorded stack trace to @p out, one frame per line. Prints
+// nothing without frames or symbol strings, or if traces are suppressed.
+void ExceptionBase::print_stack_trace (std::ostream &out) const
+{
+  // stacktrace is only set by what(), and backtrace_symbols() can fail,
+  // so never index into a null array
+  if ((n_stacktrace_frames == 0) || (stacktrace == NULL))
+    return;
+
+  if (deal_II_exceptions::show_stacktrace == false)
+    return;
+
+  // if there is a stackframe stored, print it
+  out << std::endl;
+  out << "Stacktrace:" << std::endl
+      << "-----------" << std::endl;
+
+  // print the stacktrace. first omit all those frames that have
+  // ExceptionBase or deal_II_exceptions in their names, as these
+  // correspond to the exception raising mechanism themselves, rather than
+  // the place where the exception was triggered
+  int frame = 0;
+  while ((frame < n_stacktrace_frames)
+         &&
+         ((std::string(stacktrace[frame]).find ("ExceptionBase") != std::string::npos)
+          ||
+          (std::string(stacktrace[frame]).find ("deal_II_exceptions") != std::string::npos)))
+    ++frame;
+
+  // output the rest
+  const unsigned int first_significant_frame = frame;
+  for (; frame < n_stacktrace_frames; ++frame)
+    {
+      out << '#' << frame - first_significant_frame << "  ";
+
+      // the stacktrace frame is actually of the format
+      // "filename(functionname+offset) [address]". let's try to get the
+      // mangled functionname out. keep the positions as size_type so that
+      // npos is not truncated, and leave entries that do not match the
+      // format untouched
+      std::string stacktrace_entry (stacktrace[frame]);
+      const std::string::size_type pos_start = stacktrace_entry.find('(');
+      const std::string::size_type pos_end
+        = (pos_start != std::string::npos
+           ? stacktrace_entry.find('+', pos_start)
+           : std::string::npos);
+      const bool parsed = (pos_end != std::string::npos);
+      const std::string functionname
+        = (parsed
+           ? stacktrace_entry.substr (pos_start+1, pos_end-pos_start-1)
+           : std::string());
+
+      // demangle, and if successful replace old mangled string by
+      // unmangled one (skipping address and offset). treat "main"
+      // differently, since it is apparently demangled as "unsigned int"
+      // for unknown reasons :-) if we can, demangle the function name
+#ifdef DEAL_II_HAVE_LIBSTDCXX_DEMANGLER
+      int         status;
+      char *p = abi::__cxa_demangle(functionname.c_str(), 0, 0, &status);
+
+      if ((status == 0) && (functionname != "main"))
+        {
+          std::string realname(p);
+          // in MT mode, one often gets backtraces spanning several lines
+          // because we have so many boost::tuple arguments in the MT
+          // calling functions. most of the trailing arguments of these
+          // tuples are actually unused boost::tuples::null_type, so we
+          // should split them off if they are trailing a template argument
+          // list
+          while (realname.find (", boost::tuples::null_type>")
+                 != std::string::npos)
+            realname.erase (realname.find (", boost::tuples::null_type>"),
+                            std::string (", boost::tuples::null_type").size());
+
+          stacktrace_entry = stacktrace_entry.substr(0, pos_start)
+                             + ": " + realname;
+        }
+      else if (parsed)
+        stacktrace_entry = stacktrace_entry.substr(0, pos_start)
+                           + ": " + functionname;
+
+      free (p);
+#else
+      if (parsed)
+        stacktrace_entry = stacktrace_entry.substr(0, pos_start)
+                           + ": " + functionname;
+#endif
+
+      // then output what we have
+      out << stacktrace_entry << std::endl;
+
+      // stop if we're in main()
+      if (functionname == "main")
+        break;
+    }
+}
+
+
+
+void ExceptionBase::generate_message () const
+{
+  // build up a c_string with the error message.
+  // Guard this procedure with a try block, we shall not throw at this
+  // place...
+  try
+    {
+      std::ostringstream converter;
+
+      converter << std::endl
+                << "--------------------------------------------------------"
+                << std::endl;
+
+      // print out general data
+      print_exc_data (converter);
+      // print out exception specific data
+      print_info (converter);
+      print_stack_trace (converter);
+
+      if (!deal_II_exceptions::additional_assert_output.empty())
+        {
+          converter << "--------------------------------------------------------"
+                    << std::endl
+                    << deal_II_exceptions::additional_assert_output
+                    << std::endl;
+        }
+
+      converter << "--------------------------------------------------------"
+                << std::endl;
+
+      what_str = converter.str();
+    }
+  catch (...)
+    {
+      // On error, resume next. There is nothing better we can do...
+      what_str = "ExceptionBase::generate_message () failed";
+    }
+}
+
+
+
+namespace deal_II_exceptions
+{
+  namespace internals
+  {
+    // Handle @p exc: abort the program, or log its name if @p nothrow, or else throw it.
+    void abort (const ExceptionBase &exc, bool nothrow /*= false*/)
+    {
+      if (dealii::deal_II_exceptions::abort_on_exception)
+        {
+          // Print the error message and bail out:
+          std::cerr << exc.what() << std::endl;
+          std::abort();
+        }
+      else if (nothrow)
+        {
+          // We are not allowed to throw, and not allowed to abort.
+          // Just print the exception name to deallog and continue
+          // normally:
+          deallog << "Exception: " << exc.get_exc_name() << std::endl;
+        }
+      else
+        {
+          // We are not allowed to abort, so just throw the error. Note
+          // that exc is copied as an ExceptionBase here, i.e. without the
+          // parts added by the class it was originally created as:
+          throw exc;
+        }
+    }
+
+  } /*namespace internals*/
+} /*namespace deal_II_exceptions*/
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+
+
+// Newer versions of gcc have a very nice feature: you can set a verbose
+// terminate handler, that not only aborts a program when an exception is
+// thrown and not caught somewhere, but before aborting it prints that an
+// exception has been thrown, and possibly what the std::exception::what()
+// function has to say. Since many people run into the trap of not having a
+// catch clause in main(), they wonder where that abort may be coming from.
+// The terminate handler then at least says what is missing in their
+// program.
+#ifdef DEAL_II_HAVE_VERBOSE_TERMINATE
+namespace __gnu_cxx
+{
+  extern void __verbose_terminate_handler ();
+}
+
+namespace
+{
+  // Registers the verbose handler when the object below is constructed.
+  struct VerboseTerminateInstaller
+  {
+    VerboseTerminateInstaller()
+    {
+      std::set_terminate(&__gnu_cxx::__verbose_terminate_handler);
+    }
+  };
+  static VerboseTerminateInstaller verbose_terminate_installer;
+}
+#endif
diff --git a/source/base/flow_function.cc b/source/base/flow_function.cc
new file mode 100644
index 0000000..7b43271
--- /dev/null
+++ b/source/base/flow_function.cc
@@ -0,0 +1,778 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2007 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/tensor.h>
+#include <deal.II/base/point.h>
+#include <deal.II/base/flow_function.h>
+#include <deal.II/lac/vector.h>
+
+#include <cmath>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace Functions
+{
+
+  template<int dim>
+  FlowFunction<dim>::FlowFunction()
+    :
+    Function<dim>(dim+1), // dim velocity components followed by the pressure
+    mean_pressure(0),
+    aux_values(dim+1), // one scratch array per component, sized per call
+    aux_gradients(dim+1)
+  {}
+
+
+  template<int dim>
+  FlowFunction<dim>::~FlowFunction()
+  {} // empty body, nothing is released explicitly here
+
+
+  template<int dim>
+  void
+  FlowFunction<dim>::pressure_adjustment(double p)
+  {
+    mean_pressure = p; // added to the pressure component in the derived classes' vector_values()
+  }
+
+
+  template<int dim>
+  void FlowFunction<dim>::vector_value_list (
+    const std::vector<Point<dim> > &points,
+    std::vector<Vector<double> >   &values) const
+  {
+    // values is expected to hold one Vector of size dim+1 per point
+    const unsigned int n_points = points.size();
+    Assert(values.size() == n_points, ExcDimensionMismatch(values.size(), n_points));
+
+    // aux_values is a member used as scratch space; guard it in multithread mode
+    Threads::Mutex::ScopedLock guard (mutex);
+    for (unsigned int comp=0; comp<dim+1; ++comp)
+      aux_values[comp].resize(n_points);
+    vector_values(points, aux_values);
+
+    // copy from [component][point] layout to [point](component) layout
+    for (unsigned int k=0; k<n_points; ++k)
+      {
+        Assert(values[k].size() == dim+1, ExcDimensionMismatch(values[k].size(), dim+1));
+        for (unsigned int comp=0; comp<dim+1; ++comp)
+          values[k](comp) = aux_values[comp][k];
+      }
+  }
+
+
+  template<int dim>
+  void FlowFunction<dim>::vector_value (
+    const Point<dim> &point,
+    Vector<double> &value) const
+  {
+    Assert(value.size() == dim+1, ExcDimensionMismatch(value.size(), dim+1));
+
+    // vector_values() works on lists of points, so wrap the single
+    // point into a list of length one
+    const std::vector<Point<dim> > points(1, point);
+
+    // guard access to the aux_* members in multithread mode
+    Threads::Mutex::ScopedLock guard (mutex);
+
+    for (unsigned int comp=0; comp<dim+1; ++comp)
+      aux_values[comp].resize(points.size());
+    vector_values(points, aux_values);
+
+    // the only point has index zero
+    for (unsigned int comp=0; comp<dim+1; ++comp)
+      value(comp) = aux_values[comp][0];
+  }
+
+
+  template<int dim>
+  double FlowFunction<dim>::value (
+    const Point<dim> &point,
+    const unsigned int comp) const
+  {
+    Assert(comp < dim+1, ExcIndexRange(comp, 0, dim+1));
+
+    // evaluate all components at the single point through the list
+    // interface and pick the requested one afterwards
+    const std::vector<Point<dim> > points(1, point);
+
+    // guard access to the aux_* members in multithread mode
+    Threads::Mutex::ScopedLock guard (mutex);
+
+    for (unsigned int c=0; c<dim+1; ++c)
+      aux_values[c].resize(points.size());
+    vector_values(points, aux_values);
+
+    return aux_values[comp][0];
+  }
+
+
+  template<int dim>
+  void FlowFunction<dim>::vector_gradient_list (
+    const std::vector<Point<dim> > &points,
+    std::vector<std::vector<Tensor<1,dim> > > &values) const
+  {
+    // values is expected to hold dim+1 gradients for each point
+    const unsigned int n_points = points.size();
+    Assert(values.size() == n_points, ExcDimensionMismatch(values.size(), n_points));
+
+    // aux_gradients is a member used as scratch space; guard it in multithread mode
+    Threads::Mutex::ScopedLock guard (mutex);
+    for (unsigned int comp=0; comp<dim+1; ++comp)
+      aux_gradients[comp].resize(n_points);
+    vector_gradients(points, aux_gradients);
+
+    // copy from [component][point] layout to [point][component] layout
+    for (unsigned int k=0; k<n_points; ++k)
+      {
+        Assert(values[k].size() == dim+1, ExcDimensionMismatch(values[k].size(), dim+1));
+        for (unsigned int comp=0; comp<dim+1; ++comp)
+          values[k][comp] = aux_gradients[comp][k];
+      }
+  }
+
+
+  template<int dim>
+  void FlowFunction<dim>::vector_laplacian_list (
+    const std::vector<Point<dim> > &points,
+    std::vector<Vector<double> > &values) const
+  {
+    // values is expected to hold one Vector of size dim+1 per point
+    const unsigned int n_points = points.size();
+    Assert(values.size() == n_points, ExcDimensionMismatch(values.size(), n_points));
+
+    // aux_values is a member used as scratch space; guard it in multithread mode
+    Threads::Mutex::ScopedLock guard (mutex);
+    for (unsigned int comp=0; comp<dim+1; ++comp)
+      aux_values[comp].resize(n_points);
+    vector_laplacians(points, aux_values);
+
+    // copy from [component][point] layout to [point](component) layout
+    for (unsigned int k=0; k<n_points; ++k)
+      {
+        Assert(values[k].size() == dim+1, ExcDimensionMismatch(values[k].size(), dim+1));
+        for (unsigned int comp=0; comp<dim+1; ++comp)
+          values[k](comp) = aux_values[comp][k];
+      }
+  }
+
+
+  template<int dim>
+  std::size_t
+  FlowFunction<dim>::memory_consumption () const
+  {
+    Assert(false, ExcNotImplemented()); // no estimate is computed here
+    return 0; // placeholder, reached only if the Assert above lets execution continue
+  }
+
+
+//----------------------------------------------------------------------//
+
+  template<int dim>
+  PoisseuilleFlow<dim>::PoisseuilleFlow(const double r, const double Re)
+    : radius(r), Reynolds(Re)
+  {
+    // both values are used as divisors when the flow field is evaluated
+    Assert(radius != 0., ExcMessage("Radius cannot be zero"));
+    Assert(Reynolds != 0., ExcMessage("Reynolds number cannot be zero"));
+  }
+
+
+  template<int dim>
+  PoisseuilleFlow<dim>::~PoisseuilleFlow()
+  {} // empty body, nothing is released explicitly here
+
+
+  // Fill values[component][point] with the velocity (components 0..dim-1)
+  // and the pressure (component dim) at each of the given points.
+  template<int dim>
+  void PoisseuilleFlow<dim>::vector_values (
+    const std::vector<Point<dim> > &points,
+    std::vector<std::vector<double> > &values) const
+  {
+    unsigned int n = points.size();
+    const double inv_radius = 1./radius;
+
+    Assert(values.size() == dim+1, ExcDimensionMismatch(values.size(), dim+1));
+    for (unsigned int d=0; d<dim+1; ++d)
+      Assert(values[d].size() == n, ExcDimensionMismatch(values[d].size(), n));
+
+    for (unsigned int q=0; q<n; ++q)
+      {
+        const Point<dim> &x = points[q];
+
+        // squared distance to the x-axis, scaled by the radius
+        double scaled_r2 = 0;
+        for (unsigned int c=1; c<dim; ++c)
+          scaled_r2 += x(c)*x(c)*inv_radius*inv_radius;
+
+        // only the x-velocity is nonzero
+        values[0][q] = 1.-scaled_r2;
+        for (unsigned int c=1; c<dim; ++c)
+          values[c][q] = 0.;
+
+        // pressure, linear in x and shifted by the mean pressure
+        values[dim][q] = -2*(dim-1)*inv_radius*inv_radius*x(0)/Reynolds + this->mean_pressure;
+      }
+  }
+
+
+
+  template<int dim>
+  void PoisseuilleFlow<dim>::vector_gradients (
+    const std::vector<Point<dim> > &points,
+    std::vector<std::vector<Tensor<1,dim> > > &values) const
+  {
+    unsigned int n = points.size();
+    const double inv_radius = 1./radius;
+
+    Assert(values.size() == dim+1, ExcDimensionMismatch(values.size(), dim+1));
+    for (unsigned int d=0; d<dim+1; ++d)
+      Assert(values[d].size() == n, ExcDimensionMismatch(values[d].size(), n));
+
+    for (unsigned int q=0; q<n; ++q)
+      {
+        const Point<dim> &x = points[q];
+        // the x-velocity varies only in the directions other than x
+        values[0][q][0] = 0.;
+        for (unsigned int c=1; c<dim; ++c)
+          values[0][q][c] = -2.*x(c)*inv_radius*inv_radius;
+        // the remaining velocities are zero, and so are their gradients
+        for (unsigned int c=1; c<dim; ++c)
+          values[c][q] = 0.;
+        // the pressure gradient only has an x-component
+        values[dim][q][0] = -2*(dim-1)*inv_radius*inv_radius/Reynolds;
+        for (unsigned int c=1; c<dim; ++c)
+          values[dim][q][c] = 0.;
+      }
+  }
+
+
+
+  template<int dim>
+  void PoisseuilleFlow<dim>::vector_laplacians (
+    const std::vector<Point<dim> > &points,
+    std::vector<std::vector<double> > &values) const
+  {
+    unsigned int n = points.size();
+    (void)n; // n is otherwise only read inside the Asserts below
+    Assert(values.size() == dim+1, ExcDimensionMismatch(values.size(), dim+1));
+    for (unsigned int d=0; d<dim+1; ++d)
+      Assert(values[d].size() == n, ExcDimensionMismatch(values[d].size(), n));
+    // every entry of every component is set to zero
+    for (unsigned int comp=0; comp<values.size(); ++comp)
+      for (unsigned int q=0; q<values[comp].size(); ++q)
+        values[comp][q] = 0.;
+  }
+
+//----------------------------------------------------------------------//
+
+  template<int dim>
+  StokesCosine<dim>::StokesCosine(const double nu, const double r)
+    :
+    viscosity(nu), reaction(r) // the same two members set_parameters() overwrites
+  {}
+
+
+  template<int dim>
+  StokesCosine<dim>::~StokesCosine()
+  {} // empty body, nothing is released explicitly here
+
+
+  template<int dim>
+  void
+  StokesCosine<dim>::set_parameters(const double nu, const double r)
+  {
+    viscosity = nu; // replaces the value given to the constructor
+    reaction = r; // read by vector_laplacians()
+  }
+
+
+  template<int dim>
+  void StokesCosine<dim>::vector_values (
+    const std::vector<Point<dim> > &points,
+    std::vector<std::vector<double> > &values) const
+  {
+    unsigned int n = points.size();
+
+    Assert(values.size() == dim+1, ExcDimensionMismatch(values.size(), dim+1));
+    for (unsigned int d=0; d<dim+1; ++d)
+      Assert(values[d].size() == n, ExcDimensionMismatch(values[d].size(), n));
+
+    for (unsigned int q=0; q<n; ++q)
+      {
+        // all trigonometric arguments are the coordinates scaled by pi/2
+        const Point<dim> &pt = points[q];
+        const double x = numbers::PI/2. * pt(0);
+        const double y = numbers::PI/2. * pt(1);
+        const double cx = std::cos(x), sx = std::sin(x);
+        const double cy = std::cos(y), sy = std::sin(y);
+
+        switch (dim)
+          {
+          case 2:
+            // two velocities, then the pressure
+            values[0][q] = cx*cx*cy*sy;
+            values[1][q] = -cx*sx*cy*cy;
+            values[2][q] = cx*sx*cy*sy + this->mean_pressure;
+            break;
+          case 3:
+          {
+            const double z = numbers::PI/2. * pt(2);
+            const double cz = std::cos(z), sz = std::sin(z);
+            // three velocities, then the pressure
+            values[0][q] = cx*cx*cy*sy*cz*sz;
+            values[1][q] = cx*sx*cy*cy*cz*sz;
+            values[2][q] = -2.*cx*sx*cy*sy*cz*cz;
+            values[3][q] = cx*sx*cy*sy*cz*sz + this->mean_pressure;
+            break;
+          }
+          default:
+            Assert(false, ExcNotImplemented());
+          }
+      }
+  }
+
+
+
+  template<int dim>
+  void StokesCosine<dim>::vector_gradients (
+    const std::vector<Point<dim> > &points,
+    std::vector<std::vector<Tensor<1,dim> > > &values) const
+  {
+    unsigned int n = points.size();
+
+    Assert(values.size() == dim+1, ExcDimensionMismatch(values.size(), dim+1));
+    for (unsigned int d=0; d<dim+1; ++d)
+      Assert(values[d].size() == n, ExcDimensionMismatch(values[d].size(), n));
+
+    for (unsigned int q=0; q<n; ++q)
+      {
+        // everything below is expressed through double-angle functions
+        const Point<dim> &pt = points[q];
+        const double x = numbers::PI/2. * pt(0);
+        const double y = numbers::PI/2. * pt(1);
+        const double c2x = std::cos(2*x), s2x = std::sin(2*x);
+        const double c2y = std::cos(2*y), s2y = std::sin(2*y);
+        const double cx2 = .5+.5*c2x;               // cos^2 x
+        const double cy2 = .5+.5*c2y;               // cos^2 y
+
+        switch (dim)
+          {
+          case 2:
+            // first index: component (velocities, then pressure)
+            values[0][q][0] = -.25*numbers::PI * s2x*s2y;
+            values[0][q][1] =  .5 *numbers::PI * cx2*c2y;
+            values[1][q][0] = -.5 *numbers::PI * c2x*cy2;
+            values[1][q][1] =  .25*numbers::PI * s2x*s2y;
+            values[2][q][0] =  .25*numbers::PI * c2x*s2y;
+            values[2][q][1] =  .25*numbers::PI * s2x*c2y;
+            break;
+          case 3:
+          {
+            const double z = numbers::PI/2. * pt(2);
+            const double c2z = std::cos(2*z), s2z = std::sin(2*z);
+            const double cz2 = .5+.5*c2z;               // cos^2 z
+
+            values[0][q][0] = -.125*numbers::PI * s2x*s2y*s2z;
+            values[0][q][1] =  .25 *numbers::PI * cx2*c2y*s2z;
+            values[0][q][2] =  .25 *numbers::PI * cx2*s2y*c2z;
+
+            values[1][q][0] =  .25 *numbers::PI * c2x*cy2*s2z;
+            values[1][q][1] = -.125*numbers::PI * s2x*s2y*s2z;
+            values[1][q][2] =  .25 *numbers::PI * s2x*cy2*c2z;
+
+            values[2][q][0] = -.5  *numbers::PI * c2x*s2y*cz2;
+            values[2][q][1] = -.5  *numbers::PI * s2x*c2y*cz2;
+            values[2][q][2] =  .25 *numbers::PI * s2x*s2y*s2z;
+
+            values[3][q][0] = .125*numbers::PI * c2x*s2y*s2z;
+            values[3][q][1] = .125*numbers::PI * s2x*c2y*s2z;
+            values[3][q][2] = .125*numbers::PI * s2x*s2y*c2z;
+            break;
+          }
+          default:
+            Assert(false, ExcNotImplemented());
+          }
+      }
+  }
+
+
+
+  // Evaluate the right hand side matching this flow field at each point.
+  //
+  // For the first dim components the result is
+  //   -reaction * (value) + viscosity * (Laplacian term) - (pressure gradient),
+  // where the reaction part is only evaluated if reaction is nonzero. The
+  // last component (index dim) is always set to zero.
+  //
+  // values must hold dim+1 components of points.size() entries each; this is
+  // checked by the assertions below.
+  template<int dim>
+  void StokesCosine<dim>::vector_laplacians (
+    const std::vector<Point<dim> > &points,
+    std::vector<std::vector<double> > &values) const
+  {
+    unsigned int n = points.size();
+
+    Assert(values.size() == dim+1, ExcDimensionMismatch(values.size(), dim+1));
+    for (unsigned int d=0; d<dim+1; ++d)
+      Assert(values[d].size() == n, ExcDimensionMismatch(values[d].size(), n));
+
+    // Initialize the first dim components: either with the reaction
+    // contribution -reaction*u or with zero. The terms below are then added
+    // on top of this.
+    if (reaction != 0.)
+      {
+        vector_values(points, values);
+        for (unsigned int d=0; d<dim; ++d)
+          for (unsigned int k=0; k<values[d].size(); ++k)
+            values[d][k] *= -reaction;
+      }
+    else
+      {
+        for (unsigned int d=0; d<dim; ++d)
+          for (unsigned int k=0; k<values[d].size(); ++k)
+            values[d][k] = 0.;
+      }
+
+
+    for (unsigned int k=0; k<n; ++k)
+      {
+        const Point<dim> &p = points[k];
+        // Coordinates are scaled by pi/2, as in vector_gradients.
+        const double x = numbers::PI/2. * p(0);
+        const double y = numbers::PI/2. * p(1);
+        const double c2x = cos(2*x);
+        const double c2y = cos(2*y);
+        const double s2x = sin(2*x);
+        const double s2y = sin(2*y);
+        const double pi2 = .25 * numbers::PI * numbers::PI;
+
+        if (dim==2)
+          {
+            // viscous term minus pressure gradient
+            values[0][k] += - viscosity*pi2 * (1.+2.*c2x) * s2y - numbers::PI/4. * c2x*s2y;
+            values[1][k] +=   viscosity*pi2 * s2x * (1.+2.*c2y) - numbers::PI/4. * s2x*c2y;
+            values[2][k] = 0.;
+          }
+        else if (dim==3)
+          {
+            // The third coordinate must use the same pi/2 scaling as x and
+            // y. Otherwise the pressure gradient terms subtracted below
+            // (PI/8 * ...) differ from the pressure gradient reported by
+            // vector_gradients, which is evaluated with z = pi/2 * p(2).
+            const double z = numbers::PI/2. * p(2);
+            const double c2z = cos(2*z);
+            const double s2z = sin(2*z);
+
+            // NOTE(review): the viscous coefficients of the 3D case could
+            // not be matched by hand against the Laplacian of the 3D
+            // velocity set in vector_values -- re-derive and confirm.
+            values[0][k] += - .5*viscosity*pi2 * (1.+2.*c2x) * s2y * s2z - numbers::PI/8. * c2x * s2y * s2z;
+            values[1][k] +=   .5*viscosity*pi2 * s2x * (1.+2.*c2y) * s2z - numbers::PI/8. * s2x * c2y * s2z;
+            values[2][k] += - .5*viscosity*pi2 * s2x * s2y * (1.+2.*c2z) - numbers::PI/8. * s2x * s2y * c2z;
+            values[3][k] = 0.;
+          }
+        else
+          {
+            // Only 2D and 3D are implemented.
+            Assert(false, ExcNotImplemented());
+          }
+      }
+  }
+
+
+//----------------------------------------------------------------------//
+
+  // Exponent of the radial factor r^lambda used in the velocity formulas
+  // of vector_values.
+  const double StokesLSingularity::lambda = 0.54448373678246;
+
+  // Precompute the constants used by Psi and its derivatives:
+  // omega = 3*pi/2, coslo = cos(lambda*omega), lp = 1+lambda, lm = 1-lambda.
+  StokesLSingularity::StokesLSingularity()
+    :
+    omega (3./2.*numbers::PI),
+    coslo (cos(lambda *omega)),
+    lp(1.+lambda),
+    lm(1.-lambda)
+  {}
+
+
+  // Angular profile Psi(phi) from which velocity and pressure are built in
+  // vector_values; uses the precomputed constants coslo, lp and lm.
+  inline
+  double
+  StokesLSingularity::Psi(double phi) const
+  {
+    return coslo * (sin(lp*phi)/lp - sin(lm*phi)/lm)
+           - cos(lp*phi) + cos(lm*phi);
+  }
+
+
+  // First derivative of Psi with respect to phi.
+  inline
+  double
+  StokesLSingularity::Psi_1(double phi) const
+  {
+    return coslo * (cos(lp*phi) - cos(lm*phi))
+           + lp*sin(lp*phi) - lm*sin(lm*phi);
+  }
+
+
+  // Second derivative of Psi with respect to phi.
+  inline
+  double
+  StokesLSingularity::Psi_2(double phi) const
+  {
+    return coslo * (lm*sin(lm*phi) - lp*sin(lp*phi))
+           + lp*lp*cos(lp*phi) - lm*lm*cos(lm*phi);
+  }
+
+
+  // Third derivative of Psi with respect to phi.
+  inline
+  double
+  StokesLSingularity::Psi_3(double phi) const
+  {
+    return coslo * (lm*lm*cos(lm*phi) - lp*lp*cos(lp*phi))
+           + lm*lm*lm*sin(lm*phi) - lp*lp*lp*sin(lp*phi);
+  }
+
+
+  // Fourth derivative of Psi with respect to phi.
+  inline
+  double
+  StokesLSingularity::Psi_4(double phi) const
+  {
+    return coslo * (lp*lp*lp*sin(lp*phi) - lm*lm*lm*sin(lm*phi))
+           + lm*lm*lm*lm*cos(lm*phi) - lp*lp*lp*lp*cos(lp*phi);
+  }
+
+
+  // Evaluate the three solution components at each point: values[0] and
+  // values[1] are the velocity components, values[2] is the pressure
+  // (shifted by mean_pressure). Points with x>=0 and y>=0 get zero in all
+  // three components.
+  //
+  // values must hold 3 components of points.size() entries each.
+  void StokesLSingularity::vector_values (
+    const std::vector<Point<2> > &points,
+    std::vector<std::vector<double> > &values) const
+  {
+    unsigned int n = points.size();
+
+    Assert(values.size() == 2+1, ExcDimensionMismatch(values.size(), 2+1));
+    for (unsigned int d=0; d<2+1; ++d)
+      Assert(values[d].size() == n, ExcDimensionMismatch(values[d].size(), n));
+
+    for (unsigned int k=0; k<n; ++k)
+      {
+        const Point<2> &p = points[k];
+        const double x = p(0);
+        const double y = p(1);
+
+        if ((x<0) || (y<0))
+          {
+            // Polar angle chosen such that x = r*cos(phi), y = -r*sin(phi)
+            const double phi = std::atan2(y,-x)+numbers::PI;
+            const double r2 = x*x+y*y;
+            const double rl = pow(r2,lambda/2.);         // r^lambda
+            const double rl1 = pow(r2,lambda/2.-.5);     // r^(lambda-1)
+            values[0][k] = rl * (lp*sin(phi)*Psi(phi) + cos(phi)*Psi_1(phi));
+            values[1][k] = rl * (lp*cos(phi)*Psi(phi) - sin(phi)*Psi_1(phi));
+            values[2][k] = -rl1 * (lp*lp*Psi_1(phi) + Psi_3(phi)) / lm + this->mean_pressure;
+          }
+        else
+          {
+            // Closed first quadrant: all components, including the
+            // pressure, are set to zero (no mean_pressure shift here).
+            for (unsigned int d=0; d<3; ++d)
+              values[d][k] = 0.;
+          }
+      }
+  }
+
+
+
+  // Evaluate the gradients of the three solution components at each point.
+  // The derivatives are first computed in polar coordinates (r, phi) and
+  // then converted to Cartesian derivatives. Points with x>=0 and y>=0 get
+  // zero gradients.
+  //
+  // values must hold 3 components of points.size() entries each.
+  void StokesLSingularity::vector_gradients (
+    const std::vector<Point<2> > &points,
+    std::vector<std::vector<Tensor<1,2> > > &values) const
+  {
+    unsigned int n = points.size();
+
+    Assert(values.size() == 2+1, ExcDimensionMismatch(values.size(), 2+1));
+    for (unsigned int d=0; d<2+1; ++d)
+      Assert(values[d].size() == n, ExcDimensionMismatch(values[d].size(), n));
+
+    for (unsigned int k=0; k<n; ++k)
+      {
+        const Point<2> &p = points[k];
+        const double x = p(0);
+        const double y = p(1);
+
+        if ((x<0) || (y<0))
+          {
+            // Polar angle chosen such that x = r*cos(phi), y = -r*sin(phi)
+            const double phi = std::atan2(y,-x)+numbers::PI;
+            const double r2 = x*x+y*y;
+            const double r = sqrt(r2);
+            const double rl = pow(r2,lambda/2.);         // r^lambda
+            const double rl1 = pow(r2,lambda/2.-.5);     // r^(lambda-1)
+            const double rl2 = pow(r2,lambda/2.-1.);     // r^(lambda-2)
+            const double psi =Psi(phi);
+            const double psi1=Psi_1(phi);
+            const double psi2=Psi_2(phi);
+            const double cosp= cos(phi);
+            const double sinp= sin(phi);
+
+            // Derivatives of u with respect to r, phi
+            const double udr = lambda * rl1 * (lp*sinp*psi + cosp*psi1);
+            const double udp = rl * (lp*cosp*psi + lp*sinp*psi1 - sinp*psi1 + cosp*psi2);
+            // Derivatives of v with respect to r, phi
+            const double vdr = lambda * rl1 * (lp*cosp*psi - sinp*psi1);
+            const double vdp = rl * (lp*(cosp*psi1 - sinp*psi) - cosp*psi1 - sinp*psi2);
+            // Derivatives of p with respect to r, phi
+            const double pdr = -(lambda-1.) * rl2 * (lp*lp*psi1+Psi_3(phi)) / lm;
+            const double pdp = -rl1 * (lp*lp*psi2+Psi_4(phi)) / lm;
+            // Chain rule for the angle convention above:
+            //   d/dx =  cos(phi) d/dr - sin(phi)/r d/dphi
+            //   d/dy = -sin(phi) d/dr - cos(phi)/r d/dphi
+            values[0][k][0] = cosp*udr - sinp/r*udp;
+            values[0][k][1] = - sinp*udr - cosp/r*udp;
+            values[1][k][0] = cosp*vdr - sinp/r*vdp;
+            values[1][k][1] = - sinp*vdr - cosp/r*vdp;
+            values[2][k][0] = cosp*pdr - sinp/r*pdp;
+            values[2][k][1] = - sinp*pdr - cosp/r*pdp;
+          }
+        else
+          {
+            // Closed first quadrant: all gradients are zero.
+            for (unsigned int d=0; d<3; ++d)
+              values[d][k] = 0.;
+          }
+      }
+  }
+
+
+
+  // Fill every entry of every component of values with zero. The points are
+  // only used for the size checks in debug mode.
+  void StokesLSingularity::vector_laplacians (
+    const std::vector<Point<2> > &points,
+    std::vector<std::vector<double> > &values) const
+  {
+    unsigned int n = points.size();
+    // n is only referenced inside Assert; avoid an unused-variable warning
+    // when assertions are compiled out
+    (void)n;
+    Assert(values.size() == 2+1, ExcDimensionMismatch(values.size(), 2+1));
+    for (unsigned int d=0; d<2+1; ++d)
+      Assert(values[d].size() == n, ExcDimensionMismatch(values[d].size(), n));
+
+    typedef std::vector<std::vector<double> >::iterator component_iterator;
+    typedef std::vector<double>::iterator               entry_iterator;
+
+    for (component_iterator component=values.begin();
+         component!=values.end(); ++component)
+      for (entry_iterator entry=component->begin();
+           entry!=component->end(); ++entry)
+        *entry = 0.;
+  }
+
+
+//----------------------------------------------------------------------//
+
+  // Store the Reynolds number and the stokes flag, then precompute
+  //   lbda      = -b / (Re/2 + sqrt((Re/2)^2 + b)),  with b = 4*pi^2,
+  // and the pressure average p_average. The intermediate values are
+  // computed in long double before being stored.
+  Kovasznay::Kovasznay(double Re, bool stokes)
+    :
+    Reynolds(Re),
+    stokes(stokes)
+  {
+    long double r2 = Reynolds/2.;
+    long double b = 4*numbers::PI*numbers::PI;
+    long double l = -b/(r2+std::sqrt(r2*r2+b));
+    lbda = l;
+    // mean pressure for a domain
+    // spreading from -.5 to 1.5 in
+    // x-direction
+    p_average = 1/(8*l)*(std::exp(3.*l)-std::exp(-l));
+  }
+
+
+  // Destructor; nothing to release explicitly.
+  Kovasznay::~Kovasznay()
+  {}
+
+
+  // Evaluate the solution at each point:
+  //   values[0] = 1 - exp(lbda*x) * cos(2*pi*y)
+  //   values[1] = lbda/(2*pi) * exp(lbda*x) * sin(2*pi*y)
+  //   values[2] = -exp(2*lbda*x)/2 + p_average + mean_pressure
+  //
+  // values must hold 3 components of points.size() entries each.
+  void Kovasznay::vector_values (
+    const std::vector<Point<2> > &points,
+    std::vector<std::vector<double> > &values) const
+  {
+    unsigned int n = points.size();
+
+    Assert(values.size() == 2+1, ExcDimensionMismatch(values.size(), 2+1));
+    for (unsigned int d=0; d<2+1; ++d)
+      Assert(values[d].size() == n, ExcDimensionMismatch(values[d].size(), n));
+
+    for (unsigned int k=0; k<n; ++k)
+      {
+        const Point<2> &p = points[k];
+        const double x = p(0);
+        // note: y is already scaled by 2*pi here
+        const double y = 2. * numbers::PI * p(1);
+        const double elx = std::exp(lbda*x);
+
+        values[0][k] = 1. - elx * cos(y);
+        values[1][k] = .5 / numbers::PI * lbda * elx * sin(y);
+        values[2][k] = -.5 * elx * elx + p_average + this->mean_pressure;
+      }
+  }
+
+
+  // Evaluate the gradients of the three solution components at each point.
+  // gradients[d][i] receives the gradient of component d at points[i].
+  //
+  // Only the number of components and the length of gradients[0] are
+  // checked by the assertions; the other components must have the same
+  // length.
+  void Kovasznay::vector_gradients (
+    const std::vector<Point<2> > &points,
+    std::vector<std::vector<Tensor<1,2> > > &gradients) const
+  {
+    unsigned int n = points.size();
+
+    Assert (gradients.size() == 3, ExcDimensionMismatch(gradients.size(), 3));
+    Assert (gradients[0].size() == n,
+            ExcDimensionMismatch(gradients[0].size(), n));
+
+    for (unsigned int i=0; i<n; ++i)
+      {
+        const double x = points[i](0);
+        const double y = points[i](1);
+
+        const double elx = std::exp(lbda*x);
+        const double cy = cos(2*numbers::PI*y);
+        const double sy = sin(2*numbers::PI*y);
+
+        // u
+        gradients[0][i][0] = -lbda*elx*cy;
+        gradients[0][i][1] = 2. * numbers::PI*elx*sy;
+        // v
+        gradients[1][i][0] = lbda*lbda/(2*numbers::PI)*elx*sy;
+        gradients[1][i][1] =lbda*elx*cy;
+        // p
+        gradients[2][i][0] = -lbda*elx*elx;
+        gradients[2][i][1] = 0.;
+      }
+  }
+
+
+
+  // Fill values depending on the stokes flag given to the constructor:
+  //  - stokes == true:  components 0 and 1 receive the convection term
+  //    (u*u_x + v*u_y, u*v_x + v*v_y) of the velocity field, and
+  //    component 2 is set to zero;
+  //  - stokes == false: all entries are set to zero.
+  //
+  // values must hold 3 components of points.size() entries each.
+  void Kovasznay::vector_laplacians (
+    const std::vector<Point<2> > &points,
+    std::vector<std::vector<double> > &values) const
+  {
+    unsigned int n = points.size();
+    Assert(values.size() == 2+1, ExcDimensionMismatch(values.size(), 2+1));
+    for (unsigned int d=0; d<2+1; ++d)
+      Assert(values[d].size() == n, ExcDimensionMismatch(values[d].size(), n));
+
+    if (stokes)
+      {
+        const double zp = 2. * numbers::PI;
+        for (unsigned int k=0; k<n; ++k)
+          {
+            const Point<2> &p = points[k];
+            const double x = p(0);
+            const double y = zp * p(1);
+            const double elx = std::exp(lbda*x);
+            // velocity (u,v) and its first derivatives, same formulas as
+            // in vector_values and vector_gradients
+            const double u  = 1. - elx * cos(y);
+            const double ux = -lbda * elx * cos(y);
+            const double uy = elx * zp * sin(y);
+            const double v  = lbda/zp * elx * sin(y);
+            const double vx = lbda*lbda/zp * elx * sin(y);
+            const double vy = zp*lbda/zp * elx * cos(y);
+
+            values[0][k] = u*ux+v*uy;
+            values[1][k] = u*vx+v*vy;
+            values[2][k] = 0.;
+          }
+      }
+    else
+      {
+        for (unsigned int d=0; d<values.size(); ++d)
+          for (unsigned int k=0; k<values[d].size(); ++k)
+            values[d][k] = 0.;
+      }
+  }
+
+  // Return the exponent lbda computed in the constructor.
+  double
+  Kovasznay::lambda () const
+  {
+    return lbda;
+  }
+
+
+
+  // Explicit instantiations of the class templates of this file for
+  // dim = 2 and 3. StokesLSingularity and Kovasznay are not templates and
+  // therefore need none.
+  template class FlowFunction<2>;
+  template class FlowFunction<3>;
+  template class PoisseuilleFlow<2>;
+  template class PoisseuilleFlow<3>;
+  template class StokesCosine<2>;
+  template class StokesCosine<3>;
+}
+
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/function.cc b/source/base/function.cc
new file mode 100644
index 0000000..ffe3fb2
--- /dev/null
+++ b/source/base/function.cc
@@ -0,0 +1,23 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/function.templates.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+// explicit instantiations
+#include "function.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/function.inst.in b/source/base/function.inst.in
new file mode 100644
index 0000000..26711a7
--- /dev/null
+++ b/source/base/function.inst.in
@@ -0,0 +1,37 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+// Explicit instantiations of the function classes for every pairing of a
+// scalar type S from REAL_SCALARS with a dimension from SPACE_DIMENSIONS
+// (both lists are defined outside this file).
+for (S : REAL_SCALARS; dim : SPACE_DIMENSIONS)
+{
+  template class Function<dim, S>;
+  template class ZeroFunction<dim, S>;
+  template class ConstantFunction<dim, S>;
+  template class ComponentSelectFunction<dim, S>;
+  template class ScalarFunctionFromFunctionObject<dim, S>;
+  template class VectorFunctionFromScalarFunctionObject<dim, S>;
+  template class VectorFunctionFromTensorFunction<dim, S>;
+}
+
+// The same set of classes, instantiated for every pairing of a scalar type
+// S from COMPLEX_SCALARS with a dimension from SPACE_DIMENSIONS.
+for (S : COMPLEX_SCALARS; dim : SPACE_DIMENSIONS)
+{
+  template class Function<dim, S>;
+  template class ZeroFunction<dim, S>;
+  template class ConstantFunction<dim, S>;
+  template class ComponentSelectFunction<dim, S>;
+  template class ScalarFunctionFromFunctionObject<dim, S>;
+  template class VectorFunctionFromScalarFunctionObject<dim, S>;
+  template class VectorFunctionFromTensorFunction<dim, S>;
+}
diff --git a/source/base/function_derivative.cc b/source/base/function_derivative.cc
new file mode 100644
index 0000000..8395d70
--- /dev/null
+++ b/source/base/function_derivative.cc
@@ -0,0 +1,225 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/point.h>
+#include <deal.II/base/function_derivative.h>
+#include <deal.II/lac/vector.h>
+
+#include <cmath>
+
+DEAL_II_NAMESPACE_OPEN
+
+// Constructor for a single, constant direction: the derivative of f is
+// approximated along dir with step size h. incr holds the single increment
+// h*dir. The difference formula is selected by calling set_formula()
+// without an argument, i.e. with its default.
+template <int dim>
+FunctionDerivative<dim>::FunctionDerivative (const Function<dim> &f,
+                                             const Point<dim>    &dir,
+                                             const double         h)
+  :
+  AutoDerivativeFunction<dim> (h, f.n_components, f.get_time()),
+  f(f),
+  h(h),
+  incr(1, h *dir)
+{
+  set_formula();
+}
+
+
+
+// Constructor for a list of directions: one increment h*dir[i] is stored
+// per entry of dir. value_list() later pairs these increments with its
+// points one by one. The difference formula is selected by calling
+// set_formula() without an argument, i.e. with its default.
+template <int dim>
+FunctionDerivative<dim>::FunctionDerivative (const Function<dim> &f,
+                                             const std::vector<Point<dim> > &dir,
+                                             const double h)
+  :
+  AutoDerivativeFunction<dim> (h, f.n_components, f.get_time()),
+  f(f),
+  h(h),
+  incr(dir.size())
+{
+  // incr was sized to dir.size() above; scale every direction by the
+  // step size
+  const unsigned int n_directions = incr.size ();
+  for (unsigned int direction=0; direction<n_directions; ++direction)
+    incr[direction] = h*dir[direction];
+
+  set_formula();
+}
+
+
+
+// Select the difference formula used by value(), vector_value() and
+// value_list().
+template <int dim>
+void
+FunctionDerivative<dim>::set_formula (typename AutoDerivativeFunction<dim>::DifferenceFormula form)
+{
+  formula = form;
+}
+
+
+
+// Change the step size to new_h. The stored increments are multiples of the
+// old step size, so each one is rescaled by new_h/h before h is replaced.
+template <int dim>
+void
+FunctionDerivative<dim>::set_h (const double new_h)
+{
+  const double       rescaling    = new_h/h;
+  const unsigned int n_increments = incr.size ();
+
+  for (unsigned int k=0; k<n_increments; ++k)
+    incr[k] *= rescaling;
+
+  h = new_h;
+}
+
+
+
+// Approximate the directional derivative of one component of f at p with
+// the selected difference formula. Requires that exactly one increment is
+// stored (constant direction); otherwise the assertion below triggers.
+template <int dim>
+double
+FunctionDerivative<dim>::value (const Point<dim>   &p,
+                                const unsigned int  component) const
+{
+  Assert (incr.size() == 1,
+          ExcMessage ("FunctionDerivative was not initialized for constant direction"));
+
+  switch (formula)
+    {
+    // symmetric difference over p+incr and p-incr
+    case AutoDerivativeFunction<dim>::Euler:
+      return (f.value(p+incr[0], component)-f.value(p-incr[0], component))/(2*h);
+    // one-sided difference over p and p-incr
+    case AutoDerivativeFunction<dim>::UpwindEuler:
+      return (f.value(p, component)-f.value(p-incr[0], component))/h;
+    // five-point stencil using p +- incr and p +- 2*incr
+    case AutoDerivativeFunction<dim>::FourthOrder:
+      return (-f.value(p+2*incr[0], component) + 8*f.value(p+incr[0], component)
+              -8*f.value(p-incr[0], component) + f.value(p-2*incr[0], component))/(12*h);
+    default:
+      Assert(false, ExcInvalidFormula());
+    }
+  // only reached for an invalid formula when assertions are disabled
+  return 0.;
+}
+
+
+
+// Approximate the directional derivative of all components of f at p and
+// store it in result. Requires that exactly one increment is stored
+// (constant direction). A temporary vector of the same size as result is
+// used for the second function evaluation.
+template <int dim>
+void
+FunctionDerivative<dim>::vector_value (
+  const Point<dim>   &p,
+  Vector<double> &result) const
+{
+  Assert (incr.size() == 1,
+          ExcMessage ("FunctionDerivative was not initialized for constant direction"));
+  Vector<double> aux(result.size());
+
+  // Formulas are the same as in
+  // value, but here we have to use
+  // Vector arithmetic
+  switch (formula)
+    {
+    case AutoDerivativeFunction<dim>::Euler:
+      // result = (f(p+incr) - f(p-incr)) / (2h)
+      f.vector_value(p+incr[0], result);
+      f.vector_value(p-incr[0], aux);
+      result.sadd(1./(2*h), -1./(2*h), aux);
+      return;
+    case AutoDerivativeFunction<dim>::UpwindEuler:
+      // result = (f(p) - f(p-incr)) / h
+      f.vector_value(p, result);
+      f.vector_value(p-incr[0], aux);
+      result.sadd(1./h, -1./h, aux);
+      return;
+    case AutoDerivativeFunction<dim>::FourthOrder:
+      // result = (f(p-2incr) - f(p+2incr) - 8 f(p-incr) + 8 f(p+incr)) / (12h)
+      f.vector_value(p-2*incr[0], result);
+      f.vector_value(p+2*incr[0], aux);
+      result.add(-1., aux);
+      f.vector_value(p-incr[0], aux);
+      result.add(-8., aux);
+      f.vector_value(p+incr[0], aux);
+      result.add(8., aux);
+      result/=(12.*h);
+      return;
+    default:
+      Assert(false, ExcInvalidFormula());
+    }
+}
+
+
+
+// Approximate the directional derivative of one component of f at all given
+// points. If exactly one increment is stored it is used for every point;
+// otherwise increment i is used for point i, and the number of increments
+// must equal the number of points.
+template <int dim>
+void
+FunctionDerivative<dim>::value_list (const std::vector<Point<dim> > &points,
+                                     std::vector<double>            &values,
+                                     const unsigned int              component) const
+{
+  const unsigned int n = points.size();
+  const bool variable_direction = (incr.size() == 1) ? false : true;
+  if (variable_direction)
+    Assert (incr.size() == points.size(),
+            ExcDimensionMismatch(incr.size(), points.size()));
+
+  // Vector of auxiliary values
+  std::vector<double> aux(n);
+  // Vector of auxiliary points
+  std::vector<Point<dim> > paux(n);
+  // Use the same formulas as in
+  // value, but with vector
+  // arithmetic
+  switch (formula)
+    {
+    case AutoDerivativeFunction<dim>::Euler:
+      // values = (f(p+incr) - f(p-incr)) / (2h)
+      for (unsigned int i=0; i<n; ++i)
+        paux[i] = points[i]+incr[(variable_direction) ? i : 0U];
+      f.value_list(paux, values, component);
+      for (unsigned int i=0; i<n; ++i)
+        paux[i] = points[i]-incr[(variable_direction) ? i : 0U];
+      f.value_list(paux, aux, component);
+      for (unsigned int i=0; i<n; ++i)
+        values[i] = (values[i]-aux[i])/(2*h);
+      return;
+    case AutoDerivativeFunction<dim>::UpwindEuler:
+      // values = (f(p) - f(p-incr)) / h
+      f.value_list(points, values, component);
+      for (unsigned int i=0; i<n; ++i)
+        paux[i] = points[i]-incr[(variable_direction) ? i : 0U];
+      f.value_list(paux, aux, component);
+      for (unsigned int i=0; i<n; ++i)
+        values[i] = (values[i]-aux[i])/h;
+      return;
+    case AutoDerivativeFunction<dim>::FourthOrder:
+      // values = (f(p-2incr) - f(p+2incr) + 8 f(p+incr) - 8 f(p-incr)) / (12h),
+      // accumulated term by term
+      for (unsigned int i=0; i<n; ++i)
+        paux[i] = points[i]-2*incr[(variable_direction) ? i : 0U];
+      f.value_list(paux, values, component);
+      for (unsigned int i=0; i<n; ++i)
+        paux[i] = points[i]+2*incr[(variable_direction) ? i : 0U];
+      f.value_list(paux, aux, component);
+      for (unsigned int i=0; i<n; ++i)
+        values[i] -= aux[i];
+      for (unsigned int i=0; i<n; ++i)
+        paux[i] = points[i]+incr[(variable_direction) ? i : 0U];
+      f.value_list(paux, aux, component);
+      for (unsigned int i=0; i<n; ++i)
+        values[i] += 8.*aux[i];
+      for (unsigned int i=0; i<n; ++i)
+        paux[i] = points[i]-incr[(variable_direction) ? i : 0U];
+      f.value_list(paux, aux, component);
+      for (unsigned int i=0; i<n; ++i)
+        values[i] = (values[i] - 8.*aux[i])/(12*h);
+      return;
+    default:
+      Assert(false, ExcInvalidFormula());
+    }
+}
+
+
+
+// Return the size of this object as reported by the sizeof operator.
+template <int dim>
+std::size_t
+FunctionDerivative<dim>::memory_consumption () const
+{
+  // NOTE(review): incr is a dynamically sized container (see the
+  // constructors); sizeof does not cover the storage of its elements.
+  // Confirm whether that storage should be added here.
+  return sizeof (*this);
+}
+
+
+
+// explicit instantiations for dim = 1, 2 and 3
+template class FunctionDerivative<1>;
+template class FunctionDerivative<2>;
+template class FunctionDerivative<3>;
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/function_lib.cc b/source/base/function_lib.cc
new file mode 100644
index 0000000..b276365
--- /dev/null
+++ b/source/base/function_lib.cc
@@ -0,0 +1,2760 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/tensor.h>
+#include <deal.II/base/point.h>
+#include <deal.II/base/function_lib.h>
+#include <deal.II/base/function_bessel.h>
+#include <deal.II/lac/vector.h>
+
+#include <cmath>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// in strict ANSI C mode, the following constants are not defined by
+// default, so we do it ourselves: M_PI is pi and M_PI_2 is pi/2. The
+// #ifndef guards keep any definition already provided by the system
+// headers.
+#ifndef M_PI
+#  define       M_PI            3.14159265358979323846
+#endif
+
+#ifndef M_PI_2
+#  define       M_PI_2          1.57079632679489661923
+#endif
+
+
+
+namespace Functions
+{
+
+
+  // Return p.square(); the component argument is ignored.
+  template<int dim>
+  double
+  SquareFunction<dim>::value (const Point<dim>   &p,
+                              const unsigned int) const
+  {
+    return p.square();
+  }
+
+
+  // Vector-valued variant of value(): values must have exactly one entry,
+  // which receives p.square().
+  template<int dim>
+  void
+  SquareFunction<dim>::vector_value (const Point<dim>   &p,
+                                     Vector<double>     &values) const
+  {
+    AssertDimension(values.size(), 1);
+    values(0) = p.square();
+  }
+
+
+  // Evaluate the function at a list of points: values[i] receives
+  // points[i].square(). values must have the same length as points; the
+  // component argument is ignored.
+  template<int dim>
+  void
+  SquareFunction<dim>::value_list (const std::vector<Point<dim> > &points,
+                                   std::vector<double>            &values,
+                                   const unsigned int) const
+  {
+    Assert (values.size() == points.size(),
+            ExcDimensionMismatch(values.size(), points.size()));
+
+    const unsigned int n_points = points.size();
+    for (unsigned int q=0; q<n_points; ++q)
+      values[q] = points[q].square();
+  }
+
+
+  // Return the constant 2*dim, independent of the point and the component.
+  template<int dim>
+  double
+  SquareFunction<dim>::laplacian (const Point<dim> &,
+                                  const unsigned int) const
+  {
+    return 2*dim;
+  }
+
+
+  // List variant of laplacian(): one constant entry 2*dim is written per
+  // point. values must have the same length as points; the component
+  // argument is ignored.
+  template<int dim>
+  void
+  SquareFunction<dim>::laplacian_list (const std::vector<Point<dim> > &points,
+                                       std::vector<double>            &values,
+                                       const unsigned int) const
+  {
+    Assert (values.size() == points.size(),
+            ExcDimensionMismatch(values.size(), points.size()));
+
+    const unsigned int n_points = points.size();
+    unsigned int       q        = 0;
+    while (q<n_points)
+      {
+        values[q] = 2*dim;
+        ++q;
+      }
+  }
+
+
+
+  // Return 2*p as the gradient; the component argument is ignored.
+  template<int dim>
+  Tensor<1,dim>
+  SquareFunction<dim>::gradient (const Point<dim>   &p,
+                                 const unsigned int) const
+  {
+    return p*2.;
+  }
+
+
+  // Vector-valued variant of gradient(): values must have exactly one
+  // entry, which receives 2*p.
+  template<int dim>
+  void
+  SquareFunction<dim>::vector_gradient (
+    const Point<dim> &p,
+    std::vector<Tensor<1,dim> > &values) const
+  {
+    AssertDimension(values.size(), 1);
+    values[0] = p*2.;
+  }
+
+
+
+  // List variant of gradient(): gradients[i] receives twice the coordinate
+  // vector of points[i]. gradients must have the same length as points;
+  // the component argument is ignored.
+  template<int dim>
+  void
+  SquareFunction<dim>::gradient_list (const std::vector<Point<dim> > &points,
+                                      std::vector<Tensor<1,dim> >    &gradients,
+                                      const unsigned int) const
+  {
+    Assert (gradients.size() == points.size(),
+            ExcDimensionMismatch(gradients.size(), points.size()));
+
+    const unsigned int n_points = points.size();
+    for (unsigned int q=0; q<n_points; ++q)
+      {
+        // view the point as a plain rank-1 tensor before scaling
+        const Tensor<1,dim> position (points[q]);
+        gradients[q] = position*2;
+      }
+  }
+
+
+//////////////////////////////////////////////////////////////////////
+
+
+  // Return the product of the first two coordinates, p(0)*p(1). Requires
+  // dim>=2; the component argument is ignored.
+  template<int dim>
+  double
+  Q1WedgeFunction<dim>::value (const Point<dim>   &p,
+                               const unsigned int) const
+  {
+    Assert (dim>=2, ExcInternalError());
+    return p(0)*p(1);
+  }
+
+
+
+  // List variant of value(): values[i] receives the product of the first
+  // two coordinates of points[i]. Requires dim>=2 and values of the same
+  // length as points; the component argument is ignored.
+  template<int dim>
+  void
+  Q1WedgeFunction<dim>::value_list (const std::vector<Point<dim> > &points,
+                                    std::vector<double>            &values,
+                                    const unsigned int) const
+  {
+    Assert (dim>=2, ExcInternalError());
+    Assert (values.size() == points.size(),
+            ExcDimensionMismatch(values.size(), points.size()));
+
+    const unsigned int n_points = points.size();
+    for (unsigned int q=0; q<n_points; ++q)
+      values[q] = points[q](0)*points[q](1);
+  }
+
+
+  // Vector-valued list variant of value(): the single entry of values[i]
+  // receives the product of the first two coordinates of points[i].
+  // Requires dim>=2, values of the same length as points, and one
+  // component per entry (checked on the first entry only).
+  template<int dim>
+  void
+  Q1WedgeFunction<dim>::vector_value_list (
+    const std::vector<Point<dim> > &points,
+    std::vector<Vector<double> > &values) const
+  {
+    Assert (dim>=2, ExcInternalError());
+    Assert (values.size() == points.size(),
+            ExcDimensionMismatch(values.size(), points.size()));
+    Assert(values[0].size() == 1, ExcDimensionMismatch(values[0].size(), 1));
+
+    const unsigned int n_points = points.size();
+    for (unsigned int q=0; q<n_points; ++q)
+      values[q](0) = points[q](0)*points[q](1);
+  }
+
+
+  // Return zero, independent of the point and the component. Requires
+  // dim>=2.
+  template<int dim>
+  double
+  Q1WedgeFunction<dim>::laplacian (const Point<dim> &,
+                                   const unsigned int) const
+  {
+    Assert (dim>=2, ExcInternalError());
+    return 0.;
+  }
+
+
+  // List variant of laplacian(): one zero entry is written per point.
+  // Requires dim>=2 and values of the same length as points; the component
+  // argument is ignored.
+  template<int dim>
+  void
+  Q1WedgeFunction<dim>::laplacian_list (const std::vector<Point<dim> > &points,
+                                        std::vector<double>            &values,
+                                        const unsigned int) const
+  {
+    Assert (dim>=2, ExcInternalError());
+    Assert (values.size() == points.size(),
+            ExcDimensionMismatch(values.size(), points.size()));
+
+    const unsigned int n_points = points.size();
+    unsigned int       q        = 0;
+    while (q<n_points)
+      {
+        values[q] = 0.;
+        ++q;
+      }
+  }
+
+
+
+  // Return the gradient of p(0)*p(1): component 0 is p(1), component 1 is
+  // p(0). Any further components keep the value given to them by the
+  // default constructor of Tensor (presumably zero -- confirm). Requires
+  // dim>=2; the component argument is ignored.
+  template<int dim>
+  Tensor<1,dim>
+  Q1WedgeFunction<dim>::gradient (const Point<dim>   &p,
+                                  const unsigned int) const
+  {
+    Assert (dim>=2, ExcInternalError());
+    Tensor<1,dim> erg;
+    erg[0] = p(1);
+    erg[1] = p(0);
+    return erg;
+  }
+
+
+
+  // List variant of gradient(): gradients[i] receives the gradient of
+  // p(0)*p(1) at points[i], i.e. (p(1), p(0), 0, ...). Requires dim>=2 and
+  // gradients of the same length as points; the component argument is
+  // ignored.
+  template<int dim>
+  void
+  Q1WedgeFunction<dim>::gradient_list (const std::vector<Point<dim> > &points,
+                                       std::vector<Tensor<1,dim> >    &gradients,
+                                       const unsigned int) const
+  {
+    Assert (dim>=2, ExcInternalError());
+    Assert (gradients.size() == points.size(),
+            ExcDimensionMismatch(gradients.size(), points.size()));
+
+    for (unsigned int i=0; i<points.size(); ++i)
+      {
+        gradients[i][0] = points[i](1);
+        gradients[i][1] = points[i](0);
+        // The function does not depend on any further coordinate, so the
+        // remaining derivatives are zero. Write them explicitly so that
+        // the result does not depend on what the caller's vector held
+        // before the call.
+        for (unsigned int d=2; d<dim; ++d)
+          gradients[i][d] = 0.;
+      }
+  }
+
+
+  // Vector-valued list variant of gradient(): the single entry of
+  // gradients[i] receives the gradient of p(0)*p(1) at points[i], i.e.
+  // (p(1), p(0), 0, ...). Requires dim>=2, gradients of the same length as
+  // points, and one component per entry (checked on the first entry only).
+  template<int dim>
+  void
+  Q1WedgeFunction<dim>::vector_gradient_list (
+    const std::vector<Point<dim> > &points,
+    std::vector<std::vector<Tensor<1,dim> > > &gradients) const
+  {
+    Assert (dim>=2, ExcInternalError());
+    Assert (gradients.size() == points.size(),
+            ExcDimensionMismatch(gradients.size(), points.size()));
+    Assert(gradients[0].size() == 1,
+           ExcDimensionMismatch(gradients[0].size(), 1));
+
+    for (unsigned int i=0; i<points.size(); ++i)
+      {
+        gradients[i][0][0] = points[i](1);
+        gradients[i][0][1] = points[i](0);
+        // The function does not depend on any further coordinate, so the
+        // remaining derivatives are zero. Write them explicitly so that
+        // the result does not depend on what the caller's vector held
+        // before the call.
+        for (unsigned int d=2; d<dim; ++d)
+          gradients[i][0][d] = 0.;
+      }
+  }
+
+
+//////////////////////////////////////////////////////////////////////
+
+
+  // Store the constant offset that value() and value_list() add to the
+  // function.
+  template<int dim>
+  PillowFunction<dim>::PillowFunction (const double offset)
+    :
+    offset(offset)
+  {}
+
+
+  // Return the product of (1 - p(d)^2) over all coordinate directions d,
+  // plus offset. Implemented for dim = 1, 2 and 3; the component argument
+  // is ignored.
+  template<int dim>
+  double
+  PillowFunction<dim>::value (const Point<dim>   &p,
+                              const unsigned int) const
+  {
+    switch (dim)
+      {
+      case 1:
+        return 1.-p(0)*p(0)+offset;
+      case 2:
+        return (1.-p(0)*p(0))*(1.-p(1)*p(1))+offset;
+      case 3:
+        return (1.-p(0)*p(0))*(1.-p(1)*p(1))*(1.-p(2)*p(2))+offset;
+      default:
+        Assert(false, ExcNotImplemented());
+      }
+    // only reached for an unsupported dim when assertions are disabled
+    return 0.;
+  }
+
+  // List variant of value(): values[i] receives the function value at
+  // points[i], using the same formulas as value(). values must have the
+  // same length as points; the component argument is ignored.
+  template<int dim>
+  void
+  PillowFunction<dim>::value_list (const std::vector<Point<dim> > &points,
+                                   std::vector<double>            &values,
+                                   const unsigned int) const
+  {
+    Assert (values.size() == points.size(),
+            ExcDimensionMismatch(values.size(), points.size()));
+
+    for (unsigned int i=0; i<points.size(); ++i)
+      {
+        const Point<dim> &p = points[i];
+        switch (dim)
+          {
+          case 1:
+            values[i] = 1.-p(0)*p(0)+offset;
+            break;
+          case 2:
+            values[i] = (1.-p(0)*p(0))*(1.-p(1)*p(1))+offset;
+            break;
+          case 3:
+            values[i] = (1.-p(0)*p(0))*(1.-p(1)*p(1))*(1.-p(2)*p(2))+offset;
+            break;
+          default:
+            Assert(false, ExcNotImplemented());
+          }
+      }
+  }
+
+
+
+  // Return the Laplacian of the product of (1 - p(d)^2) over all
+  // directions: each second derivative replaces one factor by -2, and the
+  // contributions of all directions are summed. The offset does not
+  // enter. Implemented for dim = 1, 2 and 3; the component argument is
+  // ignored.
+  template<int dim>
+  double
+  PillowFunction<dim>::laplacian (const Point<dim>   &p,
+                                  const unsigned int) const
+  {
+    switch (dim)
+      {
+      case 1:
+        return -2.;
+      case 2:
+        return -2.*((1.-p(0)*p(0))+(1.-p(1)*p(1)));
+      case 3:
+        return -2.*((1.-p(0)*p(0))*(1.-p(1)*p(1))
+                    +(1.-p(1)*p(1))*(1.-p(2)*p(2))
+                    +(1.-p(2)*p(2))*(1.-p(0)*p(0)));
+      default:
+        Assert(false, ExcNotImplemented());
+      }
+    // only reached for an unsupported dim when assertions are disabled
+    return 0.;
+  }
+
+  // List variant of laplacian(): values[i] receives the Laplacian at
+  // points[i], using the same formulas as laplacian(). values must have
+  // the same length as points; the component argument is ignored.
+  template<int dim>
+  void
+  PillowFunction<dim>::laplacian_list (const std::vector<Point<dim> > &points,
+                                       std::vector<double>            &values,
+                                       const unsigned int) const
+  {
+    Assert (values.size() == points.size(),
+            ExcDimensionMismatch(values.size(), points.size()));
+
+    for (unsigned int i=0; i<points.size(); ++i)
+      {
+        const Point<dim> &p = points[i];
+        switch (dim)
+          {
+          case 1:
+            values[i] = -2.;
+            break;
+          case 2:
+            values[i] = -2.*((1.-p(0)*p(0))+(1.-p(1)*p(1)));
+            break;
+          case 3:
+            values[i] = -2.*((1.-p(0)*p(0))*(1.-p(1)*p(1))
+                             +(1.-p(1)*p(1))*(1.-p(2)*p(2))
+                             +(1.-p(2)*p(2))*(1.-p(0)*p(0)));
+            break;
+          default:
+            Assert(false, ExcNotImplemented());
+          }
+      }
+  }
+
+  // Return the gradient of the pillow function at p. The component argument
+  // is ignored. For an unsupported dimension the default-constructed tensor
+  // is returned after the assertion.
+  template<int dim>
+  Tensor<1,dim>
+  PillowFunction<dim>::gradient (const Point<dim>   &p,
+                                 const unsigned int) const
+  {
+    Tensor<1,dim> grad;
+    if (dim == 1)
+      {
+        grad[0] = -2.*p(0);
+      }
+    else if (dim == 2)
+      {
+        const double f0 = 1.-p(0)*p(0);
+        const double f1 = 1.-p(1)*p(1);
+        grad[0] = -2.*p(0)*f1;
+        grad[1] = -2.*p(1)*f0;
+      }
+    else if (dim == 3)
+      {
+        const double f0 = 1.-p(0)*p(0);
+        const double f1 = 1.-p(1)*p(1);
+        const double f2 = 1.-p(2)*p(2);
+        grad[0] = -2.*p(0)*f1*f2;
+        grad[1] = -2.*p(1)*f0*f2;
+        grad[2] = -2.*p(2)*f0*f1;
+      }
+    else
+      {
+        Assert(false, ExcNotImplemented());
+      }
+    return grad;
+  }
+
+  // Store in gradients[i] the gradient of the pillow function at points[i].
+  // The component argument is ignored.
+  template<int dim>
+  void
+  PillowFunction<dim>::gradient_list (const std::vector<Point<dim> > &points,
+                                      std::vector<Tensor<1,dim> >    &gradients,
+                                      const unsigned int) const
+  {
+    Assert (gradients.size() == points.size(),
+            ExcDimensionMismatch(gradients.size(), points.size()));
+
+    for (unsigned int q=0; q<points.size(); ++q)
+      {
+        const Point<dim> &x    = points[q];
+        Tensor<1,dim>    &grad = gradients[q];
+        if (dim == 1)
+          {
+            grad[0] = -2.*x(0);
+          }
+        else if (dim == 2)
+          {
+            const double f0 = 1.-x(0)*x(0);
+            const double f1 = 1.-x(1)*x(1);
+            grad[0] = -2.*x(0)*f1;
+            grad[1] = -2.*x(1)*f0;
+          }
+        else if (dim == 3)
+          {
+            const double f0 = 1.-x(0)*x(0);
+            const double f1 = 1.-x(1)*x(1);
+            const double f2 = 1.-x(2)*x(2);
+            grad[0] = -2.*x(0)*f1*f2;
+            grad[1] = -2.*x(1)*f0*f2;
+            grad[2] = -2.*x(2)*f0*f1;
+          }
+        else
+          {
+            Assert(false, ExcNotImplemented());
+          }
+      }
+  }
+
+//////////////////////////////////////////////////////////////////////
+
+  // Constructor: hands the requested number of components on to the
+  // Function<dim> base class.
+  template <int dim>
+  CosineFunction<dim>::CosineFunction (const unsigned int n_components)
+    : Function<dim> (n_components)
+  {}
+
+
+
+  // Return the product of cos(pi/2 * x_k) over all coordinate directions
+  // at p. The component argument is ignored.
+  template<int dim>
+  double
+  CosineFunction<dim>::value (const Point<dim>   &p,
+                              const unsigned int) const
+  {
+    if (dim == 1)
+      return std::cos(M_PI_2*p(0));
+
+    if (dim == 2)
+      {
+        const double c0 = std::cos(M_PI_2*p(0));
+        const double c1 = std::cos(M_PI_2*p(1));
+        return c0 * c1;
+      }
+
+    if (dim == 3)
+      {
+        const double c0 = std::cos(M_PI_2*p(0));
+        const double c1 = std::cos(M_PI_2*p(1));
+        const double c2 = std::cos(M_PI_2*p(2));
+        return c0 * c1 * c2;
+      }
+
+    // only 1d, 2d and 3d are implemented
+    Assert(false, ExcNotImplemented());
+    return 0.;
+  }
+
+  // Evaluate value() at every entry of points and write the results to the
+  // matching entries of values. The component argument is ignored.
+  template<int dim>
+  void
+  CosineFunction<dim>::value_list (const std::vector<Point<dim> > &points,
+                                   std::vector<double>            &values,
+                                   const unsigned int) const
+  {
+    Assert (values.size() == points.size(),
+            ExcDimensionMismatch(values.size(), points.size()));
+
+    typename std::vector<Point<dim> >::const_iterator point = points.begin();
+    std::vector<double>::iterator                     out   = values.begin();
+    for (; point != points.end(); ++point, ++out)
+      *out = value(*point);
+  }
+
+
+  // For each point, compute value() once and copy that scalar into every
+  // entry of the corresponding output vector.
+  template<int dim>
+  void
+  CosineFunction<dim>::vector_value_list (
+    const std::vector<Point<dim> > &points,
+    std::vector<Vector<double> >   &values) const
+  {
+    Assert (values.size() == points.size(),
+            ExcDimensionMismatch(values.size(), points.size()));
+
+    for (unsigned int q=0; q<points.size(); ++q)
+      {
+        Vector<double> &row    = values[q];
+        const double    scalar = value(points[q]);
+        for (unsigned int c=0; c<row.size(); ++c)
+          row(c) = scalar;
+      }
+  }
+
+
+  // Return the Laplacian of the cosine product at p, i.e. the function
+  // value scaled by -dim*(pi/2)^2. The component argument is ignored.
+  template<int dim>
+  double
+  CosineFunction<dim>::laplacian (const Point<dim>   &p,
+                                  const unsigned int) const
+  {
+    if (dim == 1)
+      {
+        const double c0 = std::cos(M_PI_2*p(0));
+        return -M_PI_2*M_PI_2* c0;
+      }
+
+    if (dim == 2)
+      {
+        const double c0 = std::cos(M_PI_2*p(0));
+        const double c1 = std::cos(M_PI_2*p(1));
+        return -2*M_PI_2*M_PI_2* c0 * c1;
+      }
+
+    if (dim == 3)
+      {
+        const double c0 = std::cos(M_PI_2*p(0));
+        const double c1 = std::cos(M_PI_2*p(1));
+        const double c2 = std::cos(M_PI_2*p(2));
+        return -3*M_PI_2*M_PI_2* c0 * c1 * c2;
+      }
+
+    // only 1d, 2d and 3d are implemented
+    Assert(false, ExcNotImplemented());
+    return 0.;
+  }
+
+  // Evaluate laplacian() at every entry of points and write the results to
+  // the matching entries of values. The component argument is ignored.
+  template<int dim>
+  void
+  CosineFunction<dim>::laplacian_list (const std::vector<Point<dim> > &points,
+                                       std::vector<double>            &values,
+                                       const unsigned int) const
+  {
+    Assert (values.size() == points.size(),
+            ExcDimensionMismatch(values.size(), points.size()));
+
+    typename std::vector<Point<dim> >::const_iterator point = points.begin();
+    std::vector<double>::iterator                     out   = values.begin();
+    for (; point != points.end(); ++point, ++out)
+      *out = laplacian(*point);
+  }
+
+  // Return the gradient of the cosine product at p: in entry k the cosine
+  // factor of direction k is replaced by -pi/2 * sin(pi/2 * x_k). The
+  // component argument is ignored.
+  template<int dim>
+  Tensor<1,dim>
+  CosineFunction<dim>::gradient (const Point<dim>   &p,
+                                 const unsigned int) const
+  {
+    Tensor<1,dim> grad;
+    if (dim == 1)
+      {
+        grad[0] = -M_PI_2* std::sin(M_PI_2*p(0));
+      }
+    else if (dim == 2)
+      {
+        const double s0 = std::sin(M_PI_2*p(0)), c0 = std::cos(M_PI_2*p(0));
+        const double s1 = std::sin(M_PI_2*p(1)), c1 = std::cos(M_PI_2*p(1));
+        grad[0] = -M_PI_2* s0 * c1;
+        grad[1] = -M_PI_2* c0 * s1;
+      }
+    else if (dim == 3)
+      {
+        const double s0 = std::sin(M_PI_2*p(0)), c0 = std::cos(M_PI_2*p(0));
+        const double s1 = std::sin(M_PI_2*p(1)), c1 = std::cos(M_PI_2*p(1));
+        const double s2 = std::sin(M_PI_2*p(2)), c2 = std::cos(M_PI_2*p(2));
+        grad[0] = -M_PI_2* s0 * c1 * c2;
+        grad[1] = -M_PI_2* c0 * s1 * c2;
+        grad[2] = -M_PI_2* c0 * c1 * s2;
+      }
+    else
+      {
+        Assert(false, ExcNotImplemented());
+      }
+    return grad;
+  }
+
+  // Store in gradients[i] the gradient of the cosine product at points[i].
+  // The component argument is ignored.
+  template<int dim>
+  void
+  CosineFunction<dim>::gradient_list (const std::vector<Point<dim> > &points,
+                                      std::vector<Tensor<1,dim> >    &gradients,
+                                      const unsigned int) const
+  {
+    Assert (gradients.size() == points.size(),
+            ExcDimensionMismatch(gradients.size(), points.size()));
+
+    for (unsigned int q=0; q<points.size(); ++q)
+      {
+        const Point<dim> &x    = points[q];
+        Tensor<1,dim>    &grad = gradients[q];
+        if (dim == 1)
+          {
+            grad[0] = -M_PI_2* std::sin(M_PI_2*x(0));
+          }
+        else if (dim == 2)
+          {
+            const double s0 = std::sin(M_PI_2*x(0)), c0 = std::cos(M_PI_2*x(0));
+            const double s1 = std::sin(M_PI_2*x(1)), c1 = std::cos(M_PI_2*x(1));
+            grad[0] = -M_PI_2* s0 * c1;
+            grad[1] = -M_PI_2* c0 * s1;
+          }
+        else if (dim == 3)
+          {
+            const double s0 = std::sin(M_PI_2*x(0)), c0 = std::cos(M_PI_2*x(0));
+            const double s1 = std::sin(M_PI_2*x(1)), c1 = std::cos(M_PI_2*x(1));
+            const double s2 = std::sin(M_PI_2*x(2)), c2 = std::cos(M_PI_2*x(2));
+            grad[0] = -M_PI_2* s0 * c1 * c2;
+            grad[1] = -M_PI_2* c0 * s1 * c2;
+            grad[2] = -M_PI_2* c0 * c1 * s2;
+          }
+        else
+          {
+            Assert(false, ExcNotImplemented());
+          }
+      }
+  }
+
+  // Return the matrix of second derivatives of the cosine product at p.
+  // Diagonal entries are -(pi/2)^2 times the function value; off-diagonal
+  // entries carry sine factors in the two differentiated directions. The
+  // component argument is ignored.
+  template<int dim>
+  SymmetricTensor<2,dim>
+  CosineFunction<dim>::hessian (const Point<dim>   &p,
+                                const unsigned int) const
+  {
+    const double pi2 = M_PI_2*M_PI_2;
+
+    SymmetricTensor<2,dim> hess;
+    if (dim == 1)
+      {
+        hess[0][0] = -pi2* std::cos(M_PI_2*p(0));
+      }
+    else if (dim == 2)
+      {
+        const double s0 = std::sin(M_PI_2*p(0)), c0 = std::cos(M_PI_2*p(0));
+        const double s1 = std::sin(M_PI_2*p(1)), c1 = std::cos(M_PI_2*p(1));
+
+        const double diagonal = -pi2*c0 * c1;
+        const double mixed_01 = pi2*s0 * s1;
+
+        hess[0][0] = diagonal;
+        hess[1][1] = diagonal;
+        // writing [i][j] of a SymmetricTensor also sets [j][i]
+        hess[0][1] = mixed_01;
+      }
+    else if (dim == 3)
+      {
+        const double s0 = std::sin(M_PI_2*p(0)), c0 = std::cos(M_PI_2*p(0));
+        const double s1 = std::sin(M_PI_2*p(1)), c1 = std::cos(M_PI_2*p(1));
+        const double s2 = std::sin(M_PI_2*p(2)), c2 = std::cos(M_PI_2*p(2));
+
+        const double diagonal = -pi2*c0 * c1 * c2;
+        const double mixed_01 = pi2*s0 * s1 * c2;
+        const double mixed_02 = pi2*s0 * c1 * s2;
+        const double mixed_12 = pi2*c0 * s1 * s2;
+
+        hess[0][0] = diagonal;
+        hess[1][1] = diagonal;
+        hess[2][2] = diagonal;
+        // writing [i][j] of a SymmetricTensor also sets [j][i]
+        hess[0][1] = mixed_01;
+        hess[0][2] = mixed_02;
+        hess[1][2] = mixed_12;
+      }
+    else
+      {
+        Assert(false, ExcNotImplemented());
+      }
+    return hess;
+  }
+
+  // Store in hessians[i] the matrix of second derivatives of the cosine
+  // product at points[i]. The component argument is ignored.
+  template<int dim>
+  void
+  CosineFunction<dim>::hessian_list (const std::vector<Point<dim> >       &points,
+                                     std::vector<SymmetricTensor<2,dim> > &hessians,
+                                     const unsigned int) const
+  {
+    Assert (hessians.size() == points.size(),
+            ExcDimensionMismatch(hessians.size(), points.size()));
+
+    const double pi2 = M_PI_2*M_PI_2;
+
+    for (unsigned int q=0; q<points.size(); ++q)
+      {
+        const Point<dim>       &x    = points[q];
+        SymmetricTensor<2,dim> &hess = hessians[q];
+        if (dim == 1)
+          {
+            hess[0][0] = -pi2* std::cos(M_PI_2*x(0));
+          }
+        else if (dim == 2)
+          {
+            const double s0 = std::sin(M_PI_2*x(0)), c0 = std::cos(M_PI_2*x(0));
+            const double s1 = std::sin(M_PI_2*x(1)), c1 = std::cos(M_PI_2*x(1));
+
+            const double diagonal = -pi2*c0 * c1;
+            const double mixed_01 = pi2*s0 * s1;
+
+            hess[0][0] = diagonal;
+            hess[1][1] = diagonal;
+            // writing [i][j] of a SymmetricTensor also sets [j][i]
+            hess[0][1] = mixed_01;
+          }
+        else if (dim == 3)
+          {
+            const double s0 = std::sin(M_PI_2*x(0)), c0 = std::cos(M_PI_2*x(0));
+            const double s1 = std::sin(M_PI_2*x(1)), c1 = std::cos(M_PI_2*x(1));
+            const double s2 = std::sin(M_PI_2*x(2)), c2 = std::cos(M_PI_2*x(2));
+
+            const double diagonal = -pi2*c0 * c1 * c2;
+            const double mixed_01 = pi2*s0 * s1 * c2;
+            const double mixed_02 = pi2*s0 * c1 * s2;
+            const double mixed_12 = pi2*c0 * s1 * s2;
+
+            hess[0][0] = diagonal;
+            hess[1][1] = diagonal;
+            hess[2][2] = diagonal;
+            // writing [i][j] of a SymmetricTensor also sets [j][i]
+            hess[0][1] = mixed_01;
+            hess[0][2] = mixed_02;
+            hess[1][2] = mixed_12;
+          }
+        else
+          {
+            Assert(false, ExcNotImplemented());
+          }
+      }
+  }
+
+//////////////////////////////////////////////////////////////////////
+
+  // Constructor: registers the function with the Function<dim> base class
+  // as having dim components.
+  template <int dim>
+  CosineGradFunction<dim>::CosineGradFunction ()
+    : Function<dim> (dim)
+  {}
+
+
+  // Return component d at p: -pi/2 * sin(pi/2 * x_d) times the cosine
+  // factors of the remaining coordinate directions. d must be below dim.
+  template<int dim>
+  double
+  CosineGradFunction<dim>::value (
+    const Point<dim>   &p,
+    const unsigned int d) const
+  {
+    AssertIndexRange(d, dim);
+    // the other coordinate directions, numbered cyclically after d
+    const unsigned int d1 = (d+1) % dim;
+    const unsigned int d2 = (d+2) % dim;
+
+    if (dim == 1)
+      return (-M_PI_2* std::sin(M_PI_2*p(0)));
+
+    if (dim == 2)
+      {
+        const double sin_d  = std::sin(M_PI_2*p(d));
+        const double cos_d1 = std::cos(M_PI_2*p(d1));
+        return (-M_PI_2* sin_d * cos_d1);
+      }
+
+    if (dim == 3)
+      {
+        const double sin_d  = std::sin(M_PI_2*p(d));
+        const double cos_d1 = std::cos(M_PI_2*p(d1));
+        const double cos_d2 = std::cos(M_PI_2*p(d2));
+        return (-M_PI_2* sin_d * cos_d1 * cos_d2);
+      }
+
+    // only 1d, 2d and 3d are implemented
+    Assert(false, ExcNotImplemented());
+    return 0.;
+  }
+
+
+  // Write all dim components at p into result, which must have exactly
+  // dim entries.
+  template<int dim>
+  void
+  CosineGradFunction<dim>::vector_value (
+    const Point<dim> &p,
+    Vector<double> &result) const
+  {
+    AssertDimension(result.size(), dim);
+    if (dim == 1)
+      {
+        result(0) = -M_PI_2* std::sin(M_PI_2*p(0));
+      }
+    else if (dim == 2)
+      {
+        const double s0 = std::sin(M_PI_2*p(0)), c0 = std::cos(M_PI_2*p(0));
+        const double s1 = std::sin(M_PI_2*p(1)), c1 = std::cos(M_PI_2*p(1));
+        result(0) = -M_PI_2* s0 * c1;
+        result(1) = -M_PI_2* c0 * s1;
+      }
+    else if (dim == 3)
+      {
+        const double s0 = std::sin(M_PI_2*p(0)), c0 = std::cos(M_PI_2*p(0));
+        const double s1 = std::sin(M_PI_2*p(1)), c1 = std::cos(M_PI_2*p(1));
+        const double s2 = std::sin(M_PI_2*p(2)), c2 = std::cos(M_PI_2*p(2));
+        result(0) = -M_PI_2* s0 * c1 * c2;
+        result(1) = -M_PI_2* c0 * s1 * c2;
+        result(2) = -M_PI_2* c0 * c1 * s2;
+      }
+    else
+      {
+        Assert(false, ExcNotImplemented());
+      }
+  }
+
+
+  // Fill values[i] with component d evaluated at points[i]. d must be
+  // below dim, and values must have as many entries as points.
+  template<int dim>
+  void
+  CosineGradFunction<dim>::value_list (
+    const std::vector<Point<dim> > &points,
+    std::vector<double> &values,
+    const unsigned int d) const
+  {
+    Assert (values.size() == points.size(),
+            ExcDimensionMismatch(values.size(), points.size()));
+    AssertIndexRange(d, dim);
+    // the other coordinate directions, numbered cyclically after d
+    const unsigned int d1 = (d+1) % dim;
+    const unsigned int d2 = (d+2) % dim;
+
+    for (unsigned int q=0; q<points.size(); ++q)
+      {
+        const Point<dim> &x = points[q];
+        if (dim == 1)
+          {
+            values[q] = -M_PI_2* std::sin(M_PI_2*x(d));
+          }
+        else if (dim == 2)
+          {
+            const double sin_d  = std::sin(M_PI_2*x(d));
+            const double cos_d1 = std::cos(M_PI_2*x(d1));
+            values[q] = -M_PI_2* sin_d * cos_d1;
+          }
+        else if (dim == 3)
+          {
+            const double sin_d  = std::sin(M_PI_2*x(d));
+            const double cos_d1 = std::cos(M_PI_2*x(d1));
+            const double cos_d2 = std::cos(M_PI_2*x(d2));
+            values[q] = -M_PI_2* sin_d * cos_d1 * cos_d2;
+          }
+        else
+          {
+            Assert(false, ExcNotImplemented());
+          }
+      }
+  }
+
+
+  // For each point, write the first dim components into the matching
+  // output vector values[i].
+  template<int dim>
+  void
+  CosineGradFunction<dim>::vector_value_list (
+    const std::vector<Point<dim> > &points,
+    std::vector<Vector<double> >   &values) const
+  {
+    Assert (values.size() == points.size(),
+            ExcDimensionMismatch(values.size(), points.size()));
+
+    for (unsigned int q=0; q<points.size(); ++q)
+      {
+        const Point<dim> &x   = points[q];
+        Vector<double>   &out = values[q];
+        if (dim == 1)
+          {
+            out(0) = -M_PI_2* std::sin(M_PI_2*x(0));
+          }
+        else if (dim == 2)
+          {
+            const double s0 = std::sin(M_PI_2*x(0)), c0 = std::cos(M_PI_2*x(0));
+            const double s1 = std::sin(M_PI_2*x(1)), c1 = std::cos(M_PI_2*x(1));
+            out(0) = -M_PI_2* s0 * c1;
+            out(1) = -M_PI_2* c0 * s1;
+          }
+        else if (dim == 3)
+          {
+            const double s0 = std::sin(M_PI_2*x(0)), c0 = std::cos(M_PI_2*x(0));
+            const double s1 = std::sin(M_PI_2*x(1)), c1 = std::cos(M_PI_2*x(1));
+            const double s2 = std::sin(M_PI_2*x(2)), c2 = std::cos(M_PI_2*x(2));
+            out(0) = -M_PI_2* s0 * c1 * c2;
+            out(1) = -M_PI_2* c0 * s1 * c2;
+            out(2) = -M_PI_2* c0 * c1 * s2;
+          }
+        else
+          {
+            Assert(false, ExcNotImplemented());
+          }
+      }
+  }
+
+
+  // Return the Laplacian of component d at p.
+  //
+  // Every component returned by value() is a product of one sine or cosine
+  // factor of argument pi/2 * x_k per coordinate direction. Differentiating
+  // twice with respect to any single x_k therefore reproduces the component
+  // multiplied by -(pi/2)^2, and summing over all dim directions yields
+  // -dim * (pi/2)^2 * value(p,d). The factor dim matches the -2 and -3
+  // used by CosineFunction::laplacian in 2d and 3d; for dim == 1 the result
+  // is unchanged.
+  template<int dim>
+  double
+  CosineGradFunction<dim>::laplacian (
+    const Point<dim>   &p,
+    const unsigned int d) const
+  {
+    return -static_cast<double>(dim)*M_PI_2*M_PI_2* value(p,d);
+  }
+
+
+  // Return the gradient of component d at p. The entry for direction d
+  // carries only cosine factors (negative sign); the entries for the other
+  // directions carry sine factors in d and in that direction. d must be
+  // below dim.
+  template<int dim>
+  Tensor<1,dim>
+  CosineGradFunction<dim>::gradient (
+    const Point<dim> &p,
+    const unsigned int d) const
+  {
+    AssertIndexRange(d, dim);
+    // the other coordinate directions, numbered cyclically after d
+    const unsigned int d1 = (d+1) % dim;
+    const unsigned int d2 = (d+2) % dim;
+    const double pi2 = M_PI_2*M_PI_2;
+
+    Tensor<1,dim> grad;
+    if (dim == 1)
+      {
+        grad[0] = -pi2* std::cos(M_PI_2*p(0));
+      }
+    else if (dim == 2)
+      {
+        const double sin_d  = std::sin(M_PI_2*p(d)),  cos_d  = std::cos(M_PI_2*p(d));
+        const double sin_d1 = std::sin(M_PI_2*p(d1)), cos_d1 = std::cos(M_PI_2*p(d1));
+        grad[d ] = -pi2*cos_d * cos_d1;
+        grad[d1] =  pi2*sin_d * sin_d1;
+      }
+    else if (dim == 3)
+      {
+        const double sin_d  = std::sin(M_PI_2*p(d)),  cos_d  = std::cos(M_PI_2*p(d));
+        const double sin_d1 = std::sin(M_PI_2*p(d1)), cos_d1 = std::cos(M_PI_2*p(d1));
+        const double sin_d2 = std::sin(M_PI_2*p(d2)), cos_d2 = std::cos(M_PI_2*p(d2));
+        grad[d ] = -pi2*cos_d * cos_d1 * cos_d2;
+        grad[d1] =  pi2*sin_d * sin_d1 * cos_d2;
+        grad[d2] =  pi2*sin_d * cos_d1 * sin_d2;
+      }
+    else
+      {
+        Assert(false, ExcNotImplemented());
+      }
+    return grad;
+  }
+
+
+  // Store in gradients[i] the gradient of component d at points[i]. d must
+  // be below dim, and gradients must have as many entries as points.
+  template<int dim>
+  void
+  CosineGradFunction<dim>::gradient_list (
+    const std::vector<Point<dim> > &points,
+    std::vector<Tensor<1,dim> >    &gradients,
+    const unsigned int d) const
+  {
+    AssertIndexRange(d, dim);
+    // the other coordinate directions, numbered cyclically after d
+    const unsigned int d1 = (d+1) % dim;
+    const unsigned int d2 = (d+2) % dim;
+    const double pi2 = M_PI_2*M_PI_2;
+
+    Assert (gradients.size() == points.size(),
+            ExcDimensionMismatch(gradients.size(), points.size()));
+    for (unsigned int q=0; q<points.size(); ++q)
+      {
+        const Point<dim> &x    = points[q];
+        Tensor<1,dim>    &grad = gradients[q];
+
+        if (dim == 1)
+          {
+            grad[0] = -pi2* std::cos(M_PI_2*x(0));
+          }
+        else if (dim == 2)
+          {
+            const double sin_d  = std::sin(M_PI_2*x(d)),  cos_d  = std::cos(M_PI_2*x(d));
+            const double sin_d1 = std::sin(M_PI_2*x(d1)), cos_d1 = std::cos(M_PI_2*x(d1));
+            grad[d ] = -pi2*cos_d * cos_d1;
+            grad[d1] =  pi2*sin_d * sin_d1;
+          }
+        else if (dim == 3)
+          {
+            const double sin_d  = std::sin(M_PI_2*x(d)),  cos_d  = std::cos(M_PI_2*x(d));
+            const double sin_d1 = std::sin(M_PI_2*x(d1)), cos_d1 = std::cos(M_PI_2*x(d1));
+            const double sin_d2 = std::sin(M_PI_2*x(d2)), cos_d2 = std::cos(M_PI_2*x(d2));
+            grad[d ] = -pi2*cos_d * cos_d1 * cos_d2;
+            grad[d1] =  pi2*sin_d * sin_d1 * cos_d2;
+            grad[d2] =  pi2*sin_d * cos_d1 * sin_d2;
+          }
+        else
+          {
+            Assert(false, ExcNotImplemented());
+          }
+      }
+  }
+
+
+  // For each point, store the gradients of all dim components:
+  // gradients[i][c][k] receives the derivative of component c in direction
+  // k at points[i]. The resulting matrix is symmetric, with equal diagonal
+  // entries.
+  template<int dim>
+  void
+  CosineGradFunction<dim>::vector_gradient_list (
+    const std::vector<Point<dim> > &points,
+    std::vector<std::vector<Tensor<1,dim> > > &gradients) const
+  {
+    AssertVectorVectorDimension(gradients, points.size(), dim);
+    const double pi2 = M_PI_2*M_PI_2;
+
+    for (unsigned int q=0; q<points.size(); ++q)
+      {
+        const Point<dim>            &x    = points[q];
+        std::vector<Tensor<1,dim> > &grad = gradients[q];
+        if (dim == 1)
+          {
+            grad[0][0] = -pi2* std::cos(M_PI_2*x(0));
+          }
+        else if (dim == 2)
+          {
+            const double s0 = std::sin(M_PI_2*x(0)), c0 = std::cos(M_PI_2*x(0));
+            const double s1 = std::sin(M_PI_2*x(1)), c1 = std::cos(M_PI_2*x(1));
+
+            const double diagonal = -pi2*c0 * c1;
+            const double mixed_01 = pi2*s0 * s1;
+
+            grad[0][0] = diagonal;
+            grad[1][1] = diagonal;
+            grad[0][1] = mixed_01;
+            grad[1][0] = mixed_01;
+          }
+        else if (dim == 3)
+          {
+            const double s0 = std::sin(M_PI_2*x(0)), c0 = std::cos(M_PI_2*x(0));
+            const double s1 = std::sin(M_PI_2*x(1)), c1 = std::cos(M_PI_2*x(1));
+            const double s2 = std::sin(M_PI_2*x(2)), c2 = std::cos(M_PI_2*x(2));
+
+            const double diagonal = -pi2*c0 * c1 * c2;
+            const double mixed_01 = pi2*s0 * s1 * c2;
+            const double mixed_02 = pi2*s0 * c1 * s2;
+            const double mixed_12 = pi2*c0 * s1 * s2;
+
+            grad[0][0] = diagonal;
+            grad[1][1] = diagonal;
+            grad[2][2] = diagonal;
+            grad[0][1] = mixed_01;
+            grad[1][0] = mixed_01;
+            grad[0][2] = mixed_02;
+            grad[2][0] = mixed_02;
+            grad[1][2] = mixed_12;
+            grad[2][1] = mixed_12;
+          }
+        else
+          {
+            Assert(false, ExcNotImplemented());
+          }
+      }
+  }
+
+
+//////////////////////////////////////////////////////////////////////
+
+  // Return the product of exp(x_k) over all coordinate directions at p.
+  // The component argument is ignored.
+  template<int dim>
+  double
+  ExpFunction<dim>::value (const Point<dim>   &p,
+                           const unsigned int) const
+  {
+    if (dim == 1)
+      return std::exp(p(0));
+
+    if (dim == 2)
+      {
+        const double e0 = std::exp(p(0));
+        const double e1 = std::exp(p(1));
+        return e0 * e1;
+      }
+
+    if (dim == 3)
+      {
+        const double e0 = std::exp(p(0));
+        const double e1 = std::exp(p(1));
+        const double e2 = std::exp(p(2));
+        return e0 * e1 * e2;
+      }
+
+    // only 1d, 2d and 3d are implemented
+    Assert(false, ExcNotImplemented());
+    return 0.;
+  }
+
+  // Fill values[i] with the product of exp(x_k) at points[i]. The
+  // component argument is ignored.
+  template<int dim>
+  void
+  ExpFunction<dim>::value_list (const std::vector<Point<dim> > &points,
+                                std::vector<double>            &values,
+                                const unsigned int) const
+  {
+    Assert (values.size() == points.size(),
+            ExcDimensionMismatch(values.size(), points.size()));
+
+    for (unsigned int q=0; q<points.size(); ++q)
+      {
+        const Point<dim> &x = points[q];
+        if (dim == 1)
+          {
+            values[q] = std::exp(x(0));
+          }
+        else if (dim == 2)
+          {
+            const double e0 = std::exp(x(0));
+            const double e1 = std::exp(x(1));
+            values[q] = e0 * e1;
+          }
+        else if (dim == 3)
+          {
+            const double e0 = std::exp(x(0));
+            const double e1 = std::exp(x(1));
+            const double e2 = std::exp(x(2));
+            values[q] = e0 * e1 * e2;
+          }
+        else
+          {
+            Assert(false, ExcNotImplemented());
+          }
+      }
+  }
+
+  // Return the Laplacian of the exponential product at p, which is dim
+  // times the function value. The component argument is ignored.
+  template<int dim>
+  double
+  ExpFunction<dim>::laplacian (const Point<dim>   &p,
+                               const unsigned int) const
+  {
+    if (dim == 1)
+      return std::exp(p(0));
+
+    if (dim == 2)
+      {
+        const double e0 = std::exp(p(0));
+        const double e1 = std::exp(p(1));
+        return 2 * e0 * e1;
+      }
+
+    if (dim == 3)
+      {
+        const double e0 = std::exp(p(0));
+        const double e1 = std::exp(p(1));
+        const double e2 = std::exp(p(2));
+        return 3 * e0 * e1 * e2;
+      }
+
+    // only 1d, 2d and 3d are implemented
+    Assert(false, ExcNotImplemented());
+    return 0.;
+  }
+
+  // Fill values[i] with the Laplacian of the exponential product at
+  // points[i]. The component argument is ignored.
+  template<int dim>
+  void
+  ExpFunction<dim>::laplacian_list (const std::vector<Point<dim> > &points,
+                                    std::vector<double>            &values,
+                                    const unsigned int) const
+  {
+    Assert (values.size() == points.size(),
+            ExcDimensionMismatch(values.size(), points.size()));
+
+    for (unsigned int q=0; q<points.size(); ++q)
+      {
+        const Point<dim> &x = points[q];
+        if (dim == 1)
+          {
+            values[q] = std::exp(x(0));
+          }
+        else if (dim == 2)
+          {
+            const double e0 = std::exp(x(0));
+            const double e1 = std::exp(x(1));
+            values[q] = 2 * e0 * e1;
+          }
+        else if (dim == 3)
+          {
+            const double e0 = std::exp(x(0));
+            const double e1 = std::exp(x(1));
+            const double e2 = std::exp(x(2));
+            values[q] = 3 * e0 * e1 * e2;
+          }
+        else
+          {
+            Assert(false, ExcNotImplemented());
+          }
+      }
+  }
+
+  // Return the gradient of the exponential product at p. Every entry
+  // equals the function value itself. The component argument is ignored.
+  template<int dim>
+  Tensor<1,dim>
+  ExpFunction<dim>::gradient (const Point<dim>   &p,
+                              const unsigned int) const
+  {
+    Tensor<1,dim> grad;
+    if (dim == 1)
+      {
+        grad[0] = std::exp(p(0));
+      }
+    else if (dim == 2)
+      {
+        const double product = std::exp(p(0)) * std::exp(p(1));
+        grad[0] = product;
+        grad[1] = product;
+      }
+    else if (dim == 3)
+      {
+        const double product = std::exp(p(0)) * std::exp(p(1)) * std::exp(p(2));
+        grad[0] = product;
+        grad[1] = product;
+        grad[2] = product;
+      }
+    else
+      {
+        Assert(false, ExcNotImplemented());
+      }
+    return grad;
+  }
+
+  // Store in gradients[i] the gradient of the exponential product at
+  // points[i]; every entry equals the function value there. The component
+  // argument is ignored.
+  template<int dim>
+  void
+  ExpFunction<dim>::gradient_list (const std::vector<Point<dim> > &points,
+                                   std::vector<Tensor<1,dim> >    &gradients,
+                                   const unsigned int) const
+  {
+    Assert (gradients.size() == points.size(),
+            ExcDimensionMismatch(gradients.size(), points.size()));
+
+    for (unsigned int q=0; q<points.size(); ++q)
+      {
+        const Point<dim> &x    = points[q];
+        Tensor<1,dim>    &grad = gradients[q];
+        if (dim == 1)
+          {
+            grad[0] = std::exp(x(0));
+          }
+        else if (dim == 2)
+          {
+            const double product = std::exp(x(0)) * std::exp(x(1));
+            grad[0] = product;
+            grad[1] = product;
+          }
+        else if (dim == 3)
+          {
+            const double product = std::exp(x(0)) * std::exp(x(1)) * std::exp(x(2));
+            grad[0] = product;
+            grad[1] = product;
+            grad[2] = product;
+          }
+        else
+          {
+            Assert(false, ExcNotImplemented());
+          }
+      }
+  }
+
+//////////////////////////////////////////////////////////////////////
+
+
+  // Return r^(2/3) * sin(2/3 * phi) at p, where r is the distance from the
+  // origin and phi = atan2(y,-x) + pi. Points with x >= 0 and y >= 0 get
+  // the value zero. The component argument is ignored.
+  double
+  LSingularityFunction::value (const Point<2>   &p,
+                               const unsigned int) const
+  {
+    const double x = p(0);
+    const double y = p(1);
+
+    const bool in_closed_first_quadrant = (x>=0) && (y>=0);
+    if (in_closed_first_quadrant)
+      return 0.;
+
+    const double phi     = std::atan2(y,-x)+M_PI;
+    const double r2      = x*x+y*y;
+    const double radial  = std::pow(r2,1./3.);
+    const double angular = std::sin(2./3.*phi);
+
+    return radial * angular;
+  }
+
+
+  // Fill values[i] with r^(2/3) * sin(2/3 * phi) at points[i], using
+  // phi = atan2(y,-x) + pi; points with x >= 0 and y >= 0 get zero. The
+  // component argument is ignored.
+  void
+  LSingularityFunction::value_list (const std::vector<Point<2> > &points,
+                                    std::vector<double>            &values,
+                                    const unsigned int) const
+  {
+    Assert (values.size() == points.size(),
+            ExcDimensionMismatch(values.size(), points.size()));
+
+    for (unsigned int q=0; q<points.size(); ++q)
+      {
+        const double x = points[q](0);
+        const double y = points[q](1);
+
+        if ((x>=0) && (y>=0))
+          {
+            values[q] = 0.;
+            continue;
+          }
+
+        const double phi     = std::atan2(y,-x)+M_PI;
+        const double r2      = x*x+y*y;
+        const double radial  = std::pow(r2,1./3.);
+        const double angular = std::sin(2./3.*phi);
+
+        values[q] = radial * angular;
+      }
+  }
+
+
+  // Vector-valued counterpart of value_list(): each values[i] must hold
+  // exactly one entry, which receives the function value at points[i].
+  void
+  LSingularityFunction::vector_value_list (
+    const std::vector<Point<2> > &points,
+    std::vector<Vector<double> > &values) const
+  {
+    Assert (values.size() == points.size(),
+            ExcDimensionMismatch(values.size(), points.size()));
+
+    for (unsigned int q=0; q<points.size(); ++q)
+      {
+        Assert (values[q].size() == 1,
+                ExcDimensionMismatch(values[q].size(), 1));
+        const double x = points[q](0);
+        const double y = points[q](1);
+
+        if ((x>=0) && (y>=0))
+          {
+            values[q](0) = 0.;
+            continue;
+          }
+
+        const double phi     = std::atan2(y,-x)+M_PI;
+        const double r2      = x*x+y*y;
+        const double radial  = std::pow(r2,1./3.);
+        const double angular = std::sin(2./3.*phi);
+
+        values[q](0) = radial * angular;
+      }
+  }
+
+
+  // Return zero for every point and component; both arguments are ignored.
+  double
+  LSingularityFunction::laplacian (const Point<2> & /*p*/,
+                                   const unsigned int /*component*/) const
+  {
+    return 0.;
+  }
+
+
+  // Set one entry of values to zero for every entry of points. The
+  // component argument is ignored.
+  void
+  LSingularityFunction::laplacian_list (const std::vector<Point<2> > &points,
+                                        std::vector<double>            &values,
+                                        const unsigned int) const
+  {
+    Assert (values.size() == points.size(),
+            ExcDimensionMismatch(values.size(), points.size()));
+
+    std::vector<double>::iterator out = values.begin();
+    for (std::vector<Point<2> >::const_iterator point = points.begin();
+         point != points.end(); ++point, ++out)
+      *out = 0.;
+  }
+
+
+  // Return the gradient of r^(2/3) * sin(2/3 * phi) at p, with
+  // phi = atan2(y,-x) + pi. The expression divides by r^(4/3) and is
+  // evaluated for every p, without the quadrant test used in value(). The
+  // component argument is ignored.
+  Tensor<1,2>
+  LSingularityFunction::gradient (const Point<2>   &p,
+                                  const unsigned int) const
+  {
+    const double x   = p(0);
+    const double y   = p(1);
+    const double phi = std::atan2(y,-x)+M_PI;
+    const double r43 = std::pow(x*x+y*y,2./3.);
+
+    const double sin_term = std::sin(2./3.*phi);
+    const double cos_term = std::cos(2./3.*phi);
+
+    Tensor<1,2> grad;
+    grad[0] = 2./3.*(sin_term*x + cos_term*y)/r43;
+    grad[1] = 2./3.*(sin_term*y - cos_term*x)/r43;
+    return grad;
+  }
+
+
+  // Store in gradients[i] the gradient of r^(2/3) * sin(2/3 * phi) at
+  // points[i], with phi = atan2(y,-x) + pi. The component argument is
+  // ignored.
+  void
+  LSingularityFunction::gradient_list (const std::vector<Point<2> > &points,
+                                       std::vector<Tensor<1,2> >    &gradients,
+                                       const unsigned int) const
+  {
+    Assert (gradients.size() == points.size(),
+            ExcDimensionMismatch(gradients.size(), points.size()));
+
+    for (unsigned int q=0; q<points.size(); ++q)
+      {
+        const double x   = points[q](0);
+        const double y   = points[q](1);
+        const double phi = std::atan2(y,-x)+M_PI;
+        const double r43 = std::pow(x*x+y*y,2./3.);
+
+        const double sin_term = std::sin(2./3.*phi);
+        const double cos_term = std::cos(2./3.*phi);
+
+        Tensor<1,2> &grad = gradients[q];
+        grad[0] = 2./3.*(sin_term*x + cos_term*y)/r43;
+        grad[1] = 2./3.*(sin_term*y - cos_term*x)/r43;
+      }
+  }
+
+
+  // Vector-valued counterpart of gradient_list(): each gradients[i] must
+  // hold exactly one tensor, which receives the gradient at points[i].
+  void
+  LSingularityFunction::vector_gradient_list (
+    const std::vector<Point<2> > &points,
+    std::vector<std::vector<Tensor<1,2> > > &gradients) const
+  {
+    Assert (gradients.size() == points.size(),
+            ExcDimensionMismatch(gradients.size(), points.size()));
+
+    for (unsigned int q=0; q<points.size(); ++q)
+      {
+        Assert(gradients[q].size() == 1,
+               ExcDimensionMismatch(gradients[q].size(), 1));
+        const double x   = points[q](0);
+        const double y   = points[q](1);
+        const double phi = std::atan2(y,-x)+M_PI;
+        const double r43 = std::pow(x*x+y*y,2./3.);
+
+        const double sin_term = std::sin(2./3.*phi);
+        const double cos_term = std::cos(2./3.*phi);
+
+        Tensor<1,2> &grad = gradients[q][0];
+        grad[0] = 2./3.*(sin_term*x + cos_term*y)/r43;
+        grad[1] = 2./3.*(sin_term*y - cos_term*x)/r43;
+      }
+  }
+
+//////////////////////////////////////////////////////////////////////
+
+  // Constructor: registers the function with the Function<2> base class as
+  // having two components.
+  LSingularityGradFunction::LSingularityGradFunction ()
+    : Function<2> (2)
+  {}
+
+
+  // Return component d (0 or 1) at p. With phi = atan2(y,-x) + pi this is
+  // 2/3 * (sin(2/3 phi) * p(d) + t) / r^(4/3), where t is cos(2/3 phi) * y
+  // for d == 0 and -cos(2/3 phi) * x otherwise.
+  double
+  LSingularityGradFunction::value (const Point<2>   &p,
+                                   const unsigned int d) const
+  {
+    AssertIndexRange(d,2);
+
+    const double x = p(0);
+    const double y = p(1);
+    const double phi = std::atan2(y,-x)+M_PI;
+    const double r43 = std::pow(x*x+y*y,2./3.);
+
+    // the cosine contribution depends on which component is requested
+    double cos_part;
+    if (d==0)
+      cos_part = std::cos(2./3.*phi)*y;
+    else
+      cos_part = -std::cos(2./3.*phi)*x;
+
+    return 2./3.*(std::sin(2./3.*phi)*p(d) + cos_part)
+           /r43;
+  }
+
+
+  // Fill values[i] with component d (0 or 1) evaluated at points[i]; values
+  // must have as many entries as points.
+  void
+  LSingularityGradFunction::value_list (
+    const std::vector<Point<2> > &points,
+    std::vector<double> &values,
+    const unsigned int d) const
+  {
+    AssertIndexRange(d, 2);
+    AssertDimension(values.size(), points.size());
+
+    for (unsigned int q=0; q<points.size(); ++q)
+      {
+        const Point<2> &pt = points[q];
+        const double x = pt(0);
+        const double y = pt(1);
+        const double phi = std::atan2(y,-x)+M_PI;
+        const double r43 = std::pow(x*x+y*y,2./3.);
+
+        // the cosine contribution depends on which component is requested
+        double cos_part;
+        if (d==0)
+          cos_part = std::cos(2./3.*phi)*y;
+        else
+          cos_part = -std::cos(2./3.*phi)*x;
+
+        values[q] = 2./3.*(std::sin(2./3.*phi)*pt(d) + cos_part)
+                    /r43;
+      }
+  }
+
+
+  // For each point, write both components into values[i], which must have
+  // exactly two entries.
+  void
+  LSingularityGradFunction::vector_value_list (
+    const std::vector<Point<2> > &points,
+    std::vector<Vector<double> > &values) const
+  {
+    Assert (values.size() == points.size(),
+            ExcDimensionMismatch(values.size(), points.size()));
+
+    for (unsigned int q=0; q<points.size(); ++q)
+      {
+        AssertDimension(values[q].size(), 2);
+        const double x = points[q](0);
+        const double y = points[q](1);
+        const double phi = std::atan2(y,-x)+M_PI;
+        const double r43 = std::pow(x*x+y*y,2./3.);
+
+        const double sin_term = std::sin(2./3.*phi);
+        const double cos_term = std::cos(2./3.*phi);
+
+        Vector<double> &out = values[q];
+        out(0) = 2./3.*(sin_term*x + cos_term*y)/r43;
+        out(1) = 2./3.*(sin_term*y - cos_term*x)/r43;
+      }
+  }
+
+
+  // Return zero for every point and component; both arguments are ignored.
+  double
+  LSingularityGradFunction::laplacian (const Point<2> & /*p*/,
+                                       const unsigned int /*component*/) const
+  {
+    return 0.;
+  }
+
+
+  // Set one entry of values to zero for every entry of points. The
+  // component argument is ignored.
+  void
+  LSingularityGradFunction::laplacian_list (const std::vector<Point<2> > &points,
+                                            std::vector<double>            &values,
+                                            const unsigned int) const
+  {
+    Assert (values.size() == points.size(),
+            ExcDimensionMismatch(values.size(), points.size()));
+
+    std::vector<double>::iterator out = values.begin();
+    for (std::vector<Point<2> >::const_iterator point = points.begin();
+         point != points.end(); ++point, ++out)
+      *out = 0.;
+  }
+
+
+
+  // Not implemented: triggers the ExcNotImplemented assertion and then
+  // returns a default-constructed tensor. Both arguments are unused.
+  Tensor<1,2>
+  LSingularityGradFunction::gradient (
+    const Point<2> &,
+    const unsigned int) const
+  {
+    Assert(false, ExcNotImplemented());
+    return Tensor<1,2>();
+  }
+
+
+  // Not implemented: the Assert below always fails with
+  // ExcNotImplemented and the output vector is left untouched.
+  void
+  LSingularityGradFunction::gradient_list (
+    const std::vector<Point<2> >  & /*pts*/,
+    std::vector<Tensor<1,2> >     & /*grads*/,
+    const unsigned int              /*comp*/) const
+  {
+    Assert(false, ExcNotImplemented());
+  }
+
+
+  // Not implemented: the Assert below always fails with
+  // ExcNotImplemented and the output is left untouched.
+  void
+  LSingularityGradFunction::vector_gradient_list (
+    const std::vector<Point<2> >             & /*pts*/,
+    std::vector<std::vector<Tensor<1,2> > >  & /*grads*/) const
+  {
+    Assert(false, ExcNotImplemented());
+  }
+
+//////////////////////////////////////////////////////////////////////
+
+  // Returns r^(1/2) * sin(phi/2), where r^2 = x^2+y^2 and
+  // phi = atan2(x,y)+pi. Only the first two coordinates of p are read;
+  // the component argument is ignored.
+  template <int dim>
+  double
+  SlitSingularityFunction<dim>::value (
+    const Point<dim>   &p,
+    const unsigned int /*component*/) const
+  {
+    const double x = p(0);
+    const double y = p(1);
+
+    const double phi = std::atan2(x,y)+M_PI;
+    const double radial_part  = std::pow(x*x+y*y,.25);
+    const double angular_part = std::sin(.5*phi);
+
+    return radial_part * angular_part;
+  }
+
+
+  // List variant of value(): values[i] = r^(1/2) * sin(phi/2) for each
+  // point, with r^2 = x^2+y^2 and phi = atan2(x,y)+pi.
+  template <int dim>
+  void
+  SlitSingularityFunction<dim>::value_list (
+    const std::vector<Point<dim> > &points,
+    std::vector<double>            &values,
+    const unsigned int /*component*/) const
+  {
+    Assert (values.size() == points.size(),
+            ExcDimensionMismatch(values.size(), points.size()));
+
+    for (unsigned int q=0; q<points.size(); ++q)
+      {
+        const Point<dim> &p = points[q];
+        const double x = p(0);
+        const double y = p(1);
+
+        const double phi = std::atan2(x,y)+M_PI;
+        const double radial_part  = std::pow(x*x+y*y,.25);
+        const double angular_part = std::sin(.5*phi);
+
+        values[q] = radial_part * angular_part;
+      }
+  }
+
+
+  // Vector-valued list variant of value(): each values[i] must have
+  // exactly one entry, which receives r^(1/2) * sin(phi/2) with
+  // r^2 = x^2+y^2 and phi = atan2(x,y)+pi.
+  template <int dim>
+  void
+  SlitSingularityFunction<dim>::vector_value_list (
+    const std::vector<Point<dim> > &points,
+    std::vector<Vector<double> > &values) const
+  {
+    Assert (values.size() == points.size(),
+            ExcDimensionMismatch(values.size(), points.size()));
+
+    for (unsigned int q=0; q<points.size(); ++q)
+      {
+        Assert (values[q].size() == 1,
+                ExcDimensionMismatch(values[q].size(), 1));
+
+        const Point<dim> &p = points[q];
+        const double x = p(0);
+        const double y = p(1);
+
+        const double phi = std::atan2(x,y)+M_PI;
+        const double radial_part  = std::pow(x*x+y*y,.25);
+        const double angular_part = std::sin(.5*phi);
+
+        values[q](0) = radial_part * angular_part;
+      }
+  }
+
+
+  // The Laplacian is reported as zero, independent of the point and of
+  // the component asked for.
+  template <int dim>
+  double
+  SlitSingularityFunction<dim>::laplacian (const Point<dim> &/*p*/,
+                                           const unsigned int /*component*/) const
+  {
+    const double zero = 0.;
+    return zero;
+  }
+
+
+  // List variant of laplacian(): stores zero in values[i] for each of
+  // the given points. Both vectors must have the same length.
+  template <int dim>
+  void
+  SlitSingularityFunction<dim>::laplacian_list (
+    const std::vector<Point<dim> > &points,
+    std::vector<double>            &values,
+    const unsigned int /*component*/) const
+  {
+    Assert (values.size() == points.size(),
+            ExcDimensionMismatch(values.size(), points.size()));
+
+    const unsigned int n_points = points.size();
+    for (unsigned int q=0; q!=n_points; ++q)
+      values[q] = 0.;
+  }
+
+
+  // Returns the tensor whose first two entries are
+  //   1/2 (sin(phi/2) x + cos(phi/2) y) / r^(3/2)
+  //   1/2 (sin(phi/2) y - cos(phi/2) x) / r^(3/2)
+  // with phi = atan2(x,y)+pi and r^2 = x^2+y^2. Entries beyond the
+  // second keep whatever the default-constructed tensor holds.
+  template <int dim>
+  Tensor<1,dim>
+  SlitSingularityFunction<dim>::gradient (const Point<dim>   &p,
+                                          const unsigned int /*component*/) const
+  {
+    const double x = p(0);
+    const double y = p(1);
+    const double phi = std::atan2(x,y)+M_PI;
+    const double r64 = std::pow(x*x+y*y,3./4.);
+
+    // angular factors shared by both entries
+    const double sin_part = std::sin(1./2.*phi);
+    const double cos_part = std::cos(1./2.*phi);
+
+    Tensor<1,dim> grad;
+    grad[0] = 1./2.*(sin_part*x + cos_part*y)/r64;
+    grad[1] = 1./2.*(sin_part*y - cos_part*x)/r64;
+    return grad;
+  }
+
+
+  // List variant of gradient(). For each point the first two entries of
+  // gradients[i] are
+  //   1/2 (sin(phi/2) x + cos(phi/2) y) / r^(3/2)
+  //   1/2 (sin(phi/2) y - cos(phi/2) x) / r^(3/2)
+  // and all remaining entries are set to zero.
+  template <int dim>
+  void
+  SlitSingularityFunction<dim>::gradient_list (const std::vector<Point<dim> > &points,
+                                               std::vector<Tensor<1,dim> >    &gradients,
+                                               const unsigned int /*component*/) const
+  {
+    Assert (gradients.size() == points.size(),
+            ExcDimensionMismatch(gradients.size(), points.size()));
+
+    for (unsigned int q=0; q<points.size(); ++q)
+      {
+        const double x = points[q](0);
+        const double y = points[q](1);
+        const double phi = std::atan2(x,y)+M_PI;
+        const double r64 = std::pow(x*x+y*y,3./4.);
+
+        const double sin_part = std::sin(1./2.*phi);
+        const double cos_part = std::cos(1./2.*phi);
+
+        Tensor<1,dim> &grad = gradients[q];
+        grad[0] = 1./2.*(sin_part*x + cos_part*y)/r64;
+        grad[1] = 1./2.*(sin_part*y - cos_part*x)/r64;
+        for (unsigned int d=2; d<dim; ++d)
+          grad[d] = 0.;
+      }
+  }
+
+  // Vector-valued list variant of gradient(). Each gradients[i] must
+  // hold exactly one tensor; its first two entries are
+  //   1/2 (sin(phi/2) x + cos(phi/2) y) / r^(3/2)
+  //   1/2 (sin(phi/2) y - cos(phi/2) x) / r^(3/2)
+  // and all remaining entries are set to zero.
+  template <int dim>
+  void
+  SlitSingularityFunction<dim>::vector_gradient_list (
+    const std::vector<Point<dim> > &points,
+    std::vector<std::vector<Tensor<1,dim> > > &gradients) const
+  {
+    Assert (gradients.size() == points.size(),
+            ExcDimensionMismatch(gradients.size(), points.size()));
+
+    for (unsigned int q=0; q<points.size(); ++q)
+      {
+        Assert(gradients[q].size() == 1,
+               ExcDimensionMismatch(gradients[q].size(), 1));
+
+        const double x = points[q](0);
+        const double y = points[q](1);
+        const double phi = std::atan2(x,y)+M_PI;
+        const double r64 = std::pow(x*x+y*y,3./4.);
+
+        const double sin_part = std::sin(1./2.*phi);
+        const double cos_part = std::cos(1./2.*phi);
+
+        Tensor<1,dim> &grad = gradients[q][0];
+        grad[0] = 1./2.*(sin_part*x + cos_part*y)/r64;
+        grad[1] = 1./2.*(sin_part*y - cos_part*x)/r64;
+        for (unsigned int d=2; d<dim; ++d)
+          grad[d] = 0.;
+      }
+  }
+
+//////////////////////////////////////////////////////////////////////
+
+
+  // Returns r^(1/4) * sin(phi/4), where r^2 = x^2+y^2 and
+  // phi = atan2(x,y)+pi. The component argument is ignored.
+  double
+  SlitHyperSingularityFunction::value (const Point<2>   &p,
+                                       const unsigned int /*component*/) const
+  {
+    const double x = p(0);
+    const double y = p(1);
+
+    const double phi = std::atan2(x,y)+M_PI;
+    const double radial_part  = std::pow(x*x+y*y,.125);
+    const double angular_part = std::sin(.25*phi);
+
+    return radial_part * angular_part;
+  }
+
+
+  // List variant of value(): values[i] = r^(1/4) * sin(phi/4) for each
+  // point, with r^2 = x^2+y^2 and phi = atan2(x,y)+pi.
+  void
+  SlitHyperSingularityFunction::value_list (
+    const std::vector<Point<2> > &points,
+    std::vector<double>            &values,
+    const unsigned int /*component*/) const
+  {
+    Assert (values.size() == points.size(),
+            ExcDimensionMismatch(values.size(), points.size()));
+
+    for (unsigned int q=0; q<points.size(); ++q)
+      {
+        const Point<2> &p = points[q];
+        const double x = p(0);
+        const double y = p(1);
+
+        const double phi = std::atan2(x,y)+M_PI;
+        const double radial_part  = std::pow(x*x+y*y,.125);
+        const double angular_part = std::sin(.25*phi);
+
+        values[q] = radial_part * angular_part;
+      }
+  }
+
+
+  // Vector-valued list variant of value(): each values[i] must have
+  // exactly one entry, which receives r^(1/4) * sin(phi/4) with
+  // r^2 = x^2+y^2 and phi = atan2(x,y)+pi.
+  void
+  SlitHyperSingularityFunction::vector_value_list (
+    const std::vector<Point<2> > &points,
+    std::vector<Vector<double> > &values) const
+  {
+    Assert (values.size() == points.size(),
+            ExcDimensionMismatch(values.size(), points.size()));
+
+    for (unsigned int q=0; q<points.size(); ++q)
+      {
+        Assert(values[q].size() == 1,
+               ExcDimensionMismatch(values[q].size(), 1));
+
+        const Point<2> &p = points[q];
+        const double x = p(0);
+        const double y = p(1);
+
+        const double phi = std::atan2(x,y)+M_PI;
+        const double radial_part  = std::pow(x*x+y*y,.125);
+        const double angular_part = std::sin(.25*phi);
+
+        values[q](0) = radial_part * angular_part;
+      }
+  }
+
+
+  // The Laplacian is reported as zero, independent of the point and of
+  // the component asked for.
+  double
+  SlitHyperSingularityFunction::laplacian (
+    const Point<2> &/*p*/,
+    const unsigned int /*component*/) const
+  {
+    const double zero = 0.;
+    return zero;
+  }
+
+
+  // List variant of laplacian(): stores zero in values[i] for each of
+  // the given points. Both vectors must have the same length.
+  void
+  SlitHyperSingularityFunction::laplacian_list (
+    const std::vector<Point<2> > &points,
+    std::vector<double>            &values,
+    const unsigned int /*component*/) const
+  {
+    Assert (values.size() == points.size(),
+            ExcDimensionMismatch(values.size(), points.size()));
+
+    const unsigned int n_points = points.size();
+    for (unsigned int q=0; q!=n_points; ++q)
+      values[q] = 0.;
+  }
+
+
+  // Returns the tensor with entries
+  //   1/4 (sin(phi/4) x + cos(phi/4) y) / r^(7/4)
+  //   1/4 (sin(phi/4) y - cos(phi/4) x) / r^(7/4)
+  // with phi = atan2(x,y)+pi and r^2 = x^2+y^2.
+  Tensor<1,2>
+  SlitHyperSingularityFunction::gradient (
+    const Point<2>   &p,
+    const unsigned int /*component*/) const
+  {
+    const double x = p(0);
+    const double y = p(1);
+    const double phi = std::atan2(x,y)+M_PI;
+    const double r78 = std::pow(x*x+y*y,7./8.);
+
+    // angular factors shared by both entries
+    const double sin_part = std::sin(1./4.*phi);
+    const double cos_part = std::cos(1./4.*phi);
+
+    Tensor<1,2> grad;
+    grad[0] = 1./4.*(sin_part*x + cos_part*y)/r78;
+    grad[1] = 1./4.*(sin_part*y - cos_part*x)/r78;
+    return grad;
+  }
+
+
+  // List variant of gradient(): for each point gradients[i] receives
+  //   1/4 (sin(phi/4) x + cos(phi/4) y) / r^(7/4)
+  //   1/4 (sin(phi/4) y - cos(phi/4) x) / r^(7/4)
+  // with phi = atan2(x,y)+pi and r^2 = x^2+y^2.
+  void
+  SlitHyperSingularityFunction::gradient_list (
+    const std::vector<Point<2> > &points,
+    std::vector<Tensor<1,2> >    &gradients,
+    const unsigned int /*component*/) const
+  {
+    Assert (gradients.size() == points.size(),
+            ExcDimensionMismatch(gradients.size(), points.size()));
+
+    for (unsigned int q=0; q<points.size(); ++q)
+      {
+        const double x = points[q](0);
+        const double y = points[q](1);
+        const double phi = std::atan2(x,y)+M_PI;
+        const double r78 = std::pow(x*x+y*y,7./8.);
+
+        const double sin_part = std::sin(1./4.*phi);
+        const double cos_part = std::cos(1./4.*phi);
+
+        Tensor<1,2> &grad = gradients[q];
+        grad[0] = 1./4.*(sin_part*x + cos_part*y)/r78;
+        grad[1] = 1./4.*(sin_part*y - cos_part*x)/r78;
+      }
+  }
+
+
+  // Vector-valued list variant of gradient(). Each gradients[i] must
+  // hold exactly one tensor, which receives
+  //   1/4 (sin(phi/4) x + cos(phi/4) y) / r^(7/4)
+  //   1/4 (sin(phi/4) y - cos(phi/4) x) / r^(7/4)
+  // with phi = atan2(x,y)+pi and r^2 = x^2+y^2.
+  void
+  SlitHyperSingularityFunction::vector_gradient_list (
+    const std::vector<Point<2> > &points,
+    std::vector<std::vector<Tensor<1,2> > > &gradients) const
+  {
+    Assert (gradients.size() == points.size(),
+            ExcDimensionMismatch(gradients.size(), points.size()));
+
+    for (unsigned int q=0; q<points.size(); ++q)
+      {
+        Assert(gradients[q].size() == 1,
+               ExcDimensionMismatch(gradients[q].size(), 1));
+
+        const double x = points[q](0);
+        const double y = points[q](1);
+        const double phi = std::atan2(x,y)+M_PI;
+        const double r78 = std::pow(x*x+y*y,7./8.);
+
+        const double sin_part = std::sin(1./4.*phi);
+        const double cos_part = std::cos(1./4.*phi);
+
+        Tensor<1,2> &grad = gradients[q][0];
+        grad[0] = 1./4.*(sin_part*x + cos_part*y)/r78;
+        grad[1] = 1./4.*(sin_part*y - cos_part*x)/r78;
+      }
+  }
+
+//////////////////////////////////////////////////////////////////////
+
+  // Stores the direction and steepness and precomputes the sine and
+  // cosine of the rotation angle used by value(), gradient() and
+  // laplacian(). The angle is 0 in 1d and atan2(direction(0),
+  // direction(1)) in 2d. Other dimensions are not implemented.
+  template<int dim>
+  JumpFunction<dim>::JumpFunction(const Point<dim> &direction,
+                                  const double      steepness)
+    :
+    direction(direction),
+    steepness(steepness)
+  {
+    switch (dim)
+      {
+      case 1:
+        angle = 0;
+        break;
+      case 2:
+        angle = std::atan2(direction(0),direction(1));
+        break;
+      default:
+        // Give angle a defined value before flagging the error, so that
+        // sine and cosine below are never computed from an
+        // uninitialized variable if the assertion does not abort.
+        angle = 0;
+        Assert(false, ExcNotImplemented());
+        break;
+      }
+    sine = std::sin(angle);
+    cosine = std::cos(angle);
+  }
+
+
+
+  // Returns -atan(x) with x = steepness*(-cosine*p(0) + sine*p(1)).
+  // The component argument is ignored.
+  template<int dim>
+  double
+  JumpFunction<dim>::value (const Point<dim>   &p,
+                            const unsigned int /*component*/) const
+  {
+    const double rotated = steepness*(-cosine*p(0)+sine*p(1));
+
+    return -std::atan(rotated);
+  }
+
+
+
+  // List variant of value(): values[i] = -atan(x_i) with
+  // x_i = steepness*(-cosine*p[i](0) + sine*p[i](1)).
+  template<int dim>
+  void
+  JumpFunction<dim>::value_list (const std::vector<Point<dim> > &p,
+                                 std::vector<double>          &values,
+                                 const unsigned int /*component*/) const
+  {
+    Assert (values.size() == p.size(),
+            ExcDimensionMismatch(values.size(), p.size()));
+
+    for (unsigned int q=0; q<p.size(); ++q)
+      {
+        const Point<dim> &point = p[q];
+        const double rotated = steepness*(-cosine*point(0)+sine*point(1));
+        values[q] = -std::atan(rotated);
+      }
+  }
+
+
+  // Returns 2*steepness^2 * x / (1+x^2)^2 with
+  // x = steepness*(-cosine*p(0) + sine*p(1)).
+  template<int dim>
+  double
+  JumpFunction<dim>::laplacian (const Point<dim>   &p,
+                                const unsigned int /*component*/) const
+  {
+    const double rotated = steepness*(-cosine*p(0)+sine*p(1));
+    const double denom   = 1+rotated*rotated;
+
+    return 2*steepness*steepness*rotated/(denom*denom);
+  }
+
+
+  // List variant of laplacian(): values[i] = 2*steepness^2 * x_i /
+  // (1+x_i^2)^2 with x_i = steepness*(-cosine*p[i](0) + sine*p[i](1)).
+  template<int dim>
+  void
+  JumpFunction<dim>::laplacian_list (const std::vector<Point<dim> > &p,
+                                     std::vector<double>          &values,
+                                     const unsigned int /*component*/) const
+  {
+    Assert (values.size() == p.size(),
+            ExcDimensionMismatch(values.size(), p.size()));
+
+    // factor common to all points
+    const double prefactor = 2*steepness*steepness;
+
+    for (unsigned int q=0; q<p.size(); ++q)
+      {
+        const Point<dim> &point = p[q];
+        const double rotated = steepness*(-cosine*point(0)+sine*point(1));
+        const double denom   = 1+rotated*rotated;
+        values[q] = prefactor*rotated/(denom*denom);
+      }
+  }
+
+
+
+  // Gradient of value(), i.e. of -atan(x) with
+  // x = steepness*(-cosine*p(0) + sine*p(1)):
+  //   d/dp(0) =  steepness*cosine / (1+x^2)
+  //   d/dp(1) = -steepness*sine   / (1+x^2)
+  // The divergence of this field is 2*steepness^2*x/(1+x^2)^2, which is
+  // what laplacian() returns. The previous code multiplied by (1+x^2)
+  // instead of dividing and had the wrong signs.
+  template<int dim>
+  Tensor<1,dim>
+  JumpFunction<dim>::gradient (const Point<dim>   &p,
+                               const unsigned int) const
+  {
+    const double x = steepness*(-cosine*p(0)+sine*p(1));
+    const double r = steepness/(1+x*x);
+    Tensor<1,dim> erg;
+    erg[0] = cosine*r;
+    erg[1] = -sine*r;
+    return erg;
+  }
+
+
+
+  // List variant of the gradient of value(), i.e. of -atan(x) with
+  // x = steepness*(-cosine*p[i](0) + sine*p[i](1)):
+  //   d/dp(0) =  steepness*cosine / (1+x^2)
+  //   d/dp(1) = -steepness*sine   / (1+x^2)
+  // The previous code used +cosine when forming x (unlike value() and
+  // laplacian()), multiplied by (1+x^2) instead of dividing, and had
+  // the wrong signs.
+  template<int dim>
+  void
+  JumpFunction<dim>::gradient_list (const std::vector<Point<dim> > &p,
+                                    std::vector<Tensor<1,dim> >  &gradients,
+                                    const unsigned int) const
+  {
+    Assert (gradients.size() == p.size(),
+            ExcDimensionMismatch(gradients.size(), p.size()));
+
+    for (unsigned int i=0; i<p.size(); ++i)
+      {
+        const double x = steepness*(-cosine*p[i](0)+sine*p[i](1));
+        const double r = steepness/(1+x*x);
+        gradients[i][0] = cosine*r;
+        gradients[i][1] = -sine*r;
+      }
+  }
+
+
+
+  // Reports the size of this object as given by sizeof; no dynamically
+  // allocated data is accounted for.
+  template <int dim>
+  std::size_t
+  JumpFunction<dim>::memory_consumption () const
+  {
+    const std::size_t object_size = sizeof (*this);
+    return object_size;
+  }
+
+
+
+
+
+  /* ---------------------- FourierCosineFunction ----------------------- */
+
+
+  // Sets up a function with one component and stores a copy of the
+  // given Fourier coefficients.
+  template <int dim>
+  FourierCosineFunction<dim>::
+  FourierCosineFunction (const Tensor<1,dim> &fourier_coefficients)
+    :
+    Function<dim> (1),
+    fourier_coefficients (fourier_coefficients)
+  {
+  }
+
+
+
+  // Returns cos(k.p), where k are the stored Fourier coefficients.
+  // Only component 0 exists.
+  template <int dim>
+  double
+  FourierCosineFunction<dim>::value (const Point<dim>   &p,
+                                     const unsigned int  component) const
+  {
+    (void)component;
+    Assert (component==0, ExcIndexRange(component,0,1));
+
+    const double phase = fourier_coefficients * p;
+    return std::cos(phase);
+  }
+
+
+
+  // Returns -k sin(k.p), where k are the stored Fourier coefficients.
+  // Only component 0 exists.
+  template <int dim>
+  Tensor<1,dim>
+  FourierCosineFunction<dim>::gradient (const Point<dim>   &p,
+                                        const unsigned int  component) const
+  {
+    (void)component;
+    Assert (component==0, ExcIndexRange(component,0,1));
+
+    const double sine_of_phase = std::sin(fourier_coefficients * p);
+    return -fourier_coefficients * sine_of_phase;
+  }
+
+
+
+  // Returns -(k.k) cos(k.p), where k are the stored Fourier
+  // coefficients. Only component 0 exists.
+  template <int dim>
+  double
+  FourierCosineFunction<dim>::laplacian (const Point<dim>   &p,
+                                         const unsigned int  component) const
+  {
+    (void)component;
+    Assert (component==0, ExcIndexRange(component,0,1));
+
+    const double minus_cosine = -std::cos(fourier_coefficients * p);
+    return (fourier_coefficients * fourier_coefficients) * minus_cosine;
+  }
+
+
+
+
+  /* ---------------------- FourierSineFunction ----------------------- */
+
+
+
+  // Sets up a function with one component and stores a copy of the
+  // given Fourier coefficients.
+  template <int dim>
+  FourierSineFunction<dim>::
+  FourierSineFunction (const Tensor<1,dim> &fourier_coefficients)
+    :
+    Function<dim> (1),
+    fourier_coefficients (fourier_coefficients)
+  {
+  }
+
+
+
+  // Returns sin(k.p), where k are the stored Fourier coefficients.
+  // Only component 0 exists.
+  template <int dim>
+  double
+  FourierSineFunction<dim>::value (const Point<dim>   &p,
+                                   const unsigned int  component) const
+  {
+    (void)component;
+    Assert (component==0, ExcIndexRange(component,0,1));
+
+    const double phase = fourier_coefficients * p;
+    return std::sin(phase);
+  }
+
+
+
+  // Returns k cos(k.p), where k are the stored Fourier coefficients.
+  // Only component 0 exists.
+  template <int dim>
+  Tensor<1,dim>
+  FourierSineFunction<dim>::gradient (const Point<dim>   &p,
+                                      const unsigned int  component) const
+  {
+    (void)component;
+    Assert (component==0, ExcIndexRange(component,0,1));
+
+    const double cosine_of_phase = std::cos(fourier_coefficients * p);
+    return fourier_coefficients * cosine_of_phase;
+  }
+
+
+
+  // Returns -(k.k) sin(k.p), where k are the stored Fourier
+  // coefficients. Only component 0 exists.
+  template <int dim>
+  double
+  FourierSineFunction<dim>::laplacian (const Point<dim>   &p,
+                                       const unsigned int  component) const
+  {
+    (void)component;
+    Assert (component==0, ExcIndexRange(component,0,1));
+
+    const double minus_sine = -std::sin(fourier_coefficients * p);
+    return (fourier_coefficients * fourier_coefficients) * minus_sine;
+  }
+
+
+
+
+  /* ---------------------- FourierSineSum ----------------------- */
+
+
+
+  // Sets up a function with one component and stores copies of the
+  // Fourier coefficient vectors and their weights. At least one
+  // coefficient vector is required and there must be exactly one weight
+  // per coefficient vector.
+  template <int dim>
+  FourierSineSum<dim>::
+  FourierSineSum (const std::vector<Point<dim> > &fourier_coefficients,
+                  const std::vector<double>      &weights)
+    :
+    Function<dim> (1),
+    fourier_coefficients (fourier_coefficients),
+    weights (weights)
+  {
+    // check the arguments passed in
+    Assert (fourier_coefficients.size() > 0,
+            ExcZero());
+    Assert (fourier_coefficients.size() == weights.size(),
+            ExcDimensionMismatch(fourier_coefficients.size(),
+                                 weights.size()));
+  }
+
+
+
+  // Returns sum_s w_s sin(k_s.p) over all stored coefficient vectors
+  // k_s with weights w_s. Only component 0 exists.
+  template <int dim>
+  double
+  FourierSineSum<dim>::value (const Point<dim>   &p,
+                              const unsigned int  component) const
+  {
+    (void)component;
+    Assert (component==0, ExcIndexRange(component,0,1));
+
+    double result = 0;
+    const unsigned int n_terms = weights.size();
+    for (unsigned int term=0; term<n_terms; ++term)
+      {
+        const double phase = fourier_coefficients[term] * p;
+        result += weights[term] * std::sin(phase);
+      }
+
+    return result;
+  }
+
+
+
+  // Returns the gradient of value(), i.e. sum_s w_s k_s cos(k_s.p)
+  // over all stored coefficient vectors k_s with weights w_s.
+  // Only component 0 exists.
+  template <int dim>
+  Tensor<1,dim>
+  FourierSineSum<dim>::gradient (const Point<dim>   &p,
+                                 const unsigned int  component) const
+  {
+    (void)component;
+    Assert (component==0, ExcIndexRange(component,0,1));
+
+    const unsigned int n = weights.size();
+    Tensor<1,dim> sum;
+    for (unsigned int s=0; s<n; ++s)
+      // each term carries its weight, exactly as in value(); the
+      // weight used to be missing here
+      sum += fourier_coefficients[s] *
+             (weights[s] * std::cos(fourier_coefficients[s] * p));
+
+    return sum;
+  }
+
+
+
+  // Returns the Laplacian of value(), i.e. -sum_s w_s (k_s.k_s)
+  // sin(k_s.p) over all stored coefficient vectors k_s with weights
+  // w_s. Only component 0 exists.
+  template <int dim>
+  double
+  FourierSineSum<dim>::laplacian (const Point<dim>   &p,
+                                  const unsigned int  component) const
+  {
+    (void)component;
+    Assert (component==0, ExcIndexRange(component,0,1));
+
+    const unsigned int n = weights.size();
+    double sum = 0;
+    for (unsigned int s=0; s<n; ++s)
+      // each term carries its weight, exactly as in value(); the
+      // weight used to be missing here
+      sum -= weights[s]
+             * (fourier_coefficients[s] * fourier_coefficients[s])
+             * std::sin(fourier_coefficients[s] * p);
+
+    return sum;
+  }
+
+
+
+  /* ---------------------- FourierCosineSum ----------------------- */
+
+
+
+  // Sets up a function with one component and stores copies of the
+  // Fourier coefficient vectors and their weights. At least one
+  // coefficient vector is required and there must be exactly one weight
+  // per coefficient vector.
+  template <int dim>
+  FourierCosineSum<dim>::
+  FourierCosineSum (const std::vector<Point<dim> > &fourier_coefficients,
+                    const std::vector<double>      &weights)
+    :
+    Function<dim> (1),
+    fourier_coefficients (fourier_coefficients),
+    weights (weights)
+  {
+    // check the arguments passed in
+    Assert (fourier_coefficients.size() > 0,
+            ExcZero());
+    Assert (fourier_coefficients.size() == weights.size(),
+            ExcDimensionMismatch(fourier_coefficients.size(),
+                                 weights.size()));
+  }
+
+
+
+  // Returns sum_s w_s cos(k_s.p) over all stored coefficient vectors
+  // k_s with weights w_s. Only component 0 exists.
+  template <int dim>
+  double
+  FourierCosineSum<dim>::value (const Point<dim>   &p,
+                                const unsigned int  component) const
+  {
+    (void)component;
+    Assert (component==0, ExcIndexRange(component,0,1));
+
+    double result = 0;
+    const unsigned int n_terms = weights.size();
+    for (unsigned int term=0; term<n_terms; ++term)
+      {
+        const double phase = fourier_coefficients[term] * p;
+        result += weights[term] * std::cos(phase);
+      }
+
+    return result;
+  }
+
+
+
+  // Returns the gradient of value(), i.e. -sum_s w_s k_s sin(k_s.p)
+  // over all stored coefficient vectors k_s with weights w_s.
+  // Only component 0 exists.
+  template <int dim>
+  Tensor<1,dim>
+  FourierCosineSum<dim>::gradient (const Point<dim>   &p,
+                                   const unsigned int  component) const
+  {
+    (void)component;
+    Assert (component==0, ExcIndexRange(component,0,1));
+
+    const unsigned int n = weights.size();
+    Tensor<1,dim> sum;
+    for (unsigned int s=0; s<n; ++s)
+      // each term carries its weight, exactly as in value(); the
+      // weight used to be missing here
+      sum -= fourier_coefficients[s] *
+             (weights[s] * std::sin(fourier_coefficients[s] * p));
+
+    return sum;
+  }
+
+
+
+  // Returns the Laplacian of value(), i.e. -sum_s w_s (k_s.k_s)
+  // cos(k_s.p) over all stored coefficient vectors k_s with weights
+  // w_s. Only component 0 exists.
+  template <int dim>
+  double
+  FourierCosineSum<dim>::laplacian (const Point<dim>   &p,
+                                    const unsigned int  component) const
+  {
+    (void)component;
+    Assert (component==0, ExcIndexRange(component,0,1));
+
+    const unsigned int n = weights.size();
+    double sum = 0;
+    for (unsigned int s=0; s<n; ++s)
+      // each term carries its weight, exactly as in value(); the
+      // weight used to be missing here
+      sum -= weights[s]
+             * (fourier_coefficients[s] * fourier_coefficients[s])
+             * std::cos(fourier_coefficients[s] * p);
+
+    return sum;
+  }
+
+
+
+
+  /* ---------------------- Monomial ----------------------- */
+
+
+
+  // Sets up a function with n_components components and stores a copy
+  // of the exponents, one per coordinate direction.
+  template <int dim>
+  Monomial<dim>::
+  Monomial (const Tensor<1,dim> &exponents,
+            const unsigned int   n_components)
+    :
+    Function<dim> (n_components),
+    exponents (exponents)
+  {
+  }
+
+
+
+  // Returns the product over all coordinate directions s of
+  // p[s]^exponents[s]. The result does not depend on the component,
+  // which only has to be a valid index. A negative coordinate is only
+  // allowed together with an integer-valued exponent.
+  template <int dim>
+  double
+  Monomial<dim>::value (const Point<dim>   &p,
+                        const unsigned int  component) const
+  {
+    (void)component;
+    Assert (component<this->n_components,
+            ExcIndexRange(component, 0, this->n_components)) ;
+
+    double result = 1;
+    for (unsigned int d=0; d<dim; ++d)
+      {
+        Assert(!(p[d] < 0) || (std::floor(exponents[d]) == exponents[d]),
+               ExcMessage("Exponentiation of a negative base number with "
+                          "a real exponent can't be performed."));
+        result *= std::pow(p[d], exponents[d]);
+      }
+    return result;
+  }
+
+
+
+  // Fills every entry of values with the result of value() for the
+  // corresponding component. values must have n_components entries.
+  template <int dim>
+  void
+  Monomial<dim>::vector_value (const Point<dim>   &p,
+                               Vector<double>     &values) const
+  {
+    Assert (values.size() == this->n_components,
+            ExcDimensionMismatch (values.size(), this->n_components));
+
+    for (unsigned int comp=0; comp<values.size(); ++comp)
+      {
+        const double v = Monomial<dim>::value(p,comp);
+        values(comp) = v;
+      }
+  }
+
+
+
+  // Returns the gradient of the monomial: entry d is
+  //   exponents[d] * p[d]^(exponents[d]-1) * prod_{s!=d} p[s]^exponents[s].
+  // The result does not depend on the component, which only has to be a
+  // valid index (the same check that value() performs; previously only
+  // component 0 was accepted even for objects with several components).
+  template <int dim>
+  Tensor<1,dim>
+  Monomial<dim>::gradient (const Point<dim>   &p,
+                           const unsigned int  component) const
+  {
+    (void)component;
+    Assert (component<this->n_components,
+            ExcIndexRange(component, 0, this->n_components)) ;
+
+    Tensor<1,dim> r;
+    for (unsigned int d=0; d<dim; ++d)
+      {
+        double prod = 1;
+        for (unsigned int s=0; s<dim; ++s)
+          {
+            // a zero exponent in the direction of differentiation gives
+            // a zero derivative; shortcut this at p[s]==0 so that
+            // 0 * 0^(-1) is never evaluated
+            if ((s==d) && (exponents[s] == 0) && (p[s] == 0))
+              {
+                prod = 0;
+                break;
+              }
+            else
+              {
+                if (p[s] < 0)
+                  Assert(std::floor(exponents[s]) == exponents[s],
+                         ExcMessage("Exponentiation of a negative base number with "
+                                    "a real exponent can't be performed."));
+                prod *= (s==d
+                         ?
+                         exponents[s] * std::pow(p[s], exponents[s]-1)
+                         :
+                         std::pow(p[s], exponents[s]));
+              }
+          }
+        r[d] = prod;
+      }
+
+    return r;
+  }
+
+
+
+  // List variant of value(): values[i] receives value(points[i],
+  // component). Both vectors must have the same length.
+  template<int dim>
+  void
+  Monomial<dim>::value_list (const std::vector<Point<dim> > &points,
+                             std::vector<double>            &values,
+                             const unsigned int              component) const
+  {
+    Assert (values.size() == points.size(),
+            ExcDimensionMismatch(values.size(), points.size()));
+
+    for (unsigned int q=0; q<points.size(); ++q)
+      {
+        const double v = Monomial<dim>::value (points[q], component);
+        values[q] = v;
+      }
+  }
+
+
+  // Stores the order, the wave number and the center point used by the
+  // evaluation functions.
+  template <int dim>
+  Bessel1<dim>::Bessel1(
+    const unsigned int order,
+    const double wave_number,
+    const Point<dim> center)
+    :
+    order(order),
+    wave_number(wave_number),
+    center(center)
+  {
+  }
+
+  // Returns jn(order, wave_number*r), where r is the distance of p from
+  // the center. Only implemented for dim==2. Without DEAL_II_HAVE_JN the
+  // function asserts and returns r.
+  template <int dim>
+  double
+  Bessel1<dim>::value(const Point<dim> &p, const unsigned int /*component*/) const
+  {
+    Assert(dim==2, ExcNotImplemented());
+    const double radius = p.distance(center);
+#ifdef DEAL_II_HAVE_JN
+    const double argument = radius*wave_number;
+    return jn(order, argument);
+#else
+    Assert(false, ExcMessage("The Bessel function jn was not found by CMake."));
+    return radius;
+#endif
+  }
+
+
+  // List variant of value(): values[k] = jn(order, wave_number*r_k),
+  // where r_k is the distance of points[k] from the center. Only
+  // implemented for dim==2. Without DEAL_II_HAVE_JN the function only
+  // asserts and writes nothing.
+  template <int dim>
+  void
+  Bessel1<dim>::value_list (
+    const std::vector<Point<dim> > &points,
+    std::vector<double>            &values,
+    const unsigned int /*component*/) const
+  {
+    Assert(dim==2, ExcNotImplemented());
+    AssertDimension(points.size(), values.size());
+    for (unsigned int q=0; q<points.size(); ++q)
+      {
+#ifdef DEAL_II_HAVE_JN
+        const double radius   = points[q].distance(center);
+        const double argument = radius*wave_number;
+        values[q] = jn(order, argument);
+#else
+        Assert(false, ExcMessage("The Bessel function jn was not found by CMake."));
+#endif
+      }
+  }
+
+
+  // Returns wave_number * dJn * (co, si), where (co, si) is the unit
+  // vector pointing from the center to p (zero if p is the center) and
+  // dJn is -J_1 for order 0 and (J_{order-1} - J_{order+1})/2 otherwise,
+  // all evaluated at wave_number*r. Only implemented for dim==2. Without
+  // DEAL_II_HAVE_JN the function asserts and returns a default tensor.
+  template <int dim>
+  Tensor<1,dim>
+  Bessel1<dim>::gradient (const Point<dim>   &p,
+                          const unsigned int /*component*/) const
+  {
+    Assert(dim==2, ExcNotImplemented());
+    const double r = p.distance(center);
+
+    // direction cosines; both are zero at the center itself to avoid a
+    // division by zero
+    const bool at_center = (r==0.);
+    const double co = at_center ? 0. : (p(0)-center(0))/r;
+    const double si = at_center ? 0. : (p(1)-center(1))/r;
+
+#ifdef DEAL_II_HAVE_JN
+    const double dJn = (order==0)
+                       ? (-jn(1, r*wave_number))
+                       : (.5*(jn(order-1, wave_number*r) -jn(order+1, wave_number*r)));
+    Tensor<1,dim> grad;
+    grad[0] = wave_number * co * dJn;
+    grad[1] = wave_number * si * dJn;
+    return grad;
+#else
+    Assert(false, ExcMessage("The Bessel function jn was not found by CMake."));
+    return Tensor<1,dim>();
+#endif
+  }
+
+
+
+  // List variant of gradient(): gradients[k] receives
+  // wave_number * dJn * (co, si) for points[k], where (co, si) is the
+  // unit vector from the center to the point (zero at the center) and
+  // dJn is -J_1 for order 0 and (J_{order-1} - J_{order+1})/2 otherwise.
+  // Only implemented for dim==2. Without DEAL_II_HAVE_JN dJn is taken as
+  // zero and the function asserts.
+  template <int dim>
+  void
+  Bessel1<dim>::gradient_list (
+    const std::vector<Point<dim> > &points,
+    std::vector<Tensor<1,dim> >    &gradients,
+    const unsigned int /*component*/) const
+  {
+    Assert(dim==2, ExcNotImplemented());
+    AssertDimension(points.size(), gradients.size());
+    for (unsigned int q=0; q<points.size(); ++q)
+      {
+        const Point<dim> &p = points[q];
+        const double r = p.distance(center);
+
+        // direction cosines; both are zero at the center itself to
+        // avoid a division by zero
+        const bool at_center = (r==0.);
+        const double co = at_center ? 0. : (p(0)-center(0))/r;
+        const double si = at_center ? 0. : (p(1)-center(1))/r;
+
+#ifdef DEAL_II_HAVE_JN
+        const double dJn = (order==0)
+                           ? (-jn(1, r*wave_number))
+                           : (.5*(jn(order-1, wave_number*r) -jn(order+1, wave_number*r)));
+#else
+        const double dJn = 0.;
+        Assert(false, ExcMessage("The Bessel function jn was not found by CMake."));
+#endif
+        Tensor<1,dim> &grad = gradients[q];
+        grad[0] = wave_number * co * dJn;
+        grad[1] = wave_number * si * dJn;
+      }
+  }
+
+
+
+  namespace
+  {
+    // Linear interpolation in a 1d table. ix[0] is the index of the
+    // left end of the interval to interpolate in, and xi[0] is the
+    // position inside that interval in unit coordinates.
+    double interpolate (const Table<1,double> &data_values,
+                        const TableIndices<1> &ix,
+                        const Point<1>        &xi)
+    {
+      const double left  = data_values[ix[0]];
+      const double right = data_values[ix[0]+1];
+
+      return ((1-xi[0])*left
+              +
+              xi[0]*right);
+    }
+
+    // Bilinear interpolation in a 2d table. ix holds the indices of the
+    // lower left corner of the cell and p_unit the position inside the
+    // cell in unit coordinates.
+    double interpolate (const Table<2,double> &data_values,
+                        const TableIndices<2> &ix,
+                        const Point<2>        &p_unit)
+    {
+      // interpolate along the first direction on the two edges of the
+      // cell, then combine the two results along the second direction
+      const double lower_edge
+        = ((1-p_unit[0])*data_values[ix[0]][ix[1]]
+           +
+           p_unit[0]*data_values[ix[0]+1][ix[1]]);
+      const double upper_edge
+        = ((1-p_unit[0])*data_values[ix[0]][ix[1]+1]
+           +
+           p_unit[0]*data_values[ix[0]+1][ix[1]+1]);
+
+      return (lower_edge*(1-p_unit[1])
+              +
+              upper_edge*p_unit[1]);
+    }
+
+    // Trilinear interpolation in a 3d table. ix holds the indices of
+    // the lowest corner of the cell and p_unit the position inside the
+    // cell in unit coordinates.
+    double interpolate (const Table<3,double> &data_values,
+                        const TableIndices<3> &ix,
+                        const Point<3>        &p_unit)
+    {
+      // bilinear interpolation on the face with third index ix[2] ...
+      const double near_face
+        = (((1-p_unit[0])*data_values[ix[0]][ix[1]][ix[2]]
+            +
+            p_unit[0]*data_values[ix[0]+1][ix[1]][ix[2]])*(1-p_unit[1])
+           +
+           ((1-p_unit[0])*data_values[ix[0]][ix[1]+1][ix[2]]
+            +
+            p_unit[0]*data_values[ix[0]+1][ix[1]+1][ix[2]])*p_unit[1]);
+
+      // ... and on the face with third index ix[2]+1
+      const double far_face
+        = (((1-p_unit[0])*data_values[ix[0]][ix[1]][ix[2]+1]
+            +
+            p_unit[0]*data_values[ix[0]+1][ix[1]][ix[2]+1])*(1-p_unit[1])
+           +
+           ((1-p_unit[0])*data_values[ix[0]][ix[1]+1][ix[2]+1]
+            +
+            p_unit[0]*data_values[ix[0]+1][ix[1]+1][ix[2]+1])*p_unit[1]);
+
+      return (near_face * (1-p_unit[2])
+              +
+              far_face * p_unit[2]);
+    }
+
+
+    // Gradient of the interpolant in a 1d table. ix[0] is the index of
+    // the left end of the interval and dx[0] its width; the result is
+    // the difference quotient over the interval, so p_unit is unused.
+    Tensor<1,1> gradient_interpolate (const Table<1,double> &data_values,
+                                      const TableIndices<1> &ix,
+                                      const Point<1>        &p_unit,
+                                      const Point<1>        &dx)
+    {
+      (void)p_unit;
+
+      const double left  = data_values[ix[0]];
+      const double right = data_values[ix[0]+1];
+
+      Tensor<1,1> grad;
+      grad[0] = (right - left) / dx[0];
+      return grad;
+    }
+
+
+    // Gradient of the bilinear interpolant in a 2d table. ix holds the
+    // indices of the lower left corner of the cell, p_unit the position
+    // inside the cell in unit coordinates and dx the cell widths.
+    Tensor<1,2> gradient_interpolate (const Table<2,double> &data_values,
+                                      const TableIndices<2> &ix,
+                                      const Point<2>        &p_unit,
+                                      const Point<2>        &dx)
+    {
+      // corner values v_ab, where a is the offset in the first and b
+      // the offset in the second index
+      const double v_00 = data_values[ix[0]][ix[1]];
+      const double v_10 = data_values[ix[0]+1][ix[1]];
+      const double v_01 = data_values[ix[0]][ix[1]+1];
+      const double v_11 = data_values[ix[0]+1][ix[1]+1];
+
+      Tensor<1,2> grad;
+      grad[0] = ((1-p_unit[1])*(v_10-v_00) + p_unit[1]*(v_11-v_01))/dx[0];
+      grad[1] = ((1-p_unit[0])*(v_01-v_00) + p_unit[0]*(v_11-v_10))/dx[1];
+      return grad;
+    }
+
+
+    // Gradient of the trilinear interpolant in a 3d table. ix holds the
+    // indices of the lowest corner of the cell, p_unit the position
+    // inside the cell in unit coordinates and dx the cell widths.
+    Tensor<1,3> gradient_interpolate (const Table<3,double> &data_values,
+                                      const TableIndices<3> &ix,
+                                      const Point<3>        &p_unit,
+                                      const Point<3>        &dx)
+    {
+      // corner values v_abc, where a, b and c are the offsets in the
+      // first, second and third index
+      const double v_000 = data_values[ix[0]][ix[1]][ix[2]];
+      const double v_100 = data_values[ix[0]+1][ix[1]][ix[2]];
+      const double v_010 = data_values[ix[0]][ix[1]+1][ix[2]];
+      const double v_001 = data_values[ix[0]][ix[1]][ix[2]+1];
+      const double v_110 = data_values[ix[0]+1][ix[1]+1][ix[2]];
+      const double v_101 = data_values[ix[0]+1][ix[1]][ix[2]+1];
+      const double v_011 = data_values[ix[0]][ix[1]+1][ix[2]+1];
+      const double v_111 = data_values[ix[0]+1][ix[1]+1][ix[2]+1];
+
+      Tensor<1,3> grad;
+      grad[0] = ((1-p_unit[2])
+                 *
+                 ((1-p_unit[1])*(v_100-v_000) + p_unit[1]*(v_110-v_010))
+                 +
+                 p_unit[2]
+                 *
+                 ((1-p_unit[1])*(v_101-v_001) + p_unit[1]*(v_111-v_011))
+                )/dx[0];
+      grad[1] = ((1-p_unit[2])
+                 *
+                 ((1-p_unit[0])*(v_010-v_000) + p_unit[0]*(v_110-v_100))
+                 +
+                 p_unit[2]
+                 *
+                 ((1-p_unit[0])*(v_011-v_001) + p_unit[0]*(v_111-v_101))
+                )/dx[1];
+      grad[2] = ((1-p_unit[1])
+                 *
+                 ((1-p_unit[0])*(v_001-v_000) + p_unit[0]*(v_101-v_100))
+                 +
+                 p_unit[1]
+                 *
+                 ((1-p_unit[0])*(v_011-v_010) + p_unit[0]*(v_111-v_110))
+                )/dx[2];
+
+      return grad;
+    }
+  }
+
+  // Stores copies of the coordinate arrays and of the data table. In
+  // every direction there must be at least two coordinates, they must
+  // be strictly ascending, and the data table must have as many entries
+  // in that direction as there are coordinates.
+  template <int dim>
+  InterpolatedTensorProductGridData<dim>::
+  InterpolatedTensorProductGridData(const std_cxx11::array<std::vector<double>,dim> &coordinate_values,
+                                    const Table<dim,double>                         &data_values)
+    :
+    coordinate_values (coordinate_values),
+    data_values (data_values)
+  {
+    for (unsigned int d=0; d<dim; ++d)
+      {
+        Assert (coordinate_values[d].size() >= 2,
+                ExcMessage ("Coordinate arrays must have at least two coordinate values!"));
+
+        // compare each coordinate with its successor
+        for (unsigned int k=1; k<coordinate_values[d].size(); ++k)
+          Assert (coordinate_values[d][k-1] < coordinate_values[d][k],
+                  ExcMessage ("Coordinate arrays must be sorted in strictly ascending order."));
+
+        Assert (data_values.size()[d] == coordinate_values[d].size(),
+                ExcMessage ("Data and coordinate tables do not have the same size."));
+      }
+  }
+
+
+
+  // Evaluates the interpolant at p. In every direction the interval
+  // containing p[d] is located, p[d] is mapped to unit coordinates
+  // within it (clamped to [0,1], so that points outside the table range
+  // get the value on the boundary), and the result is computed by
+  // interpolate(). Only component 0 exists.
+  template <int dim>
+  double
+  InterpolatedTensorProductGridData<dim>::value(const Point<dim> &p,
+                                                const unsigned int component) const
+  {
+    (void)component;
+    Assert (component == 0,
+            ExcMessage ("This is a scalar function object, the component can only be zero."));
+
+    // step 1: for each direction, find the index of the left end of the
+    // interval to interpolate in
+    TableIndices<dim> ix;
+    for (unsigned int d=0; d<dim; ++d)
+      {
+        const std::vector<double> &coords = coordinate_values[d];
+
+        // index of the first coordinate that is not smaller than p[d]
+        ix[d] = (std::lower_bound (coords.begin(), coords.end(), p[d])
+                 - coords.begin());
+
+        // we want the coordinate to the left of that one. if p[d] lies
+        // beyond the last coordinate, use the last interval, which
+        // starts at index size()-2; if p[d] lies at or before the first
+        // coordinate, stay at index 0
+        if (ix[d] == coords.size())
+          ix[d] = coords.size()-2;
+        else if (ix[d] > 0)
+          --ix[d];
+      }
+
+    // step 2: position of p within the selected interval/rectangle/box
+    // in unit coordinates, truncated to [0,1] to accommodate points
+    // that lie outside the range
+    Point<dim> p_unit;
+    for (unsigned int d=0; d<dim; ++d)
+      {
+        const double left  = coordinate_values[d][ix[d]];
+        const double right = coordinate_values[d][ix[d]+1];
+
+        p_unit[d] = std::max(std::min((p[d]-left) / (right-left),
+                                      1.),
+                             0.);
+      }
+
+    return interpolate (data_values, ix, p_unit);
+  }
+
+  // Evaluate the gradient of the interpolant at p. Uses the same cell lookup and
+  // truncation of the local coordinates as value(), then calls gradient_interpolate().
+  template <int dim>
+  Tensor<1, dim>
+  InterpolatedTensorProductGridData<dim>::gradient(const Point<dim> &p,
+                                                   const unsigned int component) const
+  {
+    (void)component;
+    Assert (component == 0,
+            ExcMessage ("This is a scalar function object, the component can only be zero."));
+
+    // find out where this data point lies
+    TableIndices<dim> ix;
+    for (unsigned int d=0; d<dim; ++d)
+      {
+        ix[d] = (std::lower_bound (coordinate_values[d].begin(),
+                                   coordinate_values[d].end(),
+                                   p[d])
+                 - coordinate_values[d].begin());
+        // past-the-end: use the last cell; otherwise step to the cell's left end (unless already 0)
+        if (ix[d] == coordinate_values[d].size())
+          ix[d] = coordinate_values[d].size()-2;
+        else if (ix[d] > 0)
+          --ix[d];
+      }
+
+    Point<dim> dx;   // extent of the selected cell in each coordinate direction
+    for (unsigned int d=0; d<dim; ++d)
+      dx[d] = coordinate_values[d][ix[d]+1]-coordinate_values[d][ix[d]];
+
+    Point<dim> p_unit;   // position of p inside the cell, truncated to [0,1]
+    for (unsigned int d=0; d<dim; ++d)
+      p_unit[d] = std::max(std::min((p[d]-coordinate_values[d][ix[d]]) / dx[d],
+                                    1.),
+                           0.0);
+    // NOTE(review): outside the grid value() is constant, yet this appears to return the edge cell's slope - confirm
+    return gradient_interpolate(data_values, ix, p_unit, dx);
+  }
+
+  // Store the interval end points, subinterval counts and data table, and check
+  // (via Assert) that they are consistent in every coordinate direction.
+  template <int dim>
+  InterpolatedUniformGridData<dim>::
+  InterpolatedUniformGridData(const std_cxx11::array<std::pair<double,double>,dim> &interval_endpoints,
+                              const std_cxx11::array<unsigned int,dim>             &n_subintervals,
+                              const Table<dim,double>                              &data_values)
+    :
+    interval_endpoints (interval_endpoints),
+    n_subintervals (n_subintervals),
+    data_values (data_values)
+  {
+    for (unsigned int d=0; d<dim; ++d)   // one set of checks per coordinate direction
+      {
+        Assert (n_subintervals[d] >= 1,
+                ExcMessage ("There needs to be at least one subinterval in each "
+                            "coordinate direction."));
+        Assert (interval_endpoints[d].first < interval_endpoints[d].second,
+                ExcMessage ("The interval in each coordinate direction needs "
+                            "to have positive size"));
+        Assert (data_values.size()[d] == n_subintervals[d]+1,
+                ExcMessage ("The data table does not have the correct size."));
+      }
+  }
+
+
+  template <int dim>
+  double
+  InterpolatedUniformGridData<dim>::value(const Point<dim> &p,
+                                          const unsigned int component) const
+  {
+    (void)component;
+    Assert (component == 0,
+            ExcMessage ("This is a scalar function object, the component can only be zero."));
+
+    // For every coordinate direction determine (i) the index of the
+    // subinterval that is used for interpolation and (ii) the position of
+    // p inside that subinterval, scaled to [0,1]. Points outside the
+    // overall interval are assigned to the first or last subinterval and
+    // their local coordinate is truncated to 0 or 1, respectively.
+    TableIndices<dim> ix;
+    Point<dim>        p_unit;
+    for (unsigned int d=0; d<dim; ++d)
+      {
+        // end points of the whole interval and width of one subinterval
+        const double lower = interval_endpoints[d].first;
+        const double upper = interval_endpoints[d].second;
+        const double delta_x = ((upper - lower) /
+                                n_subintervals[d]);
+
+        // pick the subinterval
+        if (p[d] <= lower)
+          ix[d] = 0;
+        else if (p[d] >= upper-delta_x)
+          ix[d] = n_subintervals[d]-1;
+        else
+          ix[d] = static_cast<unsigned int>((p[d]-lower) / delta_x);
+
+        // relative position inside it, clipped to the unit interval
+        const double xi = (p[d]-lower-ix[d]*delta_x)/delta_x;
+        p_unit[d] = std::max(std::min(xi, 1.), 0.);
+      }
+
+    return interpolate (data_values, ix, p_unit);
+  }
+
+  /* ---------------------- Polynomial ----------------------- */
+
+
+  // Store the exponent table and coefficients; each table row holds the dim exponents of one monomial.
+  template <int dim>
+  Polynomial<dim>::
+  Polynomial(const Table<2,double>     &exponents,
+             const std::vector<double> &coefficients)
+    :
+    Function<dim> (1),
+    exponents (exponents),
+    coefficients(coefficients)
+  {   // shape checks: one row per coefficient, one column per space dimension
+    Assert(exponents.n_rows() == coefficients.size(),
+           ExcDimensionMismatch(exponents.n_rows(), coefficients.size()));
+    Assert(exponents.n_cols() == dim,
+           ExcDimensionMismatch(exponents.n_cols(), dim));
+  }
+
+  // Evaluate sum_m coefficients[m] * prod_s p[s]^exponents[m][s] at the point p.
+  // Only component 0 is accepted.
+  template <int dim>
+  double
+  Polynomial<dim>::value (const Point<dim> &p,
+                          const unsigned int component) const
+  {
+    (void)component;
+    Assert (component==0, ExcIndexRange(component,0,1));
+
+    double prod;      // value of the current monomial without its coefficient
+    double sum = 0;   // running sum over all monomials
+    for (unsigned int monom = 0; monom < exponents.n_rows(); ++monom)
+      {
+        prod = 1;
+        for (unsigned int s=0; s< dim; ++s)
+          {
+            if (p[s] < 0)   // a negative base is only accepted together with an integer-valued exponent
+              Assert(std::floor(exponents[monom][s]) == exponents[monom][s],
+                     ExcMessage("Exponentiation of a negative base number with "
+                                "a real exponent can't be performed."));
+            prod *= std::pow(p[s], exponents[monom][s]);
+          }
+        sum += coefficients[monom]*prod;
+      }
+    return sum;
+  }
+
+
+  // Fill values[q] with the value of this polynomial at points[q], for every q.
+  template <int dim>
+  void
+  Polynomial<dim>::value_list (const std::vector<Point<dim> > &points,
+                               std::vector<double>    &values,
+                               const unsigned int     component) const
+  {
+    Assert (values.size() == points.size(),
+            ExcDimensionMismatch(values.size(), points.size()));
+    // evaluate point by point; the explicitly qualified call names Polynomial<dim>::value directly
+    for (unsigned int q=0; q<points.size(); ++q)
+      values[q] = Polynomial<dim>::value (points[q], component);
+  }
+
+  // Gradient of the polynomial at p: entry d is the sum over all monomials of the
+  // coefficient times the partial derivative of the monomial with respect to p[d].
+  template <int dim>
+  Tensor<1,dim>
+  Polynomial<dim>::gradient (const Point<dim> &p,
+                             const unsigned int component) const
+  {
+    (void)component;
+    Assert (component==0, ExcIndexRange(component,0,1));
+
+    Tensor<1,dim> r;
+    // one pass over all monomials per direction of differentiation d
+    for (unsigned int d=0; d<dim; ++d)
+      {
+        double sum = 0;
+
+        for (unsigned int monom = 0; monom < exponents.n_rows(); ++monom)
+          {
+            double prod = 1;
+            for (unsigned int s=0; s < dim; ++s)
+              {
+                if ((s==d)&&(exponents[monom][s] == 0)&&(p[s] == 0))   // would otherwise be 0 * pow(0,-1)
+                  {
+                    prod = 0;   // the monomial does not depend on p[d], so this term is zero
+                    break;
+                  }
+                else
+                  {
+                    if (p[s] < 0)
+                      Assert(std::floor(exponents[monom][s]) == exponents[monom][s],
+                             ExcMessage("Exponentiation of a negative base number with "
+                                        "a real exponent can't be performed."));
+                    prod *= (s==d
+                             ?
+                             exponents[monom][s] * std::pow(p[s], exponents[monom][s]-1)
+                             :
+                             std::pow(p[s], exponents[monom][s]));
+                  }
+              }
+            sum += coefficients[monom]*prod;
+          }
+        r[d] = sum;
+      }
+    return r;
+  }
+
+
+// explicit instantiations
+  template class SquareFunction<1>;
+  template class SquareFunction<2>;
+  template class SquareFunction<3>;
+  template class Q1WedgeFunction<1>;
+  template class Q1WedgeFunction<2>;
+  template class Q1WedgeFunction<3>;
+  template class PillowFunction<1>;
+  template class PillowFunction<2>;
+  template class PillowFunction<3>;
+  template class CosineFunction<1>;
+  template class CosineFunction<2>;
+  template class CosineFunction<3>;
+  template class CosineGradFunction<1>;
+  template class CosineGradFunction<2>;
+  template class CosineGradFunction<3>;
+  template class ExpFunction<1>;
+  template class ExpFunction<2>;
+  template class ExpFunction<3>;
+  template class JumpFunction<1>;
+  template class JumpFunction<2>;
+  template class JumpFunction<3>;
+  template class FourierCosineFunction<1>;
+  template class FourierCosineFunction<2>;
+  template class FourierCosineFunction<3>;
+  template class FourierSineFunction<1>;
+  template class FourierSineFunction<2>;
+  template class FourierSineFunction<3>;
+  template class FourierCosineSum<1>;
+  template class FourierCosineSum<2>;
+  template class FourierCosineSum<3>;
+  template class FourierSineSum<1>;
+  template class FourierSineSum<2>;
+  template class FourierSineSum<3>;
+  template class SlitSingularityFunction<2>;
+  template class SlitSingularityFunction<3>;
+  template class Monomial<1>;
+  template class Monomial<2>;
+  template class Monomial<3>;
+  template class Bessel1<2>;
+  template class Bessel1<3>;
+  template class InterpolatedTensorProductGridData<1>;
+  template class InterpolatedTensorProductGridData<2>;
+  template class InterpolatedTensorProductGridData<3>;
+  template class InterpolatedUniformGridData<1>;
+  template class InterpolatedUniformGridData<2>;
+  template class InterpolatedUniformGridData<3>;
+  template class Polynomial<1>;
+  template class Polynomial<2>;
+  template class Polynomial<3>;
+}
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/function_lib_cutoff.cc b/source/base/function_lib_cutoff.cc
new file mode 100644
index 0000000..46141d7
--- /dev/null
+++ b/source/base/function_lib_cutoff.cc
@@ -0,0 +1,327 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/tensor.h>
+#include <deal.II/base/point.h>
+#include <deal.II/base/function_lib.h>
+#include <deal.II/lac/vector.h>
+
+#include <cmath>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace Functions
+{
+  template<int dim>
+  CutOffFunctionBase<dim>::CutOffFunctionBase (const double r,
+                                               const Point<dim> p,
+                                               const unsigned int n_components,
+                                               const unsigned int select)
+    :
+    Function<dim> (n_components),
+    center(p),          // point from which the derived classes measure distance
+    radius(r),          // distance threshold used by the derived classes
+    selected(select)    // component to act on; derived classes compare it with no_component
+  {}
+
+  // Replace the stored center point by p.
+  template<int dim>
+  void
+  CutOffFunctionBase<dim>::new_center (const Point<dim> &p)
+  {
+    center = p;
+  }
+
+  // Replace the stored radius by r.
+  template<int dim>
+  void
+  CutOffFunctionBase<dim>::new_radius (const double r)
+  {
+    radius = r;
+  }
+
+//////////////////////////////////////////////////////////////////////
+  // Forwards all arguments unchanged to the CutOffFunctionBase constructor.
+  template<int dim>
+  CutOffFunctionLinfty<dim>::CutOffFunctionLinfty (const double r,
+                                                   const Point<dim> p,
+                                                   const unsigned int n_components,
+                                                   const unsigned int select)
+    :
+    CutOffFunctionBase<dim> (r, p, n_components, select)
+  {}
+
+  // Indicator of the open ball around center: 1 inside, 0 elsewhere and for non-selected components.
+  template<int dim>
+  double
+  CutOffFunctionLinfty<dim>::value (const Point<dim>   &p,
+                                    const unsigned int component) const
+  {
+    const bool component_is_active = (this->selected==CutOffFunctionBase<dim>::no_component) || (component==this->selected);
+    if (!component_is_active)
+      return 0.;
+    const bool inside_ball = (this->center.distance(p)<this->radius);
+    return (inside_ball ? 1. : 0.);
+  }
+
+  // Evaluate the indicator of the open ball at every point of the list.
+  template<int dim>
+  void
+  CutOffFunctionLinfty<dim>::value_list (const std::vector<Point<dim> > &points,
+                                         std::vector<double>            &values,
+                                         const unsigned int component) const
+  {
+    Assert (values.size() == points.size(),
+            ExcDimensionMismatch(values.size(), points.size()));
+    Assert (component < this->n_components,
+            ExcIndexRange(component,0,this->n_components));
+    // components other than the selected one are identically zero
+    const bool component_is_active = (this->selected==CutOffFunctionBase<dim>::no_component) || (component==this->selected);
+    if (!component_is_active)
+      {
+        std::fill (values.begin(), values.end(), 0.);
+        return;
+      }
+    for (unsigned int q=0; q<values.size(); ++q)
+      values[q] = (this->center.distance(points[q])<this->radius) ? 1. : 0.;
+  }
+
+  // For every point write the ball indicator into all entries or only into the selected one.
+  template<int dim>
+  void
+  CutOffFunctionLinfty<dim>::vector_value_list (
+    const std::vector<Point<dim> > &points,
+    std::vector<Vector<double> >           &values) const
+  {
+    Assert (values.size() == points.size(),
+            ExcDimensionMismatch(values.size(), points.size()));
+
+    const bool all_components = (this->selected==CutOffFunctionBase<dim>::no_component);
+    for (unsigned int q=0; q<values.size(); ++q)
+      {
+        const double indicator = (this->center.distance(points[q])<this->radius) ? 1. : 0.;
+        if (all_components)
+          values[q] = indicator;
+        else
+          {
+            values[q] = 0;
+            values[q](this->selected) = indicator;
+          }
+      }
+  }
+
+  // Forwards all arguments unchanged to the CutOffFunctionBase constructor.
+  template<int dim>
+  CutOffFunctionW1<dim>::CutOffFunctionW1 (const double     r,
+                                           const Point<dim> p,
+                                           const unsigned int n_components,
+                                           const unsigned int select)
+    :
+    CutOffFunctionBase<dim> (r, p, n_components, select)
+  {}
+
+  // Returns radius minus the distance to center inside the ball, 0 elsewhere and for non-selected components.
+  template<int dim>
+  double
+  CutOffFunctionW1<dim>::value (const Point<dim>   &p,
+                                const unsigned int component) const
+  {
+    const bool component_is_active = (this->selected==CutOffFunctionBase<dim>::no_component) || (component==this->selected);
+    if (!component_is_active)
+      return 0.;
+    // decreases linearly from radius at the center to zero at distance radius
+    const double dist = this->center.distance(p);
+    if (dist<this->radius)
+      return (this->radius-dist);
+    return 0.;
+  }
+
+  // Evaluate radius minus distance (or zero outside the ball) at every point of the list.
+  template<int dim>
+  void
+  CutOffFunctionW1<dim>::value_list (const std::vector<Point<dim> > &points,
+                                     std::vector<double>            &values,
+                                     const unsigned int component) const
+  {
+    Assert (values.size() == points.size(),
+            ExcDimensionMismatch(values.size(), points.size()));
+    const bool component_is_active = (this->selected==CutOffFunctionBase<dim>::no_component) || (component==this->selected);
+    if (!component_is_active)
+      {
+        std::fill (values.begin(), values.end(), 0.);
+        return;
+      }
+    for (unsigned int q=0; q<values.size(); ++q)
+      {
+        const double dist = this->center.distance(points[q]);
+        values[q] = ((dist<this->radius) ? (this->radius-dist) : 0.);
+      }
+  }
+
+  // For every point compute radius minus distance (zero outside the ball) and write it
+  // into all entries of the result vector or only into the selected one.
+  template<int dim>
+  void
+  CutOffFunctionW1<dim>::vector_value_list (
+    const std::vector<Point<dim> > &points,
+    std::vector<Vector<double> >           &values) const
+  {
+    Assert (values.size() == points.size(),
+            ExcDimensionMismatch(values.size(), points.size()));
+
+    const bool all_components = (this->selected==CutOffFunctionBase<dim>::no_component);
+    for (unsigned int q=0; q<values.size(); ++q)
+      {
+        const double dist = this->center.distance(points[q]);
+        const double cone = (dist<this->radius) ? (this->radius-dist) : 0.;
+        if (all_components)
+          values[q] = cone;
+        else
+          {
+            values[q] = 0;
+            values[q](this->selected) = cone;
+          }
+      }
+  }
+
+  // Forwards all arguments unchanged to the CutOffFunctionBase constructor.
+  template<int dim>
+  CutOffFunctionCinfty<dim>::CutOffFunctionCinfty (const double     r,
+                                                   const Point<dim> p,
+                                                   const unsigned int n_components,
+                                                   const unsigned int select)
+    :
+    CutOffFunctionBase<dim> (r, p, n_components, select)
+  {}
+
+  // Bump function E*exp(-r^2/(r^2-d^2)) inside the ball (d = distance to center), zero outside.
+  template<int dim>
+  double
+  CutOffFunctionCinfty<dim>::value (const Point<dim>   &p,
+                                    const unsigned int component) const
+  {
+    const bool component_is_active = (this->selected==CutOffFunctionBase<dim>::no_component) || (component==this->selected);
+    if (!component_is_active)
+      return 0.;
+    const double d = this->center.distance(p);
+    const double r = this->radius;
+    if (d>=r)
+      return 0.;
+    // exponents below -50 are treated as giving exactly zero
+    const double e = -r*r/(r*r-d*d);
+    if (e<-50)
+      return 0.;
+    return numbers::E * exp(e);
+  }
+
+  // Evaluate the bump function at every point of the list (zeros for a non-selected component).
+  template<int dim>
+  void
+  CutOffFunctionCinfty<dim>::value_list (const std::vector<Point<dim> > &points,
+                                         std::vector<double>            &values,
+                                         const unsigned int component) const
+  {
+    Assert (values.size() == points.size(),
+            ExcDimensionMismatch(values.size(), points.size()));
+
+    const bool component_is_active = (this->selected==CutOffFunctionBase<dim>::no_component) || (component==this->selected);
+    if (!component_is_active)
+      {
+        std::fill (values.begin(), values.end(), 0.);
+        return;
+      }
+
+    const double r = this->radius;
+    for (unsigned int q=0; q<values.size(); ++q)
+      {
+        const double d = this->center.distance(points[q]);
+        double bump = 0.;
+        if (!(d>=r))
+          {
+            const double e = -r*r/(r*r-d*d);
+            if (!(e<-50))
+              bump = numbers::E * exp(e);
+          }
+        values[q] = bump;
+      }
+  }
+
+  // For every point compute the bump function and write it into all entries or only the selected one.
+  template<int dim>
+  void
+  CutOffFunctionCinfty<dim>::vector_value_list (
+    const std::vector<Point<dim> > &points,
+    std::vector<Vector<double> >           &values) const
+  {
+    Assert (values.size() == points.size(),
+            ExcDimensionMismatch(values.size(), points.size()));
+
+    for (unsigned int k=0; k<values.size(); ++k)
+      {
+        const double d = this->center.distance(points[k]);
+        const double r = this->radius;
+        double val = 0.;
+        if (d<r)
+          {
+            // same cut-off as value() and value_list(): only exponents below -50 give zero
+            const double e = -r*r/(r*r-d*d);
+            if (e>=-50)
+              val = numbers::E * exp(e);
+          }
+
+        if (this->selected==CutOffFunctionBase<dim>::no_component)
+          values[k] = val;
+        else
+          {
+            values[k] = 0;
+            values[k](this->selected) = val;
+          }
+      }
+  }
+
+  // Gradient of the C-infinity cut-off function. Zero outside the ball and at its center.
+  // NOTE(review): unlike value(), this ignores the component / `selected` setting - confirm intended.
+  template<int dim>
+  Tensor<1,dim>
+  CutOffFunctionCinfty<dim>::gradient (const Point<dim>   &p,
+                                       const unsigned int) const
+  {
+    const double d = this->center.distance(p);
+    const double r = this->radius;
+    if (d>=r || d==0.)   // d==0: (p-center)/d below would be 0/0 = NaN; the gradient vanishes there
+      return Tensor<1,dim>();
+    const double e = -d*d/(r-d)/(r+d);
+    return  ((e<-50) ?
+             Point<dim>() :
+             (p-this->center)/d*(-2.0*r*r/pow(-r*r+d*d,2.0)*d*exp(e)));
+  }
+
+
+// explicit instantiations
+  template class CutOffFunctionLinfty <1>;
+  template class CutOffFunctionLinfty <2>;
+  template class CutOffFunctionLinfty <3>;
+
+  template class CutOffFunctionW1 <1>;
+  template class CutOffFunctionW1 <2>;
+  template class CutOffFunctionW1 <3>;
+
+  template class CutOffFunctionCinfty <1>;
+  template class CutOffFunctionCinfty <2>;
+  template class CutOffFunctionCinfty <3>;
+}
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/function_parser.cc b/source/base/function_parser.cc
new file mode 100644
index 0000000..50703b2
--- /dev/null
+++ b/source/base/function_parser.cc
@@ -0,0 +1,475 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/function_parser.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/thread_management.h>
+#include <deal.II/lac/vector.h>
+#include <cmath>
+#include <map>
+
+DEAL_II_DISABLE_EXTRA_DIAGNOSTICS
+#include <boost/random.hpp>
+DEAL_II_ENABLE_EXTRA_DIAGNOSTICS
+
+#ifdef DEAL_II_WITH_MUPARSER
+DEAL_II_DISABLE_EXTRA_DIAGNOSTICS
+#include <muParser.h>
+DEAL_II_ENABLE_EXTRA_DIAGNOSTICS
+#else
+
+
+
+namespace fparser
+{
+  class FunctionParser
+  {};
+}
+#endif
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// Passes the number of components and the initial time on to the Function<dim> base class.
+template <int dim>
+FunctionParser<dim>::FunctionParser(const unsigned int n_components,
+                                    const double       initial_time)
+  :
+  Function<dim>(n_components, initial_time)
+{}
+
+
+// The destructor body is empty.
+template <int dim>
+FunctionParser<dim>::~FunctionParser()
+{}
+
+#ifdef DEAL_II_WITH_MUPARSER
+
+template <int dim>
+void FunctionParser<dim>::initialize (const std::string              &variables,
+                                      const std::vector<std::string> &expressions,
+                                      const std::map<std::string, double> &constants,
+                                      const bool time_dependent)
+{
+  this->fp.clear(); // this will reset all thread-local objects
+  // store the inputs; init_muparser() reads them when it builds the parsers
+  this->constants = constants;
+  this->var_names = Utilities::split_string_list(variables, ',');
+  this->expressions = expressions;
+  AssertThrow(((time_dependent)?dim+1:dim) == var_names.size(),
+              ExcMessage("Wrong number of variables"));
+
+  // We check that the number of
+  // components of this function
+  // matches the number of components
+  // passed in as a vector of
+  // strings.
+  AssertThrow(this->n_components == expressions.size(),
+              ExcInvalidExpressionSize(this->n_components,
+                                       expressions.size()) );
+
+  // Record how many variables the
+  // expressions depend on. There are
+  // two cases: for time dependent
+  // functions it is the dimension
+  // plus one (the last variable
+  // being time), otherwise it is
+  // equal to the dimension. That
+  // the variables string contains
+  // exactly this many names has
+  // already been verified by the
+  // AssertThrow near the top of
+  // this function.
+  //
+  if (time_dependent)
+    n_vars = dim+1;
+  else
+    n_vars = dim;
+
+  // create a parser object for the current thread we can then query
+  // in value() and vector_value(). this is not strictly necessary
+  // because a user may never call these functions on the current
+  // thread, but it gets us error messages about wrong formulas right
+  // away
+  init_muparser ();
+
+  // finally set the initialization bit
+  initialized = true;
+}
+
+
+
+namespace internal
+{
+  // convert double into int, rounding halfway cases away from zero
+  int mu_round(double val)
+  {
+    return static_cast<int>(val + ((val>=0.0) ? 0.5 : -0.5) );
+  }
+
+  double mu_if(double condition, double thenvalue, double elsevalue)   // condition is true if it rounds to nonzero
+  {
+    if (mu_round(condition))
+      return thenvalue;
+    else
+      return elsevalue;
+  }
+
+  double mu_or(double left, double right)   // logical OR of the rounded operands, as 0.0 or 1.0
+  {
+    return (mu_round(left)) || (mu_round(right));
+  }
+
+  double mu_and(double left, double right)   // logical AND of the rounded operands, as 0.0 or 1.0
+  {
+    return (mu_round(left)) && (mu_round(right));
+  }
+
+  double mu_int(double value)   // nearest integer (see mu_round), returned as double
+  {
+    return static_cast<double>(mu_round(value));
+  }
+
+  double mu_ceil(double value)
+  {
+    return ceil(value);
+  }
+
+  double mu_floor(double value)
+  {
+    return floor(value);
+  }
+
+  double mu_cot(double value)   // cotangent
+  {
+    return 1.0/tan(value);
+  }
+
+  double mu_csc(double value)   // cosecant
+  {
+    return 1.0/sin(value);
+  }
+
+  double mu_sec(double value)   // secant
+  {
+    return 1.0/cos(value);
+  }
+
+  double mu_log(double value)   // natural logarithm
+  {
+    return log(value);
+  }
+
+  double mu_pow(double a, double b)
+  {
+    return std::pow(a, b);
+  }
+
+  double mu_erfc(double value)
+  {
+    return erfc(value);
+  }
+
+  // returns a random value in the range [0,1) initializing the generator
+  // with the given seed
+  double mu_rand_seed(double seed)
+  {
+    static Threads::Mutex rand_mutex;
+    Threads::Mutex::ScopedLock lock(rand_mutex);
+
+    static boost::random::uniform_real_distribution<> uniform_distribution(0,1);
+
+    // for each seed a unique random number generator is created on first use,
+    // which is initialized with the seed itself; look it up only once per call
+    typedef std::map<double, boost::random::mt19937> RngMap;
+    static RngMap rng_map;
+    RngMap::iterator rng = rng_map.find(seed);
+    if (rng == rng_map.end())
+      rng = rng_map.insert(RngMap::value_type(seed, boost::random::mt19937(static_cast<unsigned int>(seed)))).first;
+    return uniform_distribution(rng->second);
+  }
+
+  // returns a random value in the range [0,1)
+  double mu_rand()
+  {
+    static Threads::Mutex rand_mutex;
+    Threads::Mutex::ScopedLock lock(rand_mutex);
+    static boost::random::uniform_real_distribution<> uniform_distribution(0,1);
+    static boost::random::mt19937 rng(static_cast<unsigned long>(std::time(0)));
+    return uniform_distribution(rng);
+  }
+
+}
+
+// Build, for the calling thread, one muparser object per component and hand it the component's expression.
+template <int dim>
+void FunctionParser<dim>:: init_muparser() const
+{
+  // check that we have not already initialized the parser on the
+  // current thread, i.e., that the current function is only called
+  // once per thread
+  Assert (fp.get().size()==0, ExcInternalError());
+
+  // initialize the objects for the current thread (fp.get() and
+  // vars.get())
+  fp.get().resize(this->n_components);
+  vars.get().resize(var_names.size());
+  for (unsigned int component=0; component<this->n_components; ++component)
+    {
+      for (std::map< std::string, double >::const_iterator constant = constants.begin();
+           constant != constants.end(); ++constant)
+        {
+          fp.get()[component].DefineConst(constant->first.c_str(), constant->second);
+        }
+
+      for (unsigned int iv=0; iv<var_names.size(); ++iv)
+        fp.get()[component].DefineVar(var_names[iv].c_str(), &vars.get()[iv]);
+
+      // define some compatibility functions:
+      fp.get()[component].DefineFun("if",internal::mu_if, true);
+      fp.get()[component].DefineOprt("|", internal::mu_or, 1);
+      fp.get()[component].DefineOprt("&", internal::mu_and, 2);
+      fp.get()[component].DefineFun("int", internal::mu_int, true);
+      fp.get()[component].DefineFun("ceil", internal::mu_ceil, true);
+      fp.get()[component].DefineFun("cot", internal::mu_cot, true);
+      fp.get()[component].DefineFun("csc", internal::mu_csc, true);
+      fp.get()[component].DefineFun("floor", internal::mu_floor, true);
+      fp.get()[component].DefineFun("sec", internal::mu_sec, true);
+      fp.get()[component].DefineFun("log", internal::mu_log, true);
+      fp.get()[component].DefineFun("pow", internal::mu_pow, true);
+      fp.get()[component].DefineFun("erfc", internal::mu_erfc, true);
+      fp.get()[component].DefineFun("rand_seed", internal::mu_rand_seed, true);
+      fp.get()[component].DefineFun("rand", internal::mu_rand, true);
+
+      try
+        {
+          // muparser expects that functions have no
+          // space between the name of the function and the opening
+          // parenthesis. this is awkward because it is not backward
+          // compatible to the library we used to use before muparser
+          // (the fparser library) but also makes no real sense.
+          // consequently, in the expressions we set, remove any space
+          // we may find after function names
+          std::string transformed_expression = expressions[component];
+
+          const char *function_names[] =
+          {
+            // functions predefined by muparser
+            "sin",
+            "cos",
+            "tan",
+            "asin",
+            "acos",
+            "atan",
+            "sinh",
+            "cosh",
+            "tanh",
+            "asinh",
+            "acosh",
+            "atanh",
+            "atan2",
+            "log2",
+            "log10",
+            "log",
+            "ln",
+            "exp",
+            "sqrt",
+            "sign",
+            "rint",
+            "abs",
+            "min",
+            "max",
+            "sum",
+            "avg",
+            // functions we define ourselves above
+            "if",
+            "int",
+            "ceil",
+            "cot",
+            "csc",
+            "floor",
+            "sec",
+            "pow",
+            "erfc",
+            "rand",
+            "rand_seed"
+          };
+          for (unsigned int f=0; f<sizeof(function_names)/sizeof(function_names[0]); ++f)
+            {
+              const std::string  function_name        = function_names[f];
+              const unsigned int function_name_length = function_name.size();
+
+              std::string::size_type pos = 0;
+              while (true)
+                {
+                  // try to find any occurrences of the function name
+                  pos = transformed_expression.find (function_name, pos);
+                  if (pos == std::string::npos)
+                    break;
+                  // (plain substring search: names embedded in longer identifiers match as well)
+                  // replace whitespace until there no longer is any
+                  while ((pos+function_name_length<transformed_expression.size())
+                         &&
+                         ((transformed_expression[pos+function_name_length] == ' ')
+                          ||
+                          (transformed_expression[pos+function_name_length] == '\t')))
+                    transformed_expression.erase (transformed_expression.begin()+pos+function_name_length);
+
+                  // move the current search position by the size of the
+                  // actual function name
+                  pos += function_name_length;
+                }
+            }
+
+          // now use the transformed expression
+          fp.get()[component].SetExpr(transformed_expression);
+        }
+      catch (mu::ParserError &e)
+        {
+          std::cerr << "Message:  <" << e.GetMsg() << ">\n";
+          std::cerr << "Formula:  <" << e.GetExpr() << ">\n";
+          std::cerr << "Token:    <" << e.GetToken() << ">\n";
+          std::cerr << "Position: <" << e.GetPos() << ">\n";
+          std::cerr << "Errc:     <" << e.GetCode() << ">" << std::endl;
+          AssertThrow(false, ExcParseError(e.GetCode(), e.GetMsg().c_str()));
+        }
+    }
+}
+
+// Convenience overload: split `expression` at ';' into a list of expressions
+// and forward to the overload taking a vector of expressions.
+template <int dim>
+void FunctionParser<dim>::initialize (const std::string &vars,
+                                      const std::string &expression,
+                                      const std::map<std::string, double> &constants,
+                                      const bool time_dependent)
+{
+  initialize(vars, Utilities::split_string_list(expression, ';'),
+             constants, time_dependent);
+}
+
+// Evaluate the expression of the given component at p (and at the current time if a time
+// variable was declared). Parser errors are printed to std::cerr and rethrown as ExcParseError.
+template <int dim>
+double FunctionParser<dim>::value (const Point<dim>  &p,
+                                   const unsigned int component) const
+{
+  Assert (initialized==true, ExcNotInitialized());
+  Assert (component < this->n_components,
+          ExcIndexRange(component, 0, this->n_components));
+
+  // initialize the parser if that hasn't happened yet on the current thread
+  if (fp.get().size() == 0)
+    init_muparser();
+
+  for (unsigned int i=0; i<dim; ++i)   // the first dim variables are the coordinates of p
+    vars.get()[i] = p(i);
+  if (dim != n_vars)                   // one extra variable: it receives the current time
+    vars.get()[dim] = this->get_time();
+
+  try
+    {
+      return fp.get()[component].Eval();
+    }
+  catch (mu::ParserError &e)
+    {
+      std::cerr << "Message:  <" << e.GetMsg() << ">\n";
+      std::cerr << "Formula:  <" << e.GetExpr() << ">\n";
+      std::cerr << "Token:    <" << e.GetToken() << ">\n";
+      std::cerr << "Position: <" << e.GetPos() << ">\n";
+      std::cerr << "Errc:     <" << e.GetCode() << ">" << std::endl;
+      AssertThrow(false, ExcParseError(e.GetCode(), e.GetMsg().c_str()));
+      return 0.0;   // presumably only here to give every path a return statement
+    }
+}
+
+// Evaluate all components at p and store them in `values`.
+// NOTE(review): unlike value(), a mu::ParserError thrown by Eval() is not caught here - confirm intended.
+template <int dim>
+void FunctionParser<dim>::vector_value (const Point<dim> &p,
+                                        Vector<double>   &values) const
+{
+  Assert (initialized==true, ExcNotInitialized());
+  Assert (values.size() == this->n_components,
+          ExcDimensionMismatch (values.size(), this->n_components));
+
+
+  // initialize the parser if that hasn't happened yet on the current thread
+  if (fp.get().size() == 0)
+    init_muparser();
+
+  for (unsigned int i=0; i<dim; ++i)   // the first dim variables are the coordinates of p
+    vars.get()[i] = p(i);
+  if (dim != n_vars)                   // one extra variable: it receives the current time
+    vars.get()[dim] = this->get_time();
+
+  for (unsigned int component = 0; component < this->n_components;
+       ++component)
+    values(component) = fp.get()[component].Eval();
+}
+
+#else
+
+// muparser is unavailable: report that in every build mode instead of silently doing nothing.
+template <int dim>
+void
+FunctionParser<dim>::initialize(const std::string &,
+                                const std::vector<std::string> &,
+                                const std::map<std::string, double> &,
+                                const bool)
+{
+  AssertThrow(false, ExcNeedsFunctionparser());
+}
+
+template <int dim>
+void
+FunctionParser<dim>::initialize(const std::string &,
+                                const std::string &,
+                                const std::map<std::string, double> &,
+                                const bool)
+{
+  AssertThrow(false, ExcNeedsFunctionparser());   // muparser is unavailable: fail in every build mode
+}
+
+
+// Without muparser no value can be computed: throw in every build mode rather than silently return 0.
+template <int dim>
+double FunctionParser<dim>::value (
+  const Point<dim> &, unsigned int) const
+{
+  AssertThrow(false, ExcNeedsFunctionparser());
+  return 0.;
+}
+
+// Without muparser nothing can be computed: throw in every build mode rather than leave the output untouched.
+template <int dim>
+void FunctionParser<dim>::vector_value (
+  const Point<dim> &, Vector<double> &) const
+{
+  AssertThrow(false, ExcNeedsFunctionparser());
+}
+
+
+#endif
+
+// Explicit Instantiations.
+
+template class FunctionParser<1>;
+template class FunctionParser<2>;
+template class FunctionParser<3>;
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/function_time.cc b/source/base/function_time.cc
new file mode 100644
index 0000000..2c34202
--- /dev/null
+++ b/source/base/function_time.cc
@@ -0,0 +1,24 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/function_time.templates.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+// explicit instantiation
+#include "function_time.inst"
+
+DEAL_II_NAMESPACE_CLOSE
+
diff --git a/source/base/function_time.inst.in b/source/base/function_time.inst.in
new file mode 100644
index 0000000..56f36f1
--- /dev/null
+++ b/source/base/function_time.inst.in
@@ -0,0 +1,24 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+for (S : REAL_SCALARS)  // one explicit instantiation per type in REAL_SCALARS
+{
+  template class FunctionTime<S>;
+}
+
+for (S : COMPLEX_SCALARS)  // and one per type in COMPLEX_SCALARS
+{
+  template class FunctionTime<S>;
+}
diff --git a/source/base/geometry_info.cc b/source/base/geometry_info.cc
new file mode 100644
index 0000000..8d7b354
--- /dev/null
+++ b/source/base/geometry_info.cc
@@ -0,0 +1,1972 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/geometry_info.h>
+#include <deal.II/base/tensor.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// dim=0 specialization: reports zero children for any refinement case.
+unsigned int GeometryInfo<0>::n_children(const RefinementCase<0> &)
+{
+  return 0;
+}
+
+
+
+template <int dim> const unsigned int GeometryInfo<dim>::max_children_per_cell; // definitions of the static constants; no initializers are given in this file
+template <int dim> const unsigned int GeometryInfo<dim>::faces_per_cell;
+template <int dim> const unsigned int GeometryInfo<dim>::max_children_per_face;
+template <int dim> const unsigned int GeometryInfo<dim>::vertices_per_cell;
+template <int dim> const unsigned int GeometryInfo<dim>::vertices_per_face;
+template <int dim> const unsigned int GeometryInfo<dim>::lines_per_face;
+template <int dim> const unsigned int GeometryInfo<dim>::quads_per_face;
+template <int dim> const unsigned int GeometryInfo<dim>::lines_per_cell;
+template <int dim> const unsigned int GeometryInfo<dim>::quads_per_cell;
+template <int dim> const unsigned int GeometryInfo<dim>::hexes_per_cell;
+
+
+using namespace numbers;
+
+// Workaround for the icc compiler: take the address of one of the static
+// constants so that the compiler defines (and not only declares) these variables.
+namespace internal
+{
+  void foo (const unsigned int *) {}  // empty sink that merely receives the address
+
+  template <int dim>
+  void define_variables ()
+  {
+    foo(&::dealii::GeometryInfo<dim>::vertices_per_cell);
+  }
+
+  template void define_variables<2> ();  // instantiated for dim = 2, 3 and 4 only
+  template void define_variables<3> ();
+  template void define_variables<4> ();
+}
+
+
+
+// Per-face table of normal directions: faces 2k and 2k+1 both carry k.
+template <>
+const unsigned int GeometryInfo<1>::unit_normal_direction[faces_per_cell]
+  = { 0, 0 };
+
+// 2d: four faces, directions 0 and 1.
+template <>
+const unsigned int GeometryInfo<2>::unit_normal_direction[faces_per_cell]
+  = { 0, 0, 1, 1 };
+
+// 3d: six faces, directions 0, 1 and 2.
+template <>
+const unsigned int GeometryInfo<3>::unit_normal_direction[faces_per_cell]
+  = { 0, 0, 1, 1, 2, 2 };
+
+// 4d: eight faces, directions 0 through 3.
+template <>
+const unsigned int GeometryInfo<4>::unit_normal_direction[faces_per_cell]
+  = { 0, 0, 1, 1, 2, 2, 3, 3 };
+
+
+
+// Per-face sign table: even-numbered faces carry -1, odd-numbered faces +1.
+template <>
+const int GeometryInfo<1>::unit_normal_orientation[faces_per_cell]
+  = { -1, 1 };
+
+// 2d: same alternating pattern for four faces.
+template <>
+const int GeometryInfo<2>::unit_normal_orientation[faces_per_cell]
+  = { -1, 1, -1, 1 };
+
+// 3d: same alternating pattern for six faces.
+template <>
+const int GeometryInfo<3>::unit_normal_orientation[faces_per_cell]
+  = { -1, 1, -1, 1, -1, 1 };
+
+// 4d: same alternating pattern for eight faces.
+template <>
+const int GeometryInfo<4>::unit_normal_orientation[faces_per_cell]
+  = { -1, 1, -1, 1, -1, 1, -1, 1 };
+
+
+
+// Per-face table of the opposite face: faces 2k and 2k+1 map to each other.
+template <>
+const unsigned int GeometryInfo<1>::opposite_face[faces_per_cell]
+  = { 1, 0 };
+
+// 2d: pairs (0,1) and (2,3).
+template <>
+const unsigned int GeometryInfo<2>::opposite_face[faces_per_cell]
+  = { 1, 0, 3, 2 };
+
+// 3d: pairs (0,1), (2,3) and (4,5).
+template <>
+const unsigned int GeometryInfo<3>::opposite_face[faces_per_cell]
+  = { 1, 0, 3, 2, 5, 4 };
+
+// 4d: pairs (0,1), (2,3), (4,5) and (6,7).
+template <>
+const unsigned int GeometryInfo<4>::opposite_face[faces_per_cell]
+  = { 1, 0, 3, 2, 5, 4, 7, 6 };
+
+
+
+template <> // vertex renumbering tables, one per dimension (presumably UCD order -> deal.II order, judging by the name)
+const unsigned int GeometryInfo<1>::ucd_to_deal[GeometryInfo<1>::vertices_per_cell]
+  = { 0, 1}; // 1d: identity
+
+template <>
+const unsigned int GeometryInfo<2>::ucd_to_deal[GeometryInfo<2>::vertices_per_cell]
+  = { 0, 1, 3, 2}; // 2d: the last two vertices are swapped
+
+template <>
+const unsigned int GeometryInfo<3>::ucd_to_deal[GeometryInfo<3>::vertices_per_cell]
+  = { 0, 1, 5, 4, 2, 3, 7, 6};
+
+template <>
+const unsigned int GeometryInfo<4>::ucd_to_deal[GeometryInfo<4>::vertices_per_cell]
+  = {  invalid_unsigned_int, // 4d: no renumbering is provided, all 16 entries are invalid_unsigned_int
+       invalid_unsigned_int,
+       invalid_unsigned_int,
+       invalid_unsigned_int,
+       invalid_unsigned_int,
+       invalid_unsigned_int,
+       invalid_unsigned_int,
+       invalid_unsigned_int,
+       invalid_unsigned_int,
+       invalid_unsigned_int,
+       invalid_unsigned_int,
+       invalid_unsigned_int,
+       invalid_unsigned_int,
+       invalid_unsigned_int,
+       invalid_unsigned_int,
+       invalid_unsigned_int
+    };
+
+
+template <> // vertex renumbering tables, one per dimension (presumably OpenDX order -> deal.II order, judging by the name)
+const unsigned int GeometryInfo<1>::dx_to_deal[GeometryInfo<1>::vertices_per_cell]
+  = { 0, 1}; // 1d: identity
+
+template <>
+const unsigned int GeometryInfo<2>::dx_to_deal[GeometryInfo<2>::vertices_per_cell]
+  = { 0, 2, 1, 3}; // 2d: the two middle vertices are swapped
+
+template <>
+const unsigned int GeometryInfo<3>::dx_to_deal[GeometryInfo<3>::vertices_per_cell]
+  = { 0, 4, 2, 6, 1, 5, 3, 7};
+
+template <>
+const unsigned int GeometryInfo<4>::dx_to_deal[GeometryInfo<4>::vertices_per_cell]
+  = {  invalid_unsigned_int, // 4d: no renumbering is provided, all 16 entries are invalid_unsigned_int
+       invalid_unsigned_int,
+       invalid_unsigned_int,
+       invalid_unsigned_int,
+       invalid_unsigned_int,
+       invalid_unsigned_int,
+       invalid_unsigned_int,
+       invalid_unsigned_int,
+       invalid_unsigned_int,
+       invalid_unsigned_int,
+       invalid_unsigned_int,
+       invalid_unsigned_int,
+       invalid_unsigned_int,
+       invalid_unsigned_int,
+       invalid_unsigned_int,
+       invalid_unsigned_int
+    };
+
+template <> // tables with one row per vertex and dim face numbers per row
+const unsigned int GeometryInfo<1>::vertex_to_face
+[GeometryInfo<1>::vertices_per_cell][1]
+= { { 0 },
+  { 1 }
+};
+
+template <>
+const unsigned int GeometryInfo<2>::vertex_to_face
+[GeometryInfo<2>::vertices_per_cell][2]
+= { { 0, 2 }, // first column: face 0 or 1; second column: face 2 or 3
+  { 1, 2 },
+  { 0, 3 },
+  { 1, 3 }
+};
+
+template <>
+const unsigned int GeometryInfo<3>::vertex_to_face
+[GeometryInfo<3>::vertices_per_cell][3]
+= { { 0, 2, 4 }, // columns: face 0 or 1, face 2 or 3, face 4 or 5
+  { 1, 2, 4 },
+  { 0, 3, 4 },
+  { 1, 3, 4 },
+  { 0, 2, 5 },
+  { 1, 2, 5 },
+  { 0, 3, 5 },
+  { 1, 3, 5 }
+};
+
+template <>
+const unsigned int GeometryInfo<4>::vertex_to_face
+[GeometryInfo<4>::vertices_per_cell][4]
+= { { invalid_unsigned_int, invalid_unsigned_int, invalid_unsigned_int, invalid_unsigned_int }, // 4d: every entry is invalid_unsigned_int
+  { invalid_unsigned_int, invalid_unsigned_int, invalid_unsigned_int, invalid_unsigned_int },
+  { invalid_unsigned_int, invalid_unsigned_int, invalid_unsigned_int, invalid_unsigned_int },
+  { invalid_unsigned_int, invalid_unsigned_int, invalid_unsigned_int, invalid_unsigned_int },
+  { invalid_unsigned_int, invalid_unsigned_int, invalid_unsigned_int, invalid_unsigned_int },
+  { invalid_unsigned_int, invalid_unsigned_int, invalid_unsigned_int, invalid_unsigned_int },
+  { invalid_unsigned_int, invalid_unsigned_int, invalid_unsigned_int, invalid_unsigned_int },
+  { invalid_unsigned_int, invalid_unsigned_int, invalid_unsigned_int, invalid_unsigned_int },
+  { invalid_unsigned_int, invalid_unsigned_int, invalid_unsigned_int, invalid_unsigned_int },
+  { invalid_unsigned_int, invalid_unsigned_int, invalid_unsigned_int, invalid_unsigned_int },
+  { invalid_unsigned_int, invalid_unsigned_int, invalid_unsigned_int, invalid_unsigned_int },
+  { invalid_unsigned_int, invalid_unsigned_int, invalid_unsigned_int, invalid_unsigned_int },
+  { invalid_unsigned_int, invalid_unsigned_int, invalid_unsigned_int, invalid_unsigned_int },
+  { invalid_unsigned_int, invalid_unsigned_int, invalid_unsigned_int, invalid_unsigned_int },
+  { invalid_unsigned_int, invalid_unsigned_int, invalid_unsigned_int, invalid_unsigned_int },
+  { invalid_unsigned_int, invalid_unsigned_int, invalid_unsigned_int, invalid_unsigned_int }
+};
+
+
+// Child count per refinement case, looked up by the case's numeric value:
+// index 0 gives 0, every other index i gives 2^(number of bits set in i).
+template<int dim>
+unsigned int GeometryInfo<dim>::n_children(const RefinementCase<dim> &ref_case)
+{
+  static const unsigned int children_by_case[RefinementCase<3>::cut_xyz+1]=
+  {0, 2, 2, 4, 2, 4, 4, 8};
+  return children_by_case[ref_case];
+}
+
+
+// 1d: always asserts with ExcImpossibleInDim; the 0 is a placeholder result.
+template<>
+unsigned int GeometryInfo<1>::n_subfaces(const internal::SubfaceCase<1> &)
+{
+  Assert(false, ExcImpossibleInDim(1));
+  return 0;
+}
+
+
+
+// 2d: two subfaces for case_x, none for every other subface case.
+template<>
+unsigned int GeometryInfo<2>::n_subfaces(const internal::SubfaceCase<2> &subface_case)
+{
+  return (subface_case == internal::SubfaceCase<2>::case_x) ? 2 : 0;
+}
+
+
+
+// 3d: subface count per subface case, indexed by the case's numeric value.
+template<>
+unsigned int GeometryInfo<3>::n_subfaces(const internal::SubfaceCase<3> &subface_case)
+{
+  static const unsigned int subface_count[internal::SubfaceCase<3>::case_isotropic+1]=
+  {0, 2, 3, 3, 4, 2, 3, 3, 4, 4};
+  return subface_count[subface_case];
+}
+
+
+// 1d: the ratio is one, whatever the subface case and subface number.
+template<>
+double GeometryInfo<1>::subface_ratio(const internal::SubfaceCase<1> &,
+                                      const unsigned int)
+{
+  return 1.0;
+}
+
+
+template<>
+double
+GeometryInfo<2>::subface_ratio(const internal::SubfaceCase<2> &subface_case,
+                               const unsigned int) // subface number: not needed in 2d
+{
+  const unsigned int dim=2;
+
+  double ratio=1; // remains 1 if the default branch below is taken
+  switch (subface_case)
+    {
+    case internal::SubfaceCase<dim>::case_none:
+      // Strictly speaking, an
+      // Assert(false,ExcInternalError())
+      // would be the right
+      // choice here, but
+      // unfortunately this
+      // function is
+      // also called for faces
+      // without children (see
+      // tests/fe/mapping.cc).
+//          Assert(false, ExcMessage("Face has no subfaces."));
+      // Furthermore, assign the
+      // following value, as
+      // otherwise the
+      // bits/volume_x tests
+      // break
+      ratio=1./GeometryInfo<dim>::max_children_per_face;
+      break;
+    case internal::SubfaceCase<dim>::case_x:
+      ratio=0.5;
+      break;
+    default:
+      // there should be no
+      // cases left
+      Assert(false, ExcInternalError());
+      break;
+    }
+
+  return ratio;
+}
+
+
+template<>
+double
+GeometryInfo<3>::subface_ratio(const internal::SubfaceCase<3> &subface_case,
+                               const unsigned int subface_no)
+{
+  const unsigned int dim=3;
+
+  double ratio=1; // remains 1 if the default branch below is taken
+  switch (subface_case)
+    {
+    case internal::SubfaceCase<dim>::case_none:
+      // Strictly speaking, an
+      // Assert(false,ExcInternalError())
+      // would be the right
+      // choice here, but
+      // unfortunately this
+      // function is
+      // also called for faces
+      // without children (see
+      // tests/bits/mesh_3d_16.cc).
+      // Assign the following value to
+      // avoid diffs in
+      // tests/bits/mesh_3d_16
+      ratio=1./GeometryInfo<dim>::max_children_per_face;
+      break;
+    case internal::SubfaceCase<dim>::case_x:
+    case internal::SubfaceCase<dim>::case_y:
+      ratio=0.5;
+      break;
+    case internal::SubfaceCase<dim>::case_xy:
+    case internal::SubfaceCase<dim>::case_x1y2y:
+    case internal::SubfaceCase<dim>::case_y1x2x:
+      ratio=0.25;
+      break;
+    case internal::SubfaceCase<dim>::case_x1y:
+    case internal::SubfaceCase<dim>::case_y1x:
+      if (subface_no<2) // subfaces 0 and 1 get a quarter, any later one a half
+        ratio=0.25;
+      else
+        ratio=0.5;
+      break;
+    case internal::SubfaceCase<dim>::case_x2y:
+    case internal::SubfaceCase<dim>::case_y2x:
+      if (subface_no==0) // subface 0 gets a half, all later ones a quarter
+        ratio=0.5;
+      else
+        ratio=0.25;
+      break;
+    default:
+      // there should be no
+      // cases left
+      Assert(false, ExcInternalError());
+      break;
+    }
+
+  return ratio;
+}
+
+
+
+// 1d: always asserts with ExcImpossibleInDim; the returned no_refinement
+// value is only a placeholder.
+template<>
+RefinementCase<0>
+GeometryInfo<1>::face_refinement_case(const RefinementCase<1> &,
+                                      const unsigned int,
+                                      const bool, const bool, const bool)
+{
+  Assert(false, ExcImpossibleInDim(1));
+
+  return RefinementCase<0>::no_refinement;
+}
+
+
+template<>
+RefinementCase<1>
+GeometryInfo<2>::face_refinement_case(const RefinementCase<2> &cell_refinement_case,
+                                      const unsigned int face_no,
+                                      const bool, // the three orientation flags are unused in 2d
+                                      const bool,
+                                      const bool)
+{
+  const unsigned int dim=2;
+  Assert(cell_refinement_case<RefinementCase<dim>::isotropic_refinement+1,
+         ExcIndexRange(cell_refinement_case, 0, RefinementCase<dim>::isotropic_refinement+1));
+  Assert(face_no<GeometryInfo<dim>::faces_per_cell,
+         ExcIndexRange(face_no, 0, GeometryInfo<dim>::faces_per_cell));
+
+  static const RefinementCase<dim-1> // one row per cell refinement case; columns: faces 0/1, faces 2/3
+  ref_cases[RefinementCase<dim>::isotropic_refinement+1][GeometryInfo<dim>::faces_per_cell/2]=
+  {
+    {
+      RefinementCase<dim-1>::no_refinement,  // no_refinement
+      RefinementCase<dim-1>::no_refinement
+    },
+
+    {
+      RefinementCase<dim-1>::no_refinement,  // cut_x
+      RefinementCase<dim-1>::cut_x
+    },
+
+    {
+      RefinementCase<dim-1>::cut_x,          // cut_y
+      RefinementCase<dim-1>::no_refinement
+    },
+
+    {
+      RefinementCase<dim-1>::cut_x,          // cut_xy
+      RefinementCase<dim-1>::cut_x
+    }
+  };
+
+  return ref_cases[cell_refinement_case][face_no/2]; // faces 2k and 2k+1 share column k
+}
+
+
+template<>
+RefinementCase<2>
+GeometryInfo<3>::face_refinement_case(const RefinementCase<3> &cell_refinement_case,
+                                      const unsigned int face_no,
+                                      const bool face_orientation,
+                                      const bool /*face_flip*/,
+                                      const bool face_rotation)
+{
+  const unsigned int dim=3;
+  Assert(cell_refinement_case<RefinementCase<dim>::isotropic_refinement+1,
+         ExcIndexRange(cell_refinement_case, 0, RefinementCase<dim>::isotropic_refinement+1));
+  Assert(face_no<GeometryInfo<dim>::faces_per_cell,
+         ExcIndexRange(face_no, 0, GeometryInfo<dim>::faces_per_cell));
+
+  static const RefinementCase<dim-1> // one row per cell refinement case; columns: faces 0/1, 2/3, 4/5
+  ref_cases[RefinementCase<dim>::isotropic_refinement+1][GeometryInfo<dim>::faces_per_cell/2]=
+  {
+    {
+      RefinementCase<dim-1>::no_refinement,  // no_refinement
+      RefinementCase<dim-1>::no_refinement,
+      RefinementCase<dim-1>::no_refinement
+    },
+
+    {
+      RefinementCase<dim-1>::no_refinement,  // cut_x
+      RefinementCase<dim-1>::cut_y,
+      RefinementCase<dim-1>::cut_x
+    },
+
+    {
+      RefinementCase<dim-1>::cut_x,          // cut_y
+      RefinementCase<dim-1>::no_refinement,
+      RefinementCase<dim-1>::cut_y
+    },
+
+    {
+      RefinementCase<dim-1>::cut_x,          // cut_xy
+      RefinementCase<dim-1>::cut_y,
+      RefinementCase<dim-1>::cut_xy
+    },
+
+    {
+      RefinementCase<dim-1>::cut_y,          // cut_z
+      RefinementCase<dim-1>::cut_x,
+      RefinementCase<dim-1>::no_refinement
+    },
+
+    {
+      RefinementCase<dim-1>::cut_y,          // cut_xz
+      RefinementCase<dim-1>::cut_xy,
+      RefinementCase<dim-1>::cut_x
+    },
+
+    {
+      RefinementCase<dim-1>::cut_xy,         // cut_yz
+      RefinementCase<dim-1>::cut_x,
+      RefinementCase<dim-1>::cut_y
+    },
+
+    {
+      RefinementCase<dim-1>::cut_xy,         // cut_xyz
+      RefinementCase<dim-1>::cut_xy,
+      RefinementCase<dim-1>::cut_xy
+    },
+  };
+
+  const RefinementCase<dim-1> ref_case=ref_cases[cell_refinement_case][face_no/2];
+
+  static const RefinementCase<dim-1> flip[4]= // swaps cut_x and cut_y, keeps the other two cases
+  {
+    RefinementCase<dim-1>::no_refinement,
+    RefinementCase<dim-1>::cut_y,
+    RefinementCase<dim-1>::cut_x,
+    RefinementCase<dim-1>::cut_xy
+  };
+
+  // correct the ref_case for face_orientation
+  // and face_rotation. for face_orientation,
+  // 'true' is the default value whereas for
+  // face_rotation, 'false' is standard. If
+  // <tt>face_rotation==face_orientation</tt>,
+  // then one of them is non-standard and we
+  // have to swap cut_x and cut_y, otherwise no
+  // change is necessary.  face_flip has no
+  // influence. However, in order to keep the
+  // interface consistent with other functions,
+  // we still include it as an argument to this
+  // function
+  return (face_orientation==face_rotation) ? flip[ref_case] : ref_case;
+}
+
+
+
+// 1d: checks both arguments and returns the cell's refinement case unchanged.
+template<>
+RefinementCase<1>
+GeometryInfo<1>::line_refinement_case(const RefinementCase<1> &cell_refinement_case,
+                                      const unsigned int line_no)
+{
+  const unsigned int dim = 1;
+  (void)dim;      // dim and line_no are otherwise only used inside Assert
+  (void)line_no;
+  Assert(cell_refinement_case<RefinementCase<dim>::isotropic_refinement+1,
+         ExcIndexRange(cell_refinement_case, 0, RefinementCase<dim>::isotropic_refinement+1));
+  Assert(line_no<GeometryInfo<dim>::lines_per_cell,
+         ExcIndexRange(line_no, 0, GeometryInfo<dim>::lines_per_cell));
+  return cell_refinement_case;
+}
+
+
+// 2d: forwards to face_refinement_case(), passing the line number as the
+// face number; the argument checks (assertions) are performed there.
+template<>
+RefinementCase<1>
+GeometryInfo<2>::line_refinement_case(const RefinementCase<2> &cell_refinement_case, const unsigned int line_no)
+{
+  return face_refinement_case(cell_refinement_case, line_no);
+}
+
+
+// Refinement of one of the twelve lines of a 3d cell: the line is cut exactly
+// if the cell refinement case contains the cut along the line's own axis.
+template<>
+RefinementCase<1>
+GeometryInfo<3>::line_refinement_case(const RefinementCase<3> &cell_refinement_case,
+                                      const unsigned int line_no)
+{
+  const unsigned int dim=3;
+  Assert(cell_refinement_case<RefinementCase<dim>::isotropic_refinement+1,
+         ExcIndexRange(cell_refinement_case, 0, RefinementCase<dim>::isotropic_refinement+1));
+  Assert(line_no<GeometryInfo<dim>::lines_per_cell,
+         ExcIndexRange(line_no, 0, GeometryInfo<dim>::lines_per_cell));
+
+  // axis along which each line runs: 0->x, 1->y, 2->z
+  static const unsigned int line_axis[lines_per_cell]=
+  {1,1,0,0,1,1,0,0,2,2,2,2};
+
+  // the elementary refinement case that cuts lines along a given axis.
+  // For example, cut_y and everything containing cut_y (cut_xy, cut_yz,
+  // cut_xyz) cuts lines which run in y direction.
+  static const RefinementCase<dim>
+  axis_cut[dim] =
+  {
+    RefinementCase<dim>::cut_x,
+    RefinementCase<dim>::cut_y,
+    RefinementCase<dim>::cut_z
+  };
+
+  if (cell_refinement_case & axis_cut[line_axis[line_no]])
+    return RefinementCase<1>::cut_x;
+
+  return RefinementCase<1>::no_refinement;
+}
+
+
+
+// 1d: always asserts with ExcImpossibleInDim; the returned no_refinement
+// value is only a placeholder.
+template<>
+RefinementCase<1>
+GeometryInfo<1>::min_cell_refinement_case_for_face_refinement(const RefinementCase<0> &,
+    const unsigned int,
+    const bool, const bool, const bool)
+{
+  const unsigned int dim = 1;
+  Assert(false, ExcImpossibleInDim(dim));
+
+  return RefinementCase<dim>::no_refinement;
+}
+
+
+// Cell refinement needed in 2d so that face face_no is refined as requested:
+// a cut face requires cut_y on faces 0/1 and cut_x on faces 2/3.
+template<>
+RefinementCase<2>
+GeometryInfo<2>::min_cell_refinement_case_for_face_refinement(const RefinementCase<1> &face_refinement_case,
+    const unsigned int face_no,
+    const bool, const bool, const bool)
+{
+  const unsigned int dim = 2;
+  Assert(face_refinement_case<RefinementCase<dim-1>::isotropic_refinement+1,
+         ExcIndexRange(face_refinement_case, 0, RefinementCase<dim-1>::isotropic_refinement+1));
+  Assert(face_no<GeometryInfo<dim>::faces_per_cell,
+         ExcIndexRange(face_no, 0, GeometryInfo<dim>::faces_per_cell));
+  // the argument is a RefinementCase<dim-1>, so compare with its own cut_x
+  if (face_refinement_case==RefinementCase<dim-1>::cut_x)
+    return (face_no/2) ? RefinementCase<dim>::cut_x : RefinementCase<dim>::cut_y;
+  else
+    return RefinementCase<dim>::no_refinement;
+}
+
+
+template<>
+RefinementCase<3>
+GeometryInfo<3>::min_cell_refinement_case_for_face_refinement(const RefinementCase<2> &face_refinement_case,
+    const unsigned int face_no,
+    const bool face_orientation,
+    const bool /*face_flip*/,
+    const bool face_rotation)
+{
+  const unsigned int dim=3;
+  Assert(face_refinement_case<RefinementCase<dim-1>::isotropic_refinement+1,
+         ExcIndexRange(face_refinement_case, 0, RefinementCase<dim-1>::isotropic_refinement+1));
+  Assert(face_no<GeometryInfo<dim>::faces_per_cell,
+         ExcIndexRange(face_no, 0, GeometryInfo<dim>::faces_per_cell));
+
+  static const RefinementCase<2> flip[4]= // swaps cut_x and cut_y, keeps the other two cases
+  {
+    RefinementCase<2>::no_refinement,
+    RefinementCase<2>::cut_y,
+    RefinementCase<2>::cut_x,
+    RefinementCase<2>::cut_xy
+  };
+
+  // correct the face_refinement_case for
+  // face_orientation and face_rotation. for
+  // face_orientation, 'true' is the default
+  // value whereas for face_rotation, 'false'
+  // is standard. If
+  // <tt>face_rotation==face_orientation</tt>,
+  // then one of them is non-standard and we
+  // have to swap cut_x and cut_y, otherwise no
+  // change is necessary.  face_flip has no
+  // influence. However, in order to keep the
+  // interface consistent with other functions,
+  // we still include it as an argument to this
+  // function
+  const RefinementCase<dim-1> std_face_ref = (face_orientation==face_rotation) ? flip[face_refinement_case] : face_refinement_case;
+
+  static const RefinementCase<dim> face_to_cell[3][4]= // first index: face_no/2, second index: face refinement case
+  {
+    {
+      RefinementCase<dim>::no_refinement,  // faces 0 and 1
+      RefinementCase<dim>::cut_y,          // cut_x in face 0 means cut_y for the cell
+      RefinementCase<dim>::cut_z,
+      RefinementCase<dim>::cut_yz
+    },
+
+    {
+      RefinementCase<dim>::no_refinement,  // faces 2 and 3 (note that x and y are "exchanged on faces 2 and 3")
+      RefinementCase<dim>::cut_z,
+      RefinementCase<dim>::cut_x,
+      RefinementCase<dim>::cut_xz
+    },
+
+    {
+      RefinementCase<dim>::no_refinement,  // faces 4 and 5
+      RefinementCase<dim>::cut_x,
+      RefinementCase<dim>::cut_y,
+      RefinementCase<dim>::cut_xy
+    }
+  };
+
+  return face_to_cell[face_no/2][std_face_ref];
+}
+
+
+
+// 1d: line 0 is the only accepted argument; the result is always cut_x.
+template<>
+RefinementCase<1>
+GeometryInfo<1>::min_cell_refinement_case_for_line_refinement(const unsigned int line_no)
+{
+  (void)line_no;  // otherwise only referenced inside the Assert
+  Assert(line_no==0, ExcIndexRange(line_no,0,1));
+  return RefinementCase<1>::cut_x;
+}
+
+
+// 2d: lines 0 and 1 yield cut_y, every higher line number yields cut_x.
+template<>
+RefinementCase<2>
+GeometryInfo<2>::min_cell_refinement_case_for_line_refinement(const unsigned int line_no)
+{
+  const unsigned int dim = 2;
+  (void)dim;
+  Assert(line_no<GeometryInfo<dim>::lines_per_cell,
+         ExcIndexRange(line_no, 0, GeometryInfo<dim>::lines_per_cell));
+  return (line_no/2 != 0) ? RefinementCase<2>::cut_x : RefinementCase<2>::cut_y;
+}
+
+
+template<>
+RefinementCase<3>
+GeometryInfo<3>::min_cell_refinement_case_for_line_refinement(const unsigned int line_no)
+{
+  const unsigned int dim=3;
+  Assert(line_no<GeometryInfo<dim>::lines_per_cell,
+         ExcIndexRange(line_no, 0, GeometryInfo<dim>::lines_per_cell));
+
+  static const RefinementCase<dim> ref_cases[6]= // one entry per pair of consecutive lines
+  {
+    RefinementCase<dim>::cut_y,  // lines  0 and  1
+    RefinementCase<dim>::cut_x,  // lines  2 and  3
+    RefinementCase<dim>::cut_y,  // lines  4 and  5
+    RefinementCase<dim>::cut_x,  // lines  6 and  7
+    RefinementCase<dim>::cut_z,  // lines  8 and  9
+    RefinementCase<dim>::cut_z   // lines 10 and 11
+  };
+
+  return ref_cases[line_no/2];
+}
+
+
+
+template <>
+unsigned int
+GeometryInfo<3>::standard_to_real_face_vertex(const unsigned int vertex,
+                                              const bool face_orientation,
+                                              const bool face_flip,
+                                              const bool face_rotation)
+{
+  Assert(vertex<GeometryInfo<3>::vertices_per_face,
+         ExcIndexRange(vertex,0,GeometryInfo<3>::vertices_per_face));
+
+  // set up a table to make sure that
+  // we handle non-standard faces correctly
+  //
+  // so set up a table that for each vertex (of
+  // a quad in standard position) describes
+  // which vertex to take
+  //
+  // first index: four vertices 0...3
+  //
+  // second index: face_orientation; 0:
+  // opposite normal, 1: standard
+  //
+  // third index: face_flip; 0: standard, 1:
+  // face rotated by 180 degrees
+  //
+  // fourth index: face_rotation: 0: standard,
+  // 1: face rotated by 90 degrees
+
+  static const unsigned int vertex_translation[4][2][2][2] =
+  {
+    { { { 0, 2 },  // vertex 0, face_orientation=false, face_flip=false, face_rotation=false and true
+        { 3, 1 }   // vertex 0, face_orientation=false, face_flip=true, face_rotation=false and true
+      },
+      { { 0, 2 },  // vertex 0, face_orientation=true, face_flip=false, face_rotation=false and true
+        { 3, 1 }   // vertex 0, face_orientation=true, face_flip=true, face_rotation=false and true
+      }
+    },
+
+    { { { 2, 3 },  // vertex 1 ...
+        { 1, 0 }
+      },
+      { { 1, 0 },
+        { 2, 3 }
+      }
+    },
+
+    { { { 1, 0 },  // vertex 2 ...
+        { 2, 3 }
+      },
+      { { 2, 3 },
+        { 1, 0 }
+      }
+    },
+
+    { { { 3, 1 },  // vertex 3 ...
+        { 0, 2 }
+      },
+      { { 3, 1 },
+        { 0, 2 }
+      }
+    }
+  };
+
+  return vertex_translation[vertex][face_orientation][face_flip][face_rotation];
+}
+
+
+
+// Generic version: the three orientation flags are ignored and the vertex
+// index is handed back unchanged once the assertions have passed.
+template <int dim>
+unsigned int
+GeometryInfo<dim>::standard_to_real_face_vertex(const unsigned int vertex,
+                                                const bool, const bool, const bool)
+{
+  Assert(dim>1, ExcImpossibleInDim(dim));
+  Assert(vertex<GeometryInfo<dim>::vertices_per_face,
+         ExcIndexRange(vertex,0,GeometryInfo<dim>::vertices_per_face));
+  return vertex;
+}
+
+
+
+template <>
+unsigned int
+GeometryInfo<3>::real_to_standard_face_vertex(const unsigned int vertex,
+                                              const bool face_orientation,
+                                              const bool face_flip,
+                                              const bool face_rotation)
+{
+  Assert(vertex<GeometryInfo<3>::vertices_per_face,
+         ExcIndexRange(vertex,0,GeometryInfo<3>::vertices_per_face));
+
+  // set up a table to make sure that
+  // we handle non-standard faces correctly
+  //
+  // so set up a table that for each vertex (of
+  // a quad) describes
+  // which vertex to take
+  //
+  // first index: four vertices 0...3
+  //
+  // second index: face_orientation; 0:
+  // opposite normal, 1: standard
+  //
+  // third index: face_flip; 0: standard, 1:
+  // face rotated by 180 degrees
+  //
+  // fourth index: face_rotation: 0: standard,
+  // 1: face rotated by 90 degrees
+
+  static const unsigned int vertex_translation[4][2][2][2] =
+  {
+    { { { 0, 2 },  // vertex 0, face_orientation=false, face_flip=false, face_rotation=false and true
+        { 3, 1 }   // vertex 0, face_orientation=false, face_flip=true, face_rotation=false and true
+      },
+      { { 0, 1 },  // vertex 0, face_orientation=true, face_flip=false, face_rotation=false and true
+        { 3, 2 }   // vertex 0, face_orientation=true, face_flip=true, face_rotation=false and true
+      }
+    },
+
+    { { { 2, 3 },  // vertex 1 ...
+        { 1, 0 }
+      },
+      { { 1, 3 },
+        { 2, 0 }
+      }
+    },
+
+    { { { 1, 0 },  // vertex 2 ...
+        { 2, 3 }
+      },
+      { { 2, 0 },
+        { 1, 3 }
+      }
+    },
+
+    { { { 3, 1 },  // vertex 3 ...
+        { 0, 2 }
+      },
+      { { 3, 2 },
+        { 0, 1 }
+      }
+    }
+  };
+
+  return vertex_translation[vertex][face_orientation][face_flip][face_rotation];
+}
+
+
+
+// Generic version: the three orientation flags are ignored and the vertex
+// index is handed back unchanged once the assertions have passed.
+template <int dim>
+unsigned int
+GeometryInfo<dim>::real_to_standard_face_vertex(const unsigned int vertex,
+                                                const bool, const bool, const bool)
+{
+  Assert(dim>1, ExcImpossibleInDim(dim));
+  Assert(vertex<GeometryInfo<dim>::vertices_per_face,
+         ExcIndexRange(vertex,0,GeometryInfo<dim>::vertices_per_face));
+  return vertex;
+}
+
+
+
+template <>
+unsigned int
+GeometryInfo<3>::standard_to_real_face_line(const unsigned int line,
+                                            const bool face_orientation,
+                                            const bool face_flip,
+                                            const bool face_rotation)
+{
+  Assert(line<GeometryInfo<3>::lines_per_face,
+         ExcIndexRange(line,0,GeometryInfo<3>::lines_per_face));
+
+
+  // make sure we handle
+  // non-standard faces correctly
+  //
+  // so set up a table that for each line (of a
+  // quad) describes which line to take
+  //
+  // first index: four lines 0...3
+  //
+  // second index: face_orientation; 0:
+  // opposite normal, 1: standard
+  //
+  // third index: face_flip; 0: standard, 1:
+  // face rotated by 180 degrees
+  //
+  // fourth index: face_rotation: 0: standard,
+  // 1: face rotated by 90 degrees
+
+  static const unsigned int line_translation[4][2][2][2] =
+  {
+    { { { 2, 0 },  // line 0, face_orientation=false, face_flip=false, face_rotation=false and true
+        { 3, 1 }   // line 0, face_orientation=false, face_flip=true, face_rotation=false and true
+      },
+      { { 0, 3 },  // line 0, face_orientation=true, face_flip=false, face_rotation=false and true
+        { 1, 2 }   // line 0, face_orientation=true, face_flip=true, face_rotation=false and true
+      }
+    },
+
+    { { { 3, 1 },  // line 1 ...
+        { 2, 0 }
+      },
+      { { 1, 2 },
+        { 0, 3 }
+      }
+    },
+
+    { { { 0, 3 },  // line 2 ...
+        { 1, 2 }
+      },
+      { { 2, 0 },
+        { 3, 1 }
+      }
+    },
+
+    { { { 1, 2 },  // line 3 ...
+        { 0, 3 }
+      },
+      { { 3, 1 },
+        { 2, 0 }
+      }
+    }
+  };
+
+  return line_translation[line][face_orientation][face_flip][face_rotation];
+}
+
+
+
+// Generic version: not implemented. Always asserts; returns line unchanged.
+template <int dim>
+unsigned int
+GeometryInfo<dim>::standard_to_real_face_line(const unsigned int line,
+                                              const bool, const bool,
+                                              const bool)
+{
+  Assert(false, ExcNotImplemented());
+  return line;
+}
+
+
+
+template <>
+unsigned int
+GeometryInfo<3>::real_to_standard_face_line(const unsigned int line,
+                                            const bool face_orientation,
+                                            const bool face_flip,
+                                            const bool face_rotation)
+{
+  Assert(line<GeometryInfo<3>::lines_per_face,
+         ExcIndexRange(line,0,GeometryInfo<3>::lines_per_face));
+
+
+  // make sure we handle
+  // non-standard faces correctly
+  //
+  // so set up a table that for each line (of a
+  // quad) describes which line to take
+  //
+  // first index: four lines 0...3
+  //
+  // second index: face_orientation; 0:
+  // opposite normal, 1: standard
+  //
+  // third index: face_flip; 0: standard, 1:
+  // face rotated by 180 degrees
+  //
+  // fourth index: face_rotation: 0: standard,
+  // 1: face rotated by 90 degrees
+
+  static const unsigned int line_translation[4][2][2][2] =
+  {
+    { { { 2, 0 },  // line 0, face_orientation=false, face_flip=false, face_rotation=false and true
+        { 3, 1 }   // line 0, face_orientation=false, face_flip=true, face_rotation=false and true
+      },
+      { { 0, 2 },  // line 0, face_orientation=true, face_flip=false, face_rotation=false and true
+        { 1, 3 }   // line 0, face_orientation=true, face_flip=true, face_rotation=false and true
+      }
+    },
+
+    { { { 3, 1 },  // line 1 ...
+        { 2, 0 }
+      },
+      { { 1, 3 },
+        { 0, 2 }
+      }
+    },
+
+    { { { 0, 3 },  // line 2 ...
+        { 1, 2 }
+      },
+      { { 2, 1 },
+        { 3, 0 }
+      }
+    },
+
+    { { { 1, 2 },  // line 3 ...
+        { 0, 3 }
+      },
+      { { 3, 0 },
+        { 2, 1 }
+      }
+    }
+  };
+
+  return line_translation[line][face_orientation][face_flip][face_rotation];
+}
+
+
+
+// Fallback for every dimension that has no dedicated specialization of
+// this function: no translation table exists here, so the function raises
+// a "not implemented" assertion and returns the line index it was given.
+template <int dim>
+unsigned int
+GeometryInfo<dim>::real_to_standard_face_line (const unsigned int line,
+                                               const bool /*face_orientation*/,
+                                               const bool /*face_flip*/,
+                                               const bool /*face_rotation*/)
+{
+  Assert (false, ExcNotImplemented());
+
+  return line;
+}
+
+
+
+// Specialization for one space dimension. After checking the index
+// arguments the function simply returns @p face; the refinement cases and
+// the three orientation flags are not used.
+template <>
+unsigned int
+GeometryInfo<1>::child_cell_on_face (const RefinementCase<1> & /*ref_case*/,
+                                     const unsigned int face,
+                                     const unsigned int subface,
+                                     const bool /*face_orientation*/,
+                                     const bool /*face_flip*/,
+                                     const bool /*face_rotation*/,
+                                     const RefinementCase<0> & /*face_ref_case*/)
+{
+  Assert (face<faces_per_cell, ExcIndexRange(face, 0, faces_per_cell));
+  Assert (subface<max_children_per_face,
+          ExcIndexRange(subface, 0, max_children_per_face));
+
+  // subface is only looked at by the assertion above, so mark it as used
+  // explicitly
+  (void)subface;
+
+  return face;
+}
+
+
+
+// Specialization for quadrilaterals: return the number of the child of a
+// cell refined according to @p ref_case that is adjacent to subface
+// @p subface of face @p face. Of the three orientation flags only
+// @p face_flip is used; the refinement case of the face is ignored.
+//
+// @p ref_case must not be no_refinement: the lookup table is indexed with
+// ref_case-1 and has no entry for a cell without children.
+template <>
+unsigned int
+GeometryInfo<2>::child_cell_on_face (const RefinementCase<2> &ref_case,
+                                     const unsigned int face,
+                                     const unsigned int subface,
+                                     const bool /*face_orientation*/,
+                                     const bool face_flip,
+                                     const bool /*face_rotation*/,
+                                     const RefinementCase<1> &)
+{
+  // same precondition as in the 3d specialization: without it, ref_case-1
+  // below would index in front of the table
+  Assert (ref_case>RefinementCase<2>::no_refinement, ExcMessage("Cell has no children."));
+  Assert (face<faces_per_cell, ExcIndexRange(face, 0, faces_per_cell));
+  Assert (subface<max_children_per_face,
+          ExcIndexRange(subface, 0, max_children_per_face));
+
+  // always return the child adjacent to the specified
+  // subface. if the face of a cell is not refined, don't
+  // throw an assertion but deliver the child adjacent to
+  // the face nevertheless, i.e. deliver the child of
+  // this cell adjacent to the subface of a possibly
+  // refined neighbor. this simplifies setting neighbor
+  // information in execute_refinement.
+  //
+  // first index:  face_flip
+  // second index: (refinement case - 1)
+  // third index:  face
+  // fourth index: subface
+  static const unsigned int
+  subcells[2][RefinementCase<2>::isotropic_refinement][faces_per_cell][max_children_per_face] =
+  {
+    {
+      // Normal orientation (face_flip = false)
+      {{0,0},{1,1},{0,1},{0,1}},          // cut_x
+      {{0,1},{0,1},{0,0},{1,1}},          // cut_y
+      {{0,2},{1,3},{0,1},{2,3}}           // cut_xy, i.e. isotropic refinement
+    },
+    {
+      // Flipped orientation (face_flip = true)
+      {{0,0},{1,1},{1,0},{1,0}},          // cut_x
+      {{1,0},{1,0},{0,0},{1,1}},          // cut_y
+      {{2,0},{3,1},{1,0},{3,2}}           // cut_xy, i.e. isotropic refinement
+    }
+  };
+
+  return subcells[face_flip][ref_case-1][face][subface];
+}
+
+
+
+// Specialization for hexahedra: return the index of the child of a cell
+// refined according to @p ref_case that lies at subface @p subface of face
+// @p face. The refinement of the face itself is given by @p face_ref_case
+// and its orientation by @p face_orientation, @p face_flip and
+// @p face_rotation.
+//
+// If @p face_ref_case is too coarse for @p ref_case, an assertion is
+// raised and invalid_unsigned_int is returned.
+template <>
+unsigned int
+GeometryInfo<3>::child_cell_on_face (const RefinementCase<3> &ref_case,
+                                     const unsigned int face,
+                                     const unsigned int subface,
+                                     const bool face_orientation,
+                                     const bool face_flip,
+                                     const bool face_rotation,
+                                     const RefinementCase<2> &face_ref_case)
+{
+  const unsigned int dim = 3;
+
+  // NOTE(review): the cell's refinement case is compared against the
+  // no_refinement value of the face type RefinementCase<dim-1>;
+  // presumably both types use the same value for it -- confirm
+  Assert (ref_case>RefinementCase<dim-1>::no_refinement, ExcMessage("Cell has no children."));
+  Assert (face<faces_per_cell, ExcIndexRange(face, 0, faces_per_cell));
+  Assert (subface<GeometryInfo<dim-1>::n_children(face_ref_case) ||
+          (subface==0 && face_ref_case==RefinementCase<dim-1>::no_refinement),
+          ExcIndexRange(subface, 0, GeometryInfo<2>::n_children(face_ref_case)));
+
+  // invalid number used for invalid cases,
+  // e.g. when the children are more refined at
+  // a given face than the face itself
+  static const unsigned int e=invalid_unsigned_int;
+
+  // the whole process of finding a child cell
+  // at a given subface considering the
+  // possibly anisotropic refinement cases of
+  // the cell and the face as well as
+  // orientation, flip and rotation of the face
+  // is quite complicated. thus, we break it
+  // down into several steps.
+
+  // first step: convert the given face refine
+  // case to the face refine case of the same
+  // face in standard orientation (and standard
+  // flip and rotation). This only affects
+  // cut_x and cut_y, which are exchanged by
+  // the following table
+  static const RefinementCase<dim-1> flip[4]=
+  {
+    RefinementCase<dim-1>::no_refinement,
+    RefinementCase<dim-1>::cut_y,
+    RefinementCase<dim-1>::cut_x,
+    RefinementCase<dim-1>::cut_xy
+  };
+  // for face_orientation, 'true' is the
+  // default value whereas for face_rotation,
+  // 'false' is standard. If
+  // <tt>face_rotation==face_orientation</tt>,
+  // then one of them is non-standard and we
+  // have to swap cut_x and cut_y, otherwise no
+  // change is necessary.
+  const RefinementCase<dim-1> std_face_ref = (face_orientation==face_rotation) ? flip[face_ref_case] : face_ref_case;
+
+  // second step: convert the given subface
+  // index to the one for a standard face
+  // respecting face_orientation, face_flip and
+  // face_rotation
+
+  // first index:  face_ref_case
+  // second index: face_orientation
+  // third index:  face_flip
+  // fourth index: face_rotation
+  // fifth index:  subface index
+  static const unsigned int subface_exchange[4][2][2][2][4]=
+  {
+    // no_refinement (subface 0 stays 0,
+    // all others are invalid)
+    { { { {0,e,e,e},
+          {0,e,e,e}
+        },
+        { {0,e,e,e},
+          {0,e,e,e}
+        }
+      },
+      { { {0,e,e,e},
+          {0,e,e,e}
+        },
+        { {0,e,e,e},
+          {0,e,e,e}
+        }
+      }
+    },
+    // cut_x (here, if the face is only
+    // rotated OR only falsely oriented,
+    // then subface 0 of the non-standard
+    // face does NOT correspond to one of
+    // the subfaces of a standard
+    // face. Thus we indicate the subface
+    // which is located at the lower left
+    // corner (the origin of the face's
+    // local coordinate system) with
+    // '0'. The rest of this issue is
+    // taken care of using the above
+    // conversion to a 'standard face
+    // refine case')
+    { { { {0,1,e,e},
+          {0,1,e,e}
+        },
+        { {1,0,e,e},
+          {1,0,e,e}
+        }
+      },
+      { { {0,1,e,e},
+          {0,1,e,e}
+        },
+        { {1,0,e,e},
+          {1,0,e,e}
+        }
+      }
+    },
+    // cut_y (the same applies as for
+    // cut_x)
+    { { { {0,1,e,e},
+          {1,0,e,e}
+        },
+        { {1,0,e,e},
+          {0,1,e,e}
+        }
+      },
+      { { {0,1,e,e},
+          {1,0,e,e}
+        },
+        { {1,0,e,e},
+          {0,1,e,e}
+        }
+      }
+    },
+    // cut_xy: this information is
+    // identical to the information
+    // returned by
+    // GeometryInfo<3>::real_to_standard_face_vertex()
+    { { { {0,2,1,3},    // face_orientation=false, face_flip=false, face_rotation=false, subfaces 0,1,2,3
+          {2,3,0,1}     // face_orientation=false, face_flip=false, face_rotation=true,  subfaces 0,1,2,3
+        },
+        { {3,1,2,0},    // face_orientation=false, face_flip=true,  face_rotation=false, subfaces 0,1,2,3
+          {1,0,3,2}     // face_orientation=false, face_flip=true,  face_rotation=true,  subfaces 0,1,2,3
+        }
+      },
+      { { {0,1,2,3},    // face_orientation=true,  face_flip=false, face_rotation=false, subfaces 0,1,2,3
+          {1,3,0,2}     // face_orientation=true,  face_flip=false, face_rotation=true,  subfaces 0,1,2,3
+        },
+        { {3,2,1,0},    // face_orientation=true,  face_flip=true,  face_rotation=false, subfaces 0,1,2,3
+          {2,0,3,1}     // face_orientation=true,  face_flip=true,  face_rotation=true,  subfaces 0,1,2,3
+        }
+      }
+    }
+  };
+
+  const unsigned int std_subface=subface_exchange
+                                 [face_ref_case]
+                                 [face_orientation]
+                                 [face_flip]
+                                 [face_rotation]
+                                 [subface];
+  Assert (std_subface!=e, ExcInternalError());
+
+  // third step: these are the children, which
+  // can be found at the given subfaces of an
+  // isotropically refined (standard) face
+  //
+  // first index:  (refinement_case-1)
+  // second index: face_index
+  // third index:  subface_index (isotropic refinement)
+  static const unsigned int
+  iso_children[RefinementCase<dim>::cut_xyz][faces_per_cell][max_children_per_face] =
+  {
+    // cut_x
+    { {0, 0, 0, 0},  // face 0, subfaces 0,1,2,3
+      {1, 1, 1, 1},  // face 1, subfaces 0,1,2,3
+      {0, 0, 1, 1},  // face 2, subfaces 0,1,2,3
+      {0, 0, 1, 1},  // face 3, subfaces 0,1,2,3
+      {0, 1, 0, 1},  // face 4, subfaces 0,1,2,3
+      {0, 1, 0, 1}   // face 5, subfaces 0,1,2,3
+    },
+    // cut_y
+    { {0, 1, 0, 1},
+      {0, 1, 0, 1},
+      {0, 0, 0, 0},
+      {1, 1, 1, 1},
+      {0, 0, 1, 1},
+      {0, 0, 1, 1}
+    },
+    // cut_xy
+    { {0, 2, 0, 2},
+      {1, 3, 1, 3},
+      {0, 0, 1, 1},
+      {2, 2, 3, 3},
+      {0, 1, 2, 3},
+      {0, 1, 2, 3}
+    },
+    // cut_z
+    { {0, 0, 1, 1},
+      {0, 0, 1, 1},
+      {0, 1, 0, 1},
+      {0, 1, 0, 1},
+      {0, 0, 0, 0},
+      {1, 1, 1, 1}
+    },
+    // cut_xz
+    { {0, 0, 1, 1},
+      {2, 2, 3, 3},
+      {0, 1, 2, 3},
+      {0, 1, 2, 3},
+      {0, 2, 0, 2},
+      {1, 3, 1, 3}
+    },
+    // cut_yz
+    { {0, 1, 2, 3},
+      {0, 1, 2, 3},
+      {0, 2, 0, 2},
+      {1, 3, 1, 3},
+      {0, 0, 1, 1},
+      {2, 2, 3, 3}
+    },
+    // cut_xyz
+    { {0, 2, 4, 6},
+      {1, 3, 5, 7},
+      {0, 4, 1, 5},
+      {2, 6, 3, 7},
+      {0, 1, 2, 3},
+      {4, 5, 6, 7}
+    }
+  };
+
+  // fourth step: check, whether the given face
+  // refine case is valid for the given cell
+  // refine case. this is the case, if the
+  // given face refine case is at least as
+  // refined as the face is for the given cell
+  // refine case
+
+  // note, that we are considering standard
+  // face refinement cases here and thus must
+  // not pass the given orientation, flip and
+  // rotation flags
+  if ((std_face_ref & face_refinement_case(ref_case, face))
+      == face_refinement_case(ref_case, face))
+    {
+      // all is fine. for anisotropic face
+      // refine cases, select one of the
+      // isotropic subfaces which neighbors the
+      // same child
+
+      // first index: (standard) face refine case
+      // second index: subface index
+      static const unsigned int equivalent_iso_subface[4][4]=
+      {
+        {0,e,e,e},                    // no_refinement
+        {0,3,e,e},                    // cut_x
+        {0,3,e,e},                    // cut_y
+        {0,1,2,3}                     // cut_xy
+      };
+
+      const unsigned int equ_std_subface
+        =equivalent_iso_subface[std_face_ref][std_subface];
+      Assert (equ_std_subface!=e, ExcInternalError());
+
+      return iso_children[ref_case-1][face][equ_std_subface];
+    }
+  else
+    {
+      // the face_ref_case was too coarse,
+      // throw an error
+      Assert(false,
+             ExcMessage("The face RefineCase is too coarse "
+                        "for the given cell RefineCase."));
+    }
+  // we only get here in case of an error
+  return e;
+}
+
+
+
+// Specialization for dim=4: nothing is implemented here. The function
+// raises a "not implemented" assertion and returns invalid_unsigned_int;
+// none of the arguments is used.
+template <>
+unsigned int
+GeometryInfo<4>::child_cell_on_face (const RefinementCase<4> & /*ref_case*/,
+                                     const unsigned int /*face*/,
+                                     const unsigned int /*subface*/,
+                                     const bool /*face_orientation*/,
+                                     const bool /*face_flip*/,
+                                     const bool /*face_rotation*/,
+                                     const RefinementCase<3> & /*face_ref_case*/)
+{
+  Assert (false, ExcNotImplemented());
+
+  return invalid_unsigned_int;
+}
+
+
+
+// Specialization for one space dimension: after checking both indices,
+// return @p vertex unchanged.
+template <>
+unsigned int
+GeometryInfo<1>::line_to_cell_vertices (const unsigned int line,
+                                        const unsigned int vertex)
+{
+  Assert (line<lines_per_cell, ExcIndexRange(line, 0, lines_per_cell));
+  Assert (vertex<2, ExcIndexRange(vertex, 0, 2));
+
+  // line is only looked at by the assertion above, so mark it as used
+  // explicitly
+  (void)line;
+
+  return vertex;
+}
+
+
+// Specialization for quadrilaterals: the lookup is delegated to
+// child_cell_on_face() for an isotropically refined cell, with @p line
+// passed as the face number and @p vertex as the subface number.
+template <>
+unsigned int
+GeometryInfo<2>::line_to_cell_vertices (const unsigned int line,
+                                        const unsigned int vertex)
+{
+  const unsigned int cell_vertex
+    = child_cell_on_face (RefinementCase<2>::isotropic_refinement,
+                          line,
+                          vertex);
+
+  return cell_vertex;
+}
+
+
+
+// Specialization for hexahedra: return the cell-local number of end point
+// @p vertex (0 or 1) of line @p line, taken from a fixed table.
+template <>
+unsigned int
+GeometryInfo<3>::line_to_cell_vertices (const unsigned int line,
+                                        const unsigned int vertex)
+{
+  Assert (line<lines_per_cell, ExcIndexRange(line, 0, lines_per_cell));
+  Assert (vertex<2, ExcIndexRange(vertex, 0, 2));
+
+  // first index:  line number within the cell
+  // second index: which of the two end points of that line
+  static const unsigned int line_end_points[lines_per_cell][2] =
+  {
+    // lines of the bottom face
+    {0, 2},
+    {1, 3},
+    {0, 1},
+    {2, 3},
+    // lines of the top face
+    {4, 6},
+    {5, 7},
+    {4, 5},
+    {6, 7},
+    // lines connecting bottom and top face
+    {0, 4},
+    {1, 5},
+    {2, 6},
+    {3, 7}
+  };
+
+  return line_end_points[line][vertex];
+}
+
+
+// Specialization for dim=4: nothing is implemented here. The function
+// raises a "not implemented" assertion and returns invalid_unsigned_int.
+template <>
+unsigned int
+GeometryInfo<4>::line_to_cell_vertices (const unsigned int /*line*/,
+                                        const unsigned int /*vertex*/)
+{
+  Assert (false, ExcNotImplemented());
+
+  return invalid_unsigned_int;
+}
+
+
+// Specialization for one space dimension: after checking both indices the
+// function always returns 0, the number of the only line there is. The
+// three orientation flags are not used.
+template <>
+unsigned int
+GeometryInfo<1>::face_to_cell_lines (const unsigned int face,
+                                     const unsigned int line,
+                                     const bool /*face_orientation*/,
+                                     const bool /*face_flip*/,
+                                     const bool /*face_rotation*/)
+{
+  Assert (face+1<faces_per_cell+1, ExcIndexRange(face, 0, faces_per_cell));
+  Assert (line+1<lines_per_face+1, ExcIndexRange(line, 0, lines_per_face));
+
+  // both arguments are only looked at by the assertions above, so mark
+  // them as used explicitly
+  (void)face;
+  (void)line;
+
+  // a one-dimensional cell consists of a single line
+  return 0;
+}
+
+
+
+// Specialization for quadrilaterals: a face is itself a line here, so
+// after checking both indices the function returns @p face. The three
+// orientation flags are not used.
+template <>
+unsigned int
+GeometryInfo<2>::face_to_cell_lines (const unsigned int face,
+                                     const unsigned int line,
+                                     const bool /*face_orientation*/,
+                                     const bool /*face_flip*/,
+                                     const bool /*face_rotation*/)
+{
+  Assert (face<faces_per_cell, ExcIndexRange(face, 0, faces_per_cell));
+  Assert (line<lines_per_face, ExcIndexRange(line, 0, lines_per_face));
+
+  // line is only looked at by the assertion above, so mark it as used
+  // explicitly
+  (void)line;
+
+  return face;
+}
+
+
+
+// Specialization for hexahedra: return the cell-local number of line
+// @p line of face @p face. The face-local line number is first translated
+// with real_to_standard_face_line() using the three orientation flags and
+// the result is then looked up in a fixed table.
+template <>
+unsigned int
+GeometryInfo<3>::face_to_cell_lines (const unsigned int face,
+                                     const unsigned int line,
+                                     const bool face_orientation,
+                                     const bool face_flip,
+                                     const bool face_rotation)
+{
+  Assert (face<faces_per_cell, ExcIndexRange(face, 0, faces_per_cell));
+  Assert (line<lines_per_face, ExcIndexRange(line, 0, lines_per_face));
+
+  // first index:  face number
+  // second index: line number within the face (standard orientation)
+  static const unsigned int cell_lines_of_face[faces_per_cell][lines_per_face] =
+  {
+    {8,10, 0, 4},   // left face
+    {9,11, 1, 5},   // right face
+    {2, 6, 8, 9},   // front face
+    {3, 7,10,11},   // back face
+    {0, 1, 2, 3},   // bottom face
+    {4, 5, 6, 7}    // top face
+  };
+
+  const unsigned int std_line = real_to_standard_face_line (line,
+                                                            face_orientation,
+                                                            face_flip,
+                                                            face_rotation);
+
+  return cell_lines_of_face[face][std_line];
+}
+
+
+
+// Fallback for every dimension without a dedicated specialization: the
+// function raises a "not implemented" assertion and returns
+// invalid_unsigned_int; none of the arguments is used.
+template<int dim>
+unsigned int
+GeometryInfo<dim>::face_to_cell_lines (const unsigned int /*face*/,
+                                       const unsigned int /*line*/,
+                                       const bool /*face_orientation*/,
+                                       const bool /*face_flip*/,
+                                       const bool /*face_rotation*/)
+{
+  Assert (false, ExcNotImplemented());
+
+  return invalid_unsigned_int;
+}
+
+
+
+// Return the cell-local number of vertex @p vertex of face @p face. The
+// lookup is delegated to child_cell_on_face() for an isotropically refined
+// cell, with @p vertex passed as the subface number and the three
+// orientation flags handed through unchanged.
+template <int dim>
+unsigned int
+GeometryInfo<dim>::face_to_cell_vertices (const unsigned int face,
+                                          const unsigned int vertex,
+                                          const bool face_orientation,
+                                          const bool face_flip,
+                                          const bool face_rotation)
+{
+  const unsigned int cell_vertex
+    = child_cell_on_face (RefinementCase<dim>::isotropic_refinement,
+                          face, vertex,
+                          face_orientation, face_flip, face_rotation);
+
+  return cell_vertex;
+}
+
+
+
+// Return a copy of @p q in which every coordinate smaller than zero has
+// been replaced by zero and every coordinate larger than one by one; all
+// other coordinates are kept as they are.
+template <int dim>
+Point<dim>
+GeometryInfo<dim>::project_to_unit_cell (const Point<dim> &q)
+{
+  Point<dim> projected (q);
+
+  for (unsigned int d=0; d<dim; ++d)
+    {
+      if (projected[d] < 0.)
+        projected[d] = 0.;
+      else if (projected[d] > 1.)
+        projected[d] = 1.;
+    }
+
+  return projected;
+}
+
+
+
+// Return the largest amount by which any single coordinate of @p p lies
+// below zero or above one. The result is zero if no coordinate lies
+// outside the interval [0,1].
+template <int dim>
+double
+GeometryInfo<dim>::distance_to_unit_cell (const Point<dim> &p)
+{
+  double largest_excess = 0.0;
+
+  for (unsigned int d=0; d<dim; ++d)
+    {
+      const double below_zero = -p[d];
+      const double above_one  = p[d]-1.;
+
+      if (below_zero > largest_excess)
+        largest_excess = below_zero;
+      else if (above_one > largest_excess)
+        largest_excess = above_one;
+    }
+
+  return largest_excess;
+}
+
+
+
+// Evaluate the @p i-th d-linear shape function at the point @p xi. The
+// function is a product with one factor per coordinate direction: the
+// factor is the coordinate itself if the corresponding bit of @p i is set
+// (bit 0 for x, bit 1 for y, bit 2 for z) and one minus the coordinate
+// otherwise.
+//
+// Only dim=1,2,3 are implemented; any other dimension raises a "not
+// implemented" assertion. If no formula applies, -1e9 is returned.
+template <int dim>
+double
+GeometryInfo<dim>::
+d_linear_shape_function (const Point<dim> &xi,
+                         const unsigned int i)
+{
+  Assert (i < GeometryInfo<dim>::vertices_per_cell,
+          ExcIndexRange (i, 0, GeometryInfo<dim>::vertices_per_cell));
+
+  // every dimension leaves the outer switch with an explicit 'break' once
+  // its inner switch found no match. without it, an out-of-range i would
+  // fall through into the formulas of the next higher dimension and read
+  // coordinates that xi does not have
+  switch (dim)
+    {
+    case 1:
+    {
+      const double x = xi[0];
+      switch (i)
+        {
+        case 0:
+          return 1-x;
+        case 1:
+          return x;
+        }
+      break;
+    }
+
+    case 2:
+    {
+      const double x = xi[0];
+      const double y = xi[1];
+      switch (i)
+        {
+        case 0:
+          return (1-x)*(1-y);
+        case 1:
+          return x*(1-y);
+        case 2:
+          return (1-x)*y;
+        case 3:
+          return x*y;
+        }
+      break;
+    }
+
+    case 3:
+    {
+      const double x = xi[0];
+      const double y = xi[1];
+      const double z = xi[2];
+      switch (i)
+        {
+        case 0:
+          return (1-x)*(1-y)*(1-z);
+        case 1:
+          return x*(1-y)*(1-z);
+        case 2:
+          return (1-x)*y*(1-z);
+        case 3:
+          return x*y*(1-z);
+        case 4:
+          return (1-x)*(1-y)*z;
+        case 5:
+          return x*(1-y)*z;
+        case 6:
+          return (1-x)*y*z;
+        case 7:
+          return x*y*z;
+        }
+      break;
+    }
+
+    default:
+      Assert (false, ExcNotImplemented());
+    }
+
+  // only reached if none of the formulas above applied
+  return -1e9;
+}
+
+
+
+// Gradient of the @p i-th linear shape function in 1d. It does not depend
+// on the evaluation point: -1 for i=0 and +1 for i=1. For any other @p i
+// the value -1e9 is returned.
+template <>
+Tensor<1,1>
+GeometryInfo<1>::
+d_linear_shape_function_gradient (const Point<1> & /*xi*/,
+                                  const unsigned int i)
+{
+  Assert (i < GeometryInfo<1>::vertices_per_cell,
+          ExcIndexRange (i, 0, GeometryInfo<1>::vertices_per_cell));
+
+  if (i == 0)
+    return Point<1>(-1.);
+
+  if (i == 1)
+    return Point<1>(1.);
+
+  return Point<1>(-1e9);
+}
+
+
+
+// Gradient of the @p i-th bilinear shape function at the point @p xi. The
+// two components are the derivatives with respect to x and y. For an
+// @p i outside 0...3 both components are -1e9.
+template <>
+Tensor<1,2>
+GeometryInfo<2>::
+d_linear_shape_function_gradient (const Point<2> &xi,
+                                  const unsigned int i)
+{
+  Assert (i < GeometryInfo<2>::vertices_per_cell,
+          ExcIndexRange (i, 0, GeometryInfo<2>::vertices_per_cell));
+
+  const double x = xi[0];
+  const double y = xi[1];
+
+  // the complementary factors (1-x) and (1-y) appear in several formulas
+  const double one_minus_x = 1-x;
+  const double one_minus_y = 1-y;
+
+  switch (i)
+    {
+    case 0:
+      return Point<2>(-one_minus_y, -one_minus_x);
+    case 1:
+      return Point<2>(one_minus_y, -x);
+    case 2:
+      return Point<2>(-y, one_minus_x);
+    case 3:
+      return Point<2>(y, x);
+    }
+
+  return Point<2> (-1e9, -1e9);
+}
+
+
+
+// Gradient of the @p i-th trilinear shape function at the point @p xi.
+// The three components are the derivatives with respect to x, y and z.
+// For an @p i outside 0...7 all components are -1e9.
+template <>
+Tensor<1,3>
+GeometryInfo<3>::
+d_linear_shape_function_gradient (const Point<3> &xi,
+                                  const unsigned int i)
+{
+  Assert (i < GeometryInfo<3>::vertices_per_cell,
+          ExcIndexRange (i, 0, GeometryInfo<3>::vertices_per_cell));
+
+  const double x = xi[0];
+  const double y = xi[1];
+  const double z = xi[2];
+
+  // the complementary factors (1-x), (1-y) and (1-z) appear in most of
+  // the formulas
+  const double one_minus_x = 1-x;
+  const double one_minus_y = 1-y;
+  const double one_minus_z = 1-z;
+
+  switch (i)
+    {
+    case 0:
+      return Point<3>(-one_minus_y*one_minus_z,
+                      -one_minus_x*one_minus_z,
+                      -one_minus_x*one_minus_y);
+    case 1:
+      return Point<3>(one_minus_y*one_minus_z,
+                      -x*one_minus_z,
+                      -x*one_minus_y);
+    case 2:
+      return Point<3>(-y*one_minus_z,
+                      one_minus_x*one_minus_z,
+                      -one_minus_x*y);
+    case 3:
+      return Point<3>(y*one_minus_z,
+                      x*one_minus_z,
+                      -x*y);
+    case 4:
+      return Point<3>(-one_minus_y*z,
+                      -one_minus_x*z,
+                      one_minus_x*one_minus_y);
+    case 5:
+      return Point<3>(one_minus_y*z,
+                      -x*z,
+                      x*one_minus_y);
+    case 6:
+      return Point<3>(-y*z,
+                      one_minus_x*z,
+                      one_minus_x*y);
+    case 7:
+      return Point<3>(y*z, x*z, x*y);
+    }
+
+  return Point<3> (-1e9, -1e9, -1e9);
+}
+
+
+
+// Fallback for every dimension without a dedicated specialization: the
+// function raises a "not implemented" assertion and returns a
+// default-constructed tensor; neither argument is used.
+template <int dim>
+Tensor<1,dim>
+GeometryInfo<dim>::
+d_linear_shape_function_gradient (const Point<dim> & /*xi*/,
+                                  const unsigned int /*i*/)
+{
+  Assert (false, ExcNotImplemented());
+
+  return Tensor<1,dim>();
+}
+
+
+
+
+
+namespace internal
+{
+  namespace GeometryInfo
+  {
+    // Wedge product of a single vector in 2d: the vector turned by 90
+    // degrees to the right, i.e. (x,y) becomes (y,-x).
+    inline
+    Tensor<1,2>
+    wedge_product (const Tensor<1,2> (&derivative)[1])
+    {
+      const Tensor<1,2> &v = derivative[0];
+
+      Tensor<1,2> rotated;
+      rotated[0] = v[1];
+      rotated[1] = -v[0];
+
+      return rotated;
+    }
+
+
+    // Wedge product of two vectors in 3d: this is their cross product.
+    inline
+    Tensor<1,3>
+    wedge_product (const Tensor<1,3> (&derivative)[2])
+    {
+      const Tensor<1,3> &first  = derivative[0];
+      const Tensor<1,3> &second = derivative[1];
+
+      return cross_product_3d (first, second);
+    }
+
+
+    // Wedge product of dim vectors in dim space dimensions: the
+    // determinant of the matrix that has the given vectors as its rows.
+    template <int dim>
+    inline
+    Tensor<0,dim>
+    wedge_product (const Tensor<1,dim> (&derivative)[dim])
+    {
+      Tensor<2,dim> rows;
+      for (unsigned int r=0; r<dim; ++r)
+        rows[r] = derivative[r];
+
+      return determinant (rows);
+    }
+  }
+}
+
+
+// For each vertex of the reference cell, compute the wedge product of the
+// derivatives of the d-linear mapping defined by @p vertices and store it
+// in the corresponding entry of @p forms.
+template <int dim>
+template <int spacedim>
+void
+GeometryInfo<dim>::
+alternating_form_at_vertices
+#ifndef DEAL_II_CONSTEXPR_BUG
+(const Point<spacedim> (&vertices)[vertices_per_cell],
+ Tensor<spacedim-dim,spacedim> (&forms)[vertices_per_cell])
+#else
+(const Point<spacedim> *vertices,
+ Tensor<spacedim-dim,spacedim> *forms)
+#endif
+{
+  // What is computed at every vertex is the alternating form of the
+  // mapped unit vectors. Take a quad in spacedim==3 as an example: we
+  // have to find out how the infinitesimal vectors (d\xi_1,0) and
+  // (0,d\xi_2) are mapped into spacedim-dimensional space, and then form
+  // the cross product (i.e. the wedge product) of the two images. With
+  //    \vec x = sum_i \vec v_i phi_i(\vec xi)
+  // the mapped vectors are
+  //   [x(\xi+(d\xi_1,0))-x(\xi)]/d\xi_1
+  // and
+  //   [x(\xi+(0,d\xi_2))-x(\xi)]/d\xi_2
+  // which are nothing but the columns of the 3x2 matrix \grad_\xi x(\xi).
+  //
+  // The same argument works for every pair dim,spacedim: all that is
+  // needed is the wedge product of the columns of the derivatives.
+  for (unsigned int v=0; v<vertices_per_cell; ++v)
+    {
+      // reference coordinates of the vertex at which we evaluate
+      const Point<dim> xi = unit_cell_vertex(v);
+
+      // one derivative vector per reference direction, summed up over
+      // the contributions of all shape functions
+      Tensor<1,spacedim> derivatives[dim];
+      for (unsigned int k=0; k<vertices_per_cell; ++k)
+        {
+          const Tensor<1,dim> grad_phi_k
+            = d_linear_shape_function_gradient (xi, k);
+
+          for (unsigned int d=0; d<dim; ++d)
+            derivatives[d] += vertices[k] * grad_phi_k[d];
+        }
+
+      forms[v] = internal::GeometryInfo::wedge_product (derivatives);
+    }
+}
+
+
+// explicit instantiations of the class itself for dim=1,2,3,4
+template struct GeometryInfo<1>;
+template struct GeometryInfo<2>;
+template struct GeometryInfo<3>;
+template struct GeometryInfo<4>;
+
+// explicit instantiations of the member template
+// alternating_form_at_vertices() for the (dim,spacedim) pairs
+// (1,1), (1,2), (2,2), (2,3) and (3,3). the argument list depends on
+// DEAL_II_CONSTEXPR_BUG in the same way as in the definition above
+
+// dim=1, spacedim=1
+template
+void
+GeometryInfo<1>::
+alternating_form_at_vertices
+#ifndef DEAL_II_CONSTEXPR_BUG
+(const Point<1> (&)[vertices_per_cell],
+ Tensor<1-1,1> (&)[vertices_per_cell])
+#else
+(const Point<1> *, Tensor<1-1,1> *)
+#endif
+;
+
+// dim=1, spacedim=2
+template
+void
+GeometryInfo<1>::
+alternating_form_at_vertices
+#ifndef DEAL_II_CONSTEXPR_BUG
+(const Point<2> (&)[vertices_per_cell],
+ Tensor<2-1,2> (&)[vertices_per_cell])
+#else
+(const Point<2> *, Tensor<2-1,2> *)
+#endif
+;
+
+// dim=2, spacedim=2
+template
+void
+GeometryInfo<2>::
+alternating_form_at_vertices
+#ifndef DEAL_II_CONSTEXPR_BUG
+(const Point<2> (&vertices)[vertices_per_cell],
+ Tensor<2-2,2> (&forms)[vertices_per_cell])
+#else
+(const Point<2> *, Tensor<2-2,2> *)
+#endif
+;
+
+// dim=2, spacedim=3
+template
+void
+GeometryInfo<2>::
+alternating_form_at_vertices
+#ifndef DEAL_II_CONSTEXPR_BUG
+(const Point<3> (&vertices)[vertices_per_cell],
+ Tensor<3-2,3> (&forms)[vertices_per_cell])
+#else
+(const Point<3> *, Tensor<3-2,3> *)
+#endif
+;
+
+
+// dim=3, spacedim=3
+template
+void
+GeometryInfo<3>::
+alternating_form_at_vertices
+#ifndef DEAL_II_CONSTEXPR_BUG
+(const Point<3> (&vertices)[vertices_per_cell],
+ Tensor<3-3,3> (&forms)[vertices_per_cell])
+#else
+(const Point<3> *, Tensor<3-3,3> *)
+#endif
+;
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/index_set.cc b/source/base/index_set.cc
new file mode 100644
index 0000000..3cc5606
--- /dev/null
+++ b/source/base/index_set.cc
@@ -0,0 +1,558 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/base/index_set.h>
+#include <list>
+
+#ifdef DEAL_II_WITH_TRILINOS
+#  ifdef DEAL_II_WITH_MPI
+#    include <Epetra_MpiComm.h>
+#  endif
+#  include <Epetra_SerialComm.h>
+#  include <Epetra_Map.h>
+#endif
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+#ifdef DEAL_II_WITH_TRILINOS
+
+// the 64-bit path uses a few different names, so put that into a separate
+// implementation
+
+#ifdef DEAL_II_WITH_64BIT_INDICES
+
+// Build an index set from an Epetra_Map, using the 64-bit accessor
+// functions of the map. The size of the index space is taken from
+// map.NumGlobalElements64(); the indices added are those the map reports
+// as its own. The set is compressed before the constructor returns.
+IndexSet::IndexSet (const Epetra_Map &map)
+  :
+  is_compressed (true),
+  index_space_size (map.NumGlobalElements64()),
+  largest_range (numbers::invalid_unsigned_int)
+{
+  if (!map.LinearMap())
+    {
+      // general case: add the indices listed by the map one by one
+      const size_type n_indices = map.NumMyElements();
+      size_type *indices = (size_type *)map.MyGlobalElements64();
+      add_indices(indices, indices+n_indices);
+    }
+  else
+    {
+      // a contiguous map is described by its smallest and largest index,
+      // so there is no need to go through the whole list of indices
+      add_range(size_type(map.MinMyGID64()), size_type(map.MaxMyGID64()+1));
+    }
+
+  compress();
+}
+
+#else
+
+// this is the standard 32-bit implementation
+
+// Build an index set from an Epetra_Map, using the standard (32-bit)
+// accessor functions of the map. The size of the index space is taken
+// from map.NumGlobalElements(); the indices added are those the map
+// reports as its own. The set is compressed before the constructor
+// returns.
+IndexSet::IndexSet (const Epetra_Map &map)
+  :
+  is_compressed (true),
+  index_space_size (map.NumGlobalElements()),
+  largest_range (numbers::invalid_unsigned_int)
+{
+  if (!map.LinearMap())
+    {
+      // general case: add the indices listed by the map one by one
+      const size_type n_indices = map.NumMyElements();
+      unsigned int *indices = (unsigned int *)map.MyGlobalElements();
+      add_indices(indices, indices+n_indices);
+    }
+  else
+    {
+      // a contiguous map is described by its smallest and largest index,
+      // so there is no need to go through the whole list of indices
+      add_range(size_type(map.MinMyGID()), size_type(map.MaxMyGID()+1));
+    }
+
+  compress();
+}
+
+#endif
+
+#endif // ifdef DEAL_II_WITH_TRILINOS
+
+
+
+// Add the indices of the half-open range [begin,end) to this set. An
+// empty range (begin==end) leaves the set untouched; otherwise the set is
+// marked as not compressed.
+void
+IndexSet::add_range (const size_type begin,
+                     const size_type end)
+{
+  Assert ((begin < index_space_size)
+          ||
+          ((begin == index_space_size) && (end == index_space_size)),
+          ExcIndexRangeType<size_type> (begin, 0, index_space_size));
+  Assert (end <= index_space_size,
+          ExcIndexRangeType<size_type> (end, 0, index_space_size+1));
+  Assert (begin <= end,
+          ExcIndexRangeType<size_type> (begin, 0, end));
+
+  // nothing to do for an empty range
+  if (begin == end)
+    return;
+
+  const Range new_range(begin,end);
+
+  // if the new range starts behind the end of the last range stored so
+  // far (or if nothing is stored yet), it can simply be appended and the
+  // binary search for the insertion point is not needed
+  const bool append_at_end = (ranges.empty() || begin > ranges.back().end);
+
+  if (append_at_end)
+    ranges.push_back(new_range);
+  else
+    ranges.insert (Utilities::lower_bound (ranges.begin(),
+                                           ranges.end(),
+                                           new_range),
+                   new_range);
+
+  is_compressed = false;
+}
+
+
+
+// Bring the list of ranges into compressed form: ranges that overlap or
+// touch are merged into one, the storage is shrunk to the number of
+// ranges left over, and for every range the number of indices stored in
+// front of it is recorded. The position of the range with the most
+// elements is remembered in largest_range.
+void
+IndexSet::do_compress () const
+{
+  // Merge ranges in a single sweep. Erasing merged ranges in place with
+  // std::vector::erase would be quadratic in the number of ranges;
+  // instead, every merged range is written to the next free slot at the
+  // front of the array. Since the ranges are sorted by their first index,
+  // a range can be merged with its predecessors exactly if it starts no
+  // later than where they end.
+  std::vector<Range>::iterator write_pos = ranges.begin();
+  std::vector<Range>::iterator read_pos  = ranges.begin();
+  while (read_pos != ranges.end())
+    {
+      const size_type merged_begin = read_pos->begin;
+      size_type       merged_end   = read_pos->end;
+
+      // absorb all following ranges that overlap or touch what we have
+      // collected so far
+      ++read_pos;
+      while (read_pos != ranges.end() &&
+             (read_pos->begin <= merged_end))
+        {
+          merged_end = std::max (merged_end, read_pos->end);
+          ++read_pos;
+        }
+
+      // the write position never overtakes the read position, so no
+      // unread range gets overwritten here
+      *write_pos = Range(merged_begin, merged_end);
+      ++write_pos;
+    }
+
+  // if ranges were merged, copy the remaining ones into an array with
+  // exactly the right amount of storage
+  if (write_pos != ranges.end())
+    {
+      std::vector<Range> compact_ranges(ranges.begin(), write_pos);
+      ranges.swap(compact_ranges);
+    }
+
+  // now number the indices consecutively across the ranges and find the
+  // range with the most elements
+  size_type next_index = 0;
+  size_type largest_range_size = 0;
+  for (std::vector<Range>::iterator i = ranges.begin();
+       i != ranges.end(); ++i)
+    {
+      Assert(i->begin < i->end, ExcInternalError());
+
+      const size_type n_in_range = i->end - i->begin;
+
+      i->nth_index_in_set = next_index;
+      next_index += n_in_range;
+
+      if (n_in_range > largest_range_size)
+        {
+          largest_range_size = n_in_range;
+          largest_range = i - ranges.begin();
+        }
+    }
+  is_compressed = true;
+
+  // check that next_index is correct. this has to come after the flag
+  // has been set because we would otherwise get into an endless loop
+  Assert (next_index == n_elements(), ExcInternalError());
+}
+
+
+
+// Return the intersection of this index set with @p is as a new,
+// compressed index set. Both sets must have the same size; both are
+// compressed before their ranges are compared.
+IndexSet
+IndexSet::operator & (const IndexSet &is) const
+{
+  Assert (size() == is.size(),
+          ExcDimensionMismatch (size(), is.size()));
+
+  compress ();
+  is.compress ();
+
+  IndexSet result (size());
+
+  // walk through the two sorted lists of ranges simultaneously
+  std::vector<Range>::const_iterator r1 = ranges.begin();
+  std::vector<Range>::const_iterator r2 = is.ranges.begin();
+  while ((r1 != ranges.end()) && (r2 != is.ranges.end()))
+    {
+      // if the two current ranges do not overlap at all, advance the one
+      // that lies to the left of the other and try again
+      if (r1->end <= r2->begin)
+        {
+          ++r1;
+          continue;
+        }
+      if (r2->end <= r1->begin)
+        {
+          ++r2;
+          continue;
+        }
+
+      // from here on the two ranges must overlap somehow
+      Assert (((r1->begin <= r2->begin) &&
+               (r1->end > r2->begin))
+              ||
+              ((r2->begin <= r1->begin) &&
+               (r2->end > r1->begin)),
+              ExcInternalError());
+
+      // the part common to both ranges belongs to the result
+      result.add_range (std::max (r1->begin,
+                                  r2->begin),
+                        std::min (r1->end,
+                                  r2->end));
+
+      // advance the range that ends first: the one that ends later may
+      // still overlap with a subsequent range of the other set
+      if (r1->end <= r2->end)
+        ++r1;
+      else
+        ++r2;
+    }
+
+  result.compress ();
+  return result;
+}
+
+
+
+IndexSet
+IndexSet::get_view (const size_type begin,
+                    const size_type end) const
+{
+  // Returns the part of this set inside [begin,end), with all indices
+  // shifted so that 'begin' becomes index zero of the result.
+  Assert (begin <= end,
+          ExcMessage ("End index needs to be larger or equal to begin index!"));
+  Assert (end <= size(),
+          ExcMessage ("Given range exceeds index set dimension"));
+
+  IndexSet result (end-begin);
+  for (std::vector<Range>::const_iterator range = ranges.begin();
+       range != ranges.end(); ++range)
+    {
+      // the scan stops at the first range starting at or beyond 'end'
+      if (range->begin >= end)
+        break;
+
+      // ranges that stop before the window starts are simply skipped
+      if (range->end <= begin)
+        continue;
+
+      // clip to the window and translate into the view's numbering
+      result.add_range (std::max(range->begin, begin)-begin,
+                        std::min(range->end, end)-begin);
+    }
+
+  result.compress();
+  return result;
+}
+
+
+
+void
+IndexSet::subtract_set (const IndexSet &other)
+{
+  compress();
+  other.compress();
+  is_compressed = false;
+  // Purpose: remove from this set every index that is also contained in
+  // 'other', by walking over both compressed range lists simultaneously.
+  // we save new ranges to be added to our IndexSet in a temporary list and
+  // add all of them in one go at the end. This is necessary because a growing
+  // ranges vector invalidates iterators.
+  std::list<Range> temp_list;
+
+  std::vector<Range>::iterator own_it = ranges.begin();
+  std::vector<Range>::iterator other_it = other.ranges.begin();
+
+  while (own_it != ranges.end() && other_it != other.ranges.end())
+    {
+      // own range lies completely before the other one: nothing to remove
+      // from it, so advance own iterator until we get an overlap
+      if (own_it->end <= other_it->begin)
+        {
+          ++own_it;
+          continue;
+        }
+      // other range lies completely before ours: we are done with other_it
+      if (own_it->begin >= other_it->end)
+        {
+          ++other_it;
+          continue;
+        }
+
+      // Now own_it and other_it overlap.  First save the part of own_it that
+      // is before other_it (if not empty).
+      if (own_it->begin < other_it->begin)
+        {
+          Range r(own_it->begin, other_it->begin);
+          r.nth_index_in_set = 0; //fix warning of unused variable
+          temp_list.push_back(r);
+        }
+      // change own_it to the sub range behind other_it. Do not delete own_it
+      // in any case. As removal would invalidate iterators, we just shrink
+      // the range to an empty one.
+      own_it->begin = other_it->end;
+      if (own_it->begin > own_it->end)
+        {
+          own_it->begin = own_it->end;  // other_it reaches past our end: nothing is left of this range
+          ++own_it;
+        }
+
+      // continue without advancing iterators, the right one will be advanced
+      // next.
+    }
+
+  // Now delete all empty ranges we might have created (erase() returns the
+  // iterator following the removed element, so no element is skipped).
+  for (std::vector<Range>::iterator it = ranges.begin();
+       it != ranges.end(); )
+    {
+      if (it->begin >= it->end)
+        it = ranges.erase(it);
+      else
+        ++it;
+    }
+
+  // done, now add the temporary ranges
+  for (std::list<Range>::iterator it = temp_list.begin();
+       it != temp_list.end();
+       ++it)
+    add_range(it->begin, it->end);
+
+  compress();
+}
+
+
+
+void
+IndexSet::add_indices(const IndexSet &other,
+                      const unsigned int offset)
+{
+  if ((this == &other) && (offset == 0))
+    return;  // adding a set to itself without a shift changes nothing
+  // from here on: form the union of this set and of 'other' shifted by 'offset'
+  compress();
+  other.compress();
+  // r1 walks over our own ranges, r2 over the (not yet shifted) ones of 'other'
+  std::vector<Range>::const_iterator r1 = ranges.begin(),
+                                     r2 = other.ranges.begin();
+
+  std::vector<Range> new_ranges;
+  // just get the start and end of the ranges right in this method, everything
+  // else will be done in compress()
+  while (r1 != ranges.end() || r2 != other.ranges.end())
+    {
+      // the two ranges do not overlap or we are at the end of one of the
+      // ranges: copy our own range unchanged
+      if (r2 == other.ranges.end() ||
+          (r1 != ranges.end() && r1->end < (r2->begin+offset)))
+        {
+          new_ranges.push_back(*r1);
+          ++r1;
+        }
+      else if (r1 == ranges.end() || (r2->end+offset) < r1->begin)
+        {
+          // the shifted range of 'other' comes first (or ours are used up)
+          new_ranges.push_back(Range(r2->begin+offset,r2->end+offset));
+          ++r2;
+        }
+      else
+        {
+          // ok, we do overlap, so just take the combination of the current
+          // range (do not bother to merge with subsequent ranges)
+          Range next(std::min(r1->begin, r2->begin+offset),
+                     std::max(r1->end, r2->end+offset));
+          new_ranges.push_back(next);
+          ++r1;
+          ++r2;
+        }
+    }
+  ranges.swap(new_ranges);
+  // the new list may still contain overlapping entries; flag it as
+  // uncompressed so that the following compress() call really does its work
+  is_compressed = false;
+  compress();
+}
+
+
+
+void
+IndexSet::write(std::ostream &out) const
+{
+  // Text format: "<size> <number of ranges>" on the first line, followed
+  // by one "<begin> <end>" line per range of the compressed set.
+  compress();
+
+  out << size() << " " << ranges.size() << std::endl;
+  for (std::vector<Range>::const_iterator range = ranges.begin();
+       range != ranges.end(); ++range)
+    out << range->begin << " " << range->end << std::endl;
+}
+
+
+
+void
+IndexSet::read(std::istream &in)
+{
+  // Counterpart of write(): parse "<size> <n_ranges>" followed by n_ranges
+  // pairs "<begin> <end>" and replace the current content by what was read.
+  // Throws ExcIO if any of the values cannot be extracted from the stream.
+  size_type s;
+  unsigned int numranges;
+
+  in >> s >> numranges;
+  // a failed extraction would leave s and numranges uninitialized; signal
+  // the error the same way block_write() does instead of using garbage
+  AssertThrow (in, ExcIO());
+
+  ranges.clear();
+  set_size(s);
+  for (unsigned int i=0; i<numranges; ++i)
+    {
+      size_type b, e;
+      in >> b >> e;
+      // truncated or malformed range list
+      AssertThrow (in, ExcIO());
+      add_range(b,e);
+    }
+}
+
+
+void
+IndexSet::block_write(std::ostream &out) const
+{
+  // Binary layout: index_space_size, then the number of ranges (as a
+  // size_t), then the raw bytes of the Range array. The state of the
+  // stream is verified before and after writing.
+  AssertThrow (out, ExcIO());
+
+  const size_t n_ranges = ranges.size();
+  out.write(reinterpret_cast<const char *>(&index_space_size),
+            sizeof(index_space_size));
+  out.write(reinterpret_cast<const char *>(&n_ranges),
+            sizeof(n_ranges));
+  if (n_ranges > 0)
+    out.write (reinterpret_cast<const char *>(&ranges[0]),
+               n_ranges * sizeof(Range));
+
+  AssertThrow (out, ExcIO());
+}
+
+void
+IndexSet::block_read(std::istream &in)
+{
+  // Counterpart of block_write(): read index_space_size and the number of
+  // ranges, then the raw Range array, replacing the current content.
+  // Throws ExcIO if the stream fails while reading.
+  size_type size;
+  size_t n_ranges;
+  in.read(reinterpret_cast<char *>(&size), sizeof(size));
+  in.read(reinterpret_cast<char *>(&n_ranges), sizeof(n_ranges));
+  // if the header could not be read, size and n_ranges are uninitialized;
+  // stop here rather than resizing the range vector to a garbage length
+  AssertThrow (in, ExcIO());
+
+  // we have to clear ranges first
+  ranges.clear();
+  set_size(size);
+  ranges.resize(n_ranges, Range(0,0));
+  if (n_ranges)
+    in.read(reinterpret_cast<char *>(&*ranges.begin()),
+            ranges.size() * sizeof(Range));
+  // do not run do_compress() on a partially read range array
+  AssertThrow (in, ExcIO());
+
+  do_compress(); // needed so that largest_range can be recomputed
+}
+
+
+
+void IndexSet::fill_index_vector(std::vector<size_type> &indices) const
+{
+  // Overwrites 'indices' with every index contained in this set, obtained
+  // by expanding the compressed ranges one after the other.
+  compress();
+
+  indices.clear();
+  indices.reserve(n_elements());
+
+  for (std::vector<Range>::iterator range = ranges.begin();
+       range != ranges.end(); ++range)
+    {
+      size_type index = range->begin;
+      while (index < range->end)
+        indices.push_back (index++);
+    }
+
+  Assert (indices.size() == n_elements(), ExcInternalError());
+}
+
+
+
+
+
+#ifdef DEAL_II_WITH_TRILINOS
+
+Epetra_Map
+IndexSet::make_trilinos_map (const MPI_Comm &communicator,
+                             const bool overlapping) const
+{
+  compress ();
+  // debug check for the non-overlapping case: the element counts of all processes must add up to size()
+#ifdef DEBUG
+  if (!overlapping)
+    {
+      const size_type n_global_elements
+        = Utilities::MPI::sum (n_elements(), communicator);
+      Assert (n_global_elements == size(),
+              ExcMessage ("You are trying to create an Epetra_Map object "
+                          "that partitions elements of an index set "
+                          "between processors. However, the union of the "
+                          "index sets on different processors does not "
+                          "contain all indices exactly once: the sum of "
+                          "the number of entries the various processors "
+                          "want to store locally is "
+                          + Utilities::to_string (n_global_elements) +
+                          " whereas the total size of the object to be "
+                          "allocated is "
+                          + Utilities::to_string (size()) +
+                          ". In other words, there are "
+                          "either indices that are not spoken for "
+                          "by any processor, or there are indices that are "
+                          "claimed by multiple processors."));
+    }
+#endif
+  // a contiguous, non-overlapping set is described by its sizes alone; otherwise the index list is passed explicitly
+  if ((is_contiguous() == true) && (!overlapping))
+    return Epetra_Map (TrilinosWrappers::types::int_type(size()),
+                       TrilinosWrappers::types::int_type(n_elements()),
+                       0,
+#ifdef DEAL_II_WITH_MPI
+                       Epetra_MpiComm(communicator));
+#else
+                       Epetra_SerialComm());
+#endif
+  else
+    {
+      std::vector<size_type> indices;
+      fill_index_vector(indices);
+      // NOTE(review): -1 as global size presumably asks Epetra to compute it -- confirm against the Epetra_Map docs
+      return Epetra_Map (TrilinosWrappers::types::int_type(-1),
+                         TrilinosWrappers::types::int_type(n_elements()),
+                         (n_elements() > 0
+                          ?
+                          reinterpret_cast<TrilinosWrappers::types::int_type *>(&indices[0])
+                          :
+                          0),
+                         0,
+#ifdef DEAL_II_WITH_MPI
+                         Epetra_MpiComm(communicator));
+#else
+                         Epetra_SerialComm());
+      (void)communicator;  // placed after the return; it only marks the parameter as used in this branch
+#endif
+    }
+}
+
+
+#endif
+
+
+std::size_t
+IndexSet::memory_consumption () const
+{
+  // sum of the estimates for the range list, the compression flag and
+  // the size of the index space
+  std::size_t total = MemoryConsumption::memory_consumption (ranges);
+  total += MemoryConsumption::memory_consumption (is_compressed);
+  total += MemoryConsumption::memory_consumption (index_space_size);
+  return total;
+}
+
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/job_identifier.cc b/source/base/job_identifier.cc
new file mode 100644
index 0000000..320bfc2
--- /dev/null
+++ b/source/base/job_identifier.cc
@@ -0,0 +1,71 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/job_identifier.h>
+#include <ctime>
+
+#ifdef DEAL_II_HAVE_UNISTD_H
+# include <unistd.h>
+#endif
+
+// The guard used to read DEAM_II_MSVC, a misspelling that does not match
+// the DEAL_II_ prefix of every other configuration macro in this file.
+#ifdef DEAL_II_MSVC
+#  include <WinSock2.h>
+#endif
+
+DEAL_II_NAMESPACE_OPEN
+
+
+JobIdentifier dealjobid;  // global job identifier; its string is assembled by the constructor below
+
+
+JobIdentifier::JobIdentifier()
+{
+  // The identifier has the form "JobId <hostname> <ctime string>"; the
+  // host name is replaced by "unknown" when gethostname() is not available
+  // in this configuration.
+  time_t t = std::time(0);
+  id = std::string("JobId ");
+
+#if defined(DEAL_II_HAVE_UNISTD_H) && defined(DEAL_II_HAVE_GETHOSTNAME)
+  // zero-initialize the buffer: gethostname() need not null-terminate a
+  // truncated name. Since at most 99 bytes are written, the last byte
+  // always stays '\0', so the std::string constructor below cannot read
+  // past the end of the array.
+  char name[100] = {0};
+  gethostname(name,99);
+  id += std::string(name) + std::string(" ");
+#else
+  id += std::string("unknown ");
+#endif
+
+  id += std::string(std::ctime(&t));
+}
+
+
+const std::string
+JobIdentifier::operator ()() const
+{
+  return id;  // a copy of the identification string built in the constructor
+}
+
+
+std::string
+JobIdentifier::base_name(const char *filename)
+{
+  // Strips the extension (everything from the first '.' on) and the
+  // directory part (everything before the last '/') from 'filename'.
+  std::string name(filename);
+
+  // find() yields npos for names without a '.'; passing that position to
+  // erase() would throw std::out_of_range, so only cut if a dot is present
+  const std::string::size_type dot = name.find(".");
+  if (dot != std::string::npos)
+    name.erase(dot);
+
+  // note that the last '/' itself is kept at the front of the result
+  const std::string::size_type slash = name.rfind("/");
+  if (slash < name.size())
+    name.erase(0,slash);
+  return name;
+}
+
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/logstream.cc b/source/base/logstream.cc
new file mode 100644
index 0000000..eb99ccc
--- /dev/null
+++ b/source/base/logstream.cc
@@ -0,0 +1,528 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/job_identifier.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/base/thread_management.h>
+
+#ifdef DEAL_II_HAVE_SYS_RESOURCE_H
+#  include <sys/resource.h>
+#endif
+
+#ifdef DEAL_II_HAVE_UNISTD_H
+#  include <unistd.h>
+#endif
+
+#include <iostream>
+#include <iomanip>
+#include <fstream>
+#include <sstream>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace
+{
+  Threads::Mutex log_lock;    // taken by the LogStream functions below that change settings
+  Threads::Mutex write_lock;  // taken by operator<< while a finished piece of text is written out
+}
+
+
+// The standard log object of deal.II (the destructor below treats it specially):
+LogStream deallog;
+
+
+LogStream::LogStream()
+  :
+  std_out(&std::cerr),
+  file(0),
+  std_depth(0),
+  file_depth(10000),
+  print_utime(false),
+  diff_utime(false),
+  last_time (0.),
+  double_threshold(0.),
+  float_threshold(0.),
+  offset(0),
+  old_cerr(0),
+  at_newline(true)
+{
+  // print_thread_id is read by print_line_head() and log_thread_id() but
+  // was never given a value. It is assigned here rather than in the
+  // initializer list so that nothing is assumed about the position of the
+  // member in the class declaration.
+  print_thread_id = false;
+
+  // every thread's prefix stack starts out from this entry
+  get_prefixes().push("DEAL:");
+
+#if defined(DEAL_II_HAVE_UNISTD_H) && defined(DEAL_II_HAVE_TIMES)
+  // reference point for the time differences printed by timestamp()
+  reference_time_val = 1./sysconf(_SC_CLK_TCK) * times(&reference_tms);
+#else
+  // timestamp() reads the reference values in any case; mirror the zero
+  // values it uses itself when times() is not available
+  reference_time_val = 0.;
+  reference_tms.tms_utime = 0;
+  reference_tms.tms_stime = 0;
+  reference_tms.tms_cutime = 0;
+  reference_tms.tms_cstime = 0;
+#endif
+}
+
+
+LogStream::~LogStream()
+{
+  // Content written by the current thread that was never terminated by a
+  // flush is still sitting in the thread's stream; try not to lose it.
+  const bool pending_output = (get_stream().str().length() > 0);
+  if (pending_output)
+    {
+      // Flushing is not always possible, though. The global 'deallog'
+      // object is destroyed at program exit: it lives in a shared library
+      // that depends on libstdc++.so, so it goes away before
+      // std::cout/cerr do, but only after every file stream defined in
+      // the user program has already been destroyed. If 'deallog' still
+      // has a file attached at that point, writing to it would touch a
+      // stream that no longer exists and most likely crash (this is
+      // tested by base/log_crash_01). Print a message to the screen
+      // instead. An assertion is not an option here: Assert may itself
+      // want to write to 'deallog', and an exception thrown by
+      // AssertThrow could not be caught.
+      const bool attached_file_may_be_gone = (this == &deallog) && file;
+      if (attached_file_may_be_gone)
+        std::cerr << ("You still have content that was written to 'deallog' "
+                      "but not flushed to the screen or a file while the "
+                      "program is being terminated. This would lead to a "
+                      "segmentation fault. Make sure you flush the "
+                      "content of the 'deallog' object using 'std::endl' "
+                      "before the end of the program.")
+                  << std::endl;
+      else
+        *this << std::endl;
+    }
+
+  // undo a redirection of std::cerr (see log_cerr()) that is still active
+  if (old_cerr)
+    std::cerr.rdbuf(old_cerr);
+}
+
+
+void
+LogStream::test_mode(bool on)
+{
+  // switches the thresholds and the offset between their test-mode values
+  // and zero
+  Threads::Mutex::ScopedLock lock(log_lock);
+
+  double_threshold = on ? 1.e-10 : 0.;
+  float_threshold  = on ? 1.e-7f : 0.;
+  offset           = on ? 1.e-7  : 0.;
+}
+
+
+LogStream &
+LogStream::operator<< (std::ostream& (*p) (std::ostream &))
+{
+  // Applies a stream manipulator (such as std::endl or std::flush) to this log.
+  std::ostringstream &stream = get_stream();
+
+  // Print to the internal stringstream:
+  stream << p;
+
+
+  // This is a bloody hack until LogStream got reimplemented as a proper
+  // child of std::streambuf (or similar).
+  //
+  // The problem is that at this point we would like to know whether an
+  // std::flush or std::endl has called us, however, there is no way to
+  // detect this in a sane manner.
+  //
+  // The obvious idea to compare function pointers,
+  //   std::ostream & (* const p_flush) (std::ostream &) = &std::flush;
+  //   p == p_flush ? ...,
+  // is wrong as there does not have to be a _single_ std::flush instance;
+  // there could be several of them. And in fact, LLVM's libc++ implements
+  // std::flush and std::endl in a way that every shared library and
+  // executable has its local copy... fun...
+  //
+  // - Maier, 2013
+  // Workaround: run the manipulator on a dummy stream and record what it does.
+  class QueryStreambuf : public std::streambuf
+  {
+    // Implement a minimalistic stream buffer that only stores the fact
+    // whether overflow or sync was called
+  public:
+    QueryStreambuf()
+      : flushed_(false), newline_written_(false)
+    {
+    }
+    bool flushed()
+    {
+      return flushed_;
+    }
+    bool newline_written()
+    {
+      return newline_written_;
+    }
+  private:
+    int_type overflow(int_type ch)
+    {
+      newline_written_ = true;  // a character was written; presumably the '\n' of std::endl
+      return ch;
+    }
+    int sync()
+    {
+      flushed_ = true;  // the manipulator asked the stream to flush
+      return 0;
+    }
+    bool flushed_;
+    bool newline_written_;
+  } query_streambuf;
+
+  {
+    // and initialize an ostream with this streambuf:
+    std::ostream inject (&query_streambuf);
+    inject << p;
+  }
+  // only a manipulator that flushed causes the accumulated text to be written
+  if (query_streambuf.flushed())
+    {
+      Threads::Mutex::ScopedLock lock(write_lock);
+
+      // Print the line head in case of a previous newline:
+      if (at_newline)
+        print_line_head();
+      // remember for the next write whether this one ended a line
+      at_newline = query_streambuf.newline_written();
+      // each output is written only while the prefix depth is within its limit
+      if (get_prefixes().size() <= std_depth)
+        *std_out << stream.str();
+
+      if (file && (get_prefixes().size() <= file_depth))
+        *file << stream.str() << std::flush;
+
+      // Start a new string:
+      stream.str("");
+    }
+
+  return *this;
+}
+
+
+void
+LogStream::attach(std::ostream &o,
+                  const bool    print_job_id)
+{
+  // make 'o' the file stream of this object, set its formatting flags
+  // and, if requested, write the job identification string to it first
+  Threads::Mutex::ScopedLock lock(log_lock);
+
+  file = &o;
+  file->setf(std::ios::showpoint | std::ios::left);
+  if (print_job_id == true)
+    *file << dealjobid();
+}
+
+
+void LogStream::detach ()
+{
+  Threads::Mutex::ScopedLock lock(log_lock);
+  file = 0;  // from now on there is no file stream to write to
+}
+
+
+void LogStream::log_cerr ()
+{
+  // Toggles the redirection of std::cerr into the buffer of the attached
+  // file stream. NOTE(review): 'file' is dereferenced without a check, so
+  // a stream has to be attached before the redirection is switched on.
+  Threads::Mutex::ScopedLock lock(log_lock);
+
+  if (old_cerr != 0)
+    {
+      // currently redirected: put the saved buffer back
+      std::cerr.rdbuf(old_cerr);
+      old_cerr = 0;
+      return;
+    }
+
+  // not redirected yet: install the file's buffer, remember the old one
+  old_cerr = std::cerr.rdbuf(file->rdbuf());
+}
+
+
+std::ostream &
+LogStream::get_console()
+{
+  return *std_out;  // the stream that receives the console output
+}
+
+
+std::ostream &
+LogStream::get_file_stream()
+{
+  Assert(file, ExcNoFileStreamGiven());  // a file stream has to be attached before asking for it
+  return *file;
+}
+
+
+bool
+LogStream::has_file() const
+{
+  return (file != 0);  // true while a file stream is attached
+}
+
+
+const std::string &
+LogStream::get_prefix() const
+{
+  // the prefix currently in effect is the top of the prefix stack; an
+  // empty stack yields a reference to a static empty string
+  static std::string empty_string;
+
+  std::stack<std::string> &prefix_stack = get_prefixes();
+  if (prefix_stack.empty())
+    return empty_string;
+
+  return prefix_stack.top();
+}
+
+
+void
+LogStream::push (const std::string &text)
+{
+  // the new prefix is the current one (if any) extended by "<text>:"
+  std::stack<std::string> &prefix_stack = get_prefixes();
+
+  std::string pre = prefix_stack.empty() ? std::string() : prefix_stack.top();
+  pre += text;
+  pre += std::string(":");
+
+  prefix_stack.push(pre);
+}
+
+
+void LogStream::pop ()
+{
+  // drop the most recently pushed prefix; an empty stack is left alone
+  std::stack<std::string> &prefix_stack = get_prefixes();
+  if (!prefix_stack.empty())
+    prefix_stack.pop();
+}
+
+
+std::ios::fmtflags
+LogStream::flags(const std::ios::fmtflags f)
+{
+  return get_stream().flags (f);  // forwarded to the stream obtained from get_stream()
+}
+
+
+std::streamsize
+LogStream::precision (const std::streamsize prec)
+{
+  return get_stream().precision (prec);  // forwarded to the stream obtained from get_stream()
+}
+
+
+std::streamsize
+LogStream::width (const std::streamsize wide)
+{
+  return get_stream().width (wide);  // forwarded to the stream obtained from get_stream()
+}
+
+
+unsigned int
+LogStream::depth_console (const unsigned int n)
+{
+  // set the largest prefix depth that is still printed to the console
+  // and hand back the value that was in effect before
+  Threads::Mutex::ScopedLock lock(log_lock);
+
+  const unsigned int previous_depth = std_depth;
+  std_depth = n;
+  return previous_depth;
+}
+
+
+unsigned int
+LogStream::depth_file (const unsigned int n)
+{
+  // set the largest prefix depth that is still written to the file
+  // stream and hand back the value that was in effect before
+  Threads::Mutex::ScopedLock lock(log_lock);
+
+  const unsigned int previous_depth = file_depth;
+  file_depth = n;
+  return previous_depth;
+}
+
+
+void
+LogStream::threshold_double (const double t)
+{
+  Threads::Mutex::ScopedLock lock(log_lock);
+  double_threshold = t;  // new threshold for values of type double
+}
+
+
+void
+LogStream::threshold_float (const float t)
+{
+  Threads::Mutex::ScopedLock lock(log_lock);
+  float_threshold = t;  // new threshold for values of type float
+}
+
+
+bool
+LogStream::log_execution_time (const bool flag)
+{
+  // switch the printing of the user time in each line head on or off;
+  // the previous setting is returned
+  Threads::Mutex::ScopedLock lock(log_lock);
+
+  const bool previous_setting = print_utime;
+  print_utime = flag;
+  return previous_setting;
+}
+
+
+bool
+LogStream::log_time_differences (const bool flag)
+{
+  // choose whether line heads show the time elapsed since the previous
+  // line head (true) or the accumulated time; returns the old setting
+  Threads::Mutex::ScopedLock lock(log_lock);
+
+  const bool previous_setting = diff_utime;
+  diff_utime = flag;
+  return previous_setting;
+}
+
+
+bool
+LogStream::log_thread_id (const bool flag)
+{
+  // switch the printing of the thread id in each line head on or off;
+  // the previous setting is returned
+  Threads::Mutex::ScopedLock lock(log_lock);
+
+  const bool previous_setting = print_thread_id;
+  print_thread_id = flag;
+  return previous_setting;
+}
+
+std::stack<std::string> &
+LogStream::get_prefixes() const  // returns the prefix stack belonging to the calling thread
+{
+#ifdef DEAL_II_WITH_THREADS
+  bool exists = false;  // set by get() below; tells whether this thread already had a stack
+  std::stack<std::string> &local_prefixes = prefixes.get(exists);
+
+  // If this is a new locally stored stack, copy the "blessed" prefixes
+  // from the initial thread that created logstream.
+  if (! exists)
+    {
+      const tbb::enumerable_thread_specific<std::stack<std::string> > &impl
+        = prefixes.get_implementation();
+
+      // The thread that created this LogStream object should be the first
+      // in tbb's enumerable_thread_specific container.
+      const tbb::enumerable_thread_specific<std::stack<std::string> >::const_iterator first_elem
+        = impl.begin();
+      // an empty container leaves the new stack as it is
+      if (first_elem != impl.end())
+        {
+          local_prefixes = *first_elem;
+        }
+    }
+
+  return local_prefixes;
+
+#else
+  return prefixes.get();
+#endif
+}
+
+
+void
+LogStream::print_line_head()  // writes the optional time, optional thread id and the prefix in front of a new line
+{
+#ifdef DEAL_II_HAVE_SYS_RESOURCE_H
+  rusage usage;
+  double utime = 0.;
+  if (print_utime)
+    {
+      getrusage(RUSAGE_SELF, &usage);
+      utime = usage.ru_utime.tv_sec + 1.e-6 * usage.ru_utime.tv_usec;  // user time in seconds
+      if (diff_utime)
+        {
+          double diff = utime - last_time;  // time since the previous line head
+          last_time = utime;
+          utime = diff;
+        }
+    }
+#else
+//TODO[BG]: Do something useful here
+  double utime = 0.;
+#endif
+  // prefix and thread id are determined once and used for both outputs
+  const std::string &head = get_prefix();
+  const unsigned int thread = Threads::this_thread_id();
+  // console output, only while the prefix depth does not exceed std_depth
+  if (get_prefixes().size() <= std_depth)
+    {
+      if (print_utime)
+        {
+          int p = std_out->width(5);  // width(5) returns the old width, restored below
+          *std_out << utime << ':';
+          std_out->width(p);
+        }
+      if (print_thread_id)
+        *std_out << '[' << thread << ']';
+
+      if (head.size() > 0)
+        *std_out <<  head << ':';
+    }
+  // the same for the attached file stream, with its own depth limit and field width
+  if (file && (get_prefixes().size() <= file_depth))
+    {
+      if (print_utime)
+        {
+          int p = file->width(6);  // width(6) returns the old width, restored below
+          *file << utime << ':';
+          file->width(p);
+        }
+      if (print_thread_id)
+        *file << '[' << thread << ']';
+
+      if (head.size() > 0)
+        *file << head << ':';
+    }
+}
+
+
+void
+LogStream::timestamp ()  // logs wall and CPU times relative to the reference values stored in this object
+{
+  struct tms current_tms;
+#if defined(DEAL_II_HAVE_UNISTD_H) && defined(DEAL_II_HAVE_TIMES)
+  const clock_t tick = sysconf(_SC_CLK_TCK);  // divisor that turns the tick counts below into seconds
+  const double time = 1./tick * times(&current_tms);
+#else
+  const double time = 0.;  // no times() available: all current values are reported as zero
+  const unsigned int tick = 100;
+  current_tms.tms_utime = 0;
+  current_tms.tms_stime = 0;
+  current_tms.tms_cutime = 0;
+  current_tms.tms_cstime = 0;
+#endif
+  (*this) << "Wall: " << time - reference_time_val
+          << " User: " << 1./tick * (current_tms.tms_utime - reference_tms.tms_utime)
+          << " System: " << 1./tick * (current_tms.tms_stime - reference_tms.tms_stime)
+          << " Child-User: " << 1./tick * (current_tms.tms_cutime - reference_tms.tms_cutime)
+          << " Child-System: " << 1./tick * (current_tms.tms_cstime - reference_tms.tms_cstime)
+          << std::endl;
+}
+
+
+std::size_t
+LogStream::memory_consumption () const
+{
+  // TODO: not implemented yet. Where the assertion below is active it
+  // always triggers; otherwise only sizeof(*this) is returned.
+  Assert(false, ExcNotImplemented());
+  // the strings on the prefix stack are not counted (see the disabled code below)
+  std::size_t mem = sizeof(*this);
+  // to determine size of stack
+  // elements, we have to copy the
+  // stack since we can't access
+  // elements from further below
+//   std::stack<std::string> tmp = prefixes;
+//   while (tmp.empty() == false)
+//     {
+//       mem += MemoryConsumption::memory_consumption (tmp.top());
+//       tmp.pop ();
+//     }
+
+  return mem;
+}
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/mpi.cc b/source/base/mpi.cc
new file mode 100644
index 0000000..ea24b30
--- /dev/null
+++ b/source/base/mpi.cc
@@ -0,0 +1,549 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/mpi.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/lac/vector_memory.h>
+#include <deal.II/lac/parallel_vector.h>
+#include <deal.II/lac/parallel_block_vector.h>
+#include <deal.II/base/multithread_info.h>
+
+#include <cstddef>
+#include <iostream>
+
+#ifdef DEAL_II_WITH_TRILINOS
+#  ifdef DEAL_II_WITH_MPI
+#    include <Epetra_MpiComm.h>
+#    include <deal.II/lac/vector_memory.h>
+#    include <deal.II/lac/trilinos_vector.h>
+#    include <deal.II/lac/trilinos_block_vector.h>
+#  endif
+#endif
+
+#ifdef DEAL_II_WITH_PETSC
+#  ifdef DEAL_II_WITH_MPI
+#    include <petscsys.h>
+#    include <deal.II/lac/petsc_block_vector.h>
+#    include <deal.II/lac/petsc_parallel_block_vector.h>
+#    include <deal.II/lac/petsc_vector.h>
+#    include <deal.II/lac/petsc_parallel_vector.h>
+#  endif
+#endif
+
+#ifdef DEAL_II_WITH_SLEPC
+#  ifdef DEAL_II_WITH_MPI
+#    include <slepcsys.h>
+#  endif
+#endif
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace Utilities
+{
+
+  namespace MPI
+  {
+#ifdef DEAL_II_WITH_MPI
+    // Unfortunately, we have to work
+    // around an oddity in the way PETSc
+    // and some gcc versions interact. If
+    // we use PETSc's MPI dummy
+    // implementation, it expands the
+    // calls to the two MPI functions
+    // basically as ``(n_jobs=1, 0)'',
+    // i.e. it assigns the number one to
+    // the variable holding the number of
+    // jobs, and then uses the comma
+    // operator to let the entire
+    // expression have the value zero. The
+    // latter is important, since
+    // ``MPI_Comm_size'' returns an error
+    // code that we may want to check (we
+    // don't here, but one could in
+    // principle), and the trick with the
+    // comma operator makes sure that both
+    // the number of jobs is correctly
+    // assigned, and the return value is
+    // zero. Unfortunately, if some recent
+    // versions of gcc detect that the
+    // comma expression just stands by
+    // itself, i.e. the result is not
+    // assigned to another variable, then
+    // they warn ``right-hand operand of
+    // comma has no effect''. This
+    // unwanted side effect can be
+    // suppressed by casting the result of
+    // the entire expression to type
+    // ``void'' -- not beautiful, but
+    // helps calming down unwarranted
+    // compiler warnings...
+    unsigned int n_mpi_processes (const MPI_Comm &mpi_communicator)
+    {
+      // the count starts out as one; the return value of MPI_Comm_size is
+      // discarded on purpose (see the explanation of the cast above)
+      int process_count = 1;
+      (void) MPI_Comm_size (mpi_communicator, &process_count);
+      return process_count;
+    }
+
+
+    unsigned int this_mpi_process (const MPI_Comm &mpi_communicator)
+    {
+      // the rank starts out as zero; the return value of MPI_Comm_rank is
+      // discarded on purpose
+      int process_rank = 0;
+      (void) MPI_Comm_rank (mpi_communicator, &process_rank);
+      return process_rank;
+    }
+
+
+    MPI_Comm duplicate_communicator (const MPI_Comm &mpi_communicator)
+    {
+      // returns the communicator created by MPI_Comm_dup; the error code
+      // of that call is not inspected
+      MPI_Comm duplicate;
+      MPI_Comm_dup (mpi_communicator, &duplicate);
+      return duplicate;
+    }
+
+
+    std::vector<unsigned int>
+    compute_point_to_point_communication_pattern (const MPI_Comm &mpi_comm,
+                                                  const std::vector<unsigned int> &destinations)
+    {
+      unsigned int myid = Utilities::MPI::this_mpi_process(mpi_comm);
+      unsigned int n_procs = Utilities::MPI::n_mpi_processes(mpi_comm);
+      // every destination must be a valid rank other than our own
+      for (unsigned int i=0; i<destinations.size(); ++i)
+        {
+          Assert (destinations[i] < n_procs,
+                  ExcIndexRange (destinations[i], 0, n_procs));
+          Assert (destinations[i] != myid,
+                  ExcMessage ("There is no point in communicating with ourselves."));
+        }
+
+
+      // let all processors
+      // communicate the maximal
+      // number of destinations they
+      // have
+      const unsigned int max_n_destinations
+        = Utilities::MPI::max (destinations.size(), mpi_comm);
+
+      if (max_n_destinations==0)
+        // all processes have nothing to send/receive:
+        return std::vector<unsigned int>();
+
+      // now that we know the number
+      // of data packets every
+      // processor wants to send, set
+      // up a buffer with the maximal
+      // size and copy our
+      // destinations in there,
+      // padded with
+      // numbers::invalid_unsigned_int
+      std::vector<unsigned int> my_destinations(max_n_destinations,
+                                                numbers::invalid_unsigned_int);
+      std::copy (destinations.begin(), destinations.end(),
+                 my_destinations.begin());
+
+      // now exchange these (we could
+      // communicate less data if we
+      // used MPI_Allgatherv, but
+      // we'd have to communicate
+      // my_n_destinations to all
+      // processors in this case,
+      // which is more expensive than
+      // the reduction operation
+      // above in MPI_Allreduce)
+      std::vector<unsigned int> all_destinations (max_n_destinations * n_procs);
+      MPI_Allgather (&my_destinations[0], max_n_destinations, MPI_UNSIGNED,
+                     &all_destinations[0], max_n_destinations, MPI_UNSIGNED,
+                     mpi_comm);
+
+      // now we know who is going to
+      // communicate with
+      // whom. collect who is going
+      // to communicate with us!
+      std::vector<unsigned int> origins;
+      for (unsigned int i=0; i<n_procs; ++i)
+        for (unsigned int j=0; j<max_n_destinations; ++j)
+          if (all_destinations[i*max_n_destinations + j] == myid)
+            origins.push_back (i);
+          else if (all_destinations[i*max_n_destinations + j] ==
+                   numbers::invalid_unsigned_int)
+            break;  // the padding of processor i's list has been reached
+
+      return origins;
+    }
+
+
+    namespace
+    {
+      // custom MPI_Op used by min_max_avg(): folds the MinMaxAvg record
+      // behind in_lhs_ into the one behind inout_rhs_
+      void max_reduce ( const void *in_lhs_,
+                        void *inout_rhs_,
+                        int *len,
+                        MPI_Datatype *)
+      {
+        (void)len;
+        Assert(*len==1, ExcInternalError());
+
+        const MinMaxAvg &incoming = *static_cast<const MinMaxAvg *>(in_lhs_);
+        MinMaxAvg &combined = *static_cast<MinMaxAvg *>(inout_rhs_);
+
+        combined.sum += incoming.sum;
+
+        // minimum and the index it belongs to
+        if (incoming.min < combined.min)
+          {
+            combined.min = incoming.min;
+            combined.min_index = incoming.min_index;
+          }
+        else if (incoming.min == combined.min)
+          {
+            // on a tie keep the lower cpu index so that the operator is
+            // commutative
+            if (incoming.min_index < combined.min_index)
+              combined.min_index = incoming.min_index;
+          }
+
+        // maximum and the index it belongs to
+        if (incoming.max > combined.max)
+          {
+            combined.max = incoming.max;
+            combined.max_index = incoming.max_index;
+          }
+        else if (incoming.max == combined.max)
+          {
+            // on a tie keep the lower cpu index so that the operator is
+            // commutative
+            if (incoming.max_index < combined.max_index)
+              combined.max_index = incoming.max_index;
+          }
+      }
+    }
+
+
+
+    MinMaxAvg
+    min_max_avg(const double my_value,
+                const MPI_Comm &mpi_communicator)
+    {
+      // If MPI was not started, we have a serial computation and cannot run
+      // the other MPI commands: the local value is then sum, average,
+      // minimum and maximum at once, all attributed to process 0
+      if (job_supports_mpi() == false)
+        {
+          MinMaxAvg result;
+          result.sum = my_value;
+          result.avg = my_value;
+          result.min = my_value;
+          result.max = my_value;
+          result.min_index = 0;
+          result.max_index = 0;
+
+          return result;
+        }
+
+      // To avoid uninitialized values on some MPI implementations, provide
+      // result with a default value already...
+      MinMaxAvg result = { 0., std::numeric_limits<double>::max(),
+                           -std::numeric_limits<double>::max(), 0, 0, 0.
+                         };
+
+      const unsigned int my_id
+        = dealii::Utilities::MPI::this_mpi_process(mpi_communicator);
+      const unsigned int numproc
+        = dealii::Utilities::MPI::n_mpi_processes(mpi_communicator);
+
+      // register max_reduce as a (commutative) reduction operation
+      MPI_Op op;
+      int ierr = MPI_Op_create((MPI_User_function *)&max_reduce, true, &op);
+      AssertThrow(ierr == MPI_SUCCESS, ExcInternalError());
+
+      // the local contribution: our value is sum, min and max at once and
+      // both extrema are attributed to this process
+      MinMaxAvg in;
+      in.sum = in.min = in.max = my_value;
+      in.min_index = in.max_index = my_id;
+
+      // describe the part of MinMaxAvg that takes part in the reduction:
+      // three doubles starting at offset zero, followed by two integers
+      // starting at min_index
+      MPI_Datatype type;
+      int lengths[]= {3,2};
+      MPI_Aint displacements[]= {0,offsetof(MinMaxAvg, min_index)};
+      MPI_Datatype types[]= {MPI_DOUBLE, MPI_INT};
+
+      // MPI_Type_struct is deprecated since MPI-2 and no longer part of
+      // MPI-3; MPI_Type_create_struct takes the same arguments
+      ierr = MPI_Type_create_struct(2, lengths, displacements, types, &type);
+      AssertThrow(ierr == MPI_SUCCESS, ExcInternalError());
+
+      // the type may only be used in communication once the commit succeeded
+      ierr = MPI_Type_commit(&type);
+      AssertThrow(ierr == MPI_SUCCESS, ExcInternalError());
+
+      ierr = MPI_Allreduce (&in, &result, 1, type, op, mpi_communicator);
+      AssertThrow(ierr == MPI_SUCCESS, ExcInternalError());
+
+      ierr = MPI_Type_free (&type);
+      AssertThrow(ierr == MPI_SUCCESS, ExcInternalError());
+
+      ierr = MPI_Op_free(&op);
+      AssertThrow(ierr == MPI_SUCCESS, ExcInternalError());
+
+      // the average is not part of the reduction; derive it from the sum
+      result.avg = result.sum / numproc;
+
+      return result;
+    }
+
+#else
+
+    unsigned int n_mpi_processes (const MPI_Comm &)
+    {
+      return 1; // fallback branch: the communicator is ignored, exactly one process is reported
+    }
+
+
+
+    unsigned int this_mpi_process (const MPI_Comm &)
+    {
+      return 0; // fallback branch: the communicator is ignored, the caller is always process 0
+    }
+
+
+    MPI_Comm duplicate_communicator (const MPI_Comm &mpi_communicator)
+    {
+      return mpi_communicator; // fallback branch: no duplication takes place, the argument is handed back
+    }
+
+
+
+    MinMaxAvg
+    min_max_avg(const double my_value,
+                const MPI_Comm &)
+    {
+      // with a single participant the given value is sum, average,
+      // minimum and maximum at the same time...
+      MinMaxAvg result;
+      result.sum = result.avg = result.min = result.max = my_value;
+
+      // ...and both extrema belong to process 0
+      result.min_index = result.max_index = 0;
+
+      return result;
+    }
+
+#endif
+
+
+
+    MPI_InitFinalize::MPI_InitFinalize (int    &argc,
+                                        char ** &argv,
+                                        const unsigned int max_num_threads)
+    { // starts MPI (and PETSc/SLEPc when configured), then sets the thread limit
+      static bool constructor_has_already_run = false;
+      (void)constructor_has_already_run;
+      Assert (constructor_has_already_run == false,
+              ExcMessage ("You can only create a single object of this class "
+                          "in a program since it initializes the MPI system."));
+
+
+
+#ifdef DEAL_II_WITH_MPI
+      // MPI must not have been started by anybody else: this object
+      // initializes it itself (see MPI_Init_thread below).
+      int MPI_has_been_started = 0;
+      MPI_Initialized(&MPI_has_been_started);
+      AssertThrow (MPI_has_been_started == 0,
+                   ExcMessage ("MPI error. You can only start MPI once!"));
+
+      int mpi_err, provided;
+      // this works like mpi_err = MPI_Init (&argc, &argv); but tells MPI that
+      // we might use several threads but never call two MPI functions at the
+      // same time. For an explanation of why we do this see
+      // http://www.open-mpi.org/community/lists/users/2010/03/12244.php
+      int wanted = MPI_THREAD_SERIALIZED;
+      mpi_err = MPI_Init_thread(&argc, &argv, wanted, &provided);
+      AssertThrow (mpi_err == 0,
+                   ExcMessage ("MPI could not be initialized."));
+
+      // disable for now because at least some implementations always return MPI_THREAD_SINGLE.
+      //Assert(max_num_threads==1 || provided != MPI_THREAD_SINGLE,
+      //    ExcMessage("MPI reports that we are not allowed to use multiple threads."));
+#else
+      // make sure the compiler doesn't warn
+      // about these unused variables
+      (void)argc;
+      (void)argv;
+#endif
+
+      // we are allowed to call MPI_Init ourselves and PETScInitialize will
+      // detect this. This allows us to use MPI_Init_thread instead.
+#ifdef DEAL_II_WITH_PETSC
+#  ifdef DEAL_II_WITH_SLEPC
+      // Initialize SLEPc (with PETSc):
+      SlepcInitialize(&argc, &argv, PETSC_NULL, PETSC_NULL);
+#  else
+      // or just initialize PETSc alone:
+      PetscInitialize(&argc, &argv, PETSC_NULL, PETSC_NULL);
+#  endif
+#endif
+
+      constructor_has_already_run = true;
+
+
+      // Now also see how many threads we'd like to run
+      if (max_num_threads != numbers::invalid_unsigned_int)
+        {
+          // set maximum number of threads (also respecting the environment
+          // variable that the called function evaluates) based on what
+          // the user asked
+          MultithreadInfo::set_thread_limit(max_num_threads);
+        }
+      else
+        // user wants automatic choice
+        {
+#ifdef DEAL_II_WITH_MPI
+          // we need to figure out how many MPI processes there
+          // are on the current node, as well as how many CPU cores
+          // we have. for the first task, check what get_hostname()
+          // returns and then do an allgather so each processor
+          // gets the answer
+          //
+          // in calculating the length of the string, don't forget the
+          // terminating \0 on C-style strings
+          const std::string hostname = Utilities::System::get_hostname();
+          const unsigned int max_hostname_size = Utilities::MPI::max (hostname.size()+1,
+                                                                      MPI_COMM_WORLD);
+          std::vector<char> hostname_array (max_hostname_size);
+          std::copy (hostname.c_str(), hostname.c_str()+hostname.size()+1,
+                     hostname_array.begin());
+
+          std::vector<char> all_hostnames(max_hostname_size *
+                                          MPI::n_mpi_processes(MPI_COMM_WORLD));
+          MPI_Allgather (&hostname_array[0], max_hostname_size, MPI_CHAR,
+                         &all_hostnames[0], max_hostname_size, MPI_CHAR,
+                         MPI_COMM_WORLD);
+
+          // search how often our own hostname appears and the
+          // how-manyth instance the current process represents
+          unsigned int n_local_processes=0;
+          unsigned int nth_process_on_host = 0;
+          for (unsigned int i=0; i<MPI::n_mpi_processes(MPI_COMM_WORLD); ++i)
+            if (std::string (&all_hostnames[0] + i*max_hostname_size) == hostname)
+              {
+                ++n_local_processes;
+                if (i <= MPI::this_mpi_process (MPI_COMM_WORLD)) // counts ourselves too, so the position is 1-based
+                  ++nth_process_on_host;
+              }
+          Assert (nth_process_on_host > 0, ExcInternalError());
+
+
+          // compute how many cores each process gets. if the number does
+          // not divide evenly, then we get one more core if we are
+          // among the first few processes
+          //
+          // if the number would be zero, round up to one since every
+          // process needs to have at least one thread
+          const unsigned int n_threads
+            = std::max(MultithreadInfo::n_cores() / n_local_processes
+                       +
+                       (nth_process_on_host <= MultithreadInfo::n_cores() % n_local_processes
+                        ?
+                        1
+                        :
+                        0),
+                       1U);
+#else
+          const unsigned int n_threads = MultithreadInfo::n_cores();
+#endif
+
+          // finally set this number of threads
+          MultithreadInfo::set_thread_limit(n_threads);
+        }
+    }
+
+
+    MPI_InitFinalize::~MPI_InitFinalize()
+    {
+      // make memory pool release all PETSc/Trilinos/MPI-based vectors that are no
+      // longer used at this point. this is relevant because the
+      // static object destructors that run for these vectors at the end of
+      // the program would run after MPI_Finalize is called, leading
+      // to errors
+
+#ifdef DEAL_II_WITH_MPI
+      // Start with the deal.II MPI vectors (need to do this before finalizing
+      // PETSc because it finalizes MPI).  Delete vectors from the pools:
+      GrowingVectorMemory<parallel::distributed::Vector<double> >
+      ::release_unused_memory ();
+      GrowingVectorMemory<parallel::distributed::BlockVector<double> >
+      ::release_unused_memory ();
+      GrowingVectorMemory<parallel::distributed::Vector<float> >
+      ::release_unused_memory ();
+      GrowingVectorMemory<parallel::distributed::BlockVector<float> >
+      ::release_unused_memory ();
+
+      // Next with Trilinos:
+#  if defined(DEAL_II_WITH_TRILINOS)
+      GrowingVectorMemory<TrilinosWrappers::MPI::Vector>
+      ::release_unused_memory ();
+      GrowingVectorMemory<TrilinosWrappers::MPI::BlockVector>
+      ::release_unused_memory ();
+#  endif
+#endif
+
+
+      // Now deal with PETSc (with or without MPI). Only delete the vectors if finalize hasn't
+      // been called yet, otherwise this will lead to errors.
+#ifdef DEAL_II_WITH_PETSC
+      if ((PetscInitializeCalled == PETSC_TRUE)
+          &&
+          (PetscFinalizeCalled == PETSC_FALSE))
+        {
+          GrowingVectorMemory<PETScWrappers::MPI::Vector>
+          ::release_unused_memory ();
+          GrowingVectorMemory<PETScWrappers::MPI::BlockVector>
+          ::release_unused_memory ();
+          GrowingVectorMemory<PETScWrappers::Vector>
+          ::release_unused_memory ();
+          GrowingVectorMemory<PETScWrappers::BlockVector>
+          ::release_unused_memory ();
+
+#  ifdef DEAL_II_WITH_SLEPC
+          // and now end SLEPc (with PETSc)
+          SlepcFinalize();
+#  else
+          // or just end PETSc.
+          PetscFinalize();
+#  endif
+        }
+#endif
+
+
+      // only MPI_Finalize if we are running with MPI. We also need to do this
+      // when running PETSc, because we initialize MPI ourselves before calling
+      // PetscInitialize
+#ifdef DEAL_II_WITH_MPI
+      if (job_supports_mpi() == true)
+        {
+          if (std::uncaught_exception()) // we are being destroyed while an exception propagates
+            {
+              std::cerr << "ERROR: Uncaught exception in MPI_InitFinalize on proc "
+                        << this_mpi_process(MPI_COMM_WORLD)
+                        << ". Skipping MPI_Finalize() to avoid a deadlock."
+                        << std::endl;
+            }
+          else
+            {
+              const int mpi_err = MPI_Finalize(); // NOTE(review): the check below sits in a destructor -- confirm a throwing AssertThrow is acceptable here
+              AssertThrow (mpi_err == 0,
+                           ExcMessage ("An error occurred while calling MPI_Finalize()"));
+            }
+        }
+#endif
+    }
+
+
+  } // end of namespace MPI
+
+} // end of namespace Utilities
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/multithread_info.cc b/source/base/multithread_info.cc
new file mode 100644
index 0000000..6e5d146
--- /dev/null
+++ b/source/base/multithread_info.cc
@@ -0,0 +1,222 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/multithread_info.h>
+#include <deal.II/base/utilities.h>
+
+#ifdef DEAL_II_HAVE_UNISTD_H
+#  include <unistd.h>
+#endif
+
+#if (defined(__MACH__) && defined(__APPLE__)) || defined(__FreeBSD__)
+#  include <sys/types.h>
+#  include <sys/sysctl.h>
+#endif
+
+
+#ifdef DEAL_II_WITH_THREADS
+#  include <deal.II/base/thread_management.h>
+#  include <tbb/task_scheduler_init.h>
+#endif
+
+DEAL_II_NAMESPACE_OPEN
+
+#ifdef DEAL_II_WITH_THREADS
+
+/* Detecting how many processors a given machine has is something that
+   varies greatly between operating systems. For a few operating
+   systems, we have figured out how to do that below, but some others
+   are still missing. If you find a way to do this on your favorite
+   system, please let us know.
+ */
+
+
+#  if defined(__linux__) ||  defined(__sun__) || defined(__osf__) || defined(_AIX)
+
+unsigned int MultithreadInfo::get_n_cpus()
+{
+  // sysconf() returns a long and reports failure as -1; converting that
+  // straight to unsigned int would yield a gigantic processor count, so
+  // fall back to a single CPU whenever no positive number is reported
+  const long n_online = sysconf(_SC_NPROCESSORS_ONLN);
+  if (n_online < 1)
+    return 1;
+
+  return static_cast<unsigned int>(n_online);
+}
+
+#  elif (defined(__MACH__) && defined(__APPLE__)) || defined(__FreeBSD__)
+// This is only tested on a dual G5 2.5GHz running MacOSX 10.3.6
+// and on an Intel Mac Book Pro.
+// If it doesn't work please contact the mailing list.
+unsigned int MultithreadInfo::get_n_cpus()
+{
+  // query the hw.ncpu value through sysctl()
+  int mib[2];
+  // start from one CPU so that we never return an indeterminate value
+  int n_cpus = 1;
+  size_t len;
+
+  mib[0] = CTL_HW;
+  mib[1] = HW_NCPU;
+  len = sizeof(n_cpus);
+
+  // if the query fails or reports nonsense, assume a single CPU
+  if ((sysctl(mib, 2, &n_cpus, &len, NULL, 0) != 0) || (n_cpus < 1))
+    return 1;
+
+  return n_cpus;
+}
+
+#  else
+
+// If you get n_cpus=1 although you are on a multi-processor machine,
+// then this may have two reasons: either because the system macros,
+// e.g.__linux__, __sgi__, etc. weren't defined by the compiler or the
+// detection of processors is really not implemented for your specific
+// system. In the first case you can add e.g. -D__sgi__ to your
+// compiling flags, in the latter case you need to implement the
+// get_n_cpus() function for your system.
+//
+// In both cases, this #else case is compiled, a fact that you can
+// easily verify by uncommenting the following #error directive,
+// recompiling and getting a compilation error right at that line.
+// After definition of the system macro or the implementation of the
+// new detection this #error message during compilation shouldn't
+// occur any more.
+//
+// Please send all new implementations of detection of processors to
+// the deal.II mailing list, such that it can be included into the
+// next deal.II release.
+
+//#error Detection of Processors not supported on this OS. Setting n_cpus=1 by default.
+
+unsigned int MultithreadInfo::get_n_cpus()
+{
+  return 1; // no detection implemented for this platform: assume one CPU
+}
+
+#  endif
+
+unsigned int MultithreadInfo::n_cores()
+{
+  return MultithreadInfo::n_cpus; // the static member that is initialized from get_n_cpus()
+}
+
+
+void MultithreadInfo::set_thread_limit(const unsigned int max_threads)
+{
+  // set the maximal number of threads to the given value as specified;
+  // numbers::invalid_unsigned_int stands for "no limit requested"
+  n_max_threads = max_threads;
+
+  // then also see if something was given in the environment
+  {
+    const char *penv = getenv ("DEAL_II_NUM_THREADS");
+    if (penv!=NULL)
+      {
+        // parse into a signed integer: storing the result in an unsigned
+        // variable would let negative input wrap around to a huge positive
+        // number and slip through the positivity check below
+        int max_threads_env = -1;
+        try
+          {
+            max_threads_env = Utilities::string_to_int(std::string(penv));
+          }
+        catch (...)
+          {
+            AssertThrow (false,
+                         ExcMessage (std::string("When specifying the <DEAL_II_NUM_THREADS> environment "
+                                                 "variable, it needs to be something that can be interpreted "
+                                                 "as an integer. The text you have in the environment "
+                                                 "variable is <") + penv + ">"));
+          }
+
+        AssertThrow (max_threads_env>0,
+                     ExcMessage ("When specifying the <DEAL_II_NUM_THREADS> environment "
+                                 "variable, it needs to be a positive number."));
+
+        // the environment can only lower a limit given by the caller; if
+        // the caller gave none, the environment value is used as is
+        const unsigned int env_limit = static_cast<unsigned int>(max_threads_env);
+        if (n_max_threads != numbers::invalid_unsigned_int)
+          n_max_threads = std::min(n_max_threads, env_limit);
+        else
+          n_max_threads = env_limit;
+      }
+  }
+  // Without restrictions from the user query TBB for the recommended number
+  // of threads:
+  if (n_max_threads == numbers::invalid_unsigned_int)
+    n_max_threads = tbb::task_scheduler_init::default_num_threads();
+
+  // Initialize the scheduler and destroy the old one before doing so
+  static tbb::task_scheduler_init dummy (tbb::task_scheduler_init::deferred);
+  if (dummy.is_active())
+    dummy.terminate();
+  dummy.initialize(n_max_threads);
+}
+
+
+unsigned int MultithreadInfo::n_threads()
+{
+  Assert(n_max_threads != numbers::invalid_unsigned_int, ExcInternalError()); // a limit must have been set before
+  return n_max_threads;
+}
+
+
+#else                            // not in MT mode
+
+unsigned int MultithreadInfo::get_n_cpus()
+{
+  return 1; // not in MT mode: report a single CPU
+}
+
+unsigned int MultithreadInfo::n_cores()
+{
+  return 1; // not in MT mode: report a single core
+}
+
+unsigned int MultithreadInfo::n_threads()
+{
+  return 1; // not in MT mode: there is only ever one thread
+}
+
+void MultithreadInfo::set_thread_limit(const unsigned int)
+{ // not in MT mode: the requested limit is ignored
+}
+
+#endif
+
+
+bool MultithreadInfo::is_running_single_threaded()
+{
+  return n_threads() == 1; // true exactly when the current thread limit is one
+}
+
+
+MultithreadInfo::MultithreadInfo ()
+{} // nothing to do here
+
+
+
+std::size_t
+MultithreadInfo::memory_consumption ()
+{
+  // the class holds only simple data elements,
+  // so the sizeof operator gives the answer
+  return sizeof (MultithreadInfo);
+}
+
+
+const unsigned int MultithreadInfo::n_cpus = MultithreadInfo::get_n_cpus(); // detected once, when this file's statics are initialized
+unsigned int MultithreadInfo::n_max_threads = numbers::invalid_unsigned_int; // "not set yet" until set_thread_limit() runs
+
+namespace
+{
+// Force the first call to set_thread_limit to happen before any tasks in TBB
+// are used. This is necessary as tbb::task_scheduler_init has no effect if TBB
+// got automatically initialized (which happens the first time we use it).
+  struct DoOnce
+  {
+    DoOnce ()
+    {
+      MultithreadInfo::set_thread_limit (numbers::invalid_unsigned_int); // i.e. without an explicit limit
+    }
+  } do_once;
+}
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/named_selection.cc b/source/base/named_selection.cc
new file mode 100644
index 0000000..9cc50e5
--- /dev/null
+++ b/source/base/named_selection.cc
@@ -0,0 +1,30 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/algorithms/named_selection.h>
+#include <deal.II/algorithms/any_data.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+void
+NamedSelection::initialize(const AnyData &data)
+{
+  // one index per selected name, looked up in the order the names are stored
+  indices.resize(names.size());
+
+  unsigned int position = 0;
+  while (position < names.size())
+    {
+      indices[position] = data.find(names[position]);
+      ++position;
+    }
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
diff --git a/source/base/parallel.cc b/source/base/parallel.cc
new file mode 100644
index 0000000..0ca6d65
--- /dev/null
+++ b/source/base/parallel.cc
@@ -0,0 +1,60 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/parallel.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace internal
+{
+  namespace Vector
+  {
+    // Minimum grain size (namespace Vector).
+    // This value is roughly in accordance
+    // with the curve in the TBB book (fig
+    // 3.2) that shows run time as a function
+    // of grain size -- there, values from 200
+    // upward are large enough that the
+    // scheduling overhead amortizes well (for
+    // very large values in that example, the
+    // grain size is too large to split the
+    // work load into enough chunks and the
+    // problem becomes badly balanced).
+    unsigned int minimum_parallel_grain_size = 1024;
+  }
+
+
+  namespace SparseMatrix
+  {
+    // Set this value to 1/4 of the value of
+    // the minimum grain size of
+    // vectors. This rests on the fact that
+    // we have to do a lot more work per row
+    // of a matrix than per element of a
+    // vector. It could possibly be reduced
+    // even further but that doesn't appear
+    // worth it any more for anything but
+    // very small matrices that we don't care
+    // that much about anyway.
+    unsigned int minimum_parallel_grain_size = 256;
+  }
+}
+
+
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/parameter_handler.cc b/source/base/parameter_handler.cc
new file mode 100644
index 0000000..dab4492
--- /dev/null
+++ b/source/base/parameter_handler.cc
@@ -0,0 +1,3054 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/parameter_handler.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/path_search.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/base/utilities.h>
+
+DEAL_II_DISABLE_EXTRA_DIAGNOSTICS
+#include <boost/property_tree/ptree.hpp>
+#include <boost/property_tree/xml_parser.hpp>
+#include <boost/property_tree/json_parser.hpp>
+DEAL_II_ENABLE_EXTRA_DIAGNOSTICS
+
+#include <fstream>
+#include <iostream>
+#include <iomanip>
+#include <cstdlib>
+#include <algorithm>
+#include <list>
+#include <sstream>
+#include <cctype>
+#include <limits>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+//TODO[WB]: various functions here could be simplified by using namespace Utilities
+
+namespace Patterns
+{
+
+  namespace
+  {
+    /**
+     * Consume the rest of the stream and
+     * report whether it held nothing but
+     * whitespace (true) or whether some
+     * other character was found (false).
+     */
+    bool has_only_whitespace (std::istream &in)
+    {
+      char next_character;
+
+      // keep extracting characters until the
+      // stream refuses to deliver another one
+      while (in >> next_character)
+        {
+          const bool is_blank = ((next_character == ' ') ||
+                                 (next_character == '\t'));
+          if (!is_blank)
+            return false;
+        }
+
+      return true;
+    }
+  }
+
+
+
+  PatternBase *pattern_factory (const std::string &description)
+  {
+    // Offer the description to each pattern class in turn; the first
+    // one whose create() function returns an object wins.
+    if (PatternBase *candidate = Integer::create(description))
+      return candidate;
+
+    if (PatternBase *candidate = Double::create(description))
+      return candidate;
+
+    if (PatternBase *candidate = Selection::create(description))
+      return candidate;
+
+    if (PatternBase *candidate = List::create(description))
+      return candidate;
+
+    if (PatternBase *candidate = MultipleSelection::create(description))
+      return candidate;
+
+    if (PatternBase *candidate = Bool::create(description))
+      return candidate;
+
+    if (PatternBase *candidate = Anything::create(description))
+      return candidate;
+
+    if (PatternBase *candidate = FileName::create(description))
+      return candidate;
+
+    if (PatternBase *candidate = DirectoryName::create(description))
+      return candidate;
+
+    // none of the known patterns accepted the description
+    Assert(false, ExcNotImplemented());
+
+    return 0;
+  }
+
+
+
+  PatternBase::~PatternBase ()
+  {} // nothing to release at this level
+
+
+  std::size_t
+  PatternBase::memory_consumption () const
+  {
+    // for these four pattern classes the size of the class is reported
+    if (dynamic_cast<const Integer *>(this) != 0)
+      return sizeof(Integer);
+
+    if (dynamic_cast<const Double *>(this) != 0)
+      return sizeof(Double);
+
+    if (dynamic_cast<const Bool *>(this) != 0)
+      return sizeof(Bool);
+
+    if (dynamic_cast<const Anything *>(this) != 0)
+      return sizeof(Anything);
+
+    // everything else: the size of the base class plus a fixed 32 bytes
+    return sizeof(*this) + 32;
+  }
+
+
+
+  const int Integer::min_int_value = std::numeric_limits<int>::min(); // smallest representable int
+  const int Integer::max_int_value = std::numeric_limits<int>::max(); // largest representable int
+
+  const char *Integer::description_init = "[Integer"; // prefix written by description() and recognized by create()
+
+  Integer::Integer (const int lower_bound,
+                    const int upper_bound)
+    :
+    lower_bound (lower_bound),
+    upper_bound (upper_bound)
+  {} // only stores the bounds; lower_bound > upper_bound is treated as "no bounds" by match() and description()
+
+
+
+  bool Integer::match (const std::string &test_string) const
+  {
+    std::istringstream input(test_string);
+
+    // the text has to start with something
+    // that can be read as an integer...
+    int value;
+    if (!(input >> value))
+      return false;
+
+    // ...and nothing but whitespace may
+    // follow it
+    if (!has_only_whitespace (input))
+      return false;
+
+    // bounds are only enforced if they
+    // describe a valid range
+    if (lower_bound > upper_bound)
+      return true;
+
+    return ((value >= lower_bound) &&
+            (value <= upper_bound));
+  }
+
+
+
+  std::string Integer::description () const
+  {
+    // without a valid range there is
+    // nothing to add to the generic
+    // string
+    if (lower_bound > upper_bound)
+      return "[Integer]";
+
+    // otherwise spell out the bounds
+    std::ostringstream text;
+    text << description_init
+         <<" range "
+         << lower_bound
+         << "..."
+         << upper_bound
+         << " (inclusive)]";
+
+    return text.str();
+  }
+
+
+
+  PatternBase *
+  Integer::clone () const
+  {
+    return new Integer(lower_bound, upper_bound); // heap-allocated copy with the same bounds
+  }
+
+
+
+  Integer *Integer::create (const std::string &description)
+  {
+    const std::size_t init_length = std::strlen(description_init);
+
+    // descriptions of other patterns are not ours to handle
+    if (description.compare(0, init_length, description_init) != 0)
+      return 0;
+
+    // at most the closing bracket follows: no bounds were given
+    if (!(description.size() > init_length + 1))
+      return new Integer();
+
+//TODO: verify that description matches the pattern "^\[Integer range \d+\.\.\.\d+\]$"
+    std::istringstream is(description);
+
+    // skip "[Integer range ", read the lower bound, skip "..." and
+    // read the upper bound; if either number cannot be read, fall
+    // back to a pattern with default bounds
+    is.ignore(init_length + std::strlen(" range "));
+
+    int lower_bound, upper_bound;
+    if (!(is >> lower_bound))
+      return new Integer();
+
+    is.ignore(std::strlen("..."));
+
+    if (!(is >> upper_bound))
+      return new Integer();
+
+    return new Integer(lower_bound, upper_bound);
+  }
+
+
+
+  const double Double::min_double_value = -std::numeric_limits<double>::max(); // most negative finite double
+  const double Double::max_double_value = std::numeric_limits<double>::max(); // largest finite double
+
+  const char *Double::description_init = "[Double"; // prefix written by description() and recognized by create()
+
+  Double::Double (const double lower_bound,
+                  const double upper_bound)
+    :
+    lower_bound (lower_bound),
+    upper_bound (upper_bound)
+  {} // only stores the bounds; match() and description() use them only if lower_bound <= upper_bound
+
+
+
+  bool Double::match (const std::string &test_string) const
+  {
+    std::istringstream input(test_string);
+
+    // the text has to start with something
+    // that can be read as a number...
+    double value;
+    if (!(input >> value))
+      return false;
+
+    // ...and nothing but whitespace may
+    // follow it
+    if (!has_only_whitespace (input))
+      return false;
+
+    // bounds are only enforced if they
+    // describe a valid range
+    const bool bounds_are_valid = (lower_bound <= upper_bound);
+    if (!bounds_are_valid)
+      return true;
+
+    return ((lower_bound <= value) &&
+            (upper_bound >= value));
+  }
+
+
+
+  std::string Double::description () const
+  {
+    std::ostringstream text;
+
+    // every description starts with the
+    // same prefix
+    text << description_init;
+
+    // the bounds are only written if
+    // they describe a valid range
+    if (lower_bound <= upper_bound)
+      text << " "
+           << lower_bound << "..." << upper_bound
+           << " (inclusive)";
+
+    // closing bracket in either case
+    text << "]";
+
+    return text.str();
+  }
+
+
+  PatternBase *
+  Double::clone () const
+  {
+    return new Double(lower_bound, upper_bound); // heap-allocated copy with the same bounds
+  }
+
+
+
+  Double *Double::create (const std::string &description)
+  {
+    const std::size_t init_length = std::strlen(description_init);
+
+    // descriptions of other patterns are not ours to handle
+    if (description.compare(0, init_length, description_init) != 0)
+      return 0;
+
+    // at most the closing bracket follows: no bounds were given
+    if (description.size() <= init_length + 1)
+      return new Double();
+
+    // Double::description() writes
+    //   "[Double <lower>...<upper> (inclusive)]"
+    // i.e. without a " range " keyword. Accept that form, and keep
+    // accepting the "[Double range <lower>...<upper>" spelling that
+    // this function used to expect.
+    std::string bounds (description, init_length);
+    const std::string range_keyword = " range ";
+    if (bounds.compare(0, range_keyword.size(), range_keyword) == 0)
+      bounds.erase (0, range_keyword.size());
+
+    // Split at the "..." separator before parsing: reading the lower
+    // bound straight from the whole text would swallow the first dot
+    // of "..." as a decimal point whenever the lower bound was printed
+    // without one (e.g. "0...1"), and the upper bound would be lost.
+    const std::string::size_type separator = bounds.find("...");
+    if (separator == std::string::npos)
+      return new Double();
+
+    const std::string lower_text = bounds.substr(0, separator);
+    const std::string upper_text = bounds.substr(separator + 3);
+
+    std::istringstream lower_stream (lower_text);
+    std::istringstream upper_stream (upper_text);
+
+    // if either bound cannot be read, fall back to a pattern with
+    // default bounds
+    double lower_bound, upper_bound;
+    if (!(lower_stream >> lower_bound))
+      return new Double();
+
+    if (!(upper_stream >> upper_bound))
+      return new Double();
+
+    return new Double(lower_bound, upper_bound);
+  }
+
+
+
+  const char *Selection::description_init = "[Selection"; // prefix written by description() and recognized by create()
+
+
+  Selection::Selection (const std::string &seq)
+  {
+    sequence = seq;
+
+    // remove the blank in front of every '|' ...
+    std::string::size_type position;
+    while ((position = sequence.find(" |")) != std::string::npos)
+      sequence.replace (position, 2, "|");
+
+    // ... and then the blank behind every '|'
+    while ((position = sequence.find("| ")) != std::string::npos)
+      sequence.replace (position, 2, "|");
+  }
+
+
+
+  bool Selection::match (const std::string &test_string) const
+  {
+    // Compare the test string against each of the '|'-separated
+    // choices. The choices are inspected in place instead of
+    // repeatedly copying and erasing from a temporary string.
+    std::string::size_type choice_begin = 0;
+    std::string::size_type bar = sequence.find('|', choice_begin);
+    while (bar != std::string::npos)
+      {
+        if (sequence.compare(choice_begin, bar - choice_begin, test_string) == 0)
+          return true;
+
+        choice_begin = bar + 1;
+        bar = sequence.find('|', choice_begin);
+      }
+
+    // check last choice, not finished by |
+    return (sequence.compare(choice_begin, std::string::npos, test_string) == 0);
+  }
+
+
+
+  std::string Selection::description () const
+  {
+    // prefix, a blank, the list of choices, and the closing " ]"
+    std::string text (description_init);
+    text += " ";
+    text += sequence;
+    text += " ]";
+
+    return text;
+  }
+
+
+
+  PatternBase *
+  Selection::clone () const
+  {
+    return new Selection(sequence); // heap-allocated copy with the same list of choices
+  }
+
+
+  std::size_t
+  Selection::memory_consumption () const
+  {
+    return (sizeof(PatternBase) + // size of the base class plus what the choice string occupies
+            MemoryConsumption::memory_consumption(sequence));
+  }
+
+
+
+  Selection *Selection::create (const std::string &description)
+  {
+    const std::size_t init_length = std::strlen(description_init);
+
+    // descriptions of other patterns are not ours to handle
+    if (description.compare(0, init_length, description_init) != 0)
+      return 0;
+
+    std::string sequence(description);
+
+    // strip the prefix together with the blank that follows it...
+    sequence.erase(0, init_length + 1);
+
+    // ...and the trailing " ]". A truncated description may be
+    // shorter than those two characters; computing length()-2 would
+    // then wrap around and make erase() throw, so treat such input as
+    // an empty list of choices instead.
+    if (sequence.length() >= 2)
+      sequence.erase(sequence.length()-2, 2);
+    else
+      sequence.clear();
+
+    return new Selection(sequence);
+  }
+
+
+
+  // largest representable unsigned int
+  const unsigned int List::max_int_value
+    = std::numeric_limits<unsigned int>::max();
+
+  // prefix with which List::description() starts and which
+  // List::create() looks for
+  const char *List::description_init = "[List";
+
+
+  /**
+   * Build a list pattern. A clone of @p p is stored as the pattern for
+   * the individual elements (it is deleted in the destructor); the
+   * element count limits and the separator are copied.
+   *
+   * The two Assert checks require min_elements <= max_elements and a
+   * non-empty separator.
+   */
+  List::List (const PatternBase  &p,
+              const unsigned int  min_elements,
+              const unsigned int  max_elements,
+              const std::string  &separator)
+    :
+    pattern (p.clone()),
+    min_elements (min_elements),
+    max_elements (max_elements),
+    separator (separator)
+  {
+    Assert (min_elements <= max_elements,
+            ExcInvalidRange (min_elements, max_elements));
+    Assert (separator.size() > 0,
+            ExcMessage ("The separator must have a non-zero length."));
+  }
+
+
+
+  /**
+   * Destructor. Deletes the element pattern that was cloned in the
+   * constructor and resets the pointer to zero.
+   */
+  List::~List ()
+  {
+    delete pattern;
+    pattern = 0;
+  }
+
+
+
+  /**
+   * Return whether @p test_string_list is a list that satisfies this
+   * pattern: the string is split at every occurrence of the separator,
+   * each piece is stripped of leading and trailing whitespace, the number
+   * of pieces must lie in [min_elements, max_elements], and every piece
+   * must match the element pattern.
+   */
+  bool List::match (const std::string &test_string_list) const
+  {
+    std::string tmp = test_string_list;
+    std::vector<std::string> split_list;
+
+    // first split the input list
+    while (tmp.length() != 0)
+      {
+        std::string name;
+
+        const std::string::size_type separator_pos = tmp.find(separator);
+        if (separator_pos != std::string::npos)
+          {
+            name.assign (tmp, 0, separator_pos);
+            tmp.erase (0, separator_pos + separator.size());
+          }
+        else
+          {
+            name = tmp;
+            tmp = "";
+          }
+
+        // std::isspace requires a value representable as unsigned char
+        while ((name.length() != 0) &&
+               (std::isspace (static_cast<unsigned char>(name[0]))))
+          name.erase (0,1);
+
+        // the element may be empty here (e.g. for input ",a" or an
+        // all-blank element), so check the length before looking at the
+        // last character
+        while ((name.length() != 0) &&
+               (std::isspace (static_cast<unsigned char>(name[name.length()-1]))))
+          name.erase (name.length()-1, 1);
+
+        split_list.push_back (name);
+      }
+
+    if ((split_list.size() < min_elements) ||
+        (split_list.size() > max_elements))
+      return false;
+
+    // check that every element satisfies the element pattern
+    for (std::vector<std::string>::const_iterator
+         test_string = split_list.begin();
+         test_string != split_list.end(); ++test_string)
+      if (pattern->match (*test_string) == false)
+        return false;
+
+    return true;
+  }
+
+
+
+  /**
+   * Return the textual description of this pattern. It names the element
+   * pattern and the admissible length range; the separator is only
+   * mentioned when it differs from ",".
+   */
+  std::string List::description () const
+  {
+    std::ostringstream out;
+
+    out << description_init
+        << " list of <" << pattern->description() << ">";
+    out << " of length " << min_elements << "..." << max_elements
+        << " (inclusive)";
+
+    const bool default_separator = (separator == ",");
+    if (!default_separator)
+      out << " separated by <" << separator << ">";
+
+    out << "]";
+
+    return out.str();
+  }
+
+
+
+  /**
+   * Return a newly allocated List with the same element pattern, length
+   * limits and separator. The List constructor clones the element
+   * pattern again, so the copy does not share it with this object.
+   */
+  PatternBase *
+  List::clone () const
+  {
+    return new List(*pattern, min_elements, max_elements, separator);
+  }
+
+
+  /**
+   * Return an estimate of the memory used by this object: its own size
+   * plus the memory reported for the element pattern and the separator.
+   */
+  std::size_t
+  List::memory_consumption () const
+  {
+    const std::size_t own_size       = sizeof(*this);
+    const std::size_t pattern_size   = MemoryConsumption::memory_consumption(*pattern);
+    const std::size_t separator_size = MemoryConsumption::memory_consumption(separator);
+
+    return (own_size + pattern_size + separator_size);
+  }
+
+
+
+  /**
+   * Re-create a List from a string in the format produced by
+   * List::description(), i.e.
+   *
+   *   [List list of <ELEMENT> of length MIN...MAX (inclusive)]
+   *   [List list of <ELEMENT> of length MIN...MAX (inclusive) separated by <SEP>]
+   *
+   * Returns a null pointer if @p description does not start with the List
+   * prefix; otherwise returns a newly allocated object. If the length
+   * bounds cannot be parsed, the missing values fall back to the defaults
+   * of the List constructor.
+   *
+   * Note that the element description is read up to the first '>', so an
+   * element description that itself contains '>' is cut short.
+   */
+  List *List::create (const std::string &description)
+  {
+    if (description.compare(0, std::strlen(description_init), description_init) != 0)
+      return 0;
+
+    // The bounds are written by description() as unsigned int and the
+    // default upper bound is the largest unsigned int, so they must be
+    // parsed as unsigned as well; parsing into a signed int fails for
+    // that value.
+    unsigned int min_elements = 0, max_elements = 0;
+
+    std::istringstream is(description);
+    is.ignore(std::strlen(description_init) + std::strlen(" list of <"));
+
+    std::string str;
+    std::getline(is, str, '>');
+
+    std_cxx11::shared_ptr<PatternBase> base_pattern (pattern_factory(str));
+
+    is.ignore(std::strlen(" of length "));
+    if (!(is >> min_elements))
+      return new List(*base_pattern);
+
+    is.ignore(std::strlen("..."));
+    if (!(is >> max_elements))
+      return new List(*base_pattern, min_elements);
+
+    // What follows is " (inclusive)" and then either the closing "]" or
+    // " separated by <SEP>]". Skip up to and including the '<'. If the
+    // input ends before that many characters are available, no separator
+    // was written and the default one applies.
+    is.ignore(std::strlen(" (inclusive) separated by <"));
+    std::string separator;
+    if (!is.eof())
+      std::getline(is, separator, '>');
+    else
+      separator = ",";
+
+    return new List(*base_pattern, min_elements, max_elements, separator);
+  }
+
+
+
+  // largest representable unsigned int
+  const unsigned int Map::max_int_value
+    = std::numeric_limits<unsigned int>::max();
+
+  // prefix with which Map::description() starts and which Map::create()
+  // looks for
+  const char *Map::description_init = "[Map";
+
+
+  /**
+   * Build a map pattern. Clones of @p p_key and @p p_value are stored as
+   * the patterns for keys and values (both are deleted in the
+   * destructor); the element count limits and the separator between
+   * entries are copied.
+   *
+   * The Assert checks require min_elements <= max_elements, a non-empty
+   * separator, and a separator different from ":".
+   */
+  Map::Map (const PatternBase  &p_key,
+            const PatternBase  &p_value,
+            const unsigned int  min_elements,
+            const unsigned int  max_elements,
+            const std::string  &separator)
+    :
+    key_pattern (p_key.clone()),
+    value_pattern (p_value.clone()),
+    min_elements (min_elements),
+    max_elements (max_elements),
+    separator (separator)
+  {
+    Assert (min_elements <= max_elements,
+            ExcInvalidRange (min_elements, max_elements));
+    Assert (separator.size() > 0,
+            ExcMessage ("The separator must have a non-zero length."));
+    Assert (separator != ":",
+            ExcMessage ("The separator can not be a colon ':' since that "
+                        "is the separator between the two elements of <key:value> pairs"));
+  }
+
+
+
+  /**
+   * Destructor. Deletes the key and value patterns that were cloned in
+   * the constructor and resets both pointers to zero.
+   */
+  Map::~Map ()
+  {
+    delete key_pattern;
+    key_pattern = 0;
+
+    delete value_pattern;
+    value_pattern = 0;
+  }
+
+
+
+  /**
+   * Return whether @p test_string_list is a map that satisfies this
+   * pattern: the string is split at every occurrence of the separator,
+   * each entry is stripped of leading and trailing whitespace, the number
+   * of entries must lie in [min_elements, max_elements], and every entry
+   * must have the form "key:value" where the key (with trailing
+   * whitespace removed) matches the key pattern and the value (with
+   * leading whitespace removed) matches the value pattern. Entries are
+   * split at their first ':'.
+   */
+  bool Map::match (const std::string &test_string_list) const
+  {
+    std::string tmp = test_string_list;
+    std::vector<std::string> split_list;
+
+    // first split the input at each occurrence of the separator
+    while (tmp.length() != 0)
+      {
+        std::string map_entry;
+
+        const std::string::size_type separator_pos = tmp.find(separator);
+        if (separator_pos != std::string::npos)
+          {
+            map_entry.assign (tmp, 0, separator_pos);
+            tmp.erase (0, separator_pos + separator.size());
+          }
+        else
+          {
+            map_entry = tmp;
+            tmp = "";
+          }
+
+        // std::isspace requires a value representable as unsigned char
+        while ((map_entry.length() != 0) &&
+               (std::isspace (static_cast<unsigned char>(map_entry[0]))))
+          map_entry.erase (0,1);
+
+        // the entry may be empty here (e.g. for input ",a:b" or an
+        // all-blank entry), so check the length before looking at the
+        // last character
+        while ((map_entry.length() != 0) &&
+               (std::isspace (static_cast<unsigned char>(map_entry[map_entry.length()-1]))))
+          map_entry.erase (map_entry.length()-1, 1);
+
+        split_list.push_back (map_entry);
+      }
+
+    if ((split_list.size() < min_elements) ||
+        (split_list.size() > max_elements))
+      return false;
+
+    // check the individual entries
+    for (std::vector<std::string>::const_iterator
+         test_string = split_list.begin();
+         test_string != split_list.end(); ++test_string)
+      {
+        // an entry without a ':' cannot be separated into key and value
+        const std::string::size_type colon_pos = test_string->find(":");
+        if (colon_pos == std::string::npos)
+          return false;
+
+        // split at the first ':' and trim the whitespace adjacent to it
+        std::string key (*test_string, 0, colon_pos);
+        while ((key.length() > 0) &&
+               (std::isspace (static_cast<unsigned char>(key[key.length()-1]))))
+          key.erase (key.length()-1, 1);
+
+        std::string value (*test_string, colon_pos+1, std::string::npos);
+        while ((value.length() > 0) &&
+               (std::isspace (static_cast<unsigned char>(value[0]))))
+          value.erase (0, 1);
+
+        // then verify that the patterns are satisfied
+        if (key_pattern->match (key) == false)
+          return false;
+        if (value_pattern->match (value) == false)
+          return false;
+      }
+
+    return true;
+  }
+
+
+
+  /**
+   * Return the textual description of this pattern. It names the key and
+   * value patterns and the admissible length range; the separator is
+   * only mentioned when it differs from ",".
+   */
+  std::string Map::description () const
+  {
+    std::ostringstream out;
+
+    out << description_init
+        << " map of <"
+        << key_pattern->description() << ":"
+        << value_pattern->description() << ">";
+    out << " of length " << min_elements << "..." << max_elements
+        << " (inclusive)";
+
+    const bool default_separator = (separator == ",");
+    if (!default_separator)
+      out << " separated by <" << separator << ">";
+
+    out << "]";
+
+    return out.str();
+  }
+
+
+
+  /**
+   * Return a newly allocated Map with the same key and value patterns,
+   * length limits and separator. The Map constructor clones both
+   * patterns again, so the copy does not share them with this object.
+   */
+  PatternBase *
+  Map::clone () const
+  {
+    return new Map(*key_pattern, *value_pattern,
+                   min_elements, max_elements,
+                   separator);
+  }
+
+
+  /**
+   * Return an estimate of the memory used by this object: its own size
+   * plus the memory reported for the key pattern, the value pattern and
+   * the separator.
+   */
+  std::size_t
+  Map::memory_consumption () const
+  {
+    const std::size_t own_size       = sizeof(*this);
+    const std::size_t key_size       = MemoryConsumption::memory_consumption (*key_pattern);
+    const std::size_t value_size     = MemoryConsumption::memory_consumption (*value_pattern);
+    const std::size_t separator_size = MemoryConsumption::memory_consumption (separator);
+
+    return (own_size + key_size + value_size + separator_size);
+  }
+
+
+
+  /**
+   * Re-create a Map from a string in the format produced by
+   * Map::description(), i.e.
+   *
+   *   [Map map of <KEY:VALUE> of length MIN...MAX (inclusive)]
+   *   [Map map of <KEY:VALUE> of length MIN...MAX (inclusive) separated by <SEP>]
+   *
+   * Returns a null pointer if @p description does not start with the Map
+   * prefix; otherwise returns a newly allocated object. If the length
+   * bounds cannot be parsed, the missing values fall back to the defaults
+   * of the Map constructor.
+   *
+   * Note that the "KEY:VALUE" part is read up to the first '>' and split
+   * at its first ':', so pattern descriptions that themselves contain
+   * one of these characters are not parsed correctly.
+   */
+  Map *Map::create (const std::string &description)
+  {
+    if (description.compare(0, std::strlen(description_init), description_init) != 0)
+      return 0;
+
+    // The bounds are written by description() as unsigned int and the
+    // default upper bound is the largest unsigned int, so they must be
+    // parsed as unsigned as well; parsing into a signed int fails for
+    // that value.
+    unsigned int min_elements = 0, max_elements = 0;
+
+    std::istringstream is(description);
+    is.ignore(std::strlen(description_init) + std::strlen(" map of <"));
+
+    std::string str;
+    std::getline(is, str, '>');
+
+    // split 'str' into key and value
+    std::string key = str;
+    key.erase (key.find(":"), std::string::npos);
+
+    std::string value = str;
+    value.erase (0, value.find(":")+1);
+
+    std_cxx11::shared_ptr<PatternBase> key_pattern (pattern_factory(key));
+    std_cxx11::shared_ptr<PatternBase> value_pattern (pattern_factory(value));
+
+    is.ignore(std::strlen(" of length "));
+    if (!(is >> min_elements))
+      return new Map(*key_pattern, *value_pattern);
+
+    is.ignore(std::strlen("..."));
+    if (!(is >> max_elements))
+      return new Map(*key_pattern, *value_pattern, min_elements);
+
+    // What follows is " (inclusive)" and then either the closing "]" or
+    // " separated by <SEP>]". Skip up to and including the '<'. If the
+    // input ends before that many characters are available, no separator
+    // was written and the default one applies.
+    is.ignore(std::strlen(" (inclusive) separated by <"));
+    std::string separator;
+    if (!is.eof())
+      std::getline(is, separator, '>');
+    else
+      separator = ",";
+
+    return new Map(*key_pattern, *value_pattern,
+                   min_elements, max_elements,
+                   separator);
+  }
+
+
+
+  const char *MultipleSelection::description_init = "[MultipleSelection";
+
+
+  /**
+   * Store the '|'-separated list of admissible values given in @p seq,
+   * after removing every blank that directly precedes or follows a '|'.
+   * The Assert requires that @p seq contains no comma.
+   */
+  MultipleSelection::MultipleSelection (const std::string &seq)
+  {
+    Assert (seq.find (",") == std::string::npos, ExcCommasNotAllowed(seq.find(",")));
+
+    sequence = seq;
+
+    // drop blanks standing in front of a separator ...
+    for (std::string::size_type pos = sequence.find(" |");
+         pos != std::string::npos;
+         pos = sequence.find(" |"))
+      sequence.replace (pos, 2, "|");
+
+    // ... and blanks following one
+    for (std::string::size_type pos = sequence.find("| ");
+         pos != std::string::npos;
+         pos = sequence.find("| "))
+      sequence.replace (pos, 2, "|");
+  }
+
+
+
+  /**
+   * Return whether @p test_string_list is a comma-separated list in
+   * which every element, stripped of leading and trailing whitespace,
+   * equals one of the alternatives stored in the '|'-separated member
+   * @p sequence. An empty input string matches.
+   */
+  bool MultipleSelection::match (const std::string &test_string_list) const
+  {
+    std::string tmp = test_string_list;
+    std::list<std::string> split_list;
+
+    // first split the input list at the commas
+    while (tmp.length() != 0)
+      {
+        std::string name;
+
+        const std::string::size_type comma_pos = tmp.find(",");
+        if (comma_pos != std::string::npos)
+          {
+            name.assign (tmp, 0, comma_pos);
+            tmp.erase (0, comma_pos+1);
+          }
+        else
+          {
+            name = tmp;
+            tmp = "";
+          }
+
+        // std::isspace requires a value representable as unsigned char
+        while ((name.length() != 0) &&
+               (std::isspace (static_cast<unsigned char>(name[0]))))
+          name.erase (0,1);
+
+        // the element may be empty here (e.g. for input ",a" or an
+        // all-blank element), so check the length before looking at the
+        // last character
+        while ((name.length() != 0) &&
+               (std::isspace (static_cast<unsigned char>(name[name.length()-1]))))
+          name.erase (name.length()-1, 1);
+
+        split_list.push_back (name);
+      }
+
+
+    // check that each element is one of the alternatives
+    for (std::list<std::string>::const_iterator test_string = split_list.begin();
+         test_string != split_list.end(); ++test_string)
+      {
+        bool string_found = false;
+
+        // walk over the '|'-separated alternatives in place
+        std::string::size_type choice_begin = 0;
+        while (!string_found)
+          {
+            const std::string::size_type choice_end
+              = sequence.find ('|', choice_begin);
+            const std::string::size_type choice_length
+              = (choice_end == std::string::npos
+                 ?
+                 std::string::npos
+                 :
+                 choice_end - choice_begin);
+
+            if (sequence.compare (choice_begin, choice_length, *test_string) == 0)
+              string_found = true;
+            else if (choice_end == std::string::npos)
+              // that was the last choice, not finished by |
+              break;
+            else
+              choice_begin = choice_end + 1;
+          }
+
+        if (!string_found)
+          return false;
+      }
+
+    return true;
+  }
+
+
+
+  /**
+   * Return the textual description of this pattern: the description
+   * prefix, a blank, the list of alternatives, and a closing " ]".
+   */
+  std::string MultipleSelection::description () const
+  {
+    std::string result (description_init);
+    result += " ";
+    result += sequence;
+    result += " ]";
+
+    return result;
+  }
+
+
+
+  /**
+   * Return a newly allocated MultipleSelection built from the same
+   * sequence of alternatives. The object is created with @p new; the
+   * caller receives the raw pointer.
+   */
+  PatternBase *
+  MultipleSelection::clone () const
+  {
+    return new MultipleSelection(sequence);
+  }
+
+
+  /**
+   * Return an estimate of the memory used by this object: the size of
+   * the PatternBase part plus the memory reported for the sequence
+   * string.
+   */
+  std::size_t
+  MultipleSelection::memory_consumption () const
+  {
+    const std::size_t base_size     = sizeof(PatternBase);
+    const std::size_t sequence_size = MemoryConsumption::memory_consumption(sequence);
+
+    return (base_size + sequence_size);
+  }
+
+
+
+  /**
+   * Re-create a MultipleSelection from a string in the format produced
+   * by MultipleSelection::description(), i.e.
+   * "[MultipleSelection a|b|c ]".
+   *
+   * Returns a null pointer if @p description does not start with the
+   * MultipleSelection prefix; otherwise returns a newly allocated object.
+   */
+  MultipleSelection *MultipleSelection::create (const std::string &description)
+  {
+    const std::size_t prefix_length = std::strlen(description_init);
+
+    if (description.compare(0, prefix_length, description_init) != 0)
+      return 0;
+
+    std::string sequence(description);
+
+    // strip the prefix and the blank that follows it
+    sequence.erase(0, prefix_length + 1);
+
+    // strip the closing " ]". Guard against a truncated description:
+    // with fewer than two characters left, length()-2 would wrap around
+    // and erase() would throw std::out_of_range.
+    if (sequence.length() >= 2)
+      sequence.erase(sequence.length()-2, 2);
+
+    return new MultipleSelection(sequence);
+  }
+
+
+
+  // prefix with which Bool::description() starts and which
+  // Bool::create() looks for
+  const char *Bool::description_init = "[Bool";
+
+
+  /**
+   * Constructor. A Bool is a Selection whose only alternatives are
+   * "true" and "false".
+   */
+  Bool::Bool ()
+    :
+    Selection ("true|false")
+  {}
+
+
+
+  /**
+   * Return the textual description of this pattern, which is the
+   * description prefix followed by "]".
+   */
+  std::string Bool::description () const
+  {
+    std::string result (description_init);
+    result += "]";
+
+    return result;
+  }
+
+
+
+  /**
+   * Return a newly allocated Bool pattern. The object is created with
+   * @p new; the caller receives the raw pointer.
+   */
+  PatternBase *
+  Bool::clone () const
+  {
+    return new Bool();
+  }
+
+
+
+  /**
+   * Return a newly allocated Bool if @p description starts with the Bool
+   * description prefix, and a null pointer otherwise.
+   */
+  Bool *Bool::create (const std::string &description)
+  {
+    const std::size_t prefix_length = std::strlen(description_init);
+    const bool has_prefix
+      = (description.compare(0, prefix_length, description_init) == 0);
+
+    if (!has_prefix)
+      return 0;
+
+    return new Bool();
+  }
+
+
+
+  // prefix with which Anything::description() starts and which
+  // Anything::create() looks for
+  const char *Anything::description_init = "[Anything";
+
+
+  /**
+   * Constructor. Does nothing.
+   */
+  Anything::Anything ()
+  {}
+
+
+
+  /**
+   * Return true for every input; the argument is not looked at.
+   */
+  bool Anything::match (const std::string &) const
+  {
+    return true;
+  }
+
+
+
+  /**
+   * Return the textual description of this pattern, which is the
+   * description prefix followed by "]".
+   */
+  std::string Anything::description () const
+  {
+    std::string result (description_init);
+    result += "]";
+
+    return result;
+  }
+
+
+
+  /**
+   * Return a newly allocated Anything pattern. The object is created
+   * with @p new; the caller receives the raw pointer.
+   */
+  PatternBase *
+  Anything::clone () const
+  {
+    return new Anything();
+  }
+
+
+
+  /**
+   * Return a newly allocated Anything if @p description starts with the
+   * Anything description prefix, and a null pointer otherwise.
+   */
+  Anything *Anything::create (const std::string &description)
+  {
+    const std::size_t prefix_length = std::strlen(description_init);
+    const bool has_prefix
+      = (description.compare(0, prefix_length, description_init) == 0);
+
+    if (!has_prefix)
+      return 0;
+
+    return new Anything();
+  }
+
+
+
+  // prefix with which FileName::description() starts and which
+  // FileName::create() looks for
+  const char *FileName::description_init = "[FileName";
+
+
+  /**
+   * Constructor. Stores @p type in the member file_type, which
+   * description() uses to report the file as input or output.
+   */
+  FileName::FileName (const FileType type)
+    : file_type (type)
+  {}
+
+
+
+  /**
+   * Return true for every input; the argument is not looked at, so no
+   * check of the file name is performed here.
+   */
+  bool FileName::match (const std::string &) const
+  {
+    return true;
+  }
+
+
+
+  /**
+   * Return the textual description of this pattern: the description
+   * prefix followed by " (Type: input)]" if the file type is @p input,
+   * and by " (Type: output)]" otherwise.
+   */
+  std::string FileName::description () const
+  {
+    std::string result (description_init);
+
+    result += (file_type == input
+               ?
+               " (Type: input)]"
+               :
+               " (Type: output)]");
+
+    return result;
+  }
+
+
+
+  /**
+   * Return a newly allocated FileName pattern with the same file type.
+   * The object is created with @p new; the caller receives the raw
+   * pointer.
+   */
+  PatternBase *
+  FileName::clone () const
+  {
+    return new FileName(file_type);
+  }
+
+
+
+  /**
+   * Re-create a FileName pattern from a string in the format produced by
+   * FileName::description(). Returns a null pointer if @p description
+   * does not start with the FileName prefix. Otherwise the word that
+   * follows "(Type:" decides the file type: "input)]" yields @p input,
+   * anything else yields @p output.
+   */
+  FileName *FileName::create (const std::string &description)
+  {
+    const std::size_t prefix_length = std::strlen(description_init);
+    if (description.compare(0, prefix_length, description_init) != 0)
+      return 0;
+
+    std::istringstream is(description);
+    is.ignore(strlen(description_init) + strlen(" (Type:"));
+
+    // the extraction skips the blank after the colon and reads up to
+    // the end of the description
+    std::string file_type;
+    is >> file_type;
+
+    const FileType type = (file_type == "input)]" ? input : output);
+
+    return new FileName(type);
+  }
+
+
+
+  // prefix with which DirectoryName::description() starts and which
+  // DirectoryName::create() looks for
+  const char *DirectoryName::description_init = "[DirectoryName";
+
+
+  /**
+   * Constructor. Does nothing.
+   */
+  DirectoryName::DirectoryName ()
+  {}
+
+
+
+  /**
+   * Return true for every input; the argument is not looked at, so no
+   * check of the directory name is performed here.
+   */
+  bool DirectoryName::match (const std::string &) const
+  {
+    return true;
+  }
+
+
+
+  /**
+   * Return the textual description of this pattern, which is the
+   * description prefix followed by "]".
+   */
+  std::string DirectoryName::description () const
+  {
+    std::string result (description_init);
+    result += "]";
+
+    return result;
+  }
+
+
+
+  /**
+   * Return a newly allocated DirectoryName pattern. The object is
+   * created with @p new; the caller receives the raw pointer.
+   */
+  PatternBase *
+  DirectoryName::clone () const
+  {
+    return new DirectoryName();
+  }
+
+
+
+  /**
+   * Return a newly allocated DirectoryName if @p description starts with
+   * the DirectoryName description prefix, and a null pointer otherwise.
+   */
+  DirectoryName *DirectoryName::create (const std::string &description)
+  {
+    const std::size_t prefix_length = std::strlen(description_init);
+    const bool has_prefix
+      = (description.compare(0, prefix_length, description_init) == 0);
+
+    if (!has_prefix)
+      return 0;
+
+    return new DirectoryName();
+  }
+
+}   // end namespace Patterns
+
+
+
+/**
+ * Constructor. Starts out with a newly allocated, empty property tree
+ * in which the declared entries will be stored.
+ */
+ParameterHandler::ParameterHandler ()
+  :
+  entries (new boost::property_tree::ptree())
+{}
+
+
+
+/**
+ * Destructor. Has an empty body; nothing is released explicitly here.
+ */
+ParameterHandler::~ParameterHandler ()
+{}
+
+
+
+/**
+ * Return a mangled copy of @p s in which every character other than
+ * a-z, A-Z and 0-9 is replaced by an underscore followed by the two
+ * lowercase hexadecimal digits of its value as an unsigned char. The
+ * special name "value" is mangled in its entirety, i.e. every one of
+ * its characters is replaced in this way.
+ */
+std::string
+ParameterHandler::mangle (const std::string &s)
+{
+  static const std::string allowed_characters
+  ("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789");
+  static const char hex[16]
+    = { '0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'};
+
+  // see if the name is special and if so mangle the whole thing
+  const bool mangle_whole_string = (s == "value");
+
+  // the result has at least as many characters as the input; it may
+  // need more, but this is the least we can reserve
+  std::string mangled;
+  mangled.reserve (s.size());
+
+  for (std::string::const_iterator c = s.begin(); c != s.end(); ++c)
+    {
+      const bool keep_as_is
+        = ((! mangle_whole_string)
+           &&
+           (allowed_characters.find (*c) != std::string::npos));
+
+      if (keep_as_is)
+        {
+          mangled.push_back (*c);
+          continue;
+        }
+
+      // escape the character as '_' plus two hex digits
+      const unsigned char byte = static_cast<unsigned char>(*c);
+      mangled.push_back ('_');
+      mangled.push_back (hex[byte/16]);
+      mangled.push_back (hex[byte%16]);
+    }
+
+  return mangled;
+}
+
+
+
+/**
+ * Undo the effect of mangle(): copy @p s, replacing every underscore
+ * together with the two lowercase hexadecimal digits following it by the
+ * character with that value.
+ *
+ * The Asserts check that two characters follow each underscore and that
+ * both are lowercase hexadecimal digits. Where Assert does not stop the
+ * program, a character that is not such a digit contributes zero.
+ */
+std::string
+ParameterHandler::demangle (const std::string &s)
+{
+  // the position of a character in this string is its numeric value
+  static const std::string hex_digits ("0123456789abcdef");
+
+  std::string u;
+  u.reserve (s.size());
+
+  for (unsigned int i=0; i<s.size(); ++i)
+    {
+      // ordinary characters are copied verbatim
+      if (s[i] != '_')
+        {
+          u.push_back (s[i]);
+          continue;
+        }
+
+      Assert (i+2 < s.size(),
+              ExcMessage ("Trying to demangle an invalid string."));
+
+      unsigned char c = 0;
+
+      // first digit: the upper four bits
+      const std::string::size_type high_digit = hex_digits.find (s[i+1]);
+      if (high_digit != std::string::npos)
+        c = static_cast<unsigned char>(high_digit * 16);
+      else
+        {
+          Assert (false, ExcInternalError());
+        }
+
+      // second digit: the lower four bits
+      const std::string::size_type low_digit = hex_digits.find (s[i+2]);
+      if (low_digit != std::string::npos)
+        c += static_cast<unsigned char>(low_digit);
+      else
+        {
+          Assert (false, ExcInternalError());
+        }
+
+      u.push_back (static_cast<char>(c));
+
+      // skip the two characters
+      i += 2;
+    }
+
+  return u;
+}
+
+
+
+namespace
+{
+  /**
+   * Return whether a given node is a parameter node (as opposed
+   * to being a subsection or alias node). A node counts as a
+   * parameter node if it has a child called "value".
+   */
+  bool
+  is_parameter_node (const boost::property_tree::ptree &p)
+  {
+    if (p.get_optional<std::string>("value"))
+      return true;
+
+    return false;
+  }
+
+
+  /**
+   * Return whether a given node is an alias node (as opposed
+   * to being a subsection or parameter node). A node counts as an
+   * alias node if it has a child called "alias".
+   */
+  bool
+  is_alias_node (const boost::property_tree::ptree &p)
+  {
+    if (p.get_optional<std::string>("alias"))
+      return true;
+
+    return false;
+  }
+}
+
+
+/**
+ * Return the mangled names of the subsections we are currently in,
+ * joined by the path separator. The result is the empty string if we
+ * are not inside any subsection.
+ */
+std::string
+ParameterHandler::get_current_path () const
+{
+  std::string path;
+
+  for (unsigned int i=0; i<subsection_path.size(); ++i)
+    {
+      // separate all but the first component from their predecessor
+      if (i > 0)
+        path += path_separator;
+
+      path += mangle(subsection_path[i]);
+    }
+
+  return path;
+}
+
+
+
+/**
+ * Return the path of the entry @p name within the current subsection:
+ * the current path, the path separator, and the mangled @p name. If we
+ * are not inside any subsection, only the mangled name is returned.
+ */
+std::string
+ParameterHandler::get_current_full_path (const std::string &name) const
+{
+  const std::string section_path = get_current_path ();
+
+  if (section_path.empty())
+    return mangle(name);
+
+  return section_path + path_separator + mangle(name);
+}
+
+
+
+/**
+ * Read parameters line by line from @p input and hand each logical line
+ * to scan_line(). A line whose last non-whitespace character is a
+ * backslash is joined with the following line(s) before being scanned.
+ * @p filename is passed on to scan_line() and used in the error
+ * message printed here.
+ *
+ * Returns false if any call to scan_line() returned false, or if all
+ * lines were scanned successfully but the subsection we end up in
+ * differs from the one we started in; in the latter case the original
+ * subsection is restored. Throws ExcIO if @p input is not in a good
+ * state on entry.
+ */
+bool ParameterHandler::read_input (std::istream &input,
+                                   const std::string &filename)
+{
+  AssertThrow (input, ExcIO());
+
+  // remember the subsections we are currently in so that unbalanced
+  // 'subsection'/'end' pairs can be detected below
+  const std::vector<std::string> saved_path = subsection_path;
+
+  std::string  raw_line;
+  // accumulates the pieces of a continued line; always empty between
+  // two logical lines
+  std::string  logical_line;
+  bool         awaiting_continuation = false;
+  unsigned int current_line_n = 0;
+  bool         status = true;
+
+  while (std::getline (input, raw_line))
+    {
+      ++current_line_n;
+      // Trim the whitespace at the ends of the line here instead of in
+      // scan_line. This makes the continuation line logic a lot simpler.
+      raw_line = Utilities::trim (raw_line);
+
+      // A trailing backslash means that this line is to be joined with
+      // the next one; the backslash itself is not part of the content.
+      const bool ends_in_backslash
+        = (!raw_line.empty() &&
+           raw_line[raw_line.length() - 1] == '\\');
+      if (ends_in_backslash)
+        raw_line.erase (raw_line.length() - 1);
+
+      logical_line += raw_line;
+      awaiting_continuation = ends_in_backslash;
+
+      // once the logical line is complete, scan it and start afresh
+      if (!awaiting_continuation)
+        {
+          status &= scan_line (logical_line, filename, current_line_n);
+          logical_line.clear();
+        }
+    }
+
+  // While it does not make much sense for anyone to actually do this, allow
+  // the last line to end in a backslash.
+  if (awaiting_continuation)
+    status &= scan_line (logical_line, filename, current_line_n);
+
+  const bool path_is_balanced = (saved_path == subsection_path);
+  if (status && !path_is_balanced)
+    {
+      std::cerr << "Unbalanced 'subsection'/'end' in file <" << filename
+                << ">." << std::endl;
+      if (saved_path.size()>0)
+        {
+          std::cerr << "Path before loading input:" << std::endl;
+          for (unsigned int i=0; i<saved_path.size(); ++i)
+            std::cerr << std::setw(i*2+4) << " "
+                      << "subsection " << saved_path[i] << std::endl;
+        }
+      std::cerr << "Current path:" << std::endl;
+      for (unsigned int i=0; i<subsection_path.size(); ++i)
+        std::cerr << std::setw(i*2+4) << " "
+                  << "subsection " << subsection_path[i] << std::endl;
+
+      // restore subsection we started with and return failure:
+      subsection_path = saved_path;
+      return false;
+    }
+
+  return status;
+}
+
+
+
+/**
+ * Locate @p filename through a PathSearch object for the class
+ * "PARAMETERS", open it, and read parameters from it with the stream
+ * version of read_input(), whose result is returned.
+ *
+ * If the search throws PathSearch::ExcFileNotFound, a message is
+ * written to std::cerr and false is returned. In that case, unless
+ * @p optional is set, a file called @p filename is written with the
+ * current parameters, in ShortText style if @p write_compact is set and
+ * in Text style otherwise. Throws ExcIO if the file is found but cannot
+ * be opened.
+ */
+bool ParameterHandler::read_input (const std::string &filename,
+                                   const bool optional,
+                                   const bool write_compact)
+{
+  PathSearch search("PARAMETERS");
+
+  try
+    {
+      const std::string openname = search.find(filename);
+
+      std::ifstream file_stream (openname.c_str());
+      AssertThrow(file_stream, ExcIO());
+
+      return read_input (file_stream, filename);
+    }
+  catch (const PathSearch::ExcFileNotFound &)
+    {
+      std::cerr << "ParameterHandler::read_input: could not open file <"
+                << filename << "> for reading." << std::endl;
+
+      // for a mandatory file, try to leave a template behind
+      if (optional == false)
+        {
+          std::cerr << "Trying to make file <"
+                    << filename << "> with default values for you." << std::endl;
+
+          std::ofstream output (filename.c_str());
+          if (output)
+            {
+              if (write_compact)
+                print_parameters (output, ShortText);
+              else
+                print_parameters (output, Text);
+            }
+        }
+    }
+
+  return false;
+}
+
+
+
+/**
+ * Read parameters from the C string @p s by wrapping it in a string
+ * stream and passing it to read_input() under the name "input string".
+ * Returns what read_input() returns.
+ */
+bool ParameterHandler::read_input_from_string (const char *s)
+{
+  std::istringstream input_stream (s);
+
+  const bool success = read_input (input_stream, "input string");
+  return success;
+}
+
+
+
+namespace
+{
+  /**
+   * Recursively go through the 'source' tree and see if we can find
+   * corresponding entries in the 'destination' tree. If not, report the
+   * problem on std::cerr and return false (i.e. we have just read an XML
+   * file that has entries that weren't declared in the ParameterHandler
+   * object); if so, copy the value of these nodes into the destination
+   * object, provided the new value matches the pattern stored for the
+   * entry.
+   *
+   * @p current_path is the path of 'source' within the whole tree (empty
+   * for the top level), @p path_separator joins path components, and
+   * @p patterns is indexed by the "pattern" number stored with each
+   * destination entry.
+   */
+  bool
+  read_xml_recursively (const boost::property_tree::ptree &source,
+                        const std::string                 &current_path,
+                        const char                         path_separator,
+                        const std::vector<std_cxx11::shared_ptr<const Patterns::PatternBase> > &
+                        patterns,
+                        boost::property_tree::ptree       &destination)
+  {
+    for (boost::property_tree::ptree::const_iterator p = source.begin();
+         p != source.end(); ++p)
+      {
+        const boost::property_tree::ptree &child = p->second;
+
+        // path of this child relative to the root of the tree
+        const std::string full_path
+          = (current_path == ""
+             ?
+             p->first
+             :
+             current_path + path_separator + p->first);
+
+        // a sub-tree without a "value" is either an alias node or a
+        // subsection
+        if (!child.get_optional<std::string>("value"))
+          {
+            // alias nodes are static and there is nothing to do for
+            // them (but the remark further down about the static nodes
+            // inside parameter nodes applies to them as well)
+            if (child.get_optional<std::string>("alias"))
+              continue;
+
+            // it must be a subsection. descend into it, and exit now
+            // if the recursive read did not succeed
+            const bool result
+              = read_xml_recursively (child,
+                                      full_path,
+                                      path_separator,
+                                      patterns,
+                                      destination);
+            if (result == false)
+              return false;
+
+            continue;
+          }
+
+        // from here on we deal with a parameter node. make sure we have
+        // a corresponding entry in the destination object as well
+        if (!(destination.get_optional<std::string> (full_path)
+              &&
+              destination.get_optional<std::string> (full_path +
+                                                     path_separator +
+                                                     "value")))
+          {
+            std::cerr << "The entry <" << full_path
+                      << "> with value <"
+                      << child.get<std::string>("value")
+                      << "> has not been declared."
+                      << std::endl;
+            return false;
+          }
+
+        // first make sure that the new entry actually satisfies its
+        // constraints
+        const std::string new_value
+          = child.get<std::string>("value");
+
+        const unsigned int pattern_index
+          = destination.get<unsigned int> (full_path +
+                                           path_separator +
+                                           "pattern");
+        if (patterns[pattern_index]->match(new_value) == false)
+          {
+            std::cerr << "    The entry value" << std::endl
+                      << "        " << new_value << std::endl
+                      << "    for the entry named" << std::endl
+                      << "        " << full_path << std::endl
+                      << "    does not match the given pattern" << std::endl
+                      << "        " << patterns[pattern_index]->description()
+                      << std::endl;
+            return false;
+          }
+
+        // set the found parameter in the destination argument
+        destination.put (full_path + path_separator + "value",
+                         new_value);
+
+        // this node might have sub-nodes in addition to "value", such
+        // as "default_value", "documentation", etc. we might at some
+        // point in the future want to make sure that if they exist that
+        // they match the ones in the 'destination' tree
+      }
+
+    return true;
+  }
+}
+
+
+
+/**
+ * Read parameter values from the XML document in the stream @p in. The
+ * document must consist of exactly one top-level element called
+ * "ParameterHandler"; its children are then transferred into the
+ * entries of this object by read_xml_recursively().
+ *
+ * Returns false, after printing a message to std::cerr, if the stream
+ * cannot be parsed as XML, if there is no "ParameterHandler" element, or
+ * if it is not the only top-level element. Otherwise returns the result
+ * of read_xml_recursively().
+ */
+bool ParameterHandler::read_input_from_xml (std::istream &in)
+{
+  // read the XML tree assuming that (as we do in print_parameters(XML))
+  // it has only a single top-level node called "ParameterHandler"
+  boost::property_tree::ptree single_node_tree;
+  try
+    {
+      read_xml (in, single_node_tree);
+    }
+  catch (...)
+    {
+      std::cerr << "This input stream appears not to be valid XML"
+                << std::endl;
+      return false;
+    }
+
+  // make sure there is a top-level element called "ParameterHandler"
+  const bool has_top_level_element
+    = static_cast<bool>(single_node_tree.get_optional<std::string>("ParameterHandler"));
+  if (has_top_level_element == false)
+    {
+      std::cerr << "There is no top-level XML element called \"ParameterHandler\"."
+                << std::endl;
+      return false;
+    }
+
+  // ensure that there is only a single top-level element
+  const bool is_only_element
+    = (std::distance (single_node_tree.begin(), single_node_tree.end()) == 1);
+  if (is_only_element == false)
+    {
+      std::cerr << "The top-level XML element \"ParameterHandler\" is "
+                << "not the only one."
+                << std::endl;
+      std::cerr << "(There are "
+                << std::distance (single_node_tree.begin(),
+                                  single_node_tree.end())
+                << " top-level elements.)"
+                << std::endl;
+      return false;
+    }
+
+  // read the child elements recursively
+  const boost::property_tree::ptree
+  &my_entries = single_node_tree.get_child("ParameterHandler");
+
+  const bool success = read_xml_recursively (my_entries, "", path_separator,
+                                             patterns, *entries);
+  return success;
+}
+
+
+
+/**
+ * Discard all entries by replacing the property tree with a newly
+ * allocated, empty one. Only the @p entries member is touched by this
+ * function.
+ */
+void ParameterHandler::clear ()
+{
+  entries.reset (new boost::property_tree::ptree());
+}
+
+
+
+/**
+ * Declare a new entry called @p entry in the current subsection. The
+ * entry's node in the property tree receives the children "value" and
+ * "default_value" (both set to @p default_value), "documentation",
+ * "pattern" (the index of a clone of @p pattern in the patterns array)
+ * and "pattern_description".
+ *
+ * Throws ExcValueDoesNotMatchPattern if @p default_value does not match
+ * @p pattern. This check is done last, so the entry has already been
+ * stored when the exception is thrown.
+ */
+void
+ParameterHandler::declare_entry (const std::string           &entry,
+                                 const std::string           &default_value,
+                                 const Patterns::PatternBase &pattern,
+                                 const std::string           &documentation)
+{
+  // all children of this entry live below this prefix
+  const std::string entry_prefix
+    = get_current_full_path(entry) + path_separator;
+
+  entries->put (entry_prefix + "value",
+                default_value);
+  entries->put (entry_prefix + "default_value",
+                default_value);
+  entries->put (entry_prefix + "documentation",
+                documentation);
+
+  // clone the pattern and store its index in the node
+  patterns.push_back (std_cxx11::shared_ptr<const Patterns::PatternBase>
+                      (pattern.clone()));
+  const unsigned int pattern_index
+    = static_cast<unsigned int>(patterns.size()-1);
+  entries->put (entry_prefix + "pattern",
+                pattern_index);
+
+  // also store the description of the pattern. we do so because we may
+  // wish to export the whole thing as XML or any other format so that
+  // external tools can work on the parameter file; in that case, they
+  // will have to be able to re-create the patterns as far as possible
+  entries->put (entry_prefix + "pattern_description",
+                patterns.back()->description());
+
+  // as documented, do the default value checking at the very end
+  AssertThrow (pattern.match (default_value),
+               ExcValueDoesNotMatchPattern (default_value, pattern.description()));
+}
+
+
+
+void
+ParameterHandler::declare_alias(const std::string &existing_entry_name,
+                                const std::string &alias_name,
+                                const bool         alias_is_deprecated)
+{
+  // the entry the alias points to must already be in the tree...
+  Assert (entries->get_optional<std::string>(get_current_full_path(existing_entry_name)),
+          ExcMessage ("You are trying to declare an alias entry <"
+                      + alias_name +
+                      "> that references an entry <"
+                      + existing_entry_name +
+                      ">, but the latter does not exist."));
+  // ...and it must be an actual parameter, i.e. have a "value"
+  // child, rather than being an alias or a subsection
+  Assert (entries->get_optional<std::string>(get_current_full_path(existing_entry_name) + path_separator + "value"),
+          ExcMessage ("You are trying to declare an alias entry <"
+                      + alias_name +
+                      "> that references an entry <"
+                      + existing_entry_name +
+                      ">, but the latter does not seem to be a "
+                      "parameter declaration."));
+
+  // paths of the alias node and of its "alias" attribute
+  const std::string alias_path = get_current_full_path(alias_name);
+  const std::string alias_key  = alias_path + path_separator + "alias";
+
+  // a node of that name may exist already, but then it has to be
+  // an alias too, and it has to point to the very same entry
+  if (entries->get_optional<std::string>(alias_path))
+    {
+      Assert (entries->get_optional<std::string> (alias_key),
+              ExcMessage ("You are trying to declare an alias entry <"
+                          + alias_name +
+                          "> but a non-alias entry already exists in this "
+                          "subsection (i.e., there is either a preexisting "
+                          "further subsection, or a parameter entry, with "
+                          "the same name as the alias)."));
+      Assert (entries->get<std::string> (alias_key)
+              ==
+              existing_entry_name,
+              ExcMessage ("You are trying to declare an alias entry <"
+                          + alias_name +
+                          "> but an alias entry already exists in this "
+                          "subsection and this existing alias references a "
+                          "different parameter entry. Specifically, "
+                          "you are trying to reference the entry <"
+                          + existing_entry_name +
+                          "> whereas the existing alias references "
+                          "the entry <"
+                          + entries->get<std::string> (alias_key) +
+                          ">."));
+    }
+
+  // finally record what the alias refers to and whether using it
+  // is deprecated
+  entries->put (alias_key, existing_entry_name);
+  entries->put (alias_path + path_separator + "deprecation_status",
+                (alias_is_deprecated ? "true" : "false"));
+}
+
+
+
+void ParameterHandler::enter_subsection (const std::string &subsection)
+{
+  // Make 'subsection', a child of the section we are currently in,
+  // the new current section. The corresponding tree node is created
+  // on the fly if it does not exist yet.
+  const std::string full_path = get_current_full_path(subsection);
+
+  // if necessary create subsection
+  if (!entries->get_child_optional (full_path))
+    entries->add_child (full_path,
+                        boost::property_tree::ptree());
+
+  // then enter it
+  subsection_path.push_back (subsection);
+}
+
+
+
+void ParameterHandler::leave_subsection ()
+{
+  // leaving only makes sense while we are inside some subsection
+  Assert (!subsection_path.empty(), ExcAlreadyAtTopLevel());
+
+  // guard the pop separately so that an empty path is never popped,
+  // whatever the assertion above did
+  if (!subsection_path.empty())
+    subsection_path.pop_back ();
+}
+
+
+
+std::string
+ParameterHandler::get (const std::string &entry_string) const
+{
+  // look up the "value" attribute of the entry in the current
+  // subsection
+  const std::string value_path
+    = get_current_full_path(entry_string) + path_separator + "value";
+  const boost::optional<std::string> stored_value
+    = entries->get_optional<std::string> (value_path);
+
+  // an entry without a value has never been declared; flag this
+  // and fall back to the empty string
+  if (!stored_value)
+    {
+      Assert (false, ExcEntryUndeclared(entry_string));
+      return "";
+    }
+
+  return stored_value.get();
+}
+
+
+
+long int ParameterHandler::get_integer (const std::string &entry_string) const
+{
+  // Return the value of the given entry converted to an integer by
+  // Utilities::string_to_int(). If the conversion throws anything,
+  // an ExcMessage naming the offending value and entry is thrown
+  // instead.
+  //
+  // The value is fetched only once so that the error message below
+  // does not need a second lookup.
+  const std::string value = get (entry_string);
+
+  try
+    {
+      return Utilities::string_to_int (value);
+    }
+  catch (...)
+    {
+      // note the closing '>' after the entry name: both names are
+      // enclosed in angle brackets
+      AssertThrow (false,
+                   ExcMessage("Can't convert the parameter value <"
+                              + value +
+                              "> for entry <"
+                              + entry_string +
+                              "> to an integer."));
+      return 0;
+    }
+}
+
+
+
+double ParameterHandler::get_double (const std::string &entry_string) const
+{
+  // Return the value of the given entry converted to a double by
+  // Utilities::string_to_double(). If the conversion throws
+  // anything, an ExcMessage naming the offending value and entry is
+  // thrown instead.
+  //
+  // The value is fetched only once so that the error message below
+  // does not need a second lookup.
+  const std::string value = get (entry_string);
+
+  try
+    {
+      return Utilities::string_to_double (value);
+    }
+  catch (...)
+    {
+      // note the closing '>' after the entry name: both names are
+      // enclosed in angle brackets
+      AssertThrow (false,
+                   ExcMessage("Can't convert the parameter value <"
+                              + value +
+                              "> for entry <"
+                              + entry_string +
+                              "> to a double precision variable."));
+      return 0;
+    }
+}
+
+
+
+bool ParameterHandler::get_bool (const std::string &entry_string) const
+{
+  // Return the value of the given entry as a boolean: "true" and
+  // "yes" yield true, "false" and "no" yield false. Any other value
+  // makes the AssertThrow below fire with an ExcMessage.
+  const std::string s = get(entry_string);
+
+  // note the closing '>' after the entry name: both names are
+  // enclosed in angle brackets
+  AssertThrow ((s=="true") || (s=="false") ||
+               (s=="yes") || (s=="no"),
+               ExcMessage("Can't convert the parameter value <"
+                          + s +
+                          "> for entry <"
+                          + entry_string +
+                          "> to a boolean."));
+
+  return (s=="true" || s=="yes");
+}
+
+
+
+void
+ParameterHandler::set (const std::string &entry_string,
+                       const std::string &new_value)
+{
+  // start from the path of the name we were given; if that node is
+  // an alias, continue with the path of the entry it stands for
+  std::string path = get_current_full_path(entry_string);
+  {
+    const boost::optional<std::string> alias_target
+      = entries->get_optional<std::string>(path + path_separator + "alias");
+    if (alias_target)
+      path = get_current_full_path(alias_target.get());
+  }
+
+  // prefix shared by all attributes of the entry
+  const std::string node_prefix = path + path_separator;
+
+  // an entry without a "value" attribute has never been declared
+  if (!entries->get_optional<std::string>(node_prefix + "value"))
+    {
+      AssertThrow (false, ExcEntryUndeclared(entry_string));
+      return;
+    }
+
+  // only accept the new value if it matches the pattern that was
+  // stored for this entry
+  const unsigned int pattern_index
+    = entries->get<unsigned int> (node_prefix + "pattern");
+  AssertThrow (patterns[pattern_index]->match(new_value),
+               ExcValueDoesNotMatchPattern (new_value,
+                                            entries->get<std::string>
+                                            (node_prefix +
+                                             "pattern_description")));
+
+  entries->put (node_prefix + "value", new_value);
+}
+
+
+void
+ParameterHandler::set (const std::string &entry_string,
+                       const char        *new_value)
+{
+  // convert the C string and let the std::string overload do the
+  // actual work
+  const std::string value_as_string (new_value);
+  set (entry_string, value_as_string);
+}
+
+
+void
+ParameterHandler::set (const std::string &entry_string,
+                       const double      &new_value)
+{
+  // write the number with 16 digits of precision into a string...
+  std::ostringstream converter;
+  converter << std::setprecision(16) << new_value;
+
+  // ...and pass that string on to the overload that stores strings
+  set (entry_string, converter.str());
+}
+
+
+
+void
+ParameterHandler::set (const std::string &entry_string,
+                       const long int    &new_value)
+{
+  // write the number into a string...
+  std::ostringstream converter;
+  converter << new_value;
+
+  // ...and pass that string on to the overload that stores strings
+  set (entry_string, converter.str());
+}
+
+
+
+void
+ParameterHandler::set (const std::string &entry_string,
+                       const bool        &new_value)
+{
+  // booleans are stored as the words "true" and "false"; the text
+  // is passed on as a C string
+  const char *const value_as_text = (new_value ? "true" : "false");
+  set (entry_string, value_as_text);
+}
+
+
+
+std::ostream &
+ParameterHandler::print_parameters (std::ostream     &out,
+                                    const OutputStyle style)
+{
+  AssertThrow (out, ExcIO());
+
+  // XML and JSON are written by the property tree writers in one
+  // go, so for these two styles we are done right afterwards
+  if (style == XML)
+    {
+      // XML allows only a single top-level element whereas our tree
+      // may have several entries and sections at the top. Hence
+      // build a temporary tree consisting of the single node
+      // "ParameterHandler" and hang the existing tree below it.
+      boost::property_tree::ptree wrapper_tree;
+      wrapper_tree.add_child("ParameterHandler", *entries);
+
+      write_xml (out, wrapper_tree);
+      return out;
+    }
+
+  if (style == JSON)
+    {
+      write_json (out, *entries);
+      return out;
+    }
+
+  // the remaining styles begin with a style specific header
+  switch (style)
+    {
+    case Text:
+      out << "# Listing of Parameters" << std::endl
+          << "# ---------------------" << std::endl;
+      break;
+    case LaTeX:
+      out << "\\subsection{Global parameters}" << std::endl
+          << "\\label{parameters:global}" << std::endl
+          << std::endl << std::endl;
+      break;
+    case Description:
+      out << "Listing of Parameters:" << std::endl << std::endl;
+      break;
+    case ShortText:
+      break;
+    default:
+      Assert (false, ExcNotImplemented());
+    }
+
+  // the body: all entries and, recursively, all subsections
+  print_parameters_section (out, style, 0);
+
+  // none of the styles needs a footer; only check once more that
+  // the style is one we know about
+  switch (style)
+    {
+    case Text:
+    case Description:
+    case ShortText:
+    case LaTeX:
+      break;
+    default:
+      Assert (false, ExcNotImplemented());
+    }
+
+  return out;
+}
+
+
+
+// Print the current section to 'out' in the given style and then recurse into
+// its subsections; indent_level is the starting indentation (two blanks each).
+// If include_top_level_elements is set, the enclosing subsections are printed
+// too. If bit 7 (128) of the style is set, comments are not printed.
+// If bit 6 (64) is set, default values after change are not printed.
+void
+ParameterHandler::print_parameters_section (std::ostream      &out,
+                                            const OutputStyle  style,
+                                            const unsigned int indent_level,
+                                            const bool         include_top_level_elements)
+{
+  AssertThrow (out, ExcIO());
+
+  const boost::property_tree::ptree &current_section
+    = entries->get_child (get_current_path());
+
+  unsigned int overall_indent_level = indent_level;
+
+  switch (style)
+    {
+    case XML:
+    {
+      if (include_top_level_elements)
+        {
+          // call the writer
+          // function and exit as
+          // there is nothing
+          // further to do down in
+          // this function
+          //
+          // XML has a requirement that
+          // there can only be one
+          // single top-level entry,
+          // but a section has multiple
+          // entries and sections. we
+          // work around this by
+          // creating a tree just for
+          // this purpose with the
+          // single top-level node
+          // "ParameterHandler" and
+          // place the current section
+          // at its full path
+          // underneath that node
+          boost::property_tree::ptree single_node_tree;
+
+          // if there is no subsection selected,
+          // add the whole tree of entries,
+          // otherwise add a root element
+          // and the selected subsection under it
+          if (subsection_path.size() == 0)
+            {
+              single_node_tree.add_child("ParameterHandler",
+                                         *entries);
+            }
+          else
+            {
+              std::string  path ("ParameterHandler");
+
+              single_node_tree.add_child(path,
+                                         boost::property_tree::ptree());
+
+              path += path_separator + get_current_path ();
+              single_node_tree.add_child (path, current_section);
+            };
+
+          write_xml (out, single_node_tree);
+        }
+      else
+        Assert (false, ExcNotImplemented());
+
+      break;
+    }
+    case Text:
+    case ShortText:
+    {
+      // if requested, first print one "subsection" line per enclosing section
+      if (include_top_level_elements && (subsection_path.size() > 0))
+        for (unsigned int i=0; i<subsection_path.size(); ++i)
+          {
+            out << std::setw(overall_indent_level*2) << ""
+                << "subsection " << demangle (subsection_path[i]) << std::endl;
+            overall_indent_level += 1;
+          };
+
+      // first find out the longest
+      // entry name to be able to
+      // align the equal signs
+      //
+      // to do this loop over all
+      // nodes of the current tree,
+      // select the parameter nodes
+      // (and discard sub-tree
+      // nodes) and take the
+      // maximum of their lengths
+      std::size_t longest_name = 0;
+      for (boost::property_tree::ptree::const_iterator
+           p = current_section.begin();
+           p != current_section.end(); ++p)
+        if (is_parameter_node (p->second) == true)
+          longest_name = std::max (longest_name,
+                                   demangle(p->first).length());
+
+      // likewise find the longest
+      // actual value string to
+      // make sure we can align the
+      // default and documentation
+      // strings
+      std::size_t longest_value = 0;
+      for (boost::property_tree::ptree::const_iterator
+           p = current_section.begin();
+           p != current_section.end(); ++p)
+        if (is_parameter_node (p->second) == true)
+          longest_value = std::max (longest_value,
+                                    p->second.get<std::string>("value").length());
+
+
+      // print entries one by
+      // one. make sure they are
+      // sorted by using the
+      // appropriate iterators
+      bool first_entry = true;
+      for (boost::property_tree::ptree::const_assoc_iterator
+           p = current_section.ordered_begin();
+           p != current_section.not_found(); ++p)
+        if (is_parameter_node (p->second) == true)
+          {
+            const std::string value = p->second.get<std::string>("value");
+
+            // if there is documentation,
+            // then add an empty line (unless
+            // this is the first entry in a
+            // subsection), print the
+            // documentation, and then the
+            // actual entry; break the
+            // documentation into readable
+            // chunks such that the whole
+            // thing is at most 78 characters
+            // wide
+            if ((!(style & 128)) &&
+                !p->second.get<std::string>("documentation").empty())
+              {
+                if (first_entry == false)
+                  out << std::endl;
+                else
+                  first_entry = false;
+
+                const std::vector<std::string> doc_lines
+                  = Utilities::
+                    break_text_into_lines (p->second.get<std::string>("documentation"),
+                                           78 - overall_indent_level*2 - 2);
+
+                for (unsigned int i=0; i<doc_lines.size(); ++i)
+                  out << std::setw(overall_indent_level*2) << ""
+                      << "# "
+                      << doc_lines[i]
+                      << std::endl;
+              }
+
+
+
+            // print name and value
+            // of this entry
+            out << std::setw(overall_indent_level*2) << ""
+                << "set "
+                << demangle(p->first)
+                << std::setw(longest_name-demangle(p->first).length()+1) << " "
+                << "= " << value;
+
+            // finally print the
+            // default value, but
+            // only if it differs
+            // from the actual value
+            if ((!(style & 64)) && value != p->second.get<std::string>("default_value"))
+              {
+                out << std::setw(longest_value-value.length()+1) << ' '
+                    << "# ";
+                out << "default: " << p->second.get<std::string>("default_value");
+              }
+
+            out << std::endl;
+          }
+
+      break;
+    }
+
+    case LaTeX:
+    {
+      // if there are any parameters or aliases
+      // in this section then print them as an
+      // itemized list
+      bool parameters_exist_here = false;
+      for (boost::property_tree::ptree::const_assoc_iterator
+           p = current_section.ordered_begin();
+           p != current_section.not_found(); ++p)
+        if ((is_parameter_node (p->second) == true)
+            ||
+            (is_alias_node (p->second) == true))
+          {
+            parameters_exist_here = true;
+            break;
+          }
+
+      if (parameters_exist_here)
+        {
+          out << "\\begin{itemize}"
+              << std::endl;
+
+          // print entries one by
+          // one. make sure they are
+          // sorted by using the
+          // appropriate iterators
+          for (boost::property_tree::ptree::const_assoc_iterator
+               p = current_section.ordered_begin();
+               p != current_section.not_found(); ++p)
+            if (is_parameter_node (p->second) == true)
+              {
+                const std::string value = p->second.get<std::string>("value");
+
+                // print name and a label made of the subsection path and the name
+                out << "\\item {\\it Parameter name:} {\\tt " << demangle(p->first) << "}\n"
+                    << "\\phantomsection\\label{parameters:";
+                for (unsigned int i=0; i<subsection_path.size(); ++i)
+                  out << subsection_path[i] << "/";
+                out << demangle(p->first);
+                out << "}\n\n"
+                    << std::endl;
+
+                out << "\\index[prmindex]{"
+                    << demangle(p->first)
+                    << "}\n";
+                out << "\\index[prmindexfull]{";
+                for (unsigned int i=0; i<subsection_path.size(); ++i)
+                  out << subsection_path[i] << "!";
+                out << demangle(p->first)
+                    << "}\n";
+
+                // finally print value and default
+                out << "{\\it Value:} " << value << "\n\n"
+                    << std::endl
+                    << "{\\it Default:} "
+                    << p->second.get<std::string>("default_value") << "\n\n"
+                    << std::endl;
+
+                // if there is a
+                // documenting string,
+                // print it as well
+                if (!p->second.get<std::string>("documentation").empty())
+                  out << "{\\it Description:} "
+                      << p->second.get<std::string>("documentation") << "\n\n"
+                      << std::endl;
+
+                // also output possible values
+                out << "{\\it Possible values:} "
+                    << p->second.get<std::string> ("pattern_description")
+                    << std::endl;
+              }
+            else if (is_alias_node (p->second) == true)
+              {
+                const std::string alias = p->second.get<std::string>("alias");
+
+                // print name and label exactly as for a parameter above
+                out << "\\item {\\it Parameter name:} {\\tt " << demangle(p->first) << "}\n"
+                    << "\\phantomsection\\label{parameters:";
+                for (unsigned int i=0; i<subsection_path.size(); ++i)
+                  out << subsection_path[i] << "/";
+                out << demangle(p->first);
+                out << "}\n\n"
+                    << std::endl;
+
+                out << "\\index[prmindex]{"
+                    << demangle(p->first)
+                    << "}\n";
+                out << "\\index[prmindexfull]{";
+                for (unsigned int i=0; i<subsection_path.size(); ++i)
+                  out << subsection_path[i] << "!";
+                out << demangle(p->first)
+                    << "}\n";
+
+                // finally print alias and indicate if it is deprecated
+                out << "This parameter is an alias for the parameter ``\\texttt{"
+                    << alias << "}''."
+                    << (p->second.get<std::string>("deprecation_status") == "true"
+                        ?
+                        " Its use is deprecated."
+                        :
+                        "")
+                    << "\n\n"
+                    << std::endl;
+              }
+          out << "\\end{itemize}" << std::endl;
+        }
+
+      break;
+    }
+
+    case Description:
+    {
+      // if requested, first print one "subsection" line per enclosing section
+      if (include_top_level_elements && (subsection_path.size() > 0))
+        for (unsigned int i=0; i<subsection_path.size(); ++i)
+          {
+            out << std::setw(overall_indent_level*2) << ""
+                << "subsection " << demangle (subsection_path[i]) << std::endl;
+            overall_indent_level += 1;
+          };
+
+      // first find out the longest
+      // entry name to be able to
+      // align the equal signs
+      std::size_t longest_name = 0;
+      for (boost::property_tree::ptree::const_iterator
+           p = current_section.begin();
+           p != current_section.end(); ++p)
+        if (is_parameter_node (p->second) == true)
+          longest_name = std::max (longest_name,
+                                   demangle(p->first).length());
+
+      // print entries one by
+      // one. make sure they are
+      // sorted by using the
+      // appropriate iterators
+      for (boost::property_tree::ptree::const_assoc_iterator
+           p = current_section.ordered_begin();
+           p != current_section.not_found(); ++p)
+        if (is_parameter_node (p->second) == true)
+          {
+            const std::string value = p->second.get<std::string>("value");
+
+            // print the name; the pattern description follows instead of a value
+            out << std::setw(overall_indent_level*2) << ""
+                << "set "
+                << demangle(p->first)
+                << std::setw(longest_name-demangle(p->first).length()+1) << " "
+                << " = ";
+
+            // print possible values, on lines of their own if there are several:
+            const std::vector<std::string> description_str
+              = Utilities::break_text_into_lines (p->second.get<std::string>
+                                                  ("pattern_description"),
+                                                  78 - overall_indent_level*2 - 2, '|');
+            if (description_str.size() > 1)
+              {
+                out << std::endl;
+                for (unsigned int i=0; i<description_str.size(); ++i)
+                  out << std::setw(overall_indent_level*2+6) << ""
+                      << description_str[i] << std::endl;
+              }
+            else if (description_str.empty() == false)
+              out << "  " << description_str[0] << std::endl;
+            else
+              out << std::endl;
+
+            // if there is a
+            // documenting string,
+            // print it as well
+            if (p->second.get<std::string>("documentation").length() != 0)
+              out << std::setw(overall_indent_level*2 + longest_name + 10) << ""
+                  << "(" << p->second.get<std::string>("documentation") << ")" << std::endl;
+          }
+
+      break;
+    }
+
+    default:
+      Assert (false, ExcNotImplemented());
+    }
+
+
+  // if there was text before and there are
+  // sections to come, put two newlines
+  // between the last entry and the first
+  // subsection
+  if (style != XML)
+    {
+      unsigned int n_parameters = 0;
+      unsigned int n_sections   = 0;
+      for (boost::property_tree::ptree::const_iterator
+           p = current_section.begin();
+           p != current_section.end(); ++p)
+        if (is_parameter_node (p->second) == true)
+          ++n_parameters;
+        else if (is_alias_node (p->second) == false) // aliases count as neither
+          ++n_sections;
+
+      if ((style != Description)
+          &&
+          (!(style & 128))
+          &&
+          (n_parameters != 0)
+          &&
+          (n_sections != 0))
+        out << std::endl << std::endl;
+
+      // now traverse subsections tree,
+      // in alphabetical order
+      for (boost::property_tree::ptree::const_assoc_iterator
+           p = current_section.ordered_begin();
+           p != current_section.not_found(); ++p)
+        if ((is_parameter_node (p->second) == false)
+            &&
+            (is_alias_node (p->second) == false))
+          {
+            // first print the subsection header
+            switch (style)
+              {
+              case Text:
+              case Description:
+              case ShortText:
+                out << std::setw(overall_indent_level*2) << ""
+                    << "subsection " << demangle(p->first) << std::endl;
+                break;
+              case LaTeX:
+              {
+                out << std::endl
+                    << "\\subsection{Parameters in section \\tt ";
+
+                // find the path to the
+                // current section so that we
+                // can print it in the
+                // \subsection{...} heading
+                for (unsigned int i=0; i<subsection_path.size(); ++i)
+                  out << subsection_path[i] << "/";
+                out << demangle(p->first);
+
+                out << "}" << std::endl;
+                out << "\\label{parameters:";
+                for (unsigned int i=0; i<subsection_path.size(); ++i)
+                  out << mangle(subsection_path[i]) << "/";
+                out << p->first << "}";
+                out << std::endl;
+
+                out << std::endl;
+                break;
+              }
+
+              default:
+                Assert (false, ExcNotImplemented());
+              };
+
+            // then print the contents of the subsection
+            // by entering it and recursing
+            enter_subsection (demangle(p->first));
+            print_parameters_section (out, style, overall_indent_level+1);
+            leave_subsection ();
+            switch (style)
+              {
+              case Text:
+                // write end of
+                // subsection. one
+                // blank line after
+                // each subsection
+                out << std::setw(overall_indent_level*2) << ""
+                    << "end" << std::endl
+                    << std::endl;
+
+                // if this is a toplevel
+                // subsection, then have two
+                // newlines
+                if (overall_indent_level == 0)
+                  out << std::endl;
+
+                break;
+              case Description:
+                break;
+              case ShortText:
+                // write end of
+                // subsection.
+                out << std::setw(overall_indent_level*2) << ""
+                    << "end" << std::endl;
+                break;
+              case LaTeX:
+                break;
+              default:
+                Assert (false, ExcNotImplemented());
+              }
+          }
+    }
+
+  // close the enclosing subsections opened at the top (Text and ShortText only)
+  switch (style)
+    {
+    case XML:
+    case LaTeX:
+    case Description:
+      break;
+    case Text:
+    case ShortText:
+    {
+      if (include_top_level_elements && (subsection_path.size() > 0))
+        for (unsigned int i=0; i<subsection_path.size(); ++i)
+          {
+            overall_indent_level -= 1;
+            out << std::setw(overall_indent_level*2) << ""
+                << "end" << std::endl;
+          };
+
+      break;
+    }
+
+    default:
+      Assert (false, ExcNotImplemented());
+    }
+
+}
+
+
+
+void
+ParameterHandler::log_parameters (LogStream &out)
+{
+  // push the label "parameters" onto the log stream, write the
+  // current section together with everything below it, and remove
+  // the label again
+  out.push("parameters");
+  log_parameters_section (out);
+  out.pop();
+}
+
+
+
+void
+ParameterHandler::log_parameters_section (LogStream &out)
+{
+  // Write all parameters of the current section to 'out' as
+  // "name: value" lines and then do the same, recursively, for
+  // every subsection of the current section.
+  const boost::property_tree::ptree &current_section
+    = entries->get_child (get_current_path());
+
+  // print entries one by
+  // one. make sure they are
+  // sorted by using the
+  // appropriate iterators
+  for (boost::property_tree::ptree::const_assoc_iterator
+       p = current_section.ordered_begin();
+       p != current_section.not_found(); ++p)
+    if (is_parameter_node (p->second) == true)
+      out << demangle(p->first) << ": "
+          << p->second.get<std::string>("value") << std::endl;
+
+  // now traverse subsections tree, in alphabetical order. alias
+  // nodes are neither parameters nor subsections, so they must not
+  // be entered; this is the same selection that
+  // print_parameters_section() uses for its subsections
+  for (boost::property_tree::ptree::const_assoc_iterator
+       p = current_section.ordered_begin();
+       p != current_section.not_found(); ++p)
+    if ((is_parameter_node (p->second) == false)
+        &&
+        (is_alias_node (p->second) == false))
+      {
+        // the subsection name is pushed onto the log stream while
+        // its contents are being written
+        out.push (demangle(p->first));
+        enter_subsection (demangle(p->first));
+        log_parameters_section (out);
+        leave_subsection ();
+        out.pop ();
+      }
+}
+
+
+
+// Interpret one line of a parameter file and apply it to this object.
+//
+// The line is first stripped of any '#' comment, tabs are turned into
+// spaces, and leading/trailing whitespace is trimmed. What remains is
+// one of: nothing (accepted), 'subsection <name>', 'end',
+// 'set <name> = <value>' or 'include <file>'; each keyword is also
+// recognized in its all-uppercase spelling.
+//
+// @p input_filename and @p current_line_n are only used to label the
+// diagnostics written to std::cerr.
+//
+// Returns true if the line was empty or could be processed, and false
+// (after printing a diagnostic) otherwise. For an include statement the
+// result of read_input() on the included file is returned.
+bool
+ParameterHandler::scan_line (std::string         line,
+                             const std::string  &input_filename,
+                             const unsigned int  current_line_n)
+{
+  // if there is a comment, delete it; the position of '#' is looked up
+  // only once
+  const std::string::size_type comment_start = line.find('#');
+  if (comment_start != std::string::npos)
+    line.erase (comment_start, std::string::npos);
+
+  // replace \t by space, in a single pass over the line
+  for (std::string::size_type i=0; i<line.size(); ++i)
+    if (line[i] == '\t')
+      line[i] = ' ';
+
+  //trim start and end:
+  line = Utilities::trim(line);
+
+  // if line is now empty: leave
+  if (line.length() == 0)
+    return true;
+
+  // enter subsection
+  if ((line.find ("SUBSECTION ") == 0) ||
+      (line.find ("subsection ") == 0))
+    {
+      // delete this prefix
+      line.erase (0, std::string("subsection").length()+1);
+
+      const std::string subsection = Utilities::trim(line);
+
+      // check whether subsection exists
+      if (!entries->get_child_optional (get_current_full_path(subsection)))
+        {
+          std::cerr << "Line <" << current_line_n
+                    << "> of file <" << input_filename
+                    << ">: There is no such subsection to be entered: "
+                    << demangle(get_current_full_path(subsection)) << std::endl;
+          for (unsigned int i=0; i<subsection_path.size(); ++i)
+            std::cerr << std::setw(i*2+4) << " "
+                      << "subsection " << subsection_path[i] << std::endl;
+          std::cerr << std::setw(subsection_path.size()*2+4) << " "
+                    << "subsection " << subsection << std::endl;
+          return false;
+        }
+
+      // subsection exists
+      subsection_path.push_back (subsection);
+      return true;
+    }
+
+  // exit subsection
+  if ((line.find ("END") == 0) ||
+      (line.find ("end") == 0))
+    {
+      line.erase (0, 3);
+      // std::isspace() must not be called with a negative value, which
+      // a plain char holding a non-ASCII byte may be; hence the cast
+      while ((line.size() > 0) &&
+             (std::isspace(static_cast<unsigned char>(line[0]))))
+        line.erase (0, 1);
+
+      if (line.size()>0)
+        {
+          std::cerr << "Line <" << current_line_n
+                    << "> of file <" << input_filename
+                    << ">: invalid content after 'end'!" << std::endl;
+          return false;
+        }
+
+      if (subsection_path.size() == 0)
+        {
+          std::cerr << "Line <" << current_line_n
+                    << "> of file <" << input_filename
+                    << ">: There is no subsection to leave here!" << std::endl;
+          return false;
+        }
+      else
+        {
+          leave_subsection ();
+          return true;
+        }
+
+    }
+
+  // regular entry
+  if ((line.find ("SET ") == 0) ||
+      (line.find ("set ") == 0))
+    {
+      // erase "set" statement
+      line.erase (0, 4);
+
+      std::string::size_type pos = line.find("=");
+      if (pos == std::string::npos)
+        {
+          std::cerr << "Line <" << current_line_n
+                    << "> of file <" << input_filename
+                    << ">: invalid format of set expression!" << std::endl;
+          return false;
+        }
+
+      // extract entry name and value and trim
+      std::string entry_name = Utilities::trim(std::string(line, 0, pos));
+      std::string entry_value = Utilities::trim(std::string(line, pos+1, std::string::npos));
+
+      // resolve aliases before we look up the entry. if necessary, print
+      // a warning that the alias is deprecated
+      std::string path = get_current_full_path(entry_name);
+      if (entries->get_optional<std::string>(path + path_separator + "alias"))
+        {
+          if (entries->get<std::string>(path + path_separator + "deprecation_status") == "true")
+            {
+              std::cerr << "Warning in line <" << current_line_n
+                        << "> of file <" << input_filename
+                        << ">: You are using the deprecated spelling <"
+                        << entry_name
+                        << "> of the parameter <"
+                        << entries->get<std::string>(path + path_separator + "alias")
+                        << ">." << std::endl;
+            }
+          path = get_current_full_path(entries->get<std::string>(path + path_separator + "alias"));
+        }
+
+      // assert that the entry is indeed declared
+      if (entries->get_optional<std::string> (path + path_separator + "value"))
+        {
+          // if entry was declared:
+          // does it match the regex? if not,
+          // don't enter it into the database
+          // exception: if it contains characters
+          // which specify it as a multiple loop
+          // entry, then ignore content
+          if (entry_value.find ('{') == std::string::npos)
+            {
+              const unsigned int pattern_index
+                = entries->get<unsigned int> (path + path_separator + "pattern");
+              if (!patterns[pattern_index]->match(entry_value))
+                {
+                  std::cerr << "Line <" << current_line_n
+                            << "> of file <" << input_filename
+                            << ">:" << std::endl
+                            << "    The entry value" << std::endl
+                            << "        " << entry_value << std::endl
+                            << "    for the entry named" << std::endl
+                            << "        " << entry_name << std::endl
+                            << "    does not match the given pattern" << std::endl
+                            << "        " << patterns[pattern_index]->description()
+                            << std::endl;
+                  return false;
+                }
+            }
+
+          entries->put (path + path_separator + "value",
+                        entry_value);
+          return true;
+        }
+      else
+        {
+          std::cerr << "Line <" << current_line_n
+                    << "> of file <" << input_filename
+                    << ">: No such entry was declared:" << std::endl
+                    << "    " << entry_name << std::endl
+                    << "    <Present subsection:" << std::endl;
+          for (unsigned int i=0; i<subsection_path.size(); ++i)
+            std::cerr << std::setw(i*2+8) << " "
+                      << "subsection " << subsection_path[i] << std::endl;
+          std::cerr << "    >" << std::endl;
+
+          return false;
+        }
+    }
+
+  // an include statement?
+  if ((line.find ("INCLUDE ") == 0) ||
+      (line.find ("include ") == 0))
+    {
+      // erase "include " statement and eliminate spaces
+      line.erase (0, 7);
+      while ((line.size() > 0) && (line[0] == ' '))
+        line.erase (0, 1);
+
+      // the remainder must then be a filename
+      if (line.size() == 0)
+        {
+          std::cerr << "Line <" << current_line_n
+                    << "> of file <" << input_filename
+                    << "> is an include statement but does not name a file!"
+                    << std::endl;
+
+          return false;
+        }
+
+      std::ifstream input (line.c_str());
+      if (!input)
+        {
+          std::cerr << "Line <" << current_line_n
+                    << "> of file <" << input_filename
+                    << "> is an include statement but the file <"
+                    << line << "> could not be opened!"
+                    << std::endl;
+
+          return false;
+        }
+      else
+        return read_input (input);
+    }
+
+  // this line matched nothing known
+  std::cerr << "Line <" << current_line_n
+            << "> of file <" << input_filename
+            << ">: This line matched nothing known ('set' or 'subsection' missing!?):" << std::endl
+            << "    " << line << std::endl;
+  return false;
+}
+
+
+
+// Estimate of the memory used by this object, in bytes.
+std::size_t
+ParameterHandler::memory_consumption () const
+{
+  // TODO: the memory held by the property_tree is not yet included;
+  // only the subsection path is accounted for
+  const std::size_t path_bytes
+    = MemoryConsumption::memory_consumption (subsection_path);
+  return path_bytes;
+}
+
+
+
+// Two handlers compare equal if their pattern lists have the same
+// descriptions, entry by entry, and their property trees serialize to
+// the same JSON text.
+bool
+ParameterHandler::operator == (const ParameterHandler &prm2)  const
+{
+  const std::size_t n_patterns = patterns.size();
+  if (n_patterns != prm2.patterns.size())
+    return false;
+
+  for (std::size_t k=0; k<n_patterns; ++k)
+    {
+      const std::string mine   = patterns[k]->description();
+      const std::string theirs = prm2.patterns[k]->description();
+      if (mine != theirs)
+        return false;
+    }
+
+  // rather than walking both trees node by node, dump each of them
+  // into a string and compare the two strings
+  std::ostringstream this_dump;
+  write_json (this_dump, *entries);
+
+  std::ostringstream other_dump;
+  write_json (other_dump, *prm2.entries);
+
+  return this_dump.str() == other_dump.str();
+}
+
+
+
+
+// Destructor; there is nothing to release here.
+MultipleParameterLoop::UserClass::~UserClass ()
+{
+}
+
+
+
+// Default constructor: no branches are known until input has been read.
+MultipleParameterLoop::MultipleParameterLoop()
+  : n_branches(0)
+{
+}
+
+
+
+// Destructor; there is nothing to release here.
+MultipleParameterLoop::~MultipleParameterLoop ()
+{
+}
+
+
+
+// Read parameters from @p input through the base class and, if that
+// succeeded, set up the branches of the multiple-run loop. Returns the
+// success flag of the base class reader.
+bool MultipleParameterLoop::read_input (std::istream &input,
+                                        const std::string &filename)
+{
+  AssertThrow (input, ExcIO());
+
+  // without successfully parsed input there is nothing to branch over
+  if (!ParameterHandler::read_input (input, filename))
+    return false;
+
+  init_branches ();
+  return true;
+}
+
+
+
+// Perform all runs: for each branch, let @p uc create a new object,
+// load the entry values of that branch, and hand this object to
+// uc.run().
+void MultipleParameterLoop::loop (MultipleParameterLoop::UserClass &uc)
+{
+  unsigned int branch = 0;
+  while (branch < n_branches)
+    {
+      // create_new() is given one-based run numbers, whereas
+      // fill_entry_values() takes the zero-based one
+      uc.create_new (branch+1);
+      fill_entry_values (branch);
+      uc.run (*this);
+
+      ++branch;
+    }
+}
+
+
+
+// Collect all multiple-valued entries, split them into their individual
+// values, and compute the number of runs (branches) that results from
+// combining the variant entries.
+void MultipleParameterLoop::init_branches ()
+{
+  typedef std::vector<Entry>::iterator entry_iterator;
+
+  multiple_choices.clear ();
+  init_branches_current_section ();
+
+  // break each collected entry up into its different values
+  for (entry_iterator e = multiple_choices.begin();
+       e != multiple_choices.end(); ++e)
+    e->split_different_values ();
+
+  // the number of branches is the product of the number of values of
+  // all variant entries
+  n_branches = 1;
+  for (entry_iterator e = multiple_choices.begin();
+       e != multiple_choices.end(); ++e)
+    {
+      if (e->type != Entry::variant)
+        continue;
+      n_branches *= e->different_values.size();
+    }
+
+  // array entries have to provide exactly one value per branch;
+  // complain about those that do not
+  for (entry_iterator e = multiple_choices.begin();
+       e != multiple_choices.end(); ++e)
+    {
+      const bool is_array = (e->type == Entry::array);
+      if (is_array && (e->different_values.size() != n_branches))
+        std::cerr << "    The entry value" << std::endl
+                  << "        " << e->entry_value << std::endl
+                  << "    for the entry named" << std::endl
+                  << "        " << e->entry_name << std::endl
+                  << "    does not have the right number of entries for the " << std::endl
+                  << "        " << n_branches << " variant runs that will be performed."
+                  << std::endl;
+    }
+
+  // fill in the values of every branch once right away, so that
+  // pattern mismatches are reported here rather than later in the
+  // middle of all other output
+  for (unsigned int branch=0; branch<n_branches; ++branch)
+    fill_entry_values (branch);
+}
+
+
+
+// Record every entry of the current subsection whose value contains a
+// '{' as a multiple-valued entry, then do the same recursively for all
+// subsections.
+void MultipleParameterLoop::init_branches_current_section ()
+{
+  typedef boost::property_tree::ptree::const_assoc_iterator sorted_iterator;
+  typedef boost::property_tree::ptree::const_iterator       plain_iterator;
+
+  const boost::property_tree::ptree &section
+    = entries->get_child (get_current_path());
+
+  // entries are visited in sorted order to stay backward compatible
+  // with an earlier implementation
+  for (sorted_iterator node = section.ordered_begin();
+       node != section.not_found(); ++node)
+    {
+      if (!is_parameter_node (node->second))
+        continue;
+
+      const std::string value = node->second.get<std::string>("value");
+      if (value.find('{') == std::string::npos)
+        continue;
+
+      multiple_choices.push_back (Entry(subsection_path,
+                                        demangle(node->first),
+                                        value));
+    }
+
+  // everything that is not a parameter is a subsection: descend into it
+  for (plain_iterator node = section.begin();
+       node != section.end(); ++node)
+    {
+      if (is_parameter_node (node->second))
+        continue;
+
+      enter_subsection (demangle(node->first));
+      init_branches_current_section ();
+      leave_subsection ();
+    }
+}
+
+
+
+
+// Set every multiple-valued entry to the value it takes in run number
+// @p run_no (zero-based). Variant entries are enumerated like the digits
+// of a mixed-radix number; array entries are indexed directly by the
+// run number.
+void MultipleParameterLoop::fill_entry_values (const unsigned int run_no)
+{
+  // product of the number of values of all variant entries seen so far
+  unsigned int possibilities = 1;
+
+  for (std::vector<Entry>::size_type c=0; c<multiple_choices.size(); ++c)
+    {
+      Entry &choice = multiple_choices[c];
+
+      const unsigned int selection
+        = (run_no/possibilities) % choice.different_values.size();
+      const bool is_variant = (choice.type == Entry::variant);
+
+      std::string entry_value;
+      if (is_variant)
+        entry_value = choice.different_values[selection];
+      else if (run_no < choice.different_values.size())
+        entry_value = choice.different_values[run_no];
+      else
+        {
+          std::cerr << "The given array for entry <"
+                    << choice.entry_name
+                    << "> does not contain enough elements! Taking empty string instead."
+                    << std::endl;
+          entry_value = "";
+        }
+
+      // temporarily switch to the subsection this entry lives in, set
+      // the value, and switch back. set() also checks the value
+      // against the pattern of the entry
+      subsection_path.swap (choice.subsection_path);
+      set (choice.entry_name, entry_value);
+      subsection_path.swap (choice.subsection_path);
+
+      // only variant entries contribute to the enumeration
+      if (is_variant)
+        possibilities *= choice.different_values.size();
+    }
+}
+
+
+
+
+// Estimate of the memory used by this object, in bytes: what the base
+// class reports plus the memory of every multiple-valued entry.
+std::size_t
+MultipleParameterLoop::memory_consumption () const
+{
+  std::size_t total = ParameterHandler::memory_consumption ();
+
+  for (std::vector<Entry>::const_iterator e = multiple_choices.begin();
+       e != multiple_choices.end(); ++e)
+    total += e->memory_consumption ();
+
+  return total;
+}
+
+
+
+// Store the subsection path, name and raw value of a multiple-valued
+// entry. The type is preset to array; split_different_values()
+// determines the actual type later.
+MultipleParameterLoop::Entry::Entry (const std::vector<std::string> &ssp,
+                                     const std::string              &Name,
+                                     const std::string              &Value)
+  :
+  subsection_path (ssp),
+  entry_name (Name),
+  entry_value (Value),
+  type (Entry::array)
+{
+}
+
+
+
+// Split entry_value, which has the form "prefix{a|b|c}postfix" (variant)
+// or "prefix{{a|b|c}}postfix" (array), into its alternatives. For each
+// alternative, "prefix" + alternative + "postfix" is appended to
+// different_values, and type is set according to which of the two
+// forms was found.
+//
+// An empty or all-blank selection such as "{}" or "{ }" yields the
+// single alternative "prefix" + "postfix".
+void MultipleParameterLoop::Entry::split_different_values ()
+{
+  // split string into three parts:
+  // part before the opening "{",
+  // the selection itself, final
+  // part after "}"
+  const std::string::size_type open_pos  = entry_value.find('{');
+  const std::string::size_type close_pos = entry_value.rfind('}');
+
+  std::string prefix  (entry_value, 0, open_pos);
+  std::string multiple(entry_value, open_pos+1, close_pos-open_pos-1);
+  std::string postfix (entry_value, close_pos+1, std::string::npos);
+
+  // if array entry {{..}}: delete inner pair of braces. every access
+  // to the first or last character below is guarded against an empty
+  // string, for which multiple.size()-1 would wrap around
+  if (!multiple.empty() && (multiple[0] == '{'))
+    multiple.erase (0,1);
+  if (!multiple.empty() && (multiple[multiple.size()-1] == '}'))
+    multiple.erase (multiple.size()-1, 1);
+
+  // erase leading and trailing spaces in multiple. std::isspace() must
+  // not be given a negative value, hence the cast to unsigned char
+  while (!multiple.empty() &&
+         std::isspace (static_cast<unsigned char>(multiple[0])))
+    multiple.erase (0,1);
+  while (!multiple.empty() &&
+         std::isspace (static_cast<unsigned char>(multiple[multiple.size()-1])))
+    multiple.erase (multiple.size()-1,1);
+
+  // delete spaces around '|'
+  while (multiple.find(" |") != std::string::npos)
+    multiple.replace (multiple.find(" |"), 2, "|");
+  while (multiple.find("| ") != std::string::npos)
+    multiple.replace (multiple.find("| "), 2, "|");
+
+  // peel off one alternative per '|'
+  while (multiple.find('|') != std::string::npos)
+    {
+      different_values.push_back (prefix +
+                                  std::string(multiple, 0, multiple.find('|'))+
+                                  postfix);
+      multiple.erase (0, multiple.find('|')+1);
+    }
+  // make up the last selection ("while" broke
+  // because there was no '|' any more)
+  different_values.push_back (prefix+multiple+postfix);
+
+  // finally check whether this was a variant
+  // entry ({...}) or an array ({{...}})
+  if ((entry_value.find("{{") != std::string::npos) &&
+      (entry_value.find("}}") != std::string::npos))
+    type = Entry::array;
+  else
+    type = Entry::variant;
+}
+
+
+// Estimate of the memory used by this entry, in bytes: the sum over all
+// of its members.
+std::size_t
+MultipleParameterLoop::Entry::memory_consumption () const
+{
+  std::size_t total = sizeof (type);
+
+  total += MemoryConsumption::memory_consumption (subsection_path);
+  total += MemoryConsumption::memory_consumption (entry_name);
+  total += MemoryConsumption::memory_consumption (entry_value);
+  total += MemoryConsumption::memory_consumption (different_values);
+
+  return total;
+}
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/parsed_function.cc b/source/base/parsed_function.cc
new file mode 100644
index 0000000..bebba8e
--- /dev/null
+++ b/source/base/parsed_function.cc
@@ -0,0 +1,185 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2007 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/parsed_function.h>
+#include <deal.II/base/utilities.h>
+
+#include <cstdio>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace Functions
+{
+  // Construct a function with @p n_components components. @p h is
+  // passed on to the AutoDerivativeFunction base class; the function
+  // parser is sized for the same number of components.
+  template <int dim>
+  ParsedFunction<dim>::ParsedFunction (const unsigned int n_components,
+                                       const double       h)
+    :
+    AutoDerivativeFunction<dim> (h, n_components),
+    function_object (n_components)
+  {
+  }
+
+
+
+  // Declare the three entries "Variable names", "Function expression"
+  // and "Function constants" in @p prm. The defaults are the coordinate
+  // names for the given dimension followed by `t', and a zero expression
+  // for each of the @p n_components components.
+  template <int dim>
+  void
+  ParsedFunction<dim>::declare_parameters(ParameterHandler  &prm,
+                                          const unsigned int n_components)
+  {
+    Assert(n_components > 0, ExcZero());
+
+    // default variable names depend on the space dimension
+    std::string default_variables;
+    if (dim == 1)
+      default_variables = "x,t";
+    else if (dim == 2)
+      default_variables = "x,y,t";
+    else if (dim == 3)
+      default_variables = "x,y,z,t";
+    else
+      AssertThrow(false, ExcNotImplemented());
+
+    prm.declare_entry("Variable names", default_variables, Patterns::Anything(),
+                      "The name of the variables as they will be used in the "
+                      "function, separated by commas. By default, the names of variables "
+                      "at which the function will be evaluated is `x' (in 1d), `x,y' (in 2d) or "
+                      "`x,y,z' (in 3d) for spatial coordinates and `t' for time. You can then "
+                      "use these variable names in your function expression and they will be "
+                      "replaced by the values of these variables at which the function is "
+                      "currently evaluated. However, you can also choose a different set "
+                      "of names for the independent variables at which to evaluate your function "
+                      "expression. For example, if you work in spherical coordinates, you may "
+                      "wish to set this input parameter to `r,phi,theta,t' and then use these "
+                      "variable names in your function expression.");
+
+    // default expression: one "0" per component, separated by "; "
+    std::string default_expression ("0");
+    for (unsigned int c=1; c<n_components; ++c)
+      default_expression.append ("; 0");
+
+    prm.declare_entry("Function expression", default_expression, Patterns::Anything(),
+                      "The formula that denotes the function you want to evaluate for "
+                      "particular values of the independent variables. This expression "
+                      "may contain any of the usual operations such as addition or "
+                      "multiplication, as well as all of the common functions such as "
+                      "`sin' or `cos'. In addition, it may contain expressions like "
+                      "`if(x>0, 1, -1)' where the expression evaluates to the second "
+                      "argument if the first argument is true, and to the third argument "
+                      "otherwise. For a full overview of possible expressions accepted "
+                      "see the documentation of the muparser library at http://muparser.beltoforion.de/."
+                      "\n\n"
+                      "If the function you are describing represents a vector-valued "
+                      "function with multiple components, then separate the expressions "
+                      "for individual components by a semicolon.");
+
+    prm.declare_entry("Function constants", "", Patterns::Anything(),
+                      "Sometimes it is convenient to use symbolic constants in the "
+                      "expression that describes the function, rather than having to "
+                      "use its numeric value everywhere the constant appears. These "
+                      "values can be defined using this parameter, in the form "
+                      "`var1=value1, var2=value2, ...'."
+                      "\n\n"
+                      "A typical example would be to set this runtime parameter to "
+                      "`pi=3.1415926536' and then use `pi' in the expression of the "
+                      "actual formula. (That said, for convenience this class actually "
+                      "defines both `pi' and `Pi' by default, but you get the idea.)");
+  }
+
+
+
+  // Read the entries "Variable names", "Function expression" and
+  // "Function constants" from @p prm and initialize the function parser
+  // with them.
+  //
+  // The constants are given as a comma separated list of `name=value'
+  // pairs. An exception is thrown if a pair does not consist of exactly
+  // two parts, if the value does not start with a number, or if the
+  // number of variable names is neither dim nor dim+1.
+  template <int dim>
+  void ParsedFunction<dim>::parse_parameters(ParameterHandler &prm)
+  {
+    std::string vnames = prm.get("Variable names");
+    std::string expression = prm.get("Function expression");
+    std::string constants_list = prm.get("Function constants");
+
+    std::vector<std::string> const_list =
+      Utilities::split_string_list(constants_list, ',');
+    std::map<std::string, double> constants;
+    for (unsigned int i = 0; i < const_list.size(); ++i)
+      {
+        std::vector<std::string> this_c =
+          Utilities::split_string_list(const_list[i], '=');
+        AssertThrow(this_c.size() == 2, ExcMessage("Invalid format"));
+        double tmp = 0;
+        // sscanf() returns the number of converted items, or EOF (a
+        // nonzero value) if the input ends before any conversion took
+        // place; only a return value of exactly 1 means that tmp has
+        // been set
+        AssertThrow( std::sscanf(this_c[1].c_str(), "%lf", &tmp) == 1,
+                     ExcMessage("Double number?"));
+        constants[this_c[0]] = tmp;
+      }
+
+    // set pi and Pi as synonyms for the corresponding value. note that
+    // this overrides any value a user may have given
+    constants["pi"] = numbers::PI;
+    constants["Pi"] = numbers::PI;
+
+    // the number of variable names decides whether the function
+    // depends on time
+    const unsigned int nn = (Utilities::split_string_list(vnames)).size();
+    switch (nn)
+      {
+      case dim:
+        // Time independent function
+        function_object.initialize(vnames, expression, constants);
+        break;
+      case dim+1:
+        // Time dependent function
+        function_object.initialize(vnames, expression, constants, true);
+        break;
+      default:
+        AssertThrow(false,
+                    ExcMessage("The list of variables specified is <" + vnames
+                               + "> which is a list of length "
+                               + Utilities::int_to_string(nn)
+                               + " but it has to be a list of length equal to"
+                               + " either dim (for a time-independent function)"
+                               + " or dim+1 (for a time-dependent function)."));
+      }
+  }
+
+
+
+  // Evaluate all components at point @p p into @p values; the work is
+  // delegated to the function parser.
+  template <int dim>
+  void
+  ParsedFunction<dim>::vector_value (const Point<dim> &p,
+                                     Vector<double>   &values) const
+  {
+    function_object.vector_value (p, values);
+  }
+
+
+
+  // Evaluate component @p comp at point @p p; the work is delegated to
+  // the function parser.
+  template <int dim>
+  double
+  ParsedFunction<dim>::value (const Point<dim>   &p,
+                              unsigned int comp) const
+  {
+    return function_object.value (p, comp);
+  }
+
+
+
+  // Set the time to @p newtime, both in the function parser and in the
+  // AutoDerivativeFunction base class.
+  template <int dim>
+  void
+  ParsedFunction<dim>::set_time (const double newtime)
+  {
+    function_object.set_time (newtime);
+    AutoDerivativeFunction<dim>::set_time (newtime);
+  }
+
+
+// Explicit instantiations of ParsedFunction for dim = 1, 2 and 3
+  template class ParsedFunction<1>;
+  template class ParsedFunction<2>;
+  template class ParsedFunction<3>;
+}
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/partitioner.cc b/source/base/partitioner.cc
new file mode 100644
index 0000000..3e44d8b
--- /dev/null
+++ b/source/base/partitioner.cc
@@ -0,0 +1,394 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/partitioner.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace Utilities
+{
+  namespace MPI
+  {
+    // Default constructor: global size zero, an empty local range
+    // [0,0), no ghost or import indices, and a single process on
+    // MPI_COMM_SELF.
+    Partitioner::Partitioner ()
+      :
+      global_size (0),
+      local_range_data (std::make_pair (types::global_dof_index(0),
+                                        types::global_dof_index(0))),
+      n_ghost_indices_data (0),
+      n_import_indices_data (0),
+      my_pid (0),
+      n_procs (1),
+      communicator (MPI_COMM_SELF),
+      have_ghost_indices (false)
+    {
+    }
+
+
+
+
+    // Construct a partitioner for a single process on MPI_COMM_SELF
+    // that owns the complete range [0,size) and has no ghost indices.
+    Partitioner::Partitioner (const unsigned int size)
+      :
+      global_size (size),
+      locally_owned_range_data (size),
+      local_range_data (std::make_pair (types::global_dof_index(0),
+                                        types::global_dof_index(size))),
+      n_ghost_indices_data (0),
+      n_import_indices_data (0),
+      my_pid (0),
+      n_procs (1),
+      communicator (MPI_COMM_SELF),
+      have_ghost_indices (false)
+    {
+      // all indices are locally owned ...
+      locally_owned_range_data.add_range (0, size);
+      locally_owned_range_data.compress ();
+
+      // ... and the set of ghost indices stays empty, but is given the
+      // matching size
+      ghost_indices_data.set_size (size);
+    }
+
+
+
+    // Construct a partitioner from the locally owned indices and the
+    // ghost indices on the given communicator. The global size is the
+    // size of @p locally_owned_indices; the remaining data is set up
+    // by set_owned_indices() and set_ghost_indices().
+    Partitioner::Partitioner (const IndexSet &locally_owned_indices,
+                              const IndexSet &ghost_indices_in,
+                              const MPI_Comm  communicator_in)
+      :
+      global_size (static_cast<types::global_dof_index>
+                   (locally_owned_indices.size())),
+      n_ghost_indices_data (0),
+      n_import_indices_data (0),
+      my_pid (0),
+      n_procs (1),
+      communicator (communicator_in),
+      have_ghost_indices (false)
+    {
+      // the owned range has to be known before the ghost indices can
+      // be set
+      set_owned_indices (locally_owned_indices);
+      set_ghost_indices (ghost_indices_in);
+    }
+
+
+
+    // Construct a partitioner from the locally owned indices on the
+    // given communicator, without any ghost indices. The global size is
+    // the size of @p locally_owned_indices; the remaining data is set
+    // up by set_owned_indices().
+    Partitioner::Partitioner (const IndexSet &locally_owned_indices,
+                              const MPI_Comm  communicator_in)
+      :
+      global_size (static_cast<types::global_dof_index>
+                   (locally_owned_indices.size())),
+      n_ghost_indices_data (0),
+      n_import_indices_data (0),
+      my_pid (0),
+      n_procs (1),
+      communicator (communicator_in),
+      have_ghost_indices (false)
+    {
+      set_owned_indices (locally_owned_indices);
+    }
+
+
+
+    // Set the locally owned range from @p locally_owned_indices, which
+    // is asserted to be contiguous. This also determines the rank and
+    // the number of processes, and resizes the ghost index set to the
+    // size of the given index set.
+    void
+    Partitioner::set_owned_indices (const IndexSet &locally_owned_indices)
+    {
+      // rank and number of processes are taken from the communicator
+      // only if the job supports MPI
+      const bool with_mpi = (Utilities::System::job_supports_mpi() == true);
+      my_pid  = with_mpi ? Utilities::MPI::this_mpi_process(communicator) : 0;
+      n_procs = with_mpi ? Utilities::MPI::n_mpi_processes(communicator) : 1;
+
+      // set the local range
+      Assert (locally_owned_indices.is_contiguous() == true,
+              ExcMessage ("The index set specified in locally_owned_indices "
+                          "is not contiguous."));
+      locally_owned_indices.compress();
+
+      // an empty index set leaves the local range untouched
+      const IndexSet::size_type n_owned = locally_owned_indices.n_elements();
+      if (n_owned > 0)
+        {
+          const IndexSet::size_type range_begin
+            = locally_owned_indices.nth_index_in_set(0);
+          local_range_data
+            = std::pair<types::global_dof_index, types::global_dof_index>
+              (range_begin, range_begin + n_owned);
+        }
+
+      AssertThrow (local_range_data.second-local_range_data.first <
+                   static_cast<types::global_dof_index>(std::numeric_limits<unsigned int>::max()),
+                   ExcMessage("Index overflow: This class supports at most 2^32-1 locally owned vector entries"));
+
+      const IndexSet::size_type total_size = locally_owned_indices.size();
+
+      locally_owned_range_data.set_size (total_size);
+      locally_owned_range_data.add_range (local_range_data.first,
+                                          local_range_data.second);
+      locally_owned_range_data.compress();
+
+      ghost_indices_data.set_size (total_size);
+    }
+
+
+
+    void
+    Partitioner::set_ghost_indices (const IndexSet &ghost_indices_in)
+    {
+      // Set ghost indices from input. To be sure that no entries from the
+      // locally owned range are present, subtract the locally owned indices
+      // in any case.
+      Assert (ghost_indices_in.n_elements() == 0 ||
+              ghost_indices_in.size() == locally_owned_range_data.size(),
+              ExcDimensionMismatch (ghost_indices_in.size(),
+                                    locally_owned_range_data.size()));
+
+      ghost_indices_data = ghost_indices_in;
+      if (ghost_indices_data.size() != locally_owned_range_data.size())
+        ghost_indices_data.set_size(locally_owned_range_data.size());
+      ghost_indices_data.subtract_set (locally_owned_range_data);
+      ghost_indices_data.compress();
+      AssertThrow (ghost_indices_data.n_elements() <
+                   static_cast<types::global_dof_index>(std::numeric_limits<unsigned int>::max()),
+                   ExcMessage("Index overflow: This class supports at most 2^32-1 ghost elements"));
+      n_ghost_indices_data = ghost_indices_data.n_elements();
+
+      have_ghost_indices =
+        Utilities::MPI::sum(n_ghost_indices_data, communicator) > 0;
+
+      // In the rest of this function, we determine the point-to-point
+      // communication pattern of the partitioner. We make up a list with both
+      // the processors the ghost indices actually belong to, and the indices
+      // that are locally held but ghost indices of other processors. This
+      // allows then to import and export data very easily.
+
+      // find out the end index for each processor and communicate it (this
+      // implies the start index for the next processor)
+#ifdef DEAL_II_WITH_MPI
+      if (n_procs < 2)
+        {
+          Assert (ghost_indices_data.n_elements() == 0, ExcInternalError());
+          Assert (n_import_indices_data == 0, ExcInternalError());
+          Assert (n_ghost_indices_data  == 0, ExcInternalError());
+          return;
+        }
+
+      std::vector<types::global_dof_index> first_index (n_procs+1);
+      // Allow non-zero start index for the vector. send this data to all
+      // processors
+      first_index[0] = local_range_data.first;
+      MPI_Bcast(&first_index[0], 1, DEAL_II_DOF_INDEX_MPI_TYPE,
+                0, communicator);
+
+      // Get the end-of-local_range for all processors
+      MPI_Allgather(&local_range_data.second, 1,
+                    DEAL_II_DOF_INDEX_MPI_TYPE, &first_index[1], 1,
+                    DEAL_II_DOF_INDEX_MPI_TYPE, communicator);
+      first_index[n_procs] = global_size;
+
+      // fix case when there are some processors without any locally owned
+      // indices: then there might be a zero in some entries
+      if (global_size > 0)
+        {
+          unsigned int first_proc_with_nonzero_dofs = 0;
+          for (unsigned int i=0; i<n_procs; ++i)
+            if (first_index[i+1]>0)
+              {
+                first_proc_with_nonzero_dofs = i;
+                break;
+              }
+          for (unsigned int i=first_proc_with_nonzero_dofs+1; i<n_procs; ++i)
+            if (first_index[i] == 0)
+              first_index[i] = first_index[i-1];
+
+          // correct if our processor has a wrong local range
+          if (first_index[my_pid] != local_range_data.first)
+            {
+              Assert(local_range_data.first == local_range_data.second,
+                     ExcInternalError());
+              local_range_data.first = local_range_data.second = first_index[my_pid];
+            }
+        }
+
+      // Allocate memory for data that will be exported
+      std::vector<types::global_dof_index> expanded_ghost_indices (n_ghost_indices_data);
+      unsigned int n_ghost_targets = 0;
+      if (n_ghost_indices_data > 0)
+        {
+          // Create first a vector of ghost_targets from the list of ghost
+          // indices and then push back new values. When we are done, copy the
+          // data to that field of the partitioner. This way, the variable
+          // ghost_targets will have exactly the size we need, whereas the
+          // vector filled with push_back might actually be too long.
+          unsigned int current_proc = 0;
+          ghost_indices_data.fill_index_vector (expanded_ghost_indices);
+          types::global_dof_index current_index = expanded_ghost_indices[0];
+          while (current_index >= first_index[current_proc+1])
+            current_proc++;
+          std::vector<std::pair<unsigned int, unsigned int> > ghost_targets_temp
+          (1, std::pair<unsigned int, unsigned int>(current_proc, 0));
+          n_ghost_targets++;
+
+          for (unsigned int iterator=1; iterator<n_ghost_indices_data; ++iterator)
+            {
+              current_index = expanded_ghost_indices[iterator];
+              while (current_index >= first_index[current_proc+1])
+                current_proc++;
+              AssertIndexRange (current_proc, n_procs);
+              if ( ghost_targets_temp[n_ghost_targets-1].first < current_proc)
+                {
+                  ghost_targets_temp[n_ghost_targets-1].second =
+                    iterator - ghost_targets_temp[n_ghost_targets-1].second;
+                  ghost_targets_temp.push_back(std::pair<unsigned int,
+                                               unsigned int>(current_proc,iterator));
+                  n_ghost_targets++;
+                }
+            }
+          ghost_targets_temp[n_ghost_targets-1].second =
+            n_ghost_indices_data - ghost_targets_temp[n_ghost_targets-1].second;
+          ghost_targets_data = ghost_targets_temp;
+        }
+      // find the processes that want to import to me
+      {
+        std::vector<int> send_buffer (n_procs, 0);
+        std::vector<int> receive_buffer (n_procs, 0);
+        for (unsigned int i=0; i<n_ghost_targets; i++)
+          send_buffer[ghost_targets_data[i].first] = ghost_targets_data[i].second;
+
+        MPI_Alltoall (&send_buffer[0], 1, MPI_INT, &receive_buffer[0], 1,
+                      MPI_INT, communicator);
+
+        // allocate memory for import data
+        std::vector<std::pair<unsigned int,unsigned int> > import_targets_temp;
+        n_import_indices_data = 0;
+        for (unsigned int i=0; i<n_procs; i++)
+          if (receive_buffer[i] > 0)
+            {
+              n_import_indices_data += receive_buffer[i];
+              import_targets_temp.push_back(std::pair<unsigned int,
+                                            unsigned int> (i, receive_buffer[i]));
+            }
+        import_targets_data = import_targets_temp;
+      }
+
+      // send and receive indices for import data. non-blocking receives and
+      // blocking sends
+      std::vector<types::global_dof_index> expanded_import_indices (n_import_indices_data);
+      {
+        unsigned int current_index_start = 0;
+        std::vector<MPI_Request> import_requests (import_targets_data.size());
+        for (unsigned int i=0; i<import_targets_data.size(); i++)
+          {
+            MPI_Irecv (&expanded_import_indices[current_index_start],
+                       import_targets_data[i].second,
+                       DEAL_II_DOF_INDEX_MPI_TYPE,
+                       import_targets_data[i].first, import_targets_data[i].first,
+                       communicator, &import_requests[i]);
+            current_index_start += import_targets_data[i].second;
+          }
+        AssertDimension (current_index_start, n_import_indices_data);
+
+        // use blocking send
+        current_index_start = 0;
+        for (unsigned int i=0; i<n_ghost_targets; i++)
+          {
+            MPI_Send (&expanded_ghost_indices[current_index_start],
+                      ghost_targets_data[i].second, DEAL_II_DOF_INDEX_MPI_TYPE,
+                      ghost_targets_data[i].first, my_pid,
+                      communicator);
+            current_index_start += ghost_targets_data[i].second;
+          }
+        AssertDimension (current_index_start, n_ghost_indices_data);
+
+        if (import_requests.size()>0)
+          MPI_Waitall (import_requests.size(), &import_requests[0],
+                       MPI_STATUSES_IGNORE);
+
+        // transform import indices to local index space and compress
+        // contiguous indices in form of ranges
+        {
+          types::global_dof_index last_index = numbers::invalid_dof_index-1;
+          std::vector<std::pair<unsigned int,unsigned int> > compressed_import_indices;
+          for (unsigned int i=0; i<n_import_indices_data; i++)
+            {
+              Assert (expanded_import_indices[i] >= local_range_data.first &&
+                      expanded_import_indices[i] < local_range_data.second,
+                      ExcIndexRange(expanded_import_indices[i], local_range_data.first,
+                                    local_range_data.second));
+              types::global_dof_index new_index = (expanded_import_indices[i] -
+                                                   local_range_data.first);
+              Assert(new_index<numbers::invalid_unsigned_int,
+                     ExcNotImplemented());
+              if (new_index == last_index+1)
+                compressed_import_indices.back().second++;
+              else
+                {
+                  compressed_import_indices.push_back
+                  (std::pair<unsigned int,unsigned int>(new_index,new_index+1));
+                }
+              last_index = new_index;
+            }
+          import_indices_data = compressed_import_indices;
+
+          // sanity check
+#ifdef DEBUG
+          const types::global_dof_index n_local_dofs = local_range_data.second-local_range_data.first;
+          for (unsigned int i=0; i<import_indices_data.size(); ++i)
+            {
+              AssertIndexRange (import_indices_data[i].first, n_local_dofs);
+              AssertIndexRange (import_indices_data[i].second-1, n_local_dofs);
+            }
+#endif
+        }
+      }
+#endif
+    }
+
+
+
+    bool
+    Partitioner::is_compatible (const Partitioner &part) const
+    {
+      // an object is always compatible with itself, so neither the
+      // communicators nor the index data need to be compared in that case
+      if (this == &part)
+        return true;
+#ifdef DEAL_II_WITH_MPI
+      if (Utilities::MPI::job_supports_mpi())
+        {
+          // the two communicators have to be identical or congruent
+          int comparison = 0;
+          MPI_Comm_compare (part.communicator, communicator, &comparison);
+          if (comparison != MPI_IDENT && comparison != MPI_CONGRUENT)
+            return false;
+        }
+#endif
+      const bool same_layout = (global_size == part.global_size &&
+                                local_range_data == part.local_range_data &&
+                                ghost_indices_data == part.ghost_indices_data);
+      return same_layout;
+    }
+
+
+
+    bool
+    Partitioner::is_globally_compatible (const Partitioner &part) const
+    {
+      const int local_vote = is_compatible(part); // 1: compatible on this process
+      return (Utilities::MPI::min(local_vote, communicator) == 1);
+    }
+
+
+
+    std::size_t
+    Partitioner::memory_consumption() const
+    {
+      // fixed-size members first, then all members owning dynamic storage
+      return (3*sizeof(types::global_dof_index) + 4*sizeof(unsigned int) +
+              sizeof(MPI_Comm) +
+              MemoryConsumption::memory_consumption(locally_owned_range_data) +
+              MemoryConsumption::memory_consumption(ghost_targets_data) +
+              MemoryConsumption::memory_consumption(import_targets_data) +
+              MemoryConsumption::memory_consumption(import_indices_data) +
+              MemoryConsumption::memory_consumption(ghost_indices_data));
+    }
+
+  } // end of namespace MPI
+
+} // end of namespace Utilities
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/path_search.cc b/source/base/path_search.cc
new file mode 100644
index 0000000..91745af
--- /dev/null
+++ b/source/base/path_search.cc
@@ -0,0 +1,256 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/path_search.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/utilities.h>
+
+#include <iostream>
+#include <cstdio>
+#include <algorithm>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+std::map<std::string, std::vector<std::string> > PathSearch::path_lists; // file class -> directory prefixes
+std::map<std::string, std::vector<std::string> > PathSearch::suffix_lists; // file class -> suffixes
+std::string PathSearch::empty(""); // entry meaning "no directory prefix" / "no suffix"
+
+void
+PathSearch::initialize_classes()
+{
+  // The two predefined file classes are "PARAMETER" and "MESH". Both are
+  // searched with the empty directory prefix only, i.e., file names are
+  // used exactly as given.
+  //
+  // TODO: reenable some sensible default paths. Maier, 2012
+  const std::vector<std::string> no_prefix (1, empty);
+  path_lists.insert(map_type(std::string("PARAMETER"), no_prefix));
+  path_lists.insert(map_type(std::string("MESH"), no_prefix));
+
+  // Parameter files: first the name as given, then with ".prm" appended.
+  std::vector<std::string> parameter_suffixes (1, empty);
+  parameter_suffixes.push_back(std::string(".prm"));
+  suffix_lists.insert(map_type(std::string("PARAMETER"), parameter_suffixes));
+
+  // Mesh files: first the name as given, then each of the extensions
+  // below in this order. The list is hard-coded; the original remark was
+  // that GridIn cannot be queried for its formats since this would
+  // require linking with the deal.II libraries.
+  //
+  // TODO: "Would require linking with the deal.II libraries"? This .cc
+  // file gets compiled into the library... maier, 2012
+  static const char *const mesh_extensions[] =
+  {
+    ".inp", ".xda", ".dbmesh", ".dat", ".plt", ".nc", ".msh"
+  };
+  const unsigned int n_mesh_extensions
+    = sizeof(mesh_extensions) / sizeof(mesh_extensions[0]);
+
+  std::vector<std::string> mesh_suffixes (1, empty);
+  for (unsigned int i=0; i<n_mesh_extensions; ++i)
+    mesh_suffixes.push_back(std::string(mesh_extensions[i]));
+  suffix_lists.insert(map_type(std::string("MESH"), mesh_suffixes));
+}
+
+std::vector<std::string> &
+PathSearch::get_path_list(const std::string &cls)
+{
+  // make sure the predefined classes exist before looking anything up
+  if (path_lists.empty())
+    initialize_classes();
+
+  // unknown classes are registered on the fly (added by Luca Heltai)
+  if (path_lists.find(cls) == path_lists.end())
+    add_class(cls);
+
+  Assert(path_lists.count(cls) != 0, ExcInternalError());
+  return path_lists.find(cls)->second;
+}
+
+
+std::vector<std::string> &
+PathSearch::get_suffix_list(const std::string &cls)
+{
+  // No explicit initialization of the predefined classes is done here (the
+  // original author notes it would be redundant); add_class() takes care
+  // of it if the path lists are still empty.
+
+  // unknown classes are registered on the fly (added by Luca Heltai)
+  if (suffix_lists.find(cls) == suffix_lists.end())
+    add_class(cls);
+
+  Assert(suffix_lists.count(cls) != 0, ExcInternalError());
+  return suffix_lists.find(cls)->second;
+}
+
+
+PathSearch::PathSearch(const std::string &cls,
+                       const unsigned int debug)
+  :
+  cls(cls),                             // name of the file class to search
+  my_path_list(get_path_list(cls)),     // directory list (class is added if unknown)
+  my_suffix_list(get_suffix_list(cls)), // suffix list (class is added if unknown)
+  debug(debug)                          // verbosity of the deallog output in find()
+{}
+
+
+std::string
+PathSearch::find (const std::string &filename,
+                  const std::string &suffix,
+                  const char *open_mode)
+{
+  // Look for a file in all directories registered for this object.
+  //
+  // In each directory, in the order in which the directories are stored,
+  // up to two candidate names are tried:
+  //   1. directory + filename, i.e., the name exactly as given, which may
+  //      already include a suffix;
+  //   2. directory + filename + suffix, unless the suffix is empty, in
+  //      which case this would be the same name as the first candidate.
+  //
+  // A candidate counts as found if fopen() succeeds on it with the given
+  // open_mode; the file is closed again right away and the candidate name
+  // is returned. If no candidate can be opened, ExcFileNotFound is thrown.
+  //
+  // The amount of output written to deallog is controlled by 'debug':
+  //   > 0: report the name of the file that was opened,
+  //   > 1: additionally report every name that is tried,
+  //   > 2: additionally report the number of directories.
+  if (debug > 2)
+    deallog << "PathSearch[" << cls << "] "
+            << my_path_list.size() << " directories "
+            << std::endl;
+
+  const unsigned int n_candidates = (suffix != "" ? 2 : 1);
+
+  std::vector<std::string>::const_iterator directory = my_path_list.begin();
+  const std::vector<std::string>::const_iterator last = my_path_list.end();
+  for (; directory != last; ++directory)
+    for (unsigned int candidate=0; candidate<n_candidates; ++candidate)
+      {
+        // candidate 0 is the file name as given, candidate 1 is the file
+        // name with the suffix appended
+        std::string real_name = *directory + filename;
+        if (candidate == 1)
+          real_name += suffix;
+
+        if (debug > 1)
+          deallog << "PathSearch[" << cls << "] trying "
+                  << real_name << std::endl;
+
+        // a null pointer means the file could not be opened in this mode
+        FILE *fp = fopen(real_name.c_str(), open_mode);
+        if (fp == 0)
+          continue;
+
+        if (debug > 0)
+          deallog << "PathSearch[" << cls << "] opened "
+                  << real_name << std::endl;
+        fclose(fp);
+        return real_name;
+      }
+
+  AssertThrow(false, ExcFileNotFound(filename, cls));
+  return std::string("");
+}
+
+std::string
+PathSearch::find (const std::string &filename,
+                  const char *open_mode)
+{
+  // return the first match over all suffixes registered for this object
+  if (debug > 2)
+    deallog << "PathSearch[" << cls << "] "
+            << my_path_list.size() << " directories "
+            << my_suffix_list.size() << " suffixes"
+            << std::endl;
+
+  const std::vector<std::string>::const_iterator ends = my_suffix_list.end();
+  for (std::vector<std::string>::const_iterator suffix = my_suffix_list.begin();
+       suffix != ends; ++suffix)
+    {
+      try
+        {
+          return find(filename, *suffix, open_mode);
+        }
+      // catch by const reference; catching by value copies the exception
+      catch (const ExcFileNotFound &)
+        {
+          // nothing found with this suffix: go on with the next one
+        }
+    }
+  AssertThrow(false, ExcFileNotFound(filename, cls));
+  return std::string("");
+}
+
+
+void
+PathSearch::add_class (const std::string &cls)
+{
+  // The predefined classes have to be set up before user-defined ones
+  // are added.
+  if (path_lists.empty())
+    initialize_classes();
+
+  // A new class starts with the empty path and the empty suffix only.
+  // insert() leaves the entry of an already existing class untouched.
+  const std::vector<std::string> only_empty (1, empty);
+  path_lists.insert(map_type(cls, only_empty));
+  suffix_lists.insert(map_type(cls, only_empty));
+}
+
+
+void
+PathSearch::add_path (const std::string &path,
+                      Position pos)
+{
+  // 'after_none' means: right behind the first empty entry, if there is one
+  if (pos == front)
+    my_path_list.insert(my_path_list.begin(), path);
+  else if (pos == back)
+    my_path_list.insert(my_path_list.end(), path);
+  else if (pos == after_none)
+    {
+      const std::vector<std::string>::iterator first_empty
+        = std::find(my_path_list.begin(), my_path_list.end(), empty);
+      my_path_list.insert((first_empty != my_path_list.end() ?
+                           first_empty+1 : first_empty), path);
+    }
+}
+
+
+void
+PathSearch::add_suffix (const std::string &suffix,
+                        Position pos)
+{
+  // 'after_none' means: right behind the first empty entry, if there is one
+  if (pos == front)
+    my_suffix_list.insert(my_suffix_list.begin(), suffix);
+  else if (pos == back)
+    my_suffix_list.insert(my_suffix_list.end(), suffix);
+  else if (pos == after_none)
+    {
+      const std::vector<std::string>::iterator first_empty
+        = std::find(my_suffix_list.begin(), my_suffix_list.end(), empty);
+      my_suffix_list.insert((first_empty != my_suffix_list.end() ?
+                             first_empty+1 : first_empty), suffix);
+    }
+}
+
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/polynomial.cc b/source/base/polynomial.cc
new file mode 100644
index 0000000..cdea89d
--- /dev/null
+++ b/source/base/polynomial.cc
@@ -0,0 +1,1375 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/polynomial.h>
+#include <deal.II/base/point.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/thread_management.h>
+
+#include <cmath>
+#include <algorithm>
+#include <limits>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+// Lock that is meant to guarantee that at most one thread at a time
+// changes or accesses the 'coefficients' arrays of the classes that
+// implement polynomials with tables. The lock is local to this file.
+//
+// Having only one lock for all of these classes is probably not going
+// to be a problem since it is needed only on very rare occasions. If
+// someone finds that this is a bottleneck, feel free to replace it by
+// a more fine-grained solution.
+namespace
+{
+  Threads::Mutex coefficients_lock;
+}
+
+
+
+namespace Polynomials
+{
+
+// -------------------- class Polynomial ---------------- //
+
+
+  template <typename number>
+  Polynomial<number>::Polynomial (const std::vector<number> &a)
+    :
+    coefficients             (a),     // copy of the given coefficient vector
+    in_lagrange_product_form (false), // polynomial is stored by coefficients
+    lagrange_weight          (1.)
+  {}
+
+
+
+  template <typename number>
+  Polynomial<number>::Polynomial (const unsigned int n)
+    :
+    coefficients             (n+1, 0.), // n+1 coefficients, all set to zero
+    in_lagrange_product_form (false),   // polynomial is stored by coefficients
+    lagrange_weight          (1.)
+  {}
+
+
+
+  template <typename number>
+  Polynomial<number>::Polynomial (const std::vector<Point<1> > &supp,
+                                  const unsigned int            center)
+    :
+    in_lagrange_product_form (true)
+  {
+    Assert (supp.size()>0, ExcEmptyObject());
+    AssertIndexRange (center, supp.size());
+
+    // all support points but the center one are roots of the polynomial
+    lagrange_support_points.reserve (supp.size()-1);
+    number tmp_lagrange_weight = 1.;
+    for (unsigned int i=0; i<supp.size(); ++i)
+      {
+        if (i == center)
+          continue;
+        tmp_lagrange_weight *= supp[center](0) - supp[i](0);
+        lagrange_support_points.push_back(supp[i](0));
+      }
+    // the Lagrange denominator must neither have under- nor overflowed
+    Assert (std::fabs(tmp_lagrange_weight) > std::numeric_limits<number>::min(),
+            ExcMessage ("Underflow in computation of Lagrange denominator."));
+    Assert (std::fabs(tmp_lagrange_weight) < std::numeric_limits<number>::max(),
+            ExcMessage ("Overflow in computation of Lagrange denominator."));
+    lagrange_weight = static_cast<number>(1.)/tmp_lagrange_weight;
+  }
+
+
+
+  template <typename number>
+  void
+  Polynomial<number>::value (const number         x,
+                             std::vector<number> &values) const
+  {
+    Assert (values.size() > 0, ExcZero());
+    const unsigned int values_size=values.size();
+
+    // on return, values[k] holds the k-th derivative at x (values[0]: value)
+    if (in_lagrange_product_form == true)
+      {
+        // to compute the value and all derivatives of a polynomial of the
+        // form (x-x_1)*(x-x_2)*...*(x-x_n), expand the derivatives like
+        // automatic differentiation does.
+        const unsigned int n_supp = lagrange_support_points.size();
+        switch (values_size)
+          {
+          default:
+            values[0] = 1;
+            for (unsigned int d=1; d<values_size; ++d)
+              values[d] = 0;
+            for (unsigned int i=0; i<n_supp; ++i)
+              {
+                const number v = x-lagrange_support_points[i];
+
+                // multiply by (x-x_i) and compute action on all derivatives,
+                // too (inspired from automatic differentiation: implement the
+                // product rule for the old value and the new variable 'v',
+                // i.e., expand value v and derivative one). since we reuse a
+                // value from the next lower derivative from the steps before,
+                // need to start from the highest derivative
+                for (unsigned int k=values_size-1; k>0; --k)
+                  values[k] = (values[k] * v + values[k-1]);
+                values[0] *= v;
+              }
+            // finally, multiply by the weight in the Lagrange
+            // denominator. Could be done instead of setting values[0] = 1
+            // above, but that gives different accumulation of round-off
+            // errors (multiplication is not associative) compared to when we
+            // computed the weight, and hence a basis function might not be
+            // exactly one at the center point, which is nice to have. We also
+            // multiply derivatives by k! (the factorial) to transform the
+            // stored p_k = p^(k)(x)/k! into the actual k-th derivative
+            {
+              number k_faculty = 1;
+              for (unsigned int k=0; k<values_size; ++k)
+                {
+                  values[k] *= k_faculty * lagrange_weight;
+                  k_faculty *= static_cast<number>(k+1);
+                }
+            }
+            break;
+
+          // manually implement size 1 (values only), size 2 (value + first
+          // derivative), and size 3 (up to second derivative) since they
+          // might be called often. then, we can unroll the loop.
+          case 1:
+            values[0] = 1;
+            for (unsigned int i=0; i<n_supp; ++i)
+              {
+                const number v = x-lagrange_support_points[i];
+                values[0] *= v;
+              }
+            values[0] *= lagrange_weight;
+            break;
+
+          case 2:
+            values[0] = 1;
+            values[1] = 0;
+            for (unsigned int i=0; i<n_supp; ++i)
+              {
+                const number v = x-lagrange_support_points[i];
+                values[1] = values[1] * v + values[0];
+                values[0] *= v;
+              }
+            values[0] *= lagrange_weight;
+            values[1] *= lagrange_weight;
+            break;
+
+          case 3:
+            values[0] = 1;
+            values[1] = 0;
+            values[2] = 0;
+            for (unsigned int i=0; i<n_supp; ++i)
+              {
+                const number v = x-lagrange_support_points[i];
+                values[2] = values[2] * v + values[1];
+                values[1] = values[1] * v + values[0];
+                values[0] *= v;
+              }
+            values[0] *= lagrange_weight;
+            values[1] *= lagrange_weight;
+            values[2] *= static_cast<number>(2) * lagrange_weight;
+            break;
+          }
+        return;
+      }
+
+    Assert (coefficients.size() > 0, ExcEmptyObject());
+
+    // if we only need the value, then call the other function since that is
+    // significantly faster (there is no need to allocate and free memory,
+    // which is really expensive compared to all the other operations!)
+    if (values_size == 1)
+      {
+        values[0] = value(x);
+        return;
+      }
+
+    // if there are derivatives needed, then do it properly by the full Horner
+    // scheme. the factorial j! is accumulated in the type 'number'
+    const unsigned int m=coefficients.size();
+    std::vector<number> a(coefficients);
+    number j_faculty=1;
+
+    // loop over all requested derivatives. note that derivatives with j>=m
+    // are necessarily zero, as they differentiate the polynomial more often
+    // than the highest power is
+    const unsigned int min_valuessize_m=std::min(values_size, m);
+    for (unsigned int j=0; j<min_valuessize_m; ++j)
+      {
+        for (int k=m-2; k>=static_cast<int>(j); --k)
+          a[k]+=x*a[k+1];
+        values[j]=j_faculty*a[j];
+        // no unsigned int here: 13! already exceeds the range of 32 bits
+        j_faculty*=static_cast<number>(j+1);
+      }
+
+    // fill higher derivatives by zero
+    for (unsigned int j=min_valuessize_m; j<values_size; ++j)
+      values[j] = 0;
+  }
+
+
+
+  template <typename number>
+  void
+  Polynomial<number>::transform_into_standard_form ()
+  {
+    // only valid while the product form is active and no coefficients exist
+    Assert (in_lagrange_product_form == true, ExcInternalError());
+    Assert (coefficients.size() == 0, ExcInternalError());
+
+    // Expand prod_i (x-x_i) one factor at a time, starting from the constant
+    // polynomial 1. When the factor (x-x_i) is multiplied in, the leading
+    // coefficient moves up to index i+1, and every lower coefficient c_j
+    // becomes c_{j-1} - x_i*c_j; going from high to low indices makes sure
+    // c_{j-1} still has its old value when it is used.
+    const unsigned int n_roots = lagrange_support_points.size();
+    coefficients.resize (n_roots+1);
+    coefficients[0] = 1.;
+    for (unsigned int i=0; i<n_roots; ++i)
+      {
+        const number root = lagrange_support_points[i];
+        coefficients[i+1] = 1.;
+        for (unsigned int j=i; j>0; --j)
+          coefficients[j] = (-root*coefficients[j] + coefficients[j-1]);
+        coefficients[0] *= -root;
+      }
+
+    // the weight of the product form multiplies all coefficients
+    for (unsigned int i=0; i<n_roots+1; ++i)
+      coefficients[i] *= lagrange_weight;
+
+    // release the memory of the product form data and reset its state
+    std::vector<number>().swap (lagrange_support_points);
+    in_lagrange_product_form = false;
+    lagrange_weight = 1.;
+  }
+
+
+
+  template <typename number>
+  void
+  Polynomial<number>::scale (std::vector<number> &coefficients,
+                             const number         factor)
+  {
+    // replaces x by factor*x: the coefficient of x^i is multiplied by factor^i
+    number factor_power = 1.;
+    for (unsigned int i=0; i<coefficients.size(); ++i)
+      {
+        coefficients[i] *= factor_power;
+        factor_power *= factor;
+      }
+  }
+
+
+
+  template <typename number>
+  void
+  Polynomial<number>::scale (const number factor)
+  {
+    // the coefficient form is handled by the static function of the same name
+    if (in_lagrange_product_form == false)
+      {
+        scale (coefficients, factor);
+        return;
+      }
+    // product form: w*prod(factor*x-x_i) = w*factor^n*prod(x-x_i/factor),
+    // i.e., every root is divided by the factor and the weight is multiplied
+    // by the factor once per root
+    const number inv_fact = number(1.)/factor;
+    number accumulated_fact = 1.;
+    for (unsigned int i=0; i<lagrange_support_points.size(); ++i)
+      {
+        lagrange_support_points[i] *= inv_fact;
+        accumulated_fact *= factor;
+      }
+    lagrange_weight *= accumulated_fact;
+  }
+
+
+
+  template <typename number>
+  void
+  Polynomial<number>::multiply (std::vector<number> &coefficients,
+                                const number         factor)
+  {
+    // multiply every coefficient by the same factor
+    for (unsigned int i=0; i<coefficients.size(); ++i)
+      coefficients[i] *= factor;
+  }
+
+
+
+  template <typename number>
+  Polynomial<number> &
+  Polynomial<number>::operator *= (const double s)
+  {
+    // the scalar goes into the weight (product form) or into all coefficients
+    if (in_lagrange_product_form == true)
+      {
+        lagrange_weight *= s;
+        return *this;
+      }
+    for (unsigned int i=0; i<coefficients.size(); ++i)
+      coefficients[i] *= s;
+    return *this;
+  }
+
+
+
+  template <typename number>
+  Polynomial<number> &
+  Polynomial<number>::operator *= (const Polynomial<number> &p)
+  {
+    // If both factors are in Lagrange product form, so is the product. We
+    // must return right away then: transform_into_standard_form() requires
+    // that the coefficient array is still empty in product form.
+    if (in_lagrange_product_form == true && p.in_lagrange_product_form == true)
+      {
+        // copy the roots first: p may be *this, and a range must not be
+        // inserted into the very vector it refers to
+        const std::vector<number> roots_of_p (p.lagrange_support_points);
+        lagrange_weight *= p.lagrange_weight;
+        lagrange_support_points.insert (lagrange_support_points.end(),
+                                        roots_of_p.begin(), roots_of_p.end());
+        return *this;
+      }
+
+    // cannot retain product form, recompute...
+    if (in_lagrange_product_form == true)
+      transform_into_standard_form();
+
+    // p has to be in standard form as well; work on a copy if it is not
+    std_cxx11::shared_ptr<Polynomial<number> > q_data;
+    const Polynomial<number> *q = 0;
+    if (p.in_lagrange_product_form == true)
+      {
+        q_data.reset (new Polynomial<number>(p));
+        q_data->transform_into_standard_form();
+        q = q_data.get();
+      }
+    else
+      q = &p;
+
+    // the degree of the product is the sum of the degrees of the factors
+    const unsigned int new_degree = this->degree() + q->degree();
+    std::vector<number> new_coefficients(new_degree+1, 0.);
+
+    for (unsigned int i=0; i<q->coefficients.size(); ++i)
+      for (unsigned int j=0; j<this->coefficients.size(); ++j)
+        new_coefficients[i+j] += this->coefficients[j]*q->coefficients[i];
+    this->coefficients = new_coefficients;
+    return *this;
+  }
+
+
+
+  template <typename number>
+  Polynomial<number> &
+  Polynomial<number>::operator += (const Polynomial<number> &p)
+  {
+    // The Lagrange product form cannot reasonably be retained when
+    // polynomials are added. In theory one could check whether this
+    // polynomial or the other one is the zero polynomial and keep the
+    // product form in that case, but currently (r23974) it is assumed
+    // that also the addition of a zero polynomial changes the state, and
+    // equivalence is tested accordingly.
+    if (in_lagrange_product_form == true)
+      transform_into_standard_form();
+
+    // The summand is needed in standard form as well. By default we read
+    // directly from p; if p is in product form, a copy of it is expanded
+    // and used instead, since p itself must not be modified.
+    const Polynomial<number> *summand = &p;
+    std_cxx11::shared_ptr<Polynomial<number> > expanded_copy;
+    if (p.in_lagrange_product_form == true)
+      {
+        expanded_copy.reset (new Polynomial<number>(p));
+        expanded_copy->transform_into_standard_form();
+        summand = expanded_copy.get();
+      }
+    const std::vector<number> &other = summand->coefficients;
+    const unsigned int n_other = other.size();
+
+    // if the summand has more coefficients than we store, append zeros
+    // for the missing higher order terms
+    if (n_other > coefficients.size())
+      coefficients.resize (n_other, 0.);
+
+    // add coefficient by coefficient; our own coefficients beyond those
+    // of the summand stay as they are
+    for (unsigned int i=0; i<n_other; ++i)
+      coefficients[i] += other[i];
+
+    return *this;
+  }
+
+
+
+  template <typename number>
+  Polynomial<number> &
+  Polynomial<number>::operator -= (const Polynomial<number> &p)
+  {
+    // the Lagrange product form cannot reasonably be retained when
+    // polynomials are subtracted
+    if (in_lagrange_product_form == true)
+      transform_into_standard_form();
+
+    // the subtrahend is needed in standard form as well; expand a copy of
+    // it if it is in product form
+    const Polynomial<number> *subtrahend = &p;
+    std_cxx11::shared_ptr<Polynomial<number> > expanded_copy;
+    if (p.in_lagrange_product_form == true)
+      {
+        expanded_copy.reset (new Polynomial<number>(p));
+        expanded_copy->transform_into_standard_form();
+        subtrahend = expanded_copy.get();
+      }
+    const std::vector<number> &other = subtrahend->coefficients;
+    const unsigned int n_other = other.size();
+
+    // if the subtrahend has more coefficients than we store, append zeros
+    // for the missing higher order terms
+    if (n_other > coefficients.size())
+      coefficients.resize (n_other, 0.);
+
+    // subtract coefficient by coefficient
+    for (unsigned int i=0; i<n_other; ++i)
+      coefficients[i] -= other[i];
+
+    return *this;
+  }
+
+
+
+  template <typename number >
+  bool
+  Polynomial<number>::operator == (const Polynomial<number> &p)  const
+  {
+    // Two polynomials in different forms can still be the same, so the
+    // comparison is done on the expanded coefficients unless both are in
+    // product form, in which case weights and roots are compared directly.
+    const bool this_is_product  = (in_lagrange_product_form == true);
+    const bool other_is_product = (p.in_lagrange_product_form == true);
+
+    // both in product form
+    if (this_is_product && other_is_product)
+      return ((lagrange_weight == p.lagrange_weight) &&
+              (lagrange_support_points == p.lagrange_support_points));
+
+    // both in standard form
+    if (!this_is_product && !other_is_product)
+      return (p.coefficients == coefficients);
+
+    // exactly one of the two is in product form: expand a copy of that one
+    // and compare its coefficients with those of the other polynomial
+    Polynomial<number> expanded = (this_is_product ? *this : p);
+    expanded.transform_into_standard_form();
+    const std::vector<number> &reference = (this_is_product ?
+                                            p.coefficients :
+                                            coefficients);
+    return (expanded.coefficients == reference);
+  }
+
+
+
+  template <typename number>
+  template <typename number2>
+  void
+  Polynomial<number>::shift(std::vector<number> &coefficients,
+                            const number2 offset)
+  {
+    // Replace p(x) by p(x+offset). More than 30 coefficients would
+    // overflow the integer binomial coefficient used below.
+    Assert (coefficients.size() < 31, ExcNotImplemented());
+
+    // Work on a copy of the coefficients in the
+    // number type of the offset argument.
+    std::vector<number2> new_coefficients(coefficients.begin(),
+                                          coefficients.end());
+
+    // Expand c_d*(x+offset)^d for every degree d>=1.
+    // c_0 needs no expansion; it only receives
+    // contributions from the higher degrees.
+    for (unsigned int d=1; d<new_coefficients.size(); ++d)
+      {
+        const unsigned int n = d;
+        // Binomial coefficient C(n,k+1) of the
+        // current term, computed by recursion
+        // starting from C(n,0), which is
+        // unity.
+        unsigned int binomial_coefficient = 1;
+
+        // Power offset^(k+1) of the current
+        // term, computed by successive
+        // multiplication.
+        number2 offset_power = offset;
+
+        // Add c_d*C(d,k+1)*offset^(k+1) to
+        // the coefficient of x^(d-k-1) for
+        // all k<d. The coefficient in front
+        // of x^d itself is not modified in
+        // this step.
+        for (unsigned int k=0; k<d; ++k)
+          {
+            // Recursion from Bronstein:
+            // C(n,k+1) = C(n,k)*(n-k)/(k+1).
+            // Multiply first so that the integer
+            // division leaves no remainder.
+            binomial_coefficient = (binomial_coefficient*(n-k))/(k+1);
+
+            new_coefficients[d-k-1] += new_coefficients[d]
+                                       * binomial_coefficient
+                                       * offset_power;
+            offset_power *= offset;
+          }
+        // After a whole row of Pascal's
+        // triangle, the binomial
+        // coefficient is C(n,n), which
+        // must be one.
+        Assert (binomial_coefficient == 1, ExcInternalError());
+      }
+
+    // copy new elements to old vector
+    coefficients.assign(new_coefficients.begin(), new_coefficients.end());
+  }
+
+
+
+  template <typename number>
+  template <typename number2>
+  void
+  Polynomial<number>::shift (const number2 offset)
+  {
+    // a polynomial in standard form is shifted
+    // by transforming its coefficient vector
+    if (in_lagrange_product_form == false)
+      {
+        shift (coefficients, offset);
+        return;
+      }
+    // in product form, (x-x_0)*(x-x_1)*...*(x-x_n), replacing x
+    // by x+offset simply moves every x_i to x_i-offset
+    for (unsigned int i=0; i<lagrange_support_points.size(); ++i)
+      lagrange_support_points[i] -= offset;
+  }
+
+
+
+  template <typename number>
+  Polynomial<number>
+  Polynomial<number>::derivative () const
+  {
+    // the derivative of a constant is the zero polynomial
+    if (degree() == 0)
+      return Monomial<number>(0, 0.);
+
+    // no simple form is possible for a Lagrange polynomial in
+    // product form, so expand a copy into standard form first
+    std_cxx11::shared_ptr<Polynomial<number> > expanded;
+    if (in_lagrange_product_form == true)
+      {
+        expanded.reset (new Polynomial<number>(*this));
+        expanded->transform_into_standard_form();
+      }
+    const std::vector<number> &c = (expanded.get() != 0 ?
+                                    expanded->coefficients :
+                                    coefficients);
+
+    // differentiate term by term: c_i x^i becomes i c_i x^(i-1)
+    std::vector<number> derived (c.size()-1);
+    for (unsigned int i=1 ; i<c.size() ; ++i)
+      derived[i-1] = number(i) * c[i];
+    return Polynomial<number> (derived);
+  }
+
+
+
+  template <typename number>
+  Polynomial<number>
+  Polynomial<number>::primitive () const
+  {
+    // no simple form is possible for a Lagrange polynomial in
+    // product form, so expand a copy into standard form first
+    std_cxx11::shared_ptr<Polynomial<number> > expanded;
+    if (in_lagrange_product_form == true)
+      {
+        expanded.reset (new Polynomial<number>(*this));
+        expanded->transform_into_standard_form();
+      }
+    const std::vector<number> &c = (expanded.get() != 0 ?
+                                    expanded->coefficients :
+                                    coefficients);
+
+    // integrate term by term: c_i x^i becomes c_i/(i+1) x^(i+1);
+    // the constant of integration is chosen as zero
+    std::vector<number> integrated (c.size()+1);
+    integrated[0] = 0.;
+    for (unsigned int i=0 ; i<c.size() ; ++i)
+      integrated[i+1] = c[i]/number(i+1.);
+    return Polynomial<number> (integrated);
+  }
+
+
+
+  template <typename number>
+  void
+  Polynomial<number>::print (std::ostream &out) const
+  {
+    if (in_lagrange_product_form == false)
+      {
+        // standard form: one line per coefficient, highest power first
+        for (int i=degree(); i>=0; --i)
+          out << coefficients[i] << " x^" << i << std::endl;
+        return;
+      }
+    // product form: the weight followed by all linear factors on one line
+    out << lagrange_weight;
+    for (unsigned int i=0; i<lagrange_support_points.size(); ++i)
+      out << " (x-" << lagrange_support_points[i] << ")";
+    out << std::endl;
+  }
+
+
+
+// ------------------ class Monomial -------------------------- //
+
+  template <typename number>
+  std::vector<number>
+  Monomial<number>::make_vector(unsigned int n,
+                                double coefficient)
+  {
+    std::vector<number> monomial(n+1, 0.);  // n+1 zero coefficients
+    monomial.back() = coefficient;          // the one in front of x^n
+    return monomial;
+  }
+
+
+
+  template <typename number>
+  Monomial<number>::Monomial (unsigned int n,
+                              double coefficient)
+    : Polynomial<number>(make_vector(n, coefficient))  // n+1 entries, only entry n is nonzero
+  {}
+
+
+
+  template <typename number>
+  std::vector<Polynomial<number> >
+  Monomial<number>::generate_complete_basis (const unsigned int degree)
+  {
+    std::vector<Polynomial<number> > basis;  // one monomial per power 0,...,degree
+    basis.reserve(degree+1);
+    for (unsigned int power=0; power<=degree; ++power)
+      basis.push_back (Monomial<number>(power));
+    return basis;
+  }
+
+
+
+// ------------------ class LagrangeEquidistant --------------- //
+
+  namespace internal
+  {
+    namespace LagrangeEquidistant
+    {
+      std::vector<Point<1> >
+      generate_equidistant_unit_points (const unsigned int n)
+      {
+        std::vector<Point<1> > points (n+1);            // the points k/n, k=0,...,n
+        const double one_over_n = (n > 0 ? 1./n : 0.);  // n==0: single point 0, no 1./0
+        for (unsigned int k=0; k<=n; ++k)
+          points[k](0) = static_cast<double>(k)*one_over_n;
+        return points;
+      }
+    }
+  }
+
+
+
+  LagrangeEquidistant::LagrangeEquidistant (const unsigned int n,
+                                            const unsigned int support_point)
+    :
+    Polynomial<double> (internal::LagrangeEquidistant::
+                        generate_equidistant_unit_points (n),
+                        support_point)
+  {
+    // the base class constructor used above must not have set up coefficients
+    Assert (coefficients.size() == 0, ExcInternalError());
+
+    // Precomputed weights exist for polynomial order up to 3 only;
+    // higher orders stay in product form. Lower ones use the weights.
+    if (n > 3)
+      return;
+
+    this->in_lagrange_product_form = false;
+    this->lagrange_weight = 1.;
+    std::vector<double>().swap(this->lagrange_support_points);
+    this->coefficients.resize (n+1);
+    compute_coefficients(n, support_point, this->coefficients);
+  }
+
+
+
+  void
+  LagrangeEquidistant::compute_coefficients (const unsigned int n,
+                                             const unsigned int support_point,
+                                             std::vector<double> &a)
+  {
+    // there are n+1 support points, each with its own polynomial
+    const unsigned int n_functions=n+1;
+    Assert(support_point<n_functions,
+           ExcIndexRange(support_point, 0, n_functions));
+
+    // Tabulated coefficients for the orders 1, 2 and 3. Row j of a
+    // table holds the n+1 coefficients, lowest power first, of the
+    // polynomial that belongs to support point j.
+    static const double x1[4]=
+    {
+      1.0, -1.0,
+      0.0, 1.0
+    };
+    static const double x2[9]=
+    {
+      1.0, -3.0, 2.0,
+      0.0, 4.0, -4.0,
+      0.0, -1.0, 2.0
+    };
+    static const double x3[16]=
+    {
+      1.0, -11.0/2.0, 9.0, -9.0/2.0,
+      0.0, 9.0, -45.0/2.0, 27.0/2.0,
+      0.0, -9.0/2.0, 18.0, -27.0/2.0,
+      0.0, 1.0, -9.0/2.0, 9.0/2.0
+    };
+
+    // select the table for the requested order
+    double const *x=0;
+    switch (n)
+      {
+      case 1:
+        x=&x1[0];
+        break;
+      case 2:
+        x=&x2[0];
+        break;
+      case 3:
+        x=&x3[0];
+        break;
+      default:
+        // there is no table for any other order
+        Assert(false, ExcInternalError());
+      }
+    Assert(x!=0, ExcInternalError());
+
+    // copy row support_point of the selected table into a
+    for (unsigned int i=0; i<n_functions; ++i)
+      a[i]=x[support_point*n_functions+i];
+  }
+
+
+
+  std::vector<Polynomial<double> >
+  LagrangeEquidistant::
+  generate_complete_basis (const unsigned int degree)
+  {
+    // degree zero is special: the basis consists
+    // of the constant polynomial 1 only
+    if (degree==0)
+      return std::vector<Polynomial<double> >
+             (1, Polynomial<double> (std::vector<double> (1,1.)));
+
+    // otherwise collect the Lagrange polynomials
+    // of all degree+1 support points
+    std::vector<Polynomial<double> > basis;
+    basis.reserve(degree+1);
+    for (unsigned int support_point=0; support_point<=degree; ++support_point)
+      basis.push_back(LagrangeEquidistant(degree,support_point));
+    return basis;
+  }
+
+
+
+//----------------------------------------------------------------------//
+
+
+  std::vector<Polynomial<double> >
+  generate_complete_Lagrange_basis (const std::vector<Point<1> > &points)
+  {
+    // one polynomial per entry of points, built from the point set and its index
+    std::vector<Polynomial<double> > basis;
+    basis.reserve (points.size());
+    for (unsigned int support_point=0; support_point<points.size(); ++support_point)
+      basis.push_back (Polynomial<double> (points, support_point));
+    return basis;
+  }
+
+
+
+// ------------------ class Legendre --------------- //
+
+
+// Both tables start out with 20 empty slots, i.e. room for the
+// polynomials up to degree 19. Should be sufficient for the start.
+  std::vector<std_cxx11::shared_ptr<const std::vector<double> > >
+  Legendre::recursive_coefficients(20);
+  std::vector<std_cxx11::shared_ptr<const std::vector<double> > >
+  Legendre::shifted_coefficients(20);
+
+
+  Legendre::Legendre (const unsigned int k)
+    :
+    Polynomial<double> (get_coefficients(k))  // copies the entry shifted_coefficients[k]
+  {}
+
+
+
+  void
+  Legendre::compute_coefficients (const unsigned int k_)
+  {
+    // make sure we call the
+    // Polynomial::shift function
+    // only with an argument with
+    // which it will not crash the
+    // compiler
+#ifdef DEAL_II_LONG_DOUBLE_LOOP_BUG
+    typedef double SHIFT_TYPE;
+#else
+    typedef long double SHIFT_TYPE;
+#endif
+    typedef std_cxx11::shared_ptr<std::vector<double> > VectorPointer;
+
+    // The coefficients for degrees 0 and 1 are hard-coded and
+    // always generated together, so a request for degree 0 is
+    // treated like one for degree 1.
+    const unsigned int k = (k_ == 0 ? 1 : k_);
+
+    // First make sure that no other thread intercepts the
+    // operation of this function; for this, hold the lock until
+    // we quit this function. All missing degrees are generated in
+    // the loop below instead of by recursion, so the lock is never
+    // given up in between and every table entry is assigned only
+    // once. The latter matters because get_coefficients() hands
+    // out references to the stored vectors.
+    Threads::Mutex::ScopedLock lock(coefficients_lock);
+
+    // Make sure that there is enough space in *both* tables. Only
+    // ever increase their size, since entries for higher degrees
+    // may already have been computed by an earlier call.
+    if (recursive_coefficients.size() < k+1)
+      recursive_coefficients.resize (k+1);
+    if (shifted_coefficients.size() < k+1)
+      shifted_coefficients.resize (k+1);
+
+    // check: does the information already exist?
+    if (recursive_coefficients[k].get() != 0)
+      return;
+
+    // no, then generate the coefficients of all degrees up to k
+    // that are still missing, from the lowest one upwards
+    for (unsigned int j=1; j<=k; ++j)
+      {
+        // degrees computed by earlier calls are kept as they are
+        if (recursive_coefficients[j].get() != 0)
+          continue;
+
+        if (j==1)
+          {
+            // create the coefficient vectors for degrees 0 and 1,
+            // i.e. for the polynomials 1 and x
+            VectorPointer c0 (new std::vector<double>(1));
+            (*c0)[0] = 1.;
+
+            VectorPointer c1 (new std::vector<double>(2));
+            (*c1)[0] = 0.;
+            (*c1)[1] = 1.;
+
+            // Compute polynomials orthogonal on [0,1]
+            VectorPointer s0 (new std::vector<double>(*c0));
+            VectorPointer s1 (new std::vector<double>(*c1));
+
+            Polynomial<double>::shift<SHIFT_TYPE> (*s0, -1.);
+            Polynomial<double>::scale(*s0, 2.);
+            Polynomial<double>::shift<SHIFT_TYPE> (*s1, -1.);
+            Polynomial<double>::scale(*s1, 2.);
+            Polynomial<double>::multiply(*s1, std::sqrt(3.));
+
+            // now store the vectors; the tables hold pointers to
+            // const vectors, which makes them immutable from here on
+            shifted_coefficients[0] = s0;
+            shifted_coefficients[1] = s1;
+            recursive_coefficients[0] = c0;
+            recursive_coefficients[1] = c1;
+          }
+        else
+          {
+            // for larger degrees, use the three-term recursion that
+            // builds degree j from the degrees j-1 and j-2, both of
+            // which exist at this point
+            const std::vector<double> &p1 = *recursive_coefficients[j-1];
+            const std::vector<double> &p2 = *recursive_coefficients[j-2];
+            VectorPointer cj (new std::vector<double>(j+1));
+
+            const double a = 1./(j);
+            const double b = a*(2*j-1);
+            const double c = a*(j-1);
+
+            (*cj)[j]   = b*p1[j-1];
+            (*cj)[j-1] = b*p1[j-2];
+            for (unsigned int i=1 ; i<= j-2 ; ++i)
+              (*cj)[i] = b*p1[i-1]
+                         -c*p2[i];
+
+            (*cj)[0]   = -c*p2[0];
+
+            // and compute the coefficients for [0,1]
+            VectorPointer sj (new std::vector<double>(*cj));
+            Polynomial<double>::shift<SHIFT_TYPE> (*sj, -1.);
+            Polynomial<double>::scale(*sj, 2.);
+            Polynomial<double>::multiply(*sj, std::sqrt(2.*j+1.));
+
+            // finally store both newly created vectors
+            shifted_coefficients[j] = sj;
+            recursive_coefficients[j] = cj;
+          }
+      }
+  }
+
+
+
+  const std::vector<double> &
+  Legendre::get_coefficients (const unsigned int k)
+  {
+    // generate the coefficients first, in
+    // case they do not exist yet
+    compute_coefficients (k);
+
+    // then look up the entry for degree k while
+    // holding the lock, i.e. in a MT safe way
+    Threads::Mutex::ScopedLock lock (coefficients_lock);
+    const std::vector<double> &requested = *shifted_coefficients[k];
+    return requested;
+  }
+
+
+
+  std::vector<Polynomial<double> >
+  Legendre::generate_complete_basis (const unsigned int degree)
+  {
+    std::vector<Polynomial<double> > basis;  // Legendre(0),...,Legendre(degree)
+    basis.reserve(degree+1);
+    for (unsigned int k=0; k<=degree; ++k)
+      basis.push_back (Legendre(k));
+    return basis;
+  }
+
+
+
+// ------------------ class Lobatto -------------------- //
+
+
+  Lobatto::Lobatto (const unsigned int p) : Polynomial<double> (compute_coefficients (p))
+  {
+  }  // nothing else to do: the base class receives the coefficients computed for p
+
+  std::vector<double> Lobatto::compute_coefficients (const unsigned int p)
+  {
+    switch (p)  // p = 0, 1, 2 are written out explicitly; coefficients are stored lowest power first
+      {
+      case 0:
+      {
+        std::vector<double> coefficients (2);
+        // the polynomial 1 - x
+        coefficients[0] = 1.0;
+        coefficients[1] = -1.0;
+        return coefficients;
+      }
+
+      case 1:
+      {
+        std::vector<double> coefficients (2);
+        // the polynomial x
+        coefficients[0] = 0.0;
+        coefficients[1] = 1.0;
+        return coefficients;
+      }
+
+      case 2:
+      {
+        std::vector<double> coefficients (3);
+        // the polynomial sqrt(3) * (x^2 - x)
+        coefficients[0] = 0.0;
+        coefficients[1] = -1.0 * std::sqrt (3.);
+        coefficients[2] = std::sqrt (3.);
+        return coefficients;
+      }
+
+      default:
+      {
+        std::vector<double> coefficients (p + 1);
+        std::vector<double> legendre_coefficients_tmp1 (p);
+        std::vector<double> legendre_coefficients_tmp2 (p - 1);
+        // start values: sqrt(3)*(2x-1) in coefficients, 1 in tmp1 (presumably the Legendre polynomials of degree 1 and 0 on [0,1] -- TODO confirm)
+        coefficients[0] = -1.0 * std::sqrt (3.);
+        coefficients[1] = 2.0 * std::sqrt (3.);
+        legendre_coefficients_tmp1[0] = 1.0;
+        // three-term recurrence: step i builds the degree i polynomial from the two previous ones
+        for (unsigned int i = 2; i < p; ++i)
+          {
+            for (unsigned int j = 0; j < i - 1; ++j)
+              legendre_coefficients_tmp2[j] = legendre_coefficients_tmp1[j];  // tmp2 <- degree i-2
+
+            for (unsigned int j = 0; j < i; ++j)
+              legendre_coefficients_tmp1[j] = coefficients[j];  // tmp1 <- degree i-1
+            // lowest coefficient of the new degree i polynomial
+            coefficients[0] = std::sqrt (2 * i + 1.) * ((1.0 - 2 * i) * legendre_coefficients_tmp1[0] / std::sqrt (2 * i - 1.) + (1.0 - i) * legendre_coefficients_tmp2[0] / std::sqrt (2 * i - 3.)) / i;
+            // interior coefficients receive contributions from both previous polynomials
+            for (unsigned int j = 1; j < i - 1; ++j)
+              coefficients[j] = std::sqrt (2 * i + 1.) * (std::sqrt (2 * i - 1.) * (2.0 * legendre_coefficients_tmp1[j - 1] - legendre_coefficients_tmp1[j]) + (1.0 - i) * legendre_coefficients_tmp2[j] / std::sqrt (2 * i - 3.)) / i;
+            // the two highest coefficients only depend on the degree i-1 polynomial
+            coefficients[i - 1] = std::sqrt (4 * i * i - 1.) * (2.0 * legendre_coefficients_tmp1[i - 2] - legendre_coefficients_tmp1[i - 1]) / i;
+            coefficients[i] = 2.0 * std::sqrt (4 * i * i - 1.) * legendre_coefficients_tmp1[i - 1] / i;
+          }
+        // term-by-term primitive of the degree p-1 result: the x^(i-1) coefficient moves to x^i, divided by i
+        for (int i = p; i > 0; --i)
+          coefficients[i] = coefficients[i - 1] / i;
+        // the constant of integration is zero
+        coefficients[0] = 0.0;
+        return coefficients;
+      }
+      }
+  }
+
+  std::vector<Polynomial<double> > Lobatto::generate_complete_basis (const unsigned int p)
+  {
+    std::vector<Polynomial<double> > polynomials (p + 1);
+    // overwrite the default-constructed entries with Lobatto(0),...,Lobatto(p)
+    for (unsigned int index = 0; index <= p; ++index)
+      polynomials[index] = Lobatto (index);
+    // hand the completed basis back to the caller
+    return polynomials;
+  }
+
+
+
+// ------------------ class Hierarchical --------------- //
+
+
+// The table starts out with 20 empty slots, i.e. room for the polynomials
+// up to degree 19; compute_coefficients() enlarges it when necessary.
+  std::vector<std_cxx11::shared_ptr<const std::vector<double> > >
+  Hierarchical::recursive_coefficients(20);
+
+
+
+  Hierarchical::Hierarchical (const unsigned int k)
+    :
+    Polynomial<double> (get_coefficients(k))  // copies the entry recursive_coefficients[k]
+  {}
+
+
+
+  void
+  Hierarchical::compute_coefficients (const unsigned int k_)
+  {
+    typedef std_cxx11::shared_ptr<std::vector<double> > VectorPointer;
+
+    // The coefficients for degrees 0 and 1 are hard-coded and
+    // always generated together, so a request for degree 0 is
+    // treated like one for degree 1.
+    const unsigned int k = (k_ == 0 ? 1 : k_);
+
+    // First make sure that no other thread intercepts the
+    // operation of this function; for this, hold the lock until
+    // we quit this function. All missing degrees are generated in
+    // the loop below instead of by recursion, so the lock is never
+    // given up in between and every table entry is assigned only
+    // once. The latter matters because get_coefficients() hands
+    // out references to the stored vectors.
+    Threads::Mutex::ScopedLock lock(coefficients_lock);
+
+    // Make sure that there is enough space in the table. Only ever
+    // increase its size, since entries for higher degrees may
+    // already have been computed by an earlier call.
+    if (recursive_coefficients.size() < k+1)
+      recursive_coefficients.resize (k+1);
+
+    // check: does the information already exist?
+    if (recursive_coefficients[k].get() != 0)
+      return;
+
+    // no, then generate the coefficients of all degrees up to k
+    // that are still missing, from the lowest one upwards
+    for (unsigned int j=1; j<=k; ++j)
+      {
+        // degrees computed by earlier calls are kept as they are
+        if (recursive_coefficients[j].get() != 0)
+          continue;
+
+        if (j==1)
+          {
+            // create the coefficient vectors for k=0 and k=1,
+            // i.e. for the polynomials 1-x and x
+            VectorPointer c0 (new std::vector<double>(2));
+            (*c0)[0] =  1.;
+            (*c0)[1] = -1.;
+
+            VectorPointer c1 (new std::vector<double>(2));
+            (*c1)[0] = 0.;
+            (*c1)[1] = 1.;
+
+            // now store the vectors; the table holds pointers to
+            // const vectors, which makes them immutable from here on
+            recursive_coefficients[0] = c0;
+            recursive_coefficients[1] = c1;
+          }
+        else if (j==2)
+          {
+            // the quadratic polynomial 4x^2 - 4x is hard-coded as well
+            VectorPointer c2 (new std::vector<double>(3));
+
+            const double a = 1.; //1./8.;
+
+            (*c2)[0] =   0.*a;
+            (*c2)[1] =  -4.*a;
+            (*c2)[2] =   4.*a;
+
+            recursive_coefficients[2] = c2;
+          }
+        else
+          {
+            // for larger degrees, compute the coefficients from
+            // those of degree j-1, which exist at this point
+            const std::vector<double> &previous = *recursive_coefficients[j-1];
+            VectorPointer cj (new std::vector<double>(j+1));
+
+            const double a = 1.; //1./(2.*j);
+
+            (*cj)[0] = - a*previous[0];
+
+            for (unsigned int i=1; i<=j-1; ++i)
+              (*cj)[i] = a*( 2.*previous[i-1]
+                             - previous[i] );
+
+            (*cj)[j] = a*2.*previous[j-1];
+            // for even degrees, we need
+            // to add a multiple of
+            // basis fcn phi_2
+            if ( (j%2) == 0 )
+              {
+                double b = 1.; //8.;
+                //for (unsigned int i=1; i<=j; i++)
+                //  b /= 2.*i;
+
+                (*cj)[1] += b*(*recursive_coefficients[2])[1];
+                (*cj)[2] += b*(*recursive_coefficients[2])[2];
+              }
+            // finally store the newly created vector
+            recursive_coefficients[j] = cj;
+          }
+      }
+  }
+
+
+
+  const std::vector<double> &
+  Hierarchical::get_coefficients (const unsigned int k)
+  {
+    // generate the coefficients first, in
+    // case they do not exist yet
+    compute_coefficients (k);
+
+    // then look up the entry for degree k while
+    // holding the lock, i.e. in a MT safe way
+    Threads::Mutex::ScopedLock lock (coefficients_lock);
+    const std::vector<double> &requested = *recursive_coefficients[k];
+    return requested;
+  }
+
+
+
+  std::vector<Polynomial<double> >
+  Hierarchical::generate_complete_basis (const unsigned int degree)
+  {
+    // Degree zero needs special treatment: the
+    // constructor of this class, called with
+    // argument zero, does _not_ create the
+    // constant polynomial, but rather 1-x. We
+    // can therefore not use the general code
+    // below, and the basis of degree zero
+    // consists of an explicitly built
+    // constant polynomial instead.
+    if (degree==0)
+      return std::vector<Polynomial<double> >
+             (1, Polynomial<double> (std::vector<double> (1,1.)));
+
+    // for all other degrees, collect the
+    // polynomials of this class up to the
+    // requested degree
+    std::vector<Polynomial<double> > basis;
+    basis.reserve(degree+1);
+    for (unsigned int k=0; k<=degree; ++k)
+      basis.push_back (Hierarchical(k));
+    return basis;
+  }
+
+// ------------------ HermiteInterpolation --------------- //
+
+  HermiteInterpolation::HermiteInterpolation (const unsigned int p)
+    :
+    Polynomial<double>((p<4) ? 3 : 4)  // degree 3 for p<4, degree 4 otherwise
+  {  // coefficients not assigned below keep their initial value (presumably 0)
+    if (p==0)
+      {
+        this->coefficients[0] = 1.;    // 1 - 3x^2 + 2x^3
+        this->coefficients[2] = -3.;
+        this->coefficients[3] = 2.;
+      }
+    else if (p==1)
+      {
+        this->coefficients[2] = 3.;    // 3x^2 - 2x^3
+        this->coefficients[3] = -2.;
+      }
+    else if (p==2)
+      {
+        this->coefficients[1] = 1.;    // x - 2x^2 + x^3
+        this->coefficients[2] = -2.;
+        this->coefficients[3] = 1.;
+      }
+    else if (p==3)
+      {
+        this->coefficients[2] = -1.;   // -x^2 + x^3
+        this->coefficients[3] = 1.;
+      }
+    else
+      {
+        // 16x^2 - 32x^3 + 16x^4, multiplied by Legendre(p-4) for p>4
+        this->coefficients[4] = 16.;   // the base polynomial has degree 4,
+        this->coefficients[3] = -32.;  // so that the product has degree p
+        this->coefficients[2] = 16.;   // without padding zero coefficients
+        if (p>4)
+          {
+            Legendre legendre(p-4);
+            (*this) *= legendre;
+          }
+      }
+  }
+
+
+  std::vector<Polynomial<double> >
+  HermiteInterpolation::generate_complete_basis (const unsigned int n)
+  {
+    std::vector<Polynomial<double> > polynomials (n + 1);
+    // overwrite the default-constructed entries one by one
+    for (unsigned int index = 0; index <= n; ++index)
+      polynomials[index] = HermiteInterpolation (index);
+    // hand the completed basis back to the caller
+    return polynomials;
+  }
+}
+
+// ------------------ explicit instantiations --------------- //
+
+namespace Polynomials
+{
+  template class Polynomial<float>;
+  template class Polynomial<double>;
+  template class Polynomial<long double>;
+  // shift() is a member template: instantiate it per (number, number2) pair
+  template void Polynomial<float>::shift(const float offset);
+  template void Polynomial<float>::shift(const double offset);
+  template void Polynomial<double>::shift(const double offset);
+  template void Polynomial<long double>::shift(const long double offset);
+  template void Polynomial<float>::shift(const long double offset);
+  template void Polynomial<double>::shift(const long double offset);
+  // Monomial for the same three number types as Polynomial
+  template class Monomial<float>;
+  template class Monomial<double>;
+  template class Monomial<long double>;
+}
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/polynomial_space.cc b/source/base/polynomial_space.cc
new file mode 100644
index 0000000..664d29e
--- /dev/null
+++ b/source/base/polynomial_space.cc
@@ -0,0 +1,410 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2002 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/polynomial_space.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/table.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+template <int dim>
+unsigned int
+PolynomialSpace<dim>::compute_n_pols (const unsigned int n)
+{
+  // n*(n+1)*...*(n+dim-1) / (1*2*...*dim), built up factor by
+  // factor; every intermediate product of consecutive integers
+  // is divisible by the divisor applied to it
+  unsigned int result = n;
+  for (unsigned int i=2; i<=dim; ++i)
+    result = (result*(n+i-1))/i;
+  return result;
+}
+
+
+template <>
+unsigned int
+PolynomialSpace<0>::compute_n_pols (const unsigned int)
+{
+  return 0;  // the dim==0 specialization ignores its argument and reports no polynomials
+}
+
+
+template <>
+void
+PolynomialSpace<1>::
+compute_index (const unsigned int i,
+               unsigned int      (&index)[1]) const
+{
+  Assert(i<index_map.size(),
+         ExcIndexRange(i,0,index_map.size()));
+  // in 1d the renumbered index is the 1d polynomial number itself
+  index[0] = index_map[i];
+}
+
+
+
+template <>
+void
+PolynomialSpace<2>::
+compute_index (const unsigned int i,
+               unsigned int      (&index)[2]) const
+{
+  Assert(i<index_map.size(),
+         ExcIndexRange(i,0,index_map.size()));
+  const unsigned int n=index_map[i];
+  // row iy holds n_1d-iy polynomials; walk through the rows until the
+  // one containing n is found (linear in n_1d, could be improved)
+  const unsigned int n_1d=polynomials.size();
+  unsigned int row_start=0;
+  for (unsigned int iy=0; iy<n_1d; ++iy)
+    {
+      const unsigned int row_length=n_1d-iy;
+      if (n < row_start+row_length)
+        {
+          index[0] = n-row_start;
+          index[1] = iy;
+          return;
+        }
+      row_start += row_length;
+    }
+}
+
+
+
+template <>
+void
+PolynomialSpace<3>::
+compute_index (const unsigned int i,
+               unsigned int      (&index)[3]) const
+{
+  Assert(i<index_map.size(),
+         ExcIndexRange(i,0,index_map.size()));
+  const unsigned int n=index_map[i];
+  // row (iy,iz) holds n_1d-iy-iz polynomials; walk through
+  // the rows until the one containing n is found. there
+  // should be a better way to write this function (not
+  // quadratic in n_1d), someone should think about this...
+  // (ah, and yes: the original algorithm was even cubic!)
+  const unsigned int n_1d=polynomials.size();
+  unsigned int row_start=0;
+  for (unsigned int iz=0; iz<n_1d; ++iz)
+    for (unsigned int iy=0; iy<n_1d-iz; ++iy)
+      {
+        const unsigned int row_length=n_1d-iy-iz;
+        if (n < row_start+row_length)
+          {
+            index[0] = n-row_start;
+            index[1] = iy;
+            index[2] = iz;
+            return;
+          }
+        row_start += row_length;
+      }
+}
+
+
+template <int dim>
+void
+PolynomialSpace<dim>::set_numbering(
+  const std::vector<unsigned int> &renumber)
+{
+  Assert(renumber.size()==index_map.size(),
+         ExcDimensionMismatch(renumber.size(), index_map.size()));
+  // adopt the new numbering and record, for every target
+  // index, the position that is mapped onto it
+  index_map=renumber;
+  for (unsigned int position=0; position<index_map.size(); ++position)
+    index_map_inverse[index_map[position]]=position;
+}
+
+
+
+template <int dim>
+double
+PolynomialSpace<dim>::compute_value (const unsigned int i,
+                                     const Point<dim>  &p) const
+{
+  unsigned int ix[dim];
+  compute_index(i,ix);
+
+  // the value is the product over all space directions of the
+  // selected 1d polynomial evaluated at the respective coordinate
+  double product = 1.;
+  for (unsigned int d=0; d<dim; ++d)
+    product *= polynomials[ix[d]].value(p(d));
+  return product;
+}
+
+
+
+template <int dim>
+Tensor<1,dim>
+PolynomialSpace<dim>::compute_grad (const unsigned int i,
+                                    const Point<dim>  &p) const
+{
+  unsigned int ix[dim];
+  compute_index(i,ix);
+
+  // every gradient component starts as an empty product
+  Tensor<1,dim> gradient;
+  for (unsigned int c=0; c<dim; ++c)
+    gradient[c] = 1.;
+
+  // for each direction d, get value and first derivative of the 1d
+  // polynomial: component d picks up the derivative, all others the value
+  std::vector<double> value_and_derivative(2);
+  for (unsigned int d=0; d<dim; ++d)
+    {
+      polynomials[ix[d]].value(p(d), value_and_derivative);
+      for (unsigned int c=0; c<dim; ++c)
+        gradient[c] *= value_and_derivative[c == d ? 1 : 0];
+    }
+  return gradient;
+}
+
+
+template <int dim>
+Tensor<2,dim>
+PolynomialSpace<dim>::compute_grad_grad (const unsigned int i,
+                                         const Point<dim>  &p) const
+{
+  unsigned int ix[dim];
+  compute_index(i,ix);  // one 1d polynomial number per space direction
+  // every entry starts as an empty product and collects one factor per direction
+  Tensor<2,dim> result;
+  for (unsigned int d=0; d<dim; ++d)
+    for (unsigned int d1=0; d1<dim; ++d1)
+      result[d][d1] = 1.;
+  // for each direction d, v receives the
+  // value, first and second derivative of the
+  // 1d polynomial of that direction at p(d)
+  std::vector<double> v(3);
+  for (unsigned int d=0; d<dim; ++d)
+    {
+      polynomials[ix[d]].value(p(d), v);
+      result[d][d] *= v[2];  // entry (d,d): second derivative
+      for (unsigned int d1=0; d1<dim; ++d1)
+        {
+          if (d1 != d)
+            {
+              result[d][d1] *= v[1];  // exactly one index equals d:
+              result[d1][d] *= v[1];  // first derivative
+              for (unsigned int d2=0; d2<dim; ++d2)
+                if (d2 != d)
+                  result[d1][d2] *= v[0];  // neither index equals d: plain value
+            }
+        }
+    }
+  return result;
+}
+
+
+template <int dim>
+void
+PolynomialSpace<dim>::compute (const Point<dim>            &p,
+                               std::vector<double>         &values,
+                               std::vector<Tensor<1,dim> > &grads,
+                               std::vector<Tensor<2,dim> > &grad_grads,
+                               std::vector<Tensor<3,dim> > &third_derivatives,
+                               std::vector<Tensor<4,dim> > &fourth_derivatives) const
+{
+  // Evaluate all polynomials of this space at the point p, together
+  // with their derivatives up to fourth order. Each output vector
+  // must have either n_pols entries, in which case the corresponding
+  // quantity is computed, or no entries at all, in which case it is
+  // skipped. The result for the k-th polynomial in traversal order
+  // is stored at position index_map_inverse[k].
+  const unsigned int n_1d=polynomials.size();
+
+  Assert(values.size()==n_pols || values.size()==0,
+         ExcDimensionMismatch2(values.size(), n_pols, 0));
+  Assert(grads.size()==n_pols|| grads.size()==0,
+         ExcDimensionMismatch2(grads.size(), n_pols, 0));
+  Assert(grad_grads.size()==n_pols|| grad_grads.size()==0,
+         ExcDimensionMismatch2(grad_grads.size(), n_pols, 0));
+  Assert(third_derivatives.size()==n_pols|| third_derivatives.size()==0,
+         ExcDimensionMismatch2(third_derivatives.size(), n_pols, 0));
+  Assert(fourth_derivatives.size()==n_pols|| fourth_derivatives.size()==0,
+         ExcDimensionMismatch2(fourth_derivatives.size(), n_pols, 0));
+
+  // A quantity is requested if its vector has n_pols entries. The
+  // highest requested derivative determines how many entries (value
+  // plus derivatives) the 1d evaluations below have to provide.
+  const bool update_values          = (values.size()==n_pols);
+  const bool update_grads           = (grads.size()==n_pols);
+  const bool update_grad_grads      = (grad_grads.size()==n_pols);
+  const bool update_3rd_derivatives = (third_derivatives.size()==n_pols);
+  const bool update_4th_derivatives = (fourth_derivatives.size()==n_pols);
+
+  unsigned int v_size=0;
+  if (update_values)
+    v_size=1;
+  if (update_grads)
+    v_size=2;
+  if (update_grad_grads)
+    v_size=3;
+  if (update_3rd_derivatives)
+    v_size=4;
+  if (update_4th_derivatives)
+    v_size=5;
+
+  // Nothing was requested: return before the 1d polynomials are
+  // asked to fill an empty vector of values and derivatives.
+  if (v_size == 0)
+    return;
+
+  // Store data in a single object. Access is by v[d][n][o]
+  //  d: coordinate direction
+  //  n: number of 1d polynomial
+  //  o: order of derivative
+  Table<2,std::vector<double> > v(dim, n_1d);
+  for (unsigned int d=0; d<v.size()[0]; ++d)
+    for (unsigned int i=0; i<v.size()[1]; ++i)
+      {
+        v(d,i).resize (v_size, 0.);
+        polynomials[i].value(p(d), v(d,i));
+      }
+
+  // All blocks below traverse the polynomials in the same order: ix,
+  // iy, iz number the 1d polynomials in the x, y, z direction and k
+  // counts the traversal. Every result is a product of one 1d factor
+  // per direction; factors of directions beyond dim are set to one.
+
+  // values
+  if (update_values)
+    {
+      unsigned int k = 0;
+
+      for (unsigned int iz=0; iz<((dim>2) ? n_1d : 1); ++iz)
+        for (unsigned int iy=0; iy<((dim>1) ? n_1d-iz : 1); ++iy)
+          for (unsigned int ix=0; ix<n_1d-iy-iz; ++ix)
+            values[index_map_inverse[k++]] =
+              v[0][ix][0]
+              * ((dim>1) ? v[1][iy][0] : 1.)
+              * ((dim>2) ? v[2][iz][0] : 1.);
+    }
+
+  // first derivatives
+  if (update_grads)
+    {
+      unsigned int k = 0;
+
+      for (unsigned int iz=0; iz<((dim>2) ? n_1d : 1); ++iz)
+        for (unsigned int iy=0; iy<((dim>1) ? n_1d-iz : 1); ++iy)
+          for (unsigned int ix=0; ix<n_1d-iy-iz; ++ix)
+            {
+              const unsigned int k2=index_map_inverse[k++];
+              for (unsigned int d=0; d<dim; ++d)
+                grads[k2][d] = v[0][ix][(d==0) ? 1 : 0]
+                               * ((dim>1) ? v[1][iy][(d==1) ? 1 : 0] : 1.)
+                               * ((dim>2) ? v[2][iz][(d==2) ? 1 : 0] : 1.);
+            }
+    }
+
+  // second derivatives
+  if (update_grad_grads)
+    {
+      unsigned int k = 0;
+
+      for (unsigned int iz=0; iz<((dim>2) ? n_1d : 1); ++iz)
+        for (unsigned int iy=0; iy<((dim>1) ? n_1d-iz : 1); ++iy)
+          for (unsigned int ix=0; ix<n_1d-iy-iz; ++ix)
+            {
+              const unsigned int k2=index_map_inverse[k++];
+              for (unsigned int d1=0; d1<dim; ++d1)
+                for (unsigned int d2=0; d2<dim; ++d2)
+                  {
+                    // Derivative order for each direction: one
+                    // for every index equal to that direction.
+                    // Entries of directions beyond dim stay zero.
+                    unsigned int deriv_order[3] = {0, 0, 0};
+                    ++deriv_order[d1];
+                    ++deriv_order[d2];
+
+                    grad_grads[k2][d1][d2] =
+                      v[0][ix][deriv_order[0]]
+                      * ((dim>1) ? v[1][iy][deriv_order[1]] : 1.)
+                      * ((dim>2) ? v[2][iz][deriv_order[2]] : 1.);
+                  }
+            }
+    }
+
+  // third derivatives
+  if (update_3rd_derivatives)
+    {
+      unsigned int k = 0;
+
+      for (unsigned int iz=0; iz<((dim>2) ? n_1d : 1); ++iz)
+        for (unsigned int iy=0; iy<((dim>1) ? n_1d-iz : 1); ++iy)
+          for (unsigned int ix=0; ix<n_1d-iy-iz; ++ix)
+            {
+              const unsigned int k2=index_map_inverse[k++];
+              for (unsigned int d1=0; d1<dim; ++d1)
+                for (unsigned int d2=0; d2<dim; ++d2)
+                  for (unsigned int d3=0; d3<dim; ++d3)
+                    {
+                      // Derivative order for each direction: one
+                      // for every index equal to that direction.
+                      unsigned int deriv_order[3] = {0, 0, 0};
+                      ++deriv_order[d1];
+                      ++deriv_order[d2];
+                      ++deriv_order[d3];
+
+                      third_derivatives[k2][d1][d2][d3] =
+                        v[0][ix][deriv_order[0]]
+                        * ((dim>1) ? v[1][iy][deriv_order[1]] : 1.)
+                        * ((dim>2) ? v[2][iz][deriv_order[2]] : 1.);
+                    }
+            }
+    }
+
+  // fourth derivatives
+  if (update_4th_derivatives)
+    {
+      unsigned int k = 0;
+
+      for (unsigned int iz=0; iz<((dim>2) ? n_1d : 1); ++iz)
+        for (unsigned int iy=0; iy<((dim>1) ? n_1d-iz : 1); ++iy)
+          for (unsigned int ix=0; ix<n_1d-iy-iz; ++ix)
+            {
+              const unsigned int k2=index_map_inverse[k++];
+              for (unsigned int d1=0; d1<dim; ++d1)
+                for (unsigned int d2=0; d2<dim; ++d2)
+                  for (unsigned int d3=0; d3<dim; ++d3)
+                    for (unsigned int d4=0; d4<dim; ++d4)
+                      {
+                        // Derivative order for each direction: one
+                        // for every index equal to that direction.
+                        unsigned int deriv_order[3] = {0, 0, 0};
+                        ++deriv_order[d1];
+                        ++deriv_order[d2];
+                        ++deriv_order[d3];
+                        ++deriv_order[d4];
+
+                        fourth_derivatives[k2][d1][d2][d3][d4] =
+                          v[0][ix][deriv_order[0]]
+                          * ((dim>1) ? v[1][iy][deriv_order[1]] : 1.)
+                          * ((dim>2) ? v[2][iz][deriv_order[2]] : 1.);
+                      }
+            }
+    }
+}
+
+// explicit instantiations for space dimensions 1, 2 and 3
+template class PolynomialSpace<1>;
+template class PolynomialSpace<2>;
+template class PolynomialSpace<3>;
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/polynomials_abf.cc b/source/base/polynomials_abf.cc
new file mode 100644
index 0000000..f2718a0
--- /dev/null
+++ b/source/base/polynomials_abf.cc
@@ -0,0 +1,155 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/polynomials_abf.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <iostream>
+#include <iomanip>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+template <int dim>
+PolynomialsABF<dim>::PolynomialsABF (const unsigned int k)
+  :
+  my_degree(k),
+  n_pols(compute_n_pols(k))
+{
+  // 1d bases: argument k+2 in direction 0, k elsewhere (Legendre if k==0)
+  std::vector<std::vector< Polynomials::Polynomial< double > > > basis_1d(dim);
+  basis_1d[0] = Polynomials::LagrangeEquidistant::generate_complete_basis(k+2);
+  for (unsigned int dir=1; dir<dim; ++dir)
+    if (k == 0)
+      basis_1d[dir] = Polynomials::Legendre::generate_complete_basis(0);
+    else
+      basis_1d[dir] = Polynomials::LagrangeEquidistant::generate_complete_basis(k);
+  polynomial_space = new AnisotropicPolynomials<dim>(basis_1d);
+}
+
+// Destructor: deletes the polynomial space object allocated in the constructor.
+template <int dim>
+PolynomialsABF<dim>::~PolynomialsABF ()
+{
+  delete polynomial_space;
+}
+
+
+template <int dim>
+void
+PolynomialsABF<dim>::compute (const Point<dim>            &unit_point,
+                              std::vector<Tensor<1,dim> > &values,
+                              std::vector<Tensor<2,dim> > &grads,
+                              std::vector<Tensor<3,dim> > &grad_grads,
+                              std::vector<Tensor<4,dim> > &third_derivatives,
+                              std::vector<Tensor<5,dim> > &fourth_derivatives) const
+{
+  // Fill each output vector of size n_pols with the vector valued
+  // polynomials or their derivatives; empty vectors are skipped.
+  Assert(values.size()==n_pols || values.size()==0,
+         ExcDimensionMismatch(values.size(), n_pols));
+  Assert(grads.size()==n_pols|| grads.size()==0,
+         ExcDimensionMismatch(grads.size(), n_pols));
+  Assert(grad_grads.size()==n_pols|| grad_grads.size()==0,
+         ExcDimensionMismatch(grad_grads.size(), n_pols));
+  Assert(third_derivatives.size()==n_pols|| third_derivatives.size()==0,
+         ExcDimensionMismatch(third_derivatives.size(), n_pols));
+  Assert(fourth_derivatives.size()==n_pols|| fourth_derivatives.size()==0,
+         ExcDimensionMismatch(fourth_derivatives.size(), n_pols));
+
+  // Only the entries for vector component d are assigned below, so
+  // clear the outputs first; otherwise all remaining entries would
+  // keep whatever the caller passed in.
+  for (unsigned int i=0; i<values.size(); ++i)
+    values[i] = Tensor<1,dim>();
+  for (unsigned int i=0; i<grads.size(); ++i)
+    grads[i] = Tensor<2,dim>();
+  for (unsigned int i=0; i<grad_grads.size(); ++i)
+    grad_grads[i] = Tensor<3,dim>();
+  for (unsigned int i=0; i<third_derivatives.size(); ++i)
+    third_derivatives[i] = Tensor<4,dim>();
+  for (unsigned int i=0; i<fourth_derivatives.size(); ++i)
+    fourth_derivatives[i] = Tensor<5,dim>();
+
+  // NOTE(review): the writes below reach index dim*n_sub-1, which
+  // presumes n_pols == dim*n_sub; confirm against compute_n_pols().
+  const unsigned int n_sub = polynomial_space->n();
+  // the scratch arrays p_* must not be used by several threads at once
+  Threads::Mutex::ScopedLock lock(mutex);
+
+  p_values.resize((values.size() == 0) ? 0 : n_sub);
+  p_grads.resize((grads.size() == 0) ? 0 : n_sub);
+  p_grad_grads.resize((grad_grads.size() == 0) ? 0 : n_sub);
+  p_third_derivatives.resize((third_derivatives.size() == 0) ? 0 : n_sub);
+  p_fourth_derivatives.resize((fourth_derivatives.size() == 0) ? 0 : n_sub);
+
+  for (unsigned int d=0; d<dim; ++d)
+    {
+      // Evaluate the scalar space at the point with coordinates rotated
+      // by d; the shifts (.+d)%dim below rotate the directions back.
+      Point<dim> p;
+      for (unsigned int c=0; c<dim; ++c)
+        p(c) = unit_point((c+d)%dim);
+      polynomial_space->compute (p, p_values, p_grads, p_grad_grads,
+                                 p_third_derivatives, p_fourth_derivatives);
+
+      for (unsigned int i=0; i<p_values.size(); ++i)
+        values[i+d*n_sub][d] = p_values[i];
+      for (unsigned int i=0; i<p_grads.size(); ++i)
+        for (unsigned int d1=0; d1<dim; ++d1)
+          grads[i+d*n_sub][d][(d1+d)%dim] = p_grads[i][d1];
+      for (unsigned int i=0; i<p_grad_grads.size(); ++i)
+        for (unsigned int d1=0; d1<dim; ++d1)
+          for (unsigned int d2=0; d2<dim; ++d2)
+            grad_grads[i+d*n_sub][d][(d1+d)%dim][(d2+d)%dim]
+              = p_grad_grads[i][d1][d2];
+      for (unsigned int i=0; i<p_third_derivatives.size(); ++i)
+        for (unsigned int d1=0; d1<dim; ++d1)
+          for (unsigned int d2=0; d2<dim; ++d2)
+            for (unsigned int d3=0; d3<dim; ++d3)
+              third_derivatives[i+d*n_sub][d][(d1+d)%dim][(d2+d)%dim][(d3+d)%dim]
+                = p_third_derivatives[i][d1][d2][d3];
+      for (unsigned int i=0; i<p_fourth_derivatives.size(); ++i)
+        for (unsigned int d1=0; d1<dim; ++d1)
+          for (unsigned int d2=0; d2<dim; ++d2)
+            for (unsigned int d3=0; d3<dim; ++d3)
+              for (unsigned int d4=0; d4<dim; ++d4)
+                fourth_derivatives[i+d*n_sub][d][(d1+d)%dim][(d2+d)%dim][(d3+d)%dim][(d4+d)%dim]
+                  = p_fourth_derivatives[i][d1][d2][d3][d4];
+    }
+}
+
+
+template <int dim>
+unsigned int
+PolynomialsABF<dim>::compute_n_pols(unsigned int k)
+{
+  // compute() fills dim blocks, each the product of one 1d basis with
+  // k+3 polynomials and dim-1 bases with k+1: dim*(k+3)*(k+1)^(dim-1)
+  if (dim == 1) return k+3;
+  if (dim == 2) return 2*(k+1)*(k+3);
+  if (dim == 3) return 3*(k+1)*(k+1)*(k+3);
+  Assert(false, ExcNotImplemented());
+  return 0;
+}
+
+// explicit instantiations for space dimensions 1, 2 and 3
+template class PolynomialsABF<1>;
+template class PolynomialsABF<2>;
+template class PolynomialsABF<3>;
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/polynomials_adini.cc b/source/base/polynomials_adini.cc
new file mode 100644
index 0000000..7de1d16
--- /dev/null
+++ b/source/base/polynomials_adini.cc
@@ -0,0 +1,246 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/polynomials_adini.h>
+// ENTER_COEFFICIENTS(koefs,z,a0,...,a11) assigns koefs(k,z)=ak for k=0,...,11.
+#define ENTER_COEFFICIENTS(koefs,z,a0,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11)       \
+  koefs(0, z)= a0; koefs(1, z)=a1; koefs(2, z)=a2; koefs(3, z)=a3; koefs( 4, z)=a4 ; koefs( 5, z)=a5 ; \
+  koefs(6, z)= a6; koefs(7, z)=a7; koefs(8, z)=a8; koefs(9, z)=a9; koefs(10, z)=a10; koefs(11, z)=a11;
+
+
+DEAL_II_NAMESPACE_OPEN
+
+// Set up the 12x12 monomial coefficient tables of the shape functions and their derivatives.
+PolynomialsAdini::PolynomialsAdini() :
+  coef(12,12),
+  dx(12,12),
+  dy(12,12),
+  dxx(12,12),
+  dyy(12,12),
+  dxy(12,12)
+{
+  // coef(k,i): coefficient of monomial k in shape function i (one row below per i);
+  // monomials k=0..11: 1, x, y, x^2, y^2, xy, x^3, y^3, xy^2, x^2y, x^3y, xy^3
+  ENTER_COEFFICIENTS(coef,  0, 1, 0, 0,-3,-3,-1, 2, 2, 3, 3,-2,-2);
+  ENTER_COEFFICIENTS(coef,  1, 0, 1, 0,-2, 0,-1, 1, 0, 0, 2,-1, 0);
+  ENTER_COEFFICIENTS(coef,  2, 0, 0, 1, 0,-2,-1, 0, 1, 2, 0, 0,-1);
+  ENTER_COEFFICIENTS(coef,  3, 0, 0, 0, 3, 0, 1,-2, 0,-3,-3, 2, 2);
+  ENTER_COEFFICIENTS(coef,  4, 0, 0, 0,-1, 0, 0, 1, 0, 0, 1,-1, 0);
+  ENTER_COEFFICIENTS(coef,  5, 0, 0, 0, 0, 0, 1, 0, 0,-2, 0, 0, 1);
+  ENTER_COEFFICIENTS(coef,  6, 0, 0, 0, 0, 3, 1, 0,-2,-3,-3, 2, 2);
+  ENTER_COEFFICIENTS(coef,  7, 0, 0, 0, 0, 0, 1, 0, 0, 0,-2, 1, 0);
+  ENTER_COEFFICIENTS(coef,  8, 0, 0, 0, 0,-1, 0, 0, 1, 1, 0, 0,-1);
+  ENTER_COEFFICIENTS(coef,  9, 0, 0, 0, 0, 0,-1, 0, 0, 3, 3,-2,-2);
+  ENTER_COEFFICIENTS(coef, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0,-1, 1, 0);
+  ENTER_COEFFICIENTS(coef, 11, 0, 0, 0, 0, 0, 0, 0, 0,-1, 0, 0, 1);
+  // dx(k,i): coefficients of the x-derivative of shape function i
+  ENTER_COEFFICIENTS( dx,  0, 0,-6,-1, 6, 3, 6, 0,-2, 0,-6, 0, 0);
+  ENTER_COEFFICIENTS( dx,  1, 1,-4,-1, 3, 0, 4, 0, 0, 0,-3, 0, 0);
+  ENTER_COEFFICIENTS( dx,  2, 0, 0,-1, 0, 2, 0, 0,-1, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dx,  3, 0, 6, 1,-6,-3,-6, 0, 2, 0, 6, 0, 0);
+  ENTER_COEFFICIENTS( dx,  4, 0,-2, 0, 3, 0, 2, 0, 0, 0,-3, 0, 0);
+  ENTER_COEFFICIENTS( dx,  5, 0, 0, 1, 0,-2, 0, 0, 1, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dx,  6, 0, 0, 1, 0,-3,-6, 0, 2, 0, 6, 0, 0);
+  ENTER_COEFFICIENTS( dx,  7, 0, 0, 1, 0, 0,-4, 0, 0, 0, 3, 0, 0);
+  ENTER_COEFFICIENTS( dx,  8, 0, 0, 0, 0, 1, 0, 0,-1, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dx,  9, 0, 0,-1, 0, 3, 6, 0,-2, 0,-6, 0, 0);
+  ENTER_COEFFICIENTS( dx, 10, 0, 0, 0, 0, 0,-2, 0, 0, 0, 3, 0, 0);
+  ENTER_COEFFICIENTS( dx, 11, 0, 0, 0, 0,-1, 0, 0, 1, 0, 0, 0, 0);
+  // dy(k,i): coefficients of the y-derivative of shape function i
+  ENTER_COEFFICIENTS( dy,  0, 0,-1,-6, 3, 6, 6,-2, 0,-6, 0, 0, 0);
+  ENTER_COEFFICIENTS( dy,  1, 0,-1, 0, 2, 0, 0,-1, 0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dy,  2, 1,-1,-4, 0, 3, 4, 0, 0,-3, 0, 0, 0);
+  ENTER_COEFFICIENTS( dy,  3, 0, 1, 0,-3, 0,-6, 2, 0, 6, 0, 0, 0);
+  ENTER_COEFFICIENTS( dy,  4, 0, 0, 0, 1, 0, 0,-1, 0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dy,  5, 0, 1, 0, 0, 0,-4, 0, 0, 3, 0, 0, 0);
+  ENTER_COEFFICIENTS( dy,  6, 0, 1, 6,-3,-6,-6, 2, 0, 6, 0, 0, 0);
+  ENTER_COEFFICIENTS( dy,  7, 0, 1, 0,-2, 0, 0, 1, 0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dy,  8, 0, 0,-2, 0, 3, 2, 0, 0,-3, 0, 0, 0);
+  ENTER_COEFFICIENTS( dy,  9, 0,-1, 0, 3, 0, 6,-2, 0,-6, 0, 0, 0);
+  ENTER_COEFFICIENTS( dy, 10, 0, 0, 0,-1, 0, 0, 1, 0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dy, 11, 0, 0, 0, 0, 0,-2, 0, 0, 3, 0, 0, 0);
+  // dxx(k,i): coefficients of the second x-derivative; monomial index k:
+  //                       0  1  2  3  4  5  6  7  8  9 10 11
+  ENTER_COEFFICIENTS( dxx,  0,-6, 12, 6, 0, 0,-12, 0, 0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dxx,  1,-4,  6, 4, 0, 0, -6, 0, 0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dxx,  2, 0,  0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dxx,  3, 6,-12,-6, 0, 0, 12, 0, 0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dxx,  4,-2,  6, 2, 0, 0, -6, 0, 0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dxx,  5, 0,  0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dxx,  6, 0,  0,-6, 0, 0, 12, 0, 0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dxx,  7, 0,  0,-4, 0, 0,  6, 0, 0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dxx,  8, 0,  0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dxx,  9, 0,  0, 6, 0, 0,-12, 0, 0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dxx, 10, 0,  0,-2, 0, 0,  6, 0, 0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dxx, 11, 0,  0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0);
+  // dyy(k,i): coefficients of the second y-derivative of shape function i
+  ENTER_COEFFICIENTS( dyy,  0,-6, 6, 12, 0, 0,-12, 0, 0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dyy,  1, 0, 0,  0, 0, 0,  0, 0, 0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dyy,  2,-4, 4,  6, 0, 0, -6, 0, 0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dyy,  3, 0,-6,  0, 0, 0, 12, 0, 0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dyy,  4, 0, 0,  0, 0, 0,  0, 0, 0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dyy,  5, 0,-4,  0, 0, 0,  6, 0, 0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dyy,  6, 6,-6,-12, 0, 0, 12, 0, 0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dyy,  7, 0, 0,  0, 0, 0,  0, 0, 0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dyy,  8,-2, 2,  6, 0, 0, -6, 0,-0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dyy,  9, 0, 6,  0, 0, 0,-12, 0, 0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dyy, 10, 0, 0,  0, 0, 0,  0, 0, 0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dyy, 11, 0,-2,  0, 0, 0,  6, 0, 0, 0, 0, 0, 0);
+  // dxy(k,i): coefficients of the mixed second derivative of shape function i
+  ENTER_COEFFICIENTS( dxy,  0,-1, 6, 6,-6,-6, 0, 0, 0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dxy,  1,-1, 4, 0,-3, 0, 0, 0, 0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dxy,  2,-1, 0, 4, 0,-3, 0, 0, 0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dxy,  3, 1,-6,-6, 6, 6, 0, 0, 0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dxy,  4, 0, 2, 0,-3, 0, 0, 0, 0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dxy,  5, 1, 0,-4, 0, 3, 0, 0, 0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dxy,  6, 1,-6,-6, 6, 6, 0, 0, 0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dxy,  7, 1,-4, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dxy,  8, 0, 0, 2, 0,-3, 0, 0, 0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dxy,  9,-1, 6, 6,-6,-6, 0, 0, 0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dxy, 10, 0,-2, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0);
+  ENTER_COEFFICIENTS( dxy, 11, 0, 0,-2, 0, 3, 0, 0, 0, 0, 0, 0, 0);
+}
+
+void
+PolynomialsAdini::compute (const Point<2> &unit_point,
+                           std::vector<double> &values,
+                           std::vector<Tensor<1,2> > &grads,
+                           std::vector< Tensor<2,2> > &grad_grads) const
+{
+  // Entry k of every output vector receives the respective quantity
+  // of shape function k at unit_point. An empty vector is left
+  // untouched, since its loop below does not execute.
+
+  // values
+  const unsigned int n_values = values.size();
+  for (unsigned int k=0; k<n_values; ++k)
+    {
+      values[k] = compute_value(k, unit_point);
+    }
+
+  // first derivatives
+  const unsigned int n_grads = grads.size();
+  for (unsigned int k=0; k<n_grads; ++k)
+    {
+      grads[k] = compute_grad(k, unit_point);
+    }
+
+  // second derivatives
+  const unsigned int n_grad_grads = grad_grads.size();
+  for (unsigned int k=0; k<n_grad_grads; ++k)
+    {
+      grad_grads[k] = compute_grad_grad(k, unit_point);
+    }
+}
+
+double PolynomialsAdini::compute_value (const unsigned int i, const Point<2> &p) const
+{
+  // value of shape function i at p: sum over k of coef(k,i) * monomial k
+  const double x = p(0);
+  const double y = p(1);
+
+  // constant and linear terms
+  double sum = coef( 0,i) + coef( 1,i)*x + coef( 2,i)*y;
+  // quadratic terms
+  sum = sum + coef( 3,i)*x*x + coef( 4,i)*y*y + coef( 5,i)*x*y;
+  // cubic terms
+  sum = sum + coef( 6,i)*x*x*x + coef( 7,i)*y*y*y
+        + coef( 8,i)*x*y*y + coef( 9,i)*x*x*y;
+  // quartic terms
+  sum = sum + coef(10,i)*x*x*x*y + coef(11,i)*x*y*y*y;
+  return sum;
+}
+
+Tensor<1,2> PolynomialsAdini::compute_grad (const unsigned int i, const Point<2> &p) const
+{
+  // gradient of shape function i at p; each component sums the
+  // tabulated coefficients (dx, dy) times the monomials in x and y
+  const double x = p(0);
+  const double y = p(1);
+
+  // x-derivative: constant and linear terms
+  double deriv_x = dx( 0,i) + dx( 1,i)*x + dx( 2,i)*y;
+  // quadratic terms
+  deriv_x = deriv_x + dx( 3,i)*x*x + dx( 4,i)*y*y + dx( 5,i)*x*y;
+  // cubic terms
+  deriv_x = deriv_x + dx( 6,i)*x*x*x + dx( 7,i)*y*y*y
+            + dx( 8,i)*x*y*y + dx( 9,i)*x*x*y;
+  // quartic terms
+  deriv_x = deriv_x + dx(10,i)*x*x*x*y + dx(11,i)*x*y*y*y;
+
+  // y-derivative: constant and linear terms
+  double deriv_y = dy( 0,i) + dy( 1,i)*x + dy( 2,i)*y;
+  // quadratic terms
+  deriv_y = deriv_y + dy( 3,i)*x*x + dy( 4,i)*y*y + dy( 5,i)*x*y;
+  // cubic terms
+  deriv_y = deriv_y + dy( 6,i)*x*x*x + dy( 7,i)*y*y*y
+            + dy( 8,i)*x*y*y + dy( 9,i)*x*x*y;
+  // quartic terms
+  deriv_y = deriv_y + dy(10,i)*x*x*x*y + dy(11,i)*x*y*y*y;
+
+  Tensor<1,2> gradient;
+  gradient[0] = deriv_x;
+  gradient[1] = deriv_y;
+  return gradient;
+}
+
+Tensor<2,2> PolynomialsAdini::compute_grad_grad (const unsigned int i, const Point<2> &p) const
+{
+  // second derivatives of shape function i at p; each entry sums the
+  // tabulated coefficients (dxx, dxy, dyy) times the monomials in x, y
+  const double x = p(0);
+  const double y = p(1);
+
+  // xx-derivative: constant and linear terms
+  double deriv_xx = dxx( 0,i) + dxx( 1,i)*x + dxx( 2,i)*y;
+  // quadratic terms
+  deriv_xx = deriv_xx + dxx( 3,i)*x*x + dxx( 4,i)*y*y + dxx( 5,i)*x*y;
+  // cubic terms
+  deriv_xx = deriv_xx + dxx( 6,i)*x*x*x + dxx( 7,i)*y*y*y
+             + dxx( 8,i)*x*y*y + dxx( 9,i)*x*x*y;
+  // quartic terms
+  deriv_xx = deriv_xx + dxx(10,i)*x*x*x*y + dxx(11,i)*x*y*y*y;
+
+  // xy-derivative: constant and linear terms
+  double deriv_xy = dxy( 0,i) + dxy( 1,i)*x + dxy( 2,i)*y;
+  // quadratic terms
+  deriv_xy = deriv_xy + dxy( 3,i)*x*x + dxy( 4,i)*y*y + dxy( 5,i)*x*y;
+  // cubic terms
+  deriv_xy = deriv_xy + dxy( 6,i)*x*x*x + dxy( 7,i)*y*y*y
+             + dxy( 8,i)*x*y*y + dxy( 9,i)*x*x*y;
+  // quartic terms
+  deriv_xy = deriv_xy + dxy(10,i)*x*x*x*y + dxy(11,i)*x*y*y*y;
+
+  // yy-derivative: constant and linear terms
+  double deriv_yy = dyy( 0,i) + dyy( 1,i)*x + dyy( 2,i)*y;
+  // quadratic terms
+  deriv_yy = deriv_yy + dyy( 3,i)*x*x + dyy( 4,i)*y*y + dyy( 5,i)*x*y;
+  // cubic terms
+  deriv_yy = deriv_yy + dyy( 6,i)*x*x*x + dyy( 7,i)*y*y*y
+             + dyy( 8,i)*x*y*y + dyy( 9,i)*x*x*y;
+  // quartic terms
+  deriv_yy = deriv_yy + dyy(10,i)*x*x*x*y + dyy(11,i)*x*y*y*y;
+
+  Tensor<2,2> hessian;
+  hessian[0][0] = deriv_xx;
+  hessian[0][1] = deriv_xy;
+  hessian[1][0] = hessian[0][1];
+  hessian[1][1] = deriv_yy;
+  return hessian;
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/polynomials_bdm.cc b/source/base/polynomials_bdm.cc
new file mode 100644
index 0000000..cdf32d2
--- /dev/null
+++ b/source/base/polynomials_bdm.cc
@@ -0,0 +1,423 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/geometry_info.h>
+#include <deal.II/base/polynomials_bdm.h>
+#include <deal.II/base/polynomial_space.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <iostream>
+#include <iomanip>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+template <int dim>
+PolynomialsBDM<dim>::PolynomialsBDM (const unsigned int k)
+  :
+  polynomial_space (Polynomials::Legendre::generate_complete_basis(k)),
+  monomials((dim==2) ? (1) : (k+2)),
+  n_pols(compute_n_pols(k)),
+  p_values(polynomial_space.n()),
+  p_grads(polynomial_space.n()),
+  p_grad_grads(polynomial_space.n())
+{
+  // 2d: one monomial built with argument k+1; 3d: one per index 0..k+1
+  if (dim == 2)
+    monomials[0] = Polynomials::Monomial<double> (k+1);
+  else if (dim == 3)
+    {
+      for (unsigned int m=0; m<monomials.size(); ++m)
+        monomials[m] = Polynomials::Monomial<double> (m);
+    }
+  else
+    {
+      Assert(false, ExcNotImplemented());
+    }
+}
+
+
+
+template <int dim>
+void
+PolynomialsBDM<dim>::compute (const Point<dim>            &unit_point,
+                              std::vector<Tensor<1,dim> > &values,
+                              std::vector<Tensor<2,dim> > &grads,
+                              std::vector<Tensor<3,dim> > &grad_grads,
+                              std::vector<Tensor<4,dim> > &third_derivatives,
+                              std::vector<Tensor<5,dim> > &fourth_derivatives) const
+{
+  Assert(values.size()==n_pols || values.size()==0,
+         ExcDimensionMismatch(values.size(), n_pols));
+  Assert(grads.size()==n_pols|| grads.size()==0,
+         ExcDimensionMismatch(grads.size(), n_pols));
+  Assert(grad_grads.size()==n_pols|| grad_grads.size()==0,
+         ExcDimensionMismatch(grad_grads.size(), n_pols));
+  Assert(third_derivatives.size()==n_pols|| third_derivatives.size()==0,
+         ExcDimensionMismatch(third_derivatives.size(), n_pols));
+  Assert(fourth_derivatives.size()==n_pols|| fourth_derivatives.size()==0,
+         ExcDimensionMismatch(fourth_derivatives.size(), n_pols));
+
+  // third and fourth derivatives not implemented
+  (void)third_derivatives;
+  Assert(third_derivatives.size()==0,
+         ExcNotImplemented());
+  (void)fourth_derivatives;
+  Assert(fourth_derivatives.size()==0,
+         ExcNotImplemented());
+
+  const unsigned int n_sub = polynomial_space.n();
+
+  // guard access to the scratch
+  // arrays in the following block
+  // using a mutex to make sure they
+  // are not used by multiple threads
+  // at once
+  {
+    Threads::Mutex::ScopedLock lock(mutex);
+
+    p_values.resize((values.size() == 0) ? 0 : n_sub);
+    p_grads.resize((grads.size() == 0) ? 0 : n_sub);
+    p_grad_grads.resize((grad_grads.size() == 0) ? 0 : n_sub);
+
+    // Compute values of complete space
+    // and insert into tensors.  Result
+    // will have first all polynomials
+    // in the x-component, then y and
+    // z.
+    polynomial_space.compute (unit_point, p_values, p_grads, p_grad_grads,
+                              p_third_derivatives, p_fourth_derivatives);
+
+    std::fill(values.begin(), values.end(), Tensor<1,dim>());
+    for (unsigned int i=0; i<p_values.size(); ++i)
+      for (unsigned int j=0; j<dim; ++j)
+        values[i+j*n_sub][j] = p_values[i];
+
+    std::fill(grads.begin(), grads.end(), Tensor<2,dim>());
+    for (unsigned int i=0; i<p_grads.size(); ++i)
+      for (unsigned int j=0; j<dim; ++j)
+        grads[i+j*n_sub][j] = p_grads[i];
+
+    std::fill(grad_grads.begin(), grad_grads.end(), Tensor<3,dim>());
+    for (unsigned int i=0; i<p_grad_grads.size(); ++i)
+      for (unsigned int j=0; j<dim; ++j)
+        grad_grads[i+j*n_sub][j] = p_grad_grads[i];
+  }
+
+  // This is the first polynomial not
+  // covered by the P_k subspace
+  unsigned int start = dim*n_sub;
+
+  // Store values of auxiliary
+  // polynomials and their three
+  // derivatives
+  std::vector<std::vector<double> > monovali(dim, std::vector<double>(4));
+  std::vector<std::vector<double> > monovalk(dim, std::vector<double>(4));
+
+  if (dim == 2)
+    {
+      for (unsigned int d=0; d<dim; ++d)
+        monomials[0].value(unit_point(d), monovali[d]);
+      if (values.size() != 0)
+        {
+          values[start][0] = monovali[0][0];
+          values[start][1] = -unit_point(1) * monovali[0][1];
+          values[start+1][0] = unit_point(0) * monovali[1][1];
+          values[start+1][1] = -monovali[1][0];
+        }
+      if (grads.size() != 0)
+        {
+          grads[start][0][0] = monovali[0][1];
+          grads[start][0][1] = 0.;
+          grads[start][1][0] = -unit_point(1) * monovali[0][2];
+          grads[start][1][1] = -monovali[0][1];
+          grads[start+1][0][0] = monovali[1][1];
+          grads[start+1][0][1] = unit_point(0) * monovali[1][2];
+          grads[start+1][1][0] = 0.;
+          grads[start+1][1][1] = -monovali[1][1];
+        }
+      if (grad_grads.size() != 0)
+        {
+          grad_grads[start][0][0][0] = monovali[0][2];
+          grad_grads[start][0][0][1] = 0.;
+          grad_grads[start][0][1][0] = 0.;
+          grad_grads[start][0][1][1] = 0.;
+          grad_grads[start][1][0][0] = -unit_point(1) * monovali[0][3];
+          grad_grads[start][1][0][1] = -monovali[0][2];
+          grad_grads[start][1][1][0] = -monovali[0][2];
+          grad_grads[start][1][1][1] = 0.;
+          grad_grads[start+1][0][0][0] = 0;
+          grad_grads[start+1][0][0][1] = monovali[1][2];
+          grad_grads[start+1][0][1][0] = monovali[1][2];
+          grad_grads[start+1][0][1][1] = unit_point(0) * monovali[1][3];
+          grad_grads[start+1][1][0][0] = 0.;
+          grad_grads[start+1][1][0][1] = 0.;
+          grad_grads[start+1][1][1][0] = 0.;
+          grad_grads[start+1][1][1][1] = -monovali[1][2];
+        }
+    }
+  else // dim == 3
+    {
+      // The number of curls in each
+      // component. Note that the
+      // table in BrezziFortin91 has
+      // a typo, but the text has the
+      // right basis
+
+      // Note that the next basis
+      // function is always obtained
+      // from the previous by cyclic
+      // rotation of the coordinates
+      const unsigned int n_curls = monomials.size() - 1;
+      for (unsigned int i=0; i<n_curls; ++i, start+=dim)
+        {
+          for (unsigned int d=0; d<dim; ++d)
+            {
+              // p(t) = t^(i+1)
+              monomials[i+1].value(unit_point(d), monovali[d]);
+              // q(t) = t^(k-i)
+              monomials[degree()-i].value(unit_point(d), monovalk[d]);
+            }
+          if (values.size() != 0)
+            {
+              // x p'(y) q(z)
+              values[start][0] = unit_point(0) * monovali[1][1] * monovalk[2][0];
+              // - p(y) q(z)
+              values[start][1] = -monovali[1][0] * monovalk[2][0];
+              values[start][2] = 0.;
+
+              // y p'(z) q(x)
+              values[start+1][1] = unit_point(1) * monovali[2][1] * monovalk[0][0];
+              // - p(z) q(x)
+              values[start+1][2] = -monovali[2][0] * monovalk[0][0];
+              values[start+1][0] = 0.;
+
+              // z p'(x) q(y)
+              values[start+2][2] = unit_point(2) * monovali[0][1] * monovalk[1][0];
+              // -p(x) q(y)
+              values[start+2][0] = -monovali[0][0] * monovalk[1][0];
+              values[start+2][1] = 0.;
+            }
+          if (grads.size() != 0)
+            {
+              grads[start][0][0] = monovali[1][1] * monovalk[2][0];
+              grads[start][0][1] = unit_point(0) * monovali[1][2] * monovalk[2][0];
+              grads[start][0][2] = unit_point(0) * monovali[1][1] * monovalk[2][1];
+              grads[start][1][0] = 0.;
+              grads[start][1][1] = -monovali[1][1] * monovalk[2][0];
+              grads[start][1][2] = -monovali[1][0] * monovalk[2][1];
+              grads[start][2][0] = 0.;
+              grads[start][2][1] = 0.;
+              grads[start][2][2] = 0.;
+
+              grads[start+1][1][1] = monovali[2][1] * monovalk[0][0];
+              grads[start+1][1][2] = unit_point(1) * monovali[2][2] * monovalk[0][0];
+              grads[start+1][1][0] = unit_point(1) * monovali[2][1] * monovalk[0][1];
+              grads[start+1][2][1] = 0.;
+              grads[start+1][2][2] = -monovali[2][1] * monovalk[0][0];
+              grads[start+1][2][0] = -monovali[2][0] * monovalk[0][1];
+              grads[start+1][0][1] = 0.;
+              grads[start+1][0][2] = 0.;
+              grads[start+1][0][0] = 0.;
+
+              grads[start+2][2][2] = monovali[0][1] * monovalk[1][0];
+              grads[start+2][2][0] = unit_point(2) * monovali[0][2] * monovalk[1][0];
+              grads[start+2][2][1] = unit_point(2) * monovali[0][1] * monovalk[1][1];
+              grads[start+2][0][2] = 0.;
+              grads[start+2][0][0] = -monovali[0][1] * monovalk[1][0];
+              grads[start+2][0][1] = -monovali[0][0] * monovalk[1][1];
+              grads[start+2][1][2] = 0.;
+              grads[start+2][1][0] = 0.;
+              grads[start+2][1][1] = 0.;
+            }
+          if (grad_grads.size() != 0)
+            {
+              grad_grads[start][0][0][0] = 0.;
+              grad_grads[start][0][0][1] = monovali[1][2]*monovalk[2][0];
+              grad_grads[start][0][0][2] = monovali[1][1]*monovalk[2][1];
+              grad_grads[start][0][1][0] = monovali[1][2]*monovalk[2][0];
+              grad_grads[start][0][1][1] = unit_point(0)*monovali[1][3]*monovalk[2][0];
+              grad_grads[start][0][1][2] = unit_point(0)*monovali[1][2]*monovalk[2][1];
+              grad_grads[start][0][2][0] = monovali[1][1]*monovalk[2][1];
+              grad_grads[start][0][2][1] = unit_point(0)*monovali[1][2]*monovalk[2][1];
+              grad_grads[start][0][2][2] = unit_point(0)*monovali[1][1]*monovalk[2][2];
+              grad_grads[start][1][0][0] = 0.;
+              grad_grads[start][1][0][1] = 0.;
+              grad_grads[start][1][0][2] = 0.;
+              grad_grads[start][1][1][0] = 0.;
+              grad_grads[start][1][1][1] = -monovali[1][2]*monovalk[2][0];
+              grad_grads[start][1][1][2] = -monovali[1][1]*monovalk[2][1];
+              grad_grads[start][1][2][0] = 0.;
+              grad_grads[start][1][2][1] = -monovali[1][1]*monovalk[2][1];
+              grad_grads[start][1][2][2] = -monovali[1][0]*monovalk[2][2];
+              grad_grads[start][2][0][0] = 0.;
+              grad_grads[start][2][0][1] = 0.;
+              grad_grads[start][2][0][2] = 0.;
+              grad_grads[start][2][1][0] = 0.;
+              grad_grads[start][2][1][1] = 0.;
+              grad_grads[start][2][1][2] = 0.;
+              grad_grads[start][2][2][0] = 0.;
+              grad_grads[start][2][2][1] = 0.;
+              grad_grads[start][2][2][2] = 0.;
+
+              grad_grads[start+1][0][0][0] = 0.;
+              grad_grads[start+1][0][0][1] = 0.;
+              grad_grads[start+1][0][0][2] = 0.;
+              grad_grads[start+1][0][1][0] = 0.;
+              grad_grads[start+1][0][1][1] = 0.;
+              grad_grads[start+1][0][1][2] = 0.;
+              grad_grads[start+1][0][2][0] = 0.;
+              grad_grads[start+1][0][2][1] = 0.;
+              grad_grads[start+1][0][2][2] = 0.;
+              grad_grads[start+1][1][0][0] = unit_point(1)*monovali[2][1]*monovalk[0][2];
+              grad_grads[start+1][1][0][1] = monovali[2][1]*monovalk[0][1];
+              grad_grads[start+1][1][0][2] = unit_point(1)*monovali[2][2]*monovalk[0][1];
+              grad_grads[start+1][1][1][0] = monovalk[0][1]*monovali[2][1];
+              grad_grads[start+1][1][1][1] = 0.;
+              grad_grads[start+1][1][1][2] = monovalk[0][0]*monovali[2][2];
+              grad_grads[start+1][1][2][0] = unit_point(1)*monovalk[0][1]*monovali[2][2];
+              grad_grads[start+1][1][2][1] = monovalk[0][0]*monovali[2][2];
+              grad_grads[start+1][1][2][2] = unit_point(1)*monovalk[0][0]*monovali[2][3];
+              grad_grads[start+1][2][0][0] = -monovalk[0][2]*monovali[2][0];
+              grad_grads[start+1][2][0][1] = 0.;
+              grad_grads[start+1][2][0][2] = -monovalk[0][1]*monovali[2][1];
+              grad_grads[start+1][2][1][0] = 0.;
+              grad_grads[start+1][2][1][1] = 0.;
+              grad_grads[start+1][2][1][2] = 0.;
+              grad_grads[start+1][2][2][0] = -monovalk[0][1]*monovali[2][1];
+              grad_grads[start+1][2][2][1] = 0.;
+              grad_grads[start+1][2][2][2] = -monovalk[0][0]*monovali[2][2];
+
+              grad_grads[start+2][0][0][0] = -monovali[0][2]*monovalk[1][0];
+              grad_grads[start+2][0][0][1] = -monovali[0][1]*monovalk[1][1];
+              grad_grads[start+2][0][0][2] = 0.;
+              grad_grads[start+2][0][1][0] = -monovali[0][1]*monovalk[1][1];
+              grad_grads[start+2][0][1][1] = -monovali[0][0]*monovalk[1][2];
+              grad_grads[start+2][0][1][2] = 0.;
+              grad_grads[start+2][0][2][0] = 0.;
+              grad_grads[start+2][0][2][1] = 0.;
+              grad_grads[start+2][0][2][2] = 0.;
+              grad_grads[start+2][1][0][0] = 0.;
+              grad_grads[start+2][1][0][1] = 0.;
+              grad_grads[start+2][1][0][2] = 0.;
+              grad_grads[start+2][1][1][0] = 0.;
+              grad_grads[start+2][1][1][1] = 0.;
+              grad_grads[start+2][1][1][2] = 0.;
+              grad_grads[start+2][1][2][0] = 0.;
+              grad_grads[start+2][1][2][1] = 0.;
+              grad_grads[start+2][1][2][2] = 0.;
+              grad_grads[start+2][2][0][0] = unit_point(2)*monovali[0][3]*monovalk[1][0];
+              grad_grads[start+2][2][0][1] = unit_point(2)*monovali[0][2]*monovalk[1][1];
+              grad_grads[start+2][2][0][2] = monovali[0][2]*monovalk[1][0];
+              grad_grads[start+2][2][1][0] = unit_point(2)*monovali[0][2]*monovalk[1][1];
+              grad_grads[start+2][2][1][1] = unit_point(2)*monovali[0][1]*monovalk[1][2];
+              grad_grads[start+2][2][1][2] = monovali[0][1]*monovalk[1][1];
+              grad_grads[start+2][2][2][0] = monovali[0][2]*monovalk[1][0];
+              grad_grads[start+2][2][2][1] = monovali[0][1]*monovalk[1][1];
+              grad_grads[start+2][2][2][2] = 0.;
+
+            }
+        }
+      Assert(start == n_pols, ExcInternalError());
+    }
+}
+
+
+/*
+template <int dim>
+void
+PolynomialsBDM<dim>::compute_node_matrix (Table<2,double>& A) const
+{
+  std::vector<Polynomial<double> > moment_weight(2);
+  for (unsigned int i=0;i<moment_weight.size();++i)
+    moment_weight[i] = Monomial<double>(i);
+
+  QGauss<dim-1> qface(polynomial_space.degree()+1);
+
+  std::vector<Tensor<1,dim> > values(n());
+  std::vector<Tensor<2,dim> > grads;
+  std::vector<Tensor<3,dim> > grad_grads;
+  values.resize(n());
+
+  for (unsigned int face=0;face<2*dim;++face)
+    {
+      double orientation = 1.;
+      if ((face==0) || (face==3))
+        orientation = -1.;
+
+      for (unsigned int k=0;k<qface.size();++k)
+        {
+          const double w = qface.weight(k) * orientation;
+          const double x = qface.point(k)(0);
+          Point<dim> p;
+          switch (face)
+            {
+              case 2:
+                p(1) = 1.;
+              case 0:
+                p(0) = x;
+                break;
+              case 1:
+                p(0) = 1.;
+              case 3:
+                p(1) = x;
+                break;
+            }
+//      std::cerr << p
+//                << '\t' << moment_weight[0].value(x)
+//                << '\t' << moment_weight[1].value(x)
+//        ;
+
+          compute (p, values, grads, grad_grads);
+
+          for (unsigned int i=0;i<n();++i)
+            {
+//          std::cerr << '\t' << std::setw(6) << values[i][1-face%2];
+                                               // Integrate normal component.
+                                               // This is easy on the unit square
+              for (unsigned int j=0;j<moment_weight.size();++j)
+                A(moment_weight.size()*face+j,i)
+                  += w * values[i][1-face%2] * moment_weight[j].value(x);
+            }
+//      std::cerr << std::endl;
+        }
+    }
+
+                                   // Volume integrals are missing
+                                   //
+                                   // This degree is one larger
+  Assert (polynomial_space.degree() <= 2,
+          ExcNotImplemented());
+}
+*/
+
+// Return the number of polynomials for degree k. Closed-form counts are
+// provided for dim = 1, 2 and 3 only; for any other dimension
+// Assert(false, ...) is reached and 0 is returned.
+template <int dim>
+unsigned int
+PolynomialsBDM<dim>::compute_n_pols(unsigned int k)
+{
+  switch (dim)
+    {
+    case 1:
+      return k+1;
+    case 2:
+      return (k+1)*(k+2)+2;
+    case 3:
+      return ((k+1)*(k+2)*(k+3))/2+3*(k+1);
+    default:
+      Assert(false, ExcNotImplemented());
+      return 0;
+    }
+}
+
+
+// Explicit instantiations of the class template for dim = 1, 2 and 3.
+template class PolynomialsBDM<1>;
+template class PolynomialsBDM<2>;
+template class PolynomialsBDM<3>;
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/polynomials_bernstein.cc b/source/base/polynomials_bernstein.cc
new file mode 100644
index 0000000..fedd181
--- /dev/null
+++ b/source/base/polynomials_bernstein.cc
@@ -0,0 +1,58 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/polynomials_bernstein.h>
+
+DEAL_II_DISABLE_EXTRA_DIAGNOSTICS
+#include <boost/math/special_functions/binomial.hpp>
+DEAL_II_ENABLE_EXTRA_DIAGNOSTICS
+
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace
+{
+  // Build the vector of n+1 coefficients for index k and degree n:
+  // entry i (for k <= i <= n) is
+  //   (-1)^(i-k) * binomial(n,i) * binomial(i,k),
+  // and all entries with index below k remain zero.
+  //
+  // Requires n > 0 and k <= n (both checked by assertions).
+  template <typename number>
+  std::vector<number>
+  get_bernstein_coefficients (const unsigned int k,
+                              const unsigned int n)
+  {
+    Assert(n>0, ExcMessage("Bernstein polynomial needs to be of degree > 0."));
+    AssertIndexRange(k, n+1);
+
+    const unsigned int n_coefficients = n + 1;
+    std::vector<number> coefficients(n_coefficients, number(0.0));
+
+    for (unsigned int entry = k; entry < n_coefficients; ++entry)
+      {
+        // the sign alternates, starting with +1 at entry == k
+        const int sign = ((entry - k) % 2 == 0) ? 1 : -1;
+        coefficients[entry]
+          = sign
+            * boost::math::binomial_coefficient<number>(n, entry)
+            * boost::math::binomial_coefficient<number>(entry, k);
+      }
+
+    return coefficients;
+  }
+}
+
+// Constructor. Initializes the Polynomial base class with the coefficient
+// vector that get_bernstein_coefficients() produces for the given index
+// and degree.
+template <typename number>
+PolynomialsBernstein<number>::PolynomialsBernstein (const unsigned int index,
+                                                    const unsigned int degree)
+  :
+  Polynomials::Polynomial<number>(get_bernstein_coefficients<number>(index,
+                                  degree))
+{}
+
+
+#include "polynomials_bernstein.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/polynomials_bernstein.inst b/source/base/polynomials_bernstein.inst
new file mode 100644
index 0000000..f8c0f1b
--- /dev/null
+++ b/source/base/polynomials_bernstein.inst
@@ -0,0 +1,2 @@
+
+// Explicit instantiation of the class template for number = double.
+template class dealii::PolynomialsBernstein<double>;
diff --git a/source/base/polynomials_nedelec.cc b/source/base/polynomials_nedelec.cc
new file mode 100644
index 0000000..ea52665
--- /dev/null
+++ b/source/base/polynomials_nedelec.cc
@@ -0,0 +1,1415 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/geometry_info.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/polynomial.h>
+#include <deal.II/base/polynomials_nedelec.h>
+#include <iostream>
+#include <iomanip>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// Constructor. Stores k as the degree and derives both the underlying
+// polynomial space (from create_polynomials(k)) and the number of
+// polynomials (from compute_n_pols(k)) from it.
+template<int dim>
+PolynomialsNedelec<dim>::PolynomialsNedelec (const unsigned int k)
+  :
+  my_degree (k),
+  polynomial_space (create_polynomials (k)),
+  n_pols (compute_n_pols (k))
+{}
+
+// Assemble the dim polynomial bases for degree k: entry 0 is the complete
+// Legendre basis generated for k, and every remaining entry is the
+// complete Lobatto basis generated for k + 1.
+template<int dim>
+std::vector<std::vector< Polynomials::Polynomial<double> > >
+PolynomialsNedelec<dim>::create_polynomials (const unsigned int k)
+{
+  typedef std::vector< Polynomials::Polynomial<double> > basis_type;
+
+  std::vector<basis_type> bases (dim);
+
+  bases[0] = Polynomials::Legendre::generate_complete_basis (k);
+
+  for (unsigned int d = 1; d < dim; ++d)
+    {
+      bases[d] = Polynomials::Lobatto::generate_complete_basis (k + 1);
+    }
+
+  return bases;
+}
+
+
+// Compute the values, gradients
+// and double gradients of the
+// polynomial at the given point.
+template<int dim>
+void
+PolynomialsNedelec<dim>::compute (const Point<dim> &unit_point,
+                                  std::vector<Tensor<1,dim> > &values,
+                                  std::vector<Tensor<2,dim> > &grads,
+                                  std::vector<Tensor<3,dim> > &grad_grads,
+                                  std::vector<Tensor<4,dim> > &third_derivatives,
+                                  std::vector<Tensor<5,dim> > &fourth_derivatives) const
+{
+  Assert(values.size () == n_pols || values.size () == 0,
+         ExcDimensionMismatch(values.size (), n_pols));
+  Assert(grads.size () == n_pols || grads.size () == 0,
+         ExcDimensionMismatch(grads.size (), n_pols));
+  Assert(grad_grads.size () == n_pols || grad_grads.size () == 0,
+         ExcDimensionMismatch(grad_grads.size (), n_pols));
+  Assert(third_derivatives.size () == n_pols || third_derivatives.size () == 0,
+         ExcDimensionMismatch(third_derivatives.size (), n_pols));
+  Assert(fourth_derivatives.size () == n_pols || fourth_derivatives.size () == 0,
+         ExcDimensionMismatch(fourth_derivatives.size (), n_pols));
+
+  // third and fourth derivatives not implemented
+  (void)third_derivatives;
+  Assert(third_derivatives.size () == 0,
+         ExcNotImplemented());
+  (void)fourth_derivatives;
+  Assert(fourth_derivatives.size () == 0,
+         ExcNotImplemented());
+
+  // Declare the values, derivatives
+  // and second derivatives vectors of
+  // <tt>polynomial_space</tt> at
+  // <tt>unit_point</tt>
+  const unsigned int &n_basis = polynomial_space.n ();
+  std::vector<double> unit_point_values ((values.size () == 0) ? 0 : n_basis);
+  std::vector<Tensor<1, dim> >
+  unit_point_grads ((grads.size () == 0) ? 0 : n_basis);
+  std::vector<Tensor<2, dim> >
+  unit_point_grad_grads ((grad_grads.size () == 0) ? 0 : n_basis);
+  std::vector<Tensor<3, dim> > empty_vector_of_3rd_order_tensors;
+  std::vector<Tensor<4, dim> > empty_vector_of_4th_order_tensors;
+
+  switch (dim)
+    {
+    case 1:
+    {
+      polynomial_space.compute (unit_point, unit_point_values,
+                                unit_point_grads, unit_point_grad_grads,
+                                empty_vector_of_3rd_order_tensors,
+                                empty_vector_of_4th_order_tensors);
+
+      // Assign the correct values to the
+      // corresponding shape functions.
+      if (values.size () > 0)
+        for (unsigned int i = 0; i < unit_point_values.size (); ++i)
+          values[i][0] = unit_point_values[i];
+
+      if (grads.size () > 0)
+        for (unsigned int i = 0; i < unit_point_grads.size (); ++i)
+          grads[i][0][0] = unit_point_grads[i][0];
+
+      if (grad_grads.size () > 0)
+        for (unsigned int i = 0; i < unit_point_grad_grads.size (); ++i)
+          grad_grads[i][0][0][0] = unit_point_grad_grads[i][0][0];
+
+      break;
+    }
+
+    case 2:
+    {
+      polynomial_space.compute (unit_point, unit_point_values,
+                                unit_point_grads, unit_point_grad_grads,
+                                empty_vector_of_3rd_order_tensors,
+                                empty_vector_of_4th_order_tensors);
+
+      // Declare the values, derivatives and
+      // second derivatives vectors of
+      // <tt>polynomial_space</tt> at
+      // <tt>unit_point</tt> with coordinates
+      // shifted one step in positive direction
+      Point<dim> p;
+
+      p (0) = unit_point (1);
+      p (1) = unit_point (0);
+
+      std::vector<double> p_values ((values.size () == 0) ? 0 : n_basis);
+      std::vector<Tensor<1, dim> >
+      p_grads ((grads.size () == 0) ? 0 : n_basis);
+      std::vector<Tensor<2, dim> >
+      p_grad_grads ((grad_grads.size () == 0) ? 0 : n_basis);
+
+      polynomial_space.compute (p, p_values, p_grads, p_grad_grads,
+                                empty_vector_of_3rd_order_tensors,
+                                empty_vector_of_4th_order_tensors);
+
+      // Assign the correct values to the
+      // corresponding shape functions.
+      if (values.size () > 0)
+        {
+          for (unsigned int i = 0; i <= my_degree; ++i)
+            for (unsigned int j = 0; j < 2; ++j)
+              {
+                values[i + j * (my_degree + 1)][0] = 0.0;
+                values[i + j * (my_degree + 1)][1]
+                  = p_values[i + j * (my_degree + 1)];
+                values[i + (j + 2) * (my_degree + 1)][0]
+                  = unit_point_values[i + j * (my_degree + 1)];
+                values[i + (j + 2) * (my_degree + 1)][1] = 0.0;
+              }
+
+          if (my_degree > 0)
+            for (unsigned int i = 0; i <= my_degree; ++i)
+              for (unsigned int j = 0; j < my_degree; ++j)
+                {
+                  values[(i + GeometryInfo<dim>::lines_per_cell) * my_degree
+                         + j + GeometryInfo<dim>::lines_per_cell][0]
+                    = unit_point_values[i + (j + 2) * (my_degree + 1)];
+                  values[(i + GeometryInfo<dim>::lines_per_cell) * my_degree
+                         + j + GeometryInfo<dim>::lines_per_cell][1] = 0.0;
+                  values[i + (j + my_degree
+                              + GeometryInfo<dim>::lines_per_cell)
+                         * (my_degree + 1)][0] = 0.0;
+                  values[i + (j + my_degree
+                              + GeometryInfo<dim>::lines_per_cell)
+                         * (my_degree + 1)][1]
+                    = p_values[i + (j + 2) * (my_degree + 1)];
+                }
+        }
+
+      if (grads.size () > 0)
+        {
+          for (unsigned int i = 0; i <= my_degree; ++i)
+            for (unsigned int j = 0; j < 2; ++j)
+              {
+                for (unsigned int k = 0; k < dim; ++k)
+                  {
+                    grads[i + j * (my_degree + 1)][0][k] = 0.0;
+                    grads[i + (j + 2) * (my_degree + 1)][0][k]
+                      = unit_point_grads[i + j * (my_degree + 1)][k];
+                    grads[i + (j + 2) * (my_degree + 1)][1][k] = 0.0;
+                  }
+
+                grads[i + j * (my_degree + 1)][1][0]
+                  = p_grads[i + j * (my_degree + 1)][1];
+                grads[i + j * (my_degree + 1)][1][1]
+                  = p_grads[i + j * (my_degree + 1)][0];
+              }
+
+          if (my_degree > 0)
+            for (unsigned int i = 0; i <= my_degree; ++i)
+              for (unsigned int j = 0; j < my_degree; ++j)
+                {
+                  for (unsigned int k = 0; k < dim; ++k)
+                    {
+                      grads[(i + GeometryInfo<dim>::lines_per_cell)
+                            * my_degree + j
+                            + GeometryInfo<dim>::lines_per_cell][0][k]
+                        = unit_point_grads[i + (j + 2) * (my_degree + 1)]
+                          [k];
+                      grads[(i + GeometryInfo<dim>::lines_per_cell)
+                            * my_degree + j
+                            + GeometryInfo<dim>::lines_per_cell][1][k]
+                        = 0.0;
+                      grads[i + (j + my_degree
+                                 + GeometryInfo<dim>::lines_per_cell)
+                            * (my_degree + 1)][0][k] = 0.0;
+                    }
+
+                  grads[i + (j + my_degree
+                             + GeometryInfo<dim>::lines_per_cell)
+                        * (my_degree + 1)][1][0]
+                    = p_grads[i + (j + 2) * (my_degree + 1)][1];
+                  grads[i + (j + my_degree
+                             + GeometryInfo<dim>::lines_per_cell)
+                        * (my_degree + 1)][1][1]
+                    = p_grads[i + (j + 2) * (my_degree + 1)][0];
+                }
+        }
+
+      if (grad_grads.size () > 0)
+        {
+          for (unsigned int i = 0; i <= my_degree; ++i)
+            for (unsigned int j = 0; j < 2; ++j)
+              {
+                for (unsigned int k = 0; k < dim; ++k)
+                  for (unsigned int l = 0; l < dim; ++l)
+                    {
+                      grad_grads[i + j * (my_degree + 1)][0][k][l] = 0.0;
+                      grad_grads[i + (j + 2) * (my_degree + 1)][0][k][l]
+                        = unit_point_grad_grads[i + j * (my_degree + 1)][k]
+                          [l];
+                      grad_grads[i + (j + 2) * (my_degree + 1)][1][k][l]
+                        = 0.0;
+                    }
+
+                grad_grads[i + j * (my_degree + 1)][1][0][0]
+                  = p_grad_grads[i + j * (my_degree + 1)][1][1];
+                grad_grads[i + j * (my_degree + 1)][1][0][1]
+                  = p_grad_grads[i + j * (my_degree + 1)][1][0];
+                grad_grads[i + j * (my_degree + 1)][1][1][0]
+                  = p_grad_grads[i + j * (my_degree + 1)][0][1];
+                grad_grads[i + j * (my_degree + 1)][1][1][1]
+                  = p_grad_grads[i + j * (my_degree + 1)][0][0];
+              }
+
+          if (my_degree > 0)
+            for (unsigned int i = 0; i <= my_degree; ++i)
+              for (unsigned int j = 0; j < my_degree; ++j)
+                {
+                  for (unsigned int k = 0; k < dim; ++k)
+                    for (unsigned int l = 0; l < dim; ++l)
+                      {
+                        grad_grads[(i + GeometryInfo<dim>::lines_per_cell)
+                                   * my_degree + j
+                                   + GeometryInfo<dim>::lines_per_cell][0]
+                        [k][l]
+                          = unit_point_grad_grads[i + (j + 2)
+                                                  * (my_degree + 1)][k][l];
+                        grad_grads[(i + GeometryInfo<dim>::lines_per_cell)
+                                   * my_degree + j
+                                   + GeometryInfo<dim>::lines_per_cell][1]
+                        [k][l] = 0.0;
+                        grad_grads[i + (j + my_degree
+                                        + GeometryInfo<dim>::lines_per_cell)
+                                   * (my_degree + 1)][0][k][l] = 0.0;
+                      }
+
+                  grad_grads[i + (j + my_degree
+                                  + GeometryInfo<dim>::lines_per_cell)
+                             * (my_degree + 1)][1][0][0]
+                    = p_grad_grads[i + (j + 2) * (my_degree + 1)][1][1];
+                  grad_grads[i + (j + my_degree
+                                  + GeometryInfo<dim>::lines_per_cell)
+                             * (my_degree + 1)][1][0][1]
+                    = p_grad_grads[i + (j + 2) * (my_degree + 1)][1][0];
+                  grad_grads[i + (j + my_degree
+                                  + GeometryInfo<dim>::lines_per_cell)
+                             * (my_degree + 1)][1][1][0]
+                    = p_grad_grads[i + (j + 2) * (my_degree + 1)][0][1];
+                  grad_grads[i + (j + my_degree
+                                  + GeometryInfo<dim>::lines_per_cell)
+                             * (my_degree + 1)][1][1][1]
+                    = p_grad_grads[i + (j + 2) * (my_degree + 1)][0][0];
+                }
+        }
+
+      break;
+    }
+
+    case 3:
+    {
+      polynomial_space.compute (unit_point, unit_point_values,
+                                unit_point_grads, unit_point_grad_grads,
+                                empty_vector_of_3rd_order_tensors,
+                                empty_vector_of_4th_order_tensors);
+
+      // Declare the values, derivatives
+      // and second derivatives vectors of
+      // <tt>polynomial_space</tt> at
+      // <tt>unit_point</tt> with coordinates
+      // shifted two steps in positive
+      // direction
+      Point<dim> p1, p2;
+      std::vector<double> p1_values ((values.size () == 0) ? 0 : n_basis);
+      std::vector<Tensor<1, dim> >
+      p1_grads ((grads.size () == 0) ? 0 : n_basis);
+      std::vector<Tensor<2, dim> >
+      p1_grad_grads ((grad_grads.size () == 0) ? 0 : n_basis);
+      std::vector<double> p2_values ((values.size () == 0) ? 0 : n_basis);
+      std::vector<Tensor<1, dim> >
+      p2_grads ((grads.size () == 0) ? 0 : n_basis);
+      std::vector<Tensor<2, dim> >
+      p2_grad_grads ((grad_grads.size () == 0) ? 0 : n_basis);
+
+      p1 (0) = unit_point (1);
+      p1 (1) = unit_point (2);
+      p1 (2) = unit_point (0);
+      polynomial_space.compute (p1, p1_values, p1_grads, p1_grad_grads,
+                                empty_vector_of_3rd_order_tensors,
+                                empty_vector_of_4th_order_tensors);
+      p2 (0) = unit_point (2);
+      p2 (1) = unit_point (0);
+      p2 (2) = unit_point (1);
+      polynomial_space.compute (p2, p2_values, p2_grads, p2_grad_grads,
+                                empty_vector_of_3rd_order_tensors,
+                                empty_vector_of_4th_order_tensors);
+
+      // Assign the correct values to the
+      // corresponding shape functions.
+      if (values.size () > 0)
+        {
+          for (unsigned int i = 0; i <= my_degree; ++i)
+            {
+              for (unsigned int j = 0; j < 2; ++j)
+                {
+                  for (unsigned int k = 0; k < 2; ++k)
+                    {
+                      for (unsigned int l = 0; l < 2; ++l)
+                        {
+                          values[i + (j + 4 * k) * (my_degree + 1)][2 * l]
+                            = 0.0;
+                          values[i + (j + 4 * k + 2) * (my_degree + 1)]
+                          [l + 1] = 0.0;
+                          values[i + (j + 2 * (k + 4)) * (my_degree + 1)][l]
+                            = 0.0;
+                        }
+
+                      values[i + (j + 4 * k + 2) * (my_degree + 1)][0]
+                        = unit_point_values[i + (j + k * (my_degree + 2))
+                                            * (my_degree + 1)];
+                      values[i + (j + 2 * (k + 4)) * (my_degree + 1)][2]
+                        = p2_values[i + (j + k * (my_degree + 2))
+                                    * (my_degree + 1)];
+                    }
+
+                  values[i + j * (my_degree + 1)][1]
+                    = p1_values[i + j * (my_degree + 1) * (my_degree + 2)];
+                }
+
+              values[i + 4 * (my_degree + 1)][1]
+                = p1_values[i + my_degree + 1];
+              values[i + 5 * (my_degree + 1)][1]
+                = p1_values[i + (my_degree + 1) * (my_degree + 3)];
+            }
+
+          if (my_degree > 0)
+            for (unsigned int i = 0; i <= my_degree; ++i)
+              for (unsigned int j = 0; j < my_degree; ++j)
+                {
+                  for (unsigned int k = 0; k < my_degree; ++k)
+                    {
+                      for (unsigned int l = 0; l < 2; ++l)
+                        {
+                          values[((i + 2
+                                   * GeometryInfo<dim>::faces_per_cell)
+                                  * my_degree + j
+                                  + GeometryInfo<dim>::lines_per_cell + 2
+                                  * GeometryInfo<dim>::faces_per_cell)
+                                 * my_degree + k
+                                 + GeometryInfo<dim>::lines_per_cell][l + 1]
+                            = 0.0;
+                          values[(i + (j + 2
+                                       * GeometryInfo<dim>::faces_per_cell
+                                       + my_degree)
+                                  * (my_degree + 1)
+                                  + GeometryInfo<dim>::lines_per_cell)
+                                 * my_degree + k
+                                 + GeometryInfo<dim>::lines_per_cell][2 * l]
+                            = 0.0;
+                          values[i + (j + (k + 2
+                                           * (GeometryInfo<dim>::faces_per_cell
+                                              + my_degree))
+                                      * my_degree
+                                      + GeometryInfo<dim>::lines_per_cell)
+                                 * (my_degree + 1)][l] = 0.0;
+                        }
+
+                      values[((i + 2 * GeometryInfo<dim>::faces_per_cell)
+                              * my_degree + j
+                              + GeometryInfo<dim>::lines_per_cell + 2
+                              * GeometryInfo<dim>::faces_per_cell)
+                             * my_degree + k
+                             + GeometryInfo<dim>::lines_per_cell][0]
+                        = unit_point_values[i + (j + (k + 2) * (my_degree
+                                                                + 2) + 2)
+                                            * (my_degree + 1)];
+                      values[(i + (j + 2 * GeometryInfo<dim>::faces_per_cell
+                                   + my_degree) * (my_degree + 1)
+                              + GeometryInfo<dim>::lines_per_cell)
+                             * my_degree + k
+                             + GeometryInfo<dim>::lines_per_cell][1]
+                        = p1_values[i + ((j + 2) * (my_degree + 2) + k + 2)
+                                    * (my_degree + 1)];
+                      values[i + (j + (k + 2
+                                       * (GeometryInfo<dim>::faces_per_cell
+                                          + my_degree))
+                                  * my_degree
+                                  + GeometryInfo<dim>::lines_per_cell)
+                             * (my_degree + 1)][2]
+                        = p2_values[i + (j + (k + 2) * (my_degree + 2) + 2)
+                                    * (my_degree + 1)];
+                    }
+
+                  for (unsigned int k = 0; k < 2; ++k)
+                    {
+                      for (unsigned int l = 0; l < 2; ++l)
+                        {
+                          for (unsigned int m = 0; m < 2; ++m)
+                            {
+                              values[i + (j + (2 * (k + 2 * l) + 1)
+                                          * my_degree
+                                          + GeometryInfo<dim>::lines_per_cell)
+                                     * (my_degree + 1)][m + l] = 0.0;
+                              values[(i + 2 * (k + 2 * (l + 1)) * (my_degree
+                                                                   + 1)
+                                      + GeometryInfo<dim>::lines_per_cell)
+                                     * my_degree + j
+                                     + GeometryInfo<dim>::lines_per_cell]
+                              [m + l] = 0.0;
+                            }
+
+                          values[(i + 2 * k * (my_degree + 1)
+                                  + GeometryInfo<dim>::lines_per_cell)
+                                 * my_degree + j
+                                 + GeometryInfo<dim>::lines_per_cell][2 * l]
+                            = 0.0;
+                          values[i + (j + (2 * k + 9) * my_degree
+                                      + GeometryInfo<dim>::lines_per_cell)
+                                 * (my_degree + 1)][2 * l] = 0.0;
+                        }
+
+                      values[(i + 2 * k * (my_degree + 1)
+                              + GeometryInfo<dim>::lines_per_cell)
+                             * my_degree + j
+                             + GeometryInfo<dim>::lines_per_cell][1]
+                        = p1_values[i + (j + k * (my_degree + 2) + 2)
+                                    * (my_degree + 1)];
+                      values[i + (j + (2 * k + 1) * my_degree
+                                  + GeometryInfo<dim>::lines_per_cell)
+                             * (my_degree + 1)][2]
+                        = p2_values[i + ((j + 2) * (my_degree + 2) + k)
+                                    * (my_degree + 1)];
+                      values[(i + 2 * (k + 2) * (my_degree + 1)
+                              + GeometryInfo<dim>::lines_per_cell)
+                             * my_degree + j
+                             + GeometryInfo<dim>::lines_per_cell][2]
+                        = p2_values[i + (j + k * (my_degree + 2) + 2)
+                                    * (my_degree + 1)];
+                      values[i + (j + (2 * k + 5) * my_degree
+                                  + GeometryInfo<dim>::lines_per_cell)
+                             * (my_degree + 1)][0]
+                        = unit_point_values[i + ((j + 2) * (my_degree + 2) + k)
+                                            * (my_degree + 1)];
+                      values[(i + 2 * (k + 4) * (my_degree + 1)
+                              + GeometryInfo<dim>::lines_per_cell)
+                             * my_degree + j
+                             + GeometryInfo<dim>::lines_per_cell][0]
+                        = unit_point_values[i + (j + k * (my_degree + 2)
+                                                 + 2) * (my_degree + 1)];
+                      values[i + (j + (2 * k + 9) * my_degree
+                                  + GeometryInfo<dim>::lines_per_cell)
+                             * (my_degree + 1)][1]
+                        = p1_values[i + ((j + 2) * (my_degree + 2) + k)
+                                    * (my_degree + 1)];
+                    }
+                }
+        }
+
+      if (grads.size () > 0)
+        {
+          for (unsigned int i = 0; i <= my_degree; ++i)
+            {
+              for (unsigned int j = 0; j < 2; ++j)
+                {
+                  for (unsigned int k = 0; k < 2; ++k)
+                    {
+                      for (unsigned int l = 0; l < 2; ++l)
+                        for (unsigned int m = 0; m < dim; ++m)
+                          {
+                            grads[i + (j + 4 * k) * (my_degree + 1)][2 * l]
+                            [m] = 0.0;
+                            grads[i + (j + 4 * k + 2) * (my_degree + 1)]
+                            [l + 1][m] = 0.0;
+                            grads[i + (j + 2 * (k + 4)) * (my_degree + 1)]
+                            [l][m] = 0.0;
+                          }
+
+                      for (unsigned int l = 0; l < dim; ++l)
+                        grads[i + (j + 4 * k + 2) * (my_degree + 1)][0][l]
+                          = unit_point_grads[i + (j + k * (my_degree + 2))
+                                             * (my_degree + 1)][l];
+
+                      grads[i + (j + 2 * (k + 4)) * (my_degree + 1)][2][0]
+                        = p2_grads[i + (j + k * (my_degree + 2))
+                                   * (my_degree + 1)][1];
+                      grads[i + (j + 2 * (k + 4)) * (my_degree + 1)][2][1]
+                        = p2_grads[i + (j + k * (my_degree + 2))
+                                   * (my_degree + 1)][2];
+                      grads[i + (j + 2 * (k + 4)) * (my_degree + 1)][2][2]
+                        = p2_grads[i + (j + k * (my_degree + 2))
+                                   * (my_degree + 1)][0];
+                    }
+
+                  grads[i + j * (my_degree + 1)][1][0]
+                    = p1_grads[i + j * (my_degree + 1) * (my_degree + 2)]
+                      [2];
+                  grads[i + j * (my_degree + 1)][1][1]
+                    = p1_grads[i + j * (my_degree + 1) * (my_degree + 2)]
+                      [0];
+                  grads[i + j * (my_degree + 1)][1][2]
+                    = p1_grads[i + j * (my_degree + 1) * (my_degree + 2)]
+                      [1];
+                }
+
+              grads[i + 4 * (my_degree + 1)][1][0]
+                = p1_grads[i + my_degree + 1][2];
+              grads[i + 4 * (my_degree + 1)][1][1]
+                = p1_grads[i + my_degree + 1][0];
+              grads[i + 4 * (my_degree + 1)][1][2]
+                = p1_grads[i + my_degree + 1][1];
+              grads[i + 5 * (my_degree + 1)][1][0]
+                = p1_grads[i + (my_degree + 1) * (my_degree + 3)][2];
+              grads[i + 5 * (my_degree + 1)][1][1]
+                = p1_grads[i + (my_degree + 1) * (my_degree + 3)][0];
+              grads[i + 5 * (my_degree + 1)][1][2]
+                = p1_grads[i + (my_degree + 1) * (my_degree + 3)][1];
+            }
+
+          if (my_degree > 0)
+            for (unsigned int i = 0; i <= my_degree; ++i)
+              for (unsigned int j = 0; j < my_degree; ++j)
+                {
+                  for (unsigned int k = 0; k < my_degree; ++k)
+                    {
+                      for (unsigned int l = 0; l < dim; ++l)
+                        {
+                          for (unsigned int m = 0; m < 2; ++m)
+                            {
+                              grads[((i + 2
+                                      * GeometryInfo<dim>::faces_per_cell)
+                                     * my_degree + j
+                                     + GeometryInfo<dim>::lines_per_cell
+                                     + 2
+                                     * GeometryInfo<dim>::faces_per_cell)
+                                    * my_degree + k
+                                    + GeometryInfo<dim>::lines_per_cell]
+                              [m + 1][l] = 0.0;
+                              grads[(i + (j + 2
+                                          * GeometryInfo<dim>::faces_per_cell
+                                          + my_degree) * (my_degree + 1)
+                                     + GeometryInfo<dim>::lines_per_cell)
+                                    * my_degree + k
+                                    + GeometryInfo<dim>::lines_per_cell]
+                              [2 * m][l] = 0.0;
+                              grads[i + (j + (k + 2
+                                              * (GeometryInfo<dim>::faces_per_cell
+                                                 + my_degree)) * my_degree
+                                         + GeometryInfo<dim>::lines_per_cell)
+                                    * (my_degree + 1)][m][l] = 0.0;
+                            }
+
+                          grads[((i + 2 * GeometryInfo<dim>::faces_per_cell)
+                                 * my_degree + j
+                                 + GeometryInfo<dim>::lines_per_cell + 2
+                                 * GeometryInfo<dim>::faces_per_cell)
+                                * my_degree + k
+                                + GeometryInfo<dim>::lines_per_cell][0][l]
+                            = unit_point_grads[i + (j + (k + 2)
+                                                    * (my_degree + 2) + 2)
+                                               * (my_degree + 1)][l];
+                        }
+
+                      grads[(i + (j + 2 * GeometryInfo<dim>::faces_per_cell
+                                  + my_degree) * (my_degree + 1)
+                             + GeometryInfo<dim>::lines_per_cell)
+                            * my_degree + k
+                            + GeometryInfo<dim>::lines_per_cell][1][0]
+                        = p1_grads[i + ((j + 2) * (my_degree + 2) + k + 2)
+                                   * (my_degree + 1)][2];
+                      grads[(i + (j + 2 * GeometryInfo<dim>::faces_per_cell
+                                  + my_degree) * (my_degree + 1)
+                             + GeometryInfo<dim>::lines_per_cell)
+                            * my_degree + k
+                            + GeometryInfo<dim>::lines_per_cell][1][1]
+                        = p1_grads[i + ((j + 2) * (my_degree + 2) + k + 2)
+                                   * (my_degree + 1)][0];
+                      grads[(i + (j + 2 * GeometryInfo<dim>::faces_per_cell
+                                  + my_degree) * (my_degree + 1)
+                             + GeometryInfo<dim>::lines_per_cell)
+                            * my_degree + k
+                            + GeometryInfo<dim>::lines_per_cell][1][2]
+                        = p1_grads[i + ((j + 2) * (my_degree + 2) + k + 2)
+                                   * (my_degree + 1)][1];
+                      grads[i + (j + (k + 2
+                                      * (GeometryInfo<dim>::faces_per_cell
+                                         + my_degree)) * my_degree
+                                 + GeometryInfo<dim>::lines_per_cell)
+                            * (my_degree + 1)][2][0]
+                        = p2_grads[i + (j + (k + 2) * (my_degree + 2) + 2)
+                                   * (my_degree + 1)][1];
+                      grads[i + (j + (k + 2
+                                      * (GeometryInfo<dim>::faces_per_cell
+                                         + my_degree)) * my_degree
+                                 + GeometryInfo<dim>::lines_per_cell)
+                            * (my_degree + 1)][2][1]
+                        = p2_grads[i + (j + (k + 2) * (my_degree + 2) + 2)
+                                   * (my_degree + 1)][2];
+                      grads[i + (j + (k + 2
+                                      * (GeometryInfo<dim>::faces_per_cell
+                                         + my_degree))
+                                 * my_degree
+                                 + GeometryInfo<dim>::lines_per_cell)
+                            * (my_degree + 1)][2][2]
+                        = p2_grads[i + (j + (k + 2) * (my_degree + 2) + 2)
+                                   * (my_degree + 1)][0];
+                    }
+
+                  for (unsigned int k = 0; k < 2; ++k)
+                    {
+                      for (unsigned int l = 0; l < 2; ++l)
+                        for (unsigned int m = 0; m < dim; ++m)
+                          {
+                            for (unsigned int n = 0; n < 2; ++n)
+                              {
+                                grads[i + (j + (2 * (k + 2 * l) + 1)
+                                           * my_degree
+                                           + GeometryInfo<dim>::lines_per_cell)
+                                      * (my_degree + 1)][n + l][m] = 0.0;
+                                grads[(i + 2 * (k + 2 * (l + 1))
+                                       * (my_degree + 1)
+                                       + GeometryInfo<dim>::lines_per_cell)
+                                      * my_degree + j
+                                      + GeometryInfo<dim>::lines_per_cell]
+                                [n + l][m] = 0.0;
+                              }
+
+                            grads[(i + 2 * k * (my_degree + 1)
+                                   + GeometryInfo<dim>::lines_per_cell)
+                                  * my_degree + j
+                                  + GeometryInfo<dim>::lines_per_cell]
+                            [2 * l][m] = 0.0;
+                            grads[i + (j + (2 * k + 9) * my_degree
+                                       + GeometryInfo<dim>::lines_per_cell)
+                                  * (my_degree + 1)][2 * l][m] = 0.0;
+                          }
+
+                      for (unsigned int l = 0; l < dim; ++l)
+                        {
+                          grads[i + (j + (2 * k + 5) * my_degree
+                                     + GeometryInfo<dim>::lines_per_cell)
+                                * (my_degree + 1)][0][l]
+                            = unit_point_grads[i + ((j + 2) * (my_degree
+                                                               + 2) + k)
+                                               * (my_degree + 1)][l];
+                          grads[(i + 2 * (k + 4) * (my_degree + 1)
+                                 + GeometryInfo<dim>::lines_per_cell)
+                                * my_degree + j
+                                + GeometryInfo<dim>::lines_per_cell][0][l]
+                            = unit_point_grads[i + (j + k * (my_degree + 2)
+                                                    + 2) * (my_degree + 1)]
+                              [l];
+                        }
+
+                      grads[(i + 2 * k * (my_degree + 1)
+                             + GeometryInfo<dim>::lines_per_cell)
+                            * my_degree + j
+                            + GeometryInfo<dim>::lines_per_cell][1][0]
+                        = p1_grads[i + (j + k * (my_degree + 2) + 2)
+                                   * (my_degree + 1)][2];
+                      grads[(i + 2 * k * (my_degree + 1)
+                             + GeometryInfo<dim>::lines_per_cell)
+                            * my_degree + j
+                            + GeometryInfo<dim>::lines_per_cell][1][1]
+                        = p1_grads[i + (j + k * (my_degree + 2) + 2)
+                                   * (my_degree + 1)][0];
+                      grads[(i + 2 * k * (my_degree + 1)
+                             + GeometryInfo<dim>::lines_per_cell)
+                            * my_degree + j
+                            + GeometryInfo<dim>::lines_per_cell][1][2]
+                        = p1_grads[i + (j + k * (my_degree + 2) + 2)
+                                   * (my_degree + 1)][1];
+                      grads[i + (j + (2 * k + 1) * my_degree
+                                 + GeometryInfo<dim>::lines_per_cell)
+                            * (my_degree + 1)][2][0]
+                        = p2_grads[i + ((j + 2) * (my_degree + 2) + k)
+                                   * (my_degree + 1)][1];
+                      grads[i + (j + (2 * k + 1) * my_degree
+                                 + GeometryInfo<dim>::lines_per_cell)
+                            * (my_degree + 1)][2][1]
+                        = p2_grads[i + ((j + 2) * (my_degree + 2) + k)
+                                   * (my_degree + 1)][2];
+                      grads[i + (j + (2 * k + 1) * my_degree
+                                 + GeometryInfo<dim>::lines_per_cell)
+                            * (my_degree + 1)][2][2]
+                        = p2_grads[i + ((j + 2) * (my_degree + 2) + k)
+                                   * (my_degree + 1)][0];
+                      grads[(i + 2 * (k + 2) * (my_degree + 1)
+                             + GeometryInfo<dim>::lines_per_cell)
+                            * my_degree + j
+                            + GeometryInfo<dim>::lines_per_cell][2][0]
+                        = p2_grads[i + (j + k * (my_degree + 2) + 2)
+                                   * (my_degree + 1)][1];
+                      grads[(i + 2 * (k + 2) * (my_degree + 1)
+                             + GeometryInfo<dim>::lines_per_cell)
+                            * my_degree + j
+                            + GeometryInfo<dim>::lines_per_cell][2][1]
+                        = p2_grads[i + (j + k * (my_degree + 2) + 2)
+                                   * (my_degree + 1)][2];
+                      grads[(i + 2 * (k + 2) * (my_degree + 1)
+                             + GeometryInfo<dim>::lines_per_cell)
+                            * my_degree + j
+                            + GeometryInfo<dim>::lines_per_cell][2][2]
+                        = p2_grads[i + (j + k * (my_degree + 2) + 2)
+                                   * (my_degree + 1)][0];
+                      grads[i + (j + (2 * k + 9) * my_degree
+                                 + GeometryInfo<dim>::lines_per_cell)
+                            * (my_degree + 1)][1][0]
+                        = p1_grads[i + ((j + 2) * (my_degree + 2) + k)
+                                   * (my_degree + 1)][2];
+                      grads[i + (j + (2 * k + 9) * my_degree
+                                 + GeometryInfo<dim>::lines_per_cell)
+                            * (my_degree + 1)][1][1]
+                        = p1_grads[i + ((j + 2) * (my_degree + 2) + k)
+                                   * (my_degree + 1)][0];
+                      grads[i + (j + (2 * k + 9) * my_degree
+                                 + GeometryInfo<dim>::lines_per_cell)
+                            * (my_degree + 1)][1][2]
+                        = p1_grads[i + ((j + 2) * (my_degree + 2) + k)
+                                   * (my_degree + 1)][1];
+                    }
+                }
+        }
+
+      if (grad_grads.size () > 0)
+        {
+          for (unsigned int i = 0; i <= my_degree; ++i)
+            {
+              for (unsigned int j = 0; j < 2; ++j)
+                {
+                  for (unsigned int k = 0; k < 2; ++k)
+                    {
+                      for (unsigned int l = 0; l < dim; ++l)
+                        for (unsigned int m = 0; m < dim; ++m)
+                          {
+                            for (unsigned int n = 0; n < 2; ++n)
+                              {
+                                grad_grads[i + (j + 4 * k) * (my_degree
+                                                              + 1)][2 * n]
+                                [l][m] = 0.0;
+                                grad_grads[i + (j + 4 * k + 2) * (my_degree
+                                                                  + 1)]
+                                [n + 1][l][m] = 0.0;
+                                grad_grads[i + (j + 2 * (k + 4))
+                                           * (my_degree + 1)][n][l][m]
+                                  = 0.0;
+                              }
+
+                            grad_grads[i + (j + 4 * k + 2) * (my_degree
+                                                              + 1)][0][l][m]
+                              = unit_point_grad_grads[i + (j + k
+                                                           * (my_degree
+                                                              + 2))
+                                                      * (my_degree + 1)][l]
+                                [m];
+                          }
+
+                      grad_grads[i + (j + 2 * (k + 4)) * (my_degree + 1)]
+                      [2][0][0]
+                        = p2_grad_grads[i + (j + k * (my_degree + 2))
+                                        * (my_degree + 1)][1][1];
+                      grad_grads[i + (j + 2 * (k + 4)) * (my_degree + 1)]
+                      [2][0][1]
+                        = p2_grad_grads[i + (j + k * (my_degree + 2))
+                                        * (my_degree + 1)][1][2];
+                      grad_grads[i + (j + 2 * (k + 4)) * (my_degree + 1)]
+                      [2][0][2]
+                        = p2_grad_grads[i + (j + k * (my_degree + 2))
+                                        * (my_degree + 1)][1][0];
+                      grad_grads[i + (j + 2 * (k + 4)) * (my_degree + 1)]
+                      [2][1][0]
+                        = p2_grad_grads[i + (j + k * (my_degree + 2))
+                                        * (my_degree + 1)][2][1];
+                      grad_grads[i + (j + 2 * (k + 4)) * (my_degree + 1)]
+                      [2][1][1]
+                        = p2_grad_grads[i + (j + k * (my_degree + 2))
+                                        * (my_degree + 1)][2][2];
+                      grad_grads[i + (j + 2 * (k + 4)) * (my_degree + 1)]
+                      [2][1][2]
+                        = p2_grad_grads[i + (j + k * (my_degree + 2))
+                                        * (my_degree + 1)][2][0];
+                      grad_grads[i + (j + 2 * (k + 4)) * (my_degree + 1)]
+                      [2][2][0]
+                        = p2_grad_grads[i + (j + k * (my_degree + 2))
+                                        * (my_degree + 1)][0][1];
+                      grad_grads[i + (j + 2 * (k + 4)) * (my_degree + 1)]
+                      [2][2][1]
+                        = p2_grad_grads[i + (j + k * (my_degree + 2))
+                                        * (my_degree + 1)][0][2];
+                      grad_grads[i + (j + 2 * (k + 4)) * (my_degree + 1)]
+                      [2][2][2]
+                        = p2_grad_grads[i + (j + k * (my_degree + 2))
+                                        * (my_degree + 1)][0][0];
+                    }
+
+                  grad_grads[i + j * (my_degree + 1)][1][0][0]
+                    = p1_grad_grads[i + j * (my_degree + 1)
+                                    * (my_degree + 2)][2][2];
+                  grad_grads[i + j * (my_degree + 1)][1][0][1]
+                    = p1_grad_grads[i + j * (my_degree + 1)
+                                    * (my_degree + 2)][2][0];
+                  grad_grads[i + j * (my_degree + 1)][1][0][2]
+                    = p1_grad_grads[i + j * (my_degree + 1)
+                                    * (my_degree + 2)][2][1];
+                  grad_grads[i + j * (my_degree + 1)][1][1][0]
+                    = p1_grad_grads[i + j * (my_degree + 1)
+                                    * (my_degree + 2)][0][2];
+                  grad_grads[i + j * (my_degree + 1)][1][1][1]
+                    = p1_grad_grads[i + j * (my_degree + 1)
+                                    * (my_degree + 2)][0][0];
+                  grad_grads[i + j * (my_degree + 1)][1][1][2]
+                    = p1_grad_grads[i + j * (my_degree + 1)
+                                    * (my_degree + 2)][0][1];
+                  grad_grads[i + j * (my_degree + 1)][1][2][0]
+                    = p1_grad_grads[i + j * (my_degree + 1)
+                                    * (my_degree + 2)][1][2];
+                  grad_grads[i + j * (my_degree + 1)][1][2][1]
+                    = p1_grad_grads[i + j * (my_degree + 1)
+                                    * (my_degree + 2)][1][0];
+                  grad_grads[i + j * (my_degree + 1)][1][2][2]
+                    = p1_grad_grads[i + j * (my_degree + 1)
+                                    * (my_degree + 2)][1][1];
+                }
+
+              grad_grads[i + 4 * (my_degree + 1)][1][0][0]
+                = p1_grad_grads[i + my_degree + 1][2][2];
+              grad_grads[i + 4 * (my_degree + 1)][1][0][1]
+                = p1_grad_grads[i + my_degree + 1][2][0];
+              grad_grads[i + 4 * (my_degree + 1)][1][0][2]
+                = p1_grad_grads[i + my_degree + 1][2][1];
+              grad_grads[i + 4 * (my_degree + 1)][1][1][0]
+                = p1_grad_grads[i + my_degree + 1][0][2];
+              grad_grads[i + 4 * (my_degree + 1)][1][1][1]
+                = p1_grad_grads[i + my_degree + 1][0][0];
+              grad_grads[i + 4 * (my_degree + 1)][1][1][2]
+                = p1_grad_grads[i + my_degree + 1][0][1];
+              grad_grads[i + 4 * (my_degree + 1)][1][2][0]
+                = p1_grad_grads[i + my_degree + 1][1][2];
+              grad_grads[i + 4 * (my_degree + 1)][1][2][1]
+                = p1_grad_grads[i + my_degree + 1][1][0];
+              grad_grads[i + 4 * (my_degree + 1)][1][2][2]
+                = p1_grad_grads[i + my_degree + 1][1][1];
+              grad_grads[i + 5 * (my_degree + 1)][1][0][0]
+                = p1_grad_grads[i + (my_degree + 1) * (my_degree + 3)][2]
+                  [2];
+              grad_grads[i + 5 * (my_degree + 1)][1][0][1]
+                = p1_grad_grads[i + (my_degree + 1) * (my_degree + 3)][2]
+                  [0];
+              grad_grads[i + 5 * (my_degree + 1)][1][0][2]
+                = p1_grad_grads[i + (my_degree + 1) * (my_degree + 3)][2]
+                  [1];
+              grad_grads[i + 5 * (my_degree + 1)][1][1][0]
+                = p1_grad_grads[i + (my_degree + 1) * (my_degree + 3)][0]
+                  [2];
+              grad_grads[i + 5 * (my_degree + 1)][1][1][1]
+                = p1_grad_grads[i + (my_degree + 1) * (my_degree + 3)][0]
+                  [0];
+              grad_grads[i + 5 * (my_degree + 1)][1][1][2]
+                = p1_grad_grads[i + (my_degree + 1) * (my_degree + 3)][0]
+                  [1];
+              grad_grads[i + 5 * (my_degree + 1)][1][2][0]
+                = p1_grad_grads[i + (my_degree + 1) * (my_degree + 3)][1]
+                  [2];
+              grad_grads[i + 5 * (my_degree + 1)][1][2][1]
+                = p1_grad_grads[i + (my_degree + 1) * (my_degree + 3)][1]
+                  [0];
+              grad_grads[i + 5 * (my_degree + 1)][1][2][2]
+                = p1_grad_grads[i + (my_degree + 1) * (my_degree + 3)][1]
+                  [1];
+            }
+
+          if (my_degree > 0)
+            for (unsigned int i = 0; i <= my_degree; ++i)
+              for (unsigned int j = 0; j < my_degree; ++j)
+                {
+                  for (unsigned int k = 0; k < my_degree; ++k)
+                    {
+                      for (unsigned int l = 0; l < dim; ++l)
+                        for (unsigned int m = 0; m < dim; ++m)
+                          {
+                            for (unsigned int n = 0; n < 2; ++n)
+                              {
+                                grad_grads[((i + 2
+                                             * GeometryInfo<dim>::faces_per_cell)
+                                            * my_degree + j
+                                            + GeometryInfo<dim>::lines_per_cell
+                                            + 2
+                                            * GeometryInfo<dim>::faces_per_cell)
+                                           * my_degree + k
+                                           + GeometryInfo<dim>::lines_per_cell]
+                                [n + 1][l][m] = 0.0;
+                                grad_grads[(i + (j + 2
+                                                 * GeometryInfo<dim>::faces_per_cell
+                                                 + my_degree) * (my_degree
+                                                                 + 1)
+                                            + GeometryInfo<dim>::lines_per_cell)
+                                           * my_degree + k
+                                           + GeometryInfo<dim>::lines_per_cell]
+                                [2 * n][l][m] = 0.0;
+                                grad_grads[i + (j + (k + 2
+                                                     * (GeometryInfo<dim>::faces_per_cell
+                                                        + my_degree))
+                                                * my_degree
+                                                + GeometryInfo<dim>::lines_per_cell)
+                                           * (my_degree + 1)][n][l][m]
+                                  = 0.0;
+                              }
+
+                            grad_grads[((i + 2
+                                         * GeometryInfo<dim>::faces_per_cell)
+                                        * my_degree + j
+                                        + GeometryInfo<dim>::lines_per_cell
+                                        + 2
+                                        * GeometryInfo<dim>::faces_per_cell)
+                                       * my_degree + k
+                                       + GeometryInfo<dim>::lines_per_cell]
+                            [0][l][m]
+                              = unit_point_grad_grads[i + (j + (k + 2)
+                                                           * (my_degree + 2)
+                                                           + 2) * (my_degree
+                                                                   + 1)][l][m];
+                          }
+
+                      grad_grads[(i + (j + 2
+                                       * GeometryInfo<dim>::faces_per_cell
+                                       + my_degree)
+                                  * (my_degree + 1)
+                                  + GeometryInfo<dim>::lines_per_cell)
+                                 * my_degree + k
+                                 + GeometryInfo<dim>::lines_per_cell][1][0]
+                      [0]
+                        = p1_grad_grads[i + ((j + 2) * (my_degree + 2) + k
+                                             + 2) * (my_degree + 1)][2][2];
+                      grad_grads[(i + (j + 2
+                                       * GeometryInfo<dim>::faces_per_cell
+                                       + my_degree) * (my_degree + 1)
+                                  + GeometryInfo<dim>::lines_per_cell)
+                                 * my_degree + k
+                                 + GeometryInfo<dim>::lines_per_cell][1][0]
+                      [1]
+                        = p1_grad_grads[i + ((j + 2) * (my_degree + 2) + k
+                                             + 2) * (my_degree + 1)][2][0];
+                      grad_grads[(i + (j + 2
+                                       * GeometryInfo<dim>::faces_per_cell
+                                       + my_degree) * (my_degree + 1)
+                                  + GeometryInfo<dim>::lines_per_cell)
+                                 * my_degree + k
+                                 + GeometryInfo<dim>::lines_per_cell][1][0]
+                      [2]
+                        = p1_grad_grads[i + ((j + 2) * (my_degree + 2) + k
+                                             + 2) * (my_degree + 1)][2][1];
+                      grad_grads[(i + (j + 2
+                                       * GeometryInfo<dim>::faces_per_cell
+                                       + my_degree) * (my_degree + 1)
+                                  + GeometryInfo<dim>::lines_per_cell)
+                                 * my_degree + k
+                                 + GeometryInfo<dim>::lines_per_cell][1][1]
+                      [0]
+                        = p1_grad_grads[i + ((j + 2) * (my_degree + 2) + k
+                                             + 2) * (my_degree + 1)][0][2];
+                      grad_grads[(i + (j + 2
+                                       * GeometryInfo<dim>::faces_per_cell
+                                       + my_degree) * (my_degree + 1)
+                                  + GeometryInfo<dim>::lines_per_cell)
+                                 * my_degree + k
+                                 + GeometryInfo<dim>::lines_per_cell][1][1]
+                      [1]
+                        = p1_grad_grads[i + ((j + 2) * (my_degree + 2) + k
+                                             + 2) * (my_degree + 1)][0][0];
+                      grad_grads[(i + (j + 2
+                                       * GeometryInfo<dim>::faces_per_cell
+                                       + my_degree) * (my_degree + 1)
+                                  + GeometryInfo<dim>::lines_per_cell)
+                                 * my_degree + k
+                                 + GeometryInfo<dim>::lines_per_cell][1][1]
+                      [2]
+                        = p1_grad_grads[i + ((j + 2) * (my_degree + 2) + k
+                                             + 2) * (my_degree + 1)][0][1];
+                      grad_grads[(i + (j + 2
+                                       * GeometryInfo<dim>::faces_per_cell
+                                       + my_degree) * (my_degree + 1)
+                                  + GeometryInfo<dim>::lines_per_cell)
+                                 * my_degree + k
+                                 + GeometryInfo<dim>::lines_per_cell][1][2]
+                      [0]
+                        = p1_grad_grads[i + ((j + 2) * (my_degree + 2) + k
+                                             + 2) * (my_degree + 1)][1][2];
+                      grad_grads[(i + (j + 2
+                                       * GeometryInfo<dim>::faces_per_cell
+                                       + my_degree) * (my_degree + 1)
+                                  + GeometryInfo<dim>::lines_per_cell)
+                                 * my_degree + k
+                                 + GeometryInfo<dim>::lines_per_cell][1][2]
+                      [1]
+                        = p1_grad_grads[i + ((j + 2) * (my_degree + 2) + k
+                                             + 2) * (my_degree + 1)][1][0];
+                      grad_grads[(i + (j + 2
+                                       * GeometryInfo<dim>::faces_per_cell
+                                       + my_degree) * (my_degree + 1)
+                                  + GeometryInfo<dim>::lines_per_cell)
+                                 * my_degree + k
+                                 + GeometryInfo<dim>::lines_per_cell][1][2]
+                      [2]
+                        = p1_grad_grads[i + ((j + 2) * (my_degree + 2) + k
+                                             + 2) * (my_degree + 1)][1][1];
+                      grad_grads[i + (j + (k + 2
+                                           * (GeometryInfo<dim>::faces_per_cell
+                                              + my_degree)) * my_degree
+                                      + GeometryInfo<dim>::lines_per_cell)
+                                 * (my_degree + 1)][2][0][0]
+                        = p2_grad_grads[i + (j + (k + 2) * (my_degree + 2)
+                                             + 2) * (my_degree + 1)][1][1];
+                      grad_grads[i + (j + (k + 2
+                                           * (GeometryInfo<dim>::faces_per_cell
+                                              + my_degree)) * my_degree
+                                      + GeometryInfo<dim>::lines_per_cell)
+                                 * (my_degree + 1)][2][0][1]
+                        = p2_grad_grads[i + (j + (k + 2) * (my_degree + 2)
+                                             + 2) * (my_degree + 1)][1][2];
+                      grad_grads[i + (j + (k + 2
+                                           * (GeometryInfo<dim>::faces_per_cell
+                                              + my_degree))
+                                      * my_degree
+                                      + GeometryInfo<dim>::lines_per_cell)
+                                 * (my_degree + 1)][2][0][2]
+                        = p2_grad_grads[i + (j + (k + 2) * (my_degree + 2)
+                                             + 2) * (my_degree + 1)][1][0];
+                      grad_grads[i + (j + (k + 2
+                                           * (GeometryInfo<dim>::faces_per_cell
+                                              + my_degree))
+                                      * my_degree
+                                      + GeometryInfo<dim>::lines_per_cell)
+                                 * (my_degree + 1)][2][1][0]
+                        = p2_grad_grads[i + (j + (k + 2) * (my_degree + 2)
+                                             + 2) * (my_degree + 1)][2][1];
+                      grad_grads[i + (j + (k + 2
+                                           * (GeometryInfo<dim>::faces_per_cell
+                                              + my_degree)) * my_degree
+                                      + GeometryInfo<dim>::lines_per_cell)
+                                 * (my_degree + 1)][2][1][1]
+                        = p2_grad_grads[i + (j + (k + 2) * (my_degree + 2)
+                                             + 2) * (my_degree + 1)][2][2];
+                      grad_grads[i + (j + (k + 2
+                                           * (GeometryInfo<dim>::faces_per_cell
+                                              + my_degree)) * my_degree
+                                      + GeometryInfo<dim>::lines_per_cell)
+                                 * (my_degree + 1)][2][1][2]
+                        = p2_grad_grads[i + (j + (k + 2) * (my_degree + 2)
+                                             + 2) * (my_degree + 1)][2][0];
+                      grad_grads[i + (j + (k + 2
+                                           * (GeometryInfo<dim>::faces_per_cell
+                                              + my_degree)) * my_degree
+                                      + GeometryInfo<dim>::lines_per_cell)
+                                 * (my_degree + 1)][2][2][0]
+                        = p2_grad_grads[i + (j + (k + 2) * (my_degree + 2)
+                                             + 2) * (my_degree + 1)][0][1];
+                      grad_grads[i + (j + (k + 2
+                                           * (GeometryInfo<dim>::faces_per_cell
+                                              + my_degree)) * my_degree
+                                      + GeometryInfo<dim>::lines_per_cell)
+                                 * (my_degree + 1)][2][2][1]
+                        = p2_grad_grads[i + (j + (k + 2) * (my_degree + 2)
+                                             + 2) * (my_degree + 1)][0][2];
+                      grad_grads[i + (j + (k + 2
+                                           * (GeometryInfo<dim>::faces_per_cell
+                                              + my_degree)) * my_degree
+                                      + GeometryInfo<dim>::lines_per_cell)
+                                 * (my_degree + 1)][2][2][2]
+                        = p2_grad_grads[i + (j + (k + 2) * (my_degree + 2)
+                                             + 2) * (my_degree + 1)][0][0];
+                    }
+
+                  for (unsigned int k = 0; k < 2; ++k)
+                    {
+                      for (unsigned int l = 0; l < dim; ++l)
+                        for (unsigned int m = 0; m < dim; ++m)
+                          {
+                            for (unsigned int n = 0; n < 2; ++n)
+                              {
+                                for (unsigned int o = 0; o < 2; ++o)
+                                  {
+                                    grad_grads[i + (j + (2 * (k + 2 * n)
+                                                         + 1) * my_degree
+                                                    + GeometryInfo<dim>::lines_per_cell)
+                                               * (my_degree + 1)][o + n][l][m]
+                                      = 0.0;
+                                    grad_grads[(i + 2 * (k + 2 * (n + 1))
+                                                * (my_degree + 1)
+                                                + GeometryInfo<dim>::lines_per_cell)
+                                               * my_degree + j
+                                               + GeometryInfo<dim>::lines_per_cell]
+                                    [o + k][l][m] = 0.0;
+                                  }
+
+                                grad_grads[(i + 2 * k * (my_degree + 1)
+                                            + GeometryInfo<dim>::lines_per_cell)
+                                           * my_degree + j
+                                           + GeometryInfo<dim>::lines_per_cell]
+                                [2 * n][l][m] = 0.0;
+                                grad_grads[i + (j + (2 * k + 9)
+                                                * my_degree
+                                                + GeometryInfo<dim>::lines_per_cell)
+                                           * (my_degree + 1)][2 * n][l][m]
+                                  = 0.0;
+                              }
+
+                            grad_grads[i + (j + (2 * k + 5) * my_degree
+                                            + GeometryInfo<dim>::lines_per_cell)
+                                       * (my_degree + 1)]
+                            [0][l][m]
+                              = unit_point_grad_grads[i + ((j + 2)
+                                                           * (my_degree
+                                                              + 2) + k)
+                                                      * (my_degree + 1)][l]
+                                [m];
+                            grad_grads[(i + 2 * (k + 4) * (my_degree + 1)
+                                        + GeometryInfo<dim>::lines_per_cell)
+                                       * my_degree + j
+                                       + GeometryInfo<dim>::lines_per_cell]
+                            [0][l][m]
+                              = unit_point_grad_grads[i + (j + k
+                                                           * (my_degree
+                                                              + 2) + 2)
+                                                      * (my_degree + 1)][l]
+                                [m];
+                          }
+
+                      grad_grads[(i + 2 * k * (my_degree + 1)
+                                  + GeometryInfo<dim>::lines_per_cell)
+                                 * my_degree + j
+                                 + GeometryInfo<dim>::lines_per_cell][1][0]
+                      [0]
+                        = p1_grad_grads[i + (j + k * (my_degree + 2) + 2)
+                                        * (my_degree + 1)][2][2];
+                      grad_grads[(i + 2 * k * (my_degree + 1)
+                                  + GeometryInfo<dim>::lines_per_cell)
+                                 * my_degree + j
+                                 + GeometryInfo<dim>::lines_per_cell][1][0]
+                      [1]
+                        = p1_grad_grads[i + (j + k * (my_degree + 2) + 2)
+                                        * (my_degree + 1)][2][0];
+                      grad_grads[(i + 2 * k * (my_degree + 1)
+                                  + GeometryInfo<dim>::lines_per_cell)
+                                 * my_degree + j
+                                 + GeometryInfo<dim>::lines_per_cell][1][0]
+                      [2]
+                        = p1_grad_grads[i + (j + k * (my_degree + 2) + 2)
+                                        * (my_degree + 1)][2][1];
+                      grad_grads[(i + 2 * k * (my_degree + 1)
+                                  + GeometryInfo<dim>::lines_per_cell)
+                                 * my_degree + j
+                                 + GeometryInfo<dim>::lines_per_cell][1][1]
+                      [0]
+                        = p1_grad_grads[i + (j + k * (my_degree + 2) + 2)
+                                        * (my_degree + 1)][0][2];
+                      grad_grads[(i + 2 * k * (my_degree + 1)
+                                  + GeometryInfo<dim>::lines_per_cell)
+                                 * my_degree + j
+                                 + GeometryInfo<dim>::lines_per_cell][1][1]
+                      [1]
+                        = p1_grad_grads[i + (j + k * (my_degree + 2) + 2)
+                                        * (my_degree + 1)][0][0];
+                      grad_grads[(i + 2 * k * (my_degree + 1)
+                                  + GeometryInfo<dim>::lines_per_cell)
+                                 * my_degree + j
+                                 + GeometryInfo<dim>::lines_per_cell][1][1]
+                      [2]
+                        = p1_grad_grads[i + (j + k * (my_degree + 2) + 2)
+                                        * (my_degree + 1)][0][1];
+                      grad_grads[(i + 2 * k * (my_degree + 1)
+                                  + GeometryInfo<dim>::lines_per_cell)
+                                 * my_degree + j
+                                 + GeometryInfo<dim>::lines_per_cell][1][2]
+                      [0]
+                        = p1_grad_grads[i + (j + k * (my_degree + 2) + 2)
+                                        * (my_degree + 1)][1][2];
+                      grad_grads[(i + 2 * k * (my_degree + 1)
+                                  + GeometryInfo<dim>::lines_per_cell)
+                                 * my_degree + j
+                                 + GeometryInfo<dim>::lines_per_cell][1][2]
+                      [1]
+                        = p1_grad_grads[i + (j + k * (my_degree + 2) + 2)
+                                        * (my_degree + 1)][1][0];
+                      grad_grads[(i + 2 * k * (my_degree + 1)
+                                  + GeometryInfo<dim>::lines_per_cell)
+                                 * my_degree + j
+                                 + GeometryInfo<dim>::lines_per_cell][1][2]
+                      [2]
+                        = p1_grad_grads[i + (j + k * (my_degree + 2) + 2)
+                                        * (my_degree + 1)][1][1];
+                      grad_grads[i + (j + (2 * k + 1) * my_degree
+                                      + GeometryInfo<dim>::lines_per_cell)
+                                 * (my_degree + 1)][2][0][0]
+                        = p2_grad_grads[i + ((j + 2) * (my_degree + 2) + k)
+                                        * (my_degree + 1)][1][1];
+                      grad_grads[i + (j + (2 * k + 1) * my_degree
+                                      + GeometryInfo<dim>::lines_per_cell)
+                                 * (my_degree + 1)][2][0][1]
+                        = p2_grad_grads[i + ((j + 2) * (my_degree + 2) + k)
+                                        * (my_degree + 1)][1][2];
+                      grad_grads[i + (j + (2 * k + 1) * my_degree
+                                      + GeometryInfo<dim>::lines_per_cell)
+                                 * (my_degree + 1)][2][0][2]
+                        = p2_grad_grads[i + ((j + 2) * (my_degree + 2) + k)
+                                        * (my_degree + 1)][1][0];
+                      grad_grads[i + (j + (2 * k + 1) * my_degree
+                                      + GeometryInfo<dim>::lines_per_cell)
+                                 * (my_degree + 1)][2][1][0]
+                        = p2_grad_grads[i + ((j + 2) * (my_degree + 2) + k)
+                                        * (my_degree + 1)][2][1];
+                      grad_grads[i + (j + (2 * k + 1) * my_degree
+                                      + GeometryInfo<dim>::lines_per_cell)
+                                 * (my_degree + 1)][2][1][1]
+                        = p2_grad_grads[i + ((j + 2) * (my_degree + 2) + k)
+                                        * (my_degree + 1)][2][2];
+                      grad_grads[i + (j + (2 * k + 1) * my_degree
+                                      + GeometryInfo<dim>::lines_per_cell)
+                                 * (my_degree + 1)][2][1][2]
+                        = p2_grad_grads[i + ((j + 2) * (my_degree + 2) + k)
+                                        * (my_degree + 1)][2][0];
+                      grad_grads[i + (j + (2 * k + 1) * my_degree
+                                      + GeometryInfo<dim>::lines_per_cell)
+                                 * (my_degree + 1)][2][2][0]
+                        = p2_grad_grads[i + ((j + 2) * (my_degree + 2) + k)
+                                        * (my_degree + 1)][0][1];
+                      grad_grads[i + (j + (2 * k + 1) * my_degree
+                                      + GeometryInfo<dim>::lines_per_cell)
+                                 * (my_degree + 1)][2][2][1]
+                        = p2_grad_grads[i + ((j + 2) * (my_degree + 2) + k)
+                                        * (my_degree + 1)][0][2];
+                      grad_grads[i + (j + (2 * k + 1) * my_degree
+                                      + GeometryInfo<dim>::lines_per_cell)
+                                 * (my_degree + 1)][2][2][2]
+                        = p2_grad_grads[i + ((j + 2) * (my_degree + 2) + k)
+                                        * (my_degree + 1)][0][0];
+                      grad_grads[(i + 2 * (k + 2) * (my_degree + 1)
+                                  + GeometryInfo<dim>::lines_per_cell)
+                                 * my_degree + j
+                                 + GeometryInfo<dim>::lines_per_cell][2][0][0]
+                        = p2_grad_grads[i + (j + k * (my_degree + 2) + 2)
+                                        * (my_degree + 1)][1][1];
+                      grad_grads[(i + 2 * (k + 2) * (my_degree + 1)
+                                  + GeometryInfo<dim>::lines_per_cell)
+                                 * my_degree + j
+                                 + GeometryInfo<dim>::lines_per_cell][2][0][1]
+                        = p2_grad_grads[i + (j + k * (my_degree + 2) + 2)
+                                        * (my_degree + 1)][1][2];
+                      grad_grads[(i + 2 * (k + 2) * (my_degree + 1)
+                                  + GeometryInfo<dim>::lines_per_cell)
+                                 * my_degree + j
+                                 + GeometryInfo<dim>::lines_per_cell][2][0][2]
+                        = p2_grad_grads[i + (j + k * (my_degree + 2) + 2)
+                                        * (my_degree + 1)][1][0];
+                      grad_grads[(i + 2 * (k + 2) * (my_degree + 1)
+                                  + GeometryInfo<dim>::lines_per_cell)
+                                 * my_degree + j
+                                 + GeometryInfo<dim>::lines_per_cell][2][1][0]
+                        = p2_grad_grads[i + (j + k * (my_degree + 2) + 2)
+                                        * (my_degree + 1)][2][1];
+                      grad_grads[(i + 2 * (k + 2) * (my_degree + 1)
+                                  + GeometryInfo<dim>::lines_per_cell)
+                                 * my_degree + j
+                                 + GeometryInfo<dim>::lines_per_cell][2][1][1]
+                        = p2_grad_grads[i + (j + k * (my_degree + 2) + 2)
+                                        * (my_degree + 1)][2][2];
+                      grad_grads[(i + 2 * (k + 2) * (my_degree + 1)
+                                  + GeometryInfo<dim>::lines_per_cell)
+                                 * my_degree + j
+                                 + GeometryInfo<dim>::lines_per_cell][2][1][2]
+                        = p2_grad_grads[i + (j + k * (my_degree + 2) + 2)
+                                        * (my_degree + 1)][2][0];
+                      grad_grads[(i + 2 * (k + 2) * (my_degree + 1)
+                                  + GeometryInfo<dim>::lines_per_cell)
+                                 * my_degree + j
+                                 + GeometryInfo<dim>::lines_per_cell][2][2][0]
+                        = p2_grad_grads[i + (j + k * (my_degree + 2) + 2)
+                                        * (my_degree + 1)][0][1];
+                      grad_grads[(i + 2 * (k + 2) * (my_degree + 1)
+                                  + GeometryInfo<dim>::lines_per_cell)
+                                 * my_degree + j
+                                 + GeometryInfo<dim>::lines_per_cell][2][2][1]
+                        = p2_grad_grads[i + (j + k * (my_degree + 2) + 2)
+                                        * (my_degree + 1)][0][2];
+                      grad_grads[(i + 2 * (k + 2) * (my_degree + 1)
+                                  + GeometryInfo<dim>::lines_per_cell)
+                                 * my_degree + j
+                                 + GeometryInfo<dim>::lines_per_cell][2][2][2]
+                        = p2_grad_grads[i + (j + k * (my_degree + 2) + 2)
+                                        * (my_degree + 1)][0][0];
+                      grad_grads[i + (j + (2 * k + 9) * my_degree
+                                      + GeometryInfo<dim>::lines_per_cell)
+                                 * (my_degree + 1)][1][0][0]
+                        = p1_grad_grads[i + ((j + 2) * (my_degree + 2) + k)
+                                        * (my_degree + 1)][2][2];
+                      grad_grads[i + (j + (2 * k + 9) * my_degree
+                                      + GeometryInfo<dim>::lines_per_cell)
+                                 * (my_degree + 1)][1][0][1]
+                        = p1_grad_grads[i + ((j + 2) * (my_degree + 2) + k)
+                                        * (my_degree + 1)][2][0];
+                      grad_grads[i + (j + (2 * k + 9) * my_degree
+                                      + GeometryInfo<dim>::lines_per_cell)
+                                 * (my_degree + 1)][1][0][2]
+                        = p1_grad_grads[i + ((j + 2) * (my_degree + 2) + k)
+                                        * (my_degree + 1)][2][1];
+                      grad_grads[i + (j + (2 * k + 9) * my_degree
+                                      + GeometryInfo<dim>::lines_per_cell)
+                                 * (my_degree + 1)][1][1][0]
+                        = p1_grad_grads[i + ((j + 2) * (my_degree + 2) + k)
+                                        * (my_degree + 1)][0][2];
+                      grad_grads[i + (j + (2 * k + 9) * my_degree
+                                      + GeometryInfo<dim>::lines_per_cell)
+                                 * (my_degree + 1)][1][1][1]
+                        = p1_grad_grads[i + ((j + 2) * (my_degree + 2) + k)
+                                        * (my_degree + 1)][0][0];
+                      grad_grads[i + (j + (2 * k + 9) * my_degree
+                                      + GeometryInfo<dim>::lines_per_cell)
+                                 * (my_degree + 1)][1][1][2]
+                        = p1_grad_grads[i + ((j + 2) * (my_degree + 2) + k)
+                                        * (my_degree + 1)][0][1];
+                      grad_grads[i + (j + (2 * k + 9) * my_degree
+                                      + GeometryInfo<dim>::lines_per_cell)
+                                 * (my_degree + 1)][1][2][0]
+                        = p1_grad_grads[i + ((j + 2) * (my_degree + 2) + k)
+                                        * (my_degree + 1)][1][2];
+                      grad_grads[i + (j + (2 * k + 9) * my_degree
+                                      + GeometryInfo<dim>::lines_per_cell)
+                                 * (my_degree + 1)][1][2][1]
+                        = p1_grad_grads[i + ((j + 2) * (my_degree + 2) + k)
+                                        * (my_degree + 1)][1][0];
+                      grad_grads[i + (j + (2 * k + 9) * my_degree
+                                      + GeometryInfo<dim>::lines_per_cell)
+                                 * (my_degree + 1)][1][2][2]
+                        = p1_grad_grads[i + ((j + 2) * (my_degree + 2) + k)
+                                        * (my_degree + 1)][1][1];
+                    }
+                }
+        }
+
+      break;
+    }
+
+    default:
+      Assert (false, ExcNotImplemented ());
+    }
+}
+
+
+template<int dim>  // Returns a closed-form count for argument k (presumably the polynomial degree -- confirm in the header).
+unsigned int
+PolynomialsNedelec<dim>::compute_n_pols (unsigned int k)
+{
+  switch (dim)  // dim is the template parameter; each supported dimension has its own formula
+    {
+    case 1:
+      return k + 1;
+
+    case 2:
+      return 2 * (k + 1) * (k + 2);
+
+    case 3:
+      return 3 * (k + 1) * (k + 2) * (k + 2);
+
+    default:
+    {
+      Assert (false, ExcNotImplemented ());  // no formula is implemented for any other dim
+      return 0;  // fallback value in case the Assert does not stop execution
+    }
+    }
+}
+
+
+template class PolynomialsNedelec<1>;  // explicit instantiations for dim = 1, 2 and 3
+template class PolynomialsNedelec<2>;
+template class PolynomialsNedelec<3>;
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/polynomials_p.cc b/source/base/polynomials_p.cc
new file mode 100644
index 0000000..f967d8c
--- /dev/null
+++ b/source/base/polynomials_p.cc
@@ -0,0 +1,114 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/polynomials_p.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+template <int dim>  // Builds the base class from the monomial basis generated for p, then installs a renumbering.
+PolynomialsP<dim>::PolynomialsP (const unsigned int p)
+  :
+  PolynomialSpace<dim>(Polynomials::Monomial<double>::generate_complete_basis(p)),
+  p(p)
+{
+  std::vector<unsigned int> index_map(this->n());  // one entry per polynomial, n() of them
+  create_polynomial_ordering(index_map);  // filled by the dimension-specific specialization
+  this->set_numbering(index_map);  // hand the index map over to the base class
+}
+
+
+template <>  // 1d specialization: fills index_map, which must already have n() entries.
+void PolynomialsP<1>::create_polynomial_ordering(
+  std::vector<unsigned int> &index_map) const
+{
+  Assert(index_map.size()==this->n(),
+         ExcDimensionMismatch(index_map.size(), this->n()));
+
+  // identity: in 1d every index is mapped to itself
+  for (unsigned int i=0; i<this->n(); ++i)
+    index_map[i]=i;
+}
+
+
+namespace
+{
+  const unsigned int imap2[6][21]=  // row p is the 2d index map read for degree p; omitted trailing entries are zero
+  {
+    {0},  // p = 0
+    {0,1,2},  // p = 1
+    {0,1,3,4,2,5},  // p = 2
+    {0,1,4,5,2,7,6,8,3,9},  // p = 3
+    {0,1,5,6,2,9,7,10,3,12,11,8,13,4,14},  // p = 4
+    {0,1,6,7,2,11,8,12,3,15,13,9,16,4,18,14,17,10,19,5,20}  // p = 5
+  };
+}
+
+template <>  // 2d specialization: copies row p of the table imap2 into index_map.
+void PolynomialsP<2>::create_polynomial_ordering(
+  std::vector<unsigned int> &index_map) const
+{
+  Assert(index_map.size()==this->n(),
+         ExcDimensionMismatch(index_map.size(), this->n()));
+  Assert(p<=5, ExcNotImplemented());  // imap2 only has rows for p = 0..5
+
+  // Given the number i of a polynomial in the
+  // sequence $1,x,y,xy,x^2,y^2,...$,
+  // index_map[i] gives the number of the same
+  // polynomial in PolynomialSpace. The values
+  // are read from row p of the hard-coded
+  // table imap2.
+  for (unsigned int i=0; i<this->n(); ++i)
+    index_map[i]=imap2[p][i];
+}
+
+
+namespace
+{
+  const unsigned int imap3[4][20]=  // row p is the 3d index map read for degree p; omitted trailing entries are zero
+  {
+    {0},  // p = 0
+    {0,1,2,3},  // p = 1
+    {0,1,3,6,4,7,8,2,5,9},  // p = 2
+    {0,1,4,10,5,11,13,2,7,16,14,6,12,8,15,17,18,3,9,19}  // p = 3
+  };
+}
+
+template <>  // 3d specialization: copies row p of the table imap3 into index_map.
+void PolynomialsP<3>::create_polynomial_ordering(
+  std::vector<unsigned int> &index_map) const
+{
+  Assert(index_map.size()==this->n(),
+         ExcDimensionMismatch(index_map.size(), this->n()));
+  Assert(p<=3, ExcNotImplemented());  // imap3 only has rows for p = 0..3
+
+  // Given the number i of a polynomial in the
+  // ordering of this class, index_map[i] gives
+  // the number of the same polynomial in
+  // PolynomialSpace (read from row p of imap3).
+  // NOTE(review): the sequence $1,x,y,xy,x2,y2,...$
+  // quoted here is the 2d one -- 3d order TODO confirm.
+  for (unsigned int i=0; i<this->n(); ++i)
+    index_map[i]=imap3[p][i];
+}
+
+
+
+template class PolynomialsP<1>;  // explicit instantiations for dim = 1, 2 and 3
+template class PolynomialsP<2>;
+template class PolynomialsP<3>;
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/polynomials_piecewise.cc b/source/base/polynomials_piecewise.cc
new file mode 100644
index 0000000..2565686
--- /dev/null
+++ b/source/base/polynomials_piecewise.cc
@@ -0,0 +1,146 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/polynomials_piecewise.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+namespace Polynomials
+{
+
+  template <typename number>  // Stores the polynomial together with the interval layout it is attached to.
+  PiecewisePolynomial<number>::PiecewisePolynomial (const Polynomial<number> &coefficients_on_interval,
+                                                    const unsigned int        n_intervals,
+                                                    const unsigned int        interval,
+                                                    const bool                spans_next_interval)
+    :
+    polynomial               (coefficients_on_interval),
+    n_intervals              (n_intervals),
+    interval                 (interval),
+    spans_two_intervals      (spans_next_interval)  // the member is named differently from the argument
+  {
+    Assert (n_intervals > 0, ExcMessage ("No intervals given"));  // at least one interval is required
+    AssertIndexRange (interval, n_intervals);  // interval must index one of the n_intervals intervals
+  }
+
+
+
+  template <typename number>  // Fills values[0] with the value at x and values[d], d >= 1, with the derivatives.
+  void
+  PiecewisePolynomial<number>::value (const number         x,
+                                      std::vector<number> &values) const
+  {
+    Assert (values.size() > 0, ExcZero());  // there must be room for at least values[0]
+    const unsigned int values_size=values.size();
+
+    // map x to the local coordinate y at which the stored polynomial is evaluated
+    number y = x;
+    double derivative_change_sign = 1.;  // set to -1 on the mirrored branch below
+    if (n_intervals > 0)  // the constructor asserts n_intervals > 0
+      {
+        const number step = 1./n_intervals;  // width of one interval
+        // polynomial spans over two intervals: support is [offset, offset+2*step]
+        if (spans_two_intervals)
+          {
+            const double offset = step * interval;  // left end of the first interval
+            if (x<offset || x>offset+step+step)  // outside the support: all entries are zero
+              {
+                for (unsigned int k=0; k<values.size(); ++k)
+                  values[k] = 0;
+                return;
+              }
+            else if (x<offset+step)  // first interval: plain shift
+              y = x-offset;
+            else  // second interval: evaluate at the point mirrored about offset+step
+              {
+                derivative_change_sign = -1.;
+                y = offset+step+step-x;
+              }
+          }
+        else  // single interval: support is [offset, offset+step]
+          {
+            const double offset = step * interval;
+            if (x<offset || x>offset+step)  // outside the support: all entries are zero
+              {
+                for (unsigned int k=0; k<values.size(); ++k)
+                  values[k] = 0;
+                return;
+              }
+            else
+              y = x-offset;
+          }
+
+        // on subinterval boundaries (y within 1e-14 of 0 or of step), cannot
+        // evaluate derivatives properly, so set them to zero
+        if ((std::abs(y)<1e-14 && (interval > 0 ||
+                                   derivative_change_sign == -1.))
+            ||
+            (std::abs(y-step)<1e-14 &&
+             (interval < n_intervals-1 || derivative_change_sign == -1.)))
+          {
+            values[0] = value(x);  // single-argument overload supplies the function value
+            for (unsigned int d=1; d<values_size; ++d)
+              values[d] = 0;
+            return;
+          }
+      }
+
+    polynomial.value(y, values);
+
+    // odd-order entries change sign when the mirrored branch was taken
+    for (unsigned int j=1; j<values_size; j+=2)
+      values[j] *= derivative_change_sign;
+  }
+
+
+
+  std::vector<PiecewisePolynomial<double> >  // Returns n_subdivisions * base_degree + 1 piecewise polynomials.
+  generate_complete_Lagrange_basis_on_subdivisions (const unsigned int n_subdivisions,
+                                                    const unsigned int base_degree)
+  {
+    std::vector<Polynomial<double> > p_base =
+      LagrangeEquidistant::generate_complete_basis(base_degree);
+    for (unsigned int i=0; i<p_base.size(); ++i)
+      p_base[i].scale(n_subdivisions);  // presumably rescales to one subinterval -- TODO confirm scale() semantics
+    // Output size: one leading function plus base_degree functions per subdivision.
+    std::vector<PiecewisePolynomial<double> > p;
+    p.reserve (n_subdivisions * base_degree + 1);
+    // The first base polynomial is used once, on interval 0, without spanning.
+    p.push_back (PiecewisePolynomial<double> (p_base[0], n_subdivisions, 0,
+                                              false));
+    for (unsigned int s=0; s<n_subdivisions; ++s)
+      for (unsigned int i=0; i<base_degree; ++i)
+        p.push_back (PiecewisePolynomial<double> (p_base[i+1], n_subdivisions,
+                                                  s,  // interval index
+                                                  i==(base_degree-1) &&  // last polynomial of a subdivision ...
+                                                  s<n_subdivisions-1));  // ... spans into the next one, except on the last subdivision
+    return p;
+  }
+
+}
+
+// ------------------ explicit instantiations --------------- //
+
+namespace Polynomials
+{
+  template class PiecewisePolynomial<float>;
+  template class PiecewisePolynomial<double>;
+  template class PiecewisePolynomial<long double>;
+}
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/polynomials_rannacher_turek.cc b/source/base/polynomials_rannacher_turek.cc
new file mode 100644
index 0000000..68a2128
--- /dev/null
+++ b/source/base/polynomials_rannacher_turek.cc
@@ -0,0 +1,186 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/polynomials_rannacher_turek.h>
+#include <deal.II/base/geometry_info.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// Constructor. There is no state to set up; it only asserts that the class
+// is used with dim == 2, the single case implemented in this file.
+template <int dim>
+PolynomialsRannacherTurek<dim>::PolynomialsRannacherTurek()
+{
+  Assert(dim == 2, ExcNotImplemented());
+}
+
+
+
+// Evaluate the i-th basis polynomial (i = 0..3) at point p. Every polynomial
+// is a combination of 1, x, y and x^2 - y^2 with the coefficients hard-coded
+// below. Only dim == 2 is implemented; any other index raises
+// ExcNotImplemented through Assert and falls back to returning 0.
+template <int dim>
+double PolynomialsRannacherTurek<dim>::compute_value(const unsigned int i,
+                                                     const Point<dim> &p) const
+{
+  Assert(dim == 2, ExcNotImplemented());
+
+  switch (i)
+    {
+    case 0:
+      return (0.75 - 2.5*p(0) + 1.5*p(1) + 1.5*(p(0)*p(0) - p(1)*p(1)));
+    case 1:
+      return (-0.25 - 0.5*p(0) + 1.5*p(1) + 1.5*(p(0)*p(0) - p(1)*p(1)));
+    case 2:
+      return (0.75 + 1.5*p(0) - 2.5*p(1) - 1.5*(p(0)*p(0) - p(1)*p(1)));
+    case 3:
+      return (-0.25 + 1.5*p(0) - 0.5*p(1) - 1.5*(p(0)*p(0) - p(1)*p(1)));
+    default:
+      break;
+    }
+
+  // there is no polynomial with this index
+  Assert(false, ExcNotImplemented());
+  return 0;
+}
+
+
+
+// Return the gradient of the i-th basis polynomial (i = 0..3) at point p.
+// Only dim == 2 is implemented. For any other index ExcNotImplemented is
+// raised through Assert and the default-constructed tensor is returned.
+template <int dim>
+Tensor<1, dim> PolynomialsRannacherTurek<dim>::compute_grad(
+  const unsigned int i,
+  const Point<dim> &p) const
+{
+  Assert(dim == 2, ExcNotImplemented());
+  Tensor<1, dim> grad;
+
+  switch (i)
+    {
+    case 0:
+      grad[0] = -2.5 + 3*p(0);
+      grad[1] = 1.5 - 3*p(1);
+      break;
+    case 1:
+      grad[0] = -0.5 + 3.0*p(0);
+      grad[1] = 1.5 - 3.0*p(1);
+      break;
+    case 2:
+      grad[0] = 1.5 - 3.0*p(0);
+      grad[1] = -2.5 + 3.0*p(1);
+      break;
+    case 3:
+      grad[0] = 1.5 - 3.0*p(0);
+      grad[1] = -0.5 + 3.0*p(1);
+      break;
+    default:
+      Assert(false, ExcNotImplemented());
+    }
+
+  return grad;
+}
+
+
+
+// Return the matrix of second derivatives of the i-th basis polynomial
+// (i = 0..3). The polynomials are quadratic, so the result is constant and
+// the evaluation point is unused. Polynomials 0 and 1 share one Hessian,
+// polynomials 2 and 3 share its negative. Only dim == 2 is implemented.
+// An index outside 0..3 raises ExcNotImplemented through Assert, like
+// compute_value() and compute_grad() do, instead of silently handing back
+// the default-constructed tensor.
+template <int dim> Tensor<2, dim>
+PolynomialsRannacherTurek<dim>::compute_grad_grad(const unsigned int i,
+                                                  const Point<dim> & /*p*/)
+const
+{
+  Assert(dim == 2, ExcNotImplemented());
+  Tensor<2, dim> grad_grad;
+  switch (i)
+    {
+    case 0:
+    case 1:
+      grad_grad[0][0] = 3;
+      grad_grad[0][1] = 0;
+      grad_grad[1][0] = 0;
+      grad_grad[1][1] = -3;
+      break;
+    case 2:
+    case 3:
+      grad_grad[0][0] = -3;
+      grad_grad[0][1] = 0;
+      grad_grad[1][0] = 0;
+      grad_grad[1][1] = 3;
+      break;
+    default:
+      // an unknown index is a caller error, not a zero Hessian
+      Assert(false, ExcNotImplemented());
+    }
+  return grad_grad;
+}
+
+
+
+// Evaluate all basis polynomials and their derivatives at unit_point.
+// The number of polynomials equals GeometryInfo<dim>::faces_per_cell. Every
+// output vector must either be empty, in which case that quantity is not
+// computed, or hold exactly that many entries.
+template <int dim>
+void PolynomialsRannacherTurek<dim>::compute(
+  const Point<dim> &unit_point,
+  std::vector<double> &values,
+  std::vector<Tensor<1, dim> > &grads,
+  std::vector<Tensor<2, dim> > &grad_grads,
+  std::vector<Tensor<3, dim> > &third_derivatives,
+  std::vector<Tensor<4, dim> > &fourth_derivatives) const
+{
+  const unsigned int n_pols = dealii::GeometryInfo<dim>::faces_per_cell;
+  Assert(values.size() == n_pols || values.size() == 0,
+         ExcDimensionMismatch(values.size(), n_pols));
+  Assert(grads.size() == n_pols || grads.size() == 0,
+         ExcDimensionMismatch(grads.size(), n_pols));
+  Assert(grad_grads.size() == n_pols || grad_grads.size() == 0,
+         ExcDimensionMismatch(grad_grads.size(), n_pols));
+  Assert(third_derivatives.size() == n_pols || third_derivatives.size() == 0,
+         ExcDimensionMismatch(third_derivatives.size(), n_pols));
+  Assert(fourth_derivatives.size() == n_pols || fourth_derivatives.size() == 0,
+         ExcDimensionMismatch(fourth_derivatives.size(), n_pols));
+
+  // work out once which quantities the caller asked for
+  const bool fill_values = (values.size() != 0);
+  const bool fill_grads  = (grads.size() != 0);
+  const bool fill_hess   = (grad_grads.size() != 0);
+  const bool fill_third  = (third_derivatives.size() != 0);
+  const bool fill_fourth = (fourth_derivatives.size() != 0);
+
+  for (unsigned int k = 0; k < n_pols; ++k)
+    {
+      if (fill_values)
+        values[k] = compute_value(k, unit_point);
+      if (fill_grads)
+        grads[k] = compute_grad(k, unit_point);
+      if (fill_hess)
+        grad_grads[k] = compute_grad_grad(k, unit_point);
+      if (fill_third)
+        third_derivatives[k] = compute_derivative<3>(k, unit_point);
+      if (fill_fourth)
+        fourth_derivatives[k] = compute_derivative<4>(k, unit_point);
+    }
+}
+
+
+// explicit instantiations
+#include "polynomials_rannacher_turek.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/polynomials_rannacher_turek.inst.in b/source/base/polynomials_rannacher_turek.inst.in
new file mode 100644
index 0000000..32c95b6
--- /dev/null
+++ b/source/base/polynomials_rannacher_turek.inst.in
@@ -0,0 +1,22 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+// instantiate PolynomialsRannacherTurek once for every entry of DIMENSIONS
+for (deal_II_dimension : DIMENSIONS)
+  {
+    template class PolynomialsRannacherTurek<deal_II_dimension>;
+  }
+
diff --git a/source/base/polynomials_raviart_thomas.cc b/source/base/polynomials_raviart_thomas.cc
new file mode 100644
index 0000000..e449912
--- /dev/null
+++ b/source/base/polynomials_raviart_thomas.cc
@@ -0,0 +1,174 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/polynomials_raviart_thomas.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/thread_management.h>
+#include <iostream>
+#include <iomanip>
+
+//TODO[WB]: This class is not thread-safe: it uses mutable member variables
+// that contain temporary state. This is not what one would want when one uses
+// a finite element object in a number of different contexts on different
+// threads: finite element objects should be stateless.
+//TODO:[GK] This can be achieved by writing a function in Polynomial space
+// which does the rotated fill performed below and writes the data into the
+// right data structures. The same function would be used by Nedelec
+// polynomials.
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// Constructor for parameter k: stores k as the degree, builds the underlying
+// polynomial space from create_polynomials(k) and stores the number of
+// polynomials returned by compute_n_pols(k).
+template <int dim>
+PolynomialsRaviartThomas<dim>::PolynomialsRaviartThomas (const unsigned int k)
+  :
+  my_degree(k),
+  polynomial_space (create_polynomials (k)),
+  n_pols(compute_n_pols(k))
+{}
+
+
+
+// Assemble the dim one-dimensional polynomial bases handed to the underlying
+// polynomial space. Direction 0 gets the equidistant Lagrange basis of
+// degree k+1. Every other direction gets the equidistant Lagrange basis of
+// degree k, or the Legendre basis of degree 0 when k == 0.
+template <int dim>
+std::vector<std::vector< Polynomials::Polynomial< double > > >
+PolynomialsRaviartThomas<dim>::create_polynomials (const unsigned int k)
+{
+  typedef std::vector<Polynomials::Polynomial<double> > basis_type;
+
+  std::vector<basis_type> pols(dim);
+  pols[0] = Polynomials::LagrangeEquidistant::generate_complete_basis(k+1);
+
+  for (unsigned int d=1; d<dim; ++d)
+    {
+      if (k != 0)
+        pols[d] = Polynomials::LagrangeEquidistant::generate_complete_basis(k);
+      else
+        pols[d] = Polynomials::Legendre::generate_complete_basis(0);
+    }
+
+  return pols;
+}
+
+
+// Evaluate all n_pols vector-valued polynomials and their derivatives up to
+// fourth order at unit_point. Each output vector must either be empty or
+// have exactly n_pols entries; nothing is written to an empty one.
+// The scalar space polynomial_space is evaluated once per coordinate
+// direction d on a cyclically shifted copy of the point. Its results go
+// into component d of the entries starting at index d*n_sub, with the
+// derivative indices shifted back by d.
+// The scratch vectors are function-local statics shared by all calls, which
+// is why the whole body runs under a static mutex.
+template <int dim>
+void
+PolynomialsRaviartThomas<dim>::compute (const Point<dim>            &unit_point,
+                                        std::vector<Tensor<1,dim> > &values,
+                                        std::vector<Tensor<2,dim> > &grads,
+                                        std::vector<Tensor<3, dim> > &grad_grads,
+                                        std::vector<Tensor<4, dim> > &third_derivatives,
+                                        std::vector<Tensor<5, dim> > &fourth_derivatives) const
+{
+  Assert(values.size()==n_pols || values.size()==0,
+         ExcDimensionMismatch(values.size(), n_pols));
+  Assert(grads.size()==n_pols|| grads.size()==0,
+         ExcDimensionMismatch(grads.size(), n_pols));
+  Assert(grad_grads.size()==n_pols|| grad_grads.size()==0,
+         ExcDimensionMismatch(grad_grads.size(), n_pols));
+  Assert(third_derivatives.size()==n_pols|| third_derivatives.size()==0,
+         ExcDimensionMismatch(third_derivatives.size(), n_pols));
+  Assert(fourth_derivatives.size()==n_pols|| fourth_derivatives.size()==0,
+         ExcDimensionMismatch(fourth_derivatives.size(), n_pols));
+
+  // have a few scratch
+  // arrays. because we don't want to
+  // re-allocate them every time this
+  // function is called, we make them
+  // static. however, in return we
+  // have to ensure that the calls to
+  // the use of these variables is
+  // locked with a mutex. if the
+  // mutex is removed, several tests
+  // (notably
+  // deal.II/create_mass_matrix_05)
+  // will start to produce random
+  // results in multithread mode
+  static Threads::Mutex mutex;
+  Threads::Mutex::ScopedLock lock(mutex);
+
+  static std::vector<double> p_values;
+  static std::vector<Tensor<1,dim> > p_grads;
+  static std::vector<Tensor<2,dim> > p_grad_grads;
+  static std::vector<Tensor<3,dim> > p_third_derivatives;
+  static std::vector<Tensor<4,dim> > p_fourth_derivatives;
+
+  // size each scratch array to the number of scalar polynomials, or to
+  // zero if the corresponding output was not requested
+  const unsigned int n_sub = polynomial_space.n();
+  p_values.resize((values.size() == 0) ? 0 : n_sub);
+  p_grads.resize((grads.size() == 0) ? 0 : n_sub);
+  p_grad_grads.resize((grad_grads.size() == 0) ? 0 : n_sub);
+  p_third_derivatives.resize((third_derivatives.size() == 0) ? 0 : n_sub);
+  p_fourth_derivatives.resize((fourth_derivatives.size() == 0) ? 0 : n_sub);
+
+  for (unsigned int d=0; d<dim; ++d)
+    {
+      // First we copy the point. The
+      // polynomial space for
+      // component d consists of
+      // polynomials of degree k+1 in
+      // x_d and degree k in the
+      // other variables. in order to
+      // simplify this, we use the
+      // same AnisotropicPolynomial
+      // space and simply rotate the
+      // coordinates through all
+      // directions.
+      Point<dim> p;
+      for (unsigned int c=0; c<dim; ++c)
+        p(c) = unit_point((c+d)%dim);
+
+      polynomial_space.compute (p, p_values, p_grads, p_grad_grads, p_third_derivatives, p_fourth_derivatives);
+
+      // scatter the scalar results into component d of block d of the
+      // outputs; each derivative index d1 of the rotated point maps
+      // back to index (d1+d)%dim of the original coordinates
+      for (unsigned int i=0; i<p_values.size(); ++i)
+        values[i+d*n_sub][d] = p_values[i];
+
+      for (unsigned int i=0; i<p_grads.size(); ++i)
+        for (unsigned int d1=0; d1<dim; ++d1)
+          grads[i+d*n_sub][d][(d1+d)%dim] = p_grads[i][d1];
+
+      for (unsigned int i=0; i<p_grad_grads.size(); ++i)
+        for (unsigned int d1=0; d1<dim; ++d1)
+          for (unsigned int d2=0; d2<dim; ++d2)
+            grad_grads[i+d*n_sub][d][(d1+d)%dim][(d2+d)%dim]
+              = p_grad_grads[i][d1][d2];
+
+      for (unsigned int i=0; i<p_third_derivatives.size(); ++i)
+        for (unsigned int d1=0; d1<dim; ++d1)
+          for (unsigned int d2=0; d2<dim; ++d2)
+            for (unsigned int d3=0; d3<dim; ++d3)
+              third_derivatives[i+d*n_sub][d][(d1+d)%dim][(d2+d)%dim][(d3+d)%dim]
+                = p_third_derivatives[i][d1][d2][d3];
+
+      for (unsigned int i=0; i<p_fourth_derivatives.size(); ++i)
+        for (unsigned int d1=0; d1<dim; ++d1)
+          for (unsigned int d2=0; d2<dim; ++d2)
+            for (unsigned int d3=0; d3<dim; ++d3)
+              for (unsigned int d4=0; d4<dim; ++d4)
+                fourth_derivatives[i+d*n_sub][d][(d1+d)%dim][(d2+d)%dim][(d3+d)%dim][(d4+d)%dim]
+                  = p_fourth_derivatives[i][d1][d2][d3][d4];
+    }
+}
+
+
+// Return the number of polynomials in the space for parameter k:
+// k+1 in 1d, 2(k+1)(k+2) in 2d and 3(k+1)^2(k+2) in 3d. Any other dimension
+// raises ExcNotImplemented through Assert and yields 0.
+template <int dim>
+unsigned int
+PolynomialsRaviartThomas<dim>::compute_n_pols(unsigned int k)
+{
+  switch (dim)
+    {
+    case 1:
+      return k+1;
+    case 2:
+      return 2*(k+1)*(k+2);
+    case 3:
+      return 3*(k+1)*(k+1)*(k+2);
+    default:
+      break;
+    }
+
+  Assert(false, ExcNotImplemented());
+  return 0;
+}
+
+
+// explicit instantiations for one, two and three space dimensions
+template class PolynomialsRaviartThomas<1>;
+template class PolynomialsRaviartThomas<2>;
+template class PolynomialsRaviartThomas<3>;
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/quadrature.cc b/source/base/quadrature.cc
new file mode 100644
index 0000000..21bc38d
--- /dev/null
+++ b/source/base/quadrature.cc
@@ -0,0 +1,1782 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/geometry_info.h>
+#include <deal.II/base/quadrature.h>
+#include <deal.II/base/qprojector.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/base/utilities.h>
+
+#include <cmath>
+#include <cstdlib>
+#include <iterator>
+#include <limits>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// Specialization for dim == 0: create n_q quadrature points and n_q
+// weights, the weights all being initialized to zero.
+template <>
+Quadrature<0>::Quadrature (const unsigned int n_q)
+  :
+  quadrature_points (n_q),
+  weights (n_q, 0)
+{}
+
+
+
+// Destructor specialization for dim == 0; there is nothing to do.
+template <>
+Quadrature<0>::~Quadrature ()
+{}
+
+
+
+// Create a quadrature object with n_q default-constructed points and n_q
+// weights that are all zero.
+template <int dim>
+Quadrature<dim>::Quadrature (const unsigned int n_q)
+  :
+  quadrature_points (n_q, Point<dim>()),
+  weights (n_q, 0)
+{}
+
+
+
+// Replace the stored points and weights by copies of p and w. The two
+// vectors must have the same length, which is checked with AssertDimension.
+template <int dim>
+void
+Quadrature<dim>::initialize (const std::vector<Point<dim> > &p,
+                             const std::vector<double>      &w)
+{
+  AssertDimension (w.size(), p.size());
+
+  weights           = w;
+  quadrature_points = p;
+}
+
+
+
+// Build a quadrature object from a vector of points and a vector of
+// weights. Both are copied; they must have the same length. Note that the
+// parameter named weights hides the member of the same name inside the
+// body, so the assertion compares the two arguments.
+template <int dim>
+Quadrature<dim>::Quadrature (const std::vector<Point<dim> > &points,
+                             const std::vector<double>      &weights)
+  :
+  quadrature_points(points),
+  weights(weights)
+{
+  Assert (weights.size() == points.size(),
+          ExcDimensionMismatch(weights.size(), points.size()));
+}
+
+
+
+// Build a quadrature object from points only. No weights are supplied, so
+// every weight is set to positive infinity (presumably so that accidental
+// use of the weights is conspicuous -- confirm against the class
+// documentation). The value is taken from std::numeric_limits instead of
+// parsing the string "Inf" at run time on every construction.
+template <int dim>
+Quadrature<dim>::Quadrature (const std::vector<Point<dim> > &points)
+  :
+  quadrature_points(points),
+  weights(points.size(), std::numeric_limits<double>::infinity())
+{
+  Assert(weights.size() == points.size(),
+         ExcDimensionMismatch(weights.size(), points.size()));
+}
+
+
+
+// Build a one-point quadrature rule: the single point is the given one and
+// its weight is 1.
+template <int dim>
+Quadrature<dim>::Quadrature (const Point<dim> &point)
+  :
+  quadrature_points(1, point),
+  weights(1, 1.)
+{}
+
+
+// Specialization for dim == 0 of the constructor combining a
+// lower-dimensional rule with a 1d rule. That combination is not possible
+// in dimension 0, so calling it raises ExcImpossibleInDim(0) through Assert.
+template <>
+Quadrature<0>::Quadrature (const SubQuadrature &,
+                           const Quadrature<1> &)
+{
+  Assert(false, ExcImpossibleInDim(0));
+}
+
+
+
+// Build a dim-dimensional rule as the tensor product of the
+// lower-dimensional rule q1 and the 1d rule q2. The points of q1 supply the
+// first dim-1 coordinates, q2 supplies the last one, and the weights are
+// multiplied. The index of q1 runs fastest. In DEBUG builds the weights of
+// a non-empty result are checked to sum to one up to 1e-6.
+template <int dim>
+Quadrature<dim>::Quadrature (const SubQuadrature &q1,
+                             const Quadrature<1> &q2)
+  :
+  quadrature_points (q1.size() * q2.size()),
+  weights (q1.size() * q2.size())
+{
+  const unsigned int n1 = q1.size();
+  const unsigned int n2 = q2.size();
+
+  for (unsigned int i2=0; i2<n2; ++i2)
+    for (unsigned int i1=0; i1<n1; ++i1)
+      {
+        // position of the (i1,i2) pair in the flat arrays
+        const unsigned int index = i2*n1 + i1;
+
+        // first dim-1 coordinates come from q1, the last one from q2
+        Point<dim> &target = quadrature_points[index];
+        for (unsigned int d=0; d<dim-1; ++d)
+          target(d) = q1.point(i1)(d);
+        target(dim-1) = q2.point(i2)(0);
+
+        weights[index] = q1.weight(i1) * q2.weight(i2);
+      }
+
+#ifdef DEBUG
+  if (size() > 0)
+    {
+      double sum = 0;
+      for (unsigned int i=0; i<size(); ++i)
+        sum += weights[i];
+      // the sum of weights cannot be guaranteed to be exactly one,
+      // but it should be close to it
+      Assert ((sum>0.999999) && (sum<1.000001), ExcInternalError());
+    }
+#endif
+}
+
+
+
+// Specialization for dim == 1 of the tensor-product constructor: the
+// lower-dimensional argument is ignored, and the points and weights of q2
+// are copied one by one. In DEBUG builds the weights of a non-empty result
+// are checked to sum to one up to 1e-6.
+template <>
+Quadrature<1>::Quadrature (const SubQuadrature &,
+                           const Quadrature<1> &q2)
+  :
+  quadrature_points (q2.size()),
+  weights (q2.size())
+{
+  for (unsigned int i=0; i<q2.size(); ++i)
+    {
+      // the only coordinate is the one of the 1d rule
+      quadrature_points[i](0) = q2.point(i)(0);
+      weights[i]              = q2.weight(i);
+    }
+
+#ifdef DEBUG
+  if (size() > 0)
+    {
+      double sum = 0;
+      for (unsigned int i=0; i<size(); ++i)
+        sum += weights[i];
+      // the sum of weights cannot be guaranteed to be exactly one,
+      // but it should be close to it
+      Assert ((sum>0.999999) && (sum<1.000001), ExcInternalError());
+    }
+#endif
+}
+
+
+
+// Specialization for dim == 0 of the constructor taking a 1d rule: the
+// argument is ignored and a single weight of 1 is stored. The initializer
+// for quadrature_points is commented out, so that member is
+// default-constructed.
+template <>
+Quadrature<0>::Quadrature (const Quadrature<1> &)
+  :
+  Subscriptor(),
+//              quadrature_points(1),
+  weights(1,1.)
+{}
+
+
+// Specialization for dim == 1 of the constructor taking a 0d rule. It only
+// exists so that the signature is defined; calling it raises
+// ExcImpossibleInDim(1) through Assert.
+template <>
+Quadrature<1>::Quadrature (const Quadrature<0> &)
+  :
+  Subscriptor()
+{
+  // this function should never be
+  // called -- this should be the
+  // copy constructor in 1d...
+  Assert (false, ExcImpossibleInDim(1));
+}
+
+
+
+// Build a dim-dimensional rule as the dim-fold tensor product of the given
+// rule q with itself. The result has q.size()^dim points; coordinate 0 runs
+// fastest, and each weight is the product of the weights of the 1d points
+// involved. Only dim <= 3 is implemented.
+template <int dim>
+Quadrature<dim>::Quadrature (const Quadrature<dim != 1 ? 1 : 0> &q)
+  :
+  Subscriptor(),
+  quadrature_points (Utilities::fixed_power<dim>(q.size())),
+  weights (Utilities::fixed_power<dim>(q.size()))
+{
+  Assert (dim <= 3, ExcNotImplemented());
+
+  // number of 1d points used in each direction; 1 for unused directions
+  const unsigned int n0 = q.size();
+  const unsigned int n1 = (dim>1) ? n0 : 1;
+  const unsigned int n2 = (dim>2) ? n0 : 1;
+
+  unsigned int k=0;
+  for (unsigned int i2=0; i2<n2; ++i2)
+    for (unsigned int i1=0; i1<n1; ++i1)
+      for (unsigned int i0=0; i0<n0; ++i0, ++k)
+        {
+          Point<dim> &target = quadrature_points[k];
+          double      w      = q.weight(i0);
+
+          target(0) = q.point(i0)(0);
+          if (dim>1)
+            {
+              target(1) = q.point(i1)(0);
+              w *= q.weight(i1);
+            }
+          if (dim>2)
+            {
+              target(2) = q.point(i2)(0);
+              w *= q.weight(i2);
+            }
+
+          weights[k] = w;
+        }
+}
+
+
+
+// Copy constructor: copies the points and weights of q. The Subscriptor
+// base is default-constructed rather than copied from q.
+template <int dim>
+Quadrature<dim>::Quadrature (const Quadrature<dim> &q)
+  :
+  Subscriptor(),
+  quadrature_points (q.quadrature_points),
+  weights (q.weights)
+{}
+
+
+// Assignment operator: copies the points and weights of q into this object
+// and returns a reference to it. The Subscriptor base is not assigned.
+template <int dim>
+Quadrature<dim> &
+Quadrature<dim>::operator= (const Quadrature<dim> &q)
+{
+  quadrature_points = q.quadrature_points;
+  weights           = q.weights;
+
+  return *this;
+}
+
+
+
+// Two quadrature objects compare equal if both their point vectors and
+// their weight vectors compare equal.
+template <int dim>
+bool
+Quadrature<dim>::operator == (const Quadrature<dim> &q) const
+{
+  // weights only need to be looked at if the points already agree
+  if (!(quadrature_points == q.quadrature_points))
+    return false;
+
+  return (weights == q.weights);
+}
+
+
+
+// Destructor; there is nothing to do beyond destroying the members.
+template <int dim>
+Quadrature<dim>::~Quadrature ()
+{}
+
+
+
+// Return the sum of what MemoryConsumption::memory_consumption reports for
+// the vector of points and for the vector of weights.
+template <int dim>
+std::size_t
+Quadrature<dim>::memory_consumption () const
+{
+  const std::size_t points_bytes
+    = MemoryConsumption::memory_consumption (quadrature_points);
+  const std::size_t weights_bytes
+    = MemoryConsumption::memory_consumption (weights);
+
+  return points_bytes + weights_bytes;
+}
+
+
+//---------------------------------------------------------------------------
+// One-argument constructor, valid only for dim == 1: copies the points and
+// weights of the 1d rule qx.
+template<int dim>
+QAnisotropic<dim>::QAnisotropic(const Quadrature<1> &qx)
+  : Quadrature<dim>(qx.size())
+{
+  Assert (dim==1, ExcImpossibleInDim(dim));
+
+  unsigned int k=0;
+  for (unsigned int ix=0; ix<qx.size(); ++ix, ++k)
+    {
+      this->quadrature_points[k](0) = qx.point(ix)(0);
+      this->weights[k]              = qx.weight(ix);
+    }
+
+  // every slot must have been filled
+  Assert (k==this->size(), ExcInternalError());
+}
+
+
+
+// Two-argument constructor, valid only for dim == 2: tensor product of qx
+// (first coordinate, running fastest) and qy (second coordinate). The
+// weights are the products of the 1d weights.
+template<int dim>
+QAnisotropic<dim>::QAnisotropic(const Quadrature<1> &qx,
+                                const Quadrature<1> &qy)
+  : Quadrature<dim>(qx.size()
+                    *qy.size())
+{
+  Assert (dim==2, ExcImpossibleInDim(dim));
+
+  unsigned int k=0;
+  for (unsigned int iy=0; iy<qy.size(); ++iy)
+    for (unsigned int ix=0; ix<qx.size(); ++ix, ++k)
+      {
+        Point<dim> &target = this->quadrature_points[k];
+        target(0) = qx.point(ix)(0);
+        target(1) = qy.point(iy)(0);
+
+        this->weights[k] = qx.weight(ix) * qy.weight(iy);
+      }
+
+  // every slot must have been filled
+  Assert (k==this->size(), ExcInternalError());
+}
+
+
+
+// Three-argument constructor, valid only for dim == 3: tensor product of
+// qx, qy and qz for the first, second and third coordinate. The index of qx
+// runs fastest and the index of qz slowest. The weights are the products of
+// the 1d weights.
+template<int dim>
+QAnisotropic<dim>::QAnisotropic(const Quadrature<1> &qx,
+                                const Quadrature<1> &qy,
+                                const Quadrature<1> &qz)
+  : Quadrature<dim>(qx.size()
+                    *qy.size()
+                    *qz.size())
+{
+  Assert (dim==3, ExcImpossibleInDim(dim));
+
+  unsigned int k=0;
+  for (unsigned int iz=0; iz<qz.size(); ++iz)
+    for (unsigned int iy=0; iy<qy.size(); ++iy)
+      for (unsigned int ix=0; ix<qx.size(); ++ix, ++k)
+        {
+          Point<dim> &target = this->quadrature_points[k];
+          target(0) = qx.point(ix)(0);
+          target(1) = qy.point(iy)(0);
+          target(2) = qz.point(iz)(0);
+
+          this->weights[k] = qx.weight(ix) * qy.weight(iy) * qz.weight(iz);
+        }
+
+  // every slot must have been filled
+  Assert (k==this->size(), ExcInternalError());
+}
+
+
+
+//---------------------------------------------------------------------------
+
+
+
+// Return a copy of the 2d rule q in which the two coordinates of every
+// point are swapped. The weights are taken over unchanged.
+template <int dim>
+Quadrature<2>
+QProjector<dim>::reflect (const Quadrature<2> &q)
+{
+  const unsigned int n_points = q.size();
+
+  std::vector<Point<2> > swapped_points;
+  std::vector<double>    copied_weights;
+  swapped_points.reserve (n_points);
+  copied_weights.reserve (n_points);
+
+  for (unsigned int i=0; i<n_points; ++i)
+    {
+      swapped_points.push_back (Point<2>(q.point(i)[1], q.point(i)[0]));
+      copied_weights.push_back (q.weight(i));
+    }
+
+  return Quadrature<2> (swapped_points, copied_weights);
+}
+
+
+// Return a copy of the 2d rule q whose points are rotated counterclockwise
+// by n_times quarter turns within the unit square. Only n_times modulo 4
+// matters. The weights are taken over unchanged.
+template <int dim>
+Quadrature<2>
+QProjector<dim>::rotate (const Quadrature<2> &q,
+                         const unsigned int   n_times)
+{
+  const unsigned int quarter_turns = n_times%4;
+
+  std::vector<Point<2> > q_points (q.size());
+  std::vector<double>    weights (q.size());
+  for (unsigned int i=0; i<q.size(); ++i)
+    {
+      const double x = q.point(i)[0];
+      const double y = q.point(i)[1];
+      Point<2> &rotated = q_points[i];
+
+      switch (quarter_turns)
+        {
+        case 0:
+          // 0 degree
+          rotated[0] = x;
+          rotated[1] = y;
+          break;
+        case 1:
+          // 90 degree counterclockwise
+          rotated[0] = 1.0 - y;
+          rotated[1] = x;
+          break;
+        case 2:
+          // 180 degree counterclockwise
+          rotated[0] = 1.0 - x;
+          rotated[1] = 1.0 - y;
+          break;
+        case 3:
+          // 270 degree counterclockwise
+          rotated[0] = y;
+          rotated[1] = 1.0 - x;
+          break;
+        }
+
+      weights[i] = q.weight(i);
+    }
+
+  return Quadrature<2> (q_points, weights);
+}
+
+
+// 1d specialization: a face of the unit interval is a single point, so the
+// only output point gets the face number itself (0 or 1) as coordinate.
+// The quadrature argument is ignored; q_points must have exactly one entry.
+template <>
+void
+QProjector<1>::project_to_face (const Quadrature<0> &,
+                                const unsigned int face_no,
+                                std::vector<Point<1> > &q_points)
+{
+  const unsigned int dim=1;
+  AssertIndexRange (face_no, GeometryInfo<dim>::faces_per_cell);
+  AssertDimension (q_points.size(), 1);
+
+  const double face_coordinate = static_cast<double>(face_no);
+  q_points[0] = Point<dim>(face_coordinate);
+}
+
+
+
+// 2d specialization: map the points of a 1d rule onto face face_no of the
+// unit square. Faces 0 and 1 are the lines x=0 and x=1, faces 2 and 3 the
+// lines y=0 and y=1. The 1d coordinate becomes the free coordinate.
+// q_points must already have as many entries as the quadrature rule.
+template <>
+void
+QProjector<2>::project_to_face (const Quadrature<1>      &quadrature,
+                                const unsigned int        face_no,
+                                std::vector<Point<2> >   &q_points)
+{
+  const unsigned int dim=2;
+  AssertIndexRange (face_no, GeometryInfo<dim>::faces_per_cell);
+  Assert (q_points.size() == quadrature.size(),
+          ExcDimensionMismatch (q_points.size(), quadrature.size()));
+
+  for (unsigned int p=0; p<quadrature.size(); ++p)
+    {
+      // position along the face
+      const double t = quadrature.point(p)(0);
+
+      switch (face_no)
+        {
+        case 0:
+          q_points[p] = Point<dim>(0,t);
+          break;
+        case 1:
+          q_points[p] = Point<dim>(1,t);
+          break;
+        case 2:
+          q_points[p] = Point<dim>(t,0);
+          break;
+        case 3:
+          q_points[p] = Point<dim>(t,1);
+          break;
+        default:
+          Assert (false, ExcInternalError());
+        }
+    }
+}
+
+
+
+// 3d specialization: map the points of a 2d rule onto face face_no of the
+// unit cube. Faces 0/1 fix x to 0/1, faces 2/3 fix y to 0/1 and faces 4/5
+// fix z to 0/1. Note that on faces 2 and 3 the first 2d coordinate becomes
+// z and the second one becomes x. q_points must already have as many
+// entries as the quadrature rule.
+template <>
+void
+QProjector<3>::project_to_face (const Quadrature<2>    &quadrature,
+                                const unsigned int      face_no,
+                                std::vector<Point<3> > &q_points)
+{
+  const unsigned int dim=3;
+  AssertIndexRange (face_no, GeometryInfo<dim>::faces_per_cell);
+  Assert (q_points.size() == quadrature.size(),
+          ExcDimensionMismatch (q_points.size(), quadrature.size()));
+
+  for (unsigned int p=0; p<quadrature.size(); ++p)
+    {
+      // the two coordinates within the face
+      const double u = quadrature.point(p)(0);
+      const double v = quadrature.point(p)(1);
+
+      switch (face_no)
+        {
+        case 0:
+          q_points[p] = Point<dim>(0, u, v);
+          break;
+        case 1:
+          q_points[p] = Point<dim>(1, u, v);
+          break;
+        case 2:
+          q_points[p] = Point<dim>(v, 0, u);
+          break;
+        case 3:
+          q_points[p] = Point<dim>(v, 1, u);
+          break;
+        case 4:
+          q_points[p] = Point<dim>(u, v, 0);
+          break;
+        case 5:
+          q_points[p] = Point<dim>(u, v, 1);
+          break;
+
+        default:
+          Assert (false, ExcInternalError());
+        }
+    }
+}
+
+
+
+// 1d specialization: the subface number and refinement case are ignored,
+// and the single output point gets the face number itself (0 or 1) as
+// coordinate. The quadrature argument is ignored as well; q_points must
+// have exactly one entry.
+template <>
+void
+QProjector<1>::project_to_subface (const Quadrature<0> &,
+                                   const unsigned int face_no,
+                                   const unsigned int,
+                                   std::vector<Point<1> > &q_points,
+                                   const RefinementCase<0> &)
+{
+  const unsigned int dim=1;
+  AssertIndexRange (face_no, GeometryInfo<dim>::faces_per_cell);
+  AssertDimension (q_points.size(), 1);
+
+  const double face_coordinate = static_cast<double>(face_no);
+  q_points[0] = Point<dim>(face_coordinate);
+}
+
+
+
+// 2d specialization: map the points of a 1d rule onto one half of face
+// face_no of the unit square. Subface 0 is the half [0,0.5] of the face,
+// subface 1 the half [0.5,1], so the 1d coordinate is halved and, for
+// subface 1, shifted by 0.5. Faces 0 and 1 are the lines x=0 and x=1, faces
+// 2 and 3 the lines y=0 and y=1. The refinement case argument is ignored.
+// q_points must already have as many entries as the quadrature rule.
+template <>
+void
+QProjector<2>::project_to_subface (const Quadrature<1>    &quadrature,
+                                   const unsigned int      face_no,
+                                   const unsigned int      subface_no,
+                                   std::vector<Point<2> > &q_points,
+                                   const RefinementCase<1> &)
+{
+  const unsigned int dim=2;
+  AssertIndexRange (face_no, GeometryInfo<dim>::faces_per_cell);
+  AssertIndexRange (subface_no, GeometryInfo<dim>::max_children_per_face);
+
+  Assert (q_points.size() == quadrature.size(),
+          ExcDimensionMismatch (q_points.size(), quadrature.size()));
+
+  for (unsigned int p=0; p<quadrature.size(); ++p)
+    {
+      // position along the face, restricted to the requested half
+      double t;
+      switch (subface_no)
+        {
+        case 0:
+          t = quadrature.point(p)(0)/2;
+          break;
+        case 1:
+          t = quadrature.point(p)(0)/2+0.5;
+          break;
+        default:
+          Assert (false, ExcInternalError());
+          // leave this point untouched
+          continue;
+        }
+
+      switch (face_no)
+        {
+        case 0:
+          q_points[p] = Point<dim>(0,t);
+          break;
+        case 1:
+          q_points[p] = Point<dim>(1,t);
+          break;
+        case 2:
+          q_points[p] = Point<dim>(t,0);
+          break;
+        case 3:
+          q_points[p] = Point<dim>(t,1);
+          break;
+
+        default:
+          Assert (false, ExcInternalError());
+        }
+    }
+}
+
+
+
+// 3d specialization: map the points of a 2d rule onto subface subface_no of
+// face face_no of the unit cube, for the face refinement case ref_case
+// (cut_x, cut_y or cut_xy). The two local coordinates are scaled and
+// shifted according to the refinement case and subface number, and then
+// written into the two global coordinates that vary on the face. The
+// remaining coordinate is fixed to 0 or 1. q_points must already have as
+// many entries as the quadrature rule.
+template <>
+void
+QProjector<3>::project_to_subface (const Quadrature<2>    &quadrature,
+                                   const unsigned int      face_no,
+                                   const unsigned int      subface_no,
+                                   std::vector<Point<3> > &q_points,
+                                   const RefinementCase<2> &ref_case)
+{
+  const unsigned int dim=3;
+  AssertIndexRange (face_no, GeometryInfo<dim>::faces_per_cell);
+  AssertIndexRange (subface_no, GeometryInfo<dim>::max_children_per_face);
+  Assert (q_points.size() == quadrature.size(),
+          ExcDimensionMismatch (q_points.size(), quadrature.size()));
+
+  // one coordinate is at a const value. for
+  // faces 0, 2 and 4 this value is 0.0, for
+  // faces 1, 3 and 5 it is 1.0
+  double const_value=face_no%2;
+  // local 2d coordinates are xi and eta,
+  // global 3d coordinates are x, y and
+  // z. those have to be mapped. the following
+  // indices tell, which global coordinate
+  // (0->x, 1->y, 2->z) corresponds to which
+  // local one
+  unsigned int xi_index   = numbers::invalid_unsigned_int,
+               eta_index   = numbers::invalid_unsigned_int,
+               const_index = face_no/2;
+  // the xi and eta values have to be scaled
+  // (by factor 0.5 or factor 1.0) depending on
+  // the refinement case and translated (by 0.0
+  // or 0.5) depending on the refinement case
+  // and subface_no.
+  double xi_scale=1.0,
+         eta_scale=1.0,
+         xi_translation=0.0,
+         eta_translation=0.0;
+  // set the index mapping between local and
+  // global coordinates
+  // (there is no default branch: face_no/2 is at most 2 once the index
+  // range assertion above has passed)
+  switch (face_no/2)
+    {
+    case 0:
+      xi_index=1;
+      eta_index=2;
+      break;
+    case 1:
+      xi_index=2;
+      eta_index=0;
+      break;
+    case 2:
+      xi_index=0;
+      eta_index=1;
+      break;
+    }
+  // set the scale and translation parameter
+  // for individual subfaces
+  switch ((unsigned char)ref_case)
+    {
+    case RefinementCase<dim-1>::cut_x:
+      xi_scale=0.5;
+      xi_translation=subface_no%2 * 0.5;
+      break;
+    case RefinementCase<dim-1>::cut_y:
+      eta_scale=0.5;
+      eta_translation=subface_no%2 * 0.5;
+      break;
+    case RefinementCase<dim-1>::cut_xy:
+      // subface_no%2 selects the half in xi, subface_no/2 the half in eta
+      xi_scale= 0.5;
+      eta_scale=0.5;
+      xi_translation =subface_no%2 * 0.5;
+      eta_translation=subface_no/2 * 0.5;
+      break;
+    default:
+      Assert(false,ExcInternalError());
+      break;
+    }
+  // finally, compute the scaled, translated,
+  // projected quadrature points
+  for (unsigned int p=0; p<quadrature.size(); ++p)
+    {
+      q_points[p][xi_index]    = xi_scale  * quadrature.point(p)(0) + xi_translation;
+      q_points[p][eta_index]   = eta_scale * quadrature.point(p)(1) + eta_translation;
+      q_points[p][const_index] = const_value;
+    }
+}
+
+
+template <>
+Quadrature<1>
+QProjector<1>::project_to_all_faces (const Quadrature<0> &quadrature)
+{
+  // collect, face after face, the projection of the given formula
+  // onto every face of the 1d cell together with a copy of its
+  // weights, and wrap the result into a single 1d formula
+  const unsigned int dim = 1;
+
+  // the 0d formula is treated as consisting of exactly one point
+  const unsigned int points_per_face = 1;
+  const unsigned int face_count      = GeometryInfo<dim>::faces_per_cell;
+  const unsigned int total_points    = points_per_face * face_count;
+
+  std::vector<Point<dim> > all_points;
+  std::vector<double>      all_weights;
+  all_points.reserve (total_points);
+  all_weights.reserve (total_points);
+
+  // scratch array filled by project_to_face() for the current face
+  std::vector<Point<dim> > face_points (points_per_face);
+  const std::vector<double> &base_weights = quadrature.get_weights();
+
+  for (unsigned int f=0; f<face_count; ++f)
+    {
+      project_to_face (quadrature, f, face_points);
+      all_points.insert (all_points.end(),
+                         face_points.begin(), face_points.end());
+      all_weights.insert (all_weights.end(),
+                          base_weights.begin(), base_weights.end());
+    }
+
+  Assert (all_points.size() == total_points,
+          ExcInternalError());
+  Assert (all_weights.size() == total_points,
+          ExcInternalError());
+
+  return Quadrature<dim>(all_points, all_weights);
+}
+
+
+
+template <>
+Quadrature<2>
+QProjector<2>::project_to_all_faces (const SubQuadrature &quadrature)
+{
+  // collect, face after face, the projection of the given face
+  // formula onto every face of the 2d cell together with a copy of
+  // its weights, and wrap the result into a single 2d formula
+  const unsigned int dim = 2;
+
+  const unsigned int points_per_face = quadrature.size();
+  const unsigned int face_count      = GeometryInfo<dim>::faces_per_cell;
+  const unsigned int total_points    = points_per_face * face_count;
+
+  std::vector<Point<dim> > all_points;
+  std::vector<double>      all_weights;
+  all_points.reserve (total_points);
+  all_weights.reserve (total_points);
+
+  // scratch array filled by project_to_face() for the current face
+  std::vector<Point<dim> > face_points (points_per_face);
+  const std::vector<double> &base_weights = quadrature.get_weights();
+
+  for (unsigned int f=0; f<face_count; ++f)
+    {
+      project_to_face (quadrature, f, face_points);
+      all_points.insert (all_points.end(),
+                         face_points.begin(), face_points.end());
+      all_weights.insert (all_weights.end(),
+                          base_weights.begin(), base_weights.end());
+    }
+
+  Assert (all_points.size() == total_points,
+          ExcInternalError());
+  Assert (all_weights.size() == total_points,
+          ExcInternalError());
+
+  return Quadrature<dim>(all_points, all_weights);
+}
+
+
+
+template <>
+Quadrature<3>
+QProjector<3>::project_to_all_faces (const SubQuadrature &quadrature)
+{
+  const unsigned int dim = 3;
+
+  // the eight variants of the face formula that are projected: the
+  // formula itself and its three rotations, then the reflected
+  // formula followed by its rotations in reverse order. variant 0
+  // corresponds to a face with standard orientation, no flip and no
+  // rotation
+  const unsigned int n_variants = 8;
+  SubQuadrature mirrored = reflect (quadrature);
+  SubQuadrature variants[n_variants] =
+  {
+    quadrature,
+    rotate (quadrature,1),
+    rotate (quadrature,2),
+    rotate (quadrature,3),
+    mirrored,
+    rotate (mirrored,3),
+    rotate (mirrored,2),
+    rotate (mirrored,1)
+  };
+
+  const unsigned int points_per_face = quadrature.size();
+  const unsigned int face_count      = GeometryInfo<dim>::faces_per_cell;
+  const unsigned int total_points    = points_per_face * face_count * n_variants;
+
+  std::vector<Point<dim> > all_points;
+  std::vector<double>      all_weights;
+  all_points.reserve (total_points);
+  all_weights.reserve (total_points);
+
+  // scratch array filled by project_to_face() for the current face
+  std::vector<Point<dim> > face_points (points_per_face);
+
+  // the data sets are ordered by variant first and by face second;
+  // points and weights are appended in lockstep
+  for (unsigned int v=0; v<n_variants; ++v)
+    {
+      const std::vector<double> &variant_weights = variants[v].get_weights();
+
+      for (unsigned int f=0; f<face_count; ++f)
+        {
+          project_to_face (variants[v], f, face_points);
+          all_points.insert (all_points.end(),
+                             face_points.begin(), face_points.end());
+          all_weights.insert (all_weights.end(),
+                              variant_weights.begin(), variant_weights.end());
+        }
+    }
+
+  Assert (all_points.size() == total_points,
+          ExcInternalError());
+  Assert (all_weights.size() == total_points,
+          ExcInternalError());
+
+  return Quadrature<dim>(all_points, all_weights);
+}
+
+
+
+template <>
+Quadrature<1>
+QProjector<1>::project_to_all_subfaces (const Quadrature<0> &quadrature)
+{
+  // collect the projection of the given formula onto every subface of
+  // every face of the 1d cell (ordered by face, then by subface)
+  // together with matching copies of its weights
+  const unsigned int dim = 1;
+
+  // the 0d formula is treated as consisting of exactly one point
+  const unsigned int points_per_set    = 1;
+  const unsigned int face_count        = GeometryInfo<dim>::faces_per_cell;
+  const unsigned int children_per_face = GeometryInfo<dim>::max_children_per_face;
+  const unsigned int total_points
+    = points_per_set * face_count * children_per_face;
+
+  std::vector<Point<dim> > all_points;
+  std::vector<double>      all_weights;
+  all_points.reserve (total_points);
+  all_weights.reserve (total_points);
+
+  // scratch array filled by project_to_subface() for the current subface
+  std::vector<Point<dim> > subface_points (points_per_set);
+  const std::vector<double> &base_weights = quadrature.get_weights();
+
+  for (unsigned int f=0; f<face_count; ++f)
+    for (unsigned int s=0; s<children_per_face; ++s)
+      {
+        project_to_subface (quadrature, f, s, subface_points);
+        all_points.insert (all_points.end(),
+                           subface_points.begin(), subface_points.end());
+        all_weights.insert (all_weights.end(),
+                            base_weights.begin(), base_weights.end());
+      }
+
+  Assert (all_points.size() == total_points,
+          ExcInternalError());
+  Assert (all_weights.size() == total_points,
+          ExcInternalError());
+
+  return Quadrature<dim>(all_points, all_weights);
+}
+
+
+
+template <>
+Quadrature<2>
+QProjector<2>::project_to_all_subfaces (const SubQuadrature &quadrature)
+{
+  // collect the projection of the given face formula onto every
+  // subface of every face of the 2d cell (ordered by face, then by
+  // subface) together with matching copies of its weights
+  const unsigned int dim = 2;
+
+  const unsigned int points_per_set    = quadrature.size();
+  const unsigned int face_count        = GeometryInfo<dim>::faces_per_cell;
+  const unsigned int children_per_face = GeometryInfo<dim>::max_children_per_face;
+  const unsigned int total_points
+    = points_per_set * face_count * children_per_face;
+
+  std::vector<Point<dim> > all_points;
+  std::vector<double>      all_weights;
+  all_points.reserve (total_points);
+  all_weights.reserve (total_points);
+
+  // scratch array filled by project_to_subface() for the current subface
+  std::vector<Point<dim> > subface_points (points_per_set);
+  const std::vector<double> &base_weights = quadrature.get_weights();
+
+  for (unsigned int f=0; f<face_count; ++f)
+    for (unsigned int s=0; s<children_per_face; ++s)
+      {
+        project_to_subface (quadrature, f, s, subface_points);
+        all_points.insert (all_points.end(),
+                           subface_points.begin(), subface_points.end());
+        all_weights.insert (all_weights.end(),
+                            base_weights.begin(), base_weights.end());
+      }
+
+  Assert (all_points.size() == total_points,
+          ExcInternalError());
+  Assert (all_weights.size() == total_points,
+          ExcInternalError());
+
+  return Quadrature<dim>(all_points, all_weights);
+}
+
+
+
+template <>
+Quadrature<3>
+QProjector<3>::project_to_all_subfaces (const SubQuadrature &quadrature)
+{
+  const unsigned int dim = 3;
+
+  // the eight variants of the face formula that are projected: the
+  // formula itself and its three rotations, then the reflected
+  // formula followed by its rotations in reverse order. variant 0
+  // corresponds to a face with standard orientation, no flip and no
+  // rotation
+  const unsigned int n_variants = 8;
+  SubQuadrature mirrored = reflect (quadrature);
+  SubQuadrature variants[n_variants] =
+  {
+    quadrature,
+    rotate (quadrature,1),
+    rotate (quadrature,2),
+    rotate (quadrature,3),
+    mirrored,
+    rotate (mirrored,3),
+    rotate (mirrored,2),
+    rotate (mirrored,1)
+  };
+
+  // per face, all subfaces of the three face refinement cases are
+  // stored one after the other
+  const unsigned int points_per_set          = quadrature.size();
+  const unsigned int face_count              = GeometryInfo<dim>::faces_per_cell;
+  const unsigned int total_subfaces_per_face = 2 + 2 + 4;
+  const unsigned int total_points
+    = points_per_set * face_count * total_subfaces_per_face * n_variants;
+
+  std::vector<Point<dim> > all_points;
+  std::vector<double>      all_weights;
+  all_points.reserve (total_points);
+  all_weights.reserve (total_points);
+
+  // scratch array filled by project_to_subface() for the current subface
+  std::vector<Point<dim> > subface_points (points_per_set);
+
+  // ordering of the data sets: variant, then face, then refinement
+  // case (counting down from cut_xy to cut_x), then subface. points
+  // and weights are appended in lockstep
+  for (unsigned int v=0; v<n_variants; ++v)
+    {
+      const std::vector<double> &variant_weights = variants[v].get_weights();
+
+      for (unsigned int f=0; f<face_count; ++f)
+        for (unsigned int ref_case=RefinementCase<dim-1>::cut_xy;
+             ref_case>=RefinementCase<dim-1>::cut_x;
+             --ref_case)
+          {
+            const RefinementCase<dim-1> face_ref_case
+              = RefinementCase<dim-1>(ref_case);
+            const unsigned int n_subfaces
+              = GeometryInfo<dim-1>::n_children(face_ref_case);
+
+            for (unsigned int s=0; s<n_subfaces; ++s)
+              {
+                project_to_subface (variants[v], f, s, subface_points,
+                                    face_ref_case);
+                all_points.insert (all_points.end(),
+                                   subface_points.begin(),
+                                   subface_points.end());
+                all_weights.insert (all_weights.end(),
+                                    variant_weights.begin(),
+                                    variant_weights.end());
+              }
+          }
+    }
+
+  Assert (all_points.size() == total_points,
+          ExcInternalError());
+  Assert (all_weights.size() == total_points,
+          ExcInternalError());
+
+  return Quadrature<dim>(all_points, all_weights);
+}
+
+
+
+// Helper that handles a single child; project_to_all_children() in
+// this file calls it once per child
+template <int dim>
+Quadrature<dim>
+QProjector<dim>::project_to_child (const Quadrature<dim>    &quadrature,
+                                   const unsigned int        child_no)
+{
+  Assert (child_no < GeometryInfo<dim>::max_children_per_cell,
+          ExcIndexRange (child_no, 0, GeometryInfo<dim>::max_children_per_cell));
+
+  const unsigned int point_count = quadrature.size();
+
+  // every weight is scaled by one over the maximal number of
+  // children of a cell
+  const double weight_factor = (1./GeometryInfo<dim>::max_children_per_cell);
+
+  std::vector<Point<dim> > child_points (point_count);
+  std::vector<double>      child_weights = quadrature.get_weights ();
+
+  // map each point from child to cell coordinates and scale its
+  // weight in the same sweep
+  for (unsigned int q=0; q<point_count; ++q)
+    {
+      child_points[q]
+        = GeometryInfo<dim>::child_to_cell_coordinates (quadrature.point(q),
+                                                        child_no);
+      child_weights[q] *= weight_factor;
+    }
+
+  return Quadrature<dim> (child_points, child_weights);
+}
+
+
+template <int dim>
+Quadrature<dim>
+QProjector<dim>::project_to_all_children (const Quadrature<dim> &quadrature)
+{
+  const unsigned int points_per_child = quadrature.size();
+  const unsigned int child_count      = GeometryInfo<dim>::max_children_per_cell;
+
+  std::vector<Point<dim> > all_points (points_per_child * child_count);
+  std::vector<double>      all_weights (points_per_child * child_count);
+
+  // the data of child c occupies the index range
+  // [c*points_per_child, (c+1)*points_per_child); 'slot' walks
+  // through these ranges consecutively
+  unsigned int slot = 0;
+  for (unsigned int c=0; c<child_count; ++c)
+    {
+      const Quadrature<dim> projected = project_to_child (quadrature, c);
+      for (unsigned int q=0; q<points_per_child; ++q, ++slot)
+        {
+          all_points[slot]  = projected.point(q);
+          all_weights[slot] = projected.weight(q);
+        }
+    }
+
+  return Quadrature<dim>(all_points, all_weights);
+}
+
+
+
+
+template <int dim>
+Quadrature<dim>
+QProjector<dim>::project_to_line(
+  const Quadrature<1> &quadrature,
+  const Point<dim> &p1,
+  const Point<dim> &p2)
+{
+  const unsigned int point_count = quadrature.size();
+
+  // the weights are scaled by the distance between the two end points
+  const double segment_length = p1.distance(p2);
+
+  std::vector<Point<dim> > line_points (point_count);
+  std::vector<double>      line_weights (point_count);
+
+  for (unsigned int q=0; q<point_count; ++q)
+    {
+      // the 1d coordinate t places the point at t*p2 + (1-t)*p1
+      const double t = quadrature.point(q)(0);
+
+      Point<dim> &target = line_points[q];
+      target  = t * p2;
+      target += (1.-t) * p1;
+
+      line_weights[q] = segment_length * quadrature.weight(q);
+    }
+
+  return Quadrature<dim> (line_points, line_weights);
+}
+
+
+
+template <int dim>
+typename QProjector<dim>::DataSetDescriptor
+QProjector<dim>::DataSetDescriptor::
+face (const unsigned int face_no,
+      const bool         face_orientation,
+      const bool         face_flip,
+      const bool         face_rotation,
+      const unsigned int n_quadrature_points)
+{
+  Assert (face_no < GeometryInfo<dim>::faces_per_cell,
+          ExcInternalError());
+
+  // in 1d and 2d the orientation flags are not looked at; the data
+  // sets simply follow each other face by face
+  if ((dim == 1) || (dim == 2))
+    return face_no * n_quadrature_points;
+
+  if (dim == 3)
+    {
+      // in 3d, faces may have non-standard orientation, flip and
+      // rotation, so there are _eight_ groups of data sets, each
+      // holding one data set per face.
+      //
+      // the table gives the position of the group that belongs to a
+      // combination of flags. first index: face_orientation
+      // (standard true=1), second index: face_flip (standard
+      // false=0), third index: face_rotation (standard false=0).
+      //
+      // the obvious numbering would be 0,1,...,7. however, before
+      // flipped and rotated faces were supported, many places of the
+      // library relied on the convention that the first data set
+      // (offset 0) belongs to a face in standard orientation. to
+      // stick to that (implicit) convention the groups are numbered
+      // 4,5,6,7,0,1,2,3
+      static const unsigned int group_index[2][2][2]=
+      {
+        // face_orientation=false
+        {
+          {4, 5},   // face_flip=false; face_rotation=false and true
+          {6, 7}    // face_flip=true;  face_rotation=false and true
+        },
+        // face_orientation=true
+        {
+          {0, 1},   // face_flip=false; face_rotation=false and true
+          {2, 3}    // face_flip=true;  face_rotation=false and true
+        }
+      };
+
+      const unsigned int group_start
+        = group_index[face_orientation][face_flip][face_rotation]
+          * GeometryInfo<dim>::faces_per_cell;
+
+      return ((face_no + group_start)
+              * n_quadrature_points);
+    }
+
+  // any other dimension is not supported
+  Assert (false, ExcInternalError());
+  return numbers::invalid_unsigned_int;
+}
+
+
+
+template <>
+QProjector<1>::DataSetDescriptor
+QProjector<1>::DataSetDescriptor::
+subface (const unsigned int face_no,
+         const unsigned int subface_no,
+         const bool,
+         const bool,
+         const bool,
+         const unsigned int n_quadrature_points,
+         const internal::SubfaceCase<1>)
+{
+  // the orientation flags and the subface case are ignored in 1d
+  const unsigned int children_per_face
+    = GeometryInfo<1>::max_children_per_face;
+
+  Assert (face_no < GeometryInfo<1>::faces_per_cell,
+          ExcInternalError());
+  Assert (subface_no < children_per_face,
+          ExcInternalError());
+
+  // data sets are ordered by face first and by subface second
+  const unsigned int data_set_index
+    = face_no * children_per_face + subface_no;
+
+  return (data_set_index * n_quadrature_points);
+}
+
+
+
+template <>
+QProjector<2>::DataSetDescriptor
+QProjector<2>::DataSetDescriptor::
+subface (const unsigned int face_no,
+         const unsigned int subface_no,
+         const bool,
+         const bool,
+         const bool,
+         const unsigned int n_quadrature_points,
+         const internal::SubfaceCase<2>)
+{
+  // the orientation flags and the subface case are ignored in 2d
+  const unsigned int children_per_face
+    = GeometryInfo<2>::max_children_per_face;
+
+  Assert (face_no < GeometryInfo<2>::faces_per_cell,
+          ExcInternalError());
+  Assert (subface_no < children_per_face,
+          ExcInternalError());
+
+  // data sets are ordered by face first and by subface second
+  const unsigned int data_set_index
+    = face_no * children_per_face + subface_no;
+
+  return (data_set_index * n_quadrature_points);
+}
+
+
+template <>
+QProjector<3>::DataSetDescriptor
+QProjector<3>::DataSetDescriptor::
+subface (const unsigned int face_no,
+         const unsigned int subface_no,
+         const bool         face_orientation,
+         const bool         face_flip,
+         const bool         face_rotation,
+         const unsigned int n_quadrature_points,
+         const internal::SubfaceCase<3> ref_case)
+{
+  // Compute the offset of the data set that belongs to the given
+  // subface of the given face of a hex cell. The offset is counted in
+  // quadrature points and follows the ordering
+  //   (orientation/flip/rotation variant, face, refinement case, subface)
+  // which is the order in which project_to_all_subfaces() appends
+  // its data sets.
+  const unsigned int dim = 3;
+
+  Assert (face_no < GeometryInfo<dim>::faces_per_cell,
+          ExcInternalError());
+  Assert (subface_no < GeometryInfo<dim>::max_children_per_face,
+          ExcInternalError());
+
+  // As the quadrature points created by
+  // QProjector are on subfaces in their
+  // "standard location" we have to use a
+  // permutation of the equivalent subface
+  // number in order to respect face
+  // orientation, flip and rotation. The
+  // information we need here is exactly the
+  // same as the
+  // GeometryInfo<3>::child_cell_on_face info
+  // for the bottom face (face 4) of a hex, as
+  // on this the RefineCase of the cell matches
+  // that of the face and the subfaces are
+  // numbered in the same way as the child
+  // cells.
+
+  // in 3d, we have to account for faces that
+  // have non-standard face orientation, flip
+  // and rotation. thus, we have to store
+  // _eight_ data sets per face or subface
+  // already for the isotropic
+  // case. Additionally, we have three
+  // different refinement cases, resulting in
+  // <tt>4 + 2 + 2 = 8</tt> different subfaces
+  // for each face (cut_xy, cut_y, cut_x, in
+  // the order of the offsets used below).
+  const unsigned int total_subfaces_per_face=8;
+
+  // set up a table with the according offsets
+  // for non-standard orientation, first index:
+  // face_orientation (standard true=1), second
+  // index: face_flip (standard false=0), third
+  // index: face_rotation (standard false=0)
+  //
+  // note, that normally we should use the
+  // obvious offsets 0,1,2,3,4,5,6,7. However,
+  // prior to the changes enabling flipped and
+  // rotated faces, in many places of the
+  // library the convention was used, that the
+  // first dataset with offset 0 corresponds to
+  // a face in standard orientation. therefore
+  // we use the offsets 4,5,6,7,0,1,2,3 here to
+  // stick to that (implicit) convention
+  static const unsigned int orientation_offset[2][2][2]=
+  {
+    {
+      // face_orientation=false; face_flip=false; face_rotation=false and true
+      {
+        4*GeometryInfo<dim>::faces_per_cell*total_subfaces_per_face,
+        5*GeometryInfo<dim>::faces_per_cell *total_subfaces_per_face
+      },
+      // face_orientation=false; face_flip=true;  face_rotation=false and true
+      {
+        6*GeometryInfo<dim>::faces_per_cell*total_subfaces_per_face,
+        7*GeometryInfo<dim>::faces_per_cell *total_subfaces_per_face
+      }
+    },
+    {
+      // face_orientation=true;  face_flip=false; face_rotation=false and true
+      {
+        0*GeometryInfo<dim>::faces_per_cell*total_subfaces_per_face,
+        1*GeometryInfo<dim>::faces_per_cell *total_subfaces_per_face
+      },
+      // face_orientation=true;  face_flip=true;  face_rotation=false and true
+      {
+        2*GeometryInfo<dim>::faces_per_cell*total_subfaces_per_face,
+        3*GeometryInfo<dim>::faces_per_cell *total_subfaces_per_face
+      }
+    }
+  };
+
+  // set up a table with the offsets for a
+  // given refinement case respecting the
+  // corresponding number of subfaces. the
+  // index corresponds to (RefineCase::Type - 1)
+
+  // note, that normally we should use the
+  // obvious offsets 0,2,6. However, prior to
+  // the implementation of anisotropic
+  // refinement, in many places of the library
+  // the convention was used, that the first
+  // dataset with offset 0 corresponds to a
+  // standard (isotropic) face
+  // refinement. therefore we use the offsets
+  // 6,4,0 here to stick to that (implicit)
+  // convention
+  static const unsigned int ref_case_offset[3]=
+  {
+    6,  //cut_x
+    4,  //cut_y
+    0   //cut_xy
+  };
+
+
+  // for each subface of a given FaceRefineCase
+  // there is a corresponding equivalent
+  // subface number of one of the "standard"
+  // RefineCases (cut_x, cut_y, cut_xy). Map
+  // the given values to those equivalent
+  // ones.
+
+  // first, define an invalid number
+  static const unsigned int e = numbers::invalid_unsigned_int;
+
+  static const RefinementCase<dim-1>
+  equivalent_refine_case[internal::SubfaceCase<dim>::case_isotropic+1][GeometryInfo<3>::max_children_per_face]
+  =
+  {
+    // case_none. there should be only
+    // invalid values here. However, as
+    // this function is also called (in
+    // tests) for cells which have no
+    // refined faces, use isotropic
+    // refinement instead
+    {
+      RefinementCase<dim-1>::cut_xy,
+      RefinementCase<dim-1>::cut_xy,
+      RefinementCase<dim-1>::cut_xy,
+      RefinementCase<dim-1>::cut_xy
+    },
+    // case_x
+    {
+      RefinementCase<dim-1>::cut_x,
+      RefinementCase<dim-1>::cut_x,
+      RefinementCase<dim-1>::no_refinement,
+      RefinementCase<dim-1>::no_refinement
+    },
+    // case_x1y
+    {
+      RefinementCase<dim-1>::cut_xy,
+      RefinementCase<dim-1>::cut_xy,
+      RefinementCase<dim-1>::cut_x,
+      RefinementCase<dim-1>::no_refinement
+    },
+    // case_x2y
+    {
+      RefinementCase<dim-1>::cut_x,
+      RefinementCase<dim-1>::cut_xy,
+      RefinementCase<dim-1>::cut_xy,
+      RefinementCase<dim-1>::no_refinement
+    },
+    // case_x1y2y
+    {
+      RefinementCase<dim-1>::cut_xy,
+      RefinementCase<dim-1>::cut_xy,
+      RefinementCase<dim-1>::cut_xy,
+      RefinementCase<dim-1>::cut_xy
+    },
+    // case_y
+    {
+      RefinementCase<dim-1>::cut_y,
+      RefinementCase<dim-1>::cut_y,
+      RefinementCase<dim-1>::no_refinement,
+      RefinementCase<dim-1>::no_refinement
+    },
+    // case_y1x
+    {
+      RefinementCase<dim-1>::cut_xy,
+      RefinementCase<dim-1>::cut_xy,
+      RefinementCase<dim-1>::cut_y,
+      RefinementCase<dim-1>::no_refinement
+    },
+    // case_y2x
+    {
+      RefinementCase<dim-1>::cut_y,
+      RefinementCase<dim-1>::cut_xy,
+      RefinementCase<dim-1>::cut_xy,
+      RefinementCase<dim-1>::no_refinement
+    },
+    // case_y1x2x
+    {
+      RefinementCase<dim-1>::cut_xy,
+      RefinementCase<dim-1>::cut_xy,
+      RefinementCase<dim-1>::cut_xy,
+      RefinementCase<dim-1>::cut_xy
+    },
+    // case_xy (case_isotropic)
+    {
+      RefinementCase<dim-1>::cut_xy,
+      RefinementCase<dim-1>::cut_xy,
+      RefinementCase<dim-1>::cut_xy,
+      RefinementCase<dim-1>::cut_xy
+    }
+  };
+
+  static const unsigned int
+  equivalent_subface_number[internal::SubfaceCase<dim>::case_isotropic+1][GeometryInfo<3>::max_children_per_face]
+  =
+  {
+    // case_none, see above
+    {0,1,2,3},
+    // case_x
+    {0,1,e,e},
+    // case_x1y
+    {0,2,1,e},
+    // case_x2y
+    {0,1,3,e},
+    // case_x1y2y
+    {0,2,1,3},
+    // case_y
+    {0,1,e,e},
+    // case_y1x
+    {0,1,1,e},
+    // case_y2x
+    {0,2,3,e},
+    // case_y1x2x
+    {0,1,2,3},
+    // case_xy (case_isotropic)
+    {0,1,2,3}
+  };
+
+  // If exactly one of face_orientation and
+  // face_rotation is non-standard, cut_x and
+  // cut_y have to be exchanged. This table
+  // maps a refinement case to the exchanged
+  // one.
+  static const RefinementCase<dim-1> ref_case_permutation[4]
+    = {RefinementCase<dim-1>::no_refinement,
+       RefinementCase<dim-1>::cut_y,
+       RefinementCase<dim-1>::cut_x,
+       RefinementCase<dim-1>::cut_xy
+      };
+
+  // set a corresponding (equivalent)
+  // RefineCase and subface number
+  const RefinementCase<dim-1> equ_ref_case=equivalent_refine_case[ref_case][subface_no];
+  const unsigned int equ_subface_no=equivalent_subface_number[ref_case][subface_no];
+  // make sure, that we got a valid subface and
+  // RefineCase. equ_ref_case describes the
+  // refinement of a face, so compare it with
+  // the no_refinement value of the face type
+  Assert(equ_ref_case!=RefinementCase<dim-1>::no_refinement, ExcInternalError());
+  Assert(equ_subface_no!=e, ExcInternalError());
+  // now, finally respect non-standard faces.
+  // the standard values are
+  // face_orientation=true and
+  // face_rotation=false, so the two flags
+  // being equal means that exactly one of
+  // them is non-standard
+  const RefinementCase<dim-1>
+  final_ref_case = (face_orientation==face_rotation
+                    ?
+                    ref_case_permutation[equ_ref_case]
+                    :
+                    equ_ref_case);
+
+  // what we have now is the number of
+  // the subface in the natural
+  // orientation of the *face*. what we
+  // need to know is the number of the
+  // subface concerning the standard face
+  // orientation as seen from the *cell*.
+
+  // this mapping is not trivial, but we
+  // have done exactly this stuff in the
+  // child_cell_on_face function. in
+  // order to reduce the amount of code
+  // as well as to make maintaining the
+  // functionality easier we want to
+  // reuse that information. So we note
+  // that on the bottom face (face 4) of
+  // a hex cell the local x and y
+  // coordinates of the face and the cell
+  // coincide, thus also the refinement
+  // case of the face corresponds to the
+  // refinement case of the cell
+  // (ignoring cell refinement along the
+  // z direction). Using this knowledge
+  // we can (ab)use the
+  // child_cell_on_face function to do
+  // exactly the transformation we are in
+  // need of now
+  const unsigned int
+  final_subface_no = GeometryInfo<dim>::child_cell_on_face(RefinementCase<dim>(final_ref_case),
+                                                           4,
+                                                           equ_subface_no,
+                                                           face_orientation,
+                                                           face_flip,
+                                                           face_rotation,
+                                                           equ_ref_case);
+
+  // combine the position within the face
+  // (refinement case block plus subface), the
+  // face itself and the block of the
+  // orientation variant, and convert the
+  // resulting data set number into an offset
+  // counted in quadrature points
+  return (((face_no * total_subfaces_per_face
+            + ref_case_offset[final_ref_case-1]
+            + final_subface_no)
+           + orientation_offset[face_orientation][face_flip][face_rotation]
+          )
+          * n_quadrature_points);
+}
+
+
+
+template <int dim>
+Quadrature<dim>
+QProjector<dim>::project_to_face(const SubQuadrature &quadrature,
+                                 const unsigned int face_no)
+{
+  // let the overload that writes into a vector compute the points and
+  // pair them with the unmodified weights of the face formula
+  const unsigned int point_count = quadrature.size();
+  std::vector<Point<dim> > projected_points (point_count);
+
+  project_to_face (quadrature, face_no, projected_points);
+
+  return Quadrature<dim>(projected_points, quadrature.get_weights());
+}
+
+
+template <int dim>
+Quadrature<dim>
+QProjector<dim>::project_to_subface(const SubQuadrature       &quadrature,
+                                    const unsigned int         face_no,
+                                    const unsigned int         subface_no,
+                                    const RefinementCase<dim-1> &ref_case)
+{
+  // let the overload that writes into a vector compute the points and
+  // pair them with the unmodified weights of the face formula
+  const unsigned int point_count = quadrature.size();
+  std::vector<Point<dim> > projected_points (point_count);
+
+  project_to_subface (quadrature, face_no, subface_no,
+                      projected_points, ref_case);
+
+  return Quadrature<dim>(projected_points, quadrature.get_weights());
+}
+
+
+// ------------------------------------------------------------ //
+
+
+template <>
+bool
+QIterated<1>::uses_both_endpoints (const Quadrature<1> &base_quadrature)
+{
+  // report whether the formula has a point at 0 as well as a point at
+  // 1; the comparison with the end points is exact
+  const Point<1> left_end (0.0);
+  const Point<1> right_end (1.0);
+
+  bool found_left  = false;
+  bool found_right = false;
+
+  const unsigned int point_count = base_quadrature.size();
+  for (unsigned int q=0; q<point_count; ++q)
+    {
+      const Point<1> &candidate = base_quadrature.point(q);
+
+      found_left  = found_left  || (candidate == left_end);
+      found_right = found_right || (candidate == right_end);
+    }
+
+  return (found_left && found_right);
+}
+
+
+// template<>
+// void
+// QIterated<1>::fill(Quadrature<1>& dst,
+//                 const Quadrature<1> &base_quadrature,
+//                 const unsigned int   n_copies)
+// {
+//   Assert (n_copies > 0, ExcZero());
+//   Assert (base_quadrature.size() > 0, ExcZero());
+
+//   const unsigned int np =
+//     uses_both_endpoints(base_quadrature)
+//     ? (base_quadrature.size()-1) * n_copies + 1
+//     : base_quadrature.size() * n_copies;
+
+//   dst.quadrature_points.resize(np);
+//   dst.weights.resize(np);
+
+//   if (!uses_both_endpoints(base_quadrature))
+//                                   // we don't have to skip some
+//                                   // points in order to get a
+//                                   // reasonable quadrature formula
+//     {
+//       unsigned int next_point = 0;
+//       for (unsigned int copy=0; copy<n_copies; ++copy)
+//      for (unsigned int q_point=0; q_point<base_quadrature.size(); ++q_point)
+//        {
+//          dst.quadrature_points[next_point](0)
+//            = (copy + base_quadrature.point(q_point)(0)) / n_copies;
+//          dst.weights[next_point]
+//            = base_quadrature.weight(q_point) / n_copies;
+//          ++next_point;
+//        };
+//     }
+//   else
+//                                   // skip doubly available points
+//     {
+//       unsigned int next_point = 0;
+
+//                                     // first find out the weights of
+//                                     // the left and the right boundary
+//                                     // points. note that these usually
+//                                     // are but need not necessarily be
+//                                     // the same
+//       double double_point_weight = 0;
+//       unsigned int n_end_points = 0;
+//       for (unsigned int i=0; i<base_quadrature.size(); ++i)
+//                                       // add up the weight if this
+//                                       // is an endpoint
+//      if ((base_quadrature.point(i)(0) == 0.) ||
+//          (base_quadrature.point(i)(0) == 1.))
+//        {
+//          double_point_weight += base_quadrature.weight(i);
+//          ++n_end_points;
+//        };
+//                                     // scale the weight correctly
+//       double_point_weight /= n_copies;
+
+//                                     // make sure the base quadrature formula
+//                                     // has only one quadrature point
+//                                     // per end point
+//       Assert (n_end_points == 2, ExcInvalidQuadratureFormula());
+
+
+//       for (unsigned int copy=0; copy<n_copies; ++copy)
+//      for (unsigned int q_point=0; q_point<base_quadrature.size(); ++q_point)
+//        {
+//                                           // skip the left point of
+//                                           // this copy since we
+//                                           // have already entered
+//                                           // it the last time
+//          if ((copy > 0) &&
+//              (base_quadrature.point(q_point)(0) == 0.))
+//            continue;
+
+//          dst.quadrature_points[next_point](0)
+//            = (copy+base_quadrature.point(q_point)(0)) / n_copies;
+
+//                                           // if this is the
+//                                           // rightmost point of one
+//                                           // of the non-last
+//                                           // copies: give it the
+//                                           // double weight
+//          if ((copy != n_copies-1) &&
+//              (base_quadrature.point(q_point)(0) == 1.))
+//            dst.weights[next_point] = double_point_weight;
+//          else
+//            dst.weights[next_point] = base_quadrature.weight(q_point) /
+//                                        n_copies;
+
+//          ++next_point;
+//        };
+//     };
+
+// #if DEBUG
+//   double sum_of_weights = 0;
+//   for (unsigned int i=0; i<dst.size(); ++i)
+//     sum_of_weights += dst.weight(i);
+//   Assert (std::fabs(sum_of_weights-1) < 1e-15,
+//        ExcInternalError());
+// #endif
+
+// }
+
+
+// Specialization for dim == 0. Neither argument is used: the base class
+// is default-constructed and the assertion flags this case as not
+// implemented.
+template <>
+QIterated<0>::QIterated (const Quadrature<1> & /*base_quadrature*/,
+                         const unsigned int    /*n_copies*/)
+  : Quadrature<0>()
+{
+  Assert (false, ExcNotImplemented());
+}
+
+
+
+// Build a composite one-dimensional formula by repeating base_quadrature
+// on n_copies subintervals of equal length. If the base formula uses both
+// interval ends, a point shared by two neighboring copies is stored only
+// once and receives the sum of the two end-point weights.
+template <>
+QIterated<1>::QIterated (const Quadrature<1> &base_quadrature,
+                         const unsigned int   n_copies)
+  :
+  Quadrature<1> (uses_both_endpoints(base_quadrature) ?
+                 (base_quadrature.size()-1) * n_copies + 1 :
+                 base_quadrature.size() * n_copies)
+{
+  Assert (base_quadrature.size() > 0, ExcNotInitialized());
+  Assert (n_copies > 0, ExcZero());
+
+  const unsigned int n_base_points = base_quadrature.size();
+
+  // index of the next free entry in the point and weight arrays
+  unsigned int slot = 0;
+
+  if (uses_both_endpoints(base_quadrature))
+    {
+      // neighboring copies share a point, which must not be entered twice
+      const Point<1> left_end (0.0);
+      const Point<1> right_end (1.0);
+
+      // Add up the weights the base formula assigns to the two interval
+      // ends (they usually coincide, but need not) and count how many
+      // base points sit on an end.
+      double shared_weight = 0;
+      unsigned int end_point_count = 0;
+      for (unsigned int q=0; q<n_base_points; ++q)
+        {
+          const Point<1> &p = base_quadrature.point(q);
+          if ((p == left_end) || (p == right_end))
+            {
+              shared_weight += base_quadrature.weight(q);
+              ++end_point_count;
+            }
+        }
+      shared_weight /= n_copies;
+
+      // exactly one base point per interval end is allowed
+      Assert (end_point_count == 2, ExcInvalidQuadratureFormula());
+
+      for (unsigned int copy=0; copy<n_copies; ++copy)
+        {
+          const bool is_first_copy = (copy == 0);
+          const bool is_last_copy  = (copy == n_copies-1);
+
+          for (unsigned int q=0; q<n_base_points; ++q)
+            {
+              const Point<1> &p = base_quadrature.point(q);
+
+              // the left end of this copy was already entered as the
+              // right end of the previous one
+              if (!is_first_copy && (p == left_end))
+                continue;
+
+              this->quadrature_points[slot]
+                = Point<1>(p(0) / n_copies
+                           +
+                           (1.0*copy)/n_copies);
+
+              // the right end of every copy but the last one is an
+              // interior interface and carries the summed weight
+              this->weights[slot]
+                = ((!is_last_copy && (p == right_end))
+                   ?
+                   shared_weight
+                   :
+                   base_quadrature.weight(q) / n_copies);
+
+              ++slot;
+            }
+        }
+    }
+  else
+    {
+      // no shared points: every copy contributes all of its points
+      for (unsigned int copy=0; copy<n_copies; ++copy)
+        for (unsigned int q=0; q<n_base_points; ++q)
+          {
+            this->quadrature_points[slot]
+              = Point<1>(base_quadrature.point(q)(0) / n_copies
+                         +
+                         (1.0*copy)/n_copies);
+            this->weights[slot]
+              = base_quadrature.weight(q) / n_copies;
+
+            ++slot;
+          }
+    }
+
+#if DEBUG
+  // the weights of the composite formula have to add up to one
+  double weight_total = 0;
+  for (unsigned int q=0; q<this->size(); ++q)
+    weight_total += this->weight(q);
+  Assert (std::fabs(weight_total-1) < 1e-13,
+          ExcInternalError());
+#endif
+}
+
+
+// template <int dim>
+// void
+// QIterated<dim>::fill(Quadrature<dim>&, const Quadrature<1>&, unsigned int)
+// {
+//   Assert(false, ExcNotImplemented());
+// }
+
+
+// In higher dimensions the iterated formula is the tensor product of the
+// (dim-1)-dimensional iterated formula with the one-dimensional one.
+template <int dim>
+QIterated<dim>::QIterated (const Quadrature<1> &base_quadrature,
+                           const unsigned int   N)
+  : Quadrature<dim> (QIterated<dim-1>(base_quadrature, N),
+                     QIterated<1>(base_quadrature, N))
+{}
+
+
+
+// Explicit instantiations. Quadrature is instantiated for dimensions 0
+// through 3; QAnisotropic, QIterated and QProjector are instantiated for
+// dimensions 1 through 3.
+template class Quadrature<0>;
+template class Quadrature<1>;
+template class Quadrature<2>;
+template class Quadrature<3>;
+template class QAnisotropic<1>;
+template class QAnisotropic<2>;
+template class QAnisotropic<3>;
+template class QIterated<1>;
+template class QIterated<2>;
+template class QIterated<3>;
+template class QProjector<1>;
+template class QProjector<2>;
+template class QProjector<3>;
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/quadrature_lib.cc b/source/base/quadrature_lib.cc
new file mode 100644
index 0000000..dd16fb4
--- /dev/null
+++ b/source/base/quadrature_lib.cc
@@ -0,0 +1,1379 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/geometry_info.h>
+
+
+#include <cmath>
+#include <limits>
+#include <algorithm>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// please note: for a given dimension, we need the quadrature formulae
+// for all lower dimensions as well. That is why in this file the check
+// is for deal_II_dimension >= any_number and not for ==
+
+
+
+// Zero-dimensional Gauss formula: independent of the requested number of
+// points there is exactly n_q^0 == 1 quadrature point.
+template <>
+QGauss<0>::QGauss (const unsigned int /*n*/)
+  : Quadrature<0> (1)
+{
+  // that single point carries unit weight
+  this->weights[0] = 1;
+}
+
+
+
+// Zero-dimensional Gauss-Lobatto formula: independent of the requested
+// number of points there is exactly n_q^0 == 1 quadrature point.
+template <>
+QGaussLobatto<0>::QGaussLobatto (const unsigned int /*n*/)
+  : Quadrature<0> (1)
+{
+  // that single point carries unit weight
+  this->weights[0] = 1;
+}
+
+
+
+// Construct the n-point Gauss formula on [0,1]. The roots of the Legendre
+// polynomial L_n are located on [-1,1] with a Newton iteration and then
+// mapped to the unit interval. Only m = (n+1)/2 roots are iterated for;
+// each one also fills its mirror image about 0.5. For n == 0 the formula
+// stays empty.
+template <>
+QGauss<1>::QGauss (const unsigned int n)
+  :
+  Quadrature<1> (n)
+{
+  if (n == 0)
+    return;
+
+  const unsigned int m = (n+1)/2;
+
+  // tolerance for the Newton iteration below. we need to make it
+  // adaptive since on some machines (for example PowerPC) long double
+  // is the same as double -- in that case we can only get to a certain
+  // multiple of the accuracy of double there, while on other machines
+  // we'd like to go further down
+  //
+  // the situation is complicated by the fact that even if long double
+  // exists and is described by std::numeric_limits, we may not actually
+  // get the additional precision. One case where this happens is on
+  // x86, where one can set hardware flags that disable long double
+  // precision even for long double variables. these flags are not
+  // usually set, but for example matlab sets them and this then breaks
+  // deal.II code that is run as a subroutine to matlab...
+  //
+  // a similar situation exists, btw, when running programs under
+  // valgrind up to and including at least version 3.3: valgrind's
+  // emulator only supports 64 bit arithmetic, even for 80 bit long
+  // doubles.
+  const long double
+  long_double_eps = static_cast<long double>(std::numeric_limits<long double>::epsilon()),
+  double_eps      = static_cast<long double>(std::numeric_limits<double>::epsilon());
+
+  // now check whether long double is more accurate than double, and set
+  // tolerances accordingly. generate a one that really is generated at
+  // run-time and is not optimized away by the compiler. that makes sure
+  // that the tolerance is set at run-time with the current behavior,
+  // not at compile-time (not doing so leads to trouble with valgrind
+  // for example).
+  volatile long double runtime_one = 1.0;
+  const long double tolerance
+    = (runtime_one + long_double_eps != runtime_one
+       ?
+       std::max (double_eps / 100, long_double_eps * 5)
+       :
+       double_eps * 5
+      );
+
+
+  for (unsigned int i=1; i<=m; ++i)
+    {
+      // starting value for the i-th root on [-1,1]
+      long double z = std::cos(numbers::PI * (i-.25)/(n+.5));
+
+      long double pp;
+      long double p1, p2, p3;
+
+      // Newton iteration
+      do
+        {
+          // compute L_n (z) with the three-term recurrence; when the
+          // loop is done, p1 holds L_n(z) and p2 holds L_{n-1}(z)
+          p1 = 1.;
+          p2 = 0.;
+          for (unsigned int j=0; j<n; ++j)
+            {
+              p3 = p2;
+              p2 = p1;
+              p1 = ((2.*j+1.)*z*p2-j*p3)/(j+1);
+            }
+          // derivative of L_n at z, then the Newton update of z
+          pp = n*(z*p1-p2)/(z*z-1);
+          z = z-p1/pp;
+        }
+      while (std::abs(p1/pp) > tolerance);
+
+      // map the symmetric pair of roots -z, +z from [-1,1] to [0,1]
+      double x = .5*z;
+      this->quadrature_points[i-1] = Point<1>(.5-x);
+      this->quadrature_points[n-i] = Point<1>(.5+x);
+
+      // both points of the pair get the same weight
+      double w = 1./((1.-z*z)*pp*pp);
+      this->weights[i-1] = w;
+      this->weights[n-i] = w;
+    }
+}
+
+
+// n-point Gauss-Lobatto formula on [0,1]. At least two points are
+// required.
+template <>
+QGaussLobatto<1>::QGaussLobatto (const unsigned int n)
+  :
+  Quadrature<1> (n)
+{
+  Assert (n >= 2, ExcNotImplemented());
+
+  // points (including the end points -1 and +1) and the matching
+  // weights, as delivered by the two helper functions
+  const std::vector<long double> reference_points
+    = compute_quadrature_points(n, 1, 1);
+  const std::vector<long double> reference_weights
+    = compute_quadrature_weights(reference_points, 0, 0);
+
+  // scale points to the interval [0.0, 1.0]; the weights are halved
+  // along with the interval length
+  const unsigned int n_points = reference_points.size();
+  for (unsigned int q=0; q<n_points; ++q)
+    {
+      const double reference_x = static_cast<double>(reference_points[q]);
+      this->quadrature_points[q] = Point<1>(0.5 + 0.5*reference_x);
+      this->weights[q]           = 0.5*reference_weights[q];
+    }
+}
+
+
+
+// Return q points on [-1,1]: the q-2 roots of the Jacobi polynomial of
+// degree q-2 with parameters (alpha, beta), found one after the other by
+// a Newton iteration that deflates the roots already known, framed by
+// the two end points -1 and +1.
+template <>
+std::vector<long double> QGaussLobatto<1>::
+compute_quadrature_points(const unsigned int q,
+                          const int alpha,
+                          const int beta) const
+{
+  // number of inner points
+  const unsigned int m = q-2;
+  std::vector<long double> nodes(m);
+
+  // Stopping tolerance of the Newton iteration. It is determined at run
+  // time, depending on whether long double arithmetic really resolves
+  // more digits than double; class QGauss explains the reasons in detail.
+  const long double long_double_eps
+    = static_cast<long double>(std::numeric_limits<long double>::epsilon());
+  const long double double_eps
+    = static_cast<long double>(std::numeric_limits<double>::epsilon());
+
+  volatile long double runtime_one = 1.0;
+  const bool long_double_is_more_accurate
+    = (runtime_one + long_double_eps != runtime_one);
+  const long double tolerance
+    = (long_double_is_more_accurate
+       ?
+       std::max (double_eps / 100, long_double_eps * 5)
+       :
+       double_eps * 5);
+
+  // The algorithm follows closely the one given in the appendix of the
+  // book by Karniadakis and Sherwin: Spectral/hp element methods for
+  // computational fluid dynamics (Oxford University Press, 2005)
+
+  // starting values: the zeros of the Chebyshev polynomial
+  // (alpha=beta=-0.5)
+  for (unsigned int i=0; i<m; ++i)
+    nodes[i] = - std::cos( static_cast<long double>(2*i+1)/(2*m) * numbers::PI );
+
+  for (unsigned int k=0; k<m; ++k)
+    {
+      // from the second root on, start halfway between the Chebyshev
+      // guess and the root found last
+      long double root = nodes[k];
+      if (k>0)
+        root = (root + nodes[k-1])/2;
+
+      long double step;
+      do
+        {
+          // deflation term: sum of 1/(root - x_i) over the known roots
+          long double deflation = 0.;
+          for (unsigned int i=0; i<k; ++i)
+            deflation += 1./(root - nodes[i]);
+
+          const long double derivative
+            = 0.5*(alpha + beta + m + 1)*JacobiP(root, alpha+1, beta+1, m-1);
+          const long double value = JacobiP(root, alpha, beta, m);
+
+          step = value/(value*deflation - derivative);
+          root += step;
+        }
+      while (std::fabs(step) >= tolerance);
+
+      nodes[k] = root;
+    }
+
+  // add boundary points:
+  nodes.insert(nodes.begin(), -1.L);
+  nodes.push_back(+1.L);
+
+  return nodes;
+}
+
+
+
+// Return one weight per entry of x. Each weight is a common prefactor
+// divided by the square of the Jacobi polynomial of degree x.size()-1
+// at that point; the first and the last weight are additionally scaled
+// by (beta+1) and (alpha+1), respectively.
+template <>
+std::vector<long double> QGaussLobatto<1>::
+compute_quadrature_weights(const std::vector<long double> &x,
+                           const int alpha,
+                           const int beta) const
+{
+  const unsigned int q = x.size();
+
+  // the part of the weight that does not depend on the point
+  const long double factor = std::pow(2., alpha+beta+1) *
+                             gamma(alpha+q) *
+                             gamma(beta+q) /
+                             ((q-1)*gamma(q)*gamma(alpha+beta+q+1));
+
+  std::vector<long double> w(q);
+  for (unsigned int i=0; i<q; ++i)
+    {
+      const long double polynomial_value = JacobiP(x[i], alpha, beta, q-1);
+      w[i] = factor/(polynomial_value*polynomial_value);
+    }
+
+  // extra factors for the first and the last point
+  w[0]   *= (beta + 1);
+  w[q-1] *= (alpha + 1);
+
+  return w;
+}
+
+
+
+// Evaluate the Jacobi polynomial of degree n with parameters
+// (alpha, beta) at the point x, using the three-term recurrence.
+//
+// The recurrence coefficients grow like i^3. They are formed in long
+// double arithmetic so that they cannot overflow an integer type for
+// large degrees; for moderate degrees they are integers that long double
+// represents exactly. Only the two most recent polynomial values are
+// kept, so no memory is allocated.
+template <>
+long double QGaussLobatto<1>::JacobiP(const long double x,
+                                      const int alpha,
+                                      const int beta,
+                                      const unsigned int n) const
+{
+  // P_0(x)
+  long double p_previous = 1.0L;
+  if (n==0)
+    return p_previous;
+
+  // P_1(x)
+  long double p_current = ((alpha+beta+2)*x + (alpha-beta))/2;
+
+  // convert the parameters once, so that none of the expressions below
+  // mixes signed and unsigned integers
+  const long double a = alpha;
+  const long double b = beta;
+
+  // step from P_i to P_{i+1} for i = 1, ..., n-1
+  for (unsigned int i=1; i<n; ++i)
+    {
+      const long double k  = i;
+      const long double v  = 2*k + a + b;
+      const long double a1 = 2*(k+1)*(k + a + b + 1)*v;
+      const long double a2 = (v + 1)*(a*a - b*b);
+      const long double a3 = v*(v + 1)*(v + 2);
+      const long double a4 = 2*(k+a)*(k+b)*(v + 2);
+
+      const long double p_next
+        = ((a2 + a3*x)*p_current - a4*p_previous)/a1;
+      p_previous = p_current;
+      p_current  = p_next;
+    }
+  return p_current;
+}
+
+
+
+// Return the Gamma function at the positive integer n, that is (n-1)!.
+// n must be at least 1, since Gamma has a pole at zero.
+template <>
+long double QGaussLobatto<1>::gamma(const unsigned int n) const
+{
+  Assert (n > 0,
+          ExcMessage("The Gamma function is not defined for n = 0."));
+
+  // Gamma(1) = 0! = 1 and Gamma(2) = 1! = 1. Handling these cases up
+  // front also keeps n-1 and n-2 below from wrapping around.
+  if (n < 3)
+    return 1.0L;
+
+  // (n-1) * (n-2) * ... * 2
+  long double result = n - 1;
+  for (unsigned int i=n-2; i>1; --i)
+    result *= i;
+  return result;
+}
+
+
+
+// Midpoint rule: one point in the middle of [0,1] with weight one.
+template <>
+QMidpoint<1>::QMidpoint ()
+  : Quadrature<1>(1)
+{
+  this->weights[0] = 1.0;
+  this->quadrature_points[0] = Point<1>(0.5);
+}
+
+
+
+// Trapezoidal rule: the two ends of [0,1], each with weight 1/2.
+template <>
+QTrapez<1>::QTrapez ()
+  : Quadrature<1> (2)
+{
+  // left end
+  this->quadrature_points[0] = Point<1>(0.0);
+  this->weights[0] = 0.5;
+
+  // right end
+  this->quadrature_points[1] = Point<1>(1.0);
+  this->weights[1] = 0.5;
+}
+
+
+
+// Simpson rule: both ends and the midpoint of [0,1] with weights
+// 1/6, 2/3, 1/6.
+template <>
+QSimpson<1>::QSimpson ()
+  : Quadrature<1> (3)
+{
+  const double nodes[]        = { 0.0, 0.5, 1.0 };
+  const double node_weights[] = { 1./6., 2./3., 1./6. };
+
+  const unsigned int n_points = this->size();
+  for (unsigned int q=0; q!=n_points; ++q)
+    {
+      this->weights[q] = node_weights[q];
+      this->quadrature_points[q] = Point<1>(nodes[q]);
+    }
+}
+
+
+
+// Milne rule: five equidistant points on [0,1] with weights
+// 7/90, 32/90, 12/90, 32/90, 7/90.
+template <>
+QMilne<1>::QMilne ()
+  : Quadrature<1> (5)
+{
+  const double nodes[]        = { 0.0, .25, .5, .75, 1.0 };
+  const double node_weights[] = { 7./90., 32./90., 12./90., 32./90., 7./90. };
+
+  const unsigned int n_points = this->size();
+  for (unsigned int q=0; q!=n_points; ++q)
+    {
+      this->weights[q] = node_weights[q];
+      this->quadrature_points[q] = Point<1>(nodes[q]);
+    }
+}
+
+
+
+// Weddle rule: seven equidistant points on [0,1] with weights
+// 41/840, 216/840, 27/840, 272/840, 27/840, 216/840, 41/840.
+template <>
+QWeddle<1>::QWeddle ()
+  : Quadrature<1> (7)
+{
+  const double nodes[]        = { 0.0, 1./6., 1./3., .5, 2./3., 5./6., 1.0 };
+  const double node_weights[] = { 41./840., 216./840., 27./840., 272./840.,
+                                  27./840., 216./840., 41./840.
+                                };
+
+  const unsigned int n_points = this->size();
+  for (unsigned int q=0; q!=n_points; ++q)
+    {
+      this->weights[q] = node_weights[q];
+      this->quadrature_points[q] = Point<1>(nodes[q]);
+    }
+}
+
+
+// n-point formula built from the tabulated points and weights returned by
+// set_quadrature_points() and set_quadrature_weights(). With revert set,
+// the table is mirrored about 0.5.
+template <>
+QGaussLog<1>::QGaussLog(const unsigned int n,
+                        const bool revert)
+  :
+  Quadrature<1> (n)
+{
+  const std::vector<double> nodes        = set_quadrature_points(n);
+  const std::vector<double> node_weights = set_quadrature_weights(n);
+
+  const unsigned int n_points = this->size();
+
+  if (revert)
+    {
+      // Using the change of variables x=1-t, it's possible to show
+      // that int f(x)ln|1-x| = int f(1-t) ln|t|, which implies that
+      // we can use this quadrature formula also with weight ln|1-x|:
+      // walk through the table backwards and reflect every point.
+      for (unsigned int q=0; q<n_points; ++q)
+        {
+          const unsigned int mirrored = n-1-q;
+          this->quadrature_points[q] = Point<1>(1-nodes[mirrored]);
+          this->weights[q]           = node_weights[mirrored];
+        }
+    }
+  else
+    {
+      // take the table as it is
+      for (unsigned int q=0; q<n_points; ++q)
+        {
+          this->quadrature_points[q] = Point<1>(nodes[q]);
+          this->weights[q]           = node_weights[q];
+        }
+    }
+}
+
+// Return the n tabulated quadrature points that the QGaussLog<1>
+// constructor builds its formula from. Tables exist for n = 1, ..., 12,
+// listed in increasing order. Every other n runs into
+// Assert(false, ExcNotImplemented()); if that does not stop execution,
+// the vector is returned with its n entries still zero.
+template <>
+std::vector<double>
+QGaussLog<1>::set_quadrature_points(const unsigned int n) const
+{
+
+  // n entries, all zero until one of the cases below fills them
+  std::vector<double> points(n);
+
+  switch (n)
+    {
+    case 1:
+      points[0] = 0.3333333333333333;
+      break;
+
+    case 2:
+      points[0] = 0.1120088061669761;
+      points[1] = 0.6022769081187381;
+      break;
+
+    case 3:
+      points[0] = 0.06389079308732544;
+      points[1] = 0.3689970637156184;
+      points[2] = 0.766880303938942;
+      break;
+
+    case 4:
+      points[0] = 0.04144848019938324;
+      points[1] = 0.2452749143206022;
+      points[2] = 0.5561654535602751;
+      points[3] = 0.848982394532986;
+      break;
+
+    case 5:
+      points[0] = 0.02913447215197205;
+      points[1] = 0.1739772133208974;
+      points[2] =  0.4117025202849029;
+      points[3] = 0.6773141745828183;
+      points[4] = 0.89477136103101;
+      break;
+
+    case 6:
+      points[0] = 0.02163400584411693;
+      points[1] = 0.1295833911549506;
+      points[2] = 0.3140204499147661;
+      points[3] = 0.5386572173517997;
+      points[4] = 0.7569153373774084;
+      points[5] = 0.922668851372116;
+      break;
+
+
+    case 7:
+      points[0] = 0.0167193554082585;
+      points[1] = 0.100185677915675;
+      points[2] = 0.2462942462079286;
+      points[3] = 0.4334634932570557;
+      points[4] = 0.6323509880476823;
+      points[5] = 0.81111862674023;
+      points[6] = 0.940848166743287;
+      break;
+
+    case 8:
+      points[0] = 0.01332024416089244;
+      points[1] = 0.07975042901389491;
+      points[2] = 0.1978710293261864;
+      points[3] =   0.354153994351925;
+      points[4] =   0.5294585752348643;
+      points[5] = 0.7018145299391673;
+      points[6] = 0.849379320441094;
+      points[7] = 0.953326450056343;
+      break;
+
+    case 9:
+      points[0] = 0.01086933608417545;
+      points[1] = 0.06498366633800794;
+      points[2] = 0.1622293980238825;
+      points[3] = 0.2937499039716641;
+      points[4] = 0.4466318819056009;
+      points[5] = 0.6054816627755208;
+      points[6] = 0.7541101371585467;
+      points[7] = 0.877265828834263;
+      points[8] = 0.96225055941096;
+      break;
+
+    case 10:
+      points[0] = 0.00904263096219963;
+      points[1] = 0.05397126622250072;
+      points[2] =  0.1353118246392511;
+      points[3] = 0.2470524162871565;
+      points[4] = 0.3802125396092744;
+      points[5] = 0.5237923179723384;
+      points[6] = 0.6657752055148032;
+      points[7] = 0.7941904160147613;
+      points[8] = 0.898161091216429;
+      points[9] = 0.9688479887196;
+      break;
+
+
+    case 11:
+      points[0] = 0.007643941174637681;
+      points[1] = 0.04554182825657903;
+      points[2] = 0.1145222974551244;
+      points[3] = 0.2103785812270227;
+      points[4] = 0.3266955532217897;
+      points[5] = 0.4554532469286375;
+      points[6] = 0.5876483563573721;
+      points[7] = 0.7139638500230458;
+      points[8] = 0.825453217777127;
+      points[9] = 0.914193921640008;
+      points[10] = 0.973860256264123;
+      break;
+
+    case 12:
+      points[0] = 0.006548722279080035;
+      points[1] = 0.03894680956045022;
+      points[2] = 0.0981502631060046;
+      points[3] = 0.1811385815906331;
+      points[4] = 0.2832200676673157;
+      points[5] = 0.398434435164983;
+      points[6] = 0.5199526267791299;
+      points[7] = 0.6405109167754819;
+      points[8] = 0.7528650118926111;
+      points[9] = 0.850240024421055;
+      points[10] = 0.926749682988251;
+      points[11] = 0.977756129778486;
+      break;
+
+    default:
+      // no table for this number of points
+      Assert(false, ExcNotImplemented());
+      break;
+    }
+
+  return points;
+}
+
+
+// Return the n tabulated quadrature weights that the QGaussLog<1>
+// constructor builds its formula from, in the same order as the points
+// returned by set_quadrature_points(). All tabulated weights are
+// negative. Tables exist for n = 1, ..., 12. Every other n runs into
+// Assert(false, ExcNotImplemented()); if that does not stop execution,
+// the vector is returned with its n entries still zero.
+template <>
+std::vector<double>
+QGaussLog<1>::set_quadrature_weights(const unsigned int n) const
+{
+
+  // n entries, all zero until one of the cases below fills them
+  std::vector<double> weights(n);
+
+  switch (n)
+    {
+    case 1:
+      weights[0] = -1.0;
+      break;
+    case 2:
+      weights[0] = -0.7185393190303845;
+      weights[1] = -0.2814606809696154;
+      break;
+
+    case 3:
+      weights[0] = -0.5134045522323634;
+      weights[1] = -0.3919800412014877;
+      weights[2] = -0.0946154065661483;
+      break;
+
+    case 4:
+      weights[0] =-0.3834640681451353;
+      weights[1] =-0.3868753177747627;
+      weights[2] =-0.1904351269501432;
+      weights[3] =-0.03922548712995894;
+      break;
+
+    case 5:
+      weights[0] =-0.2978934717828955;
+      weights[1] =-0.3497762265132236;
+      weights[2] =-0.234488290044052;
+      weights[3] =-0.0989304595166356;
+      weights[4] =-0.01891155214319462;
+      break;
+
+    case 6:
+      weights[0] = -0.2387636625785478;
+      weights[1] = -0.3082865732739458;
+      weights[2] = -0.2453174265632108;
+      weights[3] = -0.1420087565664786;
+      weights[4] = -0.05545462232488041;
+      weights[5] = -0.01016895869293513;
+      break;
+
+
+    case 7:
+      weights[0] = -0.1961693894252476;
+      weights[1] = -0.2703026442472726;
+      weights[2] = -0.239681873007687;
+      weights[3] = -0.1657757748104267;
+      weights[4] = -0.0889432271377365;
+      weights[5] = -0.03319430435645653;
+      weights[6] = -0.005932787015162054;
+      break;
+
+    case 8:
+      weights[0] = -0.164416604728002;
+      weights[1] = -0.2375256100233057;
+      weights[2] = -0.2268419844319134;
+      weights[3] = -0.1757540790060772;
+      weights[4] = -0.1129240302467932;
+      weights[5] = -0.05787221071771947;
+      weights[6] = -0.02097907374214317;
+      weights[7] = -0.003686407104036044;
+      break;
+
+    case 9:
+      weights[0] = -0.1400684387481339;
+      weights[1] = -0.2097722052010308;
+      weights[2] = -0.211427149896601;
+      weights[3] = -0.1771562339380667;
+      weights[4] = -0.1277992280331758;
+      weights[5] = -0.07847890261203835;
+      weights[6] = -0.0390225049841783;
+      weights[7] = -0.01386729555074604;
+      weights[8] = -0.002408041036090773;
+      break;
+
+    case 10:
+      weights[0] = -0.12095513195457;
+      weights[1] = -0.1863635425640733;
+      weights[2] = -0.1956608732777627;
+      weights[3] = -0.1735771421828997;
+      weights[4] = -0.135695672995467;
+      weights[5] = -0.0936467585378491;
+      weights[6] = -0.05578772735275126;
+      weights[7] = -0.02715981089692378;
+      weights[8] = -0.00951518260454442;
+      weights[9] = -0.001638157633217673;
+      break;
+
+
+    case 11:
+      weights[0] = -0.1056522560990997;
+      weights[1] = -0.1665716806006314;
+      weights[2] = -0.1805632182877528;
+      weights[3] = -0.1672787367737502;
+      weights[4] = -0.1386970574017174;
+      weights[5] = -0.1038334333650771;
+      weights[6] = -0.06953669788988512;
+      weights[7] = -0.04054160079499477;
+      weights[8] = -0.01943540249522013;
+      weights[9] = -0.006737429326043388;
+      weights[10] = -0.001152486965101561;
+      break;
+
+    case 12:
+      weights[0] = -0.09319269144393;
+      weights[1] = -0.1497518275763289;
+      weights[2] = -0.166557454364573;
+      weights[3] = -0.1596335594369941;
+      weights[4] = -0.1384248318647479;
+      weights[5] = -0.1100165706360573;
+      weights[6] = -0.07996182177673273;
+      weights[7] = -0.0524069547809709;
+      weights[8] = -0.03007108900074863;
+      weights[9] = -0.01424924540252916;
+      weights[10] = -0.004899924710875609;
+      weights[11] = -0.000834029009809656;
+      break;
+
+    default:
+      // no table for this number of points
+      Assert(false, ExcNotImplemented());
+      break;
+    }
+
+  return weights;
+
+}
+
+
+// Formula on [0,1] for the weight ln(|x-origin|/alpha). Depending on the
+// arguments it holds n, 2*n or 4*n points:
+//  - origin at 0 or 1 and alpha == 1: n points of a QGaussLog formula;
+//  - origin at 0 or 1 and alpha != 1: additionally n Gauss points that
+//    carry the correction for alpha;
+//  - origin elsewhere: [0,1] is split at the origin and each of the two
+//    segments gets n QGaussLog points plus n Gauss correction points.
+// If factor_out_singularity is set, every weight is finally divided by
+// ln(|x_q-origin|/alpha).
+template<>
+QGaussLogR<1>::QGaussLogR(const unsigned int n,
+                          const Point<1> origin,
+                          const double alpha,
+                          const bool factor_out_singularity) :
+  Quadrature<1>( ( (origin[0] == 0) || (origin[0] == 1) ) ?
+                 (alpha == 1 ? n : 2*n ) : 4*n ),
+  fraction( ( (origin[0] == 0) || (origin[0] == 1.) ) ? 1. : origin[0] )
+{
+  // The three quadrature formulas that make this one up. There are
+  // at most two when the origin is one of the extremes, and there is
+  // only one if the origin is one of the extremes and alpha is
+  // equal to one.
+  //
+  // If alpha is different from one, then we need a correction which
+  // is performed with a standard Gauss quadrature rule on each
+  // segment. This is not needed in the standard case where alpha is
+  // equal to one and the origin is on one of the extremes. We
+  // integrate with weight ln|(x-o)/alpha|. In the easy cases, we
+  // only need n quadrature points. In the most difficult one, we
+  // need 2*n points for the first segment, and 2*n points for the
+  // second segment.
+  //
+  // quad1 is reverted (singular at its right end) unless the origin
+  // is at zero; quad2 is always singular at its left end.
+  QGaussLog<1> quad1(n, origin[0] != 0);
+  QGaussLog<1> quad2(n);
+  QGauss<1> quad(n);
+
+  // Check that the origin is inside 0,1
+  Assert( (fraction >= 0) && (fraction <= 1),
+          ExcMessage("Origin is outside [0,1]."));
+
+  // Non singular offset. This is the start of non singular quad
+  // points: they follow the n singular points if there is a single
+  // segment, and the 2*n singular points if there are two.
+  unsigned int ns_offset = (fraction == 1) ? n : 2*n;
+
+  // i runs over the points of the singular formulas, j over the
+  // matching slots of the Gauss correction points.
+  for (unsigned int i=0, j=ns_offset; i<n; ++i, ++j)
+    {
+      // The first n quadrature points are those of quad1, scaled to
+      // the segment [0,fraction], and are by default singular.
+      this->quadrature_points[i] = quad1.point(i)*fraction;
+      this->weights[i] = quad1.weight(i)*fraction;
+
+      // Correction for the first segment: Gauss points with weight
+      // -log(alpha/fraction)
+      if ( (alpha != 1) || (fraction != 1) )
+        {
+          this->quadrature_points[j] = quad.point(i)*fraction;
+          this->weights[j] = -std::log(alpha/fraction)*quad.weight(i)*fraction;
+        }
+      // In case we need the second quadrature as well, do it now.
+      if (fraction != 1)
+        {
+          // singular points on the segment [fraction,1]
+          this->quadrature_points[i+n] = quad2.point(i)*(1-fraction)+Point<1>(fraction);
+          this->weights[i+n] = quad2.weight(i)*(1-fraction);
+
+          // Correction for the second segment: Gauss points with
+          // weight -log(alpha/(1-fraction))
+          this->quadrature_points[j+n] = quad.point(i)*(1-fraction)+Point<1>(fraction);
+          this->weights[j+n] = -std::log(alpha/(1-fraction))*quad.weight(i)*(1-fraction);
+        }
+    }
+  // Optionally divide every weight by the value of the singular
+  // factor at its quadrature point.
+  if (factor_out_singularity == true)
+    for (unsigned int i=0; i<size(); ++i)
+      {
+        Assert( this->quadrature_points[i] != origin,
+                ExcMessage("The singularity cannot be on a Gauss point of the same order!") );
+        double denominator = std::log(std::abs( (this->quadrature_points[i]-origin)[0] )/alpha);
+        Assert( denominator != 0.0,
+                ExcMessage("The quadrature formula you are using does not allow to "
+                           "factor out the singularity, which is zero at one point."));
+        this->weights[i] /= denominator;
+      }
+}
+
+
+// Number of quadrature points needed for a singularity at the given
+// position: 2*n*n if it sits on a vertex of the unit square, 4*n*n if it
+// sits elsewhere on the boundary, and 8*n*n otherwise. Positions are
+// compared with a tolerance of 1e-8.
+template<>
+unsigned int QGaussOneOverR<2>::quad_size(const Point<2> singularity,
+                                          const unsigned int n)
+{
+  const double tolerance = 1e-8;
+
+  // is one of the coordinates (numerically) equal to 0 or 1?
+  bool on_boundary = false;
+  for (unsigned int d=0; d<2; ++d)
+    on_boundary = on_boundary
+                  || ( std::abs(singularity[d]  ) < tolerance )
+                  || ( std::abs(singularity[d]-1) < tolerance );
+
+  if (!on_boundary)
+    return (8*n*n);
+
+  // a boundary point whose squared distance from the cell center equals
+  // 1/2 is one of the four vertices
+  const double center_distance_square
+    = (singularity-Point<2>(.5, .5)).norm_square();
+  const bool on_vertex = (std::abs(center_distance_square-.5) < tolerance);
+
+  return on_vertex ? (2*n*n) : (4*n*n);
+}
+
+// Formula for a singularity at an arbitrary point of the unit square.
+// All cases are treated alike: the square is split at the singularity
+// into four rectangles, and every rectangle with a relevant area gets a
+// scaled copy of the vertex formula that belongs to the corner at which
+// this rectangle touches the singularity.
+template<>
+QGaussOneOverR<2>::QGaussOneOverR(const unsigned int n,
+                                  const Point<2> singularity,
+                                  const bool factor_out_singularity) :
+  Quadrature<2>(quad_size(singularity, n))
+{
+  // rectangles whose area does not exceed this value are left out
+  const double min_area = 1e-8;
+
+  // next free slot in the point and weight arrays
+  unsigned int slot = 0;
+
+  // lexicographical ordering of the four rectangles
+  for (unsigned int box=0; box<4; ++box)
+    {
+      // The rectangle between unit cell vertex 'box' and the
+      // singularity has the singularity at its own corner 3-box.
+      const QGaussOneOverR<2> corner_rule(n, 3-box, factor_out_singularity);
+
+      // lower left corner of this rectangle
+      const Point<2> lower_left ((box & 1) ? singularity[0] : 0.,
+                                 (box & 2) ? singularity[1] : 0.);
+
+      // edge lengths and area of this rectangle
+      const Tensor<1,2> offset
+        = singularity-GeometryInfo<2>::unit_cell_vertex(box);
+      const double width  = std::abs(offset[0]);
+      const double height = std::abs(offset[1]);
+      const double area   = width*height;
+
+      if (area > min_area)
+        for (unsigned int q=0; q<corner_rule.size(); ++q, ++slot)
+          {
+            const Point<2> &qp = corner_rule.point(q);
+            this->quadrature_points[slot]
+              = lower_left + Point<2>(width*qp[0], height*qp[1]);
+            this->weights[slot] = corner_rule.weight(q)*area;
+          }
+    }
+}
+
+
+// Formula for a singularity located at one of the four vertices of the
+// unit square. For a more general position use the constructor that
+// takes a Point<2>.
+template<>
+QGaussOneOverR<2>::QGaussOneOverR(const unsigned int n,
+                                  const unsigned int vertex_index,
+                                  const bool factor_out_singularity) :
+  Quadrature<2>(2*n *n)
+{
+  // So far this is implemented for the vertices of a quadrilateral only.
+  // The plan is to do this also for the support points of arbitrary FE_Q
+  // elements, to allow the use of this class in boundary element
+  // programs with higher order mappings.
+  Assert(vertex_index <4, ExcIndexRange(vertex_index, 0, 4));
+
+  // Gauss formula on the (u,v) reference element
+  const QGauss<2> gauss(n);
+  const unsigned int n_gauss = gauss.size();
+
+  Assert(n_gauss == n*n, ExcInternalError());
+
+  // Only the piece for vertex 0 is created explicitly; all others are
+  // rotations of it. The transformation is
+  //
+  // (x,y) = (u, u tan(pi/4 v))
+  //
+  // with Jacobian
+  //
+  // J = pi/4 R / cos(pi/4 v)
+  //
+  // and R is dropped to take the singularity into account, unless the
+  // caller asked for it to be kept.
+  const double pi4 = numbers::PI/4;
+
+  for (unsigned int q=0; q<n_gauss; ++q)
+    {
+      const Point<2> &gp = gauss.point(q);
+      const double angle = pi4*gp[1];
+
+      const Point<2> mapped (gp[0], gp[0] * std::tan(angle));
+
+      double weight = gauss.weight(q)*pi4/std::cos(angle);
+      if (factor_out_singularity)
+        weight *= (mapped-GeometryInfo<2>::unit_cell_vertex(0)).norm();
+
+      this->quadrature_points[q] = mapped;
+      this->weights[q]           = weight;
+
+      // The other half of the quadrilateral is obtained by swapping
+      // the two coordinates; the weight stays the same.
+      this->quadrature_points[n_gauss+q] = Point<2>(mapped[1], mapped[0]);
+      this->weights[n_gauss+q]           = weight;
+    }
+
+  // rotation angle that moves vertex 0 to the requested vertex
+  double theta = 0;
+  if (vertex_index == 1)
+    theta = numbers::PI/2;
+  else if (vertex_index == 2)
+    theta = -numbers::PI/2;
+  else if (vertex_index == 3)
+    theta = numbers::PI;
+
+  // rotate all points about the cell center (.5,.5)
+  if (vertex_index != 0)
+    {
+      const double cos_theta       =  std::cos(theta);
+      const double sin_theta       =  std::sin(theta);
+      const double minus_sin_theta = -std::sin(theta);
+
+      for (unsigned int q=0; q<size(); ++q)
+        {
+          Point<2> &p = this->quadrature_points[q];
+          const double x = p[0]-.5;
+          const double y = p[1]-.5;
+
+          p[0] = cos_theta*x + minus_sin_theta*y + .5;
+          p[1] = sin_theta*x + cos_theta*y + .5;
+        }
+    }
+}
+
+
+// Copy the given formula with its points reordered by ascending weight.
+// The object itself serves as the comparison functor for the sort.
+template <int dim>
+QSorted<dim>::QSorted(Quadrature<dim> quad) :
+  Quadrature<dim>(quad.size())
+{
+  typedef std::pair<double, Point<dim> > WeightedPoint;
+
+  const unsigned int n_points = quad.size();
+
+  // collect (weight, point) pairs
+  std::vector<WeightedPoint> weighted_points (n_points);
+  for (unsigned int q=0; q<n_points; ++q)
+    {
+      weighted_points[q].first  = quad.weight(q);
+      weighted_points[q].second = quad.point(q);
+    }
+
+  std::sort(weighted_points.begin(), weighted_points.end(), *this);
+
+  // store them back in sorted order
+  for (unsigned int q=0; q<n_points; ++q)
+    {
+      this->quadrature_points[q] = weighted_points[q].second;
+      this->weights[q]           = weighted_points[q].first;
+    }
+}
+
+
+// Comparison functor used for sorting: a (weight, point) pair precedes
+// another one if its weight is strictly smaller; the points themselves
+// are not looked at.
+template <int dim>
+bool QSorted<dim>::operator()(const std::pair<double, Point<dim> > &a,
+                              const std::pair<double, Point<dim> > &b)
+{
+  const double weight_a = a.first;
+  const double weight_b = b.first;
+  return weight_a < weight_b;
+}
+
+
+// construct the quadrature formulae in higher dimensions by
+// tensor product of lower dimensions
+
+// Gauss rule in dim dimensions, assembled from the (dim-1)-dimensional
+// Gauss rule and the 1d Gauss rule, both created with the same n.
+template <int dim>
+QGauss<dim>::QGauss (const unsigned int n)
+  :
+  Quadrature<dim> (QGauss<dim-1>(n),
+                   QGauss<1>(n))
+{}
+
+
+
+// Gauss-Lobatto rule in dim dimensions, assembled from the
+// (dim-1)-dimensional rule and the 1d rule, both created with the same n.
+template <int dim>
+QGaussLobatto<dim>::QGaussLobatto (const unsigned int n)
+  :
+  Quadrature<dim> (QGaussLobatto<dim-1>(n),
+                   QGaussLobatto<1>(n))
+{}
+
+
+
+// Midpoint rule in dim dimensions, assembled from the (dim-1)-dimensional
+// midpoint rule and the 1d midpoint rule.
+template <int dim>
+QMidpoint<dim>::QMidpoint ()
+  :
+  Quadrature<dim> (QMidpoint<dim-1>(),
+                   QMidpoint<1>())
+{}
+
+
+
+// Trapezoidal rule in dim dimensions, assembled from the
+// (dim-1)-dimensional rule and the 1d rule.
+template <int dim>
+QTrapez<dim>::QTrapez ()
+  :
+  Quadrature<dim> (QTrapez<dim-1>(),
+                   QTrapez<1>())
+{}
+
+
+
+// Simpson rule in dim dimensions, assembled from the (dim-1)-dimensional
+// rule and the 1d rule.
+template <int dim>
+QSimpson<dim>::QSimpson ()
+  :
+  Quadrature<dim> (QSimpson<dim-1>(),
+                   QSimpson<1>())
+{}
+
+
+
+// Milne rule in dim dimensions, assembled from the (dim-1)-dimensional
+// rule and the 1d rule.
+template <int dim>
+QMilne<dim>::QMilne ()
+  :
+  Quadrature<dim> (QMilne<dim-1>(),
+                   QMilne<1>())
+{}
+
+
+// Weddle rule in dim dimensions, assembled from the (dim-1)-dimensional
+// rule and the 1d rule.
+template <int dim>
+QWeddle<dim>::QWeddle ()
+  :
+  Quadrature<dim> (QWeddle<dim-1>(),
+                   QWeddle<1>())
+{}
+
+// Telles quadrature for dim > 1, built from a 1d base rule and the
+// location of the singularity.
+template <int dim>
+QTelles<dim>::QTelles (
+  const Quadrature<1> &base_quad, const Point<dim> &singularity)
+  :
+/**
+* The variable change itself is only implemented for dim == 1 (see the
+* explicit specialization). For dim == 2 and dim == 3 we build one 1d
+* Telles rule per coordinate direction, each using the corresponding
+* component of the singularity, and combine them through QAnisotropic.
+* For any other dim the base class is default constructed.
+**/
+  Quadrature<dim>(
+    dim == 2 ?
+    QAnisotropic<dim>(
+      QTelles<1>(base_quad, Point<1>(singularity[0])),
+      QTelles<1>(base_quad, Point<1>(singularity[1]))) :
+    dim == 3 ?
+    QAnisotropic<dim>(
+      QTelles<1>(base_quad, Point<1>(singularity[0])),
+      QTelles<1>(base_quad, Point<1>(singularity[1])),
+      QTelles<1>(base_quad, Point<1>(singularity[2]))) :
+    Quadrature<dim>())
+{
+}
+
+// Convenience constructor: Telles quadrature based on QGauss<1>(n).
+template <int dim>
+QTelles<dim>::QTelles (
+  const unsigned int n, const Point<dim> &singularity)
+  :
+/**
+* Delegate to the constructor taking a 1d base rule, using the Gauss
+* formula QGauss<1>(n) as base rule and the given singularity point
+* coordinates.
+**/
+  Quadrature<dim>(QTelles<dim>(QGauss<1>(n), singularity))
+{}
+
+
+
+/**
+* Explicit implementation of Telles' variable change for dim == 1.
+*
+* The object starts as a copy of @p base_quad. Base points that coincide
+* (up to a tolerance of 1e-10) with @p singularity are dropped, and the
+* remaining points and weights are mapped with Telles' cubic
+* transformation. The formulas below work on [-1,1]; points are converted
+* from and back to [0,1] via gamma = 2 x - 1 and x = (eta + 1) / 2.
+**/
+template <>
+QTelles<1>::QTelles (
+  const Quadrature<1> &base_quad, const Point<1> &singularity)
+  :
+  Quadrature<1>(base_quad)
+{
+  // Constants used in the implementation of Telles' rule.
+  const double eta_bar  = singularity[0] * 2. - 1.;
+  const double eta_star = eta_bar * eta_bar - 1.;
+  const double tol      = 1e-10;
+
+  // We need to remove the singularity point from the quadrature point
+  // list: keep every point (and its weight) that is farther than tol
+  // away from the singularity, in the original order. All kept entries
+  // are copied, including the last one.
+  std::vector<Point<1> > kept_points;
+  std::vector<double>    kept_weights;
+  kept_points.reserve(quadrature_points.size());
+  kept_weights.reserve(weights.size());
+  for (unsigned int d = 0; d < quadrature_points.size(); ++d)
+    if (std::abs(quadrature_points[d][0] - singularity[0]) > tol)
+      {
+        kept_points.push_back(quadrature_points[d]);
+        kept_weights.push_back(weights[d]);
+      }
+  quadrature_points.swap(kept_points);
+  weights.swap(kept_weights);
+
+  // gamma_bar = cbrt(a) + cbrt(b) + eta_bar with real cube roots. The
+  // roots are evaluated as sign(x) * |x|^(1/3), so that a negative
+  // argument yields the negative real root instead of NaN; this also
+  // covers a singularity at (or within rounding of) the interval
+  // boundary, where a and b are zero or tiny.
+  const double a = eta_bar * eta_star + std::abs(eta_star);
+  const double b = eta_bar * eta_star - std::abs(eta_star);
+  const double cbrt_a = (a < 0. ? -1. : 1.) * std::pow(std::abs(a), 1.0 / 3.0);
+  const double cbrt_b = (b < 0. ? -1. : 1.) * std::pow(std::abs(b), 1.0 / 3.0);
+  const double gamma_bar = cbrt_a + cbrt_b + eta_bar;
+
+  for (unsigned int q = 0; q < quadrature_points.size(); ++q)
+    {
+      // map the point from [0,1] to [-1,1]
+      const double gamma = quadrature_points[q][0] * 2 - 1;
+
+      // transformed coordinate on [-1,1]
+      const double eta = (std::pow(gamma - gamma_bar, 3.0)
+                          + gamma_bar * (gamma_bar * gamma_bar + 3))
+                         / (1 + 3 * gamma_bar * gamma_bar);
+
+      // derivative of eta with respect to gamma
+      const double J = 3 * ((gamma - gamma_bar) *(gamma - gamma_bar))
+                       / (1 + 3 * gamma_bar * gamma_bar);
+
+      // back to [0,1]; the weight picks up the Jacobian
+      quadrature_points[q][0] = (eta + 1) / 2.0;
+      weights[q] = J * weights[q];
+    }
+}
+
+
+
+// Compute the n Gauss-Chebyshev points, in ascending order, on [0,1].
+template <>
+std::vector<double>
+QGaussChebyshev<1>::get_quadrature_points(const unsigned int n)
+{
+  std::vector<double> points(n);
+
+  // n point quadrature: index from 0 to n-1. The counter has the same
+  // type as n so that it cannot wrap around before reaching n.
+  for (unsigned int i=0; i<n; ++i)
+    // would be cos((2i+1)Pi/(2N+2))
+    // put + Pi so we start from the smallest point
+    // then map from [-1,1] to [0,1]
+    points[i] = 1./2.*(1.+std::cos(numbers::PI*(1.+double(2*i+1)/double(2*(n-1)+2))));
+
+  return points;
+}
+
+
+// Compute the n Gauss-Chebyshev weights; every weight equals PI/n.
+template <>
+std::vector<double>
+QGaussChebyshev<1>::get_quadrature_weights(const unsigned int n)
+{
+  std::vector<double> weights(n);
+
+  // The counter has the same type as n so that it cannot wrap around
+  // before reaching n.
+  for (unsigned int i=0; i<n; ++i)
+    {
+      // same weights as on [-1,1]
+      weights[i] = numbers::PI/double(n);
+    }
+
+  return weights;
+}
+
+
+// 1d Gauss-Chebyshev rule with n points: fetch the points and weights
+// from the two helper functions and store them in the base class.
+template <>
+QGaussChebyshev<1>::QGaussChebyshev(const unsigned int n)
+  :
+  Quadrature<1> (n)
+{
+  Assert(n>0,ExcMessage("Need at least one point for the quadrature rule"));
+
+  const std::vector<double> points_1d  = get_quadrature_points(n);
+  const std::vector<double> weights_1d = get_quadrature_weights(n);
+
+  for (unsigned int q=0; q<this->size(); ++q)
+    {
+      const Point<1> location(points_1d[q]);
+      this->quadrature_points[q] = location;
+      this->weights[q]           = weights_1d[q];
+    }
+}
+
+
+// Gauss-Chebyshev rule in dim dimensions, assembled from the
+// (dim-1)-dimensional rule and the 1d rule, both created with the same n.
+template <int dim>
+QGaussChebyshev<dim>::QGaussChebyshev (const unsigned int n)
+  :
+  Quadrature<dim> (QGaussChebyshev<dim-1>(n),
+                   QGaussChebyshev<1>(n))
+{}
+
+
+
+
+
+// Compute the n Gauss-Radau-Chebyshev points on [0,1] for the end point
+// selected by ep.
+template <>
+std::vector<double>
+QGaussRadauChebyshev<1>::get_quadrature_points(const unsigned int n,
+                                               EndPoint ep)
+{
+  std::vector<double> points(n);
+
+  // n point quadrature: index from 0 to n-1. The counter has the same
+  // type as n so that it cannot wrap around before reaching n.
+  for (unsigned int i=0; i<n; ++i)
+    {
+      // would be -cos(2i Pi/(2N+1))
+      // put + Pi so we start from the smallest point
+      // then map from [-1,1] to [0,1]
+      if (ep == QGaussRadauChebyshev::left)
+        points[i] = 1./2.*(1.-std::cos(numbers::PI*(1+2*double(i)/(2*double(n-1)+1.))));
+      else
+        {
+          Assert(ep==QGaussRadauChebyshev::right,ExcInvalidConstructorCall());
+          points[i] = 1./2.*(1.-std::cos(numbers::PI*(2*double(n-1-i)/(2*double(n-1)+1.))));
+        }
+    }
+
+  return points;
+}
+
+
+// Compute the n Gauss-Radau-Chebyshev weights. All weights equal
+// 2 PI / (2(n-1)+1), except that the first one (ep == left) or the last
+// one (ep == right) is halved.
+template <>
+std::vector<double>
+QGaussRadauChebyshev<1>::get_quadrature_weights(const unsigned int n,
+                                                EndPoint ep)
+{
+  std::vector<double> weights(n);
+
+  // The counter has the same type as n so that it cannot wrap around
+  // before reaching n.
+  for (unsigned int i=0; i<n; ++i)
+    {
+      // same weights as on [-1,1]
+      weights[i] = 2.*numbers::PI/double(2*(n-1)+1.);
+      if (ep==left && i==0)
+        weights[i] /= 2.;
+      else if (ep==right && i==(n-1))
+        weights[i] /= 2.;
+    }
+
+  return weights;
+}
+
+
+// 1d Gauss-Radau-Chebyshev rule with n points for the end point ep:
+// fetch the points and weights from the two helper functions and store
+// them in the base class.
+template <>
+QGaussRadauChebyshev<1>::QGaussRadauChebyshev(const unsigned int n,
+                                              QGaussRadauChebyshev<1>::EndPoint ep)
+  :
+  Quadrature<1> (n),
+  ep (ep)
+{
+  Assert(n>0,ExcMessage("Need at least one point for quadrature rules"));
+
+  const std::vector<double> points_1d  = get_quadrature_points(n,ep);
+  const std::vector<double> weights_1d = get_quadrature_weights(n,ep);
+
+  for (unsigned int q=0; q<this->size(); ++q)
+    {
+      const Point<1> location(points_1d[q]);
+      this->quadrature_points[q] = location;
+      this->weights[q]           = weights_1d[q];
+    }
+}
+
+
+// 2d Gauss-Radau-Chebyshev rule: built from two 1d rules with the same n
+// and the same end point. EndPoint is a member of each instantiation of
+// the class template, hence the casts to the 1d class' EndPoint type.
+template <>
+QGaussRadauChebyshev<2>::QGaussRadauChebyshev (const unsigned int n,
+                                               EndPoint ep)
+  :
+  Quadrature<2> (QGaussRadauChebyshev<1>(n, static_cast<QGaussRadauChebyshev<1>::EndPoint>(ep)),
+                 QGaussRadauChebyshev<1>(n, static_cast<QGaussRadauChebyshev<1>::EndPoint>(ep))),
+  ep (ep)
+{}
+
+
+// Gauss-Radau-Chebyshev rule in dim dimensions: built from the
+// (dim-1)-dimensional rule and the 1d rule, both with the same n and the
+// same end point. The end point value is cast to the EndPoint type of
+// the respective lower-dimensional class.
+template <int dim>
+QGaussRadauChebyshev<dim>::QGaussRadauChebyshev (const unsigned int n,
+                                                 EndPoint ep)
+  :
+  Quadrature<dim> (QGaussRadauChebyshev<dim-1>(n,static_cast<typename QGaussRadauChebyshev<dim-1>::EndPoint>(ep)),
+                   QGaussRadauChebyshev<1>(n,static_cast<QGaussRadauChebyshev<1>::EndPoint>(ep))),
+  ep (ep)
+{}
+
+
+// Compute the n Gauss-Lobatto-Chebyshev points, in ascending order, on
+// [0,1]. The formula divides by n-1, so n must be larger than one (the
+// constructor asserts this).
+template <>
+std::vector<double>
+QGaussLobattoChebyshev<1>::get_quadrature_points(const unsigned int n)
+{
+  std::vector<double> points(n);
+
+  // n point quadrature: index from 0 to n-1. The counter has the same
+  // type as n so that it cannot wrap around before reaching n.
+  for (unsigned int i=0; i<n; ++i)
+    // would be cos(i Pi/N)
+    // put + Pi so we start from the smallest point
+    // then map from [-1,1] to [0,1]
+    points[i] = 1./2.*(1.+std::cos(numbers::PI*(1+double(i)/double(n-1))));
+
+  return points;
+}
+
+
+// Compute the n Gauss-Lobatto-Chebyshev weights. All weights equal
+// PI/(n-1), except the first and the last one, which are halved.
+template <>
+std::vector<double>
+QGaussLobattoChebyshev<1>::get_quadrature_weights(const unsigned int n)
+{
+  std::vector<double> weights(n);
+
+  // The counter has the same type as n so that it cannot wrap around
+  // before reaching n.
+  for (unsigned int i=0; i<n; ++i)
+    {
+      // same weights as on [-1,1]
+      weights[i] = numbers::PI/double((n-1));
+      if (i==0 || i==(n-1))
+        weights[i] /= 2.;
+    }
+
+  return weights;
+}
+
+
+// 1d Gauss-Lobatto-Chebyshev rule with n points: fetch the points and
+// weights from the two helper functions and store them in the base class.
+template <>
+QGaussLobattoChebyshev<1>::QGaussLobattoChebyshev(const unsigned int n)
+  :
+  Quadrature<1> (n)
+{
+  Assert(n>1,ExcMessage("Need at least two points for Gauss-Lobatto quadrature rule"));
+
+  const std::vector<double> points_1d  = get_quadrature_points(n);
+  const std::vector<double> weights_1d = get_quadrature_weights(n);
+
+  for (unsigned int q=0; q<this->size(); ++q)
+    {
+      const Point<1> location(points_1d[q]);
+      this->quadrature_points[q] = location;
+      this->weights[q]           = weights_1d[q];
+    }
+}
+
+
+// Gauss-Lobatto-Chebyshev rule in dim dimensions, assembled from the
+// (dim-1)-dimensional rule and the 1d rule, both created with the same n.
+template <int dim>
+QGaussLobattoChebyshev<dim>::QGaussLobattoChebyshev (const unsigned int n)
+  :
+  Quadrature<dim> (QGaussLobattoChebyshev<dim-1>(n),
+                   QGaussLobattoChebyshev<1>(n))
+{}
+
+// explicit instantiations of the class templates defined in this file
+// (these are instantiations, not specializations). The first two groups
+// only list dim == 2 and dim == 3 because the 1d formulae of those
+// classes are specialized by implementation above.
+template class QGauss<2>;
+template class QGaussLobatto<2>;
+template class QMidpoint<2>;
+template class QTrapez<2>;
+template class QSimpson<2>;
+template class QMilne<2>;
+template class QWeddle<2>;
+
+template class QGauss<3>;
+template class QGaussLobatto<3>;
+template class QMidpoint<3>;
+template class QTrapez<3>;
+template class QSimpson<3>;
+template class QMilne<3>;
+template class QWeddle<3>;
+
+template class QSorted<1>;
+template class QSorted<2>;
+template class QSorted<3>;
+
+template class QTelles<1> ;
+template class QTelles<2> ;
+template class QTelles<3> ;
+
+template class QGaussChebyshev<1>;
+template class QGaussChebyshev<2>;
+template class QGaussChebyshev<3>;
+
+template class QGaussRadauChebyshev<1>;
+template class QGaussRadauChebyshev<2>;
+template class QGaussRadauChebyshev<3>;
+
+template class QGaussLobattoChebyshev<1>;
+template class QGaussLobattoChebyshev<2>;
+template class QGaussLobattoChebyshev<3>;
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/quadrature_selector.cc b/source/base/quadrature_selector.cc
new file mode 100644
index 0000000..f503556
--- /dev/null
+++ b/source/base/quadrature_selector.cc
@@ -0,0 +1,79 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/quadrature_selector.h>
+#include <deal.II/base/quadrature_lib.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// Translate the name s (and, for "gauss", the argument order) into the
+// corresponding quadrature object. Throws ExcInvalidQGaussOrder for
+// "gauss" with order < 1, ExcInvalidOrder for any other name with a
+// nonzero order, and ExcInvalidQuadrature for an unknown name.
+template <int dim>
+Quadrature<dim>
+QuadratureSelector<dim>::
+create_quadrature (const std::string &s,
+                   const unsigned int order)
+{
+  // "gauss" is the only rule that accepts an order
+  if (s == "gauss")
+    {
+      AssertThrow(order >= 1, ExcInvalidQGaussOrder(order));
+      return QGauss<dim>(order);
+    }
+
+  // all remaining rules take no argument, so the order has to be zero;
+  // this is checked before the name is looked at
+  AssertThrow(order == 0, ExcInvalidOrder(s, order));
+
+  if (s == "midpoint")
+    return QMidpoint<dim>();
+  if (s == "milne")
+    return QMilne<dim>();
+  if (s == "simpson")
+    return QSimpson<dim>();
+  if (s == "trapez")
+    return QTrapez<dim>();
+  if (s == "weddle")
+    return QWeddle<dim>();
+
+  // none of the known names matched
+  AssertThrow (false, ExcInvalidQuadrature(s));
+
+  // only here to keep compilers from warning about a missing return
+  // value
+  return Quadrature<dim>();
+}
+
+
+
+// Construct the quadrature rule named s with the given order by copying
+// the points and weights of the object returned by create_quadrature().
+// Note that create_quadrature() is called twice with the same arguments,
+// once for the points and once for the weights; any exception for an
+// invalid name or order is thrown from there.
+template <int dim>
+QuadratureSelector<dim>::QuadratureSelector (const std::string &s,
+                                             const unsigned int order)
+  :
+  Quadrature<dim> (create_quadrature(s, order).get_points(),
+                   create_quadrature(s, order).get_weights())
+{
+}
+
+
+
+// Return the names understood by create_quadrature(), separated by '|'.
+template <int dim>
+std::string
+QuadratureSelector<dim>::get_quadrature_names()
+{
+  const std::string names = "gauss|midpoint|milne|simpson|trapez|weddle";
+  return names;
+}
+
+
+
+// explicit instantiations of QuadratureSelector for dim = 1, 2 and 3
+template class QuadratureSelector<1>;
+template class QuadratureSelector<2>;
+template class QuadratureSelector<3>;
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/subscriptor.cc b/source/base/subscriptor.cc
new file mode 100644
index 0000000..c3a6142
--- /dev/null
+++ b/source/base/subscriptor.cc
@@ -0,0 +1,227 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/thread_management.h>
+#include <deal.II/base/subscriptor.h>
+#include <deal.II/base/logstream.h>
+
+#include <typeinfo>
+#include <string>
+#include <iostream>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+#ifdef DEBUG
+namespace
+{
+// Global lock (only present in debug mode) that is used to control
+// subscription to and unsubscription from objects, as that might happen
+// in parallel.
+//
+// Since it should happen rather seldom that several threads try to
+// operate on different objects at the same time (the usual case is
+// that they subscribe to the same object right after thread
+// creation), a single global lock should be sufficient, rather than
+// one that operates on a per-object basis (in which case we would have
+// to include the huge <thread_management.h> file into the
+// <subscriptor.h> file).
+  Threads::Mutex subscription_lock;
+}
+#endif
+
+
+// Name used in place of the subscriber id when a null id is passed to
+// subscribe() or unsubscribe().
+static const char *unknown_subscriber = "unknown subscriber";
+
+
+// Default constructor: the new object has no subscriptions and no type
+// information stored yet (object_info is filled in by subscribe()).
+Subscriptor::Subscriptor ()
+  :
+  counter (0),
+  object_info (0)
+{
+  // unknown_subscriber is only used in debug mode; reference it here so
+  // that builds that do not use it produce no unused-variable warning.
+  (void)unknown_subscriber;
+}
+
+
+
+// Copy constructor: nothing is taken over from the source object. The
+// copy starts with a zero subscription counter and no type information.
+Subscriptor::Subscriptor (const Subscriptor &)
+  :
+  counter (0),
+  object_info (0)
+{}
+
+
+// Destructor. In debug mode, verify that nobody is subscribed to this
+// object any more; in other builds this function does nothing.
+Subscriptor::~Subscriptor ()
+{
+  // check whether there are still
+  // subscriptions to this object. if
+  // so, output the actual name of
+  // the class to which this object
+  // belongs, i.e. the most derived
+  // class. note that the name may be
+  // mangled, so it need not be the
+  // clear-text class name. however,
+  // you can obtain the latter by
+  // running the c++filt program over
+  // the output.
+#ifdef DEBUG
+
+  // if there are still active pointers, show
+  // a message and kill the program. However,
+  // under some circumstances, this is not so
+  // desirable. For example, in code like this
+  //
+  // Triangulation tria;
+  // DoFHandler *dh = new DoFHandler(tria);
+  // ...some function that throws an exception
+  //
+  // the exception will lead to the
+  // destruction of the triangulation, but
+  // since the dof_handler is on the heap it
+  // will not be destroyed. This will trigger
+  // an assertion in the triangulation. If we
+  // kill the program at this point, we will
+  // never be able to learn what caused the
+  // problem. In this situation, just display
+  // a message and continue the program.
+  if (counter != 0)
+    {
+      if (std::uncaught_exception() == false)
+        {
+          // collect the names of all subscribers that still hold a
+          // subscription, one per line
+          std::string infostring;
+          for (map_iterator it = counter_map.begin(); it != counter_map.end(); ++it)
+            {
+              if (it->second > 0)
+                infostring += std::string("\n  from Subscriber ")
+                              + std::string(it->first);
+            }
+
+          if (infostring == "")
+            infostring = "<none>";
+
+          // NOTE(review): object_info is dereferenced here; it is set by
+          // subscribe() but can be overwritten by operator= -- confirm
+          // it cannot be null at this point.
+          AssertNothrow (counter == 0,
+                         ExcInUse (counter, object_info->name(), infostring));
+        }
+      else
+        {
+          // another exception is in flight: only report the problem
+          std::cerr << "---------------------------------------------------------"
+                    << std::endl
+                    << "An object pointed to by a SmartPointer is being destroyed."
+                    << std::endl
+                    << "Under normal circumstances, this would abort the program."
+                    << std::endl
+                    << "However, another exception is being processed at the"
+                    << std::endl
+                    << "moment, so the program will continue to run to allow"
+                    << std::endl
+                    << "this exception to be processed."
+                    << std::endl
+                    << "---------------------------------------------------------"
+                    << std::endl;
+        }
+    }
+  // In case we do not abort
+  // on error, this will tell
+  // do_unsubscribe below that the
+  // object is unused now.
+  counter = 0;
+#endif
+}
+
+
+
+// Assignment operator: only the type information pointer is copied from
+// s. The subscription counter of this object is left untouched.
+Subscriptor &Subscriptor::operator = (const Subscriptor &s)
+{
+  object_info = s.object_info;
+  return *this;
+}
+
+
+
+// Register a subscription to this object under the name id (a null id is
+// recorded as unknown_subscriber). All bookkeeping happens in debug mode
+// only; otherwise the function does nothing.
+void
+Subscriptor::subscribe(const char *id) const
+{
+#ifdef DEBUG
+  // remember the type of this object the first time someone subscribes;
+  // note that this happens before the lock is acquired
+  if (object_info == 0)
+    object_info = &typeid(*this);
+  Threads::Mutex::ScopedLock lock (subscription_lock);
+  ++counter;
+
+  // the per-name counters are only maintained in builds without
+  // thread support
+#  ifndef DEAL_II_WITH_THREADS
+  const char *const name = (id != 0) ? id : unknown_subscriber;
+
+  map_iterator it = counter_map.find(name);
+  if (it == counter_map.end())
+    counter_map.insert(map_value_type(name, 1U));
+
+  else
+    it->second++;
+#  else
+  (void)id;
+#  endif
+#else
+  (void)id;
+#endif
+}
+
+
+// Remove a subscription made under the name id (a null id is treated as
+// unknown_subscriber). All bookkeeping happens in debug mode only;
+// otherwise the function does nothing. Unsubscribing from an object
+// without subscriptions, or under a name that holds no subscription,
+// triggers ExcNoSubscriber.
+void
+Subscriptor::unsubscribe(const char *id) const
+{
+#ifdef DEBUG
+  const char *name = (id != 0) ? id : unknown_subscriber;
+
+  // object_info is normally set by subscribe(). Make sure it is valid
+  // before it is used in the error messages below, so that an erroneous
+  // unsubscribe() reports the error instead of dereferencing a null
+  // pointer.
+  if (object_info == 0)
+    object_info = &typeid(*this);
+
+  AssertNothrow (counter>0, ExcNoSubscriber(object_info->name(), name));
+  // This is for the case that we do
+  // not abort after the exception
+  if (counter == 0)
+    return;
+
+  Threads::Mutex::ScopedLock lock (subscription_lock);
+  --counter;
+
+  // the per-name counters are only maintained in builds without
+  // thread support
+#  ifndef DEAL_II_WITH_THREADS
+  map_iterator it = counter_map.find(name);
+  AssertNothrow (it != counter_map.end(), ExcNoSubscriber(object_info->name(), name));
+  // Again for the case that we do not abort: never touch the end
+  // iterator.
+  if (it == counter_map.end())
+    return;
+
+  AssertNothrow (it->second > 0, ExcNoSubscriber(object_info->name(), name));
+  // ...and never decrement a per-name counter that is already zero.
+  if (it->second > 0)
+    it->second--;
+#  endif
+#else
+  (void)id;
+#endif
+}
+
+
+
+// Return the current value of the subscription counter. The counter is
+// only modified by subscribe()/unsubscribe() in debug mode.
+unsigned int Subscriptor::n_subscriptions () const
+{
+  return counter;
+}
+
+
+
+// Write one line per known subscriber name to deallog, giving that
+// name's subscription count and the total count. With thread support
+// enabled no per-name information is kept and only a note is printed.
+void Subscriptor::list_subscribers () const
+{
+#ifdef DEAL_II_WITH_THREADS
+  deallog << "No subscriber listing with multithreading" << std::endl;
+#else
+  map_iterator entry = counter_map.begin();
+  while (entry != counter_map.end())
+    {
+      deallog << entry->second << '/'
+              << counter << " subscriptions from \""
+              << entry->first << '\"' << std::endl;
+      ++entry;
+    }
+#endif
+}
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/symmetric_tensor.cc b/source/base/symmetric_tensor.cc
new file mode 100644
index 0000000..15542b4
--- /dev/null
+++ b/source/base/symmetric_tensor.cc
@@ -0,0 +1,129 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/symmetric_tensor.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// Specialization of invert() for symmetric rank-4 tensors in 3d with
+// double entries. The tensor's storage tmp.data is treated as a 6x6
+// matrix that is inverted in place with a Gauss-Jordan algorithm with
+// row pivoting, followed by a rescaling of the blocks that involve the
+// last three rows/columns. In debug mode an assertion fires if a pivot
+// is too small relative to the typical diagonal entry.
+template <>
+SymmetricTensor<4,3,double>
+invert<3,double> (const SymmetricTensor<4,3,double> &t)
+{
+  // work on a copy; the argument is left untouched
+  SymmetricTensor<4,3,double> tmp = t;
+
+  // this function follows the exact same
+  // scheme as the 2d case, except that
+  // hardcoding the inverse of a 6x6 matrix
+  // is pretty wasteful. instead, we use the
+  // Gauss-Jordan algorithm implemented for
+  // FullMatrix; the following code is copied
+  // from there because using the FullMatrix
+  // class would introduce circular
+  // references between libbase and liblac
+  const unsigned int N = 6;
+
+  // first get an estimate of the
+  // size of the elements of this
+  // matrix, for later checks whether
+  // the pivot element is large
+  // enough, or whether we have to
+  // fear that the matrix is not
+  // regular
+  double diagonal_sum = 0;
+  for (unsigned int i=0; i<N; ++i)
+    diagonal_sum += std::fabs(tmp.data[i][i]);
+  const double typical_diagonal_element = diagonal_sum/N;
+  // only used inside Assert below; avoid an unused-variable warning
+  // when assertions are disabled
+  (void)typical_diagonal_element;
+
+  // permutation vector that records the row interchanges
+  unsigned int p[N];
+  for (unsigned int i=0; i<N; ++i)
+    p[i] = i;
+
+  for (unsigned int j=0; j<N; ++j)
+    {
+      // pivot search: search column j,
+      // on and below the diagonal, for
+      // the element of largest
+      // absolute value
+      double       max = std::fabs(tmp.data[j][j]);
+      unsigned int r   = j;
+      for (unsigned int i=j+1; i<N; ++i)
+        if (std::fabs(tmp.data[i][j]) > max)
+          {
+            max = std::fabs(tmp.data[i][j]);
+            r = i;
+          }
+      // check whether the pivot is
+      // too small
+      Assert(max > 1.e-16*typical_diagonal_element,
+             ExcMessage("This tensor seems to be noninvertible"));
+
+      // row interchange
+      if (r>j)
+        {
+          for (unsigned int k=0; k<N; ++k)
+            std::swap (tmp.data[j][k], tmp.data[r][k]);
+
+          std::swap (p[j], p[r]);
+        }
+
+      // transformation: hr is the reciprocal of the pivot
+      const double hr = 1./tmp.data[j][j];
+      tmp.data[j][j] = hr;
+      // update all entries outside row j and column j
+      for (unsigned int k=0; k<N; ++k)
+        {
+          if (k==j) continue;
+          for (unsigned int i=0; i<N; ++i)
+            {
+              if (i==j) continue;
+              tmp.data[i][k] -= tmp.data[i][j]*tmp.data[j][k]*hr;
+            }
+        }
+      // scale column j by hr and row j by -hr. this also modifies the
+      // diagonal entry, which is why it is reset right afterwards
+      for (unsigned int i=0; i<N; ++i)
+        {
+          tmp.data[i][j] *= hr;
+          tmp.data[j][i] *= -hr;
+        }
+      tmp.data[j][j] = hr;
+    }
+  // column interchange: permute the columns of every row according to
+  // the recorded row interchanges, using hv as scratch space
+  double hv[N];
+  for (unsigned int i=0; i<N; ++i)
+    {
+      for (unsigned int k=0; k<N; ++k)
+        hv[p[k]] = tmp.data[i][k];
+      for (unsigned int k=0; k<N; ++k)
+        tmp.data[i][k] = hv[k];
+    }
+
+  // scale rows and columns. the mult matrix
+  // here is diag[1, 1, 1, 1/2, 1/2, 1/2]:
+  // the two off-diagonal 3x3 blocks are
+  // divided by 2, the lower right 3x3
+  // block by 4
+  for (unsigned int i=3; i<6; ++i)
+    for (unsigned int j=0; j<3; ++j)
+      tmp.data[i][j] /= 2;
+
+  for (unsigned int i=0; i<3; ++i)
+    for (unsigned int j=3; j<6; ++j)
+      tmp.data[i][j] /= 2;
+
+  for (unsigned int i=3; i<6; ++i)
+    for (unsigned int j=3; j<6; ++j)
+      tmp.data[i][j] /= 4;
+
+  return tmp;
+}
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/table_handler.cc b/source/base/table_handler.cc
new file mode 100644
index 0000000..4f1099b
--- /dev/null
+++ b/source/base/table_handler.cc
@@ -0,0 +1,749 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/table_handler.h>
+#include <deal.II/base/table.h>
+
+#include <sstream>
+#include <iostream>
+#include <iomanip>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/*---------------------------------------------------------------------*/
+
+// inline and template functions
+namespace internal
+{
+  // Default constructor; the stored value is default initialized.
+  TableEntry::TableEntry ()
+  {}
+
+
+  // Return the stored value converted to double if it is of type int,
+  // unsigned int or double. For any other stored type an assertion is
+  // triggered in debug mode and zero is returned.
+  double TableEntry::get_numeric_value () const
+  {
+    // we don't quite know the data type in 'value', but
+    // it must be one of the ones in the type list of the
+    // boost::variant. Go through this list and return
+    // the value if this happens to be a number. A failed
+    // boost::get throws, which simply moves us on to the
+    // next candidate type.
+    //
+    // first try with int
+    try
+      {
+        return boost::get<int>(value);
+      }
+    catch (...)
+      {}
+
+
+    // ... then with unsigned int...
+    try
+      {
+        return boost::get<unsigned int>(value);
+      }
+    catch (...)
+      {}
+
+    // ...and finally with double precision:
+    try
+      {
+        return boost::get<double>(value);
+      }
+    catch (...)
+      {
+        Assert (false, ExcMessage ("The number stored by this element of the "
+                                   "table is not a number."))
+      }
+
+    // only reached if none of the conversions above succeeded
+    return 0;
+  }
+
+  // Format the stored value with the given precision, in scientific or
+  // fixed floating point notation, and store the result in cached_value.
+  // A value that formats to an empty string is cached as "" (two
+  // double-quote characters) instead.
+  void TableEntry::cache_string(bool scientific, unsigned int precision) const
+  {
+    std::ostringstream ss;
+
+    ss << std::setprecision(precision);
+
+    if (scientific)
+      ss.setf(std::ios::scientific, std::ios::floatfield);
+    else
+      ss.setf(std::ios::fixed, std::ios::floatfield);
+
+    ss << value;
+
+    cached_value = ss.str();
+    if (cached_value.size()==0)
+      cached_value = "\"\"";
+  }
+
+  // Return the string computed by the last call to cache_string().
+  const std::string &TableEntry::get_cached_string() const
+  {
+    return cached_value;
+  }
+
+
+  namespace Local
+  {
+    // Visitor that overwrites the value currently held by the variant
+    // with a default constructed object of the same type.
+    struct GetDefaultValue : public boost::static_visitor<>
+    {
+      template <typename T>
+      void operator()( T &operand ) const
+      {
+        operand = T();
+      }
+    };
+  }
+
+  // Return a copy of this entry whose value has been replaced by a
+  // default constructed value of the same type. Everything else is
+  // copied unchanged.
+  TableEntry TableEntry::get_default_constructed_copy () const
+  {
+    TableEntry new_entry = *this;
+    boost::apply_visitor(Local::GetDefaultValue(), new_entry.value);
+
+    return new_entry;
+  }
+
+
+}
+
+/* ------------------------------------------------ */
+
+// Create an empty column with the given TeX caption and the default
+// settings: TeX format "c", precision 4, scientific notation off, flag
+// zero and no cached maximal entry length.
+TableHandler::Column::Column(const std::string &tex_caption)
+  :
+  tex_caption(tex_caption),
+  tex_format("c"),
+  precision(4),
+  scientific(0),
+  flag(0),
+  max_length(0)
+{}
+
+
+
+// Create an empty column with an empty TeX caption and the same default
+// settings as the constructor taking a caption: TeX format "c",
+// precision 4, scientific notation off, flag zero and no cached maximal
+// entry length.
+TableHandler::Column::Column()
+  :
+  tex_caption(),
+  tex_format("c"),
+  precision(4),
+  scientific(0),
+  flag(0),
+  max_length(0)
+{}
+
+
+
+// Append entries to this column until it holds @p size of them. Each new
+// entry is a default constructed copy of the entry that is last at that
+// moment; its string representation is cached and max_length is updated
+// accordingly. Columns that already have @p size or more entries are
+// left unchanged.
+void
+TableHandler::Column::pad_column_below (const unsigned int size)
+{
+  // a completely empty column offers no entry to copy from, so it
+  // should never be padded
+  Assert (entries.size() > 0, ExcInternalError());
+
+  while (entries.size() < size)
+    {
+      const internal::TableEntry blank
+        = entries.back().get_default_constructed_copy();
+      entries.push_back (blank);
+
+      // format the freshly added entry with this column's settings
+      entries.back().cache_string(scientific, precision);
+
+      const unsigned int width
+        = static_cast<unsigned int>(entries.back().get_cached_string().length());
+      max_length = std::max(max_length, width);
+    }
+}
+
+
+// Recompute the cached string of every entry with the column's current
+// scientific/precision settings, and recompute max_length from scratch.
+void
+TableHandler::Column::invalidate_cache()
+{
+  max_length = 0;
+
+  for (unsigned int row=0; row<entries.size(); ++row)
+    {
+      dealii::internal::TableEntry &entry = entries[row];
+      entry.cache_string(this->scientific, this->precision);
+
+      const unsigned int width
+        = static_cast<unsigned int>(entry.get_cached_string().length());
+      max_length = std::max(max_length, width);
+    }
+}
+
+
+/*---------------------------------------------------------------------*/
+
+
+// Create an empty table; auto fill mode is switched off initially.
+TableHandler::TableHandler()
+  :
+  auto_fill_mode (false)
+{}
+
+
+
+// Switch auto fill mode on or off by storing @p state in auto_fill_mode.
+void
+TableHandler::set_auto_fill_mode (const bool state)
+{
+  auto_fill_mode = state;
+}
+
+
+// Make column @p key a member of supercolumn @p superkey. If the
+// supercolumn does not exist yet it is created and takes the place of
+// @p key in column_order; otherwise @p key is removed from column_order.
+// The TeX supercaption defaults to @p superkey.
+void TableHandler::add_column_to_supercolumn (const std::string &key,
+                                              const std::string &superkey)
+{
+  Assert(columns.count(key), ExcColumnNotExistent(key));
+
+  const bool supercolumn_exists = (supercolumns.count(superkey) != 0);
+
+  if (supercolumn_exists)
+    {
+      // the supercolumn is already listed in column_order, so simply
+      // drop the first occurrence of key from it
+      for (unsigned int j=0; j<column_order.size(); ++j)
+        if (column_order[j]==key)
+          {
+            column_order.erase(column_order.begin()+j);
+            break;
+          }
+    }
+  else
+    {
+      // create the supercolumn with an empty list of members...
+      supercolumns.insert(std::pair<std::string, std::vector<std::string> >
+                          (superkey, std::vector<std::string>()));
+
+      // ...and let it take the place of the first occurrence of key in
+      // column_order
+      for (std::vector<std::string>::iterator order_iter=column_order.begin();
+           order_iter!=column_order.end(); ++order_iter)
+        if (*order_iter==key)
+          {
+            *order_iter=superkey;
+            break;
+          }
+    }
+
+  if (supercolumns.count(superkey))
+    {
+      supercolumns[superkey].push_back(key);
+
+      // By default set the tex_supercaption to superkey; an already
+      // existing supercaption is not overwritten by insert()
+      tex_supercaptions.insert(std::pair<std::string, std::string>(superkey, superkey));
+    }
+  else
+    {
+      Assert(false, ExcInternalError());
+    }
+}
+
+
+
+// Replace the current column order by @p new_order. In debug mode every
+// name is checked to be an existing column or supercolumn.
+void TableHandler::set_column_order (const std::vector<std::string> &new_order)
+{
+  for (std::vector<std::string>::const_iterator name=new_order.begin();
+       name!=new_order.end(); ++name)
+    {
+      Assert(supercolumns.count(*name) || columns.count(*name),
+             ExcColumnOrSuperColumnNotExistent(*name));
+    }
+
+  column_order=new_order;
+}
+
+
+// Set the TeX caption of column @p key.
+void TableHandler::set_tex_caption (const std::string &key,
+                                    const std::string &tex_caption)
+{
+  Assert(columns.count(key), ExcColumnNotExistent(key));
+
+  Column &column = columns[key];
+  column.tex_caption = tex_caption;
+}
+
+
+
+// Store @p table_caption as the TeX caption of the whole table.
+void TableHandler::set_tex_table_caption (const std::string &table_caption)
+{
+  tex_table_caption=table_caption;
+}
+
+
+
+// Store @p table_label as the TeX label of the whole table.
+void TableHandler::set_tex_table_label (const std::string &table_label)
+{
+  tex_table_label=table_label;
+}
+
+
+
+// Set the TeX caption of supercolumn @p superkey. In debug mode the
+// supercolumn is checked to exist and to already have a supercaption
+// entry (one is created by add_column_to_supercolumn()).
+void TableHandler::set_tex_supercaption (const std::string &superkey,
+                                         const std::string &tex_supercaption)
+{
+  Assert(supercolumns.count(superkey), ExcSuperColumnNotExistent(superkey));
+  Assert(tex_supercaptions.count(superkey), ExcInternalError());
+  tex_supercaptions[superkey]=tex_supercaption;
+}
+
+
+
+// Set the tabular alignment specifier of column @p key that write_tex()
+// puts into the \begin{tabular}{...} argument. Only "l", "c" and "r"
+// pass the Assert below.
+void TableHandler::set_tex_format (const std::string &key,
+                                   const std::string &tex_format)
+{
+  Assert(columns.count(key), ExcColumnNotExistent(key));
+  Assert(tex_format=="l" || tex_format=="c" || tex_format=="r",
+         ExcUndefinedTexFormat(tex_format));
+
+  Column &column = columns[key];
+  column.tex_format = tex_format;
+}
+
+
+
+// Set the output precision of column @p key. The column's cache is
+// invalidated only if the precision actually changes.
+void TableHandler::set_precision (const std::string &key,
+                                  const unsigned int precision)
+{
+  Assert(columns.count(key), ExcColumnNotExistent(key));
+
+  Column &column = columns[key];
+  if (column.precision == precision)
+    return;
+
+  column.precision = precision;
+  column.invalidate_cache();
+}
+
+
+// Select scientific (true) or non-scientific (false) number output for
+// column @p key. The column's cache is invalidated only if the flag
+// actually changes.
+void TableHandler::set_scientific (const std::string &key,
+                                   const bool scientific)
+{
+  Assert(columns.count(key), ExcColumnNotExistent(key));
+
+  Column &column = columns[key];
+  if (column.scientific == scientific)
+    return;
+
+  column.scientific = scientific;
+  column.invalidate_cache();
+}
+
+
+// Write the table as plain text to @p out, using the layout selected by
+// @p format:
+//  - org_mode_table: '|'-separated header line and rows;
+//  - simple_table_with_separate_column_description: "# <n>: <key>" header
+//    lines followed by space-separated, unaligned rows;
+//  - table_with_separate_column_description: the same header lines,
+//    followed by width-aligned rows;
+//  - table_with_headers: one header line listing columns and supercolumns,
+//    centered over the columns they cover, followed by width-aligned rows.
+//
+// Throws ExcIO (through AssertThrow) if @p out is not in a good state.
+// If auto_fill_mode is set, all columns are first padded from below to
+// the length of the longest column.
+void TableHandler::write_text(std::ostream &out,
+                              const TextOutputFormat format) const
+{
+  AssertThrow (out, ExcIO());
+
+  // first pad the table from below if necessary
+  if (auto_fill_mode == true)
+    {
+      unsigned int max_rows = 0;
+      for (std::map<std::string, Column>::const_iterator p = columns.begin();
+           p != columns.end(); ++p)
+        max_rows = std::max<unsigned int>(max_rows, p->second.entries.size());
+
+      for (std::map<std::string, Column>::iterator p = columns.begin();
+           p != columns.end(); ++p)
+        p->second.pad_column_below (max_rows);
+    }
+
+  std::vector<std::string> sel_columns;
+  get_selected_columns(sel_columns);
+
+  const unsigned int nrows  = n_rows();
+  const unsigned int n_cols = sel_columns.size();
+
+  // cache the columns and compute the widths of each column for alignment
+  std::vector<const Column *> cols;
+  cols.reserve (n_cols);
+  std::vector<unsigned int> column_widths (n_cols, 0);
+  for (unsigned int j=0; j<n_cols; ++j)
+    {
+      const std::string &key=sel_columns[j];
+      const std::map<std::string, Column>::const_iterator
+      col_iter=columns.find(key);
+      Assert(col_iter!=columns.end(), ExcInternalError());
+      cols.push_back(&(col_iter->second));
+
+      column_widths[j] = col_iter->second.max_length;
+    }
+
+  switch (format)
+    {
+    case org_mode_table:
+    {
+      // write the captions; a column is at least as wide as its key
+      out << "| " << std::left;
+      for (unsigned int j=0; j<n_cols; ++j)
+        {
+          const std::string &key = sel_columns[j];
+          column_widths[j] = std::max(column_widths[j],
+                                      static_cast<unsigned int>(key.length()));
+          out << std::setw(column_widths[j]);
+          out << key << " | ";
+        }
+      out << std::endl;
+
+      // write the body
+      for (unsigned int i=0; i<nrows; ++i)
+        {
+          out << "| ";
+          for (unsigned int j=0; j<n_cols; ++j)
+            {
+              const Column &column=*(cols[j]);
+
+              out << std::setw(column_widths[j]);
+              out << column.entries[i].get_cached_string();
+              out << " | ";
+            }
+          out << '\n';
+        }
+
+      out << std::flush;
+      return;
+    }
+
+    case simple_table_with_separate_column_description:
+    {
+      // write the captions
+      for (unsigned int j=0; j<n_cols; ++j)
+        {
+          const std::string &key = sel_columns[j];
+          out << "# " << j+1 << ": " << key << '\n';
+        }
+
+      // write the body
+      for (unsigned int i=0; i<nrows; ++i)
+        {
+          for (unsigned int j=0; j<n_cols; ++j)
+            {
+              const Column &column=*(cols[j]);
+
+              out << column.entries[i].get_cached_string();
+              out << ' ';
+            }
+          out << '\n';
+        }
+
+      out << std::flush;
+      return;
+    }
+
+    case table_with_separate_column_description:
+    {
+      // writing the captions for table_with_separate_column_description
+      // means that we ignore supercolumns and output the column
+      // header for each column. enumerate columns starting with 1
+      for (unsigned int j=0; j<n_cols; ++j)
+        {
+          const std::string &key=sel_columns[j];
+          out << "# " << j+1 << ": " << key << '\n';
+        }
+      break;
+    }
+
+    case table_with_headers:
+    {
+      // This format outputs supercolumn headers and aligns them centered
+      // over all the columns that belong to them.
+      for (unsigned int j=0; j<column_order.size(); ++j)
+        {
+          const std::string &key = column_order[j];
+          unsigned int width=0;
+          {
+            // compute the width of this column or supercolumn
+            const std::map<std::string, std::vector<std::string> >::const_iterator
+            super_iter=supercolumns.find(key);
+            if (super_iter!=supercolumns.end())
+              {
+                const unsigned int n_subcolumns=super_iter->second.size();
+                for (unsigned int k=0; k<n_subcolumns; ++k)
+                  {
+                    const std::map<std::string, Column>::const_iterator
+                    col_iter=columns.find(super_iter->second[k]);
+                    Assert(col_iter!=columns.end(), ExcInternalError());
+
+                    width += col_iter->second.max_length;
+                  }
+                width += n_subcolumns - 1; // separators between subcolumns
+              }
+            else
+              {
+                const std::map<std::string, Column>::const_iterator
+                col_iter=columns.find(key);
+                // same sanity check as for every other column lookup in
+                // this function, before the iterator is dereferenced
+                Assert(col_iter!=columns.end(), ExcInternalError());
+
+                width = col_iter->second.max_length;
+              }
+          }
+
+          // header is longer than the column(s) under it
+          if (width<key.length())
+            {
+              // make the column or the last column in this
+              // supercolumn wide enough
+              std::string colname;
+
+              const std::map<std::string, std::vector<std::string> >::const_iterator
+              super_iter=supercolumns.find(key);
+              if (super_iter!=supercolumns.end())
+                colname = super_iter->second.back();
+              else
+                colname = key;
+
+              // find column and change output width
+              for (unsigned int i=0; i<n_cols; ++i)
+                {
+                  if (sel_columns[i]==colname)
+                    {
+                      column_widths[i] += key.length() - width;
+                      break;
+                    }
+                }
+
+              width=key.length();
+            }
+
+          // now write key. try to center it somehow
+          const unsigned int front_padding = (width-key.length())/2,
+                             rear_padding  = (width-key.length()) -
+                                             front_padding;
+          for (unsigned int i=0; i<front_padding; ++i)
+            out << ' ';
+          out << key;
+          for (unsigned int i=0; i<rear_padding; ++i)
+            out << ' ';
+
+          out << ' ';
+        }
+      out << '\n';
+      break;
+    }
+
+    default:
+      Assert (false, ExcInternalError());
+    }
+
+
+  // finally output the data itself for
+  // table_with_headers or table_with_separate_column_description:
+  for (unsigned int i=0; i<nrows; ++i)
+    {
+      for (unsigned int j=0; j<n_cols; ++j)
+        {
+          const Column &column=*(cols[j]);
+          out << std::setw(column_widths[j]);
+          out << column.entries[i].get_cached_string();
+
+          // pad after this column
+          out << ' ';
+        }
+      out << '\n';
+    }
+  out << std::flush;
+}
+
+
+// Write the table as a LaTeX `table'/`tabular' environment to @p out.
+// If @p with_header is true, the output is wrapped in a minimal document
+// (\documentclass ... \begin{document} ... \end{document}).
+//
+// Throws ExcIO (through AssertThrow) if @p out is not in a good state.
+// If auto_fill_mode is set, all columns are first padded from below to
+// the length of the longest column. The precision and floatfield flags
+// of @p out are changed while writing the entries and are not restored.
+void TableHandler::write_tex (std::ostream &out, const bool with_header) const
+{
+  //TODO[TH]: update code similar to
+  //write_text() to use the cache
+  AssertThrow (out, ExcIO());
+  if (with_header)
+    out << "\\documentclass[10pt]{report}" << std::endl
+        << "\\usepackage{float}" << std::endl << std::endl << std::endl
+        << "\\begin{document}" << std::endl;
+
+  out << "\\begin{table}[H]" << std::endl
+      << "\\begin{center}" << std::endl
+      << "\\begin{tabular}{|";
+
+  // first pad the table from below if necessary
+  if (auto_fill_mode == true)
+    {
+      unsigned int max_rows = 0;
+      for (std::map<std::string, Column>::const_iterator p = columns.begin();
+           p != columns.end(); ++p)
+        max_rows = std::max<unsigned int>(max_rows, p->second.entries.size());
+
+      for (std::map<std::string, Column>::iterator p = columns.begin();
+           p != columns.end(); ++p)
+        p->second.pad_column_below (max_rows);
+    }
+
+  std::vector<std::string> sel_columns;
+  get_selected_columns(sel_columns);
+
+  // write the column formats
+  for (unsigned int j=0; j<column_order.size(); ++j)
+    {
+      const std::string &key=column_order[j];
+      // avoid `supercolumns[key]'
+      const std::map<std::string, std::vector<std::string> >::const_iterator
+      super_iter=supercolumns.find(key);
+
+      if (super_iter!=supercolumns.end())
+        {
+          const unsigned int n_subcolumns=super_iter->second.size();
+          for (unsigned int k=0; k<n_subcolumns; ++k)
+            {
+              // avoid `columns[supercolumns[key]]'
+              const std::map<std::string, Column>::const_iterator
+              col_iter=columns.find(super_iter->second[k]);
+              Assert(col_iter!=columns.end(), ExcInternalError());
+
+              out << col_iter->second.tex_format << "|";
+            }
+        }
+      else
+        {
+          // avoid `columns[key]';
+          const std::map<std::string, Column>::const_iterator
+          col_iter=columns.find(key);
+          Assert(col_iter!=columns.end(), ExcInternalError());
+          out << col_iter->second.tex_format << "|";
+        }
+    }
+  out << "} \\hline" << std::endl;
+
+  // write the caption line of the table
+
+  for (unsigned int j=0; j<column_order.size(); ++j)
+    {
+      const std::string &key=column_order[j];
+      const std::map<std::string, std::vector<std::string> >::const_iterator
+      super_iter=supercolumns.find(key);
+
+      if (super_iter!=supercolumns.end())
+        {
+          const unsigned int n_subcolumns=super_iter->second.size();
+          // avoid use of `tex_supercaptions[key]'
+          std::map<std::string,std::string>::const_iterator
+          tex_super_cap_iter=tex_supercaptions.find(key);
+          // same sanity check as for the column lookups, before the
+          // iterator is dereferenced
+          Assert(tex_super_cap_iter!=tex_supercaptions.end(),
+                 ExcInternalError());
+          out << std::endl << "\\multicolumn{" << n_subcolumns << "}{|c|}{"
+              << tex_super_cap_iter->second << "}";
+        }
+      else
+        {
+          // col_iter->second=columns[col];
+          const std::map<std::string, Column>::const_iterator
+          col_iter=columns.find(key);
+          Assert(col_iter!=columns.end(), ExcInternalError());
+          out << col_iter->second.tex_caption;
+        }
+      if (j<column_order.size()-1)
+        out << " & ";
+    }
+  out << "\\\\ \\hline" << std::endl;
+
+  // write the n rows
+  const unsigned int nrows=n_rows();
+  const unsigned int n_cols=sel_columns.size();
+  for (unsigned int i=0; i<nrows; ++i)
+    {
+      for (unsigned int j=0; j<n_cols; ++j)
+        {
+          const std::string &key=sel_columns[j];
+          // avoid `column[key]'
+          const std::map<std::string, Column>::const_iterator
+          col_iter=columns.find(key);
+          Assert(col_iter!=columns.end(), ExcInternalError());
+
+          const Column &column=col_iter->second;
+
+          out << std::setprecision(column.precision);
+
+          if (col_iter->second.scientific)
+            out.setf(std::ios::scientific, std::ios::floatfield);
+          else
+            out.setf(std::ios::fixed, std::ios::floatfield);
+
+          out << column.entries[i].value;
+
+          if (j<n_cols-1)
+            out << " & ";
+        }
+      out << "\\\\ \\hline" << std::endl;
+    }
+
+  out   << "\\end{tabular}" << std::endl
+        << "\\end{center}" << std::endl;
+  if (tex_table_caption!="")
+    out << "\\caption{"  << tex_table_caption << "}" << std::endl;
+  if (tex_table_label!="")
+    out << "\\label{"   << tex_table_label << "}" << std::endl;
+  out   << "\\end{table}" << std::endl;
+  if (with_header)
+    out << "\\end{document}" << std::endl;
+}
+
+
+// Empty the table: drop all columns, supercolumns, the column order and
+// the LaTeX supercaptions, and reset the LaTeX table label and caption.
+void TableHandler::clear()
+{
+  // table contents and layout
+  columns.clear();
+  supercolumns.clear();
+  column_order.clear();
+  tex_supercaptions.clear();
+
+  // LaTeX decorations of the table as a whole
+  tex_table_label.clear();
+  tex_table_caption.clear();
+}
+
+
+// Return the number of entries of the first column (0 if there are no
+// columns). All remaining columns are checked via Assert to hold the
+// same number of entries.
+unsigned int TableHandler::n_rows() const
+{
+  if (columns.empty())
+    return 0;
+
+  std::map<std::string, Column>::const_iterator col_iter = columns.begin();
+  const unsigned int n = col_iter->second.entries.size();
+  const std::string first_name = col_iter->first;
+
+  // compare every other column against the first one
+  ++col_iter;
+  while (col_iter!=columns.end())
+    {
+      Assert(col_iter->second.entries.size()==n,
+             ExcWrongNumberOfDataEntries(col_iter->first,
+                                         col_iter->second.entries.size(),
+                                         first_name, n));
+      ++col_iter;
+    }
+
+  return n;
+}
+
+
+// Fill @p sel_columns with the keys of all columns in output order:
+// each entry of column_order is either copied as is (plain column) or
+// replaced by the keys of its subcolumns (supercolumn). Any previous
+// content of @p sel_columns is discarded.
+void TableHandler::get_selected_columns(std::vector<std::string> &sel_columns) const
+{
+  sel_columns.clear();
+
+  const unsigned int n_entries = column_order.size();
+  for (unsigned int j=0; j<n_entries; ++j)
+    {
+      const std::string &key = column_order[j];
+      const std::map<std::string, std::vector<std::string> >::const_iterator
+      super_iter = supercolumns.find(key);
+
+      if (super_iter == supercolumns.end())
+        {
+          // key names a plain column
+          Assert(columns.count(key), ExcInternalError());
+          sel_columns.push_back(key);
+          continue;
+        }
+
+      // key names a supercolumn: append its subcolumns one by one
+      const std::vector<std::string> &subkeys = super_iter->second;
+      for (std::vector<std::string>::const_iterator sub = subkeys.begin();
+           sub != subkeys.end(); ++sub)
+        {
+          const std::string &subkey = *sub;
+          Assert(columns.count(subkey), ExcInternalError());
+          sel_columns.push_back(subkey);
+        }
+    }
+}
+
+
+// Remove the last entry from every column that currently has the
+// maximal number of entries. Does nothing if all columns are empty.
+void TableHandler::clear_current_row ()
+{
+  typedef std::map< std::string, Column >::iterator column_iterator;
+
+  // Determine the current (maximal) length over all columns ...
+  std::vector<internal::TableEntry>::size_type longest = 0;
+  for (column_iterator col = columns.begin(); col != columns.end(); ++col)
+    if (col->second.entries.size() > longest)
+      longest = col->second.entries.size();
+
+  if (longest == 0)
+    return;
+
+  // ... and shave the topmost element off every column of that length.
+  for (column_iterator col = columns.begin(); col != columns.end(); ++col)
+    if (col->second.entries.size() == longest)
+      col->second.entries.pop_back();
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/tensor_function.cc b/source/base/tensor_function.cc
new file mode 100644
index 0000000..50a6d22
--- /dev/null
+++ b/source/base/tensor_function.cc
@@ -0,0 +1,23 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/tensor_function.templates.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+// explicit instantiations
+#include "tensor_function.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/tensor_function.inst.in b/source/base/tensor_function.inst.in
new file mode 100644
index 0000000..39a52c9
--- /dev/null
+++ b/source/base/tensor_function.inst.in
@@ -0,0 +1,29 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+// Explicit instantiations of the three tensor function classes for every
+// combination of a scalar type S from REAL_SCALARS, a tensor rank from
+// RANKS and a space dimension from SPACE_DIMENSIONS.
+for (S : REAL_SCALARS; rank: RANKS; dim : SPACE_DIMENSIONS)
+{
+  template class TensorFunction<rank, dim, S>;
+  template class ConstantTensorFunction<rank, dim, S>;
+  template class ZeroTensorFunction<rank, dim, S>;
+}
+
+// The same three classes, instantiated for the scalar types listed in
+// COMPLEX_SCALARS.
+for (S : COMPLEX_SCALARS; rank: RANKS; dim : SPACE_DIMENSIONS)
+{
+  template class TensorFunction<rank, dim, S>;
+  template class ConstantTensorFunction<rank, dim, S>;
+  template class ZeroTensorFunction<rank, dim, S>;
+}
diff --git a/source/base/tensor_product_polynomials.cc b/source/base/tensor_product_polynomials.cc
new file mode 100644
index 0000000..9ee229c
--- /dev/null
+++ b/source/base/tensor_product_polynomials.cc
@@ -0,0 +1,679 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/tensor_product_polynomials.h>
+#include <deal.II/base/polynomials_piecewise.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/table.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+/* ------------------- TensorProductPolynomials -------------- */
+
+
+namespace internal
+{
+  namespace
+  {
+    // Helpers that split a linear index into per-direction indices. The
+    // overload is selected by the length of the output array; the first
+    // direction runs fastest.
+
+    // Fallback for array lengths without a dedicated overload below.
+    template <int dim>
+    inline void
+    compute_tensor_index(const unsigned int,
+                         const unsigned int,
+                         const unsigned int,
+                         unsigned int      ( &)[dim])
+    {
+      Assert(false, ExcNotImplemented());
+    }
+
+    // One direction: the linear index is the only index.
+    inline void
+    compute_tensor_index(const unsigned int linear_index,
+                         const unsigned int /*n_pols_0*/,
+                         const unsigned int /*n_pols_1*/,
+                         unsigned int       (&indices)[1])
+    {
+      indices[0] = linear_index;
+    }
+
+    // Two directions: remainder and quotient with respect to the number
+    // of polynomials in the first direction.
+    inline void
+    compute_tensor_index(const unsigned int linear_index,
+                         const unsigned int n_pols_0,
+                         const unsigned int /*n_pols_1*/,
+                         unsigned int       (&indices)[2])
+    {
+      indices[1] = linear_index / n_pols_0;
+      indices[0] = linear_index % n_pols_0;
+    }
+
+    // Three directions.
+    inline void
+    compute_tensor_index(const unsigned int linear_index,
+                         const unsigned int n_pols_0,
+                         const unsigned int n_pols_1,
+                         unsigned int       (&indices)[3])
+    {
+      indices[2] = linear_index / (n_pols_0*n_pols_1);
+      indices[1] = (linear_index/n_pols_0) % n_pols_1;
+      indices[0] = linear_index % n_pols_0;
+    }
+  }
+}
+
+
+
+// Translate the number @p i of a tensor product polynomial into the
+// indices of its one-dimensional factors. @p i is first mapped through
+// index_map; every direction uses the same set of 1d polynomials.
+template <int dim, typename PolynomialType>
+inline
+void
+TensorProductPolynomials<dim,PolynomialType>::
+compute_index (const unsigned int i,
+               unsigned int       (&indices)[(dim > 0 ? dim : 1)]) const
+{
+  Assert (i<Utilities::fixed_power<dim>(polynomials.size()),ExcInternalError());
+
+  const unsigned int n_pols_1d = polynomials.size();
+  internal::compute_tensor_index(index_map[i], n_pols_1d, n_pols_1d, indices);
+}
+
+
+
+// Print one line per tensor product polynomial to @p out: its number,
+// a tab, and then the indices of its one-dimensional factors, each
+// followed by a blank.
+template <int dim, typename PolynomialType>
+void
+TensorProductPolynomials<dim,PolynomialType>::output_indices(std::ostream &out) const
+{
+  unsigned int factor_indices[dim];
+  for (unsigned int pol=0; pol<n_tensor_pols; ++pol)
+    {
+      compute_index(pol, factor_indices);
+
+      out << pol << "\t";
+      for (unsigned int direction=0; direction<dim; ++direction)
+        out << factor_indices[direction] << " ";
+      out << std::endl;
+    }
+}
+
+
+
+// Install @p renumber as the new index_map and rebuild index_map_inverse
+// so that index_map_inverse[index_map[i]] == i for all i.
+//
+// @p renumber must have as many entries as the current index_map, and
+// each entry must be a valid index into index_map_inverse; both
+// conditions are checked via Assert.
+template <int dim, typename PolynomialType>
+void
+TensorProductPolynomials<dim,PolynomialType>::set_numbering
+(const std::vector<unsigned int> &renumber)
+{
+  Assert(renumber.size()==index_map.size(),
+         ExcDimensionMismatch(renumber.size(), index_map.size()));
+
+  index_map=renumber;
+  for (unsigned int i=0; i<index_map.size(); ++i)
+    {
+      // an entry outside the inverse map would make the write below
+      // run past the end of index_map_inverse
+      Assert(index_map[i]<index_map_inverse.size(),
+             ExcIndexRange(index_map[i], 0, index_map_inverse.size()));
+      index_map_inverse[index_map[i]]=i;
+    }
+}
+
+
+
+// Specialization for dim == 0: not implemented. The Assert fires, and
+// where it does not abort the function returns 0.
+template <>
+double
+TensorProductPolynomials<0,Polynomials::Polynomial<double> >
+::compute_value(const unsigned int /*i*/,
+                const Point<0> & /*p*/) const
+{
+  Assert (false, ExcNotImplemented());
+  return 0.;
+}
+
+
+
+// Value of the @p i-th tensor product polynomial at @p p: the product
+// of the values of its one-dimensional factors, each evaluated at the
+// corresponding coordinate of @p p.
+template <int dim, typename PolynomialType>
+double
+TensorProductPolynomials<dim,PolynomialType>::compute_value
+(const unsigned int  i,
+ const Point<dim>   &p) const
+{
+  Assert(dim>0, ExcNotImplemented());
+
+  unsigned int factor[dim];
+  compute_index (i, factor);
+
+  double product = 1.;
+  for (unsigned int direction=0; direction<dim; ++direction)
+    {
+      const double factor_value = polynomials[factor[direction]].value(p(direction));
+      product *= factor_value;
+    }
+
+  return product;
+}
+
+
+
+// Gradient of the @p i-th tensor product polynomial at @p p. Component
+// d is the product over all directions of the 1d factors, where the
+// factor in direction d enters with its first derivative and all other
+// factors enter with their value.
+template <int dim, typename PolynomialType>
+Tensor<1,dim>
+TensorProductPolynomials<dim,PolynomialType>::compute_grad (const unsigned int  i,
+                                                            const Point<dim>   &p) const
+{
+  unsigned int indices[dim];
+  compute_index (i, indices);
+
+  // v[d][0]: value, v[d][1]: derivative of the 1d factor in direction d,
+  // both evaluated at p(d)
+  double v [dim][2];
+  std::vector<double> tmp (2);
+  for (unsigned int d=0; d<dim; ++d)
+    {
+      polynomials[indices[d]].value (p(d), tmp);
+      for (unsigned int k=0; k<2; ++k)
+        v[d][k] = tmp[k];
+    }
+
+  Tensor<1,dim> grad;
+  for (unsigned int d=0; d<dim; ++d)
+    {
+      grad[d] = 1.;
+      for (unsigned int x=0; x<dim; ++x)
+        {
+          const unsigned int derivative = (d==x ? 1 : 0);
+          grad[d] *= v[x][derivative];
+        }
+    }
+
+  return grad;
+}
+
+
+
+// Matrix of second derivatives of the @p i-th tensor product polynomial
+// at @p p. Entry (d1,d2) is the product over all directions x of the 1d
+// factor in direction x, differentiated once for each of d1 and d2 that
+// equals x.
+template <int dim, typename PolynomialType>
+Tensor<2,dim>
+TensorProductPolynomials<dim,PolynomialType>::compute_grad_grad
+(const unsigned int  i,
+ const Point<dim>   &p) const
+{
+  unsigned int indices[dim];
+  compute_index (i, indices);
+
+  // v[d][k]: entry k of what the 1d factor in direction d returns when
+  // evaluated at p(d) into a vector of length 3
+  double v [dim][3];
+  std::vector<double> tmp (3);
+  for (unsigned int d=0; d<dim; ++d)
+    {
+      polynomials[indices[d]].value (p(d), tmp);
+      for (unsigned int k=0; k<3; ++k)
+        v[d][k] = tmp[k];
+    }
+
+  Tensor<2,dim> grad_grad;
+  for (unsigned int d1=0; d1<dim; ++d1)
+    for (unsigned int d2=0; d2<dim; ++d2)
+      {
+        grad_grad[d1][d2] = 1.;
+        for (unsigned int x=0; x<dim; ++x)
+          {
+            // 0, 1 or 2, depending on how many of d1, d2 coincide with x
+            const unsigned int derivative = (d1==x ? 1 : 0) + (d2==x ? 1 : 0);
+            grad_grad[d1][d2] *= v[x][derivative];
+          }
+      }
+
+  return grad_grad;
+}
+
+
+
+
+// Evaluate all tensor product polynomials at the point @p p and write
+// values and derivatives up to fourth order into the given vectors.
+//
+// Each output vector must have either n_tensor_pols entries, in which
+// case it is filled, or zero entries, in which case that quantity is
+// skipped (checked via the Asserts below).
+template <int dim, typename PolynomialType>
+void
+TensorProductPolynomials<dim,PolynomialType>::
+compute (const Point<dim>            &p,
+         std::vector<double>         &values,
+         std::vector<Tensor<1,dim> > &grads,
+         std::vector<Tensor<2,dim> > &grad_grads,
+         std::vector<Tensor<3,dim> > &third_derivatives,
+         std::vector<Tensor<4,dim> > &fourth_derivatives) const
+{
+  Assert (values.size()==n_tensor_pols    || values.size()==0,
+          ExcDimensionMismatch2(values.size(), n_tensor_pols, 0));
+  Assert (grads.size()==n_tensor_pols     || grads.size()==0,
+          ExcDimensionMismatch2(grads.size(), n_tensor_pols, 0));
+  Assert (grad_grads.size()==n_tensor_pols|| grad_grads.size()==0,
+          ExcDimensionMismatch2(grad_grads.size(), n_tensor_pols, 0));
+  Assert (third_derivatives.size()==n_tensor_pols|| third_derivatives.size()==0,
+          ExcDimensionMismatch2(third_derivatives.size(), n_tensor_pols, 0));
+  Assert (fourth_derivatives.size()==n_tensor_pols|| fourth_derivatives.size()==0,
+          ExcDimensionMismatch2(fourth_derivatives.size(), n_tensor_pols, 0));
+
+  // a quantity is computed exactly if its output vector has full size
+  const bool update_values     = (values.size() == n_tensor_pols),
+             update_grads      = (grads.size()==n_tensor_pols),
+             update_grad_grads = (grad_grads.size()==n_tensor_pols),
+             update_3rd_derivatives      = (third_derivatives.size()==n_tensor_pols),
+             update_4th_derivatives = (fourth_derivatives.size()==n_tensor_pols);
+
+  // check how many
+  // values/derivatives we have to
+  // compute. the highest requested
+  // order wins since later
+  // assignments overwrite earlier ones
+  unsigned int n_values_and_derivatives = 0;
+  if (update_values)
+    n_values_and_derivatives = 1;
+  if (update_grads)
+    n_values_and_derivatives = 2;
+  if (update_grad_grads)
+    n_values_and_derivatives = 3;
+  if (update_3rd_derivatives)
+    n_values_and_derivatives = 4;
+  if (update_4th_derivatives)
+    n_values_and_derivatives = 5;
+
+
+  // compute the values (and derivatives, if
+  // necessary) of all polynomials at this
+  // evaluation point. to avoid many
+  // reallocation, use one std::vector for
+  // polynomial evaluation and store the
+  // result as Tensor<1,5> (that has enough
+  // fields for any evaluation of values and
+  // derivatives, up to the 4th derivative)
+  //
+  // v(d,i)[e] is entry e of what the i-th 1d polynomial
+  // returns when evaluated at coordinate p(d)
+  Table<2,Tensor<1,5> > v(dim, polynomials.size());
+  {
+    std::vector<double> tmp (n_values_and_derivatives);
+    for (unsigned int d=0; d<dim; ++d)
+      for (unsigned int i=0; i<polynomials.size(); ++i)
+        {
+          polynomials[i].value(p(d), tmp);
+          for (unsigned int e=0; e<n_values_and_derivatives; ++e)
+            v(d,i)[e] = tmp[e];
+        };
+  }
+
+  for (unsigned int i=0; i<n_tensor_pols; ++i)
+    {
+      // first get the
+      // one-dimensional indices of
+      // this particular tensor
+      // product polynomial
+      unsigned int indices[dim];
+      compute_index (i, indices);
+
+      // value: product of the 1d values in all directions
+      if (update_values)
+        {
+          values[i] = 1;
+          for (unsigned int x=0; x<dim; ++x)
+            values[i] *= v(x,indices[x])[0];
+        }
+
+      // gradient: in component d, the factor in direction d
+      // enters with its first derivative (index d==x is 1),
+      // all others with their value (index 0)
+      if (update_grads)
+        for (unsigned int d=0; d<dim; ++d)
+          {
+            grads[i][d] = 1.;
+            for (unsigned int x=0; x<dim; ++x)
+              grads[i][d] *= v(x,indices[x])[d==x];
+          }
+
+      // second derivatives: for each direction x, count how many
+      // of d1, d2 equal x and use the derivative of that order
+      if (update_grad_grads)
+        for (unsigned int d1=0; d1<dim; ++d1)
+          for (unsigned int d2=0; d2<dim; ++d2)
+            {
+              grad_grads[i][d1][d2] = 1.;
+              for (unsigned int x=0; x<dim; ++x)
+                {
+                  unsigned int derivative=0;
+                  if (d1==x) ++derivative;
+                  if (d2==x) ++derivative;
+
+                  grad_grads[i][d1][d2]
+                  *= v(x,indices[x])[derivative];
+                }
+            }
+
+      // third derivatives: same counting scheme with d1, d2, d3
+      if (update_3rd_derivatives)
+        for (unsigned int d1=0; d1<dim; ++d1)
+          for (unsigned int d2=0; d2<dim; ++d2)
+            for (unsigned int d3=0; d3<dim; ++d3)
+              {
+                third_derivatives[i][d1][d2][d3] = 1.;
+                for (unsigned int x=0; x<dim; ++x)
+                  {
+                    unsigned int derivative=0;
+                    if (d1==x) ++derivative;
+                    if (d2==x) ++derivative;
+                    if (d3==x) ++derivative;
+
+                    third_derivatives[i][d1][d2][d3]
+                    *= v(x,indices[x])[derivative];
+                  }
+              }
+
+      // fourth derivatives: same counting scheme with d1, ..., d4
+      if (update_4th_derivatives)
+        for (unsigned int d1=0; d1<dim; ++d1)
+          for (unsigned int d2=0; d2<dim; ++d2)
+            for (unsigned int d3=0; d3<dim; ++d3)
+              for (unsigned int d4=0; d4<dim; ++d4)
+                {
+                  fourth_derivatives[i][d1][d2][d3][d4] = 1.;
+                  for (unsigned int x=0; x<dim; ++x)
+                    {
+                      unsigned int derivative=0;
+                      if (d1==x) ++derivative;
+                      if (d2==x) ++derivative;
+                      if (d3==x) ++derivative;
+                      if (d4==x) ++derivative;
+
+                      fourth_derivatives[i][d1][d2][d3][d4]
+                      *= v(x,indices[x])[derivative];
+                    }
+                }
+    }
+}
+
+
+
+
+/* ------------------- AnisotropicPolynomials -------------- */
+
+
+// Constructor. @p pols holds one set of one-dimensional polynomials per
+// coordinate direction; it must have exactly dim entries, none of which
+// may be empty (both checked via Assert).
+template <int dim>
+AnisotropicPolynomials<dim>::
+AnisotropicPolynomials(const std::vector<std::vector<Polynomials::Polynomial<double> > > &pols)
+  :
+  polynomials (pols),
+  n_tensor_pols(get_n_tensor_pols(pols))
+{
+  Assert (pols.size() == dim, ExcDimensionMismatch(pols.size(), dim));
+
+  for (unsigned int d=0; d<dim; ++d)
+    {
+      Assert (pols[d].size() > 0,
+              ExcMessage ("The number of polynomials must be larger than zero "
+                          "for all coordinate directions."));
+    }
+}
+
+
+
+
+// Translate the number @p i of a tensor product polynomial into the
+// indices of its one-dimensional factors, one per coordinate direction.
+template <int dim>
+void
+AnisotropicPolynomials<dim>::
+compute_index (const unsigned int i,
+               unsigned int       (&indices)[dim]) const
+{
+#ifdef DEBUG
+  // i must be smaller than the product of the set sizes in all directions
+  unsigned int n_poly = 1;
+  for (unsigned int d=0; d<dim; ++d)
+    n_poly *= polynomials[d].size();
+  Assert (i < n_poly, ExcInternalError());
+#endif
+
+  // in 1d there is no second direction; the argument is unused then
+  const unsigned int n_pols_0 = polynomials[0].size();
+  const unsigned int n_pols_1 = (dim==1) ? 0 /*not used*/ : polynomials[1].size();
+
+  internal::compute_tensor_index(i, n_pols_0, n_pols_1, indices);
+}
+
+
+
+// Value of the @p i-th tensor product polynomial at @p p: the product
+// over all directions d of the selected polynomial of direction d,
+// evaluated at p(d).
+template <int dim>
+double
+AnisotropicPolynomials<dim>::compute_value (const unsigned int i,
+                                            const Point<dim> &p) const
+{
+  unsigned int factor[dim];
+  compute_index (i, factor);
+
+  double product = 1.;
+  for (unsigned int direction=0; direction<dim; ++direction)
+    {
+      const double factor_value
+        = polynomials[direction][factor[direction]].value(p(direction));
+      product *= factor_value;
+    }
+
+  return product;
+}
+
+
+// Gradient of the @p i-th tensor product polynomial at @p p. Component
+// d is the product over all directions of the 1d factors, where the
+// factor in direction d enters with its first derivative and all other
+// factors enter with their value.
+template <int dim>
+Tensor<1,dim>
+AnisotropicPolynomials<dim>::compute_grad (const unsigned int i,
+                                           const Point<dim> &p) const
+{
+  unsigned int indices[dim];
+  compute_index (i, indices);
+
+  // v[d][0]: value, v[d][1]: derivative of the factor in direction d,
+  // both evaluated at p(d)
+  std::vector<std::vector<double> > v(dim, std::vector<double> (2));
+  for (unsigned int d=0; d<dim; ++d)
+    {
+      polynomials[d][indices[d]].value(p(d), v[d]);
+    }
+
+  Tensor<1,dim> grad;
+  for (unsigned int d=0; d<dim; ++d)
+    {
+      grad[d] = 1.;
+      for (unsigned int x=0; x<dim; ++x)
+        {
+          const unsigned int derivative = (d==x ? 1 : 0);
+          grad[d] *= v[x][derivative];
+        }
+    }
+
+  return grad;
+}
+
+
+// Matrix of second derivatives of the @p i-th tensor product polynomial
+// at @p p. Entry (d1,d2) is the product over all directions x of the 1d
+// factor in direction x, differentiated once for each of d1 and d2 that
+// equals x.
+template <int dim>
+Tensor<2,dim>
+AnisotropicPolynomials<dim>::compute_grad_grad (const unsigned int i,
+                                                const Point<dim> &p) const
+{
+  unsigned int indices[dim];
+  compute_index (i, indices);
+
+  // v[d][k]: entry k of what the factor in direction d returns when
+  // evaluated at p(d) into a vector of length 3
+  std::vector<std::vector<double> > v(dim, std::vector<double> (3));
+  for (unsigned int d=0; d<dim; ++d)
+    {
+      polynomials[d][indices[d]].value(p(d), v[d]);
+    }
+
+  Tensor<2,dim> grad_grad;
+  for (unsigned int d1=0; d1<dim; ++d1)
+    for (unsigned int d2=0; d2<dim; ++d2)
+      {
+        grad_grad[d1][d2] = 1.;
+        for (unsigned int x=0; x<dim; ++x)
+          {
+            // 0, 1 or 2, depending on how many of d1, d2 coincide with x
+            const unsigned int derivative = (d1==x ? 1 : 0) + (d2==x ? 1 : 0);
+            grad_grad[d1][d2] *= v[x][derivative];
+          }
+      }
+
+  return grad_grad;
+}
+
+
+
+
+// Evaluate all tensor product polynomials at the point p and store
+// their values and first through fourth derivatives in the given
+// vectors. Each vector must either have n_tensor_pols entries (the
+// quantity is then computed) or be empty (the quantity is then skipped).
+template <int dim>
+void
+AnisotropicPolynomials<dim>::
+compute (const Point<dim>            &p,
+         std::vector<double>         &values,
+         std::vector<Tensor<1,dim> > &grads,
+         std::vector<Tensor<2,dim> > &grad_grads,
+         std::vector<Tensor<3,dim> > &third_derivatives,
+         std::vector<Tensor<4,dim> > &fourth_derivatives) const
+{
+  Assert (values.size()==n_tensor_pols || values.size()==0,
+          ExcDimensionMismatch2(values.size(), n_tensor_pols, 0));
+  Assert (grads.size()==n_tensor_pols|| grads.size()==0,
+          ExcDimensionMismatch2(grads.size(), n_tensor_pols, 0));
+  Assert (grad_grads.size()==n_tensor_pols|| grad_grads.size()==0,
+          ExcDimensionMismatch2(grad_grads.size(), n_tensor_pols, 0));
+  Assert (third_derivatives.size()==n_tensor_pols|| third_derivatives.size()==0,
+          ExcDimensionMismatch2(third_derivatives.size(), n_tensor_pols, 0));
+  Assert (fourth_derivatives.size()==n_tensor_pols|| fourth_derivatives.size()==0,
+          ExcDimensionMismatch2(fourth_derivatives.size(), n_tensor_pols, 0));
+
+  // a quantity is computed exactly if its output vector has been sized
+  // to hold one entry per tensor product polynomial
+  const bool update_values     = (values.size() == n_tensor_pols),
+             update_grads      = (grads.size()==n_tensor_pols),
+             update_grad_grads = (grad_grads.size()==n_tensor_pols),
+             update_3rd_derivatives      = (third_derivatives.size()==n_tensor_pols),
+             update_4th_derivatives = (fourth_derivatives.size()==n_tensor_pols);
+
+  // check how many values/derivatives we have to compute. the highest
+  // requested derivative wins, since the 1d evaluation below fills all
+  // lower orders as well
+  unsigned int n_values_and_derivatives = 0;
+  if (update_values)
+    n_values_and_derivatives = 1;
+  if (update_grads)
+    n_values_and_derivatives = 2;
+  if (update_grad_grads)
+    n_values_and_derivatives = 3;
+  if (update_3rd_derivatives)
+    n_values_and_derivatives = 4;
+  if (update_4th_derivatives)
+    n_values_and_derivatives = 5;
+
+  // nothing was requested: return before evaluating any 1d polynomial,
+  // which would otherwise be handed zero-length output vectors
+  if (n_values_and_derivatives == 0)
+    return;
+
+  // compute the values (and derivatives, if necessary) of all 1d
+  // polynomials at this evaluation point; v[d][i][k] is the k-th
+  // derivative of the i-th polynomial in direction d
+  std::vector<std::vector<std::vector<double> > > v(dim);
+  for (unsigned int d=0; d<dim; ++d)
+    {
+      v[d].resize (polynomials[d].size());
+      for (unsigned int i=0; i<polynomials[d].size(); ++i)
+        {
+          v[d][i].resize (n_values_and_derivatives, 0.);
+          polynomials[d][i].value(p(d), v[d][i]);
+        }
+    }
+
+  for (unsigned int i=0; i<n_tensor_pols; ++i)
+    {
+      // first get the one-dimensional indices of this particular
+      // tensor product polynomial
+      unsigned int indices[dim];
+      compute_index (i, indices);
+
+      if (update_values)
+        {
+          values[i] = 1;
+          for (unsigned int x=0; x<dim; ++x)
+            values[i] *= v[x][indices[x]][0];
+        }
+
+      if (update_grads)
+        for (unsigned int d=0; d<dim; ++d)
+          {
+            grads[i][d] = 1.;
+            for (unsigned int x=0; x<dim; ++x)
+              grads[i][d] *= v[x][indices[x]][d==x ? 1 : 0];
+          }
+
+      // for the higher derivatives, the factor in direction x is
+      // differentiated once for every derivative index equal to x
+      if (update_grad_grads)
+        for (unsigned int d1=0; d1<dim; ++d1)
+          for (unsigned int d2=0; d2<dim; ++d2)
+            {
+              grad_grads[i][d1][d2] = 1.;
+              for (unsigned int x=0; x<dim; ++x)
+                {
+                  unsigned int derivative=0;
+                  if (d1==x) ++derivative;
+                  if (d2==x) ++derivative;
+
+                  grad_grads[i][d1][d2]
+                  *= v[x][indices[x]][derivative];
+                }
+            }
+
+      if (update_3rd_derivatives)
+        for (unsigned int d1=0; d1<dim; ++d1)
+          for (unsigned int d2=0; d2<dim; ++d2)
+            for (unsigned int d3=0; d3<dim; ++d3)
+              {
+                third_derivatives[i][d1][d2][d3] = 1.;
+                for (unsigned int x=0; x<dim; ++x)
+                  {
+                    unsigned int derivative=0;
+                    if (d1==x) ++derivative;
+                    if (d2==x) ++derivative;
+                    if (d3==x) ++derivative;
+
+                    third_derivatives[i][d1][d2][d3]
+                    *= v[x][indices[x]][derivative];
+                  }
+              }
+
+      if (update_4th_derivatives)
+        for (unsigned int d1=0; d1<dim; ++d1)
+          for (unsigned int d2=0; d2<dim; ++d2)
+            for (unsigned int d3=0; d3<dim; ++d3)
+              for (unsigned int d4=0; d4<dim; ++d4)
+                {
+                  fourth_derivatives[i][d1][d2][d3][d4] = 1.;
+                  for (unsigned int x=0; x<dim; ++x)
+                    {
+                      unsigned int derivative=0;
+                      if (d1==x) ++derivative;
+                      if (d2==x) ++derivative;
+                      if (d3==x) ++derivative;
+                      if (d4==x) ++derivative;
+
+                      fourth_derivatives[i][d1][d2][d3][d4]
+                      *= v[x][indices[x]][derivative];
+                    }
+                }
+    }
+}
+
+
+
+// Return the number of tensor product polynomials, i.e. the stored
+// value n_tensor_pols.
+template<int dim>
+unsigned int
+AnisotropicPolynomials<dim>::n() const
+{
+  return this->n_tensor_pols;
+}
+
+
+// Return the product of the numbers of 1d polynomials given for each of
+// the dim coordinate directions.
+template <int dim>
+unsigned int
+AnisotropicPolynomials<dim>::
+get_n_tensor_pols (const std::vector<std::vector<Polynomials::Polynomial<double> > > &pols)
+{
+  unsigned int n_products = 1;
+  for (unsigned int direction=0; direction<dim; ++direction)
+    {
+      const unsigned int n_in_direction = pols[direction].size();
+      n_products *= n_in_direction;
+    }
+  return n_products;
+}
+
+
+
+/* ------------------- explicit instantiations -------------- */
+template class TensorProductPolynomials<1,Polynomials::Polynomial<double> >;
+template class TensorProductPolynomials<2,Polynomials::Polynomial<double> >;
+template class TensorProductPolynomials<3,Polynomials::Polynomial<double> >;
+
+template class TensorProductPolynomials<1,Polynomials::PiecewisePolynomial<double> >;
+template class TensorProductPolynomials<2,Polynomials::PiecewisePolynomial<double> >;
+template class TensorProductPolynomials<3,Polynomials::PiecewisePolynomial<double> >;
+
+template class AnisotropicPolynomials<1>;
+template class AnisotropicPolynomials<2>;
+template class AnisotropicPolynomials<3>;
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/tensor_product_polynomials_bubbles.cc b/source/base/tensor_product_polynomials_bubbles.cc
new file mode 100644
index 0000000..cb8b7f9
--- /dev/null
+++ b/source/base/tensor_product_polynomials_bubbles.cc
@@ -0,0 +1,294 @@
+// ---------------------------------------------------------------------
+// $Id$
+//
+// Copyright (C) 2012 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/tensor_product_polynomials_bubbles.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/table.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+/* ------------------- TensorProductPolynomialsBubbles -------------- */
+
+
+// Return the value of the i-th basis function at the point p. Indices
+// below n_tensor_pols are forwarded to the base class; the remaining
+// indices denote the additional bubble functions.
+template <int dim>
+double
+TensorProductPolynomialsBubbles<dim>::compute_value (const unsigned int i,
+                                                     const Point<dim> &p) const
+{
+  const unsigned int q_degree = this->polynomials.size()-1;
+  const unsigned int max_q_indices = this->n_tensor_pols;
+  const unsigned int n_bubbles = ((q_degree<=1)?1:dim);
+  (void)n_bubbles;
+  Assert (i<max_q_indices+n_bubbles, ExcInternalError());
+
+  // treat the regular basis functions
+  if (i<max_q_indices)
+    return this->TensorProductPolynomials<dim>::compute_value(i,p);
+
+  // coordinate direction in which this bubble carries its monomial
+  const unsigned int comp = i - this->n_tensor_pols;
+
+  // compute \prod_{j=1}^d 4*x_j*(1-x_j) at p
+  double value=1.;
+  for (unsigned int j=0; j<dim; ++j)
+    value*=4*p(j)*(1-p(j));
+  // and multiply with (2*x_comp-1)^{q_degree-1}. the counter starts at
+  // one so that the bound never evaluates the unsigned expression
+  // q_degree-1, which would wrap around for q_degree==0
+  for (unsigned int k=1; k<q_degree; ++k)
+    value*=2*p(comp)-1;
+  return value;
+}
+
+
+
+// Specialization for dim==0: no implementation is provided.
+template <>
+double
+TensorProductPolynomialsBubbles<0>::compute_value (const unsigned int /*i*/,
+                                                   const Point<0> & /*p*/) const
+{
+  // always triggers the assertion; the return value only serves to
+  // give the function a well-defined result
+  Assert (false, ExcNotImplemented());
+  return 0.0;
+}
+
+
+// Return the gradient of the i-th basis function at the point p.
+// Indices below n_tensor_pols are forwarded to the base class; the
+// remaining indices denote the additional bubble functions, which are
+// the product of \psi = \prod_j 4*x_j*(1-x_j) and the monomial
+// (2*x_comp-1)^{q_degree-1}.
+template <int dim>
+Tensor<1,dim>
+TensorProductPolynomialsBubbles<dim>::compute_grad (const unsigned int i,
+                                                    const Point<dim> &p) const
+{
+  const unsigned int q_degree = this->polynomials.size()-1;
+  const unsigned int max_q_indices = this->n_tensor_pols;
+  const unsigned int n_bubbles = ((q_degree<=1)?1:dim);
+  (void)n_bubbles;
+  Assert (i<max_q_indices+n_bubbles, ExcInternalError());
+
+  // treat the regular basis functions
+  if (i<max_q_indices)
+    return this->TensorProductPolynomials<dim>::compute_grad(i,p);
+
+  // coordinate direction in which this bubble carries its monomial
+  const unsigned int comp = i - this->n_tensor_pols;
+  Tensor<1,dim> grad;
+
+  // first part of the product rule: (\partial_d \psi) * monomial
+  for (unsigned int d=0; d<dim ; ++d)
+    {
+      grad[d] = 1.;
+      // compute \partial_d \prod_{j=1}^d 4*x_j*(1-x_j) at p
+      for (unsigned j=0; j<dim; ++j)
+        grad[d] *= (d==j ? 4*(1-2*p(j)) : 4*p(j)*(1-p(j)));
+      // and multiply with (2*x_comp-1)^{q_degree-1}. the counter starts
+      // at one so that the bound never evaluates the unsigned
+      // expression q_degree-1, which would wrap around for q_degree==0
+      for (unsigned int k=1; k<q_degree; ++k)
+        grad[d]*=2*p(comp)-1;
+    }
+
+  // second part of the product rule: \psi * (\partial_comp monomial).
+  // the monomial is constant for q_degree<2, so there is nothing to add
+  if (q_degree>=2)
+    {
+      // \psi = \prod_{j=1}^d 4*x_j*(1-x_j) at p
+      double value=1.;
+      for (unsigned int j=0; j < dim; ++j)
+        value*=4*p(j)*(1-p(j));
+      // times the derivative 2*(q_degree-1)*(2*x_comp-1)^{q_degree-2}
+      double tmp=value*2*(q_degree-1);
+      for (unsigned int k=2; k<q_degree; ++k)
+        tmp*=2*p(comp)-1;
+      grad[comp]+=tmp;
+    }
+
+  return grad;
+}
+
+
+
+// Return the matrix of second derivatives of the i-th basis function at
+// the point p. Indices below n_tensor_pols are forwarded to the base
+// class; the remaining indices denote the additional bubble functions,
+// which are the product of \psi = \prod_j 4*x_j*(1-x_j) and the monomial
+// (2*x_comp-1)^{q_degree-1}.
+template <int dim>
+Tensor<2,dim>
+TensorProductPolynomialsBubbles<dim>::compute_grad_grad (const unsigned int i,
+                                                         const Point<dim> &p) const
+{
+  const unsigned int q_degree = this->polynomials.size()-1;
+  const unsigned int max_q_indices = this->n_tensor_pols;
+  const unsigned int n_bubbles = ((q_degree<=1)?1:dim);
+  (void)n_bubbles;
+  Assert (i<max_q_indices+n_bubbles, ExcInternalError());
+
+  // treat the regular basis functions
+  if (i<max_q_indices)
+    return this->TensorProductPolynomials<dim>::compute_grad_grad(i,p);
+
+  // coordinate direction in which this bubble carries its monomial
+  const unsigned int comp = i - this->n_tensor_pols;
+
+  // v[c][k] for c<dim holds the k-th derivative of 4*x_c*(1-x_c) at
+  // p(c); v[dim][k] holds the k-th derivative of the monomial
+  // (2*x_comp-1)^{q_degree-1} with respect to x_comp
+  double v [dim+1][3];
+  {
+    for (unsigned int c=0; c<dim; ++c)
+      {
+        v[c][0] = 4*p(c)*(1-p(c));
+        v[c][1] = 4*(1-2*p(c));
+        v[c][2] = -8;
+      }
+
+    // the loop counters below start at the subtracted offset so that
+    // no bound evaluates an unsigned difference such as q_degree-1,
+    // which would wrap around for q_degree==0
+    double monomial_value=1.;
+    for (unsigned int k=1; k<q_degree; ++k)
+      monomial_value *= 2*p(comp)-1;
+    v[dim][0] = monomial_value;
+
+    if (q_degree>=2)
+      {
+        double monomial_derivative = 2*(q_degree-1);
+        for (unsigned int k=2; k<q_degree; ++k)
+          monomial_derivative *= 2*p(comp)-1;
+        v[dim][1] = monomial_derivative;
+      }
+    else
+      v[dim][1] = 0.;
+
+    if (q_degree>=3)
+      {
+        double monomial_second_derivative=4*(q_degree-2)*(q_degree-1);
+        for (unsigned int k=3; k<q_degree; ++k)
+          monomial_second_derivative *= 2*p(comp)-1;
+        v[dim][2] = monomial_second_derivative;
+      }
+    else
+      v[dim][2] = 0.;
+  }
+
+  //calculate (\partial_j \partial_k \psi) * monomial
+  Tensor<2,dim> grad_grad_1;
+  for (unsigned int d1=0; d1<dim; ++d1)
+    for (unsigned int d2=0; d2<dim; ++d2)
+      {
+        grad_grad_1[d1][d2] = v[dim][0];
+        for (unsigned int x=0; x<dim; ++x)
+          {
+            unsigned int derivative=0;
+            if (d1==x || d2==x)
+              {
+                if (d1==d2)
+                  derivative=2;
+                else
+                  derivative=1;
+              }
+            grad_grad_1[d1][d2] *= v[x][derivative];
+          }
+      }
+
+  //calculate (\partial_j  \psi) *(\partial_k monomial)
+  // and (\partial_k  \psi) *(\partial_j monomial). the monomial only
+  // depends on x_comp, so only column comp resp. row comp is written
+  Tensor<2,dim> grad_grad_2;
+  Tensor<2,dim> grad_grad_3;
+  for (unsigned int d=0; d<dim; ++d)
+    {
+      grad_grad_2[d][comp] = v[dim][1];
+      grad_grad_3[comp][d] = v[dim][1];
+      for (unsigned int x=0; x<dim; ++x)
+        {
+          grad_grad_2[d][comp] *= v[x][d==x];
+          grad_grad_3[comp][d] *= v[x][d==x];
+        }
+    }
+
+  //calculate \psi *(\partial j \partial_k monomial) and sum
+  Tensor<2,dim> grad_grad;
+  double psi_value = 1.;
+  for (unsigned int x=0; x<dim; ++x)
+    psi_value *= v[x][0];
+
+  for (unsigned int d1=0; d1<dim; ++d1)
+    for (unsigned int d2=0; d2<dim; ++d2)
+      grad_grad[d1][d2] = grad_grad_1[d1][d2]
+                          +grad_grad_2[d1][d2]
+                          +grad_grad_3[d1][d2];
+  grad_grad[comp][comp]+=psi_value*v[dim][2];
+
+  return grad_grad;
+}
+
+// Evaluate all basis functions (regular tensor product polynomials
+// followed by the bubble functions) at the point p. Each output vector
+// must either have one entry per basis function or be empty, in which
+// case the corresponding quantity is skipped.
+template <int dim>
+void
+TensorProductPolynomialsBubbles<dim>::
+compute (const Point<dim>            &p,
+         std::vector<double>         &values,
+         std::vector<Tensor<1,dim> > &grads,
+         std::vector<Tensor<2,dim> > &grad_grads,
+         std::vector<Tensor<3,dim> > &third_derivatives,
+         std::vector<Tensor<4,dim> > &fourth_derivatives) const
+{
+  const unsigned int q_degree = this->polynomials.size()-1;
+  const unsigned int max_q_indices = this->n_tensor_pols;
+  (void) max_q_indices;
+  const unsigned int n_bubbles = ((q_degree<=1)?1:dim);
+
+  Assert (values.size()==max_q_indices+n_bubbles || values.size()==0,
+          ExcDimensionMismatch2(values.size(), max_q_indices+n_bubbles, 0));
+  Assert (grads.size()==max_q_indices+n_bubbles     || grads.size()==0,
+          ExcDimensionMismatch2(grads.size(), max_q_indices+n_bubbles, 0));
+  Assert (grad_grads.size()==max_q_indices+n_bubbles || grad_grads.size()==0,
+          ExcDimensionMismatch2(grad_grads.size(), max_q_indices+n_bubbles, 0));
+  Assert (third_derivatives.size()==max_q_indices+n_bubbles || third_derivatives.size()==0,
+          ExcDimensionMismatch2(third_derivatives.size(), max_q_indices+n_bubbles, 0));
+  Assert (fourth_derivatives.size()==max_q_indices+n_bubbles || fourth_derivatives.size()==0,
+          ExcDimensionMismatch2(fourth_derivatives.size(), max_q_indices+n_bubbles, 0));
+
+  // a quantity is requested exactly if its output vector is non-empty
+  const bool do_values          = !values.empty();
+  const bool do_grads           = !grads.empty();
+  const bool do_grad_grads      = !grad_grads.empty();
+  const bool do_3rd_derivatives = !third_derivatives.empty();
+  const bool do_4th_derivatives = !fourth_derivatives.empty();
+
+  // cut the requested vectors down to the regular basis functions
+  // before handing them to the base class
+  if (do_values)
+    values.resize(this->n_tensor_pols);
+  if (do_grads)
+    grads.resize(this->n_tensor_pols);
+  if (do_grad_grads)
+    grad_grads.resize(this->n_tensor_pols);
+  if (do_3rd_derivatives)
+    third_derivatives.resize(this->n_tensor_pols);
+  if (do_4th_derivatives)
+    fourth_derivatives.resize(this->n_tensor_pols);
+
+  this->TensorProductPolynomials<dim>::compute(p, values, grads, grad_grads, third_derivatives, fourth_derivatives);
+
+  // append the entries for the bubble functions again, one at a time
+  const unsigned int first_bubble = this->n_tensor_pols;
+  for (unsigned int b=0; b<n_bubbles; ++b)
+    {
+      const unsigned int i = first_bubble + b;
+
+      if (do_values)
+        values.push_back(compute_value(i,p));
+      if (do_grads)
+        grads.push_back(compute_grad(i,p));
+      if (do_grad_grads)
+        grad_grads.push_back(compute_grad_grad(i,p));
+      if (do_3rd_derivatives)
+        third_derivatives.push_back(compute_derivative<3>(i,p));
+      if (do_4th_derivatives)
+        fourth_derivatives.push_back(compute_derivative<4>(i,p));
+    }
+}
+
+
+/* ------------------- explicit instantiations -------------- */
+template class TensorProductPolynomialsBubbles<1>;
+template class TensorProductPolynomialsBubbles<2>;
+template class TensorProductPolynomialsBubbles<3>;
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/tensor_product_polynomials_const.cc b/source/base/tensor_product_polynomials_const.cc
new file mode 100644
index 0000000..edc01a9
--- /dev/null
+++ b/source/base/tensor_product_polynomials_const.cc
@@ -0,0 +1,160 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2012 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/tensor_product_polynomials_const.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/table.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+/* ------------------- TensorProductPolynomialsConst -------------- */
+
+
+// Return the value of the i-th basis function at the point p. The
+// function with index n_tensor_pols is the additional constant
+// function; all lower indices are forwarded to the base class.
+template <int dim>
+double
+TensorProductPolynomialsConst<dim>::compute_value (const unsigned int i,
+                                                   const Point<dim> &p) const
+{
+  const unsigned int n_regular = this->n_tensor_pols;
+  Assert (i<=n_regular, ExcInternalError());
+
+  // this is for the constant function
+  if (i>=n_regular)
+    return 1.;
+
+  // treat the regular basis functions
+  return this->TensorProductPolynomials<dim>::compute_value(i,p);
+}
+
+
+
+// Specialization for dim==0: no implementation is provided.
+template <>
+double
+TensorProductPolynomialsConst<0>::compute_value (const unsigned int /*i*/,
+                                                 const Point<0> & /*p*/) const
+{
+  // always triggers the assertion; the return value only serves to
+  // give the function a well-defined result
+  Assert (false, ExcNotImplemented());
+  return 0.0;
+}
+
+
+// Return the gradient of the i-th basis function at the point p. The
+// function with index n_tensor_pols is the additional constant
+// function, for which a default-constructed tensor is returned.
+template <int dim>
+Tensor<1,dim>
+TensorProductPolynomialsConst<dim>::compute_grad (const unsigned int i,
+                                                  const Point<dim> &p) const
+{
+  const unsigned int n_regular = this->n_tensor_pols;
+  Assert (i<=n_regular, ExcInternalError());
+
+  // this is for the constant function
+  if (i>=n_regular)
+    return Tensor<1,dim>();
+
+  // treat the regular basis functions
+  return this->TensorProductPolynomials<dim>::compute_grad(i,p);
+}
+
+// Return the second derivatives of the i-th basis function at the point
+// p. The function with index n_tensor_pols is the additional constant
+// function, for which a default-constructed tensor is returned.
+template <int dim>
+Tensor<2,dim>
+TensorProductPolynomialsConst<dim>::compute_grad_grad (const unsigned int i,
+                                                       const Point<dim> &p) const
+{
+  const unsigned int n_regular = this->n_tensor_pols;
+  Assert (i<=n_regular, ExcInternalError());
+
+  // this is for the constant function
+  if (i>=n_regular)
+    return Tensor<2,dim>();
+
+  // treat the regular basis functions
+  return this->TensorProductPolynomials<dim>::compute_grad_grad(i,p);
+}
+
+// Evaluate all basis functions (regular tensor product polynomials
+// followed by the constant function) at the point p. Each output vector
+// must either have n_tensor_pols+1 entries or be empty, in which case
+// the corresponding quantity is skipped.
+template <int dim>
+void
+TensorProductPolynomialsConst<dim>::
+compute (const Point<dim>            &p,
+         std::vector<double>         &values,
+         std::vector<Tensor<1,dim> > &grads,
+         std::vector<Tensor<2,dim> > &grad_grads,
+         std::vector<Tensor<3,dim> > &third_derivatives,
+         std::vector<Tensor<4,dim> > &fourth_derivatives) const
+{
+  Assert (values.size()==this->n_tensor_pols+1 || values.size()==0,
+          ExcDimensionMismatch2(values.size(), this->n_tensor_pols+1, 0));
+  Assert (grads.size()==this->n_tensor_pols+1     || grads.size()==0,
+          ExcDimensionMismatch2(grads.size(), this->n_tensor_pols+1, 0));
+  Assert (grad_grads.size()==this->n_tensor_pols+1 || grad_grads.size()==0,
+          ExcDimensionMismatch2(grad_grads.size(), this->n_tensor_pols+1, 0));
+  Assert (third_derivatives.size()==this->n_tensor_pols+1 || third_derivatives.size()==0,
+          ExcDimensionMismatch2(third_derivatives.size(), this->n_tensor_pols+1, 0));
+  Assert (fourth_derivatives.size()==this->n_tensor_pols+1 || fourth_derivatives.size()==0,
+          ExcDimensionMismatch2(fourth_derivatives.size(), this->n_tensor_pols+1, 0));
+
+  // a quantity is requested exactly if its output vector is non-empty
+  const bool do_values          = !values.empty();
+  const bool do_grads           = !grads.empty();
+  const bool do_grad_grads      = !grad_grads.empty();
+  const bool do_3rd_derivatives = !third_derivatives.empty();
+  const bool do_4th_derivatives = !fourth_derivatives.empty();
+
+  // remove slot for const value, go into the base class compute method and
+  // finally append the const value again. all five vectors are cut down
+  // the same way, with resize(), so that each requested vector has
+  // exactly n_tensor_pols entries when it is handed to the base class
+  if (do_values)
+    values.resize(this->n_tensor_pols);
+  if (do_grads)
+    grads.resize(this->n_tensor_pols);
+  if (do_grad_grads)
+    grad_grads.resize(this->n_tensor_pols);
+  if (do_3rd_derivatives)
+    third_derivatives.resize(this->n_tensor_pols);
+  if (do_4th_derivatives)
+    fourth_derivatives.resize(this->n_tensor_pols);
+
+  this->TensorProductPolynomials<dim>::compute(p, values, grads, grad_grads, third_derivatives, fourth_derivatives);
+
+  //for dgq node: values =1, grads=0, grads_grads=0, third_derivatives=0, fourth_derivatives=0
+  if (do_values)
+    values.push_back(1.);
+  if (do_grads)
+    grads.push_back(Tensor<1,dim>());
+  if (do_grad_grads)
+    grad_grads.push_back(Tensor<2,dim>());
+  if (do_3rd_derivatives)
+    third_derivatives.push_back(Tensor<3,dim>());
+  if (do_4th_derivatives)
+    fourth_derivatives.push_back(Tensor<4,dim>());
+}
+
+
+/* ------------------- explicit instantiations -------------- */
+template class TensorProductPolynomialsConst<1>;
+template class TensorProductPolynomialsConst<2>;
+template class TensorProductPolynomialsConst<3>;
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/thread_management.cc b/source/base/thread_management.cc
new file mode 100644
index 0000000..c2c0aad
--- /dev/null
+++ b/source/base/thread_management.cc
@@ -0,0 +1,275 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/thread_management.h>
+
+#include <cerrno>
+#include <cstdlib>
+#include <iostream>
+#include <list>
+
+#ifdef DEAL_II_HAVE_UNISTD_H
+#  include <unistd.h>
+#endif
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace Threads
+{
+  namespace internal
+  {
+    // counter and access mutex for the
+    // number of threads
+    volatile unsigned int n_existing_threads_counter = 1;
+    Mutex  n_existing_threads_mutex;
+
+
+    // Increase the counter of existing threads by one.
+    void register_thread ()
+    {
+      // hold the counter's mutex for the duration of the update
+      Mutex::ScopedLock counter_lock (n_existing_threads_mutex);
+
+      ++n_existing_threads_counter;
+    }
+
+
+
+    // Decrease the counter of existing threads by one.
+    void deregister_thread ()
+    {
+      // hold the counter's mutex for the duration of the update
+      Mutex::ScopedLock counter_lock (n_existing_threads_mutex);
+
+      --n_existing_threads_counter;
+
+      // after the update, at least one thread must still be counted
+      Assert (n_existing_threads_counter >= 1, ExcInternalError());
+    }
+
+
+
+    // Print a report about the given exception to std::cerr and then
+    // terminate the program through std::abort.
+    void handle_std_exception (const std::exception &exc)
+    {
+      // serialize the report so that output from several failing
+      // threads is not printed over each other. the lock lives in its
+      // own scope and is therefore released before std::abort is called
+      static Mutex output_mutex;
+      {
+        Mutex::ScopedLock output_lock(output_mutex);
+
+        const char *const separator
+          = "---------------------------------------------------------";
+
+        // header
+        std::cerr << std::endl << std::endl
+                  << separator
+                  << std::endl
+                  << "In one of the sub-threads of this program, an exception\n"
+                  << "was thrown and not caught. Since exceptions do not\n"
+                  << "propagate to the main thread, the library has caught it.\n"
+                  << "The information carried by this exception is given below.\n"
+                  << std::endl
+                  << separator
+                  << std::endl;
+
+        // what the exception itself tells us
+        std::cerr << "Exception message: " << std::endl
+                  << "  " << exc.what() << std::endl
+                  << "Exception type: " << std::endl
+                  << "  " << typeid(exc).name() << std::endl;
+
+        // footer
+        std::cerr << "Aborting!" << std::endl
+                  << separator
+                  << std::endl;
+      }
+
+      std::abort ();
+    }
+
+
+
+    // Print a report about an exception of unknown type to std::cerr
+    // and then terminate the program through std::abort.
+    void handle_unknown_exception ()
+    {
+      // serialize the report so that output from several failing
+      // threads is not printed over each other. the lock lives in its
+      // own scope and is therefore released before std::abort is called
+      static Mutex output_mutex;
+      {
+        Mutex::ScopedLock output_lock(output_mutex);
+
+        const char *const separator
+          = "---------------------------------------------------------";
+
+        // header
+        std::cerr << std::endl << std::endl
+                  << separator
+                  << std::endl
+                  << "In one of the sub-threads of this program, an exception\n"
+                  << "was thrown and not caught. Since exceptions do not\n"
+                  << "propagate to the main thread, the library has caught it.\n"
+                  << std::endl
+                  << separator
+                  << std::endl;
+
+        // nothing more is known about the exception
+        std::cerr << "Type of exception is unknown, but not std::exception.\n"
+                  << "No additional information is available.\n"
+                  << separator
+                  << std::endl;
+      }
+
+      std::abort ();
+    }
+  }
+
+
+
+  // Return the current value of the counter of existing threads.
+  unsigned int n_existing_threads ()
+  {
+    // read the counter while holding its mutex
+    Mutex::ScopedLock counter_lock (internal::n_existing_threads_mutex);
+
+    const unsigned int current_count = internal::n_existing_threads_counter;
+    return current_count;
+  }
+
+
+  // Return an identifier for the calling thread. Which value is used
+  // depends on what is available at compile time.
+  unsigned int this_thread_id ()
+  {
+#if defined(SYS_gettid)
+    // ask the kernel through the gettid system call
+    const pid_t id = syscall(SYS_gettid);
+    return static_cast<unsigned int>(id);
+#elif defined(DEAL_II_HAVE_UNISTD_H) && defined(DEAL_II_HAVE_GETPID)
+    // fall back to the value returned by getpid()
+    const pid_t id = getpid();
+    return static_cast<unsigned int>(id);
+#else
+    // no way to query an id: always report zero
+    return static_cast<unsigned int>(0u);
+#endif
+  }
+
+
+
+#ifndef DEAL_II_WITH_THREADS
+  // Constructor. The only count accepted by the assertion is one; the
+  // two unnamed arguments are ignored.
+  DummyBarrier::DummyBarrier (const unsigned int  count,
+                              const char * /*name*/,
+                              void * /*arg*/)
+  {
+    Assert (count == 1, ExcBarrierSizeNotUseful(count));
+
+    // mark count as used for builds in which the assertion above does
+    // not reference it
+    (void)count;
+  }
+
+
+#else
+#  ifdef DEAL_II_USE_MT_POSIX
+
+
+#ifndef DEAL_II_USE_MT_POSIX_NO_BARRIERS
+  // Constructor. Initialize the POSIX barrier for the given number of
+  // threads; the two unnamed arguments are ignored.
+  PosixThreadBarrier::PosixThreadBarrier (const unsigned int  count,
+                                          const char * /*name*/,
+                                          void * /*arg*/)
+  {
+    // a null attribute pointer is passed, i.e. no attributes are given
+    const pthread_barrierattr_t *const no_attributes = 0;
+    pthread_barrier_init (&barrier, no_attributes, count);
+  }
+
+#else
+
+  // Constructor for installations without POSIX barriers. Only stores
+  // the count; the two unnamed arguments are ignored.
+  PosixThreadBarrier::PosixThreadBarrier (const unsigned int  count,
+                                          const char * /*name*/,
+                                          void * /*arg*/)
+    : count (count)
+  {
+    // a count of one is the only case that can be supported: waiting
+    // for such a barrier is a no-op and needs no POSIX functionality.
+    // every other count results in an exception
+    AssertThrow (count == 1,
+                 ExcMessage ("Your local POSIX installation does not support\n"
+                             "POSIX barriers. You will not be able to use\n"
+                             "this class, but the rest of the threading\n"
+                             "functionality is available."));
+  }
+#endif
+
+
+
+  // Destructor.
+  PosixThreadBarrier::~PosixThreadBarrier ()
+  {
+#ifdef DEAL_II_USE_MT_POSIX_NO_BARRIERS
+    // without POSIX barriers, only the no-op barrier with a count of
+    // one may exist; anything else should have been rejected by the
+    // constructor already, so give up
+    if (count != 1)
+      std::abort ();
+#else
+    // release the POSIX barrier again
+    pthread_barrier_destroy (&barrier);
+#endif
+  }
+
+
+
+  // Wait at the barrier and return the result of the wait operation.
+  int
+  PosixThreadBarrier::wait ()
+  {
+#ifdef DEAL_II_USE_MT_POSIX_NO_BARRIERS
+    // without POSIX barriers, waiting is only possible in the special
+    // case of a count of one, where it is a no-op. complain about the
+    // missing POSIX functions in every other case
+    if (count != 1)
+      {
+        std::abort ();
+        return 1;
+      }
+
+    return 0;
+#else
+    // hand the result of the POSIX call back to the caller
+    return pthread_barrier_wait (&barrier);
+#endif
+  }
+
+
+
+
+#  endif
+#endif
+
+
+
+  // Split the half-open index range [begin,end) into n_intervals
+  // consecutive subintervals and return them as (first,second) pairs,
+  // where each pair's second equals the next pair's first. The
+  // subinterval lengths differ by at most one; if the number of
+  // elements is not divisible by n_intervals, the first few
+  // subintervals get one element more. Requires end>=begin and
+  // n_intervals>0.
+  std::vector<std::pair<unsigned int,unsigned int> >
+  split_interval (const unsigned int begin,
+                  const unsigned int end,
+                  const unsigned int n_intervals)
+  {
+    Assert (end >= begin, ExcInternalError());
+    // zero intervals would lead to a division by zero below and to a
+    // write into the first element of an empty vector
+    Assert (n_intervals > 0, ExcInternalError());
+
+    const unsigned int n_elements              = end-begin;
+    const unsigned int n_elements_per_interval = n_elements / n_intervals;
+    const unsigned int residual                = n_elements % n_intervals;
+
+    std::vector<std::pair<unsigned int,unsigned int> > return_values (n_intervals);
+
+    return_values[0].first = begin;
+    for (unsigned int i=0; i<n_intervals; ++i)
+      {
+        if (i != n_intervals-1)
+          {
+            return_values[i].second = (return_values[i].first
+                                       + n_elements_per_interval);
+            // distribute residual in division equally among the first
+            // few subintervals
+            if (i < residual)
+              ++return_values[i].second;
+            // the next subinterval starts where this one ends
+            return_values[i+1].first = return_values[i].second;
+          }
+        else
+          // the last subinterval always ends at the end of the range
+          return_values[i].second = end;
+      }
+    return return_values;
+  }
+}   // end namespace Threads
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/time_stepping.cc b/source/base/time_stepping.cc
new file mode 100644
index 0000000..a75b30b
--- /dev/null
+++ b/source/base/time_stepping.cc
@@ -0,0 +1,34 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/time_stepping.templates.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/parallel_vector.h>
+#include <deal.II/lac/parallel_block_vector.h>
+#include <deal.II/lac/petsc_vector.h>
+#include <deal.II/lac/petsc_block_vector.h>
+#include <deal.II/lac/petsc_parallel_vector.h>
+#include <deal.II/lac/petsc_parallel_block_vector.h>
+#include <deal.II/lac/trilinos_vector.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+#include <deal.II/lac/trilinos_parallel_block_vector.h>
+
+DEAL_II_NAMESPACE_OPEN
+namespace TimeStepping
+{
+#include "time_stepping.inst"
+}
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/time_stepping.inst.in b/source/base/time_stepping.inst.in
new file mode 100644
index 0000000..29d7173
--- /dev/null
+++ b/source/base/time_stepping.inst.in
@@ -0,0 +1,47 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+// Instantiate the four Runge-Kutta class templates for V<S>, for every
+// combination of scalar S in REAL_SCALARS and vector template V in
+// DEAL_II_VEC_TEMPLATES.
+for (S : REAL_SCALARS; V : DEAL_II_VEC_TEMPLATES)
+{
+  template class RungeKutta<V<S> >;
+  template class ExplicitRungeKutta<V<S> >;
+  template class ImplicitRungeKutta<V<S> >;
+  template class EmbeddedExplicitRungeKutta<V<S> >;
+}
+
+// Same combinations as above, but for the vector templates of the same
+// name in namespace parallel::distributed.
+for (S : REAL_SCALARS; V : DEAL_II_VEC_TEMPLATES)
+{
+  template class RungeKutta<parallel::distributed::V<S> >;
+  template class ExplicitRungeKutta<parallel::distributed::V<S> >;
+  template class ImplicitRungeKutta<parallel::distributed::V<S> >;
+  template class EmbeddedExplicitRungeKutta<parallel::distributed::V<S> >;
+}
+
+// Instantiate for each (non-template) vector type listed in
+// EXTERNAL_SEQUENTIAL_VECTORS.
+for (V : EXTERNAL_SEQUENTIAL_VECTORS)
+{
+  template class RungeKutta<V>;
+  template class ExplicitRungeKutta<V>;
+  template class ImplicitRungeKutta<V>;
+  template class EmbeddedExplicitRungeKutta<V>;
+}
+
+// Instantiate for each (non-template) vector type listed in
+// EXTERNAL_PARALLEL_VECTORS.
+for (V : EXTERNAL_PARALLEL_VECTORS)
+{
+  template class RungeKutta<V>;
+  template class ExplicitRungeKutta<V>;
+  template class ImplicitRungeKutta<V>;
+  template class EmbeddedExplicitRungeKutta<V>;
+}
diff --git a/source/base/timer.cc b/source/base/timer.cc
new file mode 100644
index 0000000..1e187d6
--- /dev/null
+++ b/source/base/timer.cc
@@ -0,0 +1,610 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/timer.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/utilities.h>
+#include <sstream>
+#include <iostream>
+#include <iomanip>
+#include <algorithm>
+#include <stddef.h>
+
+#if defined(DEAL_II_HAVE_SYS_TIME_H) && defined(DEAL_II_HAVE_SYS_RESOURCE_H)
+#  include <sys/time.h>
+#  include <sys/resource.h>
+#endif
+
+#ifdef DEAL_II_MSVC
+#  include <windows.h>
+#endif
+
+
+
+DEAL_II_NAMESPACE_OPEN
+
+// Default constructor. Zeroes the accumulated CPU and wall times and
+// starts the timer right away. When compiled with MPI support, the timer
+// is bound to MPI_COMM_SELF, i.e. a communicator just for the current
+// process, and wall-time synchronization is switched off.
+Timer::Timer()
+  :
+  cumulative_time (0.),
+  cumulative_wall_time (0.)
+#ifdef DEAL_II_WITH_MPI
+  , mpi_communicator (MPI_COMM_SELF)
+  , sync_wall_time (false)
+#endif
+{
+  start();
+}
+
+
+
+// Constructor for MPI builds only. Stores the communicator given by the
+// caller and the flag sync_wall_time_, zeroes the accumulated CPU and wall
+// times, and starts the timer right away. If sync_wall_time_ is true,
+// start() runs an MPI_Barrier on the communicator before taking its time
+// stamps.
+#ifdef DEAL_II_WITH_MPI
+Timer::Timer(MPI_Comm mpi_communicator,
+             bool sync_wall_time_)
+  :
+  cumulative_time (0.),
+  cumulative_wall_time (0.),
+  mpi_communicator (mpi_communicator),
+  sync_wall_time(sync_wall_time_)
+{
+  start();
+}
+#endif
+
+#ifdef DEAL_II_MSVC
+
+namespace
+{
+  // Thin wrappers around the Win32 timing functions, used by Timer when
+  // compiling with MSVC.
+  namespace windows
+  {
+    // Return the current value of the high-resolution performance counter,
+    // converted to seconds by dividing by the counter frequency.
+    double wall_clock()
+    {
+      LARGE_INTEGER freq, time;
+      QueryPerformanceFrequency(&freq);
+      QueryPerformanceCounter(&time);
+      return static_cast<double>(time.QuadPart) / freq.QuadPart;
+    }
+
+
+    // Return the user-mode CPU time consumed so far by the current
+    // process, in seconds. Returns 0 if the query fails.
+    double cpu_clock()
+    {
+      // GetProcessTimes() fills, in this order: creation time, exit time,
+      // kernel-mode time, user-mode time. Only the last one is used.
+      FILETIME cpuTime, sysTime, createTime, exitTime;
+      if (GetProcessTimes(GetCurrentProcess(),  &createTime,
+                          &exitTime, &sysTime, &cpuTime))
+        {
+          // a FILETIME counts 100-nanosecond intervals, i.e. there are
+          // 1e7 ticks per second
+          const unsigned long long ticks
+            = (static_cast<unsigned long long>(cpuTime.dwHighDateTime) << 32)
+              | cpuTime.dwLowDateTime;
+          return static_cast<double>(ticks) / 1e7;
+        }
+      return 0;
+    }
+  }
+}
+
+#endif
+
+
+// Mark the timer as running and record the current wall clock time and
+// the CPU time of this process (and, on POSIX systems, of its children)
+// as the start of a new lap.
+void Timer::start ()
+{
+  running = true;
+
+#ifdef DEAL_II_WITH_MPI
+  // if requested, wait for all processes of the communicator before
+  // taking any time stamp
+  if (sync_wall_time)
+    MPI_Barrier(mpi_communicator);
+#endif
+
+#if defined(DEAL_II_HAVE_SYS_TIME_H) && defined(DEAL_II_HAVE_SYS_RESOURCE_H)
+
+//TODO: Break this out into a function like the functions in
+//namespace windows above
+
+  // wall clock
+  struct timeval now;
+  gettimeofday(&now, NULL);
+  start_wall_time = now.tv_sec + 1.e-6 * now.tv_usec;
+
+  // user CPU time of this process
+  rusage self_usage;
+  getrusage (RUSAGE_SELF, &self_usage);
+  start_time = self_usage.ru_utime.tv_sec + 1.e-6 * self_usage.ru_utime.tv_usec;
+
+  // user CPU time of the child processes
+  rusage children_usage;
+  getrusage (RUSAGE_CHILDREN, &children_usage);
+  start_time_children = children_usage.ru_utime.tv_sec + 1.e-6 * children_usage.ru_utime.tv_usec;
+
+#elif defined(DEAL_II_MSVC)
+  start_wall_time = windows::wall_clock();
+  start_time = windows::cpu_clock();
+  // no separate measurement for children on this platform
+  start_time_children = start_time;
+#else
+#  error Unsupported platform. Porting not finished.
+#endif
+}
+
+
+
+// Stop the timer (if it is running), add the CPU time of the finished lap
+// to cumulative_time and its wall time to cumulative_wall_time, and
+// return the accumulated CPU time. Calling stop() on a timer that is not
+// running changes nothing.
+double Timer::stop ()
+{
+  if (!running)
+    return cumulative_time;
+
+  running = false;
+
+#if defined(DEAL_II_HAVE_SYS_TIME_H) && defined(DEAL_II_HAVE_SYS_RESOURCE_H)
+//TODO: Break this out into a function like the functions in
+//namespace windows above
+
+  // CPU time used by this process since start()
+  rusage self_usage;
+  getrusage (RUSAGE_SELF, &self_usage);
+  const double self_cpu = self_usage.ru_utime.tv_sec + 1.e-6 * self_usage.ru_utime.tv_usec;
+  cumulative_time += self_cpu - start_time;
+
+  // CPU time used by child processes since start()
+  rusage children_usage;
+  getrusage (RUSAGE_CHILDREN, &children_usage);
+  const double children_cpu =
+    children_usage.ru_utime.tv_sec + 1.e-6 * children_usage.ru_utime.tv_usec;
+  cumulative_time += children_cpu - start_time_children;
+
+  // wall time of the lap that just ended
+  struct timeval now;
+  gettimeofday(&now, NULL);
+  last_lap_time = now.tv_sec + 1.e-6 * now.tv_usec
+                  - start_wall_time;
+#elif defined(DEAL_II_MSVC)
+  last_lap_time = windows::wall_clock() - start_wall_time;
+  cumulative_time += windows::cpu_clock() - start_time;
+#else
+#  error Unsupported platform. Porting not finished.
+#endif
+
+#ifdef DEAL_II_WITH_MPI
+  // with synchronization enabled, the lap time is replaced by the maximum
+  // over all processes of the communicator
+  if (sync_wall_time && Utilities::MPI::job_supports_mpi())
+    {
+      this->mpi_data
+        = Utilities::MPI::min_max_avg (last_lap_time, mpi_communicator);
+      last_lap_time = this->mpi_data.max;
+    }
+#endif
+
+  cumulative_wall_time += last_lap_time;
+  return cumulative_time;
+}
+
+
+
+// Return the wall time of the most recent start()/stop() cycle, taken as
+// the maximum over the timer's communicator.
+double Timer::get_lap_time() const
+{
+  // last_lap_time was already computed by stop(); only the reduction is
+  // left to do
+  const double max_lap_time = Utilities::MPI::max (last_lap_time, mpi_communicator);
+  return max_lap_time;
+}
+
+
+
+// Return the CPU time accumulated so far. If the timer is running, the
+// time of the current, unfinished lap is included. Except in the running
+// MSVC case, the value is summed over the timer's communicator.
+double Timer::operator() () const
+{
+  if (!running)
+    return Utilities::MPI::sum (cumulative_time, mpi_communicator);
+
+#if defined(DEAL_II_HAVE_SYS_TIME_H) && defined(DEAL_II_HAVE_SYS_RESOURCE_H)
+  rusage self_usage;
+  getrusage (RUSAGE_SELF, &self_usage);
+  const double self_cpu =  self_usage.ru_utime.tv_sec + 1.e-6 * self_usage.ru_utime.tv_usec;
+
+  rusage children_usage;
+  getrusage (RUSAGE_CHILDREN, &children_usage);
+  const double children_cpu =
+    children_usage.ru_utime.tv_sec + 1.e-6 * children_usage.ru_utime.tv_usec;
+
+  const double elapsed = self_cpu - start_time + children_cpu
+                         - start_time_children + cumulative_time;
+
+  // sum over all processes of the communicator. for a timer bound to
+  // MPI_COMM_SELF this is just the local value
+  return Utilities::MPI::sum (elapsed, mpi_communicator);
+
+#elif defined(DEAL_II_MSVC)
+  return windows::cpu_clock() - start_time + cumulative_time;
+#else
+#  error Unsupported platform. Porting not finished.
+#endif
+}
+
+
+
+// Return the accumulated wall clock time in seconds. If the timer is
+// running, the time elapsed since the last start() is included.
+double Timer::wall_time () const
+{
+  if (running)
+    {
+#if defined(DEAL_II_HAVE_SYS_TIME_H) && defined(DEAL_II_HAVE_SYS_RESOURCE_H)
+      struct timeval wall_timer;
+      gettimeofday(&wall_timer, NULL);
+      return (wall_timer.tv_sec
+              + 1.e-6 * wall_timer.tv_usec
+              - start_wall_time
+              + cumulative_wall_time);
+#elif defined(DEAL_II_MSVC)
+      // start() recorded start_wall_time with windows::wall_clock(), so
+      // use the same clock here
+      return (windows::wall_clock()
+              - start_wall_time
+              + cumulative_wall_time);
+#else
+//TODO[BG]: Do something useful here
+      return 0;
+#endif
+    }
+  else
+    return cumulative_wall_time;
+}
+
+
+
+// Stop the timer without recording the current lap and set all
+// accumulated times back to zero.
+void Timer::reset ()
+{
+  running = false;
+
+  cumulative_time      = 0.;
+  cumulative_wall_time = 0.;
+  last_lap_time        = 0.;
+}
+
+
+
+/* ---------------------------- TimerOutput -------------------------- */
+
+// Constructor writing to a plain std::ostream. Stores the requested
+// output frequency and type, wraps the stream in out_stream, and enables
+// output. In MPI builds the communicator is MPI_COMM_SELF.
+TimerOutput::TimerOutput (std::ostream &stream,
+                          const enum OutputFrequency output_frequency,
+                          const enum OutputType output_type)
+  :
+  output_frequency (output_frequency),
+  output_type (output_type),
+  out_stream (stream, true),
+  output_is_enabled (true)
+#ifdef DEAL_II_WITH_MPI
+  , mpi_communicator (MPI_COMM_SELF)
+#endif
+{}
+
+
+
+// Constructor writing to a ConditionalOStream, which is copied into
+// out_stream. Stores the requested output frequency and type and enables
+// output. In MPI builds the communicator is MPI_COMM_SELF.
+TimerOutput::TimerOutput (ConditionalOStream &stream,
+                          const enum OutputFrequency output_frequency,
+                          const enum OutputType output_type)
+  :
+  output_frequency (output_frequency),
+  output_type (output_type),
+  out_stream (stream),
+  output_is_enabled (true)
+#ifdef DEAL_II_WITH_MPI
+  , mpi_communicator (MPI_COMM_SELF)
+#endif
+{}
+
+
+#ifdef DEAL_II_WITH_MPI
+
+// MPI variant of the std::ostream constructor: as above, but the given
+// communicator is stored. enter_subsection() passes it on to the timers
+// of newly created sections if it differs from MPI_COMM_SELF.
+TimerOutput::TimerOutput (MPI_Comm      mpi_communicator,
+                          std::ostream &stream,
+                          const enum OutputFrequency output_frequency,
+                          const enum OutputType output_type)
+  :
+  output_frequency (output_frequency),
+  output_type (output_type),
+  out_stream (stream, true),
+  output_is_enabled (true),
+  mpi_communicator (mpi_communicator)
+{}
+
+
+
+// MPI variant of the ConditionalOStream constructor: as above, but the
+// given communicator is stored. enter_subsection() passes it on to the
+// timers of newly created sections if it differs from MPI_COMM_SELF.
+TimerOutput::TimerOutput (MPI_Comm      mpi_communicator,
+                          ConditionalOStream &stream,
+                          const enum OutputFrequency output_frequency,
+                          const enum OutputType output_type)
+  :
+  output_frequency (output_frequency),
+  output_type (output_type),
+  out_stream (stream),
+  output_is_enabled (true),
+  mpi_communicator (mpi_communicator)
+{}
+
+#endif
+
+
+// Destructor. Leaves every section that is still active and, if a summary
+// was requested and output is enabled, prints it.
+TimerOutput::~TimerOutput()
+{
+  // close sections from the most recently entered one backwards
+  while (!active_sections.empty())
+    leave_subsection();
+
+  const bool summary_requested = (output_frequency == summary
+                                  || output_frequency == every_call_and_summary);
+  if (summary_requested && output_is_enabled)
+    print_summary();
+}
+
+
+
+// Start timing the section called section_name. A section seen for the
+// first time is created with zeroed totals; in every case its timer is
+// reset and started, its call counter is incremented, and the name is
+// appended to the list of active sections. The name must be non-empty
+// and must not belong to a currently active section.
+void
+TimerOutput::enter_subsection (const std::string &section_name)
+{
+  Threads::Mutex::ScopedLock lock (mutex);
+
+  Assert (section_name.empty() == false,
+          ExcMessage ("Section string is empty."));
+
+  Assert (std::find (active_sections.begin(), active_sections.end(),
+                     section_name) == active_sections.end(),
+          ExcMessage (std::string("Cannot enter the already active section <")
+                      + section_name + ">."));
+
+  // remember whether the section exists before operator[] creates it
+  const bool first_visit = (sections.find (section_name) == sections.end());
+  Section &section = sections[section_name];
+
+  if (first_visit)
+    {
+#ifdef DEAL_II_WITH_MPI
+      // give the section a timer on our communicator. passing 'true'
+      // turns on wall-time synchronization: the Timer then runs an
+      // MPI_Barrier in start() and takes the maximum lap time over
+      // all processes of the communicator in stop()
+      if (mpi_communicator != MPI_COMM_SELF)
+        section.timer = Timer(mpi_communicator, true);
+#endif
+
+      section.total_cpu_time = 0;
+      section.total_wall_time = 0;
+      section.n_calls = 0;
+    }
+
+  section.timer.reset();
+  section.timer.start();
+  section.n_calls++;
+
+  active_sections.push_back (section_name);
+}
+
+
+
+// Stop timing a section and add the CPU and wall time of the finished
+// run to the section's totals. If section_name is empty, the most
+// recently entered section is left. Depending on the output frequency, a
+// line with the times of this run is written to out_stream. Finally the
+// section is removed from the list of active sections.
+void
+TimerOutput::leave_subsection (const std::string &section_name)
+{
+  // take the lock before looking at any shared state, just as
+  // enter_subsection() does
+  Threads::Mutex::ScopedLock lock (mutex);
+
+  Assert (!active_sections.empty(),
+          ExcMessage("Cannot exit any section because none has been entered!"));
+
+  if (section_name != "")
+    {
+      Assert (sections.find (section_name) != sections.end(),
+              ExcMessage ("Cannot delete a section that was never created."));
+      Assert (std::find (active_sections.begin(), active_sections.end(),
+                         section_name) != active_sections.end(),
+              ExcMessage ("Cannot delete a section that has not been entered."));
+    }
+
+  // if no string is given, exit the last
+  // active section.
+  const std::string actual_section_name = (section_name == "" ?
+                                           active_sections.back () :
+                                           section_name);
+
+  // look the section up once instead of on every access
+  Section &section = sections[actual_section_name];
+
+  section.timer.stop();
+  section.total_wall_time += section.timer.wall_time();
+
+  // Get cpu time. The Timer itself sums the CPU time over the processes
+  // of its communicator (see Timer::operator()), so no communication is
+  // needed here.
+  const double cpu_time = section.timer();
+  section.total_cpu_time += cpu_time;
+
+  // in case we have to print out something, do that here...
+  if ((output_frequency == every_call || output_frequency == every_call_and_summary)
+      && output_is_enabled == true)
+    {
+      std::string output_time;
+      std::ostringstream cpu;
+      cpu << cpu_time << "s";
+      std::ostringstream wall;
+      wall << section.timer.wall_time() << "s";
+      if (output_type == cpu_times)
+        output_time = ", CPU time: " + cpu.str();
+      else if (output_type == wall_times)
+        output_time = ", wall time: " + wall.str() + ".";
+      else
+        output_time = ", CPU/wall time: " + cpu.str() + " / " + wall.str() + ".";
+
+      out_stream << actual_section_name << output_time
+                 << std::endl;
+    }
+
+  // delete the index from the list of
+  // active ones
+  active_sections.erase (std::find (active_sections.begin(), active_sections.end(),
+                                    actual_section_name));
+}
+
+
+
+// Write a summary table to out_stream: one table of CPU times (unless
+// output_type is wall_times) and one of wall clock times (unless
+// output_type is cpu_times). Each table lists, per section, the number
+// of calls, the accumulated time, and its share of the total time. The
+// stream's flags, precision, and width are restored at the end.
+void
+TimerOutput::print_summary () const
+{
+  // we are going to change the
+  // precision and width of output
+  // below. store the old values so we
+  // can restore it later on
+  const std::istream::fmtflags old_flags = out_stream.get_stream().flags();
+  const std::streamsize    old_precision = out_stream.get_stream().precision ();
+  const std::streamsize    old_width     = out_stream.get_stream().width ();
+
+  // in case we want to write CPU times
+  if (output_type != wall_times)
+    {
+      double total_cpu_time = Utilities::MPI::sum(timer_all(), mpi_communicator);
+
+      // check that the sum of all times is
+      // less or equal than the total
+      // time. otherwise, we might have
+      // generated a lot of overhead in this
+      // function.
+      double check_time = 0.;
+      for (std::map<std::string, Section>::const_iterator
+           i = sections.begin(); i!=sections.end(); ++i)
+        check_time += i->second.total_cpu_time;
+
+      // if the sections add up to more than the measured total, use their
+      // sum as the total so that no percentage exceeds 100; a note is
+      // printed after the table in that case
+      const double time_gap = check_time-total_cpu_time;
+      if (time_gap > 0.0)
+        total_cpu_time = check_time;
+
+      // generate a nice table
+      out_stream << "\n\n"
+                 << "+---------------------------------------------+------------"
+                 << "+------------+\n"
+                 << "| Total CPU time elapsed since start          |";
+      out_stream << std::setw(10) << std::setprecision(3) << std::right;
+      out_stream << total_cpu_time << "s |            |\n";
+      out_stream << "|                                             |            "
+                 << "|            |\n";
+      out_stream << "| Section                         | no. calls |";
+      out_stream << std::setw(10);
+      out_stream << std::setprecision(3);
+      out_stream << "  CPU time "  << " | % of total |\n";
+      out_stream << "+---------------------------------+-----------+------------"
+                 << "+------------+";
+      for (std::map<std::string, Section>::const_iterator
+           i = sections.begin(); i!=sections.end(); ++i)
+        {
+          std::string name_out = i->first;
+
+          // resize the array so that it is always
+          // of the same size
+          // NOTE(review): find_first_not_of() returns std::string::size_type;
+          // storing it in an unsigned int may truncate npos. erase() clamps
+          // the count to the string length, so an all-space name still ends
+          // up empty -- consider using size_type here.
+          unsigned int pos_non_space = name_out.find_first_not_of (" ");
+          name_out.erase(0, pos_non_space);
+          name_out.resize (32, ' ');
+          out_stream << std::endl;
+          out_stream << "| " << name_out;
+          out_stream << "| ";
+          out_stream << std::setw(9);
+          out_stream << i->second.n_calls << " |";
+          out_stream << std::setw(10);
+          out_stream << std::setprecision(3);
+          out_stream << i->second.total_cpu_time << "s |";
+          out_stream << std::setw(10);
+          out_stream << std::setprecision(2);
+          // avoid dividing by a zero total
+          if (total_cpu_time != 0)
+            out_stream << i->second.total_cpu_time/total_cpu_time * 100 << "% |";
+          else
+            out_stream << 0.0 << "% |";
+        }
+      out_stream << std::endl
+                 << "+---------------------------------+-----------+"
+                 << "------------+------------+\n"
+                 << std::endl;
+
+      if (time_gap > 0.0)
+        out_stream << std::endl
+                   << "Note: The sum of counted times is " << time_gap
+                   << " seconds larger than the total time.\n"
+                   << "(Timer function may have introduced too much overhead, or different\n"
+                   << "section timers may have run at the same time.)" << std::endl;
+    }
+
+  // in case we want to write out wallclock times
+  if (output_type != cpu_times)
+    {
+      double total_wall_time = timer_all.wall_time();
+
+      // now generate a nice table
+      out_stream << "\n\n"
+                 << "+---------------------------------------------+------------"
+                 << "+------------+\n"
+                 << "| Total wallclock time elapsed since start    |";
+      out_stream << std::setw(10) << std::setprecision(3) << std::right;
+      out_stream << total_wall_time << "s |            |\n";
+      out_stream << "|                                             |            "
+                 << "|            |\n";
+      out_stream << "| Section                         | no. calls |";
+      out_stream << std::setw(10);
+      out_stream << std::setprecision(3);
+      out_stream << "  wall time | % of total |\n";
+      out_stream << "+---------------------------------+-----------+------------"
+                 << "+------------+";
+      for (std::map<std::string, Section>::const_iterator
+           i = sections.begin(); i!=sections.end(); ++i)
+        {
+          std::string name_out = i->first;
+
+          // resize the array so that it is always
+          // of the same size
+          unsigned int pos_non_space = name_out.find_first_not_of (" ");
+          name_out.erase(0, pos_non_space);
+          name_out.resize (32, ' ');
+          out_stream << std::endl;
+          out_stream << "| " << name_out;
+          out_stream << "| ";
+          out_stream << std::setw(9);
+          out_stream << i->second.n_calls << " |";
+          out_stream << std::setw(10);
+          out_stream << std::setprecision(3);
+          out_stream << i->second.total_wall_time << "s |";
+          out_stream << std::setw(10);
+          out_stream << std::setprecision(2);
+          // a zero total yields inf or nan; print 0 instead
+          double value = i->second.total_wall_time/total_wall_time * 100;
+          if (!numbers::is_finite(value))
+            value = 0.0;
+          out_stream << value << "% |";
+        }
+      out_stream << std::endl
+                 << "+---------------------------------+-----------+"
+                 << "------------+------------+\n"
+                 << std::endl;
+    }
+
+  // restore previous precision and width
+  out_stream.get_stream().precision (old_precision);
+  out_stream.get_stream().width (old_width);
+  out_stream.get_stream().flags (old_flags);
+}
+
+
+
+// Switch off output. While disabled, leave_subsection() prints no
+// per-call line and the destructor prints no summary.
+void
+TimerOutput::disable_output ()
+{
+  output_is_enabled = false;
+}
+
+
+
+// Switch output back on after a call to disable_output().
+void
+TimerOutput::enable_output ()
+{
+  output_is_enabled = true;
+}
+
+// Forget all sections recorded so far, including the currently active
+// ones, and call restart() on the overall timer. Protected by the
+// object's mutex.
+void
+TimerOutput::reset ()
+{
+  Threads::Mutex::ScopedLock lock (mutex);
+  sections.clear();
+  active_sections.clear();
+  timer_all.restart();
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/base/utilities.cc b/source/base/utilities.cc
new file mode 100644
index 0000000..a8a9fe9
--- /dev/null
+++ b/source/base/utilities.cc
@@ -0,0 +1,849 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/thread_local_storage.h>
+
+DEAL_II_DISABLE_EXTRA_DIAGNOSTICS
+#include <boost/math/special_functions/erf.hpp>
+#include <boost/lexical_cast.hpp>
+#include <boost/random.hpp>
+DEAL_II_ENABLE_EXTRA_DIAGNOSTICS
+
+#include <algorithm>
+#include <cctype>
+#include <cerrno>
+#include <cmath>
+#include <cstddef>
+#include <cstdio>
+#include <ctime>
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <limits>
+#include <sstream>
+
+#ifndef DEAL_II_MSVC
+#  include <stdlib.h>
+#endif
+
+#ifdef DEAL_II_MSVC
+#  include <winsock2.h>
+#endif
+
+
+#ifdef DEAL_II_WITH_TRILINOS
+#  ifdef DEAL_II_WITH_MPI
+#    include <Epetra_MpiComm.h>
+#    include <deal.II/lac/vector_memory.h>
+#    include <deal.II/lac/trilinos_vector.h>
+#    include <deal.II/lac/trilinos_block_vector.h>
+#  endif
+#  include "Teuchos_RCP.hpp"
+#  include "Epetra_SerialComm.h"
+#endif
+
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace Utilities
+{
+
+
+  // Exception classes used by the functions in this namespace.
+
+  // A number (arg1) could not be converted to a string with the
+  // requested number of digits (arg2).
+  DeclException2 (ExcInvalidNumber2StringConversersion,
+                  unsigned int, unsigned int,
+                  << "When trying to convert " << arg1
+                  << " to a string with " << arg2 << " digits");
+  // The number arg1 is not valid in the given context.
+  DeclException1 (ExcInvalidNumber,
+                  unsigned int,
+                  << "Invalid number " << arg1);
+  // The string arg1 could not be converted to the desired type.
+  DeclException1 (ExcCantConvertString,
+                  std::string,
+                  << "Can't convert the string " << arg1
+                  << " to the desired type");
+
+  // Convert an unsigned integer to a string, padded with leading zeros
+  // up to 'digits' characters. Simply forwards to to_string().
+  std::string
+  int_to_string (const unsigned int value, const unsigned int digits)
+  {
+    return to_string(value,digits);
+  }
+
+  // Convert 'value' to its textual representation. If 'digits' is not
+  // numbers::invalid_unsigned_int and the text is shorter than 'digits'
+  // characters, zeros are inserted in front of the number (after a
+  // leading minus sign, if any) until it has that length.
+  template <typename number>
+  std::string
+  to_string (const number value, const unsigned int digits)
+  {
+    std::string text = boost::lexical_cast<std::string>(value);
+
+    // nothing to do if no padding was requested or the text is already
+    // long enough
+    if ((digits == numbers::invalid_unsigned_int) || (text.size() >= digits))
+      return text;
+
+    // the zeros go behind a leading minus sign
+    const std::string::size_type insert_at = (text[0] == '-') ? 1 : 0;
+    text.insert(insert_at, digits - text.size(), '0');
+
+    return text;
+  }
+
+
+  // Return a copy of 'input' in which every occurrence of 'from' is
+  // replaced by 'to'. The search continues behind the inserted text, so
+  // replacements are never rescanned. An empty 'from' leaves the input
+  // unchanged.
+  std::string
+  replace_in_string(const std::string &input,
+                    const std::string &from,
+                    const std::string &to)
+  {
+    if (from.empty())
+      return input;
+
+    std::string result(input);
+    for (std::string::size_type hit = result.find(from);
+         hit != std::string::npos;
+         hit = result.find(from, hit + to.size()))
+      result.replace(hit, from.size(), to);
+
+    return result;
+  }
+
+  // Return a copy of 'input' with all leading and trailing whitespace
+  // (as classified by std::isspace) removed. An empty or all-whitespace
+  // input yields an empty string.
+  std::string
+  trim(const std::string &input)
+  {
+    // [first, last) is the range of characters to keep. using a
+    // one-past-the-end index avoids the wrap-around of size()-1 for an
+    // empty string
+    std::string::size_type first = 0;
+    std::string::size_type last  = input.size();
+
+    // std::isspace requires a value representable as unsigned char
+    while ((first < last)
+           && std::isspace(static_cast<unsigned char>(input[first])))
+      ++first;
+
+    while ((last > first)
+           && std::isspace(static_cast<unsigned char>(input[last-1])))
+      --last;
+
+    return input.substr(first, last - first);
+  }
+
+
+
+  // Return "dim" if both dimensions agree, and "dim,spacedim" otherwise.
+  std::string
+  dim_string(const int dim, const int spacedim)
+  {
+    std::string result = int_to_string(dim);
+    if (dim != spacedim)
+      result += "," + int_to_string(spacedim);
+    return result;
+  }
+
+
+  // Return the number of decimal digits needed to write 'max_number'.
+  // Zero needs one digit. Works for every value of unsigned int.
+  unsigned int
+  needed_digits (const unsigned int max_number)
+  {
+    unsigned int digits = 1;
+    // each division by ten that leaves something behind is one more digit
+    for (unsigned int rest = max_number / 10; rest != 0; rest /= 10)
+      ++digits;
+    return digits;
+  }
+
+
+
+  // Convert a string to an int. Spaces at either end are ignored. Throws
+  // an exception if the string is empty, contains anything other than a
+  // number, or holds a value that does not fit into an int.
+  int
+  string_to_int (const std::string &s_)
+  {
+    // trim whitespace on either side of the text if necessary
+    std::string s = s_;
+    while ((s.size() > 0) && (s[0] == ' '))
+      s.erase (s.begin());
+    while ((s.size() > 0) && (s[s.size()-1] == ' '))
+      s.erase (s.end()-1);
+
+    // now convert and see whether we succeed. note that strtol only
+    // touches errno if an error occurred, so if we want to check
+    // whether an error happened, we need to make sure that errno==0
+    // before calling strtol since otherwise it may be that the
+    // conversion succeeds and that errno remains at the value it
+    // was before, whatever that was
+    char *p;
+    errno = 0;
+    const long value = std::strtol(s.c_str(), &p, 10);
+    AssertThrow ( !((errno != 0) || (s.size() == 0) || ((s.size()>0) && (*p != '\0'))),
+                  ExcMessage ("Can't convert <" + s + "> to an integer."));
+
+    // strtol returns a long. where long is wider than int, reject values
+    // that would otherwise be silently truncated
+    AssertThrow ((value >= std::numeric_limits<int>::min()) &&
+                 (value <= std::numeric_limits<int>::max()),
+                 ExcMessage ("Can't convert <" + s + "> to an integer."));
+
+    return static_cast<int>(value);
+  }
+
+
+
+  // Convert every string of the list to an int, keeping the order.
+  std::vector<int>
+  string_to_int (const std::vector<std::string> &s)
+  {
+    std::vector<int> values;
+    values.reserve (s.size());
+    for (std::vector<std::string>::const_iterator text = s.begin();
+         text != s.end(); ++text)
+      values.push_back (string_to_int (*text));
+    return values;
+  }
+
+
+
+  // Convert a string to a double. Spaces at either end are ignored.
+  // Throws an exception if the string is empty, contains anything other
+  // than a number, or if strtod reports an error through errno.
+  double
+  string_to_double (const std::string &s_)
+  {
+    // trim whitespace on either side of the text if necessary
+    std::string s = s_;
+    while ((s.size() > 0) && (s[0] == ' '))
+      s.erase (s.begin());
+    while ((s.size() > 0) && (s[s.size()-1] == ' '))
+      s.erase (s.end()-1);
+
+    // now convert and see whether we succeed. note that strtod only
+    // touches errno if an error occurred, so if we want to check
+    // whether an error happened, we need to make sure that errno==0
+    // before calling strtod since otherwise it may be that the
+    // conversion succeeds and that errno remains at the value it
+    // was before, whatever that was
+    char *p;
+    errno = 0;
+    const double d = std::strtod(s.c_str(), &p);
+    AssertThrow ( !((errno != 0) || (s.size() == 0) || ((s.size()>0) && (*p != '\0'))),
+                  ExcMessage ("Can't convert <" + s + "> to a double."));
+
+    return d;
+  }
+
+
+
+  // Convert every string of the list to a double, keeping the order.
+  std::vector<double>
+  string_to_double (const std::vector<std::string> &s)
+  {
+    std::vector<double> values;
+    values.reserve (s.size());
+    for (std::vector<std::string>::const_iterator text = s.begin();
+         text != s.end(); ++text)
+      values.push_back (string_to_double (*text));
+    return values;
+  }
+
+
+
+  // Split 's' at every occurrence of 'delimiter' and return the pieces
+  // with spaces stripped from both ends of each piece. Trailing spaces of
+  // the whole input are dropped first, so an input ending in a delimiter
+  // (possibly followed by spaces) does not produce a trailing empty
+  // element. An empty input yields an empty list.
+  std::vector<std::string>
+  split_string_list (const std::string &s,
+                     const char         delimiter)
+  {
+    // work on a copy without the trailing spaces
+    std::string text = s;
+    const std::string::size_type last_kept = text.find_last_not_of (' ');
+    text.erase (last_kept == std::string::npos ? 0 : last_kept+1);
+
+    std::vector<std::string> pieces;
+    pieces.reserve (std::count (text.begin(), text.end(), delimiter)+1);
+
+    // walk through the text, one element per iteration. 'begin' is the
+    // position of the first character not yet handed out
+    std::string::size_type begin = 0;
+    while (begin < text.size())
+      {
+        const std::string::size_type delimiter_pos = text.find (delimiter, begin);
+
+        std::string piece;
+        if (delimiter_pos != std::string::npos)
+          {
+            piece = text.substr (begin, delimiter_pos - begin);
+            begin = delimiter_pos + 1;
+          }
+        else
+          {
+            // no further delimiter: the rest is the last element
+            piece = text.substr (begin);
+            begin = text.size();
+          }
+
+        // strip spaces from this element's front and end
+        const std::string::size_type first_char = piece.find_first_not_of (' ');
+        if (first_char == std::string::npos)
+          piece.clear();
+        else
+          piece = piece.substr (first_char,
+                                piece.find_last_not_of (' ') - first_char + 1);
+
+        pieces.push_back (piece);
+      }
+
+    return pieces;
+  }
+
+
+
+  // Break 'original_text' into lines, splitting at occurrences of
+  // 'delimiter' so that lines are at most about 'width' characters long
+  // where a delimiter is available. Newline characters in the text that
+  // occur within the first 'width' characters force a line break.
+  // Delimiters at the beginning and end of each line are removed. A piece
+  // without any delimiter that is longer than 'width' is kept in one line.
+  std::vector<std::string>
+  break_text_into_lines (const std::string &original_text,
+                         const unsigned int width,
+                         const char delimiter)
+  {
+    std::string              text = original_text;
+    std::vector<std::string> lines;
+
+    // remove trailing delimiters
+    while ((text.length() != 0) && (text[text.length()-1] == delimiter))
+      text.erase(text.length()-1,1);
+
+    // then split the text into lines
+    while (text.length() != 0)
+      {
+        // in each iteration, first remove
+        // leading delimiters
+        while ((text.length() != 0) && (text[0] == delimiter))
+          text.erase(0, 1);
+
+        // an explicit newline within the allowed width ends the line
+        // right there
+        std::size_t pos_newline = text.find_first_of("\n", 0);
+        if (pos_newline != std::string::npos && pos_newline <= width)
+          {
+            std::string line (text, 0, pos_newline);
+            while ((line.length() != 0) && (line[line.length()-1] == delimiter))
+              line.erase(line.length()-1,1);
+            lines.push_back (line);
+            text.erase (0, pos_newline+1);
+            continue;
+          }
+
+        // if we can fit everything into one
+        // line, then do so. otherwise, we have
+        // to keep breaking
+        if (text.length() < width)
+          {
+            // remove trailing delimiters
+            while ((text.length() != 0) && (text[text.length()-1] == delimiter))
+              text.erase(text.length()-1,1);
+            lines.push_back (text);
+            text = "";
+          }
+        else
+          {
+            // starting at position width, find the
+            // location of the previous delimiter, so
+            // that we can break around there
+            int location = std::min<int>(width,text.length()-1);
+            for (; location>0; --location)
+              if (text[location] == delimiter)
+                break;
+
+            // if there is no delimiter before that position,
+            // then look for the next one coming up. if there
+            // is none at all, 'location' ends up at the end
+            // of the text
+            if (location == 0)
+              for (location = std::min<int>(width,text.length()-1);
+                   location<static_cast<int>(text.length());
+                   ++location)
+                if (text[location] == delimiter)
+                  break;
+
+            // now take the text up to the found
+            // location and put it into a single
+            // line, and remove it from 'text'
+            std::string line (text, 0, location);
+            while ((line.length() != 0) && (line[line.length()-1] == delimiter))
+              line.erase(line.length()-1,1);
+            lines.push_back (line);
+            text.erase (0, location);
+          }
+      }
+
+    return lines;
+  }
+
+
+
+  /**
+   * Return whether @p name begins with the character sequence
+   * @p pattern. An empty @p pattern matches every @p name.
+   */
+  bool
+  match_at_string_start (const std::string &name,
+                         const std::string &pattern)
+  {
+    // compare() clamps the compared range to the length of 'name', so a
+    // pattern that is longer than 'name' always compares unequal
+    return name.compare (0, pattern.size(), pattern) == 0;
+  }
+
+
+
+  /**
+   * Try to read an integer from @p name, starting at the character with
+   * index @p position. If this succeeds, return the number together
+   * with a digit count between one and seven; otherwise return the pair
+   * (-1, numbers::invalid_unsigned_int).
+   *
+   * NOTE(review): the count is derived from the value alone, so every
+   * number below ten, negative ones included, is reported with a count
+   * of one. Confirm that callers only pass non-negative numbers
+   * without leading zeros.
+   */
+  std::pair<int, unsigned int>
+  get_integer_at_position (const std::string &name,
+                           const unsigned int position)
+  {
+    Assert (position < name.size(), ExcInternalError());
+
+    // let a stream parse whatever follows the given position
+    const std::string tail (name.begin()+position, name.end());
+    std::istringstream input (tail);
+
+    int value;
+    if (!(input >> value))
+      return std::make_pair (-1, numbers::invalid_unsigned_int);
+
+    // compare against successive powers of ten to find the number of
+    // digits; more than seven digits are not supported
+    int limit = 10;
+    for (unsigned int n_digits=1; n_digits<=7; ++n_digits)
+      {
+        if (value < limit)
+          return std::make_pair (value, n_digits);
+        limit *= 10;
+      }
+
+    Assert (false, ExcNotImplemented());
+    return std::make_pair (-1, numbers::invalid_unsigned_int);
+  }
+
+
+
+  /**
+   * Draw a number from a boost::normal_distribution object that is set
+   * up with the arguments @p a and @p sigma. If @p sigma is zero, @p a
+   * is returned directly and no random number is generated.
+   */
+  double
+  generate_normal_random_number (const double a,
+                                 const double sigma)
+  {
+    // without noise, the answer is known
+    if (sigma == 0)
+      return a;
+
+    // rand() is not reentrant when used from several threads, and
+    // rand_r(), while reentrant, does not yield results that are
+    // reproducible between threads. neither being usable, we keep
+    // the generator in thread-local storage instead
+    static Threads::ThreadLocalStorage<boost::mt19937> random_number_generator;
+
+    boost::normal_distribution<> distribution (a, sigma);
+    return distribution (random_number_generator.get());
+  }
+
+
+
+  /**
+   * Return the vector whose i-th entry is n-1-permutation[i], with n
+   * being the number of entries of @p permutation.
+   */
+  std::vector<unsigned int>
+  reverse_permutation (const std::vector<unsigned int> &permutation)
+  {
+    const unsigned int n = permutation.size();
+
+    std::vector<unsigned int> reversed (n);
+    unsigned int i = 0;
+    while (i != n)
+      {
+        reversed[i] = (n - 1) - permutation[i];
+        ++i;
+      }
+
+    return reversed;
+  }
+
+
+
+  /**
+   * Compute the inverse of @p permutation, i.e. a vector 'out' with
+   * out[permutation[i]] == i for every index i. Assertions check that
+   * all entries are smaller than the vector length and that every
+   * position of the result has been written.
+   */
+  std::vector<unsigned int>
+  invert_permutation (const std::vector<unsigned int> &permutation)
+  {
+    const unsigned int n = permutation.size();
+
+    // every position starts out with a marker value that the loop
+    // below is expected to overwrite
+    std::vector<unsigned int> out (n, numbers::invalid_unsigned_int);
+
+    unsigned int i = 0;
+    while (i < n)
+      {
+        Assert (permutation[i] < n, ExcIndexRange (permutation[i], 0, n));
+        out[permutation[i]] = i;
+        ++i;
+      }
+
+    // a marker that survived means that some index was never the
+    // target of an assignment
+    for (i=0; i<n; ++i)
+      Assert (out[i] != numbers::invalid_unsigned_int,
+              ExcMessage ("The given input permutation had duplicate entries!"));
+
+    return out;
+  }
+
+  /**
+   * Return the vector whose i-th entry is n-1-permutation[i], with n
+   * being the number of entries of @p permutation. This is the variant
+   * for 64 bit entries.
+   */
+  std::vector<unsigned long long int>
+  reverse_permutation (const std::vector<unsigned long long int> &permutation)
+  {
+    const unsigned long long int n = permutation.size();
+
+    std::vector<unsigned long long int> reversed (n);
+    unsigned long long int i = 0;
+    while (i != n)
+      {
+        reversed[i] = (n - 1) - permutation[i];
+        ++i;
+      }
+
+    return reversed;
+  }
+
+
+
+  /**
+   * Compute the inverse of @p permutation, i.e. a vector 'out' with
+   * out[permutation[i]] == i for every index i. This is the variant for
+   * 64 bit entries. Assertions check that all entries are smaller than
+   * the vector length and that every position of the result has been
+   * written.
+   */
+  std::vector<unsigned long long int>
+  invert_permutation (const std::vector<unsigned long long int> &permutation)
+  {
+    const unsigned long long int n = permutation.size();
+
+    // marker for positions that have not been written yet. we use the
+    // largest 64 bit value here: numbers::invalid_unsigned_int is, as
+    // its name says, the marker for unsigned int and would be a
+    // legitimate entry of 'out' once a permutation is longer than that
+    // value, which would make the check below fail for valid input
+    const unsigned long long int not_set
+      = static_cast<unsigned long long int>(-1);
+
+    std::vector<unsigned long long int> out (n, not_set);
+
+    for (unsigned long long int i=0; i<n; ++i)
+      {
+        Assert (permutation[i] < n, ExcIndexRange (permutation[i], 0, n));
+        out[permutation[i]] = i;
+      }
+
+    // check that we have actually reached
+    // all indices
+    for (unsigned long long int i=0; i<n; ++i)
+      Assert (out[i] != not_set,
+              ExcMessage ("The given input permutation had duplicate entries!"));
+
+    return out;
+  }
+
+
+  /**
+   * Return the vector whose i-th entry is n-1-permutation[i], with n
+   * being the number of entries of @p permutation. This is the generic
+   * variant for arbitrary integer types.
+   */
+  template <typename Integer>
+  std::vector<Integer>
+  reverse_permutation (const std::vector<Integer> &permutation)
+  {
+    // use the size type of the vector: storing the length in an
+    // unsigned int would silently cut it off for very long vectors of
+    // 64 bit integers
+    typedef typename std::vector<Integer>::size_type size_type;
+    const size_type n = permutation.size();
+
+    std::vector<Integer> out (n);
+    for (size_type i=0; i<n; ++i)
+      out[i] = static_cast<Integer>(n - 1 - permutation[i]);
+
+    return out;
+  }
+
+
+
+  /**
+   * Compute the inverse of @p permutation, i.e. a vector 'out' with
+   * out[permutation[i]] == i for every index i. This is the generic
+   * variant for arbitrary integer types. Assertions check that all
+   * entries are smaller than the vector length and that every position
+   * of the result has been written.
+   */
+  template <typename Integer>
+  std::vector<Integer>
+  invert_permutation (const std::vector<Integer> &permutation)
+  {
+    // use the size type of the vector: storing the length in an
+    // unsigned int would silently cut it off for very long vectors
+    typedef typename std::vector<Integer>::size_type size_type;
+    const size_type n = permutation.size();
+
+    // marker for positions that have not been written yet, expressed
+    // in the entry type itself so that the comparison below is exact.
+    // valid entries are smaller than n, so the marker cannot be
+    // mistaken for one of them as long as n does not exceed the
+    // largest value of Integer
+    const Integer not_set = static_cast<Integer>(-1);
+
+    std::vector<Integer> out (n, not_set);
+
+    for (size_type i=0; i<n; ++i)
+      {
+        Assert (static_cast<size_type>(permutation[i]) < n,
+                ExcIndexRange (permutation[i], 0, n));
+        out[permutation[i]] = static_cast<Integer>(i);
+      }
+
+    // check that we have actually reached
+    // all indices
+    for (size_type i=0; i<n; ++i)
+      Assert (out[i] != not_set,
+              ExcMessage ("The given input permutation had duplicate entries!"));
+
+    return out;
+  }
+
+
+
+
+  namespace System
+  {
+#if defined(__linux__)
+
+    /**
+     * Return the first number stored in the file /proc/loadavg. An
+     * ExcIO exception is raised through AssertThrow if the file cannot
+     * be opened. If no number can be read from the file, zero is
+     * returned.
+     */
+    double get_cpu_load ()
+    {
+      std::ifstream cpuinfo;
+      cpuinfo.open("/proc/loadavg");
+
+      AssertThrow(cpuinfo, ExcIO());
+
+      // start from a defined value: a failed extraction need not write
+      // to its target, and we must not return an indeterminate number
+      double load = 0.;
+      cpuinfo >> load;
+
+      return load;
+    }
+
+#else
+
+    /**
+     * On systems other than Linux, no load information is read and
+     * zero is returned.
+     */
+    double get_cpu_load ()
+    {
+      return 0.;
+    }
+
+#endif
+
+
+
+    /**
+     * Fill @p stats with the numbers listed under the keys VmPeak,
+     * VmSize, VmHWM and VmRSS in /proc/self/status. All four fields are
+     * set to zero first and stay zero on systems other than Linux, or
+     * for keys that are not found.
+     */
+    void get_memory_stats (MemoryStats &stats)
+    {
+      stats.VmPeak = stats.VmSize = stats.VmHWM = stats.VmRSS = 0;
+
+      // parsing /proc/self/stat would be a
+      // lot easier, but it does not contain
+      // VmHWM, so we use /status instead.
+#if defined(__linux__)
+      std::ifstream file("/proc/self/status");
+      std::string line;
+      std::string name;
+
+      // read the first word of each line. the loop is driven by the
+      // success of the read rather than by eof(): if the file could not
+      // be opened or a read fails, the eof flag is never set and
+      // testing for it alone would loop forever
+      while (file >> name)
+        {
+          if (name == "VmPeak:")
+            file >> stats.VmPeak;
+          else if (name == "VmSize:")
+            file >> stats.VmSize;
+          else if (name == "VmHWM:")
+            file >> stats.VmHWM;
+          else if (name == "VmRSS:")
+            {
+              file >> stats.VmRSS;
+              break; //this is always the last entry
+            }
+
+          // skip whatever is left of the current line
+          std::getline(file, line);
+        }
+#endif
+    }
+
+
+
+    /**
+     * Return the name of the host as reported by gethostname(). The
+     * string "unknown" is returned if that function is not available
+     * in this configuration or if it reports an error.
+     */
+    std::string get_hostname ()
+    {
+#if defined(DEAL_II_HAVE_UNISTD_H) && defined(DEAL_II_HAVE_GETHOSTNAME)
+      const unsigned int N=1024;
+
+      // zero the whole buffer and let gethostname() use all but its
+      // last byte: that way the result is terminated even if the name
+      // had to be truncated and the function did not add a terminator
+      char hostname[N] = { '\0' };
+      if (gethostname (&(hostname[0]), N-1) != 0)
+        return "unknown";
+#else
+      std::string hostname("unknown");
+#endif
+      return hostname;
+    }
+
+
+
+    /**
+     * Return the current local time as a string of the form h:mm:ss.
+     * Minutes and seconds are padded to two digits, the hour is not.
+     */
+    std::string get_time ()
+    {
+      std::time_t  now   = std::time (0);
+      std::tm     *local = std::localtime(&now);
+
+      std::ostringstream out;
+      out << local->tm_hour << ":";
+
+      if (local->tm_min < 10)
+        out << "0";
+      out << local->tm_min << ":";
+
+      if (local->tm_sec < 10)
+        out << "0";
+      out << local->tm_sec;
+
+      return out.str();
+    }
+
+
+
+    /**
+     * Return the current local date as a string of the form
+     * year/month/day, without padding of month and day.
+     */
+    std::string get_date ()
+    {
+      std::time_t  now   = std::time (0);
+      std::tm     *local = std::localtime(&now);
+
+      // struct tm counts years from 1900 and months from zero
+      const int year  = local->tm_year + 1900;
+      const int month = local->tm_mon + 1;
+      const int day   = local->tm_mday;
+
+      std::ostringstream out;
+      out << year << "/" << month << "/" << day;
+
+      return out.str();
+    }
+
+
+
+    /**
+     * Allocate @p size bytes and store the address in @p memptr. Unless
+     * DEAL_II_MSVC is defined, this forwards to the global
+     * posix_memalign() function with the given @p alignment. An
+     * ExcOutOfMemory exception is raised through AssertThrow if the
+     * allocation reports an error or yields a null pointer.
+     */
+    void posix_memalign (void **memptr, size_t alignment, size_t size)
+    {
+#ifdef DEAL_II_MSVC
+      // Windows does not appear to have posix_memalign. the regular
+      // malloc is used there, which means that the requested alignment
+      // is not taken into account
+      (void)alignment;
+      *memptr = malloc (size);
+      AssertThrow (*memptr != 0, ExcOutOfMemory());
+#else
+      const int ierr = ::posix_memalign (memptr, alignment, size);
+
+      AssertThrow (ierr == 0, ExcOutOfMemory());
+      AssertThrow (*memptr != 0, ExcOutOfMemory());
+#endif
+    }
+
+
+
+    /**
+     * Return whatever Utilities::MPI::job_supports_mpi() returns; this
+     * function only forwards to it.
+     */
+    bool job_supports_mpi ()
+    {
+      const bool answer = Utilities::MPI::job_supports_mpi();
+      return answer;
+    }
+  }
+
+
+#ifdef DEAL_II_WITH_TRILINOS
+
+  namespace Trilinos
+  {
+    /**
+     * Return a reference to a communicator object that is built on
+     * MPI_COMM_WORLD if DEAL_II_WITH_MPI is defined, and to a serial
+     * communicator object otherwise. The object is a function-local
+     * static, so all calls return a reference to the same object.
+     */
+    const Epetra_Comm &
+    comm_world()
+    {
+#ifdef DEAL_II_WITH_MPI
+      typedef Epetra_MpiComm comm_type;
+      static Teuchos::RCP<comm_type>
+      communicator = Teuchos::rcp (new comm_type (MPI_COMM_WORLD), true);
+#else
+      typedef Epetra_SerialComm comm_type;
+      static Teuchos::RCP<comm_type>
+      communicator = Teuchos::rcp (new comm_type (), true);
+#endif
+
+      return *communicator;
+    }
+
+
+
+    /**
+     * Return a reference to a communicator object that is built on
+     * MPI_COMM_SELF if DEAL_II_WITH_MPI is defined, and to a serial
+     * communicator object otherwise. The object is a function-local
+     * static, so all calls return a reference to the same object.
+     */
+    const Epetra_Comm &
+    comm_self()
+    {
+#ifdef DEAL_II_WITH_MPI
+      typedef Epetra_MpiComm comm_type;
+      static Teuchos::RCP<comm_type>
+      communicator = Teuchos::rcp (new comm_type (MPI_COMM_SELF), true);
+#else
+      typedef Epetra_SerialComm comm_type;
+      static Teuchos::RCP<comm_type>
+      communicator = Teuchos::rcp (new comm_type (), true);
+#endif
+
+      return *communicator;
+    }
+
+
+
+    /**
+     * Create a new communicator object that corresponds to
+     * @p communicator and return a pointer to it. For an Epetra_MpiComm
+     * argument, the new object is built on the result of
+     * Utilities::MPI::duplicate_communicator(); otherwise the argument
+     * has to be an Epetra_SerialComm, of which a copy is made.
+     *
+     * The returned object is allocated with 'new'.
+     * NOTE(review): presumably the caller is responsible for deleting
+     * it -- confirm against the callers.
+     */
+    Epetra_Comm *
+    duplicate_communicator (const Epetra_Comm &communicator)
+    {
+#ifdef DEAL_II_WITH_MPI
+
+      // if the object is in fact a parallel MPI communicator, hand
+      // back a new object that sits on a duplicate of the underlying
+      // MPI communicator
+      if (const Epetra_MpiComm
+          *mpi_comm = dynamic_cast<const Epetra_MpiComm *>(&communicator))
+        return new Epetra_MpiComm(Utilities::MPI::
+                                  duplicate_communicator(mpi_comm->GetMpiComm()));
+#endif
+
+      // we get here without MPI support, or if the object was not an
+      // MPI communicator. the only other kind we know how to handle is
+      // the serial one, which is simply copied
+      Assert (dynamic_cast<const Epetra_SerialComm *>(&communicator)
+              != 0,
+              ExcInternalError());
+      const Epetra_SerialComm &
+      serial_comm = dynamic_cast<const Epetra_SerialComm &>(communicator);
+      return new Epetra_SerialComm(serial_comm);
+    }
+
+
+
+    /**
+     * Free the MPI communicator that @p communicator is built on, if
+     * the object is an Epetra_MpiComm. The object itself stays alive
+     * and is re-targeted to MPI_COMM_SELF. For any other kind of
+     * communicator object, and if DEAL_II_WITH_MPI is not defined,
+     * nothing happens.
+     */
+    void destroy_communicator (Epetra_Comm &communicator)
+    {
+#ifdef DEAL_II_WITH_MPI
+      if (Epetra_MpiComm
+          *mpi_comm = dynamic_cast<Epetra_MpiComm *>(&communicator))
+        {
+          // remember the handle first, then make the object refer to
+          // MPI_COMM_SELF; the handle is freed only after the object
+          // no longer refers to it
+          MPI_Comm comm = mpi_comm->GetMpiComm();
+          *mpi_comm = Epetra_MpiComm(MPI_COMM_SELF);
+          MPI_Comm_free (&comm);
+        }
+#endif
+    }
+
+
+
+    /**
+     * Return the value of NumProc() of @p mpi_communicator, converted
+     * to an unsigned integer.
+     */
+    unsigned int get_n_mpi_processes (const Epetra_Comm &mpi_communicator)
+    {
+      const unsigned int n_processes = mpi_communicator.NumProc();
+      return n_processes;
+    }
+
+
+    /**
+     * Return the value of MyPID() of @p mpi_communicator, converted to
+     * an unsigned integer.
+     */
+    unsigned int get_this_mpi_process (const Epetra_Comm &mpi_communicator)
+    {
+      return static_cast<unsigned int>(mpi_communicator.MyPID());
+    }
+
+
+
+    /**
+     * Create an Epetra_Map that has the same number of global and
+     * local elements and the same index base as @p map, but that lives
+     * on the communicator @p comm. If @p map is not a linear map, the
+     * list of locally stored global elements is taken over as well.
+     */
+    Epetra_Map
+    duplicate_map (const Epetra_BlockMap &map,
+                   const Epetra_Comm     &comm)
+    {
+      if (map.LinearMap() == true)
+        {
+          // each processor stores a
+          // contiguous range of
+          // elements in the
+          // following constructor
+          // call
+          return Epetra_Map (map.NumGlobalElements(),
+                             map.NumMyElements(),
+                             map.IndexBase(),
+                             comm);
+        }
+      else
+        {
+          // the range is not contiguous, so pass along the explicit
+          // list of elements. take the index base from the map we
+          // duplicate, as in the branch above, rather than assuming
+          // that it is zero
+          return Epetra_Map (map.NumGlobalElements(),
+                             map.NumMyElements(),
+                             map.MyGlobalElements (),
+                             map.IndexBase(),
+                             comm);
+        }
+    }
+  }
+
+#endif
+
+  // explicit instantiations of to_string(), grouped by kind of type
+
+  // signed integers
+  template std::string to_string<int> (int, unsigned int);
+  template std::string to_string<long int> (long int, unsigned int);
+  template std::string to_string<long long int> (long long int, unsigned int);
+
+  // unsigned integers
+  template std::string to_string<unsigned int> (unsigned int, unsigned int);
+  template std::string to_string<unsigned long int> (unsigned long int, unsigned int);
+  template std::string to_string<unsigned long long int> (unsigned long long int, unsigned int);
+
+  // floating point numbers
+  template std::string to_string<float> (float, unsigned int);
+  template std::string to_string<double> (double, unsigned int);
+  template std::string to_string<long double> (long double, unsigned int);
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/distributed/CMakeLists.txt b/source/distributed/CMakeLists.txt
new file mode 100644
index 0000000..b52d205
--- /dev/null
+++ b/source/distributed/CMakeLists.txt
@@ -0,0 +1,39 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+# Put the build directory that belongs to this directory at the front of
+# the include path.
+INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_BINARY_DIR})
+
+# Source files of this directory, in alphabetical order.
+SET(_src
+  grid_refinement.cc
+  shared_tria.cc
+  solution_transfer.cc
+  tria.cc
+  tria_base.cc
+  )
+
+# Instantiation templates, one for each of the source files above. They
+# are handed to EXPAND_INSTANTIATIONS() below.
+SET(_inst
+  grid_refinement.inst.in
+  shared_tria.inst.in
+  solution_transfer.inst.in
+  tria.inst.in
+  tria_base.inst.in
+  )
+
+# All headers of the corresponding include directory; they are listed
+# as part of the library target together with the files above.
+FILE(GLOB _header
+  ${CMAKE_SOURCE_DIR}/include/deal.II/distributed/*.h
+  )
+
+DEAL_II_ADD_LIBRARY(obj_distributed OBJECT ${_src} ${_header} ${_inst})
+EXPAND_INSTANTIATIONS(obj_distributed "${_inst}")
diff --git a/source/distributed/grid_refinement.cc b/source/distributed/grid_refinement.cc
new file mode 100644
index 0000000..0c77258
--- /dev/null
+++ b/source/distributed/grid_refinement.cc
@@ -0,0 +1,627 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/utilities.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/petsc_vector.h>
+#include <deal.II/lac/petsc_block_vector.h>
+#include <deal.II/lac/trilinos_vector.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+
+#ifdef DEAL_II_WITH_P4EST
+
+#include <deal.II/grid/grid_refinement.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/tria.h>
+
+#include <deal.II/distributed/grid_refinement.h>
+
+#include <numeric>
+#include <algorithm>
+#include <limits>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace
+{
+  /**
+   * Return the largest entry of @p criteria. For an empty vector,
+   * return the lowest value the data type can represent, i.e. a value
+   * that never wins when compared to the maximum of a non-empty vector.
+   */
+  template <typename number>
+  inline
+  number
+  max_element (const Vector<number> &criteria)
+  {
+    if (criteria.size()>0)
+      return *std::max_element(criteria.begin(), criteria.end());
+
+    // for floating point types, numeric_limits<>::min() is the smallest
+    // positive number, not the most negative one; the lowest value is
+    // the negative of max() there. for integer types, min() is what we
+    // want
+    return (std::numeric_limits<number>::is_integer
+            ?
+            std::numeric_limits<number>::min()
+            :
+            -std::numeric_limits<number>::max());
+  }
+
+
+
+  /**
+   * Return the smallest entry of @p criteria. For an empty vector,
+   * return the largest finite value the data type can represent.
+   */
+  template <typename number>
+  inline
+  number
+  min_element (const Vector<number> &criteria)
+  {
+    if (criteria.size()>0)
+      return *std::min_element(criteria.begin(), criteria.end());
+    else
+      return std::numeric_limits<number>::max();
+  }
+
+
+  /**
+   * Determine the smallest and the largest entry of the criteria
+   * vectors of all processes of @p mpi_communicator. The two numbers
+   * are returned only on the process with rank zero; on every other
+   * process both of them are zero.
+   */
+  template <typename number>
+  std::pair<number,number>
+  compute_global_min_and_max_at_root (const Vector<number> &criteria,
+                                      MPI_Comm              mpi_communicator)
+  {
+    // one reduction is enough for both numbers: because the maximum of
+    // a set is the negative of the minimum of its negated entries, we
+    // send the local minimum together with the negative local maximum
+    // and take the elementwise minimum over all processes
+    double comp[2];
+    comp[0] = min_element (criteria);
+    comp[1] = -static_cast<double>(max_element (criteria));
+
+    // the receive buffer is filled on the process with rank zero only
+    double result[2] = { 0, 0 };
+    MPI_Reduce (comp, result, 2, MPI_DOUBLE,
+                MPI_MIN, 0, mpi_communicator);
+
+    // everybody else must still see the zeros we started with
+    if (Utilities::MPI::this_mpi_process (mpi_communicator) != 0)
+      Assert ((result[0] == 0) && (result[1] == 0),
+              ExcInternalError());
+
+    return std::pair<number,number> (result[0], -result[1]);
+  }
+
+
+
+  /**
+   * Add up the entries of the criteria vectors of all processes of
+   * @p mpi_communicator. The sum is returned only on the process with
+   * rank zero; every other process gets zero.
+   */
+  template <typename number>
+  double
+  compute_global_sum (const Vector<number> &criteria,
+                      MPI_Comm              mpi_communicator)
+  {
+    // the local entries are summed up in the data type of the vector
+    // and the result is converted to double only afterwards
+    const number local_sum = std::accumulate (criteria.begin(),
+                                              criteria.end(),
+                                              number());
+    double my_sum = local_sum;
+
+    // add up the local sums on processor zero
+    double result = 0;
+    MPI_Reduce (&my_sum, &result, 1, MPI_DOUBLE,
+                MPI_SUM, 0, mpi_communicator);
+
+    // everybody else must still see the zero we started with
+    if (Utilities::MPI::this_mpi_process (mpi_communicator) != 0)
+      Assert (result == 0, ExcInternalError());
+
+    return result;
+  }
+
+
+
+  /**
+   * Copy those entries of @p criteria that belong to active cells whose
+   * subdomain id equals the locally owned subdomain of @p tria into
+   * @p locally_owned_indicators, in the order in which the cells are
+   * traversed. The output vector has to have as many entries as there
+   * are locally owned active cells.
+   */
+  template <int dim, int spacedim, typename VectorType>
+  void
+  get_locally_owned_indicators (const parallel::distributed::Triangulation<dim,spacedim> &tria,
+                                const VectorType &criteria,
+                                Vector<typename VectorType::value_type> &locally_owned_indicators)
+  {
+    Assert (locally_owned_indicators.size() == tria.n_locally_owned_active_cells(),
+            ExcInternalError());
+
+    // position in the output vector that is written next
+    unsigned int owned_index = 0;
+
+    typename Triangulation<dim,spacedim>::active_cell_iterator
+    cell = tria.begin_active();
+    for (; cell != tria.end(); ++cell)
+      {
+        // cells of other subdomains do not contribute
+        if (cell->subdomain_id() != tria.locally_owned_subdomain())
+          continue;
+
+        locally_owned_indicators(owned_index)
+          = criteria(cell->active_cell_index());
+        ++owned_index;
+      }
+
+    // we must have filled the output vector completely
+    Assert (owned_index == tria.n_locally_owned_active_cells(),
+            ExcInternalError());
+  }
+
+
+  /**
+   * Slightly widen the interval given by @p interesting_range.
+   *
+   * Refinement thresholds are found by bisection of the interval
+   * between the smallest and the largest error indicator. Bisection
+   * can never produce one of the end points of the interval, yet that
+   * is what is needed if, for example, zero per cent of the cells are
+   * to be coarsened: the threshold then has to equal the smallest
+   * indicator. The interval is therefore enlarged a little before the
+   * bisection starts.
+   */
+  void adjust_interesting_range (double (&interesting_range)[2])
+  {
+    Assert (interesting_range[0] <= interesting_range[1],
+            ExcInternalError());
+
+    Assert (interesting_range[0] >= 0,
+            ExcInternalError());
+
+    double &lower = interesting_range[0];
+    double &upper = interesting_range[1];
+
+    // the lower end is only moved if it is positive; scaling a bound
+    // of zero has no effect, and we do not want to end up with a
+    // negative number
+    if (lower > 0)
+      lower *= 0.99;
+
+    // the upper end is scaled if positive, and otherwise moved up by
+    // one per cent of the length of the interval
+    if (upper > 0)
+      upper *= 1.01;
+    else
+      upper += 0.01 * (upper - lower);
+  }
+
+
+
+  /**
+   * Flag cells of @p tria for refinement and coarsening by passing
+   * @p criteria with @p top_threshold to GridRefinement::refine() and
+   * with @p bottom_threshold to GridRefinement::coarsen(). Afterwards,
+   * both flags are cleared on all active cells whose subdomain id
+   * differs from the locally owned subdomain.
+   */
+  template <int dim, int spacedim, typename VectorType>
+  void
+  mark_cells (parallel::distributed::Triangulation<dim,spacedim> &tria,
+              const VectorType                                   &criteria,
+              const double                                        top_threshold,
+              const double                                        bottom_threshold)
+  {
+    dealii::GridRefinement::refine (tria, criteria, top_threshold);
+    dealii::GridRefinement::coarsen (tria, criteria, bottom_threshold);
+
+    // the two calls above work on all active cells. only flags on
+    // cells we own are wanted, so remove the others again
+    typename Triangulation<dim,spacedim>::active_cell_iterator
+    cell = tria.begin_active();
+    for (; cell != tria.end(); ++cell)
+      {
+        if (cell->subdomain_id() == tria.locally_owned_subdomain())
+          continue;
+
+        cell->clear_refine_flag ();
+        cell->clear_coarsen_flag ();
+      }
+  }
+
+
+
+
+  namespace RefineAndCoarsenFixedNumber
+  {
+    /**
+     * Compute a threshold value so that, if possible, exactly
+     * @p n_target_cells entries of the criteria vectors of all
+     * processes are larger than it.
+     *
+     * The threshold is searched for by bisection of the range given by
+     * @p global_min_and_max, which only has to be valid on the process
+     * with rank zero. The function contains collective MPI calls and
+     * therefore has to be called on all processes of
+     * @p mpi_communicator; all of them return the same value. The
+     * search ends after at most 25 steps.
+     */
+    template <typename number>
+    number
+    compute_threshold (const Vector<number> &criteria,
+                       const std::pair<double,double> global_min_and_max,
+                       const unsigned int    n_target_cells,
+                       MPI_Comm              mpi_communicator)
+    {
+      double interesting_range[2] = { global_min_and_max.first,
+                                      global_min_and_max.second
+                                    };
+      adjust_interesting_range (interesting_range);
+
+      const unsigned int master_mpi_rank = 0;
+      unsigned int iteration = 0;
+
+      do
+        {
+          // the range of the master process is the one that counts
+          MPI_Bcast (&interesting_range[0], 2, MPI_DOUBLE,
+                     master_mpi_rank, mpi_communicator);
+
+          // a range of length zero means that the master has settled
+          // on a value
+          if (interesting_range[0] == interesting_range[1])
+            return interesting_range[0];
+
+          // bisect in a geometric sense if possible, and
+          // arithmetically if the lower bound is zero
+          const double test_threshold
+            = (interesting_range[0] > 0
+               ?
+               std::sqrt(interesting_range[0] * interesting_range[1])
+               :
+               (interesting_range[0] + interesting_range[1]) / 2);
+
+          // count how many of our own
+          // elements would be above
+          // this threshold and then
+          // add to it the number for
+          // all the others
+          unsigned int my_count = 0;
+          for (unsigned int i=0; i<criteria.size(); ++i)
+            if (static_cast<double>(criteria(i)) > test_threshold)
+              ++my_count;
+
+          // MPI_Reduce only fills the receive buffer on the master
+          // process. give the variable a defined value so that the
+          // comparisons below do not read an indeterminate number on
+          // the other processes
+          unsigned int total_count = 0;
+          MPI_Reduce (&my_count, &total_count, 1, MPI_UNSIGNED,
+                      MPI_SUM, master_mpi_rank, mpi_communicator);
+
+          // now adjust the range. if
+          // we have too many cells, we
+          // take the upper half of the
+          // previous range, otherwise
+          // the lower half. if we have
+          // hit the right number, then
+          // set the range to the exact
+          // value.
+          // slave nodes also update their own interesting_range, however
+          // their results are not significant since the values will be
+          // overwritten by MPI_Bcast from the master node in next loop.
+          if (total_count > n_target_cells)
+            interesting_range[0] = test_threshold;
+          else if (total_count < n_target_cells)
+            interesting_range[1] = test_threshold;
+          else
+            interesting_range[0] = interesting_range[1] = test_threshold;
+
+          // terminate the iteration after 25 go-arounds. this is necessary
+          // because oftentimes error indicators on cells have exactly the
+          // same value, and so there may not be a particular value that cuts
+          // the indicators in such a way that we can achieve the desired
+          // number of cells. using a maximal number of iterations means that
+          // we terminate the iteration after a fixed number N of steps if the
+          // indicators were perfectly badly distributed, and we make at most
+          // a mistake of 1/2^N in the number of cells flagged if indicators
+          // are perfectly equidistributed
+          ++iteration;
+          if (iteration == 25)
+            interesting_range[0] = interesting_range[1] = test_threshold;
+        }
+      while (true);
+
+      // the loop above can only be left through the return statement
+      Assert (false, ExcInternalError());
+      return -1;
+    }
+  }
+
+
+
+
+  namespace RefineAndCoarsenFixedFraction
+  {
+    /**
+     * Compute a threshold value so
+     * that the error accumulated over all criteria[i] so that
+     *     criteria[i] > threshold
+     * is larger than target_error.
+     *
+     * The threshold is searched for by bisection of the range given by
+     * @p global_min_and_max, which only has to be valid on the process
+     * with rank zero. The function contains collective MPI calls and
+     * therefore has to be called on all processes of
+     * @p mpi_communicator; all of them return the same value. The
+     * search ends after at most 25 steps.
+     */
+    template <typename number>
+    number
+    compute_threshold (const Vector<number> &criteria,
+                       const std::pair<double,double> global_min_and_max,
+                       const double          target_error,
+                       MPI_Comm              mpi_communicator)
+    {
+      double interesting_range[2] = { global_min_and_max.first,
+                                      global_min_and_max.second
+                                    };
+      adjust_interesting_range (interesting_range);
+
+      const unsigned int master_mpi_rank = 0;
+      unsigned int iteration = 0;
+
+      do
+        {
+          // the range of the master process is the one that counts
+          MPI_Bcast (&interesting_range[0], 2, MPI_DOUBLE,
+                     master_mpi_rank, mpi_communicator);
+
+          if (interesting_range[0] == interesting_range[1])
+            {
+              // so we have found our threshold. since we adjust
+              // the range at the top of the function to be slightly
+              // larger than the actual extremes of the refinement
+              // criteria values, we can end up in a situation where
+              // the threshold is in fact larger than the maximal
+              // refinement indicator. in such cases, we get no
+              // refinement at all. thus, cap the threshold by the
+              // actual largest value
+              double final_threshold =  std::min (interesting_range[0],
+                                                  global_min_and_max.second);
+
+              // the cap is computed from data of the calling process;
+              // make sure everybody uses the value of the master
+              MPI_Bcast (&final_threshold, 1, MPI_DOUBLE,
+                         master_mpi_rank, mpi_communicator);
+
+              return final_threshold;
+            }
+
+          // bisect in a geometric sense if possible, and
+          // arithmetically if the lower bound is zero
+          const double test_threshold
+            = (interesting_range[0] > 0
+               ?
+               std::sqrt(interesting_range[0] * interesting_range[1])
+               :
+               (interesting_range[0] + interesting_range[1]) / 2);
+
+          // accumulate the error of those of our own elements that are
+          // above this threshold and then add to it the number for all
+          // the others
+          double my_error = 0;
+          for (unsigned int i=0; i<criteria.size(); ++i)
+            if (criteria(i) > test_threshold)
+              my_error += criteria(i);
+
+          // MPI_Reduce only fills the receive buffer on the master
+          // process. give the variable a defined value so that the
+          // comparisons below do not read an indeterminate number on
+          // the other processes
+          double total_error = 0;
+          MPI_Reduce (&my_error, &total_error, 1, MPI_DOUBLE,
+                      MPI_SUM, master_mpi_rank, mpi_communicator);
+
+          // now adjust the range. if we have too much error, we take
+          // the upper half of the previous range, otherwise the lower
+          // half. if we have hit the right number, then set the range
+          // to the exact value.
+          // slave nodes also update their own interesting_range, however
+          // their results are not significant since the values will be
+          // overwritten by MPI_Bcast from the master node in next loop.
+          if (total_error > target_error)
+            interesting_range[0] = test_threshold;
+          else if (total_error < target_error)
+            interesting_range[1] = test_threshold;
+          else
+            interesting_range[0] = interesting_range[1] = test_threshold;
+
+          // terminate the iteration after 25 go-arounds. this is
+          // necessary because oftentimes error indicators on cells
+          // have exactly the same value, and so there may not be a
+          // particular value that cuts the indicators in such a way
+          // that we can achieve the desired number of cells. using a
+          // max of 25 iterations means that we terminate the
+          // iteration after 25 steps if the indicators were perfectly
+          // badly distributed, and we make at most a mistake of
+          // 1/2^25 in the number of cells flagged if indicators are
+          // perfectly equidistributed
+          ++iteration;
+          if (iteration == 25)
+            interesting_range[0] = interesting_range[1] = test_threshold;
+        }
+      while (true);
+
+      // the loop above can only be left through the return statement
+      Assert (false, ExcInternalError());
+      return -1;
+    }
+  }
+}
+
+
+
+namespace parallel
+{
+  namespace distributed
+  {
+    namespace GridRefinement
+    {
+      /**
+       * Compute an upper and a lower threshold for the per-cell indicators in
+       * @p criteria and pass them to mark_cells(). The requested fractions are
+       * first run, together with @p max_n_cells and the global number of
+       * active cells, through adjust_refine_and_coarsen_number_fraction(); the
+       * adjusted fractions are then turned into global cell counts that serve
+       * as targets for the two compute_threshold() calls.
+       *
+       * The assertions require @p criteria to be non-negative with one entry
+       * per active cell, and both fractions to lie in [0,1] and to sum to at
+       * most one.
+       */
+      template <int dim, typename VectorType, int spacedim>
+      void
+      refine_and_coarsen_fixed_number
+      (parallel::distributed::Triangulation<dim,spacedim> &tria,
+       const VectorType                                   &criteria,
+       const double                                        top_fraction_of_cells,
+       const double                                        bottom_fraction_of_cells,
+       const unsigned int                                  max_n_cells)
+      {
+        typedef typename VectorType::value_type indicator_type;
+
+        Assert (criteria.size() == tria.n_active_cells(),
+                ExcDimensionMismatch (criteria.size(), tria.n_active_cells()));
+        Assert ((top_fraction_of_cells>=0) && (top_fraction_of_cells<=1),
+                dealii::GridRefinement::ExcInvalidParameterValue());
+        Assert ((bottom_fraction_of_cells>=0) && (bottom_fraction_of_cells<=1),
+                dealii::GridRefinement::ExcInvalidParameterValue());
+        Assert (top_fraction_of_cells+bottom_fraction_of_cells <= 1,
+                dealii::GridRefinement::ExcInvalidParameterValue());
+        Assert (criteria.is_non_negative (),
+                dealii::GridRefinement::ExcNegativeCriteria());
+
+        const std::pair<double, double> adjusted_fractions =
+          dealii::GridRefinement::adjust_refine_and_coarsen_number_fraction<dim> (
+            tria.n_global_active_cells(),
+            max_n_cells,
+            top_fraction_of_cells,
+            bottom_fraction_of_cells);
+
+        // collect the indicators of the locally owned cells in their own vector
+        Vector<indicator_type> locally_owned_indicators (tria.n_locally_owned_active_cells());
+        get_locally_owned_indicators (tria, criteria, locally_owned_indicators);
+
+        MPI_Comm mpi_communicator = tria.get_communicator ();
+
+        // global extrema of the indicators; they are not inspected in this
+        // function itself but handed on to compute_threshold() below
+        const std::pair<indicator_type,indicator_type> global_min_and_max
+          = compute_global_min_and_max_at_root (locally_owned_indicators,
+                                                mpi_communicator);
+
+        // threshold for the (adjusted) top fraction of all active cells
+        const double top_threshold =
+          RefineAndCoarsenFixedNumber::
+          compute_threshold (locally_owned_indicators,
+                             global_min_and_max,
+                             static_cast<unsigned int>
+                             (adjusted_fractions.first *
+                              tria.n_global_active_cells()),
+                             mpi_communicator);
+
+        double bottom_threshold;
+        if (!(adjusted_fractions.second > 0))
+          {
+            // the adjusted bottom fraction is not positive: skip the second
+            // search and use a value no larger than any entry of criteria
+            const double smallest_indicator = *std::min_element (criteria.begin(),
+                                                                 criteria.end());
+            bottom_threshold = smallest_indicator - std::fabs(smallest_indicator);
+          }
+        else
+          bottom_threshold =
+            RefineAndCoarsenFixedNumber::
+            compute_threshold (locally_owned_indicators,
+                               global_min_and_max,
+                               static_cast<unsigned int>
+                               ((1-adjusted_fractions.second) *
+                                tria.n_global_active_cells()),
+                               mpi_communicator);
+
+        // let mark_cells() act on the triangulation using both thresholds
+        mark_cells (tria, criteria, top_threshold, bottom_threshold);
+      }
+
+
+      /**
+       * Compute an upper and a lower threshold for the per-cell indicators in
+       * @p criteria and pass them to mark_cells(). The two compute_threshold()
+       * calls are given @p top_fraction_of_error and
+       * (1 - @p bottom_fraction_of_error) times the value returned by
+       * compute_global_sum() for the locally owned indicators as targets.
+       *
+       * The assertions require @p criteria to be non-negative with one entry
+       * per active cell, and both fractions to lie in [0,1] and to sum to at
+       * most one.
+       */
+      template <int dim, typename VectorType, int spacedim>
+      void
+      refine_and_coarsen_fixed_fraction
+      (parallel::distributed::Triangulation<dim,spacedim> &tria,
+       const VectorType                                   &criteria,
+       const double                                        top_fraction_of_error,
+       const double                                        bottom_fraction_of_error)
+      {
+        Assert (criteria.size() == tria.n_active_cells(),
+                ExcDimensionMismatch (criteria.size(), tria.n_active_cells()));
+        Assert ((top_fraction_of_error>=0) && (top_fraction_of_error<=1),
+                dealii::GridRefinement::ExcInvalidParameterValue());
+        Assert ((bottom_fraction_of_error>=0) && (bottom_fraction_of_error<=1),
+                dealii::GridRefinement::ExcInvalidParameterValue());
+        Assert (top_fraction_of_error+bottom_fraction_of_error <= 1,
+                dealii::GridRefinement::ExcInvalidParameterValue());
+        Assert (criteria.is_non_negative (),
+                dealii::GridRefinement::ExcNegativeCriteria());
+
+        // collect the indicators of the locally owned cells in their own vector
+        Vector<typename VectorType::value_type>
+        locally_owned_indicators (tria.n_locally_owned_active_cells());
+        get_locally_owned_indicators (tria, criteria, locally_owned_indicators);
+
+        MPI_Comm mpi_communicator = tria.get_communicator ();
+
+        // global extrema of the indicators; they are not inspected in this
+        // function itself but handed on to compute_threshold() below
+        const std::pair<double,double> global_min_and_max
+          = compute_global_min_and_max_at_root (locally_owned_indicators,
+                                                mpi_communicator);
+
+        const double total_error
+          = compute_global_sum (locally_owned_indicators, mpi_communicator);
+
+        // threshold belonging to the requested top fraction of the total
+        const double top_threshold =
+          RefineAndCoarsenFixedFraction::
+          compute_threshold (locally_owned_indicators,
+                             global_min_and_max,
+                             top_fraction_of_error *
+                             total_error,
+                             mpi_communicator);
+
+        double bottom_threshold;
+        if (!(bottom_fraction_of_error > 0))
+          {
+            // the bottom fraction is not positive: skip the second search
+            // and use a value no larger than any entry of criteria
+            const double smallest_indicator = *std::min_element (criteria.begin(),
+                                                                 criteria.end());
+            bottom_threshold = smallest_indicator - std::fabs(smallest_indicator);
+          }
+        else
+          bottom_threshold =
+            RefineAndCoarsenFixedFraction::
+            compute_threshold (locally_owned_indicators,
+                               global_min_and_max,
+                               (1-bottom_fraction_of_error) *
+                               total_error,
+                               mpi_communicator);
+
+        // let mark_cells() act on the triangulation using both thresholds
+        mark_cells (tria, criteria, top_threshold, bottom_threshold);
+      }
+    }
+  }
+}
+
+
+// explicit instantiations
+#include "grid_refinement.inst"
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/source/distributed/grid_refinement.inst.in b/source/distributed/grid_refinement.inst.in
new file mode 100644
index 0000000..cf9cb33
--- /dev/null
+++ b/source/distributed/grid_refinement.inst.in
@@ -0,0 +1,79 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+// Explicit instantiations of both refinement functions for dealii::Vector<S>: every S in REAL_SCALARS, every entry of DIMENSIONS except 1.
+for (S : REAL_SCALARS; deal_II_dimension : DIMENSIONS)
+{
+#if deal_II_dimension != 1
+namespace parallel
+\{
+  namespace distributed
+  \{
+    namespace GridRefinement
+    \{
+      template
+      void
+      refine_and_coarsen_fixed_number<deal_II_dimension,dealii::Vector<S>,deal_II_dimension>
+      (parallel::distributed::Triangulation<deal_II_dimension> &,
+       const dealii::Vector<S> &,
+       const double,
+       const double,
+       const unsigned int);
+
+      template
+      void
+      refine_and_coarsen_fixed_fraction<deal_II_dimension,dealii::Vector<S>,deal_II_dimension>
+      (parallel::distributed::Triangulation<deal_II_dimension> &,
+       const dealii::Vector<S> &,
+       const double,
+       const double);
+    \}
+  \}
+\}
+#endif
+
+// When deal_II_dimension is 3, also instantiate both functions for dim = deal_II_dimension-1 with spacedim = deal_II_dimension.
+#if deal_II_dimension == 3
+
+namespace parallel
+\{
+  namespace distributed
+  \{
+    namespace GridRefinement
+    \{
+      template
+      void
+      refine_and_coarsen_fixed_number<deal_II_dimension-1,dealii::Vector<S>,deal_II_dimension>
+      (parallel::distributed::Triangulation<deal_II_dimension-1,deal_II_dimension> &,
+       const dealii::Vector<S> &,
+       const double,
+       const double,
+       const unsigned int);
+
+      template
+      void
+      refine_and_coarsen_fixed_fraction<deal_II_dimension-1,dealii::Vector<S>,deal_II_dimension>
+      (parallel::distributed::Triangulation<deal_II_dimension-1,deal_II_dimension> &,
+       const dealii::Vector<S> &,
+       const double,
+       const double);
+    \}
+  \}
+\}
+
+#endif
+}
diff --git a/source/distributed/shared_tria.cc b/source/distributed/shared_tria.cc
new file mode 100644
index 0000000..b373689
--- /dev/null
+++ b/source/distributed/shared_tria.cc
@@ -0,0 +1,192 @@
+// ---------------------------------------------------------------------
+// $Id: tria.cc 32807 2014-04-22 15:01:57Z heister $
+//
+// Copyright (C) 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/mpi.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/lac/sparsity_tools.h>
+#include <deal.II/lac/sparsity_pattern.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/grid_tools.h>
+#include <deal.II/grid/filtered_iterator.h>
+#include <deal.II/distributed/shared_tria.h>
+
+
+
+
+#include <algorithm>
+#include <numeric>
+#include <iostream>
+#include <fstream>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+#ifdef DEAL_II_WITH_MPI
+namespace parallel
+{
+  namespace shared
+  {
+
+    // Pass communicator and smoothing flags (plus a fixed 'false') to the parallel base class; store allow_artificial_cells.
+    template <int dim, int spacedim>
+    Triangulation<dim,spacedim>::Triangulation (MPI_Comm mpi_communicator,
+                                                const typename dealii::Triangulation<dim,spacedim>::MeshSmoothing smooth_grid,
+                                                const bool allow_artificial_cells)
+      : dealii::parallel::Triangulation<dim,spacedim>(mpi_communicator,smooth_grid,false),
+        allow_artificial_cells(allow_artificial_cells)
+    {}
+
+    // Partition the mesh with GridTools::partition_triangulation(), remember
+    // every active cell's resulting subdomain id, and, if artificial cells are
+    // allowed, relabel all cells that are neither locally owned nor part of
+    // the halo layer with numbers::artificial_subdomain_id.
+    template <int dim, int spacedim>
+    void Triangulation<dim,spacedim>::partition()
+    {
+      typedef typename parallel::shared::Triangulation<dim,spacedim>::active_cell_iterator
+      active_cell_it;
+
+      dealii::GridTools::partition_triangulation (this->n_subdomains, *this);
+
+      // store the original/true subdomain ids, indexed in the order in which
+      // the active cells are traversed
+      true_subdomain_ids_of_cells.resize(this->n_active_cells());
+      const active_cell_it endc = this->end();
+      unsigned int index = 0;
+      for (active_cell_it cell = this->begin_active(); cell != endc; ++cell, ++index)
+        true_subdomain_ids_of_cells[index] = cell->subdomain_id();
+
+      // without artificial cells there is nothing left to relabel
+      if (!allow_artificial_cells)
+        return;
+
+      // get halo layer of (ghost) cells around the cells of this subdomain
+      std_cxx11::function<bool (const active_cell_it &)> predicate
+        = IteratorFilters::SubdomainEqualTo(this->my_subdomain);
+
+      const std::vector<active_cell_it>
+      active_halo_layer_vector = GridTools::compute_active_cell_halo_layer (*this, predicate);
+      const std::set<active_cell_it>
+      active_halo_layer(active_halo_layer_vector.begin(), active_halo_layer_vector.end());
+
+      // every cell that is neither locally owned nor contained in the halo
+      // layer is marked as artificial
+      for (active_cell_it cell = this->begin_active(); cell != endc; ++cell)
+        {
+          if (cell->is_locally_owned() == false &&
+              active_halo_layer.find(cell) == active_halo_layer.end())
+            cell->set_subdomain_id(numbers::artificial_subdomain_id);
+        }
+    }
+
+    // Returns the allow_artificial_cells flag that was set by the constructor.
+    template <int dim, int spacedim>
+    bool Triangulation<dim,spacedim>::with_artificial_cells() const
+    {
+      return allow_artificial_cells;
+    }
+
+    // Read-only access to the true_subdomain_ids_of_cells member.
+    template <int dim, int spacedim>
+    const std::vector<unsigned int> &Triangulation<dim,spacedim>::get_true_subdomain_ids_of_cells() const
+    {
+      return true_subdomain_ids_of_cells;
+    }
+
+    // Destructor with an empty body; members and the base class are
+    // destroyed implicitly.
+    template <int dim, int spacedim>
+    Triangulation<dim,spacedim>::~Triangulation ()
+    {}
+
+    // Run the base-class coarsening/refinement, then partition() and update_number_cache().
+    template <int dim, int spacedim>
+    void Triangulation<dim,spacedim>::execute_coarsening_and_refinement ()
+    {
+      dealii::Triangulation<dim,spacedim>::execute_coarsening_and_refinement ();
+      this->partition();
+      this->update_number_cache ();
+    }
+
+    // Build the mesh through the base class, then partition it and update
+    // the number cache. A DistortedCellList thrown by the base class is
+    // turned into an ExcInternalError.
+    template <int dim, int spacedim>
+    void Triangulation<dim,spacedim>::create_triangulation (const std::vector< Point< spacedim > > &vertices,
+                                                            const std::vector< CellData< dim > > &cells,
+                                                            const SubCellData &subcelldata)
+    {
+      try
+        {
+          dealii::Triangulation<dim,spacedim>::create_triangulation (vertices, cells, subcelldata);
+        }
+      catch (const typename dealii::Triangulation<dim,spacedim>::DistortedCellList &)
+        {
+          // the underlying triangulation should not be checking for distorted cells
+          AssertThrow (false, ExcInternalError());
+        }
+      this->partition();
+      this->update_number_cache ();
+    }
+
+  }
+}
+
+#else
+
+namespace parallel
+{
+  namespace shared
+  {
+    // Stub for builds without MPI: the constructor body only asserts ExcNotImplemented.
+    template <int dim, int spacedim>
+    Triangulation<dim,spacedim>::Triangulation ()
+      : dealii::parallel::Triangulation<dim,spacedim>(MPI_COMM_SELF)
+    {
+      Assert (false, ExcNotImplemented());
+    }
+
+    // Stub for builds without MPI: asserts ExcNotImplemented, then returns true.
+    template <int dim, int spacedim>
+    bool Triangulation<dim,spacedim>::with_artificial_cells() const
+    {
+      Assert (false, ExcNotImplemented());
+      return true;
+    }
+
+    // Stub for builds without MPI: asserts ExcNotImplemented, then returns the member vector.
+    template <int dim, int spacedim>
+    const std::vector<unsigned int> &Triangulation<dim,spacedim>::get_true_subdomain_ids_of_cells() const
+    {
+      Assert (false, ExcNotImplemented());
+      return true_subdomain_ids_of_cells;
+    }
+
+  }
+}
+
+
+#endif
+
+
+/*-------------- Explicit Instantiations -------------------------------*/
+#include "shared_tria.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/distributed/shared_tria.inst.in b/source/distributed/shared_tria.inst.in
new file mode 100644
index 0000000..0119298
--- /dev/null
+++ b/source/distributed/shared_tria.inst.in
@@ -0,0 +1,36 @@
+// ---------------------------------------------------------------------
+// $Id: tria.inst.in 32674 2014-03-20 16:57:24Z denis.davydov $
+//
+// Copyright (C) 2010 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+// Instantiate parallel::shared::Triangulation for each entry of DIMENSIONS, plus <dim,dim+1> for dim < 3 and <dim,dim+2> for dim < 2.
+for (deal_II_dimension : DIMENSIONS)
+  {
+    namespace parallel
+    \{
+      namespace shared
+      \{
+        template class Triangulation<deal_II_dimension>;
+#       if deal_II_dimension < 3
+        template class Triangulation<deal_II_dimension, deal_II_dimension+1>;
+#       endif
+#       if deal_II_dimension < 2
+        template class Triangulation<deal_II_dimension, deal_II_dimension+2>;
+#       endif
+      \}
+    \}
+
+  }
+
diff --git a/source/distributed/solution_transfer.cc b/source/distributed/solution_transfer.cc
new file mode 100644
index 0000000..a793b2b
--- /dev/null
+++ b/source/distributed/solution_transfer.cc
@@ -0,0 +1,268 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_WITH_P4EST
+
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/parallel_vector.h>
+#include <deal.II/lac/parallel_block_vector.h>
+#include <deal.II/lac/petsc_vector.h>
+#include <deal.II/lac/petsc_block_vector.h>
+#include <deal.II/lac/trilinos_vector.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+
+#include <deal.II/distributed/solution_transfer.h>
+#include <deal.II/distributed/tria.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+
+#include <deal.II/base/std_cxx11/bind.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace parallel
+{
+  namespace distributed
+  {
+
+    // Attach to @p dof and assert that its triangulation is a parallel::distributed::Triangulation<dim,space_dimension>.
+    template<int dim, typename VectorType, typename DoFHandlerType>
+    SolutionTransfer<dim, VectorType, DoFHandlerType>::SolutionTransfer (const DoFHandlerType &dof)
+      : dof_handler(&dof, typeid(*this).name())
+    {
+      // match on <dim,space_dimension> so codim-1 handlers pass; the outer parentheses keep the template comma from splitting the macro arguments
+      Assert ((dynamic_cast<const parallel::distributed::Triangulation<dim,DoFHandlerType::space_dimension>*>(&dof_handler->get_triangulation()) != 0),
+              ExcMessage("parallel::distributed::SolutionTransfer requires a parallel::distributed::Triangulation object."));
+    }
+
+
+
+    // Destructor with an empty body.
+    template<int dim, typename VectorType, typename DoFHandlerType>
+    SolutionTransfer<dim, VectorType, DoFHandlerType>::~SolutionTransfer () {}
+
+
+
+    // Store the input vectors and register get_data_size() bytes of attached data for each of them.
+    template<int dim, typename VectorType, typename DoFHandlerType>
+    void SolutionTransfer<dim, VectorType, DoFHandlerType>::
+    prepare_for_coarsening_and_refinement (const std::vector<const VectorType *> &all_in)
+    {
+      input_vectors = all_in;
+      register_data_attach (get_data_size() * all_in.size());
+    }
+
+
+
+    // Register pack_callback() with the triangulation for @p size bytes of
+    // attached data and keep the value it returns in offset.
+    template<int dim, typename VectorType, typename DoFHandlerType>
+    void SolutionTransfer<dim, VectorType, DoFHandlerType>::register_data_attach (const std::size_t size)
+    {
+      typedef parallel::distributed::Triangulation<dim,DoFHandlerType::space_dimension> distributed_tria_type;
+      typedef dealii::Triangulation<dim,DoFHandlerType::space_dimension>                base_tria_type;
+
+      Assert(size > 0, ExcMessage("Please transfer at least one vector!"));
+
+//TODO: casting away constness is bad
+      distributed_tria_type *tria
+        = dynamic_cast<distributed_tria_type *>
+          (const_cast<base_tria_type *>(&dof_handler->get_triangulation()));
+      Assert (tria != 0, ExcInternalError());
+
+      offset
+        = tria->register_data_attach(size,
+                                     std_cxx11::bind(&SolutionTransfer<dim, VectorType,
+                                                     DoFHandlerType>::pack_callback,
+                                                     this,
+                                                     std_cxx11::_1, std_cxx11::_2, std_cxx11::_3));
+    }
+
+
+
+    // Single-vector convenience overload; forwards a one-element list holding &in.
+    template<int dim, typename VectorType, typename DoFHandlerType>
+    void SolutionTransfer<dim, VectorType, DoFHandlerType>::
+    prepare_for_coarsening_and_refinement (const VectorType &in)
+    {
+      const std::vector<const VectorType *> single_vector(1, &in);
+      prepare_for_coarsening_and_refinement(single_vector);
+    }
+
+
+
+    // Single-vector convenience overload; forwards a one-element list holding &in.
+    template<int dim, typename VectorType, typename DoFHandlerType>
+    void SolutionTransfer<dim, VectorType, DoFHandlerType>::prepare_serialization (const VectorType &in)
+    {
+      const std::vector<const VectorType *> single_vector(1, &in);
+      prepare_serialization(single_vector);
+    }
+
+
+
+    // Serialization delegates to prepare_for_coarsening_and_refinement() with the same vectors.
+    template<int dim, typename VectorType, typename DoFHandlerType>
+    void SolutionTransfer<dim, VectorType, DoFHandlerType>::prepare_serialization
+    (const std::vector<const VectorType *> &all_in)
+    {
+      this->prepare_for_coarsening_and_refinement (all_in);
+    }
+
+
+
+    // Single-vector convenience overload; forwards a one-element list holding &in.
+    template<int dim, typename VectorType, typename DoFHandlerType>
+    void SolutionTransfer<dim, VectorType, DoFHandlerType>::deserialize (VectorType &in)
+    {
+      std::vector<VectorType *> single_vector(1, &in);
+      deserialize(single_vector);
+    }
+
+
+
+    // Register the attach callback sized for all_in.size() vectors, resize
+    // input_vectors to the same length, and call interpolate() on @p all_in.
+    template<int dim, typename VectorType, typename DoFHandlerType>
+    void SolutionTransfer<dim, VectorType, DoFHandlerType>::deserialize (std::vector<VectorType *> &all_in)
+    {
+      const std::size_t n_vectors = all_in.size();
+      register_data_attach (get_data_size() * n_vectors);
+      // interpolate() asserts that input_vectors matches its argument in size
+      input_vectors.resize(n_vectors);
+      interpolate(all_in);
+    }
+
+
+    // Hand unpack_callback() to the triangulation's notify_ready_to_unpack()
+    // for the data registered under offset, compress every vector in
+    // @p all_out with VectorOperation::insert, and clear input_vectors.
+    template<int dim, typename VectorType, typename DoFHandlerType>
+    void SolutionTransfer<dim, VectorType, DoFHandlerType>::interpolate (std::vector<VectorType *> &all_out)
+    {
+      typedef parallel::distributed::Triangulation<dim,DoFHandlerType::space_dimension> distributed_tria_type;
+      typedef dealii::Triangulation<dim,DoFHandlerType::space_dimension>                base_tria_type;
+
+      Assert(input_vectors.size()==all_out.size(),
+             ExcDimensionMismatch(input_vectors.size(), all_out.size()) );
+
+//TODO: casting away constness is bad
+      distributed_tria_type *tria
+        = dynamic_cast<distributed_tria_type *>
+          (const_cast<base_tria_type *>(&dof_handler->get_triangulation()));
+      Assert (tria != 0, ExcInternalError());
+
+      tria->notify_ready_to_unpack(offset,
+                                   std_cxx11::bind(&SolutionTransfer<dim, VectorType,
+                                                   DoFHandlerType>::unpack_callback,
+                                                   this,
+                                                   std_cxx11::_1, std_cxx11::_2, std_cxx11::_3,
+                                                   std_cxx11::ref(all_out)));
+
+      // finish the insert operations on each output vector
+      for (std::size_t i=0; i<all_out.size(); ++i)
+        all_out[i]->compress(::dealii::VectorOperation::insert);
+
+      input_vectors.clear();
+    }
+
+
+
+    // Single-vector convenience overload; forwards a one-element list holding &out.
+    template<int dim, typename VectorType, typename DoFHandlerType>
+    void SolutionTransfer<dim, VectorType, DoFHandlerType>::interpolate (VectorType &out)
+    {
+      std::vector<VectorType *> single_vector(1, &out);
+      interpolate(single_vector);
+    }
+
+
+
+    // Returns sizeof(double) times DoFTools::max_dofs_per_cell() of the attached DoF handler.
+    template<int dim, typename VectorType, typename DoFHandlerType>
+    unsigned int SolutionTransfer<dim, VectorType, DoFHandlerType>::get_data_size() const
+    {
+      return sizeof(double)* DoFTools::max_dofs_per_cell(*dof_handler);
+    }
+
+
+    // Pack callback: for each vector in input_vectors, fetch the cell's values
+    // via get_interpolated_dof_values() and copy them, one vector after the
+    // other, into the buffer at @p data.
+    template<int dim, typename VectorType, typename DoFHandlerType>
+    void SolutionTransfer<dim, VectorType, DoFHandlerType>::
+    pack_callback(const typename Triangulation<dim,DoFHandlerType::space_dimension>::cell_iterator &cell_,
+                  const typename Triangulation<dim,DoFHandlerType::space_dimension>::CellStatus /*status*/,
+                  void *data)
+    {
+      typedef typename VectorType::value_type number;
+
+      typename DoFHandlerType::cell_iterator cell(*cell_, dof_handler);
+      const unsigned int dofs_per_cell=cell->get_fe().dofs_per_cell;
+      number *data_store = reinterpret_cast<number *>(data);
+      ::dealii::Vector<number> dofvalues(dofs_per_cell);
+      for (std::size_t v=0; v<input_vectors.size(); ++v)
+        {
+          cell->get_interpolated_dof_values(*input_vectors[v], dofvalues);
+          Assert (typeid(number) == typeid(double), ExcNotImplemented());
+          std::memcpy(data_store, &dofvalues(0), sizeof(number)*dofs_per_cell);
+          data_store += dofs_per_cell;
+        }
+    }
+
+
+    // Unpack callback: for each vector in @p all_out, copy dofs_per_cell
+    // values out of the buffer at @p data and write them to the vector via
+    // set_dof_values_by_interpolation() on the given cell.
+    template<int dim, typename VectorType, typename DoFHandlerType>
+    void SolutionTransfer<dim, VectorType, DoFHandlerType>::unpack_callback
+    (const typename Triangulation<dim,DoFHandlerType::space_dimension>::cell_iterator &cell_,
+     const typename Triangulation<dim,DoFHandlerType::space_dimension>::CellStatus    /*status*/,
+     const void                                           *data,
+     std::vector<VectorType *>                            &all_out)
+    {
+      typedef typename VectorType::value_type number;
+
+      typename DoFHandlerType::cell_iterator cell(*cell_, dof_handler);
+      const unsigned int dofs_per_cell=cell->get_fe().dofs_per_cell;
+
+      ::dealii::Vector<number> dofvalues(dofs_per_cell);
+      const number *data_store = reinterpret_cast<const number *>(data);
+      for (std::size_t v=0; v<all_out.size(); ++v)
+        {
+          Assert (typeid(number) == typeid(double), ExcNotImplemented());
+          std::memcpy(&dofvalues(0), data_store, sizeof(number)*dofs_per_cell);
+          cell->set_dof_values_by_interpolation(dofvalues, *all_out[v]);
+          data_store += dofs_per_cell;
+        }
+    }
+
+
+  }
+}
+
+
+// explicit instantiations
+#include "solution_transfer.inst"
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/source/distributed/solution_transfer.inst.in b/source/distributed/solution_transfer.inst.in
new file mode 100644
index 0000000..248a52a
--- /dev/null
+++ b/source/distributed/solution_transfer.inst.in
@@ -0,0 +1,59 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+// Instantiate SolutionTransfer for each listed vector type and DoFHandler<dim,spacedim>, restricted to dim > 1 and dim <= spacedim.
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension : SPACE_DIMENSIONS)
+  {
+  namespace parallel
+  \{
+    namespace distributed
+    \{
+#if deal_II_dimension > 1
+#if deal_II_dimension <= deal_II_space_dimension
+    template class SolutionTransfer<deal_II_dimension,::dealii::Vector<double>, DoFHandler<deal_II_dimension,deal_II_space_dimension> >;
+    template class SolutionTransfer<deal_II_dimension,::dealii::parallel::distributed::Vector<double>, DoFHandler<deal_II_dimension,deal_II_space_dimension> >;
+    template class SolutionTransfer<deal_II_dimension,::dealii::parallel::distributed::Vector<float>, DoFHandler<deal_II_dimension,deal_II_space_dimension> >;
+    template class SolutionTransfer<deal_II_dimension,::dealii::parallel::distributed::BlockVector<double>, DoFHandler<deal_II_dimension,deal_II_space_dimension> >;
+    template class SolutionTransfer<deal_II_dimension,::dealii::parallel::distributed::BlockVector<float>, DoFHandler<deal_II_dimension,deal_II_space_dimension> >;
+
+    // PETSc wrapper vector types, only if DEAL_II_WITH_PETSC is defined
+#ifdef DEAL_II_WITH_PETSC
+    template class SolutionTransfer<deal_II_dimension, PETScWrappers::Vector, DoFHandler<deal_II_dimension,deal_II_space_dimension> >;
+
+    template class SolutionTransfer<deal_II_dimension, PETScWrappers::BlockVector, DoFHandler<deal_II_dimension,deal_II_space_dimension> >;
+
+    template class SolutionTransfer<deal_II_dimension, PETScWrappers::MPI::Vector, DoFHandler<deal_II_dimension,deal_II_space_dimension> >;
+
+    template class SolutionTransfer<deal_II_dimension, PETScWrappers::MPI::BlockVector, DoFHandler<deal_II_dimension,deal_II_space_dimension> >;
+#endif
+    // Trilinos wrapper vector types, only if DEAL_II_WITH_TRILINOS is defined
+#ifdef DEAL_II_WITH_TRILINOS
+    template class SolutionTransfer<deal_II_dimension, TrilinosWrappers::Vector, DoFHandler<deal_II_dimension,deal_II_space_dimension> >;
+
+    template class SolutionTransfer<deal_II_dimension, TrilinosWrappers::BlockVector, DoFHandler<deal_II_dimension,deal_II_space_dimension> >;
+
+    template class SolutionTransfer<deal_II_dimension, TrilinosWrappers::MPI::Vector, DoFHandler<deal_II_dimension,deal_II_space_dimension> >;
+
+    template class SolutionTransfer<deal_II_dimension, TrilinosWrappers::MPI::BlockVector, DoFHandler<deal_II_dimension,deal_II_space_dimension> >;
+#endif
+    // the two #endif lines below close the dimension <= space dimension and dimension > 1 guards
+#endif
+#endif
+    \}
+  \}
+
+  }
+
diff --git a/source/distributed/tria.cc b/source/distributed/tria.cc
new file mode 100644
index 0000000..f8b7009
--- /dev/null
+++ b/source/distributed/tria.cc
@@ -0,0 +1,5188 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/lac/sparsity_tools.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/grid_tools.h>
+#include <deal.II/distributed/tria.h>
+
+#ifdef DEAL_II_WITH_P4EST
+#  include <p4est_bits.h>
+#  include <p4est_extended.h>
+#  include <p4est_vtk.h>
+#  include <p4est_ghost.h>
+#  include <p4est_communication.h>
+#  include <p4est_iterate.h>
+
+#  include <p8est_bits.h>
+#  include <p8est_extended.h>
+#  include <p8est_vtk.h>
+#  include <p8est_ghost.h>
+#  include <p8est_communication.h>
+#  include <p8est_iterate.h>
+#endif
+
+#include <algorithm>
+#include <numeric>
+#include <iostream>
+#include <fstream>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+#ifdef DEAL_II_WITH_P4EST
+
+namespace internal
+{
+  namespace p4est
+  {
+    /**
+     * A structure whose explicit specializations contain references to the
+     * relevant p4est_* and p8est_* functions. Using this structure, for
+     * example by saying functions<dim>::quadrant_compare, we can write code
+     * in a dimension independent way, either calling p4est_quadrant_compare
+     * or p8est_quadrant_compare, depending on template argument.
+     */
+    template <int dim> struct functions;
+
+    template <> struct functions<2>  // dim == 2: members are references to p4est_* functions.
+    {  // Unless noted, each member is bound after this struct to the p4est_ function of the same name.
+      static
+      int (&quadrant_compare) (const void *v1, const void *v2);
+
+      static
+      void (&quadrant_childrenv) (const types<2>::quadrant *q,
+                                  types<2>::quadrant c[]);
+
+      static
+      int (&quadrant_overlaps_tree) (types<2>::tree *tree,
+                                     const types<2>::quadrant *q);
+
+      static
+      void (&quadrant_set_morton) (types<2>::quadrant *quadrant,
+                                   int level,
+                                   uint64_t id);
+
+      static
+      int (&quadrant_is_equal) (const types<2>::quadrant *q1,
+                                const types<2>::quadrant *q2);
+
+      static
+      int (&quadrant_is_sibling) (const types<2>::quadrant *q1,
+                                  const types<2>::quadrant *q2);
+
+      static
+      int (&quadrant_is_ancestor) (const types<2>::quadrant *q1,
+                                   const types<2>::quadrant *q2);
+
+      static
+      int (&quadrant_ancestor_id) (const types<2>::quadrant *q,
+                                   int level);
+
+      static
+      int (&comm_find_owner) (types<2>::forest *p4est,
+                              const types<2>::locidx which_tree,
+                              const types<2>::quadrant *q,
+                              const int guess);
+
+      static
+      types<2>::connectivity *(&connectivity_new) (types<2>::topidx num_vertices,
+                                                   types<2>::topidx num_trees,
+                                                   types<2>::topidx num_corners,
+                                                   types<2>::topidx num_vtt);
+      // NOTE: declared unconditionally, but defined only inside #if DEAL_II_P4EST_VERSION_GTE(0,3,4,1).
+      static
+      void (&connectivity_join_faces) (types<2>::connectivity *conn,
+                                       types<2>::topidx tree_left,
+                                       types<2>::topidx tree_right,
+                                       int face_left,
+                                       int face_right,
+                                       int orientation);
+
+
+
+      static
+      void (&connectivity_destroy) (p4est_connectivity_t *connectivity);
+      // Bound to p4est_new_ext.
+      static
+      types<2>::forest *(&new_forest) (MPI_Comm mpicomm,
+                                       types<2>::connectivity *connectivity,
+                                       types<2>::locidx min_quadrants,
+                                       int min_level,
+                                       int fill_uniform,
+                                       size_t data_size,
+                                       p4est_init_t init_fn,
+                                       void *user_pointer);
+
+      static
+      void (&destroy) (types<2>::forest *p4est);
+
+      static
+      void (&refine) (types<2>::forest *p4est,
+                      int refine_recursive,
+                      p4est_refine_t refine_fn,
+                      p4est_init_t init_fn);
+
+      static
+      void (&coarsen) (types<2>::forest *p4est,
+                       int coarsen_recursive,
+                       p4est_coarsen_t coarsen_fn,
+                       p4est_init_t init_fn);
+
+      static
+      void (&balance) (types<2>::forest *p4est,
+                       types<2>::balance_type btype,
+                       p4est_init_t init_fn);
+      // Bound to p4est_partition_ext in both branches; only the declared return type differs.
+#if DEAL_II_P4EST_VERSION_GTE(0,3,4,3)
+      static
+      p4est_gloidx_t (&partition) (types<2>::forest *p4est,
+                                   int partition_for_coarsening,
+                                   p4est_weight_t weight_fn);
+#else
+      static
+      void (&partition) (types<2>::forest *p4est,
+                         int partition_for_coarsening,
+                         p4est_weight_t weight_fn);
+#endif
+
+      static
+      void (&save) (const char *filename,
+                    types<2>::forest *p4est,
+                    int save_data);
+      // Exactly one of load_ext / load is declared, selected by the version check.
+#if DEAL_II_P4EST_VERSION_GTE(0,3,4,3)
+      static
+      types<2>::forest *(&load_ext) (const char *filename,
+                                     MPI_Comm mpicomm,
+                                     size_t data_size,
+                                     int load_data,
+                                     int autopartition,
+                                     int broadcasthead,
+                                     void *user_pointer,
+                                     types<2>::connectivity **p4est);
+#else
+      static
+      types<2>::forest *(&load) (const char *filename,
+                                 MPI_Comm mpicomm,
+                                 size_t data_size,
+                                 int load_data,
+                                 void *user_pointer,
+                                 types<2>::connectivity **p4est);
+#endif
+      // The return type is int or void depending on the version check.
+#if DEAL_II_P4EST_VERSION_GTE(0,3,4,3)
+      static
+      int (&connectivity_save) (const char *filename,
+                                types<2>::connectivity *connectivity);
+#else
+      static
+      void (&connectivity_save) (const char *filename,
+                                 types<2>::connectivity *connectivity);
+#endif
+
+      static
+      int (&connectivity_is_valid) (types<2>::connectivity *connectivity);
+      // The type that length points to depends on the version check.
+#if DEAL_II_P4EST_VERSION_GTE(1,0,0,0)
+      static
+      types<2>::connectivity *(&connectivity_load) (const char *filename,
+                                                    size_t *length);
+#elif DEAL_II_P4EST_VERSION_GTE(0,3,4,3)
+      static
+      types<2>::connectivity *(&connectivity_load) (const char *filename,
+                                                    long unsigned *length);
+#else
+      static
+      types<2>::connectivity *(&connectivity_load) (const char *filename,
+                                                    long *length);
+#endif
+
+      static
+      unsigned int (&checksum) (types<2>::forest *p4est);
+
+      static
+      void (&vtk_write_file) (types<2>::forest *p4est,
+                              p4est_geometry_t *,
+                              const char *baseName);
+
+      static
+      types<2>::ghost *(&ghost_new) (types<2>::forest *p4est,
+                                     types<2>::balance_type btype);
+
+      static
+      void (&ghost_destroy) (types<2>::ghost *ghost);
+
+      static
+      void (&reset_data) (types<2>::forest *p4est,
+                          size_t data_size,
+                          p4est_init_t init_fn,
+                          void *user_pointer);
+      // Bound to p4est_memory_used.
+      static
+      size_t (&forest_memory_used) (types<2>::forest *p4est);
+
+      static
+      size_t (&connectivity_memory_used) (types<2>::connectivity *p4est);
+      // Defined after this struct as P4EST_MAXLEVEL.
+      static const unsigned max_level;
+    };
+
+    int (&functions<2>::quadrant_compare) (const void *v1, const void *v2)
+      = p4est_quadrant_compare;  // each definition in this run binds a functions<2> member to a p4est_* function
+
+    void (&functions<2>::quadrant_childrenv) (const types<2>::quadrant *q,
+                                              types<2>::quadrant c[])
+      = p4est_quadrant_childrenv;
+
+    int (&functions<2>::quadrant_overlaps_tree) (types<2>::tree *tree,
+                                                 const types<2>::quadrant *q)
+      = p4est_quadrant_overlaps_tree;
+
+    void (&functions<2>::quadrant_set_morton) (types<2>::quadrant *quadrant,
+                                               int level,
+                                               uint64_t id)
+      = p4est_quadrant_set_morton;
+
+    int (&functions<2>::quadrant_is_equal) (const types<2>::quadrant *q1,
+                                            const types<2>::quadrant *q2)
+      = p4est_quadrant_is_equal;
+
+    int (&functions<2>::quadrant_is_sibling) (const types<2>::quadrant *q1,
+                                              const types<2>::quadrant *q2)
+      = p4est_quadrant_is_sibling;
+
+    int (&functions<2>::quadrant_is_ancestor) (const types<2>::quadrant *q1,
+                                               const types<2>::quadrant *q2)
+      = p4est_quadrant_is_ancestor;
+
+    int (&functions<2>::quadrant_ancestor_id) (const types<2>::quadrant *q,
+                                               int level)
+      = p4est_quadrant_ancestor_id;
+
+    int (&functions<2>::comm_find_owner) (types<2>::forest *p4est,
+                                          const types<2>::locidx which_tree,
+                                          const types<2>::quadrant *q,
+                                          const int guess)
+      = p4est_comm_find_owner;
+
+    types<2>::connectivity *(&functions<2>::connectivity_new) (types<2>::topidx num_vertices,
+                                                               types<2>::topidx num_trees,
+                                                               types<2>::topidx num_corners,
+                                                               types<2>::topidx num_vtt)
+      = p4est_connectivity_new;
+    // Defined only under this version check; the declaration in the struct is unconditional.
+#if DEAL_II_P4EST_VERSION_GTE(0,3,4,1)
+    void (&functions<2>::connectivity_join_faces) (types<2>::connectivity *conn,
+                                                   types<2>::topidx tree_left,
+                                                   types<2>::topidx tree_right,
+                                                   int face_left,
+                                                   int face_right,
+                                                   int orientation)
+      = p4est_connectivity_join_faces;
+#endif
+
+    void (&functions<2>::connectivity_destroy) (p4est_connectivity_t *connectivity)
+      = p4est_connectivity_destroy;
+    // new_forest is bound to p4est_new_ext.
+    types<2>::forest *(&functions<2>::new_forest) (MPI_Comm mpicomm,
+                                                   types<2>::connectivity *connectivity,
+                                                   types<2>::locidx min_quadrants,
+                                                   int min_level,
+                                                   int fill_uniform,
+                                                   size_t data_size,
+                                                   p4est_init_t init_fn,
+                                                   void *user_pointer)
+      = p4est_new_ext;
+
+    void (&functions<2>::destroy) (types<2>::forest *p4est)
+      = p4est_destroy;
+
+    void (&functions<2>::refine) (types<2>::forest *p4est,
+                                  int refine_recursive,
+                                  p4est_refine_t refine_fn,
+                                  p4est_init_t init_fn)
+      = p4est_refine;
+
+    void (&functions<2>::coarsen) (types<2>::forest *p4est,
+                                   int coarsen_recursive,
+                                   p4est_coarsen_t coarsen_fn,
+                                   p4est_init_t init_fn)
+      = p4est_coarsen;
+
+    void (&functions<2>::balance) (types<2>::forest *p4est,
+                                   types<2>::balance_type btype,
+                                   p4est_init_t init_fn)
+      = p4est_balance;
+    // Both branches bind partition to p4est_partition_ext; only the declared return type differs.
+#if DEAL_II_P4EST_VERSION_GTE(0,3,4,3)
+    p4est_gloidx_t (&functions<2>::partition) (types<2>::forest *p4est,
+                                               int partition_for_coarsening,
+                                               p4est_weight_t weight_fn)
+      = p4est_partition_ext;
+#else
+    void (&functions<2>::partition) (types<2>::forest *p4est,
+                                     int partition_for_coarsening,
+                                     p4est_weight_t weight_fn)
+      = p4est_partition_ext;
+#endif
+
+    void (&functions<2>::save) (const char *filename,
+                                types<2>::forest *p4est,
+                                int save_data)
+      = p4est_save;
+
+#if DEAL_II_P4EST_VERSION_GTE(0,3,4,3)
+    types<2>::forest *
+    (&functions<2>::load_ext) (const char *filename,
+                               MPI_Comm mpicomm,
+                               std::size_t data_size,
+                               int load_data,
+                               int autopartition,
+                               int broadcasthead,
+                               void *user_pointer,
+                               types<2>::connectivity **p4est)
+      = p4est_load_ext;
+#else
+    types<2>::forest *
+    (&functions<2>::load) (const char *filename,
+                           MPI_Comm mpicomm,
+                           std::size_t data_size,
+                           int load_data,
+                           void *user_pointer,
+                           types<2>::connectivity **p4est)
+      = p4est_load;
+#endif
+
+#if DEAL_II_P4EST_VERSION_GTE(0,3,4,3)
+    int (&functions<2>::connectivity_save) (const char *filename,
+                                            types<2>::connectivity *connectivity)
+      = p4est_connectivity_save;
+#else
+    void (&functions<2>::connectivity_save) (const char *filename,
+                                             types<2>::connectivity *connectivity)
+      = p4est_connectivity_save;
+#endif
+
+    int (&functions<2>::connectivity_is_valid) (types<2>::connectivity
+                                                *connectivity)
+      = p4est_connectivity_is_valid;
+
+#if DEAL_II_P4EST_VERSION_GTE(1,0,0,0)
+    types<2>::connectivity *
+    (&functions<2>::connectivity_load) (const char *filename,
+                                        size_t *length)
+      = p4est_connectivity_load;
+#elif DEAL_II_P4EST_VERSION_GTE(0,3,4,3)
+    types<2>::connectivity *
+    (&functions<2>::connectivity_load) (const char *filename,
+                                        long unsigned *length)
+      = p4est_connectivity_load;
+#else
+    types<2>::connectivity *
+    (&functions<2>::connectivity_load) (const char *filename,
+                                        long *length)
+      = p4est_connectivity_load;
+#endif
+
+    unsigned int (&functions<2>::checksum) (types<2>::forest *p4est)
+      = p4est_checksum;
+
+    void (&functions<2>::vtk_write_file) (types<2>::forest *p4est,
+                                          p4est_geometry_t *,
+                                          const char *baseName)
+      = p4est_vtk_write_file;
+
+    types<2>::ghost *(&functions<2>::ghost_new) (types<2>::forest *p4est,
+                                                 types<2>::balance_type btype)
+      = p4est_ghost_new;
+
+    void (&functions<2>::ghost_destroy) (types<2>::ghost *ghost)
+      = p4est_ghost_destroy;
+
+    void (&functions<2>::reset_data) (types<2>::forest *p4est,
+                                      size_t data_size,
+                                      p4est_init_t init_fn,
+                                      void *user_pointer)
+      = p4est_reset_data;
+    // forest_memory_used is bound to p4est_memory_used.
+    size_t (&functions<2>::forest_memory_used) (types<2>::forest *p4est)
+      = p4est_memory_used;
+
+    size_t (&functions<2>::connectivity_memory_used) (types<2>::connectivity *p4est)
+      = p4est_connectivity_memory_used;
+    // max_level takes its value from the P4EST_MAXLEVEL macro.
+    const unsigned int functions<2>::max_level = P4EST_MAXLEVEL;
+
+    template <> struct functions<3>  // dim == 3: members are references to p8est_* functions.
+    {  // Unless noted, each member is bound after this struct to the p8est_ function of the same name.
+      static
+      int (&quadrant_compare) (const void *v1, const void *v2);
+
+      static
+      void (&quadrant_childrenv) (const types<3>::quadrant *q,
+                                  types<3>::quadrant c[]);
+
+      static
+      int (&quadrant_overlaps_tree) (types<3>::tree *tree,
+                                     const types<3>::quadrant *q);
+
+      static
+      void (&quadrant_set_morton) (types<3>::quadrant *quadrant,
+                                   int level,
+                                   uint64_t id);
+
+      static
+      int (&quadrant_is_equal) (const types<3>::quadrant *q1,
+                                const types<3>::quadrant *q2);
+
+      static
+      int (&quadrant_is_sibling) (const types<3>::quadrant *q1,
+                                  const types<3>::quadrant *q2);
+
+      static
+      int (&quadrant_is_ancestor) (const types<3>::quadrant *q1,
+                                   const types<3>::quadrant *q2);
+
+      static
+      int (&quadrant_ancestor_id) (const types<3>::quadrant *q,
+                                   int level);
+
+      static
+      int (&comm_find_owner) (types<3>::forest *p4est,
+                              const types<3>::locidx which_tree,
+                              const types<3>::quadrant *q,
+                              const int guess);
+      // Takes six counts, including num_edges and num_ett; the functions<2> variant takes four.
+      static
+      types<3>::connectivity *(&connectivity_new) (types<3>::topidx num_vertices,
+                                                   types<3>::topidx num_trees,
+                                                   types<3>::topidx num_edges,
+                                                   types<3>::topidx num_ett,
+                                                   types<3>::topidx num_corners,
+                                                   types<3>::topidx num_ctt);
+      // NOTE: declared unconditionally, but defined only inside #if DEAL_II_P4EST_VERSION_GTE(0,3,4,1).
+      static
+      void (&connectivity_join_faces) (types<3>::connectivity *conn,
+                                       types<3>::topidx tree_left,
+                                       types<3>::topidx tree_right,
+                                       int face_left,
+                                       int face_right,
+                                       int orientation);
+
+      static
+      void (&connectivity_destroy) (p8est_connectivity_t *connectivity);
+      // Bound to p8est_new_ext.
+      static
+      types<3>::forest *(&new_forest) (MPI_Comm mpicomm,
+                                       types<3>::connectivity *connectivity,
+                                       types<3>::locidx min_quadrants,
+                                       int min_level,
+                                       int fill_uniform,
+                                       size_t data_size,
+                                       p8est_init_t init_fn,
+                                       void *user_pointer);
+
+      static
+      void (&destroy) (types<3>::forest *p8est);
+
+      static
+      void (&refine) (types<3>::forest *p8est,
+                      int refine_recursive,
+                      p8est_refine_t refine_fn,
+                      p8est_init_t init_fn);
+
+      static
+      void (&coarsen) (types<3>::forest *p8est,
+                       int coarsen_recursive,
+                       p8est_coarsen_t coarsen_fn,
+                       p8est_init_t init_fn);
+
+      static
+      void (&balance) (types<3>::forest *p8est,
+                       types<3>::balance_type btype,
+                       p8est_init_t init_fn);
+      // Bound to p8est_partition_ext in both branches; the return type is spelled p4est_gloidx_t here too.
+#if DEAL_II_P4EST_VERSION_GTE(0,3,4,3)
+      static
+      p4est_gloidx_t (&partition) (types<3>::forest *p8est,
+                                   int partition_for_coarsening,
+                                   p8est_weight_t weight_fn);
+#else
+      static
+      void (&partition) (types<3>::forest *p8est,
+                         int partition_for_coarsening,
+                         p8est_weight_t weight_fn);
+#endif
+
+      static
+      void (&save) (const char *filename,
+                    types<3>::forest *p4est,
+                    int save_data);
+      // Exactly one of load_ext / load is declared, selected by the version check.
+#if DEAL_II_P4EST_VERSION_GTE(0,3,4,3)
+      static
+      types<3>::forest *(&load_ext) (const char *filename,
+                                     MPI_Comm mpicomm,
+                                     std::size_t data_size,
+                                     int load_data,
+                                     int autopartition,
+                                     int broadcasthead,
+                                     void *user_pointer,
+                                     types<3>::connectivity **p4est);
+#else
+      static
+      types<3>::forest *(&load) (const char *filename,
+                                 MPI_Comm mpicomm,
+                                 std::size_t data_size,
+                                 int load_data,
+                                 void *user_pointer,
+                                 types<3>::connectivity **p4est);
+#endif
+      // The return type is int or void depending on the version check.
+#if DEAL_II_P4EST_VERSION_GTE(0,3,4,3)
+      static
+      int (&connectivity_save) (const char *filename,
+                                types<3>::connectivity *connectivity);
+#else
+      static
+      void (&connectivity_save) (const char *filename,
+                                 types<3>::connectivity *connectivity);
+#endif
+
+      static
+      int (&connectivity_is_valid) (types<3>::connectivity *connectivity);
+      // The type that length points to depends on the version check.
+#if DEAL_II_P4EST_VERSION_GTE(1,0,0,0)
+      static
+      types<3>::connectivity *(&connectivity_load) (const char *filename,
+                                                    size_t *length);
+#elif DEAL_II_P4EST_VERSION_GTE(0,3,4,3)
+      static
+      types<3>::connectivity *(&connectivity_load) (const char *filename,
+                                                    long unsigned *length);
+#else
+      static
+      types<3>::connectivity *(&connectivity_load) (const char *filename,
+                                                    long *length);
+#endif
+
+      static
+      unsigned int (&checksum) (types<3>::forest *p8est);
+
+      static
+      void (&vtk_write_file) (types<3>::forest *p8est,
+                              p8est_geometry_t *,
+                              const char *baseName);
+
+      static
+      types<3>::ghost *(&ghost_new) (types<3>::forest *p4est,
+                                     types<3>::balance_type btype);
+
+      static
+      void (&ghost_destroy) (types<3>::ghost *ghost);
+
+      static
+      void (&reset_data) (types<3>::forest *p4est,
+                          size_t data_size,
+                          p8est_init_t init_fn,
+                          void *user_pointer);
+      // Bound to p8est_memory_used.
+      static
+      size_t (&forest_memory_used) (types<3>::forest *p4est);
+
+      static
+      size_t (&connectivity_memory_used) (types<3>::connectivity *p4est);
+      // Defined after this struct as P8EST_MAXLEVEL.
+      static const unsigned max_level;
+    };
+
+
+    int (&functions<3>::quadrant_compare) (const void *v1, const void *v2)
+      = p8est_quadrant_compare;  // each definition in this run binds a functions<3> member to a p8est_* function
+
+    void (&functions<3>::quadrant_childrenv) (const types<3>::quadrant *q,
+                                              types<3>::quadrant c[])
+      = p8est_quadrant_childrenv;
+
+    int (&functions<3>::quadrant_overlaps_tree) (types<3>::tree *tree,
+                                                 const types<3>::quadrant *q)
+      = p8est_quadrant_overlaps_tree;
+
+    void (&functions<3>::quadrant_set_morton) (types<3>::quadrant *quadrant,
+                                               int level,
+                                               uint64_t id)
+      = p8est_quadrant_set_morton;
+
+    int (&functions<3>::quadrant_is_equal) (const types<3>::quadrant *q1,
+                                            const types<3>::quadrant *q2)
+      = p8est_quadrant_is_equal;
+
+    int (&functions<3>::quadrant_is_sibling) (const types<3>::quadrant *q1,
+                                              const types<3>::quadrant *q2)
+      = p8est_quadrant_is_sibling;
+
+    int (&functions<3>::quadrant_is_ancestor) (const types<3>::quadrant *q1,
+                                               const types<3>::quadrant *q2)
+      = p8est_quadrant_is_ancestor;
+
+    int (&functions<3>::quadrant_ancestor_id) (const types<3>::quadrant *q,
+                                               int level)
+      = p8est_quadrant_ancestor_id;
+
+    int (&functions<3>::comm_find_owner) (types<3>::forest *p4est,
+                                          const types<3>::locidx which_tree,
+                                          const types<3>::quadrant *q,
+                                          const int guess)
+      = p8est_comm_find_owner;
+
+    types<3>::connectivity *(&functions<3>::connectivity_new) (types<3>::topidx num_vertices,
+                                                               types<3>::topidx num_trees,
+                                                               types<3>::topidx num_edges,
+                                                               types<3>::topidx num_ett,
+                                                               types<3>::topidx num_corners,
+                                                               types<3>::topidx num_ctt)
+      = p8est_connectivity_new;
+
+    void (&functions<3>::connectivity_destroy) (p8est_connectivity_t *connectivity)
+      = p8est_connectivity_destroy;
+    // Defined only under this version check; the declaration in the struct is unconditional.
+#if DEAL_II_P4EST_VERSION_GTE(0,3,4,1)
+    void (&functions<3>::connectivity_join_faces) (types<3>::connectivity *conn,
+                                                   types<3>::topidx tree_left,
+                                                   types<3>::topidx tree_right,
+                                                   int face_left,
+                                                   int face_right,
+                                                   int orientation)
+      = p8est_connectivity_join_faces;
+#endif
+    // new_forest is bound to p8est_new_ext.
+    types<3>::forest *(&functions<3>::new_forest) (MPI_Comm mpicomm,
+                                                   types<3>::connectivity *connectivity,
+                                                   types<3>::locidx min_quadrants,
+                                                   int min_level,
+                                                   int fill_uniform,
+                                                   size_t data_size,
+                                                   p8est_init_t init_fn,
+                                                   void *user_pointer)
+      = p8est_new_ext;
+
+    void (&functions<3>::destroy) (types<3>::forest *p8est)
+      = p8est_destroy;
+
+    void (&functions<3>::refine) (types<3>::forest *p8est,
+                                  int refine_recursive,
+                                  p8est_refine_t refine_fn,
+                                  p8est_init_t init_fn)
+      = p8est_refine;
+
+    void (&functions<3>::coarsen) (types<3>::forest *p8est,
+                                   int coarsen_recursive,
+                                   p8est_coarsen_t coarsen_fn,
+                                   p8est_init_t init_fn)
+      = p8est_coarsen;
+
+    void (&functions<3>::balance) (types<3>::forest *p8est,
+                                   types<3>::balance_type btype,
+                                   p8est_init_t init_fn)
+      = p8est_balance;
+    // Both branches bind partition to p8est_partition_ext; only the declared return type differs.
+#if DEAL_II_P4EST_VERSION_GTE(0,3,4,3)
+    p4est_gloidx_t (&functions<3>::partition) (types<3>::forest *p8est,
+                                               int partition_for_coarsening,
+                                               p8est_weight_t weight_fn)
+      = p8est_partition_ext;
+#else
+    void (&functions<3>::partition) (types<3>::forest *p8est,
+                                     int partition_for_coarsening,
+                                     p8est_weight_t weight_fn)
+      = p8est_partition_ext;
+#endif
+
+    void (&functions<3>::save) (const char *filename,
+                                types<3>::forest *p4est,
+                                int save_data)
+      = p8est_save;
+
+#if DEAL_II_P4EST_VERSION_GTE(0,3,4,3)
+    types<3>::forest *
+    (&functions<3>::load_ext) (const char *filename,
+                               MPI_Comm mpicomm,
+                               std::size_t data_size,
+                               int load_data,
+                               int autopartition,
+                               int broadcasthead,
+                               void *user_pointer,
+                               types<3>::connectivity **p4est)
+      = p8est_load_ext;
+#else
+    types<3>::forest *
+    (&functions<3>::load) (const char *filename,
+                           MPI_Comm mpicomm,
+                           std::size_t data_size,
+                           int load_data,
+                           void *user_pointer,
+                           types<3>::connectivity **p4est)
+      = p8est_load;
+#endif
+
+#if DEAL_II_P4EST_VERSION_GTE(0,3,4,3)
+    int (&functions<3>::connectivity_save) (const char *filename,
+                                            types<3>::connectivity *connectivity)
+      = p8est_connectivity_save;
+#else
+    void (&functions<3>::connectivity_save) (const char *filename,
+                                             types<3>::connectivity *connectivity)
+      = p8est_connectivity_save;
+#endif
+
+    int (&functions<3>::connectivity_is_valid) (types<3>::connectivity
+                                                *connectivity)
+      = p8est_connectivity_is_valid;
+
+#if DEAL_II_P4EST_VERSION_GTE(1,0,0,0)
+    types<3>::connectivity *
+    (&functions<3>::connectivity_load) (const char *filename,
+                                        size_t *length)
+      = p8est_connectivity_load;
+#elif DEAL_II_P4EST_VERSION_GTE(0,3,4,3)
+    types<3>::connectivity *
+    (&functions<3>::connectivity_load) (const char *filename,
+                                        long unsigned *length)
+      = p8est_connectivity_load;
+#else
+    types<3>::connectivity *
+    (&functions<3>::connectivity_load) (const char *filename,
+                                        long *length)
+      = p8est_connectivity_load;
+#endif
+
+    unsigned int (&functions<3>::checksum) (types<3>::forest *p8est)
+      = p8est_checksum;
+
+    void (&functions<3>::vtk_write_file) (types<3>::forest *p8est,
+                                          p8est_geometry_t *,
+                                          const char *baseName)
+      = p8est_vtk_write_file;
+
+    types<3>::ghost *(&functions<3>::ghost_new) (types<3>::forest *p4est,
+                                                 types<3>::balance_type btype)
+      = p8est_ghost_new;
+
+    void (&functions<3>::ghost_destroy) (types<3>::ghost *ghost)
+      = p8est_ghost_destroy;
+
+    void (&functions<3>::reset_data) (types<3>::forest *p4est,
+                                      size_t data_size,
+                                      p8est_init_t init_fn,
+                                      void *user_pointer)
+      = p8est_reset_data;
+    // forest_memory_used is bound to p8est_memory_used.
+    size_t (&functions<3>::forest_memory_used) (types<3>::forest *p4est)
+      = p8est_memory_used;
+
+    size_t (&functions<3>::connectivity_memory_used) (types<3>::connectivity *p4est)
+      = p8est_connectivity_memory_used;
+    // max_level takes its value from the P8EST_MAXLEVEL macro.
+    const unsigned int functions<3>::max_level = P8EST_MAXLEVEL;
+
+
+    /**
+     * Initialize every entry of @p p4est_children with the p4est quadrant
+     * init macro that belongs to @p dim, then hand the array to
+     * functions<dim>::quadrant_childrenv together with @p p4est_cell.
+     */
+    template <int dim>
+    void
+    init_quadrant_children
+    (const typename types<dim>::quadrant &p4est_cell,
+     typename types<dim>::quadrant (&p4est_children)[GeometryInfo<dim>::max_children_per_cell])
+    {
+      for (unsigned int child=0;
+           child<GeometryInfo<dim>::max_children_per_cell; ++child)
+        {
+          if (dim == 2)
+            {
+              P4EST_QUADRANT_INIT(&p4est_children[child]);
+            }
+          else if (dim == 3)
+            {
+              P8EST_QUADRANT_INIT(&p4est_children[child]);
+            }
+          else
+            {
+              Assert (false, ExcNotImplemented());
+            }
+        }
+
+      // with all entries initialized, let p4est fill in the children
+      functions<dim>::quadrant_childrenv (&p4est_cell, p4est_children);
+    }
+
+
+    /**
+     * Initialize @p quad with the p4est init macro that belongs to @p dim
+     * and then set it, via quadrant_set_morton, to level 0 and index 0.
+     */
+    template <int dim>
+    void
+    init_coarse_quadrant(typename types<dim>::quadrant &quad)
+    {
+      if (dim == 2)
+        {
+          P4EST_QUADRANT_INIT(&quad);
+        }
+      else if (dim == 3)
+        {
+          P8EST_QUADRANT_INIT(&quad);
+        }
+      else
+        {
+          Assert (false, ExcNotImplemented());
+        }
+
+      const int coarse_level = 0;
+      const int coarse_index = 0;
+      functions<dim>::quadrant_set_morton (&quad, coarse_level, coarse_index);
+    }
+
+
+    /**
+     * Reference-taking convenience wrapper around the pointer-based
+     * functions<dim>::quadrant_is_equal.
+     */
+    template <int dim>
+    bool
+    quadrant_is_equal (const typename types<dim>::quadrant &q1,
+                       const typename types<dim>::quadrant &q2)
+    {
+      const typename types<dim>::quadrant *const first  = &q1;
+      const typename types<dim>::quadrant *const second = &q2;
+      return functions<dim>::quadrant_is_equal(first, second);
+    }
+
+
+
+    /**
+     * Reference-taking convenience wrapper around the pointer-based
+     * functions<dim>::quadrant_is_ancestor; the arguments are forwarded in
+     * the order (q1, q2).
+     */
+    template <int dim>
+    bool
+    quadrant_is_ancestor (const typename types<dim>::quadrant &q1,
+                          const typename types<dim>::quadrant &q2)
+    {
+      const typename types<dim>::quadrant *const first  = &q1;
+      const typename types<dim>::quadrant *const second = &q2;
+      return functions<dim>::quadrant_is_ancestor(first, second);
+    }
+
+    /**
+     * This struct templatizes the p4est iterate structs and function
+     * prototypes, which are used to execute callback functions for faces,
+     * edges, and corners that require local neighborhood information, i.e.
+     * the neighboring cells.
+     *
+     * Only the specializations for dim==2 and dim==3 below are defined; the
+     * primary template is merely declared.
+     */
+    template <int dim> struct iter;
+
+    /**
+     * 2d specialization: maps the corner and face iterate types onto their
+     * p4est_* counterparts. There are no edge types in 2d.
+     */
+    template <> struct iter<2>
+    {
+      typedef p4est_iter_corner_info_t corner_info;
+      typedef p4est_iter_corner_side_t corner_side;
+      typedef p4est_iter_corner_t      corner_iter;
+      typedef p4est_iter_face_info_t face_info;
+      typedef p4est_iter_face_side_t face_side;
+      typedef p4est_iter_face_t      face_iter;
+    };
+
+    /**
+     * 3d specialization: maps the corner, edge and face iterate types onto
+     * their p8est_* counterparts.
+     */
+    template <> struct iter<3>
+    {
+      typedef p8est_iter_corner_info_t corner_info;
+      typedef p8est_iter_corner_side_t corner_side;
+      typedef p8est_iter_corner_t      corner_iter;
+      typedef p8est_iter_edge_info_t edge_info;
+      typedef p8est_iter_edge_side_t edge_side;
+      typedef p8est_iter_edge_t      edge_iter;
+      typedef p8est_iter_face_info_t face_info;
+      typedef p8est_iter_face_side_t face_side;
+      typedef p8est_iter_face_t      face_iter;
+    };
+
+  }
+}
+
+
+namespace
+{
+  /**
+   * For every vertex of @p triangulation, count the active cells that
+   * contain it (@p vertex_touch_count) and record each such cell together
+   * with the local number the vertex has inside that cell
+   * (@p vertex_to_cell). Both output vectors are resized to the number of
+   * vertices and are indexed by global vertex index.
+   */
+  template <int dim, int spacedim>
+  void
+  get_vertex_to_cell_mappings (const Triangulation<dim,spacedim> &triangulation,
+                               std::vector<unsigned int> &vertex_touch_count,
+                               std::vector<std::list<
+                               std::pair<typename Triangulation<dim,spacedim>::active_cell_iterator,unsigned int> > >
+                               &vertex_to_cell)
+  {
+    typedef typename Triangulation<dim,spacedim>::active_cell_iterator active_iterator;
+
+    vertex_touch_count.resize (triangulation.n_vertices());
+    vertex_to_cell.resize (triangulation.n_vertices());
+
+    for (active_iterator cell = triangulation.begin_active();
+         cell != triangulation.end(); ++cell)
+      for (unsigned int local_vertex=0;
+           local_vertex<GeometryInfo<dim>::vertices_per_cell; ++local_vertex)
+        {
+          const unsigned int global_vertex = cell->vertex_index(local_vertex);
+
+          ++vertex_touch_count[global_vertex];
+          vertex_to_cell[global_vertex].push_back (std::make_pair (cell, local_vertex));
+        }
+  }
+
+
+
+  /**
+   * For every line of @p triangulation, count the active cells that contain
+   * it (@p edge_touch_count) and record each such cell together with the
+   * local number the line has inside that cell (@p edge_to_cell). Both
+   * output vectors are resized to the number of active lines and are
+   * indexed by line index. The triangulation must consist of a single
+   * level.
+   */
+  template <int dim, int spacedim>
+  void
+  get_edge_to_cell_mappings (const Triangulation<dim,spacedim> &triangulation,
+                             std::vector<unsigned int> &edge_touch_count,
+                             std::vector<std::list<
+                             std::pair<typename Triangulation<dim,spacedim>::active_cell_iterator,unsigned int> > >
+                             &edge_to_cell)
+  {
+    typedef typename Triangulation<dim,spacedim>::active_cell_iterator active_iterator;
+
+    Assert (triangulation.n_levels() == 1, ExcInternalError());
+
+    edge_touch_count.resize (triangulation.n_active_lines());
+    edge_to_cell.resize (triangulation.n_active_lines());
+
+    for (active_iterator cell = triangulation.begin_active();
+         cell != triangulation.end(); ++cell)
+      for (unsigned int local_line=0;
+           local_line<GeometryInfo<dim>::lines_per_cell; ++local_line)
+        {
+          const int line_index = cell->line(local_line)->index();
+
+          ++edge_touch_count[line_index];
+          edge_to_cell[line_index].push_back (std::make_pair (cell, local_line));
+        }
+  }
+
+
+
+  /**
+   * Set all vertex and cell related information in the p4est connectivity
+   * structure.
+   *
+   * @param triangulation The (coarse) triangulation to describe. All of its
+   *   vertices are expected to be in use.
+   * @param vertex_touch_count For every vertex, the number of active cells
+   *   that contain it.
+   * @param vertex_to_cell For every vertex, the list of (cell, local vertex
+   *   number) pairs of the cells that contain it; its length has to agree
+   *   with @p vertex_touch_count.
+   * @param coarse_cell_to_p4est_tree_permutation Maps the index of a coarse
+   *   cell to the index of the p4est tree that represents it.
+   * @param set_vertex_info If true, also fill the @p vertices and
+   *   @p tree_to_vertex arrays; @p tree_to_corner and all other arrays are
+   *   filled in either case.
+   * @param connectivity The p4est connectivity object to fill. Its arrays
+   *   are only written to here, not allocated.
+   */
+  template <int dim, int spacedim>
+  void
+  set_vertex_and_cell_info (const Triangulation<dim,spacedim> &triangulation,
+                            const std::vector<unsigned int> &vertex_touch_count,
+                            const std::vector<std::list<
+                            std::pair<typename Triangulation<dim,spacedim>::active_cell_iterator,unsigned int> > >
+                            &vertex_to_cell,
+                            const std::vector<types::global_dof_index> &coarse_cell_to_p4est_tree_permutation,
+                            const bool set_vertex_info,
+                            typename internal::p4est::types<dim>::connectivity *connectivity)
+  {
+    // copy the vertices into the connectivity structure. the triangulation
+    // exports the array of vertices, but some of the entries are sometimes
+    // unused; this shouldn't be the case for a newly created triangulation,
+    // but make sure
+    //
+    // note that p4est stores coordinates as a triplet of values even in 2d
+    Assert (triangulation.get_used_vertices().size() ==
+            triangulation.get_vertices().size(),
+            ExcInternalError());
+    Assert (std::find (triangulation.get_used_vertices().begin(),
+                       triangulation.get_used_vertices().end(),
+                       false)
+            == triangulation.get_used_vertices().end(),
+            ExcInternalError());
+    if (set_vertex_info == true)
+      for (unsigned int v=0; v<triangulation.n_vertices(); ++v)
+        {
+          connectivity->vertices[3*v  ] = triangulation.get_vertices()[v][0];
+          connectivity->vertices[3*v+1] = triangulation.get_vertices()[v][1];
+          connectivity->vertices[3*v+2] = (spacedim == 2 ?
+                                           0
+                                           :
+                                           triangulation.get_vertices()[v][2]);
+        }
+
+    // next store the tree_to_vertex indices (each tree is here only a single
+    // cell in the coarse mesh). p4est requires vertex numbering in clockwise
+    // orientation
+    //
+    // while we're at it, also copy the neighborship information between cells
+    typename Triangulation<dim, spacedim>::active_cell_iterator
+    cell = triangulation.begin_active(),
+    endc = triangulation.end();
+    for (; cell != endc; ++cell)
+      {
+        // index of the p4est tree that corresponds to the current cell
+        const unsigned int
+        index = coarse_cell_to_p4est_tree_permutation[cell->index()];
+
+        for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+          {
+            if (set_vertex_info == true)
+              connectivity->tree_to_vertex[index*GeometryInfo<dim>::vertices_per_cell+v] = cell->vertex_index(v);
+            connectivity->tree_to_corner[index*GeometryInfo<dim>::vertices_per_cell+v] = cell->vertex_index(v);
+          }
+
+        // neighborship information. if a cell is at a boundary, then enter
+        // the index of the cell itself here
+        for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+          if (cell->face(f)->at_boundary() == false)
+            connectivity->tree_to_tree[index*GeometryInfo<dim>::faces_per_cell + f]
+              = coarse_cell_to_p4est_tree_permutation[cell->neighbor(f)->index()];
+          else
+            connectivity->tree_to_tree[index*GeometryInfo<dim>::faces_per_cell + f]
+              = coarse_cell_to_p4est_tree_permutation[cell->index()];
+
+        // fill tree_to_face, which is essentially neighbor_to_neighbor;
+        // however, we have to remap the resulting face number as well
+        for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+          if (cell->face(f)->at_boundary() == false)
+            {
+              switch (dim)
+                {
+                case 2:
+                {
+                  connectivity->tree_to_face[index*GeometryInfo<dim>::faces_per_cell + f]
+                    = cell->neighbor_of_neighbor (f);
+                  break;
+                }
+
+                case 3:
+                {
+                  /*
+                   * The values for tree_to_face are in 0..23 where ttf % 6
+                   * gives the face number and ttf / 6 the face orientation
+                   * code.  The orientation is determined as follows.  Let
+                   * my_face and other_face be the two face numbers of the
+                   * connecting trees in 0..5.  Then the first face vertex of
+                   * the lower of my_face and other_face connects to a face
+                   * vertex numbered 0..3 in the higher of my_face and
+                   * other_face.  The face orientation is defined as this
+                   * number.  If my_face == other_face, treating either of
+                   * both faces as the lower one leads to the same result.
+                   */
+
+                  // start with the plain face number; the orientation code
+                  // is added further down
+                  connectivity->tree_to_face[index*6 + f]
+                    = cell->neighbor_of_neighbor (f);
+
+                  // this branch is compiled for every dim/spacedim pair, so
+                  // the iterator type has to carry spacedim as well
+                  unsigned int face_idx_list[2] =
+                  {f, cell->neighbor_of_neighbor (f)};
+                  typename Triangulation<dim,spacedim>::active_cell_iterator
+                  cell_list[2] = {cell, cell->neighbor(f)};
+                  unsigned int smaller_idx = 0;
+
+                  if (f>cell->neighbor_of_neighbor (f))
+                    smaller_idx = 1;
+
+                  unsigned int larger_idx = (smaller_idx+1) % 2;
+                  //smaller = *_list[smaller_idx]
+                  //larger = *_list[larger_idx]
+
+                  // orientation code; stays 0 if no matching vertex is found
+                  unsigned int v = 0;
+
+                  // global vertex index of vertex 0 on face of cell with
+                  // smaller local face index
+                  unsigned int g_idx =
+                    cell_list[smaller_idx]->vertex_index(
+                      GeometryInfo<dim>::face_to_cell_vertices(
+                        face_idx_list[smaller_idx],
+                        0,
+                        cell_list[smaller_idx]->face_orientation(face_idx_list[smaller_idx]),
+                        cell_list[smaller_idx]->face_flip(face_idx_list[smaller_idx]),
+                        cell_list[smaller_idx]->face_rotation(face_idx_list[smaller_idx]))
+                    );
+
+                  // loop over vertices on face from other cell and compare
+                  // global vertex numbers
+                  for (unsigned int i=0; i<GeometryInfo<dim>::vertices_per_face; ++i)
+                    {
+                      unsigned int idx
+                        =
+                          cell_list[larger_idx]->vertex_index(
+                            GeometryInfo<dim>::face_to_cell_vertices(
+                              face_idx_list[larger_idx],
+                              i)
+                          );
+
+                      if (idx==g_idx)
+                        {
+                          v = i;
+                          break;
+                        }
+                    }
+
+                  // encode the orientation: ttf = face + 6*orientation
+                  connectivity->tree_to_face[index*6 + f] += 6*v;
+                  break;
+                }
+
+                default:
+                  Assert (false, ExcNotImplemented());
+                }
+            }
+          else
+            connectivity->tree_to_face[index*GeometryInfo<dim>::faces_per_cell + f] = f;
+      }
+
+    // now fill the vertex information. ctt_offset[v] is the position in
+    // corner_to_tree/corner_to_corner at which the entries for vertex v
+    // start, i.e. the running sum of the touch counts
+    connectivity->ctt_offset[0] = 0;
+    std::partial_sum (vertex_touch_count.begin(),
+                      vertex_touch_count.end(),
+                      &connectivity->ctt_offset[1]);
+
+    const typename internal::p4est::types<dim>::locidx
+    num_vtt = std::accumulate (vertex_touch_count.begin(),
+                               vertex_touch_count.end(),
+                               0);
+    // num_vtt is only used inside the assertion below
+    (void)num_vtt;
+    Assert (connectivity->ctt_offset[triangulation.n_vertices()] ==
+            num_vtt,
+            ExcInternalError());
+
+    for (unsigned int v=0; v<triangulation.n_vertices(); ++v)
+      {
+        Assert (vertex_to_cell[v].size() == vertex_touch_count[v],
+                ExcInternalError());
+
+        typename std::list<std::pair
+        <typename Triangulation<dim,spacedim>::active_cell_iterator,
+        unsigned int> >::const_iterator
+        p = vertex_to_cell[v].begin();
+        for (unsigned int c=0; c<vertex_touch_count[v]; ++c, ++p)
+          {
+            // tree that contains the vertex, and the local number the
+            // vertex has within that tree
+            connectivity->corner_to_tree[connectivity->ctt_offset[v]+c]
+              = coarse_cell_to_p4est_tree_permutation[p->first->index()];
+            connectivity->corner_to_corner[connectivity->ctt_offset[v]+c]
+              = p->second;
+          }
+      }
+  }
+
+
+
+  /**
+   * Return whether @p coarse_grid_cell lies within the closed range
+   * [first_local_tree, last_local_tree] of @p parallel_forest.
+   */
+  template <int dim, int spacedim>
+  bool
+  tree_exists_locally (const typename internal::p4est::types<dim>::forest *parallel_forest,
+                       const typename internal::p4est::types<dim>::topidx coarse_grid_cell)
+  {
+    Assert (coarse_grid_cell < parallel_forest->connectivity->num_trees,
+            ExcInternalError());
+
+    if (coarse_grid_cell < parallel_forest->first_local_tree)
+      return false;
+    if (coarse_grid_cell > parallel_forest->last_local_tree)
+      return false;
+
+    return true;
+  }
+
+
+  /**
+   * Set the coarsen flag on every cell without children in the subtree
+   * rooted at @p cell, including @p cell itself if it has no children.
+   */
+  template <int dim, int spacedim>
+  void
+  delete_all_children_and_self (const typename Triangulation<dim,spacedim>::cell_iterator &cell)
+  {
+    if (!cell->has_children())
+      {
+        cell->set_coarsen_flag ();
+        return;
+      }
+
+    // descend into every child
+    for (unsigned int child=0; child<cell->n_children(); ++child)
+      delete_all_children_and_self<dim,spacedim> (cell->child(child));
+  }
+
+
+
+  /**
+   * Call delete_all_children_and_self() on every child of @p cell. The
+   * cell itself is never flagged; nothing happens if it has no children.
+   */
+  template <int dim, int spacedim>
+  void
+  delete_all_children (const typename Triangulation<dim,spacedim>::cell_iterator &cell)
+  {
+    if (!cell->has_children())
+      return;
+
+    for (unsigned int child=0; child<cell->n_children(); ++child)
+      delete_all_children_and_self<dim,spacedim> (cell->child(child));
+  }
+
+
+  /**
+   * Walk the deal.II cell @p dealii_cell and the matching p4est quadrant
+   * @p p4est_cell down the refinement tree in lockstep. A cell whose level
+   * subdomain id is still artificial, and that has at least one vertex
+   * flagged in @p marked_vertices on its level, gets its level subdomain
+   * id set to the owner reported by comm_find_owner.
+   */
+  template <int dim, int spacedim>
+  void
+  determine_level_subdomain_id_recursively (const typename internal::p4est::types<dim>::tree     &tree,
+                                            const typename internal::p4est::types<dim>::locidx &tree_index,
+                                            const typename Triangulation<dim,spacedim>::cell_iterator     &dealii_cell,
+                                            const typename internal::p4est::types<dim>::quadrant &p4est_cell,
+                                            typename internal::p4est::types<dim>::forest   &forest,
+                                            const types::subdomain_id                           my_subdomain,
+                                            const std::vector<std::vector<bool> > &marked_vertices)
+  {
+    if (dealii_cell->level_subdomain_id()==numbers::artificial_subdomain_id)
+      {
+        // we could assign an owner to every cell, but it is important to
+        // do so only for ghost cells, i.e. cells that touch a marked vertex
+        bool touches_marked_vertex = false;
+        for (unsigned int v=0;
+             (v<GeometryInfo<dim>::vertices_per_cell) && !touches_marked_vertex;
+             ++v)
+          if (marked_vertices[dealii_cell->level()][dealii_cell->vertex_index(v)])
+            touches_marked_vertex = true;
+
+        if (touches_marked_vertex)
+          {
+            const int owner
+              = internal::p4est::functions<dim>::comm_find_owner (&forest,
+                                                                  tree_index,
+                                                                  &p4est_cell,
+                                                                  my_subdomain);
+            Assert((owner!=-2) && (owner!=-1), ExcMessage("p4est should know the owner."));
+            dealii_cell->set_level_subdomain_id(owner);
+          }
+      }
+
+    // nothing left to do on cells without children
+    if (!dealii_cell->has_children ())
+      return;
+
+    // build the p4est children of the current quadrant ...
+    typename internal::p4est::types<dim>::quadrant
+    child_quadrants[GeometryInfo<dim>::max_children_per_cell];
+    for (unsigned int child=0;
+         child<GeometryInfo<dim>::max_children_per_cell; ++child)
+      {
+        if (dim == 2)
+          {
+            P4EST_QUADRANT_INIT(&child_quadrants[child]);
+          }
+        else if (dim == 3)
+          {
+            P8EST_QUADRANT_INIT(&child_quadrants[child]);
+          }
+        else
+          {
+            Assert (false, ExcNotImplemented());
+          }
+      }
+
+    internal::p4est::functions<dim>::quadrant_childrenv (&p4est_cell,
+                                                         child_quadrants);
+
+    // ... and recurse into each of them
+    for (unsigned int child=0;
+         child<GeometryInfo<dim>::max_children_per_cell; ++child)
+      determine_level_subdomain_id_recursively <dim,spacedim> (tree,
+                                                               tree_index,
+                                                               dealii_cell->child(child),
+                                                               child_quadrants[child],
+                                                               forest,
+                                                               my_subdomain,
+                                                               marked_vertices);
+  }
+
+
+  /**
+   * Compare the deal.II cell @p dealii_cell with the p4est quadrant
+   * @p p4est_cell and set refine/coarsen flags and subdomain ids so that
+   * the deal.II mesh moves towards the local part of the p4est @p tree.
+   */
+  template <int dim, int spacedim>
+  void
+  match_tree_recursively (const typename internal::p4est::types<dim>::tree     &tree,
+                          const typename Triangulation<dim,spacedim>::cell_iterator     &dealii_cell,
+                          const typename internal::p4est::types<dim>::quadrant &p4est_cell,
+                          const typename internal::p4est::types<dim>::forest   &forest,
+                          const types::subdomain_id                           my_subdomain)
+  {
+    // is this exact quadrant stored in the local part of the tree?
+    const bool found_locally
+      = (sc_array_bsearch(const_cast<sc_array_t *>(&tree.quadrants),
+                          &p4est_cell,
+                          internal::p4est::functions<dim>::quadrant_compare)
+         != -1);
+
+    if (found_locally)
+      {
+        // the quadrant is a local leaf: flag everything below the deal.II
+        // cell for coarsening and, if the cell is already a leaf, claim it
+        delete_all_children<dim,spacedim> (dealii_cell);
+        if (!dealii_cell->has_children())
+          dealii_cell->set_subdomain_id(my_subdomain);
+        return;
+      }
+
+    // the quadrant is not a local leaf, so the local part of p4est is more
+    // refined than this cell. a cell without children has to be refined
+    if (dealii_cell->has_children () == false)
+      {
+        dealii_cell->set_refine_flag ();
+        return;
+      }
+
+    // otherwise look at each child in turn
+    typename internal::p4est::types<dim>::quadrant
+    child_quadrants[GeometryInfo<dim>::max_children_per_cell];
+    for (unsigned int child=0;
+         child<GeometryInfo<dim>::max_children_per_cell; ++child)
+      {
+        if (dim == 2)
+          {
+            P4EST_QUADRANT_INIT(&child_quadrants[child]);
+          }
+        else if (dim == 3)
+          {
+            P8EST_QUADRANT_INIT(&child_quadrants[child]);
+          }
+        else
+          {
+            Assert (false, ExcNotImplemented());
+          }
+      }
+
+    internal::p4est::functions<dim>::quadrant_childrenv (&p4est_cell,
+                                                         child_quadrants);
+
+    for (unsigned int child=0;
+         child<GeometryInfo<dim>::max_children_per_cell; ++child)
+      {
+        if (internal::p4est::functions<dim>::
+            quadrant_overlaps_tree (const_cast<typename internal::p4est::types<dim>::tree *>(&tree),
+                                    &child_quadrants[child]))
+          {
+            // at least part of the subtree rooted in this child is locally
+            // available, so keep descending
+            match_tree_recursively<dim,spacedim> (tree,
+                                                  dealii_cell->child(child),
+                                                  child_quadrants[child],
+                                                  forest,
+                                                  my_subdomain);
+          }
+        else
+          {
+            // this child is not locally available in p4est. flag its
+            // children for removal but, because that may not succeed,
+            // also mark the whole subtree as not local
+            delete_all_children<dim,spacedim> (dealii_cell->child(child));
+            dealii_cell->child(child)
+            ->recursively_set_subdomain_id(numbers::artificial_subdomain_id);
+          }
+      }
+  }
+
+
+  /**
+   * Follow @p ghost_quadrant from the level-0 cell with index
+   * @p dealii_index down to the quadrant's own level. If a cell on the way
+   * has no children, it is flagged for refinement and the function
+   * returns. Otherwise the cell on the quadrant's level either has its
+   * children flagged for removal or, if it has none, is assigned
+   * @p ghost_owner as subdomain id.
+   */
+  template <int dim, int spacedim>
+  void
+  match_quadrant (const dealii::Triangulation<dim,spacedim> *tria,
+                  unsigned int dealii_index,
+                  typename internal::p4est::types<dim>::quadrant &ghost_quadrant,
+                  unsigned int ghost_owner)
+  {
+    const int ghost_level = ghost_quadrant.level;
+
+    for (int level = 0; level < ghost_level; ++level)
+      {
+        typename Triangulation<dim,spacedim>::cell_iterator ancestor (tria, level, dealii_index);
+        if (!ancestor->has_children ())
+          {
+            // the deal.II mesh is too coarse here
+            ancestor->clear_coarsen_flag();
+            ancestor->set_refine_flag ();
+            return;
+          }
+
+        // step into the child that contains the ghost quadrant
+        const int child_id
+          = internal::p4est::functions<dim>::quadrant_ancestor_id (&ghost_quadrant, level + 1);
+        dealii_index = ancestor->child_index(child_id);
+      }
+
+    typename Triangulation<dim,spacedim>::cell_iterator cell (tria, ghost_level, dealii_index);
+    if (!cell->has_children())
+      {
+        cell->clear_coarsen_flag();
+        cell->set_subdomain_id(ghost_owner);
+      }
+    else
+      delete_all_children<dim,spacedim> (cell);
+  }
+
+
+
+  /**
+   * Walk the deal.II cell @p dealii_cell and the p4est quadrant
+   * @p p4est_cell down in lockstep and, for every place where the two
+   * meshes meet, store a CellStatus in the user data of the relevant
+   * quadrant and run all pack callbacks on it.
+   *
+   * The four cases are:
+   * - both have children: recurse;
+   * - neither has children: CELL_PERSIST, stored on the quadrant itself;
+   * - only p4est has children: CELL_REFINE, stored on the first child
+   *   quadrant, while the other children are marked CELL_INVALID;
+   * - only deal.II has children: CELL_COARSEN, stored on the quadrant.
+   *
+   * @param tree The p4est tree whose local quadrants are searched.
+   * @param dealii_cell The deal.II cell matching @p p4est_cell.
+   * @param p4est_cell The quadrant currently looked at.
+   * @param attached_data_pack_callbacks List of (offset, callback) pairs;
+   *   each callback receives the user data pointer advanced by its offset
+   *   in bytes.
+   */
+  template <int dim, int spacedim>
+  void
+  attach_mesh_data_recursively (const typename internal::p4est::types<dim>::tree &tree,
+                                const typename Triangulation<dim,spacedim>::cell_iterator &dealii_cell,
+                                const typename internal::p4est::types<dim>::quadrant &p4est_cell,
+                                const typename std::list<std::pair<unsigned int, typename std_cxx11::function<
+                                void(typename parallel::distributed::Triangulation<dim,spacedim>::cell_iterator,
+                                     typename parallel::distributed::Triangulation<dim,spacedim>::CellStatus,
+                                     void *)
+                                > > > &attached_data_pack_callbacks)
+  {
+    typedef std::list<std::pair<unsigned int, typename std_cxx11::function<
+    void(typename parallel::distributed::Triangulation<dim,spacedim>::cell_iterator,
+         typename parallel::distributed::Triangulation<dim,spacedim>::CellStatus,
+         void *)
+    > > > callback_list_t;
+
+    // position of the quadrant among the local quadrants, or -1 if it is
+    // not stored there
+    int idx = sc_array_bsearch(const_cast<sc_array_t *>(&tree.quadrants),
+                               &p4est_cell,
+                               internal::p4est::functions<dim>::quadrant_compare);
+
+    if (idx == -1 && (internal::p4est::functions<dim>::
+                      quadrant_overlaps_tree (const_cast<typename internal::p4est::types<dim>::tree *>(&tree),
+                                              &p4est_cell)
+                      == false))
+      return; //this quadrant and none of its children belongs to us.
+
+    // the quadrant overlaps the local tree but is not stored itself, so
+    // it must be refined further in p4est
+    bool p4est_has_children = (idx == -1);
+
+    if (p4est_has_children && dealii_cell->has_children())
+      {
+        //recurse further
+        typename internal::p4est::types<dim>::quadrant
+        p4est_child[GeometryInfo<dim>::max_children_per_cell];
+        for (unsigned int c=0; c<GeometryInfo<dim>::max_children_per_cell; ++c)
+          switch (dim)
+            {
+            case 2:
+              P4EST_QUADRANT_INIT(&p4est_child[c]);
+              break;
+            case 3:
+              P8EST_QUADRANT_INIT(&p4est_child[c]);
+              break;
+            default:
+              Assert (false, ExcNotImplemented());
+            }
+
+        internal::p4est::functions<dim>::
+        quadrant_childrenv (&p4est_cell, p4est_child);
+
+        for (unsigned int c=0;
+             c<GeometryInfo<dim>::max_children_per_cell; ++c)
+          {
+            attach_mesh_data_recursively<dim,spacedim> (tree,
+                                                        dealii_cell->child(c),
+                                                        p4est_child[c],
+                                                        attached_data_pack_callbacks);
+          }
+      }
+    else if (!p4est_has_children && !dealii_cell->has_children())
+      {
+        //this active cell didn't change
+        typename internal::p4est::types<dim>::quadrant *q;
+        q = static_cast<typename internal::p4est::types<dim>::quadrant *> (
+              sc_array_index (const_cast<sc_array_t *>(&tree.quadrants), idx)
+            );
+        // the status is stored at the start of the user data
+        *static_cast<typename parallel::distributed::Triangulation<dim,spacedim>::CellStatus *>(q->p.user_data) = parallel::distributed::Triangulation<dim,spacedim>::CELL_PERSIST;
+
+        for (typename callback_list_t::const_iterator it = attached_data_pack_callbacks.begin();
+             it != attached_data_pack_callbacks.end();
+             ++it)
+          {
+            void *ptr = static_cast<char *>(q->p.user_data) + (*it).first; //add offset
+            ((*it).second)(dealii_cell,
+                           parallel::distributed::Triangulation<dim,spacedim>::CELL_PERSIST,
+                           ptr);
+          }
+      }
+    else if (p4est_has_children)
+      {
+        //this cell got refined
+
+        //attach to the first child, because we can only attach to active
+        // quadrants
+        typename internal::p4est::types<dim>::quadrant
+        p4est_child[GeometryInfo<dim>::max_children_per_cell];
+        for (unsigned int c=0; c<GeometryInfo<dim>::max_children_per_cell; ++c)
+          switch (dim)
+            {
+            case 2:
+              P4EST_QUADRANT_INIT(&p4est_child[c]);
+              break;
+            case 3:
+              P8EST_QUADRANT_INIT(&p4est_child[c]);
+              break;
+            default:
+              Assert (false, ExcNotImplemented());
+            }
+
+        internal::p4est::functions<dim>::
+        quadrant_childrenv (&p4est_cell, p4est_child);
+        int child0_idx = sc_array_bsearch(const_cast<sc_array_t *>(&tree.quadrants),
+                                          &p4est_child[0],
+                                          internal::p4est::functions<dim>::quadrant_compare);
+        Assert(child0_idx != -1, ExcMessage("the first child should exist as an active quadrant!"));
+
+        typename internal::p4est::types<dim>::quadrant *q;
+        q = static_cast<typename internal::p4est::types<dim>::quadrant *> (
+              sc_array_index (const_cast<sc_array_t *>(&tree.quadrants), child0_idx)
+            );
+        *static_cast<typename parallel::distributed::Triangulation<dim,spacedim>::CellStatus *>(q->p.user_data) = parallel::distributed::Triangulation<dim,spacedim>::CELL_REFINE;
+
+        for (typename callback_list_t::const_iterator it = attached_data_pack_callbacks.begin();
+             it != attached_data_pack_callbacks.end();
+             ++it)
+          {
+            void *ptr = static_cast<char *>(q->p.user_data) + (*it).first; //add offset
+
+            ((*it).second)(dealii_cell,
+                           parallel::distributed::Triangulation<dim,spacedim>::CELL_REFINE,
+                           ptr);
+          }
+
+        //mark other children as invalid, so that unpack only happens once
+        for (unsigned int i=1; i<GeometryInfo<dim>::max_children_per_cell; ++i)
+          {
+            int child_idx = sc_array_bsearch(const_cast<sc_array_t *>(&tree.quadrants),
+                                             &p4est_child[i],
+                                             internal::p4est::functions<dim>::quadrant_compare);
+            // as for the first child: a failed search must not be used as
+            // an array index
+            Assert(child_idx != -1, ExcMessage("all children should exist as active quadrants!"));
+            q = static_cast<typename internal::p4est::types<dim>::quadrant *> (
+                  sc_array_index (const_cast<sc_array_t *>(&tree.quadrants), child_idx)
+                );
+            *static_cast<typename parallel::distributed::Triangulation<dim,spacedim>::CellStatus *>(q->p.user_data) = parallel::distributed::Triangulation<dim,spacedim>::CELL_INVALID;
+          }
+
+
+      }
+    else
+      {
+        //its children got coarsened into this cell
+        typename internal::p4est::types<dim>::quadrant *q;
+        q = static_cast<typename internal::p4est::types<dim>::quadrant *> (
+              sc_array_index (const_cast<sc_array_t *>(&tree.quadrants), idx)
+            );
+        *static_cast<typename parallel::distributed::Triangulation<dim,spacedim>::CellStatus *>(q->p.user_data) = parallel::distributed::Triangulation<dim,spacedim>::CELL_COARSEN;
+
+        for (typename callback_list_t::const_iterator it = attached_data_pack_callbacks.begin();
+             it != attached_data_pack_callbacks.end();
+             ++it)
+          {
+            void *ptr = static_cast<char *>(q->p.user_data) + (*it).first; //add offset
+            ((*it).second)(dealii_cell,
+                           parallel::distributed::Triangulation<dim,spacedim>::CELL_COARSEN,
+                           ptr);
+          }
+      }
+  }
+  // Descend dealii_cell and p4est_cell in lockstep and append cell weights (1000 plus signals.cell_weight) to 'weight' in visiting order.
+  template <int dim, int spacedim>
+  void
+  get_cell_weights_recursively (const typename internal::p4est::types<dim>::tree &tree,
+                                const typename Triangulation<dim,spacedim>::cell_iterator &dealii_cell,
+                                const typename internal::p4est::types<dim>::quadrant &p4est_cell,
+                                const typename Triangulation<dim,spacedim>::Signals &signals,
+                                std::vector<unsigned int> &weight)
+  {
+    const int idx = sc_array_bsearch(const_cast<sc_array_t *>(&tree.quadrants),
+                                     &p4est_cell,
+                                     internal::p4est::functions<dim>::quadrant_compare);
+
+    if (idx == -1 && (internal::p4est::functions<dim>::
+                      quadrant_overlaps_tree (const_cast<typename internal::p4est::types<dim>::tree *>(&tree),
+                                              &p4est_cell)
+                      == false))
+      return; // Neither this quadrant nor any of its children belongs to us.
+
+    const bool p4est_has_children = (idx == -1); // here: not stored itself, but it overlaps the tree
+
+    if (p4est_has_children && dealii_cell->has_children())
+      {
+        // both sides are refined here: recurse into the children
+        typename internal::p4est::types<dim>::quadrant
+        p4est_child[GeometryInfo<dim>::max_children_per_cell];
+        for (unsigned int c=0; c<GeometryInfo<dim>::max_children_per_cell; ++c)
+          switch (dim)
+            {
+            case 2:
+              P4EST_QUADRANT_INIT(&p4est_child[c]);
+              break;
+            case 3:
+              P8EST_QUADRANT_INIT(&p4est_child[c]);
+              break;
+            default:
+              Assert (false, ExcNotImplemented());
+            }
+
+        internal::p4est::functions<dim>::
+        quadrant_childrenv (&p4est_cell, p4est_child);
+
+        for (unsigned int c=0;
+             c<GeometryInfo<dim>::max_children_per_cell; ++c)
+          {
+            get_cell_weights_recursively<dim,spacedim> (tree,
+                                                        dealii_cell->child(c),
+                                                        p4est_child[c],
+                                                        signals,
+                                                        weight);
+          }
+      }
+    else if (!p4est_has_children && !dealii_cell->has_children())
+      {
+        // This active cell didn't change: one entry, queried with CELL_PERSIST
+        weight.push_back(1000);
+        weight.back() += signals.cell_weight(dealii_cell,
+                                             parallel::distributed::Triangulation<dim,spacedim>::CELL_PERSIST);
+      }
+    else if (p4est_has_children)
+      {
+        // This cell will be refined: p4est has children here, deal.II does not
+        unsigned int parent_weight(1000);
+        parent_weight += signals.cell_weight(dealii_cell,
+                                             parallel::distributed::Triangulation<dim,spacedim>::CELL_REFINE);
+
+        for (unsigned int c=0; c<GeometryInfo<dim>::max_children_per_cell; ++c)
+          {
+            // Every child gets the full weight computed for the parent cell
+            weight.push_back(parent_weight);
+          }
+      }
+    else
+      {
+        // This cell's children will be coarsened into this cell: one entry
+        weight.push_back(1000);
+        weight.back() += signals.cell_weight(dealii_cell,
+                                             parallel::distributed::Triangulation<dim,spacedim>::CELL_COARSEN);
+      }
+  }
+
+  // Descend both trees in lockstep; at each p4est leaf read the stored CellStatus and call unpack_callback on the data 'offset' bytes into user_data.
+  template <int dim, int spacedim>
+  void
+  post_mesh_data_recursively (const typename internal::p4est::types<dim>::tree &tree,
+                              const typename Triangulation<dim,spacedim>::cell_iterator &dealii_cell,
+                              const typename Triangulation<dim,spacedim>::cell_iterator &parent_cell,
+                              const typename internal::p4est::types<dim>::quadrant &p4est_cell,
+                              const unsigned int offset,
+                              const typename std_cxx11::function<
+                              void(typename parallel::distributed::Triangulation<dim,spacedim>::cell_iterator, typename parallel::distributed::Triangulation<dim,spacedim>::CellStatus, void *)
+                              > &unpack_callback)
+  {
+    int idx = sc_array_bsearch(const_cast<sc_array_t *>(&tree.quadrants),
+                               &p4est_cell,
+                               internal::p4est::functions<dim>::quadrant_compare);
+    if (idx == -1 && (internal::p4est::functions<dim>::
+                      quadrant_overlaps_tree (const_cast<typename internal::p4est::types<dim>::tree *>(&tree),
+                                              &p4est_cell)
+                      == false))
+      // neither this quadrant nor any of its children belong to us.
+      return;
+
+
+    const bool p4est_has_children = (idx == -1);
+    if (p4est_has_children)
+      {
+        Assert(dealii_cell->has_children(), ExcInternalError());
+
+        // recurse further, passing dealii_cell down as the children's parent
+        typename internal::p4est::types<dim>::quadrant
+        p4est_child[GeometryInfo<dim>::max_children_per_cell];
+        for (unsigned int c=0; c<GeometryInfo<dim>::max_children_per_cell; ++c)
+          switch (dim)
+            {
+            case 2:
+              P4EST_QUADRANT_INIT(&p4est_child[c]);
+              break;
+            case 3:
+              P8EST_QUADRANT_INIT(&p4est_child[c]);
+              break;
+            default:
+              Assert (false, ExcNotImplemented());
+            }
+
+        internal::p4est::functions<dim>::
+        quadrant_childrenv (&p4est_cell, p4est_child);
+
+        for (unsigned int c=0;
+             c<GeometryInfo<dim>::max_children_per_cell; ++c)
+          {
+            post_mesh_data_recursively<dim,spacedim> (tree,
+                                                      dealii_cell->child(c),
+                                                      dealii_cell,
+                                                      p4est_child[c],
+                                                      offset,
+                                                      unpack_callback);
+          }
+      }
+    else
+      {
+        Assert(! dealii_cell->has_children(), ExcInternalError());
+
+        typename internal::p4est::types<dim>::quadrant *q;
+        q = static_cast<typename internal::p4est::types<dim>::quadrant *> (
+              sc_array_index (const_cast<sc_array_t *>(&tree.quadrants), idx)
+            );
+        // the CellStatus is read from the very beginning of user_data, the callback's data from 'offset' bytes further in
+        void *ptr = static_cast<char *>(q->p.user_data) + offset;
+        typename parallel::distributed::Triangulation<dim,spacedim>::CellStatus
+        status = * static_cast<
+                 typename parallel::distributed::Triangulation<dim,spacedim>::CellStatus *
+                 >(q->p.user_data);
+        switch (status)
+          {
+          case parallel::distributed::Triangulation<dim,spacedim>::CELL_PERSIST:
+          {
+            unpack_callback(dealii_cell, status, ptr);
+            break;
+          }
+          case parallel::distributed::Triangulation<dim,spacedim>::CELL_REFINE:
+          {
+            unpack_callback(parent_cell, status, ptr); // note: called on the parent, not on dealii_cell
+            break;
+          }
+          case parallel::distributed::Triangulation<dim,spacedim>::CELL_COARSEN:
+          {
+            unpack_callback(dealii_cell, status, ptr);
+            break;
+          }
+          case parallel::distributed::Triangulation<dim,spacedim>::CELL_INVALID:
+          {
+            break; // nothing is unpacked for this status
+          }
+          default:
+            AssertThrow (false, ExcInternalError());
+          }
+      }
+  }
+
+
+
+  /**
+   * Stores which cells (given as internal::p4est::types<dim>::quadrant objects)
+   * shall be refined and which shall be coarsened. Both lists are walked
+   * front to back by the static callbacks below via the two list pointers.
+   */
+  template <int dim, int spacedim>
+  class RefineAndCoarsenList
+  {
+  public:
+    RefineAndCoarsenList (const Triangulation<dim,spacedim> &triangulation,
+                          const std::vector<types::global_dof_index> &p4est_tree_to_coarse_cell_permutation,
+                          const types::subdomain_id                   my_subdomain);
+
+    /**
+     * A callback function that we pass to the p4est data structures when a
+     * forest is to be refined. The p4est functions call it back with a tree
+     * (the index of the tree that grows out of a given coarse cell) and a
+     * refinement path from that coarse cell to a terminal/leaf cell. The
+     * function returns whether the corresponding cell in the deal.II
+     * triangulation has the refine flag set.
+     */
+    static
+    int
+    refine_callback (typename internal::p4est::types<dim>::forest *forest,
+                     typename internal::p4est::types<dim>::topidx  coarse_cell_index,
+                     typename internal::p4est::types<dim>::quadrant *quadrant);
+
+    /**
+     * Same as the refine_callback function, but return whether all
+     * GeometryInfo<dim>::max_children_per_cell given children are to be coarsened away.
+     */
+    static
+    int
+    coarsen_callback (typename internal::p4est::types<dim>::forest *forest,
+                      typename internal::p4est::types<dim>::topidx  coarse_cell_index,
+                      typename internal::p4est::types<dim>::quadrant *children[]);
+
+    bool pointers_are_at_end () const; // true once both list pointers have reached the end of their lists
+
+  private:
+    std::vector<typename internal::p4est::types<dim>::quadrant> refine_list;
+    typename std::vector<typename internal::p4est::types<dim>::quadrant>::const_iterator current_refine_pointer;
+
+    std::vector<typename internal::p4est::types<dim>::quadrant> coarsen_list;
+    typename std::vector<typename internal::p4est::types<dim>::quadrant>::const_iterator current_coarsen_pointer;
+    // Recursively append the flagged cells of subdomain my_subdomain below 'cell' to the two lists (same parameter spelling as the definition).
+    void build_lists (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                      const typename internal::p4est::types<dim>::quadrant &p4est_cell,
+                      const types::subdomain_id my_subdomain);
+  };
+
+
+
+  template <int dim, int spacedim>
+  bool
+  RefineAndCoarsenList<dim,spacedim>::
+  pointers_are_at_end () const
+  {
+    // true once both list pointers have reached the end of their lists
+    const bool refine_list_done = (current_refine_pointer == refine_list.end());
+    return refine_list_done && (current_coarsen_pointer == coarsen_list.end());
+  }
+
+
+  // Count the flags on cells of subdomain my_subdomain, then fill both lists by visiting the coarse cells in p4est tree order.
+  template <int dim, int spacedim>
+  RefineAndCoarsenList<dim,spacedim>::
+  RefineAndCoarsenList (const Triangulation<dim,spacedim>            &triangulation,
+                        const std::vector<types::global_dof_index>   &p4est_tree_to_coarse_cell_permutation,
+                        const types::subdomain_id                    my_subdomain)
+  {
+    // count how many flags are set on local cells and allocate that much memory
+    unsigned int n_refine_flags  = 0,
+                 n_coarsen_flags = 0;
+    for (typename Triangulation<dim,spacedim>::active_cell_iterator
+         cell = triangulation.begin_active();
+         cell != triangulation.end(); ++cell)
+      {
+        // skip cells that are not local
+        if (cell->subdomain_id() != my_subdomain)
+          continue;
+
+        if (cell->refine_flag_set())
+          ++n_refine_flags;
+        else if (cell->coarsen_flag_set())
+          ++n_coarsen_flags;
+      }
+
+    refine_list.reserve (n_refine_flags);
+    coarsen_list.reserve (n_coarsen_flags);
+
+
+    // now build the lists of cells that are flagged. note that p4est will
+    // traverse its cells in the order in which trees appear in the
+    // forest. this order is not the same as the order of coarse cells in the
+    // deal.II Triangulation because we have translated everything by the
+    // coarse_cell_to_p4est_tree_permutation permutation. in order to make
+    // sure that the output array is already in the correct order, traverse
+    // our coarse cells in the same order in which p4est will:
+    for (unsigned int c=0; c<triangulation.n_cells(0); ++c)
+      {
+        unsigned int coarse_cell_index =
+          p4est_tree_to_coarse_cell_permutation[c];
+
+        const typename Triangulation<dim,spacedim>::cell_iterator
+        cell (&triangulation, 0, coarse_cell_index);
+
+        typename internal::p4est::types<dim>::quadrant p4est_cell;
+        internal::p4est::functions<dim>::
+        quadrant_set_morton (&p4est_cell,
+                             /*level=*/0,
+                             /*index=*/0);
+        p4est_cell.p.which_tree = c; // tree index; the callbacks compare against it
+        build_lists (cell, p4est_cell, my_subdomain);
+      }
+
+
+    Assert(refine_list.size() == n_refine_flags,
+           ExcInternalError());
+    Assert(coarsen_list.size() == n_coarsen_flags,
+           ExcInternalError());
+
+    // make sure that our ordering in fact worked: tree indices never decrease
+    for (unsigned int i=1; i<refine_list.size(); ++i)
+      Assert (refine_list[i].p.which_tree >=
+              refine_list[i-1].p.which_tree,
+              ExcInternalError());
+    for (unsigned int i=1; i<coarsen_list.size(); ++i)
+      Assert (coarsen_list[i].p.which_tree >=
+              coarsen_list[i-1].p.which_tree,
+              ExcInternalError());
+
+    current_refine_pointer  = refine_list.begin();
+    current_coarsen_pointer = coarsen_list.begin();
+  }
+
+
+  // Depth-first walk below 'cell': record the quadrant of every flagged childless cell of subdomain my_subdomain in refine_list or coarsen_list.
+  template <int dim, int spacedim>
+  void
+  RefineAndCoarsenList<dim,spacedim>::
+  build_lists (const typename Triangulation<dim,spacedim>::cell_iterator     &cell,
+               const typename internal::p4est::types<dim>::quadrant &p4est_cell,
+               const types::subdomain_id my_subdomain)
+  {
+    if (!cell->has_children()) // no children: end of the recursion
+      {
+        if (cell->subdomain_id() == my_subdomain)
+          {
+            if (cell->refine_flag_set())
+              refine_list.push_back (p4est_cell);
+            else if (cell->coarsen_flag_set())
+              coarsen_list.push_back (p4est_cell);
+          }
+      }
+    else
+      {
+        typename internal::p4est::types<dim>::quadrant
+        p4est_child[GeometryInfo<dim>::max_children_per_cell];
+        for (unsigned int c=0; c<GeometryInfo<dim>::max_children_per_cell; ++c)
+          switch (dim)
+            {
+            case 2:
+              P4EST_QUADRANT_INIT(&p4est_child[c]);
+              break;
+            case 3:
+              P8EST_QUADRANT_INIT(&p4est_child[c]);
+              break;
+            default:
+              Assert (false, ExcNotImplemented());
+            }
+        internal::p4est::functions<dim>::
+        quadrant_childrenv (&p4est_cell,
+                            p4est_child);
+        for (unsigned int c=0;
+             c<GeometryInfo<dim>::max_children_per_cell; ++c)
+          {
+            p4est_child[c].p.which_tree = p4est_cell.p.which_tree; // children carry their parent's tree index
+            build_lists (cell->child(c),
+                         p4est_child[c],
+                         my_subdomain);
+          }
+      }
+  }
+
+  // p4est refinement callback: returns true (and advances the list pointer) if 'quadrant' equals the next entry of refine_list, false otherwise.
+  template <int dim, int spacedim>
+  int
+  RefineAndCoarsenList<dim,spacedim>::
+  refine_callback (typename internal::p4est::types<dim>::forest *forest,
+                   typename internal::p4est::types<dim>::topidx  coarse_cell_index,
+                   typename internal::p4est::types<dim>::quadrant *quadrant)
+  {
+    RefineAndCoarsenList<dim,spacedim> *this_object
+      = reinterpret_cast<RefineAndCoarsenList<dim,spacedim>*>(forest->user_pointer);
+
+    // if there are no more cells in our list the current cell can't be
+    // flagged for refinement
+    if (this_object->current_refine_pointer == this_object->refine_list.end())
+      return false;
+
+    Assert (coarse_cell_index <=
+            this_object->current_refine_pointer->p.which_tree,
+            ExcInternalError());
+
+    // if p4est hasn't yet reached the tree of the next flagged cell the
+    // current cell can't be flagged for refinement
+    if (coarse_cell_index <
+        this_object->current_refine_pointer->p.which_tree)
+      return false;
+
+    // now we're in the right tree in the forest, i.e. the tree indices agree
+    Assert (coarse_cell_index ==
+            this_object->current_refine_pointer->p.which_tree,
+            ExcInternalError());
+
+    // make sure that the p4est loop over cells hasn't gotten ahead of our own
+    // pointer
+    Assert (internal::p4est::functions<dim>::
+            quadrant_compare (quadrant,
+                              &*this_object->current_refine_pointer) <= 0,
+            ExcInternalError());
+
+    // now, if the p4est cell is one in the list, it is supposed to be refined
+    if (internal::p4est::functions<dim>::
+        quadrant_is_equal (quadrant, &*this_object->current_refine_pointer))
+      {
+        ++this_object->current_refine_pointer;
+        return true;
+      }
+
+    // p4est cell is not in list
+    return false;
+  }
+
+
+  // p4est coarsening callback: returns true (and advances past all siblings) if children[0] equals the next entry of coarsen_list, false otherwise.
+  template <int dim, int spacedim>
+  int
+  RefineAndCoarsenList<dim,spacedim>::
+  coarsen_callback (typename internal::p4est::types<dim>::forest *forest,
+                    typename internal::p4est::types<dim>::topidx  coarse_cell_index,
+                    typename internal::p4est::types<dim>::quadrant *children[])
+  {
+    RefineAndCoarsenList<dim,spacedim> *this_object
+      = reinterpret_cast<RefineAndCoarsenList<dim,spacedim>*>(forest->user_pointer);
+
+    // if there are no more cells in our list the current cell can't be
+    // flagged for coarsening
+    if (this_object->current_coarsen_pointer ==
+        this_object->coarsen_list.end())
+      return false;
+
+    Assert (coarse_cell_index <=
+            this_object->current_coarsen_pointer->p.which_tree,
+            ExcInternalError());
+
+    // if p4est hasn't yet reached the tree of the next flagged cell the
+    // current cell can't be flagged for coarsening
+    if (coarse_cell_index <
+        this_object->current_coarsen_pointer->p.which_tree)
+      return false;
+
+    // now we're in the right tree in the forest, i.e. the tree indices agree
+    Assert (coarse_cell_index ==
+            this_object->current_coarsen_pointer->p.which_tree,
+            ExcInternalError());
+
+    // make sure that the p4est loop over cells hasn't gotten ahead of our own
+    // pointer
+    Assert (internal::p4est::functions<dim>::
+            quadrant_compare (children[0],
+                              &*this_object->current_coarsen_pointer) <= 0,
+            ExcInternalError());
+
+    // now, if the p4est cell is one in the list, it is supposed to be
+    // coarsened
+    if (internal::p4est::functions<dim>::
+        quadrant_is_equal (children[0],
+                           &*this_object->current_coarsen_pointer))
+      {
+        // move current pointer one up
+        ++this_object->current_coarsen_pointer;
+
+        // note that the next max_children_per_cell-1 cells in our list need
+        // to correspond to the other siblings of the cell we have just found
+        for (unsigned int c=1; c<GeometryInfo<dim>::max_children_per_cell; ++c)
+          {
+            Assert (internal::p4est::functions<dim>::
+                    quadrant_is_equal (children[c],
+                                       &*this_object->current_coarsen_pointer),
+                    ExcInternalError());
+            ++this_object->current_coarsen_pointer;
+          }
+
+        return true;
+      }
+
+    // p4est cell is not in list
+    return false;
+  }
+
+
+
+  /**
+   * A data structure that we use to store the weights of all cells to
+   * be used upon partitioning. The class stores them in the order in
+   * which p4est will encounter cells, not in the order in which
+   * deal.II walks over them.
+   */
+  template <int dim, int spacedim>
+  class PartitionWeights
+  {
+  public:
+    /**
+     * This constructor assumes the cell_weights are already sorted in the
+     * order that p4est will encounter the cells, and they do not contain
+     * ghost cells or artificial cells. The weights are copied.
+     */
+    PartitionWeights (const std::vector<unsigned int> &cell_weights);
+
+    /**
+     * A callback function that we pass to the p4est data structures when a
+     * forest is to be partitioned. The p4est functions call it back with a
+     * forest, a tree index and a quadrant; only the forest's user_pointer
+     * is used. Each call returns the next stored weight and advances the
+     * internal pointer by one.
+     */
+    static
+    int
+    cell_weight (typename internal::p4est::types<dim>::forest *forest,
+                 typename internal::p4est::types<dim>::topidx  coarse_cell_index,
+                 typename internal::p4est::types<dim>::quadrant *quadrant);
+
+  private:
+    std::vector<unsigned int> cell_weights_list; // copy of the weights given to the constructor
+    std::vector<unsigned int>::const_iterator current_pointer; // next weight cell_weight() returns
+  };
+
+
+  template <int dim, int spacedim>
+  PartitionWeights<dim,spacedim>::
+  PartitionWeights (const std::vector<unsigned int> &cell_weights)
+    :
+    cell_weights_list(cell_weights),
+    // we will walk through our copy of the list sequentially, so start at
+    // its first element (cell_weights_list is declared, and therefore
+    // initialized, before current_pointer)
+    current_pointer(cell_weights_list.begin())
+  {}
+
+
+  template <int dim, int spacedim>
+  int
+  PartitionWeights<dim,spacedim>::
+  cell_weight (typename internal::p4est::types<dim>::forest *forest,
+               typename internal::p4est::types<dim>::topidx,
+               typename internal::p4est::types<dim>::quadrant *)
+  {
+    // the function gets two additional arguments, but we don't need them
+    // since we know in which order p4est will walk through the cells
+    // and have already built our weight lists in this order
+
+    PartitionWeights<dim,spacedim> *this_object
+      = reinterpret_cast<PartitionWeights<dim,spacedim>*>(forest->user_pointer);
+
+    Assert (this_object->current_pointer >= this_object->cell_weights_list.begin(),
+            ExcInternalError());
+    Assert (this_object->current_pointer < this_object->cell_weights_list.end(),
+            ExcInternalError());
+
+    // get the weight, increment the pointer, and return the weight
+    return *this_object->current_pointer++; // the unsigned int weight is converted to the int return type
+  }
+}
+
+
+// one-time initialization (sc_init, p4est_init) and finalization (sc_finalize)
+namespace internal
+{
+  namespace p4est
+  {
+    struct InitFinalize
+    {
+    private:
+      struct Singleton
+      {
+        Singleton ()
+        {
+          // ensure that the initialization code is run only once, even if we
+          // link with 1d, 2d, and 3d libraries
+          static bool initialized = false;
+
+          if (initialized == false)
+            {
+              sc_init (MPI_COMM_WORLD,
+                       0, 0, 0, SC_LP_SILENT);
+              p4est_init (0, SC_LP_SILENT);
+
+              initialized = true;
+            }
+        }
+
+        ~Singleton ()
+        {
+          // same here: run the finalization code at most once
+          static bool deinitialized = false;
+
+          if (deinitialized == false)
+            {
+              // p4est has no p4est_finalize function
+              sc_finalize ();
+
+              deinitialized = true;
+            }
+        }
+      };
+
+    public:
+      // do run the initialization code, at least the first time around we get
+      // to this function
+      static void do_initialize ()
+      {
+        static Singleton singleton; // function-local static: constructed on the first call only
+      }
+    };
+  }
+}
+
+
+namespace parallel
+{
+  namespace distributed
+  {
+
+    /* ---------------------- class Triangulation<dim,spacedim> ------------------------------ */
+
+    // Constructor: forwards to the base class, sets connectivity, parallel_forest and parallel_ghost to 0, and triggers the one-time p4est setup.
+    template <int dim, int spacedim>
+    Triangulation<dim,spacedim>::
+    Triangulation (MPI_Comm mpi_communicator,
+                   const typename dealii::Triangulation<dim,spacedim>::MeshSmoothing smooth_grid,
+                   const Settings settings_)
+      :
+      // do not check for distorted cells
+      dealii::parallel::Triangulation<dim,spacedim>
+      (mpi_communicator,
+       smooth_grid,
+       false),
+      settings(settings_),
+      triangulation_has_content (false),
+      connectivity (0),
+      parallel_forest (0),
+      refinement_in_progress (false),
+      attached_data_size(0),
+      n_attached_datas(0),
+      n_attached_deserialize(0)
+    {
+      // initialize p4est. do this in a separate function since it has to
+      // happen only once, even if we have triangulation objects for several
+      // different space dimensions
+      dealii::internal::p4est::InitFinalize::do_initialize ();
+
+      parallel_ghost = 0; // assigned here in the body rather than in the initializer list
+    }
+
+
+    // Destructor: calls clear() and then asserts that the object is back in its empty state.
+    template <int dim, int spacedim>
+    Triangulation<dim,spacedim>::~Triangulation ()
+    {
+      clear (); // presumably resets the members checked below -- its body is not shown here
+
+      Assert (triangulation_has_content == false,
+              ExcInternalError());
+      Assert (connectivity == 0,    ExcInternalError());
+      Assert (parallel_forest == 0, ExcInternalError());
+      Assert (refinement_in_progress == false, ExcInternalError());
+    }
+
+
+
+    // Create the coarse mesh through the base class, then set up the p4est side and copy the local forest back into this triangulation.
+    template <int dim, int spacedim>
+    void
+    Triangulation<dim,spacedim>::
+    create_triangulation (const std::vector<Point<spacedim> >    &vertices,
+                          const std::vector<CellData<dim> > &cells,
+                          const SubCellData                 &subcelldata)
+    {
+      try
+        {
+          dealii::Triangulation<dim,spacedim>::
+          create_triangulation (vertices, cells, subcelldata);
+        }
+      catch (const typename dealii::Triangulation<dim,spacedim>::DistortedCellList &)
+        {
+          // the underlying triangulation should not be checking for distorted
+          // cells
+          AssertThrow (false, ExcInternalError());
+        }
+
+      // note that now we have some content in the p4est objects and call the
+      // functions that do the actual work (which are dimension dependent, so
+      // separate)
+      triangulation_has_content = true;
+
+      setup_coarse_cell_to_p4est_tree_permutation ();
+
+      copy_new_triangulation_to_p4est (dealii::internal::int2type<dim>());
+
+      try
+        {
+          copy_local_forest_to_triangulation ();
+        }
+      catch (const typename dealii::Triangulation<dim,spacedim>::DistortedCellList &)
+        {
+          // the underlying triangulation should not be checking for distorted
+          // cells (same exception type as in the handler above, for any spacedim)
+          AssertThrow (false, ExcInternalError());
+        }
+
+      this->update_number_cache ();
+    }
+
+
+    // This namespace (which wraps an anonymous namespace) contains utilities
+    // for the function Triangulation::communicate_locally_moved_vertices
+    namespace CommunicateLocallyMovedVertices
+    {
+      namespace
+      {
+        /**
+          * A list of tree+quadrant and their vertex indices, together with
+          * the vertex locations, for the cells exchanged with one process.
+          * pack_data()/unpack_data() convert it to/from a flat char buffer.
+          */
+        template <int dim, int spacedim>
+        struct CellInfo
+        {
+          // store all the tree_indices we send/receive consecutively (n_cells entries)
+          std::vector<unsigned int> tree_index;
+          // store all the quadrants we send/receive consecutively (n_cells entries)
+          std::vector<typename dealii::internal::p4est::types<dim>::quadrant> quadrants;
+          // store for each cell the number of vertices we send/receive
+          // and then the vertex indices (for each cell: n_vertices+1 entries)
+          std::vector<unsigned int> vertex_indices;
+          // store for each cell the vertices we send/receive
+          // (for each cell n_vertices entries)
+          std::vector<dealii::Point<spacedim> > vertices;
+          // for receiving and unpacking data we need to store pointers to the
+          // first vertex and vertex_index on each cell additionally
+          // both vectors have as many entries as there are cells
+          std::vector<unsigned int * > first_vertex_indices;
+          std::vector<dealii::Point<spacedim>* > first_vertices;
+          // number of bytes pack_data() writes: cell count, tree indices, quadrants, vertex indices and vertices
+          unsigned int bytes_for_buffer () const
+          {
+            return (sizeof(unsigned int) +
+                    tree_index.size() * sizeof(unsigned int) +
+                    quadrants.size() * sizeof(typename dealii::internal::p4est
+                                              ::types<dim>::quadrant) +
+                    vertices.size() * sizeof(dealii::Point<spacedim>)) +
+                   vertex_indices.size() * sizeof(unsigned int);
+          }
+          // serialize all members except the two first_* pointer vectors into 'buffer', which is resized to fit
+          void pack_data (std::vector<char> &buffer) const
+          {
+            buffer.resize(bytes_for_buffer());
+
+            char *ptr = &buffer[0];
+
+            const unsigned int num_cells = tree_index.size();
+            std::memcpy(ptr, &num_cells, sizeof(unsigned int));
+            ptr += sizeof(unsigned int);
+
+            std::memcpy(ptr,
+                        &tree_index[0],
+                        num_cells*sizeof(unsigned int));
+            ptr += num_cells*sizeof(unsigned int);
+
+            std::memcpy(ptr,
+                        &quadrants[0],
+                        num_cells * sizeof(typename dealii::internal::p4est::
+                                           types<dim>::quadrant));
+            ptr += num_cells*sizeof(typename dealii::internal::p4est::types<dim>::
+                                    quadrant);
+
+            std::memcpy(ptr,
+                        &vertex_indices[0],
+                        vertex_indices.size() * sizeof(unsigned int));
+            ptr += vertex_indices.size() * sizeof(unsigned int);
+
+            std::memcpy(ptr,
+                        &vertices[0],
+                        vertices.size() * sizeof(dealii::Point<spacedim>));
+            ptr += vertices.size() * sizeof(dealii::Point<spacedim>);
+
+            Assert (ptr == &buffer[0]+buffer.size(),
+                    ExcInternalError());
+
+          }
+          // inverse of pack_data(); additionally fills first_vertex_indices and first_vertices with one pointer per cell
+          void unpack_data (const std::vector<char> &buffer)
+          {
+            const char *ptr = &buffer[0];
+            unsigned int cells;
+            std::memcpy(&cells, ptr, sizeof(unsigned int));
+            ptr += sizeof(unsigned int);
+
+            tree_index.resize(cells);
+            std::memcpy(&tree_index[0],ptr,sizeof(unsigned int)*cells);
+            ptr += sizeof(unsigned int)*cells;
+
+            quadrants.resize(cells);
+            std::memcpy(&quadrants[0],ptr,
+                        sizeof(typename dealii::internal::p4est::types<dim>::quadrant)*cells);
+            ptr += sizeof(typename dealii::internal::p4est::types<dim>::quadrant)*cells;
+
+            vertex_indices.clear();
+            first_vertex_indices.resize(cells);
+            std::vector<unsigned int> n_vertices_on_cell(cells);
+            std::vector<unsigned int> first_indices (cells);
+            for (unsigned int c=0; c<cells; ++c)
+              {
+                // The first 'vertex index' is the number of vertices (at least one
+                // is expected). Copy it out with memcpy and note where it is stored.
+                unsigned int n_vertices;
+                std::memcpy(&n_vertices, ptr, sizeof(unsigned int));
+                Assert (n_vertices > 0, ExcInternalError());
+                first_indices[c] = vertex_indices.size();
+                vertex_indices.push_back(n_vertices);
+                n_vertices_on_cell[c] = n_vertices;
+                ptr += sizeof(unsigned int);
+                // Now copy all the 'real' vertex_indices
+                vertex_indices.resize(vertex_indices.size() + n_vertices);
+                std::memcpy(&vertex_indices[first_indices[c]+1],
+                            ptr, n_vertices*sizeof(unsigned int));
+                ptr += n_vertices*sizeof(unsigned int);
+              }
+            for (unsigned int c=0; c<cells; ++c)
+              first_vertex_indices[c] = &vertex_indices[first_indices[c]];
+
+            vertices.clear();
+            first_vertices.resize(cells);
+            for (unsigned int c=0; c<cells; ++c)
+              {
+                // We need to store a pointer to the first vertex of each cell, so
+                // remember its position. Copy all vertices of the cell at once
+                // with memcpy: ptr need not be aligned for reading a Point
+                // directly, and no element past the end is ever addressed.
+                const std::size_t n_bytes
+                  = n_vertices_on_cell[c]*sizeof(dealii::Point<spacedim>);
+                first_indices[c] = vertices.size();
+                vertices.resize(vertices.size() + n_vertices_on_cell[c]);
+                std::memcpy(&vertices[first_indices[c]], ptr, n_bytes);
+                ptr += n_bytes;
+              }
+            for (unsigned int c=0; c<cells; ++c)
+              first_vertices[c] = &vertices[first_indices[c]];
+
+            Assert (ptr == &buffer[0]+buffer.size(),
+                    ExcInternalError());
+          }
+        };
+
+
+
+        // Collect, for every neighboring process, the moved vertices that
+        // process has to be told about.
+        //
+        // The function walks the refinement tree below dealii_cell and
+        // handles every cell without children. The caller has to pass the
+        // p4est_cell that corresponds to dealii_cell; for the recursive
+        // calls the matching child quadrants are computed here.
+        //
+        //  - vertices_with_ghost_neighbors maps a vertex index to the set
+        //    of subdomains that may be interested in that vertex.
+        //  - vertex_locally_moved flags the vertices that were moved
+        //    locally; only these are packed for sending.
+        //  - needs_to_get_cell receives the result: per destination
+        //    subdomain one CellInfo to which the tree index, the quadrant,
+        //    the cell-local vertex numbers (preceded by their count) and
+        //    the vertex locations are appended.
+        template <int dim, int spacedim>
+        void
+        fill_vertices_recursively (const typename parallel::distributed::Triangulation<dim,spacedim> &tria,
+                                   const unsigned int tree_index,
+                                   const typename Triangulation<dim,spacedim>::cell_iterator &dealii_cell,
+                                   const typename dealii::internal::p4est::types<dim>::quadrant &p4est_cell,
+                                   const std::map<unsigned int, std::set<dealii::types::subdomain_id> > &vertices_with_ghost_neighbors,
+                                   const std::vector<bool> &vertex_locally_moved,
+                                   std::map<dealii::types::subdomain_id, CellInfo<dim, spacedim> > &needs_to_get_cell)
+        {
+          // Refined cell: pair every deal.II child with its p4est child
+          // quadrant and descend.
+          if (dealii_cell->has_children())
+            {
+              typename dealii::internal::p4est::types<dim>::quadrant
+              p4est_child[GeometryInfo<dim>::max_children_per_cell];
+              dealii::internal::p4est::init_quadrant_children<dim>(p4est_cell, p4est_child);
+
+              for (unsigned int child=0; child<GeometryInfo<dim>::max_children_per_cell; ++child)
+                fill_vertices_recursively<dim,spacedim>(tria,
+                                                        tree_index,
+                                                        dealii_cell->child(child),
+                                                        p4est_child[child],
+                                                        vertices_with_ghost_neighbors,
+                                                        vertex_locally_moved,
+                                                        needs_to_get_cell);
+              return;
+            }
+
+          // From here on we are at a leaf. Only locally owned cells
+          // contribute data.
+          if (!dealii_cell->is_locally_owned())
+            return;
+
+          // Build the union of all subdomains registered for any vertex of
+          // this cell. If a single vertex is interesting, the whole cell is
+          // sent to everybody listed for any of its vertices.
+          std::set<dealii::types::subdomain_id> destinations;
+          for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+            {
+              const std::map<unsigned int, std::set<dealii::types::subdomain_id> >::const_iterator
+              entry = vertices_with_ghost_neighbors.find (dealii_cell->vertex_index(v));
+
+              if (entry == vertices_with_ghost_neighbors.end())
+                continue;
+
+              Assert(entry->second.size()!=0,
+                     ExcInternalError());
+              destinations.insert(entry->second.begin(),
+                                  entry->second.end());
+            }
+
+          if (destinations.empty())
+            return;
+
+          // Gather the cell-local numbers and the locations of those
+          // vertices of this cell that have been moved locally.
+          std::vector<unsigned int> moved_vertex_numbers;
+          std::vector<dealii::Point<spacedim> > moved_vertex_locations;
+          for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+            if (vertex_locally_moved[dealii_cell->vertex_index(v)])
+              {
+                moved_vertex_numbers.push_back(v);
+                moved_vertex_locations.push_back(dealii_cell->vertex(v));
+              }
+
+          if (moved_vertex_numbers.empty())
+            return;
+
+          for (std::set<dealii::types::subdomain_id>::const_iterator
+               destination = destinations.begin();
+               destination != destinations.end(); ++destination)
+            {
+              // look up what is already queued for that subdomain; a
+              // default-constructed entry is created if there is none yet
+              CellInfo<dim,spacedim> &cell_info = needs_to_get_cell[*destination];
+
+              cell_info.tree_index.push_back(tree_index);
+              cell_info.quadrants.push_back(p4est_cell);
+
+              // the count goes first, followed by the vertex numbers
+              cell_info.vertex_indices.push_back(moved_vertex_numbers.size());
+              cell_info.vertex_indices.insert(cell_info.vertex_indices.end(),
+                                              moved_vertex_numbers.begin(),
+                                              moved_vertex_numbers.end());
+
+              cell_info.vertices.insert(cell_info.vertices.end(),
+                                        moved_vertex_locations.begin(),
+                                        moved_vertex_locations.end());
+            }
+        }
+
+
+
+        // Apply received vertex locations to a cell of the local ghost
+        // layer.
+        //
+        // Starting from dealii_cell and the p4est_cell that corresponds to
+        // it (as in fill_vertices_recursively, the caller has to supply a
+        // matching pair), the function descends through the children until
+        // it reaches the cell that equals the given quadrant and then
+        // overwrites the listed vertices of that cell.
+        //
+        // vertex_indices points to the data of this quadrant: its first
+        // entry is the number n of vertices to set, followed by n
+        // cell-local vertex numbers. vertices points to the n new
+        // locations in the same order.
+        template <int dim, int spacedim>
+        void
+        set_vertices_recursively (
+          const parallel::distributed::Triangulation<dim,spacedim> &tria,
+          const typename dealii::internal::p4est::types<dim>::quadrant &p4est_cell,
+          const typename Triangulation<dim,spacedim>::cell_iterator &dealii_cell,
+          const typename dealii::internal::p4est::types<dim>::quadrant &quadrant,
+          const dealii::Point<spacedim> *const vertices,
+          const unsigned int *const vertex_indices)
+        {
+          // We have arrived at the requested quadrant. The matching cell
+          // must be a ghost cell without children.
+          if (dealii::internal::p4est::quadrant_is_equal<dim>(p4est_cell, quadrant))
+            {
+              Assert(!dealii_cell->is_artificial(), ExcInternalError());
+              Assert(!dealii_cell->has_children(), ExcInternalError());
+              Assert(!dealii_cell->is_locally_owned(), ExcInternalError());
+
+              const unsigned int n_vertices = vertex_indices[0];
+              const unsigned int *const local_vertex_numbers = vertex_indices+1;
+
+              // move the vertices of the cell to the received locations
+              for (unsigned int i=0; i<n_vertices; ++i)
+                dealii_cell->vertex(local_vertex_numbers[i]) = vertices[i];
+
+              return;
+            }
+
+          // Stop if there is nothing below this cell, or if the requested
+          // quadrant is not a descendant of the current one.
+          if (!dealii_cell->has_children()
+              || !dealii::internal::p4est::quadrant_is_ancestor<dim> (p4est_cell, quadrant))
+            return;
+
+          typename dealii::internal::p4est::types<dim>::quadrant
+          p4est_child[GeometryInfo<dim>::max_children_per_cell];
+          dealii::internal::p4est::init_quadrant_children<dim>(p4est_cell, p4est_child);
+
+          for (unsigned int child=0; child<GeometryInfo<dim>::max_children_per_cell; ++child)
+            set_vertices_recursively<dim,spacedim> (tria,
+                                                    p4est_child[child],
+                                                    dealii_cell->child(child),
+                                                    quadrant,
+                                                    vertices,
+                                                    vertex_indices);
+        }
+      }
+    }
+
+
+
+    // Release all p4est objects owned by this triangulation, forget the
+    // coarse-cell permutations and the stored periodic face pairs, clear
+    // the underlying serial triangulation and refresh the number cache.
+    template <int dim, int spacedim>
+    void
+    Triangulation<dim,spacedim>::clear ()
+    {
+      triangulation_has_content = false;
+
+      // Destroy whatever p4est objects exist, in the order ghost layer,
+      // forest, connectivity, and reset the pointers.
+      if (parallel_ghost)
+        {
+          dealii::internal::p4est::functions<dim>::ghost_destroy (parallel_ghost);
+          parallel_ghost = 0;
+        }
+      if (parallel_forest)
+        {
+          dealii::internal::p4est::functions<dim>::destroy (parallel_forest);
+          parallel_forest = 0;
+        }
+      if (connectivity)
+        {
+          dealii::internal::p4est::functions<dim>::connectivity_destroy (connectivity);
+          connectivity = 0;
+        }
+
+      // drop the bookkeeping that refers to the old coarse mesh
+      coarse_cell_to_p4est_tree_permutation.resize (0);
+      p4est_tree_to_coarse_cell_permutation.resize (0);
+      periodic_face_pairs_level_0.clear();
+
+      // finally let the base class discard the mesh itself
+      dealii::Triangulation<dim,spacedim>::clear ();
+      this->update_number_cache ();
+    }
+
+    // Return whether the global mesh contains hanging nodes. The answer is
+    // obtained through a maximum over all processes of the communicator.
+    template <int dim, int spacedim>
+    bool
+    Triangulation<dim,spacedim>::has_hanging_nodes () const
+    {
+      // without refined cells there can not be any hanging nodes
+      const unsigned int n_levels_global = this->n_global_levels();
+      if (n_levels_global<=1)
+        return false;
+
+      // An active cell on a level below n_global_levels()-1 implies a
+      // hanging node somewhere, because there also is an active cell on
+      // level n_global_levels()-1. It is not enough to look at the first
+      // active cell on the second finest level, though: we have to search
+      // for one that is locally owned.
+      const unsigned int coarser_level = n_levels_global-2;
+      int found_coarser_cell = 0;
+      for (typename Triangulation<dim, spacedim>::active_cell_iterator
+           cell = this->begin_active(coarser_level);
+           cell != this->end(coarser_level);
+           ++cell)
+        {
+          if (!cell->is_locally_owned())
+            continue;
+
+          found_coarser_cell = 1;
+          break;
+        }
+
+      // true as soon as a single process has found such a cell
+      return Utilities::MPI::max(found_coarser_cell, this->mpi_communicator) > 0;
+    }
+
+
+
+    // Compute the permutation that maps coarse cell indices to p4est tree
+    // indices, together with its inverse. The permutation is obtained by
+    // hierarchically reordering the vertex connectivity graph of the cells.
+    template <int dim, int spacedim>
+    void
+    Triangulation<dim,spacedim>::setup_coarse_cell_to_p4est_tree_permutation ()
+    {
+      // one entry per coarse cell
+      coarse_cell_to_p4est_tree_permutation.resize (this->n_cells(0));
+
+      DynamicSparsityPattern vertex_connectivity_of_cells;
+      GridTools::get_vertex_connectivity_of_cells (*this,
+                                                   vertex_connectivity_of_cells);
+      SparsityTools::reorder_hierarchical (vertex_connectivity_of_cells,
+                                           coarse_cell_to_p4est_tree_permutation);
+
+      // keep the inverse mapping in sync
+      p4est_tree_to_coarse_cell_permutation
+        = Utilities::invert_permutation (coarse_cell_to_p4est_tree_permutation);
+    }
+
+
+
+    // Let p4est write its forest in VTK format, using file_basename as the
+    // base name of the output. The forest must already exist.
+    template <int dim, int spacedim>
+    void
+    Triangulation<dim,spacedim>::write_mesh_vtk (const char *file_basename) const
+    {
+      Assert (parallel_forest != 0,
+              ExcMessage ("Can't produce output when no forest is created yet."));
+
+      dealii::internal::p4est::functions<dim>::vtk_write_file (parallel_forest,
+                                                               0,
+                                                               file_basename);
+    }
+
+
+    // Write the forest (and the data attached to it, if any) to disk.
+    //
+    // The process with my_subdomain==0 additionally writes the text file
+    // "<filename>.info" with the format version, the number of processes,
+    // the number of bytes stored per cell, the number of attached objects
+    // and the number of coarse cells. Afterwards the list of attached
+    // callbacks is reset and the per-cell data held by p4est is released.
+    template <int dim, int spacedim>
+    void
+    Triangulation<dim,spacedim>::
+    save(const char *filename) const
+    {
+      Assert(n_attached_deserialize==0,
+             ExcMessage ("not all SolutionTransfer's got deserialized after the last load()"));
+      Assert(this->n_cells()>0, ExcMessage("Can not save() an empty Triangulation."));
+
+      // this function is const, but the attach bookkeeping below has to be
+      // modified, so get a non-const handle on ourselves
+      dealii::parallel::distributed::Triangulation<dim, spacedim> *const tria
+        = const_cast<dealii::parallel::distributed::Triangulation<dim, spacedim>*>(this);
+
+      // if anything is attached, every cell additionally stores a CellStatus
+      const int real_data_size
+        = (attached_data_size>0
+           ?
+           static_cast<int>(attached_data_size+sizeof(CellStatus))
+           :
+           0);
+
+      if (this->my_subdomain==0)
+        {
+          const std::string info_file_name = std::string(filename)+".info";
+          std::ofstream info_file(info_file_name.c_str());
+          info_file << "version nproc attached_bytes n_attached_objs n_coarse_cells"
+                    << std::endl;
+          info_file << 2 << " "
+                    << Utilities::MPI::n_mpi_processes (this->mpi_communicator) << " "
+                    << real_data_size << " "
+                    << attached_data_pack_callbacks.size() << " "
+                    << this->n_cells(0)
+                    << std::endl;
+        }
+
+      if (attached_data_size>0)
+        tria->attach_mesh_data();
+
+      dealii::internal::p4est::functions<dim>::save(filename,
+                                                    parallel_forest,
+                                                    attached_data_size>0);
+
+      // everything that was attached has been written; start over
+      tria->n_attached_datas = 0;
+      tria->attached_data_size = 0;
+      tria->attached_data_pack_callbacks.clear();
+
+      // and release the data; the user pointer is saved and put back
+      // around the call to reset_data
+      void *const saved_user_pointer = parallel_forest->user_pointer;
+      dealii::internal::p4est::functions<dim>::reset_data (parallel_forest, 0, NULL, NULL);
+      parallel_forest->user_pointer = saved_user_pointer;
+    }
+
+
+    // Restore a forest that was written by save().
+    //
+    // The triangulation must contain exactly the coarse mesh that was
+    // present when save() was called. The file "<filename>.info" is read
+    // first and validated; only then are the existing p4est objects
+    // destroyed and replaced by the ones loaded from filename. The value
+    // of autopartition is forwarded to p4est where the p4est version
+    // supports it and is ignored otherwise.
+    //
+    // Throws ExcIO if the .info file can not be opened or parsed, and
+    // ExcMessage if its content does not fit this triangulation.
+    template <int dim, int spacedim>
+    void
+    Triangulation<dim,spacedim>::
+    load (const char *filename,
+          const bool autopartition)
+    {
+      Assert(this->n_cells()>0, ExcMessage("load() only works if the Triangulation already contains a coarse mesh!"));
+      Assert(this->n_levels()==1, ExcMessage("Triangulation may only contain coarse cells when calling load()."));
+
+      // Read the meta data before anything is destroyed, so that a missing
+      // or broken .info file does not leave us without a forest. The
+      // stream is checked explicitly: otherwise the variables below would
+      // be used without ever having been set.
+      unsigned int version, numcpus, attached_size, attached_count, n_coarse_cells;
+      {
+        std::string fname=std::string(filename)+".info";
+        std::ifstream f(fname.c_str());
+        AssertThrow(f, ExcIO());
+        std::string firstline;
+        getline(f, firstline); //skip first line
+        f >> version >> numcpus >> attached_size >> attached_count >> n_coarse_cells;
+        AssertThrow(f, ExcIO());
+      }
+
+      // The file content is external input, so these checks also have to
+      // be active in optimized mode.
+      AssertThrow(version == 2, ExcMessage("Incompatible version found in .info file."));
+      AssertThrow(this->n_cells(0) == n_coarse_cells, ExcMessage("Number of coarse cells differ!"));
+#if DEAL_II_P4EST_VERSION_GTE(0,3,4,3)
+#else
+      AssertThrow(numcpus <= Utilities::MPI::n_mpi_processes (this->mpi_communicator),
+                  ExcMessage("parallel::distributed::Triangulation::load() only supports loading "
+                             "saved data with a greater or equal number of processes than were used to "
+                             "save() when using p4est 0.3.4.2."));
+#endif
+
+      // now get rid of the p4est objects that describe the current mesh
+      if (parallel_ghost != 0)
+        {
+          dealii::internal::p4est::functions<dim>::ghost_destroy (parallel_ghost);
+          parallel_ghost = 0;
+        }
+      dealii::internal::p4est::functions<dim>::destroy (parallel_forest);
+      parallel_forest = 0;
+      dealii::internal::p4est::functions<dim>::connectivity_destroy (connectivity);
+      connectivity = 0;
+
+      attached_data_size = 0;
+      n_attached_datas = 0;
+      n_attached_deserialize = attached_count;
+
+#if DEAL_II_P4EST_VERSION_GTE(0,3,4,3)
+      parallel_forest = dealii::internal::p4est::functions<dim>::load_ext (
+                          filename, this->mpi_communicator,
+                          attached_size, attached_size>0,
+                          autopartition, 0,
+                          this,
+                          &connectivity);
+#else
+      (void)autopartition;
+      parallel_forest = dealii::internal::p4est::functions<dim>::load (
+                          filename, this->mpi_communicator,
+                          attached_size, attached_size>0,
+                          this,
+                          &connectivity);
+#endif
+      if (numcpus != Utilities::MPI::n_mpi_processes (this->mpi_communicator))
+        // We are changing the number of CPUs so we need to repartition.
+        // Note that p4est actually distributes the cells between the changed
+        // number of CPUs and so everything works without this call, but
+        // this command changes the distribution for some reason, so we
+        // will leave it in here.
+        repartition();
+
+      try
+        {
+          copy_local_forest_to_triangulation ();
+        }
+      // name the exception type with both template arguments so that the
+      // handler also matches for spacedim != dim
+      catch (const typename Triangulation<dim,spacedim>::DistortedCellList &)
+        {
+          // the underlying
+          // triangulation should not
+          // be checking for
+          // distorted cells
+          AssertThrow (false, ExcInternalError());
+        }
+
+      this->update_number_cache ();
+    }
+
+
+
+    // Return the checksum p4est computes for the current forest. The
+    // forest must already exist.
+    template <int dim, int spacedim>
+    unsigned int
+    Triangulation<dim,spacedim>::get_checksum () const
+    {
+      Assert (parallel_forest != 0,
+              ExcMessage ("Can't produce a check sum when no forest is created yet."));
+
+      const unsigned int forest_checksum
+        = dealii::internal::p4est::functions<dim>::checksum (parallel_forest);
+      return forest_checksum;
+    }
+
+    // Refresh the cached numbers of the base class and, if the multigrid
+    // hierarchy is requested in the settings, record the owners of all
+    // level ghost cells.
+    template <int dim, int spacedim>
+    void
+    Triangulation<dim,spacedim>::
+    update_number_cache ()
+    {
+      parallel::Triangulation<dim,spacedim>::update_number_cache();
+
+      // nothing else to do for an empty triangulation ...
+      if (this->n_levels() == 0)
+        return;
+
+      // ... or if no level information is kept
+      if (!(settings & construct_multigrid_hierarchy))
+        return;
+
+      // find level ghost owners: every level subdomain id that is neither
+      // artificial nor our own
+      const dealii::types::subdomain_id own_subdomain
+        = this->locally_owned_subdomain();
+      for (typename Triangulation<dim,spacedim>::cell_iterator
+           cell = this->begin();
+           cell != this->end();
+           ++cell)
+        {
+          const dealii::types::subdomain_id level_owner
+            = cell->level_subdomain_id();
+          if (level_owner == numbers::artificial_subdomain_id)
+            continue;
+          if (level_owner == own_subdomain)
+            continue;
+
+          this->number_cache.level_ghost_owners.insert(level_owner);
+        }
+
+      Assert(this->number_cache.level_ghost_owners.size() < Utilities::MPI::n_mpi_processes(this->mpi_communicator), ExcInternalError());
+    }
+
+
+    // Return a pointer to the p4est tree that belongs to the coarse cell
+    // with the given deal.II index. The index is translated through
+    // coarse_cell_to_p4est_tree_permutation.
+    template <int dim, int spacedim>
+    typename dealii::internal::p4est::types<dim>::tree *
+    Triangulation<dim,spacedim>::
+    init_tree(const int dealii_coarse_cell_index) const
+    {
+      typedef typename dealii::internal::p4est::types<dim>::tree tree_type;
+
+      const unsigned int p4est_tree_number
+        = coarse_cell_to_p4est_tree_permutation[dealii_coarse_cell_index];
+
+      return static_cast<tree_type *>
+             (sc_array_index (parallel_forest->trees,
+                              p4est_tree_number));
+    }
+
+
+
+    // Build the p4est connectivity and an initial forest from the coarse
+    // mesh stored in this triangulation (dim=2, spacedim=2). The mesh must
+    // consist of a single, non-empty level.
+    template <>
+    void
+    Triangulation<2,2>::copy_new_triangulation_to_p4est (dealii::internal::int2type<2>)
+    {
+      const unsigned int dim = 2, spacedim = 2;
+      Assert (this->n_cells(0) > 0, ExcInternalError());
+      Assert (this->n_levels() == 1, ExcInternalError());
+
+      // For every vertex: the number of cells touching it
+      // (vertex_touch_count) and the list of these cells, each paired with
+      // the local number the vertex has within that cell (vertex_to_cell).
+      typedef
+      std::list<std::pair<Triangulation<dim,spacedim>::active_cell_iterator,
+          unsigned int> >
+          cell_and_local_vertex_list;
+
+      std::vector<unsigned int> vertex_touch_count;
+      std::vector<cell_and_local_vertex_list> vertex_to_cell;
+      get_vertex_to_cell_mappings (*this,
+                                   vertex_touch_count,
+                                   vertex_to_cell);
+
+      // total number of (vertex, cell) incidences
+      const dealii::internal::p4est::types<2>::locidx
+      num_vtt = std::accumulate (vertex_touch_count.begin(),
+                                 vertex_touch_count.end(),
+                                 0);
+
+      // Vertex information is only stored in debug mode; leaving it out in
+      // optimized mode saves a few bytes.
+#ifdef DEBUG
+      const bool set_vertex_info = true;
+#else
+      const bool set_vertex_info = false;
+#endif
+
+      // allocate a connectivity object with the right sizes for all arrays
+      connectivity
+        = dealii::internal::p4est::functions<2>::
+          connectivity_new ((set_vertex_info ? this->n_vertices() : 0),
+                            this->n_cells(0),
+                            this->n_vertices(),
+                            num_vtt);
+
+      // ... and fill it
+      set_vertex_and_cell_info (*this,
+                                vertex_touch_count,
+                                vertex_to_cell,
+                                coarse_cell_to_p4est_tree_permutation,
+                                set_vertex_info,
+                                connectivity);
+
+      Assert (p4est_connectivity_is_valid (connectivity) == 1,
+              ExcInternalError());
+
+      // finally create a forest on top of the connectivity
+      parallel_forest
+        = dealii::internal::p4est::functions<2>::
+          new_forest (this->mpi_communicator,
+                      connectivity,
+                      /* minimum initial number of quadrants per tree */ 0,
+                      /* minimum level of upfront refinement */ 0,
+                      /* use uniform upfront refinement */ 1,
+                      /* user_data_size = */ 0,
+                      /* user_data_constructor = */ NULL,
+                      /* user_pointer */ this);
+    }
+
+
+
+    // TODO: This is a verbatim copy of the 2,2 case. However, we can't
+    // specialize on the dim template argument alone while leaving spacedim
+    // open.
+    //
+    // Build the p4est connectivity and an initial forest from the coarse
+    // mesh stored in this triangulation (dim=2, spacedim=3). The mesh must
+    // consist of a single, non-empty level.
+    template <>
+    void
+    Triangulation<2,3>::copy_new_triangulation_to_p4est (dealii::internal::int2type<2>)
+    {
+      const unsigned int dim = 2, spacedim = 3;
+      Assert (this->n_cells(0) > 0, ExcInternalError());
+      Assert (this->n_levels() == 1, ExcInternalError());
+
+      // For every vertex: the number of cells touching it
+      // (vertex_touch_count) and the list of these cells, each paired with
+      // the local number the vertex has within that cell (vertex_to_cell).
+      typedef
+      std::list<std::pair<Triangulation<dim,spacedim>::active_cell_iterator,
+          unsigned int> >
+          cell_and_local_vertex_list;
+
+      std::vector<unsigned int> vertex_touch_count;
+      std::vector<cell_and_local_vertex_list> vertex_to_cell;
+      get_vertex_to_cell_mappings (*this,
+                                   vertex_touch_count,
+                                   vertex_to_cell);
+
+      // total number of (vertex, cell) incidences
+      const dealii::internal::p4est::types<2>::locidx
+      num_vtt = std::accumulate (vertex_touch_count.begin(),
+                                 vertex_touch_count.end(),
+                                 0);
+
+      // Vertex information is only stored in debug mode; leaving it out in
+      // optimized mode saves a few bytes.
+#ifdef DEBUG
+      const bool set_vertex_info = true;
+#else
+      const bool set_vertex_info = false;
+#endif
+
+      // allocate a connectivity object with the right sizes for all arrays
+      connectivity
+        = dealii::internal::p4est::functions<2>::
+          connectivity_new ((set_vertex_info ? this->n_vertices() : 0),
+                            this->n_cells(0),
+                            this->n_vertices(),
+                            num_vtt);
+
+      // ... and fill it
+      set_vertex_and_cell_info (*this,
+                                vertex_touch_count,
+                                vertex_to_cell,
+                                coarse_cell_to_p4est_tree_permutation,
+                                set_vertex_info,
+                                connectivity);
+
+      Assert (p4est_connectivity_is_valid (connectivity) == 1,
+              ExcInternalError());
+
+      // finally create a forest on top of the connectivity
+      parallel_forest
+        = dealii::internal::p4est::functions<2>::
+          new_forest (this->mpi_communicator,
+                      connectivity,
+                      /* minimum initial number of quadrants per tree */ 0,
+                      /* minimum level of upfront refinement */ 0,
+                      /* use uniform upfront refinement */ 1,
+                      /* user_data_size = */ 0,
+                      /* user_data_constructor = */ NULL,
+                      /* user_pointer */ this);
+    }
+
+
+
+    // Build the p8est connectivity and an initial forest from the coarse
+    // mesh stored in this triangulation (dim=3, spacedim=3). In addition
+    // to the vertex and cell information set up as in the 2d case, the
+    // tree-to-edge and edge-to-tree arrays of the connectivity are filled
+    // here. The mesh must consist of a single, non-empty level.
+    template <>
+    void
+    Triangulation<3,3>::copy_new_triangulation_to_p4est (dealii::internal::int2type<3>)
+    {
+      const int dim = 3, spacedim = 3;
+      Assert (this->n_cells(0) > 0, ExcInternalError());
+      Assert (this->n_levels() == 1, ExcInternalError());
+
+      // data structures that counts how many cells touch each vertex
+      // (vertex_touch_count), and which cells touch a given vertex (together
+      // with the local numbering of that vertex within the cells that touch
+      // it)
+      std::vector<unsigned int> vertex_touch_count;
+      std::vector<
+      std::list<
+      std::pair<Triangulation<3>::active_cell_iterator,
+          unsigned int> > >
+          vertex_to_cell;
+      get_vertex_to_cell_mappings (*this,
+                                   vertex_touch_count,
+                                   vertex_to_cell);
+      // total number of (vertex, cell) incidences
+      // NOTE(review): the index type is spelled types<2>::locidx although
+      // this is the 3d specialization; presumably types<3>::locidx names
+      // the same type -- confirm before changing.
+      const dealii::internal::p4est::types<2>::locidx
+      num_vtt = std::accumulate (vertex_touch_count.begin(),
+                                 vertex_touch_count.end(),
+                                 0);
+
+      // the same two data structures, but for edges (lines) instead of
+      // vertices
+      std::vector<unsigned int> edge_touch_count;
+      std::vector<
+      std::list<
+      std::pair<Triangulation<3>::active_cell_iterator,
+          unsigned int> > >
+          edge_to_cell;
+      get_edge_to_cell_mappings (*this,
+                                 edge_touch_count,
+                                 edge_to_cell);
+      // total number of (edge, cell) incidences
+      const dealii::internal::p4est::types<2>::locidx
+      num_ett = std::accumulate (edge_touch_count.begin(),
+                                 edge_touch_count.end(),
+                                 0);
+
+      // now create a connectivity object with the right sizes for all
+      // arrays. vertex information is only requested if DEBUG is defined
+      const bool set_vertex_info
+#ifdef DEBUG
+        = true
+#else
+        = false
+#endif
+          ;
+
+      connectivity
+        = dealii::internal::p4est::functions<3>::
+          connectivity_new ((set_vertex_info == true ? this->n_vertices() : 0),
+                            this->n_cells(0),
+                            this->n_active_lines(),
+                            num_ett,
+                            this->n_vertices(),
+                            num_vtt);
+
+      set_vertex_and_cell_info (*this,
+                                vertex_touch_count,
+                                vertex_to_cell,
+                                coarse_cell_to_p4est_tree_permutation,
+                                set_vertex_info,
+                                connectivity);
+
+      // next, set up the tree-to-edge
+      // data. note that in p4est lines
+      // are ordered as follows
+      //      *---3---*        *---3---*
+      //     /|       |       /       /|
+      //    6 |       11     6       7 11
+      //   /  10      |     /       /  |
+      //  *   |       |    *---2---*   |
+      //  |   *---1---*    |       |   *
+      //  |  /       /     |       9  /
+      //  8 4       5      8       | 5
+      //  |/       /       |       |/
+      //  *---0---*        *---0---*
+      // whereas in deal.II they are like this:
+      //      *---7---*        *---7---*
+      //     /|       |       /       /|
+      //    4 |       11     4       5 11
+      //   /  10      |     /       /  |
+      //  *   |       |    *---6---*   |
+      //  |   *---3---*    |       |   *
+      //  |  /       /     |       9  /
+      //  8 0       1      8       | 1
+      //  |/       /       |       |/
+      //  *---2---*        *---2---*
+
+      // entry e is the p4est number of the line that deal.II calls e
+      const unsigned int deal_to_p4est_line_index[12]
+        = { 4, 5, 0, 1,  6, 7, 2, 3, 8, 9, 10, 11 } ;
+
+      // for each cell, store the indices of its twelve lines at the
+      // positions p4est expects them in the block of the cell's tree
+      for (Triangulation<dim,spacedim>::active_cell_iterator
+           cell = this->begin_active();
+           cell != this->end(); ++cell)
+        {
+          const unsigned int
+          index = coarse_cell_to_p4est_tree_permutation[cell->index()];
+          for (unsigned int e=0; e<GeometryInfo<3>::lines_per_cell; ++e)
+            connectivity->tree_to_edge[index*GeometryInfo<3>::lines_per_cell+
+                                       deal_to_p4est_line_index[e]]
+              = cell->line(e)->index();
+        }
+
+      // now also set edge-to-tree
+      // information. ett_offset holds the running sum of the edge touch
+      // counts, i.e. where the entries of each edge start
+      connectivity->ett_offset[0] = 0;
+      std::partial_sum (edge_touch_count.begin(),
+                        edge_touch_count.end(),
+                        &connectivity->ett_offset[1]);
+
+      Assert (connectivity->ett_offset[this->n_active_lines()] ==
+              num_ett,
+              ExcInternalError());
+
+      // note that v runs over lines here, not over vertices
+      for (unsigned int v=0; v<this->n_active_lines(); ++v)
+        {
+          Assert (edge_to_cell[v].size() == edge_touch_count[v],
+                  ExcInternalError());
+
+          std::list<std::pair
+          <Triangulation<dim,spacedim>::active_cell_iterator,
+          unsigned int> >::const_iterator
+          p = edge_to_cell[v].begin();
+          for (unsigned int c=0; c<edge_touch_count[v]; ++c, ++p)
+            {
+              // the tree of the c-th cell touching this line ...
+              connectivity->edge_to_tree[connectivity->ett_offset[v]+c]
+                = coarse_cell_to_p4est_tree_permutation[p->first->index()];
+              // ... and the number of the line within that cell, translated
+              // to p4est's line numbering
+              connectivity->edge_to_edge[connectivity->ett_offset[v]+c]
+                = deal_to_p4est_line_index[p->second];
+            }
+        }
+
+      Assert (p8est_connectivity_is_valid (connectivity) == 1,
+              ExcInternalError());
+
+      // now create a forest out of the connectivity data structure
+      parallel_forest
+        = dealii::internal::p4est::functions<3>::
+          new_forest (this->mpi_communicator,
+                      connectivity,
+                      /* minimum initial number of quadrants per tree */ 0,
+                      /* minimum level of upfront refinement */ 0,
+                      /* use uniform upfront refinement */ 1,
+                      /* user_data_size = */ 0,
+                      /* user_data_constructor = */ NULL,
+                      /* user_pointer */ this);
+    }
+
+
+
+    namespace
+    {
+      // this function combines vertices that have different locations (and
+      // thus, different vertex_index) but represent the same topological
+      // entity over periodic boundaries. The vector
+      // topological_vertex_numbering contains a linear map from 0 to
+      // n_vertices at input and at output relates periodic vertices with only
+      // one vertex index. The output is used to always identify the same
+      // vertex according to the periodicity, e.g. when finding the maximum
+      // cell level around a vertex.
+      //
+      // Example: On a 3D cell with vertices numbered from 0 to 7 and periodic
+      // boundary conditions in x direction, the vector
+      // topological_vertex_numbering will contain the numbers
+      // {0,0,2,2,4,4,6,6} (because the vertex pairs {0,1}, {2,3}, {4,5},
+      // {6,7} belong together, respectively). If periodicity is set in x and
+      // z direction, the output is {0,0,2,2,0,0,2,2}, and if periodicity is
+      // in all directions, the output is simply {0,0,0,0,0,0,0,0}.
+      template <typename ITERATOR>
+      void
+      identify_periodic_vertices_recursively(const GridTools::PeriodicFacePair<ITERATOR> &periodic,
+                                             std::vector<unsigned int> &topological_vertex_numbering)
+      {
+        const unsigned int dim = ITERATOR::AccessorType::dimension;
+
+        // Only descend when both sides of the pair are refined: couplings
+        // across hanging nodes are already established on the parent level,
+        // so it is enough to pair up neighbors of the same level.
+        const bool both_sides_refined = periodic.cell[0]->has_children() &&
+                                        periodic.cell[1]->has_children();
+        if (both_sides_refined)
+          {
+            // the child pair starts as a copy of the parent pair (so it
+            // carries the same orientation flags); the cells are replaced
+            // below
+            GridTools::PeriodicFacePair<ITERATOR> child_pair = periodic;
+
+            const unsigned int n_face_children =
+              periodic.cell[0]->face(periodic.face_idx[0])->n_children();
+            for (unsigned int face_child=0; face_child<n_face_children; ++face_child)
+              {
+                // child of the first cell behind this face child, taking the
+                // orientation flags of the face pair into account
+                const unsigned int first_child =
+                  GeometryInfo<dim>::child_cell_on_face(periodic.cell[0]->refinement_case(),
+                                                        periodic.face_idx[0], face_child,
+                                                        periodic.orientation[0],
+                                                        periodic.orientation[1],
+                                                        periodic.orientation[2]);
+
+                // the second face is in standard orientation in terms of the
+                // periodic face pair, so no orientation flags are passed
+                const unsigned int second_child =
+                  GeometryInfo<dim>::child_cell_on_face(periodic.cell[1]->refinement_case(),
+                                                        periodic.face_idx[1], face_child);
+
+                child_pair.cell[0] = periodic.cell[0]->child(first_child);
+                child_pair.face_idx[0] = periodic.face_idx[0];
+                child_pair.cell[1] = periodic.cell[1]->child(second_child);
+                child_pair.face_idx[1] = periodic.face_idx[1];
+
+                // handle the matching children before the current level
+                identify_periodic_vertices_recursively (child_pair,
+                                                        topological_vertex_numbering);
+              }
+          }
+
+        // Now identify the vertices of the two faces on the current level:
+        // both vertices of a matching pair receive the smaller of their two
+        // current topological numbers.
+        for (unsigned int face_vertex=0;
+             face_vertex<GeometryInfo<dim-1>::vertices_per_cell; ++face_vertex)
+          {
+            // translate the vertex number to account for a possibly
+            // non-standard orientation of the face on cell[0]
+            const unsigned int face_vertex_0 =
+              GeometryInfo<dim>::standard_to_real_face_vertex(face_vertex,
+                                                              periodic.orientation[0],
+                                                              periodic.orientation[1],
+                                                              periodic.orientation[2]);
+
+            // global vertex indices of the two vertices to be identified
+            const unsigned int global_vertex_0 =
+              periodic.cell[0]->face(periodic.face_idx[0])->vertex_index(face_vertex_0);
+            const unsigned int global_vertex_1 =
+              periodic.cell[1]->face(periodic.face_idx[1])->vertex_index(face_vertex);
+
+            const unsigned int representative =
+              std::min(topological_vertex_numbering[global_vertex_0],
+                       topological_vertex_numbering[global_vertex_1]);
+
+            topological_vertex_numbering[global_vertex_1] = representative;
+            topological_vertex_numbering[global_vertex_0] = representative;
+          }
+      }
+
+
+
+      // Ensures the 2:1 mesh balance for periodic boundary conditions in the
+      // artificial cell layer (the active cells are taken care of by p4est).
+      //
+      // The refine and coarsen flags of @p tria are adjusted by limiting the
+      // level difference at vertices, where vertices that are identified
+      // with each other through @p periodic_face_pairs_level_0 are treated
+      // as one and the same vertex.
+      //
+      // Returns true if any refine or coarsen flag differs from its state on
+      // entry, and false otherwise (in particular, false is returned
+      // immediately if there are no periodic face pairs).
+      //
+      // The face pairs are only read here, so they are taken by const
+      // reference rather than by value; this avoids copying the whole vector
+      // on every call.
+      template <int dim, int spacedim>
+      bool enforce_mesh_balance_over_periodic_boundaries
+      (Triangulation<dim,spacedim> &tria,
+       const std::vector<GridTools::PeriodicFacePair<typename dealii::Triangulation<dim,spacedim>::cell_iterator> > &periodic_face_pairs_level_0)
+      {
+        if (periodic_face_pairs_level_0.empty())
+          return false;
+
+        // remember the flags as they are now so that we can report at the
+        // end whether this function changed anything
+        std::vector<bool> flags_before[2];
+        tria.save_coarsen_flags (flags_before[0]);
+        tria.save_refine_flags (flags_before[1]);
+
+        // start from the identity map and then merge the numbers of
+        // vertices that are identified with each other through periodicity
+        std::vector<unsigned int> topological_vertex_numbering(tria.n_vertices());
+        for (unsigned int i=0; i<topological_vertex_numbering.size(); ++i)
+          topological_vertex_numbering[i] = i;
+        for (unsigned int i=0; i<periodic_face_pairs_level_0.size(); ++i)
+          identify_periodic_vertices_recursively(periodic_face_pairs_level_0[i],
+                                                 topological_vertex_numbering);
+
+        // this code is replicated from grid/tria.cc but using an indirection
+        // for periodic boundary conditions
+        bool continue_iterating = true;
+        std::vector<int> vertex_level(tria.n_vertices());
+        while (continue_iterating)
+          {
+            // store highest level one of the cells adjacent to a vertex
+            // belongs to
+            std::fill (vertex_level.begin(), vertex_level.end(), 0);
+            typename Triangulation<dim,spacedim>::active_cell_iterator
+            cell = tria.begin_active(), endc = tria.end();
+            for (; cell!=endc; ++cell)
+              {
+                // level this cell contributes to its vertices: one more if
+                // it is flagged for refinement, one less if it is flagged
+                // for coarsening. for the latter we tentatively assume that
+                // the cell will be coarsened. this isn't always true (the
+                // coarsen flag could be removed again) and so we may make
+                // an error here. we try to correct this by iterating over
+                // the entire process until we are converged
+                int prospective_level = cell->level();
+                if (cell->refine_flag_set())
+                  ++prospective_level;
+                else if (cell->coarsen_flag_set())
+                  --prospective_level;
+
+                for (unsigned int vertex=0; vertex<GeometryInfo<dim>::vertices_per_cell;
+                     ++vertex)
+                  {
+                    int &level_at_vertex =
+                      vertex_level[topological_vertex_numbering[cell->vertex_index(vertex)]];
+                    level_at_vertex = std::max (level_at_vertex, prospective_level);
+                  }
+              }
+
+            continue_iterating = false;
+
+            // loop over all cells in reverse order. do so because we
+            // can then update the vertex levels on the adjacent
+            // vertices and maybe already flag additional cells in this
+            // loop
+            //
+            // note that not only may we have to add additional
+            // refinement flags, but we will also have to remove
+            // coarsening flags on cells adjacent to vertices that will
+            // see refinement
+            for (cell=tria.last_active(); cell != endc; --cell)
+              if (cell->refine_flag_set() == false)
+                {
+                  for (unsigned int vertex=0;
+                       vertex<GeometryInfo<dim>::vertices_per_cell; ++vertex)
+                    {
+                      const unsigned int topological_vertex =
+                        topological_vertex_numbering[cell->vertex_index(vertex)];
+
+                      if (vertex_level[topological_vertex] >= cell->level()+1)
+                        {
+                          // remove coarsen flag...
+                          cell->clear_coarsen_flag();
+
+                          // ...and if necessary also refine the current
+                          // cell, at the same time updating the level
+                          // information about vertices
+                          if (vertex_level[topological_vertex] > cell->level()+1)
+                            {
+                              cell->set_refine_flag();
+                              continue_iterating = true;
+
+                              for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell;
+                                   ++v)
+                                {
+                                  int &level_at_vertex =
+                                    vertex_level[topological_vertex_numbering[cell->vertex_index(v)]];
+                                  level_at_vertex = std::max (level_at_vertex,
+                                                              cell->level()+1);
+                                }
+                            }
+
+                          // continue and see whether we may, for example,
+                          // go into the inner 'if' above based on a
+                          // different vertex
+                        }
+                    }
+                }
+
+            // clear coarsen flag if not all children were marked
+            for (typename Triangulation<dim,spacedim>::cell_iterator parent = tria.begin();
+                 parent!=tria.end(); ++parent)
+              {
+                // nothing to do if we are already on the finest level
+                if (parent->active())
+                  continue;
+
+                const unsigned int n_children=parent->n_children();
+                unsigned int flagged_children=0;
+                for (unsigned int child=0; child<n_children; ++child)
+                  if (parent->child(child)->active() &&
+                      parent->child(child)->coarsen_flag_set())
+                    ++flagged_children;
+
+                // if not all children were flagged for coarsening, remove
+                // coarsen flags
+                if (flagged_children < n_children)
+                  for (unsigned int child=0; child<n_children; ++child)
+                    if (parent->child(child)->active())
+                      parent->child(child)->clear_coarsen_flag();
+              }
+          }
+
+        // report whether any flag differs from its state on entry
+        std::vector<bool> flags_after[2];
+        tria.save_coarsen_flags (flags_after[0]);
+        tria.save_refine_flags (flags_after[1]);
+        return ((flags_before[0] != flags_after[0]) ||
+                (flags_before[1] != flags_after[1]));
+      }
+    }
+
+
+
+    // Runs the base class flag preparation and, if
+    // limit_level_difference_at_vertices is part of the smoothing flags,
+    // the 2:1 balance over periodic boundaries, repeating both until the
+    // periodic balancing no longer changes any flag. Returns whether any
+    // refine or coarsen flag differs from its state on entry.
+    template <int dim, int spacedim>
+    bool
+    Triangulation<dim,spacedim>::prepare_coarsening_and_refinement()
+    {
+      // snapshot of the flags on entry, used for the return value
+      std::vector<bool> coarsen_flags_on_entry;
+      std::vector<bool> refine_flags_on_entry;
+      this->save_coarsen_flags (coarsen_flags_on_entry);
+      this->save_refine_flags (refine_flags_on_entry);
+
+      bool periodic_balance_changed_flags;
+      do
+        {
+          this->dealii::Triangulation<dim,spacedim>::prepare_coarsening_and_refinement();
+
+          // enforce 2:1 mesh balance over periodic boundaries; without the
+          // smoothing flag nothing is done and the loop ends after one pass
+          periodic_balance_changed_flags = false;
+          if (this->smooth_grid &
+              dealii::Triangulation<dim,spacedim>::limit_level_difference_at_vertices)
+            periodic_balance_changed_flags =
+              enforce_mesh_balance_over_periodic_boundaries(*this,
+                                                            periodic_face_pairs_level_0);
+        }
+      while (periodic_balance_changed_flags);
+
+      // compare against the snapshot taken at the beginning
+      std::vector<bool> coarsen_flags_on_exit;
+      std::vector<bool> refine_flags_on_exit;
+      this->save_coarsen_flags (coarsen_flags_on_exit);
+      this->save_refine_flags (refine_flags_on_exit);
+
+      const bool coarsen_flags_changed = (coarsen_flags_on_entry != coarsen_flags_on_exit);
+      const bool refine_flags_changed = (refine_flags_on_entry != refine_flags_on_exit);
+      return (coarsen_flags_changed || refine_flags_changed);
+    }
+
+
+
+    // Rebuilds the deal.II triangulation from the local part of the p4est
+    // forest. In order, this function
+    //  - temporarily replaces the mesh smoothing flags,
+    //  - optionally coarsens the whole mesh back to level 0,
+    //  - recreates the p4est ghost layer,
+    //  - repeatedly matches local trees and ghost quadrants against the
+    //    deal.II cells and refines/coarsens until no flags remain set,
+    //  - fills the level subdomain ids if a multigrid hierarchy is requested,
+    //  - checks the resulting cell counts against p4est, and
+    //  - restores the original smoothing flags.
+    template <int dim, int spacedim>
+    void
+    Triangulation<dim,spacedim>::copy_local_forest_to_triangulation ()
+    {
+      // disable mesh smoothing for recreating the deal.II triangulation,
+      // otherwise we might not be able to reproduce the p4est mesh
+      // exactly. We restore the original smoothing at the end of this
+      // function. Note that the smoothing flag is used in the normal
+      // refinement process.
+      typename Triangulation<dim,spacedim>::MeshSmoothing
+      save_smooth = this->smooth_grid;
+
+      // We will refine manually to match the p4est further down, which
+      // obeys a level difference of 2 at each vertex (see the balance call
+      // to p4est). We can disable this here so we store fewer artificial
+      // cells (in some cases). For geometric multigrid it turns out that
+      // we will miss level cells at shared vertices if we ignore this.
+      // See tests/mpi/mg_06.
+      if (settings & construct_multigrid_hierarchy)
+        this->smooth_grid = dealii::Triangulation<dim,spacedim>::limit_level_difference_at_vertices;
+      else
+        this->smooth_grid = dealii::Triangulation<dim,spacedim>::none;
+
+      bool mesh_changed = false;
+
+      // remove all deal.II refinements. Note that we could skip this and
+      // start from our current state, because the algorithm later coarsens as
+      // necessary. This has the advantage of being faster when large parts
+      // of the local partition changes (likely) and gives a deterministic
+      // ordering of the cells (useful for snapshot/resume).
+      // TODO: is there a more efficient way to do this?
+      if (settings & mesh_reconstruction_after_repartitioning)
+        while (this->begin_active()->level() > 0)
+          {
+            // flag every active cell for coarsening; each pass of the
+            // enclosing while loop repeats this until the first active
+            // cell is on level 0
+            for (typename Triangulation<dim, spacedim>::active_cell_iterator
+                 cell = this->begin_active();
+                 cell != this->end();
+                 ++cell)
+              {
+                cell->set_coarsen_flag();
+              }
+
+            this->prepare_coarsening_and_refinement();
+            // with refinement_in_progress set, execute_coarsening_and_refinement()
+            // of this class only forwards to the base class implementation
+            const bool saved_refinement_in_progress = refinement_in_progress;
+            refinement_in_progress = true;
+
+            try
+              {
+                this->execute_coarsening_and_refinement();
+              }
+            catch (const typename Triangulation<dim, spacedim>::DistortedCellList &)
+              {
+                // the underlying triangulation should not be checking for
+                // distorted cells
+                AssertThrow (false, ExcInternalError());
+              }
+
+            refinement_in_progress = saved_refinement_in_progress;
+          }
+
+
+      // query p4est for the ghost cells
+      if (parallel_ghost != 0)
+        {
+          // release the previous ghost layer before creating a new one
+          dealii::internal::p4est::functions<dim>::ghost_destroy (parallel_ghost);
+          parallel_ghost = 0;
+        }
+      parallel_ghost = dealii::internal::p4est::functions<dim>::ghost_new (parallel_forest,
+                       (dim == 2
+                        ?
+                        typename dealii::internal::p4est::types<dim>::
+                        balance_type(P4EST_CONNECT_CORNER)
+                        :
+                        typename dealii::internal::p4est::types<dim>::
+                        balance_type(P8EST_CONNECT_CORNER)));
+
+      Assert (parallel_ghost, ExcInternalError());
+
+
+      // set all cells to artificial. we will later set it to the correct
+      // subdomain in match_tree_recursively
+      for (typename Triangulation<dim,spacedim>::cell_iterator
+           cell = this->begin(0);
+           cell != this->end(0);
+           ++cell)
+        cell->recursively_set_subdomain_id(numbers::artificial_subdomain_id);
+
+      // repeat matching and refinement until a pass leaves no refine or
+      // coarsen flag set
+      do
+        {
+          for (typename Triangulation<dim,spacedim>::cell_iterator
+               cell = this->begin(0);
+               cell != this->end(0);
+               ++cell)
+            {
+              // if this processor stores no part of the forest that comes out
+              // of this coarse grid cell, then we need to delete all children
+              // of this cell (the coarse grid cell remains)
+              if (tree_exists_locally<dim,spacedim>(parallel_forest,
+                                                    coarse_cell_to_p4est_tree_permutation[cell->index()])
+                  == false)
+                {
+                  delete_all_children<dim,spacedim> (cell);
+                  if (!cell->has_children())
+                    cell->set_subdomain_id (numbers::artificial_subdomain_id);
+                }
+
+              else
+                {
+                  // this processor stores at least a part of the tree that
+                  // comes out of this cell.
+
+                  typename dealii::internal::p4est::types<dim>::quadrant p4est_coarse_cell;
+                  typename dealii::internal::p4est::types<dim>::tree *tree =
+                    init_tree(cell->index());
+
+                  dealii::internal::p4est::init_coarse_quadrant<dim>(p4est_coarse_cell);
+
+                  match_tree_recursively<dim,spacedim> (*tree, cell,
+                                                        p4est_coarse_cell,
+                                                        *parallel_forest,
+                                                        this->my_subdomain);
+                }
+            }
+
+          // check mesh for ghostcells, refine as necessary.  iterate over
+          // every ghostquadrant, find corresponding deal coarsecell and
+          // recurse.
+          typename dealii::internal::p4est::types<dim>::quadrant *quadr;
+          unsigned int ghost_owner=0;
+          typename dealii::internal::p4est::types<dim>::topidx ghost_tree=0;
+
+          for (unsigned int g_idx=0; g_idx<parallel_ghost->ghosts.elem_count; ++g_idx)
+            {
+              // advance owner and tree until their offset ranges contain
+              // g_idx; both counters only ever move forward
+              while (g_idx >= (unsigned int)parallel_ghost->proc_offsets[ghost_owner+1])
+                ++ghost_owner;
+              while (g_idx >= (unsigned int)parallel_ghost->tree_offsets[ghost_tree+1])
+                ++ghost_tree;
+
+              quadr = static_cast<typename dealii::internal::p4est::types<dim>::quadrant *>
+                      ( sc_array_index(&parallel_ghost->ghosts, g_idx) );
+
+              unsigned int coarse_cell_index =
+                p4est_tree_to_coarse_cell_permutation[ghost_tree];
+
+              match_quadrant<dim,spacedim> (this, coarse_cell_index, *quadr, ghost_owner);
+            }
+
+          // fix all the flags to make sure we have a consistent mesh
+          this->prepare_coarsening_and_refinement ();
+
+          // see if any flags are still set
+          mesh_changed = false;
+          for (typename Triangulation<dim,spacedim>::active_cell_iterator
+               cell = this->begin_active();
+               cell != this->end();
+               ++cell)
+            if (cell->refine_flag_set() || cell->coarsen_flag_set())
+              {
+                mesh_changed = true;
+                break;
+              }
+
+          // actually do the refinement but prevent the refinement hook below
+          // from taking over
+          const bool saved_refinement_in_progress = refinement_in_progress;
+          refinement_in_progress = true;
+
+          try
+            {
+              this->execute_coarsening_and_refinement();
+            }
+          catch (const typename Triangulation<dim,spacedim>::DistortedCellList &)
+            {
+              // the underlying triangulation should not be checking for
+              // distorted cells
+              AssertThrow (false, ExcInternalError());
+            }
+
+          refinement_in_progress = saved_refinement_in_progress;
+        }
+      while (mesh_changed);
+
+#ifdef DEBUG
+      // check if correct number of ghosts is created
+      unsigned int num_ghosts = 0;
+
+      for (typename Triangulation<dim,spacedim>::active_cell_iterator
+           cell = this->begin_active();
+           cell != this->end();
+           ++cell)
+        {
+          // a ghost is an active cell that is neither ours nor artificial
+          if (cell->subdomain_id() != this->my_subdomain
+              &&
+              cell->subdomain_id() != numbers::artificial_subdomain_id)
+            ++num_ghosts;
+        }
+
+      Assert( num_ghosts == parallel_ghost->ghosts.elem_count, ExcInternalError());
+#endif
+
+
+
+      // fill level_subdomain_ids for geometric multigrid
+      // the level ownership of a cell is defined as the owner if the cell is active or as the owner of child(0)
+      // we need this information for all our ancestors and the same-level neighbors of our own cells (=level ghosts)
+      if (settings & construct_multigrid_hierarchy)
+        {
+          // step 1: We set our own ids all the way down and all the others to
+          // -1. Note that we do not fill other cells we could figure out the
+          // same way, because we might accidentally set an id for a cell that
+          // is not a ghost cell.
+          //
+          // The levels are visited from finest to coarsest, so the level
+          // subdomain id of child(0) has already been set when its parent
+          // is visited.
+          for (unsigned int lvl=this->n_levels(); lvl>0; )
+            {
+              --lvl;
+              for (typename Triangulation<dim,spacedim>::cell_iterator cell = this->begin(lvl); cell!=this->end(lvl); ++cell)
+                {
+                  if ((!cell->has_children() && cell->subdomain_id()==this->locally_owned_subdomain())
+                      || (cell->has_children() && cell->child(0)->level_subdomain_id()==this->locally_owned_subdomain()))
+                    cell->set_level_subdomain_id(this->locally_owned_subdomain());
+                  else
+                    {
+                      //not our cell
+                      cell->set_level_subdomain_id(numbers::artificial_subdomain_id);
+                    }
+                }
+            }
+
+          //step 2: make sure all the neighbors to our level_cells exist. Need
+          //to look up in p4est...
+          std::vector<std::vector<bool> > marked_vertices(this->n_levels());
+          for (unsigned int lvl=0; lvl < this->n_levels(); ++lvl)
+            marked_vertices[lvl] = mark_locally_active_vertices_on_level(lvl);
+
+          for (typename Triangulation<dim,spacedim>::cell_iterator cell = this->begin(0); cell!=this->end(0); ++cell)
+            {
+              typename dealii::internal::p4est::types<dim>::quadrant p4est_coarse_cell;
+              const unsigned int tree_index
+                = coarse_cell_to_p4est_tree_permutation[cell->index()];
+              typename dealii::internal::p4est::types<dim>::tree *tree =
+                init_tree(cell->index());
+
+              dealii::internal::p4est::init_coarse_quadrant<dim>(p4est_coarse_cell);
+
+              determine_level_subdomain_id_recursively<dim,spacedim> (*tree, tree_index, cell,
+                                                                      p4est_coarse_cell,
+                                                                      *parallel_forest,
+                                                                      this->my_subdomain,
+                                                                      marked_vertices);
+            }
+
+          //step 3: make sure we have the parent of our level cells
+          for (unsigned int lvl=this->n_levels(); lvl>0;)
+            {
+              --lvl;
+              for (typename Triangulation<dim,spacedim>::cell_iterator cell = this->begin(lvl); cell!=this->end(lvl); ++cell)
+                {
+                  if (cell->has_children())
+                    for (unsigned int c=0; c<GeometryInfo<dim>::max_children_per_cell; ++c)
+                      {
+                        if (cell->child(c)->level_subdomain_id()==this->locally_owned_subdomain())
+                          {
+                            //at least one of the children belongs to us, so
+                            //make sure we set the level subdomain id
+                            types::subdomain_id mark = numbers::artificial_subdomain_id;
+                            mark = cell->child(0)->level_subdomain_id();
+                            Assert(mark != numbers::artificial_subdomain_id, ExcInternalError()); //we should know the child(0)
+                            cell->set_level_subdomain_id(mark);
+                            break;
+                          }
+                      }
+                }
+            }
+
+          //step 4: Special case: on each level we need all the face neighbors
+          // of our own level cells these are normally on the same level,
+          // unless the neighbor is active and coarser. It can end up on a
+          // different processor. Luckily, the level_subdomain_id can be
+          // figured out without communication, because the cell is active
+          // (and so level_subdomain_id=subdomain_id). Finally, also consider
+          // the opposite case: if we are the coarser neighbor for another
+          // processor, also mark them.
+          for (typename Triangulation<dim,spacedim>::cell_iterator cell = this->begin(); cell!=this->end(); ++cell)
+            {
+              bool cell_level_mine = cell->level_subdomain_id() == this->locally_owned_subdomain();
+
+              for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+                {
+                  // only faces with a neighbor on a coarser level are of
+                  // interest here
+                  if (cell->face(f)->at_boundary() || cell->neighbor(f)->level() >= cell->level())
+                    continue;
+
+                  bool neighbor_level_mine = cell->neighbor(f)->level_subdomain_id() == this->locally_owned_subdomain();
+
+                  if (cell_level_mine && !neighbor_level_mine)
+                    {
+                      // set the neighbor level_id up
+                      Assert(cell->neighbor(f)->active(), ExcInternalError());
+                      Assert(cell->neighbor(f)->subdomain_id() != numbers::artificial_subdomain_id, ExcInternalError());
+                      Assert(cell->neighbor(f)->level_subdomain_id() == numbers::artificial_subdomain_id
+                             || cell->neighbor(f)->level_subdomain_id() == cell->neighbor(f)->subdomain_id(), ExcInternalError());
+                      cell->neighbor(f)->set_level_subdomain_id(cell->neighbor(f)->subdomain_id());
+                    }
+                  else if (!cell_level_mine && neighbor_level_mine)
+                    {
+                      // set the current cell up because it is a neighbor for us
+                      Assert(cell->active(), ExcInternalError());
+                      Assert(cell->subdomain_id() != numbers::artificial_subdomain_id, ExcInternalError());
+                      Assert(cell->level_subdomain_id() == numbers::artificial_subdomain_id
+                             || cell->level_subdomain_id() == cell->subdomain_id(), ExcInternalError());
+                      cell->set_level_subdomain_id(cell->subdomain_id());
+                    }
+                }
+
+            }
+
+        }
+
+
+
+      // check that our local copy has exactly as many cells as the p4est
+      // original (at least if we are on only one processor); for parallel
+      // computations, we want to check that we have at least as many as p4est
+      // stores locally (in the future we should check that we have exactly as
+      // many non-artificial cells as parallel_forest->local_num_quadrants)
+      {
+        const unsigned int total_local_cells = this->n_active_cells();
+        // the cast to void marks the variable as used even if the Assert
+        // statements below do not reference it in a given build
+        (void)total_local_cells;
+
+        // NOTE(review): the first Assert below is deliberately(?) not
+        // followed by a semicolon before the 'else' -- presumably required
+        // by the way the Assert macro expands; confirm against the macro
+        // definition before "fixing" this
+        if (Utilities::MPI::n_mpi_processes (this->mpi_communicator) == 1)
+          Assert (static_cast<unsigned int>(parallel_forest->local_num_quadrants) ==
+                  total_local_cells,
+                  ExcInternalError())
+          else
+            Assert (static_cast<unsigned int>(parallel_forest->local_num_quadrants) <=
+                    total_local_cells,
+                    ExcInternalError());
+
+        // count the number of owned, active cells and compare with p4est.
+        unsigned int n_owned = 0;
+        for (typename Triangulation<dim,spacedim>::active_cell_iterator
+             cell = this->begin_active();
+             cell != this->end(); ++cell)
+          {
+            if (cell->subdomain_id() == this->my_subdomain)
+              ++n_owned;
+          }
+
+        Assert(static_cast<unsigned int>(parallel_forest->local_num_quadrants) ==
+               n_owned, ExcInternalError());
+
+      }
+
+      // restore the smoothing flags that were replaced at the top of this
+      // function
+      this->smooth_grid = save_smooth;
+    }
+
+
+
+    // Executes the flagged refinement and coarsening on the p4est forest
+    // and then rebuilds the deal.II triangulation from it.
+    //
+    // If refinement_in_progress is already set on entry, the call only
+    // forwards to the base class implementation. Otherwise the function
+    // prepares the flags, clears them on ghost and artificial cells, hands
+    // them to p4est (refine, coarsen, balance), lets others attach mesh
+    // data, repartitions unless no_automatic_repartitioning is set, copies
+    // the forest back via copy_local_forest_to_triangulation() and updates
+    // the number cache.
+    //
+    // AssertThrow is used to raise an error if a cell on the last level
+    // p4est allows is flagged for refinement, and to raise
+    // ExcInternalError if copying back the forest reports distorted cells.
+    template <int dim, int spacedim>
+    void
+    Triangulation<dim, spacedim>::execute_coarsening_and_refinement ()
+    {
+      // first make sure that recursive calls are handled correctly
+      if (refinement_in_progress == true)
+        {
+          dealii::Triangulation<dim,spacedim>::execute_coarsening_and_refinement ();
+          return;
+        }
+
+      // do not allow anisotropic refinement
+#ifdef DEBUG
+      for (typename Triangulation<dim,spacedim>::active_cell_iterator
+           cell = this->begin_active();
+           cell != this->end(); ++cell)
+        if (cell->is_locally_owned() && cell->refine_flag_set())
+          Assert (cell->refine_flag_set() ==
+                  RefinementPossibilities<dim>::isotropic_refinement,
+                  ExcMessage ("This class does not support anisotropic refinement"));
+#endif
+
+
+      // safety check: p4est has an upper limit on the level of a cell
+      if (this->n_levels()==dealii::internal::p4est::functions<dim>::max_level)
+        {
+          for (typename Triangulation<dim,spacedim>::active_cell_iterator
+               cell = this->begin_active(dealii::internal::p4est::functions<dim>::max_level-1);
+               cell != this->end(dealii::internal::p4est::functions<dim>::max_level-1); ++cell)
+            {
+              AssertThrow(!(cell->refine_flag_set()),
+                          ExcMessage("Fatal Error: maximum refinement level of p4est reached."));
+            }
+        }
+
+      // now do the work we're supposed to do when we are in charge
+      refinement_in_progress = true;
+      this->prepare_coarsening_and_refinement ();
+
+      // make sure all flags are cleared on cells we don't own, since nothing
+      // good can come of that if they are still around
+      for (typename Triangulation<dim,spacedim>::active_cell_iterator
+           cell = this->begin_active();
+           cell != this->end(); ++cell)
+        if (cell->is_ghost() || cell->is_artificial())
+          {
+            cell->clear_refine_flag ();
+            cell->clear_coarsen_flag ();
+          }
+
+
+      // count how many cells will be refined and coarsened, and allocate that
+      // much memory
+      RefineAndCoarsenList<dim,spacedim>
+      refine_and_coarsen_list (*this,
+                               p4est_tree_to_coarse_cell_permutation,
+                               this->my_subdomain);
+
+      // copy refine and coarsen flags into p4est and execute the refinement
+      // and coarsening. this uses the refine_and_coarsen_list just built,
+      // which is communicated to the callback functions through
+      // p4est's user_pointer object
+      Assert (parallel_forest->user_pointer == this,
+              ExcInternalError());
+      parallel_forest->user_pointer = &refine_and_coarsen_list;
+
+      // the ghost layer is destroyed here; it is created anew in
+      // copy_local_forest_to_triangulation()
+      if (parallel_ghost != 0)
+        {
+          dealii::internal::p4est::functions<dim>::ghost_destroy (parallel_ghost);
+          parallel_ghost = 0;
+        }
+      dealii::internal::p4est::functions<dim>::
+      refine (parallel_forest, /* refine_recursive */ false,
+              &RefineAndCoarsenList<dim,spacedim>::refine_callback,
+              /*init_callback=*/NULL);
+      dealii::internal::p4est::functions<dim>::
+      coarsen (parallel_forest, /* coarsen_recursive */ false,
+               &RefineAndCoarsenList<dim,spacedim>::coarsen_callback,
+               /*init_callback=*/NULL);
+
+      // make sure all cells in the lists have been consumed
+      Assert (refine_and_coarsen_list.pointers_are_at_end(),
+              ExcInternalError());
+
+      // reset the pointer
+      parallel_forest->user_pointer = this;
+
+      // enforce 2:1 hanging node condition
+      dealii::internal::p4est::functions<dim>::
+      balance (parallel_forest,
+               /* face and corner balance */
+               (dim == 2
+                ?
+                typename dealii::internal::p4est::types<dim>::
+                balance_type(P4EST_CONNECT_FULL)
+                :
+                typename dealii::internal::p4est::types<dim>::
+                balance_type(P8EST_CONNECT_FULL)),
+               /*init_callback=*/NULL);
+
+      // before repartitioning the mesh let others attach mesh related info
+      // (such as SolutionTransfer data) to the p4est
+      attach_mesh_data();
+
+      if (!(settings & no_automatic_repartitioning))
+        {
+          // partition the new mesh between all processors. If cell weights have
+          // not been given balance the number of cells.
+          if (this->signals.cell_weight.num_slots() == 0)
+            dealii::internal::p4est::functions<dim>::
+            partition (parallel_forest,
+                       /* prepare coarsening */ 1,
+                       /* weight_callback */ NULL);
+          else
+            {
+              // get cell weights for a weighted repartitioning.
+              const std::vector<unsigned int> cell_weights = get_cell_weights();
+
+              PartitionWeights<dim,spacedim> partition_weights (cell_weights);
+
+              // attach (temporarily) a pointer to the cell weights through p4est's
+              // user_pointer object
+              Assert (parallel_forest->user_pointer == this,
+                      ExcInternalError());
+              parallel_forest->user_pointer = &partition_weights;
+
+              dealii::internal::p4est::functions<dim>::
+              partition (parallel_forest,
+                         /* prepare coarsening */ 1,
+                         /* weight_callback */ &PartitionWeights<dim,spacedim>::cell_weight);
+
+              // reset the user pointer to its previous state
+              parallel_forest->user_pointer = this;
+            }
+        }
+
+      // finally copy back from local part of tree to deal.II
+      // triangulation. before doing so, make sure there are no refine or
+      // coarsen flags pending
+      for (typename Triangulation<dim,spacedim>::active_cell_iterator
+           cell = this->begin_active();
+           cell != this->end(); ++cell)
+        {
+          cell->clear_refine_flag();
+          cell->clear_coarsen_flag();
+        }
+
+      try
+        {
+          copy_local_forest_to_triangulation ();
+        }
+      // name the exception type of this very instantiation, i.e. with both
+      // dim and spacedim; Triangulation<dim> alone would refer to the
+      // instantiation with spacedim equal to dim
+      catch (const typename Triangulation<dim,spacedim>::DistortedCellList &)
+        {
+          // the underlying triangulation should not be checking for distorted
+          // cells
+          AssertThrow (false, ExcInternalError());
+        }
+
+
+      refinement_in_progress = false;
+
+      this->update_number_cache ();
+    }
+
+    /**
+     * Repartition the p4est forest among the processes and rebuild the local
+     * deal.II triangulation from the local part of the forest.
+     *
+     * In debug mode it is checked that no locally owned active cell carries a
+     * refine or coarsen flag. If no slot is connected to the cell_weight
+     * signal, p4est's partition function is called without a weight callback;
+     * otherwise the weights returned by get_cell_weights() are handed to it
+     * through a PartitionWeights object that is temporarily installed as the
+     * forest's user_pointer.
+     */
+    template <int dim, int spacedim>
+    void
+    Triangulation<dim,spacedim>::repartition ()
+    {
+#ifdef DEBUG
+      for (typename Triangulation<dim,spacedim>::active_cell_iterator
+           cell = this->begin_active();
+           cell != this->end(); ++cell)
+        if (cell->is_locally_owned())
+          Assert (
+            !cell->refine_flag_set() && !cell->coarsen_flag_set(),
+            ExcMessage ("Error: There shouldn't be any cells flagged for coarsening/refinement when calling repartition()."));
+#endif
+
+      refinement_in_progress = true;
+
+      // before repartitioning the mesh let others attach mesh related info
+      // (such as SolutionTransfer data) to the p4est
+      attach_mesh_data();
+
+      if (this->signals.cell_weight.num_slots() == 0)
+        {
+          // no cell weights given -- call p4est's 'partition' without a
+          // callback for cell weights
+          dealii::internal::p4est::functions<dim>::
+          partition (parallel_forest,
+                     /* prepare coarsening */ 1,
+                     /* weight_callback */ NULL);
+        }
+      else
+        {
+          // get cell weights for a weighted repartitioning.
+          const std::vector<unsigned int> cell_weights = get_cell_weights();
+
+          PartitionWeights<dim,spacedim> partition_weights (cell_weights);
+
+          // attach (temporarily) a pointer to the cell weights through p4est's
+          // user_pointer object
+          Assert (parallel_forest->user_pointer == this,
+                  ExcInternalError());
+          parallel_forest->user_pointer = &partition_weights;
+
+          dealii::internal::p4est::functions<dim>::
+          partition (parallel_forest,
+                     /* prepare coarsening */ 1,
+                     /* weight_callback */ &PartitionWeights<dim,spacedim>::cell_weight);
+
+          // reset the user pointer to its previous state
+          parallel_forest->user_pointer = this;
+        }
+
+      try
+        {
+          copy_local_forest_to_triangulation ();
+        }
+      catch (const typename Triangulation<dim,spacedim>::DistortedCellList &)
+        {
+          // The exception class is named with both template arguments, like
+          // the iterator types used in this function. Triangulation<dim>
+          // alone names the class with the default spacedim (presumably
+          // spacedim==dim), whose DistortedCellList is a different type when
+          // spacedim != dim, so the handler would not match in that case.
+          //
+          // the underlying triangulation should not be checking for distorted
+          // cells
+          AssertThrow (false, ExcInternalError());
+        }
+
+      refinement_in_progress = false;
+
+      // update how many cells, edges, etc, we store locally
+      this->update_number_cache ();
+    }
+
+
+
+    /**
+     * Exchange vertex data between processes after vertices have been moved
+     * locally.
+     *
+     * For every vertex that belongs to a ghost cell, the subdomain id of that
+     * ghost cell is recorded. The coarse cells are then traversed with
+     * fill_vertices_recursively() to build one CellInfo object per receiving
+     * subdomain, each of which is packed and sent with a non-blocking send.
+     * Incoming messages are unpacked and applied to the triangulation with
+     * set_vertices_recursively().
+     *
+     * @param vertex_locally_moved One flag per vertex of this triangulation
+     * (its size must equal n_vertices()). In debug mode, a flag may only be
+     * set for vertices that are locally owned.
+     */
+    template <int dim, int spacedim>
+    void
+    Triangulation<dim,spacedim>::
+    communicate_locally_moved_vertices (const std::vector<bool> &vertex_locally_moved)
+    {
+      Assert (vertex_locally_moved.size() == this->n_vertices(),
+              ExcDimensionMismatch(vertex_locally_moved.size(),
+                                   this->n_vertices()));
+#ifdef DEBUG
+      {
+        const std::vector<bool> locally_owned_vertices
+          = GridTools::get_locally_owned_vertices (*this);
+        for (unsigned int i=0; i<locally_owned_vertices.size(); ++i)
+          Assert ((vertex_locally_moved[i] == false)
+                  ||
+                  (locally_owned_vertices[i] == true),
+                  ExcMessage ("The vertex_locally_moved argument must not "
+                              "contain vertices that are not locally owned"));
+      }
+#endif
+
+      typedef CommunicateLocallyMovedVertices::CellInfo<dim,spacedim> cell_info_t;
+      typedef std::map<dealii::types::subdomain_id, cell_info_t> cellmap_t;
+      typedef typename dealii::internal::p4est::types<dim>::quadrant quadrant_t;
+      typedef typename Triangulation<dim,spacedim>::cell_iterator cell_iterator_t;
+
+      // communicator and message tag shared by all sends and receives below
+      const MPI_Comm comm    = this->get_communicator();
+      const int      mpi_tag = 123;
+
+      // Step 1: for each vertex of a ghost cell, remember the subdomain id of
+      // that ghost cell.
+      std::map<unsigned int, std::set<dealii::types::subdomain_id> >
+      vertices_with_ghost_neighbors;
+
+      for (typename Triangulation<dim,spacedim>::active_cell_iterator
+           cell=this->begin_active(); cell!=this->end(); ++cell)
+        {
+          if (!cell->is_ghost())
+            continue;
+
+          for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+            vertices_with_ghost_neighbors[cell->vertex_index(v)].insert
+            (cell->subdomain_id());
+        }
+
+      // Step 2: descend from every coarse cell and collect, per receiving
+      // subdomain, the cells and vertices that have to be sent.
+      cellmap_t needs_to_get_cells;
+
+      for (cell_iterator_t cell = this->begin(0); cell != this->end(0); ++cell)
+        {
+          quadrant_t p4est_coarse_cell;
+          dealii::internal::p4est::init_coarse_quadrant<dim>(p4est_coarse_cell);
+
+          CommunicateLocallyMovedVertices::fill_vertices_recursively<dim,spacedim>
+          (*this,
+           this->get_coarse_cell_to_p4est_tree_permutation()[cell->index()],
+           cell,
+           p4est_coarse_cell,
+           vertices_with_ghost_neighbors,
+           vertex_locally_moved,
+           needs_to_get_cells);
+        }
+
+      // Step 3: pack one buffer per recipient and start a non-blocking send.
+      // The buffers stay alive until MPI_Waitall() at the end of this
+      // function.
+      const std::size_t n_messages = needs_to_get_cells.size();
+      std::vector<std::vector<char> > sendbuffers (n_messages);
+      std::vector<MPI_Request>        requests (n_messages);
+      std::vector<unsigned int>       destinations;
+
+      unsigned int idx=0;
+      for (typename cellmap_t::iterator it=needs_to_get_cells.begin();
+           it!=needs_to_get_cells.end();
+           ++it, ++idx)
+        {
+          const unsigned int num_cells = it->second.tree_index.size();
+          (void)num_cells;
+          destinations.push_back(it->first);
+
+          Assert(num_cells==it->second.quadrants.size(), ExcInternalError());
+          Assert(num_cells>0, ExcInternalError());
+
+          std::vector<char> &buffer = sendbuffers[idx];
+          it->second.pack_data (buffer);
+          MPI_Isend(&buffer[0], buffer.size(),
+                    MPI_BYTE, it->first,
+                    mpi_tag, comm, &requests[idx]);
+        }
+
+      Assert(destinations.size()==needs_to_get_cells.size(), ExcInternalError());
+
+      // Step 4: find out which processes are going to send something to us.
+      const std::vector<unsigned int> senders
+        = Utilities::MPI::compute_point_to_point_communication_pattern
+          (comm, destinations);
+
+      // Step 5: receive one message per sender, in whatever order they
+      // arrive, and write the received vertex data into the triangulation.
+      std::vector<char> incoming;
+      cell_info_t       received_info;
+      for (unsigned int msg=0; msg<senders.size(); ++msg)
+        {
+          MPI_Status status;
+          int n_bytes;
+          MPI_Probe(MPI_ANY_SOURCE, mpi_tag, comm, &status);
+          MPI_Get_count(&status, MPI_BYTE, &n_bytes);
+          incoming.resize(n_bytes);
+
+          MPI_Recv(&incoming[0], n_bytes, MPI_BYTE,
+                   status.MPI_SOURCE, status.MPI_TAG,
+                   comm, &status);
+
+          received_info.unpack_data(incoming);
+
+          const unsigned int n_received_cells = received_info.tree_index.size();
+          for (unsigned int k=0; k<n_received_cells; ++k)
+            {
+              // coarse cell that corresponds to the received p4est tree index
+              cell_iterator_t
+              cell (this,
+                    0,
+                    this->get_p4est_tree_to_coarse_cell_permutation()[received_info.tree_index[k]]);
+
+              quadrant_t p4est_coarse_cell;
+              dealii::internal::p4est::init_coarse_quadrant<dim>(p4est_coarse_cell);
+
+              CommunicateLocallyMovedVertices::set_vertices_recursively<dim,spacedim>
+              (*this,
+               p4est_coarse_cell,
+               cell,
+               received_info.quadrants[k],
+               received_info.first_vertices[k],
+               received_info.first_vertex_indices[k]);
+            }
+        }
+
+      // Step 6: wait for all sends to finish so that the send buffers can be
+      // destroyed safely.
+      if (!requests.empty())
+        MPI_Waitall(requests.size(), &requests[0], MPI_STATUSES_IGNORE);
+
+      //check all msgs got sent and received
+      Assert(Utilities::MPI::sum(needs_to_get_cells.size(), this->get_communicator())
+             == Utilities::MPI::sum(senders.size(), this->get_communicator()),
+             ExcInternalError());
+    }
+
+    /**
+     * Register a callback that packs @p size bytes of data per cell.
+     *
+     * The callback is stored together with the offset at which its data
+     * starts; that offset is the amount of data registered so far plus
+     * sizeof(CellStatus).
+     *
+     * @param size Number of bytes the callback needs; must be positive.
+     * @param pack_callback Function to be stored for packing the data.
+     * @return The offset assigned to this callback.
+     */
+    template <int dim, int spacedim>
+    unsigned int
+    Triangulation<dim,spacedim>::
+    register_data_attach (const std::size_t size,
+                          const std_cxx11::function<void(const cell_iterator &,
+                                                         const CellStatus,
+                                                         void *)> &pack_callback)
+    {
+      Assert(size>0, ExcMessage("register_data_attach(), size==0"));
+      Assert(attached_data_pack_callbacks.size()==n_attached_datas,
+             ExcMessage("register_data_attach(), not all data has been unpacked last time?"));
+
+      // the new data starts behind the CellStatus field and everything that
+      // has been registered before
+      const unsigned int offset = attached_data_size+sizeof(CellStatus);
+
+      const std::pair<unsigned int, pack_callback_t> entry (offset, pack_callback);
+      attached_data_pack_callbacks.push_back(entry);
+
+      // account for the newly registered data
+      attached_data_size += size;
+      ++n_attached_datas;
+
+      return offset;
+    }
+
+
+
+    /**
+     * Hand previously attached data back to the caller.
+     *
+     * All coarse cells whose p4est tree exists locally are traversed with
+     * post_mesh_data_recursively(), which receives @p offset and
+     * @p unpack_callback. Afterwards the bookkeeping counters are decreased
+     * and, once no attached data is outstanding and no deserialization is
+     * pending, the data stored in the forest is released.
+     *
+     * @param offset Offset of the data to unpack; must lie in the range
+     * [sizeof(CellStatus), sizeof(CellStatus)+attached_data_size).
+     * @param unpack_callback Function passed on to
+     * post_mesh_data_recursively().
+     */
+    template <int dim, int spacedim>
+    void
+    Triangulation<dim,spacedim>::
+    notify_ready_to_unpack (const unsigned int offset,
+                            const std_cxx11::function<void (const cell_iterator &,
+                                                            const CellStatus,
+                                                            const void *)> &unpack_callback)
+    {
+      Assert (offset >= sizeof(CellStatus),
+              ExcMessage ("invalid offset in notify_ready_to_unpack()"));
+      Assert (offset < sizeof(CellStatus)+attached_data_size,
+              ExcMessage ("invalid offset in notify_ready_to_unpack()"));
+      Assert (n_attached_datas > 0, ExcMessage ("notify_ready_to_unpack() called too often"));
+
+      // Recurse over p4est and hand the caller the data back
+      for (typename Triangulation<dim, spacedim>::cell_iterator
+           cell = this->begin (0);
+           cell != this->end (0);
+           ++cell)
+        {
+          // only coarse cells whose tree exists locally carry data for us
+          const bool tree_is_local =
+            tree_exists_locally<dim, spacedim> (parallel_forest,
+                                                coarse_cell_to_p4est_tree_permutation[cell->index() ]);
+          if (tree_is_local)
+            {
+              typename dealii::internal::p4est::types<dim>::quadrant p4est_coarse_cell;
+              typename dealii::internal::p4est::types<dim>::tree *tree =
+                init_tree (cell->index());
+
+              dealii::internal::p4est::init_coarse_quadrant<dim> (p4est_coarse_cell);
+
+              // the coarse cell is also passed as parent cell; that is not
+              // correct, but the parent is only used in a refined cell
+              post_mesh_data_recursively<dim, spacedim> (*tree,
+                                                         cell,
+                                                         cell,
+                                                         p4est_coarse_cell,
+                                                         offset,
+                                                         unpack_callback);
+            }
+        }
+
+      // one fewer data set is waiting to be unpacked
+      --n_attached_datas;
+      if (n_attached_deserialize > 0)
+        {
+          --n_attached_deserialize;
+          attached_data_pack_callbacks.pop_front();
+        }
+
+      // important: only remove data if we are not in the deserialization
+      // process. There, each SolutionTransfer registers and unpacks before
+      // the next one does this, so n_attached_datas is only 1 here.  This
+      // would destroy the saved data before the second SolutionTransfer can
+      // get it. This created a bug that is documented in
+      // tests/mpi/p4est_save_03 with more than one SolutionTransfer.
+      if (n_attached_datas == 0 && n_attached_deserialize == 0)
+        {
+          // everybody got his data, time for cleanup!
+          attached_data_size = 0;
+          attached_data_pack_callbacks.clear();
+
+          // release the data; the user pointer is saved beforehand and
+          // written back after the call to reset_data
+          void *const saved_user_pointer = parallel_forest->user_pointer;
+          dealii::internal::p4est::functions<dim>::reset_data (parallel_forest, 0, NULL, NULL);
+          parallel_forest->user_pointer = saved_user_pointer;
+        }
+    }
+
+
+    /**
+     * Return a read-only reference to the permutation vector that is indexed
+     * by a p4est tree index and yields the index of the corresponding coarse
+     * cell.
+     */
+    template <int dim, int spacedim>
+    const std::vector<types::global_dof_index> &
+    Triangulation<dim, spacedim>::get_p4est_tree_to_coarse_cell_permutation() const
+    {
+      return this->p4est_tree_to_coarse_cell_permutation;
+    }
+
+
+
+    /**
+     * Return a read-only reference to the permutation vector that is indexed
+     * by the index of a coarse cell and yields the corresponding p4est tree
+     * index.
+     */
+    template <int dim, int spacedim>
+    const std::vector<types::global_dof_index> &
+    Triangulation<dim, spacedim>::get_coarse_cell_to_p4est_tree_permutation() const
+    {
+      return this->coarse_cell_to_p4est_tree_permutation;
+    }
+
+
+
+    namespace
+    {
+      /**
+       * User data passed to the p4est_iterate/p8est_iterate callbacks
+       * (find_ghosts_corner, find_ghosts_edge, find_ghosts_face) that fill
+       * vertices_with_ghost_neighbors.
+       */
+      template <int dim, int spacedim>
+      struct FindGhosts
+      {
+        // triangulation in which the callbacks look up cells
+        dealii::parallel::distributed::Triangulation<dim,spacedim> *triangulation;
+
+        // scratch array in which the callbacks collect subdomain ids
+        sc_array_t *subids;
+
+        // result: for a vertex index, the subdomain ids found next to it
+        std::map<unsigned int, std::set<dealii::types::subdomain_id> >
+        *vertices_with_ghost_neighbors;
+      };
+
+      /**
+       * Callback for a corner (vertex): collect the subdomain ids of all ghost
+       * quadrants that meet at the corner. If there are any, add them to the
+       * vertices_with_ghost_neighbors entry of the corresponding vertex of
+       * every non-ghost quadrant at this corner.
+       *
+       * @param info Corner information; its array of sides is traversed.
+       * @param user_data Pointer to a FindGhosts<dim,spacedim> object.
+       */
+      template <int dim, int spacedim>
+      void
+      find_ghosts_corner
+      (typename dealii::internal::p4est::iter<dim>::corner_info *info,
+       void *user_data)
+      {
+        typedef typename dealii::internal::p4est::iter<dim>::corner_side side_t;
+        typedef typename dealii::parallel::distributed::Triangulation<dim,spacedim>::cell_iterator cell_iterator_t;
+        typedef dealii::types::subdomain_id subdomain_t;
+
+        FindGhosts<dim,spacedim> *fg = static_cast<FindGhosts<dim,spacedim> *>(user_data);
+        dealii::parallel::distributed::Triangulation<dim,spacedim> *triangulation = fg->triangulation;
+        sc_array_t *subids = fg->subids;
+        std::map<unsigned int, std::set<subdomain_t> > &ghost_map
+          = *fg->vertices_with_ghost_neighbors;
+
+        const int nsides = info->sides.elem_count;
+        side_t *sides = reinterpret_cast<side_t *>(info->sides.array);
+
+        // first pass: gather the subdomain ids of the ghost sides in the
+        // scratch array
+        subids->elem_count = 0;
+        for (int i = 0; i < nsides; ++i)
+          {
+            if (!sides[i].is_ghost)
+              continue;
+
+            cell_iterator_t cell = cell_from_quad (triangulation, sides[i].treeid, *(sides[i].quad));
+            Assert (cell->is_ghost(), ExcMessage ("ghost quad did not find ghost cell"));
+            subdomain_t *subid =
+              static_cast<subdomain_t *>(sc_array_push (subids));
+            *subid = cell->subdomain_id();
+          }
+
+        // nothing to record if no ghost touches this corner
+        if (subids->elem_count == 0)
+          return;
+
+        const int nsubs = static_cast<int>(subids->elem_count);
+        const subdomain_t *subdomain_ids
+          = reinterpret_cast<const subdomain_t *>(subids->array);
+
+        // second pass: attach all gathered ids to the vertex of each
+        // non-ghost side
+        for (int i = 0; i < nsides; ++i)
+          {
+            if (sides[i].is_ghost)
+              continue;
+
+            cell_iterator_t cell = cell_from_quad (triangulation, sides[i].treeid, *(sides[i].quad));
+
+            Assert (!cell->is_ghost(), ExcMessage ("local quad found ghost cell"));
+
+            ghost_map[cell->vertex_index(sides[i].corner)]
+            .insert (subdomain_ids, subdomain_ids + nsubs);
+          }
+
+        // leave the scratch array empty for the next callback
+        subids->elem_count = 0;
+      }
+
+      /**
+       * Counterpart of find_ghosts_corner for the hanging vertex in the middle
+       * of an edge: on the hanging sides of the edge, collect the subdomain ids
+       * of the ghost quadrants and add them to the
+       * vertices_with_ghost_neighbors entry of that vertex as seen from each
+       * non-ghost quadrant.
+       *
+       * @param info Edge information; its array of sides is traversed.
+       * @param user_data Pointer to a FindGhosts<dim,spacedim> object.
+       */
+      template <int dim, int spacedim>
+      void
+      find_ghosts_edge
+      (typename dealii::internal::p4est::iter<dim>::edge_info *info,
+       void *user_data)
+      {
+        typedef typename dealii::internal::p4est::iter<dim>::edge_side side_t;
+        typedef typename dealii::parallel::distributed::Triangulation<dim,spacedim>::cell_iterator cell_iterator_t;
+        typedef dealii::types::subdomain_id subdomain_t;
+
+        FindGhosts<dim,spacedim> *fg = static_cast<FindGhosts<dim,spacedim> *>(user_data);
+        dealii::parallel::distributed::Triangulation<dim,spacedim> *triangulation = fg->triangulation;
+        sc_array_t *subids = fg->subids;
+        std::map<unsigned int, std::set<subdomain_t> > &ghost_map
+          = *fg->vertices_with_ghost_neighbors;
+
+        const int nsides = info->sides.elem_count;
+        side_t *sides = reinterpret_cast<side_t *>(info->sides.array);
+
+        // first pass: gather the subdomain ids of ghost quadrants on hanging
+        // sides in the scratch array
+        subids->elem_count = 0;
+        for (int i = 0; i < nsides; ++i)
+          {
+            if (!sides[i].is_hanging)
+              continue;
+
+            for (int j = 0; j < 2; ++j)
+              if (sides[i].is.hanging.is_ghost[j])
+                {
+                  cell_iterator_t cell = cell_from_quad (triangulation, sides[i].treeid, *(sides[i].is.hanging.quad[j]));
+                  subdomain_t *subid =
+                    static_cast<subdomain_t *>(sc_array_push (subids));
+                  *subid = cell->subdomain_id();
+                }
+          }
+
+        // nothing to record if no ghost quadrant was found
+        if (subids->elem_count == 0)
+          return;
+
+        const int nsubs = static_cast<int>(subids->elem_count);
+        const subdomain_t *subdomain_ids
+          = reinterpret_cast<const subdomain_t *>(subids->array);
+
+        // second pass: attach all gathered ids to the vertex selected by
+        // p8est_edge_corners for each non-ghost quadrant on a hanging side
+        for (int i = 0; i < nsides; ++i)
+          {
+            if (!sides[i].is_hanging)
+              continue;
+
+            for (int j = 0; j < 2; ++j)
+              if (!sides[i].is.hanging.is_ghost[j])
+                {
+                  cell_iterator_t cell = cell_from_quad (triangulation, sides[i].treeid, *(sides[i].is.hanging.quad[j]));
+
+                  ghost_map[cell->vertex_index(p8est_edge_corners[sides[i].edge][1^j])]
+                  .insert (subdomain_ids, subdomain_ids + nsubs);
+                }
+          }
+
+        // leave the scratch array empty for the next callback
+        subids->elem_count = 0;
+      }
+
+      /**
+       * Counterpart of find_ghosts_corner for the hanging vertex in the middle
+       * of a face: on the hanging sides of the face, collect the subdomain ids
+       * of the ghost quadrants and add them to the
+       * vertices_with_ghost_neighbors entry of that vertex as seen from each
+       * non-ghost quadrant. A hanging side has 2 quadrants for dim==2 and 4
+       * otherwise.
+       *
+       * @param info Face information; its array of sides is traversed.
+       * @param user_data Pointer to a FindGhosts<dim,spacedim> object.
+       */
+      template <int dim, int spacedim>
+      void
+      find_ghosts_face
+      (typename dealii::internal::p4est::iter<dim>::face_info *info,
+       void *user_data)
+      {
+        typedef typename dealii::internal::p4est::iter<dim>::face_side side_t;
+        typedef typename dealii::parallel::distributed::Triangulation<dim,spacedim>::cell_iterator cell_iterator_t;
+        typedef dealii::types::subdomain_id subdomain_t;
+
+        FindGhosts<dim,spacedim> *fg = static_cast<FindGhosts<dim,spacedim> *>(user_data);
+        dealii::parallel::distributed::Triangulation<dim,spacedim> *triangulation = fg->triangulation;
+        sc_array_t *subids = fg->subids;
+        std::map<unsigned int, std::set<subdomain_t> > &ghost_map
+          = *fg->vertices_with_ghost_neighbors;
+
+        const int nsides = info->sides.elem_count;
+        side_t *sides = reinterpret_cast<side_t *>(info->sides.array);
+
+        // number of quadrants on a hanging side
+        const int limit = (dim == 2) ? 2 : 4;
+
+        // first pass: gather the subdomain ids of ghost quadrants on hanging
+        // sides in the scratch array
+        subids->elem_count = 0;
+        for (int i = 0; i < nsides; ++i)
+          {
+            if (!sides[i].is_hanging)
+              continue;
+
+            for (int j = 0; j < limit; ++j)
+              if (sides[i].is.hanging.is_ghost[j])
+                {
+                  cell_iterator_t cell = cell_from_quad (triangulation, sides[i].treeid, *(sides[i].is.hanging.quad[j]));
+                  subdomain_t *subid =
+                    static_cast<subdomain_t *>(sc_array_push (subids));
+                  *subid = cell->subdomain_id();
+                }
+          }
+
+        // nothing to record if no ghost quadrant was found
+        if (subids->elem_count == 0)
+          return;
+
+        const int nsubs = static_cast<int>(subids->elem_count);
+        const subdomain_t *subdomain_ids
+          = reinterpret_cast<const subdomain_t *>(subids->array);
+
+        // second pass: attach all gathered ids to the vertex selected by the
+        // face corner table for each non-ghost quadrant on a hanging side
+        for (int i = 0; i < nsides; ++i)
+          {
+            if (!sides[i].is_hanging)
+              continue;
+
+            for (int j = 0; j < limit; ++j)
+              if (!sides[i].is.hanging.is_ghost[j])
+                {
+                  cell_iterator_t cell = cell_from_quad (triangulation, sides[i].treeid, *(sides[i].is.hanging.quad[j]));
+
+                  // the 2d and 3d corner tables are different objects
+                  const int corner = (dim == 2)
+                                     ?
+                                     p4est_face_corners[sides[i].face][(limit - 1)^j]
+                                     :
+                                     p8est_face_corners[sides[i].face][(limit - 1)^j];
+
+                  ghost_map[cell->vertex_index(corner)]
+                  .insert (subdomain_ids, subdomain_ids + nsubs);
+                }
+          }
+
+        // leave the scratch array empty for the next callback
+        subids->elem_count = 0;
+      }
+    }
+
+
+
+    namespace
+    {
+      /**
+       * Make the flags of the two vertices that face each other across a
+       * periodic face pair agree on level @p target_level: if one of them is
+       * set in @p active_vertices_on_level, the other one is set as well.
+       *
+       * Starting from @p periodic, the function descends into matching
+       * children of both cells until @p target_level is reached. Nothing is
+       * done if the first cell is already finer than @p target_level, or if it
+       * is coarser and not both cells have children.
+       */
+      template <typename ITERATOR>
+      void
+      mark_periodic_vertices_recursively(const GridTools::PeriodicFacePair<ITERATOR> &periodic,
+                                         const int target_level,
+                                         std::vector<bool> &active_vertices_on_level)
+      {
+        const unsigned int dim = ITERATOR::AccessorType::dimension;
+        const int level = periodic.cell[0]->level();
+
+        if (level > target_level)
+          return;
+
+        if (level < target_level)
+          {
+            // for hanging nodes there is nothing to do since we are
+            // interested in the connections on the same level...
+            if (!(periodic.cell[0]->has_children() &&
+                  periodic.cell[1]->has_children()))
+              return;
+
+            // the copy inherits face indices and orientation flags from the
+            // parent pair; only the two cells are replaced below
+            GridTools::PeriodicFacePair<ITERATOR> periodic_child = periodic;
+
+            const unsigned int n_face_children
+              = periodic.cell[0]->face(periodic.face_idx[0])->n_children();
+
+            // find appropriate pairs of child elements
+            for (unsigned int cf=0; cf<n_face_children; ++cf)
+              {
+                const unsigned int child_index_0 =
+                  GeometryInfo<dim>::child_cell_on_face(periodic.cell[0]->refinement_case(),
+                                                        periodic.face_idx[0], cf,
+                                                        periodic.orientation[0],
+                                                        periodic.orientation[1],
+                                                        periodic.orientation[2]);
+
+                // the second face is in standard orientation in terms of the
+                // periodic face pair
+                const unsigned int child_index_1 =
+                  GeometryInfo<dim>::child_cell_on_face(periodic.cell[1]->refinement_case(),
+                                                        periodic.face_idx[1], cf);
+
+                periodic_child.cell[0] = periodic.cell[0]->child(child_index_0);
+                periodic_child.cell[1] = periodic.cell[1]->child(child_index_1);
+
+                // recursive call into children
+                mark_periodic_vertices_recursively (periodic_child, target_level,
+                                                    active_vertices_on_level);
+              }
+            return;
+          }
+
+        // from here on the first cell is exactly on the target level
+        for (unsigned int v=0; v<GeometryInfo<dim-1>::vertices_per_cell; ++v)
+          {
+            // take possible non-standard orientation of face on cell[0] into
+            // account
+            const unsigned int vface0 =
+              GeometryInfo<dim>::standard_to_real_face_vertex(v,periodic.orientation[0],
+                                                              periodic.orientation[1],
+                                                              periodic.orientation[2]);
+
+            const unsigned int idx0
+              = periodic.cell[0]->face(periodic.face_idx[0])->vertex_index(vface0);
+            const unsigned int idx1
+              = periodic.cell[1]->face(periodic.face_idx[1])->vertex_index(v);
+
+            if (active_vertices_on_level[idx0] || active_vertices_on_level[idx1])
+              {
+                active_vertices_on_level[idx0] = true;
+                active_vertices_on_level[idx1] = true;
+              }
+          }
+      }
+
+
+
+      /**
+       * Make the two vertices that face each other across a periodic face
+       * pair on level @p target_level share their entries in
+       * @p vertices_with_ghost_neighbors: the subdomain ids known for one
+       * vertex are added to the other one and vice versa.
+       *
+       * Starting from @p periodic, the function descends into matching
+       * children of both cells until @p target_level is reached. Only the
+       * standard face orientation is implemented.
+       */
+      template <typename ITERATOR>
+      void
+      set_periodic_ghost_neighbors_recursively(const GridTools::PeriodicFacePair<ITERATOR> &periodic,
+                                               const int target_level,
+                                               std::map<unsigned int, std::set<dealii::types::subdomain_id> > &vertices_with_ghost_neighbors)
+      {
+        const int level = periodic.cell[0]->level();
+
+        if (level > target_level)
+          return;
+
+        if (level < target_level)
+          {
+            // for hanging nodes there is nothing to do since we are
+            // interested in the connections on the same level...
+            if (!(periodic.cell[0]->has_children() &&
+                  periodic.cell[1]->has_children()))
+              return;
+
+            // the copy inherits face indices and orientation flags from the
+            // parent pair; only the two cells are replaced below
+            GridTools::PeriodicFacePair<ITERATOR> periodic_child = periodic;
+
+            // find appropriate pairs of child elements
+            for (unsigned int cf=0; cf<periodic.cell[0]->face(periodic.face_idx[0])->n_children(); ++cf)
+              {
+                // search the child of cell[0] that owns face child cf
+                unsigned int c=0;
+                while (c<periodic.cell[0]->n_children() &&
+                       !(periodic.cell[0]->child(c)->face(periodic.face_idx[0]) ==
+                         periodic.cell[0]->face(periodic.face_idx[0])->child(cf)))
+                  ++c;
+                Assert(c < periodic.cell[0]->n_children(),
+                       ExcMessage("Face child not found"));
+                periodic_child.cell[0] = periodic.cell[0]->child(c);
+
+                // same search on the other side of the periodic pair
+                c=0;
+                while (c<periodic.cell[1]->n_children() &&
+                       !(periodic.cell[1]->child(c)->face(periodic.face_idx[1]) ==
+                         periodic.cell[1]->face(periodic.face_idx[1])->child(cf)))
+                  ++c;
+                Assert(c < periodic.cell[1]->n_children(),
+                       ExcMessage("Face child not found"));
+                periodic_child.cell[1] = periodic.cell[1]->child(c);
+
+                // recursive call into children
+                set_periodic_ghost_neighbors_recursively (periodic_child, target_level,
+                                                          vertices_with_ghost_neighbors);
+              }
+            return;
+          }
+
+        // from here on the first cell is exactly on the target level
+
+        // TODO: fix non-standard orientation
+        Assert(periodic.orientation[0] == true &&
+               periodic.orientation[1] == false &&
+               periodic.orientation[2] == false,
+               ExcNotImplemented());
+
+        typedef std::set<dealii::types::subdomain_id> id_set_t;
+        const unsigned int dim = ITERATOR::AccessorType::dimension;
+
+        for (unsigned int v=0; v<GeometryInfo<dim-1>::vertices_per_cell; ++v)
+          {
+            const unsigned int idx0
+              = periodic.cell[0]->face(periodic.face_idx[0])->vertex_index(v);
+            const unsigned int idx1
+              = periodic.cell[1]->face(periodic.face_idx[1])->vertex_index(v);
+
+            // copy the ids known for the first vertex to the second one ...
+            if (vertices_with_ghost_neighbors.count(idx0) > 0)
+              {
+                const id_set_t &source = vertices_with_ghost_neighbors[idx0];
+                vertices_with_ghost_neighbors[idx1].insert(source.begin(),
+                                                           source.end());
+              }
+
+            // ... and the ids known for the second vertex back to the first
+            if (vertices_with_ghost_neighbors.count(idx1) > 0)
+              {
+                const id_set_t &source = vertices_with_ghost_neighbors[idx1];
+                vertices_with_ghost_neighbors[idx0].insert(source.begin(),
+                                                           source.end());
+              }
+          }
+      }
+    }
+
+
+
+    /**
+     * Determine, for each vertex, the neighboring subdomains adjacent to it.
+     *
+     * A FindGhosts object holding a pointer to this triangulation and to
+     * @p vertices_with_ghost_neighbors is passed (as void *) to
+     * p4est_iterate (dim==2) or p8est_iterate (dim==3) together with the
+     * find_ghosts_* callbacks. Not implemented for dim==1.
+     */
+    template <int dim, int spacedim>
+    void
+    Triangulation<dim,spacedim>::
+    fill_vertices_with_ghost_neighbors
+    (std::map<unsigned int, std::set<dealii::types::subdomain_id> >
+     &vertices_with_ghost_neighbors)
+    {
+      Assert (dim>1, ExcNotImplemented());
+
+      // context object that the iteration callbacks receive
+      FindGhosts<dim,spacedim> ghost_finder;
+      ghost_finder.subids = sc_array_new (sizeof (dealii::types::subdomain_id));
+      ghost_finder.triangulation = this;
+      ghost_finder.vertices_with_ghost_neighbors = &vertices_with_ghost_neighbors;
+
+      // Both branches are compiled for every dim, so the forest and ghost
+      // pointers have to be cast to the type p[48]est_iterate expects. In
+      // the branch that is actually taken the cast is the identity cast.
+      if (dim == 2)
+        {
+          p4est_iterate (reinterpret_cast<dealii::internal::p4est::types<2>::forest *>(this->parallel_forest),
+                         reinterpret_cast<dealii::internal::p4est::types<2>::ghost *>(this->parallel_ghost),
+                         static_cast<void *>(&ghost_finder),
+                         NULL, find_ghosts_face<2,spacedim>, find_ghosts_corner<2,spacedim>);
+        }
+      else if (dim == 3)
+        {
+          p8est_iterate (reinterpret_cast<dealii::internal::p4est::types<3>::forest *>(this->parallel_forest),
+                         reinterpret_cast<dealii::internal::p4est::types<3>::ghost *>(this->parallel_ghost),
+                         static_cast<void *>(&ghost_finder),
+                         NULL, find_ghosts_face<3,spacedim>, find_ghosts_edge<3,spacedim>, find_ghosts_corner<3,spacedim>);
+        }
+      else
+        {
+          Assert (false, ExcNotImplemented());
+        }
+
+      // release the scratch array allocated above
+      sc_array_destroy (ghost_finder.subids);
+    }
+
+
+
+    /**
+     * Determine, for each vertex on multigrid level @p level, the
+     * neighboring level subdomains adjacent to it.
+     *
+     * Every cell on @p level whose level subdomain id is neither artificial
+     * nor the locally owned one contributes its id to each of its vertices
+     * that mark_locally_active_vertices_on_level() flags. Afterwards the
+     * coarse-level periodic face pairs are handed to
+     * set_periodic_ghost_neighbors_recursively().
+     */
+    template <int dim, int spacedim>
+    void
+    Triangulation<dim,spacedim>::
+    fill_level_vertices_with_ghost_neighbors
+    (const unsigned int level,
+     std::map<unsigned int, std::set<dealii::types::subdomain_id> >
+     &vertices_with_ghost_neighbors)
+    {
+      const std::vector<bool> vertex_is_locally_active =
+        mark_locally_active_vertices_on_level(level);
+
+      for (cell_iterator cell = this->begin(level); cell != this->end(level); ++cell)
+        {
+          const dealii::types::subdomain_id owner = cell->level_subdomain_id();
+
+          // only cells owned by some other process are of interest
+          if (owner == dealii::numbers::artificial_subdomain_id
+              || owner == this->locally_owned_subdomain())
+            continue;
+
+          for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+            {
+              const unsigned int vertex = cell->vertex_index(v);
+              if (vertex_is_locally_active[vertex])
+                vertices_with_ghost_neighbors[vertex].insert (owner);
+            }
+        }
+
+      for (unsigned int pair=0; pair<periodic_face_pairs_level_0.size(); ++pair)
+        set_periodic_ghost_neighbors_recursively(periodic_face_pairs_level_0[pair],
+                                                 level, vertices_with_ghost_neighbors);
+    }
+
+
+
+    /**
+     * Return a flag per vertex (indexed by vertex index, size n_vertices())
+     * that is true for every vertex of a cell on @p level whose level
+     * subdomain id equals the locally owned subdomain. The coarse-level
+     * periodic face pairs are then passed to
+     * mark_periodic_vertices_recursively(), which may set further flags.
+     * Not implemented for dim==1.
+     */
+    template<int dim, int spacedim>
+    std::vector<bool>
+    Triangulation<dim,spacedim>
+    ::mark_locally_active_vertices_on_level (const unsigned int level) const
+    {
+      Assert (dim>1, ExcNotImplemented());
+
+      std::vector<bool> vertex_flags(this->n_vertices(), false);
+
+      for (cell_iterator cell = this->begin(level); cell != this->end(level); ++cell)
+        {
+          // skip everything that is not a locally owned level cell
+          if (!(cell->level_subdomain_id() == this->locally_owned_subdomain()))
+            continue;
+
+          for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+            vertex_flags[cell->vertex_index(v)] = true;
+        }
+
+      for (unsigned int pair=0; pair<periodic_face_pairs_level_0.size(); ++pair)
+        mark_periodic_vertices_recursively(periodic_face_pairs_level_0[pair],
+                                           level, vertex_flags);
+
+      return vertex_flags;
+    }
+
+
+
+    /**
+     * Register the periodic face pairs in @p periodicity_vector with the
+     * p4est connectivity, rebuild the forest from the updated connectivity,
+     * copy it back into the triangulation and append the pairs to
+     * periodic_face_pairs_level_0.
+     *
+     * In debug mode this asserts that the triangulation has content and
+     * consists of a single level. If the p4est version is older than
+     * 0.3.4.1 the function only triggers an assertion.
+     */
+    template<int dim, int spacedim>
+    void
+    Triangulation<dim,spacedim>::add_periodicity
+    (const std::vector<GridTools::PeriodicFacePair<cell_iterator> > &
+     periodicity_vector)
+    {
+#if DEAL_II_P4EST_VERSION_GTE(0,3,4,1)
+      Assert (triangulation_has_content == true,
+              ExcMessage ("The triangulation is empty!"));
+      Assert (this->n_levels() == 1,
+              ExcMessage ("The triangulation is refined!"));
+
+      typedef std::vector<GridTools::PeriodicFacePair<cell_iterator> >
+      FaceVector;
+      typename FaceVector::const_iterator it, periodic_end;
+      it = periodicity_vector.begin();
+      periodic_end = periodicity_vector.end();
+
+      // join the two faces of every pair in the p4est connectivity
+      for (; it<periodic_end; ++it)
+        {
+          const cell_iterator first_cell = it->cell[0];
+          const cell_iterator second_cell = it->cell[1];
+          const unsigned int face_left = it->face_idx[0];
+          const unsigned int face_right = it->face_idx[1];
+
+          //respective cells of the matching faces in p4est
+          const unsigned int tree_left
+            = coarse_cell_to_p4est_tree_permutation[std::distance(this->begin(),
+                                                                  first_cell)];
+          const unsigned int tree_right
+            = coarse_cell_to_p4est_tree_permutation[std::distance(this->begin(),
+                                                                  second_cell)];
+
+          // p4est wants to know which corner the first corner on
+          // the face with the lower id is mapped to on the face
+          // with the higher id. For d==2 there are only two possibilities
+          // that are determined by it->orientation[1].
+          // For d==3 we have to use GridTools::OrientationLookupTable.
+          // The result is given below.
+
+          unsigned int p4est_orientation = 0;
+          if (dim==2)
+            p4est_orientation = it->orientation[1];
+          else
+            {
+              // index 0 refers to the first ("left") cell/face of the pair,
+              // index 1 to the second ("right") one
+              const unsigned int face_idx_list[] = {face_left, face_right};
+              const cell_iterator cell_list[] = {first_cell, second_cell};
+              unsigned int lower_idx, higher_idx;
+              if (face_left<=face_right)
+                {
+                  higher_idx = 1;
+                  lower_idx = 0;
+                }
+              else
+                {
+                  higher_idx = 0;
+                  lower_idx = 1;
+                }
+
+              // get the cell index of the first index on the face with the lower id
+              unsigned int first_p4est_idx_on_cell = p8est_face_corners[face_idx_list[lower_idx]][0];
+              unsigned int first_dealii_idx_on_face = numbers::invalid_unsigned_int;
+              // search for the face-local vertex number that corresponds to
+              // that cell vertex, taking the face's orientation flags into
+              // account
+              for (unsigned int i=0; i<GeometryInfo<dim>::vertices_per_face; ++i)
+                {
+                  const unsigned int first_dealii_idx_on_cell
+                    =  GeometryInfo<dim>::face_to_cell_vertices
+                       (face_idx_list[lower_idx], i,
+                        cell_list[lower_idx]->face_orientation(face_idx_list[lower_idx]),
+                        cell_list[lower_idx]->face_flip(face_idx_list[lower_idx]),
+                        cell_list[lower_idx]->face_rotation(face_idx_list[lower_idx]));
+                  if (first_p4est_idx_on_cell == first_dealii_idx_on_cell)
+                    {
+                      first_dealii_idx_on_face = i;
+                      break;
+                    }
+                }
+              Assert( first_dealii_idx_on_face != numbers::invalid_unsigned_int, ExcInternalError());
+              // Now map dealii_idx_on_face according to the orientation.
+              // Both tables are indexed by it->orientation.to_ulong() and
+              // then by the face-local vertex number; which table is used
+              // depends on whether the lower face id belongs to the first
+              // or the second cell of the pair.
+              const unsigned int left_to_right [8][4] = {{0,2,1,3},{0,1,2,3},{3,1,2,0},{3,2,1,0},
+                {2,3,0,1},{1,3,0,2},{1,0,3,2},{2,0,3,1}
+              };
+              const unsigned int right_to_left [8][4] = {{0,2,1,3},{0,1,2,3},{3,1,2,0},{3,2,1,0},
+                {2,3,0,1},{2,0,3,1},{1,0,3,2},{1,3,0,2}
+              };
+              const unsigned int second_dealii_idx_on_face
+                = lower_idx==0?left_to_right[it->orientation.to_ulong()][first_dealii_idx_on_face]:
+                  right_to_left[it->orientation.to_ulong()][first_dealii_idx_on_face];
+              const unsigned int second_dealii_idx_on_cell
+                = GeometryInfo<dim>::face_to_cell_vertices
+                  (face_idx_list[higher_idx], second_dealii_idx_on_face,
+                   cell_list[higher_idx]->face_orientation(face_idx_list[higher_idx]),
+                   cell_list[higher_idx]->face_flip(face_idx_list[higher_idx]),
+                   cell_list[higher_idx]->face_rotation(face_idx_list[higher_idx]));
+              //map back to p4est
+              const unsigned int second_p4est_idx_on_face
+                = p8est_corner_face_corners[second_dealii_idx_on_cell][face_idx_list[higher_idx]];
+              p4est_orientation = second_p4est_idx_on_face;
+            }
+
+          dealii::internal::p4est::functions<dim>::
+          connectivity_join_faces (connectivity,
+                                   tree_left,
+                                   tree_right,
+                                   face_left,
+                                   face_right,
+                                   p4est_orientation);
+        }
+
+
+      Assert(dealii::internal::p4est::functions<dim>::connectivity_is_valid
+             (connectivity) == 1, ExcInternalError());
+
+      // now create a forest out of the connectivity data structure
+      dealii::internal::p4est::functions<dim>::destroy (parallel_forest);
+      parallel_forest
+        = dealii::internal::p4est::functions<dim>::
+          new_forest (this->mpi_communicator,
+                      connectivity,
+                      /* minimum initial number of quadrants per tree */ 0,
+                      /* minimum level of upfront refinement */ 0,
+                      /* use uniform upfront refinement */ 1,
+                      /* user_data_size = */ 0,
+                      /* user_data_constructor = */ NULL,
+                      /* user_pointer */ this);
+
+
+      // NOTE(review): the handler below names Triangulation<dim>, i.e. the
+      // <dim,dim> type, not <dim,spacedim>; for spacedim != dim it
+      // presumably would not match a DistortedCellList thrown by the
+      // <dim,spacedim> base class -- confirm.
+      try
+        {
+          copy_local_forest_to_triangulation ();
+        }
+      catch (const typename Triangulation<dim>::DistortedCellList &)
+        {
+          // the underlying triangulation should not be checking for distorted
+          // cells
+          AssertThrow (false, ExcInternalError());
+        }
+
+      // remember the pairs for later use on the level hierarchy
+      periodic_face_pairs_level_0.insert(periodic_face_pairs_level_0.end(),
+                                         periodicity_vector.begin(),
+                                         periodicity_vector.end());
+
+#else
+      Assert(false, ExcMessage ("Need p4est version >= 0.3.4.1!"));
+#endif
+    }
+
+
+
+    /**
+     * Return an estimate of the memory (in bytes) occupied by this object:
+     * the parallel::Triangulation base class contribution, the members
+     * summed up below, and the amount reported by
+     * memory_consumption_p4est().
+     */
+    template <int dim, int spacedim>
+    std::size_t
+    Triangulation<dim,spacedim>::memory_consumption () const
+    {
+      // contribution of the base class
+      std::size_t total =
+        this->dealii::parallel::Triangulation<dim,spacedim>::memory_consumption();
+
+      // members of this class
+      total += MemoryConsumption::memory_consumption(triangulation_has_content);
+      total += MemoryConsumption::memory_consumption(connectivity);
+      total += MemoryConsumption::memory_consumption(parallel_forest);
+      total += MemoryConsumption::memory_consumption(refinement_in_progress);
+      total += MemoryConsumption::memory_consumption(attached_data_size);
+      total += MemoryConsumption::memory_consumption(n_attached_datas);
+      // TODO[TH]: attached_data_pack_callbacks is not accounted for (how?)
+      total += MemoryConsumption::memory_consumption(coarse_cell_to_p4est_tree_permutation);
+      total += MemoryConsumption::memory_consumption(p4est_tree_to_coarse_cell_permutation);
+
+      // memory held by the p4est data structures
+      total += memory_consumption_p4est();
+
+      return total;
+    }
+
+
+
+    /**
+     * Return the sum of what p4est reports as memory used by the forest and
+     * by the connectivity.
+     */
+    template <int dim, int spacedim>
+    std::size_t
+    Triangulation<dim,spacedim>::memory_consumption_p4est () const
+    {
+      typedef dealii::internal::p4est::functions<dim> p4est_functions;
+
+      return p4est_functions::forest_memory_used(parallel_forest)
+             + p4est_functions::connectivity_memory_used(connectivity);
+    }
+
+
+
+    /**
+     * Copy the (unrefined) triangulation @p old_tria into this object and
+     * build the corresponding p4est data structures.
+     *
+     * If @p old_tria is itself a parallel::distributed::Triangulation, the
+     * communicator (via the base class), the coarse cell permutations, the
+     * attached-data bookkeeping and the settings are taken over from it;
+     * otherwise the coarse cell permutation is set up from scratch.
+     *
+     * In debug mode this asserts that @p old_tria has exactly one level and,
+     * for a distributed source, that no refinement is in progress there.
+     * A DistortedCellList thrown by the underlying triangulation is turned
+     * into ExcInternalError.
+     */
+    template <int dim, int spacedim>
+    void
+    Triangulation<dim,spacedim>::
+    copy_triangulation (const dealii::Triangulation<dim, spacedim> &old_tria)
+    {
+      try
+        {
+          dealii::Triangulation<dim,spacedim>::
+          copy_triangulation (old_tria);
+        }
+      catch (const typename dealii::Triangulation<dim,spacedim>::DistortedCellList &)
+        {
+          // the underlying triangulation should not be checking for distorted
+          // cells
+          AssertThrow (false, ExcInternalError());
+        }
+
+      // note that now we have some content in the p4est objects and call the
+      // functions that do the actual work (which are dimension dependent, so
+      // separate)
+      triangulation_has_content = true;
+
+      Assert (old_tria.n_levels() == 1,
+              ExcMessage ("Parallel distributed triangulations can only be copied, "
+                          "if they are not refined!"));
+
+      if (const dealii::parallel::distributed::Triangulation<dim,spacedim> *
+          old_tria_x = dynamic_cast<const dealii::parallel::distributed::Triangulation<dim,spacedim> *>(&old_tria))
+        {
+          Assert (!old_tria_x->refinement_in_progress,
+                  ExcMessage ("Parallel distributed triangulations can only "
+                              "be copied, if no refinement is in progress!"));
+
+          // duplicate MPI communicator, stored in the base class
+          dealii::parallel::Triangulation<dim,spacedim>::copy_triangulation (old_tria);
+
+          coarse_cell_to_p4est_tree_permutation = old_tria_x->coarse_cell_to_p4est_tree_permutation;
+          p4est_tree_to_coarse_cell_permutation = old_tria_x->p4est_tree_to_coarse_cell_permutation;
+          attached_data_size = old_tria_x->attached_data_size;
+          n_attached_datas   = old_tria_x->n_attached_datas;
+
+          settings           = old_tria_x->settings;
+        }
+      else
+        {
+          setup_coarse_cell_to_p4est_tree_permutation ();
+        }
+
+      copy_new_triangulation_to_p4est (dealii::internal::int2type<dim>());
+
+      try
+        {
+          copy_local_forest_to_triangulation ();
+        }
+      // Name the exception type through the <dim,spacedim> base class, as
+      // in the handler above: Triangulation<dim> would denote the <dim,dim>
+      // type and not match for spacedim != dim.
+      catch (const typename dealii::Triangulation<dim,spacedim>::DistortedCellList &)
+        {
+          // the underlying triangulation should not be checking for distorted
+          // cells
+          AssertThrow (false, ExcInternalError());
+        }
+
+      this->update_number_cache ();
+    }
+
+
+    /**
+     * Attach the registered per-cell data to the p4est quadrants.
+     *
+     * Does nothing if attached_data_size is zero. Otherwise the forest's
+     * per-quadrant data is reset to attached_data_size+sizeof(CellStatus)
+     * bytes (the forest's user_pointer is saved before and restored after
+     * that call), and attach_mesh_data_recursively() is run for every coarse
+     * cell whose tree exists locally.
+     */
+    template <int dim, int spacedim>
+    void
+    Triangulation<dim,spacedim>::
+    attach_mesh_data()
+    {
+      // The amount of memory attached to each cell needs to be constant
+      // because of p4est. A size of zero means there is nothing to attach.
+      if (attached_data_size==0)
+        {
+          Assert(n_attached_datas==0, ExcInternalError());
+          return;
+        }
+
+      // re-size the user data in p4est, keeping the user pointer
+      void *const saved_user_pointer = parallel_forest->user_pointer;
+      dealii::internal::p4est::functions<dim>::reset_data (parallel_forest,
+                                                           attached_data_size+sizeof(CellStatus),
+                                                           NULL, NULL);
+      parallel_forest->user_pointer = saved_user_pointer;
+
+      // Walk over p4est and the Triangulation in parallel to find
+      // refined/coarsened/kept cells, then query and attach the data.
+      typedef typename Triangulation<dim,spacedim>::cell_iterator coarse_iterator;
+      for (coarse_iterator coarse_cell = this->begin(0);
+           coarse_cell != this->end(0);
+           ++coarse_cell)
+        {
+          // coarse cells whose tree is not stored locally are not ours
+          const bool tree_is_local =
+            !(tree_exists_locally<dim,spacedim>(parallel_forest,
+                                                coarse_cell_to_p4est_tree_permutation[coarse_cell->index()])
+              == false);
+          if (!tree_is_local)
+            continue;
+
+          typename dealii::internal::p4est::types<dim>::quadrant coarse_quadrant;
+          typename dealii::internal::p4est::types<dim>::tree *coarse_tree =
+            init_tree(coarse_cell->index());
+
+          dealii::internal::p4est::init_coarse_quadrant<dim>(coarse_quadrant);
+
+          attach_mesh_data_recursively<dim,spacedim>(*coarse_tree,
+                                                     coarse_cell,
+                                                     coarse_quadrant,
+                                                     attached_data_pack_callbacks);
+        }
+    }
+
+    /**
+     * Return the cell weights gathered by get_cell_weights_recursively()
+     * for all locally present trees, visited in p4est tree order.
+     */
+    template <int dim, int spacedim>
+    std::vector<unsigned int>
+    Triangulation<dim,spacedim>::
+    get_cell_weights()
+    {
+      // We do not know yet how many cells we own after the refinement (only
+      // p4est knows that at this point), so n_active_cells is merely an
+      // initial guess for the capacity. If many more cells are refined than
+      // coarsened, then additional reallocation will be done inside
+      // get_cell_weights_recursively.
+      std::vector<unsigned int> cell_weights;
+      cell_weights.reserve(this->n_active_cells());
+
+      // Walk over p4est and the Triangulation to find refined/coarsened/kept
+      // cells and append their weight. The p4est ordering (not the deal.II
+      // ordering) is followed so that the weights appear in the order in
+      // which p4est will encounter them during repartitioning.
+      for (unsigned int tree_index=0; tree_index<this->n_cells(0); ++tree_index)
+        {
+          // trees that are not stored locally are not ours
+          if (tree_exists_locally<dim,spacedim>(parallel_forest,tree_index) == false)
+            continue;
+
+          const unsigned int coarse_cell_index =
+            p4est_tree_to_coarse_cell_permutation[tree_index];
+
+          const typename Triangulation<dim,spacedim>::cell_iterator
+          coarse_cell (this, 0, coarse_cell_index);
+
+          // root quadrant of this tree
+          typename dealii::internal::p4est::types<dim>::quadrant coarse_quadrant;
+          dealii::internal::p4est::functions<dim>::
+          quadrant_set_morton (&coarse_quadrant,
+                               /*level=*/0,
+                               /*index=*/0);
+          coarse_quadrant.p.which_tree = tree_index;
+
+          const typename dealii::internal::p4est::types<dim>::tree *coarse_tree =
+            init_tree(coarse_cell_index);
+
+          get_cell_weights_recursively<dim,spacedim>(*coarse_tree,
+                                                     coarse_cell,
+                                                     coarse_quadrant,
+                                                     this->signals,
+                                                     cell_weights);
+        }
+
+      return cell_weights;
+    }
+
+    /**
+     * Return the cell of @p triangulation that corresponds to the p4est
+     * quadrant @p quad in tree @p treeidx.
+     *
+     * Starting at the coarse cell given by the tree-to-coarse-cell
+     * permutation, the function descends one level at a time, choosing the
+     * child reported by quadrant_ancestor_id() for the next level, until
+     * the level of @p quad is reached.
+     */
+    template <int dim, int spacedim>
+    typename dealii::Triangulation<dim,spacedim>::cell_iterator
+    cell_from_quad
+    (typename dealii::parallel::distributed::Triangulation<dim,spacedim> *triangulation,
+     typename dealii::internal::p4est::types<dim>::topidx treeidx,
+     typename dealii::internal::p4est::types<dim>::quadrant &quad)
+    {
+      const int quad_level = quad.level;
+
+      // index (within its level) of the cell currently being visited
+      types::global_dof_index cell_index =
+        triangulation->get_p4est_tree_to_coarse_cell_permutation()[treeidx];
+
+      for (int level = 0; level < quad_level; ++level)
+        {
+          typename dealii::Triangulation<dim,spacedim>::cell_iterator parent (triangulation, level, cell_index);
+          const int child_id
+            = dealii::internal::p4est::functions<dim>::quadrant_ancestor_id (&quad, level + 1);
+          Assert (parent->has_children (), ExcMessage ("p4est quadrant does not correspond to a cell!"));
+          cell_index = parent->child_index(child_id);
+        }
+
+      typename dealii::Triangulation<dim,spacedim>::cell_iterator result (triangulation, quad_level, cell_index);
+
+      return result;
+    }
+
+
+
+    /**
+     * Constructor of the dim==1 specialization, which is not implemented:
+     * it only triggers an assertion. The communicator argument is unnamed
+     * and unused; the base class is initialized with MPI_COMM_WORLD.
+     */
+    template <int spacedim>
+    Triangulation<1,spacedim>::Triangulation (MPI_Comm)
+      :
+      dealii::parallel::Triangulation<1,spacedim>(MPI_COMM_WORLD,
+                                                  typename dealii::Triangulation<1,spacedim>::MeshSmoothing(),
+                                                  false)
+    {
+      Assert (false, ExcNotImplemented());
+    }
+
+
+    /**
+     * Destructor of the dim==1 specialization; not implemented, it only
+     * triggers an assertion.
+     */
+    template <int spacedim>
+    Triangulation<1,spacedim>::~Triangulation ()
+    {
+      Assert (false, ExcNotImplemented());
+    }
+
+
+
+    /**
+     * Not implemented for dim==1: the argument is ignored and an assertion
+     * is triggered.
+     */
+    template <int spacedim>
+    void
+    Triangulation<1,spacedim>::communicate_locally_moved_vertices
+    (const std::vector<bool> &/*vertex_locally_moved*/)
+    {
+      Assert (false, ExcNotImplemented());
+    }
+
+
+    /**
+     * dim==1 placeholder: return a reference to a function-local static
+     * vector that this function never fills.
+     */
+    template <int spacedim>
+    const std::vector<types::global_dof_index> &
+    Triangulation<1,spacedim>::get_p4est_tree_to_coarse_cell_permutation() const
+    {
+      // static storage so that the returned reference stays valid
+      static std::vector<types::global_dof_index> empty_permutation;
+      return empty_permutation;
+    }
+
+    /**
+     * Not implemented for dim==1: the output map is left untouched and an
+     * assertion is triggered.
+     */
+    template <int spacedim>
+    void
+    Triangulation<1,spacedim>::
+    fill_vertices_with_ghost_neighbors
+    (std::map<unsigned int, std::set<dealii::types::subdomain_id> >
+     &/*vertices_with_ghost_neighbors*/)
+    {
+      Assert (false, ExcNotImplemented());
+    }
+
+    /**
+     * Not implemented for dim==1: both arguments are ignored and an
+     * assertion is triggered.
+     */
+    template <int spacedim>
+    void
+    Triangulation<1,spacedim>::
+    fill_level_vertices_with_ghost_neighbors
+    (const unsigned int /*level*/,
+     std::map<unsigned int, std::set<dealii::types::subdomain_id> >
+     &/*vertices_with_ghost_neighbors*/)
+    {
+      Assert (false, ExcNotImplemented());
+    }
+
+    /**
+     * Not implemented for dim==1: triggers an assertion and returns an
+     * empty vector.
+     */
+    template <int spacedim>
+    std::vector<bool>
+    Triangulation<1,spacedim>::
+    mark_locally_active_vertices_on_level (const unsigned int) const
+    {
+      Assert (false, ExcNotImplemented());
+      return std::vector<bool>();
+    }
+  }
+}
+
+
+#else // DEAL_II_WITH_P4EST
+
+// Fallback used when deal.II is configured without p4est.
+namespace parallel
+{
+  namespace distributed
+  {
+    /**
+     * Placeholder constructor: initializes the base class with
+     * MPI_COMM_SELF and triggers an assertion, since the class is not
+     * implemented in this configuration.
+     */
+    template <int dim, int spacedim>
+    Triangulation<dim,spacedim>::Triangulation ()
+      :
+      dealii::parallel::Triangulation<dim,spacedim>(MPI_COMM_SELF)
+    {
+      Assert (false, ExcNotImplemented());
+    }
+  }
+}
+
+#endif // DEAL_II_WITH_P4EST
+
+
+
+
+/*-------------- Explicit Instantiations -------------------------------*/
+#include "tria.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/distributed/tria.inst.in b/source/distributed/tria.inst.in
new file mode 100644
index 0000000..529f009
--- /dev/null
+++ b/source/distributed/tria.inst.in
@@ -0,0 +1,70 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+// Explicit instantiations for source/distributed/tria.cc, written once and
+// expanded for every value of deal_II_dimension in DIMENSIONS.
+for (deal_II_dimension : DIMENSIONS)
+  {
+
+#   ifdef DEAL_II_WITH_P4EST
+
+    // helper function templates in internal::p4est, instantiated only for
+    // dimensions larger than one
+    namespace internal
+    \{
+      namespace p4est
+      \{
+#if     deal_II_dimension > 1
+        template
+        void
+        init_quadrant_children<deal_II_dimension>
+        (const types<deal_II_dimension>::quadrant & p4est_cell,
+         types<deal_II_dimension>::quadrant (&p4est_children)[GeometryInfo<deal_II_dimension>::max_children_per_cell]);
+
+        template
+        void
+        init_coarse_quadrant<deal_II_dimension>
+        (types<deal_II_dimension>::quadrant & quad);
+
+        template
+        bool
+        quadrant_is_equal<deal_II_dimension>
+        (const types<deal_II_dimension>::quadrant & q1,
+         const types<deal_II_dimension>::quadrant & q2);
+
+        template
+        bool
+        quadrant_is_ancestor<deal_II_dimension>
+        (const types<deal_II_dimension>::quadrant & q1,
+         const types<deal_II_dimension>::quadrant & q2);
+#      endif
+      \}
+    \}
+#   endif // DEAL_II_WITH_P4EST
+
+    // the triangulation class itself: spacedim equal to dim, plus the
+    // codimension one and codimension two cases where spacedim stays
+    // at most three
+    namespace parallel
+    \{
+      namespace distributed
+      \{
+        template class Triangulation<deal_II_dimension>;
+#       if deal_II_dimension < 3
+        template class Triangulation<deal_II_dimension, deal_II_dimension+1>;
+#       endif
+#       if deal_II_dimension < 2
+        template class Triangulation<deal_II_dimension, deal_II_dimension+2>;
+#       endif
+      \}
+    \}
+
+  }
+
diff --git a/source/distributed/tria_base.cc b/source/distributed/tria_base.cc
new file mode 100644
index 0000000..e898831
--- /dev/null
+++ b/source/distributed/tria_base.cc
@@ -0,0 +1,248 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/lac/sparsity_tools.h>
+#include <deal.II/lac/sparsity_pattern.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/grid_tools.h>
+#include <deal.II/distributed/tria_base.h>
+
+
+#include <algorithm>
+#include <numeric>
+#include <iostream>
+#include <fstream>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace parallel
+{
+
+  /**
+   * Construct the base triangulation with the given smoothing and
+   * distorted-cell flags and store a duplicate of @p mpi_communicator.
+   * The rank (my_subdomain) and the number of processes (n_subdomains) are
+   * queried once here, and number_cache.n_locally_owned_active_cells is
+   * sized to n_subdomains.
+   *
+   * Without MPI support this triggers an assertion.
+   */
+  template <int dim, int spacedim>
+  Triangulation<dim,spacedim>::Triangulation (MPI_Comm mpi_communicator,
+                                              const typename dealii::Triangulation<dim,spacedim>::MeshSmoothing smooth_grid,
+                                              const bool check_for_distorted_cells)
+    :
+    dealii::Triangulation<dim,spacedim>(smooth_grid,check_for_distorted_cells),
+    mpi_communicator (Utilities::MPI::
+                      duplicate_communicator(mpi_communicator)),
+    // this->mpi_communicator is the member (the duplicate) ...
+    my_subdomain (Utilities::MPI::this_mpi_process (this->mpi_communicator)),
+    // ... whereas the unqualified name here is the constructor argument
+    n_subdomains(Utilities::MPI::n_mpi_processes(mpi_communicator))
+  {
+#ifndef DEAL_II_WITH_MPI
+    Assert(false, ExcMessage("You compiled deal.II without MPI support, for "
+                             "which parallel::Triangulation is not available."));
+#endif
+    number_cache.n_locally_owned_active_cells.resize (n_subdomains);
+  }
+
+  /**
+   * If @p old_tria is a parallel::Triangulation, replace this object's
+   * communicator by a duplicate of the communicator of @p old_tria.
+   * Nothing else is copied here. Without MPI support an assertion is
+   * triggered.
+   *
+   * The communicator held so far was itself obtained from
+   * duplicate_communicator() (see the constructor) and is only freed in
+   * the destructor, so it is released here before being overwritten;
+   * otherwise it would leak.
+   */
+  template <int dim, int spacedim>
+  void
+  Triangulation<dim,spacedim>::copy_triangulation (const dealii::Triangulation<dim, spacedim> &old_tria)
+  {
+#ifndef DEAL_II_WITH_MPI
+    Assert(false, ExcNotImplemented());
+#endif
+    if (const dealii::parallel::Triangulation<dim,spacedim> *
+        old_tria_x = dynamic_cast<const dealii::parallel::Triangulation<dim,spacedim> *>(&old_tria))
+      {
+        // duplicate first, so that the source communicator is still valid
+        // even if old_tria happens to be this very object
+        const MPI_Comm new_communicator
+          = Utilities::MPI::duplicate_communicator (old_tria_x->get_communicator ());
+#ifdef DEAL_II_WITH_MPI
+        // get rid of the communicator we owned up to now
+        MPI_Comm_free (&this->mpi_communicator);
+#endif
+        mpi_communicator = new_communicator;
+      }
+  }
+
+
+
+  /**
+   * Return an estimate of the memory (in bytes) occupied by this object:
+   * the dealii::Triangulation base class contribution plus the
+   * communicator, the subdomain id and the members of the number cache
+   * summed up below.
+   */
+  template <int dim, int spacedim>
+  std::size_t
+  Triangulation<dim,spacedim>::memory_consumption() const
+  {
+    // contribution of the base class
+    std::size_t total =
+      this->dealii::Triangulation<dim,spacedim>::memory_consumption();
+
+    total += MemoryConsumption::memory_consumption(mpi_communicator);
+    total += MemoryConsumption::memory_consumption(my_subdomain);
+    total += MemoryConsumption::memory_consumption(number_cache.n_locally_owned_active_cells);
+    total += MemoryConsumption::memory_consumption(number_cache.n_global_active_cells);
+    total += MemoryConsumption::memory_consumption(number_cache.n_global_levels);
+
+    return total;
+  }
+
+  /**
+   * Destructor. With MPI support, frees the communicator stored in this
+   * object (a duplicate created by the constructor).
+   */
+  template <int dim, int spacedim>
+  Triangulation<dim,spacedim>::~Triangulation ()
+  {
+#ifdef DEAL_II_WITH_MPI
+    // get rid of the unique communicator used here again
+    MPI_Comm_free (&this->mpi_communicator);
+#endif
+  }
+
+  /**
+   * Initialize the global active cell count and the global level count to
+   * zero. The remaining members are default-constructed.
+   */
+  template <int dim, int spacedim>
+  Triangulation<dim,spacedim>::NumberCache::NumberCache()
+    :
+    n_global_active_cells(0),
+    n_global_levels(0)
+  {}
+
+  /**
+   * Return the cached number of locally owned active cells, i.e. the entry
+   * of number_cache.n_locally_owned_active_cells for my_subdomain.
+   */
+  template <int dim, int spacedim>
+  unsigned int
+  Triangulation<dim,spacedim>::n_locally_owned_active_cells () const
+  {
+    return number_cache.n_locally_owned_active_cells[my_subdomain];
+  }
+
+  /**
+   * Return the cached global number of levels
+   * (number_cache.n_global_levels).
+   */
+  template <int dim, int spacedim>
+  unsigned int
+  Triangulation<dim,spacedim>::n_global_levels () const
+  {
+    return number_cache.n_global_levels;
+  }
+
+  /**
+   * Return the cached global number of active cells
+   * (number_cache.n_global_active_cells).
+   */
+  template <int dim, int spacedim>
+  types::global_dof_index
+  Triangulation<dim,spacedim>::n_global_active_cells () const
+  {
+    return number_cache.n_global_active_cells;
+  }
+
+  /**
+   * Return a reference to the cached vector holding, per process, the
+   * number of locally owned active cells.
+   */
+  template <int dim, int spacedim>
+  const std::vector<unsigned int> &
+  Triangulation<dim,spacedim>::n_locally_owned_active_cells_per_processor () const
+  {
+    return number_cache.n_locally_owned_active_cells;
+  }
+
+  /**
+   * Return the MPI communicator stored in this object.
+   */
+  template <int dim, int spacedim>
+  MPI_Comm
+  Triangulation<dim,spacedim>::get_communicator () const
+  {
+    return mpi_communicator;
+  }
+
+#ifdef DEAL_II_WITH_MPI
+  /**
+   * Recompute the contents of number_cache: the set of ghost owners, the
+   * number of locally owned active cells of every process (exchanged with
+   * MPI_Allgather), their sum, and the maximum number of levels over all
+   * processes. level_ghost_owners is cleared but not refilled here.
+   *
+   * If the triangulation has no levels, the global counts are set to zero
+   * and no communication takes place.
+   */
+  template <int dim, int spacedim>
+  void
+  Triangulation<dim,spacedim>::update_number_cache ()
+  {
+    std::vector<unsigned int> &owned_cells_per_process
+      = number_cache.n_locally_owned_active_cells;
+
+    Assert (owned_cells_per_process.size()
+            ==
+            Utilities::MPI::n_mpi_processes (this->mpi_communicator),
+            ExcInternalError());
+
+    // start from a clean slate
+    std::fill (owned_cells_per_process.begin(),
+               owned_cells_per_process.end(),
+               0);
+    number_cache.ghost_owners.clear ();
+    number_cache.level_ghost_owners.clear ();
+
+    // Skip the communication below if we do not have any cells (meaning
+    // the Triangulation is empty on all processors). This will happen when
+    // called from the destructor of Triangulation, which can get called
+    // during exception handling causing a hang in this function.
+    if (this->n_levels() == 0)
+      {
+        number_cache.n_global_active_cells = 0;
+        number_cache.n_global_levels = 0;
+        return;
+      }
+
+    typedef typename Triangulation<dim,spacedim>::active_cell_iterator
+    active_iterator;
+
+    // find ghost owners
+    for (active_iterator cell = this->begin_active();
+         cell != this->end();
+         ++cell)
+      {
+        if (cell->is_ghost())
+          number_cache.ghost_owners.insert(cell->subdomain_id());
+      }
+
+    Assert(number_cache.ghost_owners.size() < Utilities::MPI::n_mpi_processes(mpi_communicator), ExcInternalError());
+
+    // count our own active cells; at this point there is at least one level
+    unsigned int n_owned_here = 0;
+    for (active_iterator cell = this->begin_active();
+         cell != this->end();
+         ++cell)
+      {
+        if (cell->subdomain_id() == my_subdomain)
+          ++n_owned_here;
+      }
+    owned_cells_per_process[my_subdomain] = n_owned_here;
+
+    // every process contributes its count and receives all the others
+    unsigned int send_value = n_owned_here;
+    MPI_Allgather (&send_value,
+                   1,
+                   MPI_UNSIGNED,
+                   &owned_cells_per_process[0],
+                   1,
+                   MPI_UNSIGNED,
+                   this->mpi_communicator);
+
+    number_cache.n_global_active_cells
+      = std::accumulate (owned_cells_per_process.begin(),
+                         owned_cells_per_process.end(),
+                         /* ensure sum is computed with correct data type:*/
+                         static_cast<types::global_dof_index>(0));
+    number_cache.n_global_levels = Utilities::MPI::max(this->n_levels(), this->mpi_communicator);
+  }
+#else
+  /**
+   * Variant compiled without MPI support: not implemented, it only
+   * triggers an assertion.
+   */
+  template <int dim, int spacedim>
+  void
+  Triangulation<dim,spacedim>::update_number_cache ()
+  {
+    Assert (false, ExcNotImplemented());
+  }
+
+#endif
+
+  /**
+   * Return my_subdomain, the subdomain id of the current process (set in
+   * the constructor from the MPI rank). Asserts that dim > 1.
+   */
+  template <int dim, int spacedim>
+  types::subdomain_id
+  Triangulation<dim,spacedim>::locally_owned_subdomain () const
+  {
+    Assert (dim > 1, ExcNotImplemented());
+    return my_subdomain;
+  }
+
+  /**
+   * Return a reference to the cached set of ghost owners
+   * (number_cache.ghost_owners).
+   */
+  template <int dim, int spacedim>
+  const std::set<unsigned int> &
+  Triangulation<dim,spacedim>::
+  ghost_owners () const
+  {
+    return number_cache.ghost_owners;
+  }
+
+  /**
+   * Return a reference to the cached set of level ghost owners
+   * (number_cache.level_ghost_owners).
+   */
+  template <int dim, int spacedim>
+  const std::set<unsigned int> &
+  Triangulation<dim,spacedim>::
+  level_ghost_owners () const
+  {
+    return number_cache.level_ghost_owners;
+  }
+
+} // end namespace parallel
+
+
+
+
+/*-------------- Explicit Instantiations -------------------------------*/
+#include "tria_base.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/distributed/tria_base.inst.in b/source/distributed/tria_base.inst.in
new file mode 100644
index 0000000..074deeb
--- /dev/null
+++ b/source/distributed/tria_base.inst.in
@@ -0,0 +1,33 @@
+// ---------------------------------------------------------------------
+// $Id: tria.inst.in 32674 2014-03-20 16:57:24Z denis.davydov $
+//
+// Copyright (C) 2010 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+for (deal_II_dimension : DIMENSIONS)
+  {
+    namespace parallel
+    \{
+        template class Triangulation<deal_II_dimension>;
+#       if deal_II_dimension < 3
+        template class Triangulation<deal_II_dimension, deal_II_dimension+1>;
+#       endif
+#       if deal_II_dimension < 2
+        template class Triangulation<deal_II_dimension, deal_II_dimension+2>;
+#       endif
+    \}
+
+  }
+
diff --git a/source/dofs/CMakeLists.txt b/source/dofs/CMakeLists.txt
new file mode 100644
index 0000000..7654322
--- /dev/null
+++ b/source/dofs/CMakeLists.txt
@@ -0,0 +1,53 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_BINARY_DIR})  # put this directory's build dir first on the include path
+
+SET(_src  # C++ sources of the obj_dofs object library
+  block_info.cc
+  dof_accessor.cc
+  dof_accessor_get.cc
+  dof_accessor_set.cc
+  dof_faces.cc
+  dof_handler.cc
+  dof_handler_policy.cc
+  dof_objects.cc
+  dof_renumbering.cc
+  dof_tools.cc
+  dof_tools_constraints.cc
+  dof_tools_sparsity.cc
+  number_cache.cc
+  )
+
+SET(_inst  # instantiation templates, passed to EXPAND_INSTANTIATIONS below
+  block_info.inst.in
+  dof_accessor_get.inst.in
+  dof_accessor.inst.in
+  dof_accessor_set.inst.in
+  dof_handler.inst.in
+  dof_handler_policy.inst.in
+  dof_objects.inst.in
+  dof_renumbering.inst.in
+  dof_tools_constraints.inst.in
+  dof_tools.inst.in
+  dof_tools_sparsity.inst.in
+  )
+
+FILE(GLOB _header  # all headers under include/deal.II/dofs, listed with the library target
+  ${CMAKE_SOURCE_DIR}/include/deal.II/dofs/*.h
+  )
+
+DEAL_II_ADD_LIBRARY(obj_dofs OBJECT ${_src} ${_header} ${_inst})
+EXPAND_INSTANTIATIONS(obj_dofs "${_inst}")  # project macro; presumably generates the *.inst files included by the sources
diff --git a/source/dofs/block_info.cc b/source/dofs/block_info.cc
new file mode 100644
index 0000000..d2b77a6
--- /dev/null
+++ b/source/dofs/block_info.cc
@@ -0,0 +1,77 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/dofs/block_info.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/multigrid/mg_tools.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/fe_tools.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+template <int dim, int spacedim>
+void
+BlockInfo::initialize(const DoFHandler<dim, spacedim> &dof, bool levels_only, bool active_only)
+{
+  // block sizes of the active dofs, unless the caller asked for levels only
+  if (!levels_only && dof.has_active_dofs())
+    {
+      std::vector<types::global_dof_index> block_sizes(dof.get_fe().n_blocks());
+      DoFTools::count_dofs_per_block(dof, block_sizes);
+      bi_global.reinit(block_sizes);
+    }
+
+  // block sizes on every level, unless the caller asked for active dofs only
+  if (!active_only && dof.has_level_dofs())
+    {
+      const unsigned int n_levels = dof.get_triangulation().n_levels ();
+      const unsigned int n_blocks = dof.get_fe ().n_blocks ();
+      // one zero-initialized vector with n_blocks entries per level
+      std::vector<std::vector<types::global_dof_index> >
+      level_sizes (n_levels, std::vector<types::global_dof_index>(n_blocks));
+      MGTools::count_dofs_per_block (dof, level_sizes);
+      levels.resize (level_sizes.size ());
+      for (unsigned int level = 0; level < level_sizes.size (); ++level)
+        levels[level].reinit (level_sizes[level]);
+    }
+}
+
+
+template <int dim, int spacedim>
+void
+BlockInfo::initialize_local(const DoFHandler<dim, spacedim> &dof)
+{
+  const FiniteElement<dim, spacedim> &element = dof.get_fe();
+  const unsigned int n_blocks = element.n_blocks();
+
+  // for each block, store the .first entry of block_to_base_index
+  base_elements.resize(n_blocks);
+  for (unsigned int block=0; block<n_blocks; ++block)
+    base_elements[block] = element.block_to_base_index(block).first;
+
+  // let FETools fill the cell-local renumbering and the per-block sizes
+  std::vector<types::global_dof_index> block_sizes(n_blocks);
+  local_renumbering.resize(element.n_dofs_per_cell());
+  FETools::compute_block_renumbering(element, local_renumbering, block_sizes, false);
+  bi_local.reinit(block_sizes);
+}
+
+
+// explicit instantiations
+#include "block_info.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/dofs/block_info.inst.in b/source/dofs/block_info.inst.in
new file mode 100644
index 0000000..a3c28d0
--- /dev/null
+++ b/source/dofs/block_info.inst.in
@@ -0,0 +1,32 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+for (deal_II_dimension : DIMENSIONS)
+  {
+    template void BlockInfo::initialize(const DoFHandler<deal_II_dimension,deal_II_dimension>&, bool, bool);
+    template void BlockInfo::initialize_local(const DoFHandler<deal_II_dimension,deal_II_dimension>&);
+
+#if deal_II_dimension < 3
+    template void BlockInfo::initialize(const DoFHandler<deal_II_dimension,deal_II_dimension+1>&, bool, bool);
+    template void BlockInfo::initialize_local(const DoFHandler<deal_II_dimension,deal_II_dimension+1>&);
+#endif
+
+#if deal_II_dimension == 3
+    template void BlockInfo::initialize(const DoFHandler<1,3>&, bool, bool);
+    template void BlockInfo::initialize_local(const DoFHandler<1,3>&);
+#endif
+  }
+
diff --git a/source/dofs/dof_accessor.cc b/source/dofs/dof_accessor.cc
new file mode 100644
index 0000000..588654d
--- /dev/null
+++ b/source/dofs/dof_accessor.cc
@@ -0,0 +1,91 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_levels.h>
+#include <deal.II/hp/dof_handler.h>
+#include <deal.II/grid/tria_boundary.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/tria_iterator.templates.h>
+#include <deal.II/fe/fe.h>
+
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+/*------------------------- Static variables: DoFAccessor -----------------------*/
+// out-of-class definitions of the static const members dimension and space_dimension
+template <int structdim, typename DoFHandlerType, bool level_dof_access>
+const unsigned int DoFAccessor<structdim,DoFHandlerType,level_dof_access>::dimension;
+
+template <int structdim, typename DoFHandlerType, bool level_dof_access>
+const unsigned int DoFAccessor<structdim,DoFHandlerType,level_dof_access>::space_dimension;
+
+
+
+/*------------------------- Functions: DoFCellAccessor -----------------------*/
+
+
+
+
+// Delegates to internal::DoFCellAccessor::Implementation::update_cell_dof_indices_cache
+template <typename DoFHandlerType, bool lda>
+void
+DoFCellAccessor<DoFHandlerType,lda>::update_cell_dof_indices_cache () const
+{
+  // validate the handler pointer first: the level check below dereferences it
+  Assert (this->dof_handler != 0, typename BaseClass::ExcInvalidObject());
+  Assert (static_cast<unsigned int>(this->present_level) < this->dof_handler->levels.size(),
+          ExcMessage ("DoFHandler not initialized"));
+  internal::DoFCellAccessor::Implementation::
+  update_cell_dof_indices_cache (*this);
+}
+
+
+
+// Delegates to internal::DoFCellAccessor::Implementation::set_dof_indices
+template <typename DoFHandlerType, bool lda>
+void
+DoFCellAccessor<DoFHandlerType,lda>::set_dof_indices (const std::vector<types::global_dof_index> &local_dof_indices)
+{
+  // validate the handler pointer first: the level check below dereferences it
+  Assert (this->dof_handler != 0, typename BaseClass::ExcInvalidObject());
+  Assert (static_cast<unsigned int>(this->present_level) < this->dof_handler->levels.size(),
+          ExcMessage ("DoFHandler not initialized"));
+  internal::DoFCellAccessor::Implementation::
+  set_dof_indices (*this, local_dof_indices);
+}
+
+
+
+
+// Wraps the CellAccessor result in an iterator bound to this->dof_handler.
+template <typename DoFHandlerType, bool lda>
+TriaIterator<DoFCellAccessor<DoFHandlerType,lda> >
+DoFCellAccessor<DoFHandlerType,lda>::neighbor_child_on_subface (const unsigned int face, const unsigned int subface) const
+{
+  typedef TriaIterator<DoFCellAccessor<DoFHandlerType,lda> > dof_iterator;
+  const TriaIterator<CellAccessor<dim,spacedim> > tria_child = CellAccessor<dim,spacedim>::neighbor_child_on_subface (face, subface);
+  return dof_iterator (*tria_child, this->dof_handler);
+}
+
+
+
+// --------------------------------------------------------------------------
+// explicit instantiations
+#include "dof_accessor.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/dofs/dof_accessor.inst.in b/source/dofs/dof_accessor.inst.in
new file mode 100644
index 0000000..24e7f90
--- /dev/null
+++ b/source/dofs/dof_accessor.inst.in
@@ -0,0 +1,166 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+// TODO: This could surely be made more systematic!
+for (deal_II_dimension : DIMENSIONS; lda : BOOL)
+  {
+// explicit instantiations (for DoFHandler)
+
+
+#if deal_II_dimension == 2
+    template class DoFAccessor<1, DoFHandler<2>, lda>;
+    
+    template class TriaRawIterator   <DoFAccessor<1, DoFHandler<2>, lda> >;
+    template class TriaIterator      <DoFAccessor<1, DoFHandler<2>, lda> >;
+    template class TriaActiveIterator<DoFAccessor<1, DoFHandler<2>, lda> >;
+#endif
+
+#if deal_II_dimension == 3
+    template class DoFAccessor<1, DoFHandler<3>, lda>;
+    template class DoFAccessor<2, DoFHandler<3>, lda>;
+    
+    template class TriaRawIterator   <DoFAccessor<1, DoFHandler<3>, lda> >;
+    template class TriaIterator      <DoFAccessor<1, DoFHandler<3>, lda> >;
+    template class TriaActiveIterator<DoFAccessor<1, DoFHandler<3>, lda> >;
+    template class TriaRawIterator   <DoFAccessor<2, DoFHandler<3>, lda> >;
+    template class TriaIterator      <DoFAccessor<2, DoFHandler<3>, lda> >;
+    template class TriaActiveIterator<DoFAccessor<2, DoFHandler<3>, lda> >;
+#endif
+
+    template class DoFAccessor<deal_II_dimension, DoFHandler<deal_II_dimension>, lda>;
+    template class DoFCellAccessor<DoFHandler<deal_II_dimension>, lda>;
+
+    template class TriaRawIterator   <DoFCellAccessor<DoFHandler<deal_II_dimension>, lda> >;
+    template class TriaIterator      <DoFCellAccessor<DoFHandler<deal_II_dimension>, lda> >;
+    template class TriaActiveIterator<DoFCellAccessor<DoFHandler<deal_II_dimension>, lda> >;
+
+
+// --------------------------------------------------------------------------
+// explicit instantiations (for hp::DoFHandler)
+
+
+#if deal_II_dimension == 2
+    template class DoFAccessor<1, hp::DoFHandler<2>, lda>;
+    template class TriaRawIterator   <DoFAccessor<1, hp::DoFHandler<2>, lda> >;
+    template class TriaIterator      <DoFAccessor<1, hp::DoFHandler<2>, lda> >;
+    template class TriaActiveIterator<DoFAccessor<1, hp::DoFHandler<2>, lda> >;
+#endif
+
+
+#if deal_II_dimension == 3
+    template class DoFAccessor<1, hp::DoFHandler<3>, lda>;
+    template class TriaRawIterator   <DoFAccessor<1, hp::DoFHandler<3>, lda> >;
+    template class TriaIterator      <DoFAccessor<1, hp::DoFHandler<3>, lda> >;
+    template class TriaActiveIterator<DoFAccessor<1, hp::DoFHandler<3>, lda> >;
+    
+    template class DoFAccessor<2, hp::DoFHandler<3>, lda>;
+    template class TriaRawIterator   <DoFAccessor<2, hp::DoFHandler<3>, lda> >;
+    template class TriaIterator      <DoFAccessor<2, hp::DoFHandler<3>, lda> >;
+    template class TriaActiveIterator<DoFAccessor<2, hp::DoFHandler<3>, lda> >;
+#endif
+
+    template class DoFAccessor<deal_II_dimension, hp::DoFHandler<deal_II_dimension>, lda>;
+    template class DoFCellAccessor<hp::DoFHandler<deal_II_dimension>, lda>;
+
+    template class TriaRawIterator   <DoFCellAccessor<hp::DoFHandler<deal_II_dimension>, lda> >;
+    template class TriaIterator      <DoFCellAccessor<hp::DoFHandler<deal_II_dimension>, lda> >;
+    template class TriaActiveIterator<DoFCellAccessor<hp::DoFHandler<deal_II_dimension>, lda> >;
+
+
+
+// --------------------------------------------------------------------------
+// explicit instantiations (for DoFHandler with spacedim > dim)
+
+#if deal_II_dimension == 1
+    template class DoFAccessor<1, DoFHandler<1,2>, lda>;
+#endif
+
+#if deal_II_dimension == 2
+    template class DoFAccessor<1, DoFHandler<2,3>, lda>;
+    template class DoFAccessor<2, DoFHandler<2,3>, lda>;
+
+    template class TriaRawIterator   <DoFAccessor<1, DoFHandler<2,3>, lda> >;
+    template class TriaIterator      <DoFAccessor<1, DoFHandler<2,3>, lda> >;
+    template class TriaActiveIterator<DoFAccessor<1, DoFHandler<2,3>, lda> >;
+#endif
+
+
+#if deal_II_dimension != 3
+    template class DoFCellAccessor<DoFHandler<deal_II_dimension,deal_II_dimension+1>, lda>;
+
+    template class
+      TriaRawIterator   <DoFCellAccessor<DoFHandler<deal_II_dimension,deal_II_dimension+1>, lda> >;
+    template class
+      TriaIterator      <DoFCellAccessor<DoFHandler<deal_II_dimension,deal_II_dimension+1>, lda> >;
+    template class
+      TriaActiveIterator<DoFCellAccessor<DoFHandler<deal_II_dimension,deal_II_dimension+1>, lda> >;
+#endif
+
+
+#if deal_II_dimension == 3
+    template class DoFCellAccessor<DoFHandler<1,3>, lda>;
+
+    template class
+      TriaRawIterator   <DoFCellAccessor<DoFHandler<1,3>, lda> >;
+    template class
+      TriaIterator      <DoFCellAccessor<DoFHandler<1,3>, lda> >;
+    template class
+      TriaActiveIterator<DoFCellAccessor<DoFHandler<1,3>, lda> >;
+#endif
+
+
+// --------------------------------------------------------------------------
+// explicit instantiations (for hp::DoFHandler with spacedim > dim)
+
+#if deal_II_dimension == 1
+    template class DoFAccessor<1, hp::DoFHandler<1,2>, lda>;
+    template class DoFAccessor<1, hp::DoFHandler<1,3>, lda>;
+#endif
+
+#if deal_II_dimension == 2
+    template class DoFAccessor<1, hp::DoFHandler<2,3>, lda>;
+    template class DoFAccessor<2, hp::DoFHandler<2,3>, lda>;
+
+    template class TriaRawIterator   <DoFAccessor<1, hp::DoFHandler<2,3>, lda> >;
+    template class TriaIterator      <DoFAccessor<1, hp::DoFHandler<2,3>, lda> >;
+    template class TriaActiveIterator<DoFAccessor<1, hp::DoFHandler<2,3>, lda> >;
+#endif
+
+#if deal_II_dimension != 3
+    template class DoFCellAccessor<hp::DoFHandler<deal_II_dimension,deal_II_dimension+1>, lda>;
+
+    template class
+      TriaRawIterator   <DoFCellAccessor<hp::DoFHandler<deal_II_dimension,deal_II_dimension+1>, lda> >;
+    template class
+      TriaIterator      <DoFCellAccessor<hp::DoFHandler<deal_II_dimension,deal_II_dimension+1>, lda> >;
+    template class
+      TriaActiveIterator<DoFCellAccessor<hp::DoFHandler<deal_II_dimension,deal_II_dimension+1>, lda> >;
+#endif
+
+
+#if deal_II_dimension == 3
+    template class DoFCellAccessor<hp::DoFHandler<1,3>, lda>;
+
+    template class
+      TriaRawIterator   <DoFCellAccessor<hp::DoFHandler<1,3>, lda> >;
+    template class
+      TriaIterator      <DoFCellAccessor<hp::DoFHandler<1,3>, lda> >;
+    template class
+      TriaActiveIterator<DoFCellAccessor<hp::DoFHandler<1,3>, lda> >;
+#endif
+  }
+
diff --git a/source/dofs/dof_accessor_get.cc b/source/dofs/dof_accessor_get.cc
new file mode 100644
index 0000000..b8a7781
--- /dev/null
+++ b/source/dofs/dof_accessor_get.cc
@@ -0,0 +1,176 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/parallel_vector.h>
+#include <deal.II/lac/parallel_block_vector.h>
+#include <deal.II/lac/petsc_vector.h>
+#include <deal.II/lac/petsc_block_vector.h>
+#include <deal.II/lac/trilinos_vector.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+#include <deal.II/lac/sparse_matrix.h>
+
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_levels.h>
+#include <deal.II/hp/dof_handler.h>
+#include <deal.II/grid/tria_boundary.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/tria_iterator.templates.h>
+#include <deal.II/fe/fe.h>
+
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// Fill interpolated_values with the values of 'values' on this cell: read
+// directly on cells without children, otherwise restricted from the children.
+template <typename DoFHandlerType, bool lda>
+template <class InputVector, typename number>
+void
+DoFCellAccessor<DoFHandlerType,lda>::
+get_interpolated_dof_values (const InputVector &values,
+                             Vector<number>    &interpolated_values,
+                             const unsigned int fe_index) const
+{
+  if (!this->has_children())
+    // if this cell has no children: simply return the exact values on this
+    // cell unless the finite element we need to interpolate to is different from
+    // the one we have on the current cell
+    {
+      if ((dynamic_cast<DoFHandler<DoFHandlerType::dimension,DoFHandlerType::space_dimension>*>
+           (this->dof_handler)
+           != 0)
+          ||
+          // for hp-DoFHandlers, we need to require that on
+          // active cells, you either don't specify an fe_index,
+          // or that you specify the correct one
+          (fe_index == this->active_fe_index())
+          ||
+          (fe_index == DoFHandlerType::default_fe_index))
+        this->get_dof_values (values, interpolated_values);
+      else
+        {
+          // well, here we need to first get the values from the current
+          // cell and then interpolate it to the element requested. this
+          // can clearly only happen for hp::DoFHandler objects
+          Vector<number> tmp (this->get_fe().dofs_per_cell);
+          this->get_dof_values (values, tmp);
+
+          FullMatrix<double> interpolation (this->dof_handler->get_fe()[fe_index].dofs_per_cell,
+                                            this->get_fe().dofs_per_cell);
+          this->dof_handler->get_fe()[fe_index].get_interpolation_matrix (this->get_fe(), interpolation);
+          interpolation.vmult (interpolated_values, tmp);
+        }
+    }
+  else
+    // otherwise obtain them from the children
+    {
+      // we are on a non-active cell. these do not have any finite
+      // element associated with them in the hp context (in the non-hp
+      // context, we can simply assume that the FE space to which we
+      // want to interpolate is the same as for all elements in the
+      // mesh). consequently, we cannot interpolate from children's FE
+      // space to this cell's (unknown) FE space unless an explicit
+      // fe_index is given
+      Assert ((dynamic_cast<DoFHandler<DoFHandlerType::dimension,DoFHandlerType::space_dimension>*>
+               (this->dof_handler)
+               != 0)
+              ||
+              (fe_index != DoFHandlerType::default_fe_index),
+              ExcMessage ("You cannot call this function on non-active cells "
+                          "of hp::DoFHandler objects unless you provide an explicit "
+                          "finite element index because they do not have naturally "
+                          "associated finite element spaces associated: degrees "
+                          "of freedom are only distributed on active cells for which "
+                          "the active_fe_index has been set."));
+
+      // make sure the handler pointer is valid before it is dereferenced below
+      Assert (this->dof_handler != 0, typename BaseClass::ExcInvalidObject());
+      const FiniteElement<dim,spacedim> &fe            = this->get_dof_handler().get_fe()[fe_index];
+      const unsigned int                 dofs_per_cell = fe.dofs_per_cell;
+
+      Assert (interpolated_values.size() == dofs_per_cell,
+              typename BaseClass::ExcVectorDoesNotMatch());
+      Assert (values.size() == this->dof_handler->n_dofs(),
+              typename BaseClass::ExcVectorDoesNotMatch());
+
+      // see if the finite element we have on the current cell has any
+      // degrees of freedom to begin with; if not (e.g., when
+      // interpolating FE_Nothing), then simply skip all of the
+      // following since the output vector would be of size zero
+      // anyway (and in fact is of size zero, see the assertion above)
+      if (fe.dofs_per_cell > 0)
+        {
+          Vector<number> tmp1(dofs_per_cell);
+          Vector<number> tmp2(dofs_per_cell);
+
+          interpolated_values = 0;
+
+          // later on we will have to push the values interpolated from the child
+          // to the mother cell into the output vector. unfortunately, there are
+          // two types of elements: ones where you add up the contributions from
+          // the different child cells, and ones where you overwrite.
+          //
+          // an example for the first is piecewise constant (and discontinuous)
+          // elements, where we build the value on the coarse cell by averaging
+          // the values from the child cells (i.e. by adding up a fraction of
+          // each child's values)
+          //
+          // an example for the latter are the usual continuous elements. the
+          // value on a vertex of a coarse cell must there be the same,
+          // irrespective of the adjacent cell we are presently on. so we always
+          // overwrite. in fact, we must, since we cannot know in advance how many
+          // neighbors there will be, so there is no way to compute the average
+          // with fixed factors
+          //
+          // so we have to find out to which type this element belongs. the
+          // difficulty is: the finite element may be a composed one, so we can
+          // only hope to do this for each shape function individually. in fact,
+          // there are even weird finite elements (for example the Raviart-Thomas
+          // element) which have shape functions that are additive (interior ones)
+          // and others that are overwriting (face degrees of freedom that need to
+          // be continuous across the face).
+          for (unsigned int child=0; child<this->n_children(); ++child)
+            {
+              // get the values from the present child, if necessary by
+              // interpolation itself either from its own children or
+              // by interpolating from the finite element on an active
+              // child to the finite element space requested here
+              this->child(child)->get_interpolated_dof_values (values,
+                                                               tmp1,
+                                                               fe_index);
+              // interpolate these to the mother cell
+              fe.get_restriction_matrix(child, this->refinement_case()).vmult (tmp2, tmp1);
+
+              // and add up or set them in the output vector
+              for (unsigned int i=0; i<dofs_per_cell; ++i)
+                if (fe.restriction_is_additive(i))
+                  interpolated_values(i) += tmp2(i);
+                else if (tmp2(i) != number())
+                  interpolated_values(i) = tmp2(i);
+            }
+        }
+    }
+}
+
+
+// --------------------------------------------------------------------------
+// explicit instantiations
+#include "dof_accessor_get.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/dofs/dof_accessor_get.inst.in b/source/dofs/dof_accessor_get.inst.in
new file mode 100644
index 0000000..69aa7d8
--- /dev/null
+++ b/source/dofs/dof_accessor_get.inst.in
@@ -0,0 +1,77 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+for (VEC : SERIAL_VECTORS; SCALAR : REAL_SCALARS; deal_II_dimension : DIMENSIONS; lda : BOOL)
+  {
+    template
+      void
+      DoFCellAccessor<DoFHandler<deal_II_dimension>, lda>::get_interpolated_dof_values
+      (const VEC&, Vector<SCALAR>&,
+       const unsigned int fe_index) const;
+
+#if deal_II_dimension != 3
+
+    template
+      void
+      DoFCellAccessor<DoFHandler<deal_II_dimension,deal_II_dimension+1>, lda>::get_interpolated_dof_values
+      (const VEC&, Vector<SCALAR>&,
+       const unsigned int fe_index) const;
+
+#endif
+
+#if deal_II_dimension == 3
+
+    template
+      void
+      DoFCellAccessor<DoFHandler<1,3>, lda>::get_interpolated_dof_values
+      (const VEC&, Vector<SCALAR>&,
+       const unsigned int fe_index) const;
+
+#endif
+
+  }
+
+
+for (VEC : SERIAL_VECTORS; SCALAR : REAL_SCALARS; deal_II_dimension : DIMENSIONS; lda : BOOL)
+  {
+    template
+      void
+      DoFCellAccessor<hp::DoFHandler<deal_II_dimension>, lda>::get_interpolated_dof_values
+      (const VEC&, Vector<SCALAR>&,
+       const unsigned int fe_index) const;
+
+#if deal_II_dimension != 3
+
+    template
+      void
+      DoFCellAccessor<hp::DoFHandler<deal_II_dimension,deal_II_dimension+1>, lda>::get_interpolated_dof_values
+      (const VEC&, Vector<SCALAR>&,
+       const unsigned int fe_index) const;
+
+#endif
+
+#if deal_II_dimension == 3
+
+    template
+      void
+      DoFCellAccessor<hp::DoFHandler<1,3>, lda>::get_interpolated_dof_values
+      (const VEC&, Vector<SCALAR>&,
+       const unsigned int fe_index) const;
+
+#endif
+  }
+
diff --git a/source/dofs/dof_accessor_set.cc b/source/dofs/dof_accessor_set.cc
new file mode 100644
index 0000000..a97193c
--- /dev/null
+++ b/source/dofs/dof_accessor_set.cc
@@ -0,0 +1,125 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/parallel_vector.h>
+#include <deal.II/lac/parallel_block_vector.h>
+#include <deal.II/lac/petsc_vector.h>
+#include <deal.II/lac/petsc_block_vector.h>
+#include <deal.II/lac/trilinos_vector.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+#include <deal.II/lac/sparse_matrix.h>
+
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_levels.h>
+#include <deal.II/hp/dof_handler.h>
+#include <deal.II/grid/tria_boundary.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/tria_iterator.templates.h>
+#include <deal.II/fe/fe.h>
+
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// Write local_values into 'values': set directly on non-artificial cells
+// without children, otherwise prolongated to each child and set recursively.
+template <typename DoFHandlerType, bool lda>
+template <class OutputVector, typename number>
+void
+DoFCellAccessor<DoFHandlerType,lda>::
+set_dof_values_by_interpolation (const Vector<number> &local_values,
+                                 OutputVector         &values,
+                                 const unsigned int fe_index) const
+{
+  if (!this->has_children() && !this->is_artificial ())
+    {
+      if ((dynamic_cast<DoFHandler<DoFHandlerType::dimension,DoFHandlerType::space_dimension>*>
+           (this->dof_handler)
+           != 0)
+          ||
+          // for hp-DoFHandlers, we need to require that on
+          // active cells, you either don't specify an fe_index,
+          // or that you specify the correct one
+          (fe_index == this->active_fe_index())
+          ||
+          (fe_index == DoFHandlerType::default_fe_index))
+        // simply set the values on this cell
+        this->set_dof_values (local_values, values);
+      else
+        {
+          Assert (local_values.size() == this->dof_handler->get_fe()[fe_index].dofs_per_cell,
+                  ExcMessage ("Incorrect size of local_values vector.") );
+
+          FullMatrix<double> interpolation (this->get_fe().dofs_per_cell, this->dof_handler->get_fe()[fe_index].dofs_per_cell);
+
+          this->get_fe().get_interpolation_matrix (this->dof_handler->get_fe()[fe_index], interpolation);
+
+          // do the interpolation to the target space. for historical
+          // reasons, matrices are set to size 0x0 internally even if
+          // we reinit as 4x0, so we have to treat this case specially
+          Vector<number> tmp (this->get_fe().dofs_per_cell);
+          if ((tmp.size() > 0) && (local_values.size() > 0))
+            interpolation.vmult (tmp, local_values);
+
+          // now set the dof values in the global vector
+          this->set_dof_values (tmp, values);
+        }
+    }
+  else
+    // otherwise distribute them to the children
+    {
+      Assert ((dynamic_cast<DoFHandler<DoFHandlerType::dimension,DoFHandlerType::space_dimension>*>
+               (this->dof_handler)
+               != 0)
+              ||
+              (fe_index != DoFHandlerType::default_fe_index),
+              ExcMessage ("You cannot call this function on non-active cells "
+                          "of hp::DoFHandler objects unless you provide an explicit "
+                          "finite element index because they do not have naturally "
+                          "associated finite element spaces associated: degrees "
+                          "of freedom are only distributed on active cells for which "
+                          "the active_fe_index has been set."));
+
+      // make sure the handler pointer is valid before it is dereferenced below
+      Assert (this->dof_handler != 0, typename BaseClass::ExcInvalidObject());
+      const FiniteElement<dim,spacedim> &fe            = this->get_dof_handler().get_fe()[fe_index];
+      const unsigned int                 dofs_per_cell = fe.dofs_per_cell;
+
+      Assert (local_values.size() == dofs_per_cell,
+              typename BaseClass::ExcVectorDoesNotMatch());
+      Assert (values.size() == this->dof_handler->n_dofs(),
+              typename BaseClass::ExcVectorDoesNotMatch());
+
+      Vector<number> tmp(dofs_per_cell);
+
+      for (unsigned int child=0; child<this->n_children(); ++child)
+        {
+          if (tmp.size() > 0)
+            fe.get_prolongation_matrix(child, this->refinement_case()).vmult (tmp, local_values);
+          this->child(child)->set_dof_values_by_interpolation (tmp, values, fe_index);
+        }
+    }
+}
+
+
+// --------------------------------------------------------------------------
+// explicit instantiations
+#include "dof_accessor_set.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/dofs/dof_accessor_set.inst.in b/source/dofs/dof_accessor_set.inst.in
new file mode 100644
index 0000000..ef6c0bc
--- /dev/null
+++ b/source/dofs/dof_accessor_set.inst.in
@@ -0,0 +1,77 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+for (VEC : SERIAL_VECTORS; SCALAR : REAL_SCALARS; deal_II_dimension : DIMENSIONS; lda : BOOL)
+  {
+    template
+      void
+      DoFCellAccessor<DoFHandler<deal_II_dimension>, lda>::set_dof_values_by_interpolation
+      (const Vector<SCALAR>&, VEC&,
+       const unsigned int fe_index) const;
+    // above: explicit instantiation request for DoFHandler with a single dimension argument
+#if deal_II_dimension != 3
+    // DoFHandler<dim,dim+1>; left out when deal_II_dimension is 3
+    template
+      void
+      DoFCellAccessor<DoFHandler<deal_II_dimension,deal_II_dimension+1>, lda>::set_dof_values_by_interpolation
+      (const Vector<SCALAR>&, VEC&,
+       const unsigned int fe_index) const;
+
+#endif
+
+#if deal_II_dimension == 3
+    // DoFHandler<1,3>; requested only when deal_II_dimension is 3
+    template
+      void
+      DoFCellAccessor<DoFHandler<1,3>, lda>::set_dof_values_by_interpolation
+      (const Vector<SCALAR>&, VEC&,
+       const unsigned int fe_index) const;
+
+#endif
+
+  }
+
+
+for (VEC : SERIAL_VECTORS; SCALAR : REAL_SCALARS; deal_II_dimension : DIMENSIONS; lda : BOOL)
+  {
+    template
+      void
+      DoFCellAccessor<hp::DoFHandler<deal_II_dimension>, lda>::set_dof_values_by_interpolation
+      (const Vector<SCALAR>&, VEC&,
+       const unsigned int fe_index) const;
+    // above: explicit instantiation request for hp::DoFHandler with a single dimension argument
+#if deal_II_dimension != 3
+    // hp::DoFHandler<dim,dim+1>; left out when deal_II_dimension is 3
+    template
+      void
+      DoFCellAccessor<hp::DoFHandler<deal_II_dimension,deal_II_dimension+1>, lda>::set_dof_values_by_interpolation
+      (const Vector<SCALAR>&, VEC&,
+       const unsigned int fe_index) const;
+
+#endif
+
+#if deal_II_dimension == 3
+    // hp::DoFHandler<1,3>; requested only when deal_II_dimension is 3
+    template
+      void
+      DoFCellAccessor<hp::DoFHandler<1,3>, lda>::set_dof_values_by_interpolation
+      (const Vector<SCALAR>&, VEC&,
+       const unsigned int fe_index) const;
+
+#endif
+  }
+
diff --git a/source/dofs/dof_faces.cc b/source/dofs/dof_faces.cc
new file mode 100644
index 0000000..02034f9
--- /dev/null
+++ b/source/dofs/dof_faces.cc
@@ -0,0 +1,50 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/dofs/dof_faces.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace internal
+{
+  namespace DoFHandler
+  {
+    // memory estimates of DoFFaces: 0 in 1d, 'lines' in 2d, 'quads' plus 'lines' in 3d
+    std::size_t
+    DoFFaces<1>::memory_consumption () const
+    {
+      return 0;
+    }
+
+    std::size_t
+    DoFFaces<2>::memory_consumption () const
+    {
+      const std::size_t bytes_for_lines = MemoryConsumption::memory_consumption (lines);
+      return bytes_for_lines;
+    }
+
+    std::size_t
+    DoFFaces<3>::memory_consumption () const
+    {
+      const std::size_t bytes_for_quads = MemoryConsumption::memory_consumption (quads);
+      return bytes_for_quads + MemoryConsumption::memory_consumption (lines);
+    }
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/dofs/dof_handler.cc b/source/dofs/dof_handler.cc
new file mode 100644
index 0000000..4627df3
--- /dev/null
+++ b/source/dofs/dof_handler.cc
@@ -0,0 +1,1713 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_handler_policy.h>
+#include <deal.II/dofs/dof_levels.h>
+#include <deal.II/dofs/dof_faces.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/tria_levels.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/base/geometry_info.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/distributed/shared_tria.h>
+#include <deal.II/distributed/tria.h>
+
+#include <set>
+#include <algorithm>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+//TODO[WB]: do not use a plain pointer for DoFHandler::faces, but rather a
+//unique_ptr or some such thing. Alternatively, why not use the DoFFaces object
+//right away?
+
+template<int dim, int spacedim>
+const unsigned int DoFHandler<dim,spacedim>::dimension; // definition only; no initializer is given here
+
+template<int dim, int spacedim>
+const unsigned int DoFHandler<dim,spacedim>::space_dimension; // definition only; no initializer is given here
+
+template <int dim, int spacedim>
+const types::global_dof_index DoFHandler<dim,spacedim>::invalid_dof_index; // definition only; no initializer is given here
+
+template <int dim, int spacedim>
+const unsigned int DoFHandler<dim,spacedim>::default_fe_index; // definition only; no initializer is given here
+
+
+// work around an icc8 compiler bug by referencing invalid_dof_index explicitly
+namespace internal
+{
+  template <int dim, int spacedim>
+  const types::global_dof_index *dummy ()
+  {
+    typedef dealii::DoFHandler<dim,spacedim> Handler;
+    return &Handler::invalid_dof_index;
+  }
+}
+
+
+
+namespace internal
+{
+  template<int dim, int spacedim>
+  std::string policy_to_string(const dealii::internal::DoFHandler::Policy::PolicyBase<dim,spacedim> &policy)
+  {
+    // probe the dynamic type of the policy; the tests run in this fixed order
+    namespace PolicyNS = dealii::internal::DoFHandler::Policy;
+    std::string name;
+    if (dynamic_cast<const PolicyNS::Sequential<dim,spacedim>*>(&policy) != 0)
+      name = "Policy::Sequential<";
+    else if (dynamic_cast<const PolicyNS::ParallelDistributed<dim,spacedim>*>(&policy) != 0)
+      name = "Policy::ParallelDistributed<";
+    else if (dynamic_cast<const PolicyNS::ParallelShared<dim,spacedim>*>(&policy) != 0)
+      name = "Policy::ParallelShared<";
+    else
+      AssertThrow(false, ExcNotImplemented());
+    return name + Utilities::int_to_string(dim) + "," + Utilities::int_to_string(spacedim) + ">";
+  }
+
+
+  namespace DoFHandler
+  {
+    // access class
+    // dealii::DoFHandler instead of
+    // namespace internal::DoFHandler
+    using dealii::DoFHandler;
+
+
+    /**
+     * A class with the same purpose as the similarly named class of the
+     * Triangulation class. See there for more information.
+     */
+    struct Implementation
+    {
+      /**
+       * Implement the function of same name in
+       * the mother class.
+       */
+      template <int spacedim>
+      static
+      unsigned int
+      max_couplings_between_dofs (const DoFHandler<1,spacedim> &dof_handler)
+      {
+        const types::global_dof_index estimate = 3*dof_handler.selected_fe->dofs_per_vertex +
+                                                 2*dof_handler.selected_fe->dofs_per_line;
+        return std::min(estimate, dof_handler.n_dofs());
+      }
+
+
+
+      template <int spacedim>
+      static
+      unsigned int
+      max_couplings_between_dofs (const DoFHandler<2,spacedim> &dof_handler)
+      {
+        // pick the estimate matching tria->max_adjacent_cells(), then cap it at n_dofs()
+        // get these numbers by drawing pictures
+        // and counting...
+        // example:
+        //   |     |     |
+        // --x-----x--x--X--
+        //   |     |  |  |
+        //   |     x--x--x
+        //   |     |  |  |
+        // --x--x--*--x--x--
+        //   |  |  |     |
+        //   x--x--x     |
+        //   |  |  |     |
+        // --X--x--x-----x--
+        //   |     |     |
+        // x = vertices connected with center vertex *;
+        //   = total of 19
+        // (the X vertices are connected with * if
+        // the vertices adjacent to X are hanging
+        // nodes)
+        // count lines -> 28 (don't forget to count
+        // mother and children separately!)
+        types::global_dof_index max_couplings;
+        switch (dof_handler.tria->max_adjacent_cells())
+          {
+          case 4:
+            max_couplings=19*dof_handler.selected_fe->dofs_per_vertex +
+                          28*dof_handler.selected_fe->dofs_per_line +
+                          8*dof_handler.selected_fe->dofs_per_quad;
+            break;
+          case 5:
+            max_couplings=21*dof_handler.selected_fe->dofs_per_vertex +
+                          31*dof_handler.selected_fe->dofs_per_line +
+                          9*dof_handler.selected_fe->dofs_per_quad;
+            break;
+          case 6:
+            max_couplings=28*dof_handler.selected_fe->dofs_per_vertex +
+                          42*dof_handler.selected_fe->dofs_per_line +
+                          12*dof_handler.selected_fe->dofs_per_quad;
+            break;
+          case 7:
+            max_couplings=30*dof_handler.selected_fe->dofs_per_vertex +
+                          45*dof_handler.selected_fe->dofs_per_line +
+                          13*dof_handler.selected_fe->dofs_per_quad;
+            break;
+          case 8:
+            max_couplings=37*dof_handler.selected_fe->dofs_per_vertex +
+                          56*dof_handler.selected_fe->dofs_per_line +
+                          16*dof_handler.selected_fe->dofs_per_quad;
+            break;
+          // the factors below were not counted but extrapolated from the
+          // sequences above (for dofs_per_vertex: 19, 21, 28, 30, 37, ...).
+          // NOTE(review): the vertex factor steps 57 -> 63 at case 14 (+6, earlier
+          // steps alternate +2/+7) and the line factor steps 98 -> 103 at case 15
+          // (+5, earlier steps alternate +3/+11); confirm these are intended.
+          case 9:
+            max_couplings=39*dof_handler.selected_fe->dofs_per_vertex +
+                          59*dof_handler.selected_fe->dofs_per_line +
+                          17*dof_handler.selected_fe->dofs_per_quad;
+            break;
+          case 10:
+            max_couplings=46*dof_handler.selected_fe->dofs_per_vertex +
+                          70*dof_handler.selected_fe->dofs_per_line +
+                          20*dof_handler.selected_fe->dofs_per_quad;
+            break;
+          case 11:
+            max_couplings=48*dof_handler.selected_fe->dofs_per_vertex +
+                          73*dof_handler.selected_fe->dofs_per_line +
+                          21*dof_handler.selected_fe->dofs_per_quad;
+            break;
+          case 12:
+            max_couplings=55*dof_handler.selected_fe->dofs_per_vertex +
+                          84*dof_handler.selected_fe->dofs_per_line +
+                          24*dof_handler.selected_fe->dofs_per_quad;
+            break;
+          case 13:
+            max_couplings=57*dof_handler.selected_fe->dofs_per_vertex +
+                          87*dof_handler.selected_fe->dofs_per_line +
+                          25*dof_handler.selected_fe->dofs_per_quad;
+            break;
+          case 14:
+            max_couplings=63*dof_handler.selected_fe->dofs_per_vertex +
+                          98*dof_handler.selected_fe->dofs_per_line +
+                          28*dof_handler.selected_fe->dofs_per_quad;
+            break;
+          case 15:
+            max_couplings=65*dof_handler.selected_fe->dofs_per_vertex +
+                          103*dof_handler.selected_fe->dofs_per_line +
+                          29*dof_handler.selected_fe->dofs_per_quad;
+            break;
+          case 16:
+            max_couplings=72*dof_handler.selected_fe->dofs_per_vertex +
+                          114*dof_handler.selected_fe->dofs_per_line +
+                          32*dof_handler.selected_fe->dofs_per_quad;
+            break;
+          // any other number of adjacent cells has no estimate; max_couplings is set to 0
+          default:
+            Assert (false, ExcNotImplemented());
+            max_couplings=0;
+          }
+        return std::min(max_couplings,dof_handler.n_dofs());
+      }
+
+
+      template <int spacedim>
+      static
+      unsigned int
+      max_couplings_between_dofs (const DoFHandler<3,spacedim> &dof_handler)
+      {
+//TODO:[?] Invent significantly better estimates than the ones in this function
+
+        // Counting couplings the way it is done in 2d is rather complicated
+        // here, since pictures of several refined hexahedra are hard to
+        // draw. Hence there is presently only a coarse estimate, given for
+        // the case that at most 8 hexes meet at each vertex.
+        //
+        // Better estimates are welcome.
+
+        types::global_dof_index max_couplings = 0;
+
+        if (dof_handler.tria->max_adjacent_cells() > 8)
+          {
+            // no estimate is implemented for this case; max_couplings stays 0
+            Assert (false, ExcNotImplemented());
+          }
+        else
+          {
+            // contributions of the four kinds of objects that carry dofs
+            const unsigned int from_vertices = 7*7*7*dof_handler.selected_fe->dofs_per_vertex;
+            const unsigned int from_lines    = 7*6*7*3*dof_handler.selected_fe->dofs_per_line;
+            const unsigned int from_quads    = 9*4*7*3*dof_handler.selected_fe->dofs_per_quad;
+            const unsigned int from_hexes    = 27*dof_handler.selected_fe->dofs_per_hex;
+
+            max_couplings = from_vertices + from_lines + from_quads + from_hexes;
+          }
+
+        // the result is capped at the total number of degrees of freedom
+        return std::min(max_couplings,dof_handler.n_dofs());
+      }
+
+
+      /**
+       * Reserve enough space in the
+       * <tt>levels[]</tt> objects to store the
+       * numbers of the degrees of freedom
+       * needed for the given element. The
+       * given element is that one which
+       * was selected when calling
+       * @p distribute_dofs the last time.
+       */
+      template <int spacedim>
+      static
+      void reserve_space (DoFHandler<1,spacedim> &dof_handler)
+      {
+        // all newly created entries are filled with invalid_dof_index
+        const types::global_dof_index unset = DoFHandler<1,spacedim>::invalid_dof_index;
+
+        // n_vertices() * dofs_per_vertex entries for the vertex dofs
+        dof_handler.vertex_dofs.resize (dof_handler.tria->n_vertices() *
+                                        dof_handler.selected_fe->dofs_per_vertex,
+                                        unset);
+
+        // per level: storage for the line dofs and the cache of cell dof indices
+        for (unsigned int level=0; level<dof_handler.tria->n_levels(); ++level)
+          {
+            dof_handler.levels.push_back (new internal::DoFHandler::DoFLevel<1>);
+            internal::DoFHandler::DoFLevel<1> &storage = *dof_handler.levels.back();
+            const unsigned int n_raw_cells = dof_handler.tria->n_raw_cells(level);
+
+            storage.dof_object.dofs.resize (n_raw_cells * dof_handler.selected_fe->dofs_per_line,
+                                            unset);
+            storage.cell_dof_indices_cache.resize (n_raw_cells * dof_handler.selected_fe->dofs_per_cell,
+                                                   unset);
+          }
+      }
+
+
+      template <int spacedim>
+      static
+      void reserve_space (DoFHandler<2,spacedim> &dof_handler)
+      {
+        // all newly created entries are filled with invalid_dof_index
+        const types::global_dof_index unset = DoFHandler<2,spacedim>::invalid_dof_index;
+
+        // n_vertices() * dofs_per_vertex entries for the vertex dofs
+        dof_handler.vertex_dofs.resize (dof_handler.tria->n_vertices() *
+                                        dof_handler.selected_fe->dofs_per_vertex,
+                                        unset);
+
+        // per level: storage for the quad dofs and the cache of cell dof indices
+        for (unsigned int level=0; level<dof_handler.tria->n_levels(); ++level)
+          {
+            dof_handler.levels.push_back (new internal::DoFHandler::DoFLevel<2>);
+            internal::DoFHandler::DoFLevel<2> &storage = *dof_handler.levels.back();
+            const unsigned int n_raw_cells = dof_handler.tria->n_raw_cells(level);
+
+            storage.dof_object.dofs.resize (n_raw_cells * dof_handler.selected_fe->dofs_per_quad,
+                                            unset);
+            storage.cell_dof_indices_cache.resize (n_raw_cells * dof_handler.selected_fe->dofs_per_cell,
+                                                   unset);
+          }
+
+        // the line dofs are kept in the faces object; n_raw_lines() is only
+        // queried when the triangulation has cells
+        dof_handler.faces = new internal::DoFHandler::DoFFaces<2>;
+        if (dof_handler.tria->n_cells() == 0)
+          return;
+
+        dof_handler.faces->lines.dofs.resize (dof_handler.tria->n_raw_lines() *
+                                              dof_handler.selected_fe->dofs_per_line, unset);
+      }
+
+
+      template <int spacedim>
+      static
+      void reserve_space (DoFHandler<3,spacedim> &dof_handler)
+      {
+        // all newly created entries are filled with invalid_dof_index
+        const types::global_dof_index unset = DoFHandler<3,spacedim>::invalid_dof_index;
+
+        // n_vertices() * dofs_per_vertex entries for the vertex dofs
+        dof_handler.vertex_dofs.resize (dof_handler.tria->n_vertices() *
+                                        dof_handler.selected_fe->dofs_per_vertex,
+                                        unset);
+
+        // per level: storage for the hex dofs and the cache of cell dof indices
+        for (unsigned int level=0; level<dof_handler.tria->n_levels(); ++level)
+          {
+            dof_handler.levels.push_back (new internal::DoFHandler::DoFLevel<3>);
+            internal::DoFHandler::DoFLevel<3> &storage = *dof_handler.levels.back();
+            const unsigned int n_raw_cells = dof_handler.tria->n_raw_cells(level);
+
+            storage.dof_object.dofs.resize (n_raw_cells * dof_handler.selected_fe->dofs_per_hex,
+                                            unset);
+            storage.cell_dof_indices_cache.resize (n_raw_cells * dof_handler.selected_fe->dofs_per_cell,
+                                                   unset);
+          }
+
+        // the line and quad dofs are kept in the faces object;
+        // n_raw_lines() and n_raw_quads() are only queried when the
+        // triangulation has cells
+        dof_handler.faces = new internal::DoFHandler::DoFFaces<3>;
+        if (dof_handler.tria->n_cells() == 0)
+          return;
+
+        dof_handler.faces->lines.dofs.resize (dof_handler.tria->n_raw_lines() *
+                                              dof_handler.selected_fe->dofs_per_line, unset);
+
+        dof_handler.faces->quads.dofs.resize (dof_handler.tria->n_raw_quads() *
+                                              dof_handler.selected_fe->dofs_per_quad, unset);
+      }
+
+      template<int spacedim>
+      static
+      void reserve_space_mg (DoFHandler<1, spacedim> &dof_handler)
+      {
+        Assert (dof_handler.get_triangulation().n_levels () > 0, ExcMessage ("Invalid triangulation"));
+        dof_handler.clear_mg_space ();
+
+        const dealii::Triangulation<1, spacedim> &tria = dof_handler.get_triangulation();
+        const unsigned int dofs_per_line = dof_handler.get_fe ().dofs_per_line;
+        const unsigned int n_levels = tria.n_levels ();
+        const unsigned int n_vertices = tria.n_vertices ();
+        const types::global_dof_index unset = DoFHandler<1>::invalid_dof_index;
+
+        // one level object per triangulation level, holding an unset index
+        // for every line dof on that level
+        for (unsigned int lvl = 0; lvl < n_levels; ++lvl)
+          {
+            dof_handler.mg_levels.push_back (new internal::DoFHandler::DoFLevel<1>);
+            dof_handler.mg_levels.back ()->dof_object.dofs.assign (tria.n_raw_lines (lvl) * dofs_per_line, unset);
+          }
+
+        dof_handler.mg_vertex_dofs.resize (n_vertices);
+
+        // for every vertex, record the lowest and highest level of the cells that
+        // use it; untouched vertices keep the start values n_levels and 0
+        std::vector<unsigned int> min_level (n_vertices, n_levels);
+        std::vector<unsigned int> max_level (n_vertices, 0);
+
+        typedef typename dealii::Triangulation<1, spacedim>::cell_iterator tria_cell_iterator;
+        for (tria_cell_iterator cell = tria.begin (); cell != tria.end (); ++cell)
+          {
+            const unsigned int level = cell->level ();
+            for (unsigned int v = 0; v < GeometryInfo<1>::vertices_per_cell; ++v)
+              {
+                const unsigned int vertex_index = cell->vertex_index (v);
+                min_level[vertex_index] = std::min (min_level[vertex_index], level);
+                max_level[vertex_index] = std::max (max_level[vertex_index], level);
+              }
+          }
+
+        // set up the index storage of each vertex for its range of levels
+        for (unsigned int vertex = 0; vertex < n_vertices; ++vertex)
+          {
+            if (!tria.vertex_used (vertex))
+              {
+                Assert (min_level[vertex] == n_levels, ExcInternalError ());
+                Assert (max_level[vertex] == 0, ExcInternalError ());
+                dof_handler.mg_vertex_dofs[vertex].init (1, 0, 0);
+                continue;
+              }
+            Assert (min_level[vertex] < n_levels, ExcInternalError ());
+            Assert (max_level[vertex] >= min_level[vertex], ExcInternalError ());
+            dof_handler.mg_vertex_dofs[vertex].init (min_level[vertex], max_level[vertex], dof_handler.get_fe ().dofs_per_vertex);
+          }
+      }
+
+      template<int spacedim>
+      static
+      void reserve_space_mg (DoFHandler<2, spacedim> &dof_handler)
+      {
+        Assert (dof_handler.get_triangulation().n_levels () > 0, ExcMessage ("Invalid triangulation"));
+        dof_handler.clear_mg_space ();
+
+        const dealii::FiniteElement<2, spacedim> &fe = dof_handler.get_fe ();
+        const dealii::Triangulation<2, spacedim> &tria = dof_handler.get_triangulation();
+        const unsigned int n_levels = tria.n_levels ();
+        const unsigned int n_vertices = tria.n_vertices ();
+        const types::global_dof_index unset = DoFHandler<2>::invalid_dof_index;
+
+        // one level object per triangulation level with an unset index per quad dof
+        for (unsigned int lvl = 0; lvl < n_levels; ++lvl)
+          {
+            dof_handler.mg_levels.push_back (new internal::DoFHandler::DoFLevel<2>);
+            dof_handler.mg_levels.back ()->dof_object.dofs.assign (tria.n_raw_quads (lvl) * fe.dofs_per_quad, unset);
+          }
+
+        // a single faces object holds the line dofs, sized with n_raw_lines()
+        dof_handler.mg_faces = new internal::DoFHandler::DoFFaces<2>;
+        dof_handler.mg_faces->lines.dofs.assign (tria.n_raw_lines () * fe.dofs_per_line, unset);
+
+        dof_handler.mg_vertex_dofs.resize (n_vertices);
+
+        // for every vertex, record the lowest and highest level of the cells that
+        // use it; untouched vertices keep the start values n_levels and 0
+        std::vector<unsigned int> min_level (n_vertices, n_levels);
+        std::vector<unsigned int> max_level (n_vertices, 0);
+
+        typedef typename dealii::Triangulation<2, spacedim>::cell_iterator tria_cell_iterator;
+        for (tria_cell_iterator cell = tria.begin (); cell != tria.end (); ++cell)
+          {
+            const unsigned int level = cell->level ();
+            for (unsigned int v = 0; v < GeometryInfo<2>::vertices_per_cell; ++v)
+              {
+                const unsigned int vertex_index = cell->vertex_index (v);
+                min_level[vertex_index] = std::min (min_level[vertex_index], level);
+                max_level[vertex_index] = std::max (max_level[vertex_index], level);
+              }
+          }
+
+        // set up the index storage of each vertex for its range of levels
+        for (unsigned int vertex = 0; vertex < n_vertices; ++vertex)
+          {
+            if (!tria.vertex_used (vertex))
+              {
+                Assert (min_level[vertex] == n_levels, ExcInternalError ());
+                Assert (max_level[vertex] == 0, ExcInternalError ());
+                dof_handler.mg_vertex_dofs[vertex].init (1, 0, 0);
+                continue;
+              }
+            Assert (min_level[vertex] < n_levels, ExcInternalError ());
+            Assert (max_level[vertex] >= min_level[vertex], ExcInternalError ());
+            dof_handler.mg_vertex_dofs[vertex].init (min_level[vertex], max_level[vertex], fe.dofs_per_vertex);
+          }
+      }
+
+      template<int spacedim>
+      static
+      void reserve_space_mg (DoFHandler<3, spacedim> &dof_handler)
+      {
+        Assert (dof_handler.get_triangulation().n_levels () > 0, ExcMessage ("Invalid triangulation"));
+        dof_handler.clear_mg_space ();
+
+        const dealii::FiniteElement<3, spacedim> &fe = dof_handler.get_fe ();
+        const dealii::Triangulation<3, spacedim> &tria = dof_handler.get_triangulation();
+        const unsigned int n_levels = tria.n_levels ();
+        const unsigned int n_vertices = tria.n_vertices ();
+        const types::global_dof_index unset = DoFHandler<3>::invalid_dof_index;
+
+        // one level object per triangulation level with an unset index per hex dof
+        for (unsigned int lvl = 0; lvl < n_levels; ++lvl)
+          {
+            dof_handler.mg_levels.push_back (new internal::DoFHandler::DoFLevel<3>);
+            dof_handler.mg_levels.back ()->dof_object.dofs.assign (tria.n_raw_hexs (lvl) * fe.dofs_per_hex, unset);
+          }
+
+        // a single faces object holds the line and quad dofs
+        dof_handler.mg_faces = new internal::DoFHandler::DoFFaces<3>;
+        dof_handler.mg_faces->lines.dofs.assign (tria.n_raw_lines () * fe.dofs_per_line, unset);
+        dof_handler.mg_faces->quads.dofs.assign (tria.n_raw_quads () * fe.dofs_per_quad, unset);
+
+        dof_handler.mg_vertex_dofs.resize (n_vertices);
+
+        // for every vertex, record the lowest and highest level of the cells that
+        // use it; untouched vertices keep the start values n_levels and 0
+        std::vector<unsigned int> min_level (n_vertices, n_levels);
+        std::vector<unsigned int> max_level (n_vertices, 0);
+
+        typedef typename dealii::Triangulation<3, spacedim>::cell_iterator tria_cell_iterator;
+        for (tria_cell_iterator cell = tria.begin (); cell != tria.end (); ++cell)
+          {
+            const unsigned int level = cell->level ();
+            for (unsigned int v = 0; v < GeometryInfo<3>::vertices_per_cell; ++v)
+              {
+                const unsigned int vertex_index = cell->vertex_index (v);
+                min_level[vertex_index] = std::min (min_level[vertex_index], level);
+                max_level[vertex_index] = std::max (max_level[vertex_index], level);
+              }
+          }
+
+        // set up the index storage of each vertex for its range of levels
+        for (unsigned int vertex = 0; vertex < n_vertices; ++vertex)
+          {
+            if (!tria.vertex_used (vertex))
+              {
+                Assert (min_level[vertex] == n_levels, ExcInternalError ());
+                Assert (max_level[vertex] == 0, ExcInternalError ());
+                dof_handler.mg_vertex_dofs[vertex].init (1, 0, 0);
+                continue;
+              }
+            Assert (min_level[vertex] < n_levels, ExcInternalError ());
+            Assert (max_level[vertex] >= min_level[vertex], ExcInternalError ());
+            dof_handler.mg_vertex_dofs[vertex].init (min_level[vertex], max_level[vertex], fe.dofs_per_vertex);
+          }
+      }
+
+      template<int spacedim>
+      static
+      types::global_dof_index distribute_dofs_on_cell (typename DoFHandler<1, spacedim>::cell_iterator &cell, types::global_dof_index next_free_dof)
+      {
+        const FiniteElement<1, spacedim> &fe = cell->get_fe ();
+        const int level = cell->level ();
+
+        // vertex dofs: copy the indices from a valid neighbor on the same level
+        // whose user flag is set, otherwise hand out new ones
+        if (fe.dofs_per_vertex > 0)
+          for (unsigned int vertex = 0; vertex < GeometryInfo<1>::vertices_per_cell; ++vertex)
+            {
+              typename DoFHandler<1, spacedim>::cell_iterator neighbor = cell->neighbor (vertex);
+              const bool copy_from_neighbor = (neighbor.state () == IteratorState::valid)
+                                              && neighbor->user_flag_set ()
+                                              && (neighbor->level () == level);
+
+              // vertex 0 takes over the neighbor's vertex 1, any other vertex its vertex 0
+              const unsigned int own_vertex      = (vertex == 0 ? 0 : 1);
+              const unsigned int neighbor_vertex = (vertex == 0 ? 1 : 0);
+
+              for (unsigned int dof = 0; dof < fe.dofs_per_vertex; ++dof)
+                if (copy_from_neighbor)
+                  cell->set_mg_vertex_dof_index (level, own_vertex, dof, neighbor->mg_vertex_dof_index (level, neighbor_vertex, dof));
+                else
+                  cell->set_mg_vertex_dof_index (level, vertex, dof, next_free_dof++);
+            }
+
+        // dofs located on the line itself
+        for (unsigned int dof = 0; dof < fe.dofs_per_line; ++dof)
+          cell->set_mg_dof_index (level, dof, next_free_dof++);
+
+        // set the user flag that the neighbor test above looks for
+        cell->set_user_flag ();
+        return next_free_dof;
+      }
+
+      template<int spacedim>
+      static
+      types::global_dof_index distribute_dofs_on_cell (typename DoFHandler<2, spacedim>::cell_iterator &cell, types::global_dof_index next_free_dof)
+      {
+        const FiniteElement<2, spacedim> &fe = cell->get_fe ();
+        const int level = cell->level ();
+        const types::global_dof_index unset = DoFHandler<2>::invalid_dof_index;
+        // vertices and lines get new indices only while their first index is unset
+        if (fe.dofs_per_vertex > 0)
+          for (unsigned int v = 0; v < GeometryInfo<2>::vertices_per_cell; ++v)
+            if (cell->mg_vertex_dof_index (level, v, 0) == unset)
+              for (unsigned int d = 0; d < fe.dofs_per_vertex; ++d)
+                cell->set_mg_vertex_dof_index (level, v, d, next_free_dof++);
+
+        if (fe.dofs_per_line > 0)
+          for (unsigned int l = 0; l < GeometryInfo<2>::faces_per_cell; ++l)
+            {
+              typename DoFHandler<2, spacedim>::line_iterator line = cell->line (l);
+              if (line->mg_dof_index (level, 0) == unset)
+                for (unsigned int d = 0; d < fe.dofs_per_line; ++d)
+                  line->set_mg_dof_index (level, d, next_free_dof++);
+            }
+
+        for (unsigned int d = 0; d < fe.dofs_per_quad; ++d)
+          cell->set_mg_dof_index (level, d, next_free_dof++);
+
+        cell->set_user_flag ();
+        return next_free_dof;
+      }
+
+      template<int spacedim>
+      static
+      types::global_dof_index distribute_dofs_on_cell (typename DoFHandler<3, spacedim>::cell_iterator &cell, types::global_dof_index next_free_dof)
+      {
+        const FiniteElement<3, spacedim> &fe = cell->get_fe ();
+        const int level = cell->level ();
+        const types::global_dof_index unset = DoFHandler<3>::invalid_dof_index;
+        // vertices, lines and quads get new indices only while their first index is unset
+        if (fe.dofs_per_vertex > 0)
+          for (unsigned int v = 0; v < GeometryInfo<3>::vertices_per_cell; ++v)
+            if (cell->mg_vertex_dof_index (level, v, 0) == unset)
+              for (unsigned int d = 0; d < fe.dofs_per_vertex; ++d)
+                cell->set_mg_vertex_dof_index (level, v, d, next_free_dof++);
+
+        if (fe.dofs_per_line > 0)
+          for (unsigned int l = 0; l < GeometryInfo<3>::lines_per_cell; ++l)
+            {
+              typename DoFHandler<3, spacedim>::line_iterator line_it = cell->line (l);
+              if (line_it->mg_dof_index (level, 0) == unset)
+                for (unsigned int d = 0; d < fe.dofs_per_line; ++d)
+                  line_it->set_mg_dof_index (level, d, next_free_dof++);
+            }
+
+        if (fe.dofs_per_quad > 0)
+          for (unsigned int q = 0; q < GeometryInfo<3>::quads_per_cell; ++q)
+            {
+              typename DoFHandler<3, spacedim>::quad_iterator quad = cell->quad (q);
+              if (quad->mg_dof_index (level, 0) == unset)
+                for (unsigned int d = 0; d < fe.dofs_per_quad; ++d)
+                  quad->set_mg_dof_index (level, d, next_free_dof++);
+            }
+
+        // the dofs of the hex itself always receive new indices
+        for (unsigned int d = 0; d < fe.dofs_per_hex; ++d)
+          cell->set_mg_dof_index (level, d, next_free_dof++);
+
+        cell->set_user_flag ();
+        return next_free_dof;
+      }
+
+      template<int spacedim>
+      static
+      types::global_dof_index
+      get_dof_index (const DoFHandler<1, spacedim> &dof_handler,
+                     internal::DoFHandler::DoFLevel<1> &mg_level,
+                     internal::DoFHandler::DoFFaces<1> &,
+                     const unsigned int obj_index,
+                     const unsigned int fe_index,
+                     const unsigned int local_index,
+                     const int2type<1>)
+      {
+        // int2type<1> with a 1d handler: the index is read from the level object
+        return mg_level.dof_object.get_dof_index (dof_handler, obj_index, fe_index, local_index);
+      }
+
+      // int2type<1> with a 2d handler: the index is read from mg_faces.lines
+      template<int spacedim>
+      static types::global_dof_index
+      get_dof_index (const DoFHandler<2, spacedim> &dof_handler, internal::DoFHandler::DoFLevel<2> &,
+                     internal::DoFHandler::DoFFaces<2> &mg_faces, const unsigned int obj_index,
+                     const unsigned int fe_index, const unsigned int local_index, const int2type<1>)
+      { return mg_faces.lines.get_dof_index (dof_handler, obj_index, fe_index, local_index); }
+
+      // Read a DoF index for an object of structural dimension 2 (tag
+      // int2type<2>) of a DoFHandler<2,spacedim>. The value is taken from
+      // the per-level storage mg_level.dof_object; the faces argument is
+      // not used.
+      template<int spacedim>
+      static
+      types::global_dof_index
+      get_dof_index (const DoFHandler<2, spacedim>     &dof_handler,
+                     internal::DoFHandler::DoFLevel<2> &mg_level,
+                     internal::DoFHandler::DoFFaces<2> &,
+                     const unsigned int                 obj_index,
+                     const unsigned int                 fe_index,
+                     const unsigned int                 local_index,
+                     const int2type<2>)
+      {
+        return mg_level.dof_object.get_dof_index (dof_handler,
+                                                  obj_index,
+                                                  fe_index,
+                                                  local_index);
+      }
+
+      // Read a DoF index for an object of structural dimension 1 (tag
+      // int2type<1>) of a DoFHandler<3,spacedim>. The value is taken from
+      // mg_faces.lines; the level argument is not used.
+      template<int spacedim>
+      static
+      types::global_dof_index
+      get_dof_index (const DoFHandler<3, spacedim>     &dof_handler,
+                     internal::DoFHandler::DoFLevel<3> &,
+                     internal::DoFHandler::DoFFaces<3> &mg_faces,
+                     const unsigned int                 obj_index,
+                     const unsigned int                 fe_index,
+                     const unsigned int                 local_index,
+                     const int2type<1>)
+      {
+        return mg_faces.lines.get_dof_index (dof_handler,
+                                             obj_index,
+                                             fe_index,
+                                             local_index);
+      }
+
+      // Read a DoF index for an object of structural dimension 2 (tag
+      // int2type<2>) of a DoFHandler<3,spacedim>. The value is taken from
+      // mg_faces.quads; the level argument is not used.
+      template<int spacedim>
+      static
+      types::global_dof_index
+      get_dof_index (const DoFHandler<3, spacedim>     &dof_handler,
+                     internal::DoFHandler::DoFLevel<3> &,
+                     internal::DoFHandler::DoFFaces<3> &mg_faces,
+                     const unsigned int                 obj_index,
+                     const unsigned int                 fe_index,
+                     const unsigned int                 local_index,
+                     const int2type<2>)
+      {
+        return mg_faces.quads.get_dof_index (dof_handler,
+                                             obj_index,
+                                             fe_index,
+                                             local_index);
+      }
+
+      // Read a DoF index for an object of structural dimension 3 (tag
+      // int2type<3>) of a DoFHandler<3,spacedim>. The value is taken from
+      // the per-level storage mg_level.dof_object; the faces argument is
+      // not used.
+      template<int spacedim>
+      static
+      types::global_dof_index
+      get_dof_index (const DoFHandler<3, spacedim>     &dof_handler,
+                     internal::DoFHandler::DoFLevel<3> &mg_level,
+                     internal::DoFHandler::DoFFaces<3> &,
+                     const unsigned int                 obj_index,
+                     const unsigned int                 fe_index,
+                     const unsigned int                 local_index,
+                     const int2type<3>)
+      {
+        return mg_level.dof_object.get_dof_index (dof_handler,
+                                                  obj_index,
+                                                  fe_index,
+                                                  local_index);
+      }
+
+      // Store global_index as the DoF index of an object of structural
+      // dimension 1 (tag int2type<1>) of a DoFHandler<1,spacedim>. The value
+      // is written to mg_level.dof_object; the faces argument is not used.
+      template<int spacedim>
+      static
+      void
+      set_dof_index (const DoFHandler<1, spacedim>     &dof_handler,
+                     internal::DoFHandler::DoFLevel<1> &mg_level,
+                     internal::DoFHandler::DoFFaces<1> &,
+                     const unsigned int                 obj_index,
+                     const unsigned int                 fe_index,
+                     const unsigned int                 local_index,
+                     const types::global_dof_index      global_index,
+                     const int2type<1>)
+      {
+        mg_level.dof_object.set_dof_index (dof_handler,
+                                           obj_index,
+                                           fe_index,
+                                           local_index,
+                                           global_index);
+      }
+
+      // Store global_index as the DoF index of an object of structural
+      // dimension 1 (tag int2type<1>) of a DoFHandler<2,spacedim>. The value
+      // is written to mg_faces.lines; the level argument is not used.
+      template<int spacedim>
+      static
+      void
+      set_dof_index (const DoFHandler<2, spacedim>     &dof_handler,
+                     internal::DoFHandler::DoFLevel<2> &,
+                     internal::DoFHandler::DoFFaces<2> &mg_faces,
+                     const unsigned int                 obj_index,
+                     const unsigned int                 fe_index,
+                     const unsigned int                 local_index,
+                     const types::global_dof_index      global_index,
+                     const int2type<1>)
+      {
+        mg_faces.lines.set_dof_index (dof_handler,
+                                      obj_index,
+                                      fe_index,
+                                      local_index,
+                                      global_index);
+      }
+
+      // Store global_index as the DoF index of an object of structural
+      // dimension 2 (tag int2type<2>) of a DoFHandler<2,spacedim>. The value
+      // is written to mg_level.dof_object; the faces argument is not used.
+      template<int spacedim>
+      static
+      void
+      set_dof_index (const DoFHandler<2, spacedim>     &dof_handler,
+                     internal::DoFHandler::DoFLevel<2> &mg_level,
+                     internal::DoFHandler::DoFFaces<2> &,
+                     const unsigned int                 obj_index,
+                     const unsigned int                 fe_index,
+                     const unsigned int                 local_index,
+                     const types::global_dof_index      global_index,
+                     const int2type<2>)
+      {
+        mg_level.dof_object.set_dof_index (dof_handler,
+                                           obj_index,
+                                           fe_index,
+                                           local_index,
+                                           global_index);
+      }
+
+      // Store global_index as the DoF index of an object of structural
+      // dimension 1 (tag int2type<1>) of a DoFHandler<3,spacedim>. The value
+      // is written to mg_faces.lines; the level argument is not used.
+      template<int spacedim>
+      static
+      void
+      set_dof_index (const DoFHandler<3, spacedim>     &dof_handler,
+                     internal::DoFHandler::DoFLevel<3> &,
+                     internal::DoFHandler::DoFFaces<3> &mg_faces,
+                     const unsigned int                 obj_index,
+                     const unsigned int                 fe_index,
+                     const unsigned int                 local_index,
+                     const types::global_dof_index      global_index,
+                     const int2type<1>)
+      {
+        mg_faces.lines.set_dof_index (dof_handler,
+                                      obj_index,
+                                      fe_index,
+                                      local_index,
+                                      global_index);
+      }
+
+      // Store global_index as the DoF index of an object of structural
+      // dimension 2 (tag int2type<2>) of a DoFHandler<3,spacedim>. The value
+      // is written to mg_faces.quads; the level argument is not used.
+      template<int spacedim>
+      static
+      void
+      set_dof_index (const DoFHandler<3, spacedim>     &dof_handler,
+                     internal::DoFHandler::DoFLevel<3> &,
+                     internal::DoFHandler::DoFFaces<3> &mg_faces,
+                     const unsigned int                 obj_index,
+                     const unsigned int                 fe_index,
+                     const unsigned int                 local_index,
+                     const types::global_dof_index      global_index,
+                     const int2type<2>)
+      {
+        mg_faces.quads.set_dof_index (dof_handler,
+                                      obj_index,
+                                      fe_index,
+                                      local_index,
+                                      global_index);
+      }
+
+      // Store global_index as the DoF index of an object of structural
+      // dimension 3 (tag int2type<3>) of a DoFHandler<3,spacedim>. The value
+      // is written to mg_level.dof_object; the faces argument is not used.
+      template<int spacedim>
+      static
+      void
+      set_dof_index (const DoFHandler<3, spacedim>     &dof_handler,
+                     internal::DoFHandler::DoFLevel<3> &mg_level,
+                     internal::DoFHandler::DoFFaces<3> &,
+                     const unsigned int                 obj_index,
+                     const unsigned int                 fe_index,
+                     const unsigned int                 local_index,
+                     const types::global_dof_index      global_index,
+                     const int2type<3>)
+      {
+        mg_level.dof_object.set_dof_index (dof_handler,
+                                           obj_index,
+                                           fe_index,
+                                           local_index,
+                                           global_index);
+      }
+    };
+  }
+}
+
+
+
+// Construct a DoFHandler attached to the given triangulation. No finite
+// element is selected and both face pointers start out as NULL; the only
+// work done here is choosing the numbering policy from the dynamic type
+// of the triangulation.
+template<int dim, int spacedim>
+DoFHandler<dim,spacedim>::DoFHandler (const Triangulation<dim,spacedim> &tria)
+  :
+  tria(&tria, typeid(*this).name()),
+  selected_fe(0, typeid(*this).name()),
+  faces(NULL),
+  mg_faces (NULL)
+{
+  typedef parallel::shared::Triangulation<dim,spacedim>      SharedTria;
+  typedef parallel::distributed::Triangulation<dim,spacedim> DistributedTria;
+
+  // the shared case takes precedence; only if it does not apply do we
+  // distinguish between distributed and sequential triangulations
+  if (dynamic_cast<const SharedTria *>(&tria) != 0)
+    policy.reset (new internal::DoFHandler::Policy::ParallelShared<dim,spacedim>());
+  else if (dynamic_cast<const DistributedTria *>(&tria) != 0)
+    policy.reset (new internal::DoFHandler::Policy::ParallelDistributed<dim,spacedim>());
+  else
+    policy.reset (new internal::DoFHandler::Policy::Sequential<dim,spacedim>());
+}
+
+
+// Default constructor: no triangulation, no finite element and no face
+// storage are attached. No policy is selected here; initialize() chooses
+// one when a triangulation is provided.
+template<int dim, int spacedim>
+DoFHandler<dim,spacedim>::DoFHandler ()
+  :
+  tria(0, typeid(*this).name()),
+  selected_fe(0, typeid(*this).name()),
+  faces(NULL),
+  mg_faces (NULL)
+{
+}
+
+
+// Destructor. All clean-up is delegated to clear().
+template <int dim, int spacedim>
+DoFHandler<dim,spacedim>::~DoFHandler ()
+{
+  // clear() drops the finite element pointer and frees both the active
+  // and the multigrid storage
+  clear ();
+}
+
+
+// Attach this object to the triangulation t, select the numbering policy
+// that matches the dynamic type of t, and distribute degrees of freedom
+// for the finite element fe.
+template<int dim, int spacedim>
+void
+DoFHandler<dim,spacedim>::initialize(
+  const Triangulation<dim,spacedim> &t,
+  const FiniteElement<dim,spacedim> &fe)
+{
+  tria = &t;
+
+  // free face storage left over from a previous use of this object before
+  // dropping the pointer. merely zeroing it would leak that storage,
+  // because clear_space() (called from distribute_dofs() below) would
+  // then only see a null pointer. deleting a null pointer is a no-op, so
+  // this is also fine for a freshly constructed object
+  delete faces;
+  faces = 0;
+  number_cache.n_global_dofs = 0;
+
+  // decide whether we need a
+  // sequential or a parallel
+  // distributed policy
+  if (dynamic_cast<const parallel::shared::Triangulation< dim, spacedim>*>
+      (&t)
+      != 0)
+    policy.reset (new internal::DoFHandler::Policy::ParallelShared<dim,spacedim>());
+  else if (dynamic_cast<const parallel::distributed::Triangulation< dim, spacedim >*>
+           (&t)
+           == 0)
+    policy.reset (new internal::DoFHandler::Policy::Sequential<dim,spacedim>());
+  else
+    policy.reset (new internal::DoFHandler::Policy::ParallelDistributed<dim,spacedim>());
+
+  distribute_dofs(fe);
+}
+
+
+
+/*------------------------ Cell iterator functions ------------------------*/
+
+// Return an iterator to the first cell on the given level, or end(level)
+// if the triangulation has no cell on that level.
+template <int dim, int spacedim>
+typename DoFHandler<dim,spacedim>::cell_iterator
+DoFHandler<dim,spacedim>::begin (const unsigned int level) const
+{
+  typename Triangulation<dim,spacedim>::cell_iterator first_cell
+    = this->get_triangulation().begin(level);
+
+  if (first_cell == this->get_triangulation().end(level))
+    {
+      // nothing on this level
+      return end(level);
+    }
+  else
+    {
+      // wrap the triangulation cell into a DoF cell iterator
+      return cell_iterator (*first_cell, this);
+    }
+}
+
+
+
+// Return an iterator to the first cell at or after begin(level) that has
+// no children. If the iteration runs off the valid range first, the
+// non-valid iterator reached at that point is returned.
+template <int dim, int spacedim>
+typename DoFHandler<dim,spacedim>::active_cell_iterator
+DoFHandler<dim,spacedim>::begin_active (const unsigned int level) const
+{
+  // the level argument is validated by begin()
+  cell_iterator candidate = begin (level);
+
+  // step over refined cells for as long as we stay inside the valid range
+  while ((candidate.state() == IteratorState::valid)
+         &&
+         candidate->has_children())
+    ++candidate;
+
+  return candidate;
+}
+
+
+
+// Return the past-the-end cell iterator, represented by level and index
+// both equal to -1.
+template <int dim, int spacedim>
+typename DoFHandler<dim,spacedim>::cell_iterator
+DoFHandler<dim,spacedim>::end () const
+{
+  return cell_iterator (&this->get_triangulation(), -1, -1, this);
+}
+
+
+// Return the iterator that terminates a loop over the cells of the given
+// level. If the triangulation's end(level) iterator is not in the valid
+// state, the global end() iterator is returned instead.
+template <int dim, int spacedim>
+typename DoFHandler<dim,spacedim>::cell_iterator
+DoFHandler<dim,spacedim>::end (const unsigned int level) const
+{
+  typename Triangulation<dim,spacedim>::cell_iterator past_level
+    = this->get_triangulation().end(level);
+
+  if (past_level.state() == IteratorState::valid)
+    return cell_iterator (*past_level, this);
+
+  return end();
+}
+
+
+// Return the iterator that terminates a loop over the active cells of the
+// given level. If the triangulation's end_active(level) iterator is not in
+// the valid state, the global end() iterator is returned instead.
+template <int dim, int spacedim>
+typename DoFHandler<dim, spacedim>::active_cell_iterator
+DoFHandler<dim, spacedim>::end_active (const unsigned int level) const
+{
+  typename Triangulation<dim,spacedim>::cell_iterator past_level
+    = this->get_triangulation().end_active(level);
+
+  if (past_level.state() == IteratorState::valid)
+    return active_cell_iterator (*past_level, this);
+
+  return active_cell_iterator(end());
+}
+
+
+
+// Return a level cell iterator to the first cell on the given level, or
+// end_mg(level) if the triangulation has no cell on that level.
+template <int dim, int spacedim>
+typename DoFHandler<dim, spacedim>::level_cell_iterator
+DoFHandler<dim, spacedim>::begin_mg (const unsigned int level) const
+{
+  // NOTE: a check that level DoFs have been distributed is deliberately
+  // left disabled here:
+  // Assert(this->has_level_dofs(), ExcMessage("You can only iterate over mg "
+  //     "levels if mg dofs got distributed."));
+  typename Triangulation<dim,spacedim>::cell_iterator first_cell
+    = this->get_triangulation().begin(level);
+
+  if (first_cell == this->get_triangulation().end(level))
+    {
+      return end_mg(level);
+    }
+  else
+    {
+      return level_cell_iterator (*first_cell, this);
+    }
+}
+
+
+// Return the level cell iterator that terminates a loop over the cells of
+// the given level. If the triangulation's end(level) iterator is not in
+// the valid state, the result of end() is returned (converted to a level
+// cell iterator).
+template <int dim, int spacedim>
+typename DoFHandler<dim, spacedim>::level_cell_iterator
+DoFHandler<dim, spacedim>::end_mg (const unsigned int level) const
+{
+  // NOTE: a check that level DoFs have been distributed is deliberately
+  // left disabled here:
+  // Assert(this->has_level_dofs(), ExcMessage("You can only iterate over mg "
+  //     "levels if mg dofs got distributed."));
+  typename Triangulation<dim,spacedim>::cell_iterator past_level
+    = this->get_triangulation().end(level);
+
+  if (past_level.state() == IteratorState::valid)
+    return level_cell_iterator (*past_level, this);
+
+  return end();
+}
+
+
+// Return the past-the-end level cell iterator, represented by level and
+// index both equal to -1.
+template <int dim, int spacedim>
+typename DoFHandler<dim, spacedim>::level_cell_iterator
+DoFHandler<dim, spacedim>::end_mg () const
+{
+  return level_cell_iterator (&this->get_triangulation(),
+                              -1,
+                              -1,
+                              this);
+}
+
+
+
+// Return the range [begin(), end()) of all cells.
+template <int dim, int spacedim>
+IteratorRange<typename DoFHandler<dim, spacedim>::cell_iterator>
+DoFHandler<dim, spacedim>::cell_iterators () const
+{
+  typedef IteratorRange<typename DoFHandler<dim, spacedim>::cell_iterator> range_type;
+
+  return range_type (begin(), end());
+}
+
+
+// Return the range [begin_active(), end()) of all active cells.
+template <int dim, int spacedim>
+IteratorRange<typename DoFHandler<dim, spacedim>::active_cell_iterator>
+DoFHandler<dim, spacedim>::active_cell_iterators () const
+{
+  typedef IteratorRange<typename DoFHandler<dim, spacedim>::active_cell_iterator> range_type;
+
+  return range_type (begin_active(), end());
+}
+
+
+
+// Return the range [begin_mg(), end_mg()) of level cell iterators.
+template <int dim, int spacedim>
+IteratorRange<typename DoFHandler<dim, spacedim>::level_cell_iterator>
+DoFHandler<dim, spacedim>::mg_cell_iterators () const
+{
+  typedef IteratorRange<typename DoFHandler<dim, spacedim>::level_cell_iterator> range_type;
+
+  return range_type (begin_mg(), end_mg());
+}
+
+
+
+
+// Return the range [begin(level), end(level)) of cells on one level.
+template <int dim, int spacedim>
+IteratorRange<typename DoFHandler<dim, spacedim>::cell_iterator>
+DoFHandler<dim, spacedim>::cell_iterators_on_level (const unsigned int level) const
+{
+  typedef IteratorRange<typename DoFHandler<dim, spacedim>::cell_iterator> range_type;
+
+  return range_type (begin(level), end(level));
+}
+
+
+
+// Return the range [begin_active(level), end_active(level)) of active
+// cells on one level.
+template <int dim, int spacedim>
+IteratorRange<typename DoFHandler<dim, spacedim>::active_cell_iterator>
+DoFHandler<dim, spacedim>::active_cell_iterators_on_level (const unsigned int level) const
+{
+  typedef IteratorRange<typename DoFHandler<dim, spacedim>::active_cell_iterator> range_type;
+
+  return range_type (begin_active(level), end_active(level));
+}
+
+
+
+// Return the range [begin_mg(level), end_mg(level)) of level cell
+// iterators on one level.
+template <int dim, int spacedim>
+IteratorRange<typename DoFHandler<dim, spacedim>::level_cell_iterator>
+DoFHandler<dim, spacedim>::mg_cell_iterators_on_level (const unsigned int level) const
+{
+  typedef IteratorRange<typename DoFHandler<dim, spacedim>::level_cell_iterator> range_type;
+
+  return range_type (begin_mg(level), end_mg(level));
+}
+
+
+
+//---------------------------------------------------------------------------
+
+
+
+// 1d specialization: the result is twice the number of degrees of freedom
+// per vertex of the selected finite element.
+template <>
+types::global_dof_index DoFHandler<1>::n_boundary_dofs () const
+{
+  const unsigned int n_dofs_per_vertex = get_fe().dofs_per_vertex;
+  return 2*n_dofs_per_vertex;
+}
+
+
+
+// 1d specialization taking a map keyed by boundary indicator: the result
+// is the number of map entries times the number of degrees of freedom per
+// vertex. In debug mode every key is checked to be 0 or 1.
+template <>
+types::global_dof_index DoFHandler<1>::n_boundary_dofs (const FunctionMap &boundary_ids) const
+{
+  // in 1d, the only boundary indicators accepted are 0 and 1
+  FunctionMap::const_iterator i = boundary_ids.begin();
+  while (i != boundary_ids.end())
+    {
+      Assert ((i->first == 0) || (i->first == 1),
+              ExcInvalidBoundaryIndicator());
+      ++i;
+    }
+
+  return boundary_ids.size()*get_fe().dofs_per_vertex;
+}
+
+
+
+// 1d specialization taking a set of boundary indicators: the result is
+// the number of set entries times the number of degrees of freedom per
+// vertex. In debug mode every entry is checked to be 0 or 1.
+template <>
+types::global_dof_index DoFHandler<1>::n_boundary_dofs (const std::set<types::boundary_id> &boundary_ids) const
+{
+  // in 1d, the only boundary indicators accepted are 0 and 1
+  std::set<types::boundary_id>::const_iterator i = boundary_ids.begin();
+  while (i != boundary_ids.end())
+    {
+      Assert ((*i == 0) || (*i == 1),
+              ExcInvalidBoundaryIndicator());
+      ++i;
+    }
+
+  return boundary_ids.size()*get_fe().dofs_per_vertex;
+}
+
+
+// Specialization for dim=1, spacedim=2: the result is twice the number of
+// degrees of freedom per vertex of the selected finite element.
+template <>
+types::global_dof_index DoFHandler<1,2>::n_boundary_dofs () const
+{
+  const unsigned int n_dofs_per_vertex = get_fe().dofs_per_vertex;
+  return 2*n_dofs_per_vertex;
+}
+
+
+
+// Specialization for dim=1, spacedim=2 taking a map keyed by boundary
+// indicator: the result is the number of map entries times the number of
+// degrees of freedom per vertex. In debug mode every key is checked to be
+// 0 or 1.
+template <>
+types::global_dof_index DoFHandler<1,2>::n_boundary_dofs (const FunctionMap &boundary_ids) const
+{
+  // in 1d, the only boundary indicators accepted are 0 and 1
+  FunctionMap::const_iterator i = boundary_ids.begin();
+  while (i != boundary_ids.end())
+    {
+      Assert ((i->first == 0) || (i->first == 1),
+              ExcInvalidBoundaryIndicator());
+      ++i;
+    }
+
+  return boundary_ids.size()*get_fe().dofs_per_vertex;
+}
+
+
+
+// Specialization for dim=1, spacedim=2 taking a set of boundary
+// indicators: the result is the number of set entries times the number of
+// degrees of freedom per vertex. In debug mode every entry is checked to
+// be 0 or 1.
+template <>
+types::global_dof_index DoFHandler<1,2>::n_boundary_dofs (const std::set<types::boundary_id> &boundary_ids) const
+{
+  // in 1d, the only boundary indicators accepted are 0 and 1
+  std::set<types::boundary_id>::const_iterator i = boundary_ids.begin();
+  while (i != boundary_ids.end())
+    {
+      Assert ((*i == 0) || (*i == 1),
+              ExcInvalidBoundaryIndicator());
+      ++i;
+    }
+
+  return boundary_ids.size()*get_fe().dofs_per_vertex;
+}
+
+
+
+// Count the distinct degrees of freedom located on faces for which
+// cell->at_boundary(f) is true, looping over all active cells.
+template<int dim, int spacedim>
+types::global_dof_index DoFHandler<dim,spacedim>::n_boundary_dofs () const
+{
+  // the set must hold values of the same type as the indices returned by
+  // get_dof_indices(); a set of a narrower integer type would convert the
+  // indices on insertion and could merge distinct degrees of freedom
+  std::set<types::global_dof_index> boundary_dofs;
+
+  const unsigned int dofs_per_face = get_fe().dofs_per_face;
+  std::vector<types::global_dof_index> dofs_on_face(dofs_per_face);
+
+  // loop over all faces of all cells
+  // and see whether they are at a
+  // boundary. note (i) that we visit
+  // interior faces twice (which we
+  // don't care about) but exterior
+  // faces only once as is
+  // appropriate, and (ii) that we
+  // need not take special care of
+  // single lines (using
+  // @p{cell->has_boundary_lines}),
+  // since we do not support
+  // boundaries of dimension dim-2,
+  // and so every boundary line is
+  // also part of a boundary face.
+  active_cell_iterator cell = begin_active (),
+                       endc = end();
+  for (; cell!=endc; ++cell)
+    for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+      if (cell->at_boundary(f))
+        {
+          cell->face(f)->get_dof_indices (dofs_on_face);
+          // the set takes care of indices shared between faces
+          for (unsigned int i=0; i<dofs_per_face; ++i)
+            boundary_dofs.insert(dofs_on_face[i]);
+        }
+
+  return boundary_dofs.size();
+}
+
+
+
+// Count the distinct degrees of freedom located on boundary faces whose
+// boundary indicator appears as a key in boundary_ids. In debug mode the
+// map must not contain numbers::internal_face_boundary_id.
+template<int dim, int spacedim>
+types::global_dof_index
+DoFHandler<dim,spacedim>::n_boundary_dofs (const FunctionMap &boundary_ids) const
+{
+  Assert (boundary_ids.find(numbers::internal_face_boundary_id) == boundary_ids.end(),
+          ExcInvalidBoundaryIndicator());
+
+  // the set must hold values of the same type as the indices returned by
+  // get_dof_indices(); a set of a narrower integer type would convert the
+  // indices on insertion and could merge distinct degrees of freedom
+  std::set<types::global_dof_index> boundary_dofs;
+
+  const unsigned int dofs_per_face = get_fe().dofs_per_face;
+  std::vector<types::global_dof_index> dofs_on_face(dofs_per_face);
+
+  // loop over all faces of all active cells and collect the indices on
+  // those boundary faces whose indicator is one of the requested ones
+  active_cell_iterator cell = begin_active (),
+                       endc = end();
+  for (; cell!=endc; ++cell)
+    for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+      if (cell->at_boundary(f)
+          &&
+          (boundary_ids.find(cell->face(f)->boundary_id()) !=
+           boundary_ids.end()))
+        {
+          cell->face(f)->get_dof_indices (dofs_on_face);
+          for (unsigned int i=0; i<dofs_per_face; ++i)
+            boundary_dofs.insert(dofs_on_face[i]);
+        }
+
+  return boundary_dofs.size();
+}
+
+
+
+// Count the distinct degrees of freedom located on boundary faces whose
+// boundary indicator is an element of boundary_ids. In debug mode the set
+// must not contain numbers::internal_face_boundary_id.
+template<int dim, int spacedim>
+types::global_dof_index
+DoFHandler<dim,spacedim>::n_boundary_dofs (const std::set<types::boundary_id> &boundary_ids) const
+{
+  Assert (boundary_ids.find (numbers::internal_face_boundary_id) == boundary_ids.end(),
+          ExcInvalidBoundaryIndicator());
+
+  // the set must hold values of the same type as the indices returned by
+  // get_dof_indices(); a set of a narrower integer type would convert the
+  // indices on insertion and could merge distinct degrees of freedom
+  std::set<types::global_dof_index> boundary_dofs;
+
+  const unsigned int dofs_per_face = get_fe().dofs_per_face;
+  std::vector<types::global_dof_index> dofs_on_face(dofs_per_face);
+
+  // loop over all faces of all active cells and collect the indices on
+  // those boundary faces whose indicator is one of the requested ones.
+  // the membership test uses the set's own find(), which is logarithmic,
+  // rather than a linear scan over the set
+  active_cell_iterator cell = begin_active (),
+                       endc = end();
+  for (; cell!=endc; ++cell)
+    for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+      if (cell->at_boundary(f)
+          &&
+          (boundary_ids.find (cell->face(f)->boundary_id()) !=
+           boundary_ids.end()))
+        {
+          cell->face(f)->get_dof_indices (dofs_on_face);
+          for (unsigned int i=0; i<dofs_per_face; ++i)
+            boundary_dofs.insert(dofs_on_face[i]);
+        }
+
+  return boundary_dofs.size();
+}
+
+
+
+// Return an estimate, in bytes, of the memory used by this object. The
+// sum covers the member variables listed below plus the objects reached
+// through the levels, faces, mg_levels and mg_faces pointers.
+template<int dim, int spacedim>
+std::size_t
+DoFHandler<dim,spacedim>::memory_consumption () const
+{
+  std::size_t mem = (MemoryConsumption::memory_consumption (tria) +
+                     MemoryConsumption::memory_consumption (selected_fe) +
+                     MemoryConsumption::memory_consumption (block_info_object) +
+                     MemoryConsumption::memory_consumption (levels) +
+                     MemoryConsumption::memory_consumption (faces) +
+                     sizeof (number_cache) +
+                     MemoryConsumption::memory_consumption (mg_number_cache) +
+                     MemoryConsumption::memory_consumption (vertex_dofs));
+
+  // the faces pointer is null until space has been reserved (and again
+  // after clear_space()), so the pointed-to object may only be looked at
+  // if it exists; this mirrors the treatment of mg_faces below
+  if (faces != 0)
+    mem += MemoryConsumption::memory_consumption (*faces);
+
+  for (unsigned int i=0; i<levels.size(); ++i)
+    mem += MemoryConsumption::memory_consumption (*levels[i]);
+
+  for (unsigned int level = 0; level < mg_levels.size (); ++level)
+    mem += mg_levels[level]->memory_consumption ();
+
+  if (mg_faces != 0)
+    mem += MemoryConsumption::memory_consumption (*mg_faces);
+
+  // per vertex: the bookkeeping object itself plus one index slot for
+  // each level between its coarsest and finest level (inclusive)
+  for (unsigned int i = 0; i < mg_vertex_dofs.size (); ++i)
+    mem += sizeof (MGVertexDoFs) + (1 + mg_vertex_dofs[i].get_finest_level () - mg_vertex_dofs[i].get_coarsest_level ()) * sizeof (types::global_dof_index);
+
+  return mem;
+}
+
+
+
+// Select the finite element ff, rebuild the storage for the active
+// degrees of freedom, and let the policy object number them.
+template<int dim, int spacedim>
+void DoFHandler<dim,spacedim>::distribute_dofs (const FiniteElement<dim,spacedim> &ff)
+{
+  selected_fe = &ff;
+
+  // Throw away the old storage and allocate it afresh. Space is reserved
+  // for every degree of freedom on this mesh, including those on ghost
+  // cells and on cells stored entirely on other processors, even though
+  // some of them may never receive a number and stay at
+  // invalid_dof_index. The space is needed so that dof_indices can be
+  // queried on each cell, with invalid_dof_index as the answer where the
+  // index is not known.
+  clear_space ();
+  internal::DoFHandler::Implementation::reserve_space (*this);
+
+  // the actual numbering is the policy's job
+  policy->distribute_dofs (*this,number_cache);
+
+  // The block info object is only set up when the triangulation is not a
+  // parallel distributed one; it does not work correctly yet in that case.
+  const bool is_distributed
+    = (dynamic_cast<const parallel::distributed::Triangulation<dim,spacedim>*>(&*tria) != 0);
+  if (!is_distributed)
+    block_info_object.initialize(*this, false, true);
+}
+
+
+// Rebuild the storage for level (multigrid) degrees of freedom and let
+// the policy object number them. In debug mode it is checked that active
+// DoFs have been distributed already and that fe is the finite element
+// currently selected.
+template<int dim, int spacedim>
+void DoFHandler<dim, spacedim>::distribute_mg_dofs (const FiniteElement<dim, spacedim> &fe)
+{
+  // fe and old_fe are only used inside assertions
+  (void)fe;
+  Assert(levels.size()>0, ExcMessage("Distribute active DoFs using distribute_dofs() before calling distribute_mg_dofs()."));
+
+  const FiniteElement<dim, spacedim> *old_fe = selected_fe;
+  (void)old_fe;
+  Assert(old_fe == &fe, ExcMessage("You are required to use the same FE for level and active DoFs!") );
+
+  clear_mg_space();
+  internal::DoFHandler::Implementation::reserve_space_mg (*this);
+
+  // one cache entry per level; for a parallel distributed triangulation
+  // the global number of levels is used
+  const parallel::distributed::Triangulation<dim,spacedim> *dist_tr
+    = dynamic_cast<const parallel::distributed::Triangulation<dim,spacedim>*>(&*tria);
+  if (dist_tr)
+    mg_number_cache.resize(dist_tr->n_global_levels());
+  else
+    mg_number_cache.resize((*tria).n_levels());
+
+  policy->distribute_mg_dofs (*this, mg_number_cache);
+
+  // The block info object is only set up when the triangulation is not a
+  // parallel distributed one; it does not work correctly yet in that case.
+  if (dist_tr == 0)
+    block_info_object.initialize (*this, true, false);
+}
+
+// Intentionally empty; see the TODO in the body.
+template<int dim, int spacedim>
+void DoFHandler<dim, spacedim>::reserve_space ()
+{
+  // TODO: move this to distribute_mg_dofs and remove this function
+}
+
+// Free all storage associated with level (multigrid) degrees of freedom:
+// the per-level objects, the face object, the vertex data and the level
+// number caches.
+template<int dim, int spacedim>
+void DoFHandler<dim, spacedim>::clear_mg_space ()
+{
+  // the level objects are owned through raw pointers
+  for (unsigned int level = 0; level < mg_levels.size (); ++level)
+    delete mg_levels[level];
+  mg_levels.clear ();
+
+  delete mg_faces;
+  mg_faces = NULL;
+
+  // exchange the vertex data with an empty vector; the old contents are
+  // destroyed together with the local variable at the end of this function
+  std::vector<MGVertexDoFs> empty_vertex_dofs;
+  std::swap (mg_vertex_dofs, empty_vertex_dofs);
+
+  mg_number_cache.clear();
+}
+
+
+// Forward to block_info_object.initialize_local() for this object.
+template<int dim, int spacedim>
+void
+DoFHandler<dim,spacedim>::initialize_local_block_info ()
+{
+  block_info_object.initialize_local (*this);
+}
+
+
+
+// Detach from the finite element and free the storage of both the active
+// and the level degrees of freedom.
+template<int dim, int spacedim>
+void DoFHandler<dim,spacedim>::clear ()
+{
+  // give up our reference to the finite element first ...
+  selected_fe = 0;
+
+  // ... then free the active and the multigrid data
+  clear_space ();
+  clear_mg_space ();
+}
+
+
+
+// Renumber the active degrees of freedom according to new_numbers. After
+// consistency checks (active in debug mode), the work is handed to the
+// policy object.
+template <int dim, int spacedim>
+void
+DoFHandler<dim,spacedim>::renumber_dofs (const std::vector<types::global_dof_index> &new_numbers)
+{
+  Assert(levels.size()>0, ExcMessage("You need to distribute DoFs before you can renumber them."));
+
+  Assert (new_numbers.size() == n_locally_owned_dofs(),
+          ExcRenumberingIncomplete());
+
+#ifdef DEBUG
+  if (n_locally_owned_dofs() != n_dofs())
+    {
+      // not all DoFs are locally owned: the interval [0...n_dofs()) is
+      // mapped into itself only globally, not on each processor, so all
+      // we can check is that each new index is in range
+      for (types::global_dof_index i=0; i<new_numbers.size(); ++i)
+        Assert (new_numbers[i] < n_dofs(),
+                ExcMessage ("New DoF index is not less than the total number of dofs."));
+    }
+  else
+    {
+      // all DoFs are locally owned: the new indices, once sorted, must
+      // be exactly 0, 1, 2, ...
+      std::vector<types::global_dof_index> tmp(new_numbers);
+      std::sort (tmp.begin(), tmp.end());
+
+      types::global_dof_index i = 0;
+      std::vector<types::global_dof_index>::const_iterator p = tmp.begin();
+      while (p != tmp.end())
+        {
+          Assert (*p == i, ExcNewNumbersNotConsecutive(i));
+          ++p;
+          ++i;
+        }
+    }
+#endif
+
+  policy->renumber_dofs (new_numbers, *this,number_cache);
+}
+
+
+// Generic version of the level renumbering function. It is not
+// implemented and only triggers an assertion; specializations for
+// particular dimensions follow.
+template <int dim, int spacedim>
+void
+DoFHandler<dim,spacedim>::renumber_dofs (const unsigned int /*level*/,
+                                         const std::vector<types::global_dof_index> &/*new_numbers*/)
+{
+  Assert(false, ExcNotImplemented());
+}
+
+
+// Renumber the level degrees of freedom on the given level in 1d. Every
+// stored index i is replaced by new_numbers[i]; this is done first for
+// the vertex data and then for mg_levels[level]->dof_object.dofs.
+template<>
+void DoFHandler<1>::renumber_dofs (const unsigned int level,
+                                   const std::vector<types::global_dof_index> &new_numbers)
+{
+  Assert(mg_levels.size()>0 && levels.size()>0,
+         ExcMessage("You need to distribute active and level DoFs before you can renumber level DoFs."));
+  Assert (new_numbers.size() == n_dofs(level), DoFHandler<1>::ExcRenumberingIncomplete());
+
+  // We deliberately do not use cell iterators here: DoFs on the interface
+  // between two cells would then be renumbered more than once. Working
+  // directly on the storage is both correct and faster.
+  for (std::vector<MGVertexDoFs>::iterator vertex=mg_vertex_dofs.begin();
+       vertex!=mg_vertex_dofs.end(); ++vertex)
+    {
+      // skip vertices that do not live on the present level
+      if ((vertex->get_coarsest_level() > level) ||
+          (vertex->get_finest_level() < level))
+        continue;
+
+      for (unsigned int d=0; d<this->get_fe().dofs_per_vertex; ++d)
+        vertex->set_index (level, d, new_numbers[vertex->get_index (level, d)]);
+    }
+
+  // now the indices stored in the level object, leaving unassigned
+  // entries untouched
+  for (std::vector<types::global_dof_index>::iterator i=mg_levels[level]->dof_object.dofs.begin();
+       i!=mg_levels[level]->dof_object.dofs.end(); ++i)
+    {
+      if (*i == DoFHandler<1>::invalid_dof_index)
+        continue;
+
+      Assert(*i<new_numbers.size(), ExcInternalError());
+      *i = new_numbers[*i];
+    }
+}
+
+
+
+// Renumber the level degrees of freedom on the given level in 2d. Every
+// stored index i is replaced by new_numbers[i]. This is done in three
+// steps: vertex data, line data (only if the element has DoFs on lines),
+// and finally mg_levels[level]->dof_object.dofs. The line step
+// temporarily uses the triangulation's user flags to make sure each line
+// is renumbered only once; the flags are saved beforehand and restored
+// afterwards.
+template<>
+void DoFHandler<2>::renumber_dofs (const unsigned int  level,
+                                   const std::vector<types::global_dof_index>  &new_numbers)
+{
+  Assert(mg_levels.size()>0 && levels.size()>0,
+         ExcMessage("You need to distribute active and level DoFs before you can renumber level DoFs."));
+  Assert (new_numbers.size() == n_dofs(level),
+          DoFHandler<2>::ExcRenumberingIncomplete());
+
+  // VERTEX DoFs
+  for (std::vector<MGVertexDoFs>::iterator i=mg_vertex_dofs.begin();
+       i!=mg_vertex_dofs.end(); ++i)
+    // if the present vertex lives on
+    // the present level
+    if ((i->get_coarsest_level() <= level) &&
+        (i->get_finest_level() >= level))
+      for (unsigned int d=0; d<this->get_fe().dofs_per_vertex; ++d)
+        i->set_index (level, d, new_numbers[i->get_index (level, d)]);
+
+  // LINE DoFs
+  if (this->get_fe().dofs_per_line > 0)
+    {
+      // save user flags as they will be modified
+      std::vector<bool> user_flags;
+      this->get_triangulation().save_user_flags(user_flags);
+      const_cast<Triangulation<2> &>(this->get_triangulation()).clear_user_flags ();
+
+      // flag all lines adjacent to cells of the current
+      // level, as those lines logically belong to the same
+      // level as the cell, at least for isotropic
+      // refinement
+      for (level_cell_iterator cell = begin(level); cell != end(level); ++cell)
+        for (unsigned int line=0; line < GeometryInfo<2>::faces_per_cell; ++line)
+          cell->face(line)->set_user_flag();
+
+      // renumber each flagged line once; the flag is cleared after the
+      // first visit so that a line shared between cells is not
+      // renumbered a second time.
+      // NOTE(review): this loop runs over the cells of all levels,
+      // whereas the corresponding loops of the 3d specialization run
+      // over begin(level)..end(level) only -- confirm this is intended
+      for (cell_iterator cell = begin(); cell != end(); ++cell)
+        for (unsigned int l=0; l<GeometryInfo<2>::lines_per_cell; ++l)
+          if (cell->line(l)->user_flag_set())
+            {
+              for (unsigned int d=0; d<this->get_fe().dofs_per_line; ++d)
+                cell->line(l)->set_mg_dof_index (level, d,
+                                                 new_numbers[cell->line(l)->mg_dof_index(level, d)]);
+              cell->line(l)->clear_user_flag();
+            }
+      // finally, restore user flags
+      const_cast<Triangulation<2> &>(this->get_triangulation()).load_user_flags (user_flags);
+    }
+
+  // indices stored in the level object; entries equal to
+  // invalid_dof_index are left untouched
+  for (std::vector<types::global_dof_index>::iterator i=mg_levels[level]->dof_object.dofs.begin();
+       i!=mg_levels[level]->dof_object.dofs.end(); ++i)
+    {
+      if (*i != DoFHandler<2>::invalid_dof_index)
+        {
+          Assert(*i<new_numbers.size(), ExcInternalError());
+          *i = new_numbers[*i];
+        }
+    }
+}
+
+
+
+// Renumber the level degrees of freedom on the given level in 3d. Every
+// stored index i is replaced by new_numbers[i]. This is done in four
+// steps: vertex data, line data and quad data (each only if the element
+// has DoFs on such objects), and finally
+// mg_levels[level]->dof_object.dofs. The line and quad steps temporarily
+// use the triangulation's user flags to make sure each object is
+// renumbered only once; the flags are saved beforehand and restored
+// afterwards.
+template<>
+void DoFHandler<3>::renumber_dofs (const unsigned int  level,
+                                   const std::vector<types::global_dof_index>  &new_numbers)
+{
+  Assert(mg_levels.size()>0 && levels.size()>0,
+         ExcMessage("You need to distribute active and level DoFs before you can renumber level DoFs."));
+  Assert (new_numbers.size() == n_dofs(level),
+          DoFHandler<3>::ExcRenumberingIncomplete());
+
+  // VERTEX DoFs
+  for (std::vector<MGVertexDoFs>::iterator i=mg_vertex_dofs.begin();
+       i!=mg_vertex_dofs.end(); ++i)
+    // if the present vertex lives on
+    // the present level
+    if ((i->get_coarsest_level() <= level) &&
+        (i->get_finest_level() >= level))
+      for (unsigned int d=0; d<this->get_fe().dofs_per_vertex; ++d)
+        i->set_index (level, d, new_numbers[i->get_index (level, d)]);
+
+  // LINE DoFs
+  if (this->get_fe().dofs_per_line > 0)
+    {
+      // save user flags as they will be modified
+      std::vector<bool> user_flags;
+      this->get_triangulation().save_user_flags(user_flags);
+      const_cast<Triangulation<3> &>(this->get_triangulation()).clear_user_flags ();
+
+      // flag all lines adjacent to cells of the current
+      // level, as those lines logically belong to the same
+      // level as the cell, at least for isotropic
+      // refinement
+      for (level_cell_iterator cell = begin(level) ; cell != end(level) ; ++cell)
+        for (unsigned int line=0; line < GeometryInfo<3>::lines_per_cell; ++line)
+          cell->line(line)->set_user_flag();
+
+
+      // renumber each flagged line once; the flag is cleared after the
+      // first visit so that a line shared between cells is not
+      // renumbered a second time
+      for (cell_iterator cell = begin(level); cell != end(level); ++cell)
+        for (unsigned int l=0; l<GeometryInfo<3>::lines_per_cell; ++l)
+          if (cell->line(l)->user_flag_set())
+            {
+              for (unsigned int d=0; d<this->get_fe().dofs_per_line; ++d)
+                cell->line(l)->set_mg_dof_index (level, d,
+                                                 new_numbers[cell->line(l)->mg_dof_index(level, d)]);
+              cell->line(l)->clear_user_flag();
+            }
+      // finally, restore user flags
+      const_cast<Triangulation<3> &>(this->get_triangulation()).load_user_flags (user_flags);
+    }
+
+  // QUAD DoFs
+  if (this->get_fe().dofs_per_quad > 0)
+    {
+      // save user flags as they will be modified
+      std::vector<bool> user_flags;
+      this->get_triangulation().save_user_flags(user_flags);
+      const_cast<Triangulation<3> &>(this->get_triangulation()).clear_user_flags ();
+
+      // flag all quads adjacent to cells of the current
+      // level, as those quads logically belong to the same
+      // level as the cell, at least for isotropic
+      // refinement
+      for (level_cell_iterator cell = begin(level) ; cell != end(level); ++cell)
+        for (unsigned int quad=0; quad < GeometryInfo<3>::faces_per_cell; ++quad)
+          cell->face(quad)->set_user_flag();
+
+      // renumber each flagged quad once; the flag is cleared after the
+      // first visit so that a quad shared between cells is not
+      // renumbered a second time
+      for (cell_iterator cell = begin(level); cell != end(level); ++cell)
+        for (unsigned int q=0; q<GeometryInfo<3>::quads_per_cell; ++q)
+          if (cell->quad(q)->user_flag_set())
+            {
+              for (unsigned int d=0; d<this->get_fe().dofs_per_quad; ++d)
+                cell->quad(q)->set_mg_dof_index (level, d,
+                                                 new_numbers[cell->quad(q)->mg_dof_index(level, d)]);
+              cell->quad(q)->clear_user_flag();
+            }
+      // finally, restore user flags
+      const_cast<Triangulation<3> &>(this->get_triangulation()).load_user_flags (user_flags);
+    }
+
+  // HEX DoFs: indices stored in the level object; entries equal to
+  // invalid_dof_index are left untouched
+  for (std::vector<types::global_dof_index>::iterator i=mg_levels[level]->dof_object.dofs.begin();
+       i!=mg_levels[level]->dof_object.dofs.end(); ++i)
+    {
+      if (*i != DoFHandler<3>::invalid_dof_index)
+        {
+          Assert(*i<new_numbers.size(), ExcInternalError());
+          *i = new_numbers[*i];
+        }
+    }
+}
+
+
+
+
+// Forward to the dimension-dependent implementation in
+// internal::DoFHandler::Implementation.
+template <int dim, int spacedim>
+unsigned int
+DoFHandler<dim,spacedim>::max_couplings_between_dofs () const
+{
+  typedef internal::DoFHandler::Implementation Implementation;
+
+  return Implementation::max_couplings_between_dofs (*this);
+}
+
+
+
+// Return a dimension-dependent combination of the per-vertex, per-line
+// and per-quad DoF counts of the selected finite element. For dimensions
+// other than 1, 2 and 3 an assertion is triggered and
+// numbers::invalid_unsigned_int is returned.
+template <int dim, int spacedim>
+unsigned int
+DoFHandler<dim,spacedim>::max_couplings_between_boundary_dofs () const
+{
+  if (dim == 1)
+    return get_fe().dofs_per_vertex;
+
+  if (dim == 2)
+    return (3*get_fe().dofs_per_vertex +
+            2*get_fe().dofs_per_line);
+
+  if (dim == 3)
+    {
+      // Refinement of one boundary face has to be taken into account
+      // here; in fact, this is what #max_coupling_between_dofs<2>
+      // returns.
+      //
+      // The formula assumes that only four faces meet at the boundary.
+      // That assumption is not justified and needs to be fixed some
+      // time. Fortunately, leaving it as it is does no harm for now,
+      // since the matrix will cry foul if its requirements are not
+      // satisfied.
+      return (19*get_fe().dofs_per_vertex +
+              28*get_fe().dofs_per_line +
+              8*get_fe().dofs_per_quad);
+    }
+
+  Assert (false, ExcNotImplemented());
+  return numbers::invalid_unsigned_int;
+}
+
+
+
+template<int dim, int spacedim>
+void DoFHandler<dim,spacedim>::clear_space ()
+{
+  // destroy the per-level objects first, then forget the (now
+  // dangling) pointers to them
+  const unsigned int n_stored_levels = levels.size();
+  for (unsigned int level=0; level<n_stored_levels; ++level)
+    delete levels[level];
+  levels.clear ();
+
+  // same for the face object
+  delete faces;
+  faces = 0;
+
+  // exchange the vertex dof array with an empty temporary so that the
+  // old contents are destroyed together with the temporary
+  std::vector<types::global_dof_index>().swap (vertex_dofs);
+
+  number_cache.clear ();
+}
+
+template<int dim, int spacedim>
+template<int structdim>
+types::global_dof_index
+DoFHandler<dim, spacedim>::get_dof_index (const unsigned int obj_level,
+                                          const unsigned int obj_index,
+                                          const unsigned int fe_index,
+                                          const unsigned int local_index) const
+{
+  // structdim is handed on as a tag object; the lookup itself is done
+  // by Implementation::get_dof_index, which receives the multigrid
+  // level object for obj_level and the multigrid face object
+  const internal::int2type<structdim> structdim_tag = internal::int2type<structdim> ();
+
+  return internal::DoFHandler::Implementation::get_dof_index (*this,
+                                                              *this->mg_levels[obj_level],
+                                                              *this->mg_faces,
+                                                              obj_index,
+                                                              fe_index,
+                                                              local_index,
+                                                              structdim_tag);
+}
+
+template<int dim, int spacedim>
+template<int structdim>
+void
+DoFHandler<dim, spacedim>::set_dof_index (const unsigned int obj_level,
+                                          const unsigned int obj_index,
+                                          const unsigned int fe_index,
+                                          const unsigned int local_index,
+                                          const types::global_dof_index global_index) const
+{
+  // structdim is handed on as a tag object; the store itself is done
+  // by Implementation::set_dof_index, which receives the multigrid
+  // level object for obj_level and the multigrid face object
+  const internal::int2type<structdim> structdim_tag = internal::int2type<structdim> ();
+
+  internal::DoFHandler::Implementation::set_dof_index (*this,
+                                                       *this->mg_levels[obj_level],
+                                                       *this->mg_faces,
+                                                       obj_index,
+                                                       fe_index,
+                                                       local_index,
+                                                       global_index,
+                                                       structdim_tag);
+}
+
+
+template<int dim, int spacedim>
+DoFHandler<dim, spacedim>::MGVertexDoFs::MGVertexDoFs ()
+  :
+  // start with coarsest_level > finest_level and without any storage
+  coarsest_level (numbers::invalid_unsigned_int),
+  finest_level (0),
+  indices (0),
+  indices_offset (0)
+{}
+
+
+template<int dim, int spacedim>
+DoFHandler<dim, spacedim>::MGVertexDoFs::~MGVertexDoFs ()
+{
+  // release both arrays; either pointer may still be null, in which
+  // case delete[] does nothing
+  delete[] this->indices;
+  delete[] this->indices_offset;
+}
+
+// (Re-)allocate the index storage of this vertex for the levels
+// cl..fl (both inclusive), with dofs_per_vertex entries per level.
+//
+// Any storage left over from a previous call is released first. If
+// cl > fl only the two level bounds are recorded and no memory is
+// allocated. Otherwise every entry of 'indices' is set to
+// invalid_dof_index, and entry i of 'indices_offset' is set to
+// i * dofs_per_vertex.
+template<int dim, int spacedim>
+void DoFHandler<dim, spacedim>::MGVertexDoFs::init (const unsigned int cl,
+                                                    const unsigned int fl,
+                                                    const unsigned int dofs_per_vertex)
+{
+  // delete[] on a null pointer is a no-op, so no explicit checks are
+  // needed before releasing the old arrays
+  delete[] indices;
+  indices = 0;
+
+  delete[] indices_offset;
+  indices_offset = 0;
+
+  coarsest_level = cl;
+  finest_level = fl;
+
+  // empty level range: nothing to allocate
+  if (cl > fl)
+    return;
+
+  const unsigned int n_levels = finest_level - coarsest_level + 1;
+  const unsigned int n_indices = n_levels * dofs_per_vertex;
+
+  indices = new types::global_dof_index[n_indices];
+  Assert (indices != 0, ExcNoMemory ());
+
+  for (unsigned int i = 0; i < n_indices; ++i)
+    indices[i] = DoFHandler<dim, spacedim>::invalid_dof_index;
+
+  indices_offset = new types::global_dof_index[n_levels];
+  // check the array that was just allocated (this used to re-check
+  // 'indices' by mistake)
+  Assert (indices_offset != 0, ExcNoMemory ());
+
+  for (unsigned int i = 0; i < n_levels; ++i)
+    indices_offset[i] = i * dofs_per_vertex;
+}
+
+template<int dim, int spacedim>
+unsigned int
+DoFHandler<dim, spacedim>::MGVertexDoFs::get_coarsest_level () const
+{
+  // lower level bound, as set by the constructor or the last init()
+  return this->coarsest_level;
+}
+
+template<int dim, int spacedim>
+unsigned int
+DoFHandler<dim, spacedim>::MGVertexDoFs::get_finest_level () const
+{
+  // upper level bound, as set by the constructor or the last init()
+  return this->finest_level;
+}
+
+
+/*-------------- Explicit Instantiations -------------------------------*/
+#include "dof_handler.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/dofs/dof_handler.inst.in b/source/dofs/dof_handler.inst.in
new file mode 100644
index 0000000..8780b8c
--- /dev/null
+++ b/source/dofs/dof_handler.inst.in
@@ -0,0 +1,164 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+for (deal_II_dimension : DIMENSIONS)
+  {
+    // explicit instantiations of the helper templates dummy() and
+    // policy_to_string() in namespace internal
+    namespace internal
+    \{
+      // spacedim == dim
+      template const types::global_dof_index * dummy<deal_II_dimension,deal_II_dimension> ();
+      template std::string policy_to_string(const dealii::internal::DoFHandler::Policy::
+      PolicyBase<deal_II_dimension,deal_II_dimension> &);
+      
+      // spacedim == dim+1, only instantiated for dim < 3
+#if deal_II_dimension < 3
+      template const types::global_dof_index * dummy<deal_II_dimension,deal_II_dimension+1> ();
+      template std::string policy_to_string(const dealii::internal::DoFHandler::Policy::
+      PolicyBase<deal_II_dimension,deal_II_dimension+1> &);
+#endif
+    \}
+
+
+    // the class itself with the default second template argument
+    template class DoFHandler<deal_II_dimension>;
+
+    // spacedim == dim+1, only instantiated for dim < 3
+#if deal_II_dimension < 3
+    template class DoFHandler<deal_II_dimension,deal_II_dimension+1>;
+#endif
+
+    // DoFHandler<1,3> is instantiated in the pass for dimension 3,
+    // together with its get_dof_index<1> member template
+#if deal_II_dimension == 3
+    template class DoFHandler<1,deal_II_dimension>;
+    
+    template
+    types::global_dof_index
+    DoFHandler<1,3>::
+    get_dof_index<1> (const unsigned int       obj_level,
+		              const unsigned int       obj_index,
+		              const unsigned int       fe_index,
+		              const unsigned int       local_index) const;
+#endif
+
+// explicit instantiations of the member template get_dof_index<structdim>
+
+// structdim == 1, spacedim == dim
+template
+types::global_dof_index
+DoFHandler<deal_II_dimension,deal_II_dimension>::
+get_dof_index<1> (const unsigned int       obj_level,
+		  const unsigned int       obj_index,
+		  const unsigned int       fe_index,
+		  const unsigned int       local_index) const;
+
+// structdim == 1, spacedim == dim+1
+#if deal_II_dimension < 3
+template
+types::global_dof_index
+DoFHandler<deal_II_dimension,deal_II_dimension+1>::
+get_dof_index<1> (const unsigned int       obj_level,
+		  const unsigned int       obj_index,
+		  const unsigned int       fe_index,
+		  const unsigned int       local_index) const;
+#endif
+
+// structdim == 2 is only instantiated for dim >= 2
+#if deal_II_dimension >= 2
+template
+types::global_dof_index
+DoFHandler<deal_II_dimension,deal_II_dimension>::
+get_dof_index<2> (const unsigned int       obj_level,
+		  const unsigned int       obj_index,
+		  const unsigned int       fe_index,
+		  const unsigned int       local_index) const;
+
+// structdim == 2, spacedim == dim+1
+#if deal_II_dimension < 3
+template
+types::global_dof_index
+DoFHandler<deal_II_dimension,deal_II_dimension+1>::
+get_dof_index<2> (const unsigned int       obj_level,
+		  const unsigned int       obj_index,
+		  const unsigned int       fe_index,
+		  const unsigned int       local_index) const;
+#endif
+
+// structdim == 3 is only instantiated for dim >= 3
+#if deal_II_dimension >= 3
+template
+types::global_dof_index
+DoFHandler<deal_II_dimension,deal_II_dimension>::
+get_dof_index<3> (const unsigned int       obj_level,
+		  const unsigned int       obj_index,
+		  const unsigned int       fe_index,
+		  const unsigned int       local_index) const;
+#endif
+#endif
+
+// explicit instantiations of the member template set_dof_index<structdim>
+
+// structdim == 1, spacedim == dim
+template
+void
+DoFHandler<deal_II_dimension,deal_II_dimension>::
+set_dof_index<1> (const unsigned int       obj_level,
+		  const unsigned int            obj_index,
+		  const unsigned int            fe_index,
+		  const unsigned int            local_index,
+		  const types::global_dof_index global_index) const;
+
+// structdim == 1, spacedim == dim+1
+#if deal_II_dimension < 3
+template
+void
+DoFHandler<deal_II_dimension,deal_II_dimension+1>::
+set_dof_index<1> (const unsigned int       obj_level,
+		  const unsigned int            obj_index,
+		  const unsigned int            fe_index,
+		  const unsigned int            local_index,
+		  const types::global_dof_index global_index) const;
+#endif
+
+// structdim == 1, spacedim == dim+2 (only for dim < 2)
+#if deal_II_dimension < 2
+template
+void
+DoFHandler<deal_II_dimension,deal_II_dimension+2>::
+set_dof_index<1> (const unsigned int       obj_level,
+		  const unsigned int            obj_index,
+		  const unsigned int            fe_index,
+		  const unsigned int            local_index,
+		  const types::global_dof_index global_index) const;
+#endif
+
+// structdim == 2 is only instantiated for dim >= 2
+#if deal_II_dimension >= 2
+template
+void
+DoFHandler<deal_II_dimension,deal_II_dimension>::
+set_dof_index<2> (const unsigned int       obj_level,
+		  const unsigned int            obj_index,
+		  const unsigned int            fe_index,
+		  const unsigned int            local_index,
+		  const types::global_dof_index global_index) const;
+
+// structdim == 2, spacedim == dim+1
+#if deal_II_dimension < 3
+template
+void
+DoFHandler<deal_II_dimension,deal_II_dimension+1>::
+set_dof_index<2> (const unsigned int       obj_level,
+		  const unsigned int            obj_index,
+		  const unsigned int            fe_index,
+		  const unsigned int            local_index,
+		  const types::global_dof_index global_index) const;
+#endif
+
+// structdim == 3 is only instantiated for dim >= 3
+#if deal_II_dimension >= 3
+template
+void
+DoFHandler<deal_II_dimension,deal_II_dimension>::
+set_dof_index<3> (const unsigned int       obj_level,
+		  const unsigned int            obj_index,
+		  const unsigned int            fe_index,
+		  const unsigned int            local_index,
+		  const types::global_dof_index global_index) const;
+#endif
+#endif
+  }
+
diff --git a/source/dofs/dof_handler_policy.cc b/source/dofs/dof_handler_policy.cc
new file mode 100644
index 0000000..c50e6d2
--- /dev/null
+++ b/source/dofs/dof_handler_policy.cc
@@ -0,0 +1,2901 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+//TODO [TH]: renumber DoFs for multigrid is not done yet
+
+#include <deal.II/base/geometry_info.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_handler_policy.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/distributed/shared_tria.h>
+#include <deal.II/distributed/tria.h>
+
+#include <set>
+#include <algorithm>
+#include <numeric>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace internal
+{
+  namespace DoFHandler
+  {
+    namespace Policy
+    {
+      // use class dealii::DoFHandler instead
+      // of namespace internal::DoFHandler in
+      // the following
+      using dealii::DoFHandler;
+
+      struct Implementation
+      {
+
+        /* -------------- distribute_dofs functionality ------------- */
+
+        /**
+         * Distribute dofs on the given cell,
+         * with new dofs starting with index
+         * @p next_free_dof. Return the next
+         * unused index number.
+         *
+         * This function is excluded from the
+         * @p distribute_dofs function since
+         * it can not be implemented dimension
+         * independent.
+         */
+        template <int spacedim>
+        static
+        types::global_dof_index
+        distribute_dofs_on_cell (const DoFHandler<1,spacedim> &dof_handler,
+                                 const typename DoFHandler<1,spacedim>::active_cell_iterator &cell,
+                                 types::global_dof_index next_free_dof)
+        {
+          // 1d case: number the vertex dofs that do not have an index
+          // yet, then all line dofs of the cell, set the cell's user
+          // flag and return the first index that is still unused
+          const unsigned int n_vertex_dofs = dof_handler.get_fe().dofs_per_vertex;
+          const unsigned int n_line_dofs   = dof_handler.get_fe().dofs_per_line;
+
+          if (n_vertex_dofs > 0)
+            for (unsigned int vertex=0; vertex<GeometryInfo<1>::vertices_per_cell; ++vertex)
+              {
+                // the first dof decides whether the vertex still
+                // needs to be numbered
+                const bool vertex_is_unnumbered
+                  = (cell->vertex_dof_index (vertex,0) ==
+                     DoFHandler<1,spacedim>::invalid_dof_index);
+
+                for (unsigned int d=0; d<n_vertex_dofs; ++d)
+                  if (vertex_is_unnumbered)
+                    {
+                      // then none of its dofs may carry an index yet
+                      Assert ((cell->vertex_dof_index (vertex,d) ==
+                               DoFHandler<1,spacedim>::invalid_dof_index),
+                              ExcInternalError());
+                      cell->set_vertex_dof_index (vertex, d, next_free_dof);
+                      ++next_free_dof;
+                    }
+                  else
+                    {
+                      // otherwise all of them must have one already
+                      Assert ((cell->vertex_dof_index (vertex,d) !=
+                               DoFHandler<1,spacedim>::invalid_dof_index),
+                              ExcInternalError());
+                    }
+              }
+
+          // the dofs located on the line itself
+          for (unsigned int d=0; d<n_line_dofs; ++d)
+            {
+              cell->set_dof_index (d, next_free_dof);
+              ++next_free_dof;
+            }
+
+          // remember that this cell has been dealt with
+          cell->set_user_flag ();
+
+          return next_free_dof;
+        }
+
+
+
+        template <int spacedim>
+        static
+        types::global_dof_index
+        distribute_dofs_on_cell (const DoFHandler<2,spacedim> &dof_handler,
+                                 const typename DoFHandler<2,spacedim>::active_cell_iterator &cell,
+                                 types::global_dof_index next_free_dof)
+        {
+          // 2d case: number the not yet numbered vertex and line dofs,
+          // then the dofs in the interior of the quad, set the cell's
+          // user flag and return the first index that is still unused
+          const unsigned int n_vertex_dofs = dof_handler.get_fe().dofs_per_vertex;
+          const unsigned int n_line_dofs   = dof_handler.get_fe().dofs_per_line;
+          const unsigned int n_quad_dofs   = dof_handler.get_fe().dofs_per_quad;
+
+          // vertices
+          if (n_vertex_dofs > 0)
+            for (unsigned int v=0; v<GeometryInfo<2>::vertices_per_cell; ++v)
+              {
+                // skip vertices that already carry indices; only the
+                // first dof is inspected
+                if (cell->vertex_dof_index(v, 0) != DoFHandler<2,spacedim>::invalid_dof_index)
+                  continue;
+
+                for (unsigned int d=0; d<n_vertex_dofs; ++d)
+                  cell->set_vertex_dof_index (v, d, next_free_dof++);
+              }
+
+          // the four bounding lines
+          if (n_line_dofs > 0)
+            for (unsigned int l=0; l<GeometryInfo<2>::faces_per_cell; ++l)
+              {
+                const typename DoFHandler<2,spacedim>::line_iterator
+                line = cell->line(l);
+
+                // skip lines that already carry indices; again only
+                // the first dof is inspected
+                if (line->dof_index(0) != DoFHandler<2,spacedim>::invalid_dof_index)
+                  continue;
+
+                for (unsigned int d=0; d<n_line_dofs; ++d)
+                  line->set_dof_index (d, next_free_dof++);
+              }
+
+          // interior of the quad
+          for (unsigned int d=0; d<n_quad_dofs; ++d)
+            cell->set_dof_index (d, next_free_dof++);
+
+          // remember that this cell has been dealt with
+          cell->set_user_flag ();
+
+          return next_free_dof;
+        }
+
+
+        template <int spacedim>
+        static
+        types::global_dof_index
+        distribute_dofs_on_cell (const DoFHandler<3,spacedim> &dof_handler,
+                                 const typename DoFHandler<3,spacedim>::active_cell_iterator &cell,
+                                 types::global_dof_index next_free_dof)
+        {
+          // 3d case: number the not yet numbered vertex, line and quad
+          // dofs, then the dofs in the interior of the hex, set the
+          // cell's user flag and return the first index that is still
+          // unused
+          const unsigned int n_vertex_dofs = dof_handler.get_fe().dofs_per_vertex;
+          const unsigned int n_line_dofs   = dof_handler.get_fe().dofs_per_line;
+          const unsigned int n_quad_dofs   = dof_handler.get_fe().dofs_per_quad;
+          const unsigned int n_hex_dofs    = dof_handler.get_fe().dofs_per_hex;
+
+          // vertices
+          if (n_vertex_dofs > 0)
+            for (unsigned int v=0; v<GeometryInfo<3>::vertices_per_cell; ++v)
+              {
+                // skip vertices that already carry indices; only the
+                // first dof is inspected
+                if (cell->vertex_dof_index(v, 0) != DoFHandler<3,spacedim>::invalid_dof_index)
+                  continue;
+
+                for (unsigned int d=0; d<n_vertex_dofs; ++d)
+                  cell->set_vertex_dof_index (v, d, next_free_dof++);
+              }
+
+          // lines
+          if (n_line_dofs > 0)
+            for (unsigned int line_no=0; line_no<GeometryInfo<3>::lines_per_cell; ++line_no)
+              {
+                const typename DoFHandler<3,spacedim>::line_iterator
+                line = cell->line(line_no);
+
+                // skip lines that already carry indices
+                if (line->dof_index(0) != DoFHandler<3,spacedim>::invalid_dof_index)
+                  continue;
+
+                for (unsigned int d=0; d<n_line_dofs; ++d)
+                  line->set_dof_index (d, next_free_dof++);
+              }
+
+          // quads
+          if (n_quad_dofs > 0)
+            for (unsigned int quad_no=0; quad_no<GeometryInfo<3>::quads_per_cell; ++quad_no)
+              {
+                const typename DoFHandler<3,spacedim>::quad_iterator
+                quad = cell->quad(quad_no);
+
+                // skip quads that already carry indices
+                if (quad->dof_index(0) != DoFHandler<3,spacedim>::invalid_dof_index)
+                  continue;
+
+                for (unsigned int d=0; d<n_quad_dofs; ++d)
+                  quad->set_dof_index (d, next_free_dof++);
+              }
+
+          // interior of the hex
+          for (unsigned int d=0; d<n_hex_dofs; ++d)
+            cell->set_dof_index (d, next_free_dof++);
+
+          // remember that this cell has been dealt with
+          cell->set_user_flag ();
+
+          return next_free_dof;
+        }
+
+
+        /**
+         * Distribute degrees of freedom on all cells, or on cells with the
+         * correct subdomain_id if the corresponding argument is not equal to
+         * numbers::invalid_subdomain_id. Return the total number of dofs
+         * distributed.
+         */
+        template <int dim, int spacedim>
+        static
+        types::global_dof_index
+        distribute_dofs (const types::global_dof_index offset,
+                         const types::subdomain_id subdomain_id,
+                         DoFHandler<dim,spacedim> &dof_handler)
+        {
+          typedef
+          typename DoFHandler<dim,spacedim>::active_cell_iterator
+          active_iterator;
+
+          const dealii::Triangulation<dim,spacedim> &tria
+            = dof_handler.get_triangulation();
+          Assert (tria.n_levels() > 0, ExcMessage("Empty triangulation"));
+
+          // The per-cell functions set the user flag of every cell they
+          // visit. Stash the current flags and start from a clean
+          // slate; the stashed flags are put back before returning so
+          // that the triangulation ends up in the state it had on
+          // entry.
+          dealii::Triangulation<dim,spacedim> &writable_tria
+            = const_cast<dealii::Triangulation<dim,spacedim> &>(tria);
+          std::vector<bool> saved_user_flags;
+          tria.save_user_flags(saved_user_flags);
+          writable_tria.clear_user_flags ();
+
+          const active_iterator endc = dof_handler.end();
+
+          // hand out indices, starting at the given offset, on all
+          // active cells or only on those of the requested subdomain
+          types::global_dof_index next_free_dof = offset;
+          for (active_iterator cell = dof_handler.begin_active();
+               cell != endc; ++cell)
+            if ((subdomain_id == numbers::invalid_subdomain_id)
+                ||
+                (cell->subdomain_id() == subdomain_id))
+              next_free_dof
+                = Implementation::distribute_dofs_on_cell (dof_handler,
+                                                           cell,
+                                                           next_free_dof);
+
+          // refresh the cell dof index cache of all non-artificial cells
+          for (active_iterator cell = dof_handler.begin_active();
+               cell != endc; ++cell)
+            {
+              if (cell->is_artificial())
+                continue;
+
+              cell->update_cell_dof_indices_cache ();
+            }
+
+          // put the original user flags back
+          writable_tria.load_user_flags(saved_user_flags);
+
+          return next_free_dof;
+        }
+
+
+        /**
+         * Distribute dofs on the given
+         * cell, with new dofs starting
+         * with index
+         * @p next_free_dof. Return the
+         * next unused index number.
+         *
+         * This function is excluded from
+         * the @p distribute_dofs
+         * function since it can not be
+         * implemented dimension
+         * independent.
+         *
+         * Note that unlike for the usual
+         * dofs, here all cells and not
+         * only active ones are allowed.
+         */
+
+        // These three functions
+        // have an unused
+        // DoFHandler object as
+        // their first
+        // argument. Without it,
+        // the file was not
+        // compilable under gcc
+        // 4.4.5 (Debian).
+        template <int spacedim>
+        static
+        unsigned int
+        distribute_mg_dofs_on_cell (const DoFHandler<1,spacedim> &,
+                                    typename DoFHandler<1,spacedim>::level_cell_iterator &cell,
+                                    unsigned int   next_free_dof)
+        {
+          // 1d case: every vertex either takes over the level indices
+          // of an already processed neighbor on the same level or gets
+          // new ones; afterwards the line dofs are numbered, the cell's
+          // user flag is set and the first unused index is returned
+          const int          level         = cell->level();
+          const unsigned int n_vertex_dofs = cell->get_fe().dofs_per_vertex;
+          const unsigned int n_line_dofs   = cell->get_fe().dofs_per_line;
+
+          if (n_vertex_dofs > 0)
+            for (unsigned int v=0; v<GeometryInfo<1>::vertices_per_cell; ++v)
+              {
+                typename DoFHandler<1,spacedim>::level_cell_iterator
+                neighbor = cell->neighbor(v);
+
+                // Indices can only be copied if the neighbor exists,
+                // has been processed already, and lives on the same
+                // level (only then are the mg dofs the same).
+                const bool copy_from_neighbor
+                  = ((neighbor.state() == IteratorState::valid)
+                     &&
+                     neighbor->user_flag_set()
+                     &&
+                     (neighbor->level() == level));
+
+                if (copy_from_neighbor)
+                  {
+                    // vertex v of this cell coincides with vertex 1-v
+                    // of the neighbor
+                    for (unsigned int d=0; d<n_vertex_dofs; ++d)
+                      cell->set_mg_vertex_dof_index (level, v, d,
+                                                     neighbor->mg_vertex_dof_index (level, 1-v, d));
+                  }
+                else
+                  {
+                    // otherwise: create dofs newly
+                    for (unsigned int d=0; d<n_vertex_dofs; ++d)
+                      cell->set_mg_vertex_dof_index (level, v, d, next_free_dof++);
+                  }
+              }
+
+          // the dofs located on the line itself
+          for (unsigned int d=0; d<n_line_dofs; ++d)
+            cell->set_mg_dof_index (level, d, next_free_dof++);
+
+          // remember that this cell has been dealt with
+          cell->set_user_flag ();
+
+          return next_free_dof;
+        }
+
+
+        template <int spacedim>
+        static
+        unsigned int
+        distribute_mg_dofs_on_cell (const DoFHandler<2,spacedim> &,
+                                    typename DoFHandler<2,spacedim>::level_cell_iterator &cell,
+                                    unsigned int   next_free_dof)
+        {
+          // 2d case: number the level dofs of @p cell on the level of
+          // the cell, starting with @p next_free_dof. Vertices and
+          // lines that already carry a level index are skipped. The
+          // cell's user flag is set at the end and the first index
+          // that is still unused is returned.
+          //
+          // The invalid index is taken from DoFHandler<dim,spacedim>,
+          // i.e. from the class this function works on, as the
+          // functions for the active dofs do.
+          const unsigned int dim = 2;
+          if (cell->get_fe().dofs_per_vertex > 0)
+            // number dofs on vertices
+            for (unsigned int vertex=0; vertex<GeometryInfo<2>::vertices_per_cell; ++vertex)
+              // check whether dofs for this
+              // vertex have been distributed
+              // (only check the first dof)
+              if (cell->mg_vertex_dof_index(cell->level(), vertex, 0) == DoFHandler<dim,spacedim>::invalid_dof_index)
+                for (unsigned int d=0; d<cell->get_fe().dofs_per_vertex; ++d)
+                  cell->set_mg_vertex_dof_index (cell->level(), vertex, d, next_free_dof++);
+
+          // for the four sides
+          if (cell->get_fe().dofs_per_line > 0)
+            for (unsigned int side=0; side<GeometryInfo<2>::faces_per_cell; ++side)
+              {
+                typename DoFHandler<dim,spacedim>::line_iterator line = cell->line(side);
+
+                // distribute dofs if necessary:
+                // check whether line dof is already
+                // numbered (check only first dof)
+                if (line->mg_dof_index(cell->level(), 0) == DoFHandler<dim,spacedim>::invalid_dof_index)
+                  // if not: distribute dofs
+                  for (unsigned int d=0; d<cell->get_fe().dofs_per_line; ++d)
+                    line->set_mg_dof_index (cell->level(), d, next_free_dof++);
+              }
+
+
+          // dofs of quad
+          if (cell->get_fe().dofs_per_quad > 0)
+            for (unsigned int d=0; d<cell->get_fe().dofs_per_quad; ++d)
+              cell->set_mg_dof_index (cell->level(), d, next_free_dof++);
+
+
+          // note that this cell has been processed
+          cell->set_user_flag ();
+
+          return next_free_dof;
+        }
+
+
+        template <int spacedim>
+        static
+        unsigned int
+        distribute_mg_dofs_on_cell (const DoFHandler<3,spacedim> &,
+                                    typename DoFHandler<3,spacedim>::level_cell_iterator &cell,
+                                    unsigned int   next_free_dof)
+        {
+          // 3d case: number the level dofs of @p cell on the level of
+          // the cell, starting with @p next_free_dof. Vertices, lines
+          // and quads that already carry a level index are skipped.
+          // The cell's user flag is set at the end and the first index
+          // that is still unused is returned.
+          //
+          // The invalid index is taken from DoFHandler<dim,spacedim>,
+          // i.e. from the class this function works on, as the
+          // functions for the active dofs do.
+          const unsigned int dim = 3;
+          if (cell->get_fe().dofs_per_vertex > 0)
+            // number dofs on vertices
+            for (unsigned int vertex=0; vertex<GeometryInfo<3>::vertices_per_cell; ++vertex)
+              // check whether dofs for this
+              // vertex have been distributed
+              // (only check the first dof)
+              if (cell->mg_vertex_dof_index(cell->level(), vertex, 0) == DoFHandler<dim,spacedim>::invalid_dof_index)
+                for (unsigned int d=0; d<cell->get_fe().dofs_per_vertex; ++d)
+                  cell->set_mg_vertex_dof_index (cell->level(), vertex, d, next_free_dof++);
+
+          // for the lines
+          if (cell->get_fe().dofs_per_line > 0)
+            for (unsigned int l=0; l<GeometryInfo<3>::lines_per_cell; ++l)
+              {
+                typename DoFHandler<dim,spacedim>::line_iterator line = cell->line(l);
+
+                // distribute dofs if necessary:
+                // check whether line dof is already
+                // numbered (check only first dof)
+                if (line->mg_dof_index(cell->level(), 0) == DoFHandler<dim,spacedim>::invalid_dof_index)
+                  // if not: distribute dofs
+                  for (unsigned int d=0; d<cell->get_fe().dofs_per_line; ++d)
+                    line->set_mg_dof_index (cell->level(), d, next_free_dof++);
+              }
+
+          // for the quads
+          if (cell->get_fe().dofs_per_quad > 0)
+            for (unsigned int q=0; q<GeometryInfo<3>::quads_per_cell; ++q)
+              {
+                typename DoFHandler<dim,spacedim>::quad_iterator quad = cell->quad(q);
+
+                // distribute dofs if necessary:
+                // check whether quad dof is already
+                // numbered (check only first dof)
+                if (quad->mg_dof_index(cell->level(), 0) == DoFHandler<dim,spacedim>::invalid_dof_index)
+                  // if not: distribute dofs
+                  for (unsigned int d=0; d<cell->get_fe().dofs_per_quad; ++d)
+                    quad->set_mg_dof_index (cell->level(), d, next_free_dof++);
+              }
+
+
+          // dofs of cell
+          if (cell->get_fe().dofs_per_hex > 0)
+            for (unsigned int d=0; d<cell->get_fe().dofs_per_hex; ++d)
+              cell->set_mg_dof_index (cell->level(), d, next_free_dof++);
+
+
+          // note that this cell has
+          // been processed
+          cell->set_user_flag ();
+
+          return next_free_dof;
+        }
+
+
+        template <int dim, int spacedim>
+        static
+        unsigned int
+        distribute_dofs_on_level (const unsigned int        offset,
+                                  const types::subdomain_id level_subdomain_id,
+                                  DoFHandler<dim,spacedim> &dof_handler,
+                                  const unsigned int level)
+        {
+          typedef
+          typename DoFHandler<dim,spacedim>::level_cell_iterator
+          level_iterator;
+
+          const dealii::Triangulation<dim,spacedim> &tria
+            = dof_handler.get_triangulation();
+          Assert (tria.n_levels() > 0, ExcMessage("Empty triangulation"));
+
+          // asking for a level the triangulation does not have is
+          // allowed for multigrid; nothing is distributed in that case
+          if (level>=tria.n_levels())
+            return 0;
+
+          // The per-cell functions set the user flag of every cell they
+          // visit. Stash the current flags and start from a clean
+          // slate; the stashed flags are put back before returning so
+          // that the triangulation ends up in the state it had on
+          // entry.
+          dealii::Triangulation<dim,spacedim> &writable_tria
+            = const_cast<dealii::Triangulation<dim,spacedim> &>(tria);
+          std::vector<bool> saved_user_flags;
+          tria.save_user_flags(saved_user_flags);
+          writable_tria.clear_user_flags ();
+
+          const level_iterator endc = dof_handler.end(level);
+
+          // hand out level indices, starting at the given offset, on
+          // all cells of this level or only on those of the requested
+          // level subdomain
+          unsigned int next_free_dof = offset;
+          for (level_iterator cell = dof_handler.begin(level);
+               cell != endc; ++cell)
+            if ((level_subdomain_id == numbers::invalid_subdomain_id)
+                ||
+                (cell->level_subdomain_id() == level_subdomain_id))
+              next_free_dof
+                = Implementation::distribute_mg_dofs_on_cell (dof_handler, cell, next_free_dof);
+
+          // NOTE: unlike distribute_dofs(), this function does not
+          // refresh the cell dof index cache; the corresponding loop
+          // over cell->update_cell_dof_indices_cache() has been
+          // disabled in this function.
+
+          // put the original user flags back
+          writable_tria.load_user_flags(saved_user_flags);
+
+          return next_free_dof;
+        }
+
+
+        /* --------------------- renumber_dofs functionality ---------------- */
+
+
+        /**
+         * Implementation of the
+         * general template of same
+         * name.
+         *
+         * If the second argument
+         * has any elements set,
+         * then the elements of the
+         * vector of new numbers do
+         * not relate to the old
+         * DoF number but instead
+         * to the index of the old
+         * DoF number within the
+         * set of locally owned
+         * DoFs.
+         */
+        template <int spacedim>
+        static
+        void
+        renumber_dofs (const std::vector<types::global_dof_index> &new_numbers,
+                       const IndexSet &,
+                       DoFHandler<1,spacedim>          &dof_handler,
+                       const bool check_validity)
+        {
+          // note that we can not use cell
+          // iterators in this function since
+          // then we would renumber the dofs on
+          // the interface of two cells more
+          // than once. Anyway, this way it's
+          // not only more correct but also
+          // faster; note, however, that dof
+          // numbers may be invalid_dof_index,
+          // namely when the appropriate
+          // vertex/line/etc is unused
+          for (std::vector<types::global_dof_index>::iterator
+               i=dof_handler.vertex_dofs.begin();
+               i!=dof_handler.vertex_dofs.end(); ++i)
+            if (*i != DoFHandler<1,spacedim>::invalid_dof_index)
+              *i = new_numbers[*i];
+            else if (check_validity)
+              // if index is
+              // invalid_dof_index:
+              // check if this one
+              // really is unused
+              Assert (dof_handler.get_triangulation()
+                      .vertex_used((i-dof_handler.vertex_dofs.begin()) /
+                                   dof_handler.selected_fe->dofs_per_vertex)
+                      == false,
+                      ExcInternalError ());
+
+          for (unsigned int level=0; level<dof_handler.levels.size(); ++level)
+            for (std::vector<types::global_dof_index>::iterator
+                 i=dof_handler.levels[level]->dof_object.dofs.begin();
+                 i!=dof_handler.levels[level]->dof_object.dofs.end(); ++i)
+              if (*i != DoFHandler<1,spacedim>::invalid_dof_index)
+                *i = new_numbers[*i];
+
+          // update the cache
+          // used for cell dof
+          // indices
+          for (typename DoFHandler<1,spacedim>::level_cell_iterator
+               cell = dof_handler.begin();
+               cell != dof_handler.end(); ++cell)
+            cell->update_cell_dof_indices_cache ();
+        }
+
+        // 1d multigrid renumbering: rewrites the indices stored for @p level at
+        // the vertices and in mg_levels[level]. new_numbers is indexed by the
+        // old index, or by its position within @p indices if that set is non-empty.
+        template <int spacedim>
+        static
+        void
+        renumber_mg_dofs (const std::vector<dealii::types::global_dof_index> &new_numbers,
+                          const IndexSet &indices,
+                          DoFHandler<1,spacedim>          &dof_handler,
+                          const unsigned int level,
+                          const bool check_validity)
+        {
+          for (typename std::vector<typename DoFHandler<1,spacedim>::MGVertexDoFs>::iterator
+               i=dof_handler.mg_vertex_dofs.begin();
+               i!=dof_handler.mg_vertex_dofs.end();
+               ++i)
+            // only vertices whose level range contains @p level are touched
+            if ((i->get_coarsest_level() <= level) &&
+                (i->get_finest_level() >= level))
+              for (unsigned int d=0; d<dof_handler.get_fe().dofs_per_vertex; ++d)
+                {
+                  dealii::types::global_dof_index idx = i->get_index (level, d);
+                  if (idx != DoFHandler<1>::invalid_dof_index)
+                    i->set_index (level, d,
+                                  (indices.n_elements() == 0)?
+                                  (new_numbers[idx]) :
+                                  (new_numbers[indices.index_within_set(idx)]));
+                  if (check_validity)
+                    Assert(idx != DoFHandler<1>::invalid_dof_index, ExcInternalError ());
+                }
+          // indices stored in mg_levels[level]->dof_object; invalid entries are skipped
+          for (std::vector<types::global_dof_index>::iterator
+               i=dof_handler.mg_levels[level]->dof_object.dofs.begin();
+               i!=dof_handler.mg_levels[level]->dof_object.dofs.end();
+               ++i)
+            {
+              if (*i != DoFHandler<1>::invalid_dof_index)
+                {
+                  Assert(*i<new_numbers.size(), ExcInternalError());
+                  *i = (indices.n_elements() == 0)?
+                       (new_numbers[*i]) :
+                       (new_numbers[indices.index_within_set(*i)]);
+                }
+            }
+        }
+
+
+        template <int spacedim>
+        static
+        void
+        renumber_dofs (const std::vector<types::global_dof_index> &new_numbers,
+                       const IndexSet &indices,
+                       DoFHandler<2,spacedim> &dof_handler,
+                       const bool check_validity)
+        {
+          typedef std::vector<types::global_dof_index>::iterator dof_iterator;
+          const types::global_dof_index invalid = DoFHandler<2,spacedim>::invalid_dof_index;
+
+          // without elements in the index set, new_numbers is indexed by the old
+          // DoF number itself, otherwise by its position within the set
+          const bool direct_lookup = (indices.n_elements() == 0);
+
+          // All loops below walk over the raw index storage rather than over
+          // cells, so that DoFs on the interface of two cells are renumbered
+          // only once. Entries equal to invalid_dof_index (unused objects)
+          // are left untouched.
+
+          // DoFs stored for vertices
+          for (dof_iterator dof = dof_handler.vertex_dofs.begin();
+               dof != dof_handler.vertex_dofs.end(); ++dof)
+            {
+              if (*dof != invalid)
+                {
+                  if (direct_lookup)
+                    *dof = new_numbers[*dof];
+                  else
+                    *dof = new_numbers[indices.index_within_set(*dof)];
+                }
+              else if (check_validity)
+                // an invalid index is only acceptable on an unused vertex
+                Assert (dof_handler.get_triangulation()
+                        .vertex_used((dof-dof_handler.vertex_dofs.begin()) /
+                                     dof_handler.selected_fe->dofs_per_vertex)
+                        == false,
+                        ExcInternalError ());
+            }
+
+          // DoFs stored for lines
+          for (dof_iterator dof = dof_handler.faces->lines.dofs.begin();
+               dof != dof_handler.faces->lines.dofs.end(); ++dof)
+            if (*dof != invalid)
+              *dof = (direct_lookup ?
+                      new_numbers[*dof] :
+                      new_numbers[indices.index_within_set(*dof)]);
+
+          // DoFs stored in the dof_object of each level
+          for (unsigned int level=0; level<dof_handler.levels.size(); ++level)
+            for (dof_iterator dof = dof_handler.levels[level]->dof_object.dofs.begin();
+                 dof != dof_handler.levels[level]->dof_object.dofs.end(); ++dof)
+              if (*dof != invalid)
+                *dof = (direct_lookup ?
+                        new_numbers[*dof] :
+                        new_numbers[indices.index_within_set(*dof)]);
+
+          // finally, bring the cached cell dof indices up to date
+          typename DoFHandler<2,spacedim>::level_cell_iterator cell = dof_handler.begin();
+          for (; cell != dof_handler.end(); ++cell)
+            cell->update_cell_dof_indices_cache ();
+        }
+
+        /**
+         * Renumber the multigrid DoF indices stored for level @p level (2d
+         * case); a level the triangulation does not have is silently ignored.
+         */
+        template <int spacedim>
+        static
+        void
+        renumber_mg_dofs (const std::vector<dealii::types::global_dof_index> &new_numbers,
+                          const IndexSet &indices,
+                          DoFHandler<2,spacedim>          &dof_handler,
+                          const unsigned int level,
+                          const bool check_validity)
+        {
+          if (level>=dof_handler.get_triangulation().n_levels())
+            return;
+          for (typename std::vector<typename DoFHandler<2,spacedim>::MGVertexDoFs>::iterator i=dof_handler.mg_vertex_dofs.begin();
+               i!=dof_handler.mg_vertex_dofs.end(); ++i)
+            // if the present vertex lives on the present level
+            if ((i->get_coarsest_level() <= level) &&
+                (i->get_finest_level() >= level))
+              for (unsigned int d=0; d<dof_handler.get_fe().dofs_per_vertex; ++d)
+                {
+                  dealii::types::global_dof_index idx = i->get_index (level, d);
+                  if (idx != DoFHandler<2>::invalid_dof_index)
+                    i->set_index (level, d,
+                                  ((indices.n_elements() == 0) ?
+                                   new_numbers[idx] :
+                                   new_numbers[indices.index_within_set(idx)]));
+                  if (check_validity)
+                    Assert(idx != DoFHandler<2>::invalid_dof_index, ExcInternalError ());
+                }
+
+          if (dof_handler.get_fe().dofs_per_line > 0)
+            {
+              // save user flags as they will be modified
+              std::vector<bool> user_flags;
+              dof_handler.get_triangulation().save_user_flags(user_flags);
+              const_cast<dealii::Triangulation<2,spacedim> &>(dof_handler.get_triangulation()).clear_user_flags ();
+
+              // flag all lines adjacent to cells of the current level, as
+              // those lines logically belong to the same level as the cell,
+              // at least for isotropic refinement
+              for (typename DoFHandler<2,spacedim>::level_cell_iterator cell = dof_handler.begin(level);
+                   cell != dof_handler.end(level); ++cell)
+                for (unsigned int line=0; line < GeometryInfo<2>::faces_per_cell; ++line)
+                  cell->face(line)->set_user_flag();
+              // renumber each flagged line once; its flag is cleared right afterwards
+              for (typename DoFHandler<2,spacedim>::cell_iterator cell = dof_handler.begin();
+                   cell != dof_handler.end(); ++cell)
+                for (unsigned int l=0; l<GeometryInfo<2>::lines_per_cell; ++l)
+                  if (cell->line(l)->user_flag_set())
+                    {
+                      for (unsigned int d=0; d<dof_handler.get_fe().dofs_per_line; ++d)
+                        {
+                          dealii::types::global_dof_index idx = cell->line(l)->mg_dof_index(level, d);
+                          if (idx != DoFHandler<2>::invalid_dof_index)
+                            cell->line(l)->set_mg_dof_index (level, d, ((indices.n_elements() == 0) ?
+                                                                        new_numbers[idx] :
+                                                                        new_numbers[indices.index_within_set(idx)]));
+                          if (check_validity)
+                            Assert(idx != DoFHandler<2>::invalid_dof_index, ExcInternalError ());
+                        }
+                      cell->line(l)->clear_user_flag();
+                    }
+              // finally, restore user flags
+              const_cast<dealii::Triangulation<2,spacedim> &>(dof_handler.get_triangulation()).load_user_flags (user_flags);
+            }
+
+          for (std::vector<types::global_dof_index>::iterator i=dof_handler.mg_levels[level]->dof_object.dofs.begin();
+               i!=dof_handler.mg_levels[level]->dof_object.dofs.end(); ++i)
+            {
+              if (*i != DoFHandler<2>::invalid_dof_index)
+                {
+                  Assert(*i<new_numbers.size(), ExcInternalError());
+                  *i = ((indices.n_elements() == 0) ?
+                        new_numbers[*i] :
+                        new_numbers[indices.index_within_set(*i)]);
+                }
+            }
+        }
+
+
+        template <int spacedim>
+        static
+        void
+        renumber_dofs (const std::vector<types::global_dof_index> &new_numbers,
+                       const IndexSet &indices,
+                       DoFHandler<3,spacedim>          &dof_handler,
+                       const bool check_validity)
+        {
+          typedef std::vector<types::global_dof_index>::iterator dof_iterator;
+          const types::global_dof_index invalid = DoFHandler<3,spacedim>::invalid_dof_index;
+
+          // without elements in the index set, new_numbers is indexed by the old
+          // DoF number itself, otherwise by its position within the set
+          const bool direct_lookup = (indices.n_elements() == 0);
+
+          // All loops below walk over the raw index storage rather than over
+          // cells, so that DoFs on the interface of two cells are renumbered
+          // only once. Entries equal to invalid_dof_index (unused objects)
+          // are left untouched.
+          // DoFs stored for vertices
+          for (dof_iterator dof = dof_handler.vertex_dofs.begin();
+               dof != dof_handler.vertex_dofs.end(); ++dof)
+            {
+              if (*dof != invalid)
+                {
+                  if (direct_lookup)
+                    *dof = new_numbers[*dof];
+                  else
+                    *dof = new_numbers[indices.index_within_set(*dof)];
+                }
+              else if (check_validity)
+                // an invalid index is only acceptable on an unused vertex
+                Assert (dof_handler.get_triangulation()
+                        .vertex_used((dof-dof_handler.vertex_dofs.begin()) /
+                                     dof_handler.selected_fe->dofs_per_vertex)
+                        == false,
+                        ExcInternalError ());
+            }
+
+          // DoFs stored for lines
+          for (dof_iterator dof = dof_handler.faces->lines.dofs.begin();
+               dof != dof_handler.faces->lines.dofs.end(); ++dof)
+            if (*dof != invalid)
+              *dof = (direct_lookup ?
+                      new_numbers[*dof] :
+                      new_numbers[indices.index_within_set(*dof)]);
+
+          // DoFs stored for quads
+          for (dof_iterator dof = dof_handler.faces->quads.dofs.begin();
+               dof != dof_handler.faces->quads.dofs.end(); ++dof)
+            if (*dof != invalid)
+              *dof = (direct_lookup ?
+                      new_numbers[*dof] :
+                      new_numbers[indices.index_within_set(*dof)]);
+
+          // DoFs stored in the dof_object of each level
+          for (unsigned int level=0; level<dof_handler.levels.size(); ++level)
+            for (dof_iterator dof = dof_handler.levels[level]->dof_object.dofs.begin();
+                 dof != dof_handler.levels[level]->dof_object.dofs.end(); ++dof)
+              if (*dof != invalid)
+                *dof = (direct_lookup ?
+                        new_numbers[*dof] :
+                        new_numbers[indices.index_within_set(*dof)]);
+
+          // finally, bring the cached cell dof indices up to date
+          typename DoFHandler<3,spacedim>::level_cell_iterator cell = dof_handler.begin();
+          for (; cell != dof_handler.end(); ++cell)
+            cell->update_cell_dof_indices_cache ();
+        }
+
+        /**
+         * Renumber the multigrid DoF indices stored for level @p level (3d
+         * case); a level the triangulation does not have is silently ignored.
+         */
+        template <int spacedim>
+        static
+        void
+        renumber_mg_dofs (const std::vector<dealii::types::global_dof_index> &new_numbers,
+                          const IndexSet &indices,
+                          DoFHandler<3,spacedim> &dof_handler,
+                          const unsigned int level,
+                          const bool check_validity)
+        {
+          if (level>=dof_handler.get_triangulation().n_levels())
+            return;
+          for (typename std::vector<typename DoFHandler<3,spacedim>::MGVertexDoFs>::iterator i=dof_handler.mg_vertex_dofs.begin();
+               i!=dof_handler.mg_vertex_dofs.end(); ++i)
+            // if the present vertex lives on the present level
+            if ((i->get_coarsest_level() <= level) &&
+                (i->get_finest_level() >= level))
+              for (unsigned int d=0; d<dof_handler.get_fe().dofs_per_vertex; ++d)
+                {
+                  dealii::types::global_dof_index idx =i->get_index (level, d);
+                  if (idx != DoFHandler<3>::invalid_dof_index)
+                    i->set_index (level, d,
+                                  ((indices.n_elements() == 0) ?
+                                   new_numbers[idx] :
+                                   new_numbers[indices.index_within_set(idx)]));
+                  if (check_validity)
+                    Assert(idx != DoFHandler<3>::invalid_dof_index, ExcInternalError ());
+                }
+
+          if (dof_handler.get_fe().dofs_per_line > 0 || dof_handler.get_fe().dofs_per_quad > 0)
+            {
+              // save user flags as they will be modified
+              std::vector<bool> user_flags;
+              dof_handler.get_triangulation().save_user_flags(user_flags);
+              const_cast<dealii::Triangulation<3,spacedim> &>(dof_handler.get_triangulation()).clear_user_flags ();
+
+              // flag all lines adjacent to cells of the current level, as
+              // those lines logically belong to the same level as the cell,
+              // at least for isotropic refinement
+              for (typename DoFHandler<3,spacedim>::level_cell_iterator cell = dof_handler.begin(level);
+                   cell != dof_handler.end(level); ++cell)
+                for (unsigned int line=0; line < GeometryInfo<3>::lines_per_cell; ++line)
+                  cell->line(line)->set_user_flag();
+              // renumber each flagged line once; its flag is cleared right afterwards
+              for (typename DoFHandler<3,spacedim>::cell_iterator cell = dof_handler.begin();
+                   cell != dof_handler.end(); ++cell)
+                for (unsigned int l=0; l<GeometryInfo<3>::lines_per_cell; ++l)
+                  if (cell->line(l)->user_flag_set())
+                    {
+                      for (unsigned int d=0; d<dof_handler.get_fe().dofs_per_line; ++d)
+                        {
+                          dealii::types::global_dof_index idx = cell->line(l)->mg_dof_index(level, d);
+                          if (idx != DoFHandler<3>::invalid_dof_index)
+                            cell->line(l)->set_mg_dof_index (level, d, ((indices.n_elements() == 0) ?
+                                                                        new_numbers[idx] :
+                                                                        new_numbers[indices.index_within_set(idx)]));
+                          if (check_validity)
+                            Assert(idx != DoFHandler<3>::invalid_dof_index, ExcInternalError ());
+                        }
+                      cell->line(l)->clear_user_flag();
+                    }
+
+              // flag all quads adjacent to cells of the current level, as
+              // those quads logically belong to the same level as the cell,
+              // at least for isotropic refinement
+              for (typename DoFHandler<3,spacedim>::level_cell_iterator cell = dof_handler.begin(level);
+                   cell != dof_handler.end(level); ++cell)
+                for (unsigned int quad=0; quad < GeometryInfo<3>::quads_per_cell; ++quad)
+                  cell->quad(quad)->set_user_flag();
+              // renumber each flagged quad once; its flag is cleared right afterwards
+              for (typename DoFHandler<3,spacedim>::cell_iterator cell = dof_handler.begin();
+                   cell != dof_handler.end(); ++cell)
+                for (unsigned int q=0; q<GeometryInfo<3>::quads_per_cell; ++q)
+                  if (cell->quad(q)->user_flag_set())
+                    {
+                      for (unsigned int d=0; d<dof_handler.get_fe().dofs_per_quad; ++d)
+                        {
+                          dealii::types::global_dof_index idx = cell->quad(q)->mg_dof_index(level, d);
+                          if (idx != DoFHandler<3>::invalid_dof_index)
+                            cell->quad(q)->set_mg_dof_index (level, d, ((indices.n_elements() == 0) ?
+                                                                        new_numbers[idx] :
+                                                                        new_numbers[indices.index_within_set(idx)]));
+                          if (check_validity)
+                            Assert(idx != DoFHandler<3>::invalid_dof_index, ExcInternalError ());
+                        }
+                      cell->quad(q)->clear_user_flag();
+                    }
+              // finally, restore user flags
+              const_cast<dealii::Triangulation<3,spacedim> &>(dof_handler.get_triangulation()).load_user_flags (user_flags);
+            }
+
+          for (std::vector<types::global_dof_index>::iterator i=dof_handler.mg_levels[level]->dof_object.dofs.begin();
+               i!=dof_handler.mg_levels[level]->dof_object.dofs.end(); ++i)
+            {
+              if (*i != DoFHandler<3>::invalid_dof_index)
+                {
+                  Assert(*i<new_numbers.size(), ExcInternalError());
+                  *i = ((indices.n_elements() == 0) ?
+                        new_numbers[*i] :
+                        new_numbers[indices.index_within_set(*i)]);
+                }
+            }
+        }
+
+
+      };
+
+
+
+      /* --------------------- class PolicyBase ---------------- */
+
+      template <int dim, int spacedim>
+      PolicyBase<dim,spacedim>::~PolicyBase ()
+      {} // empty body: the destructor performs no explicit work
+
+
+      /* --------------------- class Sequential ---------------- */
+
+
+      template <int dim, int spacedim>
+      void
+      Sequential<dim,spacedim>::
+      distribute_dofs (DoFHandler<dim,spacedim> &dof_handler,
+                       NumberCache &number_cache_current ) const
+      {
+        // hand out DoF indices starting at 0; the value returned by the
+        // implementation is used below as the total number of DoFs
+        const types::global_dof_index total_dofs
+          = Implementation::distribute_dofs (0,
+                                             numbers::invalid_subdomain_id,
+                                             dof_handler);
+
+        // with only one process, the set of locally owned DoFs is the
+        // complete range [0,total_dofs)
+        IndexSet all_dofs (total_dofs);
+        all_dofs.add_range (0, total_dofs);
+        all_dofs.compress();
+
+        // assemble the cache in a local object and hand it to the caller at
+        // the very end; the per-processor vectors have exactly one entry
+        NumberCache fresh_cache;
+        fresh_cache.n_global_dofs        = total_dofs;
+        fresh_cache.n_locally_owned_dofs = total_dofs;
+        fresh_cache.locally_owned_dofs   = all_dofs;
+        fresh_cache.n_locally_owned_dofs_per_processor
+          = std::vector<types::global_dof_index> (1, total_dofs);
+        fresh_cache.locally_owned_dofs_per_processor
+          = std::vector<IndexSet> (1, all_dofs);
+
+        number_cache_current = fresh_cache;
+      }
+
+
+      template <int dim, int spacedim>
+      void
+      Sequential<dim,spacedim>::
+      distribute_mg_dofs (DoFHandler<dim,spacedim> &dof_handler,
+                          std::vector<NumberCache> &number_caches) const
+      {
+        // remember the user flags and clear them; they are put back at the end
+        dealii::Triangulation<dim, spacedim> &tria = const_cast<dealii::Triangulation<dim, spacedim>&>(dof_handler.get_triangulation());
+        std::vector<bool> saved_user_flags;
+        tria.save_user_flags (saved_user_flags);
+        tria.clear_user_flags ();
+
+        // every level is numbered on its own, starting from zero, and is owned completely by the one process
+        for (unsigned int level = 0; level < tria.n_levels(); ++level)
+          {
+            const types::global_dof_index n_level_dofs = Implementation::distribute_dofs_on_level(0, numbers::invalid_subdomain_id, dof_handler, level);
+            NumberCache &cache = number_caches[level];
+            cache.n_global_dofs = n_level_dofs;
+            cache.n_locally_owned_dofs = n_level_dofs;
+            cache.locally_owned_dofs = complete_index_set(n_level_dofs);
+            cache.locally_owned_dofs_per_processor.assign(1, complete_index_set(n_level_dofs));
+            cache.n_locally_owned_dofs_per_processor.assign(1, n_level_dofs);
+          }
+        tria.load_user_flags (saved_user_flags);
+      }
+
+      template <int dim, int spacedim>
+      void
+      Sequential<dim,spacedim>::
+      renumber_dofs (const std::vector<types::global_dof_index> &new_numbers,
+                     dealii::DoFHandler<dim,spacedim> &dof_handler,
+                     NumberCache &number_cache_current) const
+      {
+        // an index set without elements is passed along, and the last
+        // argument asks the implementation to perform its validity checks
+        Implementation::renumber_dofs (new_numbers, IndexSet(0),
+                                       dof_handler, true);
+
+        // In the sequential case the number cache should not have changed
+        // through renumbering, but the structure handed in by the caller
+        // still has to be filled appropriately. It is rebuilt from the
+        // number of DoFs the handler reports.
+        const types::global_dof_index total_dofs = dof_handler.n_dofs();
+
+        // a single process owns the complete range [0,total_dofs)
+        IndexSet all_dofs (total_dofs);
+        all_dofs.add_range (0, total_dofs);
+        all_dofs.compress();
+
+        // the per-processor vectors get exactly one entry each
+        NumberCache fresh_cache;
+        fresh_cache.n_global_dofs        = total_dofs;
+        fresh_cache.n_locally_owned_dofs = total_dofs;
+        fresh_cache.locally_owned_dofs   = all_dofs;
+        fresh_cache.n_locally_owned_dofs_per_processor
+          = std::vector<types::global_dof_index> (1, total_dofs);
+        fresh_cache.locally_owned_dofs_per_processor
+          = std::vector<IndexSet> (1, all_dofs);
+
+        number_cache_current = fresh_cache;
+      }
+
+      /* --------------------- class ParallelShared ---------------- */
+
+      template <int dim, int spacedim>
+      void
+      ParallelShared<dim,spacedim>::
+      distribute_dofs (DoFHandler<dim,spacedim> &dof_handler,
+                       NumberCache &number_cache) const
+      {
+        // If the underlying shared::Tria allows artificial cells, we need to do
+        // some tricks here to make Sequential algorithms play nicely.
+        // Namely, we first restore the original partition (without artificial
+        // cells) and put the current subdomain ids back at the end of this function.
+        const parallel::shared::Triangulation<dim, spacedim> *tr =
+          (dynamic_cast<const parallel::shared::Triangulation<dim, spacedim>*> (&dof_handler.get_triangulation()));
+        Assert(tr != 0, ExcInternalError());
+        typename parallel::shared::Triangulation<dim,spacedim>::active_cell_iterator
+        cell = dof_handler.get_triangulation().begin_active(),
+        endc = dof_handler.get_triangulation().end();
+        std::vector<types::subdomain_id> current_subdomain_ids(tr->n_active_cells());
+        const std::vector<types::subdomain_id> &true_subdomain_ids = tr->get_true_subdomain_ids_of_cells();
+        if (tr->with_artificial_cells())
+          for (unsigned int index=0; cell != endc; cell++, index++)
+            {
+              current_subdomain_ids[index] = cell->subdomain_id();
+              cell->set_subdomain_id(true_subdomain_ids[index]);
+            }
+        // distribute the DoFs with the sequential policy, then renumber them subdomain-wise
+        Sequential<dim,spacedim>::distribute_dofs (dof_handler,number_cache);
+        DoFRenumbering::subdomain_wise (dof_handler);
+        // dofrenumbering will reset subdomains, this is ugly but we need to do it again:
+        cell = tr->begin_active();
+        if (tr->with_artificial_cells())
+          for (unsigned int index=0; cell != endc; cell++, index++)
+            cell->set_subdomain_id(true_subdomain_ids[index]);
+        // fill the ownership entries of the number cache from the per-subdomain index sets
+        number_cache.locally_owned_dofs_per_processor = DoFTools::locally_owned_dofs_per_subdomain (dof_handler);
+        number_cache.locally_owned_dofs = number_cache.locally_owned_dofs_per_processor[dof_handler.get_triangulation().locally_owned_subdomain()];
+        number_cache.n_locally_owned_dofs_per_processor.resize (number_cache.locally_owned_dofs_per_processor.size());
+        for (unsigned int i = 0; i < number_cache.n_locally_owned_dofs_per_processor.size(); i++)
+          number_cache.n_locally_owned_dofs_per_processor[i] = number_cache.locally_owned_dofs_per_processor[i].n_elements();
+        number_cache.n_locally_owned_dofs = number_cache.n_locally_owned_dofs_per_processor[dof_handler.get_triangulation().locally_owned_subdomain()];
+
+        // restore the subdomain ids that were recorded at the top of this function
+        cell = tr->begin_active();
+        if (tr->with_artificial_cells())
+          for (unsigned int index=0; cell != endc; cell++, index++)
+            cell->set_subdomain_id(current_subdomain_ids[index]);
+      }
+
+      template <int dim, int spacedim>
+      void
+      ParallelShared<dim,spacedim>::
+      distribute_mg_dofs (DoFHandler<dim,spacedim> &dof_handler,
+                          std::vector<NumberCache> &number_caches) const
+      {
+        // first, call the sequential function to distribute the level dofs
+        Sequential<dim,spacedim>:: distribute_mg_dofs (dof_handler, number_caches);
+        // The number caches would now have to be updated. That part is not yet
+        // implemented, so this function always ends in the AssertThrow below.
+        AssertThrow(false,ExcNotImplemented());
+      }
+
+      template <int dim, int spacedim>
+      void
+      ParallelShared<dim,spacedim>::
+      renumber_dofs (const std::vector<types::global_dof_index> &new_numbers,
+                     dealii::DoFHandler<dim,spacedim> &dof_handler,
+                     NumberCache &number_cache) const
+      {
+
+#ifndef DEAL_II_WITH_MPI
+        (void)new_numbers;
+        (void)dof_handler;
+        (void)number_cache;
+        Assert (false, ExcNotImplemented());
+#else
+        // Similar to distribute_dofs() we need to have a special treatment in
+        // case artificial cells are present.
+        const parallel::shared::Triangulation<dim, spacedim> *tr =
+          (dynamic_cast<const parallel::shared::Triangulation<dim, spacedim>*> (&dof_handler.get_triangulation()));
+        Assert(tr != 0, ExcInternalError());
+        typename parallel::shared::Triangulation<dim,spacedim>::active_cell_iterator
+        cell = dof_handler.get_triangulation().begin_active(),
+        endc = dof_handler.get_triangulation().end();
+        std::vector<types::subdomain_id> current_subdomain_ids(tr->n_active_cells());
+        const std::vector<types::subdomain_id> &true_subdomain_ids = tr->get_true_subdomain_ids_of_cells();
+        if (tr->with_artificial_cells())
+          for (unsigned int index=0; cell != endc; cell++, index++)
+            {
+              current_subdomain_ids[index] = cell->subdomain_id();
+              cell->set_subdomain_id(true_subdomain_ids[index]);
+            }
+
+        std::vector<types::global_dof_index> global_gathered_numbers (dof_handler.n_dofs (), 0);
+        // as we call DoFRenumbering::subdomain_wise (dof_handler) from distribute_dofs(),
+        // we need to support sequential-like input.
+        // Distributed-like input from, for example, component_wise renumbering is also supported.
+        if (new_numbers.size () == dof_handler.n_dofs ())
+          {
+            global_gathered_numbers = new_numbers;
+          }
+        else
+          {
+            Assert(new_numbers.size() == dof_handler.locally_owned_dofs().n_elements(),
+                   ExcInternalError());
+            const unsigned int n_cpu = Utilities::MPI::n_mpi_processes (tr->get_communicator ());
+            std::vector<types::global_dof_index> gathered_new_numbers (dof_handler.n_dofs (), 0);
+            Assert(Utilities::MPI::this_mpi_process (tr->get_communicator ()) ==
+                   dof_handler.get_triangulation().locally_owned_subdomain (),
+                   ExcInternalError())
+
+            //gather new numbers among processors into one vector
+            {
+              std::vector<types::global_dof_index> new_numbers_copy (new_numbers);
+              // displs:
+              // Entry i specifies the displacement (relative to recvbuf )
+              // at which to place the incoming data from process i
+              // rcounts:
+              // containing the number of elements that are to be received from each process
+              std::vector<int> displs(n_cpu),
+                  rcounts(n_cpu);
+              types::global_dof_index shift = 0;
+              //set rcounts based on new_numbers:
+              int cur_count = new_numbers_copy.size ();
+              MPI_Allgather (&cur_count,  1, MPI_INT,
+                             &rcounts[0], 1, MPI_INT,
+                             tr->get_communicator ());
+
+              for (unsigned int i = 0; i < n_cpu; i++)
+                {
+                  displs[i]  = shift;
+                  shift     += rcounts[i];
+                }
+              Assert(((int)new_numbers_copy.size()) ==
+                     rcounts[Utilities::MPI::this_mpi_process (tr->get_communicator ())],
+                     ExcInternalError());
+              MPI_Allgatherv (&new_numbers_copy[0],     new_numbers_copy.size (),
+                              DEAL_II_DOF_INDEX_MPI_TYPE,
+                              &gathered_new_numbers[0], &rcounts[0],
+                              &displs[0],
+                              DEAL_II_DOF_INDEX_MPI_TYPE,
+                              tr->get_communicator ());
+            }
+
+            // put new numbers according to the current locally_owned_dofs_per_processor IndexSets
+            types::global_dof_index shift = 0;
+            // flag_1 and flag_2 are
+            // used to control that there is a
+            // one-to-one relation between old and new DoFs.
+            std::vector<unsigned int> flag_1 (dof_handler.n_dofs (), 0),
+                flag_2 (dof_handler.n_dofs (), 0);
+            for (unsigned int i = 0; i < n_cpu; i++)
+              {
+                const IndexSet &iset =
+                  number_cache.locally_owned_dofs_per_processor[i];
+                for (types::global_dof_index ind = 0;
+                     ind < iset.n_elements (); ind++)
+                  {
+                    const types::global_dof_index target = iset.nth_index_in_set (ind);
+                    const types::global_dof_index value  = gathered_new_numbers[shift + ind];
+                    Assert(target < dof_handler.n_dofs(), ExcInternalError());
+                    Assert(value  < dof_handler.n_dofs(), ExcInternalError());
+                    global_gathered_numbers[target] = value;
+                    flag_1[target]++;
+                    flag_2[value]++;
+                  }
+                shift += iset.n_elements ();
+              }
+
+            Assert(*std::max_element(flag_1.begin(), flag_1.end()) == 1,
+                   ExcInternalError());
+            Assert(*std::min_element(flag_1.begin(), flag_1.end()) == 1,
+                   ExcInternalError());
+            Assert((*std::max_element(flag_2.begin(), flag_2.end())) == 1,
+                   ExcInternalError());
+            Assert((*std::min_element(flag_2.begin(), flag_2.end())) == 1,
+                   ExcInternalError());
+          }
+        Sequential<dim, spacedim>::renumber_dofs (global_gathered_numbers, dof_handler, number_cache);
+        // correct number_cache:
+        number_cache.locally_owned_dofs_per_processor =
+          DoFTools::locally_owned_dofs_per_subdomain (dof_handler);
+        number_cache.locally_owned_dofs =
+          number_cache.locally_owned_dofs_per_processor[dof_handler.get_triangulation().locally_owned_subdomain ()];
+        // sequential renumbering returns a vector of size 1 here,
+        // correct this:
+        number_cache.n_locally_owned_dofs_per_processor.resize(number_cache.locally_owned_dofs_per_processor.size());
+        for (unsigned int i = 0;
+             i < number_cache.n_locally_owned_dofs_per_processor.size (); i++)
+          number_cache.n_locally_owned_dofs_per_processor[i] = number_cache.locally_owned_dofs_per_processor[i].n_elements ();
+
+        number_cache.n_locally_owned_dofs =
+          number_cache.n_locally_owned_dofs_per_processor[dof_handler.get_triangulation().locally_owned_subdomain ()];
+
+        // restore artificial cells
+        cell = tr->begin_active();
+        if (tr->with_artificial_cells())
+          for (unsigned int index=0; cell != endc; cell++, index++)
+            cell->set_subdomain_id(current_subdomain_ids[index]);
+#endif
+      }
+
+      /* --------------------- class ParallelDistributed ---------------- */
+
+#ifdef DEAL_II_WITH_P4EST
+
+      namespace
+      {
+        template <int dim>
+        struct types
+        {
+
+          /**
+           * Data collected for one destination subdomain: one tree index
+           * and one p4est quadrant per cell, plus a flat dof list. dofs
+           * is of the form num_dofindices of quadrant 0, followed by
+           * num_dofindices indices, num_dofindices of quadrant 1, ...
+           * pack_data() serializes this as the number of cells followed
+           * by the raw contents of tree_index, quadrants and dofs, in
+           * that order.
+           */
+          struct cellinfo
+          {
+            std::vector<unsigned int> tree_index; // one entry per recorded cell
+            std::vector<typename dealii::internal::p4est::types<dim>::quadrant> quadrants; // one entry per recorded cell
+            std::vector<dealii::types::global_dof_index> dofs; // per cell: number of indices, then the indices
+            // Number of bytes pack_data() writes: a leading cell count (unsigned int) plus the three arrays.
+            unsigned int bytes_for_buffer () const
+            {
+              return (sizeof(unsigned int) +
+                      tree_index.size() * sizeof(unsigned int) +
+                      quadrants.size() * sizeof(typename dealii::internal::p4est
+                                                ::types<dim>::quadrant) +
+                      dofs.size() * sizeof(dealii::types::global_dof_index));
+            }
+            // Resize buffer to bytes_for_buffer() and copy the cell count and the three arrays into it back to back.
+            void pack_data (std::vector<char> &buffer) const
+            {
+              buffer.resize(bytes_for_buffer());
+              // NOTE(review): element 0 of each vector is addressed and quadrants are copied with tree_index.size() as count, so this presumably expects non-empty, equally sized vectors -- confirm against callers
+              char *ptr = &buffer[0];
+
+              const unsigned int num_cells = tree_index.size();
+              std::memcpy(ptr, &num_cells, sizeof(unsigned int));
+              ptr += sizeof(unsigned int);
+              // one tree index per cell
+              std::memcpy(ptr,
+                          &tree_index[0],
+                          num_cells*sizeof(unsigned int));
+              ptr += num_cells*sizeof(unsigned int);
+              // one quadrant per cell
+              std::memcpy(ptr,
+                          &quadrants[0],
+                          num_cells * sizeof(typename dealii::internal::p4est::
+                                             types<dim>::quadrant));
+              ptr += num_cells*sizeof(typename dealii::internal::p4est::types<dim>::
+                                      quadrant);
+              // the dof data, whose length is independent of num_cells
+              std::memcpy(ptr,
+                          &dofs[0],
+                          dofs.size() * sizeof(dealii::types::global_dof_index));
+              ptr += dofs.size() * sizeof(dealii::types::global_dof_index);
+              // the write position has to end exactly at the end of the buffer
+              Assert (ptr == &buffer[0]+buffer.size(),
+                      ExcInternalError());
+
+            }
+          };
+        };
+
+
+
+        template <int dim, int spacedim>
+        void
+        fill_dofindices_recursively (const typename parallel::distributed::Triangulation<dim,spacedim> &tria,
+                                     const unsigned int tree_index,
+                                     const typename DoFHandler<dim,spacedim>::level_cell_iterator &dealii_cell,
+                                     const typename dealii::internal::p4est::types<dim>::quadrant &p4est_cell,
+                                     const std::map<unsigned int, std::set<dealii::types::subdomain_id> > &vertices_with_ghost_neighbors,
+                                     std::map<dealii::types::subdomain_id, typename types<dim>::cellinfo> &needs_to_get_cell)
+        {
+          // Walk the cells below dealii_cell (p4est_cell is the matching quadrant) and record, per destination subdomain,
+          // the dof indices of flagged non-ghost leaves. tria is only passed on. First see if we have to recurse...
+          if (dealii_cell->has_children())
+            {
+              typename dealii::internal::p4est::types<dim>::quadrant
+              p4est_child[GeometryInfo<dim>::max_children_per_cell];
+              internal::p4est::init_quadrant_children<dim>(p4est_cell, p4est_child);
+              // visit every child together with its p4est child quadrant; tree_index stays the same
+
+              for (unsigned int c=0; c<GeometryInfo<dim>::max_children_per_cell; ++c)
+                fill_dofindices_recursively<dim,spacedim>(tria,
+                                                          tree_index,
+                                                          dealii_cell->child(c),
+                                                          p4est_child[c],
+                                                          vertices_with_ghost_neighbors,
+                                                          needs_to_get_cell);
+              return;
+            }
+
+          // we're at a leaf cell. see if the cell is
+          // flagged as interesting (user flag set).
+          // note that we have only flagged our own
+          // cells before; ghost cells are skipped
+          // here even if they carry a flag
+          if (dealii_cell->user_flag_set() && !dealii_cell->is_ghost())
+            {
+              Assert (!dealii_cell->is_artificial(), ExcInternalError());
+
+              // look up each vertex in vertices_with_ghost_neighbors
+              // and collect the subdomains listed for it; these are
+              // the destinations for this cell's dof indices
+              std::set<dealii::types::subdomain_id> send_to;
+              for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+                {
+                  const std::map<unsigned int, std::set<dealii::types::subdomain_id> >::const_iterator
+                  neighbor_subdomains_of_vertex
+                    = vertices_with_ghost_neighbors.find (dealii_cell->vertex_index(v));
+
+                  if (neighbor_subdomains_of_vertex ==
+                      vertices_with_ghost_neighbors.end())
+                    continue;
+
+                  Assert(neighbor_subdomains_of_vertex->second.size()!=0,
+                         ExcInternalError());
+
+                  send_to.insert(neighbor_subdomains_of_vertex->second.begin(),
+                                 neighbor_subdomains_of_vertex->second.end());
+                }
+
+              if (send_to.size() > 0)
+                {
+                  // this cell's dof_indices need to be sent to
+                  // someone: fetch them once and append them to
+                  // the record of every destination
+                  std::vector<dealii::types::global_dof_index>
+                  local_dof_indices (dealii_cell->get_fe().dofs_per_cell);
+                  dealii_cell->get_dof_indices (local_dof_indices);
+
+                  for (std::set<dealii::types::subdomain_id>::iterator it=send_to.begin();
+                       it!=send_to.end(); ++it)
+                    {
+                      const dealii::types::subdomain_id subdomain = *it;
+
+                      // get an iterator to what needs to be sent to
+                      // that subdomain (if already exists), or
+                      // create such an object. std::map::insert
+                      // leaves an existing entry untouched and
+                      // returns an iterator to it, so the
+                      // default-constructed cellinfo only ends up
+                      // in the map for a new key
+                      typename std::map<dealii::types::subdomain_id, typename types<dim>::cellinfo>::iterator
+                      p
+                        = needs_to_get_cell.insert (std::make_pair(subdomain,
+                                                                   typename types<dim>::cellinfo()))
+                          .first;
+
+                      p->second.tree_index.push_back(tree_index);
+                      p->second.quadrants.push_back(p4est_cell);
+                      // dofs layout per cell: the number of indices first, then the indices themselves
+                      p->second.dofs.push_back(dealii_cell->get_fe().dofs_per_cell);
+                      p->second.dofs.insert(p->second.dofs.end(),
+                                            local_dof_indices.begin(),
+                                            local_dof_indices.end());
+
+                    }
+                }
+            }
+        }
+
+        template <int dim, int spacedim>
+        void
+        fill_mg_dofindices_recursively (const typename parallel::distributed::Triangulation<dim,spacedim> &tria,
+                                        const unsigned int tree_index,
+                                        const typename DoFHandler<dim,spacedim>::level_cell_iterator &dealii_cell,
+                                        const typename dealii::internal::p4est::types<dim>::quadrant &p4est_cell,
+                                        const std::map<unsigned int, std::set<dealii::types::subdomain_id> > &vertices_with_ghost_neighbors,
+                                        std::map<dealii::types::subdomain_id, typename types<dim>::cellinfo> &needs_to_get_cell,
+                                        const unsigned int level)
+        { // Level variant: record, per destination subdomain, the mg dof indices of flagged cells on the given level that this process owns.
+          if (dealii_cell->level()>(int)level)
+            return; // cells finer than the requested level are not looked at
+          // see if we have to
+          // recurse...
+          if (dealii_cell->has_children())
+            {
+              typename dealii::internal::p4est::types<dim>::quadrant
+              p4est_child[GeometryInfo<dim>::max_children_per_cell];
+              internal::p4est::init_quadrant_children<dim>(p4est_cell, p4est_child);
+
+
+              for (unsigned int c=0; c<GeometryInfo<dim>::max_children_per_cell; ++c)
+                fill_mg_dofindices_recursively<dim,spacedim>(tria,
+                                                             tree_index,
+                                                             dealii_cell->child(c),
+                                                             p4est_child[c],
+                                                             vertices_with_ghost_neighbors,
+                                                             needs_to_get_cell,
+                                                             level);
+            } // no return here: a cell with children may itself be on the requested level
+
+          if (dealii_cell->level()<(int)level)
+            return; // coarser cells were only traversed to reach the requested level
+
+          // now we are on the right level!
+          Assert(dealii_cell->level()==(int)level, ExcInternalError());
+
+          // see if the cell is flagged as
+          // interesting. note that we have
+          // only flagged our own cells before;
+          // ownership is checked through the
+          // level subdomain id here
+          if (dealii_cell->user_flag_set() && dealii_cell->level_subdomain_id() == tria.locally_owned_subdomain())
+            {
+              // look up each vertex in vertices_with_ghost_neighbors
+              // and collect the subdomains listed for it as
+              // destinations for this cell's dof indices
+              std::set<dealii::types::subdomain_id> send_to;
+              for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+                {
+                  const std::map<unsigned int, std::set<dealii::types::subdomain_id> >::const_iterator
+                  neighbor_subdomains_of_vertex
+                    = vertices_with_ghost_neighbors.find (dealii_cell->vertex_index(v));
+
+                  if (neighbor_subdomains_of_vertex ==
+                      vertices_with_ghost_neighbors.end())
+                    continue;
+
+                  Assert(neighbor_subdomains_of_vertex->second.size()!=0,
+                         ExcInternalError());
+
+                  send_to.insert(neighbor_subdomains_of_vertex->second.begin(),
+                                 neighbor_subdomains_of_vertex->second.end());
+                }
+
+              // additionally, we need to send to the owners of all our direct children (multigrid only)
+              if (dealii_cell->has_children())
+                {
+                  for (unsigned int c=0; c<GeometryInfo<dim>::max_children_per_cell; ++c)
+                    {
+                      //TODO: we don't know about our children if proc 0 owns all coarse cells!
+                      dealii::types::subdomain_id dest = dealii_cell->child(c)->level_subdomain_id();
+                      Assert(dest!=dealii::numbers::artificial_subdomain_id && dest!=dealii::numbers::invalid_subdomain_id, ExcInternalError());
+                      if (dest != tria.locally_owned_subdomain())
+                        send_to.insert(dest);
+                    }
+                }
+
+              //additionally (multigrid only), we can have the case that children of our neighbor
+              //have us as a neighbor. In this case we and the children are active.
+              if (dealii_cell->active())
+                {
+                  for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+                    {
+                      if (dealii_cell->at_boundary(f))
+                        continue;
+                      typename DoFHandler<dim,spacedim>::level_cell_iterator neighbor = dealii_cell->neighbor(f);
+                      if (!neighbor->has_children())
+                        continue;
+
+                      for (unsigned int subface=0; subface<GeometryInfo<dim>::max_children_per_face; ++subface)
+                        {
+                          typename DoFHandler<dim,spacedim>::level_cell_iterator child = dealii_cell->neighbor_child_on_subface(f,subface);
+                          dealii::types::subdomain_id dest = child->subdomain_id(); // NOTE(review): subdomain_id() here, level_subdomain_id() in the other branches; presumably fine because these children are expected to be active -- confirm
+                          Assert(dest != dealii::numbers::artificial_subdomain_id, ExcInternalError());
+                          if (dest != tria.locally_owned_subdomain())
+                            send_to.insert(dest);
+                        }
+
+                    }
+
+                }
+
+              // Finally, if we are active and neighboring a coarser cell, add
+              // the level owner of that cell to the destination list
+              if (dealii_cell->active())
+                {
+                  for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+                    {
+                      if (dealii_cell->at_boundary(f))
+                        continue;
+                      typename DoFHandler<dim,spacedim>::level_cell_iterator neighbor = dealii_cell->neighbor(f);
+                      if (neighbor->level()>=dealii_cell->level())
+                        continue;
+
+                      dealii::types::subdomain_id dest = neighbor->level_subdomain_id();
+                      Assert(dest != dealii::numbers::artificial_subdomain_id, ExcInternalError());
+                      if (dest != tria.locally_owned_subdomain())
+                        send_to.insert(dest);
+
+                    }
+                }
+
+
+              // send if we have something to send
+              if (send_to.size() > 0)
+                {
+                  // this cell's level dof indices need to be
+                  // sent to someone: fetch them once and append
+                  // them to the record of every destination
+                  std::vector<dealii::types::global_dof_index>
+                  local_dof_indices (dealii_cell->get_fe().dofs_per_cell);
+                  dealii_cell->get_mg_dof_indices (local_dof_indices);
+
+                  for (std::set<dealii::types::subdomain_id>::iterator it=send_to.begin();
+                       it!=send_to.end(); ++it)
+                    {
+                      const dealii::types::subdomain_id subdomain = *it;
+
+                      // get an iterator to what needs to be sent to
+                      // that subdomain (if already exists), or
+                      // create such an object. std::map::insert
+                      // leaves an existing entry untouched and
+                      // returns an iterator to it, so the
+                      // default-constructed cellinfo only ends up
+                      // in the map for a new key
+                      typename std::map<dealii::types::subdomain_id, typename types<dim>::cellinfo>::iterator
+                      p
+                        = needs_to_get_cell.insert (std::make_pair(subdomain,
+                                                                   typename types<dim>::cellinfo()))
+                          .first;
+
+                      p->second.tree_index.push_back(tree_index);
+                      p->second.quadrants.push_back(p4est_cell);
+                      // dofs layout per cell: the number of indices first, then the indices themselves
+                      p->second.dofs.push_back(dealii_cell->get_fe().dofs_per_cell);
+                      p->second.dofs.insert(p->second.dofs.end(),
+                                            local_dof_indices.begin(),
+                                            local_dof_indices.end());
+
+                    }
+                }
+            }
+        }
+
+
+        template <int dim, int spacedim>
+        void
+        set_dofindices_recursively (
+          const parallel::distributed::Triangulation<dim,spacedim> &tria,
+          const typename dealii::internal::p4est::types<dim>::quadrant &p4est_cell,
+          const typename DoFHandler<dim,spacedim>::level_cell_iterator &dealii_cell,
+          const typename dealii::internal::p4est::types<dim>::quadrant &quadrant,
+          dealii::types::global_dof_index *dofs)
+        { // Find the cell below dealii_cell whose p4est quadrant equals quadrant and merge the indices in dofs into its dof indices. tria is only passed on.
+          if (internal::p4est::quadrant_is_equal<dim>(p4est_cell, quadrant))
+            {
+              Assert(!dealii_cell->has_children(), ExcInternalError());
+              Assert(dealii_cell->is_ghost(), ExcInternalError());
+              // the target is expected to be a ghost leaf; dofs is read at positions 0 .. dofs_per_cell-1
+              // read the cell's current dof indices (after refreshing its index cache)
+              std::vector<dealii::types::global_dof_index>
+              dof_indices (dealii_cell->get_fe().dofs_per_cell);
+              dealii_cell->update_cell_dof_indices_cache();
+              dealii_cell->get_dof_indices(dof_indices);
+              // take over every valid incoming index; an invalid incoming index leaves the entry as is and marks the cell incomplete
+              bool complete = true;
+              for (unsigned int i=0; i<dof_indices.size(); ++i)
+                if (dofs[i] != DoFHandler<dim,spacedim>::invalid_dof_index)
+                  {
+                    Assert((dof_indices[i] ==
+                            (DoFHandler<dim,spacedim>::invalid_dof_index))
+                           ||
+                           (dof_indices[i]==dofs[i]),
+                           ExcInternalError());
+                    dof_indices[i]=dofs[i];
+                  }
+                else
+                  complete=false;
+              // the user flag is set while indices are still missing and cleared otherwise
+              if (!complete)
+                const_cast // casts away the const of the iterator reference before modifying the cell
+                <typename DoFHandler<dim,spacedim>::level_cell_iterator &>
+                (dealii_cell)->set_user_flag();
+              else
+                const_cast
+                <typename DoFHandler<dim,spacedim>::level_cell_iterator &>
+                (dealii_cell)->clear_user_flag();
+
+              const_cast
+              <typename DoFHandler<dim,spacedim>::level_cell_iterator &>
+              (dealii_cell)->set_dof_indices(dof_indices);
+
+              return;
+            }
+          // not the target itself: only descend if this cell has children and its quadrant is an ancestor of the target
+          if (! dealii_cell->has_children())
+            return;
+
+          if (! internal::p4est::quadrant_is_ancestor<dim> (p4est_cell, quadrant))
+            return;
+
+          typename dealii::internal::p4est::types<dim>::quadrant
+          p4est_child[GeometryInfo<dim>::max_children_per_cell];
+          internal::p4est::init_quadrant_children<dim>(p4est_cell, p4est_child);
+
+          for (unsigned int c=0; c<GeometryInfo<dim>::max_children_per_cell; ++c)
+            set_dofindices_recursively<dim,spacedim> (tria, p4est_child[c],
+                                                      dealii_cell->child(c),
+                                                      quadrant, dofs);
+        }
+
+
+        template <int dim, int spacedim>
+        void
+        set_mg_dofindices_recursively (
+          const parallel::distributed::Triangulation<dim,spacedim> &tria,
+          const typename dealii::internal::p4est::types<dim>::quadrant &p4est_cell,
+          const typename DoFHandler<dim,spacedim>::level_cell_iterator &dealii_cell,
+          const typename dealii::internal::p4est::types<dim>::quadrant &quadrant,
+          dealii::types::global_dof_index *dofs,
+          unsigned int level)
+        { // Level variant: find the cell below dealii_cell whose p4est quadrant equals quadrant and merge the indices in dofs into its mg dof indices. tria is only passed on.
+          if (internal::p4est::quadrant_is_equal<dim>(p4est_cell, quadrant))
+            {
+              Assert(dealii_cell->level_subdomain_id()!=dealii::numbers::artificial_subdomain_id, ExcInternalError());
+              Assert(dealii_cell->level()==(int)level, ExcInternalError());
+              // the target is expected to be on the given level; dofs is read at positions 0 .. dofs_per_cell-1
+              // read the cell's current level dof indices
+              std::vector<dealii::types::global_dof_index>
+              dof_indices (dealii_cell->get_fe().dofs_per_cell);
+              dealii_cell->get_mg_dof_indices(dof_indices);
+              // take over every valid incoming index; an invalid incoming index leaves the entry as is and marks the cell incomplete
+              bool complete = true;
+              for (unsigned int i=0; i<dof_indices.size(); ++i)
+                if (dofs[i] != DoFHandler<dim,spacedim>::invalid_dof_index)
+                  {
+                    Assert((dof_indices[i] ==
+                            (DoFHandler<dim,spacedim>::invalid_dof_index))
+                           ||
+                           (dof_indices[i]==dofs[i]),
+                           ExcInternalError());
+                    dof_indices[i]=dofs[i];
+                  }
+                else
+                  complete=false;
+              // the user flag is set while indices are still missing and cleared otherwise
+              if (!complete)
+                const_cast // casts away the const of the iterator reference before modifying the cell
+                <typename DoFHandler<dim,spacedim>::level_cell_iterator &>
+                (dealii_cell)->set_user_flag();
+              else
+                const_cast
+                <typename DoFHandler<dim,spacedim>::level_cell_iterator &>
+                (dealii_cell)->clear_user_flag();
+
+              const_cast
+              <typename DoFHandler<dim,spacedim>::level_cell_iterator &>
+              (dealii_cell)->set_mg_dof_indices(dof_indices);
+              return;
+            }
+          // not the target itself: only descend if this cell has children and its quadrant is an ancestor of the target
+          if (! dealii_cell->has_children())
+            return;
+
+          if (! internal::p4est::quadrant_is_ancestor<dim> (p4est_cell, quadrant))
+            return;
+
+          typename dealii::internal::p4est::types<dim>::quadrant
+          p4est_child[GeometryInfo<dim>::max_children_per_cell];
+          internal::p4est::init_quadrant_children<dim>(p4est_cell, p4est_child);
+
+          for (unsigned int c=0; c<GeometryInfo<dim>::max_children_per_cell; ++c)
+            set_mg_dofindices_recursively<dim,spacedim> (tria, p4est_child[c],
+                                                         dealii_cell->child(c),
+                                                         quadrant, dofs, level);
+
+        }
+
+
+
+        template <int spacedim>
+        void
+        communicate_dof_indices_on_marked_cells
+        (const DoFHandler<1,spacedim> &,
+         const std::map<unsigned int, std::set<dealii::types::subdomain_id> > &,
+         const std::vector<dealii::types::global_dof_index> &,
+         const std::vector<dealii::types::global_dof_index> &)
+        { // Overload taking a DoFHandler<1,spacedim>; none of the (unnamed) arguments is used.
+          Assert (false, ExcNotImplemented()); // this case is flagged as not implemented
+        }
+
+
+
+        template <int dim, int spacedim>
+        void
+        communicate_dof_indices_on_marked_cells
+        (const DoFHandler<dim,spacedim> &dof_handler,
+         const std::map<unsigned int, std::set<dealii::types::subdomain_id> > &vertices_with_ghost_neighbors,
+         const std::vector<dealii::types::global_dof_index> &coarse_cell_to_p4est_tree_permutation,
+         const std::vector<dealii::types::global_dof_index> &p4est_tree_to_coarse_cell_permutation)
+        {
+#ifndef DEAL_II_WITH_P4EST
+          (void)vertices_with_ghost_neighbors;
+          Assert (false, ExcNotImplemented());
+#else
+
+          const parallel::distributed::Triangulation< dim, spacedim > *tr
+            = (dynamic_cast<const parallel::distributed::Triangulation<dim,spacedim>*>
+               (&dof_handler.get_triangulation()));
+          Assert (tr != 0, ExcInternalError());
+
+          // now collect cells and their
+          // dof_indices for the
+          // interested neighbors
+          typedef
+          std::map<dealii::types::subdomain_id, typename types<dim>::cellinfo>
+          cellmap_t;
+          cellmap_t needs_to_get_cells;
+
+          for (typename DoFHandler<dim,spacedim>::level_cell_iterator
+               cell = dof_handler.begin(0);
+               cell != dof_handler.end(0);
+               ++cell)
+            {
+              typename dealii::internal::p4est::types<dim>::quadrant p4est_coarse_cell;
+              internal::p4est::init_coarse_quadrant<dim>(p4est_coarse_cell);
+
+              fill_dofindices_recursively<dim,spacedim>
+              (*tr,
+               coarse_cell_to_p4est_tree_permutation[cell->index()],
+               cell,
+               p4est_coarse_cell,
+               vertices_with_ghost_neighbors,
+               needs_to_get_cells);
+            }
+
+
+          //sending
+          std::vector<std::vector<char> > sendbuffers (needs_to_get_cells.size());
+          std::vector<std::vector<char> >::iterator buffer = sendbuffers.begin();
+          std::vector<MPI_Request> requests (needs_to_get_cells.size());
+
+          unsigned int idx=0;
+
+          for (typename cellmap_t::iterator it=needs_to_get_cells.begin();
+               it!=needs_to_get_cells.end();
+               ++it, ++buffer, ++idx)
+            {
+              const unsigned int num_cells = it->second.tree_index.size();
+              (void)num_cells;
+
+              Assert(num_cells==it->second.quadrants.size(), ExcInternalError());
+              Assert(num_cells>0, ExcInternalError());
+
+              // pack all the data into
+              // the buffer for this
+              // recipient and send
+              // it. keep data around
+              // till we can make sure
+              // that the packet has been
+              // received
+              it->second.pack_data (*buffer);
+              MPI_Isend(&(*buffer)[0], buffer->size(),
+                        MPI_BYTE, it->first,
+                        123, tr->get_communicator(), &requests[idx]);
+            }
+
+
+          // mark all own cells, that miss some
+          // dof_data and collect the neighbors
+          // that are going to send stuff to us
+          std::set<dealii::types::subdomain_id> senders;
+          {
+            std::vector<dealii::types::global_dof_index> local_dof_indices;
+            typename DoFHandler<dim,spacedim>::active_cell_iterator
+            cell, endc = dof_handler.end();
+
+            for (cell = dof_handler.begin_active(); cell != endc; ++cell)
+              if (!cell->is_artificial())
+                {
+                  if (cell->is_ghost())
+                    {
+                      if (cell->user_flag_set())
+                        senders.insert(cell->subdomain_id());
+                    }
+                  else
+                    {
+                      local_dof_indices.resize (cell->get_fe().dofs_per_cell);
+                      cell->get_dof_indices (local_dof_indices);
+                      if (local_dof_indices.end() !=
+                          std::find (local_dof_indices.begin(),
+                                     local_dof_indices.end(),
+                                     DoFHandler<dim,spacedim>::invalid_dof_index))
+                        cell->set_user_flag();
+                      else
+                        cell->clear_user_flag();
+                    }
+
+                }
+          }
+
+
+          //* 5. receive ghostcelldata
+          std::vector<char> receive;
+          typename types<dim>::cellinfo cellinfo;
+          for (unsigned int i=0; i<senders.size(); ++i)
+            {
+              MPI_Status status;
+              int len;
+              MPI_Probe(MPI_ANY_SOURCE, 123, tr->get_communicator(), &status);
+              MPI_Get_count(&status, MPI_BYTE, &len);
+              receive.resize(len);
+
+              char *ptr = &receive[0];
+              MPI_Recv(ptr, len, MPI_BYTE, status.MPI_SOURCE, status.MPI_TAG,
+                       tr->get_communicator(), &status);
+
+              unsigned int cells;
+              memcpy(&cells, ptr, sizeof(unsigned int));
+              ptr+=sizeof(unsigned int);
+
+              //TODO: reinterpret too evil?
+              unsigned int *treeindex=reinterpret_cast<unsigned int *>(ptr);
+              ptr+=cells*sizeof(unsigned int);
+              typename dealii::internal::p4est::types<dim>::quadrant *quadrant
+                =reinterpret_cast<typename dealii::internal::p4est::types<dim>::quadrant *>(ptr);
+              ptr+=cells*sizeof(typename dealii::internal::p4est::types<dim>::quadrant);
+              dealii::types::global_dof_index *dofs
+                = reinterpret_cast<dealii::types::global_dof_index *>(ptr);
+
+              // the dofs pointer contains for each cell the number of dofs
+              // on that cell (dofs[0]) followed by the dof indices itself.
+              for (unsigned int c=0; c<cells; ++c, dofs+=1+dofs[0])
+                {
+                  typename DoFHandler<dim,spacedim>::level_cell_iterator
+                  cell (&dof_handler.get_triangulation(),
+                        0,
+                        p4est_tree_to_coarse_cell_permutation[treeindex[c]],
+                        &dof_handler);
+
+                  typename dealii::internal::p4est::types<dim>::quadrant p4est_coarse_cell;
+                  internal::p4est::init_coarse_quadrant<dim>(p4est_coarse_cell);
+
+                  Assert(cell->get_fe().dofs_per_cell==dofs[0], ExcInternalError());
+
+                  set_dofindices_recursively<dim,spacedim> (*tr,
+                                                            p4est_coarse_cell,
+                                                            cell,
+                                                            quadrant[c],
+                                                            (dofs+1));
+                }
+            }
+
+          // complete all sends, so that we can
+          // safely destroy the buffers.
+          if (requests.size() > 0)
+            MPI_Waitall(requests.size(), &requests[0], MPI_STATUSES_IGNORE);
+
+
+#ifdef DEBUG
+          {
+            //check all msgs got sent and received
+            unsigned int sum_send=0;
+            unsigned int sum_recv=0;
+            unsigned int sent=needs_to_get_cells.size();
+            unsigned int recv=senders.size();
+
+            MPI_Allreduce(&sent, &sum_send, 1, MPI_UNSIGNED, MPI_SUM, tr->get_communicator());
+            MPI_Allreduce(&recv, &sum_recv, 1, MPI_UNSIGNED, MPI_SUM, tr->get_communicator());
+            Assert(sum_send==sum_recv, ExcInternalError());
+          }
+#endif
+
+          //update dofindices
+          {
+            typename DoFHandler<dim,spacedim>::active_cell_iterator
+            cell, endc = dof_handler.end();
+
+            for (cell = dof_handler.begin_active(); cell != endc; ++cell)
+              if (!cell->is_artificial())
+                cell->update_cell_dof_indices_cache();
+          }
+
+          // important, so that sends between two
+          // calls to this function are not mixed
+          // up.
+          //
+          // this is necessary because above we
+          // just see if there are messages and
+          // then receive them, without
+          // discriminating where they come from
+          // and whether they were sent in phase
+          // 1 or 2. the need for a global
+          // communication step like this barrier
+          // could be avoided by receiving
+          // messages specifically from those
+          // processors from which we expect
+          // messages, and by using different
+          // tags for phase 1 and 2
+          MPI_Barrier(tr->get_communicator());
+#endif
+        }
+        // Overload selected for DoFHandler<1,spacedim>: exchanging level
+        // (multigrid) DoF indices is not implemented in one dimension. All
+        // arguments are ignored; the body only raises ExcNotImplemented.
+        template <int spacedim>
+        void
+        communicate_mg_dof_indices_on_marked_cells
+        (const DoFHandler<1,spacedim> &,
+         const std::map<unsigned int, std::set<dealii::types::subdomain_id> > &,
+         const std::vector<dealii::types::global_dof_index> &,
+         const std::vector<dealii::types::global_dof_index> &,
+         const unsigned int)
+        {
+          Assert (false, ExcNotImplemented());  // unconditional: 1d is unsupported
+        }
+        // Exchange multigrid DoF indices for the cells of @p level: pack and
+        // send (tag 123) what other subdomains need, then receive and store
+        // what they sent us. Without p4est it only raises ExcNotImplemented.
+        template <int dim, int spacedim>
+        void
+        communicate_mg_dof_indices_on_marked_cells
+        (const DoFHandler<dim,spacedim> &dof_handler,
+         const std::map<unsigned int, std::set<dealii::types::subdomain_id> > &vertices_with_ghost_neighbors,
+         const std::vector<dealii::types::global_dof_index> &coarse_cell_to_p4est_tree_permutation,
+         const std::vector<dealii::types::global_dof_index> &p4est_tree_to_coarse_cell_permutation,
+         const unsigned int level)
+        {
+#ifndef DEAL_II_WITH_P4EST
+          (void)dof_handler;
+          (void)vertices_with_ghost_neighbors;
+          (void)coarse_cell_to_p4est_tree_permutation;
+          (void)p4est_tree_to_coarse_cell_permutation;
+          (void)level;
+          Assert (false, ExcNotImplemented());
+#else
+          // the DoFHandler must live on a parallel::distributed::Triangulation
+          const parallel::distributed::Triangulation< dim, spacedim > *tr
+            = (dynamic_cast<const parallel::distributed::Triangulation<dim,spacedim>*>
+               (&dof_handler.get_triangulation()));
+          Assert (tr != 0, ExcInternalError());
+
+          // walk every coarse cell (level 0) and let the recursive helper
+          // collect, per destination subdomain, the cells and level
+          // dof_indices to ship (presumably into needs_to_get_cells -- confirm)
+          typedef
+          std::map<dealii::types::subdomain_id, typename types<dim>::cellinfo>
+          cellmap_t;
+          cellmap_t needs_to_get_cells;
+
+          for (typename DoFHandler<dim,spacedim>::level_cell_iterator
+               cell = dof_handler.begin(0);
+               cell != dof_handler.end(0);
+               ++cell)
+            {
+              typename dealii::internal::p4est::types<dim>::quadrant p4est_coarse_cell;
+              internal::p4est::init_coarse_quadrant<dim>(p4est_coarse_cell);
+
+              fill_mg_dofindices_recursively<dim,spacedim>
+              (*tr,
+               coarse_cell_to_p4est_tree_permutation[cell->index()],
+               cell,
+               p4est_coarse_cell,
+               vertices_with_ghost_neighbors,
+               needs_to_get_cells,
+               level);
+            }
+
+
+          // sending: one buffer and one request per destination subdomain
+          std::vector<std::vector<char> > sendbuffers (needs_to_get_cells.size());
+          std::vector<std::vector<char> >::iterator buffer = sendbuffers.begin();
+          std::vector<MPI_Request> requests (needs_to_get_cells.size());
+
+          unsigned int idx=0;
+
+          for (typename cellmap_t::iterator it=needs_to_get_cells.begin();
+               it!=needs_to_get_cells.end();
+               ++it, ++buffer, ++idx)
+            {
+              const unsigned int num_cells = it->second.tree_index.size();
+              (void)num_cells;  // only read inside the Asserts below
+
+              Assert(num_cells==it->second.quadrants.size(), ExcInternalError());
+              Assert(num_cells>0, ExcInternalError());
+
+              // pack all the data into the buffer for this recipient
+              // and start a non-blocking send (tag 123) to the
+              // subdomain id stored as the map key. the buffer has
+              // to stay alive and untouched until the matching
+              // request has completed, which is what the
+              // MPI_Waitall further down waits for before the
+              // buffers go out of scope
+              it->second.pack_data (*buffer);
+              MPI_Isend(&(*buffer)[0], buffer->size(),
+                        MPI_BYTE, it->first,
+                        123, tr->get_communicator(), &requests[idx]);
+            }
+
+
+          // mark all own cells that still miss some dof_data (flag set) and
+          // collect the ghost owners that are going to send stuff to us
+          std::set<dealii::types::subdomain_id> senders;
+          {
+            std::vector<dealii::types::global_dof_index> local_dof_indices;
+            typename DoFHandler<dim,spacedim>::level_cell_iterator
+            cell, endc = dof_handler.end(level);
+
+            for (cell = dof_handler.begin(level); cell != endc; ++cell)
+              {
+                if (cell->level_subdomain_id()==dealii::numbers::artificial_subdomain_id)
+                  {
+                    // artificial: nothing to do
+                  }
+                else if (cell->level_subdomain_id()==dof_handler.get_triangulation().locally_owned_subdomain())
+                  {
+                    // own: keep the flag only if an index is still invalid
+                    local_dof_indices.resize (cell->get_fe().dofs_per_cell);
+                    cell->get_mg_dof_indices (local_dof_indices);
+                    if (local_dof_indices.end() !=
+                        std::find (local_dof_indices.begin(),
+                                   local_dof_indices.end(),
+                                   DoFHandler<dim,spacedim>::invalid_dof_index))
+                      cell->set_user_flag();
+                    else
+                      cell->clear_user_flag();
+                  }
+                else
+                  {
+                    // ghost: a flagged ghost cell means we expect its owner to send
+                    if (cell->user_flag_set())
+                      senders.insert(cell->level_subdomain_id());
+                  }
+              }
+
+          }
+
+
+          //* 5. receive ghostcelldata: one message per entry of senders
+          std::vector<char> receive;
+          typename types<dim>::cellinfo cellinfo;  // NOTE(review): not referenced again below
+          for (unsigned int i=0; i<senders.size(); ++i)
+            {
+              MPI_Status status;
+              int len;
+              MPI_Probe(MPI_ANY_SOURCE, 123, tr->get_communicator(), &status);
+              MPI_Get_count(&status, MPI_BYTE, &len);
+              receive.resize(len);
+
+#ifdef DEBUG
+              Assert(senders.find(status.MPI_SOURCE)!=senders.end(), ExcInternalError());
+#endif
+
+              char *ptr = &receive[0];
+              MPI_Recv(ptr, len, MPI_BYTE, status.MPI_SOURCE, status.MPI_TAG,
+                       tr->get_communicator(), &status);
+
+              unsigned int cells;
+              memcpy(&cells, ptr, sizeof(unsigned int));
+              ptr+=sizeof(unsigned int);
+              // decoded layout: cell count, tree indices, quadrants, dof data
+              //reinterpret too evil? NOTE(review): assumes the buffer is suitably aligned
+              unsigned int *treeindex=reinterpret_cast<unsigned int *>(ptr);
+              ptr+=cells*sizeof(unsigned int);
+              typename dealii::internal::p4est::types<dim>::quadrant *quadrant
+                =reinterpret_cast<typename dealii::internal::p4est::types<dim>::quadrant *>(ptr);
+              ptr+=cells*sizeof(typename dealii::internal::p4est::types<dim>::quadrant);
+              dealii::types::global_dof_index *dofs
+                = reinterpret_cast<dealii::types::global_dof_index *>(ptr);
+
+              // the dofs pointer contains for each cell the number of dofs
+              // on that cell (dofs[0]) followed by the dof indices themselves.
+              for (unsigned int c=0; c<cells; ++c, dofs+=1+dofs[0])
+                {
+                  typename DoFHandler<dim,spacedim>::level_cell_iterator
+                  cell (&dof_handler.get_triangulation(),
+                        0,
+                        p4est_tree_to_coarse_cell_permutation[treeindex[c]],
+                        &dof_handler);
+
+                  typename dealii::internal::p4est::types<dim>::quadrant p4est_coarse_cell;
+                  internal::p4est::init_coarse_quadrant<dim>(p4est_coarse_cell);
+
+                  Assert(cell->get_fe().dofs_per_cell==dofs[0], ExcInternalError());
+
+                  set_mg_dofindices_recursively<dim,spacedim> (*tr,
+                                                               p4est_coarse_cell,
+                                                               cell,
+                                                               quadrant[c],
+                                                               (dofs+1),
+                                                               level);
+                }
+            }
+
+          // complete all sends, so that we can
+          // safely destroy the buffers.
+          if (requests.size() > 0)
+            MPI_Waitall(requests.size(), &requests[0], MPI_STATUSES_IGNORE);
+
+
+#ifdef DEBUG
+          {
+            // check all msgs got sent and received: the global sums must agree
+            unsigned int sum_send=0;
+            unsigned int sum_recv=0;
+            unsigned int sent=needs_to_get_cells.size();
+            unsigned int recv=senders.size();
+
+            MPI_Allreduce(&sent, &sum_send, 1, MPI_UNSIGNED, MPI_SUM, tr->get_communicator());
+            MPI_Allreduce(&recv, &sum_recv, 1, MPI_UNSIGNED, MPI_SUM, tr->get_communicator());
+            Assert(sum_send==sum_recv, ExcInternalError());
+          }
+#endif
+
+
+          // important, so that sends between two
+          // calls to this function are not mixed
+          // up.
+          //
+          // this is necessary because above we
+          // just see if there are messages and
+          // then receive them, without
+          // discriminating where they come from
+          // and whether they were sent in phase
+          // 1 or 2. the need for a global
+          // communication step like this barrier
+          // could be avoided by receiving
+          // messages specifically from those
+          // processors from which we expect
+          // messages, and by using different
+          // tags for phase 1 and 2
+          MPI_Barrier(tr->get_communicator());
+#endif
+        }
+
+
+
+
+      }
+
+#endif // DEAL_II_WITH_P4EST
+      // Distribute and globally number the active DoFs of a DoFHandler living
+      // on a parallel::distributed::Triangulation, exchange the indices on ghost
+      // cells, and store the resulting NumberCache in number_cache_current.
+      template <int dim, int spacedim>
+      void
+      ParallelDistributed<dim, spacedim>::
+      distribute_dofs (DoFHandler<dim,spacedim> &dof_handler,
+                       NumberCache &number_cache_current) const
+      {
+        NumberCache number_cache;
+
+#ifndef DEAL_II_WITH_P4EST
+        (void)dof_handler;
+        Assert (false, ExcNotImplemented());
+#else
+        // the DoFHandler must live on a parallel::distributed::Triangulation
+        parallel::distributed::Triangulation< dim, spacedim > *tr
+          = (dynamic_cast<parallel::distributed::Triangulation<dim,spacedim>*>
+             (const_cast<dealii::Triangulation< dim, spacedim >*>
+              (&dof_handler.get_triangulation())));
+        Assert (tr != 0, ExcInternalError());
+
+        const unsigned int
+        n_cpus = Utilities::MPI::n_mpi_processes (tr->get_communicator());
+
+        //* 1. distribute on own
+        //* subdomain
+        const dealii::types::global_dof_index n_initial_local_dofs =
+          Implementation::distribute_dofs (0, tr->locally_owned_subdomain(),
+                                           dof_handler);
+
+        //* 2. iterate over ghostcells and
+        //kill dofs that are not owned
+        //by us. start from the identity map
+        std::vector<dealii::types::global_dof_index> renumbering(n_initial_local_dofs);
+        for (dealii::types::global_dof_index i=0; i<renumbering.size(); ++i)
+          renumbering[i] = i;
+
+        {
+          std::vector<dealii::types::global_dof_index> local_dof_indices;
+
+          typename DoFHandler<dim,spacedim>::active_cell_iterator
+          cell = dof_handler.begin_active(),
+          endc = dof_handler.end();
+
+          for (; cell != endc; ++cell)
+            if (cell->is_ghost() &&
+                (cell->subdomain_id() < tr->locally_owned_subdomain()))
+              {
+                // we found a
+                // neighboring ghost
+                // cell whose subdomain
+                // is "stronger" than
+                // our own subdomain,
+                // i.e. has a smaller id
+                // delete all dofs that
+                // live there and that
+                // we have previously
+                // assigned a number to
+                // (i.e. the ones on
+                // the interface)
+                local_dof_indices.resize (cell->get_fe().dofs_per_cell);
+                cell->get_dof_indices (local_dof_indices);
+                for (unsigned int i=0; i<cell->get_fe().dofs_per_cell; ++i)
+                  if (local_dof_indices[i] != DoFHandler<dim,spacedim>::invalid_dof_index)
+                    renumbering[local_dof_indices[i]]
+                      = DoFHandler<dim,spacedim>::invalid_dof_index;
+              }
+        }
+
+
+        // make indices consecutive; this also counts the dofs we own
+        number_cache.n_locally_owned_dofs = 0;
+        for (std::vector<dealii::types::global_dof_index>::iterator it=renumbering.begin();
+             it!=renumbering.end(); ++it)
+          if (*it != DoFHandler<dim,spacedim>::invalid_dof_index)
+            *it = number_cache.n_locally_owned_dofs++;
+
+        //* 3. communicate local dofcount and
+        //shift ids to make them unique
+        number_cache.n_locally_owned_dofs_per_processor.resize(n_cpus);
+
+        MPI_Allgather ( &number_cache.n_locally_owned_dofs,
+                        1, DEAL_II_DOF_INDEX_MPI_TYPE,
+                        &number_cache.n_locally_owned_dofs_per_processor[0],
+                        1, DEAL_II_DOF_INDEX_MPI_TYPE,
+                        tr->get_communicator());
+        // shift = number of dofs owned by all subdomains with a smaller id
+        const dealii::types::global_dof_index
+        shift = std::accumulate (number_cache
+                                 .n_locally_owned_dofs_per_processor.begin(),
+                                 number_cache
+                                 .n_locally_owned_dofs_per_processor.begin()
+                                 + tr->locally_owned_subdomain(),
+                                 static_cast<dealii::types::global_dof_index>(0));
+        for (std::vector<dealii::types::global_dof_index>::iterator it=renumbering.begin();
+             it!=renumbering.end(); ++it)
+          if (*it != DoFHandler<dim,spacedim>::invalid_dof_index)
+            (*it) += shift;
+
+        // now re-enumerate all dofs to
+        // this shifted and condensed
+        // numbering form.  we renumber
+        // some dofs as invalid, so
+        // choose the nocheck-version.
+        Implementation::renumber_dofs (renumbering, IndexSet(0),
+                                       dof_handler, false);
+
+        // now a little bit of
+        // housekeeping
+        number_cache.n_global_dofs
+          = std::accumulate (number_cache
+                             .n_locally_owned_dofs_per_processor.begin(),
+                             number_cache
+                             .n_locally_owned_dofs_per_processor.end(),
+                             static_cast<dealii::types::global_dof_index>(0));
+        // we own the contiguous range [shift, shift+n_locally_owned_dofs)
+        number_cache.locally_owned_dofs = IndexSet(number_cache.n_global_dofs);
+        number_cache.locally_owned_dofs
+        .add_range(shift,
+                   shift+number_cache.n_locally_owned_dofs);
+        number_cache.locally_owned_dofs.compress();
+
+        // fill global_dof_indexsets: consecutive ranges, one per processor
+        number_cache.locally_owned_dofs_per_processor.resize(n_cpus);
+        {
+          dealii::types::global_dof_index lshift = 0;
+          for (unsigned int i=0; i<n_cpus; ++i)
+            {
+              number_cache.locally_owned_dofs_per_processor[i]
+                = IndexSet(number_cache.n_global_dofs);
+              number_cache.locally_owned_dofs_per_processor[i]
+              .add_range(lshift,
+                         lshift +
+                         number_cache.n_locally_owned_dofs_per_processor[i]);
+              lshift += number_cache.n_locally_owned_dofs_per_processor[i];
+            }
+        }
+        Assert(number_cache.locally_owned_dofs_per_processor
+               [tr->locally_owned_subdomain()].n_elements()
+               ==
+               number_cache.n_locally_owned_dofs,
+               ExcInternalError());
+        Assert(!number_cache.locally_owned_dofs_per_processor
+               [tr->locally_owned_subdomain()].n_elements()
+               ||
+               number_cache.locally_owned_dofs_per_processor
+               [tr->locally_owned_subdomain()].nth_index_in_set(0)
+               == shift,
+               ExcInternalError());
+
+        //* 4. send dofids of cells that are
+        //ghostcells on other machines
+        // (the user flags are borrowed as markers and restored afterwards)
+        std::vector<bool> user_flags;
+        tr->save_user_flags(user_flags);
+        tr->clear_user_flags ();
+
+        //mark all own cells for transfer
+        for (typename DoFHandler<dim,spacedim>::active_cell_iterator cell = dof_handler.begin_active();
+             cell != dof_handler.end(); ++cell)
+          if (!cell->is_artificial())
+            cell->set_user_flag();
+
+        // add each ghostcells'
+        // subdomain to the vertex and
+        // keep track of interesting
+        // neighbors
+        std::map<unsigned int, std::set<dealii::types::subdomain_id> >
+        vertices_with_ghost_neighbors;
+
+        tr->fill_vertices_with_ghost_neighbors (vertices_with_ghost_neighbors);
+
+
+        /* Send and receive cells. After this,
+           only the local cells are marked,
+           that received new data. This has to
+           be communicated in a second
+           communication step. */
+        communicate_dof_indices_on_marked_cells (dof_handler,
+                                                 vertices_with_ghost_neighbors,
+                                                 tr->coarse_cell_to_p4est_tree_permutation,
+                                                 tr->p4est_tree_to_coarse_cell_permutation);
+
+        communicate_dof_indices_on_marked_cells (dof_handler,
+                                                 vertices_with_ghost_neighbors,
+                                                 tr->coarse_cell_to_p4est_tree_permutation,
+                                                 tr->p4est_tree_to_coarse_cell_permutation);
+
+        tr->load_user_flags(user_flags);
+
+#ifdef DEBUG
+        //check that we are really done: no non-artificial cell may be left
+        //with an invalid index
+        {
+          std::vector<dealii::types::global_dof_index> local_dof_indices;
+
+          for (typename DoFHandler<dim,spacedim>::active_cell_iterator cell = dof_handler.begin_active();
+               cell != dof_handler.end(); ++cell)
+            if (!cell->is_artificial())
+              {
+                local_dof_indices.resize (cell->get_fe().dofs_per_cell);
+                cell->get_dof_indices (local_dof_indices);
+                if (local_dof_indices.end() !=
+                    std::find (local_dof_indices.begin(),
+                               local_dof_indices.end(),
+                               DoFHandler<dim,spacedim>::invalid_dof_index))
+                  {
+                    if (cell->is_ghost())
+                      {
+                        Assert(false, ExcMessage ("A ghost cell still has invalid DoF indices after communication"));
+                      }
+                    else
+                      {
+                        Assert(false, ExcMessage ("A locally owned cell still has invalid DoF indices after communication"));
+                      }
+                  }
+              }
+        }
+#endif // DEBUG
+#endif // DEAL_II_WITH_P4EST
+
+        number_cache_current = number_cache;  // without p4est: a default-constructed cache
+      }
+      // Distribute and number the multigrid DoFs level by level; number_caches
+      // is indexed by level and receives the per-level ownership data. Requires
+      // the triangulation's construct_multigrid_hierarchy setting (AssertThrow).
+      template <int dim, int spacedim>
+      void
+      ParallelDistributed<dim, spacedim>::
+      distribute_mg_dofs (DoFHandler<dim,spacedim> &dof_handler,
+                          std::vector<NumberCache> &number_caches) const
+      {
+#ifndef DEAL_II_WITH_P4EST
+        (void)dof_handler;
+        (void)number_caches;
+        Assert (false, ExcNotImplemented());
+#else
+        // the DoFHandler must live on a parallel::distributed::Triangulation
+        parallel::distributed::Triangulation< dim, spacedim > *tr
+          = (dynamic_cast<parallel::distributed::Triangulation<dim,spacedim>*>
+             (const_cast<dealii::Triangulation< dim, spacedim >*>
+              (&dof_handler.get_triangulation())));
+        Assert (tr != 0, ExcInternalError());
+
+        AssertThrow(
+          (tr->settings &  parallel::distributed::Triangulation< dim, spacedim >::construct_multigrid_hierarchy),
+          ExcMessage("Multigrid DoFs can only be distributed on a parallel Triangulation if the flag construct_multigrid_hierarchy is set in the constructor."));
+
+
+        const unsigned int
+        n_cpus = Utilities::MPI::n_mpi_processes (tr->get_communicator());
+        // use the maximum over all processes so everybody runs the same number of collective rounds
+        unsigned int n_levels = Utilities::MPI::max(dof_handler.get_triangulation().n_levels(), tr->get_communicator());
+        // NOTE(review): number_caches is indexed up to n_levels-1; presumably sized by the caller -- confirm
+        for (unsigned int level = 0; level < n_levels; ++level)
+          {
+            NumberCache &number_cache = number_caches[level];
+
+            //* 1. distribute on own
+            //* subdomain
+            const dealii::types::global_dof_index n_initial_local_dofs =
+              Implementation::distribute_dofs_on_level(0, tr->locally_owned_subdomain(), dof_handler, level);
+
+            //* 2. iterate over ghostcells and
+            //kill dofs that are not owned
+            //by us. start from the identity map
+            std::vector<dealii::types::global_dof_index> renumbering(n_initial_local_dofs);
+            for (dealii::types::global_dof_index i=0; i<renumbering.size(); ++i)
+              renumbering[i] = i;
+
+            if (level<tr->n_levels())  // this level may not exist locally
+              {
+                std::vector<dealii::types::global_dof_index> local_dof_indices;
+
+                typename DoFHandler<dim,spacedim>::level_cell_iterator
+                cell = dof_handler.begin(level),
+                endc = dof_handler.end(level);
+
+                for (; cell != endc; ++cell)
+                  if (cell->level_subdomain_id()!=numbers::artificial_subdomain_id &&
+                      (cell->level_subdomain_id() < tr->locally_owned_subdomain()))
+                    {
+                      // we found a
+                      // neighboring ghost
+                      // cell whose subdomain
+                      // is "stronger" than
+                      // our own subdomain,
+                      // i.e. has a smaller id
+                      // delete all dofs that
+                      // live there and that
+                      // we have previously
+                      // assigned a number to
+                      // (i.e. the ones on
+                      // the interface)
+                      local_dof_indices.resize (cell->get_fe().dofs_per_cell);
+                      cell->get_mg_dof_indices (local_dof_indices);
+                      for (unsigned int i=0; i<cell->get_fe().dofs_per_cell; ++i)
+                        if (local_dof_indices[i] != DoFHandler<dim,spacedim>::invalid_dof_index)
+                          renumbering[local_dof_indices[i]]
+                            = DoFHandler<dim,spacedim>::invalid_dof_index;
+                    }
+              }
+
+
+            // make indices consecutive; this also counts the dofs we own
+            number_cache.n_locally_owned_dofs = 0;
+            for (std::vector<dealii::types::global_dof_index>::iterator it=renumbering.begin();
+                 it!=renumbering.end(); ++it)
+              if (*it != DoFHandler<dim,spacedim>::invalid_dof_index)
+                *it = number_cache.n_locally_owned_dofs++;
+
+            //* 3. communicate local dofcount and
+            //shift ids to make them unique
+            number_cache.n_locally_owned_dofs_per_processor.resize(n_cpus);
+
+            MPI_Allgather ( &number_cache.n_locally_owned_dofs,
+                            1, DEAL_II_DOF_INDEX_MPI_TYPE,
+                            &number_cache.n_locally_owned_dofs_per_processor[0],
+                            1, DEAL_II_DOF_INDEX_MPI_TYPE,
+                            tr->get_communicator());
+            // shift = number of level dofs owned by all subdomains with a smaller id
+            const dealii::types::global_dof_index
+            shift = std::accumulate (number_cache
+                                     .n_locally_owned_dofs_per_processor.begin(),
+                                     number_cache
+                                     .n_locally_owned_dofs_per_processor.begin()
+                                     + tr->locally_owned_subdomain(),
+                                     static_cast<dealii::types::global_dof_index>(0));
+            for (std::vector<dealii::types::global_dof_index>::iterator it=renumbering.begin();
+                 it!=renumbering.end(); ++it)
+              if (*it != DoFHandler<dim,spacedim>::invalid_dof_index)
+                (*it) += shift;
+
+            // now re-enumerate all dofs to
+            // this shifted and condensed
+            // numbering form.  we renumber
+            // some dofs as invalid, so
+            // choose the nocheck-version.
+            Implementation::renumber_mg_dofs (renumbering, IndexSet(0),
+                                              dof_handler, level, false);
+
+            // now a little bit of
+            // housekeeping
+            number_cache.n_global_dofs
+              = std::accumulate (number_cache
+                                 .n_locally_owned_dofs_per_processor.begin(),
+                                 number_cache
+                                 .n_locally_owned_dofs_per_processor.end(),
+                                 static_cast<dealii::types::global_dof_index>(0));
+            // we own the contiguous range [shift, shift+n_locally_owned_dofs)
+            number_cache.locally_owned_dofs = IndexSet(number_cache.n_global_dofs);
+            number_cache.locally_owned_dofs
+            .add_range(shift,
+                       shift+number_cache.n_locally_owned_dofs);
+            number_cache.locally_owned_dofs.compress();
+
+            // fill global_dof_indexsets: consecutive ranges, one per processor
+            number_cache.locally_owned_dofs_per_processor.resize(n_cpus);
+            {
+              dealii::types::global_dof_index lshift = 0;
+              for (unsigned int i=0; i<n_cpus; ++i)
+                {
+                  number_cache.locally_owned_dofs_per_processor[i]
+                    = IndexSet(number_cache.n_global_dofs);
+                  number_cache.locally_owned_dofs_per_processor[i]
+                  .add_range(lshift,
+                             lshift +
+                             number_cache.n_locally_owned_dofs_per_processor[i]);
+                  lshift += number_cache.n_locally_owned_dofs_per_processor[i];
+                }
+            }
+            Assert(number_cache.locally_owned_dofs_per_processor
+                   [tr->locally_owned_subdomain()].n_elements()
+                   ==
+                   number_cache.n_locally_owned_dofs,
+                   ExcInternalError());
+            Assert(!number_cache.locally_owned_dofs_per_processor
+                   [tr->locally_owned_subdomain()].n_elements()
+                   ||
+                   number_cache.locally_owned_dofs_per_processor
+                   [tr->locally_owned_subdomain()].nth_index_in_set(0)
+                   == shift,
+                   ExcInternalError());
+
+            //* 4. send dofids of cells that are
+            //ghostcells on other machines (user flags are borrowed and restored)
+            std::vector<bool> user_flags;
+            tr->save_user_flags(user_flags);
+            tr->clear_user_flags ();
+
+            //mark all own cells for transfer
+            if (level < tr->n_levels())
+              {
+                typename DoFHandler<dim,spacedim>::level_cell_iterator
+                cell, endc = dof_handler.end(level);
+                for (cell = dof_handler.begin(level); cell != endc; ++cell)
+                  if (cell->level_subdomain_id() != dealii::numbers::artificial_subdomain_id)
+                    cell->set_user_flag();
+              }
+
+            //mark the vertices we are interested in, i.e. belonging to own
+            //and marked cells. NOTE(review): the result is not read again below
+            const std::vector<bool> locally_active_vertices
+              = tr->mark_locally_active_vertices_on_level (level);
+
+            // add each ghostcells' subdomain to the vertex and keep track of
+            // interesting neighbors
+            std::map<unsigned int, std::set<dealii::types::subdomain_id> >
+            vertices_with_ghost_neighbors;
+            tr->fill_level_vertices_with_ghost_neighbors(level,
+                                                         vertices_with_ghost_neighbors);
+
+            // Send and receive cells. After this, only the local cells are
+            // marked, that received new data. This has to be communicated in
+            // a second communication step.
+
+            communicate_mg_dof_indices_on_marked_cells( dof_handler,
+                                                        vertices_with_ghost_neighbors,
+                                                        tr->coarse_cell_to_p4est_tree_permutation,
+                                                        tr->p4est_tree_to_coarse_cell_permutation,
+                                                        level);
+            communicate_mg_dof_indices_on_marked_cells( dof_handler,
+                                                        vertices_with_ghost_neighbors,
+                                                        tr->coarse_cell_to_p4est_tree_permutation,
+                                                        tr->p4est_tree_to_coarse_cell_permutation,
+                                                        level);
+
+            tr->load_user_flags(user_flags);
+
+#ifdef DEBUG
+            //check that we are really done: no non-artificial level cell may keep an invalid index
+            if (level < tr->n_levels())
+              {
+                std::vector<dealii::types::global_dof_index> local_dof_indices;
+                typename DoFHandler<dim,spacedim>::level_cell_iterator
+                cell, endc = dof_handler.end(level);
+
+                for (cell = dof_handler.begin(level); cell != endc; ++cell)
+                  if (cell->level_subdomain_id() != dealii::numbers::artificial_subdomain_id)
+                    {
+                      local_dof_indices.resize (cell->get_fe().dofs_per_cell);
+                      cell->get_mg_dof_indices (local_dof_indices);
+                      if (local_dof_indices.end() !=
+                          std::find (local_dof_indices.begin(),
+                                     local_dof_indices.end(),
+                                     DoFHandler<dim,spacedim>::invalid_dof_index))
+                        {
+                          Assert(false, ExcMessage ("not all DoFs got distributed!"));
+                        }
+                    }
+              }
+#endif // DEBUG
+
+          }
+
+#endif // DEAL_II_WITH_P4EST
+      }
+
+
+      template <int dim, int spacedim>
+      void
+      ParallelDistributed<dim, spacedim>::
+      renumber_dofs (const std::vector<dealii::types::global_dof_index> &new_numbers,
+                     dealii::DoFHandler<dim,spacedim> &dof_handler,
+                     NumberCache &number_cache_current) const
+      {
+        (void)new_numbers;  // presumably silences unused-parameter warnings in builds
+        (void)dof_handler;  // where Assert is a no-op and DEAL_II_WITH_P4EST is unset
+
+        Assert (new_numbers.size() == dof_handler.locally_owned_dofs().n_elements(),
+                ExcInternalError());
+
+        NumberCache number_cache;
+
+#ifndef DEAL_II_WITH_P4EST
+        Assert (false, ExcNotImplemented());
+#else
+        // Overview: (1) build the new locally owned IndexSet, (2) renumber the
+        // stored indices, (3) communicate cell DoF indices and gather all owned sets.
+        // Step 1: compute the new IndexSet. First try to express the sorted
+        // new indices as at most n_blocks contiguous ranges, which avoids
+        // inserting each index individually into the IndexSet (slow). A
+        // process that owns no DoFs skips this search but still has to
+        // go through the rest of this function.
+
+        number_cache.locally_owned_dofs = IndexSet (dof_handler.n_dofs());
+        if (dof_handler.locally_owned_dofs().n_elements()>0)
+          {
+            std::vector<dealii::types::global_dof_index> new_numbers_sorted (new_numbers);
+            std::sort(new_numbers_sorted.begin(), new_numbers_sorted.end());
+            std::vector<dealii::types::global_dof_index>::const_iterator it = new_numbers_sorted.begin();
+            const unsigned int n_blocks = dof_handler.get_fe().n_blocks();
+            std::vector<std::pair<dealii::types::global_dof_index,unsigned int> > block_indices(n_blocks);
+            block_indices[0].first = *it++;
+            block_indices[0].second = 1;
+            unsigned int current_block = 0, n_filled_blocks = 1;
+            for ( ; it != new_numbers_sorted.end(); ++it)
+              {
+                bool done = false;
+
+                // visit every filled block once, starting with the current
+                // one and wrapping around, to see if *it directly extends it.
+                for (unsigned int i=0; i<n_filled_blocks; ++i)
+                  if (*it == block_indices[current_block].first
+                      +block_indices[current_block].second)
+                    {
+                      block_indices[current_block].second++;
+                      done = true;
+                      break;
+                    }
+                  else
+                    {
+                      if (current_block == n_filled_blocks-1)
+                        current_block = 0;
+                      else
+                        ++current_block;
+                    }
+
+                // no filled block is extended by this index: open a new block
+                // if one is still free. Otherwise stop; the size check below
+                // then falls back to adding every index individually.
+                if (done == false)
+                  {
+                    if (n_filled_blocks < n_blocks)
+                      {
+                        block_indices[n_filled_blocks].first = *it;
+                        block_indices[n_filled_blocks].second = 1;
+                        current_block = n_filled_blocks;
+                        ++n_filled_blocks;
+                      }
+                    else
+                      break;
+                  }
+              }
+
+            // check whether all indices could be assigned to blocks (the
+            // block lengths then sum up to new_numbers.size()). If yes, add
+            // the block ranges to the IndexSet; otherwise add each element
+            // of the sorted list individually
+            unsigned int sum = 0;
+            for (unsigned int i=0; i<n_filled_blocks; ++i)
+              sum += block_indices[i].second;
+            if (sum == new_numbers.size())
+              for (unsigned int i=0; i<n_filled_blocks; ++i)
+                number_cache.locally_owned_dofs.add_range (block_indices[i].first,
+                                                           block_indices[i].first+
+                                                           block_indices[i].second);
+            else
+              number_cache.locally_owned_dofs.add_indices(new_numbers_sorted.begin(),
+                                                          new_numbers_sorted.end());
+          }
+
+
+        number_cache.locally_owned_dofs.compress();
+        Assert (number_cache.locally_owned_dofs.n_elements() == new_numbers.size(),
+                ExcInternalError());
+        // also compare with the number of locally owned degrees of freedom
+        // that the DoFHandler object still stores
+        Assert (number_cache.locally_owned_dofs.n_elements() ==
+                dof_handler.n_locally_owned_dofs(),
+                ExcInternalError());
+
+        // then also store this number in the new cache
+        number_cache.n_locally_owned_dofs = dof_handler.n_locally_owned_dofs();
+
+        // on non-artificial cells, mark DoFs we do not own as invalid
+        {
+          std::vector<dealii::types::global_dof_index> local_dof_indices;
+
+          typename DoFHandler<dim,spacedim>::active_cell_iterator
+          cell = dof_handler.begin_active(),
+          endc = dof_handler.end();
+
+          for (; cell != endc; ++cell)
+            if (!cell->is_artificial())
+              {
+                local_dof_indices.resize (cell->get_fe().dofs_per_cell);
+                cell->get_dof_indices (local_dof_indices);
+                for (unsigned int i=0; i<cell->get_fe().dofs_per_cell; ++i)
+                  {
+                    if (local_dof_indices[i] == DoFHandler<dim,spacedim>::invalid_dof_index)
+                      continue;
+
+                    if (!dof_handler.locally_owned_dofs().is_element(local_dof_indices[i]))
+                      {
+                        // this DoF is not owned by us, so set it to invalid.
+                        local_dof_indices[i]
+                          = DoFHandler<dim,spacedim>::invalid_dof_index;
+                      }
+                  }
+
+                cell->set_dof_indices (local_dof_indices);
+              }
+        }
+
+
+        // Step 2: renumber. Skip when there is nothing to do because we own no DoF.
+        if (dof_handler.locally_owned_dofs().n_elements() > 0)
+          Implementation::renumber_dofs (new_numbers,
+                                         dof_handler.locally_owned_dofs(),
+                                         dof_handler,
+                                         false);
+
+        // Step 3: communication of cell DoF indices, then of the owned IndexSets
+        {
+          parallel::distributed::Triangulation< dim, spacedim > *tr
+            = (dynamic_cast<parallel::distributed::Triangulation<dim,spacedim>*>
+               (const_cast<dealii::Triangulation< dim, spacedim >*>
+                (&dof_handler.get_triangulation())));
+          Assert (tr != 0, ExcInternalError());
+
+          std::vector<bool> user_flags;
+          tr->save_user_flags(user_flags);
+          tr->clear_user_flags ();
+
+          // mark all non-artificial active cells for transfer
+          typename DoFHandler<dim,spacedim>::active_cell_iterator
+          cell, endc = dof_handler.end();
+          for (cell = dof_handler.begin_active(); cell != endc; ++cell)
+            if (!cell->is_artificial())
+              cell->set_user_flag();
+
+          // add each ghost cell's subdomain to its vertices and keep track of
+          // interesting neighbors
+          std::map<unsigned int, std::set<dealii::types::subdomain_id> >
+          vertices_with_ghost_neighbors;
+
+          tr->fill_vertices_with_ghost_neighbors (vertices_with_ghost_neighbors);
+
+          // Send and receive cells. After this, only those local cells that
+          // received new data are marked. That data has to be communicated in
+          // a second communication step, hence the second identical call.
+          communicate_dof_indices_on_marked_cells (dof_handler,
+                                                   vertices_with_ghost_neighbors,
+                                                   tr->coarse_cell_to_p4est_tree_permutation,
+                                                   tr->p4est_tree_to_coarse_cell_permutation);
+
+          communicate_dof_indices_on_marked_cells (dof_handler,
+                                                   vertices_with_ghost_neighbors,
+                                                   tr->coarse_cell_to_p4est_tree_permutation,
+                                                   tr->p4est_tree_to_coarse_cell_permutation);
+
+
+          // Make the locally owned IndexSet of every process known on all
+          // processes by gathering the serialized sets.
+          const unsigned int n_cpus = Utilities::MPI::n_mpi_processes (tr->get_communicator());
+
+          // Serialize our IndexSet and determine size.
+          std::ostringstream oss;
+          number_cache.locally_owned_dofs.block_write(oss);
+          std::string oss_str=oss.str();
+          std::vector<char> my_data(oss_str.begin(), oss_str.end());
+          unsigned int my_size = oss_str.size();
+
+          // determine maximum size of IndexSet
+          const unsigned int max_size
+            = Utilities::MPI::max (my_size, tr->get_communicator());
+
+          // MPI_Allgather below sends max_size bytes from every process, so
+          // pad our buffer to that size; resize() fills the tail with zeros.
+          my_data.resize(max_size);
+
+          std::vector<char> buffer(max_size*n_cpus);
+          MPI_Allgather(&my_data[0], max_size, MPI_BYTE,
+                        &buffer[0], max_size, MPI_BYTE,
+                        tr->get_communicator());
+
+          number_cache.locally_owned_dofs_per_processor.resize (n_cpus);
+          number_cache.n_locally_owned_dofs_per_processor.resize (n_cpus);
+          for (unsigned int i=0; i<n_cpus; ++i)
+            {
+              std::stringstream strstr;
+              strstr.write(&buffer[i*max_size],max_size);
+              // block_read does not consume the whole slice when the sender's
+              // data is shorter than max_size. Therefore a new stringstream is
+              // created in each iteration (resetting would be fine too).
+              number_cache.locally_owned_dofs_per_processor[i]
+              .block_read(strstr);
+              number_cache.n_locally_owned_dofs_per_processor[i]
+                = number_cache.locally_owned_dofs_per_processor[i].n_elements();
+            }
+
+          number_cache.n_global_dofs
+            = std::accumulate (number_cache
+                               .n_locally_owned_dofs_per_processor.begin(),
+                               number_cache
+                               .n_locally_owned_dofs_per_processor.end(),
+                               static_cast<dealii::types::global_dof_index>(0));
+
+          tr->load_user_flags(user_flags);
+        }
+#endif
+
+        number_cache_current = number_cache;
+      }
+    }
+  }
+}
+
+
+
+
+/*-------------- Explicit Instantiations -------------------------------*/
+#include "dof_handler_policy.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/dofs/dof_handler_policy.inst.in b/source/dofs/dof_handler_policy.inst.in
new file mode 100644
index 0000000..defe7b4
--- /dev/null
+++ b/source/dofs/dof_handler_policy.inst.in
@@ -0,0 +1,47 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+for (deal_II_dimension : DIMENSIONS)
+{
+namespace internal
+\{
+  namespace DoFHandler
+  \{
+    namespace Policy
+    \{
+      template class PolicyBase<deal_II_dimension,deal_II_dimension>;
+      template class Sequential<deal_II_dimension,deal_II_dimension>;
+      template class ParallelShared<deal_II_dimension,deal_II_dimension>;
+      template class ParallelDistributed<deal_II_dimension,deal_II_dimension>;
+      // the same four policy classes with spacedim = dim+1 (dim 1 and 2 only)
+#if deal_II_dimension==1 || deal_II_dimension==2
+      template class PolicyBase<deal_II_dimension,deal_II_dimension+1>;
+      template class Sequential<deal_II_dimension,deal_II_dimension+1>;
+      template class ParallelShared<deal_II_dimension,deal_II_dimension+1>;
+      template class ParallelDistributed<deal_II_dimension,deal_II_dimension+1>;
+#endif
+      // the <1,3> combination is emitted once, in the dimension-3 pass
+#if deal_II_dimension==3
+      template class PolicyBase<1,3>;
+      template class Sequential<1,3>;
+      template class ParallelShared<1,3>;
+      template class ParallelDistributed<1,3>;
+#endif
+    \}
+  \}
+\}
+
+}
diff --git a/source/dofs/dof_objects.cc b/source/dofs/dof_objects.cc
new file mode 100644
index 0000000..59456fc
--- /dev/null
+++ b/source/dofs/dof_objects.cc
@@ -0,0 +1,73 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/dofs/dof_objects.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/fe/fe.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace internal
+{
+  namespace DoFHandler
+  {
+    template <int dim>
+    std::size_t
+    DoFObjects<dim>::memory_consumption () const
+    {
+      return (MemoryConsumption::memory_consumption (dofs));  // only the 'dofs' member is counted
+    }
+
+
+
+    template <int dim>
+    template <int dh_dim, int spacedim>
+    void
+    DoFObjects<dim>::
+    set_dof_index (const dealii::DoFHandler<dh_dim, spacedim> &dof_handler,
+                   const unsigned int       obj_index,
+                   const unsigned int       fe_index,
+                   const unsigned int       local_index,
+                   const types::global_dof_index       global_index)
+    {
+      (void)fe_index;  // fe_index is only read inside the Assert below
+      Assert ((fe_index == dealii::DoFHandler<dh_dim, spacedim>::default_fe_index),
+              ExcMessage ("Only the default FE index is allowed for non-hp DoFHandler objects"));
+      Assert (local_index<dof_handler.get_fe().template n_dofs_per_object<dim>(),
+              ExcIndexRange (local_index, 0, dof_handler.get_fe().template n_dofs_per_object<dim>()));
+      Assert (obj_index * dof_handler.get_fe().template n_dofs_per_object<dim>()+local_index
+              <
+              dofs.size(),
+              ExcInternalError());
+      // 'dofs' stores n_dofs_per_object<dim>() consecutive entries per object
+      dofs[obj_index * dof_handler.get_fe()
+           .template n_dofs_per_object<dim>() + local_index] = global_index;
+    }
+  }
+}
+
+
+// explicit instantiations: the included file is placed inside internal::DoFHandler
+namespace internal
+{
+  namespace DoFHandler
+  {
+#include "dof_objects.inst"
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/dofs/dof_objects.inst.in b/source/dofs/dof_objects.inst.in
new file mode 100644
index 0000000..85cf814
--- /dev/null
+++ b/source/dofs/dof_objects.inst.in
@@ -0,0 +1,84 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+for (deal_II_dimension : DIMENSIONS)
+  {
+    template class DoFObjects<deal_II_dimension>;  // whole class, once per entry of DIMENSIONS
+  }
+
+
+for (deal_II_dimension, structdim : DIMENSIONS)
+  {
+    template
+    types::global_dof_index
+    DoFObjects<structdim>::
+    get_dof_index (const dealii::DoFHandler<deal_II_dimension> &dof_handler,
+		   const unsigned int       obj_index,
+		   const unsigned int       fe_index,
+		   const unsigned int       local_index) const;
+
+    template
+    void
+    DoFObjects<structdim>::
+    set_dof_index (const dealii::DoFHandler<deal_II_dimension> &dof_handler,
+		   const unsigned int       obj_index,
+		   const unsigned int       fe_index,
+		   const unsigned int       local_index,
+		   const types::global_dof_index       global_index);
+
+#if (deal_II_dimension < 3) && (structdim < 3)
+    // the same two accessors for DoFHandler<dim,dim+1>
+    template
+    types::global_dof_index
+    DoFObjects<structdim>::
+    get_dof_index (const dealii::DoFHandler<deal_II_dimension,deal_II_dimension+1> &dof_handler,
+		   const unsigned int       obj_index,
+		   const unsigned int       fe_index,
+		   const unsigned int       local_index) const;
+
+    template
+    void
+    DoFObjects<structdim>::
+    set_dof_index (const dealii::DoFHandler<deal_II_dimension,deal_II_dimension+1> &dof_handler,
+		   const unsigned int       obj_index,
+		   const unsigned int       fe_index,
+		   const unsigned int       local_index,
+		   const types::global_dof_index global_index);
+#endif
+
+
+#if (deal_II_dimension == 3) && (structdim < 3)
+    // the same two accessors for DoFHandler<1,3>, emitted in the dimension-3 pass
+    template
+    types::global_dof_index
+    DoFObjects<structdim>::
+    get_dof_index (const dealii::DoFHandler<1,3> &dof_handler,
+		   const unsigned int       obj_index,
+		   const unsigned int       fe_index,
+		   const unsigned int       local_index) const;
+
+    template
+    void
+    DoFObjects<structdim>::
+    set_dof_index (const dealii::DoFHandler<1,3> &dof_handler,
+		   const unsigned int       obj_index,
+		   const unsigned int       fe_index,
+		   const unsigned int       local_index,
+		   const types::global_dof_index       global_index);
+#endif
+  }
+
diff --git a/source/dofs/dof_renumbering.cc b/source/dofs/dof_renumbering.cc
new file mode 100644
index 0000000..ae35017
--- /dev/null
+++ b/source/dofs/dof_renumbering.cc
@@ -0,0 +1,1953 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/thread_management.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/types.h>
+#include <deal.II/base/template_constraints.h>
+
+#include <deal.II/lac/sparsity_pattern.h>
+#include <deal.II/lac/sparsity_tools.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/lac/constraint_matrix.h>
+
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/dofs/dof_renumbering.h>
+
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/tria.h>
+
+#include <deal.II/fe/fe.h>
+#include <deal.II/hp/dof_handler.h>
+#include <deal.II/hp/fe_collection.h>
+#include <deal.II/hp/fe_values.h>
+
+#include <deal.II/multigrid/mg_tools.h>
+
+#include <deal.II/distributed/tria.h>
+
+#include <boost/config.hpp>
+#include <boost/graph/adjacency_list.hpp>
+#include <boost/graph/cuthill_mckee_ordering.hpp>
+#include <boost/graph/king_ordering.hpp>
+#include <boost/graph/minimum_degree_ordering.hpp>
+#include <boost/graph/properties.hpp>
+#include <boost/graph/bandwidth.hpp>
+DEAL_II_DISABLE_EXTRA_DIAGNOSTICS
+#include <boost/random.hpp>
+#include <boost/random/uniform_int_distribution.hpp>
+DEAL_II_ENABLE_EXTRA_DIAGNOSTICS
+
+#include <vector>
+#include <map>
+#include <algorithm>
+#include <cmath>
+#include <functional>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace DoFRenumbering
+{
+  namespace boost
+  {
+    namespace boosttypes
+    {
+      using namespace ::boost;  // the typedefs below use unqualified BGL names
+      using namespace std;
+      // undirected graph whose vertices carry a color and an int degree property
+      typedef adjacency_list<vecS, vecS, undirectedS,
+              property<vertex_color_t, default_color_type,
+              property<vertex_degree_t,int> > > Graph;
+      typedef graph_traits<Graph>::vertex_descriptor Vertex;
+      typedef graph_traits<Graph>::vertices_size_type size_type;
+
+      typedef std::pair<size_type, size_type> Pair;
+    }
+
+
+    namespace internal
+    {
+      template <typename DoFHandlerType>
+      void create_graph
+      (const DoFHandlerType                                                          &dof_handler,
+       const bool                                                                     use_constraints,
+       boosttypes::Graph                                                             &graph,
+       boosttypes::property_map<boosttypes::Graph,boosttypes::vertex_degree_t>::type &graph_degree)
+      {
+        {
+          // create an intermediate sparsity pattern (faster than directly
+          // submitting indices). Hanging node constraints are only built
+          // if use_constraints is set; otherwise 'constraints' stays empty
+          ConstraintMatrix constraints;
+          if (use_constraints)
+            DoFTools::make_hanging_node_constraints (dof_handler, constraints);
+          constraints.close ();
+          DynamicSparsityPattern dsp (dof_handler.n_dofs(),
+                                      dof_handler.n_dofs());
+          DoFTools::make_sparsity_pattern (dof_handler, dsp, constraints);
+
+          // submit every (row, column) entry of the pattern as a graph edge
+          for (unsigned int row=0; row<dsp.n_rows(); ++row)
+            for (unsigned int col=0; col < dsp.row_length(row); ++col)
+              add_edge (row, dsp.column_number (row, col), graph);
+        }
+        // record the degree of every vertex in the vertex_degree property map
+        boosttypes::graph_traits<boosttypes::Graph>::vertex_iterator ui, ui_end;
+
+        graph_degree = get(::boost::vertex_degree, graph);
+        for (::boost::tie(ui, ui_end) = vertices(graph); ui != ui_end; ++ui)
+          graph_degree[*ui] = degree(*ui, graph);
+      }
+    }
+
+
+    template <typename DoFHandlerType>
+    void
+    Cuthill_McKee (DoFHandlerType  &dof_handler,
+                   const bool       reversed_numbering,
+                   const bool       use_constraints)
+    {
+      std::vector<types::global_dof_index> renumbering(dof_handler.n_dofs(),
+                                                       DoFHandlerType::invalid_dof_index);
+      compute_Cuthill_McKee(renumbering, dof_handler, reversed_numbering,
+                            use_constraints);
+
+      // 'renumbering' now holds one new index per DoF (it started out
+      // filled with invalid_dof_index); hand it to the DoFHandler,
+      // which performs the actual renumbering
+      dof_handler.renumber_dofs (renumbering);
+    }
+
+
+    template <typename DoFHandlerType>
+    void
+    compute_Cuthill_McKee (std::vector<types::global_dof_index> &new_dof_indices,
+                           const DoFHandlerType                 &dof_handler,
+                           const bool                            reversed_numbering,
+                           const bool                            use_constraints)
+    {
+      boosttypes::Graph
+      graph(dof_handler.n_dofs());
+      boosttypes::property_map<boosttypes::Graph,boosttypes::vertex_degree_t>::type
+      graph_degree;
+      // one vertex per DoF; create_graph adds the edges and vertex degrees
+      internal::create_graph (dof_handler, use_constraints, graph, graph_degree);
+
+      boosttypes::property_map<boosttypes::Graph, boosttypes::vertex_index_t>::type
+      index_map = get(::boost::vertex_index, graph);
+
+      // inv_perm[c] will hold the vertex that receives new index c
+      std::vector<boosttypes::Vertex> inv_perm(num_vertices(graph));
+      // without reversed_numbering the BGL output is written back to front
+      if (reversed_numbering == false)
+        ::boost::cuthill_mckee_ordering(graph, inv_perm.rbegin(),
+                                        get(::boost::vertex_color, graph),
+                                        make_degree_map(graph));
+      else
+        ::boost::cuthill_mckee_ordering(graph, inv_perm.begin(),
+                                        get(::boost::vertex_color, graph),
+                                        make_degree_map(graph));
+      // invert the ordering: new_dof_indices[vertex index] = new position
+      for (boosttypes::size_type c = 0; c != inv_perm.size(); ++c)
+        new_dof_indices[index_map[inv_perm[c]]] = c;
+      // no entry may still hold invalid_dof_index afterwards
+      Assert (std::find (new_dof_indices.begin(), new_dof_indices.end(),
+                         DoFHandlerType::invalid_dof_index) == new_dof_indices.end(),
+              ExcInternalError());
+    }
+
+
+
+    template <typename DoFHandlerType>
+    void
+    king_ordering (DoFHandlerType  &dof_handler,
+                   const bool       reversed_numbering,
+                   const bool       use_constraints)
+    {
+      std::vector<types::global_dof_index> renumbering(dof_handler.n_dofs(),
+                                                       DoFHandlerType::invalid_dof_index);
+      compute_king_ordering(renumbering, dof_handler, reversed_numbering,
+                            use_constraints);
+
+      // 'renumbering' now holds one new index per DoF (it started out
+      // filled with invalid_dof_index); hand it to the DoFHandler,
+      // which performs the actual renumbering
+      dof_handler.renumber_dofs (renumbering);
+    }
+
+
+    template <typename DoFHandlerType>
+    void
+    compute_king_ordering (std::vector<types::global_dof_index> &new_dof_indices,
+                           const DoFHandlerType                 &dof_handler,
+                           const bool                            reversed_numbering,
+                           const bool                            use_constraints)
+    {
+      boosttypes::Graph
+      graph(dof_handler.n_dofs());
+      boosttypes::property_map<boosttypes::Graph,boosttypes::vertex_degree_t>::type
+      graph_degree;
+      // one vertex per DoF; create_graph adds the edges and vertex degrees
+      internal::create_graph (dof_handler, use_constraints, graph, graph_degree);
+
+      boosttypes::property_map<boosttypes::Graph, boosttypes::vertex_index_t>::type
+      index_map = get(::boost::vertex_index, graph);
+
+      // inv_perm[c] will hold the vertex that receives new index c
+      std::vector<boosttypes::Vertex> inv_perm(num_vertices(graph));
+      // without reversed_numbering the BGL output is written back to front
+      if (reversed_numbering == false)
+        ::boost::king_ordering(graph, inv_perm.rbegin());
+      else
+        ::boost::king_ordering(graph, inv_perm.begin());
+      // invert the ordering: new_dof_indices[vertex index] = new position
+      for (boosttypes::size_type c = 0; c != inv_perm.size(); ++c)
+        new_dof_indices[index_map[inv_perm[c]]] = c;
+      // no entry may still hold invalid_dof_index afterwards
+      Assert (std::find (new_dof_indices.begin(), new_dof_indices.end(),
+                         DoFHandlerType::invalid_dof_index) == new_dof_indices.end(),
+              ExcInternalError());
+    }
+
+
+
+    template <typename DoFHandlerType>
+    void
+    minimum_degree (DoFHandlerType  &dof_handler,
+                    const bool       reversed_numbering,
+                    const bool       use_constraints)
+    {
+      std::vector<types::global_dof_index> renumbering(dof_handler.n_dofs(),
+                                                       DoFHandlerType::invalid_dof_index);
+      compute_minimum_degree(renumbering, dof_handler, reversed_numbering,
+                             use_constraints);
+
+      // 'renumbering' now holds one new index per DoF (it started out
+      // filled with invalid_dof_index); hand it to the DoFHandler,
+      // which performs the actual renumbering
+      dof_handler.renumber_dofs (renumbering);
+    }
+
+
+    template <typename DoFHandlerType>
+    void
+    compute_minimum_degree (std::vector<types::global_dof_index> &new_dof_indices,
+                            const DoFHandlerType                 &dof_handler,
+                            const bool                            reversed_numbering,
+                            const bool                            use_constraints)
+    {
+      (void)use_constraints;  // only read inside the Assert below
+      Assert (use_constraints == false, ExcNotImplemented());
+
+      // the following code is pretty much a verbatim copy of the sample
+      // code on the minimum_degree_ordering manual page of the BOOST
+      // Graph Library. It builds a directed graph with an edge in both
+      // directions between any two distinct DoF indices that share an
+      // active cell, lets BGL compute perm and inverse_perm, and copies
+      // one of the two into new_dof_indices at the end.
+      using namespace ::boost;
+
+      int delta = 0;
+
+      typedef double Type;  // NOTE(review): not referenced again in this function
+
+      // must be BGL directed graph now
+      typedef adjacency_list<vecS, vecS, directedS>  Graph;
+      typedef graph_traits<Graph>::vertex_descriptor Vertex;  // NOTE(review): unused here
+
+      int n = dof_handler.n_dofs();  // NOTE(review): presumably narrows a global_dof_index to int
+
+      Graph G(n);
+
+      std::vector<dealii::types::global_dof_index> dofs_on_this_cell;
+
+      typename DoFHandlerType::active_cell_iterator cell = dof_handler.begin_active(),
+                                                    endc = dof_handler.end();
+
+      for (; cell!=endc; ++cell)
+        {
+
+          const unsigned int dofs_per_cell = cell->get_fe().dofs_per_cell;
+
+          dofs_on_this_cell.resize (dofs_per_cell);
+
+          cell->get_active_or_mg_dof_indices (dofs_on_this_cell);
+          for (unsigned int i=0; i<dofs_per_cell; ++i)
+            for (unsigned int j=0; j<dofs_per_cell; ++j)
+              if (dofs_on_this_cell[i] > dofs_on_this_cell[j])
+                {
+                  add_edge (dofs_on_this_cell[i], dofs_on_this_cell[j], G);
+                  add_edge (dofs_on_this_cell[j], dofs_on_this_cell[i], G);
+                }
+        }
+
+
+      typedef std::vector<int> Vector;
+
+
+      Vector inverse_perm(n, 0);
+
+      Vector perm(n, 0);
+
+
+      Vector supernode_sizes(n, 1);
+      // init has to be 1
+
+      ::boost::property_map<Graph, vertex_index_t>::type
+      id = get(vertex_index, G);
+
+
+      Vector degree(n, 0);
+
+
+      minimum_degree_ordering
+      (G,
+       make_iterator_property_map(&degree[0], id, degree[0]),
+       &inverse_perm[0],
+       &perm[0],
+       make_iterator_property_map(&supernode_sizes[0], id, supernode_sizes[0]),
+       delta, id);
+
+      // sanity check: both arrays contain every index in [0,n) and are inverse to
+      // each other. NOTE(review): each pass does two linear std::find scans.
+      for (int i=0; i<n; ++i)
+        {
+          Assert (std::find (perm.begin(), perm.end(), i)
+                  != perm.end(),
+                  ExcInternalError());
+          Assert (std::find (inverse_perm.begin(), inverse_perm.end(), i)
+                  != inverse_perm.end(),
+                  ExcInternalError());
+          Assert (inverse_perm[perm[i]] == i, ExcInternalError());
+        }
+      // reversed numbering hands out perm, the default hands out inverse_perm
+      if (reversed_numbering == true)
+        std::copy (perm.begin(), perm.end(),
+                   new_dof_indices.begin());
+      else
+        std::copy (inverse_perm.begin(), inverse_perm.end(),
+                   new_dof_indices.begin());
+    }
+
+  }  // namespace boost
+
+
+
+  template <typename DoFHandlerType>
+  void
+  Cuthill_McKee (DoFHandlerType                             &dof_handler,
+                 const bool                                  reversed_numbering,
+                 const bool                                  use_constraints,
+                 const std::vector<types::global_dof_index> &starting_indices)
+  {
+    std::vector<types::global_dof_index> renumbering(dof_handler.locally_owned_dofs().n_elements(),
+                                                     DoFHandlerType::invalid_dof_index);
+    compute_Cuthill_McKee(renumbering, dof_handler, reversed_numbering,
+                          use_constraints, starting_indices);
+
+    // 'renumbering' has one entry per locally owned DoF (unlike the
+    // boost variants, which size it by n_dofs()); hand it to the
+    // DoFHandler, which performs the actual renumbering
+    dof_handler.renumber_dofs (renumbering);
+  }
+
+
+
+  // Fill new_indices with a Cuthill-McKee numbering of the DoFs owned by
+  // this processor.
+  //
+  // new_indices         must already have one entry per locally owned DoF
+  //                     (checked by assertion); receives the new numbers
+  // dof_handler         handler whose DoF couplings define the graph
+  // reversed_numbering  if true, the permutation is passed through
+  //                     Utilities::reverse_permutation()
+  // use_constraints     if true, hanging node constraints are taken into
+  //                     account when building the sparsity pattern
+  // starting_indices    global indices handed to the reordering routine as
+  //                     starting points; each must be locally owned
+  template <typename DoFHandlerType>
+  void
+  compute_Cuthill_McKee (std::vector<types::global_dof_index>       &new_indices,
+                         const DoFHandlerType                       &dof_handler,
+                         const bool                                  reversed_numbering,
+                         const bool                                  use_constraints,
+                         const std::vector<types::global_dof_index> &starting_indices)
+  {
+    // a processor that owns no DoFs has nothing to renumber
+    if (dof_handler.locally_owned_dofs().n_elements() == 0)
+      {
+        Assert (new_indices.size() == 0, ExcInternalError());
+        return;
+      }
+
+    // Build the connection graph via an intermediate dynamic sparsity
+    // pattern, since we don't have very good estimates for
+    // max_couplings_between_dofs() in 3d and relying on them leads to
+    // excessive memory consumption.
+    //
+    // If constraints were not requested the object below stays empty and
+    // has no effect on the pattern.
+    ConstraintMatrix hanging_node_constraints;
+    if (use_constraints)
+      DoFTools::make_hanging_node_constraints (dof_handler,
+                                               hanging_node_constraints);
+    hanging_node_constraints.close ();
+
+    const IndexSet owned_dofs = dof_handler.locally_owned_dofs();
+
+    DynamicSparsityPattern global_pattern (dof_handler.n_dofs(),
+                                           dof_handler.n_dofs(),
+                                           owned_dofs);
+    DoFTools::make_sparsity_pattern (dof_handler,
+                                     global_pattern,
+                                     hanging_node_constraints);
+
+    // the constraints have served their purpose, release them
+    hanging_node_constraints.clear ();
+
+    const bool owns_every_dof = (owned_dofs.n_elements() == owned_dofs.size());
+
+    if (owns_every_dof)
+      {
+        // the index set is complete: global and local numbering coincide
+        // and the pattern can be reordered directly
+        AssertDimension(new_indices.size(), global_pattern.n_rows());
+        SparsityTools::reorder_Cuthill_McKee (global_pattern, new_indices,
+                                              starting_indices);
+        if (reversed_numbering)
+          new_indices = Utilities::reverse_permutation (new_indices);
+        return;
+      }
+
+    // Otherwise move everything into the processor-local index space,
+    // run Cuthill-McKee there and translate the result back.
+    DynamicSparsityPattern local_pattern (owned_dofs.n_elements(),
+                                          owned_dofs.n_elements());
+    std::vector<types::global_dof_index> local_columns;
+    for (unsigned int local_row=0; local_row<owned_dofs.n_elements(); ++local_row)
+      {
+        const types::global_dof_index global_row
+          = owned_dofs.nth_index_in_set(local_row);
+
+        local_columns.clear();
+        for (DynamicSparsityPattern::iterator entry = global_pattern.begin(global_row);
+             entry != global_pattern.end(global_row); ++entry)
+          {
+            // skip the diagonal and couplings to DoFs that are not owned here
+            if (entry->column() == global_row
+                ||
+                !owned_dofs.is_element(entry->column()))
+              continue;
+
+            local_columns.push_back(owned_dofs.index_within_set(entry->column()));
+          }
+
+        local_pattern.add_entries(local_row,
+                                  local_columns.begin(),
+                                  local_columns.end(),
+                                  true);
+      }
+
+    // the starting indices arrive as global numbers; convert them as well
+    std::vector<types::global_dof_index> local_starting_indices (starting_indices.size());
+    for (unsigned int s=0; s<starting_indices.size(); ++s)
+      {
+        Assert (owned_dofs.is_element (starting_indices[s]),
+                ExcMessage ("You specified global degree of freedom "
+                            + Utilities::to_string(starting_indices[s]) +
+                            " as a starting index, but this index is not among the "
+                            "locally owned ones on this processor."));
+        local_starting_indices[s] = owned_dofs.index_within_set(starting_indices[s]);
+      }
+
+    // reorder the locally owned portion
+    AssertDimension(new_indices.size(), owned_dofs.n_elements());
+    SparsityTools::reorder_Cuthill_McKee (local_pattern, new_indices,
+                                          local_starting_indices);
+    if (reversed_numbering)
+      new_indices = Utilities::reverse_permutation (new_indices);
+
+    // and map the local result back to global indices
+    for (std::size_t k=0; k<new_indices.size(); ++k)
+      new_indices[k] = owned_dofs.nth_index_in_set(new_indices[k]);
+  }
+
+
+
+  // Renumber the multigrid DoFs on one level. The level sparsity pattern
+  // built by MGTools::make_sparsity_pattern() serves as the connection
+  // graph; the resulting permutation is optionally reversed and then
+  // applied to that level of the DoF handler.
+  template <typename DoFHandlerType>
+  void Cuthill_McKee (DoFHandlerType                             &dof_handler,
+                      const unsigned int                          level,
+                      const bool                                  reversed_numbering,
+                      const std::vector<types::global_dof_index> &starting_indices)
+  {
+    // level DoFs must have been distributed before they can be renumbered
+    Assert(dof_handler.n_dofs(level) != numbers::invalid_dof_index,
+           ExcNotInitialized());
+
+    // connection graph of the level
+    DynamicSparsityPattern level_pattern (dof_handler.n_dofs(level),
+                                          dof_handler.n_dofs(level));
+    MGTools::make_sparsity_pattern (dof_handler, level_pattern, level);
+
+    std::vector<types::global_dof_index> level_numbers (level_pattern.n_rows());
+    SparsityTools::reorder_Cuthill_McKee (level_pattern,
+                                          level_numbers,
+                                          starting_indices);
+
+    if (reversed_numbering)
+      level_numbers = Utilities::reverse_permutation (level_numbers);
+
+    // applying the permutation depends on the concrete handler type, so it
+    // is delegated to the handler itself
+    dof_handler.renumber_dofs (level, level_numbers);
+  }
+
+
+
+  // Renumber the active DoFs of a DoFHandler so that they are grouped by
+  // vector component. The numbering is computed by
+  // compute_component_wise() over all active cells; nothing is renumbered
+  // if that function reports 0 (nothing to do).
+  //
+  // component_order_arg is forwarded unchanged; see
+  // compute_component_wise() for its meaning.
+  template <int dim, int spacedim>
+  void
+  component_wise (DoFHandler<dim,spacedim>        &dof_handler,
+                  const std::vector<unsigned int> &component_order_arg)
+  {
+    // take the invalid marker from the handler type we are actually working
+    // on rather than from DoFHandler<dim> (== DoFHandler<dim,dim>)
+    std::vector<types::global_dof_index> renumbering (dof_handler.n_locally_owned_dofs(),
+                                                      DoFHandler<dim,spacedim>::invalid_dof_index);
+
+    typename DoFHandler<dim,spacedim>::active_cell_iterator
+    start = dof_handler.begin_active();
+    const typename DoFHandler<dim,spacedim>::level_cell_iterator
+    end = dof_handler.end();
+
+    const types::global_dof_index result =
+      compute_component_wise<dim,spacedim,
+      typename DoFHandler<dim,spacedim>::active_cell_iterator,
+      typename DoFHandler<dim,spacedim>::level_cell_iterator>
+      (renumbering, start, end, component_order_arg, false);
+    if (result == 0)
+      return;
+
+    // verify that the last numbered
+    // degree of freedom is either
+    // equal to the number of degrees
+    // of freedom in total (the
+    // sequential case) or in the
+    // distributed case at least
+    // makes sense
+    Assert ((result == dof_handler.n_locally_owned_dofs())
+            ||
+            ((dof_handler.n_locally_owned_dofs() < dof_handler.n_dofs())
+             &&
+             (result <= dof_handler.n_dofs())),
+            ExcRenumberingIncomplete());
+
+    dof_handler.renumber_dofs (renumbering);
+
+    // for (unsigned int level=0;level<dof_handler.get_triangulation().n_levels();++level)
+    //   if (dof_handler.n_dofs(level) != numbers::invalid_dof_index)
+    //  component_wise(dof_handler, level, component_order_arg);
+  }
+
+
+
+  // Component-wise renumbering of the active DoFs of an hp::DoFHandler.
+  // Works like the DoFHandler variant, except that the index vector has one
+  // entry per DoF of the handler and the computed numbering is required to
+  // cover all of them.
+  template <int dim>
+  void
+  component_wise (hp::DoFHandler<dim>             &dof_handler,
+                  const std::vector<unsigned int> &component_order_arg)
+  {
+//TODO: Merge with previous function
+    typedef hp::DoFHandler<dim>                        handler_type;
+    typedef typename handler_type::active_cell_iterator active_iterator;
+    typedef typename handler_type::level_cell_iterator  level_iterator;
+
+    std::vector<types::global_dof_index> new_numbers (dof_handler.n_dofs(),
+                                                      handler_type::invalid_dof_index);
+
+    active_iterator      first_cell     = dof_handler.begin_active();
+    const level_iterator past_last_cell = dof_handler.end();
+
+    const types::global_dof_index n_numbered =
+      compute_component_wise<handler_type::dimension,
+      handler_type::space_dimension,
+      active_iterator,
+      level_iterator>
+      (new_numbers, first_cell, past_last_cell, component_order_arg, false);
+
+    // zero means there was nothing to renumber
+    if (n_numbered == 0)
+      return;
+
+    Assert (n_numbered == dof_handler.n_dofs(),
+            ExcRenumberingIncomplete());
+
+    dof_handler.renumber_dofs (new_numbers);
+  }
+
+
+
+  // Component-wise renumbering of the multigrid DoFs on a single level.
+  // The numbering is computed over the cells of that level with the
+  // level-operation flag set and is applied with the level variant of
+  // renumber_dofs().
+  template <typename DoFHandlerType>
+  void
+  component_wise (DoFHandlerType                  &dof_handler,
+                  const unsigned int               level,
+                  const std::vector<unsigned int> &component_order_arg)
+  {
+    // level DoFs must have been distributed before they can be renumbered
+    Assert(dof_handler.n_dofs(level) != numbers::invalid_dof_index,
+           ExcNotInitialized());
+
+    typedef typename DoFHandlerType::level_cell_iterator level_iterator;
+
+    std::vector<types::global_dof_index> level_numbers (dof_handler.n_dofs(level),
+                                                        DoFHandlerType::invalid_dof_index);
+
+    level_iterator first_cell     = dof_handler.begin(level);
+    level_iterator past_last_cell = dof_handler.end(level);
+
+    const types::global_dof_index n_numbered =
+      compute_component_wise<DoFHandlerType::dimension,
+      DoFHandlerType::space_dimension,
+      level_iterator,
+      level_iterator>
+      (level_numbers, first_cell, past_last_cell, component_order_arg, true);
+
+    // zero means there was nothing to renumber
+    if (n_numbered == 0)
+      return;
+
+    Assert (n_numbered == dof_handler.n_dofs(level),
+            ExcRenumberingIncomplete());
+
+    if (!level_numbers.empty())
+      dof_handler.renumber_dofs (level, level_numbers);
+  }
+
+
+
+  // Compute a component-wise numbering for the DoFs on the cells in
+  // [start, end).
+  //
+  // new_indices          on return, the entry at position
+  //                      locally_owned_dofs().index_within_set(d) holds the
+  //                      new number of the locally owned DoF d; the caller
+  //                      must size it accordingly. It is resized to zero if
+  //                      the element has only one component.
+  // start, end           range of cells to visit
+  // component_order_arg  target bucket for each vector component; an empty
+  //                      vector selects the order found in the element
+  // is_level_operation   if true, cells are filtered with
+  //                      is_locally_owned_on_level(), otherwise with
+  //                      is_locally_owned()
+  //
+  // Returns 0 if there was nothing to do, otherwise one past the last index
+  // handed out for the final bucket.
+  template <int dim, int spacedim, class ITERATOR, class ENDITERATOR>
+  types::global_dof_index
+  compute_component_wise (std::vector<types::global_dof_index> &new_indices,
+                          const ITERATOR    &start,
+                          const ENDITERATOR &end,
+                          const std::vector<unsigned int> &component_order_arg,
+                          bool is_level_operation)
+  {
+    const hp::FECollection<dim,spacedim>
+    fe_collection (start->get_dof_handler().get_fe ());
+
+    // do nothing if the FE has only
+    // one component
+    if (fe_collection.n_components() == 1)
+      {
+        new_indices.resize(0);
+        return 0;
+      }
+
+    // the set of locally owned DoFs is queried for every DoF below, so
+    // look it up only once
+    const IndexSet &locally_owned
+      = start->get_dof_handler().locally_owned_dofs();
+
+    // Copy last argument into a
+    // writable vector.
+    std::vector<unsigned int> component_order (component_order_arg);
+    // If the last argument was an
+    // empty vector, set up things to
+    // store components in the order
+    // found in the system.
+    if (component_order.size() == 0)
+      for (unsigned int i=0; i<fe_collection.n_components(); ++i)
+        component_order.push_back (i);
+
+    Assert (component_order.size() == fe_collection.n_components(),
+            ExcDimensionMismatch(component_order.size(), fe_collection.n_components()));
+
+    for (unsigned int i=0; i<component_order.size(); ++i)
+      Assert(component_order[i] < fe_collection.n_components(),
+             ExcIndexRange(component_order[i], 0, fe_collection.n_components()));
+
+    // vector to hold the dof indices on
+    // the cell we visit at a time
+    std::vector<types::global_dof_index> local_dof_indices;
+
+    // prebuilt list to which component
+    // a given dof on a cell
+    // should go. note that we get into
+    // trouble here if the shape
+    // function is not primitive, since
+    // then there is no single vector
+    // component to which it
+    // belongs. in this case, assign it
+    // to the first vector component to
+    // which it belongs
+    std::vector<std::vector<unsigned int> > component_list (fe_collection.size());
+    for (unsigned int f=0; f<fe_collection.size(); ++f)
+      {
+        const FiniteElement<dim,spacedim> &fe = fe_collection[f];
+        const unsigned int dofs_per_cell = fe.dofs_per_cell;
+        component_list[f].resize(dofs_per_cell);
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          if (fe.is_primitive(i))
+            component_list[f][i]
+              = component_order[fe.system_to_component_index(i).first];
+          else
+            {
+              const unsigned int comp
+                = fe.get_nonzero_components(i).first_selected_component();
+
+              // then associate this degree
+              // of freedom with this
+              // component
+              component_list[f][i] = component_order[comp];
+            }
+      }
+
+    // set up a map where for each
+    // component the respective degrees
+    // of freedom are collected.
+    //
+    // note that this map is sorted by
+    // component but that within each
+    // component it is NOT sorted by
+    // dof index. note also that some
+    // dof indices are entered
+    // multiply, so we will have to
+    // take care of that
+    std::vector<std::vector<types::global_dof_index> >
+    component_to_dof_map (fe_collection.n_components());
+    for (ITERATOR cell=start; cell!=end; ++cell)
+      {
+        if (is_level_operation)
+          {
+            //we are dealing with mg dofs, skip foreign level cells:
+            if (!cell->is_locally_owned_on_level())
+              continue;
+          }
+        else
+          {
+            //we are dealing with active dofs, skip the loop if not locally
+            // owned:
+            if (!cell->is_locally_owned())
+              continue;
+          }
+        // on each cell: get dof indices
+        // and insert them into the global
+        // list using their component
+        const unsigned int fe_index = cell->active_fe_index();
+        const unsigned int dofs_per_cell = fe_collection[fe_index].dofs_per_cell;
+        local_dof_indices.resize (dofs_per_cell);
+        cell->get_active_or_mg_dof_indices (local_dof_indices);
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          if (locally_owned.is_element(local_dof_indices[i]))
+            component_to_dof_map[component_list[fe_index][i]].
+            push_back (local_dof_indices[i]);
+      }
+
+    // now we've got all indices sorted
+    // into buckets labeled by their
+    // target component number. we've
+    // only got to traverse this list
+    // and assign the new indices
+    //
+    // however, we first want to sort
+    // the indices entered into the
+    // buckets to preserve the order
+    // within each component and during
+    // this also remove duplicate
+    // entries
+    //
+    // note that we no longer have to
+    // care about non-primitive shape
+    // functions since the buckets
+    // corresponding to the second and
+    // following vector components of a
+    // non-primitive FE will simply be
+    // empty, everything being shoved
+    // into the first one. The same
+    // holds if several components were
+    // joined into a single target.
+    for (unsigned int component=0; component<fe_collection.n_components();
+         ++component)
+      {
+        std::sort (component_to_dof_map[component].begin(),
+                   component_to_dof_map[component].end());
+        component_to_dof_map[component]
+        .erase (std::unique (component_to_dof_map[component].begin(),
+                             component_to_dof_map[component].end()),
+                component_to_dof_map[component].end());
+      }
+
+    // calculate the number of locally owned
+    // DoFs per bucket
+    const unsigned int n_buckets = fe_collection.n_components();
+    std::vector<types::global_dof_index> shifts(n_buckets);
+
+    if (const parallel::Triangulation<dim,spacedim> *tria
+        = (dynamic_cast<const parallel::Triangulation<dim,spacedim>*>
+           (&start->get_dof_handler().get_triangulation())))
+      {
+#ifdef DEAL_II_WITH_MPI
+        const unsigned int n_processes
+          = Utilities::MPI::n_mpi_processes (tria->get_communicator());
+
+        std::vector<types::global_dof_index> local_dof_count(n_buckets);
+
+        for (unsigned int c=0; c<n_buckets; ++c)
+          local_dof_count[c] = component_to_dof_map[c].size();
+
+
+        // gather information from all CPUs: n_buckets counts per process,
+        // stored process after process
+        std::vector<types::global_dof_index>
+        all_dof_counts(n_buckets * n_processes);
+
+        MPI_Allgather ( &local_dof_count[0],
+                        n_buckets, DEAL_II_DOF_INDEX_MPI_TYPE,
+                        &all_dof_counts[0],
+                        n_buckets, DEAL_II_DOF_INDEX_MPI_TYPE,
+                        tria->get_communicator());
+
+        for (unsigned int i=0; i<n_buckets; ++i)
+          Assert (all_dof_counts[n_buckets*tria->locally_owned_subdomain()+i]
+                  ==
+                  local_dof_count[i],
+                  ExcInternalError());
+
+        //calculate shifts. the running total is a sum of global DoF
+        //counts and therefore needs the full width of the DoF index type
+        types::global_dof_index cumulated = 0;
+        for (unsigned int c=0; c<n_buckets; ++c)
+          {
+            // a bucket starts after all previous buckets (on all
+            // processes) and after the same bucket on lower subdomains
+            shifts[c]=cumulated;
+            for (types::subdomain_id i=0; i<tria->locally_owned_subdomain(); ++i)
+              shifts[c] += all_dof_counts[c+n_buckets*i];
+            for (unsigned int i=0; i<n_processes; ++i)
+              cumulated += all_dof_counts[c+n_buckets*i];
+          }
+#else
+        (void)tria;
+        Assert (false, ExcInternalError());
+#endif
+      }
+    else
+      {
+        shifts[0] = 0;
+        for (unsigned int c=1; c<fe_collection.n_components(); ++c)
+          shifts[c] = shifts[c-1] + component_to_dof_map[c-1].size();
+      }
+
+
+
+
+    // now concatenate all the
+    // components in the order the user
+    // desired to see
+    types::global_dof_index next_free_index = 0;
+    for (unsigned int component=0; component<fe_collection.n_components(); ++component)
+      {
+        const typename std::vector<types::global_dof_index>::const_iterator
+        begin_of_component = component_to_dof_map[component].begin(),
+        end_of_component   = component_to_dof_map[component].end();
+
+        next_free_index = shifts[component];
+
+        for (typename std::vector<types::global_dof_index>::const_iterator
+             dof_index = begin_of_component;
+             dof_index != end_of_component; ++dof_index)
+          {
+            Assert (locally_owned.index_within_set(*dof_index)
+                    <
+                    new_indices.size(),
+                    ExcInternalError());
+            new_indices[locally_owned.index_within_set(*dof_index)]
+              = next_free_index++;
+          }
+      }
+
+    return next_free_index;
+  }
+
+
+
+  // Renumber the active DoFs of a DoFHandler so that they are grouped by
+  // block. The numbering is computed by compute_block_wise() over all
+  // active cells; nothing is renumbered if that function reports 0
+  // (nothing to do).
+  template <int dim, int spacedim>
+  void
+  block_wise (DoFHandler<dim,spacedim> &dof_handler)
+  {
+    // take the invalid marker from the handler type we are actually working
+    // on rather than from DoFHandler<dim> (== DoFHandler<dim,dim>)
+    std::vector<types::global_dof_index> renumbering (dof_handler.n_locally_owned_dofs(),
+                                                      DoFHandler<dim,spacedim>::invalid_dof_index);
+
+    typename DoFHandler<dim,spacedim>::active_cell_iterator
+    start = dof_handler.begin_active();
+    const typename DoFHandler<dim,spacedim>::level_cell_iterator
+    end = dof_handler.end();
+
+    const types::global_dof_index result =
+      compute_block_wise<dim, spacedim, typename DoFHandler<dim,spacedim>::active_cell_iterator,
+      typename DoFHandler<dim,spacedim>::level_cell_iterator>
+      (renumbering, start, end, false);
+    if (result == 0)
+      return;
+
+    // verify that the last numbered
+    // degree of freedom is either
+    // equal to the number of degrees
+    // of freedom in total (the
+    // sequential case) or in the
+    // distributed case at least
+    // makes sense
+    Assert ((result == dof_handler.n_locally_owned_dofs())
+            ||
+            ((dof_handler.n_locally_owned_dofs() < dof_handler.n_dofs())
+             &&
+             (result <= dof_handler.n_dofs())),
+            ExcRenumberingIncomplete());
+
+    dof_handler.renumber_dofs (renumbering);
+  }
+
+
+
+  // Block-wise renumbering of the active DoFs of an hp::DoFHandler. The
+  // index vector has one entry per DoF of the handler and the computed
+  // numbering is required to cover all of them.
+  template <int dim, int spacedim>
+  void
+  block_wise (hp::DoFHandler<dim,spacedim> &dof_handler)
+  {
+    typedef hp::DoFHandler<dim,spacedim>                handler_type;
+    typedef typename handler_type::active_cell_iterator active_iterator;
+    typedef typename handler_type::level_cell_iterator  level_iterator;
+
+    std::vector<types::global_dof_index> new_numbers (dof_handler.n_dofs(),
+                                                      handler_type::invalid_dof_index);
+
+    active_iterator      first_cell     = dof_handler.begin_active();
+    const level_iterator past_last_cell = dof_handler.end();
+
+    const types::global_dof_index n_numbered =
+      compute_block_wise<dim, spacedim, active_iterator, level_iterator>
+      (new_numbers, first_cell, past_last_cell, false);
+
+    // zero means there was nothing to renumber
+    if (n_numbered == 0)
+      return;
+
+    Assert (n_numbered == dof_handler.n_dofs(),
+            ExcRenumberingIncomplete());
+
+    dof_handler.renumber_dofs (new_numbers);
+  }
+
+
+
+  // Block-wise renumbering of the multigrid DoFs on a single level. The
+  // numbering is computed over the cells of that level with the
+  // level-operation flag set and is applied with the level variant of
+  // renumber_dofs().
+  template <int dim, int spacedim>
+  void
+  block_wise (DoFHandler<dim,spacedim> &dof_handler, const unsigned int level)
+  {
+    // level DoFs must have been distributed before they can be renumbered
+    Assert(dof_handler.n_dofs(level) != numbers::invalid_dof_index,
+           ExcNotInitialized());
+
+    typedef typename DoFHandler<dim, spacedim>::level_cell_iterator level_iterator;
+
+    std::vector<types::global_dof_index> level_numbers (dof_handler.n_dofs(level),
+                                                        DoFHandler<dim, spacedim>::invalid_dof_index);
+
+    level_iterator first_cell     = dof_handler.begin(level);
+    level_iterator past_last_cell = dof_handler.end(level);
+
+    const types::global_dof_index n_numbered =
+      compute_block_wise<dim, spacedim, level_iterator, level_iterator>
+      (level_numbers, first_cell, past_last_cell, true);
+
+    // zero means there was nothing to renumber
+    if (n_numbered == 0)
+      return;
+
+    Assert (n_numbered == dof_handler.n_dofs(level),
+            ExcRenumberingIncomplete());
+
+    if (!level_numbers.empty())
+      dof_handler.renumber_dofs (level, level_numbers);
+  }
+
+
+
+  // Compute a block-wise numbering for the DoFs on the cells in
+  // [start, end).
+  //
+  // new_indices         on return, the entry at position
+  //                     locally_owned_dofs().index_within_set(d) holds the
+  //                     new number of the locally owned DoF d; the caller
+  //                     must size it accordingly. It is resized to zero if
+  //                     the element has only one block.
+  // start, end          range of cells to visit
+  // is_level_operation  if true, cells are filtered with
+  //                     is_locally_owned_on_level(), otherwise with
+  //                     is_locally_owned()
+  //
+  // Returns 0 if there was nothing to do, otherwise one past the last index
+  // handed out for the final block.
+  template <int dim, int spacedim, class ITERATOR, class ENDITERATOR>
+  types::global_dof_index
+  compute_block_wise (std::vector<types::global_dof_index> &new_indices,
+                      const ITERATOR    &start,
+                      const ENDITERATOR &end,
+                      const bool is_level_operation)
+  {
+    const hp::FECollection<dim,spacedim>
+    fe_collection (start->get_dof_handler().get_fe ());
+
+    // do nothing if the FE has only
+    // one block
+    if (fe_collection.n_blocks() == 1)
+      {
+        new_indices.resize(0);
+        return 0;
+      }
+
+    // the set of locally owned DoFs is queried for every DoF below, so
+    // look it up only once
+    const IndexSet &locally_owned
+      = start->get_dof_handler().locally_owned_dofs();
+
+    // vector to hold the dof indices on
+    // the cell we visit at a time
+    std::vector<types::global_dof_index> local_dof_indices;
+
+    // prebuilt list to which block
+    // a given dof on a cell
+    // should go.
+    std::vector<std::vector<types::global_dof_index> > block_list (fe_collection.size());
+    for (unsigned int f=0; f<fe_collection.size(); ++f)
+      {
+        const FiniteElement<dim,spacedim> &fe = fe_collection[f];
+        block_list[f].resize(fe.dofs_per_cell);
+        for (unsigned int i=0; i<fe.dofs_per_cell; ++i)
+          block_list[f][i]
+            = fe.system_to_block_index(i).first;
+      }
+
+    // set up a map where for each
+    // block the respective degrees
+    // of freedom are collected.
+    //
+    // note that this map is sorted by
+    // block but that within each
+    // block it is NOT sorted by
+    // dof index. note also that some
+    // dof indices are entered
+    // multiply, so we will have to
+    // take care of that
+    std::vector<std::vector<types::global_dof_index> >
+    block_to_dof_map (fe_collection.n_blocks());
+    for (ITERATOR cell=start; cell!=end; ++cell)
+      {
+        if (is_level_operation)
+          {
+            //we are dealing with mg dofs, skip foreign level cells:
+            if (!cell->is_locally_owned_on_level())
+              continue;
+          }
+        else
+          {
+            //we are dealing with active dofs, skip the loop if not locally
+            // owned:
+            if (!cell->is_locally_owned())
+              continue;
+          }
+
+        // on each cell: get dof indices
+        // and insert them into the global
+        // list using their block
+        const unsigned int fe_index = cell->active_fe_index();
+        const unsigned int dofs_per_cell =fe_collection[fe_index].dofs_per_cell;
+        local_dof_indices.resize (dofs_per_cell);
+        cell->get_active_or_mg_dof_indices (local_dof_indices);
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          if (locally_owned.is_element(local_dof_indices[i]))
+            block_to_dof_map[block_list[fe_index][i]].
+            push_back (local_dof_indices[i]);
+      }
+
+    // now we've got all indices sorted
+    // into buckets labeled by their
+    // target block number. we've
+    // only got to traverse this list
+    // and assign the new indices
+    //
+    // however, we first want to sort
+    // the indices entered into the
+    // buckets to preserve the order
+    // within each block and during
+    // this also remove duplicate
+    // entries
+    for (unsigned int block=0; block<fe_collection.n_blocks();
+         ++block)
+      {
+        std::sort (block_to_dof_map[block].begin(),
+                   block_to_dof_map[block].end());
+        block_to_dof_map[block]
+        .erase (std::unique (block_to_dof_map[block].begin(),
+                             block_to_dof_map[block].end()),
+                block_to_dof_map[block].end());
+      }
+
+    // calculate the number of locally owned
+    // DoFs per bucket
+    const unsigned int n_buckets = fe_collection.n_blocks();
+    std::vector<types::global_dof_index> shifts(n_buckets);
+
+    if (const parallel::Triangulation<dim,spacedim> *tria
+        = (dynamic_cast<const parallel::Triangulation<dim,spacedim>*>
+           (&start->get_dof_handler().get_triangulation())))
+      {
+#ifdef DEAL_II_WITH_MPI
+        const unsigned int n_processes
+          = Utilities::MPI::n_mpi_processes (tria->get_communicator());
+
+        std::vector<types::global_dof_index> local_dof_count(n_buckets);
+
+        for (unsigned int c=0; c<n_buckets; ++c)
+          local_dof_count[c] = block_to_dof_map[c].size();
+
+
+        // gather information from all CPUs: n_buckets counts per process,
+        // stored process after process. the buffer is sized with the same
+        // stride (the number of blocks) that is used to index it below
+        std::vector<types::global_dof_index>
+        all_dof_counts(n_buckets * n_processes);
+
+        MPI_Allgather ( &local_dof_count[0],
+                        n_buckets, DEAL_II_DOF_INDEX_MPI_TYPE,
+                        &all_dof_counts[0],
+                        n_buckets, DEAL_II_DOF_INDEX_MPI_TYPE,
+                        tria->get_communicator());
+
+        for (unsigned int i=0; i<n_buckets; ++i)
+          Assert (all_dof_counts[n_buckets*tria->locally_owned_subdomain()+i]
+                  ==
+                  local_dof_count[i],
+                  ExcInternalError());
+
+        //calculate shifts
+        types::global_dof_index cumulated = 0;
+        for (unsigned int c=0; c<n_buckets; ++c)
+          {
+            // a bucket starts after all previous buckets (on all
+            // processes) and after the same bucket on lower subdomains
+            shifts[c]=cumulated;
+            for (types::subdomain_id i=0; i<tria->locally_owned_subdomain(); ++i)
+              shifts[c] += all_dof_counts[c+n_buckets*i];
+            for (unsigned int i=0; i<n_processes; ++i)
+              cumulated += all_dof_counts[c+n_buckets*i];
+          }
+#else
+        (void)tria;
+        Assert (false, ExcInternalError());
+#endif
+      }
+    else
+      {
+        shifts[0] = 0;
+        for (unsigned int c=1; c<fe_collection.n_blocks(); ++c)
+          shifts[c] = shifts[c-1] + block_to_dof_map[c-1].size();
+      }
+
+
+
+
+    // now concatenate all the
+    // blocks, one after the other
+    types::global_dof_index next_free_index = 0;
+    for (unsigned int block=0; block<fe_collection.n_blocks(); ++block)
+      {
+        const typename std::vector<types::global_dof_index>::const_iterator
+        begin_of_component = block_to_dof_map[block].begin(),
+        end_of_component   = block_to_dof_map[block].end();
+
+        next_free_index = shifts[block];
+
+        for (typename std::vector<types::global_dof_index>::const_iterator
+             dof_index = begin_of_component;
+             dof_index != end_of_component; ++dof_index)
+          {
+            Assert (locally_owned.index_within_set(*dof_index)
+                    <
+                    new_indices.size(),
+                    ExcInternalError());
+            new_indices[locally_owned.index_within_set(*dof_index)]
+              = next_free_index++;
+          }
+      }
+
+    return next_free_index;
+  }
+
+
+
+  namespace
+  {
+    // Helper function for hierarchical().
+    //
+    // Descends from the given cell to its childless descendants, visiting
+    // children in child order, and numbers the locally owned DoFs of every
+    // locally owned childless cell in the order in which they are first
+    // encountered.
+    //
+    // next_free      number of DoFs that have been numbered so far
+    // new_indices    one entry per locally owned DoF; entries still equal
+    //                to the invalid marker have not been numbered yet
+    // cell           root of the subtree to process
+    // locally_owned  set of DoFs owned by this processor
+    //
+    // Returns the updated count of numbered DoFs.
+    //
+    // Note that this function only works for active dofs.
+    template <int dim, class iterator>
+    types::global_dof_index
+    compute_hierarchical_recursive (
+      types::global_dof_index next_free,
+      std::vector<types::global_dof_index> &new_indices,
+      const iterator &cell,
+      const IndexSet &locally_owned)
+    {
+      if (cell->has_children())
+        {
+          //recursion
+          for (unsigned int c = 0; c < GeometryInfo<dim>::max_children_per_cell; ++c)
+            next_free = compute_hierarchical_recursive<dim> (
+                          next_free,
+                          new_indices,
+                          cell->child (c),
+                          locally_owned);
+        }
+      else
+        {
+          if (cell->is_locally_owned())
+            {
+              const unsigned int dofs_per_cell = cell->get_fe().dofs_per_cell;
+              std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+              cell->get_dof_indices (local_dof_indices);
+
+              for (unsigned int i = 0; i < dofs_per_cell; ++i)
+                {
+                  if (locally_owned.is_element (local_dof_indices[i]))
+                    {
+                      // this is a locally owned DoF, assign new number if not assigned a number yet.
+                      // keep the position in the DoF index type so that it is not
+                      // narrowed when that type is wider than unsigned int
+                      const types::global_dof_index idx
+                        = locally_owned.index_within_set (local_dof_indices[i]);
+                      if (new_indices[idx] == DoFHandler<dim>::invalid_dof_index)
+                        {
+                          new_indices[idx] = locally_owned.nth_index_in_set (next_free);
+                          next_free++;
+                        }
+                    }
+                }
+            }
+        }
+      return next_free;
+    }
+  }
+
+
+
+  /**
+   * Renumber the locally owned degrees of freedom of @p dof_handler in the
+   * order in which compute_hierarchical_recursive() meets them while
+   * descending from the coarse (level 0) cells.
+   *
+   * If the triangulation is a parallel::distributed::Triangulation, the
+   * coarse cells are visited in the order given by its p4est
+   * tree-to-coarse-cell permutation (not implemented without
+   * DEAL_II_WITH_P4EST); otherwise they are visited in the order of the
+   * level-0 cell iterators of the DoFHandler.
+   */
+  template <int dim>
+  void
+  hierarchical (DoFHandler<dim> &dof_handler)
+  {
+    const IndexSet owned_dofs = dof_handler.locally_owned_dofs();
+
+    // one slot per locally owned DoF; a slot keeps the invalid marker until
+    // the traversal below has assigned it a new number
+    std::vector<types::global_dof_index> new_numbers (dof_handler.n_locally_owned_dofs(),
+                                                      DoFHandler<dim>::invalid_dof_index);
+    types::global_dof_index n_assigned = 0;
+
+    const parallel::distributed::Triangulation<dim> *const distributed_tria
+      = dynamic_cast<const parallel::distributed::Triangulation<dim>*>
+        (&dof_handler.get_triangulation());
+
+    if (!distributed_tria)
+      {
+        // not a distributed Triangulation: walk the coarse cells in their
+        // normal order
+        for (typename DoFHandler<dim>::level_cell_iterator coarse_cell = dof_handler.begin (0);
+             coarse_cell != dof_handler.end (0); ++coarse_cell)
+          n_assigned = compute_hierarchical_recursive<dim> (n_assigned,
+                                                            new_numbers,
+                                                            coarse_cell,
+                                                            owned_dofs);
+      }
+    else
+      {
+#ifdef DEAL_II_WITH_P4EST
+        // distributed Triangulation: the coarse cells have to be traversed
+        // in the order p4est uses
+        for (unsigned int tree = 0; tree < distributed_tria->n_cells (0); ++tree)
+          {
+            const unsigned int coarse_cell_index =
+              distributed_tria->get_p4est_tree_to_coarse_cell_permutation() [tree];
+
+            const typename DoFHandler<dim>::level_cell_iterator
+            coarse_cell (distributed_tria, 0, coarse_cell_index, &dof_handler);
+
+            n_assigned = compute_hierarchical_recursive<dim> (n_assigned,
+                                                              new_numbers,
+                                                              coarse_cell,
+                                                              owned_dofs);
+          }
+#else
+        Assert (false, ExcNotImplemented());
+#endif
+      }
+
+    // the number of DoFs that received a new index must either match the
+    // number of locally owned DoFs exactly (the sequential case) or, in the
+    // distributed case, at least not exceed the global number of DoFs
+    Assert ((n_assigned == dof_handler.n_locally_owned_dofs())
+            ||
+            ((dof_handler.n_locally_owned_dofs() < dof_handler.n_dofs())
+             &&
+             (n_assigned <= dof_handler.n_dofs())),
+            ExcRenumberingIncomplete());
+
+    // no locally owned DoF may have been left without a new number
+    Assert (std::find (new_numbers.begin(), new_numbers.end(),
+                       numbers::invalid_dof_index)
+            == new_numbers.end(),
+            ExcInternalError());
+
+    dof_handler.renumber_dofs(new_numbers);
+  }
+
+
+
+  /**
+   * Renumber the DoFs of @p dof_handler so that the DoFs flagged in
+   * @p selected_dofs are numbered after all unflagged ones. The numbering is
+   * computed by compute_sort_selected_dofs_back() and then handed to
+   * DoFHandlerType::renumber_dofs().
+   */
+  template <typename DoFHandlerType>
+  void
+  sort_selected_dofs_back (DoFHandlerType          &dof_handler,
+                           const std::vector<bool> &selected_dofs)
+  {
+    const types::global_dof_index n_dofs = dof_handler.n_dofs();
+
+    // start from "invalid" everywhere; the compute function overwrites
+    // every entry
+    std::vector<types::global_dof_index> new_numbers(n_dofs,
+                                                     DoFHandlerType::invalid_dof_index);
+    compute_sort_selected_dofs_back(new_numbers, dof_handler, selected_dofs);
+
+    dof_handler.renumber_dofs(new_numbers);
+  }
+
+
+
+  /**
+   * Level variant of sort_selected_dofs_back(): renumber the DoFs that
+   * @p dof_handler reports for @p level so that the DoFs flagged in
+   * @p selected_dofs are numbered after all unflagged ones.
+   *
+   * Asserts that dof_handler.n_dofs(level) is not
+   * numbers::invalid_dof_index.
+   */
+  template <typename DoFHandlerType>
+  void
+  sort_selected_dofs_back (DoFHandlerType          &dof_handler,
+                           const std::vector<bool> &selected_dofs,
+                           const unsigned int       level)
+  {
+    Assert(dof_handler.n_dofs(level) != numbers::invalid_dof_index,
+           ExcNotInitialized());
+
+    // start from "invalid" everywhere; the compute function overwrites
+    // every entry
+    std::vector<types::global_dof_index> new_numbers(dof_handler.n_dofs(level),
+                                                     DoFHandlerType::invalid_dof_index);
+    compute_sort_selected_dofs_back(new_numbers, dof_handler, selected_dofs, level);
+
+    dof_handler.renumber_dofs(level, new_numbers);
+  }
+
+
+
+  /**
+   * Compute a numbering that moves the selected DoFs to the back: DoFs whose
+   * flag in @p selected_dofs is false receive the numbers
+   * 0 ... n_unselected-1, flagged DoFs the numbers n_unselected ... n_dofs-1.
+   * Inside each of the two groups the DoFs keep their relative order.
+   *
+   * @param new_indices   Output; must already have size dof_handler.n_dofs().
+   *                      Entry i receives the new number of DoF i.
+   * @param dof_handler   Only queried for its number of DoFs.
+   * @param selected_dofs One flag per DoF; must have size
+   *                      dof_handler.n_dofs().
+   */
+  template <typename DoFHandlerType>
+  void
+  compute_sort_selected_dofs_back (std::vector<types::global_dof_index> &new_indices,
+                                   const DoFHandlerType                 &dof_handler,
+                                   const std::vector<bool>              &selected_dofs)
+  {
+    const types::global_dof_index n_dofs = dof_handler.n_dofs();
+    Assert (selected_dofs.size() == n_dofs,
+            ExcDimensionMismatch (selected_dofs.size(), n_dofs));
+    Assert (new_indices.size() == n_dofs,
+            ExcDimensionMismatch(new_indices.size(), n_dofs));
+
+    // the unselected DoFs fill the front of the new numbering, so their
+    // count is also the first number handed to a selected DoF
+    const types::global_dof_index n_unselected_dofs = std::count (selected_dofs.begin(),
+                                                        selected_dofs.end(),
+                                                        false);
+
+    types::global_dof_index unselected_counter = 0;
+    types::global_dof_index selected_counter   = n_unselected_dofs;
+    for (types::global_dof_index dof=0; dof<n_dofs; ++dof)
+      {
+        // pick the running counter of the group this DoF belongs to
+        types::global_dof_index &counter = (selected_dofs[dof]
+                                            ?
+                                            selected_counter
+                                            :
+                                            unselected_counter);
+        new_indices[dof] = counter;
+        ++counter;
+      }
+
+    Assert (unselected_counter == n_unselected_dofs, ExcInternalError());
+    Assert (selected_counter == n_dofs, ExcInternalError());
+  }
+
+
+
+  /**
+   * Level variant of compute_sort_selected_dofs_back(): compute a numbering
+   * of the DoFs that @p dof_handler reports for @p level in which DoFs whose
+   * flag in @p selected_dofs is false come first and flagged DoFs come last.
+   * Inside each of the two groups the DoFs keep their relative order.
+   *
+   * All counts and counters use types::global_dof_index, like the non-level
+   * overload, so that nothing is truncated when that type is wider than
+   * unsigned int.
+   *
+   * @param new_indices   Output; must already have size
+   *                      dof_handler.n_dofs(level). Entry i receives the new
+   *                      number of level DoF i.
+   * @param dof_handler   Only queried for its number of DoFs on @p level.
+   * @param selected_dofs One flag per level DoF; must have size
+   *                      dof_handler.n_dofs(level).
+   * @param level         Level whose DoFs are renumbered.
+   */
+  template <typename DoFHandlerType>
+  void
+  compute_sort_selected_dofs_back (std::vector<types::global_dof_index> &new_indices,
+                                   const DoFHandlerType                 &dof_handler,
+                                   const std::vector<bool>              &selected_dofs,
+                                   const unsigned int                    level)
+  {
+    Assert(dof_handler.n_dofs(level) != numbers::invalid_dof_index,
+           ExcNotInitialized());
+
+    const types::global_dof_index n_dofs = dof_handler.n_dofs(level);
+    Assert (selected_dofs.size() == n_dofs,
+            ExcDimensionMismatch (selected_dofs.size(), n_dofs));
+
+    // re-sort the dofs according to
+    // their selection state
+    Assert (new_indices.size() == n_dofs,
+            ExcDimensionMismatch(new_indices.size(), n_dofs));
+
+    // number of DoFs that are NOT selected; they fill the front of the new
+    // numbering, so this is also the first number given to a selected DoF
+    const types::global_dof_index n_unselected_dofs = std::count (selected_dofs.begin(),
+                                                        selected_dofs.end(),
+                                                        false);
+
+    types::global_dof_index next_unselected = 0;
+    types::global_dof_index next_selected   = n_unselected_dofs;
+    for (types::global_dof_index i=0; i<n_dofs; ++i)
+      if (selected_dofs[i] == false)
+        {
+          new_indices[i] = next_unselected;
+          ++next_unselected;
+        }
+      else
+        {
+          new_indices[i] = next_selected;
+          ++next_selected;
+        }
+    Assert (next_unselected == n_unselected_dofs, ExcInternalError());
+    Assert (next_selected == n_dofs, ExcInternalError());
+  }
+
+
+
+  /**
+   * Renumber the DoFs of @p dof cell by cell, following the order of the
+   * active cells given in @p cells. The numbering is computed by
+   * compute_cell_wise() and then handed to DoFHandlerType::renumber_dofs().
+   */
+  template <typename DoFHandlerType>
+  void
+  cell_wise (DoFHandlerType &dof,
+             const std::vector<typename DoFHandlerType::active_cell_iterator> &cells)
+  {
+    const types::global_dof_index n_dofs = dof.n_dofs();
+
+    // compute_cell_wise() fills both maps; only the forward one is needed
+    // for the renumbering itself
+    std::vector<types::global_dof_index> new_numbers(n_dofs);
+    std::vector<types::global_dof_index> inverse_numbers(n_dofs);
+    compute_cell_wise(new_numbers, inverse_numbers, dof, cells);
+
+    dof.renumber_dofs(new_numbers);
+  }
+
+
+  /**
+   * Compute a numbering in which the DoFs appear in the order in which they
+   * are first met when walking over @p cells. The DoFs of a single cell are
+   * sorted by their current index before they are appended, so their
+   * relative order inside a cell is preserved.
+   *
+   * @param new_indices Output; must have size dof.n_dofs(). Entry i receives
+   *                    the new number of DoF i.
+   * @param reverse     Output; must have size dof.n_dofs(). Entry k receives
+   *                    the old index of the DoF that gets new number k, i.e.
+   *                    the inverse of @p new_indices.
+   * @param dof         DoF handler whose DoFs are renumbered.
+   * @param cells       Active cells in the desired order; the size must
+   *                    equal the number of active cells of the
+   *                    triangulation.
+   */
+  template <typename DoFHandlerType>
+  void
+  compute_cell_wise
+  (std::vector<types::global_dof_index>                                      &new_indices,
+   std::vector<types::global_dof_index>                                      &reverse,
+   const DoFHandlerType                                                      &dof,
+   const typename std::vector<typename DoFHandlerType::active_cell_iterator> &cells)
+  {
+    Assert(cells.size() == dof.get_triangulation().n_active_cells(),
+           ExcDimensionMismatch(cells.size(),
+                                dof.get_triangulation().n_active_cells()));
+
+    const types::global_dof_index n_global_dofs = dof.n_dofs();
+
+    // Actually, we compute the
+    // inverse of the reordering
+    // vector, called reverse here.
+    // Later, its inverse is computed
+    // into new_indices, which is the
+    // return argument.
+
+    Assert(new_indices.size() == n_global_dofs,
+           ExcDimensionMismatch(new_indices.size(), n_global_dofs));
+    Assert(reverse.size() == n_global_dofs,
+           ExcDimensionMismatch(reverse.size(), n_global_dofs));
+
+    // For continuous elements, we must
+    // make sure, that each dof is
+    // reordered only once.
+    std::vector<bool> already_sorted(n_global_dofs, false);
+    std::vector<types::global_dof_index> cell_dofs;
+
+    // next free new number; uses the DoF index type so that it cannot wrap
+    // around before reaching n_global_dofs
+    types::global_dof_index global_index = 0;
+
+    typename std::vector<typename DoFHandlerType::active_cell_iterator>::const_iterator cell;
+
+    for (cell = cells.begin(); cell != cells.end(); ++cell)
+      {
+        // Determine the number of dofs
+        // on this cell and reinit the
+        // vector storing these
+        // numbers.
+        const unsigned int n_cell_dofs = (*cell)->get_fe().n_dofs_per_cell();
+        cell_dofs.resize(n_cell_dofs);
+
+        (*cell)->get_active_or_mg_dof_indices(cell_dofs);
+
+        // Sort here to make sure that
+        // degrees of freedom inside a
+        // single cell are in the same
+        // order after renumbering.
+        std::sort(cell_dofs.begin(), cell_dofs.end());
+
+        for (unsigned int i=0; i<n_cell_dofs; ++i)
+          {
+            if (!already_sorted[cell_dofs[i]])
+              {
+                already_sorted[cell_dofs[i]] = true;
+                reverse[global_index++] = cell_dofs[i];
+              }
+          }
+      }
+    Assert(global_index == n_global_dofs, ExcRenumberingIncomplete());
+
+    // invert the old-index-by-new-number map into new-number-by-old-index
+    for (types::global_dof_index i=0; i<reverse.size(); ++i)
+      new_indices[reverse[i]] = i;
+  }
+
+
+
+  /**
+   * Level variant of cell_wise(): renumber the DoFs that @p dof reports for
+   * @p level cell by cell, following the order of the level cells given in
+   * @p cells.
+   *
+   * Asserts that dof.n_dofs(level) is not numbers::invalid_dof_index.
+   */
+  template <typename DoFHandlerType>
+  void cell_wise
+  (DoFHandlerType                                                           &dof,
+   const unsigned int                                                        level,
+   const typename std::vector<typename DoFHandlerType::level_cell_iterator> &cells)
+  {
+    Assert(dof.n_dofs(level) != numbers::invalid_dof_index,
+           ExcNotInitialized());
+
+    // compute_cell_wise() fills both maps; only the forward one is needed
+    // for the renumbering itself
+    std::vector<types::global_dof_index> new_numbers(dof.n_dofs(level));
+    std::vector<types::global_dof_index> inverse_numbers(dof.n_dofs(level));
+    compute_cell_wise(new_numbers, inverse_numbers, dof, level, cells);
+
+    dof.renumber_dofs(level, new_numbers);
+  }
+
+
+
+  /**
+   * Level variant of compute_cell_wise(): compute a numbering of the DoFs
+   * that @p dof reports for @p level in which the DoFs appear in the order
+   * in which they are first met when walking over @p cells. The DoFs of a
+   * single cell are sorted by their current index before they are appended.
+   *
+   * The DoF count and the running counter use types::global_dof_index so
+   * that nothing is truncated when that type is wider than unsigned int.
+   *
+   * @param new_order Output; must have size dof.n_dofs(level). Entry i
+   *                  receives the new number of level DoF i.
+   * @param reverse   Output; must have size dof.n_dofs(level). Entry k
+   *                  receives the old index of the DoF that gets new
+   *                  number k.
+   * @param dof       DoF handler whose level DoFs are renumbered.
+   * @param level     Level to work on; every cell in @p cells must be on it.
+   * @param cells     Cells of @p level in the desired order; the size must
+   *                  equal the number of cells of the triangulation on that
+   *                  level.
+   */
+  template <typename DoFHandlerType>
+  void compute_cell_wise
+  (std::vector<types::global_dof_index>                                     &new_order,
+   std::vector<types::global_dof_index>                                     &reverse,
+   const DoFHandlerType                                                     &dof,
+   const unsigned int                                                        level,
+   const typename std::vector<typename DoFHandlerType::level_cell_iterator> &cells)
+  {
+    Assert(cells.size() == dof.get_triangulation().n_cells(level),
+           ExcDimensionMismatch(cells.size(),
+                                dof.get_triangulation().n_cells(level)));
+    Assert (new_order.size() == dof.n_dofs(level),
+            ExcDimensionMismatch(new_order.size(), dof.n_dofs(level)));
+    Assert (reverse.size() == dof.n_dofs(level),
+            ExcDimensionMismatch(reverse.size(), dof.n_dofs(level)));
+
+    const types::global_dof_index n_global_dofs = dof.n_dofs(level);
+    const unsigned int n_cell_dofs = dof.get_fe().n_dofs_per_cell();
+
+    // each DoF may be shared between cells but must be numbered only once
+    std::vector<bool> already_sorted(n_global_dofs, false);
+    std::vector<types::global_dof_index> cell_dofs(n_cell_dofs);
+
+    // next free new number
+    types::global_dof_index global_index = 0;
+
+    typename std::vector<typename DoFHandlerType::level_cell_iterator>::const_iterator cell;
+
+    for (cell = cells.begin(); cell != cells.end(); ++cell)
+      {
+        Assert ((*cell)->level() == (int) level, ExcInternalError());
+
+        (*cell)->get_active_or_mg_dof_indices(cell_dofs);
+        // keep the relative order of the DoFs inside a cell
+        std::sort(cell_dofs.begin(), cell_dofs.end());
+
+        for (unsigned int i=0; i<n_cell_dofs; ++i)
+          {
+            if (!already_sorted[cell_dofs[i]])
+              {
+                already_sorted[cell_dofs[i]] = true;
+                reverse[global_index++] = cell_dofs[i];
+              }
+          }
+      }
+    Assert(global_index == n_global_dofs, ExcRenumberingIncomplete());
+
+    // invert the old-index-by-new-number map into new-number-by-old-index
+    for (types::global_dof_index i=0; i<new_order.size(); ++i)
+      new_order[reverse[i]] = i;
+  }
+
+
+
+
+
+
+
+  /**
+   * Compute a numbering of the DoFs of @p dof that is sorted with respect to
+   * @p direction, using either whole cells or individual DoFs as the
+   * sorting unit.
+   *
+   * If @p dof_wise_renumbering is false, the active cells are sorted with
+   * CompareDownstream and the result is passed to compute_cell_wise(), which
+   * fills both @p new_indices and @p reverse. Otherwise, the support point
+   * of every DoF is collected and the DoFs are sorted with
+   * ComparePointwiseDownstream; in that case only @p new_indices is written
+   * and @p reverse is left untouched.
+   *
+   * DoF counts and the DoF indices stored alongside the support points use
+   * types::global_dof_index so that they are not truncated when that type is
+   * wider than unsigned int.
+   *
+   * @param new_indices          Output; entry i receives the new number of
+   *                             DoF i.
+   * @param reverse              Output of the cell-wise branch only.
+   * @param dof                  DoF handler whose DoFs are renumbered.
+   * @param direction            Direction handed to the comparators.
+   * @param dof_wise_renumbering Select DoF-wise (true) or cell-wise (false)
+   *                             sorting.
+   */
+  template <typename DoFHandlerType>
+  void
+  compute_downstream
+  (std::vector<types::global_dof_index>         &new_indices,
+   std::vector<types::global_dof_index>         &reverse,
+   const DoFHandlerType                         &dof,
+   const Point<DoFHandlerType::space_dimension> &direction,
+   const bool                                    dof_wise_renumbering)
+  {
+    if (dof_wise_renumbering == false)
+      {
+        std::vector<typename DoFHandlerType::active_cell_iterator> ordered_cells;
+        ordered_cells.reserve(dof.get_triangulation().n_active_cells());
+        const CompareDownstream<typename DoFHandlerType::active_cell_iterator,
+              DoFHandlerType::space_dimension> comparator(direction);
+
+        typename DoFHandlerType::active_cell_iterator p = dof.begin_active();
+        typename DoFHandlerType::active_cell_iterator end = dof.end();
+
+        while (p!=end)
+          {
+            ordered_cells.push_back(p);
+            ++p;
+          }
+        std::sort (ordered_cells.begin(), ordered_cells.end(), comparator);
+
+        compute_cell_wise(new_indices, reverse, dof, ordered_cells);
+      }
+    else
+      {
+        // similar code as for
+        // DoFTools::map_dofs_to_support_points, but
+        // need to do this for general DoFHandlerType classes and
+        // want to be able to sort the result
+        // (otherwise, could use something like
+        // DoFTools::map_support_points_to_dofs)
+        const types::global_dof_index n_dofs = dof.n_dofs();
+        std::vector<std::pair<Point<DoFHandlerType::space_dimension>,types::global_dof_index> > support_point_list
+        (n_dofs);
+
+        const hp::FECollection<DoFHandlerType::dimension> fe_collection (dof.get_fe ());
+        Assert (fe_collection[0].has_support_points(),
+                typename FiniteElement<DoFHandlerType::dimension>::ExcFEHasNoSupportPoints());
+        // one "quadrature" per element whose points are the unit support
+        // points, so that FEValues maps them to real space for us
+        hp::QCollection<DoFHandlerType::dimension> quadrature_collection;
+        for (unsigned int comp=0; comp<fe_collection.size(); ++comp)
+          {
+            Assert (fe_collection[comp].has_support_points(),
+                    typename FiniteElement<DoFHandlerType::dimension>::ExcFEHasNoSupportPoints());
+            quadrature_collection.push_back
+            (Quadrature<DoFHandlerType::dimension> (fe_collection[comp].
+                                                    get_unit_support_points()));
+          }
+        hp::FEValues<DoFHandlerType::dimension,DoFHandlerType::space_dimension>
+        hp_fe_values (fe_collection, quadrature_collection,
+                      update_quadrature_points);
+
+        // DoFs shared between cells are recorded only once
+        std::vector<bool> already_touched (n_dofs, false);
+
+        std::vector<types::global_dof_index> local_dof_indices;
+        typename DoFHandlerType::active_cell_iterator begin = dof.begin_active();
+        typename DoFHandlerType::active_cell_iterator end = dof.end();
+        for ( ; begin != end; ++begin)
+          {
+            const unsigned int dofs_per_cell = begin->get_fe().dofs_per_cell;
+            local_dof_indices.resize (dofs_per_cell);
+            hp_fe_values.reinit (begin);
+            const FEValues<DoFHandlerType::dimension> &fe_values =
+              hp_fe_values.get_present_fe_values ();
+            begin->get_active_or_mg_dof_indices(local_dof_indices);
+            const std::vector<Point<DoFHandlerType::space_dimension> > &points
+              = fe_values.get_quadrature_points ();
+            for (unsigned int i=0; i<dofs_per_cell; ++i)
+              if (!already_touched[local_dof_indices[i]])
+                {
+                  support_point_list[local_dof_indices[i]].first = points[i];
+                  support_point_list[local_dof_indices[i]].second =
+                    local_dof_indices[i];
+                  already_touched[local_dof_indices[i]] = true;
+                }
+          }
+
+        ComparePointwiseDownstream<DoFHandlerType::space_dimension> comparator (direction);
+        std::sort (support_point_list.begin(), support_point_list.end(),
+                   comparator);
+        // position in the sorted list becomes the new number of the DoF
+        for (types::global_dof_index i=0; i<n_dofs; ++i)
+          new_indices[support_point_list[i].second] = i;
+      }
+  }
+
+
+
+  /**
+   * Level variant of downstream(): renumber the DoFs that @p dof reports for
+   * @p level with the numbering produced by the level overload of
+   * compute_downstream() for @p direction and @p dof_wise_renumbering.
+   */
+  template <typename DoFHandlerType>
+  void downstream (DoFHandlerType                               &dof,
+                   const unsigned int                            level,
+                   const Point<DoFHandlerType::space_dimension> &direction,
+                   const bool                                    dof_wise_renumbering)
+  {
+    // both maps are sized for the level; only the forward one is applied
+    std::vector<types::global_dof_index> new_numbers(dof.n_dofs(level));
+    std::vector<types::global_dof_index> inverse_numbers(dof.n_dofs(level));
+    compute_downstream(new_numbers, inverse_numbers, dof, level, direction,
+                       dof_wise_renumbering);
+
+    dof.renumber_dofs(level, new_numbers);
+  }
+
+
+
+  /**
+   * Level variant of compute_downstream(): compute a numbering of the DoFs
+   * that @p dof reports for @p level that is sorted with respect to
+   * @p direction.
+   *
+   * If @p dof_wise_renumbering is false, the cells of the level are sorted
+   * with CompareDownstream and passed to the level overload of
+   * compute_cell_wise(), which fills both @p new_indices and @p reverse.
+   * Otherwise the support point of every level DoF is collected and the DoFs
+   * are sorted with ComparePointwiseDownstream; in that case only
+   * @p new_indices is written and @p reverse is left untouched.
+   *
+   * All per-DoF bookkeeping is sized by and indexed with the number of DoFs
+   * on @p level, using types::global_dof_index for counts and indices.
+   *
+   * @param new_indices          Output; entry i receives the new number of
+   *                             level DoF i.
+   * @param reverse              Output of the cell-wise branch only.
+   * @param dof                  DoF handler whose level DoFs are renumbered.
+   * @param level                Level to work on.
+   * @param direction            Direction handed to the comparators.
+   * @param dof_wise_renumbering Select DoF-wise (true) or cell-wise (false)
+   *                             sorting.
+   */
+  template <typename DoFHandlerType>
+  void
+  compute_downstream
+  (std::vector<types::global_dof_index>         &new_indices,
+   std::vector<types::global_dof_index>         &reverse,
+   const DoFHandlerType                         &dof,
+   const unsigned int                            level,
+   const Point<DoFHandlerType::space_dimension> &direction,
+   const bool                                    dof_wise_renumbering)
+  {
+    if (dof_wise_renumbering == false)
+      {
+        std::vector<typename DoFHandlerType::level_cell_iterator> ordered_cells;
+        ordered_cells.reserve (dof.get_triangulation().n_cells(level));
+        const CompareDownstream<typename DoFHandlerType::level_cell_iterator,
+              DoFHandlerType::space_dimension> comparator(direction);
+
+        typename DoFHandlerType::level_cell_iterator p = dof.begin(level);
+        typename DoFHandlerType::level_cell_iterator end = dof.end(level);
+
+        while (p!=end)
+          {
+            ordered_cells.push_back(p);
+            ++p;
+          }
+        std::sort (ordered_cells.begin(), ordered_cells.end(), comparator);
+
+        compute_cell_wise(new_indices, reverse, dof, level, ordered_cells);
+      }
+    else
+      {
+        Assert (dof.get_fe().has_support_points(),
+                typename FiniteElement<DoFHandlerType::dimension>::ExcFEHasNoSupportPoints());
+        const types::global_dof_index n_dofs = dof.n_dofs(level);
+        std::vector<std::pair<Point<DoFHandlerType::space_dimension>,types::global_dof_index> > support_point_list
+        (n_dofs);
+
+        // a "quadrature" whose points are the unit support points, so that
+        // FEValues maps them to real space for us
+        Quadrature<DoFHandlerType::dimension>   q_dummy(dof.get_fe().get_unit_support_points());
+        FEValues<DoFHandlerType::dimension,DoFHandlerType::space_dimension> fe_values (dof.get_fe(), q_dummy,
+            update_quadrature_points);
+
+        // indexed by level DoF index, hence sized by the number of DoFs on
+        // this level rather than by the number of active DoFs
+        std::vector<bool> already_touched (n_dofs, false);
+
+        const unsigned int dofs_per_cell = dof.get_fe().dofs_per_cell;
+        std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+        typename DoFHandlerType::level_cell_iterator begin = dof.begin(level);
+        typename DoFHandlerType::level_cell_iterator end = dof.end(level);
+        for ( ; begin != end; ++begin)
+          {
+            // FEValues is reinitialized with the plain triangulation iterator
+            const typename Triangulation<DoFHandlerType::dimension,
+                  DoFHandlerType::space_dimension>::cell_iterator &begin_tria = begin;
+            begin->get_active_or_mg_dof_indices(local_dof_indices);
+            fe_values.reinit (begin_tria);
+            const std::vector<Point<DoFHandlerType::space_dimension> > &points
+              = fe_values.get_quadrature_points ();
+            for (unsigned int i=0; i<dofs_per_cell; ++i)
+              if (!already_touched[local_dof_indices[i]])
+                {
+                  support_point_list[local_dof_indices[i]].first = points[i];
+                  support_point_list[local_dof_indices[i]].second =
+                    local_dof_indices[i];
+                  already_touched[local_dof_indices[i]] = true;
+                }
+          }
+
+        ComparePointwiseDownstream<DoFHandlerType::space_dimension> comparator (direction);
+        std::sort (support_point_list.begin(), support_point_list.end(),
+                   comparator);
+        // position in the sorted list becomes the new number of the DoF
+        for (types::global_dof_index i=0; i<n_dofs; ++i)
+          new_indices[support_point_list[i].second] = i;
+      }
+  }
+
+
+
+  /**
+   * Provide comparator for DoFCellAccessors
+   */
+  namespace internal
+  {
+    /**
+     * Comparator that orders cells by the angle of the vector from a given
+     * center to the cell center, computed as atan2 of the vector's first and
+     * second component. Used as the sorting predicate by the clockwise_dg()
+     * family of functions.
+     */
+    template <int dim>
+    struct ClockCells
+    {
+      /**
+       * Center of rotation. Stored by reference, so the referenced point
+       * must stay alive for as long as the comparator is used.
+       */
+      const Point<dim> &center;
+      /**
+       * Revert sorting order.
+       */
+      bool counter;
+
+      /**
+       * Constructor.
+       */
+      ClockCells (const Point<dim> &center, bool counter) :
+        center(center),
+        counter(counter)
+      {}
+
+      /**
+       * Comparison operator
+       */
+      template <class DHCellIterator>
+      bool operator () (const DHCellIterator &c1,
+                        const DHCellIterator &c2) const
+      {
+        // dispatch to
+        // dimension-dependent functions
+        return compare (c1, c2, dealii::internal::int2type<dim>());
+      }
+
+    private:
+      /**
+       * Comparison operator for dim>=2. Without @p counter, a cell compares
+       * less if its angle is smaller; with @p counter set, if its angle is
+       * larger.
+       */
+      template <class DHCellIterator, int xdim>
+      bool compare (const DHCellIterator &c1,
+                    const DHCellIterator &c2,
+                    dealii::internal::int2type<xdim>) const
+      {
+        const Tensor<1,dim> v1 = c1->center() - center;
+        const Tensor<1,dim> v2 = c2->center() - center;
+        const double s1 = std::atan2(v1[0], v1[1]);
+        const double s2 = std::atan2(v2[0], v2[1]);
+        return ( counter ? (s1>s2) : (s2>s1));
+      }
+
+
+      /**
+       * Comparison operator for dim==1
+       * where this function makes no sense
+       */
+      template <class DHCellIterator>
+      bool compare (const DHCellIterator &,
+                    const DHCellIterator &,
+                    dealii::internal::int2type<1>) const
+      {
+        Assert (dim >= 2,
+                ExcMessage ("This operation only makes sense for dim>=2."));
+        return false;
+      }
+
+    };
+  }
+
+
+
+  /**
+   * Renumber the DoFs of @p dof with the numbering computed by
+   * compute_clockwise_dg() for the rotation center @p center; @p counter is
+   * forwarded and reverts the sorting order.
+   */
+  template <typename DoFHandlerType>
+  void
+  clockwise_dg (
+    DoFHandlerType &dof,
+    const Point<DoFHandlerType::space_dimension> &center,
+    const bool counter)
+  {
+    const types::global_dof_index n_dofs = dof.n_dofs();
+
+    std::vector<types::global_dof_index> new_numbers(n_dofs);
+    compute_clockwise_dg(new_numbers, dof, center, counter);
+
+    dof.renumber_dofs(new_numbers);
+  }
+
+
+
+  /**
+   * Compute a cell-wise numbering of the DoFs of @p dof in which the active
+   * cells are ordered by internal::ClockCells, i.e. by the angle of their
+   * centers around @p center.
+   *
+   * @param new_indices Output; its size is also used for the temporary
+   *                    inverse map handed to compute_cell_wise().
+   * @param dof         DoF handler whose DoFs are renumbered.
+   * @param center      Center of rotation.
+   * @param counter     Reverts the sorting order.
+   */
+  template <typename DoFHandlerType>
+  void
+  compute_clockwise_dg
+  (std::vector<types::global_dof_index>         &new_indices,
+   const DoFHandlerType                         &dof,
+   const Point<DoFHandlerType::space_dimension> &center,
+   const bool                                    counter)
+  {
+    typedef typename DoFHandlerType::active_cell_iterator cell_iterator;
+
+    // collect all active cells ...
+    std::vector<cell_iterator> sorted_cells;
+    sorted_cells.reserve (dof.get_triangulation().n_active_cells());
+
+    const cell_iterator last_cell = dof.end();
+    for (cell_iterator cell = dof.begin_active(); cell != last_cell; ++cell)
+      sorted_cells.push_back(cell);
+
+    // ... and bring them into rotational order around the center
+    internal::ClockCells<DoFHandlerType::space_dimension> by_angle(center, counter);
+    std::sort (sorted_cells.begin(), sorted_cells.end(), by_angle);
+
+    // the inverse map is required by compute_cell_wise() but not returned
+    std::vector<types::global_dof_index> inverse_numbers(new_indices.size());
+    compute_cell_wise(new_indices, inverse_numbers, dof, sorted_cells);
+  }
+
+
+
+  /**
+   * Level variant of clockwise_dg(): order the cells of @p level by
+   * internal::ClockCells, i.e. by the angle of their centers around
+   * @p center, and renumber the level DoFs cell-wise in that order.
+   *
+   * @param dof     DoF handler whose level DoFs are renumbered.
+   * @param level   Level to work on.
+   * @param center  Center of rotation.
+   * @param counter Reverts the sorting order.
+   */
+  template <typename DoFHandlerType>
+  void clockwise_dg (DoFHandlerType                               &dof,
+                     const unsigned int                            level,
+                     const Point<DoFHandlerType::space_dimension> &center,
+                     const bool                                    counter)
+  {
+    std::vector<typename DoFHandlerType::level_cell_iterator> ordered_cells;
+    // exactly the cells of this level are collected below, so reserve room
+    // for those (not for the active cells of the whole triangulation)
+    ordered_cells.reserve(dof.get_triangulation().n_cells(level));
+    internal::ClockCells<DoFHandlerType::space_dimension> comparator(center, counter);
+
+    typename DoFHandlerType::level_cell_iterator p = dof.begin(level);
+    typename DoFHandlerType::level_cell_iterator end = dof.end(level);
+
+    while (p!=end)
+      {
+        ordered_cells.push_back(p);
+        ++p;
+      }
+    std::sort (ordered_cells.begin(), ordered_cells.end(), comparator);
+
+    cell_wise(dof, level, ordered_cells);
+  }
+
+
+
+  /**
+   * Renumber the DoFs of @p dof_handler with the permutation produced by
+   * compute_random().
+   */
+  template <typename DoFHandlerType>
+  void
+  random (DoFHandlerType &dof_handler)
+  {
+    const types::global_dof_index n_dofs = dof_handler.n_dofs();
+
+    std::vector<types::global_dof_index> new_numbers(n_dofs,
+                                                     DoFHandlerType::invalid_dof_index);
+    compute_random(new_numbers, dof_handler);
+
+    dof_handler.renumber_dofs(new_numbers);
+  }
+
+
+
+  /**
+   * Fill @p new_indices with a shuffled permutation of
+   * 0 ... dof_handler.n_dofs()-1.
+   *
+   * The shuffle draws its numbers from a default-constructed
+   * ::boost::mt19937, i.e. the generator is not seeded from any outside
+   * source.
+   *
+   * @param new_indices Output; must already have size dof_handler.n_dofs().
+   * @param dof_handler Only queried for its number of DoFs.
+   */
+  template <typename DoFHandlerType>
+  void
+  compute_random (
+    std::vector<types::global_dof_index> &new_indices,
+    const DoFHandlerType      &dof_handler)
+  {
+    const types::global_dof_index n_dofs = dof_handler.n_dofs();
+    Assert(new_indices.size() == n_dofs,
+           ExcDimensionMismatch(new_indices.size(), n_dofs));
+
+    // start from the identity permutation
+    for (unsigned int dof=0; dof<n_dofs; ++dof)
+      new_indices[dof] = dof;
+
+    // shuffle in the manner of std::random_shuffle, but with a generator
+    // whose sequence is predictable
+    ::boost::mt19937 generator;
+    for (unsigned int position=1; position<n_dofs; ++position)
+      {
+        // draw a partner index from the closed range [0, position]
+        ::boost::random::uniform_int_distribution<> pick_partner (0, position);
+        const unsigned int partner = pick_partner (generator);
+
+        // swapping an entry with itself would be pointless
+        if (partner != position)
+          std::swap (new_indices[position], new_indices[partner]);
+      }
+  }
+
+
+
+  /**
+   * Renumber the DoFs of @p dof_handler with the numbering produced by
+   * compute_subdomain_wise(), which groups the DoFs by the subdomain they
+   * are associated with.
+   */
+  template <typename DoFHandlerType>
+  void
+  subdomain_wise (DoFHandlerType &dof_handler)
+  {
+    const types::global_dof_index n_dofs = dof_handler.n_dofs();
+
+    std::vector<types::global_dof_index> new_numbers(n_dofs,
+                                                     DoFHandlerType::invalid_dof_index);
+    compute_subdomain_wise(new_numbers, dof_handler);
+
+    dof_handler.renumber_dofs(new_numbers);
+  }
+
+
+
+  /**
+   * Compute a numbering in which the DoFs are grouped by the subdomain
+   * reported for them by DoFTools::get_subdomain_association(): first all
+   * DoFs of subdomain 0, then those of subdomain 1, and so on. Within a
+   * subdomain the DoFs keep their relative order.
+   *
+   * If the DoF handler has no DoFs at all, the function returns without
+   * doing anything.
+   *
+   * @param new_dof_indices Output; must already have size
+   *                        dof_handler.n_dofs(). Entry i receives the new
+   *                        number of DoF i.
+   * @param dof_handler     DoF handler whose DoFs are renumbered.
+   */
+  template <typename DoFHandlerType>
+  void
+  compute_subdomain_wise (std::vector<types::global_dof_index> &new_dof_indices,
+                          const DoFHandlerType      &dof_handler)
+  {
+    const types::global_dof_index n_dofs = dof_handler.n_dofs();
+    Assert (new_dof_indices.size() == n_dofs,
+            ExcDimensionMismatch (new_dof_indices.size(), n_dofs));
+
+    // nothing to renumber; this also keeps the result of std::max_element
+    // below from being dereferenced on an empty range
+    if (n_dofs == 0)
+      return;
+
+    // first get the association of each dof
+    // with a subdomain and determine the total
+    // number of subdomain ids used
+    std::vector<types::subdomain_id> subdomain_association (n_dofs);
+    DoFTools::get_subdomain_association (dof_handler,
+                                         subdomain_association);
+    const unsigned int n_subdomains
+      = *std::max_element (subdomain_association.begin(),
+                           subdomain_association.end()) + 1;
+
+    // then renumber the subdomains by first
+    // looking at those belonging to subdomain
+    // 0, then those of subdomain 1, etc. note
+    // that the algorithm is stable, i.e. if
+    // two dofs i,j have i<j and belong to the
+    // same subdomain, then they will be in
+    // this order also after reordering
+    std::fill (new_dof_indices.begin(), new_dof_indices.end(),
+               numbers::invalid_dof_index);
+    types::global_dof_index next_free_index = 0;
+    for (types::subdomain_id subdomain=0; subdomain<n_subdomains; ++subdomain)
+      for (types::global_dof_index i=0; i<n_dofs; ++i)
+        if (subdomain_association[i] == subdomain)
+          {
+            Assert (new_dof_indices[i] == numbers::invalid_dof_index,
+                    ExcInternalError());
+            new_dof_indices[i] = next_free_index;
+            ++next_free_index;
+          }
+
+    // we should have numbered all dofs
+    Assert (next_free_index == n_dofs, ExcInternalError());
+    Assert (std::find (new_dof_indices.begin(), new_dof_indices.end(),
+                       numbers::invalid_dof_index)
+            == new_dof_indices.end(),
+            ExcInternalError());
+  }
+
+} // namespace DoFRenumbering
+
+
+
+/*-------------- Explicit Instantiations -------------------------------*/
+#include "dof_renumbering.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/dofs/dof_renumbering.inst.in b/source/dofs/dof_renumbering.inst.in
new file mode 100644
index 0000000..27a403e
--- /dev/null
+++ b/source/dofs/dof_renumbering.inst.in
@@ -0,0 +1,323 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+#if deal_II_dimension <= deal_II_space_dimension
+    namespace DoFRenumbering
+      \{
+      namespace boost
+	\{
+//TODO[WB]: also implement the following boost for hp DoFHandlers etc.
+	\}
+
+
+// non-boost functions:
+	template
+	  void Cuthill_McKee<DoFHandler<deal_II_dimension,deal_II_space_dimension> >
+	  (DoFHandler<deal_II_dimension,deal_II_space_dimension>&,
+	   const bool,
+	   const bool,
+	   const std::vector<types::global_dof_index>&);
+
+	template
+	  void
+	  compute_Cuthill_McKee<DoFHandler<deal_II_dimension,deal_II_space_dimension> >
+	  (std::vector<types::global_dof_index>&,
+	   const DoFHandler<deal_II_dimension,deal_II_space_dimension>&,
+	   const bool,
+	   const bool,
+	   const std::vector<types::global_dof_index>&);
+
+	template
+	  void component_wise<deal_II_dimension,deal_II_space_dimension>
+	  (DoFHandler<deal_II_dimension,deal_II_space_dimension>&,
+	   const std::vector<unsigned int>&);
+
+	template
+	  void block_wise<deal_II_dimension,deal_II_space_dimension>
+	  (DoFHandler<deal_II_dimension,deal_II_space_dimension>&);
+
+	template
+	  void subdomain_wise<DoFHandler<deal_II_dimension,deal_II_space_dimension> >
+	  (DoFHandler<deal_II_dimension,deal_II_space_dimension> &);
+
+        template
+        void
+        compute_subdomain_wise (std::vector<types::global_dof_index> &new_dof_indices,
+                          const DoFHandler<deal_II_dimension,deal_II_space_dimension> &dof_handler);
+
+	\}  // namespace DoFRenumbering
+#endif
+  }
+
+//TODO[SP]: replace <deal_II_dimension> by <deal_II_dimension, deal_II_space_dimension>
+// where applicable and move to codimension cases above also when applicable
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+#if deal_II_dimension == deal_II_space_dimension
+namespace DoFRenumbering
+  \{
+  namespace boost
+    \{
+//TODO[WB]: also implement the following boost for hp DoFHandlers etc.
+    template
+    void
+    Cuthill_McKee (DoFHandler<deal_II_dimension> &, bool, bool);
+
+    template
+    void
+    compute_Cuthill_McKee (std::vector<dealii::types::global_dof_index> &,
+			   const DoFHandler<deal_II_dimension> &, bool, bool);
+
+    template
+    void
+    king_ordering (DoFHandler<deal_II_dimension> &, bool, bool);
+
+    template
+    void
+    compute_king_ordering (std::vector<dealii::types::global_dof_index> &,
+			   const DoFHandler<deal_II_dimension> &, bool, bool);
+
+    template
+    void
+    minimum_degree (DoFHandler<deal_II_dimension> &, bool, bool);
+
+    template
+    void
+    compute_minimum_degree (std::vector<dealii::types::global_dof_index> &,
+			    const DoFHandler<deal_II_dimension> &, bool, bool);
+
+
+    template
+    void
+    Cuthill_McKee (hp::DoFHandler<deal_II_dimension> &, bool, bool);
+
+    template
+    void
+    compute_Cuthill_McKee (std::vector<dealii::types::global_dof_index> &,
+			   const hp::DoFHandler<deal_II_dimension> &, bool, bool);
+
+    template
+    void
+    king_ordering (hp::DoFHandler<deal_II_dimension> &, bool, bool);
+
+    template
+    void
+    compute_king_ordering (std::vector<dealii::types::global_dof_index> &,
+			   const hp::DoFHandler<deal_II_dimension> &, bool, bool);
+
+    template
+    void
+    minimum_degree (hp::DoFHandler<deal_II_dimension> &, bool, bool);
+
+    template
+    void
+    compute_minimum_degree (std::vector<dealii::types::global_dof_index> &,
+			    const hp::DoFHandler<deal_II_dimension> &, bool, bool);
+    \}
+
+
+// non-boost functions:
+
+    template
+      void Cuthill_McKee<hp::DoFHandler<deal_II_dimension> >
+      (hp::DoFHandler<deal_II_dimension>&,
+       const bool,
+       const bool,
+       const std::vector<types::global_dof_index>&);
+
+    template
+      void
+      compute_Cuthill_McKee<hp::DoFHandler<deal_II_dimension> >
+      (std::vector<types::global_dof_index>&,
+       const hp::DoFHandler<deal_II_dimension>&,
+       const bool,
+       const bool,
+       const std::vector<types::global_dof_index>&);
+
+    template
+      void component_wise
+      (hp::DoFHandler<deal_II_dimension>&,
+       const std::vector<unsigned int>&);
+
+    template
+      void component_wise
+      (DoFHandler<deal_II_dimension>&,
+       unsigned int,
+       const std::vector<unsigned int>&);
+
+    template
+      void block_wise<deal_II_dimension>
+      (hp::DoFHandler<deal_II_dimension>&);
+
+    template
+      void block_wise<deal_II_dimension>
+      (DoFHandler<deal_II_dimension>&,
+       unsigned int);
+
+    template
+      void hierarchical<deal_II_dimension>
+      (DoFHandler<deal_II_dimension>&);
+
+    template void
+      cell_wise<DoFHandler<deal_II_dimension> >
+      (DoFHandler<deal_II_dimension>&,
+       const std::vector<DoFHandler<deal_II_dimension>::active_cell_iterator>&);
+
+    template void
+      compute_cell_wise<DoFHandler<deal_II_dimension> >
+      (std::vector<types::global_dof_index>&, std::vector<types::global_dof_index>&,
+       const DoFHandler<deal_II_dimension>&,
+       const std::vector<DoFHandler<deal_II_dimension>::active_cell_iterator>&);
+
+    template void
+      cell_wise<DoFHandler<deal_II_dimension> >
+      (DoFHandler<deal_II_dimension>&, unsigned int,
+       const std::vector<DoFHandler<deal_II_dimension>::level_cell_iterator>&);
+
+    template void
+      compute_cell_wise<DoFHandler<deal_II_dimension> >
+      (std::vector<types::global_dof_index>&, std::vector<types::global_dof_index>&,
+       const DoFHandler<deal_II_dimension>&, unsigned int,
+       const std::vector<DoFHandler<deal_II_dimension>::level_cell_iterator>&);
+
+    template void
+      compute_downstream<DoFHandler<deal_II_dimension> >
+      (std::vector<types::global_dof_index>&,std::vector<types::global_dof_index>&,
+       const DoFHandler<deal_II_dimension>&, const Point<deal_II_dimension>&,
+       const bool);
+
+    template
+      void
+      clockwise_dg<DoFHandler<deal_II_dimension> >
+      (DoFHandler<deal_II_dimension>&, const Point<deal_II_dimension>&, bool);
+
+    template
+      void
+      compute_clockwise_dg<DoFHandler<deal_II_dimension> >
+      (std::vector<types::global_dof_index>&, const DoFHandler<deal_II_dimension>&,
+       const Point<deal_II_dimension>&, const bool);
+
+// Renumbering for hp::DoFHandler
+
+    template void
+      cell_wise<hp::DoFHandler<deal_II_dimension> >
+      (hp::DoFHandler<deal_II_dimension>&,
+       const std::vector<hp::DoFHandler<deal_II_dimension>::active_cell_iterator>&);
+
+    template void
+      compute_cell_wise<hp::DoFHandler<deal_II_dimension> >
+      (std::vector<types::global_dof_index>&, std::vector<types::global_dof_index>&,
+       const hp::DoFHandler<deal_II_dimension>&,
+       const std::vector<hp::DoFHandler<deal_II_dimension>::active_cell_iterator>&);
+
+    template void
+      compute_downstream<hp::DoFHandler<deal_II_dimension> >
+      (std::vector<types::global_dof_index>&,std::vector<types::global_dof_index>&,
+       const hp::DoFHandler<deal_II_dimension>&,
+       const Point<deal_II_dimension>&,
+       const bool);
+
+    template
+      void
+      clockwise_dg<hp::DoFHandler<deal_II_dimension> >
+      (hp::DoFHandler<deal_II_dimension>&,
+       const Point<deal_II_dimension>&, bool);
+
+    template
+      void
+      compute_clockwise_dg<hp::DoFHandler<deal_II_dimension> >
+      (std::vector<types::global_dof_index>&,
+       const hp::DoFHandler<deal_II_dimension>&,
+       const Point<deal_II_dimension>&,
+       const bool);
+
+// MG
+
+    template
+      void downstream
+      (DoFHandler<deal_II_dimension>&,
+       const unsigned int,
+       const Point<deal_II_dimension>&,
+       const bool);
+
+    template
+      void clockwise_dg
+      (DoFHandler<deal_II_dimension>&,
+       const unsigned int,
+       const Point<deal_II_dimension>&, bool);
+
+// Generic numbering schemes
+
+    template
+      void random<DoFHandler<deal_II_dimension> >
+      (DoFHandler<deal_II_dimension> &);
+
+    template
+      void
+      compute_random<DoFHandler<deal_II_dimension> >
+      (std::vector<types::global_dof_index>&,
+       const DoFHandler<deal_II_dimension> &);
+
+    template
+      void sort_selected_dofs_back<DoFHandler<deal_II_dimension> >
+      (DoFHandler<deal_II_dimension> &,
+       const std::vector<bool> &);
+
+    template
+      void
+      compute_sort_selected_dofs_back<DoFHandler<deal_II_dimension> >
+      (std::vector<types::global_dof_index>&,
+       const DoFHandler<deal_II_dimension> &,
+       const std::vector<bool> &);
+
+    template
+      void random<hp::DoFHandler<deal_II_dimension> >
+      (hp::DoFHandler<deal_II_dimension> &);
+
+    template
+      void
+      compute_random<hp::DoFHandler<deal_II_dimension> >
+      (std::vector<types::global_dof_index>&,
+       const hp::DoFHandler<deal_II_dimension> &);
+
+    template
+      void sort_selected_dofs_back<hp::DoFHandler<deal_II_dimension> >
+      (hp::DoFHandler<deal_II_dimension> &,
+       const std::vector<bool> &);
+
+    template
+      void
+      compute_sort_selected_dofs_back<hp::DoFHandler<deal_II_dimension> >
+      (std::vector<types::global_dof_index>&,
+       const hp::DoFHandler<deal_II_dimension> &,
+       const std::vector<bool> &);
+
+
+    template
+      void subdomain_wise<hp::DoFHandler<deal_II_dimension> >
+      (hp::DoFHandler<deal_II_dimension> &);
+
+    template
+      void Cuthill_McKee<DoFHandler<deal_II_dimension> >
+      (DoFHandler<deal_II_dimension>&,
+       const unsigned int,
+       const bool,
+       const std::vector<types::global_dof_index>&);
+    \}  // namespace DoFRenumbering
+#endif
+  }
diff --git a/source/dofs/dof_tools.cc b/source/dofs/dof_tools.cc
new file mode 100644
index 0000000..0563e73
--- /dev/null
+++ b/source/dofs/dof_tools.cc
@@ -0,0 +1,2366 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/thread_management.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/table.h>
+#include <deal.II/base/template_constraints.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/lac/sparsity_pattern.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/lac/trilinos_sparsity_pattern.h>
+#include <deal.II/lac/block_sparsity_pattern.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/intergrid_map.h>
+#include <deal.II/grid/filtered_iterator.h>
+#include <deal.II/grid/grid_tools.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/fe_tools.h>
+#include <deal.II/hp/fe_collection.h>
+#include <deal.II/hp/q_collection.h>
+#include <deal.II/hp/fe_values.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/distributed/tria.h>
+#include <deal.II/distributed/shared_tria.h>
+
+#include <algorithm>
+#include <numeric>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+namespace DoFTools
+{
+  namespace internal
+  {
+    // return an array that for each dof on the reference cell
+    // lists the corresponding vector component.
+    //
+    // if an element is non-primitive then we assign to each degree of freedom
+    // the following component:
+    // - if none of the nonzero components that belong to a shape function
+    //   is selected in the component_mask, then the shape function is
+    //   assigned to the first nonzero vector component that corresponds to
+    //   this shape function
+    // - otherwise, the shape function is assigned the first component selected
+    //   in the component_mask at or after its first nonzero component
+    template <int dim, int spacedim>
+    std::vector<unsigned char>
+    get_local_component_association (const FiniteElement<dim,spacedim>  &fe,
+                                     const ComponentMask        &component_mask)
+    {
+      std::vector<unsigned char> local_component_association (fe.dofs_per_cell,
+                                                              (unsigned char)(-1));
+
+      // compute the component each local dof belongs to; entries still
+      // holding the initial value (unsigned char)(-1) are rejected by the
+      // assertion below. if the shape function is primitive, then this is
+      // simple and we can just use what system_to_component_index gives us
+      for (unsigned int i=0; i<fe.dofs_per_cell; ++i)
+        if (fe.is_primitive(i))
+          local_component_association[i] =
+            fe.system_to_component_index(i).first;
+        else
+          // if the shape function is not primitive, then either use the
+          // first nonzero component of this shape function (if none of
+          // its nonzero components is selected in the component_mask),
+          // or the first component listed in the component_mask, where
+          // the search starts at the first nonzero component of this
+          // shape function (if at least one of its nonzero components
+          // is selected in the component_mask)
+          {
+            const unsigned int first_comp =
+              fe.get_nonzero_components(i).first_selected_component();
+
+            if ((fe.get_nonzero_components(i)
+                 &
+                 component_mask).n_selected_components(fe.n_components()) == 0)
+              local_component_association[i] = first_comp;
+            else
+              // pick the component selected. we know from the previous 'if'
+              // that within the components that are nonzero for this
+              // shape function there must be at least one for which the
+              // mask is true, so the loop below is guaranteed to reach
+              // its 'break' statement at some point
+              for (unsigned int c=first_comp; c<fe.n_components(); ++c)
+                if (component_mask[c] == true)
+                  {
+                    local_component_association[i] = c;
+                    break;
+                  }
+          }
+
+      Assert (std::find (local_component_association.begin(),
+                         local_component_association.end(),
+                         (unsigned char)(-1))
+              ==
+              local_component_association.end(),
+              ExcInternalError());
+
+      return local_component_association;
+    }
+
+
+    // this internal function assigns to each dof the respective component
+    // of the vector system.
+    //
+    // the output array dofs_by_component lists for each locally owned dof
+    // the corresponding vector component. it has one entry per locally
+    // owned dof, and the entry for a global index indices[i] is stored at
+    // position dof.locally_owned_dofs().index_within_set(indices[i])
+    //
+    // if an element is non-primitive then we assign to each degree of
+    // freedom the following component:
+    // - if the nonzero components that belong to a shape function are not
+    //   selected in the component_mask, then the shape function is assigned
+    //   to the first nonzero vector component that corresponds to this
+    //   shape function
+    // - otherwise, the shape function is assigned the first component selected
+    //   in the component_mask that corresponds to this shape function
+    template <typename DoFHandlerType>
+    void
+    get_component_association (const DoFHandlerType       &dof,
+                               const ComponentMask        &component_mask,
+                               std::vector<unsigned char> &dofs_by_component)
+    {
+      const dealii::hp::FECollection<DoFHandlerType::dimension,DoFHandlerType::space_dimension>
+      fe_collection (dof.get_fe());
+      Assert (fe_collection.n_components() < 256, ExcNotImplemented());
+      Assert (dofs_by_component.size() == dof.n_locally_owned_dofs(),
+              ExcDimensionMismatch(dofs_by_component.size(),
+                                   dof.n_locally_owned_dofs()));
+
+      // next set up a table for the degrees of freedom on each of the
+      // cells (regardless of whether the component of a degree of freedom
+      // is selected in the component_mask argument or not)
+      //
+      // for each element 'f' of the FECollection,
+      // local_component_association[f][d] then returns the vector
+      // component that degree of freedom 'd' belongs to
+      std::vector<std::vector<unsigned char> >
+      local_component_association (fe_collection.size());
+      for (unsigned int f=0; f<fe_collection.size(); ++f)
+        {
+          const FiniteElement<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &fe =
+            fe_collection[f];
+          local_component_association[f]
+            = get_local_component_association (fe, component_mask);
+        }
+
+      // then loop over the locally owned cells and store the component of each locally owned dof
+      std::vector<types::global_dof_index> indices;
+      for (typename DoFHandlerType::active_cell_iterator c=dof.begin_active();
+           c!=dof.end(); ++ c)
+        if (c->is_locally_owned())
+          {
+            const unsigned int fe_index = c->active_fe_index();
+            const unsigned int dofs_per_cell = c->get_fe().dofs_per_cell;
+            indices.resize(dofs_per_cell);
+            c->get_dof_indices(indices);
+            for (unsigned int i=0; i<dofs_per_cell; ++i)
+              if (dof.locally_owned_dofs().is_element(indices[i]))
+                dofs_by_component[dof.locally_owned_dofs().index_within_set(indices[i])]
+                  = local_component_association[fe_index][i];
+          }
+    }
+
+
+    // this is the function corresponding to the one above but working on
+    // blocks instead of components.
+    //
+    // the output array dofs_by_block lists for each locally owned dof the
+    // corresponding vector block. the entry that belongs to the global
+    // index indices[i] is stored at position
+    // dof.locally_owned_dofs().index_within_set(indices[i])
+    template <typename DoFHandlerType>
+    inline
+    void
+    get_block_association (const DoFHandlerType       &dof,
+                           std::vector<unsigned char> &dofs_by_block)
+    {
+      const dealii::hp::FECollection<DoFHandlerType::dimension,DoFHandlerType::space_dimension>
+      fe_collection (dof.get_fe());
+      Assert (fe_collection.n_components() < 256, ExcNotImplemented());
+      Assert (dofs_by_block.size() == dof.n_locally_owned_dofs(),
+              ExcDimensionMismatch(dofs_by_block.size(),
+                                   dof.n_locally_owned_dofs()));
+
+      // next set up a table for the degrees of freedom on each of the
+      // cells (this function takes no mask argument, so every degree of
+      // freedom of every element is listed)
+      //
+      // for each element 'f' of the FECollection,
+      // local_block_association[f][d] then returns the vector block that
+      // degree of freedom 'd' belongs to
+      std::vector<std::vector<unsigned char> > local_block_association
+      (fe_collection.size());
+      for (unsigned int f=0; f<fe_collection.size(); ++f)
+        {
+          const FiniteElement<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &fe =
+            fe_collection[f];
+          local_block_association[f].resize(fe.dofs_per_cell,
+                                            (unsigned char)(-1));
+          for (unsigned int i=0; i<fe.dofs_per_cell; ++i)
+            local_block_association[f][i] = fe.system_to_block_index(i).first;
+
+          Assert (std::find (local_block_association[f].begin(),
+                             local_block_association[f].end(),
+                             (unsigned char)(-1))
+                  ==
+                  local_block_association[f].end(),
+                  ExcInternalError());
+        }
+
+      // then loop over the locally owned cells and store the block of each locally owned dof
+      std::vector<types::global_dof_index> indices;
+      for (typename DoFHandlerType::active_cell_iterator c=dof.begin_active();
+           c!=dof.end(); ++ c)
+        if (c->is_locally_owned())
+          {
+            const unsigned int fe_index = c->active_fe_index();
+            const unsigned int dofs_per_cell = c->get_fe().dofs_per_cell;
+            indices.resize(dofs_per_cell);
+            c->get_dof_indices(indices);
+            for (unsigned int i=0; i<dofs_per_cell; ++i)
+              if (dof.locally_owned_dofs().is_element(indices[i]))
+                dofs_by_block[dof.locally_owned_dofs().index_within_set(indices[i])]
+                  = local_block_association[fe_index][i];
+          }
+    }
+  }
+
+
+
+  template <typename DoFHandlerType, typename Number>
+  void distribute_cell_to_dof_vector (const DoFHandlerType &dof_handler,
+                                      const Vector<Number> &cell_data,
+                                      Vector<double>       &dof_data,
+                                      const unsigned int    component)
+  {
+    const unsigned int dim = DoFHandlerType::dimension;
+    const unsigned int spacedim = DoFHandlerType::space_dimension;
+    const Triangulation<dim,spacedim> &tria = dof_handler.get_triangulation();
+    (void)tria;
+
+    AssertDimension (cell_data.size(), tria.n_active_cells());
+    AssertDimension (dof_data.size(), dof_handler.n_dofs());
+    AssertIndexRange (component, n_components(dof_handler));
+    Assert (fe_is_primitive(dof_handler) == true,
+            typename FiniteElement<dim>::ExcFENotPrimitive());
+
+    // store a flag whether we should care about different components. this
+    // is just a simplification, we could ask for this at every single
+    // place equally well
+    const bool consider_components = (n_components(dof_handler) != 1);
+
+    // zero out the entries of dof_data that we are going to sum into below
+    if (consider_components == false)
+      dof_data = 0;
+    else
+      {
+        std::vector<unsigned char> component_dofs (dof_handler.n_locally_owned_dofs());
+        internal::get_component_association (dof_handler,
+                                             dof_handler.get_fe().component_mask (FEValuesExtractors::Scalar(component)),
+                                             component_dofs);
+
+        // same index type as in the averaging loop below, so the counter is never too narrow
+        for (types::global_dof_index i=0; i<dof_data.size(); ++i)
+          if (component_dofs[i] == static_cast<unsigned char>(component))
+            dof_data(i) = 0;
+      }
+
+    // count how often we have added a value in the sum for each dof
+    std::vector<unsigned char> touch_count (dof_handler.n_dofs(), 0);
+
+    typename DoFHandlerType::active_cell_iterator cell = dof_handler.begin_active(),
+                                                  endc = dof_handler.end();
+    std::vector<types::global_dof_index> dof_indices;
+    dof_indices.reserve (max_dofs_per_cell(dof_handler));
+
+    for (unsigned int present_cell = 0; cell!=endc; ++cell, ++present_cell)
+      {
+        const unsigned int dofs_per_cell = cell->get_fe().dofs_per_cell;
+        dof_indices.resize (dofs_per_cell);
+        cell->get_dof_indices (dof_indices);
+
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          // consider this dof only if it is the right component. if there
+          // is only one component, short cut the test
+          if (!consider_components ||
+              (cell->get_fe().system_to_component_index(i).first == component))
+            {
+              // sum up contribution of the present_cell to this dof
+              dof_data(dof_indices[i]) += cell_data(present_cell);
+
+              // note that we added another summand
+              ++touch_count[dof_indices[i]];
+            }
+      }
+
+    // compute the mean value on all the dofs by dividing with the number
+    // of summands. dofs that received no summand are left as they are
+    for (types::global_dof_index i=0; i<dof_handler.n_dofs(); ++i)
+      {
+        // assert that each dof was used at least once. this needs not be
+        // the case if the vector has more than one component
+        Assert (consider_components || (touch_count[i]!=0),
+                ExcInternalError());
+        if (touch_count[i] != 0)
+          dof_data(i) /= touch_count[i];
+      }
+  }
+
+
+
+  template <int dim, int spacedim>
+  void
+  extract_dofs (const DoFHandler<dim,spacedim> &dof,
+                const ComponentMask            &component_mask,
+                std::vector<bool>              &selected_dofs)
+  {
+    const FiniteElement<dim,spacedim> &fe = dof.get_fe();
+    (void)fe;
+
+    Assert(component_mask.represents_n_components(fe.n_components()),
+           ExcMessage ("The given component mask is not sized correctly to represent the "
+                       "components of the given finite element."));
+    Assert(selected_dofs.size() == dof.n_locally_owned_dofs(),
+           ExcDimensionMismatch(selected_dofs.size(), dof.n_locally_owned_dofs()));
+
+    // two special cases: no component is selected, and all components are
+    // selected; both are answered here without looking at any cell
+    if (component_mask.n_selected_components(n_components(dof)) == 0)
+      {
+        std::fill_n (selected_dofs.begin(), dof.n_locally_owned_dofs(), false);
+        return;
+      }
+    else if (component_mask.n_selected_components(n_components(dof)) == n_components(dof))
+      {
+        std::fill_n (selected_dofs.begin(), dof.n_locally_owned_dofs(), true);
+        return;
+      }
+
+
+    // preset all values by false
+    std::fill_n (selected_dofs.begin(), dof.n_locally_owned_dofs(), false);
+
+    // get the component association of each DoF and then select the ones
+    // that match the given set of components
+    std::vector<unsigned char> dofs_by_component (dof.n_locally_owned_dofs());
+    internal::get_component_association (dof, component_mask,
+                                         dofs_by_component);
+
+    for (types::global_dof_index i=0; i<dof.n_locally_owned_dofs(); ++i)
+      if (component_mask[dofs_by_component[i]] == true)
+        selected_dofs[i] = true;
+  }
+
+
+  // TODO: Unify the following two functions with the non-hp case
+
+  template <int dim, int spacedim>
+  void
+  extract_dofs (const hp::DoFHandler<dim,spacedim> &dof,
+                const ComponentMask                &component_mask,
+                std::vector<bool>                  &selected_dofs)
+  {
+    const FiniteElement<dim,spacedim> &fe = dof.begin_active()->get_fe();
+    (void)fe;
+
+    Assert(component_mask.represents_n_components(fe.n_components()),
+           ExcMessage ("The given component mask is not sized correctly to represent the "
+                       "components of the given finite element."));
+    Assert(selected_dofs.size() == dof.n_dofs(),
+           ExcDimensionMismatch(selected_dofs.size(), dof.n_dofs()));
+
+    // two special cases: no component is selected, and all components are
+    // selected; both are answered here without looking at any cell
+    if (component_mask.n_selected_components(n_components(dof)) == 0)
+      {
+        std::fill_n (selected_dofs.begin(), dof.n_dofs(), false);
+        return;
+      }
+    else if (component_mask.n_selected_components(n_components(dof)) == n_components(dof))
+      {
+        std::fill_n (selected_dofs.begin(), dof.n_dofs(), true);
+        return;
+      }
+
+
+    // general case: start with no dof selected
+    std::fill_n (selected_dofs.begin(), dof.n_dofs(), false);
+
+    // get the component association of each DoF and then select the ones
+    // whose associated component is set in the given component mask
+    std::vector<unsigned char> dofs_by_component (dof.n_dofs());
+    internal::get_component_association (dof, component_mask,
+                                         dofs_by_component);
+
+    for (types::global_dof_index i=0; i<dof.n_dofs(); ++i)
+      if (component_mask[dofs_by_component[i]] == true)
+        selected_dofs[i] = true;
+  }
+
+
+
+  template <int dim, int spacedim>
+  void
+  extract_dofs (const DoFHandler<dim,spacedim>   &dof,
+                const BlockMask     &block_mask,
+                std::vector<bool>       &selected_dofs)
+  {
+    // translate the block mask into a component mask and let the overload for component masks do the work
+    const ComponentMask component_mask = dof.get_fe().component_mask (block_mask);
+    extract_dofs (dof, component_mask, selected_dofs);
+  }
+
+
+
+  template <int dim, int spacedim>
+  void
+  extract_dofs (const hp::DoFHandler<dim,spacedim>   &dof,
+                const BlockMask     &block_mask,
+                std::vector<bool>       &selected_dofs)
+  {
+    // translate the block mask into a component mask and let the overload for component masks do the work
+    const ComponentMask component_mask = dof.get_fe().component_mask (block_mask);
+    extract_dofs (dof, component_mask, selected_dofs);
+  }
+
+
+
+  template<typename DoFHandlerType>
+  void
+  extract_level_dofs (const unsigned int    level,
+                      const DoFHandlerType &dof,
+                      const ComponentMask  &component_mask,
+                      std::vector<bool>    &selected_dofs)
+  {
+    const FiniteElement<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &fe = dof.get_fe();
+
+    Assert(component_mask.represents_n_components(n_components(dof)),
+           ExcMessage ("The given component mask is not sized correctly to represent the "
+                       "components of the given finite element."));
+    Assert(selected_dofs.size() == dof.n_dofs(level),
+           ExcDimensionMismatch(selected_dofs.size(), dof.n_dofs(level)));
+
+    // two special cases that need no loop over cells: either no component
+    // is selected (all flags false) or all of them are (all flags true)
+    const unsigned int n_selected
+      = component_mask.n_selected_components(n_components(dof));
+    if ((n_selected == 0) || (n_selected == n_components(dof)))
+      {
+        std::fill_n (selected_dofs.begin(), dof.n_dofs(level),
+                     (n_selected != 0));
+        return;
+      }
+
+    // general case: start with no level dof selected. the loop over the
+    // cells of this level further down then assigns the flag of every
+    // dof index it encounters
+    std::fill_n (selected_dofs.begin(), dof.n_dofs(level), false);
+
+    // for each dof of the reference cell, record whether the component it
+    // is associated with is selected by the mask
+    const unsigned int dofs_per_cell = fe.dofs_per_cell;
+    const std::vector<unsigned char> component_of_local_dof
+      = internal::get_local_component_association (fe, component_mask);
+    std::vector<bool> is_local_dof_selected (dofs_per_cell);
+    for (unsigned int i=0; i<dofs_per_cell; ++i)
+      is_local_dof_selected[i] = component_mask[component_of_local_dof[i]];
+
+    // then visit all cells of this level and copy the local flags to their level dof indices
+    std::vector<types::global_dof_index> level_dof_indices (dofs_per_cell);
+    for (typename DoFHandlerType::level_cell_iterator cell = dof.begin(level); cell != dof.end(level); ++cell)
+      {
+        cell->get_mg_dof_indices (level_dof_indices);
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          selected_dofs[level_dof_indices[i]] = is_local_dof_selected[i];
+      }
+  }
+
+
+
+  // Block-mask flavor of extract_level_dofs(): the selected blocks are
+  // translated into the corresponding component mask of the handler's
+  // finite element, and the ComponentMask overload is then called with the
+  // same level, handler and output vector.
+  template<typename DoFHandlerType>
+  void
+  extract_level_dofs (const unsigned int    level,
+                      const DoFHandlerType &dof,
+                      const BlockMask      &block_mask,
+                      std::vector<bool>    &selected_dofs)
+  {
+    // express the block selection in terms of vector components ...
+    const ComponentMask selected_components
+      = dof.get_fe().component_mask (block_mask);
+
+    // ... and let the ComponentMask overload do the work
+    extract_level_dofs (level, dof, selected_components, selected_dofs);
+  }
+
+
+
+  // Flag, in a boolean vector indexed by global dof number, the degrees of
+  // freedom that the IndexSet overload of extract_boundary_dofs() reports
+  // for the given component mask and set of boundary ids.
+  //
+  // @p selected_dofs is cleared and resized to dof_handler.n_dofs()
+  // entries, so whatever it contained before is discarded.
+  //
+  // An assertion rejects handlers that are built on a
+  // parallel::distributed::Triangulation.
+  template <typename DoFHandlerType>
+  void
+  extract_boundary_dofs (const DoFHandlerType               &dof_handler,
+                         const ComponentMask                &component_mask,
+                         std::vector<bool>                  &selected_dofs,
+                         const std::set<types::boundary_id> &boundary_ids)
+  {
+    // note the blank at the end of the first literal: adjacent string
+    // literals are concatenated verbatim, and without it the two sentences
+    // of the message run into each other
+    Assert ((dynamic_cast<const parallel::distributed::Triangulation<DoFHandlerType::dimension,DoFHandlerType::space_dimension>*>
+             (&dof_handler.get_triangulation())
+             == 0),
+            ExcMessage ("This function can not be used with distributed triangulations. "
+                        "See the documentation for more information."));
+
+    // let the IndexSet overload determine the boundary dofs
+    IndexSet indices;
+    extract_boundary_dofs (dof_handler, component_mask,
+                           indices, boundary_ids);
+
+    // start from an all-false vector of the right size ...
+    selected_dofs.clear ();
+    selected_dofs.resize (dof_handler.n_dofs(), false);
+
+    // ... and convert the index set computed above to the binary vector
+    indices.fill_binary_vector(selected_dofs);
+  }
+
+
+  // Collect, as an IndexSet of size dof_handler.n_dofs(), the degrees of
+  // freedom located on boundary faces of active cells that are not
+  // artificial. If @p boundary_ids is empty every boundary face is taken
+  // into account, otherwise only faces whose boundary id is contained in
+  // the set. When a component check is required, a face dof is only added
+  // if its vector component (for non-primitive shape functions: the first
+  // nonzero component) is selected in @p component_mask.
+  //
+  // Assertions check that the mask matches the number of components of the
+  // handler and that numbers::internal_face_boundary_id is not among the
+  // requested boundary ids.
+  template <typename DoFHandlerType>
+  void
+  extract_boundary_dofs (const DoFHandlerType               &dof_handler,
+                         const ComponentMask                &component_mask,
+                         IndexSet                           &selected_dofs,
+                         const std::set<types::boundary_id> &boundary_ids)
+  {
+    Assert (component_mask.represents_n_components(n_components(dof_handler)),
+            ExcMessage ("Component mask has invalid size."));
+    Assert (boundary_ids.find (numbers::internal_face_boundary_id) == boundary_ids.end(),
+            ExcInvalidBoundaryIndicator());
+    const unsigned int dim=DoFHandlerType::dimension;
+
+    // first reset output argument
+    selected_dofs.clear ();
+    selected_dofs.set_size(dof_handler.n_dofs());
+
+    // let's see whether we have to check for certain boundary indicators
+    // or whether we can accept all (an empty set means: accept all)
+    const bool check_boundary_id = (boundary_ids.size() != 0);
+
+    // also see whether we have to check whether a certain vector component
+    // is selected, or all. note that a mask that explicitly lists every
+    // component as selected, but is not the special all-selected mask,
+    // still triggers the per-dof check below
+    const bool check_vector_component
+      = ((component_mask.represents_the_all_selected_mask() == false)
+         ||
+         (component_mask.n_selected_components(n_components(dof_handler)) !=
+          n_components(dof_handler)));
+
+    // scratch array for the dof indices of one face; it is resized for
+    // each face below
+    std::vector<types::global_dof_index> face_dof_indices;
+    face_dof_indices.reserve (max_dofs_per_face(dof_handler));
+
+    // now loop over all cells and check whether their faces are at the
+    // boundary. note that we need not take special care of single lines
+    // being at the boundary (using @p{cell->has_boundary_lines}), since we
+    // do not support boundaries of dimension dim-2, and so every isolated
+    // boundary line is also part of a boundary face which we will be
+    // visiting sooner or later
+    for (typename DoFHandlerType::active_cell_iterator cell=dof_handler.begin_active();
+         cell!=dof_handler.end(); ++cell)
+
+      // only work on cells that are either locally owned or at least ghost
+      // cells
+      if (cell->is_artificial() == false)
+        for (unsigned int face=0;
+             face<GeometryInfo<DoFHandlerType::dimension>::faces_per_cell; ++face)
+          if (cell->at_boundary(face))
+            if (! check_boundary_id ||
+                (boundary_ids.find (cell->face(face)->boundary_id())
+                 != boundary_ids.end()))
+              {
+                const FiniteElement<DoFHandlerType::dimension, DoFHandlerType::space_dimension>
+                &fe = cell->get_fe();
+
+                // fetch the global indices of the dofs on this face, using
+                // the finite element active on the current cell
+                const unsigned int dofs_per_face = fe.dofs_per_face;
+                face_dof_indices.resize (dofs_per_face);
+                cell->face(face)->get_dof_indices (face_dof_indices,
+                                                   cell->active_fe_index());
+
+                for (unsigned int i=0; i<fe.dofs_per_face; ++i)
+                  if (!check_vector_component)
+                    selected_dofs.add_index (face_dof_indices[i]);
+                  else
+                    // check for component is required. somewhat tricky as
+                    // usual for the case that the shape function is
+                    // non-primitive, but use usual convention (see docs)
+                    {
+                      // first get at the cell-global number of a face dof,
+                      // to ask the fe certain questions.
+                      // NOTE(review): the offsets below presumably rely on
+                      // the cell-wise numbering (vertex dofs, then line
+                      // dofs, then quad dofs) and do not depend on which
+                      // face is being visited -- confirm against the
+                      // FiniteElement numbering documentation
+                      const unsigned int cell_index
+                        = (dim == 1 ?
+                           i
+                           :
+                           (dim == 2 ?
+                            (i<2*fe.dofs_per_vertex ? i : i+2*fe.dofs_per_vertex)
+                            :
+                            (dim == 3 ?
+                             (i<4*fe.dofs_per_vertex ?
+                              i
+                              :
+                              (i<4*fe.dofs_per_vertex+4*fe.dofs_per_line ?
+                               i+4*fe.dofs_per_vertex
+                               :
+                               i+4*fe.dofs_per_vertex+8*fe.dofs_per_line))
+                             :
+                             numbers::invalid_unsigned_int)));
+                      if (fe.is_primitive (cell_index))
+                        {
+                          // primitive shape function: it belongs to exactly
+                          // one component, which decides
+                          if (component_mask[fe.face_system_to_component_index(i).first]
+                              == true)
+                            selected_dofs.add_index (face_dof_indices[i]);
+                        }
+                      else // not primitive
+                        {
+                          // several components may be nonzero; by
+                          // convention the first one decides
+                          const unsigned int first_nonzero_comp
+                            = fe.get_nonzero_components(cell_index).first_selected_component();
+                          Assert (first_nonzero_comp < fe.n_components(),
+                                  ExcInternalError());
+
+                          if (component_mask[first_nonzero_comp] == true)
+                            selected_dofs.add_index (face_dof_indices[i]);
+                        }
+                    }
+              }
+  }
+
+
+
+  // Flag in @p selected_dofs (cleared and resized to dof_handler.n_dofs()
+  // entries) the degrees of freedom of active cells whose shape functions
+  // have support on a boundary face, as reported by
+  // FiniteElement::has_support_on_face(). An empty set of boundary ids
+  // means that every boundary face is considered; otherwise only faces
+  // carrying one of the listed ids are. Unless the component mask is the
+  // all-selected mask, the entry of such a dof is set to whether its
+  // component (for non-primitive shape functions: the first nonzero
+  // component) is selected in the mask.
+  template <typename DoFHandlerType>
+  void
+  extract_dofs_with_support_on_boundary (const DoFHandlerType               &dof_handler,
+                                         const ComponentMask                &component_mask,
+                                         std::vector<bool>                  &selected_dofs,
+                                         const std::set<types::boundary_id> &boundary_ids)
+  {
+    Assert (component_mask.represents_n_components (n_components(dof_handler)),
+            ExcMessage ("This component mask has the wrong size."));
+    Assert (boundary_ids.find (numbers::internal_face_boundary_id) == boundary_ids.end(),
+            ExcInvalidBoundaryIndicator());
+
+    // an empty id set stands for "all boundary faces"
+    const bool restrict_to_given_ids = !boundary_ids.empty();
+
+    // with the all-selected mask no per-dof component test is necessary
+    const bool filter_by_component
+      = !component_mask.represents_the_all_selected_mask();
+
+    // start from an all-false vector of the right size
+    selected_dofs.clear ();
+    selected_dofs.resize (dof_handler.n_dofs(), false);
+
+    // scratch array for the dof indices of one cell
+    std::vector<types::global_dof_index> dofs_on_cell;
+    dofs_on_cell.reserve (max_dofs_per_cell(dof_handler));
+
+    // visit every face of every active cell and skip the ones that are not
+    // part of the requested boundary. lines at the boundary need no extra
+    // treatment: boundaries of dimension dim-2 are not supported, so each
+    // boundary line also belongs to a boundary face that is visited here
+    for (typename DoFHandlerType::active_cell_iterator cell=dof_handler.begin_active();
+         cell!=dof_handler.end(); ++cell)
+      for (unsigned int face_no=0;
+           face_no<GeometryInfo<DoFHandlerType::dimension>::faces_per_cell; ++face_no)
+        {
+          if (!cell->at_boundary(face_no))
+            continue;
+
+          if (restrict_to_given_ids &&
+              (boundary_ids.find (cell->face(face_no)->boundary_id())
+               == boundary_ids.end()))
+            continue;
+
+          const FiniteElement<DoFHandlerType::dimension, DoFHandlerType::space_dimension> &fe
+            = cell->get_fe();
+
+          dofs_on_cell.resize (fe.dofs_per_cell);
+          cell->get_dof_indices (dofs_on_cell);
+
+          for (unsigned int i=0; i<fe.dofs_per_cell; ++i)
+            {
+              if (!fe.has_support_on_face(i,face_no))
+                continue;
+
+              if (!filter_by_component)
+                {
+                  selected_dofs[dofs_on_cell[i]] = true;
+                  continue;
+                }
+
+              // find the component that decides about this dof: the one
+              // and only component of a primitive shape function, or, by
+              // the usual convention, the first nonzero one otherwise
+              unsigned int deciding_component;
+              if (fe.is_primitive (i))
+                deciding_component = fe.system_to_component_index(i).first;
+              else
+                {
+                  deciding_component
+                    = fe.get_nonzero_components(i).first_selected_component();
+                  Assert (deciding_component < fe.n_components(),
+                          ExcInternalError());
+                }
+
+              // note that this is an assignment, not just a "set to true"
+              selected_dofs[dofs_on_cell[i]] = component_mask[deciding_component];
+            }
+        }
+  }
+
+
+
+
+  // Dimension-specific workers for DoFTools::extract_hanging_node_dofs().
+  // The overloads differ only in the dimension of the DoFHandler they
+  // accept; each one first resets the output vector to false and then sets
+  // the entries of the dofs it finds on the children of refined faces.
+  namespace internal
+  {
+    namespace
+    {
+      // 1d: only checks the size of the output vector and resets it.
+      template <int spacedim>
+      void extract_hanging_node_dofs (const dealii::DoFHandler<1,spacedim> &dof_handler,
+                                      std::vector<bool>           &selected_dofs)
+      {
+        Assert(selected_dofs.size() == dof_handler.n_dofs(),
+               ExcDimensionMismatch(selected_dofs.size(), dof_handler.n_dofs()));
+        // preset all values by false
+        std::fill_n (selected_dofs.begin(), dof_handler.n_dofs(), false);
+
+        // there are no hanging nodes in 1d
+      }
+
+
+      // 2d: for every face (line) of an active cell that has children,
+      // flag the vertex dofs at vertex 1 of the first child line and the
+      // line dofs of both child lines.
+      template <int spacedim>
+      void extract_hanging_node_dofs (const dealii::DoFHandler<2,spacedim> &dof_handler,
+                                      std::vector<bool>           &selected_dofs)
+      {
+        const unsigned int dim = 2;
+
+        Assert(selected_dofs.size() == dof_handler.n_dofs(),
+               ExcDimensionMismatch(selected_dofs.size(), dof_handler.n_dofs()));
+        // preset all values by false
+        std::fill_n (selected_dofs.begin(), dof_handler.n_dofs(), false);
+
+        const FiniteElement<dim,spacedim> &fe = dof_handler.get_fe();
+
+        // this function is similar to the make_sparsity_pattern function,
+        // see there for more information
+        typename dealii::DoFHandler<dim,spacedim>::active_cell_iterator
+        cell = dof_handler.begin_active(),
+        endc = dof_handler.end();
+        for (; cell!=endc; ++cell)
+          for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+            if (cell->face(face)->has_children())
+              {
+                const typename dealii::DoFHandler<dim,spacedim>::line_iterator
+                line = cell->face(face);
+
+                // NOTE(review): vertex 1 of child 0 is presumably the
+                // midpoint of the refined line, i.e. the hanging node --
+                // confirm against the child numbering conventions
+                for (unsigned int dof=0; dof!=fe.dofs_per_vertex; ++dof)
+                  selected_dofs[line->child(0)->vertex_dof_index(1,dof)] = true;
+
+                // dofs in the interior of the two child lines
+                for (unsigned int child=0; child<2; ++child)
+                  for (unsigned int dof=0; dof!=fe.dofs_per_line; ++dof)
+                    selected_dofs[line->child(child)->dof_index(dof)] = true;
+              }
+      }
+
+
+      // 3d: for every face (quad) of an active cell that has children,
+      // flag vertex, line and quad dofs on the children of that face and
+      // on the children of its four bounding lines, as detailed below.
+      template <int spacedim>
+      void extract_hanging_node_dofs (const dealii::DoFHandler<3,spacedim> &dof_handler,
+                                      std::vector<bool>           &selected_dofs)
+      {
+        const unsigned int dim = 3;
+
+        Assert(selected_dofs.size() == dof_handler.n_dofs(),
+               ExcDimensionMismatch(selected_dofs.size(), dof_handler.n_dofs()));
+        // preset all values by false
+        std::fill_n (selected_dofs.begin(), dof_handler.n_dofs(), false);
+
+        const FiniteElement<dim,spacedim> &fe = dof_handler.get_fe();
+
+        // this function is similar to the make_sparsity_pattern function,
+        // see there for more information
+
+        typename dealii::DoFHandler<dim,spacedim>::active_cell_iterator
+        cell = dof_handler.begin_active(),
+        endc = dof_handler.end();
+        for (; cell!=endc; ++cell)
+          for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+            if (cell->face(f)->has_children())
+              {
+                const typename dealii::DoFHandler<dim,spacedim>::face_iterator
+                face = cell->face(f);
+
+                // NOTE(review): vertex 2 of child 0 is presumably the
+                // center of the refined face -- confirm against the child
+                // numbering conventions
+                for (unsigned int dof=0; dof!=fe.dofs_per_vertex; ++dof)
+                  selected_dofs[face->child(0)->vertex_dof_index(2,dof)] = true;
+
+                // dof numbers on the centers of the lines bounding this
+                // face
+                for (unsigned int line=0; line<4; ++line)
+                  for (unsigned int dof=0; dof!=fe.dofs_per_vertex; ++dof)
+                    selected_dofs[face->line(line)->child(0)->vertex_dof_index(1,dof)] = true;
+
+                // next the dofs on the lines interior to the face; the
+                // order of these lines is laid down in the FiniteElement
+                // class documentation
+                for (unsigned int dof=0; dof<fe.dofs_per_line; ++dof)
+                  selected_dofs[face->child(0)->line(1)->dof_index(dof)] = true;
+                for (unsigned int dof=0; dof<fe.dofs_per_line; ++dof)
+                  selected_dofs[face->child(1)->line(2)->dof_index(dof)] = true;
+                for (unsigned int dof=0; dof<fe.dofs_per_line; ++dof)
+                  selected_dofs[face->child(2)->line(3)->dof_index(dof)] = true;
+                for (unsigned int dof=0; dof<fe.dofs_per_line; ++dof)
+                  selected_dofs[face->child(3)->line(0)->dof_index(dof)] = true;
+
+                // dofs on the bordering lines
+                for (unsigned int line=0; line<4; ++line)
+                  for (unsigned int child=0; child<2; ++child)
+                    for (unsigned int dof=0; dof!=fe.dofs_per_line; ++dof)
+                      selected_dofs[face->line(line)->child(child)->dof_index(dof)] = true;
+
+                // finally, for the dofs interior to the four child faces
+                for (unsigned int child=0; child<4; ++child)
+                  for (unsigned int dof=0; dof!=fe.dofs_per_quad; ++dof)
+                    selected_dofs[face->child(child)->dof_index(dof)] = true;
+              }
+      }
+    }
+  }
+
+
+
+  // Public entry point for finding hanging node dofs: all work is done by
+  // the overloads of the same name in the internal namespace, which are
+  // written separately for DoFHandler<1,spacedim>, DoFHandler<2,spacedim>
+  // and DoFHandler<3,spacedim>.
+  template <int dim, int spacedim>
+  void
+  extract_hanging_node_dofs (const DoFHandler<dim,spacedim> &dof_handler,
+                             std::vector<bool>              &selected_dofs)
+  {
+    // overload resolution on the handler's dimension picks the worker
+    internal::extract_hanging_node_dofs (dof_handler, selected_dofs);
+  }
+
+
+
+  // Set to true the entries of @p selected_dofs that belong to degrees of
+  // freedom located on active cells whose subdomain id equals
+  // @p subdomain_id; all other entries are set to false. The vector must
+  // already have dof_handler.n_dofs() entries, which is checked by an
+  // assertion.
+  template <typename DoFHandlerType>
+  void
+  extract_subdomain_dofs (const DoFHandlerType      &dof_handler,
+                          const types::subdomain_id  subdomain_id,
+                          std::vector<bool>         &selected_dofs)
+  {
+    Assert(selected_dofs.size() == dof_handler.n_dofs(),
+           ExcDimensionMismatch(selected_dofs.size(), dof_handler.n_dofs()));
+
+    // nothing is selected to begin with
+    std::fill_n (selected_dofs.begin(), dof_handler.n_dofs(), false);
+
+    // scratch array for the dof indices of one cell
+    std::vector<types::global_dof_index> dofs_on_cell;
+    dofs_on_cell.reserve (max_dofs_per_cell(dof_handler));
+
+    for (typename DoFHandlerType::active_cell_iterator
+         cell = dof_handler.begin_active();
+         cell != dof_handler.end(); ++cell)
+      {
+        // cells of other subdomains do not contribute
+        if (cell->subdomain_id() != subdomain_id)
+          continue;
+
+        dofs_on_cell.resize (cell->get_fe().dofs_per_cell);
+        cell->get_dof_indices (dofs_on_cell);
+
+        for (std::vector<types::global_dof_index>::const_iterator
+             dof = dofs_on_cell.begin(); dof != dofs_on_cell.end(); ++dof)
+          selected_dofs[*dof] = true;
+      }
+  }
+
+
+
+  // Store in @p dof_set a copy of the index set that the handler reports
+  // as its locally owned dofs, and compress it.
+  template <typename DoFHandlerType>
+  void
+  extract_locally_owned_dofs (const DoFHandlerType &dof_handler,
+                              IndexSet             &dof_set)
+  {
+    // take over the handler's own notion of ownership ...
+    dof_set = dof_handler.locally_owned_dofs();
+
+    // ... and compress the copy before handing it back
+    dof_set.compress ();
+  }
+
+
+
+  // Store in @p dof_set the locally owned dofs of the handler together
+  // with all other dofs that are located on locally owned active cells.
+  template <typename DoFHandlerType>
+  void
+  extract_locally_active_dofs (const DoFHandlerType &dof_handler,
+                               IndexSet             &dof_set)
+  {
+    // start with the dofs the handler reports as locally owned
+    dof_set = dof_handler.locally_owned_dofs();
+
+    // now gather the dofs on locally owned cells that are not yet part of
+    // the set. they are cached in a std::set, which also removes
+    // duplicates. every dof has to be tested individually because we can't
+    // be sure that the range of locally owned dofs is contiguous
+    std::vector<types::global_dof_index> dofs_on_cell;
+    std::set<types::global_dof_index>    additional_dofs;
+
+    for (typename DoFHandlerType::active_cell_iterator
+         cell = dof_handler.begin_active();
+         cell != dof_handler.end(); ++cell)
+      {
+        if (!cell->is_locally_owned())
+          continue;
+
+        dofs_on_cell.resize(cell->get_fe().dofs_per_cell);
+        cell->get_dof_indices(dofs_on_cell);
+
+        for (unsigned int i=0; i<dofs_on_cell.size(); ++i)
+          if (!dof_set.is_element(dofs_on_cell[i]))
+            additional_dofs.insert(dofs_on_cell[i]);
+      }
+
+    // merge what was found into the result
+    dof_set.add_indices(additional_dofs.begin(), additional_dofs.end());
+    dof_set.compress();
+  }
+
+
+
+  // Store in @p dof_set the locally owned dofs of the handler together
+  // with all dofs that are located on active ghost cells.
+  template <typename DoFHandlerType>
+  void
+  extract_locally_relevant_dofs (const DoFHandlerType &dof_handler,
+                                 IndexSet             &dof_set)
+  {
+    // start with the dofs the handler reports as locally owned
+    dof_set = dof_handler.locally_owned_dofs();
+
+    // then collect the dofs on ghost cells that are not yet in the set.
+    //
+    // Performance note (Martin Kronbichler): for certain meshes, in
+    // particular in 3D and with many processors, caching the intermediate
+    // data is really necessary. Of the containers that were tried (a
+    // std::set, a vector kept sorted at all times, and a vector that is
+    // filled unsorted and sorted once at the end) the last one performed
+    // best, which is why a plain vector is used here.
+    std::vector<types::global_dof_index> dofs_on_cell;
+    std::vector<types::global_dof_index> ghost_dofs;
+
+    for (typename DoFHandlerType::active_cell_iterator
+         cell = dof_handler.begin_active();
+         cell != dof_handler.end(); ++cell)
+      {
+        if (!cell->is_ghost())
+          continue;
+
+        dofs_on_cell.resize(cell->get_fe().dofs_per_cell);
+        cell->get_dof_indices(dofs_on_cell);
+
+        for (std::vector<types::global_dof_index>::const_iterator
+             dof = dofs_on_cell.begin(); dof != dofs_on_cell.end(); ++dof)
+          if (!dof_set.is_element(*dof))
+            ghost_dofs.push_back(*dof);
+      }
+
+    // sort once, move duplicates out of the way, and add the unique part
+    // of the list to the index set
+    std::sort(ghost_dofs.begin(), ghost_dofs.end());
+    const std::vector<types::global_dof_index>::iterator
+    end_of_unique_dofs = std::unique(ghost_dofs.begin(), ghost_dofs.end());
+    dof_set.add_indices(ghost_dofs.begin(), end_of_unique_dofs);
+
+    dof_set.compress();
+  }
+
+
+  // Multigrid counterpart of extract_locally_relevant_dofs(): store in
+  // @p dof_set the locally owned level dofs of the given level together
+  // with the level dofs of those cells on that level whose level subdomain
+  // id is neither the locally owned subdomain nor the artificial one.
+  template <typename DoFHandlerType>
+  void
+  extract_locally_relevant_level_dofs (const DoFHandlerType &dof_handler,
+                                       const unsigned int    level,
+                                       IndexSet             &dof_set)
+  {
+    // start with the level dofs the handler reports as locally owned
+    dof_set = dof_handler.locally_owned_mg_dofs(level);
+
+    // then collect the level dofs on ghost cells that are not yet in the
+    // set.
+    //
+    // Performance note (Martin Kronbichler): for certain meshes, in
+    // particular in 3D and with many processors, caching the intermediate
+    // data is really necessary. Of the containers that were tried (a
+    // std::set, a vector kept sorted at all times, and a vector that is
+    // filled unsorted and sorted once at the end) the last one performed
+    // best, which is why a plain vector is used here.
+    std::vector<types::global_dof_index> dofs_on_cell;
+    std::vector<types::global_dof_index> ghost_dofs;
+
+    for (typename DoFHandlerType::cell_iterator cell = dof_handler.begin(level);
+         cell != dof_handler.end(level); ++cell)
+      {
+        const types::subdomain_id owner = cell->level_subdomain_id();
+
+        // only ghost cells are of interest: neither our own cells nor
+        // artificial ones
+        const bool is_own_or_artificial
+          = (owner == dof_handler.get_triangulation().locally_owned_subdomain()
+             || owner == numbers::artificial_subdomain_id);
+
+        if (!is_own_or_artificial)
+          {
+            dofs_on_cell.resize(cell->get_fe().dofs_per_cell);
+            cell->get_mg_dof_indices(dofs_on_cell);
+
+            for (std::vector<types::global_dof_index>::const_iterator
+                 dof = dofs_on_cell.begin(); dof != dofs_on_cell.end(); ++dof)
+              if (!dof_set.is_element(*dof))
+                ghost_dofs.push_back(*dof);
+          }
+      }
+
+    // sort once, move duplicates out of the way, and add the unique part
+    // of the list to the index set
+    std::sort(ghost_dofs.begin(), ghost_dofs.end());
+    const std::vector<types::global_dof_index>::iterator
+    end_of_unique_dofs = std::unique(ghost_dofs.begin(), ghost_dofs.end());
+    dof_set.add_indices(ghost_dofs.begin(), end_of_unique_dofs);
+
+    dof_set.compress();
+  }
+
+  // Fill @p constant_modes with one boolean vector per constant mode of
+  // the selected components. Each vector has one entry per locally owned
+  // dof that belongs to a selected component, numbered consecutively in
+  // the order of the locally owned index set. The entries are taken from
+  // the tables returned by FiniteElement::get_constant_modes() and are
+  // written while looping over the locally owned active cells.
+  template <typename DoFHandlerType>
+  void
+  extract_constant_modes (const DoFHandlerType            &dof_handler,
+                          const ComponentMask             &component_mask,
+                          std::vector<std::vector<bool> > &constant_modes)
+  {
+    const unsigned int n_components = dof_handler.get_fe().n_components();
+    Assert (component_mask.represents_n_components(n_components),
+            ExcDimensionMismatch(n_components,
+                                 component_mask.size()));
+
+    // determine the component of every locally owned dof and count how
+    // many of these dofs belong to one of the selected components
+    std::vector<unsigned char> dofs_by_component (dof_handler.n_locally_owned_dofs());
+    internal::get_component_association (dof_handler, component_mask,
+                                         dofs_by_component);
+    unsigned int n_selected_dofs = 0;
+    for (unsigned int i=0; i<n_components; ++i)
+      if (component_mask[i] == true)
+        n_selected_dofs += std::count (dofs_by_component.begin(),
+                                       dofs_by_component.end(), i);
+
+    // Find local numbering within the selected components; dofs of
+    // unselected components keep the invalid marker
+    const IndexSet &locally_owned_dofs = dof_handler.locally_owned_dofs();
+    std::vector<unsigned int> component_numbering(locally_owned_dofs.n_elements(),
+                                                  numbers::invalid_unsigned_int);
+    for (unsigned int i=0, count=0; i<locally_owned_dofs.n_elements(); ++i)
+      if (component_mask[dofs_by_component[i]])
+        component_numbering[i] = count++;
+
+    // get the element constant modes and find a translation table between
+    // index in the constant modes and the components.
+    //
+    // TODO: We might be able to extend this also for elements which do not
+    // have the same constant modes, but that is messy...
+    const dealii::hp::FECollection<DoFHandlerType::dimension,DoFHandlerType::space_dimension>
+    fe_collection (dof_handler.get_fe());
+    std::vector<Table<2,bool> > element_constant_modes;
+    std::vector<std::vector<std::pair<unsigned int, unsigned int> > >
+    constant_mode_to_component_translation(n_components);
+    unsigned int n_constant_modes = 0;
+    for (unsigned int f=0; f<fe_collection.size(); ++f)
+      {
+        std::pair<Table<2,bool>, std::vector<unsigned int> > data
+          = fe_collection[f].get_constant_modes();
+        element_constant_modes.push_back(data.first);
+        // the translation table is built from the first element only: for
+        // each selected component it stores pairs (index of the output
+        // mode, row in the element's table of constant modes)
+        if (f==0)
+          for (unsigned int i=0; i<data.second.size(); ++i)
+            if (component_mask[data.second[i]])
+              constant_mode_to_component_translation[data.second[i]].
+              push_back(std::make_pair(n_constant_modes++,i));
+        // all elements must provide the same number of constant modes
+        AssertDimension(element_constant_modes.back().n_rows(),
+                        element_constant_modes[0].n_rows());
+      }
+
+    // Size the output: one all-false vector of length n_selected_dofs per
+    // constant mode
+    constant_modes.clear ();
+    constant_modes.resize (n_constant_modes, std::vector<bool>(n_selected_dofs,
+                                                               false));
+
+    // Loop over all owned cells and ask the element for the constant modes
+
+    typename DoFHandlerType::active_cell_iterator cell = dof_handler.begin_active(),
+                                                  endc = dof_handler.end();
+    std::vector<types::global_dof_index> dof_indices;
+    for (; cell!=endc; ++cell)
+      if (cell->is_locally_owned())
+        {
+          dof_indices.resize(cell->get_fe().dofs_per_cell);
+          cell->get_dof_indices(dof_indices);
+
+          // only dofs that are locally owned have an entry in the output
+          for (unsigned int i=0; i<dof_indices.size(); ++i)
+            if (locally_owned_dofs.is_element(dof_indices[i]))
+              {
+                const unsigned int loc_index =
+                  locally_owned_dofs.index_within_set(dof_indices[i]);
+                const unsigned int comp = dofs_by_component[loc_index];
+                // copy, for every mode attached to this component, the
+                // element's entry for local dof i into the output vector
+                if (component_mask[comp])
+                  for (unsigned int j=0; j<constant_mode_to_component_translation[comp].size(); ++j)
+                    constant_modes[constant_mode_to_component_translation[comp][j].first]
+                    [component_numbering[loc_index]] =
+                      element_constant_modes[cell->active_fe_index()]
+                      (constant_mode_to_component_translation[comp][j].second,i);
+              }
+        }
+  }
+
+
+
+  // Write, for every active cell of the handler, the cell's active fe
+  // index into @p active_fe_indices at the position given by the cell's
+  // active cell index. The vector must already have as many entries as the
+  // triangulation has active cells, which is checked by an assertion.
+  template <typename DoFHandlerType>
+  void
+  get_active_fe_indices (const DoFHandlerType      &dof_handler,
+                         std::vector<unsigned int> &active_fe_indices)
+  {
+    AssertDimension (active_fe_indices.size(), dof_handler.get_triangulation().n_active_cells());
+
+    for (typename DoFHandlerType::active_cell_iterator
+         cell = dof_handler.begin_active();
+         cell != dof_handler.end(); ++cell)
+      {
+        const unsigned int position = cell->active_cell_index();
+        active_fe_indices[position] = cell->active_fe_index();
+      }
+  }
+
+  // Return one IndexSet per subdomain id in the range from zero up to the
+  // largest id reported by get_subdomain_association(); the set at position
+  // s contains the dofs that function associates with subdomain s. Every
+  // set has size dof_handler.n_dofs() and is compressed.
+  //
+  // If the handler has no degrees of freedom an empty vector is returned.
+  //
+  // An assertion rejects handlers that are built on a
+  // parallel::distributed::Triangulation.
+  template <typename DoFHandlerType>
+  std::vector<IndexSet>
+  locally_owned_dofs_per_subdomain (const DoFHandlerType  &dof_handler)
+  {
+    // If the Triangulation is distributed, the only thing we can usefully
+    // ask is for its locally owned subdomain
+    Assert ((dynamic_cast<const parallel::distributed::
+             Triangulation<DoFHandlerType::dimension,DoFHandlerType::space_dimension> *>
+             (&dof_handler.get_triangulation()) == 0),
+            ExcMessage ("For parallel::distributed::Triangulation objects and "
+                        "associated DoF handler objects, asking for any information "
+                        "related to a subdomain other than the locally owned one does "
+                        "not make sense."));
+
+    // without any dofs there is nothing to distribute. bail out here
+    // because both the std::max_element() call and the access to entry
+    // zero below require a non-empty association vector
+    if (dof_handler.n_dofs () == 0)
+      return std::vector<IndexSet> ();
+
+    //the following is a random process (flip of a coin), thus should be called once only.
+    std::vector< dealii::types::subdomain_id > subdomain_association (dof_handler.n_dofs ());
+    dealii::DoFTools::get_subdomain_association (dof_handler, subdomain_association);
+
+    // subdomain ids are used as positions in the result, so we need one
+    // more set than the largest id that occurs
+    const unsigned int n_subdomains = 1 + (*std::max_element (subdomain_association.begin (),
+                                                              subdomain_association.end ()   ));
+
+    std::vector<dealii::IndexSet> index_sets (n_subdomains,dealii::IndexSet(dof_handler.n_dofs()));
+
+    // loop over subdomain_association and populate IndexSet when a
+    // change in subdomain ID is found. [i_min,index) is the run of
+    // consecutive dofs that share the subdomain this_subdomain
+    dealii::types::global_dof_index i_min          = 0;
+    dealii::types::global_dof_index this_subdomain = subdomain_association[0];
+
+    for (dealii::types::global_dof_index index = 1;
+         index < subdomain_association.size (); ++index)
+      {
+        //found index different from the current one
+        if (subdomain_association[index] != this_subdomain)
+          {
+            index_sets[this_subdomain].add_range (i_min, index);
+            i_min = index;
+            this_subdomain = subdomain_association[index];
+          }
+      }
+
+    // close the last run. add_range() takes a half-open interval, so this
+    // also covers the case that the run consists of the very last index
+    // only and no separate add_index() call is needed
+    index_sets[this_subdomain].add_range (i_min, subdomain_association.size ());
+
+    for (unsigned int i = 0; i < n_subdomains; i++)
+      index_sets[i].compress ();
+
+    return index_sets;
+  }
+
+  // Return one IndexSet per subdomain: the set produced by
+  // locally_owned_dofs_per_subdomain() for that subdomain, extended by all
+  // dofs on the cells of the active halo layer that
+  // GridTools::compute_active_cell_halo_layer() computes around it.
+  //
+  // An assertion rejects handlers that are built on a
+  // parallel::distributed::Triangulation.
+  template <typename DoFHandlerType>
+  std::vector<IndexSet>
+  locally_relevant_dofs_per_subdomain (const DoFHandlerType  &dof_handler)
+  {
+    // If the Triangulation is distributed, the only thing we can usefully
+    // ask is for its locally owned subdomain
+    Assert ((dynamic_cast<const parallel::distributed::
+             Triangulation<DoFHandlerType::dimension,DoFHandlerType::space_dimension> *>
+             (&dof_handler.get_triangulation()) == 0),
+            ExcMessage ("For parallel::distributed::Triangulation objects and "
+                        "associated DoF handler objects, asking for any information "
+                        "related to a subdomain other than the locally owned one does "
+                        "not make sense."));
+
+    // Begin with the owned dofs of each subdomain. The assignment made by
+    // locally_owned_dofs_per_subdomain is pseudo-random, but since all dofs
+    // of the subdomain and of its layer of neighboring cells end up in the
+    // result, this randomness plays no role for the relevant dofs
+    std::vector<IndexSet> dof_set = locally_owned_dofs_per_subdomain(dof_handler);
+    const dealii::types::subdomain_id n_subdomains = dof_set.size();
+
+    for (dealii::types::subdomain_id subdomain = 0;
+         subdomain < n_subdomains; ++subdomain)
+      {
+        // the cells surrounding this subdomain play the role of its ghost
+        // cells
+        std_cxx11::function<bool (const typename DoFHandlerType::active_cell_iterator &)> predicate
+          = IteratorFilters::SubdomainEqualTo(subdomain);
+        const std::vector<typename DoFHandlerType::active_cell_iterator>
+        halo_cells = GridTools::compute_active_cell_halo_layer (dof_handler,
+                                                                predicate);
+
+        // gather the dofs of all halo cells; the std::set removes the
+        // duplicates among them
+        std::vector<types::global_dof_index> dofs_on_cell;
+        std::set<types::global_dof_index>    halo_dofs;
+        for (unsigned int c=0; c<halo_cells.size(); ++c)
+          {
+            const typename DoFHandlerType::active_cell_iterator &cell = halo_cells[c];
+            Assert(cell->subdomain_id() != subdomain,
+                   ExcMessage("The subdomain ID of the halo cell should not match that of the vector entry."));
+
+            dofs_on_cell.resize(cell->get_fe().dofs_per_cell);
+            cell->get_dof_indices(dofs_on_cell);
+
+            halo_dofs.insert(dofs_on_cell.begin(), dofs_on_cell.end());
+          }
+
+        // extend the owned set of this subdomain by the halo dofs
+        dof_set[subdomain].add_indices(halo_dofs.begin(), halo_dofs.end());
+        dof_set[subdomain].compress();
+      }
+
+    return dof_set;
+  }
+
+  template <typename DoFHandlerType>
+  void
+  get_subdomain_association (const DoFHandlerType &dof_handler,
+                             std::vector<types::subdomain_id> &subdomain_association)
+  {
+    // if the Triangulation is distributed, the only thing we can usefully
+    // ask is for its locally owned subdomain
+    Assert ((dynamic_cast<const parallel::distributed::
+             Triangulation<DoFHandlerType::dimension,DoFHandlerType::space_dimension> *>
+             (&dof_handler.get_triangulation()) == 0),
+            ExcMessage ("For parallel::distributed::Triangulation objects and "
+                        "associated DoF handler objects, asking for any subdomain other "
+                        "than the locally owned one does not make sense."));
+
+    Assert(subdomain_association.size() == dof_handler.n_dofs(),
+           ExcDimensionMismatch(subdomain_association.size(),
+                                dof_handler.n_dofs()));
+
+    Assert(dof_handler.n_dofs() > 0,
+           ExcMessage("Number of DoF is not positive. "
+                      "This could happen when the function is called before NumberCache is written."));
+
+    // In case this function is executed with parallel::shared::Triangulation
+    // with possibly artificial cells, we need to take "true" subdomain IDs (i.e. without
+    // artificial cells). Otherwise we are good to use subdomain_id as stored
+    // in cell->subdomain_id().
+    std::vector<types::subdomain_id> cell_owners (dof_handler.get_triangulation().n_active_cells());
+    if (const parallel::shared::Triangulation<DoFHandlerType::dimension, DoFHandlerType::space_dimension>
+        *tr = (dynamic_cast<const parallel::shared::Triangulation<DoFHandlerType::dimension,
+               DoFHandlerType::space_dimension>*> (&dof_handler.get_triangulation())))
+      {
+        cell_owners = tr->get_true_subdomain_ids_of_cells();
+        Assert (tr->get_true_subdomain_ids_of_cells().size() == tr->n_active_cells(),
+                ExcInternalError());
+      }
+    else
+      {
+        for (typename DoFHandlerType::active_cell_iterator cell = dof_handler.begin_active();
+             cell!= dof_handler.end(); ++cell)
+          if (cell->is_locally_owned())
+            cell_owners[cell->active_cell_index()] = cell->subdomain_id();
+      }
+
+    // preset all values with the "not yet assigned" marker tested for below
+    std::fill_n (subdomain_association.begin(), dof_handler.n_dofs(),
+                 numbers::invalid_subdomain_id);
+
+    std::vector<types::global_dof_index> local_dof_indices;
+    local_dof_indices.reserve (max_dofs_per_cell(dof_handler));
+
+    // pseudo-randomly assign variables which lie on the interface between
+    // subdomains to one of the two or more adjacent subdomains
+    bool coin_flip = true;
+
+    // loop over all cells and record which subdomain a DoF belongs to.
+    // toss a coin in case it is on an interface
+    typename DoFHandlerType::active_cell_iterator
+    cell = dof_handler.begin_active(),
+    endc = dof_handler.end();
+    for (; cell!=endc; ++cell)
+      {
+        const types::subdomain_id subdomain_id = cell_owners[cell->active_cell_index()];
+        const unsigned int dofs_per_cell = cell->get_fe().dofs_per_cell;
+        local_dof_indices.resize (dofs_per_cell);
+        cell->get_dof_indices (local_dof_indices);
+
+        // set subdomain ids. a dof that no longer carries the invalid marker
+        // was already visited from another cell. in that case it is assigned
+        // to either the previous association or the current one, where we
+        // take "random" to be "once this way once that way"
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          if (subdomain_association[local_dof_indices[i]] ==
+              numbers::invalid_subdomain_id)
+            subdomain_association[local_dof_indices[i]] = subdomain_id;
+          else
+            {
+              if (coin_flip == true)
+                subdomain_association[local_dof_indices[i]] = subdomain_id;
+              coin_flip = !coin_flip;
+            }
+      }
+
+    Assert (std::find (subdomain_association.begin(),
+                       subdomain_association.end(),
+                       numbers::invalid_subdomain_id)
+            == subdomain_association.end(),
+            ExcInternalError());
+  }
+
+
+
+  template <typename DoFHandlerType>
+  unsigned int
+  count_dofs_with_subdomain_association (const DoFHandlerType      &dof_handler,
+                                         const types::subdomain_id  subdomain)
+  {
+    // determine the subdomain each DoF is associated with, then tally how
+    // many of them are associated with the requested subdomain
+    std::vector<types::subdomain_id> owner_of_dof (dof_handler.n_dofs());
+    get_subdomain_association (dof_handler, owner_of_dof);
+
+    return std::count (owner_of_dof.begin(), owner_of_dof.end(), subdomain);
+  }
+
+
+
+  template <typename DoFHandlerType>
+  IndexSet
+  dof_indices_with_subdomain_association (const DoFHandlerType      &dof_handler,
+                                          const types::subdomain_id  subdomain)
+  {
+
+    // With a parallel::distributed::Triangulation, only the locally owned
+    // subdomain may be queried.
+    Assert (
+      (dof_handler.get_triangulation().locally_owned_subdomain() == numbers::invalid_subdomain_id)
+      ||
+      (subdomain == dof_handler.get_triangulation().locally_owned_subdomain()),
+      ExcMessage ("For parallel::distributed::Triangulation objects and "
+                  "associated DoF handler objects, asking for any subdomain other "
+                  "than the locally owned one does not make sense."));
+
+    // scratch array that receives the DoF indices of one cell at a time
+    std::vector<types::global_dof_index> cell_dofs;
+    cell_dofs.reserve (max_dofs_per_cell(dof_handler));
+
+    // Append the indices of all matching cells to a plain vector first.
+    // Adding them to the IndexSet directly would insert indices in the
+    // middle, which is an O(n^2) algorithm and hence too expensive. A
+    // std::set would also work, but is in general more expensive than a
+    // vector.
+    std::vector<types::global_dof_index> gathered_dofs;
+
+    for (typename DoFHandlerType::active_cell_iterator
+         cell = dof_handler.begin_active(), endc = dof_handler.end();
+         cell!=endc; ++cell)
+      {
+        // skip artificial cells as well as cells of other subdomains
+        if (cell->is_artificial() || (cell->subdomain_id() != subdomain))
+          continue;
+
+        cell_dofs.resize (cell->get_fe().dofs_per_cell);
+        cell->get_dof_indices (cell_dofs);
+        gathered_dofs.insert(gathered_dofs.end(),
+                             cell_dofs.begin(),
+                             cell_dofs.end());
+      }
+
+    // bring the list into ascending order and drop repeated entries
+    std::sort (gathered_dofs.begin(), gathered_dofs.end());
+    gathered_dofs.erase (std::unique(gathered_dofs.begin(),
+                                     gathered_dofs.end()),
+                         gathered_dofs.end());
+
+    // hand the sorted, unique indices over to a freshly created IndexSet
+    IndexSet index_set (dof_handler.n_dofs());
+    index_set.add_indices (gathered_dofs.begin(), gathered_dofs.end());
+    index_set.compress ();
+
+    return index_set;
+  }
+
+
+
+  template <typename DoFHandlerType>
+  void
+  count_dofs_with_subdomain_association (const DoFHandlerType      &dof_handler,
+                                         const types::subdomain_id  subdomain,
+                                         std::vector<unsigned int> &n_dofs_on_subdomain)
+  {
+    Assert (n_dofs_on_subdomain.size() == dof_handler.get_fe().n_components(),
+            ExcDimensionMismatch (n_dofs_on_subdomain.size(),
+                                  dof_handler.get_fe().n_components()));
+    std::fill (n_dofs_on_subdomain.begin(), n_dofs_on_subdomain.end(), 0);
+
+    // debug builds only: verify that at least one active cell carries the
+    // requested subdomain id
+#ifdef DEBUG
+    {
+      bool found = false;
+      for (typename Triangulation<DoFHandlerType::dimension,
+           DoFHandlerType::space_dimension>::active_cell_iterator
+           cell=dof_handler.get_triangulation().begin_active();
+           cell!=dof_handler.get_triangulation().end(); ++cell)
+        if (cell->subdomain_id() == subdomain)
+          {
+            found = true;
+            break;
+          }
+      Assert (found == true,
+              ExcMessage ("There are no cells for the given subdomain!"));
+    }
+#endif
+
+    std::vector<types::subdomain_id> dof_owner (dof_handler.n_dofs());
+    get_subdomain_association (dof_handler, dof_owner);
+
+    std::vector<unsigned char> dof_component (dof_handler.n_dofs());
+    internal::get_component_association (dof_handler, std::vector<bool>(),
+                                         dof_component);
+
+    // tally, component by component, the DoFs of the requested subdomain
+    const unsigned int n_components = dof_handler.get_fe().n_components();
+    for (unsigned int c=0; c<n_components; ++c)
+      for (types::global_dof_index i=0; i<dof_handler.n_dofs(); ++i)
+        if ((dof_owner[i] == subdomain) &&
+            (dof_component[i] == static_cast<unsigned char>(c)))
+          ++n_dofs_on_subdomain[c];
+  }
+
+
+
+  namespace internal
+  {
+    // TODO: why is this function so complicated? It would be nice to have
+    // comments that explain why we can't just loop over all components and
+    // count the entries in dofs_by_component that have this component's
+    // index
+    template <int dim, int spacedim>
+    void
+    resolve_components (const FiniteElement<dim,spacedim> &fe,
+                        const std::vector<unsigned char> &dofs_by_component,
+                        const std::vector<unsigned int>  &target_component,
+                        const bool                        only_once,
+                        std::vector<types::global_dof_index> &dofs_per_component,
+                        unsigned int                     &component)
+    {
+      for (unsigned int b=0; b<fe.n_base_elements(); ++b)
+        {
+          const FiniteElement<dim,spacedim> &base = fe.base_element(b);
+          // Number of vector components of this base element
+          unsigned int d = base.n_components();
+
+          for (unsigned int m=0; m<fe.element_multiplicity(b); ++m)
+            {
+              if (base.n_base_elements() > 1) // composite base element: recurse
+                resolve_components(base, dofs_by_component, target_component,
+                                   only_once, dofs_per_component, component);
+              else
+                {
+                  for (unsigned int dd=0; dd<d; ++dd,++component)
+                    dofs_per_component[target_component[component]]
+                    += std::count(dofs_by_component.begin(),
+                                  dofs_by_component.end(),
+                                  component);
+
+                  // for a non-primitive base element, unless only_once is
+                  // set, copy the count of its first component to the other
+                  // d-1 components that were just processed
+                  if (!base.is_primitive() && !only_once)
+                    for (unsigned int dd=1; dd<d; ++dd)
+                      dofs_per_component[target_component[component-d+dd]] =
+                        dofs_per_component[target_component[component-d]];
+                }
+            }
+        }
+    }
+
+
+    template <int dim, int spacedim>
+    void
+    resolve_components (const hp::FECollection<dim,spacedim> &fe_collection,
+                        const std::vector<unsigned char> &dofs_by_component,
+                        const std::vector<unsigned int>  &target_component,
+                        const bool                        only_once,
+                        std::vector<types::global_dof_index> &dofs_per_component,
+                        unsigned int                     &component)
+    {
+      // assert that all elements in the collection agree with the first one
+      // in their number of components and base elements (overall and per base
+      // element), and then simply call the function above on the first one
+      for (unsigned int fe=1; fe<fe_collection.size(); ++fe)
+        {
+          Assert (fe_collection[fe].n_components() == fe_collection[0].n_components(),
+                  ExcNotImplemented());
+          Assert (fe_collection[fe].n_base_elements() == fe_collection[0].n_base_elements(),
+                  ExcNotImplemented());
+          for (unsigned int b=0; b<fe_collection[0].n_base_elements(); ++b)
+            {
+              Assert (fe_collection[fe].base_element(b).n_components() == fe_collection[0].base_element(b).n_components(),
+                      ExcNotImplemented());
+              Assert (fe_collection[fe].base_element(b).n_base_elements() == fe_collection[0].base_element(b).n_base_elements(),
+                      ExcNotImplemented());
+            }
+        }
+
+      resolve_components (fe_collection[0], dofs_by_component,
+                          target_component, only_once, dofs_per_component,
+                          component);
+    }
+  }
+
+
+
+  namespace internal
+  {
+    namespace
+    {
+      /**
+       * Report whether the given finite element is primitive.
+       */
+      template <int dim, int spacedim>
+      bool all_elements_are_primitive (const FiniteElement<dim,spacedim> &fe)
+      {
+        return fe.is_primitive();
+      }
+
+
+      /**
+       * Report whether every element of the given collection is primitive.
+       */
+      template <int dim, int spacedim>
+      bool all_elements_are_primitive (const dealii::hp::FECollection<dim,spacedim> &fe_collection)
+      {
+        // skip over all leading primitive elements
+        unsigned int n_primitive = 0;
+        while (n_primitive<fe_collection.size() && fe_collection[n_primitive].is_primitive())
+          ++n_primitive;
+        return (n_primitive == fe_collection.size());
+      }
+    }
+  }
+
+  template <typename DoFHandlerType>
+  void
+  count_dofs_per_component (const DoFHandlerType                 &dof_handler,
+                            std::vector<types::global_dof_index> &dofs_per_component,
+                            bool                                  only_once,
+                            std::vector<unsigned int>             target_component)
+  {
+    const unsigned int n_components = dof_handler.get_fe().n_components();
+
+    std::fill (dofs_per_component.begin(), dofs_per_component.end(),
+               types::global_dof_index(0));
+
+    // If an empty vector was given for target_component, set it up as the
+    // identity map, i.e. component i is counted in slot i.
+    if (target_component.size()==0)
+      {
+        target_component.resize(n_components);
+        for (unsigned int i=0; i<n_components; ++i)
+          target_component[i] = i;
+      }
+    else
+      Assert (target_component.size()==n_components,
+              ExcDimensionMismatch(target_component.size(),
+                                   n_components));
+
+
+    const unsigned int max_component
+      = *std::max_element (target_component.begin(),
+                           target_component.end());
+    const unsigned int n_target_components = max_component + 1;
+    (void)n_target_components; // silence possible warning about unused variable
+
+    AssertDimension (dofs_per_component.size(), n_target_components);
+
+    // special case for only one component: no counting is needed. NOTE(review):
+    // this returns before the MPI reduction at the end -- confirm intended
+    if (n_components == 1)
+      {
+        dofs_per_component[0] = dof_handler.n_locally_owned_dofs();
+        return;
+      }
+
+
+    // otherwise determine the number of dofs in each component separately,
+    // starting from the component each locally owned dof is associated with
+    std::vector<unsigned char> dofs_by_component (dof_handler.n_locally_owned_dofs());
+    internal::get_component_association (dof_handler, ComponentMask(),
+                                         dofs_by_component);
+
+    // next count what we got
+    unsigned int component = 0;
+    internal::resolve_components(dof_handler.get_fe(),
+                                 dofs_by_component, target_component,
+                                 only_once, dofs_per_component, component);
+    Assert (n_components == component, ExcInternalError());
+
+    // finally sanity check. this is only valid if the finite element is
+    // actually primitive, so exclude other elements from this
+    Assert ((internal::all_elements_are_primitive(dof_handler.get_fe()) == false)
+            ||
+            (std::accumulate (dofs_per_component.begin(),
+                              dofs_per_component.end(),
+                              types::global_dof_index(0))
+             == dof_handler.n_locally_owned_dofs()),
+            ExcInternalError());
+
+    // on a parallel triangulation, sum up the local counts of all processes
+#ifdef DEAL_II_WITH_MPI
+    const unsigned int dim = DoFHandlerType::dimension;
+    const unsigned int spacedim = DoFHandlerType::space_dimension;
+
+    if (const parallel::Triangulation<dim,spacedim> *tria
+        = (dynamic_cast<const parallel::Triangulation<dim,spacedim>*>
+           (&dof_handler.get_triangulation())))
+      {
+        std::vector<types::global_dof_index> local_dof_count = dofs_per_component;
+
+        MPI_Allreduce ( &local_dof_count[0], &dofs_per_component[0], n_target_components,
+                        DEAL_II_DOF_INDEX_MPI_TYPE,
+                        MPI_SUM, tria->get_communicator());
+      }
+#endif
+  }
+
+
+
+  template <typename DoFHandlerType>
+  void
+  count_dofs_per_block (const DoFHandlerType                 &dof_handler,
+                        std::vector<types::global_dof_index> &dofs_per_block,
+                        const std::vector<unsigned int>      &target_block_)
+  {
+    std::vector<unsigned int>  target_block = target_block_;
+
+    const dealii::hp::FECollection<DoFHandlerType::dimension,DoFHandlerType::space_dimension>
+    fe_collection (dof_handler.get_fe());
+    Assert (fe_collection.size() < 256, ExcNotImplemented());
+
+    for (unsigned int this_fe=0; this_fe<fe_collection.size(); ++this_fe)
+      {
+        const FiniteElement<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &fe = fe_collection[this_fe];
+        std::fill (dofs_per_block.begin(), dofs_per_block.end(),
+                   types::global_dof_index(0));
+
+        // If an empty vector was given for target_block, set it up as the
+        // identity map, i.e. block i is counted in slot i.
+        if (target_block.size()==0)
+          {
+            target_block.resize(fe.n_blocks());
+            for (unsigned int i=0; i<fe.n_blocks(); ++i)
+              target_block[i] = i;
+          }
+        else
+          Assert (target_block.size()==fe.n_blocks(),
+                  ExcDimensionMismatch(target_block.size(),
+                                       fe.n_blocks()));
+
+
+
+        const unsigned int max_block
+          = *std::max_element (target_block.begin(),
+                               target_block.end());
+        const unsigned int n_target_blocks = max_block + 1;
+        (void)n_target_blocks; // silence possible warning about unused variable
+
+        const unsigned int n_blocks = fe.n_blocks();
+
+        AssertDimension (dofs_per_block.size(), n_target_blocks);
+
+        // special case for only one block: no counting is needed. NOTE(review):
+        // uses n_dofs() whereas the general path counts locally owned dofs
+        if (n_blocks == 1)
+          {
+            dofs_per_block[0] = dof_handler.n_dofs();
+            return;
+          }
+        // otherwise determine the number of dofs in each block separately.
+        std::vector<unsigned char> dofs_by_block (dof_handler.n_locally_owned_dofs());
+        internal::get_block_association (dof_handler, dofs_by_block);
+
+        // next count what we got
+        for (unsigned int block=0; block<fe.n_blocks(); ++block)
+          dofs_per_block[target_block[block]]
+          += std::count(dofs_by_block.begin(), dofs_by_block.end(),
+                        block);
+
+#ifdef DEAL_II_WITH_MPI
+        // if we are working on a parallel mesh, we now need to collect
+        // this information from all processors
+        if (const parallel::Triangulation<DoFHandlerType::dimension,DoFHandlerType::space_dimension> *tria
+            = (dynamic_cast<const parallel::Triangulation<DoFHandlerType::dimension,DoFHandlerType::space_dimension>*>
+               (&dof_handler.get_triangulation())))
+          {
+            std::vector<types::global_dof_index> local_dof_count = dofs_per_block;
+            MPI_Allreduce ( &local_dof_count[0], &dofs_per_block[0],
+                            n_target_blocks,
+                            DEAL_II_DOF_INDEX_MPI_TYPE,
+                            MPI_SUM, tria->get_communicator());
+          }
+#endif
+      }
+  }
+
+
+
+  template <typename DoFHandlerType>
+  void
+  map_dof_to_boundary_indices (const DoFHandlerType &dof_handler,
+                               std::vector<types::global_dof_index> &mapping)
+  {
+    // start out with no DoF having been given a boundary index
+    mapping.assign (dof_handler.n_dofs(),
+                    DoFHandlerType::invalid_dof_index);
+
+    std::vector<types::global_dof_index> face_dofs;
+    face_dofs.reserve (max_dofs_per_face(dof_handler));
+    types::global_dof_index next_boundary_index = 0;
+
+    // Visit every face of every active cell and number the DoFs on those
+    // faces that are at the boundary, in the order in which they are first
+    // encountered. Single lines at the boundary (cell->has_boundary_lines)
+    // need no special treatment: boundaries of dimension dim-2 are not
+    // supported, so every isolated boundary line is also part of a boundary
+    // face which we will be visiting sooner or later.
+    for (typename DoFHandlerType::active_cell_iterator
+         cell = dof_handler.begin_active(), endc = dof_handler.end();
+         cell!=endc; ++cell)
+      for (unsigned int f=0; f<GeometryInfo<DoFHandlerType::dimension>::faces_per_cell; ++f)
+        {
+          if (!cell->at_boundary(f))
+            continue;
+
+          face_dofs.resize (cell->get_fe().dofs_per_face);
+          cell->face(f)->get_dof_indices (face_dofs, cell->active_fe_index());
+          for (unsigned int i=0; i<face_dofs.size(); ++i)
+            if (mapping[face_dofs[i]] == DoFHandlerType::invalid_dof_index)
+              mapping[face_dofs[i]] = next_boundary_index++;
+        }
+
+    AssertDimension (next_boundary_index, dof_handler.n_boundary_dofs());
+  }
+
+
+
+  template <typename DoFHandlerType>
+  void map_dof_to_boundary_indices
+  (const DoFHandlerType                 &dof_handler,
+   const std::set<types::boundary_id>   &boundary_ids,
+   std::vector<types::global_dof_index> &mapping)
+  {
+    Assert (boundary_ids.find (numbers::internal_face_boundary_id) == boundary_ids.end(),
+            ExcInvalidBoundaryIndicator());
+
+    // start out with no DoF having been given a boundary index
+    mapping.assign (dof_handler.n_dofs(),
+                    DoFHandlerType::invalid_dof_index);
+
+    // an empty set of boundary ids selects no face at all
+    if (boundary_ids.empty())
+      return;
+
+    std::vector<types::global_dof_index> face_dofs;
+    face_dofs.reserve (max_dofs_per_face(dof_handler));
+    types::global_dof_index next_boundary_index = 0;
+
+    for (typename DoFHandlerType::active_cell_iterator
+         cell = dof_handler.begin_active(), endc = dof_handler.end();
+         cell!=endc; ++cell)
+      for (unsigned int f=0; f<GeometryInfo<DoFHandlerType::dimension>::faces_per_cell; ++f)
+        {
+          // only faces with one of the selected boundary ids are of interest
+          if (boundary_ids.count (cell->face(f)->boundary_id()) == 0)
+            continue;
+          face_dofs.resize (cell->get_fe().dofs_per_face);
+          cell->face(f)->get_dof_indices (face_dofs, cell->active_fe_index());
+          for (unsigned int i=0; i<face_dofs.size(); ++i)
+            if (mapping[face_dofs[i]] == DoFHandlerType::invalid_dof_index)
+              mapping[face_dofs[i]] = next_boundary_index++;
+        }
+
+    AssertDimension (next_boundary_index,
+                     dof_handler.n_boundary_dofs (boundary_ids));
+  }
+
+  namespace internal
+  {
+    namespace
+    {
+      template <typename DoFHandlerType>
+      void
+      map_dofs_to_support_points
+      (const hp::MappingCollection<DoFHandlerType::dimension, DoFHandlerType::space_dimension> &mapping,
+       const DoFHandlerType                                                      &dof_handler,
+       std::map<types::global_dof_index,Point<DoFHandlerType::space_dimension> > &support_points)
+      {
+        const unsigned int dim = DoFHandlerType::dimension;
+        const unsigned int spacedim = DoFHandlerType::space_dimension;
+
+        hp::FECollection<dim, spacedim> fe_collection(dof_handler.get_fe());
+        hp::QCollection<dim> q_coll_dummy;
+
+        for (unsigned int fe_index = 0; fe_index < fe_collection.size(); ++fe_index)
+          {
+            // check whether every fe in the collection has support points
+            Assert(fe_collection[fe_index].has_support_points(),
+                   typename FiniteElement<dim>::ExcFEHasNoSupportPoints());
+            q_coll_dummy.push_back(
+              Quadrature<dim> (
+                fe_collection[fe_index].get_unit_support_points()));
+          }
+
+        // Now loop over all cells and enquire the support points on each
+        // of these. we use dummy quadrature formulas where the quadrature
+        // points are located at the unit support points to enquire the
+        // location of the support points in real space.
+        //
+        // The weights of the quadrature rule have been set to invalid
+        // values by the used constructor.
+        hp::FEValues<dim, spacedim> hp_fe_values(mapping, fe_collection,
+                                                 q_coll_dummy, update_quadrature_points);
+        typename DoFHandlerType::active_cell_iterator cell =
+          dof_handler.begin_active(), endc = dof_handler.end();
+
+        std::vector<types::global_dof_index> local_dof_indices;
+        for (; cell != endc; ++cell)
+          // skip artificial cells
+          if (cell->is_artificial() == false)
+            {
+              hp_fe_values.reinit(cell);
+              const FEValues<dim, spacedim> &fe_values = hp_fe_values.get_present_fe_values();
+
+              local_dof_indices.resize(cell->get_fe().dofs_per_cell);
+              cell->get_dof_indices(local_dof_indices);
+
+              const std::vector<Point<spacedim> > &points =
+                fe_values.get_quadrature_points();
+              for (unsigned int i = 0; i < cell->get_fe().dofs_per_cell; ++i)
+                // store the point, overwriting any entry from an earlier cell
+                support_points[local_dof_indices[i]] = points[i];
+            }
+      }
+
+
+      template <typename DoFHandlerType>
+      void
+      map_dofs_to_support_points
+      (const hp::MappingCollection<DoFHandlerType::dimension, DoFHandlerType::space_dimension> &mapping,
+       const DoFHandlerType                                 &dof_handler,
+       std::vector<Point<DoFHandlerType::space_dimension> > &support_points)
+      {
+        // get the data in the form of the map as above
+        std::map<types::global_dof_index,Point<DoFHandlerType::space_dimension> >  x_support_points;
+        map_dofs_to_support_points(mapping, dof_handler, x_support_points);
+
+        // now convert from the map to the linear vector, indexed by global
+        // dof number. make sure every entry really appeared in the map
+        for (types::global_dof_index i=0; i<dof_handler.n_dofs(); ++i)
+          {
+            Assert (x_support_points.find(i) != x_support_points.end(),
+                    ExcInternalError());
+            support_points[i] = x_support_points[i];
+          }
+      }
+    }
+  }
+
+  template <int dim, int spacedim>
+  void
+  map_dofs_to_support_points (const Mapping<dim,spacedim>    &mapping,
+                              const DoFHandler<dim,spacedim> &dof_handler,
+                              std::vector<Point<spacedim> >  &support_points)
+  {
+    AssertDimension(support_points.size(), dof_handler.n_dofs());
+    Assert ((dynamic_cast<const parallel::distributed::Triangulation<dim,spacedim>*>
+             (&dof_handler.get_triangulation())
+             ==
+             0),
+            ExcMessage ("This function can not be used with distributed triangulations. "
+                        "See the documentation for more information."));
+
+    // Let the internal function do all the work, just make sure that it
+    // gets a MappingCollection built from the single mapping given here
+    const hp::MappingCollection<dim, spacedim> mapping_collection(mapping);
+
+    internal::map_dofs_to_support_points (mapping_collection,
+                                          dof_handler,
+                                          support_points);
+  }
+
+
+  template<int dim, int spacedim>
+  void
+  map_dofs_to_support_points(const hp::MappingCollection<dim, spacedim> &mapping,
+                             const hp::DoFHandler<dim, spacedim>        &dof_handler,
+                             std::vector<Point<spacedim> >              &support_points)
+  {
+    AssertDimension(support_points.size(), dof_handler.n_dofs());
+    Assert ((dynamic_cast<const parallel::distributed::Triangulation<dim,spacedim>*>
+             (&dof_handler.get_triangulation())
+             ==
+             0),
+            ExcMessage ("This function can not be used with distributed triangulations. "
+                        "See the documentation for more information."));
+
+    // Let the internal function do all the work; the mapping collection
+    // given here can be passed on as it is
+    internal::map_dofs_to_support_points (mapping,
+                                          dof_handler,
+                                          support_points);
+  }
+
+
+  template <int dim, int spacedim>
+  void
+  map_dofs_to_support_points (const Mapping<dim,spacedim>    &mapping,
+                              const DoFHandler<dim,spacedim> &dof_handler,
+                              std::map<types::global_dof_index, Point<spacedim> > &support_points)
+  {
+    // discard whatever the map given by the caller contained before
+    support_points.clear();
+
+    // The actual work is done by the internal function, which expects a
+    // MappingCollection; wrap the single mapping into a temporary one.
+    internal::map_dofs_to_support_points
+    (hp::MappingCollection<dim, spacedim>(mapping),
+     dof_handler,
+     support_points);
+  }
+
+  // Map-valued variant for an hp::MappingCollection and an hp::DoFHandler.
+  template<int dim, int spacedim>
+  void
+  map_dofs_to_support_points(const hp::MappingCollection<dim, spacedim> &mapping,
+                             const hp::DoFHandler<dim, spacedim>        &dof_handler,
+                             std::map<types::global_dof_index, Point<spacedim> > &support_points)
+  {
+    // Discard whatever the map held before the call
+    support_points.clear();
+
+    // The argument is a MappingCollection already, so nothing has to be
+    // wrapped here: pass everything on to the internal function, which
+    // does all the work
+    internal::map_dofs_to_support_points (mapping, dof_handler, support_points);
+  }
+
+  // Condense a component-wise coupling table into a block-wise one (non-hp case: a single output table).
+  template<int dim, int spacedim>
+  void
+  convert_couplings_to_blocks
+  (const DoFHandler<dim,spacedim>  &dof_handler,
+   const Table<2, Coupling>        &table,
+   std::vector<Table<2,Coupling> > &tables_by_block)
+  {
+    const FiniteElement<dim,spacedim> &element = dof_handler.get_fe();
+    const unsigned int n_blocks     = element.n_blocks();
+    const unsigned int n_components = element.n_components();
+
+    tables_by_block.resize(1); // one element, hence one table
+    Table<2,Coupling> &block_table = tables_by_block[0];
+    block_table.reinit(n_blocks, n_blocks);
+    block_table.fill(none);
+
+    // Merge (via |=) each component pair's entry into the entry of the block pair it belongs to
+    for (unsigned int row=0; row<n_components; ++row)
+      {
+        const unsigned int row_block = element.component_to_block_index(row);
+        for (unsigned int col=0; col<n_components; ++col)
+          block_table(row_block, element.component_to_block_index(col)) |= table(row,col);
+      }
+  }
+
+  // hp case: build one block-wise coupling table for each element of the handler's FECollection.
+  template<int dim, int spacedim>
+  void
+  convert_couplings_to_blocks (const hp::DoFHandler<dim,spacedim> &dof_handler,
+                               const Table<2, Coupling>           &table,
+                               std::vector<Table<2,Coupling> >    &tables_by_block)
+  {
+    const hp::FECollection<dim,spacedim> &fe_collection = dof_handler.get_fe(); // spell out spacedim, it defaults to dim
+    tables_by_block.resize(fe_collection.size());
+
+    for (unsigned int f=0; f<fe_collection.size(); ++f)
+      {
+        const FiniteElement<dim,spacedim> &fe = fe_collection[f];
+        // Start from a table without couplings, sized by the number of blocks of this element
+        const unsigned int nb = fe.n_blocks();
+        tables_by_block[f].reinit(nb, nb);
+        tables_by_block[f].fill(none);
+        for (unsigned int i=0; i<fe.n_components(); ++i)
+          {
+            const unsigned int ib = fe.component_to_block_index(i);
+            for (unsigned int j=0; j<fe.n_components(); ++j)
+              {
+                const unsigned int jb = fe.component_to_block_index(j);
+                tables_by_block[f](ib,jb) |= table(i,j); // merge into the block pair's entry
+              }
+          }
+      }
+  }
+
+
+  // One patch per cell of the given level: each patch receives that cell's level dof indices, shifted by offset.
+  template <typename DoFHandlerType, class Sparsity>
+  void make_cell_patches(Sparsity                &block_list,
+                         const DoFHandlerType    &dof_handler,
+                         const unsigned int       level,
+                         const std::vector<bool> &selected_dofs,
+                         types::global_dof_index  offset)
+  {
+    // An empty mask selects every dof of a cell
+    const bool take_all_dofs = selected_dofs.empty();
+
+    // Scratch array for the level dof indices of the current cell
+    std::vector<types::global_dof_index> indices;
+
+    // 'patch' counts the cells visited so far and is used as patch index
+    unsigned int patch = 0;
+    const typename DoFHandlerType::level_cell_iterator end_cell = dof_handler.end(level);
+    for (typename DoFHandlerType::level_cell_iterator cell = dof_handler.begin(level);
+         cell != end_cell; ++cell, ++patch)
+      {
+        indices.resize(cell->get_fe().dofs_per_cell);
+        cell->get_mg_dof_indices(indices);
+
+        // A non-empty mask has to provide exactly one flag per cell dof
+        if (!take_all_dofs)
+          AssertDimension(indices.size(), selected_dofs.size());
+
+        // Enter the selected dofs, shifted by offset, into this cell's patch
+        for (types::global_dof_index j=0; j<indices.size(); ++j)
+          if (take_all_dofs || selected_dofs[j])
+            block_list.add(patch, indices[j]-offset);
+      }
+  }
+
+  // Put the level dofs of all cells of one level into a single patch, stored as entry 0 of block_list.
+  template <typename DoFHandlerType>
+  void make_single_patch(SparsityPattern      &block_list,
+                         const DoFHandlerType &dof_handler,
+                         const unsigned int    level,
+                         const bool            interior_only)
+  {
+    const FiniteElement<DoFHandlerType::dimension> &element = dof_handler.get_fe();
+    const unsigned int n_face_dofs = element.dofs_per_face;
+
+    // The pattern has one row only; n_dofs(level) is passed for both remaining arguments
+    block_list.reinit(1, dof_handler.n_dofs(level), dof_handler.n_dofs(level));
+
+    // Scratch arrays, reused for every cell
+    std::vector<types::global_dof_index> cell_dofs;
+    std::vector<bool> on_excluded_face;
+
+    const typename DoFHandlerType::level_cell_iterator end_cell = dof_handler.end(level);
+    for (typename DoFHandlerType::level_cell_iterator cell = dof_handler.begin(level);
+         cell != end_cell; ++cell)
+      {
+        // Level dof indices of the current cell
+        cell_dofs.resize(cell->get_fe().dofs_per_cell);
+        cell->get_mg_dof_indices(cell_dofs);
+
+        if (interior_only)
+          {
+            // Flag the dofs on faces that are at the boundary of the domain
+            // or whose neighbor is not on the same level as this cell
+            on_excluded_face.assign(element.dofs_per_cell, false);
+            for (unsigned int face=0; face<GeometryInfo<DoFHandlerType::dimension>::faces_per_cell; ++face)
+              if (cell->at_boundary(face) || cell->neighbor(face)->level() != cell->level())
+                for (unsigned int i=0; i<n_face_dofs; ++i)
+                  on_excluded_face[element.face_to_cell_index(i,face)] = true;
+          }
+
+        // The flags are set up, and hence consulted, in the interior_only case only
+        for (types::global_dof_index j=0; j<cell_dofs.size(); ++j)
+          if (!interior_only || !on_excluded_face[j])
+            block_list.add(0, cell_dofs[j]);
+      }
+  }
+
+  // Build one patch per refined cell of level-1, made up of the level dofs of that cell's children.
+  template <typename DoFHandlerType>
+  void make_child_patches (SparsityPattern      &block_list,
+                           const DoFHandlerType &dof_handler,
+                           const unsigned int    level,
+                           const bool            interior_dofs_only,
+                           const bool            boundary_dofs)
+  {
+    Assert(level > 0 && level < dof_handler.get_triangulation().n_levels(),
+           ExcIndexRange(level, 1, dof_handler.get_triangulation().n_levels())); // a parent level must exist
+    // Note: block_list is only added to below; this function never calls reinit() on it
+    typename DoFHandlerType::level_cell_iterator pcell = dof_handler.begin(level-1);
+    typename DoFHandlerType::level_cell_iterator endc = dof_handler.end(level-1);
+
+    std::vector<types::global_dof_index> indices;
+    std::vector<bool> exclude;
+    // 'block' is the index of the current patch; it advances only past cells that have children
+    for (unsigned int block = 0; pcell != endc; ++pcell)
+      {
+        if (!pcell->has_children())
+          continue;
+
+        for (unsigned int child=0; child<pcell->n_children(); ++child)
+          {
+            const typename DoFHandlerType::level_cell_iterator cell = pcell->child(child);
+
+            // For hp, only this line here would have to be replaced.
+            const FiniteElement<DoFHandlerType::dimension> &fe = dof_handler.get_fe();
+            const unsigned int n_dofs = fe.dofs_per_cell;
+            indices.resize(n_dofs);
+            exclude.resize(n_dofs);
+            std::fill(exclude.begin(), exclude.end(), false);
+            cell->get_mg_dof_indices(indices);
+
+            if (interior_dofs_only)
+              {
+                // Eliminate dofs on faces of the child which are on faces of
+                // the parent, taken to be the faces in vertex_to_face[child]
+                const unsigned int dpf = fe.dofs_per_face;
+
+                for (unsigned int d=0; d<DoFHandlerType::dimension; ++d)
+                  {
+                    const unsigned int face = GeometryInfo<DoFHandlerType::dimension>::vertex_to_face[child][d];
+                    for (unsigned int i=0; i<dpf; ++i)
+                      exclude[fe.face_to_cell_index(i,face)] = true;
+                  }
+
+                // Now remove all degrees of freedom on the domain boundary
+                // from the exclusion list
+                if (boundary_dofs)
+                  for (unsigned int face=0; face< GeometryInfo<DoFHandlerType::dimension>::faces_per_cell; ++face)
+                    if (cell->at_boundary(face))
+                      for (unsigned int i=0; i<dpf; ++i)
+                        exclude[fe.face_to_cell_index(i,face)] = false;
+              }
+            // Enter every dof of this child that is not excluded into the current patch
+            for (unsigned int i=0; i<n_dofs; ++i)
+              if (!exclude[i])
+                block_list.add(block, indices[i]);
+          }
+        ++block;
+      }
+  }
+
+
+  // Build patches around vertices: each vertex that passes the filters below gets one block with level dofs of the cells sharing it.
+  template <typename DoFHandlerType>
+  void make_vertex_patches (SparsityPattern       &block_list,
+                            const DoFHandlerType &dof_handler,
+                            const unsigned int    level,
+                            const bool            interior_only,
+                            const bool            boundary_patches,
+                            const bool            level_boundary_patches,
+                            const bool            single_cell_patches)
+  {
+    typename DoFHandlerType::level_cell_iterator cell;
+    typename DoFHandlerType::level_cell_iterator endc = dof_handler.end(level);
+
+    // The number of cells of this level at a vertex. Like the arrays
+    // below, it is indexed by the vertex index in the triangulation
+    std::vector<unsigned int> vertex_cell_count(dof_handler.get_triangulation().n_vertices(), 0);
+
+    // Is a vertex at the boundary? (See the loop below for what counts as boundary.)
+    std::vector<bool> vertex_boundary(dof_handler.get_triangulation().n_vertices(), false);
+    // Mapping from vertex index in the triangulation to consecutive block indices on this level
+    std::vector<unsigned int> vertex_mapping(dof_handler.get_triangulation().n_vertices(),
+                                             numbers::invalid_unsigned_int);
+
+    // Estimate for the number of dofs at each vertex: dofs_per_cell summed over the cells at it
+    std::vector<unsigned int> vertex_dof_count(dof_handler.get_triangulation().n_vertices(), 0);
+
+    // Identify all vertices active on this level and remember some data
+    // about them
+    for (cell=dof_handler.begin(level); cell != endc; ++cell)
+      for (unsigned int v=0; v<GeometryInfo<DoFHandlerType::dimension>::vertices_per_cell; ++v)
+        {
+          const unsigned int vg = cell->vertex_index(v);
+          vertex_dof_count[vg] += cell->get_fe().dofs_per_cell;
+          ++vertex_cell_count[vg];
+          for (unsigned int d=0; d<DoFHandlerType::dimension; ++d)
+            {
+              const unsigned int face = GeometryInfo<DoFHandlerType::dimension>::vertex_to_face[v][d];
+              if (cell->at_boundary(face)) // face on the boundary of the domain
+                vertex_boundary[vg] = true;
+              else if ((!level_boundary_patches)
+                       && (cell->neighbor(face)->level() != (int) level)) // neighbor is not on this level
+                vertex_boundary[vg] = true;
+            }
+        }
+    // From now on, only vertices with positive dof count are "in".
+
+    // Remove vertices with fewer than two cells (unless single_cell_patches) or at boundaries (unless boundary_patches)
+    for (unsigned int vg=0; vg<vertex_dof_count.size(); ++vg)
+      if ((!single_cell_patches && vertex_cell_count[vg] < 2)
+          ||
+          (!boundary_patches && vertex_boundary[vg]))
+        vertex_dof_count[vg] = 0;
+
+    // Create a mapping from all vertices to the ones used here
+    unsigned int n_vertex_count=0;
+    for (unsigned int vg=0; vg<vertex_mapping.size(); ++vg)
+      if (vertex_dof_count[vg] != 0)
+        vertex_mapping[vg] = n_vertex_count++;
+
+    // Compactify dof count in place; nothing unread is overwritten since vertex_mapping[vg] <= vg
+    for (unsigned int vg=0; vg<vertex_mapping.size(); ++vg)
+      if (vertex_dof_count[vg] != 0)
+        vertex_dof_count[vertex_mapping[vg]] = vertex_dof_count[vg];
+
+    // Now that we have all the data, we reduce it to the part we actually
+    // want
+    vertex_dof_count.resize(n_vertex_count);
+
+    // At this point, the list of patches is ready. Now we enter the dofs
+    // into the sparsity pattern.
+    block_list.reinit(vertex_dof_count.size(), dof_handler.n_dofs(level), vertex_dof_count);
+
+    std::vector<types::global_dof_index> indices;
+    std::vector<bool> exclude;
+
+    for (cell=dof_handler.begin(level); cell != endc; ++cell)
+      {
+        const FiniteElement<DoFHandlerType::dimension> &fe = cell->get_fe();
+        indices.resize(fe.dofs_per_cell);
+        cell->get_mg_dof_indices(indices);
+        // Add this cell's dofs to the patch of each of its vertices
+        for (unsigned int v=0; v<GeometryInfo<DoFHandlerType::dimension>::vertices_per_cell; ++v)
+          {
+            const unsigned int vg = cell->vertex_index(v);
+            const unsigned int block = vertex_mapping[vg];
+            if (block == numbers::invalid_unsigned_int) // this vertex was not given a patch
+              continue;
+
+            if (interior_only)
+              {
+                // Exclude degrees of freedom on faces opposite to the
+                // vertex
+                exclude.resize(fe.dofs_per_cell);
+                std::fill(exclude.begin(), exclude.end(), false);
+                const unsigned int dpf = fe.dofs_per_face;
+
+                for (unsigned int d=0; d<DoFHandlerType::dimension; ++d)
+                  {
+                    const unsigned int a_face = GeometryInfo<DoFHandlerType::dimension>::vertex_to_face[v][d];
+                    const unsigned int face = GeometryInfo<DoFHandlerType::dimension>::opposite_face[a_face];
+                    for (unsigned int i=0; i<dpf; ++i)
+                      exclude[fe.face_to_cell_index(i,face)] = true;
+                  }
+                for (unsigned int j=0; j<indices.size(); ++j)
+                  if (!exclude[j])
+                    block_list.add(block, indices[j]);
+              }
+            else
+              {
+                for (unsigned int j=0; j<indices.size(); ++j)
+                  block_list.add(block, indices[j]);
+              }
+          }
+      }
+  }
+
+
+  // Return the number of distinct dof indices found on the cells of a patch.
+  template <typename DoFHandlerType>
+  unsigned int
+  count_dofs_on_patch (const std::vector<typename DoFHandlerType::active_cell_iterator> &patch)
+  {
+    typedef typename DoFHandlerType::active_cell_iterator cell_iterator;
+
+    // a std::set stores every index only once, so dofs that show up on
+    // more than one cell of the patch are not counted repeatedly
+    std::set<types::global_dof_index> unique_dofs;
+    std::vector<types::global_dof_index> cell_dofs;
+
+    for (typename std::vector<cell_iterator>::const_iterator p=patch.begin(); p!=patch.end(); ++p)
+      {
+        const cell_iterator cell = *p;
+        Assert (cell->is_artificial() == false,
+                ExcMessage("This function can not be called with cells that are "
+                           "not either locally owned or ghost cells."));
+
+        cell_dofs.resize (cell->get_fe().dofs_per_cell);
+        cell->get_dof_indices (cell_dofs);
+        unique_dofs.insert (cell_dofs.begin(), cell_dofs.end());
+      }
+
+    return unique_dofs.size();
+  }
+
+
+  // Return the sorted list of distinct dof indices found on the cells of a patch.
+  template <typename DoFHandlerType>
+  std::vector<types::global_dof_index>
+  get_dofs_on_patch (const std::vector<typename DoFHandlerType::active_cell_iterator> &patch)
+  {
+    typedef typename DoFHandlerType::active_cell_iterator cell_iterator;
+
+    // gather the indices in a std::set: it drops duplicates (dofs found on
+    // several cells of the patch) and keeps its elements ordered
+    std::set<types::global_dof_index> dofs_on_patch;
+    std::vector<types::global_dof_index> cell_dofs;
+
+    for (typename std::vector<cell_iterator>::const_iterator p=patch.begin(); p!=patch.end(); ++p)
+      {
+        const cell_iterator cell = *p;
+        Assert (cell->is_artificial() == false,
+                ExcMessage("This function can not be called with cells that are "
+                           "not either locally owned or ghost cells."));
+
+        cell_dofs.resize (cell->get_fe().dofs_per_cell);
+        cell->get_dof_indices (cell_dofs);
+        dofs_on_patch.insert (cell_dofs.begin(), cell_dofs.end());
+      }
+
+    // cross-check against the function that only counts the dofs
+    Assert (dofs_on_patch.size() == count_dofs_on_patch<DoFHandlerType>(patch),
+            ExcInternalError());
+
+    // copy the set into a vector; a std::set is traversed in increasing
+    // order, so the vector comes out sorted as the documentation promises
+    std::vector<types::global_dof_index> sorted_dofs (dofs_on_patch.begin(), dofs_on_patch.end());
+    return sorted_dofs;
+  }
+
+
+} // end of namespace DoFTools
+
+
+
+// explicit instantiations
+
+#include "dof_tools.inst"
+
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/dofs/dof_tools.inst.in b/source/dofs/dof_tools.inst.in
new file mode 100644
index 0000000..feabaa7
--- /dev/null
+++ b/source/dofs/dof_tools.inst.in
@@ -0,0 +1,836 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+
+
+// Explicitly instantiate make_cell_patches for each pairing of a type from SPARSITY_PATTERNS with a dimension from DIMENSIONS.
+for (SP : SPARSITY_PATTERNS; deal_II_dimension : DIMENSIONS)
+  {
+   template void
+   DoFTools::make_cell_patches<DoFHandler<deal_II_dimension>,SP>
+   (SP&, const DoFHandler<deal_II_dimension>&, const unsigned int, const std::vector<bool>&, types::global_dof_index);
+  }
+
+
+
+for (deal_II_dimension : DIMENSIONS)
+{
+
+#if deal_II_dimension > 1
+  template
+  void 
+  DoFTools::extract_locally_relevant_dofs<DoFHandler<deal_II_dimension-1,deal_II_dimension > >
+  (const DoFHandler<deal_II_dimension-1,deal_II_dimension > & dof_handler,
+   IndexSet & dof_set);
+#endif
+
+  template
+  void DoFTools::make_vertex_patches (SparsityPattern&, const DoFHandler<deal_II_dimension>&,
+  unsigned int, bool, bool, bool, bool);
+
+  template
+  void DoFTools::make_single_patch (SparsityPattern&, const DoFHandler<deal_II_dimension>&,
+  unsigned int, bool);
+
+  template
+  void DoFTools::make_child_patches(SparsityPattern&, const DoFHandler<deal_II_dimension>&,
+  unsigned int, bool, bool);
+
+// TODO: can cleanup a bit more to fit into the scheme used above
+
+template
+void
+DoFTools::distribute_cell_to_dof_vector<DoFHandler<deal_II_dimension> >
+(const DoFHandler<deal_II_dimension> &dof_handler,
+ const Vector<float> &cell_data,
+ Vector<double>      &dof_data,
+ const unsigned int   component);
+template
+void
+DoFTools::distribute_cell_to_dof_vector<DoFHandler<deal_II_dimension> >
+(const DoFHandler<deal_II_dimension> &dof_handler,
+ const Vector<double> &cell_data,
+ Vector<double>       &dof_data,
+ const unsigned int    component);
+
+template
+void
+DoFTools::distribute_cell_to_dof_vector<hp::DoFHandler<deal_II_dimension> >
+(const hp::DoFHandler<deal_II_dimension> &dof_handler,
+ const Vector<float> &cell_data,
+ Vector<double>      &dof_data,
+ const unsigned int   component);
+template
+void
+DoFTools::distribute_cell_to_dof_vector<hp::DoFHandler<deal_II_dimension> >
+(const hp::DoFHandler<deal_II_dimension> &dof_handler,
+ const Vector<double> &cell_data,
+ Vector<double>       &dof_data,
+ const unsigned int    component);
+
+
+
+template void DoFTools::extract_level_dofs<DoFHandler<deal_II_dimension> >
+(const unsigned int level,
+ const DoFHandler<deal_II_dimension>&,
+ const ComponentMask &,
+ std::vector<bool>&);
+
+template void DoFTools::extract_level_dofs<DoFHandler<deal_II_dimension> >
+(const unsigned int level,
+ const DoFHandler<deal_II_dimension>&,
+ const BlockMask &,
+ std::vector<bool>&);
+
+#if deal_II_dimension > 1
+template void DoFTools::extract_level_dofs<DoFHandler<1, deal_II_dimension> >
+(const unsigned int level,
+ const DoFHandler<1, deal_II_dimension>&,
+ const BlockMask &,
+ std::vector<bool>&);
+#endif
+
+#if deal_II_dimension > 2
+template void DoFTools::extract_level_dofs<DoFHandler<2, deal_II_dimension> >
+(const unsigned int level,
+ const DoFHandler<2, deal_II_dimension>&,
+ const BlockMask &,
+ std::vector<bool>&);
+#endif
+
+template
+void
+DoFTools::extract_boundary_dofs<DoFHandler<deal_II_dimension> >
+(const DoFHandler<deal_II_dimension> &,
+ const ComponentMask                  &,
+ std::vector<bool>                        &,
+ const std::set<types::boundary_id> &);
+
+template
+void
+DoFTools::extract_boundary_dofs<hp::DoFHandler<deal_II_dimension> >
+(const hp::DoFHandler<deal_II_dimension> &,
+ const ComponentMask                  &,
+ std::vector<bool>                        &,
+ const std::set<types::boundary_id> &);
+
+template
+void
+DoFTools::extract_boundary_dofs<DoFHandler<deal_II_dimension> >
+(const DoFHandler<deal_II_dimension> &,
+ const ComponentMask                  &,
+ IndexSet                        &,
+ const std::set<types::boundary_id> &);
+
+template
+void
+DoFTools::extract_boundary_dofs<hp::DoFHandler<deal_II_dimension> >
+(const hp::DoFHandler<deal_II_dimension> &,
+ const ComponentMask                  &,
+ IndexSet                        &,
+ const std::set<types::boundary_id> &);
+
+template
+void
+DoFTools::extract_dofs_with_support_on_boundary<DoFHandler<deal_II_dimension> >
+(const DoFHandler<deal_II_dimension> &,
+ const ComponentMask                  &,
+ std::vector<bool>                        &,
+ const std::set<types::boundary_id> &);
+template
+void
+DoFTools::extract_dofs_with_support_on_boundary<hp::DoFHandler<deal_II_dimension> >
+(const hp::DoFHandler<deal_II_dimension> &,
+ const ComponentMask                  &,
+ std::vector<bool>                        &,
+ const std::set<types::boundary_id> &);
+
+template
+void
+DoFTools::extract_hanging_node_dofs
+(const DoFHandler<deal_II_dimension> &dof_handler,
+ std::vector<bool>     &selected_dofs);
+
+template
+void
+DoFTools::extract_subdomain_dofs<DoFHandler<deal_II_dimension> >
+(const DoFHandler<deal_II_dimension> &dof_handler,
+ const types::subdomain_id  subdomain_id,
+ std::vector<bool>     &selected_dofs);
+template
+void
+DoFTools::extract_subdomain_dofs<hp::DoFHandler<deal_II_dimension> >
+(const hp::DoFHandler<deal_II_dimension> &dof_handler,
+ const types::subdomain_id subdomain_id,
+ std::vector<bool>     &selected_dofs);
+
+template
+void
+DoFTools::extract_locally_owned_dofs<DoFHandler<deal_II_dimension> >
+(const DoFHandler<deal_II_dimension> & dof_handler,
+ IndexSet & dof_set);
+
+template
+void
+DoFTools::extract_locally_owned_dofs<hp::DoFHandler<deal_II_dimension> >
+(const hp::DoFHandler<deal_II_dimension> & dof_handler,
+ IndexSet & dof_set);
+
+template
+void
+DoFTools::extract_locally_active_dofs<DoFHandler<deal_II_dimension> >
+(const DoFHandler<deal_II_dimension> & dof_handler,
+ IndexSet & dof_set);
+
+template
+void
+DoFTools::extract_locally_active_dofs<hp::DoFHandler<deal_II_dimension> >
+(const hp::DoFHandler<deal_II_dimension> & dof_handler,
+ IndexSet & dof_set);
+
+template
+void
+DoFTools::extract_locally_relevant_dofs<DoFHandler<deal_II_dimension> >
+(const DoFHandler<deal_II_dimension> & dof_handler,
+ IndexSet & dof_set);
+ 
+template
+void
+DoFTools::extract_locally_relevant_dofs<hp::DoFHandler<deal_II_dimension> >
+(const hp::DoFHandler<deal_II_dimension> & dof_handler,
+ IndexSet & dof_set);
+
+template
+void
+DoFTools::extract_locally_relevant_level_dofs<DoFHandler<deal_II_dimension> >
+(const DoFHandler<deal_II_dimension> & dof_handler,
+ const unsigned int level,
+ IndexSet & dof_set);
+
+template
+void
+DoFTools::extract_constant_modes<DoFHandler<deal_II_dimension> >
+(const DoFHandler<deal_II_dimension> &dof_handler,
+ const ComponentMask &selected_components,
+ std::vector<std::vector<bool> > &constant_modes);
+
+template
+void
+DoFTools::extract_constant_modes<hp::DoFHandler<deal_II_dimension> >
+(const hp::DoFHandler<deal_II_dimension> &dof_handler,
+ const ComponentMask &selected_components,
+ std::vector<std::vector<bool> > &constant_modes);
+
+template
+void
+DoFTools::get_active_fe_indices<DoFHandler<deal_II_dimension> >
+(const DoFHandler<deal_II_dimension> &dof_handler,
+ std::vector<unsigned int> &active_fe_indices);
+
+template
+void
+DoFTools::get_active_fe_indices<hp::DoFHandler<deal_II_dimension> >
+(const hp::DoFHandler<deal_II_dimension> &dof_handler,
+ std::vector<unsigned int> &active_fe_indices);
+
+template
+void
+DoFTools::get_subdomain_association<DoFHandler<deal_II_dimension> >
+(const DoFHandler<deal_II_dimension> &dof_handler,
+ std::vector<types::subdomain_id>           &subdomain_association);
+template
+void
+DoFTools::get_subdomain_association<hp::DoFHandler<deal_II_dimension> >
+(const hp::DoFHandler<deal_II_dimension> &dof_handler,
+ std::vector<types::subdomain_id>           &subdomain_association);
+ 
+template
+std::vector<IndexSet>
+DoFTools::locally_owned_dofs_per_subdomain<DoFHandler<deal_II_dimension> >
+(const DoFHandler<deal_II_dimension> &dof_handler);
+template
+std::vector<IndexSet>
+DoFTools::locally_owned_dofs_per_subdomain<hp::DoFHandler<deal_II_dimension> >
+(const hp::DoFHandler<deal_II_dimension> &dof_handler);
+
+template
+std::vector<IndexSet>
+DoFTools::locally_relevant_dofs_per_subdomain<DoFHandler<deal_II_dimension> >
+(const DoFHandler<deal_II_dimension> &dof_handler);
+template
+std::vector<IndexSet>
+DoFTools::locally_relevant_dofs_per_subdomain<hp::DoFHandler<deal_II_dimension> >
+(const hp::DoFHandler<deal_II_dimension> &dof_handler);
+
+template
+unsigned int
+DoFTools::count_dofs_with_subdomain_association<DoFHandler<deal_II_dimension> >
+(const DoFHandler<deal_II_dimension> &,
+ const types::subdomain_id);
+template
+IndexSet
+DoFTools::dof_indices_with_subdomain_association<DoFHandler<deal_II_dimension> >
+(const DoFHandler<deal_II_dimension> &,
+ const types::subdomain_id);
+template
+void
+DoFTools::count_dofs_with_subdomain_association<DoFHandler<deal_II_dimension> >
+(const DoFHandler<deal_II_dimension> &,
+ const types::subdomain_id,
+ std::vector<unsigned int> &);
+
+template
+unsigned int
+DoFTools::count_dofs_with_subdomain_association<hp::DoFHandler<deal_II_dimension> >
+(const hp::DoFHandler<deal_II_dimension> &,
+ const types::subdomain_id);
+template
+IndexSet
+DoFTools::dof_indices_with_subdomain_association<hp::DoFHandler<deal_II_dimension> >
+(const hp::DoFHandler<deal_II_dimension> &,
+ const types::subdomain_id);
+template
+void
+DoFTools::count_dofs_with_subdomain_association<hp::DoFHandler<deal_II_dimension> >
+(const hp::DoFHandler<deal_II_dimension> &,
+ const types::subdomain_id,
+ std::vector<unsigned int> &);
+
+#if deal_II_dimension < 3
+
+template
+void
+DoFTools::extract_boundary_dofs<DoFHandler<deal_II_dimension,deal_II_dimension+1> >
+  (const DoFHandler<deal_II_dimension,deal_II_dimension+1> &,
+   const ComponentMask                  &,
+   std::vector<bool>                        &,
+   const std::set<types::boundary_id> &);
+
+template
+void
+DoFTools::extract_boundary_dofs<DoFHandler<deal_II_dimension,deal_II_dimension+1> >
+  (const DoFHandler<deal_II_dimension,deal_II_dimension+1> &,
+   const ComponentMask                  &,
+   IndexSet                        &,
+   const std::set<types::boundary_id> &);
+
+template
+unsigned int
+DoFTools::count_dofs_with_subdomain_association<DoFHandler<deal_II_dimension,deal_II_dimension+1> >
+(const DoFHandler<deal_II_dimension,deal_II_dimension+1> &,
+ const types::subdomain_id);
+
+template
+IndexSet
+DoFTools::dof_indices_with_subdomain_association<DoFHandler<deal_II_dimension,deal_II_dimension+1> >
+(const DoFHandler<deal_II_dimension,deal_II_dimension+1> &,
+ const types::subdomain_id);
+template
+void
+DoFTools::count_dofs_with_subdomain_association<DoFHandler<deal_II_dimension,deal_II_dimension+1> >
+(const DoFHandler<deal_II_dimension,deal_II_dimension+1> &,
+ const types::subdomain_id,
+ std::vector<unsigned int> &);
+
+template
+unsigned int
+DoFTools::count_dofs_with_subdomain_association<hp::DoFHandler<deal_II_dimension,deal_II_dimension+1> >
+(const hp::DoFHandler<deal_II_dimension,deal_II_dimension+1> &,
+ const types::subdomain_id);
+
+template
+IndexSet
+DoFTools::dof_indices_with_subdomain_association<hp::DoFHandler<deal_II_dimension,deal_II_dimension+1> >
+(const hp::DoFHandler<deal_II_dimension,deal_II_dimension+1> &,
+ const types::subdomain_id);
+
+template
+void
+DoFTools::get_subdomain_association<DoFHandler<deal_II_dimension,deal_II_dimension+1> >
+(const DoFHandler<deal_II_dimension,deal_II_dimension+1> &dof_handler,
+ std::vector<types::subdomain_id>           &subdomain_association);
+template
+void
+DoFTools::get_subdomain_association<hp::DoFHandler<deal_II_dimension,deal_II_dimension+1> >
+(const hp::DoFHandler<deal_II_dimension,deal_II_dimension+1> &dof_handler,
+ std::vector<types::subdomain_id>           &subdomain_association);
+ 
+template
+std::vector<IndexSet>
+DoFTools::locally_owned_dofs_per_subdomain<DoFHandler<deal_II_dimension,deal_II_dimension+1> >
+(const DoFHandler<deal_II_dimension,deal_II_dimension+1> &dof_handler);
+template
+std::vector<IndexSet>
+DoFTools::locally_owned_dofs_per_subdomain<hp::DoFHandler<deal_II_dimension,deal_II_dimension+1> >
+(const hp::DoFHandler<deal_II_dimension,deal_II_dimension+1> &dof_handler);
+
+template
+std::vector<IndexSet>
+DoFTools::locally_relevant_dofs_per_subdomain<DoFHandler<deal_II_dimension,deal_II_dimension+1> >
+(const DoFHandler<deal_II_dimension,deal_II_dimension+1> &dof_handler);
+template
+std::vector<IndexSet>
+DoFTools::locally_relevant_dofs_per_subdomain<hp::DoFHandler<deal_II_dimension,deal_II_dimension+1> >
+(const hp::DoFHandler<deal_II_dimension,deal_II_dimension+1> &dof_handler);
+
+template
+void
+DoFTools::count_dofs_with_subdomain_association<hp::DoFHandler<deal_II_dimension,deal_II_dimension+1> >
+(const hp::DoFHandler<deal_II_dimension,deal_II_dimension+1> &,
+ const types::subdomain_id,
+ std::vector<unsigned int> &);
+#endif
+
+#if deal_II_dimension == 3
+template
+void
+DoFTools::extract_boundary_dofs<DoFHandler<1,3> >
+  (const DoFHandler<1,3> &,
+   const ComponentMask                  &,
+   std::vector<bool>                        &,
+   const std::set<types::boundary_id> &);
+
+template
+void
+DoFTools::extract_boundary_dofs<DoFHandler<1,3> >
+  (const DoFHandler<1,3> &,
+   const ComponentMask                  &,
+   IndexSet                        &,
+   const std::set<types::boundary_id> &);
+
+template
+void
+DoFTools::get_subdomain_association<DoFHandler<1,3> >
+(const DoFHandler<1,3> &dof_handler,
+ std::vector<types::subdomain_id>           &subdomain_association);
+template
+void
+DoFTools::get_subdomain_association<hp::DoFHandler<1,3> >
+(const hp::DoFHandler<1,3> &dof_handler,
+ std::vector<types::subdomain_id>           &subdomain_association);
+ 
+template
+std::vector<IndexSet>
+DoFTools::locally_owned_dofs_per_subdomain<DoFHandler<1,3> >
+(const DoFHandler<1,3>     &dof_handler);
+template
+std::vector<IndexSet>
+DoFTools::locally_owned_dofs_per_subdomain<hp::DoFHandler<1,3> >
+(const hp::DoFHandler<1,3> &dof_handler);
+
+template
+std::vector<IndexSet>
+DoFTools::locally_relevant_dofs_per_subdomain<DoFHandler<1,3> >
+(const DoFHandler<1,3>     &dof_handler);
+template
+std::vector<IndexSet>
+DoFTools::locally_relevant_dofs_per_subdomain<hp::DoFHandler<1,3> >
+(const hp::DoFHandler<1,3> &dof_handler);
+
+template
+unsigned int
+DoFTools::count_dofs_with_subdomain_association<DoFHandler<1,3> >
+(const DoFHandler<1,3> &,
+ const types::subdomain_id);
+template
+IndexSet
+DoFTools::dof_indices_with_subdomain_association<DoFHandler<1,3> >
+(const DoFHandler<1,3> &,
+ const types::subdomain_id);
+template
+void
+DoFTools::count_dofs_with_subdomain_association<DoFHandler<1,3> >
+(const DoFHandler<1,3> &,
+ const types::subdomain_id,
+ std::vector<unsigned int> &);
+
+template
+unsigned int
+DoFTools::count_dofs_with_subdomain_association<hp::DoFHandler<1,3> >
+(const hp::DoFHandler<1,3> &,
+ const types::subdomain_id);
+template
+IndexSet
+DoFTools::dof_indices_with_subdomain_association<hp::DoFHandler<1,3> >
+(const hp::DoFHandler<1,3> &,
+ const types::subdomain_id);
+template
+void
+DoFTools::count_dofs_with_subdomain_association<hp::DoFHandler<1,3> >
+(const hp::DoFHandler<1,3> &,
+ const types::subdomain_id,
+ std::vector<unsigned int> &);
+#endif
+
+
+
+template
+void
+DoFTools::count_dofs_per_component<DoFHandler<deal_II_dimension> > (
+  const DoFHandler<deal_II_dimension>&,
+  std::vector<types::global_dof_index>&, bool, std::vector<unsigned int>);
+
+template
+void
+DoFTools::count_dofs_per_component<hp::DoFHandler<deal_II_dimension> > (
+  const hp::DoFHandler<deal_II_dimension>&,
+  std::vector<types::global_dof_index>&, bool, std::vector<unsigned int>);
+
+
+#if deal_II_dimension < 3
+template
+void
+DoFTools::count_dofs_per_component<DoFHandler<deal_II_dimension, deal_II_dimension+1> > (
+  const DoFHandler<deal_II_dimension, deal_II_dimension+1>&,
+  std::vector<types::global_dof_index>&, bool, std::vector<unsigned int>);
+
+template
+void
+DoFTools::count_dofs_per_component<hp::DoFHandler<deal_II_dimension, deal_II_dimension+1> > (
+  const hp::DoFHandler<deal_II_dimension, deal_II_dimension+1>&,
+  std::vector<types::global_dof_index>&, bool, std::vector<unsigned int>);
+
+template
+void
+DoFTools::extract_level_dofs<DoFHandler<deal_II_dimension, deal_II_dimension+1> >
+(const unsigned int level,
+ const DoFHandler<deal_II_dimension, deal_II_dimension+1>&,
+ const ComponentMask&,
+ std::vector<bool>&);
+
+#endif
+
+
+#if deal_II_dimension == 3
+template
+void
+DoFTools::count_dofs_per_component<DoFHandler<1,3> > (
+  const DoFHandler<1,3>&,
+  std::vector<types::global_dof_index>&, bool, std::vector<unsigned int>);
+
+template
+void
+DoFTools::count_dofs_per_component<hp::DoFHandler<1,3> > (
+  const hp::DoFHandler<1,3>&,
+  std::vector<types::global_dof_index>&, bool, std::vector<unsigned int>);
+
+template
+void
+DoFTools::extract_level_dofs<DoFHandler<1, 3> >
+(const unsigned int level,
+ const DoFHandler<1,3>&,
+ const ComponentMask &,
+ std::vector<bool>&);
+
+#endif
+
+
+template
+void
+DoFTools::count_dofs_per_block<DoFHandler<deal_II_dimension> > (
+  const DoFHandler<deal_II_dimension>&,
+  std::vector<types::global_dof_index>&,
+  const std::vector<unsigned int> &);
+
+template
+void
+DoFTools::count_dofs_per_block<hp::DoFHandler<deal_II_dimension> > (
+  const hp::DoFHandler<deal_II_dimension>&,
+  std::vector<types::global_dof_index>&,
+  const std::vector<unsigned int> &);
+
+template
+void
+DoFTools::map_dof_to_boundary_indices<DoFHandler<deal_II_dimension> >
+(const DoFHandler<deal_II_dimension> &,
+ const std::set<types::boundary_id> &,
+ std::vector<types::global_dof_index> &);
+
+template
+void
+DoFTools::map_dof_to_boundary_indices<DoFHandler<deal_II_dimension> >
+(const DoFHandler<deal_II_dimension> &,
+ std::vector<types::global_dof_index> &);
+
+
+
+template
+void
+DoFTools::map_dof_to_boundary_indices<hp::DoFHandler<deal_II_dimension> >
+(const hp::DoFHandler<deal_II_dimension> &,
+ const std::set<types::boundary_id> &,
+ std::vector<types::global_dof_index> &);
+
+template
+void
+DoFTools::map_dof_to_boundary_indices<hp::DoFHandler<deal_II_dimension> >
+(const hp::DoFHandler<deal_II_dimension> &,
+ std::vector<types::global_dof_index> &);
+
+
+
+template
+void
+DoFTools::map_dofs_to_support_points<deal_II_dimension>
+(const Mapping<deal_II_dimension,deal_II_dimension>&,
+ const DoFHandler<deal_II_dimension>&,
+ std::vector<Point<deal_II_dimension> >&);
+
+
+template
+void
+DoFTools::map_dofs_to_support_points<deal_II_dimension>
+(const hp::MappingCollection<deal_II_dimension,deal_II_dimension>&,
+ const hp::DoFHandler<deal_II_dimension>&,
+ std::vector<Point<deal_II_dimension> >&);
+
+
+template
+void
+DoFTools::map_dofs_to_support_points<deal_II_dimension>
+(const Mapping<deal_II_dimension,deal_II_dimension>&,
+ const DoFHandler<deal_II_dimension>&,
+ std::map<types::global_dof_index, Point<deal_II_dimension> >&);
+
+
+template
+void
+DoFTools::map_dofs_to_support_points<deal_II_dimension>
+(const hp::MappingCollection<deal_II_dimension,deal_II_dimension>&,
+ const hp::DoFHandler<deal_II_dimension>&,
+ std::map<types::global_dof_index, Point<deal_II_dimension> >&);
+
+#if deal_II_dimension < 3
+
+template
+void
+DoFTools::map_dofs_to_support_points<deal_II_dimension,deal_II_dimension+1>
+(const Mapping<deal_II_dimension,deal_II_dimension+1>&,
+ const DoFHandler<deal_II_dimension, deal_II_dimension+1>&,
+ std::vector<Point<deal_II_dimension+1> >&);
+
+template
+void
+DoFTools::map_dofs_to_support_points<deal_II_dimension,deal_II_dimension+1>
+(const hp::MappingCollection<deal_II_dimension,deal_II_dimension+1>&,
+ const hp::DoFHandler<deal_II_dimension, deal_II_dimension+1>&,
+ std::vector<Point<deal_II_dimension+1> >&);
+
+template
+void
+DoFTools::map_dofs_to_support_points<deal_II_dimension,deal_II_dimension+1>
+(const Mapping<deal_II_dimension,deal_II_dimension+1>&,
+ const DoFHandler<deal_II_dimension, deal_II_dimension+1>&,
+ std::map<types::global_dof_index, Point<deal_II_dimension+1> >&);
+
+template
+void
+DoFTools::map_dofs_to_support_points<deal_II_dimension,deal_II_dimension+1>
+(const hp::MappingCollection<deal_II_dimension,deal_II_dimension+1>&,
+ const hp::DoFHandler<deal_II_dimension, deal_II_dimension+1>&,
+ std::map<types::global_dof_index, Point<deal_II_dimension+1> >&);
+
+
+template
+void
+DoFTools::count_dofs_per_block<DoFHandler<deal_II_dimension,deal_II_dimension+1> > (
+  const DoFHandler<deal_II_dimension,deal_II_dimension+1>&,
+  std::vector<types::global_dof_index>&,
+  const std::vector<unsigned int> &);
+
+template
+void
+DoFTools::count_dofs_per_block<hp::DoFHandler<deal_II_dimension,deal_II_dimension+1> > (
+  const hp::DoFHandler<deal_II_dimension,deal_II_dimension+1>&,
+  std::vector<types::global_dof_index>&,
+  const std::vector<unsigned int> &);
+
+#endif
+
+
+#if deal_II_dimension == 3
+
+template
+void
+DoFTools::map_dofs_to_support_points<1,3>
+(const Mapping<1,3>&,
+ const DoFHandler<1,3>&,
+ std::vector<Point<3> >&);
+
+template
+void
+DoFTools::count_dofs_per_block<DoFHandler<1,3> > (
+  const DoFHandler<1,3>&,
+  std::vector<types::global_dof_index>&,
+  const std::vector<unsigned int> &);
+
+template
+void
+DoFTools::count_dofs_per_block<hp::DoFHandler<1,3> > (
+  const hp::DoFHandler<1,3>&,
+  std::vector<types::global_dof_index>&,
+  const std::vector<unsigned int> &);
+
+#endif
+
+template
+void
+DoFTools::convert_couplings_to_blocks (
+  const DoFHandler<deal_II_dimension>&, const Table<2, Coupling>&,
+  std::vector<Table<2,Coupling> >&);
+
+template
+void
+DoFTools::convert_couplings_to_blocks (
+  const hp::DoFHandler<deal_II_dimension>&, const Table<2, Coupling>&,
+  std::vector<Table<2,Coupling> >&);
+
+
+#if deal_II_dimension < 3
+
+template
+void
+DoFTools::map_dof_to_boundary_indices<DoFHandler<deal_II_dimension,deal_II_dimension+1> >
+(const DoFHandler<deal_II_dimension,deal_II_dimension+1> &,
+ const std::set<types::boundary_id> &,
+ std::vector<types::global_dof_index> &);
+
+template
+void
+DoFTools::map_dof_to_boundary_indices<DoFHandler<deal_II_dimension,deal_II_dimension+1> >
+(const DoFHandler<deal_II_dimension,deal_II_dimension+1> &,
+ std::vector<types::global_dof_index> &);
+
+
+template
+void
+DoFTools::extract_hanging_node_dofs
+(const DoFHandler<deal_II_dimension,deal_II_dimension+1> &dof_handler,
+ std::vector<bool>     &selected_dofs);
+
+ template
+ void
+ DoFTools::map_dof_to_boundary_indices<hp::DoFHandler<deal_II_dimension,deal_II_dimension+1> >
+ (const hp::DoFHandler<deal_II_dimension,deal_II_dimension+1> &,
+  const std::set<types::boundary_id> &,
+  std::vector<types::global_dof_index> &);
+
+ template
+ void
+ DoFTools::map_dof_to_boundary_indices<hp::DoFHandler<deal_II_dimension,deal_II_dimension+1> >
+ (const hp::DoFHandler<deal_II_dimension,deal_II_dimension+1> &,
+  std::vector<types::global_dof_index> &);
+
+#endif
+
+#if deal_II_dimension == 3
+
+template
+void
+DoFTools::map_dof_to_boundary_indices<DoFHandler<1,3> >
+(const DoFHandler<1,3> &,
+ const std::set<types::boundary_id> &,
+ std::vector<types::global_dof_index> &);
+
+template
+void
+DoFTools::map_dof_to_boundary_indices<DoFHandler<1,3> >
+(const DoFHandler<1,3> &,
+ std::vector<types::global_dof_index> &);
+
+
+template
+void
+DoFTools::extract_hanging_node_dofs
+(const DoFHandler<1,3> &dof_handler,
+ std::vector<bool>     &selected_dofs);
+
+template
+void
+DoFTools::map_dof_to_boundary_indices<hp::DoFHandler<1,3> >
+(const hp::DoFHandler<1,3> &,
+ const std::set<types::boundary_id> &,
+ std::vector<types::global_dof_index> &);
+
+template
+void
+DoFTools::map_dof_to_boundary_indices<hp::DoFHandler<1,3> >
+(const hp::DoFHandler<1,3> &,
+ std::vector<types::global_dof_index> &);
+
+#endif
+
+
+}
+
+
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension : DIMENSIONS)
+{
+    #if deal_II_dimension <= deal_II_space_dimension
+      namespace DoFTools
+      \{
+        template
+        unsigned int
+        count_dofs_on_patch<DoFHandler<deal_II_dimension,deal_II_space_dimension> >
+        (const std::vector<DoFHandler<deal_II_dimension,deal_II_space_dimension>::active_cell_iterator> &patch);
+
+        template
+        std::vector<types::global_dof_index>
+        get_dofs_on_patch<DoFHandler<deal_II_dimension,deal_II_space_dimension> >
+        (const std::vector<DoFHandler<deal_II_dimension,deal_II_space_dimension>::active_cell_iterator> &patch);
+
+        template
+        unsigned int
+        count_dofs_on_patch<hp::DoFHandler<deal_II_dimension,deal_II_space_dimension> >
+        (const std::vector<hp::DoFHandler<deal_II_dimension,deal_II_space_dimension>::active_cell_iterator> &patch);
+
+        template
+        std::vector<types::global_dof_index>
+        get_dofs_on_patch<hp::DoFHandler<deal_II_dimension,deal_II_space_dimension> >
+        (const std::vector<hp::DoFHandler<deal_II_dimension,deal_II_space_dimension>::active_cell_iterator> &patch);
+
+        template
+        void
+        extract_dofs<deal_II_dimension,deal_II_space_dimension>
+        (const DoFHandler<deal_II_dimension,deal_II_space_dimension>&,
+         const ComponentMask &,
+         std::vector<bool>&);
+
+        template
+        void
+        extract_dofs<deal_II_dimension,deal_II_space_dimension>
+        (const DoFHandler<deal_II_dimension,deal_II_space_dimension>&,
+         const BlockMask &,
+         std::vector<bool>&);
+
+        template
+        void
+        extract_dofs<deal_II_dimension,deal_II_space_dimension>
+        (const hp::DoFHandler<deal_II_dimension,deal_II_space_dimension>&,
+         const ComponentMask &,
+         std::vector<bool>&);
+
+        template
+        void
+        extract_dofs<deal_II_dimension,deal_II_space_dimension>
+        (const hp::DoFHandler<deal_II_dimension,deal_II_space_dimension>&,
+         const BlockMask &,
+         std::vector<bool>&);
+
+      \} // namespace DoFTools
+    #endif // deal_II_dimension <= deal_II_space_dimension
+}
diff --git a/source/dofs/dof_tools_constraints.cc b/source/dofs/dof_tools_constraints.cc
new file mode 100644
index 0000000..8531af2
--- /dev/null
+++ b/source/dofs/dof_tools_constraints.cc
@@ -0,0 +1,3350 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/std_cxx1x/array.h>
+#include <deal.II/base/thread_management.h>
+#include <deal.II/base/table.h>
+#include <deal.II/base/template_constraints.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/work_stream.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/intergrid_map.h>
+#include <deal.II/grid/grid_tools.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/fe_tools.h>
+#include <deal.II/hp/fe_collection.h>
+#include <deal.II/hp/fe_values.h>
+#include <deal.II/dofs/dof_tools.h>
+
+#ifdef DEAL_II_WITH_MPI
+#include <deal.II/lac/parallel_vector.h>
+#endif
+
+#include <algorithm>
+#include <numeric>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+namespace DoFTools
+{
+  namespace internal
+  {
+    namespace
+    {
+      inline bool
+      check_master_dof_list (const FullMatrix<double> &face_interpolation_matrix,
+                             const std::vector<types::global_dof_index> &master_dof_list)
+      {
+        const unsigned int N = master_dof_list.size();
+        // copy rows master_dof_list[i], columns 0..N-1, into an N x N matrix
+        FullMatrix<double> tmp (N,N);
+        for (unsigned int i=0; i<N; ++i)
+          for (unsigned int j=0; j<N; ++j)
+            tmp(i,j) = face_interpolation_matrix (master_dof_list[i], j);
+
+        // then use the algorithm from FullMatrix::gauss_jordan on this
+        // matrix to find out whether it is singular: we return false as
+        // soon as a pivot is too small, and true otherwise. the algorithm
+        // there does pivoting and at the end swaps rows back into their
+        // proper order -- we omit this step here, since we don't care about
+        // the inverse matrix, only about whether it is regular or singular
+
+        // first get an estimate of the size of the elements of this
+        // matrix, for later checks whether the pivot element is large
+        // enough, or whether we have to fear that the matrix is not
+        // regular
+        double diagonal_sum = 0;
+        for (unsigned int i=0; i<N; ++i)
+          diagonal_sum += std::fabs(tmp(i,i));
+        const double typical_diagonal_element = diagonal_sum/N;
+
+        // initialize the array that records the row permutations found during
+        // pivot search (it is only written below, never read in this function)
+        std::vector<unsigned int> p(N);
+        for (unsigned int i=0; i<N; ++i)
+          p[i] = i;
+
+        for (unsigned int j=0; j<N; ++j)
+          {
+            // pivot search: search that part of column j on and below the
+            // diagonal for the element that is largest in absolute value
+            double       max = std::fabs(tmp(j,j));
+            unsigned int r   = j;
+            for (unsigned int i=j+1; i<N; ++i)
+              {
+                if (std::fabs(tmp(i,j)) > max)
+                  {
+                    max = std::fabs(tmp(i,j));
+                    r = i;
+                  }
+              }
+            // check whether the pivot is too small (relative to the typical
+            // diagonal entry). if that is the case, then the matrix is
+            // singular and we shouldn't use this set of master dofs
+            if (max < 1.e-12*typical_diagonal_element)
+              return false;
+
+            // row interchange: move the pivot row r up into row j
+            if (r>j)
+              {
+                for (unsigned int k=0; k<N; ++k)
+                  std::swap (tmp(j,k), tmp(r,k));
+
+                std::swap (p[j], p[r]);
+              }
+
+            // transformation: update all other rows and columns using pivot tmp(j,j)
+            const double hr = 1./tmp(j,j);
+            tmp(j,j) = hr;
+            for (unsigned int k=0; k<N; ++k)
+              {
+                if (k==j) continue;
+                for (unsigned int i=0; i<N; ++i)
+                  {
+                    if (i==j) continue;
+                    tmp(i,k) -= tmp(i,j)*tmp(j,k)*hr;
+                  }
+              }
+            for (unsigned int i=0; i<N; ++i)
+              {
+                tmp(i,j) *= hr;
+                tmp(j,i) *= -hr;
+              }
+            tmp(j,j) = hr;   // the loop above also scaled (j,j); reset it
+          }
+
+        // everything went fine, so we can accept this set of master dofs
+        // (at least as far as they have already been collected)
+        return true;
+      }
+
+
+
+      /**
+       * When restricting, on a face, the degrees of freedom of fe1 to the
+       * space described by fe2 (for example for the complex case described
+       * in the @ref hp_paper "hp paper"), we have to select
+       * fe2.dofs_per_face out of the fe1.dofs_per_face face DoFs as the
+       * master DoFs, and the rest become slave dofs. This function selects
+       * which ones will be masters, and which ones will be slaves.
+       *
+       * The function assumes that @p master_dof_mask already has size
+       * fe1.dofs_per_face. After the function, exactly fe2.dofs_per_face
+       * entries will be true.
+       *
+       * The function is a bit complicated since it has to figure out a set
+       * of DoFs so that the corresponding rows in the face interpolation
+       * matrix are all linearly independent. We have a good heuristic (see
+       * the function body) for selecting these rows, but there are cases
+       * where this fails and we have to pick them differently. What we do
+       * is to tentatively add each candidate dof and check whether the
+       * selected rows still have full row rank; if this isn't the case, the
+       * candidate is dropped again and the next one is tried
+       */
+      template <int dim, int spacedim>
+      void
+      select_master_dofs_for_face_restriction (const FiniteElement<dim,spacedim> &fe1,
+                                               const FiniteElement<dim,spacedim> &fe2,
+                                               const FullMatrix<double> &face_interpolation_matrix,
+                                               std::vector<bool>        &master_dof_mask)
+      {
+        Assert (fe1.dofs_per_face >= fe2.dofs_per_face,
+                ExcInternalError());
+        AssertDimension (master_dof_mask.size(), fe1.dofs_per_face);
+
+        Assert (fe2.dofs_per_vertex <= fe1.dofs_per_vertex,
+                ExcInternalError());
+        Assert (fe2.dofs_per_line <= fe1.dofs_per_line,
+                ExcInternalError());
+        Assert ((dim < 3)
+                ||
+                (fe2.dofs_per_quad <= fe1.dofs_per_quad),
+                ExcInternalError());
+
+        // the idea here is to designate as many DoFs in fe1 per object
+        // (vertex, line, quad) as master as there are such dofs in fe2
+        // (indices are int, because we want to avoid the 'unsigned int < 0
+        // is always false' warning for the cases at the bottom in 1d and
+        // 2d)
+        //
+        // as mentioned in the paper, it is not always easy to find a set
+        // of master dofs that produces an invertible matrix. to this end,
+        // we check in each step whether the matrix is still invertible and
+        // simply discard this dof if the matrix is not invertible anymore.
+        //
+        // the cases where we did have trouble in the past were with adding
+        // more quad dofs when Q3 and Q4 elements meet at a refined face in
+        // 3d (see the hp/crash_12 test that tests that we can do exactly
+        // this, and failed before we had code to compensate for this
+        // case). the other case is system elements: if we have say a Q1Q2
+        // vs a Q2Q3 element, then we can't just take all master dofs on a
+        // line from a single base element, since the shape functions of
+        // that base element are independent of that of the other one. this
+        // latter case shows up when running hp/hp_constraints_q_system_06
+
+        std::vector<types::global_dof_index> master_dof_list;
+        unsigned int index = 0;   // fe1 face dof number of the current object's first dof
+        for (int v=0;
+             v<static_cast<signed int>(GeometryInfo<dim>::vertices_per_face);
+             ++v)
+          {
+            unsigned int dofs_added = 0;
+            unsigned int i          = 0;
+            while (dofs_added < fe2.dofs_per_vertex)
+              {
+                // make sure that we were able to find a set of master dofs
+                // and that the code down below didn't just reject all our
+                // efforts
+                Assert (i < fe1.dofs_per_vertex,
+                        ExcInternalError());
+
+                // tentatively push this vertex dof
+                master_dof_list.push_back (index+i);
+
+                // then see what happens. if it succeeds, fine
+                if (check_master_dof_list (face_interpolation_matrix,
+                                           master_dof_list)
+                    == true)
+                  ++dofs_added;
+                else
+                  // well, it didn't. simply pop that dof from the list
+                  // again and try with the next dof
+                  master_dof_list.pop_back ();
+
+                // forward counter by one
+                ++i;
+              }
+            index += fe1.dofs_per_vertex;
+          }
+
+        for (int l=0;
+             l<static_cast<signed int>(GeometryInfo<dim>::lines_per_face);
+             ++l)
+          {
+            // same algorithm as above, now for the dofs on each line
+            unsigned int dofs_added = 0;
+            unsigned int i          = 0;
+            while (dofs_added < fe2.dofs_per_line)
+              {
+                Assert (i < fe1.dofs_per_line,
+                        ExcInternalError());
+
+                master_dof_list.push_back (index+i);
+                if (check_master_dof_list (face_interpolation_matrix,
+                                           master_dof_list)
+                    == true)
+                  ++dofs_added;
+                else
+                  master_dof_list.pop_back ();
+
+                ++i;
+              }
+            index += fe1.dofs_per_line;
+          }
+
+        for (int q=0;
+             q<static_cast<signed int>(GeometryInfo<dim>::quads_per_face);
+             ++q)
+          {
+            // same algorithm as above, now for the dofs on each quad
+            unsigned int dofs_added = 0;
+            unsigned int i          = 0;
+            while (dofs_added < fe2.dofs_per_quad)
+              {
+                Assert (i < fe1.dofs_per_quad,
+                        ExcInternalError());
+
+                master_dof_list.push_back (index+i);
+                if (check_master_dof_list (face_interpolation_matrix,
+                                           master_dof_list)
+                    == true)
+                  ++dofs_added;
+                else
+                  master_dof_list.pop_back ();
+
+                ++i;
+              }
+            index += fe1.dofs_per_quad;
+          }
+
+        AssertDimension (index, fe1.dofs_per_face);
+        AssertDimension (master_dof_list.size(), fe2.dofs_per_face);
+
+        // finally copy the list into the mask: true exactly for the chosen dofs
+        std::fill (master_dof_mask.begin(), master_dof_mask.end(), false);
+        for (std::vector<types::global_dof_index>::const_iterator i=master_dof_list.begin();
+             i!=master_dof_list.end(); ++i)
+          master_dof_mask[*i] = true;
+      }
+
+
+
+      /**
+       * Make sure the mask exists that determines which dofs will be masters
+       * where an fe1 and a fe2 meet; it is computed only if the pointer is null.
+       */
+      template <int dim, int spacedim>
+      void
+      ensure_existence_of_master_dof_mask (const FiniteElement<dim,spacedim> &fe1,
+                                           const FiniteElement<dim,spacedim> &fe2,
+                                           const FullMatrix<double> &face_interpolation_matrix,
+                                           std_cxx11::shared_ptr<std::vector<bool> > &master_dof_mask)
+      {
+        if (master_dof_mask == std_cxx11::shared_ptr<std::vector<bool> >())
+          {
+            master_dof_mask = std_cxx11::shared_ptr<std::vector<bool> >
+                              (new std::vector<bool> (fe1.dofs_per_face));
+            select_master_dofs_for_face_restriction (fe1,
+                                                     fe2,
+                                                     face_interpolation_matrix,
+                                                     *master_dof_mask);
+          }
+      }
+
+
+
+      /**
+       * Make sure that the given @p matrix pointer points to a valid face
+       * interpolation matrix. If the pointer is zero beforehand, allocate a
+       * fe2.dofs_per_face x fe1.dofs_per_face matrix and let fe1 fill it via
+       * get_face_interpolation_matrix(). If it is nonzero, don't touch it.
+       */
+      template <int dim, int spacedim>
+      void
+      ensure_existence_of_face_matrix (const FiniteElement<dim,spacedim> &fe1,
+                                       const FiniteElement<dim,spacedim> &fe2,
+                                       std_cxx11::shared_ptr<FullMatrix<double> > &matrix)
+      {
+        if (matrix == std_cxx11::shared_ptr<FullMatrix<double> >())
+          {
+            matrix = std_cxx11::shared_ptr<FullMatrix<double> >
+                     (new FullMatrix<double> (fe2.dofs_per_face,
+                                              fe1.dofs_per_face));
+            fe1.get_face_interpolation_matrix (fe2,
+                                               *matrix);
+          }
+      }
+
+
+
+      /**
+       * Same, but for the interpolation matrix of subface number @p subface.
+       */
+      template <int dim, int spacedim>
+      void
+      ensure_existence_of_subface_matrix (const FiniteElement<dim,spacedim> &fe1,
+                                          const FiniteElement<dim,spacedim> &fe2,
+                                          const unsigned int        subface,
+                                          std_cxx11::shared_ptr<FullMatrix<double> > &matrix)
+      {
+        if (matrix == std_cxx11::shared_ptr<FullMatrix<double> >())
+          {
+            matrix = std_cxx11::shared_ptr<FullMatrix<double> >
+                     (new FullMatrix<double> (fe2.dofs_per_face,
+                                              fe1.dofs_per_face));
+            fe1.get_subface_interpolation_matrix (fe2,
+                                                  subface,
+                                                  *matrix);
+          }
+      }
+
+
+
+      /**
+       * If @p split_matrix is still null, split the face interpolation matrix
+       * between two elements into its master rows (first, stored inverted) and
+       * slave rows (second) as explained in the @ref hp_paper "hp paper".
+       */
+      void
+      ensure_existence_of_split_face_matrix (const FullMatrix<double> &face_interpolation_matrix,
+                                             const std::vector<bool> &master_dof_mask,
+                                             std_cxx11::shared_ptr<std::pair<FullMatrix<double>,FullMatrix<double> > > &split_matrix)
+      {
+        AssertDimension (master_dof_mask.size(), face_interpolation_matrix.m());
+        Assert (std::count (master_dof_mask.begin(), master_dof_mask.end(), true) ==
+                static_cast<signed int>(face_interpolation_matrix.n()),
+                ExcInternalError());
+
+        if (split_matrix ==
+            std_cxx11::shared_ptr<std::pair<FullMatrix<double>,FullMatrix<double> > >())
+          {
+            split_matrix
+              = std_cxx11::shared_ptr<std::pair<FullMatrix<double>,FullMatrix<double> > >
+                (new std::pair<FullMatrix<double>,FullMatrix<double> >());
+
+            const unsigned int n_master_dofs = face_interpolation_matrix.n();
+            const unsigned int n_dofs        = face_interpolation_matrix.m();
+
+            Assert (n_master_dofs <= n_dofs, ExcInternalError());
+
+            // rows flagged in master_dof_mask are copied to ->first
+            // (inverted at the end), all other rows to ->second,
+            // each in the order in which they appear
+            split_matrix->first.reinit (n_master_dofs, n_master_dofs);
+            split_matrix->second.reinit (n_dofs-n_master_dofs, n_master_dofs);
+
+            unsigned int nth_master_dof = 0,
+                         nth_slave_dof  = 0;
+
+            for (unsigned int i=0; i<n_dofs; ++i)
+              if (master_dof_mask[i] == true)
+                {
+                  for (unsigned int j=0; j<n_master_dofs; ++j)
+                    split_matrix->first(nth_master_dof,j)
+                      = face_interpolation_matrix(i,j);
+                  ++nth_master_dof;
+                }
+              else
+                {
+                  for (unsigned int j=0; j<n_master_dofs; ++j)
+                    split_matrix->second(nth_slave_dof,j)
+                      = face_interpolation_matrix(i,j);
+                  ++nth_slave_dof;
+                }
+
+            AssertDimension (nth_master_dof, n_master_dofs);
+            AssertDimension (nth_slave_dof, n_dofs-n_master_dofs);
+
+            //TODO[WB]: We should make sure very small entries are removed after inversion
+            split_matrix->first.gauss_jordan ();
+          }
+      }
+
+
+      // a template that can determine statically whether a given DoFHandler
+      // class supports different finite elements. the general answer is 'true'
+      template <typename>
+      struct DoFHandlerSupportsDifferentFEs
+      {
+        static const bool value = true;
+      };
+
+      // the specialization for dealii::DoFHandler answers 'false'
+      template <int dim, int spacedim>
+      struct DoFHandlerSupportsDifferentFEs< dealii::DoFHandler<dim,spacedim> >
+      {
+        static const bool value = false;
+      };
+
+
+      /**
+       * Return how many different finite elements a dof handler uses: the
+       * overload for hp::DoFHandler returns dof_handler.get_fe().size(),
+       * the generic overload used for all other handler types returns one.
+       */
+      template <int dim, int spacedim>
+      unsigned int
+      n_finite_elements (const dealii::hp::DoFHandler<dim,spacedim> &dof_handler)
+      {
+        return dof_handler.get_fe().size();
+      }
+
+      // generic fallback: every other handler type is reported as using one element
+      template <typename DoFHandlerType>
+      unsigned int
+      n_finite_elements (const DoFHandlerType &)
+      {
+        return 1;
+      }
+
+
+
+      /**
+       * Transfer the rows of @p face_constraints into @p constraints.
+       *
+       * Row r constrains slave_dofs[r] against the DoFs in @p master_dofs,
+       * weighted by the entries of that row. A row is skipped if its slave
+       * DoF is already constrained, or if a weight of exactly one ties the
+       * slave DoF to itself (an identity that unified DoF indices already
+       * satisfy). Zero weights and weights below 1e-14 times the absolute
+       * row sum are dropped to keep the sparsity pattern from filling up.
+       */
+      void
+      filter_constraints (const std::vector<types::global_dof_index> &master_dofs,
+                          const std::vector<types::global_dof_index> &slave_dofs,
+                          const FullMatrix<double> &face_constraints,
+                          ConstraintMatrix &constraints)
+      {
+        // the matrix needs one column per master and one row per slave DoF
+        Assert (face_constraints.n () == master_dofs.size (),
+                ExcDimensionMismatch(master_dofs.size (),
+                                     face_constraints.n()));
+        Assert (face_constraints.m () == slave_dofs.size (),
+                ExcDimensionMismatch(slave_dofs.size (),
+                                     face_constraints.m()));
+
+        const unsigned int n_columns = master_dofs.size ();
+        const unsigned int n_rows    = slave_dofs.size ();
+
+        // invalid indices have shown up here in parallel distributed mode;
+        // catch them before they are used
+        for (unsigned int row=0; row<n_rows; ++row)
+          {
+            Assert (slave_dofs[row] != numbers::invalid_dof_index,
+                    ExcInternalError());
+          }
+        for (unsigned int col=0; col<n_columns; ++col)
+          {
+            Assert (master_dofs[col] != numbers::invalid_dof_index,
+                    ExcInternalError());
+          }
+
+
+        for (unsigned int r=0; r<n_rows; ++r)
+          {
+            const types::global_dof_index slave = slave_dofs[r];
+
+            // skip DoFs that are already constrained, e.g. eliminated in an
+            // earlier step of the hp hanging node procedure
+            if (constraints.is_constrained (slave))
+              continue;
+
+            // look for an identity constraint, i.e. a weight of one on the
+            // slave DoF itself; unified global dof indices already satisfy it
+            bool is_identity = false;
+            for (unsigned int c=0; (c<n_columns) && !is_identity; ++c)
+              is_identity = ((face_constraints (r,c) == 1.0) &&
+                             (master_dofs[c] == slave));
+            if (is_identity)
+              continue;
+
+            // the absolute row sum measures the size of this constraint
+            double row_abs_sum = 0;
+            for (unsigned int c=0; c<n_columns; ++c)
+              row_abs_sum += std::abs (face_constraints(r,c));
+
+            // enter only weights of at least 1e-14*row_abs_sum. anything
+            // smaller probably stems from inexact matrix inversion and
+            // similar effects; it would make sparsity patterns fuller
+            // without producing any significant effect
+            constraints.add_line (slave);
+            for (unsigned int c=0; c<n_columns; ++c)
+              {
+                const double weight = face_constraints (r,c);
+                if ((weight != 0) &&
+                    (std::fabs(weight) >= 1e-14*row_abs_sum))
+                  constraints.add_entry (slave, master_dofs[c], weight);
+              }
+            constraints.set_inhomogeneity (slave, 0.);
+          }
+      }
+
+    }
+
+
+
+    void
+    make_hp_hanging_node_constraints (const dealii::DoFHandler<1> &,
+                                      ConstraintMatrix &)
+    {
+      // intentionally empty: nothing to do for regular dof handlers in 1d
+    }
+
+
+
+    void
+    make_oldstyle_hanging_node_constraints (const dealii::DoFHandler<1> &,
+                                            ConstraintMatrix &,
+                                            dealii::internal::int2type<1>)
+    {
+      // intentionally empty: nothing to do for regular dof handlers in 1d
+    }
+
+
+    void
+    make_hp_hanging_node_constraints (const dealii::hp::DoFHandler<1> &/*dof_handler*/,
+                                      ConstraintMatrix        &/*constraints*/)
+    {
+      // left empty for now: constraints for vertices may have to be
+      // computed for hp dof handlers in 1d, but none are added here
+
+      //TODO[WB]: think about what to do here...
+    }
+
+
+
+    void
+    make_oldstyle_hanging_node_constraints (const dealii::hp::DoFHandler<1> &/*dof_handler*/,
+                                            ConstraintMatrix        &/*constraints*/,
+                                            dealii::internal::int2type<1>)
+    {
+      // left empty for now: constraints for vertices may have to be
+      // computed for hp dof handlers in 1d, but none are added here
+
+      //TODO[WB]: think about what to do here...
+    }
+
+
+    void
+    make_hp_hanging_node_constraints (const dealii::DoFHandler<1,2> &,
+                                      ConstraintMatrix &)
+    {
+      // intentionally empty: nothing to do for regular 1d dof handlers (here with spacedim 2)
+    }
+
+
+
+    void
+    make_oldstyle_hanging_node_constraints (const dealii::DoFHandler<1,2> &,
+                                            ConstraintMatrix &,
+                                            dealii::internal::int2type<1>)
+    {
+      // intentionally empty: nothing to do for regular 1d dof handlers (here with spacedim 2)
+    }
+
+
+    void
+    make_hp_hanging_node_constraints (const dealii::DoFHandler<1,3> &,
+                                      ConstraintMatrix &)
+    {
+      // intentionally empty: nothing to do for regular 1d dof handlers (here with spacedim 3)
+    }
+
+    void
+    make_oldstyle_hanging_node_constraints (const dealii::DoFHandler<1,3> &,
+                                            ConstraintMatrix &,
+                                            dealii::internal::int2type<1>)
+    {
+      // intentionally empty: nothing to do for regular 1d dof handlers (here with spacedim 3)
+    }
+
+
+//   currently not used but may be in the future:
+
+//     void
+//     make_hp_hanging_node_constraints (const dealii::MDoFHandler<1,2> &,
+//                                    ConstraintMatrix    &)
+//     {
+//                                     // nothing to do for regular
+//                                     // dof handlers in 1d
+//     }
+
+
+
+//     void
+//     make_oldstyle_hanging_node_constraints (const dealii::DoFHandler<1,2> &,
+//                                          ConstraintMatrix    &,
+//                                          dealii::internal::int2type<1>)
+//     {
+//                                     // nothing to do for regular
+//                                     // dof handlers in 1d
+//     }
+
+
+//     void
+//     make_oldstyle_hanging_node_constraints (const dealii::hp::DoFHandler<1,2> &/*dof_handler*/,
+//                                          ConstraintMatrix        &/*constraints*/,
+//                                          dealii::internal::int2type<1>)
+//     {
+//                                     // we may have to compute
+//                                     // constraints for
+//                                     // vertices. gotta think about
+//                                     // that a bit more
+//
+// //TODO[WB]: think about what to do here...
+//     }
+//#endif
+
+
+
+    template <typename DoFHandlerType>
+    void
+    make_oldstyle_hanging_node_constraints (const DoFHandlerType &dof_handler,
+                                            ConstraintMatrix     &constraints,
+                                            dealii::internal::int2type<2>)
+    {
+      const unsigned int dim = 2;
+
+      const unsigned int spacedim = DoFHandlerType::space_dimension;
+
+      std::vector<types::global_dof_index> dofs_on_mother;
+      std::vector<types::global_dof_index> dofs_on_children;
+
+      // Purpose: add to @p constraints the hanging node constraints of a 2d
+      // mesh, taking the weights from fe.constraints() of the coarse cell.
+      //
+      // only lines can carry constraints in 2d. we find them by looping over
+      // all active cells and checking whether a face is refined; since the
+      // cell itself is active, the refinement must stem from the neighbor.
+      // note that even though we may visit a face twice if the neighboring
+      // cells are equally refined, we can only visit each face with
+      // hanging nodes once
+      typename DoFHandlerType::active_cell_iterator cell = dof_handler.begin_active(),
+                                                    endc = dof_handler.end();
+      for (; cell!=endc; ++cell)
+        {
+          // artificial cells can at best neighbor ghost cells, but we're not
+          // interested in these interfaces
+          if (cell->is_artificial ())
+            continue;
+
+          for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+            if (cell->face(face)->has_children())
+              {
+                // in any case, faces can have at most two active fe
+                // indices, but here the face can have only one (namely the
+                // same as that from the cell we're sitting on), and each
+                // of the children can have only one as well. check this
+                Assert (cell->face(face)->n_active_fe_indices() == 1,
+                        ExcInternalError());
+                Assert (cell->face(face)->fe_index_is_active(cell->active_fe_index())
+                        == true,
+                        ExcInternalError());
+                for (unsigned int c=0; c<cell->face(face)->n_children(); ++c)
+                  if (!cell->neighbor_child_on_subface(face,c)->is_artificial())
+                    Assert (cell->face(face)->child(c)->n_active_fe_indices() == 1,
+                            ExcInternalError());
+
+                // right now, all that is implemented is the case that both
+                // sides use the same fe
+                for (unsigned int c=0; c<cell->face(face)->n_children(); ++c)
+                  if (!cell->neighbor_child_on_subface(face,c)->is_artificial())
+                    Assert (cell->face(face)->child(c)
+                            ->fe_index_is_active(cell->active_fe_index()) == true,
+                            ExcNotImplemented());
+
+                // ok, start up the work
+                const FiniteElement<dim,spacedim> &fe       = cell->get_fe();
+                const unsigned int        fe_index = cell->active_fe_index();
+                // mother: two vertices plus the line; children: one vertex plus two child lines
+                const unsigned int
+                n_dofs_on_mother   = 2*fe.dofs_per_vertex + fe.dofs_per_line,
+                n_dofs_on_children = fe.dofs_per_vertex + 2*fe.dofs_per_line;
+
+                dofs_on_mother.resize (n_dofs_on_mother);
+                // we might not use all of those in case of artificial cells,
+                // so do not resize(), but reserve() and use push_back later.
+                dofs_on_children.clear();
+                dofs_on_children.reserve (n_dofs_on_children);
+                // fe.constraints() must be of size n_dofs_on_children x n_dofs_on_mother
+                Assert(n_dofs_on_mother == fe.constraints().n(),
+                       ExcDimensionMismatch(n_dofs_on_mother,
+                                            fe.constraints().n()));
+                Assert(n_dofs_on_children == fe.constraints().m(),
+                       ExcDimensionMismatch(n_dofs_on_children,
+                                            fe.constraints().m()));
+
+                const typename DoFHandlerType::line_iterator this_face = cell->face(face);
+
+                // fill the dofs indices. Use same enumeration scheme as in
+                // @p{FiniteElement::constraints()}: on the mother, first the
+                unsigned int next_index = 0;   // dofs on both vertices, then those on the line
+                for (unsigned int vertex=0; vertex<2; ++vertex)
+                  for (unsigned int dof=0; dof!=fe.dofs_per_vertex; ++dof)
+                    dofs_on_mother[next_index++] = this_face->vertex_dof_index(vertex,dof,
+                                                                               fe_index);
+                for (unsigned int dof=0; dof!=fe.dofs_per_line; ++dof)
+                  dofs_on_mother[next_index++] = this_face->dof_index(dof, fe_index);
+                AssertDimension (next_index, dofs_on_mother.size());
+                // children: first the dofs on vertex 1 of child 0, then the line dofs of each child
+                for (unsigned int dof=0; dof!=fe.dofs_per_vertex; ++dof)
+                  dofs_on_children.push_back(
+                    this_face->child(0)->vertex_dof_index(1,dof,fe_index));
+                for (unsigned int child=0; child<2; ++child)
+                  {
+                    // skip artificial cells
+                    if (cell->neighbor_child_on_subface (face, child)->is_artificial())
+                      continue;
+                    for (unsigned int dof=0; dof!=fe.dofs_per_line; ++dof)
+                      dofs_on_children.push_back(
+                        this_face->child(child)->dof_index(dof, fe_index));
+                  }
+                // note: can get fewer DoFs when we have artificial cells
+                Assert(dofs_on_children.size() <= n_dofs_on_children, ExcInternalError());
+                // NOTE(review): rows below are positional; if child 0 was skipped, child 1's dofs use child 0's rows -- confirm
+                // for each row in the constraint matrix for this line:
+                for (unsigned int row=0; row!=dofs_on_children.size(); ++row)
+                  {
+                    constraints.add_line (dofs_on_children[row]);
+                    for (unsigned int i=0; i!=dofs_on_mother.size(); ++i)
+                      constraints.add_entry (dofs_on_children[row],
+                                             dofs_on_mother[i],
+                                             fe.constraints()(row,i));
+
+                    constraints.set_inhomogeneity (dofs_on_children[row], 0.);
+                  }
+              }
+            else
+              {
+                // this face has no children, but it could still be that it
+                // is shared by two cells that use a different fe index.
+                // check a couple of things, but ignore the case that the
+                // neighbor is an artificial cell
+                if (!cell->at_boundary(face) &&
+                    !cell->neighbor(face)->is_artificial())
+                  {
+                    Assert (cell->face(face)->n_active_fe_indices() == 1,
+                            ExcNotImplemented());
+                    Assert (cell->face(face)
+                            ->fe_index_is_active(cell->active_fe_index()) == true,
+                            ExcInternalError());
+                  }
+              }
+        }
+    }
+
+
+
+    template <typename DoFHandlerType>
+    void
+    make_oldstyle_hanging_node_constraints (const DoFHandlerType &dof_handler,
+                                            ConstraintMatrix     &constraints,
+                                            dealii::internal::int2type<3>)
+    {
+      const unsigned int dim = 3;
+
+      std::vector<types::global_dof_index> dofs_on_mother;
+      std::vector<types::global_dof_index> dofs_on_children;
+
+      // Purpose: add to @p constraints the hanging node constraints of a 3d
+      // mesh, taking the weights from fe.constraints() of the coarse cell.
+      //
+      // only quads can carry constraints in 3d. we find them by looping over
+      // all active cells and checking whether a face is refined; since the
+      // cell itself is active, the refinement must stem from the neighbor.
+      // note that even though we may visit a face twice if the neighboring
+      // cells are equally refined, we can only visit each face with
+      // hanging nodes once
+      typename DoFHandlerType::active_cell_iterator cell = dof_handler.begin_active(),
+                                                    endc = dof_handler.end();
+      for (; cell!=endc; ++cell)
+        {
+          // artificial cells can at best neighbor ghost cells, but we're not
+          // interested in these interfaces
+          if (cell->is_artificial ())
+            continue;
+
+          for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+            if (cell->face(face)->has_children())
+              {
+                // first of all, make sure that we treat a case which is
+                // possible, i.e. either no dofs on the face at all or no
+                // anisotropic refinement
+                if (cell->get_fe().dofs_per_face == 0)
+                  continue;
+
+                Assert(cell->face(face)->refinement_case()==RefinementCase<dim-1>::isotropic_refinement,
+                       ExcNotImplemented());
+
+                // in any case, faces can have at most two active fe
+                // indices, but here the face can have only one (namely the
+                // same as that from the cell we're sitting on), and each
+                // of the children can have only one as well. check this
+                AssertDimension (cell->face(face)->n_active_fe_indices(), 1);
+                Assert (cell->face(face)->fe_index_is_active(cell->active_fe_index())
+                        == true,
+                        ExcInternalError());
+                for (unsigned int c=0; c<cell->face(face)->n_children(); ++c)
+                  AssertDimension (cell->face(face)->child(c)->n_active_fe_indices(), 1);
+
+                // right now, all that is implemented is the case that both
+                // sides use the same fe, and not only that but also that
+                // all lines bounding this face and the children have the
+                // same fe
+                for (unsigned int c=0; c<cell->face(face)->n_children(); ++c)
+                  if (!cell->neighbor_child_on_subface(face,c)->is_artificial())
+                    {
+                      Assert (cell->face(face)->child(c)
+                              ->fe_index_is_active(cell->active_fe_index()) == true,
+                              ExcNotImplemented());
+                      for (unsigned int e=0; e<4; ++e)
+                        {
+                          Assert (cell->face(face)->child(c)->line(e)
+                                  ->n_active_fe_indices() == 1,
+                                  ExcNotImplemented());
+                          Assert (cell->face(face)->child(c)->line(e)
+                                  ->fe_index_is_active(cell->active_fe_index()) == true,
+                                  ExcNotImplemented());
+                        }
+                    }
+                for (unsigned int e=0; e<4; ++e)
+                  {
+                    Assert (cell->face(face)->line(e)
+                            ->n_active_fe_indices() == 1,
+                            ExcNotImplemented());
+                    Assert (cell->face(face)->line(e)
+                            ->fe_index_is_active(cell->active_fe_index()) == true,
+                            ExcNotImplemented());
+                  }
+
+                // ok, start up the work
+                const FiniteElement<dim> &fe       = cell->get_fe();
+                const unsigned int        fe_index = cell->active_fe_index();
+                // children: 5 vertices, 12 lines and 4 quads, as enumerated further down
+                const unsigned int n_dofs_on_mother = fe.dofs_per_face;
+                const unsigned int n_dofs_on_children = (5*fe.dofs_per_vertex+
+                                                         12*fe.dofs_per_line+
+                                                         4*fe.dofs_per_quad);
+
+                //TODO[TL]: think about this and the following in case of anisotropic refinement
+
+                dofs_on_mother.resize (n_dofs_on_mother);
+                // we might not use all of those in case of artificial cells,
+                // so do not resize(), but reserve() and use push_back later.
+                dofs_on_children.clear();
+                dofs_on_children.reserve (n_dofs_on_children);
+                // fe.constraints() must be of size n_dofs_on_children x n_dofs_on_mother
+                Assert(n_dofs_on_mother == fe.constraints().n(),
+                       ExcDimensionMismatch(n_dofs_on_mother,
+                                            fe.constraints().n()));
+                Assert(n_dofs_on_children == fe.constraints().m(),
+                       ExcDimensionMismatch(n_dofs_on_children,
+                                            fe.constraints().m()));
+
+                const typename DoFHandlerType::face_iterator this_face = cell->face(face);
+
+                // fill the dofs indices. Use same enumeration scheme as in
+                // @p{FiniteElement::constraints()}: vertices, lines, then the quad
+                unsigned int next_index = 0;
+                for (unsigned int vertex=0; vertex<4; ++vertex)
+                  for (unsigned int dof=0; dof!=fe.dofs_per_vertex; ++dof)
+                    dofs_on_mother[next_index++] = this_face->vertex_dof_index(vertex,dof,
+                                                                               fe_index);
+                for (unsigned int line=0; line<4; ++line)
+                  for (unsigned int dof=0; dof!=fe.dofs_per_line; ++dof)
+                    dofs_on_mother[next_index++]
+                      = this_face->line(line)->dof_index(dof, fe_index);
+                for (unsigned int dof=0; dof!=fe.dofs_per_quad; ++dof)
+                  dofs_on_mother[next_index++] = this_face->dof_index(dof, fe_index);
+                AssertDimension (next_index, dofs_on_mother.size());
+
+                //TODO: assert some consistency assumptions
+
+                //TODO[TL]: think about this in case of anisotropic
+                //refinement
+                // unless anisotropic refinement is in use, all four children share vertex 3 of child 0
+                Assert (dof_handler.get_triangulation().get_anisotropic_refinement_flag() ||
+                        ((this_face->child(0)->vertex_index(3) ==
+                          this_face->child(1)->vertex_index(2)) &&
+                         (this_face->child(0)->vertex_index(3) ==
+                          this_face->child(2)->vertex_index(1)) &&
+                         (this_face->child(0)->vertex_index(3) ==
+                          this_face->child(3)->vertex_index(0))),
+                        ExcInternalError());
+                // dofs on that shared vertex; pass fe_index explicitly like every other dof access in this function
+                for (unsigned int dof=0; dof!=fe.dofs_per_vertex; ++dof)
+                  dofs_on_children.push_back(
+                    this_face->child(0)->vertex_dof_index(3,dof,fe_index));
+
+                // dof numbers on the centers of the lines bounding this
+                // face
+                for (unsigned int line=0; line<4; ++line)
+                  for (unsigned int dof=0; dof!=fe.dofs_per_vertex; ++dof)
+                    dofs_on_children.push_back(
+                      this_face->line(line)->child(0)->vertex_dof_index(1,dof, fe_index));
+
+                // next the dofs on the lines interior to the face; the
+                // order of these lines is laid down in the FiniteElement
+                // class documentation
+                for (unsigned int dof=0; dof<fe.dofs_per_line; ++dof)
+                  dofs_on_children.push_back(
+                    this_face->child(0)->line(1)->dof_index(dof, fe_index));
+                for (unsigned int dof=0; dof<fe.dofs_per_line; ++dof)
+                  dofs_on_children.push_back(
+                    this_face->child(2)->line(1)->dof_index(dof, fe_index));
+                for (unsigned int dof=0; dof<fe.dofs_per_line; ++dof)
+                  dofs_on_children.push_back(
+                    this_face->child(0)->line(3)->dof_index(dof, fe_index));
+                for (unsigned int dof=0; dof<fe.dofs_per_line; ++dof)
+                  dofs_on_children.push_back(
+                    this_face->child(1)->line(3)->dof_index(dof, fe_index));
+
+                // dofs on the bordering lines
+                for (unsigned int line=0; line<4; ++line)
+                  for (unsigned int child=0; child<2; ++child)
+                    {
+                      for (unsigned int dof=0; dof!=fe.dofs_per_line; ++dof)
+                        dofs_on_children.push_back(
+                          this_face->line(line)->child(child)->dof_index(dof, fe_index));
+                    }
+
+                // finally, for the dofs interior to the four child faces
+                for (unsigned int child=0; child<4; ++child)
+                  {
+                    // skip artificial cells
+                    if (cell->neighbor_child_on_subface (face, child)->is_artificial())
+                      continue;
+                    for (unsigned int dof=0; dof!=fe.dofs_per_quad; ++dof)
+                      dofs_on_children.push_back(
+                        this_face->child(child)->dof_index(dof, fe_index));
+                  }
+
+                // note: can get fewer DoFs when we have artificial cells:
+                Assert(dofs_on_children.size() <= n_dofs_on_children, ExcInternalError());
+                // NOTE(review): rows below are positional; a skipped child shifts later quad dofs onto its rows -- confirm
+                // for each row in the constraint matrix for this face:
+                for (unsigned int row=0; row!=dofs_on_children.size(); ++row)
+                  {
+                    constraints.add_line (dofs_on_children[row]);
+                    for (unsigned int i=0; i!=dofs_on_mother.size(); ++i)
+                      constraints.add_entry (dofs_on_children[row],
+                                             dofs_on_mother[i],
+                                             fe.constraints()(row,i));
+
+                    constraints.set_inhomogeneity(dofs_on_children[row], 0.);
+                  }
+              }
+            else
+              {
+                // this face has no children, but it could still be that it
+                // is shared by two cells that use a different fe index.
+                // check a couple of things, but ignore the case that the
+                // neighbor is an artificial cell
+                if (!cell->at_boundary(face) &&
+                    !cell->neighbor(face)->is_artificial())
+                  {
+                    Assert (cell->face(face)->n_active_fe_indices() == 1,
+                            ExcNotImplemented());
+                    Assert (cell->face(face)
+                            ->fe_index_is_active(cell->active_fe_index()) == true,
+                            ExcInternalError());
+                  }
+              }
+        }
+    }
+
+    namespace internal
+    {
+      /**
+       * Return a pointer to the hp::FECollection that @p dof_handler hands out through get_fe().
+       */
+      template <int dim, int spacedim>
+      const dealii::hp::FECollection<dim,spacedim> *
+      get_fe_collection (const dealii::hp::DoFHandler<dim,spacedim> &dof_handler)
+      {
+        return &dof_handler.get_fe();
+      }
+      // overload for non-hp DoFHandler objects: reports ExcInternalError via AssertThrow, NULL as fallback
+      template <int dim, int spacedim>
+      const dealii::hp::FECollection<dim,spacedim> *
+      get_fe_collection (const dealii::DoFHandler<dim,spacedim> &)
+      {
+        AssertThrow(false, ExcInternalError());
+        return NULL;
+      }
+    }
+
+
+    template <typename DoFHandlerType>
+    void
+    make_hp_hanging_node_constraints (const DoFHandlerType &dof_handler,
+                                      ConstraintMatrix     &constraints)
+    {
+      // note: this function is going to be hard to understand if you
+      // haven't read the hp paper. however, we try to follow the notation
+      // laid out there, so go read the paper before you try to understand
+      // what is going on here
+
+      const unsigned int dim = DoFHandlerType::dimension;
+
+      const unsigned int spacedim = DoFHandlerType::space_dimension;
+
+
+      // a matrix to be used for constraints below. declared here and
+      // simply resized down below to avoid permanent re-allocation of
+      // memory
+      FullMatrix<double> constraint_matrix;
+
+      // similarly have arrays that will hold master and slave dof numbers,
+      // as well as a scratch array needed for the complicated case below
+      std::vector<types::global_dof_index> master_dofs;
+      std::vector<types::global_dof_index> slave_dofs;
+      std::vector<types::global_dof_index> scratch_dofs;
+
+      // caches for the face and subface interpolation matrices between
+      // different (or the same) finite elements. we compute them only
+      // once, namely the first time they are needed, and then just reuse
+      // them
+      Table<2,std_cxx11::shared_ptr<FullMatrix<double> > >
+      face_interpolation_matrices (n_finite_elements (dof_handler),
+                                   n_finite_elements (dof_handler));
+      Table<3,std_cxx11::shared_ptr<FullMatrix<double> > >
+      subface_interpolation_matrices (n_finite_elements (dof_handler),
+                                      n_finite_elements (dof_handler),
+                                      GeometryInfo<dim>::max_children_per_face);
+
+      // similarly have a cache for the matrices that are split into their
+      // master and slave parts, and for which the master part is inverted.
+      // these two matrices are derived from the face interpolation matrix
+      // as described in the @ref hp_paper "hp paper"
+      Table<2,std_cxx11::shared_ptr<std::pair<FullMatrix<double>,FullMatrix<double> > > >
+      split_face_interpolation_matrices (n_finite_elements (dof_handler),
+                                         n_finite_elements (dof_handler));
+
+      // finally, for each pair of finite elements, have a mask that states
+      // which of the degrees of freedom on the coarse side of a refined
+      // face will act as master dofs.
+      Table<2,std_cxx11::shared_ptr<std::vector<bool> > >
+      master_dof_masks (n_finite_elements (dof_handler),
+                        n_finite_elements (dof_handler));
+
+      // loop over all faces
+      //
+      // note that even though we may visit a face twice if the neighboring
+      // cells are equally refined, we can only visit each face with
+      // hanging nodes once
+      typename DoFHandlerType::active_cell_iterator cell = dof_handler.begin_active(),
+                                                    endc = dof_handler.end();
+      for (; cell!=endc; ++cell)
+        {
+          // artificial cells can at best neighbor ghost cells, but we're not
+          // interested in these interfaces
+          if (cell->is_artificial ())
+            continue;
+
+          for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+            if (cell->face(face)->has_children())
+              {
+                // first of all, make sure that we treat a case which is
+                // possible, i.e. either no dofs on the face at all or no
+                // anisotropic refinement
+                if (cell->get_fe().dofs_per_face == 0)
+                  continue;
+
+                Assert(cell->face(face)->refinement_case()==RefinementCase<dim-1>::isotropic_refinement,
+                       ExcNotImplemented());
+
+                // so now we've found a face of an active cell that has
+                // children. that means that there are hanging nodes here.
+
+                // in any case, faces can have at most two sets of active
+                // fe indices, but here the face can have only one (namely
+                // the same as that from the cell we're sitting on), and
+                // each of the children can have only one as well. check
+                // this
+                Assert (cell->face(face)->n_active_fe_indices() == 1,
+                        ExcInternalError());
+                Assert (cell->face(face)->fe_index_is_active(cell->active_fe_index())
+                        == true,
+                        ExcInternalError());
+                for (unsigned int c=0; c<cell->face(face)->n_children(); ++c)
+                  Assert (cell->face(face)->child(c)->n_active_fe_indices() == 1,
+                          ExcInternalError());
+
+                // first find out whether we can constrain each of the
+                // subfaces to the mother face. in the lingo of the hp
+                // paper, this would be the simple case. note that we can
+                // short-circuit this decision if the dof_handler doesn't
+                // support hp at all
+                //
+                // ignore all interfaces with artificial cells
+                FiniteElementDomination::Domination
+                mother_face_dominates = FiniteElementDomination::either_element_can_dominate;
+
+                // auxiliary variable which holds FE indices of the mother face
+                // and its subfaces. This knowledge will be needed in hp-case
+                // with neither_element_dominates.
+                std::set<unsigned int> fe_ind_face_subface;
+                fe_ind_face_subface.insert(cell->active_fe_index());
+
+                if (DoFHandlerSupportsDifferentFEs<DoFHandlerType>::value == true)
+                  for (unsigned int c=0; c<cell->face(face)->number_of_children(); ++c)
+                    if (!cell->neighbor_child_on_subface (face, c)->is_artificial())
+                      {
+                        mother_face_dominates = mother_face_dominates &
+                                                (cell->get_fe().compare_for_face_domination
+                                                 (cell->neighbor_child_on_subface (face, c)->get_fe()));
+                        fe_ind_face_subface.insert(cell->neighbor_child_on_subface (face, c)->active_fe_index());
+                      }
+
+                switch (mother_face_dominates)
+                  {
+                  case FiniteElementDomination::this_element_dominates:
+                  case FiniteElementDomination::either_element_can_dominate:
+                  {
+                    // Case 1 (the simple case and the only case that can
+                    // happen for non-hp DoFHandlers): The coarse element
+                    // dominates the elements on the subfaces (or they are
+                    // all the same)
+                    //
+                    // so we are going to constrain the DoFs on the face
+                    // children against the DoFs on the face itself
+                    master_dofs.resize (cell->get_fe().dofs_per_face);
+
+                    cell->face(face)->get_dof_indices (master_dofs,
+                                                       cell->active_fe_index ());
+
+                    // Now create constraint matrix for the subfaces and
+                    // assemble it. ignore all interfaces with artificial
+                    // cells because we can only get to such interfaces if
+                    // the current cell is a ghost cell
+                    for (unsigned int c=0; c<cell->face(face)->n_children(); ++c)
+                      {
+                        if (cell->neighbor_child_on_subface (face, c)->is_artificial())
+                          continue;
+
+                        const typename DoFHandlerType::active_face_iterator
+                        subface = cell->face(face)->child(c);
+
+                        Assert (subface->n_active_fe_indices() == 1,
+                                ExcInternalError());
+
+                        const unsigned int
+                        subface_fe_index = subface->nth_active_fe_index(0);
+
+                        // we sometimes run into the situation where for
+                        // example on one big cell we have a FE_Q(1) and on
+                        // the subfaces we have a mixture of FE_Q(1) and
+                        // FE_Nothing. In that case, the face domination is
+                        // either_element_can_dominate for the whole
+                        // collection of subfaces, but on the particular
+                        // subface between FE_Q(1) and FE_Nothing, there
+                        // are no constraints that we need to take care of.
+                        // in that case, just continue
+                        if (cell->get_fe().compare_for_face_domination
+                            (subface->get_fe(subface_fe_index))
+                            ==
+                            FiniteElementDomination::no_requirements)
+                          continue;
+
+                        // Same procedure as for the mother cell. Extract
+                        // the face DoFs from the cell DoFs.
+                        slave_dofs.resize (subface->get_fe(subface_fe_index)
+                                           .dofs_per_face);
+                        subface->get_dof_indices (slave_dofs, subface_fe_index);
+
+                        for (unsigned int i=0; i<slave_dofs.size(); ++i)
+                          Assert (slave_dofs[i] != numbers::invalid_dof_index,
+                                  ExcInternalError());
+
+                        // Now create the element constraint for this
+                        // subface.
+                        //
+                        // As a side remark, one may wonder the following:
+                        // neighbor_child is clearly computed correctly,
+                        // i.e. taking into account face_orientation (just
+                        // look at the implementation of that function).
+                        // however, we don't care about this here, when we
+                        // ask for subface_interpolation on subface c. the
+                        // question rather is: do we have to translate 'c'
+                        // here as well?
+                        //
+                        // the answer is in fact 'no'. if one does that,
+                        // results are wrong: constraints are added twice
+                        // for the same pair of nodes but with differing
+                        // weights. in addition, one can look at the
+                        // deal.II/project_*_03 tests that look at exactly
+                        // this case: there, we have a mesh with at least
+                        // one face_orientation==false and hanging nodes,
+                        // and the results of those tests show that the
+                        // result of projection verifies the approximation
+                        // properties of a finite element onto that mesh
+                        ensure_existence_of_subface_matrix
+                        (cell->get_fe(),
+                         subface->get_fe(subface_fe_index),
+                         c,
+                         subface_interpolation_matrices
+                         [cell->active_fe_index()][subface_fe_index][c]);
+
+                        // Add constraints to global constraint matrix.
+                        filter_constraints (master_dofs,
+                                            slave_dofs,
+                                            *(subface_interpolation_matrices
+                                              [cell->active_fe_index()][subface_fe_index][c]),
+                                            constraints);
+                      }
+
+                    break;
+                  }
+
+                  case FiniteElementDomination::other_element_dominates:
+                  case FiniteElementDomination::neither_element_dominates:
+                  {
+                    // Case 2 (the "complex" case): at least one (the
+                    // neither_... case) of the finer elements or all of
+                    // them (the other_... case) is dominating. See the hp
+                    // paper for a way to deal with this situation
+                    //
+                    // since this is something that can only happen for hp
+                    // dof handlers, add a check here...
+                    Assert (DoFHandlerSupportsDifferentFEs<DoFHandlerType>::value == true,
+                            ExcInternalError());
+
+                    const dealii::hp::FECollection<dim,spacedim> &fe_collection =
+                      *internal::get_fe_collection (dof_handler);
+                    // we first have to find the finite element that is
+                    // able to generate a space that all the other ones can
+                    // be constrained to.
+                    // At this point we potentially have different scenarios:
+                    // 1) sub-faces dominate mother face and there is a
+                    // dominating FE among sub faces. We could loop over sub
+                    // faces to find the needed FE index. However, this will not
+                    // work in the case when
+                    // 2) there is no dominating FE among sub faces (e.g. Q1xQ2 vs Q2xQ1),
+                    // but subfaces still dominate mother face (e.g. Q2xQ2).
+                    // To cover this case we would have to use find_least_face_dominating_fe()
+                    // of FECollection with fe_indices of sub faces.
+                    // 3) Finally, it could happen that we got here because
+                    // neither_element_dominates (e.g. Q1xQ1xQ2 and Q1xQ2xQ1 for
+                    // subfaces and Q2xQ1xQ1 for mother face).
+                    // This requires usage of find_least_face_dominating_fe()
+                    // with fe_indices of sub-faces and the mother face.
+                    // Note that the last solution covers the first two scenarios,
+                    // thus we stick with it assuming that we won't lose much time/efficiency.
+                    const unsigned int dominating_fe_index = fe_collection.find_least_face_dominating_fe(fe_ind_face_subface);
+                    AssertThrow(dominating_fe_index != numbers::invalid_unsigned_int,
+                                ExcMessage("Could not find a least face dominating FE."));
+
+                    const FiniteElement<dim,spacedim> &dominating_fe
+                      = dof_handler.get_fe()[dominating_fe_index];
+
+                    // first get the interpolation matrix from the mother
+                    // to the virtual dofs
+                    Assert (dominating_fe.dofs_per_face <=
+                            cell->get_fe().dofs_per_face,
+                            ExcInternalError());
+
+                    ensure_existence_of_face_matrix
+                    (dominating_fe,
+                     cell->get_fe(),
+                     face_interpolation_matrices
+                     [dominating_fe_index][cell->active_fe_index()]);
+
+                    // split this matrix into master and slave components.
+                    // invert the master component
+                    ensure_existence_of_master_dof_mask
+                    (cell->get_fe(),
+                     dominating_fe,
+                     (*face_interpolation_matrices
+                      [dominating_fe_index]
+                      [cell->active_fe_index()]),
+                     master_dof_masks
+                     [dominating_fe_index]
+                     [cell->active_fe_index()]);
+
+                    ensure_existence_of_split_face_matrix
+                    (*face_interpolation_matrices
+                     [dominating_fe_index][cell->active_fe_index()],
+                     (*master_dof_masks
+                      [dominating_fe_index][cell->active_fe_index()]),
+                     split_face_interpolation_matrices
+                     [dominating_fe_index][cell->active_fe_index()]);
+
+                    const FullMatrix<double> &restrict_mother_to_virtual_master_inv
+                      = (split_face_interpolation_matrices
+                         [dominating_fe_index][cell->active_fe_index()]->first);
+
+                    const FullMatrix<double> &restrict_mother_to_virtual_slave
+                      = (split_face_interpolation_matrices
+                         [dominating_fe_index][cell->active_fe_index()]->second);
+
+                    // now compute the constraint matrix as the product
+                    // between the inverse matrix and the slave part
+                    constraint_matrix.reinit (cell->get_fe().dofs_per_face -
+                                              dominating_fe.dofs_per_face,
+                                              dominating_fe.dofs_per_face);
+                    restrict_mother_to_virtual_slave
+                    .mmult (constraint_matrix,
+                            restrict_mother_to_virtual_master_inv);
+
+                    // then figure out the global numbers of master and
+                    // slave dofs and apply constraints
+                    scratch_dofs.resize (cell->get_fe().dofs_per_face);
+                    cell->face(face)->get_dof_indices (scratch_dofs,
+                                                       cell->active_fe_index ());
+
+                    // split dofs into master and slave components
+                    master_dofs.clear ();
+                    slave_dofs.clear ();
+                    for (unsigned int i=0; i<cell->get_fe().dofs_per_face; ++i)
+                      if ((*master_dof_masks
+                           [dominating_fe_index][cell->active_fe_index()])[i] == true)
+                        master_dofs.push_back (scratch_dofs[i]);
+                      else
+                        slave_dofs.push_back (scratch_dofs[i]);
+
+                    AssertDimension (master_dofs.size(), dominating_fe.dofs_per_face);
+                    AssertDimension (slave_dofs.size(),
+                                     cell->get_fe().dofs_per_face - dominating_fe.dofs_per_face);
+
+                    filter_constraints (master_dofs,
+                                        slave_dofs,
+                                        constraint_matrix,
+                                        constraints);
+
+
+
+                    // next we have to deal with the subfaces. do as
+                    // discussed in the hp paper
+                    for (unsigned int sf=0;
+                         sf<cell->face(face)->n_children(); ++sf)
+                      {
+                        // ignore interfaces with artificial cells as well
+                        // as interfaces between ghost cells in 2d
+                        if (cell->neighbor_child_on_subface (face, sf)->is_artificial()
+                            ||
+                            (dim==2 && cell->is_ghost()
+                             &&
+                             cell->neighbor_child_on_subface (face, sf)->is_ghost()))
+                          continue;
+
+                        Assert (cell->face(face)->child(sf)
+                                ->n_active_fe_indices() == 1,
+                                ExcInternalError());
+
+                        const unsigned int subface_fe_index
+                          = cell->face(face)->child(sf)->nth_active_fe_index(0);
+                        const FiniteElement<dim,spacedim> &subface_fe
+                          = dof_handler.get_fe()[subface_fe_index];
+
+                        // first get the interpolation matrix from the
+                        // subface to the virtual dofs
+                        Assert (dominating_fe.dofs_per_face <=
+                                subface_fe.dofs_per_face,
+                                ExcInternalError());
+                        ensure_existence_of_subface_matrix
+                        (dominating_fe,
+                         subface_fe,
+                         sf,
+                         subface_interpolation_matrices
+                         [dominating_fe_index][subface_fe_index][sf]);
+
+                        const FullMatrix<double> &restrict_subface_to_virtual
+                          = *(subface_interpolation_matrices
+                              [dominating_fe_index][subface_fe_index][sf]);
+
+                        constraint_matrix.reinit (subface_fe.dofs_per_face,
+                                                  dominating_fe.dofs_per_face);
+
+                        restrict_subface_to_virtual
+                        .mmult (constraint_matrix,
+                                restrict_mother_to_virtual_master_inv);
+
+                        slave_dofs.resize (subface_fe.dofs_per_face);
+                        cell->face(face)->child(sf)->get_dof_indices (slave_dofs,
+                                                                      subface_fe_index);
+
+                        filter_constraints (master_dofs,
+                                            slave_dofs,
+                                            constraint_matrix,
+                                            constraints);
+                      }
+
+                    break;
+                  }
+
+                  case FiniteElementDomination::no_requirements:
+                    // there are no continuity requirements between the two
+                    // elements. record no constraints
+                    break;
+
+                  default:
+                    // we shouldn't get here
+                    Assert (false, ExcInternalError());
+                  }
+              }
+            else
+              {
+                // this face has no children, but it could still be that it
+                // is shared by two cells that use a different fe index
+                Assert (cell->face(face)
+                        ->fe_index_is_active(cell->active_fe_index()) == true,
+                        ExcInternalError());
+
+                // see if there is a neighbor that is an artificial cell.
+                // in that case, we're not interested in this interface. we
+                // test this case first since artificial cells may not have
+                // an active_fe_index set, etc
+                if (!cell->at_boundary(face)
+                    &&
+                    cell->neighbor(face)->is_artificial())
+                  continue;
+
+                // Only if there is a neighbor with a different
+                // active_fe_index and the same h-level, some action has to
+                // be taken.
+                if ((DoFHandlerSupportsDifferentFEs<DoFHandlerType>::value == true)
+                    &&
+                    !cell->face(face)->at_boundary ()
+                    &&
+                    (cell->neighbor(face)->active_fe_index () !=
+                     cell->active_fe_index ())
+                    &&
+                    (!cell->face(face)->has_children() &&
+                     !cell->neighbor_is_coarser(face) ))
+                  {
+                    const typename DoFHandlerType::level_cell_iterator neighbor = cell->neighbor (face);
+
+                    // see which side of the face we have to constrain
+                    switch (cell->get_fe().compare_for_face_domination (neighbor->get_fe ()))
+                      {
+                      case FiniteElementDomination::this_element_dominates:
+                      {
+                        // Get DoFs on dominating and dominated side of the
+                        // face
+                        master_dofs.resize (cell->get_fe().dofs_per_face);
+                        cell->face(face)->get_dof_indices (master_dofs,
+                                                           cell->active_fe_index ());
+
+                        slave_dofs.resize (neighbor->get_fe().dofs_per_face);
+                        cell->face(face)->get_dof_indices (slave_dofs,
+                                                           neighbor->active_fe_index ());
+
+                        // break if the n_master_dofs == 0, because we are
+                        // attempting to constrain to an element that has
+                        // no face dofs
+                        if (master_dofs.size() == 0) break;
+
+                        // make sure the element constraints for this face
+                        // are available
+                        ensure_existence_of_face_matrix
+                        (cell->get_fe(),
+                         neighbor->get_fe(),
+                         face_interpolation_matrices
+                         [cell->active_fe_index()][neighbor->active_fe_index()]);
+
+                        // Add constraints to global constraint matrix.
+                        filter_constraints (master_dofs,
+                                            slave_dofs,
+                                            *(face_interpolation_matrices
+                                              [cell->active_fe_index()]
+                                              [neighbor->active_fe_index()]),
+                                            constraints);
+
+                        break;
+                      }
+
+                      case FiniteElementDomination::other_element_dominates:
+                      {
+                        // we don't do anything here since we will come
+                        // back to this face from the other cell, at which
+                        // time we will fall into the first case clause
+                        // above
+                        break;
+                      }
+
+                      case FiniteElementDomination::either_element_can_dominate:
+                      {
+                        // it appears as if neither element has any
+                        // constraints on its neighbor. this may be because
+                        // neither element has any DoFs on faces at all. or
+                        // that the two elements are actually the same,
+                        // although they happen to run under different
+                        // fe_indices (this is what happens in
+                        // hp/hp_hanging_nodes_01 for example).
+                        //
+                        // another possibility is what happens in crash_13.
+                        // there, we have FESystem(FE_Q(1),FE_DGQ(0)) vs.
+                        // FESystem(FE_Q(1),FE_DGQ(1)). neither of them
+                        // dominates the other.
+                        //
+                        // a final possibility is that we have something like
+                        // FESystem(FE_Q(1),FE_Q(1)) vs
+                        // FESystem(FE_Q(1),FE_Nothing()), see
+                        // hp/fe_nothing_18/19.
+                        //
+                        // in any case, the point is that it doesn't
+                        // matter. there is nothing to do here.
+                        break;
+                      }
+
+                      case FiniteElementDomination::neither_element_dominates:
+                      {
+                        // make sure we don't get here twice from each cell
+                        if (cell < neighbor)
+                          break;
+
+                        // our best bet is to find the common space among other
+                        // FEs in FECollection and then constrain both FEs
+                        // to that one.
+                        // More precisely, we follow the strategy outlined on
+                        // page 17 of the hp paper:
+                        // First we find the dominant FE space S.
+                        // Then we divide our dofs in master and slave such that
+                        // I^{face,master}_{S^{face}->S} is invertible.
+                        // And finally constrain slave dofs to master dofs based
+                        // on the interpolation matrix.
+
+                        const unsigned int this_fe_index = cell->active_fe_index();
+                        const unsigned int neighbor_fe_index = neighbor->active_fe_index();
+                        std::set<unsigned int> fes;
+                        fes.insert(this_fe_index);
+                        fes.insert(neighbor_fe_index);
+                        const dealii::hp::FECollection<dim,spacedim> &fe_collection =
+                          *internal::get_fe_collection (dof_handler);
+                        const unsigned int dominating_fe_index = fe_collection.find_least_face_dominating_fe(fes);
+
+                        AssertThrow(dominating_fe_index != numbers::invalid_unsigned_int,
+                                    ExcMessage("Could not find the dominating FE for "
+                                               +cell->get_fe().get_name()
+                                               +" and "
+                                               +neighbor->get_fe().get_name()
+                                               +" inside FECollection."));
+
+                        const FiniteElement<dim,spacedim> &dominating_fe = fe_collection[dominating_fe_index];
+
+                        // TODO: until we hit the second face, the code is
+                        // a copy-paste from h-refinement case...
+
+                        // first get the interpolation matrix from main FE
+                        // to the virtual dofs
+                        Assert (dominating_fe.dofs_per_face <=
+                                cell->get_fe().dofs_per_face,
+                                ExcInternalError());
+
+                        ensure_existence_of_face_matrix
+                        (dominating_fe,
+                         cell->get_fe(),
+                         face_interpolation_matrices
+                         [dominating_fe_index][cell->active_fe_index()]);
+
+                        // split this matrix into master and slave components.
+                        // invert the master component
+                        ensure_existence_of_master_dof_mask
+                        (cell->get_fe(),
+                         dominating_fe,
+                         (*face_interpolation_matrices
+                          [dominating_fe_index]
+                          [cell->active_fe_index()]),
+                         master_dof_masks
+                         [dominating_fe_index]
+                         [cell->active_fe_index()]);
+
+                        ensure_existence_of_split_face_matrix
+                        (*face_interpolation_matrices
+                         [dominating_fe_index][cell->active_fe_index()],
+                         (*master_dof_masks
+                          [dominating_fe_index][cell->active_fe_index()]),
+                         split_face_interpolation_matrices
+                         [dominating_fe_index][cell->active_fe_index()]);
+
+                        const FullMatrix<double> &restrict_mother_to_virtual_master_inv
+                          = (split_face_interpolation_matrices
+                             [dominating_fe_index][cell->active_fe_index()]->first);
+
+                        const FullMatrix<double> &restrict_mother_to_virtual_slave
+                          = (split_face_interpolation_matrices
+                             [dominating_fe_index][cell->active_fe_index()]->second);
+
+                        // now compute the constraint matrix as the product
+                        // between the inverse matrix and the slave part
+                        constraint_matrix.reinit (cell->get_fe().dofs_per_face -
+                                                  dominating_fe.dofs_per_face,
+                                                  dominating_fe.dofs_per_face);
+                        restrict_mother_to_virtual_slave
+                        .mmult (constraint_matrix,
+                                restrict_mother_to_virtual_master_inv);
+
+                        // then figure out the global numbers of master and
+                        // slave dofs and apply constraints
+                        scratch_dofs.resize (cell->get_fe().dofs_per_face);
+                        cell->face(face)->get_dof_indices (scratch_dofs,
+                                                           cell->active_fe_index ());
+
+                        // split dofs into master and slave components
+                        master_dofs.clear ();
+                        slave_dofs.clear ();
+                        for (unsigned int i=0; i<cell->get_fe().dofs_per_face; ++i)
+                          if ((*master_dof_masks
+                               [dominating_fe_index][cell->active_fe_index()])[i] == true)
+                            master_dofs.push_back (scratch_dofs[i]);
+                          else
+                            slave_dofs.push_back (scratch_dofs[i]);
+
+                        AssertDimension (master_dofs.size(), dominating_fe.dofs_per_face);
+                        AssertDimension (slave_dofs.size(),
+                                         cell->get_fe().dofs_per_face - dominating_fe.dofs_per_face);
+
+                        filter_constraints (master_dofs,
+                                            slave_dofs,
+                                            constraint_matrix,
+                                            constraints);
+
+                        // now do the same for another FE
+                        // this is pretty much the same as what we do above to
+                        // resolve h-refinement constraints
+                        Assert (dominating_fe.dofs_per_face <=
+                                neighbor->get_fe().dofs_per_face,
+                                ExcInternalError());
+
+                        ensure_existence_of_face_matrix
+                        (dominating_fe,
+                         neighbor->get_fe(),
+                         face_interpolation_matrices
+                         [dominating_fe_index][neighbor->active_fe_index()]);
+
+                        const FullMatrix<double> &restrict_secondface_to_virtual
+                          = *(face_interpolation_matrices
+                              [dominating_fe_index][neighbor->active_fe_index()]);
+
+                        constraint_matrix.reinit (neighbor->get_fe().dofs_per_face,
+                                                  dominating_fe.dofs_per_face);
+
+                        restrict_secondface_to_virtual
+                        .mmult (constraint_matrix,
+                                restrict_mother_to_virtual_master_inv);
+
+                        slave_dofs.resize (neighbor->get_fe().dofs_per_face);
+                        cell->face(face)->get_dof_indices (slave_dofs,
+                                                           neighbor->active_fe_index ());
+
+                        filter_constraints (master_dofs,
+                                            slave_dofs,
+                                            constraint_matrix,
+                                            constraints);
+
+                        break;
+                      }
+
+                      case FiniteElementDomination::no_requirements:
+                      {
+                        // nothing to do here
+                        break;
+                      }
+
+                      default:
+                        // we shouldn't get here
+                        Assert (false, ExcInternalError());
+                      }
+                  }
+              }
+        }
+    }
+  }
+
+
+
+
+  template <typename DoFHandlerType>
+  void
+  make_hanging_node_constraints (const DoFHandlerType &dof_handler,
+                                 ConstraintMatrix     &constraints)
+  {
+    // Dispatch to one of the two internal implementations. If the finite
+    // element (or every element of an FECollection) reports that the new
+    // face constraint matrices are implemented, the hp-capable code is used.
+    // Otherwise, fall back to the old, dimension-dispatched implementation.
+    if (dof_handler.get_fe().hp_constraints_are_implemented ())
+      internal::
+      make_hp_hanging_node_constraints (dof_handler,
+                                        constraints);
+    else
+      internal::
+      make_oldstyle_hanging_node_constraints (dof_handler,
+                                              constraints,
+                                              dealii::internal::int2type<DoFHandlerType::dimension>());
+  }
+
+
+
+  namespace
+  {
+    /**
+     * @internal
+     *
+     * Internally used in make_periodicity_constraints.
+     *
+     * enter constraints for periodicity into the given ConstraintMatrix object.
+     * this function is called when at least one of the two face iterators corresponds
+     * to an active object without further children
+     *
+     * @param transformation A matrix that maps degrees of freedom from one face
+     * to another. If the DoFs on the two faces are supposed to match exactly, then
+     * the matrix so provided will be the identity matrix. if face 2 is once refined
+     * from face 1, then the matrix needs to be the interpolation matrix from a face
+     * to this particular child
+     *
+     * @pre face_1 is supposed to be active
+     *
+     * @note As bug #82 (http://code.google.com/p/dealii/issues/detail?id=82) and the
+     * corresponding testcase bits/periodicity_05 demonstrate, we can occasionally
+     * get into trouble if we already have the constraint x1=x2 and want to insert
+     * x2=x1. we avoid this by skipping an identity constraint if the opposite
+     * constraint already exists
+     */
+    template <typename FaceIterator>
+    void
+    set_periodicity_constraints (const FaceIterator                          &face_1,
+                                 const typename identity<FaceIterator>::type &face_2,
+                                 const FullMatrix<double>                    &transformation,
+                                 dealii::ConstraintMatrix                    &constraint_matrix,
+                                 const ComponentMask                         &component_mask,
+                                 const bool                                   face_orientation,
+                                 const bool                                   face_flip,
+                                 const bool                                   face_rotation)
+    {
+      static const int dim      = FaceIterator::AccessorType::dimension;
+      static const int spacedim = FaceIterator::AccessorType::space_dimension;
+
+      // we should be in the case where face_1 is active, i.e. has no children:
+      Assert (!face_1->has_children(),
+              ExcInternalError());
+
+      Assert (face_1->n_active_fe_indices() == 1,
+              ExcInternalError());
+
+      // if face_2 does have children, then we need to iterate over them
+      if (face_2->has_children())
+        {
+          Assert (face_2->n_children() == GeometryInfo<dim>::max_children_per_face,
+                  ExcNotImplemented());
+          const unsigned int dofs_per_face
+            = face_1->get_fe(face_1->nth_active_fe_index(0)).dofs_per_face;
+          FullMatrix<double> child_transformation (dofs_per_face, dofs_per_face);
+          FullMatrix<double> subface_interpolation (dofs_per_face, dofs_per_face);
+          for (unsigned int c=0; c<face_2->n_children(); ++c)
+            {
+              // get the interpolation matrix recursively from the one that
+              // interpolated from face_1 to face_2 by multiplying from the
+              // left with the one that interpolates from face_2 to
+              // its child
+              face_1->get_fe(face_1->nth_active_fe_index(0))
+              .get_subface_interpolation_matrix (face_1->get_fe(face_1->nth_active_fe_index(0)),
+                                                 c,
+                                                 subface_interpolation);
+              subface_interpolation.mmult (child_transformation, transformation);
+              set_periodicity_constraints(face_1, face_2->child(c),
+                                          child_transformation,
+                                          constraint_matrix, component_mask,
+                                          face_orientation, face_flip, face_rotation);
+            }
+        }
+      else
+        // both faces are active. we need to match the corresponding DoFs of both faces
+        {
+          const unsigned int face_1_index = face_1->nth_active_fe_index(0);
+          const unsigned int face_2_index = face_2->nth_active_fe_index(0);
+          Assert(face_1->get_fe(face_1_index) == face_2->get_fe(face_2_index),
+                 ExcMessage ("Matching periodic cells need to use the same finite element"));
+
+          const FiniteElement<dim, spacedim> &fe = face_1->get_fe(face_1_index);
+
+          Assert(component_mask.represents_n_components(fe.n_components()),
+                 ExcMessage ("The number of components in the mask has to be either "
+                             "zero or equal to the number of components in the finite " "element."));
+
+          const unsigned int dofs_per_face = fe.dofs_per_face;
+
+          std::vector<types::global_dof_index> dofs_1(dofs_per_face);
+          std::vector<types::global_dof_index> dofs_2(dofs_per_face);
+
+          face_1->get_dof_indices(dofs_1, face_1_index);
+          face_2->get_dof_indices(dofs_2, face_2_index);
+
+          for (unsigned int i=0; i < dofs_per_face; i++)
+            {
+              if (dofs_1[i] == numbers::invalid_dof_index ||
+                  dofs_2[i] == numbers::invalid_dof_index)
+                {
+                  /* If either of these faces have no indices, stop.  This is so
+                   * that there is no attempt to match artificial cells of
+                   * parallel distributed triangulations.
+                   *
+                   * While it seems like we ought to be able to avoid even calling
+                   * set_periodicity_constraints for artificial faces, this
+                   * situation can arise when a face that is being made periodic
+                   * is only partially touched by the local subdomain.
+                   * make_periodicity_constraints will be called recursively even
+                   * for the section of the face that is not touched by the local
+                   * subdomain.
+                   *
+                   * Until there is a better way to determine if the cells that
+                   * neighbor a face are artificial, we simply test to see if the
+                   * face does not have a valid dof initialization.
+                   */
+                  return;
+                }
+            }
+
+          // Well, this is a hack:
+          //
+          // There is no
+          //   face_to_face_index(face_index,
+          //                      face_orientation,
+          //                      face_flip,
+          //                      face_rotation)
+          // function in FiniteElementData, so we have to use
+          //   face_to_cell_index(face_index, face
+          //                      face_orientation,
+          //                      face_flip,
+          //                      face_rotation)
+          // But this will give us an index on a cell - something we cannot work
+          // with directly. But luckily we can match them back :-]
+
+          std::map<unsigned int, unsigned int> cell_to_rotated_face_index;
+
+          // Build up a cell to face index for face_2:
+          for (unsigned int i = 0; i < dofs_per_face; ++i)
+            {
+              const unsigned int cell_index = fe.face_to_cell_index(i, 0, /* It doesn't really matter, just assume
+                                                                           * we're on the first face...
+                                                                           */
+                                                                    true, false, false // default orientation
+                                                                   );
+              cell_to_rotated_face_index[cell_index] = i;
+            }
+
+          // loop over all dofs on face 2 and constrain them against the ones on face 1
+          for (unsigned int i=0; i<dofs_per_face; ++i)
+            if (!constraint_matrix.is_constrained(dofs_2[i]))
+              if ((component_mask.n_selected_components(fe.n_components())
+                   == fe.n_components())
+                  ||
+                  component_mask[fe.face_system_to_component_index(i).first])
+                {
+                  // as mentioned in the comment above this function, we need
+                  // to be careful about treating identity constraints differently.
+                  // consequently, find out whether this dof 'i' will be
+                  // identity constrained
+                  //
+                  // to check whether this is the case, first see whether there are
+                  // any weights other than 0 and 1, then in a first stage make sure
+                  // that if so there is only one weight equal to 1
+                  //
+                  // afterwards do the same for constraints of type dof1=-dof2
+                  bool is_identity_constrained = true;
+                  const double eps = 1.e-13;
+                  for (unsigned int jj=0; jj<dofs_per_face; ++jj)
+                    if (((std::abs(transformation(i,jj)) < eps) ||
+                         (std::abs(transformation(i,jj)-1) < eps)) == false)
+                      {
+                        is_identity_constrained = false;
+                        break;
+                      }
+                  unsigned int identity_constraint_target = numbers::invalid_unsigned_int;
+                  if (is_identity_constrained == true)
+                    {
+                      bool one_identity_found = false;
+                      for (unsigned int jj=0; jj<dofs_per_face; ++jj)
+                        if (std::abs(transformation(i,jj)-1.) < eps)
+                          {
+                            if (one_identity_found == false)
+                              {
+                                one_identity_found = true;
+                                identity_constraint_target = jj;
+                              }
+                            else
+                              {
+                                is_identity_constrained = false;
+                                identity_constraint_target = numbers::invalid_unsigned_int;
+                                break;
+                              }
+                          }
+                    }
+
+                  bool is_inverse_constrained = !is_identity_constrained;
+                  unsigned int inverse_constraint_target = numbers::invalid_unsigned_int;
+                  if (is_inverse_constrained)
+                    for (unsigned int jj=0; jj<dofs_per_face; ++jj)
+                      if (((std::abs(transformation(i,jj)) < eps) ||
+                           (std::abs(transformation(i,jj)+1) < eps)) == false)
+                        {
+                          is_inverse_constrained = false;
+                          break;
+                        }
+                  if (is_inverse_constrained)
+                    {
+                      bool one_identity_found = false;
+                      for (unsigned int jj=0; jj<dofs_per_face; ++jj)
+                        if (std::abs(transformation(i,jj)+1) < eps)
+                          {
+                            if (one_identity_found == false)
+                              {
+                                one_identity_found = true;
+                                inverse_constraint_target = jj;
+                              }
+                            else
+                              {
+                                is_inverse_constrained = false;
+                                inverse_constraint_target = numbers::invalid_unsigned_int;
+                                break;
+                              }
+                          }
+                    }
+
+                  const unsigned int target = is_identity_constrained
+                                              ? identity_constraint_target
+                                              : inverse_constraint_target;
+
+                  // find out whether this dof also exists on face 1
+                  // if this is true and the constraint is no identity
+                  // constraint to itself, set it to zero
+                  bool constrained_set = false;
+                  for (unsigned int j=0; j<dofs_per_face; ++j)
+                    {
+                      if (dofs_2[i] == dofs_1[j])
+                        if (!(is_identity_constrained && target==i))
+                          {
+                            constraint_matrix.add_line(dofs_2[i]);
+                            constrained_set = true;
+                          }
+                    }
+
+                  if (!constrained_set)
+                    {
+                      // now treat constraints, either as an equality constraint or
+                      // as a sequence of constraints
+                      if (is_identity_constrained == true || is_inverse_constrained == true)
+                        {
+                          // Query the correct face_index on face_1 respecting the given
+                          // orientation:
+                          const unsigned int j
+                            = cell_to_rotated_face_index[fe.face_to_cell_index(target,
+                                                                               0, /* It doesn't really matter, just assume
+                                                                                 * we're on the first face...
+                                                                                 */
+                                                                               face_orientation, face_flip, face_rotation)];
+
+                          // if the two aren't already identity constrained (whichever way
+                          // around) or already identical (in case of rotated periodicity constraints),
+                          // then enter the constraint. otherwise there is nothing for us still to do
+                          bool enter_constraint = false;
+                          if (!constraint_matrix.is_constrained(dofs_1[j]))
+                            {
+                              if (dofs_2[i] != dofs_1[j])
+                                enter_constraint = true;
+                            }
+                          else //dofs_1[j] is constrained, is it identity or inverse constrained?
+                            {
+                              const std::vector<std::pair<types::global_dof_index, double > > *constraint_entries
+                                = constraint_matrix.get_constraint_entries(dofs_1[j]);
+                              if (constraint_entries->size()==1 && (*constraint_entries)[0].first == dofs_2[i])
+                                {
+                                  if ((is_identity_constrained && std::abs((*constraint_entries)[0].second-1) > eps) ||
+                                      (is_inverse_constrained && std::abs((*constraint_entries)[0].second+1) > eps))
+                                    {
+                                      //this pair of constraints means that both dofs have to be constrained to 0.
+                                      constraint_matrix.add_line(dofs_2[i]);
+                                    }
+                                }
+                              else
+                                enter_constraint = true;
+                            }
+
+                          if (enter_constraint)
+                            {
+                              constraint_matrix.add_line(dofs_2[i]);
+                              constraint_matrix.add_entry(dofs_2[i], dofs_1[j], is_identity_constrained?1.0:-1.0);
+                            }
+                        }
+                      else
+                        {
+                          // this is just a regular constraint. enter it piece by piece
+                          constraint_matrix.add_line(dofs_2[i]);
+                          for (unsigned int jj=0; jj<dofs_per_face; ++jj)
+                            {
+                              // Query the correct face_index on face_1 respecting the given
+                              // orientation:
+                              const unsigned int j =
+                                cell_to_rotated_face_index[fe.face_to_cell_index
+                                                           (jj, 0, face_orientation, face_flip, face_rotation)];
+
+                              // And finally constrain the two DoFs respecting component_mask:
+                              if (transformation(i,jj) != 0)
+                                constraint_matrix.add_entry(dofs_2[i], dofs_1[j],
+                                                            transformation(i,jj));
+                            }
+                        }
+                    }
+                }
+        }
+    }
+
+
+    // Internally used in make_periodicity_constraints.
+    //
+    // Build up a (possibly rotated) interpolation matrix that is used in
+    // set_periodicity_constraints with the help of the user supplied matrix
+    // and first_vector_components.
+    //
+    // Three cases are distinguished by the size of 'matrix':
+    //  - matrix.m() == fe.dofs_per_face: 'matrix' already is an
+    //    interpolation matrix and is returned unchanged;
+    //  - 'matrix' and first_vector_components are both empty: the identity
+    //    matrix of size fe.dofs_per_face is returned;
+    //  - otherwise 'matrix' has to be of size spacedim x spacedim (asserted).
+    //    Starting from the identity, it is written into the rows and columns
+    //    of every tuple of spacedim face DoFs that sit at the same unit face
+    //    support point and belong to the spacedim consecutive components
+    //    starting at one of the entries of first_vector_components.
+    template<int dim, int spacedim>
+    FullMatrix<double> compute_transformation(
+      const FiniteElement<dim, spacedim> &fe,
+      const FullMatrix<double>           &matrix,
+      const std::vector<unsigned int>    &first_vector_components)
+    {
+      Assert(matrix.m() == matrix.n(), ExcInternalError());
+
+      const unsigned int n_dofs_per_face = fe.dofs_per_face;
+
+      if (matrix.m() == n_dofs_per_face)
+        {
+          // In case of m == n == n_dofs_per_face the supplied matrix is already
+          // an interpolation matrix, so we use it directly:
+          return matrix;
+        }
+
+      if (first_vector_components.empty() && matrix.m() == 0)
+        {
+          // Just the identity matrix in case no rotation is specified:
+          return IdentityMatrix(n_dofs_per_face);
+        }
+
+      // The matrix describes a rotation and we have to build a
+      // transformation matrix, we assume that for a 0* rotation
+      // we would have to build the identity matrix
+
+      Assert(matrix.m() == static_cast<unsigned int>(spacedim), ExcInternalError());
+
+      Quadrature<dim-1> quadrature (fe.get_unit_face_support_points());
+
+      // have an array that stores the location of each vector-dof tuple
+      // we want to rotate.
+      typedef std_cxx1x::array<unsigned int, spacedim> DoFTuple;
+
+      // start with a pristine interpolation matrix...
+      FullMatrix<double> transformation = IdentityMatrix(n_dofs_per_face);
+
+      for (unsigned int i=0; i < n_dofs_per_face; ++i)
+        {
+          // Only face DoFs that belong to the first component of a selected
+          // vector start a tuple; all other DoFs are skipped here.
+          std::vector<unsigned int>::const_iterator comp_it
+            = std::find (first_vector_components.begin(),
+                         first_vector_components.end(),
+                         fe.face_system_to_component_index(i).first);
+          if (comp_it != first_vector_components.end())
+            {
+              const unsigned int first_vector_component = *comp_it;
+
+              // find corresponding other components of vector
+              DoFTuple vector_dofs;
+              vector_dofs[0] = i;
+              unsigned int n_found = 1;
+
+              Assert(*comp_it + spacedim <= fe.n_components(),
+                     ExcMessage("Error: the finite element does not have enough components "
+                                "to define rotated periodic boundaries."));
+
+              for (unsigned int k = 0; k < n_dofs_per_face; ++k)
+                if ((k != i) &&
+                    (quadrature.point(k) == quadrature.point(i)) &&
+                    (fe.face_system_to_component_index(k).first >=
+                     first_vector_component) &&
+                    (fe.face_system_to_component_index(k).first <
+                     first_vector_component + spacedim))
+                  {
+                    vector_dofs[fe.face_system_to_component_index(k).first -
+                                first_vector_component]
+                      = k;
+                    n_found++;
+                    // The tuple has spacedim entries and all of them are
+                    // used below, so only stop searching once every one of
+                    // them has been found. (Stopping after dim entries
+                    // would leave the last entry unset for dim < spacedim.)
+                    if (n_found == static_cast<unsigned int>(spacedim))
+                      break;
+                  }
+
+              // ... and rotate all dofs belonging to vector valued
+              // components that are selected by first_vector_components:
+              for (int row = 0; row < spacedim; ++row)
+                {
+                  transformation[vector_dofs[row]][vector_dofs[row]] = 0.;
+                  for (int col = 0; col < spacedim; ++col)
+                    transformation[vector_dofs[row]][vector_dofs[col]] = matrix[row][col];
+                }
+            }
+        }
+      return transformation;
+    }
+
+  } /*namespace*/
+
+
+  // Low level interface: enter periodicity constraints for one pair of boundary faces, recursing into the children as long as both faces are refined.
+
+
+  template <typename FaceIterator>
+  void
+  make_periodicity_constraints (const FaceIterator                          &face_1,
+                                const typename identity<FaceIterator>::type &face_2,
+                                dealii::ConstraintMatrix                    &constraint_matrix,
+                                const ComponentMask                         &component_mask,
+                                const bool                                   face_orientation,
+                                const bool                                   face_flip,
+                                const bool                                   face_rotation,
+                                const FullMatrix<double>                    &matrix,
+                                const std::vector<unsigned int>             &first_vector_components)
+  {
+    static const int dim = FaceIterator::AccessorType::dimension;
+    static const int spacedim = FaceIterator::AccessorType::space_dimension;
+
+    Assert( (dim != 1) ||
+            (face_orientation == true &&
+             face_flip == false &&
+             face_rotation == false),
+            ExcMessage ("The supplied orientation "
+                        "(face_orientation, face_flip, face_rotation) "
+                        "is invalid for 1D"));
+
+    Assert( (dim != 2) ||
+            (face_orientation == true &&
+             face_rotation == false),
+            ExcMessage ("The supplied orientation "
+                        "(face_orientation, face_flip, face_rotation) "
+                        "is invalid for 2D"));
+
+    Assert(face_1 != face_2,
+           ExcMessage ("face_1 and face_2 are equal! Cannot constrain DoFs "
+                       "on the very same face"));
+
+    Assert(face_1->at_boundary() && face_2->at_boundary(),
+           ExcMessage ("Faces for periodicity constraints must be on the "
+                       "boundary"));
+
+    Assert(matrix.m() == matrix.n(),
+           ExcMessage("The supplied (rotation or interpolation) matrix must "
+                      "be a square matrix"));
+
+    Assert(first_vector_components.empty() || matrix.m() == (int)spacedim,
+           ExcMessage ("first_vector_components is nonempty, so matrix must "
+                       "be a rotation matrix exactly of size spacedim"));
+
+#ifdef DEBUG
+    if (!face_1->has_children())
+      {
+        Assert(face_1->n_active_fe_indices() == 1, ExcInternalError());
+        const unsigned int n_dofs_per_face =
+          face_1->get_fe(face_1->nth_active_fe_index(0)).dofs_per_face;
+
+        Assert(matrix.m() == 0
+               || (first_vector_components.empty() && matrix.m() == n_dofs_per_face)
+               || (!first_vector_components.empty() && matrix.m() == (int)spacedim),
+               ExcMessage ("The matrix must have either size 0 or spacedim "
+                           "(if first_vector_components is nonempty) "
+                           "or the size must be equal to the # of DoFs on the face "
+                           "(if first_vector_components is empty)."));
+      }
+
+    if (!face_2->has_children())
+      {
+        Assert(face_2->n_active_fe_indices() == 1, ExcInternalError());
+        const unsigned int n_dofs_per_face =
+          face_2->get_fe(face_2->nth_active_fe_index(0)).dofs_per_face;
+
+        Assert(matrix.m() == 0
+               || (first_vector_components.empty() && matrix.m() == n_dofs_per_face)
+               || (!first_vector_components.empty() && matrix.m() == (int)spacedim),
+               ExcMessage ("The matrix must have either size 0 or spacedim "
+                           "(if first_vector_components is nonempty) "
+                           "or the size must be equal to the # of DoFs on the face "
+                           "(if first_vector_components is empty)."));
+      }
+#endif
+
+    // A lookup table on how to go through the child faces depending on the
+    // orientation:
+
+    static const int lookup_table_2d[2][2] = // indexed as [face_flip][child of face_1], gives the child of face_2
+    {
+      //          flip:
+      {0, 1}, //  false
+      {1, 0}, //  true
+    };
+
+    static const int lookup_table_3d[2][2][2][4] = // indexed as [face_orientation][face_flip][face_rotation][child of face_1], gives the child of face_2
+    {
+      //                    orientation flip  rotation
+      { { {0, 2, 1, 3}, //  false       false false
+          {2, 3, 0, 1}, //  false       false true
+        },
+        { {3, 1, 2, 0}, //  false       true  false
+          {1, 0, 3, 2}, //  false       true  true
+        },
+      },
+      { { {0, 1, 2, 3}, //  true        false false
+          {1, 3, 0, 2}, //  true        false true
+        },
+        { {3, 2, 1, 0}, //  true        true  false
+          {2, 0, 3, 1}, //  true        true  true
+        },
+      },
+    };
+
+    if (face_1->has_children() && face_2->has_children())
+      {
+        // In the case that both faces have children, we loop over all
+        // children and apply make_periodicity_constraints recursively:
+
+        Assert(face_1->n_children() == GeometryInfo<dim>::max_children_per_face &&
+               face_2->n_children() ==
+               GeometryInfo<dim>::max_children_per_face,
+               ExcNotImplemented());
+
+        for (unsigned int i = 0; i < GeometryInfo<dim>::max_children_per_face;
+             ++i)
+          {
+            // Lookup the index for the second face
+            unsigned int j;
+            switch (dim)
+              {
+              case 2:
+                j = lookup_table_2d[face_flip][i];
+                break;
+              case 3:
+                j = lookup_table_3d[face_orientation][face_flip][face_rotation][i];
+                break;
+              default:
+                AssertThrow(false, ExcNotImplemented());
+              }
+
+            make_periodicity_constraints (face_1->child(i),
+                                          face_2->child(j),
+                                          constraint_matrix,
+                                          component_mask,
+                                          face_orientation,
+                                          face_flip,
+                                          face_rotation,
+                                          matrix,
+                                          first_vector_components);
+          }
+      }
+    else
+      {
+        // Otherwise at least one of the two faces is active and
+        // we need to do some work and enter the constraints!
+
+        // The finite element that matters is the one on the active face:
+        const FiniteElement<dim,spacedim> &fe =
+          face_1->has_children()
+          ? face_2->get_fe(face_2->nth_active_fe_index(0))
+          : face_1->get_fe(face_1->nth_active_fe_index(0));
+
+        const unsigned int n_dofs_per_face = fe.dofs_per_face;
+
+        // Sometimes we just have nothing to do (for all finite elements,
+        // or systems which accidentally don't have any dofs on the
+        // boundary).
+        if (n_dofs_per_face == 0)
+          return;
+
+        const FullMatrix<double> transformation =
+          compute_transformation(fe, matrix, first_vector_components);
+
+        if (! face_2->has_children())
+          {
+            // Performance hack: We do not need to compute an inverse if
+            // the matrix is the identity matrix.
+            if (first_vector_components.empty() && matrix.m() == 0)
+              {
+                set_periodicity_constraints(face_2, // face_2 is active here, so it is passed as the first (active) face
+                                            face_1,
+                                            transformation,
+                                            constraint_matrix,
+                                            component_mask,
+                                            face_orientation,
+                                            face_flip,
+                                            face_rotation);
+              }
+            else
+              {
+                FullMatrix<double> inverse(transformation.m());
+                inverse.invert(transformation);
+
+                set_periodicity_constraints(face_2, // faces are swapped as above, hence the inverted transformation
+                                            face_1,
+                                            inverse,
+                                            constraint_matrix,
+                                            component_mask,
+                                            face_orientation,
+                                            face_flip,
+                                            face_rotation);
+              }
+          }
+        else
+          {
+            Assert(!face_1->has_children(), ExcInternalError());
+
+            // Important note:
+            // In 3D we have to take care of the fact that face_rotation
+            // gives the relative rotation of face_1 to face_2, i.e. we
+            // have to invert the rotation when constraining face_2 to
+            // face_1. Therefore face_flip has to be toggled if
+            // face_rotation is true:
+            // In case of inverted orientation, nothing has to be done.
+            set_periodicity_constraints(face_1,
+                                        face_2,
+                                        transformation,
+                                        constraint_matrix,
+                                        component_mask,
+                                        face_orientation,
+                                        face_orientation
+                                        ? face_rotation ^ face_flip
+                                        : face_flip,
+                                        face_rotation);
+          }
+      }
+  }
+
+
+
+  // Apply the low level make_periodicity_constraints function to every
+  // entry of periodic_faces. For each entry, the two faces are looked up
+  // from its cell and face_idx members, and its three orientation flags
+  // and its matrix are passed on together with constraint_matrix,
+  // component_mask and first_vector_components.
+  template<typename DoFHandlerType>
+  void
+  make_periodicity_constraints
+  (const std::vector<GridTools::PeriodicFacePair<typename DoFHandlerType::cell_iterator> >
+   &periodic_faces,
+   dealii::ConstraintMatrix        &constraint_matrix,
+   const ComponentMask             &component_mask,
+   const std::vector<unsigned int> &first_vector_components)
+  {
+    // Shorthand names for the types used below.
+    typedef typename DoFHandlerType::face_iterator FaceIterator;
+    typedef GridTools::PeriodicFacePair<typename DoFHandlerType::cell_iterator> FacePair;
+    typedef typename std::vector<FacePair>::const_iterator PairIterator;
+
+    const PairIterator last_pair = periodic_faces.end();
+    for (PairIterator face_pair = periodic_faces.begin();
+         face_pair != last_pair;
+         ++face_pair)
+      {
+        // The two faces that are matched by this entry:
+        const FaceIterator face_1 = face_pair->cell[0]->face(face_pair->face_idx[0]);
+        const FaceIterator face_2 = face_pair->cell[1]->face(face_pair->face_idx[1]);
+
+        // Both have to be boundary faces, and they have to be distinct.
+        Assert(face_1->at_boundary() && face_2->at_boundary(),
+               ExcInternalError());
+        Assert (face_1 != face_2,
+                ExcInternalError());
+
+        // orientation[0], [1] and [2] are handed over as face_orientation,
+        // face_flip and face_rotation, in this order.
+        make_periodicity_constraints(face_1, face_2,
+                                     constraint_matrix,
+                                     component_mask,
+                                     face_pair->orientation[0],
+                                     face_pair->orientation[1],
+                                     face_pair->orientation[2],
+                                     face_pair->matrix,
+                                     first_vector_components);
+      }
+  }
+
+
+  // High level interface variants:
+
+
+  // High level interface for two boundary indicators: collect the pairs of
+  // periodic faces of dof_handler that GridTools::collect_periodic_faces
+  // reports for b_id1, b_id2 and direction, and enter the constraints for
+  // all of them into constraint_matrix, restricted to the components
+  // selected by component_mask.
+  //
+  // direction has to lie in [0, space_dimension) and the two boundary
+  // indicators have to differ; both conditions are asserted.
+  template<typename DoFHandlerType>
+  void
+  make_periodicity_constraints (const DoFHandlerType     &dof_handler,
+                                const types::boundary_id  b_id1,
+                                const types::boundary_id  b_id2,
+                                const int                 direction,
+                                dealii::ConstraintMatrix &constraint_matrix,
+                                const ComponentMask      &component_mask)
+  {
+    static const int space_dim = DoFHandlerType::space_dimension;
+    (void)space_dim;
+    Assert (0<=direction && direction<space_dim,
+            ExcIndexRange (direction, 0, space_dim));
+
+    // Note the blank at the end of the first string literal: adjacent
+    // literals are concatenated, so without it the message would read
+    // "...must bedifferent...".
+    Assert (b_id1 != b_id2,
+            ExcMessage ("The boundary indicators b_id1 and b_id2 must be "
+                        "different to denote different boundaries."));
+
+    std::vector<GridTools::PeriodicFacePair
+    <typename DoFHandlerType::cell_iterator> > matched_faces;
+
+    // Collect matching periodic cells on the coarsest level:
+    GridTools::collect_periodic_faces(dof_handler, b_id1, b_id2, direction,
+                                      matched_faces);
+
+    make_periodicity_constraints<DoFHandlerType>
+    (matched_faces, constraint_matrix, component_mask);
+  }
+
+
+
+  // High level interface for a single boundary indicator: collect the
+  // pairs of periodic faces of dof_handler that
+  // GridTools::collect_periodic_faces reports for b_id and direction, and
+  // enter the constraints for all of them into constraint_matrix,
+  // restricted to the components selected by component_mask.
+  //
+  // direction has to lie in [0, space_dimension), and dimension and
+  // space_dimension of the DoFHandlerType have to agree; both conditions
+  // are asserted.
+  template<typename DoFHandlerType>
+  void
+  make_periodicity_constraints (const DoFHandlerType            &dof_handler,
+                                const types::boundary_id         b_id,
+                                const int                        direction,
+                                dealii::ConstraintMatrix        &constraint_matrix,
+                                const ComponentMask             &component_mask)
+  {
+    // Both constants are only looked at inside the Assert statements; the
+    // casts to void keep compilers from warning about unused variables
+    // should those statements compile to nothing.
+    static const int dim = DoFHandlerType::dimension;
+    (void)dim;
+    static const int space_dim = DoFHandlerType::space_dimension;
+    (void)space_dim;
+
+    Assert (0<=direction && direction<space_dim,
+            ExcIndexRange (direction, 0, space_dim));
+    Assert(dim == space_dim,
+           ExcNotImplemented());
+
+    // Let GridTools gather the matching periodic faces ...
+    typedef GridTools::PeriodicFacePair<typename DoFHandlerType::cell_iterator> FacePair;
+    std::vector<FacePair> periodic_face_pairs;
+    GridTools::collect_periodic_faces(dof_handler,
+                                      b_id,
+                                      direction,
+                                      periodic_face_pairs);
+
+    // ... and constrain the DoFs on every pair of them.
+    make_periodicity_constraints<DoFHandlerType> (periodic_face_pairs,
+                                                  constraint_matrix,
+                                                  component_mask);
+  }
+
+
+
+  namespace internal
+  {
+    namespace Assembler
+    {
+      struct Scratch {}; // empty: carries no per-cell scratch data
+      // Per-cell results that compute_intergrid_weights_3 fills in and
+      // that copy_intergrid_weights_3 reads when updating the weights.
+      template <int dim,int spacedim>
+      struct CopyData
+      {
+        unsigned int                         dofs_per_cell;
+        std::vector<types::global_dof_index> parameter_dof_indices;
+#ifdef DEAL_II_WITH_MPI
+        std::vector<dealii::parallel::distributed::Vector<double> > global_parameter_representation;
+#else
+        std::vector<dealii::Vector<double> > global_parameter_representation;
+#endif
+      };
+    }
+
+    namespace
+    {
+      /**
+       * This is a function that is called by the _2 function and that
+       * operates on one cell only. It is run in parallel if
+       * multithreading is available.
+       */
+      template <int dim, int spacedim>
+      void compute_intergrid_weights_3 (
+        const typename dealii::DoFHandler<dim,spacedim>::active_cell_iterator &cell,
+        const Assembler::Scratch &,
+        Assembler::CopyData<dim,spacedim>                                     &copy_data,
+        const unsigned int                                                     coarse_component,
+        const FiniteElement<dim,spacedim>                                     &coarse_fe,
+        const InterGridMap<dealii::DoFHandler<dim,spacedim> >                 &coarse_to_fine_grid_map,
+        const std::vector<dealii::Vector<double> >                            &parameter_dofs)
+      {
+        // for each cell on the parameter grid: find out which degrees of
+        // freedom on the fine grid correspond in which way to the degrees
+        // of freedom on the parameter grid
+        //
+        // since for continuous FEs some dofs exist on more than one cell,
+        // we have to track which ones were already visited. the problem is
+        // that if we visit a dof first on one cell and compute its weight
+        // with respect to some global dofs to be non-zero, and later visit
+        // the dof again on another cell and (since we are on another cell)
+        // recompute the weights with respect to the same dofs as above to
+        // be zero now, we have to preserve them. we therefore overwrite
+        // all weights if they are nonzero and do not enforce zero weights
+        // since that might be only due to the fact that we are on another
+        // cell.
+        //
+        // example:
+        // coarse grid
+        //  |     |     |
+        //  *-----*-----*
+        //  | cell|cell |
+        //  |  1  |  2  |
+        //  |     |     |
+        //  0-----1-----*
+        //
+        // fine grid
+        //  |  |  |  |  |
+        //  *--*--*--*--*
+        //  |  |  |  |  |
+        //  *--*--*--*--*
+        //  |  |  |  |  |
+        //  *--x--y--*--*
+        //
+        // when on cell 1, we compute the weights of dof 'x' to be 1/2 from
+        // parameter dofs 0 and 1, respectively. however, when later we are
+        // on cell 2, we again compute the prolongation of shape function 1
+        // restricted to cell 2 to the global grid and find that the weight
+        // of global dof 'x' now is zero. however, we should not overwrite
+        // the old value.
+        //
+        // we therefore always only set nonzero values. why adding up is
+        // not useful: dof 'y' would get weight 1 from parameter dof 1 on
+        // both cells 1 and 2, but the correct weight is nevertheless only
+        // 1.
+
+        // the representation of a single degree of freedom on the coarse
+        // grid (for the selected fe) on the fine grid is kept in copy_data
+
+        copy_data.dofs_per_cell = coarse_fe.dofs_per_cell;
+        copy_data.parameter_dof_indices.resize(copy_data.dofs_per_cell);
+
+        // get the global indices of the parameter dofs on this
+        // parameter grid cell
+        cell->get_dof_indices (copy_data.parameter_dof_indices);
+
+        // loop over all dofs on this cell and check whether they are
+        // interesting for us
+        for (unsigned int local_dof=0; local_dof<copy_data.dofs_per_cell; ++local_dof)
+          if (coarse_fe.system_to_component_index(local_dof).first
+              ==
+              coarse_component)
+            {
+              // the how-many-th parameter is this on this cell?
+              const unsigned int local_parameter_dof
+                = coarse_fe.system_to_component_index(local_dof).second;
+
+              copy_data.global_parameter_representation[local_parameter_dof] = 0.;
+
+              // distribute the representation of
+              // @p{local_parameter_dof} on the parameter grid cell
+              // @p{cell} to the global data space
+              coarse_to_fine_grid_map[cell]->
+              set_dof_values_by_interpolation (parameter_dofs[local_parameter_dof],
+                                               copy_data.global_parameter_representation[local_parameter_dof]);
+            }
+      }
+
+
+
+      /**
+       * This is the copier counterpart of compute_intergrid_weights_3: it
+       * enters the nonzero entries of the representation vectors stored in
+       * copy_data that belong to parameter dofs into the weights array.
+       */
+      template <int dim,int spacedim>
+      void copy_intergrid_weights_3(const Assembler::CopyData<dim,spacedim>                &copy_data,
+                                    const unsigned int                                      coarse_component,
+                                    const FiniteElement<dim,spacedim>                      &coarse_fe,
+                                    const std::vector<types::global_dof_index>             &weight_mapping,
+                                    const bool                                              is_called_in_parallel,
+                                    std::vector<std::map<types::global_dof_index, float> > &weights)
+      {
+        unsigned int pos = 0;
+        for (unsigned int local_dof=0; local_dof<copy_data.dofs_per_cell; ++local_dof)
+          if (coarse_fe.system_to_component_index(local_dof).first
+              ==
+              coarse_component)
+            {
+              // now that we've got the global representation of each
+              // parameter dof, we've only got to clobber the non-zero
+              // entries in that vector and store the result
+              //
+              // what we have learned: if entry @p{i} of the global
+              // vector holds the value @p{v[i]}, then this is the
+              // weight with which the present dof contributes to
+              // @p{i}. there may be several such @p{i}s and their
+              // weights' sum should be one. Then, @p{v[i]} should be
+              // equal to @p{\sum_j w_{ij} p[j]} with @p{p[j]} be the
+              // values of the degrees of freedom on the coarse grid.
+              // we can thus compute constraints which link the degrees
+              // of freedom @p{v[i]} on the fine grid to those on the
+              // coarse grid, @p{p[j]}. Now to use these as real
+              // constraints, rather than as additional equations, we
+              // have to identify representants among the @p{i} for
+              // each @p{j}. this will be done by simply taking the
+              // first @p{i} for which @p{w_{ij}==1}.
+              //
+              // NOTE(review): an earlier comment here spoke of guarding
+              // the weights array by a mutex; no lock is taken in this
+              // function, so it presumably relies on the caller invoking
+              // it serially -- confirm
+              for (types::global_dof_index i=0; i<copy_data.global_parameter_representation[pos].size();
+                   ++i)
+                // set this weight if it belongs to a parameter dof.
+                if (weight_mapping[i] != numbers::invalid_dof_index)
+                  {
+                    // only overwrite old value if not by zero
+                    if (copy_data.global_parameter_representation[pos](i) != 0)
+                      {
+                        const types::global_dof_index wi = copy_data.parameter_dof_indices[local_dof],
+                                                      wj = weight_mapping[i];
+                        weights[wi][wj] = copy_data.global_parameter_representation[pos](i);
+                      }
+                  }
+                else if (!is_called_in_parallel)
+                  {
+                    // Note that when this function operates with distributed
+                    // fine grid, this assertion is switched off since the
+                    // condition does not necessarily hold
+                    Assert (copy_data.global_parameter_representation[pos](i) == 0,
+                            ExcInternalError());
+                  }
+
+              ++pos;
+            }
+
+      }
+
+
+
+      /**
+       * This is a helper function that is used in the computation of
+       * intergrid constraints. It sets up the per-dof work vectors and runs
+       * the two _3 functions above over all active cells of the coarse grid.
+       */
+      template <int dim, int spacedim>
+      void
+      compute_intergrid_weights_2 (
+        const dealii::DoFHandler<dim,spacedim>                &coarse_grid,
+        const unsigned int                                     coarse_component,
+        const InterGridMap<dealii::DoFHandler<dim,spacedim> > &coarse_to_fine_grid_map,
+        const std::vector<dealii::Vector<double> >            &parameter_dofs,
+        const std::vector<types::global_dof_index>            &weight_mapping,
+        std::vector<std::map<types::global_dof_index,float> > &weights)
+      {
+        Assembler::Scratch scratch;
+        Assembler::CopyData<dim,spacedim> copy_data;
+        // count the dofs per cell that belong to coarse_component
+        unsigned int n_interesting_dofs = 0;
+        for (unsigned int local_dof=0; local_dof<coarse_grid.get_fe().dofs_per_cell; ++local_dof)
+          if (coarse_grid.get_fe().system_to_component_index(local_dof).first
+              ==
+              coarse_component)
+            ++n_interesting_dofs;
+        // one representation vector per dof of the selected component
+        copy_data.global_parameter_representation.resize(n_interesting_dofs);
+
+        bool is_called_in_parallel = false;
+        for (size_t i=0; i<copy_data.global_parameter_representation.size(); ++i)
+          {
+#ifdef DEAL_II_WITH_MPI
+            MPI_Comm communicator = MPI_COMM_SELF;
+            try
+              {
+                const typename dealii::parallel::Triangulation<dim, spacedim> &tria =
+                  dynamic_cast<const typename dealii::parallel::Triangulation<dim, spacedim>&>
+                  (coarse_to_fine_grid_map.get_destination_grid().get_triangulation());
+                communicator = tria.get_communicator ();
+                is_called_in_parallel = true;
+              }
+            catch (std::bad_cast &exp)
+              {
+                // Nothing bad happened: the user used serial Triangulation
+              }
+
+            IndexSet locally_owned_dofs, locally_relevant_dofs;
+            DoFTools::extract_locally_owned_dofs
+            (coarse_to_fine_grid_map.get_destination_grid (), locally_owned_dofs);
+            DoFTools::extract_locally_relevant_dofs
+            (coarse_to_fine_grid_map.get_destination_grid (), locally_relevant_dofs);
+
+            copy_data.global_parameter_representation[i].reinit
+            (locally_owned_dofs, locally_relevant_dofs, communicator);
+#else
+            const types::global_dof_index n_fine_dofs = weight_mapping.size();
+            copy_data.global_parameter_representation[i].reinit (n_fine_dofs);
+#endif
+          }
+
+        WorkStream::run(coarse_grid.begin_active(),
+                        coarse_grid.end(),
+                        std_cxx11::bind(&compute_intergrid_weights_3<dim,spacedim>,
+                                        std_cxx11::_1,
+                                        std_cxx11::_2,
+                                        std_cxx11::_3,
+                                        coarse_component,
+                                        std_cxx11::cref(coarse_grid.get_fe()),
+                                        std_cxx11::cref(coarse_to_fine_grid_map),
+                                        std_cxx11::cref(parameter_dofs)),
+                        std_cxx11::bind(&copy_intergrid_weights_3<dim,spacedim>,
+                                        std_cxx11::_1,
+                                        coarse_component,
+                                        std_cxx11::cref(coarse_grid.get_fe()),
+                                        std_cxx11::cref(weight_mapping),
+                                        is_called_in_parallel,
+                                        std_cxx11::ref(weights)),
+                        scratch,
+                        copy_data);
+
+#ifdef DEAL_II_WITH_MPI
+        for (size_t i=0; i<copy_data.global_parameter_representation.size(); ++i)
+          copy_data.global_parameter_representation[i].update_ghost_values ();
+#endif
+      }
+
+
+
+      /**
+       * This is a helper function that is used in the computation of
+       * intergrid constraints. See the function for a thorough description
+       * of how it works.
+       */
+      template <int dim, int spacedim>
+      unsigned int
+      compute_intergrid_weights_1 (
+        const dealii::DoFHandler<dim,spacedim>              &coarse_grid,
+        const unsigned int                  coarse_component,
+        const dealii::DoFHandler<dim,spacedim>              &fine_grid,
+        const unsigned int                  fine_component,
+        const InterGridMap<dealii::DoFHandler<dim,spacedim> > &coarse_to_fine_grid_map,
+        std::vector<std::map<types::global_dof_index, float> > &weights,
+        std::vector<types::global_dof_index>                   &weight_mapping)
+      {
+        // aliases to the finite elements used by the dof handlers:
+        const FiniteElement<dim,spacedim> &coarse_fe = coarse_grid.get_fe(),
+                                           &fine_fe   = fine_grid.get_fe();
+
+        // global numbers of dofs
+        const types::global_dof_index n_coarse_dofs = coarse_grid.n_dofs(),
+                                      n_fine_dofs   = fine_grid.n_dofs();
+
+        // local numbers of dofs
+        const unsigned int fine_dofs_per_cell   = fine_fe.dofs_per_cell;
+
+        // alias the number of dofs per cell belonging to the
+        // coarse_component which is to be the restriction of the fine
+        // grid:
+        const unsigned int coarse_dofs_per_cell_component
+          = coarse_fe.base_element(coarse_fe.component_to_base_index(coarse_component).first).dofs_per_cell;
+
+
+        // Try to find out whether the grids stem from the same coarse
+        // grid. This is a rather crude test, but better than nothing
+        Assert (coarse_grid.get_triangulation().n_cells(0) == fine_grid.get_triangulation().n_cells(0),
+                ExcGridsDontMatch());
+
+        // check whether the map correlates the right objects
+        Assert (&coarse_to_fine_grid_map.get_source_grid() == &coarse_grid,
+                ExcGridsDontMatch ());
+        Assert (&coarse_to_fine_grid_map.get_destination_grid() == &fine_grid,
+                ExcGridsDontMatch ());
+
+
+        // check whether component numbers are valid
+        AssertIndexRange (coarse_component,coarse_fe.n_components());
+        AssertIndexRange (fine_component, fine_fe.n_components());
+
+        // check whether respective finite elements are equal
+        Assert (coarse_fe.base_element (coarse_fe.component_to_base_index(coarse_component).first)
+                ==
+                fine_fe.base_element (fine_fe.component_to_base_index(fine_component).first),
+                ExcFiniteElementsDontMatch());
+
+#ifdef DEBUG
+        // if in debug mode, check whether the coarse grid is indeed
+        // coarser everywhere than the fine grid
+        for (typename dealii::DoFHandler<dim,spacedim>::active_cell_iterator
+             cell=coarse_grid.begin_active();
+             cell != coarse_grid.end(); ++cell)
+          Assert (cell->level() <= coarse_to_fine_grid_map[cell]->level(),
+                  ExcGridNotCoarser());
+#endif
+
+        /*
+         * From here on: the term `parameter' refers to the selected
+         * component on the coarse grid and its analogue on the fine grid.
+         * The naming of variables containing this term is due to the fact
+         * that `selected_component' is longer, but also due to the fact
+         * that the code of this function was initially written for a
+         * program where the component which we wanted to match between
+         * grids was actually the `parameter' variable.
+         *
+         * Likewise, the terms `parameter grid' and `state grid' refer to
+         * the coarse and fine grids, respectively.
+         *
+         * Changing the names of variables would in principle be a good
+         * idea, but would not make things simpler and would be another
+         * source of errors. If anyone feels like doing so: patches would
+         * be welcome!
+         */
+
+
+
+        // set up vectors of cell-local data; each vector represents one
+        // degree of freedom of the coarse-grid variable in the fine-grid
+        // element
+        std::vector<dealii::Vector<double> >
+        parameter_dofs (coarse_dofs_per_cell_component,
+                        dealii::Vector<double>(fine_dofs_per_cell));
+        // for each coarse dof: find its position within the fine element
+        // and set this value to one in the respective vector (all other
+        // values are zero by construction)
+        for (unsigned int local_coarse_dof=0;
+             local_coarse_dof<coarse_dofs_per_cell_component;
+             ++local_coarse_dof)
+          for (unsigned int fine_dof=0; fine_dof<fine_fe.dofs_per_cell; ++fine_dof)
+            if (fine_fe.system_to_component_index(fine_dof)
+                ==
+                std::make_pair (fine_component, local_coarse_dof))
+              {
+                parameter_dofs[local_coarse_dof](fine_dof) = 1.;
+                break;
+              };
+
+
+        // find out how many DoFs there are on the grids belonging to the
+        // components we want to match
+        unsigned int n_parameters_on_fine_grid=0;
+        if (true)
+          {
+            // have a flag for each dof on the fine grid and set it to true
+            // if this is an interesting dof. finally count how many true
+            // flags there are
+            std::vector<bool> dof_is_interesting (fine_grid.n_dofs(), false);
+            std::vector<types::global_dof_index>  local_dof_indices (fine_fe.dofs_per_cell);
+
+            for (typename dealii::DoFHandler<dim,spacedim>::active_cell_iterator
+                 cell=fine_grid.begin_active();
+                 cell!=fine_grid.end(); ++cell)
+              if (cell->is_locally_owned ())
+                {
+                  cell->get_dof_indices (local_dof_indices);
+                  for (unsigned int i=0; i<fine_fe.dofs_per_cell; ++i)
+                    if (fine_fe.system_to_component_index(i).first == fine_component)
+                      dof_is_interesting[local_dof_indices[i]] = true;
+                };
+
+            n_parameters_on_fine_grid = std::count (dof_is_interesting.begin(),
+                                                    dof_is_interesting.end(),
+                                                    true);
+          };
+
+
+        // set up the weights mapping
+        weights.clear ();
+        weights.resize (n_coarse_dofs);
+
+        weight_mapping.clear ();
+        weight_mapping.resize (n_fine_dofs, numbers::invalid_dof_index);
+
+        if (true)
+          {
+            std::vector<types::global_dof_index> local_dof_indices(fine_fe.dofs_per_cell);
+            unsigned int next_free_index=0;
+            for (typename dealii::DoFHandler<dim,spacedim>::active_cell_iterator
+                 cell=fine_grid.begin_active();
+                 cell != fine_grid.end(); ++cell)
+              if (cell->is_locally_owned ())
+                {
+                  cell->get_dof_indices (local_dof_indices);
+                  for (unsigned int i=0; i<fine_fe.dofs_per_cell; ++i)
+                    // if this DoF is a parameter dof and has not yet been
+                    // numbered, then do so
+                    if ((fine_fe.system_to_component_index(i).first == fine_component) &&
+                        (weight_mapping[local_dof_indices[i]] == numbers::invalid_dof_index))
+                      {
+                        weight_mapping[local_dof_indices[i]] = next_free_index;
+                        ++next_free_index;
+                      };
+                };
+
+            Assert (next_free_index == n_parameters_on_fine_grid,
+                    ExcInternalError());
+          };
+
+
+        // for each cell on the parameter grid: find out which degrees of
+        // freedom on the fine grid correspond in which way to the degrees
+        // of freedom on the parameter grid
+        //
+        // do this in a separate function to allow for multithreading
+        // there. see this function also if you want to read more
+        // information on the algorithm used.
+        compute_intergrid_weights_2 (coarse_grid, coarse_component,
+                                     coarse_to_fine_grid_map, parameter_dofs,
+                                     weight_mapping, weights);
+
+
+        // ok, now we have all weights for each dof on the fine grid. if in
+        // debug mode let's see if everything went smoothly, i.e. each dof
+        // has sum of weights one
+        //
+        // in other words this means that if the sum of all shape functions
+        // on the parameter grid is one (which is always the case), then
+        // the representation on the state grid should be as well (partition
+        // of unity)
+        //
+        // if the parameter grid has more than one component, then the
+        // respective dofs of the other components have sum of weights
+        // zero, of course. we do not explicitly ask which component a dof
+        // belongs to, but this at least tests some errors
+#ifdef DEBUG
+        for (unsigned int col=0; col<n_parameters_on_fine_grid; ++col)
+          {
+            double sum=0;
+            for (types::global_dof_index row=0; row<n_coarse_dofs; ++row)
+              if (weights[row].find(col) != weights[row].end())
+                sum += weights[row][col];
+            Assert ((std::fabs(sum-1) < 1.e-12) ||
+                    ((coarse_fe.n_components()>1) && (sum==0)), ExcInternalError());
+          };
+#endif
+
+
+        return n_parameters_on_fine_grid;
+      }
+
+
+    }
+  }
+
+
+
+  template <int dim, int spacedim>
+  void
+  compute_intergrid_constraints (
+    const DoFHandler<dim,spacedim>              &coarse_grid,
+    const unsigned int                  coarse_component,
+    const DoFHandler<dim,spacedim>              &fine_grid,
+    const unsigned int                  fine_component,
+    const InterGridMap<DoFHandler<dim,spacedim> > &coarse_to_fine_grid_map,
+    ConstraintMatrix                   &constraints)
+  {
+    // store the weights with which a dof on the parameter grid contributes
+    // to a dof on the fine grid. see the long doc below for more info
+    //
+    // allocate as many rows as there are parameter dofs on the coarse grid
+    // and as many columns as there are parameter dofs on the fine grid.
+    //
+    // weight_mapping is used to map the global (fine grid) parameter dof
+    // indices to the columns
+    //
+    // in the original implementation, the weights array was actually of
+    // FullMatrix<double> type. this wasted huge amounts of memory, but was
+    // fast. nonetheless, since the memory consumption was quadratic in the
+    // number of degrees of freedom, this was not very practical, so we now
+    // use a vector of rows of the matrix, and in each row a map from
+    // column number to value. this seems like the best tradeoff between
+    // memory and speed, as it is now linear in memory and still fast
+    // enough.
+    //
+    // to save some memory and since the weights are usually (negative)
+    // powers of 2, we choose the value type of the matrix to be @p{float}
+    // rather than @p{double}.
+    std::vector<std::map<types::global_dof_index, float> > weights;
+
+    // this is the mapping. there is one entry for each dof on the fine
+    // grid; if it is a parameter dof, then its value is the column in
+    // weights for that parameter dof, if it is any other dof, then its
+    // value is numbers::invalid_dof_index
+    std::vector<types::global_dof_index> weight_mapping;
+
+    const unsigned int n_parameters_on_fine_grid
+      = internal::compute_intergrid_weights_1 (coarse_grid, coarse_component,
+                                               fine_grid, fine_component,
+                                               coarse_to_fine_grid_map,
+                                               weights, weight_mapping);
+    (void)n_parameters_on_fine_grid;
+
+    // global numbers of dofs
+    const types::global_dof_index n_coarse_dofs = coarse_grid.n_dofs(),
+                                  n_fine_dofs   = fine_grid.n_dofs();
+
+
+    // get an array in which we store which dof on the coarse grid is a
+    // parameter and which is not
+    std::vector<bool> coarse_dof_is_parameter (coarse_grid.n_dofs());
+    if (true)
+      {
+        std::vector<bool> mask (coarse_grid.get_fe().n_components(),
+                                false);
+        mask[coarse_component] = true;
+        extract_dofs (coarse_grid, ComponentMask(mask), coarse_dof_is_parameter);
+      }
+
+    // now we know that the weights in each row constitute a constraint.
+    // enter this into the constraints object
+    //
+    // first task: for each parameter dof on the parameter grid, find a
+    // representant on the fine, global grid. this is possible since we use
+    // conforming finite elements. we take this representant to be the first
+    // element in this row with weight identical to one. the representant
+    // will become an unconstrained degree of freedom, while all others
+    // will be constrained to this dof (and possibly others)
+    std::vector<types::global_dof_index> representants(n_coarse_dofs, numbers::invalid_dof_index);
+    for (types::global_dof_index parameter_dof=0; parameter_dof<n_coarse_dofs;
+         ++parameter_dof)
+      if (coarse_dof_is_parameter[parameter_dof] == true)
+        {
+          // if this is the line of a parameter dof on the coarse grid,
+          // then it should have at least one dependent node on the fine
+          // grid
+          Assert (weights[parameter_dof].size() > 0, ExcInternalError());
+
+          // find the column where the representant is mentioned
+          std::map<types::global_dof_index,float>::const_iterator i = weights[parameter_dof].begin();
+          for (; i!=weights[parameter_dof].end(); ++i)
+            if (i->second == 1)
+              break;
+          Assert (i!=weights[parameter_dof].end(), ExcInternalError());
+          const types::global_dof_index column = i->first;
+
+          // now we know in which column of weights the representant is,
+          // but we don't know its global index. get it using the inverse
+          // operation of the weight_mapping
+          types::global_dof_index global_dof=0;
+          for (; global_dof<weight_mapping.size(); ++global_dof)
+            if (weight_mapping[global_dof] == static_cast<types::global_dof_index>(column))
+              break;
+          Assert (global_dof < weight_mapping.size(), ExcInternalError());
+
+          // now enter the representant's global index into our list
+          representants[parameter_dof] = global_dof;
+        }
+      else
+        {
+          // consistency check: if this is no parameter dof on the coarse
+          // grid, then the respective row must be empty!
+          Assert (weights[parameter_dof].size() == 0, ExcInternalError());
+        };
+
+
+
+    // note for people that want to optimize this function: the largest
+    // part of the computing time is spent in the following, rather
+    // innocent block of code. basically, it must be the
+    // ConstraintMatrix::add_entries call which takes the bulk of the time,
+    // but it is not known to the author how to make it faster...
+    std::vector<std::pair<types::global_dof_index,double> > constraint_line;
+    for (types::global_dof_index global_dof=0; global_dof<n_fine_dofs; ++global_dof)
+      if (weight_mapping[global_dof] != numbers::invalid_dof_index)
+        // this global dof is a parameter dof, so it may carry a constraint
+        // note that for each global dof, the sum of weights shall be one,
+        // so we can find out whether this dof is constrained in the
+        // following way: if the first weight in this column is a one, and
+        // the representant for the parameter dof of the row containing
+        // this one is the present dof, then we consider this dof to be
+        // unconstrained. otherwise, all other dofs are constrained
+        {
+          const types::global_dof_index col = weight_mapping[global_dof];
+          Assert (col < n_parameters_on_fine_grid, ExcInternalError());
+
+          types::global_dof_index first_used_row=0;
+
+          {
+            Assert (weights.size() > 0, ExcInternalError());
+            std::map<types::global_dof_index,float>::const_iterator
+            col_entry = weights[0].end();
+            for (; first_used_row<n_coarse_dofs; ++first_used_row)
+              {
+                col_entry = weights[first_used_row].find(col);
+                if (col_entry != weights[first_used_row].end())
+                  break;
+              }
+            // test the index: weights[first_used_row] is invalid if no row matched
+            Assert (first_used_row < n_coarse_dofs, ExcInternalError());
+
+            if ((col_entry->second == 1) &&
+                (representants[first_used_row] == global_dof))
+              // dof unconstrained or constrained to itself (in case this
+              // cell is mapped to itself, rather than to children of
+              // itself)
+              continue;
+          }
+
+
+          // otherwise enter all constraints
+          constraints.add_line (global_dof);
+
+          constraint_line.clear ();
+          for (types::global_dof_index row=first_used_row; row<n_coarse_dofs; ++row)
+            {
+              const std::map<types::global_dof_index,float>::const_iterator
+              j = weights[row].find(col);
+              if ((j != weights[row].end()) && (j->second != 0))
+                constraint_line.push_back (std::pair<types::global_dof_index,double>(representants[row],
+                                           j->second));
+            };
+
+          constraints.add_entries (global_dof, constraint_line);
+        };
+  }
+
+
+
+  // Compute the representation of the transfer from the coarse to the
+  // fine grid: on return, transfer_representation[p][i] is the weight
+  // with which coarse-grid dof i contributes to fine-grid dof p, where
+  // p is a global dof index on the fine grid.
+  //
+  // the weights themselves are computed by
+  // internal::compute_intergrid_weights_1(); this function only
+  // transposes them and translates column numbers into dof indices.
+  template <int dim, int spacedim>
+  void
+  compute_intergrid_transfer_representation (
+    const DoFHandler<dim,spacedim>              &coarse_grid,
+    const unsigned int                  coarse_component,
+    const DoFHandler<dim,spacedim>              &fine_grid,
+    const unsigned int                  fine_component,
+    const InterGridMap<DoFHandler<dim,spacedim> > &coarse_to_fine_grid_map,
+    std::vector<std::map<types::global_dof_index, float> > &transfer_representation)
+  {
+    // weights[i] is the sparse row of weights with which coarse-grid dof
+    // i contributes to the parameter dofs on the fine grid, keyed by
+    // column number. a full matrix would be faster, but its memory
+    // consumption is quadratic in the number of dofs; one map per row
+    // keeps it linear and is still fast enough.
+    //
+    // since the weights are usually (negative) powers of 2, float is
+    // chosen as value type rather than double to save some more memory.
+    std::vector<std::map<types::global_dof_index, float> > weights;
+
+    // there is one entry for each dof on the fine grid: the column in
+    // weights if it is a parameter dof, and numbers::invalid_dof_index
+    // for any other dof
+    std::vector<types::global_dof_index> weight_mapping;
+
+    internal::compute_intergrid_weights_1 (coarse_grid, coarse_component,
+                                           fine_grid, fine_component,
+                                           coarse_to_fine_grid_map,
+                                           weights, weight_mapping);
+
+    // the parameter dofs are the valid entries of weight_mapping. count
+    // the invalid ones with std::count instead of std::count_if and
+    // std::bind2nd, which was deprecated in C++11 and removed in C++17
+    const types::global_dof_index n_invalid_entries
+      = std::count (weight_mapping.begin(), weight_mapping.end(),
+                    numbers::invalid_dof_index);
+    const types::global_dof_index n_global_parm_dofs
+      = weight_mapping.size() - n_invalid_entries;
+
+    // first construct the inverse mapping of weight_mapping, i.e. from
+    // column numbers to global dof indices on the fine grid
+    std::vector<types::global_dof_index> inverse_weight_mapping (n_global_parm_dofs,
+        DoFHandler<dim,spacedim>::invalid_dof_index);
+    for (types::global_dof_index i=0; i<weight_mapping.size(); ++i)
+      {
+        const types::global_dof_index parameter_dof = weight_mapping[i];
+        // if this global dof is a parameter
+        if (parameter_dof != numbers::invalid_dof_index)
+          {
+            Assert (parameter_dof < n_global_parm_dofs, ExcInternalError());
+            Assert ((inverse_weight_mapping[parameter_dof] == DoFHandler<dim,spacedim>::invalid_dof_index),
+                    ExcInternalError());
+
+            inverse_weight_mapping[parameter_dof] = i;
+          }
+      }
+
+    // next copy over the weights array, transposing it and replacing the
+    // column numbers by the fine-grid dof indices they stand for
+    const types::global_dof_index n_rows = weight_mapping.size();
+
+    transfer_representation.clear ();
+    transfer_representation.resize (n_rows);
+
+    const types::global_dof_index n_coarse_dofs = coarse_grid.n_dofs();
+    for (types::global_dof_index i=0; i<n_coarse_dofs; ++i)
+      {
+        std::map<types::global_dof_index, float>::const_iterator j = weights[i].begin();
+        for (; j!=weights[i].end(); ++j)
+          {
+            const types::global_dof_index p = inverse_weight_mapping[j->first];
+            Assert (p<n_rows, ExcInternalError());
+
+            transfer_representation[p][i] = j->second;
+          }
+      }
+  }
+
+
+
+  template <int dim, int spacedim, template <int, int> class DoFHandlerType>
+  void
+  make_zero_boundary_constraints (const DoFHandlerType<dim, spacedim> &dof,
+                                  const types::boundary_id             boundary_id,
+                                  ConstraintMatrix                    &zero_boundary_constraints,
+                                  const ComponentMask                 &component_mask)
+  {
+    Assert (component_mask.represents_n_components(dof.get_fe().n_components()),
+            ExcMessage ("The number of components in the mask has to be either "
+                        "zero or equal to the number of components in the finite "
+                        "element."));
+
+    const unsigned int n_components = DoFTools::n_components (dof);
+
+    Assert (component_mask.n_selected_components(n_components) > 0,
+            ComponentMask::ExcNoComponentSelected());
+
+    // scratch arrays for the global dof indices of the current cell and
+    // of the current face
+    std::vector<types::global_dof_index> indices_on_cell;
+    indices_on_cell.reserve (max_dofs_per_cell(dof));
+    std::vector<types::global_dof_index> indices_on_face;
+    indices_on_face.reserve (max_dofs_per_face(dof));
+
+    typename DoFHandlerType<dim,spacedim>::active_cell_iterator
+    cell = dof.begin_active();
+    const typename DoFHandlerType<dim,spacedim>::active_cell_iterator
+    end_cell = dof.end();
+    for (; cell != end_cell; ++cell)
+      {
+        // only non-artificial cells at the boundary are of interest
+        if (cell->is_artificial() || !cell->at_boundary ())
+          continue;
+
+        const FiniteElement<dim,spacedim> &fe = cell->get_fe();
+
+        indices_on_cell.resize (fe.dofs_per_cell);
+        cell->get_dof_indices (indices_on_cell);
+
+        for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+          {
+            const typename DoFHandlerType<dim,spacedim>::face_iterator face = cell->face(f);
+
+            // skip interior faces
+            if (!face->at_boundary ())
+              continue;
+            // skip faces with a different boundary indicator, unless no
+            // particular indicator was asked for
+            if ((boundary_id != numbers::invalid_boundary_id)
+                &&
+                (face->boundary_id() != boundary_id))
+              continue;
+
+            indices_on_face.resize (fe.dofs_per_face);
+            face->get_dof_indices (indices_on_face, cell->active_fe_index());
+
+            for (unsigned int i=0; i<indices_on_face.size(); ++i)
+              {
+                // look up the face dof among the cell dofs to obtain its
+                // index within the cell
+                const std::vector<types::global_dof_index>::iterator position
+                  = std::find (indices_on_cell.begin(), indices_on_cell.end(),
+                               indices_on_face[i]);
+                Assert (position != indices_on_cell.end(), ExcInvalidIterator());
+                const unsigned int index_on_cell
+                  = std::distance (indices_on_cell.begin(), position);
+
+                // constrain the dof if its shape function is nonzero in
+                // at least one of the selected components
+                const ComponentMask &nonzero_components
+                  = fe.get_nonzero_components (index_on_cell);
+                unsigned int c = 0;
+                while ((c < n_components)
+                       &&
+                       !(nonzero_components[c] && component_mask[c]))
+                  ++c;
+
+                if (c < n_components)
+                  zero_boundary_constraints.add_line (indices_on_face[i]);
+              }
+          }
+      }
+  }
+
+
+
+  template <int dim, int spacedim, template <int, int> class DoFHandlerType>
+  void
+  make_zero_boundary_constraints (const DoFHandlerType<dim, spacedim> &dof,
+                                  ConstraintMatrix                    &zero_boundary_constraints,
+                                  const ComponentMask                 &component_mask)
+  {
+    // passing numbers::invalid_boundary_id selects all boundary faces
+    make_zero_boundary_constraints (dof, numbers::invalid_boundary_id, zero_boundary_constraints, component_mask);
+  }
+
+
+} // end of namespace DoFTools
+
+
+
+// explicit instantiations
+
+#include "dof_tools_constraints.inst"
+
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/dofs/dof_tools_constraints.inst.in b/source/dofs/dof_tools_constraints.inst.in
new file mode 100644
index 0000000..b652ebd
--- /dev/null
+++ b/source/dofs/dof_tools_constraints.inst.in
@@ -0,0 +1,130 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+// Instantiations for every DH in DOFHANDLERS and every dimension in DIMENSIONS
+for (DH : DOFHANDLERS; deal_II_dimension : DIMENSIONS)
+{
+  template
+  void
+  DoFTools::make_hanging_node_constraints (const DH &dof_handler,
+                                           ConstraintMatrix &constraints);
+  // the periodicity functions are only instantiated for dimensions other than 1
+#if deal_II_dimension != 1
+  template
+  void
+  DoFTools::make_periodicity_constraints (const DH::face_iterator &,
+                                          const DH::face_iterator &,
+                                          dealii::ConstraintMatrix &,
+                                          const ComponentMask &,
+                                          bool, bool, bool,
+                                          const FullMatrix<double> &,
+                                          const std::vector<unsigned int> &);
+  // overload taking a vector of GridTools::PeriodicFacePair objects
+  template
+  void
+  DoFTools::make_periodicity_constraints<DH>
+  (const std::vector<GridTools::PeriodicFacePair<DH::cell_iterator> > &,
+   dealii::ConstraintMatrix &,
+   const ComponentMask &,
+   const std::vector<unsigned int> &);
+
+  // overload taking two boundary ids
+  template
+  void
+  DoFTools::make_periodicity_constraints(const DH &,
+                                         const types::boundary_id,
+                                         const types::boundary_id,
+                                         const int,
+                                         dealii::ConstraintMatrix &,
+                                         const ComponentMask &);
+  // overload taking a single boundary id
+  template
+  void
+  DoFTools::make_periodicity_constraints(const DH &,
+                                         const types::boundary_id,
+                                         const int,
+                                         dealii::ConstraintMatrix &,
+                                         const ComponentMask &);
+#endif
+}
+// Instantiations that are enumerated by dimension only
+for (deal_II_dimension : DIMENSIONS)
+{
+#if deal_II_dimension < 3
+// codimension one: DoFHandler<dim,dim+1>, which exists only for dim < 3
+template
+void
+DoFTools::
+make_hanging_node_constraints (const DoFHandler<deal_II_dimension,deal_II_dimension+1> &dof_handler,
+                               ConstraintMatrix &constraints);
+#endif
+// codimension two: DoFHandler<1,3>, emitted once while the dimension is 3
+#if deal_II_dimension == 3
+template
+void
+DoFTools::
+make_hanging_node_constraints (const DoFHandler<1,3> &dof_handler,
+                               ConstraintMatrix &constraints);
+#endif
+// make_zero_boundary_constraints for DoFHandler, without and with a boundary id
+template
+void
+DoFTools::make_zero_boundary_constraints
+(const DoFHandler<deal_II_dimension> &,
+ ConstraintMatrix                    &,
+ const ComponentMask             &);
+
+template
+void
+DoFTools::make_zero_boundary_constraints
+(const DoFHandler<deal_II_dimension> &,
+ const types::boundary_id          ,
+ ConstraintMatrix                    &,
+ const ComponentMask             &);
+// the same two overloads for hp::DoFHandler
+template
+void
+DoFTools::make_zero_boundary_constraints
+(const hp::DoFHandler<deal_II_dimension> &,
+ ConstraintMatrix                        &,
+ const ComponentMask                 &);
+
+template
+void
+DoFTools::make_zero_boundary_constraints
+(const hp::DoFHandler<deal_II_dimension> &,
+ const types::boundary_id          ,
+ ConstraintMatrix                        &,
+ const ComponentMask                 &);
+// the intergrid functions are instantiated for DoFHandler<dim> only
+template
+void
+DoFTools::compute_intergrid_constraints<deal_II_dimension> (
+  const DoFHandler<deal_II_dimension> &, const unsigned int,
+  const DoFHandler<deal_II_dimension> &, const unsigned int,
+  const InterGridMap<DoFHandler<deal_II_dimension> > &,
+  ConstraintMatrix&);
+
+template
+void
+DoFTools::compute_intergrid_transfer_representation<deal_II_dimension>
+(const DoFHandler<deal_II_dimension> &, const unsigned int,
+ const DoFHandler<deal_II_dimension> &, const unsigned int,
+ const InterGridMap<DoFHandler<deal_II_dimension> > &,
+ std::vector<std::map<types::global_dof_index, float> > &);
+
+}
diff --git a/source/dofs/dof_tools_sparsity.cc b/source/dofs/dof_tools_sparsity.cc
new file mode 100644
index 0000000..733f218
--- /dev/null
+++ b/source/dofs/dof_tools_sparsity.cc
@@ -0,0 +1,1189 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/thread_management.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/table.h>
+#include <deal.II/base/template_constraints.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/lac/sparsity_pattern.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/lac/trilinos_sparsity_pattern.h>
+#include <deal.II/lac/block_sparsity_pattern.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/intergrid_map.h>
+#include <deal.II/grid/grid_tools.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/fe_tools.h>
+#include <deal.II/hp/fe_collection.h>
+#include <deal.II/hp/q_collection.h>
+#include <deal.II/hp/fe_values.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/numerics/vector_tools.h>
+
+
+#include <algorithm>
+#include <numeric>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+namespace DoFTools
+{
+
+  template <typename DoFHandlerType, typename SparsityPatternType>
+  void
+  make_sparsity_pattern (const DoFHandlerType      &dof,
+                         SparsityPatternType       &sparsity,
+                         const ConstraintMatrix    &constraints,
+                         const bool                 keep_constrained_dofs,
+                         const types::subdomain_id  subdomain_id)
+  {
+    const types::global_dof_index n_global_dofs = dof.n_dofs();
+    (void)n_global_dofs;
+
+    Assert (sparsity.n_rows() == n_global_dofs,
+            ExcDimensionMismatch (sparsity.n_rows(), n_global_dofs));
+    Assert (sparsity.n_cols() == n_global_dofs,
+            ExcDimensionMismatch (sparsity.n_cols(), n_global_dofs));
+
+    // With a parallel::distributed::Triangulation only the locally owned
+    // subdomain may be asked for. Leaving the subdomain unset is fine as
+    // well, since the loop below skips cells that are not locally owned.
+    Assert (
+      (dof.get_triangulation().locally_owned_subdomain() == numbers::invalid_subdomain_id)
+      ||
+      (subdomain_id == numbers::invalid_subdomain_id)
+      ||
+      (subdomain_id == dof.get_triangulation().locally_owned_subdomain()),
+      ExcMessage ("For parallel::distributed::Triangulation objects and "
+                  "associated DoF handler objects, asking for any subdomain other "
+                  "than the locally owned one does not make sense."));
+
+    const bool all_subdomains = (subdomain_id == numbers::invalid_subdomain_id);
+
+    std::vector<types::global_dof_index> cell_dof_indices;
+    cell_dof_indices.reserve (max_dofs_per_cell(dof));
+
+    typename DoFHandlerType::active_cell_iterator cell = dof.begin_active();
+    const typename DoFHandlerType::active_cell_iterator end_cell = dof.end();
+    for (; cell != end_cell; ++cell)
+      {
+        // only cells of the requested subdomain that are owned by the
+        // calling processor contribute; skip all others
+        if (!all_subdomains && (subdomain_id != cell->subdomain_id()))
+          continue;
+        if (!cell->is_locally_owned())
+          continue;
+
+        cell_dof_indices.resize (cell->get_fe().dofs_per_cell);
+        cell->get_dof_indices (cell_dof_indices);
+
+        // add the entries for this cell through the constraints object.
+        // if no constraints were given, this call acts as if simply no
+        // constraints existed
+        constraints.add_entries_local_to_global (cell_dof_indices,
+                                                 sparsity,
+                                                 keep_constrained_dofs);
+      }
+  }
+
+
+
+  template <typename DoFHandlerType, typename SparsityPatternType>
+  void
+  make_sparsity_pattern (const DoFHandlerType      &dof,
+                         const Table<2,Coupling>   &couplings,
+                         SparsityPatternType       &sparsity,
+                         const ConstraintMatrix    &constraints,
+                         const bool                 keep_constrained_dofs,
+                         const types::subdomain_id  subdomain_id)
+  {
+    const types::global_dof_index n_global_dofs = dof.n_dofs();
+    (void)n_global_dofs;
+
+    // the sparsity pattern needs one row and column per dof, the table
+    // of couplings one row and column per vector component
+    Assert (sparsity.n_rows() == n_global_dofs,
+            ExcDimensionMismatch (sparsity.n_rows(), n_global_dofs));
+    Assert (sparsity.n_cols() == n_global_dofs,
+            ExcDimensionMismatch (sparsity.n_cols(), n_global_dofs));
+    Assert (couplings.n_rows() == dof.get_fe().n_components(),
+            ExcDimensionMismatch(couplings.n_rows(), dof.get_fe().n_components()));
+    Assert (couplings.n_cols() == dof.get_fe().n_components(),
+            ExcDimensionMismatch(couplings.n_cols(), dof.get_fe().n_components()));
+
+    // With a parallel::distributed::Triangulation only the locally owned
+    // subdomain may be asked for. Leaving the subdomain unset is fine as
+    // well, since the loop below skips cells that are not locally owned.
+    Assert (
+      (dof.get_triangulation().locally_owned_subdomain() == numbers::invalid_subdomain_id)
+      ||
+      (subdomain_id == numbers::invalid_subdomain_id)
+      ||
+      (subdomain_id == dof.get_triangulation().locally_owned_subdomain()),
+      ExcMessage ("For parallel::distributed::Triangulation objects and "
+                  "associated DoF handler objects, asking for any subdomain other "
+                  "than the locally owned one does not make sense."));
+
+    const hp::FECollection<DoFHandlerType::dimension,DoFHandlerType::space_dimension> fe_collection (dof.get_fe());
+
+    // find out whether any pair of components is excluded at all. if the
+    // table of couplings contains no entry equal to none, the dof masks
+    // are not needed and do not have to be set up
+    bool need_dof_mask = false;
+    for (unsigned int row=0; row<couplings.n_rows(); ++row)
+      for (unsigned int col=0; col<couplings.n_cols(); ++col)
+        if (couplings(row,col) == none)
+          need_dof_mask = true;
+
+    // otherwise translate, for each finite element, the table given in
+    // terms of components into a mask in terms of the dofs on a cell. if
+    // both shape functions are primitive, their components are looked up
+    // directly; otherwise each is represented by the first component in
+    // which it is nonzero
+    std::vector<Table<2,bool> > dof_mask(fe_collection.size());
+    if (need_dof_mask)
+      for (unsigned int f=0; f<fe_collection.size(); ++f)
+        {
+          const FiniteElement<DoFHandlerType::dimension,DoFHandlerType::space_dimension>
+          &fe = fe_collection[f];
+          const unsigned int n_local_dofs = fe.dofs_per_cell;
+
+          dof_mask[f].reinit (n_local_dofs, n_local_dofs);
+
+          for (unsigned int i=0; i<n_local_dofs; ++i)
+            for (unsigned int j=0; j<n_local_dofs; ++j)
+              {
+                unsigned int component_i, component_j;
+                if (fe.is_primitive(i) && fe.is_primitive(j))
+                  {
+                    component_i = fe.system_to_component_index(i).first;
+                    component_j = fe.system_to_component_index(j).first;
+                  }
+                else
+                  {
+                    component_i = fe.get_nonzero_components(i).first_selected_component();
+                    component_j = fe.get_nonzero_components(j).first_selected_component();
+                    Assert (component_i < fe.n_components(), ExcInternalError());
+                    Assert (component_j < fe.n_components(), ExcInternalError());
+                  }
+
+                dof_mask[f](i,j) = (couplings(component_i,component_j) != none);
+              }
+        }
+
+
+    std::vector<types::global_dof_index> cell_dof_indices(fe_collection.max_dofs_per_cell());
+    const bool all_subdomains = (subdomain_id == numbers::invalid_subdomain_id);
+
+    typename DoFHandlerType::active_cell_iterator cell = dof.begin_active();
+    const typename DoFHandlerType::active_cell_iterator end_cell = dof.end();
+    for (; cell != end_cell; ++cell)
+      {
+        // only cells of the requested subdomain that are owned by the
+        // calling processor contribute; skip all others
+        if (!all_subdomains && (subdomain_id != cell->subdomain_id()))
+          continue;
+        if (!cell->is_locally_owned())
+          continue;
+
+        const unsigned int fe_index = cell->active_fe_index();
+
+        cell_dof_indices.resize (fe_collection[fe_index].dofs_per_cell);
+        cell->get_dof_indices (cell_dof_indices);
+
+        // add the entries for this cell through the constraints object,
+        // handing it the mask that belongs to the cell's finite element.
+        // if no constraints were given, the call acts as if none existed
+        constraints.add_entries_local_to_global (cell_dof_indices,
+                                                 sparsity,
+                                                 keep_constrained_dofs,
+                                                 dof_mask[fe_index]);
+      }
+  }
+
+
+
+  namespace
+  {
+    // Helper for the two-DoFHandler make_sparsity_pattern() below: couple
+    // every dof on @p cell_row (rows) with every dof on @p cell_col
+    // (columns). The two index vectors are scratch space owned by the
+    // caller, so that they are not reallocated for every pair of cells.
+    template <typename RowCellIterator, typename ColCellIterator, typename SparsityPatternType>
+    void
+    add_cell_pair_to_sparsity (const RowCellIterator                &cell_row,
+                               const ColCellIterator                &cell_col,
+                               std::vector<types::global_dof_index> &indices_row,
+                               std::vector<types::global_dof_index> &indices_col,
+                               SparsityPatternType                  &sparsity)
+    {
+      indices_row.resize (cell_row->get_fe().dofs_per_cell);
+      indices_col.resize (cell_col->get_fe().dofs_per_cell);
+      cell_row->get_dof_indices (indices_row);
+      cell_col->get_dof_indices (indices_col);
+
+      for (unsigned int r=0; r<indices_row.size(); ++r)
+        sparsity.add_entries (indices_row[r],
+                              indices_col.begin(),
+                              indices_col.end());
+    }
+  }
+
+
+
+  // Build the sparsity pattern that couples the dofs of @p dof_row (rows)
+  // with those of @p dof_col (columns). The list of cell pairs returned
+  // by GridTools::get_finest_common_cells() is traversed; if one cell of
+  // a pair has children, each of its active descendants is coupled with
+  // the other cell of the pair.
+  template <typename DoFHandlerType, typename SparsityPatternType>
+  void
+  make_sparsity_pattern (const DoFHandlerType &dof_row,
+                         const DoFHandlerType &dof_col,
+                         SparsityPatternType  &sparsity)
+  {
+    const types::global_dof_index n_dofs_row = dof_row.n_dofs();
+    const types::global_dof_index n_dofs_col = dof_col.n_dofs();
+    (void)n_dofs_row;
+    (void)n_dofs_col;
+
+    Assert (sparsity.n_rows() == n_dofs_row,
+            ExcDimensionMismatch (sparsity.n_rows(), n_dofs_row));
+    Assert (sparsity.n_cols() == n_dofs_col,
+            ExcDimensionMismatch (sparsity.n_cols(), n_dofs_col));
+
+    typedef typename DoFHandlerType::cell_iterator        CellIterator;
+    typedef typename DoFHandlerType::active_cell_iterator ActiveCellIterator;
+    typedef std::list<std::pair<CellIterator, CellIterator> > CellPairList;
+
+    const CellPairList cell_list
+      = GridTools::get_finest_common_cells (dof_row, dof_col);
+
+    // scratch arrays, allocated once and reused for every pair of cells
+    std::vector<types::global_dof_index> local_dof_indices_row;
+    std::vector<types::global_dof_index> local_dof_indices_col;
+
+    for (typename CellPairList::const_iterator cell_iter = cell_list.begin();
+         cell_iter!=cell_list.end(); ++cell_iter)
+      {
+        const CellIterator cell_row = cell_iter->first;
+        const CellIterator cell_col = cell_iter->second;
+
+        if (!cell_row->has_children() && !cell_col->has_children())
+          {
+            // neither cell is refined further: couple them directly
+            add_cell_pair_to_sparsity (cell_row, cell_col,
+                                       local_dof_indices_row,
+                                       local_dof_indices_col,
+                                       sparsity);
+          }
+        else if (cell_row->has_children())
+          {
+            // the row cell is refined: couple each of its active
+            // descendants with the column cell
+            const std::vector<ActiveCellIterator>
+            child_cells = GridTools::get_active_child_cells<DoFHandlerType> (cell_row);
+            for (unsigned int i=0; i<child_cells.size(); ++i)
+              add_cell_pair_to_sparsity (child_cells[i], cell_col,
+                                         local_dof_indices_row,
+                                         local_dof_indices_col,
+                                         sparsity);
+          }
+        else
+          {
+            // the column cell is refined: couple the row cell with each
+            // of the column cell's active descendants
+            const std::vector<ActiveCellIterator>
+            child_cells = GridTools::get_active_child_cells<DoFHandlerType> (cell_col);
+            for (unsigned int i=0; i<child_cells.size(); ++i)
+              add_cell_pair_to_sparsity (cell_row, child_cells[i],
+                                         local_dof_indices_row,
+                                         local_dof_indices_col,
+                                         sparsity);
+          }
+      }
+  }
+
+
+
+  template <typename DoFHandlerType, typename SparsityPatternType>
+  void
+  make_boundary_sparsity_pattern
+  (const DoFHandlerType                       &dof,
+   const std::vector<types::global_dof_index> &dof_to_boundary_mapping,
+   SparsityPatternType                        &sparsity)
+  {
+    if (DoFHandlerType::dimension == 1)
+      {
+        // 1d has only two boundary indicators, so it costs nothing to
+        // defer to the function that takes a list of indicators
+        typename DoFHandlerType::FunctionMap both_end_points;
+        both_end_points[0] = 0;
+        both_end_points[1] = 0;
+        make_boundary_sparsity_pattern<DoFHandlerType, SparsityPatternType>
+        (dof, both_end_points, dof_to_boundary_mapping, sparsity);
+        return;
+      }
+
+    const types::global_dof_index n_global_dofs = dof.n_dofs();
+    (void)n_global_dofs;
+
+    // the mapping has an entry for every dof, and the pattern is square
+    // with as many rows as there are boundary dofs
+    AssertDimension (dof_to_boundary_mapping.size(), n_global_dofs);
+    AssertDimension (sparsity.n_rows(), dof.n_boundary_dofs());
+    AssertDimension (sparsity.n_cols(), dof.n_boundary_dofs());
+#ifdef DEBUG
+    if (sparsity.n_rows() != 0)
+      {
+        // the largest valid entry of the mapping must be the last row
+        types::global_dof_index largest_index = 0;
+        for (std::vector<types::global_dof_index>::size_type k=0;
+             k<dof_to_boundary_mapping.size(); ++k)
+          if (dof_to_boundary_mapping[k] != DoFHandlerType::invalid_dof_index)
+            largest_index = std::max (largest_index, dof_to_boundary_mapping[k]);
+        AssertDimension (largest_index, sparsity.n_rows()-1);
+      }
+#endif
+
+    std::vector<types::global_dof_index> face_dof_indices;
+    face_dof_indices.reserve (max_dofs_per_face(dof));
+
+    // visit all faces at the boundary. boundary lines need no special
+    // treatment (via @p{cell->has_boundary_lines}): boundaries of
+    // dimension dim-2 are not supported, so every boundary line is also
+    // part of a boundary face.
+    typename DoFHandlerType::active_cell_iterator cell = dof.begin_active();
+    const typename DoFHandlerType::active_cell_iterator end_cell = dof.end();
+    for (; cell != end_cell; ++cell)
+      for (unsigned int f=0; f<GeometryInfo<DoFHandlerType::dimension>::faces_per_cell; ++f)
+        {
+          if (!cell->at_boundary(f))
+            continue;
+
+          const unsigned int n_face_dofs = cell->get_fe().dofs_per_face;
+          face_dof_indices.resize (n_face_dofs);
+          cell->face(f)->get_dof_indices (face_dof_indices,
+                                          cell->active_fe_index());
+
+          // couple all dofs on this face with each other
+          for (unsigned int i=0; i<n_face_dofs; ++i)
+            for (unsigned int j=0; j<n_face_dofs; ++j)
+              sparsity.add (dof_to_boundary_mapping[face_dof_indices[i]],
+                            dof_to_boundary_mapping[face_dof_indices[j]]);
+        }
+  }
+
+
+
+  // Build the sparsity pattern of a matrix that lives on (part of) the
+  // boundary: only faces whose boundary indicator is a key of
+  // @p boundary_ids contribute, and dof indices are translated through
+  // @p dof_to_boundary_mapping before being entered into @p sparsity.
+  template <typename DoFHandlerType, typename SparsityPatternType>
+  void make_boundary_sparsity_pattern
+  (const DoFHandlerType                                              &dof,
+   const typename FunctionMap<DoFHandlerType::space_dimension>::type &boundary_ids,
+   const std::vector<types::global_dof_index>                        &dof_to_boundary_mapping,
+   SparsityPatternType                                               &sparsity)
+  {
+    // in 1d the boundary consists of the two end points only; they are
+    // treated here and the generic code below is never reached
+    if (DoFHandlerType::dimension == 1)
+      {
+        // side 0 is the left end point, side 1 the right one
+        for (unsigned int side=0; side<2; ++side)
+          if (boundary_ids.find(side) != boundary_ids.end())
+            {
+              // start on the coarsest level, walk towards the requested
+              // side until the boundary is reached, then descend to the
+              // active cell sitting at that end point
+              typename DoFHandlerType::level_cell_iterator end_cell = dof.begin(0);
+              while (!end_cell->at_boundary(side))
+                end_cell = end_cell->neighbor(side);
+              while (!end_cell->active())
+                end_cell = end_cell->child(side);
+
+              // translate the dofs on the end point vertex into boundary
+              // dof numbers
+              const unsigned int n_vertex_dofs = end_cell->get_fe().dofs_per_vertex;
+              std::vector<types::global_dof_index> boundary_indices (n_vertex_dofs);
+              for (unsigned int v=0; v<n_vertex_dofs; ++v)
+                boundary_indices[v]
+                  = dof_to_boundary_mapping[end_cell->vertex_dof_index(side,v)];
+
+              // every dof on this vertex couples with every other one
+              for (unsigned int v=0; v<n_vertex_dofs; ++v)
+                sparsity.add_entries (boundary_indices[v],
+                                      boundary_indices.begin(),
+                                      boundary_indices.end());
+            }
+        return;
+      }
+
+    // n_dofs is only needed by the assertion right below
+    const types::global_dof_index n_dofs = dof.n_dofs();
+    (void)n_dofs;
+
+    AssertDimension (dof_to_boundary_mapping.size(), n_dofs);
+    Assert (boundary_ids.find(numbers::internal_face_boundary_id) == boundary_ids.end(),
+            typename DoFHandlerType::ExcInvalidBoundaryIndicator());
+    Assert (sparsity.n_rows() == dof.n_boundary_dofs (boundary_ids),
+            ExcDimensionMismatch (sparsity.n_rows(), dof.n_boundary_dofs (boundary_ids)));
+    Assert (sparsity.n_cols() == dof.n_boundary_dofs (boundary_ids),
+            ExcDimensionMismatch (sparsity.n_cols(), dof.n_boundary_dofs (boundary_ids)));
+#ifdef DEBUG
+    // the largest valid entry of the mapping must address the last row of
+    // the sparsity pattern
+    if (sparsity.n_rows() != 0)
+      {
+        types::global_dof_index largest_boundary_index = 0;
+        for (std::vector<types::global_dof_index>::size_type k=0;
+             k<dof_to_boundary_mapping.size(); ++k)
+          {
+            const types::global_dof_index candidate = dof_to_boundary_mapping[k];
+            if ((candidate != DoFHandlerType::invalid_dof_index) &&
+                (candidate > largest_boundary_index))
+              largest_boundary_index = candidate;
+          }
+        AssertDimension (largest_boundary_index, sparsity.n_rows()-1);
+      }
+#endif
+
+    std::vector<types::global_dof_index> face_dofs;
+    face_dofs.reserve (max_dofs_per_face(dof));
+    for (typename DoFHandlerType::active_cell_iterator cell = dof.begin_active(),
+         endc = dof.end();
+         cell!=endc; ++cell)
+      for (unsigned int face_no=0;
+           face_no<GeometryInfo<DoFHandlerType::dimension>::faces_per_cell;
+           ++face_no)
+        {
+          // skip every face whose boundary indicator was not selected
+          if (boundary_ids.find(cell->face(face_no)->boundary_id()) ==
+              boundary_ids.end())
+            continue;
+
+          const unsigned int n_face_dofs = cell->get_fe().dofs_per_face;
+          face_dofs.resize (n_face_dofs);
+          cell->face(face_no)->get_dof_indices (face_dofs,
+                                                cell->active_fe_index());
+
+          // all dofs on this face couple with each other
+          for (unsigned int row=0; row<n_face_dofs; ++row)
+            for (unsigned int col=0; col<n_face_dofs; ++col)
+              sparsity.add (dof_to_boundary_mapping[face_dofs[row]],
+                            dof_to_boundary_mapping[face_dofs[col]]);
+        }
+  }
+
+
+
+  template <typename DoFHandlerType, typename SparsityPatternType>
+  void
+  make_flux_sparsity_pattern (const DoFHandlerType      &dof,
+                              SparsityPatternType       &sparsity,
+                              const ConstraintMatrix    &constraints,
+                              const bool                 keep_constrained_dofs,
+                              const types::subdomain_id  subdomain_id)
+  // Adds to 'sparsity' the dof couplings within each selected cell and with its face neighbors.
+  // TODO: QA: reduce the indentation level of this method..., Maier 2012
+  // Every entry goes through constraints.add_entries_local_to_global() with 'keep_constrained_dofs'.
+  {
+    const types::global_dof_index n_dofs = dof.n_dofs();
+    (void)n_dofs;  // n_dofs is only read by the assertions below
+
+    AssertDimension (sparsity.n_rows(), n_dofs);
+    AssertDimension (sparsity.n_cols(), n_dofs);
+
+    // If we have a distributed::Triangulation, only the locally owned
+    // subdomain may be requested. Not setting a subdomain is also okay,
+    // because cells that are not locally owned are skipped in the loop below.
+    Assert (
+      (dof.get_triangulation().locally_owned_subdomain() == numbers::invalid_subdomain_id)
+      ||
+      (subdomain_id == numbers::invalid_subdomain_id)
+      ||
+      (subdomain_id == dof.get_triangulation().locally_owned_subdomain()),
+      ExcMessage ("For parallel::distributed::Triangulation objects and "
+                  "associated DoF handler objects, asking for any subdomain other "
+                  "than the locally owned one does not make sense."));
+
+    std::vector<types::global_dof_index> dofs_on_this_cell;
+    std::vector<types::global_dof_index> dofs_on_other_cell;
+    dofs_on_this_cell.reserve (max_dofs_per_cell(dof));
+    dofs_on_other_cell.reserve (max_dofs_per_cell(dof));
+    typename DoFHandlerType::active_cell_iterator cell = dof.begin_active(),
+                                                  endc = dof.end();
+
+    // TODO: in an old implementation, we used user flags before to tag
+    // faces that were already touched. this way, we could reduce the work
+    // a little bit. now, we instead add only data from one side. this
+    // should be OK, but we need to actually verify it.
+
+    // Only cells owned by the calling processor are processed and, if a
+    // valid subdomain_id was given, only those that belong to that
+    // subdomain. All other cells are skipped.
+    for (; cell!=endc; ++cell)
+      if (((subdomain_id == numbers::invalid_subdomain_id)
+           ||
+           (subdomain_id == cell->subdomain_id()))
+          &&
+          cell->is_locally_owned())
+        {
+          const unsigned int n_dofs_on_this_cell = cell->get_fe().dofs_per_cell;
+          dofs_on_this_cell.resize (n_dofs_on_this_cell);
+          cell->get_dof_indices (dofs_on_this_cell);
+
+          // make sparsity pattern for this cell. if no constraints pattern
+          // was given, then the following call acts as if simply no
+          // constraints existed
+          constraints.add_entries_local_to_global (dofs_on_this_cell,
+                                                   sparsity,
+                                                   keep_constrained_dofs);
+
+          for (unsigned int face = 0;
+               face < GeometryInfo<DoFHandlerType::dimension>::faces_per_cell;
+               ++face)
+            {
+              typename DoFHandlerType::face_iterator cell_face = cell->face(face);
+              if (! cell->at_boundary(face) )
+                {
+                  typename DoFHandlerType::level_cell_iterator neighbor = cell->neighbor(face);
+
+                  // in 1d, we do not need to worry whether the neighbor
+                  // might have children and then loop over those children.
+                  // rather, we descend directly to the neighbor's most
+                  // terminal child that is adjacent to the current cell
+                  if (DoFHandlerType::dimension==1)
+                    while (neighbor->has_children())
+                      neighbor = neighbor->child(face==0 ? 1 : 0);
+
+                  if (neighbor->has_children())
+                    {
+                      for (unsigned int sub_nr = 0;
+                           sub_nr != cell_face->number_of_children();
+                           ++sub_nr)
+                        {
+                          const typename DoFHandlerType::level_cell_iterator
+                          sub_neighbor
+                            = cell->neighbor_child_on_subface (face, sub_nr);
+
+                          const unsigned int n_dofs_on_neighbor
+                            = sub_neighbor->get_fe().dofs_per_cell;
+                          dofs_on_other_cell.resize (n_dofs_on_neighbor);
+                          sub_neighbor->get_dof_indices (dofs_on_other_cell);
+                          // couple this cell with the cell behind the subface, in both directions
+                          constraints.add_entries_local_to_global
+                          (dofs_on_this_cell, dofs_on_other_cell,
+                           sparsity, keep_constrained_dofs);
+                          constraints.add_entries_local_to_global
+                          (dofs_on_other_cell, dofs_on_this_cell,
+                           sparsity, keep_constrained_dofs);
+                          // only need to add this when the neighbor is not
+                          // owned by the current processor, otherwise we add
+                          // the entries for the neighbor there
+                          if (sub_neighbor->subdomain_id() != cell->subdomain_id())
+                            constraints.add_entries_local_to_global
+                            (dofs_on_other_cell, sparsity, keep_constrained_dofs);
+                        }
+                    }
+                  else
+                    {
+                      // Refinement edges are taken care of by coarser
+                      // cells (as long as both are in the same subdomain)
+                      if (cell->neighbor_is_coarser(face) &&
+                          neighbor->subdomain_id() == cell->subdomain_id())
+                        continue;
+
+                      const unsigned int n_dofs_on_neighbor
+                        = neighbor->get_fe().dofs_per_cell;
+                      dofs_on_other_cell.resize (n_dofs_on_neighbor);
+
+                      neighbor->get_dof_indices (dofs_on_other_cell);
+
+                      constraints.add_entries_local_to_global
+                      (dofs_on_this_cell, dofs_on_other_cell,
+                       sparsity, keep_constrained_dofs);
+
+                      // the reverse coupling only needs to be added here if
+                      // cell->neighbor(face) is not active or the neighbor is
+                      // in another subdomain - otherwise, we touch each face
+                      // twice and hence put the indices the other way around
+                      if (!cell->neighbor(face)->active()
+                          ||
+                          (neighbor->subdomain_id() != cell->subdomain_id()))
+                        {
+                          constraints.add_entries_local_to_global
+                          (dofs_on_other_cell, dofs_on_this_cell,
+                           sparsity, keep_constrained_dofs);
+                          if (neighbor->subdomain_id() != cell->subdomain_id())
+                            constraints.add_entries_local_to_global
+                            (dofs_on_other_cell, sparsity, keep_constrained_dofs);
+                        }
+                    }
+                }
+            }
+        }
+  }
+
+
+
+  // Convenience overload without constraints: forwards to the overload
+  // taking a ConstraintMatrix, handing it an empty constraint object and
+  // leaving all further arguments at their defaults.
+  template <typename DoFHandlerType, typename SparsityPatternType>
+  void
+  make_flux_sparsity_pattern (const DoFHandlerType &dof,
+                              SparsityPatternType  &sparsity)
+  {
+    ConstraintMatrix no_constraints;
+    make_flux_sparsity_pattern (dof, sparsity, no_constraints);
+  }
+
+  // Expand a table of couplings between vector components into a table
+  // of couplings between the shape functions of @p fe.
+  //
+  // @p component_couplings must be a square table with fe.n_components()
+  // rows and columns. The returned table has fe.dofs_per_cell rows and
+  // columns; its entry (i,j) is component_couplings(ci,cj), where ci is
+  // the component of shape function i if that shape function is
+  // primitive, and the first nonzero component otherwise (likewise cj
+  // for shape function j).
+  template <int dim, int spacedim>
+  Table<2,Coupling>
+  dof_couplings_from_component_couplings (const FiniteElement<dim,spacedim> &fe,
+                                          const Table<2,Coupling> &component_couplings)
+  {
+    Assert(component_couplings.n_rows() == fe.n_components(),
+           ExcDimensionMismatch(component_couplings.n_rows(),
+                                fe.n_components()));
+    Assert(component_couplings.n_cols() == fe.n_components(),
+           ExcDimensionMismatch(component_couplings.n_cols(),
+                                fe.n_components()));
+
+    const unsigned int n_dofs = fe.dofs_per_cell;
+
+    // The component associated with a shape function does not depend on
+    // the other index of the table, so look it up once per shape function
+    // instead of once per table entry.
+    std::vector<unsigned int> component_of_dof (n_dofs);
+    for (unsigned int i=0; i<n_dofs; ++i)
+      {
+        component_of_dof[i]
+          = (fe.is_primitive(i) ?
+             fe.system_to_component_index(i).first
+             :
+             fe.get_nonzero_components(i).first_selected_component()
+            );
+        Assert (component_of_dof[i] < fe.n_components(), ExcInternalError());
+      }
+
+    Table<2,Coupling> dof_couplings (n_dofs, n_dofs);
+
+    for (unsigned int i=0; i<n_dofs; ++i)
+      for (unsigned int j=0; j<n_dofs; ++j)
+        dof_couplings(i,j) = component_couplings(component_of_dof[i],
+                                                 component_of_dof[j]);
+
+    return dof_couplings;
+  }
+
+
+
+  // hp variant: compute one dof coupling table for each element of the
+  // collection @p fe. Entry k of the returned vector belongs to fe[k].
+  template <int dim, int spacedim>
+  std::vector<Table<2,Coupling> >
+  dof_couplings_from_component_couplings
+  (const hp::FECollection<dim,spacedim> &fe,
+   const Table<2,Coupling> &component_couplings)
+  {
+    std::vector<Table<2,Coupling> > tables_per_element (fe.size());
+
+    unsigned int fe_index = 0;
+    while (fe_index < fe.size())
+      {
+        tables_per_element[fe_index]
+          = dof_couplings_from_component_couplings(fe[fe_index], component_couplings);
+        ++fe_index;
+      }
+
+    return tables_per_element;
+  }
+
+
+
+  namespace internal
+  {
+    namespace
+    {
+
+      // Helper for make_flux_sparsity_pattern() below: enter the couplings
+      // of shape function @p row (used as matrix row) with shape function
+      // @p col (used as matrix column) across one interior face.
+      //
+      // Up to four entries are added, in this order: (this,other),
+      // (other,this), (this,this), (other,other), where "this"/"other"
+      // say from which of the two dof index vectors the row and column
+      // index are taken. If @p coupling is 'always' all four are added; if
+      // it is 'nonzero' an entry is only added when the flags passed for
+      // both shape functions on the respective side are true; for any
+      // other value nothing is added.
+      template <typename SparsityPatternType>
+      void
+      add_flux_entries_for_pair (const Coupling                              coupling,
+                                 const unsigned int                          row,
+                                 const unsigned int                          col,
+                                 const bool                                  row_on_this_face,
+                                 const bool                                  row_on_neighbor_face,
+                                 const bool                                  col_on_this_face,
+                                 const bool                                  col_on_neighbor_face,
+                                 const std::vector<types::global_dof_index> &dofs_on_this_cell,
+                                 const std::vector<types::global_dof_index> &dofs_on_other_cell,
+                                 SparsityPatternType                        &sparsity)
+      {
+        if (coupling == always)
+          {
+            sparsity.add (dofs_on_this_cell[row],
+                          dofs_on_other_cell[col]);
+            sparsity.add (dofs_on_other_cell[row],
+                          dofs_on_this_cell[col]);
+            sparsity.add (dofs_on_this_cell[row],
+                          dofs_on_this_cell[col]);
+            sparsity.add (dofs_on_other_cell[row],
+                          dofs_on_other_cell[col]);
+          }
+        else if (coupling == nonzero)
+          {
+            if (row_on_this_face && col_on_neighbor_face)
+              sparsity.add (dofs_on_this_cell[row],
+                            dofs_on_other_cell[col]);
+            if (row_on_neighbor_face && col_on_this_face)
+              sparsity.add (dofs_on_other_cell[row],
+                            dofs_on_this_cell[col]);
+            if (row_on_this_face && col_on_this_face)
+              sparsity.add (dofs_on_this_cell[row],
+                            dofs_on_this_cell[col]);
+            if (row_on_neighbor_face && col_on_neighbor_face)
+              sparsity.add (dofs_on_other_cell[row],
+                            dofs_on_other_cell[col]);
+          }
+      }
+
+
+      // Helper for make_flux_sparsity_pattern() below: enter all flux
+      // couplings between the dofs of two cells that meet at a face.
+      // @p face is the face number as seen from the first cell,
+      // @p neighbor_face the one as seen from the second cell. For every
+      // pair (i,j) of shape functions, first the entries selected by
+      // flux_dof_mask(i,j) and then those selected by flux_dof_mask(j,i)
+      // are added.
+      template <typename SparsityPatternType>
+      void
+      add_flux_entries_across_face (const unsigned int                          dofs_per_cell,
+                                    const unsigned int                          face,
+                                    const unsigned int                          neighbor_face,
+                                    const Table<2,Coupling>                    &flux_dof_mask,
+                                    const Table<2,bool>                        &support_on_face,
+                                    const std::vector<types::global_dof_index> &dofs_on_this_cell,
+                                    const std::vector<types::global_dof_index> &dofs_on_other_cell,
+                                    SparsityPatternType                        &sparsity)
+      {
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          {
+            // "_i": support on the face of this cell, "_e": support on
+            // the matching face of the cell on the other side
+            const bool i_non_zero_i = support_on_face (i, face);
+            const bool i_non_zero_e = support_on_face (i, neighbor_face);
+            for (unsigned int j=0; j<dofs_per_cell; ++j)
+              {
+                const bool j_non_zero_i = support_on_face (j, face);
+                const bool j_non_zero_e = support_on_face (j, neighbor_face);
+
+                add_flux_entries_for_pair (flux_dof_mask(i,j), i, j,
+                                           i_non_zero_i, i_non_zero_e,
+                                           j_non_zero_i, j_non_zero_e,
+                                           dofs_on_this_cell, dofs_on_other_cell,
+                                           sparsity);
+                add_flux_entries_for_pair (flux_dof_mask(j,i), j, i,
+                                           j_non_zero_i, j_non_zero_e,
+                                           i_non_zero_i, i_non_zero_e,
+                                           dofs_on_this_cell, dofs_on_other_cell,
+                                           sparsity);
+              }
+          }
+      }
+
+
+      // implementation of the same function in namespace DoFTools for
+      // non-hp DoFHandlers
+      //
+      // @p int_mask selects which vector components couple within a cell,
+      // @p flux_mask which ones couple through face terms; both are first
+      // expanded to tables indexed by shape functions. Only locally owned
+      // cells are visited. A face that has been treated gets its user
+      // flag set on the neighbor's side, and faces whose user flag is set
+      // are skipped, so that each interior face is handled only once.
+      // NOTE(review): the user flags are neither cleared nor restored
+      // here; presumably the caller takes care of that -- please confirm.
+      template <typename DoFHandlerType, typename SparsityPatternType>
+      void
+      make_flux_sparsity_pattern (const DoFHandlerType    &dof,
+                                  SparsityPatternType     &sparsity,
+                                  const Table<2,Coupling> &int_mask,
+                                  const Table<2,Coupling> &flux_mask)
+      {
+        const FiniteElement<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &fe = dof.get_fe();
+
+        std::vector<types::global_dof_index> dofs_on_this_cell(fe.dofs_per_cell);
+        std::vector<types::global_dof_index> dofs_on_other_cell(fe.dofs_per_cell);
+
+        const Table<2,Coupling>
+        int_dof_mask  = dof_couplings_from_component_couplings(fe, int_mask),
+        flux_dof_mask = dof_couplings_from_component_couplings(fe, flux_mask);
+
+        // cache for every shape function and face whether the shape
+        // function has support on that face
+        Table<2,bool> support_on_face(fe.dofs_per_cell,
+                                      GeometryInfo<DoFHandlerType::dimension>::faces_per_cell);
+        for (unsigned int i=0; i<fe.dofs_per_cell; ++i)
+          for (unsigned int f=0; f<GeometryInfo<DoFHandlerType::dimension>::faces_per_cell; ++f)
+            support_on_face(i,f) = fe.has_support_on_face(i,f);
+
+        typename DoFHandlerType::active_cell_iterator cell = dof.begin_active(),
+                                                      endc = dof.end();
+        for (; cell!=endc; ++cell)
+          if (cell->is_locally_owned())
+            {
+              cell->get_dof_indices (dofs_on_this_cell);
+              // make sparsity pattern for this cell
+              for (unsigned int i=0; i<fe.dofs_per_cell; ++i)
+                for (unsigned int j=0; j<fe.dofs_per_cell; ++j)
+                  if (int_dof_mask(i,j) != none)
+                    sparsity.add (dofs_on_this_cell[i],
+                                  dofs_on_this_cell[j]);
+
+              // Loop over all interior neighbors
+              for (unsigned int face = 0;
+                   face < GeometryInfo<DoFHandlerType::dimension>::faces_per_cell;
+                   ++face)
+                {
+                  const typename DoFHandlerType::face_iterator
+                  cell_face = cell->face(face);
+                  // this face was already treated from the other side
+                  if (cell_face->user_flag_set ())
+                    continue;
+
+                  if (cell->at_boundary (face) )
+                    {
+                      // boundary faces only couple dofs of this cell
+                      for (unsigned int i=0; i<fe.dofs_per_cell; ++i)
+                        {
+                          const bool i_non_zero_i = support_on_face (i, face);
+                          for (unsigned int j=0; j<fe.dofs_per_cell; ++j)
+                            {
+                              const bool j_non_zero_i = support_on_face (j, face);
+
+                              if ((flux_dof_mask(i,j) == always)
+                                  ||
+                                  (flux_dof_mask(i,j) == nonzero
+                                   &&
+                                   i_non_zero_i
+                                   &&
+                                   j_non_zero_i))
+                                sparsity.add (dofs_on_this_cell[i],
+                                              dofs_on_this_cell[j]);
+                            }
+                        }
+                    }
+                  else
+                    {
+                      typename DoFHandlerType::level_cell_iterator
+                      neighbor = cell->neighbor(face);
+                      // Refinement edges are taken care of by coarser
+                      // cells
+                      if (cell->neighbor_is_coarser(face))
+                        continue;
+
+                      const unsigned int
+                      neighbor_face = cell->neighbor_of_neighbor(face);
+
+                      if (cell_face->has_children())
+                        {
+                          // the neighbor is refined: couple with each
+                          // of its children behind this face
+                          for (unsigned int sub_nr = 0;
+                               sub_nr != cell_face->n_children();
+                               ++sub_nr)
+                            {
+                              const typename DoFHandlerType::level_cell_iterator
+                              sub_neighbor
+                                = cell->neighbor_child_on_subface (face, sub_nr);
+
+                              sub_neighbor->get_dof_indices (dofs_on_other_cell);
+                              add_flux_entries_across_face (fe.dofs_per_cell,
+                                                            face, neighbor_face,
+                                                            flux_dof_mask,
+                                                            support_on_face,
+                                                            dofs_on_this_cell,
+                                                            dofs_on_other_cell,
+                                                            sparsity);
+                              sub_neighbor->face(neighbor_face)->set_user_flag ();
+                            }
+                        }
+                      else
+                        {
+                          neighbor->get_dof_indices (dofs_on_other_cell);
+                          add_flux_entries_across_face (fe.dofs_per_cell,
+                                                        face, neighbor_face,
+                                                        flux_dof_mask,
+                                                        support_on_face,
+                                                        dofs_on_this_cell,
+                                                        dofs_on_other_cell,
+                                                        sparsity);
+                          neighbor->face(neighbor_face)->set_user_flag ();
+                        }
+                    }
+                }
+            }
+      }
+
+
+      // implementation of the same function in namespace DoFTools for
+      // hp DoFHandlers
+      template <int dim, int spacedim, typename SparsityPatternType>
+      void
+      make_flux_sparsity_pattern (const dealii::hp::DoFHandler<dim,spacedim> &dof,
+                                  SparsityPatternType                        &sparsity,
+                                  const Table<2,Coupling>                    &int_mask,
+                                  const Table<2,Coupling>                    &flux_mask)
+      {
+        // while the implementation above is quite optimized and caches a
+        // lot of data (see e.g. the int/flux_dof_mask tables), this is no
+        // longer practical for the hp version since we would have to have
+        // it for all combinations of elements in the hp::FECollection.
+        // consequently, the implementation here is simpler and probably
+        // less efficient but at least readable...
+
+        const dealii::hp::FECollection<dim,spacedim> &fe = dof.get_fe();
+
+        std::vector<types::global_dof_index> dofs_on_this_cell(DoFTools::max_dofs_per_cell(dof));
+        std::vector<types::global_dof_index> dofs_on_other_cell(DoFTools::max_dofs_per_cell(dof));
+
+        const std::vector<Table<2,Coupling> >
+        int_dof_mask
+          = dof_couplings_from_component_couplings(fe, int_mask);
+
+        typename dealii::hp::DoFHandler<dim,spacedim>::active_cell_iterator
+        cell = dof.begin_active(),
+        endc = dof.end();
+        for (; cell!=endc; ++cell)
+          {
+            dofs_on_this_cell.resize (cell->get_fe().dofs_per_cell);
+            cell->get_dof_indices (dofs_on_this_cell);
+
+            // make sparsity pattern for this cell
+            for (unsigned int i=0; i<cell->get_fe().dofs_per_cell; ++i)
+              for (unsigned int j=0; j<cell->get_fe().dofs_per_cell; ++j)
+                if (int_dof_mask[cell->active_fe_index()](i,j) != none)
+                  sparsity.add (dofs_on_this_cell[i],
+                                dofs_on_this_cell[j]);
+
+            // Loop over all interior neighbors
+            for (unsigned int face = 0;
+                 face < GeometryInfo<dim>::faces_per_cell;
+                 ++face)
+              {
+                const typename dealii::hp::DoFHandler<dim,spacedim>::face_iterator
+                cell_face = cell->face(face);
+                if (cell_face->user_flag_set ())
+                  continue;
+
+                if (cell->at_boundary (face) )
+                  {
+                    for (unsigned int i=0; i<cell->get_fe().dofs_per_cell; ++i)
+                      for (unsigned int j=0; j<cell->get_fe().dofs_per_cell; ++j)
+                        if ((flux_mask(cell->get_fe().system_to_component_index(i).first,
+                                       cell->get_fe().system_to_component_index(j).first)
+                             == always)
+                            ||
+                            (flux_mask(cell->get_fe().system_to_component_index(i).first,
+                                       cell->get_fe().system_to_component_index(j).first)
+                             == nonzero))
+                          sparsity.add (dofs_on_this_cell[i],
+                                        dofs_on_this_cell[j]);
+                  }
+                else
+                  {
+                    typename dealii::hp::DoFHandler<dim,spacedim>::level_cell_iterator
+                    neighbor = cell->neighbor(face);
+
+                    // Refinement edges are taken care of by coarser cells
+                    if (cell->neighbor_is_coarser(face))
+                      continue;
+
+                    const unsigned int
+                    neighbor_face = cell->neighbor_of_neighbor(face);
+
+                    if (cell_face->has_children())
+                      {
+                        for (unsigned int sub_nr = 0;
+                             sub_nr != cell_face->n_children();
+                             ++sub_nr)
+                          {
+                            const typename dealii::hp::DoFHandler<dim,spacedim>::level_cell_iterator
+                            sub_neighbor
+                              = cell->neighbor_child_on_subface (face, sub_nr);
+
+                            dofs_on_other_cell.resize (sub_neighbor->get_fe().dofs_per_cell);
+                            sub_neighbor->get_dof_indices (dofs_on_other_cell);
+                            for (unsigned int i=0; i<cell->get_fe().dofs_per_cell; ++i)
+                              {
+                                for (unsigned int j=0; j<sub_neighbor->get_fe().dofs_per_cell;
+                                     ++j)
+                                  {
+                                    if ((flux_mask(cell->get_fe().system_to_component_index(i).first,
+                                                   sub_neighbor->get_fe().system_to_component_index(j).first)
+                                         == always)
+                                        ||
+                                        (flux_mask(cell->get_fe().system_to_component_index(i).first,
+                                                   sub_neighbor->get_fe().system_to_component_index(j).first)
+                                         == nonzero))
+                                      {
+                                        sparsity.add (dofs_on_this_cell[i],
+                                                      dofs_on_other_cell[j]);
+                                        sparsity.add (dofs_on_other_cell[i],
+                                                      dofs_on_this_cell[j]);
+                                        sparsity.add (dofs_on_this_cell[i],
+                                                      dofs_on_this_cell[j]);
+                                        sparsity.add (dofs_on_other_cell[i],
+                                                      dofs_on_other_cell[j]);
+                                      }
+
+                                    if ((flux_mask(sub_neighbor->get_fe().system_to_component_index(j).first,
+                                                   cell->get_fe().system_to_component_index(i).first)
+                                         == always)
+                                        ||
+                                        (flux_mask(sub_neighbor->get_fe().system_to_component_index(j).first,
+                                                   cell->get_fe().system_to_component_index(i).first)
+                                         == nonzero))
+                                      {
+                                        sparsity.add (dofs_on_this_cell[j],
+                                                      dofs_on_other_cell[i]);
+                                        sparsity.add (dofs_on_other_cell[j],
+                                                      dofs_on_this_cell[i]);
+                                        sparsity.add (dofs_on_this_cell[j],
+                                                      dofs_on_this_cell[i]);
+                                        sparsity.add (dofs_on_other_cell[j],
+                                                      dofs_on_other_cell[i]);
+                                      }
+                                  }
+                              }
+                            sub_neighbor->face(neighbor_face)->set_user_flag ();
+                          }
+                      }
+                    else
+                      {
+                        dofs_on_other_cell.resize (neighbor->get_fe().dofs_per_cell);
+                        neighbor->get_dof_indices (dofs_on_other_cell);
+                        for (unsigned int i=0; i<cell->get_fe().dofs_per_cell; ++i)
+                          {
+                            for (unsigned int j=0; j<neighbor->get_fe().dofs_per_cell; ++j)
+                              {
+                                if ((flux_mask(cell->get_fe().system_to_component_index(i).first,
+                                               neighbor->get_fe().system_to_component_index(j).first)
+                                     == always)
+                                    ||
+                                    (flux_mask(cell->get_fe().system_to_component_index(i).first,
+                                               neighbor->get_fe().system_to_component_index(j).first)
+                                     == nonzero))
+                                  {
+                                    sparsity.add (dofs_on_this_cell[i],
+                                                  dofs_on_other_cell[j]);
+                                    sparsity.add (dofs_on_other_cell[i],
+                                                  dofs_on_this_cell[j]);
+                                    sparsity.add (dofs_on_this_cell[i],
+                                                  dofs_on_this_cell[j]);
+                                    sparsity.add (dofs_on_other_cell[i],
+                                                  dofs_on_other_cell[j]);
+                                  }
+
+                                if ((flux_mask(neighbor->get_fe().system_to_component_index(j).first,
+                                               cell->get_fe().system_to_component_index(i).first)
+                                     == always)
+                                    ||
+                                    (flux_mask(neighbor->get_fe().system_to_component_index(j).first,
+                                               cell->get_fe().system_to_component_index(i).first)
+                                     == nonzero))
+                                  {
+                                    sparsity.add (dofs_on_this_cell[j],
+                                                  dofs_on_other_cell[i]);
+                                    sparsity.add (dofs_on_other_cell[j],
+                                                  dofs_on_this_cell[i]);
+                                    sparsity.add (dofs_on_this_cell[j],
+                                                  dofs_on_this_cell[i]);
+                                    sparsity.add (dofs_on_other_cell[j],
+                                                  dofs_on_other_cell[i]);
+                                  }
+                              }
+                          }
+                        neighbor->face(neighbor_face)->set_user_flag ();
+                      }
+                  }
+              }
+          }
+      }
+    }
+
+  }
+
+
+
+
+  /**
+   * Build a flux sparsity pattern using two component coupling tables.
+   *
+   * This function only performs the argument checks and the bookkeeping
+   * of the triangulation's user flags; the pattern itself is assembled by
+   * internal::make_flux_sparsity_pattern().
+   *
+   * @param dof       DoF handler providing n_dofs(), get_fe() and
+   *                  get_triangulation().
+   * @param sparsity  Pattern to fill; must be n_dofs x n_dofs.
+   * @param int_mask  Coupling table; must be n_components x n_components.
+   * @param flux_mask Coupling table; must be n_components x n_components.
+   */
+  template <typename DoFHandlerType, typename SparsityPatternType>
+  void
+  make_flux_sparsity_pattern (const DoFHandlerType    &dof,
+                              SparsityPatternType     &sparsity,
+                              const Table<2,Coupling> &int_mask,
+                              const Table<2,Coupling> &flux_mask)
+  {
+    typedef
+    Triangulation<DoFHandlerType::dimension,DoFHandlerType::space_dimension>
+    TriangulationType;
+
+    // The sizes below are only consumed by the Assert statements. The
+    // (void) casts keep them "used" -- presumably for builds in which
+    // Assert expands to nothing.
+    const types::global_dof_index n_dofs = dof.n_dofs();
+    (void)n_dofs;
+    const unsigned int n_comp = dof.get_fe().n_components();
+    (void)n_comp;
+
+    // the sparsity pattern has to be square with one row per DoF
+    Assert (sparsity.n_rows() == n_dofs,
+            ExcDimensionMismatch (sparsity.n_rows(), n_dofs));
+    Assert (sparsity.n_cols() == n_dofs,
+            ExcDimensionMismatch (sparsity.n_cols(), n_dofs));
+
+    // both coupling tables have to be square with one row per component
+    Assert (int_mask.n_rows() == n_comp,
+            ExcDimensionMismatch (int_mask.n_rows(), n_comp));
+    Assert (int_mask.n_cols() == n_comp,
+            ExcDimensionMismatch (int_mask.n_cols(), n_comp));
+    Assert (flux_mask.n_rows() == n_comp,
+            ExcDimensionMismatch (flux_mask.n_rows(), n_comp));
+    Assert (flux_mask.n_cols() == n_comp,
+            ExcDimensionMismatch (flux_mask.n_cols(), n_comp));
+
+    // The worker function marks faces through the user flags of the
+    // triangulation. The DoF handler only hands out a const reference, so
+    // constness is cast away once here in order to clear and later restore
+    // the flags.
+    const TriangulationType &tria         = dof.get_triangulation();
+    TriangulationType       &mutable_tria = const_cast<TriangulationType &>(tria);
+
+    // remember the caller's flags, then start from a clean slate
+    std::vector<bool> saved_user_flags;
+    tria.save_user_flags (saved_user_flags);
+    mutable_tria.clear_user_flags ();
+
+    internal::make_flux_sparsity_pattern (dof, sparsity,
+                                          int_mask, flux_mask);
+
+    // put the triangulation back into the state in which we found it
+    mutable_tria.load_user_flags (saved_user_flags);
+  }
+
+
+} // end of namespace DoFTools
+
+
+// --------------------------------------------------- explicit instantiations
+
+#include "dof_tools_sparsity.inst"
+
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/dofs/dof_tools_sparsity.inst.in b/source/dofs/dof_tools_sparsity.inst.in
new file mode 100644
index 0000000..dc2f352
--- /dev/null
+++ b/source/dofs/dof_tools_sparsity.inst.in
@@ -0,0 +1,307 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+// Explicit instantiations of the DoFTools sparsity pattern functions. The
+// body of this loop is written once and is meant to be expanded for every
+// sparsity pattern type SP in SPARSITY_PATTERNS and every value of
+// deal_II_dimension in DIMENSIONS.
+// NOTE(review): the expansion is presumably done by the build system's
+// instantiation expansion step -- confirm.
+for (SP : SPARSITY_PATTERNS; deal_II_dimension : DIMENSIONS)
+  {
+    // make_sparsity_pattern(dof, sparsity, constraints, bool, unsigned int)
+    // for DoFHandler and hp::DoFHandler with spacedim == dim
+    template void
+    DoFTools::make_sparsity_pattern<DoFHandler<deal_II_dimension,deal_II_dimension>, SP>
+    (const DoFHandler<deal_II_dimension,deal_II_dimension> &dof,
+     SP    &sparsity,
+     const ConstraintMatrix &,
+     const bool,
+     const unsigned int);
+
+    template void
+    DoFTools::make_sparsity_pattern<hp::DoFHandler<deal_II_dimension,deal_II_dimension>, SP>
+    (const hp::DoFHandler<deal_II_dimension,deal_II_dimension> &dof,
+     SP    &sparsity,
+     const ConstraintMatrix &,
+     const bool,
+     const unsigned int);
+
+    // same, with an additional component coupling table as second argument
+    template void
+    DoFTools::make_sparsity_pattern<DoFHandler<deal_II_dimension,deal_II_dimension>, SP>
+    (const DoFHandler<deal_II_dimension,deal_II_dimension>&,
+     const Table<2,Coupling>&,
+     SP &,
+     const ConstraintMatrix &,
+     const bool,
+     const unsigned int);
+
+    template void
+    DoFTools::make_sparsity_pattern<hp::DoFHandler<deal_II_dimension,deal_II_dimension>, SP>
+    (const hp::DoFHandler<deal_II_dimension,deal_II_dimension>&,
+     const Table<2,Coupling>&,
+     SP &,
+     const ConstraintMatrix &,
+     const bool,
+     const unsigned int);
+
+    // overload taking separate row and column DoF handlers
+    template void
+    DoFTools::make_sparsity_pattern<DoFHandler<deal_II_dimension,deal_II_dimension>, SP>
+    (const DoFHandler<deal_II_dimension,deal_II_dimension> &dof_row,
+     const DoFHandler<deal_II_dimension,deal_II_dimension> &dof_col,
+     SP    &sparsity);
+
+    template void
+    DoFTools::make_sparsity_pattern<hp::DoFHandler<deal_II_dimension,deal_II_dimension>, SP>
+    (const hp::DoFHandler<deal_II_dimension,deal_II_dimension> &dof_row,
+     const hp::DoFHandler<deal_II_dimension,deal_II_dimension> &dof_col,
+     SP    &sparsity);
+
+    // make_boundary_sparsity_pattern, overload taking only the
+    // DoF-to-boundary index vector
+    template void
+    DoFTools::make_boundary_sparsity_pattern<DoFHandler<deal_II_dimension>,SP>
+    (const DoFHandler<deal_II_dimension>& dof,
+     const std::vector<types::global_dof_index>  &,
+     SP    &);
+
+    template void
+    DoFTools::make_boundary_sparsity_pattern<hp::DoFHandler<deal_II_dimension>,SP>
+    (const hp::DoFHandler<deal_II_dimension>& dof,
+     const std::vector<types::global_dof_index>  &,
+     SP    &);
+
+    // make_boundary_sparsity_pattern, overload that additionally takes a
+    // FunctionMap of boundary ids
+    template void
+    DoFTools::make_boundary_sparsity_pattern<DoFHandler<deal_II_dimension>,SP>
+    (const DoFHandler<deal_II_dimension>& dof,
+     const FunctionMap<deal_II_dimension>::type  &boundary_ids,
+     const std::vector<types::global_dof_index>  &dof_to_boundary_mapping,
+     SP    &sparsity);
+
+    template void
+    DoFTools::make_boundary_sparsity_pattern<hp::DoFHandler<deal_II_dimension>,SP>
+    (const hp::DoFHandler<deal_II_dimension>& dof,
+     const FunctionMap<deal_II_dimension>::type  &boundary_ids,
+     const std::vector<types::global_dof_index>  &dof_to_boundary_mapping,
+     SP    &sparsity);
+
+    // hp::DoFHandler with spacedim == dim+1, FunctionMap overload; only
+    // emitted while dim+1 does not exceed 3
+#if deal_II_dimension < 3
+    template void
+    DoFTools::make_boundary_sparsity_pattern<hp::DoFHandler<deal_II_dimension,deal_II_dimension+1>,SP>
+    (const hp::DoFHandler<deal_II_dimension,deal_II_dimension+1>& dof,
+     const FunctionMap<deal_II_dimension+1>::type  &boundary_ids,
+     const std::vector<types::global_dof_index>  &dof_to_boundary_mapping,
+     SP    &sparsity);
+ #endif
+
+    // make_flux_sparsity_pattern(dof, sparsity)
+    template void
+    DoFTools::make_flux_sparsity_pattern<DoFHandler<deal_II_dimension>,SP>
+    (const DoFHandler<deal_II_dimension> &dof,
+     SP    &sparsity);
+
+    template void
+    DoFTools::make_flux_sparsity_pattern<hp::DoFHandler<deal_II_dimension>,SP>
+    (const hp::DoFHandler<deal_II_dimension> &dof,
+     SP    &sparsity);
+
+
+    // make_flux_sparsity_pattern(dof, sparsity, constraints, bool,
+    // unsigned int)
+    template void
+    DoFTools::make_flux_sparsity_pattern<DoFHandler<deal_II_dimension>,SP>
+    (const DoFHandler<deal_II_dimension> &dof,
+     SP    &sparsity,
+     const ConstraintMatrix &constraints,
+     const bool, const unsigned int);
+
+    template void
+    DoFTools::make_flux_sparsity_pattern<hp::DoFHandler<deal_II_dimension>,SP>
+    (const hp::DoFHandler<deal_II_dimension> &dof,
+     SP    &sparsity,
+     const ConstraintMatrix &constraints,
+     const bool, const unsigned int);
+
+
+    // The overload taking two coupling tables is instantiated only for
+    // dim > 1.
+    // NOTE(review): the reason for excluding dim == 1 is not visible in
+    // this file -- confirm against dof_tools_sparsity.cc.
+#if deal_II_dimension > 1
+
+    template void
+    DoFTools::make_flux_sparsity_pattern<DoFHandler<deal_II_dimension>,SP>
+    (const DoFHandler<deal_II_dimension> &dof,
+     SP    &,
+     const Table<2,Coupling>&,
+     const Table<2,Coupling>&);
+
+    template void
+    DoFTools::make_flux_sparsity_pattern<hp::DoFHandler<deal_II_dimension>,SP>
+    (const hp::DoFHandler<deal_II_dimension> &dof,
+     SP    &,
+     const Table<2,Coupling>&,
+     const Table<2,Coupling>&);
+#endif
+
+    // Instantiations for handlers with spacedim == dim+1. They are guarded
+    // so that dim+1 never exceeds 3.
+#if deal_II_dimension < 3
+
+    template void
+    DoFTools::make_sparsity_pattern<DoFHandler<deal_II_dimension,deal_II_dimension+1>, SP>
+    (const DoFHandler<deal_II_dimension,deal_II_dimension+1> &dof,
+     SP    &sparsity,
+     const ConstraintMatrix &,
+     const bool,
+     const unsigned int);
+
+    template void
+    DoFTools::make_sparsity_pattern<hp::DoFHandler<deal_II_dimension,deal_II_dimension+1>, SP>
+    (const hp::DoFHandler<deal_II_dimension,deal_II_dimension+1> &dof,
+     SP    &sparsity,
+     const ConstraintMatrix &,
+     const bool,
+     const unsigned int);
+
+    template void
+    DoFTools::make_sparsity_pattern<DoFHandler<deal_II_dimension,deal_II_dimension+1>, SP>
+    (const DoFHandler<deal_II_dimension,deal_II_dimension+1>&,
+     const Table<2,Coupling>&,
+     SP &,
+     const ConstraintMatrix &,
+     const bool,
+     const unsigned int);
+
+    template void
+    DoFTools::make_sparsity_pattern<hp::DoFHandler<deal_II_dimension,deal_II_dimension+1>, SP>
+    (const hp::DoFHandler<deal_II_dimension,deal_II_dimension+1>&,
+     const Table<2,Coupling>&,
+     SP &,
+     const ConstraintMatrix &,
+     const bool,
+     const unsigned int);
+
+    template void
+    DoFTools::make_sparsity_pattern<DoFHandler<deal_II_dimension,deal_II_dimension+1>, SP>
+    (const DoFHandler<deal_II_dimension,deal_II_dimension+1> &dof_row,
+     const DoFHandler<deal_II_dimension,deal_II_dimension+1> &dof_col,
+     SP    &sparsity);
+
+    template void
+    DoFTools::make_sparsity_pattern<hp::DoFHandler<deal_II_dimension,deal_II_dimension+1>, SP>
+    (const hp::DoFHandler<deal_II_dimension,deal_II_dimension+1> &dof_row,
+     const hp::DoFHandler<deal_II_dimension,deal_II_dimension+1> &dof_col,
+     SP    &sparsity);
+
+    template void
+    DoFTools::make_boundary_sparsity_pattern<DoFHandler<deal_II_dimension,deal_II_dimension+1>,SP>
+    (const DoFHandler<deal_II_dimension,deal_II_dimension+1>& dof,
+     const std::vector<types::global_dof_index>  &,
+     SP    &);
+
+    // NOTE(review): the hp counterpart of the instantiation above is
+    // disabled; the reason is not visible in this file.
+    //template void
+    //DoFTools::make_boundary_sparsity_pattern<hp::DoFHandler<deal_II_dimension,deal_II_dimension+1>,SP>
+    //(const hp::DoFHandler<deal_II_dimension,deal_II_dimension+1>& dof,
+    // const std::vector<types::global_dof_index>  &,
+    // SP    &);
+
+    template void
+    DoFTools::make_boundary_sparsity_pattern<DoFHandler<deal_II_dimension,deal_II_dimension+1>,SP>
+    (const DoFHandler<deal_II_dimension,deal_II_dimension+1>& dof,
+     const FunctionMap<deal_II_dimension+1>::type  &boundary_ids,
+     const std::vector<types::global_dof_index>  &dof_to_boundary_mapping,
+     SP    &sparsity);
+
+    // NOTE(review): an instantiation with exactly this signature is active
+    // in the earlier "#if deal_II_dimension < 3" section of this loop,
+    // which may be why this copy is commented out -- confirm.
+    //template void
+    //DoFTools::make_boundary_sparsity_pattern<hp::DoFHandler<deal_II_dimension,deal_II_dimension+1>,SP>
+    //(const hp::DoFHandler<deal_II_dimension,deal_II_dimension+1>& dof,
+    // const FunctionMap<deal_II_dimension+1>::type  &boundary_ids,
+    // const std::vector<types::global_dof_index>  &dof_to_boundary_mapping,
+    // SP    &sparsity);
+
+#endif
+
+
+    // Instantiations for <1,3> handlers. These do not depend on
+    // deal_II_dimension, so they are emitted only in the pass where
+    // deal_II_dimension equals 3.
+#if deal_II_dimension == 3
+
+    template void
+    DoFTools::make_sparsity_pattern<DoFHandler<1,3>, SP>
+    (const DoFHandler<1,3> &dof,
+     SP    &sparsity,
+     const ConstraintMatrix &,
+     const bool,
+     const unsigned int);
+
+    template void
+    DoFTools::make_sparsity_pattern<hp::DoFHandler<1,3>, SP>
+    (const hp::DoFHandler<1,3> &dof,
+     SP    &sparsity,
+     const ConstraintMatrix &,
+     const bool,
+     const unsigned int);
+
+    template void
+    DoFTools::make_sparsity_pattern<DoFHandler<1,3>, SP>
+    (const DoFHandler<1,3>&,
+     const Table<2,Coupling>&,
+     SP &,
+     const ConstraintMatrix &,
+     const bool,
+     const unsigned int);
+
+    template void
+    DoFTools::make_sparsity_pattern<hp::DoFHandler<1,3>, SP>
+    (const hp::DoFHandler<1,3>&,
+     const Table<2,Coupling>&,
+     SP &,
+     const ConstraintMatrix &,
+     const bool,
+     const unsigned int);
+
+    template void
+    DoFTools::make_sparsity_pattern<DoFHandler<1,3>, SP>
+    (const DoFHandler<1,3> &dof_row,
+     const DoFHandler<1,3> &dof_col,
+     SP    &sparsity);
+
+    template void
+    DoFTools::make_sparsity_pattern<hp::DoFHandler<1,3>, SP>
+    (const hp::DoFHandler<1,3> &dof_row,
+     const hp::DoFHandler<1,3> &dof_col,
+     SP    &sparsity);
+
+    template void
+    DoFTools::make_boundary_sparsity_pattern<DoFHandler<1,3>,SP>
+    (const DoFHandler<1,3>& dof,
+     const std::vector<types::global_dof_index>  &,
+     SP    &);
+
+    template void
+    DoFTools::make_boundary_sparsity_pattern<hp::DoFHandler<1,3>,SP>
+    (const hp::DoFHandler<1,3>& dof,
+     const std::vector<types::global_dof_index>  &,
+     SP    &);
+
+    template void
+    DoFTools::make_boundary_sparsity_pattern<DoFHandler<1,3>,SP>
+    (const DoFHandler<1,3>& dof,
+     const FunctionMap<3>::type  &boundary_ids,
+     const std::vector<types::global_dof_index>  &dof_to_boundary_mapping,
+     SP    &sparsity);
+
+    template void
+    DoFTools::make_boundary_sparsity_pattern<hp::DoFHandler<1,3>,SP>
+    (const hp::DoFHandler<1,3>& dof,
+     const FunctionMap<3>::type  &boundary_ids,
+     const std::vector<types::global_dof_index>  &dof_to_boundary_mapping,
+     SP    &sparsity);
+
+#endif
+
+  }
+
+
+// dof_couplings_from_component_couplings() does not depend on a sparsity
+// pattern type, so it is instantiated in a loop over the dimension only.
+for (deal_II_dimension : DIMENSIONS)
+{
+  template
+  Table<2,DoFTools::Coupling>
+  DoFTools::dof_couplings_from_component_couplings
+  (const FiniteElement<deal_II_dimension> &fe,
+   const Table<2,DoFTools::Coupling> &component_couplings);
+}
diff --git a/source/dofs/number_cache.cc b/source/dofs/number_cache.cc
new file mode 100644
index 0000000..ea16f3d
--- /dev/null
+++ b/source/dofs/number_cache.cc
@@ -0,0 +1,54 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/dofs/number_cache.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace internal
+{
+  namespace DoFHandler
+  {
+    /**
+     * Default constructor. Both DoF counters start at zero; the remaining
+     * members are default-initialized.
+     */
+    NumberCache::NumberCache ()
+      : n_global_dofs (0), n_locally_owned_dofs (0)
+    {}
+
+
+    /**
+     * Reset the cache: empty the three containers and set both counters
+     * back to zero.
+     */
+    void NumberCache::clear ()
+    {
+      // containers first ...
+      locally_owned_dofs.clear();
+      n_locally_owned_dofs_per_processor.clear();
+      locally_owned_dofs_per_processor.clear();
+
+      // ... then the plain counters
+      n_locally_owned_dofs = 0;
+      n_global_dofs = 0;
+    }
+
+    /**
+     * Return the sum of what MemoryConsumption::memory_consumption()
+     * reports for each of the five data members.
+     */
+    std::size_t
+    NumberCache::memory_consumption () const
+    {
+      std::size_t total = 0;
+
+      total += MemoryConsumption::memory_consumption (n_global_dofs);
+      total += MemoryConsumption::memory_consumption (n_locally_owned_dofs);
+      total += MemoryConsumption::memory_consumption (locally_owned_dofs);
+      total += MemoryConsumption::memory_consumption (n_locally_owned_dofs_per_processor);
+      total += MemoryConsumption::memory_consumption (locally_owned_dofs_per_processor);
+
+      return total;
+    }
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/dummy.cc b/source/dummy.cc
new file mode 100644
index 0000000..37ebf06
--- /dev/null
+++ b/source/dummy.cc
@@ -0,0 +1,24 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/*
+ * Workaround for a bug in the Xcode generator.
+ *
+ * This file contains a dummy global symbol to trigger the link phase in
+ * the generated Xcode project.
+ */
+
+// NOTE(review): in C++ a namespace-scope 'const int' has internal linkage
+// unless it is declared 'extern', so this definition by itself does not
+// export a symbol. Presumably the mere presence of a source file is what
+// the workaround needs -- confirm before changing the linkage.
+const int global_symbol_42 = 42;
+
diff --git a/source/fe/CMakeLists.txt b/source/fe/CMakeLists.txt
new file mode 100644
index 0000000..d00504a
--- /dev/null
+++ b/source/fe/CMakeLists.txt
@@ -0,0 +1,111 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+# Put the current build directory at the front of the include search path.
+# NOTE(review): presumably this is where the generated *.inst files end up,
+# so that the sources can #include them -- confirm.
+INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_BINARY_DIR})
+
+# Source files that are compiled into the obj_fe object library.
+SET(_src
+  block_mask.cc
+  component_mask.cc
+  fe_abf.cc
+  fe_bdm.cc
+  fe.cc
+  fe_bernstein.cc
+  fe_data.cc
+  fe_dgp.cc
+  fe_dgp_monomial.cc
+  fe_dgp_nonparametric.cc
+  fe_dgq.cc
+  fe_dg_vector.cc
+  fe_face.cc
+  fe_nedelec.cc
+  fe_nothing.cc
+  fe_poly.cc
+  fe_poly_tensor.cc
+  fe_q_base.cc
+  fe_q.cc
+  fe_q_bubbles.cc
+  fe_q_dg0.cc
+  fe_q_hierarchical.cc
+  fe_q_iso_q1.cc
+  fe_rannacher_turek.cc
+  fe_raviart_thomas.cc
+  fe_raviart_thomas_nodal.cc
+  fe_system.cc
+  fe_tools.cc
+  fe_tools_interpolate.cc
+  fe_trace.cc
+  fe_values.cc
+  fe_values_inst2.cc
+  mapping_c1.cc
+  mapping_cartesian.cc
+  mapping.cc
+  mapping_fe_field.cc
+  mapping_q_generic.cc
+  mapping_q1.cc
+  mapping_q1_eulerian.cc
+  mapping_q.cc
+  mapping_q_eulerian.cc
+  )
+
+# Instantiation templates (*.inst.in) belonging to this directory. The list
+# is handed both to the library target and to EXPAND_INSTANTIATIONS.
+SET(_inst
+  fe_abf.inst.in
+  fe_bdm.inst.in
+  fe_bernstein.inst.in
+  fe_dgp.inst.in
+  fe_dgp_monomial.inst.in
+  fe_dgp_nonparametric.inst.in
+  fe_dgq.inst.in
+  fe_dg_vector.inst.in
+  fe_face.inst.in
+  fe.inst.in
+  fe_nedelec.inst.in
+  fe_nothing.inst.in
+  fe_poly.inst.in
+  fe_poly_tensor.inst.in
+  fe_q_base.inst.in
+  fe_q_bubbles.inst.in
+  fe_q_dg0.inst.in
+  fe_q_hierarchical.inst.in
+  fe_q.inst.in
+  fe_q_iso_q1.inst.in
+  fe_rannacher_turek.inst.in
+  fe_raviart_thomas.inst.in
+  fe_raviart_thomas_nodal.inst.in
+  fe_system.inst.in
+  fe_tools.inst.in
+  fe_tools_interpolate.inst.in
+  fe_trace.inst.in
+  fe_values.decl.1.inst.in
+  fe_values.decl.2.inst.in
+  fe_values.impl.1.inst.in
+  fe_values.impl.2.inst.in
+  fe_values.inst.in
+  mapping_c1.inst.in
+  mapping_cartesian.inst.in
+  mapping.inst.in
+  mapping_fe_field.inst.in
+  mapping_q_generic.inst.in
+  mapping_q1_eulerian.inst.in
+  mapping_q1.inst.in
+  mapping_q_eulerian.inst.in
+  mapping_q.inst.in
+  )
+
+# Collect the public fe headers so that they can be listed with the target.
+FILE(GLOB _header
+  ${CMAKE_SOURCE_DIR}/include/deal.II/fe/*.h
+  )
+
+# Define the object library for this directory and register its
+# instantiation templates.
+# NOTE(review): DEAL_II_ADD_LIBRARY and EXPAND_INSTANTIATIONS are project
+# macros defined elsewhere; EXPAND_INSTANTIATIONS presumably generates the
+# *.inst files from the *.inst.in templates -- confirm.
+DEAL_II_ADD_LIBRARY(obj_fe OBJECT ${_src} ${_header} ${_inst})
+EXPAND_INSTANTIATIONS(obj_fe "${_inst}")
diff --git a/source/fe/block_mask.cc b/source/fe/block_mask.cc
new file mode 100644
index 0000000..6156789
--- /dev/null
+++ b/source/fe/block_mask.cc
@@ -0,0 +1,53 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2012 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/fe/block_mask.h>
+
+#include <iostream>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * Write a BlockMask to @p out and return the stream.
+ *
+ * An empty mask is printed as "[all blocks selected]". Otherwise the
+ * entries are printed as a comma separated list of "true"/"false"
+ * enclosed in square brackets.
+ */
+std::ostream &operator << (std::ostream &out,
+                           const BlockMask &mask)
+{
+  const std::size_t n_entries = mask.block_mask.size();
+
+  // an empty mask gets its own, fixed representation
+  if (n_entries == 0)
+    return out << "[all blocks selected]";
+
+  out << '[';
+  for (std::size_t entry=0; entry<n_entries; ++entry)
+    {
+      // separator in front of every entry but the first
+      if (entry > 0)
+        out << ',';
+
+      if (mask.block_mask[entry])
+        out << "true";
+      else
+        out << "false";
+    }
+  return out << ']';
+}
+
+
+
+/**
+ * Return the size of this object plus what
+ * MemoryConsumption::memory_consumption() reports for the stored mask.
+ */
+std::size_t
+BlockMask::memory_consumption () const
+{
+  const std::size_t object_size = sizeof(*this);
+  const std::size_t mask_size   = MemoryConsumption::memory_consumption (block_mask);
+
+  return object_size + mask_size;
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/component_mask.cc b/source/fe/component_mask.cc
new file mode 100644
index 0000000..b0fc723
--- /dev/null
+++ b/source/fe/component_mask.cc
@@ -0,0 +1,53 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2012 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/fe/component_mask.h>
+
+#include <iostream>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * Write a ComponentMask to @p out and return the stream.
+ *
+ * An empty mask is printed as "[all components selected]". Otherwise the
+ * entries are printed as a comma separated list of "true"/"false"
+ * enclosed in square brackets.
+ */
+std::ostream &operator << (std::ostream &out,
+                           const ComponentMask &mask)
+{
+  const std::size_t n_entries = mask.component_mask.size();
+
+  // an empty mask gets its own, fixed representation
+  if (n_entries == 0)
+    return out << "[all components selected]";
+
+  out << '[';
+  for (std::size_t entry=0; entry<n_entries; ++entry)
+    {
+      // separator in front of every entry but the first
+      if (entry > 0)
+        out << ',';
+
+      if (mask.component_mask[entry])
+        out << "true";
+      else
+        out << "false";
+    }
+  return out << ']';
+}
+
+
+
+/**
+ * Return the size of this object plus what
+ * MemoryConsumption::memory_consumption() reports for the stored mask.
+ */
+std::size_t
+ComponentMask::memory_consumption () const
+{
+  const std::size_t object_size = sizeof(*this);
+  const std::size_t mask_size   = MemoryConsumption::memory_consumption (component_mask);
+
+  return object_size + mask_size;
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/fe.cc b/source/fe/fe.cc
new file mode 100644
index 0000000..a1f1104
--- /dev/null
+++ b/source/fe/fe.cc
@@ -0,0 +1,1252 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/fe/mapping.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/base/quadrature.h>
+#include <deal.II/base/qprojector.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/grid/tria_boundary.h>
+
+#include <algorithm>
+#include <functional>
+#include <numeric>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/*------------------------------- FiniteElement ----------------------*/
+
+
+// Default constructor: initializes update_each with update_default.
+template <int dim, int spacedim>
+FiniteElement<dim, spacedim>::InternalDataBase::InternalDataBase ():
+  update_each(update_default)
+{}
+
+
+
+// Destructor. The body is empty; nothing is released explicitly here.
+template <int dim, int spacedim>
+FiniteElement<dim,spacedim>::InternalDataBase::~InternalDataBase ()
+{}
+
+
+
+// Memory estimate for this object. This implementation reports
+// sizeof(*this) only, i.e. the static size of the object.
+template <int dim, int spacedim>
+std::size_t
+FiniteElement<dim, spacedim>::InternalDataBase::memory_consumption () const
+{
+  return sizeof(*this);
+}
+
+
+
+// Constructor. Copies the element description from @p fe_data into the
+// FiniteElementData base and sets up the tables of this class with
+// default values.
+//
+// @p r_i_a_f    flags stored as restriction_is_additive_flags; a vector
+//               of length one is expanded to dofs_per_cell equal entries.
+// @p nonzero_c  component masks stored as nonzero_components, one per
+//               shape function; a vector of length one is likewise
+//               expanded to dofs_per_cell equal entries.
+template <int dim, int spacedim>
+FiniteElement<dim,spacedim>::
+FiniteElement (const FiniteElementData<dim> &fe_data,
+               const std::vector<bool> &r_i_a_f,
+               const std::vector<ComponentMask> &nonzero_c)
+  :
+  FiniteElementData<dim> (fe_data),
+  adjust_quad_dof_index_for_face_orientation_table (dim == 3 ?
+                                                    this->dofs_per_quad : 0 ,
+                                                    dim==3 ? 8 : 0),
+  adjust_line_dof_index_for_line_orientation_table (dim == 3 ?
+                                                    this->dofs_per_line : 0),
+  system_to_base_table(this->dofs_per_cell),
+  face_system_to_base_table(this->dofs_per_face),
+  component_to_base_table (this->components,
+                           std::make_pair(std::make_pair(0U, 0U), 0U)),
+
+  // Special handling of vectors of length one: in this case, we
+  // assume that all entries were supposed to be equal
+  restriction_is_additive_flags(r_i_a_f.size() == 1
+                                ?
+                                std::vector<bool> (fe_data.dofs_per_cell, r_i_a_f[0])
+                                :
+                                r_i_a_f),
+  nonzero_components (nonzero_c.size() == 1
+                      ?
+                      std::vector<ComponentMask> (fe_data.dofs_per_cell, nonzero_c[0])
+                      :
+                      nonzero_c),
+  n_nonzero_components_table (compute_n_nonzero_components(nonzero_components))
+{
+  // The element is primitive if and only if every entry of
+  // n_nonzero_components_table equals one. This is written as a plain
+  // loop rather than with std::bind2nd, which is deprecated since C++11
+  // and no longer part of the standard library in C++17.
+  bool all_entries_are_one = true;
+  for (unsigned int i=0; i<n_nonzero_components_table.size(); ++i)
+    if (n_nonzero_components_table[i] != 1U)
+      {
+        all_entries_are_one = false;
+        break;
+      }
+  this->set_primitivity(all_entries_are_one);
+
+
+  // consistency checks on the (possibly expanded) input vectors
+  Assert (restriction_is_additive_flags.size() == this->dofs_per_cell,
+          ExcDimensionMismatch(restriction_is_additive_flags.size(),
+                               this->dofs_per_cell));
+  AssertDimension (nonzero_components.size(), this->dofs_per_cell);
+  for (unsigned int i=0; i<nonzero_components.size(); ++i)
+    {
+      Assert (nonzero_components[i].size() == this->n_components(),
+              ExcInternalError());
+      Assert (nonzero_components[i].n_selected_components ()
+              >= 1,
+              ExcInternalError());
+      Assert (n_nonzero_components_table[i] >= 1,
+              ExcInternalError());
+      Assert (n_nonzero_components_table[i] <= this->n_components(),
+              ExcInternalError());
+    }
+
+  // initialize some tables in the default way, i.e. if there is only one
+  // (vector-)component; if the element is not primitive, leave these tables
+  // empty.
+  if (this->is_primitive())
+    {
+      system_to_component_table.resize(this->dofs_per_cell);
+      face_system_to_component_table.resize(this->dofs_per_face);
+      for (unsigned int j=0 ; j<this->dofs_per_cell ; ++j)
+        system_to_component_table[j] = std::pair<unsigned,unsigned>(0,j);
+      for (unsigned int j=0 ; j<this->dofs_per_face ; ++j)
+        face_system_to_component_table[j] = std::pair<unsigned,unsigned>(0,j);
+    }
+
+  // by default, every dof maps to base element 0, multiplicity 0, and
+  // keeps its own index
+  for (unsigned int j=0 ; j<this->dofs_per_cell ; ++j)
+    system_to_base_table[j] = std::make_pair(std::make_pair(0U,0U),j);
+  for (unsigned int j=0 ; j<this->dofs_per_face ; ++j)
+    face_system_to_base_table[j] = std::make_pair(std::make_pair(0U,0U),j);
+
+  // Fill with default value; may be changed by constructor of derived class.
+  base_to_block_indices.reinit(1,1);
+
+  // initialize the restriction and prolongation matrices. the default
+  // constructor of FullMatrix<double> initializes them with size zero.
+  // the outer vectors are indexed by refinement case minus one, since
+  // there is no entry for RefinementCase<dim>::no_refinement
+  prolongation.resize(RefinementCase<dim>::isotropic_refinement);
+  restriction.resize(RefinementCase<dim>::isotropic_refinement);
+  for (unsigned int ref=RefinementCase<dim>::cut_x;
+       ref<RefinementCase<dim>::isotropic_refinement+1; ++ref)
+    {
+      prolongation[ref-1].resize (GeometryInfo<dim>::
+                                  n_children(RefinementCase<dim>(ref)),
+                                  FullMatrix<double>());
+      restriction[ref-1].resize (GeometryInfo<dim>::
+                                 n_children(RefinementCase<dim>(ref)),
+                                 FullMatrix<double>());
+    }
+
+  // start out with zero shifts for all face orientations
+  adjust_quad_dof_index_for_face_orientation_table.fill(0);
+}
+
+
+
+// Destructor. The body is empty; nothing is released explicitly here.
+template <int dim, int spacedim>
+FiniteElement<dim,spacedim>::~FiniteElement ()
+{}
+
+
+
+
+// Default implementation of shape_value() in this base class. Both
+// arguments are ignored: AssertThrow(false, ...) signals
+// ExcUnitShapeValuesDoNotExist unconditionally, and the return statement
+// merely supplies a value of the declared return type.
+template <int dim, int spacedim>
+double
+FiniteElement<dim,spacedim>::shape_value (const unsigned int,
+                                          const Point<dim> &) const
+{
+  AssertThrow(false, ExcUnitShapeValuesDoNotExist());
+  return 0.;
+}
+
+
+
+// Default implementation of shape_value_component() in this base class.
+// All arguments are ignored: AssertThrow(false, ...) signals
+// ExcUnitShapeValuesDoNotExist unconditionally, and the return statement
+// merely supplies a value of the declared return type.
+template <int dim, int spacedim>
+double
+FiniteElement<dim,spacedim>::shape_value_component (const unsigned int,
+                                                    const Point<dim> &,
+                                                    const unsigned int) const
+{
+  AssertThrow(false, ExcUnitShapeValuesDoNotExist());
+  return 0.;
+}
+
+
+
+// Default implementation of shape_grad() in this base class. Both
+// arguments are ignored: AssertThrow(false, ...) signals
+// ExcUnitShapeValuesDoNotExist unconditionally, and the return statement
+// merely supplies a default-constructed tensor of the declared type.
+template <int dim, int spacedim>
+Tensor<1,dim>
+FiniteElement<dim,spacedim>::shape_grad (const unsigned int,
+                                         const Point<dim> &) const
+{
+  AssertThrow(false, ExcUnitShapeValuesDoNotExist());
+  return Tensor<1,dim> ();
+}
+
+
+
+// Default implementation of shape_grad_component() in this base class.
+// All arguments are ignored: AssertThrow(false, ...) signals
+// ExcUnitShapeValuesDoNotExist unconditionally, and the return statement
+// merely supplies a default-constructed tensor of the declared type.
+template <int dim, int spacedim>
+Tensor<1,dim>
+FiniteElement<dim,spacedim>::shape_grad_component (const unsigned int,
+                                                   const Point<dim> &,
+                                                   const unsigned int) const
+{
+  AssertThrow(false, ExcUnitShapeValuesDoNotExist());
+  return Tensor<1,dim> ();
+}
+
+
+
+// Default implementation of shape_grad_grad() in this base class. Both
+// arguments are ignored: AssertThrow(false, ...) signals
+// ExcUnitShapeValuesDoNotExist unconditionally, and the return statement
+// merely supplies a default-constructed tensor of the declared type.
+template <int dim, int spacedim>
+Tensor<2,dim>
+FiniteElement<dim,spacedim>::shape_grad_grad (const unsigned int,
+                                              const Point<dim> &) const
+{
+  AssertThrow(false, ExcUnitShapeValuesDoNotExist());
+  return Tensor<2,dim> ();
+}
+
+
+
+// Default implementation of shape_grad_grad_component() in this base
+// class. All arguments are ignored: AssertThrow(false, ...) signals
+// ExcUnitShapeValuesDoNotExist unconditionally, and the return statement
+// merely supplies a default-constructed tensor of the declared type.
+template <int dim, int spacedim>
+Tensor<2,dim>
+FiniteElement<dim,spacedim>::shape_grad_grad_component (const unsigned int,
+                                                        const Point<dim> &,
+                                                        const unsigned int) const
+{
+  AssertThrow(false, ExcUnitShapeValuesDoNotExist());
+  return Tensor<2,dim> ();
+}
+
+
+
+// Default implementation of shape_3rd_derivative() in this base class.
+// Both arguments are ignored: AssertThrow(false, ...) signals
+// ExcUnitShapeValuesDoNotExist unconditionally, and the return statement
+// merely supplies a default-constructed tensor of the declared type.
+template <int dim, int spacedim>
+Tensor<3,dim>
+FiniteElement<dim,spacedim>::shape_3rd_derivative (const unsigned int,
+                                                   const Point<dim> &) const
+{
+  AssertThrow(false, ExcUnitShapeValuesDoNotExist());
+  return Tensor<3,dim> ();
+}
+
+
+
+// Default implementation of shape_3rd_derivative_component() in this base
+// class. All arguments are ignored: AssertThrow(false, ...) signals
+// ExcUnitShapeValuesDoNotExist unconditionally, and the return statement
+// merely supplies a default-constructed tensor of the declared type.
+template <int dim, int spacedim>
+Tensor<3,dim>
+FiniteElement<dim,spacedim>::shape_3rd_derivative_component (const unsigned int,
+    const Point<dim> &,
+    const unsigned int) const
+{
+  AssertThrow(false, ExcUnitShapeValuesDoNotExist());
+  return Tensor<3,dim> ();
+}
+
+
+
+// Default implementation of shape_4th_derivative() in this base class.
+// Both arguments are ignored: AssertThrow(false, ...) signals
+// ExcUnitShapeValuesDoNotExist unconditionally, and the return statement
+// merely supplies a default-constructed tensor of the declared type.
+template <int dim, int spacedim>
+Tensor<4,dim>
+FiniteElement<dim,spacedim>::shape_4th_derivative (const unsigned int,
+                                                   const Point<dim> &) const
+{
+  AssertThrow(false, ExcUnitShapeValuesDoNotExist());
+  return Tensor<4,dim> ();
+}
+
+
+
+// Default implementation of shape_4th_derivative_component() in this base
+// class. All arguments are ignored: AssertThrow(false, ...) signals
+// ExcUnitShapeValuesDoNotExist unconditionally, and the return statement
+// merely supplies a default-constructed tensor of the declared type.
+template <int dim, int spacedim>
+Tensor<4,dim>
+FiniteElement<dim,spacedim>::shape_4th_derivative_component (const unsigned int,
+    const Point<dim> &,
+    const unsigned int) const
+{
+  AssertThrow(false, ExcUnitShapeValuesDoNotExist());
+  return Tensor<4,dim> ();
+}
+
+
+// Resize the restriction and prolongation matrices of every child of every
+// refinement case to dofs_per_cell x dofs_per_cell, unless a matrix already
+// has dofs_per_cell rows. If @p isotropic_restriction_only (respectively
+// @p isotropic_prolongation_only) is set, only the restriction
+// (prolongation) matrices of the isotropic refinement case are touched.
+template <int dim, int spacedim>
+void
+FiniteElement<dim,spacedim>::reinit_restriction_and_prolongation_matrices (
+  const bool isotropic_restriction_only,
+  const bool isotropic_prolongation_only)
+{
+  for (unsigned int ref_case=RefinementCase<dim>::cut_x;
+       ref_case <= RefinementCase<dim>::isotropic_refinement; ++ref_case)
+    {
+      // decide once per refinement case which kinds of matrices to handle
+      const bool is_isotropic
+        = (ref_case==RefinementCase<dim>::isotropic_refinement);
+      const bool handle_restriction  = (!isotropic_restriction_only || is_isotropic);
+      const bool handle_prolongation = (!isotropic_prolongation_only || is_isotropic);
+
+      const unsigned int n_children
+        = GeometryInfo<dim>::n_children(RefinementCase<dim>(ref_case));
+
+      for (unsigned int child=0; child<n_children; ++child)
+        {
+          // the outer index is shifted by one because there are no
+          // matrices for the no_refinement case
+          FullMatrix<double> &restriction_matrix
+            = this->restriction[ref_case-1][child];
+          FullMatrix<double> &prolongation_matrix
+            = this->prolongation[ref_case-1][child];
+
+          if (handle_restriction
+              &&
+              (restriction_matrix.m() != this->dofs_per_cell))
+            restriction_matrix.reinit (this->dofs_per_cell,
+                                       this->dofs_per_cell);
+
+          if (handle_prolongation
+              &&
+              (prolongation_matrix.m() != this->dofs_per_cell))
+            prolongation_matrix.reinit (this->dofs_per_cell,
+                                        this->dofs_per_cell);
+        }
+    }
+}
+
+
+// Return the restriction matrix stored for child @p child under refinement
+// case @p refinement_case. In debug mode the arguments are range checked
+// and ExcProjectionVoid is raised if the stored matrix does not have
+// dofs_per_cell columns.
+template <int dim, int spacedim>
+const FullMatrix<double> &
+FiniteElement<dim,spacedim>::get_restriction_matrix (const unsigned int child,
+                                                     const RefinementCase<dim> &refinement_case) const
+{
+  // the refinement case has to denote an actual refinement ...
+  Assert (refinement_case<RefinementCase<dim>::isotropic_refinement+1,
+          ExcIndexRange(refinement_case,0,RefinementCase<dim>::isotropic_refinement+1));
+  Assert (refinement_case!=RefinementCase<dim>::no_refinement,
+          ExcMessage("Restriction matrices are only available for refined cells!"));
+  // ... and the child index has to exist for it
+  Assert (child<GeometryInfo<dim>::n_children(RefinementCase<dim>(refinement_case)),
+          ExcIndexRange(child,0,GeometryInfo<dim>::n_children(RefinementCase<dim>(refinement_case))));
+
+  // the outer vector has no entry for RefinementCase<dim>::no_refinement
+  // (=0), so all indices are shifted down by one
+  const FullMatrix<double> &matrix = restriction[refinement_case-1][child];
+
+  Assert (matrix.n() == this->dofs_per_cell, ExcProjectionVoid());
+  return matrix;
+}
+
+
+
+// Return the prolongation matrix stored for child @p child under
+// refinement case @p refinement_case. In debug mode the arguments are
+// range checked and ExcEmbeddingVoid is raised if the stored matrix does
+// not have dofs_per_cell columns.
+template <int dim, int spacedim>
+const FullMatrix<double> &
+FiniteElement<dim,spacedim>::get_prolongation_matrix (const unsigned int child,
+                                                      const RefinementCase<dim> &refinement_case) const
+{
+  // the refinement case has to denote an actual refinement ...
+  Assert (refinement_case<RefinementCase<dim>::isotropic_refinement+1,
+          ExcIndexRange(refinement_case,0,RefinementCase<dim>::isotropic_refinement+1));
+  Assert (refinement_case!=RefinementCase<dim>::no_refinement,
+          ExcMessage("Prolongation matrices are only available for refined cells!"));
+  // ... and the child index has to exist for it
+  Assert (child<GeometryInfo<dim>::n_children(RefinementCase<dim>(refinement_case)),
+          ExcIndexRange(child,0,GeometryInfo<dim>::n_children(RefinementCase<dim>(refinement_case))));
+
+  // the outer vector has no entry for RefinementCase::no_refinement (=0),
+  // so all indices are shifted down by one
+  const FullMatrix<double> &matrix = prolongation[refinement_case-1][child];
+
+  Assert (matrix.n() == this->dofs_per_cell, ExcEmbeddingVoid());
+  return matrix;
+}
+
+
+//TODO:[GK] This is probably not the most efficient way of doing this.
+// Return the block index that vector component @p index belongs to. The
+// result is the first block of the base element recorded for this
+// component in component_to_base_table, plus the offset stored as the
+// second entry of that table row.
+template <int dim, int spacedim>
+unsigned int
+FiniteElement<dim,spacedim>::component_to_block_index (const unsigned int index) const
+{
+  Assert (index < this->n_components(),
+          ExcIndexRange(index, 0, this->n_components()));
+
+  const unsigned int base_element   = component_to_base_table[index].first.first;
+  const unsigned int offset_in_base = component_to_base_table[index].second;
+
+  return first_block_of_base(base_element) + offset_in_base;
+}
+
+
+// Return a component mask with n_components() entries in which exactly the
+// component named by the extractor @p scalar is selected.
+template <int dim, int spacedim>
+ComponentMask
+FiniteElement<dim,spacedim>::
+component_mask (const FEValuesExtractors::Scalar &scalar) const
+{
+  const unsigned int n_comp = this->n_components();
+  AssertIndexRange(scalar.component, n_comp);
+
+//TODO: it would be nice to verify that it is indeed possible
+// to select this scalar component, i.e., that it is not part
+// of a non-primitive element. unfortunately, there is no simple
+// way to write such a condition...
+
+  // start with nothing selected, then switch on the one component
+  std::vector<bool> selected (n_comp, false);
+  selected[scalar.component] = true;
+  return selected;
+}
+
+
+// Return a component mask with n_components() entries in which the dim
+// consecutive components starting at vector.first_vector_component are
+// selected.
+template <int dim, int spacedim>
+ComponentMask
+FiniteElement<dim,spacedim>::
+component_mask (const FEValuesExtractors::Vector &vector) const
+{
+  const unsigned int first_component = vector.first_vector_component;
+  const unsigned int n_comp          = this->n_components();
+
+  // the last of the dim components must still be a valid index
+  AssertIndexRange(first_component+dim-1, n_comp);
+
+  //TODO: it would be nice to verify that it is indeed possible
+  // to select these vector components, i.e., that they don't span
+  // beyond the beginning or end of a non-primitive element.
+  // unfortunately, there is no simple way to write such a condition...
+
+  std::vector<bool> selected (n_comp, false);
+  for (unsigned int d=0; d<dim; ++d)
+    selected[first_component+d] = true;
+  return selected;
+}
+
+
+// Return a component mask with n_components() entries in which the
+// SymmetricTensor<2,dim>::n_independent_components consecutive components
+// starting at sym_tensor.first_tensor_component are selected.
+template <int dim, int spacedim>
+ComponentMask
+FiniteElement<dim,spacedim>::
+component_mask (const FEValuesExtractors::SymmetricTensor<2> &sym_tensor) const
+{
+  const unsigned int first_component = sym_tensor.first_tensor_component;
+  const unsigned int n_tensor_comps  = SymmetricTensor<2,dim>::n_independent_components;
+  const unsigned int n_comp          = this->n_components();
+
+  // the last of the tensor components must still be a valid index
+  AssertIndexRange((first_component + n_tensor_comps-1), n_comp);
+
+  //TODO: it would be nice to verify that it is indeed possible
+  // to select these tensor components, i.e., that they don't span
+  // beyond the beginning or end of a non-primitive element.
+  // unfortunately, there is no simple way to write such a condition...
+
+  std::vector<bool> selected (n_comp, false);
+  for (unsigned int k=0; k<n_tensor_comps; ++k)
+    selected[first_component+k] = true;
+  return selected;
+}
+
+
+
+// Convert a block mask into a component mask: a component is selected
+// exactly if the block it belongs to (see component_to_block_index()) is
+// selected in @p block_mask.
+template <int dim, int spacedim>
+ComponentMask
+FiniteElement<dim,spacedim>::
+component_mask (const BlockMask &block_mask) const
+{
+  // a block mask that stands for "all blocks" translates into the
+  // component mask that stands for "all components"
+  if (block_mask.represents_the_all_selected_mask())
+    return ComponentMask();
+
+  AssertDimension(block_mask.size(), this->n_blocks());
+
+  const unsigned int n_comp = this->n_components();
+  std::vector<bool> selected (n_comp, false);
+  for (unsigned int c=0; c<n_comp; ++c)
+    selected[c] = block_mask[component_to_block_index(c)];
+
+  return selected;
+}
+
+
+
+// Return the block mask corresponding to the scalar extractor @p scalar.
+template <int dim, int spacedim>
+BlockMask
+FiniteElement<dim,spacedim>::
+block_mask (const FEValuesExtractors::Scalar &scalar) const
+{
+  // building the component mask is the simpler step; the conversion to
+  // blocks is then left to the ComponentMask overload
+  const ComponentMask selected_components = component_mask(scalar);
+  return block_mask(selected_components);
+}
+
+
+// Return the block mask corresponding to the vector extractor @p vector.
+template <int dim, int spacedim>
+BlockMask
+FiniteElement<dim,spacedim>::
+block_mask (const FEValuesExtractors::Vector &vector) const
+{
+  // building the component mask is the simpler step; the conversion to
+  // blocks is then left to the ComponentMask overload
+  const ComponentMask selected_components = component_mask(vector);
+  return block_mask(selected_components);
+}
+
+
+// Return the block mask corresponding to the symmetric tensor extractor
+// @p sym_tensor.
+template <int dim, int spacedim>
+BlockMask
+FiniteElement<dim,spacedim>::
+block_mask (const FEValuesExtractors::SymmetricTensor<2> &sym_tensor) const
+{
+  // building the component mask is the simpler step; the conversion to
+  // blocks is then left to the ComponentMask overload
+  const ComponentMask selected_components = component_mask(sym_tensor);
+  return block_mask(selected_components);
+}
+
+
+
+// Convert a component mask into a block mask. A block is selected if the
+// first component belonging to it is selected; in debug mode it is checked
+// that the remaining components of the same block agree with that choice.
+template <int dim, int spacedim>
+BlockMask
+FiniteElement<dim,spacedim>::
+block_mask (const ComponentMask &component_mask) const
+{
+  // a component mask that stands for "all components" translates into
+  // the block mask that stands for "all blocks"
+  if (component_mask.represents_the_all_selected_mask())
+    return BlockMask();
+
+  AssertDimension(component_mask.size(), this->n_components());
+
+  const unsigned int n_comp = this->n_components();
+  std::vector<bool> selected_blocks (this->n_blocks(), false);
+
+  // process the components in runs of consecutive components that map to
+  // the same block
+  unsigned int c = 0;
+  while (c < n_comp)
+    {
+      // the first component of a run decides about the block
+      const unsigned int block = component_to_block_index(c);
+      if (component_mask[c])
+        selected_blocks[block] = true;
+
+      // all further components of the run have to agree with it
+      for (++c;
+           (c<n_comp) && (component_to_block_index(c) == block);
+           ++c)
+        {
+          Assert (component_mask[c] == selected_blocks[block],
+                  ExcMessage ("The component mask argument given to this function "
+                              "is not a mask where the individual components belonging "
+                              "to one block of the finite element are either all "
+                              "selected or not selected. You can't call this function "
+                              "with a component mask that splits blocks."));
+        }
+    }
+
+  return selected_blocks;
+}
+
+
+
+// Translate the index @p face_index of a degree of freedom on face number
+// @p face into an index among the degrees of freedom of the cell. The
+// degree of freedom is classified as living on a vertex, a line or a quad
+// of the face by comparing @p face_index with first_face_line_index and
+// first_face_quad_index. The orientation flags are forwarded to
+// GeometryInfo for vertices and lines; for any non-standard orientation
+// this implementation asserts that there is at most one dof per line and
+// per quad.
+template <int dim, int spacedim>
+unsigned int
+FiniteElement<dim,spacedim>::
+face_to_cell_index (const unsigned int face_index,
+                    const unsigned int face,
+                    const bool face_orientation,
+                    const bool face_flip,
+                    const bool face_rotation) const
+{
+  Assert (face_index < this->dofs_per_face,
+          ExcIndexRange(face_index, 0, this->dofs_per_face));
+  Assert (face < GeometryInfo<dim>::faces_per_cell,
+          ExcIndexRange(face, 0, GeometryInfo<dim>::faces_per_cell));
+
+//TODO: we could presumably solve the 3d case below using the
+// adjust_quad_dof_index_for_face_orientation_table field. for the
+// 2d case, we can't use adjust_line_dof_index_for_line_orientation_table
+// since that array is empty (presumably because we thought that
+// there are no flipped edges in 2d, but these can happen in
+// DoFTools::make_periodicity_constraints, for example). so we
+// would need to either fill this field, or rely on derived classes
+// implementing this function, as we currently do
+
+  // see the function's documentation for an explanation of this
+  // assertion -- in essence, derived classes have to implement
+  // an overloaded version of this function if we are to use any
+  // other than standard orientation
+  if ((face_orientation != true) || (face_flip != false) || (face_rotation != false))
+    Assert ((this->dofs_per_line <= 1) && (this->dofs_per_quad <= 1),
+            ExcMessage ("The function in this base class can not handle this case. "
+                        "Rather, the derived class you are using must provide "
+                        "an overloaded version but apparently hasn't done so. See "
+                        "the documentation of this function for more information."));
+
+  // we need to distinguish between DoFs on vertices, lines and in 3d quads.
+  // do so in a sequence of if-else statements
+  if (face_index < this->first_face_line_index)
+    // DoF is on a vertex
+    {
+      // get the number of the vertex on the face that corresponds to this DoF,
+      // along with the number of the DoF on this vertex
+      const unsigned int face_vertex         = face_index / this->dofs_per_vertex;
+      const unsigned int dof_index_on_vertex = face_index % this->dofs_per_vertex;
+
+      // then get the number of this vertex on the cell and translate
+      // this to a DoF number on the cell
+      return (GeometryInfo<dim>::face_to_cell_vertices(face, face_vertex,
+                                                       face_orientation,
+                                                       face_flip,
+                                                       face_rotation)
+              * this->dofs_per_vertex
+              +
+              dof_index_on_vertex);
+    }
+  else if (face_index < this->first_face_quad_index)
+    // DoF is on a line
+    {
+      // do the same kind of translation as before. we need to only consider
+      // DoFs on the lines, i.e., ignoring those on the vertices
+      const unsigned int index = face_index - this->first_face_line_index;
+
+      const unsigned int face_line         = index / this->dofs_per_line;
+      const unsigned int dof_index_on_line = index % this->dofs_per_line;
+
+      // cell line dofs start at first_line_index; each line of the cell
+      // contributes dofs_per_line of them
+      return (this->first_line_index
+              + GeometryInfo<dim>::face_to_cell_lines(face, face_line,
+                                                      face_orientation,
+                                                      face_flip,
+                                                      face_rotation)
+              * this->dofs_per_line
+              +
+              dof_index_on_line);
+    }
+  else
+    // DoF is on a quad
+    {
+      Assert (dim >= 3, ExcInternalError());
+
+      // ignore vertex and line dofs
+      const unsigned int index = face_index - this->first_face_quad_index;
+
+      // cell quad dofs start at first_quad_index; each face contributes
+      // dofs_per_quad of them. the orientation flags are not used here
+      return (this->first_quad_index
+              + face * this->dofs_per_quad
+              + index);
+    }
+}
+
+
+
+
+// Return @p index shifted by the entry that
+// adjust_quad_dof_index_for_face_orientation_table holds for this index
+// and the given combination of orientation flags. Only meaningful in 3d;
+// for dim<3 an assertion is triggered in debug mode and @p index is
+// returned unchanged otherwise.
+template <int dim, int spacedim>
+unsigned int
+FiniteElement<dim,spacedim>::adjust_quad_dof_index_for_face_orientation (const unsigned int index,
+    const bool face_orientation,
+    const bool face_flip,
+    const bool face_rotation) const
+{
+  // this function should not even be called in 1d and 2d, hence the
+  // internal error; without assertions simply hand back the index
+  Assert (dim==3, ExcInternalError());
+  if (dim < 3)
+    return index;
+
+  // the shift is looked up in a table that derived elements need to have
+  // set up beforehand. discontinuous elements are the exception: they
+  // should have no face dofs (dofs_per_quad==0 in 3d) and thus need no
+  // table, but then this function should not have been called either
+  Assert (index<this->dofs_per_quad, ExcIndexRange(index,0,this->dofs_per_quad));
+  Assert (adjust_quad_dof_index_for_face_orientation_table.n_elements()==8*this->dofs_per_quad,
+          ExcInternalError());
+
+  // the three flags select one of the eight table columns
+  const unsigned int orientation_case
+    = 4*face_orientation+2*face_flip+face_rotation;
+
+  return index+adjust_quad_dof_index_for_face_orientation_table(index,orientation_case);
+}
+
+
+
+// Return @p index unchanged if dim<3 or if @p line_orientation is true;
+// otherwise return @p index shifted by the corresponding entry of
+// adjust_line_dof_index_for_line_orientation_table.
+template <int dim, int spacedim>
+unsigned int
+FiniteElement<dim,spacedim>::adjust_line_dof_index_for_line_orientation (const unsigned int index,
+    const bool line_orientation) const
+{
+  // nothing to do in 1d and 2d. deliberately no assertion here, so that
+  // the function may be called in 2d as well
+  if (dim<3)
+    return index;
+
+  Assert (index<this->dofs_per_line, ExcIndexRange(index,0,this->dofs_per_line));
+  Assert (adjust_line_dof_index_for_line_orientation_table.size()==this->dofs_per_line,
+          ExcInternalError());
+
+  // only lines with non-standard orientation need the shift
+  return (line_orientation
+          ?
+          index
+          :
+          index+adjust_line_dof_index_for_line_orientation_table[index]);
+}
+
+
+
+// Return true if, for every refinement case and every child, the stored
+// prolongation matrix has neither zero rows nor zero columns; return false
+// as soon as an empty matrix is found.
+template <int dim, int spacedim>
+bool
+FiniteElement<dim,spacedim>::prolongation_is_implemented () const
+{
+  const unsigned int first_case = RefinementCase<dim>::cut_x;
+  const unsigned int last_case  = RefinementCase<dim>::isotropic_refinement;
+
+  for (unsigned int ref_case=first_case; ref_case<=last_case; ++ref_case)
+    {
+      const unsigned int n_children
+        = GeometryInfo<dim>::n_children(RefinementCase<dim>(ref_case));
+
+      for (unsigned int child=0; child<n_children; ++child)
+        {
+          // request the matrix first, so that lazily initialized
+          // matrices get created as well
+          get_prolongation_matrix(child, RefinementCase<dim>(ref_case));
+
+          // a matrix is either empty or of full size
+          const FullMatrix<double> &matrix = prolongation[ref_case-1][child];
+          Assert ((matrix.m() == this->dofs_per_cell) ||
+                  (matrix.m() == 0),
+                  ExcInternalError());
+          Assert ((matrix.n() == this->dofs_per_cell) ||
+                  (matrix.n() == 0),
+                  ExcInternalError());
+
+          if ((matrix.m() == 0) || (matrix.n() == 0))
+            return false;
+        }
+    }
+  return true;
+}
+
+
+
+// Return true if, for every refinement case and every child, the stored
+// restriction matrix has neither zero rows nor zero columns; return false
+// as soon as an empty matrix is found.
+template <int dim, int spacedim>
+bool
+FiniteElement<dim,spacedim>::restriction_is_implemented () const
+{
+  const unsigned int first_case = RefinementCase<dim>::cut_x;
+  const unsigned int last_case  = RefinementCase<dim>::isotropic_refinement;
+
+  for (unsigned int ref_case=first_case; ref_case<=last_case; ++ref_case)
+    {
+      const unsigned int n_children
+        = GeometryInfo<dim>::n_children(RefinementCase<dim>(ref_case));
+
+      for (unsigned int child=0; child<n_children; ++child)
+        {
+          // request the matrix first, so that lazily initialized
+          // matrices get created as well
+          get_restriction_matrix(child, RefinementCase<dim>(ref_case));
+
+          // a matrix is either empty or of full size
+          const FullMatrix<double> &matrix = restriction[ref_case-1][child];
+          Assert ((matrix.m() == this->dofs_per_cell) ||
+                  (matrix.m() == 0),
+                  ExcInternalError());
+          Assert ((matrix.n() == this->dofs_per_cell) ||
+                  (matrix.n() == 0),
+                  ExcInternalError());
+
+          if ((matrix.m() == 0) || (matrix.n() == 0))
+            return false;
+        }
+    }
+  return true;
+}
+
+
+
+// Return true if, for every child of the isotropic refinement case, the
+// stored prolongation matrix has neither zero rows nor zero columns;
+// return false as soon as an empty matrix is found.
+template <int dim, int spacedim>
+bool
+FiniteElement<dim,spacedim>::isotropic_prolongation_is_implemented () const
+{
+  const RefinementCase<dim> ref_case=RefinementCase<dim>::isotropic_refinement;
+  const unsigned int n_children
+    = GeometryInfo<dim>::n_children(RefinementCase<dim>(ref_case));
+
+  for (unsigned int child=0; child<n_children; ++child)
+    {
+      // request the matrix first, so that lazily initialized matrices
+      // get created as well
+      get_prolongation_matrix(child, RefinementCase<dim>(ref_case));
+
+      // a matrix is either empty or of full size
+      const FullMatrix<double> &matrix = prolongation[ref_case-1][child];
+      Assert ((matrix.m() == this->dofs_per_cell) ||
+              (matrix.m() == 0),
+              ExcInternalError());
+      Assert ((matrix.n() == this->dofs_per_cell) ||
+              (matrix.n() == 0),
+              ExcInternalError());
+
+      if ((matrix.m() == 0) || (matrix.n() == 0))
+        return false;
+    }
+  return true;
+}
+
+
+
+// Return true if, for every child of the isotropic refinement case, the
+// stored restriction matrix has neither zero rows nor zero columns;
+// return false as soon as an empty matrix is found.
+template <int dim, int spacedim>
+bool
+FiniteElement<dim,spacedim>::isotropic_restriction_is_implemented () const
+{
+  const RefinementCase<dim> ref_case = RefinementCase<dim>::isotropic_refinement;
+  const unsigned int n_children
+    = GeometryInfo<dim>::n_children(RefinementCase<dim>(ref_case));
+
+  for (unsigned int child=0; child<n_children; ++child)
+    {
+      // request the matrix first, so that lazily initialized matrices
+      // get created as well
+      get_restriction_matrix(child, RefinementCase<dim>(ref_case));
+
+      // a matrix is either empty or of full size
+      const FullMatrix<double> &matrix = restriction[ref_case-1][child];
+      Assert ((matrix.m() == this->dofs_per_cell) ||
+              (matrix.m() == 0),
+              ExcInternalError());
+      Assert ((matrix.n() == this->dofs_per_cell) ||
+              (matrix.n() == 0),
+              ExcInternalError());
+
+      if ((matrix.m() == 0) || (matrix.n() == 0))
+        return false;
+    }
+  return true;
+}
+
+
+
+// Return whether constraints are available for the given subface case.
+// For anything but the isotropic case the answer is false; for the
+// isotropic case it is true if the element has no dofs on faces or if the
+// interface_constraints matrix is not empty.
+template <int dim, int spacedim>
+bool
+FiniteElement<dim,spacedim>::constraints_are_implemented (const internal::SubfaceCase<dim> &subface_case) const
+{
+  // this base class knows about the isotropic case only
+  if (!(subface_case==internal::SubfaceCase<dim>::case_isotropic))
+    return false;
+
+  const bool no_face_dofs = (this->dofs_per_face  == 0);
+  return no_face_dofs || (interface_constraints.m() != 0);
+}
+
+
+
+// Default implementation in this base class: always returns false.
+// NOTE(review): presumably derived elements that support hp constraints
+// provide their own version -- confirm against the class declaration.
+template <int dim, int spacedim>
+bool
+FiniteElement<dim,spacedim>::hp_constraints_are_implemented () const
+{
+  return false;
+}
+
+
+
+// Return a reference to the interface_constraints matrix. In debug mode it
+// is checked that @p subface_case is the isotropic case, that the matrix
+// is non-empty whenever the element has dofs on faces, and that the matrix
+// is empty in 1d.
+template <int dim, int spacedim>
+const FullMatrix<double> &
+FiniteElement<dim,spacedim>::constraints (const internal::SubfaceCase<dim> &subface_case) const
+{
+  // the argument is only used inside assertions; presumably this cast
+  // avoids an unused-parameter warning when assertions are disabled
+  (void)subface_case;
+  Assert (subface_case==internal::SubfaceCase<dim>::case_isotropic,
+          ExcMessage("Constraints for this element are only implemented "
+                     "for the case that faces are refined isotropically "
+                     "(which is always the case in 2d, and in 3d requires "
+                     "that the neighboring cell of a coarse cell presents "
+                     "exactly four children on the common face)."));
+  // same condition that constraints_are_implemented() reports for the
+  // isotropic case
+  Assert ((this->dofs_per_face  == 0) || (interface_constraints.m() != 0),
+          ExcMessage ("The finite element for which you try to obtain "
+                      "hanging node constraints does not appear to "
+                      "implement them."));
+
+  // in 1d the constraint matrix is expected to be empty
+  if (dim==1)
+    Assert ((interface_constraints.m()==0) && (interface_constraints.n()==0),
+            ExcWrongInterfaceMatrixSize(interface_constraints.m(),
+                                        interface_constraints.n()));
+
+  return interface_constraints;
+}
+
+
+
+// Return the (rows, columns) size of the interface constraint matrix as a
+// function of the space dimension:
+//   1d: 0 x 0
+//   2d: (dofs_per_vertex + 2*dofs_per_line) x dofs_per_face
+//   3d: (5*dofs_per_vertex + 12*dofs_per_line + 4*dofs_per_quad)
+//       x dofs_per_face
+// For any other dimension an assertion is triggered and a pair of
+// numbers::invalid_unsigned_int is returned.
+template <int dim, int spacedim>
+TableIndices<2>
+FiniteElement<dim,spacedim>::interface_constraints_size () const
+{
+  if (dim == 1)
+    return TableIndices<2> (0U, 0U);
+
+  if (dim == 2)
+    return TableIndices<2> (this->dofs_per_vertex +
+                            2*this->dofs_per_line,
+                            this->dofs_per_face);
+
+  if (dim == 3)
+    return TableIndices<2> (5*this->dofs_per_vertex +
+                            12*this->dofs_per_line  +
+                            4*this->dofs_per_quad,
+                            this->dofs_per_face);
+
+  // no other dimension is handled here
+  Assert (false, ExcNotImplemented());
+  return TableIndices<2> (numbers::invalid_unsigned_int,
+                          numbers::invalid_unsigned_int);
+}
+
+
+
+// Default implementation in this base class: no interpolation matrix is
+// available, so both arguments are ignored and
+// ExcInterpolationNotImplemented is signalled unconditionally.
+template <int dim, int spacedim>
+void
+FiniteElement<dim,spacedim>::
+get_interpolation_matrix (const FiniteElement<dim,spacedim> &,
+                          FullMatrix<double> &) const
+{
+  // the typedef keeps the comma of the template argument list out of the
+  // macro invocation below
+  typedef FiniteElement<dim,spacedim> ThisElement;
+  AssertThrow (false,
+               typename ThisElement::ExcInterpolationNotImplemented());
+}
+
+
+
+// Default implementation in this base class: no face interpolation matrix
+// is available, so both arguments are ignored and
+// ExcInterpolationNotImplemented is signalled unconditionally.
+template <int dim, int spacedim>
+void
+FiniteElement<dim,spacedim>::
+get_face_interpolation_matrix (const FiniteElement<dim,spacedim> &,
+                               FullMatrix<double> &) const
+{
+  // the typedef keeps the comma of the template argument list out of the
+  // macro invocation below
+  typedef FiniteElement<dim,spacedim> ThisElement;
+  AssertThrow (false,
+               typename ThisElement::ExcInterpolationNotImplemented());
+}
+
+
+
+// Default implementation in this base class: no subface interpolation
+// matrix is available, so all arguments are ignored and
+// ExcInterpolationNotImplemented is signalled unconditionally.
+template <int dim, int spacedim>
+void
+FiniteElement<dim,spacedim>::
+get_subface_interpolation_matrix (const FiniteElement<dim,spacedim> &,
+                                  const unsigned int,
+                                  FullMatrix<double> &) const
+{
+  // the typedef keeps the comma of the template argument list out of the
+  // macro invocation below
+  typedef FiniteElement<dim,spacedim> ThisElement;
+  AssertThrow (false,
+               typename ThisElement::ExcInterpolationNotImplemented());
+}
+
+
+
+template <int dim, int spacedim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FiniteElement<dim,spacedim>::hp_vertex_dof_identities (const FiniteElement<dim,spacedim> &) const
+{
+  Assert (false, ExcNotImplemented());  // the base class has no implementation
+  const std::vector<std::pair<unsigned int, unsigned int> > no_identities;
+  return no_identities;
+}
+
+
+
+template <int dim, int spacedim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FiniteElement<dim,spacedim>::hp_line_dof_identities (const FiniteElement<dim,spacedim> &) const
+{
+  Assert (false, ExcNotImplemented());  // the base class has no implementation
+  const std::vector<std::pair<unsigned int, unsigned int> > no_identities;
+  return no_identities;
+}
+
+
+
+template <int dim, int spacedim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FiniteElement<dim,spacedim>::hp_quad_dof_identities (const FiniteElement<dim,spacedim> &) const
+{
+  Assert (false, ExcNotImplemented());  // the base class has no implementation
+  const std::vector<std::pair<unsigned int, unsigned int> > no_identities;
+  return no_identities;
+}
+
+
+
+template <int dim, int spacedim>
+FiniteElementDomination::Domination
+FiniteElement<dim,spacedim>::compare_for_face_domination (const FiniteElement<dim,spacedim> &) const
+{
+  // no generic comparison exists; neither_element_dominates is the fallback
+  Assert (false, ExcNotImplemented());
+  return FiniteElementDomination::neither_element_dominates;
+}
+
+
+
+template <int dim, int spacedim>
+bool
+FiniteElement<dim,spacedim>::operator == (const FiniteElement<dim,spacedim> &f) const
+{
+  const FiniteElementData<dim> &own_data   = *this;  // FiniteElementData views
+  const FiniteElementData<dim> &other_data = f;      // of the two operands
+  return (own_data == other_data) && (interface_constraints == f.interface_constraints);
+}
+
+
+
+template <int dim, int spacedim>
+const std::vector<Point<dim> > &
+FiniteElement<dim,spacedim>::get_unit_support_points () const
+{
+  // Consistency check before handing out the list: it must either be
+  // empty (the element defines no support points) or contain exactly
+  // one point for each degree of freedom of the cell. Anything else is
+  // reported as an internal error.
+  Assert (unit_support_points.empty() ||
+          (this->dofs_per_cell == unit_support_points.size()),
+          ExcInternalError());
+  return unit_support_points;
+}
+
+
+
+template <int dim, int spacedim>
+bool
+FiniteElement<dim,spacedim>::has_support_points () const
+{
+  return !unit_support_points.empty();  // true iff unit support points are stored
+}
+
+
+
+template <int dim, int spacedim>
+const std::vector<Point<dim> > &
+FiniteElement<dim,spacedim>::get_generalized_support_points () const
+{
+  // Prefer the dedicated generalized support points; when none have
+  // been set, the unit support points are handed out in their place
+  // (that list may itself be empty). No size check is done here.
+  if (generalized_support_points.size() != 0)
+    return generalized_support_points;
+
+  return unit_support_points;
+}
+
+
+
+template <int dim, int spacedim>
+bool
+FiniteElement<dim,spacedim>::has_generalized_support_points () const
+{
+  return !get_generalized_support_points().empty();  // counts the unit-point fallback too
+}
+
+
+
+template <int dim, int spacedim>
+Point<dim>
+FiniteElement<dim,spacedim>::unit_support_point (const unsigned int index) const
+{
+  // the index has to name a dof of the cell ...
+  Assert (index < this->dofs_per_cell, ExcIndexRange (index, 0, this->dofs_per_cell));
+  // ... and a support point must exist for every one of those dofs
+  Assert (this->dofs_per_cell == unit_support_points.size(), ExcFEHasNoSupportPoints ());
+  return unit_support_points[index];
+}
+
+
+
+template <int dim, int spacedim>
+const std::vector<Point<dim-1> > &
+FiniteElement<dim,spacedim>::get_unit_face_support_points () const
+{
+  // Consistency check before handing out the list: it is either empty
+  // (no face support points defined) or its length equals the number
+  // of degrees of freedom on a face. Anything else is reported as an
+  // internal error.
+  Assert (unit_face_support_points.empty() ||
+          (this->dofs_per_face == unit_face_support_points.size()),
+          ExcInternalError());
+  return unit_face_support_points;
+}
+
+
+
+template <int dim, int spacedim>
+bool
+FiniteElement<dim,spacedim>::has_face_support_points () const
+{
+  return !unit_face_support_points.empty();  // true iff unit face support points are stored
+}
+
+
+
+template <int dim, int spacedim>
+const std::vector<Point<dim-1> > &
+FiniteElement<dim,spacedim>::get_generalized_face_support_points () const
+{
+  // Dedicated generalized face support points win; if none were set,
+  // the unit face support points are returned in their place (and
+  // that list may be empty as well). No size check is done here.
+  if (generalized_face_support_points.size() != 0)
+    return generalized_face_support_points;
+
+  return unit_face_support_points;
+}
+
+
+
+template <int dim, int spacedim>
+bool
+FiniteElement<dim,spacedim>::has_generalized_face_support_points () const
+{
+  return !generalized_face_support_points.empty();  // only the dedicated list is consulted
+}
+
+
+
+template <int dim, int spacedim>
+Point<dim-1>
+FiniteElement<dim,spacedim>::unit_face_support_point (const unsigned int index) const
+{
+  // the index has to name a dof of a face ...
+  Assert (index < this->dofs_per_face, ExcIndexRange (index, 0, this->dofs_per_face));
+  // ... and a face support point must exist for every one of those dofs
+  Assert (this->dofs_per_face == unit_face_support_points.size(), ExcFEHasNoSupportPoints ());
+  return unit_face_support_points[index];
+}
+
+
+
+template <int dim, int spacedim>
+bool
+FiniteElement<dim,spacedim>::has_support_on_face (const unsigned int,
+                                                  const unsigned int) const
+{
+  // base-class default: the answer is true for every pair of arguments
+  return true;
+}
+
+
+
+template <int dim, int spacedim>
+std::pair<Table<2,bool>, std::vector<unsigned int> >
+FiniteElement<dim,spacedim>::get_constant_modes () const
+{
+  Assert (false, ExcNotImplemented());  // not implemented in the base class
+  const unsigned int n_comp = this->n_components();
+  return std::make_pair (Table<2,bool>(n_comp, this->dofs_per_cell),
+                         std::vector<unsigned int>(n_comp));
+}
+
+
+
+template <int dim, int spacedim>
+void
+FiniteElement<dim,spacedim>::interpolate(
+  std::vector<double>       &local_dofs,
+  const std::vector<double> &values) const
+{
+  Assert (has_support_points(), ExcFEHasNoSupportPoints());
+  Assert (unit_support_points.size() == values.size(),
+          ExcDimensionMismatch(values.size(), unit_support_points.size()));
+  Assert (this->dofs_per_cell == local_dofs.size(),
+          ExcDimensionMismatch(local_dofs.size(),this->dofs_per_cell));
+  Assert (this->n_components() == 1, ExcDimensionMismatch(this->n_components(), 1));
+  // single-component element: entry i of values becomes entry i of local_dofs
+  for (std::size_t i=0; i<values.size(); ++i)
+    local_dofs[i] = values[i];
+}
+
+
+
+
+template <int dim, int spacedim>
+void
+FiniteElement<dim,spacedim>::interpolate(
+  std::vector<double>    &local_dofs,
+  const std::vector<Vector<double> > &values,
+  unsigned int offset) const
+{
+  Assert (has_support_points(), ExcFEHasNoSupportPoints());
+  Assert (unit_support_points.size() == values.size(),
+          ExcDimensionMismatch(values.size(), unit_support_points.size()));
+  Assert (this->dofs_per_cell == local_dofs.size(),
+          ExcDimensionMismatch(local_dofs.size(),this->dofs_per_cell));
+  Assert (values[0].size() >= offset+this->n_components(),
+          ExcDimensionMismatch(values[0].size(),offset+this->n_components()));
+
+  // dof number "dof" reads, from values[dof], the entry offset + its component
+  for (unsigned int dof=0; dof<this->dofs_per_cell; ++dof)
+    {
+      const unsigned int component = this->system_to_component_index(dof).first;
+      local_dofs[dof] = values[dof](offset+component);
+    }
+}
+
+
+
+
+template <int dim, int spacedim>
+void
+FiniteElement<dim,spacedim>::interpolate(
+  std::vector<double> &local_dofs,
+  const VectorSlice<const std::vector<std::vector<double> > > &values) const
+{
+  // values is addressed as [component][point], local_dofs gets one entry per
+  // dof. The outer size is verified first so values[0] is only read afterwards.
+  Assert (has_support_points(), ExcFEHasNoSupportPoints());
+  Assert (values.size() == this->n_components(),
+          ExcDimensionMismatch(values.size(), this->n_components()));
+  Assert (values[0].size() == unit_support_points.size(),
+          ExcDimensionMismatch(values[0].size(), unit_support_points.size()));
+  Assert (local_dofs.size() == this->dofs_per_cell,
+          ExcDimensionMismatch(local_dofs.size(),this->dofs_per_cell));
+  for (unsigned int i=0; i<this->dofs_per_cell; ++i)
+    {
+      const unsigned int component = this->system_to_component_index(i).first;
+      local_dofs[i] = values[component][i];
+    }
+}
+
+
+
+template <int dim, int spacedim>
+std::size_t
+FiniteElement<dim,spacedim>::memory_consumption () const
+{
+  std::size_t total = sizeof(FiniteElementData<dim>);  // fixed part, tables follow
+  total += MemoryConsumption::memory_consumption (restriction);
+  total += MemoryConsumption::memory_consumption (prolongation);
+  total += MemoryConsumption::memory_consumption (interface_constraints);
+  total += MemoryConsumption::memory_consumption (system_to_component_table);
+  total += MemoryConsumption::memory_consumption (face_system_to_component_table);
+  total += MemoryConsumption::memory_consumption (system_to_base_table);
+  total += MemoryConsumption::memory_consumption (face_system_to_base_table);
+  total += MemoryConsumption::memory_consumption (component_to_base_table);
+  total += MemoryConsumption::memory_consumption (restriction_is_additive_flags);
+  total += MemoryConsumption::memory_consumption (nonzero_components);
+  return total + MemoryConsumption::memory_consumption (n_nonzero_components_table);
+}
+
+
+
+template <int dim, int spacedim>
+std::vector<unsigned int>
+FiniteElement<dim,spacedim>::compute_n_nonzero_components (
+  const std::vector<ComponentMask> &nonzero_components)
+{
+  std::vector<unsigned int> counts (nonzero_components.size());  // one count per mask
+  for (std::size_t mask=0; mask!=counts.size(); ++mask)
+    counts[mask] = nonzero_components[mask].n_selected_components();
+  return counts;
+}
+
+
+
+/*------------------------------- FiniteElement ----------------------*/
+
+template <int dim, int spacedim>
+typename FiniteElement<dim,spacedim>::InternalDataBase *
+FiniteElement<dim,spacedim>::get_face_data (const UpdateFlags       flags,
+                                            const Mapping<dim,spacedim>      &mapping,
+                                            const Quadrature<dim-1> &quadrature,
+                                            dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const
+{
+  // default: forward to get_data() with the face rule projected onto all faces
+  const Quadrature<dim> all_faces = QProjector<dim>::project_to_all_faces(quadrature);
+  return get_data (flags, mapping, all_faces, output_data);
+}
+
+
+
+template <int dim, int spacedim>
+typename FiniteElement<dim,spacedim>::InternalDataBase *
+FiniteElement<dim,spacedim>::get_subface_data (const UpdateFlags        flags,
+                                               const Mapping<dim,spacedim>      &mapping,
+                                               const Quadrature<dim-1> &quadrature,
+                                               dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const
+{
+  // default: forward to get_data() with the face rule projected onto all subfaces
+  const Quadrature<dim> all_subfaces = QProjector<dim>::project_to_all_subfaces(quadrature);
+  return get_data (flags, mapping, all_subfaces, output_data);
+}
+
+
+
+template <int dim, int spacedim>
+const FiniteElement<dim,spacedim> &
+FiniteElement<dim,spacedim>::base_element(const unsigned int index) const
+{
+  // This default is not meant for system elements: the element acts as
+  // its own single base element, so zero is the only admissible index.
+  Assert (index==0, ExcIndexRange(index,0,1));
+  Assert (base_to_block_indices.size() == 1, ExcInternalError());
+  (void)index;  // index is referenced by the assertion above only
+  return *this;
+}
+
+
+
+/*------------------------------- Explicit Instantiations -------------*/
+#include "fe.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/fe.inst.in b/source/fe/fe.inst.in
new file mode 100644
index 0000000..24d2c1c
--- /dev/null
+++ b/source/fe/fe.inst.in
@@ -0,0 +1,24 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+#if deal_II_dimension <= deal_II_space_dimension
+    template class FiniteElement<deal_II_dimension, deal_II_space_dimension>; // explicit instantiation for this (dim, spacedim) pair
+#endif // pairs with deal_II_dimension > deal_II_space_dimension are not instantiated
+  }
+
diff --git a/source/fe/fe_abf.cc b/source/fe/fe_abf.cc
new file mode 100644
index 0000000..32b7046
--- /dev/null
+++ b/source/fe/fe_abf.cc
@@ -0,0 +1,657 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/quadrature.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/qprojector.h>
+#include <deal.II/base/table.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/mapping.h>
+#include <deal.II/fe/fe_abf.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/fe_tools.h>
+
+#include <sstream>
+#include <iostream>
+
+//TODO: implement the adjust_quad_dof_index_for_face_orientation_table and
+//adjust_line_dof_index_for_line_orientation_table fields, and write tests
+//similar to bits/face_orientation_and_fe_q_*
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+template <int dim>
+FE_ABF<dim>::FE_ABF (const unsigned int deg)
+  :
+  FE_PolyTensor<PolynomialsABF<dim>, dim> (
+    deg,
+    FiniteElementData<dim>(get_dpo_vector(deg),
+                           dim, deg+1, FiniteElementData<dim>::Hdiv),
+    std::vector<bool>(PolynomialsABF<dim>::compute_n_pols(deg), true),
+    std::vector<ComponentMask>(PolynomialsABF<dim>::compute_n_pols(deg),
+                               std::vector<bool>(dim,true))),
+  rt_order(deg)
+{
+  Assert (dim >= 2, ExcImpossibleInDim(dim));
+  const unsigned int n_dofs = this->dofs_per_cell;
+
+  this->mapping_type = mapping_raviart_thomas;
+  // Set up the element of degree deg. First, initialize the generalized
+  // support points and quadrature weights, since they are required for
+  // interpolation; compute_node_matrix() below is handed *this and
+  // presumably relies on them -- TODO confirm.
+  initialize_support_points(deg);
+  // Now compute the node matrix M and keep its inverse. The inverse
+  // generates the correct basis functions from the raw ones, so the
+  // shape functions are still the raw ones while M is being computed.
+  // M is square, with one row and one column per cell dof.
+  FullMatrix<double> M(n_dofs, n_dofs);
+  FETools::compute_node_matrix(M, *this);
+
+  this->inverse_node_matrix.reinit(n_dofs, n_dofs);
+  this->inverse_node_matrix.invert(M);
+  // From now on, the shape functions will be the correct ones, not the
+  // raw shape functions anymore. The embedding and restriction matrices
+  // computed below therefore have to come after this point.
+
+  // Reinit the vectors of restriction and prolongation matrices to the
+  // right sizes. Restriction is set up for isotropic refinement only;
+  // NOTE(review): presumably that is what the argument true selects --
+  // confirm against reinit_restriction_and_prolongation_matrices().
+  // initialize_restriction() below likewise fills the isotropic case only.
+  this->reinit_restriction_and_prolongation_matrices(true);
+  // Fill prolongation matrices with embedding operators
+  FETools::compute_embedding_matrices (*this, this->prolongation);
+
+  initialize_restriction ();
+
+  // TODO[TL]: for anisotropic refinement we will probably need a table of submatrices with an array for each refine case
+  std::vector<FullMatrix<double> >
+  face_embeddings(1<<(dim-1), FullMatrix<double>(this->dofs_per_face,
+                                                 this->dofs_per_face));  // never filled, see TODO below
+  // TODO: Something goes wrong there. The error of the least squares fit
+  // is too large, so the following call stays disabled:
+  // FETools::compute_face_embedding_matrices(*this, &face_embeddings[0], 0, 0);
+  this->interface_constraints.reinit((1<<(dim-1)) * this->dofs_per_face,
+                                     this->dofs_per_face);
+  unsigned int target_row=0;  // stack the face_embeddings blocks row by row
+  for (unsigned int d=0; d<face_embeddings.size(); ++d)
+    for (unsigned int i=0; i<face_embeddings[d].m(); ++i)
+      {
+        for (unsigned int j=0; j<face_embeddings[d].n(); ++j)
+          this->interface_constraints(target_row,j) = face_embeddings[d](i,j);
+        ++target_row;
+      }
+}
+
+
+
+template <int dim>
+std::string
+FE_ABF<dim>::get_name () const
+{
+  // Builds the textual name "FE_ABF<dim>(rt_order)" of this element.
+  //
+  // Keep the format in sync with FETools::get_fe_from_name, which
+  // depends on the exact layout of the string produced here.
+  std::ostringstream name_stream;
+  name_stream << "FE_ABF<";
+  name_stream << dim;
+  name_stream << ">(";
+  name_stream << rt_order;
+  name_stream << ")";
+
+  return name_stream.str();
+}
+
+
+
+template <int dim>
+FiniteElement<dim> *FE_ABF<dim>::clone() const
+{
+  FE_ABF<dim> *const duplicate = new FE_ABF<dim>(rt_order);  // fresh element, same rt_order
+  return duplicate;
+}
+
+
+//---------------------------------------------------------------------------
+// Auxiliary and internal functions
+//---------------------------------------------------------------------------
+
+
+
+// Fills the generalized support points and the weight tables read by interpolate(); the constructor asserts dim >= 2
+template <int dim>
+void
+FE_ABF<dim>::initialize_support_points (const unsigned int deg)
+{
+  QGauss<dim> cell_quadrature(deg+2);
+  const unsigned int n_interior_points = cell_quadrature.size();
+
+  unsigned int n_face_points = (dim>1) ? 1 : 0;
+  // compute (deg+1)^(dim-1)
+  for (unsigned int d=1; d<dim; ++d)
+    n_face_points *= deg+1;
+
+  this->generalized_support_points.resize (GeometryInfo<dim>::faces_per_cell*n_face_points
+                                           + n_interior_points);
+  this->generalized_face_support_points.resize (n_face_points);
+
+
+  // The ABF weight polynomials are needed both for the face contribution
+  // and for the cell contribution below; they are deleted at the very end.
+  std::vector<AnisotropicPolynomials<dim>* > polynomials_abf(dim);
+
+  // Generate x_1^{i} x_2^{r+1} ...: a complete basis in direction dd, x^(deg+1) elsewhere
+  for (unsigned int dd=0; dd<dim; ++dd)
+    {
+      std::vector<std::vector<Polynomials::Polynomial<double> > > poly(dim);
+      for (unsigned int d=0; d<dim; ++d)
+        poly[d].push_back (Polynomials::Monomial<double> (deg+1));
+      poly[dd] = Polynomials::Monomial<double>::generate_complete_basis(deg);
+
+      polynomials_abf[dd] = new AnisotropicPolynomials<dim>(poly);
+    }
+
+  // Index of the next generalized support point to be entered
+  unsigned int current = 0;
+
+  if (dim>1)
+    {
+      QGauss<dim-1> face_points (deg+1);
+      TensorProductPolynomials<dim-1> legendre =
+        Polynomials::Legendre::generate_complete_basis(deg);
+
+      boundary_weights.reinit(n_face_points, legendre.n());
+
+//       Assert (face_points.size() == this->dofs_per_face,
+//            ExcInternalError());
+
+      for (unsigned int k=0; k<n_face_points; ++k)
+        {
+          this->generalized_face_support_points[k] = face_points.point(k);
+          // Compute its quadrature contribution for each moment: the
+          // weight of point k times the value of the i-th Legendre
+          // tensor product polynomial at that point.
+          for (unsigned int i=0; i<legendre.n(); ++i)
+            {
+              boundary_weights(k, i)
+                = face_points.weight(k)
+                  * legendre.compute_value(i, face_points.point(k));
+            }
+        }
+
+      Quadrature<dim> faces = QProjector<dim>::project_to_all_faces(face_points);
+      for (; current<GeometryInfo<dim>::faces_per_cell*n_face_points;
+           ++current)
+        {
+          // Enter the support point into the vector; the face points fill
+          // the leading entries, in the order of the projected quadrature
+          this->generalized_support_points[current] = faces.point(current);
+        }
+
+
+      // Now initialise edge interior weights for the ABF elements.
+      // These are completely independent from the usual edge moments. They
+      // stem from applying the Gauss theorem to the nodal values, which
+      // was necessary to cast the ABF elements into the deal.II framework
+      // for vector valued elements. Column i uses polynomial i/dim of direction i%dim.
+      boundary_weights_abf.reinit(faces.size(), polynomials_abf[0]->n() * dim);
+      for (unsigned int k=0; k < faces.size(); ++k)
+        {
+          for (unsigned int i=0; i<polynomials_abf[0]->n() * dim; ++i)
+            {
+              boundary_weights_abf(k,i) = polynomials_abf[i%dim]->
+                                          compute_value(i / dim, faces.point(k)) * faces.weight(k);
+            }
+        }
+    }
+
+  // Create Legendre basis for the space D_xi Q_k. This is skipped for
+  // deg==0, where deg-1 below would not be a valid degree.
+  if (deg>0)
+    {
+      std::vector<AnisotropicPolynomials<dim>* > polynomials(dim);
+
+      for (unsigned int dd=0; dd<dim; ++dd)
+        {
+          std::vector<std::vector<Polynomials::Polynomial<double> > > poly(dim);
+          for (unsigned int d=0; d<dim; ++d)
+            poly[d] = Polynomials::Legendre::generate_complete_basis(deg);
+          poly[dd] = Polynomials::Legendre::generate_complete_basis(deg-1);
+
+          polynomials[dd] = new AnisotropicPolynomials<dim>(poly);
+        }
+
+      interior_weights.reinit(TableIndices<3>(n_interior_points, polynomials[0]->n(), dim));
+
+      for (unsigned int k=0; k<cell_quadrature.size(); ++k)
+        {
+          for (unsigned int i=0; i<polynomials[0]->n(); ++i)
+            for (unsigned int d=0; d<dim; ++d)
+              interior_weights(k,i,d) = cell_quadrature.weight(k)
+                                        * polynomials[d]->compute_value(i,cell_quadrature.point(k));
+        }
+
+      for (unsigned int d=0; d<dim; ++d)
+        delete polynomials[d];
+    }
+
+
+  // The interior support points are entered independently of deg, i.e.
+  // also when the computation of interior weights above was skipped.
+  for (unsigned int k=0; k<cell_quadrature.size(); ++k)
+    this->generalized_support_points[current++] = cell_quadrature.point(k);
+
+  // Additional functionality for the ABF elements
+  // TODO: Here the canonical extension of the principle
+  // behind the ABF elements is implemented. It is unclear
+  // whether this really leads to the ABF spaces in 3D!
+  interior_weights_abf.reinit(TableIndices<3>(cell_quadrature.size(),
+                                              polynomials_abf[0]->n() * dim, dim));
+  Tensor<1, dim> poly_grad;
+
+  for (unsigned int k=0; k<cell_quadrature.size(); ++k)
+    {
+      for (unsigned int i=0; i<polynomials_abf[0]->n() * dim; ++i)
+        {
+          poly_grad = polynomials_abf[i%dim]->compute_grad(i / dim,cell_quadrature.point(k))
+                      * cell_quadrature.weight(k);
+          // The minus sign comes from the use of the Gauss theorem to replace the divergence.
+          for (unsigned int d=0; d<dim; ++d)
+            interior_weights_abf(k,i,d) = -poly_grad[d];
+        }
+    }
+
+  for (unsigned int d=0; d<dim; ++d)
+    delete polynomials_abf[d];  // releases what the loop near the top allocated
+
+  Assert (current == this->generalized_support_points.size(),
+          ExcInternalError());
+}
+
+
+
+// This function performs the same Raviart-Thomas interpolation as
+// interpolate. Still, we cannot use interpolate, since it was written
+// for smooth functions. The functions interpolated here are not
+// smooth, maybe not even continuous. Therefore, we must double the
+// number of quadrature points in each direction in order to integrate
+// only smooth functions.
+
+// Then again, the interpolated function is chosen such that the
+// moments coincide with the function to be interpolated.
+
+template <int dim>
+void
+FE_ABF<dim>::initialize_restriction()
+{
+  if (dim==1)  // 1d: give every child an empty restriction matrix and stop
+    {
+      unsigned int iso=RefinementCase<dim>::isotropic_refinement-1;
+      for (unsigned int i=0; i<GeometryInfo<dim>::max_children_per_cell; ++i)
+        this->restriction[iso][i].reinit(0,0);
+      return;
+    }
+  unsigned int iso=RefinementCase<dim>::isotropic_refinement-1;  // only this refinement case is filled
+  QGauss<dim-1> q_base (rt_order+1);
+  const unsigned int n_face_points = q_base.size();
+  // First, compute interpolation on subfaces. This fills the rows of the
+  // restriction matrices that belong to face dofs.
+  for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+    {
+      // The shape functions of the child cell are evaluated in the
+      // quadrature points of a full face. Only the component given by
+      // unit_normal_direction[face] is evaluated, once per pair of
+      // shape function and quadrature point.
+      Quadrature<dim> q_face
+        = QProjector<dim>::project_to_face(q_base, face);
+      // Store shape values, since the evaluation suffers if not ordered
+      // by point. The table is indexed (shape function, point) and is
+      // reused for every subface of this face.
+      Table<2,double> cached_values(this->dofs_per_cell, q_face.size());
+      for (unsigned int k=0; k<q_face.size(); ++k)
+        for (unsigned int i = 0; i < this->dofs_per_cell; ++i)
+          cached_values(i,k)
+            = this->shape_value_component(i, q_face.point(k),
+                                          GeometryInfo<dim>::unit_normal_direction[face]);
+
+      for (unsigned int sub=0; sub<GeometryInfo<dim>::max_children_per_face; ++sub)
+        {
+          // The weight functions for the coarse face are evaluated on
+          // the subface only. child is the cell adjacent to subface
+          // sub of this face under isotropic refinement; its
+          // restriction matrix receives the contributions below.
+          Quadrature<dim> q_sub
+            = QProjector<dim>::project_to_subface(q_base, face, sub);
+          const unsigned int child
+            = GeometryInfo<dim>::child_cell_on_face(
+                RefinementCase<dim>::isotropic_refinement, face, sub);
+
+          // On a certain face, we must compute the moments of ALL
+          // fine level functions with the coarse level weight
+          // functions belonging to that face. Due to the
+          // orthogonalization process when building the shape
+          // functions, these weights are equal to the
+          // corresponding shape functions.
+          //
+          // Hence the product below pairs the cached value of
+          // shape function i_child at face point k with the value
+          // of shape function face*dofs_per_face+i_face at the
+          // subface point k, both taken in the normal direction
+          // of the face.
+          for (unsigned int k=0; k<n_face_points; ++k)
+            for (unsigned int i_child = 0; i_child < this->dofs_per_cell; ++i_child)
+              for (unsigned int i_face = 0; i_face < this->dofs_per_face; ++i_face)
+                {
+                  // The quadrature weights on the subcell are NOT
+                  // transformed, so we have to do it here: the
+                  // factor fixed_power<dim-1>(.5) multiplies each
+                  // weight of q_sub before it enters the sum.
+                  //
+                  this->restriction[iso][child](face*this->dofs_per_face+i_face,
+                                                i_child)
+                  += Utilities::fixed_power<dim-1>(.5) * q_sub.weight(k)
+                     * cached_values(i_child, k)
+                     * this->shape_value_component(face*this->dofs_per_face+i_face,
+                                                   q_sub.point(k),
+                                                   GeometryInfo<dim>::unit_normal_direction[face]);
+                }
+        }
+    }
+
+  if (rt_order==0) return;  // lowest order: only the face part above is computed
+
+  // Create Legendre basis for the space D_xi Q_k. Here, we cannot use
+  // the shape functions. The polynomial objects are allocated with new
+  // and deleted again at the end of this function.
+  std::vector<AnisotropicPolynomials<dim>* > polynomials(dim);
+  for (unsigned int dd=0; dd<dim; ++dd)
+    {
+      std::vector<std::vector<Polynomials::Polynomial<double> > > poly(dim);
+      for (unsigned int d=0; d<dim; ++d)
+        poly[d] = Polynomials::Legendre::generate_complete_basis(rt_order);
+      poly[dd] = Polynomials::Legendre::generate_complete_basis(rt_order-1);
+
+      polynomials[dd] = new AnisotropicPolynomials<dim>(poly);
+    }
+
+  QGauss<dim> q_cell(rt_order+1);
+  const unsigned int start_cell_dofs
+    = GeometryInfo<dim>::faces_per_cell*this->dofs_per_face;
+
+  // Store shape values, since the evaluation suffers if not ordered by
+  // point. This table is indexed (shape function, point, component)
+  // and holds all dim components at the points of q_cell.
+  Table<3,double> cached_values(this->dofs_per_cell, q_cell.size(), dim);
+  for (unsigned int k=0; k<q_cell.size(); ++k)
+    for (unsigned int i = 0; i < this->dofs_per_cell; ++i)
+      for (unsigned int d=0; d<dim; ++d)
+        cached_values(i,k,d) = this->shape_value_component(i, q_cell.point(k), d);
+
+  for (unsigned int child=0; child<GeometryInfo<dim>::max_children_per_cell; ++child)
+    {
+      Quadrature<dim> q_sub = QProjector<dim>::project_to_child(q_cell, child);
+
+      for (unsigned int k=0; k<q_sub.size(); ++k)
+        for (unsigned int i_child = 0; i_child < this->dofs_per_cell; ++i_child)
+          for (unsigned int d=0; d<dim; ++d)
+            for (unsigned int i_weight=0; i_weight<polynomials[d]->n(); ++i_weight)
+              {
+                this->restriction[iso][child](start_cell_dofs+i_weight*dim+d,
+                                              i_child)
+                += q_sub.weight(k)
+                   * cached_values(i_child, k, d)
+                   * polynomials[d]->compute_value(i_weight, q_sub.point(k));
+              }
+    }
+
+  for (unsigned int d=0; d<dim; ++d)
+    delete polynomials[d];
+}
+
+
+
+template <int dim>
+std::vector<unsigned int>
+FE_ABF<dim>::get_dpo_vector (const unsigned int rt_order)
+{
+  // There is no such element in 1d.
+  if (dim == 1)
+    {
+      Assert (false, ExcImpossibleInDim(1));
+      return std::vector<unsigned int>();
+    }
+
+  // The element is face-based: each face carries (rt_order+1)^(dim-1)
+  // degrees of freedom ...
+  const unsigned int n_1d = rt_order+1;
+  unsigned int n_on_face = 1;
+  for (int direction=1; direction<dim; ++direction)
+    n_on_face *= n_1d;
+
+  // ... and the interior holds dim*(rt_order+1) times as many.
+  const unsigned int n_in_cell = dim*n_1d*n_on_face;
+
+  // Only the last two entries are set (presumably: faces and cell
+  // interior); all other entries keep their initial value of zero.
+  std::vector<unsigned int> dofs_per_object (dim+1);
+  dofs_per_object[dim-1] = n_on_face;
+  dofs_per_object[dim]   = n_in_cell;
+
+  return dofs_per_object;
+}
+
+//---------------------------------------------------------------------------
+// Data field initialization
+//---------------------------------------------------------------------------
+
+template <int dim>
+bool
+FE_ABF<dim>::has_support_on_face (const unsigned int shape_index,
+                                  const unsigned int face_index) const
+{
+  // Tells whether shape function shape_index may be nonzero on face
+  // face_index. Answering true is always safe, so false is returned only
+  // in the one case where the answer is known: rt_order 0 in 2d.
+  Assert (shape_index < this->dofs_per_cell,
+          ExcIndexRange (shape_index, 0, this->dofs_per_cell));
+  Assert (face_index < GeometryInfo<dim>::faces_per_cell,
+          ExcIndexRange (face_index, 0, GeometryInfo<dim>::faces_per_cell));
+
+  switch (rt_order)
+    {
+    case 0:
+    {
+      switch (dim)
+        {
+        case 2:
+        {
+          // get_dpo_vector() hands out interior dofs even for rt_order
+          // 0, so shape_index can be larger than any face number. Such
+          // an index must not be used to look up opposite_face, which
+          // is indexed by face; answer conservatively instead.
+          if (shape_index >= GeometryInfo<dim>::faces_per_cell)
+            return true;
+
+          // a face function vanishes only on the face opposite its own
+          return (face_index != GeometryInfo<dim>::opposite_face[shape_index]);
+        }
+        default:
+          return true;
+        };
+    };
+    default:  // other rt_order
+      return true;
+    };
+  return true;
+}
+
+
+
+template <int dim>
+void
+FE_ABF<dim>::interpolate(std::vector<double> &,
+                         const std::vector<double> &) const
+{
+  // this overload is not implemented; both arguments are ignored
+  Assert(false, ExcNotImplemented());
+}
+
+
+
+template <int dim>
+void
+FE_ABF<dim>::interpolate(
+  std::vector<double>    &local_dofs,
+  const std::vector<Vector<double> > &values,
+  unsigned int offset) const
+{  // values: one vector per generalized support point; offset shifts the component index
+  Assert (values.size() == this->generalized_support_points.size(),
+          ExcDimensionMismatch(values.size(), this->generalized_support_points.size()));
+  Assert (local_dofs.size() == this->dofs_per_cell,
+          ExcDimensionMismatch(local_dofs.size(),this->dofs_per_cell));
+  Assert (values[0].size() >= offset+this->n_components(),
+          ExcDimensionMismatch(values[0].size(),offset+this->n_components()));
+
+  std::fill(local_dofs.begin(), local_dofs.end(), 0.);  // everything below accumulates with +=
+
+  const unsigned int n_face_points = boundary_weights.size(0);
+  for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)  // face moments of the normal component
+    for (unsigned int k=0; k<n_face_points; ++k)
+      for (unsigned int i=0; i<boundary_weights.size(1); ++i)
+        {
+          local_dofs[i+face*this->dofs_per_face] += boundary_weights(k,i)
+                                                    * values[face*n_face_points+k](GeometryInfo<dim>::unit_normal_direction[face]+offset);
+        }
+
+  const unsigned int start_cell_dofs = GeometryInfo<dim>::faces_per_cell*this->dofs_per_face;
+  const unsigned int start_cell_points = GeometryInfo<dim>::faces_per_cell*n_face_points;
+
+  for (unsigned int k=0; k<interior_weights.size(0); ++k)  // interior moments, all dim components
+    for (unsigned int i=0; i<interior_weights.size(1); ++i)
+      for (unsigned int d=0; d<dim; ++d)
+        local_dofs[start_cell_dofs+i*dim+d] += interior_weights(k,i,d) * values[k+start_cell_points](d+offset);
+
+  const unsigned int start_abf_dofs = start_cell_dofs + interior_weights.size(1) * dim;
+
+  // Cell integral of ABF terms
+  for (unsigned int k=0; k<interior_weights_abf.size(0); ++k)
+    for (unsigned int i=0; i<interior_weights_abf.size(1); ++i)
+      for (unsigned int d=0; d<dim; ++d)
+        local_dofs[start_abf_dofs+i] += interior_weights_abf(k,i,d) * values[k+start_cell_points](d+offset);
+
+  // Face integral of ABF terms; k + fp below addresses point fp of the current face
+  for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+    {
+      double n_orient = (double) GeometryInfo<dim>::unit_normal_orientation[face];
+      for (unsigned int fp=0; fp < n_face_points; ++fp)
+        {
+          // TODO: Check what the face_orientation, face_flip and face_rotation  have to be in 3D
+          unsigned int k = QProjector<dim>::DataSetDescriptor::face (face, false, false, false, n_face_points);
+          for (unsigned int i=0; i<boundary_weights_abf.size(1); ++i)
+            local_dofs[start_abf_dofs+i] += n_orient * boundary_weights_abf(k + fp, i)
+                                            * values[k + fp](GeometryInfo<dim>::unit_normal_direction[face]+offset);
+        }
+    }
+
+  // TODO: Check if this "correction" can be removed. It sets ABF dofs below 1.0e-16 in magnitude to exactly zero.
+  for (unsigned int i=0; i<boundary_weights_abf.size(1); ++i)
+    if (std::fabs (local_dofs[start_abf_dofs+i]) < 1.0e-16)
+      local_dofs[start_abf_dofs+i] = 0.0;
+}
+
+template <int dim>
+void
+FE_ABF<dim>::interpolate(
+  std::vector<double> &local_dofs,
+  const VectorSlice<const std::vector<std::vector<double> > > &values) const
+{
+  Assert (values.size() == this->n_components(),
+          ExcDimensionMismatch(values.size(), this->n_components()));
+  Assert (values[0].size() == this->generalized_support_points.size(),
+          ExcDimensionMismatch(values[0].size(), this->generalized_support_points.size()));
+  Assert (local_dofs.size() == this->dofs_per_cell,
+          ExcDimensionMismatch(local_dofs.size(),this->dofs_per_cell));
+  // values[c][p] is component c at generalized support point p; all dofs are accumulated from zero.
+  std::fill(local_dofs.begin(), local_dofs.end(), 0.);
+  // Face dofs: weighted sums of the normal component over the points of each face.
+  const unsigned int n_face_points = boundary_weights.size(0);
+  for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+    for (unsigned int k=0; k<n_face_points; ++k)
+      for (unsigned int i=0; i<boundary_weights.size(1); ++i)
+        {
+          local_dofs[i+face*this->dofs_per_face] += boundary_weights(k,i)
+                                                    * values[GeometryInfo<dim>::unit_normal_direction[face]][face*n_face_points+k];
+        }
+
+  const unsigned int start_cell_dofs = GeometryInfo<dim>::faces_per_cell*this->dofs_per_face;
+  const unsigned int start_cell_points = GeometryInfo<dim>::faces_per_cell*n_face_points;
+  // Interior dofs: weighted sums of each of the dim components over the cell points.
+  for (unsigned int k=0; k<interior_weights.size(0); ++k)
+    for (unsigned int i=0; i<interior_weights.size(1); ++i)
+      for (unsigned int d=0; d<dim; ++d)
+        local_dofs[start_cell_dofs+i*dim+d] += interior_weights(k,i,d) * values[d][k+start_cell_points];
+
+  const unsigned int start_abf_dofs = start_cell_dofs + interior_weights.size(1) * dim;
+
+  // Cell integral of ABF terms
+  for (unsigned int k=0; k<interior_weights_abf.size(0); ++k)
+    for (unsigned int i=0; i<interior_weights_abf.size(1); ++i)
+      for (unsigned int d=0; d<dim; ++d)
+        local_dofs[start_abf_dofs+i] += interior_weights_abf(k,i,d) * values[d][k+start_cell_points];
+
+  // Face integral of ABF terms
+  for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+    {
+      const double n_orient = static_cast<double>(GeometryInfo<dim>::unit_normal_orientation[face]);
+      // TODO: Check what the face_orientation, face_flip and face_rotation have to be in 3D
+      const unsigned int face_offset = QProjector<dim>::DataSetDescriptor::face (face, false, false, false, n_face_points);
+      for (unsigned int fp=0; fp < n_face_points; ++fp)
+        {
+          for (unsigned int i=0; i<boundary_weights_abf.size(1); ++i)
+            local_dofs[start_abf_dofs+i] += n_orient * boundary_weights_abf(face_offset + fp, i)
+                                            * values[GeometryInfo<dim>::unit_normal_direction[face]][face_offset + fp];
+        }
+    }
+
+  // TODO: Check if this "correction" can be removed.
+  for (unsigned int i=0; i<boundary_weights_abf.size(1); ++i)
+    if (std::fabs (local_dofs[start_abf_dofs+i]) < 1.0e-16)
+      local_dofs[start_abf_dofs+i] = 0.0;
+}
+
+
+template <int dim>
+std::size_t
+FE_ABF<dim>::memory_consumption () const
+{
+  Assert (false, ExcNotImplemented ());
+  return 0;  // placeholder only: the actual memory estimate is not implemented
+}
+
+
+
+/*-------------- Explicit Instantiations -------------------------------*/
+#include "fe_abf.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/fe_abf.inst.in b/source/fe/fe_abf.inst.in
new file mode 100644
index 0000000..2010ba1
--- /dev/null
+++ b/source/fe/fe_abf.inst.in
@@ -0,0 +1,22 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+for (deal_II_dimension : DIMENSIONS)
+  {
+    template class FE_ABF<deal_II_dimension>;
+  }
+
diff --git a/source/fe/fe_bdm.cc b/source/fe/fe_bdm.cc
new file mode 100644
index 0000000..b51b641
--- /dev/null
+++ b/source/fe/fe_bdm.cc
@@ -0,0 +1,392 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/qprojector.h>
+#include <deal.II/base/polynomials_p.h>
+#include <deal.II/base/table.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/mapping.h>
+#include <deal.II/fe/fe_bdm.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/fe_tools.h>
+
+#include <iostream>
+#include <sstream>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int dim>
+FE_BDM<dim>::FE_BDM (const unsigned int deg)
+  :
+  FE_PolyTensor<PolynomialsBDM<dim>, dim> (
+    deg,
+    FiniteElementData<dim>(get_dpo_vector(deg),
+                           dim, deg+1, FiniteElementData<dim>::Hdiv),
+    get_ria_vector (deg),
+    std::vector<ComponentMask>(PolynomialsBDM<dim>::compute_n_pols(deg),
+                               std::vector<bool>(dim,true)))
+{
+  Assert (dim >= 2, ExcImpossibleInDim(dim));
+  Assert (deg > 0, ExcMessage("Lowest order BDM element are degree 1, but you asked for degree 0"));
+
+  const unsigned int n_dofs = this->dofs_per_cell;
+
+  this->mapping_type = mapping_bdm;
+  // These must be done first, since
+  // they change the evaluation of
+  // basis functions
+
+  // Set up the generalized support
+  // points
+  initialize_support_points (deg);
+  // Now compute the inverse node
+  // matrix, generating the correct
+  // basis functions from the raw
+  // ones.
+
+  // We use an auxiliary matrix in
+  // this function. Therefore,
+  // inverse_node_matrix is still
+  // empty and shape_value_component
+  // returns the 'raw' shape values.
+  FullMatrix<double> M(n_dofs, n_dofs);
+  FETools::compute_node_matrix(M, *this);
+
+//   std::cout << std::endl;
+//   M.print_formatted(std::cout, 2, true);
+
+  this->inverse_node_matrix.reinit(n_dofs, n_dofs);
+  this->inverse_node_matrix.invert(M);
+  // From now on, the shape functions
+  // will be the correct ones, not
+  // the raw shape functions anymore.
+
+  // Embedding errors become pretty large, so we just replace the
+  // regular threshold in both "compute_..." functions below by 1.
+  this->reinit_restriction_and_prolongation_matrices(true, true);
+  FETools::compute_embedding_matrices (*this, this->prolongation, true, 1.);
+
+  FullMatrix<double> face_embeddings[GeometryInfo<dim>::max_children_per_face];
+  for (unsigned int i=0; i<GeometryInfo<dim>::max_children_per_face; ++i)
+    face_embeddings[i].reinit (this->dofs_per_face, this->dofs_per_face);
+  FETools::compute_face_embedding_matrices(*this, face_embeddings, 0, 0, 1.);
+  this->interface_constraints.reinit((1<<(dim-1)) * this->dofs_per_face,
+                                     this->dofs_per_face);
+  unsigned int target_row=0;
+  for (unsigned int d=0; d<GeometryInfo<dim>::max_children_per_face; ++d)
+    for (unsigned int i=0; i<face_embeddings[d].m(); ++i)
+      {
+        for (unsigned int j=0; j<face_embeddings[d].n(); ++j)
+          this->interface_constraints(target_row,j) = face_embeddings[d](i,j);
+        ++target_row;
+      }
+}
+
+
+
+template <int dim>
+std::string
+FE_BDM<dim>::get_name () const
+{
+  // The exact format of the string
+  // built here matters: the
+  // FETools::get_fe_from_name
+  // function depends on it, so the
+  // two have to be kept in synch.
+  //
+  // this->degree is the maximal
+  // polynomial degree, i.e. one more
+  // than the constructor argument;
+  // subtract one so that the name
+  // shows the constructor argument.
+  std::ostringstream name;
+  name << "FE_BDM<" << dim << ">(";
+  name << this->degree-1 << ")";
+  return name.str();
+}
+
+
+template <int dim>
+FiniteElement<dim> *
+FE_BDM<dim>::clone() const
+{
+  return new FE_BDM<dim>(*this);  // heap-allocated copy made with the copy constructor
+}
+
+
+
+template <int dim>
+void
+FE_BDM<dim>::interpolate(
+  std::vector<double> &,
+  const std::vector<double> &) const
+{
+  Assert(false, ExcNotImplemented());  // this overload is not implemented for FE_BDM
+}
+
+
+template <int dim>
+void
+FE_BDM<dim>::interpolate(
+  std::vector<double> &,
+  const std::vector<Vector<double> > &,
+  unsigned int) const
+{
+  Assert(false, ExcNotImplemented());  // this overload is not implemented for FE_BDM
+}
+
+
+
+template <int dim>
+void
+FE_BDM<dim>::interpolate(
+  std::vector<double> &local_dofs,
+  const VectorSlice<const std::vector<std::vector<double> > > &values) const
+{
+  AssertDimension (values.size(), dim);
+  Assert (values[0].size() == this->generalized_support_points.size(),
+          ExcDimensionMismatch(values[0].size(), this->generalized_support_points.size()));
+  Assert (local_dofs.size() == this->dofs_per_cell,
+          ExcDimensionMismatch(local_dofs.size(),this->dofs_per_cell));
+  // values[c][p] is component c at generalized support point p.
+  // First do interpolation on faces. There, the component evaluated
+  // depends on the face direction and orientation.
+
+  // The index of the first dof on this face or the cell
+  unsigned int dbase = 0;
+  // The index of the first generalized support point on this face or the cell
+  unsigned int pbase = 0;
+  for (unsigned int f = 0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+    {
+      // Old version with no moments in 2D. See comment below in
+      // initialize_support_points()
+      if (test_values_face.size() == 0)
+        {
+          for (unsigned int i=0; i<this->dofs_per_face; ++i)
+            local_dofs[dbase+i] = values[GeometryInfo<dim>::unit_normal_direction[f]][pbase+i];
+          pbase += this->dofs_per_face;
+        }
+      else
+        {
+          for (unsigned int i=0; i<this->dofs_per_face; ++i)
+            {
+              double s = 0.;
+              for (unsigned int k=0; k<test_values_face.size(); ++k)
+                s += values[GeometryInfo<dim>::unit_normal_direction[f]][pbase+k] * test_values_face[k][i];
+              local_dofs[dbase+i] = s;
+            }
+          pbase += test_values_face.size();
+        }
+      dbase += this->dofs_per_face;
+    }
+
+  AssertDimension (dbase, this->dofs_per_face * GeometryInfo<dim>::faces_per_cell);
+  AssertDimension (pbase, this->generalized_support_points.size() - test_values_cell.size());
+
+  // Done for BDM1
+  if (dbase == this->dofs_per_cell) return;
+
+  // What's missing are the interior
+  // degrees of freedom. In each
+  // point, we take all components of
+  // the solution.
+  Assert ((this->dofs_per_cell - dbase) % dim == 0, ExcInternalError());
+
+  for (unsigned int d=0; d<dim; ++d, dbase += test_values_cell[0].size())
+    {
+      for (unsigned int i=0; i<test_values_cell[0].size(); ++i)
+        {
+          double s = 0.;
+          for (unsigned int k=0; k<test_values_cell.size(); ++k)
+            s += values[d][pbase+k] * test_values_cell[k][i];
+          local_dofs[dbase+i] = s;
+        }
+    }
+
+  Assert (dbase == this->dofs_per_cell, ExcInternalError());
+}
+
+
+
+
+template <int dim>
+std::vector<unsigned int>
+FE_BDM<dim>::get_dpo_vector (const unsigned int deg)
+{
+  // Face dofs: one per polynomial of degree up to deg on the face.
+  // PolynomialSpace::compute_n_pols() uses an odd convention, hence
+  // the argument deg+1.
+  const unsigned int n_face_dofs
+    = PolynomialSpace<dim-1>::compute_n_pols(deg+1);
+
+  // Interior dofs: dim times the number of polynomials of degree up
+  // to deg-2 in dim dimensions; there are none unless deg>1.
+  const unsigned int n_interior_dofs
+    = (deg>1
+       ? dim * PolynomialSpace<dim>::compute_n_pols(deg-1)
+       : 0);
+
+  std::vector<unsigned int> dofs_per_object(dim+1);
+  dofs_per_object[dim-1] = n_face_dofs;
+  dofs_per_object[dim]   = n_interior_dofs;
+  return dofs_per_object;
+}
+
+
+
+template <int dim>
+std::vector<bool>
+FE_BDM<dim>::get_ria_vector (const unsigned int deg)
+{
+  if (dim==1)
+    {
+      Assert (false, ExcImpossibleInDim(1));
+      return std::vector<bool>();
+    }
+
+  const unsigned int dofs_per_cell = PolynomialsBDM<dim>::compute_n_pols(deg);
+  const unsigned int dofs_per_face = PolynomialSpace<dim-1>::compute_n_pols(deg);
+
+  Assert(GeometryInfo<dim>::faces_per_cell*dofs_per_face < dofs_per_cell,
+         ExcInternalError());
+
+  // Dofs with continuity requirements need to be non-additive;
+  // the remaining (interior) dofs are made additive.
+  // NOTE(review): dofs_per_face uses compute_n_pols(deg) here, while
+  // get_dpo_vector() uses compute_n_pols(deg+1) -- confirm that the
+  // face/interior split in the loop below is the intended one.
+  std::vector<bool> ret_val(dofs_per_cell,false);
+  for (unsigned int i=GeometryInfo<dim>::faces_per_cell*dofs_per_face;
+       i < dofs_per_cell; ++i)
+    ret_val[i] = true;
+
+  return ret_val;
+}
+
+
+namespace
+{
+  // This function sets up the values of the polynomials we want to
+  // take moments with in the quadrature points. In fact, we multiply
+  // those by the weights, such that the sum of function values and
+  // test_values over quadrature points yields the interpolated degree
+  // of freedom.
+  template <int dim>
+  void
+  initialize_test_values (std::vector<std::vector<double> > &test_values,
+                          const Quadrature<dim> &quadrature,
+                          const unsigned int deg)
+  {
+    PolynomialsP<dim> poly(deg);
+    std::vector<Tensor<1,dim> > dummy1;
+    std::vector<Tensor<2,dim> > dummy2;
+    std::vector<Tensor<3,dim> > dummy3;
+    std::vector<Tensor<4,dim> > dummy4;
+
+    test_values.resize(quadrature.size());
+
+    for (unsigned int k=0; k<quadrature.size(); ++k)
+      {
+        test_values[k].resize(poly.n());
+        poly.compute(quadrature.point(k), test_values[k], dummy1, dummy2,
+                     dummy3, dummy4);
+        for (unsigned int i=0; i < poly.n(); ++i)
+          {
+            test_values[k][i] *= quadrature.weight(k);
+          }
+      }
+  }
+
+  // This specialization only serves to avoid error messages. Nothing
+  // useful can be computed in dimension zero and thus the vector
+  // length stays zero.
+  template <>
+  void
+  initialize_test_values (std::vector<std::vector<double> > &,
+                          const Quadrature<0> &,
+                          const unsigned int)
+  {}
+}
+
+
+template <int dim>
+void
+FE_BDM<dim>::initialize_support_points (const unsigned int deg)
+{
+  // Our support points are quadrature points on faces and inside the
+  // cell. First on the faces, we have to test polynomials of degree
+  // up to deg, which means we need deg+1 points in each direction. The
+  // fact that we do not have tensor product polynomials will be
+  // considered later. In 2D, we can use point values.
+  QGauss<dim-1> face_points (deg+1);
+
+  // Copy the quadrature formula to the face points.
+  this->generalized_face_support_points.resize (face_points.size());
+  for (unsigned int k=0; k<face_points.size(); ++k)
+    this->generalized_face_support_points[k] = face_points.point(k);
+
+  // In the interior, we only test with polynomials of degree up to
+  // deg-2, thus we use deg points. Note that deg>=1 and the lowest
+  // order element has no points in the cell, such that we have to
+  // distinguish this case.
+  QGauss<dim> cell_points(deg==1 ? 0 : deg);
+
+  // Compute the size of the whole support point set
+  const unsigned int npoints
+    = cell_points.size() + GeometryInfo<dim>::faces_per_cell * face_points.size();
+
+  this->generalized_support_points.resize (npoints);
+
+  Quadrature<dim> faces = QProjector<dim>::project_to_all_faces(face_points);
+  for (unsigned int k=0; k < face_points.size()*GeometryInfo<dim>::faces_per_cell; ++k)
+    this->generalized_support_points[k]
+      = faces.point(k+QProjector<dim>
+                    ::DataSetDescriptor::face(0, true, false, false,
+                                              this->dofs_per_face));
+
+  // Currently, for backward compatibility, we do not use moments, but
+  // point values on faces in 2D. In 3D, this is impossible, since the
+  // moments are only taken with respect to PolynomialsP.
+  if (dim>2)
+    initialize_test_values(test_values_face, face_points, deg);
+
+  if (deg<=1) return;
+
+  // Remember where interior points start
+  const unsigned int ibase = face_points.size()*GeometryInfo<dim>::faces_per_cell;
+  for (unsigned int k=0; k<cell_points.size(); ++k)
+    {
+      this->generalized_support_points[ibase+k] = cell_points.point(k);
+    }
+  // Finally, compute the values of
+  // the test functions in the
+  // interior quadrature points
+
+  initialize_test_values(test_values_cell, cell_points, deg-2);
+}
+
+
+
+/*-------------- Explicit Instantiations -------------------------------*/
+#include "fe_bdm.inst"
+
+DEAL_II_NAMESPACE_CLOSE
+
diff --git a/source/fe/fe_bdm.inst.in b/source/fe/fe_bdm.inst.in
new file mode 100644
index 0000000..411ef0d
--- /dev/null
+++ b/source/fe/fe_bdm.inst.in
@@ -0,0 +1,22 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+for (deal_II_dimension : DIMENSIONS)
+  {
+    template class FE_BDM<deal_II_dimension>;
+  }
+
diff --git a/source/fe/fe_bernstein.cc b/source/fe/fe_bernstein.cc
new file mode 100644
index 0000000..fdba3c3
--- /dev/null
+++ b/source/fe/fe_bernstein.cc
@@ -0,0 +1,312 @@
+// ---------------------------------------------------------------------
+// $Id: fe_q.cc 30037 2013-07-18 16:55:40Z maier $
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/quadrature.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/qprojector.h>
+#include <deal.II/fe/fe_nothing.h>
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_tools.h>
+#include <deal.II/fe/fe_bernstein.h>
+#include <deal.II/base/polynomials_bernstein.h>
+
+#include <vector>
+#include <sstream>
+
+DEAL_II_NAMESPACE_OPEN
+
+// NOTE(review): renumber_bases() is invoked through this in the base-class
+// initializer, i.e. before FE_Q_Base is constructed -- confirm this is safe.
+template <int dim, int spacedim>
+FE_Bernstein<dim,spacedim>::FE_Bernstein (const unsigned int degree)
+  :
+  FE_Q_Base<TensorProductPolynomials<dim>, dim, spacedim> (
+    this->renumber_bases(degree),
+    FiniteElementData<dim>(this->get_dpo_vector(degree),
+                           1, degree,
+                           FiniteElementData<dim>::H1),
+    std::vector<bool> (1, false))
+{}
+
+
+template <int dim, int spacedim>
+void
+FE_Bernstein<dim,spacedim>::
+get_face_interpolation_matrix (const FiniteElement<dim,spacedim> &source_fe,
+                               FullMatrix<double>       &interpolation_matrix) const
+{
+  Assert (dim > 1, ExcImpossibleInDim(1));
+  get_subface_interpolation_matrix (source_fe, numbers::invalid_unsigned_int,  // invalid subface index selects the whole face
+                                    interpolation_matrix);
+}
+
+
+template <int dim, int spacedim>
+void
+FE_Bernstein<dim,spacedim>::
+get_subface_interpolation_matrix (const FiniteElement<dim,spacedim> &x_source_fe,
+                                  const unsigned int        subface,
+                                  FullMatrix<double>       &interpolation_matrix) const
+{
+  Assert (interpolation_matrix.m() == x_source_fe.dofs_per_face,
+          ExcDimensionMismatch (interpolation_matrix.m(),
+                                x_source_fe.dofs_per_face));
+
+  // see if source is a Bernstein element
+  if (const FE_Bernstein<dim,spacedim> *source_fe
+      = dynamic_cast<const FE_Bernstein<dim,spacedim> *>(&x_source_fe))
+    {
+      // have this test in here since a table of size 2x0 reports its size as
+      // 0x0
+      Assert (interpolation_matrix.n() == this->dofs_per_face,
+              ExcDimensionMismatch (interpolation_matrix.n(),
+                                    this->dofs_per_face));
+
+      // Make sure that the element for which the DoFs should be constrained
+      // is the one with the higher polynomial degree.  Actually the procedure
+      // will work also if this assertion is not satisfied. But the matrices
+      // produced in that case might lead to problems in the hp procedures,
+      // which use this method.
+      Assert (this->dofs_per_face <= source_fe->dofs_per_face,
+              (typename FiniteElement<dim,spacedim>::
+               ExcInterpolationNotImplemented ()));
+
+      const Quadrature<dim-1>
+      quad_face_support(FE_Q<dim,spacedim>(source_fe->degree).get_unit_face_support_points ());
+
+      // Rule of thumb for FP accuracy, that can be expected for a given
+      // polynomial degree.  This value is used to cut off values close to
+      // zero.
+      double eps = 2e-13 * std::max(this->degree, source_fe->degree) * (dim-1);
+
+      // compute the interpolation matrix by evaluating this element's shape
+      // functions at the face support points of an FE_Q of the source degree.
+//TODO: Verify that all faces are the same with respect to
+// these support points. Furthermore, check if something has to
+// be done for the face orientation flag in 3D.
+      const Quadrature<dim> subface_quadrature
+        = subface == numbers::invalid_unsigned_int
+          ?
+          QProjector<dim>::project_to_face (quad_face_support, 0)
+          :
+          QProjector<dim>::project_to_subface (quad_face_support, 0, subface);
+
+      for (unsigned int i=0; i<source_fe->dofs_per_face; ++i)
+        {
+          const Point<dim> &p = subface_quadrature.point (i);
+          for (unsigned int j=0; j<this->dofs_per_face; ++j)
+            {
+              double matrix_entry = this->shape_value (this->face_to_cell_index(j, 0), p);
+
+              // Correct the interpolated value. I.e. if it is close to 1 or
+              // 0, make it exactly 1 or 0. Unfortunately, this is required to
+              // avoid problems with higher order elements.
+              if (std::fabs (matrix_entry - 1.0) < eps)
+                matrix_entry = 1.0;
+              if (std::fabs (matrix_entry) < eps)
+                matrix_entry = 0.0;
+
+              interpolation_matrix(i,j) = matrix_entry;
+            }
+        }
+
+      // make sure that the row sum of each of the matrices is 1 at this
+      // point. this must be so since the shape functions sum up to 1
+      for (unsigned int j=0; j<source_fe->dofs_per_face; ++j)
+        {
+          double sum = 0.;
+
+          for (unsigned int i=0; i<this->dofs_per_face; ++i)
+            sum += interpolation_matrix(j,i);
+
+          Assert (std::fabs(sum-1) < eps, ExcInternalError());
+        }
+    }
+  else if (dynamic_cast<const FE_Nothing<dim> *>(&x_source_fe) != 0)
+    {
+      // FE_Nothing has no dofs, so nothing to do. NOTE(review): the cast names FE_Nothing<dim>, not <dim,spacedim> -- confirm.
+    }
+  else
+    AssertThrow (false,(typename FiniteElement<dim,spacedim>::
+                        ExcInterpolationNotImplemented()));
+}
+
+
+
+template <int dim, int spacedim>
+bool
+FE_Bernstein<dim,spacedim>::hp_constraints_are_implemented () const
+{
+  return true;  // the hp_*_dof_identities() and (sub)face interpolation functions are defined in this file
+}
+
+
+template <int dim, int spacedim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FE_Bernstein<dim,spacedim>::hp_vertex_dof_identities (const FiniteElement<dim,spacedim> &fe_other) const
+{
+  typedef std::vector<std::pair<unsigned int, unsigned int> > Identities;
+
+  // we can presently only compute these identities against another
+  // FE_Bernstein or against an FE_Nothing; everything else either has
+  // no face dofs at all or is not implemented.
+
+  // Two FE_Bernstein elements: there should be exactly one single DoF
+  // of each FE at a vertex, and they should have identical value.
+  if (dynamic_cast<const FE_Bernstein<dim,spacedim>*>(&fe_other) != 0)
+    {
+      return Identities (1, std::make_pair (0U, 0U));
+    }
+
+  // The FE_Nothing has no degrees of freedom, so there are no
+  // equivalencies to be recorded.
+  if (dynamic_cast<const FE_Nothing<dim>*>(&fe_other) != 0)
+    {
+      return Identities ();
+    }
+
+  // If the other element has no dofs on faces at all, it declares
+  // itself discontinuous, and no kind of continuity could be enforced
+  // even if we knew exactly what kind of element it is. Just state
+  // that we have no constraints to declare.
+  if (fe_other.dofs_per_face == 0)
+    {
+      return Identities ();
+    }
+
+  // Any other element type is not implemented.
+  Assert (false, ExcNotImplemented());
+  return Identities ();
+}
+
+
+template <int dim, int spacedim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FE_Bernstein<dim,spacedim>::hp_line_dof_identities (const FiniteElement<dim,spacedim> &) const
+{
+  // Since this FE is interpolatory only at the vertices, we cannot
+  // identify dofs on lines and on quads even if there are dofs
+  // on lines and on quads.
+  //
+  // We also have nothing to say about interpolation to other finite
+  // elements. Consequently, we never have anything to say at all.
+  return std::vector<std::pair<unsigned int, unsigned int> > ();
+}
+
+
+template <int dim, int spacedim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FE_Bernstein<dim,spacedim>::hp_quad_dof_identities (const FiniteElement<dim,spacedim> &) const
+{
+  // Since this FE is interpolatory only at the vertices, we cannot
+  // identify dofs on lines and on quads even if there are dofs
+  // on lines and on quads.
+  //
+  // We also have nothing to say about interpolation to other finite
+  // elements. Consequently, we never have anything to say at all.
+  return std::vector<std::pair<unsigned int, unsigned int> > ();
+}
+
+
+template <int dim, int spacedim>
+FiniteElementDomination::Domination
+FE_Bernstein<dim,spacedim>::compare_for_face_domination (const FiniteElement<dim,spacedim> &fe_other) const
+{
+  // Against another FE_Bernstein, the element of lower degree
+  // dominates; with equal degrees either element can dominate.
+  const FE_Bernstein<dim,spacedim> *other_bernstein
+    = dynamic_cast<const FE_Bernstein<dim,spacedim>*>(&fe_other);
+  if (other_bernstein != 0)
+    {
+      if (this->degree == other_bernstein->degree)
+        return FiniteElementDomination::either_element_can_dominate;
+      return (this->degree < other_bernstein->degree
+              ? FiniteElementDomination::this_element_dominates
+              : FiniteElementDomination::other_element_dominates);
+    }
+
+  // the FE_Nothing has no degrees of freedom and it is typically used in
+  // a context where we don't require any continuity along the interface,
+  // unless it reports itself as dominating
+  const FE_Nothing<dim> *other_nothing
+    = dynamic_cast<const FE_Nothing<dim>*>(&fe_other);
+  if (other_nothing != 0)
+    return (other_nothing->is_dominating()
+            ? FiniteElementDomination::other_element_dominates
+            : FiniteElementDomination::no_requirements);
+
+  // comparison with any other element type is not implemented
+  Assert (false, ExcNotImplemented());
+  return FiniteElementDomination::neither_element_dominates;
+}
+
+
+template <int dim, int spacedim>
+std::string
+FE_Bernstein<dim,spacedim>::get_name () const
+{
+  // the FETools::get_fe_from_name function depends on the exact format
+  // of the string built here, so the two have to be kept in synch
+  std::ostringstream name;
+  name << "FE_Bernstein<";
+  name << dim << ">(";
+  name << this->degree << ")";
+  return name.str();
+}
+
+
+template <int dim, int spacedim>
+FiniteElement<dim,spacedim> *
+FE_Bernstein<dim,spacedim>::clone() const
+{
+  return new FE_Bernstein<dim,spacedim>(*this);  // heap-allocated copy made with the copy constructor
+}
+
+
+/**
+ * Entry d of the result is (deg-1)^d. Only the assertion differs from the same function in FE_Q_Base!!
+ */
+template <int dim, int spacedim>
+std::vector<unsigned int>
+FE_Bernstein<dim,spacedim>::get_dpo_vector(const unsigned int deg)
+{
+  AssertThrow(deg>0,ExcMessage("FE_Bernstein needs to be of degree > 0."));
+  std::vector<unsigned int> dofs_per_object(dim+1);
+  for (unsigned int d=0, power=1U; d<dofs_per_object.size(); ++d, power*=(deg-1))
+    dofs_per_object[d]=power;
+  return dofs_per_object;
+}
+
+
+template <int dim, int spacedim>
+TensorProductPolynomials<dim>
+FE_Bernstein<dim, spacedim>::renumber_bases(const unsigned int deg)
+{
+  // Bernstein tensor-product basis, given the numbering filled in by hierarchic_to_lexicographic_numbering().
+  TensorProductPolynomials<dim> polynomials(dealii::generate_complete_bernstein_basis<double>(deg));
+  std::vector<unsigned int> numbering(Utilities::fixed_power<dim>(deg+1));
+  const FiniteElementData<dim> fe_data(this->get_dpo_vector(deg), 1, deg);
+  FETools::hierarchic_to_lexicographic_numbering (fe_data, numbering);
+  polynomials.set_numbering(numbering);
+  return polynomials;
+}
+
+
+// explicit instantiations
+#include "fe_bernstein.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/fe_bernstein.inst.in b/source/fe/fe_bernstein.inst.in
new file mode 100644
index 0000000..cf1e237
--- /dev/null
+++ b/source/fe/fe_bernstein.inst.in
@@ -0,0 +1,23 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+   {
+ #if deal_II_dimension <= deal_II_space_dimension 	
+     template class FE_Bernstein<deal_II_dimension, deal_II_space_dimension>;
+ #endif
+   }
diff --git a/source/fe/fe_data.cc b/source/fe/fe_data.cc
new file mode 100644
index 0000000..7eae662
--- /dev/null
+++ b/source/fe/fe_data.cc
@@ -0,0 +1,88 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/geometry_info.h>
+#include <deal.II/fe/fe.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+// Constructor. Derives all per-object and per-cell dof counts and the start
+// indices of line/quad/hex dofs from `dofs_per_object`, which must have
+// dim+1 entries (dofs per vertex, line, quad, hex as far as they exist in
+// `dim` dimensions). If `block_indices` is empty, a single block containing
+// all dofs of the cell is used instead.
+//
+// Note that the size check on `dofs_per_object` sits in the constructor body,
+// i.e. it only runs after the member initializers have already indexed into
+// the vector.
+template <int dim>
+FiniteElementData<dim>::
+FiniteElementData (const std::vector<unsigned int> &dofs_per_object,
+                   const unsigned int               n_components,
+                   const unsigned int               degree,
+                   const Conformity                 conformity,
+                   const BlockIndices              &block_indices)
+  :
+  dofs_per_vertex(dofs_per_object[0]),
+  dofs_per_line(dofs_per_object[1]),
+  // entries for quads/hexes are only read in dimensions where they exist
+  dofs_per_quad(dim>1? dofs_per_object[2]:0),
+  dofs_per_hex(dim>2? dofs_per_object[3]:0),
+  // cell-local numbering: first all vertex dofs, then lines, quads, hexes
+  first_line_index(GeometryInfo<dim>::vertices_per_cell
+                   * dofs_per_vertex),
+  first_quad_index(first_line_index+
+                   GeometryInfo<dim>::lines_per_cell
+                   * dofs_per_line),
+  first_hex_index(first_quad_index+
+                  GeometryInfo<dim>::quads_per_cell
+                  * dofs_per_quad),
+  // the same offsets for the numbering on a face (an object of dim-1)
+  first_face_line_index(GeometryInfo<dim-1>::vertices_per_cell
+                        * dofs_per_vertex),
+  first_face_quad_index((dim==3 ?
+                         GeometryInfo<dim-1>::vertices_per_cell
+                         * dofs_per_vertex :
+                         GeometryInfo<dim>::vertices_per_cell
+                         * dofs_per_vertex) +
+                        GeometryInfo<dim-1>::lines_per_cell
+                        * dofs_per_line),
+  dofs_per_face(GeometryInfo<dim>::vertices_per_face * dofs_per_vertex +
+                GeometryInfo<dim>::lines_per_face * dofs_per_line +
+                GeometryInfo<dim>::quads_per_face *dofs_per_quad),
+  dofs_per_cell (GeometryInfo<dim>::vertices_per_cell * dofs_per_vertex +
+                 GeometryInfo<dim>::lines_per_cell * dofs_per_line +
+                 GeometryInfo<dim>::quads_per_cell * dofs_per_quad +
+                 GeometryInfo<dim>::hexes_per_cell *dofs_per_hex),
+  components(n_components),
+  degree(degree),
+  conforming_space(conformity),
+  // fall back to one block holding all cell dofs if none were given
+  block_indices_data(block_indices.size() == 0
+                     ?
+                     BlockIndices(1, dofs_per_cell)
+                     :
+                     block_indices)
+{
+  Assert(dofs_per_object.size()==dim+1, ExcDimensionMismatch(dofs_per_object.size()-1,dim));
+}
+
+
+
+// Two FiniteElementData objects compare equal if their dofs per vertex, line,
+// quad and hex, their number of components, their degree and their
+// conformity all agree. Other members are not compared.
+template<int dim>
+bool FiniteElementData<dim>::operator== (const FiniteElementData<dim> &f) const
+{
+  if (dofs_per_vertex != f.dofs_per_vertex)
+    return false;
+  if (dofs_per_line != f.dofs_per_line)
+    return false;
+  if (dofs_per_quad != f.dofs_per_quad)
+    return false;
+  if (dofs_per_hex != f.dofs_per_hex)
+    return false;
+  if (components != f.components)
+    return false;
+  if (degree != f.degree)
+    return false;
+  return (conforming_space == f.conforming_space);
+}
+
+
+template class FiniteElementData<1>;
+template class FiniteElementData<2>;
+template class FiniteElementData<3>;
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/fe_dg_vector.cc b/source/fe/fe_dg_vector.cc
new file mode 100644
index 0000000..e657e5b
--- /dev/null
+++ b/source/fe/fe_dg_vector.cc
@@ -0,0 +1,106 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/fe/fe_dg_vector.templates.h>
+#include <deal.II/base/polynomials_abf.h>
+#include <deal.II/base/polynomials_bdm.h>
+#include <deal.II/base/polynomials_nedelec.h>
+#include <deal.II/base/polynomials_raviart_thomas.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+// Constructor: forwards `p` to the FE_DGVector base class instantiated for
+// PolynomialsNedelec and selects the mapping_nedelec mapping type.
+template <int dim, int spacedim>
+FE_DGNedelec<dim, spacedim>::FE_DGNedelec (const unsigned int p)
+  : FE_DGVector<PolynomialsNedelec<dim>, dim, spacedim>(p, mapping_nedelec)
+{}
+
+
+template <int dim, int spacedim>
+std::string
+FE_DGNedelec<dim, spacedim>::get_name () const
+{
+  // Produces "FE_DGNedelec<dims>(degree-1)". FETools::get_fe_from_name
+  // depends on exactly this format, so the two have to be kept in sync.
+  std::ostringstream name_stream;
+  name_stream << "FE_DGNedelec<";
+  name_stream << Utilities::dim_string(dim,spacedim);
+  name_stream << ">(";
+  name_stream << (this->degree-1);
+  name_stream << ")";
+  return name_stream.str();
+}
+
+
+// Constructor: forwards `p` to the FE_DGVector base class instantiated for
+// PolynomialsRaviartThomas and selects the mapping_raviart_thomas mapping
+// type.
+template <int dim, int spacedim>
+FE_DGRaviartThomas<dim, spacedim>::FE_DGRaviartThomas (const unsigned int p)
+  : FE_DGVector<PolynomialsRaviartThomas<dim>, dim, spacedim>(p, mapping_raviart_thomas)
+{}
+
+
+template <int dim, int spacedim>
+std::string
+FE_DGRaviartThomas<dim, spacedim>::get_name () const
+{
+  // Produces "FE_DGRaviartThomas<dims>(degree-1)". FETools::get_fe_from_name
+  // depends on exactly this format, so the two have to be kept in sync.
+  std::ostringstream name_stream;
+  name_stream << "FE_DGRaviartThomas<";
+  name_stream << Utilities::dim_string(dim,spacedim);
+  name_stream << ">(";
+  name_stream << (this->degree-1);
+  name_stream << ")";
+  return name_stream.str();
+}
+
+
+// Constructor: forwards `p` to the FE_DGVector base class instantiated for
+// PolynomialsBDM and selects the mapping_bdm mapping type.
+template <int dim, int spacedim>
+FE_DGBDM<dim, spacedim>::FE_DGBDM (const unsigned int p)
+  : FE_DGVector<PolynomialsBDM<dim>, dim, spacedim>(p, mapping_bdm)
+{}
+
+
+template <int dim, int spacedim>
+std::string
+FE_DGBDM<dim, spacedim>::get_name () const
+{
+  // Produces "FE_DGBDM<dims>(degree-1)". FETools::get_fe_from_name depends
+  // on exactly this format, so the two have to be kept in sync.
+  std::ostringstream name_stream;
+  name_stream << "FE_DGBDM<";
+  name_stream << Utilities::dim_string(dim,spacedim);
+  name_stream << ">(";
+  name_stream << (this->degree-1);
+  name_stream << ")";
+  return name_stream.str();
+}
+
+
+#include "fe_dg_vector.inst"
+
+DEAL_II_NAMESPACE_CLOSE
+
diff --git a/source/fe/fe_dg_vector.inst.in b/source/fe/fe_dg_vector.inst.in
new file mode 100644
index 0000000..2a81f94
--- /dev/null
+++ b/source/fe/fe_dg_vector.inst.in
@@ -0,0 +1,29 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+for (deal_II_dimension : DIMENSIONS)
+  {
+    template class FE_DGVector<PolynomialsABF<deal_II_dimension>, deal_II_dimension>;
+    template class FE_DGVector<PolynomialsBDM<deal_II_dimension>, deal_II_dimension>;
+    template class FE_DGVector<PolynomialsNedelec<deal_II_dimension>, deal_II_dimension>;
+    template class FE_DGVector<PolynomialsRaviartThomas<deal_II_dimension>, deal_II_dimension>;
+    
+    template class FE_DGNedelec<deal_II_dimension>;
+    template class FE_DGRaviartThomas<deal_II_dimension>;
+    template class FE_DGBDM<deal_II_dimension>;
+  }
+
diff --git a/source/fe/fe_dgp.cc b/source/fe/fe_dgp.cc
new file mode 100644
index 0000000..62d55ad
--- /dev/null
+++ b/source/fe/fe_dgp.cc
@@ -0,0 +1,269 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2002 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/fe/fe_dgp.h>
+#include <deal.II/fe/fe_tools.h>
+
+#include <sstream>
+
+DEAL_II_NAMESPACE_OPEN
+
+// Constructor for the DGP element of the given polynomial degree. The FE_Poly
+// base class receives, in this order:
+//  - the polynomial space spanned by the complete Legendre basis of `degree`,
+//  - a scalar FiniteElementData with L2 conformity,
+//  - one `true` flag per cell dof,
+//  - one single-component mask (set to true) per cell dof.
+// The element data is rebuilt for the last two arguments only to obtain
+// dofs_per_cell.
+template <int dim, int spacedim>
+FE_DGP<dim,spacedim>::FE_DGP (const unsigned int degree)
+  :
+  FE_Poly<PolynomialSpace<dim>, dim, spacedim> (
+    PolynomialSpace<dim>(Polynomials::Legendre::generate_complete_basis(degree)),
+    FiniteElementData<dim>(get_dpo_vector(degree), 1, degree, FiniteElementData<dim>::L2),
+    std::vector<bool>(FiniteElementData<dim>(get_dpo_vector(degree), 1, degree).dofs_per_cell,true),
+    std::vector<ComponentMask>(FiniteElementData<dim>(
+                                 get_dpo_vector(degree), 1, degree).dofs_per_cell, std::vector<bool>(1,true)))
+{
+  // Reinit the vectors of restriction and prolongation matrices to the right
+  // sizes
+  this->reinit_restriction_and_prolongation_matrices();
+  // Fill prolongation matrices with embedding operators. This is only done
+  // when dim == spacedim; otherwise the matrices keep the state left by the
+  // reinit call above.
+  if (dim == spacedim)
+    {
+      FETools::compute_embedding_matrices (*this, this->prolongation);
+      // Fill restriction matrices with L2-projection
+      FETools::compute_projection_matrices (*this, this->restriction);
+    }
+}
+
+
+template <int dim, int spacedim>
+std::string
+FE_DGP<dim,spacedim>::get_name () const
+{
+  // Produces "FE_DGP<dims>(degree)". FETools::get_fe_from_name depends on
+  // exactly this format, so the two have to be kept in sync.
+  std::ostringstream name_stream;
+  name_stream << "FE_DGP<";
+  name_stream << Utilities::dim_string(dim,spacedim);
+  name_stream << ">(";
+  name_stream << this->degree;
+  name_stream << ")";
+  return name_stream.str();
+}
+
+
+
+// Creates a copy of this element with `new` and the copy constructor and
+// returns it as a pointer to the FiniteElement base class.
+// NOTE(review): the raw pointer is presumably owned by the caller -- confirm
+// against the FiniteElement::clone() contract.
+template <int dim, int spacedim>
+FiniteElement<dim,spacedim> *
+FE_DGP<dim,spacedim>::clone() const
+{
+  return new FE_DGP<dim,spacedim>(*this);
+}
+
+
+
+//---------------------------------------------------------------------------
+// Auxiliary functions
+//---------------------------------------------------------------------------
+
+
+// Returns the "dofs per object" vector for degree `deg`: all entries are zero
+// except the last one (the cell itself), which holds
+// (deg+1)(deg+2)...(deg+dim) / dim!.
+template <int dim, int spacedim>
+std::vector<unsigned int>
+FE_DGP<dim,spacedim>::get_dpo_vector (const unsigned int deg)
+{
+  // build the product incrementally; dividing after every multiplication
+  // keeps the intermediate values small
+  unsigned int n_cell_dofs = deg+1;
+  for (unsigned int i=1; i<dim; ++i)
+    n_cell_dofs = (n_cell_dofs * (deg+1+i)) / (i+1);
+
+  std::vector<unsigned int> dofs_per_object(dim+1, 0U);
+  dofs_per_object[dim] = n_cell_dofs;
+  return dofs_per_object;
+}
+
+
+
+// Face interpolation from `x_source_fe` to this element. Throws
+// ExcInterpolationNotImplemented unless the source element's name starts with
+// "FE_DGP<" or the source can be cast to FE_DGP. Nothing is written to
+// `interpolation_matrix`; it is only checked (via Assert) to be of size 0 x 0.
+template <int dim, int spacedim>
+void
+FE_DGP<dim,spacedim>::
+get_face_interpolation_matrix (const FiniteElement<dim,spacedim> &x_source_fe,
+                               FullMatrix<double>       &interpolation_matrix) const
+{
+  // this is only implemented, if the source FE is also a DGP element. in that
+  // case, both elements have no dofs on their faces and the face
+  // interpolation matrix is necessarily empty -- i.e. there isn't much we
+  // need to do here.
+  // (the cast to void presumably silences unused-parameter warnings when the
+  // Assert statements below are compiled out)
+  (void)interpolation_matrix;
+  typedef FiniteElement<dim,spacedim> FE;
+  typedef FE_DGP<dim,spacedim> FEDGP;
+  AssertThrow ((x_source_fe.get_name().find ("FE_DGP<") == 0)
+               ||
+               (dynamic_cast<const FEDGP *>(&x_source_fe) != 0),
+               typename FE::
+               ExcInterpolationNotImplemented());
+
+  Assert (interpolation_matrix.m() == 0,
+          ExcDimensionMismatch (interpolation_matrix.m(),
+                                0));
+  Assert (interpolation_matrix.n() == 0,
+          ExcDimensionMismatch (interpolation_matrix.n(),
+                                0));
+}
+
+
+
+// Subface interpolation from `x_source_fe` to this element; the subface
+// index (second, unnamed parameter) is ignored. Throws
+// ExcInterpolationNotImplemented unless the source element's name starts with
+// "FE_DGP<" or the source can be cast to FE_DGP. Nothing is written to
+// `interpolation_matrix`; it is only checked (via Assert) to be of size 0 x 0.
+template <int dim, int spacedim>
+void
+FE_DGP<dim,spacedim>::
+get_subface_interpolation_matrix (const FiniteElement<dim,spacedim> &x_source_fe,
+                                  const unsigned int ,
+                                  FullMatrix<double>           &interpolation_matrix) const
+{
+  // this is only implemented, if the source FE is also a DGP element. in that
+  // case, both elements have no dofs on their faces and the face
+  // interpolation matrix is necessarily empty -- i.e. there isn't much we
+  // need to do here.
+  // (the cast to void presumably silences unused-parameter warnings when the
+  // Assert statements below are compiled out)
+  (void)interpolation_matrix;
+  typedef FiniteElement<dim,spacedim> FE;
+  typedef FE_DGP<dim,spacedim> FEDGP;
+  AssertThrow ((x_source_fe.get_name().find ("FE_DGP<") == 0)
+               ||
+               (dynamic_cast<const FEDGP *>(&x_source_fe) != 0),
+               typename FE::
+               ExcInterpolationNotImplemented());
+
+  Assert (interpolation_matrix.m() == 0,
+          ExcDimensionMismatch (interpolation_matrix.m(),
+                                0));
+  Assert (interpolation_matrix.n() == 0,
+          ExcDimensionMismatch (interpolation_matrix.n(),
+                                0));
+}
+
+
+
+// Always returns true.
+// NOTE(review): presumably this advertises that the hp_*_dof_identities and
+// face interpolation functions of this class may be called -- confirm
+// against the base class documentation.
+template <int dim, int spacedim>
+bool
+FE_DGP<dim,spacedim>::hp_constraints_are_implemented () const
+{
+  return true;
+}
+
+
+
+template <int dim, int spacedim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FE_DGP<dim,spacedim>::
+hp_vertex_dof_identities (const FiniteElement<dim,spacedim> &fe_other) const
+{
+  typedef std::vector<std::pair<unsigned int, unsigned int> > IdentityList;
+
+  // The result is an empty list in every case: DGP elements have no such
+  // constraints among each other, and pairing with any other element type
+  // is not implemented (which is flagged by the assertion).
+  const bool other_is_dgp =
+    (dynamic_cast<const FE_DGP<dim,spacedim>*>(&fe_other) != 0);
+  if (!other_is_dgp)
+    {
+      Assert (false, ExcNotImplemented());
+    }
+  return IdentityList();
+}
+
+
+
+template <int dim, int spacedim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FE_DGP<dim,spacedim>::
+hp_line_dof_identities (const FiniteElement<dim,spacedim> &fe_other) const
+{
+  typedef std::vector<std::pair<unsigned int, unsigned int> > IdentityList;
+
+  // The result is an empty list in every case: DGP elements have no such
+  // constraints among each other, and pairing with any other element type
+  // is not implemented (which is flagged by the assertion).
+  const bool other_is_dgp =
+    (dynamic_cast<const FE_DGP<dim,spacedim>*>(&fe_other) != 0);
+  if (!other_is_dgp)
+    {
+      Assert (false, ExcNotImplemented());
+    }
+  return IdentityList();
+}
+
+
+
+template <int dim, int spacedim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FE_DGP<dim,spacedim>::
+hp_quad_dof_identities (const FiniteElement<dim,spacedim>        &fe_other) const
+{
+  typedef std::vector<std::pair<unsigned int, unsigned int> > IdentityList;
+
+  // The result is an empty list in every case: DGP elements have no such
+  // constraints among each other, and pairing with any other element type
+  // is not implemented (which is flagged by the assertion).
+  const bool other_is_dgp =
+    (dynamic_cast<const FE_DGP<dim,spacedim>*>(&fe_other) != 0);
+  if (!other_is_dgp)
+    {
+      Assert (false, ExcNotImplemented());
+    }
+  return IdentityList();
+}
+
+
+
+template <int dim, int spacedim>
+FiniteElementDomination::Domination
+FE_DGP<dim,spacedim>::compare_for_face_domination (const FiniteElement<dim,spacedim> &fe_other) const
+{
+  // Only the comparison with another DGP element is implemented; see the
+  // description of FiniteElementDomination::Domination for the meaning of
+  // the returned values.
+  const FE_DGP<dim,spacedim> *other_dgp
+    = dynamic_cast<const FE_DGP<dim,spacedim>*>(&fe_other);
+  if (other_dgp == 0)
+    {
+      Assert (false, ExcNotImplemented());
+      return FiniteElementDomination::neither_element_dominates;
+    }
+
+  // both elements are discontinuous
+  return FiniteElementDomination::no_requirements;
+}
+
+
+
+// Returns true for every combination of shape function index and face index
+// (both parameters are ignored).
+template <int dim, int spacedim>
+bool
+FE_DGP<dim,spacedim>::has_support_on_face (const unsigned int,
+                                           const unsigned int) const
+{
+  // all shape functions have support on all faces
+  return true;
+}
+
+
+
+// Returns a 1 x dofs_per_cell table in which only entry (0,0) is set to true,
+// paired with a one-element vector containing 0.
+template <int dim, int spacedim>
+std::pair<Table<2,bool>, std::vector<unsigned int> >
+FE_DGP<dim,spacedim>::get_constant_modes () const
+{
+  typedef std::pair<Table<2,bool>, std::vector<unsigned int> > ModesAndComponents;
+
+  Table<2,bool> modes(1, this->dofs_per_cell);
+  modes(0,0) = true;
+
+  return ModesAndComponents (modes, std::vector<unsigned int>(1, 0));
+}
+
+
+
+// Not implemented: triggers Assert(false, ExcNotImplemented()) and then
+// returns 0.
+template <int dim, int spacedim>
+std::size_t
+FE_DGP<dim,spacedim>::memory_consumption () const
+{
+  Assert (false, ExcNotImplemented ());
+  return 0;
+}
+
+
+
+// explicit instantiations
+#include "fe_dgp.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/fe_dgp.inst.in b/source/fe/fe_dgp.inst.in
new file mode 100644
index 0000000..e41e49e
--- /dev/null
+++ b/source/fe/fe_dgp.inst.in
@@ -0,0 +1,25 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+#if deal_II_dimension <= deal_II_space_dimension 	
+    template class FE_DGP<deal_II_dimension, deal_II_space_dimension>;
+#endif
+  }
+
+
diff --git a/source/fe/fe_dgp_monomial.cc b/source/fe/fe_dgp_monomial.cc
new file mode 100644
index 0000000..9ad2899
--- /dev/null
+++ b/source/fe/fe_dgp_monomial.cc
@@ -0,0 +1,458 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/fe/fe_dgp_monomial.h>
+#include <deal.II/fe/fe_tools.h>
+
+#include <sstream>
+
+DEAL_II_NAMESPACE_OPEN
+
+// namespace for some functions that are used in this file.
+namespace
+{
+  // storage of hand-chosen support
+  // points
+  //
+  // For dim=2, dofs_per_cell of
+  // FE_DGPMonomial(k) is given by
+  // 0.5(k+1)(k+2), i.e.
+  //
+  // k    0  1  2  3  4  5  6  7
+  // dofs 1  3  6 10 15 21 28 36
+  //
+  // indirect access of unit points:
+  // the points for degree k are
+  // located at
+  //
+  // points[start_index[k]..start_index[k+1]-1]
+  const unsigned int start_index2d[6]= {0,1,4,10,20,35};
+  const double points2d[35][2]=
+  {
+    {0,0},
+    {0,0},{1,0},{0,1},
+    {0,0},{1,0},{0,1},{1,1},{0.5,0},{0,0.5},
+    {0,0},{1,0},{0,1},{1,1},{1./3.,0},{2./3.,0},{0,1./3.},{0,2./3.},{0.5,1},{1,0.5},
+    {0,0},{1,0},{0,1},{1,1},{0.25,0},{0.5,0},{0.75,0},{0,0.25},{0,0.5},{0,0.75},{1./3.,1},{2./3.,1},{1,1./3.},{1,2./3.},{0.5,0.5}
+  };
+
+  // For dim=3, dofs_per_cell of
+  // FE_DGPMonomial(k) is given by
+  // 1./6.(k+1)(k+2)(k+3), i.e.
+  //
+  // k    0  1  2  3  4  5  6   7
+  // dofs 1  4 10 20 35 56 84 120
+  //
+  // Only the points for k=0,1,2 are tabulated: start_index3d has just four
+  // explicit entries and points3d just 15 explicit rows, although both
+  // arrays are declared larger (the remaining elements are zero-initialized).
+  const unsigned int start_index3d[6]= {0,1,5,15/*,35*/};
+  const double points3d[35][3]=
+  {
+    {0,0,0},
+    {0,0,0},{1,0,0},{0,1,0},{0,0,1},
+    {0,0,0},{1,0,0},{0,1,0},{0,0,1},{0.5,0,0},{0,0.5,0},{0,0,0.5},{1,1,0},{1,0,1},{0,1,1}
+  };
+
+
+  template<int dim>
+  void generate_unit_points (const unsigned int,
+                             std::vector<Point<dim> > &);
+
+  // 1D unit points for degree k: the k+1 equidistant points i/k, i=0..k, on
+  // [0,1]. `p` must already have size k+1.
+  template <>
+  void generate_unit_points (const unsigned int k,
+                             std::vector<Point<1> > &p)
+  {
+    Assert(p.size()==k+1, ExcDimensionMismatch(p.size(), k+1));
+    // For k==0 there is only one point; put it at the origin (as the 2D and
+    // 3D tables do) instead of computing 1./0, which would turn the
+    // coordinate 0*h into NaN.
+    const double h = (k>0 ? 1./k : 0.);
+    for (unsigned int i=0; i<p.size(); ++i)
+      p[i](0)=i*h;
+  }
+
+  // 2D unit points for degree k, copied from the points2d table. Only
+  // k<=4 is tabulated; `p` must already have the size of the table section
+  // for degree k.
+  template <>
+  void generate_unit_points (const unsigned int k,
+                             std::vector<Point<2> > &p)
+  {
+    Assert(k<=4, ExcNotImplemented());
+    Assert(p.size()==start_index2d[k+1]-start_index2d[k], ExcInternalError());
+    for (unsigned int i=0; i<p.size(); ++i)
+      {
+        p[i](0)=points2d[start_index2d[k]+i][0];
+        p[i](1)=points2d[start_index2d[k]+i][1];
+      }
+  }
+
+  // 3D unit points for degree k, copied from the points3d table. Only
+  // k<=2 is tabulated; `p` must already have the size of the table section
+  // for degree k.
+  template <>
+  void generate_unit_points (const unsigned int k,
+                             std::vector<Point<3> > &p)
+  {
+    Assert(k<=2, ExcNotImplemented());
+    Assert(p.size()==start_index3d[k+1]-start_index3d[k], ExcInternalError());
+    for (unsigned int i=0; i<p.size(); ++i)
+      {
+        p[i](0)=points3d[start_index3d[k]+i][0];
+        p[i](1)=points3d[start_index3d[k]+i][1];
+        p[i](2)=points3d[start_index3d[k]+i][2];
+      }
+  }
+}
+
+
+
+// Constructor for the monomial DGP element of the given polynomial degree.
+// The FE_Poly base class receives, in this order:
+//  - the PolynomialsP space of `degree`,
+//  - a scalar FiniteElementData with L2 conformity,
+//  - one `true` flag per cell dof,
+//  - one single-component mask (set to true) per cell dof.
+// Afterwards the prolongation matrices are filled with embedding operators
+// and the restriction matrices with L2-projections.
+template <int dim>
+FE_DGPMonomial<dim>::FE_DGPMonomial (const unsigned int degree)
+  :
+  FE_Poly<PolynomialsP<dim>, dim> (
+    PolynomialsP<dim>(degree),
+    FiniteElementData<dim>(get_dpo_vector(degree), 1, degree, FiniteElementData<dim>::L2),
+    std::vector<bool>(FiniteElementData<dim>(get_dpo_vector(degree), 1, degree).dofs_per_cell,true),
+    std::vector<ComponentMask>(FiniteElementData<dim>(
+                                 get_dpo_vector(degree), 1, degree).dofs_per_cell, std::vector<bool>(1,true)))
+{
+  // sanity checks: the polynomial space and the element data must agree
+  Assert(this->poly_space.n()==this->dofs_per_cell, ExcInternalError());
+  Assert(this->poly_space.degree()==this->degree, ExcInternalError());
+
+  // DG doesn't have constraints, so
+  // leave them empty
+
+  // Reinit the vectors of
+  // restriction and prolongation
+  // matrices to the right sizes
+  this->reinit_restriction_and_prolongation_matrices();
+  // Fill prolongation matrices with embedding operators
+  FETools::compute_embedding_matrices (*this, this->prolongation);
+  // Fill restriction matrices with L2-projection
+  FETools::compute_projection_matrices (*this, this->restriction);
+}
+
+
+
+template <int dim>
+std::string
+FE_DGPMonomial<dim>::get_name () const
+{
+  // Produces "FE_DGPMonomial<dim>(degree)". FETools::get_fe_from_name
+  // depends on exactly this format, so the two have to be kept in sync.
+  std::ostringstream name_stream;
+  name_stream << "FE_DGPMonomial<";
+  name_stream << dim;
+  name_stream << ">(";
+  name_stream << this->degree;
+  name_stream << ")";
+  return name_stream.str();
+}
+
+
+
+// Creates a copy of this element with `new` and the copy constructor and
+// returns it as a pointer to the FiniteElement base class.
+// NOTE(review): the raw pointer is presumably owned by the caller -- confirm
+// against the FiniteElement::clone() contract.
+template <int dim>
+FiniteElement<dim> *
+FE_DGPMonomial<dim>::clone() const
+{
+  return new FE_DGPMonomial<dim>(*this);
+}
+
+
+
+//TODO: Remove this function and use the one in FETools, if needed
+/**
+ * Fill @p interpolation_matrix with the matrix that interpolates from
+ * @p source_fe to this element.
+ *
+ * If @p source_fe is itself an FE_DGPMonomial, the matrix must be of size
+ * dofs_per_cell x source dofs_per_cell and is set to the (rectangular)
+ * identity. Otherwise the shape functions of both elements are evaluated at
+ * the hand-chosen unit points of this element and the matrix is computed as
+ * inverse(this_matrix) * source_fe_matrix.
+ */
+template <int dim>
+void
+FE_DGPMonomial<dim>::
+get_interpolation_matrix (const FiniteElement<dim> &source_fe,
+                          FullMatrix<double>           &interpolation_matrix) const
+{
+  const FE_DGPMonomial<dim> *source_dgp_monomial
+    = dynamic_cast<const FE_DGPMonomial<dim> *>(&source_fe);
+
+  if (source_dgp_monomial)
+    {
+      // ok, source_fe is a DGP_Monomial
+      // element. Then, the interpolation
+      // matrix is simple
+      const unsigned int m=interpolation_matrix.m();
+      const unsigned int n=interpolation_matrix.n();
+      Assert (m == this->dofs_per_cell, ExcDimensionMismatch (m, this->dofs_per_cell));
+      Assert (n == source_dgp_monomial->dofs_per_cell,
+              ExcDimensionMismatch (n, source_dgp_monomial->dofs_per_cell));
+
+      // Clear the matrix first: only the diagonal is written below, so
+      // without this any nonzero entries passed in by the caller would
+      // survive outside the diagonal (the other branch overwrites the
+      // whole matrix).
+      interpolation_matrix = 0.;
+
+      const unsigned int min_mn = (m<n ? m : n);
+      for (unsigned int i=0; i<min_mn; ++i)
+        interpolation_matrix(i,i)=1.;
+    }
+  else
+    {
+      std::vector<Point<dim> > unit_points(this->dofs_per_cell);
+      generate_unit_points(this->degree, unit_points);
+
+      // values of the source shape functions at the unit points
+      FullMatrix<double> source_fe_matrix(unit_points.size(), source_fe.dofs_per_cell);
+      for (unsigned int j=0; j<source_fe.dofs_per_cell; ++j)
+        for (unsigned int k=0; k<unit_points.size(); ++k)
+          source_fe_matrix(k,j)=source_fe.shape_value(j, unit_points[k]);
+
+      // values of this element's polynomials at the same points
+      FullMatrix<double> this_matrix(this->dofs_per_cell, this->dofs_per_cell);
+      for (unsigned int j=0; j<this->dofs_per_cell; ++j)
+        for (unsigned int k=0; k<unit_points.size(); ++k)
+          this_matrix(k,j)=this->poly_space.compute_value (j, unit_points[k]);
+
+      // invert in place, then multiply
+      this_matrix.gauss_jordan();
+
+      this_matrix.mmult(interpolation_matrix, source_fe_matrix);
+    }
+}
+
+
+
+// Not implemented: triggers Assert(false, ExcNotImplemented()). The
+// restriction matrices are filled in the constructor instead.
+template <int dim>
+void
+FE_DGPMonomial<dim>::initialize_restriction ()
+{
+  Assert(false, ExcNotImplemented());
+}
+
+
+//---------------------------------------------------------------------------
+// Auxiliary functions
+//---------------------------------------------------------------------------
+
+
+// Returns the "dofs per object" vector for degree `deg`: all entries are zero
+// except the last one (the cell itself), which holds
+// (deg+1)(deg+2)...(deg+dim) / dim!.
+template <int dim>
+std::vector<unsigned int>
+FE_DGPMonomial<dim>::get_dpo_vector (const unsigned int deg)
+{
+  // build the product incrementally; dividing after every multiplication
+  // keeps the intermediate values small
+  unsigned int n_cell_dofs = deg+1;
+  for (unsigned int i=1; i<dim; ++i)
+    n_cell_dofs = (n_cell_dofs * (deg+1+i)) / (i+1);
+
+  std::vector<unsigned int> dofs_per_object(dim+1, 0U);
+  dofs_per_object[dim] = n_cell_dofs;
+  return dofs_per_object;
+}
+
+
+// Face interpolation from `x_source_fe` to this element. Throws
+// ExcInterpolationNotImplemented unless the source element's name starts with
+// "FE_DGPMonomial<" or the source can be cast to FE_DGPMonomial. Nothing is
+// written to `interpolation_matrix`; it is only checked (via Assert) to be
+// of size 0 x 0.
+template <int dim>
+void
+FE_DGPMonomial<dim>::
+get_face_interpolation_matrix (const FiniteElement<dim> &x_source_fe,
+                               FullMatrix<double>       &interpolation_matrix) const
+{
+  // this is only implemented, if the source
+  // FE is also a DGPMonomial element. in that case,
+  // both elements have no dofs on their
+  // faces and the face interpolation matrix
+  // is necessarily empty -- i.e. there isn't
+  // much we need to do here.
+  // (the cast to void presumably silences unused-parameter warnings when the
+  // Assert statements below are compiled out)
+  (void)interpolation_matrix;
+  AssertThrow ((x_source_fe.get_name().find ("FE_DGPMonomial<") == 0)
+               ||
+               (dynamic_cast<const FE_DGPMonomial<dim>*>(&x_source_fe) != 0),
+               typename FiniteElement<dim>::
+               ExcInterpolationNotImplemented());
+
+  Assert (interpolation_matrix.m() == 0,
+          ExcDimensionMismatch (interpolation_matrix.m(),
+                                0));
+  Assert (interpolation_matrix.n() == 0,
+          ExcDimensionMismatch (interpolation_matrix.n(),
+                                0));
+}
+
+
+
+// Subface interpolation from `x_source_fe` to this element; the subface
+// index (second, unnamed parameter) is ignored. Throws
+// ExcInterpolationNotImplemented unless the source element's name starts with
+// "FE_DGPMonomial<" or the source can be cast to FE_DGPMonomial. Nothing is
+// written to `interpolation_matrix`; it is only checked (via Assert) to be
+// of size 0 x 0.
+template <int dim>
+void
+FE_DGPMonomial<dim>::
+get_subface_interpolation_matrix (const FiniteElement<dim> &x_source_fe,
+                                  const unsigned int ,
+                                  FullMatrix<double>           &interpolation_matrix) const
+{
+  // this is only implemented, if the source
+  // FE is also a DGPMonomial element. in that case,
+  // both elements have no dofs on their
+  // faces and the face interpolation matrix
+  // is necessarily empty -- i.e. there isn't
+  // much we need to do here.
+  // (the cast to void presumably silences unused-parameter warnings when the
+  // Assert statements below are compiled out)
+  (void)interpolation_matrix;
+  AssertThrow ((x_source_fe.get_name().find ("FE_DGPMonomial<") == 0)
+               ||
+               (dynamic_cast<const FE_DGPMonomial<dim>*>(&x_source_fe) != 0),
+               typename FiniteElement<dim>::
+               ExcInterpolationNotImplemented());
+
+  Assert (interpolation_matrix.m() == 0,
+          ExcDimensionMismatch (interpolation_matrix.m(),
+                                0));
+  Assert (interpolation_matrix.n() == 0,
+          ExcDimensionMismatch (interpolation_matrix.n(),
+                                0));
+}
+
+
+
+// Always returns true.
+// NOTE(review): presumably this advertises that the hp_*_dof_identities and
+// face interpolation functions of this class may be called -- confirm
+// against the base class documentation.
+template <int dim>
+bool
+FE_DGPMonomial<dim>::hp_constraints_are_implemented () const
+{
+  return true;
+}
+
+
+
+template <int dim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FE_DGPMonomial<dim>::
+hp_vertex_dof_identities (const FiniteElement<dim> &fe_other) const
+{
+  typedef std::vector<std::pair<unsigned int, unsigned int> > IdentityList;
+
+  // The result is an empty list in every case: DGPMonomial elements have no
+  // such constraints among each other, and pairing with any other element
+  // type is not implemented (which is flagged by the assertion).
+  const bool other_is_dgp_monomial =
+    (dynamic_cast<const FE_DGPMonomial<dim>*>(&fe_other) != 0);
+  if (!other_is_dgp_monomial)
+    {
+      Assert (false, ExcNotImplemented());
+    }
+  return IdentityList();
+}
+
+
+
+template <int dim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FE_DGPMonomial<dim>::
+hp_line_dof_identities (const FiniteElement<dim> &fe_other) const
+{
+  typedef std::vector<std::pair<unsigned int, unsigned int> > IdentityList;
+
+  // The result is an empty list in every case: DGPMonomial elements have no
+  // such constraints among each other, and pairing with any other element
+  // type is not implemented (which is flagged by the assertion).
+  const bool other_is_dgp_monomial =
+    (dynamic_cast<const FE_DGPMonomial<dim>*>(&fe_other) != 0);
+  if (!other_is_dgp_monomial)
+    {
+      Assert (false, ExcNotImplemented());
+    }
+  return IdentityList();
+}
+
+
+
+template <int dim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FE_DGPMonomial<dim>::
+hp_quad_dof_identities (const FiniteElement<dim>        &fe_other) const
+{
+  typedef std::vector<std::pair<unsigned int, unsigned int> > IdentityList;
+
+  // The result is an empty list in every case: DGPMonomial elements have no
+  // such constraints among each other, and pairing with any other element
+  // type is not implemented (which is flagged by the assertion).
+  const bool other_is_dgp_monomial =
+    (dynamic_cast<const FE_DGPMonomial<dim>*>(&fe_other) != 0);
+  if (!other_is_dgp_monomial)
+    {
+      Assert (false, ExcNotImplemented());
+    }
+  return IdentityList();
+}
+
+
+
+template <int dim>
+FiniteElementDomination::Domination
+FE_DGPMonomial<dim>::
+compare_for_face_domination (const FiniteElement<dim> &fe_other) const
+{
+  // Only the comparison with another DGPMonomial element is implemented;
+  // see the description of FiniteElementDomination::Domination for the
+  // meaning of the returned values.
+  const FE_DGPMonomial<dim> *other_dgp_monomial
+    = dynamic_cast<const FE_DGPMonomial<dim>*>(&fe_other);
+  if (other_dgp_monomial == 0)
+    {
+      Assert (false, ExcNotImplemented());
+      return FiniteElementDomination::neither_element_dominates;
+    }
+
+  // both elements are discontinuous
+  return FiniteElementDomination::no_requirements;
+}
+
+
+
+// Returns whether shape function `shape_index` is nonzero on face
+// `face_index` of the unit interval (face 0 is x=0, face 1 is x=1).
+//
+// The monomial x^i equals 1 at x=1 for every i, but at x=0 it is nonzero only
+// for i==0. The decision therefore has to depend on the degree of the
+// individual shape function, not on the degree of the element.
+template <>
+bool
+FE_DGPMonomial<1>::has_support_on_face (const unsigned int shape_index,
+                                        const unsigned int face_index) const
+{
+  if (face_index==1)
+    return true;
+  if (face_index!=0)
+    return false;   // not a valid face of a line
+
+  unsigned int degrees[1];
+  this->poly_space.directional_degrees(shape_index, degrees);
+  return degrees[0]==0;
+}
+
+
+
+// Returns whether shape function `shape_index` is nonzero somewhere on face
+// `face_index` of the unit square.
+//
+// Faces are numbered lexicographically: faces 0/1 are x=0/x=1 and faces 2/3
+// are y=0/y=1, i.e. face f is normal to coordinate direction f/2 and located
+// at coordinate value f%2. A monomial x^a y^b never vanishes identically on
+// a face at coordinate value 1; on a face at coordinate value 0 it is
+// nonzero only if its exponent in the face's normal direction is zero.
+template <>
+bool
+FE_DGPMonomial<2>::has_support_on_face (const unsigned int shape_index,
+                                        const unsigned int face_index) const
+{
+  if (face_index >= 4)
+    return false;   // not a valid face of a quadrilateral
+  if (face_index % 2 == 1)
+    return true;
+
+  unsigned int degrees[2];
+  this->poly_space.directional_degrees(shape_index, degrees);
+  return degrees[face_index/2]==0;
+}
+
+
+
+// Returns whether shape function `shape_index` is nonzero somewhere on face
+// `face_index` of the unit cube.
+//
+// Faces are numbered lexicographically: faces 0/1 are x=0/x=1, faces 2/3 are
+// y=0/y=1 and faces 4/5 are z=0/z=1, i.e. face f is normal to coordinate
+// direction f/2 and located at coordinate value f%2. A monomial
+// x^a y^b z^c never vanishes identically on a face at coordinate value 1; on
+// a face at coordinate value 0 it is nonzero only if its exponent in the
+// face's normal direction is zero.
+template <>
+bool
+FE_DGPMonomial<3>::has_support_on_face (const unsigned int shape_index,
+                                        const unsigned int face_index) const
+{
+  if (face_index >= 6)
+    return false;   // not a valid face of a hexahedron
+  if (face_index % 2 == 1)
+    return true;
+
+  unsigned int degrees[3];
+  this->poly_space.directional_degrees(shape_index, degrees);
+  return degrees[face_index/2]==0;
+}
+
+
+
+// Not implemented: triggers Assert(false, ExcNotImplemented()) and then
+// returns 0.
+template <int dim>
+std::size_t
+FE_DGPMonomial<dim>::memory_consumption () const
+{
+  Assert (false, ExcNotImplemented ());
+  return 0;
+}
+
+
+
+// explicit instantiations
+#include "fe_dgp_monomial.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/fe_dgp_monomial.inst.in b/source/fe/fe_dgp_monomial.inst.in
new file mode 100644
index 0000000..79de6ae
--- /dev/null
+++ b/source/fe/fe_dgp_monomial.inst.in
@@ -0,0 +1,22 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+// Explicit instantiation of FE_DGPMonomial, one per entry of DIMENSIONS.
+for (deal_II_dimension : DIMENSIONS)
+  {
+    template class FE_DGPMonomial<deal_II_dimension>;
+  }
+
diff --git a/source/fe/fe_dgp_nonparametric.cc b/source/fe/fe_dgp_nonparametric.cc
new file mode 100644
index 0000000..584ed1c
--- /dev/null
+++ b/source/fe/fe_dgp_nonparametric.cc
@@ -0,0 +1,597 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2002 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/quadrature.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/mapping.h>
+#include <deal.II/fe/fe_dgp_nonparametric.h>
+#include <deal.II/fe/fe_values.h>
+
+#include <sstream>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int dim, int spacedim>
+FE_DGPNonparametric<dim,spacedim>::FE_DGPNonparametric (const unsigned int degree)
+  :  // base class data comes from get_dpo_vector(degree); the polynomial space is built from Legendre polynomials
+  FiniteElement<dim,spacedim> (
+    FiniteElementData<dim>(get_dpo_vector(degree), 1, degree,
+                           FiniteElementData<dim>::L2),
+    std::vector<bool>(
+      FiniteElementData<dim>(get_dpo_vector(degree), 1, degree).dofs_per_cell,true),
+    std::vector<ComponentMask>(
+      FiniteElementData<dim>(get_dpo_vector(degree),1, degree).dofs_per_cell,
+      std::vector<bool>(1,true))),
+  degree(degree),
+  polynomial_space (Polynomials::Legendre::generate_complete_basis(degree))
+{
+  const unsigned int n_dofs = this->dofs_per_cell;
+  for (unsigned int ref_case = RefinementCase<dim>::cut_x;
+       ref_case<RefinementCase<dim>::isotropic_refinement+1; ++ref_case)
+    {
+      if (dim!=2 && ref_case!=RefinementCase<dim>::isotropic_refinement)
+        // anisotropic refinement cases are
+        // only handled for dim==2; in every
+        // other dimension they are skipped
+        continue;
+
+      const unsigned int nc = GeometryInfo<dim>::n_children(RefinementCase<dim>(ref_case));
+      for (unsigned int i=0; i<nc; ++i)
+        {
+          this->prolongation[ref_case-1][i].reinit (n_dofs, n_dofs);
+          // the embedding into each child is set up by
+          // writing ones on the diagonal and nothing else
+          for (unsigned int j=0; j<n_dofs; ++j)
+            this->prolongation[ref_case-1][i](j,j) = 1.;
+        }
+    }
+
+  // For discontinuous elements the
+  // restriction could be defined
+  // through projection, but this is
+  // presently not implemented for DGPNonparametric
+  // elements.
+  //
+  // If it were, the following (disabled)
+  // snippet would be the right code:
+//    if ((degree < Matrices::n_projection_matrices) &&
+//        (Matrices::projection_matrices[degree] != 0))
+//      {
+//        restriction[0].fill (Matrices::projection_matrices[degree]);
+//      }
+//    else
+//                                   // matrix undefined, set size to zero
+//      for (unsigned int i=0;i<GeometryInfo<dim>::max_children_per_cell;++i)
+//        restriction[i].reinit(0, 0);
+  // Since it is not implemented, the matrices should be
+  // "empty". The default constructor is said to do that
+  // already, so the following stays disabled as well:
+//  for (unsigned int i=0;i<GeometryInfo<dim>::max_children_per_cell;++i)
+//    this->restriction[i].reinit(0, 0);
+
+  // Note further that these
+  // elements have neither support
+  // nor face-support points, so
+  // those fields are left empty.
+}
+
+
+
+template <int dim, int spacedim>
+std::string
+FE_DGPNonparametric<dim,spacedim>::get_name () const
+{
+  // The text produced here has to stay in sync with what
+  // FETools::get_fe_from_name expects, since that function relies
+  // on this exact format.
+  std::ostringstream name_stream;
+
+  // element name followed by the dimension string in angle brackets
+  name_stream << "FE_DGPNonparametric<";
+  name_stream << Utilities::dim_string(dim,spacedim);
+
+  // polynomial degree in parentheses
+  name_stream << ">(";
+  name_stream << degree << ")";
+  return name_stream.str();
+}
+
+
+
+template <int dim, int spacedim>
+FiniteElement<dim,spacedim> *
+FE_DGPNonparametric<dim,spacedim>::clone() const
+{ // returns a copy of this element, created with new through the copy constructor
+  return new FE_DGPNonparametric<dim,spacedim>(*this);  // NOTE(review): presumably the caller is responsible for deleting it -- confirm
+}
+
+
+
+template <int dim, int spacedim>
+double
+FE_DGPNonparametric<dim,spacedim>::shape_value (const unsigned int i,
+                                                const Point<dim> &p) const
+{ // no value is computed here: after the index check every call signals ExcUnitShapeValuesDoNotExist
+  (void)i;  // presumably keeps builds in which Assert expands to nothing free of unused-parameter warnings
+  (void)p;
+  Assert (i<this->dofs_per_cell, ExcIndexRange(i, 0, this->dofs_per_cell));
+  AssertThrow (false, (typename FiniteElement<dim>::ExcUnitShapeValuesDoNotExist()));
+  return 0;  // placeholder to satisfy the return type
+}
+
+
+
+template <int dim, int spacedim>
+double
+FE_DGPNonparametric<dim,spacedim>::shape_value_component (const unsigned int i,
+                                                          const Point<dim> &p,
+                                                          const unsigned int component) const
+{ // no value is computed here: after the argument checks every call signals ExcUnitShapeValuesDoNotExist
+  (void)i;  // presumably keeps builds in which Assert expands to nothing free of unused-parameter warnings
+  (void)p;
+  (void)component;
+  Assert (i<this->dofs_per_cell, ExcIndexRange(i, 0, this->dofs_per_cell));
+  Assert (component == 0, ExcIndexRange (component, 0, 1));  // only component 0 is accepted
+  AssertThrow (false, (typename FiniteElement<dim>::ExcUnitShapeValuesDoNotExist()));
+  return 0;  // placeholder to satisfy the return type
+}
+
+
+
+template <int dim, int spacedim>
+Tensor<1,dim>
+FE_DGPNonparametric<dim,spacedim>::shape_grad (const unsigned int i,
+                                               const Point<dim> &p) const
+{ // no gradient is computed here: after the index check every call signals ExcUnitShapeValuesDoNotExist
+  (void)i;  // presumably keeps builds in which Assert expands to nothing free of unused-parameter warnings
+  (void)p;
+  Assert (i<this->dofs_per_cell, ExcIndexRange(i, 0, this->dofs_per_cell));
+  AssertThrow (false, (typename FiniteElement<dim>::ExcUnitShapeValuesDoNotExist()));
+  return Tensor<1,dim>();  // placeholder to satisfy the return type
+}
+
+
+template <int dim, int spacedim>
+Tensor<1,dim>
+FE_DGPNonparametric<dim,spacedim>::shape_grad_component (const unsigned int i,
+                                                         const Point<dim> &p,
+                                                         const unsigned int component) const
+{ // no gradient is computed here: after the argument checks every call signals ExcUnitShapeValuesDoNotExist
+  (void)i;  // presumably keeps builds in which Assert expands to nothing free of unused-parameter warnings
+  (void)p;
+  (void)component;
+  Assert (i<this->dofs_per_cell, ExcIndexRange(i, 0, this->dofs_per_cell));
+  Assert (component == 0, ExcIndexRange (component, 0, 1));  // only component 0 is accepted
+  AssertThrow (false, (typename FiniteElement<dim>::ExcUnitShapeValuesDoNotExist()));
+  return Tensor<1,dim>();  // placeholder to satisfy the return type
+}
+
+
+
+template <int dim, int spacedim>
+Tensor<2,dim>
+FE_DGPNonparametric<dim,spacedim>::shape_grad_grad (const unsigned int i,
+                                                    const Point<dim> &p) const
+{ // no second derivative is computed here: after the index check every call signals ExcUnitShapeValuesDoNotExist
+  (void)i;  // presumably keeps builds in which Assert expands to nothing free of unused-parameter warnings
+  (void)p;
+  Assert (i<this->dofs_per_cell, ExcIndexRange(i, 0, this->dofs_per_cell));
+  AssertThrow (false, (typename FiniteElement<dim>::ExcUnitShapeValuesDoNotExist()));
+  return Tensor<2,dim>();  // placeholder to satisfy the return type
+}
+
+
+
+template <int dim, int spacedim>
+Tensor<2,dim>
+FE_DGPNonparametric<dim,spacedim>::shape_grad_grad_component (const unsigned int i,
+    const Point<dim> &p,
+    const unsigned int component) const
+{ // no second derivative is computed here: after the argument checks every call signals ExcUnitShapeValuesDoNotExist
+  (void)i;  // presumably keeps builds in which Assert expands to nothing free of unused-parameter warnings
+  (void)p;
+  (void)component;
+  Assert (i<this->dofs_per_cell, ExcIndexRange(i, 0, this->dofs_per_cell));
+  Assert (component == 0, ExcIndexRange (component, 0, 1));  // only component 0 is accepted
+  AssertThrow (false, (typename FiniteElement<dim>::ExcUnitShapeValuesDoNotExist()));
+  return Tensor<2,dim>();  // placeholder to satisfy the return type
+}
+
+
+//---------------------------------------------------------------------------
+// Auxiliary functions
+//---------------------------------------------------------------------------
+
+
+template <int dim, int spacedim>
+std::vector<unsigned int>
+FE_DGPNonparametric<dim,spacedim>::get_dpo_vector (const unsigned int deg)
+{
+  // only the last entry (index dim) is nonzero; it is built up as the
+  // binomial coefficient (deg+dim over dim), one factor per extra dimension
+  unsigned int n_cell_dofs = deg+1;
+  for (unsigned int d=1; d<dim; ++d)
+    n_cell_dofs = (n_cell_dofs * (deg+1+d)) / (d+1);
+  std::vector<unsigned int> dofs_per_object(dim+1, 0U);
+  dofs_per_object[dim] = n_cell_dofs;
+  return dofs_per_object;
+}
+
+
+
+template <int dim, int spacedim>
+UpdateFlags
+FE_DGPNonparametric<dim,spacedim>::requires_update_flags (const UpdateFlags flags) const
+{
+  // evaluating values, gradients or hessians needs the quadrature points
+  // (see the fill_fe_*_values functions), so request them as well
+  UpdateFlags result = flags;
+  if ((flags & (update_values | update_gradients | update_hessians)) != 0)
+    result |= update_quadrature_points;
+  return result;
+}
+
+
+
+//---------------------------------------------------------------------------
+// Data field initialization
+//---------------------------------------------------------------------------
+
+template <int dim, int spacedim>
+typename FiniteElement<dim,spacedim>::InternalDataBase *
+FE_DGPNonparametric<dim,spacedim>::
+get_data (const UpdateFlags                                                    update_flags,
+          const Mapping<dim,spacedim> &,
+          const Quadrature<dim> &,
+          dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &/*output_data*/) const
+{
+  // allocate a fresh data object with new and return it to the caller
+  typename FiniteElement<dim,spacedim>::InternalDataBase *data
+    = new typename FiniteElement<dim,spacedim>::InternalDataBase;
+  data->update_each = requires_update_flags(update_flags);
+  // the mapping, quadrature and output arguments are not used: only the flags are recorded
+  // other than that, there is nothing we can add here, as discussed
+  // in the general documentation of this class
+
+  return data;
+}
+
+
+
+//---------------------------------------------------------------------------
+// Fill data of FEValues
+//---------------------------------------------------------------------------
+
+template <int dim, int spacedim>
+void
+FE_DGPNonparametric<dim,spacedim>::
+fill_fe_values (const typename Triangulation<dim,spacedim>::cell_iterator &,
+                const CellSimilarity::Similarity                                     ,
+                const Quadrature<dim> &,
+                const Mapping<dim,spacedim> &,
+                const typename Mapping<dim,spacedim>::InternalDataBase &,
+                const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &mapping_data,
+                const typename FiniteElement<dim,spacedim>::InternalDataBase        &fe_internal,
+                dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const
+{
+  Assert (fe_internal.update_each & update_quadrature_points, ExcInternalError());
+  // Evaluate the polynomial space at the points in mapping_data.quadrature_points and store the results in output_data.
+  const unsigned int n_q_points = mapping_data.quadrature_points.size();
+  // Scratch arrays for one point; each has dofs_per_cell entries if its update flag is set and is empty otherwise.
+  std::vector<double> values(fe_internal.update_each & update_values ? this->dofs_per_cell : 0);
+  std::vector<Tensor<1,dim> > grads(fe_internal.update_each & update_gradients ? this->dofs_per_cell : 0);
+  std::vector<Tensor<2,dim> > grad_grads(fe_internal.update_each & update_hessians ? this->dofs_per_cell : 0);
+  std::vector<Tensor<3,dim> > empty_vector_of_3rd_order_tensors;
+  std::vector<Tensor<4,dim> > empty_vector_of_4th_order_tensors;
+  // Hessians belong in this test too: without them a hessians-only request would never fill output_data.shape_hessians.
+  if (fe_internal.update_each & (update_values | update_gradients | update_hessians))
+    for (unsigned int i=0; i<n_q_points; ++i)
+      {
+        polynomial_space.compute(mapping_data.quadrature_points[i],
+                                 values, grads, grad_grads,
+                                 empty_vector_of_3rd_order_tensors,
+                                 empty_vector_of_4th_order_tensors);
+        // Copy what was requested into the output tables, which are indexed [shape function][quadrature point].
+        if (fe_internal.update_each & update_values)
+          for (unsigned int k=0; k<this->dofs_per_cell; ++k)
+            output_data.shape_values[k][i] = values[k];
+
+        if (fe_internal.update_each & update_gradients)
+          for (unsigned int k=0; k<this->dofs_per_cell; ++k)
+            output_data.shape_gradients[k][i] = grads[k];
+
+        if (fe_internal.update_each & update_hessians)
+          for (unsigned int k=0; k<this->dofs_per_cell; ++k)
+            output_data.shape_hessians[k][i] = grad_grads[k];
+      }
+}
+
+
+
+template <int dim, int spacedim>
+void
+FE_DGPNonparametric<dim,spacedim>::
+fill_fe_face_values (const typename Triangulation<dim,spacedim>::cell_iterator &,
+                     const unsigned int                                                   ,
+                     const Quadrature<dim-1>                                             &,
+                     const Mapping<dim,spacedim> &,
+                     const typename Mapping<dim,spacedim>::InternalDataBase &,
+                     const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &mapping_data,
+                     const typename FiniteElement<dim,spacedim>::InternalDataBase        &fe_internal,
+                     dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const
+{
+  Assert (fe_internal.update_each & update_quadrature_points, ExcInternalError());
+  // Face variant: evaluate the polynomial space at the points in mapping_data.quadrature_points and store the results in output_data.
+  const unsigned int n_q_points = mapping_data.quadrature_points.size();
+  // Scratch arrays for one point; each has dofs_per_cell entries if its update flag is set and is empty otherwise.
+  std::vector<double> values(fe_internal.update_each & update_values ? this->dofs_per_cell : 0);
+  std::vector<Tensor<1,dim> > grads(fe_internal.update_each & update_gradients ? this->dofs_per_cell : 0);
+  std::vector<Tensor<2,dim> > grad_grads(fe_internal.update_each & update_hessians ? this->dofs_per_cell : 0);
+  std::vector<Tensor<3,dim> > empty_vector_of_3rd_order_tensors;
+  std::vector<Tensor<4,dim> > empty_vector_of_4th_order_tensors;
+  // Hessians belong in this test too: without them a hessians-only request would never fill output_data.shape_hessians.
+  if (fe_internal.update_each & (update_values | update_gradients | update_hessians))
+    for (unsigned int i=0; i<n_q_points; ++i)
+      {
+        polynomial_space.compute(mapping_data.quadrature_points[i],
+                                 values, grads, grad_grads,
+                                 empty_vector_of_3rd_order_tensors,
+                                 empty_vector_of_4th_order_tensors);
+        // Copy what was requested into the output tables, which are indexed [shape function][quadrature point].
+        if (fe_internal.update_each & update_values)
+          for (unsigned int k=0; k<this->dofs_per_cell; ++k)
+            output_data.shape_values[k][i] = values[k];
+
+        if (fe_internal.update_each & update_gradients)
+          for (unsigned int k=0; k<this->dofs_per_cell; ++k)
+            output_data.shape_gradients[k][i] = grads[k];
+
+        if (fe_internal.update_each & update_hessians)
+          for (unsigned int k=0; k<this->dofs_per_cell; ++k)
+            output_data.shape_hessians[k][i] = grad_grads[k];
+      }
+}
+
+
+
+template <int dim, int spacedim>
+void
+FE_DGPNonparametric<dim,spacedim>::
+fill_fe_subface_values (const typename Triangulation<dim,spacedim>::cell_iterator &,
+                        const unsigned int                                                   ,
+                        const unsigned int                                                   ,
+                        const Quadrature<dim-1>                                             &,
+                        const Mapping<dim,spacedim> &,
+                        const typename Mapping<dim,spacedim>::InternalDataBase &,
+                        const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &mapping_data,
+                        const typename FiniteElement<dim,spacedim>::InternalDataBase        &fe_internal,
+                        dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const
+{
+  Assert (fe_internal.update_each & update_quadrature_points, ExcInternalError());
+  // Subface variant: evaluate the polynomial space at the points in mapping_data.quadrature_points and store the results in output_data.
+  const unsigned int n_q_points = mapping_data.quadrature_points.size();
+  // Scratch arrays for one point; each has dofs_per_cell entries if its update flag is set and is empty otherwise.
+  std::vector<double> values(fe_internal.update_each & update_values ? this->dofs_per_cell : 0);
+  std::vector<Tensor<1,dim> > grads(fe_internal.update_each & update_gradients ? this->dofs_per_cell : 0);
+  std::vector<Tensor<2,dim> > grad_grads(fe_internal.update_each & update_hessians ? this->dofs_per_cell : 0);
+  std::vector<Tensor<3,dim> > empty_vector_of_3rd_order_tensors;
+  std::vector<Tensor<4,dim> > empty_vector_of_4th_order_tensors;
+  // Hessians belong in this test too: without them a hessians-only request would never fill output_data.shape_hessians.
+  if (fe_internal.update_each & (update_values | update_gradients | update_hessians))
+    for (unsigned int i=0; i<n_q_points; ++i)
+      {
+        polynomial_space.compute(mapping_data.quadrature_points[i],
+                                 values, grads, grad_grads,
+                                 empty_vector_of_3rd_order_tensors,
+                                 empty_vector_of_4th_order_tensors);
+        // Copy what was requested into the output tables, which are indexed [shape function][quadrature point].
+        if (fe_internal.update_each & update_values)
+          for (unsigned int k=0; k<this->dofs_per_cell; ++k)
+            output_data.shape_values[k][i] = values[k];
+
+        if (fe_internal.update_each & update_gradients)
+          for (unsigned int k=0; k<this->dofs_per_cell; ++k)
+            output_data.shape_gradients[k][i] = grads[k];
+
+        if (fe_internal.update_each & update_hessians)
+          for (unsigned int k=0; k<this->dofs_per_cell; ++k)
+            output_data.shape_hessians[k][i] = grad_grads[k];
+      }
+}
+
+
+
+template <int dim, int spacedim>
+void
+FE_DGPNonparametric<dim,spacedim>::
+get_face_interpolation_matrix (const FiniteElement<dim,spacedim> &x_source_fe,
+                               FullMatrix<double>       &interpolation_matrix) const
+{
+  // Only a source element that is itself an FE_DGPNonparametric
+  // (recognized by its name or by its type) is accepted; anything
+  // else triggers ExcInterpolationNotImplemented. Since such
+  // elements have no dofs on faces, the matrix must be empty
+  // (0 x 0), which is all that is checked below; nothing is
+  // written to interpolation_matrix.
+  (void)interpolation_matrix;  // presumably avoids an unused-parameter warning when the Asserts below expand to nothing
+  typedef              FiniteElement<dim,spacedim> FEE;
+  AssertThrow ((x_source_fe.get_name().find ("FE_DGPNonparametric<") == 0)
+               ||
+               (dynamic_cast<const FE_DGPNonparametric<dim,spacedim>*>(&x_source_fe) != 0),
+               typename FEE::
+               ExcInterpolationNotImplemented());
+  // the caller is expected to pass in a matrix with zero rows and zero columns
+  Assert (interpolation_matrix.m() == 0,
+          ExcDimensionMismatch (interpolation_matrix.m(),
+                                0));
+  Assert (interpolation_matrix.n() == 0,
+          ExcDimensionMismatch (interpolation_matrix.n(),
+                                0));
+}
+
+
+
+template <int dim, int spacedim>
+void
+FE_DGPNonparametric<dim,spacedim>::
+get_subface_interpolation_matrix (const FiniteElement<dim,spacedim> &x_source_fe,
+                                  const unsigned int ,
+                                  FullMatrix<double>           &interpolation_matrix) const
+{
+  // Only a source element that is itself an FE_DGPNonparametric
+  // (recognized by its name or by its type) is accepted; anything
+  // else triggers ExcInterpolationNotImplemented. Since such
+  // elements have no dofs on faces, the matrix must be empty
+  // (0 x 0), which is all that is checked below; the subface
+  // number is ignored and nothing is written to the matrix.
+  (void)interpolation_matrix;  // presumably avoids an unused-parameter warning when the Asserts below expand to nothing
+  typedef              FiniteElement<dim,spacedim> FEE;
+  AssertThrow ((x_source_fe.get_name().find ("FE_DGPNonparametric<") == 0)
+               ||
+               (dynamic_cast<const FE_DGPNonparametric<dim,spacedim>*>(&x_source_fe) != 0),
+               typename FEE::
+               ExcInterpolationNotImplemented());
+  // the caller is expected to pass in a matrix with zero rows and zero columns
+  Assert (interpolation_matrix.m() == 0,
+          ExcDimensionMismatch (interpolation_matrix.m(),
+                                0));
+  Assert (interpolation_matrix.n() == 0,
+          ExcDimensionMismatch (interpolation_matrix.n(),
+                                0));
+}
+
+
+
+template <int dim, int spacedim>
+bool
+FE_DGPNonparametric<dim,spacedim>::hp_constraints_are_implemented () const
+{ // this element always reports that its hp constraint functions are implemented
+  return true;
+}
+
+
+
+template <int dim, int spacedim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FE_DGPNonparametric<dim,spacedim>::
+hp_vertex_dof_identities (const FiniteElement<dim,spacedim> &fe_other) const
+{
+  // The returned list of identities is always empty. Pairing with
+  // anything but another FE_DGPNonparametric is additionally
+  // flagged as not implemented before that empty list is returned.
+  typedef FE_DGPNonparametric<dim,spacedim> ThisElement;
+  const bool same_family = (dynamic_cast<const ThisElement *>(&fe_other) != 0);
+  if (!same_family)
+    {
+      Assert (false, ExcNotImplemented());
+    }
+  return std::vector<std::pair<unsigned int, unsigned int> > ();
+}
+
+
+
+template <int dim, int spacedim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FE_DGPNonparametric<dim,spacedim>::
+hp_line_dof_identities (const FiniteElement<dim,spacedim> &fe_other) const
+{
+  // no constraints of this kind exist, so the list is always empty
+  const std::vector<std::pair<unsigned int, unsigned int> > no_identities;
+  const FE_DGPNonparametric<dim,spacedim> *const other
+    = dynamic_cast<const FE_DGPNonparametric<dim,spacedim>*>(&fe_other);
+  if (other == 0)
+    {
+      // pairing with a different kind of element is not supported
+      Assert (false, ExcNotImplemented());
+    }
+  return no_identities;
+}
+
+
+
+template <int dim, int spacedim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FE_DGPNonparametric<dim,spacedim>::
+hp_quad_dof_identities (const FiniteElement<dim,spacedim>        &fe_other) const
+{
+  // The result is an empty list whatever fe_other is; an element
+  // of a different kind is reported as not implemented first.
+  typedef std::vector<std::pair<unsigned int, unsigned int> > IdentityList;
+  typedef FE_DGPNonparametric<dim,spacedim> ThisElement;
+  const bool is_dgp_nonparametric = (dynamic_cast<const ThisElement *>(&fe_other) != 0);
+  if (!is_dgp_nonparametric)
+    {
+      Assert (false, ExcNotImplemented());
+    }
+  return IdentityList ();
+}
+
+
+
+template <int dim, int spacedim>
+FiniteElementDomination::Domination
+FE_DGPNonparametric<dim,spacedim>::
+compare_for_face_domination (const FiniteElement<dim,spacedim> &fe_other) const
+{
+  // if the other element is an FE_DGPNonparametric as well,
+  // there are no requirements; see the description of
+  // FiniteElementDomination::Domination
+  if (dynamic_cast<const FE_DGPNonparametric<dim,spacedim>*>(&fe_other) != 0)
+    return FiniteElementDomination::no_requirements;
+  // any other kind of element is flagged as not implemented; the value below is what is returned past the Assert
+  Assert (false, ExcNotImplemented());
+  return FiniteElementDomination::neither_element_dominates;
+}
+
+
+
+template <int dim, int spacedim>
+bool
+FE_DGPNonparametric<dim,spacedim>::has_support_on_face (const unsigned int,
+                                                        const unsigned int) const
+{ // both indices are ignored: no shape function is ever ruled out on any face
+  return true;
+}
+
+
+
+template <int dim, int spacedim>
+std::size_t
+FE_DGPNonparametric<dim,spacedim>::memory_consumption () const
+{ // not implemented for this element: no memory estimate is computed
+  Assert (false, ExcNotImplemented ());  // flags every call as not implemented
+  return 0;  // placeholder result if execution continues past the Assert
+}
+
+
+
+template <int dim, int spacedim>
+unsigned int
+FE_DGPNonparametric<dim,spacedim>::get_degree () const
+{ // accessor for the degree member, which the constructor sets from its argument
+  return degree;
+}
+
+
+
+// explicit instantiations
+#include "fe_dgp_nonparametric.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/fe_dgp_nonparametric.inst.in b/source/fe/fe_dgp_nonparametric.inst.in
new file mode 100644
index 0000000..a38dc38
--- /dev/null
+++ b/source/fe/fe_dgp_nonparametric.inst.in
@@ -0,0 +1,22 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+// Explicit instantiation of FE_DGPNonparametric, one per entry of DIMENSIONS (only the first template argument is given).
+for (deal_II_dimension : DIMENSIONS)
+  {
+    template class FE_DGPNonparametric<deal_II_dimension>;
+  }
+
diff --git a/source/fe/fe_dgq.cc b/source/fe/fe_dgq.cc
new file mode 100644
index 0000000..7fac222
--- /dev/null
+++ b/source/fe/fe_dgq.cc
@@ -0,0 +1,894 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/quadrature.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/template_constraints.h>
+#include <deal.II/fe/fe_dgq.h>
+#include <deal.II/fe/fe_tools.h>
+
+
+#include <iostream>
+#include <sstream>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// namespace for some functions that are used in this file. they are
+// specific to numbering conventions used for the FE_DGQ element, and
+// are thus not very interesting to the outside world
+namespace
+{
+  // Integer square root of N: returns the
+  // i with i*i == N. If there is none, an
+  // internal error is flagged (linear search).
+  inline unsigned int int_sqrt (const unsigned int N)
+  {
+    for (unsigned int i=0; i<=N; ++i)
+      if (i*i == N)
+        return i;
+    Assert (false, ExcInternalError());
+    return numbers::invalid_unsigned_int;
+  }
+
+
+  // Integer cube root of N: returns the
+  // i with i*i*i == N. If there is none, an
+  // internal error is flagged (linear search).
+  inline unsigned int int_cuberoot (const unsigned int N)
+  {
+    for (unsigned int i=0; i<=N; ++i)
+      if (i*i*i == N)
+        return i;
+    Assert (false, ExcInternalError());
+    return numbers::invalid_unsigned_int;
+  }
+
+
+  // Returns the i-th (i=0...N-1) of N equidistant
+  // points i/(N-1) in [0,1], end points included;
+  // for N==1 the single point is the midpoint 0.5.
+  inline Point<1>
+  generate_unit_point (const unsigned int i,
+                       const unsigned int N,
+                       const dealii::internal::int2type<1>  )
+  {
+    Assert (i<N, ExcInternalError());
+    if (N==1)
+      return Point<1> (.5);
+    else
+      {
+        const double h = 1./(N-1);
+        return Point<1>(i*h);
+      }
+  }
+
+
+  // Returns the i-th (i=0...N-1) point of an equidistant
+  // lattice with N points in [0,1]^2; N must be 1 (midpoint)
+  // or a square number >= 4. The x index runs fastest.
+  inline Point<2>
+  generate_unit_point (const unsigned int i,
+                       const unsigned int N,
+                       const dealii::internal::int2type<2>  )
+  {
+    Assert (i<N, ExcInternalError());
+
+    if (N==1)
+      return Point<2> (.5, .5);
+    else
+      {
+        Assert (N>=4, ExcInternalError());
+        const unsigned int N1d = int_sqrt(N);
+        const double h = 1./(N1d-1);
+
+        return Point<2> (i%N1d * h,
+                         i/N1d * h);
+      }
+  }
+
+
+
+
+  // Returns the i-th (i=0...N-1) point of an equidistant
+  // lattice with N points in [0,1]^3; N must be 1 (midpoint)
+  // or a cube number >= 8. The x index runs fastest, then y.
+  inline Point<3>
+  generate_unit_point (const unsigned int i,
+                       const unsigned int N,
+                       const dealii::internal::int2type<3>  )
+  {
+    Assert (i<N, ExcInternalError());
+    if (N==1)
+      return Point<3> (.5, .5, .5);
+    else
+      {
+        Assert (N>=8, ExcInternalError());
+
+        const unsigned int N1d = int_cuberoot(N);
+        const double h = 1./(N1d-1);
+
+        return Point<3> (i%N1d * h,
+                         (i/N1d)%N1d * h,
+                         i/(N1d*N1d) * h);
+      }
+  }
+}
+
+
+
+
+template <int dim, int spacedim>
+FE_DGQ<dim, spacedim>::FE_DGQ (const unsigned int degree)
+  :
+  FE_Poly<TensorProductPolynomials<dim>, dim, spacedim> (
+    TensorProductPolynomials<dim>(Polynomials::LagrangeEquidistant::generate_complete_basis(degree)),
+    FiniteElementData<dim>(get_dpo_vector(degree), 1, degree, FiniteElementData<dim>::L2),
+    std::vector<bool>(FiniteElementData<dim>(get_dpo_vector(degree),1, degree).dofs_per_cell, true),
+    std::vector<ComponentMask>(FiniteElementData<dim>(
+                                 get_dpo_vector(degree),1, degree).dofs_per_cell, std::vector<bool>(1,true)))
+{
+  // Set up the unit support points.
+  if (degree == 0)
+    {
+      // A constant element gets a single support point whose
+      // coordinates are all 0.5.
+      this->unit_support_points.resize(1);
+      for (unsigned int d=0; d<dim; ++d)
+        this->unit_support_points[0](d) = 0.5;
+    }
+  else
+    {
+      // There are (degree+1)^dim points on an equidistant lattice.
+      const unsigned int n_1d = degree+1;
+      unsigned int n_points = n_1d;
+      for (unsigned int d=1; d<dim; ++d)
+        n_points *= n_1d;
+      this->unit_support_points.resize(n_points);
+
+      // Directions beyond dim collapse to the single index 0.
+      const unsigned int last_y = (dim>1) ? degree : 0;
+      const unsigned int last_z = (dim>2) ? degree : 0;
+      const double spacing = 1./degree;
+      Point<dim> lattice_point;
+      unsigned int next = 0;
+      for (unsigned int iz=0; iz<=last_z; ++iz)
+        for (unsigned int iy=0; iy<=last_y; ++iy)
+          for (unsigned int ix=0; ix<=degree; ++ix)
+            {
+              // x runs fastest, then y, then z
+              lattice_point(0) = ix * spacing;
+              if (dim>1)
+                lattice_point(1) = iy * spacing;
+              if (dim>2)
+                lattice_point(2) = iz * spacing;
+              this->unit_support_points[next++] = lattice_point;
+            }
+    }
+
+  // Embedding and restriction matrices are deliberately left alone here;
+  // they are initialized on demand in get_restriction_matrix and
+  // get_prolongation_matrix. DG elements have no face support points.
+}
+
+
+
+template <int dim, int spacedim>
+FE_DGQ<dim, spacedim>::FE_DGQ (const Quadrature<1> &points)
+  :
+  FE_Poly<TensorProductPolynomials<dim>, dim, spacedim> (
+    TensorProductPolynomials<dim>(Polynomials::generate_complete_Lagrange_basis(points.get_points())),
+    FiniteElementData<dim>(get_dpo_vector(points.size()-1), 1, points.size()-1, FiniteElementData<dim>::L2),
+    std::vector<bool>(FiniteElementData<dim>(get_dpo_vector(points.size()-1),1, points.size()-1).dofs_per_cell, true),
+    std::vector<ComponentMask>(FiniteElementData<dim>(
+                                 get_dpo_vector(points.size()-1),1, points.size()-1).dofs_per_cell, std::vector<bool>(1,true)))
+{
+  // The support points are the tensor product of the 1d points
+  // handed to this constructor, i.e. of the Lagrange interpolation
+  // points. The polynomial degree is taken as points.size()-1, so
+  // 'points' is expected to be non-empty (not checked here).
+  Quadrature<dim> support_quadrature(points);
+  this->unit_support_points = support_quadrature.get_points();
+
+
+  // Embedding and restriction matrices are not set up here; they are
+  // initialized on demand in get_restriction_matrix and
+  // get_prolongation_matrix.
+}
+
+
+
+template <int dim, int spacedim>
+std::string
+FE_DGQ<dim, spacedim>::get_name () const
+{
+  // The format of this string must match what
+  // FETools::get_fe_from_name expects, so the two have to be
+  // changed together.
+  std::ostringstream name_stream;
+
+  // element name with the dimension string in angle brackets
+  name_stream << "FE_DGQ<";
+  name_stream << Utilities::dim_string(dim,spacedim);
+
+  // polynomial degree in parentheses
+  name_stream << ">(" << this->degree << ")";
+  return name_stream.str();
+}
+
+
+
+template <int dim, int spacedim>
+FiniteElement<dim,spacedim> *
+FE_DGQ<dim, spacedim>::clone() const
+{ // returns a copy of this element, created with new through the copy constructor
+  return new FE_DGQ<dim, spacedim>(*this);  // NOTE(review): presumably the caller is responsible for deleting it -- confirm
+}
+
+
+//---------------------------------------------------------------------------
+// Auxiliary functions
+//---------------------------------------------------------------------------
+
+
+template <int dim, int spacedim>
+std::vector<unsigned int>
+FE_DGQ<dim, spacedim>::get_dpo_vector (const unsigned int deg)
+{ // returns dim+1 entries that are all zero except the last one, which becomes (deg+1)^dim
+  std::vector<unsigned int> dpo(dim+1, 0U);
+  dpo[dim] = deg+1;
+  for (unsigned int i=1; i<dim; ++i)  // one more factor deg+1 for each dimension beyond the first
+    dpo[dim] *= deg+1;
+  return dpo;
+}
+
+
+
+template <int dim, int spacedim>
+void
+FE_DGQ<dim, spacedim>::rotate_indices (std::vector<unsigned int> &numbers,
+                                       const char                 direction) const
+{
+  const unsigned int n = this->degree+1;  // number of indices per coordinate direction
+  unsigned int s = n;  // becomes n^dim, the length of the index list
+  for (unsigned int i=1; i<dim; ++i)
+    s *= n;
+  numbers.resize (s);
+  // 'numbers' is filled with an index permutation selected by 'direction'
+  unsigned int l = 0;
+  // l counts through the lexicographic positions (x fastest) as the loops below advance
+  if (dim==1)
+    {
+      // Mirror around midpoint: the list becomes n-1, ..., 0
+      // ('direction' is not looked at in this case)
+      for (unsigned int i=n; i>0;)
+        numbers[l++]=--i;
+    }
+  else
+    {
+      switch (direction)
+        {
+        // Rotate xy-plane
+        // counter-clockwise
+        case 'z':
+          for (unsigned int iz=0; iz<((dim>2) ? n:1); ++iz)
+            for (unsigned int j=0; j<n; ++j)
+              for (unsigned int i=0; i<n; ++i)
+                {
+                  unsigned int k = n*i-j+n-1 + n*n*iz;
+                  numbers[l++] = k;
+                }
+          break;
+        // Rotate xy-plane clockwise: same k as for 'z', but
+        // stored as numbers[k]=l, i.e. the inverse permutation
+        case 'Z':
+          for (unsigned int iz=0; iz<((dim>2) ? n:1); ++iz)
+            for (unsigned int iy=0; iy<n; ++iy)
+              for (unsigned int ix=0; ix<n; ++ix)
+                {
+                  unsigned int k = n*ix-iy+n-1 + n*n*iz;
+                  numbers[k] = l++;
+                }
+          break;
+        // Rotate yz-plane
+        // counter-clockwise (requires dim>2)
+        case 'x':
+          Assert (dim>2, ExcDimensionMismatch (dim,3));
+          for (unsigned int iz=0; iz<n; ++iz)
+            for (unsigned int iy=0; iy<n; ++iy)
+              for (unsigned int ix=0; ix<n; ++ix)
+                {
+                  unsigned int k = n*(n*iy-iz+n-1) + ix;
+                  numbers[l++] = k;
+                }
+          break;
+        // Rotate yz-plane clockwise (requires dim>2): same k as
+        // for 'x', stored as numbers[k]=l, the inverse permutation
+        case 'X':
+          Assert (dim>2, ExcDimensionMismatch (dim,3));
+          for (unsigned int iz=0; iz<n; ++iz)
+            for (unsigned int iy=0; iy<n; ++iy)
+              for (unsigned int ix=0; ix<n; ++ix)
+                {
+                  unsigned int k = n*(n*iy-iz+n-1) + ix;
+                  numbers[k] = l++;
+                }
+          break;
+        default:  // any other direction character is flagged as not implemented
+          Assert (false, ExcNotImplemented ());
+        }
+    }
+}
+
+
+
+// Compute the matrix that interpolates from @p x_source_fe to this element
+// on one cell and store it in @p interpolation_matrix.
+//
+// @p x_source_fe must itself be an FE_DGQ<dim,spacedim>; otherwise
+// FiniteElement::ExcInterpolationNotImplemented is thrown.
+// @p interpolation_matrix must already have this->dofs_per_cell rows and
+// source_fe.dofs_per_cell columns (checked by Assert).
+template <int dim, int spacedim>
+void
+FE_DGQ<dim, spacedim>::
+get_interpolation_matrix (const FiniteElement<dim, spacedim> &x_source_fe,
+                          FullMatrix<double>           &interpolation_matrix) const
+{
+  // this is only implemented, if the
+  // source FE is also a
+  // DGQ element. cast only once and
+  // reuse the result below
+  typedef FiniteElement<dim, spacedim> FE;
+  const FE_DGQ<dim, spacedim> *const source_fe_ptr
+    = dynamic_cast<const FE_DGQ<dim, spacedim>*>(&x_source_fe);
+  AssertThrow (source_fe_ptr != 0,
+               typename FE::ExcInterpolationNotImplemented() );
+
+  // ok, source is a DGQ element, so
+  // we will be able to do the work
+  const FE_DGQ<dim, spacedim> &source_fe = *source_fe_ptr;
+
+  Assert (interpolation_matrix.m() == this->dofs_per_cell,
+          ExcDimensionMismatch (interpolation_matrix.m(),
+                                this->dofs_per_cell));
+  Assert (interpolation_matrix.n() == source_fe.dofs_per_cell,
+          ExcDimensionMismatch (interpolation_matrix.n(),
+                                source_fe.dofs_per_cell));
+
+
+  // compute the interpolation
+  // matrices in much the same way as
+  // we do for the embedding matrices
+  // from mother to child.
+  FullMatrix<double> cell_interpolation (this->dofs_per_cell,
+                                         this->dofs_per_cell);
+  FullMatrix<double> source_interpolation (this->dofs_per_cell,
+                                           source_fe.dofs_per_cell);
+  for (unsigned int j=0; j<this->dofs_per_cell; ++j)
+    {
+      // generate a point on this
+      // cell and evaluate the
+      // shape functions of both
+      // elements there
+      const Point<dim> p = generate_unit_point (j, this->dofs_per_cell,
+                                                dealii::internal::int2type<dim>());
+      for (unsigned int i=0; i<this->dofs_per_cell; ++i)
+        cell_interpolation(j,i)
+          = this->poly_space.compute_value (i, p);
+
+      for (unsigned int i=0; i<source_fe.dofs_per_cell; ++i)
+        source_interpolation(j,i)
+          = source_fe.poly_space.compute_value (i, p);
+    }
+
+  // then compute the
+  // interpolation matrix as the
+  // product of the inverted
+  // cell_interpolation with
+  // source_interpolation
+  cell_interpolation.gauss_jordan ();
+  cell_interpolation.mmult (interpolation_matrix,
+                            source_interpolation);
+
+  // cut off very small values
+  for (unsigned int i=0; i<this->dofs_per_cell; ++i)
+    for (unsigned int j=0; j<source_fe.dofs_per_cell; ++j)
+      if (std::fabs(interpolation_matrix(i,j)) < 1e-15)
+        interpolation_matrix(i,j) = 0.;
+
+  // make sure that the row sum of
+  // each of the matrices is 1 at
+  // this point. this must be so
+  // since the shape functions sum up
+  // to 1
+  for (unsigned int i=0; i<this->dofs_per_cell; ++i)
+    {
+      double sum = 0.;
+      for (unsigned int j=0; j<source_fe.dofs_per_cell; ++j)
+        sum += interpolation_matrix(i,j);
+
+      Assert (std::fabs(sum-1) < 5e-14*std::max(this->degree,1U)*dim,
+              ExcInternalError());
+    }
+}
+
+
+
+// Face interpolation between two DGQ elements. Throws
+// FiniteElement::ExcInterpolationNotImplemented unless @p x_source_fe is an
+// FE_DGQ<dim,spacedim>. @p interpolation_matrix is not written to; it is
+// only checked (via Assert) to be of size 0 x 0.
+template <int dim, int spacedim>
+void
+FE_DGQ<dim, spacedim>::
+get_face_interpolation_matrix (const FiniteElement<dim, spacedim> &x_source_fe,
+                               FullMatrix<double>       &interpolation_matrix) const
+{
+  // this is only implemented, if the source
+  // FE is also a DGQ element. in that case,
+  // both elements have no dofs on their
+  // faces and the face interpolation matrix
+  // is necessarily empty -- i.e. there isn't
+  // much we need to do here.
+
+  // the matrix is only referenced inside Assert below; this keeps the
+  // parameter "used" when the assertions are compiled out
+  (void)interpolation_matrix;
+  typedef FiniteElement<dim,spacedim> FE;
+  AssertThrow ((dynamic_cast<const FE_DGQ<dim, spacedim>*>(&x_source_fe) != 0),
+               typename FE::ExcInterpolationNotImplemented());
+
+  Assert (interpolation_matrix.m() == 0,
+          ExcDimensionMismatch (interpolation_matrix.m(),
+                                0));
+  // report the number of columns (not rows) if the column check fails
+  Assert (interpolation_matrix.n() == 0,
+          ExcDimensionMismatch (interpolation_matrix.n(),
+                                0));
+}
+
+
+
+// Subface interpolation between two DGQ elements. Throws
+// FiniteElement::ExcInterpolationNotImplemented unless @p x_source_fe is an
+// FE_DGQ<dim,spacedim>. The subface index is ignored and
+// @p interpolation_matrix is not written to; it is only checked (via Assert)
+// to be of size 0 x 0.
+template <int dim, int spacedim>
+void
+FE_DGQ<dim, spacedim>::
+get_subface_interpolation_matrix (const FiniteElement<dim, spacedim> &x_source_fe,
+                                  const unsigned int ,
+                                  FullMatrix<double>           &interpolation_matrix) const
+{
+  // this is only implemented, if the source
+  // FE is also a DGQ element. in that case,
+  // both elements have no dofs on their
+  // faces and the face interpolation matrix
+  // is necessarily empty -- i.e. there isn't
+  // much we need to do here.
+
+  // the matrix is only referenced inside Assert below; this keeps the
+  // parameter "used" when the assertions are compiled out
+  (void)interpolation_matrix;
+  typedef FiniteElement<dim, spacedim> FE;
+  AssertThrow ((dynamic_cast<const FE_DGQ<dim, spacedim>*>(&x_source_fe) != 0),
+               typename FE::ExcInterpolationNotImplemented());
+
+  Assert (interpolation_matrix.m() == 0,
+          ExcDimensionMismatch (interpolation_matrix.m(),
+                                0));
+  // report the number of columns (not rows) if the column check fails
+  Assert (interpolation_matrix.n() == 0,
+          ExcDimensionMismatch (interpolation_matrix.n(),
+                                0));
+}
+
+
+
+// Return the prolongation matrix stored for child @p child under refinement
+// case @p refinement_case.
+//
+// The matrices are computed on first request: if the stored matrix still has
+// zero columns, the mutex is taken, the size is checked again, and then the
+// matrices are computed. For isotropic refinement only the prolongation
+// matrices of that case are computed; for any other case all restriction and
+// prolongation matrices are re-initialized and recomputed. If dim !=
+// spacedim, the computation is done with a temporary FE_DGQ<dim> of the same
+// degree instead of *this.
+//
+// @p refinement_case must be a valid refinement case other than
+// no_refinement, and @p child must be a valid child index for it (both
+// checked by Assert).
+template <int dim, int spacedim>
+const FullMatrix<double> &
+FE_DGQ<dim,spacedim>
+::get_prolongation_matrix (const unsigned int child,
+                           const RefinementCase<dim> &refinement_case) const
+{
+  Assert (refinement_case<RefinementCase<dim>::isotropic_refinement+1,
+          ExcIndexRange(refinement_case,0,RefinementCase<dim>::isotropic_refinement+1));
+  Assert (refinement_case!=RefinementCase<dim>::no_refinement,
+          ExcMessage("Prolongation matrices are only available for refined cells!"));
+  Assert (child<GeometryInfo<dim>::n_children(refinement_case),
+          ExcIndexRange(child,0,GeometryInfo<dim>::n_children(refinement_case)));
+
+  // initialization upon first request
+  if (this->prolongation[refinement_case-1][child].n() == 0)
+    {
+      Threads::Mutex::ScopedLock lock(this->mutex);
+
+      // if matrix got updated while waiting for the lock
+      if (this->prolongation[refinement_case-1][child].n() ==
+          this->dofs_per_cell)
+        return this->prolongation[refinement_case-1][child];
+
+      // now do the work. need to get a non-const version of data in order to
+      // be able to modify them inside a const function
+      FE_DGQ<dim,spacedim> &this_nonconst = const_cast<FE_DGQ<dim,spacedim>& >(*this);
+      if (refinement_case == RefinementCase<dim>::isotropic_refinement)
+        {
+          // build a temporary table in which only the last (isotropic) entry
+          // holds correctly sized matrices, compute into it, and swap the
+          // result into the member
+          std::vector<std::vector<FullMatrix<double> > >
+          isotropic_matrices(RefinementCase<dim>::isotropic_refinement);
+          isotropic_matrices.back().
+          resize(GeometryInfo<dim>::n_children(RefinementCase<dim>(refinement_case)),
+                 FullMatrix<double>(this->dofs_per_cell, this->dofs_per_cell));
+          if (dim == spacedim)
+            FETools::compute_embedding_matrices (*this, isotropic_matrices, true);
+          else
+            FETools::compute_embedding_matrices (FE_DGQ<dim>(this->degree),
+                                                 isotropic_matrices, true);
+          this_nonconst.prolongation[refinement_case-1].swap(isotropic_matrices.back());
+        }
+      else
+        {
+          // must compute both restriction and prolongation matrices because
+          // we only check for their size and the reinit call initializes them
+          // all
+          this_nonconst.reinit_restriction_and_prolongation_matrices();
+          if (dim == spacedim)
+            {
+              FETools::compute_embedding_matrices (*this, this_nonconst.prolongation);
+              FETools::compute_projection_matrices (*this, this_nonconst.restriction);
+            }
+          else
+            {
+              FE_DGQ<dim> tmp(this->degree);
+              FETools::compute_embedding_matrices (tmp, this_nonconst.prolongation);
+              FETools::compute_projection_matrices (tmp, this_nonconst.restriction);
+            }
+        }
+    }
+
+  // finally return the matrix
+  return this->prolongation[refinement_case-1][child];
+}
+
+
+
+// Return the restriction matrix stored for child @p child under refinement
+// case @p refinement_case.
+//
+// The matrices are computed on first request: if the stored matrix still has
+// zero columns, the mutex is taken, the size is checked again, and then the
+// matrices are computed. For isotropic refinement only the restriction
+// matrices of that case are computed; for any other case all restriction and
+// prolongation matrices are re-initialized and recomputed. If dim !=
+// spacedim, the computation is done with a temporary FE_DGQ<dim> of the same
+// degree instead of *this.
+//
+// @p refinement_case must be a valid refinement case other than
+// no_refinement, and @p child must be a valid child index for it (both
+// checked by Assert).
+template <int dim, int spacedim>
+const FullMatrix<double> &
+FE_DGQ<dim,spacedim>
+::get_restriction_matrix (const unsigned int child,
+                          const RefinementCase<dim> &refinement_case) const
+{
+  Assert (refinement_case<RefinementCase<dim>::isotropic_refinement+1,
+          ExcIndexRange(refinement_case,0,RefinementCase<dim>::isotropic_refinement+1));
+  Assert (refinement_case!=RefinementCase<dim>::no_refinement,
+          ExcMessage("Restriction matrices are only available for refined cells!"));
+  Assert (child<GeometryInfo<dim>::n_children(refinement_case),
+          ExcIndexRange(child,0,GeometryInfo<dim>::n_children(refinement_case)));
+
+  // initialization upon first request
+  if (this->restriction[refinement_case-1][child].n() == 0)
+    {
+      Threads::Mutex::ScopedLock lock(this->mutex);
+
+      // if matrix got updated while waiting for the lock...
+      if (this->restriction[refinement_case-1][child].n() ==
+          this->dofs_per_cell)
+        return this->restriction[refinement_case-1][child];
+
+      // now do the work. need to get a non-const version of data in order to
+      // be able to modify them inside a const function
+      FE_DGQ<dim,spacedim> &this_nonconst = const_cast<FE_DGQ<dim,spacedim>& >(*this);
+      if (refinement_case == RefinementCase<dim>::isotropic_refinement)
+        {
+          // build a temporary table in which only the last (isotropic) entry
+          // holds correctly sized matrices, compute into it, and swap the
+          // result into the member
+          std::vector<std::vector<FullMatrix<double> > >
+          isotropic_matrices(RefinementCase<dim>::isotropic_refinement);
+          isotropic_matrices.back().
+          resize(GeometryInfo<dim>::n_children(RefinementCase<dim>(refinement_case)),
+                 FullMatrix<double>(this->dofs_per_cell, this->dofs_per_cell));
+          if (dim == spacedim)
+            FETools::compute_projection_matrices (*this, isotropic_matrices, true);
+          else
+            FETools::compute_projection_matrices (FE_DGQ<dim>(this->degree),
+                                                  isotropic_matrices, true);
+          this_nonconst.restriction[refinement_case-1].swap(isotropic_matrices.back());
+        }
+      else
+        {
+          // must compute both restriction and prolongation matrices because
+          // we only check for their size and the reinit call initializes them
+          // all
+          this_nonconst.reinit_restriction_and_prolongation_matrices();
+          if (dim == spacedim)
+            {
+              FETools::compute_embedding_matrices (*this, this_nonconst.prolongation);
+              FETools::compute_projection_matrices (*this, this_nonconst.restriction);
+            }
+          else
+            {
+              FE_DGQ<dim> tmp(this->degree);
+              FETools::compute_embedding_matrices (tmp, this_nonconst.prolongation);
+              FETools::compute_projection_matrices (tmp, this_nonconst.restriction);
+            }
+        }
+    }
+
+  // finally return the matrix
+  return this->restriction[refinement_case-1][child];
+}
+
+
+
+// Always reports true for this element.
+template <int dim, int spacedim>
+bool
+FE_DGQ<dim, spacedim>::hp_constraints_are_implemented () const
+{
+  return true;
+}
+
+
+
+template <int dim, int spacedim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FE_DGQ<dim, spacedim>::
+hp_vertex_dof_identities (const FiniteElement<dim, spacedim> &/*fe_other*/) const
+{
+  // A discontinuous element has no degrees of freedom on vertices that
+  // could be identified with those of a neighbor, whatever element the
+  // neighbor uses. The list of identities is therefore always empty.
+  typedef std::vector<std::pair<unsigned int, unsigned int> > IdentityList;
+  return IdentityList();
+}
+
+
+
+template <int dim, int spacedim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FE_DGQ<dim, spacedim>::
+hp_line_dof_identities (const FiniteElement<dim, spacedim> &/*fe_other*/) const
+{
+  // A discontinuous element has no degrees of freedom on lines that
+  // could be identified with those of a neighbor, whatever element the
+  // neighbor uses. The list of identities is therefore always empty.
+  typedef std::vector<std::pair<unsigned int, unsigned int> > IdentityList;
+  return IdentityList();
+}
+
+
+
+template <int dim, int spacedim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FE_DGQ<dim, spacedim>::
+hp_quad_dof_identities (const FiniteElement<dim, spacedim> &/*fe_other*/) const
+{
+  // A discontinuous element has no degrees of freedom on quads that
+  // could be identified with those of a neighbor, whatever element the
+  // neighbor uses. The list of identities is therefore always empty.
+  typedef std::vector<std::pair<unsigned int, unsigned int> > IdentityList;
+  return IdentityList();
+}
+
+
+
+// Returns FiniteElementDomination::no_requirements regardless of the other
+// element; the argument is not inspected.
+template <int dim, int spacedim>
+FiniteElementDomination::Domination
+FE_DGQ<dim, spacedim>::compare_for_face_domination (const FiniteElement<dim, spacedim> &/*fe_other*/) const
+{
+  // this is a discontinuous element, so by definition there will
+  // be no constraints wherever this element comes together with
+  // any other kind of element
+  return FiniteElementDomination::no_requirements;
+}
+
+
+
+// Decide whether shape function @p shape_index is reported as having
+// support on face @p face_index.
+//
+// If the first unit support point is not the origin or the last one is not
+// the point (1,...,1) (tolerance 1e-13), every shape function is reported as
+// having support on every face. Otherwise the answer is derived from the
+// position of @p shape_index within the (degree+1)^dim index range.
+// NOTE(review): the index tests below presumably assume lexicographic
+// numbering with x running fastest -- confirm against the element's
+// numbering.
+template <int dim, int spacedim>
+bool
+FE_DGQ<dim, spacedim>::has_support_on_face (const unsigned int shape_index,
+                                            const unsigned int face_index) const
+{
+  Assert (shape_index < this->dofs_per_cell,
+          ExcIndexRange (shape_index, 0, this->dofs_per_cell));
+  Assert (face_index < GeometryInfo<dim>::faces_per_cell,
+          ExcIndexRange (face_index, 0, GeometryInfo<dim>::faces_per_cell));
+
+  // number of indices per coordinate direction
+  unsigned int n = this->degree+1;
+
+  // for DGQ(0) elements or arbitrary node DGQ with support points not located
+  // at the element boundary, the single shape functions is constant and
+  // therefore lives on the boundary
+  bool support_points_on_boundary = true;
+  for (unsigned int d=0; d<dim; ++d)
+    if (std::abs(this->unit_support_points[0][d]) > 1e-13)
+      support_points_on_boundary = false;
+  for (unsigned int d=0; d<dim; ++d)
+    if (std::abs(this->unit_support_points.back()[d]-1.) > 1e-13)
+      support_points_on_boundary = false;
+  if (support_points_on_boundary == false)
+    return true;
+
+  // number of indices in one xy-layer (only used in 3d)
+  unsigned int n2 = n*n;
+
+  switch (dim)
+    {
+    case 1:
+    {
+      // in 1d, things are simple. since
+      // there is only one degree of
+      // freedom per vertex in this
+      // class, the first is on vertex 0
+      // (==face 0 in some sense), the
+      // second on face 1:
+      return (((shape_index == 0) && (face_index == 0)) ||
+              ((shape_index == this->degree) && (face_index == 1)));
+    };
+
+    case 2:
+    {
+      // faces 0/1: first/last index within a row of n;
+      // faces 2/3: first/last row of n indices
+      if (face_index==0 && (shape_index % n) == 0)
+        return true;
+      if (face_index==1 && (shape_index % n) == this->degree)
+        return true;
+      if (face_index==2 && shape_index < n)
+        return true;
+      if (face_index==3 && shape_index >= this->dofs_per_cell-n)
+        return true;
+      return false;
+    };
+
+    case 3:
+    {
+      // position of the index within its layer of n2 indices
+      const unsigned int in2 = shape_index % n2;
+
+      // x=0
+      if (face_index==0 && (shape_index % n) == 0)
+        return true;
+      // x=1
+      if (face_index==1 && (shape_index % n) == n-1)
+        return true;
+      // y=0
+      if (face_index==2 && in2 < n )
+        return true;
+      // y=1
+      if (face_index==3 && in2 >= n2-n)
+        return true;
+      // z=0
+      if (face_index==4 && shape_index < n2)
+        return true;
+      // z=1
+      if (face_index==5 && shape_index >= this->dofs_per_cell - n2)
+        return true;
+      return false;
+    };
+
+    default:
+      Assert (false, ExcNotImplemented());
+    }
+  // only reached for dimensions not handled above
+  return true;
+}
+
+
+
+template <int dim, int spacedim>
+std::pair<Table<2,bool>, std::vector<unsigned int> >
+FE_DGQ<dim,spacedim>::get_constant_modes () const
+{
+  // A single row with one entry per cell degree of freedom, all set to
+  // true, paired with a one-element list holding component 0.
+  Table<2,bool> all_dofs_flagged(1, this->dofs_per_cell);
+  all_dofs_flagged.fill(true);
+
+  const std::vector<unsigned int> component_list(1, 0);
+
+  return std::pair<Table<2,bool>, std::vector<unsigned int> >
+         (all_dofs_flagged, component_list);
+}
+
+
+
+
+// Not implemented: triggers Assert(false, ExcNotImplemented()) and returns 0.
+template <int dim, int spacedim>
+std::size_t
+FE_DGQ<dim, spacedim>::memory_consumption () const
+{
+  Assert (false, ExcNotImplemented ());
+  return 0;
+}
+
+
+
+// Construct the element from a one-dimensional quadrature formula; all work
+// is delegated to the FE_DGQ constructor taking the same argument.
+template <int dim, int spacedim>
+FE_DGQArbitraryNodes<dim,spacedim>::FE_DGQArbitraryNodes (const Quadrature<1> &points)
+  : FE_DGQ<dim,spacedim>(points)
+{}
+
+
+
+// Build a name for this element by comparing its one-dimensional node
+// positions against a list of known point sets, in this order: equidistant
+// points (reported as plain FE_DGQ), QGaussLobatto, QGauss, QGaussLog. If
+// none matches, the nodes are reported as "QUnknownNodes".
+template <int dim, int spacedim>
+std::string
+FE_DGQArbitraryNodes<dim,spacedim>::get_name () const
+{
+  // note that the FETools::get_fe_from_name function does not work for
+  // FE_DGQArbitraryNodes since there is no initialization by a degree value.
+  std::ostringstream namebuf;
+  std::vector<double> points(this->degree+1);
+
+  // collect the x-coordinate of the first degree+1 support points in the
+  // ordering given by the inverse numbering of the polynomial space
+  std::vector<unsigned int> lexicographic = this->poly_space.get_numbering_inverse();
+  for (unsigned int j=0; j<=this->degree; j++)
+    points[j] = this->unit_support_points[lexicographic[j]][0];
+
+  // Check whether the support points are equidistant. For degree 0 the
+  // reference position j/degree would be 0/0; that NaN never compared as a
+  // mismatch, so a single-node element has always been reported as
+  // equidistant. Keep that outcome, but skip the invalid division.
+  bool equidistant = true;
+  if (this->degree > 0)
+    for (unsigned int j=0; j<=this->degree; j++)
+      if (std::fabs(points[j] - static_cast<double>(j)/this->degree) > 1e-15)
+        {
+          equidistant = false;
+          break;
+        }
+
+  if (equidistant == true)
+    {
+      namebuf << "FE_DGQ<" << Utilities::dim_string(dim,spacedim) << ">(" << this->degree << ")";
+      return namebuf.str();
+    }
+
+  // Check whether the support points come from QGaussLobatto.
+  const QGaussLobatto<1> points_gl(this->degree+1);
+  bool gauss_lobatto = true;
+  for (unsigned int j=0; j<=this->degree; j++)
+    if (points[j] != points_gl.point(j)(0))
+      {
+        gauss_lobatto = false;
+        break;
+      }
+
+  if (gauss_lobatto == true)
+    {
+      namebuf << "FE_DGQArbitraryNodes<" << Utilities::dim_string(dim,spacedim) << ">(QGaussLobatto(" << this->degree+1 << "))";
+      return namebuf.str();
+    }
+
+  // Check whether the support points come from QGauss.
+  const QGauss<1> points_g(this->degree+1);
+  bool gauss = true;
+  for (unsigned int j=0; j<=this->degree; j++)
+    if (points[j] != points_g.point(j)(0))
+      {
+        gauss = false;
+        break;
+      }
+
+  if (gauss == true)
+    {
+      namebuf << "FE_DGQArbitraryNodes<" << Utilities::dim_string(dim,spacedim) << ">(QGauss(" << this->degree+1 << "))";
+      return namebuf.str();
+    }
+
+  // Check whether the support points come from QGaussLog.
+  const QGaussLog<1> points_glog(this->degree+1);
+  bool gauss_log = true;
+  for (unsigned int j=0; j<=this->degree; j++)
+    if (points[j] != points_glog.point(j)(0))
+      {
+        gauss_log = false;
+        break;
+      }
+
+  if (gauss_log == true)
+    {
+      namebuf << "FE_DGQArbitraryNodes<" << Utilities::dim_string(dim,spacedim) << ">(QGaussLog(" << this->degree+1 << "))";
+      return namebuf.str();
+    }
+
+  // All guesses exhausted
+  namebuf << "FE_DGQArbitraryNodes<" << Utilities::dim_string(dim,spacedim) << ">(QUnknownNodes(" << this->degree+1 << "))";
+  return namebuf.str();
+}
+
+
+
+template <int dim, int spacedim>
+FiniteElement<dim,spacedim> *
+FE_DGQArbitraryNodes<dim,spacedim>::clone() const
+{
+  // Rebuild the one-dimensional node set from the x-coordinates of the
+  // first degree+1 support points (taken in the inverse numbering of the
+  // polynomial space), wrap it in a quadrature object without weights, and
+  // construct a fresh element from that.
+  const std::vector<unsigned int> inverse_numbering = this->poly_space.get_numbering_inverse();
+
+  std::vector<Point<1> > nodes_1d(this->degree+1);
+  for (unsigned int k=0; k<=this->degree; ++k)
+    {
+      const double x = this->unit_support_points[inverse_numbering[k]][0];
+      nodes_1d[k] = Point<1>(x);
+    }
+
+  const Quadrature<1> node_quadrature(nodes_1d);
+  return new FE_DGQArbitraryNodes<dim,spacedim>(node_quadrature);
+}
+
+
+
+// explicit instantiations
+#include "fe_dgq.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/fe_dgq.inst.in b/source/fe/fe_dgq.inst.in
new file mode 100644
index 0000000..e3c6a0b
--- /dev/null
+++ b/source/fe/fe_dgq.inst.in
@@ -0,0 +1,35 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+// Explicit instantiations of FE_DGQ for every combination of dimension and
+// space dimension in which the dimension does not exceed the space dimension.
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+#if deal_II_dimension <= deal_II_space_dimension 	
+    template class FE_DGQ<deal_II_dimension, deal_II_space_dimension>;
+#endif
+  }
+
+
+// Explicit instantiations of FE_DGQArbitraryNodes for every dimension, plus
+// the variant with space dimension one larger for all dimensions except 3.
+for (deal_II_dimension : DIMENSIONS)
+  {
+    template class FE_DGQArbitraryNodes<deal_II_dimension>;
+
+#if deal_II_dimension != 3
+    template class FE_DGQArbitraryNodes<deal_II_dimension, deal_II_dimension+1>;
+#endif
+
+  }
+
diff --git a/source/fe/fe_face.cc b/source/fe/fe_face.cc
new file mode 100644
index 0000000..186dcbe
--- /dev/null
+++ b/source/fe/fe_face.cc
@@ -0,0 +1,783 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/fe/fe_face.h>
+#include <deal.II/fe/fe_poly_face.templates.h>
+#include <deal.II/fe/fe_nothing.h>
+#include <deal.II/fe/fe_tools.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/lac/householder.h>
+#include <sstream>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace
+{
+  // Node positions used for a face element of the given degree: a single
+  // point at 0.5 for degree zero, otherwise the points of the
+  // Gauss-Lobatto formula with degree+1 points.
+  std::vector<Point<1> >
+  get_QGaussLobatto_points (const unsigned int degree)
+  {
+    if (degree == 0)
+      return std::vector<Point<1> >(1, Point<1>(0.5));
+
+    return QGaussLobatto<1>(degree+1).get_points();
+  }
+}
+
+// Construct a face element of polynomial degree @p degree.
+//
+// The base class is set up with a tensor product Lagrange basis in dim-1
+// dimensions built on the points returned by get_QGaussLobatto_points().
+// Afterwards the constructor fills unit_face_support_points (tensor product
+// of those 1d points, x running fastest) and unit_support_points (the face
+// points placed on each of the faces of the unit cell).
+template <int dim, int spacedim>
+FE_FaceQ<dim,spacedim>::FE_FaceQ (const unsigned int degree)
+  :
+  FE_PolyFace<TensorProductPolynomials<dim-1>, dim, spacedim> (
+    TensorProductPolynomials<dim-1>(Polynomials::generate_complete_Lagrange_basis(get_QGaussLobatto_points(degree))),
+    FiniteElementData<dim>(get_dpo_vector(degree), 1, degree, FiniteElementData<dim>::L2),
+    std::vector<bool>(1,true))
+{
+  // initialize unit face support points
+  const unsigned int codim = dim-1;
+  this->unit_face_support_points.resize(Utilities::fixed_power<codim>(this->degree+1));
+
+  // degree zero: a single support point at the face midpoint
+  if (this->degree == 0)
+    for (unsigned int d=0; d<codim; ++d)
+      this->unit_face_support_points[0][d] = 0.5;
+  else
+    {
+      std::vector<Point<1> > points = get_QGaussLobatto_points(degree);
+
+      // tensor product of the 1d points; directions beyond codim collapse
+      // to a single loop iteration
+      unsigned int k=0;
+      for (unsigned int iz=0; iz <= ((codim>2) ? this->degree : 0) ; ++iz)
+        for (unsigned int iy=0; iy <= ((codim>1) ? this->degree : 0) ; ++iy)
+          for (unsigned int ix=0; ix<=this->degree; ++ix)
+            {
+              Point<codim> p;
+
+              p(0) = points[ix][0];
+              if (codim>1)
+                p(1) = points[iy][0];
+              if (codim>2)
+                p(2) = points[iz][0];
+
+              this->unit_face_support_points[k++] = p;
+            }
+      AssertDimension (k, this->unit_face_support_points.size());
+    }
+
+  // initialize unit support points (this makes it possible to assign initial
+  // values to FE_FaceQ)
+  this->unit_support_points.resize(GeometryInfo<dim>::faces_per_cell*
+                                   this->unit_face_support_points.size());
+  const unsigned int n_face_dofs = this->unit_face_support_points.size();
+  // d is the direction normal to the face pair (2*d, 2*d+1); e runs over the
+  // remaining cell directions and c counts the matching face coordinate
+  for (unsigned int i=0; i<n_face_dofs; ++i)
+    for (unsigned int d=0; d<dim; ++d)
+      {
+        for (unsigned int e=0, c=0; e<dim; ++e)
+          if (d!=e)
+            {
+              // faces in y-direction are oriented differently
+              unsigned int renumber = i;
+              if (dim == 3 && d == 1)
+                renumber = i/(degree+1)+(degree+1)*(i%(degree+1));
+              this->unit_support_points[n_face_dofs*2*d+i][e] =
+                this->unit_face_support_points[renumber][c];
+              this->unit_support_points[n_face_dofs*(2*d+1)+i][e] =
+                this->unit_face_support_points[renumber][c];
+              // the second face of the pair has normal coordinate 1; the
+              // first keeps the value left by resize()
+              this->unit_support_points[n_face_dofs*(2*d+1)+i][d] = 1;
+              ++c;
+            }
+      }
+}
+
+
+
+template <int dim, int spacedim>
+FiniteElement<dim,spacedim> *
+FE_FaceQ<dim,spacedim>::clone() const
+{
+  // A new element of the same degree is an equivalent copy; return it
+  // through a pointer to the base class.
+  const unsigned int same_degree = this->degree;
+  return new FE_FaceQ<dim,spacedim>(same_degree);
+}
+
+
+
+template <int dim, int spacedim>
+std::string
+FE_FaceQ<dim,spacedim>::get_name () const
+{
+  // The text assembled here has the form "FE_FaceQ<dims>(degree)".
+  // FETools::get_fe_from_name depends on exactly this format, so the two
+  // have to be changed together.
+  std::ostringstream name_stream;
+  name_stream << "FE_FaceQ<" << Utilities::dim_string(dim,spacedim) << ">(";
+  name_stream << this->degree;
+  name_stream << ")";
+
+  return name_stream.str();
+}
+
+
+
+// Interpolation from the face of @p source_fe to the face of this element.
+// Implemented by forwarding to get_subface_interpolation_matrix() with
+// numbers::invalid_unsigned_int as subface index, which that function
+// interprets as "the whole face".
+template <int dim, int spacedim>
+void
+FE_FaceQ<dim,spacedim>::
+get_face_interpolation_matrix (const FiniteElement<dim,spacedim> &source_fe,
+                               FullMatrix<double>       &interpolation_matrix) const
+{
+  get_subface_interpolation_matrix (source_fe, numbers::invalid_unsigned_int,
+                                    interpolation_matrix);
+}
+
+
+
+// Fill @p interpolation_matrix with the values of this element's face shape
+// functions at the face support points of @p x_source_fe.
+//
+// If @p subface equals numbers::invalid_unsigned_int the support points are
+// used as they are; otherwise they are first mapped from the given child to
+// the full face. The matrix must have x_source_fe.dofs_per_face rows and
+// this->dofs_per_face columns (checked by Assert). If the source element is
+// an FE_Nothing<dim>, nothing is written. For any other source element type
+// FiniteElement::ExcInterpolationNotImplemented is thrown.
+template <int dim, int spacedim>
+void
+FE_FaceQ<dim,spacedim>::
+get_subface_interpolation_matrix (const FiniteElement<dim,spacedim> &x_source_fe,
+                                  const unsigned int        subface,
+                                  FullMatrix<double>       &interpolation_matrix) const
+{
+  // this function is similar to the respective method in FE_Q
+
+  Assert (interpolation_matrix.n() == this->dofs_per_face,
+          ExcDimensionMismatch (interpolation_matrix.n(),
+                                this->dofs_per_face));
+  Assert (interpolation_matrix.m() == x_source_fe.dofs_per_face,
+          ExcDimensionMismatch (interpolation_matrix.m(),
+                                x_source_fe.dofs_per_face));
+
+  // see if source is a FaceQ element
+  if (const FE_FaceQ<dim,spacedim> *source_fe
+      = dynamic_cast<const FE_FaceQ<dim,spacedim> *>(&x_source_fe))
+    {
+
+      // Make sure that the element for which the DoFs should be constrained
+      // is the one with the higher polynomial degree.  Actually the procedure
+      // will work also if this assertion is not satisfied. But the matrices
+      // produced in that case might lead to problems in the hp procedures,
+      // which use this method.
+      Assert (this->dofs_per_face <= source_fe->dofs_per_face,
+              (typename FiniteElement<dim,spacedim>::
+               ExcInterpolationNotImplemented ()));
+
+      // generate a quadrature with the unit face support points.
+      const Quadrature<dim-1> face_quadrature (source_fe->get_unit_face_support_points ());
+
+      // Rule of thumb for FP accuracy, that can be expected for a given
+      // polynomial degree.  This value is used to cut off values close to
+      // zero.
+      const double eps = 2e-13*(this->degree+1)*(dim-1);
+
+      // compute the interpolation matrix by simply taking the value at the
+      // support points.
+      for (unsigned int i=0; i<source_fe->dofs_per_face; ++i)
+        {
+          // evaluation point: the i-th source support point, mapped to the
+          // full face if a subface was requested
+          const Point<dim-1> p =
+            subface == numbers::invalid_unsigned_int
+            ?
+            face_quadrature.point(i)
+            :
+            GeometryInfo<dim-1>::child_to_cell_coordinates (face_quadrature.point(i),
+                                                            subface);
+
+          for (unsigned int j=0; j<this->dofs_per_face; ++j)
+            {
+              double matrix_entry = this->poly_space.compute_value (j, p);
+
+              // Correct the interpolated value. I.e. if it is close to 1 or 0,
+              // make it exactly 1 or 0. Unfortunately, this is required to avoid
+              // problems with higher order elements.
+              if (std::fabs (matrix_entry - 1.0) < eps)
+                matrix_entry = 1.0;
+              if (std::fabs (matrix_entry) < eps)
+                matrix_entry = 0.0;
+
+              interpolation_matrix(i,j) = matrix_entry;
+            }
+        }
+
+      // make sure that the row sum of each of the matrices is 1 at this
+      // point. this must be so since the shape functions sum up to 1
+      for (unsigned int j=0; j<source_fe->dofs_per_face; ++j)
+        {
+          double sum = 0.;
+
+          for (unsigned int i=0; i<this->dofs_per_face; ++i)
+            sum += interpolation_matrix(j,i);
+
+          Assert (std::fabs(sum-1) < eps, ExcInternalError());
+        }
+    }
+  else if (dynamic_cast<const FE_Nothing<dim> *>(&x_source_fe) != 0)
+    {
+      // nothing to do here, the FE_Nothing has no degrees of freedom anyway
+    }
+  else
+    AssertThrow (false,(typename FiniteElement<dim,spacedim>::
+                        ExcInterpolationNotImplemented()));
+}
+
+
+
+template <int dim, int spacedim>
+bool
+FE_FaceQ<dim,spacedim>::has_support_on_face (
+  const unsigned int shape_index,
+  const unsigned int face_index) const
+{
+  // Shape functions are grouped in blocks of dofs_per_face; the block
+  // number of a shape function is compared with the requested face.
+  const unsigned int block_of_shape_function = shape_index/this->dofs_per_face;
+  return (face_index == block_of_shape_function);
+}
+
+
+
+template <int dim, int spacedim>
+std::vector<unsigned int>
+FE_FaceQ<dim,spacedim>::get_dpo_vector (const unsigned int deg)
+{
+  // All degrees of freedom are attached to the objects of dimension dim-1
+  // (the faces); every other entry stays zero. The face count is (deg+1)
+  // multiplied up once per face direction.
+  const unsigned int n_per_direction = deg+1;
+  unsigned int n_face_dofs = n_per_direction;
+  for (unsigned int d=1; d<dim-1; ++d)
+    n_face_dofs *= n_per_direction;
+
+  std::vector<unsigned int> dofs_per_object(dim+1, 0U);
+  dofs_per_object[dim-1] = n_face_dofs;
+  return dofs_per_object;
+}
+
+
+
+// Always reports true for this element.
+template <int dim, int spacedim>
+bool
+FE_FaceQ<dim,spacedim>::hp_constraints_are_implemented () const
+{
+  return true;
+}
+
+
+
+// Decide which of this element and @p fe_other dominates on a common face.
+//
+// - Another FE_FaceQ: the element of lower degree dominates; for equal
+//   degrees either one may dominate.
+// - An FE_Nothing: it dominates if it says so via is_dominating(), otherwise
+//   there are no requirements.
+// - Anything else is not implemented (assertion in debug mode, "neither
+//   dominates" as the fallback return value).
+template <int dim, int spacedim>
+FiniteElementDomination::Domination
+FE_FaceQ<dim,spacedim>::
+compare_for_face_domination (const FiniteElement<dim,spacedim> &fe_other) const
+{
+  const FE_FaceQ<dim,spacedim> *other_face_q
+    = dynamic_cast<const FE_FaceQ<dim,spacedim>*>(&fe_other);
+  if (other_face_q != 0)
+    {
+      if (this->degree == other_face_q->degree)
+        return FiniteElementDomination::either_element_can_dominate;
+
+      if (this->degree < other_face_q->degree)
+        return FiniteElementDomination::this_element_dominates;
+
+      return FiniteElementDomination::other_element_dominates;
+    }
+
+  const FE_Nothing<dim> *other_nothing
+    = dynamic_cast<const FE_Nothing<dim>*>(&fe_other);
+  if (other_nothing != 0)
+    {
+      // the FE_Nothing has no degrees of freedom and it is typically used in
+      // a context where we don't require any continuity along the interface,
+      // unless it explicitly declares itself as dominating
+      if (!other_nothing->is_dominating())
+        return FiniteElementDomination::no_requirements;
+
+      return FiniteElementDomination::other_element_dominates;
+    }
+
+  Assert (false, ExcNotImplemented());
+  return FiniteElementDomination::neither_element_dominates;
+}
+
+
+
+// Return the constant modes of the element: a table with a single row in
+// which every degree of freedom of the cell is flagged, paired with a
+// one-element vector holding the component index 0.
+template <int dim, int spacedim>
+std::pair<Table<2,bool>, std::vector<unsigned int> >
+FE_FaceQ<dim,spacedim>::get_constant_modes () const
+{
+  typedef std::pair<Table<2,bool>, std::vector<unsigned int> > ModesAndComponents;
+
+  const unsigned int n_dofs = this->dofs_per_cell;
+  Table<2,bool> modes(1, n_dofs);
+  for (unsigned int dof=0; dof<n_dofs; ++dof)
+    modes(0,dof) = true;
+
+  const std::vector<unsigned int> components(1, 0);
+  return ModesAndComponents(modes, components);
+}
+
+
+
+// ----------------------------- FE_FaceQ<1,spacedim> ------------------------
+
+// Constructor of the one-dimensional specialization. The base class is set up
+// as a scalar element (one component) with the dofs-per-object layout returned
+// by get_dpo_vector() and the L2 conformity tag.
+template <int spacedim>
+FE_FaceQ<1,spacedim>::FE_FaceQ (const unsigned int degree)
+  :
+  FiniteElement<1,spacedim> (FiniteElementData<1>(get_dpo_vector(degree),
+                                                  1,
+                                                  degree,
+                                                  FiniteElementData<1>::L2),
+                             std::vector<bool>(1,true),
+                             std::vector<ComponentMask> (1, ComponentMask(1,true)))
+{
+  // One unit support point per face of the unit interval (this makes it
+  // possible to assign initial values to FE_FaceQ). Only the second entry is
+  // set explicitly, to the point x=1; the first keeps the value resize()
+  // gave it.
+  this->unit_support_points.resize(GeometryInfo<1>::faces_per_cell);
+  this->unit_support_points[1] = Point<1>(1.);
+
+  // A face carries exactly one support point.
+  this->unit_face_support_points.resize(1);
+}
+
+
+
+// Return a newly heap-allocated FE_FaceQ<1,spacedim> constructed with the
+// same degree as this object. The object is created with plain 'new', so the
+// caller receives a raw pointer to it.
+template <int spacedim>
+FiniteElement<1,spacedim> *
+FE_FaceQ<1,spacedim>::clone() const
+{
+  return new FE_FaceQ<1,spacedim>(this->degree);
+}
+
+
+
+// Compose the name of this element in the form
+// "FE_FaceQ<" + dim_string(1,spacedim) + ">(" + degree + ")".
+template <int spacedim>
+std::string
+FE_FaceQ<1,spacedim>::get_name () const
+{
+  // FETools::get_fe_from_name relies on the exact format produced here, so
+  // both places have to be changed together.
+  std::ostringstream name;
+  name << "FE_FaceQ<";
+  name << Utilities::dim_string(1,spacedim);
+  name << ">(";
+  name << this->degree;
+  name << ")";
+
+  return name.str();
+}
+
+
+
+// Compute the face interpolation matrix by forwarding to
+// get_subface_interpolation_matrix() with numbers::invalid_unsigned_int as
+// the subface index.
+template <int spacedim>
+void
+FE_FaceQ<1,spacedim>::
+get_face_interpolation_matrix (const FiniteElement<1,spacedim> &source_fe,
+                               FullMatrix<double>       &interpolation_matrix) const
+{
+  get_subface_interpolation_matrix (source_fe, numbers::invalid_unsigned_int,
+                                    interpolation_matrix);
+}
+
+
+
+// Fill the (sub)face interpolation matrix for the one-dimensional element.
+// The subface index is ignored. After checking the matrix dimensions against
+// the dofs per face of this element (columns) and of the source element
+// (rows), the single entry (0,0) is set to one.
+template <int spacedim>
+void
+FE_FaceQ<1,spacedim>::
+get_subface_interpolation_matrix (const FiniteElement<1,spacedim> &x_source_fe,
+                                  const unsigned int        /*subface*/,
+                                  FullMatrix<double>       &interpolation_matrix) const
+{
+  // x_source_fe is only used inside the Assert macros below; the cast
+  // presumably silences unused-parameter warnings when Assert expands to
+  // nothing.
+  (void)x_source_fe;
+  Assert (interpolation_matrix.n() == this->dofs_per_face,
+          ExcDimensionMismatch (interpolation_matrix.n(),
+                                this->dofs_per_face));
+  Assert (interpolation_matrix.m() == x_source_fe.dofs_per_face,
+          ExcDimensionMismatch (interpolation_matrix.m(),
+                                x_source_fe.dofs_per_face));
+  interpolation_matrix(0,0) = 1.;
+}
+
+
+
+// Report whether shape function @p shape_index is attributed to face
+// @p face_index. The shape index is range-checked against 2 and must then
+// simply equal the face index.
+template <int spacedim>
+bool
+FE_FaceQ<1,spacedim>::has_support_on_face (
+  const unsigned int shape_index,
+  const unsigned int face_index) const
+{
+  AssertIndexRange(shape_index, 2);
+  return (face_index == shape_index);
+}
+
+
+
+// Build the "degrees of freedom per object" vector of the one-dimensional
+// element: one degree of freedom in the first entry, none in the second. The
+// polynomial degree argument is ignored.
+template <int spacedim>
+std::vector<unsigned int>
+FE_FaceQ<1,spacedim>::get_dpo_vector (const unsigned int)
+{
+  const unsigned int dofs_per_object[2] = { 1U, 0U };
+  return std::vector<unsigned int>(dofs_per_object, dofs_per_object+2);
+}
+
+
+
+// Tell callers that this element provides hp constraints; the answer is
+// unconditionally true.
+template <int spacedim>
+bool
+FE_FaceQ<1,spacedim>::hp_constraints_are_implemented () const
+{
+  return true;
+}
+
+
+
+// Face domination for the one-dimensional element: regardless of the other
+// element (the argument is unused), no requirements are reported.
+template <int spacedim>
+FiniteElementDomination::Domination
+FE_FaceQ<1,spacedim>::
+compare_for_face_domination (const FiniteElement<1,spacedim> &/*fe_other*/) const
+{
+  return FiniteElementDomination::no_requirements;
+}
+
+
+
+// Return the constant modes of the element: a table with a single row in
+// which every degree of freedom of the cell is flagged, paired with a
+// one-element vector holding the component index 0.
+template <int spacedim>
+std::pair<Table<2,bool>, std::vector<unsigned int> >
+FE_FaceQ<1,spacedim>::get_constant_modes () const
+{
+  typedef std::pair<Table<2,bool>, std::vector<unsigned int> > ModesAndComponents;
+
+  const unsigned int n_dofs = this->dofs_per_cell;
+  Table<2,bool> modes(1, n_dofs);
+  for (unsigned int dof=0; dof<n_dofs; ++dof)
+    modes(0,dof) = true;
+
+  const std::vector<unsigned int> components(1,0);
+  return ModesAndComponents(modes, components);
+}
+
+
+
+// Translate the requested update flags into the set this element needs:
+// - update_values is passed through if requested;
+// - gradients and hessians each additionally pull in
+//   update_covariant_transformation;
+// - cell normal vectors additionally pull in update_JxW_values.
+// All other bits of @p flags are dropped.
+template <int spacedim>
+UpdateFlags
+FE_FaceQ<1,spacedim>::requires_update_flags (const UpdateFlags flags) const
+{
+  UpdateFlags required = flags & update_values;
+
+  if (flags & update_gradients)
+    required = required | update_gradients | update_covariant_transformation;
+
+  if (flags & update_hessians)
+    required = required | update_hessians | update_covariant_transformation;
+
+  if (flags & update_cell_normal_vectors)
+    required = required | update_cell_normal_vectors | update_JxW_values;
+
+  return required;
+}
+
+
+// Cell-interior evaluation hook. All arguments are ignored and nothing is
+// written to the output object, because this element has no values in the
+// interior of a cell.
+template <int spacedim>
+void
+FE_FaceQ<1,spacedim>::
+fill_fe_values(const typename Triangulation<1,spacedim>::cell_iterator &,
+               const CellSimilarity::Similarity                                   ,
+               const Quadrature<1> &,
+               const Mapping<1,spacedim> &,
+               const typename Mapping<1,spacedim>::InternalDataBase &,
+               const dealii::internal::FEValues::MappingRelatedData<1, spacedim> &,
+               const typename FiniteElement<1,spacedim>::InternalDataBase &,
+               dealii::internal::FEValues::FiniteElementRelatedData<1, spacedim> &) const
+{
+  // Do nothing, since we do not have values in the interior
+}
+
+
+
+// Fill the shape values on face @p face. If values were requested
+// (update_values is set in fe_internal.update_each), column 0 of the shape
+// value table is zeroed for all degrees of freedom of the cell and the entry
+// whose row index equals the face number is then set to one. Without
+// update_values nothing is written.
+template <int spacedim>
+void
+FE_FaceQ<1,spacedim>::
+fill_fe_face_values (const typename Triangulation<1,spacedim>::cell_iterator &,
+                     const unsigned int                                                 face,
+                     const Quadrature<0> &,
+                     const Mapping<1,spacedim> &,
+                     const typename Mapping<1,spacedim>::InternalDataBase &,
+                     const dealii::internal::FEValues::MappingRelatedData<1, spacedim> &,
+                     const typename FiniteElement<1,spacedim>::InternalDataBase        &fe_internal,
+                     dealii::internal::FEValues::FiniteElementRelatedData<1, spacedim> &output_data) const
+{
+  if (!(fe_internal.update_each & update_values))
+    return;
+
+  const unsigned int n_dofs = this->dofs_per_cell;
+  for (unsigned int dof=0; dof<n_dofs; ++dof)
+    output_data.shape_values(dof,0) = 0.;
+
+  // the shape function with the same index as the face is the one that is
+  // switched on
+  output_data.shape_values(face,0) = 1;
+}
+
+
+// Subface evaluation hook for the one-dimensional element. All arguments are
+// ignored; the function only raises an assertion with an explanatory message
+// because there are no sub-face values in 1D.
+template <int spacedim>
+void
+FE_FaceQ<1,spacedim>::
+fill_fe_subface_values (const typename Triangulation<1,spacedim>::cell_iterator &,
+                        const unsigned int                                                 ,
+                        const unsigned int                                                 ,
+                        const Quadrature<0> &,
+                        const Mapping<1,spacedim> &,
+                        const typename Mapping<1,spacedim>::InternalDataBase &,
+                        const dealii::internal::FEValues::MappingRelatedData<1, spacedim> &,
+                        const typename FiniteElement<1,spacedim>::InternalDataBase &,
+                        dealii::internal::FEValues::FiniteElementRelatedData<1, spacedim> &) const
+{
+  Assert(false, ExcMessage("There are no sub-face values to fill in 1D!"));
+}
+
+
+
+// --------------------------------------- FE_FaceP --------------------------
+
+// Constructor. All work happens in the base class initializer: FE_PolyFace is
+// given a PolynomialSpace<dim-1> built from the complete Legendre basis of the
+// requested degree, the element data (dofs-per-object layout from
+// get_dpo_vector(), one component, the degree, and the L2 conformity tag),
+// and a one-element vector of 'true' flags.
+template <int dim, int spacedim>
+FE_FaceP<dim,spacedim>::FE_FaceP (const unsigned int degree)
+  :
+  FE_PolyFace<PolynomialSpace<dim-1>, dim, spacedim>
+  (PolynomialSpace<dim-1>(Polynomials::Legendre::generate_complete_basis(degree)),
+   FiniteElementData<dim>(get_dpo_vector(degree), 1, degree, FiniteElementData<dim>::L2),
+   std::vector<bool>(1,true))
+{}
+
+
+
+// Return a newly heap-allocated FE_FaceP<dim,spacedim> constructed with the
+// same degree as this object. The object is created with plain 'new', so the
+// caller receives a raw pointer to it.
+template <int dim, int spacedim>
+FiniteElement<dim,spacedim> *
+FE_FaceP<dim,spacedim>::clone() const
+{
+  return new FE_FaceP<dim,spacedim>(this->degree);
+}
+
+
+
+// Compose the name of this element in the form
+// "FE_FaceP<" + dim_string(dim,spacedim) + ">(" + degree + ")".
+template <int dim, int spacedim>
+std::string
+FE_FaceP<dim,spacedim>::get_name () const
+{
+  // FETools::get_fe_from_name relies on the exact format produced here, so
+  // both places have to be changed together.
+  std::ostringstream name;
+  name << "FE_FaceP<";
+  name << Utilities::dim_string(dim,spacedim);
+  name << ">(";
+  name << this->degree;
+  name << ")";
+
+  return name.str();
+}
+
+
+
+// Report whether shape function @p shape_index is attributed to face
+// @p face_index. The owning face is the integer quotient of the shape index
+// and the number of degrees of freedom per face.
+template <int dim, int spacedim>
+bool
+FE_FaceP<dim,spacedim>::has_support_on_face (
+  const unsigned int shape_index,
+  const unsigned int face_index) const
+{
+  const unsigned int owning_face = shape_index / this->dofs_per_face;
+  return owning_face == face_index;
+}
+
+
+
+// Build the "degrees of freedom per object" vector for polynomial degree
+// @p deg. Only the entry with index dim-1 is non-zero. It starts at deg+1 and,
+// for each i = 1, ..., dim-2, is multiplied by (deg+1+i) and then divided by
+// (i+1) using integer arithmetic.
+template <int dim, int spacedim>
+std::vector<unsigned int>
+FE_FaceP<dim,spacedim>::get_dpo_vector (const unsigned int deg)
+{
+  unsigned int n_face_dofs = deg+1;
+  for (unsigned int i=1; i<dim-1; ++i)
+    n_face_dofs = n_face_dofs * (deg+1+i) / (i+1);
+
+  std::vector<unsigned int> dofs_per_object(dim+1, 0U);
+  dofs_per_object[dim-1] = n_face_dofs;
+  return dofs_per_object;
+}
+
+
+
+
+// Tell callers that this element provides hp constraints; the answer is
+// unconditionally true.
+template <int dim, int spacedim>
+bool
+FE_FaceP<dim,spacedim>::hp_constraints_are_implemented () const
+{
+  return true;
+}
+
+
+
+// Decide which of this element and @p fe_other dominates on a common face.
+//
+// - Another FE_FaceP: the element of lower degree dominates; for equal
+//   degrees either one may dominate.
+// - An FE_Nothing: it dominates if it says so via is_dominating(), otherwise
+//   there are no requirements.
+// - Anything else is not implemented (assertion in debug mode, "neither
+//   dominates" as the fallback return value).
+template <int dim, int spacedim>
+FiniteElementDomination::Domination
+FE_FaceP<dim,spacedim>::
+compare_for_face_domination (const FiniteElement<dim,spacedim> &fe_other) const
+{
+  const FE_FaceP<dim,spacedim> *other_face_p
+    = dynamic_cast<const FE_FaceP<dim,spacedim>*>(&fe_other);
+  if (other_face_p != 0)
+    {
+      if (this->degree == other_face_p->degree)
+        return FiniteElementDomination::either_element_can_dominate;
+
+      if (this->degree < other_face_p->degree)
+        return FiniteElementDomination::this_element_dominates;
+
+      return FiniteElementDomination::other_element_dominates;
+    }
+
+  const FE_Nothing<dim> *other_nothing
+    = dynamic_cast<const FE_Nothing<dim>*>(&fe_other);
+  if (other_nothing != 0)
+    {
+      // the FE_Nothing has no degrees of freedom and it is typically used in
+      // a context where we don't require any continuity along the interface,
+      // unless it explicitly declares itself as dominating
+      if (!other_nothing->is_dominating())
+        return FiniteElementDomination::no_requirements;
+
+      return FiniteElementDomination::other_element_dominates;
+    }
+
+  Assert (false, ExcNotImplemented());
+  return FiniteElementDomination::neither_element_dominates;
+}
+
+
+
+
+// Compute the face interpolation matrix by forwarding to
+// get_subface_interpolation_matrix() with numbers::invalid_unsigned_int as
+// the subface index, which that function treats as "whole face".
+template <int dim, int spacedim>
+void
+FE_FaceP<dim,spacedim>::
+get_face_interpolation_matrix (const FiniteElement<dim,spacedim> &source_fe,
+                               FullMatrix<double>       &interpolation_matrix) const
+{
+  get_subface_interpolation_matrix (source_fe, numbers::invalid_unsigned_int,
+                                    interpolation_matrix);
+}
+
+
+
+// Compute the interpolation matrix between this element and @p x_source_fe
+// on a face or on one of its subfaces.
+//
+// @param x_source_fe          the other element; only FE_FaceP and
+//                             FE_Nothing are handled, anything else throws
+//                             ExcInterpolationNotImplemented.
+// @param subface              index of the subface, or
+//                             numbers::invalid_unsigned_int for the whole
+//                             face.
+// @param interpolation_matrix output; must have this->dofs_per_face columns
+//                             and x_source_fe.dofs_per_face rows. For an
+//                             FE_Nothing source it is left untouched.
+//
+// For an FE_FaceP source the matrix is obtained column by column from a
+// least squares problem (Householder factorization) posed at Gauss
+// quadrature points on the face.
+template <int dim, int spacedim>
+void
+FE_FaceP<dim,spacedim>::
+get_subface_interpolation_matrix (const FiniteElement<dim,spacedim> &x_source_fe,
+                                  const unsigned int        subface,
+                                  FullMatrix<double>       &interpolation_matrix) const
+{
+  // this function is similar to the respective method in FE_Q
+
+  Assert (interpolation_matrix.n() == this->dofs_per_face,
+          ExcDimensionMismatch (interpolation_matrix.n(),
+                                this->dofs_per_face));
+  Assert (interpolation_matrix.m() == x_source_fe.dofs_per_face,
+          ExcDimensionMismatch (interpolation_matrix.m(),
+                                x_source_fe.dofs_per_face));
+
+  // see if source is a FaceP element
+  if (const FE_FaceP<dim,spacedim> *source_fe
+      = dynamic_cast<const FE_FaceP<dim,spacedim> *>(&x_source_fe))
+    {
+      // Make sure that the element for which the DoFs should be constrained
+      // is the one with the higher polynomial degree.  Actually the procedure
+      // will work also if this assertion is not satisfied. But the matrices
+      // produced in that case might lead to problems in the hp procedures,
+      // which use this method.
+      Assert (this->dofs_per_face <= source_fe->dofs_per_face,
+              (typename FiniteElement<dim,spacedim>::
+               ExcInterpolationNotImplemented ()));
+
+      // do this as in FETools by solving a least squares problem where we
+      // force the source FE polynomial to be equal the given FE on all
+      // quadrature points
+      const QGauss<dim-1> face_quadrature (source_fe->degree+1);
+
+      // Rule of thumb for FP accuracy, that can be expected for a given
+      // polynomial degree.  This value is used to cut off values close to
+      // zero.
+      const double eps = 2e-13*(this->degree+1)*(dim-1);
+
+      // Rows correspond to quadrature points, columns to the source
+      // element's face shape functions.
+      FullMatrix<double> mass (face_quadrature.size(), source_fe->dofs_per_face);
+
+      for (unsigned int k = 0; k < face_quadrature.size(); ++k)
+        {
+          // On a subface the quadrature point is first mapped with
+          // child_to_cell_coordinates(); on the whole face it is used as is.
+          const Point<dim-1> p =
+            subface == numbers::invalid_unsigned_int ?
+            face_quadrature.point(k) :
+            GeometryInfo<dim-1>::child_to_cell_coordinates (face_quadrature.point(k),
+                                                            subface);
+
+          for (unsigned int j = 0; j < source_fe->dofs_per_face; ++j)
+            mass (k , j) = source_fe->poly_space.compute_value(j, p);
+        }
+
+      Householder<double> H(mass);
+      Vector<double> v_in(face_quadrature.size());
+      Vector<double> v_out(source_fe->dofs_per_face);
+
+
+      // compute the interpolation matrix by evaluating on the fine side and
+      // then solving the least squares problem
+      for (unsigned int i=0; i<this->dofs_per_face; ++i)
+        {
+          for (unsigned int k = 0; k < face_quadrature.size(); ++k)
+            {
+              // NOTE(review): unlike the loop filling 'mass' above, the
+              // condition here is the constant numbers::invalid_unsigned_int
+              // itself rather than a comparison with 'subface'. That constant
+              // is presumably non-zero, in which case the first branch is
+              // always taken and the child_to_cell_coordinates() branch is
+              // never reached. Confirm whether this is intended before
+              // changing it: altering the evaluation point changes the
+              // resulting matrix.
+              const Point<dim-1> p = numbers::invalid_unsigned_int ?
+                                     face_quadrature.point(k) :
+                                     GeometryInfo<dim-1>::child_to_cell_coordinates (face_quadrature.point(k),
+                                         subface);
+              v_in(k) = this->poly_space.compute_value(i, p);
+            }
+          // 'result' is only inspected by the Assert below; the cast
+          // presumably avoids an unused-variable warning when Assert expands
+          // to nothing.
+          const double result = H.least_squares(v_out, v_in);
+          (void)result;
+          Assert(result < 1e-12, FETools::ExcLeastSquaresError (result));
+
+          for (unsigned int j = 0; j < source_fe->dofs_per_face; ++j)
+            {
+              double matrix_entry = v_out(j);
+
+              // Correct the interpolated value. I.e. if it is close to 1 or 0,
+              // make it exactly 1 or 0. Unfortunately, this is required to avoid
+              // problems with higher order elements.
+              if (std::fabs (matrix_entry - 1.0) < eps)
+                matrix_entry = 1.0;
+              if (std::fabs (matrix_entry) < eps)
+                matrix_entry = 0.0;
+
+              // the solution for shape function i of this element becomes
+              // column i of the matrix
+              interpolation_matrix(j,i) = matrix_entry;
+            }
+        }
+    }
+  else if (dynamic_cast<const FE_Nothing<dim> *>(&x_source_fe) != 0)
+    {
+      // nothing to do here, the FE_Nothing has no degrees of freedom anyway
+    }
+  else
+    AssertThrow (false,(typename FiniteElement<dim,spacedim>::
+                        ExcInterpolationNotImplemented()));
+}
+
+
+
+// Return the constant modes of the element: a table with a single row in
+// which, for every face of the cell, the first degree of freedom belonging to
+// that face (index face*dofs_per_face) is flagged. The remaining entries keep
+// the value the Table constructor gave them. The table is paired with a
+// one-element vector holding the component index 0.
+template <int dim, int spacedim>
+std::pair<Table<2,bool>, std::vector<unsigned int> >
+FE_FaceP<dim,spacedim>::get_constant_modes () const
+{
+  typedef std::pair<Table<2,bool>, std::vector<unsigned int> > ModesAndComponents;
+
+  Table<2,bool> modes(1, this->dofs_per_cell);
+
+  unsigned int first_dof_on_face = 0;
+  for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+    {
+      modes(0, first_dof_on_face) = true;
+      first_dof_on_face += this->dofs_per_face;
+    }
+
+  const std::vector<unsigned int> components(1, 0);
+  return ModesAndComponents(modes, components);
+}
+
+
+
+// Constructor of the one-dimensional specialization: it only forwards the
+// degree to the FE_FaceQ<1,spacedim> base class.
+template <int spacedim>
+FE_FaceP<1,spacedim>::FE_FaceP (const unsigned int degree)
+  :
+  FE_FaceQ<1,spacedim> (degree)
+{}
+
+
+
+// Compose the name of this element in the form
+// "FE_FaceP<" + dim_string(1,spacedim) + ">(" + degree + ")".
+template <int spacedim>
+std::string
+FE_FaceP<1,spacedim>::get_name () const
+{
+  // FETools::get_fe_from_name relies on the exact format produced here, so
+  // both places have to be changed together.
+  std::ostringstream name;
+  name << "FE_FaceP<";
+  name << Utilities::dim_string(1,spacedim);
+  name << ">(";
+  name << this->degree;
+  name << ")";
+
+  return name.str();
+}
+
+
+
+// explicit instantiations
+#include "fe_face.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/fe_face.inst.in b/source/fe/fe_face.inst.in
new file mode 100644
index 0000000..9f42ca6
--- /dev/null
+++ b/source/fe/fe_face.inst.in
@@ -0,0 +1,28 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+// Explicit instantiations for every dimension listed in DIMENSIONS. The
+// FE_PolyFace base classes are only instantiated for dimensions larger than
+// one; FE_FaceQ and FE_FaceP are instantiated for all listed dimensions with
+// spacedim equal to dim.
+for (deal_II_dimension : DIMENSIONS)
+  {
+#if deal_II_dimension > 1
+    template class FE_PolyFace<TensorProductPolynomials<deal_II_dimension-1> >;
+    template class FE_PolyFace<PolynomialSpace<deal_II_dimension-1>, deal_II_dimension>;
+    //template class FE_PolyFace<PolynomialsP<deal_II_dimension>, deal_II_dimension>;
+#endif
+    template class FE_FaceQ<deal_II_dimension,deal_II_dimension>;
+    template class FE_FaceP<deal_II_dimension,deal_II_dimension>;
+  }
+
diff --git a/source/fe/fe_nedelec.cc b/source/fe/fe_nedelec.cc
new file mode 100644
index 0000000..d9011e1
--- /dev/null
+++ b/source/fe/fe_nedelec.cc
@@ -0,0 +1,5597 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/quadrature.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/qprojector.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/fe/mapping.h>
+#include <deal.II/fe/fe_nedelec.h>
+#include <deal.II/fe/fe_nothing.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/fe_tools.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/vector.h>
+#include <sstream>
+#include <iostream>
+
+//TODO: implement the adjust_quad_dof_index_for_face_orientation_table and
+//adjust_line_dof_index_for_line_orientation_table fields, and write tests
+//similar to bits/face_orientation_and_fe_q_*
+
+
+DEAL_II_NAMESPACE_OPEN
+
+//#define DEBUG_NEDELEC
+
+
+// Constructor for the Nedelec element with parameter @p p.
+//
+// The base class is initialized with the element data (dofs-per-object
+// layout from get_dpo_vector(p), dim components, polynomial degree p + 1 and
+// the Hcurl conformity tag). The body then
+//  - selects the Nedelec mapping type,
+//  - sets up the generalized support points via initialize_support_points(),
+//  - fills inverse_node_matrix with the identity,
+//  - computes the face embedding matrices and copies selected rows of them
+//    into interface_constraints, with a layout that depends on dim.
+template <int dim>
+FE_Nedelec<dim>::FE_Nedelec (const unsigned int p) :
+  FE_PolyTensor<PolynomialsNedelec<dim>, dim>
+  (p,
+   FiniteElementData<dim> (get_dpo_vector (p), dim, p + 1,
+                           FiniteElementData<dim>::Hcurl),
+   std::vector<bool> (PolynomialsNedelec<dim>::compute_n_pols (p), true),
+   std::vector<ComponentMask>
+   (PolynomialsNedelec<dim>::compute_n_pols (p),
+    std::vector<bool> (dim, true)))
+{
+#ifdef DEBUG_NEDELEC
+  deallog << get_name() << std::endl;
+#endif
+
+  Assert (dim >= 2, ExcImpossibleInDim(dim));
+
+  const unsigned int n_dofs = this->dofs_per_cell;
+
+  this->mapping_type = mapping_nedelec;
+  // First, initialize the
+  // generalized support points and
+  // quadrature weights, since they
+  // are required for interpolation.
+  initialize_support_points (p);
+  this->inverse_node_matrix.reinit (n_dofs, n_dofs);
+  this->inverse_node_matrix.fill
+  (FullMatrix<double> (IdentityMatrix (n_dofs)));
+  // From now on, the shape functions
+  // will be the correct ones, not
+  // the raw shape functions anymore.
+
+  // do not initialize embedding and restriction here. these matrices are
+  // initialized on demand in get_restriction_matrix and
+  // get_prolongation_matrix
+
+#ifdef DEBUG_NEDELEC
+  deallog << "Face Embedding" << std::endl;
+#endif
+  // One square matrix of size dofs_per_face per child of a face.
+  FullMatrix<double> face_embeddings[GeometryInfo<dim>::max_children_per_face];
+
+  for (unsigned int i = 0; i < GeometryInfo<dim>::max_children_per_face; ++i)
+    face_embeddings[i].reinit (this->dofs_per_face, this->dofs_per_face);
+
+  FETools::compute_face_embedding_matrices<dim,double>
+  (*this, face_embeddings, 0, 0);
+
+  switch (dim)
+    {
+    case 1:
+    {
+      this->interface_constraints.reinit (0, 0);
+      break;
+    }
+
+    case 2:
+    {
+      // The embedding matrices of the face children are stacked on top of
+      // each other, child after child.
+      this->interface_constraints.reinit (2 * this->dofs_per_face,
+                                          this->dofs_per_face);
+
+      for (unsigned int i = 0; i < GeometryInfo<2>::max_children_per_face;
+           ++i)
+        for (unsigned int j = 0; j < this->dofs_per_face; ++j)
+          for (unsigned int k = 0; k < this->dofs_per_face; ++k)
+            this->interface_constraints (i * this->dofs_per_face + j, k)
+              = face_embeddings[i] (j, k);
+
+      break;
+    }
+
+    case 3:
+    {
+      this->interface_constraints.reinit
+      (4 * (this->dofs_per_face - this->degree), this->dofs_per_face);
+
+      // Running index of the next constraint row to be written; each of the
+      // loop nests below appends a group of rows copied from the face
+      // embedding matrices.
+      unsigned int target_row = 0;
+
+      // Rows degree .. 2*degree-1 of the embeddings of children 0 and 2.
+      for (unsigned int i = 0; i < 2; ++i)
+        for (unsigned int j = this->degree; j < 2 * this->degree;
+             ++j, ++target_row)
+          for (unsigned int k = 0; k < this->dofs_per_face; ++k)
+            this->interface_constraints (target_row, k)
+              = face_embeddings[2 * i] (j, k);
+
+      // Rows 3*degree .. lines_per_face*degree-1 of the embeddings of
+      // children 0 and 1.
+      for (unsigned int i = 0; i < 2; ++i)
+        for (unsigned int j = 3 * this->degree;
+             j < GeometryInfo<3>::lines_per_face * this->degree;
+             ++j, ++target_row)
+          for (unsigned int k = 0; k < this->dofs_per_face; ++k)
+            this->interface_constraints (target_row, k)
+              = face_embeddings[i] (j, k);
+
+      // Rows i*degree .. (i+1)*degree-1 of the embeddings of children
+      // i + 2*j.
+      for (unsigned int i = 0; i < 2; ++i)
+        for (unsigned int j = 0; j < 2; ++j)
+          for (unsigned int k = i * this->degree;
+               k < (i + 1) * this->degree; ++k, ++target_row)
+            for (unsigned int l = 0; l < this->dofs_per_face; ++l)
+              this->interface_constraints (target_row, l)
+                = face_embeddings[i + 2 * j] (k, l);
+
+      // Rows (i+2)*degree .. (i+3)*degree-1 of the embeddings of children
+      // 2*i + j.
+      for (unsigned int i = 0; i < 2; ++i)
+        for (unsigned int j = 0; j < 2; ++j)
+          for (unsigned int k = (i + 2) * this->degree;
+               k < (i + 3) * this->degree; ++k, ++target_row)
+            for (unsigned int l = 0; l < this->dofs_per_face; ++l)
+              this->interface_constraints (target_row, l)
+                = face_embeddings[2 * i + j] (k, l);
+
+      // Finally, all rows from lines_per_face*degree onwards, for every
+      // child of the face.
+      for (unsigned int i = 0; i < GeometryInfo<3>::max_children_per_face;
+           ++i)
+        for (unsigned int
+             j = GeometryInfo<3>::lines_per_face * this->degree;
+             j < this->dofs_per_face; ++j, ++target_row)
+          for (unsigned int k = 0; k < this->dofs_per_face; ++k)
+            this->interface_constraints (target_row, k)
+              = face_embeddings[i] (j, k);
+
+      break;
+    }
+
+    default:
+      Assert (false, ExcNotImplemented ());
+    }
+
+}
+
+
+
+// Compose the name of this element in the form
+// "FE_Nedelec<" + dim + ">(" + (degree-1) + ")".
+template <int dim>
+std::string
+FE_Nedelec<dim>::get_name () const
+{
+  // FETools::get_fe_from_name relies on the exact format produced here, so
+  // both places have to be changed together.
+  std::ostringstream name;
+  name << "FE_Nedelec<";
+  name << dim;
+  name << ">(";
+  name << this->degree-1;
+  name << ")";
+
+  return name.str();
+}
+
+
+// Return a newly heap-allocated copy of this element, created with the copy
+// constructor. The object is created with plain 'new', so the caller receives
+// a raw pointer to it.
+template <int dim>
+FiniteElement<dim>
+*FE_Nedelec<dim>::clone () const
+{
+  return new FE_Nedelec<dim> (*this);
+}
+
+//---------------------------------------------------------------------------
+// Auxiliary and internal functions
+//---------------------------------------------------------------------------
+
+
+
+// Set the generalized support
+// points and precompute the
+// parts of the projection-based
+// interpolation, which does
+// not depend on the interpolated
+// function.
+// One-dimensional specialization: there is no implementation, the function
+// only raises an ExcNotImplemented assertion. The degree argument is unused.
+template <>
+void
+FE_Nedelec<1>::initialize_support_points (const unsigned int)
+{
+  Assert (false, ExcNotImplemented ());
+}
+
+
+
+// Two-dimensional specialization. Sets up
+//  - generalized_face_support_points: the points of a Gauss rule with
+//    degree + 1 points on the reference edge;
+//  - generalized_support_points: that rule projected onto all lines of the
+//    cell, followed (for degree > 0 only) by the points of a Gauss rule with
+//    degree + 1 points per direction in the cell;
+//  - boundary_weights (for degree > 0 only): edge quadrature weights
+//    multiplied by derivatives of Lobatto polynomials evaluated at the face
+//    support points.
+template <>
+void
+FE_Nedelec<2>::initialize_support_points (const unsigned int degree)
+{
+  const int dim = 2;
+
+  // Derivatives of the Lobatto polynomials with index 1, ..., degree + 1.
+  const std::vector<Polynomials::Polynomial<double> > &lobatto_basis
+    = Polynomials::Lobatto::generate_complete_basis (degree + 1);
+  std::vector<Polynomials::Polynomial<double> >
+  lobatto_derivatives (degree + 1);
+
+  for (unsigned int n = 0; n < lobatto_derivatives.size (); ++n)
+    lobatto_derivatives[n] = lobatto_basis[n + 1].derivative ();
+
+  // Gauss rule on the reference edge and its projection onto all lines of
+  // the cell; the quadrature points serve as support points.
+  const QGauss<dim - 1> edge_rule (degree + 1);
+  const unsigned int n_edge_points = edge_rule.size ();
+  const unsigned int n_boundary_points
+    = GeometryInfo<dim>::lines_per_cell * n_edge_points;
+  const Quadrature<dim> projected_edge_rule
+    = QProjector<dim>::project_to_all_faces (edge_rule);
+
+  // Support points on a single face.
+  this->generalized_face_support_points.resize (n_edge_points);
+
+  for (unsigned int q = 0; q < n_edge_points; ++q)
+    this->generalized_face_support_points[q] = edge_rule.point (q);
+
+  if (degree == 0)
+    {
+      // Lowest order: support points are only needed on the lines of the
+      // cell.
+      this->generalized_support_points.resize (n_boundary_points);
+
+      for (unsigned int line = 0;
+           line < GeometryInfo<dim>::lines_per_cell; ++line)
+        for (unsigned int q = 0; q < n_edge_points; ++q)
+          this->generalized_support_points[line * n_edge_points + q]
+            = projected_edge_rule.point
+              (QProjector<dim>::DataSetDescriptor::face
+               (line, true, false, false, n_edge_points) + q);
+
+      return;
+    }
+
+  // Positive degree: support points on the lines and in the interior of the
+  // cell, plus weights for the edge moments.
+  const QGauss<dim> interior_rule (degree + 1);
+  const unsigned int n_interior_points = interior_rule.size ();
+
+  this->generalized_support_points.resize
+  (n_boundary_points + n_interior_points);
+  boundary_weights.reinit (n_edge_points, degree);
+
+  // Boundary support points, stored line after line.
+  for (unsigned int line = 0;
+       line < GeometryInfo<dim>::lines_per_cell; ++line)
+    for (unsigned int q = 0; q < n_edge_points; ++q)
+      this->generalized_support_points[line * n_edge_points + q]
+        = projected_edge_rule.point
+          (QProjector<dim>::DataSetDescriptor::face
+           (line, true, false, false, n_edge_points) + q);
+
+  // Quadrature weight times the derivative of Lobatto polynomial i + 2 at
+  // the corresponding face support point.
+  for (unsigned int q = 0; q < n_edge_points; ++q)
+    for (unsigned int i = 0; i < degree; ++i)
+      boundary_weights (q, i)
+        = edge_rule.weight (q)
+          * lobatto_derivatives[i + 1].value
+          (this->generalized_face_support_points[q] (0));
+
+  // Interior support points follow the boundary ones.
+  for (unsigned int q = 0; q < n_interior_points; ++q)
+    this->generalized_support_points[n_boundary_points + q]
+      = interior_rule.point (q);
+}
+
+
+
+template <>
+void
+FE_Nedelec<3>::initialize_support_points (const unsigned int degree)
+{
+  const int dim = 3;
+
+  // Create polynomial basis.
+  const std::vector<Polynomials::Polynomial<double> > &lobatto_polynomials
+    = Polynomials::Lobatto::generate_complete_basis (degree + 1);
+  std::vector<Polynomials::Polynomial<double> >
+  lobatto_polynomials_grad (degree + 1);
+
+  for (unsigned int i = 0; i < lobatto_polynomials_grad.size (); ++i)
+    lobatto_polynomials_grad[i] = lobatto_polynomials[i + 1].derivative ();
+
+  // Initialize quadratures to obtain
+  // quadrature points later on.
+  const QGauss<1> reference_edge_quadrature (degree + 1);
+  const unsigned int &n_edge_points = reference_edge_quadrature.size ();
+  const Quadrature<dim - 1>& edge_quadrature
+    = QProjector<dim - 1>::project_to_all_faces
+      (reference_edge_quadrature);
+
+  if (degree > 0)
+    {
+      // If the polynomial degree is positive
+      // we have support points on the edges,
+      // faces and in the interior of a cell.
+      const QGauss<dim - 1> reference_face_quadrature (degree + 1);
+      const unsigned int &n_face_points
+        = reference_face_quadrature.size ();
+      const unsigned int n_boundary_points
+        = GeometryInfo<dim>::lines_per_cell * n_edge_points
+          + GeometryInfo<dim>::faces_per_cell * n_face_points;
+      const QGauss<dim> quadrature (degree + 1);
+      const unsigned int &n_interior_points = quadrature.size ();
+
+      boundary_weights.reinit (n_edge_points + n_face_points,
+                               2 * (degree + 1) * degree);
+      this->generalized_face_support_points.resize
+      (4 * n_edge_points + n_face_points);
+      this->generalized_support_points.resize
+      (n_boundary_points + n_interior_points);
+
+      // Create support points on edges.
+      for (unsigned int q_point = 0; q_point < n_edge_points; ++q_point)
+        {
+          for (unsigned int line = 0;
+               line < GeometryInfo<dim - 1>::lines_per_cell; ++line)
+            this->generalized_face_support_points[line * n_edge_points
+                                                  + q_point]
+              = edge_quadrature.point
+                (QProjector<dim - 1>::DataSetDescriptor::face
+                 (line, true, false, false, n_edge_points) + q_point);
+
+          for (unsigned int i = 0; i < 2; ++i)
+            for (unsigned int j = 0; j < 2; ++j)
+              {
+                this->generalized_support_points
+                [q_point + (i + 4 * j) * n_edge_points]
+                  = Point<dim>
+                    (i, reference_edge_quadrature.point (q_point) (0),
+                     j);
+                this->generalized_support_points
+                [q_point + (i + 4 * j + 2) * n_edge_points]
+                  = Point<dim>
+                    (reference_edge_quadrature.point (q_point) (0),
+                     i, j);
+                this->generalized_support_points
+                [q_point + (i + 2 * (j + 4)) * n_edge_points]
+                  = Point<dim>
+                    (i, j,
+                     reference_edge_quadrature.point (q_point) (0));
+              }
+
+          for (unsigned int i = 0; i < degree; ++i)
+            boundary_weights (q_point, i)
+              = reference_edge_quadrature.weight (q_point)
+                * lobatto_polynomials_grad[i + 1].value
+                (this->generalized_face_support_points[q_point] (1));
+        }
+
+      // Create support points on faces.
+      for (unsigned int q_point = 0; q_point < n_face_points;
+           ++q_point)
+        {
+          this->generalized_face_support_points[q_point
+                                                + 4 * n_edge_points]
+            = reference_face_quadrature.point (q_point);
+
+          for (unsigned int i = 0; i <= degree; ++i)
+            for (unsigned int j = 0; j < degree; ++j)
+              {
+                boundary_weights (q_point + n_edge_points,
+                                  2 * (i * degree + j))
+                  = reference_face_quadrature.weight (q_point)
+                    * lobatto_polynomials_grad[i].value
+                    (this->generalized_face_support_points
+                     [q_point + 4 * n_edge_points] (0))
+                    * lobatto_polynomials[j + 2].value
+                    (this->generalized_face_support_points
+                     [q_point + 4 * n_edge_points] (1));
+                boundary_weights (q_point + n_edge_points,
+                                  2 * (i * degree + j) + 1)
+                  = reference_face_quadrature.weight (q_point)
+                    * lobatto_polynomials_grad[i].value
+                    (this->generalized_face_support_points
+                     [q_point + 4 * n_edge_points] (1))
+                    * lobatto_polynomials[j + 2].value
+                    (this->generalized_face_support_points
+                     [q_point + 4 * n_edge_points] (0));
+              }
+        }
+
+      const Quadrature<dim> &face_quadrature
+        = QProjector<dim>::project_to_all_faces
+          (reference_face_quadrature);
+
+      for (unsigned int face = 0;
+           face < GeometryInfo<dim>::faces_per_cell; ++face)
+        for (unsigned int q_point = 0; q_point < n_face_points;
+             ++q_point)
+          {
+            this->generalized_support_points
+            [face * n_face_points + q_point
+             + GeometryInfo<dim>::lines_per_cell * n_edge_points]
+              = face_quadrature.point
+                (QProjector<dim>::DataSetDescriptor::face
+                 (face, true, false, false, n_face_points) + q_point);
+          }
+
+      // Create support points in the interior.
+      for (unsigned int q_point = 0; q_point < n_interior_points;
+           ++q_point)
+        this->generalized_support_points[q_point + n_boundary_points]
+          = quadrature.point (q_point);
+    }
+
+  else
+    {
+      this->generalized_face_support_points.resize (4 * n_edge_points);
+      this->generalized_support_points.resize
+      (GeometryInfo<dim>::lines_per_cell * n_edge_points);
+
+      for (unsigned int q_point = 0; q_point < n_edge_points;
+           ++q_point)
+        {
+          for (unsigned int line = 0;
+               line < GeometryInfo<dim - 1>::lines_per_cell; ++line)
+            this->generalized_face_support_points[line * n_edge_points
+                                                  + q_point]
+              = edge_quadrature.point
+                (QProjector<dim - 1>::DataSetDescriptor::face
+                 (line, true, false, false, n_edge_points) + q_point);
+
+          for (unsigned int i = 0; i < 2; ++i)
+            for (unsigned int j = 0; j < 2; ++j)
+              {
+                this->generalized_support_points
+                [q_point + (i + 4 * j) * n_edge_points]
+                  = Point<dim>
+                    (i, reference_edge_quadrature.point (q_point) (0),
+                     j);
+                this->generalized_support_points
+                [q_point + (i + 4 * j + 2) * n_edge_points]
+                  = Point<dim>
+                    (reference_edge_quadrature.point (q_point) (0),
+                     i, j);
+                this->generalized_support_points
+                [q_point + (i + 2 * (j + 4)) * n_edge_points]
+                  = Point<dim>
+                    (i, j,
+                     reference_edge_quadrature.point (q_point) (0));
+              }
+        }
+    }
+}
+
+
+
+// Restriction matrices for the 1d specialization.
+template <>
+void
+FE_Nedelec<1>::initialize_restriction ()
+{
+  // 1d has only the isotropic refinement case (slot 0); size each child 0 x 0
+  const unsigned int n_children = GeometryInfo<1>::max_children_per_cell;
+  for (unsigned int child = 0; child != n_children; ++child)
+    this->restriction[0][child].reinit (0, 0);
+}
+
+
+
+// Restriction operator
+template <int dim>
+void
+FE_Nedelec<dim>::initialize_restriction ()
+{
+  // This function does the same as the
+  // function interpolate further below.
+  // But since the functions which we
+  // interpolate here are discontinuous,
+  // we have to use more quadrature
+  // points than in interpolate.
+  const QGauss<1> edge_quadrature (2 * this->degree);
+  const std::vector<Point<1> > &edge_quadrature_points
+    = edge_quadrature.get_points ();
+  const unsigned int &
+  n_edge_quadrature_points = edge_quadrature.size ();
+  const unsigned int
+  index = RefinementCase<dim>::isotropic_refinement - 1;
+
+  switch (dim)
+    {
+    case 2:
+    {
+      // First interpolate the shape
+      // functions of the child cells
+      // to the lowest order shape
+      // functions of the parent cell.
+      for (unsigned int dof = 0; dof < this->dofs_per_cell; ++dof)
+        for (unsigned int q_point = 0; q_point < n_edge_quadrature_points;
+             ++q_point)
+          {
+            const double weight = 2.0 * edge_quadrature.weight (q_point);
+
+            if (edge_quadrature_points[q_point] (0) < 0.5)
+              {
+                Point<dim> quadrature_point (0.0,
+                                             2.0 * edge_quadrature_points[q_point] (0));
+
+                this->restriction[index][0] (0, dof) += weight
+                                                        * this->shape_value_component
+                                                        (dof,
+                                                         quadrature_point,
+                                                         1);
+                quadrature_point (0) = 1.0;
+                this->restriction[index][1] (this->degree, dof)
+                += weight * this->shape_value_component (dof,
+                                                         quadrature_point,
+                                                         1);
+                quadrature_point (0) = quadrature_point (1);
+                quadrature_point (1) = 0.0;
+                this->restriction[index][0] (2 * this->degree, dof)
+                += weight * this->shape_value_component (dof,
+                                                         quadrature_point,
+                                                         0);
+                quadrature_point (1) = 1.0;
+                this->restriction[index][2] (3 * this->degree, dof)
+                += weight * this->shape_value_component (dof,
+                                                         quadrature_point,
+                                                         0);
+              }
+
+            else
+              {
+                Point<dim> quadrature_point (0.0,
+                                             2.0 * edge_quadrature_points[q_point] (0)
+                                             - 1.0);
+
+                this->restriction[index][2] (0, dof) += weight
+                                                        * this->shape_value_component
+                                                        (dof,
+                                                         quadrature_point,
+                                                         1);
+                quadrature_point (0) = 1.0;
+                this->restriction[index][3] (this->degree, dof)
+                += weight * this->shape_value_component (dof,
+                                                         quadrature_point,
+                                                         1);
+                quadrature_point (0) = quadrature_point (1);
+                quadrature_point (1) = 0.0;
+                this->restriction[index][1] (2 * this->degree, dof)
+                += weight * this->shape_value_component (dof,
+                                                         quadrature_point,
+                                                         0);
+                quadrature_point (1) = 1.0;
+                this->restriction[index][3] (3 * this->degree, dof)
+                += weight * this->shape_value_component (dof,
+                                                         quadrature_point,
+                                                         0);
+              }
+          }
+
+      // Then project the shape functions
+      // of the child cells to the higher
+      // order shape functions of the
+      // parent cell.
+      if (this->degree > 1)
+        {
+          const unsigned int deg = this->degree-1;
+          const std::vector<Polynomials::Polynomial<double> > &
+          legendre_polynomials
+            = Polynomials::Legendre::generate_complete_basis (deg);
+          FullMatrix<double> system_matrix_inv (deg, deg);
+
+          {
+            FullMatrix<double> assembling_matrix (deg,
+                                                  n_edge_quadrature_points);
+
+            for (unsigned int q_point = 0;
+                 q_point < n_edge_quadrature_points; ++q_point)
+              {
+                const double weight
+                  = std::sqrt (edge_quadrature.weight (q_point));
+
+                for (unsigned int i = 0; i < deg; ++i)
+                  assembling_matrix (i, q_point) = weight
+                                                   * legendre_polynomials[i + 1].value
+                                                   (edge_quadrature_points[q_point] (0));
+              }
+
+            FullMatrix<double> system_matrix (deg, deg);
+
+            assembling_matrix.mTmult (system_matrix, assembling_matrix);
+            system_matrix_inv.invert (system_matrix);
+          }
+
+          FullMatrix<double> solution (this->degree-1, 4);
+          FullMatrix<double> system_rhs (this->degree-1, 4);
+          Vector<double> tmp (4);
+
+          for (unsigned int dof = 0; dof < this->dofs_per_cell; ++dof)
+            for (unsigned int i = 0; i < 2; ++i)
+              {
+                system_rhs = 0.0;
+
+                for (unsigned int q_point = 0;
+                     q_point < n_edge_quadrature_points; ++q_point)
+                  {
+                    const double weight
+                      = edge_quadrature.weight (q_point);
+                    const Point<dim> quadrature_point_0 (i,
+                                                         edge_quadrature_points[q_point] (0));
+                    const Point<dim> quadrature_point_1
+                    (edge_quadrature_points[q_point] (0),
+                     i);
+
+                    if (edge_quadrature_points[q_point] (0) < 0.5)
+                      {
+                        Point<dim> quadrature_point_2 (i,
+                                                       2.0 * edge_quadrature_points[q_point] (0));
+
+                        tmp (0) = weight
+                                  * (2.0 * this->shape_value_component
+                                     (dof, quadrature_point_2, 1)
+                                     - this->restriction[index][i]
+                                     (i * this->degree, dof)
+                                     * this->shape_value_component
+                                     (i * this->degree,
+                                      quadrature_point_0, 1));
+                        tmp (1) = -1.0 * weight
+                                  * this->restriction[index][i + 2]
+                                  (i * this->degree, dof)
+                                  * this->shape_value_component
+                                  (i * this->degree,
+                                   quadrature_point_0, 1);
+                        quadrature_point_2
+                          = Point<dim> (2.0 * edge_quadrature_points[q_point] (0),
+                                        i);
+                        tmp (2) = weight
+                                  * (2.0 * this->shape_value_component
+                                     (dof, quadrature_point_2, 0)
+                                     - this->restriction[index][2 * i]
+                                     ((i + 2) * this->degree, dof)
+                                     * this->shape_value_component
+                                     ((i + 2) * this->degree,
+                                      quadrature_point_1, 0));
+                        tmp (3) = -1.0 * weight
+                                  * this->restriction[index][2 * i + 1]
+                                  ((i + 2) * this->degree, dof)
+                                  * this->shape_value_component
+                                  ((i + 2) * this->degree,
+                                   quadrature_point_1, 0);
+                      }
+
+                    else
+                      {
+                        tmp (0) = -1.0 * weight
+                                  * this->restriction[index][i]
+                                  (i * this->degree, dof)
+                                  * this->shape_value_component
+                                  (i * this->degree,
+                                   quadrature_point_0, 1);
+
+                        Point<dim> quadrature_point_2 (i,
+                                                       2.0 * edge_quadrature_points[q_point] (0)
+                                                       - 1.0);
+
+                        tmp (1) = weight
+                                  * (2.0 * this->shape_value_component
+                                     (dof, quadrature_point_2, 1)
+                                     - this->restriction[index][i + 2]
+                                     (i * this->degree, dof)
+                                     * this->shape_value_component
+                                     (i * this->degree,
+                                      quadrature_point_0, 1));
+                        tmp (2) = -1.0 * weight
+                                  * this->restriction[index][2 * i]
+                                  ((i + 2) * this->degree, dof)
+                                  * this->shape_value_component
+                                  ((i + 2) * this->degree,
+                                   quadrature_point_1, 0);
+                        quadrature_point_2
+                          = Point<dim> (2.0 * edge_quadrature_points[q_point] (0)
+                                        - 1.0, i);
+                        tmp (3) = weight
+                                  * (2.0 * this->shape_value_component
+                                     (dof, quadrature_point_2, 0)
+                                     - this->restriction[index][2 * i + 1]
+                                     ((i + 2) * this->degree, dof)
+                                     * this->shape_value_component
+                                     ((i + 2) * this->degree,
+                                      quadrature_point_1, 0));
+                      }
+
+                    for (unsigned int j = 0; j < this->degree-1; ++j)
+                      {
+                        const double L_j
+                          = legendre_polynomials[j + 1].value
+                            (edge_quadrature_points[q_point] (0));
+
+                        for (unsigned int k = 0; k < tmp.size (); ++k)
+                          system_rhs (j, k) += tmp (k) * L_j;
+                      }
+                  }
+
+                system_matrix_inv.mmult (solution, system_rhs);
+
+                for (unsigned int j = 0; j < this->degree-1; ++j)
+                  for (unsigned int k = 0; k < 2; ++k)
+                    {
+                      if (std::abs (solution (j, k)) > 1e-14)
+                        this->restriction[index][i + 2 * k]
+                        (i * this->degree + j + 1, dof)
+                          = solution (j, k);
+
+                      if (std::abs (solution (j, k + 2)) > 1e-14)
+                        this->restriction[index][2 * i + k]
+                        ((i + 2) * this->degree + j + 1, dof)
+                          = solution (j, k + 2);
+                    }
+              }
+
+          const QGauss<dim> quadrature (2 * this->degree);
+          const std::vector<Point<dim> > &
+          quadrature_points = quadrature.get_points ();
+          const std::vector<Polynomials::Polynomial<double> > &
+          lobatto_polynomials
+            = Polynomials::Lobatto::generate_complete_basis
+              (this->degree);
+          const unsigned int n_boundary_dofs
+            = GeometryInfo<dim>::faces_per_cell * this->degree;
+          const unsigned int &n_quadrature_points = quadrature.size ();
+
+          {
+            FullMatrix<double> assembling_matrix ((this->degree-1) * this->degree,
+                                                  n_quadrature_points);
+
+            for (unsigned int q_point = 0; q_point < n_quadrature_points;
+                 ++q_point)
+              {
+                const double weight
+                  = std::sqrt (quadrature.weight (q_point));
+
+                for (unsigned int i = 0; i < this->degree; ++i)
+                  {
+                    const double L_i = weight
+                                       * legendre_polynomials[i].value
+                                       (quadrature_points[q_point] (0));
+
+                    for (unsigned int j = 0; j < this->degree-1; ++j)
+                      assembling_matrix (i * (this->degree-1) + j, q_point)
+                        = L_i * lobatto_polynomials[j + 2].value
+                          (quadrature_points[q_point] (1));
+                  }
+              }
+
+            FullMatrix<double> system_matrix (assembling_matrix.m (),
+                                              assembling_matrix.m ());
+
+            assembling_matrix.mTmult (system_matrix, assembling_matrix);
+            system_matrix_inv.reinit (system_matrix.m (), system_matrix.m ());
+            system_matrix_inv.invert (system_matrix);
+          }
+
+          solution.reinit (system_matrix_inv.m (), 8);
+          system_rhs.reinit (system_matrix_inv.m (), 8);
+          tmp.reinit (8);
+
+          for (unsigned int dof = 0; dof < this->dofs_per_cell; ++dof)
+            {
+              system_rhs = 0.0;
+
+              for (unsigned int q_point = 0;
+                   q_point < n_quadrature_points; ++q_point)
+                {
+                  tmp = 0.0;
+
+                  if (quadrature_points[q_point] (0) < 0.5)
+                    {
+                      if (quadrature_points[q_point] (1) < 0.5)
+                        {
+                          const Point<dim> quadrature_point
+                          (2.0 * quadrature_points[q_point] (0),
+                           2.0 * quadrature_points[q_point] (1));
+
+                          tmp (0) += 2.0 * this->shape_value_component
+                                     (dof, quadrature_point, 0);
+                          tmp (1) += 2.0 * this->shape_value_component
+                                     (dof, quadrature_point, 1);
+                        }
+
+                      else
+                        {
+                          const Point<dim> quadrature_point
+                          (2.0 * quadrature_points[q_point] (0),
+                           2.0 * quadrature_points[q_point] (1)
+                           - 1.0);
+
+                          tmp (4) += 2.0 * this->shape_value_component
+                                     (dof, quadrature_point, 0);
+                          tmp (5) += 2.0 * this->shape_value_component
+                                     (dof, quadrature_point, 1);
+                        }
+                    }
+
+                  else if (quadrature_points[q_point] (1) < 0.5)
+                    {
+                      const Point<dim> quadrature_point
+                      (2.0 * quadrature_points[q_point] (0)
+                       - 1.0,
+                       2.0 * quadrature_points[q_point] (1));
+
+                      tmp (2) += 2.0 * this->shape_value_component
+                                 (dof, quadrature_point, 0);
+                      tmp (3) += 2.0 * this->shape_value_component
+                                 (dof, quadrature_point, 1);
+                    }
+
+                  else
+                    {
+                      const Point<dim> quadrature_point
+                      (2.0 * quadrature_points[q_point] (0)
+                       - 1.0,
+                       2.0 * quadrature_points[q_point] (1)
+                       - 1.0);
+
+                      tmp (6) += 2.0 * this->shape_value_component
+                                 (dof, quadrature_point, 0);
+                      tmp (7) += 2.0 * this->shape_value_component
+                                 (dof, quadrature_point, 1);
+                    }
+
+                  for (unsigned int i = 0; i < 2; ++i)
+                    for (unsigned int j = 0; j < this->degree; ++j)
+                      {
+                        tmp (2 * i) -= this->restriction[index][i]
+                                       (j + 2 * this->degree, dof)
+                                       * this->shape_value_component
+                                       (j + 2 * this->degree,
+                                        quadrature_points[q_point], 0);
+                        tmp (2 * i + 1) -= this->restriction[index][i]
+                                           (i * this->degree + j, dof)
+                                           * this->shape_value_component
+                                           (i * this->degree + j,
+                                            quadrature_points[q_point], 1);
+                        tmp (2 * (i + 2)) -= this->restriction[index][i + 2]
+                                             (j + 3 * this->degree, dof)
+                                             * this->shape_value_component
+                                             (j + 3 * this->degree,
+                                              quadrature_points[q_point],
+                                              0);
+                        tmp (2 * i + 5) -= this->restriction[index][i + 2]
+                                           (i * this->degree + j, dof)
+                                           * this->shape_value_component
+                                           (i * this->degree + j,
+                                            quadrature_points[q_point], 1);
+                      }
+
+                  tmp *= quadrature.weight (q_point);
+
+                  for (unsigned int i = 0; i < this->degree; ++i)
+                    {
+                      const double L_i_0
+                        = legendre_polynomials[i].value
+                          (quadrature_points[q_point] (0));
+                      const double L_i_1
+                        = legendre_polynomials[i].value
+                          (quadrature_points[q_point] (1));
+
+                      for (unsigned int j = 0; j < this->degree-1; ++j)
+                        {
+                          const double l_j_0
+                            = L_i_0 * lobatto_polynomials[j + 2].value
+                              (quadrature_points[q_point] (1));
+                          const double l_j_1
+                            = L_i_1 * lobatto_polynomials[j + 2].value
+                              (quadrature_points[q_point] (0));
+
+                          for (unsigned int k = 0; k < 4; ++k)
+                            {
+                              system_rhs (i * (this->degree-1) + j, 2 * k)
+                              += tmp (2 * k) * l_j_0;
+                              system_rhs (i * (this->degree-1) + j, 2 * k + 1)
+                              += tmp (2 * k + 1) * l_j_1;
+                            }
+                        }
+                    }
+                }
+
+              system_matrix_inv.mmult (solution, system_rhs);
+
+              for (unsigned int i = 0; i < this->degree; ++i)
+                for (unsigned int j = 0; j < this->degree-1; ++j)
+                  for (unsigned int k = 0; k < 4; ++k)
+                    {
+                      if (std::abs (solution (i * (this->degree-1) + j, 2 * k))
+                          > 1e-14)
+                        this->restriction[index][k]
+                        (i * (this->degree-1) + j + n_boundary_dofs, dof)
+                          = solution (i * (this->degree-1) + j, 2 * k);
+
+                      if (std::abs (solution (i * (this->degree-1) + j, 2 * k + 1))
+                          > 1e-14)
+                        this->restriction[index][k]
+                        (i + (this->degree-1 + j) * this->degree + n_boundary_dofs,
+                         dof)
+                          = solution (i * (this->degree-1) + j, 2 * k + 1);
+                    }
+            }
+        }
+
+      break;
+    }
+
+    case 3:
+    {
+      // First interpolate the shape
+      // functions of the child cells
+      // to the lowest order shape
+      // functions of the parent cell.
+      for (unsigned int dof = 0; dof < this->dofs_per_cell; ++dof)
+        for (unsigned int q_point = 0; q_point < n_edge_quadrature_points;
+             ++q_point)
+          {
+            const double weight = 2.0 * edge_quadrature.weight (q_point);
+
+            if (edge_quadrature_points[q_point] (0) < 0.5)
+              for (unsigned int i = 0; i < 2; ++i)
+                for (unsigned int j = 0; j < 2; ++j)
+                  {
+                    Point<dim> quadrature_point (i,
+                                                 2.0 * edge_quadrature_points[q_point] (0),
+                                                 j);
+
+                    this->restriction[index][i + 4 * j]
+                    ((i + 4 * j) * this->degree, dof)
+                    += weight * this->shape_value_component (dof,
+                                                             quadrature_point,
+                                                             1);
+                    quadrature_point
+                      = Point<dim> (2.0 * edge_quadrature_points[q_point] (0),
+                                    i, j);
+                    this->restriction[index][2 * (i + 2 * j)]
+                    ((i + 4 * j + 2) * this->degree, dof)
+                    += weight * this->shape_value_component (dof,
+                                                             quadrature_point,
+                                                             0);
+                    quadrature_point = Point<dim> (i, j,
+                                                   2.0 * edge_quadrature_points[q_point] (0));
+                    this->restriction[index][i + 2 * j]
+                    ((i + 2 * (j + 4)) * this->degree, dof)
+                    += weight * this->shape_value_component (dof,
+                                                             quadrature_point,
+                                                             2);
+                  }
+
+            else
+              for (unsigned int i = 0; i < 2; ++i)
+                for (unsigned int j = 0; j < 2; ++j)
+                  {
+                    Point<dim> quadrature_point (i,
+                                                 2.0 * edge_quadrature_points[q_point] (0)
+                                                 - 1.0, j);
+
+                    this->restriction[index][i + 4 * j + 2]
+                    ((i + 4 * j) * this->degree, dof)
+                    += weight * this->shape_value_component (dof,
+                                                             quadrature_point,
+                                                             1);
+                    quadrature_point
+                      = Point<dim> (2.0 * edge_quadrature_points[q_point] (0)
+                                    - 1.0, i, j);
+                    this->restriction[index][2 * (i + 2 * j) + 1]
+                    ((i + 4 * j + 2) * this->degree, dof)
+                    += weight * this->shape_value_component (dof,
+                                                             quadrature_point,
+                                                             0);
+                    quadrature_point = Point<dim> (i, j,
+                                                   2.0 * edge_quadrature_points[q_point] (0)
+                                                   - 1.0);
+                    this->restriction[index][i + 2 * (j + 2)]
+                    ((i + 2 * (j + 4)) * this->degree, dof)
+                    += weight * this->shape_value_component (dof,
+                                                             quadrature_point,
+                                                             2);
+                  }
+          }
+
+      // Then project the shape functions
+      // of the child cells to the higher
+      // order shape functions of the
+      // parent cell.
+      if (this->degree > 1)
+        {
+          const unsigned int deg = this->degree-1;
+          const std::vector<Polynomials::Polynomial<double> > &
+          legendre_polynomials
+            = Polynomials::Legendre::generate_complete_basis (deg);
+          FullMatrix<double> system_matrix_inv (deg, deg);
+
+          {
+            FullMatrix<double> assembling_matrix (deg,
+                                                  n_edge_quadrature_points);
+
+            for (unsigned int q_point = 0;
+                 q_point < n_edge_quadrature_points; ++q_point)
+              {
+                const double weight = std::sqrt (edge_quadrature.weight
+                                                 (q_point));
+
+                for (unsigned int i = 0; i < deg; ++i)
+                  assembling_matrix (i, q_point) = weight
+                                                   * legendre_polynomials[i + 1].value
+                                                   (edge_quadrature_points[q_point] (0));
+              }
+
+            FullMatrix<double> system_matrix (deg, deg);
+
+            assembling_matrix.mTmult (system_matrix, assembling_matrix);
+            system_matrix_inv.invert (system_matrix);
+          }
+
+          FullMatrix<double> solution (deg, 6);
+          FullMatrix<double> system_rhs (deg, 6);
+          Vector<double> tmp (6);
+
+          for (unsigned int i = 0; i < 2; ++i)
+            for (unsigned int j = 0; j < 2; ++j)
+              for (unsigned int dof = 0; dof < this->dofs_per_cell; ++dof)
+                {
+                  system_rhs = 0.0;
+
+                  for (unsigned int q_point = 0;
+                       q_point < n_edge_quadrature_points; ++q_point)
+                    {
+                      const double weight = edge_quadrature.weight
+                                            (q_point);
+                      const Point<dim> quadrature_point_0 (i,
+                                                           edge_quadrature_points[q_point] (0),
+                                                           j);
+                      const Point<dim>
+                      quadrature_point_1
+                      (edge_quadrature_points[q_point] (0), i, j);
+                      const Point<dim> quadrature_point_2 (i, j,
+                                                           edge_quadrature_points[q_point] (0));
+
+                      if (edge_quadrature_points[q_point] (0) < 0.5)
+                        {
+                          Point<dim> quadrature_point_3 (i,
+                                                         2.0 * edge_quadrature_points[q_point] (0),
+                                                         j);
+
+                          tmp (0) = weight
+                                    * (2.0 * this->shape_value_component
+                                       (dof, quadrature_point_3, 1)
+                                       - this->restriction[index][i + 4 * j]
+                                       ((i + 4 * j) * this->degree,
+                                        dof)
+                                       * this->shape_value_component
+                                       ((i + 4 * j) * this->degree,
+                                        quadrature_point_0, 1));
+                          tmp (1) = -1.0 * weight
+                                    * this->restriction[index][i + 4 * j + 2]
+                                    ((i + 4 * j) * this->degree,
+                                     dof)
+                                    * this->shape_value_component
+                                    ((i + 4 * j) * this->degree,
+                                     quadrature_point_0, 1);
+                          quadrature_point_3
+                            = Point<dim> (2.0 * edge_quadrature_points[q_point] (0),
+                                          i, j);
+                          tmp (2) = weight
+                                    * (2.0 * this->shape_value_component
+                                       (dof, quadrature_point_3, 0)
+                                       - this->restriction[index][2 * (i + 2 * j)]
+                                       ((i + 4 * j + 2) * this->degree,
+                                        dof)
+                                       * this->shape_value_component
+                                       ((i + 4 * j + 2) * this->degree,
+                                        quadrature_point_1, 0));
+                          tmp (3) = -1.0 * weight
+                                    * this->restriction[index][2 * (i + 2 * j) + 1]
+                                    ((i + 4 * j + 2) * this->degree,
+                                     dof)
+                                    * this->shape_value_component
+                                    ((i + 4 * j + 2) * this->degree,
+                                     quadrature_point_1, 0);
+                          quadrature_point_3 = Point<dim> (i, j,
+                                                           2.0 * edge_quadrature_points[q_point] (0));
+                          tmp (4) = weight
+                                    * (2.0 * this->shape_value_component
+                                       (dof, quadrature_point_3, 2)
+                                       - this->restriction[index][i + 2 * j]
+                                       ((i + 2 * (j + 4)) * this->degree,
+                                        dof)
+                                       * this->shape_value_component
+                                       ((i + 2 * (j + 4)) * this->degree,
+                                        quadrature_point_2, 2));
+                          tmp (5) = -1.0 * weight
+                                    * this->restriction[index][i + 2 * (j + 2)]
+                                    ((i + 2 * (j + 4)) * this->degree,
+                                     dof)
+                                    * this->shape_value_component
+                                    ((i + 2 * (j + 4)) * this->degree,
+                                     quadrature_point_2, 2);
+                        }
+
+                      else
+                        {
+                          tmp (0) = -1.0 * weight
+                                    * this->restriction[index][i + 4 * j]
+                                    ((i + 4 * j) * this->degree,
+                                     dof)
+                                    * this->shape_value_component
+                                    ((i + 4 * j) * this->degree,
+                                     quadrature_point_0, 1);
+
+                          Point<dim> quadrature_point_3 (i,
+                                                         2.0 * edge_quadrature_points[q_point] (0)
+                                                         - 1.0, j);
+
+                          tmp (1) = weight
+                                    * (2.0 * this->shape_value_component
+                                       (dof, quadrature_point_3, 1)
+                                       - this->restriction[index][i + 4 * j + 2]
+                                       ((i + 4 * j) * this->degree,
+                                        dof)
+                                       * this->shape_value_component
+                                       ((i + 4 * j) * this->degree,
+                                        quadrature_point_0, 1));
+                          tmp (2) = -1.0 * weight
+                                    * this->restriction[index][2 * (i + 2 * j)]
+                                    ((i + 4 * j + 2) * this->degree,
+                                     dof)
+                                    * this->shape_value_component
+                                    ((i + 4 * j + 2) * this->degree,
+                                     quadrature_point_1, 0);
+                          quadrature_point_3
+                            = Point<dim> (2.0 * edge_quadrature_points[q_point] (0)
+                                          - 1.0, i, j);
+                          tmp (3) = weight
+                                    * (2.0 * this->shape_value_component
+                                       (dof, quadrature_point_3, 0)
+                                       - this->restriction[index][2 * (i + 2 * j) + 1]
+                                       ((i + 4 * j + 2) * this->degree,
+                                        dof)
+                                       * this->shape_value_component
+                                       ((i + 4 * j + 2) * this->degree,
+                                        quadrature_point_1, 0));
+                          tmp (4) = -1.0 * weight
+                                    * this->restriction[index][i + 2 * j]
+                                    ((i + 2 * (j + 4)) * this->degree,
+                                     dof)
+                                    * this->shape_value_component
+                                    ((i + 2 * (j + 4)) * this->degree,
+                                     quadrature_point_2, 2);
+                          quadrature_point_3 = Point<dim> (i, j,
+                                                           2.0 * edge_quadrature_points[q_point] (0)
+                                                           - 1.0);
+                          tmp (5) = weight
+                                    * (2.0 * this->shape_value_component
+                                       (dof, quadrature_point_3, 2)
+                                       - this->restriction[index][i + 2 * (j + 2)]
+                                       ((i + 2 * (j + 4)) * this->degree,
+                                        dof)
+                                       * this->shape_value_component
+                                       ((i + 2 * (j + 4)) * this->degree,
+                                        quadrature_point_2, 2));
+                        }
+
+                      for (unsigned int k = 0; k < deg; ++k)
+                        {
+                          const double L_k
+                            = legendre_polynomials[k + 1].value
+                              (edge_quadrature_points[q_point] (0));
+
+                          for (unsigned int l = 0; l < tmp.size (); ++l)
+                            system_rhs (k, l) += tmp (l) * L_k;
+                        }
+                    }
+
+                  system_matrix_inv.mmult (solution, system_rhs);
+
+                  for (unsigned int k = 0; k < 2; ++k)
+                    for (unsigned int l = 0; l < deg; ++l)
+                      {
+                        if (std::abs (solution (l, k)) > 1e-14)
+                          this->restriction[index][i + 2 * (2 * j + k)]
+                          ((i + 4 * j) * this->degree + l + 1, dof)
+                            = solution (l, k);
+
+                        if (std::abs (solution (l, k + 2)) > 1e-14)
+                          this->restriction[index][2 * (i + 2 * j) + k]
+                          ((i + 4 * j + 2) * this->degree + l + 1, dof)
+                            = solution (l, k + 2);
+
+                        if (std::abs (solution (l, k + 4)) > 1e-14)
+                          this->restriction[index][i + 2 * (j + 2 * k)]
+                          ((i + 2 * (j + 4)) * this->degree + l + 1,
+                           dof)
+                            = solution (l, k + 4);
+                      }
+                }
+
+          const QGauss<2> face_quadrature (2 * this->degree);
+          const std::vector<Point<2> > &face_quadrature_points
+            = face_quadrature.get_points ();
+          const std::vector<Polynomials::Polynomial<double> > &
+          lobatto_polynomials
+            = Polynomials::Lobatto::generate_complete_basis
+              (this->degree);
+          const unsigned int n_edge_dofs
+            = GeometryInfo<dim>::lines_per_cell * this->degree;
+          const unsigned int &n_face_quadrature_points
+            = face_quadrature.size ();
+
+          {
+            FullMatrix<double> assembling_matrix
+            (deg * this->degree,
+             n_face_quadrature_points);
+
+            for (unsigned int q_point = 0;
+                 q_point < n_face_quadrature_points; ++q_point)
+              {
+                const double weight
+                  = std::sqrt (face_quadrature.weight (q_point));
+
+                for (unsigned int i = 0; i <= deg; ++i)
+                  {
+                    const double L_i = weight
+                                       * legendre_polynomials[i].value
+                                       (face_quadrature_points[q_point] (0));
+
+                    for (unsigned int j = 0; j < deg; ++j)
+                      assembling_matrix (i * deg + j, q_point)
+                        = L_i * lobatto_polynomials[j + 2].value
+                          (face_quadrature_points[q_point] (1));
+                  }
+              }
+
+            FullMatrix<double> system_matrix (assembling_matrix.m (),
+                                              assembling_matrix.m ());
+
+            assembling_matrix.mTmult (system_matrix,
+                                      assembling_matrix);
+            system_matrix_inv.reinit (system_matrix.m (),
+                                      system_matrix.m ());
+            system_matrix_inv.invert (system_matrix);
+          }
+
+          solution.reinit (system_matrix_inv.m (), 24);
+          system_rhs.reinit (system_matrix_inv.m (), 24);
+          tmp.reinit (24);
+
+          for (unsigned int i = 0; i < 2; ++i)
+            for (unsigned int dof = 0; dof < this->dofs_per_cell; ++dof)
+              {
+                system_rhs = 0.0;
+
+                for (unsigned int q_point = 0;
+                     q_point < n_face_quadrature_points; ++q_point)
+                  {
+                    tmp = 0.0;
+
+                    if (face_quadrature_points[q_point] (0) < 0.5)
+                      {
+                        if (face_quadrature_points[q_point] (1) < 0.5)
+                          {
+                            Point<dim> quadrature_point_0 (i,
+                                                           2.0 * face_quadrature_points[q_point] (0),
+                                                           2.0 * face_quadrature_points[q_point] (1));
+
+                            tmp (0) += 2.0 * this->shape_value_component
+                                       (dof, quadrature_point_0, 1);
+                            tmp (1) += 2.0 * this->shape_value_component
+                                       (dof, quadrature_point_0, 2);
+                            quadrature_point_0
+                              = Point<dim> (2.0 * face_quadrature_points[q_point] (0),
+                                            i,
+                                            2.0 * face_quadrature_points[q_point] (1));
+                            tmp (8) += 2.0 * this->shape_value_component
+                                       (dof, quadrature_point_0, 2);
+                            tmp (9) += 2.0 * this->shape_value_component
+                                       (dof, quadrature_point_0, 0);
+                            quadrature_point_0
+                              = Point<dim> (2.0 * face_quadrature_points[q_point] (0),
+                                            2.0 * face_quadrature_points[q_point] (1),
+                                            i);
+                            tmp (16) += 2.0 * this->shape_value_component
+                                        (dof, quadrature_point_0, 0);
+                            tmp (17) += 2.0 * this->shape_value_component
+                                        (dof, quadrature_point_0, 1);
+                          }
+
+                        else
+                          {
+                            Point<dim> quadrature_point_0 (i,
+                                                           2.0 * face_quadrature_points[q_point] (0),
+                                                           2.0 * face_quadrature_points[q_point] (1)
+                                                           - 1.0);
+
+                            tmp (2) += 2.0 * this->shape_value_component
+                                       (dof, quadrature_point_0, 1);
+                            tmp (3) += 2.0 * this->shape_value_component
+                                       (dof, quadrature_point_0, 2);
+                            quadrature_point_0
+                              = Point<dim> (2.0 * face_quadrature_points[q_point] (0),
+                                            i,
+                                            2.0 * face_quadrature_points[q_point] (1)
+                                            - 1.0);
+                            tmp (10) += 2.0 * this->shape_value_component
+                                        (dof, quadrature_point_0, 2);
+                            tmp (11) += 2.0 * this->shape_value_component
+                                        (dof, quadrature_point_0, 0);
+                            quadrature_point_0
+                              = Point<dim> (2.0 * face_quadrature_points[q_point] (0),
+                                            2.0 * face_quadrature_points[q_point] (1)
+                                            - 1.0, i);
+                            tmp (18) += 2.0 * this->shape_value_component
+                                        (dof, quadrature_point_0, 0);
+                            tmp (19) += 2.0 * this->shape_value_component
+                                        (dof, quadrature_point_0, 1);
+                          }
+                      }
+
+                    else if (face_quadrature_points[q_point] (1) < 0.5)
+                      {
+                        Point<dim> quadrature_point_0 (i,
+                                                       2.0 * face_quadrature_points[q_point] (0)
+                                                       - 1.0,
+                                                       2.0 * face_quadrature_points[q_point] (1));
+
+                        tmp (4) += 2.0 * this->shape_value_component
+                                   (dof, quadrature_point_0, 1);
+                        tmp (5) += 2.0 * this->shape_value_component
+                                   (dof, quadrature_point_0, 2);
+                        quadrature_point_0
+                          = Point<dim> (2.0 * face_quadrature_points[q_point] (0)
+                                        - 1.0, i,
+                                        2.0 * face_quadrature_points[q_point] (1));
+                        tmp (12) += 2.0 * this->shape_value_component
+                                    (dof, quadrature_point_0, 2);
+                        tmp (13) += 2.0 * this->shape_value_component
+                                    (dof, quadrature_point_0, 0);
+                        quadrature_point_0
+                          = Point<dim> (2.0 * face_quadrature_points[q_point] (0)
+                                        - 1.0,
+                                        2.0 * face_quadrature_points[q_point] (1),
+                                        i);
+                        tmp (20) += 2.0 * this->shape_value_component
+                                    (dof, quadrature_point_0, 0);
+                        tmp (21) += 2.0 * this->shape_value_component
+                                    (dof, quadrature_point_0, 1);
+                      }
+
+                    else
+                      {
+                        Point<dim> quadrature_point_0 (i,
+                                                       2.0 * face_quadrature_points[q_point] (0)
+                                                       - 1.0,
+                                                       2.0 * face_quadrature_points[q_point] (1)
+                                                       - 1.0);
+
+                        tmp (6) += 2.0 * this->shape_value_component
+                                   (dof, quadrature_point_0, 1);
+                        tmp (7) += 2.0 * this->shape_value_component
+                                   (dof, quadrature_point_0, 2);
+                        quadrature_point_0
+                          = Point<dim> (2.0 * face_quadrature_points[q_point] (0)
+                                        - 1.0, i,
+                                        2.0 * face_quadrature_points[q_point] (1)
+                                        - 1.0);
+                        tmp (14) += 2.0 * this->shape_value_component
+                                    (dof, quadrature_point_0, 2);
+                        tmp (15) += 2.0 * this->shape_value_component
+                                    (dof, quadrature_point_0, 0);
+                        quadrature_point_0
+                          = Point<dim> (2.0 * face_quadrature_points[q_point] (0)
+                                        - 1.0,
+                                        2.0 * face_quadrature_points[q_point] (1)
+                                        - 1.0, i);
+                        tmp (22) += 2.0 * this->shape_value_component
+                                    (dof, quadrature_point_0, 0);
+                        tmp (23) += 2.0 * this->shape_value_component
+                                    (dof, quadrature_point_0, 1);
+                      }
+
+                    const Point<dim> quadrature_point_0 (i,
+                                                         face_quadrature_points[q_point] (0),
+                                                         face_quadrature_points[q_point] (1));
+                    const Point<dim> quadrature_point_1
+                    (face_quadrature_points[q_point] (0),
+                     i,
+                     face_quadrature_points[q_point] (1));
+                    const Point<dim> quadrature_point_2
+                    (face_quadrature_points[q_point] (0),
+                     face_quadrature_points[q_point] (1),
+                     i);
+
+                    for (unsigned int j = 0; j < 2; ++j)
+                      for (unsigned int k = 0; k < 2; ++k)
+                        for (unsigned int l = 0; l <= deg; ++l)
+                          {
+                            tmp (2 * (j + 2 * k))
+                            -= this->restriction[index][i + 2 * (2 * j + k)]
+                               ((i + 4 * j) * this->degree + l, dof)
+                               * this->shape_value_component
+                               ((i + 4 * j) * this->degree + l,
+                                quadrature_point_0, 1);
+                            tmp (2 * (j + 2 * k) + 1)
+                            -= this->restriction[index][i + 2 * (2 * j + k)]
+                               ((i + 2 * (k + 4)) * this->degree + l,
+                                dof)
+                               * this->shape_value_component
+                               ((i + 2 * (k + 4)) * this->degree + l,
+                                quadrature_point_0, 2);
+                            tmp (2 * (j + 2 * (k + 2)))
+                            -= this->restriction[index][2 * (i + 2 * j) + k]
+                               ((2 * (i + 4) + k) * this->degree + l,
+                                dof)
+                               * this->shape_value_component
+                               ((2 * (i + 4) + k) * this->degree + l,
+                                quadrature_point_1, 2);
+                            tmp (2 * (j + 2 * k) + 9)
+                            -= this->restriction[index][2 * (i + 2 * j) + k]
+                               ((i + 4 * j + 2) * this->degree + l,
+                                dof)
+                               * this->shape_value_component
+                               ((i + 4 * j + 2) * this->degree + l,
+                                quadrature_point_1, 0);
+                            tmp (2 * (j + 2 * (k + 4)))
+                            -= this->restriction[index][2 * (2 * i + j) + k]
+                               ((4 * i + j + 2) * this->degree + l,
+                                dof)
+                               * this->shape_value_component
+                               ((4 * i + j + 2) * this->degree + l,
+                                quadrature_point_2, 0);
+                            tmp (2 * (j + 2 * k) + 17)
+                            -= this->restriction[index][2 * (2 * i + j) + k]
+                               ((4 * i + k) * this->degree + l, dof)
+                               * this->shape_value_component
+                               ((4 * i + k) * this->degree + l,
+                                quadrature_point_2, 1);
+                          }
+
+                    tmp *= face_quadrature.weight (q_point);
+
+                    for (unsigned int j = 0; j <= deg; ++j)
+                      {
+                        const double L_j_0
+                          = legendre_polynomials[j].value
+                            (face_quadrature_points[q_point] (0));
+                        const double L_j_1
+                          = legendre_polynomials[j].value
+                            (face_quadrature_points[q_point] (1));
+
+                        for (unsigned int k = 0; k < deg; ++k)
+                          {
+                            const double l_k_0
+                              = L_j_0 * lobatto_polynomials[k + 2].value
+                                (face_quadrature_points[q_point] (1));
+                            const double l_k_1
+                              = L_j_1 * lobatto_polynomials[k + 2].value
+                                (face_quadrature_points[q_point] (0));
+
+                            for (unsigned int l = 0; l < 4; ++l)
+                              {
+                                system_rhs (j * deg + k, 2 * l)
+                                += tmp (2 * l) * l_k_0;
+                                system_rhs (j * deg + k, 2 * l + 1)
+                                += tmp (2 * l + 1) * l_k_1;
+                                system_rhs (j * deg + k, 2 * (l + 4))
+                                += tmp (2 * (l + 4)) * l_k_1;
+                                system_rhs (j * deg + k, 2 * l + 9)
+                                += tmp (2 * l + 9) * l_k_0;
+                                system_rhs (j * deg + k, 2 * (l + 8))
+                                += tmp (2 * (l + 8)) * l_k_0;
+                                system_rhs (j * deg + k, 2 * l + 17)
+                                += tmp (2 * l + 17) * l_k_1;
+                              }
+                          }
+                      }
+                  }
+
+                system_matrix_inv.mmult (solution, system_rhs);
+
+                for (unsigned int j = 0; j < 2; ++j)
+                  for (unsigned int k = 0; k < 2; ++k)
+                    for (unsigned int l = 0; l <= deg; ++l)
+                      for (unsigned int m = 0; m < deg; ++m)
+                        {
+                          if (std::abs (solution (l * deg + m,
+                                                  2 * (j + 2 * k)))
+                              > 1e-14)
+                            this->restriction[index][i + 2 * (2 * j + k)]
+                            ((2 * i * this->degree + l) * deg + m
+                             + n_edge_dofs,
+                             dof) = solution (l * deg + m,
+                                              2 * (j + 2 * k));
+
+                          if (std::abs (solution (l * deg + m,
+                                                  2 * (j + 2 * k) + 1))
+                              > 1e-14)
+                            this->restriction[index][i + 2 * (2 * j + k)]
+                            (((2 * i + 1) * deg + m) * this->degree + l
+                             + n_edge_dofs, dof)
+                              = solution (l * deg + m,
+                                          2 * (j + 2 * k) + 1);
+
+                          if (std::abs (solution (l * deg + m,
+                                                  2 * (j + 2 * (k + 2))))
+                              > 1e-14)
+                            this->restriction[index][2 * (i + 2 * j) + k]
+                            ((2 * (i + 2) * this->degree + l) * deg + m
+                             + n_edge_dofs,
+                             dof) = solution (l * deg + m,
+                                              2 * (j + 2 * (k + 2)));
+
+                          if (std::abs (solution (l * deg + m,
+                                                  2 * (j + 2 * k) + 9))
+                              > 1e-14)
+                            this->restriction[index][2 * (i + 2 * j) + k]
+                            (((2 * i + 5) * deg + m) * this->degree + l
+                             + n_edge_dofs, dof)
+                              = solution (l * deg + m,
+                                          2 * (j + 2 * k) + 9);
+
+                          if (std::abs (solution (l * deg + m,
+                                                  2 * (j + 2 * (k + 4))))
+                              > 1e-14)
+                            this->restriction[index][2 * (2 * i + j) + k]
+                            ((2 * (i + 4) * this->degree + l) * deg + m
+                             + n_edge_dofs,
+                             dof) = solution (l * deg + m,
+                                              2 * (j + 2 * (k + 4)));
+
+                          if (std::abs (solution (l * deg + m,
+                                                  2 * (j + 2 * k) + 17))
+                              > 1e-14)
+                            this->restriction[index][2 * (2 * i + j) + k]
+                            (((2 * i + 9) * deg + m) * this->degree + l
+                             + n_edge_dofs, dof)
+                              = solution (l * deg + m,
+                                          2 * (j + 2 * k) + 17);
+                        }
+              }
+
+          const QGauss<dim> quadrature (2 * this->degree);
+          const std::vector<Point<dim> > &
+          quadrature_points = quadrature.get_points ();
+          const unsigned int n_boundary_dofs
+            = 2 * GeometryInfo<dim>::faces_per_cell * deg * this->degree
+              + n_edge_dofs;
+          const unsigned int &n_quadrature_points = quadrature.size ();
+
+          {
+            FullMatrix<double>
+            assembling_matrix (deg * deg * this->degree,
+                               n_quadrature_points);
+
+            for (unsigned int q_point = 0; q_point < n_quadrature_points;
+                 ++q_point)
+              {
+                const double weight = std::sqrt (quadrature.weight
+                                                 (q_point));
+
+                for (unsigned int i = 0; i <= deg; ++i)
+                  {
+                    const double L_i = weight
+                                       * legendre_polynomials[i].value
+                                       (quadrature_points[q_point] (0));
+
+                    for (unsigned int j = 0; j < deg; ++j)
+                      {
+                        const double l_j
+                          = L_i * lobatto_polynomials[j + 2].value
+                            (quadrature_points[q_point] (1));
+
+                        for (unsigned int k = 0; k < deg; ++k)
+                          assembling_matrix ((i * deg + j) * deg + k,
+                                             q_point)
+                            = l_j * lobatto_polynomials[k + 2].value
+                              (quadrature_points[q_point] (2));
+                      }
+                  }
+              }
+
+            FullMatrix<double> system_matrix (assembling_matrix.m (),
+                                              assembling_matrix.m ());
+
+            assembling_matrix.mTmult (system_matrix, assembling_matrix);
+            system_matrix_inv.reinit (system_matrix.m (),
+                                      system_matrix.m ());
+            system_matrix_inv.invert (system_matrix);
+          }
+
+          solution.reinit (system_matrix_inv.m (), 24);
+          system_rhs.reinit (system_matrix_inv.m (), 24);
+          tmp.reinit (24);
+
+          for (unsigned int dof = 0; dof < this->dofs_per_cell; ++dof)
+            {
+              system_rhs = 0.0;
+
+              for (unsigned int q_point = 0;
+                   q_point < n_quadrature_points; ++q_point)
+                {
+                  tmp = 0.0;
+
+                  if (quadrature_points[q_point] (0) < 0.5)
+                    {
+                      if (quadrature_points[q_point] (1) < 0.5)
+                        {
+                          if (quadrature_points[q_point] (2) < 0.5)
+                            {
+                              const Point<dim> quadrature_point
+                              (2.0 * quadrature_points[q_point] (0),
+                               2.0 * quadrature_points[q_point] (1),
+                               2.0 * quadrature_points[q_point] (2));
+
+                              tmp (0) += 2.0 * this->shape_value_component
+                                         (dof, quadrature_point, 0);
+                              tmp (1) += 2.0 * this->shape_value_component
+                                         (dof, quadrature_point, 1);
+                              tmp (2) += 2.0 * this->shape_value_component
+                                         (dof, quadrature_point, 2);
+                            }
+
+                          else
+                            {
+                              const Point<dim> quadrature_point
+                              (2.0 * quadrature_points[q_point] (0),
+                               2.0 * quadrature_points[q_point] (1),
+                               2.0 * quadrature_points[q_point] (2)
+                               - 1.0);
+
+                              tmp (3) += 2.0 * this->shape_value_component
+                                         (dof, quadrature_point, 0);
+                              tmp (4) += 2.0 * this->shape_value_component
+                                         (dof, quadrature_point, 1);
+                              tmp (5) += 2.0 * this->shape_value_component
+                                         (dof, quadrature_point, 2);
+                            }
+                        }
+
+                      else if (quadrature_points[q_point] (2) < 0.5)
+                        {
+                          const Point<dim> quadrature_point
+                          (2.0 * quadrature_points[q_point] (0),
+                           2.0 * quadrature_points[q_point] (1)
+                           - 1.0,
+                           2.0 * quadrature_points[q_point] (2));
+
+                          tmp (6) += 2.0 * this->shape_value_component
+                                     (dof, quadrature_point, 0);
+                          tmp (7) += 2.0 * this->shape_value_component
+                                     (dof, quadrature_point, 1);
+                          tmp (8) += 2.0 * this->shape_value_component
+                                     (dof, quadrature_point, 2);
+                        }
+
+                      else
+                        {
+                          const Point<dim> quadrature_point
+                          (2.0 * quadrature_points[q_point] (0),
+                           2.0 * quadrature_points[q_point] (1)
+                           - 1.0,
+                           2.0 * quadrature_points[q_point] (2)
+                           - 1.0);
+
+                          tmp (9) += 2.0 * this->shape_value_component
+                                     (dof, quadrature_point, 0);
+                          tmp (10) += 2.0 * this->shape_value_component
+                                      (dof, quadrature_point, 1);
+                          tmp (11) += 2.0 * this->shape_value_component
+                                      (dof, quadrature_point, 2);
+                        }
+                    }
+
+                  else if (quadrature_points[q_point] (1) < 0.5)
+                    {
+                      if (quadrature_points[q_point] (2) < 0.5)
+                        {
+                          const Point<dim> quadrature_point
+                          (2.0 * quadrature_points[q_point] (0)
+                           - 1.0,
+                           2.0 * quadrature_points[q_point] (1),
+                           2.0 * quadrature_points[q_point] (2));
+
+                          tmp (12) += 2.0 * this->shape_value_component
+                                      (dof, quadrature_point, 0);
+                          tmp (13) += 2.0 * this->shape_value_component
+                                      (dof, quadrature_point, 1);
+                          tmp (14) += 2.0 * this->shape_value_component
+                                      (dof, quadrature_point, 2);
+                        }
+
+                      else
+                        {
+                          const Point<dim> quadrature_point
+                          (2.0 * quadrature_points[q_point] (0)
+                           - 1.0,
+                           2.0 * quadrature_points[q_point] (1),
+                           2.0 * quadrature_points[q_point] (2)
+                           - 1.0);
+
+                          tmp (15) += 2.0 * this->shape_value_component
+                                      (dof, quadrature_point, 0);
+                          tmp (16) += 2.0 * this->shape_value_component
+                                      (dof, quadrature_point, 1);
+                          tmp (17) += 2.0 * this->shape_value_component
+                                      (dof, quadrature_point, 2);
+                        }
+                    }
+
+                  else if (quadrature_points[q_point] (2) < 0.5)
+                    {
+                      const Point<dim> quadrature_point
+                      (2.0 * quadrature_points[q_point] (0)
+                       - 1.0,
+                       2.0 * quadrature_points[q_point] (1)
+                       - 1.0,
+                       2.0 * quadrature_points[q_point] (2));
+
+                      tmp (18) += 2.0 * this->shape_value_component
+                                  (dof, quadrature_point, 0);
+                      tmp (19) += 2.0 * this->shape_value_component
+                                  (dof, quadrature_point, 1);
+                      tmp (20) += 2.0 * this->shape_value_component
+                                  (dof, quadrature_point, 2);
+                    }
+
+                  else
+                    {
+                      const Point<dim> quadrature_point
+                      (2.0 * quadrature_points[q_point] (0)
+                       - 1.0,
+                       2.0 * quadrature_points[q_point] (1)
+                       - 1.0,
+                       2.0 * quadrature_points[q_point] (2)
+                       - 1.0);
+
+                      tmp (21) += 2.0 * this->shape_value_component
+                                  (dof, quadrature_point, 0);
+                      tmp (22) += 2.0 * this->shape_value_component
+                                  (dof, quadrature_point, 1);
+                      tmp (23) += 2.0 * this->shape_value_component
+                                  (dof, quadrature_point, 2);
+                    }
+
+                  for (unsigned int i = 0; i < 2; ++i)
+                    for (unsigned int j = 0; j < 2; ++j)
+                      for (unsigned int k = 0; k < 2; ++k)
+                        for (unsigned int l = 0; l <= deg; ++l)
+                          {
+                            tmp (3 * (i + 2 * (j + 2 * k)))
+                            -= this->restriction[index][2 * (2 * i + j) + k]
+                               ((4 * i + j + 2) * this->degree + l, dof)
+                               * this->shape_value_component
+                               ((4 * i + j + 2) * this->degree + l,
+                                quadrature_points[q_point], 0);
+                            tmp (3 * (i + 2 * (j + 2 * k)) + 1)
+                            -= this->restriction[index][2 * (2 * i + j) + k]
+                               ((4 * i + k) * this->degree + l, dof)
+                               * this->shape_value_component
+                               ((4 * i + k) * this->degree + l,
+                                quadrature_points[q_point], 1);
+                            tmp (3 * (i + 2 * (j + 2 * k)) + 2)
+                            -= this->restriction[index][2 * (2 * i + j) + k]
+                               ((2 * (j + 4) + k) * this->degree + l,
+                                dof)
+                               * this->shape_value_component
+                               ((2 * (j + 4) + k) * this->degree + l,
+                                quadrature_points[q_point], 2);
+
+                            for (unsigned int m = 0; m < deg; ++m)
+                              {
+                                tmp (3 * (i + 2 * (j + 2 * k)))
+                                -= this->restriction[index][2 * (2 * i + j) + k]
+                                   (((2 * j + 5) * deg + m)
+                                    * this->degree + l + n_edge_dofs,
+                                    dof)
+                                   * this->shape_value_component
+                                   (((2 * j + 5) * deg + m)
+                                    * this->degree + l + n_edge_dofs,
+                                    quadrature_points[q_point], 0);
+                                tmp (3 * (i + 2 * (j + 2 * k)))
+                                -= this->restriction[index][2 * (2 * i + j) + k]
+                                   ((2 * (i + 4) * this->degree + l)
+                                    * deg + m + n_edge_dofs, dof)
+                                   * this->shape_value_component
+                                   ((2 * (i + 4) * this->degree + l)
+                                    * deg + m + n_edge_dofs,
+                                    quadrature_points[q_point], 0);
+                                tmp (3 * (i + 2 * (j + 2 * k)) + 1)
+                                -= this->restriction[index][2 * (2 * i + j) + k]
+                                   ((2 * k * this->degree + l) * deg + m
+                                    + n_edge_dofs,
+                                    dof)
+                                   * this->shape_value_component
+                                   ((2 * k * this->degree + l) * deg + m
+                                    + n_edge_dofs,
+                                    quadrature_points[q_point], 1);
+                                tmp (3 * (i + 2 * (j + 2 * k)) + 1)
+                                -= this->restriction[index][2 * (2 * i + j) + k]
+                                   (((2 * i + 9) * deg + m)
+                                    * this->degree + l + n_edge_dofs,
+                                    dof)
+                                   * this->shape_value_component
+                                   (((2 * i + 9) * deg + m)
+                                    * this->degree + l + n_edge_dofs,
+                                    quadrature_points[q_point], 1);
+                                tmp (3 * (i + 2 * (j + 2 * k)) + 2)
+                                -= this->restriction[index][2 * (2 * i + j) + k]
+                                   (((2 * k + 1) * deg + m)
+                                    * this->degree + l + n_edge_dofs,
+                                    dof)
+                                   * this->shape_value_component
+                                   (((2 * k + 1) * deg + m)
+                                    * this->degree + l + n_edge_dofs,
+                                    quadrature_points[q_point], 2);
+                                tmp (3 * (i + 2 * (j + 2 * k)) + 2)
+                                -= this->restriction[index][2 * (2 * i + j) + k]
+                                   ((2 * (j + 2) * this->degree + l)
+                                    * deg + m + n_edge_dofs, dof)
+                                   * this->shape_value_component
+                                   ((2 * (j + 2) * this->degree + l)
+                                    * deg + m + n_edge_dofs,
+                                    quadrature_points[q_point], 2);
+                              }
+                          }
+
+                  tmp *= quadrature.weight (q_point);
+
+                  for (unsigned int i = 0; i <= deg; ++i)
+                    {
+                      const double L_i_0
+                        = legendre_polynomials[i].value
+                          (quadrature_points[q_point] (0));
+                      const double L_i_1
+                        = legendre_polynomials[i].value
+                          (quadrature_points[q_point] (1));
+                      const double L_i_2
+                        = legendre_polynomials[i].value
+                          (quadrature_points[q_point] (2));
+
+                      for (unsigned int j = 0; j < deg; ++j)
+                        {
+                          const double l_j_0
+                            = L_i_0 * lobatto_polynomials[j + 2].value
+                              (quadrature_points[q_point] (1));
+                          const double l_j_1
+                            = L_i_1 * lobatto_polynomials[j + 2].value
+                              (quadrature_points[q_point] (0));
+                          const double l_j_2
+                            = L_i_2 * lobatto_polynomials[j + 2].value
+                              (quadrature_points[q_point] (0));
+
+                          for (unsigned int k = 0; k < deg; ++k)
+                            {
+                              const double l_k_0
+                                = l_j_0 * lobatto_polynomials[k + 2].value
+                                  (quadrature_points[q_point] (2));
+                              const double l_k_1
+                                = l_j_1 * lobatto_polynomials[k + 2].value
+                                  (quadrature_points[q_point] (2));
+                              const double l_k_2
+                                = l_j_2 * lobatto_polynomials[k + 2].value
+                                  (quadrature_points[q_point] (1));
+
+                              for (unsigned int l = 0; l < 8; ++l)
+                                {
+                                  system_rhs ((i * deg + j) * deg + k,
+                                              3 * l)
+                                  += tmp (3 * l) * l_k_0;
+                                  system_rhs ((i * deg + j) * deg + k,
+                                              3 * l + 1)
+                                  += tmp (3 * l + 1) * l_k_1;
+                                  system_rhs ((i * deg + j) * deg + k,
+                                              3 * l + 2)
+                                  += tmp (3 * l + 2) * l_k_2;
+                                }
+                            }
+                        }
+                    }
+                }
+
+              system_matrix_inv.mmult (solution, system_rhs);
+
+              for (unsigned int i = 0; i < 2; ++i)
+                for (unsigned int j = 0; j < 2; ++j)
+                  for (unsigned int k = 0; k < 2; ++k)
+                    for (unsigned int l = 0; l <= deg; ++l)
+                      for (unsigned int m = 0; m < deg; ++m)
+                        for (unsigned int n = 0; n < deg; ++n)
+                          {
+                            if (std::abs (solution
+                                          ((l * deg + m) * deg + n,
+                                           3 * (i + 2 * (j + 2 * k))))
+                                > 1e-14)
+                              this->restriction[index][2 * (2 * i + j) + k]
+                              ((l * deg + m) * deg + n + n_boundary_dofs,
+                               dof) = solution ((l * deg + m) * deg + n,
+                                                3 * (i + 2 * (j + 2 * k)));
+
+                            if (std::abs (solution
+                                          ((l * deg + m) * deg + n,
+                                           3 * (i + 2 * (j + 2 * k)) + 1))
+                                > 1e-14)
+                              this->restriction[index][2 * (2 * i + j) + k]
+                              ((l + (m + deg) * this->degree) * deg + n
+                               + n_boundary_dofs,
+                               dof) = solution ((l * deg + m) * deg + n,
+                                                3 * (i + 2 * (j + 2 * k)) + 1);
+
+                            if (std::abs (solution
+                                          ((l * deg + m) * deg + n,
+                                           3 * (i + 2 * (j + 2 * k)) + 2))
+                                > 1e-14)
+                              this->restriction[index][2 * (2 * i + j) + k]
+                              (l + ((m + 2 * deg) * deg + n) * this->degree
+                               + n_boundary_dofs, dof)
+                                = solution ((l * deg + m) * deg + n,
+                                            3 * (i + 2 * (j + 2 * k)) + 2);
+                          }
+            }
+        }
+
+      break;
+    }
+
+    default:
+      Assert (false, ExcNotImplemented ());
+    }
+}
+
+
+
+template <int dim>
+std::vector<unsigned int>
+FE_Nedelec<dim>::get_dpo_vector (const unsigned int degree, bool dg)
+{
+  // One entry per object dimension (0 ... dim), all starting at zero.
+  std::vector<unsigned int> dofs_per_object (dim + 1);
+
+  // Discontinuous variant: only the entry for the cell itself is set.
+  if (dg)
+    {
+      dofs_per_object[dim] = PolynomialsNedelec<dim>::compute_n_pols (degree);
+      return dofs_per_object;
+    }
+
+  // Otherwise entry 0 stays zero; entries 1, 2 (and 3 in 3d) are set.
+  const unsigned int n_line_dofs = degree + 1;
+  dofs_per_object[1] = n_line_dofs;
+  dofs_per_object[2] = 2 * degree * n_line_dofs;
+  if (dim == 3)
+    dofs_per_object[3] = 3 * degree * degree * n_line_dofs;
+  return dofs_per_object;
+}
+
+//---------------------------------------------------------------------------
+// Data field initialization
+//---------------------------------------------------------------------------
+
+// Check whether the shape function
+// shape_index has support on the
+// face face_index of the cell.
+//
+// Both indices are range-checked by
+// assertion; the function then
+// switches on dim and on the face
+// and tests shape_index against
+// index ranges given in units of this->degree.
+template <int dim>
+bool
+FE_Nedelec<dim>::has_support_on_face (const unsigned int shape_index,
+                                      const unsigned int face_index) const
+{
+  Assert (shape_index < this->dofs_per_cell,
+          ExcIndexRange (shape_index, 0, this->dofs_per_cell));
+  Assert (face_index < GeometryInfo<dim>::faces_per_cell,
+          ExcIndexRange (face_index, 0, GeometryInfo<dim>::faces_per_cell));
+
+  const unsigned int deg = this->degree-1; // hence "shape_index > deg" below reads "shape_index >= this->degree"
+  switch (dim) // only dim == 2 and dim == 3 are handled
+    {
+    case 2:
+      switch (face_index)
+        {
+        case 0:
+          if (!((shape_index > deg) && (shape_index < 2 * this->degree))) // i.e. index not in [degree, 2 * degree)
+            return true;
+
+          else
+            return false;
+
+        case 1:
+          if ((shape_index > deg) &&
+              (shape_index
+               < GeometryInfo<2>::lines_per_cell * this->degree))
+            return true; // index in [degree, lines_per_cell * degree)
+
+          else
+            return false;
+
+        case 2:
+          if (shape_index < 3 * this->degree) // index in [0, 3 * degree)
+            return true;
+
+          else
+            return false;
+
+        case 3:
+          if (!((shape_index >= 2 * this->degree) &&
+                (shape_index < 3 * this->degree)))
+            return true; // NOTE(review): faces 0 and 3 return true for all sufficiently large indices, faces 1 and 2 return false past their upper bound -- confirm this asymmetry is intended
+
+          else
+            return false;
+
+        default:
+        {
+          Assert (false, ExcNotImplemented ()); // no other face index is handled in 2d
+          return false;
+        }
+        }
+
+    case 3:
+      switch (face_index)
+        {
+        case 0:
+          if (((shape_index > deg) && (shape_index < 2 * this->degree)) ||
+              ((shape_index >= 5 * this->degree) &&
+               (shape_index < 6 * this->degree)) ||
+              ((shape_index >= 9 * this->degree) &&
+               (shape_index < 10 * this->degree)) ||
+              ((shape_index >= 11 * this->degree) &&
+               (shape_index
+                < GeometryInfo<3>::lines_per_cell * this->degree)) ||
+              ((shape_index
+                >= (GeometryInfo<3>::lines_per_cell + 2 * deg) // bounds from here on have the form (lines_per_cell + k * deg) * degree
+                * this->degree) &&
+               (shape_index
+                < (GeometryInfo<3>::lines_per_cell + 4 * deg)
+                * this->degree)) ||
+              ((shape_index
+                >= (GeometryInfo<3>::lines_per_cell + 5 * deg)
+                * this->degree) &&
+               (shape_index
+                < (GeometryInfo<3>::lines_per_cell + 6 * deg)
+                * this->degree)) ||
+              ((shape_index
+                >= (GeometryInfo<3>::lines_per_cell + 7 * deg)
+                * this->degree) &&
+               (shape_index
+                < (GeometryInfo<3>::lines_per_cell + 9 * deg)
+                * this->degree)) ||
+              ((shape_index
+                >= (GeometryInfo<3>::lines_per_cell + 10 * deg)
+                * this->degree) &&
+               (shape_index
+                < (GeometryInfo<3>::lines_per_cell + 11 * deg)
+                * this->degree)))
+            return false; // face 0 is written as an exclusion list: inside the ranges above -> false
+
+          else
+            return true;
+
+        case 1:
+          if (((shape_index > deg) && (shape_index < 4 * this->degree)) ||
+              ((shape_index >= 5 * this->degree) &&
+               (shape_index < 8 * this->degree)) ||
+              ((shape_index >= 9 * this->degree) &&
+               (shape_index < 10 * this->degree)) ||
+              ((shape_index >= 11 * this->degree) &&
+               (shape_index
+                < GeometryInfo<3>::lines_per_cell * this->degree)) ||
+              ((shape_index
+                >= (GeometryInfo<3>::lines_per_cell + 2 * deg)
+                * this->degree) &&
+               (shape_index
+                < (GeometryInfo<3>::lines_per_cell + 5 * deg)
+                * this->degree)) ||
+              ((shape_index
+                >= (GeometryInfo<3>::lines_per_cell + 6 * deg)
+                * this->degree) &&
+               (shape_index
+                < (GeometryInfo<3>::lines_per_cell + 7 * deg)
+                * this->degree)) ||
+              ((shape_index
+                >= (GeometryInfo<3>::lines_per_cell + 9 * deg)
+                * this->degree) &&
+               (shape_index
+                < (GeometryInfo<3>::lines_per_cell + 10 * deg)
+                * this->degree)) ||
+              ((shape_index
+                >= (GeometryInfo<3>::lines_per_cell + 11 * deg)
+                * this->degree) &&
+               (shape_index
+                < (GeometryInfo<3>::lines_per_cell + 12 * deg)
+                * this->degree)))
+            return true; // faces 1 to 5 are written as inclusion lists: inside the ranges above -> true
+
+          else
+            return false;
+
+        case 2:
+          if ((shape_index < 3 * this->degree) ||
+              ((shape_index >= 4 * this->degree) &&
+               (shape_index < 7 * this->degree)) ||
+              ((shape_index >= 8 * this->degree) &&
+               (shape_index < 10 * this->degree)) ||
+              ((shape_index
+                >= (GeometryInfo<3>::lines_per_cell + deg)
+                * this->degree) &&
+               (shape_index
+                < (GeometryInfo<3>::lines_per_cell + 2 * deg)
+                * this->degree)) ||
+              ((shape_index
+                >= (GeometryInfo<3>::lines_per_cell + 3 * deg)
+                * this->degree) &&
+               (shape_index
+                < (GeometryInfo<3>::lines_per_cell + 6 * deg)
+                * this->degree)) ||
+              ((shape_index
+                >= (GeometryInfo<3>::lines_per_cell + 8 * deg)
+                * this->degree) &&
+               (shape_index
+                < (GeometryInfo<3>::lines_per_cell + 9 * deg)
+                * this->degree)) ||
+              ((shape_index
+                >= (GeometryInfo<3>::lines_per_cell + 10 * deg)
+                * this->degree) &&
+               (shape_index
+                < (GeometryInfo<3>::lines_per_cell + 11 * deg)
+                * this->degree)))
+            return true;
+
+          else
+            return false;
+
+        case 3:
+          if ((shape_index < 2 * this->degree) ||
+              ((shape_index >= 3 * this->degree) &&
+               (shape_index < 6 * this->degree)) ||
+              ((shape_index >= 7 * this->degree) &&
+               (shape_index < 8 * this->degree)) ||
+              ((shape_index >= 10 * this->degree) &&
+               (shape_index
+                < GeometryInfo<3>::lines_per_cell * this->degree)) ||
+              ((shape_index
+                >= (GeometryInfo<3>::lines_per_cell + deg)
+                * this->degree) &&
+               (shape_index
+                < (GeometryInfo<3>::lines_per_cell + 2 * deg)
+                * this->degree)) ||
+              ((shape_index
+                >= (GeometryInfo<3>::lines_per_cell + 3 * deg)
+                * this->degree) &&
+               (shape_index
+                < (GeometryInfo<3>::lines_per_cell + 4 * deg)
+                * this->degree)) ||
+              ((shape_index
+                >= (GeometryInfo<3>::lines_per_cell + 6 * deg)
+                * this->degree) &&
+               (shape_index
+                < (GeometryInfo<3>::lines_per_cell + 9 * deg)
+                * this->degree)) ||
+              ((shape_index
+                >= (GeometryInfo<3>::lines_per_cell + 10 * deg)
+                * this->degree) &&
+               (shape_index
+                < (GeometryInfo<3>::lines_per_cell + 11 * deg)
+                * this->degree)))
+            return true;
+
+          else
+            return false;
+
+        case 4:
+          if ((shape_index < 4 * this->degree) ||
+              ((shape_index >= 8 * this->degree) &&
+               (shape_index
+                < (GeometryInfo<3>::lines_per_cell + deg)
+                * this->degree)) ||
+              ((shape_index
+                >= (GeometryInfo<3>::lines_per_cell + 2 * deg)
+                * this->degree) &&
+               (shape_index
+                < (GeometryInfo<3>::lines_per_cell + 3 * deg)
+                * this->degree)) ||
+              ((shape_index
+                >= (GeometryInfo<3>::lines_per_cell + 5 * deg)
+                * this->degree) &&
+               (shape_index
+                < (GeometryInfo<3>::lines_per_cell + 6 * deg)
+                * this->degree)) ||
+              ((shape_index
+                >= (GeometryInfo<3>::lines_per_cell + 7 * deg)
+                * this->degree) &&
+               (shape_index
+                < (GeometryInfo<3>::lines_per_cell + 10 * deg)
+                * this->degree)))
+            return true;
+
+          else
+            return false;
+
+        case 5:
+          if (((shape_index >= 4 * this->degree) &&
+               (shape_index
+                < (GeometryInfo<3>::lines_per_cell + deg)
+                * this->degree)) ||
+              ((shape_index
+                >= (GeometryInfo<3>::lines_per_cell + 2 * deg)
+                * this->degree) &&
+               (shape_index
+                < (GeometryInfo<3>::lines_per_cell + 3 * deg)
+                * this->degree)) ||
+              ((shape_index
+                >= (GeometryInfo<3>::lines_per_cell + 5 * deg)
+                * this->degree) &&
+               (shape_index
+                < (GeometryInfo<3>::lines_per_cell + 6 * deg)
+                * this->degree)) ||
+              ((shape_index
+                >= (GeometryInfo<3>::lines_per_cell + 7 * deg)
+                * this->degree) &&
+               (shape_index
+                < (GeometryInfo<3>::lines_per_cell + 8 * deg)
+                * this->degree)) ||
+              ((shape_index
+                >= (GeometryInfo<3>::lines_per_cell + 10 * deg)
+                * this->degree) &&
+               (shape_index
+                < (GeometryInfo<3>::lines_per_cell + 12 * deg)
+                * this->degree)))
+            return true;
+
+          else
+            return false;
+
+        default:
+        {
+          Assert (false, ExcNotImplemented ()); // no other face index is handled in 3d
+          return false;
+        }
+        }
+
+    default:
+    {
+      Assert (false, ExcNotImplemented ()); // dimensions other than 2 and 3 are not implemented
+      return false;
+    }
+    }
+}
+
+template <int dim>
+FiniteElementDomination::Domination
+FE_Nedelec<dim>::compare_for_face_domination (const FiniteElement<dim> &fe_other) const
+{
+  // Decide which of the two elements meeting at a face restricts the
+  // other one. Only FE_Nedelec and FE_Nothing neighbors are handled.
+
+  // Neighbor is a Nedelec element as well: compare polynomial degrees.
+  const FE_Nedelec<dim> *const other_nedelec
+    = dynamic_cast<const FE_Nedelec<dim>*>(&fe_other);
+
+  if (other_nedelec != 0)
+    {
+      const unsigned int other_degree = other_nedelec->degree;
+
+      if (this->degree == other_degree)
+        return FiniteElementDomination::either_element_can_dominate;
+
+      // the element of lower degree dominates
+      return (this->degree < other_degree
+              ? FiniteElementDomination::this_element_dominates
+              : FiniteElementDomination::other_element_dominates);
+    }
+
+  // Neighbor is an FE_Nothing, i.e. an element without degrees of
+  // freedom.
+  const FE_Nothing<dim> *const other_nothing
+    = dynamic_cast<const FE_Nothing<dim>*>(&fe_other);
+
+  if (other_nothing != 0)
+    {
+      // A dominating FE_Nothing takes precedence; a non-dominating one
+      // imposes no continuity requirement along the interface.
+      return (other_nothing->is_dominating()
+              ? FiniteElementDomination::other_element_dominates
+              : FiniteElementDomination::no_requirements);
+    }
+
+  // Every other kind of neighbor is unsupported.
+  Assert (false, ExcNotImplemented());
+
+  return FiniteElementDomination::neither_element_dominates;
+}
+
+// Reports that this element provides the hp constraint functions.
+template <int dim> bool
+FE_Nedelec<dim>::hp_constraints_are_implemented () const
+{
+  return true; // unconditionally, independent of dim and degree
+}
+
+template <int dim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FE_Nedelec<dim>::hp_vertex_dof_identities (const FiniteElement<dim> &) const
+{
+  // No degrees of freedom live on the vertices of a Nedelec element,
+  // so there is nothing to identify: the result is always empty.
+  typedef std::vector<std::pair<unsigned int, unsigned int> > IdentityList;
+  return IdentityList ();
+}
+
+template <int dim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FE_Nedelec<dim>::hp_line_dof_identities (const FiniteElement<dim> &fe_other)
+const
+{
+  // Returns the pairs (dof on a line of this element, dof on a line
+  // of fe_other) that are to be identified with each other. This can
+  // presently be computed only if fe_other is an FE_Nedelec or an
+  // FE_Nothing.
+  typedef std::pair<unsigned int, unsigned int> DoFPair;
+
+  std::vector<DoFPair> matches;
+
+  const FE_Nedelec<dim> *const other_nedelec
+    = dynamic_cast<const FE_Nedelec<dim>*> (&fe_other);
+
+  if (other_nedelec != 0)
+    {
+      // Edge shape functions of equal polynomial degree coincide, so
+      // the line dofs are matched one-to-one up to the smaller of the
+      // two element degrees.
+      const unsigned int n_shared
+        = std::min (other_nedelec->degree, this->degree);
+
+      for (unsigned int line_dof = 0; line_dof < n_shared; ++line_dof)
+        matches.push_back (DoFPair (line_dof, line_dof));
+
+      return matches;
+    }
+
+  // An FE_Nothing has no degrees of freedom, hence nothing to
+  // record. Any other element type is not implemented; the list
+  // stays empty in that case, too.
+  if (dynamic_cast<const FE_Nothing<dim>*> (&fe_other) == 0)
+    {
+      Assert (false, ExcNotImplemented ());
+    }
+
+  return matches;
+}
+
+template <int dim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FE_Nedelec<dim>::hp_quad_dof_identities (const FiniteElement<dim> &fe_other)
+const
+{
+  // Returns the pairs (dof in the interior of a quad of this element,
+  // dof in the interior of a quad of fe_other) that are to be
+  // identified with each other. This can presently be computed only
+  // if fe_other is an FE_Nedelec or an FE_Nothing.
+  typedef std::pair<unsigned int, unsigned int> DoFPair;
+
+  std::vector<DoFPair> matches;
+
+  const FE_Nedelec<dim> *const other_nedelec
+    = dynamic_cast<const FE_Nedelec<dim>*> (&fe_other);
+
+  if (other_nedelec != 0)
+    {
+      // Face shape functions of equal polynomial degree coincide.
+      // Every (outer, inner) combination below yields two identities;
+      // the index on each side is computed from that element's own
+      // degree.
+      const unsigned int other_deg = other_nedelec->degree;
+      const unsigned int own_deg = this->degree;
+      const unsigned int shared_deg = std::min (other_deg, own_deg);
+
+      for (unsigned int outer = 0; outer < shared_deg; ++outer)
+        for (unsigned int inner = 0; inner < shared_deg - 1; ++inner)
+          {
+            matches.push_back (DoFPair (outer * (own_deg - 1) + inner,
+                                        outer * (other_deg - 1) + inner));
+            matches.push_back (DoFPair (outer + (inner + own_deg - 1) * own_deg,
+                                        outer + (inner + other_deg - 1) * other_deg));
+          }
+
+      return matches;
+    }
+
+  // An FE_Nothing has no degrees of freedom, hence nothing to
+  // record. Any other element type is not implemented; the list
+  // stays empty in that case, too.
+  if (dynamic_cast<const FE_Nothing<dim>*> (&fe_other) == 0)
+    {
+      Assert (false, ExcNotImplemented ());
+    }
+
+  return matches;
+}
+
+// Compute the face interpolation matrix
+// from the source element to this one.
+// Normally this calls for projection-based
+// interpolation, but the entries are
+// simple enough to be written down by
+// hand, so we save the computation and
+// just fill in the correct values.
+template <int dim>
+void
+FE_Nedelec<dim>::get_face_interpolation_matrix
+(const FiniteElement<dim> &source, FullMatrix<double> &interpolation_matrix)
+const
+{
+  // the implementation requires the
+  // source FE to be a Nedelec element
+  // as well
+  typedef FE_Nedelec<dim> FEN;
+  typedef FiniteElement<dim> FEL;
+
+  AssertThrow ((source.get_name ().find ("FE_Nedelec<") == 0) ||
+               (dynamic_cast<const FEN *> (&source) != 0),
+               typename FEL::ExcInterpolationNotImplemented());
+  Assert (interpolation_matrix.m () == source.dofs_per_face,
+          ExcDimensionMismatch (interpolation_matrix.m (),
+                                source.dofs_per_face));
+  Assert (interpolation_matrix.n () == this->dofs_per_face,
+          ExcDimensionMismatch (interpolation_matrix.n (),
+                                this->dofs_per_face));
+
+  // the checks above passed, so the
+  // source can be used as a Nedelec element
+  const FE_Nedelec<dim> &source_fe
+    = dynamic_cast<const FE_Nedelec<dim>&> (source);
+
+  // The element whose DoFs are to be
+  // constrained has to be the one of
+  // higher polynomial degree. The
+  // procedure below would also run if
+  // this were violated, but the
+  // resulting matrices might cause
+  // problems in the hp procedures
+  // relying on this method.
+  Assert (this->dofs_per_face <= source_fe.dofs_per_face,
+          typename FEL::ExcInterpolationNotImplemented ());
+  interpolation_matrix = 0;
+
+  const unsigned int source_degree = source_fe.degree;
+  const unsigned int own_degree = this->degree;
+
+  // On the first line all degrees of
+  // freedom are simply identified.
+  for (unsigned int k = 0; k < own_degree; ++k)
+    interpolation_matrix (k, k) = 1.0;
+
+  // Nothing more to do unless we are in 3d.
+  if (dim != 3)
+    return;
+
+  // In 3d the face has further lines
+  // and an interior. The idea is the
+  // same as above, only the indices
+  // need more care.
+  const unsigned int n_lines = GeometryInfo<dim>::lines_per_face;
+  const unsigned int source_quad_start = n_lines * source_degree;
+  const unsigned int own_quad_start = n_lines * own_degree;
+
+  for (unsigned int i = 0; i < own_degree; ++i)
+    {
+      for (unsigned int line = 1; line < n_lines; ++line)
+        interpolation_matrix (line * source_degree + i,
+                              line * own_degree + i) = 1.0;
+
+      for (unsigned int j = 0; j < own_degree - 1; ++j)
+        {
+          interpolation_matrix (source_quad_start + i * (source_degree - 1) + j,
+                                own_quad_start + i * (own_degree - 1) + j)
+            = 1.0;
+          interpolation_matrix (source_quad_start + i + (j + source_degree - 1) * source_degree,
+                                own_quad_start + i + (j + own_degree - 1) * own_degree)
+            = 1.0;
+        }
+    }
+}
+
+
+
+template <>
+void
+FE_Nedelec<1>::get_subface_interpolation_matrix(
+  const FiniteElement<1,1> &,  // unused
+  const unsigned int,          // unused
+  FullMatrix<double> &) const  // left untouched
+{
+  Assert (false, ExcNotImplemented ()); // specialization for dim == 1: not implemented
+}
+
+
+
+// In this function we compute the
+// subface interpolation matrix.
+// This is done by projection-
+// based interpolation: first the
+// shape functions of this
+// element are interpolated
+// onto the lowest order edge
+// shape functions of the source.
+// Then the remaining part of
+// the interpolated shape
+// functions is projected onto the
+// higher order edge shape
+// functions, the face shape
+// functions and the interior
+// shape functions (if they all
+// exist).
+template <int dim>
+void
+FE_Nedelec<dim>::get_subface_interpolation_matrix(
+  const FiniteElement<dim> &source, // must be an FE_Nedelec; its face dofs index the matrix rows
+  const unsigned int subface, // used as a shift in 2d and as an index into shifts[] in 3d
+  FullMatrix<double> &interpolation_matrix) const // source.dofs_per_face x this->dofs_per_face, overwritten
+{
+  // this is only implemented if the
+  // source FE is also a
+  // Nedelec element
+  typedef FE_Nedelec<dim> FEN;
+  typedef FiniteElement<dim> FEL;
+
+  AssertThrow ((source.get_name ().find ("FE_Nedelec<") == 0) ||
+               (dynamic_cast<const FEN *> (&source) != 0),
+               typename FEL::ExcInterpolationNotImplemented ());
+  Assert (interpolation_matrix.m () == source.dofs_per_face,
+          ExcDimensionMismatch (interpolation_matrix.m (),
+                                source.dofs_per_face));
+  Assert (interpolation_matrix.n () == this->dofs_per_face,
+          ExcDimensionMismatch (interpolation_matrix.n (),
+                                this->dofs_per_face));
+
+  // ok, source is a Nedelec element, so
+  // we will be able to do the work
+  const FE_Nedelec<dim> &source_fe
+    = dynamic_cast<const FE_Nedelec<dim>&> (source);
+
+  // Make sure that the element
+  // for which the DoFs should be
+  // constrained is the one with
+  // the higher polynomial degree.
+  // Actually the procedure will
+  // also work if this assertion is
+  // not satisfied. But the matrices
+  // produced in that case might
+  // lead to problems in the
+  // hp procedures which use this
+  // method.
+  Assert (this->dofs_per_face <= source_fe.dofs_per_face,
+          typename FEL::ExcInterpolationNotImplemented ());
+  interpolation_matrix = 0.0;
+  // Perform projection-based interpolation
+  // as usual.
+  const QGauss<1> edge_quadrature (source_fe.degree);
+  const std::vector<Point<1> > &
+  edge_quadrature_points = edge_quadrature.get_points ();
+  const unsigned int &n_edge_quadrature_points = edge_quadrature.size ();
+
+  switch (dim)
+    {
+    case 2: // dim == 2: row 0 is integrated directly, the rows after it come from a projection
+    {
+      for (unsigned int dof = 0; dof < this->dofs_per_face; ++dof)
+        for (unsigned int q_point = 0; q_point < n_edge_quadrature_points;
+             ++q_point)
+          {
+            const Point<dim> quadrature_point (0.0,
+                                               0.5 * (edge_quadrature_points[q_point] (0)
+                                                      + subface)); // y = (x_q + subface) / 2 on the line x = 0
+
+            interpolation_matrix (0, dof) += 0.5
+                                             * edge_quadrature.weight (q_point)
+                                             * this->shape_value_component
+                                             (dof, quadrature_point, 1);
+          }
+
+      if (source_fe.degree > 1) // the rows after row 0 are only filled for degree > 1
+        {
+          const std::vector<Polynomials::Polynomial<double> > &
+          legendre_polynomials
+            = Polynomials::Legendre::generate_complete_basis (source_fe.degree - 1);
+          FullMatrix<double> system_matrix_inv (source_fe.degree - 1,
+                                                source_fe.degree - 1);
+
+          {
+            FullMatrix<double> assembling_matrix (source_fe.degree - 1,
+                                                  n_edge_quadrature_points);
+
+            for (unsigned int q_point = 0;
+                 q_point < n_edge_quadrature_points; ++q_point)
+              {
+                const double weight
+                  = std::sqrt (edge_quadrature.weight (q_point)); // square root, since the matrix is multiplied with its own transpose below
+
+                for (unsigned int i = 0; i < source_fe.degree - 1; ++i)
+                  assembling_matrix (i, q_point) = weight
+                                                   * legendre_polynomials[i + 1].value
+                                                   (edge_quadrature_points[q_point] (0));
+              }
+
+            FullMatrix<double> system_matrix (source_fe.degree - 1, source_fe.degree - 1);
+
+            assembling_matrix.mTmult (system_matrix, assembling_matrix);
+            system_matrix_inv.invert (system_matrix); // only the inverse is kept; it is reused for every dof below
+          }
+
+          Vector<double> solution (source_fe.degree - 1);
+          Vector<double> system_rhs (source_fe.degree - 1);
+
+          for (unsigned int dof = 0; dof < this->dofs_per_face; ++dof)
+            {
+              system_rhs = 0.0;
+
+              for (unsigned int q_point = 0;
+                   q_point < n_edge_quadrature_points; ++q_point)
+                {
+                  const Point<dim> quadrature_point_0 (0.0,
+                                                       0.5 * (edge_quadrature_points[q_point] (0)
+                                                              + subface));
+                  const Point<dim> quadrature_point_1 (0.0,
+                                                       edge_quadrature_points[q_point] (0));
+                  const double tmp = edge_quadrature.weight (q_point) // weighted remainder after subtracting the row-0 contribution
+                                     * (0.5 * this->shape_value_component
+                                        (dof, quadrature_point_0, 1)
+                                        - interpolation_matrix (0,
+                                                                dof)
+                                        * source_fe.shape_value_component
+                                        (0, quadrature_point_1, 1));
+
+                  for (unsigned int i = 0; i < source_fe.degree - 1; ++i)
+                    system_rhs (i) += tmp
+                                      * legendre_polynomials[i + 1].value
+                                      (edge_quadrature_points[q_point] (0));
+                }
+
+              system_matrix_inv.vmult (solution, system_rhs);
+
+              for (unsigned int i = 0; i < source_fe.degree - 1; ++i)
+                if (std::abs (solution (i)) > 1e-14) // entries of round-off size stay zero
+                  interpolation_matrix (i + 1, dof) = solution (i);
+            }
+        }
+
+      break;
+    }
+
+    case 3: // dim == 3: edge rows are computed first, then the rows starting at n_boundary_dofs
+    {
+      const double shifts[4][2] = { { 0.0, 0.0 }, { 1.0, 0.0 }, // offsets in the two face coordinates, indexed by subface
+        { 0.0, 1.0 }, { 1.0, 1.0 }
+      };
+
+      for (unsigned int dof = 0; dof < this->dofs_per_face; ++dof)
+        for (unsigned int q_point = 0; q_point < n_edge_quadrature_points;
+             ++q_point)
+          {
+            const double weight = 0.5 * edge_quadrature.weight (q_point);
+
+            for (unsigned int i = 0; i < 2; ++i)
+              {
+                Point<dim>
+                quadrature_point (0.5 * (i + shifts[subface][0]),
+                                  0.5 * (edge_quadrature_points[q_point] (0)
+                                         + shifts[subface][1]),
+                                  0.0); // third coordinate is zero: all points lie in the plane z = 0
+
+                interpolation_matrix (i * source_fe.degree, dof) += weight
+                                                                    * this->shape_value_component
+                                                                    (this->face_to_cell_index (dof, 4),
+                                                                     quadrature_point,
+                                                                     1);
+                quadrature_point
+                  = Point<dim> (0.5 * (edge_quadrature_points[q_point] (0)
+                                       + shifts[subface][0]),
+                                0.5 * (i + shifts[subface][1]), 0.0);
+                interpolation_matrix ((i + 2) * source_fe.degree, dof)
+                += weight * this->shape_value_component
+                   (this->face_to_cell_index (dof, 4),
+                    quadrature_point, 0);
+              }
+          }
+
+      if (source_fe.degree > 1) // everything below is only needed for degree > 1
+        {
+          const std::vector<Polynomials::Polynomial<double> > &
+          legendre_polynomials
+            = Polynomials::Legendre::generate_complete_basis (source_fe.degree - 1);
+          FullMatrix<double> system_matrix_inv (source_fe.degree - 1,
+                                                source_fe.degree - 1);
+
+          {
+            FullMatrix<double> assembling_matrix (source_fe.degree - 1,
+                                                  n_edge_quadrature_points);
+
+            for (unsigned int q_point = 0;
+                 q_point < n_edge_quadrature_points; ++q_point)
+              {
+                const double weight
+                  = std::sqrt (edge_quadrature.weight (q_point));
+
+                for (unsigned int i = 0; i < source_fe.degree - 1; ++i)
+                  assembling_matrix (i, q_point) = weight
+                                                   * legendre_polynomials[i + 1].value
+                                                   (edge_quadrature_points[q_point] (0));
+              }
+
+            FullMatrix<double> system_matrix (source_fe.degree - 1, source_fe.degree - 1);
+
+            assembling_matrix.mTmult (system_matrix, assembling_matrix);
+            system_matrix_inv.invert (system_matrix); // only the inverse is kept; it is reused for every dof below
+          }
+
+          FullMatrix<double> solution (source_fe.degree - 1,
+                                       GeometryInfo<dim>::lines_per_face);
+          FullMatrix<double> system_rhs (source_fe.degree - 1,
+                                         GeometryInfo<dim>::lines_per_face);
+          Vector<double> tmp (GeometryInfo<dim>::lines_per_face); // one remainder value per line of the face
+
+          for (unsigned int dof = 0; dof < this->dofs_per_face; ++dof)
+            {
+              system_rhs = 0.0;
+
+              for (unsigned int q_point = 0;
+                   q_point < n_edge_quadrature_points; ++q_point)
+                {
+                  const double weight = edge_quadrature.weight (q_point);
+
+                  for (unsigned int i = 0; i < 2; ++i)
+                    {
+                      Point<dim>
+                      quadrature_point_0
+                      (0.5 * (i + shifts[subface][0]),
+                       0.5 * (edge_quadrature_points[q_point] (0)
+                              + shifts[subface][1]), 0.0);
+                      Point<dim> quadrature_point_1 (i,
+                                                     edge_quadrature_points[q_point] (0),
+                                                     0.0);
+
+                      tmp (i) = weight
+                                * (0.5 * this->shape_value_component
+                                   (this->face_to_cell_index (dof, 4),
+                                    quadrature_point_0, 1)
+                                   - interpolation_matrix
+                                   (i * source_fe.degree, dof)
+                                   * source_fe.shape_value_component
+                                   (i * source_fe.degree,
+                                    quadrature_point_1, 1));
+                      quadrature_point_0
+                        = Point<dim> (0.5 * (edge_quadrature_points[q_point] (0)
+                                             + shifts[subface][0]),
+                                      0.5 * (i + shifts[subface][1]),
+                                      0.0);
+                      quadrature_point_1
+                        = Point<dim> (edge_quadrature_points[q_point] (0),
+                                      i, 0.0);
+                      tmp (i + 2) = weight
+                                    * (0.5 * this->shape_value_component
+                                       (this->face_to_cell_index (dof, 4),
+                                        quadrature_point_0, 0)
+                                       - interpolation_matrix
+                                       ((i + 2) * source_fe.degree,
+                                        dof)
+                                       * source_fe.shape_value_component
+                                       ((i + 2) * source_fe.degree,
+                                        quadrature_point_1, 0));
+                    }
+
+                  for (unsigned int i = 0; i < source_fe.degree - 1; ++i)
+                    {
+                      const double L_i
+                        = legendre_polynomials[i + 1].value
+                          (edge_quadrature_points[q_point] (0));
+
+                      for (unsigned int j = 0;
+                           j < GeometryInfo<dim>::lines_per_face; ++j)
+                        system_rhs (i, j) += tmp (j) * L_i;
+                    }
+                }
+
+              system_matrix_inv.mmult (solution, system_rhs);
+
+              for (unsigned int i = 0;
+                   i < GeometryInfo<dim>::lines_per_face; ++i)
+                for (unsigned int j = 0; j < source_fe.degree - 1; ++j)
+                  if (std::abs (solution (j, i)) > 1e-14) // entries of round-off size stay zero
+                    interpolation_matrix (i * source_fe.degree + j + 1,
+                                          dof) = solution (j, i);
+            }
+
+          const QGauss<2> quadrature (source_fe.degree); // two-dimensional quadrature for the rows starting at n_boundary_dofs
+          const std::vector<Point<2> > &
+          quadrature_points = quadrature.get_points ();
+          const std::vector<Polynomials::Polynomial<double> > &
+          lobatto_polynomials
+            = Polynomials::Lobatto::generate_complete_basis
+              (source_fe.degree);
+          const unsigned int n_boundary_dofs
+            = GeometryInfo<dim>::lines_per_face * source_fe.degree; // first row index after the rows written for the lines
+          const unsigned int &n_quadrature_points = quadrature.size ();
+
+          {
+            FullMatrix<double>
+            assembling_matrix (source_fe.degree * (source_fe.degree - 1),
+                               n_quadrature_points);
+
+            for (unsigned int q_point = 0; q_point < n_quadrature_points;
+                 ++q_point)
+              {
+                const double weight = std::sqrt (quadrature.weight (q_point));
+
+                for (unsigned int i = 0; i < source_fe.degree; ++i)
+                  {
+                    const double L_i = weight
+                                       * legendre_polynomials[i].value
+                                       (quadrature_points[q_point] (0));
+
+                    for (unsigned int j = 0; j < source_fe.degree - 1; ++j)
+                      assembling_matrix (i * (source_fe.degree - 1) + j,
+                                         q_point)
+                        = L_i * lobatto_polynomials[j + 2].value
+                          (quadrature_points[q_point] (1));
+                  }
+              }
+
+            FullMatrix<double> system_matrix (assembling_matrix.m (),
+                                              assembling_matrix.m ());
+
+            assembling_matrix.mTmult (system_matrix, assembling_matrix);
+            system_matrix_inv.reinit (system_matrix.m (),
+                                      system_matrix.m ());
+            system_matrix_inv.invert (system_matrix); // system_matrix_inv is reused here with the new size
+          }
+
+          solution.reinit (system_matrix_inv.m (), 2); // column 0 belongs to vector component 0, column 1 to component 1
+          system_rhs.reinit (system_matrix_inv.m (), 2);
+          tmp.reinit (2);
+
+          for (unsigned int dof = 0; dof < this->dofs_per_face; ++dof)
+            {
+              system_rhs = 0.0;
+
+              for (unsigned int q_point = 0;
+                   q_point < n_quadrature_points; ++q_point)
+                {
+                  Point<dim>
+                  quadrature_point
+                  (0.5 * (quadrature_points[q_point] (0)
+                          + shifts[subface][0]),
+                   0.5 * (quadrature_points[q_point] (1)
+                          + shifts[subface][1]), 0.0);
+                  tmp (0) = 0.5 * this->shape_value_component
+                            (this->face_to_cell_index (dof, 4),
+                             quadrature_point, 0);
+                  tmp (1) = 0.5 * this->shape_value_component
+                            (this->face_to_cell_index (dof, 4),
+                             quadrature_point, 1);
+                  quadrature_point
+                    = Point<dim> (quadrature_points[q_point] (0),
+                                  quadrature_points[q_point] (1), 0.0);
+
+                  for (unsigned int i = 0; i < 2; ++i)
+                    for (unsigned int j = 0; j < source_fe.degree; ++j)
+                      {
+                        tmp (0) -= interpolation_matrix // subtract what the line rows computed above already represent
+                                   ((i + 2) * source_fe.degree + j, dof)
+                                   * source_fe.shape_value_component
+                                   ((i + 2) * source_fe.degree + j,
+                                    quadrature_point, 0);
+                        tmp (1) -= interpolation_matrix
+                                   (i * source_fe.degree + j, dof)
+                                   * source_fe.shape_value_component
+                                   (i * source_fe.degree + j,
+                                    quadrature_point, 1);
+                      }
+
+                  tmp *= quadrature.weight (q_point);
+
+                  for (unsigned int i = 0; i < source_fe.degree; ++i)
+                    {
+                      const double L_i_0 = legendre_polynomials[i].value
+                                           (quadrature_points[q_point] (0));
+                      const double L_i_1 = legendre_polynomials[i].value
+                                           (quadrature_points[q_point] (1));
+
+                      for (unsigned int j = 0; j < source_fe.degree - 1; ++j)
+                        {
+                          system_rhs (i * (source_fe.degree - 1) + j, 0)
+                          += tmp (0) * L_i_0
+                             * lobatto_polynomials[j + 2].value
+                             (quadrature_points[q_point] (1));
+                          system_rhs (i * (source_fe.degree - 1) + j, 1)
+                          += tmp (1) * L_i_1
+                             * lobatto_polynomials[j + 2].value
+                             (quadrature_points[q_point] (0));
+                        }
+                    }
+                }
+
+              system_matrix_inv.mmult (solution, system_rhs);
+
+              for (unsigned int i = 0; i < source_fe.degree; ++i)
+                for (unsigned int j = 0; j < source_fe.degree - 1; ++j)
+                  {
+                    if (std::abs (solution (i * (source_fe.degree - 1) + j, 0))
+                        > 1e-14) // entries of round-off size stay zero
+                      interpolation_matrix (i * (source_fe.degree - 1)
+                                            + j + n_boundary_dofs, dof)
+                        = solution (i * (source_fe.degree - 1) + j, 0);
+
+                    if (std::abs (solution (i * (source_fe.degree - 1) + j, 1))
+                        > 1e-14) // entries of round-off size stay zero
+                      interpolation_matrix (i + (j + source_fe.degree - 1)
+                                            * source_fe.degree
+                                            + n_boundary_dofs, dof)
+                        = solution (i * (source_fe.degree - 1) + j, 1);
+                  }
+            }
+        }
+
+      break;
+    }
+
+    default: // any other dim is not implemented
+      Assert (false, ExcNotImplemented ());
+    }
+}
+
+template <int dim>
+const FullMatrix<double> & // reference to an entry of this->prolongation
+FE_Nedelec<dim>
+::get_prolongation_matrix (const unsigned int child,
+                           const RefinementCase<dim> &refinement_case) const
+{
+  Assert (refinement_case<RefinementCase<dim>::isotropic_refinement+1,
+          ExcIndexRange(refinement_case,0,RefinementCase<dim>::isotropic_refinement+1));
+  Assert (refinement_case!=RefinementCase<dim>::no_refinement,
+          ExcMessage("Prolongation matrices are only available for refined cells!"));
+  Assert (child<GeometryInfo<dim>::n_children(refinement_case),
+          ExcIndexRange(child,0,GeometryInfo<dim>::n_children(refinement_case)));
+
+  // initialization upon first request: n() == 0 is taken to mean "not set up yet"
+  if (this->prolongation[refinement_case-1][child].n() == 0) // NOTE(review): this first check happens before the mutex is taken
+    {
+      Threads::Mutex::ScopedLock lock(this->mutex);
+
+      // if the matrix got filled while we were waiting for the lock, use it
+      if (this->prolongation[refinement_case-1][child].n() ==
+          this->dofs_per_cell)
+        return this->prolongation[refinement_case-1][child];
+
+      // now do the work. we need a non-const view of this object in order to
+      // be able to modify its data inside a const function
+      FE_Nedelec<dim> &this_nonconst = const_cast<FE_Nedelec<dim>& >(*this);
+
+      // Reinit the vectors of
+      // restriction and prolongation
+      // matrices to the right sizes.
+      // Restriction only for isotropic
+      // refinement
+#ifdef DEBUG_NEDELEC
+      deallog << "Embedding" << std::endl;
+#endif
+      this_nonconst.reinit_restriction_and_prolongation_matrices ();
+      // Fill prolongation matrices with embedding operators
+      FETools::compute_embedding_matrices (this_nonconst, this_nonconst.prolongation, true);
+#ifdef DEBUG_NEDELEC
+      deallog << "Restriction" << std::endl;
+#endif
+      this_nonconst.initialize_restriction (); // presumably fills the restriction matrices in the same pass; confirm
+    }
+
+  // we use refinement_case-1 here. the -1 takes care of the origin of the
+  // vector, as for RefinementCase<dim>::no_refinement (=0) there is no data
+  // available and so the vector indices are shifted
+  return this->prolongation[refinement_case-1][child];
+}
+
+template <int dim>
+const FullMatrix<double> & // reference to an entry of this->restriction
+FE_Nedelec<dim>
+::get_restriction_matrix (const unsigned int child,
+                          const RefinementCase<dim> &refinement_case) const
+{
+  Assert (refinement_case<RefinementCase<dim>::isotropic_refinement+1,
+          ExcIndexRange(refinement_case,0,RefinementCase<dim>::isotropic_refinement+1));
+  Assert (refinement_case!=RefinementCase<dim>::no_refinement,
+          ExcMessage("Restriction matrices are only available for refined cells!"));
+  Assert (child<GeometryInfo<dim>::n_children(RefinementCase<dim>(refinement_case)),
+          ExcIndexRange(child,0,GeometryInfo<dim>::n_children(RefinementCase<dim>(refinement_case))));
+
+  // initialization upon first request: n() == 0 is taken to mean "not set up yet"
+  if (this->restriction[refinement_case-1][child].n() == 0) // NOTE(review): this first check happens before the mutex is taken
+    {
+      Threads::Mutex::ScopedLock lock(this->mutex);
+
+      // if the matrix got filled while we were waiting for the lock, use it
+      if (this->restriction[refinement_case-1][child].n() ==
+          this->dofs_per_cell)
+        return this->restriction[refinement_case-1][child];
+
+      // now do the work. we need a non-const view of this object in order to
+      // be able to modify its data inside a const function
+      FE_Nedelec<dim> &this_nonconst = const_cast<FE_Nedelec<dim>& >(*this);
+
+      // Reinit the vectors of
+      // restriction and prolongation
+      // matrices to the right sizes.
+      // Restriction only for isotropic
+      // refinement
+#ifdef DEBUG_NEDELEC
+      deallog << "Embedding" << std::endl;
+#endif
+      this_nonconst.reinit_restriction_and_prolongation_matrices ();
+      // Fill prolongation matrices with embedding operators
+      FETools::compute_embedding_matrices (this_nonconst, this_nonconst.prolongation, true);
+#ifdef DEBUG_NEDELEC
+      deallog << "Restriction" << std::endl;
+#endif
+      this_nonconst.initialize_restriction (); // presumably fills this->restriction, which is returned below; confirm
+    }
+
+  // we use refinement_case-1 here. the -1 takes care of the origin of the
+  // vector, as for RefinementCase<dim>::no_refinement (=0) there is no data
+  // available and so the vector indices are shifted
+  return this->restriction[refinement_case-1][child];
+}
+
+// Since this is a vector valued element, a scalar function
+// cannot be interpolated; both arguments are ignored.
+template <int dim>
+void FE_Nedelec<dim>::interpolate (std::vector<double> &, const std::vector<double> &) const
+{
+  Assert(false, ExcNotImplemented ()); // this overload is not implemented
+}
+
+
+// Interpolate a function, which is given by
+// its values at the generalized support
+// points in the finite element space on the
+// reference cell.
+// This is done as usual by projection-based
+// interpolation.
+template <int dim>
+void
+FE_Nedelec<dim>::interpolate (std::vector<double> &local_dofs,
+                              const std::vector<Vector<double> > &values,
+                              unsigned int offset) const
+{
+  const unsigned int deg = this->degree-1;
+
+  Assert (values.size () == this->generalized_support_points.size (),
+          ExcDimensionMismatch (values.size (),
+                                this->generalized_support_points.size ()));
+  Assert (local_dofs.size () == this->dofs_per_cell,
+          ExcDimensionMismatch (local_dofs.size (),this->dofs_per_cell));
+  Assert (values[0].size () >= offset + this->n_components (),
+          ExcDimensionMismatch (values[0].size (),
+                                offset + this->n_components ()));
+  std::fill (local_dofs.begin (), local_dofs.end (), 0.);
+
+  if (offset < dim)
+    switch (dim)
+      {
+      case 2:
+      {
+        const QGauss<1> reference_edge_quadrature (this->degree);
+        const unsigned int &n_edge_points
+          = reference_edge_quadrature.size ();
+
+        // Let us begin with the
+        // interpolation part.
+        for (unsigned int i = 0; i < 2; ++i)
+          {
+            for (unsigned int q_point = 0; q_point < n_edge_points;
+                 ++q_point)
+              local_dofs[i * this->degree]
+              += reference_edge_quadrature.weight (q_point)
+                 * values[q_point + i * n_edge_points] (1);
+
+            // Add the computed values
+            // to the resulting vector
+            // only, if they are not
+            // too small.
+            if (std::abs (local_dofs[i * this->degree]) < 1e-14)
+              local_dofs[i * this->degree] = 0.0;
+          }
+
+        if (offset == 0)
+          for (unsigned int i = 0; i < 2; ++i)
+            {
+              for (unsigned int q_point = 0; q_point < n_edge_points;
+                   ++q_point)
+                local_dofs[(i + 2) * this->degree]
+                += reference_edge_quadrature.weight (q_point)
+                   * values[q_point + (i + 2) * n_edge_points] (0);
+
+              if (std::abs (local_dofs[(i + 2) * this->degree]) < 1e-14)
+                local_dofs[(i + 2) * this->degree] = 0.0;
+            }
+
+        // If the degree is greater
+        // than 0, then we have still
+        // some higher order edge
+        // shape functions to
+        // consider.
+        // Here the projection part
+        // starts. The dof values
+        // are obtained by solving
+        // a linear system of
+        // equations.
+        if (this->degree > 1)
+          {
+            // We start with projection
+            // on the higher order edge
+            // shape function.
+            const std::vector<Polynomials::Polynomial<double> > &
+            lobatto_polynomials
+              = Polynomials::Lobatto::generate_complete_basis
+                (this->degree);
+            const unsigned int
+            line_coordinate[GeometryInfo<2>::lines_per_cell]
+              = {1, 1, 0, 0};
+            std::vector<Polynomials::Polynomial<double> >
+            lobatto_polynomials_grad (this->degree);
+
+            for (unsigned int i = 0; i < lobatto_polynomials_grad.size ();
+                 ++i)
+              lobatto_polynomials_grad[i]
+                = lobatto_polynomials[i + 1].derivative ();
+
+            // Set up the system matrix.
+            // This can be used for all
+            // edges.
+            FullMatrix<double> system_matrix (this->degree-1, this->degree-1);
+
+            for (unsigned int i = 0; i < system_matrix.m (); ++i)
+              for (unsigned int j = 0; j < system_matrix.n (); ++j)
+                for (unsigned int q_point = 0; q_point < n_edge_points;
+                     ++q_point)
+                  system_matrix (i, j)
+                  += boundary_weights (q_point, j)
+                     * lobatto_polynomials_grad[i + 1].value
+                     (this->generalized_face_support_points[q_point]
+                      (1));
+
+            FullMatrix<double> system_matrix_inv (this->degree-1, this->degree-1);
+
+            system_matrix_inv.invert (system_matrix);
+
+            Vector<double> system_rhs (system_matrix.m ());
+            Vector<double> solution (system_rhs.size ());
+
+            for (unsigned int line = 0;
+                 line < GeometryInfo<dim>::lines_per_cell; ++line)
+              if ((line < 2) || (offset == 0))
+                {
+                  // Set up the right hand side.
+                  system_rhs = 0;
+
+                  for (unsigned int q_point = 0; q_point < n_edge_points;
+                       ++q_point)
+                    {
+                      const double tmp
+                        = values[line * n_edge_points + q_point]
+                          (line_coordinate[line])
+                          - local_dofs[line * this->degree]
+                          * this->shape_value_component
+                          (line * this->degree,
+                           this->generalized_support_points[line
+                                                            * n_edge_points
+                                                            + q_point],
+                           line_coordinate[line]);
+
+                      for (unsigned int i = 0; i < system_rhs.size ();
+                           ++i)
+                        system_rhs (i) += boundary_weights (q_point, i)
+                                          * tmp;
+                    }
+
+                  system_matrix_inv.vmult (solution, system_rhs);
+
+                  // Store the computed values
+                  // in the resulting vector
+                  // only if they are not
+                  // too small.
+                  for (unsigned int i = 0; i < solution.size (); ++i)
+                    if (std::abs (solution (i)) > 1e-14)
+                      local_dofs[line * this->degree + i + 1]
+                        = solution (i);
+                }
+
+            // Then we go on to the
+            // interior shape
+            // functions. Again we
+            // set up the system
+            // matrix and use it
+            // for both the
+            // horizontal and the
+            // vertical interior
+            // shape functions.
+            const QGauss<dim> reference_quadrature (this->degree);
+            const std::vector<Polynomials::Polynomial<double> > &
+            legendre_polynomials
+              = Polynomials::Legendre::generate_complete_basis (this->degree-1);
+            const unsigned int &n_interior_points
+              = reference_quadrature.size ();
+
+            system_matrix.reinit ((this->degree-1) * this->degree,
+                                  (this->degree-1) * this->degree);
+            system_matrix = 0;
+
+            for (unsigned int i = 0; i < this->degree; ++i)
+              for (unsigned int j = 0; j < this->degree-1; ++j)
+                for (unsigned int k = 0; k < this->degree; ++k)
+                  for (unsigned int l = 0; l < this->degree-1; ++l)
+                    for (unsigned int q_point = 0;
+                         q_point < n_interior_points; ++q_point)
+                      system_matrix (i * (this->degree-1) + j, k * (this->degree-1) + l)
+                      += reference_quadrature.weight (q_point)
+                         * legendre_polynomials[i].value
+                         (this->generalized_support_points[q_point
+                                                           + GeometryInfo<dim>::lines_per_cell
+                                                           * n_edge_points]
+                          (0))
+                         * lobatto_polynomials[j + 2].value
+                         (this->generalized_support_points[q_point
+                                                           + GeometryInfo<dim>::lines_per_cell
+                                                           * n_edge_points]
+                          (1))
+                         * lobatto_polynomials_grad[k].value
+                         (this->generalized_support_points[q_point
+                                                           + GeometryInfo<dim>::lines_per_cell
+                                                           * n_edge_points]
+                          (0))
+                         * lobatto_polynomials[l + 2].value
+                         (this->generalized_support_points[q_point
+                                                           + GeometryInfo<dim>::lines_per_cell
+                                                           * n_edge_points]
+                          (1));
+
+            system_matrix_inv.reinit (system_matrix.m (),
+                                      system_matrix.m ());
+            system_matrix_inv.invert (system_matrix);
+            solution.reinit (system_matrix_inv.m ());
+            system_rhs.reinit (system_matrix.m ());
+
+            if (offset == 0)
+              {
+                // Set up the right hand side
+                // for the horizontal shape
+                // functions.
+                system_rhs = 0;
+
+                for (unsigned int q_point = 0;
+                     q_point < n_interior_points; ++q_point)
+                  {
+                    double tmp
+                      = values[q_point + GeometryInfo<dim>::lines_per_cell
+                               * n_edge_points] (0);
+
+                    for (unsigned int i = 0; i < 2; ++i)
+                      for (unsigned int j = 0; j < this->degree; ++j)
+                        tmp -= local_dofs[(i + 2) * this->degree + j]
+                               * this->shape_value_component
+                               ((i + 2) * this->degree + j,
+                                this->generalized_support_points[q_point
+                                                                 + GeometryInfo<dim>::lines_per_cell
+                                                                 * n_edge_points],
+                                0);
+
+                    for (unsigned int i = 0; i < this->degree; ++i)
+                      for (unsigned int j = 0; j < this->degree-1; ++j)
+                        system_rhs (i * (this->degree-1) + j)
+                        += reference_quadrature.weight (q_point) * tmp
+                           * lobatto_polynomials_grad[i].value
+                           (this->generalized_support_points[q_point
+                                                             + GeometryInfo<dim>::lines_per_cell
+                                                             * n_edge_points]
+                            (0))
+                           * lobatto_polynomials[j + 2].value
+                           (this->generalized_support_points[q_point
+                                                             + GeometryInfo<dim>::lines_per_cell
+                                                             * n_edge_points]
+                            (1));
+                  }
+
+                system_matrix_inv.vmult (solution, system_rhs);
+
+                // Store the computed values
+                // in the resulting vector
+                // only if they are not
+                // too small.
+                for (unsigned int i = 0; i < this->degree; ++i)
+                  for (unsigned int j = 0; j < this->degree-1; ++j)
+                    if (std::abs (solution (i * (this->degree-1) + j)) > 1e-14)
+                      local_dofs[(i + GeometryInfo<dim>::lines_per_cell)
+                                 * (this->degree-1) + j
+                                 + GeometryInfo<dim>::lines_per_cell]
+                        = solution (i * (this->degree-1) + j);
+              }
+
+            // Set up the right hand side
+            // for the vertical shape
+            // functions.
+            system_rhs = 0;
+
+            for (unsigned int q_point = 0; q_point < n_interior_points;
+                 ++q_point)
+              {
+                double tmp
+                  = values[q_point + GeometryInfo<dim>::lines_per_cell
+                           * n_edge_points] (1);
+
+                for (unsigned int i = 0; i < 2; ++i)
+                  for (unsigned int j = 0; j < this->degree; ++j)
+                    tmp -= local_dofs[i * this->degree + j]
+                           * this->shape_value_component
+                           (i * this->degree + j,
+                            this->generalized_support_points[q_point
+                                                             + GeometryInfo<dim>::lines_per_cell
+                                                             * n_edge_points],
+                            1);
+
+                for (unsigned int i = 0; i < this->degree; ++i)
+                  for (unsigned int j = 0; j < this->degree-1; ++j)
+                    system_rhs (i * (this->degree-1) + j)
+                    += reference_quadrature.weight (q_point) * tmp
+                       * lobatto_polynomials_grad[i].value
+                       (this->generalized_support_points[q_point
+                                                         + GeometryInfo<dim>::lines_per_cell
+                                                         * n_edge_points]
+                        (1))
+                       * lobatto_polynomials[j + 2].value
+                       (this->generalized_support_points[q_point
+                                                         + GeometryInfo<dim>::lines_per_cell
+                                                         * n_edge_points]
+                        (0));
+              }
+
+            system_matrix_inv.vmult (solution, system_rhs);
+
+            // Store the computed values
+            // in the resulting vector
+            // only if they are not
+            // too small.
+            for (unsigned int i = 0; i < this->degree; ++i)
+              for (unsigned int j = 0; j < this->degree-1; ++j)
+                if (std::abs (solution (i * (this->degree-1) + j)) > 1e-14)
+                  local_dofs[i + (j + GeometryInfo<dim>::lines_per_cell
+                                  + this->degree-1) * this->degree]
+                    = solution (i * (this->degree-1) + j);
+          }
+
+        break;
+      }
+
+      case 3:
+      {
+        const QGauss<1>
+        reference_edge_quadrature (this->degree);
+        const unsigned int &
+        n_edge_points = reference_edge_quadrature.size ();
+
+        // Let us begin with the
+        // interpolation part.
+        for (unsigned int i = 0; i < 4; ++i)
+          {
+            for (unsigned int q_point = 0; q_point < n_edge_points;
+                 ++q_point)
+              local_dofs[(i + 8) * this->degree]
+              += reference_edge_quadrature.weight (q_point)
+                 * values[q_point + (i + 8) * n_edge_points] (2);
+
+            // Reset the accumulated
+            // value to exactly zero
+            // if its magnitude is
+            // below 1e-14.
+            if (std::abs (local_dofs[(i + 8) * this->degree]) < 1e-14)
+              local_dofs[(i + 8) * this->degree] = 0.0;
+          }
+
+        if (offset + 1 < dim)
+          {
+            for (unsigned int i = 0; i < 2; ++i)
+              for (unsigned int j = 0; j < 2; ++j)
+                {
+                  for (unsigned int q_point = 0; q_point < n_edge_points;
+                       ++q_point)
+                    local_dofs[(i + 4 * j) * this->degree]
+                    += reference_edge_quadrature.weight (q_point)
+                       * values[q_point + (i + 4 * j) * n_edge_points]
+                       (1);
+
+                  // Reset the accumulated
+                  // value to exactly zero
+                  // if its magnitude is
+                  // below 1e-14.
+                  if (std::abs (local_dofs[(i + 4 * j) * this->degree])
+                      < 1e-14)
+                    local_dofs[(i + 4 * j) * this->degree] = 0.0;
+                }
+
+            if (offset == 0)
+              for (unsigned int i = 0; i < 2; ++i)
+                for (unsigned int j = 0; j < 2; ++j)
+                  {
+                    for (unsigned int q_point = 0;
+                         q_point < n_edge_points; ++q_point)
+                      local_dofs[(i + 4 * j + 2) * this->degree]
+                      += reference_edge_quadrature.weight (q_point)
+                         * values[q_point + (i + 4 * j + 2)
+                                  * n_edge_points] (0);
+
+                    // Reset the accumulated
+                    // value to exactly zero
+                    // if its magnitude is
+                    // below 1e-14.
+                    if (std::abs (local_dofs[(i + 4 * j + 2)
+                                             * this->degree]) < 1e-14)
+                      local_dofs[(i + 4 * j + 2) * this->degree] = 0.0;
+                  }
+          }
+
+        // If this->degree is greater
+        // than 1, then we still have
+        // some higher order shape
+        // functions to consider.
+        // Here the projection part
+        // starts. The dof values
+        // are obtained by solving
+        // a linear system of
+        // equations.
+        if (this->degree > 1)
+          {
+            // We start with projection
+            // on the higher order edge
+            // shape function.
+            const std::vector<Polynomials::Polynomial<double> > &
+            lobatto_polynomials
+              = Polynomials::Lobatto::generate_complete_basis
+                (this->degree);
+            const unsigned int
+            line_coordinate[GeometryInfo<3>::lines_per_cell]
+              = {1, 1, 0, 0, 1, 1, 0, 0, 2, 2, 2, 2};
+            FullMatrix<double> system_matrix (this->degree-1, this->degree-1);
+            FullMatrix<double> system_matrix_inv (this->degree-1, this->degree-1);
+            std::vector<Polynomials::Polynomial<double> >
+            lobatto_polynomials_grad (this->degree);
+
+            for (unsigned int i = 0; i < lobatto_polynomials_grad.size ();
+                 ++i)
+              lobatto_polynomials_grad[i]
+                = lobatto_polynomials[i + 1].derivative ();
+
+            Vector<double> system_rhs (system_matrix.m ());
+            Vector<double> solution (system_rhs.size ());
+
+            // Set up the system matrix.
+            // This can be used for all
+            // edges.
+            for (unsigned int i = 0; i < system_matrix.m (); ++i)
+              for (unsigned int j = 0; j < system_matrix.n (); ++j)
+                for (unsigned int q_point = 0; q_point < n_edge_points;
+                     ++q_point)
+                  system_matrix (i, j)
+                  += boundary_weights (q_point, j)
+                     * lobatto_polynomials_grad[i + 1].value
+                     (this->generalized_face_support_points[q_point]
+                      (1));
+
+            system_matrix_inv.invert (system_matrix);
+
+            for (unsigned int line = 0;
+                 line < GeometryInfo<dim>::lines_per_cell; ++line)
+              {
+                // Set up the right hand side.
+                system_rhs = 0;
+
+                if ((((line == 0) || (line == 1) || (line == 4) ||
+                      (line == 5)) && (offset + 1 < dim)) ||
+                    (((line == 2) || (line == 3) || (line == 6) ||
+                      (line == 7)) && (offset == 0)) || (line > 7))
+                  {
+                    for (unsigned int q_point = 0; q_point < n_edge_points;
+                         ++q_point)
+                      {
+                        double tmp
+                          = values[line * n_edge_points + q_point]
+                            (line_coordinate[line])
+                            - local_dofs[line * this->degree]
+                            * this->shape_value_component
+                            (line * this->degree,
+                             this->generalized_support_points[line
+                                                              * this->degree
+                                                              + q_point],
+                             line_coordinate[line]);
+
+                        for (unsigned int i = 0; i < system_rhs.size ();
+                             ++i)
+                          system_rhs (i)
+                          += boundary_weights (q_point, i) * tmp;
+                      }
+
+                    system_matrix_inv.vmult (solution, system_rhs);
+
+                    // Store the computed values
+                    // in the resulting vector
+                    // only if they are not
+                    // too small.
+                    for (unsigned int i = 0; i < solution.size (); ++i)
+                      if (std::abs (solution (i)) > 1e-14)
+                        local_dofs[line * this->degree + i + 1]
+                          = solution (i);
+                  }
+              }
+
+            // Then we go on to the
+            // face shape functions.
+            // Again we set up the
+            // system matrix and
+            // use it for both the
+            // horizontal and the
+            // vertical shape
+            // functions.
+            const std::vector<Polynomials::Polynomial<double> > &
+            legendre_polynomials
+              = Polynomials::Legendre::generate_complete_basis (this->degree-1);
+            const unsigned int
+            n_face_points = n_edge_points * n_edge_points;
+
+            system_matrix.reinit ((this->degree-1) * this->degree,
+                                  (this->degree-1) * this->degree);
+            system_matrix = 0;
+
+            for (unsigned int i = 0; i < this->degree; ++i)
+              for (unsigned int j = 0; j < this->degree-1; ++j)
+                for (unsigned int k = 0; k < this->degree; ++k)
+                  for (unsigned int l = 0; l < this->degree-1; ++l)
+                    for (unsigned int q_point = 0; q_point < n_face_points;
+                         ++q_point)
+                      system_matrix (i * (this->degree-1) + j, k * (this->degree-1) + l)
+                      += boundary_weights (q_point + n_edge_points,
+                                           2 * (k * (this->degree-1) + l))
+                         * legendre_polynomials[i].value
+                         (this->generalized_face_support_points[q_point
+                                                                + 4
+                                                                * n_edge_points]
+                          (0))
+                         * lobatto_polynomials[j + 2].value
+                         (this->generalized_face_support_points[q_point
+                                                                + 4
+                                                                * n_edge_points]
+                          (1));
+
+            system_matrix_inv.reinit (system_matrix.m (),
+                                      system_matrix.n ());
+            system_matrix_inv.invert (system_matrix);
+            solution.reinit (system_matrix.m ());
+            system_rhs.reinit (system_matrix.m ());
+
+            for (unsigned int face = 0;
+                 face < GeometryInfo<dim>::faces_per_cell; ++face)
+              {
+                switch (face)
+                  {
+                  case 0:
+                  {
+                    if (offset + 1 < dim)
+                      {
+                        // Set up the right hand side
+                        // for the horizontal shape
+                        // functions.
+                        system_rhs = 0;
+
+                        for (unsigned int q_point = 0;
+                             q_point < n_face_points; ++q_point)
+                          {
+                            double tmp
+                              = values[q_point
+                                       + GeometryInfo<dim>::lines_per_cell
+                                       * n_edge_points] (1);
+
+                            for (unsigned int i = 0; i < 2; ++i)
+                              for (unsigned int j = 0; j < this->degree; ++j)
+                                tmp
+                                -= local_dofs[4 * i * this->degree
+                                              + j]
+                                   * this->shape_value_component
+                                   (4 * i * this->degree + j,
+                                    this->generalized_support_points[q_point
+                                                                     + GeometryInfo<dim>::lines_per_cell
+                                                                     * n_edge_points],
+                                    1);
+
+                            for (unsigned int i = 0; i < this->degree; ++i)
+                              for (unsigned int j = 0; j < this->degree-1; ++j)
+                                system_rhs (i * (this->degree-1) + j)
+                                += boundary_weights
+                                   (q_point + n_edge_points,
+                                    2 * (i * (this->degree-1) + j)) * tmp;
+                          }
+
+                        system_matrix_inv.vmult (solution, system_rhs);
+
+                        // Store the computed values
+                        // in the resulting vector
+                        // only if they are not
+                        // too small.
+                        for (unsigned int i = 0; i < this->degree; ++i)
+                          for (unsigned int j = 0; j < this->degree-1; ++j)
+                            if (std::abs (solution (i * (this->degree-1) + j))
+                                > 1e-14)
+                              local_dofs[(i
+                                          + GeometryInfo<dim>::lines_per_cell)
+                                         * (this->degree-1) + j
+                                         + GeometryInfo<dim>::lines_per_cell]
+                                = solution (i * (this->degree-1) + j);
+                      }
+
+                    // Set up the right hand side
+                    // for the vertical shape
+                    // functions.
+                    system_rhs = 0;
+
+                    for (unsigned int q_point = 0;
+                         q_point < n_face_points; ++q_point)
+                      {
+                        double tmp
+                          = values[q_point
+                                   + GeometryInfo<dim>::lines_per_cell
+                                   * n_edge_points] (2);
+
+                        for (unsigned int i = 0; i < 2; ++i)
+                          for (unsigned int j = 0; j < this->degree; ++j)
+                            tmp -= local_dofs[2 * (i + 4)
+                                              * this->degree + j]
+                                   * this->shape_value_component
+                                   (2 * (i + 4) * this->degree + j,
+                                    this->generalized_support_points[q_point
+                                                                     + GeometryInfo<dim>::lines_per_cell
+                                                                     * n_edge_points],
+                                    2);
+
+                        for (unsigned int i = 0; i < this->degree; ++i)
+                          for (unsigned int j = 0; j < this->degree-1; ++j)
+                            system_rhs (i * (this->degree-1) + j)
+                            += boundary_weights
+                               (q_point + n_edge_points,
+                                2 * (i * (this->degree-1) + j) + 1)
+                               * tmp;
+                      }
+
+                    system_matrix_inv.vmult (solution, system_rhs);
+
+                    // Store the computed values
+                    // in the resulting vector
+                    // only if they are not
+                    // too small.
+                    for (unsigned int i = 0; i < this->degree; ++i)
+                      for (unsigned int j = 0; j < this->degree-1; ++j)
+                        if (std::abs (solution (i * (this->degree-1) + j)) > 1e-14)
+                          local_dofs[i + (j + GeometryInfo<dim>::lines_per_cell
+                                          + this->degree-1)
+                                     * this->degree]
+                            = solution (i * (this->degree-1) + j);
+
+                    break;
+                  }
+
+                  case 1:
+                  {
+                    if (offset + 1 < dim)
+                      {
+                        // Set up the right hand side
+                        // for the horizontal shape
+                        // functions.
+                        system_rhs = 0;
+
+                        for (unsigned int q_point = 0;
+                             q_point < n_face_points; ++q_point)
+                          {
+                            double tmp
+                              = values[q_point
+                                       + GeometryInfo<dim>::lines_per_cell
+                                       * n_edge_points
+                                       + n_face_points] (1);
+
+                            for (unsigned int i = 0; i < 2; ++i)
+                              for (unsigned int j = 0; j <= deg; ++j)
+                                tmp -= local_dofs[(4 * i + 1)
+                                                  * this->degree + j]
+                                       * this->shape_value_component
+                                       ((4 * i + 1) * this->degree
+                                        + j,
+                                        this->generalized_support_points[q_point
+                                                                         + GeometryInfo<dim>::lines_per_cell
+                                                                         * n_edge_points
+                                                                         + n_face_points],
+                                        1);
+
+                            for (unsigned int i = 0; i <= deg; ++i)
+                              for (unsigned int j = 0; j < deg; ++j)
+                                system_rhs (i * deg + j)
+                                += boundary_weights
+                                   (q_point + n_edge_points,
+                                    2 * (i * deg + j)) * tmp;
+                          }
+
+                        system_matrix_inv.vmult (solution, system_rhs);
+
+                        // Store the computed values
+                        // in the resulting vector
+                        // only if they are not
+                        // too small.
+                        for (unsigned int i = 0; i <= deg; ++i)
+                          for (unsigned int j = 0; j < deg; ++j)
+                            if (std::abs (solution (i * deg + j))
+                                > 1e-14)
+                              local_dofs[(i + GeometryInfo<dim>::lines_per_cell
+                                          + 2 * this->degree) * deg + j
+                                         + GeometryInfo<dim>::lines_per_cell]
+                                = solution (i * deg + j);
+                      }
+
+                    // Set up the right hand side
+                    // for the vertical shape
+                    // functions.
+                    system_rhs = 0;
+
+                    for (unsigned int q_point = 0;
+                         q_point < n_face_points; ++q_point)
+                      {
+                        double tmp
+                          = values[q_point
+                                   + GeometryInfo<dim>::lines_per_cell
+                                   * n_edge_points + n_face_points]
+                            (2);
+
+                        for (unsigned int i = 0; i < 2; ++i)
+                          for (unsigned int j = 0; j <= deg; ++j)
+                            tmp -= local_dofs[(2 * (i + 4) + 1)
+                                              * this->degree + j]
+                                   * this->shape_value_component
+                                   ((2 * (i + 4) + 1) * this->degree
+                                    + j,
+                                    this->generalized_support_points[q_point
+                                                                     + GeometryInfo<dim>::lines_per_cell
+                                                                     * n_edge_points
+                                                                     + n_face_points],
+                                    2);
+
+                        for (unsigned int i = 0; i <= deg; ++i)
+                          for (unsigned int j = 0; j < deg; ++j)
+                            system_rhs (i * deg + j)
+                            += boundary_weights
+                               (q_point + n_edge_points,
+                                2 * (i * deg + j) + 1) * tmp;
+                      }
+
+                    system_matrix_inv.vmult (solution, system_rhs);
+
+                    // Store the computed values
+                    // in the resulting vector
+                    // only if they are not
+                    // too small.
+                    for (unsigned int i = 0; i <= deg; ++i)
+                      for (unsigned int j = 0; j < deg; ++j)
+                        if (std::abs (solution (i * deg + j)) > 1e-14)
+                          local_dofs[i + (j + GeometryInfo<dim>::lines_per_cell
+                                          + 3 * deg)
+                                     * this->degree]
+                            = solution (i * deg + j);
+
+                    break;
+                  }
+
+                  case 2:
+                  {
+                    if (offset == 0)
+                      {
+                        // Set up the right hand side
+                        // for the horizontal shape
+                        // functions.
+                        system_rhs = 0;
+
+                        for (unsigned int q_point = 0;
+                             q_point < n_face_points; ++q_point)
+                          {
+                            double tmp
+                              = values[q_point
+                                       + GeometryInfo<dim>::lines_per_cell
+                                       * n_edge_points + 2 * n_face_points]
+                                (2);
+
+                            for (unsigned int i = 0; i < 2; ++i)
+                              for (unsigned int j = 0; j <= deg; ++j)
+                                tmp -= local_dofs[(i + 8) * this->degree
+                                                  + j]
+                                       * this->shape_value_component
+                                       ((i + 8) * this->degree + j,
+                                        this->generalized_support_points[q_point
+                                                                         + GeometryInfo<dim>::lines_per_cell
+                                                                         * n_edge_points
+                                                                         + 2
+                                                                         * n_face_points],
+                                        2);
+
+                            for (unsigned int i = 0; i <= deg; ++i)
+                              for (unsigned int j = 0; j < deg; ++j)
+                                system_rhs (i * deg + j)
+                                += boundary_weights
+                                   (q_point + n_edge_points,
+                                    2 * (i * deg + j)) * tmp;
+                          }
+
+                        system_matrix_inv.vmult (solution, system_rhs);
+
+                        // Store the computed values
+                        // in the resulting vector
+                        // only if they are not
+                        // too small.
+                        for (unsigned int i = 0; i <= deg; ++i)
+                          for (unsigned int j = 0; j < deg; ++j)
+                            if (std::abs (solution (i * deg + j))
+                                > 1e-14)
+                              local_dofs[(i + GeometryInfo<dim>::lines_per_cell
+                                          + 4 * this->degree) * deg
+                                         + j
+                                         + GeometryInfo<dim>::lines_per_cell]
+                                = solution (i * deg + j);
+                      }
+
+                    // Set up the right hand side
+                    // for the vertical shape
+                    // functions.
+                    system_rhs = 0;
+
+                    for (unsigned int q_point = 0;
+                         q_point < n_face_points; ++q_point)
+                      {
+                        double tmp
+                          = values[q_point
+                                   + GeometryInfo<dim>::lines_per_cell
+                                   * n_edge_points
+                                   + 2 * n_face_points] (0);
+
+                        for (unsigned int i = 0; i < 2; ++i)
+                          for (unsigned int j = 0; j <= deg; ++j)
+                            tmp -= local_dofs[(4 * i + 2)
+                                              * this->degree + j]
+                                   * this->shape_value_component
+                                   ((4 * i + 2) * this->degree
+                                    + j,
+                                    this->generalized_support_points[q_point
+                                                                     + GeometryInfo<dim>::lines_per_cell
+                                                                     * n_edge_points
+                                                                     + 2
+                                                                     * n_face_points],
+                                    0);
+
+                        for (unsigned int i = 0; i <= deg; ++i)
+                          for (unsigned int j = 0; j < deg; ++j)
+                            system_rhs (i * deg + j)
+                            += boundary_weights
+                               (q_point + n_edge_points,
+                                2 * (i * deg + j) + 1) * tmp;
+                      }
+
+                    system_matrix_inv.vmult (solution, system_rhs);
+
+                    // Write the computed values
+                    // into the resulting vector
+                    // only if their magnitude
+                    // exceeds 1e-14.
+                    for (unsigned int i = 0; i <= deg; ++i)
+                      for (unsigned int j = 0; j < deg; ++j)
+                        if (std::abs (solution (i * deg + j)) > 1e-14)
+                          local_dofs[i + (j + GeometryInfo<dim>::lines_per_cell
+                                          + 5 * deg) * this->degree]
+                            = solution (i * deg + j);
+
+                    break;
+                  }
+
+                  case 3:
+                  {
+                    if (offset == 0)
+                      {
+                        // Set up the right hand side
+                        // for the horizontal shape
+                        // functions.
+                        system_rhs = 0;
+
+                        for (unsigned int q_point = 0;
+                             q_point < n_face_points; ++q_point)
+                          {
+                            double tmp
+                              = values[q_point
+                                       + GeometryInfo<dim>::lines_per_cell
+                                       * n_edge_points + 3 * n_face_points]
+                                (2);
+
+                            for (unsigned int i = 0; i < 2; ++i)
+                              for (unsigned int j = 0; j <= deg; ++j)
+                                tmp -= local_dofs[(i + 10) * this->degree
+                                                  + j]
+                                       * this->shape_value_component
+                                       ((i + 10) * this->degree + j,
+                                        this->generalized_support_points[q_point
+                                                                         + GeometryInfo<dim>::lines_per_cell
+                                                                         * n_edge_points
+                                                                         + 3
+                                                                         * n_face_points],
+                                        2);
+
+                            for (unsigned int i = 0; i <= deg; ++i)
+                              for (unsigned int j = 0; j < deg; ++j)
+                                system_rhs (i * deg + j)
+                                += boundary_weights
+                                   (q_point + n_edge_points,
+                                    2 * (i * deg + j)) * tmp;
+                          }
+
+                        system_matrix_inv.vmult (solution, system_rhs);
+
+                        // Write the computed values
+                        // into the resulting vector
+                        // only if their magnitude
+                        // exceeds 1e-14.
+                        for (unsigned int i = 0; i <= deg; ++i)
+                          for (unsigned int j = 0; j < deg; ++j)
+                            if (std::abs (solution (i * deg + j))
+                                > 1e-14)
+                              local_dofs[(i + GeometryInfo<dim>::lines_per_cell
+                                          + 6 * this->degree) * deg + j
+                                         + GeometryInfo<dim>::lines_per_cell]
+                                = solution (i * deg + j);
+                      }
+
+                    // Set up the right hand side
+                    // for the vertical shape
+                    // functions.
+                    system_rhs = 0;
+
+                    for (unsigned int q_point = 0;
+                         q_point < n_face_points; ++q_point)
+                      {
+                        double tmp
+                          = values[q_point
+                                   + GeometryInfo<dim>::lines_per_cell
+                                   * n_edge_points + 3
+                                   * n_face_points] (0);
+
+                        for (unsigned int i = 0; i < 2; ++i)
+                          for (unsigned int j = 0; j <= deg; ++j)
+                            tmp -= local_dofs[(4 * i + 3)
+                                              * this->degree + j]
+                                   * this->shape_value_component
+                                   ((4 * i + 3) * this->degree
+                                    + j,
+                                    this->generalized_support_points[q_point
+                                                                     + GeometryInfo<dim>::lines_per_cell
+                                                                     * n_edge_points
+                                                                     + 3
+                                                                     * n_face_points],
+                                    0);
+
+                        for (unsigned int i = 0; i <= deg; ++i)
+                          for (unsigned int j = 0; j < deg; ++j)
+                            system_rhs (i * deg + j)
+                            += boundary_weights
+                               (q_point + n_edge_points,
+                                2 * (i * deg + j) + 1) * tmp;
+                      }
+
+                    system_matrix_inv.vmult (solution, system_rhs);
+
+                    // Write the computed values
+                    // into the resulting vector
+                    // only if their magnitude
+                    // exceeds 1e-14.
+                    for (unsigned int i = 0; i <= deg; ++i)
+                      for (unsigned int j = 0; j < deg; ++j)
+                        if (std::abs (solution (i * deg + j)) > 1e-14)
+                          local_dofs[i + (j + GeometryInfo<dim>::lines_per_cell
+                                          + 7 * deg) * this->degree]
+                            = solution (i * deg + j);
+
+                    break;
+                  }
+
+                  case 4:
+                  {
+                    if (offset + 1 < dim)
+                      {
+                        // Set up the right hand side
+                        // for the horizontal shape
+                        // functions.
+                        if (offset == 0)
+                          {
+                            system_rhs = 0;
+
+                            for (unsigned int q_point = 0;
+                                 q_point < n_face_points; ++q_point)
+                              {
+                                double tmp
+                                  = values[q_point
+                                           + GeometryInfo<dim>::lines_per_cell
+                                           * n_edge_points + 4
+                                           * n_face_points] (0);
+
+                                for (unsigned int i = 0; i < 2; ++i)
+                                  for (unsigned int j = 0; j <= deg; ++j)
+                                    tmp -= local_dofs[(i + 2)
+                                                      * this->degree
+                                                      + j]
+                                           * this->shape_value_component
+                                           ((i + 2) * this->degree
+                                            + j,
+                                            this->generalized_support_points[q_point
+                                                                             + GeometryInfo<dim>::lines_per_cell
+                                                                             * n_edge_points
+                                                                             + 4
+                                                                             * n_face_points],
+                                            0);
+
+                                for (unsigned int i = 0; i <= deg; ++i)
+                                  for (unsigned int j = 0; j < deg; ++j)
+                                    system_rhs (i * deg + j)
+                                    += boundary_weights
+                                       (q_point + n_edge_points,
+                                        2 * (i * deg + j)) * tmp;
+                              }
+
+                            system_matrix_inv.vmult
+                            (solution, system_rhs);
+
+                            // Write the computed values
+                            // into the resulting vector
+                            // only if their magnitude
+                            // exceeds 1e-14.
+                            for (unsigned int i = 0; i <= deg; ++i)
+                              for (unsigned int j = 0; j < deg; ++j)
+                                if (std::abs (solution (i * deg + j))
+                                    > 1e-14)
+                                  local_dofs[(i + GeometryInfo<dim>::lines_per_cell
+                                              + 8 * this->degree) * deg
+                                             + j
+                                             + GeometryInfo<dim>::lines_per_cell]
+                                    = solution (i * deg + j);
+                          }
+
+                        // Set up the right hand side
+                        // for the vertical shape
+                        // functions.
+                        system_rhs = 0;
+
+                        for (unsigned int q_point = 0;
+                             q_point < n_face_points; ++q_point)
+                          {
+                            double tmp
+                              = values[q_point
+                                       + GeometryInfo<dim>::lines_per_cell
+                                       * n_edge_points + 4
+                                       * n_face_points] (1);
+
+                            for (unsigned int i = 0; i < 2; ++i)
+                              for (unsigned int j = 0; j <= deg; ++j)
+                                tmp -= local_dofs[i * this->degree + j]
+                                       * this->shape_value_component
+                                       (i * this->degree + j,
+                                        this->generalized_support_points[q_point
+                                                                         + GeometryInfo<dim>::lines_per_cell
+                                                                         * n_edge_points
+                                                                         + 4
+                                                                         * n_face_points],
+                                        1);
+
+                            for (unsigned int i = 0; i <= deg; ++i)
+                              for (unsigned int j = 0; j < deg; ++j)
+                                system_rhs (i * deg + j)
+                                += boundary_weights
+                                   (q_point + n_edge_points,
+                                    2 * (i * deg + j) + 1) * tmp;
+                          }
+
+                        system_matrix_inv.vmult (solution, system_rhs);
+
+                        // Write the computed values
+                        // into the resulting vector
+                        // only if their magnitude
+                        // exceeds 1e-14.
+                        for (unsigned int i = 0; i <= deg; ++i)
+                          for (unsigned int j = 0; j < deg; ++j)
+                            if (std::abs (solution (i * deg + j))
+                                > 1e-14)
+                              local_dofs[i + (j + GeometryInfo<dim>::lines_per_cell
+                                              + 9 * deg)
+                                         * this->degree]
+                                = solution (i * deg + j);
+                      }
+
+                    break;
+                  }
+
+                  default:
+                    if (offset + 1 < dim)
+                      {
+                        // Set up the right hand side
+                        // for the horizontal shape
+                        // functions.
+                        if (offset == 0)
+                          {
+                            system_rhs = 0;
+
+                            for (unsigned int q_point = 0;
+                                 q_point < n_face_points; ++q_point)
+                              {
+                                double tmp
+                                  = values[q_point
+                                           + GeometryInfo<dim>::lines_per_cell
+                                           * n_edge_points
+                                           + 5 * n_face_points] (0);
+
+                                for (unsigned int i = 0; i < 2; ++i)
+                                  for (unsigned int j = 0; j <= deg; ++j)
+                                    tmp -= local_dofs[(i + 6)
+                                                      * this->degree + j]
+                                           * this->shape_value_component
+                                           ((i + 6) * this->degree + j,
+                                            this->generalized_support_points[q_point
+                                                                             + GeometryInfo<dim>::lines_per_cell
+                                                                             * n_edge_points
+                                                                             + 5
+                                                                             * n_face_points],
+                                            0);
+
+                                for (unsigned int i = 0; i <= deg; ++i)
+                                  for (unsigned int j = 0; j < deg; ++j)
+                                    system_rhs (i * deg + j)
+                                    += boundary_weights
+                                       (q_point + n_edge_points,
+                                        2 * (i * deg + j)) * tmp;
+                              }
+
+                            system_matrix_inv.vmult
+                            (solution, system_rhs);
+
+                            // Write the computed values
+                            // into the resulting vector
+                            // only if their magnitude
+                            // exceeds 1e-14.
+                            for (unsigned int i = 0; i <= deg; ++i)
+                              for (unsigned int j = 0; j < deg; ++j)
+                                if (std::abs (solution (i * deg + j))
+                                    > 1e-14)
+                                  local_dofs[(i + GeometryInfo<dim>::lines_per_cell
+                                              + 10 * this->degree)
+                                             * deg + j
+                                             + GeometryInfo<dim>::lines_per_cell]
+                                    = solution (i * deg + j);
+                          }
+
+                        // Set up the right hand side
+                        // for the vertical shape
+                        // functions.
+                        system_rhs = 0;
+
+                        for (unsigned int q_point = 0;
+                             q_point < n_face_points; ++q_point)
+                          {
+                            double tmp
+                              = values[q_point
+                                       + GeometryInfo<dim>::lines_per_cell
+                                       * n_edge_points + 5
+                                       * n_face_points] (1);
+
+                            for (unsigned int i = 0; i < 2; ++i)
+                              for (unsigned int j = 0; j <= deg; ++j)
+                                tmp -= local_dofs[(i + 4)
+                                                  * this->degree + j]
+                                       * this->shape_value_component
+                                       ((i + 4) * this->degree + j,
+                                        this->generalized_support_points[q_point
+                                                                         + GeometryInfo<dim>::lines_per_cell
+                                                                         * n_edge_points
+                                                                         + 5
+                                                                         * n_face_points],
+                                        1);
+
+                            for (unsigned int i = 0; i <= deg; ++i)
+                              for (unsigned int j = 0; j < deg; ++j)
+                                system_rhs (i * deg + j)
+                                += boundary_weights
+                                   (q_point + n_edge_points,
+                                    2 * (i * deg + j) + 1) * tmp;
+                          }
+
+                        system_matrix_inv.vmult (solution, system_rhs);
+
+                        // Write the computed values
+                        // into the resulting vector
+                        // only if their magnitude
+                        // exceeds 1e-14.
+                        for (unsigned int i = 0; i <= deg; ++i)
+                          for (unsigned int j = 0; j < deg; ++j)
+                            if (std::abs (solution (i * deg + j))
+                                > 1e-14)
+                              local_dofs[i + (j + GeometryInfo<dim>::lines_per_cell
+                                              + 11 * deg) * this->degree]
+                                = solution (i * deg + j);
+                      }
+                  }
+              }
+
+            // Finally we project
+            // the remaining parts
+            // of the function on
+            // the interior shape
+            // functions.
+            const QGauss<dim> reference_quadrature (this->degree);
+            const unsigned int &
+            n_interior_points = reference_quadrature.size ();
+
+            // We create the
+            // system matrix.
+            system_matrix.reinit (this->degree * deg * deg,
+                                  this->degree * deg * deg);
+            system_matrix = 0;
+
+            for (unsigned int i = 0; i <= deg; ++i)
+              for (unsigned int j = 0; j < deg; ++j)
+                for (unsigned int k = 0; k < deg; ++k)
+                  for (unsigned int l = 0; l <= deg; ++l)
+                    for (unsigned int m = 0; m < deg; ++m)
+                      for (unsigned int n = 0; n < deg; ++n)
+                        for (unsigned int q_point = 0;
+                             q_point < n_interior_points; ++q_point)
+                          system_matrix ((i * deg + j) * deg + k,
+                                         (l * deg + m) * deg + n)
+                          += reference_quadrature.weight (q_point)
+                             * legendre_polynomials[i].value
+                             (this->generalized_support_points[q_point
+                                                               + GeometryInfo<dim>::lines_per_cell
+                                                               * n_edge_points
+                                                               + GeometryInfo<dim>::faces_per_cell
+                                                               * n_face_points]
+                              (0)) * lobatto_polynomials[j + 2].value
+                             (this->generalized_support_points[q_point
+                                                               + GeometryInfo<dim>::lines_per_cell
+                                                               * n_edge_points
+                                                               + GeometryInfo<dim>::faces_per_cell
+                                                               * n_face_points]
+                              (1))
+                             * lobatto_polynomials[k + 2].value
+                             (this->generalized_support_points[q_point
+                                                               + GeometryInfo<dim>::lines_per_cell
+                                                               * n_edge_points
+                                                               + GeometryInfo<dim>::faces_per_cell
+                                                               * n_face_points]
+                              (2))
+                             * lobatto_polynomials_grad[l].value
+                             (this->generalized_support_points[q_point
+                                                               + GeometryInfo<dim>::lines_per_cell
+                                                               * n_edge_points
+                                                               + GeometryInfo<dim>::faces_per_cell
+                                                               * n_face_points]
+                              (0))
+                             * lobatto_polynomials[m + 2].value
+                             (this->generalized_support_points[q_point
+                                                               + GeometryInfo<dim>::lines_per_cell
+                                                               * n_edge_points
+                                                               + GeometryInfo<dim>::faces_per_cell
+                                                               * n_face_points]
+                              (1))
+                             * lobatto_polynomials[n + 2].value
+                             (this->generalized_support_points[q_point
+                                                               + GeometryInfo<dim>::lines_per_cell
+                                                               * n_edge_points
+                                                               + GeometryInfo<dim>::faces_per_cell
+                                                               * n_face_points]
+                              (2));
+
+            system_matrix_inv.reinit (system_matrix.m (),
+                                      system_matrix.m ());
+            system_matrix_inv.invert (system_matrix);
+            system_rhs.reinit (system_matrix_inv.m ());
+            solution.reinit (system_matrix.m ());
+
+            if (offset + 1 < dim)
+              {
+                if (offset == 0)
+                  {
+                    // Set up the right hand side.
+                    system_rhs = 0;
+
+                    for (unsigned int q_point = 0;
+                         q_point < n_interior_points; ++q_point)
+                      {
+                        double tmp
+                          = values[q_point
+                                   + GeometryInfo<dim>::lines_per_cell
+                                   * n_edge_points
+                                   + GeometryInfo<dim>::faces_per_cell
+                                   * n_face_points] (0);
+
+                        for (unsigned int i = 0; i <= deg; ++i)
+                          {
+                            for (unsigned int j = 0; j < 2; ++j)
+                              for (unsigned int k = 0; k < 2; ++k)
+                                tmp -= local_dofs[i + (j + 4 * k + 2)
+                                                  * this->degree]
+                                       * this->shape_value_component
+                                       (i + (j + 4 * k + 2)
+                                        * this->degree,
+                                        this->generalized_support_points[q_point
+                                                                         + GeometryInfo<dim>::lines_per_cell
+                                                                         * n_edge_points
+                                                                         + GeometryInfo<dim>::faces_per_cell
+                                                                         * n_face_points],
+                                        0);
+
+                            for (unsigned int j = 0; j < deg; ++j)
+                              for (unsigned int k = 0; k < 4; ++k)
+                                tmp -= local_dofs[(i + 2 * (k + 2)
+                                                   * this->degree
+                                                   + GeometryInfo<dim>::lines_per_cell)
+                                                  * deg + j
+                                                  + GeometryInfo<dim>::lines_per_cell]
+                                       * this->shape_value_component
+                                       ((i + 2 * (k + 2) * this->degree
+                                         + GeometryInfo<dim>::lines_per_cell)
+                                        * deg + j
+                                        + GeometryInfo<dim>::lines_per_cell,
+                                        this->generalized_support_points[q_point
+                                                                         + GeometryInfo<dim>::lines_per_cell
+                                                                         * n_edge_points
+                                                                         + GeometryInfo<dim>::faces_per_cell
+                                                                         * n_face_points],
+                                        0);
+                          }
+
+                        for (unsigned int i = 0; i <= deg; ++i)
+                          for (unsigned int j = 0; j < deg; ++j)
+                            for (unsigned int k = 0; k < deg; ++k)
+                              system_rhs ((i * deg + j) * deg + k)
+                              += reference_quadrature.weight (q_point)
+                                 * tmp
+                                 * lobatto_polynomials_grad[i].value
+                                 (this->generalized_support_points[q_point
+                                                                   + GeometryInfo<dim>::lines_per_cell
+                                                                   * n_edge_points
+                                                                   + GeometryInfo<dim>::faces_per_cell
+                                                                   * n_face_points]
+                                  (0))
+                                 * lobatto_polynomials[j + 2].value
+                                 (this->generalized_support_points[q_point
+                                                                   + GeometryInfo<dim>::lines_per_cell
+                                                                   * n_edge_points
+                                                                   + GeometryInfo<dim>::faces_per_cell
+                                                                   * n_face_points]
+                                  (1))
+                                 * lobatto_polynomials[k + 2].value
+                                 (this->generalized_support_points[q_point
+                                                                   + GeometryInfo<dim>::lines_per_cell
+                                                                   * n_edge_points
+                                                                   + GeometryInfo<dim>::faces_per_cell
+                                                                   * n_face_points]
+                                  (2));
+                      }
+
+                    system_matrix_inv.vmult (solution, system_rhs);
+
+                    // Write the computed values
+                    // into the resulting vector
+                    // only if their magnitude
+                    // exceeds 1e-14.
+                    for (unsigned int i = 0; i <= deg; ++i)
+                      for (unsigned int j = 0; j < deg; ++j)
+                        for (unsigned int k = 0; k < deg; ++k)
+                          if (std::abs (solution ((i * deg + j) * deg + k))
+                              > 1e-14)
+                            local_dofs[((i + 2
+                                         * GeometryInfo<dim>::faces_per_cell)
+                                        * deg + j
+                                        + GeometryInfo<dim>::lines_per_cell
+                                        + 2
+                                        * GeometryInfo<dim>::faces_per_cell)
+                                       * deg + k
+                                       + GeometryInfo<dim>::lines_per_cell]
+                              = solution ((i * deg + j) * deg + k);
+                  }
+
+                // Set up the right hand side.
+                system_rhs = 0;
+
+                for (unsigned int q_point = 0; q_point < n_interior_points;
+                     ++q_point)
+                  {
+                    double tmp
+                      = values[q_point + GeometryInfo<dim>::lines_per_cell
+                               * n_edge_points
+                               + GeometryInfo<dim>::faces_per_cell
+                               * n_face_points] (1);
+
+                    for (unsigned int i = 0; i <= deg; ++i)
+                      for (unsigned int j = 0; j < 2; ++j)
+                        {
+                          for (unsigned int k = 0; k < 2; ++k)
+                            tmp -= local_dofs[i + (4 * j + k)
+                                              * this->degree]
+                                   * this->shape_value_component
+                                   (i + (4 * j + k) * this->degree,
+                                    this->generalized_support_points[q_point
+                                                                     + GeometryInfo<dim>::lines_per_cell
+                                                                     * n_edge_points
+                                                                     + GeometryInfo<dim>::faces_per_cell
+                                                                     * n_face_points],
+                                    1);
+
+                          for (unsigned int k = 0; k < deg; ++k)
+                            tmp -= local_dofs[(i + 2 * j * this->degree
+                                               + GeometryInfo<dim>::lines_per_cell)
+                                              * deg + k
+                                              + GeometryInfo<dim>::lines_per_cell]
+                                   * this->shape_value_component
+                                   ((i + 2 * j * this->degree
+                                     + GeometryInfo<dim>::lines_per_cell)
+                                    * deg + k
+                                    + GeometryInfo<dim>::lines_per_cell,
+                                    this->generalized_support_points[q_point
+                                                                     + GeometryInfo<dim>::lines_per_cell
+                                                                     * n_edge_points
+                                                                     + GeometryInfo<dim>::faces_per_cell
+                                                                     * n_face_points],
+                                    1)
+                                   + local_dofs[i + ((2 * j + 9) * deg + k
+                                                     + GeometryInfo<dim>::lines_per_cell)
+                                                * this->degree]
+                                   * this->shape_value_component
+                                   (i + ((2 * j + 9) * deg + k
+                                         + GeometryInfo<dim>::lines_per_cell)
+                                    * this->degree,
+                                    this->generalized_support_points[q_point
+                                                                     + GeometryInfo<dim>::lines_per_cell
+                                                                     * n_edge_points
+                                                                     + GeometryInfo<dim>::faces_per_cell
+                                                                     * n_face_points],
+                                    1);
+                        }
+
+                    for (unsigned int i = 0; i <= deg; ++i)
+                      for (unsigned int j = 0; j < deg; ++j)
+                        for (unsigned int k = 0; k < deg; ++k)
+                          system_rhs ((i * deg + j) * deg + k)
+                          += reference_quadrature.weight (q_point) * tmp
+                             * lobatto_polynomials_grad[i].value
+                             (this->generalized_support_points[q_point
+                                                               + GeometryInfo<dim>::lines_per_cell
+                                                               * n_edge_points
+                                                               + GeometryInfo<dim>::faces_per_cell
+                                                               * n_face_points]
+                              (1))
+                             * lobatto_polynomials[j + 2].value
+                             (this->generalized_support_points[q_point
+                                                               + GeometryInfo<dim>::lines_per_cell
+                                                               * n_edge_points
+                                                               + GeometryInfo<dim>::faces_per_cell
+                                                               * n_face_points]
+                              (0))
+                             * lobatto_polynomials[k + 2].value
+                             (this->generalized_support_points[q_point
+                                                               + GeometryInfo<dim>::lines_per_cell
+                                                               * n_edge_points
+                                                               + GeometryInfo<dim>::faces_per_cell
+                                                               * n_face_points]
+                              (2));
+                  }
+
+                system_matrix_inv.vmult (solution, system_rhs);
+
+                // Add the computed values
+                // to the resulting vector
+                // only, if they are not
+                // too small.
+                for (unsigned int i = 0; i <= deg; ++i)
+                  for (unsigned int j = 0; j < deg; ++j)
+                    for (unsigned int k = 0; k < deg; ++k)
+                      if (std::abs (solution ((i * deg + j) * deg + k))
+                          > 1e-14)
+                        local_dofs[((i + this->degree + 2
+                                     * GeometryInfo<dim>::faces_per_cell)
+                                    * deg + j
+                                    + GeometryInfo<dim>::lines_per_cell + 2
+                                    * GeometryInfo<dim>::faces_per_cell)
+                                   * deg + k
+                                   + GeometryInfo<dim>::lines_per_cell]
+                          = solution ((i * deg + j) * deg + k);
+              }
+
+            // Set up the right hand side.
+            system_rhs = 0;
+
+            for (unsigned int q_point = 0; q_point < n_interior_points;
+                 ++q_point)
+              {
+                double tmp
+                  = values[q_point + GeometryInfo<dim>::lines_per_cell
+                           * n_edge_points
+                           + GeometryInfo<dim>::faces_per_cell
+                           * n_face_points] (2);
+
+                for (unsigned int i = 0; i <= deg; ++i)
+                  for (unsigned int j = 0; j < 4; ++j)
+                    {
+                      tmp -= local_dofs[i + (j + 8) * this->degree]
+                             * this->shape_value_component
+                             (i + (j + 8) * this->degree,
+                              this->generalized_support_points[q_point
+                                                               + GeometryInfo<dim>::lines_per_cell
+                                                               * n_edge_points
+                                                               + GeometryInfo<dim>::faces_per_cell
+                                                               * n_face_points],
+                              2);
+
+                      for (unsigned int k = 0; k < deg; ++k)
+                        tmp -= local_dofs[i + ((2 * j + 1) * deg + k
+                                               + GeometryInfo<dim>::lines_per_cell)
+                                          * this->degree]
+                               * this->shape_value_component
+                               (i + ((2 * j + 1) * deg + k
+                                     + GeometryInfo<dim>::lines_per_cell)
+                                * this->degree,
+                                this->generalized_support_points[q_point
+                                                                 + GeometryInfo<dim>::lines_per_cell
+                                                                 * n_edge_points
+                                                                 + GeometryInfo<dim>::faces_per_cell
+                                                                 * n_face_points],
+                                2);
+                    }
+
+                for (unsigned int i = 0; i <= deg; ++i)
+                  for (unsigned int j = 0; j < deg; ++j)
+                    for (unsigned int k = 0; k < deg; ++k)
+                      system_rhs ((i * deg + j) * deg + k)
+                      += reference_quadrature.weight (q_point) * tmp
+                         * lobatto_polynomials_grad[i].value
+                         (this->generalized_support_points[q_point
+                                                           + GeometryInfo<dim>::lines_per_cell
+                                                           * n_edge_points
+                                                           + GeometryInfo<dim>::faces_per_cell
+                                                           * n_face_points]
+                          (2))
+                         * lobatto_polynomials[j + 2].value
+                         (this->generalized_support_points[q_point
+                                                           + GeometryInfo<dim>::lines_per_cell
+                                                           * n_edge_points
+                                                           + GeometryInfo<dim>::faces_per_cell
+                                                           * n_face_points]
+                          (0))
+                         * lobatto_polynomials[k + 2].value
+                         (this->generalized_support_points[q_point
+                                                           + GeometryInfo<dim>::lines_per_cell
+                                                           * n_edge_points
+                                                           + GeometryInfo<dim>::faces_per_cell
+                                                           * n_face_points]
+                          (1));
+              }
+
+            system_matrix_inv.vmult (solution, system_rhs);
+
+            // Add the computed values
+            // to the resulting vector
+            // only, if they are not
+            // too small.
+            for (unsigned int i = 0; i <= deg; ++i)
+              for (unsigned int j = 0; j < deg; ++j)
+                for (unsigned int k = 0; k < deg; ++k)
+                  if (std::abs (solution ((i * deg + j) * deg + k))
+                      > 1e-14)
+                    local_dofs[i + ((j + 2
+                                     * (deg + GeometryInfo<dim>::faces_per_cell))
+                                    * deg + k
+                                    + GeometryInfo<dim>::lines_per_cell)
+                               * this->degree]
+                      = solution ((i * deg + j) * deg + k);
+          }
+
+        break;
+      }
+
+      default:
+        Assert (false, ExcNotImplemented ());
+      }
+}
+
+
+// Interpolate a function, given by its
+// values at the generalized support points,
+// into the finite element space on the
+// reference cell.
+// As usual, this is done by
+// projection-based interpolation.
+template <int dim>
+void
+FE_Nedelec<dim>::interpolate (std::vector<double> &local_dofs,
+                              const VectorSlice<const std::vector<std::vector<double> > > &values)
+const
+{
+  const unsigned int deg = this->degree-1;
+  Assert (values.size () == this->n_components (),
+          ExcDimensionMismatch (values.size (), this->n_components ()));
+  Assert (values[0].size () == this->generalized_support_points.size (),
+          ExcDimensionMismatch (values[0].size (),
+                                this->generalized_support_points.size ()));
+  Assert (local_dofs.size () == this->dofs_per_cell,
+          ExcDimensionMismatch (local_dofs.size (), this->dofs_per_cell));
+  std::fill (local_dofs.begin (), local_dofs.end (), 0.0);
+
+  switch (dim)
+    {
+    case 2:
+    {
+      // Let us begin with the
+      // interpolation part.
+      const QGauss<dim - 1> reference_edge_quadrature (this->degree);
+      const unsigned int &
+      n_edge_points = reference_edge_quadrature.size ();
+
+      for (unsigned int i = 0; i < 2; ++i)
+        for (unsigned int j = 0; j < 2; ++j)
+          {
+            for (unsigned int q_point = 0; q_point < n_edge_points;
+                 ++q_point)
+              local_dofs[(i + 2 * j) * this->degree]
+              += reference_edge_quadrature.weight (q_point)
+                 * values[1 - j][q_point + (i + 2 * j) * n_edge_points];
+
+            // Add the computed values
+            // to the resulting vector
+            // only, if they are not
+            // too small.
+            if (std::abs (local_dofs[(i + 2 * j) * this->degree]) < 1e-14)
+              local_dofs[(i + 2 * j) * this->degree] = 0.0;
+          }
+
+      // If the degree is greater
+      // than 0, then we have still
+      // some higher order edge
+      // shape functions to
+      // consider.
+      // Here the projection part
+      // starts. The dof values
+      // are obtained by solving
+      // a linear system of
+      // equations.
+      if (this->degree-1 > 1)
+        {
+          // We start with projection
+          // on the higher order edge
+          // shape function.
+          const std::vector<Polynomials::Polynomial<double> > &
+          lobatto_polynomials
+            = Polynomials::Lobatto::generate_complete_basis
+              (this->degree);
+          FullMatrix<double> system_matrix (this->degree-1, this->degree-1);
+          std::vector<Polynomials::Polynomial<double> >
+          lobatto_polynomials_grad (this->degree);
+
+          for (unsigned int i = 0; i < lobatto_polynomials_grad.size ();
+               ++i)
+            lobatto_polynomials_grad[i]
+              = lobatto_polynomials[i + 1].derivative ();
+
+          // Set up the system matrix.
+          // This can be used for all
+          // edges.
+          for (unsigned int i = 0; i < system_matrix.m (); ++i)
+            for (unsigned int j = 0; j < system_matrix.n (); ++j)
+              for (unsigned int q_point = 0; q_point < n_edge_points;
+                   ++q_point)
+                system_matrix (i, j)
+                += boundary_weights (q_point, j)
+                   * lobatto_polynomials_grad[i + 1].value
+                   (this->generalized_face_support_points[q_point]
+                    (1));
+
+          FullMatrix<double> system_matrix_inv (this->degree-1, this->degree-1);
+
+          system_matrix_inv.invert (system_matrix);
+
+          const unsigned int
+          line_coordinate[GeometryInfo<2>::lines_per_cell]
+            = {1, 1, 0, 0};
+          Vector<double> system_rhs (system_matrix.m ());
+          Vector<double> solution (system_rhs.size ());
+
+          for (unsigned int line = 0;
+               line < GeometryInfo<dim>::lines_per_cell; ++line)
+            {
+              // Set up the right hand side.
+              system_rhs = 0;
+
+              for (unsigned int q_point = 0; q_point < n_edge_points;
+                   ++q_point)
+                {
+                  const double tmp
+                    = values[line_coordinate[line]][line * n_edge_points
+                                                    + q_point]
+                      - local_dofs[line * this->degree]
+                      * this->shape_value_component
+                      (line * this->degree,
+                       this->generalized_support_points[line
+                                                        * n_edge_points
+                                                        + q_point],
+                       line_coordinate[line]);
+
+                  for (unsigned int i = 0; i < system_rhs.size (); ++i)
+                    system_rhs (i) += boundary_weights (q_point, i) * tmp;
+                }
+
+              system_matrix_inv.vmult (solution, system_rhs);
+
+              // Add the computed values
+              // to the resulting vector
+              // only, if they are not
+              // too small.
+              for (unsigned int i = 0; i < solution.size (); ++i)
+                if (std::abs (solution (i)) > 1e-14)
+                  local_dofs[line * this->degree + i + 1] = solution (i);
+            }
+
+          // Then we go on to the
+          // interior shape
+          // functions. Again we
+          // set up the system
+          // matrix and use it
+          // for both, the
+          // horizontal and the
+          // vertical, interior
+          // shape functions.
+          const QGauss<dim> reference_quadrature (this->degree);
+          const unsigned int &
+          n_interior_points = reference_quadrature.size ();
+          const std::vector<Polynomials::Polynomial<double> > &
+          legendre_polynomials
+            = Polynomials::Legendre::generate_complete_basis (this->degree-1);
+
+          system_matrix.reinit ((this->degree-1) * this->degree,
+                                (this->degree-1) * this->degree);
+          system_matrix = 0;
+
+          for (unsigned int i = 0; i < this->degree; ++i)
+            for (unsigned int j = 0; j < this->degree-1; ++j)
+              for (unsigned int k = 0; k < this->degree; ++k)
+                for (unsigned int l = 0; l < this->degree-1; ++l)
+                  for (unsigned int q_point = 0;
+                       q_point < n_interior_points; ++q_point)
+                    system_matrix (i * (this->degree-1) + j, k * (this->degree-1) + l)
+                    += reference_quadrature.weight (q_point)
+                       * legendre_polynomials[i].value
+                       (this->generalized_support_points[q_point
+                                                         + GeometryInfo<dim>::lines_per_cell
+                                                         * n_edge_points]
+                        (0))
+                       * lobatto_polynomials[j + 2].value
+                       (this->generalized_support_points[q_point
+                                                         + GeometryInfo<dim>::lines_per_cell
+                                                         * n_edge_points]
+                        (1))
+                       * lobatto_polynomials_grad[k].value
+                       (this->generalized_support_points[q_point
+                                                         + GeometryInfo<dim>::lines_per_cell
+                                                         * n_edge_points]
+                        (0))
+                       * lobatto_polynomials[l + 2].value
+                       (this->generalized_support_points[q_point
+                                                         + GeometryInfo<dim>::lines_per_cell
+                                                         * n_edge_points]
+                        (1));
+
+          system_matrix_inv.reinit (system_matrix.m (),
+                                    system_matrix.m ());
+          system_matrix_inv.invert (system_matrix);
+          // Set up the right hand side
+          // for the horizontal shape
+          // functions.
+          system_rhs.reinit (system_matrix_inv.m ());
+          system_rhs = 0;
+
+          for (unsigned int q_point = 0; q_point < n_interior_points;
+               ++q_point)
+            {
+              double tmp
+                = values[0][q_point + GeometryInfo<dim>::lines_per_cell
+                            * n_edge_points];
+
+              for (unsigned int i = 0; i < 2; ++i)
+                for (unsigned int j = 0; j <= deg; ++j)
+                  tmp -= local_dofs[(i + 2) * this->degree + j]
+                         * this->shape_value_component
+                         ((i + 2) * this->degree + j,
+                          this->generalized_support_points[q_point
+                                                           + GeometryInfo<dim>::lines_per_cell
+                                                           * n_edge_points],
+                          0);
+
+              for (unsigned int i = 0; i <= deg; ++i)
+                for (unsigned int j = 0; j < deg; ++j)
+                  system_rhs (i * deg + j)
+                  += reference_quadrature.weight (q_point) * tmp
+                     * lobatto_polynomials_grad[i].value
+                     (this->generalized_support_points[q_point
+                                                       + GeometryInfo<dim>::lines_per_cell
+                                                       * n_edge_points]
+                      (0))
+                     * lobatto_polynomials[j + 2].value
+                     (this->generalized_support_points[q_point
+                                                       + GeometryInfo<dim>::lines_per_cell
+                                                       * n_edge_points]
+                      (1));
+            }
+
+          solution.reinit (system_matrix.m ());
+          system_matrix_inv.vmult (solution, system_rhs);
+
+          // Add the computed values
+          // to the resulting vector
+          // only, if they are not
+          // too small.
+          for (unsigned int i = 0; i <= deg; ++i)
+            for (unsigned int j = 0; j < deg; ++j)
+              if (std::abs (solution (i * deg + j)) > 1e-14)
+                local_dofs[(i + GeometryInfo<dim>::lines_per_cell) * deg
+                           + j + GeometryInfo<dim>::lines_per_cell]
+                  = solution (i * deg + j);
+
+          system_rhs = 0;
+          // Set up the right hand side
+          // for the vertical shape
+          // functions.
+
+          for (unsigned int q_point = 0; q_point < n_interior_points;
+               ++q_point)
+            {
+              double tmp
+                = values[1][q_point + GeometryInfo<dim>::lines_per_cell
+                            * n_edge_points];
+
+              for (unsigned int i = 0; i < 2; ++i)
+                for (unsigned int j = 0; j <= deg; ++j)
+                  tmp -= local_dofs[i * this->degree + j]
+                         * this->shape_value_component
+                         (i * this->degree + j,
+                          this->generalized_support_points[q_point
+                                                           + GeometryInfo<dim>::lines_per_cell
+                                                           * n_edge_points],
+                          1);
+
+              for (unsigned int i = 0; i <= deg; ++i)
+                for (unsigned int j = 0; j < deg; ++j)
+                  system_rhs (i * deg + j)
+                  += reference_quadrature.weight (q_point) * tmp
+                     * lobatto_polynomials_grad[i].value
+                     (this->generalized_support_points[q_point
+                                                       + GeometryInfo<dim>::lines_per_cell
+                                                       * n_edge_points]
+                      (1))
+                     * lobatto_polynomials[j + 2].value
+                     (this->generalized_support_points[q_point
+                                                       + GeometryInfo<dim>::lines_per_cell
+                                                       * n_edge_points]
+                      (0));
+            }
+
+          system_matrix_inv.vmult (solution, system_rhs);
+
+          // Add the computed values
+          // to the resulting vector
+          // only, if they are not
+          // too small.
+          for (unsigned int i = 0; i <= deg; ++i)
+            for (unsigned int j = 0; j < deg; ++j)
+              if (std::abs (solution (i * deg + j)) > 1e-14)
+                local_dofs[i + (j + GeometryInfo<dim>::lines_per_cell
+                                + deg) * this->degree]
+                  = solution (i * deg + j);
+        }
+
+      break;
+    }
+
+    case 3:
+    {
+      // Let us begin with the
+      // interpolation part.
+      const QGauss<1> reference_edge_quadrature (this->degree);
+      const unsigned int &
+      n_edge_points = reference_edge_quadrature.size ();
+
+      for (unsigned int q_point = 0; q_point < n_edge_points; ++q_point)
+        {
+          for (unsigned int i = 0; i < 4; ++i)
+            local_dofs[(i + 8) * this->degree]
+            += reference_edge_quadrature.weight (q_point)
+               * values[2][q_point + (i + 8) * n_edge_points];
+
+          for (unsigned int i = 0; i < 2; ++i)
+            for (unsigned int j = 0; j < 2; ++j)
+              for (unsigned int k = 0; k < 2; ++k)
+                local_dofs[(i + 2 * (2 * j + k)) * this->degree]
+                += reference_edge_quadrature.weight (q_point)
+                   * values[1 - k][q_point + (i + 2 * (2 * j + k))
+                                   * n_edge_points];
+        }
+
+      // Add the computed values
+      // to the resulting vector
+      // only, if they are not
+      // too small.
+      for (unsigned int i = 0; i < 4; ++i)
+        if (std::abs (local_dofs[(i + 8) * this->degree]) < 1e-14)
+          local_dofs[(i + 8) * this->degree] = 0.0;
+
+      for (unsigned int i = 0; i < 2; ++i)
+        for (unsigned int j = 0; j < 2; ++j)
+          for (unsigned int k = 0; k < 2; ++k)
+            if (std::abs (local_dofs[(i + 2 * (2 * j + k)) * this->degree])
+                < 1e-14)
+              local_dofs[(i + 2 * (2 * j + k)) * this->degree] = 0.0;
+
+      // If the degree is greater
+      // than 0, then we have still
+      // some higher order shape
+      // functions to consider.
+      // Here the projection part
+      // starts. The dof values
+      // are obtained by solving
+      // a linear system of
+      // equations.
+      if (this->degree > 1)
+        {
+          // We start with projection
+          // on the higher order edge
+          // shape function.
+          const std::vector<Polynomials::Polynomial<double> > &
+          lobatto_polynomials
+            = Polynomials::Lobatto::generate_complete_basis
+              (this->degree);
+          FullMatrix<double> system_matrix (this->degree-1, this->degree-1);
+          std::vector<Polynomials::Polynomial<double> >
+          lobatto_polynomials_grad (this->degree);
+
+          for (unsigned int i = 0; i < lobatto_polynomials_grad.size ();
+               ++i)
+            lobatto_polynomials_grad[i]
+              = lobatto_polynomials[i + 1].derivative ();
+
+          // Set up the system matrix.
+          // This can be used for all
+          // edges.
+          for (unsigned int i = 0; i < system_matrix.m (); ++i)
+            for (unsigned int j = 0; j < system_matrix.n (); ++j)
+              for (unsigned int q_point = 0; q_point < n_edge_points;
+                   ++q_point)
+                system_matrix (i, j)
+                += boundary_weights (q_point, j)
+                   * lobatto_polynomials_grad[i + 1].value
+                   (this->generalized_face_support_points[q_point]
+                    (1));
+
+          FullMatrix<double> system_matrix_inv (this->degree-1, this->degree-1);
+
+          system_matrix_inv.invert (system_matrix);
+
+          const unsigned int
+          line_coordinate[GeometryInfo<3>::lines_per_cell]
+            = {1, 1, 0, 0, 1, 1, 0, 0, 2, 2, 2, 2};
+          Vector<double> system_rhs (system_matrix.m ());
+          Vector<double> solution (system_rhs.size ());
+
+          for (unsigned int line = 0;
+               line < GeometryInfo<dim>::lines_per_cell; ++line)
+            {
+              // Set up the right hand side.
+              system_rhs = 0;
+
+              for (unsigned int q_point = 0; q_point < this->degree; ++q_point)
+                {
+                  const double tmp
+                    = values[line_coordinate[line]][line * this->degree
+                                                    + q_point]
+                      - local_dofs[line * this->degree]
+                      * this->shape_value_component
+                      (line * this->degree,
+                       this->generalized_support_points[line
+                                                        * this->degree
+                                                        + q_point],
+                       line_coordinate[line]);
+
+                  for (unsigned int i = 0; i < system_rhs.size (); ++i)
+                    system_rhs (i) += boundary_weights (q_point, i)
+                                      * tmp;
+                }
+
+              system_matrix_inv.vmult (solution, system_rhs);
+
+              // Add the computed values
+              // to the resulting vector
+              // only, if they are not
+              // too small.
+              for (unsigned int i = 0; i < solution.size (); ++i)
+                if (std::abs (solution (i)) > 1e-14)
+                  local_dofs[line * this->degree + i + 1] = solution (i);
+            }
+
+          // Then we go on to the
+          // face shape functions.
+          // Again we set up the
+          // system matrix and
+          // use it for both, the
+          // horizontal and the
+          // vertical, shape
+          // functions.
+          const std::vector<Polynomials::Polynomial<double> > &
+          legendre_polynomials
+            = Polynomials::Legendre::generate_complete_basis (this->degree-1);
+          const unsigned int n_face_points = n_edge_points * n_edge_points;
+
+          system_matrix.reinit ((this->degree-1) * this->degree,
+                                (this->degree-1) * this->degree);
+          system_matrix = 0;
+
+          for (unsigned int i = 0; i < this->degree; ++i)
+            for (unsigned int j = 0; j < this->degree-1; ++j)
+              for (unsigned int k = 0; k < this->degree; ++k)
+                for (unsigned int l = 0; l < this->degree-1; ++l)
+                  for (unsigned int q_point = 0; q_point < n_face_points;
+                       ++q_point)
+                    system_matrix (i * (this->degree-1) + j, k * (this->degree-1) + l)
+                    += boundary_weights (q_point + n_edge_points,
+                                         2 * (k * (this->degree-1) + l))
+                       * legendre_polynomials[i].value
+                       (this->generalized_face_support_points[q_point
+                                                              + 4
+                                                              * n_edge_points]
+                        (0))
+                       * lobatto_polynomials[j + 2].value
+                       (this->generalized_face_support_points[q_point
+                                                              + 4
+                                                              * n_edge_points]
+                        (1));
+
+          system_matrix_inv.reinit (system_matrix.m (),
+                                    system_matrix.m ());
+          system_matrix_inv.invert (system_matrix);
+          solution.reinit (system_matrix.m ());
+          system_rhs.reinit (system_matrix.m ());
+
+          const unsigned int
+          face_coordinates[GeometryInfo<3>::faces_per_cell][2]
+          = {{1, 2}, {1, 2}, {2, 0}, {2, 0}, {0, 1}, {0, 1}};
+          const unsigned int
+          edge_indices[GeometryInfo<3>::faces_per_cell][GeometryInfo<3>::lines_per_face]
+          = {{0, 4, 8, 10}, {1, 5, 9, 11}, {8, 9, 2, 6},
+            {10, 11, 3, 7}, {2, 3, 0, 1}, {6, 7, 4, 5}
+          };
+
+          for (unsigned int face = 0;
+               face < GeometryInfo<dim>::faces_per_cell; ++face)
+            {
+              // Set up the right hand side
+              // for the horizontal shape
+              // functions.
+              system_rhs = 0;
+
+              for (unsigned int q_point = 0; q_point < n_face_points;
+                   ++q_point)
+                {
+                  double tmp
+                    = values[face_coordinates[face][0]][q_point
+                                                        + GeometryInfo<dim>::lines_per_cell
+                                                        * n_edge_points];
+
+                  for (unsigned int i = 0; i < 2; ++i)
+                    for (unsigned int j = 0; j <= deg; ++j)
+                      tmp -= local_dofs[edge_indices[face][i]
+                                        * this->degree + j]
+                             * this->shape_value_component
+                             (edge_indices[face][i] * this->degree + j,
+                              this->generalized_support_points[q_point
+                                                               + GeometryInfo<dim>::lines_per_cell
+                                                               * n_edge_points],
+                              face_coordinates[face][0]);
+
+                  for (unsigned int i = 0; i <= deg; ++i)
+                    for (unsigned int j = 0; j < deg; ++j)
+                      system_rhs (i * deg + j)
+                      += boundary_weights (q_point + n_edge_points,
+                                           2 * (i * deg + j)) * tmp;
+                }
+
+              system_matrix_inv.vmult (solution, system_rhs);
+
+              // Add the computed values
+              // to the resulting vector
+              // only, if they are not
+              // too small.
+              for (unsigned int i = 0; i <= deg; ++i)
+                for (unsigned int j = 0; j < deg; ++j)
+                  if (std::abs (solution (i * deg + j)) > 1e-14)
+                    local_dofs[(2 * face * this->degree + i
+                                + GeometryInfo<dim>::lines_per_cell) * deg
+                               + j + GeometryInfo<dim>::lines_per_cell]
+                      = solution (i * deg + j);
+
+              // Set up the right hand side
+              // for the vertical shape
+              // functions.
+              system_rhs = 0;
+
+              for (unsigned int q_point = 0; q_point < n_face_points;
+                   ++q_point)
+                {
+                  double tmp
+                    = values[face_coordinates[face][1]][q_point
+                                                        + GeometryInfo<dim>::lines_per_cell
+                                                        * n_edge_points];
+
+                  for (int i = 2; i < (int) GeometryInfo<dim>::lines_per_face; ++i)
+                    for (unsigned int j = 0; j <= deg; ++j)
+                      tmp -= local_dofs[edge_indices[face][i]
+                                        * this->degree + j]
+                             * this->shape_value_component
+                             (edge_indices[face][i] * this->degree + j,
+                              this->generalized_support_points[q_point
+                                                               + GeometryInfo<dim>::lines_per_cell
+                                                               * n_edge_points],
+                              face_coordinates[face][1]);
+
+                  for (unsigned int i = 0; i <= deg; ++i)
+                    for (unsigned int j = 0; j < deg; ++j)
+                      system_rhs (i * deg + j)
+                      += boundary_weights (q_point + n_edge_points,
+                                           2 * (i * deg + j) + 1)
+                         * tmp;
+                }
+
+              system_matrix_inv.vmult (solution, system_rhs);
+
+              // Add the computed values
+              // to the resulting vector
+              // only, if they are not
+              // too small.
+              for (unsigned int i = 0; i <= deg; ++i)
+                for (unsigned int j = 0; j < deg; ++j)
+                  if (std::abs (solution (i * deg + j)) > 1e-14)
+                    local_dofs[((2 * face + 1) * deg + j + GeometryInfo<dim>::lines_per_cell)
+                               * this->degree + i]
+                      = solution (i * deg + j);
+            }
+
+          // Finally we project
+          // the remaining parts
+          // of the function on
+          // the interior shape
+          // functions.
+          const QGauss<dim> reference_quadrature (this->degree);
+          const unsigned int
+          n_interior_points = reference_quadrature.size ();
+
+          // We create the
+          // system matrix.
+          system_matrix.reinit (this->degree * deg * deg,
+                                this->degree * deg * deg);
+          system_matrix = 0;
+
+          for (unsigned int i = 0; i <= deg; ++i)
+            for (unsigned int j = 0; j < deg; ++j)
+              for (unsigned int k = 0; k < deg; ++k)
+                for (unsigned int l = 0; l <= deg; ++l)
+                  for (unsigned int m = 0; m < deg; ++m)
+                    for (unsigned int n = 0; n < deg; ++n)
+                      for (unsigned int q_point = 0;
+                           q_point < n_interior_points; ++q_point)
+                        system_matrix ((i * deg + j) * deg + k,
+                                       (l * deg + m) * deg + n)
+                        += reference_quadrature.weight (q_point)
+                           * legendre_polynomials[i].value
+                           (this->generalized_support_points[q_point
+                                                             + GeometryInfo<dim>::lines_per_cell
+                                                             * n_edge_points
+                                                             + GeometryInfo<dim>::faces_per_cell
+                                                             * n_face_points]
+                            (0))
+                           * lobatto_polynomials[j + 2].value
+                           (this->generalized_support_points[q_point
+                                                             + GeometryInfo<dim>::lines_per_cell
+                                                             * n_edge_points
+                                                             + GeometryInfo<dim>::faces_per_cell
+                                                             * n_face_points]
+                            (1))
+                           * lobatto_polynomials[k + 2].value
+                           (this->generalized_support_points[q_point
+                                                             + GeometryInfo<dim>::lines_per_cell
+                                                             * n_edge_points
+                                                             + GeometryInfo<dim>::faces_per_cell
+                                                             * n_face_points]
+                            (2))
+                           * lobatto_polynomials_grad[l].value
+                           (this->generalized_support_points[q_point
+                                                             + GeometryInfo<dim>::lines_per_cell
+                                                             * n_edge_points
+                                                             + GeometryInfo<dim>::faces_per_cell
+                                                             * n_face_points]
+                            (0))
+                           * lobatto_polynomials[m + 2].value
+                           (this->generalized_support_points[q_point
+                                                             + GeometryInfo<dim>::lines_per_cell
+                                                             * n_edge_points
+                                                             + GeometryInfo<dim>::faces_per_cell
+                                                             * n_face_points]
+                            (1))
+                           * lobatto_polynomials[n + 2].value
+                           (this->generalized_support_points[q_point
+                                                             + GeometryInfo<dim>::lines_per_cell
+                                                             * n_edge_points
+                                                             + GeometryInfo<dim>::faces_per_cell
+                                                             * n_face_points]
+                            (2));
+
+          system_matrix_inv.reinit (system_matrix.m (),
+                                    system_matrix.m ());
+          system_matrix_inv.invert (system_matrix);
+          // Set up the right hand side.
+          system_rhs.reinit (system_matrix.m ());
+          system_rhs = 0;
+
+          for (unsigned int q_point = 0; q_point < n_interior_points;
+               ++q_point)
+            {
+              double tmp
+                = values[0][q_point + GeometryInfo<dim>::lines_per_cell
+                            * n_edge_points
+                            + GeometryInfo<dim>::faces_per_cell
+                            * n_face_points];
+
+              for (unsigned int i = 0; i <= deg; ++i)
+                {
+                  for (unsigned int j = 0; j < 2; ++j)
+                    for (unsigned int k = 0; k < 2; ++k)
+                      tmp -= local_dofs[i + (j + 4 * k + 2) * this->degree]
+                             * this->shape_value_component
+                             (i + (j + 4 * k + 2) * this->degree,
+                              this->generalized_support_points[q_point
+                                                               + GeometryInfo<dim>::lines_per_cell
+                                                               * n_edge_points
+                                                               + GeometryInfo<dim>::faces_per_cell
+                                                               * n_face_points],
+                              0);
+
+                  for (unsigned int j = 0; j < deg; ++j)
+                    for (unsigned int k = 0; k < 4; ++k)
+                      tmp -= local_dofs[(i + 2 * (k + 2) * this->degree
+                                         + GeometryInfo<dim>::lines_per_cell)
+                                        * deg + j
+                                        + GeometryInfo<dim>::lines_per_cell]
+                             * this->shape_value_component
+                             ((i + 2 * (k + 2) * this->degree
+                               + GeometryInfo<dim>::lines_per_cell)
+                              * deg + j
+                              + GeometryInfo<dim>::lines_per_cell,
+                              this->generalized_support_points[q_point
+                                                               + GeometryInfo<dim>::lines_per_cell
+                                                               * n_edge_points
+                                                               + GeometryInfo<dim>::faces_per_cell
+                                                               * n_face_points],
+                              0);
+                }
+
+              for (unsigned int i = 0; i <= deg; ++i)
+                for (unsigned int j = 0; j < deg; ++j)
+                  for (unsigned int k = 0; k < deg; ++k)
+                    system_rhs ((i * deg + j) * deg + k)
+                    += reference_quadrature.weight (q_point) * tmp
+                       * lobatto_polynomials_grad[i].value
+                       (this->generalized_support_points[q_point
+                                                         + GeometryInfo<dim>::lines_per_cell
+                                                         * n_edge_points
+                                                         + GeometryInfo<dim>::faces_per_cell
+                                                         * n_face_points]
+                        (0))
+                       * lobatto_polynomials[j + 2].value
+                       (this->generalized_support_points[q_point
+                                                         + GeometryInfo<dim>::lines_per_cell
+                                                         * n_edge_points
+                                                         + GeometryInfo<dim>::faces_per_cell
+                                                         * n_face_points]
+                        (1))
+                       * lobatto_polynomials[k + 2].value
+                       (this->generalized_support_points[q_point
+                                                         + GeometryInfo<dim>::lines_per_cell
+                                                         * n_edge_points
+                                                         + GeometryInfo<dim>::faces_per_cell
+                                                         * n_face_points]
+                        (2));
+            }
+
+          solution.reinit (system_rhs.size ());
+          system_matrix_inv.vmult (solution, system_rhs);
+
+          // Add the computed values
+          // to the resulting vector
+          // only, if they are not
+          // too small.
+          for (unsigned int i = 0; i <= deg; ++i)
+            for (unsigned int j = 0; j < deg; ++j)
+              for (unsigned int k = 0; k < deg; ++k)
+                if (std::abs (solution ((i * deg + j) * deg + k)) > 1e-14)
+                  local_dofs[((i + 2 * GeometryInfo<dim>::faces_per_cell)
+                              * deg + j + GeometryInfo<dim>::lines_per_cell
+                              + 2 * GeometryInfo<dim>::faces_per_cell)
+                             * deg + k + GeometryInfo<dim>::lines_per_cell]
+                    = solution ((i * deg + j) * deg + k);
+
+          // Set up the right hand side.
+          system_rhs = 0;
+
+          for (unsigned int q_point = 0; q_point < n_interior_points;
+               ++q_point)
+            {
+              double tmp
+                = values[1][q_point + GeometryInfo<dim>::lines_per_cell
+                            * n_edge_points
+                            + GeometryInfo<dim>::faces_per_cell
+                            * n_face_points];
+
+              for (unsigned int i = 0; i <= deg; ++i)
+                for (unsigned int j = 0; j < 2; ++j)
+                  {
+                    for (unsigned int k = 0; k < 2; ++k)
+                      tmp -= local_dofs[i + (4 * j + k) * this->degree]
+                             * this->shape_value_component
+                             (i + (4 * j + k) * this->degree,
+                              this->generalized_support_points[q_point
+                                                               + GeometryInfo<dim>::lines_per_cell
+                                                               * n_edge_points
+                                                               + GeometryInfo<dim>::faces_per_cell
+                                                               * n_face_points],
+                              1);
+
+                    for (unsigned int k = 0; k < deg; ++k)
+                      tmp -= local_dofs[(i + 2 * j * this->degree
+                                         + GeometryInfo<dim>::lines_per_cell)
+                                        * deg + k
+                                        + GeometryInfo<dim>::lines_per_cell]
+                             * this->shape_value_component
+                             ((i + 2 * j * this->degree
+                               + GeometryInfo<dim>::lines_per_cell)
+                              * deg + k
+                              + GeometryInfo<dim>::lines_per_cell,
+                              this->generalized_support_points[q_point
+                                                               + GeometryInfo<dim>::lines_per_cell
+                                                               * n_edge_points
+                                                               + GeometryInfo<dim>::faces_per_cell
+                                                               * n_face_points],
+                              1)
+                             + local_dofs[i + ((2 * j + 9) * deg + k
+                                               + GeometryInfo<dim>::lines_per_cell)
+                                          * this->degree]
+                             * this->shape_value_component
+                             (i + ((2 * j + 9) * deg + k
+                                   + GeometryInfo<dim>::lines_per_cell)
+                              * this->degree,
+                              this->generalized_support_points[q_point
+                                                               + GeometryInfo<dim>::lines_per_cell
+                                                               * n_edge_points
+                                                               + GeometryInfo<dim>::faces_per_cell
+                                                               * n_face_points],
+                              1);
+                  }
+
+              for (unsigned int i = 0; i <= deg; ++i)
+                for (unsigned int j = 0; j < deg; ++j)
+                  for (unsigned int k = 0; k < deg; ++k)
+                    system_rhs ((i * deg + j) * deg + k)
+                    += reference_quadrature.weight (q_point) * tmp
+                       * lobatto_polynomials_grad[i].value
+                       (this->generalized_support_points[q_point
+                                                         + GeometryInfo<dim>::lines_per_cell
+                                                         * n_edge_points
+                                                         + GeometryInfo<dim>::faces_per_cell
+                                                         * n_face_points]
+                        (1))
+                       * lobatto_polynomials[j + 2].value
+                       (this->generalized_support_points[q_point
+                                                         + GeometryInfo<dim>::lines_per_cell
+                                                         * n_edge_points
+                                                         + GeometryInfo<dim>::faces_per_cell
+                                                         * n_face_points]
+                        (0))
+                       * lobatto_polynomials[k + 2].value
+                       (this->generalized_support_points[q_point
+                                                         + GeometryInfo<dim>::lines_per_cell
+                                                         * n_edge_points
+                                                         + GeometryInfo<dim>::faces_per_cell
+                                                         * n_face_points]
+                        (2));
+            }
+
+          system_matrix_inv.vmult (solution, system_rhs);
+
+          // Add the computed values
+          // to the resulting vector
+          // only, if they are not
+          // too small.
+          for (unsigned int i = 0; i <= deg; ++i)
+            for (unsigned int j = 0; j < deg; ++j)
+              for (unsigned int k = 0; k < deg; ++k)
+                if (std::abs (solution ((i * deg + j) * deg + k)) > 1e-14)
+                  local_dofs[((i + this->degree + 2
+                               * GeometryInfo<dim>::faces_per_cell) * deg
+                              + j + GeometryInfo<dim>::lines_per_cell + 2
+                              * GeometryInfo<dim>::faces_per_cell) * deg
+                             + k + GeometryInfo<dim>::lines_per_cell]
+                    = solution ((i * deg + j) * deg + k);
+
+          // Set up the right hand side.
+          system_rhs = 0;
+
+          for (unsigned int q_point = 0; q_point < n_interior_points;
+               ++q_point)
+            {
+              double tmp
+                = values[2][q_point + GeometryInfo<dim>::lines_per_cell
+                            * n_edge_points
+                            + GeometryInfo<dim>::faces_per_cell
+                            * n_face_points];
+
+              for (unsigned int i = 0; i <= deg; ++i)
+                for (unsigned int j = 0; j < 4; ++j)
+                  {
+                    tmp -= local_dofs[i + (j + 8) * this->degree]
+                           * this->shape_value_component
+                           (i + (j + 8) * this->degree,
+                            this->generalized_support_points[q_point
+                                                             + GeometryInfo<dim>::lines_per_cell
+                                                             * n_edge_points
+                                                             + GeometryInfo<dim>::faces_per_cell
+                                                             * n_face_points],
+                            2);
+
+                    for (unsigned int k = 0; k < deg; ++k)
+                      tmp -= local_dofs[i + ((2 * j + 1) * deg + k
+                                             + GeometryInfo<dim>::lines_per_cell)
+                                        * this->degree]
+                             * this->shape_value_component
+                             (i + ((2 * j + 1) * deg + k
+                                   + GeometryInfo<dim>::lines_per_cell)
+                              * this->degree,
+                              this->generalized_support_points[q_point
+                                                               + GeometryInfo<dim>::lines_per_cell
+                                                               * n_edge_points
+                                                               + GeometryInfo<dim>::faces_per_cell
+                                                               * n_face_points],
+                              2);
+                  }
+
+              for (unsigned int i = 0; i <= deg; ++i)
+                for (unsigned int j = 0; j < deg; ++j)
+                  for (unsigned int k = 0; k < deg; ++k)
+                    system_rhs ((i * deg + j) * deg + k)
+                    += reference_quadrature.weight (q_point) * tmp
+                       * lobatto_polynomials_grad[i].value
+                       (this->generalized_support_points[q_point
+                                                         + GeometryInfo<dim>::lines_per_cell
+                                                         * n_edge_points
+                                                         + GeometryInfo<dim>::faces_per_cell
+                                                         * n_face_points]
+                        (2))
+                       * lobatto_polynomials[j + 2].value
+                       (this->generalized_support_points[q_point
+                                                         + GeometryInfo<dim>::lines_per_cell
+                                                         * n_edge_points
+                                                         + GeometryInfo<dim>::faces_per_cell
+                                                         * n_face_points]
+                        (0))
+                       * lobatto_polynomials[k + 2].value
+                       (this->generalized_support_points[q_point
+                                                         + GeometryInfo<dim>::lines_per_cell
+                                                         * n_edge_points
+                                                         + GeometryInfo<dim>::faces_per_cell
+                                                         * n_face_points]
+                        (1));
+            }
+
+          system_matrix_inv.vmult (solution, system_rhs);
+
+          // Add the computed values
+          // to the resulting vector
+          // only, if they are not
+          // too small.
+          for (unsigned int i = 0; i <= deg; ++i)
+            for (unsigned int j = 0; j < deg; ++j)
+              for (unsigned int k = 0; k < deg; ++k)
+                if (std::abs (solution ((i * deg + j) * deg + k)) > 1e-14)
+                  local_dofs[i + ((j + 2 * (deg
+                                            + GeometryInfo<dim>::faces_per_cell))
+                                  * deg + k
+                                  + GeometryInfo<dim>::lines_per_cell)
+                             * this->degree]
+                    = solution ((i * deg + j) * deg + k);
+        }
+
+      break;
+    }
+
+    default:
+      Assert (false, ExcNotImplemented ());
+    }
+}
+
+
+
+template <int dim>
+std::pair<Table<2,bool>, std::vector<unsigned int> >
+FE_Nedelec<dim>::get_constant_modes() const
+{
+  // Result: an all-true dim x dofs_per_cell table plus the list 0,...,dim-1.
+  std::vector<unsigned int> component_list;
+  for (unsigned int comp=0; comp<dim; ++comp)
+    component_list.push_back(comp);
+  Table<2,bool> mode_flags(dim, this->dofs_per_cell);
+  for (unsigned int comp=0; comp<dim; ++comp)
+    for (unsigned int dof=0; dof<this->dofs_per_cell; ++dof)
+      mode_flags(comp,dof) = true;
+  return std::make_pair(mode_flags, component_list);
+}
+
+
+template <int dim>
+std::size_t
+FE_Nedelec<dim>::memory_consumption () const
+{
+  Assert (false, ExcNotImplemented ());  // unconditional: no estimate is implemented here
+  return 0;                              // placeholder value following the assertion
+}
+
+
+//----------------------------------------------------------------------//
+
+
+// explicit instantiations
+#include "fe_nedelec.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/fe_nedelec.inst.in b/source/fe/fe_nedelec.inst.in
new file mode 100644
index 0000000..ae05b11
--- /dev/null
+++ b/source/fe/fe_nedelec.inst.in
@@ -0,0 +1,22 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+// Explicit instantiation of FE_Nedelec; the loop presumably expands once per entry of DIMENSIONS.
+for (deal_II_dimension : DIMENSIONS)
+  {
+    template class FE_Nedelec<deal_II_dimension>;
+  }
+
diff --git a/source/fe/fe_nothing.cc b/source/fe/fe_nothing.cc
new file mode 100644
index 0000000..3ed5782
--- /dev/null
+++ b/source/fe/fe_nothing.cc
@@ -0,0 +1,287 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/fe/fe_nothing.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace
+{
+  // Explanation handed to ExcMessage when shape function data is
+  // requested from an element that has none (see shape_value()).
+  const char *zero_dof_message
+    = "This element has no shape functions.";
+}
+
+
+
+
+/**
+ * Constructor. Builds the base class from a FiniteElementData object
+ * whose dofs-per-object vector has dim+1 zero entries and which carries
+ * @p n_components components, and stores the @p dominate flag.
+ */
+template <int dim, int spacedim>
+FE_Nothing<dim,spacedim>::FE_Nothing (const unsigned int n_components,
+                                      const bool         dominate)
+  :
+  FiniteElement<dim,spacedim>
+  (FiniteElementData<dim>(std::vector<unsigned>(dim+1,0),
+                          n_components,
+                          0,
+                          FiniteElementData<dim>::unknown),
+   std::vector<bool>(),
+   std::vector<ComponentMask>() ),
+  dominate(dominate)
+{
+  // Other elements set up a lot of data at this point. Here nothing is
+  // left to do: since the element has no degrees of freedom, the
+  // restriction and prolongation matrices have size zero and can stay
+  // empty.
+}
+
+
+/**
+ * Return a copy of this element allocated with new.
+ * NOTE(review): presumably the caller takes ownership of the returned
+ * pointer -- confirm against the FiniteElement::clone() contract.
+ */
+template <int dim, int spacedim>
+FiniteElement<dim,spacedim> *
+FE_Nothing<dim,spacedim>::clone() const
+{
+  FE_Nothing<dim,spacedim> *duplicate = new FE_Nothing<dim,spacedim>(*this);
+  return duplicate;
+}
+
+
+
+/**
+ * Return the name of this element as a string of the form
+ * "FE_Nothing<dim>(n)", where the component count n is only written if
+ * it is larger than one.
+ * NOTE(review): only dim is written into the name; spacedim does not
+ * appear in it.
+ */
+template <int dim, int spacedim>
+std::string
+FE_Nothing<dim,spacedim>::get_name () const
+{
+  const unsigned int n_comps = this->n_components();
+
+  std::ostringstream name;
+  name << "FE_Nothing<" << dim << ">(";
+
+  // a single component is the default and is left out of the name
+  if (n_comps > 1)
+    name << n_comps;
+
+  name << ")";
+
+  return name.str();
+}
+
+
+
+/**
+ * Return the update flags this element needs in order to provide the
+ * quantities requested in @p flags. This element adds nothing, so the
+ * argument is returned unchanged.
+ */
+template <int dim, int spacedim>
+UpdateFlags
+FE_Nothing<dim,spacedim>::requires_update_flags (const UpdateFlags flags) const
+{
+  const UpdateFlags required = flags;
+  return required;
+}
+
+
+
+/**
+ * Value of a shape function at a point. This element has no shape
+ * functions, so every call raises an assertion carrying
+ * zero_dof_message; the value 0 is returned only if the assertion lets
+ * execution continue.
+ */
+template <int dim, int spacedim>
+double
+FE_Nothing<dim,spacedim>::shape_value (const unsigned int /*i*/,
+                                       const Point<dim> & /*p*/) const
+{
+  // reference the message explicitly so that it counts as used
+  // independently of what the Assert macro expands to
+  (void)zero_dof_message;
+
+  Assert(false,ExcMessage(zero_dof_message));
+
+  const double no_value = 0;
+  return no_value;
+}
+
+
+
+/**
+ * Create the internal data object for this element. All arguments are
+ * ignored; the returned object is a default-constructed InternalDataBase
+ * allocated with new.
+ */
+template <int dim, int spacedim>
+typename FiniteElement<dim,spacedim>::InternalDataBase *
+FE_Nothing<dim,spacedim>::get_data (const UpdateFlags                                                    /*update_flags*/,
+                                    const Mapping<dim,spacedim>                                         &/*mapping*/,
+                                    const Quadrature<dim>                                               &/*quadrature*/,
+                                    dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &/*output_data*/) const
+{
+  // Other elements would now size the data fields according to their
+  // number of dofs. With no dofs, all fields stay empty and the
+  // default-constructed object can be handed out as is.
+  typedef typename FiniteElement<dim,spacedim>::InternalDataBase DataType;
+  return new DataType();
+}
+
+
+
+/**
+ * Fill the output data for a cell. Intentionally does nothing: all
+ * arguments are ignored and the output object is left untouched.
+ */
+template <int dim, int spacedim>
+void
+FE_Nothing<dim,spacedim>::
+fill_fe_values (const typename Triangulation<dim,spacedim>::cell_iterator &,
+                const CellSimilarity::Similarity,
+                const Quadrature<dim> &,
+                const Mapping<dim,spacedim> &,
+                const typename Mapping<dim,spacedim>::InternalDataBase &,
+                const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &,
+                const typename FiniteElement<dim,spacedim>::InternalDataBase &,
+                dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &) const
+{
+  // no shape functions, hence no data to write
+}
+
+
+
+/**
+ * Fill the output data for a face of a cell. Intentionally does
+ * nothing: all arguments are ignored and the output object is left
+ * untouched.
+ */
+template <int dim, int spacedim>
+void
+FE_Nothing<dim,spacedim>::
+fill_fe_face_values (const typename Triangulation<dim,spacedim>::cell_iterator &,
+                     const unsigned int,
+                     const Quadrature<dim-1> &,
+                     const Mapping<dim,spacedim> &,
+                     const typename Mapping<dim,spacedim>::InternalDataBase &,
+                     const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &,
+                     const typename FiniteElement<dim,spacedim>::InternalDataBase &,
+                     dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &) const
+{
+  // no shape functions, hence no data to write
+}
+
+
+
+/**
+ * Fill the output data for a subface of a cell. Intentionally does
+ * nothing: all arguments are ignored and the output object is left
+ * untouched.
+ */
+template <int dim, int spacedim>
+void
+FE_Nothing<dim,spacedim>::
+fill_fe_subface_values (const typename Triangulation<dim,spacedim>::cell_iterator &,
+                        const unsigned int,
+                        const unsigned int,
+                        const Quadrature<dim-1> &,
+                        const Mapping<dim,spacedim> &,
+                        const typename Mapping<dim,spacedim>::InternalDataBase &,
+                        const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &,
+                        const typename FiniteElement<dim,spacedim>::InternalDataBase &,
+                        dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &) const
+{
+  // no shape functions, hence no data to write
+}
+
+
+
+/**
+ * Return the value of the dominate flag that was passed to the
+ * constructor.
+ */
+template <int dim, int spacedim>
+bool
+FE_Nothing<dim,spacedim>::is_dominating() const
+{
+  const bool flag = dominate;
+  return flag;
+}
+
+
+/**
+ * Decide which of this element and @p fe dominates on a common face.
+ *
+ * - If the dominate flag is not set, no_requirements is returned.
+ * - If it is set and @p fe is itself an FE_Nothing<dim,spacedim>,
+ *   either_element_can_dominate is returned.
+ * - Otherwise this_element_dominates is returned.
+ */
+template <int dim, int spacedim>
+FiniteElementDomination::Domination
+FE_Nothing<dim,spacedim> ::
+compare_for_face_domination (const FiniteElement<dim,spacedim> &fe) const
+{
+  // if FE_Nothing does not dominate, there are no requirements
+  if (!dominate)
+    {
+      return FiniteElementDomination::no_requirements;
+    }
+  // if it does and the other is FE_Nothing, either can dominate.
+  // The cast has to name spacedim explicitly: FE_Nothing<dim> means
+  // FE_Nothing<dim,dim>, which can never be the dynamic type of a
+  // FiniteElement<dim,spacedim> with spacedim != dim.
+  else if (dynamic_cast<const FE_Nothing<dim,spacedim>*>(&fe) != 0)
+    {
+      return FiniteElementDomination::either_element_can_dominate;
+    }
+  // otherwise we dominate whatever fe is provided
+  else
+    {
+      return FiniteElementDomination::this_element_dominates;
+    }
+}
+
+
+/**
+ * Return the pairs of vertex degrees of freedom of this element and of
+ * the other element that are to be identified. The argument is ignored
+ * and the returned list is always empty.
+ */
+template <int dim, int spacedim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FE_Nothing<dim,spacedim> ::
+hp_vertex_dof_identities (const FiniteElement<dim,spacedim> &/*fe_other*/) const
+{
+  // without degrees of freedom there is nothing that could be
+  // identified with a dof of the other element
+  std::vector<std::pair<unsigned int, unsigned int> > no_identities;
+  return no_identities;
+}
+
+
+/**
+ * Return the pairs of line degrees of freedom of this element and of
+ * the other element that are to be identified. The argument is ignored
+ * and the returned list is always empty.
+ */
+template <int dim, int spacedim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FE_Nothing<dim,spacedim> ::
+hp_line_dof_identities (const FiniteElement<dim,spacedim> &/*fe_other*/) const
+{
+  // without degrees of freedom there is nothing that could be
+  // identified with a dof of the other element
+  std::vector<std::pair<unsigned int, unsigned int> > no_identities;
+  return no_identities;
+}
+
+
+/**
+ * Return the pairs of quad degrees of freedom of this element and of
+ * the other element that are to be identified. The argument is ignored
+ * and the returned list is always empty.
+ */
+template <int dim, int spacedim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FE_Nothing<dim,spacedim> ::
+hp_quad_dof_identities (const FiniteElement<dim,spacedim> &/*fe_other*/) const
+{
+  // without degrees of freedom there is nothing that could be
+  // identified with a dof of the other element
+  std::vector<std::pair<unsigned int, unsigned int> > no_identities;
+  return no_identities;
+}
+
+
+/**
+ * Report whether this element implements the functions needed for hp
+ * constraints. Always returns true.
+ */
+template <int dim, int spacedim>
+bool
+FE_Nothing<dim,spacedim> ::
+hp_constraints_are_implemented () const
+{
+  const bool implemented = true;
+  return implemented;
+}
+
+
+/**
+ * Interpolation matrix from the face of the source element to the face
+ * of this element. Since this element has no face dofs, nothing is
+ * written; the function only asserts that @p interpolation_matrix has
+ * zero rows and zero columns. The source element is ignored.
+ */
+template <int dim, int spacedim>
+void
+FE_Nothing<dim,spacedim>::
+get_face_interpolation_matrix (const FiniteElement<dim,spacedim> &/*source_fe*/,
+                               FullMatrix<double>       &interpolation_matrix) const
+{
+  // since this element has no face dofs, the
+  // interpolation matrix is necessarily empty
+  (void)interpolation_matrix;
+
+  Assert (interpolation_matrix.m() == 0,
+          ExcDimensionMismatch (interpolation_matrix.m(),
+                                0));
+  // report the number of columns here, i.e. the quantity that was
+  // actually checked
+  Assert (interpolation_matrix.n() == 0,
+          ExcDimensionMismatch (interpolation_matrix.n(),
+                                0));
+}
+
+
+/**
+ * Interpolation matrix from a subface of the source element to the face
+ * of this element. Since this element has no face dofs, nothing is
+ * written; the function only asserts that @p interpolation_matrix has
+ * zero rows and zero columns. The source element and the subface index
+ * are ignored.
+ */
+template <int dim, int spacedim>
+void
+FE_Nothing<dim,spacedim>::
+get_subface_interpolation_matrix (const FiniteElement<dim,spacedim> & /*source_fe*/,
+                                  const unsigned int /*index*/,
+                                  FullMatrix<double>  &interpolation_matrix) const
+{
+  // since this element has no face dofs, the
+  // interpolation matrix is necessarily empty
+
+  (void)interpolation_matrix;
+  Assert (interpolation_matrix.m() == 0,
+          ExcDimensionMismatch (interpolation_matrix.m(),
+                                0));
+  // report the number of columns here, i.e. the quantity that was
+  // actually checked
+  Assert (interpolation_matrix.n() == 0,
+          ExcDimensionMismatch (interpolation_matrix.n(),
+                                0));
+}
+
+
+
+// explicit instantiations
+#include "fe_nothing.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
+
diff --git a/source/fe/fe_nothing.inst.in b/source/fe/fe_nothing.inst.in
new file mode 100644
index 0000000..6948f0f
--- /dev/null
+++ b/source/fe/fe_nothing.inst.in
@@ -0,0 +1,24 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+// Explicit instantiation of FE_Nothing. Both placeholders iterate over
+// the entries of DIMENSIONS; the preprocessor guard keeps only those
+// combinations in which the dimension does not exceed the space
+// dimension.
+for (deal_II_dimension, deal_II_space_dimension : DIMENSIONS)
+  {
+#if deal_II_dimension <= deal_II_space_dimension
+    template class FE_Nothing<deal_II_dimension,deal_II_space_dimension>;
+#endif
+  }
+
diff --git a/source/fe/fe_poly.cc b/source/fe/fe_poly.cc
new file mode 100644
index 0000000..54dbb5b
--- /dev/null
+++ b/source/fe/fe_poly.cc
@@ -0,0 +1,266 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/qprojector.h>
+#include <deal.II/base/tensor_product_polynomials.h>
+#include <deal.II/base/tensor_product_polynomials_const.h>
+#include <deal.II/base/tensor_product_polynomials_bubbles.h>
+#include <deal.II/base/polynomials_p.h>
+#include <deal.II/base/polynomials_piecewise.h>
+#include <deal.II/base/polynomials_rannacher_turek.h>
+#include <deal.II/fe/fe_poly.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/fe_poly.templates.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/**
+ * Specialization for TensorProductPolynomials<1> with dim=1, spacedim=2.
+ *
+ * Passes the shape function gradients, hessians and third derivatives
+ * stored in @p fe_internal through mapping.transform() and writes the
+ * results to @p output_data. Each quantity is only processed if its flag
+ * is set in the internal data's update_each field, and nothing at all is
+ * done if @p cell_similarity equals CellSimilarity::translation.
+ */
+template <>
+void
+FE_Poly<TensorProductPolynomials<1>,1,2>::
+fill_fe_values (const Triangulation<1,2>::cell_iterator &,
+                const CellSimilarity::Similarity                           cell_similarity,
+                const Quadrature<1>                                       &quadrature,
+                const Mapping<1,2>                                        &mapping,
+                const Mapping<1,2>::InternalDataBase                      &mapping_internal,
+                const dealii::internal::FEValues::MappingRelatedData<1,2> &mapping_data,
+                const FiniteElement<1,2>::InternalDataBase                &fe_internal,
+                dealii::internal::FEValues::FiniteElementRelatedData<1,2> &output_data) const
+{
+  // the data object has to be of the InternalData type of this class;
+  // this is verified before the static downcast
+  Assert (dynamic_cast<const InternalData *> (&fe_internal) != 0, ExcInternalError());
+  const InternalData &fe_data = static_cast<const InternalData &> (fe_internal);
+
+  // values need no work here because get_data() already placed them
+  // in output_data. every derivative branch below is skipped on a
+  // translated cell, so leave right away in that case
+  if (cell_similarity == CellSimilarity::translation)
+    return;
+
+  const unsigned int n_shape_functions = this->dofs_per_cell;
+  const unsigned int n_q_points        = quadrature.size();
+
+  // first derivatives
+  if (fe_data.update_each & update_gradients)
+    for (unsigned int s=0; s<n_shape_functions; ++s)
+      mapping.transform (make_array_view(fe_data.shape_gradients, s),
+                         mapping_covariant,
+                         mapping_internal,
+                         make_array_view(output_data.shape_gradients, s));
+
+  // second derivatives: transform, then subtract the term built from
+  // the pushed-forward Jacobian gradients and the shape gradients
+  if (fe_data.update_each & update_hessians)
+    {
+      for (unsigned int s=0; s<n_shape_functions; ++s)
+        mapping.transform (make_array_view(fe_data.shape_hessians, s),
+                           mapping_covariant_gradient,
+                           mapping_internal,
+                           make_array_view(output_data.shape_hessians, s));
+
+      for (unsigned int s=0; s<n_shape_functions; ++s)
+        for (unsigned int q=0; q<n_q_points; ++q)
+          for (unsigned int d=0; d<2; ++d)
+            output_data.shape_hessians[s][q] -=
+              mapping_data.jacobian_pushed_forward_grads[q][d]
+              * output_data.shape_gradients[s][q][d];
+    }
+
+  // third derivatives: transform, then apply the correction helper
+  if (fe_data.update_each & update_3rd_derivatives)
+    {
+      for (unsigned int s=0; s<n_shape_functions; ++s)
+        mapping.transform (make_array_view(fe_data.shape_3rd_derivatives, s),
+                           mapping_covariant_hessian,
+                           mapping_internal,
+                           make_array_view(output_data.shape_3rd_derivatives, s));
+
+      for (unsigned int s=0; s<n_shape_functions; ++s)
+        correct_third_derivatives(output_data, mapping_data, n_q_points, s);
+    }
+}
+
+
+
+/**
+ * Specialization for TensorProductPolynomials<2> with dim=2, spacedim=3.
+ *
+ * Passes the shape function gradients, hessians and third derivatives
+ * stored in @p fe_internal through mapping.transform() and writes the
+ * results to @p output_data. Each quantity is only processed if its flag
+ * is set in the internal data's update_each field, and nothing at all is
+ * done if @p cell_similarity equals CellSimilarity::translation.
+ */
+template <>
+void
+FE_Poly<TensorProductPolynomials<2>,2,3>::
+fill_fe_values (const Triangulation<2,3>::cell_iterator &,
+                const CellSimilarity::Similarity                           cell_similarity,
+                const Quadrature<2>                                       &quadrature,
+                const Mapping<2,3>                                        &mapping,
+                const Mapping<2,3>::InternalDataBase                      &mapping_internal,
+                const dealii::internal::FEValues::MappingRelatedData<2,3> &mapping_data,
+                const FiniteElement<2,3>::InternalDataBase                &fe_internal,
+                dealii::internal::FEValues::FiniteElementRelatedData<2,3> &output_data) const
+{
+  // the data object has to be of the InternalData type of this class;
+  // this is verified before the static downcast
+  Assert (dynamic_cast<const InternalData *> (&fe_internal) != 0, ExcInternalError());
+  const InternalData &fe_data = static_cast<const InternalData &> (fe_internal);
+
+  // values need no work here because get_data() already placed them
+  // in output_data. every derivative branch below is skipped on a
+  // translated cell, so leave right away in that case
+  if (cell_similarity == CellSimilarity::translation)
+    return;
+
+  const unsigned int n_shape_functions = this->dofs_per_cell;
+  const unsigned int n_q_points        = quadrature.size();
+
+  // first derivatives
+  if (fe_data.update_each & update_gradients)
+    for (unsigned int s=0; s<n_shape_functions; ++s)
+      mapping.transform (make_array_view(fe_data.shape_gradients, s),
+                         mapping_covariant,
+                         mapping_internal,
+                         make_array_view(output_data.shape_gradients, s));
+
+  // second derivatives: transform, then subtract the term built from
+  // the pushed-forward Jacobian gradients and the shape gradients
+  if (fe_data.update_each & update_hessians)
+    {
+      for (unsigned int s=0; s<n_shape_functions; ++s)
+        mapping.transform (make_array_view(fe_data.shape_hessians, s),
+                           mapping_covariant_gradient,
+                           mapping_internal,
+                           make_array_view(output_data.shape_hessians, s));
+
+      for (unsigned int s=0; s<n_shape_functions; ++s)
+        for (unsigned int q=0; q<n_q_points; ++q)
+          for (unsigned int d=0; d<3; ++d)
+            output_data.shape_hessians[s][q] -=
+              mapping_data.jacobian_pushed_forward_grads[q][d]
+              * output_data.shape_gradients[s][q][d];
+    }
+
+  // third derivatives: transform, then apply the correction helper
+  if (fe_data.update_each & update_3rd_derivatives)
+    {
+      for (unsigned int s=0; s<n_shape_functions; ++s)
+        mapping.transform (make_array_view(fe_data.shape_3rd_derivatives, s),
+                           mapping_covariant_hessian,
+                           mapping_internal,
+                           make_array_view(output_data.shape_3rd_derivatives, s));
+
+      for (unsigned int s=0; s<n_shape_functions; ++s)
+        correct_third_derivatives(output_data, mapping_data, n_q_points, s);
+    }
+}
+
+
+/**
+ * Specialization for PolynomialSpace<1> with dim=1, spacedim=2.
+ *
+ * Passes the shape function gradients, hessians and third derivatives
+ * stored in @p fe_internal through mapping.transform() and writes the
+ * results to @p output_data. Each quantity is only processed if its flag
+ * is set in the internal data's update_each field, and nothing at all is
+ * done if @p cell_similarity equals CellSimilarity::translation.
+ */
+template <>
+void
+FE_Poly<PolynomialSpace<1>,1,2>::
+fill_fe_values (const Triangulation<1,2>::cell_iterator &,
+                const CellSimilarity::Similarity                           cell_similarity,
+                const Quadrature<1>                                       &quadrature,
+                const Mapping<1,2>                                        &mapping,
+                const Mapping<1,2>::InternalDataBase                      &mapping_internal,
+                const dealii::internal::FEValues::MappingRelatedData<1,2> &mapping_data,
+                const FiniteElement<1,2>::InternalDataBase                &fe_internal,
+                dealii::internal::FEValues::FiniteElementRelatedData<1,2> &output_data) const
+{
+  // the data object has to be of the InternalData type of this class;
+  // this is verified before the static downcast
+  Assert (dynamic_cast<const InternalData *> (&fe_internal) != 0, ExcInternalError());
+  const InternalData &fe_data = static_cast<const InternalData &> (fe_internal);
+
+  // values need no work here because get_data() already placed them
+  // in output_data. every derivative branch below is skipped on a
+  // translated cell, so leave right away in that case
+  if (cell_similarity == CellSimilarity::translation)
+    return;
+
+  const unsigned int n_shape_functions = this->dofs_per_cell;
+  const unsigned int n_q_points        = quadrature.size();
+
+  // first derivatives
+  if (fe_data.update_each & update_gradients)
+    for (unsigned int s=0; s<n_shape_functions; ++s)
+      mapping.transform (make_array_view(fe_data.shape_gradients, s),
+                         mapping_covariant,
+                         mapping_internal,
+                         make_array_view(output_data.shape_gradients, s));
+
+  // second derivatives: transform, then subtract the term built from
+  // the pushed-forward Jacobian gradients and the shape gradients
+  if (fe_data.update_each & update_hessians)
+    {
+      for (unsigned int s=0; s<n_shape_functions; ++s)
+        mapping.transform (make_array_view(fe_data.shape_hessians, s),
+                           mapping_covariant_gradient,
+                           mapping_internal,
+                           make_array_view(output_data.shape_hessians, s));
+
+      for (unsigned int s=0; s<n_shape_functions; ++s)
+        for (unsigned int q=0; q<n_q_points; ++q)
+          for (unsigned int d=0; d<2; ++d)
+            output_data.shape_hessians[s][q] -=
+              mapping_data.jacobian_pushed_forward_grads[q][d]
+              * output_data.shape_gradients[s][q][d];
+    }
+
+  // third derivatives: transform, then apply the correction helper
+  if (fe_data.update_each & update_3rd_derivatives)
+    {
+      for (unsigned int s=0; s<n_shape_functions; ++s)
+        mapping.transform (make_array_view(fe_data.shape_3rd_derivatives, s),
+                           mapping_covariant_hessian,
+                           mapping_internal,
+                           make_array_view(output_data.shape_3rd_derivatives, s));
+
+      for (unsigned int s=0; s<n_shape_functions; ++s)
+        correct_third_derivatives(output_data, mapping_data, n_q_points, s);
+    }
+}
+
+
+/**
+ * Specialization for PolynomialSpace<2> with dim=2, spacedim=3.
+ *
+ * Passes the shape function gradients, hessians and third derivatives
+ * stored in @p fe_internal through mapping.transform() and writes the
+ * results to @p output_data. Each quantity is only processed if its flag
+ * is set in the internal data's update_each field, and nothing at all is
+ * done if @p cell_similarity equals CellSimilarity::translation.
+ */
+template <>
+void
+FE_Poly<PolynomialSpace<2>,2,3>::
+fill_fe_values (const Triangulation<2,3>::cell_iterator &,
+                const CellSimilarity::Similarity                           cell_similarity,
+                const Quadrature<2>                                       &quadrature,
+                const Mapping<2,3>                                        &mapping,
+                const Mapping<2,3>::InternalDataBase                      &mapping_internal,
+                const dealii::internal::FEValues::MappingRelatedData<2,3> &mapping_data,
+                const FiniteElement<2,3>::InternalDataBase                &fe_internal,
+                dealii::internal::FEValues::FiniteElementRelatedData<2,3> &output_data) const
+{
+  // the data object has to be of the InternalData type of this class;
+  // this is verified before the static downcast
+  Assert (dynamic_cast<const InternalData *> (&fe_internal) != 0, ExcInternalError());
+  const InternalData &fe_data = static_cast<const InternalData &> (fe_internal);
+
+  // values need no work here because get_data() already placed them
+  // in output_data. every derivative branch below is skipped on a
+  // translated cell, so leave right away in that case
+  if (cell_similarity == CellSimilarity::translation)
+    return;
+
+  const unsigned int n_shape_functions = this->dofs_per_cell;
+  const unsigned int n_q_points        = quadrature.size();
+
+  // first derivatives
+  if (fe_data.update_each & update_gradients)
+    for (unsigned int s=0; s<n_shape_functions; ++s)
+      mapping.transform (make_array_view(fe_data.shape_gradients, s),
+                         mapping_covariant,
+                         mapping_internal,
+                         make_array_view(output_data.shape_gradients, s));
+
+  // second derivatives: transform, then subtract the term built from
+  // the pushed-forward Jacobian gradients and the shape gradients
+  if (fe_data.update_each & update_hessians)
+    {
+      for (unsigned int s=0; s<n_shape_functions; ++s)
+        mapping.transform (make_array_view(fe_data.shape_hessians, s),
+                           mapping_covariant_gradient,
+                           mapping_internal,
+                           make_array_view(output_data.shape_hessians, s));
+
+      for (unsigned int s=0; s<n_shape_functions; ++s)
+        for (unsigned int q=0; q<n_q_points; ++q)
+          for (unsigned int d=0; d<3; ++d)
+            output_data.shape_hessians[s][q] -=
+              mapping_data.jacobian_pushed_forward_grads[q][d]
+              * output_data.shape_gradients[s][q][d];
+    }
+
+  // third derivatives: transform, then apply the correction helper
+  if (fe_data.update_each & update_3rd_derivatives)
+    {
+      for (unsigned int s=0; s<n_shape_functions; ++s)
+        mapping.transform (make_array_view(fe_data.shape_3rd_derivatives, s),
+                           mapping_covariant_hessian,
+                           mapping_internal,
+                           make_array_view(output_data.shape_3rd_derivatives, s));
+
+      for (unsigned int s=0; s<n_shape_functions; ++s)
+        correct_third_derivatives(output_data, mapping_data, n_q_points, s);
+    }
+}
+
+
+#include "fe_poly.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/fe_poly.inst.in b/source/fe/fe_poly.inst.in
new file mode 100644
index 0000000..d9dd853
--- /dev/null
+++ b/source/fe/fe_poly.inst.in
@@ -0,0 +1,30 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+// Explicit instantiations of FE_Poly, one line per polynomial space
+// class. deal_II_dimension iterates over DIMENSIONS and
+// deal_II_space_dimension over SPACE_DIMENSIONS; the preprocessor guard
+// keeps only those combinations in which the dimension does not exceed
+// the space dimension.
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+#if deal_II_dimension <= deal_II_space_dimension
+    template class FE_Poly<TensorProductPolynomials<deal_II_dimension>, deal_II_dimension, deal_II_space_dimension>;
+    template class FE_Poly<TensorProductPolynomialsConst<deal_II_dimension>, deal_II_dimension, deal_II_space_dimension>;
+    template class FE_Poly<TensorProductPolynomialsBubbles<deal_II_dimension>, deal_II_dimension, deal_II_space_dimension>;
+    template class FE_Poly<TensorProductPolynomials<deal_II_dimension,Polynomials::PiecewisePolynomial<double> >, deal_II_dimension, deal_II_space_dimension>;
+    template class FE_Poly<PolynomialSpace<deal_II_dimension>, deal_II_dimension, deal_II_space_dimension>;
+    template class FE_Poly<PolynomialsP<deal_II_dimension>, deal_II_dimension, deal_II_space_dimension>;
+    template class FE_Poly<PolynomialsRannacherTurek<deal_II_dimension>, deal_II_dimension, deal_II_space_dimension>;
+#endif
+  }
+
diff --git a/source/fe/fe_poly_tensor.cc b/source/fe/fe_poly_tensor.cc
new file mode 100644
index 0000000..b2a6a0c
--- /dev/null
+++ b/source/fe/fe_poly_tensor.cc
@@ -0,0 +1,1811 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/derivative_form.h>
+#include <deal.II/base/qprojector.h>
+#include <deal.II/base/polynomials_bdm.h>
+#include <deal.II/base/polynomials_raviart_thomas.h>
+#include <deal.II/base/polynomials_abf.h>
+#include <deal.II/base/polynomials_nedelec.h>
+#include <deal.II/fe/fe_poly_tensor.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/mapping_cartesian.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace
+{
+//---------------------------------------------------------------------------
+// Utility method, which is used to determine the change of sign for
+// the DoFs on the faces of the given cell.
+//---------------------------------------------------------------------------
+
+  /**
+   * On non-Cartesian grids the sign of the degrees of freedom located on
+   * the faces of a cell has to be flipped in some cases. The overloads of
+   * this function determine, for a given cell, which DoFs are affected
+   * and store a factor of +1 or -1 for them in @p face_sign.
+   *
+   * This is the 1d overload: the cell and the number of dofs per face
+   * are ignored and every factor is set to +1.
+   */
+  void
+  get_face_sign_change_rt (const Triangulation<1>::cell_iterator &,
+                           const unsigned int                     ,
+                           std::vector<double>                   &face_sign)
+  {
+    // no sign change ever happens in 1d; the vector keeps its length
+    face_sign.assign (face_sign.size (), 1.0);
+  }
+
+
+
+  /**
+   * 2d overload. All entries of @p face_sign are first reset to +1. Then
+   * the second half of the cell's faces is visited: for an interior face
+   * whose number as seen from the neighbor lies in the first half, the
+   * @p dofs_per_face entries starting at index f*dofs_per_face are set
+   * to -1.
+   */
+  void
+  get_face_sign_change_rt (const Triangulation<2>::cell_iterator &cell,
+                           const unsigned int                     dofs_per_face,
+                           std::vector<double>                   &face_sign)
+  {
+    const unsigned int dim = 2;
+    const unsigned int spacedim = 2;
+
+    const unsigned int n_faces    = GeometryInfo<dim>::faces_per_cell;
+    const unsigned int first_face = n_faces / 2;
+
+    // start out without any sign change, i.e. with a factor of one
+    face_sign.assign (face_sign.size (), 1.0);
+
+    for (unsigned int f = first_face; f < n_faces; ++f)
+      {
+        Triangulation<dim,spacedim>::face_iterator face = cell->face (f);
+
+        // boundary faces have no neighbor to disagree with
+        if (face->at_boundary ())
+          continue;
+
+        // only faces that the neighbor numbers in its first half
+        // need the flip
+        const unsigned int nn = cell->neighbor_face_no(f);
+        if (nn >= first_face)
+          continue;
+
+        for (unsigned int j = 0; j < dofs_per_face; ++j)
+          {
+            Assert (f * dofs_per_face + j < face_sign.size(),
+                    ExcInternalError());
+
+//TODO: This is probably only going to work for those elements for which all dofs are face dofs
+            face_sign[f * dofs_per_face + j] = -1.0;
+          }
+      }
+  }
+
+
+
+  /**
+   * 3d overload. No sign change is computed: the cell and the number of
+   * dofs per face are ignored and every entry of @p face_sign is set
+   * to +1.
+   */
+  void
+  get_face_sign_change_rt (const Triangulation<3>::cell_iterator &/*cell*/,
+                           const unsigned int                     /*dofs_per_face*/,
+                           std::vector<double>                   &face_sign)
+  {
+//TODO: think about what it would take here
+    face_sign.assign (face_sign.size (), 1.0);
+  }
+
+  /**
+   * 1d overload of the sign change computation for Nedelec elements:
+   * the cell and the number of dofs per face are ignored and every entry
+   * of @p face_sign is set to +1.
+   */
+  void
+  get_face_sign_change_nedelec (const Triangulation<1>::cell_iterator &/*cell*/,
+                                const unsigned int                     /*dofs_per_face*/,
+                                std::vector<double>                   &face_sign)
+  {
+    // no sign change ever happens in 1d; the vector keeps its length
+    face_sign.assign (face_sign.size (), 1.0);
+  }
+
+
+
+  /**
+   * 2d overload of the sign change computation for Nedelec elements. No
+   * sign change is computed: the cell and the number of dofs per face
+   * are ignored and every entry of @p face_sign is set to +1.
+   */
+  void
+  get_face_sign_change_nedelec (const Triangulation<2>::cell_iterator &/*cell*/,
+                                const unsigned int                     /*dofs_per_face*/,
+                                std::vector<double>                   &face_sign)
+  {
+//TODO: think about what it would take here
+    face_sign.assign (face_sign.size (), 1.0);
+  }
+
+
+  /**
+   * 3d overload of the sign change computation for Nedelec elements.
+   * All entries of @p face_sign are first reset to +1. Then, for every
+   * line l of the cell for which cell->line_orientation(l) is false, the
+   * entry with index l is set to -1. The number of dofs per face is
+   * ignored.
+   *
+   * @p face_sign therefore needs at least as many entries as the index
+   * of the last line that is flipped; this is checked by an assertion.
+   */
+  void
+  get_face_sign_change_nedelec (const Triangulation<3>::cell_iterator &cell,
+                                const unsigned int                     /*dofs_per_face*/,
+                                std::vector<double>                   &face_sign)
+  {
+    const unsigned int dim = 3;
+    std::fill (face_sign.begin (), face_sign.end (), 1.0);
+//TODO: This is probably only going to work for those elements for which all dofs are face dofs
+    for (unsigned int l = 0; l < GeometryInfo<dim>::lines_per_cell; ++l)
+      if (!(cell->line_orientation (l)))
+        {
+          // same kind of guard as in the 2d Raviart-Thomas overload:
+          // never write past the end of the vector
+          Assert (l < face_sign.size(),
+                  ExcInternalError());
+
+          face_sign[l] = -1.0;
+        }
+  }
+}
+
+
+
+/**
+ * Constructor. Forwards @p fe_data, @p restriction_is_additive_flags and
+ * @p nonzero_components to the FiniteElement base class and builds the
+ * polynomial space from @p degree.
+ */
+template <class PolynomialType, int dim, int spacedim>
+FE_PolyTensor<PolynomialType,dim,spacedim>::FE_PolyTensor
+(const unsigned int                degree,
+ const FiniteElementData<dim>     &fe_data,
+ const std::vector<bool>          &restriction_is_additive_flags,
+ const std::vector<ComponentMask> &nonzero_components)
+  :
+  FiniteElement<dim,spacedim> (fe_data,
+                               restriction_is_additive_flags,
+                               nonzero_components),
+  poly_space(PolynomialType(degree))
+{
+  // NOTE(review): presumably -1 marks the cache as holding no valid
+  // evaluation point yet -- confirm against the users of cached_point
+  cached_point(0) = -1;
+
+  // Fill the table that converts components into base components.
+  // There is only one base element, so all entries stay zero except
+  // for the component within the base, which equals the component
+  // itself.
+  const unsigned int n_comps = this->n_components();
+  for (unsigned int c=0; c<n_comps; ++c)
+    this->component_to_base_table[c].first.second = c;
+}
+
+
+
+/**
+ * Scalar value of a shape function. Calling this function always raises
+ * an ExcFENotPrimitive assertion; use shape_value_component() instead.
+ * The value 0 is returned only if the assertion lets execution continue.
+ */
+template <class PolynomialType, int dim, int spacedim>
+double
+FE_PolyTensor<PolynomialType,dim,spacedim>::shape_value
+(const unsigned int, const Point<dim> &) const
+{
+  // the typedef keeps the comma of the template argument list out of
+  // the macro invocation
+  typedef    FiniteElement<dim,spacedim> FEE;
+  Assert(false, typename FEE::ExcFENotPrimitive());
+
+  const double no_value = 0.;
+  return no_value;
+}
+
+
+
+/**
+ * Value of vector component @p component of shape function @p i at the
+ * point @p p.
+ *
+ * The polynomial space is evaluated at @p p and the result is kept in
+ * the cached_* members, so that further calls with the same point reuse
+ * it. If inverse_node_matrix has no columns, the cached polynomial value
+ * is returned directly; otherwise the values are combined with column
+ * @p i of that matrix.
+ */
+template <class PolynomialType, int dim, int spacedim>
+double
+FE_PolyTensor<PolynomialType,dim,spacedim>::shape_value_component
+(const unsigned int  i,
+ const Point<dim>   &p,
+ const unsigned int  component) const
+{
+  Assert (i<this->dofs_per_cell, ExcIndexRange(i,0,this->dofs_per_cell));
+  Assert (component < dim, ExcIndexRange (component, 0, dim));
+
+  // evaluate the polynomials unless values for this very point are
+  // already available
+  const bool must_recompute = (cached_point != p) || cached_values.empty();
+  if (must_recompute)
+    {
+      cached_point = p;
+      cached_values.resize(poly_space.n());
+
+      // third and fourth derivatives are not needed; empty vectors
+      // are passed for them
+      std::vector<Tensor<4,dim> > unused_third;
+      std::vector<Tensor<5,dim> > unused_fourth;
+      poly_space.compute(p, cached_values, cached_grads, cached_grad_grads,
+                         unused_third, unused_fourth);
+    }
+
+  // without a node matrix the polynomials are used as they are
+  if (inverse_node_matrix.n_cols() == 0)
+    return cached_values[i][component];
+
+  double sum = 0;
+  for (unsigned int j=0; j<inverse_node_matrix.n_cols(); ++j)
+    sum += inverse_node_matrix(j,i) * cached_values[j][component];
+
+  return sum;
+}
+
+
+
+// Gradients of scalar shape functions are not available from this class:
+// the assertion below is unconditional and reports ExcFENotPrimitive. Use
+// shape_grad_component() instead. The zero tensor returned afterwards only
+// matters if the assertion does not abort.
+template <class PolynomialType, int dim, int spacedim>
+Tensor<1,dim>
+FE_PolyTensor<PolynomialType,dim,spacedim>::shape_grad
+(const unsigned int /*i*/,
+ const Point<dim> & /*p*/) const
+{
+  // the typedef keeps the comma of the template argument list out of the
+  // macro invocation
+  typedef    FiniteElement<dim,spacedim> FEE;
+  Assert(false, typename FEE::ExcFENotPrimitive());
+  return Tensor<1,dim>();
+}
+
+
+
+// Return the gradient of component @p component of shape function @p i at
+// the point @p p.
+//
+// The polynomial space is evaluated at @p p only if @p p differs from the
+// cached point or no gradients have been cached yet; the result is kept in
+// the cached_* members for later calls.
+//
+// NOTE(review): this const function writes to cached_point and the
+// cached_* vectors; no locking is visible here, so concurrent calls on the
+// same object look unsafe -- confirm.
+template <class PolynomialType, int dim, int spacedim>
+Tensor<1,dim>
+FE_PolyTensor<PolynomialType,dim,spacedim>::shape_grad_component
+(const unsigned int  i,
+ const Point<dim>   &p,
+ const unsigned int  component) const
+{
+  Assert (i<this->dofs_per_cell, ExcIndexRange(i,0,this->dofs_per_cell));
+  Assert (component < dim, ExcIndexRange (component, 0, dim));
+
+  // refresh the cache if it belongs to another point or is still empty
+  if (cached_point != p || cached_grads.size() == 0)
+    {
+      // placeholders for the two trailing output arguments of compute()
+      std::vector<Tensor<4,dim> > empty_rank4;
+      std::vector<Tensor<5,dim> > empty_rank5;
+
+      cached_point = p;
+      cached_grads.resize(poly_space.n());
+      poly_space.compute(p, cached_values, cached_grads, cached_grad_grads,
+                         empty_rank4, empty_rank5);
+    }
+
+  // without a node matrix the polynomials themselves are the shape
+  // functions
+  const unsigned int n_cols = inverse_node_matrix.n_cols();
+  if (n_cols == 0)
+    return cached_grads[i][component];
+
+  // otherwise shape function i is the combination of the polynomials
+  // weighted with column i of the inverse node matrix
+  Tensor<1,dim> gradient;
+  for (unsigned int j=0; j<n_cols; ++j)
+    gradient += inverse_node_matrix(j,i) * cached_grads[j][component];
+
+  return gradient;
+}
+
+
+
+// Second derivatives of scalar shape functions are not available from this
+// class: the assertion below is unconditional and reports
+// ExcFENotPrimitive. Use shape_grad_grad_component() instead. The zero
+// tensor returned afterwards only matters if the assertion does not abort.
+template <class PolynomialType, int dim, int spacedim>
+Tensor<2,dim>
+FE_PolyTensor<PolynomialType,dim,spacedim>::shape_grad_grad
+(const unsigned int /*i*/,
+ const Point<dim> & /*p*/) const
+{
+  // the typedef keeps the comma of the template argument list out of the
+  // macro invocation
+  typedef    FiniteElement<dim,spacedim> FEE;
+  Assert(false, typename FEE::ExcFENotPrimitive());
+  return Tensor<2,dim>();
+}
+
+
+
+// Return the second derivatives of component @p component of shape
+// function @p i at the point @p p.
+//
+// The polynomial space is evaluated at @p p only if @p p differs from the
+// cached point or no second derivatives have been cached yet; the result
+// is kept in the cached_* members for later calls.
+//
+// NOTE(review): this const function writes to cached_point and the
+// cached_* vectors; no locking is visible here, so concurrent calls on the
+// same object look unsafe -- confirm.
+template <class PolynomialType, int dim, int spacedim>
+Tensor<2,dim>
+FE_PolyTensor<PolynomialType,dim,spacedim>::shape_grad_grad_component
+(const unsigned int  i,
+ const Point<dim>   &p,
+ const unsigned int  component) const
+{
+  Assert (i<this->dofs_per_cell, ExcIndexRange(i,0,this->dofs_per_cell));
+  Assert (component < dim, ExcIndexRange (component, 0, dim));
+
+  // refresh the cache if it belongs to another point or is still empty
+  if (cached_point != p || cached_grad_grads.size() == 0)
+    {
+      cached_point = p;
+      cached_grad_grads.resize(poly_space.n());
+
+      // placeholders for the two trailing output arguments of compute()
+      std::vector<Tensor<4,dim> > dummy1;
+      std::vector<Tensor<5,dim> > dummy2;
+      poly_space.compute(p, cached_values, cached_grads, cached_grad_grads, dummy1, dummy2);
+    }
+
+  // without a node matrix the polynomials themselves are the shape
+  // functions
+  Tensor<2,dim> s;
+  if (inverse_node_matrix.n_cols() == 0)
+    return cached_grad_grads[i][component];
+  else
+    // Shape function i is the combination of the polynomials weighted with
+    // column i of the inverse node matrix, i.e. entry (j,i) multiplies
+    // polynomial j. This is the same indexing shape_value_component() and
+    // shape_grad_component() use, so that values, gradients and second
+    // derivatives all belong to the same function.
+    for (unsigned int j=0; j<inverse_node_matrix.n_cols(); ++j)
+      s += inverse_node_matrix(j,i) * cached_grad_grads[j][component];
+
+  return s;
+}
+
+
+//---------------------------------------------------------------------------
+// Fill data of FEValues
+//---------------------------------------------------------------------------
+
+// Fill @p output_data with the values, gradients and Hessians of all shape
+// functions at the quadrature points of @p cell, as far as they are
+// requested by the update flags stored in the InternalData object.
+//
+// For each shape function the data stored in the InternalData object is
+// transformed according to @p mapping_type. For the Raviart-Thomas and
+// Nedelec mapping types the result is additionally multiplied by a per-dof
+// sign obtained from the cell. Third derivatives are not implemented.
+//
+// Although @p fe_internal is passed as a const reference, this function
+// writes to several of its members (sign_change, transformed_*,
+// untransformed_*) -- presumably these are declared mutable and serve as
+// scratch space.
+template <class PolynomialType, int dim, int spacedim>
+void
+FE_PolyTensor<PolynomialType,dim,spacedim>::
+fill_fe_values
+(const typename Triangulation<dim,spacedim>::cell_iterator           &cell,
+ const CellSimilarity::Similarity                                     cell_similarity,
+ const Quadrature<dim>                                               &quadrature,
+ const Mapping<dim,spacedim>                                         &mapping,
+ const typename Mapping<dim,spacedim>::InternalDataBase              &mapping_internal,
+ const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &mapping_data,
+ const typename FiniteElement<dim,spacedim>::InternalDataBase        &fe_internal,
+ dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const
+{
+  // convert data object to internal
+  // data for this class. fails with
+  // an exception if that is not
+  // possible
+  Assert (dynamic_cast<const InternalData *> (&fe_internal) != 0,
+          ExcInternalError());
+  const InternalData &fe_data = static_cast<const InternalData &> (fe_internal);
+
+  const unsigned int n_q_points = quadrature.size();
+
+  // if values are requested, the table of stored shape values must have
+  // one row per dof and one column per quadrature point
+  Assert(!(fe_data.update_each & update_values) || fe_data.shape_values.size()[0] == this->dofs_per_cell,
+         ExcDimensionMismatch(fe_data.shape_values.size()[0], this->dofs_per_cell));
+  Assert(!(fe_data.update_each & update_values) || fe_data.shape_values.size()[1] == n_q_points,
+         ExcDimensionMismatch(fe_data.shape_values.size()[1], n_q_points));
+
+  // Create table with sign changes, due to the special structure of the RT elements.
+  // TODO: Preliminary hack to demonstrate the overall principle!
+
+  // Compute possible sign changes depending on the neighborhood
+  // between two faces.
+  std::fill( fe_data.sign_change.begin(), fe_data.sign_change.end(), 1.0 );
+
+  // only these two mapping types modify the signs; for all others every
+  // entry stays at 1.0
+  if (mapping_type == mapping_raviart_thomas)
+    get_face_sign_change_rt (cell, this->dofs_per_face, fe_data.sign_change);
+  else if (mapping_type == mapping_nedelec)
+    get_face_sign_change_nedelec (cell, this->dofs_per_face, fe_data.sign_change);
+
+
+  for (unsigned int i=0; i<this->dofs_per_cell; ++i)
+    {
+      // row of the output tables that holds the first nonzero component of
+      // shape function i; the following components are addressed as
+      // first+d below
+      const unsigned int first = output_data.shape_function_to_row_table[i * this->n_components() +
+                                 this->get_nonzero_components(i).first_selected_component()];
+
+      // update the shape function values as necessary
+      //
+      // we only need to do this if the current cell is not a translation of
+      // the previous one; or, even if it is a translation, if we use mappings
+      // other than the standard mappings that require us to recompute values
+      // and derivatives because of possible sign changes
+      if (fe_data.update_each & update_values &&
+          ((cell_similarity != CellSimilarity::translation)
+           ||
+           ((mapping_type == mapping_piola) || (mapping_type == mapping_raviart_thomas)
+            || (mapping_type == mapping_nedelec))))
+        {
+          switch (mapping_type)
+            {
+            case mapping_none:
+            {
+              // no transformation: copy the stored values unchanged
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<dim; ++d)
+                  output_data.shape_values(first+d,k) = fe_data.shape_values[i][k][d];
+              break;
+            }
+
+            case mapping_covariant:
+            case mapping_contravariant:
+            {
+              // let the mapping transform the stored values with the
+              // element's own mapping type
+              mapping.transform (make_array_view(fe_data.shape_values, i),
+                                 mapping_type,
+                                 mapping_internal,
+                                 make_array_view(fe_data.transformed_shape_values));
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<dim; ++d)
+                  output_data.shape_values(first+d,k) = fe_data.transformed_shape_values[k][d];
+
+              break;
+            }
+
+            case mapping_raviart_thomas:
+            case mapping_piola:
+            {
+              // both types use the Piola transformation; the sign factor
+              // differs from 1.0 only for mapping_raviart_thomas (see above)
+              mapping.transform (make_array_view(fe_data.shape_values, i),
+                                 mapping_piola,
+                                 mapping_internal,
+                                 make_array_view(fe_data.transformed_shape_values));
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<dim; ++d)
+                  output_data.shape_values(first+d,k)
+                    = fe_data.sign_change[i] * fe_data.transformed_shape_values[k][d];
+              break;
+            }
+
+            case mapping_nedelec:
+            {
+              // covariant transformation followed by the sign factor
+              mapping.transform (make_array_view(fe_data.shape_values, i),
+                                 mapping_covariant,
+                                 mapping_internal,
+                                 make_array_view(fe_data.transformed_shape_values));
+
+              for (unsigned int k = 0; k < n_q_points; ++k)
+                for (unsigned int d = 0; d < dim; ++d)
+                  output_data.shape_values(first+d,k) = fe_data.sign_change[i]
+                                                        * fe_data.transformed_shape_values[k][d];
+
+              break;
+            }
+
+            default:
+              Assert(false, ExcNotImplemented());
+            }
+        }
+
+      // update gradients. apply the same logic as above
+      //
+      // NOTE(review): except for mapping_none, the correction terms below
+      // read output_data.shape_values of this very shape function, i.e.
+      // they rely on the values block above having filled them --
+      // presumably the update flags guarantee that; confirm.
+      //
+      // NOTE(review): in the mapping_raviart_thomas/mapping_piola and
+      // mapping_nedelec cases those values already contain sign_change[i],
+      // and the corrected sum is multiplied by sign_change[i] once more at
+      // the end; check that the correction terms get the intended sign.
+      if (fe_data.update_each & update_gradients
+          &&
+          ((cell_similarity != CellSimilarity::translation)
+           ||
+           ((mapping_type == mapping_piola) || (mapping_type == mapping_raviart_thomas)
+            || (mapping_type == mapping_nedelec))))
+
+        {
+          switch (mapping_type)
+            {
+            case mapping_none:
+            {
+              // only the derivative index is transformed; no correction
+              // term is added
+              mapping.transform (make_array_view(fe_data.shape_grads, i),
+                                 mapping_covariant,
+                                 mapping_internal,
+                                 make_array_view(fe_data.transformed_shape_grads));
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<dim; ++d)
+                  output_data.shape_gradients[first+d][k] = fe_data.transformed_shape_grads[k][d];
+              break;
+            }
+            case mapping_covariant:
+            {
+              mapping.transform (make_array_view(fe_data.shape_grads, i),
+                                 mapping_covariant_gradient,
+                                 mapping_internal,
+                                 make_array_view(fe_data.transformed_shape_grads));
+
+              // subtract the term that couples the already transformed
+              // values with the pushed-forward Jacobian gradients
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<spacedim; ++d)
+                  for (unsigned int n=0; n<spacedim; ++n)
+                    fe_data.transformed_shape_grads[k][d] -= output_data.shape_values(first+n,k)
+                                                             * mapping_data.jacobian_pushed_forward_grads[k][n][d];
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<dim; ++d)
+                  output_data.shape_gradients[first+d][k] = fe_data.transformed_shape_grads[k][d];
+
+              break;
+            }
+            case mapping_contravariant:
+            {
+              // copy the gradients of shape function i into a contiguous
+              // scratch array before handing them to the mapping
+              for (unsigned int k=0; k<n_q_points; ++k)
+                fe_data.untransformed_shape_grads[k] = fe_data.shape_grads[i][k];
+              mapping.transform (make_array_view(fe_data.untransformed_shape_grads),
+                                 mapping_contravariant_gradient,
+                                 mapping_internal,
+                                 make_array_view(fe_data.transformed_shape_grads));
+
+              // add the term that couples the already transformed values
+              // with the pushed-forward Jacobian gradients
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<spacedim; ++d)
+                  for (unsigned int n=0; n<spacedim; ++n)
+                    fe_data.transformed_shape_grads[k][d] += output_data.shape_values(first+n,k)
+                                                             * mapping_data.jacobian_pushed_forward_grads[k][d][n];
+
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<dim; ++d)
+                  output_data.shape_gradients[first+d][k] = fe_data.transformed_shape_grads[k][d];
+
+              break;
+            }
+            case mapping_raviart_thomas:
+            case mapping_piola:
+            {
+              for (unsigned int k=0; k<n_q_points; ++k)
+                fe_data.untransformed_shape_grads[k] = fe_data.shape_grads[i][k];
+              mapping.transform (make_array_view(fe_data.untransformed_shape_grads),
+                                 mapping_piola_gradient,
+                                 mapping_internal,
+                                 make_array_view(fe_data.transformed_shape_grads));
+
+              // same term as in the contravariant case, minus a second term
+              // built from the [n][n] entries of the pushed-forward
+              // Jacobian gradients
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<spacedim; ++d)
+                  for (unsigned int n=0; n<spacedim; ++n)
+                    fe_data.transformed_shape_grads[k][d] += ( output_data.shape_values(first+n,k)
+                                                               * mapping_data.jacobian_pushed_forward_grads[k][d][n] )
+                                                             - ( output_data.shape_values(first+d,k)
+                                                                 * mapping_data.jacobian_pushed_forward_grads[k][n][n] );
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<dim; ++d)
+                  output_data.shape_gradients[first+d][k] = fe_data.sign_change[i]
+                                                            * fe_data.transformed_shape_grads[k][d];
+
+              break;
+            }
+
+            case mapping_nedelec:
+            {
+              // treat the gradients of
+              // this particular shape
+              // function at all
+              // q-points. if Dv is the
+              // gradient of the shape
+              // function on the unit
+              // cell, then
+              // (J^-T)Dv(J^-1) is the
+              // value we want to have on
+              // the real cell.
+              for (unsigned int k=0; k<n_q_points; ++k)
+                fe_data.untransformed_shape_grads[k] = fe_data.shape_grads[i][k];
+
+              mapping.transform (make_array_view(fe_data.untransformed_shape_grads),
+                                 mapping_covariant_gradient,
+                                 mapping_internal,
+                                 make_array_view(fe_data.transformed_shape_grads));
+
+              // same correction as in the mapping_covariant case
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<spacedim; ++d)
+                  for (unsigned int n=0; n<spacedim; ++n)
+                    fe_data.transformed_shape_grads[k][d] -= output_data.shape_values(first+n,k)
+                                                             * mapping_data.jacobian_pushed_forward_grads[k][n][d];
+
+              for (unsigned int k = 0; k < n_q_points; ++k)
+                for (unsigned int d = 0; d < dim; ++d)
+                  output_data.shape_gradients[first + d][k] = fe_data.sign_change[i]
+                                                              * fe_data.transformed_shape_grads[k][d];
+
+              break;
+            }
+
+            default:
+              Assert(false, ExcNotImplemented());
+            }
+        }
+
+      // update hessians. apply the same logic as above
+      //
+      // the correction terms below read output_data.shape_values and
+      // output_data.shape_gradients as filled by the two blocks above.
+      //
+      // note on names: in the five-fold loop nests below, the inner loop
+      // variables i and j are tensor indices; that inner i hides the shape
+      // function index i of the enclosing loop for the extent of the nest.
+      // The uses of sign_change[i] and shape_grad_grads[i] lie outside the
+      // nests and refer to the shape function index.
+      if (fe_data.update_each & update_hessians
+          &&
+          ((cell_similarity != CellSimilarity::translation)
+           ||
+           ((mapping_type == mapping_piola) || (mapping_type == mapping_raviart_thomas)
+            || (mapping_type == mapping_nedelec))))
+
+        {
+
+          switch (mapping_type)
+            {
+            case mapping_none:
+            {
+
+              mapping.transform(make_array_view(fe_data.shape_grad_grads, i),
+                                mapping_covariant_gradient,
+                                mapping_internal,
+                                make_array_view(fe_data.transformed_shape_hessians));
+
+              // subtract the already computed gradients contracted with
+              // the pushed-forward Jacobian gradients
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<spacedim; ++d)
+                  for (unsigned int n=0; n<spacedim; ++n)
+                    fe_data.transformed_shape_hessians[k][d] -= output_data.shape_gradients[first+d][k][n]
+                                                                * mapping_data.jacobian_pushed_forward_grads[k][n];
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<dim; ++d)
+                  output_data.shape_hessians[first+d][k] = fe_data.transformed_shape_hessians[k][d];
+
+              break;
+
+            }
+            case mapping_covariant:
+            {
+
+              // copy the second derivatives of shape function i into a
+              // contiguous scratch array before handing them to the mapping
+              for (unsigned int k=0; k<n_q_points; ++k)
+                fe_data.untransformed_shape_hessian_tensors[k] = fe_data.shape_grad_grads[i][k];
+
+              mapping.transform(make_array_view(fe_data.untransformed_shape_hessian_tensors),
+                                mapping_covariant_hessian, mapping_internal,
+                                make_array_view(fe_data.transformed_shape_hessians));
+
+              // subtract terms built from the values and gradients computed
+              // above and the pushed-forward first and second Jacobian
+              // derivatives
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<spacedim; ++d)
+                  for (unsigned int n=0; n<spacedim; ++n)
+                    for (unsigned int i=0; i<spacedim; ++i)
+                      for (unsigned int j=0; j<spacedim; ++j)
+                        {
+                          fe_data.transformed_shape_hessians[k][d][i][j]
+                          -= (output_data.shape_values(first+n,k)
+                              * mapping_data.jacobian_pushed_forward_2nd_derivatives[k][n][d][i][j])
+                             + (output_data.shape_gradients[first+d][k][n]
+                                * mapping_data.jacobian_pushed_forward_grads[k][n][i][j])
+                             + (output_data.shape_gradients[first+n][k][i]
+                                * mapping_data.jacobian_pushed_forward_grads[k][n][d][j])
+                             + (output_data.shape_gradients[first+n][k][j]
+                                * mapping_data.jacobian_pushed_forward_grads[k][n][i][d]);
+                        }
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<dim; ++d)
+                  output_data.shape_hessians[first+d][k] = fe_data.transformed_shape_hessians[k][d];
+
+              break;
+
+            }
+            case mapping_contravariant:
+            {
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                fe_data.untransformed_shape_hessian_tensors[k] = fe_data.shape_grad_grads[i][k];
+
+              mapping.transform(make_array_view(fe_data.untransformed_shape_hessian_tensors),
+                                mapping_contravariant_hessian,
+                                mapping_internal,
+                                make_array_view(fe_data.transformed_shape_hessians));
+
+              // correction terms: first those linear in the Jacobian
+              // derivatives, then (inner loop over m) those that are
+              // products of two pushed-forward Jacobian gradients
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<spacedim; ++d)
+                  for (unsigned int n=0; n<spacedim; ++n)
+                    for (unsigned int i=0; i<spacedim; ++i)
+                      for (unsigned int j=0; j<spacedim; ++j)
+                        {
+                          fe_data.transformed_shape_hessians[k][d][i][j]
+                          += (output_data.shape_values(first+n,k)
+                              * mapping_data.jacobian_pushed_forward_2nd_derivatives[k][d][n][i][j])
+                             + (output_data.shape_gradients[first+n][k][i]
+                                * mapping_data.jacobian_pushed_forward_grads[k][d][n][j])
+                             + (output_data.shape_gradients[first+n][k][j]
+                                * mapping_data.jacobian_pushed_forward_grads[k][d][i][n])
+                             - (output_data.shape_gradients[first+d][k][n]
+                                * mapping_data.jacobian_pushed_forward_grads[k][n][i][j]);
+                          for (unsigned int m=0; m<spacedim; ++m)
+                            fe_data.transformed_shape_hessians[k][d][i][j]
+                            -= (mapping_data.jacobian_pushed_forward_grads[k][d][i][m]
+                                * mapping_data.jacobian_pushed_forward_grads[k][m][n][j]
+                                * output_data.shape_values(first+n,k))
+                               + (mapping_data.jacobian_pushed_forward_grads[k][d][m][j]
+                                  * mapping_data.jacobian_pushed_forward_grads[k][m][i][n]
+                                  * output_data.shape_values(first+n,k));
+                        }
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<dim; ++d)
+                  output_data.shape_hessians[first+d][k] = fe_data.transformed_shape_hessians[k][d];
+
+              break;
+
+            }
+            case mapping_raviart_thomas:
+            case mapping_piola:
+            {
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                fe_data.untransformed_shape_hessian_tensors[k] = fe_data.shape_grad_grads[i][k];
+
+              mapping.transform(make_array_view(fe_data.untransformed_shape_hessian_tensors),
+                                mapping_piola_hessian,
+                                mapping_internal,
+                                make_array_view(fe_data.transformed_shape_hessians));
+
+              // the first update and the first statement of the m-loop are
+              // the same terms as in the contravariant case; the remaining
+              // updates involve the [n][n] entries of the Jacobian
+              // derivatives
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<spacedim; ++d)
+                  for (unsigned int n=0; n<spacedim; ++n)
+                    for (unsigned int i=0; i<spacedim; ++i)
+                      for (unsigned int j=0; j<spacedim; ++j)
+                        {
+                          fe_data.transformed_shape_hessians[k][d][i][j]
+                          += (output_data.shape_values(first+n,k)
+                              * mapping_data.jacobian_pushed_forward_2nd_derivatives[k][d][n][i][j])
+                             + (output_data.shape_gradients[first+n][k][i]
+                                * mapping_data.jacobian_pushed_forward_grads[k][d][n][j])
+                             + (output_data.shape_gradients[first+n][k][j]
+                                * mapping_data.jacobian_pushed_forward_grads[k][d][i][n])
+                             - (output_data.shape_gradients[first+d][k][n]
+                                * mapping_data.jacobian_pushed_forward_grads[k][n][i][j]);
+
+                          fe_data.transformed_shape_hessians[k][d][i][j]
+                          -= (output_data.shape_values(first+d,k)
+                              * mapping_data.jacobian_pushed_forward_2nd_derivatives[k][n][n][i][j])
+                             + (output_data.shape_gradients[first+d][k][i]
+                                * mapping_data.jacobian_pushed_forward_grads[k][n][n][j])
+                             + (output_data.shape_gradients[first+d][k][j]
+                                * mapping_data.jacobian_pushed_forward_grads[k][n][n][i]);
+
+                          for (unsigned int m=0; m<spacedim; ++m)
+                            {
+                              fe_data.transformed_shape_hessians[k][d][i][j]
+                              -= (mapping_data.jacobian_pushed_forward_grads[k][d][i][m]
+                                  * mapping_data.jacobian_pushed_forward_grads[k][m][n][j]
+                                  * output_data.shape_values(first+n,k))
+                                 + (mapping_data.jacobian_pushed_forward_grads[k][d][m][j]
+                                    * mapping_data.jacobian_pushed_forward_grads[k][m][i][n]
+                                    * output_data.shape_values(first+n,k));
+
+                              fe_data.transformed_shape_hessians[k][d][i][j]
+                              += (mapping_data.jacobian_pushed_forward_grads[k][n][i][m]
+                                  * mapping_data.jacobian_pushed_forward_grads[k][m][n][j]
+                                  * output_data.shape_values(first+d,k))
+                                 + (mapping_data.jacobian_pushed_forward_grads[k][n][m][j]
+                                    * mapping_data.jacobian_pushed_forward_grads[k][m][i][n]
+                                    * output_data.shape_values(first+d,k));
+                            }
+                        }
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<dim; ++d)
+                  output_data.shape_hessians[first+d][k] = fe_data.sign_change[i] * fe_data.transformed_shape_hessians[k][d];
+
+              break;
+
+            }
+
+            case mapping_nedelec:
+            {
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                fe_data.untransformed_shape_hessian_tensors[k] = fe_data.shape_grad_grads[i][k];
+
+              mapping.transform(make_array_view(fe_data.untransformed_shape_hessian_tensors),
+                                mapping_covariant_hessian, mapping_internal,
+                                make_array_view(fe_data.transformed_shape_hessians));
+
+              // same correction terms as in the mapping_covariant case
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<spacedim; ++d)
+                  for (unsigned int n=0; n<spacedim; ++n)
+                    for (unsigned int i=0; i<spacedim; ++i)
+                      for (unsigned int j=0; j<spacedim; ++j)
+                        {
+                          fe_data.transformed_shape_hessians[k][d][i][j]
+                          -= (output_data.shape_values(first+n,k)
+                              * mapping_data.jacobian_pushed_forward_2nd_derivatives[k][n][d][i][j])
+                             + (output_data.shape_gradients[first+d][k][n]
+                                * mapping_data.jacobian_pushed_forward_grads[k][n][i][j])
+                             + (output_data.shape_gradients[first+n][k][i]
+                                * mapping_data.jacobian_pushed_forward_grads[k][n][d][j])
+                             + (output_data.shape_gradients[first+n][k][j]
+                                * mapping_data.jacobian_pushed_forward_grads[k][n][i][d]);
+                        }
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<dim; ++d)
+                  output_data.shape_hessians[first+d][k] = fe_data.sign_change[i] * fe_data.transformed_shape_hessians[k][d];
+
+              break;
+
+            }
+
+            default:
+              Assert(false, ExcNotImplemented());
+            }
+        }
+
+      // third derivatives are not implemented
+      if (fe_data.update_each & update_3rd_derivatives
+          &&
+          ((cell_similarity != CellSimilarity::translation)
+           ||
+           ((mapping_type == mapping_piola) || (mapping_type == mapping_raviart_thomas)
+            || (mapping_type == mapping_nedelec))))
+        {
+          Assert(false, ExcNotImplemented())
+        }
+    }
+}
+
+
+
+template <class PolynomialType, int dim, int spacedim>
+void
+FE_PolyTensor<PolynomialType,dim,spacedim>::
+fill_fe_face_values
+(const typename Triangulation<dim,spacedim>::cell_iterator           &cell,
+ const unsigned int                                                   face_no,
+ const Quadrature<dim-1>                                             &quadrature,
+ const Mapping<dim,spacedim>                                         &mapping,
+ const typename Mapping<dim,spacedim>::InternalDataBase              &mapping_internal,
+ const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &mapping_data,
+ const typename FiniteElement<dim,spacedim>::InternalDataBase        &fe_internal,
+ dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const
+{
+  // convert data object to internal
+  // data for this class. fails with
+  // an exception if that is not
+  // possible
+  Assert (dynamic_cast<const InternalData *> (&fe_internal) != 0,
+          ExcInternalError());
+  const InternalData &fe_data = static_cast<const InternalData &> (fe_internal);
+
+  const unsigned int n_q_points = quadrature.size();
+  // offset determines which data set
+  // to take (all data sets for all
+  // faces are stored contiguously)
+
+  const typename QProjector<dim>::DataSetDescriptor offset
+    = QProjector<dim>::DataSetDescriptor::face (face_no,
+                                                cell->face_orientation(face_no),
+                                                cell->face_flip(face_no),
+                                                cell->face_rotation(face_no),
+                                                n_q_points);
+
+//TODO: Size assertions
+
+// Create table with sign changes, due to the special structure of the RT elements.
+// TODO: Preliminary hack to demonstrate the overall principle!
+
+  // Compute possible sign changes depending
+  // on the neighborhood between two faces.
+  std::fill( fe_data.sign_change.begin(), fe_data.sign_change.end(), 1.0 );
+
+  if (mapping_type == mapping_raviart_thomas)
+    get_face_sign_change_rt (cell, this->dofs_per_face, fe_data.sign_change);
+
+  else if (mapping_type == mapping_nedelec)
+    get_face_sign_change_nedelec (cell, this->dofs_per_face, fe_data.sign_change);
+
+  for (unsigned int i=0; i<this->dofs_per_cell; ++i)
+    {
+      const unsigned int first = output_data.shape_function_to_row_table[i * this->n_components() +
+                                 this->get_nonzero_components(i).first_selected_component()];
+
+      if (fe_data.update_each & update_values)
+        {
+          switch (mapping_type)
+            {
+            case mapping_none:
+            {
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<dim; ++d)
+                  output_data.shape_values(first+d,k) = fe_data.shape_values[i][k+offset][d];
+              break;
+            }
+
+            case mapping_covariant:
+            case mapping_contravariant:
+            {
+              const ArrayView<Tensor<1,spacedim> > transformed_shape_values
+                = make_array_view(fe_data.transformed_shape_values, offset, n_q_points);
+              mapping.transform (make_array_view(fe_data.shape_values, i, offset, n_q_points),
+                                 mapping_type,
+                                 mapping_internal,
+                                 transformed_shape_values);
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<dim; ++d)
+                  output_data.shape_values(first+d,k) = transformed_shape_values[k][d];
+
+              break;
+            }
+            case mapping_raviart_thomas:
+            case mapping_piola:
+            {
+              const ArrayView<Tensor<1,spacedim> > transformed_shape_values
+                = make_array_view(fe_data.transformed_shape_values, offset, n_q_points);
+              mapping.transform (make_array_view(fe_data.shape_values, i, offset, n_q_points),
+                                 mapping_piola,
+                                 mapping_internal,
+                                 transformed_shape_values);
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<dim; ++d)
+                  output_data.shape_values(first+d,k)
+                    = fe_data.sign_change[i] * transformed_shape_values[k][d];
+              break;
+            }
+
+            case mapping_nedelec:
+            {
+              const ArrayView<Tensor<1,spacedim> > transformed_shape_values
+                = make_array_view(fe_data.transformed_shape_values, offset, n_q_points);
+              mapping.transform (make_array_view (fe_data.shape_values, i, offset, n_q_points),
+                                 mapping_covariant,
+                                 mapping_internal,
+                                 transformed_shape_values);
+
+              for (unsigned int k = 0; k < n_q_points; ++k)
+                for (unsigned int d = 0; d < dim; ++d)
+                  output_data.shape_values(first+d,k) =
+                    fe_data.sign_change[i] * transformed_shape_values[k][d];
+
+              break;
+            }
+
+            default:
+              Assert(false, ExcNotImplemented());
+            }
+        }
+
+      if (fe_data.update_each & update_gradients)
+        {
+          switch (mapping_type)
+            {
+            case mapping_none:
+            {
+              const ArrayView<Tensor<2,spacedim> > transformed_shape_grads
+                = make_array_view(fe_data.transformed_shape_grads, offset, n_q_points);
+              mapping.transform (make_array_view(fe_data.shape_grads, i, offset, n_q_points),
+                                 mapping_covariant,
+                                 mapping_internal,
+                                 transformed_shape_grads);
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<dim; ++d)
+                  output_data.shape_gradients[first+d][k] = transformed_shape_grads[k][d];
+              break;
+            }
+
+            case mapping_covariant:
+            {
+              const ArrayView<Tensor<2,spacedim> > transformed_shape_grads
+                = make_array_view(fe_data.transformed_shape_grads, offset, n_q_points);
+              mapping.transform (make_array_view(fe_data.shape_grads, i, offset, n_q_points),
+                                 mapping_covariant_gradient,
+                                 mapping_internal,
+                                 transformed_shape_grads);
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<spacedim; ++d)
+                  for (unsigned int n=0; n<spacedim; ++n)
+                    transformed_shape_grads[k][d] -= output_data.shape_values(first+n,k)
+                                                     * mapping_data.jacobian_pushed_forward_grads[k][n][d];
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<dim; ++d)
+                  output_data.shape_gradients[first+d][k] = transformed_shape_grads[k][d];
+              break;
+            }
+
+            case mapping_contravariant:
+            {
+              const ArrayView<Tensor<2,spacedim> > transformed_shape_grads
+                = make_array_view(fe_data.transformed_shape_grads, offset, n_q_points);
+              for (unsigned int k=0; k<n_q_points; ++k)
+                fe_data.untransformed_shape_grads[k+offset] = fe_data.shape_grads[i][k+offset];
+              mapping.transform (make_array_view(fe_data.untransformed_shape_grads, offset, n_q_points),
+                                 mapping_contravariant_gradient,
+                                 mapping_internal,
+                                 transformed_shape_grads);
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<spacedim; ++d)
+                  for (unsigned int n=0; n<spacedim; ++n)
+                    transformed_shape_grads[k][d] += output_data.shape_values(first+n,k)
+                                                     * mapping_data.jacobian_pushed_forward_grads[k][d][n];
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<dim; ++d)
+                  output_data.shape_gradients[first+d][k] = transformed_shape_grads[k][d];
+
+              break;
+            }
+
+            case mapping_raviart_thomas:
+            case mapping_piola:
+            {
+              const ArrayView<Tensor<2,spacedim> > transformed_shape_grads
+                = make_array_view(fe_data.transformed_shape_grads, offset, n_q_points);
+              for (unsigned int k=0; k<n_q_points; ++k)
+                fe_data.untransformed_shape_grads[k+offset] = fe_data.shape_grads[i][k+offset];
+              mapping.transform (make_array_view(fe_data.untransformed_shape_grads, offset, n_q_points),
+                                 mapping_piola_gradient,
+                                 mapping_internal,
+                                 transformed_shape_grads);
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<spacedim; ++d)
+                  for (unsigned int n=0; n<spacedim; ++n)
+                    transformed_shape_grads[k][d] += ( output_data.shape_values(first+n,k)
+                                                       * mapping_data.jacobian_pushed_forward_grads[k][d][n] )
+                                                     -
+                                                     ( output_data.shape_values(first+d,k)
+                                                       * mapping_data.jacobian_pushed_forward_grads[k][n][n] );
+
+              for (unsigned int k = 0; k < n_q_points; ++k)
+                for (unsigned int d = 0; d < dim; ++d)
+                  output_data.shape_gradients[first + d][k] = fe_data.sign_change[i]
+                                                              * transformed_shape_grads[k][d];
+
+              break;
+            }
+
+            case mapping_nedelec:
+            {
+              // treat the gradients of
+              // this particular shape
+              // function at all
+              // q-points. if Dv is the
+              // gradient of the shape
+              // function on the unit
+              // cell, then
+              // (J^-T)Dv(J^-1) is the
+              // value we want to have on
+              // the real cell.
+              for (unsigned int k=0; k<n_q_points; ++k)
+                fe_data.untransformed_shape_grads[k+offset] = fe_data.shape_grads[i][k+offset];
+
+              const ArrayView<Tensor<2,spacedim> > transformed_shape_grads
+                = make_array_view(fe_data.transformed_shape_grads, offset, n_q_points);
+              mapping.transform (make_array_view (fe_data.untransformed_shape_grads, offset, n_q_points),
+                                 mapping_covariant_gradient,
+                                 mapping_internal,
+                                 transformed_shape_grads);
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<spacedim; ++d)
+                  for (unsigned int n=0; n<spacedim; ++n)
+                    transformed_shape_grads[k][d] -= output_data.shape_values(first+n,k)
+                                                     * mapping_data.jacobian_pushed_forward_grads[k][n][d];
+
+              for (unsigned int k = 0; k < n_q_points; ++k)
+                for (unsigned int d = 0; d < dim; ++d)
+                  output_data.shape_gradients[first + d][k] = fe_data.sign_change[i]
+                                                              * transformed_shape_grads[k][d];
+
+              break;
+            }
+
+            default:
+              Assert(false, ExcNotImplemented());
+            }
+        }
+
+      if (fe_data.update_each & update_hessians)
+        {
+          switch (mapping_type)
+            {
+            case mapping_none:
+            {
+              const ArrayView<Tensor<3,spacedim> > transformed_shape_hessians
+                = make_array_view(fe_data.transformed_shape_hessians, offset, n_q_points);
+              mapping.transform(make_array_view (fe_data.shape_grad_grads, i, offset, n_q_points),
+                                mapping_covariant_gradient,
+                                mapping_internal,
+                                transformed_shape_hessians);
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<spacedim; ++d)
+                  for (unsigned int n=0; n<spacedim; ++n)
+                    transformed_shape_hessians[k][d] -= output_data.shape_gradients[first+d][k][n]
+                                                        *mapping_data.jacobian_pushed_forward_grads[k][n];
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<dim; ++d)
+                  output_data.shape_hessians[first+d][k] = transformed_shape_hessians[k][d];
+
+              break;
+
+            }
+            case mapping_covariant:
+            {
+              for (unsigned int k=0; k<n_q_points; ++k)
+                fe_data.untransformed_shape_hessian_tensors[k+offset] = fe_data.shape_grad_grads[i][k+offset];
+
+              const ArrayView<Tensor<3,spacedim> > transformed_shape_hessians
+                = make_array_view(fe_data.transformed_shape_hessians, offset, n_q_points);
+              mapping.transform(make_array_view (fe_data.untransformed_shape_hessian_tensors, offset, n_q_points),
+                                mapping_covariant_hessian,
+                                mapping_internal,
+                                transformed_shape_hessians);
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<spacedim; ++d)
+                  for (unsigned int n=0; n<spacedim; ++n)
+                    for (unsigned int i=0; i<spacedim; ++i)
+                      for (unsigned int j=0; j<spacedim; ++j)
+                        {
+                          transformed_shape_hessians[k][d][i][j]
+                          -= (output_data.shape_values(first+n,k)
+                              * mapping_data.jacobian_pushed_forward_2nd_derivatives[k][n][d][i][j])
+                             + (output_data.shape_gradients[first+d][k][n]
+                                * mapping_data.jacobian_pushed_forward_grads[k][n][i][j])
+                             + (output_data.shape_gradients[first+n][k][i]
+                                * mapping_data.jacobian_pushed_forward_grads[k][n][d][j])
+                             + (output_data.shape_gradients[first+n][k][j]
+                                * mapping_data.jacobian_pushed_forward_grads[k][n][i][d]);
+                        }
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<dim; ++d)
+                  output_data.shape_hessians[first+d][k] = transformed_shape_hessians[k][d];
+
+              break;
+
+            }
+
+            case mapping_contravariant:
+            {
+              for (unsigned int k=0; k<n_q_points; ++k)
+                fe_data.untransformed_shape_hessian_tensors[k+offset] = fe_data.shape_grad_grads[i][k+offset];
+
+              const ArrayView<Tensor<3,spacedim> > transformed_shape_hessians
+                = make_array_view(fe_data.transformed_shape_hessians, offset, n_q_points);
+              mapping.transform(make_array_view (fe_data.untransformed_shape_hessian_tensors, offset, n_q_points),
+                                mapping_contravariant_hessian,
+                                mapping_internal,
+                                transformed_shape_hessians);
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<spacedim; ++d)
+                  for (unsigned int n=0; n<spacedim; ++n)
+                    for (unsigned int i=0; i<spacedim; ++i)
+                      for (unsigned int j=0; j<spacedim; ++j)
+                        {
+                          transformed_shape_hessians[k][d][i][j]
+                          += (output_data.shape_values(first+n,k)
+                              * mapping_data.jacobian_pushed_forward_2nd_derivatives[k][d][n][i][j])
+                             + (output_data.shape_gradients[first+n][k][i]
+                                * mapping_data.jacobian_pushed_forward_grads[k][d][n][j])
+                             + (output_data.shape_gradients[first+n][k][j]
+                                * mapping_data.jacobian_pushed_forward_grads[k][d][i][n])
+                             - (output_data.shape_gradients[first+d][k][n]
+                                * mapping_data.jacobian_pushed_forward_grads[k][n][i][j]);
+                          for (unsigned int m=0; m<spacedim; ++m)
+                            transformed_shape_hessians[k][d][i][j]
+                            -= (mapping_data.jacobian_pushed_forward_grads[k][d][i][m]
+                                * mapping_data.jacobian_pushed_forward_grads[k][m][n][j]
+                                * output_data.shape_values(first+n,k))
+                               + (mapping_data.jacobian_pushed_forward_grads[k][d][m][j]
+                                  * mapping_data.jacobian_pushed_forward_grads[k][m][i][n]
+                                  * output_data.shape_values(first+n,k));
+                        }
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<dim; ++d)
+                  output_data.shape_hessians[first+d][k] = transformed_shape_hessians[k][d];
+
+              break;
+            }
+
+            case mapping_raviart_thomas:
+            case mapping_piola:
+            {
+              for (unsigned int k=0; k<n_q_points; ++k)
+                fe_data.untransformed_shape_hessian_tensors[k+offset] = fe_data.shape_grad_grads[i][k+offset];
+
+              const ArrayView<Tensor<3,spacedim> > transformed_shape_hessians
+                = make_array_view(fe_data.transformed_shape_hessians, offset, n_q_points);
+              mapping.transform(make_array_view (fe_data.untransformed_shape_hessian_tensors, offset, n_q_points),
+                                mapping_piola_hessian,
+                                mapping_internal,
+                                transformed_shape_hessians);
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<spacedim; ++d)
+                  for (unsigned int n=0; n<spacedim; ++n)
+                    for (unsigned int i=0; i<spacedim; ++i)
+                      for (unsigned int j=0; j<spacedim; ++j)
+                        {
+                          transformed_shape_hessians[k][d][i][j]
+                          += (output_data.shape_values(first+n,k)
+                              * mapping_data.jacobian_pushed_forward_2nd_derivatives[k][d][n][i][j])
+                             + (output_data.shape_gradients[first+n][k][i]
+                                * mapping_data.jacobian_pushed_forward_grads[k][d][n][j])
+                             + (output_data.shape_gradients[first+n][k][j]
+                                * mapping_data.jacobian_pushed_forward_grads[k][d][i][n])
+                             - (output_data.shape_gradients[first+d][k][n]
+                                * mapping_data.jacobian_pushed_forward_grads[k][n][i][j]);
+
+                          transformed_shape_hessians[k][d][i][j]
+                          -= (output_data.shape_values(first+d,k)
+                              * mapping_data.jacobian_pushed_forward_2nd_derivatives[k][n][n][i][j])
+                             + (output_data.shape_gradients[first+d][k][i]
+                                * mapping_data.jacobian_pushed_forward_grads[k][n][n][j])
+                             + (output_data.shape_gradients[first+d][k][j]
+                                * mapping_data.jacobian_pushed_forward_grads[k][n][n][i]);
+
+                          for (unsigned int m=0; m<spacedim; ++m)
+                            {
+                              transformed_shape_hessians[k][d][i][j]
+                              -= (mapping_data.jacobian_pushed_forward_grads[k][d][i][m]
+                                  * mapping_data.jacobian_pushed_forward_grads[k][m][n][j]
+                                  * output_data.shape_values(first+n,k))
+                                 + (mapping_data.jacobian_pushed_forward_grads[k][d][m][j]
+                                    * mapping_data.jacobian_pushed_forward_grads[k][m][i][n]
+                                    * output_data.shape_values(first+n,k));
+
+                              transformed_shape_hessians[k][d][i][j]
+                              += (mapping_data.jacobian_pushed_forward_grads[k][n][i][m]
+                                  * mapping_data.jacobian_pushed_forward_grads[k][m][n][j]
+                                  * output_data.shape_values(first+d,k))
+                                 + (mapping_data.jacobian_pushed_forward_grads[k][n][m][j]
+                                    * mapping_data.jacobian_pushed_forward_grads[k][m][i][n]
+                                    * output_data.shape_values(first+d,k));
+                            }
+                        }
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<dim; ++d)
+                  output_data.shape_hessians[first+d][k] = fe_data.sign_change[i] * transformed_shape_hessians[k][d];
+
+              break;
+            }
+
+            case mapping_nedelec:
+            {
+              for (unsigned int k=0; k<n_q_points; ++k)
+                fe_data.untransformed_shape_hessian_tensors[k+offset] = fe_data.shape_grad_grads[i][k+offset];
+
+              const ArrayView<Tensor<3,spacedim> > transformed_shape_hessians
+                = make_array_view(fe_data.transformed_shape_hessians, offset, n_q_points);
+              mapping.transform(make_array_view (fe_data.untransformed_shape_hessian_tensors, offset, n_q_points),
+                                mapping_covariant_hessian,
+                                mapping_internal,
+                                transformed_shape_hessians);
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<spacedim; ++d)
+                  for (unsigned int n=0; n<spacedim; ++n)
+                    for (unsigned int i=0; i<spacedim; ++i)
+                      for (unsigned int j=0; j<spacedim; ++j)
+                        {
+                          transformed_shape_hessians[k][d][i][j]
+                          -= (output_data.shape_values(first+n,k)
+                              * mapping_data.jacobian_pushed_forward_2nd_derivatives[k][n][d][i][j])
+                             + (output_data.shape_gradients[first+d][k][n]
+                                * mapping_data.jacobian_pushed_forward_grads[k][n][i][j])
+                             + (output_data.shape_gradients[first+n][k][i]
+                                * mapping_data.jacobian_pushed_forward_grads[k][n][d][j])
+                             + (output_data.shape_gradients[first+n][k][j]
+                                * mapping_data.jacobian_pushed_forward_grads[k][n][i][d]);
+                        }
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<dim; ++d)
+                  output_data.shape_hessians[first+d][k] = fe_data.sign_change[i] * transformed_shape_hessians[k][d];
+
+              break;
+            }
+
+            default:
+              Assert(false, ExcNotImplemented());
+            }
+        }
+
+      // third derivatives are not implemented
+      if (fe_data.update_each & update_3rd_derivatives)
+        {
+          Assert(false, ExcNotImplemented())
+        }
+    }
+}
+
+
+
+template <class PolynomialType, int dim, int spacedim>
+void
+FE_PolyTensor<PolynomialType,dim,spacedim>::
+fill_fe_subface_values
+(const typename Triangulation<dim,spacedim>::cell_iterator           &cell,
+ const unsigned int                                                   face_no,
+ const unsigned int                                                   sub_no,
+ const Quadrature<dim-1>                                             &quadrature,
+ const Mapping<dim,spacedim>                                         &mapping,
+ const typename Mapping<dim,spacedim>::InternalDataBase              &mapping_internal,
+ const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &mapping_data,
+ const typename FiniteElement<dim,spacedim>::InternalDataBase        &fe_internal,
+ dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const
+{
+  // convert data object to internal
+  // data for this class. fails with
+  // an exception if that is not
+  // possible
+  Assert (dynamic_cast<const InternalData *> (&fe_internal) != 0,
+          ExcInternalError());
+  const InternalData &fe_data = static_cast<const InternalData &> (fe_internal);
+
+  const unsigned int n_q_points = quadrature.size();
+
+  // offset determines which data set
+  // to take (all data sets for all
+  // sub-faces are stored contiguously)
+  const typename QProjector<dim>::DataSetDescriptor offset
+    = QProjector<dim>::DataSetDescriptor::subface (face_no, sub_no,
+                                                   cell->face_orientation(face_no),
+                                                   cell->face_flip(face_no),
+                                                   cell->face_rotation(face_no),
+                                                   n_q_points,
+                                                   cell->subface_case(face_no));
+
+//   Assert(mapping_type == independent
+//       || ( mapping_type == independent_on_cartesian
+//            && dynamic_cast<const MappingCartesian<dim>*>(&mapping) != 0),
+//       ExcNotImplemented());
+//TODO: Size assertions
+
+//TODO: Sign change for the face DoFs!
+
+  // Compute possible sign changes depending
+  // on the neighborhood between two faces.
+  std::fill( fe_data.sign_change.begin(), fe_data.sign_change.end(), 1.0 );
+
+  if (mapping_type == mapping_raviart_thomas)
+    get_face_sign_change_rt (cell, this->dofs_per_face, fe_data.sign_change);
+
+  else if (mapping_type == mapping_nedelec)
+    get_face_sign_change_nedelec (cell, this->dofs_per_face, fe_data.sign_change);
+
+  for (unsigned int i=0; i<this->dofs_per_cell; ++i)
+    {
+      const unsigned int first = output_data.shape_function_to_row_table[i * this->n_components() +
+                                 this->get_nonzero_components(i).first_selected_component()];
+
+      if (fe_data.update_each & update_values)
+        {
+          switch (mapping_type)
+            {
+            case mapping_none:
+            {
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<dim; ++d)
+                  output_data.shape_values(first+d,k) = fe_data.shape_values[i][k+offset][d];
+              break;
+            }
+
+            case mapping_covariant:
+            case mapping_contravariant:
+            {
+              const ArrayView<Tensor<1,spacedim> > transformed_shape_values
+                = make_array_view(fe_data.transformed_shape_values, offset, n_q_points);
+              mapping.transform (make_array_view(fe_data.shape_values, i, offset, n_q_points),
+                                 mapping_type,
+                                 mapping_internal,
+                                 transformed_shape_values);
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<dim; ++d)
+                  output_data.shape_values(first+d,k) = transformed_shape_values[k][d];
+
+              break;
+            }
+
+            case mapping_raviart_thomas:
+            case mapping_piola:
+            {
+              const ArrayView<Tensor<1,spacedim> > transformed_shape_values
+                = make_array_view(fe_data.transformed_shape_values, offset, n_q_points);
+
+              mapping.transform(make_array_view(fe_data.shape_values, i, offset, n_q_points),
+                                mapping_piola,
+                                mapping_internal,
+                                transformed_shape_values);
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<dim; ++d)
+                  output_data.shape_values(first+d,k)
+                    = fe_data.sign_change[i] * transformed_shape_values[k][d];
+              break;
+            }
+
+            case mapping_nedelec:
+            {
+              const ArrayView<Tensor<1,spacedim> > transformed_shape_values
+                = make_array_view(fe_data.transformed_shape_values, offset, n_q_points);
+
+              mapping.transform (make_array_view (fe_data.shape_values, i, offset, n_q_points),
+                                 mapping_covariant,
+                                 mapping_internal,
+                                 transformed_shape_values);
+
+              for (unsigned int k = 0; k < n_q_points; ++k)
+                for (unsigned int d = 0; d < dim; ++d)
+                  output_data.shape_values(first+d,k) =
+                    fe_data.sign_change[i] * transformed_shape_values[k][d];
+
+              break;
+            }
+
+            default:
+              Assert(false, ExcNotImplemented());
+            }
+        }
+
+      if (fe_data.update_each & update_gradients)
+        {
+          const ArrayView<Tensor<2, spacedim> > transformed_shape_grads
+            = make_array_view(fe_data.transformed_shape_grads, offset, n_q_points);
+          switch (mapping_type)
+            {
+            case mapping_none:
+            {
+              mapping.transform (make_array_view(fe_data.shape_grads, i, offset, n_q_points),
+                                 mapping_covariant,
+                                 mapping_internal,
+                                 transformed_shape_grads);
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<dim; ++d)
+                  output_data.shape_gradients[first+d][k] = transformed_shape_grads[k][d];
+              break;
+            }
+
+            case mapping_covariant:
+            {
+              mapping.transform (make_array_view(fe_data.shape_grads, i, offset, n_q_points),
+                                 mapping_covariant_gradient,
+                                 mapping_internal,
+                                 transformed_shape_grads);
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<spacedim; ++d)
+                  for (unsigned int n=0; n<spacedim; ++n)
+                    transformed_shape_grads[k][d] -= output_data.shape_values(first+n,k)
+                                                     * mapping_data.jacobian_pushed_forward_grads[k][n][d];
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<dim; ++d)
+                  output_data.shape_gradients[first+d][k] = transformed_shape_grads[k][d];
+
+              break;
+            }
+
+            case mapping_contravariant:
+            {
+              for (unsigned int k=0; k<n_q_points; ++k)
+                fe_data.untransformed_shape_grads[k+offset] = fe_data.shape_grads[i][k+offset];
+
+              mapping.transform (make_array_view(fe_data.untransformed_shape_grads, offset, n_q_points),
+                                 mapping_contravariant_gradient,
+                                 mapping_internal,
+                                 transformed_shape_grads);
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<spacedim; ++d)
+                  for (unsigned int n=0; n<spacedim; ++n)
+                    transformed_shape_grads[k][d] += output_data.shape_values(first+n,k)
+                                                     * mapping_data.jacobian_pushed_forward_grads[k][d][n];
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<dim; ++d)
+                  output_data.shape_gradients[first+d][k] = transformed_shape_grads[k][d];
+
+              break;
+            }
+
+            case mapping_raviart_thomas:
+            case mapping_piola:
+            {
+              for (unsigned int k=0; k<n_q_points; ++k)
+                fe_data.untransformed_shape_grads[k+offset] = fe_data.shape_grads[i][k+offset];
+
+              mapping.transform (make_array_view(fe_data.untransformed_shape_grads, offset, n_q_points),
+                                 mapping_piola_gradient,
+                                 mapping_internal,
+                                 transformed_shape_grads);
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<spacedim; ++d)
+                  for (unsigned int n=0; n<spacedim; ++n)
+                    transformed_shape_grads[k][d] += ( output_data.shape_values(first+n,k)
+                                                       * mapping_data.jacobian_pushed_forward_grads[k][d][n])
+                                                     - ( output_data.shape_values(first+d,k)
+                                                         * mapping_data.jacobian_pushed_forward_grads[k][n][n]);
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<dim; ++d)
+                  output_data.shape_gradients[first+d][k] =
+                    fe_data.sign_change[i] * transformed_shape_grads[k][d];
+
+              break;
+            }
+
+            case mapping_nedelec:
+            {
+              // treat the gradients of
+              // this particular shape
+              // function at all q-points.
+              // if Dv is the gradient of
+              // the shape function on the
+              // unit cell, then
+              // (J^-T)Dv(J^-1) is the
+              // value we want to have on
+              // the real cell.
+              for (unsigned int k=0; k<n_q_points; ++k)
+                fe_data.untransformed_shape_grads[k+offset] = fe_data.shape_grads[i][k+offset];
+
+              mapping.transform (make_array_view (fe_data.untransformed_shape_grads, offset, n_q_points),
+                                 mapping_covariant_gradient,
+                                 mapping_internal,
+                                 transformed_shape_grads);
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<spacedim; ++d)
+                  for (unsigned int n=0; n<spacedim; ++n)
+                    transformed_shape_grads[k][d] -= output_data.shape_values(first+n,k)
+                                                     * mapping_data.jacobian_pushed_forward_grads[k][n][d];
+
+              for (unsigned int k = 0; k < n_q_points; ++k)
+                for (unsigned int d = 0; d < dim; ++d)
+                  output_data.shape_gradients[first + d][k] =
+                    fe_data.sign_change[i] * transformed_shape_grads[k][d];
+
+              break;
+            }
+
+            default:
+              Assert(false, ExcNotImplemented());
+            }
+        }
+
+      if (fe_data.update_each & update_hessians)
+        {
+          switch (mapping_type)
+            {
+            case mapping_none:
+            {
+              const ArrayView<Tensor<3,spacedim> > transformed_shape_hessians
+                = make_array_view(fe_data.transformed_shape_hessians, offset, n_q_points);
+              mapping.transform(make_array_view (fe_data.shape_grad_grads, i, offset, n_q_points),
+                                mapping_covariant_gradient, mapping_internal,
+                                transformed_shape_hessians);
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<spacedim; ++d)
+                  for (unsigned int n=0; n<spacedim; ++n)
+                    transformed_shape_hessians[k][d] -= output_data.shape_gradients[first+d][k][n]
+                                                        *mapping_data.jacobian_pushed_forward_grads[k][n];
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<dim; ++d)
+                  output_data.shape_hessians[first+d][k] = transformed_shape_hessians[k][d];
+
+              break;
+
+            }
+            case mapping_covariant:
+            {
+              for (unsigned int k=0; k<n_q_points; ++k)
+                fe_data.untransformed_shape_hessian_tensors[k+offset] = fe_data.shape_grad_grads[i][k+offset];
+
+              const ArrayView<Tensor<3,spacedim> > transformed_shape_hessians
+                = make_array_view(fe_data.transformed_shape_hessians, offset, n_q_points);
+              mapping.transform(make_array_view (fe_data.untransformed_shape_hessian_tensors, offset, n_q_points),
+                                mapping_covariant_hessian,
+                                mapping_internal,
+                                transformed_shape_hessians);
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<spacedim; ++d)
+                  for (unsigned int n=0; n<spacedim; ++n)
+                    for (unsigned int i=0; i<spacedim; ++i)
+                      for (unsigned int j=0; j<spacedim; ++j)
+                        {
+                          transformed_shape_hessians[k][d][i][j]
+                          -= (output_data.shape_values(first+n,k)
+                              * mapping_data.jacobian_pushed_forward_2nd_derivatives[k][n][d][i][j])
+                             + (output_data.shape_gradients[first+d][k][n]
+                                * mapping_data.jacobian_pushed_forward_grads[k][n][i][j])
+                             + (output_data.shape_gradients[first+n][k][i]
+                                * mapping_data.jacobian_pushed_forward_grads[k][n][d][j])
+                             + (output_data.shape_gradients[first+n][k][j]
+                                * mapping_data.jacobian_pushed_forward_grads[k][n][i][d]);
+                        }
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<dim; ++d)
+                  output_data.shape_hessians[first+d][k] = transformed_shape_hessians[k][d];
+
+              break;
+
+            }
+
+            case mapping_contravariant:
+            {
+              for (unsigned int k=0; k<n_q_points; ++k)
+                fe_data.untransformed_shape_hessian_tensors[k+offset] = fe_data.shape_grad_grads[i][k+offset];
+
+              const ArrayView<Tensor<3,spacedim> > transformed_shape_hessians
+                = make_array_view(fe_data.transformed_shape_hessians, offset, n_q_points);
+              mapping.transform(make_array_view (fe_data.untransformed_shape_hessian_tensors, offset, n_q_points),
+                                mapping_contravariant_hessian,
+                                mapping_internal,
+                                transformed_shape_hessians);
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<spacedim; ++d)
+                  for (unsigned int n=0; n<spacedim; ++n)
+                    for (unsigned int i=0; i<spacedim; ++i)
+                      for (unsigned int j=0; j<spacedim; ++j)
+                        {
+                          transformed_shape_hessians[k][d][i][j]
+                          += (output_data.shape_values(first+n,k)
+                              * mapping_data.jacobian_pushed_forward_2nd_derivatives[k][d][n][i][j])
+                             + (output_data.shape_gradients[first+n][k][i]
+                                * mapping_data.jacobian_pushed_forward_grads[k][d][n][j])
+                             + (output_data.shape_gradients[first+n][k][j]
+                                * mapping_data.jacobian_pushed_forward_grads[k][d][i][n])
+                             - (output_data.shape_gradients[first+d][k][n]
+                                * mapping_data.jacobian_pushed_forward_grads[k][n][i][j]);
+                          for (unsigned int m=0; m<spacedim; ++m)
+                            transformed_shape_hessians[k][d][i][j]
+                            -= (mapping_data.jacobian_pushed_forward_grads[k][d][i][m]
+                                * mapping_data.jacobian_pushed_forward_grads[k][m][n][j]
+                                * output_data.shape_values(first+n,k))
+                               + (mapping_data.jacobian_pushed_forward_grads[k][d][m][j]
+                                  * mapping_data.jacobian_pushed_forward_grads[k][m][i][n]
+                                  * output_data.shape_values(first+n,k));
+                        }
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<dim; ++d)
+                  output_data.shape_hessians[first+d][k] = transformed_shape_hessians[k][d];
+
+              break;
+
+            }
+
+            case mapping_raviart_thomas:
+            case mapping_piola:
+            {
+              for (unsigned int k=0; k<n_q_points; ++k)
+                fe_data.untransformed_shape_hessian_tensors[k+offset] = fe_data.shape_grad_grads[i][k+offset];
+
+              const ArrayView<Tensor<3,spacedim> > transformed_shape_hessians
+                = make_array_view(fe_data.transformed_shape_hessians, offset, n_q_points);
+              mapping.transform(make_array_view (fe_data.untransformed_shape_hessian_tensors, offset, n_q_points),
+                                mapping_piola_hessian,
+                                mapping_internal,
+                                transformed_shape_hessians);
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<spacedim; ++d)
+                  for (unsigned int n=0; n<spacedim; ++n)
+                    for (unsigned int i=0; i<spacedim; ++i)
+                      for (unsigned int j=0; j<spacedim; ++j)
+                        {
+                          transformed_shape_hessians[k][d][i][j]
+                          += (output_data.shape_values(first+n,k)
+                              * mapping_data.jacobian_pushed_forward_2nd_derivatives[k][d][n][i][j])
+                             + (output_data.shape_gradients[first+n][k][i]
+                                * mapping_data.jacobian_pushed_forward_grads[k][d][n][j])
+                             + (output_data.shape_gradients[first+n][k][j]
+                                * mapping_data.jacobian_pushed_forward_grads[k][d][i][n])
+                             - (output_data.shape_gradients[first+d][k][n]
+                                * mapping_data.jacobian_pushed_forward_grads[k][n][i][j]);
+
+                          transformed_shape_hessians[k][d][i][j]
+                          -= (output_data.shape_values(first+d,k)
+                              * mapping_data.jacobian_pushed_forward_2nd_derivatives[k][n][n][i][j])
+                             + (output_data.shape_gradients[first+d][k][i]
+                                * mapping_data.jacobian_pushed_forward_grads[k][n][n][j])
+                             + (output_data.shape_gradients[first+d][k][j]
+                                * mapping_data.jacobian_pushed_forward_grads[k][n][n][i]);
+                          for (unsigned int m=0; m<spacedim; ++m)
+                            {
+                              transformed_shape_hessians[k][d][i][j]
+                              -= (mapping_data.jacobian_pushed_forward_grads[k][d][i][m]
+                                  * mapping_data.jacobian_pushed_forward_grads[k][m][n][j]
+                                  * output_data.shape_values(first+n,k))
+                                 + (mapping_data.jacobian_pushed_forward_grads[k][d][m][j]
+                                    * mapping_data.jacobian_pushed_forward_grads[k][m][i][n]
+                                    * output_data.shape_values(first+n,k));
+
+                              transformed_shape_hessians[k][d][i][j]
+                              += (mapping_data.jacobian_pushed_forward_grads[k][n][i][m]
+                                  * mapping_data.jacobian_pushed_forward_grads[k][m][n][j]
+                                  * output_data.shape_values(first+d,k))
+                                 + (mapping_data.jacobian_pushed_forward_grads[k][n][m][j]
+                                    * mapping_data.jacobian_pushed_forward_grads[k][m][i][n]
+                                    * output_data.shape_values(first+d,k));
+                            }
+                        }
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<dim; ++d)
+                  output_data.shape_hessians[first+d][k] = fe_data.sign_change[i] * transformed_shape_hessians[k][d];
+
+              break;
+
+            }
+
+            case mapping_nedelec:
+            {
+              for (unsigned int k=0; k<n_q_points; ++k)
+                fe_data.untransformed_shape_hessian_tensors[k+offset] = fe_data.shape_grad_grads[i][k+offset];
+
+              const ArrayView<Tensor<3,spacedim> > transformed_shape_hessians
+                = make_array_view(fe_data.transformed_shape_hessians, offset, n_q_points);
+              mapping.transform(make_array_view(fe_data.untransformed_shape_hessian_tensors, offset, n_q_points),
+                                mapping_covariant_hessian,
+                                mapping_internal,
+                                transformed_shape_hessians);
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<spacedim; ++d)
+                  for (unsigned int n=0; n<spacedim; ++n)
+                    for (unsigned int i=0; i<spacedim; ++i)
+                      for (unsigned int j=0; j<spacedim; ++j)
+                        {
+                          transformed_shape_hessians[k][d][i][j]
+                          -= (output_data.shape_values(first+n,k)
+                              * mapping_data.jacobian_pushed_forward_2nd_derivatives[k][n][d][i][j])
+                             + (output_data.shape_gradients[first+d][k][n]
+                                * mapping_data.jacobian_pushed_forward_grads[k][n][i][j])
+                             + (output_data.shape_gradients[first+n][k][i]
+                                * mapping_data.jacobian_pushed_forward_grads[k][n][d][j])
+                             + (output_data.shape_gradients[first+n][k][j]
+                                * mapping_data.jacobian_pushed_forward_grads[k][n][i][d]);
+                        }
+
+              for (unsigned int k=0; k<n_q_points; ++k)
+                for (unsigned int d=0; d<dim; ++d)
+                  output_data.shape_hessians[first+d][k] = fe_data.sign_change[i] * transformed_shape_hessians[k][d];
+
+              break;
+
+            }
+
+            default:
+              Assert(false, ExcNotImplemented());
+            }
+        }
+
+      // third derivatives are not implemented
+      if (fe_data.update_each & update_3rd_derivatives)
+        {
+          Assert(false, ExcNotImplemented())
+        }
+    }
+}
+
+
+
+// Return the update flags needed to provide the quantities requested in
+// @p flags. For each of values, gradients and Hessians that is requested,
+// the flags listed under the current mapping_type are added to the result.
+// An unhandled mapping_type triggers the ExcNotImplemented assertion.
+template <class PolynomialType, int dim, int spacedim>
+UpdateFlags
+FE_PolyTensor<PolynomialType,dim,spacedim>::requires_update_flags(const UpdateFlags flags) const
+{
+  UpdateFlags out = update_default;
+
+  switch (mapping_type)
+    {
+    case mapping_none:
+    {
+      if (flags & update_values)
+        out |= update_values;
+
+      if (flags & update_gradients)
+        out |= update_gradients | update_values | update_jacobian_pushed_forward_grads;
+
+      if (flags & update_hessians)
+        out |= update_hessians |  update_values | update_gradients |
+               update_jacobian_pushed_forward_grads |
+               update_jacobian_pushed_forward_2nd_derivatives;
+
+      // leave the switch here: without this break, control falls through
+      // into the Raviart-Thomas/Piola case and its flags get added as well
+      break;
+    }
+
+    case mapping_raviart_thomas:
+    case mapping_piola:
+    {
+      if (flags & update_values)
+        out |= update_values | update_piola;
+
+      if (flags & update_gradients)
+        out |= update_gradients | update_values | update_piola | update_jacobian_pushed_forward_grads |
+               update_covariant_transformation | update_contravariant_transformation;
+
+      if (flags & update_hessians)
+        out |= update_hessians | update_piola | update_values | update_gradients |
+               update_jacobian_pushed_forward_grads |
+               update_jacobian_pushed_forward_2nd_derivatives |
+               update_covariant_transformation;
+
+      break;
+    }
+
+    case mapping_contravariant:
+    {
+      if (flags & update_values)
+        out |= update_values | update_piola;
+
+      if (flags & update_gradients)
+        out |= update_gradients | update_values | update_jacobian_pushed_forward_grads |
+               update_covariant_transformation | update_contravariant_transformation;
+
+      if (flags & update_hessians)
+        out |= update_hessians | update_piola | update_values | update_gradients |
+               update_jacobian_pushed_forward_grads |
+               update_jacobian_pushed_forward_2nd_derivatives |
+               update_covariant_transformation;
+
+      break;
+    }
+
+    case mapping_nedelec:
+    case mapping_covariant:
+    {
+      if (flags & update_values)
+        out |= update_values | update_covariant_transformation;
+
+      if (flags & update_gradients)
+        out |= update_gradients | update_values | update_jacobian_pushed_forward_grads |
+               update_covariant_transformation;
+
+      if (flags & update_hessians)
+        out |= update_hessians |  update_values | update_gradients |
+               update_jacobian_pushed_forward_grads |
+               update_jacobian_pushed_forward_2nd_derivatives |
+               update_covariant_transformation;
+
+      break;
+    }
+
+    default:
+    {
+      Assert (false, ExcNotImplemented());
+    }
+    }
+
+  return out;
+}
+
+
+// explicit instantiations
+#include "fe_poly_tensor.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/fe_poly_tensor.inst.in b/source/fe/fe_poly_tensor.inst.in
new file mode 100644
index 0000000..807126d
--- /dev/null
+++ b/source/fe/fe_poly_tensor.inst.in
@@ -0,0 +1,25 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+// Explicit instantiations of FE_PolyTensor for the Raviart-Thomas, ABF, BDM
+// and Nedelec polynomial spaces, one set for each entry of DIMENSIONS.
+for (deal_II_dimension : DIMENSIONS)
+  {
+    template class FE_PolyTensor<PolynomialsRaviartThomas<deal_II_dimension>,deal_II_dimension>;
+    template class FE_PolyTensor<PolynomialsABF<deal_II_dimension>,deal_II_dimension>;
+    template class FE_PolyTensor<PolynomialsBDM<deal_II_dimension>,deal_II_dimension>;
+    template class FE_PolyTensor<PolynomialsNedelec<deal_II_dimension>, deal_II_dimension>;
+  }
+
diff --git a/source/fe/fe_q.cc b/source/fe/fe_q.cc
new file mode 100644
index 0000000..d170222
--- /dev/null
+++ b/source/fe/fe_q.cc
@@ -0,0 +1,137 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/fe/fe_q.h>
+
+#include <vector>
+#include <sstream>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+// Build the element of the given polynomial degree on equidistant support
+// points k/degree, k=0...degree. Degree zero is rejected by an assertion.
+template <int dim, int spacedim>
+FE_Q<dim,spacedim>::FE_Q (const unsigned int degree)
+  :
+  FE_Q_Base<TensorProductPolynomials<dim>, dim, spacedim> (
+    TensorProductPolynomials<dim>(
+      Polynomials::LagrangeEquidistant::generate_complete_basis(degree)),
+    FiniteElementData<dim>(this->get_dpo_vector(degree),
+                           1,
+                           degree,
+                           FiniteElementData<dim>::H1),
+    std::vector<bool> (1, false))
+{
+  Assert (degree > 0,
+          ExcMessage ("This element can only be used for polynomial degrees "
+                      "greater than zero. If you want an element of polynomial "
+                      "degree zero, then it cannot be continuous and you "
+                      "will want to use FE_DGQ<dim>(0)."));
+
+  // one-dimensional nodes from which the base class sets up the element
+  const unsigned int n_nodes = degree+1;
+  std::vector<Point<1> > equidistant_nodes(n_nodes);
+  for (unsigned int k=0; k<=degree; ++k)
+    equidistant_nodes[k](0) = static_cast<double>(k)/degree;
+
+  this->initialize(equidistant_nodes);
+}
+
+
+
+// Build the element whose one-dimensional support points are the points of
+// the given quadrature object; the polynomial degree is points.size()-1.
+// At least two points are required, which is checked by an assertion.
+template <int dim, int spacedim>
+FE_Q<dim,spacedim>::FE_Q (const Quadrature<1> &points)
+  :
+  FE_Q_Base<TensorProductPolynomials<dim>, dim, spacedim> (
+    TensorProductPolynomials<dim>(Polynomials::generate_complete_Lagrange_basis(points.get_points())),
+    FiniteElementData<dim>(this->get_dpo_vector(points.size()-1),
+                           1, points.size()-1,
+                           FiniteElementData<dim>::H1),
+    std::vector<bool> (1, false))
+{
+  // Test the number of points rather than size()-1: computed as an unsigned
+  // int, size()-1 wraps around for an empty point set and would then pass a
+  // "degree > 0" check.
+  Assert (points.size() > 1,
+          ExcMessage ("This element can only be used for polynomial degrees "
+                      "greater than zero, i.e., it needs at least two "
+                      "support points."));
+
+  this->initialize(points.get_points());
+}
+
+
+
+// Return a string naming this element. Equidistant support points give
+// "FE_Q<...>(degree)", Gauss-Lobatto points give
+// "FE_Q<...>(QGaussLobatto(degree+1))", and anything else is reported as
+// "FE_Q<...>(QUnknownNodes(degree))".
+template <int dim, int spacedim>
+std::string
+FE_Q<dim,spacedim>::get_name () const
+{
+  // FETools::get_fe_from_name relies on the exact format of the string
+  // produced here, so the two functions have to be changed together
+
+  const unsigned int n_nodes_1d = this->degree+1;
+
+  // Read off the support point coordinates in one coordinate direction.
+  const std::vector<unsigned int> lexicographic = this->poly_space.get_numbering_inverse();
+  std::vector<double> nodes(n_nodes_1d);
+  for (unsigned int k=0; k<n_nodes_1d; ++k)
+    nodes[k] = this->unit_support_points[lexicographic[k]][0];
+
+  // The nodes count as equidistant if each one is within 1e-15 of k/degree;
+  // the scan stops at the first node that is not.
+  bool equidistant = true;
+  for (unsigned int k=0; equidistant && k<n_nodes_1d; ++k)
+    equidistant = !(std::fabs(nodes[k] - static_cast<double>(k)/this->degree) > 1e-15);
+
+  std::ostringstream namebuf;
+  if (equidistant)
+    {
+      namebuf << "FE_Q<"
+              << Utilities::dim_string(dim,spacedim)
+              << ">(" << this->degree << ")";
+      return namebuf.str();
+    }
+
+  // Not equidistant: compare (exactly) against the points of QGaussLobatto
+  // with the same number of nodes.
+  const QGaussLobatto<1> points_gl(this->degree+1);
+  bool gauss_lobatto = true;
+  for (unsigned int k=0; gauss_lobatto && k<n_nodes_1d; ++k)
+    gauss_lobatto = (nodes[k] == points_gl.point(k)(0));
+
+  if (gauss_lobatto)
+    namebuf << "FE_Q<"
+            << Utilities::dim_string(dim,spacedim)
+            << ">(QGaussLobatto(" << this->degree+1 << "))";
+  else
+    namebuf << "FE_Q<"
+            << Utilities::dim_string(dim,spacedim)
+            << ">(QUnknownNodes(" << this->degree << "))";
+
+  return namebuf.str();
+}
+
+
+
+// Return a pointer to a copy of this element created with new.
+template <int dim, int spacedim>
+FiniteElement<dim,spacedim> *
+FE_Q<dim,spacedim>::clone() const
+{
+  FE_Q<dim,spacedim> *const copy = new FE_Q<dim,spacedim>(*this);
+  return copy;
+}
+
+
+// explicit instantiations
+#include "fe_q.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/fe_q.inst.in b/source/fe/fe_q.inst.in
new file mode 100644
index 0000000..93ca7d1
--- /dev/null
+++ b/source/fe/fe_q.inst.in
@@ -0,0 +1,25 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+// Explicit instantiations of FE_Q for every pair taken from DIMENSIONS and
+// SPACE_DIMENSIONS in which the dimension does not exceed the space dimension.
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+#if deal_II_dimension <= deal_II_space_dimension 	
+    template class FE_Q<deal_II_dimension, deal_II_space_dimension>;
+#endif
+  }
+
+
diff --git a/source/fe/fe_q_base.cc b/source/fe/fe_q_base.cc
new file mode 100644
index 0000000..9137d03
--- /dev/null
+++ b/source/fe/fe_q_base.cc
@@ -0,0 +1,1575 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/quadrature.h>
+#include <deal.II/base/qprojector.h>
+#include <deal.II/base/template_constraints.h>
+#include <deal.II/base/tensor_product_polynomials.h>
+#include <deal.II/base/tensor_product_polynomials_const.h>
+#include <deal.II/base/tensor_product_polynomials_bubbles.h>
+#include <deal.II/base/polynomials_piecewise.h>
+#include <deal.II/fe/fe_q_base.h>
+#include <deal.II/fe/fe_nothing.h>
+#include <deal.II/fe/fe_tools.h>
+#include <deal.II/base/quadrature_lib.h>
+
+#include <vector>
+#include <sstream>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace FE_Q_Helper
+{
+  namespace
+  {
+    // Return the renumbering from lexicographic to hierarchic ordering for
+    // the degrees of freedom on a face of a dim-dimensional element of the
+    // given degree.
+    template <int dim>
+    inline
+    std::vector<unsigned int>
+    face_lexicographic_to_hierarchic_numbering (const unsigned int degree)
+    {
+      // entry 0 is 1, entry i is (degree-1)^i
+      std::vector<unsigned int> dofs_per_object(dim, 1U);
+      for (unsigned int obj=1; obj<dofs_per_object.size(); ++obj)
+        dofs_per_object[obj] = (degree-1)*dofs_per_object[obj-1];
+
+      const dealii::FiniteElementData<dim-1> face_fe_data(dofs_per_object,1,degree);
+      std::vector<unsigned int> renumbering (face_fe_data.dofs_per_cell);
+      FETools::lexicographic_to_hierarchic_numbering (face_fe_data, renumbering);
+      return renumbering;
+    }
+
+    // dim == 1 has no face numbering to compute; this dummy specialization
+    // returns an empty vector and exists to avoid linker errors
+    template <>
+    inline
+    std::vector<unsigned int>
+    face_lexicographic_to_hierarchic_numbering<1> (const unsigned int)
+    {
+      return std::vector<unsigned int>();
+    }
+
+
+
+    // get_restriction_matrix() and get_prolongation_matrix() undo the
+    // tensorization on their inner loops for performance reasons. This
+    // helper resets all entries of a dim-array to zero.
+    template <int dim>
+    inline
+    void
+    zero_indices (unsigned int (&indices)[dim])
+    {
+      for (unsigned int *entry=indices; entry!=indices+dim; ++entry)
+        *entry = 0;
+    }
+
+
+
+    // get_restriction_matrix() and get_prolongation_matrix() undo the
+    // tensorization on their inner loops for performance reasons. This
+    // helper advances a tensor product index by one: the first entry is
+    // incremented, and every entry except the last that has reached dofs1d
+    // is reset to zero with a carry into its successor.
+    template <int dim>
+    inline
+    void
+    increment_indices (unsigned int       (&indices)[dim],
+                       const unsigned int   dofs1d)
+    {
+      indices[0] += 1;
+      for (int pos=0; pos+1<dim; ++pos)
+        {
+          if (indices[pos] != dofs1d)
+            continue;
+
+          indices[pos] = 0;
+          ++indices[pos+1];
+        }
+    }
+  }
+}
+
+
+
+/**
+ * A class with the same purpose as the similarly named class of the
+ * Triangulation class. See there for more information.
+ */
+template <class PolynomialType, int xdim, int xspacedim>
+struct FE_Q_Base<PolynomialType,xdim,xspacedim>::Implementation
+{
+  /**
+   * Initialize the hanging node constraints matrices. Called from the
+   * constructor in case the finite element is based on quadrature points.
+   *
+   * This overload takes a one-dimensional element. Both arguments are
+   * ignored and nothing is set up.
+   */
+  template <int spacedim>
+  static
+  void initialize_constraints (const std::vector<Point<1> > &,
+                               FE_Q_Base<PolynomialType,1,spacedim> &)
+  {
+    // no constraints in 1d, hence nothing to fill in
+  }
+
+
+  template <int spacedim>
+  static
+  void initialize_constraints (const std::vector<Point<1> > &/*points*/,
+                               FE_Q_Base<PolynomialType,2,spacedim> &fe)
+  {
+    const unsigned int dim = 2;
+
+    // elements built on TensorProductPolynomialsBubbles use fe.degree-1
+    // below, all others use fe.degree
+    const unsigned int q_deg
+      = (types_are_equal<PolynomialType, TensorProductPolynomialsBubbles<dim> >::value
+         ?
+         fe.degree-1
+         :
+         fe.degree);
+
+    // Restricted to a face, the traces of the shape functions are elements
+    // of P_{k} (in 2d) or Q_{k} (in 3d), with k the degree of the element.
+    // This is what lets us interpolate between the mother face and its
+    // children.
+    //
+    // The idea: on every subface the finite element solutions from both
+    // sides have to coincide. With expansion coefficients a (small cell) and
+    // b (big cell), we look for a relation between a and b such that
+    //   sum_j a_j phi^c_j(x) == sum_j b_j phi_j(x)
+    // holds for all x on the interface, where phi^c_j are the shape
+    // functions of the small cell and phi_j those of the big cell on the
+    // other side. It is enough to enforce this at sufficiently many points:
+    // for n functions we need n evaluation points, which we choose
+    // equidistantly.
+    //
+    // This yields the matrix system
+    //    A a  ==  B b
+    // with
+    //    A_ij = phi^c_j(x_i)
+    //    B_ij = phi_j(x_i)
+    // so that the relation we are after is
+    //    a = A^-1 B b
+    //
+    // For Lagrange interpolation polynomials A_ij is just delta_ij, i.e.
+    //    a_i = B_ij b_j
+    // and therefore interface_constraints(i,j)=B_ij.
+    //
+    // Without Lagrange interpolation polynomials things would be a little
+    // more involved: one would evaluate at a number of points and invert the
+    // interpolation matrix A.
+    //
+    // Note that the matrices for all subfaces are assembled at once instead
+    // of one by one, because this is the layout deal.II needs in the end
+    // anyway.
+
+    // The points x_i are generated in this order (n=degree-1):
+    // *----------*---------*
+    //     1..n   0  n+1..2n
+    // that is, the midpoint of the line comes first, followed by the support
+    // points on subface 0 and then those on subface 1
+    std::vector<Point<dim-1> > constraint_points;
+
+    // midpoint of the line
+    constraint_points.push_back (Point<dim-1> (0.5));
+
+    if (q_deg>1)
+      {
+        const unsigned int n_interior = q_deg-1;
+        const double       spacing    = 1./q_deg;
+
+        // interior support points of subface 0, then of subface 1
+        for (unsigned int subface=0; subface<2; ++subface)
+          for (unsigned int k=1; k<=n_interior; ++k)
+            constraint_points.push_back (
+              GeometryInfo<dim-1>::child_to_cell_coordinates(Point<dim-1>(k*spacing),
+                                                             subface));
+      }
+
+    // Set up the relation between destination (child) and source (mother)
+    // dofs.
+
+    fe.interface_constraints
+    .TableBase<2,double>::reinit (fe.interface_constraints_size());
+
+    // this relies on the element being 1 at index 0 and on evaluating along
+    // the line at zero
+    const std::vector<unsigned int> &index_map_inverse =
+      fe.poly_space.get_numbering_inverse();
+    const std::vector<unsigned int> face_index_map =
+      FE_Q_Helper::face_lexicographic_to_hierarchic_numbering<dim>(q_deg);
+    Assert(std::abs(fe.poly_space.compute_value(index_map_inverse[0],Point<dim>())
+                    - 1.) < 1e-14,
+           ExcInternalError());
+
+    for (unsigned int row=0; row<constraint_points.size(); ++row)
+      {
+        // only the first coordinate is set; the remaining one stays zero
+        Point<dim> evaluation_point;
+        evaluation_point[0] = constraint_points[row](0);
+
+        for (unsigned int j=0; j<q_deg+1; ++j)
+          {
+            double weight = fe.poly_space.compute_value(index_map_inverse[j],
+                                                        evaluation_point);
+
+            // values that are zero up to round-off are stored as exact
+            // zeros; otherwise the constraint matrices would receive
+            // unwanted fill-in, and a constrained DoF would couple to more
+            // DoFs than necessary
+            if (std::fabs(weight) < 1e-13)
+              weight = 0;
+
+            fe.interface_constraints(row,face_index_map[j]) = weight;
+          }
+      }
+  }
+
+
+  template <int spacedim>
+  static
+  void initialize_constraints (const std::vector<Point<1> > &/*points*/,
+                               FE_Q_Base<PolynomialType,3,spacedim> &fe)
+  {
+    const unsigned int dim = 3;
+
+    unsigned int q_deg = fe.degree;
+    if (types_are_equal<PolynomialType,TensorProductPolynomialsBubbles<dim> >::value)
+      q_deg = fe.degree-1;
+
+    // For a detailed documentation of the interpolation see the
+    // FE_Q_Base<2>::initialize_constraints function.
+
+    // In the following the points x_i are constructed in the order as
+    // described in the documentation of the FiniteElement class (fe_base.h),
+    // i.e.
+    //   *--15--4--16--*
+    //   |      |      |
+    //   10 19  6  20  12
+    //   |      |      |
+    //   1--7---0--8---2
+    //   |      |      |
+    //   9  17  5  18  11
+    //   |      |      |
+    //   *--13--3--14--*
+    std::vector<Point<dim-1> > constraint_points;
+
+    // Add midpoint
+    constraint_points.push_back (Point<dim-1> (0.5, 0.5));
+
+    // Add midpoints of lines of "mother-face"
+    constraint_points.push_back (Point<dim-1> (0, 0.5));
+    constraint_points.push_back (Point<dim-1> (1, 0.5));
+    constraint_points.push_back (Point<dim-1> (0.5, 0));
+    constraint_points.push_back (Point<dim-1> (0.5, 1));
+
+    if (q_deg>1)
+      {
+        const unsigned int n=q_deg-1;
+        const double step=1./q_deg;
+        std::vector<Point<dim-2> > line_support_points(n);
+        for (unsigned int i=0; i<n; ++i)
+          line_support_points[i](0)=(i+1)*step;
+        Quadrature<dim-2> qline(line_support_points);
+
+        // auxiliary points in 2d
+        std::vector<Point<dim-1> > p_line(n);
+
+        // Add nodes of lines interior in the "mother-face"
+
+        // line 5: use line 9
+        QProjector<dim-1>::project_to_subface(qline, 0, 0, p_line);
+        for (unsigned int i=0; i<n; ++i)
+          constraint_points.push_back (p_line[i] + Point<dim-1> (0.5, 0));
+        // line 6: use line 10
+        QProjector<dim-1>::project_to_subface(qline, 0, 1, p_line);
+        for (unsigned int i=0; i<n; ++i)
+          constraint_points.push_back (p_line[i] + Point<dim-1> (0.5, 0));
+        // line 7: use line 13
+        QProjector<dim-1>::project_to_subface(qline, 2, 0, p_line);
+        for (unsigned int i=0; i<n; ++i)
+          constraint_points.push_back (p_line[i] + Point<dim-1> (0, 0.5));
+        // line 8: use line 14
+        QProjector<dim-1>::project_to_subface(qline, 2, 1, p_line);
+        for (unsigned int i=0; i<n; ++i)
+          constraint_points.push_back (p_line[i] + Point<dim-1> (0, 0.5));
+
+        // DoFs on bordering lines 9-16
+        for (unsigned int face=0; face<GeometryInfo<dim-1>::faces_per_cell; ++face)
+          for (unsigned int subface=0;
+               subface<GeometryInfo<dim-1>::max_children_per_face; ++subface)
+            {
+              QProjector<dim-1>::project_to_subface(qline, face, subface, p_line);
+              constraint_points.insert(constraint_points.end(),
+                                       p_line.begin(), p_line.end());
+            }
+
+        // Create constraints for interior nodes
+        std::vector<Point<dim-1> > inner_points(n*n);
+        for (unsigned int i=0, iy=1; iy<=n; ++iy)
+          for (unsigned int ix=1; ix<=n; ++ix)
+            inner_points[i++] = Point<dim-1> (ix*step, iy*step);
+
+        // at the moment do this for isotropic face refinement only
+        for (unsigned int child=0;
+             child<GeometryInfo<dim-1>::max_children_per_cell; ++child)
+          for (unsigned int i=0; i<inner_points.size(); ++i)
+            constraint_points.push_back (
+              GeometryInfo<dim-1>::child_to_cell_coordinates(inner_points[i], child));
+      }
+
+    // Now construct relation between destination (child) and source (mother)
+    // dofs.
+    const unsigned int pnts=(q_deg+1)*(q_deg+1);
+
+    // use that the element evaluates to 1 at index 0 and along the line at
+    // zero
+    const std::vector<unsigned int> &index_map_inverse =
+      fe.poly_space.get_numbering_inverse();
+    const std::vector<unsigned int> face_index_map =
+      FE_Q_Helper::face_lexicographic_to_hierarchic_numbering<dim>(q_deg);
+    Assert(std::abs(fe.poly_space.compute_value(index_map_inverse[0],Point<dim>())
+                    - 1.) < 1e-14,
+           ExcInternalError());
+
+    fe.interface_constraints
+    .TableBase<2,double>::reinit (fe.interface_constraints_size());
+
+    for (unsigned int i=0; i<constraint_points.size(); ++i)
+      {
+        const double interval = (double) (q_deg * 2);
+        bool mirror[dim - 1];
+        Point<dim> constraint_point;
+
+        // Eliminate FP errors in constraint points. Due to their origin, they
+        // must all be fractions of the unit interval. If we have polynomial
+        // degree 4, the refined element has 8 intervals.  Hence the
+        // coordinates must be 0, 0.125, 0.25, 0.375 etc.  Now the coordinates
+        // of the constraint points will be multiplied by the inverse of the
+        // interval size (in the example by 8).  After that the coordinates
+        // must be integral numbers. Hence a normal truncation is performed
+        // and the coordinates will be scaled back. The equal treatment of all
+        // coordinates should eliminate any FP errors.
+        for (unsigned int k=0; k<dim-1; ++k)
+          {
+            const int coord_int =
+              static_cast<int> (constraint_points[i](k) * interval + 0.25);
+            constraint_point(k) = 1.*coord_int / interval;
+
+            // The following lines of code should eliminate the problems with
+            // the Constraint-Matrix, which appeared for P>=4. The
+            // ConstraintMatrix class complained about different constraints
+            // for the same entry of the Constraint-Matrix.  Actually this
+            // difference could be attributed to FP errors, as it was in the
+            // range of 1.0e-16. These errors originate in the loss of
+            // symmetry in the FP approximation of the shape-functions.
+            // Considering a 3rd order shape function in 1D, we have
+            // N0(x)=N3(1-x) and N1(x)=N2(1-x).  For higher order polynomials
+            // the FP approximations of the shape functions do not satisfy
+            // these equations any more!  Thus in the following code
+            // everything is computed in the interval x \in [0..0.5], which is
+            // sufficient to express all values that could come out from a
+            // computation of any shape function in the full interval
+            // [0..1]. If x > 0.5 the computation is done for 1-x with the
+            // shape function N_{p-n} instead of N_n.  Hence symmetry is
+            // preserved and everything works fine...
+            //
+            // For a different explanation of the problem, see the discussion
+            // in the FiniteElement class for constraint matrices in 3d.
+            mirror[k] = (constraint_point(k) > 0.5);
+            if (mirror[k])
+              constraint_point(k) = 1.0 - constraint_point(k);
+          }
+
+        for (unsigned int j=0; j<pnts; ++j)
+          {
+            unsigned int indices[2] = { j % (q_deg+1), j / (q_deg+1) };
+
+            for (unsigned int k = 0; k<2; ++k)
+              if (mirror[k])
+                indices[k] = q_deg - indices[k];
+
+            const unsigned int
+            new_index = indices[1] * (q_deg + 1) + indices[0];
+
+            fe.interface_constraints(i,face_index_map[j]) =
+              fe.poly_space.compute_value (index_map_inverse[new_index],
+                                           constraint_point);
+
+            // if the value is small up to round-off, then simply set it to
+            // zero to avoid unwanted fill-in of the constraint matrices
+            // (which would then increase the number of other DoFs a
+            // constrained DoF would couple to)
+            if (std::fabs(fe.interface_constraints(i,face_index_map[j])) < 1e-13)
+              fe.interface_constraints(i,face_index_map[j]) = 0;
+          }
+      }
+  }
+};
+
+
+
+// Constructor. Passes the polynomial space, the element data and the
+// restriction flags on to FE_Poly, together with a one-entry list of
+// component masks whose single mask consists of one 'true'. q_degree is set
+// to this->degree-1 if PolynomialType is TensorProductPolynomialsBubbles<dim>
+// and to this->degree for every other polynomial space.
+template <class PolynomialType, int dim, int spacedim>
+FE_Q_Base<PolynomialType,dim,spacedim>::FE_Q_Base
+(const PolynomialType         &poly_space,
+ const FiniteElementData<dim> &fe_data,
+ const std::vector<bool>      &restriction_is_additive_flags)
+  :
+  FE_Poly<PolynomialType,dim,spacedim>
+  (poly_space,
+   fe_data,
+   restriction_is_additive_flags,
+   std::vector<ComponentMask>(1, std::vector<bool>(1,true))),
+  q_degree (types_are_equal<PolynomialType, TensorProductPolynomialsBubbles<dim> >::value
+            ?
+            this->degree-1
+            :
+            this->degree)
+{}
+
+
+
+// Set up all data that depends on the 1d support points @p points: the
+// numbering of the polynomial space, the unit support points on the cell and
+// on faces, the interface constraints and the quad dof permutation table.
+// The first entry of @p points has to be 0 and the last one has to be 1.
+template <class PolynomialType, int dim, int spacedim>
+void
+FE_Q_Base<PolynomialType,dim,spacedim>::initialize (const std::vector<Point<1> > &points)
+{
+  Assert (points[0][0] == 0,
+          ExcMessage ("The first support point has to be zero."));
+  Assert (points.back()[0] == 1,
+          ExcMessage ("The last support point has to be one."));
+
+  // number of dofs of the plain Q part. the element itself may have exactly
+  // that many dofs, or one more, or dim more (derived classes such as
+  // FE_Q_DG0 add dofs but otherwise share this code)
+  const unsigned int q_dofs_per_cell = Utilities::fixed_power<dim>(q_degree+1);
+  Assert(q_dofs_per_cell == this->dofs_per_cell ||
+         q_dofs_per_cell+1 == this->dofs_per_cell ||
+         q_dofs_per_cell+dim == this->dofs_per_cell , ExcInternalError());
+
+  // tell the polynomial space about the numbering: the Q part gets the
+  // hierarchic-to-lexicographic numbering, every additional dof keeps its
+  // own index
+  {
+    const FiniteElementData<dim> q_fe_data (get_dpo_vector(q_degree),
+                                            1,
+                                            q_degree);
+
+    std::vector<unsigned int> numbering (q_dofs_per_cell);
+    FETools::hierarchic_to_lexicographic_numbering (q_fe_data, numbering);
+
+    for (unsigned int extra_dof=q_dofs_per_cell;
+         extra_dof<this->dofs_per_cell; ++extra_dof)
+      numbering.push_back(extra_dof);
+
+    this->poly_space.set_numbering(numbering);
+  }
+
+  // support points on the cell and on the faces
+  initialize_unit_support_points (points);
+  initialize_unit_face_support_points (points);
+
+  // hanging node constraints
+  initialize_constraints (points);
+
+  // embedding and restriction matrices are deliberately not set up here;
+  // they are initialized on demand in get_restriction_matrix and
+  // get_prolongation_matrix
+
+  this->initialize_quad_dof_index_permutation();
+}
+
+
+
+// Fill @p interpolation_matrix with the matrix that interpolates from the
+// element @p x_source_fe to this element on a cell.
+//
+// Two kinds of source elements are handled: FE_Q_Base elements with the same
+// PolynomialType, for which the source shape functions are evaluated at the
+// unit support points of this element, and FE_Nothing, for which there is
+// nothing to compute. For every other source element an
+// ExcInterpolationNotImplemented exception is thrown.
+//
+// The matrix has to be sized this->dofs_per_cell x x_source_fe.dofs_per_cell
+// by the caller; this is verified by assertions.
+template <class PolynomialType, int dim, int spacedim>
+void
+FE_Q_Base<PolynomialType,dim,spacedim>::
+get_interpolation_matrix (const FiniteElement<dim,spacedim> &x_source_fe,
+                          FullMatrix<double>                &interpolation_matrix) const
+{
+  // go through the list of elements we can interpolate from
+  if (const FE_Q_Base<PolynomialType,dim,spacedim> *source_fe
+      = dynamic_cast<const FE_Q_Base<PolynomialType,dim,spacedim>*>(&x_source_fe))
+    {
+      // ok, source is a Q element, so we will be able to do the work
+      Assert (interpolation_matrix.m() == this->dofs_per_cell,
+              ExcDimensionMismatch (interpolation_matrix.m(),
+                                    this->dofs_per_cell));
+      // report the number of columns here, since that is the quantity that
+      // is being compared
+      Assert (interpolation_matrix.n() == x_source_fe.dofs_per_cell,
+              ExcDimensionMismatch (interpolation_matrix.n(),
+                                    x_source_fe.dofs_per_cell));
+
+      // only evaluate Q dofs
+      const unsigned int q_dofs_per_cell = Utilities::fixed_power<dim>(q_degree+1);
+      const unsigned int source_q_dofs_per_cell = Utilities::fixed_power<dim>(source_fe->degree+1);
+
+      // evaluation is simply done by evaluating the other FE's basis functions on
+      // the unit support points (FE_Q has the property that the cell
+      // interpolation matrix is a unit matrix, so no need to evaluate it and
+      // invert it)
+      for (unsigned int j=0; j<q_dofs_per_cell; ++j)
+        {
+          // read in a point on this cell and evaluate the shape functions there
+          const Point<dim> p = this->unit_support_points[j];
+
+          // FE_Q element evaluates to 1 in unit support point and to zero in all
+          // other points by construction
+          Assert(std::abs(this->poly_space.compute_value (j, p)-1.)<1e-13,
+                 ExcInternalError());
+
+          for (unsigned int i=0; i<source_q_dofs_per_cell; ++i)
+            interpolation_matrix(j,i) = source_fe->poly_space.compute_value (i, p);
+        }
+
+      // for FE_Q_DG0, add one last row of identity: the additional dof of
+      // this element couples only to the additional dof of the source
+      if (q_dofs_per_cell < this->dofs_per_cell)
+        {
+          AssertDimension(source_q_dofs_per_cell+1, source_fe->dofs_per_cell);
+          for (unsigned int i=0; i<source_q_dofs_per_cell; ++i)
+            interpolation_matrix(q_dofs_per_cell, i) = 0.;
+          for (unsigned int j=0; j<q_dofs_per_cell; ++j)
+            interpolation_matrix(j, source_q_dofs_per_cell) = 0.;
+          interpolation_matrix(q_dofs_per_cell, source_q_dofs_per_cell) = 1.;
+        }
+
+      // cut off very small values. the threshold grows with the polynomial
+      // degree and the dimension
+      const double eps = 2e-13*q_degree*dim;
+      for (unsigned int i=0; i<this->dofs_per_cell; ++i)
+        for (unsigned int j=0; j<source_fe->dofs_per_cell; ++j)
+          if (std::fabs(interpolation_matrix(i,j)) < eps)
+            interpolation_matrix(i,j) = 0.;
+
+      // make sure that the row sum of each of the matrices is 1 at this
+      // point. this must be so since the shape functions sum up to 1
+      for (unsigned int i=0; i<this->dofs_per_cell; ++i)
+        {
+          double sum = 0.;
+          for (unsigned int j=0; j<source_fe->dofs_per_cell; ++j)
+            sum += interpolation_matrix(i,j);
+
+          Assert (std::fabs(sum-1) < eps, ExcInternalError());
+        }
+    }
+  else if (dynamic_cast<const FE_Nothing<dim>*>(&x_source_fe))
+    {
+      // the element we want to interpolate from is an FE_Nothing. this
+      // element represents a function that is constant zero and has no
+      // degrees of freedom, so the interpolation is simply a multiplication
+      // with a n_dofs x 0 matrix. there is nothing to do here
+
+      // we would like to verify that the number of rows and columns of
+      // the matrix equals this->dofs_per_cell and zero. unfortunately,
+      // whenever we do FullMatrix::reinit(m,0), it sets both rows and
+      // columns to zero, instead of m and zero. thus, only test the
+      // number of columns (and report that number if the test fails)
+      Assert (interpolation_matrix.n() == x_source_fe.dofs_per_cell,
+              ExcDimensionMismatch (interpolation_matrix.n(),
+                                    x_source_fe.dofs_per_cell));
+
+    }
+  else
+    AssertThrow (false,
+                 (typename FiniteElement<dim,spacedim>::ExcInterpolationNotImplemented()));
+
+}
+
+
+
+// Interpolation onto a complete face. This is the subface case with the
+// subface index set to numbers::invalid_unsigned_int, so the work is done by
+// get_subface_interpolation_matrix(). Not possible in 1d.
+template <class PolynomialType, int dim, int spacedim>
+void
+FE_Q_Base<PolynomialType,dim,spacedim>::
+get_face_interpolation_matrix (const FiniteElement<dim,spacedim> &source_fe,
+                               FullMatrix<double>                &interpolation_matrix) const
+{
+  Assert (dim > 1, ExcImpossibleInDim(1));
+
+  // marker value that selects the whole face instead of one of its children
+  const unsigned int whole_face = numbers::invalid_unsigned_int;
+
+  get_subface_interpolation_matrix (source_fe,
+                                    whole_face,
+                                    interpolation_matrix);
+}
+
+
+
+// Fill @p interpolation_matrix for the face (if @p subface equals
+// numbers::invalid_unsigned_int) or for child @p subface of the face.
+//
+// If @p x_source_fe is an FE_Q_Base with the same PolynomialType, entry
+// (i,j) is the value of the j-th face shape function of this element at the
+// i-th unit face support point of the source element, where the support
+// points are placed on face 0 or on the requested child of face 0. Values
+// within a small tolerance of 0 or 1 are rounded to exactly 0 or 1.
+//
+// If the source is an FE_Nothing the matrix is left untouched. Any other
+// source element results in an ExcInterpolationNotImplemented exception.
+template <class PolynomialType, int dim, int spacedim>
+void
+FE_Q_Base<PolynomialType,dim,spacedim>::
+get_subface_interpolation_matrix (const FiniteElement<dim,spacedim> &x_source_fe,
+                                  const unsigned int                 subface,
+                                  FullMatrix<double>                &interpolation_matrix) const
+{
+  Assert (interpolation_matrix.m() == x_source_fe.dofs_per_face,
+          ExcDimensionMismatch (interpolation_matrix.m(),
+                                x_source_fe.dofs_per_face));
+
+  // see if source is a Q element
+  if (const FE_Q_Base<PolynomialType,dim,spacedim> *source_fe
+      = dynamic_cast<const FE_Q_Base<PolynomialType,dim,spacedim> *>(&x_source_fe))
+    {
+      // the column count is tested only in this branch because a table of
+      // size 2x0 reports its size as 0x0
+      Assert (interpolation_matrix.n() == this->dofs_per_face,
+              ExcDimensionMismatch (interpolation_matrix.n(),
+                                    this->dofs_per_face));
+
+      // The element whose DoFs are to be constrained has to be the one with
+      // the higher polynomial degree. The computation below would also run
+      // without this condition, but the resulting matrices might lead to
+      // problems in the hp procedures that use this method.
+      Assert (this->dofs_per_face <= source_fe->dofs_per_face,
+              (typename FiniteElement<dim,spacedim>::
+               ExcInterpolationNotImplemented ()));
+
+      // the face support points of the source element, as a quadrature
+      // formula on the reference face
+      const Quadrature<dim-1>
+      quad_face_support (source_fe->get_unit_face_support_points ());
+
+      // Rule of thumb for the floating point accuracy that can be expected
+      // for a given polynomial degree. Used to cut off values close to zero
+      // or one.
+      const double eps = 2e-13*q_degree*(dim-1);
+
+      // compute the interpolation matrix by simply taking the value at the
+      // support points.
+//TODO: Verify that all faces are the same with respect to
+// these support points. Furthermore, check if something has to
+// be done for the face orientation flag in 3D.
+      const Quadrature<dim> subface_quadrature
+        = (subface == numbers::invalid_unsigned_int
+           ?
+           QProjector<dim>::project_to_face (quad_face_support, 0)
+           :
+           QProjector<dim>::project_to_subface (quad_face_support, 0, subface));
+
+      const unsigned int n_source_face_dofs = source_fe->dofs_per_face;
+      const unsigned int n_this_face_dofs   = this->dofs_per_face;
+
+      for (unsigned int row=0; row<n_source_face_dofs; ++row)
+        {
+          const Point<dim> &support_point = subface_quadrature.point (row);
+
+          for (unsigned int col=0; col<n_this_face_dofs; ++col)
+            {
+              double value = this->shape_value (this->face_to_cell_index(col, 0),
+                                                support_point);
+
+              // Snap values that are within eps of 1 or of 0 to exactly 1
+              // or 0. Unfortunately, this is required to avoid problems
+              // with higher order elements.
+              if (std::fabs (value - 1.0) < eps)
+                value = 1.0;
+              if (std::fabs (value) < eps)
+                value = 0.0;
+
+              interpolation_matrix(row,col) = value;
+            }
+        }
+
+      // every row has to sum up to 1 at this point, since the shape
+      // functions sum up to 1
+      for (unsigned int row=0; row<n_source_face_dofs; ++row)
+        {
+          double sum = 0.;
+
+          for (unsigned int col=0; col<n_this_face_dofs; ++col)
+            sum += interpolation_matrix(row,col);
+
+          Assert (std::fabs(sum-1) < eps, ExcInternalError());
+        }
+    }
+  else if (dynamic_cast<const FE_Nothing<dim> *>(&x_source_fe) != 0)
+    {
+      // nothing to do here, the FE_Nothing has no degrees of freedom anyway
+    }
+  else
+    AssertThrow (false,(typename FiniteElement<dim,spacedim>::
+                        ExcInterpolationNotImplemented()));
+}
+
+
+
+// Report that this class provides the functions needed for hp constraints;
+// the answer is unconditionally 'true'.
+template <class PolynomialType, int dim, int spacedim>
+bool
+FE_Q_Base<PolynomialType,dim,spacedim>::hp_constraints_are_implemented () const
+{
+  const bool implemented = true;
+  return implemented;
+}
+
+
+
+
+// Return the pairs (dof of this element, dof of @p fe_other) of vertex dofs
+// that have to be identified with each other.
+//
+// - other element is an FE_Q_Base of the same PolynomialType: the single
+//   pair (0,0)
+// - other element is an FE_Nothing, or has no dofs on faces: empty list
+// - anything else: not implemented (assertion), empty list
+template <class PolynomialType, int dim, int spacedim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FE_Q_Base<PolynomialType,dim,spacedim>::
+hp_vertex_dof_identities (const FiniteElement<dim,spacedim> &fe_other) const
+{
+  typedef std::vector<std::pair<unsigned int, unsigned int> > IdentityList;
+
+  // two Q elements: there should be exactly one DoF of each element at a
+  // vertex, and the two should have identical value
+  if (dynamic_cast<const FE_Q_Base<PolynomialType,dim,spacedim>*>(&fe_other) != 0)
+    return IdentityList (1, std::make_pair (0U, 0U));
+
+  // the FE_Nothing has no degrees of freedom, so there are no equivalencies
+  // to be recorded. likewise, if the other element has no dofs on faces at
+  // all, it declares itself discontinuous and no continuity could be
+  // enforced even if we knew what kind of element it is -- so there are no
+  // constraints to declare either
+  if ((dynamic_cast<const FE_Nothing<dim>*>(&fe_other) != 0)
+      ||
+      (fe_other.dofs_per_face == 0))
+    return IdentityList ();
+
+  Assert (false, ExcNotImplemented());
+  return IdentityList ();
+}
+
+
+
+// Return the pairs (dof of this element, dof of @p fe_other) of line dofs
+// that have to be identified with each other. Indices are counted among the
+// dofs of a line, starting at zero.
+//
+// - other element is an FE_Q_Base of the same PolynomialType: all pairs
+//   whose support points have the same position (up to 1e-14)
+// - other element is an FE_Nothing, or has no dofs on faces: empty list
+// - anything else: not implemented (assertion), empty list
+template <class PolynomialType, int dim, int spacedim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FE_Q_Base<PolynomialType,dim,spacedim>::
+hp_line_dof_identities (const FiniteElement<dim,spacedim> &fe_other) const
+{
+  typedef std::vector<std::pair<unsigned int, unsigned int> > IdentityList;
+
+  const FE_Q_Base<PolynomialType,dim,spacedim> *fe_q_other
+    = dynamic_cast<const FE_Q_Base<PolynomialType,dim,spacedim>*>(&fe_other);
+
+  if (fe_q_other == 0)
+    {
+      // the FE_Nothing has no degrees of freedom, so there are no
+      // equivalencies to be recorded. an element without any dofs on faces
+      // declares itself discontinuous, so no continuity can be enforced and
+      // there are no constraints to declare either
+      if ((dynamic_cast<const FE_Nothing<dim>*>(&fe_other) != 0)
+          ||
+          (fe_other.dofs_per_face == 0))
+        return IdentityList ();
+
+      Assert (false, ExcNotImplemented());
+      return IdentityList ();
+    }
+
+  // dofs are located along lines, so two dofs are identical if they are
+  // located at identical positions. with equidistant points only, one could
+  // test something like (i+1)*q == (j+1)*p, but other support points are
+  // possible (e.g. Gauss-Lobatto points). therefore compare the first
+  // coordinate of the unit support points, accessed through the
+  // lexicographic ordering in x-direction at indices 1 ... degree-1
+  // (indices 0 and degree belong to vertex dofs)
+  const unsigned int n_this  = this->degree - 1;
+  const unsigned int n_other = fe_q_other->degree - 1;
+
+  const std::vector<unsigned int> &numbering_inverse_this =
+    this->poly_space.get_numbering_inverse();
+  const std::vector<unsigned int> &numbering_inverse_other =
+    fe_q_other->poly_space.get_numbering_inverse();
+
+  IdentityList identities;
+  for (unsigned int i=0; i<n_this; ++i)
+    for (unsigned int j=0; j<n_other; ++j)
+      {
+        const double x_this =
+          this->unit_support_points[numbering_inverse_this[i+1]][0];
+        const double x_other =
+          fe_q_other->unit_support_points[numbering_inverse_other[j+1]][0];
+
+        if (std::fabs(x_this - x_other) < 1e-14)
+          identities.push_back (std::make_pair(i,j));
+      }
+
+  return identities;
+}
+
+
+
+// Return the pairs (dof of this element, dof of @p fe_other) of quad dofs
+// that have to be identified with each other. Indices are counted among the
+// dofs of a quad, starting at zero.
+//
+// - other element is an FE_Q_Base of the same PolynomialType: all pairs
+//   whose support points agree in both coordinate directions (up to 1e-14)
+// - other element is an FE_Nothing, or has no dofs on faces: empty list
+// - anything else: not implemented (assertion), empty list
+template <class PolynomialType, int dim, int spacedim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FE_Q_Base<PolynomialType,dim,spacedim>::
+hp_quad_dof_identities (const FiniteElement<dim,spacedim>        &fe_other) const
+{
+  typedef std::vector<std::pair<unsigned int, unsigned int> > IdentityList;
+
+  const FE_Q_Base<PolynomialType,dim,spacedim> *fe_q_other
+    = dynamic_cast<const FE_Q_Base<PolynomialType,dim,spacedim>*>(&fe_other);
+
+  if (fe_q_other == 0)
+    {
+      // the FE_Nothing has no degrees of freedom, so there are no
+      // equivalencies to be recorded. an element without any dofs on faces
+      // declares itself discontinuous, so no continuity can be enforced and
+      // there are no constraints to declare either
+      if ((dynamic_cast<const FE_Nothing<dim>*>(&fe_other) != 0)
+          ||
+          (fe_other.dofs_per_face == 0))
+        return IdentityList ();
+
+      Assert (false, ExcNotImplemented());
+      return IdentityList ();
+    }
+
+  // same idea as for lines, except that a dof on the quad is characterized
+  // by two indices (i1,i2) on this element and (j1,j2) on the other one.
+  // since the dofs are ordered lexicographically along the first line and
+  // we have a tensor product, comparing the 1d positions in both directions
+  // is sufficient
+  const unsigned int n_this  = this->degree - 1;
+  const unsigned int n_other = fe_q_other->degree - 1;
+
+  const std::vector<unsigned int> &numbering_inverse_this =
+    this->poly_space.get_numbering_inverse();
+  const std::vector<unsigned int> &numbering_inverse_other =
+    fe_q_other->poly_space.get_numbering_inverse();
+
+  IdentityList identities;
+  for (unsigned int i1=0; i1<n_this; ++i1)
+    for (unsigned int i2=0; i2<n_this; ++i2)
+      for (unsigned int j1=0; j1<n_other; ++j1)
+        for (unsigned int j2=0; j2<n_other; ++j2)
+          {
+            // first index pair has to match ...
+            const double first_distance =
+              std::fabs(this->unit_support_points[numbering_inverse_this[i1+1]][0]-
+                        fe_q_other->unit_support_points[numbering_inverse_other[j1+1]][0]);
+            if (!(first_distance < 1e-14))
+              continue;
+
+            // ... and so does the second one
+            const double second_distance =
+              std::fabs(this->unit_support_points[numbering_inverse_this[i2+1]][0]-
+                        fe_q_other->unit_support_points[numbering_inverse_other[j2+1]][0]);
+            if (!(second_distance < 1e-14))
+              continue;
+
+            identities.push_back (std::make_pair(i1*n_this+i2,
+                                                 j1*n_other+j2));
+          }
+
+  return identities;
+}
+
+
+
+// Decide which of this element and @p fe_other dominates on a common face.
+//
+// - other element is an FE_Q_Base of the same PolynomialType: the element
+//   with the lower degree dominates; for equal degrees either one can
+// - other element is an FE_Nothing: it dominates if its is_dominating()
+//   flag is set, otherwise there are no requirements
+// - anything else: not implemented (assertion), neither element dominates
+template <class PolynomialType, int dim, int spacedim>
+FiniteElementDomination::Domination
+FE_Q_Base<PolynomialType,dim,spacedim>::
+compare_for_face_domination (const FiniteElement<dim,spacedim> &fe_other) const
+{
+  if (const FE_Q_Base<PolynomialType,dim,spacedim> *fe_q_other
+      = dynamic_cast<const FE_Q_Base<PolynomialType,dim,spacedim>*>(&fe_other))
+    {
+      if (this->degree == fe_q_other->degree)
+        return FiniteElementDomination::either_element_can_dominate;
+
+      return (this->degree < fe_q_other->degree
+              ?
+              FiniteElementDomination::this_element_dominates
+              :
+              FiniteElementDomination::other_element_dominates);
+    }
+
+  if (const FE_Nothing<dim> *fe_nothing = dynamic_cast<const FE_Nothing<dim>*>(&fe_other))
+    {
+      // a non-dominating FE_Nothing has no degrees of freedom and is
+      // typically used in a context where we don't require any continuity
+      // along the interface
+      return (fe_nothing->is_dominating()
+              ?
+              FiniteElementDomination::other_element_dominates
+              :
+              FiniteElementDomination::no_requirements);
+    }
+
+  Assert (false, ExcNotImplemented());
+  return FiniteElementDomination::neither_element_dominates;
+}
+
+
+//---------------------------------------------------------------------------
+// Auxiliary functions
+//---------------------------------------------------------------------------
+
+
+
+// Fill this->unit_support_points from the 1d support points @p points. The
+// points of the dim-dimensional quadrature object built from @p points are
+// stored at the positions given by the inverse numbering of the polynomial
+// space.
+template <class PolynomialType, int dim, int spacedim>
+void FE_Q_Base<PolynomialType,dim,spacedim>::initialize_unit_support_points
+(const std::vector<Point<1> > &points)
+{
+  Quadrature<1>   support_1d (points);
+  Quadrature<dim> support_quadrature (support_1d);
+
+  const unsigned int n_support_points = support_quadrature.size();
+  this->unit_support_points.resize(n_support_points);
+
+  const std::vector<unsigned int> &numbering_inverse =
+    this->poly_space.get_numbering_inverse();
+
+  for (unsigned int point=0; point<n_support_points; ++point)
+    this->unit_support_points[numbering_inverse[point]]
+      = support_quadrature.point(point);
+}
+
+
+
+// Fill this->unit_face_support_points from the 1d support points @p points.
+// The points of the (dim-1)-dimensional quadrature object built from
+// @p points are stored at the positions given by
+// FE_Q_Helper::face_lexicographic_to_hierarchic_numbering. Does nothing in
+// 1d.
+template <class PolynomialType, int dim, int spacedim>
+void FE_Q_Base<PolynomialType,dim,spacedim>::initialize_unit_face_support_points
+(const std::vector<Point<1> > &points)
+{
+  // no faces in 1d, so nothing to do
+  if (dim == 1)
+    return;
+
+  const unsigned int codim = dim-1;
+
+  // build the face quadrature first, then size the array exactly once from
+  // the number of points it actually provides (the array used to be resized
+  // twice in a row, which was redundant)
+  Quadrature<1> support_1d(points);
+  Quadrature<codim> support_quadrature(support_1d);
+  this->unit_face_support_points.resize(support_quadrature.size());
+
+  // find renumbering of faces and assign from values of quadrature
+  const std::vector<unsigned int> face_index_map =
+    FE_Q_Helper::face_lexicographic_to_hierarchic_numbering<dim>(q_degree);
+
+  for (unsigned int k=0; k<support_quadrature.size(); k++)
+    this->unit_face_support_points[face_index_map[k]] = support_quadrature.point(k);
+}
+
+
+
+// Fill adjust_quad_dof_index_for_face_orientation_table and
+// adjust_line_dof_index_for_line_orientation_table. For every dof on a quad
+// (resp. line) the tables store the difference between the index the dof
+// gets under a given orientation and its index 'local' (resp. 'i') in the
+// unpermuted numbering. Only does something in 3d.
+template <class PolynomialType, int dim, int spacedim>
+void
+FE_Q_Base<PolynomialType,dim,spacedim>::initialize_quad_dof_index_permutation ()
+{
+  // for 1D and 2D, do nothing
+  if (dim < 3)
+    return;
+
+  // the table must already hold 8 entries for every dof on a quad, one for
+  // each combination of the three flags face_orientation, face_flip and
+  // face_rotation
+  Assert (this->adjust_quad_dof_index_for_face_orientation_table.n_elements()==8*this->dofs_per_quad,
+          ExcInternalError());
+
+  // number of dofs per direction in the interior of a quad
+  const unsigned int n=q_degree-1;
+  Assert(n*n==this->dofs_per_quad, ExcInternalError());
+
+  // alias for the table to fill
+  Table<2,int> &data=this->adjust_quad_dof_index_for_face_orientation_table;
+
+  // the dofs on a face are connected to a n x n matrix. for example, for
+  // degree==4 we have the following dofs on a quad
+
+  //  ___________
+  // |           |
+  // |  6  7  8  |
+  // |           |
+  // |  3  4  5  |
+  // |           |
+  // |  0  1  2  |
+  // |___________|
+  //
+  // we have dof_no=i+n*j with index i in x-direction and index j in
+  // y-direction running from 0 to n-1.  to extract i and j we can use
+  // i=dof_no%n and j=dof_no/n. i and j can then be used to construct the
+  // rotated and mirrored numbers.
+
+  // note that i, j, n and local are all unsigned, so the differences below
+  // are formed in unsigned arithmetic and only converted to int when stored
+  // in the table.
+  // NOTE(review): negative offsets therefore rely on the wrapped unsigned
+  // value converting to the intended negative int -- confirm this holds on
+  // all supported platforms.
+
+  for (unsigned int local=0; local<this->dofs_per_quad; ++local)
+    // face support points are in lexicographic ordering with x running
+    // fastest. invert that (y running fastest)
+    {
+      unsigned int i=local%n,
+                   j=local/n;
+
+      // face_orientation=false, face_flip=false, face_rotation=false
+      data(local,0)=j       + i      *n - local;
+      // face_orientation=false, face_flip=false, face_rotation=true
+      data(local,1)=i       + (n-1-j)*n - local;
+      // face_orientation=false, face_flip=true,  face_rotation=false
+      data(local,2)=(n-1-j) + (n-1-i)*n - local;
+      // face_orientation=false, face_flip=true,  face_rotation=true
+      data(local,3)=(n-1-i) + j      *n - local;
+      // face_orientation=true,  face_flip=false, face_rotation=false
+      // (all flags at these values: the index stays as it is)
+      data(local,4)=0;
+      // face_orientation=true,  face_flip=false, face_rotation=true
+      data(local,5)=j       + (n-1-i)*n - local;
+      // face_orientation=true,  face_flip=true,  face_rotation=false
+      data(local,6)=(n-1-i) + (n-1-j)*n - local;
+      // face_orientation=true,  face_flip=true,  face_rotation=true
+      data(local,7)=(n-1-j) + i      *n - local;
+    }
+
+  // additionally initialize reordering of line dofs: dof i on a line is
+  // mapped to dofs_per_line-1-i, i.e. the order along the line is reversed;
+  // again the offset relative to i is what gets stored
+  for (unsigned int i=0; i<this->dofs_per_line; ++i)
+    this->adjust_line_dof_index_for_line_orientation_table[i]=this->dofs_per_line-1-i - i;
+}
+
+
+
+template <class PolynomialType, int dim, int spacedim>
+unsigned int
+FE_Q_Base<PolynomialType,dim,spacedim>::
+face_to_cell_index (const unsigned int face_index,
+                    const unsigned int face,
+                    const bool face_orientation,
+                    const bool face_flip,
+                    const bool face_rotation) const
+{
+  Assert (face_index < this->dofs_per_face,
+          ExcIndexRange(face_index, 0, this->dofs_per_face));
+  Assert (face < GeometryInfo<dim>::faces_per_cell,
+          ExcIndexRange(face, 0, GeometryInfo<dim>::faces_per_cell));
+
+//TODO: we could presumably solve the 3d case below using the
+// adjust_quad_dof_index_for_face_orientation_table field. for the
+// 2d case, we can't use adjust_line_dof_index_for_line_orientation_table
+// since that array is empty (presumably because we thought that
+// there are no flipped edges in 2d, but these can happen in
+// DoFTools::make_periodicity_constraints, for example). so we
+// would need to either fill this field, or rely on derived classes
+// implementing this function, as we currently do
+
+  // we need to distinguish between DoFs on vertices, lines and in 3d quads.
+  // do so in a sequence of if-else statements
+  if (face_index < this->first_face_line_index)
+    // DoF is on a vertex
+    {
+      // get the number of the vertex on the face that corresponds to this DoF,
+      // along with the number of the DoF on this vertex
+      const unsigned int face_vertex         = face_index / this->dofs_per_vertex;
+      const unsigned int dof_index_on_vertex = face_index % this->dofs_per_vertex;
+
+      // then get the number of this vertex on the cell and translate
+      // this to a DoF number on the cell
+      return (GeometryInfo<dim>::face_to_cell_vertices(face, face_vertex,
+                                                       face_orientation,
+                                                       face_flip,
+                                                       face_rotation)
+              * this->dofs_per_vertex
+              +
+              dof_index_on_vertex);
+    }
+  else if (face_index < this->first_face_quad_index)
+    // DoF is on a line
+    {
+      // do the same kind of translation as before. we need to only consider
+      // DoFs on the lines, i.e., ignoring those on the vertices
+      const unsigned int index = face_index - this->first_face_line_index;
+
+      const unsigned int face_line         = index / this->dofs_per_line;
+      const unsigned int dof_index_on_line = index % this->dofs_per_line;
+
+      // we now also need to adjust the line index for the case of
+      // face orientation, face flips, etc
+      unsigned int adjusted_dof_index_on_line;
+      switch (dim)
+        {
+        case 1:
+          Assert (false, ExcInternalError());
+
+        case 2:
+          // in 2d, only face_flip has a meaning. if it is set, consider
+          // dofs in reverse order
+          if (face_flip == false)
+            adjusted_dof_index_on_line = dof_index_on_line;
+          else
+            adjusted_dof_index_on_line = this->dofs_per_line - dof_index_on_line - 1;
+          break;
+
+        case 3:
+          // in 3d, things are difficult. someone will have to think
+          // about how this code here should look like, by drawing a bunch
+          // of pictures of how all the faces can look like with the various
+          // flips and rotations.
+          //
+          // that said, the Q2 case is easy enough to implement, as is the case
+          // where everything is in standard orientation
+          Assert ((this->dofs_per_line <= 1) ||
+                  ((face_orientation == true) &&
+                   (face_flip == false) &&
+                   (face_rotation == false)),
+                  ExcNotImplemented());
+          adjusted_dof_index_on_line = dof_index_on_line;
+          break;
+        }
+
+      return (this->first_line_index
+              + GeometryInfo<dim>::face_to_cell_lines(face, face_line,
+                                                      face_orientation,
+                                                      face_flip,
+                                                      face_rotation)
+              * this->dofs_per_line
+              +
+              adjusted_dof_index_on_line);
+    }
+  else
+    // DoF is on a quad
+    {
+      Assert (dim >= 3, ExcInternalError());
+
+      // ignore vertex and line dofs
+      const unsigned int index = face_index - this->first_face_quad_index;
+
+      // the same is true here as above for the 3d case -- someone will
+      // just have to draw a bunch of pictures. in the meantime,
+      // we can implement the Q2 case in which it is simple
+      Assert ((this->dofs_per_quad <= 1) ||
+              ((face_orientation == true) &&
+               (face_flip == false) &&
+               (face_rotation == false)),
+              ExcNotImplemented());
+      return (this->first_quad_index
+              + face * this->dofs_per_quad
+              + index);
+    }
+}
+
+
+// Build the dofs-per-object vector for degree deg: entry 0 is 1 and each
+// further entry is the previous one times (deg-1). deg == 0 is rejected.
+template <class PolynomialType, int dim, int spacedim>
+std::vector<unsigned int>
+FE_Q_Base<PolynomialType,dim,spacedim>::get_dpo_vector(const unsigned int deg)
+{
+  AssertThrow(deg>0,ExcMessage("FE_Q needs to be of degree > 0."));
+  std::vector<unsigned int> dpo(dim+1, 1U);
+  for (unsigned int i=1; i<dpo.size(); ++i)
+    dpo[i]=dpo[i-1]*(deg-1);
+  return dpo;
+}
+
+// Forward the given 1d points together with this element to
+// Implementation::initialize_constraints, which does the actual work.
+template <class PolynomialType, int dim, int spacedim>
+void
+FE_Q_Base<PolynomialType,dim,spacedim>::initialize_constraints
+(const std::vector<Point<1> > &points)
+{
+  Implementation::initialize_constraints (points, *this);
+}
+
+// Return the prolongation matrix for the given child and refinement case.
+// It is computed on first request while holding this->mutex, then cached.
+template <class PolynomialType, int dim, int spacedim>
+const FullMatrix<double> &
+FE_Q_Base<PolynomialType,dim,spacedim>
+::get_prolongation_matrix (const unsigned int child,
+                           const RefinementCase<dim> &refinement_case) const
+{
+  Assert (refinement_case<RefinementCase<dim>::isotropic_refinement+1,
+          ExcIndexRange(refinement_case,0,RefinementCase<dim>::isotropic_refinement+1));
+  Assert (refinement_case!=RefinementCase<dim>::no_refinement,
+          ExcMessage("Prolongation matrices are only available for refined cells!"));
+  Assert (child<GeometryInfo<dim>::n_children(refinement_case),
+          ExcIndexRange(child,0,GeometryInfo<dim>::n_children(refinement_case)));
+
+  // initialization upon first request
+  if (this->prolongation[refinement_case-1][child].n() == 0)
+    {
+      Threads::Mutex::ScopedLock lock(this->mutex);
+
+      // if matrix got updated while waiting for the lock
+      if (this->prolongation[refinement_case-1][child].n() ==
+          this->dofs_per_cell)
+        return this->prolongation[refinement_case-1][child];
+
+      // distinguish q/q_dg0 case: only treat Q dofs first
+      const unsigned int q_dofs_per_cell = Utilities::fixed_power<dim>(q_degree+1);
+
+      // compute the interpolation matrices in much the same way as we do for
+      // the constraints. it's actually simpler here, since we don't have this
+      // weird renumbering stuff going on. The trick is again that the
+      // interpolation matrix is formed by a permutation of the indices of the
+      // cell matrix. The value eps is used as a threshold to decide when certain
+      // evaluations of the Lagrange polynomials are zero or one.
+      const double eps = 1e-15*q_degree*dim;
+
+#ifdef DEBUG
+      // in DEBUG mode, check that the evaluation of support points in the
+      // current numbering gives the identity operation
+      for (unsigned int i=0; i<q_dofs_per_cell; ++i)
+        {
+          Assert (std::fabs (1.-this->poly_space.compute_value
+                             (i, this->unit_support_points[i])) < eps,
+                  ExcInternalError());
+          for (unsigned int j=0; j<q_dofs_per_cell; ++j)
+            if (j!=i)
+              Assert (std::fabs (this->poly_space.compute_value
+                                 (i, this->unit_support_points[j])) < eps,
+                      ExcInternalError());
+        }
+#endif
+
+      // to efficiently evaluate the polynomial at the subcell, make use of
+      // the tensor product structure of this element and only evaluate 1D
+      // information from the polynomial. This makes the cost of this function
+      // almost negligible also for high order elements
+      const unsigned int dofs1d = q_degree+1;
+      std::vector<Table<2,double> >
+      subcell_evaluations (dim, Table<2,double>(dofs1d, dofs1d));
+      const std::vector<unsigned int> &index_map_inverse =
+        this->poly_space.get_numbering_inverse();
+
+      // helper value: step size how to walk through diagonal and how many
+      // points we have left apart from the first dimension
+      unsigned int step_size_diag = 0;
+      {
+        unsigned int factor = 1;
+        for (unsigned int d=0; d<dim; ++d)
+          {
+            step_size_diag += factor;
+            factor *= dofs1d;
+          }
+      }
+
+      FullMatrix<double> prolongate (this->dofs_per_cell, this->dofs_per_cell);
+
+      // go through the points in diagonal to capture variation in all
+      // directions simultaneously
+      for (unsigned int j=0; j<dofs1d; ++j)
+        {
+          const unsigned int diag_comp = index_map_inverse[j*step_size_diag];
+          const Point<dim> p_subcell = this->unit_support_points[diag_comp];
+          const Point<dim> p_cell =
+            GeometryInfo<dim>::child_to_cell_coordinates (p_subcell, child,
+                                                          refinement_case);
+          for (unsigned int i=0; i<dofs1d; ++i)
+            for (unsigned int d=0; d<dim; ++d)
+              {
+                // evaluate along line where only x is different from zero
+                Point<dim> point;
+                point[0] = p_cell[d];
+                const double cell_value =
+                  this->poly_space.compute_value(index_map_inverse[i], point);
+
+                // cut off values that are too small. note that we have here
+                // Lagrange interpolation functions, so they should be zero at
+                // almost all points, and one at the others, at least on the
+                // subcells. so set them to their exact values
+                //
+                // the actual cut-off value is somewhat fuzzy, but it works
+                // for 1e-15*degree*dim (the value of eps above), which is kind
+                // of reasonable given that we compute the values of the
+                // polynomials via a degree-step recursion and then multiply
+                // the 1d-values. this gives us a linear growth in degree*dim,
+                // times a small constant.
+                //
+                // the embedding matrix is given by applying the inverse of
+                // the subcell matrix on the cell_interpolation matrix. since
+                // the subcell matrix is actually only a permutation vector,
+                // all we need to do is to switch the rows we write the data
+                // into. moreover, cut off very small values here
+                if (std::fabs(cell_value) < eps)
+                  subcell_evaluations[d](j,i) = 0;
+                else
+                  subcell_evaluations[d](j,i) = cell_value;
+              }
+        }
+
+      // now expand from 1D info. block innermost dimension (x_0) in order to
+      // avoid difficult checks at innermost loop
+      unsigned int j_indices[dim];
+      FE_Q_Helper::zero_indices<dim> (j_indices);
+      for (unsigned int j=0; j<q_dofs_per_cell; j+=dofs1d)
+        {
+          unsigned int i_indices[dim];
+          FE_Q_Helper::zero_indices<dim> (i_indices);
+          for (unsigned int i=0; i<q_dofs_per_cell; i+=dofs1d)
+            {
+              double val_extra_dim = 1.;
+              for (unsigned int d=1; d<dim; ++d)
+                val_extra_dim *= subcell_evaluations[d](j_indices[d-1],
+                                                        i_indices[d-1]);
+
+              // innermost sum where we actually compute. the same as
+              // prolongate(j,i) = this->poly_space.compute_value (i, p_cell)
+              for (unsigned int jj=0; jj<dofs1d; ++jj)
+                {
+                  const unsigned int j_ind = index_map_inverse[j+jj];
+                  for (unsigned int ii=0; ii<dofs1d; ++ii)
+                    prolongate(j_ind,index_map_inverse[i+ii])
+                      = val_extra_dim * subcell_evaluations[0](jj,ii);
+                }
+
+              // update indices that denote the tensor product position. a bit
+              // fuzzy and therefore not done for innermost x_0 direction
+              FE_Q_Helper::increment_indices<dim> (i_indices, dofs1d);
+            }
+          Assert (i_indices[dim-1] == 1, ExcInternalError());
+          FE_Q_Helper::increment_indices<dim> (j_indices, dofs1d);
+        }
+
+      // the discontinuous node is simply mapped on the discontinuous node on
+      // the child element
+      if (q_dofs_per_cell < this->dofs_per_cell)
+        prolongate(q_dofs_per_cell,q_dofs_per_cell) = 1.;
+
+      // and make sure that the row sum is 1. this must be so since for this
+      // element, the shape functions add up to one
+#ifdef DEBUG
+      for (unsigned int row=0; row<this->dofs_per_cell; ++row)
+        {
+          double sum = 0;
+          for (unsigned int col=0; col<this->dofs_per_cell; ++col)
+            sum += prolongate(row,col);
+          Assert (std::fabs(sum-1.) < eps, ExcInternalError());
+        }
+#endif
+
+      // swap matrices: the finished result goes into the cached entry
+      prolongate.swap(const_cast<FullMatrix<double> &>
+                      (this->prolongation[refinement_case-1][child]));
+    }
+
+  // finally return the matrix
+  return this->prolongation[refinement_case-1][child];
+}
+
+// Return the restriction matrix for the given child and refinement case.
+// It is computed on first request while holding this->mutex, then cached.
+template <class PolynomialType, int dim, int spacedim>
+const FullMatrix<double> &
+FE_Q_Base<PolynomialType,dim,spacedim>
+::get_restriction_matrix (const unsigned int child,
+                          const RefinementCase<dim> &refinement_case) const
+{
+  Assert (refinement_case<RefinementCase<dim>::isotropic_refinement+1,
+          ExcIndexRange(refinement_case,0,RefinementCase<dim>::isotropic_refinement+1));
+  Assert (refinement_case!=RefinementCase<dim>::no_refinement,
+          ExcMessage("Restriction matrices are only available for refined cells!"));
+  Assert (child<GeometryInfo<dim>::n_children(refinement_case),
+          ExcIndexRange(child,0,GeometryInfo<dim>::n_children(refinement_case)));
+
+  // initialization upon first request
+  if (this->restriction[refinement_case-1][child].n() == 0)
+    {
+      Threads::Mutex::ScopedLock lock(this->mutex);
+
+      // if matrix got updated while waiting for the lock...
+      if (this->restriction[refinement_case-1][child].n() ==
+          this->dofs_per_cell)
+        return this->restriction[refinement_case-1][child];
+
+      FullMatrix<double> restriction(this->dofs_per_cell, this->dofs_per_cell);
+      // distinguish q/q_dg0 case
+      const unsigned int q_dofs_per_cell = Utilities::fixed_power<dim>(q_degree+1);
+
+      // for these Lagrange interpolation polynomials, construction of the
+      // restriction matrices is relatively simple. the reason is that the
+      // interpolation points on the mother cell are (except for the case with
+      // arbitrary nonequidistant nodes) always also interpolation points for
+      // some shape function on one or the other child, because we have chosen
+      // equidistant Lagrange interpolation points for the polynomials
+      //
+      // so the only thing we have to find out is: for each shape function on
+      // the mother cell, which is the child cell (possibly more than one) on
+      // which it is located, and which is the corresponding shape function
+      // there. rather than doing it for the shape functions on the mother
+      // cell, we take the interpolation points there
+      //
+      // note that the interpolation point of a shape function can be on the
+      // boundary between subcells. in that case, restriction from children to
+      // mother may produce two or more entries for a dof on the mother
+      // cell. however, this doesn't hurt: since the element is continuous,
+      // the contribution from each child should yield the same result, and
+      // since the element is non-additive we just overwrite one value
+      // (compute on one child) by the same value (compute on a later child),
+      // so we don't have to care about this
+
+      const double eps = 1e-15*q_degree*dim;
+      const std::vector<unsigned int> &index_map_inverse =
+        this->poly_space.get_numbering_inverse();
+
+      const unsigned int dofs1d = q_degree+1;
+      std::vector<Tensor<1,dim> > evaluations1d (dofs1d);
+      // NOTE(review): matrix was already constructed with this size above
+      restriction.reinit(this->dofs_per_cell, this->dofs_per_cell);
+
+      for (unsigned int i=0; i<q_dofs_per_cell; ++i)
+        {
+          unsigned int mother_dof = index_map_inverse[i];
+          const Point<dim> p_cell = this->unit_support_points[mother_dof];
+
+          // check whether this interpolation point is inside this child cell
+          const Point<dim> p_subcell
+            = GeometryInfo<dim>::cell_to_child_coordinates (p_cell, child,
+                                                            refinement_case);
+          if (GeometryInfo<dim>::is_inside_unit_cell (p_subcell))
+            {
+              // same logic as in get_prolongation_matrix to evaluate the
+              // polynomial faster than from the tensor product: since we
+              // evaluate all polynomials, it is much faster to just compute
+              // the 1D values for all polynomials before and then get the
+              // dim-data.
+              for (unsigned int j=0; j<dofs1d; ++j)
+                for (unsigned int d=0; d<dim; ++d)
+                  {
+                    Point<dim> point;
+                    point[0] = p_subcell[d];
+                    evaluations1d[j][d] =
+                      this->poly_space.compute_value(index_map_inverse[j], point);
+                  }
+              unsigned int j_indices[dim];
+              FE_Q_Helper::zero_indices<dim> (j_indices);
+              double sum_check = 0;
+              for (unsigned int j = 0; j<q_dofs_per_cell; j += dofs1d)
+                {
+                  double val_extra_dim = 1.;
+                  for (unsigned int d=1; d<dim; ++d)
+                    val_extra_dim *= evaluations1d[j_indices[d-1]][d];
+                  for (unsigned int jj=0; jj<dofs1d; ++jj)
+                    {
+
+                      // find the child shape function(s) corresponding to
+                      // this point. Usually this is just one function;
+                      // however, when we use FE_Q on arbitrary nodes a parent
+                      // support point might not be a child support point, and
+                      // then we will get more than one nonzero value per
+                      // row. Still, the values should sum up to 1
+                      const double val
+                        = val_extra_dim * evaluations1d[jj][0];
+                      const unsigned int child_dof =
+                        index_map_inverse[j+jj];
+                      if (std::fabs (val-1.) < eps)
+                        restriction(mother_dof,child_dof)=1.;
+                      else if (std::fabs(val) > eps)
+                        restriction(mother_dof,child_dof)=val;
+                      sum_check += val;
+                    }
+                  FE_Q_Helper::increment_indices<dim> (j_indices, dofs1d);
+                }
+              Assert (std::fabs(sum_check-1) < eps,
+                      ExcInternalError());
+            }
+
+          // part for FE_Q_DG0. NOTE(review): independent of i, could be hoisted
+          if (q_dofs_per_cell < this->dofs_per_cell)
+            restriction(this->dofs_per_cell-1,this->dofs_per_cell-1) =
+              1./GeometryInfo<dim>::n_children(RefinementCase<dim>(refinement_case));
+        }
+
+      // swap matrices: the finished result goes into the cached entry
+      restriction.swap(const_cast<FullMatrix<double> &>
+                       (this->restriction[refinement_case-1][child]));
+    }
+
+  return this->restriction[refinement_case-1][child];
+}
+
+
+
+//---------------------------------------------------------------------------
+// Data field initialization
+//---------------------------------------------------------------------------
+// Return whether the vertex, line or quad that carries shape function
+// shape_index lies on face face_index; cell-interior functions give false.
+template <class PolynomialType, int dim, int spacedim>
+bool
+FE_Q_Base<PolynomialType,dim,spacedim>::has_support_on_face
+(const unsigned int shape_index,
+ const unsigned int face_index) const
+{
+  Assert (shape_index < this->dofs_per_cell,
+          ExcIndexRange (shape_index, 0, this->dofs_per_cell));
+  Assert (face_index < GeometryInfo<dim>::faces_per_cell,
+          ExcIndexRange (face_index, 0, GeometryInfo<dim>::faces_per_cell));
+
+  // in 1d, things are simple. since there is only one degree of freedom per
+  // vertex in this class, the first is on vertex 0 (==face 0 in some sense),
+  // the second on face 1:
+  if (dim == 1)
+    return (((shape_index == 0) && (face_index == 0)) ||
+            ((shape_index == 1) && (face_index == 1)));
+
+  // first, special-case interior shape functions, since they have no support
+  // anywhere on the boundary
+  if (((dim==2) && (shape_index>=this->first_quad_index))
+      ||
+      ((dim==3) && (shape_index>=this->first_hex_index)))
+    return false;
+
+  // let's see whether this is a vertex
+  if (shape_index < this->first_line_index)
+    {
+      // for Q elements, there is one dof per vertex, so
+      // shape_index==vertex_number. check whether this vertex is on the given
+      // face. thus, for each face, give a list of vertices
+      const unsigned int vertex_no = shape_index;
+      Assert (vertex_no < GeometryInfo<dim>::vertices_per_cell,
+              ExcInternalError());
+
+      for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_face; ++v)
+        if (GeometryInfo<dim>::face_to_cell_vertices(face_index, v) == vertex_no)
+          return true;
+
+      return false;
+    }
+  else if (shape_index < this->first_quad_index)
+    // ok, dof is on a line
+    {
+      const unsigned int line_index
+        = (shape_index - this->first_line_index) / this->dofs_per_line;
+      Assert (line_index < GeometryInfo<dim>::lines_per_cell,
+              ExcInternalError());
+
+      // in 2d, the line is the face, so get the line index
+      if (dim == 2)
+        return (line_index == face_index);
+      else if (dim == 3)
+        {
+          // silence compiler warning
+          const unsigned int lines_per_face =
+            dim == 3 ? GeometryInfo<dim>::lines_per_face : 1;
+          // see whether the given line is on the given face.
+          for (unsigned int l=0; l<lines_per_face; ++l)
+            if (GeometryInfo<3>::face_to_cell_lines(face_index, l) == line_index)
+              return true;
+
+          return false;
+        }
+      else
+        Assert (false, ExcNotImplemented());
+    }
+  else if (shape_index < this->first_hex_index)
+    // dof is on a quad
+    {
+      const unsigned int quad_index
+        = (shape_index - this->first_quad_index) / this->dofs_per_quad;
+      Assert (static_cast<signed int>(quad_index) <
+              static_cast<signed int>(GeometryInfo<dim>::quads_per_cell),
+              ExcInternalError());
+
+      // in 2d, cell bubbles are zero on all faces. but we have treated this
+      // case above already
+      Assert (dim != 2, ExcInternalError());
+
+      // in 3d, quad_index=face_index
+      if (dim == 3)
+        return (quad_index == face_index);
+      else
+        Assert (false, ExcNotImplemented());
+    }
+  else
+    // dof on hex
+    {
+      // can only happen in 3d, but this case has already been covered above
+      Assert (false, ExcNotImplemented());
+      return false;
+    }
+
+  // we should not have gotten here
+  Assert (false, ExcInternalError());
+  return false;
+}
+
+// Return a 1 x dofs_per_cell table with the first fixed_power<dim>(q_degree+1)
+// entries set to true, paired with a one-element vector holding 0.
+template <typename PolynomialType, int dim, int spacedim>
+std::pair<Table<2,bool>, std::vector<unsigned int> >
+FE_Q_Base<PolynomialType,dim,spacedim>::get_constant_modes () const
+{
+  Table<2,bool> constant_modes(1, this->dofs_per_cell);
+  // We only care here about the constant mode due to the polynomial space
+  // without any enrichments.
+  // As there may be more constant modes, derived classes may have to implement
+  // this themselves. An example for this is FE_Q_DG0.
+  for (unsigned int i=0; i<Utilities::fixed_power<dim>(q_degree+1); ++i)
+    constant_modes(0, i) = true;
+  return std::pair<Table<2,bool>, std::vector<unsigned int> >
+         (constant_modes, std::vector<unsigned int>(1, 0));
+}
+
+
+// explicit instantiations
+#include "fe_q_base.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/fe_q_base.inst.in b/source/fe/fe_q_base.inst.in
new file mode 100644
index 0000000..b67cf87
--- /dev/null
+++ b/source/fe/fe_q_base.inst.in
@@ -0,0 +1,28 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+// Instantiate FE_Q_Base for every (dim, spacedim) pair with dim <= spacedim.
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+#if deal_II_dimension <= deal_II_space_dimension 	
+    template class FE_Q_Base<TensorProductPolynomials<deal_II_dimension>, deal_II_dimension, deal_II_space_dimension>;
+    template class FE_Q_Base<TensorProductPolynomialsConst<deal_II_dimension>, deal_II_dimension, deal_II_space_dimension>;
+    template class FE_Q_Base<TensorProductPolynomialsBubbles<deal_II_dimension>, deal_II_dimension, deal_II_space_dimension>;
+    template class FE_Q_Base<TensorProductPolynomials<deal_II_dimension,Polynomials::PiecewisePolynomial<double> >, deal_II_dimension, deal_II_space_dimension>;
+#endif
+  }
+
+
diff --git a/source/fe/fe_q_bubbles.cc b/source/fe/fe_q_bubbles.cc
new file mode 100644
index 0000000..44b7e5f
--- /dev/null
+++ b/source/fe/fe_q_bubbles.cc
@@ -0,0 +1,527 @@
+// ---------------------------------------------------------------------
+// $Id$
+//
+// Copyright (C) 2012 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/quadrature.h>
+#include <deal.II/base/qprojector.h>
+#include <deal.II/base/template_constraints.h>
+#include <deal.II/base/std_cxx11/unique_ptr.h>
+#include <deal.II/fe/fe_q_bubbles.h>
+#include <deal.II/fe/fe_nothing.h>
+#include <deal.II/fe/fe_tools.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/mapping_q1.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/grid/tria.h>
+
+#include <deal.II/grid/grid_generator.h>
+
+
+#include <vector>
+#include <sstream>
+
+DEAL_II_NAMESPACE_OPEN
+// Helper for FE_Q_Bubbles: fills the embedding matrices by projecting the
+// coarse shape functions onto the refined unit cell via a fine mass matrix.
+namespace FE_Q_Bubbles_Helper
+{
+  namespace
+  {
+    template <int dim, int spacedim>
+    inline
+    void
+    compute_embedding_matrices(const FE_Q_Bubbles<dim, spacedim> &fe,
+                               std::vector<std::vector<FullMatrix<double> > > &matrices,
+                               const bool isotropic_only)
+    {
+      const unsigned int dpc    = fe.dofs_per_cell;
+      const unsigned int degree = fe.degree;
+
+      // Initialize quadrature formula on fine cells
+      std_cxx11::unique_ptr<Quadrature<dim> > q_fine;
+      Quadrature<1> q_dummy(std::vector<Point<1> >(1), std::vector<double> (1,1.));
+      switch (dim)
+        {
+        case 1:
+          if (spacedim==1)
+            q_fine.reset(new QGauss<dim> (degree+1));
+          else if (spacedim==2)
+            q_fine.reset(new QAnisotropic<dim>(QGauss<1>(degree+1), q_dummy));
+          else
+            q_fine.reset(new QAnisotropic<dim>(QGauss<1>(degree+1), q_dummy, q_dummy));
+          break;
+        case 2:
+          if (spacedim==2)
+            q_fine.reset(new QGauss<dim> (degree+1));
+          else
+            q_fine.reset(new QAnisotropic<dim>(QGauss<1>(degree+1), QGauss<1>(degree+1), q_dummy));
+          break;
+        case 3:
+          q_fine.reset(new QGauss<dim> (degree+1));
+          break;
+        default:
+          AssertThrow(false, ExcInternalError());
+        }
+
+      Assert(q_fine.get() != NULL, ExcInternalError());
+      const unsigned int nq = q_fine->size();
+
+      // loop over all possible refinement cases
+      unsigned int ref_case = (isotropic_only)
+                              ? RefinementCase<dim>::isotropic_refinement
+                              : RefinementCase<dim>::cut_x;
+      for (; ref_case <= RefinementCase<dim>::isotropic_refinement; ++ref_case)
+        {
+          const unsigned int nc
+            = GeometryInfo<dim>::n_children(RefinementCase<dim>(ref_case));
+
+          for (unsigned int i=0; i<nc; ++i)
+            {
+              Assert(matrices[ref_case-1][i].n() == dpc,
+                     ExcDimensionMismatch(matrices[ref_case-1][i].n(),dpc));
+              Assert(matrices[ref_case-1][i].m() == dpc,
+                     ExcDimensionMismatch(matrices[ref_case-1][i].m(),dpc));
+            }
+
+          // create a respective refinement on the triangulation
+          Triangulation<dim, spacedim> tr;
+          GridGenerator::hyper_cube (tr, 0, 1);
+          tr.begin_active()->set_refine_flag(RefinementCase<dim>(ref_case));
+          tr.execute_coarsening_and_refinement();
+
+          DoFHandler<dim, spacedim> dh(tr);
+          dh.distribute_dofs(fe);
+
+          FEValues<dim, spacedim> fine (StaticMappingQ1<dim,spacedim>::mapping, fe, *q_fine,
+                                        update_quadrature_points
+                                        | update_JxW_values | update_values);
+
+          const unsigned int n_dofs = dh.n_dofs();
+
+          FullMatrix<double> fine_mass(n_dofs);
+          FullMatrix<double> coarse_rhs_matrix(n_dofs, dpc);
+
+          std::vector<std::vector<types::global_dof_index> > child_ldi
+          (nc, std::vector<types::global_dof_index>(fe.dofs_per_cell));
+
+          //now create the mass matrix and all the right_hand sides
+          unsigned int child_no = 0;
+          typename dealii::DoFHandler<dim>::active_cell_iterator cell
+            = dh.begin_active(); // NOTE(review): dh is a DoFHandler<dim,spacedim>
+          for (; cell!=dh.end(); ++cell, ++child_no)
+            {
+              fine.reinit(cell);
+              cell->get_dof_indices(child_ldi[child_no]);
+
+              for (unsigned int q=0; q<nq; ++q)
+                for (unsigned int i=0; i<dpc; ++i)
+                  for (unsigned int j=0; j<dpc; ++j)
+                    {
+                      const unsigned int gdi=child_ldi[child_no][i];
+                      const unsigned int gdj=child_ldi[child_no][j];
+                      fine_mass(gdi, gdj)+=fine.shape_value(i,q)
+                                           *fine.shape_value(j,q)
+                                           *fine.JxW(q);
+                      Point<dim> quad_tmp;
+                      for (unsigned int k=0; k<dim; ++k)
+                        quad_tmp(k) = fine.quadrature_point(q)(k);
+                      coarse_rhs_matrix(gdi, j)
+                      +=fine.shape_value(i,q)
+                        *fe.shape_value(j, quad_tmp)
+                        *fine.JxW(q);
+                    }
+            }
+
+          //now solve for all right-hand sides simultaneously
+          dealii::FullMatrix<double> solution (n_dofs, dpc);
+          fine_mass.gauss_jordan();
+          fine_mass.mmult(solution, coarse_rhs_matrix);
+
+          //and distribute to the fine cell matrices
+          for (unsigned int child_no=0; child_no<nc; ++child_no)
+            for (unsigned int i=0; i<dpc; ++i)
+              for (unsigned int j=0; j<dpc; ++j)
+                {
+                  const unsigned int gdi=child_ldi[child_no][i];
+                  //remove small entries
+                  if (std::fabs(solution(gdi, j)) > 1.e-12)
+                    matrices[ref_case-1][child_no](i,j)=solution(gdi, j);
+                }
+        }
+    }
+  }
+}
+// Construct the element of degree q_degree on equidistant 1d support points;
+// the cell midpoint is appended n_bubbles times as extra support point.
+template <int dim, int spacedim>
+FE_Q_Bubbles<dim,spacedim>::FE_Q_Bubbles (const unsigned int q_degree)
+  :
+  FE_Q_Base<TensorProductPolynomialsBubbles<dim>, dim, spacedim> (
+    TensorProductPolynomialsBubbles<dim>(Polynomials::LagrangeEquidistant::generate_complete_basis(q_degree)),
+    FiniteElementData<dim>(get_dpo_vector(q_degree),
+                           1, q_degree+1,
+                           FiniteElementData<dim>::H1),
+    get_riaf_vector(q_degree)),
+  n_bubbles((q_degree<=1)?1:dim)
+{
+  Assert (q_degree > 0,
+          ExcMessage ("This element can only be used for polynomial degrees "
+                      "greater than zero"));
+
+  std::vector<Point<1> > support_points_1d(q_degree+1);
+  for (unsigned int i=0; i<=q_degree; ++i)
+    support_points_1d[i][0] = static_cast<double>(i)/q_degree;
+
+  this->initialize(support_points_1d);
+
+  // add the cell midpoint as unit support point, n_bubbles times
+  Point<dim> point;
+  for (unsigned int d=0; d<dim; ++d)
+    point[d] = 0.5;
+  for (unsigned int i=0; i<n_bubbles; ++i)
+    this->unit_support_points.push_back(point);
+  AssertDimension(this->dofs_per_cell, this->unit_support_points.size());
+
+  this->reinit_restriction_and_prolongation_matrices();
+  if (dim == spacedim)
+    {
+      FE_Q_Bubbles_Helper::compute_embedding_matrices
+      (*this, this->prolongation, false);
+      // Fill restriction matrices with L2-projection
+      FETools::compute_projection_matrices (*this, this->restriction);
+    }
+
+}
+
+
+
+template <int dim, int spacedim>
+FE_Q_Bubbles<dim,spacedim>::FE_Q_Bubbles (const Quadrature<1> &points) // support points taken from a 1d quadrature
+  :
+  FE_Q_Base<TensorProductPolynomialsBubbles<dim>, dim, spacedim> (
+    TensorProductPolynomialsBubbles<dim>(Polynomials::generate_complete_Lagrange_basis(points.get_points())),
+    FiniteElementData<dim>(get_dpo_vector(points.size()-1),
+                           1, points.size(), // same convention as the other constructor: q_degree+1
+                           FiniteElementData<dim>::H1),
+    get_riaf_vector(points.size()-1)),
+  n_bubbles((points.size()-1<=1)?1:dim) // one bubble for q_degree<=1, dim bubbles otherwise
+{
+  const unsigned int q_degree = points.size()-1;
+  (void) q_degree; // presumably keeps the variable "used" when Assert expands to nothing
+  Assert (q_degree > 0,
+          ExcMessage ("This element can only be used for polynomial degrees "
+                      "at least one"));
+
+  this->initialize(points.get_points());
+
+  // every bubble dof gets the point (0.5,...,0.5) appended as its unit support point
+  Point<dim> point;
+  for (unsigned int d=0; d<dim; ++d)
+    point[d] = 0.5;
+  for (unsigned int i=0; i< n_bubbles; ++i)
+    this->unit_support_points.push_back(point);
+  AssertDimension(this->dofs_per_cell, this->unit_support_points.size());
+  // prolongation and restriction are only filled below when dim == spacedim
+  this->reinit_restriction_and_prolongation_matrices();
+  if (dim == spacedim)
+    {
+      FE_Q_Bubbles_Helper::compute_embedding_matrices
+      (*this, this->prolongation, false);
+      // Fill restriction matrices with L2-projection
+      FETools::compute_projection_matrices (*this, this->restriction);
+    }
+}
+
+
+
+// Assemble the element name; the node family is recovered from the 1d support points.
+template <int dim, int spacedim>
+std::string FE_Q_Bubbles<dim,spacedim>::get_name () const
+{
+  // note that the FETools::get_fe_from_name function depends on the
+  // particular format of the string this function returns, so they have to be
+  // kept in synch
+  // (this->degree was set to q_degree+1, i.e. the number of points per direction)
+  std::ostringstream namebuf;
+  bool type = true;
+  const unsigned int n_points = this->degree;
+  std::vector<double> points(n_points);
+  const unsigned int dofs_per_cell = this->dofs_per_cell;
+  const std::vector<Point<dim> > &unit_support_points = this->unit_support_points;
+  unsigned int index = 0;
+  // Decode the support points in one coordinate direction: the first two hits
+  // are the end points, every later hit is stored one slot to the left.
+  for (unsigned int j=0; j<dofs_per_cell; j++)
+    {
+      if ((dim>1) ? (unit_support_points[j](1)==0 &&
+                     ((dim>2) ? unit_support_points[j](2)==0: true)) : true)
+        {
+          if (index == 0)
+            points[index] = unit_support_points[j](0);
+          else if (index == 1)
+            points[n_points-1] = unit_support_points[j](0);
+          else if (index < n_points)
+            points[index-1] = unit_support_points[j](0);
+          // further hits (the bubble node in 1d) are counted but must not overwrite the right end point
+          index++;
+        }
+    }
+  // Do not consider the discontinuous node for dimension 1
+  Assert (index == n_points || (dim==1 && index == n_points+n_bubbles),
+          ExcMessage ("Could not decode support points in one coordinate direction."));
+
+  // Check whether the support points are equidistant.
+  for (unsigned int j=0; j<n_points; j++)
+    if (std::fabs(points[j] - (double)j/(this->degree-1)) > 1e-15)
+      {
+        type = false;
+        break;
+      }
+
+  if (type == true)
+    namebuf << "FE_Q_Bubbles<" << dim << ">(" << this->degree-1 << ")";
+  else
+    {
+
+      // Check whether the support points come from QGaussLobatto.
+      const QGaussLobatto<1> points_gl(n_points);
+      type = true;
+      for (unsigned int j=0; j<n_points; j++)
+        if (points[j] != points_gl.point(j)(0))
+          {
+            type = false;
+            break;
+          }
+      if (type == true)
+        namebuf << "FE_Q_Bubbles<" << dim << ">(QGaussLobatto(" << this->degree << "))";
+      else
+        namebuf << "FE_Q_Bubbles<" << dim << ">(QUnknownNodes(" << this->degree-1 << "))";
+    }
+  return namebuf.str();
+}
+
+
+
+// Return a newly allocated copy of this element, made with the copy constructor.
+template <int dim, int spacedim>
+FiniteElement<dim,spacedim> *FE_Q_Bubbles<dim,spacedim>::clone() const
+{
+  return new FE_Q_Bubbles(*this);
+}
+
+
+
+// Scalar nodal interpolation: take over the support point values, zero the bubble dofs.
+template <int dim, int spacedim>
+void FE_Q_Bubbles<dim,spacedim>::interpolate(std::vector<double>       &local_dofs,
+                                             const std::vector<double> &values) const
+{
+  Assert (values.size() == this->unit_support_points.size(),
+          ExcDimensionMismatch(values.size(), this->unit_support_points.size()));
+  Assert (local_dofs.size() == this->dofs_per_cell,
+          ExcDimensionMismatch(local_dofs.size(),this->dofs_per_cell));
+  Assert (this->n_components() == 1,
+          ExcDimensionMismatch(this->n_components(), 1));
+
+  // one value per dof, in the order of the unit support points
+  for (unsigned int k=0; k<values.size(); ++k)
+    local_dofs[k] = values[k];
+
+  // We don't use the bubble functions for local interpolation
+  std::fill(local_dofs.end()-n_bubbles, local_dofs.end(), 0.);
+}
+
+
+
+// Interpolate from vector-valued support point data, reading components from offset on.
+template <int dim, int spacedim>
+void FE_Q_Bubbles<dim,spacedim>::interpolate(std::vector<double>    &local_dofs,
+                                             const std::vector<Vector<double> > &values,
+                                             unsigned int offset) const
+{
+  Assert (values.size() == this->unit_support_points.size(),
+          ExcDimensionMismatch(values.size(),
+                               this->unit_support_points.size()));
+  Assert (local_dofs.size() == this->dofs_per_cell,
+          ExcDimensionMismatch(local_dofs.size(),this->dofs_per_cell));
+  Assert (values[0].size() >= offset+this->n_components(),
+          ExcDimensionMismatch(values[0].size(),offset+this->n_components()));
+  // only the nodal dofs are interpolated; all n_bubbles trailing entries are set below
+  for (unsigned int i=0; i<this->dofs_per_cell-n_bubbles; ++i)
+    {
+      const std::pair<unsigned int, unsigned int> index
+        = this->system_to_component_index(i);
+      local_dofs[i] = values[i](offset+index.first);
+    }
+
+  // We don't use the bubble functions for local interpolation
+  for (unsigned int i = 0; i<n_bubbles; ++i)
+    local_dofs[local_dofs.size()-i-1] = 0.;
+}
+
+
+
+// Interpolate from component-wise data: values[c][i] is component c at support point i.
+template <int dim, int spacedim>
+void FE_Q_Bubbles<dim,spacedim>::interpolate(
+  std::vector<double> &local_dofs,
+  const VectorSlice<const std::vector<std::vector<double> > > &values) const
+{
+  // check the number of components first, values[0] is only meaningful afterwards
+  Assert (values.size() == this->n_components(),
+          ExcDimensionMismatch(values.size(), this->n_components()));
+  Assert (values[0].size() == this->unit_support_points.size(),
+          ExcDimensionMismatch(values[0].size(), this->unit_support_points.size()));
+  Assert (local_dofs.size() == this->dofs_per_cell,
+          ExcDimensionMismatch(local_dofs.size(),this->dofs_per_cell));
+  // only the nodal dofs are interpolated; all n_bubbles trailing entries are set below
+  for (unsigned int i=0; i<this->dofs_per_cell-n_bubbles; ++i)
+    {
+      const std::pair<unsigned int, unsigned int> index
+        = this->system_to_component_index(i);
+      local_dofs[i] = values[index.first][i];
+    }
+
+  // We don't use the bubble functions for local interpolation
+  for (unsigned int i = 0; i<n_bubbles; ++i)
+    local_dofs[local_dofs.size()-i-1] = 0.;
+}
+
+
+
+// Fill the interpolation matrix from x_source_fe; only the same-degree identity case is handled.
+template <int dim, int spacedim>
+void FE_Q_Bubbles<dim,spacedim>::
+get_interpolation_matrix (const FiniteElement<dim,spacedim> &x_source_fe,
+                          FullMatrix<double>       &interpolation_matrix) const
+{
+  // We don't know how to do this properly, yet.
+  // However, for SolutionTransfer to work we need to provide an implementation
+  // for the case that the x_source_fe is identical to this FE
+  typedef FE_Q_Bubbles<dim,spacedim> FEQBUBBLES;
+  typedef FiniteElement<dim,spacedim> FEL;
+
+  AssertThrow ((x_source_fe.get_name().find ("FE_Q_Bubbles<") == 0)
+               ||
+               (dynamic_cast<const FEQBUBBLES *>(&x_source_fe) != 0)
+               ,
+               typename FEL::
+               ExcInterpolationNotImplemented());
+  Assert (interpolation_matrix.m() == this->dofs_per_cell,
+          ExcDimensionMismatch (interpolation_matrix.m(),
+                                this->dofs_per_cell));
+  Assert (interpolation_matrix.n() == x_source_fe.dofs_per_cell,
+          ExcDimensionMismatch (interpolation_matrix.n(),
+                                x_source_fe.dofs_per_cell));
+  // Short cut for the identity (diagonal set to 1); the cast can be null if only the name matched
+  const FEQBUBBLES *const source_bubbles = dynamic_cast<const FEQBUBBLES *>(&x_source_fe);
+  if (source_bubbles != 0 && source_bubbles->degree == this->degree)
+    for (unsigned int i=0; i<interpolation_matrix.m(); ++i)
+      interpolation_matrix.set(i,i,1.);
+  //else we need to do more...
+  else
+    Assert(false, typename FEL::ExcInterpolationNotImplemented());
+}
+
+
+
+// One flag per dof, all true: (q_deg+1)^dim nodal dofs followed by the bubble dofs.
+template <int dim, int spacedim>
+std::vector<bool> FE_Q_Bubbles<dim,spacedim>::get_riaf_vector(const unsigned int q_deg)
+{
+  const unsigned int bubble_count = (q_deg>1) ? dim : 1;
+  const unsigned int nodal_count = Utilities::fixed_power<dim>(q_deg+1);
+  return std::vector<bool> (nodal_count+bubble_count, true);
+}
+
+
+
+// Dofs per object of dimension 0..dim: (q_deg-1)^d each, plus the bubbles on the cell itself.
+template <int dim, int spacedim>
+std::vector<unsigned int> FE_Q_Bubbles<dim,spacedim>::get_dpo_vector(const unsigned int q_deg)
+{
+  std::vector<unsigned int> per_object(dim+1, 1U);
+  for (unsigned int d=1; d<per_object.size(); ++d)
+    per_object[d] = per_object[d-1]*(q_deg-1);
+
+  per_object[dim] += (q_deg>1) ? dim : 1; // all the bubble functions are discontinuous
+  return per_object;
+}
+
+
+
+// The trailing n_bubbles dofs never have face support; all others are answered by FE_Q_Base.
+template <int dim, int spacedim>
+bool FE_Q_Bubbles<dim,spacedim>::has_support_on_face (const unsigned int shape_index,
+                                                      const unsigned int face_index) const
+{
+  typedef FE_Q_Base<TensorProductPolynomialsBubbles<dim>,dim,spacedim> BaseClass;
+  const bool is_bubble = (shape_index >= this->n_dofs_per_cell()-n_bubbles);
+
+  // discontinuous functions have no support on faces
+  return !is_bubble
+         && BaseClass::has_support_on_face(shape_index, face_index);
+}
+
+
+
+template <int dim, int spacedim>
+const FullMatrix<double> &
+FE_Q_Bubbles<dim,spacedim>::get_prolongation_matrix // checked read access to this->prolongation
+(const unsigned int child,
+ const RefinementCase<dim> &refinement_case) const
+{
+  Assert (refinement_case<RefinementCase<dim>::isotropic_refinement+1,
+          ExcIndexRange(refinement_case,0,RefinementCase<dim>::isotropic_refinement+1));
+  Assert (refinement_case!=RefinementCase<dim>::no_refinement,
+          ExcMessage("Prolongation matrices are only available for refined cells!"));
+  Assert (child<GeometryInfo<dim>::n_children(refinement_case),
+          ExcIndexRange(child,0,GeometryInfo<dim>::n_children(refinement_case)));
+  // a matrix with zero columns is treated as "never set up" (presumably the dim != spacedim case)
+  Assert (this->prolongation[refinement_case-1][child].n() != 0,
+          ExcMessage("This prolongation matrix has not been computed yet!"));
+  // the table is indexed by refinement_case-1; no_refinement was excluded above
+  return this->prolongation[refinement_case-1][child];
+}
+
+
+
+template <int dim, int spacedim>
+const FullMatrix<double> &
+FE_Q_Bubbles<dim,spacedim>::get_restriction_matrix // checked read access to this->restriction
+(const unsigned int child,
+ const RefinementCase<dim> &refinement_case) const
+{
+  Assert (refinement_case<RefinementCase<dim>::isotropic_refinement+1,
+          ExcIndexRange(refinement_case,0,RefinementCase<dim>::isotropic_refinement+1));
+  Assert (refinement_case!=RefinementCase<dim>::no_refinement,
+          ExcMessage("Restriction matrices are only available for refined cells!"));
+  Assert (child<GeometryInfo<dim>::n_children(refinement_case),
+          ExcIndexRange(child,0,GeometryInfo<dim>::n_children(refinement_case)));
+  // a matrix with zero columns is treated as "never set up" (presumably the dim != spacedim case)
+  Assert(this->restriction[refinement_case-1][child].n() != 0,
+         ExcMessage("This restriction matrix has not been computed yet!"));
+
+  // the table is indexed by refinement_case-1; no_refinement was excluded above
+  return this->restriction[refinement_case-1][child];
+}
+
+// explicit instantiations
+#include "fe_q_bubbles.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/fe_q_bubbles.inst.in b/source/fe/fe_q_bubbles.inst.in
new file mode 100644
index 0000000..afbf260
--- /dev/null
+++ b/source/fe/fe_q_bubbles.inst.in
@@ -0,0 +1,24 @@
+// ---------------------------------------------------------------------
+// $Id$
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+#if deal_II_dimension <= deal_II_space_dimension
+    template class FE_Q_Bubbles<deal_II_dimension, deal_II_space_dimension>;
+#endif
+  }
diff --git a/source/fe/fe_q_dg0.cc b/source/fe/fe_q_dg0.cc
new file mode 100644
index 0000000..d8cb1c8
--- /dev/null
+++ b/source/fe/fe_q_dg0.cc
@@ -0,0 +1,339 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2012 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/quadrature.h>
+#include <deal.II/base/qprojector.h>
+#include <deal.II/base/template_constraints.h>
+#include <deal.II/fe/fe_q_dg0.h>
+#include <deal.II/fe/fe_nothing.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/dofs/dof_accessor.h>
+
+
+#include <vector>
+#include <sstream>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+template <int dim, int spacedim>
+FE_Q_DG0<dim,spacedim>::FE_Q_DG0 (const unsigned int degree) // equidistant support points
+  :
+  FE_Q_Base<TensorProductPolynomialsConst<dim>, dim, spacedim> (
+    TensorProductPolynomialsConst<dim>(Polynomials::LagrangeEquidistant::generate_complete_basis(degree)),
+    FiniteElementData<dim>(get_dpo_vector(degree),
+                           1, degree,
+                           FiniteElementData<dim>::L2),
+    get_riaf_vector(degree))
+{
+  Assert (degree > 0,
+          ExcMessage ("This element can only be used for polynomial degrees "
+                      "greater than zero"));
+  // 1d support points i/degree for i=0,...,degree
+  std::vector<Point<1> > support_points_1d(degree+1);
+  for (unsigned int i=0; i<=degree; ++i)
+    support_points_1d[i][0] = static_cast<double>(i)/degree;
+
+  this->initialize(support_points_1d);
+
+  // the one additional (discontinuous) dof gets the point (0.5,...,0.5) as unit support point
+  Point<dim> point;
+  for (unsigned int d=0; d<dim; ++d)
+    point[d] = 0.5;
+  this->unit_support_points.push_back(point);
+  AssertDimension(this->dofs_per_cell, this->unit_support_points.size());
+}
+
+
+
+template <int dim, int spacedim>
+FE_Q_DG0<dim,spacedim>::FE_Q_DG0 (const Quadrature<1> &points) // support points taken from a 1d quadrature
+  :
+  FE_Q_Base<TensorProductPolynomialsConst<dim>, dim, spacedim> (
+    TensorProductPolynomialsConst<dim>(Polynomials::generate_complete_Lagrange_basis(points.get_points())),
+    FiniteElementData<dim>(get_dpo_vector(points.size()-1),
+                           1, points.size()-1,
+                           FiniteElementData<dim>::L2),
+    get_riaf_vector(points.size()-1))
+{
+  const int degree = points.size()-1;
+  (void)degree;
+  // the check requires degree > 0, so the message must not promise that zero is accepted
+  Assert (degree > 0,
+          ExcMessage ("This element can only be used for polynomial degrees "
+                      "at least one"));
+
+  this->initialize(points.get_points());
+
+  // the one additional (discontinuous) dof gets the point (0.5,...,0.5) as unit support point
+  Point<dim> point;
+  for (unsigned int d=0; d<dim; ++d)
+    point[d] = 0.5;
+  this->unit_support_points.push_back(point);
+  AssertDimension(this->dofs_per_cell, this->unit_support_points.size());
+}
+
+
+
+// Assemble the element name; the node family is recovered from the 1d support points.
+template <int dim, int spacedim>
+std::string FE_Q_DG0<dim,spacedim>::get_name () const
+{
+  // note that the FETools::get_fe_from_name function depends on the
+  // particular format of the string this function returns, so they have to be
+  // kept in synch
+
+  std::ostringstream namebuf;
+  bool type = true;
+  const unsigned int n_points = this->degree +1;
+  std::vector<double> points(n_points);
+  const unsigned int dofs_per_cell = this->dofs_per_cell;
+  const std::vector<Point<dim> > &unit_support_points = this->unit_support_points;
+  unsigned int index = 0;
+  // Decode the support points in one coordinate direction: the first two hits
+  // are the end points, every later hit is stored one slot to the left.
+  for (unsigned int j=0; j<dofs_per_cell; j++)
+    {
+      if ((dim>1) ? (unit_support_points[j](1)==0 &&
+                     ((dim>2) ? unit_support_points[j](2)==0: true)) : true)
+        {
+          if (index == 0)
+            points[index] = unit_support_points[j](0);
+          else if (index == 1)
+            points[n_points-1] = unit_support_points[j](0);
+          else if (index < n_points)
+            points[index-1] = unit_support_points[j](0);
+          // further hits (the DG0 node in 1d) are counted but must not overwrite the right end point
+          index++;
+        }
+    }
+  // Do not consider the discontinuous node for dimension 1
+  Assert (index == n_points || (dim==1 && index == n_points+1),
+          ExcMessage ("Could not decode support points in one coordinate direction."));
+
+  // Check whether the support points are equidistant.
+  for (unsigned int j=0; j<n_points; j++)
+    if (std::fabs(points[j] - (double)j/this->degree) > 1e-15)
+      {
+        type = false;
+        break;
+      }
+
+  if (type == true)
+    namebuf << "FE_Q_DG0<"
+            << Utilities::dim_string(dim,spacedim)
+            << ">(" << this->degree << ")";
+  else
+    {
+
+      // Check whether the support points come from QGaussLobatto.
+      const QGaussLobatto<1> points_gl(n_points);
+      type = true;
+      for (unsigned int j=0; j<n_points; j++)
+        if (points[j] != points_gl.point(j)(0))
+          {
+            type = false;
+            break;
+          }
+      if (type == true)
+        namebuf << "FE_Q_DG0<"
+                << Utilities::dim_string(dim,spacedim)
+                << ">(QGaussLobatto(" << this->degree+1 << "))";
+      else
+        namebuf << "FE_Q_DG0<"
+                << Utilities::dim_string(dim,spacedim)
+                << ">(QUnknownNodes(" << this->degree << "))";
+    }
+  return namebuf.str();
+}
+
+
+
+// Return a newly allocated copy of this element, made with the copy constructor.
+template <int dim, int spacedim>
+FiniteElement<dim,spacedim> *FE_Q_DG0<dim,spacedim>::clone() const
+{
+  return new FE_Q_DG0(*this);
+}
+
+
+
+// Scalar nodal interpolation: take over the support point values, zero the last dof.
+template <int dim, int spacedim>
+void FE_Q_DG0<dim,spacedim>::interpolate(std::vector<double>       &local_dofs,
+                                         const std::vector<double> &values) const
+{
+  Assert (values.size() == this->unit_support_points.size(),
+          ExcDimensionMismatch(values.size(),
+                               this->unit_support_points.size()));
+  Assert (local_dofs.size() == this->dofs_per_cell,
+          ExcDimensionMismatch(local_dofs.size(),this->dofs_per_cell));
+  Assert (this->n_components() == 1,
+          ExcDimensionMismatch(this->n_components(), 1));
+
+  for (unsigned int k=0; k<values.size(); ++k)
+    local_dofs[k] = values[k];
+  // We don't need the discontinuous function for local interpolation
+  local_dofs.back() = 0.;
+}
+
+
+
+// Interpolate from vector-valued support point data, reading components from offset on.
+template <int dim, int spacedim>
+void FE_Q_DG0<dim,spacedim>::interpolate(std::vector<double>    &local_dofs,
+                                         const std::vector<Vector<double> > &values,
+                                         unsigned int offset) const
+{
+  Assert (values.size() == this->unit_support_points.size(),
+          ExcDimensionMismatch(values.size(),
+                               this->unit_support_points.size()));
+  Assert (local_dofs.size() == this->dofs_per_cell,
+          ExcDimensionMismatch(local_dofs.size(),this->dofs_per_cell));
+  Assert (values[0].size() >= offset+this->n_components(),
+          ExcDimensionMismatch(values[0].size(),offset+this->n_components()));
+
+  const unsigned int n_nodal_dofs = this->dofs_per_cell-1;
+  for (unsigned int dof=0; dof<n_nodal_dofs; ++dof)
+    {
+      const unsigned int component = this->system_to_component_index(dof).first;
+      local_dofs[dof] = values[dof](offset+component);
+    }
+
+  // We don't need the discontinuous function for local interpolation
+  local_dofs.back() = 0.;
+}
+
+
+
+// Interpolate from component-wise data: values[c][i] is component c at support point i.
+template <int dim, int spacedim>
+void FE_Q_DG0<dim,spacedim>::interpolate(
+  std::vector<double> &local_dofs,
+  const VectorSlice<const std::vector<std::vector<double> > > &values) const
+{
+  // check the number of components first, values[0] is only meaningful afterwards
+  Assert (values.size() == this->n_components(),
+          ExcDimensionMismatch(values.size(), this->n_components()));
+  Assert (values[0].size() == this->unit_support_points.size(),
+          ExcDimensionMismatch(values[0].size(), this->unit_support_points.size()));
+  Assert (local_dofs.size() == this->dofs_per_cell,
+          ExcDimensionMismatch(local_dofs.size(),this->dofs_per_cell));
+
+  for (unsigned int i=0; i<this->dofs_per_cell-1; ++i)
+    {
+      const std::pair<unsigned int, unsigned int> index
+        = this->system_to_component_index(i);
+      local_dofs[i] = values[index.first][i];
+    }
+
+  // We don't need the discontinuous function for local interpolation
+  local_dofs[local_dofs.size()-1] = 0.;
+}
+
+
+
+// Interpolation matrix from another FE_Q_DG0; the actual work is done by FE_Q_Base.
+template <int dim, int spacedim>
+void FE_Q_DG0<dim,spacedim>::
+get_interpolation_matrix (const FiniteElement<dim,spacedim> &x_source_fe,
+                          FullMatrix<double>       &interpolation_matrix) const
+{
+  // this is only implemented, if the source FE is also a Q_DG0 element
+  typedef FE_Q_DG0<dim,spacedim> FEQDG0;
+  typedef FiniteElement<dim,spacedim> FEL;
+
+  AssertThrow ((x_source_fe.get_name().find ("FE_Q_DG0<") == 0)
+               ||
+               (dynamic_cast<const FEQDG0 *>(&x_source_fe) != 0),
+               typename FEL::
+               ExcInterpolationNotImplemented());
+  // rows belong to this element, columns to the source element
+  Assert (interpolation_matrix.m() == this->dofs_per_cell,
+          ExcDimensionMismatch (interpolation_matrix.m(),
+                                this->dofs_per_cell));
+  Assert (interpolation_matrix.n() == x_source_fe.dofs_per_cell,
+          ExcDimensionMismatch (interpolation_matrix.n(),
+                                x_source_fe.dofs_per_cell));
+
+  this->FE_Q_Base<TensorProductPolynomialsConst<dim>,dim,spacedim>::
+  get_interpolation_matrix(x_source_fe, interpolation_matrix);
+}
+
+
+
+// Flags for the (deg+1)^dim nodal dofs (false) followed by the one extra dof (true).
+template <int dim, int spacedim>
+std::vector<bool> FE_Q_DG0<dim,spacedim>::get_riaf_vector(const unsigned int deg)
+{
+  std::vector<bool> flags(Utilities::fixed_power<dim>(deg+1), false);
+  flags.push_back(true);
+  return flags;
+}
+
+
+
+// Dofs per object of dimension 0..dim: (deg-1)^d each, plus one extra dof on the cell itself.
+template <int dim, int spacedim>
+std::vector<unsigned int> FE_Q_DG0<dim,spacedim>::get_dpo_vector(const unsigned int deg)
+{
+  std::vector<unsigned int> per_object(dim+1, 1U);
+  for (unsigned int d=1; d<per_object.size(); ++d)
+    per_object[d] = per_object[d-1]*(deg-1);
+
+  per_object[dim] += 1; // we need an additional DG0-node for a dim-dimensional object
+  return per_object;
+}
+
+
+
+// The last dof is reported to have support on every face; all others are answered by FE_Q_Base.
+template <int dim, int spacedim>
+bool FE_Q_DG0<dim,spacedim>::has_support_on_face (const unsigned int shape_index,
+                                                  const unsigned int face_index) const
+{
+  typedef FE_Q_Base<TensorProductPolynomialsConst<dim>,dim,spacedim> BaseClass;
+  const bool is_dg0_dof = (shape_index == this->dofs_per_cell-1);
+
+  // discontinuous function has support on all faces
+  return is_dg0_dof || BaseClass::has_support_on_face(shape_index, face_index);
+}
+
+
+
+// Two representations of the constant 1: row 0 through the FE_Q dofs, row 1 through the DG0 dof.
+template <int dim, int spacedim>
+std::pair<Table<2,bool>, std::vector<unsigned int> >
+FE_Q_DG0<dim,spacedim>::get_constant_modes () const
+{
+  const unsigned int dg0_dof = this->dofs_per_cell-1;
+  Table<2,bool> modes(2, this->dofs_per_cell);
+
+  // row 0: 1 represented by FE_Q part, row 1: 1 represented by DG0 part
+  for (unsigned int dof=0; dof<dg0_dof; ++dof)
+    modes(0, dof) = true;
+  modes(1, dg0_dof) = true;
+
+  const std::vector<unsigned int> components (2, 0);
+  return std::pair<Table<2,bool>, std::vector<unsigned int> > (modes, components);
+}
+
+
+
+// explicit instantiations
+#include "fe_q_dg0.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/fe_q_dg0.inst.in b/source/fe/fe_q_dg0.inst.in
new file mode 100644
index 0000000..3ab2cfd
--- /dev/null
+++ b/source/fe/fe_q_dg0.inst.in
@@ -0,0 +1,25 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+#if deal_II_dimension <= deal_II_space_dimension 	
+    template class FE_Q_DG0<deal_II_dimension, deal_II_space_dimension>;
+#endif
+  }
+
+
diff --git a/source/fe/fe_q_hierarchical.cc b/source/fe/fe_q_hierarchical.cc
new file mode 100644
index 0000000..a3a7641
--- /dev/null
+++ b/source/fe/fe_q_hierarchical.cc
@@ -0,0 +1,2133 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2002 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/fe/fe_q_hierarchical.h>
+#include <deal.II/fe/fe_nothing.h>
+
+#include <cmath>
+#include <sstream>
+
+//TODO: implement the adjust_quad_dof_index_for_face_orientation_table and
+//adjust_line_dof_index_for_line_orientation_table fields, and write tests
+//similar to bits/face_orientation_and_fe_q_*
+
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace
+{
+  /**
+   * Invert an index map: result[in[i]] = i; every in[i] must be below in.size().
+   */
+  inline std::vector<unsigned int>
+  invert_numbering (const std::vector<unsigned int> &in)
+  {
+    const unsigned int n_entries = in.size();
+    std::vector<unsigned int> inverse (n_entries);
+    for (unsigned int position=0; position<n_entries; ++position)
+      {
+        const unsigned int target = in[position];
+        Assert (target < n_entries, dealii::ExcIndexRange(target,0,n_entries));
+        inverse[target]=position;
+      }
+    return inverse;
+  }
+}
+
+
+
+template <int dim>
+FE_Q_Hierarchical<dim>::FE_Q_Hierarchical (const unsigned int degree) // hierarchical Q element of given degree
+  :
+  FE_Poly<TensorProductPolynomials<dim>, dim> (
+    Polynomials::Hierarchical::generate_complete_basis(degree),
+    FiniteElementData<dim>(get_dpo_vector(degree),1, degree,
+                           FiniteElementData<dim>::H1),
+    std::vector<bool> (FiniteElementData<dim>(
+                         get_dpo_vector(degree),1, degree).dofs_per_cell, false),
+    std::vector<ComponentMask>(FiniteElementData<dim>(
+                                 get_dpo_vector(degree),1, degree).dofs_per_cell, std::vector<bool>(1,true))),
+  face_renumber(face_fe_q_hierarchical_to_hierarchic_numbering (degree))
+{
+  this->poly_space.set_numbering(
+    hierarchic_to_fe_q_hierarchical_numbering(*this));
+
+  // The matrix @p{dofs_cell} contains the
+  // values of the linear functionals of
+  // the master 1d cell applied to the
+  // shape functions of the two 1d subcells.
+  // The matrix @p{dofs_subcell} contains
+  // the values of the linear functionals
+  // on each 1d subcell applied to the
+  // shape functions on the master 1d
+  // cell.
+  // We use @p{dofs_cell} and
+  // @p{dofs_subcell} to compute the
+  // @p{prolongation}, @p{restriction} and
+  // @p{interface_constraints} matrices
+  // for all dimensions.
+  std::vector<FullMatrix<double> >
+  dofs_cell (GeometryInfo<1>::max_children_per_cell,
+             FullMatrix<double> (2*this->dofs_per_vertex + this->dofs_per_line,
+                                 2*this->dofs_per_vertex + this->dofs_per_line));
+  std::vector<FullMatrix<double> >
+  dofs_subcell (GeometryInfo<1>::max_children_per_cell,
+                FullMatrix<double> (2*this->dofs_per_vertex + this->dofs_per_line,
+                                    2*this->dofs_per_vertex + this->dofs_per_line));
+  // build these fields, as they are
+  // needed as auxiliary fields later
+  // on (one square matrix per 1d child)
+  build_dofs_cell (dofs_cell, dofs_subcell);
+
+  // then use them to initialize
+  // other fields
+  initialize_constraints (dofs_subcell);
+  initialize_embedding_and_restriction (dofs_cell, dofs_subcell);
+
+  // finally fill in support points
+  // on cell and face
+  initialize_unit_support_points ();
+  initialize_unit_face_support_points ();
+}
+
+
+
+// Name of the form "FE_Q_Hierarchical<dim>(degree)".
+template <int dim>
+std::string FE_Q_Hierarchical<dim>::get_name () const
+{
+  // note that the FETools::get_fe_from_name function depends on the
+  // particular format of the string this function returns, so they have
+  // to be kept in synch
+
+  std::ostringstream name_stream;
+  name_stream << "FE_Q_Hierarchical<"
+              << dim
+              << ">("
+              << this->degree
+              << ")";
+  return name_stream.str();
+}
+
+
+// Scalar nodal interpolation: deliberately unavailable for this element.
+template <int dim>
+void FE_Q_Hierarchical<dim>::interpolate(std::vector<double> &,
+                                         const std::vector<double> &) const
+{
+  // The default implementation relies on a delta property (values at the
+  // support points equal the corresponding DoFs), which is not the case
+  // for this element, so all that is left to do here is to report that
+  // the operation is not implemented.
+  Assert (false, ExcNotImplemented());
+}
+
+
+// Vector-valued interpolation with a component offset: not implemented for this element.
+template <int dim>
+void FE_Q_Hierarchical<dim>::interpolate(std::vector<double> &,
+                                         const std::vector<Vector<double> > &,
+                                         unsigned int) const
+{
+  // all arguments are ignored; the assertion is unconditional
+  Assert (false, ExcNotImplemented());
+}
+
+
+// Component-wise (VectorSlice) interpolation: not implemented for this element.
+template <int dim>
+void FE_Q_Hierarchical<dim>::interpolate(
+  std::vector<double> &,
+  const VectorSlice<const std::vector<std::vector<double> > > &) const
+{
+  Assert (false, ExcNotImplemented()); // unconditional, the arguments are ignored
+}
+
+
+// Return a newly allocated copy of this element, made with the copy constructor.
+template <int dim>
+FiniteElement<dim> *FE_Q_Hierarchical<dim>::clone() const
+{
+  return new FE_Q_Hierarchical(*this);
+}
+
+// Interpolation matrix from another FE_Q_Hierarchical: a 0/1 matrix pairing identical dofs.
+template <int dim>
+void FE_Q_Hierarchical<dim>::get_interpolation_matrix(const FiniteElement< dim> &source,
+                                                      FullMatrix< double > &matrix) const
+{
+  // support interpolation between FE_Q_Hierarchical only.
+  if (const FE_Q_Hierarchical<dim> *source_fe
+      = dynamic_cast<const FE_Q_Hierarchical<dim>*>(&source))
+    {
+      // ok, source is a Q_Hierarchical element, so we will be able to do the work
+      Assert (matrix.m() == this->dofs_per_cell,
+              ExcDimensionMismatch (matrix.m(),
+                                    this->dofs_per_cell));
+      Assert (matrix.n() == source.dofs_per_cell,
+              ExcDimensionMismatch (matrix.n(),
+                                    source_fe->dofs_per_cell));
+
+      // Recall that DoFs are renumbered in the following order:
+      // vertices, lines, quads, hexes.
+      // As we deal with hierarchical FE, interpolation matrix is rather easy:
+      // it has 1 on pairs of dofs which are the same.
+      // To get those use get_embedding_dofs();
+
+      matrix = 0.;
+
+      // distinguish between the case when we interpolate to a richer element
+      if (this->dofs_per_cell >= source_fe->dofs_per_cell)
+        {
+          const std::vector<unsigned int> dof_map = this->get_embedding_dofs(source_fe->degree);
+          for (unsigned int j=0; j < dof_map.size(); j++)
+            matrix[dof_map[j]][j] = 1.;
+        }
+      // and when just truncate higher modes.
+      else
+        {
+          const std::vector<unsigned int> dof_map = source_fe->get_embedding_dofs(this->degree);
+          for (unsigned int j=0; j < dof_map.size(); j++)
+            matrix[j][dof_map[j]] = 1.;
+        }
+    }
+  else
+    {
+      AssertThrow(false, dealii::ExcMessage("Interpolation is supported only between FE_Q_Hierarchical"));
+    }
+
+}
+
+// Look up the stored prolongation matrix for @p child. Only isotropic
+// refinement is supported, and @p child must be a valid child index.
+template <int dim>
+const FullMatrix<double> &
+FE_Q_Hierarchical<dim>::get_prolongation_matrix (const unsigned int child,
+                                                 const RefinementCase<dim> &refinement_case) const
+{
+  Assert (refinement_case==RefinementCase<dim>::isotropic_refinement,
+          ExcMessage("Prolongation matrices are only available for isotropic refinement!"));
+  Assert (child<GeometryInfo<dim>::n_children(refinement_case),
+          ExcIndexRange(child,0,GeometryInfo<dim>::n_children(refinement_case)));
+  // the table is indexed by (refinement case - 1), then by child
+  return this->prolongation[refinement_case-1][child];
+}
+
+
+// Always reports that hp constraints are implemented for this element.
+template <int dim>
+bool FE_Q_Hierarchical<dim>::hp_constraints_are_implemented () const
+{
+  return true;
+}
+
+
+// Identify vertex dofs of this element with those of @p fe_other.
+//
+// Only two kinds of partner are handled: another FE_Q_Hierarchical, for
+// which vertex dof 0 of one element is paired with vertex dof 0 of the
+// other, and FE_Nothing, for which the list is empty. For anything else
+// the function asserts ExcNotImplemented and returns an empty list.
+template <int dim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FE_Q_Hierarchical<dim>::
+hp_vertex_dof_identities (const FiniteElement<dim> &fe_other) const
+{
+  // shorthand for the return type
+  typedef std::vector<std::pair<unsigned int, unsigned int> > IdentityList;
+
+  if (dynamic_cast<const FE_Q_Hierarchical<dim>*>(&fe_other) != 0)
+    {
+      // there should be exactly one single DoF of each FE at a vertex,
+      // and they should have identical value
+      return IdentityList (1, std::make_pair (0U, 0U));
+    }
+
+  if (dynamic_cast<const FE_Nothing<dim>*>(&fe_other) != 0)
+    {
+      // the FE_Nothing has no degrees of freedom, so there are no
+      // equivalencies to be recorded
+      return IdentityList ();
+    }
+
+  // every other kind of element is not supported: raise the assertion
+  // and fall back to an empty list
+  Assert (false, ExcNotImplemented());
+  return IdentityList ();
+}
+
+// Identify line dofs of this element with those of @p fe_other.
+//
+// For another FE_Q_Hierarchical, line dof i of this element is paired with
+// line dof i of the other one for all i below the smaller of the two
+// dofs_per_line values. For FE_Nothing the list is empty. For anything
+// else the function asserts ExcNotImplemented and returns an empty list.
+template <int dim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FE_Q_Hierarchical<dim>::
+hp_line_dof_identities (const FiniteElement<dim> &fe_other) const
+{
+  typedef std::vector<std::pair<unsigned int, unsigned int> > IdentityList;
+
+  if (dynamic_cast<const FE_Q_Hierarchical<dim>*>(&fe_other) != 0)
+    {
+      // we deal with hierarchical 1d polynomials where dofs are
+      // enumerated increasingly, so the dofs both elements have in
+      // common are the leading ones on either side
+      const unsigned int n_shared = std::min (this->dofs_per_line,
+                                              fe_other.dofs_per_line);
+
+      IdentityList identities;
+      for (unsigned int dof = 0; dof < n_shared; ++dof)
+        identities.push_back (std::make_pair (dof, dof));
+      return identities;
+    }
+
+  if (dynamic_cast<const FE_Nothing<dim>*>(&fe_other) != 0)
+    {
+      // the FE_Nothing has no degrees of freedom, so there are no
+      // equivalencies to be recorded
+      return IdentityList ();
+    }
+
+  // every other kind of element is not supported
+  Assert (false, ExcNotImplemented());
+  return IdentityList ();
+}
+
+// Identify quad dofs of this element with those of @p fe_other.
+//
+// For another FE_Q_Hierarchical, quad dof i of this element is paired with
+// quad dof i of the other one for all i below the smaller of the two
+// dofs_per_quad values. For FE_Nothing the list is empty. For anything
+// else the function asserts ExcNotImplemented and returns an empty list.
+template <int dim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FE_Q_Hierarchical<dim>::
+hp_quad_dof_identities (const FiniteElement<dim> &fe_other) const
+{
+  typedef std::vector<std::pair<unsigned int, unsigned int> > IdentityList;
+
+  if (dynamic_cast<const FE_Q_Hierarchical<dim>*>(&fe_other) != 0)
+    {
+      // NOTE(review): the leading quad dofs are paired index by index;
+      // confirm that this matches the quad dof ordering when the two
+      // elements have different degrees
+      const unsigned int n_shared = std::min (this->dofs_per_quad,
+                                              fe_other.dofs_per_quad);
+
+      IdentityList identities;
+      for (unsigned int dof = 0; dof < n_shared; ++dof)
+        identities.push_back (std::make_pair (dof, dof));
+      return identities;
+    }
+
+  if (dynamic_cast<const FE_Nothing<dim>*>(&fe_other) != 0)
+    {
+      // the FE_Nothing has no degrees of freedom, so there are no
+      // equivalencies to be recorded
+      return IdentityList ();
+    }
+
+  // every other kind of element is not supported
+  Assert (false, ExcNotImplemented());
+  return IdentityList ();
+}
+
+// Face domination between this element and @p fe_other: among
+// FE_Q_Hierarchical the lower degree dominates; for FE_Nothing the result
+// follows is_dominating(); anything else asserts ExcNotImplemented.
+template <int dim>
+FiniteElementDomination::Domination
+FE_Q_Hierarchical<dim>::
+compare_for_face_domination (const FiniteElement<dim> &fe_other) const
+{
+  if (const FE_Q_Hierarchical<dim> *fe_q_other
+      = dynamic_cast<const FE_Q_Hierarchical<dim>*>(&fe_other))
+    {
+      // the element with lowest polynomial degree dominates the other
+      if (this->degree == fe_q_other->degree)
+        return FiniteElementDomination::either_element_can_dominate;
+
+      return (this->degree < fe_q_other->degree ?
+              FiniteElementDomination::this_element_dominates :
+              FiniteElementDomination::other_element_dominates);
+    }
+
+  if (const FE_Nothing<dim> *fe_nothing
+      = dynamic_cast<const FE_Nothing<dim>*>(&fe_other))
+    {
+      // a non-dominating FE_Nothing has no degrees of freedom and is
+      // typically used in a context where we don't require any continuity
+      // along the interface
+      return (fe_nothing->is_dominating() ?
+              FiniteElementDomination::other_element_dominates :
+              FiniteElementDomination::no_requirements);
+    }
+
+  Assert (false, ExcNotImplemented());
+  return FiniteElementDomination::neither_element_dominates;
+}
+
+
+//---------------------------------------------------------------------------
+// Auxiliary functions
+//---------------------------------------------------------------------------
+
+
+template <int dim>
+void
+FE_Q_Hierarchical<dim>::build_dofs_cell (std::vector<FullMatrix<double> > &dofs_cell,
+                                         std::vector<FullMatrix<double> > &dofs_subcell) const
+{
+  const unsigned int dofs_1d = 2*this->dofs_per_vertex + this->dofs_per_line;
+  // Fill the 1d matrices dofs_cell[c] and dofs_subcell[c] for each child c.
+  // The dofs_subcell matrices are transposed
+  // (4.19), (4.21) and (4.27),(4.28),(4.30) in
+  // Demkowicz, Oden, Rachowicz, Hardy, CMAMAE 77, 79-112, 1989
+  // so that
+  // DoFs_c(j) = dofs_subcell[c](j,k) dofs_cell(k)
+
+  // TODO: The dofs_subcell shall differ by a factor 2^p due to shape functions
+  // defined on [0,1] instead of [-1,1]. However, that does not seem to be
+  // the case. Perhaps this factor is added later on in auxiliary functions which
+  // use these matrices.
+
+  // dofs_cell[0](j,k):
+  //    0  1 |  2  3  4.
+  // 0  1  0 |         .
+  // 1  0  0 |         .
+  // -------------------
+  // 2          \      .
+  // 3            \  2^j * k! / (k-j)!j!
+  // 4               \ .
+
+  // dofs_subcell[0](j,k):
+  //    0    1   |  2  3   4  5  6 .
+  // 0  1    0   |                 .
+  // 1  1/2  1/2 | -1  0  -1  0  -1.
+  // -------------------------------
+  // 2              \              .
+  // 3                 \     (-1)^(k+j)/ 2^k * k!/(k-j)!j!
+  // 4                     \       .
+
+  // dofs_cell[1](j,k):
+  //    0  1 |  2  3  4.
+  // 0  0  0 |         .
+  // 1  0  1 |         .
+  // -------------------
+  // 2          \      .
+  // 3             \   (-1)^(k+j) * 2^j * k!/(k-j)!j!
+  // 4                \.
+
+  // dofs_subcell[1](j,k):
+  //    0    1   |  2  3   4  5  6 .
+  // 0  1/2  1/2 | -1  0  -1  0  -1.
+  // 1  0    1   |                 .
+  // -------------------------------
+  // 2              \              .
+  // 3                 \      1/ 2^k * k!/(k-j)!j!
+  // 4                             .
+
+  for (unsigned int c=0; c<GeometryInfo<1>::max_children_per_cell; ++c)
+    for (unsigned int j=0; j<dofs_1d; ++j)
+      for (unsigned int k=0; k<dofs_1d; ++k)
+        {
+          // upper left 2x2 block: every entry of both matrices is written
+          if ((j<=1) && (k<=1))
+            {
+              if (((c==0) && (j==0) && (k==0)) ||
+                  ((c==1) && (j==1) && (k==1)))
+                dofs_cell[c](j,k) = 1.;
+              else
+                dofs_cell[c](j,k) = 0.;
+
+              if      (((c==0) && (j==1)) || ((c==1) && (j==0)))
+                dofs_subcell[c](j,k) = .5;
+              else if (((c==0) && (k==0)) || ((c==1) && (k==1)))
+                dofs_subcell[c](j,k) = 1.;
+              else
+                dofs_subcell[c](j,k) = 0.;
+            }
+          // upper right block: only the -1 entries of dofs_subcell are written
+          else if ((j<=1) && (k>=2))
+            {
+              if (((c==0) && (j==1) && ((k % 2)==0)) ||
+                  ((c==1) && (j==0) && ((k % 2)==0)))
+                dofs_subcell[c](j,k) = -1.;
+            }
+          // lower right block: only the upper triangle (j<=k) is written
+          else if ((j>=2) && (k>=2) && (j<=k))
+            {
+              double factor = 1.;
+              for (unsigned int i=1; i<=j; ++i)
+                factor *= ((double) (k-i+1))/((double) i);
+              // factor == k * (k-1) * ... * (k-j+1) / j! = k! / (k-j)! / j!
+              if (c==0)
+                {
+                  dofs_subcell[c](j,k) = ((k+j) % 2 == 0) ?
+                                         std::pow(.5,static_cast<double>(k))*factor :
+                                         -std::pow(.5,static_cast<double>(k))*factor;
+                  dofs_cell[c](j,k) = std::pow(2.,static_cast<double>(j))*factor;
+                }
+              else
+                {
+                  dofs_subcell[c](j,k) = std::pow(.5,static_cast<double>(k))*factor;
+                  dofs_cell[c](j,k) = ((k+j) % 2 == 0) ?
+                                      std::pow(2.,static_cast<double>(j))*factor :
+                                      -std::pow(2.,static_cast<double>(j))*factor;
+                }
+            }
+        }
+}
+
+
+
+// Fill this->interface_constraints from the 1d matrices in @p dofs_subcell.
+//
+// The table is first resized via interface_constraints_size(). In 1d there
+// is nothing to store; in 2d the rows are taken directly from the 1d
+// matrices; in 3d every entry is the product of two 1d entries, one per
+// direction of the face, written to column face_renumber[i].
+template <int dim>
+void
+FE_Q_Hierarchical<dim>::
+initialize_constraints (const std::vector<FullMatrix<double> > &dofs_subcell)
+{
+  const unsigned int dofs_1d = 2*this->dofs_per_vertex + this->dofs_per_line;
+  const unsigned int degree  = this->degree;
+  // (degree-1) is the stride between consecutive groups of rows below
+  const unsigned int stride  = degree-1;
+
+  this->interface_constraints
+  .TableBase<2,double>::reinit (this->interface_constraints_size());
+
+  switch (dim)
+    {
+    case 1:
+    {
+      // no constraints in 1d
+      break;
+    }
+
+    case 2:
+    {
+      // row 0: vertex node
+      for (unsigned int col=0; col<dofs_1d; ++col)
+        this->interface_constraints(0,col) = dofs_subcell[0](1,col);
+
+      // rows 1 + c*stride + (j-2): edge node j of child c
+      for (unsigned int c=0; c<GeometryInfo<1>::max_children_per_cell; ++c)
+        for (unsigned int col=0; col<dofs_1d; ++col)
+          for (unsigned int j=2; j<dofs_1d; ++j)
+            this->interface_constraints(1 + c*stride + j - 2,col) =
+              dofs_subcell[c](j,col);
+      break;
+    }
+
+    case 3:
+    {
+      for (unsigned int i=0; i<dofs_1d * dofs_1d; i++)
+        {
+          // split i into its two 1d indices and look up the column that
+          // belongs to it
+          const unsigned int ix  = i % dofs_1d;
+          const unsigned int iy  = (i - ix) / dofs_1d;
+          const unsigned int col = face_renumber[i];
+
+          // the 1d matrices of the two children
+          const FullMatrix<double> &sub0 = dofs_subcell[0];
+          const FullMatrix<double> &sub1 = dofs_subcell[1];
+
+          // center vertex node
+          this->interface_constraints(0,col) = sub0(1,ix) * sub0(1,iy);
+
+          // boundary vertex nodes
+          this->interface_constraints(1,col) = sub0(0,ix) * sub0(1,iy);
+          this->interface_constraints(2,col) = sub1(1,ix) * sub0(1,iy);
+          this->interface_constraints(3,col) = sub0(1,ix) * sub0(0,iy);
+          this->interface_constraints(4,col) = sub1(0,ix) * sub1(1,iy);
+
+          // interior edges
+          for (unsigned int j=0; j<stride; j++)
+            {
+              this->interface_constraints(5 + j,col) =
+                sub0(1,ix) * sub0(2 + j,iy);
+              this->interface_constraints(5 + stride + j,col) =
+                sub0(1,ix) * sub1(2 + j,iy);
+              this->interface_constraints(5 + 2*stride + j,col) =
+                sub0(2 + j,ix) * sub1(0,iy);
+              this->interface_constraints(5 + 3*stride + j,col) =
+                sub1(2 + j,ix) * sub0(1,iy);
+            }
+
+          // boundary edges
+          for (unsigned int j=0; j<stride; j++)
+            {
+              // first of the eight boundary edge rows for this j
+              const unsigned int row = 5 + 4*stride + j;
+
+              // left edge
+              this->interface_constraints(row,col) =
+                sub0(0,ix) * sub0(2 + j,iy);
+              this->interface_constraints(row + stride,col) =
+                sub0(0,ix) * sub1(2 + j,iy);
+              // right edge
+              this->interface_constraints(row + 2*stride,col) =
+                sub1(1,ix) * sub0(2 + j,iy);
+              this->interface_constraints(row + 3*stride,col) =
+                sub1(1,ix) * sub1(2 + j,iy);
+              // bottom edge
+              this->interface_constraints(row + 4*stride,col) =
+                sub0(2 + j,ix) * sub0(0,iy);
+              this->interface_constraints(row + 5*stride,col) =
+                sub1(2 + j,ix) * sub0(0,iy);
+              // top edge
+              this->interface_constraints(row + 6*stride,col) =
+                sub0(2 + j,ix) * sub1(1,iy);
+              this->interface_constraints(row + 7*stride,col) =
+                sub1(2 + j,ix) * sub1(1,iy);
+            }
+
+          // interior faces
+          for (unsigned int j=0; j<stride; j++)
+            for (unsigned int k=0; k<stride; k++)
+              {
+                // row of this (j,k) pair for subcell 0; the other
+                // subcells follow at multiples of stride*stride
+                const unsigned int row = 5 + 12*stride + j + k*stride;
+
+                // subcell 0
+                this->interface_constraints(row,col) =
+                  sub0(2 + j,ix) * sub0(2 + k,iy);
+                // subcell 1
+                this->interface_constraints(row + stride*stride,col) =
+                  sub1(2 + j,ix) * sub0(2 + k,iy);
+                // subcell 2
+                this->interface_constraints(row + 2*stride*stride,col) =
+                  sub0(2 + j,ix) * sub1(2 + k,iy);
+                // subcell 3
+                this->interface_constraints(row + 3*stride*stride,col) =
+                  sub1(2 + j,ix) * sub1(2 + k,iy);
+              }
+        }
+      break;
+    }
+
+    default:
+      Assert (false, ExcNotImplemented());
+    }
+}
+
+
+
+// Fill the isotropic prolongation (from @p dofs_subcell) and restriction
+// (from @p dofs_cell) matrices of all children. In 1d the 1d matrices are
+// used directly; in 2d and 3d each entry is a product of 1d entries.
+template <int dim>
+void
+FE_Q_Hierarchical<dim>::
+initialize_embedding_and_restriction (const std::vector<FullMatrix<double> > &dofs_cell,
+                                      const std::vector<FullMatrix<double> > &dofs_subcell)
+{
+  const unsigned int iso = RefinementCase<dim>::isotropic_refinement-1;
+  const unsigned int dofs_1d = 2*this->dofs_per_vertex + this->dofs_per_line;
+  const std::vector<unsigned int> &renumber = this->poly_space.get_numbering();
+
+  // size all child matrices as dofs_per_cell x dofs_per_cell
+  for (unsigned int c=0; c<GeometryInfo<dim>::max_children_per_cell; ++c)
+    {
+      this->prolongation[iso][c].reinit (this->dofs_per_cell, this->dofs_per_cell);
+      this->restriction[iso][c].reinit (this->dofs_per_cell, this->dofs_per_cell);
+    }
+
+  // the 1d case is particularly simple, so special case it:
+  if (dim==1)
+    {
+      for (unsigned int c=0; c<GeometryInfo<dim>::max_children_per_cell; ++c)
+        {
+          this->prolongation[iso][c].fill (dofs_subcell[c]);
+          this->restriction[iso][c].fill (dofs_cell[c]);
+        }
+      return;
+    }
+
+  // for higher dimensions, things are a little more tricky: j loops over
+  // the dofs in the subcell (rows of the embedding matrix), i over the
+  // dofs in the master cell (columns of the embedding matrix)
+  for (unsigned int j=0; j<this->dofs_per_cell; ++j)
+    for (unsigned int i=0; i<this->dofs_per_cell; ++i)
+      {
+        // split the renumbered indices of both dofs into the index along
+        // the first direction and the remaining part
+        const unsigned int j0     = renumber[j] % dofs_1d;
+        const unsigned int i0     = renumber[i] % dofs_1d;
+        const unsigned int j_rest = (renumber[j] - j0) / dofs_1d;
+        const unsigned int i_rest = (renumber[i] - i0) / dofs_1d;
+
+        switch (dim)
+          {
+          case 2:
+          {
+            for (unsigned int c=0; c<GeometryInfo<2>::max_children_per_cell; ++c)
+              {
+                // left/right line: 0/1
+                const unsigned int c0 = c%2;
+                // bottom/top line: 0/1
+                const unsigned int c1 = c/2;
+
+                this->prolongation[iso][c](j,i) =
+                  dofs_subcell[c0](j0,i0) *
+                  dofs_subcell[c1](j_rest,i_rest);
+
+                this->restriction[iso][c](j,i) =
+                  dofs_cell[c0](j0,i0) *
+                  dofs_cell[c1](j_rest,i_rest);
+              }
+            break;
+          }
+
+          case 3:
+          {
+            // split the remaining part once more to obtain the indices
+            // along the second and third direction
+            const unsigned int j1 = j_rest % dofs_1d;
+            const unsigned int i1 = i_rest % dofs_1d;
+            const unsigned int j2 = (j_rest - j1) / dofs_1d;
+            const unsigned int i2 = (i_rest - i1) / dofs_1d;
+
+            for (unsigned int c=0; c<GeometryInfo<3>::max_children_per_cell; ++c)
+              {
+                // left/right face: 0/1
+                const unsigned int c0 = c%2;
+                // front/back face: 0/1
+                const unsigned int c1 = (c%4)/2;
+                // bottom/top face: 0/1
+                const unsigned int c2 = c/4;
+
+                this->prolongation[iso][c](j,i) =
+                  dofs_subcell[c0](j0,i0) *
+                  dofs_subcell[c1](j1,i1) *
+                  dofs_subcell[c2](j2,i2);
+
+                this->restriction[iso][c](j,i) =
+                  dofs_cell[c0](j0,i0) *
+                  dofs_cell[c1](j1,i1) *
+                  dofs_cell[c2](j2,i2);
+              }
+            break;
+          }
+
+          default:
+            Assert (false, ExcNotImplemented());
+          }
+      }
+}
+
+
+
+// Assign a unit support point to every dof of the cell.
+//
+// The dofs are visited in tensor-product order (ix fastest, then iy, then
+// iz). In each coordinate direction index 0 maps to 0, index 1 maps to 1
+// and every higher index maps to 0.5; the k-th point visited is stored at
+// position poly_space.get_numbering_inverse()[k].
+template <int dim>
+void FE_Q_Hierarchical<dim>::initialize_unit_support_points ()
+{
+  // number of points: (degree+1)^dim
+  const unsigned int points_1d = this->degree+1;
+  unsigned int n_points = points_1d;
+  for (unsigned int d=1; d<dim; ++d)
+    n_points *= points_1d;
+
+  this->unit_support_points.resize(n_points);
+
+  const std::vector<unsigned int> &index_map_inverse=
+    this->poly_space.get_numbering_inverse();
+
+  // the method of numbering allows
+  // each dof to be associated with a
+  // support point. There is
+  // only one support point per
+  // vertex, line, quad, hex, etc.
+  //
+  // note, however, that the support
+  // points thus associated with
+  // shape functions are not unique:
+  // the linear shape functions are
+  // associated with the vertices,
+  // but all others are associated
+  // with either line, quad, or hex
+  // midpoints, and there may be
+  // multiple shape functions
+  // associated with them. there
+  // really is no other useful
+  // numbering, since the
+  // hierarchical shape functions do
+  // not vanish at all-but-one
+  // interpolation points (like the
+  // Lagrange functions used in
+  // FE_Q), so there's not much we
+  // can do here.
+
+  // TODO shouldn't we just at least make support points unique,
+  // even though the delta property is not satisfied for this FE?
+
+  // last index of the y and z loops; a direction that does not exist in
+  // this dimension is traversed exactly once
+  const unsigned int last_y = (dim>1) ? this->degree : 0;
+  const unsigned int last_z = (dim>2) ? this->degree : 0;
+
+  Point<dim> p;
+  unsigned int k=0;
+  for (unsigned int iz=0; iz<=last_z; ++iz)
+    for (unsigned int iy=0; iy<=last_y; ++iy)
+      for (unsigned int ix=0; ix<=this->degree; ++ix)
+        {
+          // per direction: 0 -> 0, 1 -> 1, everything else -> .5
+          p(0) = (ix==0) ? 0. : ((ix==1) ? 1. : .5);
+
+          if (dim>1)
+            p(1) = (iy==0) ? 0. : ((iy==1) ? 1. : .5);
+
+          if (dim>2)
+            p(2) = (iz==0) ? 0. : ((iz==1) ? 1. : .5);
+
+          // store the point at the position the numbering assigns to k
+          this->unit_support_points[index_map_inverse[k]] = p;
+          ++k;
+        }
+}
+
+
+
+template <>
+void FE_Q_Hierarchical<1>::initialize_unit_face_support_points ()
+{
+  // 1d specialization: no faces in 1d, so there is nothing to do here
+}
+
+
+template <>
+void FE_Q_Hierarchical<1>::get_face_interpolation_matrix (
+  const FiniteElement<1,1> &/*x_source_fe*/,
+  FullMatrix<double>       &/*interpolation_matrix*/) const
+{
+  Assert (false, ExcImpossibleInDim(1));  // 1d specialization: always asserts
+}
+
+
+// 1d specialization: does nothing but raise ExcImpossibleInDim(1).
+template <>
+void FE_Q_Hierarchical<1>::get_subface_interpolation_matrix (
+  const FiniteElement<1,1> &/*x_source_fe*/,
+  const unsigned int        /*subface*/,
+  FullMatrix<double>       &/*interpolation_matrix*/) const
+{
+  Assert (false, ExcImpossibleInDim(1));
+}
+
+
+
+// Fill @p interpolation_matrix (x_source_fe.dofs_per_face rows by
+// this->dofs_per_face columns) with the face interpolation between this
+// element and @p x_source_fe, which must be an FE_Q_Hierarchical as well.
+template <int dim>
+void
+FE_Q_Hierarchical<dim>::
+get_face_interpolation_matrix (const FiniteElement<dim> &x_source_fe,
+                               FullMatrix<double>       &interpolation_matrix) const
+{
+  typedef FE_Q_Hierarchical<dim> FEQHierarchical;
+  typedef FiniteElement<dim> FEL;
+
+  // this is only implemented if the source FE is also a Q_Hierarchical
+  // element; it is accepted if either its name or its type says so
+  AssertThrow ((x_source_fe.get_name().find ("FE_Q_Hierarchical<") == 0)
+               ||
+               (dynamic_cast<const FEQHierarchical *>(&x_source_fe) != 0),
+               typename FEL::
+               ExcInterpolationNotImplemented());
+
+  Assert (interpolation_matrix.n() == this->dofs_per_face,
+          ExcDimensionMismatch (interpolation_matrix.n(),
+                                this->dofs_per_face));
+  Assert (interpolation_matrix.m() == x_source_fe.dofs_per_face,
+          ExcDimensionMismatch (interpolation_matrix.m(),
+                                x_source_fe.dofs_per_face));
+
+  // ok, source is a Q_Hierarchical element, so we will be able to do the
+  // work; source_fe is only read by the assertion below, hence the void cast
+  const FE_Q_Hierarchical<dim> &source_fe
+    = dynamic_cast<const FE_Q_Hierarchical<dim>&>(x_source_fe);
+  (void)source_fe;
+
+  // Make sure that the element for which the DoFs should be constrained
+  // is the one with the higher polynomial degree. Actually the procedure
+  // will work also if this assertion is not satisfied. But the matrices
+  // produced in that case might lead to problems in the hp procedures,
+  // which use this method.
+  Assert (this->dofs_per_face <= source_fe.dofs_per_face,
+          typename FEL::
+          ExcInterpolationNotImplemented ());
+
+  interpolation_matrix = 0;
+
+  switch (dim)
+    {
+    case 2:
+    {
+      // In 2d the constraints are trivial: the first this->dofs_per_face
+      // DoFs of the constrained element are made equal to the current
+      // (dominating) element, i.e. 1 on the diagonal of the matrix. DoFs of
+      // higher polynomials are zeroed (zero rows in the matrix).
+      for (unsigned int dof = 0; dof < this->dofs_per_face; ++dof)
+        interpolation_matrix (dof, dof) = 1;
+      break;
+    }
+
+    case 3:
+    {
+      const unsigned int n_vertices    = GeometryInfo<3>::vertices_per_face;
+      const unsigned int n_lines       = GeometryInfo<3>::lines_per_face;
+      const unsigned int this_stride   = this->degree - 1;
+      const unsigned int source_stride = x_source_fe.degree - 1;
+
+      // the leading vertices_per_face dofs map one to one
+      for (unsigned int v = 0; v < n_vertices; ++v)
+        interpolation_matrix (v, v) = 1;
+
+      for (unsigned int i = 0; i < this_stride; ++i)
+        {
+          // dof i of each of the lines_per_face lines
+          for (unsigned int line = 0; line < n_lines; ++line)
+            interpolation_matrix (i + line * source_stride + n_vertices,
+                                  i + line * this_stride + n_vertices) = 1;
+
+          // dofs that follow the line dofs (presumably the face interior)
+          for (unsigned int j = 0; j < this_stride; ++j)
+            interpolation_matrix ((i + n_lines) * source_stride + j + n_vertices,
+                                  (i + n_lines) * this_stride + j + n_vertices) = 1;
+        }
+      break;
+    }
+    }
+}
+
+
+
+template <int dim>
+void
+FE_Q_Hierarchical<dim>::
+get_subface_interpolation_matrix (const FiniteElement<dim> &x_source_fe,
+                                  const unsigned int        subface,
+                                  FullMatrix<double>       &interpolation_matrix) const
+{
+  // this is only implemented, if the
+  // source FE is also a
+  // Q_Hierarchical element
+  typedef FE_Q_Hierarchical<dim> FEQHierarchical;
+  typedef FiniteElement<dim> FEL;
+  AssertThrow ((x_source_fe.get_name().find ("FE_Q_Hierarchical<") == 0)
+               ||
+               (dynamic_cast<const FEQHierarchical *>(&x_source_fe) != 0),
+               typename FEL::
+               ExcInterpolationNotImplemented());
+
+  Assert (interpolation_matrix.n() == this->dofs_per_face,
+          ExcDimensionMismatch (interpolation_matrix.n(),
+                                this->dofs_per_face));
+  Assert (interpolation_matrix.m() == x_source_fe.dofs_per_face,
+          ExcDimensionMismatch (interpolation_matrix.m(),
+                                x_source_fe.dofs_per_face));
+
+  // ok, source is a Q_Hierarchical element, so
+  // we will be able to do the work
+  const FE_Q_Hierarchical<dim> &source_fe
+    = dynamic_cast<const FE_Q_Hierarchical<dim>&>(x_source_fe);
+
+  // Make sure, that the element,
+  // for which the DoFs should be
+  // constrained is the one with
+  // the higher polynomial degree.
+  // Actually the procedure will work
+  // also if this assertion is not
+  // satisfied. But the matrices
+  // produced in that case might
+  // lead to problems in the
+  // hp procedures, which use this
+  // method.
+  Assert (this->dofs_per_face <= source_fe.dofs_per_face,
+          typename FEL::
+          ExcInterpolationNotImplemented ());
+
+  switch (dim)
+    {
+    case 2:
+    {
+      switch (subface)
+        {
+        case 0:
+        {
+          interpolation_matrix (0, 0) = 1.0;
+          interpolation_matrix (1, 0) = 0.5;
+          interpolation_matrix (1, 1) = 0.5;
+
+          for (unsigned int dof = 2; dof < this->dofs_per_face;)
+            {
+              interpolation_matrix (1, dof) = -1.0;
+              dof = dof + 2;
+            }
+
+          int factorial_i = 1;
+          int factorial_ij;
+          int factorial_j;
+
+          for (int i = 2; i < (int) this->dofs_per_face; ++i)
+            {
+              interpolation_matrix (i, i) = std::pow (0.5, i);
+              factorial_i *= i;
+              factorial_j = factorial_i;
+              factorial_ij = 1;
+
+              for (int j = i + 1; j < (int) this->dofs_per_face; ++j)
+                {
+                  factorial_ij *= j - i;
+                  factorial_j *= j;
+
+                  if ((i + j) & 1)
+                    interpolation_matrix (i, j)
+                      = -1.0 * std::pow (0.5, j) *
+                        factorial_j / (factorial_i * factorial_ij);
+
+                  else
+                    interpolation_matrix (i, j)
+                      = std::pow (0.5, j) *
+                        factorial_j / (factorial_i * factorial_ij);
+                }
+            }
+
+          break;
+        }
+
+        case 1:
+        {
+          interpolation_matrix (0, 0) = 0.5;
+          interpolation_matrix (0, 1) = 0.5;
+
+          for (unsigned int dof = 2; dof < this->dofs_per_face;)
+            {
+              interpolation_matrix (0, dof) = -1.0;
+              dof = dof + 2;
+            }
+
+          interpolation_matrix (1, 1) = 1.0;
+
+          int factorial_i = 1;
+          int factorial_ij;
+          int factorial_j;
+
+          for (int i = 2; i < (int) this->dofs_per_face; ++i)
+            {
+              interpolation_matrix (i, i) = std::pow (0.5, i);
+              factorial_i *= i;
+              factorial_j = factorial_i;
+              factorial_ij = 1;
+
+              for (int j = i + 1; j < (int) this->dofs_per_face; ++j)
+                {
+                  factorial_ij *= j - i;
+                  factorial_j *= j;
+                  interpolation_matrix (i, j)
+                    = std::pow (0.5, j) * factorial_j / (factorial_i * factorial_ij);
+                }
+            }
+        }
+        }
+
+      break;
+    }
+
+    case 3:
+    {
+      switch (subface)
+        {
+        case 0:
+        {
+          interpolation_matrix (0, 0) = 1.0;
+          interpolation_matrix (1, 0) = 0.5;
+          interpolation_matrix (1, 1) = 0.5;
+          interpolation_matrix (2, 0) = 0.5;
+          interpolation_matrix (2, 2) = 0.5;
+
+          for (unsigned int i = 0; i < this->degree - 1;)
+            {
+              for (unsigned int line = 0; line < GeometryInfo<3>::lines_per_face; ++line)
+                interpolation_matrix (3, i + line * (this->degree - 1) + 4) = -0.5;
+
+              for (unsigned int j = 0; j < this->degree - 1;)
+                {
+                  interpolation_matrix (3, i + (j + 4) * this->degree - j) = 1.0;
+                  j = j + 2;
+                }
+
+              interpolation_matrix (1, i + 2 * (this->degree + 1)) = -1.0;
+              interpolation_matrix (2, i + 4) = -1.0;
+              i = i + 2;
+            }
+
+          for (unsigned int vertex = 0; vertex < GeometryInfo<3>::vertices_per_face; ++vertex)
+            interpolation_matrix (3, vertex) = 0.25;
+
+          int factorial_i = 1;
+          int factorial_ij;
+          int factorial_j;
+          int factorial_k;
+          int factorial_kl;
+          int factorial_l;
+
+          for (int i = 2; i <= (int) this->degree; ++i)
+            {
+              double tmp = std::pow (0.5, i);
+              interpolation_matrix (i + 2, i + 2) = tmp;
+              interpolation_matrix (i + 2 * source_fe.degree, i + 2 * this->degree) = tmp;
+              tmp *= 0.5;
+              interpolation_matrix (i + source_fe.degree + 1, i + 2) = tmp;
+              interpolation_matrix (i + source_fe.degree + 1, i + this->degree + 1) = tmp;
+              interpolation_matrix (i + 3 * source_fe.degree - 1, i + 2 * this->degree) = tmp;
+              interpolation_matrix (i + 3 * source_fe.degree - 1, i + 3 * this->degree - 1) = tmp;
+              tmp *= -2.0;
+
+              for (unsigned int j = 0; j < this->degree - 1;)
+                {
+                  interpolation_matrix (i + source_fe.degree + 1, (i + 2) * this->degree + j + 2 - i) = tmp;
+                  interpolation_matrix (i + 3 * source_fe.degree - 1, i + (j + 4) * this->degree - j - 2) = tmp;
+                  j = j + 2;
+                }
+
+              factorial_k = 1;
+
+              for (int j = 2; j <= (int) this->degree; ++j)
+                {
+                  interpolation_matrix (i + (j + 2) * source_fe.degree - j, i + (j + 2) * this->degree - j) = std::pow (0.5, i + j);
+                  factorial_k *= j;
+                  factorial_kl = 1;
+                  factorial_l = factorial_k;
+
+                  for (int k = j + 1; k < (int) this->degree; ++k)
+                    {
+                      factorial_kl *= k - j;
+                      factorial_l *= k;
+
+                      if ((j + k) & 1)
+                        interpolation_matrix (i + (j + 2) * source_fe.degree - j, i + (k + 2) * this->degree - k) = -1.0 * std::pow (0.5, i + k) * factorial_l / (factorial_k * factorial_kl);
+
+                      else
+                        interpolation_matrix (i + (j + 2) * source_fe.degree - j, i + (k + 2) * this->degree - k) = std::pow (0.5, i + k) * factorial_l / (factorial_k * factorial_kl);
+                    }
+                }
+
+              factorial_i *= i;
+              factorial_j = factorial_i;
+              factorial_ij = 1;
+
+              for (int j = i + 1; j <= (int) this->degree; ++j)
+                {
+                  factorial_ij *= j - i;
+                  factorial_j *= j;
+
+                  if ((i + j) & 1)
+                    {
+                      tmp = -1.0 * std::pow (0.5, j) * factorial_j / (factorial_i * factorial_ij);
+                      interpolation_matrix (i + 2, j + 2) = tmp;
+                      interpolation_matrix (i + 2 * source_fe.degree, j + 2 * this->degree) = tmp;
+                      factorial_k = 1;
+
+                      for (int k = 2; k <= (int) this->degree; ++k)
+                        {
+                          interpolation_matrix (i + (k + 2) * source_fe.degree - k, j + (k + 2) * this->degree - k) = tmp * std::pow (0.5, k);
+                          factorial_k *= k;
+                          factorial_l = factorial_k;
+                          factorial_kl = 1;
+
+                          for (int l = k + 1; l <= (int) this->degree; ++l)
+                            {
+                              factorial_kl *= l - k;
+                              factorial_l *= l;
+
+                              if ((k + l) & 1)
+                                interpolation_matrix (i + (k + 2) * source_fe.degree - k, j + (l + 2) * this->degree - l) = -1.0 * tmp * std::pow (0.5, l) * factorial_l / (factorial_k * factorial_kl);
+
+                              else
+                                interpolation_matrix (i + (k + 2) * source_fe.degree - k, j + (l + 2) * this->degree - l) = tmp * std::pow (0.5, l) * factorial_l / (factorial_k * factorial_kl);
+                            }
+                        }
+
+                      tmp *= 0.5;
+                      interpolation_matrix (i + source_fe.degree + 1, j + 2) = tmp;
+                      interpolation_matrix (i + source_fe.degree + 1, j + this->degree + 1) = tmp;
+                      interpolation_matrix (i + 3 * source_fe.degree - 1, j + 2 * this->degree) = tmp;
+                      interpolation_matrix (i + 3 * source_fe.degree - 1, j + 3 * this->degree - 1) = tmp;
+                      tmp *= -2.0;
+
+                      for (unsigned int k = 0; k < this->degree - 1;)
+                        {
+                          interpolation_matrix (i + source_fe.degree + 1, (j + 2) * this->degree + k + 2 - j) = tmp;
+                          interpolation_matrix (i + 3 * source_fe.degree - 1, j + (k + 4) * this->degree - k - 2) = tmp;
+                          k = k + 2;
+                        }
+                    }
+                  else
+                    {
+                      tmp = std::pow (0.5, j) * factorial_j / (factorial_i * factorial_ij);
+                      interpolation_matrix (i + 2, j + 2) = tmp;
+                      interpolation_matrix (i + 2 * source_fe.degree, j + 2 * this->degree) = tmp;
+                      factorial_k = 1;
+
+                      for (int k = 2; k <= (int) this->degree; ++k)
+                        {
+                          interpolation_matrix (i + (k + 2) * source_fe.degree - k, j + (k + 2) * this->degree - k) = tmp * std::pow (0.5, k);
+                          factorial_k *= k;
+                          factorial_l = factorial_k;
+                          factorial_kl = 1;
+
+                          for (int l = k + 1; l <= (int) this->degree; ++l)
+                            {
+                              factorial_kl *= l - k;
+                              factorial_l *= l;
+
+                              if ((k + l) & 1)
+                                interpolation_matrix (i + (k + 2) * source_fe.degree - k, j + (l + 2) * this->degree - l) = -1.0 * tmp * std::pow (0.5, l) * factorial_l / (factorial_k * factorial_kl);
+
+                              else
+                                interpolation_matrix (i + (k + 2) * source_fe.degree - k, j + (l + 2) * this->degree - l) = tmp * std::pow (0.5, l) * factorial_l / (factorial_k * factorial_kl);
+                            }
+                        }
+
+                      tmp *= 0.5;
+                      interpolation_matrix (i + source_fe.degree + 1, j + 2) = tmp;
+                      interpolation_matrix (i + source_fe.degree + 1, j + this->degree + 1) = tmp;
+                      interpolation_matrix (i + 3 * source_fe.degree - 1, j + 2 * this->degree) = tmp;
+                      interpolation_matrix (i + 3 * source_fe.degree - 1, j + 3 * this->degree - 1) = tmp;
+                      tmp *= -2.0;
+
+                      for (unsigned int k = 0; k < this->degree - 1;)
+                        {
+                          interpolation_matrix (i + source_fe.degree + 1, (j + 2) * this->degree + k + 2 - j) = tmp;
+                          interpolation_matrix (i + 3 * source_fe.degree - 1, j + (k + 4) * this->degree - k - 2) = tmp;
+                          k = k + 2;
+                        }
+                    }
+                }
+            }
+
+          break;
+        }
+
+        case 1:
+        {
+          interpolation_matrix (0, 0) = 0.5;
+          interpolation_matrix (0, 1) = 0.5;
+          interpolation_matrix (1, 1) = 1.0;
+          interpolation_matrix (3, 1) = 0.5;
+          interpolation_matrix (3, 3) = 0.5;
+
+          for (unsigned int i = 0; i < this->degree - 1;)
+            {
+              for (unsigned int line = 0; line < GeometryInfo<3>::lines_per_face; ++line)
+                interpolation_matrix (2, i + line * (this->degree - 1) + 4) = -0.5;
+
+              for (unsigned int j = 0; j < this->degree - 1;)
+                {
+                  interpolation_matrix (2, i + (j + 4) * this->degree - j) = 1.0;
+                  j = j + 2;
+                }
+
+              interpolation_matrix (0, i + 2 * (this->degree + 1)) = -1.0;
+              interpolation_matrix (3, i + this->degree + 3) = -1.0;
+              i = i + 2;
+            }
+
+          for (unsigned int vertex = 0; vertex < GeometryInfo<3>::vertices_per_face; ++vertex)
+            interpolation_matrix (2, vertex) = 0.25;
+
+          int factorial_i = 1;
+          int factorial_ij;
+          int factorial_j;
+          int factorial_k;
+          int factorial_kl;
+          int factorial_l;
+
+          for (int i = 2; i <= (int) this->degree; ++i)
+            {
+              double tmp = std::pow (0.5, i + 1);
+              interpolation_matrix (i + 2, i + 2) = tmp;
+              interpolation_matrix (i + 2, i + this->degree + 1) = tmp;
+              interpolation_matrix (i + 3 * source_fe.degree - 1, i + 2 * this->degree) = tmp;
+              interpolation_matrix (i + 3 * source_fe.degree - 1, i + 3 * this->degree - 1) = tmp;
+              tmp *= -2.0;
+
+              for (unsigned int j = 0; j < this->degree - 1;)
+                {
+                  interpolation_matrix (i + 2, j + (i + 2) * this->degree + 2 - i) = tmp;
+                  interpolation_matrix (i + 3 * source_fe.degree - 1, i + (j + 4) * this->degree - j - 2) = tmp;
+                  j = j + 2;
+                }
+
+              tmp *= - 1.0;
+              interpolation_matrix (i + source_fe.degree + 1, i + this->degree + 1) = tmp;
+              interpolation_matrix (i + 2 * source_fe.degree, i + 2 * this->degree) = tmp;
+              factorial_i *= i;
+              factorial_j = factorial_i;
+              factorial_ij = 1;
+
+              for (int j = i + 1; j <= (int) this->degree; ++j)
+                {
+                  factorial_ij *= j - i;
+                  factorial_j *= j;
+                  tmp = std::pow (0.5, j) * factorial_j / (factorial_i * factorial_ij);
+                  interpolation_matrix (i + 2 * source_fe.degree, j + 2 * this->degree) = tmp;
+                  factorial_k = 1;
+
+                  for (int k = 2; k <= (int) this->degree; ++k)
+                    {
+                      interpolation_matrix (i + (k + 2) * source_fe.degree - k, j + (k + 2) * this->degree - k) = tmp * std::pow (0.5, k);
+                      factorial_k *= k;
+                      factorial_l = factorial_k;
+                      factorial_kl = 1;
+
+                      for (int l = k + 1; l <= (int) this->degree; ++l)
+                        {
+                          factorial_kl *= l - k;
+                          factorial_l *= l;
+
+                          if ((k + l) & 1)
+                            interpolation_matrix (i + (k + 2) * source_fe.degree - k, j + (l + 2) * this->degree - l) = -1.0 * tmp * std::pow (0.5, l) * factorial_l / (factorial_k * factorial_kl);
+
+                          else
+                            interpolation_matrix (i + (k + 2) * source_fe.degree - k, j + (l + 2) * this->degree - l) = tmp * std::pow (0.5, l) * factorial_l / (factorial_k * factorial_kl);
+                        }
+                    }
+
+                  tmp *= -1.0;
+
+                  for (unsigned int k = 0; k < this->degree - 1;)
+                    {
+                      interpolation_matrix (i + 3 * source_fe.degree - 1, j + (k + 4) * this->degree - k - 2) = tmp;
+                      k = k + 2;
+                    }
+
+                  tmp *= -0.5;
+                  interpolation_matrix (i + 3 * source_fe.degree - 1, j + 2 * this->degree) = tmp;
+                  interpolation_matrix (i + 3 * source_fe.degree - 1, j + 3 * this->degree - 1) = tmp;
+
+                  if ((i + j) & 1)
+                    tmp *= -1.0;
+
+                  interpolation_matrix (i + 2, j + 2) = tmp;
+                  interpolation_matrix (i + 2, j + this->degree + 1) = tmp;
+                  interpolation_matrix (i + source_fe.degree + 1, j + this->degree + 1) = 2.0 * tmp;
+                  tmp *= -2.0;
+
+                  for (unsigned int k = 0; k < this->degree - 1;)
+                    {
+                      interpolation_matrix (i + 2, k + (j + 2) * this->degree + 2 - j) = tmp;
+                      k = k + 2;
+                    }
+                }
+
+              factorial_k = 1;
+
+              for (int j = 2; j <= (int) this->degree; ++j)
+                {
+                  interpolation_matrix (i + (j + 2) * source_fe.degree - j, i + (j + 2) * this->degree - j) = std::pow (0.5, i + j);
+                  factorial_k *= j;
+                  factorial_l = factorial_k;
+                  factorial_kl = 1;
+
+                  for (int k = j + 1; k <= (int) this->degree; ++k)
+                    {
+                      factorial_kl *= k - j;
+                      factorial_l *= k;
+
+                      if ((j + k) & 1)
+                        interpolation_matrix (i + (j + 2) * source_fe.degree - j, i + (k + 2) * this->degree - k) = -1.0 * std::pow (0.5, i + k) * factorial_l / (factorial_k * factorial_kl);
+
+                      else
+                        interpolation_matrix (i + (j + 2) * source_fe.degree - j, i + (k + 2) * this->degree - k) = std::pow (0.5, i + k) * factorial_l / (factorial_k * factorial_kl);
+                    }
+                }
+            }
+
+          break;
+        }
+
+        case 2:
+        {
+          interpolation_matrix (0, 0) = 0.5;
+          interpolation_matrix (0, 2) = 0.5;
+          interpolation_matrix (2, 2) = 1.0;
+          interpolation_matrix (3, 2) = 0.5;
+          interpolation_matrix (3, 3) = 0.5;
+
+          for (unsigned int i = 0; i < this->degree - 1;)
+            {
+              for (unsigned int line = 0; line < GeometryInfo<3>::lines_per_face; ++line)
+                interpolation_matrix (1, i + line * (this->degree - 1) + 4) = -0.5;
+
+              for (unsigned int j = 0; j < this->degree - 1;)
+                {
+                  interpolation_matrix (1, i + (j + 4) * this->degree - j) = 1.0;
+                  j = j + 2;
+                }
+
+              interpolation_matrix (0, i + 4) = -1.0;
+              interpolation_matrix (3, i + 3 * this->degree + 1) = -1.0;
+              i = i + 2;
+            }
+
+          for (unsigned int vertex = 0; vertex < GeometryInfo<3>::vertices_per_face; ++vertex)
+            interpolation_matrix (1, vertex) = 0.25;
+
+          int factorial_i = 1;
+          int factorial_ij;
+          int factorial_j;
+          int factorial_k;
+          int factorial_kl;
+          int factorial_l;
+
+          for (int i = 2; i <= (int) this->degree; ++i)
+            {
+              double tmp = std::pow (0.5, i);
+              interpolation_matrix (i + 2, i + 2) = tmp;
+              interpolation_matrix (i + 3 * source_fe.degree - 1, i + 3 * this->degree - 1) = tmp;
+              tmp *= 0.5;
+              interpolation_matrix (i + source_fe.degree + 1, i + 2) = tmp;
+              interpolation_matrix (i + source_fe.degree + 1, i + this->degree + 1) = tmp;
+              interpolation_matrix (i + 2 * source_fe.degree, i + 2 * this->degree) = tmp;
+              interpolation_matrix (i + 2 * source_fe.degree, i + 3 * this->degree - 1) = tmp;
+              tmp *= -2.0;
+
+              for (unsigned int j = 0; j < this->degree - 1;)
+                {
+                  interpolation_matrix (i + source_fe.degree + 1, j + (i + 2) * this->degree + 2 - i) = tmp;
+                  interpolation_matrix (i + 2 * source_fe.degree, i + (j + 4) * this->degree - j - 2) = tmp;
+                  j = j + 2;
+                }
+
+              factorial_k = 1;
+
+              for (int j = 2; j <= (int) this->degree; ++j)
+                {
+                  interpolation_matrix (i + (j + 2) * source_fe.degree - j, i + (j + 2) * this->degree - j) = std::pow (0.5, i + j);
+                  factorial_k *= j;
+                  factorial_l = factorial_k;
+                  factorial_kl = 1;
+
+                  for (int k = j + 1; k <= (int) this->degree; ++k)
+                    {
+                      factorial_kl *= k - j;
+                      factorial_l *= k;
+                      interpolation_matrix (i + (j + 2) * source_fe.degree - j, i + (k + 2) * this->degree - k) = std::pow (0.5, i + k) * factorial_l / (factorial_k * factorial_kl);
+                    }
+                }
+
+              factorial_i *= i;
+              factorial_j = factorial_i;
+              factorial_ij = 1;
+
+              for (int j = i + 1; j <= (int) this->degree; ++j)
+                {
+                  factorial_ij *= j - i;
+                  factorial_j *= j;
+                  tmp = std::pow (0.5, j) * factorial_j / (factorial_i * factorial_ij);
+                  interpolation_matrix (i + 2, j + 2) = tmp;
+                  tmp *= -1.0;
+
+                  for (unsigned int k = 0; k < this->degree - 1;)
+                    {
+                      interpolation_matrix (i + source_fe.degree + 1, k + (j + 2) * this->degree + 2 - j) = tmp;
+                      k = k + 2;
+                    }
+
+                  tmp *= -0.5;
+                  interpolation_matrix (i + source_fe.degree + 1, j + 2) = tmp;
+                  interpolation_matrix (i + source_fe.degree + 1, j + this->degree + 1) = tmp;
+
+                  if ((i + j) & 1)
+                    tmp *= -1.0;
+
+                  interpolation_matrix (i + 2 * source_fe.degree, j + 2 * this->degree) = tmp;
+                  interpolation_matrix (i + 2 * source_fe.degree, j + 3 * this->degree - 1) = tmp;
+                  tmp *= 2.0;
+                  interpolation_matrix (i + 3 * source_fe.degree - 1, j + 3 * this->degree - 1) = tmp;
+                  factorial_k = 1;
+
+                  for (int k = 2; k <= (int) this->degree; ++k)
+                    {
+                      interpolation_matrix (i + (k + 2) * source_fe.degree - k, j + (k + 2) * this->degree - k) = tmp * std::pow (0.5, k);
+                      factorial_k *= k;
+                      factorial_l = factorial_k;
+                      factorial_kl = 1;
+
+                      for (int l = k + 1; l <= (int) this->degree; ++l)
+                        {
+                          factorial_kl *= l - k;
+                          factorial_l *= l;
+                          interpolation_matrix (i + (k + 2) * source_fe.degree - k, j + (l + 2) * this->degree - l) = tmp * std::pow (0.5, l) * factorial_l / (factorial_k * factorial_kl);
+                        }
+                    }
+
+                  tmp *= -1.0;
+
+                  for (unsigned int k = 0; k < this->degree - 1;)
+                    {
+                      interpolation_matrix (i + 2 * source_fe.degree, j + (k + 4) * this->degree - k - 2) = tmp;
+                      k = k + 2;
+                    }
+                }
+            }
+
+          break;
+        }
+
+        case 3:
+        {
+          for (unsigned int vertex = 0; vertex < GeometryInfo<3>::vertices_per_face; ++vertex)
+            interpolation_matrix (0, vertex) = 0.25;
+
+          for (unsigned int i = 0; i < this->degree - 1;)
+            {
+              for (unsigned int line = 0; line < GeometryInfo<3>::lines_per_face; ++line)
+                interpolation_matrix (0, i + line * (this->degree - 1) + 4) = -0.5;
+
+              for (unsigned int j = 0; j < this->degree - 1;)
+                {
+                  interpolation_matrix (0, i + (j + 4) * this->degree - j) = 1.0;
+                  j = j + 2;
+                }
+
+              interpolation_matrix (1, i + 4) = -1.0;
+              interpolation_matrix (2, i + 3 * this->degree + 1) = -1.0;
+              i = i + 2;
+            }
+
+          interpolation_matrix (1, 0) = 0.5;
+          interpolation_matrix (1, 1) = 0.5;
+          interpolation_matrix (2, 2) = 0.5;
+          interpolation_matrix (2, 3) = 0.5;
+          interpolation_matrix (3, 3) = 1.0;
+
+          int factorial_i = 1;
+          int factorial_ij;
+          int factorial_j;
+          int factorial_k;
+          int factorial_kl;
+          int factorial_l;
+
+          for (int i = 2; i <= (int) this->degree; ++i)
+            {
+              double tmp = std::pow (0.5, i + 1);
+              interpolation_matrix (i + 2, i + 2) = tmp;
+              interpolation_matrix (i + 2, i + this->degree + 1) = tmp;
+              interpolation_matrix (i + 2 * source_fe.degree, i + 2 * this->degree) = tmp;
+              interpolation_matrix (i + 2 * source_fe.degree, i + 3 * this->degree - 1) = tmp;
+              tmp *= -2.0;
+
+              for (unsigned int j = 0; j < this->degree - 1;)
+                {
+                  interpolation_matrix (i + 2, j + (i + 2) * this->degree + 2 - i) = tmp;
+                  interpolation_matrix (i + 2 * source_fe.degree, i + (j + 4) * this->degree - 2) = tmp;
+                  j = j + 2;
+                }
+
+              tmp *= -1.0;
+              interpolation_matrix (i + source_fe.degree + 1, i + this->degree + 1) = tmp;
+              interpolation_matrix (i + 3 * source_fe.degree - 1, i + 3 * this->degree - 1) = tmp;
+              factorial_k = 1;
+
+              for (int j = 2; j <= (int) this->degree; ++j)
+                {
+                  interpolation_matrix (i + (j + 2) * source_fe.degree - j, i + (j + 2) * this->degree - j) = std::pow (0.5, i + j);
+                  factorial_k *= j;
+                  factorial_l = factorial_k;
+                  factorial_kl = 1;
+
+                  for (int k = j + 1; k <= (int) this->degree; ++k)
+                    {
+                      factorial_kl *= k - j;
+                      factorial_l *= k;
+                      interpolation_matrix (i + (j + 2) * source_fe.degree - j, i + (k + 2) * this->degree - k) = std::pow (0.5, i + k) * factorial_l / (factorial_k * factorial_kl);
+                    }
+                }
+
+              factorial_i *= i;
+              factorial_j = factorial_i;
+              factorial_ij = 1;
+
+              for (int j = i + 1; j <= (int) this->degree; ++j)
+                {
+                  factorial_ij *= j - i;
+                  factorial_j *= j;
+                  tmp = std::pow (0.5, j + 1) * factorial_j / (factorial_i * factorial_ij);
+                  interpolation_matrix (i + 2, j + 2) = tmp;
+                  interpolation_matrix (i + 2, j + this->degree + 1) = tmp;
+                  interpolation_matrix (i + 2 * source_fe.degree, j + 2 * this->degree) = tmp;
+                  interpolation_matrix (i + 2 * source_fe.degree, j + 3 * this->degree - 1) = tmp;
+                  tmp *= 2.0;
+                  interpolation_matrix (i + source_fe.degree + 1, j + this->degree + 1) = tmp;
+                  interpolation_matrix (i + 3 * source_fe.degree - 1, j + 3 * this->degree - 1) = tmp;
+                  factorial_k = 1;
+
+                  for (int k = 2; k <= (int) this->degree; ++k)
+                    {
+                      interpolation_matrix (i + (k + 2) * source_fe.degree - k, j + (k + 2) * this->degree - k) = tmp * std::pow (0.5, k);
+                      factorial_k *= k;
+                      factorial_l = factorial_k;
+                      factorial_kl = 1;
+
+                      for (int l = k + 1; l <= (int) this->degree; ++l)
+                        {
+                          factorial_kl *= l - k;
+                          factorial_l *= l;
+                          interpolation_matrix (i + (k + 2) * source_fe.degree - k, j + (l + 2) * this->degree - l) = tmp * std::pow (0.5, l) * factorial_l / (factorial_k * factorial_kl);
+                        }
+                    }
+
+                  tmp *= -1.0;
+
+                  for (unsigned int k = 0; k < this->degree - 1;)
+                    {
+                      interpolation_matrix (i + 2, k + (j + 2) * this->degree + 2 - j) = tmp;
+                      interpolation_matrix (i + 2 * source_fe.degree, j + (k + 4) * this->degree - 2) = tmp;
+                      k = k + 2;
+                    }
+                }
+            }
+        }
+        }
+    }
+    }
+}
+
+
+
+template <int dim>
+void FE_Q_Hierarchical<dim>::initialize_unit_face_support_points ()
+{
+  // a face has one coordinate direction less than the cell
+  const unsigned int codim = dim-1;
+  const unsigned int points_per_direction = this->degree+1;
+
+  // total number of points: start with the points of one direction and
+  // multiply in one more factor for every additional face direction
+  unsigned int n_points = points_per_direction;
+  for (unsigned int d=1; d<codim; ++d)
+    n_points *= points_per_direction;
+
+  this->unit_face_support_points.resize(n_points);
+
+  // directions that do not exist on the face are traversed exactly once
+  const unsigned int last_y = (codim>1) ? this->degree : 0;
+  const unsigned int last_z = (codim>2) ? this->degree : 0;
+
+  Point<codim> point;
+  unsigned int counter = 0;
+
+  for (unsigned int iz=0; iz<=last_z; ++iz)
+    for (unsigned int iy=0; iy<=last_y; ++iy)
+      for (unsigned int ix=0; ix<=this->degree; ++ix)
+        {
+          // in every direction, index 0 is mapped to coordinate 0, index 1
+          // to coordinate 1, and every higher index to the midpoint 0.5
+          point(0) = (ix==0) ? 0. : ((ix==1) ? 1. : .5);
+
+          if (codim>1)
+            point(1) = (iy==0) ? 0. : ((iy==1) ? 1. : .5);
+
+          if (codim>2)
+            point(2) = (iz==0) ? 0. : ((iz==1) ? 1. : .5);
+
+          // face_renumber selects the slot in which the point generated
+          // at this position of the loop nest is stored
+          this->unit_face_support_points[face_renumber[counter]] = point;
+          ++counter;
+        }
+}
+
+
+// we use same dpo_vector as FE_Q
+//
+// The returned vector has dim+1 entries; entry i holds (deg-1)^i, computed
+// in unsigned arithmetic.
+template <int dim>
+std::vector<unsigned int>
+FE_Q_Hierarchical<dim>::get_dpo_vector(const unsigned int deg)
+{
+  const unsigned int factor = deg-1;
+
+  std::vector<unsigned int> dpo(dim+1);
+
+  // carry the running power along instead of reading back the previous entry
+  unsigned int power = 1U;
+  for (unsigned int d=0; d<dpo.size(); ++d)
+    {
+      dpo[d] = power;
+      power *= factor;
+    }
+
+  return dpo;
+}
+
+
+
+template <int dim>
+std::vector<unsigned int>
+FE_Q_Hierarchical<dim>::
+hierarchic_to_fe_q_hierarchical_numbering (const FiniteElementData<dim> &fe)
+{
+  Assert (fe.n_components() == 1, ExcInternalError());
+  std::vector<unsigned int> numbering(fe.dofs_per_cell);
+
+  // number of dofs in the interior of a line; the polynomial degree is
+  // one larger than this
+  const unsigned int per_line = fe.dofs_per_line;
+  // number of grid points in each direction, i.e. polynomial degree plus one
+  const unsigned int n = per_line+2;
+
+  // All target indices below are composed as x + n*y + n*n*z, where each of
+  // x, y, z is 0 or 1 for the two end points of a direction and 2+i for the
+  // i-th interior dof of that direction. Every line and every face is
+  // therefore described by a fixed offset plus one stride per running
+  // index. To follow the details it helps to draw a picture of the
+  // vertices, lines, etc. together with their dof numbers in both
+  // numberings.
+  switch (dim)
+    {
+    case 1:
+    {
+      // in 1d the two numberings coincide
+      for (unsigned int i=0; i<fe.dofs_per_cell; ++i)
+        numbering[i] = i;
+
+      break;
+    }
+
+    case 2:
+    {
+      // Example: degree=3
+      //
+      // hierarchical numbering:
+      //  2 10 11  3
+      //  5 14 15  7
+      //  4 12 13  6
+      //  0  8  9  1
+      //
+      // fe_q_hierarchical numbering:
+      //  4  6  7  5
+      // 12 14 15 13
+      //  8 10 11  9
+      //  0  2  3  1
+      unsigned int next_index = 0;
+
+      // the four vertices: 0, 1, n, n+1
+      for (unsigned int y=0; y<2; ++y)
+        for (unsigned int x=0; x<2; ++x)
+          numbering[next_index++] = y*n+x;
+
+      // the four lines in the order left, right, bottom, top
+      const unsigned int line_offset[4] = {0, 1, 0, n};
+      const unsigned int line_stride[4] = {n, n, 1, 1};
+      for (unsigned int line=0; line<4; ++line)
+        for (unsigned int i=0; i<per_line; ++i)
+          numbering[next_index++] = line_offset[line]+(2+i)*line_stride[line];
+
+      // inside quad
+      Assert (fe.dofs_per_quad == fe.dofs_per_line*fe.dofs_per_line,
+              ExcInternalError());
+      for (unsigned int i=0; i<per_line; ++i)
+        for (unsigned int j=0; j<per_line; ++j)
+          numbering[next_index++] = (2+i)*n+(2+j);
+
+      Assert (next_index == fe.dofs_per_cell, ExcInternalError());
+
+      break;
+    }
+
+    case 3:
+    {
+      unsigned int next_index = 0;
+      const unsigned int n2 = n*n;
+
+      // the eight vertices: bottom face first, then top face, each of
+      // them lexicographic
+      for (unsigned int z=0; z<2; ++z)
+        for (unsigned int y=0; y<2; ++y)
+          for (unsigned int x=0; x<2; ++x)
+            numbering[next_index++] = z*n2+y*n+x;
+
+      // the twelve lines: four on the bottom face, four on the top face,
+      // then the four lines in z-direction
+      const unsigned int line_offset[12] = {0,  1,    0,  n,
+                                            n2, n2+1, n2, n2+n,
+                                            0,  1,    n,  n+1
+                                           };
+      const unsigned int line_stride[12] = {n,  n,  1,  1,
+                                            n,  n,  1,  1,
+                                            n2, n2, n2, n2
+                                           };
+      for (unsigned int line=0; line<12; ++line)
+        for (unsigned int i=0; i<per_line; ++i)
+          numbering[next_index++] = line_offset[line]+(2+i)*line_stride[line];
+
+      // inside quads
+      Assert (fe.dofs_per_quad == fe.dofs_per_line*fe.dofs_per_line,
+              ExcInternalError());
+      // the six faces in the order left, right, front, back, bottom, top;
+      // the outer stride belongs to the slow index i, the inner stride to
+      // the fast index j
+      const unsigned int quad_offset[6]       = {0,  1,  0,  n,  0, n2};
+      const unsigned int quad_outer_stride[6] = {n2, n2, n2, n2, n, n};
+      const unsigned int quad_inner_stride[6] = {n,  n,  1,  1,  1, 1};
+      for (unsigned int quad=0; quad<6; ++quad)
+        for (unsigned int i=0; i<per_line; ++i)
+          for (unsigned int j=0; j<per_line; ++j)
+            numbering[next_index++] = quad_offset[quad]
+                                      +(2+i)*quad_outer_stride[quad]
+                                      +(2+j)*quad_inner_stride[quad];
+
+      // inside hex
+      Assert (fe.dofs_per_hex == fe.dofs_per_quad*fe.dofs_per_line,
+              ExcInternalError());
+      for (unsigned int i=0; i<per_line; ++i)
+        for (unsigned int j=0; j<per_line; ++j)
+          for (unsigned int k=0; k<per_line; ++k)
+            numbering[next_index++] = (2+i)*n2+(2+j)*n+(2+k);
+
+      Assert (next_index == fe.dofs_per_cell, ExcInternalError());
+
+      break;
+    }
+
+    default:
+      Assert (false, ExcNotImplemented());
+    }
+  return numbering;
+}
+
+
+// Face numbering: the inverse of the map that
+// hierarchic_to_fe_q_hierarchical_numbering yields for the (dim-1)d element.
+template <int dim>
+std::vector<unsigned int>
+FE_Q_Hierarchical<dim>::face_fe_q_hierarchical_to_hierarchic_numbering (const unsigned int degree)
+{
+  const FiniteElementData<dim-1> face_data (FE_Q_Hierarchical<dim-1>::get_dpo_vector(degree), 1, degree);
+  return invert_numbering (FE_Q_Hierarchical<dim-1>::hierarchic_to_fe_q_hierarchical_numbering (face_data));
+}
+
+
+
+// 1d specialization: the degree is ignored and an empty numbering is returned.
+template <> std::vector<unsigned int>
+FE_Q_Hierarchical<1>::face_fe_q_hierarchical_to_hierarchic_numbering (const unsigned int /*degree*/)
+{
+  return std::vector<unsigned int>();
+}
+
+
+// 1d specialization: tells whether shape function @p shape_index is
+// associated with face (i.e. vertex) @p face_index of the cell.
+template <>
+bool
+FE_Q_Hierarchical<1>::has_support_on_face (const unsigned int shape_index,
+                                           const unsigned int face_index) const
+{
+  Assert (shape_index < this->dofs_per_cell,
+          ExcIndexRange (shape_index, 0, this->dofs_per_cell));
+  Assert (face_index < GeometryInfo<1>::faces_per_cell,
+          ExcIndexRange (face_index, 0, GeometryInfo<1>::faces_per_cell));
+
+  // With one degree of freedom per vertex, shape function 0 sits on
+  // vertex 0 (== face 0) and shape function 1 on vertex 1 (== face 1).
+  // All further shape functions are reported as having no face support.
+  if (shape_index > 1)
+    return false;
+
+  return (shape_index == face_index);
+}
+
+
+
+
+// Tells whether shape function @p shape_index may be nonzero on face
+// @p face_index. The answer only depends on the geometric object
+// (vertex, line, quad or hex) the degree of freedom belongs to, which is
+// read off from the position of @p shape_index relative to the
+// first_*_index markers.
+template <int dim>
+bool
+FE_Q_Hierarchical<dim>::has_support_on_face (const unsigned int shape_index,
+                                             const unsigned int face_index) const
+{
+  Assert (shape_index < this->dofs_per_cell,
+          ExcIndexRange (shape_index, 0, this->dofs_per_cell));
+  Assert (face_index < GeometryInfo<dim>::faces_per_cell,
+          ExcIndexRange (face_index, 0, GeometryInfo<dim>::faces_per_cell));
+
+  // cell-interior shape functions have no support anywhere on the
+  // boundary, so deal with them first
+  const bool is_interior_2d = (dim==2) && (shape_index>=this->first_quad_index);
+  const bool is_interior_3d = (dim==3) && (shape_index>=this->first_hex_index);
+  if (is_interior_2d || is_interior_3d)
+    return false;
+
+  // dof located on a vertex
+  if (shape_index < this->first_line_index)
+    {
+      // there is one dof per vertex, hence shape_index==vertex_number;
+      // look for this vertex among the vertices of the given face
+      const unsigned int vertex_no = shape_index;
+      Assert (vertex_no < GeometryInfo<dim>::vertices_per_cell,
+              ExcInternalError());
+
+      for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_face; ++v)
+        if (GeometryInfo<dim>::face_to_cell_vertices(face_index,v) == vertex_no)
+          return true;
+      return false;
+    }
+
+  // dof located on a line
+  if (shape_index < this->first_quad_index)
+    {
+      // all dofs of one line are numbered consecutively, so an integer
+      // division yields the line this dof belongs to
+      const unsigned int line_index
+        = (shape_index - this->first_line_index) / this->dofs_per_line;
+      Assert (line_index < GeometryInfo<dim>::lines_per_cell,
+              ExcInternalError());
+
+      // look for this line among the lines of the given face
+      for (unsigned int l=0; l<GeometryInfo<dim>::lines_per_face; ++l)
+        if (GeometryInfo<dim>::face_to_cell_lines(face_index,l) == line_index)
+          return true;
+      return false;
+    }
+
+  // dof located on a quad
+  if (shape_index < this->first_hex_index)
+    {
+      // same reasoning as for lines, now with dofs_per_quad
+      const unsigned int quad_index
+        = (shape_index - this->first_quad_index) / this->dofs_per_quad;
+      Assert (static_cast<signed int>(quad_index) <
+              static_cast<signed int>(GeometryInfo<dim>::quads_per_cell),
+              ExcInternalError());
+
+      // in 2d, the cell bubbles vanish on all faces, but that case has
+      // been handled above already
+      Assert (dim != 2, ExcInternalError());
+
+      // in 3d, quad_index==face_index
+      if (dim == 3)
+        return (quad_index == face_index);
+
+      // no other dimension is implemented
+      Assert (false, ExcNotImplemented());
+
+      // we should not have gotten here
+      Assert (false, ExcInternalError());
+      return false;
+    }
+
+  // dof located on a hex: can only happen in 3d, but this case has
+  // already been covered above
+  Assert (false, ExcNotImplemented());
+  return false;
+}
+
+
+
+// Returns the embedding of the hierarchical element of degree
+// @p sub_degree into this element: entry i of the result is the index,
+// in the numbering of this element, assigned to dof i of the element of
+// degree @p sub_degree. Requires 1 <= sub_degree <= this->degree.
+template <int dim>
+std::vector<unsigned int>
+FE_Q_Hierarchical<dim>::get_embedding_dofs (const unsigned int sub_degree) const
+{
+  Assert ((sub_degree>0) && (sub_degree<=this->degree),
+          ExcIndexRange(sub_degree, 1, this->degree));
+
+  const unsigned int n_vertices = GeometryInfo<dim>::vertices_per_cell;
+
+  // Three cases map dof i simply onto dof i and only differ in the
+  // number of dofs involved: everything in 1d, the vertex dofs of the
+  // lowest order element, and this element onto itself.
+  bool         is_identity     = true;
+  unsigned int n_identity_dofs = 0;
+  if (dim==1)
+    n_identity_dofs = sub_degree+1;
+  else if (sub_degree==1)
+    n_identity_dofs = n_vertices;
+  else if (sub_degree==this->degree)
+    n_identity_dofs = this->dofs_per_cell;
+  else
+    is_identity = false;
+
+  if (is_identity)
+    {
+      std::vector<unsigned int> embedding_dofs (n_identity_dofs);
+      for (unsigned int i=0; i<n_identity_dofs; ++i)
+        embedding_dofs[i] = i;
+
+      return embedding_dofs;
+    }
+
+  // the general case is only implemented in 2d and 3d
+  if ((dim!=2) && (dim!=3))
+    {
+      Assert(false, ExcNotImplemented ());
+      return std::vector<unsigned int> ();
+    }
+
+  // number of dofs in the interior of a line, for the element of degree
+  // sub_degree and for this element
+  const unsigned int n_sub  = sub_degree-1;
+  const unsigned int n_full = this->degree-1;
+
+  // one past the last line, quad and hex dof in the numbering of the
+  // element of degree sub_degree
+  const unsigned int sub_lines_end
+    = n_vertices + GeometryInfo<dim>::lines_per_cell * n_sub;
+  const unsigned int sub_quads_end
+    = sub_lines_end + GeometryInfo<dim>::quads_per_cell * n_sub * n_sub;
+  const unsigned int sub_hexes_end
+    = sub_quads_end + GeometryInfo<dim>::hexes_per_cell * n_sub * n_sub * n_sub;
+
+  // first quad and first hex dof in the numbering of this element
+  const unsigned int full_quads_begin = n_vertices + GeometryInfo<dim>::lines_per_cell * n_full;
+  const unsigned int full_hexes_begin = full_quads_begin + GeometryInfo<dim>::quads_per_cell * n_full * n_full;
+
+  // size of the result: one entry per dof of the element of degree sub_degree
+  const unsigned int n_sub_dofs = (dim==2) ?
+                                  (sub_degree+1) * (sub_degree+1) :
+                                  (sub_degree+1) * (sub_degree+1) * (sub_degree+1);
+  std::vector<unsigned int> embedding_dofs (n_sub_dofs);
+
+  for (unsigned int i=0; i<n_sub_dofs; ++i)
+    {
+      if (i<n_vertices)
+        // vertex: same index in both numberings
+        embedding_dofs[i] = i;
+      else if (i<sub_lines_end)
+        {
+          // line: split the offset into (line, position j on the line)
+          const unsigned int offset = i - n_vertices;
+          const unsigned int j      = offset % n_sub;
+          const unsigned int line   = (offset - j) / n_sub;
+
+          embedding_dofs[i] = n_vertices + line * n_full + j;
+        }
+      else if (i<sub_quads_end)
+        {
+          // quad: split the offset into (face, row k, column j)
+          const unsigned int offset = i - sub_lines_end;
+          const unsigned int j      = offset % n_sub;
+          const unsigned int k      = ((offset - j) / n_sub) % n_sub;
+          const unsigned int face   = (offset - k * n_sub - j) / (n_sub * n_sub);
+
+          embedding_dofs[i] = full_quads_begin + face * n_full * n_full + k * n_full + j;
+        }
+      else if (i<sub_hexes_end)
+        {
+          // hex: split the offset into the three indices (l, k, j)
+          const unsigned int offset = i - sub_quads_end;
+          const unsigned int j      = offset % n_sub;
+          const unsigned int k      = ((offset - j) / n_sub) % n_sub;
+          const unsigned int l      = (offset - j - k * n_sub) / (n_sub * n_sub);
+
+          embedding_dofs[i] = full_hexes_begin + l * n_full * n_full + k * n_full + j;
+        }
+    }
+
+  return embedding_dofs;
+}
+
+
+
+// Row 0 of the returned table is true for the first vertices_per_cell
+// shape functions and false for all others; the second member is {0}.
+template <int dim>
+std::pair<Table<2,bool>, std::vector<unsigned int> >
+FE_Q_Hierarchical<dim>::get_constant_modes () const
+{
+  Table<2,bool> constant_modes(1, this->dofs_per_cell);
+  for (unsigned int i=0; i<this->dofs_per_cell; ++i)
+    constant_modes(0,i) = (i < GeometryInfo<dim>::vertices_per_cell);
+  const std::vector<unsigned int> components (1, 0);
+  return std::pair<Table<2,bool>, std::vector<unsigned int> > (constant_modes, components);
+}
+
+
+
+template <int dim>
+std::size_t
+FE_Q_Hierarchical<dim>::memory_consumption () const
+{
+  Assert (false, ExcNotImplemented ()); // this function is not implemented
+  return 0; // placeholder value, no memory is actually measured
+}
+
+
+
+// explicit instantiations
+#include "fe_q_hierarchical.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/fe_q_hierarchical.inst.in b/source/fe/fe_q_hierarchical.inst.in
new file mode 100644
index 0000000..06f3529
--- /dev/null
+++ b/source/fe/fe_q_hierarchical.inst.in
@@ -0,0 +1,22 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+// Explicitly instantiate FE_Q_Hierarchical for every entry of DIMENSIONS.
+for (deal_II_dimension : DIMENSIONS)
+  {
+    template class FE_Q_Hierarchical<deal_II_dimension>;
+  }
+
diff --git a/source/fe/fe_q_iso_q1.cc b/source/fe/fe_q_iso_q1.cc
new file mode 100644
index 0000000..35747b1
--- /dev/null
+++ b/source/fe/fe_q_iso_q1.cc
@@ -0,0 +1,123 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/fe/fe_q_iso_q1.h>
+#include <deal.II/fe/fe_nothing.h>
+
+#include <vector>
+#include <sstream>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+
+
+// Builds the element for the given number of @p subdivisions and
+// initializes it with the points of the trapezoidal rule iterated that often.
+template <int dim, int spacedim>
+FE_Q_iso_Q1<dim,spacedim>::FE_Q_iso_Q1 (const unsigned int subdivisions)
+  :
+  FE_Q_Base<TensorProductPolynomials<dim,Polynomials::PiecewisePolynomial<double> >, dim, spacedim> (
+    TensorProductPolynomials<dim,Polynomials::PiecewisePolynomial<double> >
+    (Polynomials::generate_complete_Lagrange_basis_on_subdivisions(subdivisions, 1)),
+    FiniteElementData<dim>(this->get_dpo_vector(subdivisions),
+                           1, subdivisions,
+                           FiniteElementData<dim>::H1),
+    std::vector<bool> (1, false))
+{
+  Assert (subdivisions > 0,
+          ExcMessage ("This element can only be used with a positive number of "
+                      "subelements"));
+
+  const QIterated<1> support_rule (QTrapez<1>(), subdivisions);
+  this->initialize(support_rule.get_points());
+}
+
+
+
+template <int dim, int spacedim>
+std::string
+FE_Q_iso_Q1<dim,spacedim>::get_name () const
+{
+  // The FETools::get_fe_from_name function depends on the exact format
+  // of the string built here, so the two have to be kept in sync.
+  std::ostringstream name_stream;
+  name_stream << "FE_Q_iso_Q1<";
+  name_stream << Utilities::dim_string(dim,spacedim);
+  name_stream << ">(";
+  name_stream << this->degree;
+  name_stream << ")";
+  return name_stream.str();
+}
+
+
+
+template <int dim, int spacedim>
+FiniteElement<dim,spacedim> *
+FE_Q_iso_Q1<dim,spacedim>::clone() const
+{
+  return new FE_Q_iso_Q1<dim,spacedim>(*this); // copy of *this, allocated with new
+}
+
+
+
+// Decides which of this element and @p fe_other dominates on a common
+// face. Only FE_Q_iso_Q1 and FE_Nothing are handled as @p fe_other.
+template <int dim, int spacedim>
+FiniteElementDomination::Domination
+FE_Q_iso_Q1<dim,spacedim>::
+compare_for_face_domination (const FiniteElement<dim,spacedim> &fe_other) const
+{
+  if (const FE_Q_iso_Q1<dim,spacedim> *fe_q_iso_q1_other
+      = dynamic_cast<const FE_Q_iso_Q1<dim,spacedim>*>(&fe_other))
+    {
+      // unlike for FE_Q, FE_Q_iso_Q1(2) is not a subspace of
+      // FE_Q_iso_Q1(3): one element can only dominate the other if one
+      // degree is a multiple of the other
+      if (this->degree < fe_q_iso_q1_other->degree &&
+          fe_q_iso_q1_other->degree % this->degree == 0)
+        return FiniteElementDomination::this_element_dominates;
+      else if (this->degree == fe_q_iso_q1_other->degree)
+        return FiniteElementDomination::either_element_can_dominate;
+      else if (this->degree > fe_q_iso_q1_other->degree &&
+               this->degree % fe_q_iso_q1_other->degree == 0)
+        return FiniteElementDomination::other_element_dominates;
+      else
+        return FiniteElementDomination::neither_element_dominates;
+    }
+  // match FE_Nothing with the same spacedim as fe_other; FE_Nothing<dim>
+  // is an unrelated type whenever dim != spacedim and would never match
+  else if (const FE_Nothing<dim,spacedim> *fe_nothing
+           = dynamic_cast<const FE_Nothing<dim,spacedim>*>(&fe_other))
+    {
+      // the FE_Nothing has no degrees of freedom and it is typically used in
+      // a context where we don't require any continuity along the interface
+      if (!fe_nothing->is_dominating())
+        return FiniteElementDomination::no_requirements;
+      return FiniteElementDomination::other_element_dominates;
+    }
+
+  Assert (false, ExcNotImplemented());
+  return FiniteElementDomination::neither_element_dominates;
+}
+
+
+// explicit instantiations
+#include "fe_q_iso_q1.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/fe_q_iso_q1.inst.in b/source/fe/fe_q_iso_q1.inst.in
new file mode 100644
index 0000000..7699e47
--- /dev/null
+++ b/source/fe/fe_q_iso_q1.inst.in
@@ -0,0 +1,25 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+// Instantiate FE_Q_iso_Q1 for all pairs with dimension <= space dimension.
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+#if deal_II_dimension <= deal_II_space_dimension 	
+    template class FE_Q_iso_Q1<deal_II_dimension, deal_II_space_dimension>;
+#endif
+  }
+
+
diff --git a/source/fe/fe_rannacher_turek.cc b/source/fe/fe_rannacher_turek.cc
new file mode 100644
index 0000000..17802be
--- /dev/null
+++ b/source/fe/fe_rannacher_turek.cc
@@ -0,0 +1,175 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/fe/fe_rannacher_turek.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/lac/vector.h>
+#include <algorithm>
+
+#include <sstream>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+template <int dim>
+FE_RannacherTurek<dim>::FE_RannacherTurek(const unsigned int degree,
+                                          const unsigned int n_face_support_points) :
+  FE_Poly<PolynomialsRannacherTurek<dim>, dim>(
+    PolynomialsRannacherTurek<dim>(),
+    FiniteElementData<dim>(this->get_dpo_vector(),
+                           1,
+                           2,
+                           FiniteElementData<dim>::L2),
+    std::vector<bool>(4, false), // restriction not implemented
+    std::vector<ComponentMask>(4, std::vector<bool>(1, true))),
+  degree(degree),
+  n_face_support_points(n_face_support_points)
+{
+  Assert(dim == 2, ExcNotImplemented()); // only the 2d element is implemented
+  Assert(degree == 0, ExcNotImplemented()); // only degree 0 is accepted
+  this->initialize_support_points(); // fills generalized_support_points and weights
+}
+
+
+
+// Returns dim+1 entries: entry dim-1 is one, all other entries are zero.
+template <int dim>
+std::vector<unsigned int> FE_RannacherTurek<dim>::get_dpo_vector()
+{
+  std::vector<unsigned int> dofs_per_object(dim + 1, 0);
+  dofs_per_object[dim - 1] = 1;
+  return dofs_per_object;
+}
+
+
+
+// Name of the form "FE_RannacherTurek<dim>(degree, n_face_support_points)".
+template <int dim>
+std::string FE_RannacherTurek<dim>::get_name() const
+{
+  std::ostringstream name_stream;
+  name_stream << "FE_RannacherTurek" << "<" << dim << ">" << "(";
+  name_stream << this->degree << ", " << this->n_face_support_points << ")";
+  return name_stream.str();
+}
+
+
+
+template <int dim>
+FiniteElement<dim> *FE_RannacherTurek<dim>::clone() const
+{
+  return new FE_RannacherTurek<dim>(this->degree, this->n_face_support_points); // new element built from the two stored parameters
+}
+
+
+
+// Stores the weights of a Gauss rule and places its points on the four
+// lines x=0, x=1, y=0 and y=1, one block of points per line, in that order.
+template <int dim>
+void FE_RannacherTurek<dim>::initialize_support_points()
+{
+  Assert(dim == 2, ExcNotImplemented());
+  dealii::QGauss<dim-1> face_quadrature(this->n_face_support_points);
+  this->weights = face_quadrature.get_weights();
+
+  const unsigned int n_q = face_quadrature.size();
+  this->generalized_support_points.resize(4*n_q);
+  for (unsigned int q = 0; q < n_q; ++q)
+    {
+      // coordinate of the q-th quadrature point along the face
+      const double s = face_quadrature.point(q)(0);
+      this->generalized_support_points[0*n_q + q] = dealii::Point<dim>(0, 1 - s);
+      this->generalized_support_points[1*n_q + q] = dealii::Point<dim>(1, 1 - s);
+      this->generalized_support_points[2*n_q + q] = dealii::Point<dim>(s, 0);
+      this->generalized_support_points[3*n_q + q] = dealii::Point<dim>(s, 1);
+    }
+}
+
+
+
+// Sets local_dofs[face] to the sum over the face's quadrature points of
+// value times quadrature weight; @p values is ordered face by face.
+template <int dim>
+void FE_RannacherTurek<dim>::interpolate(
+  std::vector<double> &local_dofs,
+  const std::vector<double> &values) const
+{
+  AssertDimension(values.size(), this->generalized_support_points.size());
+  AssertDimension(local_dofs.size(), this->dofs_per_cell);
+
+  std::fill(local_dofs.begin(), local_dofs.end(), 0.0);
+
+  const unsigned int n_q_per_face = this->weights.size();
+  const unsigned int n_faces      = dealii::GeometryInfo<dim>::faces_per_cell;
+  for (unsigned int face = 0; face < n_faces; ++face)
+    {
+      // first entry of values belonging to this face
+      const unsigned int first_value = face * n_q_per_face;
+      for (unsigned int q = 0; q < n_q_per_face; ++q)
+        {
+          const double weighted_value = values[first_value + q] * this->weights[q];
+          local_dofs[face] += weighted_value;
+        }
+    }
+}
+
+
+
+// Vector-valued overload: picks component @p offset of every entry of
+// @p values and forwards to the scalar interpolate().
+template <int dim>
+void FE_RannacherTurek<dim>::interpolate(
+  std::vector<double> &local_dofs,
+  const std::vector<Vector<double> > &values,
+  unsigned int offset) const
+{
+  AssertDimension(values.size(), this->generalized_support_points.size());
+  AssertDimension(local_dofs.size(), this->dofs_per_cell);
+
+  const unsigned int n_points = values.size();
+  std::vector<double> component_values(n_points);
+  for (unsigned int point = 0; point < n_points; ++point)
+    component_values[point] = values[point][offset];
+  this->interpolate(local_dofs, component_values);
+}
+
+
+
+// VectorSlice overload: expects exactly one component and forwards its
+// values to the scalar interpolate().
+template <int dim>
+void FE_RannacherTurek<dim>::interpolate(
+  std::vector<double> &local_dofs,
+  const VectorSlice<const std::vector<std::vector<double> > > &values) const
+{
+  AssertDimension(values.size(), 1);
+  AssertDimension(values[0].size(), this->generalized_support_points.size());
+  AssertDimension(local_dofs.size(), this->dofs_per_cell);
+
+  // copy the single component into a plain vector for the scalar version
+  const std::vector<double> &component = values[0];
+  const std::vector<double> scalar_values(component.begin(), component.end());
+
+  this->interpolate(local_dofs, scalar_values);
+}
+
+
+
+// explicit instantiations
+#include "fe_rannacher_turek.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/fe_rannacher_turek.inst.in b/source/fe/fe_rannacher_turek.inst.in
new file mode 100644
index 0000000..5c165e2
--- /dev/null
+++ b/source/fe/fe_rannacher_turek.inst.in
@@ -0,0 +1,22 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+// Explicitly instantiate FE_RannacherTurek for every entry of DIMENSIONS.
+for (deal_II_dimension : DIMENSIONS)
+  {
+    template class FE_RannacherTurek<deal_II_dimension>;
+  }
+
diff --git a/source/fe/fe_raviart_thomas.cc b/source/fe/fe_raviart_thomas.cc
new file mode 100644
index 0000000..39b7b1e
--- /dev/null
+++ b/source/fe/fe_raviart_thomas.cc
@@ -0,0 +1,566 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/quadrature.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/qprojector.h>
+#include <deal.II/base/table.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/mapping.h>
+#include <deal.II/fe/fe_raviart_thomas.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/fe_tools.h>
+
+#include <sstream>
+#include <iostream>
+
+//TODO: implement the adjust_quad_dof_index_for_face_orientation_table and
+//adjust_line_dof_index_for_line_orientation_table fields, and write tests
+//similar to bits/face_orientation_and_fe_q_*
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// Sets up the Raviart-Thomas element for constructor argument @p deg:
+// support points, inverse node matrix, prolongation and restriction
+// matrices and finally the interface constraints are computed here.
+template <int dim>
+FE_RaviartThomas<dim>::FE_RaviartThomas (const unsigned int deg)
+  :
+  FE_PolyTensor<PolynomialsRaviartThomas<dim>, dim> (
+    deg,
+    FiniteElementData<dim>(get_dpo_vector(deg),
+                           dim, deg+1, FiniteElementData<dim>::Hdiv),
+    std::vector<bool>(PolynomialsRaviartThomas<dim>::compute_n_pols(deg), true),
+    std::vector<ComponentMask>(PolynomialsRaviartThomas<dim>::compute_n_pols(deg),
+                               std::vector<bool>(dim,true)))
+{
+  Assert (dim >= 2, ExcImpossibleInDim(dim));
+  const unsigned int n_dofs = this->dofs_per_cell;
+
+  this->mapping_type = mapping_raviart_thomas;
+  // First, initialize the generalized support points and quadrature
+  // weights, since they are required for interpolation.
+  initialize_support_points(deg);
+  // Now compute the inverse node matrix, generating the correct basis
+  // functions from the raw ones.
+
+  // We use an auxiliary matrix in this function. Therefore,
+  // inverse_node_matrix is still empty and shape_value_component
+  // returns the 'raw' shape values.
+  FullMatrix<double> M(n_dofs, n_dofs);
+  FETools::compute_node_matrix(M, *this);
+  this->inverse_node_matrix.reinit(n_dofs, n_dofs);
+  this->inverse_node_matrix.invert(M);
+  // From now on, the shape functions will be the correct ones, not the
+  // raw shape functions anymore.
+
+  // Reinit the vectors of restriction and prolongation matrices to the
+  // right sizes. Restriction only for isotropic refinement.
+  this->reinit_restriction_and_prolongation_matrices(true);
+  // Fill prolongation matrices with embedding operators
+  FETools::compute_embedding_matrices (*this, this->prolongation);
+  initialize_restriction();
+
+  // TODO[TL]: for anisotropic refinement we will probably need a table of submatrices with an array for each refine case
+
+  // One dofs_per_face x dofs_per_face embedding matrix per child of a
+  // face, filled by FETools::compute_face_embedding_matrices.
+  FullMatrix<double> face_embeddings[GeometryInfo<dim>::max_children_per_face];
+  for (unsigned int i=0; i<GeometryInfo<dim>::max_children_per_face; ++i)
+    face_embeddings[i].reinit (this->dofs_per_face, this->dofs_per_face);
+  FETools::compute_face_embedding_matrices<dim,double>(*this, face_embeddings, 0, 0);
+
+  // The interface constraints consist of these matrices stacked on top
+  // of each other, one block of rows per child; target_row is the row
+  // of interface_constraints that is written next.
+  this->interface_constraints.reinit((1<<(dim-1)) * this->dofs_per_face,
+                                     this->dofs_per_face);
+  unsigned int target_row=0;
+  for (unsigned int d=0; d<GeometryInfo<dim>::max_children_per_face; ++d)
+    for (unsigned int i=0; i<face_embeddings[d].m(); ++i)
+      {
+        for (unsigned int j=0; j<face_embeddings[d].n(); ++j)
+          this->interface_constraints(target_row,j) = face_embeddings[d](i,j);
+        ++target_row;
+      }
+}
+
+
+
+// Returns "FE_RaviartThomas<dim>(k)", where k is the degree that was
+// passed to the constructor.
+template <int dim>
+std::string
+FE_RaviartThomas<dim>::get_name () const
+{
+  // FETools::get_fe_from_name depends on the particular format of the
+  // string built here, so the two have to be kept in sync.
+
+  // this->degree is the maximal polynomial degree and thus one higher
+  // than the argument given to the constructor; undo that shift here.
+  const unsigned int constructor_degree = this->degree-1;
+
+  std::ostringstream name_stream;
+  name_stream << "FE_RaviartThomas<" << dim << ">(";
+  name_stream << constructor_degree;
+  name_stream << ")";
+
+  return name_stream.str();
+}
+
+
+template <int dim>
+FiniteElement<dim> *
+FE_RaviartThomas<dim>::clone() const
+{
+  return new FE_RaviartThomas<dim>(*this); // copy of *this, allocated with new
+}
+
+
+//---------------------------------------------------------------------------
+// Auxiliary and internal functions
+//---------------------------------------------------------------------------
+
+
+// Fills generalized_support_points, generalized_face_support_points,
+// boundary_weights and (for deg>0) interior_weights, using
+// QGauss<dim>(deg+1) in the cell and QGauss<dim-1>(deg+1) on the faces.
+template <int dim>
+void
+FE_RaviartThomas<dim>::initialize_support_points (const unsigned int deg)
+{
+  QGauss<dim> cell_quadrature(deg+1);
+  const unsigned int n_interior_points
+    = (deg>0) ? cell_quadrature.size() : 0;
+
+  unsigned int n_face_points = (dim>1) ? 1 : 0;
+  // compute (deg+1)^(dim-1)
+  for (unsigned int d=1; d<dim; ++d)
+    n_face_points *= deg+1;
+
+  this->generalized_support_points.resize (GeometryInfo<dim>::faces_per_cell*n_face_points
+                                           + n_interior_points);
+  this->generalized_face_support_points.resize (n_face_points);
+
+  // Number of the point being entered
+  unsigned int current = 0;
+
+  if (dim>1)
+    {
+      QGauss<dim-1> face_points (deg+1);
+      TensorProductPolynomials<dim-1> legendre
+        = Polynomials::Legendre::generate_complete_basis(deg);
+
+      boundary_weights.reinit(n_face_points, legendre.n());
+
+//       Assert (face_points.size() == this->dofs_per_face,
+//            ExcInternalError());
+
+      for (unsigned int k=0; k<n_face_points; ++k)
+        {
+          this->generalized_face_support_points[k] = face_points.point(k);
+          // Compute its quadrature contribution for each moment.
+          for (unsigned int i=0; i<legendre.n(); ++i)
+            {
+              boundary_weights(k, i)
+                = face_points.weight(k)
+                  * legendre.compute_value(i, face_points.point(k));
+            }
+        }
+
+      Quadrature<dim> faces = QProjector<dim>::project_to_all_faces(face_points);
+      for (; current<GeometryInfo<dim>::faces_per_cell*n_face_points;
+           ++current)
+        {
+          // Enter the support point into the vector
+          this->generalized_support_points[current] = faces.point(current+QProjector<dim>::DataSetDescriptor::face(0,true,false,false,n_face_points));
+        }
+    }
+
+  // For deg==0 there are no interior points (n_interior_points is 0).
+  if (deg==0) return;
+
+  // Create Legendre basis for the space D_xi Q_k
+  std::vector<AnisotropicPolynomials<dim>* > polynomials(dim);
+  for (unsigned int dd=0; dd<dim; ++dd)
+    {
+      std::vector<std::vector<Polynomials::Polynomial<double> > > poly(dim);
+      for (unsigned int d=0; d<dim; ++d)
+        poly[d] = Polynomials::Legendre::generate_complete_basis(deg);
+      poly[dd] = Polynomials::Legendre::generate_complete_basis(deg-1);
+
+      polynomials[dd] = new AnisotropicPolynomials<dim>(poly);
+    }
+
+  interior_weights.reinit(TableIndices<3>(n_interior_points, polynomials[0]->n(), dim));
+
+  for (unsigned int k=0; k<cell_quadrature.size(); ++k)
+    {
+      this->generalized_support_points[current++] = cell_quadrature.point(k);
+      for (unsigned int i=0; i<polynomials[0]->n(); ++i)
+        for (unsigned int d=0; d<dim; ++d)
+          interior_weights(k,i,d) = cell_quadrature.weight(k)
+                                    * polynomials[d]->compute_value(i,cell_quadrature.point(k));
+    }
+
+  // Free the polynomial spaces allocated with new above.
+  for (unsigned int d=0; d<dim; ++d)
+    delete polynomials[d];
+
+  Assert (current == this->generalized_support_points.size(),
+          ExcInternalError());
+}
+
+
+
+// 1d specialization: every child restriction matrix is set to size 0x0.
+template <>
+void
+FE_RaviartThomas<1>::initialize_restriction()
+{
+  // 1d only knows the isotropic refinement case, which is stored at
+  // index 0 of the outer array
+  for (unsigned int child=0; child<GeometryInfo<1>::max_children_per_cell; ++child)
+    this->restriction[0][child].reinit(0,0);
+}
+
+
+
+// This function performs the same Raviart-Thomas interpolation as
+// interpolate(). Still, we cannot use interpolate(), since it was
+// written for smooth functions. The functions interpolated here are
+// not smooth, maybe not even continuous. Therefore, we must double
+// the number of quadrature points in each direction in order to
+// integrate only smooth functions.
+//
+// Then again, the interpolated function is chosen such that the
+// moments coincide with the function to be interpolated.
+
+template <int dim>
+void
+FE_RaviartThomas<dim>::initialize_restriction()
+{
+  const unsigned int iso=RefinementCase<dim>::isotropic_refinement-1;
+
+  QGauss<dim-1> q_base (this->degree);
+  const unsigned int n_face_points = q_base.size();
+  // First, compute the interpolation on the subfaces of every face;
+  // this fills the face rows of the isotropic restriction matrices.
+  for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+    {
+      // The shape functions of the child cell are evaluated in the
+      // quadrature points of a full face. Only the component given
+      // by GeometryInfo<dim>::unit_normal_direction[face] enters
+      // the face moments.
+      Quadrature<dim> q_face
+        = QProjector<dim>::project_to_face(q_base, face);
+      // Store shape values, since the evaluation suffers if not
+      // ordered by point: cached_values(i,k) is the value of
+      // shape function i at point k of q_face.
+      Table<2,double> cached_values(this->dofs_per_cell, q_face.size());
+      for (unsigned int k=0; k<q_face.size(); ++k)
+        for (unsigned int i = 0; i < this->dofs_per_cell; ++i)
+          cached_values(i,k)
+            = this->shape_value_component(i, q_face.point(k),
+                                          GeometryInfo<dim>::unit_normal_direction[face]);
+
+      for (unsigned int sub=0; sub<GeometryInfo<dim>::max_children_per_face; ++sub)
+        {
+          // The weight functions for the coarse face are evaluated
+          // on the subface only. 'child' is the cell returned by
+          // child_cell_on_face for this face and subface under
+          // isotropic refinement.
+          Quadrature<dim> q_sub
+            = QProjector<dim>::project_to_subface(q_base, face, sub);
+          const unsigned int child
+            = GeometryInfo<dim>::child_cell_on_face(
+                RefinementCase<dim>::isotropic_refinement, face, sub);
+
+          // On a certain face, we must compute the moments of ALL
+          // fine level functions with the coarse level weight
+          // functions belonging to that face. Due to the
+          // orthogonalization process when building the shape
+          // functions, these weights are equal to the corresponding
+          // shape functions.
+          //
+          // Row index:    coarse face dof face*dofs_per_face+i_face.
+          // Column index: child dof i_child.
+          // The child factor is read from cached_values (full-face
+          // points), the coarse factor is evaluated at the points
+          // of the subface quadrature q_sub.
+          for (unsigned int k=0; k<n_face_points; ++k)
+            for (unsigned int i_child = 0; i_child < this->dofs_per_cell; ++i_child)
+              for (unsigned int i_face = 0; i_face < this->dofs_per_face; ++i_face)
+                {
+                  // The quadrature weights on the subcell are NOT
+                  // transformed, so we have to do it here: the
+                  // factor Utilities::fixed_power<dim-1>(.5) in
+                  // the product below rescales each weight of
+                  // q_sub.
+                  this->restriction[iso][child](face*this->dofs_per_face+i_face,
+                                                i_child)
+                  += Utilities::fixed_power<dim-1>(.5) * q_sub.weight(k)
+                     * cached_values(i_child, k)
+                     * this->shape_value_component(face*this->dofs_per_face+i_face,
+                                                   q_sub.point(k),
+                                                   GeometryInfo<dim>::unit_normal_direction[face]);
+                }
+        }
+    }
+
+  if (this->degree == 1) return;
+
+  // Create Legendre basis for the space D_xi Q_k. Here, we cannot
+  // use the shape functions. The spaces are stored by value in the
+  // vector, so they are released on every exit path.
+  std::vector<AnisotropicPolynomials<dim> > polynomials;
+  for (unsigned int dd=0; dd<dim; ++dd)
+    {
+      std::vector<std::vector<Polynomials::Polynomial<double> > > poly(dim);
+      for (unsigned int d=0; d<dim; ++d)
+        poly[d] = Polynomials::Legendre::generate_complete_basis(this->degree-1);
+      poly[dd] = Polynomials::Legendre::generate_complete_basis(this->degree-2);
+
+      polynomials.push_back (AnisotropicPolynomials<dim>(poly));
+    }
+
+  QGauss<dim> q_cell(this->degree);
+  const unsigned int start_cell_dofs
+    = GeometryInfo<dim>::faces_per_cell*this->dofs_per_face;
+
+  // Store the shape values of all components, since the evaluation
+  // suffers if not ordered by point: cached_values(i,k,d) is
+  // component d of shape function i at point k of q_cell.
+  Table<3,double> cached_values(this->dofs_per_cell, q_cell.size(), dim);
+  for (unsigned int k=0; k<q_cell.size(); ++k)
+    for (unsigned int i = 0; i < this->dofs_per_cell; ++i)
+      for (unsigned int d=0; d<dim; ++d)
+        cached_values(i,k,d) = this->shape_value_component(i, q_cell.point(k), d);
+
+  for (unsigned int child=0; child<GeometryInfo<dim>::max_children_per_cell; ++child)
+    {
+      Quadrature<dim> q_sub = QProjector<dim>::project_to_child(q_cell, child);
+      // rows start_cell_dofs+i_weight*dim+d receive the interior moments
+      for (unsigned int k=0; k<q_sub.size(); ++k)
+        for (unsigned int i_child = 0; i_child < this->dofs_per_cell; ++i_child)
+          for (unsigned int d=0; d<dim; ++d)
+            for (unsigned int i_weight=0; i_weight<polynomials[d].n(); ++i_weight)
+              {
+                this->restriction[iso][child](start_cell_dofs+i_weight*dim+d,
+                                              i_child)
+                += q_sub.weight(k)
+                   * cached_values(i_child, k, d)
+                   * polynomials[d].compute_value(i_weight, q_sub.point(k));
+              }
+    }
+
+  // Nothing to free here: 'polynomials' owns its entries by value
+  // and destroys them when it goes out of scope.
+}
+
+
+
+template <int dim>
+std::vector<unsigned int>
+FE_RaviartThomas<dim>::get_dpo_vector (const unsigned int deg)
+{
+  // Degrees of freedom are attached to faces and to the cell
+  // interior only. Each face carries (deg+1)^(dim-1) of them; the
+  // power is built by repeated multiplication.
+  unsigned int n_face_dofs = 1;
+  for (unsigned int power=dim-1; power>0; --power)
+    n_face_dofs *= deg+1;
+
+  // Slot dim-1 receives the per-face count and slot dim the
+  // interior count dim*deg*(deg+1)^(dim-1); the rest stays zero.
+  const unsigned int n_interior_dofs = dim*deg*n_face_dofs;
+  std::vector<unsigned int> dofs_per_object(dim+1);
+  dofs_per_object[dim-1] = n_face_dofs;
+  dofs_per_object[dim]   = n_interior_dofs;
+
+  return dofs_per_object;
+}
+
+
+
+template <int dim>
+std::pair<Table<2,bool>, std::vector<unsigned int> >
+FE_RaviartThomas<dim>::get_constant_modes() const
+{
+  // flag every shape function as constant mode of all dim components
+  Table<2,bool> modes(dim, this->dofs_per_cell);
+  std::vector<unsigned int> component_list(dim);
+  for (unsigned int comp=0; comp<dim; ++comp)
+    {
+      component_list[comp] = comp;
+      for (unsigned int dof=0; dof<this->dofs_per_cell; ++dof) modes(comp,dof) = true;
+    }
+  return std::make_pair(modes, component_list);
+}
+
+
+
+//---------------------------------------------------------------------------
+// Data field initialization
+//---------------------------------------------------------------------------
+
+
+template <int dim>
+bool
+FE_RaviartThomas<dim>::has_support_on_face (
+  const unsigned int shape_index,
+  const unsigned int face_index) const
+{
+  Assert (shape_index < this->dofs_per_cell,
+          ExcIndexRange (shape_index, 0, this->dofs_per_cell));
+  Assert (face_index < GeometryInfo<dim>::faces_per_cell,
+          ExcIndexRange (face_index, 0, GeometryInfo<dim>::faces_per_cell));
+
+  // Tell whether shape function shape_index can be nonzero on face
+  // face_index. Zeros are reported only where they are known
+  // easily; in every other situation the answer is true, which is
+  // always safe.
+  //
+  // The one situation treated explicitly is this->degree == 1 in
+  // 2d. There, shape_index is used directly as a face number, and
+  // the function is reported to vanish only on the face opposite to
+  // that one.
+  const bool lowest_order    = (this->degree == 1);
+  const bool two_dimensional = (dim == 2);
+
+  if (lowest_order && two_dimensional)
+    {
+      // only on the one non-adjacent face are the values actually
+      // zero; GeometryInfo lists that face in its table of
+      // opposite faces
+      const unsigned int far_face
+        = GeometryInfo<dim>::opposite_face[shape_index];
+
+      return (face_index != far_face);
+    }
+
+  // Any other combination of polynomial degree and space
+  // dimension ends up here. No zeros are known for these
+  // cases, so support is claimed on every face, which is the
+  // safe answer.
+
+  return true;
+}
+
+
+// Scalar data cannot be interpolated by this vector valued element;
+// the body of this overload is a single Assert(false, ExcNotImplemented()).
+template <int dim>
+void
+FE_RaviartThomas<dim>::interpolate(
+  std::vector<double> &,
+  const std::vector<double> &) const
+{
+  Assert(false, ExcNotImplemented());
+}
+
+
+template <int dim>
+void
+FE_RaviartThomas<dim>::interpolate(
+  std::vector<double>    &local_dofs,
+  const std::vector<Vector<double> > &values,
+  unsigned int offset) const
+{
+  Assert (values.size() == this->generalized_support_points.size(),
+          ExcDimensionMismatch(values.size(), this->generalized_support_points.size()));
+  Assert (local_dofs.size() == this->dofs_per_cell,
+          ExcDimensionMismatch(local_dofs.size(),this->dofs_per_cell));
+  Assert (values[0].size() >= offset+this->n_components(),
+          ExcDimensionMismatch(values[0].size(),offset+this->n_components()));
+
+  // all moments are accumulated with +=, so start from zero
+  std::fill(local_dofs.begin(), local_dofs.end(), 0.);
+
+  // face moments: weighted sums of the normal component of 'values'
+  const unsigned int n_faces = GeometryInfo<dim>::faces_per_cell;
+  const unsigned int n_face_points = boundary_weights.size(0);
+  for (unsigned int face=0; face<n_faces; ++face)
+    for (unsigned int q=0; q<n_face_points; ++q)
+      for (unsigned int m=0; m<boundary_weights.size(1); ++m)
+        local_dofs[face*this->dofs_per_face+m]
+        += boundary_weights(q,m) * values[face*n_face_points+q](GeometryInfo<dim>::unit_normal_direction[face]+offset);
+
+  // interior moments: these dofs and points follow all face ones
+  for (unsigned int q=0; q<interior_weights.size(0); ++q)
+    for (unsigned int m=0; m<interior_weights.size(1); ++m)
+      for (unsigned int d=0; d<dim; ++d)
+        local_dofs[n_faces*this->dofs_per_face+m*dim+d]
+        += interior_weights(q,m,d) * values[n_faces*n_face_points+q](offset+d);
+}
+
+
+template <int dim>
+void
+FE_RaviartThomas<dim>::interpolate(
+  std::vector<double> &local_dofs,
+  const VectorSlice<const std::vector<std::vector<double> > > &values) const
+{
+  Assert (values.size() == this->n_components(),
+          ExcDimensionMismatch(values.size(), this->n_components()));
+  Assert (values[0].size() == this->generalized_support_points.size(),
+          ExcDimensionMismatch(values[0].size(), this->generalized_support_points.size()));
+  Assert (local_dofs.size() == this->dofs_per_cell,
+          ExcDimensionMismatch(local_dofs.size(),this->dofs_per_cell));
+
+  // all moments are accumulated with +=, so start from zero
+  std::fill(local_dofs.begin(), local_dofs.end(), 0.);
+
+  // face moments: weighted sums of the normal component of 'values'
+  const unsigned int n_faces = GeometryInfo<dim>::faces_per_cell;
+  const unsigned int n_face_points = boundary_weights.size(0);
+  for (unsigned int face=0; face<n_faces; ++face)
+    for (unsigned int q=0; q<n_face_points; ++q)
+      for (unsigned int m=0; m<boundary_weights.size(1); ++m)
+        local_dofs[face*this->dofs_per_face+m]
+        += boundary_weights(q,m) * values[GeometryInfo<dim>::unit_normal_direction[face]][face*n_face_points+q];
+
+  // interior moments: these dofs and points follow all face ones
+  for (unsigned int q=0; q<interior_weights.size(0); ++q)
+    for (unsigned int m=0; m<interior_weights.size(1); ++m)
+      for (unsigned int d=0; d<dim; ++d)
+        local_dofs[n_faces*this->dofs_per_face+m*dim+d]
+        += interior_weights(q,m,d) * values[d][n_faces*n_face_points+q];
+}
+
+
+
+template <int dim>
+std::size_t
+FE_RaviartThomas<dim>::memory_consumption () const
+{
+  Assert (false, ExcNotImplemented ()); // no memory estimate is implemented for this element
+  return 0; // placeholder value, not a measurement
+}
+
+
+
+// explicit instantiations
+#include "fe_raviart_thomas.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/fe_raviart_thomas.inst.in b/source/fe/fe_raviart_thomas.inst.in
new file mode 100644
index 0000000..9a92cde
--- /dev/null
+++ b/source/fe/fe_raviart_thomas.inst.in
@@ -0,0 +1,22 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+for (deal_II_dimension : DIMENSIONS)
+  {
+    template class FE_RaviartThomas<deal_II_dimension>;
+  }
+
diff --git a/source/fe/fe_raviart_thomas_nodal.cc b/source/fe/fe_raviart_thomas_nodal.cc
new file mode 100644
index 0000000..7a103be
--- /dev/null
+++ b/source/fe/fe_raviart_thomas_nodal.cc
@@ -0,0 +1,783 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/qprojector.h>
+#include <deal.II/base/table.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/mapping.h>
+#include <deal.II/fe/fe_raviart_thomas.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/fe_tools.h>
+
+#include <sstream>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+template <int dim>
+FE_RaviartThomasNodal<dim>::FE_RaviartThomasNodal (const unsigned int deg)
+  :
+  FE_PolyTensor<PolynomialsRaviartThomas<dim>, dim> (
+    deg,
+    FiniteElementData<dim>(get_dpo_vector(deg),
+                           dim, deg+1, FiniteElementData<dim>::Hdiv),
+    get_ria_vector (deg),
+    std::vector<ComponentMask>(PolynomialsRaviartThomas<dim>::compute_n_pols(deg),
+                               std::vector<bool>(dim,true)))
+{
+  Assert (dim >= 2, ExcImpossibleInDim(dim));
+  const unsigned int n_dofs = this->dofs_per_cell;
+
+  this->mapping_type = mapping_raviart_thomas;
+  // Set up the element: support points, inverse node matrix,
+  // prolongation matrices and interface constraints, in this
+  // order. First, the generalized support points are initialized,
+  // since they are required for interpolation.
+  initialize_support_points(deg);
+  // Now compute the inverse node matrix, generating the correct
+  // basis functions from the raw ones. M receives the node matrix
+  // from FETools::compute_node_matrix and its inverse is stored in
+  // inverse_node_matrix.
+
+  // We use an auxiliary matrix in this function. Therefore,
+  // inverse_node_matrix is still empty while compute_node_matrix
+  // runs, and shape_value_component returns the 'raw' shape values.
+  // NOTE(review): this relies on the statement order; keep the
+  // inversion after the call to compute_node_matrix.
+  FullMatrix<double> M(n_dofs, n_dofs);
+  FETools::compute_node_matrix(M, *this);
+  this->inverse_node_matrix.reinit(n_dofs, n_dofs);
+  this->inverse_node_matrix.invert(M);
+  // From now on (inverse_node_matrix is set), the shape functions
+  // will be the correct ones, not the raw shape functions
+  // anymore.
+
+  // Reinit the vectors of prolongation matrices to the right
+  // sizes, n_dofs x n_dofs for every child of every refinement
+  // case from cut_x up to isotropic_refinement. There are no
+  // restriction matrices implemented
+  for (unsigned int ref_case=RefinementCase<dim>::cut_x;
+       ref_case<RefinementCase<dim>::isotropic_refinement+1; ++ref_case)
+    {
+      const unsigned int nc = GeometryInfo<dim>::n_children(RefinementCase<dim>(ref_case));
+
+      for (unsigned int i=0; i<nc; ++i)
+        this->prolongation[ref_case-1][i].reinit (n_dofs, n_dofs);
+    }
+  // Fill prolongation matrices with embedding operators
+  FETools::compute_embedding_matrices (*this, this->prolongation);
+  // TODO[TL]: for anisotropic refinement we will probably need a table of submatrices with an array for each refine case
+  FullMatrix<double> face_embeddings[GeometryInfo<dim>::max_children_per_face];
+  for (unsigned int i=0; i<GeometryInfo<dim>::max_children_per_face; ++i)
+    face_embeddings[i].reinit (this->dofs_per_face, this->dofs_per_face);
+  FETools::compute_face_embedding_matrices<dim,double>(*this, face_embeddings, 0, 0);
+  this->interface_constraints.reinit((1<<(dim-1)) * this->dofs_per_face,
+                                     this->dofs_per_face);
+  unsigned int target_row=0; // the rows of all face embedding matrices are stacked one after another
+  for (unsigned int d=0; d<GeometryInfo<dim>::max_children_per_face; ++d)
+    for (unsigned int i=0; i<face_embeddings[d].m(); ++i)
+      {
+        for (unsigned int j=0; j<face_embeddings[d].n(); ++j)
+          this->interface_constraints(target_row,j) = face_embeddings[d](i,j);
+        ++target_row;
+      }
+}
+
+
+
+template <int dim>
+std::string
+FE_RaviartThomasNodal<dim>::get_name () const
+{
+  // Build "FE_RaviartThomasNodal<dim>(k)" where k is the argument
+  // originally given to the constructor.
+  //
+  // note that the FETools::get_fe_from_name function depends on the
+  // particular format of the string this function returns, so they
+  // have to be kept in synch
+
+  // this->degree is the maximal polynomial degree and is thus one
+  // higher than the constructor argument
+  const unsigned int constructor_argument = this->degree-1;
+
+  std::ostringstream name_stream;
+  name_stream << "FE_RaviartThomasNodal<" << dim << ">(";
+  name_stream << constructor_argument << ")";
+  return name_stream.str();
+}
+
+
+template <int dim>
+FiniteElement<dim> *
+FE_RaviartThomasNodal<dim>::clone() const
+{
+  return new FE_RaviartThomasNodal<dim>(*this); // heap-allocated copy of *this, returned as a raw pointer
+}
+
+
+//---------------------------------------------------------------------------
+// Auxiliary and internal functions
+//---------------------------------------------------------------------------
+
+
+
+template <int dim>
+void
+FE_RaviartThomasNodal<dim>::initialize_support_points (const unsigned int deg)
+{
+  this->generalized_support_points.resize (this->dofs_per_cell);
+  this->generalized_face_support_points.resize (this->dofs_per_face);
+
+  // Number of the next cell support point to be entered
+  unsigned int current = 0;
+
+  // On the faces, we choose as many Gauss points as necessary to
+  // determine the normal component uniquely. This is the deg of the
+  // Raviart-Thomas element plus one.
+  if (dim>1)
+    {
+      QGauss<dim-1> face_points (deg+1);
+      Assert (face_points.size() == this->dofs_per_face,
+              ExcInternalError());
+      const unsigned int n_face_dofs = this->dofs_per_face;
+      for (unsigned int q=0; q<n_face_dofs; ++q)
+        this->generalized_face_support_points[q] = face_points.point(q);
+
+      // The same points, projected to all faces of the cell, are the
+      // first cell support points. They are read starting at the
+      // offset DataSetDescriptor::face returns for face 0 with the
+      // flags (true,false,false).
+      const Quadrature<dim> faces = QProjector<dim>::project_to_all_faces(face_points);
+      const unsigned int first_data_set
+        = QProjector<dim>::DataSetDescriptor::face(0, true, false, false, n_face_dofs);
+
+      current = n_face_dofs*GeometryInfo<dim>::faces_per_cell;
+      for (unsigned int q=0; q<current; ++q)
+        this->generalized_support_points[q] = faces.point(q+first_data_set);
+    }
+
+  if (deg==0) return;
+  // In the interior, we need anisotropic Gauss quadratures,
+  // different for each direction: set number d uses deg points
+  // along coordinate d and deg+1 points along the others.
+  QGauss<1> high(deg+1);
+  QGauss<1> low(deg);
+
+  for (unsigned int d=0; d<dim; ++d)
+    {
+      // one-dimensional formula to be used along each coordinate
+      const QGauss<1> &along_x = ((d==0) ? low : high);
+      const QGauss<1> &along_y = ((d==1) ? low : high);
+      const QGauss<1> &along_z = ((d==2) ? low : high);
+
+      QAnisotropic<dim> *quadrature;
+      if (dim == 1) quadrature = new QAnisotropic<dim>(high);
+      if (dim == 2) quadrature = new QAnisotropic<dim>(along_x, along_y);
+      if (dim == 3) quadrature = new QAnisotropic<dim>(along_x, along_y, along_z);
+      Assert(dim<=3, ExcNotImplemented());
+
+      for (unsigned int q=0; q<quadrature->size(); ++q)
+        this->generalized_support_points[current++] = quadrature->point(q);
+      delete quadrature;
+    }
+  Assert (current == this->dofs_per_cell, ExcInternalError());
+}
+
+
+
+template <int dim>
+std::vector<unsigned int>
+FE_RaviartThomasNodal<dim>::get_dpo_vector (const unsigned int deg)
+{
+  // Degrees of freedom are attached to faces and to the cell
+  // interior only. Each face carries (deg+1)^(dim-1) of them; the
+  // power is built by repeated multiplication.
+  unsigned int n_face_dofs = 1;
+  for (unsigned int power=dim-1; power>0; --power)
+    n_face_dofs *= deg+1;
+
+  // Slot dim-1 receives the per-face count and slot dim the
+  // interior count dim*deg*(deg+1)^(dim-1); the rest stays zero.
+  const unsigned int n_interior_dofs = dim*deg*n_face_dofs;
+  std::vector<unsigned int> dofs_per_object(dim+1);
+  dofs_per_object[dim-1] = n_face_dofs;
+  dofs_per_object[dim]   = n_interior_dofs;
+
+  return dofs_per_object;
+}
+
+
+
+template <>
+std::vector<bool>
+FE_RaviartThomasNodal<1>::get_ria_vector (const unsigned int)
+{
+  Assert (false, ExcImpossibleInDim(1)); // the 1d specialization only flags the call as impossible
+  return std::vector<bool>(); // empty vector, returned when the Assert does not abort
+}
+
+
+
+template <int dim>
+std::vector<bool>
+FE_RaviartThomasNodal<dim>::get_ria_vector (const unsigned int deg)
+{
+  // dofs on one face: deg+1, times deg+1 for each dimension beyond 2
+  unsigned int n_face_dofs = deg+1;
+  for (unsigned int factor=2; factor<dim; ++factor)
+    n_face_dofs *= deg+1;
+
+  const unsigned int n_cell_dofs = PolynomialsRaviartThomas<dim>::compute_n_pols(deg);
+  const unsigned int first_interior_dof = GeometryInfo<dim>::faces_per_cell*n_face_dofs;
+
+  // all face dofs need to be non-additive, since they have
+  // continuity requirements. however, the interior dofs, which
+  // come after all face dofs, are made additive
+  std::vector<bool> additive(n_cell_dofs,false);
+  for (unsigned int dof=first_interior_dof; dof<n_cell_dofs; ++dof)
+    additive[dof] = true;
+  return additive;
+}
+
+
+template <int dim>
+bool
+FE_RaviartThomasNodal<dim>::has_support_on_face (
+  const unsigned int shape_index,
+  const unsigned int face_index) const
+{
+  Assert (shape_index < this->dofs_per_cell,
+          ExcIndexRange (shape_index, 0, this->dofs_per_cell));
+  Assert (face_index < GeometryInfo<dim>::faces_per_cell,
+          ExcIndexRange (face_index, 0, GeometryInfo<dim>::faces_per_cell));
+
+  // The first degrees of freedom are on the faces, face after face,
+  // and each face has dofs_per_face of them (in general this equals
+  // this->degree only in 2d, so divide by dofs_per_face).
+  const unsigned int support_face = shape_index / this->dofs_per_face;
+
+  // This function answers whether shape function shape_index may
+  // be nonzero on face face_index. The only thing we know for sure
+  // is that shape functions with support on one face are zero on
+  // the opposite face.
+  if (support_face < GeometryInfo<dim>::faces_per_cell)
+    return (face_index != GeometryInfo<dim>::opposite_face[support_face]);
+
+  // In all other cases (interior shape functions), return true,
+  // which is safe
+  return true;
+}
+
+
+template <int dim>
+void
+FE_RaviartThomasNodal<dim>::interpolate(
+  std::vector<double> &,
+  const std::vector<double> &) const
+{
+  Assert(false, ExcNotImplemented()); // the scalar overload is not implemented; both arguments are ignored
+}
+
+
+template <int dim>
+void
+FE_RaviartThomasNodal<dim>::interpolate(
+  std::vector<double>    &local_dofs,
+  const std::vector<Vector<double> > &values,
+  unsigned int offset) const
+{
+  Assert (values.size() == this->generalized_support_points.size(),
+          ExcDimensionMismatch(values.size(), this->generalized_support_points.size()));
+  Assert (local_dofs.size() == this->dofs_per_cell,
+          ExcDimensionMismatch(local_dofs.size(),this->dofs_per_cell));
+  Assert (values[0].size() >= offset+this->n_components(),
+          ExcDimensionMismatch(values[0].size(),offset+this->n_components()));
+
+  // Every degree of freedom of this element is a point value:
+  // dof number n is one vector component of values[n], shifted by
+  // offset. fbase is the number of the first dof of the group
+  // (a face, or an interior chunk) that is currently being
+  // copied.
+  unsigned int fbase = 0;
+
+  // First do interpolation on faces. There, the component
+  // evaluated depends on the face direction and orientation: it is
+  // the one given by GeometryInfo<dim>::unit_normal_direction for
+  // that face.
+  for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+    {
+      const unsigned int normal = offset+GeometryInfo<dim>::unit_normal_direction[face];
+      for (unsigned int n=fbase; n<fbase+this->dofs_per_face; ++n)
+        local_dofs[n] = values[n](normal);
+      fbase += this->dofs_per_face;
+    }
+
+  // The remaining points form dim
+  // chunks, one for each component.
+  const unsigned int istep = (this->dofs_per_cell - fbase) / dim;
+  Assert ((this->dofs_per_cell - fbase) % dim == 0, ExcInternalError());
+
+  // copy chunk after chunk; the chunk number selects the component
+  for (unsigned int component=0; fbase<this->dofs_per_cell;
+       ++component, fbase+=istep)
+    for (unsigned int n=fbase; n<fbase+istep; ++n)
+      local_dofs[n] = values[n](offset+component);
+
+  Assert (fbase == this->dofs_per_cell, ExcInternalError());
+}
+
+
+template <int dim>
+void
+FE_RaviartThomasNodal<dim>::interpolate(
+  std::vector<double> &local_dofs,
+  const VectorSlice<const std::vector<std::vector<double> > > &values) const
+{
+  Assert (values.size() == this->n_components(),
+          ExcDimensionMismatch(values.size(), this->n_components()));
+  Assert (values[0].size() == this->generalized_support_points.size(),
+          ExcDimensionMismatch(values[0].size(), this->generalized_support_points.size()));
+  Assert (local_dofs.size() == this->dofs_per_cell,
+          ExcDimensionMismatch(local_dofs.size(),this->dofs_per_cell));
+  // values[c][n] is component c at generalized support point n, and
+  // dof number n is one component of the values at point n. First
+  // do interpolation on faces. There, the component evaluated is
+  // the one normal to the face, unit_normal_direction[f].
+  unsigned int fbase = 0;
+  unsigned int f=0;
+  for (; f<GeometryInfo<dim>::faces_per_cell;
+       ++f, fbase+=this->dofs_per_face)
+    {
+      for (unsigned int i=0; i<this->dofs_per_face; ++i)
+        {
+          local_dofs[fbase+i] = values[GeometryInfo<dim>::unit_normal_direction[f]][fbase+i];
+        }
+    }
+  // The remaining points form dim chunks, one for each component;
+  // f counts the chunks and thereby selects the component.
+  const unsigned int istep = (this->dofs_per_cell - fbase) / dim;
+  Assert ((this->dofs_per_cell - fbase) % dim == 0, ExcInternalError());
+
+  f = 0;
+  while (fbase < this->dofs_per_cell)
+    {
+      for (unsigned int i=0; i<istep; ++i)
+        {
+          local_dofs[fbase+i] = values[f][fbase+i];
+        }
+      fbase+=istep;
+      ++f;
+    }
+  Assert (fbase == this->dofs_per_cell, ExcInternalError());
+}
+
+
+//TODO: There are tests that check that the following few functions don't produce assertion failures, but none that actually check whether they do the right thing. one example for such a test would be to project a function onto an hp space and make sure that the convergence order is correct with regard to the lowest used polynomial degree
+
+template <int dim>
+bool
+FE_RaviartThomasNodal<dim>::hp_constraints_are_implemented () const
+{
+  return true; // unconditional: this element always reports that hp constraints are implemented
+}
+
+
+template <int dim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FE_RaviartThomasNodal<dim>::hp_vertex_dof_identities (
+  const FiniteElement<dim> &fe_other) const
+{
+  // no dofs are assigned on the vertex by this element, so the
+  // list of identities is always empty. we can presently only make
+  // this statement if both FEs are FE_RaviartThomasNodals; any
+  // other partner element is flagged as not implemented.
+  const FE_RaviartThomasNodal<dim> *const rt_other
+    = dynamic_cast<const FE_RaviartThomasNodal<dim>*>(&fe_other);
+  if (rt_other == 0)
+    {
+      Assert(false, ExcNotImplemented());
+    }
+
+  return std::vector<std::pair<unsigned int, unsigned int> > ();
+}
+
+
+
+template <int dim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FE_RaviartThomasNodal<dim>::
+hp_line_dof_identities (const FiniteElement<dim> &fe_other) const
+{
+  typedef std::vector<std::pair<unsigned int, unsigned int> > IdentityList;
+
+  // List the pairs (dof on a line of this element, dof on a line of
+  // fe_other) that coincide. The first entry of each pair refers to
+  // this element, the second one to fe_other.
+  //
+  // we can presently only compute these identities if both FEs are
+  // FE_RaviartThomasNodals
+  const FE_RaviartThomasNodal<dim> *const rt_other
+    = dynamic_cast<const FE_RaviartThomasNodal<dim>*>(&fe_other);
+  if (rt_other == 0)
+    {
+      Assert (false, ExcNotImplemented());
+      return IdentityList();
+    }
+
+  // dofs are located on faces; these are only lines in 2d
+  if (dim != 2)
+    return IdentityList();
+
+  // note that this->degree is the *maximal* polynomial degree, and
+  // is thus one higher than the argument given to the constructor.
+  // The comparison below is done in terms of the constructor
+  // arguments of both elements.
+  const unsigned int my_order    = this->degree-1;
+  const unsigned int other_order = rt_other->degree-1;
+
+  // dofs are located along lines, so two dofs are identical only in
+  // the following two cases (remember that the face support points
+  // are Gauss points):
+  // 1. both orders are equal; in that case, all the dofs on the
+  //    line are identical
+  // 2. both orders are even, i.e. the numbers of dofs per line of
+  //    both elements are odd; then there exists only one point (the
+  //    middle one) such that dofs are identical on this point
+  // In every other case the list stays empty.
+  IdentityList identities;
+
+  if (my_order == other_order)
+    {
+      for (unsigned int dof=0; dof<my_order+1; ++dof)
+        identities.push_back (std::make_pair(dof,dof));
+    }
+  else if ((my_order%2==0) && (other_order%2==0))
+    {
+      identities.push_back(std::make_pair(my_order/2,other_order/2));
+    }
+
+  return identities;
+}
+
+
+template <int dim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FE_RaviartThomasNodal<dim>::hp_quad_dof_identities (
+  const FiniteElement<dim> &fe_other) const
+{
+  typedef std::vector<std::pair<unsigned int, unsigned int> > IdentityList;
+
+  // we can presently only compute these identities if both FEs are
+  // FE_RaviartThomasNodals
+  const FE_RaviartThomasNodal<dim> *const rt_other
+    = dynamic_cast<const FE_RaviartThomasNodal<dim>*>(&fe_other);
+  if (rt_other == 0)
+    {
+      Assert (false, ExcNotImplemented());
+      return IdentityList();
+    }
+
+  // dofs are located on faces; these are only quads in 3d
+  if (dim != 3)
+    return IdentityList();
+
+  // Compare the numbers of dofs per quad: equal numbers identify
+  // all dofs pairwise, two odd numbers identify only the middle
+  // dof of each element, anything else identifies nothing.
+  const unsigned int n_mine  = this->dofs_per_quad;
+  const unsigned int n_other = rt_other->dofs_per_quad;
+
+  IdentityList identities;
+  if (n_mine == n_other)
+    {
+      for (unsigned int dof=0; dof<n_mine; ++dof)
+        identities.push_back (std::make_pair(dof,dof));
+    }
+  else if ((n_mine%2!=0) && (n_other%2!=0))
+    identities.push_back(std::make_pair(n_mine/2, n_other/2));
+
+  return identities;
+}
+
+
+template <int dim>
+FiniteElementDomination::Domination
+FE_RaviartThomasNodal<dim>::compare_for_face_domination (
+  const FiniteElement<dim> &fe_other) const
+{
+  const FE_RaviartThomasNodal<dim> *const rt_other
+    = dynamic_cast<const FE_RaviartThomasNodal<dim>*>(&fe_other);
+  if (rt_other == 0)
+    {
+      Assert (false, ExcNotImplemented());
+      return FiniteElementDomination::neither_element_dominates;
+    }
+  // the element of lower degree dominates; equal degrees tie
+  if (this->degree == rt_other->degree)
+    return FiniteElementDomination::either_element_can_dominate;
+  return (this->degree < rt_other->degree
+          ? FiniteElementDomination::this_element_dominates
+          : FiniteElementDomination::other_element_dominates);
+}
+
+
+
+template <>
+void
+FE_RaviartThomasNodal<1>::get_face_interpolation_matrix (
+  const FiniteElement<1,1> &/*x_source_fe*/,
+  FullMatrix<double>     &/*interpolation_matrix*/) const
+{
+  Assert (false, ExcImpossibleInDim(1)); // 1d specialization: both arguments are unused, the call is only flagged
+}
+
+
+template <>
+void
+FE_RaviartThomasNodal<1>::get_subface_interpolation_matrix (
+  const FiniteElement<1,1> &/*x_source_fe*/,
+  const unsigned int      /*subface*/,
+  FullMatrix<double>     &/*interpolation_matrix*/) const
+{
+  Assert (false, ExcImpossibleInDim(1)); // 1d specialization: all arguments are unused, the call is only flagged
+}
+
+
+
+template <int dim>
+void
+FE_RaviartThomasNodal<dim>::get_face_interpolation_matrix (
+  const FiniteElement<dim> &x_source_fe,
+  FullMatrix<double>       &interpolation_matrix) const
+{
+  // Fill interpolation_matrix so that entry (i,j) is the value of
+  // component 0 of this element's j-th shape function on face 0,
+  // evaluated at the i-th generalized face support point of the source
+  // element.
+  //
+  // Only a source that is itself a RaviartThomasNodal element is
+  // supported; it is accepted if its name starts accordingly or if it
+  // has the matching dynamic type.
+  AssertThrow ((x_source_fe.get_name().find ("FE_RaviartThomasNodal<") == 0)
+               ||
+               (dynamic_cast<const FE_RaviartThomasNodal<dim>*>(&x_source_fe) != 0),
+               typename FiniteElement<dim>::
+               ExcInterpolationNotImplemented());
+
+  // the matrix needs one column per face dof of this element and one
+  // row per face dof of the source element
+  Assert (interpolation_matrix.n() == this->dofs_per_face,
+          ExcDimensionMismatch (interpolation_matrix.n(),
+                                this->dofs_per_face));
+  Assert (interpolation_matrix.m() == x_source_fe.dofs_per_face,
+          ExcDimensionMismatch (interpolation_matrix.m(),
+                                x_source_fe.dofs_per_face));
+
+  // from here on, work with the source through its concrete type
+  const FE_RaviartThomasNodal<dim> &source_fe
+    = dynamic_cast<const FE_RaviartThomasNodal<dim>&>(x_source_fe);
+
+  // This element must not have more face dofs than the source. The
+  // computation below would go through otherwise as well, but the
+  // resulting matrices might cause trouble in the hp procedures that
+  // use this function.
+  Assert (this->dofs_per_face <= source_fe.dofs_per_face,
+          typename FiniteElement<dim>::
+          ExcInterpolationNotImplemented ());
+
+  // tolerance for snapping entries to exactly 0 or 1: a rule of thumb
+  // for the floating point accuracy to expect at this polynomial degree
+  const double eps = 2e-13*this->degree*(dim-1);
+
+  // wrap the source element's generalized face support points into a
+  // quadrature object, which is then used as the basis for QProjector
+  // to place the points on face 0 of the cell
+  Quadrature<dim-1> support_points_on_face (source_fe.get_generalized_face_support_points ());
+  const Quadrature<dim> support_points_in_cell
+    = QProjector<dim>::project_to_face (support_points_on_face, 0);
+
+  for (unsigned int row=0; row<source_fe.dofs_per_face; ++row)
+    {
+      const Point<dim> &support_point = support_points_in_cell.point (row);
+
+      for (unsigned int col=0; col<this->dofs_per_face; ++col)
+        {
+          double matrix_entry
+            = this->shape_value_component (this->face_to_cell_index(col, 0),
+                                           support_point, 0);
+
+          // round-off cleanup: a value within eps of 1 or of 0 is
+          // replaced by exactly 1 or 0, which is needed to avoid
+          // problems with higher order elements
+          if ( std::fabs(matrix_entry - 1.0) < eps )
+            matrix_entry = 1.0;
+          if ( std::fabs(matrix_entry) < eps )
+            matrix_entry = 0.0;
+
+          interpolation_matrix(row,col) = matrix_entry;
+        }
+    }
+
+  // consistency check: each row of the matrix is expected to add up to
+  // one (within the tolerance), the reasoning being that the shape
+  // functions sum up to 1
+  for (unsigned int row=0; row<source_fe.dofs_per_face; ++row)
+    {
+      double sum = 0.;
+
+      for (unsigned int col=0; col<this->dofs_per_face; ++col)
+        sum += interpolation_matrix(row,col);
+
+      Assert (std::fabs(sum-1) < 2e-13*this->degree*(dim-1),
+              ExcInternalError());
+    }
+}
+
+
+template <int dim>
+void
+FE_RaviartThomasNodal<dim>::get_subface_interpolation_matrix (
+  const FiniteElement<dim> &x_source_fe,
+  const unsigned int subface,
+  FullMatrix<double>       &interpolation_matrix) const
+{
+  // Fill interpolation_matrix so that entry (i,j) is the value of
+  // component 0 of this element's j-th shape function on face 0,
+  // evaluated at the i-th generalized face support point of the source
+  // element after that point has been placed on the given subface of
+  // face 0.
+  //
+  // Only a source that is itself a RaviartThomasNodal element is
+  // supported; it is accepted if its name starts accordingly or if it
+  // has the matching dynamic type.
+  AssertThrow ((x_source_fe.get_name().find ("FE_RaviartThomasNodal<") == 0)
+               ||
+               (dynamic_cast<const FE_RaviartThomasNodal<dim>*>(&x_source_fe) != 0),
+               typename FiniteElement<dim>::
+               ExcInterpolationNotImplemented());
+
+  // the matrix needs one column per face dof of this element and one
+  // row per face dof of the source element
+  Assert (interpolation_matrix.n() == this->dofs_per_face,
+          ExcDimensionMismatch (interpolation_matrix.n(),
+                                this->dofs_per_face));
+  Assert (interpolation_matrix.m() == x_source_fe.dofs_per_face,
+          ExcDimensionMismatch (interpolation_matrix.m(),
+                                x_source_fe.dofs_per_face));
+
+  // from here on, work with the source through its concrete type
+  const FE_RaviartThomasNodal<dim> &source_fe
+    = dynamic_cast<const FE_RaviartThomasNodal<dim>&>(x_source_fe);
+
+  // This element must not have more face dofs than the source. The
+  // computation below would go through otherwise as well, but the
+  // resulting matrices might cause trouble in the hp procedures that
+  // use this function.
+  Assert (this->dofs_per_face <= source_fe.dofs_per_face,
+          typename FiniteElement<dim>::
+          ExcInterpolationNotImplemented ());
+
+  // tolerance for snapping entries to exactly 0 or 1: a rule of thumb
+  // for the floating point accuracy to expect at this polynomial degree
+  const double eps = 2e-13*this->degree*(dim-1);
+
+  // wrap the source element's generalized face support points into a
+  // quadrature object, which is then used as the basis for QProjector
+  // to place the points on the requested subface of face 0
+  Quadrature<dim-1> support_points_on_face (source_fe.get_generalized_face_support_points ());
+  const Quadrature<dim> support_points_in_cell
+    = QProjector<dim>::project_to_subface (support_points_on_face, 0, subface);
+
+  for (unsigned int row=0; row<source_fe.dofs_per_face; ++row)
+    {
+      const Point<dim> &support_point = support_points_in_cell.point (row);
+
+      for (unsigned int col=0; col<this->dofs_per_face; ++col)
+        {
+          double matrix_entry
+            = this->shape_value_component (this->face_to_cell_index(col, 0),
+                                           support_point, 0);
+
+          // round-off cleanup: a value within eps of 1 or of 0 is
+          // replaced by exactly 1 or 0, which is needed to avoid
+          // problems with higher order elements
+          if ( std::fabs(matrix_entry - 1.0) < eps )
+            matrix_entry = 1.0;
+          if ( std::fabs(matrix_entry) < eps )
+            matrix_entry = 0.0;
+
+          interpolation_matrix(row,col) = matrix_entry;
+        }
+    }
+
+  // consistency check: each row of the matrix is expected to add up to
+  // one (within the tolerance), the reasoning being that the shape
+  // functions sum up to 1
+  for (unsigned int row=0; row<source_fe.dofs_per_face; ++row)
+    {
+      double sum = 0.;
+
+      for (unsigned int col=0; col<this->dofs_per_face; ++col)
+        sum += interpolation_matrix(row,col);
+
+      Assert (std::fabs(sum-1) < 2e-13*this->degree*(dim-1),
+              ExcInternalError());
+    }
+}
+
+
+
+// explicit instantiations
+#include "fe_raviart_thomas_nodal.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/fe_raviart_thomas_nodal.inst.in b/source/fe/fe_raviart_thomas_nodal.inst.in
new file mode 100644
index 0000000..c546410
--- /dev/null
+++ b/source/fe/fe_raviart_thomas_nodal.inst.in
@@ -0,0 +1,22 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+// Instantiation template: request an explicit instantiation of
+// FE_RaviartThomasNodal for each dimension listed in DIMENSIONS.
+for (deal_II_dimension : DIMENSIONS)
+  {
+    template class FE_RaviartThomasNodal<deal_II_dimension>;
+  }
+
diff --git a/source/fe/fe_system.cc b/source/fe/fe_system.cc
new file mode 100644
index 0000000..2245f46
--- /dev/null
+++ b/source/fe/fe_system.cc
@@ -0,0 +1,3041 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/base/quadrature.h>
+#include <deal.II/base/qprojector.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/fe/mapping.h>
+#include <deal.II/fe/fe_system.h>
+#include <deal.II/fe/fe_values.h>
+
+#include <sstream>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace
+{
+  /**
+   * Predicate that is true for every value other than zero.
+   */
+  bool IsNonZero (unsigned int i)
+  {
+    return i != 0;
+  }
+
+  /**
+   * Return how many entries of @p vec satisfy IsNonZero().
+   */
+  unsigned int count_nonzeros(const std::vector<unsigned int> &vec)
+  {
+    unsigned int n_nonzero = 0;
+    for (std::vector<unsigned int>::const_iterator entry = vec.begin();
+         entry != vec.end(); ++entry)
+      if (IsNonZero(*entry))
+        ++n_nonzero;
+
+    return n_nonzero;
+  }
+
+}
+/* ----------------------- FESystem::InternalData ------------------- */
+
+
+// Set up the per-base-element storage: both base_fe_datas and
+// base_fe_output_objects are constructed with n_base_elements entries.
+// NOTE(review): the entries of base_fe_datas are presumably null
+// pointers until set_fe_data() is called -- confirm against the
+// declaration in the header.
+template <int dim, int spacedim>
+FESystem<dim,spacedim>::InternalData::InternalData(const unsigned int n_base_elements)
+  :
+  base_fe_datas(n_base_elements),
+  base_fe_output_objects(n_base_elements)
+{}
+
+
+
+template <int dim, int spacedim>
+FESystem<dim,spacedim>::InternalData::~InternalData()
+{
+  // Release every data object stored for a base element. Each slot is
+  // reset to zero after deletion so that the stale pointer cannot be
+  // used inadvertently; empty slots are skipped.
+  for (unsigned int base=0; base<base_fe_datas.size(); ++base)
+    {
+      if (!base_fe_datas[base])
+        continue;
+
+      delete base_fe_datas[base];
+      base_fe_datas[base] = 0;
+    }
+}
+
+
+
+// Return a reference to the data object stored for base element
+// @p base_no. The index is range-checked by an assertion. The stored
+// pointer is dereferenced without a null check, so a valid pointer has
+// to have been stored for this index (see set_fe_data()) before this
+// function is called.
+template <int dim, int spacedim>
+typename FiniteElement<dim,spacedim>::InternalDataBase &
+FESystem<dim,spacedim>::
+InternalData::get_fe_data (const unsigned int base_no) const
+{
+  Assert(base_no<base_fe_datas.size(),
+         ExcIndexRange(base_no,0,base_fe_datas.size()));
+  return *base_fe_datas[base_no];
+}
+
+
+
+// Store @p ptr as the data object of base element @p base_no. The index
+// is range-checked by an assertion. The destructor of InternalData
+// deletes every non-null pointer stored this way.
+// NOTE(review): a pointer already stored in the same slot is simply
+// overwritten here and not deleted.
+template <int dim, int spacedim>
+void
+FESystem<dim,spacedim>::
+InternalData::set_fe_data (const unsigned int base_no,
+                           typename FiniteElement<dim,spacedim>::InternalDataBase *ptr)
+{
+  Assert(base_no<base_fe_datas.size(),
+         ExcIndexRange(base_no,0,base_fe_datas.size()));
+  base_fe_datas[base_no]=ptr;
+}
+
+
+
+// Return a reference to the output object kept for base element
+// @p base_no. The index is range-checked by an assertion. Note that this
+// const member function hands out a non-const reference.
+template <int dim, int spacedim>
+internal::FEValues::FiniteElementRelatedData<dim,spacedim> &
+FESystem<dim,spacedim>::
+InternalData::get_fe_output_object (const unsigned int base_no) const
+{
+  Assert(base_no<base_fe_output_objects.size(),
+         ExcIndexRange(base_no,0,base_fe_output_objects.size()));
+  return base_fe_output_objects[base_no];
+}
+
+
+
+/* ---------------------------------- FESystem ------------------- */
+
+
+// Out-of-class definition of the static member invalid_face_number. No
+// initializer is given here -- presumably the value is supplied with the
+// declaration inside the class; confirm in the header.
+template <int dim, int spacedim>
+const unsigned int FESystem<dim,spacedim>::invalid_face_number;
+
+namespace
+{
+
+  /**
+   * Build the FiniteElementData object of an element composed of the
+   * base elements @p fes, each taken as often as the matching entry of
+   * @p multiplicities says.
+   *
+   * The numbers of degrees of freedom per vertex, line, quad and hex as
+   * well as the number of components are the sums over all bases, each
+   * weighted with its multiplicity. The degree is the largest tensor
+   * degree of any base, the conformity is the bitwise AND of the
+   * conformities of the bases, and one block of size dofs_per_cell is
+   * recorded for every copy of a base. Entries with multiplicity zero
+   * are never dereferenced, so their pointers may be NULL.
+   */
+  template <int dim, int spacedim>
+  FiniteElementData<dim>
+  multiply_dof_numbers (const std::vector<const FiniteElement<dim,spacedim>*> &fes,
+                        const std::vector<unsigned int>                       &multiplicities)
+  {
+    AssertDimension(fes.size(), multiplicities.size());
+
+    unsigned int n_dofs_per_vertex = 0;
+    unsigned int n_dofs_per_line   = 0;
+    unsigned int n_dofs_per_quad   = 0;
+    unsigned int n_dofs_per_hex    = 0;
+    unsigned int n_components      = 0;
+    unsigned int max_degree        = 0;
+
+    // The conformity starts out as that of the first base that is
+    // actually used and then loses bits as indicated by the bases. If no
+    // base is used at all, the default-constructed value is kept; it
+    // does not matter what it is in that case.
+    typename FiniteElementData<dim>::Conformity total_conformity
+      = typename FiniteElementData<dim>::Conformity();
+    bool seen_used_base = false;
+
+    BlockIndices block_indices (0,0);
+
+    for (unsigned int b=0; b<fes.size(); ++b)
+      {
+        const unsigned int n_copies = multiplicities[b];
+        if (n_copies == 0)
+          continue;
+
+        const FiniteElement<dim,spacedim> &base_fe = *fes[b];
+
+        n_dofs_per_vertex += base_fe.dofs_per_vertex * n_copies;
+        n_dofs_per_line   += base_fe.dofs_per_line * n_copies;
+        n_dofs_per_quad   += base_fe.dofs_per_quad * n_copies;
+        n_dofs_per_hex    += base_fe.dofs_per_hex * n_copies;
+
+        n_components += base_fe.n_components() * n_copies;
+
+        max_degree = std::max(max_degree, base_fe.tensor_degree() );
+
+        if (!seen_used_base)
+          {
+            total_conformity = base_fe.conforming_space;
+            seen_used_base = true;
+          }
+        total_conformity =
+          typename FiniteElementData<dim>::Conformity(total_conformity
+                                                      &
+                                                      base_fe.conforming_space);
+
+        // one block per copy of this base
+        for (unsigned int copy=0; copy<n_copies; ++copy)
+          block_indices.push_back(base_fe.dofs_per_cell);
+      }
+
+    // dofs per object: vertices and lines are always listed, quads only
+    // for dim>1 and hexes only for dim>2
+    const unsigned int dofs_per_object[4] = { n_dofs_per_vertex,
+                                              n_dofs_per_line,
+                                              n_dofs_per_quad,
+                                              n_dofs_per_hex
+                                            };
+    const unsigned int n_object_kinds = 2 + (dim>1 ? 1 : 0) + (dim>2 ? 1 : 0);
+    std::vector<unsigned int> dpo (dofs_per_object,
+                                   dofs_per_object + n_object_kinds);
+
+    return FiniteElementData<dim> (dpo,
+                                   n_components,
+                                   max_degree,
+                                   total_conformity,
+                                   block_indices);
+  }
+
+  /**
+   * Convenience overload for up to five base elements given as separate
+   * (element, multiplicity) pairs. All five pairs are collected into
+   * vectors, including those left at their defaults (NULL, 0), and
+   * passed on to the vector overload.
+   */
+  template <int dim, int spacedim>
+  FiniteElementData<dim>
+  multiply_dof_numbers (const FiniteElement<dim,spacedim> *fe1,
+                        const unsigned int            N1,
+                        const FiniteElement<dim,spacedim> *fe2=NULL,
+                        const unsigned int            N2=0,
+                        const FiniteElement<dim,spacedim> *fe3=NULL,
+                        const unsigned int            N3=0,
+                        const FiniteElement<dim,spacedim> *fe4=NULL,
+                        const unsigned int            N4=0,
+                        const FiniteElement<dim,spacedim> *fe5=NULL,
+                        const unsigned int            N5=0)
+  {
+    const FiniteElement<dim,spacedim> *const base_ptrs[5]
+      = { fe1, fe2, fe3, fe4, fe5 };
+    const unsigned int n_copies[5]
+      = { N1, N2, N3, N4, N5 };
+
+    std::vector<const FiniteElement<dim,spacedim>*> fes (base_ptrs, base_ptrs+5);
+    std::vector<unsigned int> mult (n_copies, n_copies+5);
+
+    return multiply_dof_numbers(fes, mult);
+  }
+
+
+
+  /**
+   * Compute the restriction-is-additive flag of every shape function of
+   * the element composed of the base elements @p fes, each repeated as
+   * often as the matching entry of @p multiplicities says.
+   *
+   * The result has one entry per shape function of the composed element.
+   * Shape functions are enumerated object by object: first all vertices,
+   * then all lines, quads and hexes. On each object the base elements
+   * come in order, and every copy of a base contributes its dofs on that
+   * object. Each entry is the value of restriction_is_additive() of the
+   * base element for the index the shape function has within that base.
+   *
+   * Base elements with multiplicity zero are never dereferenced and may
+   * therefore be NULL pointers.
+   */
+  template <int dim, int spacedim>
+  std::vector<bool>
+  compute_restriction_is_additive_flags (const std::vector<const FiniteElement<dim,spacedim>*> &fes,
+                                         const std::vector<unsigned int>              &multiplicities)
+  {
+    AssertDimension(fes.size(), multiplicities.size());
+
+    // first count the number of shape functions that will emerge from the
+    // given FEs
+    unsigned int n_shape_functions = 0;
+    for (unsigned int i=0; i<fes.size(); ++i)
+      if (multiplicities[i]>0) // check needed as fe might be NULL
+        n_shape_functions += fes[i]->dofs_per_cell * multiplicities[i];
+
+    // generate the array that will hold the output
+    std::vector<bool> retval (n_shape_functions, false);
+
+    // finally go through all the shape functions of the base elements, and copy
+    // their flags. this somehow copies the code in build_cell_table, which is
+    // not nice as it uses too much implicit knowledge about the layout of the
+    // individual bases in the composed FE, but there seems no way around...
+    //
+    // for each shape function, copy the flags from the base element to this
+    // one, taking into account multiplicities, and other complications
+    //
+    // total_index is the running index of the shape function within the
+    // composed element; it is advanced in the innermost loops of all four
+    // sections below
+
+    // 1. Vertices
+    unsigned int total_index = 0;
+    for (unsigned int vertex_number=0;
+         vertex_number<GeometryInfo<dim>::vertices_per_cell;
+         ++vertex_number)
+      {
+        for (unsigned int base=0; base<fes.size(); ++base)
+          for (unsigned int m=0; m<multiplicities[base]; ++m)
+            for (unsigned int local_index = 0;
+                 local_index < fes[base]->dofs_per_vertex;
+                 ++local_index, ++total_index)
+              {
+                // index of this vertex dof within the base element
+                const unsigned int index_in_base
+                  = (fes[base]->dofs_per_vertex*vertex_number +
+                     local_index);
+
+                Assert (index_in_base < fes[base]->dofs_per_cell,
+                        ExcInternalError());
+                retval[total_index] = fes[base]->restriction_is_additive(index_in_base);
+              }
+      }
+
+    // 2. Lines
+    if (GeometryInfo<dim>::lines_per_cell > 0)
+      for (unsigned int line_number= 0;
+           line_number != GeometryInfo<dim>::lines_per_cell;
+           ++line_number)
+        {
+          for (unsigned int base=0; base<fes.size(); ++base)
+            for (unsigned int m=0; m<multiplicities[base]; ++m)
+              for (unsigned int local_index = 0;
+                   local_index < fes[base]->dofs_per_line;
+                   ++local_index, ++total_index)
+                {
+                  // index of this line dof within the base element
+                  const unsigned int index_in_base
+                    = (fes[base]->dofs_per_line*line_number +
+                       local_index +
+                       fes[base]->first_line_index);
+
+                  Assert (index_in_base < fes[base]->dofs_per_cell,
+                          ExcInternalError());
+                  retval[total_index] = fes[base]->restriction_is_additive(index_in_base);
+                }
+        }
+
+    // 3. Quads
+    if (GeometryInfo<dim>::quads_per_cell > 0)
+      for (unsigned int quad_number= 0;
+           quad_number != GeometryInfo<dim>::quads_per_cell;
+           ++quad_number)
+        {
+          for (unsigned int base=0; base<fes.size(); ++base)
+            for (unsigned int m=0; m<multiplicities[base]; ++m)
+              for (unsigned int local_index = 0;
+                   local_index < fes[base]->dofs_per_quad;
+                   ++local_index, ++total_index)
+                {
+                  // index of this quad dof within the base element
+                  const unsigned int index_in_base
+                    = (fes[base]->dofs_per_quad*quad_number +
+                       local_index +
+                       fes[base]->first_quad_index);
+
+                  Assert (index_in_base < fes[base]->dofs_per_cell,
+                          ExcInternalError());
+                  retval[total_index] = fes[base]->restriction_is_additive(index_in_base);
+                }
+        }
+
+    // 4. Hexes
+    if (GeometryInfo<dim>::hexes_per_cell > 0)
+      for (unsigned int hex_number= 0;
+           hex_number != GeometryInfo<dim>::hexes_per_cell;
+           ++hex_number)
+        {
+          for (unsigned int base=0; base<fes.size(); ++base)
+            for (unsigned int m=0; m<multiplicities[base]; ++m)
+              for (unsigned int local_index = 0;
+                   local_index < fes[base]->dofs_per_hex;
+                   ++local_index, ++total_index)
+                {
+                  // index of this hex dof within the base element
+                  const unsigned int index_in_base
+                    = (fes[base]->dofs_per_hex*hex_number +
+                       local_index +
+                       fes[base]->first_hex_index);
+
+                  Assert (index_in_base < fes[base]->dofs_per_cell,
+                          ExcInternalError());
+                  retval[total_index] = fes[base]->restriction_is_additive(index_in_base);
+                }
+        }
+
+    // every shape function counted at the top must have been visited
+    // exactly once
+    Assert (total_index == n_shape_functions, ExcInternalError());
+
+    return retval;
+  }
+
+
+
+  /**
+   * Convenience overload for up to five base elements given as separate
+   * (element, multiplicity) pairs. Returns a boolean vector with the
+   * restriction-is-additive flags of the composed element by collecting
+   * all five pairs, including those left at their defaults (NULL, 0),
+   * into vectors and passing them on to the vector overload.
+   */
+  template <int dim, int spacedim>
+  std::vector<bool>
+  compute_restriction_is_additive_flags (const FiniteElement<dim,spacedim> *fe1,
+                                         const unsigned int        N1,
+                                         const FiniteElement<dim,spacedim> *fe2=NULL,
+                                         const unsigned int        N2=0,
+                                         const FiniteElement<dim,spacedim> *fe3=NULL,
+                                         const unsigned int        N3=0,
+                                         const FiniteElement<dim,spacedim> *fe4=NULL,
+                                         const unsigned int        N4=0,
+                                         const FiniteElement<dim,spacedim> *fe5=NULL,
+                                         const unsigned int        N5=0)
+  {
+    const FiniteElement<dim,spacedim> *const base_ptrs[5]
+      = { fe1, fe2, fe3, fe4, fe5 };
+    const unsigned int n_copies[5]
+      = { N1, N2, N3, N4, N5 };
+
+    std::vector<const FiniteElement<dim,spacedim>*> fe_list (base_ptrs, base_ptrs+5);
+    std::vector<unsigned int>              multiplicities (n_copies, n_copies+5);
+
+    return compute_restriction_is_additive_flags (fe_list, multiplicities);
+  }
+
+
+
+  /**
+   * Compute the nonzero components of a list of finite elements with
+   * multiplicities given in the second argument.
+   *
+   * The result has one ComponentMask per shape function of the composed
+   * element, each with as many entries as the composed element has
+   * components. Shape functions are enumerated object by object: first
+   * all vertices, then all lines, quads and hexes. On each object the
+   * base elements come in order, and every copy of a base contributes
+   * its dofs on that object. For a shape function belonging to a given
+   * copy of a base, the nonzero flags of the base are copied into the
+   * range of components occupied by that copy; all other entries stay
+   * false.
+   *
+   * Base elements with multiplicity zero are never dereferenced and may
+   * therefore be NULL pointers.
+   */
+  template <int dim, int spacedim>
+  std::vector<ComponentMask>
+  compute_nonzero_components (const std::vector<const FiniteElement<dim,spacedim>*> &fes,
+                              const std::vector<unsigned int>              &multiplicities)
+  {
+    AssertDimension(fes.size(), multiplicities.size());
+
+    // first count the number of dofs and components that will emerge from the
+    // given FEs
+    unsigned int n_shape_functions = 0;
+    for (unsigned int i=0; i<fes.size(); ++i)
+      if (multiplicities[i]>0) //needed because fe might be NULL
+        n_shape_functions += fes[i]->dofs_per_cell * multiplicities[i];
+
+    unsigned int n_components = 0;
+    for (unsigned int i=0; i<fes.size(); ++i)
+      if (multiplicities[i]>0) //needed because fe might be NULL
+        n_components += fes[i]->n_components() * multiplicities[i];
+
+    // generate the array that will hold the output
+    std::vector<std::vector<bool> >
+    retval (n_shape_functions, std::vector<bool> (n_components, false));
+
+    // finally go through all the shape functions of the base elements, and copy
+    // their flags. this somehow copies the code in build_cell_table, which is
+    // not nice as it uses too much implicit knowledge about the layout of the
+    // individual bases in the composed FE, but there seems no way around...
+    //
+    // for each shape function, copy the non-zero flags from the base element to
+    // this one, taking into account multiplicities, multiple components in base
+    // elements, and other complications
+    //
+    // total_index is the running index of the shape function within the
+    // composed element; comp_start is the first component of the composed
+    // element that belongs to the current copy of the current base and is
+    // reset for every object
+
+    // 1. Vertices
+    unsigned int total_index = 0;
+    for (unsigned int vertex_number=0;
+         vertex_number<GeometryInfo<dim>::vertices_per_cell;
+         ++vertex_number)
+      {
+        unsigned int comp_start = 0;
+        for (unsigned int base=0; base<fes.size(); ++base)
+          for (unsigned int m=0; m<multiplicities[base];
+               ++m, comp_start+=fes[base]->n_components())
+            for (unsigned int local_index = 0;
+                 local_index < fes[base]->dofs_per_vertex;
+                 ++local_index, ++total_index)
+              {
+                // index of this vertex dof within the base element
+                const unsigned int index_in_base
+                  = (fes[base]->dofs_per_vertex*vertex_number +
+                     local_index);
+
+                Assert (comp_start+fes[base]->n_components() <=
+                        retval[total_index].size(),
+                        ExcInternalError());
+                for (unsigned int c=0; c<fes[base]->n_components(); ++c)
+                  {
+                    Assert (c < fes[base]->get_nonzero_components(index_in_base).size(),
+                            ExcInternalError());
+                    retval[total_index][comp_start+c]
+                      = fes[base]->get_nonzero_components(index_in_base)[c];
+                  }
+              }
+      }
+
+    // 2. Lines
+    if (GeometryInfo<dim>::lines_per_cell > 0)
+      for (unsigned int line_number= 0;
+           line_number != GeometryInfo<dim>::lines_per_cell;
+           ++line_number)
+        {
+          unsigned int comp_start = 0;
+          for (unsigned int base=0; base<fes.size(); ++base)
+            for (unsigned int m=0; m<multiplicities[base];
+                 ++m, comp_start+=fes[base]->n_components())
+              for (unsigned int local_index = 0;
+                   local_index < fes[base]->dofs_per_line;
+                   ++local_index, ++total_index)
+                {
+                  // index of this line dof within the base element
+                  const unsigned int index_in_base
+                    = (fes[base]->dofs_per_line*line_number +
+                       local_index +
+                       fes[base]->first_line_index);
+
+                  Assert (comp_start+fes[base]->n_components() <=
+                          retval[total_index].size(),
+                          ExcInternalError());
+                  for (unsigned int c=0; c<fes[base]->n_components(); ++c)
+                    {
+                      Assert (c < fes[base]->get_nonzero_components(index_in_base).size(),
+                              ExcInternalError());
+                      retval[total_index][comp_start+c]
+                        = fes[base]->get_nonzero_components(index_in_base)[c];
+                    }
+                }
+        }
+
+    // 3. Quads
+    if (GeometryInfo<dim>::quads_per_cell > 0)
+      for (unsigned int quad_number= 0;
+           quad_number != GeometryInfo<dim>::quads_per_cell;
+           ++quad_number)
+        {
+          unsigned int comp_start = 0;
+          for (unsigned int base=0; base<fes.size(); ++base)
+            for (unsigned int m=0; m<multiplicities[base];
+                 ++m, comp_start+=fes[base]->n_components())
+              for (unsigned int local_index = 0;
+                   local_index < fes[base]->dofs_per_quad;
+                   ++local_index, ++total_index)
+                {
+                  // index of this quad dof within the base element
+                  const unsigned int index_in_base
+                    = (fes[base]->dofs_per_quad*quad_number +
+                       local_index +
+                       fes[base]->first_quad_index);
+
+                  Assert (comp_start+fes[base]->n_components() <=
+                          retval[total_index].size(),
+                          ExcInternalError());
+                  for (unsigned int c=0; c<fes[base]->n_components(); ++c)
+                    {
+                      Assert (c < fes[base]->get_nonzero_components(index_in_base).size(),
+                              ExcInternalError());
+                      retval[total_index][comp_start+c]
+                        = fes[base]->get_nonzero_components(index_in_base)[c];
+                    }
+                }
+        }
+
+    // 4. Hexes
+    if (GeometryInfo<dim>::hexes_per_cell > 0)
+      for (unsigned int hex_number= 0;
+           hex_number != GeometryInfo<dim>::hexes_per_cell;
+           ++hex_number)
+        {
+          unsigned int comp_start = 0;
+          for (unsigned int base=0; base<fes.size(); ++base)
+            for (unsigned int m=0; m<multiplicities[base];
+                 ++m, comp_start+=fes[base]->n_components())
+              for (unsigned int local_index = 0;
+                   local_index < fes[base]->dofs_per_hex;
+                   ++local_index, ++total_index)
+                {
+                  // index of this hex dof within the base element
+                  const unsigned int index_in_base
+                    = (fes[base]->dofs_per_hex*hex_number +
+                       local_index +
+                       fes[base]->first_hex_index);
+
+                  Assert (comp_start+fes[base]->n_components() <=
+                          retval[total_index].size(),
+                          ExcInternalError());
+                  for (unsigned int c=0; c<fes[base]->n_components(); ++c)
+                    {
+                      Assert (c < fes[base]->get_nonzero_components(index_in_base).size(),
+                              ExcInternalError());
+                      retval[total_index][comp_start+c]
+                        = fes[base]->get_nonzero_components(index_in_base)[c];
+                    }
+                }
+        }
+
+    // every shape function counted at the top must have been visited
+    // exactly once
+    Assert (total_index == n_shape_functions, ExcInternalError());
+
+    // now copy the vector<vector<bool> > into a vector<ComponentMask>.
+    // this appears complicated but we do it this way since it's just
+    // awkward to generate ComponentMasks directly and so we need the
+    // recourse of the inner vector<bool> anyway.
+    std::vector<ComponentMask> xretval (retval.size());
+    for (unsigned int i=0; i<retval.size(); ++i)
+      xretval[i] = ComponentMask(retval[i]);
+    return xretval;
+  }
+
+
+  /**
+   * Convenience overload for up to five base elements given as separate
+   * (element, multiplicity) pairs. Computes the non-zero vector
+   * components of the composed finite element by collecting all five
+   * pairs, including those left at their defaults (NULL, 0), into
+   * vectors and passing them on to the vector overload.
+   */
+  template <int dim, int spacedim>
+  std::vector<ComponentMask>
+  compute_nonzero_components (const FiniteElement<dim,spacedim> *fe1,
+                              const unsigned int        N1,
+                              const FiniteElement<dim,spacedim> *fe2=NULL,
+                              const unsigned int        N2=0,
+                              const FiniteElement<dim,spacedim> *fe3=NULL,
+                              const unsigned int        N3=0,
+                              const FiniteElement<dim,spacedim> *fe4=NULL,
+                              const unsigned int        N4=0,
+                              const FiniteElement<dim,spacedim> *fe5=NULL,
+                              const unsigned int        N5=0)
+  {
+    const FiniteElement<dim,spacedim> *const base_ptrs[5]
+      = { fe1, fe2, fe3, fe4, fe5 };
+    const unsigned int n_copies[5]
+      = { N1, N2, N3, N4, N5 };
+
+    std::vector<const FiniteElement<dim,spacedim>*> fe_list (base_ptrs, base_ptrs+5);
+    std::vector<unsigned int>              multiplicities (n_copies, n_copies+5);
+
+    return compute_nonzero_components (fe_list, multiplicities);
+  }
+}
+
+
+
+// Compose a system out of n_elements copies of the single base element
+// fe. The base class is set up from the helper functions above, and
+// base_elements is constructed with one entry if n_elements is nonzero
+// and with none otherwise.
+template <int dim, int spacedim>
+FESystem<dim,spacedim>::FESystem (const FiniteElement<dim,spacedim> &fe,
+                                  const unsigned int n_elements) :
+  FiniteElement<dim,spacedim> (multiply_dof_numbers(&fe, n_elements),
+                               compute_restriction_is_additive_flags (&fe, n_elements),
+                               compute_nonzero_components(&fe, n_elements)),
+  base_elements((n_elements>0))
+{
+  // one-entry lists describing the only base and its multiplicity
+  std::vector<const FiniteElement<dim,spacedim>*> fes (1, &fe);
+  std::vector<unsigned int> multiplicities (1, n_elements);
+
+  initialize(fes, multiplicities);
+}
+
+
+
+template <int dim, int spacedim>
+FESystem<dim,spacedim>::FESystem (const FiniteElement<dim,spacedim> &fe1,
+                                  const unsigned int        n1,
+                                  const FiniteElement<dim,spacedim> &fe2,
+                                  const unsigned int        n2) :
+  FiniteElement<dim,spacedim> (multiply_dof_numbers(&fe1, n1, &fe2, n2),
+                               compute_restriction_is_additive_flags (&fe1, n1,
+                                   &fe2, n2),
+                               compute_nonzero_components(&fe1, n1,
+                                                          &fe2, n2)),
+  base_elements((n1>0)+(n2>0))
+{
+  // collect the base elements and their multiplicities, in argument order
+  const FiniteElement<dim,spacedim> *const elements[] = { &fe1, &fe2 };
+  const unsigned int                       counts[]   = { n1, n2 };
+
+  std::vector<const FiniteElement<dim,spacedim>*> fes (elements, elements+2);
+  std::vector<unsigned int> multiplicities (counts, counts+2);
+  initialize(fes, multiplicities);
+}
+
+
+
+template <int dim, int spacedim>
+FESystem<dim,spacedim>::FESystem (const FiniteElement<dim,spacedim> &fe1,
+                                  const unsigned int        n1,
+                                  const FiniteElement<dim,spacedim> &fe2,
+                                  const unsigned int        n2,
+                                  const FiniteElement<dim,spacedim> &fe3,
+                                  const unsigned int        n3) :
+  FiniteElement<dim,spacedim> (multiply_dof_numbers(&fe1, n1,
+                                                    &fe2, n2,
+                                                    &fe3, n3),
+                               compute_restriction_is_additive_flags (&fe1, n1,
+                                   &fe2, n2,
+                                   &fe3, n3),
+                               compute_nonzero_components(&fe1, n1,
+                                                          &fe2, n2,
+                                                          &fe3, n3)),
+  base_elements((n1>0)+(n2>0)+(n3>0))
+{
+  // collect the base elements and their multiplicities, in argument order
+  const unsigned int n_pairs = 3;
+  const FiniteElement<dim,spacedim> *const elements[n_pairs] = { &fe1, &fe2, &fe3 };
+  const unsigned int                       counts[n_pairs]   = { n1, n2, n3 };
+
+  std::vector<const FiniteElement<dim,spacedim>*>
+  fes (elements, elements+n_pairs);
+  std::vector<unsigned int> multiplicities (counts, counts+n_pairs);
+  initialize(fes, multiplicities);
+}
+
+
+
+template <int dim, int spacedim>
+FESystem<dim,spacedim>::FESystem (const FiniteElement<dim,spacedim> &fe1,
+                                  const unsigned int        n1,
+                                  const FiniteElement<dim,spacedim> &fe2,
+                                  const unsigned int        n2,
+                                  const FiniteElement<dim,spacedim> &fe3,
+                                  const unsigned int        n3,
+                                  const FiniteElement<dim,spacedim> &fe4,
+                                  const unsigned int        n4) :
+  FiniteElement<dim,spacedim> (multiply_dof_numbers(&fe1, n1,
+                                                    &fe2, n2,
+                                                    &fe3, n3,
+                                                    &fe4, n4),
+                               compute_restriction_is_additive_flags (&fe1, n1,
+                                   &fe2, n2,
+                                   &fe3, n3,
+                                   &fe4, n4),
+                               compute_nonzero_components(&fe1, n1,
+                                                          &fe2, n2,
+                                                          &fe3, n3,
+                                                          &fe4, n4)),
+  base_elements((n1>0)+(n2>0)+(n3>0)+(n4>0))
+{
+  // collect the base elements and their multiplicities, in argument order
+  const unsigned int n_pairs = 4;
+  const FiniteElement<dim,spacedim> *const elements[n_pairs] =
+  {
+    &fe1, &fe2, &fe3, &fe4
+  };
+  const unsigned int counts[n_pairs] = { n1, n2, n3, n4 };
+  std::vector<const FiniteElement<dim,spacedim>*>
+  fes (elements, elements+n_pairs);
+  std::vector<unsigned int> multiplicities (counts, counts+n_pairs);
+  initialize(fes, multiplicities);
+}
+
+
+
+template <int dim, int spacedim>
+FESystem<dim,spacedim>::FESystem (const FiniteElement<dim,spacedim> &fe1,
+                                  const unsigned int        n1,
+                                  const FiniteElement<dim,spacedim> &fe2,
+                                  const unsigned int        n2,
+                                  const FiniteElement<dim,spacedim> &fe3,
+                                  const unsigned int        n3,
+                                  const FiniteElement<dim,spacedim> &fe4,
+                                  const unsigned int        n4,
+                                  const FiniteElement<dim,spacedim> &fe5,
+                                  const unsigned int        n5) :
+  FiniteElement<dim,spacedim> (multiply_dof_numbers(&fe1, n1,
+                                                    &fe2, n2,
+                                                    &fe3, n3,
+                                                    &fe4, n4,
+                                                    &fe5, n5),
+                               compute_restriction_is_additive_flags (&fe1, n1,
+                                   &fe2, n2,
+                                   &fe3, n3,
+                                   &fe4, n4,
+                                   &fe5, n5),
+                               compute_nonzero_components(&fe1, n1,
+                                                          &fe2, n2,
+                                                          &fe3, n3,
+                                                          &fe4, n4,
+                                                          &fe5, n5)),
+  base_elements((n1>0)+(n2>0)+(n3>0)+(n4>0)+(n5>0))
+{
+  // collect the base elements and their multiplicities, in argument order
+  const unsigned int n_pairs = 5;
+  const FiniteElement<dim,spacedim> *const elements[n_pairs] =
+  {
+    &fe1, &fe2, &fe3, &fe4, &fe5
+  };
+  const unsigned int counts[n_pairs] = { n1, n2, n3, n4, n5 };
+
+  std::vector<const FiniteElement<dim,spacedim>*>
+  fes (elements, elements+n_pairs);
+  std::vector<unsigned int>
+  multiplicities (counts, counts+n_pairs);
+  initialize(fes, multiplicities);
+}
+
+// Constructor taking the base elements and their multiplicities as two
+// lists; both are forwarded to the helper functions and to initialize().
+template <int dim, int spacedim>
+FESystem<dim,spacedim>::FESystem (
+  const std::vector<const FiniteElement<dim,spacedim>*>  &fes,
+  const std::vector<unsigned int>                  &multiplicities)
+  :
+  FiniteElement<dim,spacedim> (multiply_dof_numbers(fes, multiplicities),
+                               compute_restriction_is_additive_flags (fes, multiplicities),
+                               compute_nonzero_components(fes, multiplicities)),
+  base_elements(count_nonzeros(multiplicities))
+{
+  initialize(fes, multiplicities);
+}
+
+
+// Destructor. The body is empty: nothing is released explicitly here.
+template <int dim, int spacedim>
+FESystem<dim,spacedim>::~FESystem ()
+{}
+
+
+
+template <int dim, int spacedim>
+std::string
+FESystem<dim,spacedim>::get_name () const
+{
+  // The result is "FESystem<...>[name1^m1-name2^m2-...]", where "^m" is
+  // omitted for multiplicity one. FETools::get_fe_from_name depends on
+  // exactly this format, so the two have to be kept in sync.
+  const unsigned int n_bases = this->n_base_elements();
+
+  std::ostringstream out;
+  out << "FESystem<" << Utilities::dim_string(dim,spacedim) << ">[";
+
+  for (unsigned int b=0; b<n_bases; ++b)
+    {
+      // separate consecutive base elements by a dash
+      if (b > 0)
+        out << '-';
+
+      out << base_element(b).get_name();
+
+      const unsigned int multiplicity = this->element_multiplicity(b);
+      if (multiplicity != 1)
+        out << '^' << multiplicity;
+    }
+
+  out << ']';
+  return out.str();
+}
+
+
+
+template <int dim, int spacedim>
+FiniteElement<dim,spacedim> *
+FESystem<dim,spacedim>::clone() const
+{
+  // list the base elements and multiplicities of this object, then build a copy
+  std::vector<const FiniteElement<dim,spacedim>*> fes (this->n_base_elements());
+  std::vector<unsigned int> multiplicities (fes.size());
+  for (unsigned int b=0; b<fes.size(); ++b)
+    {
+      fes[b] = &base_element(b);
+      multiplicities[b] = this->element_multiplicity(b);
+    }
+  return new FESystem<dim,spacedim>(fes, multiplicities);
+}
+
+
+
+template <int dim, int spacedim>
+double
+FESystem<dim,spacedim>::shape_value (const unsigned int i,
+                                     const Point<dim> &p) const
+{
+  Assert (i<this->dofs_per_cell, ExcIndexRange(i, 0, this->dofs_per_cell));
+  Assert (this->is_primitive(i),
+          (typename FiniteElement<dim,spacedim>::ExcShapeFunctionNotPrimitive(i)));
+  // delegate to the base element that shape function i belongs to
+  const unsigned int base_no = this->system_to_base_table[i].first.first;
+  return base_element(base_no).shape_value(this->system_to_base_table[i].second, p);
+}
+
+
+
+template <int dim, int spacedim>
+double
+FESystem<dim,spacedim>::shape_value_component (const unsigned int i,
+                                               const Point<dim>  &p,
+                                               const unsigned int component) const
+{
+  Assert (i<this->dofs_per_cell, ExcIndexRange(i, 0, this->dofs_per_cell));
+  Assert (component < this->n_components(),
+          ExcIndexRange (component, 0, this->n_components()));
+
+  // shape function i is marked as zero in this component, so there is
+  // nothing to evaluate
+  if (!this->nonzero_components[i][component])
+    return 0;
+
+  // translate the component of the system into the base element it
+  // belongs to, and into the component within that base element
+  const unsigned int base_no
+    = this->component_to_base_index(component).first;
+  const unsigned int base_component
+    = this->component_to_base_index(component).second;
+
+  // the index of shape function i within its base element
+  const unsigned int index_in_base = this->system_to_base_table[i].second;
+
+  // evaluate on the base element. as noted in the original code, the
+  // base element raises an error itself should the respective shape
+  // function not be primitive, so no such check is needed here
+  const double value
+    = base_element(base_no).shape_value_component(index_in_base,
+                                                  p,
+                                                  base_component);
+  return value;
+}
+
+
+
+template <int dim, int spacedim>
+Tensor<1,dim>
+FESystem<dim,spacedim>::shape_grad (const unsigned int i,
+                                    const Point<dim> &p) const
+{
+  Assert (i<this->dofs_per_cell, ExcIndexRange(i, 0, this->dofs_per_cell));
+  Assert (this->is_primitive(i),
+          (typename FiniteElement<dim,spacedim>::ExcShapeFunctionNotPrimitive(i)));
+  // delegate to the base element that shape function i belongs to
+  const unsigned int base_no = this->system_to_base_table[i].first.first;
+  return base_element(base_no).shape_grad(this->system_to_base_table[i].second, p);
+}
+
+
+
+template <int dim, int spacedim>
+Tensor<1,dim>
+FESystem<dim,spacedim>::shape_grad_component (const unsigned int i,
+                                              const Point<dim>  &p,
+                                              const unsigned int component) const
+{
+  Assert (i<this->dofs_per_cell, ExcIndexRange(i, 0, this->dofs_per_cell));
+  Assert (component < this->n_components(),
+          ExcIndexRange (component, 0, this->n_components()));
+
+  // shape function i is marked as zero in this component: nothing to evaluate
+  if (!this->nonzero_components[i][component])
+    return Tensor<1,dim>();
+
+  // map the system component to its base element and the component therein
+  const unsigned int base_no
+    = this->component_to_base_index(component).first;
+  const unsigned int base_component
+    = this->component_to_base_index(component).second;
+  const unsigned int index_in_base = this->system_to_base_table[i].second;
+
+  // evaluate on the base element; it raises an error itself should the
+  // shape function not be primitive, so no such check is needed here
+  return base_element(base_no).shape_grad_component(index_in_base,
+                                                    p,
+                                                    base_component);
+}
+
+
+
+template <int dim, int spacedim>
+Tensor<2,dim>
+FESystem<dim,spacedim>::shape_grad_grad (const unsigned int i,
+                                         const Point<dim> &p) const
+{
+  Assert (i<this->dofs_per_cell, ExcIndexRange(i, 0, this->dofs_per_cell));
+  Assert (this->is_primitive(i),
+          (typename FiniteElement<dim,spacedim>::ExcShapeFunctionNotPrimitive(i)));
+  // delegate to the base element that shape function i belongs to
+  const unsigned int base_no = this->system_to_base_table[i].first.first;
+  return base_element(base_no).shape_grad_grad(this->system_to_base_table[i].second, p);
+}
+
+
+
+template <int dim, int spacedim>
+Tensor<2,dim>
+FESystem<dim,spacedim>::shape_grad_grad_component (const unsigned int i,
+                                                   const Point<dim>  &p,
+                                                   const unsigned int component) const
+{
+  Assert (i<this->dofs_per_cell, ExcIndexRange(i, 0, this->dofs_per_cell));
+  Assert (component < this->n_components(),
+          ExcIndexRange (component, 0, this->n_components()));
+
+  // shape function i is marked as zero in this component: nothing to evaluate
+  if (!this->nonzero_components[i][component])
+    return Tensor<2,dim>();
+
+  // map the system component to its base element and the component therein
+  const unsigned int base_no
+    = this->component_to_base_index(component).first;
+  const unsigned int base_component
+    = this->component_to_base_index(component).second;
+  const unsigned int index_in_base = this->system_to_base_table[i].second;
+
+  // evaluate on the base element; it raises an error itself should the
+  // shape function not be primitive, so no such check is needed here
+  return base_element(base_no).shape_grad_grad_component(index_in_base,
+                                                         p,
+                                                         base_component);
+}
+
+
+
+template <int dim, int spacedim>
+Tensor<3,dim>
+FESystem<dim,spacedim>::shape_3rd_derivative (const unsigned int i,
+                                              const Point<dim> &p) const
+{
+  Assert (i<this->dofs_per_cell, ExcIndexRange(i, 0, this->dofs_per_cell));
+  Assert (this->is_primitive(i),
+          (typename FiniteElement<dim,spacedim>::ExcShapeFunctionNotPrimitive(i)));
+  // delegate to the base element that shape function i belongs to
+  const unsigned int base_no = this->system_to_base_table[i].first.first;
+  return base_element(base_no).shape_3rd_derivative(this->system_to_base_table[i].second, p);
+}
+
+
+
+template <int dim, int spacedim>
+Tensor<3,dim>
+FESystem<dim,spacedim>::shape_3rd_derivative_component (const unsigned int i,
+                                                        const Point<dim> &p,
+                                                        const unsigned int component) const
+{
+  Assert (i<this->dofs_per_cell, ExcIndexRange(i, 0, this->dofs_per_cell));
+  Assert (component < this->n_components(),
+          ExcIndexRange (component, 0, this->n_components()));
+
+  // shape function i is marked as zero in this component: nothing to evaluate
+  if (!this->nonzero_components[i][component])
+    return Tensor<3,dim>();
+
+  // map the system component to its base element and the component therein
+  const unsigned int base_no
+    = this->component_to_base_index(component).first;
+  const unsigned int base_component
+    = this->component_to_base_index(component).second;
+  const unsigned int index_in_base = this->system_to_base_table[i].second;
+
+  // evaluate on the base element; it raises an error itself should the
+  // shape function not be primitive, so no such check is needed here
+  return base_element(base_no).shape_3rd_derivative_component(index_in_base,
+                                                              p,
+                                                              base_component);
+}
+
+
+
+template <int dim, int spacedim>
+Tensor<4,dim>
+FESystem<dim,spacedim>::shape_4th_derivative (const unsigned int i,
+                                              const Point<dim> &p) const
+{
+  Assert (i<this->dofs_per_cell, ExcIndexRange(i, 0, this->dofs_per_cell));
+  Assert (this->is_primitive(i),
+          (typename FiniteElement<dim,spacedim>::ExcShapeFunctionNotPrimitive(i)));
+  // delegate to the base element that shape function i belongs to
+  const unsigned int base_no = this->system_to_base_table[i].first.first;
+  return base_element(base_no).shape_4th_derivative(this->system_to_base_table[i].second, p);
+}
+
+
+
+template <int dim, int spacedim>
+Tensor<4,dim>
+FESystem<dim,spacedim>::shape_4th_derivative_component (const unsigned int i,
+                                                        const Point<dim> &p,
+                                                        const unsigned int component) const
+{
+  Assert (i<this->dofs_per_cell, ExcIndexRange(i, 0, this->dofs_per_cell));
+  Assert (component < this->n_components(),
+          ExcIndexRange (component, 0, this->n_components()));
+
+  // shape function i is marked as zero in this component: nothing to evaluate
+  if (!this->nonzero_components[i][component])
+    return Tensor<4,dim>();
+
+  // map the system component to its base element and the component therein
+  const unsigned int base_no
+    = this->component_to_base_index(component).first;
+  const unsigned int base_component
+    = this->component_to_base_index(component).second;
+  const unsigned int index_in_base = this->system_to_base_table[i].second;
+
+  // evaluate on the base element; it raises an error itself should the
+  // shape function not be primitive, so no such check is needed here
+  return base_element(base_no).shape_4th_derivative_component(index_in_base,
+                                                              p,
+                                                              base_component);
+}
+
+
+
+template <int dim, int spacedim>
+void
+FESystem<dim,spacedim>::get_interpolation_matrix (
+  const FiniteElement<dim,spacedim> &x_source_fe,
+  FullMatrix<double>           &interpolation_matrix) const
+{
+  // Build the interpolation matrix from x_source_fe to this element out of
+  // the interpolation matrices of the base elements. First check the matrix
+  // size. For historical reasons, matrix.reinit(8,0) sets m==n==0, which may
+  // happen with FE_Nothing, so the checks below are written leniently.
+  Assert ((interpolation_matrix.m() == this->dofs_per_cell)
+          ||
+          (x_source_fe.dofs_per_cell == 0),
+          ExcDimensionMismatch (interpolation_matrix.m(),
+                                this->dofs_per_cell));
+  Assert ((interpolation_matrix.n() == x_source_fe.dofs_per_cell)
+          ||
+          (this->dofs_per_cell == 0),
+          ExcDimensionMismatch (interpolation_matrix.n(),
+                                x_source_fe.dofs_per_cell));
+
+  // there are certain conditions that the two elements have to satisfy so
+  // that this can work.
+  //
+  // condition 1: the other element must also be a system element
+
+  typedef FiniteElement<dim,spacedim> FEL;
+  AssertThrow ((x_source_fe.get_name().find ("FESystem<") == 0)
+               ||
+               (dynamic_cast<const FESystem<dim,spacedim>*>(&x_source_fe) != 0),
+               typename FEL::
+               ExcInterpolationNotImplemented());
+
+  // ok, source is a system element, so we may be able to do the work
+  const FESystem<dim,spacedim> &source_fe
+    = dynamic_cast<const FESystem<dim,spacedim>&>(x_source_fe);
+
+  // condition 2: same number of base elements
+  AssertThrow (this->n_base_elements() == source_fe.n_base_elements(),
+               typename FEL::
+               ExcInterpolationNotImplemented());
+
+  // condition 3: each base element has the same multiplicity in both systems
+  for (unsigned int i=0; i< this->n_base_elements(); ++i)
+    AssertThrow (this->element_multiplicity(i) ==
+                 source_fe.element_multiplicity(i),
+                 typename FEL::
+                 ExcInterpolationNotImplemented());
+
+  // ok, so let's try whether it works:
+
+  // first let's see whether all the base elements actually generate their
+  // interpolation matrices. if we get past the following loop, then
+  // apparently none of the called base elements threw an exception, so we're
+  // fine continuing and assembling the one big matrix from the small ones of
+  // the base elements
+  std::vector<FullMatrix<double> > base_matrices (this->n_base_elements());
+  for (unsigned int i=0; i<this->n_base_elements(); ++i)
+    {
+      base_matrices[i].reinit (base_element(i).dofs_per_cell,
+                               source_fe.base_element(i).dofs_per_cell);
+      base_element(i).get_interpolation_matrix (source_fe.base_element(i),
+                                                base_matrices[i]);
+    }
+
+  // first clear big matrix, to make sure that entries that would couple
+  // different bases (or multiplicity indices) are really zero. then assign
+  // entries
+  interpolation_matrix = 0;
+  for (unsigned int i=0; i<this->dofs_per_cell; ++i)
+    for (unsigned int j=0; j<source_fe.dofs_per_cell; ++j)
+      if (this->system_to_base_table[i].first ==
+          source_fe.system_to_base_table[j].first)
+        interpolation_matrix(i,j)
+          = (base_matrices[this->system_to_base_table[i].first.first]
+             (this->system_to_base_table[i].second,
+              source_fe.system_to_base_table[j].second));
+}
+
+// Return the restriction matrix for the given child and refinement case. It
+// is assembled from the base elements' matrices on first request and cached.
+template <int dim, int spacedim>
+const FullMatrix<double> &
+FESystem<dim,spacedim>
+::get_restriction_matrix (const unsigned int child,
+                          const RefinementCase<dim> &refinement_case) const
+{
+  Assert (refinement_case<RefinementCase<dim>::isotropic_refinement+1,
+          ExcIndexRange(refinement_case,0,RefinementCase<dim>::isotropic_refinement+1));
+  Assert (refinement_case!=RefinementCase<dim>::no_refinement,
+          ExcMessage("Restriction matrices are only available for refined cells!"));
+  Assert (child<GeometryInfo<dim>::n_children(refinement_case),
+          ExcIndexRange(child,0,GeometryInfo<dim>::n_children(refinement_case)));
+
+  // initialization upon first request (an empty matrix means "not built yet")
+  if (this->restriction[refinement_case-1][child].n() == 0)
+    {
+      Threads::Mutex::ScopedLock lock(this->mutex);
+
+      // check if updated while waiting for lock
+      if (this->restriction[refinement_case-1][child].n() ==
+          this->dofs_per_cell)
+        return this->restriction[refinement_case-1][child];
+
+      // Check if some of the matrices of the base elements are void.
+      bool do_restriction = true;
+
+      // shortcut for accessing local restrictions further down
+      std::vector<const FullMatrix<double> *>
+      base_matrices(this->n_base_elements());
+
+      for (unsigned int i=0; i<this->n_base_elements(); ++i)
+        {
+          base_matrices[i] =
+            &base_element(i).get_restriction_matrix(child, refinement_case);
+          if (base_matrices[i]->n() != base_element(i).dofs_per_cell)
+            do_restriction = false;
+        }
+      Assert(do_restriction,
+             (typename FiniteElement<dim,spacedim>::ExcProjectionVoid()));
+
+      // if we did not encounter void matrices, initialize the matrix sizes
+      if (do_restriction)
+        {
+          FullMatrix<double> restriction(this->dofs_per_cell,
+                                         this->dofs_per_cell);
+
+          // distribute the matrices of the base finite elements to the
+          // matrices of this object. for this, loop over all degrees of
+          // freedom and take the respective entry of the underlying base
+          // element.
+          //
+          // note that by definition, base elements are independent of each
+          // other, i.e. they do not couple. only DoFs that belong to the
+          // same instance of a base element may couple
+          for (unsigned int i=0; i<this->dofs_per_cell; ++i)
+            for (unsigned int j=0; j<this->dofs_per_cell; ++j)
+              {
+                // first find out to which base element indices i and j
+                // belong, and which instance thereof in case the base element
+                // has a multiplicity greater than one. if they should not
+                // happen to belong to the same instance of a base element,
+                // then they cannot couple, so go on with the next index
+                if (this->system_to_base_table[i].first !=
+                    this->system_to_base_table[j].first)
+                  continue;
+
+                // so get the common base element and the indices therein:
+                const unsigned int
+                base = this->system_to_base_table[i].first.first;
+
+                const unsigned int
+                base_index_i = this->system_to_base_table[i].second,
+                base_index_j = this->system_to_base_table[j].second;
+
+                // if we are sure that DoFs i and j may couple, then copy
+                // entries of the matrices:
+                restriction(i,j) = (*base_matrices[base])(base_index_i,base_index_j);
+              }
+
+          // store the result in the cache; the const_cast is needed because
+          restriction.swap(const_cast<FullMatrix<double> &>
+                           (this->restriction[refinement_case-1][child]));
+        }
+    }
+
+  return this->restriction[refinement_case-1][child];
+}
+
+
+
+template <int dim, int spacedim>
+const FullMatrix<double> &
+FESystem<dim,spacedim>
+::get_prolongation_matrix (const unsigned int child,
+                           const RefinementCase<dim> &refinement_case) const
+{
+  Assert (refinement_case<RefinementCase<dim>::isotropic_refinement+1,
+          ExcIndexRange(refinement_case,0,RefinementCase<dim>::isotropic_refinement+1));
+  Assert (refinement_case!=RefinementCase<dim>::no_refinement,
+          ExcMessage("Prolongation matrices are only available for refined cells!"));
+  Assert (child<GeometryInfo<dim>::n_children(refinement_case),
+          ExcIndexRange(child,0,GeometryInfo<dim>::n_children(refinement_case)));
+
+  // return the prolongation matrix for this child and refinement case. it
+  // is assembled from the base elements' matrices upon first request
+  if (this->prolongation[refinement_case-1][child].n() == 0)
+    {
+      Threads::Mutex::ScopedLock lock(this->mutex);
+      // check if updated while waiting for lock
+      if (this->prolongation[refinement_case-1][child].n() ==
+          this->dofs_per_cell)
+        return this->prolongation[refinement_case-1][child];
+      // check if some of the matrices of the base elements are void
+      bool do_prolongation = true;
+      std::vector<const FullMatrix<double> *>
+      base_matrices(this->n_base_elements());
+      for (unsigned int i=0; i<this->n_base_elements(); ++i)
+        {
+          base_matrices[i] =
+            &base_element(i).get_prolongation_matrix(child, refinement_case);
+          if (base_matrices[i]->n() != base_element(i).dofs_per_cell)
+            do_prolongation = false;
+        }
+      Assert(do_prolongation,
+             (typename FiniteElement<dim,spacedim>::ExcEmbeddingVoid()));
+      // if we did not encounter void matrices, assemble the system matrix
+      if (do_prolongation)
+        {
+          FullMatrix<double> prolongate (this->dofs_per_cell,
+                                         this->dofs_per_cell);
+          // only DoFs of the same instance of a base element may couple
+          for (unsigned int i=0; i<this->dofs_per_cell; ++i)
+            for (unsigned int j=0; j<this->dofs_per_cell; ++j)
+              {
+                if (this->system_to_base_table[i].first !=
+                    this->system_to_base_table[j].first)
+                  continue;
+                const unsigned int
+                base = this->system_to_base_table[i].first.first;
+
+                const unsigned int
+                base_index_i = this->system_to_base_table[i].second,
+                base_index_j = this->system_to_base_table[j].second;
+                prolongate(i,j) = (*base_matrices[base])(base_index_i,base_index_j);
+              }
+          prolongate.swap(const_cast<FullMatrix<double> &>
+                          (this->prolongation[refinement_case-1][child]));
+        }
+    }
+
+  return this->prolongation[refinement_case-1][child];
+}
+
+
+template <int dim, int spacedim>
+unsigned int
+FESystem<dim,spacedim>::
+face_to_cell_index (const unsigned int face_dof_index,
+                    const unsigned int face,
+                    const bool face_orientation,
+                    const bool face_flip,
+                    const bool face_rotation) const
+{
+  // ((base element, instance), index within the base element)
+  typedef std::pair<std::pair<unsigned int, unsigned int>, unsigned int> BaseIndex;
+
+  // only the base elements know how their face DoFs map to cell DoFs.
+  // hence, express the face DoF in the numbering of its base element and
+  // let that base element do the translation
+  const BaseIndex on_face = this->face_system_to_base_index(face_dof_index);
+
+  const unsigned int index_in_base_cell
+    = this->base_element(on_face.first.first)
+      .face_to_cell_index (on_face.second,
+                           face,
+                           face_orientation,
+                           face_flip,
+                           face_rotation);
+
+  // now go back from the base element numbering to the one of the system.
+  // there is no base_to_system_index function, and
+  // component_to_system_index does not work for non-primitive elements,
+  // so simply search all cell DoFs for the matching one
+  const BaseIndex wanted (on_face.first, index_in_base_cell);
+  for (unsigned int cell_dof=0; cell_dof<this->dofs_per_cell; ++cell_dof)
+    if (this->system_to_base_index(cell_dof) == wanted)
+      return cell_dof;
+
+  Assert (false, ExcInternalError());
+  return numbers::invalid_unsigned_int;
+}
+
+
+
+//---------------------------------------------------------------------------
+// Data field initialization
+//---------------------------------------------------------------------------
+
+
+
+template <int dim, int spacedim>
+UpdateFlags
+FESystem<dim,spacedim>::requires_update_flags (const UpdateFlags flags) const
+{
+  // the union of what each base element reports as required for `flags'
+  UpdateFlags required = update_default;
+  const unsigned int n_bases = this->n_base_elements();
+  for (unsigned int b=0; b<n_bases; ++b)
+    required |= base_element(b).requires_update_flags (flags);
+  return required;
+}
+
+// Create the InternalData object of this element: it holds one output
+// object and one scratch data object per base element. Allocated with new.
+template <int dim, int spacedim>
+typename FiniteElement<dim,spacedim>::InternalDataBase *
+FESystem<dim,spacedim>::
+get_data (const UpdateFlags                                                    flags,
+          const Mapping<dim,spacedim>                                         &mapping,
+          const Quadrature<dim>                                               &quadrature,
+          dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &/*output_data*/) const
+{
+  // create an internal data object and set the update flags we will need
+  // to deal with. the current object does not make use of these flags,
+  // but we need to nevertheless set them correctly since we look
+  // into the update_each flag of base elements in fill_fe_values,
+  // and so the current object's update_each flag needs to be
+  // correct in case the current FESystem is a base element for another,
+  // higher-level FESystem itself.
+  InternalData *data = new InternalData(this->n_base_elements());
+  data->update_each = requires_update_flags(flags);
+
+  // get data objects from each of the base elements and store
+  // them. one might think that doing this in parallel (over the
+  // base elements) would be a good idea, but this turns out to
+  // be wrong because we would then run these jobs on different
+  // threads/processors and this allocates memory in different
+  // NUMA domains; this has large detrimental effects when later
+  // writing into these objects in fill_fe_*_values. all of this
+  // is particularly true when using FEValues objects in
+  // WorkStream contexts where we explicitly make sure that
+  // every function only uses objects previously allocated
+  // in the same NUMA context and on the same thread as the
+  // function is called
+  for (unsigned int base_no=0; base_no<this->n_base_elements(); ++base_no)
+    {
+      internal::FEValues::FiniteElementRelatedData<dim,spacedim> &base_fe_output_object
+        = data->get_fe_output_object(base_no);
+      base_fe_output_object.initialize (quadrature.size(), base_element(base_no),
+                                        flags | base_element(base_no).requires_update_flags(flags));
+
+      // let base objects produce their scratch objects. they may
+      // also at this time write into the output objects we provide
+      // for them; it would be nice if we could already copy something
+      // out of the base output object into the system output object,
+      // but we can't because we can't know what the elements have already
+      // copied and/or will want to update on every cell
+      typename FiniteElement<dim,spacedim>::InternalDataBase *base_fe_data =
+        base_element(base_no).get_data (flags, mapping, quadrature,
+                                        base_fe_output_object);
+
+      data->set_fe_data(base_no, base_fe_data);
+    }
+
+  return data;
+}
+
+// Face variant of get_data: creates an InternalData object holding, for
+// each base element, an output object and a scratch data object. A clone of
+// get_data, except that get_face_data of the base elements is called.
+template <int dim, int spacedim>
+typename FiniteElement<dim,spacedim>::InternalDataBase *
+FESystem<dim,spacedim>::
+get_face_data (const UpdateFlags                                                    flags,
+               const Mapping<dim,spacedim>                                         &mapping,
+               const Quadrature<dim-1>                                             &quadrature,
+               dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &/*output_data*/) const
+{
+  // create an internal data object and store the update flags in it.
+  // compute_fill reads update_each both from the data object of the
+  // FESystem it runs on and from the data objects of its base elements,
+  // so the flag of the current object needs to be correct as well, in
+  // particular in case the current FESystem is a base element for another,
+  // higher-level FESystem itself. the object is allocated with new and
+  // handed out through the return value; it is not freed in this function.
+  InternalData *data = new InternalData(this->n_base_elements());
+  data->update_each = requires_update_flags(flags);
+
+  // get data objects from each of the base elements and store
+  // them. one might think that doing this in parallel (over the
+  // base elements) would be a good idea, but this turns out to
+  // be wrong because we would then run these jobs on different
+  // threads/processors and this allocates memory in different
+  // NUMA domains; this has large detrimental effects when later
+  // writing into these objects in fill_fe_*_values. all of this
+  // is particularly true when using FEValues objects in
+  // WorkStream contexts where we explicitly make sure that
+  // every function only uses objects previously allocated
+  // in the same NUMA context and on the same thread on which
+  // the function is called
+  for (unsigned int base_no=0; base_no<this->n_base_elements(); ++base_no)
+    {
+      internal::FEValues::FiniteElementRelatedData<dim,spacedim> &base_fe_output_object
+        = data->get_fe_output_object(base_no);
+      base_fe_output_object.initialize (quadrature.size(), base_element(base_no),
+                                        flags | base_element(base_no).requires_update_flags(flags));
+
+      // let base objects produce their scratch objects. they may
+      // also at this time write into the output objects we provide
+      // for them; it would be nice if we could already copy something
+      // out of the base output object into the system output object,
+      // but we can't because we can't know what the elements already
+      // copied and/or will want to update on every cell
+      typename FiniteElement<dim,spacedim>::InternalDataBase *base_fe_data =
+        base_element(base_no).get_face_data (flags, mapping, quadrature,
+                                             base_fe_output_object);
+
+      data->set_fe_data(base_no, base_fe_data);
+    }
+
+  return data;
+}
+
+
+
+// Subface variant of get_data: creates an InternalData object holding, for
+// each base element, an output object and a scratch data object. A clone of
+// get_data, except that get_subface_data of the base elements is called.
+template <int dim, int spacedim>
+typename FiniteElement<dim,spacedim>::InternalDataBase *
+FESystem<dim,spacedim>::
+get_subface_data (const UpdateFlags                                                    flags,
+                  const Mapping<dim,spacedim>                                         &mapping,
+                  const Quadrature<dim-1>                                             &quadrature,
+                  dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &/*output_data*/) const
+{
+  // create an internal data object and store the update flags in it.
+  // compute_fill reads update_each both from the data object of the
+  // FESystem it runs on and from the data objects of its base elements,
+  // so the flag of the current object needs to be correct as well, in
+  // particular in case the current FESystem is a base element for another,
+  // higher-level FESystem itself. the object is allocated with new and
+  // handed out through the return value; it is not freed in this function.
+  InternalData *data = new InternalData(this->n_base_elements());
+  data->update_each = requires_update_flags(flags);
+
+  // get data objects from each of the base elements and store
+  // them. one might think that doing this in parallel (over the
+  // base elements) would be a good idea, but this turns out to
+  // be wrong because we would then run these jobs on different
+  // threads/processors and this allocates memory in different
+  // NUMA domains; this has large detrimental effects when later
+  // writing into these objects in fill_fe_*_values. all of this
+  // is particularly true when using FEValues objects in
+  // WorkStream contexts where we explicitly make sure that
+  // every function only uses objects previously allocated
+  // in the same NUMA context and on the same thread on which
+  // the function is called
+  for (unsigned int base_no=0; base_no<this->n_base_elements(); ++base_no)
+    {
+      internal::FEValues::FiniteElementRelatedData<dim,spacedim> &base_fe_output_object
+        = data->get_fe_output_object(base_no);
+      base_fe_output_object.initialize (quadrature.size(), base_element(base_no),
+                                        flags | base_element(base_no).requires_update_flags(flags));
+
+      // let base objects produce their scratch objects. they may
+      // also at this time write into the output objects we provide
+      // for them; it would be nice if we could already copy something
+      // out of the base output object into the system output object,
+      // but we can't because we can't know what the elements already
+      // copied and/or will want to update on every cell
+      typename FiniteElement<dim,spacedim>::InternalDataBase *base_fe_data =
+        base_element(base_no).get_subface_data (flags, mapping, quadrature,
+                                                base_fe_output_object);
+
+      data->set_fe_data(base_no, base_fe_data);
+    }
+
+  return data;
+}
+
+// Cell case of the fill functions: forwards to compute_fill with both the
+// face and the subface number set to invalid_face_number.
+template <int dim, int spacedim>
+void
+FESystem<dim,spacedim>::
+fill_fe_values (const typename Triangulation<dim,spacedim>::cell_iterator           &cell,
+                const CellSimilarity::Similarity                                     cell_similarity,
+                const Quadrature<dim>                                               &quadrature,
+                const Mapping<dim,spacedim>                                         &mapping,
+                const typename Mapping<dim,spacedim>::InternalDataBase              &mapping_internal,
+                const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &mapping_data,
+                const typename FiniteElement<dim,spacedim>::InternalDataBase        &fe_internal,
+                dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const
+{
+  compute_fill(mapping, cell, invalid_face_number, invalid_face_number,
+               quadrature, cell_similarity, mapping_internal, fe_internal,
+               mapping_data, output_data);
+}
+
+// Face case of the fill functions: forwards to compute_fill with the given
+// face_no, sub_no set to invalid_face_number, and CellSimilarity::none.
+template <int dim, int spacedim>
+void
+FESystem<dim,spacedim>::
+fill_fe_face_values (const typename Triangulation<dim,spacedim>::cell_iterator           &cell,
+                     const unsigned int                                                   face_no,
+                     const Quadrature<dim-1>                                             &quadrature,
+                     const Mapping<dim,spacedim>                                         &mapping,
+                     const typename Mapping<dim,spacedim>::InternalDataBase              &mapping_internal,
+                     const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &mapping_data,
+                     const typename FiniteElement<dim,spacedim>::InternalDataBase        &fe_internal,
+                     dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const
+{
+  compute_fill (mapping, cell, face_no, invalid_face_number, quadrature,
+                CellSimilarity::none, mapping_internal, fe_internal,
+                mapping_data, output_data);
+}
+
+
+// Subface case of the fill functions: forwards to compute_fill with the
+// given face_no and sub_no, and CellSimilarity::none as cell similarity.
+template <int dim, int spacedim>
+void
+FESystem<dim,spacedim>::
+fill_fe_subface_values (const typename Triangulation<dim,spacedim>::cell_iterator           &cell,
+                        const unsigned int                                                   face_no,
+                        const unsigned int                                                   sub_no,
+                        const Quadrature<dim-1>                                             &quadrature,
+                        const Mapping<dim,spacedim>                                         &mapping,
+                        const typename Mapping<dim,spacedim>::InternalDataBase              &mapping_internal,
+                        const dealii::internal::FEValues::MappingRelatedData<dim, spacedim> &mapping_data,
+                        const typename FiniteElement<dim,spacedim>::InternalDataBase        &fe_internal,
+                        dealii::internal::FEValues::FiniteElementRelatedData<dim, spacedim> &output_data) const
+{
+  compute_fill (mapping, cell, face_no, sub_no, quadrature,
+                CellSimilarity::none, mapping_internal, fe_internal,
+                mapping_data, output_data);
+}
+
+// Common implementation of fill_fe_values, fill_fe_face_values and
+// fill_fe_subface_values; face_no and sub_no select which of the three is run.
+template <int dim, int spacedim>
+template <int dim_1>
+void
+FESystem<dim,spacedim>::
+compute_fill (const Mapping<dim,spacedim>                      &mapping,
+              const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+              const unsigned int                                face_no,
+              const unsigned int                                sub_no,
+              const Quadrature<dim_1>                          &quadrature,
+              const CellSimilarity::Similarity                  cell_similarity,
+              const typename Mapping<dim,spacedim>::InternalDataBase &mapping_internal,
+              const typename FiniteElement<dim,spacedim>::InternalDataBase &fe_internal,
+              const internal::FEValues::MappingRelatedData<dim,spacedim> &mapping_data,
+              internal::FEValues::FiniteElementRelatedData<dim,spacedim> &output_data) const
+{
+  // fe_internal has to be the InternalData
+  // object of this class: the Assert checks
+  // this via dynamic_cast, the static_cast
+  // below then does the actual conversion
+  Assert (dynamic_cast<const InternalData *> (&fe_internal) != 0, ExcInternalError());
+  const InternalData &fe_data = static_cast<const InternalData &> (fe_internal);
+
+  // Either dim_1==dim
+  // (fill_fe_values) or dim_1==dim-1
+  // (fill_fe_(sub)face_values)
+  Assert(dim_1==dim || dim_1==dim-1, ExcInternalError());
+  const UpdateFlags flags = fe_data.update_each;
+
+
+  // loop over the base elements, let them compute what they need to compute,
+  // and then copy what is necessary; the loop is skipped entirely unless
+  // values, gradients, hessians or third derivatives have been requested.
+  //
+  // parallelizing this over base elements may look attractive, but is not
+  // worthwhile: doing so lets multiple threads access data objects that were
+  // created by the current thread, leading to many NUMA memory access
+  // inefficiencies. we specifically want to avoid this if this class is called
+  // in a WorkStream context where we very carefully allocate objects only on
+  // the thread where they will be used; spawning new tasks here would be counterproductive
+  if (flags & (update_values | update_gradients
+               | update_hessians | update_3rd_derivatives ))
+    for (unsigned int base_no=0; base_no<this->n_base_elements(); ++base_no)
+      {
+        const FiniteElement<dim,spacedim> &
+        base_fe      = base_element(base_no);
+        typename FiniteElement<dim,spacedim>::InternalDataBase &
+        base_fe_data = fe_data.get_fe_data(base_no);
+        internal::FEValues::FiniteElementRelatedData<dim,spacedim> &
+        base_data    = fe_data.get_fe_output_object(base_no);
+
+        // the base element fill functions take Quadrature<dim> or Quadrature<dim-1>
+        // whereas we are given Quadrature<dim_1>. Hence the following workaround
+        const Quadrature<dim>   *cell_quadrature = 0;
+        const Quadrature<dim-1> *face_quadrature = 0;
+        const unsigned int n_q_points = quadrature.size();
+
+        // implicit conversion to Subscriptor, the common base class of
+        // quadrature being either Quadrature<dim> or Quadrature<dim-1>:
+        const Subscriptor *quadrature_base_pointer = &quadrature;
+
+        if (face_no==invalid_face_number)
+          {
+            Assert(dim_1==dim, ExcDimensionMismatch(dim_1,dim));
+            Assert (dynamic_cast<const Quadrature<dim> *>(quadrature_base_pointer) != 0,
+                    ExcInternalError());
+
+            cell_quadrature
+              = static_cast<const Quadrature<dim> *>(quadrature_base_pointer);
+          }
+        else
+          {
+            Assert(dim_1==dim-1, ExcDimensionMismatch(dim_1,dim-1));
+            Assert (dynamic_cast<const Quadrature<dim-1> *>(quadrature_base_pointer) != 0,
+                    ExcInternalError());
+
+            face_quadrature
+              = static_cast<const Quadrature<dim-1> *>(quadrature_base_pointer);
+          }
+
+
+        // let the base element fill base_data for a cell (face_no invalid),
+        // a face (only sub_no invalid) or a subface (both valid).
+        //
+        // the copy from base_data to output_data further down is skipped
+        // for translated cells; this can only happen for fill_fe_values
+        // since the (sub)face functions pass CellSimilarity::none, so on
+        // faces the data is copied every time.
+        if (face_no==invalid_face_number)
+          base_fe.fill_fe_values(cell, cell_similarity,
+                                 *cell_quadrature,
+                                 mapping, mapping_internal, mapping_data,
+                                 base_fe_data, base_data);
+        else if (sub_no==invalid_face_number)
+          base_fe.fill_fe_face_values(cell, face_no,
+                                      *face_quadrature,
+                                      mapping, mapping_internal, mapping_data,
+                                      base_fe_data, base_data);
+        else
+          base_fe.fill_fe_subface_values(cell, face_no, sub_no,
+                                         *face_quadrature,
+                                         mapping, mapping_internal, mapping_data,
+                                         base_fe_data, base_data);
+
+        // now data has been generated, so copy it. we used to work by
+        // looping over all base elements (i.e. this outer loop), then over
+        // multiplicity, then over the shape functions from that base
+        // element, but that requires that we can infer the global number of
+        // a shape function from its number in the base element. for that we
+        // had the component_to_system_table.
+        //
+        // however, this does of course no longer work since we have
+        // non-primitive elements. so we go the other way round: loop over
+        // all shape functions of the composed element, and here only treat
+        // those shape functions that belong to a given base element
+        //TODO: Introduce the needed table and loop only over base element shape functions. This here is not efficient at all AND very bad style
+        const UpdateFlags base_flags = base_fe_data.update_each;
+
+        // if the current cell is just a translation of the previous one,
+        // the underlying data has not changed, and we don't even need to
+        // enter this section
+        if (cell_similarity != CellSimilarity::translation)
+          for (unsigned int system_index=0; system_index<this->dofs_per_cell;
+               ++system_index)
+            if (this->system_to_base_table[system_index].first.first == base_no)
+              {
+                const unsigned int
+                base_index = this->system_to_base_table[system_index].second;
+                Assert (base_index<base_fe.dofs_per_cell, ExcInternalError());
+
+                // now copy. if the shape function is primitive, then there
+                // is only one value to be copied, but for non-primitive
+                // elements, there might be more values to be copied
+                //
+                // the rows to read from and to write to are found by summing
+                // n_nonzero_components over all preceding shape functions
+                unsigned int out_index = 0;
+                for (unsigned int i=0; i<system_index; ++i)
+                  out_index += this->n_nonzero_components(i);
+                unsigned int in_index = 0;
+                for (unsigned int i=0; i<base_index; ++i)
+                  in_index += base_fe.n_nonzero_components(i);
+
+                // both elements have to agree on the number of components to be copied
+                Assert (this->n_nonzero_components(system_index) ==
+                        base_fe.n_nonzero_components(base_index),
+                        ExcInternalError());
+
+                if (base_flags & update_values)
+                  for (unsigned int s=0; s<this->n_nonzero_components(system_index); ++s)
+                    for (unsigned int q=0; q<n_q_points; ++q)
+                      output_data.shape_values[out_index+s][q] =
+                        base_data.shape_values(in_index+s,q);
+
+                if (base_flags & update_gradients)
+                  for (unsigned int s=0; s<this->n_nonzero_components(system_index); ++s)
+                    for (unsigned int q=0; q<n_q_points; ++q)
+                      output_data.shape_gradients[out_index+s][q] =
+                        base_data.shape_gradients[in_index+s][q];
+
+                if (base_flags & update_hessians)
+                  for (unsigned int s=0; s<this->n_nonzero_components(system_index); ++s)
+                    for (unsigned int q=0; q<n_q_points; ++q)
+                      output_data.shape_hessians[out_index+s][q] =
+                        base_data.shape_hessians[in_index+s][q];
+
+                if (base_flags & update_3rd_derivatives)
+                  for (unsigned int s=0; s<this->n_nonzero_components(system_index); ++s)
+                    for (unsigned int q=0; q<n_q_points; ++q)
+                      output_data.shape_3rd_derivatives[out_index+s][q] =
+                        base_data.shape_3rd_derivatives[in_index+s][q];
+
+              }
+      }
+}
+
+// Fill component_to_base_table, system_to_base_table and
+// system_to_component_table, enumerating vertex, line, quad and hex dofs.
+template <int dim, int spacedim>
+void
+FESystem<dim,spacedim>::build_cell_tables()
+{
+  // If the system is not primitive, these have not been initialized by
+  // FiniteElement
+  this->system_to_component_table.resize(this->dofs_per_cell);
+  this->face_system_to_component_table.resize(this->dofs_per_face);
+
+  unsigned int total_index = 0;
+  // one entry per vector component: ((base element, component in base), copy m)
+  for (unsigned int base=0; base < this->n_base_elements(); ++base)
+    for (unsigned int m = 0; m < this->element_multiplicity(base); ++m)
+      {
+        for (unsigned int k=0; k<base_element(base).n_components(); ++k)
+          this->component_to_base_table[total_index++]
+            = std::make_pair(std::make_pair(base,k), m);
+      }
+  Assert (total_index == this->component_to_base_table.size(),
+          ExcInternalError());
+
+  // Initialize index tables.  Multi-component base elements have to be
+  // thought of. For non-primitive shape functions, have a special invalid
+  // index.
+  const std::pair<unsigned int, unsigned int>
+  non_primitive_index (numbers::invalid_unsigned_int,
+                       numbers::invalid_unsigned_int);
+
+  // 1. Vertices: enumerate all indices on the first vertex in the order of
+  // the base elements (and their multiplicities), then those of the second
+  // vertex, etc
+  total_index = 0;
+  for (unsigned int vertex_number=0;
+       vertex_number<GeometryInfo<dim>::vertices_per_cell;
+       ++vertex_number)
+    {
+      unsigned int comp_start = 0; // first vector component of the current (base,m) copy
+      for (unsigned int base=0; base<this->n_base_elements(); ++base)
+        for (unsigned int m=0; m<this->element_multiplicity(base);
+             ++m, comp_start+=base_element(base).n_components())
+          for (unsigned int local_index = 0;
+               local_index < base_element(base).dofs_per_vertex;
+               ++local_index, ++total_index)
+            {
+              const unsigned int index_in_base
+                = (base_element(base).dofs_per_vertex*vertex_number +
+                   local_index);
+
+              this->system_to_base_table[total_index]
+                = std::make_pair (std::make_pair(base, m), index_in_base);
+
+              if (base_element(base).is_primitive(index_in_base))
+                {
+                  const unsigned int comp_in_base
+                    = base_element(base).system_to_component_index(index_in_base).first;
+                  const unsigned int comp
+                    = comp_start + comp_in_base;
+                  const unsigned int index_in_comp
+                    = base_element(base).system_to_component_index(index_in_base).second;
+                  this->system_to_component_table[total_index]
+                    = std::make_pair (comp, index_in_comp);
+                }
+              else
+                this->system_to_component_table[total_index] = non_primitive_index;
+            }
+    }
+
+  // 2. Lines
+  if (GeometryInfo<dim>::lines_per_cell > 0)
+    for (unsigned int line_number= 0;
+         line_number != GeometryInfo<dim>::lines_per_cell;
+         ++line_number)
+      {
+        unsigned int comp_start = 0;
+        for (unsigned int base=0; base<this->n_base_elements(); ++base)
+          for (unsigned int m=0; m<this->element_multiplicity(base);
+               ++m, comp_start+=base_element(base).n_components())
+            for (unsigned int local_index = 0;
+                 local_index < base_element(base).dofs_per_line;
+                 ++local_index, ++total_index)
+              {
+                const unsigned int index_in_base
+                  = (base_element(base).dofs_per_line*line_number +
+                     local_index +
+                     base_element(base).first_line_index);
+
+                this->system_to_base_table[total_index]
+                  = std::make_pair (std::make_pair(base,m), index_in_base);
+
+                if (base_element(base).is_primitive(index_in_base))
+                  {
+                    const unsigned int comp_in_base
+                      = base_element(base).system_to_component_index(index_in_base).first;
+                    const unsigned int comp
+                      = comp_start + comp_in_base;
+                    const unsigned int index_in_comp
+                      = base_element(base).system_to_component_index(index_in_base).second;
+                    this->system_to_component_table[total_index]
+                      = std::make_pair (comp, index_in_comp);
+                  }
+                else
+                  this->system_to_component_table[total_index] = non_primitive_index;
+              }
+      }
+
+  // 3. Quads
+  if (GeometryInfo<dim>::quads_per_cell > 0)
+    for (unsigned int quad_number= 0;
+         quad_number != GeometryInfo<dim>::quads_per_cell;
+         ++quad_number)
+      {
+        unsigned int comp_start = 0;
+        for (unsigned int base=0; base<this->n_base_elements(); ++base)
+          for (unsigned int m=0; m<this->element_multiplicity(base);
+               ++m, comp_start += base_element(base).n_components())
+            for (unsigned int local_index = 0;
+                 local_index < base_element(base).dofs_per_quad;
+                 ++local_index, ++total_index)
+              {
+                const unsigned int index_in_base
+                  = (base_element(base).dofs_per_quad*quad_number +
+                     local_index +
+                     base_element(base).first_quad_index);
+
+                this->system_to_base_table[total_index]
+                  = std::make_pair (std::make_pair(base,m), index_in_base);
+
+                if (base_element(base).is_primitive(index_in_base))
+                  {
+                    const unsigned int comp_in_base
+                      = base_element(base).system_to_component_index(index_in_base).first;
+                    const unsigned int comp
+                      = comp_start + comp_in_base;
+                    const unsigned int index_in_comp
+                      = base_element(base).system_to_component_index(index_in_base).second;
+                    this->system_to_component_table[total_index]
+                      = std::make_pair (comp, index_in_comp);
+                  }
+                else
+                  this->system_to_component_table[total_index] = non_primitive_index;
+              }
+      }
+
+  // 4. Hexes
+  if (GeometryInfo<dim>::hexes_per_cell > 0)
+    for (unsigned int hex_number= 0;
+         hex_number != GeometryInfo<dim>::hexes_per_cell;
+         ++hex_number)
+      {
+        unsigned int comp_start = 0;
+        for (unsigned int base=0; base<this->n_base_elements(); ++base)
+          for (unsigned int m=0; m<this->element_multiplicity(base);
+               ++m, comp_start+=base_element(base).n_components())
+            for (unsigned int local_index = 0;
+                 local_index < base_element(base).dofs_per_hex;
+                 ++local_index, ++total_index)
+              {
+                const unsigned int index_in_base
+                  = (base_element(base).dofs_per_hex*hex_number +
+                     local_index +
+                     base_element(base).first_hex_index);
+
+                this->system_to_base_table[total_index]
+                  = std::make_pair (std::make_pair(base,m), index_in_base);
+
+                if (base_element(base).is_primitive(index_in_base))
+                  {
+                    const unsigned int comp_in_base
+                      = base_element(base).system_to_component_index(index_in_base).first;
+                    const unsigned int comp
+                      = comp_start + comp_in_base;
+                    const unsigned int index_in_comp
+                      = base_element(base).system_to_component_index(index_in_base).second;
+                    this->system_to_component_table[total_index]
+                      = std::make_pair (comp, index_in_comp);
+                  }
+                else
+                  this->system_to_component_table[total_index] = non_primitive_index;
+              }
+      }
+}
+
+
+
+template <int dim, int spacedim>
+void
+FESystem<dim,spacedim>::build_face_tables()
+{
+  // Initialize index tables. do this in the same way as done for the cell
+  // tables, except that we now loop over the objects of faces
+
+  // For non-primitive shape functions, have a special invalid index
+  const std::pair<unsigned int, unsigned int>
+  non_primitive_index (numbers::invalid_unsigned_int,
+                       numbers::invalid_unsigned_int);
+
+  // 1. Vertices
+  unsigned int total_index = 0;
+  for (unsigned int vertex_number=0;
+       vertex_number<GeometryInfo<dim>::vertices_per_face;
+       ++vertex_number)
+    {
+      unsigned int comp_start = 0;
+      for (unsigned int base=0; base<this->n_base_elements(); ++base)
+        for (unsigned int m=0; m<this->element_multiplicity(base);
+             ++m, comp_start += base_element(base).n_components())
+          for (unsigned int local_index = 0;
+               local_index < base_element(base).dofs_per_vertex;
+               ++local_index, ++total_index)
+            {
+              // get (cell) index of this shape function inside the base
+              // element to see whether the shape function is primitive
+              // (assume that all shape functions on vertices share the same
+              // primitivity property; assume likewise for all shape functions
+              // located on lines, quads, etc. this way, we can ask for
+              // primitivity of only _one_ shape function, which is taken as
+              // representative for all others located on the same type of
+              // object):
+              const unsigned int index_in_base
+                = (base_element(base).dofs_per_vertex*vertex_number +
+                   local_index);
+
+              const unsigned int face_index_in_base
+                = (base_element(base).dofs_per_vertex*vertex_number +
+                   local_index);
+
+              this->face_system_to_base_table[total_index]
+                = std::make_pair (std::make_pair(base,m), face_index_in_base);
+
+              if (base_element(base).is_primitive(index_in_base))
+                {
+                  const unsigned int comp_in_base
+                    = base_element(base).face_system_to_component_index(face_index_in_base).first;
+                  const unsigned int comp
+                    = comp_start + comp_in_base;
+                  const unsigned int face_index_in_comp
+                    = base_element(base).face_system_to_component_index(face_index_in_base).second;
+                  this->face_system_to_component_table[total_index]
+                    = std::make_pair (comp, face_index_in_comp);
+                }
+              else
+                this->face_system_to_component_table[total_index] = non_primitive_index;
+            }
+    }
+
+  // 2. Lines
+  if (GeometryInfo<dim>::lines_per_face > 0)
+    for (unsigned int line_number= 0;
+         line_number != GeometryInfo<dim>::lines_per_face;
+         ++line_number)
+      {
+        unsigned int comp_start = 0;
+        for (unsigned int base = 0; base < this->n_base_elements(); ++base)
+          for (unsigned int m=0; m<this->element_multiplicity(base);
+               ++m, comp_start += base_element(base).n_components())
+            for (unsigned int local_index = 0;
+                 local_index < base_element(base).dofs_per_line;
+                 ++local_index, ++total_index)
+              {
+                // do everything alike for this type of object
+                const unsigned int index_in_base
+                  = (base_element(base).dofs_per_line*line_number +
+                     local_index +
+                     base_element(base).first_line_index);
+
+                const unsigned int face_index_in_base
+                  = (base_element(base).first_face_line_index +
+                     base_element(base).dofs_per_line * line_number +
+                     local_index);
+
+                this->face_system_to_base_table[total_index]
+                  = std::make_pair (std::make_pair(base,m), face_index_in_base);
+
+                if (base_element(base).is_primitive(index_in_base))
+                  {
+                    const unsigned int comp_in_base
+                      = base_element(base).face_system_to_component_index(face_index_in_base).first;
+                    const unsigned int comp
+                      = comp_start + comp_in_base;
+                    const unsigned int face_index_in_comp
+                      = base_element(base).face_system_to_component_index(face_index_in_base).second;
+                    this->face_system_to_component_table[total_index]
+                      = std::make_pair (comp, face_index_in_comp);
+                  }
+                else
+                  this->face_system_to_component_table[total_index] = non_primitive_index;
+              }
+      }
+
+  // 3. Quads
+  if (GeometryInfo<dim>::quads_per_face > 0)
+    for (unsigned int quad_number= 0;
+         quad_number != GeometryInfo<dim>::quads_per_face;
+         ++quad_number)
+      {
+        unsigned int comp_start = 0;
+        for (unsigned int base=0; base<this->n_base_elements(); ++base)
+          for (unsigned int m=0; m<this->element_multiplicity(base);
+               ++m, comp_start += base_element(base).n_components())
+            for (unsigned int local_index = 0;
+                 local_index < base_element(base).dofs_per_quad;
+                 ++local_index, ++total_index)
+              {
+                // do everything alike for this type of object
+                const unsigned int index_in_base
+                  = (base_element(base).dofs_per_quad*quad_number +
+                     local_index +
+                     base_element(base).first_quad_index);
+
+                const unsigned int face_index_in_base
+                  = (base_element(base).first_face_quad_index +
+                     base_element(base).dofs_per_quad * quad_number +
+                     local_index);
+
+                this->face_system_to_base_table[total_index]
+                  = std::make_pair (std::make_pair(base,m), face_index_in_base);
+
+                if (base_element(base).is_primitive(index_in_base))
+                  {
+                    const unsigned int comp_in_base
+                      = base_element(base).face_system_to_component_index(face_index_in_base).first;
+                    const unsigned int comp
+                      = comp_start + comp_in_base;
+                    const unsigned int face_index_in_comp
+                      = base_element(base).face_system_to_component_index(face_index_in_base).second;
+                    this->face_system_to_component_table[total_index]
+                      = std::make_pair (comp, face_index_in_comp);
+                  }
+                else
+                  this->face_system_to_component_table[total_index] = non_primitive_index;
+              }
+      }
+  Assert (total_index == this->dofs_per_face, ExcInternalError());
+  Assert (total_index == this->face_system_to_component_table.size(),
+          ExcInternalError());
+  Assert (total_index == this->face_system_to_base_table.size(),
+          ExcInternalError());
+}
+
+
+
+// Fill this->interface_constraints for the composed element from the
+// constraint matrices of its base elements.
+//
+// If any base element reports constraints_are_implemented() == false, the
+// function returns before touching this->interface_constraints. Otherwise
+// the table is resized to interface_constraints_size() and, for every pair
+// (m,n), the entry is copied from the matching base element whenever both
+// indices belong to the same (base element, instance) pair.
+template <int dim, int spacedim>
+void FESystem<dim,spacedim>::build_interface_constraints ()
+{
+  // check whether all base elements implement their interface constraint
+  // matrices. if this is not the case, then leave the interface constraints of
+  // this composed element empty as well; however, the rest of the element is
+  // usable
+  for (unsigned int base=0; base<this->n_base_elements(); ++base)
+    if (base_element(base).constraints_are_implemented() == false)
+      return;
+
+  // give the table the size this element reports for its constraints
+  this->interface_constraints.
+  TableBase<2,double>::reinit (this->interface_constraints_size());
+
+  // the layout of the constraints matrix is described in the FiniteElement
+  // class. you may want to look there first before trying to understand the
+  // following, especially the mapping of the @p{m} index.
+  //
+  // in order to map it to the fe-system class, we have to know which base
+  // element a degree of freedom within a vertex, line, etc belongs to. this
+  // can be accomplished by the system_to_component_index function in
+  // conjunction with the numbers first_{line,quad,...}_index
+  for (unsigned int n=0; n<this->interface_constraints.n(); ++n)
+    for (unsigned int m=0; m<this->interface_constraints.m(); ++m)
+      {
+        // for the pair (n,m) find out which base element they belong to and
+        // the number therein
+        //
+        // first for the n index. this is simple since the n indices are in
+        // the same order as they are usually on a face. note that for the
+        // data type, first value in pair is (base element,instance of base
+        // element), second is index within this instance
+        const std::pair<std::pair<unsigned int,unsigned int>, unsigned int> n_index
+          = this->face_system_to_base_table[n];
+
+        // likewise for the m index. this is more complicated due to the
+        // strange ordering we have for the dofs on the refined faces.
+        std::pair<std::pair<unsigned int,unsigned int>, unsigned int> m_index;
+        switch (dim)
+          {
+          case 1:
+          {
+            // we should never get here!  (in 1d, the constraints matrix
+            // should be of size zero)
+            Assert (false, ExcInternalError());
+            break;
+          };
+
+          case 2:
+          {
+            // the indices m=0..d_v-1 are from the center vertex.  their order
+            // is the same as for the first vertex of the whole cell, so we
+            // can use the system_to_base_table variable (using the
+            // face_s_t_base_t function would yield the same)
+            if (m < this->dofs_per_vertex)
+              m_index = this->system_to_base_table[m];
+            else
+              // then come the two sets of line indices
+              {
+                // split the offset past the vertex dofs into (which sub-line,
+                // which dof on that sub-line)
+                const unsigned int index_in_line
+                  = (m-this->dofs_per_vertex) % this->dofs_per_line;
+                const unsigned int sub_line
+                  = (m-this->dofs_per_vertex) / this->dofs_per_line;
+                Assert (sub_line < 2, ExcInternalError());
+
+                // from this information, try to get base element and instance
+                // of base element. we do so by constructing the corresponding
+                // face index of m in the present element, then use
+                // face_system_to_base_table
+                const unsigned int tmp1 = 2*this->dofs_per_vertex+index_in_line;
+                m_index.first = this->face_system_to_base_table[tmp1].first;
+
+                // what we are still missing is the index of m within the base
+                // elements interface_constraints table
+                //
+                // here, the second value of face_system_to_base_table can
+                // help: it denotes the face index of that shape function
+                // within the base element. since we know that it is a line
+                // dof, we can construct the rest: tmp2 will denote the index
+                // of this shape function among the line shape functions:
+                Assert (this->face_system_to_base_table[tmp1].second >=
+                        2*base_element(m_index.first.first).dofs_per_vertex,
+                        ExcInternalError());
+                const unsigned int tmp2 = this->face_system_to_base_table[tmp1].second -
+                                          2*base_element(m_index.first.first).dofs_per_vertex;
+                Assert (tmp2 < base_element(m_index.first.first).dofs_per_line,
+                        ExcInternalError());
+                // row in the base element's table: skip its center-vertex
+                // dofs, then the dofs of the preceding sub-lines
+                m_index.second = base_element(m_index.first.first).dofs_per_vertex +
+                                 base_element(m_index.first.first).dofs_per_line*sub_line +
+                                 tmp2;
+              };
+            break;
+          };
+
+          case 3:
+          {
+            // same way as above, although a little more complicated...
+
+            // the indices m=0..5*d_v-1 are from the center and the four
+            // subline vertices.  their order is the same as for the first
+            // vertex of the whole cell, so we can use the simple arithmetic
+            if (m < 5*this->dofs_per_vertex)
+              m_index = this->system_to_base_table[m];
+            else
+              // then come the 12 sets of line indices
+              if (m < 5*this->dofs_per_vertex + 12*this->dofs_per_line)
+                {
+                  // for the meaning of all this, see the 2d part
+                  const unsigned int index_in_line
+                    = (m-5*this->dofs_per_vertex) % this->dofs_per_line;
+                  const unsigned int sub_line
+                    = (m-5*this->dofs_per_vertex) / this->dofs_per_line;
+                  Assert (sub_line < 12, ExcInternalError());
+
+                  // face index of the corresponding line dof in this element
+                  // (a face has 4 vertices in 3d, hence the factor 4)
+                  const unsigned int tmp1 = 4*this->dofs_per_vertex+index_in_line;
+                  m_index.first = this->face_system_to_base_table[tmp1].first;
+
+                  Assert (this->face_system_to_base_table[tmp1].second >=
+                          4*base_element(m_index.first.first).dofs_per_vertex,
+                          ExcInternalError());
+                  const unsigned int tmp2 = this->face_system_to_base_table[tmp1].second -
+                                            4*base_element(m_index.first.first).dofs_per_vertex;
+                  Assert (tmp2 < base_element(m_index.first.first).dofs_per_line,
+                          ExcInternalError());
+                  m_index.second = 5*base_element(m_index.first.first).dofs_per_vertex +
+                                   base_element(m_index.first.first).dofs_per_line*sub_line +
+                                   tmp2;
+                }
+              else
+                // on one of the four sub-quads
+                {
+                  // for the meaning of all this, see the 2d part
+                  const unsigned int index_in_quad
+                    = (m-5*this->dofs_per_vertex-12*this->dofs_per_line) %
+                      this->dofs_per_quad;
+                  Assert (index_in_quad < this->dofs_per_quad,
+                          ExcInternalError());
+                  const unsigned int sub_quad
+                    = ((m-5*this->dofs_per_vertex-12*this->dofs_per_line) /
+                       this->dofs_per_quad);
+                  Assert (sub_quad < 4, ExcInternalError());
+
+                  // face index of the corresponding quad dof in this element:
+                  // skip the face's vertex and line dofs
+                  const unsigned int tmp1 = 4*this->dofs_per_vertex +
+                                            4*this->dofs_per_line +
+                                            index_in_quad;
+                  Assert (tmp1 < this->face_system_to_base_table.size(),
+                          ExcInternalError());
+                  m_index.first = this->face_system_to_base_table[tmp1].first;
+
+                  Assert (this->face_system_to_base_table[tmp1].second >=
+                          4*base_element(m_index.first.first).dofs_per_vertex +
+                          4*base_element(m_index.first.first).dofs_per_line,
+                          ExcInternalError());
+                  const unsigned int tmp2 = this->face_system_to_base_table[tmp1].second -
+                                            4*base_element(m_index.first.first).dofs_per_vertex -
+                                            4*base_element(m_index.first.first).dofs_per_line;
+                  Assert (tmp2 < base_element(m_index.first.first).dofs_per_quad,
+                          ExcInternalError());
+                  m_index.second = 5*base_element(m_index.first.first).dofs_per_vertex +
+                                   12*base_element(m_index.first.first).dofs_per_line +
+                                   base_element(m_index.first.first).dofs_per_quad*sub_quad +
+                                   tmp2;
+                };
+
+            break;
+          };
+
+          default:
+            Assert (false, ExcNotImplemented());
+          };
+
+        // now that we gathered all information: use it to build the
+        // matrix. note that if n and m belong to different base elements or
+        // instances, then there definitely will be no coupling
+        if (n_index.first == m_index.first)
+          this->interface_constraints(m,n)
+            = (base_element(n_index.first.first).constraints()(m_index.second,
+                                                               n_index.second));
+      };
+}
+
+
+
+// Set up this composed element from the given base elements and their
+// multiplicities: clone every base element that is actually used, then build
+// the index tables, interface constraints, support points and dof index
+// permutations. Entries of @p fes whose multiplicity is zero are ignored.
+template <int dim, int spacedim>
+void FESystem<dim,spacedim>::initialize (const std::vector<const FiniteElement<dim,spacedim>*> &fes,
+                                         const std::vector<unsigned int> &multiplicities)
+{
+  // the two lists have to match, must not be empty, and at least one
+  // element has to be used at all
+  Assert (fes.size() == multiplicities.size(),
+          ExcDimensionMismatch (fes.size(), multiplicities.size()) );
+  Assert (fes.size() > 0,
+          ExcMessage ("Need to pass at least one finite element."));
+  Assert (count_nonzeros(multiplicities) > 0,
+          ExcMessage("You only passed FiniteElements with multiplicity 0."));
+
+  // record one entry per used base element in the block index table
+  this->base_to_block_indices.reinit(0, 0);
+  for (unsigned int fe_no=0; fe_no<fes.size(); ++fe_no)
+    {
+      if (multiplicities[fe_no] == 0)
+        continue;
+      this->base_to_block_indices.push_back (multiplicities[fe_no]);
+    }
+
+  // hand the cloning of each used element to a task. all tasks are created
+  // before the first result is requested further down
+  typedef Threads::Task<FiniteElement<dim,spacedim>*> CloneTask;
+  std::vector<CloneTask> pending_clones;
+  for (unsigned int fe_no=0; fe_no<fes.size(); ++fe_no)
+    {
+      if (multiplicities[fe_no] == 0)
+        continue;
+      pending_clones.push_back (Threads::new_task (&FiniteElement<dim,spacedim>::clone,
+                                                   *fes[fe_no]));
+    }
+
+  // collect the clones in order and store each one together with its
+  // multiplicity
+  unsigned int n_stored = 0;
+  for (unsigned int fe_no=0; fe_no<fes.size(); ++fe_no)
+    {
+      if (multiplicities[fe_no] == 0)
+        continue;
+
+      std_cxx11::shared_ptr<const FiniteElement<dim,spacedim> >
+      cloned_element (pending_clones[n_stored].return_value());
+      base_elements[n_stored] = std::make_pair (cloned_element,
+                                                multiplicities[fe_no]);
+      ++n_stored;
+    }
+
+  Assert(n_stored>0, ExcInternalError());
+
+  // index tables for cells and faces
+  build_cell_tables();
+  build_face_tables();
+
+  // restriction and prolongation matrices are built on demand, so nothing
+  // to do for them here
+
+  // interface constraints are handled separately for each dimension inside
+  // this function
+  build_interface_constraints ();
+
+  // support points on the cell and on the faces
+  initialize_unit_support_points ();
+  initialize_unit_face_support_points ();
+
+  // permutation tables for dofs on quads and lines
+  initialize_quad_dof_index_permutation ();
+}
+
+
+
+
+
+
+// Assemble this->unit_support_points from the unit support points of the
+// base elements. If a base element that has dofs on the cell provides no
+// support points, the array is left empty instead.
+template <int dim, int spacedim>
+void
+FESystem<dim,spacedim>::
+initialize_unit_support_points ()
+{
+  // a base element without support points makes support points for the
+  // whole element meaningless, unless that base element has no dofs at all
+  // (as is the case for FE_Nothing)
+  for (unsigned int b=0; b<this->n_base_elements(); ++b)
+    {
+      const FiniteElement<dim,spacedim> &fe = base_element(b);
+      if (fe.has_support_points() || fe.dofs_per_cell==0)
+        continue;
+
+      this->unit_support_points.resize(0);
+      return;
+    }
+
+  // every dof of the system takes the support point of the base element
+  // shape function it corresponds to
+  this->unit_support_points.resize(this->dofs_per_cell);
+
+  for (unsigned int dof=0; dof<this->dofs_per_cell; ++dof)
+    {
+      const unsigned int which_base    = this->system_to_base_table[dof].first.first;
+      const unsigned int index_in_base = this->system_to_base_table[dof].second;
+
+      Assert (which_base<this->n_base_elements(), ExcInternalError());
+      Assert (index_in_base<base_element(which_base).unit_support_points.size(),
+              ExcInternalError());
+
+      this->unit_support_points[dof]
+        = base_element(which_base).unit_support_points[index_in_base];
+    }
+}
+
+
+
+// Assemble this->unit_face_support_points from the unit face support points
+// of the base elements. Does nothing in 1d. If a base element that has dofs
+// on the face provides no support points, the array is left empty instead.
+template <int dim, int spacedim>
+void
+FESystem<dim,spacedim>::
+initialize_unit_face_support_points ()
+{
+  // there is nothing to do in 1d
+  if (dim == 1)
+    return;
+
+  // give up if some base element has no support points at all (the question
+  // is about support points in general, not just those on the face). base
+  // elements without any dofs on the face do not matter here, whether or not
+  // they have support points; the stable Stokes element
+  // Q(p)^dim \times DGP(p-1) is an example of this
+  for (unsigned int b=0; b<this->n_base_elements(); ++b)
+    {
+      const FiniteElement<dim,spacedim> &fe = base_element(b);
+      if (fe.has_support_points() || !(fe.dofs_per_face > 0))
+        continue;
+
+      this->unit_face_support_points.resize(0);
+      return;
+    }
+
+  // every face dof of the system takes the face support point of the base
+  // element shape function it corresponds to
+  this->unit_face_support_points.resize(this->dofs_per_face);
+
+  for (unsigned int dof=0; dof<this->dofs_per_face; ++dof)
+    {
+      const unsigned int which_base   = this->face_system_to_base_table[dof].first.first;
+      const unsigned int face_index   = this->face_system_to_base_table[dof].second;
+
+      Assert (face_index < base_element(which_base).unit_face_support_points.size(),
+              ExcInternalError());
+
+      this->unit_face_support_points[dof]
+        = base_element(which_base).unit_face_support_points[face_index];
+    }
+}
+
+
+
+// Compose the tables adjust_quad_dof_index_for_face_orientation_table and
+// adjust_line_dof_index_for_line_orientation_table of this element by
+// concatenating the corresponding tables of the base elements, once per
+// multiplicity. Does nothing for dim < 3.
+template <int dim, int spacedim>
+void
+FESystem<dim,spacedim>::initialize_quad_dof_index_permutation ()
+{
+  // only three dimensions need any work
+  if (dim < 3)
+    return;
+
+  // the destination table is expected to have been sized already
+  Assert (this->adjust_quad_dof_index_for_face_orientation_table.n_elements()==
+          8*this->dofs_per_quad, ExcInternalError());
+
+  // quads: append the rows of each base element's table, repeated as often
+  // as the base element occurs
+  unsigned int next_row = 0;
+  for (unsigned int b=0; b<this->n_base_elements(); ++b)
+    {
+      const Table<2,int> &base_table
+        = this->base_element(b).adjust_quad_dof_index_for_face_orientation_table;
+      const unsigned int n_copies = this->element_multiplicity(b);
+
+      for (unsigned int copy=0; copy<n_copies; ++copy)
+        {
+          for (unsigned int row=0; row<base_table.size(0); ++row)
+            for (unsigned int col=0; col<8; ++col)
+              this->adjust_quad_dof_index_for_face_orientation_table(next_row+row,col)
+                = base_table(row,col);
+          next_row += base_table.size(0);
+        }
+    }
+  Assert (next_row == this->dofs_per_quad,
+          ExcInternalError());
+
+  // lines: do the same with the one-dimensional line tables
+  Assert (this->adjust_line_dof_index_for_line_orientation_table.size()==
+          this->dofs_per_line, ExcInternalError());
+  unsigned int next_entry = 0;
+  for (unsigned int b=0; b<this->n_base_elements(); ++b)
+    {
+      const std::vector<int> &base_shifts
+        = this->base_element(b).adjust_line_dof_index_for_line_orientation_table;
+      const unsigned int n_copies = this->element_multiplicity(b);
+
+      for (unsigned int copy=0; copy<n_copies; ++copy)
+        {
+          for (unsigned int k=0; k<base_shifts.size(); ++k)
+            this->adjust_line_dof_index_for_line_orientation_table[next_entry+k]
+              = base_shifts[k];
+          next_entry += base_shifts.size();
+        }
+    }
+  Assert (next_entry == this->dofs_per_line,
+          ExcInternalError());
+}
+
+
+
+// Return true exactly if every base element reports that it implements hp
+// constraints.
+template <int dim, int spacedim>
+bool
+FESystem<dim,spacedim>::
+hp_constraints_are_implemented () const
+{
+  // walk over the base elements and stop at the first one that does not
+  // implement hp constraints
+  unsigned int b = 0;
+  while (b<this->n_base_elements()
+         &&
+         base_element(b).hp_constraints_are_implemented())
+    ++b;
+
+  // we got to the end only if no base element objected
+  return (b == this->n_base_elements());
+}
+
+
+
+// Build the face interpolation matrix between this element and
+// @p x_source_fe by stacking the face interpolation matrices of the
+// individual base elements.
+//
+// @param x_source_fe  The other element. It has to be an FESystem with the
+//   same template arguments; otherwise ExcInterpolationNotImplemented is
+//   thrown. Both systems are expected to have the same number of base
+//   elements (counting multiplicities) with matching numbers of components.
+// @param interpolation_matrix  Output matrix. It must satisfy
+//   m() == x_source_fe.dofs_per_face and n() == this->dofs_per_face. It is
+//   set to zero first and then filled block by block.
+template <int dim, int spacedim>
+void
+FESystem<dim,spacedim>::
+get_face_interpolation_matrix (const FiniteElement<dim,spacedim> &x_source_fe,
+                               FullMatrix<double>       &interpolation_matrix) const
+{
+  typedef FiniteElement<dim,spacedim> FEL;
+
+  // everything below accesses the other element through the FESystem
+  // interface, so the only acceptable source element is one for which the
+  // cast succeeds. do not accept an element based on its name alone: if the
+  // cast failed we would dereference a null pointer further down
+  const FESystem<dim,spacedim> *fe_other_system
+    = dynamic_cast<const FESystem<dim,spacedim>*>(&x_source_fe);
+  AssertThrow (fe_other_system != 0,
+               typename FEL::
+               ExcInterpolationNotImplemented());
+
+  Assert (interpolation_matrix.n() == this->dofs_per_face,
+          ExcDimensionMismatch (interpolation_matrix.n(),
+                                this->dofs_per_face));
+  Assert (interpolation_matrix.m() == x_source_fe.dofs_per_face,
+          ExcDimensionMismatch (interpolation_matrix.m(),
+                                x_source_fe.dofs_per_face));
+
+  // since dofs for each base are independent, we only have to stack things up
+  // from base element to base element
+  //
+  // the problem is that we have to work with two FEs (this and
+  // fe_other). only deal with the case that both are FESystems and that they
+  // both have the same number of bases (counting multiplicity) each of which
+  // match in their number of components. this covers
+  // FESystem(FE_Q(p),1,FE_Q(q),2) vs FESystem(FE_Q(r),2,FE_Q(s),1), but not
+  // FESystem(FE_Q(p),1,FE_Q(q),2) vs
+  // FESystem(FESystem(FE_Q(r),2),1,FE_Q(s),1)
+
+  // clear matrix, since we will not get to set all elements
+  interpolation_matrix = 0;
+
+  // loop over all the base elements of this and the other element, counting
+  // their multiplicities
+  unsigned int base_index       = 0,
+               base_index_other = 0;
+  unsigned int multiplicity       = 0,
+               multiplicity_other = 0;
+
+  // scratch matrix, resized for every pair of base elements
+  FullMatrix<double> base_to_base_interpolation;
+
+  while (true)
+    {
+      const FiniteElement<dim,spacedim>
+      &base       = base_element(base_index),
+       &base_other = fe_other_system->base_element(base_index_other);
+
+      Assert (base.n_components() == base_other.n_components(),
+              ExcNotImplemented());
+
+      // get the interpolation from the bases
+      base_to_base_interpolation.reinit (base_other.dofs_per_face,
+                                         base.dofs_per_face);
+      base.get_face_interpolation_matrix (base_other,
+                                          base_to_base_interpolation);
+
+      // now translate entries. we'd like to have something like
+      // face_base_to_system_index, but that doesn't exist. rather, all we
+      // have is the reverse. well, use that then
+      for (unsigned int i=0; i<this->dofs_per_face; ++i)
+        if (this->face_system_to_base_index(i).first
+            ==
+            std::make_pair (base_index, multiplicity))
+          for (unsigned int j=0; j<fe_other_system->dofs_per_face; ++j)
+            if (fe_other_system->face_system_to_base_index(j).first
+                ==
+                std::make_pair (base_index_other, multiplicity_other))
+              interpolation_matrix(j, i)
+                = base_to_base_interpolation(fe_other_system->face_system_to_base_index(j).second,
+                                             this->face_system_to_base_index(i).second);
+
+      // advance to the next base element for this and the other fe_system;
+      // see if we can simply advance the multiplicity by one, or if have to
+      // move on to the next base element
+      ++multiplicity;
+      if (multiplicity == this->element_multiplicity(base_index))
+        {
+          multiplicity = 0;
+          ++base_index;
+        }
+      ++multiplicity_other;
+      if (multiplicity_other ==
+          fe_other_system->element_multiplicity(base_index_other))
+        {
+          multiplicity_other = 0;
+          ++base_index_other;
+        }
+
+      // see if we have reached the end of the present element. if so, we
+      // should have reached the end of the other one as well
+      if (base_index == this->n_base_elements())
+        {
+          Assert (base_index_other == fe_other_system->n_base_elements(),
+                  ExcInternalError());
+          break;
+        }
+
+      // if we haven't reached the end of this element, we shouldn't have
+      // reached the end of the other one either
+      Assert (base_index_other != fe_other_system->n_base_elements(),
+              ExcInternalError());
+    }
+}
+
+
+
+// Build the subface interpolation matrix between this element and
+// @p x_source_fe for the given subface by stacking the subface interpolation
+// matrices of the individual base elements.
+//
+// @param x_source_fe  The other element. It has to be an FESystem with the
+//   same template arguments; otherwise ExcInterpolationNotImplemented is
+//   thrown. Both systems are expected to have the same number of base
+//   elements (counting multiplicities) with matching numbers of components.
+// @param subface  Subface number, passed on unchanged to the base elements.
+// @param interpolation_matrix  Output matrix. It must satisfy
+//   m() == x_source_fe.dofs_per_face and n() == this->dofs_per_face. It is
+//   set to zero first and then filled block by block.
+template <int dim, int spacedim>
+void
+FESystem<dim,spacedim>::
+get_subface_interpolation_matrix (const FiniteElement<dim,spacedim> &x_source_fe,
+                                  const unsigned int        subface,
+                                  FullMatrix<double>       &interpolation_matrix) const
+{
+  typedef FiniteElement<dim,spacedim> FEL;
+
+  // everything below accesses the other element through the FESystem
+  // interface, so the only acceptable source element is one for which the
+  // cast succeeds. do not accept an element based on its name alone: if the
+  // cast failed we would dereference a null pointer further down
+  const FESystem<dim,spacedim> *fe_other_system
+    = dynamic_cast<const FESystem<dim,spacedim>*>(&x_source_fe);
+  AssertThrow (fe_other_system != 0,
+               typename FEL::
+               ExcInterpolationNotImplemented());
+
+  Assert (interpolation_matrix.n() == this->dofs_per_face,
+          ExcDimensionMismatch (interpolation_matrix.n(),
+                                this->dofs_per_face));
+  Assert (interpolation_matrix.m() == x_source_fe.dofs_per_face,
+          ExcDimensionMismatch (interpolation_matrix.m(),
+                                x_source_fe.dofs_per_face));
+
+  // since dofs for each base are independent, we only have to stack things up
+  // from base element to base element
+  //
+  // the problem is that we have to work with two FEs (this and
+  // fe_other). only deal with the case that both are FESystems and that they
+  // both have the same number of bases (counting multiplicity) each of which
+  // match in their number of components. this covers
+  // FESystem(FE_Q(p),1,FE_Q(q),2) vs FESystem(FE_Q(r),2,FE_Q(s),1), but not
+  // FESystem(FE_Q(p),1,FE_Q(q),2) vs
+  // FESystem(FESystem(FE_Q(r),2),1,FE_Q(s),1)
+
+  // clear matrix, since we will not get to set all elements
+  interpolation_matrix = 0;
+
+  // loop over all the base elements of this and the other element, counting
+  // their multiplicities
+  unsigned int base_index       = 0,
+               base_index_other = 0;
+  unsigned int multiplicity       = 0,
+               multiplicity_other = 0;
+
+  // scratch matrix, resized for every pair of base elements
+  FullMatrix<double> base_to_base_interpolation;
+
+  while (true)
+    {
+      const FiniteElement<dim,spacedim>
+      &base       = base_element(base_index),
+       &base_other = fe_other_system->base_element(base_index_other);
+
+      Assert (base.n_components() == base_other.n_components(),
+              ExcNotImplemented());
+
+      // get the interpolation from the bases
+      base_to_base_interpolation.reinit (base_other.dofs_per_face,
+                                         base.dofs_per_face);
+      base.get_subface_interpolation_matrix (base_other,
+                                             subface,
+                                             base_to_base_interpolation);
+
+      // now translate entries. we'd like to have something like
+      // face_base_to_system_index, but that doesn't exist. rather, all we
+      // have is the reverse. well, use that then
+      for (unsigned int i=0; i<this->dofs_per_face; ++i)
+        if (this->face_system_to_base_index(i).first
+            ==
+            std::make_pair (base_index, multiplicity))
+          for (unsigned int j=0; j<fe_other_system->dofs_per_face; ++j)
+            if (fe_other_system->face_system_to_base_index(j).first
+                ==
+                std::make_pair (base_index_other, multiplicity_other))
+              interpolation_matrix(j, i)
+                = base_to_base_interpolation(fe_other_system->face_system_to_base_index(j).second,
+                                             this->face_system_to_base_index(i).second);
+
+      // advance to the next base element for this and the other fe_system;
+      // see if we can simply advance the multiplicity by one, or if have to
+      // move on to the next base element
+      ++multiplicity;
+      if (multiplicity == this->element_multiplicity(base_index))
+        {
+          multiplicity = 0;
+          ++base_index;
+        }
+      ++multiplicity_other;
+      if (multiplicity_other ==
+          fe_other_system->element_multiplicity(base_index_other))
+        {
+          multiplicity_other = 0;
+          ++base_index_other;
+        }
+
+      // see if we have reached the end of the present element. if so, we
+      // should have reached the end of the other one as well
+      if (base_index == this->n_base_elements())
+        {
+          Assert (base_index_other == fe_other_system->n_base_elements(),
+                  ExcInternalError());
+          break;
+        }
+
+      // if we haven't reached the end of this element, we shouldn't have
+      // reached the end of the other one either
+      Assert (base_index_other != fe_other_system->n_base_elements(),
+              ExcInternalError());
+    }
+}
+
+
+
+// Collect the hp dof identities between this element and @p fe_other on
+// objects of dimension @p structdim (0: vertices, 1: lines, 2: quads) by
+// querying each pair of base elements and shifting the returned indices by
+// the number of dofs already handled on either side.
+//
+// Only the case that @p fe_other is an FESystem as well is implemented. Both
+// systems are expected to have the same number of base elements (counting
+// multiplicities) with matching numbers of components. This covers
+// FESystem(FE_Q(p),1,FE_Q(q),2) vs FESystem(FE_Q(r),2,FE_Q(s),1), but not
+// FESystem(FE_Q(p),1,FE_Q(q),2) vs
+// FESystem(FESystem(FE_Q(r),2),1,FE_Q(s),1).
+template <int dim, int spacedim>
+template <int structdim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FESystem<dim,spacedim>::hp_object_dof_identities (const FiniteElement<dim,spacedim> &fe_other) const
+{
+  typedef std::vector<std::pair<unsigned int, unsigned int> > IdentityList;
+
+  const FESystem<dim,spacedim> *const other_system
+    = dynamic_cast<const FESystem<dim,spacedim>*>(&fe_other);
+
+  // anything but another FESystem is not implemented
+  if (!other_system)
+    {
+      Assert (false, ExcNotImplemented());
+      return IdentityList();
+    }
+
+  // position within the list of base elements, counting multiplicities, for
+  // this element ("mine") and the other one ("theirs")
+  unsigned int my_base     = 0, their_base     = 0;
+  unsigned int my_instance = 0, their_instance = 0;
+
+  // number of dofs on the object that earlier base elements have used up
+  unsigned int my_offset = 0, their_offset = 0;
+
+  // dofs on each subobject are ordered by base element and, within each base
+  // element, by multiplicity, so the identities of the base elements can
+  // simply be appended one after the other
+  IdentityList result;
+
+  for (;;)
+    {
+      const FiniteElement<dim,spacedim> &mine   = base_element(my_base);
+      const FiniteElement<dim,spacedim> &theirs = other_system->base_element(their_base);
+
+      Assert (mine.n_components() == theirs.n_components(),
+              ExcNotImplemented());
+
+      // ask the base elements for their identities on this kind of object
+      IdentityList local_identities;
+      if (structdim == 0)
+        local_identities = mine.hp_vertex_dof_identities (theirs);
+      else if (structdim == 1)
+        local_identities = mine.hp_line_dof_identities (theirs);
+      else if (structdim == 2)
+        local_identities = mine.hp_quad_dof_identities (theirs);
+      else
+        {
+          Assert (false, ExcNotImplemented());
+        }
+
+      // translate them to the numbering of the two systems
+      for (unsigned int k=0; k<local_identities.size(); ++k)
+        {
+          const unsigned int my_dof    = local_identities[k].first + my_offset;
+          const unsigned int their_dof = local_identities[k].second + their_offset;
+          result.push_back (std::make_pair (my_dof, their_dof));
+        }
+
+      // the dofs of these two base elements are now accounted for
+      my_offset    += mine.template n_dofs_per_object<structdim>();
+      their_offset += theirs.template n_dofs_per_object<structdim>();
+
+      // step to the next instance of the current base element or, if that
+      // was the last instance, to the next base element
+      ++my_instance;
+      if (my_instance == this->element_multiplicity(my_base))
+        {
+          my_instance = 0;
+          ++my_base;
+        }
+      ++their_instance;
+      if (their_instance == other_system->element_multiplicity(their_base))
+        {
+          their_instance = 0;
+          ++their_base;
+        }
+
+      // both elements have to run out of base elements at the same time
+      if (my_base == this->n_base_elements())
+        {
+          Assert (their_base == other_system->n_base_elements(),
+                  ExcInternalError());
+          break;
+        }
+
+      Assert (their_base != other_system->n_base_elements(),
+              ExcInternalError());
+    }
+
+  return result;
+}
+
+
+
+// Vertex case of the hp identity query: hand @p fe_other on to
+// hp_object_dof_identities() with structural dimension 0 and return
+// whatever that function returns.
+template <int dim, int spacedim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FESystem<dim,spacedim>::
+hp_vertex_dof_identities (const FiniteElement<dim,spacedim> &fe_other) const
+{
+  return this->template hp_object_dof_identities<0> (fe_other);
+}
+
+// Line case of the hp identity query: hand @p fe_other on to
+// hp_object_dof_identities() with structural dimension 1 and return
+// whatever that function returns.
+template <int dim, int spacedim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FESystem<dim,spacedim>::
+hp_line_dof_identities (const FiniteElement<dim,spacedim> &fe_other) const
+{
+  return this->template hp_object_dof_identities<1> (fe_other);
+}
+
+
+
+// Quad case of the hp identity query: hand @p fe_other on to
+// hp_object_dof_identities() with structural dimension 2 and return
+// whatever that function returns.
+template <int dim, int spacedim>
+std::vector<std::pair<unsigned int, unsigned int> >
+FESystem<dim,spacedim>::
+hp_quad_dof_identities (const FiniteElement<dim,spacedim> &fe_other) const
+{
+  return this->template hp_object_dof_identities<2> (fe_other);
+}
+
+
+
+// Decide which of this element and @p fe_other dominates. Only the
+// comparison against another FESystem with the same number of components,
+// the same number of base elements, and matching base element structure is
+// implemented; the answer is obtained by combining (operator &) the answers
+// of all pairs of corresponding base elements.
+template <int dim, int spacedim>
+FiniteElementDomination::Domination
+FESystem<dim,spacedim>::
+compare_for_face_domination (const FiniteElement<dim,spacedim> &fe_other) const
+{
+  const FESystem<dim,spacedim> *const other_system
+    = dynamic_cast<const FESystem<dim,spacedim>*>(&fe_other);
+
+  // anything that is not an FESystem can not be handled at present
+  if (other_system == 0)
+    {
+      Assert (false, ExcNotImplemented());
+      return FiniteElementDomination::neither_element_dominates;
+    }
+
+  Assert (this->n_components() == other_system->n_components(),
+          ExcNotImplemented());
+  Assert (this->n_base_elements() == other_system->n_base_elements(),
+          ExcNotImplemented());
+
+  // start out without any requirement and combine it with the verdict of
+  // each pair of base elements in turn
+  FiniteElementDomination::Domination
+  combined = FiniteElementDomination::no_requirements;
+
+  for (unsigned int base=0; base<this->n_base_elements(); ++base)
+    {
+      const FiniteElement<dim,spacedim> &mine   = this->base_element(base);
+      const FiniteElement<dim,spacedim> &theirs = other_system->base_element(base);
+
+      // sanity checks on the structure of the two systems
+      Assert (mine.n_components() == theirs.n_components(),
+              ExcNotImplemented());
+      Assert (this->element_multiplicity(base) ==
+              other_system->element_multiplicity(base),
+              ExcNotImplemented());
+
+      combined = combined & mine.compare_for_face_domination (theirs);
+    }
+
+  return combined;
+}
+
+
+
+// Return a reference to the base element with the given index. In debug
+// mode, the index is checked against the number of stored base elements.
+template <int dim, int spacedim>
+const FiniteElement<dim,spacedim> &
+FESystem<dim,spacedim>::base_element (const unsigned int index) const
+{
+  Assert (index < this->base_elements.size(),
+          ExcIndexRange(index, 0, this->base_elements.size()));
+
+  // each entry pairs a pointer to the element with a second value; only the
+  // element is of interest here
+  return *(this->base_elements[index].first);
+}
+
+
+
+// Answer the question for shape function @p shape_index by translating it
+// into (base element, index within that base element) and asking the base
+// element about face @p face_index.
+template <int dim, int spacedim>
+bool
+FESystem<dim,spacedim>::has_support_on_face (const unsigned int shape_index,
+                                             const unsigned int face_index) const
+{
+  const std::pair<std::pair<unsigned int, unsigned int>, unsigned int>
+  base_location = this->system_to_base_index(shape_index);
+
+  const FiniteElement<dim,spacedim> &base
+    = base_element(base_location.first.first);
+
+  return base.has_support_on_face(base_location.second, face_index);
+}
+
+
+
+// Return the unit support point of the dof with the given index. If this
+// element stores its own list of unit support points, the answer is taken
+// from there; otherwise the base element owning the dof is asked.
+template <int dim, int spacedim>
+Point<dim>
+FESystem<dim,spacedim>::unit_support_point (const unsigned int index) const
+{
+  Assert (index < this->dofs_per_cell,
+          ExcIndexRange (index, 0, this->dofs_per_cell));
+  typedef FiniteElement<dim,spacedim> FEL;
+  Assert ((this->unit_support_points.size() == this->dofs_per_cell) ||
+          (this->unit_support_points.size() == 0),
+          typename FEL::ExcFEHasNoSupportPoints ());
+
+  // nothing stored here: delegate to the base element this dof belongs to
+  if (this->unit_support_points.size() == 0)
+    {
+      const std::pair<std::pair<unsigned int, unsigned int>, unsigned int>
+      base_location = this->system_to_base_index(index);
+
+      return base_element(base_location.first.first)
+             .unit_support_point(base_location.second);
+    }
+
+  // the information is available directly
+  return this->unit_support_points[index];
+}
+
+
+
+// Return the unit face support point of the face dof with the given index.
+// If this element stores its own list of unit face support points, the
+// answer is taken from there; otherwise the base element owning the dof is
+// asked.
+template <int dim, int spacedim>
+Point<dim-1>
+FESystem<dim,spacedim>::unit_face_support_point (const unsigned int index) const
+{
+  Assert (index < this->dofs_per_face,
+          ExcIndexRange (index, 0, this->dofs_per_face));
+  typedef  FiniteElement<dim,spacedim> FEL;
+  Assert ((this->unit_face_support_points.size() == this->dofs_per_face) ||
+          (this->unit_face_support_points.size() == 0),
+          typename FEL::ExcFEHasNoSupportPoints ());
+
+  // nothing stored here: delegate to the base element this dof belongs to
+  if (this->unit_face_support_points.size() == 0)
+    {
+      const std::pair<std::pair<unsigned int, unsigned int>, unsigned int>
+      base_location = this->face_system_to_base_index(index);
+
+      return base_element(base_location.first.first)
+             .unit_face_support_point(base_location.second);
+    }
+
+  // the information is available directly
+  return this->unit_face_support_points[index];
+}
+
+
+
+// Assemble the constant modes of the composed element from the constant
+// modes reported by its base elements. The result is a table with one row
+// per constant mode and one column per cell dof, together with a vector
+// that holds one component number per table row.
+template <int dim, int spacedim>
+std::pair<Table<2,bool>, std::vector<unsigned int> >
+FESystem<dim,spacedim>::get_constant_modes () const
+{
+  // this->n_components() rows is only a first guess: a base element may
+  // report more than one constant mode (e.g. FE_Q_DG0), in which case the
+  // table is enlarged below
+  Table<2,bool> mode_table(this->n_components(), this->dofs_per_cell);
+  std::vector<unsigned int> mode_components;
+
+  for (unsigned int base=0; base<base_elements.size(); ++base)
+    {
+      std::pair<Table<2,bool>, std::vector<unsigned int> >
+      base_modes = base_elements[base].first->get_constant_modes();
+      AssertDimension(base_modes.first.n_rows(), base_modes.second.size());
+
+      const unsigned int multiplicity = this->element_multiplicity(base);
+      const unsigned int n_base_rows  = base_modes.first.n_rows();
+
+      // rows [0,first_row) have been filled by the previous base elements;
+      // this base element needs n_base_rows rows for each of its copies
+      const unsigned int first_row   = mode_components.size();
+      const unsigned int rows_needed = first_row + n_base_rows*multiplicity;
+
+      // if the table is too small, replace it by a larger one that keeps
+      // the rows filled so far
+      if (mode_table.n_rows() < rows_needed)
+        {
+          Table<2,bool> enlarged_table(rows_needed, mode_table.n_cols());
+          for (unsigned int row=0; row<first_row; ++row)
+            for (unsigned int col=0; col<this->dofs_per_cell; ++col)
+              enlarged_table(row,col) = mode_table(row,col);
+          mode_table.swap(enlarged_table);
+        }
+
+      // copy the modes of the base element into the columns of those cell
+      // dofs that belong to one of the copies of this base element
+      for (unsigned int dof=0; dof<this->dofs_per_cell; ++dof)
+        {
+          const std::pair<std::pair<unsigned int,unsigned int>, unsigned int>
+          location = this->system_to_base_index(dof);
+          if (location.first.first != base)
+            continue;
+
+          const unsigned int copy_first_row
+            = first_row + location.first.second*n_base_rows;
+          for (unsigned int mode=0; mode<n_base_rows; ++mode)
+            mode_table(copy_first_row+mode, dof)
+              = base_modes.first(mode, location.second);
+        }
+
+      // record the component number that goes with each of the new rows
+      for (unsigned int copy=0; copy<multiplicity; ++copy)
+        for (unsigned int mode=0; mode<base_modes.second.size(); ++mode)
+          mode_components.push_back(first_row
+                                    + copy*this->base_elements[base].first->n_components()
+                                    + base_modes.second[mode]);
+    }
+
+  AssertDimension(mode_components.size(), mode_table.n_rows());
+  return std::pair<Table<2,bool>, std::vector<unsigned int> >(mode_table,
+                                                              mode_components);
+}
+
+
+
+// Estimate of the memory used by this object. It is the sum of the estimate
+// of the base class, the size of the base_elements member itself, and the
+// value MemoryConsumption::memory_consumption() reports for each of the
+// base elements.
+template <int dim, int spacedim>
+std::size_t
+FESystem<dim,spacedim>::memory_consumption () const
+{
+  std::size_t total = FiniteElement<dim,spacedim>::memory_consumption ();
+  total += sizeof (base_elements);
+
+  for (unsigned int base=0; base<base_elements.size(); ++base)
+    total += MemoryConsumption::memory_consumption (*base_elements[base].first);
+
+  return total;
+}
+
+
+
+
+// explicit instantiations
+#include "fe_system.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/fe_system.inst.in b/source/fe/fe_system.inst.in
new file mode 100644
index 0000000..ccda2dc
--- /dev/null
+++ b/source/fe/fe_system.inst.in
@@ -0,0 +1,24 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+// Instantiate FESystem for every pair of dimension and space dimension in
+// which the dimension does not exceed the space dimension.
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension : SPACE_DIMENSIONS)
+  {
+#if deal_II_dimension <= deal_II_space_dimension
+    template class FESystem<deal_II_dimension, deal_II_space_dimension>;
+#endif
+  }
+  
+
diff --git a/source/fe/fe_tools.cc b/source/fe/fe_tools.cc
new file mode 100644
index 0000000..6173189
--- /dev/null
+++ b/source/fe/fe_tools.cc
@@ -0,0 +1,2111 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/qprojector.h>
+#include <deal.II/base/thread_management.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/householder.h>
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/fe/fe_tools.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_bernstein.h>
+#include <deal.II/fe/fe_q_hierarchical.h>
+#include <deal.II/fe/fe_dgq.h>
+#include <deal.II/fe/fe_dgp.h>
+#include <deal.II/fe/fe_dgp_monomial.h>
+#include <deal.II/fe/fe_dgp_nonparametric.h>
+#include <deal.II/fe/fe_nedelec.h>
+#include <deal.II/fe/fe_abf.h>
+#include <deal.II/fe/fe_bdm.h>
+#include <deal.II/fe/fe_raviart_thomas.h>
+#include <deal.II/fe/fe_nothing.h>
+#include <deal.II/fe/fe_system.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/mapping_cartesian.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/hp/dof_handler.h>
+
+#include <deal.II/base/std_cxx11/shared_ptr.h>
+
+#include <deal.II/base/index_set.h>
+
+#include <cctype>
+#include <iostream>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace FETools
+{
+  // Generic version: creating an element from a one-dimensional quadrature
+  // formula is not implemented for arbitrary element types. Triggers an
+  // assertion and hands back a null pointer.
+  template <class FE>
+  FiniteElement<FE::dimension, FE::space_dimension> *
+  FEFactory<FE>::get (const Quadrature<1> & /*quadrature*/) const
+  {
+    Assert(false, ExcNotImplemented());
+
+    FiniteElement<FE::dimension, FE::space_dimension> *const no_element = 0;
+    return no_element;
+  }
+
+  // Specializations for FE_Q in dimensions 1, 2 and 3 (dim == spacedim):
+  // each one allocates an FE_Q object from the given quadrature formula and
+  // returns the raw pointer to it.
+  template <>
+  FiniteElement<1, 1> *
+  FEFactory<FE_Q<1, 1> >::get (const Quadrature<1> &quad) const
+  {
+    return new FE_Q<1, 1>(quad);
+  }
+
+  template <>
+  FiniteElement<2, 2> *
+  FEFactory<FE_Q<2, 2> >::get (const Quadrature<1> &quad) const
+  {
+    return new FE_Q<2, 2>(quad);
+  }
+
+  template <>
+  FiniteElement<3, 3> *
+  FEFactory<FE_Q<3, 3> >::get (const Quadrature<1> &quad) const
+  {
+    return new FE_Q<3, 3>(quad);
+  }
+
+
+  // Specializations of the FE_DGQ factory in dimensions 1, 2 and 3: when
+  // given a quadrature formula, each one allocates an FE_DGQArbitraryNodes
+  // object from it and returns the raw pointer to it.
+  template <>
+  FiniteElement<1, 1> *
+  FEFactory<FE_DGQ<1> >::get (const Quadrature<1> &quad) const
+  {
+    return new FE_DGQArbitraryNodes<1, 1>(quad);
+  }
+
+  template <>
+  FiniteElement<2, 2> *
+  FEFactory<FE_DGQ<2> >::get (const Quadrature<1> &quad) const
+  {
+    return new FE_DGQArbitraryNodes<2, 2>(quad);
+  }
+
+  template <>
+  FiniteElement<3, 3> *
+  FEFactory<FE_DGQ<3> >::get (const Quadrature<1> &quad) const
+  {
+    return new FE_DGQArbitraryNodes<3, 3>(quad);
+  }
+}
+
+namespace
+{
+  // The following three functions build the maps from element names to
+  // element factories that end up in fe_name_map below. This first one
+  // exists because some finite elements are not implemented for nonzero
+  // codimension; its entries should eventually be moved to the second
+  // function.
+  //
+  // Each statement stores, under the element's name, a newly allocated
+  // factory object in the map passed in as @p result.
+  template <int dim>
+  void
+  fill_no_codim_fe_names (std::map<std::string,std_cxx11::shared_ptr<const Subscriptor> > &result)
+  {
+    result["FE_Q_Hierarchical"].reset
+    (new FETools::FEFactory<FE_Q_Hierarchical<dim> >);
+    result["FE_ABF"].reset
+    (new FETools::FEFactory<FE_ABF<dim> >);
+    result["FE_BDM"].reset
+    (new FETools::FEFactory<FE_BDM<dim> >);
+    result["FE_RaviartThomas"].reset
+    (new FETools::FEFactory<FE_RaviartThomas<dim> >);
+    result["FE_RaviartThomasNodal"].reset
+    (new FETools::FEFactory<FE_RaviartThomasNodal<dim> >);
+    result["FE_Nedelec"].reset
+    (new FETools::FEFactory<FE_Nedelec<dim> >);
+    result["FE_DGPNonparametric"].reset
+    (new FETools::FEFactory<FE_DGPNonparametric<dim> >);
+    result["FE_DGP"].reset
+    (new FETools::FEFactory<FE_DGP<dim> >);
+    result["FE_DGPMonomial"].reset
+    (new FETools::FEFactory<FE_DGPMonomial<dim> >);
+    result["FE_DGQ"].reset
+    (new FETools::FEFactory<FE_DGQ<dim> >);
+    // registered with the same factory type as "FE_DGQ"
+    result["FE_DGQArbitraryNodes"].reset
+    (new FETools::FEFactory<FE_DGQ<dim> >);
+    result["FE_Q"].reset
+    (new FETools::FEFactory<FE_Q<dim> >);
+    result["FE_Bernstein"].reset
+    (new FETools::FEFactory<FE_Bernstein<dim> >);
+    result["FE_Nothing"].reset
+    (new FETools::FEFactory<FE_Nothing<dim> >);
+  }
+
+  // Fill a map from names to element factories for an arbitrary pair of
+  // dimension and space dimension. Only elements that support nonzero
+  // codimension are registered here. Each statement stores, under the
+  // element's name, a newly allocated factory object in @p result.
+  template <int dim, int spacedim>
+  void
+  fill_codim_fe_names (std::map<std::string,std_cxx11::shared_ptr<const Subscriptor> > &result)
+  {
+    result["FE_DGP"].reset
+    (new FETools::FEFactory<FE_DGP<dim,spacedim> >);
+    result["FE_DGQ"].reset
+    (new FETools::FEFactory<FE_DGQ<dim,spacedim> >);
+    // registered with the same factory type as "FE_DGQ"
+    result["FE_DGQArbitraryNodes"].reset
+    (new FETools::FEFactory<FE_DGQ<dim,spacedim> >);
+    result["FE_Q"].reset
+    (new FETools::FEFactory<FE_Q<dim,spacedim> >);
+    result["FE_Bernstein"].reset
+    (new FETools::FEFactory<FE_Bernstein<dim,spacedim> >);
+  }
+
+  // Build the initial value of fe_name_map below: a 4x4 table of name maps
+  // indexed as [dimension][spacedimension]. The entries for the legal
+  // dimension/spacedimension pairs are filled by the two functions above;
+  // all other entries stay empty.
+  std::vector<std::vector<
+  std::map<std::string,
+      std_cxx11::shared_ptr<const Subscriptor> > > >
+      fill_default_map()
+  {
+    typedef std::map<std::string,
+            std_cxx11::shared_ptr<const Subscriptor> > NameMap;
+
+    // four rows of four empty maps each
+    std::vector<std::vector<NameMap> >
+    table (4, std::vector<NameMap>(4));
+
+    // codimension zero
+    fill_no_codim_fe_names<1> (table[1][1]);
+    fill_no_codim_fe_names<2> (table[2][2]);
+    fill_no_codim_fe_names<3> (table[3][3]);
+
+    // nonzero codimension
+    fill_codim_fe_names<1,2> (table[1][2]);
+    fill_codim_fe_names<1,3> (table[1][3]);
+    fill_codim_fe_names<2,3> (table[2][3]);
+
+    return table;
+  }
+
+
+  // Have a lock that guarantees that at most one thread is changing
+  // and accessing the fe_name_map variable. Make this lock local to
+  // this file.
+  //
+  // This and the next variable are declared static (even though
+  // they're in an anonymous namespace) in order to make icc happy
+  // (which otherwise reports a multiply defined symbol when linking
+  // libraries for more than one space dimension together).
+  static
+  Threads::Mutex fe_name_map_lock;
+
+  // This is the map used by FETools::get_fe_from_name and
+  // FETools::add_fe_name. It is only accessed by functions in this
+  // file, so it is safe to make it a static variable here. It must be
+  // static so that we can link several dimensions together.
+
+  // The organization of this storage is such that
+  // fe_name_map[dim][spacedim][name] points to an
+  // FEFactoryBase<dim,spacedim> with the name given. Since
+  // all entries of this vector are of different type, we store
+  // pointers to generic objects and cast them when needed.
+
+  // We use a shared pointer to factory objects, to ensure that they
+  // get deleted at the end of the program run and don't end up as
+  // apparent memory leaks to programs like valgrind.
+
+  // This vector is initialized at program start time using the
+  // function above. Because at this time there are no threads
+  // running, there are no thread-safety issues here. Since this is
+  // compiled for all dimensions at once, we need to create objects for
+  // each dimension and then separate between them further down.
+  static
+  std::vector<std::vector<
+  std::map<std::string,
+      std_cxx11::shared_ptr<const Subscriptor> > > >
+      fe_name_map = fill_default_map();
+}
+
+
+
+
+
+
+namespace
+{
+
+  // Forwarder for FE::get_interpolation_matrix. We want to call that
+  // function for arbitrary FullMatrix<T> types, but it only accepts
+  // matrices of doubles and, being virtual, can not be changed. This
+  // overload covers T==double and calls the function directly; the
+  // overload below covers all other types by going through a temporary.
+  template <int dim, int spacedim>
+  inline
+  void gim_forwarder (const FiniteElement<dim,spacedim> &fe1,
+                      const FiniteElement<dim,spacedim> &fe2,
+                      FullMatrix<double> &interpolation_matrix)
+  {
+    // note the order: fe2 is asked for the matrix, fe1 is the argument
+    fe2.get_interpolation_matrix (fe1,
+                                  interpolation_matrix);
+  }
+
+
+  // Overload for matrices whose scalar type is not double: let the element
+  // fill a temporary matrix of doubles of the same size, then assign the
+  // temporary to the output matrix.
+  template <int dim, typename number, int spacedim>
+  inline
+  void gim_forwarder (const FiniteElement<dim,spacedim> &fe1,
+                      const FiniteElement<dim,spacedim> &fe2,
+                      FullMatrix<number> &interpolation_matrix)
+  {
+    FullMatrix<double> double_matrix (interpolation_matrix.m(),
+                                      interpolation_matrix.n());
+
+    fe2.get_interpolation_matrix (fe1, double_matrix);
+
+    interpolation_matrix = double_matrix;
+  }
+
+
+
+  // Return how many characters, starting at the given position of the
+  // string, match either the generic string "<dim>" or the specialized
+  // string in which "dim" is replaced by the numeric value of the template
+  // argument dim (e.g. "<2>"). Returns 5 for the former, 3 for the latter,
+  // and 0 if neither matches or if position is not inside the string.
+  //
+  // A token that ends exactly at the end of the string counts as a match;
+  // the previous length checks demanded at least one further character
+  // after the closing '>' and so rejected such tokens.
+  //
+  // The template argument spacedim is not used.
+  template <int dim, int spacedim>
+  inline
+  unsigned int match_dimension (const std::string &name,
+                                const unsigned int position)
+  {
+    if (position >= name.size())
+      return 0;
+
+    // number of characters from position to the end of the string
+    const std::string::size_type n_remaining = name.size() - position;
+
+    if ((n_remaining >= 5)
+        &&
+        (name.compare (position, 5, "<dim>") == 0))
+      return 5;
+
+    // only single-digit dimensions can be turned into one character
+    Assert (dim<10, ExcNotImplemented());
+    const char dim_char = '0'+dim;
+
+    if ((n_remaining >= 3)
+        &&
+        (name[position] == '<')
+        &&
+        (name[position+1] == dim_char)
+        &&
+        (name[position+2] == '>'))
+      return 3;
+
+    // some other string that doesn't match
+    return 0;
+  }
+}
+
+
+namespace FETools
+{
+  // Out-of-line definition of the destructor of the factory base class.
+  // It has nothing to do.
+  template <int dim, int spacedim>
+  FEFactoryBase<dim,spacedim>::~FEFactoryBase()
+  {
+  }
+
+
+  // Compute a renumbering of the cell dofs of @p element in which the dofs
+  // of each copy of each base element are stored contiguously.
+  //
+  // @p renumbering must have size element.dofs_per_cell on entry; on exit,
+  // renumbering[i] is the new index of cell dof i. @p comp_start is resized
+  // so that comp_start[b][m] is the first new index used by copy m of base
+  // element b.
+  template<int dim, int spacedim>
+  void compute_component_wise(
+    const FiniteElement<dim,spacedim> &element,
+    std::vector<unsigned int> &renumbering,
+    std::vector<std::vector<unsigned int> > &comp_start)
+  {
+    Assert(renumbering.size() == element.dofs_per_cell,
+           ExcDimensionMismatch(renumbering.size(),
+                                element.dofs_per_cell));
+
+    comp_start.resize(element.n_base_elements());
+
+    // hand out consecutive index ranges, one per copy of each base element
+    unsigned int next_start = 0;
+    for (unsigned int base=0; base<comp_start.size(); ++base)
+      {
+        const unsigned int dofs_per_copy
+          = element.base_element(base).dofs_per_cell;
+
+        comp_start[base].resize(element.element_multiplicity(base));
+        for (unsigned int copy=0; copy<comp_start[base].size(); ++copy)
+          {
+            comp_start[base][copy] = next_start;
+            next_start += dofs_per_copy;
+          }
+      }
+
+    // the new index of a cell dof is the start of the range of its copy
+    // plus its index within the base element
+    for (unsigned int dof=0; dof<element.dofs_per_cell; ++dof)
+      {
+        const std::pair<std::pair<unsigned int, unsigned int>, unsigned int>
+        location = element.system_to_base_index(dof);
+
+        const unsigned int range_start
+          = comp_start[location.first.first][location.first.second];
+        renumbering[dof] = range_start + location.second;
+      }
+  }
+
+
+
+  // Compute a renumbering of the cell dofs of @p element in which the dofs
+  // of each block are stored contiguously.
+  //
+  // @p renumbering must have size element.dofs_per_cell and @p block_data
+  // size element.n_blocks() on entry. On exit, renumbering[i] is the new
+  // index of cell dof i, and block_data holds for each block either its
+  // first index (if @p return_start_indices is true) or its number of dofs
+  // (otherwise).
+  template<int dim, int spacedim>
+  void compute_block_renumbering (
+    const FiniteElement<dim,spacedim> &element,
+    std::vector<types::global_dof_index> &renumbering,
+    std::vector<types::global_dof_index> &block_data,
+    bool return_start_indices)
+  {
+    Assert(renumbering.size() == element.dofs_per_cell,
+           ExcDimensionMismatch(renumbering.size(),
+                                element.dofs_per_cell));
+    Assert(block_data.size() == element.n_blocks(),
+           ExcDimensionMismatch(block_data.size(),
+                                element.n_blocks()));
+
+    // one block per copy of each base element
+    types::global_dof_index running_index = 0;
+    unsigned int n_blocks_seen = 0;
+    for (unsigned int base=0; base<element.n_base_elements(); ++base)
+      for (unsigned int copy=0; copy<element.element_multiplicity(base); ++copy)
+        {
+          if (return_start_indices)
+            block_data[n_blocks_seen] = running_index;
+          else
+            block_data[n_blocks_seen] = element.base_element(base).n_dofs_per_cell();
+          ++n_blocks_seen;
+
+          running_index += element.base_element(base).n_dofs_per_cell();
+        }
+    Assert (n_blocks_seen == element.n_blocks(), ExcInternalError());
+
+    // the renumbering needs the first index of each block: either that is
+    // what block_data already contains, or it follows by summing up the
+    // block sizes
+    std::vector<types::global_dof_index> block_starts(block_data.size());
+    if (return_start_indices)
+      {
+        for (unsigned int block=0; block<block_data.size(); ++block)
+          block_starts[block] = block_data[block];
+      }
+    else
+      {
+        running_index = 0;
+        for (unsigned int block=0; block<block_data.size(); ++block)
+          {
+            block_starts[block] = running_index;
+            running_index += block_data[block];
+          }
+      }
+
+    for (unsigned int dof=0; dof<element.dofs_per_cell; ++dof)
+      {
+        const std::pair<unsigned int, types::global_dof_index>
+        location = element.system_to_block_index(dof);
+        renumbering[dof] = block_starts[location.first] + location.second;
+      }
+  }
+
+
+
+  // Fill @p interpolation_matrix, of size fe2.dofs_per_cell times
+  // fe1.dofs_per_cell, with the matrix that interpolates from @p fe1 to
+  // @p fe2. The element's own implementation is used if there is one;
+  // otherwise the matrix is built by evaluating the shape functions of fe1
+  // at the unit support points of fe2, which requires both elements to be
+  // primitive and fe2 to have unit support points.
+  template <int dim, typename number, int spacedim>
+  void get_interpolation_matrix (const FiniteElement<dim,spacedim> &fe1,
+                                 const FiniteElement<dim,spacedim> &fe2,
+                                 FullMatrix<number> &interpolation_matrix)
+  {
+    Assert (fe1.n_components() == fe2.n_components(),
+            ExcDimensionMismatch(fe1.n_components(), fe2.n_components()));
+    Assert(interpolation_matrix.m()==fe2.dofs_per_cell &&
+           interpolation_matrix.n()==fe1.dofs_per_cell,
+           ExcMatrixDimensionMismatch(interpolation_matrix.m(),
+                                      interpolation_matrix.n(),
+                                      fe2.dofs_per_cell,
+                                      fe1.dofs_per_cell));
+
+    // the easy way first: if the element implements the interpolation
+    // itself, we are done as soon as the call returns
+    try
+      {
+        gim_forwarder (fe1, fe2, interpolation_matrix);
+        return;
+      }
+    catch (typename FiniteElement<dim,spacedim>::ExcInterpolationNotImplemented &)
+      {
+        // it does not, so continue with the generic construction below
+      }
+
+    // the generic construction only works for primitive elements
+    Assert (fe1.is_primitive() == true, ExcFENotPrimitive());
+    Assert (fe2.is_primitive() == true, ExcFENotPrimitive());
+
+    // the shape functions of fe1 get evaluated at the unit support points
+    // of fe2
+    const std::vector<Point<dim> > &
+    target_points = fe2.get_unit_support_points ();
+
+    typedef FiniteElement<dim,spacedim> FEL;
+    Assert(target_points.size()==fe2.dofs_per_cell,
+           typename FEL::ExcFEHasNoSupportPoints());
+
+    for (unsigned int row=0; row<fe2.dofs_per_cell; ++row)
+      {
+        const unsigned int row_component
+          = fe2.system_to_component_index(row).first;
+
+        for (unsigned int col=0; col<fe1.dofs_per_cell; ++col)
+          {
+            // shape functions belonging to different components do not
+            // couple
+            const bool same_component
+              = (row_component == fe1.system_to_component_index(col).first);
+
+            interpolation_matrix(row,col)
+              = (same_component
+                 ?
+                 fe1.shape_value (col, target_points[row])
+                 :
+                 0.);
+          }
+      }
+  }
+
+
+
+  // Fill @p interpolation_matrix, a square matrix of size
+  // fe1.dofs_per_cell, with the product of the interpolation matrix from
+  // @p fe2 to @p fe1 and the interpolation matrix from @p fe1 to @p fe2,
+  // i.e. with the matrix of interpolating to fe2 and back again.
+  template <int dim, typename number, int spacedim>
+  void get_back_interpolation_matrix(const FiniteElement<dim,spacedim> &fe1,
+                                     const FiniteElement<dim,spacedim> &fe2,
+                                     FullMatrix<number> &interpolation_matrix)
+  {
+    Assert (fe1.n_components() == fe2.n_components(),
+            ExcDimensionMismatch(fe1.n_components(), fe2.n_components()));
+    Assert(interpolation_matrix.m()==fe1.dofs_per_cell &&
+           interpolation_matrix.n()==fe1.dofs_per_cell,
+           ExcMatrixDimensionMismatch(interpolation_matrix.m(),
+                                      interpolation_matrix.n(),
+                                      fe1.dofs_per_cell,
+                                      fe1.dofs_per_cell));
+
+    // forward direction: fe1 -> fe2
+    FullMatrix<number> forward (fe2.dofs_per_cell, fe1.dofs_per_cell);
+    get_interpolation_matrix(fe1, fe2, forward);
+
+    // backward direction: fe2 -> fe1
+    FullMatrix<number> backward (fe1.dofs_per_cell, fe2.dofs_per_cell);
+    get_interpolation_matrix(fe2, fe1, backward);
+
+    // interpolation_matrix = backward * forward
+    backward.mmult(interpolation_matrix, forward);
+  }
+
+
+
+  // Fill @p difference_matrix, a square matrix of size fe1.dofs_per_cell,
+  // with the identity matrix minus the back interpolation matrix computed
+  // by get_back_interpolation_matrix() for @p fe1 and @p fe2.
+  //
+  // The previous contents of @p difference_matrix are discarded. Without
+  // the explicit reset below, only the diagonal would be overwritten and
+  // nonzero off-diagonal entries passed in by the caller would end up in
+  // the result.
+  template <int dim, typename number, int spacedim>
+  void get_interpolation_difference_matrix (const FiniteElement<dim,spacedim> &fe1,
+                                            const FiniteElement<dim,spacedim> &fe2,
+                                            FullMatrix<number> &difference_matrix)
+  {
+    Assert (fe1.n_components() == fe2.n_components(),
+            ExcDimensionMismatch(fe1.n_components(), fe2.n_components()));
+    Assert(difference_matrix.m()==fe1.dofs_per_cell &&
+           difference_matrix.n()==fe1.dofs_per_cell,
+           ExcMatrixDimensionMismatch(difference_matrix.m(),
+                                      difference_matrix.n(),
+                                      fe1.dofs_per_cell,
+                                      fe1.dofs_per_cell));
+
+    FullMatrix<number> interpolation_matrix(fe1.dofs_per_cell);
+    get_back_interpolation_matrix(fe1, fe2, interpolation_matrix);
+
+    // set the output to the identity matrix: clear everything first, then
+    // put ones on the diagonal
+    difference_matrix = 0;
+    for (unsigned int i=0; i<fe1.dofs_per_cell; ++i)
+      difference_matrix(i,i) = 1.;
+
+    // compute difference
+    difference_matrix.add (-1, interpolation_matrix);
+  }
+
+
+
+  // Fill @p matrix, of size fe2.dofs_per_cell times fe1.dofs_per_cell, with
+  // the matrix of the L2 projection from the space of @p fe1 onto the space
+  // of @p fe2 on a single cell created by GridGenerator::hyper_cube().
+  // Column j holds the fe2 coefficients of the projection of the j-th shape
+  // function of fe1. Only implemented for scalar elements.
+  //
+  // The FEValues objects carry both template arguments so that they match
+  // the triangulation and the elements; with FEValues<dim> the function
+  // could only be compiled for spacedim == dim.
+  template <int dim, typename number, int spacedim>
+  void get_projection_matrix (const FiniteElement<dim,spacedim> &fe1,
+                              const FiniteElement<dim,spacedim> &fe2,
+                              FullMatrix<number> &matrix)
+  {
+    Assert (fe1.n_components() == 1, ExcNotImplemented());
+    Assert (fe1.n_components() == fe2.n_components(),
+            ExcDimensionMismatch(fe1.n_components(), fe2.n_components()));
+    Assert(matrix.m()==fe2.dofs_per_cell && matrix.n()==fe1.dofs_per_cell,
+           ExcMatrixDimensionMismatch(matrix.m(), matrix.n(),
+                                      fe2.dofs_per_cell,
+                                      fe1.dofs_per_cell));
+    matrix = 0;
+
+    unsigned int n1 = fe1.dofs_per_cell;
+    unsigned int n2 = fe2.dofs_per_cell;
+
+    // First, create a local mass matrix for
+    // the unit cell
+    Triangulation<dim,spacedim> tr;
+    GridGenerator::hyper_cube(tr);
+
+    // Choose a quadrature rule
+    // Gauss is exact up to degree 2n-1
+    const unsigned int degree = std::max(fe1.tensor_degree(), fe2.tensor_degree());
+    Assert (degree != numbers::invalid_unsigned_int,
+            ExcNotImplemented());
+
+    QGauss<dim> quadrature(degree+1);
+    // Set up FEValues. The weights are only ever taken from val2, so val1
+    // needs nothing but the shape function values.
+    const UpdateFlags flags = update_values | update_quadrature_points | update_JxW_values;
+    FEValues<dim,spacedim> val1 (fe1, quadrature, update_values);
+    val1.reinit (tr.begin_active());
+    FEValues<dim,spacedim> val2 (fe2, quadrature, flags);
+    val2.reinit (tr.begin_active());
+
+    // Integrate and invert mass matrix
+    // This happens in the target space
+    FullMatrix<double> mass (n2, n2);
+
+    for (unsigned int k=0; k<quadrature.size(); ++k)
+      {
+        const double w = val2.JxW(k);
+        for (unsigned int i=0; i<n2; ++i)
+          {
+            const double v = val2.shape_value(i,k);
+            for (unsigned int j=0; j<n2; ++j)
+              mass(i,j) += w*v * val2.shape_value(j,k);
+          }
+      }
+    // Gauss-Jordan should be
+    // sufficient since we expect the
+    // mass matrix to be
+    // well-conditioned
+    mass.gauss_jordan();
+
+    // Now, test every function of fe1
+    // with test functions of fe2 and
+    // compute the projection of each
+    // unit vector.
+    Vector<double> b(n2);
+    Vector<double> x(n2);
+
+    for (unsigned int j=0; j<n1; ++j)
+      {
+        // right hand side: integrals of the j-th shape function of fe1
+        // against each shape function of fe2
+        b = 0.;
+        for (unsigned int i=0; i<n2; ++i)
+          for (unsigned int k=0; k<quadrature.size(); ++k)
+            {
+              const double w = val2.JxW(k);
+              const double u = val1.shape_value(j,k);
+              const double v = val2.shape_value(i,k);
+              b(i) += u*v*w;
+            }
+
+        // Multiply by the inverse
+        mass.vmult(x,b);
+        for (unsigned int i=0; i<n2; ++i)
+          matrix(i,j) = x(i);
+      }
+  }
+
+
+  // Fill the square matrix N (fe.dofs_per_cell rows and columns) by
+  // evaluating every shape function of fe in the generalized support
+  // points and handing those values to fe.interpolate(). Column i of N
+  // receives the interpolated degrees of freedom of shape function i.
+  //
+  // The original note on this function states that it is meant for the
+  // 'raw' shape functions, i.e. while the inverse node matrix of the
+  // element is still empty; otherwise the result would be the identity.
+  template<int dim, int spacedim>
+  void
+  compute_node_matrix(
+    FullMatrix<double> &N,
+    const FiniteElement<dim,spacedim> &fe)
+  {
+    const unsigned int dofs_per_cell = fe.dofs_per_cell;
+    Assert (fe.has_generalized_support_points(), ExcNotInitialized());
+    Assert (N.n()==dofs_per_cell, ExcDimensionMismatch(N.n(), dofs_per_cell));
+    Assert (N.m()==dofs_per_cell, ExcDimensionMismatch(N.m(), dofs_per_cell));
+
+    const std::vector<Point<dim> > &support_points = fe.get_generalized_support_points();
+    const unsigned int n_points = support_points.size();
+
+    // Shape function values in all generalized support points, stored
+    // as component_values[component][point] with dim components.
+    std::vector<std::vector<double> >
+    component_values (dim, std::vector<double>(n_points));
+
+    // Receives the result of each interpolation.
+    std::vector<double> interpolated (dofs_per_cell);
+
+    for (unsigned int shape=0; shape<dofs_per_cell; ++shape)
+      {
+        // Tabulate shape function 'shape' in every support point ...
+        for (unsigned int p=0; p<n_points; ++p)
+          for (unsigned int c=0; c<dim; ++c)
+            component_values[c][p] = fe.shape_value_component(shape,support_points[p],c);
+
+        // ... interpolate, and store the result as one column of N.
+        fe.interpolate(interpolated, component_values);
+        for (unsigned int row=0; row<dofs_per_cell; ++row)
+          N(row,shape) = interpolated[row];
+      }
+  }
+
+
+  /*
+    template<>
+    void
+    compute_embedding_matrices(const FiniteElement<1,2> &,
+                               std::vector<std::vector<FullMatrix<double> > > &,
+                               const bool)
+    {
+      Assert(false, ExcNotImplemented());
+    }
+
+
+    template<>
+    void
+    compute_embedding_matrices(const FiniteElement<1,3> &,
+                               std::vector<std::vector<FullMatrix<double> > > &,
+                               const bool)
+    {
+      Assert(false, ExcNotImplemented());
+    }
+
+
+
+    template<>
+    void
+    compute_embedding_matrices(const FiniteElement<2,3>&,
+                               std::vector<std::vector<FullMatrix<double> > >&,
+                               const bool)
+    {
+      Assert(false, ExcNotImplemented());
+    }
+
+  */
+
+  namespace
+  {
+    // Compute column i of an embedding matrix: set up the values of
+    // coarse shape function i in all quadrature points of 'coarse' as
+    // right hand side, solve the least squares problem stored in H, and
+    // copy the solution into column i of this_matrix.
+    //
+    // The right hand side is indexed as (quadrature point * n_components
+    // + component). 'threshold' bounds the value returned by
+    // H.least_squares(); the bound is only checked inside Assert.
+    template<int dim, typename number, int spacedim>
+    void
+    compute_embedding_for_shape_function (
+      const unsigned int i,
+      const FiniteElement<dim, spacedim> &fe,
+      const FEValues<dim, spacedim> &coarse,
+      const Householder<double> &H,
+      FullMatrix<number> &this_matrix,
+      const double threshold)
+    {
+      const unsigned int dofs_per_cell = fe.dofs_per_cell;
+      const unsigned int n_components  = fe.n_components ();
+      const unsigned int n_q_points    = coarse.n_quadrature_points;
+
+      Vector<number> rhs (n_q_points*n_components);
+      Vector<number> solution (dofs_per_cell);
+
+      if (!fe.is_primitive ())
+        {
+          // Non-primitive element: every component of shape function i
+          // may contribute.
+          for (unsigned int c = 0; c < n_components; ++c)
+            for (unsigned int q = 0; q < n_q_points; ++q)
+              rhs (q * n_components + c) = coarse.shape_value_component (i, q, c);
+        }
+      else
+        {
+          // Primitive element: only the single component reported by
+          // system_to_component_index() is filled; all other entries
+          // of the freshly created vector are left untouched. The
+          // values for consecutive quadrature points are read through
+          // a pointer to the first one.
+          const unsigned int
+          c = fe.system_to_component_index (i).first;
+          const double *values_i = &coarse.shape_value (i, 0);
+
+          for (unsigned int q = 0; q < n_q_points; ++q)
+            rhs (q * n_components + c) = values_i[q];
+        }
+
+      // Solve the least squares problem and check the returned value
+      // against the threshold.
+      const double ls_result = H.least_squares (solution, rhs);
+      Assert (ls_result <= threshold, ExcLeastSquaresError (ls_result));
+      // Avoid warnings in release mode
+      (void)ls_result;
+      (void)threshold;
+
+      // Store the solution as column i of the result matrix.
+      for (unsigned int row = 0; row < dofs_per_cell; ++row)
+        this_matrix(row, i) = solution(row);
+    }
+
+
+    // Compute the embedding matrices of fe for one refinement case.
+    //
+    // A single reference cell is refined according to ref_case. For
+    // every child, each coarse cell shape function is fitted by the
+    // shape functions on the child through a least squares problem posed
+    // in the child's quadrature points.
+    //
+    // fe:        the finite element
+    // matrices:  one matrix per child; each must already have
+    //            fe.dofs_per_cell rows and columns (checked by Assert).
+    //            Column i of matrices[c] receives the least squares
+    //            solution for coarse shape function i on child c.
+    // ref_case:  numeric value converted to RefinementCase<dim>
+    // threshold: bound passed on to compute_embedding_for_shape_function
+    template<int dim, typename number, int spacedim>
+    void
+    compute_embedding_matrices_for_refinement_case (
+      const FiniteElement<dim, spacedim> &fe,
+      std::vector<FullMatrix<number> > &matrices,
+      const unsigned int ref_case,
+      const double threshold)
+    {
+      const unsigned int n  = fe.dofs_per_cell;
+      const unsigned int nc = GeometryInfo<dim>::n_children(RefinementCase<dim>(ref_case));
+      for (unsigned int i = 0; i < nc; ++i)
+        {
+          Assert(matrices[i].n() == n, ExcDimensionMismatch(matrices[i].n (), n));
+          Assert(matrices[i].m() == n, ExcDimensionMismatch(matrices[i].m (), n));
+        }
+
+      // Set up a mesh with a single reference cell and refine it once
+      // according to the requested refinement case
+      Triangulation<dim,spacedim> tria;
+      GridGenerator::hyper_cube (tria, 0, 1);
+      tria.begin_active()->set_refine_flag (RefinementCase<dim>(ref_case));
+      tria.execute_coarsening_and_refinement ();
+
+      const unsigned int degree = fe.degree;
+      QGauss<dim> q_fine (degree+1);
+      const unsigned int nq = q_fine.size();
+
+      FEValues<dim,spacedim> fine (fe, q_fine,
+                                   update_quadrature_points |
+                                   update_JxW_values |
+                                   update_values);
+
+      // We search for the polynomial on the small cell that equals the
+      // coarse polynomial in all quadrature points.
+      //
+      // First build the matrix of this least squares problem. It
+      // contains the values of the fine cell shape functions in the
+      // fine cell quadrature points, row index (point * nd + component).
+      // It is built once on the first child and reused for all children.
+      fine.reinit (tria.begin_active ());
+      const unsigned int nd = fe.n_components ();
+      FullMatrix<number> A (nq*nd, n);
+
+      for (unsigned int j = 0; j < n; ++j)
+        for (unsigned int d = 0; d < nd; ++d)
+          for (unsigned int k = 0; k < nq; ++k)
+            A (k * nd + d, j) = fine.shape_value_component (j, k, d);
+
+      Householder<double> H (A);
+      unsigned int cell_number = 0;
+
+      Threads::TaskGroup<void> task_group;
+
+      for (typename Triangulation<dim,spacedim>::active_cell_iterator
+           fine_cell = tria.begin_active (); fine_cell != tria.end ();
+           ++fine_cell, ++cell_number)
+        {
+          fine.reinit (fine_cell);
+
+          // Evaluate on the coarse cell (the first -- inactive -- cell
+          // on the lowest level of the triangulation created above) in
+          // the quadrature points of the current child. Only the first
+          // dim coordinates of the fine points are used.
+          const std::vector<Point<spacedim> > &q_points_fine = fine.get_quadrature_points();
+          std::vector<Point<dim> > q_points_coarse(q_points_fine.size());
+          for (unsigned int i=0; i<q_points_fine.size(); ++i)
+            for (unsigned int j=0; j<dim; ++j)
+              q_points_coarse[i](j) = q_points_fine[i](j);
+          const Quadrature<dim> q_coarse (q_points_coarse,
+                                          fine.get_JxW_values ());
+          FEValues<dim,spacedim> coarse (fe, q_coarse, update_values);
+
+          coarse.reinit (tria.begin (0));
+
+          // The reference must have the element type of 'matrices';
+          // a FullMatrix<double>& only binds when number == double.
+          FullMatrix<number> &this_matrix = matrices[cell_number];
+
+          // Compute one column per coarse grid basis function. Spawn
+          // subtasks if n is sufficiently large so that there are more
+          // than about 5000 operations in the inner loop (which is
+          // basically const * n^2 operations). All tasks are joined
+          // before 'coarse' goes out of scope.
+          if (n > 30)
+            {
+              for (unsigned int i = 0; i < n; ++i)
+                {
+                  task_group +=
+                    Threads::new_task (&compute_embedding_for_shape_function<dim, number, spacedim>,
+                                       i, fe, coarse, H, this_matrix, threshold);
+                }
+              task_group.join_all();
+            }
+          else
+            {
+              for (unsigned int i = 0; i < n; ++i)
+                {
+                  compute_embedding_for_shape_function<dim, number, spacedim>
+                  (i, fe, coarse, H, this_matrix, threshold);
+                }
+            }
+
+          // Remove small entries from the matrix
+          for (unsigned int i = 0; i < this_matrix.m (); ++i)
+            for (unsigned int j = 0; j < this_matrix.n (); ++j)
+              if (std::fabs (this_matrix (i, j)) < 1e-12)
+                this_matrix (i, j) = 0.;
+        }
+
+      // The loop must have visited exactly one active cell per child
+      Assert (cell_number == GeometryInfo<dim>::n_children (RefinementCase<dim> (ref_case)),
+              ExcInternalError ());
+    }
+  }
+
+
+  // Compute the embedding matrices of fe for a range of refinement
+  // cases, one task per case. With isotropic_only set, only
+  // RefinementCase<dim>::isotropic_refinement is handled; otherwise all
+  // cases from cut_x up to and including isotropic_refinement are.
+  // The matrices for refinement case r are written to matrices[r-1].
+  // threshold is handed through to the per-case computation.
+  template <int dim, typename number, int spacedim>
+  void
+  compute_embedding_matrices(const FiniteElement<dim,spacedim> &fe,
+                             std::vector<std::vector<FullMatrix<number> > > &matrices,
+                             const bool isotropic_only,
+                             const double threshold)
+  {
+    // Range of refinement cases to be processed
+    const unsigned int last_case = RefinementCase<dim>::isotropic_refinement;
+    unsigned int first_case = RefinementCase<dim>::cut_x;
+    if (isotropic_only)
+      first_case = last_case;
+
+    // Start one task per refinement case, then wait for all of them
+    Threads::TaskGroup<void> tasks;
+    for (unsigned int rc = first_case; rc <= last_case; ++rc)
+      tasks += Threads::new_task (&compute_embedding_matrices_for_refinement_case<dim, number, spacedim>,
+                                  fe, matrices[rc-1], rc, threshold);
+
+    tasks.join_all ();
+  }
+
+
+
+  // Compute embedding matrices restricted to the degrees of freedom on
+  // a face: for every child of the face, each coarse face shape function
+  // is fitted by the fine face shape functions through a least squares
+  // problem posed in quadrature points on the face.
+  //
+  // fe:          the finite element
+  // matrices:    one matrix per face child; each must already have
+  //              fe.dofs_per_face rows and columns (checked by Assert).
+  //              Column i receives the solution for coarse face dof i.
+  // face_coarse: face number on the coarse cell; only 0 is implemented
+  // face_fine:   face number on the fine cell; only 0 is implemented
+  // threshold:   bound on the value returned by
+  //              Householder::least_squares, checked only inside Assert
+  template <int dim, typename number, int spacedim>
+  void
+  compute_face_embedding_matrices(const FiniteElement<dim,spacedim> &fe,
+                                  FullMatrix<number> (&matrices)[GeometryInfo<dim>::max_children_per_face],
+                                  const unsigned int face_coarse,
+                                  const unsigned int face_fine,
+                                  const double threshold)
+  {
+    Assert(face_coarse==0, ExcNotImplemented());
+    Assert(face_fine==0, ExcNotImplemented());
+
+    const unsigned int nc = GeometryInfo<dim>::max_children_per_face;
+    const unsigned int n  = fe.dofs_per_face;
+    const unsigned int nd = fe.n_components();
+    const unsigned int degree = fe.degree;
+
+    // These flags select which vector components enter the least
+    // squares problem below when nd == dim.
+    const bool normal = fe.conforms(FiniteElementData<dim>::Hdiv);
+    const bool tangential = fe.conforms(FiniteElementData<dim>::Hcurl);
+
+    for (unsigned int i=0; i<nc; ++i)
+      {
+        Assert(matrices[i].n() == n, ExcDimensionMismatch(matrices[i].n(),n));
+        Assert(matrices[i].m() == n, ExcDimensionMismatch(matrices[i].m(),n));
+      }
+
+    // In order to make the loops below simpler, we introduce vectors
+    // containing for face dof indices 0..n-1 the number of the
+    // corresponding shape function on the cell, first for vertices,
+    // then lines, then quads.
+    std::vector<unsigned int> face_c_dofs(n);
+    std::vector<unsigned int> face_f_dofs(n);
+    {
+      unsigned int face_dof=0;
+      for (unsigned int i=0; i<GeometryInfo<dim>::vertices_per_face; ++i)
+        {
+          const unsigned int offset_c = GeometryInfo<dim>::face_to_cell_vertices(face_coarse, i)
+                                        *fe.dofs_per_vertex;
+          const unsigned int offset_f = GeometryInfo<dim>::face_to_cell_vertices(face_fine, i)
+                                        *fe.dofs_per_vertex;
+          for (unsigned int j=0; j<fe.dofs_per_vertex; ++j)
+            {
+              face_c_dofs[face_dof] = offset_c + j;
+              face_f_dofs[face_dof] = offset_f + j;
+              ++face_dof;
+            }
+        }
+      for (unsigned int i=1; i<=GeometryInfo<dim>::lines_per_face; ++i)
+        {
+          const unsigned int offset_c = fe.first_line_index
+                                        + GeometryInfo<dim>::face_to_cell_lines(face_coarse, i-1)
+                                        *fe.dofs_per_line;
+          const unsigned int offset_f = fe.first_line_index
+                                        + GeometryInfo<dim>::face_to_cell_lines(face_fine, i-1)
+                                        *fe.dofs_per_line;
+          for (unsigned int j=0; j<fe.dofs_per_line; ++j)
+            {
+              face_c_dofs[face_dof] = offset_c + j;
+              face_f_dofs[face_dof] = offset_f + j;
+              ++face_dof;
+            }
+        }
+      for (unsigned int i=1; i<=GeometryInfo<dim>::quads_per_face; ++i)
+        {
+          const unsigned int offset_c = fe.first_quad_index
+                                        + face_coarse
+                                        *fe.dofs_per_quad;
+          const unsigned int offset_f = fe.first_quad_index
+                                        + face_fine
+                                        *fe.dofs_per_quad;
+          for (unsigned int j=0; j<fe.dofs_per_quad; ++j)
+            {
+              face_c_dofs[face_dof] = offset_c + j;
+              face_f_dofs[face_dof] = offset_f + j;
+              ++face_dof;
+            }
+        }
+      Assert (face_dof == fe.dofs_per_face, ExcInternalError());
+    }
+
+    // Set up a mesh with a single reference cell and refine it once
+    Triangulation<dim,spacedim> tria;
+    GridGenerator::hyper_cube (tria, 0, 1);
+    tria.refine_global(1);
+    MappingCartesian<dim> mapping;
+
+    // Set up quadrature and FEValues for a face. According to the
+    // original author's note, FEFaceValues and FESubfaceValues cannot
+    // be used here because of the way they handle refinement cases, so
+    // a face quadrature is projected into the cell by hand instead.
+    QGauss<dim-1> q_gauss(degree+1);
+    const Quadrature<dim> q_fine = QProjector<dim>::project_to_face(q_gauss, face_fine);
+    const unsigned int nq = q_fine.size();
+
+    FEValues<dim> fine (mapping, fe, q_fine,
+                        update_quadrature_points | update_JxW_values | update_values);
+
+    // We search for the polynomial on the small cell that equals the
+    // coarse polynomial in all quadrature points.
+    //
+    // First build the matrix of this least squares problem. It contains
+    // the values of the fine cell shape functions in the fine cell
+    // quadrature points, row index (point * nd + component). It is
+    // built once on the first active cell and reused for all children.
+    //
+    // If nd == dim, component 0 is only entered when 'normal' is set
+    // and components 1..dim-1 only when 'tangential' is set; the
+    // remaining rows keep their initial value.
+    fine.reinit(tria.begin_active());
+    FullMatrix<number> A(nq*nd, n);
+    for (unsigned int j=0; j<n; ++j)
+      for (unsigned int k=0; k<nq; ++k)
+        if (nd != dim)
+          for (unsigned int d=0; d<nd; ++d)
+            A(k*nd+d,j) = fine.shape_value_component(face_f_dofs[j],k,d);
+        else
+          {
+            if (normal)
+              A(k*nd,j) = fine.shape_value_component(face_f_dofs[j],k,0);
+            if (tangential)
+              for (unsigned int d=1; d<dim; ++d)
+                A(k*nd+d,j) = fine.shape_value_component(face_f_dofs[j],k,d);
+          }
+
+    Householder<double> H(A);
+
+    Vector<number> v_coarse(nq*nd);
+    Vector<number> v_fine(n);
+
+
+
+    for (unsigned int cell_number = 0; cell_number < GeometryInfo<dim>::max_children_per_face;
+         ++cell_number)
+      {
+        const Quadrature<dim> q_coarse
+          = QProjector<dim>::project_to_subface(q_gauss, face_coarse, cell_number);
+        FEValues<dim> coarse (mapping, fe, q_coarse, update_values);
+
+        typename Triangulation<dim,spacedim>::active_cell_iterator fine_cell
+          = tria.begin(0)->child(GeometryInfo<dim>::child_cell_on_face(
+                                   tria.begin(0)->refinement_case(), face_coarse, cell_number));
+        fine.reinit(fine_cell);
+        coarse.reinit(tria.begin(0));
+
+        // The reference must have the element type of 'matrices';
+        // a FullMatrix<double>& only binds when number == double.
+        FullMatrix<number> &this_matrix = matrices[cell_number];
+
+        // Compute one column per coarse grid basis function
+        for (unsigned int i=0; i<n; ++i)
+          {
+            // The right hand side of the least squares problem consists
+            // of the values of the coarse grid function in each
+            // quadrature point, using the same component selection as
+            // for the matrix A above.
+            for (unsigned int k=0; k<nq; ++k)
+              if (nd != dim)
+                for (unsigned int d=0; d<nd; ++d)
+                  v_coarse(k*nd+d) = coarse.shape_value_component (face_c_dofs[i],k,d);
+              else
+                {
+                  if (normal)
+                    v_coarse(k*nd) = coarse.shape_value_component(face_c_dofs[i],k,0);
+                  if (tangential)
+                    for (unsigned int d=1; d<dim; ++d)
+                      v_coarse(k*nd+d) = coarse.shape_value_component(face_c_dofs[i],k,d);
+                }
+            // Solve the least squares problem.
+            const double result = H.least_squares(v_fine, v_coarse);
+            Assert (result <= threshold, ExcLeastSquaresError(result));
+            // Avoid compiler warnings in Release mode
+            (void)result;
+            (void)threshold;
+
+            // Copy the solution into column i of the result matrix.
+            for (unsigned int j=0; j<n; ++j)
+              this_matrix(j,i) = v_fine(j);
+          }
+        // Remove small entries from the matrix
+        for (unsigned int i=0; i<this_matrix.m(); ++i)
+          for (unsigned int j=0; j<this_matrix.n(); ++j)
+            if (std::fabs(this_matrix(i,j)) < 1e-12)
+              this_matrix(i,j) = 0.;
+      }
+  }
+
+
+
+  // Compute, for each refinement case and each child, the matrix that
+  // maps fine cell shape functions to coarse cell coefficients: the
+  // integrals of (coarse shape function i) * (fine shape function j)
+  // over the child are multiplied by the inverse of the coarse cell
+  // mass matrix.
+  //
+  // fe:             the finite element
+  // matrices:       matrices[r-1][c] is the matrix for refinement case r
+  //                 and child c; each must already have
+  //                 fe.dofs_per_cell rows and columns (checked by Assert)
+  // isotropic_only: if set, only
+  //                 RefinementCase<dim>::isotropic_refinement is
+  //                 handled, otherwise all cases starting at cut_x
+  template <int dim, typename number, int spacedim>
+  void
+  compute_projection_matrices(const FiniteElement<dim,spacedim> &fe,
+                              std::vector<std::vector<FullMatrix<number> > > &matrices,
+                              const bool isotropic_only)
+  {
+    const unsigned int n  = fe.dofs_per_cell;
+    const unsigned int nd = fe.n_components();
+    const unsigned int degree = fe.degree;
+
+    // prepare FEValues, quadrature etc on
+    // coarse cell
+    QGauss<dim> q_fine(degree+1);
+    const unsigned int nq = q_fine.size();
+
+    // create mass matrix on coarse cell; after this block 'mass' holds
+    // its inverse.
+    FullMatrix<number> mass(n, n);
+    {
+      // set up a triangulation for coarse cell
+      Triangulation<dim,spacedim> tr;
+      GridGenerator::hyper_cube (tr, 0, 1);
+
+      FEValues<dim,spacedim> coarse (fe, q_fine,
+                                     update_JxW_values | update_values);
+
+      typename Triangulation<dim,spacedim>::cell_iterator coarse_cell
+        = tr.begin(0);
+      coarse.reinit (coarse_cell);
+
+      const std::vector<double> &JxW = coarse.get_JxW_values();
+      for (unsigned int i=0; i<n; ++i)
+        for (unsigned int j=0; j<n; ++j)
+          if (fe.is_primitive())
+            {
+              // primitive element: read the values for consecutive
+              // quadrature points through a pointer to the first one
+              const double *coarse_i = &coarse.shape_value(i,0);
+              const double *coarse_j = &coarse.shape_value(j,0);
+              double mass_ij = 0;
+              for (unsigned int k=0; k<nq; ++k)
+                mass_ij += JxW[k] * coarse_i[k] * coarse_j[k];
+              mass(i,j) = mass_ij;
+            }
+          else
+            {
+              // non-primitive element: sum over all components
+              double mass_ij = 0;
+              for (unsigned int d=0; d<nd; ++d)
+                for (unsigned int k=0; k<nq; ++k)
+                  mass_ij += JxW[k] * coarse.shape_value_component(i,k,d)
+                             * coarse.shape_value_component(j,k,d);
+              mass(i,j) = mass_ij;
+            }
+
+      // invert mass matrix
+      mass.gauss_jordan();
+    }
+
+    // loop over all possible
+    // refinement cases
+    unsigned int ref_case = (isotropic_only)
+                            ? RefinementCase<dim>::isotropic_refinement
+                            : RefinementCase<dim>::cut_x;
+    for (; ref_case <= RefinementCase<dim>::isotropic_refinement; ++ref_case)
+      {
+        const unsigned int
+        nc = GeometryInfo<dim>::n_children(RefinementCase<dim>(ref_case));
+
+        for (unsigned int i=0; i<nc; ++i)
+          {
+            Assert(matrices[ref_case-1][i].n() == n,
+                   ExcDimensionMismatch(matrices[ref_case-1][i].n(),n));
+            Assert(matrices[ref_case-1][i].m() == n,
+                   ExcDimensionMismatch(matrices[ref_case-1][i].m(),n));
+          }
+
+        // create a respective refinement on the
+        // triangulation
+        Triangulation<dim,spacedim> tr;
+        GridGenerator::hyper_cube (tr, 0, 1);
+        tr.begin_active()->set_refine_flag(RefinementCase<dim>(ref_case));
+        tr.execute_coarsening_and_refinement();
+
+        FEValues<dim,spacedim> fine (StaticMappingQ1<dim,spacedim>::mapping, fe, q_fine,
+                                     update_quadrature_points | update_JxW_values |
+                                     update_values);
+
+        typename Triangulation<dim,spacedim>::cell_iterator coarse_cell
+          = tr.begin(0);
+
+        Vector<number> v_coarse(n);
+        Vector<number> v_fine(n);
+
+        for (unsigned int cell_number=0; cell_number<nc; ++cell_number)
+          {
+            // The reference must have the element type of 'matrices';
+            // a FullMatrix<double>& only binds when number == double.
+            FullMatrix<number> &this_matrix = matrices[ref_case-1][cell_number];
+
+            // Compute right hand side, which is a fine level basis
+            // function tested with the coarse level functions. The
+            // coarse cell is evaluated in the quadrature points of the
+            // child; only the first dim coordinates of those points
+            // are used.
+            fine.reinit(coarse_cell->child(cell_number));
+            const std::vector<Point<spacedim> > &q_points_fine = fine.get_quadrature_points();
+            std::vector<Point<dim> > q_points_coarse(q_points_fine.size());
+            for (unsigned int q=0; q<q_points_fine.size(); ++q)
+              for (unsigned int j=0; j<dim; ++j)
+                q_points_coarse[q](j) = q_points_fine[q](j);
+            Quadrature<dim> q_coarse (q_points_coarse,
+                                      fine.get_JxW_values());
+            FEValues<dim,spacedim> coarse (StaticMappingQ1<dim,spacedim>::mapping, fe, q_coarse, update_values);
+            coarse.reinit(coarse_cell);
+
+            // Build RHS
+
+            const std::vector<double> &JxW = fine.get_JxW_values();
+
+            // Outer loop over all fine
+            // grid shape functions phi_j
+            for (unsigned int j=0; j<fe.dofs_per_cell; ++j)
+              {
+                for (unsigned int i=0; i<fe.dofs_per_cell; ++i)
+                  {
+                    if (fe.is_primitive())
+                      {
+                        const double *coarse_i = &coarse.shape_value(i,0);
+                        const double *fine_j = &fine.shape_value(j,0);
+
+                        double update = 0;
+                        for (unsigned int k=0; k<nq; ++k)
+                          update += JxW[k] * coarse_i[k] * fine_j[k];
+                        v_fine(i) = update;
+                      }
+                    else
+                      {
+                        double update = 0;
+                        for (unsigned int d=0; d<nd; ++d)
+                          for (unsigned int k=0; k<nq; ++k)
+                            update += JxW[k] * coarse.shape_value_component(i,k,d)
+                                      * fine.shape_value_component(j,k,d);
+                        v_fine(i) = update;
+                      }
+                  }
+
+                // RHS ready. Apply the inverse mass matrix and enter
+                // the result as column j of the matrix
+                mass.vmult (v_coarse, v_fine);
+                for (unsigned int i=0; i<fe.dofs_per_cell; ++i)
+                  this_matrix(i,j) = v_coarse(i);
+              }
+
+            // Remove small entries from
+            // the matrix
+            for (unsigned int i=0; i<this_matrix.m(); ++i)
+              for (unsigned int j=0; j<this_matrix.n(); ++j)
+                if (std::fabs(this_matrix(i,j)) < 1e-12)
+                  this_matrix(i,j) = 0.;
+          }
+      }
+  }
+
+
+
+  // Register a finite element factory under a name.
+  //
+  // parameter_name: name to register; everything from the first
+  //                 character that is not a letter, digit or underscore
+  //                 onwards is discarded before the name is stored
+  // factory:        factory object; it is wrapped in a shared_ptr that
+  //                 is stored in fe_name_map[dim][spacedim]
+  //
+  // Registering a name that is already present triggers an Assert.
+  template <int dim, int spacedim>
+  void
+  add_fe_name(const std::string &parameter_name,
+              const FEFactoryBase<dim,spacedim> *factory)
+  {
+    // Erase everything after the
+    // actual class name. The position is kept as
+    // std::string::size_type so that std::string::npos ("no such
+    // character") is not truncated to a narrower integer type.
+    std::string name = parameter_name;
+    const std::string::size_type name_end =
+      name.find_first_not_of(std::string("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"));
+    if (name_end != std::string::npos)
+      name.erase(name_end);
+    // first make sure that no other
+    // thread intercepts the
+    // operation of this function;
+    // for this, acquire the lock
+    // until we quit this function
+    Threads::Mutex::ScopedLock lock(fe_name_map_lock);
+
+    Assert(fe_name_map[dim][spacedim].find(name) == fe_name_map[dim][spacedim].end(),
+           ExcMessage("Cannot change existing element in finite element name list"));
+
+    // Insert the normalized name into
+    // the map
+    fe_name_map[dim][spacedim][name] =
+      std_cxx11::shared_ptr<const Subscriptor> (factory);
+  }
+
+
+  namespace internal
+  {
+    namespace
+    {
+      // TODO: this encapsulates the call to the
+      // dimension-dependent fe_name_map so that we
+      // have a unique interface. could be done
+      // smarter?
+      template <int dim, int spacedim>
+      FiniteElement<dim,spacedim> *
+      get_fe_from_name_ext (std::string &name,
+                            const std::map<std::string,
+                            std_cxx11::shared_ptr<const Subscriptor> >
+                            &fe_name_map)
+      {
+        // Extract the name of the
+        // finite element class, which only
+        // contains characters, numbers and
+        // underscores.
+        unsigned int name_end =
+          name.find_first_not_of(std::string("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"));
+        const std::string name_part(name, 0, name_end);
+        name.erase(0, name_part.size());
+
+        // now things get a little more
+        // complicated: FESystem. it's
+        // more complicated, since we
+        // have to figure out what the
+        // base elements are. this can
+        // only be done recursively
+        if (name_part == "FESystem")
+          {
+            // next we have to get at the
+            // base elements. start with
+            // the first. wrap the whole
+            // block into try-catch to
+            // make sure we destroy the
+            // pointers we got from
+            // recursive calls if one of
+            // these calls should throw
+            // an exception
+            std::vector<const FiniteElement<dim,spacedim>*> base_fes;
+            std::vector<unsigned int>        base_multiplicities;
+            try
+              {
+                // Now, just the [...]
+                // part should be left.
+                if (name.size() == 0 || name[0] != '[')
+                  throw (std::string("Invalid first character in ") + name);
+                do
+                  {
+                    // Erase the
+                    // leading '[' or '-'
+                    name.erase(0,1);
+                    // Now, the name of the
+                    // first base element is
+                    // first... Let's get it
+                    base_fes.push_back (get_fe_from_name_ext<dim,spacedim> (name,
+                                                                            fe_name_map));
+                    // next check whether
+                    // FESystem placed a
+                    // multiplicity after
+                    // the element name
+                    if (name[0] == '^')
+                      {
+                        // yes. Delete the '^'
+                        // and read this
+                        // multiplicity
+                        name.erase(0,1);
+
+                        const std::pair<int,unsigned int> tmp
+                          = Utilities::get_integer_at_position (name, 0);
+                        name.erase(0, tmp.second);
+                        // add to length,
+                        // including the '^'
+                        base_multiplicities.push_back (tmp.first);
+                      }
+                    else
+                      // no, so
+                      // multiplicity is
+                      // 1
+                      base_multiplicities.push_back (1);
+
+                    // so that's it for
+                    // this base
+                    // element. base
+                    // elements are
+                    // separated by '-',
+                    // and the list is
+                    // terminated by ']',
+                    // so loop while the
+                    // next character is
+                    // '-'
+                  }
+                while (name[0] == '-');
+
+                // so we got to the end
+                // of the '-' separated
+                // list. make sure that
+                // we actually had a ']'
+                // there
+                if (name.size() == 0 || name[0] != ']')
+                  throw (std::string("Invalid first character in ") + name);
+                name.erase(0,1);
+                // just one more sanity check
+                Assert ((base_fes.size() == base_multiplicities.size())
+                        &&
+                        (base_fes.size() > 0),
+                        ExcInternalError());
+
+                // ok, apparently
+                // everything went ok. so
+                // generate the composed
+                // element
+                FiniteElement<dim,spacedim> *system_element = 0;
+
+                // uses new FESystem constructor
+                // which is independent of
+                // the number of FEs in the system
+                system_element = new FESystem<dim,spacedim>(base_fes, base_multiplicities);
+
+                // now we don't need the
+                // list of base elements
+                // any more
+                for (unsigned int i=0; i<base_fes.size(); ++i)
+                  delete base_fes[i];
+
+                // finally return our
+                // findings
+                // Add the closing ']' to
+                // the length
+                return system_element;
+
+              }
+            catch (...)
+              {
+                // ups, some exception
+                // was thrown. prevent a
+                // memory leak, and then
+                // pass on the exception
+                // to the caller
+                for (unsigned int i=0; i<base_fes.size(); ++i)
+                  delete base_fes[i];
+                throw;
+              }
+
+            // this is a place where we
+            // should really never get,
+            // since above we have either
+            // returned from the
+            // try-clause, or have
+            // re-thrown in the catch
+            // clause. check that we
+            // never get here
+            Assert (false, ExcInternalError());
+          }
+        else if (name_part == "FE_Nothing")
+          {
+            // remove the () from FE_Nothing()
+            name.erase(0,2);
+
+            // this is a bit of a hack, as
+            // FE_Nothing does not take a
+            // degree, but it does take an
+            // argument, which defaults to 1,
+            // so this properly returns
+            // FE_Nothing()
+            const Subscriptor *ptr = fe_name_map.find(name_part)->second.get();
+            const FEFactoryBase<dim,spacedim> *fef=dynamic_cast<const FEFactoryBase<dim,spacedim>*>(ptr);
+            return fef->get(1);
+          }
+        else
+          {
+            // Make sure no other thread
+            // is just adding an element
+            Threads::Mutex::ScopedLock lock (fe_name_map_lock);
+            AssertThrow (fe_name_map.find(name_part) != fe_name_map.end(),
+                         ExcInvalidFEName(name));
+
+            // Now, just the (degree)
+            // or (Quadrature<1>(degree+1))
+            // part should be left.
+            if (name.size() == 0 || name[0] != '(')
+              throw (std::string("Invalid first character in ") + name);
+            name.erase(0,1);
+            if (name[0] != 'Q')
+              {
+                const std::pair<int,unsigned int> tmp
+                  = Utilities::get_integer_at_position (name, 0);
+                name.erase(0, tmp.second+1);
+                const Subscriptor *ptr = fe_name_map.find(name_part)->second.get();
+                const FEFactoryBase<dim,spacedim> *fef=dynamic_cast<const FEFactoryBase<dim,spacedim>*>(ptr);
+                return fef->get(tmp.first);
+              }
+            else
+              {
+                unsigned int position = name.find('(');
+                const std::string quadrature_name(name, 0, position);
+                name.erase(0,position+1);
+                if (quadrature_name.compare("QGaussLobatto") == 0)
+                  {
+                    const std::pair<int,unsigned int> tmp
+                      = Utilities::get_integer_at_position (name, 0);
+                    // delete "))"
+                    name.erase(0, tmp.second+2);
+                    const Subscriptor *ptr = fe_name_map.find(name_part)->second.get();
+                    const FEFactoryBase<dim,spacedim> *fef=dynamic_cast<const FEFactoryBase<dim,spacedim>*>(ptr);
+                    return fef->get(QGaussLobatto<1>(tmp.first));
+                  }
+                else
+                  {
+                    AssertThrow (false,ExcNotImplemented());
+                  }
+              }
+          }
+
+
+        // hm, if we have come thus far, we
+        // didn't know what to do with the
+        // string we got. so do as the docs
+        // say: raise an exception
+        AssertThrow (false, ExcInvalidFEName(name));
+
+        // make some compilers happy that
+        // do not realize that we can't get
+        // here after throwing
+        return 0;
+      }
+
+
+
+      // Convenience overload: look up the name map stored at
+      // fe_name_map[dim][spacedim] and forward to get_fe_from_name_ext().
+      // The string is taken by non-const reference; the caller in
+      // get_fe_by_name() checks afterwards that it has become empty, so
+      // the parsing code is expected to remove what it has consumed.
+      template <int dim,int spacedim>
+      FiniteElement<dim,spacedim> *get_fe_from_name (std::string &name)
+      {
+        return get_fe_from_name_ext<dim,spacedim> (name, fe_name_map[dim][spacedim]);
+      }
+    }
+  }
+
+
+
+
+
+  // Parse the textual description @p parameter_name of a finite element
+  // and return a pointer to the element created from it.
+  //
+  // Before parsing, the name is normalized:
+  //  - it is passed through Utilities::trim(),
+  //  - spaces that do not separate two word characters are removed,
+  //  - template argument lists such as <2>, <d> or <dim> are stripped,
+  //  - "^dim" and "^d" are replaced by "^" followed by the digit of dim.
+  // The normalized string is handed to internal::get_fe_from_name(), which
+  // has to consume it completely.
+  //
+  // Throws ExcInvalidFEName if parsing fails or if characters are left over
+  // after the element name. In the latter case the element that was already
+  // created is destroyed before the exception is raised.
+  template <int dim, int spacedim>
+  FiniteElement<dim, spacedim> *
+  get_fe_by_name (const std::string &parameter_name)
+  {
+    std::string name = Utilities::trim(parameter_name);
+    std::size_t index = 1;
+    // remove spaces that are not between two word (things that match the
+    // regular expression [A-Za-z0-9_]) characters. The neighbors are
+    // converted to unsigned char first: passing a negative plain char to
+    // std::isalnum is undefined behavior.
+    while (2 < name.size() && index < name.size() - 1)
+      {
+        const unsigned char before = static_cast<unsigned char>(name[index - 1]);
+        const unsigned char after  = static_cast<unsigned char>(name[index + 1]);
+        if (name[index] == ' ' &&
+            (!(std::isalnum(before) || before == '_') ||
+             !(std::isalnum(after) || after == '_')))
+          {
+            name.erase(index, 1);
+          }
+        else
+          {
+            ++index;
+          }
+      }
+
+    // Create a version of the name
+    // string where all template
+    // parameters are eliminated.
+    for (unsigned int pos1 = name.find('<');
+         pos1 < name.size();
+         pos1 = name.find('<'))
+      {
+
+        const unsigned int pos2 = name.find('>');
+        // If there is only a single
+        // character between those two,
+        // it should be 'd' or the number
+        // representing the dimension.
+        if (pos2-pos1 == 2)
+          {
+            const char dimchar = '0' + dim;
+            (void)dimchar;
+            if (name.at(pos1+1) != 'd')
+              Assert (name.at(pos1+1) == dimchar,
+                      ExcInvalidFEDimension(name.at(pos1+1), dim));
+          }
+        else
+          Assert(pos2-pos1 == 4, ExcInvalidFEName(name));
+
+        // If pos1==pos2, then we are
+        // probably at the end of the
+        // string
+        if (pos2 != pos1)
+          name.erase(pos1, pos2-pos1+1);
+      }
+    // Replace all occurrences of "^dim"
+    // by "^d" to be handled by the
+    // next loop
+    for (unsigned int pos = name.find("^dim");
+         pos < name.size();
+         pos = name.find("^dim"))
+      name.erase(pos+2, 2);
+
+    // Replace all occurrences of "^d"
+    // by using the actual dimension
+    for (unsigned int pos = name.find("^d");
+         pos < name.size();
+         pos = name.find("^d"))
+      name.at(pos+1) = '0' + dim;
+
+    try
+      {
+        FiniteElement<dim,spacedim> *fe = internal::get_fe_from_name<dim,spacedim> (name);
+
+        // Make sure the auxiliary function
+        // ate up all characters of the name.
+        // If it did not, release the element
+        // first so that it is not leaked when
+        // the exception below is thrown.
+        if (name.size() != 0)
+          {
+            delete fe;
+            fe = 0;
+          }
+        AssertThrow (name.size() == 0,
+                     ExcInvalidFEName(parameter_name
+                                      + std::string(" extra characters after "
+                                                    "end of name")));
+        return fe;
+      }
+    catch (const std::string &errline)
+      {
+        // the parser signals syntax errors by throwing the offending
+        // remainder of the string; convert that into a proper exception
+        AssertThrow(false, ExcInvalidFEName(parameter_name
+                                            + std::string(" at ")
+                                            + errline));
+        return 0;
+      }
+  }
+
+
+  // Wrapper for the case spacedim == dim: forwards @p parameter_name
+  // unchanged to get_fe_by_name<dim,dim>() and returns its result.
+  template <int dim>
+  FiniteElement<dim> *
+  get_fe_from_name (const std::string &parameter_name)
+  {
+    return get_fe_by_name<dim,dim> (parameter_name);
+  }
+
+
+  // Compute X = M^{-1} Q for a scalar element, where
+  //   M(i,j) = sum_q phi_i(x_q) phi_j(x_q) w_q   over lhs_quadrature and
+  //   Q(i,q) = phi_i(x_q) w_q                    over rhs_quadrature.
+  // X is resized to dofs_per_cell x rhs_quadrature.size().
+  template <int dim, int spacedim>
+  void
+  compute_projection_from_quadrature_points_matrix (const FiniteElement<dim,spacedim> &fe,
+                                                    const Quadrature<dim>    &lhs_quadrature,
+                                                    const Quadrature<dim>    &rhs_quadrature,
+                                                    FullMatrix<double>       &X)
+  {
+    Assert (fe.n_components() == 1, ExcNotImplemented());
+
+    const unsigned int n_dofs       = fe.dofs_per_cell;
+    const unsigned int n_lhs_points = lhs_quadrature.size();
+    const unsigned int n_rhs_points = rhs_quadrature.size();
+
+    // mass matrix of the shape functions, integrated with lhs_quadrature
+    FullMatrix<double> mass_matrix (n_dofs, n_dofs);
+    for (unsigned int row=0; row<n_dofs; ++row)
+      for (unsigned int col=0; col<n_dofs; ++col)
+        for (unsigned int point=0; point<n_lhs_points; ++point)
+          mass_matrix(row,col) += fe.shape_value (row, lhs_quadrature.point(point)) *
+                                  fe.shape_value (col, lhs_quadrature.point(point)) *
+                                  lhs_quadrature.weight(point);
+
+    // weighted shape function values at the points of rhs_quadrature
+    FullMatrix<double> rhs_matrix (n_dofs, n_rhs_points);
+    for (unsigned int row=0; row<n_dofs; ++row)
+      for (unsigned int point=0; point<n_rhs_points; ++point)
+        rhs_matrix(row,point) += fe.shape_value (row, rhs_quadrature.point(point)) *
+                                 rhs_quadrature.weight(point);
+
+    // invert the mass matrix ...
+    FullMatrix<double> mass_matrix_inverse (n_dofs, n_dofs);
+    mass_matrix_inverse.invert (mass_matrix);
+
+    // ... and multiply it onto the right hand side matrix
+    X.reinit (n_dofs, n_rhs_points);
+    mass_matrix_inverse.mmult (X, rhs_matrix);
+
+    Assert (X.m() == fe.dofs_per_cell, ExcInternalError());
+    Assert (X.n() == rhs_quadrature.size(), ExcInternalError());
+  }
+
+
+
+  // Fill I_q with the values of the shape functions of a scalar element at
+  // the quadrature points: I_q(q,i) = phi_i(x_q). The matrix must already
+  // have quadrature.size() rows and fe.dofs_per_cell columns.
+  template <int dim, int spacedim>
+  void
+  compute_interpolation_to_quadrature_points_matrix (const FiniteElement<dim,spacedim> &fe,
+                                                     const Quadrature<dim>    &quadrature,
+                                                     FullMatrix<double>       &I_q)
+  {
+    Assert (fe.n_components() == 1, ExcNotImplemented());
+    Assert (I_q.m() == quadrature.size(),
+            ExcMessage ("Wrong matrix size"));
+    Assert (I_q.n() == fe.dofs_per_cell, ExcMessage ("Wrong matrix size"));
+
+    const unsigned int n_points = quadrature.size();
+    const unsigned int n_dofs   = fe.dofs_per_cell;
+
+    // one row per quadrature point, one column per shape function
+    for (unsigned int point=0; point<n_points; ++point)
+      for (unsigned int dof=0; dof<n_dofs; ++dof)
+        I_q(point,dof) = fe.shape_value (dof, quadrature.point(point));
+  }
+
+
+
+  // Apply projection_matrix separately to each of the dim components of a
+  // field of rank-1 tensors given at quadrature points, and store the
+  // result in the corresponding component of the tensors at the nodes.
+  // The matrix has one column per quadrature point and one row per node.
+  template <int dim>
+  void
+  compute_projection_from_quadrature_points(
+    const FullMatrix<double>                &projection_matrix,
+    const std::vector< Tensor<1, dim > >    &vector_of_tensors_at_qp,
+    std::vector< Tensor<1, dim > >          &vector_of_tensors_at_nodes)
+  {
+    // the input vector needs one entry per matrix column
+    Assert(projection_matrix.n_cols() == vector_of_tensors_at_qp.size(),
+           ExcDimensionMismatch(projection_matrix.n_cols(),
+                                vector_of_tensors_at_qp.size()));
+
+    // the output vector needs one entry per matrix row
+    Assert(projection_matrix.n_rows() == vector_of_tensors_at_nodes.size(),
+           ExcDimensionMismatch(projection_matrix.n_rows(),
+                                vector_of_tensors_at_nodes.size()));
+
+    const unsigned int n_nodes  = projection_matrix.n_rows();
+    const unsigned int n_points = projection_matrix.n_cols();
+
+    // scratch vectors holding a single tensor component
+    Vector<double> values_at_nodes(n_nodes);
+    Vector<double> values_at_points(n_points);
+
+    for (unsigned int component = 0; component < dim; ++component)
+      {
+        values_at_points = 0;
+
+        // gather this component from all quadrature points
+        for (unsigned int point = 0; point < n_points; ++point)
+          values_at_points(point) = vector_of_tensors_at_qp[point][component];
+
+        // values_at_nodes = projection_matrix * values_at_points
+        projection_matrix.vmult(values_at_nodes, values_at_points);
+
+        // scatter the projected component into the nodal tensors
+        for (unsigned int node = 0; node < n_nodes; ++node)
+          vector_of_tensors_at_nodes[node][component] = values_at_nodes(node);
+      }
+  }
+
+
+
+  // Same as the rank-1 tensor variant, but for symmetric rank-2 tensors:
+  // projection_matrix is applied to each independent component of the
+  // tensors at the quadrature points, and the result is written to the
+  // same component of the tensors at the nodes.
+  template <int dim>
+  void
+  compute_projection_from_quadrature_points(
+    const FullMatrix<double>                        &projection_matrix,
+    const std::vector< SymmetricTensor<2, dim > >   &vector_of_tensors_at_qp,
+    std::vector< SymmetricTensor<2, dim > >         &vector_of_tensors_at_nodes)
+  {
+    // the input vector needs one entry per matrix column
+    Assert(projection_matrix.n_cols() == vector_of_tensors_at_qp.size(),
+           ExcDimensionMismatch(projection_matrix.n_cols(),
+                                vector_of_tensors_at_qp.size()));
+
+    // the output vector needs one entry per matrix row
+    Assert(projection_matrix.n_rows() == vector_of_tensors_at_nodes.size(),
+           ExcDimensionMismatch(projection_matrix.n_rows(),
+                                vector_of_tensors_at_nodes.size()));
+
+    const unsigned int n_nodes  = projection_matrix.n_rows();
+    const unsigned int n_points = projection_matrix.n_cols();
+
+    // how many distinct entries a symmetric rank-2 tensor has
+    const unsigned int n_entries =
+      SymmetricTensor<2, dim >::n_independent_components;
+
+    // scratch vectors holding a single tensor entry
+    Vector<double> values_at_nodes(n_nodes);
+    Vector<double> values_at_points(n_points);
+
+    for (unsigned int entry = 0; entry < n_entries; ++entry)
+      {
+        values_at_points = 0;
+
+        // translate the unrolled index into a (row, column) pair
+        const TableIndices<2> indices = SymmetricTensor< 2, dim >::unrolled_to_component_indices(entry);
+        const unsigned int r = indices[0];
+        const unsigned int c = indices[1];
+
+        // gather this entry from all quadrature points
+        for (unsigned int point = 0; point < n_points; ++point)
+          values_at_points(point) = (vector_of_tensors_at_qp[point])[r][c];
+
+        // values_at_nodes = projection_matrix * values_at_points
+        projection_matrix.vmult(values_at_nodes, values_at_points);
+
+        // scatter the projected entry into the nodal tensors
+        for (unsigned int node = 0; node < n_nodes; ++node)
+          (vector_of_tensors_at_nodes[node])[r][c] = values_at_nodes(node);
+      }
+  }
+
+
+
+  // Face variant of compute_projection_from_quadrature_points_matrix():
+  // compute X = M^{-1} Q for a scalar element, where the shape functions
+  // are evaluated on face number @p face of @p cell,
+  //   M(i,j) = sum_q phi_i(x_q) phi_j(x_q) w_q   over lhs_quadrature and
+  //   Q(i,q) = phi_i(x_q) w_q                    over rhs_quadrature.
+  // X is resized to dofs_per_cell x rhs_quadrature.size().
+  //
+  // lhs_quadrature must have more points than the degree of the element.
+  template <int dim, int spacedim>
+  void
+  compute_projection_from_face_quadrature_points_matrix (const FiniteElement<dim, spacedim> &fe,
+                                                         const Quadrature<dim-1>    &lhs_quadrature,
+                                                         const Quadrature<dim-1>    &rhs_quadrature,
+                                                         const typename DoFHandler<dim, spacedim>::active_cell_iterator &cell,
+                                                         const unsigned int face,
+                                                         FullMatrix<double>       &X)
+  {
+    Assert (fe.n_components() == 1, ExcNotImplemented());
+    Assert (lhs_quadrature.size () > fe.degree, ExcNotGreaterThan (lhs_quadrature.size (), fe.degree));
+
+
+
+    // build the matrices M and Q
+    // described in the documentation
+    FullMatrix<double> M (fe.dofs_per_cell, fe.dofs_per_cell);
+    FullMatrix<double> Q (fe.dofs_per_cell, rhs_quadrature.size());
+
+    {
+      // need an FEFaceValues object to evaluate shape function
+      // values on the specified face. It carries both template
+      // arguments so that it matches the element and the cell
+      // iterator also when spacedim differs from dim.
+      FEFaceValues <dim,spacedim> fe_face_values (fe, lhs_quadrature, update_values);
+      fe_face_values.reinit (cell, face); // setup shape_value on this face.
+
+      for (unsigned int i=0; i<fe.dofs_per_cell; ++i)
+        for (unsigned int j=0; j<fe.dofs_per_cell; ++j)
+          for (unsigned int q=0; q<lhs_quadrature.size(); ++q)
+            M(i,j) += fe_face_values.shape_value (i, q) *
+                      fe_face_values.shape_value (j, q) *
+                      lhs_quadrature.weight(q);
+      // replace zero diagonal entries by one before inverting
+      // NOTE(review): presumably these belong to shape functions that
+      // vanish on the face and would make M singular -- confirm
+      for (unsigned int i=0; i<fe.dofs_per_cell; ++i)
+        {
+          M(i,i) = (M(i,i) == 0 ? 1 : M(i,i));
+        }
+    }
+
+    {
+      FEFaceValues <dim,spacedim> fe_face_values (fe, rhs_quadrature, update_values);
+      fe_face_values.reinit (cell, face); // setup shape_value on this face.
+
+      for (unsigned int i=0; i<fe.dofs_per_cell; ++i)
+        for (unsigned int q=0; q<rhs_quadrature.size(); ++q)
+          Q(i,q) += fe_face_values.shape_value (i, q) *
+                    rhs_quadrature.weight(q);
+    }
+    // then invert M
+    FullMatrix<double> M_inverse (fe.dofs_per_cell, fe.dofs_per_cell);
+    M_inverse.invert (M);
+
+    // finally compute the result
+    X.reinit (fe.dofs_per_cell, rhs_quadrature.size());
+    M_inverse.mmult (X, Q);
+
+    Assert (X.m() == fe.dofs_per_cell, ExcInternalError());
+    Assert (X.n() == rhs_quadrature.size(), ExcInternalError());
+  }
+
+
+
+  // Fill @p h2l such that h2l[i] is the lexicographic index of the i-th
+  // degree of freedom in hierarchic numbering, i.e. the numbering that
+  // lists vertices first, then lines, then quads, then the hex interior.
+  //
+  // @p degree is the polynomial degree; there are degree+1 support points
+  // per coordinate direction. @p h2l must already have (degree+1)^dim
+  // entries. Implemented for dim = 1, 2 and 3.
+  template <int dim>
+  void
+  hierarchic_to_lexicographic_numbering (unsigned int degree, std::vector<unsigned int> &h2l)
+  {
+    // number of support points in each
+    // direction
+    const unsigned int n = degree+1;
+
+    unsigned int dofs_per_cell = n;
+    for (unsigned int i=1; i<dim; ++i)
+      dofs_per_cell *= n;
+
+    // Assert size matches degree
+    AssertDimension (h2l.size(), dofs_per_cell);
+
+    // a degree zero element has a single degree of freedom. treat it
+    // separately: below, degree-1 would wrap around for unsigned numbers
+    // and the vertex assignments would write past the end of h2l
+    if (degree == 0)
+      {
+        h2l[0] = 0;
+        return;
+      }
+
+    // number of degrees of freedom in the interior of each line
+    const unsigned int dofs_per_line = degree - 1;
+
+    // the following lines of code are somewhat odd, due to the way the
+    // hierarchic numbering is organized. if someone would really want to
+    // understand these lines, you better draw some pictures where you
+    // indicate the indices and orders of vertices, lines, etc, along with the
+    // numbers of the degrees of freedom in hierarchical and lexicographical
+    // order
+    switch (dim)
+      {
+      case 1:
+      {
+        h2l[0] = 0;
+        h2l[1] = dofs_per_cell-1;
+        for (unsigned int i=2; i<dofs_per_cell; ++i)
+          h2l[i] = i-1;
+
+        break;
+      }
+
+      case 2:
+      {
+        unsigned int next_index = 0;
+        // first the four vertices
+        h2l[next_index++] = 0;
+        h2l[next_index++] = n-1;
+        h2l[next_index++] = n*(n-1);
+        h2l[next_index++] = n*n-1;
+
+        // left   line
+        for (unsigned int i=0; i<dofs_per_line; ++i)
+          h2l[next_index++] = (1+i)*n;
+
+        // right  line
+        for (unsigned int i=0; i<dofs_per_line; ++i)
+          h2l[next_index++] = (2+i)*n-1;
+
+        // bottom line
+        for (unsigned int i=0; i<dofs_per_line; ++i)
+          h2l[next_index++] = 1+i;
+
+        // top    line
+        for (unsigned int i=0; i<dofs_per_line; ++i)
+          h2l[next_index++] = n*(n-1)+i+1;
+
+        // inside quad
+        for (unsigned int i=0; i<dofs_per_line; ++i)
+          for (unsigned int j=0; j<dofs_per_line; ++j)
+            h2l[next_index++] = n*(i+1)+j+1;
+
+        Assert (next_index == dofs_per_cell, ExcInternalError());
+
+        break;
+      }
+
+      case 3:
+      {
+        unsigned int next_index = 0;
+        // first the eight vertices
+        h2l[next_index++] = 0;                 // 0
+        h2l[next_index++] = (      1)*degree;  // 1
+        h2l[next_index++] = (    n  )*degree;  // 2
+        h2l[next_index++] = (    n+1)*degree;  // 3
+        h2l[next_index++] = (n*n    )*degree;  // 4
+        h2l[next_index++] = (n*n  +1)*degree;  // 5
+        h2l[next_index++] = (n*n+n  )*degree;  // 6
+        h2l[next_index++] = (n*n+n+1)*degree;  // 7
+
+        // line 0
+        for (unsigned int i=0; i<dofs_per_line; ++i)
+          h2l[next_index++] = (i+1)*n;
+        // line 1
+        for (unsigned int i=0; i<dofs_per_line; ++i)
+          h2l[next_index++] = n-1+(i+1)*n;
+        // line 2
+        for (unsigned int i=0; i<dofs_per_line; ++i)
+          h2l[next_index++] = 1+i;
+        // line 3
+        for (unsigned int i=0; i<dofs_per_line; ++i)
+          h2l[next_index++] = 1+i+n*(n-1);
+
+        // line 4
+        for (unsigned int i=0; i<dofs_per_line; ++i)
+          h2l[next_index++] = (n-1)*n*n+(i+1)*n;
+        // line 5
+        for (unsigned int i=0; i<dofs_per_line; ++i)
+          h2l[next_index++] = (n-1)*(n*n+1)+(i+1)*n;
+        // line 6
+        for (unsigned int i=0; i<dofs_per_line; ++i)
+          h2l[next_index++] = n*n*(n-1)+i+1;
+        // line 7
+        for (unsigned int i=0; i<dofs_per_line; ++i)
+          h2l[next_index++] = n*n*(n-1)+i+1+n*(n-1);
+
+        // line 8
+        for (unsigned int i=0; i<dofs_per_line; ++i)
+          h2l[next_index++] = (i+1)*n*n;
+        // line 9
+        for (unsigned int i=0; i<dofs_per_line; ++i)
+          h2l[next_index++] = n-1+(i+1)*n*n;
+        // line 10
+        for (unsigned int i=0; i<dofs_per_line; ++i)
+          h2l[next_index++] = (i+1)*n*n+n*(n-1);
+        // line 11
+        for (unsigned int i=0; i<dofs_per_line; ++i)
+          h2l[next_index++] = n-1+(i+1)*n*n+n*(n-1);
+
+
+        // inside quads
+        // face 0
+        for (unsigned int i=0; i<dofs_per_line; ++i)
+          for (unsigned int j=0; j<dofs_per_line; ++j)
+            h2l[next_index++] = (i+1)*n*n+n*(j+1);
+        // face 1
+        for (unsigned int i=0; i<dofs_per_line; ++i)
+          for (unsigned int j=0; j<dofs_per_line; ++j)
+            h2l[next_index++] = (i+1)*n*n+n-1+n*(j+1);
+        // face 2, note the orientation!
+        for (unsigned int i=0; i<dofs_per_line; ++i)
+          for (unsigned int j=0; j<dofs_per_line; ++j)
+            h2l[next_index++] = (j+1)*n*n+i+1;
+        // face 3, note the orientation!
+        for (unsigned int i=0; i<dofs_per_line; ++i)
+          for (unsigned int j=0; j<dofs_per_line; ++j)
+            h2l[next_index++] = (j+1)*n*n+n*(n-1)+i+1;
+        // face 4
+        for (unsigned int i=0; i<dofs_per_line; ++i)
+          for (unsigned int j=0; j<dofs_per_line; ++j)
+            h2l[next_index++] = n*(i+1)+j+1;
+        // face 5
+        for (unsigned int i=0; i<dofs_per_line; ++i)
+          for (unsigned int j=0; j<dofs_per_line; ++j)
+            h2l[next_index++] = (n-1)*n*n+n*(i+1)+j+1;
+
+        // inside hex
+        for (unsigned int i=0; i<dofs_per_line; ++i)
+          for (unsigned int j=0; j<dofs_per_line; ++j)
+            for (unsigned int k=0; k<dofs_per_line; ++k)
+              h2l[next_index++]       = n*n*(i+1)+n*(j+1)+k+1;
+
+        Assert (next_index == dofs_per_cell, ExcInternalError());
+
+        break;
+      }
+
+      default:
+        Assert (false, ExcNotImplemented());
+      }
+  }
+
+
+
+  // Overload taking the element data: checks that @p h2l has one entry per
+  // degree of freedom of the cell and then delegates to the degree-based
+  // overload, passing fe.dofs_per_line+1 as the degree.
+  template <int dim>
+  void
+  hierarchic_to_lexicographic_numbering (const FiniteElementData<dim> &fe,
+                                         std::vector<unsigned int> &h2l)
+  {
+    Assert (h2l.size() == fe.dofs_per_cell,
+            ExcDimensionMismatch (h2l.size(), fe.dofs_per_cell));
+
+    // one more than the number of degrees of freedom on a line
+    const unsigned int degree = fe.dofs_per_line+1;
+    hierarchic_to_lexicographic_numbering<dim> (degree, h2l);
+  }
+
+
+
+  // Return the hierarchic-to-lexicographic map of a scalar element as a
+  // new vector with fe.dofs_per_cell entries.
+  template <int dim>
+  std::vector<unsigned int>
+  hierarchic_to_lexicographic_numbering (const FiniteElementData<dim> &fe)
+  {
+    Assert (fe.n_components() == 1, ExcInvalidFE());
+
+    // allocate the result and let the degree-based overload fill it
+    std::vector<unsigned int> numbering(fe.dofs_per_cell);
+    const unsigned int degree = fe.dofs_per_line+1;
+    hierarchic_to_lexicographic_numbering<dim> (degree, numbering);
+    return numbering;
+  }
+
+  // Output-argument variant: overwrite @p l2h with the vector returned by
+  // the single-argument lexicographic_to_hierarchic_numbering(fe).
+  template <int dim>
+  void
+  lexicographic_to_hierarchic_numbering (const FiniteElementData<dim> &fe,
+                                         std::vector<unsigned int>    &l2h)
+  {
+    l2h = lexicographic_to_hierarchic_numbering (fe);
+  }
+
+
+
+  // Return the lexicographic-to-hierarchic map of @p fe, obtained by
+  // inverting the permutation computed by
+  // hierarchic_to_lexicographic_numbering(fe).
+  template <int dim>
+  std::vector<unsigned int>
+  lexicographic_to_hierarchic_numbering (const FiniteElementData<dim> &fe)
+  {
+    const std::vector<unsigned int> h2l = hierarchic_to_lexicographic_numbering (fe);
+    return Utilities::invert_permutation(h2l);
+  }
+
+} // end of namespace FETools
+
+
+
+/*-------------- Explicit Instantiations -------------------------------*/
+#include "fe_tools.inst"
+
+
+/*----------------------------   fe_tools.cc     ---------------------------*/
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/fe_tools.inst.in b/source/fe/fe_tools.inst.in
new file mode 100644
index 0000000..8c6e7b4
--- /dev/null
+++ b/source/fe/fe_tools.inst.in
@@ -0,0 +1,185 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+// Explicit instantiations of FETools functions for every pair of
+// dimension and space dimension; the #if guard below restricts them to
+// pairs with dimension <= space dimension.
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+    namespace FETools
+      \{
+#if deal_II_dimension <= deal_II_space_dimension
+      template
+      void compute_block_renumbering (
+        const FiniteElement<deal_II_dimension,deal_II_space_dimension> & ,
+        std::vector<types::global_dof_index> &, std::vector<types::global_dof_index> &, bool);
+
+      template
+      void compute_projection_matrices<deal_II_dimension, double, deal_II_space_dimension>
+      (const FiniteElement<deal_II_dimension,deal_II_space_dimension> &,
+       std::vector<std::vector<FullMatrix<double> > > &, bool);
+
+      template
+      void compute_embedding_matrices<deal_II_dimension, double, deal_II_space_dimension>
+      (const FiniteElement<deal_II_dimension,deal_II_space_dimension> &,
+       std::vector<std::vector<FullMatrix<double> > > &, const bool, const double);
+#endif
+      \}
+  }
+
+
+
+//TODO[SP]: replace <deal_II_dimension> by <deal_II_dimension, deal_II_space_dimension>
+// where applicable and move to codimension cases above also when applicable
+
+// Explicit instantiations of the remaining FETools functions. The first
+// group is generated for all pairs with dimension <= space dimension, the
+// second group only for dimension == space dimension.
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+    namespace FETools
+      \{
+
+      // functions that are also available for codimension > 0
+#if deal_II_dimension <= deal_II_space_dimension
+      template
+      void get_interpolation_matrix<deal_II_dimension,double,deal_II_space_dimension>
+      (const FiniteElement<deal_II_dimension,deal_II_space_dimension> &,
+       const FiniteElement<deal_II_dimension,deal_II_space_dimension> &,
+       FullMatrix<double> &);
+
+      template FiniteElement<deal_II_dimension,deal_II_space_dimension> *
+      get_fe_by_name<deal_II_dimension,deal_II_space_dimension> (const std::string &);
+
+#endif
+
+      // functions that are only instantiated for codimension zero
+#if deal_II_dimension == deal_II_space_dimension
+
+      template class FEFactoryBase<deal_II_dimension>;
+
+      template FiniteElement<deal_II_dimension> *
+      get_fe_from_name<deal_II_dimension> (const std::string &);
+
+      template
+      void compute_node_matrix(
+        FullMatrix<double> &,
+        const FiniteElement<deal_II_dimension> &);
+
+      template
+      void compute_component_wise(
+        const FiniteElement<deal_II_dimension> & ,
+        std::vector<unsigned int> &, std::vector<std::vector<unsigned int> > &);
+
+      template
+      void get_back_interpolation_matrix<deal_II_dimension>
+      (const FiniteElement<deal_II_dimension> &,
+       const FiniteElement<deal_II_dimension> &,
+       FullMatrix<double> &);
+      template
+      void get_interpolation_difference_matrix<deal_II_dimension>
+      (const FiniteElement<deal_II_dimension> &,
+       const FiniteElement<deal_II_dimension> &,
+       FullMatrix<double> &);
+      template
+      void get_interpolation_matrix<deal_II_dimension>
+      (const FiniteElement<deal_II_dimension> &,
+       const FiniteElement<deal_II_dimension> &,
+       FullMatrix<float> &);
+      template
+      void get_back_interpolation_matrix<deal_II_dimension>
+      (const FiniteElement<deal_II_dimension> &,
+       const FiniteElement<deal_II_dimension> &,
+       FullMatrix<float> &);
+      template
+      void get_interpolation_difference_matrix<deal_II_dimension>
+      (const FiniteElement<deal_II_dimension> &,
+       const FiniteElement<deal_II_dimension> &,
+       FullMatrix<float> &);
+
+      template
+      void get_projection_matrix<deal_II_dimension>
+      (const FiniteElement<deal_II_dimension> &,
+       const FiniteElement<deal_II_dimension> &,
+       FullMatrix<double> &);
+
+      template
+      void compute_face_embedding_matrices<deal_II_dimension,double>
+      (const FiniteElement<deal_II_dimension> &, FullMatrix<double> ( &)[GeometryInfo<deal_II_dimension>::max_children_per_face],
+       unsigned int, unsigned int, const double);
+
+      template
+      void
+      compute_projection_from_quadrature_points_matrix (const FiniteElement<deal_II_dimension> &,
+                                                        const Quadrature<deal_II_dimension> &,
+                                                        const Quadrature<deal_II_dimension> &,
+                                                        FullMatrix<double> &);
+
+      template
+      void
+      compute_projection_from_quadrature_points(
+        const FullMatrix<double> &,
+        const std::vector< Tensor<1, deal_II_dimension > > &,
+        std::vector< Tensor<1, deal_II_dimension > > &);
+
+      template
+      void
+      compute_projection_from_quadrature_points(
+        const FullMatrix<double> &,
+        const std::vector<SymmetricTensor<2, deal_II_dimension> > &,
+        std::vector<SymmetricTensor<2, deal_II_dimension> > &);
+
+
+      template
+      void
+      compute_interpolation_to_quadrature_points_matrix (const FiniteElement<deal_II_dimension> &,
+                                                         const Quadrature<deal_II_dimension> &,
+                                                         FullMatrix<double> &);
+
+      // the face variant takes Quadrature<dim-1> arguments and is not
+      // instantiated in 1d
+#if deal_II_dimension != 1
+      template
+      void
+      compute_projection_from_face_quadrature_points_matrix (const FiniteElement<deal_II_dimension> &,
+                                                             const Quadrature<deal_II_dimension-1>    &,
+                                                             const Quadrature<deal_II_dimension-1>    &,
+                                                             const DoFHandler<deal_II_dimension>::active_cell_iterator & ,
+                                                             unsigned int,
+                                                             FullMatrix<double> &);
+#endif
+
+      // conversions between hierarchic and lexicographic numbering
+      template
+      void
+      hierarchic_to_lexicographic_numbering<deal_II_dimension>
+      (unsigned int,
+       std::vector<unsigned int> &);
+
+      template
+      void
+      hierarchic_to_lexicographic_numbering<deal_II_dimension>
+      (const FiniteElementData<deal_II_dimension> &,
+       std::vector<unsigned int> &);
+
+      template
+      void
+      lexicographic_to_hierarchic_numbering<deal_II_dimension>
+      (const FiniteElementData<deal_II_dimension> &,
+       std::vector<unsigned int> &);
+
+      template
+      std::vector<unsigned int>
+      hierarchic_to_lexicographic_numbering<deal_II_dimension>
+      (const FiniteElementData<deal_II_dimension> &);
+
+      template
+      std::vector<unsigned int>
+      lexicographic_to_hierarchic_numbering<deal_II_dimension>
+      (const FiniteElementData<deal_II_dimension> &);
+
+#endif
+      \}
+  }
diff --git a/source/fe/fe_tools_interpolate.cc b/source/fe/fe_tools_interpolate.cc
new file mode 100644
index 0000000..1a611c6
--- /dev/null
+++ b/source/fe/fe_tools_interpolate.cc
@@ -0,0 +1,835 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/qprojector.h>
+#include <deal.II/base/thread_management.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/parallel_vector.h>
+#include <deal.II/lac/parallel_block_vector.h>
+#include <deal.II/lac/petsc_parallel_vector.h>
+#include <deal.II/lac/petsc_block_vector.h>
+#include <deal.II/lac/petsc_parallel_block_vector.h>
+#include <deal.II/lac/trilinos_vector.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/fe/fe_tools.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/hp/dof_handler.h>
+
+#include <deal.II/base/std_cxx11/shared_ptr.h>
+
+#include <deal.II/base/index_set.h>
+
+#include <iostream>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace FETools
+{
+  // Convenience overload without constraints: forwards to the overload that
+  // takes a ConstraintMatrix, passing an empty, closed one.
+  template <int dim, int spacedim,
+            template <int, int> class DoFHandlerType1, template <int, int> class DoFHandlerType2,
+            class InVector, class OutVector>
+  void interpolate(const DoFHandlerType1<dim, spacedim> &dof1,
+                   const InVector                       &u1,
+                   const DoFHandlerType2<dim, spacedim> &dof2,
+                   OutVector                            &u2)
+  {
+    ConstraintMatrix no_constraints;
+    no_constraints.close();
+    interpolate(dof1, u1, dof2, no_constraints, u2);
+  }
+
+
+
+  template <int dim, int spacedim,
+            template <int, int> class DoFHandlerType1,
+            template <int, int> class DoFHandlerType2,
+            class InVector, class OutVector>
+  void
+  interpolate (const DoFHandlerType1<dim, spacedim> &dof1,        // source DoF handler
+               const InVector                       &u1,          // values on dof1 (read only)
+               const DoFHandlerType2<dim, spacedim> &dof2,        // target DoF handler, same triangulation
+               const ConstraintMatrix               &constraints, // distributed onto u2 at the very end
+               OutVector                            &u2)          // result on dof2; set to zero first
+  { // Interpolates u1 into u2 cell by cell; a dof reached from several cells receives the mean value.
+    Assert(&dof1.get_triangulation()==&dof2.get_triangulation(), ExcTriangulationMismatch());
+
+    Assert(u1.size()==dof1.n_dofs(),
+           ExcDimensionMismatch(u1.size(), dof1.n_dofs()));
+    Assert(u2.size()==dof2.n_dofs(),
+           ExcDimensionMismatch(u2.size(), dof2.n_dofs()));
+
+#ifdef DEAL_II_WITH_PETSC
+    if (dynamic_cast<const PETScWrappers::MPI::Vector *>(&u1) != 0)
+      if (dynamic_cast<const DoFHandler<dim>*>(&dof1) != 0)
+        {
+          // u1 is a PETSc MPI vector on a plain
+          // DoFHandler<dim>: also check its local size
+          // against the locally owned dofs of dof1
+          Assert(dynamic_cast<const PETScWrappers::MPI::Vector *>(&u1)->local_size() == dof1.n_locally_owned_dofs(),
+                 ExcDimensionMismatch(dynamic_cast<const PETScWrappers::MPI::Vector *>(&u1)->local_size(), dof1.n_locally_owned_dofs()));
+        };
+
+    if (dynamic_cast<PETScWrappers::MPI::Vector *>(&u2) != 0)
+      if (dynamic_cast<const DoFHandler<dim>*>(&dof2) != 0)
+        {
+          Assert(dynamic_cast<PETScWrappers::MPI::Vector *>(&u2)->local_size() == dof2.n_locally_owned_dofs(),
+                 ExcDimensionMismatch(dynamic_cast<PETScWrappers::MPI::Vector *>(&u2)->local_size(), dof2.n_locally_owned_dofs()));
+        };
+#endif
+    // Cell-local scratch vectors, allocated once at the largest
+    // dofs-per-cell count of each handler. They are reinit()-ed to
+    // the actual size on every cell; per the original authors' note,
+    // Vector makes sure that this does not lead to a reallocation
+    // of memory.
+    Vector<typename OutVector::value_type> u1_local(DoFTools::max_dofs_per_cell(dof1));
+    Vector<typename OutVector::value_type> u2_local(DoFTools::max_dofs_per_cell(dof2));
+
+    // Cache of interpolation matrices, keyed by (source element,
+    // target element) so each pair is computed only once. shared_ptr
+    // makes sure the matrices are released when the map is destroyed.
+    std::map<const FiniteElement<dim,spacedim> *,
+        std::map<const FiniteElement<dim,spacedim> *,
+        std_cxx11::shared_ptr<FullMatrix<double> > > >
+        interpolation_matrices;
+
+    typename DoFHandlerType1<dim,spacedim>::active_cell_iterator cell1 = dof1.begin_active(),
+                                                                 endc1 = dof1.end();
+    typename DoFHandlerType2<dim,spacedim>::active_cell_iterator cell2 = dof2.begin_active(),
+                                                                 endc2 = dof2.end();
+    (void)endc2; // endc2 is only used in the Assert after the cell loop
+
+    std::vector<types::global_dof_index> dofs;
+    dofs.reserve (DoFTools::max_dofs_per_cell (dof2));
+
+    u2 = 0;
+    OutVector touch_count(u2); // per-dof number of contributing cells, copy-constructed from u2
+    touch_count = 0;
+
+    // For distributed triangulations u1 can only be interpolated
+    // on cells owned by this processor, so fetch the locally owned
+    // subdomain id. If it is numbers::invalid_subdomain_id, the
+    // loop below works on every active cell.
+    const types::subdomain_id subdomain_id =
+      dof1.get_triangulation().locally_owned_subdomain();
+
+    for (; cell1!=endc1; ++cell1, ++cell2)
+      if ((cell1->subdomain_id() == subdomain_id)
+          ||
+          (subdomain_id == numbers::invalid_subdomain_id))
+        {
+          Assert(cell1->get_fe().n_components() == cell2->get_fe().n_components(),
+                 ExcDimensionMismatch (cell1->get_fe().n_components(),
+                                       cell2->get_fe().n_components()));
+
+          // Continuous target elements (those with dofs on
+          // vertices) need hanging node constraints on grids
+          // with hanging nodes. So if the target element has
+          // vertex dofs and no constraints were given, hanging
+          // nodes are not allowed: the Assert below checks
+          // that every face is at the boundary or has a
+          // neighbor on the same level as cell1.
+          const bool hanging_nodes_not_allowed
+            = ((cell2->get_fe().dofs_per_vertex != 0) &&
+               (constraints.n_constraints() == 0));
+
+          if (hanging_nodes_not_allowed)
+            for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+              Assert (cell1->at_boundary(face) ||
+                      cell1->neighbor(face)->level() == cell1->level(),
+                      ExcHangingNodesNotAllowed(0));
+
+
+          const unsigned int dofs_per_cell1 = cell1->get_fe().dofs_per_cell;
+          const unsigned int dofs_per_cell2 = cell2->get_fe().dofs_per_cell;
+          u1_local.reinit (dofs_per_cell1);
+          u2_local.reinit (dofs_per_cell2);
+
+          // Look up the interpolation matrix for this
+          // (source, target) element pair; operator[] yields
+          // a null pointer on first use, in which case the
+          // matrix is created, stored and filled now.
+          if (interpolation_matrices[&cell1->get_fe()][&cell2->get_fe()].get() == 0)
+            {
+              std_cxx11::shared_ptr<FullMatrix<double> >
+              interpolation_matrix (new FullMatrix<double> (dofs_per_cell2,
+                                                            dofs_per_cell1));
+              interpolation_matrices[&cell1->get_fe()][&cell2->get_fe()]
+                = interpolation_matrix;
+
+              get_interpolation_matrix(cell1->get_fe(),
+                                       cell2->get_fe(),
+                                       *interpolation_matrix);
+            }
+
+          cell1->get_dof_values(u1, u1_local);
+          interpolation_matrices[&cell1->get_fe()][&cell2->get_fe()]
+          ->vmult(u2_local, u1_local);
+
+          dofs.resize (dofs_per_cell2);
+          cell2->get_dof_indices(dofs);
+
+          for (unsigned int i=0; i<dofs_per_cell2; ++i)
+            {
+              u2(dofs[i])+=u2_local(i);
+              touch_count(dofs[i]) += 1;
+            }
+        }
+    // Both handlers were advanced in lockstep, so cell2
+    // must have reached its end as well.
+    Assert (cell2 == endc2, ExcInternalError());
+
+    u2.compress(VectorOperation::add);
+    touch_count.compress(VectorOperation::add);
+
+    // For parallel distributed vectors, only entries
+    // of dofs owned by this processor may be modified
+    // below, so get the set of locally owned dofs.
+    IndexSet  locally_owned_dofs = dof2.locally_owned_dofs();
+
+    // A dof shared by several cells received one
+    // contribution per cell (e.g. when a discontinuous
+    // element is interpolated to a continuous one), so
+    // divide by the touch count to get the mean value.
+    // For parallel vectors this is done only for the
+    // entries owned by this processor.
+    for (types::global_dof_index i=0; i<dof2.n_dofs(); ++i)
+      if (locally_owned_dofs.is_element(i))
+        {
+          Assert(touch_count(i) != typename OutVector::value_type(),
+                 ExcInternalError());
+          u2(i) /= touch_count(i);
+        }
+
+    // finish the element-wise writes above on parallel vectors
+    u2.compress(VectorOperation::insert);
+    // Apply the given constraints (e.g. hanging node constraints) to u2.
+    constraints.distribute(u2);
+  }
+
+
+
+  // Back interpolation on a DoFHandler with a single element: on every active
+  // cell this process works on, the local dof values of u1 are multiplied by
+  // the matrix from get_back_interpolation_matrix() and stored in u1_interpolated.
+  template <int dim, class InVector, class OutVector, int spacedim>
+  void
+  back_interpolate(const DoFHandler<dim,spacedim>    &dof1,
+                   const InVector                    &u1,
+                   const FiniteElement<dim,spacedim> &fe2,
+                   OutVector                         &u1_interpolated)
+  {
+    Assert(dof1.get_fe().n_components() == fe2.n_components(),
+           ExcDimensionMismatch(dof1.get_fe().n_components(), fe2.n_components()));
+    Assert(u1.size()==dof1.n_dofs(), ExcDimensionMismatch(u1.size(), dof1.n_dofs()));
+    Assert(u1_interpolated.size()==dof1.n_dofs(),
+           ExcDimensionMismatch(u1_interpolated.size(), dof1.n_dofs()));
+
+#ifdef DEAL_II_WITH_PETSC
+    // For parallel PETSc vectors on a DoFHandler<dim>, also compare the
+    // local vector sizes against the number of locally owned dofs.
+    if (dynamic_cast<const DoFHandler<dim>*>(&dof1) != 0)
+      {
+        if (dynamic_cast<const PETScWrappers::MPI::Vector *>(&u1) != 0)
+          {
+            Assert(dynamic_cast<const PETScWrappers::MPI::Vector *>(&u1)->local_size() == dof1.n_locally_owned_dofs(),
+                   ExcDimensionMismatch(dynamic_cast<const PETScWrappers::MPI::Vector *>(&u1)->local_size(), dof1.n_locally_owned_dofs()));
+          }
+
+        if (dynamic_cast<PETScWrappers::MPI::Vector *>(&u1_interpolated) != 0)
+          {
+            Assert(dynamic_cast<PETScWrappers::MPI::Vector *>(&u1_interpolated)->local_size() == dof1.n_locally_owned_dofs(),
+                   ExcDimensionMismatch(dynamic_cast<PETScWrappers::MPI::Vector *>(&u1_interpolated)->local_size(), dof1.n_locally_owned_dofs()));
+          }
+      }
+#endif
+
+    const FiniteElement<dim,spacedim> &fe1 = dof1.get_fe();
+    const unsigned int n_local_dofs = fe1.dofs_per_cell;
+
+    // Elements with vertex dofs are continuous and would need hanging node
+    // constraints on grids with hanging nodes. None are available here, so
+    // hanging nodes are rejected as soon as either element has vertex dofs.
+    const bool hanging_nodes_not_allowed =
+      !((fe1.dofs_per_vertex == 0) && (fe2.dofs_per_vertex == 0));
+
+    // The element pair is the same on all cells: build the matrix once.
+    FullMatrix<double> interpolation_matrix(n_local_dofs, n_local_dofs);
+    get_back_interpolation_matrix(fe1, fe2, interpolation_matrix);
+
+    Vector<typename OutVector::value_type> local_in(n_local_dofs);
+    Vector<typename OutVector::value_type> local_out(n_local_dofs);
+
+    const types::subdomain_id subdomain_id =
+      dof1.get_triangulation().locally_owned_subdomain();
+    const bool process_all_cells = (subdomain_id == numbers::invalid_subdomain_id);
+
+    typename DoFHandler<dim,spacedim>::active_cell_iterator cell = dof1.begin_active(),
+                                                            endc = dof1.end();
+    for (; cell!=endc; ++cell)
+      {
+        if (!process_all_cells && (cell->subdomain_id() != subdomain_id))
+          continue;
+
+        if (hanging_nodes_not_allowed)
+          for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+            Assert (cell->at_boundary(face) ||
+                    cell->neighbor(face)->level() == cell->level(),
+                    ExcHangingNodesNotAllowed(0));
+
+        cell->get_dof_values(u1, local_in);
+        interpolation_matrix.vmult(local_out, local_in);
+        cell->set_dof_values(local_out, u1_interpolated);
+      }
+
+    // finish the insert operations in case of a parallel vector
+    u1_interpolated.compress(VectorOperation::insert);
+  }
+
+
+
+  // Back interpolation for DoF handlers whose cells may use different finite
+  // elements: on every active cell this process works on, the local values of
+  // u1 are multiplied by the back interpolation matrix of (cell's element, fe2)
+  // and stored in u1_interpolated. Matrices are built once per cell element.
+  template <int dim,
+            template <int> class DoFHandlerType,
+            class InVector, class OutVector, int spacedim>
+  void
+  back_interpolate(const DoFHandlerType<dim>         &dof1,
+                   const InVector                    &u1,
+                   const FiniteElement<dim,spacedim> &fe2,
+                   OutVector                         &u1_interpolated)
+  {
+    Assert(u1.size() == dof1.n_dofs(),
+           ExcDimensionMismatch(u1.size(), dof1.n_dofs()));
+    Assert(u1_interpolated.size() == dof1.n_dofs(),
+           ExcDimensionMismatch(u1_interpolated.size(), dof1.n_dofs()));
+
+#ifdef DEAL_II_WITH_PETSC
+    if (dynamic_cast<const PETScWrappers::MPI::Vector *>(&u1) != 0)
+      if (dynamic_cast<const DoFHandler<dim>*>(&dof1) != 0)
+        {
+          // u1 is a PETSc MPI vector on a plain
+          // DoFHandler<dim>: also check its local size
+          // against the locally owned dofs of dof1
+          Assert(dynamic_cast<const PETScWrappers::MPI::Vector *>(&u1)->local_size() == dof1.n_locally_owned_dofs(),
+                 ExcDimensionMismatch(dynamic_cast<const PETScWrappers::MPI::Vector *>(&u1)->local_size(), dof1.n_locally_owned_dofs()));
+        };
+
+    if (dynamic_cast<PETScWrappers::MPI::Vector *>(&u1_interpolated) != 0)
+      if (dynamic_cast<const DoFHandler<dim>*>(&dof1) != 0)
+        {
+          Assert(dynamic_cast<PETScWrappers::MPI::Vector *>(&u1_interpolated)->local_size() == dof1.n_locally_owned_dofs(),
+                 ExcDimensionMismatch(dynamic_cast<PETScWrappers::MPI::Vector *>(&u1_interpolated)->local_size(), dof1.n_locally_owned_dofs()));
+        };
+#endif
+
+    Vector<typename OutVector::value_type> u1_local(DoFTools::max_dofs_per_cell(dof1));
+    Vector<typename OutVector::value_type> u1_int_local(DoFTools::max_dofs_per_cell(dof1));
+
+    const types::subdomain_id subdomain_id =
+      dof1.get_triangulation().locally_owned_subdomain();
+
+    typename DoFHandlerType<dim>::active_cell_iterator cell = dof1.begin_active(),
+                                                       endc = dof1.end();
+
+    // map from the finite elements found on the cells of dof1 to the
+    // corresponding back interpolation matrices (filled on demand below)
+    std::map<const FiniteElement<dim> *,
+        std_cxx11::shared_ptr<FullMatrix<double> > > interpolation_matrices;
+
+    for (; cell!=endc; ++cell)
+      if ((cell->subdomain_id() == subdomain_id)
+          ||
+          (subdomain_id == numbers::invalid_subdomain_id))
+        {
+          Assert(cell->get_fe().n_components() == fe2.n_components(),
+                 ExcDimensionMismatch(cell->get_fe().n_components(),
+                                      fe2.n_components()));
+
+          // Continuous elements (those with vertex dofs) need hanging
+          // node constraints on grids with hanging nodes. None are
+          // available here, so hanging nodes are not allowed as soon
+          // as one of the two elements has vertex dofs.
+          const bool hanging_nodes_not_allowed=
+            (cell->get_fe().dofs_per_vertex != 0) || (fe2.dofs_per_vertex != 0);
+
+          if (hanging_nodes_not_allowed)
+            for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+              Assert (cell->at_boundary(face) ||
+                      cell->neighbor(face)->level() == cell->level(),
+                      ExcHangingNodesNotAllowed(0));
+
+          const unsigned int dofs_per_cell1 = cell->get_fe().dofs_per_cell;
+
+          // Build the matrix the first time this cell's element is seen:
+          // operator[] yields a null pointer then, so test for == 0.
+          if (interpolation_matrices[&cell->get_fe()].get() == 0)
+            {
+              interpolation_matrices[&cell->get_fe()] =
+                std_cxx11::shared_ptr<FullMatrix<double> >
+                (new FullMatrix<double>(dofs_per_cell1, dofs_per_cell1));
+              get_back_interpolation_matrix(cell->get_fe(), fe2,
+                                            *interpolation_matrices[&cell->get_fe()]);
+            }
+
+          u1_local.reinit (dofs_per_cell1);
+          u1_int_local.reinit (dofs_per_cell1);
+
+          cell->get_dof_values(u1, u1_local);
+          interpolation_matrices[&cell->get_fe()]->vmult(u1_int_local, u1_local);
+          cell->set_dof_values(u1_int_local, u1_interpolated);
+        }
+
+    // finish the insert operations in case
+    // we work on a parallel vector
+    u1_interpolated.compress(VectorOperation::insert);
+  }
+
+
+
+  namespace internal
+  {
+    namespace
+    {
+      // Generic version: interpolates u1 to dof2 (using constraints2) into a
+      // serial Vector and from there back to dof1 (using constraints1).
+      template <int dim, int spacedim, class InVector>
+      void back_interpolate (const DoFHandler<dim,spacedim> &dof1,
+                             const ConstraintMatrix &constraints1,
+                             const InVector &u1,
+                             const DoFHandler<dim,spacedim> &dof2,
+                             const ConstraintMatrix &constraints2,
+                             InVector &u1_interpolated)
+      {
+        Vector<typename InVector::value_type> u1_on_dof2(dof2.n_dofs());
+        interpolate(dof1, u1, dof2, constraints2, u1_on_dof2);
+        interpolate(dof2, u1_on_dof2, dof1, constraints1, u1_interpolated);
+      }
+
+      // overload for parallel PETSc vectors
+#ifdef DEAL_II_WITH_PETSC
+      template <int dim, int spacedim>
+      void back_interpolate (const DoFHandler<dim,spacedim> &dof1,
+                             const ConstraintMatrix &constraints1,
+                             const PETScWrappers::MPI::Vector &u1,
+                             const DoFHandler<dim,spacedim> &dof2,
+                             const ConstraintMatrix &constraints2,
+                             PETScWrappers::MPI::Vector &u1_interpolated)
+      {
+        const IndexSet owned_dofs2 = dof2.locally_owned_dofs();
+        IndexSet relevant_dofs2;
+        DoFTools::extract_locally_relevant_dofs (dof2, relevant_dofs2);
+
+        // first step: write into a vector built from the locally owned dofs only
+        PETScWrappers::MPI::Vector u2_owned (owned_dofs2, u1.get_mpi_communicator());
+        interpolate(dof1, u1, dof2, constraints2, u2_owned);
+
+        // second step: read from a copy that is based on the sets of
+        // locally owned and locally relevant dofs of dof2
+        PETScWrappers::MPI::Vector u2_relevant (owned_dofs2, relevant_dofs2,
+                                                u1.get_mpi_communicator());
+        u2_relevant = u2_owned;
+        interpolate(dof2, u2_relevant, dof1, constraints1, u1_interpolated);
+      }
+#endif
+
+      // overload for parallel Trilinos vectors
+#ifdef DEAL_II_WITH_TRILINOS
+      template <int dim, int spacedim>
+      void back_interpolate (const DoFHandler<dim,spacedim> &dof1,
+                             const ConstraintMatrix &constraints1,
+                             const TrilinosWrappers::MPI::Vector &u1,
+                             const DoFHandler<dim,spacedim> &dof2,
+                             const ConstraintMatrix &constraints2,
+                             TrilinosWrappers::MPI::Vector &u1_interpolated)
+      {
+        const IndexSet owned_dofs2 = dof2.locally_owned_dofs();
+        IndexSet relevant_dofs2;
+        DoFTools::extract_locally_relevant_dofs (dof2, relevant_dofs2);
+
+        // first step: write into a vector built from the locally owned dofs only
+        TrilinosWrappers::MPI::Vector u2_owned (owned_dofs2, u1.get_mpi_communicator());
+        interpolate(dof1, u1, dof2, constraints2, u2_owned);
+
+        // second step: read from a copy that is based on the sets of
+        // locally owned and locally relevant dofs of dof2
+        TrilinosWrappers::MPI::Vector u2_relevant (owned_dofs2, relevant_dofs2,
+                                                   u1.get_mpi_communicator());
+        u2_relevant = u2_owned;
+        interpolate(dof2, u2_relevant, dof1, constraints1, u1_interpolated);
+      }
+#endif
+
+      // overload for parallel::distributed::Vector
+      template <int dim, int spacedim, typename Number>
+      void back_interpolate (const DoFHandler<dim,spacedim> &dof1,
+                             const ConstraintMatrix &constraints1,
+                             const parallel::distributed::Vector<Number> &u1,
+                             const DoFHandler<dim,spacedim> &dof2,
+                             const ConstraintMatrix &constraints2,
+                             parallel::distributed::Vector<Number> &u1_interpolated)
+      {
+        const IndexSet owned_dofs2 = dof2.locally_owned_dofs();
+        IndexSet relevant_dofs2;
+        DoFTools::extract_locally_relevant_dofs (dof2,
+                                                 relevant_dofs2);
+
+        // a single intermediate vector is used here: it is set up with both
+        // index sets and its ghost values are updated between the two steps
+        parallel::distributed::Vector<Number>
+        u1_on_dof2 (owned_dofs2,
+                    relevant_dofs2,
+                    u1.get_mpi_communicator());
+
+        interpolate(dof1, u1, dof2, constraints2, u1_on_dof2);
+        u1_on_dof2.update_ghost_values ();
+        interpolate(dof2, u1_on_dof2, dof1, constraints1, u1_interpolated);
+      }
+    }
+  }
+
+
+  // Back interpolation with constraints on both spaces. Discontinuous elements
+  // without constraints use the simpler cell-wise version; all others go via dof2.
+  template <int dim, class InVector, class OutVector, int spacedim>
+  void back_interpolate(const DoFHandler<dim,spacedim> &dof1,
+                        const ConstraintMatrix &constraints1,
+                        const InVector &u1,
+                        const DoFHandler<dim,spacedim> &dof2,
+                        const ConstraintMatrix &constraints2,
+                        OutVector &u1_interpolated)
+  {
+    const bool discontinuous_and_unconstrained =
+      (dof1.get_fe().dofs_per_vertex==0) && (dof2.get_fe().dofs_per_vertex==0) &&
+      (constraints1.n_constraints()==0) && (constraints2.n_constraints()==0);
+    if (!discontinuous_and_unconstrained)
+      {
+        Assert(dof1.get_fe().n_components() == dof2.get_fe().n_components(),
+               ExcDimensionMismatch(dof1.get_fe().n_components(), dof2.get_fe().n_components()));
+        Assert(u1.size()==dof1.n_dofs(), ExcDimensionMismatch(u1.size(), dof1.n_dofs()));
+        Assert(u1_interpolated.size()==dof1.n_dofs(),
+               ExcDimensionMismatch(u1_interpolated.size(), dof1.n_dofs()));
+        // interpolate to dof2 honoring constraints2, then back to dof1 honoring constraints1
+        internal::back_interpolate(dof1, constraints1, u1, dof2, constraints2,
+                                   u1_interpolated);
+        return;
+      }
+    // simpler version that only needs the finite element of dof2
+    back_interpolate(dof1, u1, dof2.get_fe(), u1_interpolated);
+  }
+
+
+
+  // Cell-wise interpolation difference: on every active cell this process
+  // works on, the local dof values of u1 are multiplied by the matrix from
+  // get_interpolation_difference_matrix() for (element of dof1, fe2) and the
+  // result is stored in u1_difference.
+  template <int dim, class InVector, class OutVector, int spacedim>
+  void interpolation_difference (const DoFHandler<dim,spacedim> &dof1,
+                                 const InVector &u1,
+                                 const FiniteElement<dim,spacedim> &fe2,
+                                 OutVector &u1_difference)
+  {
+    Assert(dof1.get_fe().n_components() == fe2.n_components(),
+           ExcDimensionMismatch(dof1.get_fe().n_components(), fe2.n_components()));
+    Assert(u1.size()==dof1.n_dofs(), ExcDimensionMismatch(u1.size(), dof1.n_dofs()));
+    Assert(u1_difference.size()==dof1.n_dofs(),
+           ExcDimensionMismatch(u1_difference.size(), dof1.n_dofs()));
+
+#ifdef DEAL_II_WITH_PETSC
+    // For parallel PETSc vectors on a DoFHandler<dim>, also compare the
+    // local vector sizes against the number of locally owned dofs.
+    if (dynamic_cast<const DoFHandler<dim>*>(&dof1) != 0)
+      {
+        if (dynamic_cast<const PETScWrappers::MPI::Vector *>(&u1) != 0)
+          {
+            Assert(dynamic_cast<const PETScWrappers::MPI::Vector *>(&u1)->local_size() == dof1.n_locally_owned_dofs(),
+                   ExcDimensionMismatch(dynamic_cast<const PETScWrappers::MPI::Vector *>(&u1)->local_size(), dof1.n_locally_owned_dofs()));
+          }
+
+        if (dynamic_cast<PETScWrappers::MPI::Vector *>(&u1_difference) != 0)
+          {
+            Assert(dynamic_cast<PETScWrappers::MPI::Vector *>(&u1_difference)->local_size() == dof1.n_locally_owned_dofs(),
+                   ExcDimensionMismatch(dynamic_cast<PETScWrappers::MPI::Vector *>(&u1_difference)->local_size(), dof1.n_locally_owned_dofs()));
+          }
+      }
+#endif
+
+    const FiniteElement<dim,spacedim> &fe1 = dof1.get_fe();
+    const unsigned int n_local_dofs = fe1.dofs_per_cell;
+
+    // Elements with vertex dofs are continuous and would need hanging node
+    // constraints on grids with hanging nodes. None are available here, so
+    // hanging nodes are rejected as soon as either element has vertex dofs.
+    const bool hanging_nodes_not_allowed =
+      !((fe1.dofs_per_vertex == 0) && (fe2.dofs_per_vertex == 0));
+
+    // The element pair is the same on all cells: build the matrix once.
+    FullMatrix<double> difference_matrix(n_local_dofs, n_local_dofs);
+    get_interpolation_difference_matrix(fe1, fe2,
+                                        difference_matrix);
+
+    Vector<typename OutVector::value_type> local_in(n_local_dofs);
+    Vector<typename OutVector::value_type> local_diff(n_local_dofs);
+
+    const types::subdomain_id subdomain_id =
+      dof1.get_triangulation().locally_owned_subdomain();
+    const bool process_all_cells = (subdomain_id == numbers::invalid_subdomain_id);
+
+    typename DoFHandler<dim,spacedim>::active_cell_iterator cell = dof1.begin_active(),
+                                                            endc = dof1.end();
+    for (; cell!=endc; ++cell)
+      {
+        if (!process_all_cells && (cell->subdomain_id() != subdomain_id))
+          continue;
+
+        if (hanging_nodes_not_allowed)
+          for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+            Assert (cell->at_boundary(face) ||
+                    cell->neighbor(face)->level() == cell->level(),
+                    ExcHangingNodesNotAllowed(0));
+
+        cell->get_dof_values(u1, local_in);
+        difference_matrix.vmult(local_diff, local_in);
+        cell->set_dof_values(local_diff, u1_difference);
+      }
+
+    // finish the insert operations in case of a parallel vector
+    u1_difference.compress(VectorOperation::insert);
+  }
+
+
+
+  namespace internal
+  {
+    namespace
+    {
+      // Generic version: back interpolate u1, then combine the result with u1.
+      template <int dim, class InVector, class OutVector, int spacedim>
+      void interpolation_difference (const DoFHandler<dim,spacedim> &dof1,
+                                     const ConstraintMatrix &constraints1,
+                                     const InVector &u1,
+                                     const DoFHandler<dim,spacedim> &dof2,
+                                     const ConstraintMatrix &constraints2,
+                                     OutVector &u1_difference)
+      {
+        // the output vector first receives the back interpolation of u1 ...
+        back_interpolate(dof1, constraints1, u1, dof2, constraints2, u1_difference);
+        // ... and is then scaled by -1 with u1 added on top
+        u1_difference.sadd(-1, u1);
+      }
+
+      // overload for parallel Trilinos vectors
+#ifdef DEAL_II_WITH_TRILINOS
+      template <int dim, int spacedim>
+      void interpolation_difference (const DoFHandler<dim,spacedim> &dof1,
+                                     const ConstraintMatrix &constraints1,
+                                     const TrilinosWrappers::MPI::Vector &u1,
+                                     const DoFHandler<dim,spacedim> &dof2,
+                                     const ConstraintMatrix &constraints2,
+                                     TrilinosWrappers::MPI::Vector &u1_difference)
+      {
+        back_interpolate(dof1, constraints1, u1, dof2, constraints2, u1_difference);
+
+        // Trilinos vectors with and without ghost entries are very different,
+        // so sadd cannot take u1 directly: copy u1 into a completely
+        // distributed vector with the partitioning of u1_difference first
+        TrilinosWrappers::MPI::Vector  u1_without_ghosts (u1_difference.vector_partitioner ());
+        u1_without_ghosts = u1;
+        u1_difference.sadd(-1, u1_without_ghosts);
+      }
+#endif
+    }
+  }
+
+
+
+  // Interpolation difference with constraints on both spaces. Discontinuous
+  // elements without constraints take the cheaper cell-wise version.
+  template <int dim, class InVector, class OutVector, int spacedim>
+  void interpolation_difference(const DoFHandler<dim,spacedim> &dof1,
+                                const ConstraintMatrix &constraints1,
+                                const InVector &u1,
+                                const DoFHandler<dim,spacedim> &dof2,
+                                const ConstraintMatrix &constraints2,
+                                OutVector &u1_difference)
+  {
+    const bool discontinuous_and_unconstrained =
+      (dof1.get_fe().dofs_per_vertex==0) && (dof2.get_fe().dofs_per_vertex==0) &&
+      (constraints1.n_constraints()==0) && (constraints2.n_constraints()==0);
+    if (discontinuous_and_unconstrained)
+      {
+        interpolation_difference(dof1, u1, dof2.get_fe(), u1_difference);
+        return;
+      }
+    internal::interpolation_difference(dof1, constraints1, u1, dof2, constraints2, u1_difference);
+  }
+
+
+
+  // Cell-wise projection of u1 (on dof1) into u2 (on dof2): the local values of
+  // u1 are multiplied by the matrix from get_projection_matrix() and added to
+  // the entries of u2 that belong to the corresponding cell of dof2.
+  template <int dim, class InVector, class OutVector, int spacedim>
+  void project_dg(const DoFHandler<dim,spacedim> &dof1,
+                  const InVector &u1,
+                  const DoFHandler<dim,spacedim> &dof2,
+                  OutVector &u2)
+  {
+    Assert(&dof1.get_triangulation()==&dof2.get_triangulation(), ExcTriangulationMismatch());
+    Assert(dof1.get_fe().n_components() == dof2.get_fe().n_components(),
+           ExcDimensionMismatch(dof1.get_fe().n_components(), dof2.get_fe().n_components()));
+    Assert(u1.size()==dof1.n_dofs(), ExcDimensionMismatch(u1.size(), dof1.n_dofs()));
+    Assert(u2.size()==dof2.n_dofs(), ExcDimensionMismatch(u2.size(), dof2.n_dofs()));
+
+    const FiniteElement<dim,spacedim> &fe1 = dof1.get_fe();
+    const FiniteElement<dim,spacedim> &fe2 = dof2.get_fe();
+    const unsigned int n1 = fe1.dofs_per_cell;
+    const unsigned int n2 = fe2.dofs_per_cell;
+
+    // one matrix serves all cells, since each handler has a single element
+    FullMatrix<double> matrix(n2,n1);
+    get_projection_matrix(fe1, fe2, matrix);
+
+    Vector<double> local_in(n1);
+    Vector<double> local_out(n2);
+    std::vector<types::global_dof_index> target_indices(n2);
+
+    // walk over the active cells of both handlers in lockstep
+    u2 = 0;
+    typename DoFHandler<dim,spacedim>::active_cell_iterator cell1 = dof1.begin_active();
+    for (typename DoFHandler<dim,spacedim>::active_cell_iterator cell2 = dof2.begin_active(),
+         end = dof2.end(); cell2 != end; ++cell1, ++cell2)
+      {
+        cell1->get_dof_values(u1, local_in);
+        matrix.vmult(local_out, local_in);
+        cell2->get_dof_indices(target_indices);
+        for (unsigned int i=0; i<n2; ++i)
+          u2(target_indices[i])+=local_out(i);
+      }
+  }
+
+
+  // Convenience overload without constraints: forwards to the overload that
+  // takes a ConstraintMatrix, passing an empty, closed one.
+  template <int dim, class InVector, class OutVector, int spacedim>
+  void extrapolate(const DoFHandler<dim,spacedim> &dof1, const InVector &u1,
+                   const DoFHandler<dim,spacedim> &dof2, OutVector &u2)
+  {
+    ConstraintMatrix no_constraints;
+    no_constraints.close();
+    extrapolate(dof1, u1, dof2, no_constraints, u2);
+  }
+
+
+
+  // Extrapolate u1 (on dof1) to u2 (on dof2). First u1 is interpolated to
+  // dof2 into a temporary vector. Then, level by level from the coarsest one,
+  // every non-active cell with at least one active child reads its values
+  // from the temporary (get_interpolated_dof_values) and writes them to u2
+  // (set_dof_values_by_interpolation). Finally the given constraints are
+  // distributed onto u2.
+  template <int dim, class InVector, class OutVector, int spacedim>
+  void extrapolate(const DoFHandler<dim,spacedim> &dof1,
+                   const InVector &u1,
+                   const DoFHandler<dim,spacedim> &dof2,
+                   const ConstraintMatrix &constraints,
+                   OutVector &u2)
+  {
+    Assert(dof1.get_fe().n_components() == dof2.get_fe().n_components(),
+           ExcDimensionMismatch(dof1.get_fe().n_components(), dof2.get_fe().n_components()));
+    Assert(&dof1.get_triangulation()==&dof2.get_triangulation(), ExcTriangulationMismatch());
+    Assert(u1.size()==dof1.n_dofs(), ExcDimensionMismatch(u1.size(), dof1.n_dofs()));
+    Assert(u2.size()==dof2.n_dofs(), ExcDimensionMismatch(u2.size(), dof2.n_dofs()));
+
+    // temporary vector, initialized from u2, that holds the interpolation
+    // of u1 onto dof2
+    OutVector u3;
+    u3.reinit(u2);
+    interpolate(dof1, u1, dof2, constraints, u3);
+
+    // every cell on the coarsest level has to be refined at least once;
+    // otherwise these cells could not be treated and the result would
+    // be bogus
+    {
+      typename DoFHandler<dim,spacedim>::cell_iterator cell = dof2.begin(0),
+                                                       endc = dof2.end(0);
+      for (; cell!=endc; ++cell)
+        Assert (cell->has_children(), ExcGridNotRefinedAtLeastOnce());
+    }
+
+    const unsigned int dofs_per_cell  = dof2.get_fe().dofs_per_cell;
+    Vector<typename OutVector::value_type> dof_values(dofs_per_cell);
+
+    // now traverse the grid bottom up
+    for (unsigned int level=0; level<dof1.get_triangulation().n_levels()-1; ++level)
+      {
+        typename DoFHandler<dim,spacedim>::cell_iterator cell=dof2.begin(level),
+                                                         endc=dof2.end(level);
+
+        for (; cell!=endc; ++cell)
+          {
+            if (cell->active())
+              continue;
+
+            // advance to the first active child, if there is one;
+            // the index ends up at n_children() otherwise
+            unsigned int child_n = 0;
+            while (child_n<cell->n_children() &&
+                   !cell->child(child_n)->active())
+              ++child_n;
+            const bool active_children = (child_n<cell->n_children());
+
+            // if there are active children, then we have to
+            // work on this cell: get the data from the one
+            // vector and set it on the other
+            if (active_children)
+              {
+                cell->get_interpolated_dof_values(u3, dof_values);
+                cell->set_dof_values_by_interpolation(dof_values, u2);
+              }
+          }
+      }
+
+    // Apply hanging node constraints.
+    constraints.distribute(u2);
+  }
+
+} // end of namespace FETools
+
+
+
+/*-------------- Explicit Instantiations -------------------------------*/
+#include "fe_tools_interpolate.inst"
+
+
+/*----------------------------   fe_tools.cc     ---------------------------*/
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/fe_tools_interpolate.inst.in b/source/fe/fe_tools_interpolate.inst.in
new file mode 100644
index 0000000..d88909b
--- /dev/null
+++ b/source/fe/fe_tools_interpolate.inst.in
@@ -0,0 +1,106 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+// Explicit instantiations of the two FETools::interpolate overloads that
+// take plain DoFHandler objects: one without and one with a
+// ConstraintMatrix argument. A pair is emitted for every combination of
+// dimension, space dimension and vector type named in the loop header; the
+// #if guard drops combinations in which the dimension exceeds the space
+// dimension. The namespace braces are written as \{ and \}, presumably so
+// that they are not mistaken for the braces delimiting the loop body
+// (TODO confirm against the instantiation preprocessor).
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS; Vector : SERIAL_VECTORS)
+  {
+    namespace FETools
+      \{
+#if deal_II_dimension <= deal_II_space_dimension
+      template
+	void interpolate<deal_II_dimension,deal_II_space_dimension>
+	(const DoFHandler<deal_II_dimension,deal_II_space_dimension> &, const Vector &,
+	 const DoFHandler<deal_II_dimension,deal_II_space_dimension> &, Vector &);
+
+      template
+	void interpolate<deal_II_dimension,deal_II_space_dimension>
+	(const DoFHandler<deal_II_dimension,deal_II_space_dimension> &, const Vector &,
+	 const DoFHandler<deal_II_dimension,deal_II_space_dimension> &, const ConstraintMatrix &,
+	 Vector &);
+#endif
+      \}
+  }
+
+// Explicit instantiations of FETools::interpolate for hp::DoFHandler
+// objects. Four variants are emitted per dimension: with and without a
+// ConstraintMatrix argument, each for Vector<double> and Vector<float>.
+// Although the loop header also runs over the space dimensions, the #if
+// guard restricts the output to the case where dimension and space
+// dimension coincide.
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+    namespace FETools
+      \{
+#if deal_II_dimension == deal_II_space_dimension
+      template
+	void interpolate<deal_II_dimension>
+	(const hp::DoFHandler<deal_II_dimension> &, const Vector<double> &,
+	 const hp::DoFHandler<deal_II_dimension> &, Vector<double> &);
+      template
+	void interpolate<deal_II_dimension>
+	(const hp::DoFHandler<deal_II_dimension> &, const Vector<double> &,
+	 const hp::DoFHandler<deal_II_dimension> &, const ConstraintMatrix &,
+	 Vector<double> &);
+      template
+	void interpolate<deal_II_dimension>
+	(const hp::DoFHandler<deal_II_dimension> &, const Vector<float> &,
+	 const hp::DoFHandler<deal_II_dimension> &, Vector<float> &);
+      template
+	void interpolate<deal_II_dimension>
+	(const hp::DoFHandler<deal_II_dimension> &, const Vector<float> &,
+	 const hp::DoFHandler<deal_II_dimension> &, const ConstraintMatrix &,
+	 Vector<float> &);
+#endif
+      \}
+  }
+
+
+
+// Explicit instantiations of the remaining FETools transfer functions for
+// every vector type VEC in the loop header: back_interpolate and
+// interpolation_difference (each in a FiniteElement-based and a
+// DoFHandler/ConstraintMatrix-based overload), project_dg, and extrapolate
+// (without and with a ConstraintMatrix). The #if guard restricts the output
+// to the case where dimension and space dimension coincide.
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS; VEC : SERIAL_VECTORS)
+  {
+    namespace FETools
+      \{
+#if deal_II_dimension == deal_II_space_dimension
+     template
+	void back_interpolate<deal_II_dimension>
+	(const DoFHandler<deal_II_dimension> &, const VEC &,
+	 const FiniteElement<deal_II_dimension> &, VEC &);
+      template
+	void back_interpolate<deal_II_dimension>
+	(const DoFHandler<deal_II_dimension> &, const ConstraintMatrix &,
+	 const VEC &,
+	 const DoFHandler<deal_II_dimension> &, const ConstraintMatrix &,
+	 VEC &);
+      template
+	void interpolation_difference<deal_II_dimension>
+	(const DoFHandler<deal_II_dimension> &, const VEC &,
+	 const FiniteElement<deal_II_dimension> &, VEC &);
+      template
+	void interpolation_difference<deal_II_dimension>
+	(const DoFHandler<deal_II_dimension> &, const ConstraintMatrix &,
+	 const VEC &,
+	 const DoFHandler<deal_II_dimension> &, const ConstraintMatrix &,
+	 VEC &);
+      template
+	void project_dg<deal_II_dimension>
+	(const DoFHandler<deal_II_dimension> &, const VEC &,
+	 const DoFHandler<deal_II_dimension> &, VEC &);
+      template
+	void extrapolate<deal_II_dimension>
+	(const DoFHandler<deal_II_dimension> &, const VEC &,
+	 const DoFHandler<deal_II_dimension> &, VEC &);
+      template
+	void extrapolate<deal_II_dimension>
+	(const DoFHandler<deal_II_dimension> &, const VEC &,
+	 const DoFHandler<deal_II_dimension> &, const ConstraintMatrix &,
+	 VEC &);
+#endif
+      \}
+  }
diff --git a/source/fe/fe_trace.cc b/source/fe/fe_trace.cc
new file mode 100644
index 0000000..87ed272
--- /dev/null
+++ b/source/fe/fe_trace.cc
@@ -0,0 +1,252 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/config.h>
+#include <deal.II/base/tensor_product_polynomials.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/qprojector.h>
+#include <deal.II/fe/fe_poly_face.h>
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_nothing.h>
+#include <deal.II/fe/fe_tools.h>
+#include <deal.II/fe/fe_trace.h>
+
+#include <sstream>
+#include <fstream>
+#include <iostream>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+// Construct a trace element of the given polynomial degree.
+//
+// The base class is set up with a (dim-1)-dimensional tensor product of
+// equidistant Lagrange polynomials and with the dofs-per-object layout
+// returned by get_dpo_vector(). An FE_Q object of the same degree is kept
+// as a member; face support points and interface constraints are copied
+// from it.
+//
+// Note that get_dpo_vector() is evaluated in the initializer list and
+// already throws for degree == 0, so the assertion in the body only
+// restates that requirement.
+template <int dim, int spacedim>
+FE_TraceQ<dim,spacedim>::FE_TraceQ (const unsigned int degree)
+  :
+  FE_PolyFace<TensorProductPolynomials<dim-1>, dim, spacedim> (
+    TensorProductPolynomials<dim-1>(Polynomials::LagrangeEquidistant::generate_complete_basis(degree)),
+    FiniteElementData<dim>(get_dpo_vector(degree), 1, degree, FiniteElementData<dim>::L2),
+    std::vector<bool>(1,true)),
+  fe_q (degree)
+{
+  // the message names the class as it is actually called (FE_TraceQ)
+  Assert (degree > 0,
+          ExcMessage ("FE_TraceQ can only be used for polynomial degrees "
+                      "greater than zero"));
+
+  // switch the polynomial space on the face from hierarchic to
+  // lexicographic numbering
+  std::vector<unsigned int> renumber (this->dofs_per_face);
+  FETools::hierarchic_to_lexicographic_numbering<dim-1> (degree, renumber);
+  this->poly_space.set_numbering(renumber);
+
+  // Initialize face support points
+  this->unit_face_support_points = fe_q.get_unit_face_support_points();
+  // Initialize constraint matrices
+  this->interface_constraints = fe_q.constraints();
+}
+
+
+
+// Return a newly allocated FE_TraceQ of the same polynomial degree as this
+// object. The object is created with 'new' and handed back as a raw
+// pointer.
+template <int dim, int spacedim>
+FiniteElement<dim,spacedim> *
+FE_TraceQ<dim,spacedim>::clone() const
+{
+  FiniteElement<dim,spacedim> *copy
+    = new FE_TraceQ<dim,spacedim>(this->degree);
+  return copy;
+}
+
+
+
+// Return the name of this element in the form
+// "FE_TraceQ<" + Utilities::dim_string(dim,spacedim) + ">(" + degree + ")".
+template <int dim, int spacedim>
+std::string
+FE_TraceQ<dim,spacedim>::get_name () const
+{
+  // FETools::get_fe_from_name relies on the exact format of the string
+  // assembled here; the two must be changed together
+  std::ostringstream name_stream;
+
+  name_stream << "FE_TraceQ<";
+  name_stream << Utilities::dim_string(dim,spacedim);
+  name_stream << ">(";
+  name_stream << this->degree;
+  name_stream << ")";
+
+  return name_stream.str();
+}
+
+
+
+// Tell whether shape function 'shape_index' has support on face
+// 'face_index'. Both indices are range-checked by assertions and the
+// question is then passed on to the embedded FE_Q object.
+template <int dim, int spacedim>
+bool
+FE_TraceQ<dim,spacedim>::has_support_on_face (const unsigned int shape_index,
+                                              const unsigned int face_index) const
+{
+  Assert (shape_index < this->dofs_per_cell,
+          ExcIndexRange (shape_index, 0, this->dofs_per_cell));
+  Assert (face_index < GeometryInfo<dim>::faces_per_cell,
+          ExcIndexRange (face_index, 0, GeometryInfo<dim>::faces_per_cell));
+
+  // The degrees of freedom of FE_TraceQ are numbered like those of FE_Q,
+  // only the interior ones (quad dofs in 2D, hex dofs in 3D) are absent.
+  // Since the first assertion above restricts shape_index to the trace
+  // dofs, fe_q can answer the question for us.
+  const bool supported_on_face
+    = fe_q.has_support_on_face (shape_index, face_index);
+  return supported_on_face;
+}
+
+
+
+// Return a pair consisting of a 1 x dofs_per_cell table in which every
+// entry is true, and a one-element vector holding the value 0.
+template <int dim, int spacedim>
+std::pair<Table<2,bool>, std::vector<unsigned int> >
+FE_TraceQ<dim,spacedim>::get_constant_modes () const
+{
+  const unsigned int n_dofs = this->dofs_per_cell;
+
+  // a single row in which all shape functions are flagged
+  Table<2,bool> all_flagged (1, n_dofs);
+  for (unsigned int dof=0; dof<n_dofs; ++dof)
+    all_flagged(0,dof) = true;
+
+  const std::vector<unsigned int> components (1, 0);
+
+  return std::pair<Table<2,bool>, std::vector<unsigned int> >
+         (all_flagged, components);
+}
+
+
+
+// Build the vector of degrees of freedom per geometric object for degree
+// 'deg'. Entry 0 is 1, entry i (0 < i < dim) is (deg-1)^i, and the last
+// entry (index dim) is 0. Throws if deg is zero.
+template <int dim, int spacedim>
+std::vector<unsigned int>
+FE_TraceQ<dim,spacedim>::get_dpo_vector (const unsigned int deg)
+{
+  AssertThrow(deg>0,ExcMessage("FE_TraceQ needs to be of degree > 0."));
+
+  // Same layout as for FE_Q, except that the object of highest dimension
+  // (line in 1D, quad in 2D, hex in 3D) carries no degrees of freedom.
+  // Start from all zeros so that this last entry needs no further
+  // treatment.
+  std::vector<unsigned int> dofs_per_object(dim+1, 0U);
+  dofs_per_object[0] = 1;
+
+  const unsigned int interior_points_per_line = deg-1;
+  for (unsigned int object_dim=1; object_dim<dim; ++object_dim)
+    dofs_per_object[object_dim]
+      = dofs_per_object[object_dim-1] * interior_points_per_line;
+
+  return dofs_per_object;
+}
+
+
+
+// Report whether hp constraints are implemented; the answer is taken
+// unchanged from the embedded FE_Q object.
+template <int dim, int spacedim>
+bool
+FE_TraceQ<dim,spacedim>::hp_constraints_are_implemented () const
+{
+  const bool implemented_by_fe_q = fe_q.hp_constraints_are_implemented ();
+  return implemented_by_fe_q;
+}
+
+
+// Decide which of this element and 'fe_other' dominates on a common face.
+//
+// - fe_other is an FE_TraceQ: the element of lower degree dominates; for
+//   equal degrees either one can.
+// - fe_other is an FE_Nothing: it dominates if its is_dominating() flag is
+//   set, otherwise there are no requirements.
+// - anything else: not implemented (assertion), with
+//   neither_element_dominates as the fallback return value.
+template <int dim, int spacedim>
+FiniteElementDomination::Domination
+FE_TraceQ<dim,spacedim>::
+compare_for_face_domination (const FiniteElement<dim,spacedim> &fe_other) const
+{
+  const FE_TraceQ<dim,spacedim> *other_trace
+    = dynamic_cast<const FE_TraceQ<dim,spacedim>*>(&fe_other);
+  if (other_trace != 0)
+    {
+      if (this->degree < other_trace->degree)
+        return FiniteElementDomination::this_element_dominates;
+      if (this->degree == other_trace->degree)
+        return FiniteElementDomination::either_element_can_dominate;
+      return FiniteElementDomination::other_element_dominates;
+    }
+
+  // NOTE(review): the cast below names FE_Nothing<dim> and therefore does
+  // not mention spacedim -- confirm this is intended if the element is
+  // ever used with dim != spacedim.
+  const FE_Nothing<dim> *other_nothing
+    = dynamic_cast<const FE_Nothing<dim>*>(&fe_other);
+  if (other_nothing != 0)
+    {
+      if (other_nothing->is_dominating())
+        return FiniteElementDomination::other_element_dominates;
+
+      // an FE_Nothing has no degrees of freedom and is typically used
+      // where no continuity across the interface is required
+      return FiniteElementDomination::no_requirements;
+    }
+
+  Assert (false, ExcNotImplemented());
+  return FiniteElementDomination::neither_element_dominates;
+}
+
+
+
+// Compute the interpolation matrix from 'source_fe' to this element on a
+// face. This simply calls get_subface_interpolation_matrix() with
+// numbers::invalid_unsigned_int as the subface index.
+template <int dim, int spacedim>
+void
+FE_TraceQ<dim,spacedim>::
+get_face_interpolation_matrix (const FiniteElement<dim,spacedim> &source_fe,
+                               FullMatrix<double>       &interpolation_matrix) const
+{
+  const unsigned int no_subface = numbers::invalid_unsigned_int;
+  get_subface_interpolation_matrix (source_fe,
+                                    no_subface,
+                                    interpolation_matrix);
+}
+
+
+
+// Compute the interpolation matrix from 'x_source_fe' to this element on
+// the given subface.
+//
+// - The source is an FE_TraceQ: the work is delegated to the embedded FE_Q
+//   objects of both elements.
+// - The source is an FE_Nothing: the matrix is left untouched.
+// - Otherwise ExcInterpolationNotImplemented is thrown.
+//
+// The matrix is expected to have x_source_fe.dofs_per_face rows and
+// this->dofs_per_face columns (checked by assertions).
+template <int dim, int spacedim>
+void
+FE_TraceQ<dim,spacedim>::
+get_subface_interpolation_matrix (const FiniteElement<dim,spacedim> &x_source_fe,
+                                  const unsigned int        subface,
+                                  FullMatrix<double>       &interpolation_matrix) const
+{
+  // size checks, as in FE_FaceQ
+  Assert (interpolation_matrix.n() == this->dofs_per_face,
+          ExcDimensionMismatch (interpolation_matrix.n(),
+                                this->dofs_per_face));
+  Assert (interpolation_matrix.m() == x_source_fe.dofs_per_face,
+          ExcDimensionMismatch (interpolation_matrix.m(),
+                                x_source_fe.dofs_per_face));
+
+  // is the source another trace element?
+  const FE_TraceQ<dim,spacedim> *trace_source
+    = dynamic_cast<const FE_TraceQ<dim,spacedim> *>(&x_source_fe);
+  if (trace_source != 0)
+    {
+      fe_q.get_subface_interpolation_matrix (trace_source->fe_q,
+                                             subface,
+                                             interpolation_matrix);
+      return;
+    }
+
+  // an FE_Nothing has no degrees of freedom, so there is nothing to fill in
+  if (dynamic_cast<const FE_Nothing<dim> *>(&x_source_fe) != 0)
+    return;
+
+  AssertThrow (false,(typename FiniteElement<dim,spacedim>::
+                      ExcInterpolationNotImplemented()));
+}
+
+
+
+// Constructor of the specialization for dim == 1. It does nothing beyond
+// forwarding the degree to the FE_FaceQ<1,spacedim> base class.
+template <int spacedim>
+FE_TraceQ<1,spacedim>::FE_TraceQ (const unsigned int degree)
+  :
+  FE_FaceQ<1,spacedim> (degree)
+{}
+
+
+
+// Return the name of the dim == 1 specialization in the form
+// "FE_TraceQ<" + Utilities::dim_string(1,spacedim) + ">(" + degree + ")".
+template <int spacedim>
+std::string
+FE_TraceQ<1,spacedim>::get_name () const
+{
+  // FETools::get_fe_from_name relies on the exact format of the string
+  // assembled here; the two must be changed together
+  std::ostringstream name_stream;
+
+  name_stream << "FE_TraceQ<";
+  name_stream << Utilities::dim_string(1,spacedim);
+  name_stream << ">(";
+  name_stream << this->degree;
+  name_stream << ")";
+
+  return name_stream.str();
+}
+
+
+
+// explicit instantiations
+#include "fe_trace.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/fe_trace.inst.in b/source/fe/fe_trace.inst.in
new file mode 100644
index 0000000..22aaa5a
--- /dev/null
+++ b/source/fe/fe_trace.inst.in
@@ -0,0 +1,19 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// Explicitly instantiate FE_TraceQ once per entry of DIMENSIONS. Only one
+// template argument is given, so the space dimension is whatever the class
+// template uses as its default.
+for (deal_II_dimension : DIMENSIONS)
+  {
+    template class FE_TraceQ<deal_II_dimension>;
+  }
diff --git a/source/fe/fe_values.cc b/source/fe/fe_values.cc
new file mode 100644
index 0000000..4392910
--- /dev/null
+++ b/source/fe/fe_values.cc
@@ -0,0 +1,4259 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/base/multithread_info.h>
+#include <deal.II/base/quadrature.h>
+#include <deal.II/base/signaling_nan.h>
+#include <deal.II/base/std_cxx11/unique_ptr.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/parallel_vector.h>
+#include <deal.II/lac/parallel_block_vector.h>
+#include <deal.II/lac/petsc_vector.h>
+#include <deal.II/lac/petsc_block_vector.h>
+#include <deal.II/lac/trilinos_vector.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_boundary.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/fe/mapping_q1.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/fe.h>
+
+#include <iomanip>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace
+{
+  // Read a single entry of a vector-like object through operator[] and
+  // return it as the vector's value_type.
+  template <class VectorType>
+  typename VectorType::value_type
+  get_vector_element (const VectorType &vector,
+                      const types::global_dof_index cell_number)
+  {
+    const typename VectorType::value_type entry = vector[cell_number];
+    return entry;
+  }
+
+
+  // Overload for index sets: yields 1 if cell_number is an element of the
+  // set and 0 otherwise.
+  IndexSet::value_type
+  get_vector_element (const IndexSet &is,
+                      const types::global_dof_index cell_number)
+  {
+    if (is.is_element(cell_number))
+      return 1;
+    else
+      return 0;
+  }
+}
+
+
+namespace
+{
+  // Build a table with dofs_per_cell * n_components entries. The entry for
+  // shape function i and component c sits at position i*n_components+c.
+  // It holds the row assigned to that (shape function, component) pair if
+  // the component is flagged as nonzero for the shape function, and
+  // numbers::invalid_unsigned_int otherwise. Rows are handed out
+  // consecutively, shape function by shape function.
+  template <int dim, int spacedim>
+  inline
+  std::vector<unsigned int>
+  make_shape_function_to_row_table (const FiniteElement<dim,spacedim> &fe)
+  {
+    const unsigned int n_components = fe.n_components();
+
+    // every entry starts out invalid; only nonzero components are
+    // overwritten below
+    std::vector<unsigned int> table (fe.dofs_per_cell * n_components,
+                                     numbers::invalid_unsigned_int);
+
+    unsigned int first_row_of_shape_function = 0;
+    for (unsigned int shape_function=0;
+         shape_function<fe.dofs_per_cell; ++shape_function)
+      {
+        unsigned int offset = 0;
+        for (unsigned int c=0; c<n_components; ++c)
+          {
+            if (fe.get_nonzero_components(shape_function)[c] != true)
+              continue;
+
+            table[shape_function*n_components+c]
+              = first_row_of_shape_function + offset;
+            ++offset;
+          }
+
+        // the next shape function starts after all nonzero components of
+        // this one
+        first_row_of_shape_function
+        += fe.n_nonzero_components (shape_function);
+      }
+
+    return table;
+  }
+}
+
+
+
+namespace FEValuesViews
+{
+  // Set up a view on the scalar component 'component' of the finite element
+  // used by 'fe_values'. For every shape function we record whether it is
+  // nonzero in that component and, if so, which row of the shape function
+  // tables belongs to it.
+  template <int dim, int spacedim>
+  Scalar<dim,spacedim>::Scalar (const FEValuesBase<dim,spacedim> &fe_values,
+                                const unsigned int                component)
+    :
+    fe_values (fe_values),
+    component (component),
+    shape_function_data (fe_values.fe->dofs_per_cell)
+  {
+    const FiniteElement<dim,spacedim> &fe = *fe_values.fe;
+    const unsigned int n_components = fe.n_components();
+
+    Assert (component < n_components,
+            ExcIndexRange(component, 0, n_components));
+
+//TODO: ideally the table of the same name stored in FEValuesBase would be
+// used here, but it has not been initialized yet when this constructor
+// runs, so it is rebuilt locally
+    const std::vector<unsigned int> row_table
+      = make_shape_function_to_row_table (fe);
+
+    for (unsigned int i=0; i<fe.dofs_per_cell; ++i)
+      {
+        const bool is_primitive = (fe.is_primitive() ||
+                                   fe.is_primitive(i));
+
+        // primitive shape functions live in exactly one component; for the
+        // others consult the mask of nonzero components
+        bool is_nonzero;
+        if (is_primitive == true)
+          is_nonzero = (component ==
+                        fe.system_to_component_index(i).first);
+        else
+          is_nonzero = (fe.get_nonzero_components(i)[component] == true);
+
+        shape_function_data[i].is_nonzero_shape_function_component
+          = is_nonzero;
+
+        if (is_nonzero == true)
+          shape_function_data[i].row_index
+            = row_table[i*n_components+component];
+        else
+          shape_function_data[i].row_index = numbers::invalid_unsigned_int;
+      }
+  }
+
+
+
+  // Default constructor. The fe_values reference is bound to the result of
+  // dereferencing a null pointer and the component is set to
+  // numbers::invalid_unsigned_int.
+  // NOTE(review): binding a reference through a null pointer is formally
+  // undefined behavior; an object built this way must not be used.
+  template <int dim, int spacedim>
+  Scalar<dim,spacedim>::Scalar ()
+    :
+    fe_values (*static_cast<dealii::FEValuesBase<dim,spacedim>*>(0)),
+    component (numbers::invalid_unsigned_int)
+  {}
+
+
+  // Copy assignment. Objects of this class are not meant to be copied: the
+  // function copies nothing, triggers an ExcInternalError assertion and
+  // returns *this.
+  template <int dim, int spacedim>
+  Scalar<dim,spacedim> &
+  Scalar<dim,spacedim>::operator= (const Scalar<dim,spacedim> &)
+  {
+    // we shouldn't be copying these objects
+    Assert (false, ExcInternalError());
+    return *this;
+  }
+
+
+
+  // Set up a view on the 'spacedim' consecutive components starting at
+  // 'first_vector_component'. For every shape function and every one of
+  // these components we record whether the shape function is nonzero there
+  // and which row of the shape function tables belongs to it. In addition
+  // single_nonzero_component is set to -2 if no component is nonzero, to -1
+  // if more than one is, and otherwise to the row index of the only nonzero
+  // component (whose position is stored in single_nonzero_component_index).
+  template <int dim, int spacedim>
+  Vector<dim,spacedim>::Vector (const FEValuesBase<dim,spacedim> &fe_values,
+                                const unsigned int       first_vector_component)
+    :
+    fe_values (fe_values),
+    first_vector_component (first_vector_component),
+    shape_function_data (fe_values.fe->dofs_per_cell)
+  {
+    const FiniteElement<dim,spacedim> &fe = *fe_values.fe;
+    const unsigned int n_components = fe.n_components();
+
+    Assert (first_vector_component+spacedim-1 < n_components,
+            ExcIndexRange(first_vector_component+spacedim-1, 0,
+                          n_components));
+
+//TODO: ideally the table of the same name stored in FEValuesBase would be
+// used here, but it has not been initialized yet when this constructor
+// runs, so it is rebuilt locally
+    const std::vector<unsigned int> row_table
+      = make_shape_function_to_row_table (fe);
+
+    for (unsigned int i=0; i<fe.dofs_per_cell; ++i)
+      {
+        const bool is_primitive = (fe.is_primitive() ||
+                                   fe.is_primitive(i));
+
+        // number of nonzero components within the view and position of the
+        // first one found
+        unsigned int n_nonzero_components = 0;
+        unsigned int first_nonzero_d = numbers::invalid_unsigned_int;
+
+        for (unsigned int d=0; d<spacedim; ++d)
+          {
+            const unsigned int component = first_vector_component + d;
+
+            bool is_nonzero;
+            if (is_primitive == true)
+              is_nonzero = (component ==
+                            fe.system_to_component_index(i).first);
+            else
+              is_nonzero = (fe.get_nonzero_components(i)[component] == true);
+
+            shape_function_data[i].is_nonzero_shape_function_component[d]
+              = is_nonzero;
+
+            if (is_nonzero == true)
+              {
+                shape_function_data[i].row_index[d]
+                  = row_table[i*n_components+component];
+
+                if (n_nonzero_components == 0)
+                  first_nonzero_d = d;
+                ++n_nonzero_components;
+              }
+            else
+              shape_function_data[i].row_index[d]
+                = numbers::invalid_unsigned_int;
+          }
+
+        if (n_nonzero_components == 0)
+          shape_function_data[i].single_nonzero_component = -2;
+        else if (n_nonzero_components > 1)
+          shape_function_data[i].single_nonzero_component = -1;
+        else
+          {
+            shape_function_data[i].single_nonzero_component
+              = shape_function_data[i].row_index[first_nonzero_d];
+            shape_function_data[i].single_nonzero_component_index
+              = first_nonzero_d;
+          }
+      }
+  }
+
+
+  // Default constructor. The fe_values reference is bound to the result of
+  // dereferencing a null pointer and the first vector component is set to
+  // numbers::invalid_unsigned_int.
+  // NOTE(review): binding a reference through a null pointer is formally
+  // undefined behavior; an object built this way must not be used.
+  template <int dim, int spacedim>
+  Vector<dim,spacedim>::Vector ()
+    :
+    fe_values (*static_cast<dealii::FEValuesBase<dim,spacedim>*>(0)),
+    first_vector_component (numbers::invalid_unsigned_int)
+  {}
+
+
+
+  // Copy assignment. Objects of this class are not meant to be copied: the
+  // function copies nothing, triggers an ExcInternalError assertion and
+  // returns *this.
+  template <int dim, int spacedim>
+  Vector<dim,spacedim> &
+  Vector<dim,spacedim>::operator= (const Vector<dim,spacedim> &)
+  {
+    // we shouldn't be copying these objects
+    Assert (false, ExcInternalError());
+    return *this;
+  }
+
+
+  // Set up a view on the n_independent_components consecutive components of
+  // a symmetric rank-2 tensor starting at 'first_tensor_component'. For
+  // every shape function and every one of these components we record
+  // whether the shape function is nonzero there and which row of the shape
+  // function tables belongs to it. In addition single_nonzero_component is
+  // set to -2 if no component is nonzero, to -1 if more than one is, and
+  // otherwise to the row index of the only nonzero component (whose
+  // position is stored in single_nonzero_component_index).
+  template <int dim, int spacedim>
+  SymmetricTensor<2, dim, spacedim>::
+  SymmetricTensor(const FEValuesBase<dim, spacedim> &fe_values,
+                  const unsigned int first_tensor_component)
+    :
+    fe_values(fe_values),
+    first_tensor_component(first_tensor_component),
+    shape_function_data(fe_values.fe->dofs_per_cell)
+  {
+    const FiniteElement<dim,spacedim> &fe = *fe_values.fe;
+    const unsigned int n_components = fe.n_components();
+
+    // equals (dim*dim+dim)/2
+    const unsigned int n_independent
+      = dealii::SymmetricTensor<2,dim>::n_independent_components;
+
+    Assert(first_tensor_component + n_independent - 1
+           <
+           n_components,
+           ExcIndexRange(first_tensor_component + n_independent - 1,
+                         0,
+                         n_components));
+//TODO: ideally the table of the same name stored in FEValuesBase would be
+// used here, but it has not been initialized yet when this constructor
+// runs, so it is rebuilt locally
+    const std::vector<unsigned int> row_table
+      = make_shape_function_to_row_table (fe);
+
+    for (unsigned int i = 0; i < fe.dofs_per_cell; ++i)
+      {
+        const bool is_primitive = (fe.is_primitive() ||
+                                   fe.is_primitive(i));
+
+        // number of nonzero components within the view and position of the
+        // first one found
+        unsigned int n_nonzero_components = 0;
+        unsigned int first_nonzero_d = numbers::invalid_unsigned_int;
+
+        for (unsigned int d = 0; d < n_independent; ++d)
+          {
+            const unsigned int component = first_tensor_component + d;
+
+            bool is_nonzero;
+            if (is_primitive == true)
+              is_nonzero = (component ==
+                            fe.system_to_component_index(i).first);
+            else
+              is_nonzero = (fe.get_nonzero_components(i)[component] == true);
+
+            shape_function_data[i].is_nonzero_shape_function_component[d]
+              = is_nonzero;
+
+            if (is_nonzero == true)
+              {
+                shape_function_data[i].row_index[d]
+                  = row_table[i*n_components+component];
+
+                if (n_nonzero_components == 0)
+                  first_nonzero_d = d;
+                ++n_nonzero_components;
+              }
+            else
+              shape_function_data[i].row_index[d]
+                = numbers::invalid_unsigned_int;
+          }
+
+        if (n_nonzero_components == 0)
+          shape_function_data[i].single_nonzero_component = -2;
+        else if (n_nonzero_components > 1)
+          shape_function_data[i].single_nonzero_component = -1;
+        else
+          {
+            shape_function_data[i].single_nonzero_component
+              = shape_function_data[i].row_index[first_nonzero_d];
+            shape_function_data[i].single_nonzero_component_index
+              = first_nonzero_d;
+          }
+      }
+  }
+
+
+
+  // Default constructor. The fe_values reference is bound to the result of
+  // dereferencing a null pointer and the first tensor component is set to
+  // numbers::invalid_unsigned_int.
+  // NOTE(review): binding a reference through a null pointer is formally
+  // undefined behavior; an object built this way must not be used.
+  template <int dim, int spacedim>
+  SymmetricTensor<2, dim, spacedim>::SymmetricTensor()
+    :
+    fe_values(*static_cast<dealii::FEValuesBase<dim, spacedim>*> (0)),
+    first_tensor_component(numbers::invalid_unsigned_int)
+  {}
+
+
+
+  // Copy assignment. Objects of this class are not meant to be copied: the
+  // function copies nothing, triggers an ExcInternalError assertion and
+  // returns *this.
+  template <int dim, int spacedim>
+  SymmetricTensor<2, dim, spacedim> &
+  SymmetricTensor<2, dim, spacedim>::operator=(const SymmetricTensor<2, dim, spacedim> &)
+  {
+    // we shouldn't be copying these objects
+    Assert(false, ExcInternalError());
+    return *this;
+  }
+
+
+  // Set up a view on the dim*dim consecutive components of a rank-2 tensor
+  // starting at 'first_tensor_component'. For every shape function and
+  // every one of these components we record whether the shape function is
+  // nonzero there and which row of the shape function tables belongs to
+  // it. In addition single_nonzero_component is set to -2 if no component
+  // is nonzero, to -1 if more than one is, and otherwise to the row index
+  // of the only nonzero component (whose position is stored in
+  // single_nonzero_component_index).
+  template <int dim, int spacedim>
+  Tensor<2, dim, spacedim>::
+  Tensor(const FEValuesBase<dim, spacedim> &fe_values,
+         const unsigned int first_tensor_component)
+    :
+    fe_values(fe_values),
+    first_tensor_component(first_tensor_component),
+    shape_function_data(fe_values.fe->dofs_per_cell)
+  {
+    const FiniteElement<dim,spacedim> &fe = *fe_values.fe;
+    const unsigned int n_components = fe.n_components();
+    const unsigned int n_tensor_components = dim*dim;
+
+    Assert(first_tensor_component + n_tensor_components - 1
+           <
+           n_components,
+           ExcIndexRange(first_tensor_component +
+                         n_tensor_components - 1,
+                         0,
+                         n_components));
+//TODO: ideally the table of the same name stored in FEValuesBase would be
+// used here, but it has not been initialized yet when this constructor
+// runs, so it is rebuilt locally
+    const std::vector<unsigned int> row_table
+      = make_shape_function_to_row_table (fe);
+
+    for (unsigned int i = 0; i < fe.dofs_per_cell; ++i)
+      {
+        const bool is_primitive = (fe.is_primitive() ||
+                                   fe.is_primitive(i));
+
+        // number of nonzero components within the view and position of the
+        // first one found
+        unsigned int n_nonzero_components = 0;
+        unsigned int first_nonzero_d = numbers::invalid_unsigned_int;
+
+        for (unsigned int d = 0; d < n_tensor_components; ++d)
+          {
+            const unsigned int component = first_tensor_component + d;
+
+            bool is_nonzero;
+            if (is_primitive == true)
+              is_nonzero = (component ==
+                            fe.system_to_component_index(i).first);
+            else
+              is_nonzero = (fe.get_nonzero_components(i)[component] == true);
+
+            shape_function_data[i].is_nonzero_shape_function_component[d]
+              = is_nonzero;
+
+            if (is_nonzero == true)
+              {
+                shape_function_data[i].row_index[d]
+                  = row_table[i*n_components+component];
+
+                if (n_nonzero_components == 0)
+                  first_nonzero_d = d;
+                ++n_nonzero_components;
+              }
+            else
+              shape_function_data[i].row_index[d]
+                = numbers::invalid_unsigned_int;
+          }
+
+        if (n_nonzero_components == 0)
+          shape_function_data[i].single_nonzero_component = -2;
+        else if (n_nonzero_components > 1)
+          shape_function_data[i].single_nonzero_component = -1;
+        else
+          {
+            shape_function_data[i].single_nonzero_component
+              = shape_function_data[i].row_index[first_nonzero_d];
+            shape_function_data[i].single_nonzero_component_index
+              = first_nonzero_d;
+          }
+      }
+  }
+
+
+
+  // Default constructor. The fe_values reference is bound to the result of
+  // dereferencing a null pointer and the first tensor component is set to
+  // numbers::invalid_unsigned_int.
+  // NOTE(review): binding a reference through a null pointer is formally
+  // undefined behavior; an object built this way must not be used.
+  template <int dim, int spacedim>
+  Tensor<2, dim, spacedim>::Tensor()
+    :
+    fe_values(*static_cast<dealii::FEValuesBase<dim, spacedim>*> (0)),
+    first_tensor_component(numbers::invalid_unsigned_int)
+  {}
+
+
+
+  // Copy assignment. Objects of this class are not meant to be copied: the
+  // function copies nothing, triggers an ExcInternalError assertion and
+  // returns *this.
+  template <int dim, int spacedim>
+  Tensor<2, dim, spacedim> &
+  Tensor<2, dim, spacedim>::operator=(const Tensor<2, dim, spacedim> &)
+  {
+    // we shouldn't be copying these objects
+    Assert(false, ExcInternalError());
+    return *this;
+  }
+
+
+  namespace internal
+  {
+    // Given values of degrees of freedom, evaluate the
+    // values/gradients/... at quadrature points
+
+    // ------------------------- scalar functions --------------------------
+    // Evaluate a scalar finite element function at the quadrature points:
+    // 'values' is overwritten with the sum, over all shape functions that
+    // are nonzero in the selected component, of the local dof value times
+    // the corresponding row of 'shape_values'. Shape functions whose dof
+    // value compares equal to Number() are skipped.
+    template <int dim, int spacedim, typename Number>
+    void
+    do_function_values (const ::dealii::Vector<Number> &dof_values,
+                        const Table<2,double>          &shape_values,
+                        const std::vector<typename Scalar<dim,spacedim>::ShapeFunctionData> &shape_function_data,
+                        std::vector<typename ProductType<Number,double>::type>            &values)
+    {
+      const unsigned int n_dofs = dof_values.size();
+
+      // without any degrees of freedom the table of shape values is not
+      // consulted and the output vector determines the number of points
+      const unsigned int n_q_points = n_dofs > 0 ?
+                                      shape_values.n_cols() : values.size();
+      AssertDimension (values.size(), n_q_points);
+
+      std::fill (values.begin(), values.end(), Number());
+
+      for (unsigned int k=0; k<n_dofs; ++k)
+        {
+          if (!shape_function_data[k].is_nonzero_shape_function_component)
+            continue;
+
+          const Number coefficient = dof_values(k);
+          if (coefficient == Number() )
+            continue;
+
+          // start of the row of shape values belonging to this shape
+          // function
+          const double *row = &shape_values(shape_function_data[k].row_index, 0);
+          for (unsigned int q=0; q<n_q_points; ++q)
+            values[q] += coefficient * row[q];
+        }
+    }
+
+
+
+    // same code for gradient and Hessian, template argument 'order' to give
+    // the order of the derivative (= rank of gradient/Hessian tensor)
+    // Evaluate derivatives of a scalar finite element function at the
+    // quadrature points. The template argument 'order' is the rank of the
+    // derivative tensor, so the same code serves gradients and Hessians.
+    // 'derivatives' is overwritten with the sum, over all shape functions
+    // that are nonzero in the selected component, of the local dof value
+    // times the corresponding row of 'shape_derivatives'. Shape functions
+    // whose dof value compares equal to Number() are skipped.
+    template <int order, int dim, int spacedim, typename Number>
+    void
+    do_function_derivatives (const ::dealii::Vector<Number> &dof_values,
+                             const Table<2,dealii::Tensor<order,spacedim> > &shape_derivatives,
+                             const std::vector<typename Scalar<dim,spacedim>::ShapeFunctionData> &shape_function_data,
+                             std::vector<typename ProductType<Number,dealii::Tensor<order,spacedim> >::type> &derivatives)
+    {
+      typedef typename ProductType<Number,dealii::Tensor<order,spacedim> >::type
+      derivative_type;
+
+      const unsigned int n_dofs = dof_values.size();
+
+      // without any degrees of freedom the table of shape derivatives is
+      // not consulted and the output vector determines the number of points
+      const unsigned int n_q_points = n_dofs > 0 ?
+                                      shape_derivatives[0].size() : derivatives.size();
+      AssertDimension (derivatives.size(), n_q_points);
+
+      std::fill (derivatives.begin(), derivatives.end(), derivative_type());
+
+      for (unsigned int k=0; k<n_dofs; ++k)
+        {
+          if (!shape_function_data[k].is_nonzero_shape_function_component)
+            continue;
+
+          const Number coefficient = dof_values(k);
+          if (coefficient == Number() )
+            continue;
+
+          // start of the row of shape derivatives belonging to this shape
+          // function
+          const dealii::Tensor<order,spacedim> *row =
+            &shape_derivatives[shape_function_data[k].row_index][0];
+          for (unsigned int q=0; q<n_q_points; ++q)
+            derivatives[q] += coefficient * derivative_type(row[q]);
+        }
+    }
+
+
+
+    // Laplacians of a scalar view: adds dof value times trace of the shape Hessian.
+    template <int dim, int spacedim, typename Number>
+    void
+    do_function_laplacians (const ::dealii::Vector<Number> &dof_values,
+                            const Table<2,dealii::Tensor<2,spacedim> > &shape_hessians,
+                            const std::vector<typename Scalar<dim,spacedim>::ShapeFunctionData> &shape_function_data,
+                            std::vector<typename ProductType<Number,double>::type>           &laplacians)
+    {
+      const unsigned int dofs_per_cell = dof_values.size();
+      const unsigned int n_quadrature_points = dofs_per_cell > 0 ?
+                                               shape_hessians[0].size() : laplacians.size();
+      AssertDimension (laplacians.size(), n_quadrature_points);
+
+      std::fill (laplacians.begin(), laplacians.end(), typename ProductType<Number,double>::type());
+
+      for (unsigned int i=0; i<dofs_per_cell; ++i)
+        {
+          if (!shape_function_data[i].is_nonzero_shape_function_component)
+            continue;   // flagged as zero for this view
+          const Number coefficient = dof_values(i);
+          if (coefficient == Number())
+            continue;   // a zero dof value contributes nothing
+          const dealii::Tensor<2,spacedim> *const hessian_row =
+            &shape_hessians[shape_function_data[i].row_index][0];
+          for (unsigned int q=0; q<n_quadrature_points; ++q)
+            laplacians[q] += coefficient * trace(hessian_row[q]);
+        }
+    }
+
+
+
+    // ----------------------------- vector part ---------------------------
+
+    // Values of a vector view: values[q][c] += dof value * shape value of component c.
+    template <int dim, int spacedim, typename Number>
+    void do_function_values (const ::dealii::Vector<Number> &dof_values,
+                             const Table<2,double>          &shape_values,
+                             const std::vector<typename Vector<dim,spacedim>::ShapeFunctionData> &shape_function_data,
+                             std::vector<typename ProductType<Number,dealii::Tensor<1,spacedim> >::type> &values)
+    {
+      const unsigned int dofs_per_cell = dof_values.size();
+      const unsigned int n_quadrature_points = dofs_per_cell > 0 ?
+                                               shape_values.n_cols() : values.size();
+      AssertDimension (values.size(), n_quadrature_points);
+
+      std::fill (values.begin(), values.end(), typename ProductType<Number,dealii::Tensor<1,spacedim> >::type());
+
+      for (unsigned int i=0; i<dofs_per_cell; ++i)
+        {
+          const typename Vector<dim,spacedim>::ShapeFunctionData &data = shape_function_data[i];
+          const int snc = data.single_nonzero_component;
+          if (snc == -2)
+            continue;   // shape function is zero for the selected components
+          const Number coefficient = dof_values(i);
+          if (coefficient == Number())
+            continue;
+
+          if (snc == -1)
+            {
+              // several nonzero components: each one reads its own table row
+              for (unsigned int d=0; d<spacedim; ++d)
+                if (data.is_nonzero_shape_function_component[d])
+                  {
+                    const double *const table_row = &shape_values(data.row_index[d],0);
+                    for (unsigned int q=0; q<n_quadrature_points; ++q)
+                      values[q][d] += coefficient * table_row[q];
+                  }
+            }
+          else
+            {
+              // a single nonzero component whose values are read from table row snc
+              const unsigned int comp = data.single_nonzero_component_index;
+              const double *const table_row = &shape_values(snc,0);
+              for (unsigned int q=0; q<n_quadrature_points; ++q)
+                values[q][comp] += coefficient * table_row[q];
+            }
+        }
+    }
+
+
+
+    // Derivatives of a vector view: derivatives[q][c] accumulates dof value times
+    // the rank-'order' derivative read from the table row belonging to component c.
+    template <int order, int dim, int spacedim, typename Number>
+    void
+    do_function_derivatives (const ::dealii::Vector<Number> &dof_values,
+                             const Table<2,dealii::Tensor<order,spacedim> > &shape_derivatives,
+                             const std::vector<typename Vector<dim,spacedim>::ShapeFunctionData> &shape_function_data,
+                             std::vector<typename ProductType<Number,dealii::Tensor<order+1,spacedim> >::type> &derivatives)
+    {
+      typedef typename ProductType<Number,dealii::Tensor<order,spacedim> >::type component_derivative_type;
+      const unsigned int dofs_per_cell = dof_values.size();
+      const unsigned int n_quadrature_points = dofs_per_cell > 0 ?
+                                               shape_derivatives[0].size() : derivatives.size();
+      AssertDimension (derivatives.size(), n_quadrature_points);
+
+      std::fill (derivatives.begin(), derivatives.end(),
+                 typename ProductType<Number,dealii::Tensor<order+1,spacedim> >::type());
+
+      for (unsigned int i=0; i<dofs_per_cell; ++i)
+        {
+          const typename Vector<dim,spacedim>::ShapeFunctionData &data = shape_function_data[i];
+          const int snc = data.single_nonzero_component;
+          // -2 marks a shape function that is zero for the selected components
+          if (snc == -2)
+            continue;
+          const Number coefficient = dof_values(i);
+          if (coefficient == Number())
+            continue;
+
+          if (snc == -1)
+            {
+              // several nonzero components: each one reads its own table row
+              for (unsigned int d=0; d<spacedim; ++d)
+                if (data.is_nonzero_shape_function_component[d])
+                  {
+                    const dealii::Tensor<order,spacedim> *const table_row =
+                      &shape_derivatives[data.row_index[d]][0];
+                    for (unsigned int q=0; q<n_quadrature_points; ++q)
+                      derivatives[q][d] += coefficient * component_derivative_type(table_row[q]);
+                  }
+            }
+          else
+            {
+              // a single nonzero component whose derivatives are read from row snc
+              const unsigned int comp = data.single_nonzero_component_index;
+              const dealii::Tensor<order,spacedim> *const table_row = &shape_derivatives[snc][0];
+              for (unsigned int q=0; q<n_quadrature_points; ++q)
+                derivatives[q][comp] += coefficient * component_derivative_type(table_row[q]);
+            }
+        }
+    }
+
+
+
+    // Symmetric gradients of a vector view: for every nonzero dof, adds the dof
+    // value times the symmetrized shape-function gradient to symmetric_gradients[q].
+    template <int dim, int spacedim, typename Number>
+    void
+    do_function_symmetric_gradients (const ::dealii::Vector<Number> &dof_values,
+                                     const Table<2,dealii::Tensor<1,spacedim> > &shape_gradients,
+                                     const std::vector<typename Vector<dim,spacedim>::ShapeFunctionData> &shape_function_data,
+                                     std::vector<typename ProductType<Number,dealii::SymmetricTensor<2,spacedim> >::type> &symmetric_gradients)
+    {
+      typedef typename ProductType<Number,dealii::SymmetricTensor<2,spacedim> >::type symmetric_type;
+      const unsigned int dofs_per_cell = dof_values.size();
+      const unsigned int n_quadrature_points = dofs_per_cell > 0 ?
+                                               shape_gradients[0].size() : symmetric_gradients.size();
+      AssertDimension (symmetric_gradients.size(), n_quadrature_points);
+
+      std::fill (symmetric_gradients.begin(), symmetric_gradients.end(), symmetric_type());
+
+      for (unsigned int i=0; i<dofs_per_cell; ++i)
+        {
+          const typename Vector<dim,spacedim>::ShapeFunctionData &data = shape_function_data[i];
+          const int snc = data.single_nonzero_component;
+          // -2 marks a shape function that is zero for the selected components
+          if (snc == -2)
+            continue;
+          const Number coefficient = dof_values(i);
+          if (coefficient == Number())
+            continue;
+
+          if (snc == -1)
+            {
+              // several nonzero components: build the scaled gradient row by row
+              for (unsigned int q=0; q<n_quadrature_points; ++q)
+                {
+                  typename ProductType<Number,dealii::Tensor<2,spacedim> >::type scaled_gradient;
+                  for (unsigned int d=0; d<spacedim; ++d)
+                    if (data.is_nonzero_shape_function_component[d])
+                      scaled_gradient[d] = coefficient * shape_gradients[data.row_index[d]][q];
+                  symmetric_gradients[q] += symmetrize(scaled_gradient);
+                }
+            }
+          else
+            {
+              // single nonzero component: symmetrize_single_row() gets comp and the gradient
+              const unsigned int comp = data.single_nonzero_component_index;
+              const dealii::Tensor<1,spacedim> *const gradient_row = &shape_gradients[snc][0];
+              for (unsigned int q=0; q<n_quadrature_points; ++q)
+                symmetric_gradients[q] += coefficient * symmetric_type (symmetrize_single_row(comp, gradient_row[q]));
+            }
+        }
+    }
+
+
+
+    // Divergence of a vector view: divergences[q] += dof value * d(component c)/dx_c.
+    template <int dim, int spacedim, typename Number>
+    void
+    do_function_divergences (const ::dealii::Vector<Number> &dof_values,
+                             const Table<2,dealii::Tensor<1,spacedim> > &shape_gradients,
+                             const std::vector<typename Vector<dim,spacedim>::ShapeFunctionData> &shape_function_data,
+                             std::vector<typename ProductType<Number,double>::type> &divergences)
+    {
+      const unsigned int dofs_per_cell = dof_values.size();
+      const unsigned int n_quadrature_points = dofs_per_cell > 0 ?
+                                               shape_gradients[0].size() : divergences.size();
+      AssertDimension (divergences.size(), n_quadrature_points);
+
+      std::fill (divergences.begin(), divergences.end(), typename ProductType<Number,double>::type());
+
+      for (unsigned int i=0; i<dofs_per_cell; ++i)
+        {
+          const typename Vector<dim,spacedim>::ShapeFunctionData &data = shape_function_data[i];
+          const int snc = data.single_nonzero_component;
+          // -2 marks a shape function that is zero for the selected components
+          if (snc == -2)
+            continue;
+          const Number coefficient = dof_values(i);
+          if (coefficient == Number())
+            continue;
+
+          if (snc == -1)
+            {
+              // several nonzero components: component d contributes its d-th derivative
+              for (unsigned int d=0; d<spacedim; ++d)
+                if (data.is_nonzero_shape_function_component[d])
+                  {
+                    const dealii::Tensor<1,spacedim> *const gradient_row = &shape_gradients[data.row_index[d]][0];
+                    for (unsigned int q=0; q<n_quadrature_points; ++q)
+                      divergences[q] += coefficient * gradient_row[q][d];
+                  }
+            }
+          else
+            {
+              // a single nonzero component: only its comp-th derivative enters
+              const unsigned int comp = data.single_nonzero_component_index;
+              const dealii::Tensor<1,spacedim> *const gradient_row = &shape_gradients[snc][0];
+              for (unsigned int q=0; q<n_quadrature_points; ++q)
+                divergences[q] += coefficient * gradient_row[q][comp];
+            }
+        }
+    }
+
+    // Curl of a vector view: zeroes 'curls', then adds for every nonzero dof the
+    // dof value times the curl of its shape function. Only 2d and 3d do any work.
+    template <int dim, int spacedim, typename Number>
+    void
+    do_function_curls (const ::dealii::Vector<Number> &dof_values,
+                       const Table<2,dealii::Tensor<1,spacedim> > &shape_gradients,
+                       const std::vector<typename Vector<dim,spacedim>::ShapeFunctionData> &shape_function_data,
+                       std::vector<typename ProductType<Number,typename dealii::internal::CurlType<spacedim>::type>::type> &curls)
+    {
+      const unsigned int dofs_per_cell = dof_values.size();
+      const unsigned int n_quadrature_points = dofs_per_cell > 0 ?
+                                               shape_gradients[0].size() : curls.size();
+      AssertDimension (curls.size(), n_quadrature_points);
+
+      std::fill (curls.begin(), curls.end(), typename ProductType<Number,typename dealii::internal::CurlType<spacedim>::type>::type());
+      // the curl takes a different form in each space dimension
+      switch (spacedim)
+        {
+        case 1:
+        {
+          Assert (false, ExcMessage("Computing the curl in 1d is not a useful operation"));
+          break;
+        }
+        // 2d: the curl is the scalar d(u_1)/dx - d(u_0)/dy, stored in curls[q][0]
+        case 2:
+        {
+          for (unsigned int shape_function = 0;
+               shape_function < dofs_per_cell; ++shape_function)
+            {
+              const int snc = shape_function_data[shape_function].single_nonzero_component;
+
+              if (snc == -2)
+                // shape function is zero for the selected components
+                continue;
+
+              const Number value = dof_values (shape_function);
+
+              if (value == Number())
+                continue;
+
+              if (snc != -1)
+                {
+                  const dealii::Tensor<1, spacedim> *shape_gradient_ptr =
+                    &shape_gradients[snc][0];
+                  // 'snc' holds this same field and is neither -2 nor -1 here
+                  Assert (shape_function_data[shape_function].single_nonzero_component >= 0,
+                          ExcInternalError());
+                  // we're in 2d, so the formula for the curl is simple:
+                  if (shape_function_data[shape_function].single_nonzero_component_index == 0)
+                    for (unsigned int q_point = 0;
+                         q_point < n_quadrature_points; ++q_point)
+                      curls[q_point][0] -= value * (*shape_gradient_ptr++)[1];
+                  else
+                    for (unsigned int q_point = 0;
+                         q_point < n_quadrature_points; ++q_point)
+                      curls[q_point][0] += value * (*shape_gradient_ptr++)[0];
+                }
+              else
+                // we have multiple non-zero components in the shape functions. not
+                // all of them must necessarily be within the 2-component window
+                // this FEValuesViews::Vector object considers, however.
+                {
+                  if (shape_function_data[shape_function].is_nonzero_shape_function_component[0])
+                    {
+                      const dealii::Tensor<1,spacedim> *shape_gradient_ptr =
+                        &shape_gradients[shape_function_data[shape_function].row_index[0]][0];
+
+                      for (unsigned int q_point = 0; q_point < n_quadrature_points; ++q_point)
+                        curls[q_point][0] -= value * (*shape_gradient_ptr++)[1];
+                    }
+
+                  if (shape_function_data[shape_function].is_nonzero_shape_function_component[1])
+                    {
+                      const dealii::Tensor<1,spacedim> *shape_gradient_ptr =
+                        &shape_gradients[shape_function_data[shape_function].row_index[1]][0];
+
+                      for (unsigned int q_point = 0; q_point < n_quadrature_points; ++q_point)
+                        curls[q_point][0] += value * (*shape_gradient_ptr++)[0];
+                    }
+                }
+            }
+          break;
+        }
+        // 3d: curl u = (d_y u_2 - d_z u_1, d_z u_0 - d_x u_2, d_x u_1 - d_y u_0)
+        case 3:
+        {
+          for (unsigned int shape_function = 0;
+               shape_function < dofs_per_cell; ++shape_function)
+            {
+              const int snc = shape_function_data[shape_function].single_nonzero_component;
+
+              if (snc == -2)
+                // shape function is zero for the selected components
+                continue;
+
+              const Number value = dof_values (shape_function);
+
+              if (value == Number())
+                continue;
+
+              if (snc != -1)
+                {
+                  const dealii::Tensor<1, spacedim> *shape_gradient_ptr = &shape_gradients[snc][0];
+                  // each component feeds exactly two of the three curl entries
+                  switch (shape_function_data[shape_function].single_nonzero_component_index)
+                    {
+                    case 0:
+                    {
+                      for (unsigned int q_point = 0;
+                           q_point < n_quadrature_points; ++q_point)
+                        {
+                          curls[q_point][1] += value * (*shape_gradient_ptr)[2];
+                          curls[q_point][2] -= value * (*shape_gradient_ptr++)[1];
+                        }
+
+                      break;
+                    }
+
+                    case 1:
+                    {
+                      for (unsigned int q_point = 0;
+                           q_point < n_quadrature_points; ++q_point)
+                        {
+                          curls[q_point][0] -= value * (*shape_gradient_ptr)[2];
+                          curls[q_point][2] += value * (*shape_gradient_ptr++)[0];
+                        }
+
+                      break;
+                    }
+
+                    case 2:
+                    {
+                      for (unsigned int q_point = 0;
+                           q_point < n_quadrature_points; ++q_point)
+                        {
+                          curls[q_point][0] += value * (*shape_gradient_ptr)[1];
+                          curls[q_point][1] -= value * (*shape_gradient_ptr++)[0];
+                        }
+                      break;
+                    }
+
+                    default:
+                      Assert (false, ExcInternalError());
+                    }
+                }
+
+              else
+                // we have multiple non-zero components in the shape functions. not
+                // all of them must necessarily be within the 3-component window
+                // this FEValuesViews::Vector object considers, however.
+                {
+                  if (shape_function_data[shape_function].is_nonzero_shape_function_component[0])
+                    {
+                      const dealii::Tensor<1,spacedim> *shape_gradient_ptr =
+                        &shape_gradients[shape_function_data[shape_function].row_index[0]][0];
+
+                      for (unsigned int q_point = 0; q_point < n_quadrature_points; ++q_point)
+                        {
+                          curls[q_point][1] += value * (*shape_gradient_ptr)[2];
+                          curls[q_point][2] -= value * (*shape_gradient_ptr++)[1];
+                        }
+                    }
+
+                  if (shape_function_data[shape_function].is_nonzero_shape_function_component[1])
+                    {
+                      const dealii::Tensor<1,spacedim> *shape_gradient_ptr =
+                        &shape_gradients[shape_function_data[shape_function].row_index[1]][0];
+
+                      for (unsigned int q_point = 0; q_point < n_quadrature_points; ++q_point)
+                        {
+                          curls[q_point][0] -= value * (*shape_gradient_ptr)[2];
+                          curls[q_point][2] += value * (*shape_gradient_ptr++)[0];
+                        }
+                    }
+
+                  if (shape_function_data[shape_function].is_nonzero_shape_function_component[2])
+                    {
+                      const dealii::Tensor<1,spacedim> *shape_gradient_ptr =
+                        &shape_gradients[shape_function_data[shape_function].row_index[2]][0];
+
+                      for (unsigned int q_point = 0; q_point < n_quadrature_points; ++q_point)
+                        {
+                          curls[q_point][0] += value * (*shape_gradient_ptr)[1];
+                          curls[q_point][1] -= value * (*shape_gradient_ptr++)[0];
+                        }
+                    }
+                }
+            }
+        } // case 3 is the last label of the switch, so it carries no 'break'
+        }
+    }
+
+
+
+    // Laplacians of a vector view: laplacians[q][c] accumulates dof value times the
+    // trace of the shape Hessian read from the table row belonging to component c.
+    template <int dim, int spacedim, typename Number>
+    void
+    do_function_laplacians (const ::dealii::Vector<Number> &dof_values,
+                            const Table<2,dealii::Tensor<2,spacedim> > &shape_hessians,
+                            const std::vector<typename Vector<dim,spacedim>::ShapeFunctionData> &shape_function_data,
+                            std::vector<typename ProductType<Number,dealii::Tensor<1,spacedim> >::type> &laplacians)
+    {
+      const unsigned int dofs_per_cell = dof_values.size();
+      const unsigned int n_quadrature_points = dofs_per_cell > 0 ?
+                                               shape_hessians[0].size() : laplacians.size();
+      AssertDimension (laplacians.size(), n_quadrature_points);
+
+      std::fill (laplacians.begin(), laplacians.end(),
+                 typename ProductType<Number,dealii::Tensor<1,spacedim> >::type());
+
+      for (unsigned int i=0; i<dofs_per_cell; ++i)
+        {
+          const typename Vector<dim,spacedim>::ShapeFunctionData &data = shape_function_data[i];
+          const int snc = data.single_nonzero_component;
+          // -2 marks a shape function that is zero for the selected components
+          if (snc == -2)
+            continue;
+          const Number coefficient = dof_values(i);
+          if (coefficient == Number())
+            continue;
+
+          if (snc == -1)
+            {
+              // several nonzero components: each one reads its own table row
+              for (unsigned int d=0; d<spacedim; ++d)
+                if (data.is_nonzero_shape_function_component[d])
+                  {
+                    const dealii::Tensor<2,spacedim> *const hessian_row = &shape_hessians[data.row_index[d]][0];
+                    for (unsigned int q=0; q<n_quadrature_points; ++q)
+                      laplacians[q][d] += coefficient * trace(hessian_row[q]);
+                  }
+            }
+          else
+            {
+              // a single nonzero component whose Hessians are read from row snc
+              const unsigned int comp = data.single_nonzero_component_index;
+              const dealii::Tensor<2,spacedim> *const hessian_row = &shape_hessians[snc][0];
+              for (unsigned int q=0; q<n_quadrature_points; ++q)
+                laplacians[q][comp] += coefficient * trace(hessian_row[q]);
+            }
+        }
+    }
+
+
+
+    // ---------------------- symmetric tensor part ------------------------
+
+    // Values of a symmetric-tensor view: the unrolled component index is mapped to
+    // a pair of tensor indices, and that entry gets dof value times shape value.
+    template <int dim, int spacedim, typename Number>
+    void
+    do_function_values (const ::dealii::Vector<Number> &dof_values,
+                        const dealii::Table<2,double>          &shape_values,
+                        const std::vector<typename SymmetricTensor<2,dim,spacedim>::ShapeFunctionData> &shape_function_data,
+                        std::vector<typename ProductType<Number,dealii::SymmetricTensor<2,spacedim> >::type> &values)
+    {
+      typedef dealii::SymmetricTensor<2,spacedim> tensor_type;
+      const unsigned int dofs_per_cell = dof_values.size();
+      const unsigned int n_quadrature_points = dofs_per_cell > 0 ?
+                                               shape_values.n_cols() : values.size();
+      AssertDimension (values.size(), n_quadrature_points);
+
+      std::fill (values.begin(), values.end(),
+                 typename ProductType<Number,dealii::SymmetricTensor<2,spacedim> >::type());
+
+      for (unsigned int i=0; i<dofs_per_cell; ++i)
+        {
+          const typename SymmetricTensor<2,dim,spacedim>::ShapeFunctionData &data = shape_function_data[i];
+          const int snc = data.single_nonzero_component;
+          // -2 marks a shape function that is zero for the selected components
+          if (snc == -2)
+            continue;
+          const Number coefficient = dof_values(i);
+          if (coefficient == Number())
+            continue;
+
+          if (snc == -1)
+            {
+              // several nonzero components: visit every independent tensor entry
+              for (unsigned int d=0; d<tensor_type::n_independent_components; ++d)
+                if (data.is_nonzero_shape_function_component[d])
+                  {
+                    const TableIndices<2> comp = tensor_type::unrolled_to_component_indices(d);
+                    const double *const table_row = &shape_values(data.row_index[d],0);
+                    for (unsigned int q=0; q<n_quadrature_points; ++q)
+                      values[q][comp] += coefficient * table_row[q];
+                  }
+            }
+          else
+            {
+              // a single nonzero component whose values are read from table row snc
+              const TableIndices<2> comp = tensor_type::unrolled_to_component_indices(data.single_nonzero_component_index);
+              const double *const table_row = &shape_values(snc,0);
+              for (unsigned int q=0; q<n_quadrature_points; ++q)
+                values[q][comp] += coefficient * table_row[q];
+            }
+        }
+    }
+
+    // Divergence of a symmetric-tensor view. Only shape functions with a single
+    // nonzero component are handled; the multi-component branch is not implemented.
+    template <int dim, int spacedim, typename Number>
+    void
+    do_function_divergences (const ::dealii::Vector<Number> &dof_values,
+                             const Table<2,dealii::Tensor<1,spacedim> > &shape_gradients,
+                             const std::vector<typename SymmetricTensor<2,dim,spacedim>::ShapeFunctionData> &shape_function_data,
+                             std::vector<typename ProductType<Number,dealii::Tensor<1,spacedim> >::type> &divergences)
+    {
+      const unsigned int dofs_per_cell = dof_values.size();
+      const unsigned int n_quadrature_points = dofs_per_cell > 0 ?
+                                               shape_gradients[0].size() : divergences.size();
+      AssertDimension (divergences.size(), n_quadrature_points);
+
+      std::fill (divergences.begin(), divergences.end(),
+                 typename ProductType<Number,dealii::Tensor<1,spacedim> >::type());
+
+      for (unsigned int shape_function=0;
+           shape_function<dofs_per_cell; ++shape_function)
+        {
+          const int snc = shape_function_data[shape_function].single_nonzero_component;
+
+          if (snc == -2)
+            // shape function is zero for the selected components
+            continue;
+
+          const Number value = dof_values(shape_function);
+          if (value == Number())
+            continue;
+
+          if (snc != -1)
+            {
+              const unsigned int comp =
+                shape_function_data[shape_function].single_nonzero_component_index;
+
+              const dealii::Tensor < 1, spacedim> *shape_gradient_ptr =
+                &shape_gradients[snc][0];
+              // (ii,jj) are the tensor indices that the unrolled index 'comp' maps to
+              const unsigned int ii = dealii::SymmetricTensor<2,spacedim>::
+                                      unrolled_to_component_indices(comp)[0];
+              const unsigned int jj = dealii::SymmetricTensor<2,spacedim>::
+                                      unrolled_to_component_indices(comp)[1];
+              // entry (ii,jj) adds d/dx_jj to row ii; if off-diagonal, also d/dx_ii to row jj
+              for (unsigned int q_point = 0; q_point < n_quadrature_points;
+                   ++q_point, ++shape_gradient_ptr)
+                {
+                  divergences[q_point][ii] += value * (*shape_gradient_ptr)[jj];
+
+                  if (ii != jj)
+                    divergences[q_point][jj] += value * (*shape_gradient_ptr)[ii];
+                }
+            }
+          else
+            {
+              for (unsigned int d = 0;
+                   d < dealii::SymmetricTensor<2,spacedim>::n_independent_components; ++d)
+                if (shape_function_data[shape_function].is_nonzero_shape_function_component[d])
+                  {
+                    Assert (false, ExcNotImplemented());
+                    // NOTE(review): below, the pointer advances in the j-loop and again per q_point
+                    // the following implementation needs to be looked over -- I
+                    // think it can't be right, because we are in a case where
+                    // there is no single nonzero component
+                    //
+                    // the following is not implemented! we need to consider the
+                    // interplay between multiple non-zero entries in shape
+                    // function and the representation as a symmetric
+                    // second-order tensor
+                    const unsigned int comp =
+                      shape_function_data[shape_function].single_nonzero_component_index;
+
+                    const dealii::Tensor < 1, spacedim> *shape_gradient_ptr =
+                      &shape_gradients[shape_function_data[shape_function].
+                                       row_index[d]][0];
+                    for (unsigned int q_point = 0; q_point < n_quadrature_points;
+                         ++q_point, ++shape_gradient_ptr)
+                      {
+                        for (unsigned int j = 0; j < spacedim; ++j)
+                          {
+                            const unsigned int vector_component = dealii::SymmetricTensor<2,spacedim>::component_to_unrolled_index (TableIndices<2>(comp,j));
+                            divergences[q_point][vector_component] += value * (*shape_gradient_ptr++)[j];
+                          }
+                      }
+                  }
+            }
+        }
+    }
+
+    // ---------------------- non-symmetric tensor part ------------------------
+
+    // Values of a rank-2 tensor view: the unrolled component index is mapped to a
+    // pair of tensor indices, and that entry gets dof value times shape value.
+    template <int dim, int spacedim, typename Number>
+    void
+    do_function_values (const ::dealii::Vector<Number> &dof_values,
+                        const dealii::Table<2,double>          &shape_values,
+                        const std::vector<typename Tensor<2,dim,spacedim>::ShapeFunctionData> &shape_function_data,
+                        std::vector<typename ProductType<Number,dealii::Tensor<2,spacedim> >::type> &values)
+    {
+      const unsigned int dofs_per_cell = dof_values.size();
+      const unsigned int n_quadrature_points = dofs_per_cell > 0 ?
+                                               shape_values.n_cols() : values.size();
+      AssertDimension (values.size(), n_quadrature_points);
+
+      std::fill (values.begin(), values.end(),
+                 typename ProductType<Number,dealii::Tensor<2,spacedim> >::type());
+
+      for (unsigned int i=0; i<dofs_per_cell; ++i)
+        {
+          const typename Tensor<2,dim,spacedim>::ShapeFunctionData &data = shape_function_data[i];
+          const int snc = data.single_nonzero_component;
+          // -2 marks a shape function that is zero for the selected components
+          if (snc == -2)
+            continue;
+          const Number coefficient = dof_values(i);
+          if (coefficient == Number())
+            continue;
+
+          if (snc == -1)
+            {
+              // several nonzero components: each one reads its own table row.
+              // NOTE(review): the bound is dim*dim while the indices are unrolled for
+              // a Tensor<2,spacedim>; confirm this is intended when dim != spacedim.
+              for (unsigned int d=0; d<dim*dim; ++d)
+                if (data.is_nonzero_shape_function_component[d])
+                  {
+                    const TableIndices<2> indices = dealii::Tensor<2,spacedim>::unrolled_to_component_indices(d);
+                    const double *const table_row = &shape_values(data.row_index[d],0);
+                    for (unsigned int q=0; q<n_quadrature_points; ++q)
+                      values[q][indices] += coefficient * table_row[q];
+                  }
+            }
+          else
+            {
+              // a single nonzero component whose values are read from table row snc
+              const unsigned int comp = data.single_nonzero_component_index;
+              const TableIndices<2> indices = dealii::Tensor<2,spacedim>::unrolled_to_component_indices(comp);
+              const double *const table_row = &shape_values(snc,0);
+              for (unsigned int q=0; q<n_quadrature_points; ++q)
+                values[q][indices] += coefficient * table_row[q];
+            }
+        }
+    }
+
+    // Tensor<2> views: for every shape function with a single nonzero component (tensor indices (ii,jj)), add
+    // dof_values(i) * (shape gradient)[ii] to divergences[q][jj]; several nonzero components are not implemented.
+    template <int dim, int spacedim, typename Number>
+    void
+    do_function_divergences (const ::dealii::Vector<Number> &dof_values,
+                             const Table<2,dealii::Tensor<1,spacedim> > &shape_gradients,
+                             const std::vector<typename Tensor<2,dim,spacedim>::ShapeFunctionData> &shape_function_data,
+                             std::vector<typename ProductType<Number,dealii::Tensor<1,spacedim> >::type> &divergences)
+    {
+      const unsigned int dofs_per_cell = dof_values.size();
+      const unsigned int n_quadrature_points = dofs_per_cell > 0 ?
+                                               shape_gradients[0].size() : divergences.size(); // without dofs, take the size of the output itself
+      AssertDimension (divergences.size(), n_quadrature_points);
+
+      std::fill (divergences.begin(), divergences.end(), // reset the output to value-initialized entries before accumulating
+                 typename ProductType<Number,dealii::Tensor<1,spacedim> >::type());
+
+      for (unsigned int shape_function=0;
+           shape_function<dofs_per_cell; ++shape_function)
+        {
+          const int snc = shape_function_data[shape_function].single_nonzero_component; // -2 and -1 are special (handled below); otherwise used as a row of shape_gradients
+
+          if (snc == -2)
+            // shape function is zero for the selected components
+            continue;
+
+          const Number value = dof_values(shape_function);
+          if (value == Number())
+            continue; // skip coefficients equal to a value-initialized Number
+
+          if (snc != -1)
+            {
+              const unsigned int comp =
+                shape_function_data[shape_function].single_nonzero_component_index;
+
+              const dealii::Tensor < 1, spacedim> *shape_gradient_ptr =
+                &shape_gradients[snc][0]; // first quadrature point of row snc; advanced in the loop header below
+
+              const TableIndices<2> indices = dealii::Tensor<2,spacedim>::unrolled_to_component_indices(comp);
+              const unsigned int ii = indices[0];
+              const unsigned int jj = indices[1];
+
+              for (unsigned int q_point = 0; q_point < n_quadrature_points;
+                   ++q_point, ++shape_gradient_ptr)
+                {
+                  divergences[q_point][jj] += value * (*shape_gradient_ptr)[ii]; // derivative in direction ii goes into entry jj
+                }
+            }
+          else
+            {
+              for (unsigned int d = 0;
+                   d < dim*dim; ++d)
+                if (shape_function_data[shape_function].is_nonzero_shape_function_component[d])
+                  {
+                    Assert (false, ExcNotImplemented()); // shape functions with several nonzero components are not supported here
+                  }
+            }
+        }
+    }
+
+  } // end of namespace internal
+
+  // Scalar view: values of fe_function on the current cell. After checking the preconditions, gathers the
+  // values of fe_function at the cell's dofs and passes them with the shape values to internal::do_function_values.
+  template <int dim, int spacedim>
+  template <class InputVector>
+  void
+  Scalar<dim,spacedim>::
+  get_function_values (const InputVector &fe_function,
+                       std::vector<typename ProductType<value_type,typename InputVector::value_type>::type> &values) const
+  {
+    typedef FEValuesBase<dim,spacedim> FVB;
+    Assert (fe_values.update_flags & update_values, // the update_values flag must be set
+            typename FVB::ExcAccessToUninitializedField("update_values"));
+    Assert (fe_values.present_cell.get() != 0, // a cell must have been set
+            ExcMessage ("FEValues object is not reinit'ed to any cell"));
+    AssertDimension (fe_function.size(), // one entry of fe_function per dof of the DoF handler
+                     fe_values.present_cell->n_dofs_for_dof_handler());
+
+    // gather the values of fe_function at the dofs of this cell, then call the internal worker function
+    dealii::Vector<typename InputVector::value_type> dof_values(fe_values.dofs_per_cell);
+    fe_values.present_cell->get_interpolated_dof_values(fe_function, dof_values);
+    internal::do_function_values<dim,spacedim>
+    (dof_values, fe_values.finite_element_output.shape_values, shape_function_data, values);
+  }
+
+  // Scalar view: gradients of fe_function on the current cell. After checking the preconditions, gathers the values
+  // of fe_function at the cell's dofs and passes them with the shape gradients to internal::do_function_derivatives<1,...>.
+  template <int dim, int spacedim>
+  template <class InputVector>
+  void
+  Scalar<dim,spacedim>::
+  get_function_gradients (const InputVector &fe_function,
+                          std::vector<typename ProductType<gradient_type,typename InputVector::value_type>::type> &gradients) const
+  {
+    typedef FEValuesBase<dim,spacedim> FVB;
+    Assert (fe_values.update_flags & update_gradients, // the update_gradients flag must be set
+            typename FVB::ExcAccessToUninitializedField("update_gradients"));
+    Assert (fe_values.present_cell.get() != 0, // a cell must have been set
+            ExcMessage ("FEValues object is not reinit'ed to any cell"));
+    AssertDimension (fe_function.size(), // one entry of fe_function per dof of the DoF handler
+                     fe_values.present_cell->n_dofs_for_dof_handler());
+
+    // gather the values of fe_function at the dofs of this cell, then call the internal worker function
+    dealii::Vector<typename InputVector::value_type> dof_values (fe_values.dofs_per_cell);
+    fe_values.present_cell->get_interpolated_dof_values(fe_function, dof_values);
+    internal::do_function_derivatives<1,dim,spacedim>
+    (dof_values, fe_values.finite_element_output.shape_gradients, shape_function_data, gradients);
+  }
+
+  // Scalar view: hessians of fe_function on the current cell. After checking the preconditions, gathers the values
+  // of fe_function at the cell's dofs and passes them with the shape hessians to internal::do_function_derivatives<2,...>.
+  template <int dim, int spacedim>
+  template <class InputVector>
+  void
+  Scalar<dim,spacedim>::
+  get_function_hessians (const InputVector &fe_function,
+                         std::vector<typename ProductType<hessian_type,typename InputVector::value_type>::type> &hessians) const
+  {
+    typedef FEValuesBase<dim,spacedim> FVB;
+    Assert (fe_values.update_flags & update_hessians, // the update_hessians flag must be set
+            typename FVB::ExcAccessToUninitializedField("update_hessians"));
+    Assert (fe_values.present_cell.get() != 0, // a cell must have been set
+            ExcMessage ("FEValues object is not reinit'ed to any cell"));
+    AssertDimension (fe_function.size(), // one entry of fe_function per dof of the DoF handler
+                     fe_values.present_cell->n_dofs_for_dof_handler());
+
+    // gather the values of fe_function at the dofs of this cell, then call the internal worker function
+    dealii::Vector<typename InputVector::value_type> dof_values (fe_values.dofs_per_cell);
+    fe_values.present_cell->get_interpolated_dof_values(fe_function, dof_values);
+    internal::do_function_derivatives<2,dim,spacedim>
+    (dof_values, fe_values.finite_element_output.shape_hessians, shape_function_data, hessians);
+  }
+
+  // Scalar view: laplacians of fe_function on the current cell. These are derived from the shape hessians, so the
+  // update_hessians flag is required; the cell-local dof values and hessians go to internal::do_function_laplacians.
+  template <int dim, int spacedim>
+  template <class InputVector>
+  void
+  Scalar<dim,spacedim>::
+  get_function_laplacians (const InputVector &fe_function,
+                           std::vector<typename ProductType<value_type,typename InputVector::value_type>::type> &laplacians) const
+  {
+    typedef FEValuesBase<dim,spacedim> FVB;
+    Assert (fe_values.update_flags & update_hessians, // the update_hessians flag must be set
+            typename FVB::ExcAccessToUninitializedField("update_hessians"));
+    Assert (fe_values.present_cell.get() != 0, // a cell must have been set
+            ExcMessage ("FEValues object is not reinit'ed to any cell"));
+    AssertDimension (fe_function.size(), // one entry of fe_function per dof of the DoF handler
+                     fe_values.present_cell->n_dofs_for_dof_handler());
+
+    // gather the values of fe_function at the dofs of this cell, then call the internal worker function
+    dealii::Vector<typename InputVector::value_type> dof_values (fe_values.dofs_per_cell);
+    fe_values.present_cell->get_interpolated_dof_values(fe_function, dof_values);
+    internal::do_function_laplacians<dim,spacedim>
+    (dof_values, fe_values.finite_element_output.shape_hessians, shape_function_data, laplacians);
+  }
+
+  // Scalar view: third derivatives of fe_function on the current cell. After checking the preconditions, gathers the
+  // cell-local dof values and passes them with the shape third derivatives to internal::do_function_derivatives<3,...>.
+  template <int dim, int spacedim>
+  template <class InputVector>
+  void
+  Scalar<dim,spacedim>::
+  get_function_third_derivatives (const InputVector &fe_function,
+                                  std::vector<typename ProductType<third_derivative_type,typename InputVector::value_type>::type> &third_derivatives) const
+  {
+    typedef FEValuesBase<dim,spacedim> FVB;
+    Assert (fe_values.update_flags & update_3rd_derivatives, // the update_3rd_derivatives flag must be set
+            typename FVB::ExcAccessToUninitializedField("update_3rd_derivatives"));
+    Assert (fe_values.present_cell.get() != 0, // a cell must have been set
+            ExcMessage ("FEValues object is not reinit'ed to any cell"));
+    AssertDimension (fe_function.size(), // one entry of fe_function per dof of the DoF handler
+                     fe_values.present_cell->n_dofs_for_dof_handler());
+
+    // gather the values of fe_function at the dofs of this cell, then call the internal worker function
+    dealii::Vector<typename InputVector::value_type> dof_values (fe_values.dofs_per_cell);
+    fe_values.present_cell->get_interpolated_dof_values(fe_function, dof_values);
+    internal::do_function_derivatives<3,dim,spacedim>
+    (dof_values, fe_values.finite_element_output.shape_3rd_derivatives, shape_function_data, third_derivatives);
+  }
+
+  // Vector view: values of fe_function on the current cell. After checking the preconditions, gathers the
+  // values of fe_function at the cell's dofs and passes them with the shape values to internal::do_function_values.
+  template <int dim, int spacedim>
+  template <class InputVector>
+  void
+  Vector<dim,spacedim>::
+  get_function_values (const InputVector &fe_function,
+                       std::vector<typename ProductType<value_type,typename InputVector::value_type>::type> &values) const
+  {
+    typedef FEValuesBase<dim,spacedim> FVB;
+    Assert (fe_values.update_flags & update_values, // the update_values flag must be set
+            typename FVB::ExcAccessToUninitializedField("update_values"));
+    Assert (fe_values.present_cell.get() != 0, // a cell must have been set
+            ExcMessage ("FEValues object is not reinit'ed to any cell"));
+    AssertDimension (fe_function.size(), // one entry of fe_function per dof of the DoF handler
+                     fe_values.present_cell->n_dofs_for_dof_handler());
+
+    // gather the values of fe_function at the dofs of this cell, then call the internal worker function
+    dealii::Vector<typename InputVector::value_type> dof_values (fe_values.dofs_per_cell);
+    fe_values.present_cell->get_interpolated_dof_values(fe_function, dof_values);
+    internal::do_function_values<dim,spacedim>
+    (dof_values, fe_values.finite_element_output.shape_values, shape_function_data, values);
+  }
+
+
+  // Vector view: gradients of fe_function on the current cell. After checking the preconditions, gathers the values
+  // of fe_function at the cell's dofs and passes them with the shape gradients to internal::do_function_derivatives<1,...>.
+  template <int dim, int spacedim>
+  template <class InputVector>
+  void
+  Vector<dim,spacedim>::
+  get_function_gradients (const InputVector &fe_function,
+                          std::vector<typename ProductType<gradient_type,typename InputVector::value_type>::type> &gradients) const
+  {
+    typedef FEValuesBase<dim,spacedim> FVB;
+    Assert (fe_values.update_flags & update_gradients, // the update_gradients flag must be set
+            typename FVB::ExcAccessToUninitializedField("update_gradients"));
+    Assert (fe_values.present_cell.get() != 0, // a cell must have been set
+            ExcMessage ("FEValues object is not reinit'ed to any cell"));
+    AssertDimension (fe_function.size(), // one entry of fe_function per dof of the DoF handler
+                     fe_values.present_cell->n_dofs_for_dof_handler());
+
+    // gather the values of fe_function at the dofs of this cell, then call the internal worker function
+    dealii::Vector<typename InputVector::value_type> dof_values (fe_values.dofs_per_cell);
+    fe_values.present_cell->get_interpolated_dof_values(fe_function, dof_values);
+    internal::do_function_derivatives<1,dim,spacedim>
+    (dof_values, fe_values.finite_element_output.shape_gradients, shape_function_data, gradients);
+  }
+
+  // Vector view: symmetric gradients of fe_function on the current cell. After checking the preconditions, gathers the
+  // cell-local dof values and passes them with the shape gradients to internal::do_function_symmetric_gradients.
+  template <int dim, int spacedim>
+  template <class InputVector>
+  void
+  Vector<dim,spacedim>::
+  get_function_symmetric_gradients (const InputVector &fe_function,
+                                    std::vector<typename ProductType<symmetric_gradient_type,typename InputVector::value_type>::type> &symmetric_gradients) const
+  {
+    typedef FEValuesBase<dim,spacedim> FVB;
+    Assert (fe_values.update_flags & update_gradients, // the update_gradients flag must be set
+            typename FVB::ExcAccessToUninitializedField("update_gradients"));
+    Assert (fe_values.present_cell.get() != 0, // a cell must have been set
+            ExcMessage ("FEValues object is not reinit'ed to any cell"));
+    AssertDimension (fe_function.size(), // one entry of fe_function per dof of the DoF handler
+                     fe_values.present_cell->n_dofs_for_dof_handler());
+
+    // gather the values of fe_function at the dofs of this cell, then call the internal worker function
+    dealii::Vector<typename InputVector::value_type> dof_values (fe_values.dofs_per_cell);
+    fe_values.present_cell->get_interpolated_dof_values(fe_function, dof_values);
+    internal::do_function_symmetric_gradients<dim,spacedim>
+    (dof_values, fe_values.finite_element_output.shape_gradients, shape_function_data,
+     symmetric_gradients);
+  }
+
+  // Vector view: divergences of fe_function on the current cell. These are derived from the shape gradients, so the
+  // update_gradients flag is required; the cell-local dof values and gradients go to internal::do_function_divergences.
+  template <int dim, int spacedim>
+  template <class InputVector>
+  void
+  Vector<dim,spacedim>::
+  get_function_divergences (const InputVector &fe_function,
+                            std::vector<typename ProductType<divergence_type,typename InputVector::value_type>::type> &divergences) const
+  {
+    typedef FEValuesBase<dim,spacedim> FVB;
+    Assert (fe_values.update_flags & update_gradients, // the update_gradients flag must be set
+            typename FVB::ExcAccessToUninitializedField("update_gradients"));
+    Assert (fe_values.present_cell.get() != 0, // a cell must have been set
+            ExcMessage ("FEValues object is not reinit'ed to any cell"));
+    AssertDimension (fe_function.size(), // one entry of fe_function per dof of the DoF handler
+                     fe_values.present_cell->n_dofs_for_dof_handler());
+
+    // gather the values of fe_function at the dofs of this cell,
+    // then call the internal worker function
+    dealii::Vector<typename InputVector::value_type> dof_values (fe_values.dofs_per_cell);
+    fe_values.present_cell->get_interpolated_dof_values(fe_function, dof_values);
+    internal::do_function_divergences<dim,spacedim>
+    (dof_values, fe_values.finite_element_output.shape_gradients, shape_function_data, divergences);
+  }
+
+  template <int dim, int spacedim>
+  template <class InputVector>
+  void
+  Vector<dim,spacedim>::
+  get_function_curls (const InputVector &fe_function,
+                      std::vector<typename ProductType<curl_type,typename InputVector::value_type>::type> &curls) const
+  {
+    typedef FEValuesBase<dim,spacedim> FVB;
+
+    Assert (fe_values.update_flags & update_gradients, // curls use the shape gradients, so the update_gradients flag must be set
+            typename FVB::ExcAccessToUninitializedField("update_gradients"));
+    Assert (fe_values.present_cell.get () != 0, // a cell must have been set
+            ExcMessage ("FEValues object is not reinited to any cell"));
+    AssertDimension (fe_function.size (), // one entry of fe_function per dof of the DoF handler
+                     fe_values.present_cell->n_dofs_for_dof_handler ());
+
+    // Vector view, curls: gather the values of fe_function at the dofs of this cell, then let internal::do_function_curls combine them with the shape gradients
+    dealii::Vector<typename InputVector::value_type> dof_values (fe_values.dofs_per_cell);
+    fe_values.present_cell->get_interpolated_dof_values (fe_function, dof_values);
+    internal::do_function_curls<dim,spacedim>
+    (dof_values, fe_values.finite_element_output.shape_gradients, shape_function_data, curls);
+  }
+
+  // Vector view: hessians of fe_function on the current cell, computed by internal::do_function_derivatives<2,...> from the cell-local dof values and the shape hessians.
+  template <int dim, int spacedim>
+  template <class InputVector>
+  void
+  Vector<dim,spacedim>::
+  get_function_hessians (const InputVector &fe_function,
+                         std::vector<typename ProductType<hessian_type,typename InputVector::value_type>::type> &hessians) const
+  {
+    typedef FEValuesBase<dim,spacedim> FVB;
+    Assert (fe_values.update_flags & update_hessians, // the update_hessians flag must be set
+            typename FVB::ExcAccessToUninitializedField("update_hessians"));
+    Assert (fe_values.present_cell.get() != 0, // a cell must have been set
+            ExcMessage ("FEValues object is not reinit'ed to any cell"));
+    AssertDimension (fe_function.size(), // one entry of fe_function per dof of the DoF handler
+                     fe_values.present_cell->n_dofs_for_dof_handler());
+
+    // gather the values of fe_function at the dofs of this cell, then call the internal worker function
+    dealii::Vector<typename InputVector::value_type> dof_values (fe_values.dofs_per_cell);
+    fe_values.present_cell->get_interpolated_dof_values(fe_function, dof_values);
+    internal::do_function_derivatives<2,dim,spacedim>
+    (dof_values, fe_values.finite_element_output.shape_hessians, shape_function_data, hessians);
+  }
+
+  // Vector view: laplacians of fe_function on the current cell. These are derived from the shape hessians, so the
+  // update_hessians flag is required; this function also checks the size of the output vector before doing any work.
+  template <int dim, int spacedim>
+  template <class InputVector>
+  void
+  Vector<dim,spacedim>::
+  get_function_laplacians (const InputVector &fe_function,
+                           std::vector<typename ProductType<value_type,typename InputVector::value_type>::type> &laplacians) const
+  {
+    typedef FEValuesBase<dim,spacedim> FVB;
+    Assert (fe_values.update_flags & update_hessians, // the update_hessians flag must be set
+            typename FVB::ExcAccessToUninitializedField("update_hessians"));
+    Assert (laplacians.size() == fe_values.n_quadrature_points, // one output entry per quadrature point
+            ExcDimensionMismatch(laplacians.size(), fe_values.n_quadrature_points));
+    Assert (fe_values.present_cell.get() != 0, // a cell must have been set
+            ExcMessage ("FEValues object is not reinit'ed to any cell"));
+    Assert (fe_function.size() == fe_values.present_cell->n_dofs_for_dof_handler(), // one entry of fe_function per dof of the DoF handler
+            ExcDimensionMismatch(fe_function.size(),
+                                 fe_values.present_cell->n_dofs_for_dof_handler()));
+
+    // gather the values of fe_function at the dofs of this cell, then call the internal worker function
+    dealii::Vector<typename InputVector::value_type> dof_values (fe_values.dofs_per_cell);
+    fe_values.present_cell->get_interpolated_dof_values(fe_function, dof_values);
+    internal::do_function_laplacians<dim,spacedim>
+    (dof_values, fe_values.finite_element_output.shape_hessians, shape_function_data, laplacians);
+  }
+
+  // Vector view: third derivatives of fe_function on the current cell, computed by internal::do_function_derivatives<3,...> from the cell-local dof values and the shape third derivatives.
+  template <int dim, int spacedim>
+  template <class InputVector>
+  void
+  Vector<dim,spacedim>::
+  get_function_third_derivatives (const InputVector &fe_function,
+                                  std::vector<typename ProductType<third_derivative_type,typename InputVector::value_type>::type> &third_derivatives) const
+  {
+    typedef FEValuesBase<dim,spacedim> FVB;
+    Assert (fe_values.update_flags & update_3rd_derivatives, // the update_3rd_derivatives flag must be set
+            typename FVB::ExcAccessToUninitializedField("update_3rd_derivatives"));
+    Assert (fe_values.present_cell.get() != 0, // a cell must have been set
+            ExcMessage ("FEValues object is not reinit'ed to any cell"));
+    AssertDimension (fe_function.size(), // one entry of fe_function per dof of the DoF handler
+                     fe_values.present_cell->n_dofs_for_dof_handler());
+
+    // gather the values of fe_function at the dofs of this cell, then call the internal worker function
+    dealii::Vector<typename InputVector::value_type> dof_values (fe_values.dofs_per_cell);
+    fe_values.present_cell->get_interpolated_dof_values(fe_function, dof_values);
+    internal::do_function_derivatives<3,dim,spacedim>
+    (dof_values, fe_values.finite_element_output.shape_3rd_derivatives, shape_function_data, third_derivatives);
+  }
+
+  // SymmetricTensor<2> view: values of fe_function on the current cell. After checking the preconditions, gathers the
+  // values of fe_function at the cell's dofs and passes them with the shape values to internal::do_function_values.
+  template <int dim, int spacedim>
+  template <class InputVector>
+  void
+  SymmetricTensor<2, dim, spacedim>::
+  get_function_values(const InputVector &fe_function,
+                      std::vector<typename ProductType<value_type,typename InputVector::value_type>::type> &values) const
+  {
+    typedef FEValuesBase<dim, spacedim> FVB;
+    Assert(fe_values.update_flags & update_values, // the update_values flag must be set
+           typename FVB::ExcAccessToUninitializedField("update_values"));
+    Assert(fe_values.present_cell.get() != 0, // a cell must have been set
+           ExcMessage("FEValues object is not reinit'ed to any cell"));
+    AssertDimension(fe_function.size(), // one entry of fe_function per dof of the DoF handler
+                    fe_values.present_cell->n_dofs_for_dof_handler());
+
+    // gather the values of fe_function at the dofs of this cell, then call the internal worker function
+    dealii::Vector<typename InputVector::value_type> dof_values(fe_values.dofs_per_cell);
+    fe_values.present_cell->get_interpolated_dof_values(fe_function, dof_values);
+    internal::do_function_values<dim,spacedim>
+    (dof_values, fe_values.finite_element_output.shape_values, shape_function_data, values);
+  }
+
+  // SymmetricTensor<2> view: divergences of fe_function on the current cell. These are derived from the shape gradients,
+  // so update_gradients is required; the cell-local dof values and gradients go to internal::do_function_divergences.
+  template <int dim, int spacedim>
+  template <class InputVector>
+  void
+  SymmetricTensor<2, dim, spacedim>::
+  get_function_divergences(const InputVector &fe_function,
+                           std::vector<typename ProductType<divergence_type,typename InputVector::value_type>::type> &divergences) const
+  {
+    typedef FEValuesBase<dim, spacedim> FVB;
+    Assert(fe_values.update_flags & update_gradients, // the update_gradients flag must be set
+           typename FVB::ExcAccessToUninitializedField("update_gradients"));
+    Assert(fe_values.present_cell.get() != 0, // a cell must have been set
+           ExcMessage("FEValues object is not reinit'ed to any cell"));
+    AssertDimension(fe_function.size(), // one entry of fe_function per dof of the DoF handler
+                    fe_values.present_cell->n_dofs_for_dof_handler());
+
+    // gather the values of fe_function at the dofs of this cell,
+    // then call the internal worker function
+    dealii::Vector<typename InputVector::value_type> dof_values(fe_values.dofs_per_cell);
+    fe_values.present_cell->get_interpolated_dof_values(fe_function, dof_values);
+    internal::do_function_divergences<dim,spacedim>
+    (dof_values, fe_values.finite_element_output.shape_gradients, shape_function_data, divergences);
+  }
+
+  template <int dim, int spacedim>
+  template <class InputVector>
+  void
+  Tensor<2, dim, spacedim>::
+  get_function_values(const InputVector &fe_function,
+                      std::vector<typename ProductType<value_type,typename InputVector::value_type>::type> &values) const
+  {
+    typedef FEValuesBase<dim, spacedim> FVB;
+    Assert(fe_values.update_flags & update_values, // the update_values flag must be set
+           typename FVB::ExcAccessToUninitializedField("update_values"));
+    Assert(fe_values.present_cell.get() != 0, // a cell must have been set
+           ExcMessage("FEValues object is not reinit'ed to any cell"));
+    AssertDimension(fe_function.size(), // one entry of fe_function per dof of the DoF handler
+                    fe_values.present_cell->n_dofs_for_dof_handler());
+
+    // Tensor<2> view, values: gather the values of fe_function at the dofs of this cell, then let internal::do_function_values combine them with the shape values
+    dealii::Vector<typename InputVector::value_type> dof_values(fe_values.dofs_per_cell);
+    fe_values.present_cell->get_interpolated_dof_values(fe_function, dof_values);
+    internal::do_function_values<dim,spacedim>
+    (dof_values, fe_values.finite_element_output.shape_values, shape_function_data, values);
+  }
+
+  // Tensor<2> view: divergences of fe_function on the current cell. These are derived from the shape gradients, so
+  // update_gradients is required; the cell-local dof values and gradients go to internal::do_function_divergences.
+  template <int dim, int spacedim>
+  template <class InputVector>
+  void
+  Tensor<2, dim, spacedim>::
+  get_function_divergences(const InputVector &fe_function,
+                           std::vector<typename ProductType<divergence_type,typename InputVector::value_type>::type> &divergences) const
+  {
+    typedef FEValuesBase<dim, spacedim> FVB;
+    Assert(fe_values.update_flags & update_gradients, // the update_gradients flag must be set
+           typename FVB::ExcAccessToUninitializedField("update_gradients"));
+    Assert(fe_values.present_cell.get() != 0, // a cell must have been set
+           ExcMessage("FEValues object is not reinit'ed to any cell"));
+    AssertDimension(fe_function.size(), // one entry of fe_function per dof of the DoF handler
+                    fe_values.present_cell->n_dofs_for_dof_handler());
+
+    // gather the values of fe_function at the dofs of this cell,
+    // then call the internal worker function
+    dealii::Vector<typename InputVector::value_type> dof_values(fe_values.dofs_per_cell);
+    fe_values.present_cell->get_interpolated_dof_values(fe_function, dof_values);
+    internal::do_function_divergences<dim,spacedim>
+    (dof_values, fe_values.finite_element_output.shape_gradients, shape_function_data, divergences);
+  }
+}
+
+
+namespace internal
+{
+  namespace FEValuesViews
+  {
+    template <int dim, int spacedim>
+    Cache<dim,spacedim>::Cache (const FEValuesBase<dim,spacedim> &fe_values)
+    {
+      const FiniteElement<dim,spacedim> &fe = fe_values.get_fe();
+
+      // Create the view objects. For each kind of view we first resize the
+      // container, which fills it with default-constructed objects; we then
+      // explicitly run the destructor of each element and use placement new
+      // to construct, in the same storage, the view we actually want.
+      // (Copying is wasteful, and not possible anyway because the class
+      // has reference members.)
+      //
+      // One scalar view is created for every component of the element.
+      const unsigned int n_scalars = fe.n_components();
+      scalars.resize (n_scalars);
+      for (unsigned int component=0; component<n_scalars; ++component)
+        {
+          // Use a typedef here to work around an issue with gcc-4.1:
+          typedef dealii::FEValuesViews::Scalar<dim,spacedim> ScalarView;
+          scalars[component].ScalarView::~ScalarView ();
+
+          new (&scalars[component])
+          dealii::FEValuesViews::Scalar<dim,spacedim>(fe_values,
+                                                      component);
+        }
+
+      // Compute the number of vector views that we can fit into this
+      // finite element. The count below is based on 'spacedim', the
+      // dimension of the output vector (not on 'dim'): there are
+      // n_components()-spacedim+1 possible first components, or no
+      // vector view at all if the element has fewer than 'spacedim'
+      // components. The loop index 'component' is the first component
+      // passed to each view's constructor.
+      const unsigned int n_vectors = (fe.n_components() >= spacedim ?
+                                      fe.n_components()-spacedim+1 :
+                                      0);
+      vectors.resize (n_vectors);
+      for (unsigned int component=0; component<n_vectors; ++component)
+        {
+          // Use a typedef here to work around an issue with gcc-4.1:
+          typedef dealii::FEValuesViews::Vector<dim,spacedim> VectorView;
+          vectors[component].VectorView::~VectorView ();
+
+          new (&vectors[component])
+          dealii::FEValuesViews::Vector<dim,spacedim>(fe_values,
+                                                      component);
+        }
+
+      // compute the number of symmetric second order tensor views in the same
+      // way; the count uses (dim*dim + dim)/2 components per view ('dim' here, unlike 'spacedim' above)
+      const unsigned int n_symmetric_second_order_tensors
+        = (fe.n_components() >= (dim*dim + dim)/2 ?
+           fe.n_components() - (dim*dim + dim)/2 + 1 :
+           0);
+      symmetric_second_order_tensors.resize(n_symmetric_second_order_tensors);
+      for (unsigned int component = 0; component < n_symmetric_second_order_tensors; ++component)
+        {
+          // Use a typedef here to work around an issue with gcc-4.1:
+          typedef dealii::FEValuesViews::SymmetricTensor<2, dim, spacedim> SymmetricTensorView;
+          symmetric_second_order_tensors[component].SymmetricTensorView::~SymmetricTensorView();
+
+          new (&symmetric_second_order_tensors[component])
+          dealii::FEValuesViews::SymmetricTensor<2, dim, spacedim > (fe_values,
+                                                                     component);
+        }
+
+
+      // compute the number of general (non-symmetric) second order tensor
+      // views in the same way; the count uses dim*dim components per view
+      const unsigned int n_second_order_tensors
+        = (fe.n_components() >= dim*dim ?
+           fe.n_components() - dim*dim + 1 :
+           0);
+      second_order_tensors.resize(n_second_order_tensors);
+      for (unsigned int component = 0; component < n_second_order_tensors; ++component)
+        {
+          // Use a typedef here to work around an issue with gcc-4.1:
+          typedef dealii::FEValuesViews::Tensor<2, dim, spacedim> TensorView;
+          second_order_tensors[component].TensorView::~TensorView();
+
+          new (&second_order_tensors[component])
+          dealii::FEValuesViews::Tensor<2, dim, spacedim > (fe_values,
+                                                            component);
+        }
+    }
+  }
+}
+
+
+/* ---------------- FEValuesBase<dim,spacedim>::CellIteratorBase --------- */
+// Abstract base class of the cell-iterator wrappers defined below: apart from the virtual destructor, the members visible here are pure virtual.
+template <int dim, int spacedim>
+class FEValuesBase<dim,spacedim>::CellIteratorBase
+{
+public:
+  /**
+   * Destructor. Made virtual
+   * since we store only
+   * pointers to the base
+   * class.
+   */
+  virtual ~CellIteratorBase ();
+
+  /**
+   * Conversion operator to an
+   * iterator for
+   * triangulations. For the
+   * original iterators this
+   * conversion is implicit,
+   * since they are derived
+   * classes. Here, however,
+   * we have a kind of
+   * parallel class hierarchy,
+   * so an explicit conversion
+   * operator is needed.
+   */
+  virtual
+  operator typename Triangulation<dim,spacedim>::cell_iterator () const = 0;
+
+  /**
+   * Return the number of
+   * degrees of freedom of the
+   * DoF handler object to
+   * which the iterator
+   * belongs.
+   */
+  virtual
+  types::global_dof_index
+  n_dofs_for_dof_handler () const = 0;
+
+#include "fe_values.decl.1.inst" // NOTE(review): contents not visible here; presumably further get_interpolated_dof_values declarations - confirm
+
+  /// Set the entries of @p out from
+  /// the index set @p in for the
+  /// degrees of freedom of the cell;
+  /// implemented by derived classes.
+  virtual
+  void
+  get_interpolated_dof_values (const IndexSet &in,
+                               Vector<IndexSet::value_type> &out) const = 0;
+};
+
+// Out-of-line definition of the virtual destructor; the body is empty.
+template <int dim, int spacedim>
+FEValuesBase<dim,spacedim>::CellIteratorBase::~CellIteratorBase ()
+{}
+
+/* ---------------- classes derived from FEValuesBase<dim,spacedim>::CellIteratorBase --------- */
+
+
+/**
+ * Implementation of the
+ * CellIteratorBase interface
+ * for an iterator type @p CI,
+ * a copy of which is stored.
+ * See the base class for the
+ * use of these classes.
+ *
+ * @author Wolfgang Bangerth, 2003
+ */
+template <int dim, int spacedim>
+template <typename CI>
+class FEValuesBase<dim,spacedim>::CellIterator : public FEValuesBase<dim,spacedim>::CellIteratorBase
+{
+public:
+  /**
+   * Constructor. Take an
+   * iterator and store a copy
+   * of it in this class.
+   */
+  CellIterator (const CI &cell);
+
+  /**
+   * Conversion operator to an
+   * iterator for
+   * triangulations. For the
+   * original iterators this
+   * conversion is implicit,
+   * since they are derived
+   * classes. Here, however,
+   * we have a kind of
+   * parallel class hierarchy,
+   * so an explicit conversion
+   * operator is needed.
+   */
+  virtual
+  operator typename Triangulation<dim,spacedim>::cell_iterator () const;
+
+  /**
+   * Return the number of
+   * degrees of freedom of the
+   * DoF handler object to
+   * which the iterator
+   * belongs.
+   */
+  virtual
+  types::global_dof_index
+  n_dofs_for_dof_handler () const;
+
+#include "fe_values.decl.2.inst" // NOTE(review): contents not visible here; presumably further get_interpolated_dof_values declarations - confirm
+
+  /// Set @p out[i] to 1 or 0
+  /// depending on whether the i-th
+  /// degree of freedom of the cell
+  /// is an element of @p in.
+  virtual
+  void
+  get_interpolated_dof_values (const IndexSet &in,
+                               Vector<IndexSet::value_type> &out) const;
+
+private:
+  /**
+   * Copy of the iterator which
+   * we use in this object.
+   */
+  const CI cell;
+};
+
+
+/**
+ * Implementation of a derived
+ * class of the
+ * CellIteratorBase
+ * interface. See there for a
+ * description of the use of
+ * these classes.
+ *
+ * This class is basically a
+ * specialization of the general
+ * template for iterators into
+ * Triangulation objects (but
+ * since C++ does not allow
+ * something like this for nested
+ * classes, it runs under a
+ * separate name). Since these do
+ * not implement the interface
+ * that we would like to call,
+ * the functions of this class
+ * cannot be implemented
+ * meaningfully. However, most
+ * functions of the FEValues
+ * class do not make any use of
+ * degrees of freedom at all, so
+ * it should be possible to call
+ * FEValues::reinit() with a tria
+ * iterator only; this class
+ * makes this possible, but
+ * whenever one of the functions
+ * of FEValues tries to call
+ * any of the functions of this
+ * class, an exception will be
+ * raised reminding the user that
+ * if she wants to use these
+ * features, then the
+ * FEValues object has to be
+ * reinitialized with a cell
+ * iterator that allows
+ * extracting degree of freedom
+ * information.
+ *
+ * @author Wolfgang Bangerth, 2003
+ */
+template <int dim, int spacedim>
+class FEValuesBase<dim,spacedim>::TriaCellIterator : public FEValuesBase<dim,spacedim>::CellIteratorBase
+{
+public:
+  /**
+   * Constructor. Take an
+   * iterator and store a copy
+   * of it in this class.
+   */
+  TriaCellIterator (const typename Triangulation<dim,spacedim>::cell_iterator &cell);
+
+  /**
+   * Conversion operator to an
+   * iterator for
+   * triangulations. For the
+   * original iterators this
+   * conversion is implicit,
+   * since they are derived
+   * classes. Here, however,
+   * we have a kind of
+   * parallel class hierarchy,
+   * so a conversion operator
+   * is needed. In this class
+   * the conversion is trivial,
+   * from and to the same type.
+   */
+  virtual
+  operator typename Triangulation<dim,spacedim>::cell_iterator () const;
+
+  /**
+   * Implement the respective
+   * function of the base
+   * class. Since this is not
+   * possible, we just raise an
+   * error.
+   */
+  virtual
+  types::global_dof_index
+  n_dofs_for_dof_handler () const;
+
+#include "fe_values.decl.2.inst" // NOTE(review): contents not visible here; presumably further get_interpolated_dof_values declarations - confirm
+
+  /// Override of the base class
+  /// function. The definition is not
+  /// visible here; per the class docs
+  /// it presumably raises an error.
+  virtual
+  void
+  get_interpolated_dof_values (const IndexSet &in,
+                               Vector<IndexSet::value_type> &out) const;
+
+private:
+  /**
+   * Copy of the iterator which
+   * we use in this object.
+   */
+  const typename Triangulation<dim,spacedim>::cell_iterator cell;
+
+  /**
+   * String to be displayed
+   * whenever one of the
+   * functions of this class is
+   * called. It is a static
+   * member variable, since we
+   * show the same message for
+   * all member functions.
+   */
+  static const char *const message_string;
+};
+
+
+
+
+/* ---------------- FEValuesBase<dim,spacedim>::CellIterator<CI> --------- */
+
+
+/**
+ * Constructor. Initialize the stored iterator from the given cell
+ * iterator.
+ */
+template <int dim, int spacedim>
+template <typename CI>
+FEValuesBase<dim,spacedim>::CellIterator<CI>::
+CellIterator (const CI &cell)
+  : cell (cell)
+{
+}
+
+
+
+/**
+ * Conversion operator: hand back the stored iterator converted to a
+ * plain triangulation cell iterator.
+ */
+template <int dim, int spacedim>
+template <typename CI>
+FEValuesBase<dim,spacedim>::CellIterator<CI>::
+operator typename Triangulation<dim,spacedim>::cell_iterator () const
+{
+  const typename Triangulation<dim,spacedim>::cell_iterator tria_cell = cell;
+  return tria_cell;
+}
+
+
+
+/**
+ * Return the number of degrees of freedom reported by the DoF handler
+ * object that the stored cell iterator refers to.
+ */
+template <int dim, int spacedim>
+template <typename CI>
+types::global_dof_index
+FEValuesBase<dim,spacedim>::CellIterator<CI>::n_dofs_for_dof_handler () const
+{
+  const types::global_dof_index n_global_dofs
+    = cell->get_dof_handler().n_dofs();
+  return n_global_dofs;
+}
+
+
+
+#include "fe_values.impl.1.inst"
+
+
+/**
+ * Fill @p out with an indicator for each local degree of freedom of the
+ * present cell: entry i is set to one if the global index of local DoF i
+ * is an element of the index set @p in, and to zero otherwise.
+ *
+ * Cells that have children are not supported; this is checked by an
+ * assertion.
+ */
+template <int dim, int spacedim>
+template <typename CI>
+void
+FEValuesBase<dim,spacedim>::CellIterator<CI>::
+get_interpolated_dof_values (const IndexSet &in,
+                             Vector<IndexSet::value_type> &out) const
+{
+  Assert (cell->has_children() == false, ExcNotImplemented());
+
+  // look up the global indices of all DoFs located on this cell
+  const unsigned int n_local_dofs = cell->get_fe().dofs_per_cell;
+  std::vector<types::global_dof_index> local_dof_indices (n_local_dofs);
+  cell->get_dof_indices (local_dof_indices);
+
+  // translate set membership into a 0/1 entry per local DoF
+  for (unsigned int dof=0; dof<n_local_dofs; ++dof)
+    {
+      if (in.is_element (local_dof_indices[dof]))
+        out[dof] = 1;
+      else
+        out[dof] = 0;
+    }
+}
+
+
+/* ---------------- FEValuesBase<dim,spacedim>::TriaCellIterator --------- */
+
+/**
+ * Definition of the text that the member functions of TriaCellIterator
+ * pass to ExcMessage when they are called although no degree of freedom
+ * information is available.
+ */
+template <int dim, int spacedim>
+const char *const
+FEValuesBase<dim,spacedim>::TriaCellIterator::message_string =
+  "You have previously called the FEValues::reinit function with a\n"
+  "cell iterator of type Triangulation<dim,spacedim>::cell_iterator. However,\n"
+  "when you do this, you cannot call some functions in the FEValues\n"
+  "class, such as the get_function_values/gradients/hessians/third_derivatives\n"
+  "functions. If you need these functions, then you need to call\n"
+  "FEValues::reinit with an iterator type that allows to extract\n"
+  "degrees of freedom, such as DoFHandler<dim,spacedim>::cell_iterator.";
+
+
+/**
+ * Constructor. Keep a copy of the given triangulation cell iterator.
+ */
+template <int dim, int spacedim>
+FEValuesBase<dim,spacedim>::TriaCellIterator::
+TriaCellIterator (const typename Triangulation<dim,spacedim>::cell_iterator &cell)
+  : cell (cell)
+{
+}
+
+
+
+/**
+ * Conversion operator. The stored iterator already has the requested
+ * type, so it is returned unchanged.
+ */
+template <int dim, int spacedim>
+FEValuesBase<dim,spacedim>::TriaCellIterator::
+operator typename Triangulation<dim,spacedim>::cell_iterator () const
+{
+  return this->cell;
+}
+
+
+
+/**
+ * A triangulation iterator has no DoF handler attached, so this
+ * function cannot be implemented; it raises an error with the common
+ * message of this class.
+ */
+template <int dim, int spacedim>
+types::global_dof_index
+FEValuesBase<dim,spacedim>::TriaCellIterator::n_dofs_for_dof_handler () const
+{
+  Assert (false, ExcMessage (message_string));
+
+  // dummy value for the case that the assertion above does not end
+  // the call
+  return 0;
+}
+
+
+#include "fe_values.impl.2.inst"
+
+
+/**
+ * Not implementable for a plain triangulation iterator: both arguments
+ * are ignored and an error with the common message of this class is
+ * raised.
+ */
+template <int dim, int spacedim>
+void
+FEValuesBase<dim,spacedim>::TriaCellIterator::
+get_interpolated_dof_values (const IndexSet               & /*in*/,
+                             Vector<IndexSet::value_type> & /*out*/) const
+{
+  Assert (false, ExcMessage (message_string));
+}
+
+
+
+namespace internal
+{
+  namespace FEValues
+  {
+    /**
+     * Resize those output arrays that are selected by @p flags so that
+     * they hold one entry per quadrature point. Entries created by the
+     * resize operation are set to the value returned by
+     * numbers::signaling_nan() for the respective element type, rather
+     * than to a default-constructed value.
+     *
+     * @param n_quadrature_points Number of entries each selected array
+     *   is resized to.
+     * @param flags Update flags; an array is only touched if the
+     *   corresponding flag is set.
+     */
+    template <int dim, int spacedim>
+    void
+    MappingRelatedData<dim,spacedim>::initialize (const unsigned int        n_quadrature_points,
+                                                  const UpdateFlags         flags)
+    {
+      if (flags & update_quadrature_points)
+        this->quadrature_points.resize(n_quadrature_points,
+                                       Point<spacedim>(numbers::signaling_nan<Tensor<1,spacedim> >()));
+
+      if (flags & update_JxW_values)
+        this->JxW_values.resize(n_quadrature_points,
+                                numbers::signaling_nan<double>());
+
+      if (flags & update_jacobians)
+        this->jacobians.resize(n_quadrature_points,
+                               numbers::signaling_nan<DerivativeForm<1,dim,spacedim> >());
+
+      if (flags & update_jacobian_grads)
+        this->jacobian_grads.resize(n_quadrature_points,
+                                    numbers::signaling_nan<DerivativeForm<2,dim,spacedim> >());
+
+      if (flags & update_jacobian_pushed_forward_grads)
+        this->jacobian_pushed_forward_grads.resize(n_quadrature_points,
+                                                   numbers::signaling_nan<Tensor<3,spacedim> >());
+
+      if (flags & update_jacobian_2nd_derivatives)
+        this->jacobian_2nd_derivatives.resize(n_quadrature_points,
+                                              numbers::signaling_nan<DerivativeForm<3,dim,spacedim> >());
+
+      if (flags & update_jacobian_pushed_forward_2nd_derivatives)
+        this->jacobian_pushed_forward_2nd_derivatives.resize(n_quadrature_points,
+                                                             numbers::signaling_nan<Tensor<4,spacedim> >());
+
+      // like all other fields, mark new entries as invalid instead of
+      // leaving them default-initialized.
+      // NOTE(review): the element type DerivativeForm<4,dim,spacedim> is
+      // inferred from the pattern of the sibling arrays -- confirm against
+      // the member declaration.
+      if (flags & update_jacobian_3rd_derivatives)
+        this->jacobian_3rd_derivatives.resize(n_quadrature_points,
+                                              numbers::signaling_nan<DerivativeForm<4,dim,spacedim> >());
+
+      if (flags & update_jacobian_pushed_forward_3rd_derivatives)
+        this->jacobian_pushed_forward_3rd_derivatives.resize(n_quadrature_points,
+                                                             numbers::signaling_nan<Tensor<5,spacedim> >());
+
+      if (flags & update_inverse_jacobians)
+        this->inverse_jacobians.resize(n_quadrature_points,
+                                       numbers::signaling_nan<DerivativeForm<1,spacedim,dim> >());
+
+      if (flags & update_boundary_forms)
+        this->boundary_forms.resize(n_quadrature_points,
+                                    numbers::signaling_nan<Tensor<1,spacedim> >());
+
+      if (flags & update_normal_vectors)
+        this->normal_vectors.resize(n_quadrature_points,
+                                    numbers::signaling_nan<Tensor<1,spacedim> >());
+    }
+
+
+
+    /**
+     * Return the sum of the values MemoryConsumption::memory_consumption()
+     * reports for the arrays stored in this object.
+     */
+    template <int dim, int spacedim>
+    std::size_t
+    MappingRelatedData<dim,spacedim>::memory_consumption () const
+    {
+      std::size_t total = 0;
+
+      total += MemoryConsumption::memory_consumption (JxW_values);
+      total += MemoryConsumption::memory_consumption (jacobians);
+      total += MemoryConsumption::memory_consumption (jacobian_grads);
+      total += MemoryConsumption::memory_consumption (jacobian_pushed_forward_grads);
+      total += MemoryConsumption::memory_consumption (jacobian_2nd_derivatives);
+      total += MemoryConsumption::memory_consumption (jacobian_pushed_forward_2nd_derivatives);
+      total += MemoryConsumption::memory_consumption (jacobian_3rd_derivatives);
+      total += MemoryConsumption::memory_consumption (jacobian_pushed_forward_3rd_derivatives);
+      total += MemoryConsumption::memory_consumption (inverse_jacobians);
+      total += MemoryConsumption::memory_consumption (quadrature_points);
+      total += MemoryConsumption::memory_consumption (normal_vectors);
+      total += MemoryConsumption::memory_consumption (boundary_forms);
+
+      return total;
+    }
+
+
+
+
+    /**
+     * Set up the shape-function-to-row table for @p fe and size the
+     * tables selected by @p flags. Each selected table gets one row per
+     * nonzero component of each shape function and one column per
+     * quadrature point, and is filled with signaling NaNs.
+     *
+     * @param n_quadrature_points Number of columns of each table.
+     * @param fe Finite element whose shape functions are tabulated.
+     * @param flags Update flags selecting the tables to be set up.
+     */
+    template <int dim, int spacedim>
+    void
+    FiniteElementRelatedData<dim,spacedim>::initialize (const unsigned int        n_quadrature_points,
+                                                        const FiniteElement<dim,spacedim> &fe,
+                                                        const UpdateFlags         flags)
+    {
+      // table translating (shape function, component) into the row of
+      // the data tables below
+      this->shape_function_to_row_table
+        = make_shape_function_to_row_table (fe);
+
+      // number of table rows: sum of the number of nonzero components
+      // over all shape functions
+      unsigned int n_nonzero_shape_components = 0;
+      for (unsigned int shape_func=0; shape_func<fe.dofs_per_cell; ++shape_func)
+        {
+          n_nonzero_shape_components += fe.n_nonzero_components (shape_func);
+        }
+      Assert (n_nonzero_shape_components >= fe.dofs_per_cell,
+              ExcInternalError());
+
+      // now size and invalidate exactly those tables that were asked for
+      if (flags & update_values)
+        {
+          this->shape_values.reinit(n_nonzero_shape_components,
+                                    n_quadrature_points);
+          this->shape_values.fill(numbers::signaling_nan<double>());
+        }
+
+      if (flags & update_gradients)
+        {
+          this->shape_gradients.reinit(n_nonzero_shape_components,
+                                       n_quadrature_points);
+          this->shape_gradients.fill (numbers::signaling_nan<Tensor<1,spacedim> >());
+        }
+
+      if (flags & update_hessians)
+        {
+          this->shape_hessians.reinit(n_nonzero_shape_components,
+                                      n_quadrature_points);
+          this->shape_hessians.fill (numbers::signaling_nan<Tensor<2,spacedim> >());
+        }
+
+      if (flags & update_3rd_derivatives)
+        {
+          this->shape_3rd_derivatives.reinit(n_nonzero_shape_components,
+                                             n_quadrature_points);
+          this->shape_3rd_derivatives.fill (numbers::signaling_nan<Tensor<3,spacedim> >());
+        }
+    }
+
+
+
+
+    /**
+     * Return the sum of the values MemoryConsumption::memory_consumption()
+     * reports for the shape function tables and the row lookup table of
+     * this object.
+     */
+    template <int dim, int spacedim>
+    std::size_t
+    FiniteElementRelatedData<dim,spacedim>::memory_consumption () const
+    {
+      std::size_t total = 0;
+
+      total += MemoryConsumption::memory_consumption (shape_values);
+      total += MemoryConsumption::memory_consumption (shape_gradients);
+      total += MemoryConsumption::memory_consumption (shape_hessians);
+      total += MemoryConsumption::memory_consumption (shape_3rd_derivatives);
+      total += MemoryConsumption::memory_consumption (shape_function_to_row_table);
+
+      return total;
+    }
+  }
+}
+
+
+
+/*------------------------------- FEValuesBase ---------------------------*/
+
+
+/**
+ * Constructor. Store the number of quadrature points and of degrees of
+ * freedom per cell, keep pointers to the mapping and finite element
+ * objects, and record the update flags.
+ *
+ * A quadrature formula without any points is rejected by an assertion.
+ */
+template <int dim, int spacedim>
+FEValuesBase<dim,spacedim>::
+FEValuesBase (const unsigned int                 n_q_points,
+              const unsigned int                 dofs_per_cell,
+              const UpdateFlags                  flags,
+              const Mapping<dim,spacedim>       &mapping,
+              const FiniteElement<dim,spacedim> &fe)
+  :
+  n_quadrature_points (n_q_points),
+  dofs_per_cell (dofs_per_cell),
+  mapping (&mapping, typeid(*this).name()),
+  fe (&fe, typeid(*this).name()),
+  fe_values_views_cache (*this)
+{
+  Assert (n_q_points > 0,
+          ExcMessage ("There is nothing useful you can do with an FEValues "
+                      "object when using a quadrature formula with zero "
+                      "quadrature points!"));
+
+  this->update_flags = flags;
+}
+
+
+
+/**
+ * Destructor. Disconnect the connection object tria_listener.
+ */
+template <int dim, int spacedim>
+FEValuesBase<dim,spacedim>::
+~FEValuesBase ()
+{
+  this->tria_listener.disconnect ();
+}
+
+
+
+namespace internal
+{
+  // put shape function part of get_function_xxx methods into separate
+  // internal functions. this allows us to reuse the same code for several
+  // functions (e.g. both the versions with and without indices) as well as
+  // the same code for gradients and Hessians. Moreover, this speeds up
+  // compilation and reduces the size of the final file since all the
+  // different global vectors get channeled through the same code.
+
+  /**
+   * Evaluate a scalar finite element function at all quadrature points:
+   * values[q] = sum_i dof_values_ptr[i] * shape_values(i,q).
+   *
+   * @param dof_values_ptr Pointer to the local expansion coefficients,
+   *   one per row of @p shape_values.
+   * @param shape_values Table with one row per shape function and one
+   *   column per quadrature point.
+   * @param values Output; must already have one entry per quadrature
+   *   point and is overwritten.
+   */
+  template <typename Number, typename Number2>
+  void
+  do_function_values (const Number2         *dof_values_ptr,
+                      const dealii::Table<2,double> &shape_values,
+                      std::vector<Number>   &values)
+  {
+    // scalar finite elements: one table row per degree of freedom
+    const unsigned int dofs_per_cell = shape_values.n_rows();
+
+    // if there are no shape functions at all, take the number of points
+    // from the output array instead of from the table
+    unsigned int n_quadrature_points = values.size();
+    if (dofs_per_cell > 0)
+      n_quadrature_points = shape_values.n_cols();
+    AssertDimension(values.size(), n_quadrature_points);
+
+    // start from zero ...
+    std::fill_n (values.begin(), n_quadrature_points, Number());
+
+    // ... and accumulate the contribution of every shape function. The
+    // element is scalar, so primitivity need not be checked. Each row of
+    // the table is traversed through a plain pointer, i.e. in the order
+    // in which the row is laid out.
+    for (unsigned int i=0; i<dofs_per_cell; ++i)
+      {
+        const Number2 coefficient = dof_values_ptr[i];
+
+        // zero coefficients contribute nothing
+        if (!(coefficient == Number2()))
+          {
+            const double *shape_row = &shape_values(i, 0);
+            for (unsigned int q=0; q<n_quadrature_points; ++q)
+              values[q] += coefficient * shape_row[q];
+          }
+      }
+  }
+
+  /**
+   * Evaluate a (possibly vector-valued) finite element function at all
+   * quadrature points.
+   *
+   * @param dof_values_ptr Pointer to @p component_multiple consecutive
+   *   blocks of fe.dofs_per_cell expansion coefficients.
+   * @param shape_values Table of shape function values; rows are
+   *   addressed through @p shape_function_to_row_table.
+   * @param fe The finite element in use.
+   * @param shape_function_to_row_table Maps
+   *   shape_function*n_components+component to a row of
+   *   @p shape_values.
+   * @param values Output, overwritten. Indexed as
+   *   values[component][point] if @p quadrature_points_fastest is true
+   *   and as values[point][component] otherwise.
+   * @param quadrature_points_fastest Selects the layout of @p values.
+   * @param component_multiple Number of coefficient blocks; block mc is
+   *   written to the output components
+   *   [mc*n_components, (mc+1)*n_components).
+   */
+  template <int dim, int spacedim, typename VectorType, typename Number>
+  void
+  do_function_values (const Number                      *dof_values_ptr,
+                      const dealii::Table<2,double>             &shape_values,
+                      const FiniteElement<dim,spacedim> &fe,
+                      const std::vector<unsigned int> &shape_function_to_row_table,
+                      VectorSlice<std::vector<VectorType> > &values,
+                      const bool quadrature_points_fastest  = false,
+                      const unsigned int component_multiple = 1)
+  {
+    // initialize with zero
+    for (unsigned int i=0; i<values.size(); ++i)
+      std::fill_n (values[i].begin(), values[i].size(),
+                   typename VectorType::value_type());
+
+    // see if the current cell has DoFs at all, and if not
+    // then there is nothing else to do.
+    const unsigned int dofs_per_cell = fe.dofs_per_cell;
+    if (dofs_per_cell == 0)
+      return;
+
+    const unsigned int n_quadrature_points = shape_values.n_cols();
+    const unsigned int n_components = fe.n_components();
+
+    // Assert that we can write all components into the result vectors
+    const unsigned result_components = n_components * component_multiple;
+    (void)result_components;
+    if (quadrature_points_fastest)
+      {
+        AssertDimension(values.size(), result_components);
+        for (unsigned int i=0; i<values.size(); ++i)
+          AssertDimension (values[i].size(), n_quadrature_points);
+      }
+    else
+      {
+        AssertDimension(values.size(), n_quadrature_points);
+        for (unsigned int i=0; i<values.size(); ++i)
+          AssertDimension (values[i].size(), result_components);
+      }
+
+    // add up contributions of trial functions.  now check whether the shape
+    // function is primitive or not. if it is, then set its only non-zero
+    // component, otherwise loop over components
+    for (unsigned int mc = 0; mc < component_multiple; ++mc)
+      for (unsigned int shape_func=0; shape_func<dofs_per_cell; ++shape_func)
+        {
+          const Number value = dof_values_ptr[shape_func+mc*dofs_per_cell];
+          if (value == Number())
+            continue;
+
+          if (fe.is_primitive(shape_func))
+            {
+              // the row table is indexed by the component within the
+              // element, i.e. without the offset of the current
+              // multiple; only the output position carries that offset
+              // (as in the non-primitive branch below)
+              const unsigned int base_comp =
+                fe.system_to_component_index(shape_func).first;
+              const unsigned int comp = base_comp + mc * n_components;
+              const unsigned int
+              row = shape_function_to_row_table[shape_func*n_components+base_comp];
+
+              const double *shape_value_ptr = &shape_values(row, 0);
+
+              if (quadrature_points_fastest)
+                {
+                  VectorType &values_comp = values[comp];
+                  for (unsigned int point=0; point<n_quadrature_points; ++point)
+                    values_comp[point] += value * (*shape_value_ptr++);
+                }
+              else
+                for (unsigned int point=0; point<n_quadrature_points; ++point)
+                  values[point][comp] += value * (*shape_value_ptr++);
+            }
+          else
+            for (unsigned int c=0; c<n_components; ++c)
+              {
+                if (fe.get_nonzero_components(shape_func)[c] == false)
+                  continue;
+
+                const unsigned int
+                row = shape_function_to_row_table[shape_func*n_components+c];
+
+                const double *shape_value_ptr = &shape_values(row, 0);
+                const unsigned int comp = c + mc * n_components;
+
+                if (quadrature_points_fastest)
+                  {
+                    VectorType &values_comp = values[comp];
+                    for (unsigned int point=0; point<n_quadrature_points;
+                         ++point)
+                      values_comp[point] += value * (*shape_value_ptr++);
+                  }
+                else
+                  for (unsigned int point=0; point<n_quadrature_points; ++point)
+                    values[point][comp] += value * (*shape_value_ptr++);
+              }
+        }
+  }
+
+  /**
+   * Evaluate derivatives of a scalar finite element function at all
+   * quadrature points:
+   * derivatives[q] = sum_i dof_values_ptr[i] * shape_derivatives[i][q].
+   *
+   * The same implementation serves gradients, Hessians and higher
+   * derivatives; they are distinguished by the tensor rank @p order.
+   *
+   * @param dof_values_ptr Pointer to the local expansion coefficients,
+   *   one per row of @p shape_derivatives.
+   * @param shape_derivatives Table with one row per shape function and
+   *   one column per quadrature point.
+   * @param derivatives Output; must already have one entry per
+   *   quadrature point and is overwritten.
+   */
+  template <int order, int spacedim, typename Number>
+  void
+  do_function_derivatives (const Number                     *dof_values_ptr,
+                           const dealii::Table<2,Tensor<order,spacedim> > &shape_derivatives,
+                           std::vector<Tensor<order,spacedim,Number> > &derivatives)
+  {
+    const unsigned int dofs_per_cell = shape_derivatives.size()[0];
+
+    // without any shape functions, take the number of points from the
+    // output array instead of from the table
+    unsigned int n_quadrature_points = derivatives.size();
+    if (dofs_per_cell > 0)
+      n_quadrature_points = shape_derivatives[0].size();
+    AssertDimension(derivatives.size(), n_quadrature_points);
+
+    // start from zero ...
+    std::fill_n (derivatives.begin(), n_quadrature_points, Tensor<order,spacedim,Number>());
+
+    // ... and accumulate the contribution of every shape function. The
+    // element is scalar, so primitivity need not be checked. Each row of
+    // the table is traversed through a plain pointer, i.e. in the order
+    // in which the row is laid out.
+    for (unsigned int i=0; i<dofs_per_cell; ++i)
+      {
+        const Number coefficient = dof_values_ptr[i];
+
+        // zero coefficients contribute nothing
+        if (!(coefficient == Number()))
+          {
+            const Tensor<order,spacedim> *shape_row
+              = &shape_derivatives[i][0];
+            for (unsigned int q=0; q<n_quadrature_points; ++q)
+              derivatives[q] += coefficient *
+                                dealii::Tensor<order,spacedim,Number>(shape_row[q]);
+          }
+      }
+  }
+
+  /**
+   * Evaluate derivatives of a (possibly vector-valued) finite element
+   * function at all quadrature points. The tensor rank @p order
+   * distinguishes gradients, Hessians and higher derivatives.
+   *
+   * @param dof_values_ptr Pointer to @p component_multiple consecutive
+   *   blocks of fe.dofs_per_cell expansion coefficients.
+   * @param shape_derivatives Table of shape function derivatives; rows
+   *   are addressed through @p shape_function_to_row_table.
+   * @param fe The finite element in use.
+   * @param shape_function_to_row_table Maps
+   *   shape_function*n_components+component to a row of
+   *   @p shape_derivatives.
+   * @param derivatives Output, overwritten. Indexed as
+   *   derivatives[component][point] if @p quadrature_points_fastest is
+   *   true and as derivatives[point][component] otherwise.
+   * @param quadrature_points_fastest Selects the layout of
+   *   @p derivatives.
+   * @param component_multiple Number of coefficient blocks; block mc is
+   *   written to the output components
+   *   [mc*n_components, (mc+1)*n_components).
+   */
+  template <int order, int dim, int spacedim, typename Number>
+  void
+  do_function_derivatives (const Number                      *dof_values_ptr,
+                           const dealii::Table<2,Tensor<order,spacedim> > &shape_derivatives,
+                           const FiniteElement<dim,spacedim> &fe,
+                           const std::vector<unsigned int> &shape_function_to_row_table,
+                           VectorSlice<std::vector<std::vector<Tensor<order,spacedim,Number> > > > &derivatives,
+                           const bool quadrature_points_fastest  = false,
+                           const unsigned int component_multiple = 1)
+  {
+    // initialize with zero
+    for (unsigned int i=0; i<derivatives.size(); ++i)
+      std::fill_n (derivatives[i].begin(), derivatives[i].size(),
+                   Tensor<order,spacedim,Number>());
+
+    // see if the current cell has DoFs at all, and if not
+    // then there is nothing else to do.
+    const unsigned int dofs_per_cell = fe.dofs_per_cell;
+    if (dofs_per_cell == 0)
+      return;
+
+
+    const unsigned int n_quadrature_points = shape_derivatives[0].size();
+    const unsigned int n_components = fe.n_components();
+
+    // Assert that we can write all components into the result vectors
+    const unsigned result_components = n_components * component_multiple;
+    (void)result_components;
+    if (quadrature_points_fastest)
+      {
+        AssertDimension(derivatives.size(), result_components);
+        for (unsigned int i=0; i<derivatives.size(); ++i)
+          AssertDimension (derivatives[i].size(), n_quadrature_points);
+      }
+    else
+      {
+        AssertDimension(derivatives.size(), n_quadrature_points);
+        for (unsigned int i=0; i<derivatives.size(); ++i)
+          AssertDimension (derivatives[i].size(), result_components);
+      }
+
+    // add up contributions of trial functions.  now check whether the shape
+    // function is primitive or not. if it is, then set its only non-zero
+    // component, otherwise loop over components
+    for (unsigned int mc = 0; mc < component_multiple; ++mc)
+      for (unsigned int shape_func=0; shape_func<dofs_per_cell; ++shape_func)
+        {
+          const Number value = dof_values_ptr[shape_func+mc*dofs_per_cell];
+          if (value == Number())
+            continue;
+
+          if (fe.is_primitive(shape_func))
+            {
+              // the row table is indexed by the component within the
+              // element, i.e. without the offset of the current
+              // multiple; only the output position carries that offset
+              // (as in the non-primitive branch below)
+              const unsigned int base_comp =
+                fe.system_to_component_index(shape_func).first;
+              const unsigned int comp = base_comp + mc * n_components;
+              const unsigned int
+              row = shape_function_to_row_table[shape_func*n_components+base_comp];
+
+              const Tensor<order,spacedim> *shape_derivative_ptr =
+                &shape_derivatives[row][0];
+
+              if (quadrature_points_fastest)
+                for (unsigned int point=0; point<n_quadrature_points; ++point)
+                  derivatives[comp][point] += value *
+                                              dealii::Tensor<order,spacedim,Number>(*shape_derivative_ptr++);
+              else
+                for (unsigned int point=0; point<n_quadrature_points; ++point)
+                  derivatives[point][comp] += value *
+                                              dealii::Tensor<order,spacedim,Number>(*shape_derivative_ptr++);
+            }
+          else
+            for (unsigned int c=0; c<n_components; ++c)
+              {
+                if (fe.get_nonzero_components(shape_func)[c] == false)
+                  continue;
+
+                const unsigned int
+                row = shape_function_to_row_table[shape_func*n_components+c];
+
+                const Tensor<order,spacedim> *shape_derivative_ptr =
+                  &shape_derivatives[row][0];
+                const unsigned int comp = c + mc * n_components;
+
+                if (quadrature_points_fastest)
+                  for (unsigned int point=0; point<n_quadrature_points; ++point)
+                    derivatives[comp][point] += value *
+                                                dealii::Tensor<order,spacedim,Number>(*shape_derivative_ptr++);
+                else
+                  for (unsigned int point=0; point<n_quadrature_points; ++point)
+                    derivatives[point][comp] += value *
+                                                dealii::Tensor<order,spacedim,Number>(*shape_derivative_ptr++);
+              }
+        }
+  }
+
+  /**
+   * Evaluate the Laplacian of a scalar finite element function at all
+   * quadrature points:
+   * laplacians[q] = sum_i dof_values_ptr[i] * trace(shape_hessians[i][q]).
+   *
+   * @param dof_values_ptr Pointer to the local expansion coefficients,
+   *   one per row of @p shape_hessians.
+   * @param shape_hessians Table with one row per shape function and one
+   *   column per quadrature point.
+   * @param laplacians Output; must already have one entry per
+   *   quadrature point and is overwritten.
+   */
+  template <int spacedim, typename Number, typename Number2>
+  void
+  do_function_laplacians (const Number2        *dof_values_ptr,
+                          const dealii::Table<2,Tensor<2,spacedim> > &shape_hessians,
+                          std::vector<Number> &laplacians)
+  {
+    const unsigned int dofs_per_cell = shape_hessians.size()[0];
+
+    // without any shape functions, take the number of points from the
+    // output array instead of from the table
+    unsigned int n_quadrature_points = laplacians.size();
+    if (dofs_per_cell > 0)
+      n_quadrature_points = shape_hessians[0].size();
+    AssertDimension(laplacians.size(), n_quadrature_points);
+
+    // start from zero ...
+    std::fill_n (laplacians.begin(), n_quadrature_points, Number());
+
+    // ... and accumulate the contribution of every shape function. The
+    // element is scalar, and the Laplacian is the trace of the Hessian.
+    for (unsigned int i=0; i<dofs_per_cell; ++i)
+      {
+        const Number2 coefficient = dof_values_ptr[i];
+
+        // zero coefficients contribute nothing
+        if (!(coefficient == Number2()))
+          {
+            const Tensor<2,spacedim> *hessian_row
+              = &shape_hessians[i][0];
+            for (unsigned int q=0; q<n_quadrature_points; ++q)
+              laplacians[q] += coefficient * trace(hessian_row[q]);
+          }
+      }
+  }
+
+  /**
+   * Evaluate the Laplacians of the components of a (possibly
+   * vector-valued) finite element function at all quadrature points.
+   * The Laplacian is computed as the trace of the Hessian.
+   *
+   * @param dof_values_ptr Pointer to @p component_multiple consecutive
+   *   blocks of fe.dofs_per_cell expansion coefficients.
+   * @param shape_hessians Table of shape function Hessians; rows are
+   *   addressed through @p shape_function_to_row_table.
+   * @param fe The finite element in use.
+   * @param shape_function_to_row_table Maps
+   *   shape_function*n_components+component to a row of
+   *   @p shape_hessians.
+   * @param laplacians Output, overwritten. Indexed as
+   *   laplacians[component][point] if @p quadrature_points_fastest is
+   *   true and as laplacians[point][component] otherwise.
+   * @param quadrature_points_fastest Selects the layout of
+   *   @p laplacians.
+   * @param component_multiple Number of coefficient blocks; block mc is
+   *   written to the output components
+   *   [mc*n_components, (mc+1)*n_components).
+   */
+  template <int dim, int spacedim, typename VectorType, typename Number>
+  void
+  do_function_laplacians (const Number                    *dof_values_ptr,
+                          const dealii::Table<2,Tensor<2,spacedim> > &shape_hessians,
+                          const FiniteElement<dim,spacedim> &fe,
+                          const std::vector<unsigned int> &shape_function_to_row_table,
+                          std::vector<VectorType>         &laplacians,
+                          const bool quadrature_points_fastest  = false,
+                          const unsigned int component_multiple = 1)
+  {
+    // initialize with zero
+    for (unsigned int i=0; i<laplacians.size(); ++i)
+      std::fill_n (laplacians[i].begin(), laplacians[i].size(),
+                   typename VectorType::value_type());
+
+    // see if the current cell has DoFs at all, and if not
+    // then there is nothing else to do.
+    const unsigned int dofs_per_cell = fe.dofs_per_cell;
+    if (dofs_per_cell == 0)
+      return;
+
+
+    const unsigned int n_quadrature_points = shape_hessians[0].size();
+    const unsigned int n_components = fe.n_components();
+
+    // Assert that we can write all components into the result vectors
+    const unsigned result_components = n_components * component_multiple;
+    (void)result_components;
+    if (quadrature_points_fastest)
+      {
+        AssertDimension(laplacians.size(), result_components);
+        for (unsigned int i=0; i<laplacians.size(); ++i)
+          AssertDimension (laplacians[i].size(), n_quadrature_points);
+      }
+    else
+      {
+        AssertDimension(laplacians.size(), n_quadrature_points);
+        for (unsigned int i=0; i<laplacians.size(); ++i)
+          AssertDimension (laplacians[i].size(), result_components);
+      }
+
+    // add up contributions of trial functions.  now check whether the shape
+    // function is primitive or not. if it is, then set its only non-zero
+    // component, otherwise loop over components
+    for (unsigned int mc = 0; mc < component_multiple; ++mc)
+      for (unsigned int shape_func=0; shape_func<dofs_per_cell; ++shape_func)
+        {
+          const Number value = dof_values_ptr[shape_func+mc*dofs_per_cell];
+          if (value == Number())
+            continue;
+
+          if (fe.is_primitive(shape_func))
+            {
+              // the row table is indexed by the component within the
+              // element, i.e. without the offset of the current
+              // multiple; only the output position carries that offset
+              // (as in the non-primitive branch below)
+              const unsigned int base_comp =
+                fe.system_to_component_index(shape_func).first;
+              const unsigned int comp = base_comp + mc * n_components;
+              const unsigned int
+              row = shape_function_to_row_table[shape_func*n_components+base_comp];
+
+              const Tensor<2,spacedim> *shape_hessian_ptr =
+                &shape_hessians[row][0];
+              if (quadrature_points_fastest)
+                {
+                  VectorType &laplacians_comp = laplacians[comp];
+                  for (unsigned int point=0; point<n_quadrature_points; ++point)
+                    laplacians_comp[point] += value * trace(*shape_hessian_ptr++);
+                }
+              else
+                for (unsigned int point=0; point<n_quadrature_points; ++point)
+                  laplacians[point][comp] += value * trace(*shape_hessian_ptr++);
+            }
+          else
+            for (unsigned int c=0; c<n_components; ++c)
+              {
+                if (fe.get_nonzero_components(shape_func)[c] == false)
+                  continue;
+
+                const unsigned int
+                row = shape_function_to_row_table[shape_func*n_components+c];
+
+                const Tensor<2,spacedim> *shape_hessian_ptr =
+                  &shape_hessians[row][0];
+                const unsigned int comp = c + mc * n_components;
+
+                if (quadrature_points_fastest)
+                  {
+                    VectorType &laplacians_comp = laplacians[comp];
+                    for (unsigned int point=0; point<n_quadrature_points;
+                         ++point)
+                      laplacians_comp[point] += value * trace(*shape_hessian_ptr++);
+                  }
+                else
+                  for (unsigned int point=0; point<n_quadrature_points; ++point)
+                    laplacians[point][comp] += value * trace(*shape_hessian_ptr++);
+              }
+        }
+  }
+}
+
+
+
+/**
+ * Compute the values of the scalar finite element function described by
+ * the global vector @p fe_function at the quadrature points of the
+ * present cell and store them in @p values.
+ *
+ * Requires the update_values flag, a scalar finite element, a prior
+ * reinit() to a cell, and a vector whose size matches the number of
+ * degrees of freedom reported by the present cell; all of this is
+ * checked by assertions.
+ */
+template <int dim, int spacedim>
+template <class InputVector>
+void
+FEValuesBase<dim,spacedim>::
+get_function_values (const InputVector                             &fe_function,
+                     std::vector<typename InputVector::value_type> &values) const
+{
+  typedef typename InputVector::value_type Number;
+
+  Assert (this->update_flags & update_values,
+          ExcAccessToUninitializedField("update_values"));
+  AssertDimension (fe->n_components(), 1);
+  Assert (present_cell.get() != 0,
+          ExcMessage ("FEValues object is not reinit'ed to any cell"));
+  AssertDimension (fe_function.size(),
+                   present_cell->n_dofs_for_dof_handler());
+
+  // let the cell extract the coefficients of its local DoFs ...
+  Vector<Number> local_dof_values (dofs_per_cell);
+  present_cell->get_interpolated_dof_values(fe_function, local_dof_values);
+
+  // ... and combine them with the tabulated shape function values
+  internal::do_function_values (local_dof_values.begin(),
+                                this->finite_element_output.shape_values,
+                                values);
+}
+
+
+
+/**
+ * Compute the values of a scalar finite element function at the
+ * quadrature points, where the expansion coefficients are the entries
+ * of @p fe_function selected by @p indices (one index per local degree
+ * of freedom). The result is stored in @p values.
+ *
+ * Requires the update_values flag and a scalar finite element; both are
+ * checked by assertions.
+ */
+template <int dim, int spacedim>
+template <class InputVector>
+void
+FEValuesBase<dim,spacedim>::
+get_function_values (const InputVector &fe_function,
+                     const VectorSlice<const std::vector<types::global_dof_index> > &indices,
+                     std::vector<typename InputVector::value_type> &values) const
+{
+  typedef typename InputVector::value_type Number;
+
+  Assert (this->update_flags & update_values,
+          ExcAccessToUninitializedField("update_values"));
+  AssertDimension (fe->n_components(), 1);
+  AssertDimension (indices.size(), dofs_per_cell);
+
+  if (dofs_per_cell > 100)
+    {
+      // too many coefficients for the fixed-size buffer below: use a
+      // dynamically sized vector
+      Vector<Number> gathered_values(dofs_per_cell);
+      for (unsigned int dof=0; dof<dofs_per_cell; ++dof)
+        gathered_values[dof] = get_vector_element (fe_function, indices[dof]);
+
+      internal::do_function_values(gathered_values.begin(),
+                                   this->finite_element_output.shape_values,
+                                   values);
+    }
+  else
+    {
+      // avoid allocation when the local size is small enough
+      Number gathered_values[100];
+      for (unsigned int dof=0; dof<dofs_per_cell; ++dof)
+        gathered_values[dof] = get_vector_element (fe_function, indices[dof]);
+
+      internal::do_function_values(&gathered_values[0],
+                                   this->finite_element_output.shape_values,
+                                   values);
+    }
+}
+
+
+
+/**
+ * Compute the values of the (possibly vector-valued) finite element
+ * function described by the global vector @p fe_function at the
+ * quadrature points of the present cell. On return, values[q] holds the
+ * components of the function at quadrature point q.
+ *
+ * Requires a prior reinit() to a cell, the update_values flag, and a
+ * vector whose size matches the number of degrees of freedom reported
+ * by the present cell; all of this is checked by assertions.
+ */
+template <int dim, int spacedim>
+template <class InputVector>
+void
+FEValuesBase<dim,spacedim>::
+get_function_values (const InputVector                                      &fe_function,
+                     std::vector<Vector<typename InputVector::value_type> > &values) const
+{
+  typedef typename InputVector::value_type Number;
+
+  Assert (present_cell.get() != 0,
+          ExcMessage ("FEValues object is not reinit'ed to any cell"));
+
+  Assert (this->update_flags & update_values,
+          ExcAccessToUninitializedField("update_values"));
+  AssertDimension (fe_function.size(), present_cell->n_dofs_for_dof_handler());
+
+  // let the cell extract the coefficients of its local DoFs ...
+  Vector<Number> local_dof_values (dofs_per_cell);
+  present_cell->get_interpolated_dof_values(fe_function, local_dof_values);
+
+  // ... and combine them with the tabulated shape function values,
+  // using the default [point][component] layout of the output
+  VectorSlice<std::vector<Vector<Number> > > val(values);
+  internal::do_function_values(local_dof_values.begin(),
+                               this->finite_element_output.shape_values,
+                               *fe,
+                               this->finite_element_output.shape_function_to_row_table,
+                               val);
+}
+
+
+
+// Evaluate a finite element function whose coefficients are the entries of
+// fe_function selected by indices, storing the result as one Vector per entry
+// of values. indices may contain several consecutive groups of dofs_per_cell
+// entries; the number of groups (indices.size()/dofs_per_cell) is forwarded
+// to internal::do_function_values() as the component multiple.
+//
+// All indices.size() coefficients are gathered into the local buffer, and the
+// buffer is sized by indices.size() in both branches, exactly as the sibling
+// overload taking a VectorSlice output does.
+template <int dim, int spacedim>
+template <class InputVector>
+void FEValuesBase<dim,spacedim>::get_function_values (
+  const InputVector &fe_function,
+  const VectorSlice<const std::vector<types::global_dof_index> > &indices,
+  std::vector<Vector<typename InputVector::value_type> > &values) const
+{
+  typedef typename InputVector::value_type Number;
+  // Size of indices must be a multiple of dofs_per_cell such that an integer
+  // number of function values is generated in each point.
+  Assert (indices.size() % dofs_per_cell == 0,
+          ExcNotMultiple(indices.size(), dofs_per_cell));
+  Assert (this->update_flags & update_values,
+          ExcAccessToUninitializedField("update_values"));
+
+  VectorSlice<std::vector<Vector<Number> > > val(values);
+
+  // avoid allocation when the number of coefficients is small enough
+  if (indices.size() <= 100)
+    {
+      Number dof_values[100];
+      // copy every selected coefficient, not just the first dofs_per_cell
+      for (unsigned int i=0; i<indices.size(); ++i)
+        dof_values[i] = get_vector_element (fe_function, indices[i]);
+      internal::do_function_values(&dof_values[0], this->finite_element_output.shape_values, *fe,
+                                   this->finite_element_output.shape_function_to_row_table, val,
+                                   false, indices.size()/dofs_per_cell);
+    }
+  else
+    {
+      // the buffer must hold all selected coefficients; a fixed length of
+      // 100 would be too short in this branch
+      Vector<Number> dof_values(indices.size());
+      for (unsigned int i=0; i<indices.size(); ++i)
+        dof_values[i] = get_vector_element (fe_function, indices[i]);
+      internal::do_function_values(dof_values.begin(), this->finite_element_output.shape_values, *fe,
+                                   this->finite_element_output.shape_function_to_row_table, val,
+                                   false, indices.size()/dofs_per_cell);
+    }
+}
+
+
+
+// Evaluate a finite element function whose coefficients are the entries of
+// fe_function selected by indices, writing into the VectorSlice values.
+// indices may contain several consecutive groups of dofs_per_cell entries;
+// the number of groups and the quadrature_points_fastest flag are forwarded
+// to internal::do_function_values().
+template <int dim, int spacedim>
+template <class InputVector>
+void FEValuesBase<dim,spacedim>::get_function_values (
+  const InputVector &fe_function,
+  const VectorSlice<const std::vector<types::global_dof_index> > &indices,
+  VectorSlice<std::vector<std::vector<typename InputVector::value_type> > > values,
+  bool quadrature_points_fastest) const
+{
+  typedef typename InputVector::value_type Number;
+  Assert (this->update_flags & update_values,
+          ExcAccessToUninitializedField("update_values"));
+
+  // indices has to consist of complete groups of dofs_per_cell entries
+  Assert (indices.size() % dofs_per_cell == 0,
+          ExcNotMultiple(indices.size(), dofs_per_cell));
+
+  // up to this many coefficients are kept in a fixed-size local array
+  const unsigned int max_stack_dofs = 100;
+  const unsigned int n_indices      = indices.size();
+
+  if (n_indices > max_stack_dofs)
+    {
+      // too many coefficients for the fixed-size array
+      Vector<Number> local_dof_values(n_indices);
+      for (unsigned int k=0; k<n_indices; ++k)
+        local_dof_values[k] = get_vector_element (fe_function, indices[k]);
+      internal::do_function_values(local_dof_values.begin(),
+                                   this->finite_element_output.shape_values,
+                                   *fe,
+                                   this->finite_element_output.shape_function_to_row_table,
+                                   values,
+                                   quadrature_points_fastest,
+                                   n_indices/dofs_per_cell);
+    }
+  else
+    {
+      Number local_dof_values[max_stack_dofs];
+      for (unsigned int k=0; k<n_indices; ++k)
+        local_dof_values[k] = get_vector_element (fe_function, indices[k]);
+      internal::do_function_values(&local_dof_values[0],
+                                   this->finite_element_output.shape_values,
+                                   *fe,
+                                   this->finite_element_output.shape_function_to_row_table,
+                                   values,
+                                   quadrature_points_fastest,
+                                   n_indices/dofs_per_cell);
+    }
+}
+
+
+
+// Compute the gradients of a finite element function on the present cell for
+// a single-component element. The cell's interpolated degrees of freedom are
+// combined with the shape gradient table by
+// internal::do_function_derivatives(), which fills gradients.
+template <int dim, int spacedim>
+template <class InputVector>
+void
+FEValuesBase<dim,spacedim>::get_function_gradients (
+  const InputVector           &fe_function,
+  std::vector<Tensor<1,spacedim,typename InputVector::value_type> > &gradients) const
+{
+  typedef typename InputVector::value_type Number;
+  Assert (this->update_flags & update_gradients,
+          ExcAccessToUninitializedField("update_gradients"));
+  AssertDimension (fe->n_components(), 1);
+  Assert (present_cell.get() != 0,
+          ExcMessage ("FEValues object is not reinit'ed to any cell"));
+  AssertDimension (fe_function.size(), present_cell->n_dofs_for_dof_handler());
+
+  // fetch the coefficients that belong to the present cell
+  Vector<Number> cell_dof_values (dofs_per_cell);
+  present_cell->get_interpolated_dof_values(fe_function, cell_dof_values);
+
+  internal::do_function_derivatives(cell_dof_values.begin(),
+                                    this->finite_element_output.shape_gradients,
+                                    gradients);
+}
+
+
+
+// Compute the gradients of a finite element function for a single-component
+// element, taking the dofs_per_cell coefficients from the entries of
+// fe_function selected by indices. The work is done by
+// internal::do_function_derivatives() on the shape gradient table.
+template <int dim, int spacedim>
+template <class InputVector>
+void FEValuesBase<dim,spacedim>::get_function_gradients (
+  const InputVector &fe_function,
+  const VectorSlice<const std::vector<types::global_dof_index> > &indices,
+  std::vector<Tensor<1,spacedim,typename InputVector::value_type> > &gradients) const
+{
+  typedef typename InputVector::value_type Number;
+  Assert (this->update_flags & update_gradients,
+          ExcAccessToUninitializedField("update_gradients"));
+  AssertDimension (fe->n_components(), 1);
+  AssertDimension (indices.size(), dofs_per_cell);
+
+  // up to this many coefficients are kept in a fixed-size local array
+  const unsigned int max_stack_dofs = 100;
+
+  if (dofs_per_cell > max_stack_dofs)
+    {
+      // too many coefficients for the fixed-size array
+      Vector<Number> local_dof_values(dofs_per_cell);
+      for (unsigned int k=0; k<dofs_per_cell; ++k)
+        local_dof_values[k] = get_vector_element (fe_function, indices[k]);
+      internal::do_function_derivatives(local_dof_values.begin(),
+                                        this->finite_element_output.shape_gradients,
+                                        gradients);
+    }
+  else
+    {
+      Number local_dof_values[max_stack_dofs];
+      for (unsigned int k=0; k<dofs_per_cell; ++k)
+        local_dof_values[k] = get_vector_element (fe_function, indices[k]);
+      internal::do_function_derivatives(&local_dof_values[0],
+                                        this->finite_element_output.shape_gradients,
+                                        gradients);
+    }
+}
+
+
+
+
+// Compute the gradients of a finite element function on the present cell,
+// storing them in a vector of vectors of rank-1 tensors. The cell's
+// interpolated degrees of freedom, the shape gradient table, the finite
+// element and the shape-function-to-row table are passed to
+// internal::do_function_derivatives().
+template <int dim, int spacedim>
+template <class InputVector>
+void
+FEValuesBase<dim,spacedim>::get_function_gradients (
+  const InputVector                              &fe_function,
+  std::vector<std::vector<Tensor<1,spacedim,typename InputVector::value_type> > > &gradients) const
+{
+  typedef typename InputVector::value_type Number;
+  Assert (this->update_flags & update_gradients,
+          ExcAccessToUninitializedField("update_gradients"));
+  Assert (present_cell.get() != 0,
+          ExcMessage ("FEValues object is not reinit'ed to any cell"));
+  AssertDimension (fe_function.size(), present_cell->n_dofs_for_dof_handler());
+
+  // fetch the coefficients that belong to the present cell
+  Vector<Number> cell_dof_values (dofs_per_cell);
+  present_cell->get_interpolated_dof_values(fe_function, cell_dof_values);
+
+  // wrap the output vector in a VectorSlice before handing it on
+  VectorSlice<std::vector<std::vector<Tensor<1,spacedim,Number> > > > gradients_slice(gradients);
+  internal::do_function_derivatives(cell_dof_values.begin(),
+                                    this->finite_element_output.shape_gradients,
+                                    *fe,
+                                    this->finite_element_output.shape_function_to_row_table,
+                                    gradients_slice);
+}
+
+
+
+// Compute the gradients of a finite element function whose coefficients are
+// the entries of fe_function selected by indices, writing into the
+// VectorSlice gradients. indices may contain several consecutive groups of
+// dofs_per_cell entries; the number of groups and the
+// quadrature_points_fastest flag are forwarded to
+// internal::do_function_derivatives().
+template <int dim, int spacedim>
+template <class InputVector>
+void FEValuesBase<dim,spacedim>::get_function_gradients (
+  const InputVector &fe_function,
+  const VectorSlice<const std::vector<types::global_dof_index> > &indices,
+  VectorSlice<std::vector<std::vector<Tensor<1,spacedim,typename InputVector::value_type> > > > gradients,
+  bool quadrature_points_fastest) const
+{
+  typedef typename InputVector::value_type Number;
+  // indices has to consist of complete groups of dofs_per_cell entries
+  Assert (indices.size() % dofs_per_cell == 0,
+          ExcNotMultiple(indices.size(), dofs_per_cell));
+  Assert (this->update_flags & update_gradients,
+          ExcAccessToUninitializedField("update_gradients"));
+
+  // up to this many coefficients are kept in a fixed-size local array
+  const unsigned int max_stack_dofs = 100;
+  const unsigned int n_indices      = indices.size();
+
+  if (n_indices > max_stack_dofs)
+    {
+      // too many coefficients for the fixed-size array
+      Vector<Number> local_dof_values(n_indices);
+      for (unsigned int k=0; k<n_indices; ++k)
+        local_dof_values[k] = get_vector_element (fe_function, indices[k]);
+      internal::do_function_derivatives(local_dof_values.begin(),
+                                        this->finite_element_output.shape_gradients,
+                                        *fe,
+                                        this->finite_element_output.shape_function_to_row_table,
+                                        gradients,
+                                        quadrature_points_fastest,
+                                        n_indices/dofs_per_cell);
+    }
+  else
+    {
+      Number local_dof_values[max_stack_dofs];
+      for (unsigned int k=0; k<n_indices; ++k)
+        local_dof_values[k] = get_vector_element (fe_function, indices[k]);
+      internal::do_function_derivatives(&local_dof_values[0],
+                                        this->finite_element_output.shape_gradients,
+                                        *fe,
+                                        this->finite_element_output.shape_function_to_row_table,
+                                        gradients,
+                                        quadrature_points_fastest,
+                                        n_indices/dofs_per_cell);
+    }
+}
+
+
+
+// Compute the Hessians (rank-2 tensors) of a finite element function on the
+// present cell for a single-component element. The cell's interpolated
+// degrees of freedom are combined with the shape Hessian table by
+// internal::do_function_derivatives(), which fills hessians.
+template <int dim, int spacedim>
+template <class InputVector>
+void
+FEValuesBase<dim,spacedim>::
+get_function_hessians (const InputVector                &fe_function,
+                       std::vector<Tensor<2,spacedim,typename InputVector::value_type> > &hessians) const
+{
+  typedef typename InputVector::value_type Number;
+  AssertDimension (fe->n_components(), 1);
+  Assert (this->update_flags & update_hessians,
+          ExcAccessToUninitializedField("update_hessians"));
+  Assert (present_cell.get() != 0,
+          ExcMessage ("FEValues object is not reinit'ed to any cell"));
+  AssertDimension (fe_function.size(), present_cell->n_dofs_for_dof_handler());
+
+  // fetch the coefficients that belong to the present cell
+  Vector<Number> cell_dof_values (dofs_per_cell);
+  present_cell->get_interpolated_dof_values(fe_function, cell_dof_values);
+
+  internal::do_function_derivatives(cell_dof_values.begin(),
+                                    this->finite_element_output.shape_hessians,
+                                    hessians);
+}
+
+
+
+// Compute the Hessians (rank-2 tensors) of a finite element function, taking
+// the dofs_per_cell coefficients from the entries of fe_function selected by
+// indices. The work is done by internal::do_function_derivatives() on the
+// shape Hessian table.
+//
+// The size assertion below dereferences present_cell, so the same
+// "not reinit'ed" check used by the other overloads is performed first;
+// otherwise a null present_cell would be dereferenced inside the assertion.
+template <int dim, int spacedim>
+template <class InputVector>
+void FEValuesBase<dim,spacedim>::get_function_hessians (
+  const InputVector &fe_function,
+  const VectorSlice<const std::vector<types::global_dof_index> > &indices,
+  std::vector<Tensor<2,spacedim,typename InputVector::value_type> > &hessians) const
+{
+  typedef typename InputVector::value_type Number;
+  Assert (this->update_flags & update_hessians,
+          ExcAccessToUninitializedField("update_hessians"));
+  Assert (present_cell.get() != 0,
+          ExcMessage ("FEValues object is not reinit'ed to any cell"));
+  AssertDimension (fe_function.size(), present_cell->n_dofs_for_dof_handler());
+  AssertDimension (indices.size(), dofs_per_cell);
+
+  // avoid allocation when the local size is small enough
+  if (dofs_per_cell <= 100)
+    {
+      Number dof_values[100];
+      for (unsigned int i=0; i<dofs_per_cell; ++i)
+        dof_values[i] = get_vector_element (fe_function, indices[i]);
+      internal::do_function_derivatives(&dof_values[0], this->finite_element_output.shape_hessians,
+                                        hessians);
+    }
+  else
+    {
+      Vector<Number> dof_values(dofs_per_cell);
+      for (unsigned int i=0; i<dofs_per_cell; ++i)
+        dof_values[i] = get_vector_element (fe_function, indices[i]);
+      internal::do_function_derivatives(dof_values.begin(), this->finite_element_output.shape_hessians,
+                                        hessians);
+    }
+}
+
+
+
+
+// Compute the Hessians of a finite element function on the present cell,
+// storing them in a vector of vectors of rank-2 tensors. The cell's
+// interpolated degrees of freedom, the shape Hessian table, the finite
+// element, the shape-function-to-row table and the
+// quadrature_points_fastest flag are passed to
+// internal::do_function_derivatives().
+template <int dim, int spacedim>
+template <class InputVector>
+void
+FEValuesBase<dim,spacedim>::
+get_function_hessians (const InputVector                         &fe_function,
+                       std::vector<std::vector<Tensor<2,spacedim,typename InputVector::value_type> > > &hessians,
+                       bool quadrature_points_fastest) const
+{
+  typedef typename InputVector::value_type Number;
+  Assert (this->update_flags & update_hessians,
+          ExcAccessToUninitializedField("update_hessians"));
+  Assert (present_cell.get() != 0,
+          ExcMessage ("FEValues object is not reinit'ed to any cell"));
+  AssertDimension (fe_function.size(), present_cell->n_dofs_for_dof_handler());
+
+  // fetch the coefficients that belong to the present cell
+  Vector<Number> cell_dof_values (dofs_per_cell);
+  present_cell->get_interpolated_dof_values(fe_function, cell_dof_values);
+
+  // wrap the output vector in a VectorSlice before handing it on
+  VectorSlice<std::vector<std::vector<Tensor<2,spacedim,Number> > > > hessians_slice(hessians);
+  internal::do_function_derivatives(cell_dof_values.begin(),
+                                    this->finite_element_output.shape_hessians,
+                                    *fe,
+                                    this->finite_element_output.shape_function_to_row_table,
+                                    hessians_slice,
+                                    quadrature_points_fastest);
+}
+
+
+
+// Compute the Hessians of a finite element function whose coefficients are
+// the entries of fe_function selected by indices, writing into the
+// VectorSlice hessians. indices may contain several consecutive groups of
+// dofs_per_cell entries; the number of groups and the
+// quadrature_points_fastest flag are forwarded to
+// internal::do_function_derivatives().
+template <int dim, int spacedim>
+template <class InputVector>
+void FEValuesBase<dim, spacedim>::get_function_hessians (
+  const InputVector &fe_function,
+  const VectorSlice<const std::vector<types::global_dof_index> > &indices,
+  VectorSlice<std::vector<std::vector<Tensor<2,spacedim,typename InputVector::value_type> > > > hessians,
+  bool quadrature_points_fastest) const
+{
+  typedef typename InputVector::value_type Number;
+  Assert (this->update_flags & update_hessians,
+          ExcAccessToUninitializedField("update_hessians"));
+  // indices has to consist of complete groups of dofs_per_cell entries
+  Assert (indices.size() % dofs_per_cell == 0,
+          ExcNotMultiple(indices.size(), dofs_per_cell));
+
+  // up to this many coefficients are kept in a fixed-size local array
+  const unsigned int max_stack_dofs = 100;
+  const unsigned int n_indices      = indices.size();
+
+  if (n_indices > max_stack_dofs)
+    {
+      // too many coefficients for the fixed-size array
+      Vector<Number> local_dof_values(n_indices);
+      for (unsigned int k=0; k<n_indices; ++k)
+        local_dof_values[k] = get_vector_element (fe_function, indices[k]);
+      internal::do_function_derivatives(local_dof_values.begin(),
+                                        this->finite_element_output.shape_hessians,
+                                        *fe,
+                                        this->finite_element_output.shape_function_to_row_table,
+                                        hessians,
+                                        quadrature_points_fastest,
+                                        n_indices/dofs_per_cell);
+    }
+  else
+    {
+      Number local_dof_values[max_stack_dofs];
+      for (unsigned int k=0; k<n_indices; ++k)
+        local_dof_values[k] = get_vector_element (fe_function, indices[k]);
+      internal::do_function_derivatives(&local_dof_values[0],
+                                        this->finite_element_output.shape_hessians,
+                                        *fe,
+                                        this->finite_element_output.shape_function_to_row_table,
+                                        hessians,
+                                        quadrature_points_fastest,
+                                        n_indices/dofs_per_cell);
+    }
+}
+
+
+
+// Compute the Laplacians of a finite element function on the present cell
+// for a single-component element. The cell's interpolated degrees of freedom
+// are combined with the shape Hessian table by
+// internal::do_function_laplacians(), which fills laplacians. This is why
+// the update_hessians flag is required here.
+template <int dim, int spacedim>
+template <class InputVector>
+void FEValuesBase<dim,spacedim>::get_function_laplacians (
+  const InputVector   &fe_function,
+  std::vector<typename InputVector::value_type> &laplacians) const
+{
+  typedef typename InputVector::value_type Number;
+  Assert (this->update_flags & update_hessians,
+          ExcAccessToUninitializedField("update_hessians"));
+  AssertDimension (fe->n_components(), 1);
+  Assert (present_cell.get() != 0,
+          ExcMessage ("FEValues object is not reinit'ed to any cell"));
+  AssertDimension (fe_function.size(), present_cell->n_dofs_for_dof_handler());
+
+  // fetch the coefficients that belong to the present cell
+  Vector<Number> cell_dof_values (dofs_per_cell);
+  present_cell->get_interpolated_dof_values(fe_function, cell_dof_values);
+
+  internal::do_function_laplacians(cell_dof_values.begin(),
+                                   this->finite_element_output.shape_hessians,
+                                   laplacians);
+}
+
+
+
+// Compute the Laplacians of a finite element function for a single-component
+// element, taking the dofs_per_cell coefficients from the entries of
+// fe_function selected by indices. The work is done by
+// internal::do_function_laplacians() on the shape Hessian table.
+template <int dim, int spacedim>
+template <class InputVector>
+void FEValuesBase<dim,spacedim>::get_function_laplacians (
+  const InputVector &fe_function,
+  const VectorSlice<const std::vector<types::global_dof_index> > &indices,
+  std::vector<typename InputVector::value_type> &laplacians) const
+{
+  typedef typename InputVector::value_type Number;
+  Assert (this->update_flags & update_hessians,
+          ExcAccessToUninitializedField("update_hessians"));
+  AssertDimension (fe->n_components(), 1);
+  AssertDimension (indices.size(), dofs_per_cell);
+
+  // up to this many coefficients are kept in a fixed-size local array
+  const unsigned int max_stack_dofs = 100;
+
+  if (dofs_per_cell > max_stack_dofs)
+    {
+      // too many coefficients for the fixed-size array
+      Vector<Number> local_dof_values(dofs_per_cell);
+      for (unsigned int k=0; k<dofs_per_cell; ++k)
+        local_dof_values[k] = get_vector_element (fe_function, indices[k]);
+      internal::do_function_laplacians(local_dof_values.begin(),
+                                       this->finite_element_output.shape_hessians,
+                                       laplacians);
+    }
+  else
+    {
+      Number local_dof_values[max_stack_dofs];
+      for (unsigned int k=0; k<dofs_per_cell; ++k)
+        local_dof_values[k] = get_vector_element (fe_function, indices[k]);
+      internal::do_function_laplacians(&local_dof_values[0],
+                                       this->finite_element_output.shape_hessians,
+                                       laplacians);
+    }
+}
+
+
+
+// Compute the Laplacians of a finite element function on the present cell,
+// storing the result as one Vector per entry of laplacians. The cell's
+// interpolated degrees of freedom, the shape Hessian table, the finite
+// element and the shape-function-to-row table are passed to
+// internal::do_function_laplacians().
+template <int dim, int spacedim>
+template <class InputVector>
+void FEValuesBase<dim,spacedim>::get_function_laplacians (
+  const InputVector            &fe_function,
+  std::vector<Vector<typename InputVector::value_type> > &laplacians) const
+{
+  typedef typename InputVector::value_type Number;
+  Assert (present_cell.get() != 0,
+          ExcMessage ("FEValues object is not reinit'ed to any cell"));
+  Assert (this->update_flags & update_hessians,
+          ExcAccessToUninitializedField("update_hessians"));
+  AssertDimension (fe_function.size(), present_cell->n_dofs_for_dof_handler());
+
+  // fetch the coefficients that belong to the present cell
+  Vector<Number> cell_dof_values (dofs_per_cell);
+  present_cell->get_interpolated_dof_values(fe_function, cell_dof_values);
+
+  internal::do_function_laplacians(cell_dof_values.begin(),
+                                   this->finite_element_output.shape_hessians,
+                                   *fe,
+                                   this->finite_element_output.shape_function_to_row_table,
+                                   laplacians);
+}
+
+
+
+// Compute the Laplacians of a finite element function whose coefficients are
+// the entries of fe_function selected by indices, storing the result as one
+// Vector per entry of laplacians. indices may contain several consecutive
+// groups of dofs_per_cell entries; the number of groups is forwarded to
+// internal::do_function_laplacians(), with the quadrature-points-fastest
+// argument fixed to false.
+template <int dim, int spacedim>
+template <class InputVector>
+void FEValuesBase<dim,spacedim>::get_function_laplacians (
+  const InputVector &fe_function,
+  const VectorSlice<const std::vector<types::global_dof_index> > &indices,
+  std::vector<Vector<typename InputVector::value_type> > &laplacians) const
+{
+  typedef typename InputVector::value_type Number;
+  // indices has to consist of complete groups of dofs_per_cell entries so
+  // that a whole number of function values results in each point
+  Assert (indices.size() % dofs_per_cell == 0,
+          ExcNotMultiple(indices.size(), dofs_per_cell));
+  Assert (this->update_flags & update_hessians,
+          ExcAccessToUninitializedField("update_hessians"));
+
+  // up to this many coefficients are kept in a fixed-size local array
+  const unsigned int max_stack_dofs = 100;
+  const unsigned int n_indices      = indices.size();
+
+  if (n_indices > max_stack_dofs)
+    {
+      // too many coefficients for the fixed-size array
+      Vector<Number> local_dof_values(n_indices);
+      for (unsigned int k=0; k<n_indices; ++k)
+        local_dof_values[k] = get_vector_element (fe_function, indices[k]);
+      internal::do_function_laplacians(local_dof_values.begin(),
+                                       this->finite_element_output.shape_hessians,
+                                       *fe,
+                                       this->finite_element_output.shape_function_to_row_table,
+                                       laplacians,
+                                       false,
+                                       n_indices/dofs_per_cell);
+    }
+  else
+    {
+      Number local_dof_values[max_stack_dofs];
+      for (unsigned int k=0; k<n_indices; ++k)
+        local_dof_values[k] = get_vector_element (fe_function, indices[k]);
+      internal::do_function_laplacians(&local_dof_values[0],
+                                       this->finite_element_output.shape_hessians,
+                                       *fe,
+                                       this->finite_element_output.shape_function_to_row_table,
+                                       laplacians,
+                                       false,
+                                       n_indices/dofs_per_cell);
+    }
+}
+
+
+
+// Compute the Laplacians of a finite element function whose coefficients are
+// the entries of fe_function selected by indices, storing the result in a
+// vector of vectors. indices may contain several consecutive groups of
+// dofs_per_cell entries; the number of groups and the
+// quadrature_points_fastest flag are forwarded to
+// internal::do_function_laplacians().
+template <int dim, int spacedim>
+template <class InputVector>
+void FEValuesBase<dim,spacedim>::get_function_laplacians (
+  const InputVector &fe_function,
+  const VectorSlice<const std::vector<types::global_dof_index> > &indices,
+  std::vector<std::vector<typename InputVector::value_type> > &laplacians,
+  bool quadrature_points_fastest) const
+{
+  typedef typename InputVector::value_type Number;
+  // indices has to consist of complete groups of dofs_per_cell entries
+  Assert (indices.size() % dofs_per_cell == 0,
+          ExcNotMultiple(indices.size(), dofs_per_cell));
+  Assert (this->update_flags & update_hessians,
+          ExcAccessToUninitializedField("update_hessians"));
+
+  // up to this many coefficients are kept in a fixed-size local array
+  const unsigned int max_stack_dofs = 100;
+  const unsigned int n_indices      = indices.size();
+
+  if (n_indices > max_stack_dofs)
+    {
+      // too many coefficients for the fixed-size array
+      Vector<Number> local_dof_values(n_indices);
+      for (unsigned int k=0; k<n_indices; ++k)
+        local_dof_values[k] = get_vector_element (fe_function, indices[k]);
+      internal::do_function_laplacians(local_dof_values.begin(),
+                                       this->finite_element_output.shape_hessians,
+                                       *fe,
+                                       this->finite_element_output.shape_function_to_row_table,
+                                       laplacians,
+                                       quadrature_points_fastest,
+                                       n_indices/dofs_per_cell);
+    }
+  else
+    {
+      Number local_dof_values[max_stack_dofs];
+      for (unsigned int k=0; k<n_indices; ++k)
+        local_dof_values[k] = get_vector_element (fe_function, indices[k]);
+      internal::do_function_laplacians(&local_dof_values[0],
+                                       this->finite_element_output.shape_hessians,
+                                       *fe,
+                                       this->finite_element_output.shape_function_to_row_table,
+                                       laplacians,
+                                       quadrature_points_fastest,
+                                       n_indices/dofs_per_cell);
+    }
+}
+
+
+
+// Compute the third derivatives (rank-3 tensors) of a finite element function
+// on the present cell for a single-component element. The cell's
+// interpolated degrees of freedom are combined with the table of third shape
+// function derivatives by internal::do_function_derivatives(), which fills
+// third_derivatives.
+template <int dim, int spacedim>
+template <class InputVector>
+void
+FEValuesBase<dim,spacedim>::
+get_function_third_derivatives (const InputVector                &fe_function,
+                                std::vector<Tensor<3,spacedim,typename InputVector::value_type> > &third_derivatives) const
+{
+  typedef typename InputVector::value_type Number;
+  AssertDimension (fe->n_components(), 1);
+  Assert (this->update_flags & update_3rd_derivatives,
+          ExcAccessToUninitializedField("update_3rd_derivatives"));
+  Assert (present_cell.get() != 0,
+          ExcMessage ("FEValues object is not reinit'ed to any cell"));
+  AssertDimension (fe_function.size(), present_cell->n_dofs_for_dof_handler());
+
+  // fetch the coefficients that belong to the present cell
+  Vector<Number> cell_dof_values (dofs_per_cell);
+  present_cell->get_interpolated_dof_values(fe_function, cell_dof_values);
+
+  internal::do_function_derivatives(cell_dof_values.begin(),
+                                    this->finite_element_output.shape_3rd_derivatives,
+                                    third_derivatives);
+}
+
+
+
+// Compute the third derivatives (rank-3 tensors) of a finite element
+// function, taking the dofs_per_cell coefficients from the entries of
+// fe_function selected by indices. The work is done by
+// internal::do_function_derivatives() on the table of third shape function
+// derivatives.
+//
+// The size assertion below dereferences present_cell, so the same
+// "not reinit'ed" check used by the other overloads is performed first;
+// otherwise a null present_cell would be dereferenced inside the assertion.
+template <int dim, int spacedim>
+template <class InputVector>
+void FEValuesBase<dim,spacedim>::get_function_third_derivatives (
+  const InputVector &fe_function,
+  const VectorSlice<const std::vector<types::global_dof_index> > &indices,
+  std::vector<Tensor<3,spacedim,typename InputVector::value_type> > &third_derivatives) const
+{
+  typedef typename InputVector::value_type Number;
+  Assert (this->update_flags & update_3rd_derivatives,
+          ExcAccessToUninitializedField("update_3rd_derivatives"));
+  Assert (present_cell.get() != 0,
+          ExcMessage ("FEValues object is not reinit'ed to any cell"));
+  AssertDimension (fe_function.size(), present_cell->n_dofs_for_dof_handler());
+  AssertDimension (indices.size(), dofs_per_cell);
+
+  // avoid allocation when the local size is small enough
+  if (dofs_per_cell <= 100)
+    {
+      Number dof_values[100];
+      for (unsigned int i=0; i<dofs_per_cell; ++i)
+        dof_values[i] = get_vector_element (fe_function, indices[i]);
+      internal::do_function_derivatives(&dof_values[0], this->finite_element_output.shape_3rd_derivatives,
+                                        third_derivatives);
+    }
+  else
+    {
+      Vector<Number> dof_values(dofs_per_cell);
+      for (unsigned int i=0; i<dofs_per_cell; ++i)
+        dof_values[i] = get_vector_element (fe_function, indices[i]);
+      internal::do_function_derivatives(dof_values.begin(), this->finite_element_output.shape_3rd_derivatives,
+                                        third_derivatives);
+    }
+}
+
+
+
+
+// Compute the third derivatives of a finite element function on the present
+// cell, storing them in a vector of vectors of rank-3 tensors. The cell's
+// interpolated degrees of freedom, the table of third shape function
+// derivatives, the finite element, the shape-function-to-row table and the
+// quadrature_points_fastest flag are passed to
+// internal::do_function_derivatives().
+template <int dim, int spacedim>
+template <class InputVector>
+void
+FEValuesBase<dim,spacedim>::
+get_function_third_derivatives (const InputVector                         &fe_function,
+                                std::vector<std::vector<Tensor<3,spacedim,typename InputVector::value_type> > > &third_derivatives,
+                                bool quadrature_points_fastest) const
+{
+  typedef typename InputVector::value_type Number;
+  Assert (this->update_flags & update_3rd_derivatives,
+          ExcAccessToUninitializedField("update_3rd_derivatives"));
+  Assert (present_cell.get() != 0,
+          ExcMessage ("FEValues object is not reinit'ed to any cell"));
+  AssertDimension (fe_function.size(), present_cell->n_dofs_for_dof_handler());
+
+  // fetch the coefficients that belong to the present cell
+  Vector<Number> cell_dof_values (dofs_per_cell);
+  present_cell->get_interpolated_dof_values(fe_function, cell_dof_values);
+
+  // wrap the output vector in a VectorSlice before handing it on
+  VectorSlice<std::vector<std::vector<Tensor<3,spacedim,Number> > > > third_derivatives_slice(third_derivatives);
+  internal::do_function_derivatives(cell_dof_values.begin(),
+                                    this->finite_element_output.shape_3rd_derivatives,
+                                    *fe,
+                                    this->finite_element_output.shape_function_to_row_table,
+                                    third_derivatives_slice,
+                                    quadrature_points_fastest);
+}
+
+
+
+// Compute the third derivatives of a finite element function whose
+// coefficients are the entries of fe_function selected by indices, writing
+// into the VectorSlice third_derivatives. indices may contain several
+// consecutive groups of dofs_per_cell entries; the number of groups and the
+// quadrature_points_fastest flag are forwarded to
+// internal::do_function_derivatives().
+template <int dim, int spacedim>
+template <class InputVector>
+void FEValuesBase<dim, spacedim>::get_function_third_derivatives (
+  const InputVector &fe_function,
+  const VectorSlice<const std::vector<types::global_dof_index> > &indices,
+  VectorSlice<std::vector<std::vector<Tensor<3,spacedim,typename InputVector::value_type> > > > third_derivatives,
+  bool quadrature_points_fastest) const
+{
+  typedef typename InputVector::value_type Number;
+  Assert (this->update_flags & update_3rd_derivatives,
+          ExcAccessToUninitializedField("update_3rd_derivatives"));
+  // indices has to consist of complete groups of dofs_per_cell entries
+  Assert (indices.size() % dofs_per_cell == 0,
+          ExcNotMultiple(indices.size(), dofs_per_cell));
+
+  // up to this many coefficients are kept in a fixed-size local array
+  const unsigned int max_stack_dofs = 100;
+  const unsigned int n_indices      = indices.size();
+
+  if (n_indices > max_stack_dofs)
+    {
+      // too many coefficients for the fixed-size array
+      Vector<Number> local_dof_values(n_indices);
+      for (unsigned int k=0; k<n_indices; ++k)
+        local_dof_values[k] = get_vector_element (fe_function, indices[k]);
+      internal::do_function_derivatives(local_dof_values.begin(),
+                                        this->finite_element_output.shape_3rd_derivatives,
+                                        *fe,
+                                        this->finite_element_output.shape_function_to_row_table,
+                                        third_derivatives,
+                                        quadrature_points_fastest,
+                                        n_indices/dofs_per_cell);
+    }
+  else
+    {
+      Number local_dof_values[max_stack_dofs];
+      for (unsigned int k=0; k<n_indices; ++k)
+        local_dof_values[k] = get_vector_element (fe_function, indices[k]);
+      internal::do_function_derivatives(&local_dof_values[0],
+                                        this->finite_element_output.shape_3rd_derivatives,
+                                        *fe,
+                                        this->finite_element_output.shape_function_to_row_table,
+                                        third_derivatives,
+                                        quadrature_points_fastest,
+                                        n_indices/dofs_per_cell);
+    }
+}
+
+
+
+// Return the cell stored in present_cell as a Triangulation cell iterator.
+// The object held by present_cell is dereferenced and converted to the
+// return type.
+// NOTE(review): present_cell is dereferenced without checking that it is
+// set; presumably callers must have reinit'ed this object first -- confirm.
+template <int dim, int spacedim>
+const typename Triangulation<dim,spacedim>::cell_iterator
+FEValuesBase<dim,spacedim>::get_cell () const
+{
+  return *present_cell;
+}
+
+
+
+// Return a reference to the normal vectors stored in the mapping output.
+// Requires that update_normal_vectors is among the update flags.
+template <int dim, int spacedim>
+const std::vector<Tensor<1,spacedim> > &
+FEValuesBase<dim,spacedim>::get_all_normal_vectors () const
+{
+  // the typedef keeps the comma of the template argument list out of the
+  // Assert macro's argument list
+  typedef FEValuesBase<dim,spacedim> ThisClass;
+  Assert (this->update_flags & update_normal_vectors,
+          typename ThisClass::ExcAccessToUninitializedField("update_normal_vectors"));
+
+  const std::vector<Tensor<1,spacedim> > &normals = this->mapping_output.normal_vectors;
+  return normals;
+}
+
+
+
+// Return a copy of the normal vectors stored in the mapping output, with
+// each rank-1 tensor converted to a Point. Requires that
+// update_normal_vectors is among the update flags.
+template <int dim, int spacedim>
+std::vector<Point<spacedim> >
+FEValuesBase<dim,spacedim>::get_normal_vectors () const
+{
+  // the typedef keeps the comma of the template argument list out of the
+  // Assert macro's argument list
+  typedef FEValuesBase<dim,spacedim> ThisClass;
+  Assert (this->update_flags & update_normal_vectors,
+          typename ThisClass::ExcAccessToUninitializedField("update_normal_vectors"));
+
+  const std::vector<Tensor<1,spacedim> > &normals = this->mapping_output.normal_vectors;
+
+  // convert the tensors one by one and collect them in the result
+  std::vector<Point<spacedim> > normals_as_points;
+  normals_as_points.reserve (normals.size());
+  for (typename std::vector<Tensor<1,spacedim> >::const_iterator
+       normal = normals.begin(); normal != normals.end(); ++normal)
+    normals_as_points.push_back (Point<spacedim>(*normal));
+
+  return normals_as_points;
+}
+
+
+
+// Forward to the transform() function of the stored mapping: the entries of
+// original are transformed according to type, using the stored mapping_data,
+// and the results are written to transformed. Both vectors are passed to the
+// mapping wrapped by make_array_view().
+template <int dim, int spacedim>
+void
+FEValuesBase<dim,spacedim>::
+transform (std::vector<Tensor<1,spacedim> > &transformed,
+           const std::vector<Tensor<1,dim> > &original,
+           MappingType type) const
+{
+  mapping->transform(make_array_view(original),
+                     type,
+                     *mapping_data,
+                     make_array_view(transformed));
+}
+
+
+// Return the sum of the memory consumption figures of the members of this
+// object, as reported by sizeof and MemoryConsumption::memory_consumption().
+// For mapping_data and fe_data both the pointer member and the object it
+// points to are counted.
+template <int dim, int spacedim>
+std::size_t
+FEValuesBase<dim,spacedim>::memory_consumption () const
+{
+  std::size_t total = sizeof(this->update_flags);
+
+  total += MemoryConsumption::memory_consumption (n_quadrature_points);
+  total += sizeof (cell_similarity);
+  total += MemoryConsumption::memory_consumption (dofs_per_cell);
+
+  // mapping and the data associated with it
+  total += MemoryConsumption::memory_consumption (mapping);
+  total += MemoryConsumption::memory_consumption (mapping_data);
+  total += MemoryConsumption::memory_consumption (*mapping_data);
+  total += MemoryConsumption::memory_consumption (mapping_output);
+
+  // finite element and the data associated with it
+  total += MemoryConsumption::memory_consumption (fe);
+  total += MemoryConsumption::memory_consumption (fe_data);
+  total += MemoryConsumption::memory_consumption (*fe_data);
+  total += MemoryConsumption::memory_consumption (finite_element_output);
+
+  return total;
+}
+
+
+
+// Return the given update flags augmented by whatever the finite element and
+// the mapping report as additionally required.
+template <int dim, int spacedim>
+UpdateFlags
+FEValuesBase<dim,spacedim>::compute_update_flags (const UpdateFlags update_flags) const
+{
+  // ask the finite element first: what it needs may in turn require
+  // additional updates from the mapping
+  const UpdateFlags fe_flags = fe->requires_update_flags (update_flags);
+  const UpdateFlags requested_and_fe_flags = update_flags | fe_flags;
+
+  // a single pass is enough because mappings never require the finite
+  // element to compute something for them
+  return requested_and_fe_flags
+         | mapping->requires_update_flags (requested_and_fe_flags);
+}
+
+
+// Forget the present cell: disconnect tria_listener and reset present_cell.
+// This function is the slot that maybe_invalidate_previous_present_cell()
+// connects to triangulation signals. It asserts that a present cell exists.
+template <int dim, int spacedim>
+void
+FEValuesBase< dim, spacedim >::invalidate_present_cell ()
+{
+  // if there is no present cell, then we shouldn't be
+  // connected via a signal to a triangulation
+  Assert (present_cell.get() != 0, ExcInternalError());
+
+  // so delete the present cell and
+  // disconnect from the signal we have with
+  // it
+  tria_listener.disconnect ();
+  present_cell.reset ();
+}
+
+
+// Make sure this object listens to the triangulation that the given cell
+// belongs to. If a present cell exists and belongs to a different
+// triangulation, the present cell is invalidated and tria_listener is
+// reconnected to the new triangulation. If no present cell exists,
+// tria_listener is connected to the cell's triangulation. In both cases the
+// connected slot is invalidate_present_cell(). Nothing is done if the present
+// cell and the given cell share the same triangulation.
+template <int dim, int spacedim>
+void
+FEValuesBase< dim, spacedim >::
+maybe_invalidate_previous_present_cell (const typename Triangulation<dim,spacedim>::cell_iterator &cell)
+{
+  if (present_cell.get() != 0)
+    {
+      // compare the addresses of the two triangulations; the present cell is
+      // first converted explicitly to a Triangulation cell iterator
+      if (&cell->get_triangulation() !=
+          &present_cell->operator typename Triangulation<dim,spacedim>::cell_iterator()
+          ->get_triangulation())
+        {
+          // the triangulations for the previous cell and the current cell
+          // do not match. disconnect from the previous triangulation and
+          // connect to the current one; also invalidate the previous
+          // cell because we shouldn't be comparing cells from different
+          // triangulations
+          //
+          // note that invalidate_present_cell() disconnects tria_listener
+          // as well, so the listener is disconnected twice here
+          tria_listener.disconnect ();
+          invalidate_present_cell();
+          tria_listener =
+            cell->get_triangulation().signals.any_change.connect
+            (std_cxx11::bind (&FEValuesBase<dim,spacedim>::invalidate_present_cell,
+                              std_cxx11::ref(static_cast<FEValuesBase<dim,spacedim>&>(*this))));
+        }
+    }
+  else
+    {
+      // if this FEValues has never been set to any cell at all, then
+      // at least subscribe to the triangulation to get notified of
+      // changes
+      //
+      // NOTE(review): this branch subscribes to the post_refinement signal
+      // whereas the branch above subscribes to any_change -- confirm that
+      // the difference is intended
+      tria_listener =
+        cell->get_triangulation().signals.post_refinement.connect
+        (std_cxx11::bind (&FEValuesBase<dim,spacedim>::invalidate_present_cell,
+                          std_cxx11::ref(static_cast<FEValuesBase<dim,spacedim>&>(*this))));
+    }
+}
+
+
+template <int dim, int spacedim>
+inline
+void
+FEValuesBase<dim,spacedim>::check_cell_similarity
+(const typename Triangulation<dim,spacedim>::cell_iterator &cell)
+{
+  // Cell similarity detection depends on which cell an FEValues object
+  // happens to see first. With several threads, every thread works with
+  // its own FEValues object (inside its scratch data), and the first
+  // cell each of them sees may differ from run to run because tasks are
+  // scheduled dynamically. The result would be run-to-run differences in
+  // roundoff that propagate through the program. We therefore switch the
+  // detection off whenever more than one thread is in use. Most MPI
+  // runs are unaffected since multithreading is disabled there by
+  // default, but many other programs are, because the number of threads
+  // is rarely set explicitly.
+  //
+  // TODO: Is it reasonable to introduce a flag "unsafe" in the constructor of
+  // FEValues to re-enable this feature?
+  //
+  // The same answer, "no similarity", applies if there is no previous
+  // cell to compare with, and if the previous call marked its data as
+  // unusable for the next cell (MappingQ may modify data during a call).
+  if ((MultithreadInfo::n_threads() > 1) ||
+      (this->present_cell.get() == 0) ||
+      (cell_similarity == CellSimilarity::invalid_next_cell))
+    {
+      cell_similarity = CellSimilarity::none;
+      return;
+    }
+
+  // from here on there is a valid previous cell to compare against
+  const typename Triangulation<dim,spacedim>::cell_iterator
+  previous_cell = *this->present_cell;
+
+  if (!cell->is_translation_of (previous_cell))
+    {
+      cell_similarity = CellSimilarity::none;
+      return;
+    }
+
+  // for dim<spacedim, a translated cell whose direction flag differs
+  // from that of the previous cell counts as an inverted translation
+  const bool orientation_flipped
+    = (dim<spacedim) &&
+      (previous_cell->direction_flag() != cell->direction_flag());
+  cell_similarity = (orientation_flipped ?
+                     CellSimilarity::inverted_translation :
+                     CellSimilarity::translation);
+
+  // TODO: here, one could implement other checks for similarity, e.g. for
+  // children of a parallelogram.
+}
+
+
+// Accessor for the cell similarity value currently stored in this object.
+template <int dim, int spacedim>
+CellSimilarity::Similarity
+FEValuesBase<dim,spacedim>::get_cell_similarity () const
+{
+  return this->cell_similarity;
+}
+
+// Namespace-scope definition of the static constant FEValuesBase::dimension (no initializer here; presumably given in the class).
+template <int dim, int spacedim>
+const unsigned int FEValuesBase<dim,spacedim>::dimension;
+
+// Likewise for the static constant FEValuesBase::space_dimension.
+template <int dim, int spacedim>
+const unsigned int FEValuesBase<dim,spacedim>::space_dimension;
+
+/*------------------------------- FEValues -------------------------------*/
+// Namespace-scope definition of the static constant FEValues::integral_dimension.
+template <int dim, int spacedim>
+const unsigned int FEValues<dim,spacedim>::integral_dimension;
+
+
+// Constructor taking an explicit mapping. The base class is initialized with
+// update_default; the requested update_flags are processed by initialize().
+template <int dim, int spacedim>
+FEValues<dim,spacedim>::FEValues (const Mapping<dim,spacedim>       &mapping,
+                                  const FiniteElement<dim,spacedim> &fe,
+                                  const Quadrature<dim>             &q,
+                                  const UpdateFlags                  update_flags)
+  :
+  FEValuesBase<dim,spacedim> (q.size(),
+                              fe.dofs_per_cell,
+                              update_default,
+                              mapping,
+                              fe),
+  quadrature (q)
+{
+  initialize (update_flags);
+}
+
+// Constructor without a mapping argument: identical to the one above except
+// that StaticMappingQ1<dim,spacedim>::mapping is passed as the mapping.
+template <int dim, int spacedim>
+FEValues<dim,spacedim>::FEValues (const FiniteElement<dim,spacedim> &fe,
+                                  const Quadrature<dim>             &q,
+                                  const UpdateFlags                  update_flags)
+  :
+  FEValuesBase<dim,spacedim> (q.size(),
+                              fe.dofs_per_cell,
+                              update_default,
+                              StaticMappingQ1<dim,spacedim>::mapping,
+                              fe),
+  quadrature (q)
+{
+  initialize (update_flags);
+}
+
+// Shared by both constructors: expand the update flags, set up the output
+// objects and fetch the internal data objects of the FE and the mapping.
+template <int dim, int spacedim>
+void
+FEValues<dim,spacedim>::initialize (const UpdateFlags update_flags)
+{
+  // Normal vectors to cells can be
+  // computed only in the codimension
+  // one case; reject the flag otherwise.
+  typedef FEValuesBase<dim,spacedim> FEVB;
+  if (dim != spacedim-1)
+    Assert ((update_flags & update_normal_vectors) == false,
+            typename FEVB::ExcInvalidUpdateFlag());
+
+  const UpdateFlags flags = this->compute_update_flags (update_flags);
+
+  // set up the output objects of the mapping and of the finite element
+  this->mapping_output.initialize(this->n_quadrature_points, flags);
+  this->finite_element_output.initialize(this->n_quadrature_points, *this->fe, flags);
+
+  // then get objects into which the FE and the Mapping can store
+  // intermediate data used across calls to reinit. each is created in its own task
+  Threads::Task<typename FiniteElement<dim,spacedim>::InternalDataBase *>
+  fe_get_data = Threads::new_task (&FiniteElement<dim,spacedim>::get_data,
+                                   *this->fe,
+                                   flags,
+                                   *this->mapping,
+                                   quadrature,
+                                   this->finite_element_output);
+  Threads::Task<typename Mapping<dim,spacedim>::InternalDataBase *>
+  mapping_get_data = Threads::new_task (&Mapping<dim,spacedim>::get_data,
+                                        *this->mapping,
+                                        flags,
+                                        quadrature);
+
+  this->update_flags = flags;
+
+  // then collect the results of the two tasks above
+  this->fe_data.reset (fe_get_data.return_value());
+  this->mapping_data.reset (mapping_get_data.return_value());
+}
+
+
+namespace
+{
+  // Make @p present_cell own a freshly constructed Type(new_cell). If it
+  // already owns an object of exactly type Type, that object is destroyed
+  // and rebuilt in the same memory instead of being re-allocated. We do this
+  // because FEValues objects are heavily used in multithreaded contexts.
+  template <typename Type, typename Pointer, typename Iterator>
+  void
+  reset_pointer_in_place_if_possible
+  (std_cxx11::unique_ptr<Pointer> &present_cell,
+   const Iterator         &new_cell)
+  {
+    // in-place reuse is only attempted if the pointer is non-null
+    // and the dynamic type of the object it points to (typeid) is
+    // exactly the type we want to construct
+    if (present_cell.get()
+        &&
+        (typeid(*present_cell.get()) == typeid(Type)))
+      {
+        // run the destructor of the old object without freeing its memory
+        static_cast<const Type *>(present_cell.get())->~Type();
+
+        // then construct the new object in that same memory (placement new)
+        new(const_cast<void *>(static_cast<const void *>(present_cell.get()))) Type(new_cell);
+      }
+    else
+      // null pointer or different type: allocate a new object instead
+      present_cell.reset (new Type(new_cell));
+  }
+}
+
+
+template <int dim, int spacedim>
+void FEValues<dim,spacedim>::reinit (const typename Triangulation<dim,spacedim>::cell_iterator &cell)
+{
+  // a plain triangulation iterator carries no finite element that
+  // could be compared with this->fe, so no assertion is made here
+  this->maybe_invalidate_previous_present_cell (cell);
+  this->check_cell_similarity(cell);
+
+  reset_pointer_in_place_if_possible<typename FEValuesBase<dim,spacedim>::TriaCellIterator>
+  (this->present_cell, cell);
+
+  // the similarity check above compares
+  // with the previously stored cell, so
+  // it had to run before present_cell
+  // was replaced. the type-independent
+  // work happens in do_reinit().
+  do_reinit ();
+}
+
+
+
+template <int dim, int spacedim>
+template <template <int, int> class DoFHandlerType, bool lda>
+void
+FEValues<dim,spacedim>::reinit
+(const TriaIterator<DoFCellAccessor<DoFHandlerType<dim,spacedim>, lda> > &cell)
+{
+  // the finite element given to the
+  // constructor and the one returned by
+  // cell->get_fe() must agree (compared
+  // as FiniteElementData objects)
+  typedef FEValuesBase<dim,spacedim> FEVB;
+  Assert (static_cast<const FiniteElementData<dim>&>(*this->fe) ==
+          static_cast<const FiniteElementData<dim>&>(cell->get_fe()),
+          typename FEVB::ExcFEDontMatch());
+
+  this->maybe_invalidate_previous_present_cell (cell);
+  this->check_cell_similarity(cell);
+
+  reset_pointer_in_place_if_possible<typename FEValuesBase<dim,spacedim>::template
+  CellIterator<TriaIterator<DoFCellAccessor<DoFHandlerType<dim,spacedim>,
+                                            lda> > > >
+  (this->present_cell, cell);
+
+  // the similarity check above compares
+  // with the previously stored cell, so
+  // it had to run before present_cell
+  // was replaced. the type-independent
+  // work happens in do_reinit().
+  do_reinit ();
+}
+
+
+
+template <int dim, int spacedim>
+void FEValues<dim,spacedim>::do_reinit ()
+{
+  // step 1: the mapping fills mapping_output for the present
+  // cell. it is given the current cell similarity and returns
+  // the value to be used from now on, which may differ from
+  // the one passed in
+  this->cell_similarity
+    = this->get_mapping().fill_fe_values(*this->present_cell,
+                                         this->cell_similarity,
+                                         quadrature,
+                                         *this->mapping_data,
+                                         this->mapping_output);
+
+  // step 2: the finite element fills finite_element_output.
+  // it is given the mapping, the mapping's internal data and
+  // the mapping output from step 1, together with the
+  // (possibly updated) cell similarity
+  this->get_fe().fill_fe_values(*this->present_cell,
+                                this->cell_similarity,
+                                this->quadrature,
+                                this->get_mapping(),
+                                *this->mapping_data,
+                                this->mapping_output,
+                                *this->fe_data,
+                                this->finite_element_output);
+}
+
+
+
+template <int dim, int spacedim>
+std::size_t
+FEValues<dim,spacedim>::memory_consumption () const
+{
+  // what the base class reports, plus the quadrature formula stored here
+  const std::size_t base_class_bytes = FEValuesBase<dim,spacedim>::memory_consumption ();
+  return base_class_bytes + MemoryConsumption::memory_consumption (quadrature);
+}
+
+/*------------------------------- FEFaceValuesBase --------------------------*/
+
+// The unnamed UpdateFlags argument is ignored: the base class always receives update_default.
+template <int dim, int spacedim>
+FEFaceValuesBase<dim,spacedim>::FEFaceValuesBase (const unsigned int n_q_points,
+                                                  const unsigned int dofs_per_cell,
+                                                  const UpdateFlags,
+                                                  const Mapping<dim,spacedim> &mapping,
+                                                  const FiniteElement<dim,spacedim> &fe,
+                                                  const Quadrature<dim-1>& quadrature)
+  :
+  FEValuesBase<dim,spacedim> (n_q_points,
+                              dofs_per_cell,
+                              update_default,
+                              mapping,
+                              fe),
+  quadrature(quadrature)
+{}
+
+
+// Return the boundary forms held in mapping_output; asserts that update_boundary_forms is set.
+template <int dim, int spacedim>
+const std::vector<Tensor<1,spacedim> > &
+FEFaceValuesBase<dim,spacedim>::get_boundary_forms () const
+{
+  typedef FEValuesBase<dim,spacedim> FEVB;
+  Assert (this->update_flags & update_boundary_forms,
+          typename FEVB::ExcAccessToUninitializedField("update_boundary_forms"));
+  return this->mapping_output.boundary_forms;
+}
+
+
+
+template <int dim, int spacedim>
+std::size_t
+FEFaceValuesBase<dim,spacedim>::memory_consumption () const
+{
+  // what the base class reports, plus the face quadrature formula stored here
+  const std::size_t base_class_bytes = FEValuesBase<dim,spacedim>::memory_consumption ();
+  return base_class_bytes + MemoryConsumption::memory_consumption (quadrature);
+}
+
+/*------------------------------- FEFaceValues -------------------------------*/
+// Namespace-scope definitions of the static constants dimension and integral_dimension of FEFaceValues.
+template <int dim, int spacedim>
+const unsigned int FEFaceValues<dim,spacedim>::dimension;
+
+template <int dim, int spacedim>
+const unsigned int FEFaceValues<dim,spacedim>::integral_dimension;
+
+// Constructor taking an explicit mapping; update_flags are processed by initialize().
+template <int dim, int spacedim>
+FEFaceValues<dim,spacedim>::FEFaceValues (const Mapping<dim,spacedim>       &mapping,
+                                          const FiniteElement<dim,spacedim> &fe,
+                                          const Quadrature<dim-1>  &quadrature,
+                                          const UpdateFlags         update_flags)
+  :
+  FEFaceValuesBase<dim,spacedim> (quadrature.size(),
+                                  fe.dofs_per_cell,
+                                  update_flags,
+                                  mapping,
+                                  fe, quadrature)
+{
+  initialize (update_flags);
+}
+
+
+// Constructor without a mapping argument: uses StaticMappingQ1<dim,spacedim>::mapping.
+template <int dim, int spacedim>
+FEFaceValues<dim,spacedim>::FEFaceValues (const FiniteElement<dim,spacedim> &fe,
+                                          const Quadrature<dim-1>  &quadrature,
+                                          const UpdateFlags         update_flags)
+  :
+  FEFaceValuesBase<dim,spacedim> (quadrature.size(),
+                                  fe.dofs_per_cell,
+                                  update_flags,
+                                  StaticMappingQ1<dim,spacedim>::mapping,
+                                  fe, quadrature)
+{
+  initialize (update_flags);
+}
+
+// Shared by both constructors: expand the update flags, set up the output
+// objects and fetch the face data objects of the FE and the mapping.
+template <int dim, int spacedim>
+void
+FEFaceValues<dim,spacedim>::initialize (const UpdateFlags update_flags)
+{
+  const UpdateFlags flags = this->compute_update_flags (update_flags);
+
+  // set up the output objects of the mapping and of the finite element
+  this->mapping_output.initialize(this->n_quadrature_points, flags);
+  this->finite_element_output.initialize(this->n_quadrature_points, *this->fe, flags);
+
+  // then get objects into which the FE and the Mapping can store
+  // intermediate data used across calls to reinit. each is created in its own task
+  Threads::Task<typename FiniteElement<dim,spacedim>::InternalDataBase *>
+  fe_get_data = Threads::new_task (&FiniteElement<dim,spacedim>::get_face_data,
+                                   *this->fe,
+                                   flags,
+                                   *this->mapping,
+                                   this->quadrature,
+                                   this->finite_element_output);
+  Threads::Task<typename Mapping<dim,spacedim>::InternalDataBase *>
+  mapping_get_data = Threads::new_task (&Mapping<dim,spacedim>::get_face_data,
+                                        *this->mapping,
+                                        flags,
+                                        this->quadrature);
+
+  this->update_flags = flags;
+
+  // then collect the results of the two tasks above
+  this->fe_data.reset (fe_get_data.return_value());
+  this->mapping_data.reset (mapping_get_data.return_value());
+}
+
+
+
+template <int dim, int spacedim>
+template <template <int, int> class DoFHandlerType, bool lda>
+void
+FEFaceValues<dim,spacedim>::reinit
+(const TriaIterator<DoFCellAccessor<DoFHandlerType<dim,spacedim>, lda> > &cell,
+ const unsigned int face_no)
+{
+  // the finite element given to the
+  // constructor must agree with the one
+  // the DoFHandler holds for this cell's
+  // active_fe_index
+  typedef FEValuesBase<dim,spacedim> FEVB;
+  Assert (static_cast<const FiniteElementData<dim>&>(*this->fe) ==
+          static_cast<const FiniteElementData<dim>&>(
+            cell->get_dof_handler().get_fe()[cell->active_fe_index ()]),
+          typename FEVB::ExcFEDontMatch());
+
+  Assert (face_no < GeometryInfo<dim>::faces_per_cell,
+          ExcIndexRange (face_no, 0, GeometryInfo<dim>::faces_per_cell));
+
+  this->maybe_invalidate_previous_present_cell (cell);
+  reset_pointer_in_place_if_possible<typename FEValuesBase<dim,spacedim>::template
+  CellIterator<TriaIterator<DoFCellAccessor<DoFHandlerType<dim,spacedim>,
+                                            lda> > > >
+  (this->present_cell, cell);
+
+  // unlike FEValues::reinit(), no cell
+  // similarity check is made here. the
+  // part that depends on the iterator
+  // type is done; do_reinit() does the
+  // type-independent work.
+  do_reinit (face_no);
+}
+
+
+
+template <int dim, int spacedim>
+void FEFaceValues<dim,spacedim>::reinit (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                                         const unsigned int              face_no)
+{
+  Assert (face_no < GeometryInfo<dim>::faces_per_cell,
+          ExcIndexRange (face_no, 0, GeometryInfo<dim>::faces_per_cell));
+
+  this->maybe_invalidate_previous_present_cell (cell);
+  reset_pointer_in_place_if_possible<typename FEValuesBase<dim,spacedim>::TriaCellIterator>
+  (this->present_cell, cell);
+
+  // unlike FEValues::reinit(), no cell
+  // similarity check is made here. the
+  // part that depends on the iterator
+  // type is done; do_reinit() does the
+  // type-independent work.
+  do_reinit (face_no);
+}
+
+
+
+template <int dim, int spacedim>
+void FEFaceValues<dim,spacedim>::do_reinit (const unsigned int face_no)
+{
+  // first record the index of face face_no of the present cell
+  const typename Triangulation<dim,spacedim>::cell_iterator cell=*this->present_cell;
+  this->present_face_index=cell->face_index(face_no);
+  // then let the mapping fill mapping_output for this face ...
+  this->get_mapping().fill_fe_face_values(*this->present_cell,
+                                          face_no,
+                                          this->quadrature,
+                                          *this->mapping_data,
+                                          this->mapping_output);
+  // ... and the finite element fill finite_element_output, using the mapping output
+  this->get_fe().fill_fe_face_values(*this->present_cell,
+                                     face_no,
+                                     this->quadrature,
+                                     this->get_mapping(),
+                                     *this->mapping_data,
+                                     this->mapping_output,
+                                     *this->fe_data,
+                                     this->finite_element_output);
+}
+
+
+/*------------------------------- FESubfaceValues -------------------------------*/
+
+// Namespace-scope definitions of the static constants dimension and integral_dimension of FESubfaceValues.
+template <int dim, int spacedim>
+const unsigned int FESubfaceValues<dim,spacedim>::dimension;
+
+template <int dim, int spacedim>
+const unsigned int FESubfaceValues<dim,spacedim>::integral_dimension;
+
+
+// Constructor taking an explicit mapping; update_flags are processed by initialize().
+template <int dim, int spacedim>
+FESubfaceValues<dim,spacedim>::FESubfaceValues (const Mapping<dim,spacedim>       &mapping,
+                                                const FiniteElement<dim,spacedim> &fe,
+                                                const Quadrature<dim-1>  &quadrature,
+                                                const UpdateFlags         update_flags)
+  :
+  FEFaceValuesBase<dim,spacedim> (quadrature.size(),
+                                  fe.dofs_per_cell,
+                                  update_flags,
+                                  mapping,
+                                  fe, quadrature)
+{
+  initialize (update_flags);
+}
+
+
+// Constructor without a mapping argument: uses StaticMappingQ1<dim,spacedim>::mapping.
+template <int dim, int spacedim>
+FESubfaceValues<dim,spacedim>::FESubfaceValues (const FiniteElement<dim,spacedim> &fe,
+                                                const Quadrature<dim-1>  &quadrature,
+                                                const UpdateFlags         update_flags)
+  :
+  FEFaceValuesBase<dim,spacedim> (quadrature.size(),
+                                  fe.dofs_per_cell,
+                                  update_flags,
+                                  StaticMappingQ1<dim,spacedim>::mapping,
+                                  fe, quadrature)
+{
+  initialize (update_flags);
+}
+
+// Shared by both constructors: expand the update flags, set up the output
+// objects and fetch the subface data objects of the FE and the mapping.
+template <int dim, int spacedim>
+void
+FESubfaceValues<dim,spacedim>::initialize (const UpdateFlags update_flags)
+{
+  const UpdateFlags flags = this->compute_update_flags (update_flags);
+
+  // set up the output objects of the mapping and of the finite element
+  this->mapping_output.initialize(this->n_quadrature_points, flags);
+  this->finite_element_output.initialize(this->n_quadrature_points, *this->fe, flags);
+
+  // then get objects into which the FE and the Mapping can store
+  // intermediate data used across calls to reinit. each is created
+  // in its own task
+  Threads::Task<typename FiniteElement<dim,spacedim>::InternalDataBase *>
+  fe_get_data = Threads::new_task (&FiniteElement<dim,spacedim>::get_subface_data,
+                                   *this->fe,
+                                   flags,
+                                   *this->mapping,
+                                   this->quadrature,
+                                   this->finite_element_output);
+  Threads::Task<typename Mapping<dim,spacedim>::InternalDataBase *>
+  mapping_get_data = Threads::new_task (&Mapping<dim,spacedim>::get_subface_data,
+                                        *this->mapping,
+                                        flags,
+                                        this->quadrature);
+
+  this->update_flags = flags;
+
+  // then collect the results of the two tasks above
+  this->fe_data.reset (fe_get_data.return_value());
+  this->mapping_data.reset (mapping_get_data.return_value());
+}
+
+
+template <int dim, int spacedim>
+template <template <int, int> class DoFHandlerType, bool lda>
+void FESubfaceValues<dim,spacedim>::reinit
+(const TriaIterator<DoFCellAccessor<DoFHandlerType<dim,spacedim>, lda> > &cell,
+ const unsigned int face_no,
+ const unsigned int subface_no)
+{
+  // the finite element given to the
+  // constructor must agree with the one
+  // the DoFHandler holds for this cell's
+  // active_fe_index
+  typedef FEValuesBase<dim,spacedim> FEVB;
+  Assert (static_cast<const FiniteElementData<dim>&>(*this->fe) ==
+          static_cast<const FiniteElementData<dim>&>(
+            cell->get_dof_handler().get_fe()[cell->active_fe_index ()]),
+          typename FEVB::ExcFEDontMatch());
+  Assert (face_no < GeometryInfo<dim>::faces_per_cell,
+          ExcIndexRange (face_no, 0, GeometryInfo<dim>::faces_per_cell));
+  // We would like to check for
+  // subface_no < cell->face(face_no)->n_children(),
+  // but this function is also called for
+  // faces without children (see
+  // tests/fe/mapping.cc). Therefore the
+  // subface index is checked with two
+  // assertions, one for faces without
+  // and one for faces with children
+  Assert (cell->face(face_no)->has_children() ||
+          subface_no < GeometryInfo<dim>::max_children_per_face,
+          ExcIndexRange (subface_no, 0, GeometryInfo<dim>::max_children_per_face));
+  Assert (!cell->face(face_no)->has_children() ||
+          subface_no < cell->face(face_no)->number_of_children(),
+          ExcIndexRange (subface_no, 0, cell->face(face_no)->number_of_children()));
+  Assert (cell->has_children() == false,
+          ExcMessage ("You can't use subface data for cells that are "
+                      "already refined. Iterate over their children "
+                      "instead in these cases."));
+
+  this->maybe_invalidate_previous_present_cell (cell);
+  reset_pointer_in_place_if_possible<typename FEValuesBase<dim,spacedim>::template
+  CellIterator<TriaIterator<DoFCellAccessor<DoFHandlerType<dim,spacedim>,
+                                            lda> > > >
+  (this->present_cell, cell);
+
+  // no cell similarity check is made
+  // here. the part that depends on
+  // the iterator type is done;
+  // do_reinit() does the
+  // type-independent work.
+  do_reinit (face_no, subface_no);
+}
+
+
+template <int dim, int spacedim>
+void FESubfaceValues<dim,spacedim>::reinit (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                                            const unsigned int         face_no,
+                                            const unsigned int         subface_no)
+{
+  // Set this object to subface subface_no of face face_no of the given cell.
+  Assert (face_no < GeometryInfo<dim>::faces_per_cell,
+          ExcIndexRange (face_no, 0, GeometryInfo<dim>::faces_per_cell));
+  // do_reinit() handles faces without children, and the DoFHandler overload
+  // accepts them as well, so use the same pair of subface checks here
+  Assert (cell->face(face_no)->has_children() ||
+          subface_no < GeometryInfo<dim>::max_children_per_face,
+          ExcIndexRange (subface_no, 0, GeometryInfo<dim>::max_children_per_face));
+  Assert (!cell->face(face_no)->has_children() ||
+          subface_no < cell->face(face_no)->number_of_children(),
+          ExcIndexRange (subface_no, 0, cell->face(face_no)->number_of_children()));
+  this->maybe_invalidate_previous_present_cell (cell);
+  reset_pointer_in_place_if_possible<typename FEValuesBase<dim,spacedim>::TriaCellIterator>
+  (this->present_cell, cell);
+  do_reinit (face_no, subface_no);   // type-independent part of the work
+}
+
+
+
+template <int dim, int spacedim>
+void FESubfaceValues<dim,spacedim>::do_reinit (const unsigned int face_no,
+                                               const unsigned int subface_no)
+{
+  // first determine present_face_index, the index
+  // of the face or subface we are working on
+  const typename Triangulation<dim,spacedim>::cell_iterator cell=*this->present_cell;
+
+  if (!cell->face(face_no)->has_children())
+    // the face has no children, hence no
+    // subfaces: use the index of the face
+    // itself
+    this->present_face_index=cell->face_index(face_no);
+  else if (dim!=3)
+    this->present_face_index=cell->face(face_no)->child_index(subface_no);
+  else
+    {
+      // in 3d the subface can be a child of a
+      // child of the face, depending on the
+      // subface case. this is the same logic as
+      // in cell->neighbor_child_on_subface()
+      unsigned int subface_index=numbers::invalid_unsigned_int;
+      switch (cell->subface_case(face_no))
+        {
+        case internal::SubfaceCase<3>::case_x:
+        case internal::SubfaceCase<3>::case_y:
+        case internal::SubfaceCase<3>::case_xy:
+          subface_index=cell->face(face_no)->child_index(subface_no);
+          break;
+        case internal::SubfaceCase<3>::case_x1y2y:
+        case internal::SubfaceCase<3>::case_y1x2x:
+          subface_index=cell->face(face_no)->child(subface_no/2)->child_index(subface_no%2);
+          break;
+        case internal::SubfaceCase<3>::case_x1y:
+        case internal::SubfaceCase<3>::case_y1x:
+          switch (subface_no)
+            {
+            case 0:
+            case 1:
+              subface_index=cell->face(face_no)->child(0)->child_index(subface_no);
+              break;
+            case 2:
+              subface_index=cell->face(face_no)->child_index(1);
+              break;
+            default:
+              Assert(false, ExcInternalError());
+            }
+          break;
+        case internal::SubfaceCase<3>::case_x2y:
+        case internal::SubfaceCase<3>::case_y2x:
+          switch (subface_no)
+            {
+            case 0:
+              subface_index=cell->face(face_no)->child_index(0);
+              break;
+            case 1:
+            case 2:
+              subface_index=cell->face(face_no)->child(1)->child_index(subface_no-1);
+              break;
+            default:
+              Assert(false, ExcInternalError());
+            }
+          break;
+        default:
+          Assert(false, ExcInternalError());
+          break;
+        }
+      Assert(subface_index!=numbers::invalid_unsigned_int,
+             ExcInternalError());
+      this->present_face_index=subface_index;
+    }
+
+  // finally let the mapping and then the finite element fill their output objects
+  this->get_mapping().fill_fe_subface_values(*this->present_cell,
+                                             face_no,
+                                             subface_no,
+                                             this->quadrature,
+                                             *this->mapping_data,
+                                             this->mapping_output);
+
+  this->get_fe().fill_fe_subface_values(*this->present_cell,
+                                        face_no,
+                                        subface_no,
+                                        this->quadrature,
+                                        this->get_mapping(),
+                                        *this->mapping_data,
+                                        this->mapping_output,
+                                        *this->fe_data,
+                                        this->finite_element_output);
+}
+
+
+/*------------------------------- Explicit Instantiations -------------*/
+#define SPLIT_INSTANTIATIONS_COUNT 2
+#ifndef SPLIT_INSTANTIATIONS_INDEX
+#define SPLIT_INSTANTIATIONS_INDEX 0
+#endif
+#include "fe_values.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/fe_values.decl.1.inst.in b/source/fe/fe_values.decl.1.inst.in
new file mode 100644
index 0000000..30129eb
--- /dev/null
+++ b/source/fe/fe_values.decl.1.inst.in
@@ -0,0 +1,31 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+// Declarations of member functions of FEValuesBase::CellIteratorBase
+// and derived classes
+
+for (VEC : SERIAL_VECTORS)
+  {
+    /// Pure virtual declaration: call
+    /// @p get_interpolated_dof_values of the iterator with the
+    /// given arguments. The surrounding loop presumably expands
+    /// this once for every vector type VEC in SERIAL_VECTORS.
+    virtual
+    void
+    get_interpolated_dof_values (const VEC &in,
+				 Vector<VEC::value_type> &out) const = 0;
+  }
diff --git a/source/fe/fe_values.decl.2.inst.in b/source/fe/fe_values.decl.2.inst.in
new file mode 100644
index 0000000..9a91f98
--- /dev/null
+++ b/source/fe/fe_values.decl.2.inst.in
@@ -0,0 +1,31 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+// Declarations of member functions of FEValuesBase::CellIteratorBase
+// and derived classes
+
+for (VEC : SERIAL_VECTORS)
+  {
+    /// Non-pure virtual declaration: call
+    /// @p get_interpolated_dof_values of the iterator with the
+    /// given arguments. The surrounding loop presumably expands
+    /// this once for every vector type VEC in SERIAL_VECTORS.
+    virtual
+    void
+    get_interpolated_dof_values (const VEC      &in,
+				 Vector<VEC::value_type> &out) const;
+  }
diff --git a/source/fe/fe_values.impl.1.inst.in b/source/fe/fe_values.impl.1.inst.in
new file mode 100644
index 0000000..7dbdc34
--- /dev/null
+++ b/source/fe/fe_values.impl.1.inst.in
@@ -0,0 +1,28 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// CellIterator<CI>: forward the call to get_interpolated_dof_values() of the stored cell iterator.
+for (VEC : SERIAL_VECTORS)
+  {
+    template <int dim, int spacedim>
+    template <typename CI>
+    void
+    FEValuesBase<dim,spacedim>::CellIterator<CI>::
+    get_interpolated_dof_values (const VEC      &in,
+			         Vector<VEC::value_type> &out) const
+    \{
+      cell->get_interpolated_dof_values (in, out);
+    \}
+  }
diff --git a/source/fe/fe_values.impl.2.inst.in b/source/fe/fe_values.impl.2.inst.in
new file mode 100644
index 0000000..5aa37db
--- /dev/null
+++ b/source/fe/fe_values.impl.2.inst.in
@@ -0,0 +1,27 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+for (VEC : SERIAL_VECTORS)  // one definition is generated per entry of SERIAL_VECTORS
+  {
+    template <int dim, int spacedim>
+    void
+    FEValuesBase<dim,spacedim>::TriaCellIterator::
+    get_interpolated_dof_values (const VEC &,  // both parameters are unnamed: the body never reads them
+    			         Vector<VEC::value_type> &) const
+    \{
+      Assert (false, ExcMessage (message_string));  // assertion on a constant 'false', reporting message_string
+    \}
+  }
diff --git a/source/fe/fe_values.inst.in b/source/fe/fe_values.inst.in
new file mode 100644
index 0000000..e4842ec
--- /dev/null
+++ b/source/fe/fe_values.inst.in
@@ -0,0 +1,558 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+// Instantiations of the FEValues class family and of the classes in
+// FEValuesViews (the function instantiations further below explicitly write
+// the dealii:: namespace in order to not confuse the compiler with FEValuesViews::Vector).
+
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+#if deal_II_dimension <= deal_II_space_dimension  // skip pairs with dim > spacedim
+    template class FEValuesBase<deal_II_dimension,deal_II_space_dimension>;
+    template class FEValues<deal_II_dimension,deal_II_space_dimension>;
+    template class FEValuesBase<deal_II_dimension,deal_II_space_dimension>::
+      CellIterator<DoFHandler<deal_II_dimension,deal_II_space_dimension>::cell_iterator>;  // nested class, instantiated for DoFHandler cell iterators
+
+    template class FEFaceValuesBase<deal_II_dimension,deal_II_space_dimension>;
+    template class FEFaceValues<deal_II_dimension,deal_II_space_dimension>;
+    template class FESubfaceValues<deal_II_dimension,deal_II_space_dimension>;
+
+
+    namespace FEValuesViews
+      \{
+      template class Scalar<deal_II_dimension, deal_II_space_dimension>;
+      template class Vector<deal_II_dimension, deal_II_space_dimension>;
+      template class SymmetricTensor<2, deal_II_dimension, deal_II_space_dimension>;  // rank 2 only
+      template class Tensor<2, deal_II_dimension, deal_II_space_dimension>;  // rank 2 only
+      \}
+#endif
+  }
+
+
+for (dof_handler : DOFHANDLER_TEMPLATES; deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS; lda : BOOL)
+  {  // expanded for every combination of the four loop variables above
+#if deal_II_dimension <= deal_II_space_dimension  // skip pairs with dim > spacedim
+    template void FEValues<deal_II_dimension,deal_II_space_dimension>::reinit(  // cell iterator only
+    const TriaIterator<DoFCellAccessor<dof_handler<deal_II_dimension,deal_II_space_dimension>, lda> >&);
+    template void FEFaceValues<deal_II_dimension,deal_II_space_dimension>::reinit(  // cell iterator plus one index (presumably the face number)
+    const TriaIterator<DoFCellAccessor<dof_handler<deal_II_dimension,deal_II_space_dimension>, lda> >&, unsigned int);
+    template void FESubfaceValues<deal_II_dimension,deal_II_space_dimension>::reinit(  // cell iterator plus two indices (presumably face and subface number)
+    const TriaIterator<DoFCellAccessor<dof_handler<deal_II_dimension,deal_II_space_dimension>, lda> >&, unsigned int, unsigned int);
+#endif
+  }
+
+
+
+for (VEC : SERIAL_VECTORS; deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {  // FEValuesViews member templates, instantiated for every vector type and dimension pair; all names are qualified with dealii::
+#if deal_II_dimension <= deal_II_space_dimension  // skip pairs with dim > spacedim
+    template
+      void FEValuesViews::Scalar<deal_II_dimension, deal_II_space_dimension>
+      ::get_function_values<dealii::VEC>
+      (const dealii::VEC&, std::vector<ProductType<dealii::VEC::value_type,value_type>::type>&) const;
+    template
+      void FEValuesViews::Scalar<deal_II_dimension, deal_II_space_dimension>
+      ::get_function_gradients<dealii::VEC>
+      (const dealii::VEC&, std::vector<ProductType<dealii::VEC::value_type,dealii::Tensor<1,deal_II_space_dimension> >::type>&) const;
+    template
+      void FEValuesViews::Scalar<deal_II_dimension, deal_II_space_dimension>
+      ::get_function_hessians<dealii::VEC>
+      (const dealii::VEC&, std::vector<ProductType<dealii::VEC::value_type,dealii::Tensor<2,deal_II_space_dimension> >::type>&) const;
+    template
+      void FEValuesViews::Scalar<deal_II_dimension, deal_II_space_dimension>
+      ::get_function_laplacians<dealii::VEC>
+      (const dealii::VEC&, std::vector<ProductType<dealii::VEC::value_type,double>::type> &) const;
+    template
+      void FEValuesViews::Scalar<deal_II_dimension, deal_II_space_dimension>
+      ::get_function_third_derivatives<dealii::VEC>
+      (const dealii::VEC&, std::vector<ProductType<dealii::VEC::value_type,dealii::Tensor<3,deal_II_space_dimension> >::type>&) const;
+
+    template
+      void FEValuesViews::Vector<deal_II_dimension, deal_II_space_dimension>
+      ::get_function_values<dealii::VEC>
+      (const dealii::VEC&, std::vector<ProductType<dealii::VEC::value_type,dealii::Tensor<1,deal_II_space_dimension> >::type >&) const;
+    template
+      void FEValuesViews::Vector<deal_II_dimension, deal_II_space_dimension>
+      ::get_function_gradients<dealii::VEC>
+      (const dealii::VEC&, std::vector<ProductType<dealii::VEC::value_type,dealii::Tensor<2,deal_II_space_dimension> >::type >&) const;
+    template
+      void FEValuesViews::Vector<deal_II_dimension, deal_II_space_dimension>
+      ::get_function_symmetric_gradients<dealii::VEC>
+      (const dealii::VEC&, std::vector<ProductType<dealii::VEC::value_type,dealii::SymmetricTensor<2,deal_II_space_dimension> >::type >&) const;
+    template
+      void FEValuesViews::Vector<deal_II_dimension, deal_II_space_dimension>
+      ::get_function_curls<dealii::VEC>
+      (const dealii::VEC&, std::vector<ProductType<dealii::VEC::value_type,curl_type>::type>&) const;
+    template
+      void FEValuesViews::Vector<deal_II_dimension, deal_II_space_dimension>
+      ::get_function_divergences<dealii::VEC>
+      (const dealii::VEC&, std::vector<ProductType<dealii::VEC::value_type,divergence_type>::type>&) const;
+    template
+      void FEValuesViews::Vector<deal_II_dimension, deal_II_space_dimension>
+      ::get_function_hessians<dealii::VEC>
+      (const dealii::VEC&, std::vector<ProductType<dealii::VEC::value_type,dealii::Tensor<3,deal_II_space_dimension> >::type >&) const;
+    template
+      void FEValuesViews::Vector<deal_II_dimension, deal_II_space_dimension>
+      ::get_function_laplacians<dealii::VEC>
+      (const dealii::VEC&, std::vector<ProductType<dealii::VEC::value_type,dealii::Tensor<1,deal_II_space_dimension> >::type >&) const;
+    template
+      void FEValuesViews::Vector<deal_II_dimension, deal_II_space_dimension>
+      ::get_function_third_derivatives<dealii::VEC>
+      (const dealii::VEC&, std::vector<ProductType<dealii::VEC::value_type,dealii::Tensor<4,deal_II_space_dimension> >::type >&) const;
+
+    template
+      void FEValuesViews::SymmetricTensor<2, deal_II_dimension, deal_II_space_dimension>
+      ::get_function_values<dealii::VEC>
+      (const dealii::VEC&, std::vector<ProductType<dealii::VEC::value_type,dealii::SymmetricTensor<2,deal_II_space_dimension> >::type>&) const;
+    template
+      void FEValuesViews::SymmetricTensor<2, deal_II_dimension, deal_II_space_dimension>
+      ::get_function_divergences<dealii::VEC>
+      (const dealii::VEC&, std::vector<ProductType<dealii::VEC::value_type,dealii::Tensor<1,deal_II_space_dimension> >::type>&) const;
+
+    template
+      void FEValuesViews::Tensor<2, deal_II_dimension, deal_II_space_dimension>
+      ::get_function_values<dealii::VEC>
+      (const dealii::VEC&, std::vector<ProductType<dealii::VEC::value_type,dealii::Tensor<2,deal_II_space_dimension> >::type>&) const;
+    template
+      void FEValuesViews::Tensor<2, deal_II_dimension, deal_II_space_dimension>
+      ::get_function_divergences<dealii::VEC>
+      (const dealii::VEC&, std::vector<ProductType<dealii::VEC::value_type,dealii::Tensor<1,deal_II_space_dimension> >::type>&) const;
+#endif
+  }
+
+
+
+for (VEC : SERIAL_VECTORS; deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {  // FEValuesBase member templates, instantiated for every vector type and dimension pair
+#if deal_II_dimension <= deal_II_space_dimension  // skip pairs with dim > spacedim
+
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_space_dimension>::get_function_values<VEC>
+      (const VEC&, std::vector<VEC::value_type>&) const;
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_space_dimension>::get_function_values<VEC>
+      (const VEC&, const VectorSlice<const std::vector<types::global_dof_index> >&, std::vector<VEC::value_type>&) const;
+
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_space_dimension>::get_function_values<VEC>
+      (const VEC&, std::vector<Vector<VEC::value_type> > &) const;
+
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_space_dimension>::get_function_values<VEC>
+      (const VEC&, const VectorSlice<const std::vector<types::global_dof_index> >&,
+       std::vector<Vector<VEC::value_type> > &) const;
+
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_space_dimension>::get_function_values<VEC>
+      (const VEC&, const VectorSlice<const std::vector<types::global_dof_index> >&,
+       VectorSlice<std::vector<std::vector<VEC::value_type> > >, bool) const;
+
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_space_dimension>::get_function_gradients<VEC>
+      (const VEC&, std::vector<dealii::Tensor<1,deal_II_space_dimension,VEC::value_type> > &) const;
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_space_dimension>::get_function_gradients<VEC>
+      (const VEC&, const VectorSlice<const std::vector<types::global_dof_index> >&,
+       std::vector<dealii::Tensor<1,deal_II_space_dimension,VEC::value_type> > &) const;
+
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_space_dimension>::get_function_gradients<VEC>
+      (const VEC&, std::vector<std::vector<dealii::Tensor<1,deal_II_space_dimension,VEC::value_type> > > &) const;
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_space_dimension>::get_function_gradients<VEC>
+      (const VEC&, const VectorSlice<const std::vector<types::global_dof_index> >&,
+       VectorSlice<std::vector<std::vector<dealii::Tensor<1,deal_II_space_dimension,VEC::value_type> > > >, bool) const;
+
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_space_dimension>::get_function_hessians<VEC>
+      (const VEC&, std::vector<dealii::Tensor<2,deal_II_space_dimension,VEC::value_type> > &) const;
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_space_dimension>::get_function_hessians<VEC>
+      (const VEC&, const VectorSlice<const std::vector<types::global_dof_index> >&,
+       std::vector<dealii::Tensor<2,deal_II_space_dimension,VEC::value_type> > &) const;
+
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_space_dimension>::get_function_hessians<VEC>
+      (const VEC&, std::vector<std::vector<dealii::Tensor<2,deal_II_space_dimension,VEC::value_type> > > &, bool) const;
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_space_dimension>::get_function_hessians<VEC>
+      (const VEC&, const VectorSlice<const std::vector<types::global_dof_index> >&,
+       VectorSlice<std::vector<std::vector<dealii::Tensor<2,deal_II_space_dimension,VEC::value_type> > > >, bool) const;
+
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_space_dimension>::get_function_laplacians<VEC>
+      (const VEC&, std::vector<VEC::value_type>&) const;
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_space_dimension>::get_function_laplacians<VEC>
+      (const VEC&, const VectorSlice<const std::vector<types::global_dof_index> >&, std::vector<VEC::value_type>&) const;
+
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_space_dimension>::get_function_laplacians<VEC>
+      (const VEC&, std::vector<Vector<VEC::value_type> > &) const;
+
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_space_dimension>::get_function_laplacians<VEC>
+      (const VEC&, const VectorSlice<const std::vector<types::global_dof_index> >&,
+       std::vector<Vector<VEC::value_type> > &) const;
+
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_space_dimension>::get_function_laplacians<VEC>
+      (const VEC&, const VectorSlice<const std::vector<types::global_dof_index> >&,
+       std::vector<std::vector<VEC::value_type> > &, bool) const;
+       
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_space_dimension>::get_function_third_derivatives<VEC>
+      (const VEC&, std::vector<dealii::Tensor<3,deal_II_space_dimension,VEC::value_type> > &) const;
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_space_dimension>::get_function_third_derivatives<VEC>
+      (const VEC&, const VectorSlice<const std::vector<types::global_dof_index> >&,
+       std::vector<dealii::Tensor<3,deal_II_space_dimension,VEC::value_type> > &) const;
+
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_space_dimension>::get_function_third_derivatives<VEC>
+      (const VEC&, std::vector<std::vector<dealii::Tensor<3,deal_II_space_dimension,VEC::value_type> > > &, bool) const;
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_space_dimension>::get_function_third_derivatives<VEC>
+      (const VEC&, const VectorSlice<const std::vector<types::global_dof_index> >&,
+       VectorSlice<std::vector<std::vector<dealii::Tensor<3,deal_II_space_dimension,VEC::value_type> > > >, bool) const;
+
+#endif
+  }
+
+
+// Instantiations of functions in FEValuesViews for VEC=IndexSet
+
+for (deal_II_dimension : DIMENSIONS)
+  {  // first group: the space dimension is either omitted (left to the default template argument) or given as deal_II_dimension
+    template
+        void FEValuesViews::Scalar<deal_II_dimension>::get_function_values<dealii::IndexSet>
+        (const dealii::IndexSet&, std::vector<ProductType<IndexSet::value_type,double>::type> &) const;
+    template
+        void FEValuesViews::Scalar<deal_II_dimension>::get_function_gradients<dealii::IndexSet>
+        (const dealii::IndexSet&, std::vector<ProductType<IndexSet::value_type,dealii::Tensor<1,deal_II_dimension> >::type>&) const;
+    template
+        void FEValuesViews::Scalar<deal_II_dimension>::get_function_hessians<dealii::IndexSet>
+        (const dealii::IndexSet&, std::vector<ProductType<IndexSet::value_type,dealii::Tensor<2,deal_II_dimension> >::type>&) const;
+    template
+        void FEValuesViews::Scalar<deal_II_dimension>::get_function_laplacians<dealii::IndexSet>
+        (const dealii::IndexSet&, std::vector<ProductType<IndexSet::value_type,double>::type>&) const;
+    template
+        void FEValuesViews::Scalar<deal_II_dimension>::get_function_third_derivatives<dealii::IndexSet>
+        (const dealii::IndexSet&, std::vector<ProductType<IndexSet::value_type,dealii::Tensor<3,deal_II_dimension> >::type>&) const;
+
+    template
+        void FEValuesViews::Vector<deal_II_dimension>::get_function_values<dealii::IndexSet>
+        (const dealii::IndexSet&, std::vector<ProductType<IndexSet::value_type,dealii::Tensor<1,deal_II_dimension> >::type>&) const;
+    template
+        void FEValuesViews::Vector<deal_II_dimension>::get_function_gradients<dealii::IndexSet>
+        (const dealii::IndexSet&, std::vector<ProductType<IndexSet::value_type,dealii::Tensor<2,deal_II_dimension> >::type>&) const;
+    template
+        void FEValuesViews::Vector<deal_II_dimension>::get_function_symmetric_gradients<dealii::IndexSet>
+        (const dealii::IndexSet&, std::vector<ProductType<IndexSet::value_type,dealii::SymmetricTensor<2,deal_II_dimension> >::type>&) const;
+    template
+        void FEValuesViews::Vector<deal_II_dimension>::get_function_curls<dealii::IndexSet>
+        (const dealii::IndexSet&, std::vector<ProductType<IndexSet::value_type,curl_type>::type>&) const;
+    template
+        void FEValuesViews::Vector<deal_II_dimension>::get_function_divergences<dealii::IndexSet>
+        (const dealii::IndexSet&, std::vector<ProductType<IndexSet::value_type,divergence_type>::type>&) const;
+    template
+        void FEValuesViews::Vector<deal_II_dimension>::get_function_hessians<dealii::IndexSet>
+        (const dealii::IndexSet&, std::vector<ProductType<IndexSet::value_type,dealii::Tensor<3,deal_II_dimension> >::type>&) const;
+    template
+        void FEValuesViews::Vector<deal_II_dimension>::get_function_laplacians<dealii::IndexSet>
+        (const dealii::IndexSet&, std::vector<ProductType<IndexSet::value_type,dealii::Tensor<1,deal_II_dimension> >::type>&) const;
+    template
+        void FEValuesViews::Vector<deal_II_dimension>::get_function_third_derivatives<dealii::IndexSet>
+        (const dealii::IndexSet&, std::vector<ProductType<IndexSet::value_type,dealii::Tensor<4,deal_II_dimension> >::type>&) const;
+
+    template
+        void FEValuesViews::SymmetricTensor<2,deal_II_dimension,deal_II_dimension>::get_function_values<dealii::IndexSet>
+        (const dealii::IndexSet&, std::vector<ProductType<IndexSet::value_type,dealii::SymmetricTensor<2,deal_II_dimension> >::type>&) const;
+    template
+        void FEValuesViews::SymmetricTensor<2,deal_II_dimension,deal_II_dimension>::get_function_divergences<dealii::IndexSet>
+        (const dealii::IndexSet&, std::vector<ProductType<IndexSet::value_type,dealii::Tensor<1,deal_II_dimension> >::type>&) const;
+
+    template
+        void FEValuesViews::Tensor<2,deal_II_dimension,deal_II_dimension>::get_function_values<dealii::IndexSet>
+        (const dealii::IndexSet&, std::vector<ProductType<IndexSet::value_type,dealii::Tensor<2,deal_II_dimension> >::type>&) const;
+    template
+        void FEValuesViews::Tensor<2,deal_II_dimension,deal_II_dimension>::get_function_divergences<dealii::IndexSet>
+        (const dealii::IndexSet&, std::vector<ProductType<IndexSet::value_type,dealii::Tensor<1,deal_II_dimension> >::type>&) const;
+
+
+#if deal_II_dimension != 3  // second group: space dimension deal_II_dimension+1; left out when deal_II_dimension is 3
+    template
+    void FEValuesViews::Scalar<deal_II_dimension, deal_II_dimension+1>
+    ::get_function_values<dealii::IndexSet>
+    (const dealii::IndexSet&, std::vector<ProductType<IndexSet::value_type,value_type>::type>&) const;
+    template
+    void FEValuesViews::Scalar<deal_II_dimension, deal_II_dimension+1>
+    ::get_function_gradients<dealii::IndexSet>
+    (const dealii::IndexSet&, std::vector<ProductType<IndexSet::value_type,dealii::Tensor<1,deal_II_dimension+1> >::type>&) const;
+    template
+    void FEValuesViews::Scalar<deal_II_dimension, deal_II_dimension+1>
+    ::get_function_hessians<dealii::IndexSet>
+    (const dealii::IndexSet&, std::vector<ProductType<IndexSet::value_type,dealii::Tensor<2,deal_II_dimension+1> >::type>&) const;
+    template
+    void FEValuesViews::Scalar<deal_II_dimension, deal_II_dimension+1>
+    ::get_function_laplacians<dealii::IndexSet>
+    (const dealii::IndexSet&, std::vector<ProductType<IndexSet::value_type,double>::type>&) const;
+    template
+    void FEValuesViews::Scalar<deal_II_dimension, deal_II_dimension+1>
+    ::get_function_third_derivatives<dealii::IndexSet>
+    (const dealii::IndexSet&, std::vector<ProductType<IndexSet::value_type,dealii::Tensor<3,deal_II_dimension+1> >::type>&) const;
+
+    template
+    void FEValuesViews::Vector<deal_II_dimension, deal_II_dimension+1>
+    ::get_function_values<dealii::IndexSet>
+    (const dealii::IndexSet&, std::vector<ProductType<IndexSet::value_type,dealii::Tensor<1,deal_II_dimension+1> >::type>&) const;
+    template
+    void FEValuesViews::Vector<deal_II_dimension, deal_II_dimension+1>
+    ::get_function_gradients<dealii::IndexSet>
+    (const dealii::IndexSet&, std::vector<ProductType<IndexSet::value_type,dealii::Tensor<2,deal_II_dimension+1> >::type>&) const;
+    template
+    void FEValuesViews::Vector<deal_II_dimension, deal_II_dimension+1>
+    ::get_function_symmetric_gradients<dealii::IndexSet>
+    (const dealii::IndexSet&, std::vector<ProductType<IndexSet::value_type,dealii::SymmetricTensor<2,deal_II_dimension+1> >::type>&) const;
+    template
+    void FEValuesViews::Vector<deal_II_dimension, deal_II_dimension+1>
+    ::get_function_divergences<dealii::IndexSet>
+    (const dealii::IndexSet&, std::vector<ProductType<IndexSet::value_type,double>::type>&) const;
+    template
+    void FEValuesViews::Vector<deal_II_dimension, deal_II_dimension+1>
+    ::get_function_hessians<dealii::IndexSet>
+    (const dealii::IndexSet&, std::vector<ProductType<IndexSet::value_type,dealii::Tensor<3,deal_II_dimension+1> >::type>&) const;
+    template
+    void FEValuesViews::Vector<deal_II_dimension, deal_II_dimension+1>
+    ::get_function_laplacians<dealii::IndexSet>
+    (const dealii::IndexSet&, std::vector<ProductType<IndexSet::value_type,dealii::Tensor<1,deal_II_dimension+1> >::type>&) const;
+    template
+    void FEValuesViews::Vector<deal_II_dimension, deal_II_dimension+1>
+    ::get_function_third_derivatives<dealii::IndexSet>
+    (const dealii::IndexSet&, std::vector<ProductType<IndexSet::value_type,dealii::Tensor<4,deal_II_dimension+1> >::type>&) const;
+
+    template
+    void FEValuesViews::SymmetricTensor<2, deal_II_dimension, deal_II_dimension+1>
+        ::get_function_values<dealii::IndexSet>
+    (const dealii::IndexSet&, std::vector<ProductType<IndexSet::value_type,dealii::SymmetricTensor<2,deal_II_dimension+1> >::type>&) const;
+    template
+    void FEValuesViews::SymmetricTensor<2, deal_II_dimension, deal_II_dimension+1>
+        ::get_function_divergences<dealii::IndexSet>
+    (const dealii::IndexSet&, std::vector<ProductType<IndexSet::value_type,dealii::Tensor<1,deal_II_dimension+1> >::type>&) const;
+
+    template
+    void FEValuesViews::Tensor<2, deal_II_dimension, deal_II_dimension+1>
+        ::get_function_values<dealii::IndexSet>
+    (const dealii::IndexSet&, std::vector<ProductType<IndexSet::value_type,dealii::Tensor<2,deal_II_dimension+1> >::type>&) const;
+    template
+    void FEValuesViews::Tensor<2, deal_II_dimension, deal_II_dimension+1>
+        ::get_function_divergences<dealii::IndexSet>
+    (const dealii::IndexSet&, std::vector<ProductType<IndexSet::value_type,dealii::Tensor<1,deal_II_dimension+1> >::type>&) const;
+
+#endif
+  }
+
+
+
+// Instantiations of functions in FEValuesBase for VEC=IndexSet
+
+for (deal_II_dimension : DIMENSIONS)
+  {  // first group: FEValuesBase<deal_II_dimension>, i.e. the space dimension is left to the default template argument
+    template
+    void FEValuesBase<deal_II_dimension>::get_function_values<IndexSet>
+      (const IndexSet&, std::vector<IndexSet::value_type>&) const;
+    template
+      void FEValuesBase<deal_II_dimension>::get_function_values<IndexSet>
+      (const IndexSet&, const VectorSlice<const std::vector<types::global_dof_index> >&, std::vector<IndexSet::value_type>&) const;
+
+    template
+      void FEValuesBase<deal_II_dimension>::get_function_values<IndexSet>
+      (const IndexSet&, std::vector<Vector<IndexSet::value_type> > &) const;
+
+    template
+      void FEValuesBase<deal_II_dimension>::get_function_values<IndexSet>
+      (const IndexSet&, const VectorSlice<const std::vector<types::global_dof_index> >&,
+       std::vector<Vector<IndexSet::value_type> > &) const;
+
+    template
+      void FEValuesBase<deal_II_dimension>::get_function_values<IndexSet>
+      (const IndexSet&, const VectorSlice<const std::vector<types::global_dof_index> >&,
+       VectorSlice<std::vector<std::vector<IndexSet::value_type> > >, bool) const;
+
+    template
+      void FEValuesBase<deal_II_dimension>::get_function_gradients<IndexSet>
+      (const IndexSet&, std::vector<dealii::Tensor<1,deal_II_dimension,IndexSet::value_type> > &) const;
+    template
+      void FEValuesBase<deal_II_dimension>::get_function_gradients<IndexSet>
+      (const IndexSet&, const VectorSlice<const std::vector<types::global_dof_index> >&,
+       std::vector<dealii::Tensor<1,deal_II_dimension,IndexSet::value_type> > &) const;
+
+    template
+      void FEValuesBase<deal_II_dimension>::get_function_gradients<IndexSet>
+      (const IndexSet&, std::vector<std::vector<dealii::Tensor<1,deal_II_dimension,IndexSet::value_type> > > &) const;
+    template
+      void FEValuesBase<deal_II_dimension>::get_function_gradients<IndexSet>
+      (const IndexSet&, const VectorSlice<const std::vector<types::global_dof_index> >&,
+       VectorSlice<std::vector<std::vector<dealii::Tensor<1,deal_II_dimension,IndexSet::value_type> > > >, bool) const;
+
+    template
+      void FEValuesBase<deal_II_dimension>::get_function_hessians<IndexSet>
+      (const IndexSet&, std::vector<dealii::Tensor<2,deal_II_dimension,IndexSet::value_type> > &) const;
+    template
+      void FEValuesBase<deal_II_dimension>::get_function_hessians<IndexSet>
+      (const IndexSet&, const VectorSlice<const std::vector<types::global_dof_index> >&,
+       std::vector<dealii::Tensor<2,deal_II_dimension,IndexSet::value_type> > &) const;
+
+    template
+      void FEValuesBase<deal_II_dimension>::get_function_hessians<IndexSet>
+      (const IndexSet&, std::vector<std::vector<dealii::Tensor<2,deal_II_dimension,IndexSet::value_type> > > &, bool) const;
+    template
+      void FEValuesBase<deal_II_dimension>::get_function_hessians<IndexSet>
+      (const IndexSet&, const VectorSlice<const std::vector<types::global_dof_index> >&,
+       VectorSlice<std::vector<std::vector<dealii::Tensor<2,deal_II_dimension,IndexSet::value_type> > > >, bool) const;
+
+    template
+    void FEValuesBase<deal_II_dimension>::get_function_laplacians<IndexSet>
+      (const IndexSet&, std::vector<IndexSet::value_type>&) const;
+    template
+      void FEValuesBase<deal_II_dimension>::get_function_laplacians<IndexSet>
+      (const IndexSet&, const VectorSlice<const std::vector<types::global_dof_index> >&, std::vector<IndexSet::value_type>&) const;
+
+    template
+      void FEValuesBase<deal_II_dimension>::get_function_laplacians<IndexSet>
+      (const IndexSet&, std::vector<Vector<IndexSet::value_type> > &) const;
+
+    template
+      void FEValuesBase<deal_II_dimension>::get_function_laplacians<IndexSet>
+      (const IndexSet&, const VectorSlice<const std::vector<types::global_dof_index> >&,
+       std::vector<Vector<IndexSet::value_type> > &) const;
+
+    template
+      void FEValuesBase<deal_II_dimension>::get_function_laplacians<IndexSet>
+      (const IndexSet&, const VectorSlice<const std::vector<types::global_dof_index> >&,
+       std::vector<std::vector<IndexSet::value_type> > &, bool) const;
+
+    template
+      void FEValuesBase<deal_II_dimension>::get_function_third_derivatives<IndexSet>
+      (const IndexSet&, std::vector<dealii::Tensor<3,deal_II_dimension,IndexSet::value_type> > &) const;
+    template
+      void FEValuesBase<deal_II_dimension>::get_function_third_derivatives<IndexSet>
+      (const IndexSet&, const VectorSlice<const std::vector<types::global_dof_index> >&,
+       std::vector<dealii::Tensor<3,deal_II_dimension,IndexSet::value_type> > &) const;
+
+    template
+      void FEValuesBase<deal_II_dimension>::get_function_third_derivatives<IndexSet>
+      (const IndexSet&, std::vector<std::vector<dealii::Tensor<3,deal_II_dimension,IndexSet::value_type> > > &, bool) const;
+    template
+      void FEValuesBase<deal_II_dimension>::get_function_third_derivatives<IndexSet>
+      (const IndexSet&, const VectorSlice<const std::vector<types::global_dof_index> >&,
+       VectorSlice<std::vector<std::vector<dealii::Tensor<3,deal_II_dimension,IndexSet::value_type> > > >, bool) const;
+
+
+#if deal_II_dimension != 3  // second group: space dimension deal_II_dimension+1; left out when deal_II_dimension is 3
+
+    template
+    void FEValuesBase<deal_II_dimension,deal_II_dimension+1>::get_function_values<IndexSet>
+      (const IndexSet&, std::vector<IndexSet::value_type>&) const;
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_dimension+1>::get_function_values<IndexSet>
+      (const IndexSet&, const VectorSlice<const std::vector<types::global_dof_index> >&, std::vector<IndexSet::value_type>&) const;
+
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_dimension+1>::get_function_values<IndexSet>
+      (const IndexSet&, std::vector<Vector<IndexSet::value_type> > &) const;
+
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_dimension+1>::get_function_values<IndexSet>
+      (const IndexSet&, const VectorSlice<const std::vector<types::global_dof_index> >&,
+       std::vector<Vector<IndexSet::value_type> > &) const;
+
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_dimension+1>::get_function_values<IndexSet>
+      (const IndexSet&, const VectorSlice<const std::vector<types::global_dof_index> >&,
+       VectorSlice<std::vector<std::vector<IndexSet::value_type> > >, bool) const;
+
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_dimension+1>::get_function_gradients<IndexSet>
+      (const IndexSet&, std::vector<dealii::Tensor<1,deal_II_dimension+1,IndexSet::value_type> > &) const;
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_dimension+1>::get_function_gradients<IndexSet>
+      (const IndexSet&, const VectorSlice<const std::vector<types::global_dof_index> >&,
+       std::vector<dealii::Tensor<1,deal_II_dimension+1,IndexSet::value_type> > &) const;
+
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_dimension+1>::get_function_gradients<IndexSet>
+      (const IndexSet&, std::vector<std::vector<dealii::Tensor<1,deal_II_dimension+1,IndexSet::value_type> > > &) const;
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_dimension+1>::get_function_gradients<IndexSet>
+      (const IndexSet&, const VectorSlice<const std::vector<types::global_dof_index> >&,
+       VectorSlice<std::vector<std::vector<dealii::Tensor<1,deal_II_dimension+1,IndexSet::value_type> > > >, bool) const;
+
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_dimension+1>::get_function_hessians<IndexSet>
+      (const IndexSet&, std::vector<dealii::Tensor<2,deal_II_dimension+1,IndexSet::value_type> > &) const;
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_dimension+1>::get_function_hessians<IndexSet>
+      (const IndexSet&, const VectorSlice<const std::vector<types::global_dof_index> >&,
+       std::vector<dealii::Tensor<2,deal_II_dimension+1,IndexSet::value_type> > &) const;
+
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_dimension+1>::get_function_hessians<IndexSet>
+      (const IndexSet&, std::vector<std::vector<dealii::Tensor<2,deal_II_dimension+1,IndexSet::value_type> > > &, bool) const;
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_dimension+1>::get_function_hessians<IndexSet>
+      (const IndexSet&, const VectorSlice<const std::vector<types::global_dof_index> >&,
+       VectorSlice<std::vector<std::vector<dealii::Tensor<2,deal_II_dimension+1,IndexSet::value_type> > > >, bool) const;
+
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_dimension+1>::get_function_laplacians<IndexSet>
+      (const IndexSet&, std::vector<IndexSet::value_type>&) const;
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_dimension+1>::get_function_laplacians<IndexSet>
+      (const IndexSet&, const VectorSlice<const std::vector<types::global_dof_index> >&, std::vector<IndexSet::value_type>&) const;
+
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_dimension+1>::get_function_laplacians<IndexSet>
+      (const IndexSet&, std::vector<Vector<IndexSet::value_type> > &) const;
+
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_dimension+1>::get_function_laplacians<IndexSet>
+      (const IndexSet&, const VectorSlice<const std::vector<types::global_dof_index> >&,
+       std::vector<Vector<IndexSet::value_type> > &) const;
+
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_dimension+1>::get_function_laplacians<IndexSet>
+      (const IndexSet&, const VectorSlice<const std::vector<types::global_dof_index> >&,
+       std::vector<std::vector<IndexSet::value_type> > &, bool) const;
+
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_dimension+1>::get_function_third_derivatives<IndexSet>
+      (const IndexSet&, std::vector<dealii::Tensor<3,deal_II_dimension+1,IndexSet::value_type> > &) const;
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_dimension+1>::get_function_third_derivatives<IndexSet>
+      (const IndexSet&, const VectorSlice<const std::vector<types::global_dof_index> >&,
+       std::vector<dealii::Tensor<3,deal_II_dimension+1,IndexSet::value_type> > &) const;
+
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_dimension+1>::get_function_third_derivatives<IndexSet>
+      (const IndexSet&, std::vector<std::vector<dealii::Tensor<3,deal_II_dimension+1,IndexSet::value_type> > > &, bool) const;
+    template
+      void FEValuesBase<deal_II_dimension,deal_II_dimension+1>::get_function_third_derivatives<IndexSet>
+      (const IndexSet&, const VectorSlice<const std::vector<types::global_dof_index> >&,
+       VectorSlice<std::vector<std::vector<dealii::Tensor<3,deal_II_dimension+1,IndexSet::value_type> > > >, bool) const;
+
+#endif
+  }
diff --git a/source/fe/fe_values_inst2.cc b/source/fe/fe_values_inst2.cc
new file mode 100644
index 0000000..aacdfb8
--- /dev/null
+++ b/source/fe/fe_values_inst2.cc
@@ -0,0 +1,20 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// This file compiles the second half of the instantiations from fe_values.cc
+// to get the memory consumption below 1.5gb with gcc.
+
+#define SPLIT_INSTANTIATIONS_INDEX 1
+#include "fe_values.cc"
diff --git a/source/fe/mapping.cc b/source/fe/mapping.cc
new file mode 100644
index 0000000..c8e7d64
--- /dev/null
+++ b/source/fe/mapping.cc
@@ -0,0 +1,111 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/grid/tria.h>
+#include <deal.II/fe/mapping.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+template <int dim, int spacedim>
+Mapping<dim, spacedim>::~Mapping ()
+{} // empty body: no explicit cleanup is performed here
+
+
+template <int dim, int spacedim>
+std_cxx11::array<Point<spacedim>, GeometryInfo<dim>::vertices_per_cell>
+Mapping<dim, spacedim>::get_vertices (
+  const typename Triangulation<dim,spacedim>::cell_iterator &cell) const
+{
+  // Gather the vertex positions reported by the cell, in the cell's own
+  // vertex numbering, into a fixed-size array that is returned by value.
+  std_cxx11::array<Point<spacedim>, GeometryInfo<dim>::vertices_per_cell> corners;
+  for (unsigned int v = 0; v < GeometryInfo<dim>::vertices_per_cell; ++v)
+    corners[v] = cell->vertex(v);
+  return corners;
+}
+
+
+template<int dim, int spacedim>
+Point<dim-1>
+Mapping<dim,spacedim>::
+project_real_point_to_unit_point_on_face (
+  const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+  const unsigned int &face_no,
+  const Point<spacedim> &p) const
+{
+  // projecting onto a face is not meaningful for dim=1
+  Assert(dim>1, ExcNotImplemented());
+  // dimensions above three are not implemented
+  Assert(dim<=3, ExcNotImplemented());
+  // pull p back to the reference cell, then drop the coordinate normal to the face
+  const Point<dim> ref_pt = transform_real_to_unit_cell(cell, p);
+  Point<dim-1> face_pt;
+
+  if (dim==2)
+    {
+      const unsigned int normal_dir = GeometryInfo<dim>::unit_normal_direction[face_no];
+      if (normal_dir == 0)
+        face_pt = Point<dim-1>(ref_pt(1));
+      else if (normal_dir == 1)
+        face_pt = Point<dim-1>(ref_pt(0));
+    }
+  else if (dim==3)
+    {
+      const unsigned int normal_dir = GeometryInfo<dim>::unit_normal_direction[face_no];
+      if (normal_dir == 0)
+        face_pt = Point<dim-1>(ref_pt(1), ref_pt(2));
+      else if (normal_dir == 1)
+        face_pt = Point<dim-1>(ref_pt(0), ref_pt(2));
+      else if (normal_dir == 2)
+        face_pt = Point<dim-1>(ref_pt(0), ref_pt(1));
+    }
+  return face_pt;
+}
+
+/*------------------------------ InternalDataBase ------------------------------*/
+
+
+template <int dim, int spacedim>
+Mapping<dim, spacedim>::InternalDataBase::InternalDataBase ():
+  update_each(update_default) // the stored flags start out as update_default
+{}
+
+
+
+template <int dim, int spacedim>
+Mapping<dim, spacedim>::InternalDataBase::~InternalDataBase ()
+{} // empty body: no explicit cleanup is performed here
+
+
+
+template <int dim, int spacedim>
+std::size_t
+Mapping<dim, spacedim>::InternalDataBase::memory_consumption () const
+{
+  return sizeof(*this); // size of the InternalDataBase object itself, nothing more
+}
+
+
+/*------------------------------ InternalData ------------------------------*/
+
+
+
+// explicit instantiations
+#include "mapping.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/mapping.inst.in b/source/fe/mapping.inst.in
new file mode 100644
index 0000000..adee71b
--- /dev/null
+++ b/source/fe/mapping.inst.in
@@ -0,0 +1,25 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  { // only combinations with dimension <= space dimension are instantiated
+#if deal_II_dimension <= deal_II_space_dimension 	
+    template class Mapping<deal_II_dimension, deal_II_space_dimension>;
+#endif
+  }
+
diff --git a/source/fe/mapping_c1.cc b/source/fe/mapping_c1.cc
new file mode 100644
index 0000000..d161417
--- /dev/null
+++ b/source/fe/mapping_c1.cc
@@ -0,0 +1,221 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/tria_boundary.h>
+#include <deal.II/fe/mapping_c1.h>
+#include <cmath>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+template <int dim, int spacedim>
+MappingC1<dim,spacedim>::MappingC1Generic::MappingC1Generic ()
+  :
+  MappingQGeneric<dim,spacedim> (3) // presumably the polynomial degree (cubic) -- confirm against MappingQGeneric
+{}
+
+
+
+template <int dim, int spacedim>
+MappingC1<dim,spacedim>::MappingC1 ()
+  :
+  MappingQ<dim,spacedim> (3)
+{
+  Assert (dim > 1, ExcImpossibleInDim(dim));
+
+  // Swap the base class's qp_mapping object for a MappingC1Generic,
+  // i.e. for one that knows how to generate data points based on the
+  // geometry. Only the Qp mapping needs replacing, because that is
+  // the one used on boundary cells, which is where the difference
+  // matters.
+  this->qp_mapping.reset (new MappingC1Generic());
+}
+
+
+
+template <>
+void
+MappingC1<1>::MappingC1Generic::add_line_support_points (const Triangulation<1>::cell_iterator &,
+                                                         std::vector<Point<1> > &) const
+{
+  const unsigned int dim = 1;
+  (void)dim; // keeps dim "used" when the Assert below compiles to nothing
+  Assert (dim > 1, ExcImpossibleInDim(dim)); // can never hold for dim == 1: this specialization must not be called
+}
+
+
+
+template <>
+void
+MappingC1<2>::MappingC1Generic::add_line_support_points (const Triangulation<2>::cell_iterator &cell,
+                                                         std::vector<Point<2> > &a) const
+{
+  const unsigned int dim = 2;
+  std::vector<Point<dim> > line_points (2);
+
+  // for every line of the cell, append two support points to a: on a
+  // boundary line they are computed below from the boundary's vertex
+  // normals; otherwise they are taken from a StraightBoundary object
+  for (unsigned int line_no=0; line_no<GeometryInfo<dim>::lines_per_cell; ++line_no)
+    {
+      const Triangulation<dim>::line_iterator line = cell->line(line_no);
+
+      if (line->at_boundary())
+        {
+          // first get the normal vectors at the two vertices of this line
+          // from the boundary description
+          const Boundary<dim> &boundary
+            = line->get_triangulation().get_boundary(line->boundary_id());
+
+          Boundary<dim>::FaceVertexNormals face_vertex_normals;
+          boundary.get_normals_at_vertices (line, face_vertex_normals);
+
+          // then transform them into interpolation points for a cubic
+          // polynomial
+          //
+          // for this, note that if we describe the boundary curve as a
+          // polynomial in tangential coordinate @p{t=0..1} (along the line)
+          // and @p{s} in normal direction, then the cubic mapping is such
+          // that @p{s = a*t**3 + b*t**2 + c*t + d}, and we want to determine
+          // the interpolation points at @p{t=0.276} and @p{t=0.724}
+          // (Gauss-Lobatto points). Since at @p{t=0,1} we want a vertex which
+          // is actually at the boundary, we know that @p{d=0} and @p{a=-b-c},
+          // which gives @p{s(0.276)} and @p{s(0.724)} in terms of @p{b,c}. As
+          // side-conditions, we want that the derivatives at @p{t=0} and
+          // @p{t=1}, i.e. at the vertices match those returned by the
+          // boundary.
+          //
+          // The task is then first to determine the coefficients from the
+          // tangentials. for that, first rotate the tangents of @p{s(t)} into
+          // the global coordinate system. they are @p{A (1,c)} and @p{A
+          // (1,-b-2c)} with @p{A} the rotation matrix, since the tangentials
+          // in the coordinate system relative to the line are @p{(1,c)} and
+          // @p{(1,-b-2c)} at the two vertices, respectively. We then have to
+          // make sure by matching @p{b,c} that these tangentials are
+          // orthogonal to the normals returned by the boundary object
+          const Tensor<1,2> coordinate_vector = line->vertex(1) - line->vertex(0);
+          const double      h                 = std::sqrt(coordinate_vector * coordinate_vector);
+          Tensor<1,2> coordinate_axis = coordinate_vector;
+          coordinate_axis /= h;
+
+          const double alpha = std::atan2(coordinate_axis[1], coordinate_axis[0]);
+          const double c = -((face_vertex_normals[0][1] * std::sin(alpha)
+                              +face_vertex_normals[0][0] * std::cos(alpha)) /
+                             (face_vertex_normals[0][1] * std::cos(alpha)
+                              -face_vertex_normals[0][0] * std::sin(alpha)));
+          const double b = ((face_vertex_normals[1][1] * std::sin(alpha)
+                             +face_vertex_normals[1][0] * std::cos(alpha)) /
+                            (face_vertex_normals[1][1] * std::cos(alpha)
+                             -face_vertex_normals[1][0] * std::sin(alpha)))
+                           -2*c;
+
+          QGaussLobatto<1> quad_points(4);
+          const double t1 = quad_points.point(1)[0];
+          const double t2 = quad_points.point(2)[0];
+          const double s_t1 = (((-b-c)*t1+b)*t1+c)*t1;
+          const double s_t2 = (((-b-c)*t2+b)*t2+c)*t2;
+
+          // next evaluate the so determined cubic polynomial at the two
+          // interior Gauss-Lobatto points t1 and t2, first in unit coordinates
+          const Point<2> new_unit_points[2] = { Point<2>(t1, s_t1),
+                                                Point<2>(t2, s_t2)
+                                              };
+          // then transform these points to real coordinates by rotating,
+          // scaling and shifting
+          for (unsigned int i=0; i<2; ++i)
+            {
+              Point<2> real_point (std::cos(alpha) * new_unit_points[i][0]
+                                   - std::sin(alpha) * new_unit_points[i][1],
+                                   std::sin(alpha) * new_unit_points[i][0]
+                                   + std::cos(alpha) * new_unit_points[i][1]);
+              real_point *= h;
+              real_point += line->vertex(0);
+              a.push_back (real_point);
+            };
+        }
+      else
+        // not at boundary
+        {
+          static const StraightBoundary<dim> straight_boundary;
+          straight_boundary.get_intermediate_points_on_line (line, line_points);
+          a.insert (a.end(), line_points.begin(), line_points.end());
+        }
+    }
+}
+
+
+
+template<int dim, int spacedim>
+void
+MappingC1<dim,spacedim>::MappingC1Generic::add_line_support_points (const typename Triangulation<dim>::cell_iterator &,
+    std::vector<Point<dim> > &) const
+{
+  Assert (false, ExcNotImplemented()); // primary template (no explicit specialization applies): not implemented
+}
+
+
+
+template <>
+void
+MappingC1<1>::MappingC1Generic::add_quad_support_points (const Triangulation<1>::cell_iterator &,
+                                                         std::vector<Point<1> > &) const
+{
+  const unsigned int dim = 1;
+  (void)dim; // keeps dim "used" when the Assert below compiles to nothing
+  Assert (dim > 2, ExcImpossibleInDim(dim)); // can never hold for dim == 1: this specialization must not be called
+}
+
+
+
+template <>
+void
+MappingC1<2>::MappingC1Generic::add_quad_support_points (const Triangulation<2>::cell_iterator &,
+                                                         std::vector<Point<2> > &) const
+{
+  const unsigned int dim = 2;
+  (void)dim; // keeps dim "used" when the Assert below compiles to nothing
+  Assert (dim > 2, ExcImpossibleInDim(dim)); // can never hold for dim == 2: this specialization must not be called
+}
+
+
+
+template<int dim, int spacedim>
+void
+MappingC1<dim,spacedim>::MappingC1Generic::add_quad_support_points (const typename Triangulation<dim>::cell_iterator &,
+    std::vector<Point<dim> > &) const
+{
+  Assert (false, ExcNotImplemented()); // primary template (no explicit specialization applies): not implemented
+}
+
+
+
+template<int dim, int spacedim>
+Mapping<dim, spacedim> *
+MappingC1<dim,spacedim>::clone () const
+{
+  return new MappingC1<dim,spacedim>(); // heap-allocates a fresh, default-constructed MappingC1
+}
+
+
+
+
+// explicit instantiations
+#include "mapping_c1.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/mapping_c1.inst.in b/source/fe/mapping_c1.inst.in
new file mode 100644
index 0000000..0760bad
--- /dev/null
+++ b/source/fe/mapping_c1.inst.in
@@ -0,0 +1,22 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+for (deal_II_dimension : DIMENSIONS)
+  {
+    template class MappingC1<deal_II_dimension>; // second template argument is left at its default
+  }
+
diff --git a/source/fe/mapping_cartesian.cc b/source/fe/mapping_cartesian.cc
new file mode 100644
index 0000000..5ffce23
--- /dev/null
+++ b/source/fe/mapping_cartesian.cc
@@ -0,0 +1,1075 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/tensor.h>
+#include <deal.II/base/quadrature.h>
+#include <deal.II/base/qprojector.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/fe/mapping_cartesian.h>
+#include <deal.II/fe/fe_values.h>
+
+#include <cmath>
+#include <algorithm>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+template<int dim, int spacedim>
+const unsigned int MappingCartesian<dim,spacedim>::invalid_face_number; // out-of-class definition of the static member; no initializer is given here
+
+
+
+template<int dim, int spacedim>
+MappingCartesian<dim, spacedim>::InternalData::InternalData (const Quadrature<dim> &q)
+  :
+  quadrature_points (q.get_points ()) // remember the points of the quadrature rule passed in
+{}
+
+
+
+template<int dim, int spacedim>
+std::size_t
+MappingCartesian<dim, spacedim>::InternalData::memory_consumption () const
+{
+  const std::size_t members = (MemoryConsumption::memory_consumption (cell_extents) +
+                               MemoryConsumption::memory_consumption (volume_element) +
+                               MemoryConsumption::memory_consumption (quadrature_points));
+  return Mapping<dim, spacedim>::InternalDataBase::memory_consumption() + members; // base-class part plus the three members above
+}
+
+
+
+template <int dim, int spacedim>
+bool
+MappingCartesian<dim,spacedim>::preserves_vertex_locations () const
+{
+  return true; // unconditionally true for this mapping
+}
+
+
+
+template<int dim, int spacedim>
+UpdateFlags
+MappingCartesian<dim, spacedim>::requires_update_flags (const UpdateFlags in) const
+{
+  // Nearly everything FEValues can ask of this mapping is computed
+  // directly, without needing any other quantity first, so the
+  // requested flags are passed through unchanged. The one exception
+  // is boundary forms: they are derived from the normal vectors, so
+  // asking for the former also switches on the latter.
+  const bool wants_boundary_forms = (in & update_boundary_forms);
+  if (!wants_boundary_forms)
+    return in;
+
+  return in | update_normal_vectors;
+}
+
+
+
+template<int dim, int spacedim>
+typename Mapping<dim, spacedim>::InternalDataBase *
+MappingCartesian<dim, spacedim>::get_data (const UpdateFlags      update_flags,
+                                           const Quadrature<dim> &q) const
+{
+  // The flags are recorded in the returned object so that the
+  // fill_fe_*_values() functions can consult them later. What gets
+  // stored is the transitive hull of the requested flags.
+  const UpdateFlags all_flags = requires_update_flags(update_flags);
+
+  InternalData *result = new InternalData (q);
+  result->update_each = all_flags;
+  return result;
+}
+
+
+
+template<int dim, int spacedim>
+typename Mapping<dim, spacedim>::InternalDataBase *
+MappingCartesian<dim, spacedim>::get_face_data (const UpdateFlags update_flags,
+                                                const Quadrature<dim-1>& quadrature) const
+{
+  const Quadrature<dim> all_faces = QProjector<dim>::project_to_all_faces(quadrature);
+  InternalData *face_data = new InternalData (all_faces);
+
+  // FEValues is expected to hand us flags that are already closed under
+  // requires_update_flags(); check that nothing would be added to them
+  Assert (update_flags == requires_update_flags (update_flags),
+          ExcInternalError());
+
+  // record the flags on the data object, where the fill_fe_*_values()
+  // functions can read them
+  face_data->update_each = update_flags;
+
+  return face_data;
+}
+
+
+
+template<int dim, int spacedim>
+typename Mapping<dim, spacedim>::InternalDataBase *
+MappingCartesian<dim, spacedim>::get_subface_data (const UpdateFlags update_flags,
+                                                   const Quadrature<dim-1> &quadrature) const
+{
+  const Quadrature<dim> all_subfaces = QProjector<dim>::project_to_all_subfaces(quadrature);
+  InternalData *subface_data = new InternalData (all_subfaces);
+
+  // FEValues is expected to hand us flags that are already closed under
+  // requires_update_flags(); check that nothing would be added to them
+  Assert (update_flags == requires_update_flags (update_flags),
+          ExcInternalError());
+
+  // record the flags on the data object, where the fill_fe_*_values()
+  // functions can read them
+  subface_data->update_each = update_flags;
+
+  return subface_data;
+}
+
+
+
+
+template<int dim, int spacedim>
+void
+MappingCartesian<dim, spacedim>::compute_fill (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                                               const unsigned int        face_no,
+                                               const unsigned int        sub_no,
+                                               const CellSimilarity::Similarity cell_similarity,
+                                               const InternalData             &data,
+                                               std::vector<Point<dim> > &quadrature_points,
+                                               std::vector<Tensor<1,dim> > &normal_vectors) const
+{
+  const UpdateFlags update_flags = data.update_each;
+
+  // sanity checks on the face_no / sub_no combination passed in
+  if (face_no != invalid_face_number)
+    {
+      // Add 1 on both sides of
+      // assertion to avoid compiler
+      // warning about testing
+      // unsigned int < 0 in 1d.
+      Assert (face_no+1 < GeometryInfo<dim>::faces_per_cell+1,
+              ExcIndexRange (face_no, 0, GeometryInfo<dim>::faces_per_cell));
+
+      // We would like to check for
+      // sub_no < cell->face(face_no)->n_children(),
+      // but unfortunately the current
+      // function is also called for
+      // faces without children (see
+      // tests/fe/mapping.cc). Therefore,
+      // we must use following workaround
+      // of two separate assertions
+      Assert ((sub_no == invalid_face_number) ||
+              cell->face(face_no)->has_children() ||
+              (sub_no+1 < GeometryInfo<dim>::max_children_per_face+1),
+              ExcIndexRange (sub_no, 0,
+                             GeometryInfo<dim>::max_children_per_face));
+      Assert ((sub_no == invalid_face_number) ||
+              !cell->face(face_no)->has_children() ||
+              (sub_no < cell->face(face_no)->n_children()),
+              ExcIndexRange (sub_no, 0, cell->face(face_no)->n_children()));
+    }
+  else
+    // invalid face number, so
+    // subface should be invalid as
+    // well
+    Assert (sub_no == invalid_face_number, ExcInternalError());
+
+  // let start be the origin of a
+  // local coordinate system. it is
+  // chosen as vertex 0, the (lower)
+  // left vertex
+  const Point<dim> start = cell->vertex(0);
+
+  // Store the cell's extent along each
+  // axis, measured from vertex 0 to
+  // vertices 1, 2 and 4. Not redone if
+  // the cell is a translation.
+  if (cell_similarity != CellSimilarity::translation)
+    {
+      switch (dim)
+        {
+        case 1:
+          data.cell_extents[0] = cell->vertex(1)(0) - start(0);
+          break;
+        case 2:
+          data.cell_extents[0] = cell->vertex(1)(0) - start(0);
+          data.cell_extents[1] = cell->vertex(2)(1) - start(1);
+          break;
+        case 3:
+          data.cell_extents[0] = cell->vertex(1)(0) - start(0);
+          data.cell_extents[1] = cell->vertex(2)(1) - start(1);
+          data.cell_extents[2] = cell->vertex(4)(2) - start(2);
+          break;
+        default:
+          Assert(false, ExcNotImplemented());
+        }
+    }
+
+
+  // transform quadrature point. this
+  // is obtained simply by scaling
+  // unit coordinates with lengths in
+  // each direction
+  if (update_flags & update_quadrature_points)
+    {
+      const typename QProjector<dim>::DataSetDescriptor offset
+        = (face_no == invalid_face_number
+           ?
+           QProjector<dim>::DataSetDescriptor::cell()
+           :
+           (sub_no == invalid_face_number
+            ?
+            // called from FEFaceValues
+            QProjector<dim>::DataSetDescriptor::face (face_no,
+                                                      cell->face_orientation(face_no),
+                                                      cell->face_flip(face_no),
+                                                      cell->face_rotation(face_no),
+                                                      quadrature_points.size())
+            :
+            // called from FESubfaceValues
+            QProjector<dim>::DataSetDescriptor::subface (face_no, sub_no,
+                                                         cell->face_orientation(face_no),
+                                                         cell->face_flip(face_no),
+                                                         cell->face_rotation(face_no),
+                                                         quadrature_points.size(),
+                                                         cell->subface_case(face_no))
+           ));
+
+      for (unsigned int i=0; i<quadrature_points.size(); ++i)
+        {
+          quadrature_points[i] = start;
+          for (unsigned int d=0; d<dim; ++d)
+            quadrature_points[i](d) += data.cell_extents[d] *
+                                       data.quadrature_points[i+offset](d);
+        }
+    }
+
+
+  // compute normal vectors. since
+  // cells are aligned to coordinate
+  // axes, they are simply vectors
+  // with exactly one entry equal to
+  // 1 or -1. Furthermore, all
+  // normals on a face have the same
+  // value
+  if (update_flags & update_normal_vectors)
+    {
+      Assert (face_no < GeometryInfo<dim>::faces_per_cell,
+              ExcInternalError());
+
+      switch (dim)
+        {
+        case 1:
+        {
+          static const Point<dim>
+          normals[GeometryInfo<1>::faces_per_cell]
+            = { Point<dim>(-1.),
+                Point<dim>( 1.)
+              };
+          std::fill (normal_vectors.begin(),
+                     normal_vectors.end(),
+                     normals[face_no]);
+          break;
+        }
+
+        case 2:
+        {
+          static const Point<dim>
+          normals[GeometryInfo<2>::faces_per_cell]
+            = { Point<dim>(-1, 0),
+                Point<dim>( 1, 0),
+                Point<dim>( 0,-1),
+                Point<dim>( 0, 1)
+              };
+          std::fill (normal_vectors.begin(),
+                     normal_vectors.end(),
+                     normals[face_no]);
+          break;
+        }
+
+        case 3:
+        {
+          static const Point<dim>
+          normals[GeometryInfo<3>::faces_per_cell]
+            = { Point<dim>(-1, 0, 0),
+                Point<dim>( 1, 0, 0),
+                Point<dim>( 0,-1, 0),
+                Point<dim>( 0, 1, 0),
+                Point<dim>( 0, 0,-1),
+                Point<dim>( 0, 0, 1)
+              };
+          std::fill (normal_vectors.begin(),
+                     normal_vectors.end(),
+                     normals[face_no]);
+          break;
+        }
+
+        default:
+          Assert (false, ExcNotImplemented());
+        }
+    }
+}
+
+
+
+template<int dim, int spacedim>
+CellSimilarity::Similarity
+MappingCartesian<dim, spacedim>::
+fill_fe_values (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                const CellSimilarity::Similarity                           cell_similarity,
+                const Quadrature<dim>                                     &quadrature,
+                const typename Mapping<dim,spacedim>::InternalDataBase    &internal_data,
+                internal::FEValues::MappingRelatedData<dim,spacedim>      &output_data) const
+{
+  // Fill output_data for one cell according to the flags stored in
+  // internal_data, which must really be this class's InternalData.
+  Assert (dynamic_cast<const InternalData *> (&internal_data) != 0, ExcInternalError());
+  const InternalData &data = static_cast<const InternalData &> (internal_data);
+  // no face is involved on this path, so normal vectors go to a throwaway
+  std::vector<Tensor<1,dim> > dummy;
+  // quadrature points and the cell extents stored in data are set in here
+  compute_fill (cell, invalid_face_number, invalid_face_number, cell_similarity,
+                data,
+                output_data.quadrature_points,
+                dummy);
+  // every quantity below is left untouched for CellSimilarity::translation
+  // compute Jacobian determinant. all values are equal and are the
+  // product of the local lengths in each coordinate direction
+  if (data.update_each & (update_JxW_values | update_volume_elements))
+    if (cell_similarity != CellSimilarity::translation)
+      {
+        double J = data.cell_extents[0];
+        for (unsigned int d=1; d<dim; ++d)
+          J *= data.cell_extents[d];
+        data.volume_element = J;
+        if (data.update_each & update_JxW_values)
+          for (unsigned int i=0; i<output_data.JxW_values.size(); ++i)
+            output_data.JxW_values[i] = J * quadrature.weight(i);
+      }
+  // "compute" Jacobian at the quadrature points, which are all the
+  // same: diagonal, with the cell extents as entries
+  if (data.update_each & update_jacobians)
+    if (cell_similarity != CellSimilarity::translation)
+      for (unsigned int i=0; i<output_data.jacobians.size(); ++i)
+        {
+          output_data.jacobians[i] =  DerivativeForm<1,dim,spacedim>();
+          for (unsigned int j=0; j<dim; ++j)
+            output_data.jacobians[i][j][j] = data.cell_extents[j];
+        }
+  // "compute" the derivative of the Jacobian at the quadrature
+  // points, which are all zero of course
+  if (data.update_each & update_jacobian_grads)
+    if (cell_similarity != CellSimilarity::translation)
+      for (unsigned int i=0; i<output_data.jacobian_grads.size(); ++i)
+        output_data.jacobian_grads[i] = DerivativeForm<2,dim,spacedim>();
+
+  if (data.update_each & update_jacobian_pushed_forward_grads)
+    if (cell_similarity != CellSimilarity::translation)
+      for (unsigned int i=0; i<output_data.jacobian_pushed_forward_grads.size(); ++i)
+        output_data.jacobian_pushed_forward_grads[i] = Tensor<3,spacedim>();
+
+  // "compute" the hessian of the Jacobian at the quadrature points,
+  // which are all also zero of course
+  if (data.update_each & update_jacobian_2nd_derivatives)
+    if (cell_similarity != CellSimilarity::translation)
+      for (unsigned int i=0; i<output_data.jacobian_2nd_derivatives.size(); ++i)
+        output_data.jacobian_2nd_derivatives[i] = DerivativeForm<3,dim,spacedim>();
+
+  if (data.update_each & update_jacobian_pushed_forward_2nd_derivatives)
+    if (cell_similarity != CellSimilarity::translation)
+      for (unsigned int i=0; i<output_data.jacobian_pushed_forward_2nd_derivatives.size(); ++i)
+        output_data.jacobian_pushed_forward_2nd_derivatives[i] = Tensor<4,spacedim>();
+
+  if (data.update_each & update_jacobian_3rd_derivatives)
+    if (cell_similarity != CellSimilarity::translation)
+      for (unsigned int i=0; i<output_data.jacobian_3rd_derivatives.size(); ++i)
+        output_data.jacobian_3rd_derivatives[i] = DerivativeForm<4,dim,spacedim>();
+
+  if (data.update_each & update_jacobian_pushed_forward_3rd_derivatives)
+    if (cell_similarity != CellSimilarity::translation)
+      for (unsigned int i=0; i<output_data.jacobian_pushed_forward_3rd_derivatives.size(); ++i)
+        output_data.jacobian_pushed_forward_3rd_derivatives[i] = Tensor<5,spacedim>();
+
+  // "compute" inverse Jacobian at the quadrature points, which are all
+  // the same: diagonal with entries 1/cell_extents[j], stored per point i
+  if (data.update_each & update_inverse_jacobians)
+    if (cell_similarity != CellSimilarity::translation)
+      for (unsigned int i=0; i<output_data.inverse_jacobians.size(); ++i)
+        {
+          output_data.inverse_jacobians[i] = Tensor<2,dim>();
+          for (unsigned int j=0; j<dim; ++j)
+            output_data.inverse_jacobians[i][j][j]=1./data.cell_extents[j];
+        }
+  // the similarity passed in is handed back unchanged
+  return cell_similarity;
+}
+
+
+
+// Fill the mapping-related output fields for the quadrature points on
+// face face_no of the given cell. Which fields get written is decided
+// by the flags stored in the update_each member of internal_data.
+//
+// The Jacobian of this mapping is diagonal with the cell extents on
+// the diagonal. It is therefore the same at every quadrature point,
+// and all of its derivatives are written as zero tensors.
+template<int dim, int spacedim>
+void
+MappingCartesian<dim, spacedim>::
+fill_fe_face_values (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                     const unsigned int                                         face_no,
+                     const Quadrature<dim-1>                                   &quadrature,
+                     const typename Mapping<dim,spacedim>::InternalDataBase    &internal_data,
+                     internal::FEValues::MappingRelatedData<dim,spacedim>      &output_data) const
+{
+  // convert data object to internal data for this class. fails with
+  // an exception if that is not possible
+  Assert (dynamic_cast<const InternalData *> (&internal_data) != 0,
+          ExcInternalError());
+  const InternalData &data = static_cast<const InternalData &> (internal_data);
+
+  // hand the quadrature point and normal vector arrays to
+  // compute_fill; invalid_face_number in the subface slot marks this
+  // as a whole-face evaluation.
+  // NOTE(review): data.cell_extents is read below, so it is presumably
+  // also set inside compute_fill -- confirm
+  compute_fill (cell, face_no, invalid_face_number,
+                CellSimilarity::none,
+                data,
+                output_data.quadrature_points,
+                output_data.normal_vectors);
+
+  // first compute Jacobian determinant of the face, which is simply
+  // the product of the local lengths in all directions but the normal
+  // one since the jacobian is diagonal
+  double J = 1.;
+  for (unsigned int d=0; d<dim; ++d)
+    if (d != GeometryInfo<dim>::unit_normal_direction[face_no])
+      J *= data.cell_extents[d];
+
+  if (data.update_each & update_JxW_values)
+    for (unsigned int i=0; i<output_data.JxW_values.size(); ++i)
+      output_data.JxW_values[i] = J * quadrature.weight(i);
+
+  if (data.update_each & update_boundary_forms)
+    for (unsigned int i=0; i<output_data.boundary_forms.size(); ++i)
+      output_data.boundary_forms[i] = J * output_data.normal_vectors[i];
+
+  if (data.update_each & update_volume_elements)
+    {
+      // J is recycled for the volume of the whole cell here; the face
+      // quantities that needed the old value were written above
+      J = data.cell_extents[0];
+      for (unsigned int d=1; d<dim; ++d)
+        J *= data.cell_extents[d];
+      data.volume_element = J;
+    }
+
+  if (data.update_each & update_jacobians)
+    for (unsigned int i=0; i<output_data.jacobians.size(); ++i)
+      {
+        output_data.jacobians[i] = DerivativeForm<1,dim,spacedim>();
+        for (unsigned int d=0; d<dim; ++d)
+          output_data.jacobians[i][d][d] = data.cell_extents[d];
+      }
+
+  // all derivatives of the (constant) Jacobian vanish
+  if (data.update_each & update_jacobian_grads)
+    for (unsigned int i=0; i<output_data.jacobian_grads.size(); ++i)
+      output_data.jacobian_grads[i] = DerivativeForm<2,dim,spacedim>();
+
+  if (data.update_each & update_jacobian_pushed_forward_grads)
+    for (unsigned int i=0; i<output_data.jacobian_pushed_forward_grads.size(); ++i)
+      output_data.jacobian_pushed_forward_grads[i] = Tensor<3,spacedim>();
+
+  if (data.update_each & update_jacobian_2nd_derivatives)
+    for (unsigned int i=0; i<output_data.jacobian_2nd_derivatives.size(); ++i)
+      output_data.jacobian_2nd_derivatives[i] = DerivativeForm<3,dim,spacedim>();
+
+  if (data.update_each & update_jacobian_pushed_forward_2nd_derivatives)
+    for (unsigned int i=0; i<output_data.jacobian_pushed_forward_2nd_derivatives.size(); ++i)
+      output_data.jacobian_pushed_forward_2nd_derivatives[i] = Tensor<4,spacedim>();
+
+  if (data.update_each & update_jacobian_3rd_derivatives)
+    for (unsigned int i=0; i<output_data.jacobian_3rd_derivatives.size(); ++i)
+      output_data.jacobian_3rd_derivatives[i] = DerivativeForm<4,dim,spacedim>();
+
+  if (data.update_each & update_jacobian_pushed_forward_3rd_derivatives)
+    for (unsigned int i=0; i<output_data.jacobian_pushed_forward_3rd_derivatives.size(); ++i)
+      output_data.jacobian_pushed_forward_3rd_derivatives[i] = Tensor<5,spacedim>();
+
+  if (data.update_each & update_inverse_jacobians)
+    for (unsigned int i=0; i<output_data.inverse_jacobians.size(); ++i)
+      {
+        // the inverse Jacobian has its template arguments transposed
+        // relative to the Jacobian, exactly as in
+        // fill_fe_subface_values(); its diagonal holds the reciprocal
+        // cell extents
+        output_data.inverse_jacobians[i] = DerivativeForm<1,spacedim,dim>();
+        for (unsigned int d=0; d<dim; ++d)
+          output_data.inverse_jacobians[i][d][d] = 1./data.cell_extents[d];
+      }
+}
+
+
+
+// Fill the mapping-related output fields for the quadrature points on
+// subface subface_no of face face_no of the given cell. The flags in
+// the update_each member of internal_data select what is written.
+// Since the Jacobian is diagonal and constant, its derivatives are
+// all stored as zero tensors.
+template<int dim, int spacedim>
+void
+MappingCartesian<dim, spacedim>::
+fill_fe_subface_values (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                        const unsigned int                                         face_no,
+                        const unsigned int                                         subface_no,
+                        const Quadrature<dim-1>                                   &quadrature,
+                        const typename Mapping<dim,spacedim>::InternalDataBase    &internal_data,
+                        internal::FEValues::MappingRelatedData<dim,spacedim>      &output_data) const
+{
+  // the generic data object has to be the InternalData of this class;
+  // check that before downcasting
+  Assert (dynamic_cast<const InternalData *> (&internal_data) != 0, ExcInternalError());
+  const InternalData &data = static_cast<const InternalData &> (internal_data);
+
+  compute_fill (cell, face_no, subface_no, CellSimilarity::none,
+                data,
+                output_data.quadrature_points,
+                output_data.normal_vectors);
+
+  // the Jacobian is diagonal, so the Jacobian determinant of the face
+  // is the product of the cell extents in every direction except the
+  // one normal to the face
+  double face_measure = 1.;
+  for (unsigned int c=0; c<dim; ++c)
+    {
+      if (c == GeometryInfo<dim>::unit_normal_direction[face_no])
+        continue;
+      face_measure *= data.cell_extents[c];
+    }
+
+  if (data.update_each & update_JxW_values)
+    {
+      // cell->face(face_no)->n_children() would be the right divisor,
+      // but this function is also called for faces without children
+      // (see tests/fe/mapping.cc). Use the maximal number of children
+      // per face in that case to avoid diffs in tests/fe/mapping.OK
+      unsigned int n_subfaces = GeometryInfo<dim>::max_children_per_face;
+      if (cell->face(face_no)->has_children())
+        n_subfaces = cell->face(face_no)->n_children();
+
+      for (unsigned int q=0; q<output_data.JxW_values.size(); ++q)
+        output_data.JxW_values[q] = face_measure * quadrature.weight(q) / n_subfaces;
+    }
+
+  if (data.update_each & update_boundary_forms)
+    for (unsigned int q=0; q<output_data.boundary_forms.size(); ++q)
+      output_data.boundary_forms[q] = face_measure * output_data.normal_vectors[q];
+
+  if (data.update_each & update_volume_elements)
+    {
+      // volume of the whole cell: product of all extents
+      double cell_measure = data.cell_extents[0];
+      for (unsigned int c=1; c<dim; ++c)
+        cell_measure *= data.cell_extents[c];
+      data.volume_element = cell_measure;
+    }
+
+  if (data.update_each & update_jacobians)
+    for (unsigned int q=0; q<output_data.jacobians.size(); ++q)
+      {
+        // zero everything, then put the extents on the diagonal
+        output_data.jacobians[q] = DerivativeForm<1,dim,spacedim>();
+        for (unsigned int c=0; c<dim; ++c)
+          output_data.jacobians[q][c][c] = data.cell_extents[c];
+      }
+
+  // derivatives of the Jacobian: identically zero
+  if (data.update_each & update_jacobian_grads)
+    for (unsigned int q=0; q<output_data.jacobian_grads.size(); ++q)
+      output_data.jacobian_grads[q] = DerivativeForm<2,dim,spacedim>();
+
+  if (data.update_each & update_jacobian_pushed_forward_grads)
+    for (unsigned int q=0; q<output_data.jacobian_pushed_forward_grads.size(); ++q)
+      output_data.jacobian_pushed_forward_grads[q] = Tensor<3,spacedim>();
+
+  if (data.update_each & update_jacobian_2nd_derivatives)
+    for (unsigned int q=0; q<output_data.jacobian_2nd_derivatives.size(); ++q)
+      output_data.jacobian_2nd_derivatives[q] = DerivativeForm<3,dim,spacedim>();
+
+  if (data.update_each & update_jacobian_pushed_forward_2nd_derivatives)
+    for (unsigned int q=0; q<output_data.jacobian_pushed_forward_2nd_derivatives.size(); ++q)
+      output_data.jacobian_pushed_forward_2nd_derivatives[q] = Tensor<4,spacedim>();
+
+  if (data.update_each & update_jacobian_3rd_derivatives)
+    for (unsigned int q=0; q<output_data.jacobian_3rd_derivatives.size(); ++q)
+      output_data.jacobian_3rd_derivatives[q] = DerivativeForm<4,dim,spacedim>();
+
+  if (data.update_each & update_jacobian_pushed_forward_3rd_derivatives)
+    for (unsigned int q=0; q<output_data.jacobian_pushed_forward_3rd_derivatives.size(); ++q)
+      output_data.jacobian_pushed_forward_3rd_derivatives[q] = Tensor<5,spacedim>();
+
+  if (data.update_each & update_inverse_jacobians)
+    for (unsigned int q=0; q<output_data.inverse_jacobians.size(); ++q)
+      {
+        // zero everything, then put the reciprocal extents on the
+        // diagonal
+        output_data.inverse_jacobians[q] = DerivativeForm<1,spacedim,dim>();
+        for (unsigned int c=0; c<dim; ++c)
+          output_data.inverse_jacobians[q][c][c] = 1./data.cell_extents[c];
+      }
+}
+
+
+
+
+// Transform a field of rank-1 tensors from the reference cell to the
+// real cell. Since the Jacobian is diagonal, each component is simply
+// scaled by the cell extent in its own direction:
+//  - mapping_covariant:     divide by the extent
+//  - mapping_contravariant: multiply by the extent
+//  - mapping_piola:         multiply by the extent and divide by the
+//                           volume element
+// Any other mapping type triggers ExcNotImplemented.
+template<int dim, int spacedim>
+void
+MappingCartesian<dim,spacedim>::
+transform (const ArrayView<const Tensor<1,dim> >                  &input,
+           const MappingType                                       mapping_type,
+           const typename Mapping<dim,spacedim>::InternalDataBase &mapping_data,
+           const ArrayView<Tensor<1,spacedim> >                   &output) const
+{
+  AssertDimension (input.size(), output.size());
+  Assert (dynamic_cast<const InternalData *>(&mapping_data) != 0,
+          ExcInternalError());
+  const InternalData &data = static_cast<const InternalData &> (mapping_data);
+
+  switch (mapping_type)
+    {
+    case mapping_covariant:
+    {
+      Assert (data.update_each & update_covariant_transformation,
+              typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_covariant_transformation"));
+
+      for (unsigned int q=0; q<output.size(); ++q)
+        for (unsigned int c=0; c<dim; ++c)
+          output[q][c] = input[q][c] / data.cell_extents[c];
+      break;
+    }
+
+    case mapping_contravariant:
+    {
+      Assert (data.update_each & update_contravariant_transformation,
+              typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_contravariant_transformation"));
+
+      for (unsigned int q=0; q<output.size(); ++q)
+        for (unsigned int c=0; c<dim; ++c)
+          output[q][c] = input[q][c] * data.cell_extents[c];
+      break;
+    }
+
+    case mapping_piola:
+    {
+      Assert (data.update_each & update_contravariant_transformation,
+              typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_contravariant_transformation"));
+      Assert (data.update_each & update_volume_elements,
+              typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_volume_elements"));
+
+      for (unsigned int q=0; q<output.size(); ++q)
+        for (unsigned int c=0; c<dim; ++c)
+          output[q][c] = input[q][c] * data.cell_extents[c] / data.volume_element;
+      break;
+    }
+
+    default:
+      Assert(false, ExcNotImplemented());
+    }
+}
+
+
+
+// Transform a field of DerivativeForm<1,dim,spacedim> objects into
+// rank-2 tensors on the real cell. With a diagonal Jacobian every
+// entry (row,col) is only rescaled by cell extents:
+//  - mapping_covariant:              / extent[col]
+//  - mapping_contravariant:          * extent[col]
+//  - mapping_covariant_gradient:     / extent[col] / extent[row]
+//  - mapping_contravariant_gradient: * extent[col] / extent[row]
+//  - mapping_piola:                  * extent[col] / volume element
+//  - mapping_piola_gradient:         * extent[col] / extent[row]
+//                                    / volume element
+// Any other mapping type triggers ExcNotImplemented.
+template<int dim, int spacedim>
+void
+MappingCartesian<dim,spacedim>::
+transform (const ArrayView<const DerivativeForm<1, dim,spacedim> > &input,
+           const MappingType                                        mapping_type,
+           const typename Mapping<dim,spacedim>::InternalDataBase  &mapping_data,
+           const ArrayView<Tensor<2,spacedim> >                    &output) const
+{
+  AssertDimension (input.size(), output.size());
+  Assert (dynamic_cast<const InternalData *>(&mapping_data) != 0,
+          ExcInternalError());
+  const InternalData &data = static_cast<const InternalData &>(mapping_data);
+
+  switch (mapping_type)
+    {
+    case mapping_covariant:
+    {
+      Assert (data.update_each & update_covariant_transformation,
+              typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_covariant_transformation"));
+
+      for (unsigned int q=0; q<output.size(); ++q)
+        for (unsigned int row=0; row<dim; ++row)
+          for (unsigned int col=0; col<dim; ++col)
+            output[q][row][col] = input[q][row][col] / data.cell_extents[col];
+      break;
+    }
+
+    case mapping_contravariant:
+    {
+      Assert (data.update_each & update_contravariant_transformation,
+              typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_contravariant_transformation"));
+
+      for (unsigned int q=0; q<output.size(); ++q)
+        for (unsigned int row=0; row<dim; ++row)
+          for (unsigned int col=0; col<dim; ++col)
+            output[q][row][col] = input[q][row][col] * data.cell_extents[col];
+      break;
+    }
+
+    case mapping_covariant_gradient:
+    {
+      Assert (data.update_each & update_covariant_transformation,
+              typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_covariant_transformation"));
+
+      for (unsigned int q=0; q<output.size(); ++q)
+        for (unsigned int row=0; row<dim; ++row)
+          for (unsigned int col=0; col<dim; ++col)
+            output[q][row][col] = input[q][row][col] / data.cell_extents[col] / data.cell_extents[row];
+      break;
+    }
+
+    case mapping_contravariant_gradient:
+    {
+      Assert (data.update_each & update_contravariant_transformation,
+              typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_contravariant_transformation"));
+
+      for (unsigned int q=0; q<output.size(); ++q)
+        for (unsigned int row=0; row<dim; ++row)
+          for (unsigned int col=0; col<dim; ++col)
+            output[q][row][col] = input[q][row][col] * data.cell_extents[col] / data.cell_extents[row];
+      break;
+    }
+
+    case mapping_piola:
+    {
+      Assert (data.update_each & update_contravariant_transformation,
+              typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_contravariant_transformation"));
+      Assert (data.update_each & update_volume_elements,
+              typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_volume_elements"));
+
+      for (unsigned int q=0; q<output.size(); ++q)
+        for (unsigned int row=0; row<dim; ++row)
+          for (unsigned int col=0; col<dim; ++col)
+            output[q][row][col] = input[q][row][col] * data.cell_extents[col]
+                                  / data.volume_element;
+      break;
+    }
+
+    case mapping_piola_gradient:
+    {
+      Assert (data.update_each & update_contravariant_transformation,
+              typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_contravariant_transformation"));
+      Assert (data.update_each & update_volume_elements,
+              typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_volume_elements"));
+
+      for (unsigned int q=0; q<output.size(); ++q)
+        for (unsigned int row=0; row<dim; ++row)
+          for (unsigned int col=0; col<dim; ++col)
+            output[q][row][col] = input[q][row][col] * data.cell_extents[col]
+                                  / data.cell_extents[row] / data.volume_element;
+      break;
+    }
+
+    default:
+      Assert(false, ExcNotImplemented());
+    }
+}
+
+
+
+
+// Transform a field of rank-2 tensors from the reference cell to the
+// real cell. With a diagonal Jacobian every entry (row,col) is only
+// rescaled by cell extents:
+//  - mapping_covariant:              / extent[col]
+//  - mapping_contravariant:          * extent[col]
+//  - mapping_covariant_gradient:     / extent[col] / extent[row]
+//  - mapping_contravariant_gradient: * extent[col] / extent[row]
+//  - mapping_piola:                  * extent[col] / volume element
+//  - mapping_piola_gradient:         * extent[col] / extent[row]
+//                                    / volume element
+// Any other mapping type triggers ExcNotImplemented.
+template<int dim, int spacedim>
+void
+MappingCartesian<dim,spacedim>::
+transform (const ArrayView<const Tensor<2, dim> >                 &input,
+           const MappingType                                       mapping_type,
+           const typename Mapping<dim,spacedim>::InternalDataBase &mapping_data,
+           const ArrayView<Tensor<2, spacedim> >                  &output) const
+{
+  AssertDimension (input.size(), output.size());
+  Assert (dynamic_cast<const InternalData *>(&mapping_data) != 0,
+          ExcInternalError());
+  const InternalData &data = static_cast<const InternalData &>(mapping_data);
+
+  switch (mapping_type)
+    {
+    case mapping_covariant:
+    {
+      Assert (data.update_each & update_covariant_transformation,
+              typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_covariant_transformation"));
+
+      for (unsigned int q=0; q<output.size(); ++q)
+        for (unsigned int row=0; row<dim; ++row)
+          for (unsigned int col=0; col<dim; ++col)
+            output[q][row][col] = input[q][row][col] / data.cell_extents[col];
+      break;
+    }
+
+    case mapping_contravariant:
+    {
+      Assert (data.update_each & update_contravariant_transformation,
+              typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_contravariant_transformation"));
+
+      for (unsigned int q=0; q<output.size(); ++q)
+        for (unsigned int row=0; row<dim; ++row)
+          for (unsigned int col=0; col<dim; ++col)
+            output[q][row][col] = input[q][row][col] * data.cell_extents[col];
+      break;
+    }
+
+    case mapping_covariant_gradient:
+    {
+      Assert (data.update_each & update_covariant_transformation,
+              typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_covariant_transformation"));
+
+      for (unsigned int q=0; q<output.size(); ++q)
+        for (unsigned int row=0; row<dim; ++row)
+          for (unsigned int col=0; col<dim; ++col)
+            output[q][row][col] = input[q][row][col] / data.cell_extents[col] / data.cell_extents[row];
+      break;
+    }
+
+    case mapping_contravariant_gradient:
+    {
+      Assert (data.update_each & update_contravariant_transformation,
+              typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_contravariant_transformation"));
+
+      for (unsigned int q=0; q<output.size(); ++q)
+        for (unsigned int row=0; row<dim; ++row)
+          for (unsigned int col=0; col<dim; ++col)
+            output[q][row][col] = input[q][row][col] * data.cell_extents[col] / data.cell_extents[row];
+      break;
+    }
+
+    case mapping_piola:
+    {
+      Assert (data.update_each & update_contravariant_transformation,
+              typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_contravariant_transformation"));
+      Assert (data.update_each & update_volume_elements,
+              typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_volume_elements"));
+
+      for (unsigned int q=0; q<output.size(); ++q)
+        for (unsigned int row=0; row<dim; ++row)
+          for (unsigned int col=0; col<dim; ++col)
+            output[q][row][col] = input[q][row][col] * data.cell_extents[col]
+                                  / data.volume_element;
+      break;
+    }
+
+    case mapping_piola_gradient:
+    {
+      Assert (data.update_each & update_contravariant_transformation,
+              typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_contravariant_transformation"));
+      Assert (data.update_each & update_volume_elements,
+              typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_volume_elements"));
+
+      for (unsigned int q=0; q<output.size(); ++q)
+        for (unsigned int row=0; row<dim; ++row)
+          for (unsigned int col=0; col<dim; ++col)
+            output[q][row][col] = input[q][row][col] * data.cell_extents[col]
+                                  / data.cell_extents[row] / data.volume_element;
+      break;
+    }
+
+    default:
+      Assert(false, ExcNotImplemented());
+    }
+}
+
+
+// Transform a field of DerivativeForm<2,dim,spacedim> objects into
+// rank-3 tensors on the real cell. Only mapping_covariant_gradient is
+// implemented: entry (i,j,k) is divided by the cell extents in
+// directions j and k. Any other mapping type triggers
+// ExcNotImplemented.
+template<int dim, int spacedim>
+void
+MappingCartesian<dim,spacedim>::
+transform (const ArrayView<const  DerivativeForm<2, dim, spacedim> > &input,
+           const MappingType                                          mapping_type,
+           const typename Mapping<dim,spacedim>::InternalDataBase    &mapping_data,
+           const ArrayView<Tensor<3,spacedim> >                      &output) const
+{
+
+  AssertDimension (input.size(), output.size());
+  Assert (dynamic_cast<const InternalData *>(&mapping_data) != 0,
+          ExcInternalError());
+  const InternalData &data = static_cast<const InternalData &>(mapping_data);
+
+  switch (mapping_type)
+    {
+    case mapping_covariant_gradient:
+    {
+      // check the covariant flag: this is what the error message
+      // reports and what the other transform() overloads require for
+      // mapping_covariant_gradient
+      Assert (data.update_each & update_covariant_transformation,
+              typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_covariant_transformation"));
+
+      for (unsigned int q=0; q<output.size(); ++q)
+        for (unsigned int i=0; i<spacedim; ++i)
+          for (unsigned int j=0; j<spacedim; ++j)
+            for (unsigned int k=0; k<spacedim; ++k)
+              output[q][i][j][k] = input[q][i][j][k] / data.cell_extents[j] / data.cell_extents[k];
+      return;
+    }
+    default:
+      Assert(false, ExcNotImplemented());
+    }
+
+}
+
+// Transform a field of rank-3 tensors from the reference cell to the
+// real cell. With a diagonal Jacobian every entry (a,b,c) is only
+// rescaled by cell extents:
+//  - mapping_contravariant_hessian: * extent[a] / extent[b] / extent[c]
+//  - mapping_covariant_hessian:     / extent[a] / extent[b] / extent[c]
+//  - mapping_piola_hessian:         * extent[a] / volume element
+//                                   / extent[b] / extent[c]
+// Any other mapping type triggers ExcNotImplemented.
+template<int dim, int spacedim>
+void
+MappingCartesian<dim,spacedim>::
+transform (const ArrayView<const  Tensor<3,dim> >                 &input,
+           const MappingType                                       mapping_type,
+           const typename Mapping<dim,spacedim>::InternalDataBase &mapping_data,
+           const ArrayView<Tensor<3,spacedim> >                   &output) const
+{
+  AssertDimension (input.size(), output.size());
+  Assert (dynamic_cast<const InternalData *>(&mapping_data) != 0,
+          ExcInternalError());
+  const InternalData &data = static_cast<const InternalData &>(mapping_data);
+
+  switch (mapping_type)
+    {
+    case mapping_contravariant_hessian:
+    {
+      Assert (data.update_each & update_covariant_transformation,
+              typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_covariant_transformation"));
+      Assert (data.update_each & update_contravariant_transformation,
+              typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_contravariant_transformation"));
+
+      for (unsigned int pt=0; pt<output.size(); ++pt)
+        for (unsigned int a=0; a<spacedim; ++a)
+          for (unsigned int b=0; b<spacedim; ++b)
+            for (unsigned int c=0; c<spacedim; ++c)
+              output[pt][a][b][c] = input[pt][a][b][c]
+                                    * data.cell_extents[a]
+                                    / data.cell_extents[b]
+                                    / data.cell_extents[c];
+      break;
+    }
+
+    case mapping_covariant_hessian:
+    {
+      Assert (data.update_each & update_covariant_transformation,
+              typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_covariant_transformation"));
+
+      for (unsigned int pt=0; pt<output.size(); ++pt)
+        for (unsigned int a=0; a<spacedim; ++a)
+          for (unsigned int b=0; b<spacedim; ++b)
+            for (unsigned int c=0; c<spacedim; ++c)
+              output[pt][a][b][c] = input[pt][a][b][c]
+                                    / data.cell_extents[a]
+                                    / data.cell_extents[b]
+                                    / data.cell_extents[c];
+      break;
+    }
+
+    case mapping_piola_hessian:
+    {
+      Assert (data.update_each & update_covariant_transformation,
+              typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_covariant_transformation"));
+      Assert (data.update_each & update_contravariant_transformation,
+              typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_contravariant_transformation"));
+      Assert (data.update_each & update_volume_elements,
+              typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_volume_elements"));
+
+      for (unsigned int pt=0; pt<output.size(); ++pt)
+        for (unsigned int a=0; a<spacedim; ++a)
+          for (unsigned int b=0; b<spacedim; ++b)
+            for (unsigned int c=0; c<spacedim; ++c)
+              output[pt][a][b][c] = input[pt][a][b][c]
+                                    * data.cell_extents[a]
+                                    / data.volume_element
+                                    / data.cell_extents[b]
+                                    / data.cell_extents[c];
+      break;
+    }
+
+    default:
+      Assert(false, ExcNotImplemented());
+    }
+}
+
+
+// Map the point p given in reference coordinates to the real cell.
+// The result is vertex 0 of the cell plus, in each coordinate
+// direction, the corresponding component of p scaled by the edge
+// length of the cell in that direction. Edge lengths are read off
+// from vertices 1, 2 and 4 relative to vertex 0.
+template <int dim, int spacedim>
+Point<spacedim>
+MappingCartesian<dim, spacedim>::transform_unit_to_real_cell (
+  const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+  const Point<dim>                                 &p) const
+{
+  const Point<dim> origin = cell->vertex(0);
+
+  // extent of the cell along each coordinate axis
+  Tensor<1,dim> edge_lengths;
+  switch (dim)
+    {
+    case 1:
+    {
+      edge_lengths[0] = cell->vertex(1)(0) - origin(0);
+      break;
+    }
+    case 2:
+    {
+      edge_lengths[0] = cell->vertex(1)(0) - origin(0);
+      edge_lengths[1] = cell->vertex(2)(1) - origin(1);
+      break;
+    }
+    case 3:
+    {
+      edge_lengths[0] = cell->vertex(1)(0) - origin(0);
+      edge_lengths[1] = cell->vertex(2)(1) - origin(1);
+      edge_lengths[2] = cell->vertex(4)(2) - origin(2);
+      break;
+    }
+    default:
+      Assert(false, ExcNotImplemented());
+    }
+
+  // start at vertex 0 and move by the scaled reference coordinates
+  Point<dim> mapped_point = cell->vertex(0);
+  for (unsigned int c=0; c<dim; ++c)
+    mapped_point(c) += edge_lengths[c]*p(c);
+
+  return mapped_point;
+}
+
+
+
+// Map the point p given in real coordinates back to the reference
+// cell: subtract vertex 0 of the cell and divide each component by
+// the edge length of the cell in that direction (read off from
+// vertices 1, 2 and 4 relative to vertex 0). Not implemented for
+// dim != spacedim.
+template<int dim, int spacedim>
+Point<dim>
+MappingCartesian<dim, spacedim>::transform_real_to_unit_cell (
+  const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+  const Point<spacedim>                            &p) const
+{
+  if (dim != spacedim)
+    {
+      Assert(false, ExcNotImplemented());
+    }
+
+  const Point<dim> &origin = cell->vertex(0);
+
+  // offset of p from vertex 0, still in real-space units
+  Point<dim> unit_point = p;
+  unit_point -= origin;
+
+  // rescale every component by the matching edge length
+  switch (dim)
+    {
+    case 1:
+    {
+      unit_point(0) /= cell->vertex(1)(0) - origin(0);
+      break;
+    }
+    case 2:
+    {
+      unit_point(0) /= cell->vertex(1)(0) - origin(0);
+      unit_point(1) /= cell->vertex(2)(1) - origin(1);
+      break;
+    }
+    case 3:
+    {
+      unit_point(0) /= cell->vertex(1)(0) - origin(0);
+      unit_point(1) /= cell->vertex(2)(1) - origin(1);
+      unit_point(2) /= cell->vertex(4)(2) - origin(2);
+      break;
+    }
+    default:
+      Assert(false, ExcNotImplemented());
+    }
+
+  return unit_point;
+}
+
+
+// Return a pointer to a new, heap-allocated copy of this object,
+// created with the copy constructor.
+template<int dim, int spacedim>
+Mapping<dim, spacedim> *
+MappingCartesian<dim, spacedim>::clone () const
+{
+  MappingCartesian<dim, spacedim> *copy
+    = new MappingCartesian<dim, spacedim>(*this);
+  return copy;
+}
+
+
+//---------------------------------------------------------------------------
+// explicit instantiations
+#include "mapping_cartesian.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/mapping_cartesian.inst.in b/source/fe/mapping_cartesian.inst.in
new file mode 100644
index 0000000..c7c042e
--- /dev/null
+++ b/source/fe/mapping_cartesian.inst.in
@@ -0,0 +1,22 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+for (deal_II_dimension : DIMENSIONS)
+  {
+    template class MappingCartesian<deal_II_dimension>; // only dim is given; spacedim is left at its default
+  }
+
diff --git a/source/fe/mapping_fe_field.cc b/source/fe/mapping_fe_field.cc
new file mode 100644
index 0000000..54b2da4
--- /dev/null
+++ b/source/fe/mapping_fe_field.cc
@@ -0,0 +1,1965 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/polynomial.h>
+#include <deal.II/base/quadrature.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/qprojector.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/base/tensor_product_polynomials.h>
+#include <deal.II/base/std_cxx11/unique_ptr.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/tria_boundary.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/fe/fe_tools.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/fe_system.h>
+#include <deal.II/fe/mapping_fe_field.h>
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/mapping.h>
+#include <deal.II/fe/mapping_q1.h>
+#include <deal.II/base/qprojector.h>
+#include <deal.II/base/thread_management.h>
+#include <deal.II/numerics/vector_tools.h>
+
+#include <numeric>
+#include <fstream>
+
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// Set up the scratch data object for the given finite element: record
+// the number of shape functions (fe.dofs_per_cell), keep a copy of the
+// component mask, and size the arrays for local DoF indices and local
+// DoF values to fe.dofs_per_cell entries each.
+template<int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::InternalData::InternalData
+(const FiniteElement<dim,spacedim> &fe,
+ const ComponentMask                mask)
+  :
+  n_shape_functions (fe.dofs_per_cell),
+  // the member on the left is initialized from the parameter of the same name
+  mask (mask),
+  local_dof_indices(fe.dofs_per_cell),
+  local_dof_values(fe.dofs_per_cell)
+{}
+
+
+
+// Memory estimation is not implemented for this class: the function
+// raises ExcNotImplemented through an Assert with a false condition and
+// otherwise reports zero bytes.
+template<int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+std::size_t
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::InternalData::memory_consumption () const
+{
+  const std::size_t not_computed = 0;
+  Assert (false, ExcNotImplemented());
+  return not_computed;
+}
+
+
+
+// Writable access to the value of shape function @p shape_nr at
+// quadrature point @p qpoint. The values are stored point by point, i.e.
+// at position qpoint*n_shape_functions + shape_nr of shape_values; the
+// position is range-checked with an Assert.
+template<int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+double &
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::InternalData::shape
+(const unsigned int qpoint,
+ const unsigned int shape_nr)
+{
+  const std::size_t index = qpoint*n_shape_functions + shape_nr;
+  Assert(index < shape_values.size(),
+         ExcIndexRange(index, 0, shape_values.size()));
+  return shape_values [index];
+}
+
+
+// Read-only access to the first derivative of shape function @p shape_nr
+// at quadrature point @p qpoint, stored at position
+// qpoint*n_shape_functions + shape_nr of shape_derivatives. The position
+// is range-checked with an Assert.
+template<int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+const Tensor<1,dim> &
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::InternalData::derivative
+(const unsigned int qpoint,
+ const unsigned int shape_nr) const
+{
+  const std::size_t index = qpoint*n_shape_functions + shape_nr;
+  Assert(index < shape_derivatives.size(),
+         ExcIndexRange(index, 0, shape_derivatives.size()));
+  return shape_derivatives [index];
+}
+
+
+
+// Writable access to the first derivative of shape function @p shape_nr
+// at quadrature point @p qpoint, stored at position
+// qpoint*n_shape_functions + shape_nr of shape_derivatives. The position
+// is range-checked with an Assert.
+template<int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+Tensor<1,dim> &
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::InternalData::derivative
+(const unsigned int qpoint,
+ const unsigned int shape_nr)
+{
+  const std::size_t index = qpoint*n_shape_functions + shape_nr;
+  Assert(index < shape_derivatives.size(),
+         ExcIndexRange(index, 0, shape_derivatives.size()));
+  return shape_derivatives [index];
+}
+
+
+// Read-only access to the second derivative of shape function
+// @p shape_nr at quadrature point @p qpoint, stored at position
+// qpoint*n_shape_functions + shape_nr of shape_second_derivatives. The
+// position is range-checked with an Assert.
+template <int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+const Tensor<2,dim> &
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::InternalData::second_derivative
+(const unsigned int qpoint,
+ const unsigned int shape_nr) const
+{
+  const std::size_t index = qpoint*n_shape_functions + shape_nr;
+  Assert(index < shape_second_derivatives.size(),
+         ExcIndexRange(index, 0, shape_second_derivatives.size()));
+  return shape_second_derivatives [index];
+}
+
+
+
+// Writable access to the second derivative of shape function
+// @p shape_nr at quadrature point @p qpoint, stored at position
+// qpoint*n_shape_functions + shape_nr of shape_second_derivatives. The
+// position is range-checked with an Assert.
+template <int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+Tensor<2,dim> &
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::InternalData::second_derivative
+(const unsigned int qpoint,
+ const unsigned int shape_nr)
+{
+  const std::size_t index = qpoint*n_shape_functions + shape_nr;
+  Assert(index < shape_second_derivatives.size(),
+         ExcIndexRange(index, 0, shape_second_derivatives.size()));
+  return shape_second_derivatives [index];
+}
+
+
+// Read-only access to the third derivative of shape function
+// @p shape_nr at quadrature point @p qpoint, stored at position
+// qpoint*n_shape_functions + shape_nr of shape_third_derivatives. The
+// position is range-checked with an Assert.
+template <int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+const Tensor<3,dim> &
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::InternalData::third_derivative
+(const unsigned int qpoint,
+ const unsigned int shape_nr) const
+{
+  const std::size_t index = qpoint*n_shape_functions + shape_nr;
+  Assert(index < shape_third_derivatives.size(),
+         ExcIndexRange(index, 0, shape_third_derivatives.size()));
+  return shape_third_derivatives [index];
+}
+
+
+
+// Writable access to the third derivative of shape function
+// @p shape_nr at quadrature point @p qpoint, stored at position
+// qpoint*n_shape_functions + shape_nr of shape_third_derivatives. The
+// position is range-checked with an Assert.
+template <int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+Tensor<3,dim> &
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::InternalData::third_derivative
+(const unsigned int qpoint,
+ const unsigned int shape_nr)
+{
+  const std::size_t index = qpoint*n_shape_functions + shape_nr;
+  Assert(index < shape_third_derivatives.size(),
+         ExcIndexRange(index, 0, shape_third_derivatives.size()));
+  return shape_third_derivatives [index];
+}
+
+
+// Read-only access to the fourth derivative of shape function
+// @p shape_nr at quadrature point @p qpoint, stored at position
+// qpoint*n_shape_functions + shape_nr of shape_fourth_derivatives. The
+// position is range-checked with an Assert.
+template <int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+const Tensor<4,dim> &
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::InternalData::fourth_derivative
+(const unsigned int qpoint,
+ const unsigned int shape_nr) const
+{
+  const std::size_t index = qpoint*n_shape_functions + shape_nr;
+  Assert(index < shape_fourth_derivatives.size(),
+         ExcIndexRange(index, 0, shape_fourth_derivatives.size()));
+  return shape_fourth_derivatives [index];
+}
+
+
+
+// Writable access to the fourth derivative of shape function
+// @p shape_nr at quadrature point @p qpoint, stored at position
+// qpoint*n_shape_functions + shape_nr of shape_fourth_derivatives. The
+// position is range-checked with an Assert.
+template <int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+Tensor<4,dim> &
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::InternalData::fourth_derivative
+(const unsigned int qpoint,
+ const unsigned int shape_nr)
+{
+  const std::size_t index = qpoint*n_shape_functions + shape_nr;
+  Assert(index < shape_fourth_derivatives.size(),
+         ExcIndexRange(index, 0, shape_fourth_derivatives.size()));
+  return shape_fourth_derivatives [index];
+}
+
+
+
+// Construct the mapping from a DoF handler and a vector of coefficients
+// defined on it. The object stores the addresses of @p euler_vector, of
+// the DoF handler and of the DoF handler's finite element; it does not
+// copy them.
+//
+// If @p mask has size zero, all components of the finite element are
+// selected. The constructor then numbers the selected components
+// consecutively (fe_to_real) and asserts that exactly spacedim
+// components were selected.
+template<int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::MappingFEField
+(const DoFHandlerType            &euler_dof_handler,
+ const VectorType    &euler_vector,
+ const ComponentMask  mask)
+  :
+  euler_vector(&euler_vector),
+  fe(&euler_dof_handler.get_fe()),
+  euler_dof_handler(&euler_dof_handler),
+  // NOTE(review): this initializer dereferences the member 'fe', so it
+  // assumes 'fe' is declared before 'fe_mask' in the class -- confirm
+  // against the header.
+  fe_mask(mask.size() ? mask :
+          ComponentMask(fe->get_nonzero_components(0).size(), true)),
+  // unselected components keep the invalid marker
+  fe_to_real(fe_mask.size(), numbers::invalid_unsigned_int)
+{
+  // assign consecutive indices 0,1,... to the selected components
+  unsigned int size = 0;
+  for (unsigned int i=0; i<fe_mask.size(); ++i)
+    {
+      if (fe_mask[i])
+        fe_to_real[i] = size++;
+    }
+  // the number of selected components must equal the space dimension
+  AssertDimension(size,spacedim);
+}
+
+
+// Copy constructor: initialize every member from the corresponding
+// member of @p mapping (Euler vector, finite element and DoF handler
+// handles, the component mask and the component-to-coordinate table).
+template<int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::MappingFEField
+(const MappingFEField<dim,spacedim,VectorType,DoFHandlerType> &mapping)
+  :
+  euler_vector(mapping.euler_vector),
+  fe(mapping.fe),
+  euler_dof_handler(mapping.euler_dof_handler),
+  fe_mask(mapping.fe_mask),
+  fe_to_real(mapping.fe_to_real)
+{}
+
+
+
+// Read-only access to the value of shape function @p shape_nr at
+// quadrature point @p qpoint. The values are stored point by point, i.e.
+// at position qpoint*n_shape_functions + shape_nr of shape_values; the
+// position is range-checked with an Assert.
+template<int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+inline
+const double &
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::InternalData::shape
+(const unsigned int qpoint,
+ const unsigned int shape_nr) const
+{
+  const std::size_t index = qpoint*n_shape_functions + shape_nr;
+  Assert(index < shape_values.size(),
+         ExcIndexRange(index, 0, shape_values.size()));
+  return shape_values [index];
+}
+
+
+
+// This mapping always reports that it does not preserve vertex
+// locations.
+template <int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+bool
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::preserves_vertex_locations () const
+{
+  return false;
+}
+
+
+
+// Evaluate the shape functions of the finite element, and their first
+// to fourth derivatives, at the given reference points and store the
+// results in @p data. A quantity is only computed if the corresponding
+// array in @p data has non-zero size; arrays of size zero are skipped.
+//
+// NOTE(review): @p data is spelled with the two-argument type
+// MappingFEField<dim, spacedim> rather than with all four template
+// arguments of this class -- confirm this matches the declaration.
+template<int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+void
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::
+compute_shapes_virtual (const std::vector<Point<dim> >                       &unit_points,
+                        typename MappingFEField<dim, spacedim>::InternalData &data) const
+{
+  // find out once which tables have storage allocated
+  const bool fill_values  = (data.shape_values.size() != 0);
+  const bool fill_grads   = (data.shape_derivatives.size() != 0);
+  const bool fill_second  = (data.shape_second_derivatives.size() != 0);
+  const bool fill_third   = (data.shape_third_derivatives.size() != 0);
+  const bool fill_fourth  = (data.shape_fourth_derivatives.size() != 0);
+
+  for (unsigned int q=0; q<unit_points.size(); ++q)
+    {
+      const Point<dim> &p = unit_points[q];
+
+      if (fill_values)
+        for (unsigned int s=0; s<data.n_shape_functions; ++s)
+          data.shape(q, s) = fe->shape_value(s, p);
+
+      if (fill_grads)
+        for (unsigned int s=0; s<data.n_shape_functions; ++s)
+          data.derivative(q, s) = fe->shape_grad(s, p);
+
+      if (fill_second)
+        for (unsigned int s=0; s<data.n_shape_functions; ++s)
+          data.second_derivative(q, s) = fe->shape_grad_grad(s, p);
+
+      if (fill_third)
+        for (unsigned int s=0; s<data.n_shape_functions; ++s)
+          data.third_derivative(q, s) = fe->shape_3rd_derivative(s, p);
+
+      if (fill_fourth)
+        for (unsigned int s=0; s<data.n_shape_functions; ++s)
+          data.fourth_derivative(q, s) = fe->shape_4th_derivative(s, p);
+    }
+}
+
+
+// Return @p in enlarged by all flags this mapping additionally needs in
+// order to compute the requested quantities. Because the implications
+// below feed into each other, they are applied repeatedly; the loop
+// makes five passes over them.
+template<int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+UpdateFlags
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::requires_update_flags (const UpdateFlags in) const
+{
+  // Flags that pull in boundary forms. This is slightly inaccurate:
+  // boundary forms make no sense away from a face but are needed on a
+  // face; for cell interiors the flag is simply ignored.
+  const UpdateFlags implies_boundary_forms
+    = update_JxW_values | update_normal_vectors;
+
+  // Flags that pull in the contravariant transformation.
+  const UpdateFlags implies_contravariant
+    = (update_covariant_transformation
+       | update_JxW_values
+       | update_jacobians
+       | update_jacobian_grads
+       | update_boundary_forms
+       | update_normal_vectors);
+
+  // Flags that pull in the covariant transformation.
+  const UpdateFlags implies_covariant
+    = (update_inverse_jacobians
+       | update_jacobian_pushed_forward_grads
+       | update_jacobian_pushed_forward_2nd_derivatives
+       | update_jacobian_pushed_forward_3rd_derivatives);
+
+  // Flags that pull in the JxW values: the contravariant transformation
+  // is a Piola transformation and needs the determinant of the Jacobian
+  // on every cell; normal vectors request the same flag.
+  const UpdateFlags implies_JxW
+    = update_contravariant_transformation | update_normal_vectors;
+
+  UpdateFlags result = in;
+  for (unsigned int pass=0; pass<5; ++pass)
+    {
+      if (result & implies_boundary_forms)
+        result |= update_boundary_forms;
+
+      if (result & implies_contravariant)
+        result |= update_contravariant_transformation;
+
+      if (result & implies_covariant)
+        result |= update_covariant_transformation;
+
+      if (result & implies_JxW)
+        result |= update_JxW_values;
+    }
+
+  return result;
+}
+
+
+
+
+// Prepare @p data for the quadrature formula @p q: store the complete
+// set of required update flags, size the tables that these flags call
+// for, and fill the shape function tables at the points of @p q.
+//
+// Shape function tables get one entry per shape function and point of
+// @p q; the transformation arrays (covariant, contravariant, volume
+// elements) get @p n_original_q_points entries.
+template<int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+void
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::compute_data
+(const UpdateFlags      update_flags,
+ const Quadrature<dim> &q,
+ const unsigned int     n_original_q_points,
+ InternalData          &data) const
+{
+  // keep the transitive hull of the requested flags in the data object
+  // so that fill_fe_*_values() can look them up later
+  data.update_each = requires_update_flags(update_flags);
+  const UpdateFlags flags = data.update_each;
+
+  const unsigned int n_q_points = q.size();
+  const std::size_t  n_table_entries = data.n_shape_functions * n_q_points;
+
+  // every flag in this group needs first derivatives of the shape
+  // functions
+  const UpdateFlags needs_first_derivatives
+    = (update_covariant_transformation
+       | update_contravariant_transformation
+       | update_JxW_values
+       | update_boundary_forms
+       | update_normal_vectors
+       | update_jacobians
+       | update_jacobian_grads
+       | update_inverse_jacobians);
+
+  if (flags & update_quadrature_points)
+    data.shape_values.resize(n_table_entries);
+
+  if (flags & needs_first_derivatives)
+    data.shape_derivatives.resize(n_table_entries);
+
+  if (flags & update_covariant_transformation)
+    data.covariant.resize(n_original_q_points);
+
+  if (flags & update_contravariant_transformation)
+    data.contravariant.resize(n_original_q_points);
+
+  if (flags & update_volume_elements)
+    data.volume_elements.resize(n_original_q_points);
+
+  if (flags & (update_jacobian_grads | update_jacobian_pushed_forward_grads))
+    data.shape_second_derivatives.resize(n_table_entries);
+
+  if (flags & (update_jacobian_2nd_derivatives | update_jacobian_pushed_forward_2nd_derivatives))
+    data.shape_third_derivatives.resize(n_table_entries);
+
+  if (flags & (update_jacobian_3rd_derivatives | update_jacobian_pushed_forward_3rd_derivatives))
+    data.shape_fourth_derivatives.resize(n_table_entries);
+
+  // tables that were left empty above are skipped by this call
+  compute_shapes_virtual (q.get_points(), data);
+}
+
+
+// Same as compute_data(), and in addition, for dim > 1 and if boundary
+// forms are requested, allocate the auxiliary arrays and set up the unit
+// tangential vectors of all faces of the reference cell.
+//
+// In 2d, unit_tangentials[f] holds the tangent of face f. In 3d,
+// unit_tangentials[f] and unit_tangentials[nfaces+f] hold the first and
+// second tangent of face f. Every array has @p n_original_q_points
+// identical entries.
+template<int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+void
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::compute_face_data
+(const UpdateFlags      update_flags,
+ const Quadrature<dim> &q,
+ const unsigned int     n_original_q_points,
+ InternalData          &data) const
+{
+  compute_data (update_flags, q, n_original_q_points, data);
+
+  // no tangentials in 1d
+  if (!(dim > 1))
+    return;
+
+  // tangentials are only needed for boundary forms
+  if (!(data.update_each & update_boundary_forms))
+    return;
+
+  data.aux.resize (dim-1, std::vector<Tensor<1,spacedim> > (n_original_q_points));
+
+  // one set of (dim-1) tangentials per face of the unit cell
+  const unsigned int n_faces = GeometryInfo<dim>::faces_per_cell;
+  data.unit_tangentials.resize (n_faces*(dim-1),
+                                std::vector<Tensor<1,dim> > (n_original_q_points));
+
+  if (dim==2)
+    {
+      // signs chosen so that the tangentials run counterclockwise
+      static const int tangential_orientation[4]= {-1,1,1,-1};
+      for (unsigned int face=0; face<n_faces; ++face)
+        {
+          Tensor<1,dim> tangent;
+          tangent[1-face/2]=tangential_orientation[face];
+          std::fill (data.unit_tangentials[face].begin(),
+                     data.unit_tangentials[face].end(), tangent);
+        }
+    }
+  else if (dim==3)
+    {
+      for (unsigned int face=0; face<n_faces; ++face)
+        {
+          const unsigned int normal_dir=
+            GeometryInfo<dim>::unit_normal_direction[face];
+
+          // first tangent: along axis (normal_dir+1)%3, with the sign of
+          // the unit normal orientation of this face
+          Tensor<1,dim> first_tangent;
+          first_tangent[(normal_dir+1)%dim]=GeometryInfo<dim>::unit_normal_orientation[face];
+
+          // second tangent: along axis (normal_dir+2)%3
+          Tensor<1,dim> second_tangent;
+          second_tangent[(normal_dir+2)%dim]=1.;
+
+          // the tangents are the same at all quadrature points of a face
+          std::fill (data.unit_tangentials[face].begin(),
+                     data.unit_tangentials[face].end(), first_tangent);
+          std::fill (data.unit_tangentials[n_faces+face].begin(),
+                     data.unit_tangentials[n_faces+face].end(), second_tangent);
+        }
+    }
+}
+
+
+// Allocate a new InternalData object for cell integration with the given
+// quadrature formula, initialize it through compute_data(), and return
+// it. The object is created with new and returned as a raw pointer.
+template<int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+typename
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::InternalData *
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::get_data (const UpdateFlags      update_flags,
+    const Quadrature<dim> &quadrature) const
+{
+  const unsigned int n_q_points = quadrature.size();
+
+  InternalData *internal_data = new InternalData(*fe, fe_mask);
+  this->compute_data (update_flags, quadrature, n_q_points, *internal_data);
+  return internal_data;
+}
+
+
+
+// Allocate a new InternalData object for face integration: the face
+// quadrature formula is projected onto all faces of the cell and the
+// result is passed to compute_face_data(), together with the number of
+// points of the original face formula. The object is created with new
+// and returned as a raw pointer.
+template<int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+typename Mapping<dim,spacedim>::InternalDataBase *
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::get_face_data
+(const UpdateFlags        update_flags,
+ const Quadrature<dim-1> &quadrature) const
+{
+  const Quadrature<dim> all_faces_quadrature (QProjector<dim>::project_to_all_faces(quadrature));
+  const unsigned int n_face_q_points = quadrature.size();
+
+  InternalData *internal_data = new InternalData(*fe, fe_mask);
+  this->compute_face_data (update_flags, all_faces_quadrature,
+                           n_face_q_points, *internal_data);
+  return internal_data;
+}
+
+
+// Allocate a new InternalData object for subface integration: the face
+// quadrature formula is projected onto all subfaces of the cell and the
+// result is passed to compute_face_data(), together with the number of
+// points of the original face formula. The object is created with new
+// and returned as a raw pointer.
+template<int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+typename Mapping<dim,spacedim>::InternalDataBase *
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::get_subface_data
+(const UpdateFlags        update_flags,
+ const Quadrature<dim-1> &quadrature) const
+{
+  const Quadrature<dim> all_subfaces_quadrature (QProjector<dim>::project_to_all_subfaces(quadrature));
+  const unsigned int n_face_q_points = quadrature.size();
+
+  InternalData *internal_data = new InternalData(*fe, fe_mask);
+  this->compute_face_data (update_flags, all_subfaces_quadrature,
+                           n_face_q_points, *internal_data);
+  return internal_data;
+}
+
+
+
+namespace internal
+{
+  namespace
+  {
+    /**
+     * Compute the real-space locations of the quadrature points and write
+     * them into @p quadrature_points, but only if the update flags stored
+     * in @p data contain update_quadrature_points.
+     *
+     * Each point is the sum, over all shape functions, of the local DoF
+     * value times the shape function value, added to the coordinate that
+     * @p fe_to_real assigns to the shape function's component. Shape
+     * functions whose component is not selected in @p fe_mask are ignored.
+     * @p data_set is the offset of the first point inside the shape value
+     * table of @p data.
+     */
+    template <int dim, int spacedim>
+    void
+    maybe_compute_q_points (const typename dealii::QProjector<dim>::DataSetDescriptor data_set,
+                            const typename dealii::MappingFEField<dim,spacedim>::InternalData &data,
+                            const FiniteElement<dim, spacedim>   &fe,
+                            const ComponentMask                  &fe_mask,
+                            const std::vector<unsigned int>      &fe_to_real,
+                            std::vector<Point<spacedim> >        &quadrature_points)
+    {
+      if (!(data.update_each & update_quadrature_points))
+        return;
+
+      for (unsigned int q=0; q<quadrature_points.size(); ++q)
+        {
+          // shape values of all shape functions at this point
+          const double *shape_at_q = &data.shape(q+data_set,0);
+
+          Point<spacedim> location;
+          for (unsigned int k=0; k<data.n_shape_functions; ++k)
+            {
+              const unsigned int component = fe.system_to_component_index(k).first;
+              if (!fe_mask[component])
+                continue;
+
+              location[fe_to_real[component]] += data.local_dof_values[k] * shape_at_q[k];
+            }
+
+          quadrature_points[q] = location;
+        }
+    }
+
+    /**
+     * Update the contravariant and covariant matrices as well as the
+     * volume elements (determinants) stored in the @p data object, each
+     * only if the corresponding flag is set in data.update_each.
+     *
+     * The computations are skipped if @p cell_similarity equals
+     * CellSimilarity::translation, i.e. if the current cell is just a
+     * translation of the previous one.
+     *
+     * Row fe_to_real[c] of the contravariant matrix at a point is the sum,
+     * over all shape functions k with selected component c, of the local
+     * DoF value times the derivative of shape function k at that point.
+     * @p data_set is the offset of the first point inside the derivative
+     * table of @p data. The covariant matrix and the volume element are
+     * both derived from the contravariant matrix.
+     */
+    template <int dim, int spacedim>
+    void
+    maybe_update_Jacobians (const CellSimilarity::Similarity    cell_similarity,
+                            const typename dealii::QProjector<dim>::DataSetDescriptor  data_set,
+                            const typename dealii::MappingFEField<dim,spacedim>::InternalData &data,
+                            const FiniteElement<dim, spacedim> &fe,
+                            const ComponentMask                &fe_mask,
+                            const std::vector<unsigned int>    &fe_to_real)
+    {
+      const UpdateFlags update_flags = data.update_each;
+
+      // then Jacobians
+      if (update_flags & update_contravariant_transformation)
+        {
+
+          // if the current cell is just a translation of the previous one, no
+          // need to recompute jacobians...
+          if (cell_similarity != CellSimilarity::translation)
+            {
+              const unsigned int n_q_points = data.contravariant.size();
+
+              Assert (data.n_shape_functions > 0, ExcInternalError());
+
+              for (unsigned int point=0; point<n_q_points; ++point)
+                {
+                  const Tensor<1,dim> *data_derv =
+                    &data.derivative(point+data_set, 0);
+
+                  Tensor<1, dim> result[spacedim];
+
+                  for (unsigned int k=0; k<data.n_shape_functions; ++k)
+                    {
+                      unsigned int comp_k = fe.system_to_component_index(k).first;
+                      if (fe_mask[comp_k])
+                        result[fe_to_real[comp_k]] += data.local_dof_values[k] * data_derv[k];
+                    }
+
+                  // write result into contravariant data
+                  for (unsigned int i=0; i<spacedim; ++i)
+                    {
+                      data.contravariant[point][i] = result[i];
+                    }
+                }
+            }
+        }
+
+      if (update_flags & update_covariant_transformation)
+        {
+          AssertDimension(data.covariant.size(), data.contravariant.size());
+          if (cell_similarity != CellSimilarity::translation)
+            for (unsigned int point=0; point<data.contravariant.size(); ++point)
+              data.covariant[point] = (data.contravariant[point]).covariant_form();
+        }
+
+      if (update_flags & update_volume_elements)
+        {
+          // the loop below reads the contravariant matrices, so that is
+          // the array whose size has to agree with volume_elements; the
+          // covariant array need not be allocated at all here
+          AssertDimension(data.contravariant.size(), data.volume_elements.size());
+          if (cell_similarity != CellSimilarity::translation)
+            for (unsigned int point=0; point<data.contravariant.size(); ++point)
+              data.volume_elements[point] = data.contravariant[point].determinant();
+        }
+    }
+
+    /**
+     * Compute the gradients of the Jacobian, i.e. the second derivatives
+     * of the transformation from unit to real cell, and store them in
+     * @p jacobian_grads. Nothing is done unless update_jacobian_grads is
+     * set in the flags of @p data.
+     *
+     * The computation is also skipped if @p cell_similarity equals
+     * CellSimilarity::translation.
+     */
+    template <int dim, int spacedim>
+    void
+    maybe_update_jacobian_grads (const CellSimilarity::Similarity              cell_similarity,
+                                 const typename dealii::QProjector<dim>::DataSetDescriptor data_set,
+                                 const typename dealii::MappingFEField<dim,spacedim>::InternalData &data,
+                                 const FiniteElement<dim, spacedim>           &fe,
+                                 const ComponentMask                          &fe_mask,
+                                 const std::vector<unsigned int>              &fe_to_real,
+                                 std::vector<DerivativeForm<2,dim,spacedim> > &jacobian_grads)
+    {
+      if (!(data.update_each & update_jacobian_grads))
+        return;
+
+      const unsigned int n_q_points = jacobian_grads.size();
+
+      if (cell_similarity == CellSimilarity::translation)
+        return;
+
+      for (unsigned int q=0; q<n_q_points; ++q)
+        {
+          // second derivatives of all shape functions at this point
+          const Tensor<2,dim> *hessians =
+            &data.second_derivative(q+data_set, 0);
+
+          DerivativeForm<2,dim,spacedim> accumulated;
+
+          for (unsigned int k=0; k<data.n_shape_functions; ++k)
+            {
+              const unsigned int component = fe.system_to_component_index(k).first;
+              if (!fe_mask[component])
+                continue;
+
+              const unsigned int row = fe_to_real[component];
+              for (unsigned int j=0; j<dim; ++j)
+                for (unsigned int l=0; l<dim; ++l)
+                  accumulated[row][j][l] += (hessians[k][j][l]
+                                             * data.local_dof_values[k]);
+            }
+
+          // only indices j,l < dim are written; in case dim<spacedim the
+          // remaining entries are never touched here
+          for (unsigned int i=0; i<spacedim; ++i)
+            for (unsigned int j=0; j<dim; ++j)
+              for (unsigned int l=0; l<dim; ++l)
+                jacobian_grads[q][i][j][l] = accumulated[i][j][l];
+        }
+    }
+
+    /**
+     * Update the Hessian of the transformation from unit to real cell, the
+     * Jacobian gradients, pushed forward to the real cell coordinates.
+     *
+     * The result is written to @p jacobian_pushed_forward_grads, one
+     * Tensor<3,spacedim> per entry of that vector. Nothing is done unless
+     * update_jacobian_pushed_forward_grads is set in the flags stored in
+     * @p data.
+     *
+     * Skip the computation if possible as indicated by the first argument,
+     * i.e. if @p cell_similarity equals CellSimilarity::translation.
+     *
+     * NOTE(review): this function reads data.covariant[point]; presumably
+     * that array has been filled for the current cell before this function
+     * is called -- confirm against the caller.
+     */
+    template <int dim, int spacedim>
+    void
+    maybe_update_jacobian_pushed_forward_grads (
+      const CellSimilarity::Similarity              cell_similarity,
+      const typename dealii::QProjector<dim>::DataSetDescriptor data_set,
+      const typename dealii::MappingFEField<dim,spacedim>::InternalData &data,
+      const FiniteElement<dim, spacedim>           &fe,
+      const ComponentMask                          &fe_mask,
+      const std::vector<unsigned int>              &fe_to_real,
+      std::vector<Tensor<3,spacedim> >             &jacobian_pushed_forward_grads )
+    {
+      const UpdateFlags update_flags = data.update_each;
+      if (update_flags & update_jacobian_pushed_forward_grads)
+        {
+          const unsigned int n_q_points = jacobian_pushed_forward_grads.size();
+
+          if (cell_similarity != CellSimilarity::translation)
+            {
+              // scratch array reused for every point; only the entries
+              // with last index < dim are written and read below
+              double tmp[spacedim][spacedim][spacedim];
+              for (unsigned int point=0; point<n_q_points; ++point)
+                {
+                  // second derivatives of all shape functions at this point
+                  const Tensor<2,dim> *second =
+                    &data.second_derivative(point+data_set, 0);
+
+                  // second derivatives of the transformation in unit cell
+                  // coordinates, accumulated over the shape functions whose
+                  // component is selected in fe_mask
+                  DerivativeForm<2,dim,spacedim> result;
+
+                  for (unsigned int k=0; k<data.n_shape_functions; ++k)
+                    {
+                      unsigned int comp_k = fe.system_to_component_index(k).first;
+                      if (fe_mask[comp_k])
+                        for (unsigned int j=0; j<dim; ++j)
+                          for (unsigned int l=0; l<dim; ++l)
+                            result[fe_to_real[comp_k]][j][l] += (second[k][j][l]
+                                                                 * data.local_dof_values[k]);
+                    }
+
+                  // first push forward the j-components: contract the
+                  // middle index of result with the covariant matrix
+                  for (unsigned int i=0; i<spacedim; ++i)
+                    for (unsigned int j=0; j<spacedim; ++j)
+                      for (unsigned int l=0; l<dim; ++l)
+                        {
+                          tmp[i][j][l] = result[i][0][l] *
+                                         data.covariant[point][j][0];
+                          for (unsigned int jr=1; jr<dim; ++jr)
+                            {
+                              tmp[i][j][l] += result[i][jr][l] *
+                                              data.covariant[point][j][jr];
+                            }
+                        }
+
+                  // now, pushing forward the l-components: contract the
+                  // last index of tmp with the covariant matrix
+                  for (unsigned int i=0; i<spacedim; ++i)
+                    for (unsigned int j=0; j<spacedim; ++j)
+                      for (unsigned int l=0; l<spacedim; ++l)
+                        {
+                          jacobian_pushed_forward_grads[point][i][j][l] = tmp[i][j][0] *
+                                                                          data.covariant[point][l][0];
+                          for (unsigned int lr=1; lr<dim; ++lr)
+                            {
+                              jacobian_pushed_forward_grads[point][i][j][l] += tmp[i][j][lr] *
+                                                                               data.covariant[point][l][lr];
+                            }
+
+                        }
+                }
+            }
+        }
+    }
+
+    /**
+     * Compute the second derivatives of the Jacobian, i.e. the third
+     * derivatives of the transformation from unit to real cell, and store
+     * them in @p jacobian_2nd_derivatives. Nothing is done unless
+     * update_jacobian_2nd_derivatives is set in the flags of @p data.
+     *
+     * The computation is also skipped if @p cell_similarity equals
+     * CellSimilarity::translation.
+     */
+    template <int dim, int spacedim>
+    void
+    maybe_update_jacobian_2nd_derivatives (const CellSimilarity::Similarity              cell_similarity,
+                                           const typename dealii::QProjector<dim>::DataSetDescriptor data_set,
+                                           const typename dealii::MappingFEField<dim,spacedim>::InternalData &data,
+                                           const FiniteElement<dim, spacedim>           &fe,
+                                           const ComponentMask                          &fe_mask,
+                                           const std::vector<unsigned int>              &fe_to_real,
+                                           std::vector<DerivativeForm<3,dim,spacedim> > &jacobian_2nd_derivatives)
+    {
+      if (!(data.update_each & update_jacobian_2nd_derivatives))
+        return;
+
+      const unsigned int n_q_points = jacobian_2nd_derivatives.size();
+
+      if (cell_similarity == CellSimilarity::translation)
+        return;
+
+      for (unsigned int q=0; q<n_q_points; ++q)
+        {
+          // third derivatives of all shape functions at this point
+          const Tensor<3,dim> *third_derivs =
+            &data.third_derivative(q+data_set, 0);
+
+          DerivativeForm<3,dim,spacedim> accumulated;
+
+          for (unsigned int k=0; k<data.n_shape_functions; ++k)
+            {
+              const unsigned int component = fe.system_to_component_index(k).first;
+              if (!fe_mask[component])
+                continue;
+
+              const unsigned int row = fe_to_real[component];
+              for (unsigned int j=0; j<dim; ++j)
+                for (unsigned int l=0; l<dim; ++l)
+                  for (unsigned int m=0; m<dim; ++m)
+                    accumulated[row][j][l][m] += (third_derivs[k][j][l][m]
+                                                  * data.local_dof_values[k]);
+            }
+
+          // only indices j,l,m < dim are written; in case dim<spacedim
+          // the remaining entries are never touched here
+          for (unsigned int i=0; i<spacedim; ++i)
+            for (unsigned int j=0; j<dim; ++j)
+              for (unsigned int l=0; l<dim; ++l)
+                for (unsigned int m=0; m<dim; ++m)
+                  jacobian_2nd_derivatives[q][i][j][l][m] = accumulated[i][j][l][m];
+        }
+    }
+
+    /**
+     * Update the third derivative of the transformation from unit to real cell,
+     * the Jacobian hessians, pushed forward to the real cell coordinates.
+     *
+     * Skip the computation if possible as indicated by the first argument.
+     *
+     * For every quadrature point, the shape function third derivatives are
+     * first weighted with @p data.local_dof_values and summed into a
+     * DerivativeForm. The three derivative indices of that object are then
+     * contracted, one after the other, with @p data.covariant of the same
+     * point; the intermediate stages alternate between the output tensor and
+     * a local scratch array.
+     *
+     * @param cell_similarity Nothing is computed if this equals
+     * CellSimilarity::translation.
+     * @param data_set Added to the quadrature point index when looking up
+     * @p data.third_derivative.
+     * @param data Supplies update_each, third_derivative, n_shape_functions,
+     * local_dof_values and covariant; nothing is computed unless update_each
+     * contains update_jacobian_pushed_forward_2nd_derivatives.
+     * @param fe Used to find the vector component of each shape function.
+     * @param fe_mask Shape functions whose component is not selected here are
+     * skipped.
+     * @param fe_to_real Maps a selected component to the first index of the
+     * accumulated DerivativeForm.
+     * @param jacobian_pushed_forward_2nd_derivatives Output, one entry per
+     * quadrature point; its size determines the number of points processed.
+     */
+    template <int dim, int spacedim>
+    void
+    maybe_update_jacobian_pushed_forward_2nd_derivatives (
+      const CellSimilarity::Similarity              cell_similarity,
+      const typename dealii::QProjector<dim>::DataSetDescriptor data_set,
+      const typename dealii::MappingFEField<dim,spacedim>::InternalData &data,
+      const FiniteElement<dim, spacedim>           &fe,
+      const ComponentMask                          &fe_mask,
+      const std::vector<unsigned int>              &fe_to_real,
+      std::vector<Tensor<4,spacedim> >             &jacobian_pushed_forward_2nd_derivatives )
+    {
+      const UpdateFlags update_flags = data.update_each;
+      if (update_flags & update_jacobian_pushed_forward_2nd_derivatives)
+        {
+          const unsigned int n_q_points = jacobian_pushed_forward_2nd_derivatives.size();
+
+          if (cell_similarity != CellSimilarity::translation)
+            {
+              double tmp[spacedim][spacedim][spacedim][spacedim]; // scratch for the result of the l-stage
+              for (unsigned int point=0; point<n_q_points; ++point)
+                {
+                  const Tensor<3,dim> *third =
+                    &data.third_derivative(point+data_set, 0); // indexed by shape function below
+
+                  DerivativeForm<3,dim,spacedim> result;
+
+                  for (unsigned int k=0; k<data.n_shape_functions; ++k)
+                    {
+                      unsigned int comp_k = fe.system_to_component_index(k).first;
+                      if (fe_mask[comp_k])
+                        for (unsigned int j=0; j<dim; ++j)
+                          for (unsigned int l=0; l<dim; ++l)
+                            for (unsigned int m=0; m<dim; ++m)
+                              result[fe_to_real[comp_k]][j][l][m] += (third[k][j][l][m]
+                                                                      * data.local_dof_values[k]);
+                    }
+
+                  // push forward the j-coordinate: result -> output tensor (l and m still run over dim)
+                  for (unsigned int i=0; i<spacedim; ++i)
+                    for (unsigned int j=0; j<spacedim; ++j)
+                      for (unsigned int l=0; l<dim; ++l)
+                        for (unsigned int m=0; m<dim; ++m)
+                          {
+                            jacobian_pushed_forward_2nd_derivatives[point][i][j][l][m]
+                              = result[i][0][l][m]*
+                                data.covariant[point][j][0];
+                            for (unsigned int jr=1; jr<dim; ++jr)
+                              jacobian_pushed_forward_2nd_derivatives[point][i][j][l][m]
+                              += result[i][jr][l][m]*
+                                 data.covariant[point][j][jr];
+                          }
+
+                  // push forward the l-coordinate: output tensor -> tmp (m still runs over dim)
+                  for (unsigned int i=0; i<spacedim; ++i)
+                    for (unsigned int j=0; j<spacedim; ++j)
+                      for (unsigned int l=0; l<spacedim; ++l)
+                        for (unsigned int m=0; m<dim; ++m)
+                          {
+                            tmp[i][j][l][m]
+                              = jacobian_pushed_forward_2nd_derivatives[point][i][j][0][m]*
+                                data.covariant[point][l][0];
+                            for (unsigned int lr=1; lr<dim; ++lr)
+                              tmp[i][j][l][m]
+                              += jacobian_pushed_forward_2nd_derivatives[point][i][j][lr][m]*
+                                 data.covariant[point][l][lr];
+                          }
+
+                  // push forward the m-coordinate: tmp -> output tensor (final values)
+                  for (unsigned int i=0; i<spacedim; ++i)
+                    for (unsigned int j=0; j<spacedim; ++j)
+                      for (unsigned int l=0; l<spacedim; ++l)
+                        for (unsigned int m=0; m<spacedim; ++m)
+                          {
+                            jacobian_pushed_forward_2nd_derivatives[point][i][j][l][m]
+                              = tmp[i][j][l][0]*
+                                data.covariant[point][m][0];
+                            for (unsigned int mr=1; mr<dim; ++mr)
+                              jacobian_pushed_forward_2nd_derivatives[point][i][j][l][m]
+                              += tmp[i][j][l][mr]*
+                                 data.covariant[point][m][mr];
+                          }
+                }
+            }
+        }
+    }
+  }
+
+  /**
+   * Compute the fourth derivative of the transformation from unit to real
+   * cell (the gradients of the Jacobian hessians) at every quadrature point.
+   *
+   * The function returns without touching the output unless the update
+   * flags stored in @p data request update_jacobian_3rd_derivatives; it also
+   * returns early when @p cell_similarity is CellSimilarity::translation.
+   */
+  template <int dim, int spacedim>
+  void
+  maybe_update_jacobian_3rd_derivatives (const CellSimilarity::Similarity              cell_similarity,
+                                         const typename dealii::QProjector<dim>::DataSetDescriptor data_set,
+                                         const typename dealii::MappingFEField<dim,spacedim>::InternalData &data,
+                                         const FiniteElement<dim, spacedim>           &fe,
+                                         const ComponentMask                          &fe_mask,
+                                         const std::vector<unsigned int>              &fe_to_real,
+                                         std::vector<DerivativeForm<4,dim,spacedim> > &jacobian_3rd_derivatives)
+  {
+    // bail out if the quantity was not requested ...
+    if (!(data.update_each & update_jacobian_3rd_derivatives))
+      return;
+
+    // ... or if the previously computed values can be reused
+    if (cell_similarity == CellSimilarity::translation)
+      return;
+
+    const unsigned int n_points = jacobian_3rd_derivatives.size();
+
+    for (unsigned int q=0; q<n_points; ++q)
+      {
+        // fourth derivatives of the shape functions at this point,
+        // indexed by shape function number below
+        const Tensor<4,dim> *shape_fourth =
+          &data.fourth_derivative(q+data_set, 0);
+
+        DerivativeForm<4,dim,spacedim> accumulated;
+
+        // sum up the contributions of all shape functions whose
+        // component is selected by the mask
+        for (unsigned int dof=0; dof<data.n_shape_functions; ++dof)
+          {
+            const unsigned int component = fe.system_to_component_index(dof).first;
+            if (!fe_mask[component])
+              continue;
+
+            for (unsigned int a=0; a<dim; ++a)
+              for (unsigned int b=0; b<dim; ++b)
+                for (unsigned int c=0; c<dim; ++c)
+                  for (unsigned int d=0; d<dim; ++d)
+                    accumulated[fe_to_real[component]][a][b][c][d]
+                    += (shape_fourth[dof][a][b][c][d]
+                        * data.local_dof_values[dof]);
+          }
+
+        // store the result entry by entry; only indices below dim are
+        // written for the four derivative directions
+        for (unsigned int row=0; row<spacedim; ++row)
+          for (unsigned int a=0; a<dim; ++a)
+            for (unsigned int b=0; b<dim; ++b)
+              for (unsigned int c=0; c<dim; ++c)
+                for (unsigned int d=0; d<dim; ++d)
+                  jacobian_3rd_derivatives[q][row][a][b][c][d] = accumulated[row][a][b][c][d];
+      }
+  }
+
+  /**
+   * Update the fourth derivative of the transformation from unit to real cell,
+   * the Jacobian hessian gradients, pushed forward to the real cell
+   * coordinates.
+   *
+   * Skip the computation if possible as indicated by the first argument.
+   *
+   * For every quadrature point, the shape function fourth derivatives are
+   * first weighted with @p data.local_dof_values and summed into a
+   * DerivativeForm. Its four derivative indices are then contracted, one
+   * after the other, with @p data.covariant of the same point. The stages
+   * alternate between a local scratch array and the output tensor, ending
+   * in the output tensor.
+   *
+   * @param cell_similarity Nothing is computed if this equals
+   * CellSimilarity::translation.
+   * @param data_set Added to the quadrature point index when looking up
+   * @p data.fourth_derivative.
+   * @param data Supplies update_each, fourth_derivative, n_shape_functions,
+   * local_dof_values and covariant; nothing is computed unless update_each
+   * contains update_jacobian_pushed_forward_3rd_derivatives.
+   * @param fe Used to find the vector component of each shape function.
+   * @param fe_mask Shape functions whose component is not selected here are
+   * skipped.
+   * @param fe_to_real Maps a selected component to the first index of the
+   * accumulated DerivativeForm.
+   * @param jacobian_pushed_forward_3rd_derivatives Output, one entry per
+   * quadrature point; its size determines the number of points processed.
+   */
+  template <int dim, int spacedim>
+  void
+  maybe_update_jacobian_pushed_forward_3rd_derivatives (
+    const CellSimilarity::Similarity              cell_similarity,
+    const typename dealii::QProjector<dim>::DataSetDescriptor data_set,
+    const typename dealii::MappingFEField<dim,spacedim>::InternalData &data,
+    const FiniteElement<dim, spacedim>           &fe,
+    const ComponentMask                          &fe_mask,
+    const std::vector<unsigned int>              &fe_to_real,
+    std::vector<Tensor<5,spacedim> >             &jacobian_pushed_forward_3rd_derivatives )
+  {
+    const UpdateFlags update_flags = data.update_each;
+    if (update_flags & update_jacobian_pushed_forward_3rd_derivatives)
+      {
+        const unsigned int n_q_points = jacobian_pushed_forward_3rd_derivatives.size();
+
+        if (cell_similarity != CellSimilarity::translation)
+          {
+            double tmp[spacedim][spacedim][spacedim][spacedim][spacedim]; // scratch for the j- and m-stages
+            for (unsigned int point=0; point<n_q_points; ++point)
+              {
+                const Tensor<4,dim> *fourth =
+                  &data.fourth_derivative(point+data_set, 0); // indexed by shape function below
+
+                DerivativeForm<4,dim,spacedim> result;
+
+                for (unsigned int k=0; k<data.n_shape_functions; ++k)
+                  {
+                    unsigned int comp_k = fe.system_to_component_index(k).first;
+                    if (fe_mask[comp_k])
+                      for (unsigned int j=0; j<dim; ++j)
+                        for (unsigned int l=0; l<dim; ++l)
+                          for (unsigned int m=0; m<dim; ++m)
+                            for (unsigned int n=0; n<dim; ++n)
+                              result[fe_to_real[comp_k]][j][l][m][n]
+                              += (fourth[k][j][l][m][n]
+                                  * data.local_dof_values[k]);
+                  }
+
+                // push-forward the j-coordinate: result -> tmp (l, m and n still run over dim)
+                for (unsigned int i=0; i<spacedim; ++i)
+                  for (unsigned int j=0; j<spacedim; ++j)
+                    for (unsigned int l=0; l<dim; ++l)
+                      for (unsigned int m=0; m<dim; ++m)
+                        for (unsigned int n=0; n<dim; ++n)
+                          {
+                            tmp[i][j][l][m][n] = result[i][0][l][m][n] *
+                                                 data.covariant[point][j][0];
+                            for (unsigned int jr=1; jr<dim; ++jr)
+                              tmp[i][j][l][m][n] += result[i][jr][l][m][n] *
+                                                    data.covariant[point][j][jr];
+                          }
+
+                // push-forward the l-coordinate: tmp -> output tensor (m and n still run over dim)
+                for (unsigned int i=0; i<spacedim; ++i)
+                  for (unsigned int j=0; j<spacedim; ++j)
+                    for (unsigned int l=0; l<spacedim; ++l)
+                      for (unsigned int m=0; m<dim; ++m)
+                        for (unsigned int n=0; n<dim; ++n)
+                          {
+                            jacobian_pushed_forward_3rd_derivatives[point][i][j][l][m][n]
+                              = tmp[i][j][0][m][n] *
+                                data.covariant[point][l][0];
+                            for (unsigned int lr=1; lr<dim; ++lr)
+                              jacobian_pushed_forward_3rd_derivatives[point][i][j][l][m][n]
+                              += tmp[i][j][lr][m][n] *
+                                 data.covariant[point][l][lr];
+                          }
+
+                // push-forward the m-coordinate: output tensor -> tmp (n still runs over dim)
+                for (unsigned int i=0; i<spacedim; ++i)
+                  for (unsigned int j=0; j<spacedim; ++j)
+                    for (unsigned int l=0; l<spacedim; ++l)
+                      for (unsigned int m=0; m<spacedim; ++m)
+                        for (unsigned int n=0; n<dim; ++n)
+                          {
+                            tmp[i][j][l][m][n]
+                              = jacobian_pushed_forward_3rd_derivatives[point][i][j][l][0][n] *
+                                data.covariant[point][m][0];
+                            for (unsigned int mr=1; mr<dim; ++mr)
+                              tmp[i][j][l][m][n]
+                              += jacobian_pushed_forward_3rd_derivatives[point][i][j][l][mr][n] *
+                                 data.covariant[point][m][mr];
+                          }
+
+                // push-forward the n-coordinate: tmp -> output tensor (final values)
+                for (unsigned int i=0; i<spacedim; ++i)
+                  for (unsigned int j=0; j<spacedim; ++j)
+                    for (unsigned int l=0; l<spacedim; ++l)
+                      for (unsigned int m=0; m<spacedim; ++m)
+                        for (unsigned int n=0; n<spacedim; ++n)
+                          {
+                            jacobian_pushed_forward_3rd_derivatives[point][i][j][l][m][n]
+                              = tmp[i][j][l][m][0] *
+                                data.covariant[point][n][0];
+                            for (unsigned int nr=1; nr<dim; ++nr)
+                              jacobian_pushed_forward_3rd_derivatives[point][i][j][l][m][n]
+                              += tmp[i][j][l][m][nr] *
+                                 data.covariant[point][n][nr];
+                          }
+              }
+          }
+      }
+  }
+
+
+  /**
+   * Depending on what information is called for in the update flags of the
+   * @p data object, compute the various pieces of information that is
+   * required by the fill_fe_face_values() and fill_fe_subface_values()
+   * functions.  This function simply unifies the work that would be done by
+   * those two functions.
+   *
+   * The resulting data is put into the @p output_data argument.
+   *
+   * Everything in this function happens only if @p data.update_each contains
+   * update_boundary_forms. In that case the boundary forms are computed
+   * first; JxW values and normal vectors are then derived from them, and
+   * Jacobians and inverse Jacobians are copied from @p data.contravariant
+   * and @p data.covariant if requested.
+   *
+   * NOTE(review): because the copies of the Jacobians and inverse Jacobians
+   * sit inside the update_boundary_forms branch, they are not filled when
+   * only update_jacobians or update_inverse_jacobians is set -- confirm that
+   * this is intended.
+   *
+   * @param mapping The mapping whose transform() function is used to map
+   * the unit tangential vectors.
+   * @param cell Only used to query subface_case() when a subface is given.
+   * @param face_no Number of the face; selects the unit tangentials and, in
+   * 1d, the sign of the boundary form.
+   * @param subface_no Number of the subface, or
+   * numbers::invalid_unsigned_int if no subface scaling of the JxW values
+   * is to be applied.
+   * @param weights Quadrature weights, multiplied into the JxW values.
+   * @param data Internal data providing update_each, unit_tangentials, aux,
+   * contravariant and covariant.
+   * @param output_data Receives boundary_forms, JxW_values, normal_vectors,
+   * jacobians and inverse_jacobians; the size of boundary_forms determines
+   * the number of quadrature points.
+   */
+  template <int dim, int spacedim>
+  void
+  maybe_compute_face_data (const dealii::MappingFEField<dim,spacedim> &mapping,
+                           const typename dealii::Triangulation<dim,spacedim>::cell_iterator &cell,
+                           const unsigned int               face_no,
+                           const unsigned int               subface_no,
+                           const std::vector<double>        &weights,
+                           const typename dealii::MappingFEField<dim,spacedim>::InternalData &data,
+                           internal::FEValues::MappingRelatedData<dim,spacedim>         &output_data)
+  {
+    const UpdateFlags update_flags = data.update_each;
+
+    if (update_flags & update_boundary_forms)
+      {
+        const unsigned int n_q_points = output_data.boundary_forms.size();
+        if (update_flags & update_normal_vectors)
+          AssertDimension (output_data.normal_vectors.size(), n_q_points);
+        if (update_flags & update_JxW_values)
+          AssertDimension (output_data.JxW_values.size(), n_q_points);
+
+        // map the unit tangentials to the real cell. checking for d!=dim-1
+        // eliminates compiler warnings regarding unsigned int expressions <
+        // 0.
+        for (unsigned int d=0; d!=dim-1; ++d)
+          {
+            Assert (face_no+GeometryInfo<dim>::faces_per_cell*d <
+                    data.unit_tangentials.size(),
+                    ExcInternalError());
+            Assert (data.aux[d].size() <=
+                    data.unit_tangentials[face_no+GeometryInfo<dim>::faces_per_cell*d].size(),
+                    ExcInternalError());
+
+            mapping.transform (make_array_view(data.unit_tangentials[face_no+GeometryInfo<dim>::faces_per_cell*d]),
+                               mapping_contravariant,
+                               data,
+                               make_array_view(data.aux[d])); // transformed tangentials end up in data.aux[d]
+          }
+
+        // if dim==spacedim, we can use the unit tangentials to compute the
+        // boundary form by simply taking the cross product
+        if (dim == spacedim)
+          {
+            for (unsigned int i=0; i<n_q_points; ++i)
+              switch (dim)
+                {
+                case 1:
+                  // in 1d, we don't have access to any of the data.aux
+                  // fields (because it has only dim-1 components), but we
+                  // can still compute the boundary form by simply looking
+                  // at the number of the face
+                  output_data.boundary_forms[i][0] = (face_no == 0 ?
+                                                      -1 : +1);
+                  break;
+                case 2:
+                  output_data.boundary_forms[i] = cross_product_2d(data.aux[0][i]);
+                  break;
+                case 3:
+                  output_data.boundary_forms[i] =
+                    cross_product_3d(data.aux[0][i], data.aux[1][i]);
+                  break;
+                default:
+                  Assert(false, ExcNotImplemented());
+                }
+          }
+        else //(dim < spacedim)
+          {
+            // in the codim-one case, the boundary form results from the
+            // cross product of all the face tangential vectors and the cell
+            // normal vector
+            //
+            // to compute the cell normal, use the same method used in
+            // fill_fe_values for cells above
+            AssertDimension (data.contravariant.size(), n_q_points);
+
+            for (unsigned int point=0; point<n_q_points; ++point)
+              {
+                if (dim==1)
+                  {
+                    // J is a tangent vector; it is normalized below, with the sign chosen by the face number
+                    output_data.boundary_forms[point] = data.contravariant[point].transpose()[0];
+                    output_data.boundary_forms[point] /=
+                      (face_no == 0 ? -1. : +1.) * output_data.boundary_forms[point].norm();
+
+                  }
+
+                if (dim==2)
+                  {
+                    const DerivativeForm<1,spacedim,dim> DX_t =
+                      data.contravariant[point].transpose();
+
+                    Tensor<1, spacedim> cell_normal =
+                      cross_product_3d(DX_t[0], DX_t[1]);
+                    cell_normal /= cell_normal.norm(); // unit length cell normal
+
+                    // then compute the face normal from the face tangent
+                    // and the cell normal:
+                    output_data.boundary_forms[point] =
+                      cross_product_3d(data.aux[0][point], cell_normal);
+                  }
+
+              }
+          }
+
+        if (update_flags & (update_normal_vectors | update_JxW_values))
+          for (unsigned int i=0; i<output_data.boundary_forms.size(); ++i)
+            {
+              if (update_flags & update_JxW_values)
+                {
+                  output_data.JxW_values[i] = output_data.boundary_forms[i].norm() * weights[i];
+
+                  if (subface_no != numbers::invalid_unsigned_int) // a subface was given: scale by its ratio
+                    {
+                      const double area_ratio=GeometryInfo<dim>::subface_ratio(
+                                                cell->subface_case(face_no), subface_no);
+                      output_data.JxW_values[i] *= area_ratio;
+                    }
+                }
+
+              if (update_flags & update_normal_vectors) // normal = boundary form scaled to unit length
+                output_data.normal_vectors[i] = Point<spacedim>(output_data.boundary_forms[i] / output_data.boundary_forms[i].norm());
+            }
+
+        if (update_flags & update_jacobians)
+          for (unsigned int point=0; point<n_q_points; ++point)
+            output_data.jacobians[point] = data.contravariant[point];
+
+        if (update_flags & update_inverse_jacobians)
+          for (unsigned int point=0; point<n_q_points; ++point)
+            output_data.inverse_jacobians[point] = data.covariant[point].transpose();
+      }
+  }
+
+  /**
+   * Common implementation behind MappingFEField::fill_fe_face_values() and
+   * MappingFEField::fill_fe_subface_values(). The @p data_set argument
+   * distinguishes whether a face (and which one) or a subface is being
+   * worked on.
+   *
+   * The helper functions are invoked one after the other, each deciding by
+   * itself (based on the flags in @p data) whether it has anything to do;
+   * the face-specific quantities are computed last.
+   */
+  template<int dim, int spacedim>
+  void
+  do_fill_fe_face_values (const dealii::MappingFEField<dim,spacedim>                        &mapping,
+                          const typename dealii::Triangulation<dim,spacedim>::cell_iterator &cell,
+                          const unsigned int                                                 face_no,
+                          const unsigned int                                                 subface_no,
+                          const typename dealii::QProjector<dim>::DataSetDescriptor          data_set,
+                          const Quadrature<dim-1>                                           &quadrature,
+                          const typename dealii::MappingFEField<dim,spacedim>::InternalData &data,
+                          const FiniteElement<dim, spacedim>                                &fe,
+                          const ComponentMask                                               &fe_mask,
+                          const std::vector<unsigned int>                                   &fe_to_real,
+                          internal::FEValues::MappingRelatedData<dim,spacedim>              &output_data)
+  {
+    // every helper below is told that no cell similarity may be exploited
+    const CellSimilarity::Similarity no_similarity = CellSimilarity::none;
+
+    // locations of the quadrature points
+    maybe_compute_q_points<dim,spacedim>
+    (data_set, data, fe, fe_mask, fe_to_real,
+     output_data.quadrature_points);
+
+    // Jacobians (stored inside 'data')
+    maybe_update_Jacobians<dim,spacedim>
+    (no_similarity, data_set, data, fe, fe_mask, fe_to_real);
+
+    // first derivatives of the Jacobians, plain and pushed forward
+    maybe_update_jacobian_grads<dim,spacedim>
+    (no_similarity, data_set, data, fe, fe_mask, fe_to_real,
+     output_data.jacobian_grads);
+
+    maybe_update_jacobian_pushed_forward_grads<dim,spacedim>
+    (no_similarity, data_set, data, fe, fe_mask, fe_to_real,
+     output_data.jacobian_pushed_forward_grads);
+
+    // second derivatives of the Jacobians, plain and pushed forward
+    maybe_update_jacobian_2nd_derivatives<dim,spacedim>
+    (no_similarity, data_set, data, fe, fe_mask, fe_to_real,
+     output_data.jacobian_2nd_derivatives);
+
+    maybe_update_jacobian_pushed_forward_2nd_derivatives<dim,spacedim>
+    (no_similarity, data_set, data, fe, fe_mask, fe_to_real,
+     output_data.jacobian_pushed_forward_2nd_derivatives);
+
+    // third derivatives of the Jacobians, plain and pushed forward
+    maybe_update_jacobian_3rd_derivatives<dim,spacedim>
+    (no_similarity, data_set, data, fe, fe_mask, fe_to_real,
+     output_data.jacobian_3rd_derivatives);
+
+    maybe_update_jacobian_pushed_forward_3rd_derivatives<dim,spacedim>
+    (no_similarity, data_set, data, fe, fe_mask, fe_to_real,
+     output_data.jacobian_pushed_forward_3rd_derivatives);
+
+    // finally the quantities that only exist on faces
+    const std::vector<double> &face_weights = quadrature.get_weights();
+    maybe_compute_face_data (mapping,
+                             cell, face_no, subface_no,
+                             face_weights, data,
+                             output_data);
+  }
+}
+
+
+/**
+ * Fill the mapping-related fields of @p output_data for the quadrature
+ * points of @p quadrature on @p cell, as requested by the update flags
+ * stored in @p internal_data (which must be this class's InternalData).
+ *
+ * The function first refreshes the internal degrees of freedom for the
+ * cell, then computes quadrature points and Jacobians, JxW values and (for
+ * codimension one) cell normal vectors, copies Jacobians and inverse
+ * Jacobians, and finally the derivatives of the Jacobians up to third
+ * order, both plain and pushed forward.
+ *
+ * Note that the CellSimilarity flag is modifiable, since MappingFEField can
+ * need to recalculate data even when cells are similar: the returned value
+ * is the incoming @p cell_similarity only if get_degree() is 1, and
+ * CellSimilarity::invalid_next_cell otherwise.
+ */
+// NOTE(review): the adjusted similarity is only returned; the helper calls below receive the caller's cell_similarity unchanged.
+template<int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+CellSimilarity::Similarity
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::
+fill_fe_values (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                const CellSimilarity::Similarity                           cell_similarity,
+                const Quadrature<dim>                                     &quadrature,
+                const typename Mapping<dim,spacedim>::InternalDataBase    &internal_data,
+                internal::FEValues::MappingRelatedData<dim,spacedim>      &output_data) const
+{
+  // convert data object to internal data for this class. fails with an
+  // exception if that is not possible
+  Assert (dynamic_cast<const InternalData *> (&internal_data) != 0, ExcInternalError());
+  const InternalData &data = static_cast<const InternalData &> (internal_data);
+
+  const unsigned int n_q_points=quadrature.size();
+  const  CellSimilarity::Similarity updated_cell_similarity
+    = (get_degree() == 1
+       ?
+       cell_similarity
+       :
+       CellSimilarity::invalid_next_cell);
+
+  update_internal_dofs(cell, data);
+
+  internal::maybe_compute_q_points(QProjector<dim>::DataSetDescriptor::cell (),
+                                   data, *fe, fe_mask, fe_to_real,
+                                   output_data.quadrature_points);
+
+  internal::maybe_update_Jacobians(cell_similarity,
+                                   QProjector<dim>::DataSetDescriptor::cell (),
+                                   data, *fe, fe_mask, fe_to_real);
+
+  const UpdateFlags update_flags = data.update_each;
+  const std::vector<double> &weights=quadrature.get_weights();
+
+  // Multiply quadrature weights by absolute value of Jacobian determinants or
+  // the area element g=sqrt(DX^t DX) in case of codim > 0
+
+  if (update_flags & (update_normal_vectors | update_JxW_values))
+    {
+      AssertDimension (output_data.JxW_values.size(), n_q_points);
+
+      Assert( !(update_flags & update_normal_vectors ) ||
+              (output_data.normal_vectors.size() == n_q_points),
+              ExcDimensionMismatch(output_data.normal_vectors.size(), n_q_points));
+
+
+      if (cell_similarity != CellSimilarity::translation)
+        for (unsigned int point=0; point<n_q_points; ++point)
+          {
+            if (dim == spacedim)
+              {
+                const double det = data.contravariant[point].determinant();
+
+                // check for distorted cells.
+
+                // TODO: this allows for anisotropies of up to 1e6 in 3D and
+                // 1e12 in 2D. might want to find a finer
+                // (dimension-independent) criterion
+                Assert (det > 1e-12*Utilities::fixed_power<dim>(cell->diameter()/
+                                                                std::sqrt(double(dim))),
+                        (typename Mapping<dim,spacedim>::ExcDistortedMappedCell(cell->center(), det, point)));
+                output_data.JxW_values[point] = weights[point] * det;
+              }
+            // if dim==spacedim, then there is no cell normal to
+            // compute. since this is for FEValues (and not FEFaceValues),
+            // there are also no face normals to compute
+            else //codim>0 case
+              {
+                Tensor<1, spacedim> DX_t [dim]; // transpose of the Jacobian, one tensor per reference direction
+                for (unsigned int i=0; i<spacedim; ++i)
+                  for (unsigned int j=0; j<dim; ++j)
+                    DX_t[j][i] = data.contravariant[point][i][j];
+
+                Tensor<2, dim> G; //First fundamental form
+                for (unsigned int i=0; i<dim; ++i)
+                  for (unsigned int j=0; j<dim; ++j)
+                    G[i][j] = DX_t[i] * DX_t[j];
+
+                output_data.JxW_values[point] = sqrt(determinant(G)) * weights[point];
+
+                if (cell_similarity == CellSimilarity::inverted_translation)
+                  {
+                    // we only need to flip the normal
+                    if (update_flags & update_normal_vectors)
+                      output_data.normal_vectors[point] *= -1.;
+                  }
+                else
+                  {
+                    if (update_flags & update_normal_vectors)
+                      {
+                        Assert (spacedim - dim == 1,
+                                ExcMessage("There is no cell normal in codim 2."));
+
+                        if (dim==1)
+                          output_data.normal_vectors[point] =
+                            cross_product_2d(-DX_t[0]);
+                        else //dim == 2
+                          output_data.normal_vectors[point] =
+                            cross_product_3d(DX_t[0], DX_t[1]);
+
+                        output_data.normal_vectors[point] /= output_data.normal_vectors[point].norm(); // scale to unit length
+
+                        if (cell->direction_flag() == false) // flip the normal for cells with a false direction flag
+                          output_data.normal_vectors[point] *= -1.;
+                      }
+
+                  }
+              } //codim>0 case
+          }
+    }
+
+  // copy values from InternalData to vector given by reference
+  if (update_flags & update_jacobians)
+    {
+      AssertDimension (output_data.jacobians.size(), n_q_points);
+      if (cell_similarity != CellSimilarity::translation)
+        for (unsigned int point=0; point<n_q_points; ++point)
+          output_data.jacobians[point] = data.contravariant[point];
+    }
+
+  // copy values from InternalData to vector given by reference
+  if (update_flags & update_inverse_jacobians)
+    {
+      AssertDimension (output_data.inverse_jacobians.size(), n_q_points);
+      if (cell_similarity != CellSimilarity::translation)
+        for (unsigned int point=0; point<n_q_points; ++point)
+          output_data.inverse_jacobians[point] = data.covariant[point].transpose();
+    }
+
+  // calculate derivatives of the Jacobians
+  internal::maybe_update_jacobian_grads(cell_similarity,
+                                        QProjector<dim>::DataSetDescriptor::cell(),
+                                        data, *fe, fe_mask, fe_to_real,
+                                        output_data.jacobian_grads);
+
+  // calculate derivatives of the Jacobians pushed forward to real cell coordinates
+  internal::maybe_update_jacobian_pushed_forward_grads(cell_similarity,
+                                                       QProjector<dim>::DataSetDescriptor::cell(),
+                                                       data, *fe, fe_mask, fe_to_real,
+                                                       output_data.jacobian_pushed_forward_grads);
+
+  // calculate hessians of the Jacobians
+  internal::maybe_update_jacobian_2nd_derivatives(cell_similarity,
+                                                  QProjector<dim>::DataSetDescriptor::cell(),
+                                                  data, *fe, fe_mask, fe_to_real,
+                                                  output_data.jacobian_2nd_derivatives);
+
+  // calculate hessians of the Jacobians pushed forward to real cell coordinates
+  internal::maybe_update_jacobian_pushed_forward_2nd_derivatives(cell_similarity,
+      QProjector<dim>::DataSetDescriptor::cell(),
+      data, *fe, fe_mask, fe_to_real,
+      output_data.jacobian_pushed_forward_2nd_derivatives);
+
+  // calculate gradients of the hessians of the Jacobians
+  internal::maybe_update_jacobian_3rd_derivatives(cell_similarity,
+                                                  QProjector<dim>::DataSetDescriptor::cell(),
+                                                  data, *fe, fe_mask, fe_to_real,
+                                                  output_data.jacobian_3rd_derivatives);
+
+  // calculate gradients of the hessians of the Jacobians pushed forward to real
+  // cell coordinates
+  internal::maybe_update_jacobian_pushed_forward_3rd_derivatives(cell_similarity,
+      QProjector<dim>::DataSetDescriptor::cell(),
+      data, *fe, fe_mask, fe_to_real,
+      output_data.jacobian_pushed_forward_3rd_derivatives);
+
+  return updated_cell_similarity;
+}
+
+
+
+template<int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+void
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::
+fill_fe_face_values (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                     const unsigned int                                         face_no,
+                     const Quadrature<dim-1>                                   &quadrature,
+                     const typename Mapping<dim,spacedim>::InternalDataBase    &internal_data,
+                     internal::FEValues::MappingRelatedData<dim,spacedim>      &output_data) const
+{
+  // The object passed in must be of this class's own InternalData type;
+  // the check is an Assert, the actual conversion a static_cast.
+  Assert (dynamic_cast<const InternalData *> (&internal_data) != 0,
+          ExcInternalError());
+  const InternalData &fe_field_data = static_cast<const InternalData &> (internal_data);
+  // read the entries of the Euler vector that belong to this cell
+  update_internal_dofs(cell, fe_field_data);
+
+  // descriptor selecting face face_no, given its orientation, flip and rotation
+  const typename QProjector<dim>::DataSetDescriptor face_data_set
+    = QProjector<dim>::DataSetDescriptor::face (face_no,
+                                                cell->face_orientation(face_no),
+                                                cell->face_flip(face_no),
+                                                cell->face_rotation(face_no),
+                                                quadrature.size());
+
+  // hand everything to the helper that fill_fe_subface_values() uses as well
+  internal::do_fill_fe_face_values (*this, cell, face_no, numbers::invalid_unsigned_int,
+                                    face_data_set, quadrature, fe_field_data,
+                                    *fe, fe_mask, fe_to_real, output_data);
+}
+// Compute the mapping-related data on subface @p subface_no of face
+// @p face_no of @p cell and store it in @p output_data.
+template<int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+void
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::
+fill_fe_subface_values (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                        const unsigned int                                         face_no,
+                        const unsigned int                                         subface_no,
+                        const Quadrature<dim-1>                                   &quadrature,
+                        const typename Mapping<dim,spacedim>::InternalDataBase    &internal_data,
+                        internal::FEValues::MappingRelatedData<dim,spacedim>      &output_data) const
+{
+  // convert data object to internal data for this class. fails with an
+  // exception if that is not possible
+  Assert (dynamic_cast<const InternalData *> (&internal_data) != 0,
+          ExcInternalError());
+  const InternalData &data = static_cast<const InternalData &> (internal_data);
+
+  update_internal_dofs(cell, data);
+
+  // pass on the actual subface number; numbers::invalid_unsigned_int is what
+  // fill_fe_face_values() passes for a whole face and would be wrong here
+  internal::do_fill_fe_face_values (*this,
+                                    cell, face_no, subface_no,
+                                    QProjector<dim>::DataSetDescriptor::
+                                    subface (face_no, subface_no,
+                                             cell->face_orientation(face_no),
+                                             cell->face_flip(face_no),
+                                             cell->face_rotation(face_no),
+                                             quadrature.size(),
+                                             cell->subface_case(face_no)),
+                                    quadrature, data,
+                                    *fe, fe_mask, fe_to_real, output_data);
+}
+
+
+namespace
+{
+  // Transform the tensors in @p input from the reference cell to the real
+  // cell as selected by @p mapping_type and store the result in @p output.
+  // Contravariant, Piola and covariant transformations are available.
+  template<int dim, int spacedim, int rank, typename VectorType, typename DoFHandlerType>
+  void
+  transform_fields(const ArrayView<const Tensor<rank,dim> >                &input,
+                   const MappingType                                        mapping_type,
+                   const typename Mapping<dim,spacedim>::InternalDataBase  &mapping_data,
+                   const ArrayView<Tensor<rank,spacedim> >                 &output)
+  {
+    AssertDimension (input.size(), output.size());
+    Assert ((dynamic_cast<const typename MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::InternalData *>(&mapping_data) != 0),
+            ExcInternalError());
+    typedef typename MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::InternalData FieldData;
+    const FieldData &data = static_cast<const FieldData &>(mapping_data);
+
+    switch (mapping_type)
+      {
+      case mapping_contravariant:
+      {
+        Assert (data.update_each & update_contravariant_transformation,
+                typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_contravariant_transformation"));
+
+        for (unsigned int q=0; q<output.size(); ++q)
+          output[q] = apply_transformation(data.contravariant[q], input[q]);
+        break;
+      }
+
+      case mapping_piola:
+      {
+        Assert (data.update_each & update_contravariant_transformation,
+                typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_contravariant_transformation"));
+        Assert (data.update_each & update_volume_elements,
+                typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_volume_elements"));
+        Assert (rank==1, ExcMessage("Only for rank 1"));
+        // contravariant transformation, then division by the volume element
+        for (unsigned int q=0; q<output.size(); ++q)
+          {
+            output[q] = apply_transformation(data.contravariant[q], input[q]);
+            output[q] /= data.volume_elements[q];
+          }
+        break;
+      }
+
+      // The covariant transformation of plain tensors is still permitted
+      // here, since derivatives on the reference cell are stored as Tensor
+      // objects rather than as DerivativeForm objects.
+      case mapping_covariant:
+      {
+        Assert (data.update_each & update_contravariant_transformation,
+                typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_contravariant_transformation"));
+
+        for (unsigned int q=0; q<output.size(); ++q)
+          output[q] = apply_transformation(data.covariant[q], input[q]);
+        break;
+      }
+
+      default:
+        Assert(false, ExcNotImplemented());
+      }
+  }
+
+  // Transform DerivativeForm objects to tensors of rank one higher on the
+  // real cell. Only mapping_covariant is implemented.
+  template<int dim, int spacedim, int rank, typename VectorType, typename DoFHandlerType>
+  void
+  transform_differential_forms
+  (const ArrayView<const DerivativeForm<rank, dim,spacedim> >  &input,
+   const MappingType                                            mapping_type,
+   const typename Mapping<dim,spacedim>::InternalDataBase      &mapping_data,
+   const ArrayView<Tensor<rank+1, spacedim> >                  &output)
+  {
+    AssertDimension (input.size(), output.size());
+    Assert ((dynamic_cast<const typename MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::InternalData *>(&mapping_data) != 0),
+            ExcInternalError());
+    typedef typename MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::InternalData FieldData;
+    const FieldData &data = static_cast<const FieldData &>(mapping_data);
+
+    switch (mapping_type)
+      {
+      case mapping_covariant:
+      {
+        Assert (data.update_each & update_contravariant_transformation,
+                typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_contravariant_transformation"));
+
+        for (unsigned int q=0; q<output.size(); ++q)
+          output[q] = apply_transformation(data.covariant[q], input[q]);
+        break;
+      }
+
+      default:
+        Assert(false, ExcNotImplemented());
+      }
+  }
+}
+
+// Transform rank-1 tensors from the reference to the real cell. The work is
+// delegated to transform_fields() with rank 1.
+template<int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+void
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::
+transform (const ArrayView<const Tensor<1,dim> >                  &input,
+           const MappingType                                       mapping_type,
+           const typename Mapping<dim,spacedim>::InternalDataBase &mapping_data,
+           const ArrayView<Tensor<1,spacedim> >                   &output) const
+{
+  AssertDimension (input.size(), output.size());
+
+  transform_fields<dim,spacedim,1,VectorType,DoFHandlerType>(input, mapping_type, mapping_data, output);
+}
+
+// Transform rank-1 derivative forms to rank-2 tensors on the real cell. The
+// work is delegated to transform_differential_forms() with rank 1.
+template<int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+void
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::
+transform (const ArrayView<const DerivativeForm<1, dim ,spacedim> > &input,
+           const MappingType                                         mapping_type,
+           const typename Mapping<dim,spacedim>::InternalDataBase   &mapping_data,
+           const ArrayView<Tensor<2,spacedim> >                     &output) const
+{
+  AssertDimension (input.size(), output.size());
+
+  transform_differential_forms<dim,spacedim,1,VectorType,DoFHandlerType>(input, mapping_type, mapping_data, output);
+}
+
+
+
+template<int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+void
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::
+transform (const ArrayView<const Tensor<2, dim> >                 &input,
+           const MappingType                                     /*mapping_type*/,
+           const typename Mapping<dim,spacedim>::InternalDataBase &mapping_data,
+           const ArrayView<Tensor<2,spacedim> >                   &output) const
+{
+  // rank-2 tensors are not supported here: check sizes, then report the gap
+  AssertDimension (input.size(), output.size());
+  (void)mapping_data;
+  (void)output;
+  (void)input;
+  AssertThrow(false, ExcNotImplemented());
+}
+
+// Transform rank-2 derivative forms to rank-3 tensors on the real cell. Only
+// mapping_covariant_gradient is implemented.
+template<int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+void
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::
+transform (const ArrayView<const DerivativeForm<2, dim, spacedim> >  &input,
+           const MappingType                                          mapping_type,
+           const typename Mapping<dim,spacedim>::InternalDataBase    &mapping_data,
+           const ArrayView<Tensor<3,spacedim> >                      &output) const
+{
+  AssertDimension (input.size(), output.size());
+  Assert (dynamic_cast<const InternalData *>(&mapping_data) != 0,
+          ExcInternalError());
+  const InternalData &data = static_cast<const InternalData &>(mapping_data);
+
+  switch (mapping_type)
+    {
+    case mapping_covariant_gradient:
+    {
+      Assert (data.update_each & update_contravariant_transformation,
+              typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_covariant_transformation"));
+      // NOTE(review): the flag tested is the contravariant one while the name reported is the covariant one -- confirm
+      for (unsigned int q=0; q<output.size(); ++q)
+        for (unsigned int i=0; i<spacedim; ++i)
+          for (unsigned int j=0; j<spacedim; ++j)
+            for (unsigned int k=0; k<spacedim; ++k)  // out[i][j][k] = sum_{J,K} cov[j][J] * cov[k][K] * in[i][J][K]
+              {
+                output[q][i][j][k] = data.covariant[q][j][0]
+                                     * data.covariant[q][k][0]
+                                     * input[q][i][0][0];  // the (J,K)=(0,0) term initializes the sum
+                for (unsigned int J=0; J<dim; ++J)
+                  {
+                    const unsigned int K0 = (0==J)? 1 : 0;  // skip (0,0), it is already in the sum
+                    for (unsigned int K=K0; K<dim; ++K)
+                      output[q][i][j][k] += data.covariant[q][j][J]
+                                            * data.covariant[q][k][K]
+                                            * input[q][i][J][K];
+                  }
+
+              }
+      return;
+    }
+
+    default:
+      Assert(false, ExcNotImplemented());
+    }
+
+}
+
+
+
+template<int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+void
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::
+transform (const ArrayView<const Tensor<3,dim> >                  &input,
+           const MappingType                                     /*mapping_type*/,
+           const typename Mapping<dim,spacedim>::InternalDataBase &mapping_data,
+           const ArrayView<Tensor<3,spacedim> >                   &output) const
+{
+  // Rank-3 tensors are not supported by this class. The arguments are
+  // only looked at by the size check, hence the casts to void that keep
+  // them formally used otherwise.
+  AssertDimension (input.size(), output.size());
+  (void)mapping_data;
+  (void)output;
+  (void)input;
+  AssertThrow(false, ExcNotImplemented());
+}
+
+
+
+template<int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+Point<spacedim>
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::
+transform_unit_to_real_cell (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                             const Point<dim>                                          &p) const
+{
+  // A one-point quadrature formula at p lets get_data() build an
+  // InternalData object with data vectors of the right size and the
+  // shape values at p already computed.
+  const UpdateFlags flags = update_quadrature_points | update_jacobians;
+  const Quadrature<dim> single_point(p);
+  std_cxx11::unique_ptr<InternalData> point_data (get_data(flags, single_point));
+
+  // fetch the Euler vector entries of this cell, then evaluate the field
+  update_internal_dofs(cell, *point_data);
+  return do_transform_unit_to_real_cell(*point_data);
+}
+
+
+template<int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+Point<spacedim>
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::
+do_transform_unit_to_real_cell (const InternalData &data) const
+{
+  // sum up the contributions of all shape functions selected by the mask
+  Point<spacedim> real_point;
+  for (unsigned int k=0; k<data.n_shape_functions; ++k)
+    {
+      const unsigned int component = fe->system_to_component_index(k).first;
+      if (!fe_mask[component])
+        continue;
+      real_point[fe_to_real[component]] += data.local_dof_values[k] * data.shape(0,k);
+    }
+  return real_point;
+}
+
+
+
+template<int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+Point<dim>
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::
+transform_real_to_unit_cell (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                             const Point<spacedim>                                     &p) const
+{
+  // Obtain a starting guess for the Newton iteration further down by
+  // asking the Q1 mapping (StaticMappingQ1) to invert p on this cell.
+  Point<dim> initial_p_unit;
+  try
+    {
+      initial_p_unit
+        = StaticMappingQ1<dim,spacedim>::mapping.transform_real_to_unit_cell(cell, p);
+    }
+  catch (const typename Mapping<dim,spacedim>::ExcTransformationFailed &)
+    {
+      // the Q1 mapping could not invert the point. the exception is not
+      // propagated; instead the point with all coordinates equal to 0.5
+      // serves as starting guess
+      for (unsigned int d=0; d<dim; ++d)
+        initial_p_unit[d] = 0.5;
+    }
+  // make sure the starting guess is a point of the unit cell
+  initial_p_unit = GeometryInfo<dim>::project_to_unit_cell(initial_p_unit);
+
+  // Set up an InternalData object for the single point initial_p_unit. If
+  // spacedim>dim, the gradients of the Jacobian are requested as well.
+
+  const Quadrature<dim> point_quadrature(initial_p_unit);
+
+  UpdateFlags update_flags = update_quadrature_points | update_jacobians;
+  if (spacedim>dim)
+    update_flags |= update_jacobian_grads;
+  std_cxx11::unique_ptr<InternalData>
+  mdata (get_data(update_flags,point_quadrature));
+  // read the entries of the Euler vector that belong to this cell
+  update_internal_dofs(cell, *mdata);
+
+  return do_transform_real_to_unit_cell(cell, p, initial_p_unit, *mdata);
+
+}
+// Newton-type solver used by transform_real_to_unit_cell(): starting from
+// @p initial_p_unit, find the unit point mapped to @p p or throw ExcTransformationFailed.
+template<int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+Point<dim>
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::
+do_transform_real_to_unit_cell
+(const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+ const Point<spacedim>                                     &p,
+ const Point<dim>                                          &initial_p_unit,
+ InternalData                                              &mdata) const
+{
+  const unsigned int n_shapes=mdata.shape_values.size();
+  (void)n_shapes;
+  Assert(n_shapes!=0, ExcInternalError());
+  AssertDimension (mdata.shape_derivatives.size(), n_shapes);
+
+
+  // Damped Newton iteration for the unit point x whose image F(x)
+  // equals p, i.e. for a zero of the residual p - F(x). Every step
+  // solves a dim x dim system assembled from the columns DF[j] of
+  // the Jacobian of F and is followed by a line search that halves
+  // the step length until the residual decreases.
+  //
+  // First evaluate the shape functions at the starting point passed
+  // in by the caller and compute the initial residual. The tolerance
+  // eps is 1e-12 times the diameter of the cell.
+  Point<dim> p_unit = initial_p_unit;
+  Point<dim> f;
+  compute_shapes_virtual(std::vector<Point<dim> > (1, p_unit), mdata);
+  Point<spacedim> p_real(do_transform_unit_to_real_cell(mdata));
+  Tensor<1,spacedim> p_minus_F = p - p_real;
+  const double eps = 1.e-12*cell->diameter();
+  const unsigned int newton_iteration_limit = 20;
+  unsigned int newton_iteration=0;
+  while (p_minus_F.norm_square() > eps*eps)
+    {
+      // assemble DF[j], the derivative of F with respect to unit coordinate j
+      Point<spacedim>  DF[dim];
+      Tensor<2,dim>  df;
+      for (unsigned int k=0; k<mdata.n_shape_functions; ++k)
+        {
+          const Tensor<1,dim> &grad_k = mdata.derivative(0,k);
+          unsigned int comp_k = fe->system_to_component_index(k).first;
+          if (fe_mask[comp_k])
+            for (unsigned int j=0; j<dim; ++j)
+              DF[j][fe_to_real[comp_k]] += mdata.local_dof_values[k] * grad_k[j];
+        }
+      for (unsigned int j=0; j<dim; ++j)
+        {
+          f[j] = DF[j] * p_minus_F;
+          for (unsigned int l=0; l<dim; ++l)
+            df[j][l] = -DF[j] * DF[l];
+        }
+      // Solve df*delta = f. As df = -DF^T DF, p_unit-delta is a Gauss-Newton update
+      const Tensor<1, dim> delta =
+        invert(df) * static_cast<const Tensor<1, dim> &>(f);
+      // do a line search
+      double step_length = 1;
+      do
+        {
+          // update of p_unit. The
+          // spacedimth component of
+          // transformed point is simply
+          // ignored in codimension one
+          // case. When this component is
+          // not zero, then we are
+          // projecting the point to the
+          // surface or curve identified
+          // by the cell.
+          Point<dim> p_unit_trial = p_unit;
+          for (unsigned int i=0; i<dim; ++i)
+            p_unit_trial[i] -= step_length * delta[i];
+          // shape values and derivatives
+          // at new p_unit point
+          compute_shapes_virtual(std::vector<Point<dim> > (1, p_unit_trial), mdata);
+          // residual at the trial point
+          Point<spacedim> p_real_trial = do_transform_unit_to_real_cell(mdata);
+          const Tensor<1,spacedim> f_trial = p - p_real_trial;
+          // see if we are making progress with the current step length
+          // and if not, reduce it by a factor of two and try again
+          if (f_trial.norm() < p_minus_F.norm())
+            {
+              p_real = p_real_trial;
+              p_unit = p_unit_trial;
+              p_minus_F = f_trial;
+              break;
+            }
+          else if (step_length > 0.05)
+            step_length /= 2;
+          else
+            goto failure;
+        }
+      while (true);
+      ++newton_iteration;
+      if (newton_iteration > newton_iteration_limit)
+        goto failure;
+    }
+  return p_unit;
+  // if we get to the following label, then we have either run out
+  // of Newton iterations, or the line search has not converged.
+  // in either case, we need to give up, so throw an exception that
+  // can then be caught
+failure:
+  AssertThrow (false, (typename Mapping<dim,spacedim>::ExcTransformationFailed()));
+  // ...the compiler wants us to return something, though we can
+  // of course never get here...
+  return Point<dim>();
+}
+
+// Return the degree of the finite element that describes the mapping.
+template<int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+unsigned int
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::get_degree() const
+{
+  return fe->degree;
+}
+
+// Return a copy of the component mask stored in this object.
+template<int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+ComponentMask
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::get_component_mask() const
+{
+  return this->fe_mask;
+}
+
+// Return a copy of this object allocated with new; the caller owns it.
+template<int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+Mapping<dim,spacedim> *
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::clone () const
+{
+  return new MappingFEField<dim,spacedim,VectorType,DoFHandlerType>(*this);
+}
+
+// Load the entries of the Euler vector that belong to @p cell into @p data.
+template<int dim, int spacedim, typename VectorType, typename DoFHandlerType>
+void
+MappingFEField<dim,spacedim,VectorType,DoFHandlerType>::update_internal_dofs
+(const typename Triangulation<dim,spacedim>::cell_iterator  &cell,
+ const typename MappingFEField<dim, spacedim>::InternalData &data) const
+{
+  Assert(euler_dof_handler != 0, ExcMessage("euler_dof_handler is empty"));
+  // view the triangulation cell as a cell of the Euler DoF handler
+  typename DoFHandlerType::cell_iterator dof_cell(*cell, euler_dof_handler);
+  Assert (dof_cell->active() == true, ExcInactiveCell());
+  // data is const here, so the arrays written below are presumably mutable members -- TODO confirm
+  dof_cell->get_dof_indices(data.local_dof_indices);
+  // copy the vector entries addressed by the local DoF indices
+  for (unsigned int i=0; i<data.local_dof_values.size(); ++i)
+    data.local_dof_values[i] = (*euler_vector)(data.local_dof_indices[i]);
+}
+
+// explicit instantiations
+#include "mapping_fe_field.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/mapping_fe_field.inst.in b/source/fe/mapping_fe_field.inst.in
new file mode 100644
index 0000000..ce65316
--- /dev/null
+++ b/source/fe/mapping_fe_field.inst.in
@@ -0,0 +1,25 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+// Instantiate MappingFEField with Vector<double> and DoFHandler for all dimension pairs with dim <= spacedim.
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+#if deal_II_dimension <= deal_II_space_dimension 	
+    template class MappingFEField<deal_II_dimension, deal_II_space_dimension,  dealii::Vector<double>, dealii::DoFHandler<deal_II_dimension, deal_II_space_dimension> >;
+#endif
+  }
+
diff --git a/source/fe/mapping_q.cc b/source/fe/mapping_q.cc
new file mode 100644
index 0000000..345b839
--- /dev/null
+++ b/source/fe/mapping_q.cc
@@ -0,0 +1,516 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/polynomial.h>
+#include <deal.II/base/quadrature.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/base/tensor_product_polynomials.h>
+#include <deal.II/base/std_cxx11/unique_ptr.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/fe/mapping_q.h>
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_values.h>
+
+#include <numeric>
+
+DEAL_II_NAMESPACE_OPEN
+
+// Default constructor; use_mapping_q1_on_current_cell starts out as false.
+template<int dim, int spacedim>
+MappingQ<dim,spacedim>::InternalData::InternalData ()
+  :
+  use_mapping_q1_on_current_cell(false)
+{}
+
+template<int dim, int spacedim>
+std::size_t
+MappingQ<dim,spacedim>::InternalData::memory_consumption () const
+{
+  // add up the base class part and the three members of this class
+  std::size_t total = Mapping<dim,spacedim>::InternalDataBase::memory_consumption ();
+  total += MemoryConsumption::memory_consumption (use_mapping_q1_on_current_cell);
+  total += MemoryConsumption::memory_consumption (mapping_q1_data);
+  total += MemoryConsumption::memory_consumption (mapping_qp_data);
+  return total;
+}
+
+// Construct a mapping of polynomial degree @p degree. The flag requests the
+// Qp mapping on all cells; it is overridden as described further down.
+template<int dim, int spacedim>
+MappingQ<dim,spacedim>::MappingQ (const unsigned int degree,
+                                  const bool use_mapping_q_on_all_cells)
+  :
+  polynomial_degree (degree),
+
+  // decide whether the Qp mapping object is to be used on *all* cells,
+  // or whether to defer to an explicit Q1 mapping on interior cells. if
+  // degree==1, then we are already that Q1 mapping, so we don't need
+  // it; if dim!=spacedim, there is also no need for anything because
+  // we're most likely on a curved manifold
+  use_mapping_q_on_all_cells (degree==1
+                              ||
+                              use_mapping_q_on_all_cells
+                              ||
+                              (dim != spacedim)),
+  // create a Q1 mapping for use on interior cells (if necessary)
+  // or to create a good initial guess in transform_real_to_unit_cell()
+  q1_mapping (new MappingQGeneric<dim,spacedim>(1)),
+  // NOTE(review): the next initializer reads polynomial_degree and q1_mapping, so both must be declared before qp_mapping -- confirm
+  // create a Q_p mapping; if p=1, simply share the Q_1 mapping already
+  // created via the shared_ptr objects
+  qp_mapping (this->polynomial_degree>1
+              ?
+              std_cxx11::shared_ptr<const MappingQGeneric<dim,spacedim> >(new MappingQGeneric<dim,spacedim>(degree))
+              :
+              q1_mapping)
+{}
+
+
+// Copy constructor: the sub-mappings of @p mapping are cloned, not shared with it.
+template<int dim, int spacedim>
+MappingQ<dim,spacedim>::MappingQ (const MappingQ<dim,spacedim> &mapping)
+  :
+  polynomial_degree (mapping.polynomial_degree),
+  use_mapping_q_on_all_cells (mapping.use_mapping_q_on_all_cells),
+  // clone the Q1 mapping for use on interior cells (if necessary)
+  // or to create a good initial guess in transform_real_to_unit_cell()
+  q1_mapping (dynamic_cast<MappingQGeneric<dim,spacedim>*>(mapping.q1_mapping->clone())),
+  // clone the Q_p mapping of the other object; if p=1, simply share the
+  // Q_1 mapping cloned above via the shared_ptr objects
+  qp_mapping (this->polynomial_degree>1
+              ?
+              std_cxx11::shared_ptr<const MappingQGeneric<dim,spacedim> >(dynamic_cast<MappingQGeneric<dim,spacedim>*>(mapping.qp_mapping->clone()))
+              :
+              q1_mapping)
+{}
+
+
+// Return the polynomial degree this mapping was constructed with.
+template<int dim, int spacedim>
+unsigned int
+MappingQ<dim,spacedim>::get_degree() const
+{
+  return polynomial_degree;
+}
+
+
+// Always returns true.
+template <int dim, int spacedim>
+inline
+bool
+MappingQ<dim,spacedim>::preserves_vertex_locations () const
+{
+  return true;
+}
+
+
+template<int dim, int spacedim>
+UpdateFlags
+MappingQ<dim,spacedim>::requires_update_flags (const UpdateFlags in) const
+{
+  // the result is the union of what the Q1 and the Qp mapping ask for
+  const UpdateFlags q1_flags = q1_mapping->requires_update_flags(in);
+  const UpdateFlags qp_flags = qp_mapping->requires_update_flags(in);
+  return (q1_flags | qp_flags);
+}
+
+
+
+template<int dim, int spacedim>
+typename MappingQ<dim,spacedim>::InternalData *
+MappingQ<dim,spacedim>::get_data (const UpdateFlags update_flags,
+                                  const Quadrature<dim> &quadrature) const
+{
+  InternalData *combined = new InternalData;
+
+  // compute the Qp data on a task of its own, concurrently with the Q1 data
+  typedef typename MappingQGeneric<dim,spacedim>::InternalData GenericData;
+  Threads::Task<GenericData *> qp_task
+    = Threads::new_task (&MappingQGeneric<dim,spacedim>::get_data,
+                         *qp_mapping, update_flags, quadrature);
+
+  // Q1 data is only built if the Qp mapping is not used on all cells
+  if (use_mapping_q_on_all_cells == false)
+    combined->mapping_q1_data.reset (q1_mapping->get_data (update_flags, quadrature));
+
+  // wait for the task to finish and store the object it returned
+  combined->mapping_qp_data.reset (qp_task.return_value());
+  return combined;
+}
+
+
+
+template<int dim, int spacedim>
+typename MappingQ<dim,spacedim>::InternalData *
+MappingQ<dim,spacedim>::get_face_data (const UpdateFlags update_flags,
+                                       const Quadrature<dim-1>& quadrature) const
+{
+  InternalData *combined = new InternalData;
+
+  // compute the Qp data on a task of its own, concurrently with the Q1 data
+  typedef typename MappingQGeneric<dim,spacedim>::InternalData GenericData;
+  Threads::Task<GenericData *> qp_task
+    = Threads::new_task (&MappingQGeneric<dim,spacedim>::get_face_data,
+                         *qp_mapping, update_flags, quadrature);
+
+  // Q1 data is only built if the Qp mapping is not used on all cells
+  if (use_mapping_q_on_all_cells == false)
+    combined->mapping_q1_data.reset (q1_mapping->get_face_data (update_flags, quadrature));
+
+  // wait for the task to finish and store the object it returned
+  combined->mapping_qp_data.reset (qp_task.return_value());
+  return combined;
+}
+
+
+
+template<int dim, int spacedim>
+typename MappingQ<dim,spacedim>::InternalData *
+MappingQ<dim,spacedim>::get_subface_data (const UpdateFlags update_flags,
+                                          const Quadrature<dim-1>& quadrature) const
+{
+  InternalData *combined = new InternalData;
+
+  // compute the Qp data on a task of its own, concurrently with the Q1 data
+  typedef typename MappingQGeneric<dim,spacedim>::InternalData GenericData;
+  Threads::Task<GenericData *> qp_task
+    = Threads::new_task (&MappingQGeneric<dim,spacedim>::get_subface_data,
+                         *qp_mapping, update_flags, quadrature);
+
+  // Q1 data is only built if the Qp mapping is not used on all cells
+  if (use_mapping_q_on_all_cells == false)
+    combined->mapping_q1_data.reset (q1_mapping->get_subface_data (update_flags, quadrature));
+
+  // wait for the task to finish and store the object it returned
+  combined->mapping_qp_data.reset (qp_task.return_value());
+  return combined;
+}
+
+
+// The similarity flag is returned in possibly modified form, because MappingQ
+// may have to recompute data even on cells that are similar to the previous one.
+template<int dim, int spacedim>
+CellSimilarity::Similarity
+MappingQ<dim,spacedim>::
+fill_fe_values (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                const CellSimilarity::Similarity                           cell_similarity,
+                const Quadrature<dim>                                     &quadrature,
+                const typename Mapping<dim,spacedim>::InternalDataBase    &internal_data,
+                internal::FEValues::MappingRelatedData<dim,spacedim>      &output_data) const
+{
+  // The data object must be of this class's own InternalData type; this is
+  // verified by the assertion, the conversion itself is a static_cast.
+  Assert (dynamic_cast<const InternalData *> (&internal_data) != 0, ExcInternalError());
+  const InternalData &q_data = static_cast<const InternalData &> (internal_data);
+
+  // A cell is treated by the reduced Q1 mapping only if the Qp mapping has not
+  // been requested for all cells and the cell has no lines at the boundary,
+  // e.g. because it lies in the interior of the domain.
+  q_data.use_mapping_q1_on_current_cell = (!use_mapping_q_on_all_cells
+                                           &&
+                                           !cell->has_boundary_lines());
+
+  // If the Qp mapping with p>1 handles this cell, the mapping changes the
+  // data, so it has to be recomputed regardless of any similarity to the
+  // previous cell. In that case the flag becomes invalid_next_cell, which
+  // also affects the next cell: should that one use Q1 data, the data
+  // left behind by this cell is still invalid for it.
+  const bool handled_by_higher_order_mapping
+    = ((q_data.use_mapping_q1_on_current_cell == false)
+       &&
+       (this->polynomial_degree > 1));
+
+  CellSimilarity::Similarity updated_cell_similarity = cell_similarity;
+  if (handled_by_higher_order_mapping)
+    updated_cell_similarity = CellSimilarity::invalid_next_cell;
+
+  // Let the mapping selected above do the work. Both receive the updated
+  // similarity flag.
+  if (q_data.use_mapping_q1_on_current_cell)
+    q1_mapping->fill_fe_values (cell,
+                                updated_cell_similarity,
+                                quadrature,
+                                *q_data.mapping_q1_data,
+                                output_data);
+  else
+    qp_mapping->fill_fe_values (cell,
+                                updated_cell_similarity,
+                                quadrature,
+                                *q_data.mapping_qp_data,
+                                output_data);
+
+  return updated_cell_similarity;
+}
+
+
+
+template<int dim, int spacedim>
+void
+MappingQ<dim,spacedim>::
+fill_fe_face_values (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                     const unsigned int                                         face_no,
+                     const Quadrature<dim-1>                                   &quadrature,
+                     const typename Mapping<dim,spacedim>::InternalDataBase    &internal_data,
+                     internal::FEValues::MappingRelatedData<dim,spacedim>      &output_data) const
+{
+  // the data object we are handed must be the InternalData object of this
+  // class. this is checked by the assertion; afterwards we down-cast
+  // without any further check
+  Assert (dynamic_cast<const InternalData *> (&internal_data) != 0,
+          ExcInternalError());
+  const InternalData &q_data = static_cast<const InternalData &> (internal_data);
+
+  // the full Qp mapping is needed if it was requested for all cells, or if
+  // the cell has boundary lines. note that this question has to be asked
+  // for the whole _cell_ and not only for the present _face_: the mapping on
+  // a face depends on the mapping of the cell, which in turn depends on
+  // whether _any_ of the faces of the cell is at the boundary
+  const bool cell_needs_qp = (use_mapping_q_on_all_cells
+                              || cell->has_boundary_lines());
+
+  // remember the decision in the data object; the transform() functions
+  // consult this flag
+  q_data.use_mapping_q1_on_current_cell = !cell_needs_qp;
+
+  // let the mapping that is responsible for this cell do the actual work,
+  // using the part of the data object that belongs to it
+  if (cell_needs_qp)
+    qp_mapping->fill_fe_face_values (cell, face_no, quadrature,
+                                     *q_data.mapping_qp_data,
+                                     output_data);
+  else
+    q1_mapping->fill_fe_face_values (cell, face_no, quadrature,
+                                     *q_data.mapping_q1_data,
+                                     output_data);
+}
+
+
+template<int dim, int spacedim>
+void
+MappingQ<dim,spacedim>::
+fill_fe_subface_values (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                        const unsigned int                                         face_no,
+                        const unsigned int                                         subface_no,
+                        const Quadrature<dim-1>                                   &quadrature,
+                        const typename Mapping<dim,spacedim>::InternalDataBase    &internal_data,
+                        internal::FEValues::MappingRelatedData<dim,spacedim>      &output_data) const
+{
+  // the data object we are handed must be the InternalData object of this
+  // class. this is checked by the assertion; afterwards we down-cast
+  // without any further check
+  Assert (dynamic_cast<const InternalData *> (&internal_data) != 0,
+          ExcInternalError());
+  const InternalData &q_data = static_cast<const InternalData &> (internal_data);
+
+  // the full Qp mapping is needed if it was requested for all cells, or if
+  // the cell has boundary lines. note that this question has to be asked
+  // for the whole _cell_ and not only for the present _face_: the mapping on
+  // a face depends on the mapping of the cell, which in turn depends on
+  // whether _any_ of the faces of the cell is at the boundary
+  const bool cell_needs_qp = (use_mapping_q_on_all_cells
+                              || cell->has_boundary_lines());
+
+  // remember the decision in the data object; the transform() functions
+  // consult this flag
+  q_data.use_mapping_q1_on_current_cell = !cell_needs_qp;
+
+  // let the mapping that is responsible for this cell do the actual work,
+  // using the part of the data object that belongs to it
+  if (cell_needs_qp)
+    qp_mapping->fill_fe_subface_values (cell, face_no, subface_no,
+                                        quadrature,
+                                        *q_data.mapping_qp_data,
+                                        output_data);
+  else
+    q1_mapping->fill_fe_subface_values (cell, face_no, subface_no,
+                                        quadrature,
+                                        *q_data.mapping_q1_data,
+                                        output_data);
+}
+
+
+
+template<int dim, int spacedim>
+void
+MappingQ<dim,spacedim>::
+transform (const ArrayView<const Tensor<1,dim> >                  &input,
+           const MappingType                                       mapping_type,
+           const typename Mapping<dim,spacedim>::InternalDataBase &mapping_data,
+           const ArrayView<Tensor<1,spacedim> >                   &output) const
+{
+  AssertDimension (input.size(), output.size());
+  Assert ((dynamic_cast<const typename MappingQ<dim,spacedim>::InternalData *> (&mapping_data)
+           != 0),
+          ExcInternalError());
+  const InternalData *const q_data = dynamic_cast<const InternalData *>(&mapping_data);
+
+  // the flag read here is stored in the data object by the
+  // fill_fe_*values() functions; it selects the mapping to forward to
+  if (!q_data->use_mapping_q1_on_current_cell)
+    qp_mapping->transform (input, mapping_type, *q_data->mapping_qp_data, output);
+  else
+    q1_mapping->transform (input, mapping_type, *q_data->mapping_q1_data, output);
+}
+
+
+
+template<int dim, int spacedim>
+void
+MappingQ<dim,spacedim>::
+transform (const ArrayView<const DerivativeForm<1, dim ,spacedim> >  &input,
+           const MappingType                                          mapping_type,
+           const typename Mapping<dim,spacedim>::InternalDataBase    &mapping_data,
+           const ArrayView<Tensor<2,spacedim> >                      &output) const
+{
+  AssertDimension (input.size(), output.size());
+  Assert ((dynamic_cast<const typename MappingQ<dim,spacedim>::InternalData *> (&mapping_data)
+           != 0),
+          ExcInternalError());
+  const InternalData *const q_data = dynamic_cast<const InternalData *>(&mapping_data);
+
+  // the flag read here is stored in the data object by the
+  // fill_fe_*values() functions; it selects the mapping to forward to
+  if (!q_data->use_mapping_q1_on_current_cell)
+    qp_mapping->transform (input, mapping_type, *q_data->mapping_qp_data, output);
+  else
+    q1_mapping->transform (input, mapping_type, *q_data->mapping_q1_data, output);
+}
+
+
+
+template<int dim, int spacedim>
+void
+MappingQ<dim,spacedim>::
+transform (const ArrayView<const Tensor<2, dim> >                 &input,
+           const MappingType                                       mapping_type,
+           const typename Mapping<dim,spacedim>::InternalDataBase &mapping_data,
+           const ArrayView<Tensor<2,spacedim> >                   &output) const
+{
+  AssertDimension (input.size(), output.size());
+  Assert ((dynamic_cast<const typename MappingQ<dim,spacedim>::InternalData *> (&mapping_data)
+           != 0),
+          ExcInternalError());
+  const InternalData *const q_data = dynamic_cast<const InternalData *>(&mapping_data);
+
+  // the flag read here is stored in the data object by the
+  // fill_fe_*values() functions; it selects the mapping to forward to
+  if (!q_data->use_mapping_q1_on_current_cell)
+    qp_mapping->transform (input, mapping_type, *q_data->mapping_qp_data, output);
+  else
+    q1_mapping->transform (input, mapping_type, *q_data->mapping_q1_data, output);
+}
+
+
+
+template<int dim, int spacedim>
+void
+MappingQ<dim,spacedim>::
+transform (const ArrayView<const DerivativeForm<2, dim ,spacedim> >  &input,
+           const MappingType                                          mapping_type,
+           const typename Mapping<dim,spacedim>::InternalDataBase    &mapping_data,
+           const ArrayView<Tensor<3,spacedim> >                      &output) const
+{
+  AssertDimension (input.size(), output.size());
+  Assert ((dynamic_cast<const typename MappingQ<dim,spacedim>::InternalData *> (&mapping_data)
+           != 0),
+          ExcInternalError());
+  const InternalData *const q_data = dynamic_cast<const InternalData *>(&mapping_data);
+
+  // the flag read here is stored in the data object by the
+  // fill_fe_*values() functions; it selects the mapping to forward to
+  if (!q_data->use_mapping_q1_on_current_cell)
+    qp_mapping->transform (input, mapping_type, *q_data->mapping_qp_data, output);
+  else
+    q1_mapping->transform (input, mapping_type, *q_data->mapping_q1_data, output);
+}
+
+
+
+template<int dim, int spacedim>
+void MappingQ<dim,spacedim>::
+transform (const ArrayView<const Tensor<3, dim> >                 &input,
+           const MappingType                                       mapping_type,
+           const typename Mapping<dim,spacedim>::InternalDataBase &mapping_data,
+           const ArrayView<Tensor<3,spacedim> >                   &output) const
+{
+  AssertDimension (input.size(), output.size());
+  Assert ((dynamic_cast<const typename MappingQ<dim,spacedim>::InternalData *> (&mapping_data)
+           != 0),
+          ExcInternalError());
+  const InternalData *const q_data = dynamic_cast<const InternalData *>(&mapping_data);
+
+  // the flag read here is stored in the data object by the
+  // fill_fe_*values() functions; it selects the mapping to forward to
+  if (!q_data->use_mapping_q1_on_current_cell)
+    qp_mapping->transform (input, mapping_type, *q_data->mapping_qp_data, output);
+  else
+    q1_mapping->transform (input, mapping_type, *q_data->mapping_q1_data, output);
+}
+
+
+
+template<int dim, int spacedim>
+Point<spacedim>
+MappingQ<dim,spacedim>::
+transform_unit_to_real_cell (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                             const Point<dim>                                 &p) const
+{
+  // cells without boundary lines are handled by the Q1 mapping we store,
+  // unless the higher order mapping was requested for all cells
+  const bool q1_suffices = !(use_mapping_q_on_all_cells || cell->has_boundary_lines());
+  if (q1_suffices)
+    return q1_mapping->transform_unit_to_real_cell (cell, p);
+
+  return qp_mapping->transform_unit_to_real_cell (cell, p);
+}
+
+
+
+template<int dim, int spacedim>
+Point<dim>
+MappingQ<dim,spacedim>::
+transform_real_to_unit_cell (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                             const Point<spacedim>                            &p) const
+{
+  // the Q1 mapping is used only for cells without boundary lines, and only
+  // if Qp was not requested for all cells and dim equals spacedim
+  const bool needs_qp = (cell->has_boundary_lines()
+                         || use_mapping_q_on_all_cells
+                         || (dim!=spacedim));
+  if (!needs_qp)
+    return q1_mapping->transform_real_to_unit_cell(cell, p);
+  return qp_mapping->transform_real_to_unit_cell(cell, p);
+}
+
+
+
+template<int dim, int spacedim>
+Mapping<dim,spacedim> *MappingQ<dim,spacedim>::clone () const
+{
+  // return a new copy; it must inherit use_mapping_q_on_all_cells, not only the degree
+  return new MappingQ<dim,spacedim>(this->polynomial_degree, this->use_mapping_q_on_all_cells);
+}
+
+
+
+// explicit instantiations
+#include "mapping_q.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/mapping_q.inst.in b/source/fe/mapping_q.inst.in
new file mode 100644
index 0000000..eda826c
--- /dev/null
+++ b/source/fe/mapping_q.inst.in
@@ -0,0 +1,25 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+// explicitly instantiate MappingQ for every combination with dim <= spacedim
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+#if deal_II_dimension <= deal_II_space_dimension 	
+    template class MappingQ<deal_II_dimension, deal_II_space_dimension>;
+#endif
+  }
+
diff --git a/source/fe/mapping_q1.cc b/source/fe/mapping_q1.cc
new file mode 100644
index 0000000..fdbda6f
--- /dev/null
+++ b/source/fe/mapping_q1.cc
@@ -0,0 +1,71 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/derivative_form.h>
+#include <deal.II/base/quadrature.h>
+#include <deal.II/base/qprojector.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/tensor_product_polynomials.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/base/std_cxx11/array.h>
+#include <deal.II/base/std_cxx11/unique_ptr.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/fe/fe_tools.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/mapping_q1.h>
+
+#include <cmath>
+#include <algorithm>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+
+// Default constructor: initialize the MappingQGeneric base class with degree 1.
+template<int dim, int spacedim>
+MappingQ1<dim,spacedim>::MappingQ1 ()
+  : MappingQGeneric<dim,spacedim> (1)
+{}
+
+
+// Return a newly allocated copy of this object, made with the copy constructor.
+template<int dim, int spacedim>
+MappingQ1<dim,spacedim> *
+MappingQ1<dim,spacedim>::clone () const
+{
+  return new MappingQ1 (*this);
+}
+
+//---------------------------------------------------------------------------
+
+// definition of the static member: a MappingQGeneric object constructed with degree 1
+template<int dim, int spacedim>
+MappingQGeneric<dim,spacedim>
+StaticMappingQ1<dim,spacedim>::mapping = MappingQGeneric<dim,spacedim>(1);
+
+
+
+//--------------------------- Explicit instantiations -----------------------
+#include "mapping_q1.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/mapping_q1.inst.in b/source/fe/mapping_q1.inst.in
new file mode 100644
index 0000000..fc5ab8b
--- /dev/null
+++ b/source/fe/mapping_q1.inst.in
@@ -0,0 +1,24 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// explicitly instantiate MappingQ1 and StaticMappingQ1 for every combination with dim <= spacedim
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+#if deal_II_dimension <= deal_II_space_dimension 	
+    template class MappingQ1<deal_II_dimension, deal_II_space_dimension>;
+    template struct StaticMappingQ1<deal_II_dimension, deal_II_space_dimension>;
+#endif
+  }
+
diff --git a/source/fe/mapping_q1_eulerian.cc b/source/fe/mapping_q1_eulerian.cc
new file mode 100644
index 0000000..35919e3
--- /dev/null
+++ b/source/fe/mapping_q1_eulerian.cc
@@ -0,0 +1,149 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/std_cxx11/array.h>
+#include <deal.II/fe/mapping_q1_eulerian.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/petsc_vector.h>
+#include <deal.II/lac/trilinos_vector.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+#include <deal.II/lac/trilinos_parallel_block_vector.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/fe/fe.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// Remember the addresses of the shift vector and of the DoFHandler it is defined on.
+template <int dim, class EulerVectorType, int spacedim>
+MappingQ1Eulerian<dim, EulerVectorType, spacedim>::
+MappingQ1Eulerian (const EulerVectorType  &euler_transform_vectors,
+                   const DoFHandler<dim,spacedim> &shiftmap_dof_handler)
+  : MappingQGeneric<dim,spacedim>(1),
+    euler_transform_vectors(&euler_transform_vectors),
+    shiftmap_dof_handler(&shiftmap_dof_handler)
+{}
+
+
+
+template <int dim, class EulerVectorType, int spacedim>
+std_cxx11::array<Point<spacedim>, GeometryInfo<dim>::vertices_per_cell>
+MappingQ1Eulerian<dim, EulerVectorType, spacedim>::
+get_vertices
+(const typename Triangulation<dim,spacedim>::cell_iterator &cell) const
+{
+  // The consistency checks below are made here rather than in the
+  // constructor: there, they would require dof_handler.distribute_dofs(fe)
+  // to be called *before* the mapping object is constructed, which is not
+  // necessarily what we want.
+
+  //TODO: Only one of these two assertions should be relevant
+  AssertDimension (spacedim, shiftmap_dof_handler->get_fe().n_dofs_per_vertex());
+  AssertDimension (shiftmap_dof_handler->get_fe().n_components(), spacedim);
+
+  AssertDimension (shiftmap_dof_handler->n_dofs(), euler_transform_vectors->size());
+
+  // turn the Triangulation cell iterator into a DoFHandler cell iterator,
+  // which gives access to DoFCellAccessor::get_dof_values()
+  typename DoFHandler<dim,spacedim>::cell_iterator dof_cell (*cell, shiftmap_dof_handler);
+
+  // nodal values for the shifts can only be obtained on active cells
+  Assert (dof_cell->active() == true, ExcInactiveCell());
+
+  // extract the values of the shift field for all dofs of this cell
+  const unsigned int n_cell_dofs = shiftmap_dof_handler->get_fe().dofs_per_cell;
+  Vector<double> cell_shifts (n_cell_dofs);
+  dof_cell->get_dof_values (*euler_transform_vectors, cell_shifts);
+
+  // vertex dofs are always numbered first. hence component c of the shift
+  // at vertex v is found at position v*spacedim+c of the values we have
+  // just extracted
+  std_cxx11::array<Point<spacedim>, GeometryInfo<dim>::vertices_per_cell> shifted_vertices;
+
+  for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+    {
+      Point<spacedim> shift;
+      for (unsigned int c=0; c<spacedim; ++c)
+        shift[c] = cell_shifts(v*spacedim+c);
+
+      // new location of the vertex: old (reference) location plus shift
+      shifted_vertices[v] = cell->vertex(v) + shift;
+    }
+
+  return shifted_vertices;
+}
+
+
+
+template<int dim, class EulerVectorType, int spacedim>
+std::vector<Point<spacedim> >
+MappingQ1Eulerian<dim,EulerVectorType,spacedim>::
+compute_mapping_support_points(const typename Triangulation<dim,spacedim>::cell_iterator &cell) const
+{
+  // the support points of this mapping are the vertices returned by
+  // get_vertices(); merely repackage the fixed-size array as a std::vector
+  const std_cxx11::array<Point<spacedim>, GeometryInfo<dim>::vertices_per_cell>
+  shifted_vertices = this->get_vertices(cell);
+
+  std::vector<Point<spacedim> > support_points (shifted_vertices.begin(),
+                                                shifted_vertices.end());
+  return support_points;
+}
+
+
+
+
+// Return a newly allocated copy of this object, made with the copy constructor.
+template <int dim, class EulerVectorType, int spacedim>
+MappingQ1Eulerian<dim,EulerVectorType,spacedim> *
+MappingQ1Eulerian<dim, EulerVectorType, spacedim>::clone () const
+{
+  return new MappingQ1Eulerian (*this);
+}
+
+
+
+template<int dim, class EulerVectorType, int spacedim>
+CellSimilarity::Similarity
+MappingQ1Eulerian<dim,EulerVectorType,spacedim>::
+fill_fe_values (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                const CellSimilarity::Similarity                           ,
+                const Quadrature<dim>                                     &quadrature,
+                const typename Mapping<dim,spacedim>::InternalDataBase    &internal_data,
+                internal::FEValues::MappingRelatedData<dim,spacedim>      &output_data) const
+{
+  // any cell similarity detected by the caller was derived from the
+  // original vertices, which are meaningless for the mapped field. the
+  // incoming value is therefore ignored (the parameter is unnamed), and
+  // invalid_next_cell is passed to the base class and returned instead
+  const CellSimilarity::Similarity no_similarity
+    = CellSimilarity::invalid_next_cell;
+
+  MappingQGeneric<dim,spacedim>::fill_fe_values (cell,
+                                                 no_similarity,
+                                                 quadrature, internal_data, output_data);
+
+  return no_similarity;
+}
+
+
+
+// explicit instantiations
+#include "mapping_q1_eulerian.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/mapping_q1_eulerian.inst.in b/source/fe/mapping_q1_eulerian.inst.in
new file mode 100644
index 0000000..32d1028
--- /dev/null
+++ b/source/fe/mapping_q1_eulerian.inst.in
@@ -0,0 +1,40 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+// instantiate MappingQ1Eulerian for the deal.II vectors and, if configured, PETSc and Trilinos vectors
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+#if deal_II_dimension <= deal_II_space_dimension 	
+    template class MappingQ1Eulerian<deal_II_dimension, Vector<double>,	deal_II_space_dimension>;
+    template class MappingQ1Eulerian<deal_II_dimension, Vector<float>,	deal_II_space_dimension>;
+#  ifdef DEAL_II_WITH_PETSC
+    template class MappingQ1Eulerian<deal_II_dimension,
+			 	    PETScWrappers::Vector, deal_II_space_dimension>;
+    template class MappingQ1Eulerian<deal_II_dimension,
+				    PETScWrappers::MPI::Vector, deal_II_space_dimension>;
+#  endif   
+#  ifdef DEAL_II_WITH_TRILINOS
+    template class MappingQ1Eulerian<deal_II_dimension,
+                                    TrilinosWrappers::Vector, deal_II_space_dimension>;
+    template class MappingQ1Eulerian<deal_II_dimension,
+                                    TrilinosWrappers::MPI::Vector, deal_II_space_dimension>;
+    template class MappingQ1Eulerian<deal_II_dimension,
+                                    TrilinosWrappers::BlockVector, deal_II_space_dimension>;
+    template class MappingQ1Eulerian<deal_II_dimension,
+                                    TrilinosWrappers::MPI::BlockVector, deal_II_space_dimension>;
+#  endif
+#endif
+  }
diff --git a/source/fe/mapping_q_eulerian.cc b/source/fe/mapping_q_eulerian.cc
new file mode 100644
index 0000000..672328a
--- /dev/null
+++ b/source/fe/mapping_q_eulerian.cc
@@ -0,0 +1,265 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/petsc_vector.h>
+#include <deal.II/lac/trilinos_vector.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+#include <deal.II/lac/trilinos_parallel_block_vector.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/fe_tools.h>
+#include <deal.II/fe/mapping_q_eulerian.h>
+#include <deal.II/fe/mapping_q1_eulerian.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+// .... MAPPING Q EULERIAN CONSTRUCTOR
+
+// fe_values is set up on support_quadrature for the finite element of the Euler DoFHandler.
+template <int dim, class EulerVectorType, int spacedim>
+MappingQEulerian<dim, EulerVectorType, spacedim>::MappingQEulerianGeneric::
+MappingQEulerianGeneric (const unsigned int degree,
+                         const MappingQEulerian<dim, EulerVectorType, spacedim> &mapping_q_eulerian)
+  : MappingQGeneric<dim,spacedim>(degree),
+    mapping_q_eulerian (mapping_q_eulerian),
+    support_quadrature(degree),
+    fe_values(mapping_q_eulerian.euler_dof_handler->get_fe(),
+              support_quadrature,
+              update_values | update_q_points)
+{}
+
+template <int dim, class EulerVectorType, int spacedim>
+MappingQEulerian<dim, EulerVectorType, spacedim>::
+MappingQEulerian (const unsigned int degree,
+                  const EulerVectorType &euler_vector,
+                  const DoFHandler<dim,spacedim> &euler_dof_handler)
+  : MappingQ<dim,spacedim>(degree, true),
+    euler_vector(&euler_vector),
+    euler_dof_handler(&euler_dof_handler)
+{
+  // the q1 mapping used for interior cells was previously set by the
+  // MappingQ constructor. replace it by a MappingQ1Eulerian that works on
+  // the current vector and DoFHandler ...
+  typedef MappingQ1Eulerian<dim,EulerVectorType,spacedim> Q1EulerianMapping;
+  this->q1_mapping.reset (new Q1EulerianMapping(euler_vector, euler_dof_handler));
+
+  // ... and also reset the qp mapping pointer, to an object of our own
+  // generic class that refers back to this object
+  this->qp_mapping.reset (new MappingQEulerianGeneric(degree,*this));
+}
+
+
+
+template <int dim, class EulerVectorType, int spacedim>
+MappingQEulerian<dim, EulerVectorType, spacedim>::
+MappingQEulerian (const unsigned int degree,
+                  const DoFHandler<dim,spacedim> &euler_dof_handler,
+                  const EulerVectorType &euler_vector)
+  : MappingQ<dim,spacedim>(degree, true),
+    euler_vector(&euler_vector),
+    euler_dof_handler(&euler_dof_handler)
+{
+  // the q1 mapping used for interior cells was previously set by the
+  // MappingQ constructor. replace it by a MappingQ1Eulerian that works on
+  // the current vector and DoFHandler ...
+  typedef MappingQ1Eulerian<dim,EulerVectorType,spacedim> Q1EulerianMapping;
+  this->q1_mapping.reset (new Q1EulerianMapping(euler_vector, euler_dof_handler));
+
+  // ... and also reset the qp mapping pointer, to an object of our own
+  // generic class that refers back to this object
+  this->qp_mapping.reset (new MappingQEulerianGeneric(degree,*this));
+}
+
+
+
+template <int dim, class EulerVectorType, int spacedim>
+Mapping<dim,spacedim> *
+MappingQEulerian<dim, EulerVectorType, spacedim>::clone () const
+{
+  // the copy refers to the same Euler vector and DoFHandler as this object
+  return new MappingQEulerian (this->get_degree(),
+                               *euler_vector, *euler_dof_handler);
+}
+
+
+
+// .... SUPPORT QUADRATURE CONSTRUCTOR
+
+template <int dim, class EulerVectorType, int spacedim>
+MappingQEulerian<dim,EulerVectorType,spacedim>::MappingQEulerianGeneric::
+SupportQuadrature::
+SupportQuadrature (const unsigned int map_degree)
+  :
+  Quadrature<dim>(Utilities::fixed_power<dim>(map_degree+1))
+{
+  // the support points on the unit cell are (in accordance with MappingQ)
+  // the points of QGaussLobatto, which we obtain in lexicographic order
+  const QGaussLobatto<dim> lobatto_points(map_degree+1);
+  const unsigned int n_support_points = lobatto_points.size();
+
+  // set up the dofs-per-object vector: entry 0 is 1, and each further
+  // entry is (map_degree-1) times its predecessor. this fragment is taken
+  // almost verbatim from the MappingQ class
+  std::vector<unsigned int> dofs_per_object(dim+1, 1U);
+  for (unsigned int k=1; k<dofs_per_object.size(); ++k)
+    dofs_per_object[k] = dofs_per_object[k-1]*(map_degree-1);
+  const FiniteElementData<dim> fe_data (dofs_per_object, 1, map_degree);
+
+  // obtain the renumbering from the lexicographic to the hierarchic scheme
+  std::vector<unsigned int> lexicographic_to_hierarchic(n_support_points);
+  FETools::lexicographic_to_hierarchic_numbering (fe_data, lexicographic_to_hierarchic);
+
+  // finally store the points in the required (hierarchic) order
+  for (unsigned int q=0; q<n_support_points; ++q)
+    this->quadrature_points[lexicographic_to_hierarchic[q]] = lobatto_points.point(q);
+}
+
+
+
+// .... COMPUTE MAPPING SUPPORT POINTS
+
+template <int dim, class EulerVectorType, int spacedim>
+std_cxx11::array<Point<spacedim>, GeometryInfo<dim>::vertices_per_cell>
+MappingQEulerian<dim, EulerVectorType, spacedim>::
+get_vertices
+(const typename Triangulation<dim,spacedim>::cell_iterator &cell) const
+{
+  // ask the qp mapping (a MappingQEulerianGeneric) for all support points
+  const MappingQEulerianGeneric &qp_eulerian
+    = dynamic_cast<const MappingQEulerianGeneric &>(*this->qp_mapping);
+  const std::vector<Point<spacedim> > support_points = qp_eulerian.compute_mapping_support_points(cell);
+
+  // the vertices are the first 2^dim of the mapping support points
+  std_cxx11::array<Point<spacedim>, GeometryInfo<dim>::vertices_per_cell> vertex_locations;
+  for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+    vertex_locations[v] = support_points[v];
+  return vertex_locations;
+}
+
+
+
+template <int dim, class EulerVectorType, int spacedim>
+std_cxx11::array<Point<spacedim>, GeometryInfo<dim>::vertices_per_cell>
+MappingQEulerian<dim, EulerVectorType, spacedim>::MappingQEulerianGeneric::
+get_vertices (const typename Triangulation<dim,spacedim>::cell_iterator &cell) const
+{
+  // simply forward to the MappingQEulerian object this object refers to
+  return mapping_q_eulerian.get_vertices (cell);
+}
+
+
+
+
+template <int dim, class EulerVectorType, int spacedim>
+std::vector<Point<spacedim> >
+MappingQEulerian<dim, EulerVectorType, spacedim>::MappingQEulerianGeneric::
+compute_mapping_support_points (const typename Triangulation<dim,spacedim>::cell_iterator &cell) const
+{
+  // return the support points of the mapping on the given cell: the points
+  // of the support quadrature, shifted by the Euler vector field
+  const types::global_dof_index n_dofs  = mapping_q_eulerian.euler_dof_handler->n_dofs();
+  const types::global_dof_index vector_size = mapping_q_eulerian.euler_vector->size();
+  (void)n_dofs;       // both values are only used in the assertion below
+  (void)vector_size;
+
+  AssertDimension(vector_size,n_dofs);
+
+  // we then transform our tria iterator into a dof iterator so we can access
+  // data not associated with triangulations
+  typename DoFHandler<dim,spacedim>::cell_iterator dof_cell(*cell,
+                                                            mapping_q_eulerian.euler_dof_handler);
+
+  Assert (dof_cell->active() == true, ExcInactiveCell());
+
+  // our quadrature rule is chosen so that each quadrature point corresponds
+  // to a support point in the undeformed configuration.  we can then query
+  // the given displacement field at these points to determine the shift
+  // vector that maps the support points to the deformed configuration.
+
+  // we assume that the given field contains spacedim displacement components,
+  // but that there may be other solution components as well (e.g. pressures).
+  // this class therefore assumes that the first spacedim components represent
+  // the actual shift vector we need, and simply ignores any components after
+  // that.  this implies that the user should order components appropriately,
+  // or create a separate dof handler for the displacements.
+
+  const unsigned int n_support_pts = support_quadrature.size();
+  const unsigned int n_components  = mapping_q_eulerian.euler_dof_handler->get_fe().n_components();
+
+  Assert (n_components >= spacedim, ExcDimensionMismatch(n_components, spacedim) );
+
+  std::vector<Vector<typename EulerVectorType::value_type> >
+  shift_vector(n_support_pts,
+               Vector<typename EulerVectorType::value_type>(n_components));
+
+  // fill shift vector for each support point using an fe_values object. make
+  // sure that the fe_values variable isn't used simultaneously from different
+  // threads; the lock object lives until the end of this function
+  Threads::Mutex::ScopedLock lock(fe_values_mutex);
+  fe_values.reinit(dof_cell);
+  fe_values.get_function_values(*mapping_q_eulerian.euler_vector, shift_vector);
+
+  // and finally compute the positions of the support points in the deformed
+  // configuration: quadrature point plus the first spacedim shift components
+  std::vector<Point<spacedim> > a(n_support_pts);
+  for (unsigned int q=0; q<n_support_pts; ++q)
+    {
+      a[q] = fe_values.quadrature_point(q);
+      for (unsigned int d=0; d<spacedim; ++d)
+        a[q](d) += shift_vector[q](d);
+    }
+
+  return a;
+}
+
+
+
+template<int dim, class EulerVectorType, int spacedim>
+CellSimilarity::Similarity
+MappingQEulerian<dim,EulerVectorType,spacedim>::
+fill_fe_values (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                const CellSimilarity::Similarity                           ,
+                const Quadrature<dim>                                     &quadrature,
+                const typename Mapping<dim,spacedim>::InternalDataBase    &internal_data,
+                internal::FEValues::MappingRelatedData<dim,spacedim>      &output_data) const
+{
+  // any cell similarity detected by the caller was derived from the
+  // original vertices, which are meaningless for the mapped field. the
+  // incoming value is therefore ignored (the parameter is unnamed), and
+  // invalid_next_cell is passed to the base class and returned instead
+  const CellSimilarity::Similarity no_similarity
+    = CellSimilarity::invalid_next_cell;
+
+  MappingQ<dim,spacedim>::fill_fe_values (cell,
+                                          no_similarity,
+                                          quadrature, internal_data, output_data);
+
+  return no_similarity;
+}
+
+
+
+// explicit instantiations
+#include "mapping_q_eulerian.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/mapping_q_eulerian.inst.in b/source/fe/mapping_q_eulerian.inst.in
new file mode 100644
index 0000000..dd6397f
--- /dev/null
+++ b/source/fe/mapping_q_eulerian.inst.in
@@ -0,0 +1,42 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// Instantiate MappingQEulerian for dim <= spacedim and the vector types below.
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+#if deal_II_dimension <= deal_II_space_dimension
+    template class MappingQEulerian<deal_II_dimension, Vector<double>, deal_II_space_dimension>;
+    template class MappingQEulerian<deal_II_dimension, Vector<float>,  deal_II_space_dimension>;
+#  ifdef DEAL_II_WITH_PETSC
+    template class MappingQEulerian<deal_II_dimension,
+                                    PETScWrappers::Vector, deal_II_space_dimension>;
+    template class MappingQEulerian<deal_II_dimension,
+                                    PETScWrappers::MPI::Vector, deal_II_space_dimension>;
+#  endif
+#  ifdef DEAL_II_WITH_TRILINOS
+    template class MappingQEulerian<deal_II_dimension,
+                                    TrilinosWrappers::Vector, deal_II_space_dimension>;
+    template class MappingQEulerian<deal_II_dimension,
+                                    TrilinosWrappers::MPI::Vector, deal_II_space_dimension>;
+    template class MappingQEulerian<deal_II_dimension,
+                                    TrilinosWrappers::BlockVector, deal_II_space_dimension>;
+    template class MappingQEulerian<deal_II_dimension,
+                                    TrilinosWrappers::MPI::BlockVector, deal_II_space_dimension>;
+#  endif
+
+#endif
+  }
+
+
diff --git a/source/fe/mapping_q_generic.cc b/source/fe/mapping_q_generic.cc
new file mode 100644
index 0000000..f8452f9
--- /dev/null
+++ b/source/fe/mapping_q_generic.cc
@@ -0,0 +1,3894 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/derivative_form.h>
+#include <deal.II/base/quadrature.h>
+#include <deal.II/base/qprojector.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/tensor_product_polynomials.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/base/std_cxx11/array.h>
+#include <deal.II/base/std_cxx11/unique_ptr.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/tria_boundary.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/fe/fe_tools.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/mapping_q_generic.h>
+#include <deal.II/fe/mapping_q1.h>
+
+#include <cmath>
+#include <algorithm>
+#include <numeric>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace internal
+{
+  namespace MappingQ1
+  {
+    namespace
+    {
+      // The overloads of transform_real_to_unit_cell below remain templates
+      // on the space dimension, although they only make sense for dim ==
+      // spacedim: all of them get instantiated before the compiler can drop
+      // the dead branch of the 'if (dim == spacedim)' statement in the
+      // general transform_real_to_unit_cell. 1d: invert the affine map.
+      template<int spacedim>
+      Point<1>
+      transform_real_to_unit_cell
+      (const std_cxx11::array<Point<spacedim>, GeometryInfo<1>::vertices_per_cell> &vertices,
+       const Point<spacedim> &p)
+      {
+        Assert(spacedim == 1, ExcInternalError());
+        const double x0 = vertices[0](0);
+        return Point<1>((p[0] - x0)/(vertices[1](0) - x0));
+      }
+
+      // 2d: closed-form inverse. Solves a*eta^2 + b*eta + c = 0 for eta, then
+      // computes xi from eta; throws ExcTransformationFailed if that fails.
+      template<int spacedim>
+      Point<2>
+      transform_real_to_unit_cell
+      (const std_cxx11::array<Point<spacedim>, GeometryInfo<2>::vertices_per_cell> &vertices,
+       const Point<spacedim> &p)
+      {
+        Assert(spacedim == 2, ExcInternalError());
+        const double x = p(0);
+        const double y = p(1);
+        // x and y coordinates of the four cell vertices
+        const double x0 = vertices[0](0);
+        const double x1 = vertices[1](0);
+        const double x2 = vertices[2](0);
+        const double x3 = vertices[3](0);
+
+        const double y0 = vertices[0](1);
+        const double y1 = vertices[1](1);
+        const double y2 = vertices[2](1);
+        const double y3 = vertices[3](1);
+        // coefficients of the quadratic equation a*eta^2 + b*eta + c = 0
+        const double a = (x1 - x3)*(y0 - y2) - (x0 - x2)*(y1 - y3);
+        const double b = -(x0 - x1 - x2 + x3)*y + (x - 2*x1 + x3)*y0 - (x - 2*x0 + x2)*y1
+                         - (x - x1)*y2 + (x - x0)*y3;
+        const double c = (x0 - x1)*y - (x - x1)*y0 + (x - x0)*y1;
+
+        const double discriminant = b*b - 4*a*c;
+        // exit if the point is not in the cell (this is the only case where the
+        // discriminant is negative)
+        if (discriminant < 0.0)
+          {
+            AssertThrow (false,
+                         (typename Mapping<spacedim,spacedim>::ExcTransformationFailed()));
+          }
+
+        double eta1;
+        double eta2;
+        // special case #1: a is zero (and b is not), so b*eta + c = 0 is linear
+        if (a == 0.0 && b != 0.0)
+          {
+            eta1 = -c/b;
+            eta2 = -c/b;
+          }
+        // special case #2: if c is very small or the square root of the
+        // discriminant is nearly b.
+        else if (std::abs(c) < 1e-12*std::abs(b)
+                 || std::abs(std::sqrt(discriminant) - b) <= 1e-14*std::abs(b))
+          {
+            eta1 = (-b - std::sqrt(discriminant)) / (2*a);
+            eta2 = (-b + std::sqrt(discriminant)) / (2*a);
+          }
+        // finally, use the numerically stable version of the quadratic formula:
+        else
+          {
+            eta1 = 2*c / (-b - std::sqrt(discriminant));
+            eta2 = 2*c / (-b + std::sqrt(discriminant));
+          }
+        // pick the one closer to the center of the cell.
+        const double eta = (std::abs(eta1 - 0.5) < std::abs(eta2 - 0.5)) ? eta1 : eta2;
+
+        /*
+         * There are two ways to compute xi from eta, but either one may have a
+         * zero denominator.
+         */
+        const double subexpr0 = -eta*x2 + x0*(eta - 1);
+        const double xi_denominator0 = eta*x3 - x1*(eta - 1) + subexpr0;
+        const double max_x = std::max(std::max(std::abs(x0), std::abs(x1)),
+                                      std::max(std::abs(x2), std::abs(x3)));
+
+        if (std::abs(xi_denominator0) > 1e-10*max_x)
+          {
+            const double xi = (x + subexpr0)/xi_denominator0;
+            return Point<2>(xi, eta);
+          }
+        else
+          {
+            const double max_y = std::max(std::max(std::abs(y0), std::abs(y1)),
+                                          std::max(std::abs(y2), std::abs(y3)));
+            const double subexpr1 = -eta*y2 + y0*(eta - 1);
+            const double xi_denominator1 = eta*y3 - y1*(eta - 1) + subexpr1;
+            if (std::abs(xi_denominator1) > 1e-10*max_y)
+              {
+                const double xi = (subexpr1 + y)/xi_denominator1;
+                return Point<2>(xi, eta);
+              }
+            else // give up and throw; presumably the caller then tries Newton iteration
+              {
+                AssertThrow (false,
+                             (typename Mapping<spacedim,spacedim>::ExcTransformationFailed()));
+              }
+          }
+        // bogus return to placate compiler. It should not be possible to get
+        // here.
+        Assert(false, ExcInternalError());
+        return Point<2>(std::numeric_limits<double>::quiet_NaN(),
+                        std::numeric_limits<double>::quiet_NaN());
+      }
+
+      // 3d: no closed-form inverse is implemented; this overload must not be called.
+      template<int spacedim>
+      Point<3>
+      transform_real_to_unit_cell
+      (const std_cxx11::array<Point<spacedim>, GeometryInfo<3>::vertices_per_cell> &vertices,
+       const Point<spacedim> &p)
+      {
+        (void)vertices;
+        (void)p;
+        Assert(false, ExcInternalError());
+        return Point<3>();
+      }
+
+
+
+      /**
+       * Precomputed tables used to obtain an initial guess for the Newton
+       * method in transform_real_to_unit_cell. The initial guess is found in
+       * the following way:
+       * <ul>
+       * <li> Find the least squares dim-dimensional plane approximating the
+       * cell vertices, i.e. an affine map A x_hat + b from the reference cell
+       * to real space.
+       * <li> Solve the equation A x_hat + b = p for x_hat.
+       * <li> Use this x_hat as the starting point of the Newton method.
+       * </ul>
+       *
+       * @note If dim<spacedim, p is first projected onto the plane.
+       *
+       * @note If dim==1 (for any spacedim) the initial guess is the exact
+       * solution and no Newton iteration is needed.
+       *
+       * Details of the least squares plane: we look for a spacedim x (dim+1)
+       * matrix X such that X * M = Y, where M is a (dim+1) x n_vertices
+       * matrix and Y is a spacedim x n_vertices matrix. The i-th column of M
+       * is unit_vertex[i] with a 1 appended as last entry (so the last row of
+       * M is all 1's); the i-th column of Y is real_vertex[i]. Splitting
+       * X=[A|b], the least squares approximation is A x_hat + b. Classically,
+       * X = Y * (M^t (M M^t)^{-1}). The matrix K = M^t * (M M^t)^{-1} =
+       * [KA Kb] does not depend on the cell and is therefore precomputed in
+       * the arrays KA and Kb below. Finally A = Y*KA and b = Y*Kb.
+       */
+      template <int dim>
+      struct TransformR2UInitialGuess
+      {
+        static const double KA[GeometryInfo<dim>::vertices_per_cell][dim];
+        static const double Kb[GeometryInfo<dim>::vertices_per_cell];
+      };
+
+
+      /*
+        1d tables, generated with Octave:
+          M=[0 1; 1 1];
+          K1 = transpose(M) * inverse (M*transpose(M));
+          printf ("{%f, %f},\n", K1' );
+        Row v of the printed output is {KA[v][0], Kb[v]}.
+      */
+      template <>
+      const double TransformR2UInitialGuess<1>::KA[GeometryInfo<1>::vertices_per_cell][1] =
+      {
+        {-1.0},
+        { 1.0}
+      };
+
+      template <>
+      const double TransformR2UInitialGuess<1>::Kb[GeometryInfo<1>::vertices_per_cell] =
+      {
+        1.0, 0.0
+      };
+
+
+      /*
+        2d tables, generated with Octave:
+          M=[0 1 0 1;0 0 1 1;1 1 1 1];
+          K2 = transpose(M) * inverse (M*transpose(M));
+          printf ("{%f, %f, %f},\n", K2' );
+        Row v of the printed output is {KA[v][0], KA[v][1], Kb[v]}, i.e.
+        the last column of K2 is Kb.
+      */
+      template <>
+      const double TransformR2UInitialGuess<2>::KA[GeometryInfo<2>::vertices_per_cell][2] =
+      {
+        {-0.5, -0.5},
+        { 0.5, -0.5},
+        {-0.5,  0.5},
+        { 0.5,  0.5}
+      };
+
+      template <>
+      const double TransformR2UInitialGuess<2>::Kb[GeometryInfo<2>::vertices_per_cell] =
+      {
+        0.75, 0.25, 0.25, -0.25
+      };
+
+
+      /*
+        3d tables, generated with Octave:
+          M=[0 1 0 1 0 1 0 1;0 0 1 1 0 0 1 1; 0 0 0 0 1 1 1 1; 1 1 1 1 1 1 1 1];
+          K3 = transpose(M) * inverse (M*transpose(M))
+          printf ("{%f, %f, %f, %f},\n", K3' );
+        Row v of the printed output is {KA[v][0], KA[v][1], KA[v][2], Kb[v]},
+        i.e. the last column of K3 is Kb.
+      */
+      template <>
+      const double TransformR2UInitialGuess<3>::KA[GeometryInfo<3>::vertices_per_cell][3] =
+      {
+        {-0.25, -0.25, -0.25},
+        { 0.25, -0.25, -0.25},
+        {-0.25,  0.25, -0.25},
+        { 0.25,  0.25, -0.25},
+        {-0.25, -0.25,  0.25},
+        { 0.25, -0.25,  0.25},
+        {-0.25,  0.25,  0.25},
+        { 0.25,  0.25,  0.25}
+      };
+
+
+      template <>
+      const double TransformR2UInitialGuess<3>::Kb[GeometryInfo<3>::vertices_per_cell] =
+      {
+        0.5,  0.25, 0.25,  0.0,
+        0.25, 0.0,  0.0,  -0.25
+      };
+
+      // Solve A x_hat + b = p (A = Y*KA, b = Y*Kb) for the initial guess x_hat.
+      template<int dim, int spacedim>
+      Point<dim>
+      transform_real_to_unit_cell_initial_guess (const std::vector<Point<spacedim> > &vertex,
+                                                 const Point<spacedim>               &p)
+      {
+        const unsigned int n_vertices = GeometryInfo<dim>::vertices_per_cell;
+        // wrap the precomputed tables in linear algebra objects
+        dealii::FullMatrix<double> KA(n_vertices, dim);
+        KA.fill( (double *)(TransformR2UInitialGuess<dim>::KA) );
+        dealii::Vector<double> Kb(n_vertices);
+        for (unsigned int v=0; v<n_vertices; ++v)
+          Kb(v) = TransformR2UInitialGuess<dim>::Kb[v];
+
+        // column v of Y holds the coordinates of real vertex v
+        FullMatrix<double> Y(spacedim, n_vertices);
+        for (unsigned int v=0; v<n_vertices; ++v)
+          for (unsigned int d=0; d<spacedim; ++d)
+            Y(d,v) = vertex[v][d];
+
+        FullMatrix<double> A(spacedim,dim);
+        Y.mmult(A,KA); // A = Y*KA
+        dealii::Vector<double> rhs(spacedim);
+        Y.vmult(rhs,Kb); // rhs = Y*Kb = b
+
+        // rhs = -(b - p), the right hand side of A x_hat = p - b
+        for (unsigned int d=0; d<spacedim; ++d)
+          rhs(d) -= p[d];
+        rhs*=-1;
+
+        // a non-square A (dim<spacedim) only has a left inverse
+        FullMatrix<double> A_inverse(dim,spacedim);
+        if (dim<spacedim)
+          A_inverse.left_invert(A);
+        else
+          A_inverse.invert(A);
+        dealii::Vector<double> x_hat(dim);
+        A_inverse.vmult(x_hat,rhs); // x_hat = A^{-1}*rhs
+        Point<dim> p_unit;
+        for (unsigned int d=0; d<dim; ++d)
+          p_unit[d]=x_hat(d);
+        return p_unit;
+      }
+      // 1d: store the values of the two linear shape functions 1-x and x, and
+      // of their derivatives, at every unit point. Empty arrays are skipped.
+      template <int spacedim>
+      void
+      compute_shape_function_values (const unsigned int            n_shape_functions,
+                                     const std::vector<Point<1> > &unit_points,
+                                     typename dealii::MappingQGeneric<1,spacedim>::InternalData &data)
+      {
+        (void)n_shape_functions; // otherwise only used inside Assert
+        const unsigned int n_points=unit_points.size();
+        for (unsigned int q = 0 ; q < n_points ; ++q)
+          {
+            const double x = unit_points[q](0);
+
+            // values
+            if (data.shape_values.size()!=0)
+              {
+                Assert(data.shape_values.size()==n_shape_functions*n_points,
+                       ExcInternalError());
+                data.shape(q,0) = 1.-x;
+                data.shape(q,1) = x;
+              }
+
+            // first derivatives (constant)
+            if (data.shape_derivatives.size()!=0)
+              {
+                Assert(data.shape_derivatives.size()==n_shape_functions*n_points,
+                       ExcInternalError());
+                data.derivative(q,0)[0] = -1.;
+                data.derivative(q,1)[0] = 1.;
+              }
+
+            if (data.shape_second_derivatives.size()!=0)
+              {
+                // the following may or may not work if dim != spacedim
+                Assert (spacedim == 1, ExcNotImplemented());
+                Assert(data.shape_second_derivatives.size()==n_shape_functions*n_points,
+                       ExcInternalError());
+                for (unsigned int i=0; i<2; ++i)
+                  data.second_derivative(q,i)[0][0] = 0;
+              }
+
+            if (data.shape_third_derivatives.size()!=0)
+              {
+                // if lower order derivatives don't work, neither should this
+                Assert (spacedim == 1, ExcNotImplemented());
+                Assert(data.shape_third_derivatives.size()==n_shape_functions*n_points,
+                       ExcInternalError());
+                for (unsigned int i=0; i<2; ++i)
+                  data.third_derivative(q,i) = Tensor<3,1>();
+              }
+
+            if (data.shape_fourth_derivatives.size()!=0)
+              {
+                // if lower order derivatives don't work, neither should this
+                Assert (spacedim == 1, ExcNotImplemented());
+                Assert(data.shape_fourth_derivatives.size()==n_shape_functions*n_points,
+                       ExcInternalError());
+                for (unsigned int i=0; i<2; ++i)
+                  data.fourth_derivative(q,i) = Tensor<4,1>();
+              }
+          }
+      }
+
+      // 2d: store the values of the four bilinear shape functions, and of
+      // their derivatives, at every unit point. Empty arrays are skipped.
+      template <int spacedim>
+      void
+      compute_shape_function_values (const unsigned int            n_shape_functions,
+                                     const std::vector<Point<2> > &unit_points,
+                                     typename dealii::MappingQGeneric<2,spacedim>::InternalData &data)
+      {
+        (void)n_shape_functions; // otherwise only used inside Assert
+        const unsigned int n_points=unit_points.size();
+        for (unsigned int q = 0 ; q < n_points ; ++q)
+          {
+            const double x = unit_points[q](0);
+            const double y = unit_points[q](1);
+
+            // values
+            if (data.shape_values.size()!=0)
+              {
+                Assert(data.shape_values.size()==n_shape_functions*n_points,
+                       ExcInternalError());
+                data.shape(q,0) = (1.-x)*(1.-y);
+                data.shape(q,1) = x*(1.-y);
+                data.shape(q,2) = (1.-x)*y;
+                data.shape(q,3) = x*y;
+              }
+
+            // first derivatives, grouped by shape function
+            if (data.shape_derivatives.size()!=0)
+              {
+                Assert(data.shape_derivatives.size()==n_shape_functions*n_points,
+                       ExcInternalError());
+                data.derivative(q,0)[0] = (y-1.);
+                data.derivative(q,0)[1] = (x-1.);
+                data.derivative(q,1)[0] = (1.-y);
+                data.derivative(q,1)[1] = -x;
+                data.derivative(q,2)[0] = -y;
+                data.derivative(q,2)[1] = (1.-x);
+                data.derivative(q,3)[0] = y;
+                data.derivative(q,3)[1] = x;
+              }
+
+            // second derivatives: only the mixed one is nonzero
+            if (data.shape_second_derivatives.size()!=0)
+              {
+                Assert(data.shape_second_derivatives.size()==n_shape_functions*n_points,
+                       ExcInternalError());
+                // d^2/(dx dy) of shape functions 0..3
+                const double mixed[4] = {1., -1., -1., 1.};
+                for (unsigned int i=0; i<4; ++i)
+                  {
+                    data.second_derivative(q,i)[0][0] = 0;
+                    data.second_derivative(q,i)[0][1] = mixed[i];
+                    data.second_derivative(q,i)[1][0] = mixed[i];
+                    data.second_derivative(q,i)[1][1] = 0;
+                  }
+              }
+
+            // third derivatives vanish
+            if (data.shape_third_derivatives.size()!=0)
+              {
+                Assert(data.shape_third_derivatives.size()==n_shape_functions*n_points,
+                       ExcInternalError());
+                for (unsigned int i=0; i<4; ++i)
+                  data.third_derivative(q,i) = Tensor<3,2>();
+              }
+
+            // fourth derivatives vanish
+            if (data.shape_fourth_derivatives.size()!=0)
+              {
+                Assert(data.shape_fourth_derivatives.size()==n_shape_functions*n_points,
+                       ExcInternalError());
+                for (unsigned int i=0; i<4; ++i)
+                  data.fourth_derivative(q,i) = Tensor<4,2>();
+              }
+          }
+      }
+
+      // 3d: store the values of the eight trilinear shape functions, and of
+      // their derivatives, at every unit point. Empty arrays are skipped.
+      template <int spacedim>
+      void
+      compute_shape_function_values (const unsigned int            n_shape_functions,
+                                     const std::vector<Point<3> > &unit_points,
+                                     typename dealii::MappingQGeneric<3,spacedim>::InternalData &data)
+      {
+        (void)n_shape_functions;
+        const unsigned int n_points=unit_points.size();
+        for (unsigned int k = 0 ; k < n_points ; ++k)
+          {
+            double x = unit_points[k](0);
+            double y = unit_points[k](1);
+            double z = unit_points[k](2);
+
+            if (data.shape_values.size()!=0)
+              {
+                Assert(data.shape_values.size()==n_shape_functions*n_points,
+                       ExcInternalError());
+                data.shape(k,0) = (1.-x)*(1.-y)*(1.-z);
+                data.shape(k,1) = x*(1.-y)*(1.-z);
+                data.shape(k,2) = (1.-x)*y*(1.-z);
+                data.shape(k,3) = x*y*(1.-z);
+                data.shape(k,4) = (1.-x)*(1.-y)*z;
+                data.shape(k,5) = x*(1.-y)*z;
+                data.shape(k,6) = (1.-x)*y*z;
+                data.shape(k,7) = x*y*z;
+              }
+            if (data.shape_derivatives.size()!=0)
+              {
+                Assert(data.shape_derivatives.size()==n_shape_functions*n_points,
+                       ExcInternalError());
+                data.derivative(k,0)[0] = (y-1.)*(1.-z);
+                data.derivative(k,1)[0] = (1.-y)*(1.-z);
+                data.derivative(k,2)[0] = -y*(1.-z);
+                data.derivative(k,3)[0] = y*(1.-z);
+                data.derivative(k,4)[0] = (y-1.)*z;
+                data.derivative(k,5)[0] = (1.-y)*z;
+                data.derivative(k,6)[0] = -y*z;
+                data.derivative(k,7)[0] = y*z;
+                data.derivative(k,0)[1] = (x-1.)*(1.-z);
+                data.derivative(k,1)[1] = -x*(1.-z);
+                data.derivative(k,2)[1] = (1.-x)*(1.-z);
+                data.derivative(k,3)[1] = x*(1.-z);
+                data.derivative(k,4)[1] = (x-1.)*z;
+                data.derivative(k,5)[1] = -x*z;
+                data.derivative(k,6)[1] = (1.-x)*z;
+                data.derivative(k,7)[1] = x*z;
+                data.derivative(k,0)[2] = (x-1)*(1.-y);
+                data.derivative(k,1)[2] = x*(y-1.);
+                data.derivative(k,2)[2] = (x-1.)*y;
+                data.derivative(k,3)[2] = -x*y;
+                data.derivative(k,4)[2] = (1.-x)*(1.-y);
+                data.derivative(k,5)[2] = x*(1.-y);
+                data.derivative(k,6)[2] = (1.-x)*y;
+                data.derivative(k,7)[2] = x*y;
+              }
+            if (data.shape_second_derivatives.size()!=0)
+              {
+                // the following may or may not
+                // work if dim != spacedim
+                Assert (spacedim == 3, ExcNotImplemented());
+
+                Assert(data.shape_second_derivatives.size()==n_shape_functions*n_points,
+                       ExcInternalError());
+                data.second_derivative(k,0)[0][0] = 0;
+                data.second_derivative(k,1)[0][0] = 0;
+                data.second_derivative(k,2)[0][0] = 0;
+                data.second_derivative(k,3)[0][0] = 0;
+                data.second_derivative(k,4)[0][0] = 0;
+                data.second_derivative(k,5)[0][0] = 0;
+                data.second_derivative(k,6)[0][0] = 0;
+                data.second_derivative(k,7)[0][0] = 0;
+                data.second_derivative(k,0)[1][1] = 0;
+                data.second_derivative(k,1)[1][1] = 0;
+                data.second_derivative(k,2)[1][1] = 0;
+                data.second_derivative(k,3)[1][1] = 0;
+                data.second_derivative(k,4)[1][1] = 0;
+                data.second_derivative(k,5)[1][1] = 0;
+                data.second_derivative(k,6)[1][1] = 0;
+                data.second_derivative(k,7)[1][1] = 0;
+                data.second_derivative(k,0)[2][2] = 0;
+                data.second_derivative(k,1)[2][2] = 0;
+                data.second_derivative(k,2)[2][2] = 0;
+                data.second_derivative(k,3)[2][2] = 0;
+                data.second_derivative(k,4)[2][2] = 0;
+                data.second_derivative(k,5)[2][2] = 0;
+                data.second_derivative(k,6)[2][2] = 0;
+                data.second_derivative(k,7)[2][2] = 0;
+                // mixed derivatives d^2/(dx dy), stored symmetrically
+                data.second_derivative(k,0)[0][1] = (1.-z);
+                data.second_derivative(k,1)[0][1] = -(1.-z);
+                data.second_derivative(k,2)[0][1] = -(1.-z);
+                data.second_derivative(k,3)[0][1] = (1.-z);
+                data.second_derivative(k,4)[0][1] = z;
+                data.second_derivative(k,5)[0][1] = -z;
+                data.second_derivative(k,6)[0][1] = -z;
+                data.second_derivative(k,7)[0][1] = z;
+                data.second_derivative(k,0)[1][0] = (1.-z);
+                data.second_derivative(k,1)[1][0] = -(1.-z);
+                data.second_derivative(k,2)[1][0] = -(1.-z);
+                data.second_derivative(k,3)[1][0] = (1.-z);
+                data.second_derivative(k,4)[1][0] = z;
+                data.second_derivative(k,5)[1][0] = -z;
+                data.second_derivative(k,6)[1][0] = -z;
+                data.second_derivative(k,7)[1][0] = z;
+                // mixed derivatives d^2/(dx dz), stored symmetrically
+                data.second_derivative(k,0)[0][2] = (1.-y);
+                data.second_derivative(k,1)[0][2] = -(1.-y);
+                data.second_derivative(k,2)[0][2] = y;
+                data.second_derivative(k,3)[0][2] = -y;
+                data.second_derivative(k,4)[0][2] = -(1.-y);
+                data.second_derivative(k,5)[0][2] = (1.-y);
+                data.second_derivative(k,6)[0][2] = -y;
+                data.second_derivative(k,7)[0][2] = y;
+                data.second_derivative(k,0)[2][0] = (1.-y);
+                data.second_derivative(k,1)[2][0] = -(1.-y);
+                data.second_derivative(k,2)[2][0] = y;
+                data.second_derivative(k,3)[2][0] = -y;
+                data.second_derivative(k,4)[2][0] = -(1.-y);
+                data.second_derivative(k,5)[2][0] = (1.-y);
+                data.second_derivative(k,6)[2][0] = -y;
+                data.second_derivative(k,7)[2][0] = y;
+                // mixed derivatives d^2/(dy dz), stored symmetrically
+                data.second_derivative(k,0)[1][2] = (1.-x);
+                data.second_derivative(k,1)[1][2] = x;
+                data.second_derivative(k,2)[1][2] = -(1.-x);
+                data.second_derivative(k,3)[1][2] = -x;
+                data.second_derivative(k,4)[1][2] = -(1.-x);
+                data.second_derivative(k,5)[1][2] = -x;
+                data.second_derivative(k,6)[1][2] = (1.-x);
+                data.second_derivative(k,7)[1][2] = x;
+                data.second_derivative(k,0)[2][1] = (1.-x);
+                data.second_derivative(k,1)[2][1] = x;
+                data.second_derivative(k,2)[2][1] = -(1.-x);
+                data.second_derivative(k,3)[2][1] = -x;
+                data.second_derivative(k,4)[2][1] = -(1.-x);
+                data.second_derivative(k,5)[2][1] = -x;
+                data.second_derivative(k,6)[2][1] = (1.-x);
+                data.second_derivative(k,7)[2][1] = x;
+              }
+            if (data.shape_third_derivatives.size()!=0)
+              {
+                // if lower order derivatives don't work, neither should this
+                Assert (spacedim == 3, ExcNotImplemented());
+
+                Assert(data.shape_third_derivatives.size()==n_shape_functions*n_points,
+                       ExcInternalError());
+                // only entries with three distinct indices are nonzero
+                for (unsigned int i=0; i<3; ++i)
+                  for (unsigned int j=0; j<3; ++j)
+                    for (unsigned int l=0; l<3; ++l)
+                      if ((i==j)||(j==l)||(l==i))
+                        {
+                          for (unsigned int m=0; m<8; ++m)
+                            data.third_derivative(k,m)[i][j][l] = 0;
+                        }
+                      else
+                        {
+                          data.third_derivative(k,0)[i][j][l] = -1.;
+                          data.third_derivative(k,1)[i][j][l] = 1.;
+                          data.third_derivative(k,2)[i][j][l] = 1.;
+                          data.third_derivative(k,3)[i][j][l] = -1.;
+                          data.third_derivative(k,4)[i][j][l] = 1.;
+                          data.third_derivative(k,5)[i][j][l] = -1.;
+                          data.third_derivative(k,6)[i][j][l] = -1.;
+                          data.third_derivative(k,7)[i][j][l] = 1.;
+                        }
+
+              }
+            if (data.shape_fourth_derivatives.size()!=0)
+              {
+                // if lower order derivatives don't work, neither should this
+                Assert (spacedim == 3, ExcNotImplemented());
+
+                Assert(data.shape_fourth_derivatives.size()==n_shape_functions*n_points,
+                       ExcInternalError());
+                Tensor<4,3> zero;
+                for (unsigned int i=0; i<8; ++i)
+                  data.fourth_derivative(k,i) = zero;
+              }
+          }
+      }
+    }
+  }
+}
+
+
+// Store the polynomial degree and derive the number of mapping shape
+// functions from it as Utilities::fixed_power<dim>(polynomial_degree+1),
+// presumably (polynomial_degree+1)^dim.
+template<int dim, int spacedim>
+MappingQGeneric<dim,spacedim>::InternalData::
+InternalData (const unsigned int polynomial_degree)
+  : polynomial_degree (polynomial_degree),
+    n_shape_functions (Utilities::fixed_power<dim>(polynomial_degree+1))
+{}
+
+// Memory estimate: base class plus every data member, including the higher
+// derivative arrays (second to fourth) that initialize() may allocate.
+template<int dim, int spacedim>
+std::size_t
+MappingQGeneric<dim,spacedim>::InternalData::memory_consumption () const
+{
+  namespace MC = MemoryConsumption; // local shorthand to keep the sum readable
+  return (Mapping<dim,spacedim>::InternalDataBase::memory_consumption() +
+          MC::memory_consumption (shape_values) + MC::memory_consumption (shape_derivatives) +
+          MC::memory_consumption (shape_second_derivatives) +
+          MC::memory_consumption (shape_third_derivatives) +
+          MC::memory_consumption (shape_fourth_derivatives) +
+          MC::memory_consumption (covariant) + MC::memory_consumption (contravariant) +
+          MC::memory_consumption (unit_tangentials) + MC::memory_consumption (aux) +
+          MC::memory_consumption (mapping_support_points) +
+          MC::memory_consumption (cell_of_current_support_points) +
+          MC::memory_consumption (volume_elements) +
+          MC::memory_consumption (polynomial_degree) + MC::memory_consumption (n_shape_functions));
+}
+
+// Store the update flags, size the internal arrays those flags call for, and
+// evaluate the mapping shape functions at the points of q.
+template <int dim, int spacedim>
+void
+MappingQGeneric<dim,spacedim>::InternalData::
+initialize (const UpdateFlags      update_flags,
+            const Quadrature<dim> &q,
+            const unsigned int     n_original_q_points)
+{
+  // keep the flags so that fill_fe_*_values() can look them up later
+  this->update_each = update_flags;
+  const unsigned int n_q_points = q.size();
+
+  // groups of flags that need a given derivative order of the shape functions
+  const UpdateFlags needs_first_derivatives
+    = (update_covariant_transformation
+       | update_contravariant_transformation
+       | update_JxW_values
+       | update_boundary_forms
+       | update_normal_vectors
+       | update_jacobians
+       | update_jacobian_grads
+       | update_inverse_jacobians
+       | update_jacobian_pushed_forward_grads
+       | update_jacobian_2nd_derivatives
+       | update_jacobian_pushed_forward_2nd_derivatives
+       | update_jacobian_3rd_derivatives
+       | update_jacobian_pushed_forward_3rd_derivatives);
+  const UpdateFlags needs_second_derivatives
+    = (update_jacobian_grads | update_jacobian_pushed_forward_grads);
+  const UpdateFlags needs_third_derivatives
+    = (update_jacobian_2nd_derivatives | update_jacobian_pushed_forward_2nd_derivatives);
+  const UpdateFlags needs_fourth_derivatives
+    = (update_jacobian_3rd_derivatives | update_jacobian_pushed_forward_3rd_derivatives);
+
+  // shape function values are only needed for the quadrature points
+  if (this->update_each & update_quadrature_points)
+    shape_values.resize(n_shape_functions * n_q_points);
+  if (this->update_each & needs_first_derivatives)
+    shape_derivatives.resize(n_shape_functions * n_q_points);
+
+  // these arrays are sized by n_original_q_points rather than q.size()
+  if (this->update_each & update_covariant_transformation)
+    covariant.resize(n_original_q_points);
+  if (this->update_each & update_contravariant_transformation)
+    contravariant.resize(n_original_q_points);
+  if (this->update_each & update_volume_elements)
+    volume_elements.resize(n_original_q_points);
+
+  if (this->update_each & needs_second_derivatives)
+    shape_second_derivatives.resize(n_shape_functions * n_q_points);
+  if (this->update_each & needs_third_derivatives)
+    shape_third_derivatives.resize(n_shape_functions * n_q_points);
+  if (this->update_each & needs_fourth_derivatives)
+    shape_fourth_derivatives.resize(n_shape_functions * n_q_points);
+  // now also fill the various fields with their correct values
+  compute_shape_function_values (q.get_points());
+}
+
+
+
+template <int dim, int spacedim>
+void
+MappingQGeneric<dim,spacedim>::InternalData::
+initialize_face (const UpdateFlags      update_flags,
+                 const Quadrature<dim> &q,
+                 const unsigned int     n_original_q_points)
+{
+  // Do everything that is also done for cells first.
+  initialize (update_flags, q, n_original_q_points);
+
+  // The remaining set-up concerns tangential vectors, which are only
+  // prepared for dim>1 and only if boundary forms were requested.
+  if (!(dim > 1))
+    return;
+  if (!(this->update_each & update_boundary_forms))
+    return;
+
+  // dim-1 auxiliary vector fields, one entry per original quadrature point
+  aux.resize (dim-1, std::vector<Tensor<1,spacedim> > (n_original_q_points));
+
+  // Tangential vectors on the unit cell: dim-1 of them for each face,
+  // each replicated for all original quadrature points.
+  const unsigned int n_faces = GeometryInfo<dim>::faces_per_cell;
+  unit_tangentials.resize (n_faces*(dim-1),
+                           std::vector<Tensor<1,dim> > (n_original_q_points));
+
+  if (dim==2)
+    {
+      // signs chosen so that the tangentials are oriented
+      // counterclockwise
+      static const int tangential_orientation[4]= {-1,1,1,-1};
+      for (unsigned int face=0; face<n_faces; ++face)
+        {
+          // faces 0,1 get a tangent along axis 1, faces 2,3 along axis 0
+          Tensor<1,dim> tangent;
+          tangent[1-face/2]=tangential_orientation[face];
+          std::fill (unit_tangentials[face].begin(),
+                     unit_tangentials[face].end(), tangent);
+        }
+    }
+  else if (dim==3)
+    {
+      for (unsigned int face=0; face<n_faces; ++face)
+        {
+          const unsigned int normal_dir=
+            GeometryInfo<dim>::unit_normal_direction[face];
+
+          // first tangent: along axis (normal_dir+1)%3, with the sign
+          // taken from the orientation of the unit normal of this face
+          Tensor<1,dim> first_tangent;
+          first_tangent[(normal_dir+1)%dim]=GeometryInfo<dim>::unit_normal_orientation[face];
+
+          // second tangent: along axis (normal_dir+2)%3
+          Tensor<1,dim> second_tangent;
+          second_tangent[(normal_dir+2)%dim]=1.;
+
+          // the unit tangents are identical at all quadrature points of
+          // a face; the second set is stored n_faces entries after the
+          // first
+          std::fill (unit_tangentials[face].begin(),
+                     unit_tangentials[face].end(), first_tangent);
+          std::fill (unit_tangentials[n_faces+face].begin(),
+                     unit_tangentials[n_faces+face].end(), second_tangent);
+        }
+    }
+}
+
+
+
+namespace
+{
+  /**
+   * Return a vector with dim+1 entries whose i-th entry equals
+   * (degree-1)^i. The result is handed to the FiniteElementData
+   * constructor below (presumably as the number of degrees of freedom
+   * per vertex, line, quad and hex).
+   */
+  template <int dim>
+  std::vector<unsigned int>
+  get_dpo_vector (const unsigned int degree)
+  {
+    std::vector<unsigned int> dofs_per_object;
+    dofs_per_object.reserve (dim+1);
+
+    // start with (degree-1)^0 and multiply up from there
+    unsigned int power = 1U;
+    dofs_per_object.push_back (power);
+    for (int d=1; d<=dim; ++d)
+      {
+        power *= (degree-1);
+        dofs_per_object.push_back (power);
+      }
+
+    return dofs_per_object;
+  }
+}
+
+
+
+template<int dim, int spacedim>
+void
+MappingQGeneric<dim,spacedim>::InternalData::
+compute_shape_function_values (const std::vector<Point<dim> > &unit_points)
+{
+  // For polynomial degree one and dim==spacedim, hard-coded shape
+  // functions are available; use them and be done.
+  if ((polynomial_degree == 1) && (dim == spacedim))
+    {
+      internal::MappingQ1::compute_shape_function_values<spacedim> (n_shape_functions,
+          unit_points, *this);
+      return;
+    }
+
+  // Otherwise evaluate a tensor product Lagrange basis built on the
+  // Gauss-Lobatto points of the requested degree.
+  const unsigned int n_points=unit_points.size();
+
+  const QGaussLobatto<1> line_support_points (polynomial_degree + 1);
+  const TensorProductPolynomials<dim>
+  tensor_pols (Polynomials::generate_complete_Lagrange_basis(line_support_points.get_points()));
+  Assert (n_shape_functions==tensor_pols.n(),
+          ExcInternalError());
+
+  // The polynomials are numbered lexicographically; this table maps that
+  // numbering to the (hierarchic) numbering of the Qp shape functions.
+  const std::vector<unsigned int>
+  renumber (FETools::
+            lexicographic_to_hierarchic_numbering (
+              FiniteElementData<dim> (get_dpo_vector<dim>(polynomial_degree), 1,
+                                      polynomial_degree)));
+
+  // A table is filled if and only if it was allocated beforehand.
+  const bool fill_values  = (shape_values.size()!=0);
+  const bool fill_grads   = (shape_derivatives.size()!=0);
+  const bool fill_grad2   = (shape_second_derivatives.size()!=0);
+  const bool fill_grad3   = (shape_third_derivatives.size()!=0);
+  const bool fill_grad4   = (shape_fourth_derivatives.size()!=0);
+
+  // Scratch arrays for a single point. Those that are not needed stay
+  // empty when passed to tensor_pols.compute().
+  std::vector<double>         values;
+  std::vector<Tensor<1,dim> > grads;
+  std::vector<Tensor<2,dim> > grad2;
+  std::vector<Tensor<3,dim> > grad3;
+  std::vector<Tensor<4,dim> > grad4;
+
+  if (fill_values)
+    {
+      Assert(shape_values.size()==n_shape_functions*n_points,
+             ExcInternalError());
+      values.resize(n_shape_functions);
+    }
+  if (fill_grads)
+    {
+      Assert(shape_derivatives.size()==n_shape_functions*n_points,
+             ExcInternalError());
+      grads.resize(n_shape_functions);
+    }
+  if (fill_grad2)
+    {
+      Assert(shape_second_derivatives.size()==n_shape_functions*n_points,
+             ExcInternalError());
+      grad2.resize(n_shape_functions);
+    }
+  if (fill_grad3)
+    {
+      Assert(shape_third_derivatives.size()==n_shape_functions*n_points,
+             ExcInternalError());
+      grad3.resize(n_shape_functions);
+    }
+  if (fill_grad4)
+    {
+      Assert(shape_fourth_derivatives.size()==n_shape_functions*n_points,
+             ExcInternalError());
+      grad4.resize(n_shape_functions);
+    }
+
+  // nothing was allocated, so nothing needs to be evaluated
+  if (!(fill_values || fill_grads || fill_grad2 || fill_grad3 || fill_grad4))
+    return;
+
+  for (unsigned int point=0; point<n_points; ++point)
+    {
+      tensor_pols.compute(unit_points[point], values, grads, grad2, grad3, grad4);
+
+      // copy the results into the tables, translating the numbering
+      for (unsigned int i=0; i<n_shape_functions; ++i)
+        {
+          const unsigned int target = renumber[i];
+
+          if (fill_values)
+            shape(point,target) = values[i];
+          if (fill_grads)
+            derivative(point,target) = grads[i];
+          if (fill_grad2)
+            second_derivative(point,target) = grad2[i];
+          if (fill_grad3)
+            third_derivative(point,target) = grad3[i];
+          if (fill_grad4)
+            fourth_derivative(point,target) = grad4[i];
+        }
+    }
+}
+
+
+namespace
+{
+  /**
+   * Compute the <tt>support_point_weights_on_quad(hex)</tt> arrays.
+   *
+   * Called by the <tt>compute_support_point_weights_on_quad(hex)</tt>
+   * functions whenever the data is not hardcoded.
+   *
+   * The returned table has one row per interior shape function and one
+   * column per outer shape function and equals -S^{-1} T, where S couples
+   * the gradients of interior shape functions with each other and T couples
+   * them with the gradients of the outer shape functions.
+   *
+   * For the definition of the <tt>support_point_weights_on_quad(hex)</tt> please
+   * refer to equation (8) of the `mapping' report.
+   */
+  template<int dim>
+  Table<2,double>
+  compute_laplace_vector(const unsigned int polynomial_degree)
+  {
+    Table<2,double> lvs;
+
+    Assert(lvs.n_rows()==0, ExcInternalError());
+    Assert(dim==2 || dim==3, ExcNotImplemented());
+
+    // for degree==1 all support points are vertices, so nobody should be
+    // asking us for weights of interior points
+    Assert(polynomial_degree>1, ExcInternalError());
+
+    // number of shape functions in the interior and on the rim of the cell
+    const unsigned int n_inner = Utilities::fixed_power<dim>(polynomial_degree-1);
+    unsigned int n_outer;
+    if (dim==1)
+      n_outer = 2;
+    else if (dim==2)
+      n_outer = 4+4*(polynomial_degree-1);
+    else
+      n_outer = 8+12*(polynomial_degree-1)+6*(polynomial_degree-1)*(polynomial_degree-1);
+
+    // evaluate the shape function gradients at the points of a Gauss
+    // formula on the unit cell
+    const QGauss<dim> quadrature(polynomial_degree+1);
+    const unsigned int n_q_points=quadrature.size();
+
+    typename MappingQGeneric<dim>::InternalData quadrature_data(polynomial_degree);
+    quadrature_data.shape_derivatives.resize(quadrature_data.n_shape_functions *
+                                             n_q_points);
+    quadrature_data.compute_shape_function_values(quadrature.get_points());
+
+    // S: products of gradients of interior shape functions (stiffness
+    //    matrix of the inner dofs)
+    // T: products of gradients of interior and outer shape functions
+    // Both are integrated in a single sweep over the quadrature points.
+    FullMatrix<long double> S(n_inner);
+    FullMatrix<long double> T(n_inner, n_outer);
+    for (unsigned int point=0; point<n_q_points; ++point)
+      {
+        const long double weight = static_cast<long double>(quadrature.weight(point));
+
+        for (unsigned int i=0; i<n_inner; ++i)
+          {
+            const Tensor<1,dim> &grad_i = quadrature_data.derivative(point, n_outer+i);
+
+            for (unsigned int j=0; j<n_inner; ++j)
+              {
+                const Tensor<1,dim> &grad_j = quadrature_data.derivative(point, n_outer+j);
+
+                long double res = 0.;
+                for (unsigned int l=0; l<dim; ++l)
+                  res += static_cast<long double>(grad_i[l]) *
+                         static_cast<long double>(grad_j[l]);
+
+                S(i,j) += res * weight;
+              }
+
+            for (unsigned int k=0; k<n_outer; ++k)
+              {
+                const Tensor<1,dim> &grad_k = quadrature_data.derivative(point, k);
+
+                long double res = 0.;
+                for (unsigned int l=0; l<dim; ++l)
+                  res += static_cast<long double>(grad_i[l]) *
+                         static_cast<long double>(grad_k[l]);
+
+                T(i,k) += res * weight;
+              }
+          }
+      }
+
+    // S_1 := S^{-1}
+    FullMatrix<long double> S_1(n_inner);
+    S_1.invert(S);
+
+    // S_1_T := S_1*T
+    FullMatrix<long double> S_1_T(n_inner, n_outer);
+    S_1.mmult(S_1_T,T);
+
+    // the weights are the negative of that product
+    lvs.reinit (n_inner, n_outer);
+    for (unsigned int i=0; i<n_inner; ++i)
+      for (unsigned int k=0; k<n_outer; ++k)
+        lvs(i,k) = -S_1_T(i,k);
+
+    return lvs;
+  }
+
+
+  /**
+   * This function is needed by the constructor of
+   * <tt>MappingQ<dim,spacedim></tt> for <tt>dim=</tt> 2 and 3.
+   *
+   * For <tt>degree==2</tt> this function sets the @p support_point_weights_on_quad
+   * to hardcoded data. For <tt>degree>2</tt> the table is computed by
+   * compute_laplace_vector<2>(). In 1d, or for <tt>degree==1</tt>, an empty
+   * table is returned.
+   *
+   * For the definition of the @p support_point_weights_on_quad please refer to
+   * equation (8) of the `mapping' report.
+   */
+  template<int dim>
+  Table<2,double>
+  compute_support_point_weights_on_quad(const unsigned int polynomial_degree)
+  {
+    Table<2,double> loqvs;
+
+    // There are no quads in 1d, and no interior support points if
+    // degree==1. In both cases hand back the empty table.
+    if ((dim == 1) || (polynomial_degree == 1))
+      return loqvs;
+
+    // number of support points in the interior and on the rim of a quad
+    const unsigned int n_inner_2d=(polynomial_degree-1)*(polynomial_degree-1);
+    const unsigned int n_outer_2d=4+4*(polynomial_degree-1);
+
+    if (polynomial_degree != 2)
+      {
+        // nothing precomputed for this degree, so compute the weights now
+        loqvs = compute_laplace_vector<2>(polynomial_degree);
+      }
+    else
+      {
+        // precomputed weights; the array has n_inner_2d*n_outer_2d entries.
+        // (checked these values against the output of compute_laplace_vector
+        // again, and found they're indeed right -- just in case someone wonders
+        // where they come from -- WB)
+        static const double loqv2[1*8]
+          = {1/16., 1/16., 1/16., 1/16., 3/16., 3/16., 3/16., 3/16.};
+        Assert (sizeof(loqv2)/sizeof(loqv2[0]) ==
+                n_inner_2d * n_outer_2d,
+                ExcInternalError());
+
+        // copy the flat array into the table, row by row
+        loqvs.reinit(n_inner_2d, n_outer_2d);
+        for (unsigned int row=0; row<n_inner_2d; ++row)
+          for (unsigned int col=0; col<n_outer_2d; ++col)
+            loqvs(row,col) = loqv2[row*n_outer_2d+col];
+      }
+
+    // for every interior point, the weights of the points on the outer rim
+    // have to add up to one. check this
+    for (unsigned int row=0; row<loqvs.n_rows(); ++row)
+      Assert(std::fabs(std::accumulate(loqvs[row].begin(),
+                                       loqvs[row].end(),0.)-1)<1e-13*polynomial_degree,
+             ExcInternalError());
+
+    return loqvs;
+  }
+
+
+
+  /**
+   * This function is needed by the constructor of <tt>MappingQ<3></tt>.
+   *
+   * For <tt>degree==2</tt> this function sets the @p support_point_weights_on_hex to
+   * the hardcoded data. For <tt>degree>2</tt> this vector is computed by
+   * compute_laplace_vector(). For <tt>dim<3</tt>, or for <tt>degree==1</tt>,
+   * an empty table is returned.
+   *
+   * For the definition of the @p support_point_weights_on_hex please refer to
+   * equation (8) of the `mapping' report.
+   */
+  template <int dim>
+  Table<2,double>
+  compute_support_point_weights_on_hex(const unsigned int polynomial_degree)
+  {
+    Table<2,double> lohvs;
+
+    // in 1d and 2d, there are no hexes, so return an empty object
+    if (dim < 3)
+      return lohvs;
+
+    // we are asked to compute weights for interior support points, but
+    // there are no interior points if degree==1
+    if (polynomial_degree == 1)
+      return lohvs;
+
+    // number of support points in the interior and on the rim (vertices,
+    // lines and quads) of a hex
+    const unsigned int n_inner = Utilities::fixed_power<dim>(polynomial_degree-1);
+    const unsigned int n_outer = 8+12*(polynomial_degree-1)+6*(polynomial_degree-1)*(polynomial_degree-1);
+
+    // first check whether we have precomputed the values for some polynomial
+    // degree; the size of the array is n_inner*n_outer
+    if (polynomial_degree == 2)
+      {
+        static const double lohv2[26]
+          = {1/128., 1/128., 1/128., 1/128., 1/128., 1/128., 1/128., 1/128.,
+             7/192., 7/192., 7/192., 7/192., 7/192., 7/192., 7/192., 7/192.,
+             7/192., 7/192., 7/192., 7/192.,
+             1/12., 1/12., 1/12., 1/12., 1/12., 1/12.
+            };
+
+        // make sure the copy loop below does not read past the end of the
+        // hardcoded array (same check as in the function for quads)
+        Assert (sizeof(lohv2)/sizeof(lohv2[0]) ==
+                n_inner * n_outer,
+                ExcInternalError());
+
+        // copy and return
+        lohvs.reinit(n_inner, n_outer);
+        for (unsigned int unit_point=0; unit_point<n_inner; ++unit_point)
+          for (unsigned int k=0; k<n_outer; ++k)
+            lohvs[unit_point][k] = lohv2[unit_point*n_outer+k];
+      }
+    else
+      {
+        // not precomputed, then do so now
+        lohvs = compute_laplace_vector<dim>(polynomial_degree);
+      }
+
+    // the sum of weights of the points at the outer rim should be one. check
+    // this
+    for (unsigned int unit_point=0; unit_point<n_inner; ++unit_point)
+      Assert(std::fabs(std::accumulate(lohvs[unit_point].begin(),
+                                       lohvs[unit_point].end(),0.) - 1)<1e-13*polynomial_degree,
+             ExcInternalError());
+
+    return lohvs;
+  }
+}
+
+
+
+
+template<int dim, int spacedim>
+MappingQGeneric<dim,spacedim>::MappingQGeneric (const unsigned int p)
+  :
+  polynomial_degree(p),
+  // one-dimensional support points: p+1 of them
+  line_support_points(p+1),
+  // an FE_Q object is only created in 3d; otherwise the pointer stays zero
+  fe_q(dim != 3 ? 0 : new FE_Q<dim>(p)),
+  // weights of interior support points on quads and hexes; these tables
+  // are empty wherever they do not apply
+  support_point_weights_on_quad (compute_support_point_weights_on_quad<dim>(p)),
+  support_point_weights_on_hex (compute_support_point_weights_on_hex<dim>(p))
+{
+  Assert (p >= 1, ExcMessage ("It only makes sense to create polynomial mappings "
+                              "with a polynomial degree greater or equal to one."));
+}
+
+
+
+template<int dim, int spacedim>
+MappingQGeneric<dim,spacedim>::MappingQGeneric (const MappingQGeneric<dim,spacedim> &mapping)
+  :
+  polynomial_degree(mapping.polynomial_degree),
+  line_support_points(mapping.line_support_points),
+  // deep copy of the FE_Q object, which is only dereferenced in 3d;
+  // in all other dimensions the pointer is simply set to zero
+  fe_q(dim != 3 ? 0 : new FE_Q<dim>(*mapping.fe_q)),
+  support_point_weights_on_quad (mapping.support_point_weights_on_quad),
+  support_point_weights_on_hex (mapping.support_point_weights_on_hex)
+{
+  // everything is done in the initializer list
+}
+
+
+
+
+template<int dim, int spacedim>
+Mapping<dim,spacedim> *
+MappingQGeneric<dim,spacedim>::clone () const
+{
+  // Copy-construct a new object on the heap and return a pointer to it
+  // as a pointer to the base class.
+  Mapping<dim,spacedim> *copy = new MappingQGeneric<dim,spacedim>(*this);
+  return copy;
+}
+
+
+
+
+template<int dim, int spacedim>
+unsigned int
+MappingQGeneric<dim,spacedim>::get_degree() const
+{
+  // the polynomial degree this object was constructed with
+  const unsigned int degree = this->polynomial_degree;
+  return degree;
+}
+
+
+
+template<int dim, int spacedim>
+Point<spacedim>
+MappingQGeneric<dim,spacedim>::
+transform_unit_to_real_cell (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                             const Point<dim> &p) const
+{
+  // Build the tensor product Lagrange basis on the Gauss-Lobatto points
+  // that belong to the polynomial degree of this mapping.
+  const QGaussLobatto<1> lobatto_points_1d (polynomial_degree + 1);
+  const TensorProductPolynomials<dim>
+  lagrange_basis (Polynomials::generate_complete_Lagrange_basis(lobatto_points_1d.get_points()));
+  const unsigned int n_basis_functions = lagrange_basis.n();
+  Assert (n_basis_functions == Utilities::fixed_power<dim>(polynomial_degree+1),
+          ExcInternalError());
+
+  // The basis is numbered lexicographically, whereas the support points
+  // use the numbering of the Qp shape functions. This table translates
+  // from the former to the latter.
+  const std::vector<unsigned int>
+  lexicographic_to_hierarchic (FETools::
+                               lexicographic_to_hierarchic_numbering (
+                                 FiniteElementData<dim> (get_dpo_vector<dim>(polynomial_degree), 1,
+                                                         polynomial_degree)));
+
+  // locations of the mapping support points of this cell in real space
+  const std::vector<Point<spacedim> > cell_support_points
+    = this->compute_mapping_support_points(cell);
+
+  // The image of p is the sum of the support points, each weighted with
+  // the value of its basis function at p.
+  Point<spacedim> image;
+  for (unsigned int i=0; i<n_basis_functions; ++i)
+    image += cell_support_points[lexicographic_to_hierarchic[i]] * lagrange_basis.compute_value (i, p);
+
+  return image;
+}
+
+
+// In the code below, GCC tries to instantiate MappingQGeneric<3,4> when
+// deciding which of the overloaded versions of
+// do_transform_real_to_unit_cell_internal() to call. This leads to bad
+// error messages and, generally, nothing very good. Avoid this by ensuring
+// that this class exists, but does not have an inner InternalData
+// type, thereby ruling out the codim-1 version of the function
+// below when doing overload resolution.
+//
+// The specialization is deliberately empty: it is a placeholder only and
+// provides none of the members of the general template.
+template <>
+class MappingQGeneric<3,4>
+{};
+
+namespace
+{
+  /**
+   * Compute the location in real space of a single point on the reference
+   * cell. The point itself is not passed in; rather, @p data has to hold
+   * the values of the shape functions at that point (stored in
+   * data.shape_values, read via data.shape(0,i)) as well as the locations
+   * of the mapping support points (data.mapping_support_points). The
+   * result is the sum of the support points weighted with the shape
+   * function values.
+   */
+  template<int dim, int spacedim>
+  Point<spacedim>
+  compute_mapped_location_of_point (const typename MappingQGeneric<dim,spacedim>::InternalData &data)
+  {
+    // exactly one shape function value per support point is expected,
+    // i.e., data must have been set up for one evaluation point only
+    AssertDimension (data.shape_values.size(),
+                     data.mapping_support_points.size());
+
+    const unsigned int n_support_points = data.mapping_support_points.size();
+
+    // accumulate the weighted support points
+    Point<spacedim> mapped_location;
+    for (unsigned int k=0; k<n_support_points; ++k)
+      mapped_location += data.mapping_support_points[k] * data.shape(0,k);
+
+    return mapped_location;
+  }
+
+
+  /**
+   * Implementation of transform_real_to_unit_cell for dim==spacedim
+   *
+   * Starting from @p initial_p_unit, run a Newton iteration with a
+   * step-halving line search to find the point on the reference cell whose
+   * image under the mapping is @p p.
+   *
+   * @param cell           The cell in question; only its diameter is used
+   *                       here, for the early-out test.
+   * @param p              The point in real space to be pulled back.
+   * @param initial_p_unit Starting guess on the reference cell.
+   * @param mdata          Scratch data. On entry it must hold the mapping
+   *                       support points and have shape value and
+   *                       derivative tables sized for a single evaluation
+   *                       point; both tables are overwritten.
+   * @return The reference cell point found by the iteration.
+   *
+   * Throws Mapping::ExcTransformationFailed if the line search makes no
+   * progress even after the step length has dropped to 0.05 or below, or
+   * if more than newton_iteration_limit (20) Newton steps are taken.
+   */
+  template <int dim>
+  Point<dim>
+  do_transform_real_to_unit_cell_internal
+  (const typename Triangulation<dim,dim>::cell_iterator &cell,
+   const Point<dim>                                     &p,
+   const Point<dim>                                     &initial_p_unit,
+   typename MappingQGeneric<dim,dim>::InternalData      &mdata)
+  {
+    const unsigned int spacedim = dim;
+
+    // sanity checks: one shape value, one derivative and one support
+    // point per shape function. (n_shapes is only used inside assertions,
+    // hence the cast to void to avoid warnings in release mode.)
+    const unsigned int n_shapes=mdata.shape_values.size();
+    (void)n_shapes;
+    Assert(n_shapes!=0, ExcInternalError());
+    AssertDimension (mdata.shape_derivatives.size(), n_shapes);
+
+    std::vector<Point<spacedim> > &points=mdata.mapping_support_points;
+    AssertDimension (points.size(), n_shapes);
+
+
+    // Newton iteration to solve
+    //    f(x)=p(x)-p=0
+    // where we are looking for 'x' and p(x) is the forward transformation
+    // from unit to real cell. We solve this using a Newton iteration
+    //    x_{n+1}=x_n-[f'(x)]^{-1}f(x)
+    // The start value is the initial guess handed in by the caller.
+
+    // The shape values and derivatives of the mapping at the current
+    // iterate are (re)computed into mdata before they are used.
+
+    Point<dim> p_unit = initial_p_unit;
+
+    mdata.compute_shape_function_values(std::vector<Point<dim> > (1, p_unit));
+
+    Point<spacedim> p_real = compute_mapped_location_of_point<dim,spacedim>(mdata);
+    Tensor<1,spacedim> f = p_real-p;
+
+    // early out if we already have our point
+    if (f.norm_square() < 1e-24 * cell->diameter() * cell->diameter())
+      return p_unit;
+
+    // we need to compare the position of the computed p(x) against the given
+    // point 'p'. We will terminate the iteration and return 'x' if they are
+    // less than eps apart. The question is how to choose eps -- or, put maybe
+    // more generally: in which norm we want these 'p' and 'p(x)' to be eps
+    // apart.
+    //
+    // the question is difficult since we may have to deal with very elongated
+    // cells where we may achieve 1e-12*h for the distance of these two points
+    // in the 'long' direction, but achieving this tolerance in the 'short'
+    // direction of the cell may not be possible
+    //
+    // what we do instead is then to terminate iterations if
+    //    \| p(x) - p \|_A < eps
+    // where the A-norm is somehow induced by the transformation of the cell.
+    // in particular, we want to measure distances relative to the sizes of
+    // the cell in its principal directions.
+    //
+    // to define what exactly A should be, note that to first order we have
+    // the following (assuming that x* is the solution of the problem, i.e.,
+    // p(x*)=p):
+    //    p(x) - p = p(x) - p(x*)
+    //             = -grad p(x) * (x*-x) + higher order terms
+    // This suggests measuring with a norm that corresponds to
+    //    A = {[grad p(x)]^T [grad p(x)]}^{-1}
+    // because then
+    //    \| p(x) - p \|_A  \approx  \| x - x* \|
+    // Consequently, we will try to enforce that
+    //    \| p(x) - p \|_A  =  \| f \|  <=  eps
+    //
+    // Note that using this norm is a bit dangerous since the norm changes
+    // in every iteration (A isn't fixed but depends on xk). However, if the
+    // cell is not too deformed (it may be stretched, but not twisted) then
+    // the mapping is almost linear and A is indeed constant or nearly so.
+    const double eps = 1.e-11;
+    const unsigned int newton_iteration_limit = 20;
+
+    unsigned int newton_iteration = 0;
+    // assigned at the end of every pass through the loop body, i.e.,
+    // before the loop condition reads it for the first time
+    double last_f_weighted_norm;
+    do
+      {
+#ifdef DEBUG_TRANSFORM_REAL_TO_UNIT_CELL
+        std::cout << "Newton iteration " << newton_iteration << std::endl;
+#endif
+
+        // f'(x): the Jacobian of the mapping at the current iterate,
+        // assembled from the support points and the shape function
+        // gradients currently stored in mdata
+        Tensor<2,spacedim> df;
+        for (unsigned int k=0; k<mdata.n_shape_functions; ++k)
+          {
+            const Tensor<1,dim> &grad_transform=mdata.derivative(0,k);
+            const Point<spacedim> &point=points[k];
+
+            for (unsigned int i=0; i<spacedim; ++i)
+              for (unsigned int j=0; j<dim; ++j)
+                df[i][j]+=point[i]*grad_transform[j];
+          }
+
+        // Solve  [f'(x)]d=f(x)
+        Tensor<2,spacedim> df_inverse = invert(df);
+        const Tensor<1,spacedim> delta = df_inverse * static_cast<const Tensor<1,spacedim>&>(f);
+
+#ifdef DEBUG_TRANSFORM_REAL_TO_UNIT_CELL
+        std::cout << "   delta=" << delta  << std::endl;
+#endif
+
+        // do a line search: start with the full Newton step and halve it
+        // until the residual decreases
+        double step_length = 1;
+        do
+          {
+            // update of p_unit. The spacedim-th component of transformed point
+            // is simply ignored in codimension one case. When this component is
+            // not zero, then we are projecting the point to the surface or
+            // curve identified by the cell. (In this function dim==spacedim,
+            // so all components of delta are used.)
+            Point<dim> p_unit_trial = p_unit;
+            for (unsigned int i=0; i<dim; ++i)
+              p_unit_trial[i] -= step_length * delta[i];
+
+            // shape values and derivatives
+            // at new p_unit point
+            mdata.compute_shape_function_values(std::vector<Point<dim> > (1, p_unit_trial));
+
+            // f(x)
+            Point<spacedim> p_real_trial = compute_mapped_location_of_point<dim,spacedim>(mdata);
+            const Tensor<1,spacedim> f_trial = p_real_trial-p;
+
+#ifdef DEBUG_TRANSFORM_REAL_TO_UNIT_CELL
+            std::cout << "     step_length=" << step_length << std::endl
+                      << "       ||f ||   =" << f.norm() << std::endl
+                      << "       ||f*||   =" << f_trial.norm() << std::endl
+                      << "       ||f*||_A =" << (df_inverse * f_trial).norm() << std::endl;
+#endif
+
+            // see if we are making progress with the current step length
+            // and if not, reduce it by a factor of two and try again
+            //
+            // strictly speaking, we should probably use the same norm as we use
+            // for the outer algorithm. in practice, line search is just a
+            // crutch to find a "reasonable" step length, and so using the l2
+            // norm is probably just fine
+            if (f_trial.norm() < f.norm())
+              {
+                // accept the trial point and leave the line search
+                p_real = p_real_trial;
+                p_unit = p_unit_trial;
+                f = f_trial;
+                break;
+              }
+            else if (step_length > 0.05)
+              step_length /= 2;
+            else
+              // even a step of length <= 0.05 did not reduce the residual:
+              // give up
+              AssertThrow (false,
+                           (typename Mapping<dim,spacedim>::ExcTransformationFailed()));
+          }
+        while (true);
+
+        ++newton_iteration;
+        if (newton_iteration > newton_iteration_limit)
+          AssertThrow (false,
+                       (typename Mapping<dim,spacedim>::ExcTransformationFailed()));
+        // the new residual, measured with the inverse Jacobian that was
+        // computed at the previous iterate
+        last_f_weighted_norm = (df_inverse * f).norm();
+      }
+    while (last_f_weighted_norm > eps);
+
+    return p_unit;
+  }
+
+
+
+  /**
+   * Implementation of transform_real_to_unit_cell for dim==spacedim-1
+   *
+   * Starting from @p initial_p_unit, run a plain Newton iteration (without
+   * line search) on the system
+   *    f_j(x) = DF_j(x) . (p - F(x)) = 0,   j=0,...,dim-1
+   * where F is the mapping from the reference cell to real space and DF_j
+   * is its derivative in reference direction j. In other words, we look
+   * for the point on the cell for which p-F(x) has no component along any
+   * of the tangent directions DF_j.
+   *
+   * @param cell           The cell in question; only its diameter is used
+   *                       here, to scale the tolerance.
+   * @param p              The point in real space.
+   * @param initial_p_unit Starting guess on the reference cell.
+   * @param mdata          Scratch data. On entry it must hold the mapping
+   *                       support points and have shape value, first and
+   *                       second derivative tables sized for a single
+   *                       evaluation point; the tables are overwritten.
+   * @return The reference cell point found by the iteration.
+   *
+   * Throws Mapping::ExcTransformationFailed if the residual is still above
+   * the tolerance after loop_limit (10) Newton steps.
+   */
+  template <int dim>
+  Point<dim>
+  do_transform_real_to_unit_cell_internal_codim1
+  (const typename Triangulation<dim,dim+1>::cell_iterator &cell,
+   const Point<dim+1>                                       &p,
+   const Point<dim>                                         &initial_p_unit,
+   typename MappingQGeneric<dim,dim+1>::InternalData       &mdata)
+  {
+    const unsigned int spacedim = dim+1;
+
+    // sanity checks; n_shapes is only used inside assertions
+    const unsigned int n_shapes=mdata.shape_values.size();
+    (void)n_shapes;
+    Assert(n_shapes!=0, ExcInternalError());
+    Assert(mdata.shape_derivatives.size()==n_shapes, ExcInternalError());
+    Assert(mdata.shape_second_derivatives.size()==n_shapes, ExcInternalError());
+
+    std::vector<Point<spacedim> > &points=mdata.mapping_support_points;
+    Assert(points.size()==n_shapes, ExcInternalError());
+
+    Point<spacedim> p_minus_F;
+
+    // first and second derivatives of the mapping F at the current iterate
+    Tensor<1,spacedim>  DF[dim];
+    Tensor<1,spacedim>  D2F[dim][dim];
+
+    Point<dim> p_unit = initial_p_unit;
+    Point<dim> f;
+    Tensor<2,dim>  df;
+
+    // Evaluate first and second derivatives
+    mdata.compute_shape_function_values(std::vector<Point<dim> > (1, p_unit));
+
+    for (unsigned int k=0; k<mdata.n_shape_functions; ++k)
+      {
+        const Tensor<1,dim>   &grad_phi_k = mdata.derivative(0,k);
+        const Tensor<2,dim>   &hessian_k  = mdata.second_derivative(0,k);
+        const Point<spacedim> &point_k = points[k];
+
+        for (unsigned int j=0; j<dim; ++j)
+          {
+            DF[j] += grad_phi_k[j] * point_k;
+            for (unsigned int l=0; l<dim; ++l)
+              D2F[j][l] += hessian_k[j][l] * point_k;
+          }
+      }
+
+    p_minus_F = p;
+    p_minus_F -= compute_mapped_location_of_point<dim,spacedim>(mdata);
+
+    // residual f and its derivative df with respect to the reference
+    // coordinates
+    for (unsigned int j=0; j<dim; ++j)
+      {
+        f[j] = DF[j] * p_minus_F;
+        for (unsigned int l=0; l<dim; ++l)
+          df[j][l] = -DF[j]*DF[l] + D2F[j][l] * p_minus_F;
+      }
+
+
+    const double eps = 1.e-12*cell->diameter();
+    const unsigned int loop_limit = 10;
+
+    unsigned int loop=0;
+
+    // Note the short-circuit evaluation: 'loop' is only incremented if the
+    // residual is still too large.
+    while (f.norm()>eps && loop++<loop_limit)
+      {
+        // Solve  [df(x)]d=f(x)
+        const Tensor<1,dim> d = invert(df) * static_cast<const Tensor<1,dim>&>(f);
+        p_unit -= d;
+
+        // DF and D2F are accumulated below, so reset them first
+        for (unsigned int j=0; j<dim; ++j)
+          {
+            DF[j].clear();
+            for (unsigned int l=0; l<dim; ++l)
+              D2F[j][l].clear();
+          }
+
+        mdata.compute_shape_function_values(std::vector<Point<dim> > (1, p_unit));
+
+        for (unsigned int k=0; k<mdata.n_shape_functions; ++k)
+          {
+            const Tensor<1,dim>   &grad_phi_k = mdata.derivative(0,k);
+            const Tensor<2,dim>   &hessian_k  = mdata.second_derivative(0,k);
+            const Point<spacedim> &point_k = points[k];
+
+            for (unsigned int j=0; j<dim; ++j)
+              {
+                DF[j] += grad_phi_k[j] * point_k;
+                for (unsigned int l=0; l<dim; ++l)
+                  D2F[j][l] += hessian_k[j][l] * point_k;
+              }
+          }
+
+        //TODO: implement a line search here in much the same way as for
+        // the corresponding function above that does so for dim==spacedim
+        p_minus_F = p;
+        p_minus_F -= compute_mapped_location_of_point<dim,spacedim>(mdata);
+
+        for (unsigned int j=0; j<dim; ++j)
+          {
+            f[j] = DF[j] * p_minus_F;
+            for (unsigned int l=0; l<dim; ++l)
+              df[j][l] = -DF[j]*DF[l] + D2F[j][l] * p_minus_F;
+          }
+
+      }
+
+
+    // Find out why the loop above terminated. If the residual dropped
+    // below eps, the first condition was false and 'loop' was not
+    // incremented again, so it equals the number of Newton steps taken,
+    // which is at most loop_limit. If, on the other hand, the iteration
+    // limit was hit, the failing test 'loop++<loop_limit' has still
+    // incremented 'loop', which then equals loop_limit+1. Consequently,
+    // the comparison has to be '<=': with '<', convergence in exactly the
+    // last permitted step would be reported as a failure.
+    AssertThrow (loop<=loop_limit, (typename Mapping<dim,spacedim>::ExcTransformationFailed()));
+
+    return p_unit;
+  }
+
+
+}
+
+
+
+// visual studio freaks out when trying to determine if
+// do_transform_real_to_unit_cell_internal with dim=3 and spacedim=4 is a good
+// candidate. So instead of letting the compiler pick the correct overload, we
+// use template specialization to make sure we pick up the right function to
+// call:
+
+template<int dim, int spacedim>
+Point<dim>
+MappingQGeneric<dim,spacedim>::
+transform_real_to_unit_cell_internal
+(const typename Triangulation<dim,spacedim>::cell_iterator &,
+ const Point<spacedim> &,
+ const Point<dim> &) const
+{
+  // Generic fallback. The actual work is done in the explicit
+  // specializations of this function, so this version should never be
+  // called; complain if it is.
+  Assert(false, ExcInternalError());
+
+  // return a default-constructed point to keep compilers happy
+  Point<dim> dummy;
+  return dummy;
+}
+
+template<>
+Point<1>
+MappingQGeneric<1,1>::
+transform_real_to_unit_cell_internal
+(const Triangulation<1,1>::cell_iterator &cell,
+ const Point<1>                            &p,
+ const Point<1>                                 &initial_p_unit) const
+{
+  const int dim = 1;
+  const int spacedim = 1;
+
+  // We need shape values (for the mapped point) and first derivatives
+  // (for the Jacobian); second derivatives are only requested if
+  // spacedim>dim, which is not the case in this specialization.
+  const UpdateFlags base_flags = update_quadrature_points | update_jacobians;
+  const UpdateFlags update_flags = (spacedim>dim
+                                    ?
+                                    base_flags | update_jacobian_grads
+                                    :
+                                    base_flags);
+
+  // Set up a scratch data object for a "quadrature formula" that consists
+  // of the initial guess only.
+  const Quadrature<dim> point_quadrature(initial_p_unit);
+  std_cxx11::unique_ptr<InternalData> mdata (get_data(update_flags,
+                                                      point_quadrature));
+  InternalData &data = *mdata;
+
+  data.mapping_support_points = this->compute_mapping_support_points (cell);
+
+  // hand over to the implementation for dim==spacedim
+  return do_transform_real_to_unit_cell_internal<1>(cell, p, initial_p_unit, data);
+}
+
+template<> // explicit specialization for dim=2, spacedim=2
+Point<2>
+MappingQGeneric<2, 2>::
+transform_real_to_unit_cell_internal
+(const Triangulation<2, 2>::cell_iterator &cell,
+ const Point<2>                            &p,
+ const Point<2>                                 &initial_p_unit) const
+{
+  const int dim = 2;
+  const int spacedim = 2;
+
+  const Quadrature<dim> point_quadrature(initial_p_unit); // built from the starting guess only
+
+  UpdateFlags update_flags = update_quadrature_points | update_jacobians;
+  if (spacedim>dim) // never true in this specialization (dim == spacedim)
+    update_flags |= update_jacobian_grads;
+  std_cxx11::unique_ptr<InternalData> mdata (get_data(update_flags,
+                                                      point_quadrature)); // smart pointer takes over the object returned by get_data()
+
+  mdata->mapping_support_points = this->compute_mapping_support_points (cell);
+
+  // dim == spacedim: hand the cell, the target point p, the starting guess and
+  // the scratch data to the dim==spacedim solver and return its result
+  return do_transform_real_to_unit_cell_internal<2>(cell, p, initial_p_unit, *mdata);
+}
+
+template<> // explicit specialization for dim=3, spacedim=3
+Point<3>
+MappingQGeneric<3, 3>::
+transform_real_to_unit_cell_internal
+(const Triangulation<3, 3>::cell_iterator &cell,
+ const Point<3>                            &p,
+ const Point<3>                                 &initial_p_unit) const
+{
+  const int dim = 3;
+  const int spacedim = 3;
+
+  const Quadrature<dim> point_quadrature(initial_p_unit); // built from the starting guess only
+
+  UpdateFlags update_flags = update_quadrature_points | update_jacobians;
+  if (spacedim>dim) // never true in this specialization (dim == spacedim)
+    update_flags |= update_jacobian_grads;
+  std_cxx11::unique_ptr<InternalData> mdata (get_data(update_flags,
+                                                      point_quadrature)); // smart pointer takes over the object returned by get_data()
+
+  mdata->mapping_support_points = this->compute_mapping_support_points (cell);
+
+  // dim == spacedim: hand the cell, the target point p, the starting guess and
+  // the scratch data to the dim==spacedim solver and return its result
+  return do_transform_real_to_unit_cell_internal<3>(cell, p, initial_p_unit, *mdata);
+}
+
+template<> // explicit specialization for dim=1, spacedim=2 (codimension one)
+Point<1>
+MappingQGeneric<1, 2>::
+transform_real_to_unit_cell_internal
+(const Triangulation<1, 2>::cell_iterator &cell,
+ const Point<2>                            &p,
+ const Point<1>                                 &initial_p_unit) const
+{
+  const int dim = 1;
+  const int spacedim = 2;
+
+  const Quadrature<dim> point_quadrature(initial_p_unit); // built from the starting guess only
+
+  UpdateFlags update_flags = update_quadrature_points | update_jacobians;
+  if (spacedim>dim) // always true here (1 < 2), so Jacobian gradients are requested as well
+    update_flags |= update_jacobian_grads;
+  std_cxx11::unique_ptr<InternalData> mdata (get_data(update_flags,
+                                                      point_quadrature)); // smart pointer takes over the object returned by get_data()
+
+  mdata->mapping_support_points = this->compute_mapping_support_points (cell);
+
+  // spacedim == dim+1: hand the cell, the target point p, the starting guess
+  // and the scratch data to the codimension-one solver and return its result
+  return do_transform_real_to_unit_cell_internal_codim1<1>(cell, p, initial_p_unit, *mdata);
+}
+
+template<> // explicit specialization for dim=2, spacedim=3 (codimension one)
+Point<2>
+MappingQGeneric<2, 3>::
+transform_real_to_unit_cell_internal
+(const Triangulation<2, 3>::cell_iterator &cell,
+ const Point<3>                            &p,
+ const Point<2>                                 &initial_p_unit) const
+{
+  const int dim = 2;
+  const int spacedim = 3;
+
+  const Quadrature<dim> point_quadrature(initial_p_unit); // built from the starting guess only
+
+  UpdateFlags update_flags = update_quadrature_points | update_jacobians;
+  if (spacedim>dim) // always true here (2 < 3), so Jacobian gradients are requested as well
+    update_flags |= update_jacobian_grads;
+  std_cxx11::unique_ptr<InternalData> mdata (get_data(update_flags,
+                                                      point_quadrature)); // smart pointer takes over the object returned by get_data()
+
+  mdata->mapping_support_points = this->compute_mapping_support_points (cell);
+
+  // spacedim == dim+1: hand the cell, the target point p, the starting guess
+  // and the scratch data to the codimension-one solver and return its result
+  return do_transform_real_to_unit_cell_internal_codim1<2>(cell, p, initial_p_unit, *mdata);
+}
+
+template<> // explicit specialization for dim=1, spacedim=3: not implemented
+Point<1>
+MappingQGeneric<1, 3>::
+transform_real_to_unit_cell_internal
+(const Triangulation<1, 3>::cell_iterator &,
+ const Point<3> &,
+ const Point<1> &) const
+{
+  Assert (false, ExcNotImplemented()); // all arguments are ignored; this case has no solver
+  return Point<1>(); // default-constructed point, only reached if the Assert does not stop execution
+}
+
+
+
+template<int dim, int spacedim> // inverse mapping: real-space point p -> unit-cell coordinates w.r.t. cell
+Point<dim>
+MappingQGeneric<dim,spacedim>::
+transform_real_to_unit_cell (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                             const Point<spacedim>                            &p) const
+{
+  // Use an exact formula if one is available. This is only the case
+  // for Q1 mappings in 1d, and in 2d if dim==spacedim
+  if ((polynomial_degree == 1) &&
+      ((dim == 1)
+       ||
+       ((dim == 2) && (dim == spacedim))))
+    {
+      // The dimension-dependent algorithms are much faster (about 25-45x in
+      // 2D) but fail most of the time when the given point (p) is not in the
+      // cell. The dimension-independent Newton algorithm given below is
+      // slower, but more robust (though it still sometimes fails). Therefore
+      // this function implements the following strategy based on the
+      // dimension of p:
+      //
+      // * In 1D this mapping is linear, so the mapping is always invertible
+      //   (and the exact formula is known) as long as the cell has non-zero
+      //   length.
+      // * In 2D the exact (quadratic) formula is called first. If either the
+      //   exact formula does not succeed (negative discriminant in the
+      //   quadratic formula) or succeeds but finds a solution outside of the
+      //   unit cell, then the Newton solver is called. The rationale for the
+      //   second choice is that the exact formula may provide two different
+      //   answers when mapping a point outside of the real cell, but the
+      //   Newton solver (if it converges) will only return one answer.
+      //   Otherwise the exact formula successfully found a point in the unit
+      //   cell and that value is returned.
+      // * In 3D there is no (known to the authors) exact formula, so the Newton
+      //   algorithm is used (this branch is never entered for dim==3).
+      const std_cxx11::array<Point<spacedim>, GeometryInfo<dim>::vertices_per_cell>
+      vertices = this->get_vertices(cell);
+      try
+        {
+          switch (dim)
+            {
+            case 1:
+            {
+              // formula not subject to any issues in 1d
+              if (spacedim == 1)
+                return internal::MappingQ1::transform_real_to_unit_cell(vertices, p);
+              else // 1d cell embedded in a higher-dimensional space
+                {
+                  const std::vector<Point<spacedim> > a (vertices.begin(),
+                                                         vertices.end());
+                  return internal::MappingQ1::transform_real_to_unit_cell_initial_guess<dim,spacedim>(a,p); // the "initial guess" is returned as the final answer here
+                }
+            }
+
+            case 2:
+            {
+              const Point<dim> point
+                = internal::MappingQ1::transform_real_to_unit_cell(vertices, p);
+
+              // formula not guaranteed to work for points outside of
+              // the cell. only take the computed point if it lies
+              // inside the reference cell (up to the tolerance eps)
+              const double eps = 1e-15;
+              if (-eps <= point(1) && point(1) <= 1 + eps &&
+                  -eps <= point(0) && point(0) <= 1 + eps)
+                {
+                  return point;
+                }
+              else
+                break; // leave the switch and continue with the Newton code below
+            }
+
+            default:
+            {
+              // we should never get here, based on the if-condition at the top
+              Assert(false, ExcInternalError());
+            }
+            }
+        }
+      catch (const typename Mapping<spacedim,spacedim>::ExcTransformationFailed &)
+        {
+          // simply fall through and continue on to the standard Newton code
+        }
+    }
+  else
+    {
+      // no explicit formula is available; go on to the Newton iteration below
+    }
+
+
+  Point<dim> initial_p_unit;
+  if (polynomial_degree == 1)
+    {
+      // Find the initial value for the Newton iteration by a normal
+      // projection to the least square plane determined by the vertices
+      // of the cell
+      const std::vector<Point<spacedim> > a
+        = this->compute_mapping_support_points (cell);
+      Assert(a.size() == GeometryInfo<dim>::vertices_per_cell,
+             ExcInternalError());
+      initial_p_unit = internal::MappingQ1::transform_real_to_unit_cell_initial_guess<dim,spacedim>(a,p);
+    }
+  else
+    {
+      try
+        {
+          // Find the initial value for the Newton iteration by a normal
+          // projection to the least square plane determined by the vertices
+          // of the cell
+          //
+          // we do this by first getting all support points, then
+          // throwing away all but the vertices, and finally calling
+          // the same function as above
+          std::vector<Point<spacedim> > a
+            = this->compute_mapping_support_points (cell);
+          a.resize(GeometryInfo<dim>::vertices_per_cell); // keeps only the leading vertices_per_cell entries
+          initial_p_unit = internal::MappingQ1::transform_real_to_unit_cell_initial_guess<dim,spacedim>(a,p);
+        }
+      catch (const typename Mapping<dim,spacedim>::ExcTransformationFailed &)
+        {
+          for (unsigned int d=0; d<dim; ++d)
+            initial_p_unit[d] = 0.5; // no guess available: start with 0.5 in every coordinate
+        }
+
+      // in case the function above should have given us something
+      // back that lies outside the unit cell (that might happen
+      // because we may have given a point 'p' that lies inside the
+      // cell with the higher order mapping, but outside the Q1-mapped
+      // reference cell), then project it back into the reference cell
+      // in hopes that this gives a better starting point to the
+      // following iteration
+      initial_p_unit = GeometryInfo<dim>::project_to_unit_cell(initial_p_unit);
+    }
+
+  // perform the Newton iteration and return the result. note that
+  // this statement may throw an exception, which we simply pass up to
+  // the caller
+  return this->transform_real_to_unit_cell_internal(cell, p, initial_p_unit);
+}
+
+
+
+template<int dim, int spacedim> // returns the flags in 'in' plus every flag they depend on
+UpdateFlags
+MappingQGeneric<dim,spacedim>::requires_update_flags (const UpdateFlags in) const
+{
+  // add flags if the respective quantities are necessary to compute
+  // what we need. note that some flags appear in both the conditions
+  // and in subsequent set operations. this leads to some circular
+  // logic. the only way to treat this is to iterate. since there are
+  // 5 if-clauses in the loop, it will take at most 5 iterations to
+  // converge. do them:
+  UpdateFlags out = in;
+  for (unsigned int i=0; i<5; ++i)
+    {
+      // The following is a little incorrect:
+      // If not applied on a face,
+      // update_boundary_forms does not
+      // make sense. On the other hand,
+      // it is necessary on a
+      // face. Currently,
+      // update_boundary_forms is simply
+      // ignored for the interior of a
+      // cell.
+      if (out & (update_JxW_values
+                 | update_normal_vectors))
+        out |= update_boundary_forms;
+
+      if (out & (update_covariant_transformation
+                 | update_JxW_values
+                 | update_jacobians
+                 | update_jacobian_grads
+                 | update_boundary_forms
+                 | update_normal_vectors))
+        out |= update_contravariant_transformation;
+
+      if (out & (update_inverse_jacobians
+                 | update_jacobian_pushed_forward_grads
+                 | update_jacobian_pushed_forward_2nd_derivatives
+                 | update_jacobian_pushed_forward_3rd_derivatives) )
+        out |= update_covariant_transformation;
+
+      // The contravariant transformation is
+      // used in the Piola transformation, which
+      // requires the determinant of the
+      // Jacobi matrix of the transformation.
+      // Because we have no way of knowing here whether the finite
+      // element wants to use the contravariant or the Piola
+      // transforms, we add the JxW values to the list of flags to be
+      // updated for each cell.
+      if (out & update_contravariant_transformation)
+        out |= update_JxW_values;
+
+      if (out & update_normal_vectors) // normal vectors also pull in the JxW values
+        out |= update_JxW_values;
+    }
+
+  return out;
+}
+
+
+
+template<int dim, int spacedim> // creates the InternalData object for the points of quadrature q
+typename MappingQGeneric<dim,spacedim>::InternalData *
+MappingQGeneric<dim,spacedim>::get_data (const UpdateFlags update_flags,
+                                         const Quadrature<dim> &q) const
+{
+  InternalData *data = new InternalData(polynomial_degree); // raw new: the caller has to take ownership
+  data->initialize (this->requires_update_flags(update_flags), q, q.size()); // flags are first expanded by requires_update_flags()
+
+  return data;
+}
+
+
+
+template<int dim, int spacedim> // creates the InternalData object for a (dim-1)-dimensional face quadrature
+typename MappingQGeneric<dim,spacedim>::InternalData *
+MappingQGeneric<dim,spacedim>::get_face_data (const UpdateFlags        update_flags,
+                                              const Quadrature<dim-1> &quadrature) const
+{
+  InternalData *data = new InternalData(polynomial_degree); // raw new: the caller has to take ownership
+  data->initialize_face (this->requires_update_flags(update_flags), // flags expanded by requires_update_flags()
+                         QProjector<dim>::project_to_all_faces(quadrature), // face quadrature projected to all faces
+                         quadrature.size()); // size of the unprojected face quadrature
+
+  return data;
+}
+
+
+
+template<int dim, int spacedim> // creates the InternalData object for a (dim-1)-dimensional quadrature on subfaces
+typename MappingQGeneric<dim,spacedim>::InternalData *
+MappingQGeneric<dim,spacedim>::get_subface_data (const UpdateFlags update_flags,
+                                                 const Quadrature<dim-1>& quadrature) const
+{
+  InternalData *data = new InternalData(polynomial_degree); // raw new: the caller has to take ownership
+  data->initialize_face (this->requires_update_flags(update_flags), // flags expanded by requires_update_flags()
+                         QProjector<dim>::project_to_all_subfaces(quadrature), // quadrature projected to all subfaces
+                         quadrature.size()); // size of the unprojected quadrature
+
+  return data;
+}
+
+
+
+namespace internal
+{
+  namespace
+  {
+    /**
+     * Fill @p quadrature_points with the real-space locations of the points
+     * selected by the offset @p data_set, i.e. sum_k shape_k * support_point_k
+     * using the support points already stored in @p data. Nothing is done
+     * unless data.update_each contains update_quadrature_points.
+     */
+    template <int dim, int spacedim>
+    void
+    maybe_compute_q_points (const typename QProjector<dim>::DataSetDescriptor                 data_set,
+                            const typename dealii::MappingQGeneric<dim,spacedim>::InternalData      &data,
+                            std::vector<Point<spacedim> >                                     &quadrature_points)
+    {
+      const UpdateFlags update_flags = data.update_each;
+
+      if (update_flags & update_quadrature_points)
+        {
+          for (unsigned int point=0; point<quadrature_points.size(); ++point) // the output size fixes the number of points
+            {
+              const double *shape = &data.shape(point+data_set,0); // first of the shape values at this point, indexed by k below
+              Point<spacedim> result = (shape[0] * // k=0 term starts the sum, so no zero-initialization is needed
+                                        data.mapping_support_points[0]);
+              for (unsigned int k=1; k<data.n_shape_functions; ++k)
+                for (unsigned int i=0; i<spacedim; ++i)
+                  result[i] += shape[k] * data.mapping_support_points[k][i];
+              quadrature_points[point] = result;
+            }
+        }
+    }
+
+
+    /**
+     * Update the contravariant and covariant matrices as well as the volume
+     * elements (determinants) for the cell whose support points are stored in
+     * @p data, each only if the corresponding flag is set in data.update_each.
+     * The results are written into @p data even though it is passed as const.
+     * All work is skipped if @p cell_similarity is CellSimilarity::translation.
+     */
+    template <int dim, int spacedim>
+    void
+    maybe_update_Jacobians (const CellSimilarity::Similarity                                   cell_similarity,
+                            const typename dealii::QProjector<dim>::DataSetDescriptor          data_set,
+                            const typename dealii::MappingQGeneric<dim,spacedim>::InternalData      &data)
+    {
+      const UpdateFlags update_flags = data.update_each;
+
+      if (update_flags & update_contravariant_transformation)
+        // if the current cell is just a
+        // translation of the previous one, no
+        // need to recompute jacobians...
+        if (cell_similarity != CellSimilarity::translation)
+          {
+            const unsigned int n_q_points = data.contravariant.size();
+
+            std::fill(data.contravariant.begin(), data.contravariant.end(),
+                      DerivativeForm<1,dim,spacedim>()); // reset every entry to a default-constructed form
+
+            Assert (data.n_shape_functions > 0, ExcInternalError()); // index 0 is accessed unconditionally below
+            const Tensor<1,spacedim> *supp_pts =
+              &data.mapping_support_points[0];
+
+            for (unsigned int point=0; point<n_q_points; ++point)
+              {
+                const Tensor<1,dim> *data_derv =
+                  &data.derivative(point+data_set, 0); // first of the shape derivatives at this point, indexed by k below
+
+                double result [spacedim][dim]; // result[i][j] = sum_k data_derv[k][j] * supp_pts[k][i]
+
+                // peel away part of sum to avoid zeroing the
+                // entries and adding for the first time
+                for (unsigned int i=0; i<spacedim; ++i)
+                  for (unsigned int j=0; j<dim; ++j)
+                    result[i][j] = data_derv[0][j] * supp_pts[0][i];
+                for (unsigned int k=1; k<data.n_shape_functions; ++k)
+                  for (unsigned int i=0; i<spacedim; ++i)
+                    for (unsigned int j=0; j<dim; ++j)
+                      result[i][j] += data_derv[k][j] * supp_pts[k][i];
+
+                // write result into contravariant data. for
+                // j=dim in the case dim<spacedim, there will
+                // never be any nonzero data that arrives in
+                // here, so it is ok anyway because it was
+                // initialized to zero at the initialization
+                for (unsigned int i=0; i<spacedim; ++i)
+                  for (unsigned int j=0; j<dim; ++j)
+                    data.contravariant[point][i][j] = result[i][j];
+              }
+          }
+
+      if (update_flags & update_covariant_transformation)
+        if (cell_similarity != CellSimilarity::translation)
+          {
+            const unsigned int n_q_points = data.contravariant.size();
+            for (unsigned int point=0; point<n_q_points; ++point)
+              {
+                data.covariant[point] = (data.contravariant[point]).covariant_form(); // derived from the contravariant matrix at the same point
+              }
+          }
+
+      if (update_flags & update_volume_elements)
+        if (cell_similarity != CellSimilarity::translation)
+          {
+            const unsigned int n_q_points = data.contravariant.size();
+            for (unsigned int point=0; point<n_q_points; ++point)
+              data.volume_elements[point] = data.contravariant[point].determinant(); // reads data.contravariant as currently stored
+          }
+
+    }
+
+    /**
+     * Fill @p jacobian_grads with the second derivatives of the transformation
+     * from unit to real cell (the Jacobian gradients), if data.update_each asks.
+     *
+     * Skipped if @p cell_similarity is CellSimilarity::translation.
+     */
+    template <int dim, int spacedim>
+    void
+    maybe_update_jacobian_grads (const CellSimilarity::Similarity                                   cell_similarity,
+                                 const typename QProjector<dim>::DataSetDescriptor                  data_set,
+                                 const typename dealii::MappingQGeneric<dim,spacedim>::InternalData      &data,
+                                 std::vector<DerivativeForm<2,dim,spacedim> >                      &jacobian_grads)
+    {
+      const UpdateFlags update_flags = data.update_each;
+      if (update_flags & update_jacobian_grads)
+        {
+          const unsigned int n_q_points = jacobian_grads.size(); // the output size fixes the number of points
+
+          if (cell_similarity != CellSimilarity::translation)
+            {
+              for (unsigned int point=0; point<n_q_points; ++point)
+                {
+                  const Tensor<2,dim> *second =
+                    &data.second_derivative(point+data_set, 0); // first of the second derivatives at this point, indexed by k below
+                  double result [spacedim][dim][dim]; // result[i][j][l] = sum_k second[k][j][l] * support_point[k][i]
+                  for (unsigned int i=0; i<spacedim; ++i) // k=0 term initializes the sum
+                    for (unsigned int j=0; j<dim; ++j)
+                      for (unsigned int l=0; l<dim; ++l)
+                        result[i][j][l] = (second[0][j][l] *
+                                           data.mapping_support_points[0][i]);
+                  for (unsigned int k=1; k<data.n_shape_functions; ++k) // remaining terms are accumulated
+                    for (unsigned int i=0; i<spacedim; ++i)
+                      for (unsigned int j=0; j<dim; ++j)
+                        for (unsigned int l=0; l<dim; ++l)
+                          result[i][j][l]
+                          += (second[k][j][l]
+                              *
+                              data.mapping_support_points[k][i]);
+
+                  for (unsigned int i=0; i<spacedim; ++i) // copy the local array into the output
+                    for (unsigned int j=0; j<dim; ++j)
+                      for (unsigned int l=0; l<dim; ++l)
+                        jacobian_grads[point][i][j][l] = result[i][j][l];
+                }
+            }
+        }
+    }
+
+    /**
+     * Fill @p jacobian_pushed_forward_grads with the Jacobian gradients whose
+     * last two indices are pushed forward with data.covariant, if requested.
+     *
+     * Skipped if @p cell_similarity is CellSimilarity::translation.
+     */
+    template <int dim, int spacedim>
+    void
+    maybe_update_jacobian_pushed_forward_grads (const CellSimilarity::Similarity                                   cell_similarity,
+                                                const typename QProjector<dim>::DataSetDescriptor                  data_set,
+                                                const typename dealii::MappingQGeneric<dim,spacedim>::InternalData      &data,
+                                                std::vector<Tensor<3,spacedim> >                      &jacobian_pushed_forward_grads)
+    {
+      const UpdateFlags update_flags = data.update_each;
+      if (update_flags & update_jacobian_pushed_forward_grads)
+        {
+          const unsigned int n_q_points = jacobian_pushed_forward_grads.size(); // the output size fixes the number of points
+
+          if (cell_similarity != CellSimilarity::translation)
+            {
+              double tmp[spacedim][spacedim][spacedim]; // scratch for the half-pushed tensor; only l<dim is used
+              for (unsigned int point=0; point<n_q_points; ++point)
+                {
+                  const Tensor<2,dim> *second =
+                    &data.second_derivative(point+data_set, 0); // first of the second derivatives at this point, indexed by k below
+                  double result [spacedim][dim][dim]; // result[i][j][l] = sum_k second[k][j][l] * support_point[k][i]
+                  for (unsigned int i=0; i<spacedim; ++i) // k=0 term initializes the sum
+                    for (unsigned int j=0; j<dim; ++j)
+                      for (unsigned int l=0; l<dim; ++l)
+                        result[i][j][l] = (second[0][j][l] *
+                                           data.mapping_support_points[0][i]);
+                  for (unsigned int k=1; k<data.n_shape_functions; ++k) // remaining terms are accumulated
+                    for (unsigned int i=0; i<spacedim; ++i)
+                      for (unsigned int j=0; j<dim; ++j)
+                        for (unsigned int l=0; l<dim; ++l)
+                          result[i][j][l]
+                          += (second[k][j][l]
+                              *
+                              data.mapping_support_points[k][i]);
+
+                  // first push forward the j-components
+                  for (unsigned int i=0; i<spacedim; ++i)
+                    for (unsigned int j=0; j<spacedim; ++j)
+                      for (unsigned int l=0; l<dim; ++l)
+                        {
+                          tmp[i][j][l] = result[i][0][l] * // jr=0 term initializes the contraction
+                                         data.covariant[point][j][0];
+                          for (unsigned int jr=1; jr<dim; ++jr)
+                            {
+                              tmp[i][j][l] += result[i][jr][l] *
+                                              data.covariant[point][j][jr];
+                            }
+                        }
+
+                  // now, pushing forward the l-components
+                  for (unsigned int i=0; i<spacedim; ++i)
+                    for (unsigned int j=0; j<spacedim; ++j)
+                      for (unsigned int l=0; l<spacedim; ++l)
+                        {
+                          jacobian_pushed_forward_grads[point][i][j][l] = tmp[i][j][0] * // lr=0 term initializes the contraction
+                                                                          data.covariant[point][l][0];
+                          for (unsigned int lr=1; lr<dim; ++lr)
+                            {
+                              jacobian_pushed_forward_grads[point][i][j][l] += tmp[i][j][lr] *
+                                                                               data.covariant[point][l][lr];
+                            }
+
+                        }
+                }
+            }
+        }
+    }
+
+    /**
+     * Fill @p jacobian_2nd_derivatives with the third derivatives of the
+     * transformation from unit to real cell (the Jacobian Hessians), if requested.
+     *
+     * Skipped if @p cell_similarity is CellSimilarity::translation.
+     */
+    template <int dim, int spacedim>
+    void
+    maybe_update_jacobian_2nd_derivatives (const CellSimilarity::Similarity                              cell_similarity,
+                                           const typename QProjector<dim>::DataSetDescriptor             data_set,
+                                           const typename dealii::MappingQGeneric<dim,spacedim>::InternalData &data,
+                                           std::vector<DerivativeForm<3,dim,spacedim> >                 &jacobian_2nd_derivatives)
+    {
+      const UpdateFlags update_flags = data.update_each;
+      if (update_flags & update_jacobian_2nd_derivatives)
+        {
+          const unsigned int n_q_points = jacobian_2nd_derivatives.size(); // the output size fixes the number of points
+
+          if (cell_similarity != CellSimilarity::translation)
+            {
+              for (unsigned int point=0; point<n_q_points; ++point)
+                {
+                  const Tensor<3,dim> *third =
+                    &data.third_derivative(point+data_set, 0); // first of the third derivatives at this point, indexed by k below
+                  double result [spacedim][dim][dim][dim]; // result[i][j][l][m] = sum_k third[k][j][l][m] * support_point[k][i]
+                  for (unsigned int i=0; i<spacedim; ++i) // k=0 term initializes the sum
+                    for (unsigned int j=0; j<dim; ++j)
+                      for (unsigned int l=0; l<dim; ++l)
+                        for (unsigned int m=0; m<dim; ++m)
+                          result[i][j][l][m] = (third[0][j][l][m] *
+                                                data.mapping_support_points[0][i]);
+                  for (unsigned int k=1; k<data.n_shape_functions; ++k) // remaining terms are accumulated
+                    for (unsigned int i=0; i<spacedim; ++i)
+                      for (unsigned int j=0; j<dim; ++j)
+                        for (unsigned int l=0; l<dim; ++l)
+                          for (unsigned int m=0; m<dim; ++m)
+                            result[i][j][l][m]
+                            += (third[k][j][l][m]
+                                *
+                                data.mapping_support_points[k][i]);
+
+                  for (unsigned int i=0; i<spacedim; ++i) // copy the local array into the output
+                    for (unsigned int j=0; j<dim; ++j)
+                      for (unsigned int l=0; l<dim; ++l)
+                        for (unsigned int m=0; m<dim; ++m)
+                          jacobian_2nd_derivatives[point][i][j][l][m] = result[i][j][l][m];
+                }
+            }
+        }
+    }
+
+    /**
+     * Fill @p jacobian_pushed_forward_2nd_derivatives with the third
+     * derivatives of the transformation from unit to real cell, whose last three
+     * indices are pushed forward with data.covariant, if data.update_each asks.
+     *
+     * Skipped if @p cell_similarity is CellSimilarity::translation.
+     */
+    template <int dim, int spacedim>
+    void
+    maybe_update_jacobian_pushed_forward_2nd_derivatives (const CellSimilarity::Similarity                                   cell_similarity,
+                                                          const typename QProjector<dim>::DataSetDescriptor                  data_set,
+                                                          const typename dealii::MappingQGeneric<dim,spacedim>::InternalData      &data,
+                                                          std::vector<Tensor<4,spacedim> >                      &jacobian_pushed_forward_2nd_derivatives)
+    {
+      const UpdateFlags update_flags = data.update_each;
+      if (update_flags & update_jacobian_pushed_forward_2nd_derivatives)
+        {
+          const unsigned int n_q_points = jacobian_pushed_forward_2nd_derivatives.size(); // the output size fixes the number of points
+
+          if (cell_similarity != CellSimilarity::translation)
+            {
+              double tmp[spacedim][spacedim][spacedim][spacedim]; // scratch for the second of the three push-forward steps
+              for (unsigned int point=0; point<n_q_points; ++point)
+                {
+                  const Tensor<3,dim> *third =
+                    &data.third_derivative(point+data_set, 0); // first of the third derivatives at this point, indexed by k below
+                  double result [spacedim][dim][dim][dim]; // result[i][j][l][m] = sum_k third[k][j][l][m] * support_point[k][i]
+                  for (unsigned int i=0; i<spacedim; ++i) // k=0 term initializes the sum
+                    for (unsigned int j=0; j<dim; ++j)
+                      for (unsigned int l=0; l<dim; ++l)
+                        for (unsigned int m=0; m<dim; ++m)
+                          result[i][j][l][m] = (third[0][j][l][m] *
+                                                data.mapping_support_points[0][i]);
+                  for (unsigned int k=1; k<data.n_shape_functions; ++k) // remaining terms are accumulated
+                    for (unsigned int i=0; i<spacedim; ++i)
+                      for (unsigned int j=0; j<dim; ++j)
+                        for (unsigned int l=0; l<dim; ++l)
+                          for (unsigned int m=0; m<dim; ++m)
+                            result[i][j][l][m]
+                            += (third[k][j][l][m]
+                                *
+                                data.mapping_support_points[k][i]);
+
+                  // push forward the j-coordinate (the output array doubles as scratch here)
+                  for (unsigned int i=0; i<spacedim; ++i)
+                    for (unsigned int j=0; j<spacedim; ++j)
+                      for (unsigned int l=0; l<dim; ++l)
+                        for (unsigned int m=0; m<dim; ++m)
+                          {
+                            jacobian_pushed_forward_2nd_derivatives[point][i][j][l][m]
+                              = result[i][0][l][m]*
+                                data.covariant[point][j][0];
+                            for (unsigned int jr=1; jr<dim; ++jr)
+                              jacobian_pushed_forward_2nd_derivatives[point][i][j][l][m]
+                              += result[i][jr][l][m]*
+                                 data.covariant[point][j][jr];
+                          }
+
+                  // push forward the l-coordinate (from the output array into tmp)
+                  for (unsigned int i=0; i<spacedim; ++i)
+                    for (unsigned int j=0; j<spacedim; ++j)
+                      for (unsigned int l=0; l<spacedim; ++l)
+                        for (unsigned int m=0; m<dim; ++m)
+                          {
+                            tmp[i][j][l][m]
+                              = jacobian_pushed_forward_2nd_derivatives[point][i][j][0][m]*
+                                data.covariant[point][l][0];
+                            for (unsigned int lr=1; lr<dim; ++lr)
+                              tmp[i][j][l][m]
+                              += jacobian_pushed_forward_2nd_derivatives[point][i][j][lr][m]*
+                                 data.covariant[point][l][lr];
+                          }
+
+                  // push forward the m-coordinate (from tmp into the final output)
+                  for (unsigned int i=0; i<spacedim; ++i)
+                    for (unsigned int j=0; j<spacedim; ++j)
+                      for (unsigned int l=0; l<spacedim; ++l)
+                        for (unsigned int m=0; m<spacedim; ++m)
+                          {
+                            jacobian_pushed_forward_2nd_derivatives[point][i][j][l][m]
+                              = tmp[i][j][l][0]*
+                                data.covariant[point][m][0];
+                            for (unsigned int mr=1; mr<dim; ++mr)
+                              jacobian_pushed_forward_2nd_derivatives[point][i][j][l][m]
+                              += tmp[i][j][l][mr]*
+                                 data.covariant[point][m][mr];
+                          }
+                }
+            }
+        }
+    }
+
+    /**
+     * Fill @p jacobian_3rd_derivatives with the fourth derivatives of the
+     * transformation from unit to real cell (the gradients of the Jacobian
+     * Hessians), one entry per element of the output vector.
+     *
+     * Each entry is the sum, over all shape functions, of the fourth
+     * derivative of the shape function times the matching mapping support
+     * point.
+     *
+     * Nothing is computed unless update_jacobian_3rd_derivatives is set in
+     * data.update_each. The output is also left untouched if
+     * @p cell_similarity is CellSimilarity::translation.
+     */
+    template <int dim, int spacedim>
+    void
+    maybe_update_jacobian_3rd_derivatives (const CellSimilarity::Similarity cell_similarity,
+                                           const typename QProjector<dim>::DataSetDescriptor data_set,
+                                           const typename dealii::MappingQGeneric<dim,spacedim>::InternalData &data,
+                                           std::vector<DerivativeForm<4,dim,spacedim> > &jacobian_3rd_derivatives)
+    {
+      // not requested: leave the output alone
+      if (!(data.update_each & update_jacobian_3rd_derivatives))
+        return;
+
+      // translated cell: the previously stored values are kept
+      if (cell_similarity == CellSimilarity::translation)
+        return;
+
+      const unsigned int n_points = jacobian_3rd_derivatives.size();
+      for (unsigned int q=0; q<n_points; ++q)
+        {
+          // fourth derivatives of the shape functions at this point,
+          // indexed by shape function number
+          const Tensor<4,dim> *shape_4th =
+            &data.fourth_derivative(q+data_set, 0);
+
+          for (unsigned int c=0; c<spacedim; ++c)
+            for (unsigned int a=0; a<dim; ++a)
+              for (unsigned int b=0; b<dim; ++b)
+                for (unsigned int d=0; d<dim; ++d)
+                  for (unsigned int e=0; e<dim; ++e)
+                    {
+                      // start with shape function 0 and add the others
+                      // in ascending order
+                      double sum = (shape_4th[0][a][b][d][e] *
+                                    data.mapping_support_points[0][c]);
+                      for (unsigned int k=1; k<data.n_shape_functions; ++k)
+                        sum += (shape_4th[k][a][b][d][e]
+                                *
+                                data.mapping_support_points[k][c]);
+
+                      jacobian_3rd_derivatives[q][c][a][b][d][e] = sum;
+                    }
+        }
+    }
+
+    /**
+     * Update the fourth derivatives of the transformation from unit to real
+     * cell (the gradients of the Jacobian Hessians), pushed forward to the
+     * real cell coordinates.
+     *
+     * For each quadrature point, the fourth derivative is first assembled as
+     * the sum over all shape functions of the shape function's fourth
+     * derivative times the corresponding mapping support point. Each of the
+     * four derivative indices (j, l, m, n) is then contracted in turn with
+     * data.covariant[point]. The array tmp and the output entry are used
+     * alternately as source and destination of these contractions, so the
+     * order of the four loop nests must not be changed.
+     *
+     * Nothing is done unless update_jacobian_pushed_forward_3rd_derivatives
+     * is set in data.update_each.
+     *
+     * Skip the computation if possible as indicated by the first argument:
+     * for CellSimilarity::translation the output is left unchanged.
+     */
+    template <int dim, int spacedim>
+    void
+    maybe_update_jacobian_pushed_forward_3rd_derivatives (const CellSimilarity::Similarity                                   cell_similarity,
+                                                          const typename QProjector<dim>::DataSetDescriptor                  data_set,
+                                                          const typename dealii::MappingQGeneric<dim,spacedim>::InternalData      &data,
+                                                          std::vector<Tensor<5,spacedim> >                      &jacobian_pushed_forward_3rd_derivatives)
+    {
+      const UpdateFlags update_flags = data.update_each;
+      if (update_flags & update_jacobian_pushed_forward_3rd_derivatives)
+        {
+          const unsigned int n_q_points = jacobian_pushed_forward_3rd_derivatives.size();
+
+          if (cell_similarity != CellSimilarity::translation)
+            {
+              double tmp[spacedim][spacedim][spacedim][spacedim][spacedim]; // scratch buffer, reused for every point
+              for (unsigned int point=0; point<n_q_points; ++point)
+                {
+                  const Tensor<4,dim> *fourth =
+                    &data.fourth_derivative(point+data_set, 0); // indexed below by shape function number
+                  double result [spacedim][dim][dim][dim][dim]; // fourth derivative before any push-forward
+                  for (unsigned int i=0; i<spacedim; ++i)
+                    for (unsigned int j=0; j<dim; ++j)
+                      for (unsigned int l=0; l<dim; ++l)
+                        for (unsigned int m=0; m<dim; ++m)
+                          for (unsigned int n=0; n<dim; ++n)
+                            result[i][j][l][m][n] = (fourth[0][j][l][m][n] *
+                                                     data.mapping_support_points[0][i]);
+                  for (unsigned int k=1; k<data.n_shape_functions; ++k)
+                    for (unsigned int i=0; i<spacedim; ++i)
+                      for (unsigned int j=0; j<dim; ++j)
+                        for (unsigned int l=0; l<dim; ++l)
+                          for (unsigned int m=0; m<dim; ++m)
+                            for (unsigned int n=0; n<dim; ++n)
+                              result[i][j][l][m][n]
+                              += (fourth[k][j][l][m][n]
+                                  *
+                                  data.mapping_support_points[k][i]);
+
+                  // push-forward the j-coordinate (reads result, writes tmp)
+                  for (unsigned int i=0; i<spacedim; ++i)
+                    for (unsigned int j=0; j<spacedim; ++j)
+                      for (unsigned int l=0; l<dim; ++l)
+                        for (unsigned int m=0; m<dim; ++m)
+                          for (unsigned int n=0; n<dim; ++n)
+                            {
+                              tmp[i][j][l][m][n] = result[i][0][l][m][n] *
+                                                   data.covariant[point][j][0];
+                              for (unsigned int jr=1; jr<dim; ++jr)
+                                tmp[i][j][l][m][n] += result[i][jr][l][m][n] *
+                                                      data.covariant[point][j][jr];
+                            }
+
+                  // push-forward the l-coordinate (reads tmp, writes the output entry)
+                  for (unsigned int i=0; i<spacedim; ++i)
+                    for (unsigned int j=0; j<spacedim; ++j)
+                      for (unsigned int l=0; l<spacedim; ++l)
+                        for (unsigned int m=0; m<dim; ++m)
+                          for (unsigned int n=0; n<dim; ++n)
+                            {
+                              jacobian_pushed_forward_3rd_derivatives[point][i][j][l][m][n]
+                                = tmp[i][j][0][m][n] *
+                                  data.covariant[point][l][0];
+                              for (unsigned int lr=1; lr<dim; ++lr)
+                                jacobian_pushed_forward_3rd_derivatives[point][i][j][l][m][n]
+                                += tmp[i][j][lr][m][n] *
+                                   data.covariant[point][l][lr];
+                            }
+
+                  // push-forward the m-coordinate (reads the output entry, writes tmp)
+                  for (unsigned int i=0; i<spacedim; ++i)
+                    for (unsigned int j=0; j<spacedim; ++j)
+                      for (unsigned int l=0; l<spacedim; ++l)
+                        for (unsigned int m=0; m<spacedim; ++m)
+                          for (unsigned int n=0; n<dim; ++n)
+                            {
+                              tmp[i][j][l][m][n]
+                                = jacobian_pushed_forward_3rd_derivatives[point][i][j][l][0][n] *
+                                  data.covariant[point][m][0];
+                              for (unsigned int mr=1; mr<dim; ++mr)
+                                tmp[i][j][l][m][n]
+                                += jacobian_pushed_forward_3rd_derivatives[point][i][j][l][mr][n] *
+                                   data.covariant[point][m][mr];
+                            }
+
+                  // push-forward the n-coordinate (reads tmp, writes the final output entry)
+                  for (unsigned int i=0; i<spacedim; ++i)
+                    for (unsigned int j=0; j<spacedim; ++j)
+                      for (unsigned int l=0; l<spacedim; ++l)
+                        for (unsigned int m=0; m<spacedim; ++m)
+                          for (unsigned int n=0; n<spacedim; ++n)
+                            {
+                              jacobian_pushed_forward_3rd_derivatives[point][i][j][l][m][n]
+                                = tmp[i][j][l][m][0] *
+                                  data.covariant[point][n][0];
+                              for (unsigned int nr=1; nr<dim; ++nr)
+                                jacobian_pushed_forward_3rd_derivatives[point][i][j][l][m][n]
+                                += tmp[i][j][l][m][nr] *
+                                   data.covariant[point][n][nr];
+                            }
+                }
+            }
+        }
+    }
+  }
+}
+
+
+
+/**
+ * Fill @p output_data with the mapping-related quantities selected by the
+ * update flags stored in @p internal_data (data.update_each), evaluated for
+ * the given @p quadrature on @p cell.
+ *
+ * The mapping support points cached in the internal data object are
+ * recomputed first if they are empty or belong to a different cell. The
+ * function then fills, as requested: quadrature points, JxW values and (for
+ * dim < spacedim) cell normal vectors, Jacobians, inverse Jacobians, and the
+ * Jacobian derivatives up to third order together with their pushed-forward
+ * counterparts.
+ *
+ * Where the code checks for it, quantities are not recomputed if
+ * @p cell_similarity is CellSimilarity::translation.
+ *
+ * @return The @p cell_similarity argument, unchanged.
+ */
+template<int dim, int spacedim>
+CellSimilarity::Similarity
+MappingQGeneric<dim,spacedim>::
+fill_fe_values (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                const CellSimilarity::Similarity                           cell_similarity,
+                const Quadrature<dim>                                     &quadrature,
+                const typename Mapping<dim,spacedim>::InternalDataBase    &internal_data,
+                internal::FEValues::MappingRelatedData<dim,spacedim>      &output_data) const
+{
+  // ensure that the following static_cast is really correct:
+  Assert (dynamic_cast<const InternalData *>(&internal_data) != 0,
+          ExcInternalError());
+  const InternalData &data = static_cast<const InternalData &>(internal_data);
+
+  const unsigned int n_q_points=quadrature.size();
+
+  // if necessary, recompute the support points of the transformation of this cell
+  // (note that we need to first check the triangulation pointer, since otherwise
+  // the second test might trigger an exception if the triangulations are not the
+  // same)
+  if ((data.mapping_support_points.size() == 0)
+      ||
+      (&cell->get_triangulation() !=
+       &data.cell_of_current_support_points->get_triangulation())
+      ||
+      (cell != data.cell_of_current_support_points))
+    {
+      data.mapping_support_points = this->compute_mapping_support_points(cell);
+      data.cell_of_current_support_points = cell; // remember which cell the cache belongs to
+    }
+
+  internal::maybe_compute_q_points<dim,spacedim> (QProjector<dim>::DataSetDescriptor::cell (),
+                                                  data,
+                                                  output_data.quadrature_points);
+  internal::maybe_update_Jacobians<dim,spacedim> (cell_similarity,
+                                                  QProjector<dim>::DataSetDescriptor::cell (),
+                                                  data);
+
+  const UpdateFlags update_flags = data.update_each;
+  const std::vector<double> &weights=quadrature.get_weights();
+
+  // Multiply quadrature weights by absolute value of Jacobian determinants or
+  // the area element g=sqrt(DX^t DX) in case of codim > 0
+
+  if (update_flags & (update_normal_vectors
+                      | update_JxW_values))
+    {
+      AssertDimension (output_data.JxW_values.size(), n_q_points);
+
+      Assert( !(update_flags & update_normal_vectors ) ||
+              (output_data.normal_vectors.size() == n_q_points),
+              ExcDimensionMismatch(output_data.normal_vectors.size(), n_q_points));
+
+
+      if (cell_similarity != CellSimilarity::translation)
+        for (unsigned int point=0; point<n_q_points; ++point)
+          {
+
+            if (dim == spacedim)
+              {
+                const double det = data.contravariant[point].determinant();
+
+                // check for distorted cells.
+
+                // TODO: this allows for anisotropies of up to 1e6 in 3D and
+                // 1e12 in 2D. might want to find a finer
+                // (dimension-independent) criterion
+                Assert (det > 1e-12*Utilities::fixed_power<dim>(cell->diameter()/
+                                                                std::sqrt(double(dim))),
+                        (typename Mapping<dim,spacedim>::ExcDistortedMappedCell(cell->center(), det, point)));
+
+                output_data.JxW_values[point] = weights[point] * det;
+              }
+            // if dim==spacedim, then there is no cell normal to
+            // compute. since this is for FEValues (and not FEFaceValues),
+            // there are also no face normals to compute
+            else //codim>0 case
+              {
+                Tensor<1, spacedim> DX_t [dim]; // rows of the transposed Jacobian, filled below
+                for (unsigned int i=0; i<spacedim; ++i)
+                  for (unsigned int j=0; j<dim; ++j)
+                    DX_t[j][i] = data.contravariant[point][i][j];
+
+                Tensor<2, dim> G; //First fundamental form
+                for (unsigned int i=0; i<dim; ++i)
+                  for (unsigned int j=0; j<dim; ++j)
+                    G[i][j] = DX_t[i] * DX_t[j];
+
+                output_data.JxW_values[point]
+                  = sqrt(determinant(G)) * weights[point];
+
+                if (cell_similarity == CellSimilarity::inverted_translation)
+                  {
+                    // we only need to flip the normal
+                    if (update_flags & update_normal_vectors)
+                      output_data.normal_vectors[point] *= -1.;
+                  }
+                else
+                  {
+                    const unsigned int codim = spacedim-dim;
+                    (void)codim; // codim is only read inside the Assert below; presumably this silences an unused-variable warning
+
+                    if (update_flags & update_normal_vectors)
+                      {
+                        Assert( codim==1 , ExcMessage("There is no cell normal in codim 2."));
+
+                        if (dim==1)
+                          output_data.normal_vectors[point] =
+                            cross_product_2d(-DX_t[0]);
+                        else //dim == 2
+                          output_data.normal_vectors[point] =
+                            cross_product_3d(DX_t[0], DX_t[1]);
+
+                        output_data.normal_vectors[point] /= output_data.normal_vectors[point].norm(); // normalize to unit length
+
+                        if (cell->direction_flag() == false) // flip the normal on cells whose direction flag is false
+                          output_data.normal_vectors[point] *= -1.;
+                      }
+
+                  }
+              } //codim>0 case
+
+          }
+    }
+
+
+
+  // copy values from InternalData to vector given by reference
+  if (update_flags & update_jacobians)
+    {
+      AssertDimension (output_data.jacobians.size(), n_q_points);
+      if (cell_similarity != CellSimilarity::translation)
+        for (unsigned int point=0; point<n_q_points; ++point)
+          output_data.jacobians[point] = data.contravariant[point];
+    }
+
+  // copy values from InternalData to vector given by reference; the
+  // inverse Jacobian is stored as the transpose of data.covariant
+  if (update_flags & update_inverse_jacobians)
+    {
+      AssertDimension (output_data.inverse_jacobians.size(), n_q_points);
+      if (cell_similarity != CellSimilarity::translation)
+        for (unsigned int point=0; point<n_q_points; ++point)
+          output_data.inverse_jacobians[point] = data.covariant[point].transpose();
+    }
+
+  internal::maybe_update_jacobian_grads<dim,spacedim> (cell_similarity,
+                                                       QProjector<dim>::DataSetDescriptor::cell (),
+                                                       data,
+                                                       output_data.jacobian_grads);
+
+  internal::maybe_update_jacobian_pushed_forward_grads<dim,spacedim> (cell_similarity,
+      QProjector<dim>::DataSetDescriptor::cell (),
+      data,
+      output_data.jacobian_pushed_forward_grads);
+
+  internal::maybe_update_jacobian_2nd_derivatives<dim,spacedim> (cell_similarity,
+      QProjector<dim>::DataSetDescriptor::cell (),
+      data,
+      output_data.jacobian_2nd_derivatives);
+
+  internal::maybe_update_jacobian_pushed_forward_2nd_derivatives<dim,spacedim> (cell_similarity,
+      QProjector<dim>::DataSetDescriptor::cell (),
+      data,
+      output_data.jacobian_pushed_forward_2nd_derivatives);
+
+  internal::maybe_update_jacobian_3rd_derivatives<dim,spacedim> (cell_similarity,
+      QProjector<dim>::DataSetDescriptor::cell (),
+      data,
+      output_data.jacobian_3rd_derivatives);
+
+  internal::maybe_update_jacobian_pushed_forward_3rd_derivatives<dim,spacedim> (cell_similarity,
+      QProjector<dim>::DataSetDescriptor::cell (),
+      data,
+      output_data.jacobian_pushed_forward_3rd_derivatives);
+
+  return cell_similarity; // handed back to the caller unchanged
+}
+
+
+
+
+
+
+namespace internal
+{
+  namespace
+  {
+    /**
+     * Depending on what information is called for in the update flags of the
+     * @p data object, compute the various pieces of information that is required
+     * by the fill_fe_face_values() and fill_fe_subface_values() functions.
+     * This function simply unifies the work that would be done by
+     * those two functions.
+     *
+     * Everything in this function happens only if update_boundary_forms is
+     * set in data.update_each. In that case the boundary forms are computed
+     * and, if also requested, the JxW values, normal vectors, Jacobians and
+     * inverse Jacobians are derived from them or copied from @p data.
+     *
+     * If @p subface_no is not numbers::invalid_unsigned_int, the JxW values
+     * are additionally scaled by the subface ratio of that subface.
+     *
+     * The resulting data is put into the @p output_data argument.
+     */
+    template <int dim, int spacedim>
+    void
+    maybe_compute_face_data (const dealii::MappingQGeneric<dim,spacedim> &mapping,
+                             const typename dealii::Triangulation<dim,spacedim>::cell_iterator &cell,
+                             const unsigned int               face_no,
+                             const unsigned int               subface_no,
+                             const unsigned int               n_q_points,
+                             const std::vector<double>        &weights,
+                             const typename dealii::MappingQGeneric<dim,spacedim>::InternalData &data,
+                             internal::FEValues::MappingRelatedData<dim,spacedim>         &output_data)
+    {
+      const UpdateFlags update_flags = data.update_each;
+
+      if (update_flags & update_boundary_forms)
+        {
+          AssertDimension (output_data.boundary_forms.size(), n_q_points);
+          if (update_flags & update_normal_vectors)
+            AssertDimension (output_data.normal_vectors.size(), n_q_points);
+          if (update_flags & update_JxW_values)
+            AssertDimension (output_data.JxW_values.size(), n_q_points);
+
+          // map the unit tangentials to the real cell. checking for d!=dim-1
+          // eliminates compiler warnings regarding unsigned int expressions <
+          // 0.
+          for (unsigned int d=0; d!=dim-1; ++d)
+            {
+              Assert (face_no+GeometryInfo<dim>::faces_per_cell*d <
+                      data.unit_tangentials.size(),
+                      ExcInternalError());
+              Assert (data.aux[d].size() <=
+                      data.unit_tangentials[face_no+GeometryInfo<dim>::faces_per_cell*d].size(),
+                      ExcInternalError());
+
+              mapping.transform (make_array_view(data.unit_tangentials[face_no+GeometryInfo<dim>::faces_per_cell*d]),
+                                 mapping_contravariant,
+                                 data,
+                                 make_array_view(data.aux[d])); // transformed tangentials end up in data.aux[d]
+            }
+
+          // if dim==spacedim, we can use the unit tangentials to compute the
+          // boundary form by simply taking the cross product
+          if (dim == spacedim)
+            {
+              for (unsigned int i=0; i<n_q_points; ++i)
+                switch (dim)
+                  {
+                  case 1:
+                    // in 1d, we don't have access to any of the data.aux
+                    // fields (because it has only dim-1 components), but we
+                    // can still compute the boundary form by simply
+                    // looking at the number of the face
+                    output_data.boundary_forms[i][0] = (face_no == 0 ?
+                                                        -1 : +1);
+                    break;
+                  case 2:
+                    output_data.boundary_forms[i] =
+                      cross_product_2d(data.aux[0][i]);
+                    break;
+                  case 3:
+                    output_data.boundary_forms[i] =
+                      cross_product_3d(data.aux[0][i], data.aux[1][i]);
+                    break;
+                  default:
+                    Assert(false, ExcNotImplemented());
+                  }
+            }
+          else //(dim < spacedim)
+            {
+              // in the codim-one case, the boundary form results from the
+              // cross product of all the face tangential vectors and the cell
+              // normal vector
+              //
+              // to compute the cell normal, use the same method used in
+              // fill_fe_values for cells above
+              AssertDimension (data.contravariant.size(), n_q_points);
+
+              for (unsigned int point=0; point<n_q_points; ++point)
+                {
+                  if (dim==1)
+                    {
+                      // J is a tangent vector
+                      output_data.boundary_forms[point] = data.contravariant[point].transpose()[0];
+                      output_data.boundary_forms[point] /=
+                        (face_no == 0 ? -1. : +1.) * output_data.boundary_forms[point].norm(); // unit length, sign chosen by face number
+                    }
+
+                  if (dim==2)
+                    {
+                      const DerivativeForm<1,spacedim,dim> DX_t =
+                        data.contravariant[point].transpose();
+
+                      Tensor<1, spacedim> cell_normal =
+                        cross_product_3d(DX_t[0], DX_t[1]);
+                      cell_normal /= cell_normal.norm();
+
+                      // then compute the face normal from the face tangent
+                      // and the cell normal:
+                      output_data.boundary_forms[point] =
+                        cross_product_3d(data.aux[0][point], cell_normal);
+                    }
+                }
+            }
+
+          if (update_flags & (update_normal_vectors
+                              | update_JxW_values))
+            for (unsigned int i=0; i<output_data.boundary_forms.size(); ++i)
+              {
+                if (update_flags & update_JxW_values)
+                  {
+                    output_data.JxW_values[i] = output_data.boundary_forms[i].norm() * weights[i];
+
+                    if (subface_no!=numbers::invalid_unsigned_int) // only when working on a subface
+                      {
+                        const double area_ratio=GeometryInfo<dim>::subface_ratio(
+                                                  cell->subface_case(face_no), subface_no);
+                        output_data.JxW_values[i] *= area_ratio;
+                      }
+                  }
+
+                if (update_flags & update_normal_vectors) // the normal is the boundary form scaled to unit length
+                  output_data.normal_vectors[i] = Point<spacedim>(output_data.boundary_forms[i] /
+                                                                  output_data.boundary_forms[i].norm());
+              }
+
+          if (update_flags & update_jacobians) // NOTE: only reached when update_boundary_forms is also set
+            for (unsigned int point=0; point<n_q_points; ++point)
+              output_data.jacobians[point] = data.contravariant[point];
+
+          if (update_flags & update_inverse_jacobians) // NOTE: only reached when update_boundary_forms is also set
+            for (unsigned int point=0; point<n_q_points; ++point)
+              output_data.inverse_jacobians[point] = data.covariant[point].transpose();
+        }
+    }
+
+
+    /**
+     * Common implementation behind MappingQGeneric::fill_fe_face_values()
+     * and MappingQGeneric::fill_fe_subface_values(). The caller selects the
+     * face or subface to work on through @p data_set (and passes
+     * @p face_no and @p subface_no along for the face-specific data).
+     *
+     * All helper functions are invoked with CellSimilarity::none, so no
+     * computation is skipped on account of cell similarity.
+     */
+    template<int dim, int spacedim>
+    void
+    do_fill_fe_face_values (const dealii::MappingQGeneric<dim,spacedim> &mapping,
+                            const typename dealii::Triangulation<dim,spacedim>::cell_iterator &cell,
+                            const unsigned int face_no,
+                            const unsigned int subface_no,
+                            const typename QProjector<dim>::DataSetDescriptor data_set,
+                            const Quadrature<dim-1> &quadrature,
+                            const typename dealii::MappingQGeneric<dim,spacedim>::InternalData &data,
+                            internal::FEValues::MappingRelatedData<dim,spacedim> &output_data)
+    {
+      const CellSimilarity::Similarity no_similarity = CellSimilarity::none;
+
+      // quadrature points and the cached Jacobians come first
+      maybe_compute_q_points<dim,spacedim>
+      (data_set, data, output_data.quadrature_points);
+      maybe_update_Jacobians<dim,spacedim>
+      (no_similarity, data_set, data);
+
+      // derivatives of the Jacobian, each followed by its pushed-forward
+      // variant
+      maybe_update_jacobian_grads<dim,spacedim>
+      (no_similarity, data_set, data,
+       output_data.jacobian_grads);
+      maybe_update_jacobian_pushed_forward_grads<dim,spacedim>
+      (no_similarity, data_set, data,
+       output_data.jacobian_pushed_forward_grads);
+      maybe_update_jacobian_2nd_derivatives<dim,spacedim>
+      (no_similarity, data_set, data,
+       output_data.jacobian_2nd_derivatives);
+      maybe_update_jacobian_pushed_forward_2nd_derivatives<dim,spacedim>
+      (no_similarity, data_set, data,
+       output_data.jacobian_pushed_forward_2nd_derivatives);
+      maybe_update_jacobian_3rd_derivatives<dim,spacedim>
+      (no_similarity, data_set, data,
+       output_data.jacobian_3rd_derivatives);
+      maybe_update_jacobian_pushed_forward_3rd_derivatives<dim,spacedim>
+      (no_similarity, data_set, data,
+       output_data.jacobian_pushed_forward_3rd_derivatives);
+
+      // finally the quantities that only exist on faces
+      const unsigned int         n_q_points = quadrature.size();
+      const std::vector<double> &weights    = quadrature.get_weights();
+      maybe_compute_face_data (mapping,
+                               cell, face_no, subface_no,
+                               n_q_points, weights,
+                               data,
+                               output_data);
+    }
+  }
+}
+
+
+
+// Fill output_data for the quadrature points on face face_no of the given
+// cell. After refreshing the cached mapping support points if needed, the
+// work is delegated to internal::do_fill_fe_face_values().
+template<int dim, int spacedim>
+void
+MappingQGeneric<dim,spacedim>::
+fill_fe_face_values (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                     const unsigned int face_no,
+                     const Quadrature<dim-1> &quadrature,
+                     const typename Mapping<dim,spacedim>::InternalDataBase &internal_data,
+                     internal::FEValues::MappingRelatedData<dim,spacedim> &output_data) const
+{
+  // the downcast below is only valid if the object really is one of ours
+  Assert ((dynamic_cast<const InternalData *>(&internal_data) != 0),
+          ExcInternalError());
+  const InternalData &data = static_cast<const InternalData &>(internal_data);
+
+  // The cached support points are unusable if there are none yet or if
+  // they were computed for another cell. The triangulation addresses are
+  // compared before the iterators because comparing iterators of
+  // different triangulations might trigger an exception.
+  const bool support_points_outdated
+    = ((data.mapping_support_points.size() == 0)
+       ||
+       (&cell->get_triangulation() !=
+        &data.cell_of_current_support_points->get_triangulation())
+       ||
+       (cell != data.cell_of_current_support_points));
+
+  if (support_points_outdated)
+    {
+      data.mapping_support_points = this->compute_mapping_support_points(cell);
+      data.cell_of_current_support_points = cell;
+    }
+
+  // descriptor selecting the data set of this face
+  const typename QProjector<dim>::DataSetDescriptor face_data_set
+    = QProjector<dim>::DataSetDescriptor::face (face_no,
+                                                cell->face_orientation(face_no),
+                                                cell->face_flip(face_no),
+                                                cell->face_rotation(face_no),
+                                                quadrature.size());
+
+  // a whole face is marked by an invalid subface number
+  internal::do_fill_fe_face_values (*this,
+                                    cell,
+                                    face_no,
+                                    numbers::invalid_unsigned_int,
+                                    face_data_set,
+                                    quadrature,
+                                    data,
+                                    output_data);
+}
+
+
+
+// Fill output_data for the quadrature points on subface subface_no of face
+// face_no of the given cell. After refreshing the cached mapping support
+// points if needed, the work is delegated to
+// internal::do_fill_fe_face_values().
+template<int dim, int spacedim>
+void
+MappingQGeneric<dim,spacedim>::
+fill_fe_subface_values (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                        const unsigned int face_no,
+                        const unsigned int subface_no,
+                        const Quadrature<dim-1> &quadrature,
+                        const typename Mapping<dim,spacedim>::InternalDataBase &internal_data,
+                        internal::FEValues::MappingRelatedData<dim,spacedim> &output_data) const
+{
+  // the downcast below is only valid if the object really is one of ours
+  Assert ((dynamic_cast<const InternalData *>(&internal_data) != 0),
+          ExcInternalError());
+  const InternalData &data = static_cast<const InternalData &>(internal_data);
+
+  // The cached support points are unusable if there are none yet or if
+  // they were computed for another cell. The triangulation addresses are
+  // compared before the iterators because comparing iterators of
+  // different triangulations might trigger an exception.
+  const bool support_points_outdated
+    = ((data.mapping_support_points.size() == 0)
+       ||
+       (&cell->get_triangulation() !=
+        &data.cell_of_current_support_points->get_triangulation())
+       ||
+       (cell != data.cell_of_current_support_points));
+
+  if (support_points_outdated)
+    {
+      data.mapping_support_points = this->compute_mapping_support_points(cell);
+      data.cell_of_current_support_points = cell;
+    }
+
+  // descriptor selecting the data set of this subface
+  const typename QProjector<dim>::DataSetDescriptor subface_data_set
+    = QProjector<dim>::DataSetDescriptor::subface (face_no, subface_no,
+                                                   cell->face_orientation(face_no),
+                                                   cell->face_flip(face_no),
+                                                   cell->face_rotation(face_no),
+                                                   quadrature.size(),
+                                                   cell->subface_case(face_no));
+
+  internal::do_fill_fe_face_values (*this,
+                                    cell,
+                                    face_no,
+                                    subface_no,
+                                    subface_data_set,
+                                    quadrature,
+                                    data,
+                                    output_data);
+}
+
+
+
+namespace
+{
+  /**
+   * Apply the transformation selected by @p mapping_type to every entry of
+   * @p input and store the results in the corresponding entry of @p output.
+   * Both arrays must have the same length.
+   *
+   * Supported values of @p mapping_type are mapping_contravariant,
+   * mapping_piola (rank 1 only) and mapping_covariant. Any other value
+   * triggers ExcNotImplemented in debug mode.
+   *
+   * @p mapping_data must be a MappingQGeneric<dim,spacedim>::InternalData
+   * object; in debug mode it is checked that the update flags of the
+   * fields read below are set.
+   */
+  template <int dim, int spacedim, int rank>
+  void
+  transform_fields(const ArrayView<const Tensor<rank,dim> >               &input,
+                   const MappingType                                       mapping_type,
+                   const typename Mapping<dim,spacedim>::InternalDataBase &mapping_data,
+                   const ArrayView<Tensor<rank,spacedim> >                &output)
+  {
+    AssertDimension (input.size(), output.size());
+    Assert ((dynamic_cast<const typename MappingQGeneric<dim,spacedim>::InternalData *>(&mapping_data) != 0),
+            ExcInternalError());
+    const typename MappingQGeneric<dim,spacedim>::InternalData
+    &data = static_cast<const typename MappingQGeneric<dim,spacedim>::InternalData &>(mapping_data);
+
+    switch (mapping_type)
+      {
+      case mapping_contravariant:
+      {
+        Assert (data.update_each & update_contravariant_transformation,
+                typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_contravariant_transformation"));
+
+        for (unsigned int i=0; i<output.size(); ++i)
+          output[i] = apply_transformation(data.contravariant[i], input[i]);
+
+        return;
+      }
+
+      case mapping_piola:
+      {
+        Assert (data.update_each & update_contravariant_transformation,
+                typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_contravariant_transformation"));
+        Assert (data.update_each & update_volume_elements,
+                typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_volume_elements"));
+        Assert (rank==1, ExcMessage("Only for rank 1"));
+        // in release mode the assertion above is not checked; silently do
+        // nothing for unsupported ranks
+        if (rank!=1)
+          return;
+
+        // contravariant transformation, scaled by the inverse of the
+        // volume element at each point
+        for (unsigned int i=0; i<output.size(); ++i)
+          {
+            output[i] = apply_transformation(data.contravariant[i], input[i]);
+            output[i] /= data.volume_elements[i];
+          }
+        return;
+      }
+      //We still allow this operation as in the
+      //reference cell Derivatives are Tensor
+      //rather than DerivativeForm
+      case mapping_covariant:
+      {
+        // the loop below reads data.covariant, so the covariant flag is the
+        // one that has to be checked (it is also the one named in the error
+        // message)
+        Assert (data.update_each & update_covariant_transformation,
+                typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_covariant_transformation"));
+
+        for (unsigned int i=0; i<output.size(); ++i)
+          output[i] = apply_transformation(data.covariant[i], input[i]);
+
+        return;
+      }
+
+      default:
+        Assert(false, ExcNotImplemented());
+      }
+  }
+
+
+  /**
+   * Apply one of the gradient transformations (mapping_contravariant_gradient,
+   * mapping_covariant_gradient or mapping_piola_gradient) to every entry of
+   * @p input and store the results in @p output. Both arrays must have the
+   * same length and, as asserted in debug mode, the tensors must have
+   * rank 2. Any other @p mapping_type triggers ExcNotImplemented in debug
+   * mode.
+   */
+  template <int dim, int spacedim, int rank>
+  void
+  transform_gradients(const ArrayView<const Tensor<rank,dim> >                &input,
+                      const MappingType                                        mapping_type,
+                      const typename Mapping<dim,spacedim>::InternalDataBase  &mapping_data,
+                      const ArrayView<Tensor<rank,spacedim> >                 &output)
+  {
+    AssertDimension (input.size(), output.size());
+    Assert ((dynamic_cast<const typename MappingQGeneric<dim,spacedim>::InternalData *>(&mapping_data) != 0),
+            ExcInternalError());
+
+    typedef typename MappingQGeneric<dim,spacedim>::InternalData MappingData;
+    const MappingData &data = static_cast<const MappingData &>(mapping_data);
+
+    const unsigned int n_points = output.size();
+
+    switch (mapping_type)
+      {
+      case mapping_contravariant_gradient:
+      {
+        Assert (data.update_each & update_covariant_transformation,
+                typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_covariant_transformation"));
+        Assert (data.update_each & update_contravariant_transformation,
+                typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_contravariant_transformation"));
+        Assert (rank==2, ExcMessage("Only for rank 2"));
+
+        // first apply the contravariant transformation to the transposed
+        // input, then the covariant one to the transposed intermediate
+        for (unsigned int q=0; q<n_points; ++q)
+          {
+            DerivativeForm<1,spacedim,dim> intermediate =
+              apply_transformation(data.contravariant[q], transpose(input[q]) );
+            output[q] = apply_transformation(data.covariant[q], intermediate.transpose() );
+          }
+        break;
+      }
+
+      case mapping_covariant_gradient:
+      {
+        Assert (data.update_each & update_covariant_transformation,
+                typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_covariant_transformation"));
+        Assert (rank==2, ExcMessage("Only for rank 2"));
+
+        // the covariant transformation is applied twice
+        for (unsigned int q=0; q<n_points; ++q)
+          {
+            DerivativeForm<1,spacedim,dim> intermediate =
+              apply_transformation(data.covariant[q], transpose(input[q]) );
+            output[q] = apply_transformation(data.covariant[q], intermediate.transpose() );
+          }
+        break;
+      }
+
+      case mapping_piola_gradient:
+      {
+        Assert (data.update_each & update_covariant_transformation,
+                typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_covariant_transformation"));
+        Assert (data.update_each & update_contravariant_transformation,
+                typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_contravariant_transformation"));
+        Assert (data.update_each & update_volume_elements,
+                typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_volume_elements"));
+        Assert (rank==2, ExcMessage("Only for rank 2"));
+
+        // covariant transformation first, then contravariant; the result
+        // is transposed and divided by the volume element
+        for (unsigned int q=0; q<n_points; ++q)
+          {
+            DerivativeForm<1,spacedim,dim> intermediate =
+              apply_transformation(data.covariant[q], input[q] );
+            Tensor<2,spacedim> transformed =
+              apply_transformation(data.contravariant[q], intermediate.transpose() );
+
+            output[q] = transpose(transformed);
+            output[q] /= data.volume_elements[q];
+          }
+        break;
+      }
+
+      default:
+        Assert(false, ExcNotImplemented());
+      }
+  }
+
+
+
+
+  /**
+   * Transform the rank-3 tensors in @p input, one per point, and store the
+   * results in @p output. Both arrays must have the same length.
+   *
+   * In every supported case the result has the form
+   * @code
+   *   output[q][i][j][k] = sum_{I,J,K} A[i][I] * B[j][J] * B[k][K] * input[q][I][J][K]
+   * @endcode
+   * where B is data.covariant[q] and A depends on @p mapping_type:
+   * - mapping_contravariant_hessian: A is data.contravariant[q]
+   * - mapping_covariant_hessian:     A is data.covariant[q]
+   * - mapping_piola_hessian:         A is data.contravariant[q] divided by
+   *                                  data.volume_elements[q]
+   *
+   * The triple sum is evaluated one index at a time using the temporaries
+   * tmp1 (first index contracted) and tmp2 (first two indices contracted).
+   * Any other @p mapping_type triggers ExcNotImplemented in debug mode.
+   */
+  template <int dim, int spacedim>
+  void
+  transform_hessians(const ArrayView<const Tensor<3,dim> >                  &input,
+                     const MappingType                                       mapping_type,
+                     const typename Mapping<dim,spacedim>::InternalDataBase &mapping_data,
+                     const ArrayView<Tensor<3,spacedim> >                   &output)
+  {
+    AssertDimension (input.size(), output.size());
+    // in debug mode, check the dynamic type before the static downcast
+    Assert ((dynamic_cast<const typename MappingQGeneric<dim,spacedim>::InternalData *>(&mapping_data) != 0),
+            ExcInternalError());
+    const typename MappingQGeneric<dim,spacedim>::InternalData
+    &data = static_cast<const typename MappingQGeneric<dim,spacedim>::InternalData &>(mapping_data);
+
+    switch (mapping_type)
+      {
+      case mapping_contravariant_hessian:
+      {
+        Assert (data.update_each & update_covariant_transformation,
+                typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_covariant_transformation"));
+        Assert (data.update_each & update_contravariant_transformation,
+                typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_contravariant_transformation"));
+
+        for (unsigned int q=0; q<output.size(); ++q)
+          for (unsigned int i=0; i<spacedim; ++i)
+            {
+              // contract the first index with row i of the contravariant
+              // transformation
+              double tmp1[dim][dim];
+              for (unsigned int J=0; J<dim; ++J)
+                for (unsigned int K=0; K<dim; ++K)
+                  {
+                    tmp1[J][K] = data.contravariant[q][i][0] * input[q][0][J][K];
+                    for (unsigned int I=1; I<dim; ++I)
+                      tmp1[J][K] += data.contravariant[q][i][I] * input[q][I][J][K];
+                  }
+              for (unsigned int j=0; j<spacedim; ++j)
+                {
+                  // contract the second index with row j of the covariant
+                  // transformation
+                  double tmp2[dim];
+                  for (unsigned int K=0; K<dim; ++K)
+                    {
+                      tmp2[K] = data.covariant[q][j][0] * tmp1[0][K];
+                      for (unsigned int J=1; J<dim; ++J)
+                        tmp2[K] += data.covariant[q][j][J] * tmp1[J][K];
+                    }
+                  // contract the third index with row k of the covariant
+                  // transformation
+                  for (unsigned int k=0; k<spacedim; ++k)
+                    {
+                      output[q][i][j][k] = data.covariant[q][k][0] * tmp2[0];
+                      for (unsigned int K=1; K<dim; ++K)
+                        output[q][i][j][k] += data.covariant[q][k][K] * tmp2[K];
+                    }
+                }
+            }
+        return;
+      }
+
+      case mapping_covariant_hessian:
+      {
+        Assert (data.update_each & update_covariant_transformation,
+                typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_covariant_transformation"));
+
+        for (unsigned int q=0; q<output.size(); ++q)
+          for (unsigned int i=0; i<spacedim; ++i)
+            {
+              // same scheme as above, but all three indices are contracted
+              // with the covariant transformation
+              double tmp1[dim][dim];
+              for (unsigned int J=0; J<dim; ++J)
+                for (unsigned int K=0; K<dim; ++K)
+                  {
+                    tmp1[J][K] = data.covariant[q][i][0] * input[q][0][J][K];
+                    for (unsigned int I=1; I<dim; ++I)
+                      tmp1[J][K] += data.covariant[q][i][I] * input[q][I][J][K];
+                  }
+              for (unsigned int j=0; j<spacedim; ++j)
+                {
+                  double tmp2[dim];
+                  for (unsigned int K=0; K<dim; ++K)
+                    {
+                      tmp2[K] = data.covariant[q][j][0] * tmp1[0][K];
+                      for (unsigned int J=1; J<dim; ++J)
+                        tmp2[K] += data.covariant[q][j][J] * tmp1[J][K];
+                    }
+                  for (unsigned int k=0; k<spacedim; ++k)
+                    {
+                      output[q][i][j][k] = data.covariant[q][k][0] * tmp2[0];
+                      for (unsigned int K=1; K<dim; ++K)
+                        output[q][i][j][k] += data.covariant[q][k][K] * tmp2[K];
+                    }
+                }
+            }
+
+        return;
+      }
+
+      case mapping_piola_hessian:
+      {
+        Assert (data.update_each & update_covariant_transformation,
+                typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_covariant_transformation"));
+        Assert (data.update_each & update_contravariant_transformation,
+                typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_contravariant_transformation"));
+        Assert (data.update_each & update_volume_elements,
+                typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_volume_elements"));
+
+        for (unsigned int q=0; q<output.size(); ++q)
+          for (unsigned int i=0; i<spacedim; ++i)
+            {
+              // row i of the contravariant transformation, scaled by the
+              // inverse of the volume element at this point
+              double factor[dim];
+              for (unsigned int I=0; I<dim; ++I)
+                factor[I] = data.contravariant[q][i][I] / data.volume_elements[q];
+              // contract the first index with the scaled row
+              double tmp1[dim][dim];
+              for (unsigned int J=0; J<dim; ++J)
+                for (unsigned int K=0; K<dim; ++K)
+                  {
+                    tmp1[J][K] = factor[0] * input[q][0][J][K];
+                    for (unsigned int I=1; I<dim; ++I)
+                      tmp1[J][K] += factor[I] * input[q][I][J][K];
+                  }
+              // the remaining two indices are contracted with the covariant
+              // transformation, as in the other cases
+              for (unsigned int j=0; j<spacedim; ++j)
+                {
+                  double tmp2[dim];
+                  for (unsigned int K=0; K<dim; ++K)
+                    {
+                      tmp2[K] = data.covariant[q][j][0] * tmp1[0][K];
+                      for (unsigned int J=1; J<dim; ++J)
+                        tmp2[K] += data.covariant[q][j][J] * tmp1[J][K];
+                    }
+                  for (unsigned int k=0; k<spacedim; ++k)
+                    {
+                      output[q][i][j][k] = data.covariant[q][k][0] * tmp2[0];
+                      for (unsigned int K=1; K<dim; ++K)
+                        output[q][i][j][k] += data.covariant[q][k][K] * tmp2[K];
+                    }
+                }
+            }
+
+        return;
+      }
+
+      default:
+        Assert(false, ExcNotImplemented());
+      }
+  }
+
+
+
+
+  /**
+   * Apply the covariant transformation to every DerivativeForm in @p input
+   * and store the resulting tensors in @p output. Both arrays must have the
+   * same length.
+   *
+   * mapping_covariant is the only supported @p mapping_type; any other
+   * value triggers ExcNotImplemented in debug mode. @p mapping_data must be
+   * a MappingQGeneric<dim,spacedim>::InternalData object.
+   */
+  template<int dim, int spacedim, int rank>
+  void
+  transform_differential_forms(const ArrayView<const DerivativeForm<rank, dim,spacedim> >   &input,
+                               const MappingType                                             mapping_type,
+                               const typename Mapping<dim,spacedim>::InternalDataBase       &mapping_data,
+                               const ArrayView<Tensor<rank+1, spacedim> >                   &output)
+  {
+    AssertDimension (input.size(), output.size());
+    Assert ((dynamic_cast<const typename MappingQGeneric<dim,spacedim>::InternalData *>(&mapping_data) != 0),
+            ExcInternalError());
+    const typename MappingQGeneric<dim,spacedim>::InternalData
+    &data = static_cast<const typename MappingQGeneric<dim,spacedim>::InternalData &>(mapping_data);
+
+    switch (mapping_type)
+      {
+      case mapping_covariant:
+      {
+        // the loop below reads data.covariant, so the covariant flag is the
+        // one that has to be checked (it is also the one named in the error
+        // message)
+        Assert (data.update_each & update_covariant_transformation,
+                typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_covariant_transformation"));
+
+        for (unsigned int i=0; i<output.size(); ++i)
+          output[i] = apply_transformation(data.covariant[i], input[i]);
+
+        return;
+      }
+      default:
+        Assert(false, ExcNotImplemented());
+      }
+  }
+}
+
+
+
+// Transform rank-1 tensors. All the work, including the check of
+// @p mapping_type, is done by the file-local helper transform_fields().
+template<int dim, int spacedim>
+void
+MappingQGeneric<dim,spacedim>::
+transform (const ArrayView<const Tensor<1, dim> >                  &input,
+           const MappingType                                        mapping_type,
+           const typename Mapping<dim,spacedim>::InternalDataBase  &mapping_data,
+           const ArrayView<Tensor<1, spacedim> >                   &output) const
+{
+  transform_fields(input, mapping_type, mapping_data, output);
+}
+
+
+
+// Transform DerivativeForm<1,dim,spacedim> objects into rank-2 tensors. All
+// the work, including the check of @p mapping_type, is done by the
+// file-local helper transform_differential_forms().
+template<int dim, int spacedim>
+void
+MappingQGeneric<dim,spacedim>::
+transform (const ArrayView<const DerivativeForm<1, dim,spacedim> >  &input,
+           const MappingType                                         mapping_type,
+           const typename Mapping<dim,spacedim>::InternalDataBase   &mapping_data,
+           const ArrayView<Tensor<2, spacedim> >                    &output) const
+{
+  transform_differential_forms(input, mapping_type, mapping_data, output);
+}
+
+
+
+// Transform rank-2 tensors. Depending on @p mapping_type the tensors are
+// handled by transform_fields() (mapping_contravariant) or by
+// transform_gradients() (the three *_gradient mapping types). Every other
+// mapping type triggers ExcNotImplemented in debug mode.
+template<int dim, int spacedim>
+void
+MappingQGeneric<dim,spacedim>::
+transform (const ArrayView<const Tensor<2, dim> >                  &input,
+           const MappingType                                        mapping_type,
+           const typename Mapping<dim,spacedim>::InternalDataBase  &mapping_data,
+           const ArrayView<Tensor<2, spacedim> >                   &output) const
+{
+  const bool is_gradient_mapping = ((mapping_type == mapping_piola_gradient)
+                                    ||
+                                    (mapping_type == mapping_contravariant_gradient)
+                                    ||
+                                    (mapping_type == mapping_covariant_gradient));
+
+  if (mapping_type == mapping_contravariant)
+    {
+      transform_fields(input, mapping_type, mapping_data, output);
+    }
+  else if (is_gradient_mapping)
+    {
+      transform_gradients(input, mapping_type, mapping_data, output);
+    }
+  else
+    {
+      Assert(false, ExcNotImplemented());
+    }
+}
+
+
+
+// Transform DerivativeForm<2,dim,spacedim> objects into rank-3 tensors.
+// Only mapping_covariant_gradient is supported; it computes
+//
+//   output[q][i][j][k] = sum_{J,K} covariant[q][j][J] * covariant[q][k][K]
+//                                  * input[q][i][J][K]
+//
+// i.e. the first index is left alone and the other two are contracted with
+// the covariant transformation. Every other mapping type triggers
+// ExcNotImplemented in debug mode.
+template<int dim, int spacedim>
+void
+MappingQGeneric<dim,spacedim>::
+transform (const ArrayView<const  DerivativeForm<2, dim, spacedim> > &input,
+           const MappingType                                          mapping_type,
+           const typename Mapping<dim,spacedim>::InternalDataBase    &mapping_data,
+           const ArrayView<Tensor<3,spacedim> >                      &output) const
+{
+
+  AssertDimension (input.size(), output.size());
+  Assert (dynamic_cast<const InternalData *>(&mapping_data) != 0,
+          ExcInternalError());
+  const InternalData &data = static_cast<const InternalData &>(mapping_data);
+
+  switch (mapping_type)
+    {
+    case mapping_covariant_gradient:
+    {
+      // only data.covariant is read below, so the covariant flag is the one
+      // that has to be checked (it is also the one named in the error
+      // message)
+      Assert (data.update_each & update_covariant_transformation,
+              typename FEValuesBase<dim>::ExcAccessToUninitializedField("update_covariant_transformation"));
+
+      for (unsigned int q=0; q<output.size(); ++q)
+        for (unsigned int i=0; i<spacedim; ++i)
+          for (unsigned int j=0; j<spacedim; ++j)
+            {
+              // contract the second index with row j of the covariant
+              // transformation
+              double tmp[dim];
+              for (unsigned int K=0; K<dim; ++K)
+                {
+                  tmp[K] = data.covariant[q][j][0] * input[q][i][0][K];
+                  for (unsigned int J=1; J<dim; ++J)
+                    tmp[K] += data.covariant[q][j][J] * input[q][i][J][K];
+                }
+              // contract the third index with row k of the covariant
+              // transformation
+              for (unsigned int k=0; k<spacedim; ++k)
+                {
+                  output[q][i][j][k] = data.covariant[q][k][0] * tmp[0];
+                  for (unsigned int K=1; K<dim; ++K)
+                    output[q][i][j][k] += data.covariant[q][k][K] * tmp[K];
+                }
+            }
+      return;
+    }
+
+    default:
+      Assert(false, ExcNotImplemented());
+    }
+}
+
+
+
+// Transform rank-3 tensors. The three *_hessian mapping types are handled
+// by the file-local helper transform_hessians(); every other mapping type
+// triggers ExcNotImplemented in debug mode.
+template<int dim, int spacedim>
+void
+MappingQGeneric<dim,spacedim>::
+transform (const ArrayView<const  Tensor<3,dim> >                  &input,
+           const MappingType                                        mapping_type,
+           const typename Mapping<dim,spacedim>::InternalDataBase  &mapping_data,
+           const ArrayView<Tensor<3,spacedim> >                    &output) const
+{
+  const bool is_hessian_mapping = ((mapping_type == mapping_piola_hessian)
+                                   ||
+                                   (mapping_type == mapping_contravariant_hessian)
+                                   ||
+                                   (mapping_type == mapping_covariant_hessian));
+
+  if (is_hessian_mapping)
+    {
+      transform_hessians(input, mapping_type, mapping_data, output);
+    }
+  else
+    {
+      Assert(false, ExcNotImplemented());
+    }
+}
+
+
+
+namespace
+{
+  /**
+   * Ask @p manifold for points in the interior of a line or quad. The
+   * object is described by its vertices in @p surrounding_points: two
+   * vertices for a line, four for a quad. One point is computed for each
+   * element of @p points, so that vector has to be sized by the caller;
+   * for a quad its size must be a square number.
+   *
+   * The reference coordinates of the requested points are taken from the
+   * interior points of @p line_support_points (its first point is skipped).
+   * For each of them the (bi)linear weights of the vertices are computed
+   * and handed to Manifold::get_new_point().
+   *
+   * Eight surrounding points (a hex) are not implemented.
+   */
+  template<int dim, int spacedim>
+  void
+  get_intermediate_points (const Manifold<dim, spacedim> &manifold,
+                           const QGaussLobatto<1>        &line_support_points,
+                           const std::vector<Point<spacedim> > &surrounding_points,
+                           std::vector<Point<spacedim> > &points)
+  {
+    Assert(surrounding_points.size() >= 2, ExcMessage("At least 2 surrounding points are required"));
+    const unsigned int n=points.size();
+    Assert(n>0, ExcMessage("You can't ask for 0 intermediate points."));
+
+    const unsigned int n_vertices = surrounding_points.size();
+    std::vector<double> weights(n_vertices);
+
+    if (n_vertices == 2)
+      {
+        // the two points are the end points of a line; every requested
+        // point is a linear combination of them
+        for (unsigned int p=0; p<n; ++p)
+          {
+            const double x = line_support_points.point(p+1)[0];
+            weights[0] = (1-x);
+            weights[1] = x;
+            Quadrature<spacedim> quadrature(surrounding_points, weights);
+            points[p] = manifold.get_new_point(quadrature);
+          }
+      }
+    else if (n_vertices == 4)
+      {
+        Assert(spacedim >= 2, ExcImpossibleInDim(spacedim));
+        // number of requested points per coordinate direction
+        const unsigned m=
+          static_cast<unsigned int>(std::sqrt(static_cast<double>(n)));
+        // is n a square number
+        Assert(m*m==n, ExcInternalError());
+
+        // the four points are the vertices of a quad; the requested points
+        // form an m x m tensor product grid and every one of them is a
+        // bilinear combination of the vertices
+        for (unsigned int row=0; row<m; ++row)
+          {
+            const double y=line_support_points.point(1+row)[0];
+            for (unsigned int col=0; col<m; ++col)
+              {
+                const double x=line_support_points.point(1+col)[0];
+
+                weights[0] = (1-x)*(1-y);
+                weights[1] =     x*(1-y);
+                weights[2] = (1-x)*y    ;
+                weights[3] =     x*y    ;
+                Quadrature<spacedim> quadrature(surrounding_points, weights);
+                points[row*m+col]=manifold.get_new_point(quadrature);
+              }
+          }
+      }
+    else if (n_vertices == 8)
+      {
+        // interior points of a hex
+        Assert(false, ExcNotImplemented());
+      }
+    else
+      {
+        Assert(false, ExcInternalError());
+      }
+  }
+
+
+
+
+  /**
+   * Compute intermediate points on the line or quad that @p iter points to
+   * and store them in @p points, which the caller has to size.
+   *
+   * For backward compatibility, a @p manifold that is actually a
+   * Boundary<dim,spacedim> is queried through the old
+   * get_intermediate_points_on_line()/get_intermediate_points_on_quad()
+   * interface. Every other manifold is handed the vertices of the object
+   * via get_intermediate_points().
+   */
+  template <int dim, int spacedim, class TriaIterator>
+  void get_intermediate_points_on_object(const Manifold<dim, spacedim> &manifold,
+                                         const QGaussLobatto<1>        &line_support_points,
+                                         const TriaIterator &iter,
+                                         std::vector<Point<spacedim> > &points)
+  {
+    const unsigned int structdim = TriaIterator::AccessorType::structure_dimension;
+
+    const Boundary<dim,spacedim> *const boundary
+      = dynamic_cast<const Boundary<dim,spacedim> *>(&manifold);
+
+    if (boundary == 0)
+      {
+        // a genuine manifold: collect the vertices of the object and let
+        // the manifold compute the points from them
+        std::vector<Point<spacedim> > sp(GeometryInfo<structdim>::vertices_per_cell);
+        for (unsigned int v=0; v<sp.size(); ++v)
+          sp[v] = iter->vertex(v);
+        get_intermediate_points(manifold, line_support_points, sp, points);
+        return;
+      }
+
+    // This is actually a boundary. Call old methods.
+    switch (structdim)
+      {
+      case 1:
+      {
+        const typename Triangulation<dim,spacedim>::line_iterator line = iter;
+        boundary->get_intermediate_points_on_line(line, points);
+        return;
+      }
+      case 2:
+      {
+        const typename Triangulation<dim,spacedim>::quad_iterator quad = iter;
+        boundary->get_intermediate_points_on_quad(quad, points);
+        return;
+      }
+      default:
+        Assert(false, ExcInternalError());
+        return;
+      }
+  }
+
+
+  /**
+   * Compute interior support points as linear combinations of the exterior
+   * ones and append them to @p a.
+   *
+   * On entry @p a holds the exterior points; their number must equal the
+   * number of columns of @p lvs. Each row of @p lvs holds the weights of
+   * one interior point, so one point per row is appended.
+   *
+   * See equation (7) of the `mapping' report.
+   */
+  template <int spacedim>
+  void add_weighted_interior_points(const Table<2,double>   &lvs,
+                                    std::vector<Point<spacedim> > &a)
+  {
+    const unsigned int n_inner_apply=lvs.n_rows();
+    const unsigned int n_outer_apply=lvs.n_cols();
+    Assert(a.size()==n_outer_apply,
+           ExcDimensionMismatch(a.size(), n_outer_apply));
+
+    // one new point per row of the weight table. only the first
+    // n_outer_apply entries of a (the points that were there on entry) are
+    // read, so appending inside the loop does not affect later rows
+    unsigned int row = 0;
+    while (row < n_inner_apply)
+      {
+        Assert(lvs.n_cols()==n_outer_apply, ExcInternalError());
+
+        Point<spacedim> weighted_sum;
+        for (unsigned int col=0; col<n_outer_apply; ++col)
+          weighted_sum += lvs[row][col]*a[col];
+
+        a.push_back(weighted_sum);
+        ++row;
+      }
+  }
+}
+
+
+// Append to @p a the support points that lie in the interior of the lines
+// of @p cell, line by line. For polynomial degree 2 that is the single point
+// the manifold returns from get_new_point_on_line(); for other degrees the
+// polynomial_degree-1 points per line come from
+// get_intermediate_points_on_object().
+template <int dim, int spacedim>
+void
+MappingQGeneric<dim,spacedim>::
+add_line_support_points (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                         std::vector<Point<spacedim> > &a) const
+{
+  // with degree 2 there is just one point per line and we can ask the
+  // manifold for it directly
+  const bool midpoint_only = (this->polynomial_degree==2);
+
+  // scratch array for the interior points of a single line; only needed
+  // (and only sized) when more than the midpoint is requested
+  std::vector<Point<spacedim> > line_points;
+  if (!midpoint_only)
+    line_points.resize (this->polynomial_degree-1);
+
+  for (unsigned int line_no=0; line_no<GeometryInfo<dim>::lines_per_cell; ++line_no)
+    {
+      // in 1d the cell itself is the line
+      const typename Triangulation<dim,spacedim>::line_iterator line =
+        (dim == 1
+         ?
+         static_cast<typename Triangulation<dim,spacedim>::line_iterator>(cell)
+         :
+         cell->line(line_no));
+
+      // if the line has no manifold id of its own and the cell has
+      // codimension greater than zero, use the manifold of the cell
+      const bool use_cell_manifold =
+        ( line->manifold_id() == numbers::invalid_manifold_id ) &&
+        ( dim < spacedim );
+      const Manifold<dim,spacedim> &manifold =
+        ( use_cell_manifold
+          ?
+          cell->get_manifold()
+          :
+          line->get_manifold() );
+
+      if (midpoint_only)
+        {
+          a.push_back(manifold.get_new_point_on_line(line));
+          continue;
+        }
+
+      get_intermediate_points_on_object (manifold, line_support_points, line, line_points);
+
+      // in 3D, lines might be in wrong orientation. if so, append the points
+      // in reverse order. in lower dimensions lines always have the correct
+      // orientation
+      if ((dim==3) && !cell->line_orientation(line_no))
+        a.insert (a.end(), line_points.rbegin(), line_points.rend());
+      else
+        a.insert (a.end(), line_points.begin(), line_points.end());
+    }
+}
+
+
+
+// Append to @p a the support points that lie in the interior of the six
+// faces of @p cell, face by face. On entry @p a has to contain the vertices
+// of the cell followed by the interior points of its lines, since these are
+// read when a face is handled by interpolation from its outline.
+//
+// For each face there are three possibilities:
+// - the face is at the boundary: the points come from the manifold of the
+//   face via get_intermediate_points_on_object();
+// - the face is not at the boundary but at least one of its lines is: the
+//   points are computed from the vertices and line points of the face with
+//   the weights in support_point_weights_on_quad;
+// - otherwise: the points come from the manifold of the face via
+//   get_intermediate_points().
+template <>
+void
+MappingQGeneric<3,3>::
+add_quad_support_points(const Triangulation<3,3>::cell_iterator &cell,
+                        std::vector<Point<3> >                &a) const
+{
+  const unsigned int faces_per_cell    = GeometryInfo<3>::faces_per_cell,
+                     vertices_per_face = GeometryInfo<3>::vertices_per_face,
+                     lines_per_face    = GeometryInfo<3>::lines_per_face,
+                     vertices_per_cell = GeometryInfo<3>::vertices_per_cell;
+
+  // used if face quad at boundary or entirely in the interior of the domain
+  std::vector<Point<3> > quad_points ((polynomial_degree-1)*(polynomial_degree-1));
+  // used if only one line of face quad is at boundary
+  std::vector<Point<3> > b(4*polynomial_degree);
+
+  // Used by the new Manifold interface. This vector collects the
+  // vertices used to compute the intermediate points.
+  std::vector<Point<3> > vertices(4);
+
+  // loop over all faces and collect points on them
+  for (unsigned int face_no=0; face_no<faces_per_cell; ++face_no)
+    {
+      const Triangulation<3>::face_iterator face = cell->face(face_no);
+
+      // select the correct mappings for the present face
+      const bool face_orientation = cell->face_orientation(face_no),
+                 face_flip        = cell->face_flip       (face_no),
+                 face_rotation    = cell->face_rotation   (face_no);
+
+#ifdef DEBUG
+      // some sanity checks up front
+      for (unsigned int i=0; i<vertices_per_face; ++i)
+        Assert(face->vertex_index(i)==cell->vertex_index(
+                 GeometryInfo<3>::face_to_cell_vertices(face_no, i,
+                                                        face_orientation,
+                                                        face_flip,
+                                                        face_rotation)),
+               ExcInternalError());
+
+      // indices of the lines that bound a face are given by GeometryInfo<3>::
+      // face_to_cell_lines
+      for (unsigned int i=0; i<lines_per_face; ++i)
+        Assert(face->line(i)==cell->line(GeometryInfo<3>::face_to_cell_lines(
+                                           face_no, i, face_orientation, face_flip, face_rotation)),
+               ExcInternalError());
+#endif
+
+      // if face at boundary, then ask boundary object to return intermediate
+      // points on it
+      if (face->at_boundary())
+        {
+          get_intermediate_points_on_object(face->get_manifold(), line_support_points, face, quad_points);
+
+          // in 3D, the orientation, flip and rotation of the face might not
+          // match what we expect here, namely the standard orientation. thus
+          // reorder points accordingly. since a Mapping uses the same shape
+          // function as an FE_Q, we can ask a FE_Q to do the reordering for us.
+          for (unsigned int i=0; i<quad_points.size(); ++i)
+            a.push_back(quad_points[fe_q->adjust_quad_dof_index_for_face_orientation(i,
+                                    face_orientation,
+                                    face_flip,
+                                    face_rotation)]);
+        }
+      else
+        {
+          // face is not at boundary, but maybe some of its lines are. count
+          // them
+          unsigned int lines_at_boundary=0;
+          for (unsigned int i=0; i<lines_per_face; ++i)
+            if (face->line(i)->at_boundary())
+              ++lines_at_boundary;
+
+          Assert(lines_at_boundary<=lines_per_face, ExcInternalError());
+
+          // if at least one of the lines bounding this quad is at the
+          // boundary, then collect points separately
+          if (lines_at_boundary>0)
+            {
+              // add_weighted_interior_points() appends the interior points
+              // it computes to b. shrink b back to its initial size in case
+              // an earlier face has already gone through this branch.
+              b.resize(4*polynomial_degree);
+
+              // b is of size 4*degree, make sure that this is the right size
+              Assert(b.size()==vertices_per_face+lines_per_face*(polynomial_degree-1),
+                     ExcDimensionMismatch(b.size(),
+                                          vertices_per_face+lines_per_face*(polynomial_degree-1)));
+
+              // sort the points into b. We used access from the cell (not
+              // from the face) to fill b, so we can assume a standard face
+              // orientation. Doing so, the calculated points will be in
+              // standard orientation as well.
+              for (unsigned int i=0; i<vertices_per_face; ++i)
+                b[i]=a[GeometryInfo<3>::face_to_cell_vertices(face_no, i)];
+
+              // the points on the lines of the cell are stored in a right
+              // after the vertices, polynomial_degree-1 points per line
+              for (unsigned int i=0; i<lines_per_face; ++i)
+                for (unsigned int j=0; j<polynomial_degree-1; ++j)
+                  b[vertices_per_face+i*(polynomial_degree-1)+j]=
+                    a[vertices_per_cell + GeometryInfo<3>::face_to_cell_lines(
+                        face_no, i)*(polynomial_degree-1)+j];
+
+              // Now b includes the support points on the outline of the quad
+              // and we can apply the laplace vector
+              add_weighted_interior_points (support_point_weights_on_quad, b);
+              AssertDimension (b.size(),
+                               4*this->polynomial_degree +
+                               (this->polynomial_degree-1)*(this->polynomial_degree-1));
+
+              // the newly computed interior points sit at the end of b
+              for (unsigned int i=0; i<(polynomial_degree-1)*(polynomial_degree-1); ++i)
+                a.push_back(b[4*polynomial_degree+i]);
+            }
+          else
+            {
+              // face is entirely in the interior. get intermediate
+              // points from the relevant manifold object.
+              vertices.resize(4);
+              for (unsigned int i=0; i<4; ++i)
+                vertices[i] = face->vertex(i);
+              get_intermediate_points (face->get_manifold(), line_support_points, vertices, quad_points);
+              // in 3D, the orientation, flip and rotation of the face might
+              // not match what we expect here, namely the standard
+              // orientation. thus reorder points accordingly. since a Mapping
+              // uses the same shape function as an FE_Q, we can ask a FE_Q to
+              // do the reordering for us.
+              for (unsigned int i=0; i<quad_points.size(); ++i)
+                a.push_back(quad_points[fe_q->adjust_quad_dof_index_for_face_orientation(i,
+                                        face_orientation,
+                                        face_flip,
+                                        face_rotation)]);
+            }
+        }
+    }
+}
+
+
+
+// Specialization for surfaces embedded in 3d: the interior support points
+// of the (single) quad are requested from the manifold attached to the cell
+// and appended to @p a in the order in which they are returned.
+template <>
+void
+MappingQGeneric<2,3>::
+add_quad_support_points(const Triangulation<2,3>::cell_iterator &cell,
+                        std::vector<Point<3> >                &a) const
+{
+  const unsigned int n_interior_points
+    = (polynomial_degree-1)*(polynomial_degree-1);
+
+  std::vector<Point<3> > interior_points (n_interior_points);
+  get_intermediate_points_on_object (cell->get_manifold(), line_support_points,
+                                     cell, interior_points);
+
+  a.insert (a.end(), interior_points.begin(), interior_points.end());
+}
+
+
+
+// Generic fallback. Reaching this function is treated as an internal
+// error: it does nothing but trigger an assertion.
+template <int dim, int spacedim>
+void
+MappingQGeneric<dim,spacedim>::
+add_quad_support_points(const typename Triangulation<dim,spacedim>::cell_iterator &/*cell*/,
+                        std::vector<Point<spacedim> >                             &/*a*/) const
+{
+  Assert (false, ExcInternalError());
+}
+
+
+
+// Collect the support points of the mapping on @p cell. The returned
+// vector starts with the cell's vertices; for polynomial degrees larger
+// than one, the points on lines, quads and in the cell interior are
+// appended in that order.
+template<int dim, int spacedim>
+std::vector<Point<spacedim> >
+MappingQGeneric<dim,spacedim>::
+compute_mapping_support_points(const typename Triangulation<dim,spacedim>::cell_iterator &cell) const
+{
+  // the vertices always come first
+  const unsigned int n_vertices = GeometryInfo<dim>::vertices_per_cell;
+  std::vector<Point<spacedim> > support_points;
+  support_points.reserve (n_vertices);
+  for (unsigned int v=0; v<n_vertices; ++v)
+    support_points.push_back (cell->vertex(v));
+
+  // a mapping of degree one needs nothing beyond the vertices
+  if (this->polynomial_degree <= 1)
+    return support_points;
+
+  if (dim == 1)
+    {
+      add_line_support_points (cell, support_points);
+    }
+  else if (dim == 2)
+    {
+      // first the points on the four bounding lines
+      add_line_support_points (cell, support_points);
+
+      // then the points inside the quad: taken from the manifold if the
+      // cell lives in a higher dimensional space, otherwise computed from
+      // the points collected so far
+      if (dim != spacedim)
+        add_quad_support_points (cell, support_points);
+      else
+        add_weighted_interior_points (support_point_weights_on_quad,
+                                      support_points);
+    }
+  else if (dim == 3)
+    {
+      // points on the lines and on the faces of the hex
+      add_line_support_points (cell, support_points);
+      add_quad_support_points (cell, support_points);
+
+      // and finally the points in the interior
+      add_weighted_interior_points (support_point_weights_on_hex,
+                                    support_points);
+    }
+  else
+    {
+      Assert (false, ExcNotImplemented());
+    }
+
+  return support_points;
+}
+
+
+
+//--------------------------- Explicit instantiations -----------------------
+#include "mapping_q_generic.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/fe/mapping_q_generic.inst.in b/source/fe/mapping_q_generic.inst.in
new file mode 100644
index 0000000..06156a2
--- /dev/null
+++ b/source/fe/mapping_q_generic.inst.in
@@ -0,0 +1,23 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+#if deal_II_dimension <= deal_II_space_dimension 	
+    template class MappingQGeneric<deal_II_dimension, deal_II_space_dimension>; // instantiated only for dim <= spacedim
+#endif
+  }
+
diff --git a/source/grid/CMakeLists.txt b/source/grid/CMakeLists.txt
new file mode 100644
index 0000000..8279350
--- /dev/null
+++ b/source/grid/CMakeLists.txt
@@ -0,0 +1,60 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_BINARY_DIR}) # NOTE(review): presumably so that the expanded .inst files can be found -- confirm
+
+SET(_src # source files of the obj_grid object library
+  cell_id.cc
+  grid_generator.cc
+  grid_in.cc
+  grid_out.cc
+  grid_refinement.cc
+  grid_reordering.cc
+  grid_tools.cc
+  intergrid_map.cc
+  manifold.cc
+  manifold_lib.cc
+  persistent_tria.cc
+  tria_accessor.cc
+  tria_boundary.cc
+  tria_boundary_lib.cc
+  tria.cc
+  tria_faces.cc
+  tria_levels.cc
+  tria_objects.cc
+  )
+
+SET(_inst # instantiation templates handed to EXPAND_INSTANTIATIONS below
+  grid_generator.inst.in
+  grid_in.inst.in
+  grid_out.inst.in
+  grid_refinement.inst.in
+  grid_tools.inst.in
+  intergrid_map.inst.in
+  manifold.inst.in
+  manifold_lib.inst.in
+  tria_accessor.inst.in
+  tria_boundary.inst.in
+  tria_boundary_lib.inst.in
+  tria.inst.in
+  tria_objects.inst.in
+  )
+
+FILE(GLOB _header # all public grid headers, listed as part of the target
+  ${CMAKE_SOURCE_DIR}/include/deal.II/grid/*.h
+  )
+
+DEAL_II_ADD_LIBRARY(obj_grid OBJECT ${_src} ${_header} ${_inst}) # object library built from sources, headers and templates
+EXPAND_INSTANTIATIONS(obj_grid "${_inst}") # process the .inst.in templates for obj_grid
diff --git a/source/grid/cell_id.cc b/source/grid/cell_id.cc
new file mode 100644
index 0000000..57bd276
--- /dev/null
+++ b/source/grid/cell_id.cc
@@ -0,0 +1,30 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/grid/cell_id.h>
+
+#include <sstream>
+
+DEAL_II_NAMESPACE_OPEN
+
+// Return the textual representation of this CellId, i.e. exactly what
+// the stream output operator writes for it.
+std::string
+CellId::to_string() const
+{
+  std::ostringstream buffer;
+  buffer << *this;
+
+  return buffer.str();
+}
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/grid/grid_generator.cc b/source/grid/grid_generator.cc
new file mode 100644
index 0000000..92225b8
--- /dev/null
+++ b/source/grid/grid_generator.cc
@@ -0,0 +1,4336 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/grid_reordering.h>
+#include <deal.II/grid/grid_tools.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/tria_boundary_lib.h>
+#include <deal.II/grid/intergrid_map.h>
+
+#include <deal.II/distributed/tria.h>
+#include <deal.II/distributed/shared_tria.h>
+
+#include <iostream>
+#include <cmath>
+#include <limits>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace GridGenerator
+{
+  namespace
+  {
+    // Corner points of the cube [-1,1]^3, listed with x running fastest, then y, then z
+    const Point<3> hexahedron[8] =
+    {
+      Point<3>(-1,-1,-1),
+      Point<3>(+1,-1,-1),
+      Point<3>(-1,+1,-1),
+      Point<3>(+1,+1,-1),
+      Point<3>(-1,-1,+1),
+      Point<3>(+1,-1,+1),
+      Point<3>(-1,+1,+1),
+      Point<3>(+1,+1,+1)
+    };
+
+    // Vertices of the octahedron inscribed in the cube [-1,1]^3: the
+    // points -1 and +1 on the x-, then the y-, then the z-axis
+    const Point<3> octahedron[6] =
+    {
+      Point<3>(-1, 0, 0),
+      Point<3>( 1, 0, 0),
+      Point<3>( 0,-1, 0),
+      Point<3>( 0, 1, 0),
+      Point<3>( 0, 0,-1),
+      Point<3>( 0, 0, 1)
+    };
+
+
+    /**
+     * Implement the @p colorize option of hyper_rectangle(): face number
+     * @p f of the first (and only) cell of @p tria receives boundary
+     * indicator @p f. Nothing is done in 1d.
+     */
+    template <int dim, int spacedim>
+    void
+    colorize_hyper_rectangle (Triangulation<dim,spacedim> &tria)
+    {
+      // nothing to be done in 1d
+      if (dim <= 1)
+        return;
+
+      // the mesh consists of a single cell, so simply number its faces
+      const typename Triangulation<dim,spacedim>::cell_iterator
+      only_cell = tria.begin();
+
+      unsigned int face_no = 0;
+      while (face_no < GeometryInfo<dim>::faces_per_cell)
+        {
+          only_cell->face(face_no)->set_boundary_id (face_no);
+          ++face_no;
+        }
+    }
+
+
+
+    /**
+     * Variant for one-dimensional triangulations: every cell whose center
+     * has a positive first coordinate gets material id 1. The corner points
+     * and the tolerance are not used. Boundary indicators are left alone;
+     * they are set to 0 (left) and 1 (right) by default.
+     */
+    template<int spacedim>
+    void
+    colorize_subdivided_hyper_rectangle (Triangulation<1,spacedim> &tria,
+                                         const Point<spacedim> &,
+                                         const Point<spacedim> &,
+                                         const double)
+    {
+      typedef typename Triangulation<1,spacedim>::cell_iterator cell_iterator;
+
+      for (cell_iterator cell = tria.begin(); cell != tria.end(); ++cell)
+        {
+          const bool right_of_origin = (cell->center()(0) > 0);
+          if (right_of_origin)
+            cell->set_material_id(1);
+        }
+    }
+
+
+
+    /**
+     * Colorize a subdivided hyper rectangle with corners @p p1 and @p p2.
+     *
+     * Every face whose boundary indicator is 0 is compared, coordinate
+     * direction by coordinate direction, with the two corner points: a face
+     * whose center coincides (up to @p epsilon) with @p p1 in direction
+     * @p d gets boundary indicator 2*d, one that coincides with @p p2 gets
+     * 2*d+1. @p epsilon should be smaller than the smallest cell diameter.
+     *
+     * Afterwards the material id of each cell is set to a bit pattern in
+     * which bit @p d is set if the cell center has a positive coordinate in
+     * direction @p d.
+     */
+    template <int dim, int spacedim>
+    void
+    colorize_subdivided_hyper_rectangle (Triangulation<dim,spacedim> &tria,
+                                         const Point<spacedim>   &p1,
+                                         const Point<spacedim>   &p2,
+                                         const double        epsilon)
+    {
+      typedef typename Triangulation<dim,spacedim>::face_iterator face_iterator;
+      typedef typename Triangulation<dim,spacedim>::cell_iterator cell_iterator;
+
+      const face_iterator endface = tria.end_face();
+      for (face_iterator face = tria.begin_face(); face!=endface; ++face)
+        {
+          if (face->boundary_id() != 0)
+            continue;
+
+          const Point<spacedim> center (face->center());
+
+          // look for the first direction in which the face center lies on
+          // the lower or the upper side of the rectangle
+          bool side_found = false;
+          for (unsigned int d=0; d<dim && !side_found; ++d)
+            {
+              if (std::abs(center(d) - p1[d]) < epsilon)
+                {
+                  face->set_boundary_id(static_cast<types::boundary_id>(2*d));
+                  side_found = true;
+                }
+              else if (std::abs(center(d) - p2[d]) < epsilon)
+                {
+                  face->set_boundary_id(static_cast<types::boundary_id>(2*d+1));
+                  side_found = true;
+                }
+            }
+
+          // the triangulation says the face is on the boundary, but we
+          // could not find out on which part of it
+          Assert (side_found, ExcInternalError());
+        }
+
+      for (cell_iterator cell = tria.begin(); cell != tria.end(); ++cell)
+        {
+          const Point<spacedim> cell_center = cell->center();
+
+          char id = 0;
+          for (unsigned int d=0; d<dim; ++d)
+            if (cell_center(d) > 0)
+              id += 1 << d;
+          cell->set_material_id(id);
+        }
+    }
+
+
+    /**
+     * Colorize a 2d hyper shell: boundary indicator 1 is set on face 2 of
+     * every cell (including the face's sub-objects), which is taken to be
+     * the face on the outer boundary. Nothing else is modified, so the inner
+     * boundary keeps its indicator. The geometric arguments are not used.
+     */
+    void
+    colorize_hyper_shell (Triangulation<2> &tria,
+                          const Point<2> &,
+                          const double,
+                          const double)
+    {
+      // Although geometric data is passed in, the decision is made purely
+      // on topology; for the mesh based on a cube this is highly irregular.
+      const unsigned int outer_face = 2;
+
+      Triangulation<2>::cell_iterator cell = tria.begin ();
+      while (cell != tria.end ())
+        {
+          Assert(cell->face(outer_face)->at_boundary(), ExcInternalError());
+          cell->face (outer_face)->set_all_boundary_ids (1);
+          ++cell;
+        }
+    }
+
+
+    /**
+     * Colorize a 3d hyper shell: boundary indicator 1 is set on the faces
+     * that are taken to form the outer boundary; all other faces keep their
+     * indicator. Only meshes with 6, 12 or 96 cells are supported. The
+     * geometric arguments are not used.
+     */
+    void
+    colorize_hyper_shell (Triangulation<3> &tria,
+                          const Point<3> &,
+                          const double,
+                          const double)
+    {
+      // Everything below relies on knowledge about how the cells of the
+      // respective coarse meshes are oriented. This is probably not good
+      // style...
+      const unsigned int n_cells = tria.n_cells();
+
+      if (n_cells == 6)
+        {
+          // number of the outer face of each of the six cells, in the order
+          // in which the cells are traversed
+          static const unsigned int outer_face[6] = {4, 2, 2, 0, 2, 0};
+
+          Triangulation<3>::cell_iterator cell = tria.begin();
+          for (unsigned int c=0; c<6; ++c)
+            {
+              if (c > 0)
+                ++cell;
+
+              Assert (cell->face(outer_face[c])->at_boundary(),
+                      ExcInternalError());
+              cell->face(outer_face[c])->set_all_boundary_ids(1);
+            }
+        }
+      else if (n_cells == 12)
+        {
+          // here, the outer face is always face 5
+          Triangulation<3>::cell_iterator cell = tria.begin();
+          while (cell != tria.end())
+            {
+              Assert (cell->face(5)->at_boundary(), ExcInternalError());
+              cell->face(5)->set_all_boundary_ids(1);
+              ++cell;
+            }
+        }
+      else if (n_cells == 96)
+        {
+          // The 96-cell shell is a once refined 12-cell mesh, so the outer
+          // faces are again those with face_no==5, unless that face is in
+          // the interior. Count them to make sure there are exactly 48.
+          unsigned int n_outer_faces = 0;
+          Triangulation<3>::cell_iterator cell = tria.begin();
+          while (cell != tria.end())
+            {
+              if (cell->face(5)->at_boundary())
+                {
+                  cell->face(5)->set_all_boundary_ids(1);
+                  ++n_outer_faces;
+                }
+              ++cell;
+            }
+          Assert (n_outer_faces == 48, ExcInternalError());
+        }
+      else
+        {
+          Assert (false, ExcNotImplemented());
+        }
+    }
+
+
+
+    /**
+     * Assign boundary indicators to the three-cell quarter hyper shell
+     * around @p center: zero to the inner shell boundary, one to the outer
+     * shell boundary, two to the flat face in the plane x=center(0), three
+     * to the one in the plane y=center(1) and four to the one in the plane
+     * z=center(2).
+     *
+     * All distances and plane tests are evaluated relative to @p center, so
+     * that the result does not depend on where the shell is located. (For a
+     * shell centered at the origin this is identical to testing against
+     * x=0, y=0, z=0 and the plain norm of the face centers.)
+     *
+     * On the flat faces, the boundary lines whose end points have different
+     * distances from @p center (i.e. the radial lines) receive the
+     * indicator of the face; on the two curved faces, the boundary lines
+     * whose end points have the same distance from @p center receive the
+     * indicator of the face.
+     *
+     * Throws ExcNotImplemented if the triangulation does not consist of
+     * exactly three cells, and ExcInternalError if a boundary face cannot
+     * be classified.
+     */
+    void
+    colorize_quarter_hyper_shell(Triangulation<3> &tria,
+                                 const Point<3> &center,
+                                 const double inner_radius,
+                                 const double outer_radius)
+    {
+      AssertThrow (tria.n_cells() == 3, ExcNotImplemented());
+
+      // radius half way between the two shells, and a tolerance relative
+      // to it
+      const double middle = (outer_radius-inner_radius)/2e0 + inner_radius;
+      const double eps = 1e-3*middle;
+
+      for (Triangulation<3>::cell_iterator cell = tria.begin();
+           cell!=tria.end(); ++cell)
+        for (unsigned int f=0; f<GeometryInfo<3>::faces_per_cell; ++f)
+          {
+            const Triangulation<3>::face_iterator face = cell->face(f);
+            if (!face->at_boundary())
+              continue;
+
+            // position of the face center relative to the shell center
+            const Tensor<1,3> offset = face->center() - center;
+            const double radius = offset.norm();
+
+            // classify the face; the order of the tests matters since the
+            // flat faces are identified before the radius is looked at
+            types::boundary_id id = numbers::invalid_boundary_id;
+            bool is_flat_face = true;
+            if (std::fabs(offset[0]) < eps)        // plane x = center(0)
+              id = 2;
+            else if (std::fabs(offset[1]) < eps)   // plane y = center(1)
+              id = 3;
+            else if (std::fabs(offset[2]) < eps)   // plane z = center(2)
+              id = 4;
+            else if (radius < middle)              // inner shell
+              {
+                id = 0;
+                is_flat_face = false;
+              }
+            else if (radius > middle)              // outer shell
+              {
+                id = 1;
+                is_flat_face = false;
+              }
+            else
+              {
+                AssertThrow (false, ExcInternalError());
+              }
+
+            face->set_boundary_id(id);
+
+            for (unsigned int j=0; j<GeometryInfo<3>::lines_per_face; ++j)
+              {
+                const Triangulation<3>::line_iterator line = face->line(j);
+                if (!line->at_boundary())
+                  continue;
+
+                // difference of the distances of the two end points from
+                // the shell center: large for radial lines, (almost) zero
+                // for lines running along one of the shells
+                const double radial_extent
+                  = std::fabs((line->vertex(0) - center).norm() -
+                              (line->vertex(1) - center).norm());
+
+                const bool tag_line = (is_flat_face ?
+                                       (radial_extent > eps) :
+                                       (radial_extent < eps));
+                if (tag_line)
+                  line->set_boundary_id(id);
+              }
+          }
+    }
+
+  }
+
+
+  // Create a single-cell triangulation of the axis-parallel box spanned by
+  // the two points p_1 and p_2 (in any order). If colorize is set, each face
+  // of the cell gets its own boundary indicator.
+  template <int dim, int spacedim>
+  void
+  hyper_rectangle (Triangulation<dim,spacedim> &tria,
+                   const Point<dim>   &p_1,
+                   const Point<dim>   &p_2,
+                   const bool          colorize)
+  {
+    // Embed the two corners in spacedim dimensions and sort them so that
+    // 'lower' is the smaller one in every coordinate direction. The
+    // additional entries remain zero.
+    Point<spacedim> lower, upper;
+    for (unsigned int d=0; d<dim; ++d)
+      {
+        lower(d) = std::min(p_1(d), p_2(d));
+        upper(d) = std::max(p_1(d), p_2(d));
+      }
+
+    Assert (dim >= 1 && dim <= 3, ExcNotImplemented ());
+
+    // Bit d of the vertex number decides whether the vertex lies on the
+    // lower or the upper side in coordinate direction d.
+    const unsigned int n_vertices = GeometryInfo<dim>::vertices_per_cell;
+    std::vector<Point<spacedim> > vertices (n_vertices, lower);
+    for (unsigned int v=0; v<n_vertices; ++v)
+      for (unsigned int d=0; d<dim; ++d)
+        if (((v >> d) & 1) != 0)
+          vertices[v](d) = upper(d);
+
+    // The one cell simply uses the vertices in their natural order.
+    std::vector<CellData<dim> > cells (1);
+    CellData<dim> &the_cell = cells[0];
+    for (unsigned int v=0; v<n_vertices; ++v)
+      the_cell.vertices[v] = v;
+    the_cell.material_id = 0;
+
+    tria.create_triangulation (vertices, cells, SubCellData());
+
+    // Assign boundary indicators if requested
+    if (colorize)
+      colorize_hyper_rectangle (tria);
+  }
+
+
+  // Create the hypercube [left,right]^dim by forwarding to hyper_rectangle()
+  // with the two corner points (left,...,left) and (right,...,right).
+  template <int dim, int spacedim>
+  void hyper_cube (Triangulation<dim,spacedim> &tria,
+                   const double                 left,
+                   const double                 right,
+                   const bool                   colorize)
+  {
+    Assert (left < right,
+            ExcMessage ("Invalid left-to-right bounds of hypercube"));
+
+    Point<dim> lower_corner;
+    Point<dim> upper_corner;
+    unsigned int d = 0;
+    while (d < dim)
+      {
+        lower_corner(d) = left;
+        upper_corner(d) = right;
+        ++d;
+      }
+
+    hyper_rectangle (tria, lower_corner, upper_corner, colorize);
+  }
+
+  // Subdivide the simplex given by its dim+1 vertices into dim+1 cells (three
+  // quadrilaterals in 2d, four hexahedra in 3d). Besides the given vertices,
+  // the mesh uses the edge midpoints, in 3d the face midpoints, and the
+  // center of the simplex.
+  template <int dim>
+  void
+  simplex(Triangulation<dim> &tria,
+          const std::vector<Point<dim> > &vertices)
+  {
+    AssertDimension(vertices.size(), dim+1);
+    Assert(dim>1, ExcNotImplemented());
+    Assert(dim<4, ExcNotImplemented());
+
+#ifdef DEBUG
+    Tensor<2,dim> vector_matrix;
+    for (unsigned int d=0; d<dim; ++d)
+      for (unsigned int c=1; c<=dim; ++c)
+        vector_matrix[c-1][d] = vertices[c](d) - vertices[0](d);
+    Assert(determinant(vector_matrix) > 0., ExcMessage("Vertices of simplex must form a right handed system"));
+#endif
+
+    // Vertex numbers of the cells, one row per cell.
+    static const unsigned int quad_vertices[3][4] =
+    {
+      {0, 3, 5, 6},
+      {3, 1, 6, 4},
+      {5, 6, 2, 4}
+    };
+    static const unsigned int hex_vertices[4][8] =
+    {
+      { 0,  4,  8, 10,  7, 13, 12, 14},
+      { 4,  1, 10,  5, 13,  9, 14, 11},
+      { 8, 10,  2,  5, 12, 14,  6, 11},
+      { 7, 13, 12, 14,  3,  9,  6, 11}
+    };
+
+    // The point list starts with the given vertices...
+    std::vector<Point<dim> > points = vertices;
+    Point<dim> center;
+    // ...followed by the midpoints of the edges between consecutive
+    // vertices. Sum up the vertices on the way to obtain the center.
+    for (unsigned int i=0; i<=dim; ++i)
+      {
+        points.push_back(0.5*(points[i]+points[(i+1)%(dim+1)]));
+        center += points[i];
+      }
+    if (dim>2)
+      {
+        // in 3D there are two more edges...
+        for (unsigned int i=1; i<dim; ++i)
+          points.push_back(0.5*(points[i-1]+points[i+1]));
+        // ...and the midpoints of the faces
+        for (unsigned int i=0; i<=dim; ++i)
+          points.push_back(1./3.*
+                           (points[i]+
+                            points[(i+1)%(dim+1)]+
+                            points[(i+2)%(dim+1)]));
+      }
+    points.push_back((1./(dim+1))*center);
+
+    std::vector<CellData<dim> > cells(dim+1);
+    switch (dim)
+      {
+      case 2:
+        AssertDimension(points.size(), 7);
+        for (unsigned int c=0; c<3; ++c)
+          {
+            for (unsigned int v=0; v<4; ++v)
+              cells[c].vertices[v] = quad_vertices[c][v];
+            cells[c].material_id = 0;
+          }
+        break;
+      case 3:
+        AssertDimension(points.size(), 15);
+        for (unsigned int c=0; c<4; ++c)
+          {
+            for (unsigned int v=0; v<8; ++v)
+              cells[c].vertices[v] = hex_vertices[c][v];
+            cells[c].material_id = 0;
+          }
+        break;
+      default:
+        Assert(false, ExcNotImplemented());
+      }
+    tria.create_triangulation (points, cells, SubCellData());
+  }
+
+
+  // Create a closed loop of n_cells hexahedra with a square cross section.
+  // The cross section (half diagonal r) is centered on a circle of radius R
+  // in the x-y plane and is rotated by n_rotations*pi/2 in total along the
+  // loop.
+  void
+  moebius (Triangulation<3>  &tria,
+           const unsigned int      n_cells,
+           const unsigned int   n_rotations,
+           const double         R,
+           const double         r)
+  {
+    const unsigned int dim=3;
+    Assert (n_cells>4, ExcMessage("More than 4 cells are needed to create a moebius grid."));
+    Assert (r>0 && R>0, ExcMessage("Outer and inner radius must be positive."));
+    Assert (R>r, ExcMessage("Outer radius must be greater than inner radius."));
+
+    // angle by which the cross section is rotated, and angle by which it is
+    // moved along the loop, from one cell to the next
+    const double beta_step=n_rotations*numbers::PI/2.0/n_cells;
+    const double alpha_step=2.0*numbers::PI/n_cells;
+
+    // four vertices per cross section
+    std::vector<Point<dim> > vertices (4*n_cells);
+    for (unsigned int i=0; i<n_cells; ++i)
+      {
+        const double alpha = i*alpha_step;
+        for (unsigned int j=0; j<4; ++j)
+          {
+            const double beta = i*beta_step+j*numbers::PI/2.0;
+            Point<dim> &vertex = vertices[4*i+j];
+            vertex[0]=R*std::cos(alpha)+r*std::cos(beta)*std::cos(alpha);
+            vertex[1]=R*std::sin(alpha)+r*std::cos(beta)*std::sin(alpha);
+            vertex[2]=r*std::sin(beta);
+          }
+      }
+
+    // cell i connects cross section i (first four vertices) with cross
+    // section i+1 (last four vertices)
+    std::vector<CellData<dim> > cells (n_cells);
+    for (unsigned int i=0; i<n_cells; ++i)
+      {
+        for (unsigned int j=0; j<2; ++j)
+          {
+            const unsigned int first = 4*i + 4*j;
+            cells[i].vertices[0+4*j]=first+0;
+            cells[i].vertices[1+4*j]=first+3;
+            cells[i].vertices[2+4*j]=first+2;
+            cells[i].vertices[3+4*j]=first+1;
+          }
+        cells[i].material_id=0;
+      }
+
+    // the last cell has to be connected to the (rotated) first cross
+    // section, so correct its last four vertices
+    CellData<dim> &last_cell = cells[n_cells-1];
+    last_cell.vertices[4]=(0+n_rotations)%4;
+    last_cell.vertices[5]=(3+n_rotations)%4;
+    last_cell.vertices[6]=(2+n_rotations)%4;
+    last_cell.vertices[7]=(1+n_rotations)%4;
+
+    GridReordering<dim>::invert_all_cells_of_negative_grid(vertices,cells);
+    tria.create_triangulation_compatibility (vertices, cells, SubCellData());
+  }
+
+
+
+  // Create the surface of a torus from 16 quadrilaterals. The coarse mesh
+  // consists of four rings of four vertices each; the rings are centered at
+  // distance R from the origin on the +x, +z, -x and -z axis, and their
+  // vertices have distance r from the ring center.
+  void
+  torus (Triangulation<2,3>  &tria,
+         const double         R,
+         const double         r)
+  {
+    Assert (R>r, ExcMessage("Outer radius must be greater than inner radius."));
+
+    const unsigned int dim=2;
+    const unsigned int spacedim=3;
+
+    const unsigned int n_rings          = 4;
+    const unsigned int vertices_per_ring = 4;
+
+    std::vector<Point<spacedim> > vertices (n_rings*vertices_per_ring);
+
+    // ring around (R,0,0)
+    vertices[0]=Point<spacedim>(R-r,0,0);
+    vertices[1]=Point<spacedim>(R,-r,0);
+    vertices[2]=Point<spacedim>(R+r,0,0);
+    vertices[3]=Point<spacedim>(R, r,0);
+    // ring around (0,0,R)
+    vertices[4]=Point<spacedim>(0,0,R-r);
+    vertices[5]=Point<spacedim>(0,-r,R);
+    vertices[6]=Point<spacedim>(0,0,R+r);
+    vertices[7]=Point<spacedim>(0,r,R);
+    // ring around (-R,0,0)
+    vertices[8]=Point<spacedim>(-(R-r),0,0);
+    vertices[9]=Point<spacedim>(-R,-r,0);
+    vertices[10]=Point<spacedim>(-(R+r),0,0);
+    vertices[11]=Point<spacedim>(-R, r,0);
+    // ring around (0,0,-R)
+    vertices[12]=Point<spacedim>(0,0,-(R-r));
+    vertices[13]=Point<spacedim>(0,-r,-R);
+    vertices[14]=Point<spacedim>(0,0,-(R+r));
+    vertices[15]=Point<spacedim>(0,r,-R);
+
+    // Right hand orientation: cell number 4*ring+k connects vertex k of
+    // its ring with vertex k of the next ring, and then with the
+    // predecessors of these two vertices within their rings. Both the
+    // rings and the vertices within a ring are numbered cyclically.
+    std::vector<CellData<dim> > cells (n_rings*vertices_per_ring);
+    for (unsigned int ring=0; ring<n_rings; ++ring)
+      {
+        const unsigned int this_ring = vertices_per_ring*ring;
+        const unsigned int next_ring = vertices_per_ring*((ring+1)%n_rings);
+
+        for (unsigned int k=0; k<vertices_per_ring; ++k)
+          {
+            const unsigned int k_prev
+              = (k+vertices_per_ring-1)%vertices_per_ring;
+
+            CellData<dim> &cell = cells[this_ring+k];
+            cell.vertices[0] = this_ring + k;
+            cell.vertices[1] = next_ring + k;
+            cell.vertices[2] = next_ring + k_prev;
+            cell.vertices[3] = this_ring + k_prev;
+            cell.material_id = 0;
+          }
+      }
+
+    // Must call this to be able to create a
+    // correct triangulation in dealii, read
+    // GridReordering<> doc
+    GridReordering<dim,spacedim>::reorder_cells (cells);
+    tria.create_triangulation_compatibility (vertices, cells, SubCellData());
+  }
+
+
+
+  // A parallelogram is a two-dimensional object, so this 3d
+  // specialization only exists to complete the interface: it does no
+  // work and raises the ExcNotImplemented assertion.
+  template <>
+  void
+  parallelogram (Triangulation<3> &,
+                 const Point<3> (&/*corners*/)[3],
+                 const bool /*colorize*/)
+  {
+    Assert (false, ExcNotImplemented());
+  }
+
+  // 1d counterpart of the stub above: there is no one-dimensional
+  // parallelogram, so the arguments are ignored and the
+  // ExcNotImplemented assertion is raised.
+  template <>
+  void
+  parallelogram (Triangulation<1> &,
+                 const Point<1> (&/*corners*/)[1],
+                 const bool /*colorize*/)
+  {
+    Assert (false, ExcNotImplemented());
+  }
+
+// Implementation for 2D only. The two corner points are reinterpreted
+// as the edge vectors of a parallelepiped attached to a
+// default-constructed origin; the subdivision list is handed over
+// empty and the mesh itself is built by subdivided_parallelepiped().
+  template <>
+  void
+  parallelogram (Triangulation<2> &tria,
+                 const Point<2> (&corners)[2],
+                 const bool colorize)
+  {
+    std_cxx11::array<Tensor<1,2>,2> edge_vectors;
+    for (unsigned int d=0; d<2; ++d)
+      edge_vectors[d] = corners[d];
+
+    std::vector<unsigned int> no_subdivisions;
+    Point<2> origin;
+    subdivided_parallelepiped<2,2>(tria, origin, edge_vectors, no_subdivisions, colorize);
+  }
+
+
+
+  // Mesh a parallelepiped with exactly one cell per coordinate
+  // direction by delegating to the array-based overload of
+  // subdivided_parallelepiped().
+  template <int dim>
+  void
+  parallelepiped (Triangulation<dim> &tria,
+                  const Point<dim> (&corners) [dim],
+                  const bool colorize)
+  {
+    // one subdivision in every direction
+    unsigned int single_cell [dim];
+    std::fill (single_cell, single_cell+dim, 1u);
+
+    subdivided_parallelepiped (tria, single_cell, corners, colorize);
+  }
+
+  // Convenience overload: use the same number of subdivisions in each
+  // of the dim directions. The value itself is validated further down
+  // the call chain, by the function that finally builds the mesh.
+  template <int dim>
+  void
+  subdivided_parallelepiped (Triangulation<dim> &tria,
+                             const unsigned int n_subdivisions,
+                             const Point<dim> (&corners) [dim],
+                             const bool colorize)
+  {
+    unsigned int uniform_subdivisions [dim];
+    std::fill (uniform_subdivisions, uniform_subdivisions+dim, n_subdivisions);
+
+    // forward to the array-based overload
+    subdivided_parallelepiped (tria, uniform_subdivisions, corners, colorize);
+  }
+
+  // Array-based overload: repackage the plain arrays into the
+  // containers expected by the general dim/spacedim implementation and
+  // attach the parallelepiped to a default-constructed origin.
+  template <int dim>
+  void
+  subdivided_parallelepiped (Triangulation<dim>  &tria,
+#ifndef _MSC_VER
+                             const unsigned int(&n_subdivisions)[dim],
+#else
+                             const unsigned int *n_subdivisions,
+#endif
+                             const Point<dim>   (&corners) [dim],
+                             const bool           colorize)
+  {
+    // copy the dim subdivision counts
+    const std::vector<unsigned int> subdivisions (&n_subdivisions[0],
+                                                  &n_subdivisions[0]+dim);
+
+    // the corner points double as edge vectors
+    std_cxx11::array<Tensor<1,dim>,dim> edges;
+    for (unsigned int d=0; d<dim; ++d)
+      edges[d] = corners[d];
+
+    Point<dim> origin;
+    subdivided_parallelepiped<dim,dim> (tria, origin, edges, subdivisions, colorize);
+  }
+
+  // Parallelepiped implementation in 1d, 2d, and 3d. @note The
+  // implementation in 1d is similar to hyper_rectangle(), and in 2d is
+  // similar to parallelogram().
+  //
+  // The mesh is spanned by the dim vectors in @p edges attached to
+  // @p origin, with subdivisions[i] cells along edges[i]. An empty
+  // @p subdivisions vector is shorthand for one cell per direction.
+  //
+  // GridReordering::reorder_cells() is made use of towards the end of
+  // this function. Thus the triangulation is explicitly constructed for
+  // all dim here since it is slightly different in that respect
+  // (cf. hyper_rectangle(), parallelogram()).
+  //
+  // If @p colorize is set, every boundary face receives its local face
+  // number within its cell as boundary id.
+  template <int dim, int spacedim>
+  void
+  subdivided_parallelepiped (Triangulation<dim, spacedim>  &tria,
+                             const Point<spacedim> &origin,
+                             const std_cxx11::array<Tensor<1,spacedim>,dim> &edges,
+                             const std::vector<unsigned int> &subdivisions,
+                             const bool colorize)
+  {
+    // no subdivisions given: fall back to a single cell per direction
+    if (subdivisions.size()==0)
+      {
+        std::vector<unsigned int> new_subdivisions(dim, 1);
+        subdivided_parallelepiped<dim,spacedim>(tria, origin, edges, new_subdivisions, colorize);
+        return;
+      }
+
+    Assert(subdivisions.size()==dim,
+           ExcMessage("The subdivisions argument must either be empty "
+                      "or contain exactly dim entries."));
+
+    // check subdivisions
+    for (unsigned int i=0; i<dim; ++i)
+      {
+        Assert (subdivisions[i]>0, ExcInvalidRepetitions(subdivisions[i]));
+        Assert (edges[i].norm()>0, ExcMessage("Edges in subdivided_parallelepiped() must not be degenerate."));
+      }
+
+    // Check that no two edge vectors coincide
+    for (unsigned int i=0; i<dim; ++i)
+      for (unsigned int j=i+1; j<dim; ++j)
+        Assert ((edges[i]!=edges[j]),
+                ExcMessage ("Degenerate edges of subdivided_parallelepiped encountered."));
+
+    // Create a list of points, numbered with x running fastest, then
+    // y, then z
+    std::vector<Point<spacedim> > points;
+
+    switch (dim)
+      {
+      case 1:
+        for (unsigned int x=0; x<=subdivisions[0]; ++x)
+          points.push_back (origin + edges[0]/subdivisions[0]*x);
+        break;
+
+      case 2:
+        for (unsigned int y=0; y<=subdivisions[1]; ++y)
+          for (unsigned int x=0; x<=subdivisions[0]; ++x)
+            points.push_back (origin
+                              + edges[0]/subdivisions[0]*x
+                              + edges[1]/subdivisions[1]*y);
+        break;
+
+      case 3:
+        for (unsigned int z=0; z<=subdivisions[2]; ++z)
+          for (unsigned int y=0; y<=subdivisions[1]; ++y)
+            for (unsigned int x=0; x<=subdivisions[0]; ++x)
+              points.push_back (
+                origin
+                + edges[0]/subdivisions[0]*x
+                + edges[1]/subdivisions[1]*y
+                + edges[2]/subdivisions[2]*z);
+        break;
+
+      default:
+        Assert (false, ExcNotImplemented());
+      }
+
+    // Prepare cell data
+    unsigned int n_cells = 1;
+    for (unsigned int i=0; i<dim; ++i)
+      n_cells *= subdivisions[i];
+    std::vector<CellData<dim> > cells (n_cells);
+
+    // Create a fixed ordering of the cells; the vertex numbers refer to
+    // positions in the point list built above
+    switch (dim)
+      {
+      case 1:
+        for (unsigned int x=0; x<subdivisions[0]; ++x)
+          {
+            cells[x].vertices[0] = x;
+            cells[x].vertices[1] = x+1;
+
+            // wipe material id
+            cells[x].material_id = 0;
+          }
+        break;
+
+      case 2:
+      {
+        // Shorthand
+        const unsigned int n_dy = subdivisions[1];
+        const unsigned int n_dx = subdivisions[0];
+
+        for (unsigned int y=0; y<n_dy; ++y)
+          for (unsigned int x=0; x<n_dx; ++x)
+            {
+              const unsigned int c = y*n_dx         + x;
+              cells[c].vertices[0] = y*(n_dx+1)     + x;
+              cells[c].vertices[1] = y*(n_dx+1)     + x+1;
+              cells[c].vertices[2] = (y+1)*(n_dx+1) + x;
+              cells[c].vertices[3] = (y+1)*(n_dx+1) + x+1;
+
+              // wipe material id
+              cells[c].material_id = 0;
+            }
+      }
+      break;
+
+      case 3:
+      {
+        // Shorthand
+        const unsigned int n_dz = subdivisions[2];
+        const unsigned int n_dy = subdivisions[1];
+        const unsigned int n_dx = subdivisions[0];
+
+        for (unsigned int z=0; z<n_dz; ++z)
+          for (unsigned int y=0; y<n_dy; ++y)
+            for (unsigned int x=0; x<n_dx; ++x)
+              {
+                const unsigned int c = z*n_dy*n_dx             + y*n_dx         + x;
+
+                cells[c].vertices[0] = z*(n_dy+1)*(n_dx+1)     + y*(n_dx+1)     + x;
+                cells[c].vertices[1] = z*(n_dy+1)*(n_dx+1)     + y*(n_dx+1)     + x+1;
+                cells[c].vertices[2] = z*(n_dy+1)*(n_dx+1)     + (y+1)*(n_dx+1) + x;
+                cells[c].vertices[3] = z*(n_dy+1)*(n_dx+1)     + (y+1)*(n_dx+1) + x+1;
+                cells[c].vertices[4] = (z+1)*(n_dy+1)*(n_dx+1) + y*(n_dx+1)     + x;
+                cells[c].vertices[5] = (z+1)*(n_dy+1)*(n_dx+1) + y*(n_dx+1)     + x+1;
+                cells[c].vertices[6] = (z+1)*(n_dy+1)*(n_dx+1) + (y+1)*(n_dx+1) + x;
+                cells[c].vertices[7] = (z+1)*(n_dy+1)*(n_dx+1) + (y+1)*(n_dx+1) + x+1;
+
+                // wipe material id
+                cells[c].material_id = 0;
+              }
+        break;
+      }
+
+      default:
+        Assert (false, ExcNotImplemented());
+      }
+
+    // Create triangulation
+    // reorder the cells to ensure that they satisfy the convention for
+    // edge and face directions
+    GridReordering<dim>::reorder_cells(cells, true);
+    tria.create_triangulation (points, cells, SubCellData());
+
+    // Finally assign boundary indicators according to hyper_rectangle
+    if (colorize)
+      {
+        // the iterator type has to carry spacedim as well, since tria
+        // may live in a space of higher dimension than dim
+        typename Triangulation<dim,spacedim>::active_cell_iterator
+        cell = tria.begin_active(),
+        endc = tria.end();
+        for (; cell!=endc; ++cell)
+          {
+            for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+              {
+                if (cell->face(face)->at_boundary())
+                  cell->face(face)->set_boundary_id(face);
+              }
+          }
+      }
+  }
+
+
+  // Subdivided hypercube [left,right]^dim with the same number of
+  // cells in each direction; implemented on top of
+  // subdivided_hyper_rectangle().
+  template <int dim, int spacedim>
+  void
+  subdivided_hyper_cube (Triangulation<dim,spacedim> &tria,
+                         const unsigned int repetitions,
+                         const double left,
+                         const double right)
+  {
+    Assert (repetitions >= 1, ExcInvalidRepetitions(repetitions));
+    Assert (left < right,
+            ExcMessage ("Invalid left-to-right bounds of hypercube"));
+
+    // identical repetition count along all dim axes
+    std::vector<unsigned int> cells_per_direction (dim, repetitions);
+
+    // opposite corners (left,...,left) and (right,...,right)
+    Point<dim> lower_corner, upper_corner;
+    for (unsigned int d=0; d<dim; ++d)
+      {
+        lower_corner[d] = left;
+        upper_corner[d] = right;
+      }
+
+    subdivided_hyper_rectangle (tria, cells_per_direction, lower_corner, upper_corner);
+  }
+
+
+
+  // Mesh the axis-parallel box with opposite corners @p p_1 and @p p_2
+  // using repetitions[i] equally sized cells along coordinate
+  // direction i. @p repetitions must have exactly dim entries, each at
+  // least 1, and the two corners must differ in every one of the first
+  // dim coordinates. If @p colorize is set, boundary indicators are
+  // assigned by colorize_subdivided_hyper_rectangle().
+  template <int dim, int spacedim>
+  void
+  subdivided_hyper_rectangle (
+    Triangulation<dim, spacedim>              &tria,
+    const std::vector<unsigned int> &repetitions,
+    const Point<dim>                &p_1,
+    const Point<dim>                &p_2,
+    const bool                       colorize)
+  {
+    Assert(repetitions.size() == dim,
+           ExcInvalidRepetitionsDimension(dim));
+
+    // First, extend dimensions from dim to spacedim and
+    // normalize such that p1 is lower in all coordinate
+    // directions. Additional entries will be 0.
+    Point<spacedim> p1, p2;
+    for (unsigned int i=0; i<dim; ++i)
+      {
+        p1(i) = std::min(p_1(i), p_2(i));
+        p2(i) = std::max(p_1(i), p_2(i));
+      }
+
+    // calculate deltas and validate input. delta[i] is the step from
+    // one grid point to the next in direction i; only its i-th
+    // component is set here.
+    std::vector<Point<spacedim> > delta(dim);
+    for (unsigned int i=0; i<dim; ++i)
+      {
+        Assert (repetitions[i] >= 1, ExcInvalidRepetitions(repetitions[i]));
+
+        delta[i][i] = (p2[i]-p1[i])/repetitions[i];
+        Assert(delta[i][i]>0.0,
+               ExcMessage("The first dim entries of coordinates of p1 and p2 need to be different."));
+      }
+
+    // then generate the points, with x running fastest, then y, then z
+    std::vector<Point<spacedim> > points;
+    switch (dim)
+      {
+      case 1:
+        for (unsigned int x=0; x<=repetitions[0]; ++x)
+          points.push_back (p1+(double)x*delta[0]);
+        break;
+
+      case 2:
+        for (unsigned int y=0; y<=repetitions[1]; ++y)
+          for (unsigned int x=0; x<=repetitions[0]; ++x)
+            points.push_back (p1+(double)x*delta[0]
+                              +(double)y*delta[1]);
+        break;
+
+      case 3:
+        for (unsigned int z=0; z<=repetitions[2]; ++z)
+          for (unsigned int y=0; y<=repetitions[1]; ++y)
+            for (unsigned int x=0; x<=repetitions[0]; ++x)
+              points.push_back (p1+(double)x*delta[0] +
+                                (double)y*delta[1] + (double)z*delta[2]);
+        break;
+
+      default:
+        Assert (false, ExcNotImplemented());
+      }
+
+    // next create the cells; vertex numbers refer to positions in the
+    // point list built above
+    std::vector<CellData<dim> > cells;
+    switch (dim)
+      {
+      case 1:
+      {
+        cells.resize (repetitions[0]);
+        for (unsigned int x=0; x<repetitions[0]; ++x)
+          {
+            cells[x].vertices[0] = x;
+            cells[x].vertices[1] = x+1;
+            cells[x].material_id = 0;
+          }
+        break;
+      }
+
+      case 2:
+      {
+        cells.resize (repetitions[1]*repetitions[0]);
+        for (unsigned int y=0; y<repetitions[1]; ++y)
+          for (unsigned int x=0; x<repetitions[0]; ++x)
+            {
+              // there are repetitions[0]+1 points in each row
+              const unsigned int c = x+y*repetitions[0];
+              cells[c].vertices[0] = y*(repetitions[0]+1)+x;
+              cells[c].vertices[1] = y*(repetitions[0]+1)+x+1;
+              cells[c].vertices[2] = (y+1)*(repetitions[0]+1)+x;
+              cells[c].vertices[3] = (y+1)*(repetitions[0]+1)+x+1;
+              cells[c].material_id = 0;
+            }
+        break;
+      }
+
+      case 3:
+      {
+        // number of points in one x-row and in one xy-layer
+        const unsigned int n_x  = (repetitions[0]+1);
+        const unsigned int n_xy = (repetitions[0]+1)*(repetitions[1]+1);
+
+        cells.resize (repetitions[2]*repetitions[1]*repetitions[0]);
+        for (unsigned int z=0; z<repetitions[2]; ++z)
+          for (unsigned int y=0; y<repetitions[1]; ++y)
+            for (unsigned int x=0; x<repetitions[0]; ++x)
+              {
+                const unsigned int c = x+y*repetitions[0] +
+                                       z*repetitions[0]*repetitions[1];
+                cells[c].vertices[0] = z*n_xy + y*n_x + x;
+                cells[c].vertices[1] = z*n_xy + y*n_x + x+1;
+                cells[c].vertices[2] = z*n_xy + (y+1)*n_x + x;
+                cells[c].vertices[3] = z*n_xy + (y+1)*n_x + x+1;
+                cells[c].vertices[4] = (z+1)*n_xy + y*n_x + x;
+                cells[c].vertices[5] = (z+1)*n_xy + y*n_x + x+1;
+                cells[c].vertices[6] = (z+1)*n_xy + (y+1)*n_x + x;
+                cells[c].vertices[7] = (z+1)*n_xy + (y+1)*n_x + x+1;
+                cells[c].material_id = 0;
+              }
+        break;
+
+      }
+
+      default:
+        Assert (false, ExcNotImplemented());
+      }
+
+    tria.create_triangulation (points, cells, SubCellData());
+
+    if (colorize)
+      {
+        // to colorize, run through all
+        // faces of all cells and set
+        // boundary indicator to the
+        // correct value if it was 0.
+
+        // use a large epsilon to
+        // compare numbers to avoid
+        // roundoff problems: one percent
+        // of the smallest cell extent,
+        // but never more than 10.
+        double epsilon = 10;
+        for (unsigned int i=0; i<dim; ++i)
+          epsilon = std::min(epsilon, 0.01*delta[i][i]);
+        Assert (epsilon > 0,
+                ExcMessage ("The distance between corner points must be positive."));
+
+        // actual code is external since
+        // 1-D is different from 2/3D.
+        colorize_subdivided_hyper_rectangle (tria, p1, p2, epsilon);
+      }
+  }
+
+
+
+  // Mesh the axis-parallel box with opposite corners @p p_1 and @p p_2
+  // with cells of prescribed, possibly non-uniform, width.
+  // step_sz[i] lists the cell widths along coordinate direction i;
+  // there must be dim such lists, none of them empty, and the widths
+  // in each list have to add up to the extent of the box in that
+  // direction. If @p colorize is set, boundary indicators are assigned
+  // by colorize_subdivided_hyper_rectangle().
+  template <int dim>
+  void
+  subdivided_hyper_rectangle(
+    Triangulation<dim>              &tria,
+    const std::vector<std::vector<double> > &step_sz,
+    const Point<dim>                &p_1,
+    const Point<dim>                &p_2,
+    const bool                       colorize)
+  {
+    Assert(step_sz.size() == dim,
+           ExcInvalidRepetitionsDimension(dim));
+
+    // First, normalize input such that
+    // p1 is lower in all coordinate
+    // directions and check the consistency of
+    // step sizes, i.e. that they all
+    // add up to the sizes specified by
+    // p_1 and p_2
+    Point<dim> p1(p_1);
+    Point<dim> p2(p_2);
+    std::vector< std::vector<double> > step_sizes(step_sz);
+
+    for (unsigned int i=0; i<dim; ++i)
+      {
+        // an empty list would yield no cells at all and would make the
+        // search for the smallest step size further down look into an
+        // empty range
+        Assert (step_sizes[i].size() > 0,
+                ExcMessage ("At least one step size is needed in each coordinate direction."));
+
+        if (p1(i) > p2(i))
+          {
+            // the step sizes are listed from p_1 towards p_2, so they
+            // have to be flipped along with the interval end points
+            std::swap (p1(i), p2(i));
+            std::reverse (step_sizes[i].begin(), step_sizes[i].end());
+          }
+
+        double x = 0;
+        for (unsigned int j=0; j<step_sizes.at(i).size(); j++)
+          x += step_sizes[i][j];
+        Assert(std::fabs(x - (p2(i)-p1(i))) <= 1e-12*std::fabs(x),
+               ExcInvalidRepetitions (i) );
+      }
+
+
+    // then generate the necessary
+    // points; each coordinate is p1
+    // plus the running sum of the
+    // step sizes in that direction
+    std::vector<Point<dim> > points;
+    switch (dim)
+      {
+      case 1:
+      {
+        double x=0;
+        for (unsigned int i=0; ; ++i)
+          {
+            points.push_back (Point<dim> (p1[0]+x));
+
+            // form partial sums. in
+            // the last run through
+            // avoid accessing
+            // non-existent values
+            // and exit early instead
+            if (i == step_sizes[0].size())
+              break;
+
+            x += step_sizes[0][i];
+          }
+        break;
+      }
+
+      case 2:
+      {
+        double y=0;
+        for (unsigned int j=0; ; ++j)
+          {
+            double x=0;
+            for (unsigned int i=0; ; ++i)
+              {
+                points.push_back (Point<dim> (p1[0]+x,
+                                              p1[1]+y));
+                if (i == step_sizes[0].size())
+                  break;
+
+                x += step_sizes[0][i];
+              }
+
+            if (j == step_sizes[1].size())
+              break;
+
+            y += step_sizes[1][j];
+          }
+        break;
+
+      }
+      case 3:
+      {
+        double z=0;
+        for (unsigned int k=0; ; ++k)
+          {
+            double y=0;
+            for (unsigned int j=0; ; ++j)
+              {
+                double x=0;
+                for (unsigned int i=0; ; ++i)
+                  {
+                    points.push_back (Point<dim> (p1[0]+x,
+                                                  p1[1]+y,
+                                                  p1[2]+z));
+                    if (i == step_sizes[0].size())
+                      break;
+
+                    x += step_sizes[0][i];
+                  }
+
+                if (j == step_sizes[1].size())
+                  break;
+
+                y += step_sizes[1][j];
+              }
+
+            if (k == step_sizes[2].size())
+              break;
+
+            z += step_sizes[2][k];
+          }
+        break;
+      }
+
+      default:
+        Assert (false, ExcNotImplemented());
+      }
+
+    // next create the cells
+    // Prepare cell data; vertex numbers refer to positions in the
+    // point list built above
+    std::vector<CellData<dim> > cells;
+    switch (dim)
+      {
+      case 1:
+      {
+        cells.resize (step_sizes[0].size());
+        for (unsigned int x=0; x<step_sizes[0].size(); ++x)
+          {
+            cells[x].vertices[0] = x;
+            cells[x].vertices[1] = x+1;
+            cells[x].material_id = 0;
+          }
+        break;
+      }
+
+      case 2:
+      {
+        cells.resize (step_sizes[1].size()*step_sizes[0].size());
+        for (unsigned int y=0; y<step_sizes[1].size(); ++y)
+          for (unsigned int x=0; x<step_sizes[0].size(); ++x)
+            {
+              const unsigned int c = x+y*step_sizes[0].size();
+              cells[c].vertices[0] = y*(step_sizes[0].size()+1)+x;
+              cells[c].vertices[1] = y*(step_sizes[0].size()+1)+x+1;
+              cells[c].vertices[2] = (y+1)*(step_sizes[0].size()+1)+x;
+              cells[c].vertices[3] = (y+1)*(step_sizes[0].size()+1)+x+1;
+              cells[c].material_id = 0;
+            }
+        break;
+      }
+
+      case 3:
+      {
+        // number of points in one x-row and in one xy-layer
+        const unsigned int n_x  = (step_sizes[0].size()+1);
+        const unsigned int n_xy = (step_sizes[0].size()+1)*(step_sizes[1].size()+1);
+
+        cells.resize (step_sizes[2].size()*step_sizes[1].size()*step_sizes[0].size());
+        for (unsigned int z=0; z<step_sizes[2].size(); ++z)
+          for (unsigned int y=0; y<step_sizes[1].size(); ++y)
+            for (unsigned int x=0; x<step_sizes[0].size(); ++x)
+              {
+                const unsigned int    c = x+y*step_sizes[0].size() +
+                                          z*step_sizes[0].size()*step_sizes[1].size();
+                cells[c].vertices[0] = z*n_xy + y*n_x + x;
+                cells[c].vertices[1] = z*n_xy + y*n_x + x+1;
+                cells[c].vertices[2] = z*n_xy + (y+1)*n_x + x;
+                cells[c].vertices[3] = z*n_xy + (y+1)*n_x + x+1;
+                cells[c].vertices[4] = (z+1)*n_xy + y*n_x + x;
+                cells[c].vertices[5] = (z+1)*n_xy + y*n_x + x+1;
+                cells[c].vertices[6] = (z+1)*n_xy + (y+1)*n_x + x;
+                cells[c].vertices[7] = (z+1)*n_xy + (y+1)*n_x + x+1;
+                cells[c].material_id = 0;
+              }
+        break;
+
+      }
+
+      default:
+        Assert (false, ExcNotImplemented());
+      }
+
+    tria.create_triangulation (points, cells, SubCellData());
+
+    if (colorize)
+      {
+        // to colorize, run through all
+        // faces of all cells and set
+        // boundary indicator to the
+        // correct value if it was 0.
+
+        // use a large epsilon to
+        // compare numbers to avoid
+        // roundoff problems: one percent
+        // of the smallest step size.
+        double min_size = *std::min_element (step_sizes[0].begin(),
+                                             step_sizes[0].end());
+        for (unsigned int i=1; i<dim; ++i)
+          min_size = std::min (min_size,
+                               *std::min_element (step_sizes[i].begin(),
+                                                  step_sizes[i].end()));
+        const double epsilon = 0.01 * min_size;
+
+        // actual code is external since
+        // 1-D is different from 2/3D.
+        colorize_subdivided_hyper_rectangle (tria, p1, p2, epsilon);
+      }
+  }
+
+
+
+  // 1d specialization of the material-id driven mesh generator.
+  //
+  // Starting at @p p, consecutive cells of width spacing[0][i] are laid
+  // out along the line. Cell i takes its material id from
+  // material_id[i]; entries equal to numbers::invalid_material_id are
+  // left out of the mesh. Colorizing is not implemented in 1d.
+  template <>
+  void
+  subdivided_hyper_rectangle (
+    Triangulation<1>                             &tria,
+    const std::vector< std::vector<double> >     &spacing,
+    const Point<1>                               &p,
+    const Table<1,types::material_id>                 &material_id,
+    const bool                                    colorize)
+  {
+    Assert(spacing.size() == 1,
+           ExcInvalidRepetitionsDimension(1));
+
+    const unsigned int n_cells = material_id.size(0);
+
+    Assert(spacing[0].size() == n_cells,
+           ExcInvalidRepetitionsDimension(1));
+
+    // cell widths must not be negative
+    for (unsigned int i=0; i<n_cells; i++)
+      {
+        Assert (spacing[0][i] >= 0, ExcInvalidRepetitions(-1));
+      }
+
+    // generate the necessary points: the running coordinate starts at
+    // p and advances by one cell width per step
+    std::vector<Point<1> > points;
+    double ax = p[0];
+    for (unsigned int x=0; x<=n_cells; ++x)
+      {
+        points.push_back (Point<1> (ax));
+        if (x<n_cells)
+          ax += spacing[0][x];
+      }
+    // create the cells; first count those with a valid material id
+    unsigned int n_val_cells = 0;
+    for (unsigned int i=0; i<n_cells; i++)
+      if (material_id[i]!=numbers::invalid_material_id) n_val_cells++;
+
+    std::vector<CellData<1> > cells(n_val_cells);
+    unsigned int id = 0;
+    for (unsigned int x=0; x<n_cells; ++x)
+      if (material_id[x] != numbers::invalid_material_id)
+        {
+          cells[id].vertices[0] = x;
+          cells[id].vertices[1] = x+1;
+          cells[id].material_id = material_id[x];
+          id++;
+        }
+    // create triangulation; points may be left over where cells were
+    // skipped, hence the call to delete_unused_vertices()
+    SubCellData t;
+    GridTools::delete_unused_vertices (points, cells, t);
+
+    tria.create_triangulation (points, cells, t);
+
+    // set boundary indicator
+    if (colorize)
+      Assert (false, ExcNotImplemented());
+  }
+
+
+  // 2d specialization of the material-id driven mesh generator.
+  //
+  // Starting at @p p, a tensor-product grid is laid out with cell
+  // widths spacing[0][x] in the first and spacing[1][y] in the second
+  // coordinate direction. Cell (x,y) takes its material id from
+  // material_id[x][y]; entries equal to numbers::invalid_material_id
+  // are left out of the mesh. If @p colorize is set, faces whose
+  // boundary id is 0 are renumbered by comparing the face center with
+  // the cell center: 2*i if the face lies on the lower side of the
+  // cell in direction i, 2*i+1 if it lies on the upper side.
+  template <>
+  void
+  subdivided_hyper_rectangle (
+    Triangulation<2>                         &tria,
+    const std::vector< std::vector<double> >     &spacing,
+    const Point<2>                               &p,
+    const Table<2,types::material_id>          &material_id,
+    const bool                                    colorize)
+  {
+    Assert(spacing.size() == 2,
+           ExcInvalidRepetitionsDimension(2));
+
+    // number of cells per direction, and the smallest cell width
+    // (needed as length scale when colorizing)
+    std::vector<unsigned int> repetitions(2);
+    double delta = std::numeric_limits<double>::max();
+    for (unsigned int i=0; i<2; i++)
+      {
+        repetitions[i] = spacing[i].size();
+        for (unsigned int j=0; j<repetitions[i]; j++)
+          {
+            Assert (spacing[i][j] >= 0, ExcInvalidRepetitions(-1));
+            delta = std::min (delta, spacing[i][j]);
+          }
+        Assert(material_id.size(i) == repetitions[i],
+               ExcInvalidRepetitionsDimension(i));
+      }
+
+    // generate the necessary points, with x running fastest
+    std::vector<Point<2> > points;
+    double ay = p[1];
+    for (unsigned int y=0; y<=repetitions[1]; ++y)
+      {
+        double ax = p[0];
+        for (unsigned int x=0; x<=repetitions[0]; ++x)
+          {
+            points.push_back (Point<2> (ax,ay));
+            if (x<repetitions[0])
+              ax += spacing[0][x];
+          }
+        if (y<repetitions[1])
+          ay += spacing[1][y];
+      }
+
+    // create the cells; first count those with a valid material id
+    unsigned int n_val_cells = 0;
+    for (unsigned int i=0; i<material_id.size(0); i++)
+      for (unsigned int j=0; j<material_id.size(1); j++)
+        if (material_id[i][j] != numbers::invalid_material_id)
+          n_val_cells++;
+
+    std::vector<CellData<2> > cells(n_val_cells);
+    unsigned int id = 0;
+    for (unsigned int y=0; y<repetitions[1]; ++y)
+      for (unsigned int x=0; x<repetitions[0]; ++x)
+        if (material_id[x][y]!=numbers::invalid_material_id)
+          {
+            // there are repetitions[0]+1 points in each row
+            cells[id].vertices[0] = y*(repetitions[0]+1)+x;
+            cells[id].vertices[1] = y*(repetitions[0]+1)+x+1;
+            cells[id].vertices[2] = (y+1)*(repetitions[0]+1)+x;
+            cells[id].vertices[3] = (y+1)*(repetitions[0]+1)+x+1;
+            cells[id].material_id = material_id[x][y];
+            id++;
+          }
+
+    // create triangulation; points may be left over where cells were
+    // skipped, hence the call to delete_unused_vertices()
+    SubCellData t;
+    GridTools::delete_unused_vertices (points, cells, t);
+
+    tria.create_triangulation (points, cells, t);
+
+    // set boundary indicator
+    if (colorize)
+      {
+        // tolerance: one percent of the smallest cell width
+        double eps = 0.01 * delta;
+        Triangulation<2>::cell_iterator cell = tria.begin(),
+                                        endc = tria.end();
+        for (; cell !=endc; ++cell)
+          {
+            Point<2> cell_center = cell->center();
+            for (unsigned int f=0; f<GeometryInfo<2>::faces_per_cell; ++f)
+              if (cell->face(f)->boundary_id() == 0)
+                {
+                  Point<2> face_center = cell->face(f)->center();
+                  for (unsigned int i=0; i<2; ++i)
+                    {
+                      if (face_center[i]<cell_center[i]-eps)
+                        cell->face(f)->set_boundary_id(i*2);
+                      if (face_center[i]>cell_center[i]+eps)
+                        cell->face(f)->set_boundary_id(i*2+1);
+                    }
+                }
+          }
+      }
+  }
+
+
+  // 3d specialization of the material-id driven mesh generator.
+  //
+  // Starting at @p p, a tensor-product grid is laid out with cell
+  // widths spacing[d][.] in coordinate direction d. Cell (x,y,z) takes
+  // its material id from material_id[x][y][z]; entries equal to
+  // numbers::invalid_material_id are left out of the mesh. If
+  // @p colorize is set, faces whose boundary id is 0 are renumbered by
+  // comparing the face center with the cell center: 2*i if the face
+  // lies on the lower side of the cell in direction i, 2*i+1 if it
+  // lies on the upper side.
+  template <>
+  void
+  subdivided_hyper_rectangle (
+    Triangulation<3>                           &tria,
+    const std::vector< std::vector<double> >     &spacing,
+    const Point<3>                             &p,
+    const Table<3,types::material_id>               &material_id,
+    const bool                                    colorize)
+  {
+    const unsigned int dim = 3;
+
+    Assert(spacing.size() == dim,
+           ExcInvalidRepetitionsDimension(dim));
+
+    // number of cells per direction, and the smallest cell width
+    // (needed as length scale when colorizing)
+    std::vector<unsigned int > repetitions(dim);
+    double delta = std::numeric_limits<double>::max();
+    for (unsigned int i=0; i<dim; i++)
+      {
+        repetitions[i] = spacing[i].size();
+        for (unsigned int j=0; j<repetitions[i]; j++)
+          {
+            Assert (spacing[i][j] >= 0, ExcInvalidRepetitions(-1));
+            delta = std::min (delta, spacing[i][j]);
+          }
+        Assert(material_id.size(i) == repetitions[i],
+               ExcInvalidRepetitionsDimension(i));
+      }
+
+    // generate the necessary points, with x running fastest, then y,
+    // then z
+    std::vector<Point<dim> > points;
+    double az = p[2];
+    for (unsigned int z=0; z<=repetitions[2]; ++z)
+      {
+        double ay = p[1];
+        for (unsigned int y=0; y<=repetitions[1]; ++y)
+          {
+            double ax = p[0];
+            for (unsigned int x=0; x<=repetitions[0]; ++x)
+              {
+                points.push_back (Point<dim> (ax,ay,az));
+                if (x<repetitions[0])
+                  ax += spacing[0][x];
+              }
+            if (y<repetitions[1])
+              ay += spacing[1][y];
+          }
+        if (z<repetitions[2])
+          az += spacing[2][z];
+      }
+
+    // create the cells; first count those with a valid material id
+    unsigned int n_val_cells = 0;
+    for (unsigned int i=0; i<material_id.size(0); i++)
+      for (unsigned int j=0; j<material_id.size(1); j++)
+        for (unsigned int k=0; k<material_id.size(2); k++)
+          if (material_id[i][j][k]!=numbers::invalid_material_id)
+            n_val_cells++;
+
+    std::vector<CellData<dim> > cells(n_val_cells);
+    unsigned int id = 0;
+    // number of points in one x-row and in one xy-layer
+    const unsigned int n_x  = (repetitions[0]+1);
+    const unsigned int n_xy = (repetitions[0]+1)*(repetitions[1]+1);
+    for (unsigned int z=0; z<repetitions[2]; ++z)
+      for (unsigned int y=0; y<repetitions[1]; ++y)
+        for (unsigned int x=0; x<repetitions[0]; ++x)
+          if (material_id[x][y][z]!=numbers::invalid_material_id)
+            {
+              cells[id].vertices[0] = z*n_xy + y*n_x + x;
+              cells[id].vertices[1] = z*n_xy + y*n_x + x+1;
+              cells[id].vertices[2] = z*n_xy + (y+1)*n_x + x;
+              cells[id].vertices[3] = z*n_xy + (y+1)*n_x + x+1;
+              cells[id].vertices[4] = (z+1)*n_xy + y*n_x + x;
+              cells[id].vertices[5] = (z+1)*n_xy + y*n_x + x+1;
+              cells[id].vertices[6] = (z+1)*n_xy + (y+1)*n_x + x;
+              cells[id].vertices[7] = (z+1)*n_xy + (y+1)*n_x + x+1;
+              cells[id].material_id = material_id[x][y][z];
+              id++;
+            }
+
+    // create triangulation; points may be left over where cells were
+    // skipped, hence the call to delete_unused_vertices()
+    SubCellData t;
+    GridTools::delete_unused_vertices (points, cells, t);
+
+    tria.create_triangulation (points, cells, t);
+
+    // set boundary indicator
+    if (colorize)
+      {
+        // tolerance: one percent of the smallest cell width
+        double eps = 0.01 * delta;
+        Triangulation<dim>::cell_iterator cell = tria.begin(),
+                                          endc = tria.end();
+        for (; cell !=endc; ++cell)
+          {
+            Point<dim> cell_center = cell->center();
+            for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+              if (cell->face(f)->boundary_id() == 0)
+                {
+                  Point<dim> face_center = cell->face(f)->center();
+                  for (unsigned int i=0; i<dim; ++i)
+                    {
+                      if (face_center[i]<cell_center[i]-eps)
+                        cell->face(f)->set_boundary_id(i*2);
+                      if (face_center[i]>cell_center[i]+eps)
+                        cell->face(f)->set_boundary_id(i*2+1);
+                    }
+                }
+          }
+      }
+  }
+
+  template <int dim, int spacedim>
+  void
+  cheese (
+    Triangulation<dim, spacedim> &tria,
+    const std::vector<unsigned int> &holes)
+  {
+    // Builds a regular grid of unit-sized cells in which, in 2d, every
+    // cell with odd index in both directions is left out (a "hole").
+    // holes[d] is the number of holes in direction d; it must have
+    // exactly dim entries, each at least one.
+    AssertDimension(holes.size(), dim);
+
+    // Two opposite corners of the first cell: a default-constructed
+    // point and one whose first dim coordinates are set to one. If a
+    // different cell geometry is ever wanted, these could be turned
+    // into function arguments.
+    Point<spacedim> lower_corner;
+    Point<spacedim> upper_corner;
+    for (unsigned int d=0; d<dim; ++d)
+      upper_corner(d) = 1.;
+
+    // With h holes in a direction there are 2*h+1 cells in that
+    // direction. step[d] is the edge vector of one cell along d.
+    std::vector<Point<spacedim> > step(dim);
+    unsigned int n_cells_in[dim];
+    for (unsigned int d=0; d<dim; ++d)
+      {
+        Assert (holes[d] >= 1, ExcMessage("At least one hole needed in each direction"));
+        n_cells_in[d] = 2*holes[d]+1;
+        step[d][d] = (upper_corner[d]-lower_corner[d]);
+      }
+
+    // Lay out the lattice of vertices, x running fastest.
+    std::vector<Point<spacedim> > points;
+    switch (dim)
+      {
+      case 1:
+        for (unsigned int ix=0; ix<=n_cells_in[0]; ++ix)
+          points.push_back (lower_corner + static_cast<double>(ix)*step[0]);
+        break;
+
+      case 2:
+        for (unsigned int iy=0; iy<=n_cells_in[1]; ++iy)
+          for (unsigned int ix=0; ix<=n_cells_in[0]; ++ix)
+            points.push_back (lower_corner
+                              + static_cast<double>(ix)*step[0]
+                              + static_cast<double>(iy)*step[1]);
+        break;
+
+      case 3:
+        for (unsigned int iz=0; iz<=n_cells_in[2]; ++iz)
+          for (unsigned int iy=0; iy<=n_cells_in[1]; ++iy)
+            for (unsigned int ix=0; ix<=n_cells_in[0]; ++ix)
+              points.push_back (lower_corner
+                                + static_cast<double>(ix)*step[0]
+                                + static_cast<double>(iy)*step[1]
+                                + static_cast<double>(iz)*step[2]);
+        break;
+
+      default:
+        Assert (false, ExcNotImplemented());
+      }
+
+    // Connect the vertices into cells.
+    std::vector<CellData<dim> > cells;
+    switch (dim)
+      {
+      case 2:
+      {
+        // All grid positions minus the holes.
+        cells.resize (n_cells_in[1]*n_cells_in[0]-holes[1]*holes[0]);
+
+        // number of vertices in one row of the lattice
+        const unsigned int n_x = n_cells_in[0]+1;
+
+        unsigned int next_cell=0;
+        for (unsigned int iy=0; iy<n_cells_in[1]; ++iy)
+          for (unsigned int ix=0; ix<n_cells_in[0]; ++ix)
+            {
+              // positions that are odd in both directions stay empty
+              const bool is_hole = (ix%2 == 1) && (iy%2 == 1);
+              if (is_hole)
+                continue;
+
+              Assert(next_cell<cells.size(), ExcInternalError());
+              const unsigned int lower_left = iy*n_x+ix;
+              cells[next_cell].vertices[0] = lower_left;
+              cells[next_cell].vertices[1] = lower_left+1;
+              cells[next_cell].vertices[2] = lower_left+n_x;
+              cells[next_cell].vertices[3] = lower_left+n_x+1;
+              cells[next_cell].material_id = 0;
+              ++next_cell;
+            }
+        break;
+      }
+
+      case 3:
+      {
+        // vertices per lattice row and per lattice layer
+        const unsigned int n_x  = (n_cells_in[0]+1);
+        const unsigned int n_xy = (n_cells_in[0]+1)*(n_cells_in[1]+1);
+
+        // Note that in 3d every grid position receives a cell; no
+        // position is skipped in this branch.
+        cells.resize (n_cells_in[2]*n_cells_in[1]*n_cells_in[0]);
+
+        unsigned int next_cell=0;
+        for (unsigned int iz=0; iz<n_cells_in[2]; ++iz)
+          for (unsigned int iy=0; iy<n_cells_in[1]; ++iy)
+            for (unsigned int ix=0; ix<n_cells_in[0]; ++ix)
+              {
+                Assert(next_cell<cells.size(),ExcInternalError());
+                const unsigned int corner = iz*n_xy + iy*n_x + ix;
+                cells[next_cell].vertices[0] = corner;
+                cells[next_cell].vertices[1] = corner+1;
+                cells[next_cell].vertices[2] = corner+n_x;
+                cells[next_cell].vertices[3] = corner+n_x+1;
+                cells[next_cell].vertices[4] = corner+n_xy;
+                cells[next_cell].vertices[5] = corner+n_xy+1;
+                cells[next_cell].vertices[6] = corner+n_xy+n_x;
+                cells[next_cell].vertices[7] = corner+n_xy+n_x+1;
+                cells[next_cell].material_id = 0;
+                ++next_cell;
+              }
+        break;
+
+      }
+
+      default:
+        Assert (false, ExcNotImplemented());
+      }
+
+    tria.create_triangulation (points, cells, SubCellData());
+  }
+
+  template <int dim, int spacedim>
+  void hyper_cross(Triangulation<dim, spacedim> &tria,
+                   const std::vector<unsigned int> &sizes,
+                   const bool colorize)
+  {
+    // Builds one center cell and, on each of its faces, a straight
+    // chain ("leg") of sizes[face] further cells. With colorize set,
+    // the cells of the leg on face f get material id f+1; the center
+    // cell always gets material id 0.
+    AssertDimension(sizes.size(), GeometryInfo<dim>::faces_per_cell);
+    Assert(dim>1, ExcNotImplemented());
+    Assert(dim<4, ExcNotImplemented());
+
+    // Edge lengths of a single cell. This could be promoted to a
+    // function argument if other cell shapes are ever needed.
+    Tensor<1,dim> extent;
+    for (unsigned int d=0; d<dim; ++d)
+      extent[d] = 1.;
+
+    // one center cell plus the cells of all legs
+    unsigned int total_cells = 1;
+    for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+      total_cells += sizes[f];
+
+    std::vector<Point<spacedim> > points;
+    std::vector<CellData<dim> > cells(total_cells);
+
+    // The center cell: each coordinate is plus or minus half the cell
+    // extent, the sign being the normal orientation of the face the
+    // vertex lies on in that direction.
+    for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+      {
+        Point<spacedim> corner;
+        for (unsigned int d=0; d<dim; ++d)
+          corner(d) = 0.5 * extent[d] *
+                      GeometryInfo<dim>::unit_normal_orientation[GeometryInfo<dim>::vertex_to_face[v][d]];
+        points.push_back(corner);
+        cells[0].vertices[v] = v;
+      }
+    cells[0].material_id = 0;
+
+    // Index of the cell that is filled next; cell 0 is the center.
+    unsigned int current = 1;
+    for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+      {
+        const unsigned int opposite = GeometryInfo<dim>::opposite_face[face];
+        const unsigned int axis = GeometryInfo<dim>::unit_normal_direction[face];
+
+        // Grow the leg outwards through this face, one cell at a time.
+        for (unsigned int step=0; step<sizes[face]; ++step)
+          {
+            // The first cell of a leg attaches to the center cell, all
+            // later ones to the cell created just before.
+            const unsigned int previous = (step==0) ? 0U : (current-1);
+
+            for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_face; ++v)
+              {
+                const unsigned int outer_v = GeometryInfo<dim>::face_to_cell_vertices(face, v);
+                const unsigned int inner_v = GeometryInfo<dim>::face_to_cell_vertices(opposite, v);
+
+                // Reuse the vertex on the outer face of the previous cell ...
+                const unsigned int shared = cells[previous].vertices[outer_v];
+                cells[current].vertices[inner_v] = shared;
+
+                // ... and append a new one, shifted by one cell extent
+                // along the face normal.
+                cells[current].vertices[outer_v] = points.size();
+
+                Point<spacedim> shifted = points[shared];
+                shifted(axis) += GeometryInfo<dim>::unit_normal_orientation[face] * extent[axis];
+                points.push_back(shifted);
+              }
+            cells[current].material_id = (colorize) ? (face+1U) : 0U;
+            ++current;
+          }
+      }
+    tria.create_triangulation (points, cells, SubCellData());
+  }
+
+
+  template <>
+  void hyper_cube_slit (Triangulation<1> &, const double, const double, const bool)
+  {
+    // Not available for Triangulation<1>: all arguments are ignored
+    // and the body only trips an ExcNotImplemented assertion.
+    Assert (false, ExcNotImplemented());
+  }
+
+
+
+  template <>
+  void enclosed_hyper_cube (Triangulation<1> &, const double, const double,
+                            const double, const bool)
+  {
+    // Not available for Triangulation<1>: all arguments are ignored
+    // and the body only trips an ExcNotImplemented assertion.
+    Assert (false, ExcNotImplemented());
+  }
+
+
+
+  template <>
+  void hyper_L (Triangulation<1> &, const double, const double)
+  {
+    // Not available for Triangulation<1>: all arguments are ignored
+    // and the body only trips an ExcNotImplemented assertion.
+    Assert (false, ExcNotImplemented());
+  }
+
+
+
+  template <>
+  void hyper_ball (Triangulation<1> &, const Point<1> &, const double)
+  {
+    // Not available for Triangulation<1>: all arguments are ignored
+    // and the body only trips an ExcNotImplemented assertion.
+    Assert (false, ExcNotImplemented());
+  }
+
+
+
+  template <>
+  void cylinder (Triangulation<1> &, const double, const double)
+  {
+    // Not available for Triangulation<1>: all arguments are ignored
+    // and the body only trips an ExcNotImplemented assertion.
+    Assert (false, ExcNotImplemented());
+  }
+
+
+
+  template <>
+  void truncated_cone (Triangulation<1> &, const double, const double, const double)
+  {
+    // Not available for Triangulation<1>: all arguments are ignored
+    // and the body only trips an ExcNotImplemented assertion.
+    Assert (false, ExcNotImplemented());
+  }
+
+
+
+  template <>
+  void hyper_shell (Triangulation<1> &, const Point<1> &,
+                    const double, const double,
+                    const unsigned int, const bool)
+  {
+    // Not available for Triangulation<1>: all arguments are ignored
+    // and the body only trips an ExcNotImplemented assertion.
+    Assert (false, ExcNotImplemented());
+  }
+
+
+  template <>
+  void cylinder_shell (Triangulation<1> &,
+                       const double, const double, const double,
+                       const unsigned int, const unsigned int)
+  {
+    // Not available for Triangulation<1>: all arguments are ignored
+    // and the body only trips an ExcNotImplemented assertion.
+    Assert (false, ExcNotImplemented());
+  }
+
+
+  template <>
+  void half_hyper_ball (Triangulation<1> &, const Point<1> &, const double)
+  {
+    // Not available for Triangulation<1>: all arguments are ignored
+    // and the body only trips an ExcNotImplemented assertion.
+    Assert (false, ExcNotImplemented());
+  }
+
+
+  template <>
+  void half_hyper_shell (Triangulation<1> &, const Point<1> &,
+                         const double, const double,
+                         const unsigned int, const bool)
+  {
+    // Not available for Triangulation<1>: all arguments are ignored
+    // and the body only trips an ExcNotImplemented assertion.
+    Assert (false, ExcNotImplemented());
+  }
+
+  template <>
+  void quarter_hyper_shell (Triangulation<1> &, const Point<1> &,
+                            const double, const double,
+                            const unsigned int, const bool)
+  {
+    // Not available for Triangulation<1>: all arguments are ignored
+    // and the body only trips an ExcNotImplemented assertion.
+    Assert (false, ExcNotImplemented());
+  }
+
+  template <>
+  void enclosed_hyper_cube (Triangulation<2> &tria,
+                            const double        left,
+                            const double        right,
+                            const double        thickness,
+                            const bool          colorize)
+  {
+    // Builds a 3x3 block of cells: the square [left,right]^2 in the
+    // middle, surrounded by a layer of width thickness. With colorize
+    // set, each cell gets a material id taken from the table below.
+    Assert(left<right,
+           ExcMessage ("Invalid left-to-right bounds of enclosed hypercube"));
+
+    // The four grid lines used in both coordinate directions.
+    const double coords[4] = { left-thickness, left, right, right+thickness };
+
+    // 4x4 lattice of vertices, x running fastest.
+    std::vector<Point<2> > vertices(16);
+    for (unsigned int row=0; row<4; ++row)
+      for (unsigned int col=0; col<4; ++col)
+        vertices[4*row+col] = Point<2>(coords[col], coords[row]);
+
+    // Material ids in cell order (x fastest); the center cell has id 0.
+    const types::material_id materials[9] = { 5, 4, 6,
+                                              1, 0, 2,
+                                              9, 8,10
+                                            };
+
+    std::vector<CellData<2> > cells(9);
+    for (unsigned int row=0; row<3; ++row)
+      for (unsigned int col=0; col<3; ++col)
+        {
+          const unsigned int c          = 3*row+col;
+          const unsigned int lower_left = col+4*row;
+
+          cells[c].vertices[0] = lower_left;
+          cells[c].vertices[1] = lower_left+1;
+          cells[c].vertices[2] = lower_left+4;
+          cells[c].vertices[3] = lower_left+5;
+
+          // without colorize the material id keeps the value that
+          // CellData was constructed with
+          if (colorize)
+            cells[c].material_id = materials[c];
+        }
+
+    // no boundary information
+    tria.create_triangulation (vertices, cells, SubCellData());
+  }
+
+
+
+// Implementation for 2D only
+  template <>
+  void
+  hyper_cube_slit (Triangulation<2> &tria,
+                   const double left,
+                   const double right,
+                   const bool colorize)
+  {
+    // Four cells covering [left,right]^2. The point (mid,left) is
+    // stored twice (indices 1 and 9), so the two lower cells do not
+    // share the edge running from it to the center: that is the slit.
+    const double mid=(right+left)/2;
+
+    std::vector<Point<2> > vertices;
+    vertices.push_back (Point<2>(left, left ));
+    vertices.push_back (Point<2>(mid,  left ));
+    vertices.push_back (Point<2>(mid,  mid  ));
+    vertices.push_back (Point<2>(left, mid  ));
+    vertices.push_back (Point<2>(right,left ));
+    vertices.push_back (Point<2>(right,mid  ));
+    vertices.push_back (Point<2>(mid,  right));
+    vertices.push_back (Point<2>(left, right));
+    vertices.push_back (Point<2>(right,right));
+    vertices.push_back (Point<2>(mid,  left ));   // duplicate of vertex 1
+
+    // vertex indices of the four cells
+    const int connectivity[4][4] =
+    {
+      { 0,1,3,2 },
+      { 9,4,2,5 },
+      { 3,2,7,6 },
+      { 2,5,6,8 }
+    };
+
+    std::vector<CellData<2> > cells (4, CellData<2>());
+    for (unsigned int c=0; c<4; ++c)
+      {
+        cells[c].material_id = 0;
+        for (unsigned int v=0; v<4; ++v)
+          cells[c].vertices[v] = connectivity[c][v];
+      }
+
+    // no boundary information
+    tria.create_triangulation (vertices, cells, SubCellData());
+
+    if (colorize)
+      {
+        // mark one face of the first cell with id 1 and one face of
+        // the second cell with id 2
+        Triangulation<2>::cell_iterator cell = tria.begin();
+        cell->face(1)->set_boundary_id(1);
+        ++cell;
+        cell->face(3)->set_boundary_id(2);
+      }
+  }
+
+
+
+  template <>
+  void truncated_cone (Triangulation<2> &triangulation,
+                       const double radius_0,
+                       const double radius_1,
+                       const double half_length)
+  {
+    // A single quadrilateral spanning x in [-half_length, half_length]
+    // with half-height radius_0 at the left end and radius_1 at the
+    // right end.
+    std::vector<Point<2> > vertices;
+    vertices.push_back (Point<2> (-half_length, -radius_0));
+    vertices.push_back (Point<2> (half_length, -radius_1));
+    vertices.push_back (Point<2> (-half_length, radius_0));
+    vertices.push_back (Point<2> (half_length, radius_1));
+
+    // the one cell simply uses the vertices in the order given above
+    std::vector<CellData<2> > cells (1, CellData<2> ());
+    for (unsigned int v = 0; v < GeometryInfo<2>::vertices_per_cell; ++v)
+      cells[0].vertices[v] = v;
+    cells[0].material_id = 0;
+
+    triangulation.create_triangulation (vertices, cells, SubCellData ());
+
+    // Boundary ids: 1 on face 0, 2 on face 1, and 0 on the two
+    // remaining faces.
+    Triangulation<2>::cell_iterator cell = triangulation.begin ();
+    cell->face (0)->set_boundary_id (1);
+    cell->face (1)->set_boundary_id (2);
+    cell->face (2)->set_boundary_id (0);
+    cell->face (3)->set_boundary_id (0);
+  }
+
+
+
+//TODO: Colorize edges as circumference, left and right radius
+// Implementation for 2D only
+  template <>
+  void
+  hyper_L (Triangulation<2> &tria,
+           const double a,
+           const double b)
+  {
+    // Three cells forming an L: the square [a,b]^2 split into four
+    // quadrants, with the quadrant touching the corner (b,b) left out.
+    const double mid = (a+b)/2;
+
+    // Lattice points over {a, mid, b} x {a, mid, b}, x running
+    // fastest, without the corner (b,b).
+    std::vector<Point<2> > vertices;
+    vertices.push_back (Point<2> (a,   a));
+    vertices.push_back (Point<2> (mid, a));
+    vertices.push_back (Point<2> (b,   a));
+    vertices.push_back (Point<2> (a,   mid));
+    vertices.push_back (Point<2> (mid, mid));
+    vertices.push_back (Point<2> (b,   mid));
+    vertices.push_back (Point<2> (a,   b));
+    vertices.push_back (Point<2> (mid, b));
+
+    // Lower-left lattice index of each of the three cells; the other
+    // three vertices follow at offsets +1, +3 (one lattice row) and +4.
+    const unsigned int lower_left[3] = { 0, 1, 3 };
+
+    std::vector<CellData<2> > cells (3, CellData<2>());
+    for (unsigned int c=0; c<3; ++c)
+      {
+        cells[c].vertices[0] = lower_left[c];
+        cells[c].vertices[1] = lower_left[c]+1;
+        cells[c].vertices[2] = lower_left[c]+3;
+        cells[c].vertices[3] = lower_left[c]+4;
+        cells[c].material_id = 0;
+      }
+
+    // no boundary information
+    tria.create_triangulation (vertices, cells, SubCellData());
+  }
+
+
+
+// Implementation for 2D only
+  template <>
+  void
+  hyper_ball (Triangulation<2> &tria,
+              const Point<2>   &p,
+              const double      radius)
+  {
+    // Five cells around p: a central square whose corners lie on the
+    // diagonals, and four cells connecting it to the outer corners,
+    // which lie at distance radius from p.
+
+    // The factor a shrinks the inner square so that cell sizes are
+    // balanced at the transition from the inner part to the radial
+    // cells.
+    const double a = 1./(1+std::sqrt(2.0));
+
+    // diagonal offsets of the outer and of the inner corners
+    const double outer = radius/std::sqrt(2.0);
+    const double inner = radius/std::sqrt(2.0)*a;
+
+    std::vector<Point<2> > vertices;
+    vertices.push_back (p+Point<2>(-1,-1) *outer);
+    vertices.push_back (p+Point<2>(+1,-1) *outer);
+    vertices.push_back (p+Point<2>(-1,-1) *inner);
+    vertices.push_back (p+Point<2>(+1,-1) *inner);
+    vertices.push_back (p+Point<2>(-1,+1) *inner);
+    vertices.push_back (p+Point<2>(+1,+1) *inner);
+    vertices.push_back (p+Point<2>(-1,+1) *outer);
+    vertices.push_back (p+Point<2>(+1,+1) *outer);
+
+    // vertex indices of the five cells; the third row is the inner
+    // square
+    const int connectivity[5][4] =
+    {
+      {0, 1, 2, 3},
+      {0, 2, 6, 4},
+      {2, 3, 4, 5},
+      {1, 7, 3, 5},
+      {6, 4, 7, 5}
+    };
+
+    std::vector<CellData<2> > cells (5, CellData<2>());
+    for (unsigned int c=0; c<5; ++c)
+      {
+        cells[c].material_id = 0;
+        for (unsigned int v=0; v<4; ++v)
+          cells[c].vertices[v] = connectivity[c][v];
+      }
+
+    // no boundary information
+    tria.create_triangulation (vertices, cells, SubCellData());
+  }
+
+
+
+  template <>
+  void hyper_shell (Triangulation<2> &tria,
+                    const Point<2>   &center,
+                    const double      inner_radius,
+                    const double      outer_radius,
+                    const unsigned int   n_cells,
+                    const bool colorize)
+  {
+    // A closed ring of cells between the circles of radius
+    // inner_radius and outer_radius around center. n_cells is the
+    // number of cells around the ring; zero means "choose one here".
+    Assert ((inner_radius > 0) && (inner_radius < outer_radius),
+            ExcInvalidRadii ());
+
+    const double pi = numbers::PI;
+
+    // If the caller did not fix the number of cells, pick it so that
+    // the length of a cell along the median circle (halfway between
+    // the two radii) equals its radial extent, i.e. the difference of
+    // the radii.
+    unsigned int N = n_cells;
+    if (N == 0)
+      N = static_cast<unsigned int>
+          (std::ceil((2*pi* (outer_radius + inner_radius)/2) /
+                     (outer_radius - inner_radius)));
+
+    // Vertices 0..N-1 lie on the outer circle, N..2N-1 on the inner
+    // one, each numbered counter-clockwise.
+    std::vector<Point<2> > vertices(2*N);
+    for (unsigned int i=0; i<N; ++i)
+      {
+        const double angle = 2*pi * i/N;
+        const Point<2> on_outer = Point<2>( std::cos(angle),
+                                            std::sin(angle)) * outer_radius;
+
+        // scale towards the origin first, translate afterwards
+        vertices[i]   = on_outer;
+        vertices[i+N] = on_outer * (inner_radius/outer_radius);
+
+        vertices[i]   += center;
+        vertices[i+N] += center;
+      }
+
+    // Cell i spans from angular position i to i+1; the last cell wraps
+    // around to position 0.
+    std::vector<CellData<2> > cells (N, CellData<2>());
+    for (unsigned int i=0; i<N; ++i)
+      {
+        const unsigned int next = (i+1)%N;
+
+        cells[i].vertices[0] = i;
+        cells[i].vertices[1] = next;
+        cells[i].vertices[2] = N+i;
+        cells[i].vertices[3] = N+next;
+
+        cells[i].material_id = 0;
+      }
+
+    tria.create_triangulation (vertices, cells, SubCellData());
+
+    if (colorize)
+      colorize_hyper_shell(tria, center, inner_radius, outer_radius);
+  }
+
+
+// Implementation for 2D only
+  template <>
+  void
+  cylinder (Triangulation<2> &tria,
+            const double radius,
+            const double half_length)
+  {
+    // The 2d "cylinder" is the rectangle
+    // [-half_length,half_length] x [-radius,radius], created with
+    // colorized boundary ids that are then renumbered.
+    const Point<2> lower_left (-half_length, -radius);
+    const Point<2> upper_right (half_length, radius);
+
+    hyper_rectangle(tria, lower_left, upper_right, true);
+
+    // Renumber the ids set by hyper_rectangle: 0 becomes 1, 1 becomes
+    // 2, and every other value becomes 0.
+    const Triangulation<2>::face_iterator end = tria.end_face();
+    for (Triangulation<2>::face_iterator f = tria.begin_face(); f != end; ++f)
+      {
+        const unsigned int old_id = f->boundary_id();
+        if (old_id == 0)
+          f->set_boundary_id(1);
+        else if (old_id == 1)
+          f->set_boundary_id(2);
+        else
+          f->set_boundary_id(0);
+      }
+  }
+
+
+
+// Implementation for 2D only
+  template <>
+  void cylinder_shell (Triangulation<2> &,
+                       const double, const double, const double,
+                       const unsigned int, const unsigned int)
+  {
+    // Not available for Triangulation<2>: all arguments are ignored
+    // and the body only trips an ExcNotImplemented assertion.
+    Assert (false, ExcNotImplemented());
+  }
+
+
+  template <>
+  void
+  half_hyper_ball (Triangulation<2> &tria,
+                   const Point<2>   &p,
+                   const double      radius)
+  {
+    // Four cells covering the half of a disk around p with x >= p(0).
+    // Boundary faces on the straight edge get boundary id 1; the other
+    // boundary faces keep the id they receive from
+    // create_triangulation.
+
+    // equilibrate cell sizes at
+    // transition from the inner part
+    // to the radial cells
+    const double a = 1./(1+std::sqrt(2.0));
+    const Point<2> vertices[8] = { p+Point<2>(0,-1) *radius,
+                                   p+Point<2>(+1,-1) *(radius/std::sqrt(2.0)),
+                                   p+Point<2>(0,-1) *(radius/std::sqrt(2.0)*a),
+                                   p+Point<2>(+1,-1) *(radius/std::sqrt(2.0)*a),
+                                   p+Point<2>(0,+1) *(radius/std::sqrt(2.0)*a),
+                                   p+Point<2>(+1,+1) *(radius/std::sqrt(2.0)*a),
+                                   p+Point<2>(0,+1) *radius,
+                                   p+Point<2>(+1,+1) *(radius/std::sqrt(2.0))
+                                 };
+
+    // One row per cell. The table used to be declared with five rows
+    // although only four cells exist; the extra, zero-filled row was
+    // never read.
+    const unsigned int n_cells = 4;
+    const int cell_vertices[n_cells][4] = {{0, 1, 2, 3},
+      {2, 3, 4, 5},
+      {1, 7, 3, 5},
+      {6, 4, 7, 5}
+    };
+
+    std::vector<CellData<2> > cells (n_cells, CellData<2>());
+
+    for (unsigned int i=0; i<n_cells; ++i)
+      {
+        for (unsigned int j=0; j<4; ++j)
+          cells[i].vertices[j] = cell_vertices[i][j];
+        cells[i].material_id = 0;
+      }
+
+    tria.create_triangulation (
+      std::vector<Point<2> >(&vertices[0], &vertices[8]),
+      cells,
+      SubCellData());       // no boundary information
+
+    Triangulation<2>::cell_iterator cell = tria.begin();
+    Triangulation<2>::cell_iterator end = tria.end();
+
+    while (cell != end)
+      {
+        for (unsigned int i=0; i<GeometryInfo<2>::faces_per_cell; ++i)
+          {
+            // interior faces carry no boundary id
+            if (cell->face(i)->boundary_id() == numbers::internal_face_boundary_id)
+              continue;
+
+            // A boundary face whose center does not lie to the right
+            // of x = p(0), up to a tolerance relative to the radius,
+            // belongs to the straight edge.
+            if (cell->face(i)->center()(0) < p(0)+1.e-5 * radius)
+              cell->face(i)->set_boundary_id(1);
+          }
+        ++cell;
+      }
+  }
+
+
+
+// Implementation for 2D only
+  template <>
+  void
+  half_hyper_shell (Triangulation<2> &tria,
+                    const Point<2>   &center,
+                    const double      inner_radius,
+                    const double      outer_radius,
+                    const unsigned int   n_cells,
+                    const bool colorize)
+  {
+    // Half a ring of cells (angles from -pi/2 to +pi/2) between the
+    // circles of radius inner_radius and outer_radius around center.
+    // n_cells is the number of cells along the arc; zero means
+    // "choose one here".
+    Assert ((inner_radius > 0) && (inner_radius < outer_radius),
+            ExcInvalidRadii ());
+
+    const double pi     = numbers::PI;
+
+    // If the caller did not fix the number of cells, pick it so that
+    // the length of a cell along the median arc (halfway between the
+    // two radii) equals its radial extent, i.e. the difference of the
+    // radii.
+    unsigned int N = n_cells;
+    if (N == 0)
+      N = static_cast<unsigned int>
+          (std::ceil((pi* (outer_radius + inner_radius)/2) /
+                     (outer_radius - inner_radius)));
+
+    // Vertices 0..N lie on the outer arc, N+1..2N+1 on the inner one,
+    // each numbered counter-clockwise.
+    std::vector<Point<2> > vertices(2*(N+1));
+    for (unsigned int i=0; i<=N; ++i)
+      {
+        const double angle = pi * i/N - pi/2;
+
+        // The first and last point of each arc must have an
+        // x-coordinate of exactly zero, which evaluating the cosine
+        // with an inexact value of pi would not guarantee.
+        const bool   at_end = (i==0) || (i==N);
+        const double x      = at_end ? 0 : std::cos(angle);
+
+        const Point<2> on_outer = Point<2>(x, std::sin(angle)) * outer_radius;
+
+        // scale towards the origin first, translate afterwards
+        vertices[i]     = on_outer;
+        vertices[i+N+1] = on_outer * (inner_radius/outer_radius);
+
+        vertices[i]     += center;
+        vertices[i+N+1] += center;
+      }
+
+    // Cell i spans from angular position i to i+1. Since i < N, the
+    // position i+1 never exceeds N and needs no wrap-around.
+    std::vector<CellData<2> > cells (N, CellData<2>());
+    for (unsigned int i=0; i<N; ++i)
+      {
+        cells[i].vertices[0] = i;
+        cells[i].vertices[1] = i+1;
+        cells[i].vertices[2] = N+1+i;
+        cells[i].vertices[3] = N+1+(i+1);
+
+        cells[i].material_id = 0;
+      }
+
+    tria.create_triangulation (vertices, cells, SubCellData());
+
+    if (colorize)
+      {
+        // id 1 on face 2 of every cell, id 3 on face 0 of the first
+        // cell and id 2 on face 1 of the last cell
+        for (Triangulation<2>::cell_iterator cell = tria.begin();
+             cell!=tria.end(); ++cell)
+          cell->face(2)->set_boundary_id(1);
+
+        tria.begin()->face(0)->set_boundary_id(3);
+        tria.last()->face(1)->set_boundary_id(2);
+      }
+  }
+
+
+  template <>
+  void quarter_hyper_shell (Triangulation<2> &tria,
+                            const Point<2>   &center,
+                            const double      inner_radius,
+                            const double      outer_radius,
+                            const unsigned int   n_cells,
+                            const bool colorize)
+  {
+    // A quarter of a ring of cells (angles from 0 to pi/2) between the
+    // circles of radius inner_radius and outer_radius around center.
+    // n_cells is the number of cells along the arc; zero means
+    // "choose one here".
+    Assert ((inner_radius > 0) && (inner_radius < outer_radius),
+            ExcInvalidRadii ());
+
+    const double pi     = numbers::PI;
+
+    // If the caller did not fix the number of cells, pick it so that
+    // the length of a cell along the median arc (halfway between the
+    // two radii) equals its radial extent, i.e. the difference of the
+    // radii.
+    unsigned int N = n_cells;
+    if (N == 0)
+      N = static_cast<unsigned int>
+          (std::ceil((pi* (outer_radius + inner_radius)/4) /
+                     (outer_radius - inner_radius)));
+
+    // Vertices 0..N lie on the outer arc, N+1..2N+1 on the inner one,
+    // each numbered counter-clockwise.
+    std::vector<Point<2> > vertices(2*(N+1));
+    for (unsigned int i=0; i<=N; ++i)
+      {
+        // The last point of each arc must have an x-coordinate of
+        // exactly zero, which evaluating the cosine with an inexact
+        // value of pi would not guarantee.
+        const double x = (i==N) ? 0 : std::cos(pi * i/N/2);
+        const double y = std::sin(pi * i/N/2);
+
+        const Point<2> on_outer = Point<2>(x, y) * outer_radius;
+
+        // scale towards the origin first, translate afterwards
+        vertices[i]     = on_outer;
+        vertices[i+N+1] = on_outer * (inner_radius/outer_radius);
+
+        vertices[i]     += center;
+        vertices[i+N+1] += center;
+      }
+
+    // Cell i spans from angular position i to i+1. Since i < N, the
+    // position i+1 never exceeds N and needs no wrap-around.
+    std::vector<CellData<2> > cells (N, CellData<2>());
+    for (unsigned int i=0; i<N; ++i)
+      {
+        cells[i].vertices[0] = i;
+        cells[i].vertices[1] = i+1;
+        cells[i].vertices[2] = N+1+i;
+        cells[i].vertices[3] = N+1+(i+1);
+
+        cells[i].material_id = 0;
+      }
+
+    tria.create_triangulation (vertices, cells, SubCellData());
+
+    if (colorize)
+      {
+        // id 1 on face 2 of every cell, id 3 on face 0 of the first
+        // cell and id 2 on face 1 of the last cell
+        for (Triangulation<2>::cell_iterator cell = tria.begin();
+             cell!=tria.end(); ++cell)
+          cell->face(2)->set_boundary_id(1);
+
+        tria.begin()->face(0)->set_boundary_id(3);
+        tria.last()->face(1)->set_boundary_id(2);
+      }
+  }
+
+
+
+// Implementation for 3D only
+  template <>
+  void hyper_cube_slit (Triangulation<3> &tria,
+                        const double left,
+                        const double right,
+                        const bool colorize)
+  {
+    // Four cells obtained by extruding a slit square in z: the same
+    // ten (x,y) points are placed at z = -len/2 and at z = +len/2.
+    // The point (mid,left) appears twice per layer (indices 1 and 9),
+    // so the two lower cells do not share a face there.
+    const double mid=(right+left)/2;
+    const double len = (right-left)/2.;
+
+    // (x,y) coordinates of the ten points of one layer
+    const double xy[10][2] =
+    {
+      { left,  left  },
+      { mid,   left  },
+      { mid,   mid   },
+      { left,  mid   },
+      { right, left  },
+      { right, mid   },
+      { mid,   right },
+      { left,  right },
+      { right, right },
+      { mid,   left  }
+    };
+    const double z_levels[2] = { -len/2., len/2. };
+
+    // layer 0 holds vertices 0..9, layer 1 holds vertices 10..19
+    std::vector<Point<3> > vertices;
+    for (unsigned int layer=0; layer<2; ++layer)
+      for (unsigned int v=0; v<10; ++v)
+        vertices.push_back (Point<3>(xy[v][0], xy[v][1], z_levels[layer]));
+
+    // vertex indices of the four cells
+    const int connectivity[4][8] =
+    {
+      { 0,1,3,2, 10, 11, 13, 12 },
+      { 9,4,2,5, 19,14, 12, 15 },
+      { 3,2,7,6,13,12,17,16 },
+      { 2,5,6,8,12,15,16,18 }
+    };
+
+    std::vector<CellData<3> > cells (4, CellData<3>());
+    for (unsigned int c=0; c<4; ++c)
+      {
+        cells[c].material_id = 0;
+        for (unsigned int v=0; v<8; ++v)
+          cells[c].vertices[v] = connectivity[c][v];
+      }
+
+    // no boundary information
+    tria.create_triangulation (vertices, cells, SubCellData());
+
+    if (colorize)
+      {
+        // Colorizing is flagged as not implemented; the statements
+        // after the assertion mirror those of the 2d version.
+        Assert(false, ExcNotImplemented());
+        Triangulation<3>::cell_iterator cell = tria.begin();
+        cell->face(1)->set_boundary_id(1);
+        ++cell;
+        cell->face(3)->set_boundary_id(2);
+      }
+  }
+
+
+
+// Implementation for 3D only
+  template <>
+  void enclosed_hyper_cube (Triangulation<3> &tria,
+                            const double        left,
+                            const double        right,
+                            const double        thickness,
+                            const bool          colorize)
+  {
+    // Builds a 3x3x3 block of cells: the cube [left,right]^3 in the
+    // middle, surrounded by a layer of width thickness. With colorize
+    // set, each cell gets a material id taken from the table below.
+    Assert(left<right,
+           ExcMessage ("Invalid left-to-right bounds of enclosed hypercube"));
+
+    // The four grid planes used in each coordinate direction.
+    const double coords[4] = { left-thickness, left, right, right+thickness };
+
+    // 4x4x4 lattice of vertices, x running fastest, then y, then z.
+    std::vector<Point<3> > vertices(64);
+    for (unsigned int iz=0; iz<4; ++iz)
+      for (unsigned int iy=0; iy<4; ++iy)
+        for (unsigned int ix=0; ix<4; ++ix)
+          vertices[16*iz+4*iy+ix] = Point<3>(coords[ix], coords[iy], coords[iz]);
+
+    // Material ids in cell order (x fastest); the center cell has id 0.
+    const types::material_id materials[27] =
+    {
+      21,20,22,
+      17,16,18,
+      25,24,26,
+      5 , 4, 6,
+      1 , 0, 2,
+      9 , 8,10,
+      37,36,38,
+      33,32,34,
+      41,40,42
+    };
+
+    std::vector<CellData<3> > cells(27);
+    for (unsigned int iz=0; iz<3; ++iz)
+      for (unsigned int iy=0; iy<3; ++iy)
+        for (unsigned int ix=0; ix<3; ++ix)
+          {
+            const unsigned int c      = 9*iz+3*iy+ix;
+            const unsigned int corner = ix+4*iy+16*iz;
+
+            // offsets: +1 in x, +4 in y, +16 in z
+            cells[c].vertices[0] = corner;
+            cells[c].vertices[1] = corner+1;
+            cells[c].vertices[2] = corner+4;
+            cells[c].vertices[3] = corner+5;
+            cells[c].vertices[4] = corner+16;
+            cells[c].vertices[5] = corner+17;
+            cells[c].vertices[6] = corner+20;
+            cells[c].vertices[7] = corner+21;
+
+            // without colorize the material id keeps the value that
+            // CellData was constructed with
+            if (colorize)
+              cells[c].material_id = materials[c];
+          }
+
+    // no boundary information
+    tria.create_triangulation (vertices, cells, SubCellData());
+  }
+
+
+
+  template <>
+  void truncated_cone (Triangulation<3> &triangulation,
+                       const double radius_0,
+                       const double radius_1,
+                       const double half_length)
+  {
+    // A chain of hexahedra along the x-axis from x = -half_length to
+    // x = +half_length. Each cross section is a square with corners on
+    // the y- and z-axes, at distance radius_0 from the axis on the
+    // left end and radius_1 on the right end. Boundary ids: 1 on the
+    // left cap, 2 on the right cap, 0 on the hull.
+
+    // Determine number of cells and vertices
+    const unsigned int
+    n_cells = static_cast<unsigned int>(std::ceil (half_length /
+                                                   std::max (radius_0,
+                                                             radius_1)));
+    const unsigned int n_vertices = 4 * (n_cells + 1);
+    std::vector<Point<3> > vertices(n_vertices);
+
+    // the four corners of the left cap
+    vertices[0] = Point<3> (-half_length, 0, -radius_0);
+    vertices[1] = Point<3> (-half_length, radius_0, 0);
+    vertices[2] = Point<3> (-half_length, -radius_0, 0);
+    vertices[3] = Point<3> (-half_length, 0, radius_0);
+
+    // length of one cell in x-direction
+    const double dx = 2 * half_length / n_cells;
+
+    // Every further ring of four vertices is the previous ring moved
+    // by dx along the axis and by the matching change of radius.
+    for (unsigned int i = 0; i < n_cells; ++i)
+      {
+        vertices[4 * (i + 1)]
+          = vertices[4 * i] +
+            Point<3> (dx, 0, 0.5 * (radius_0 - radius_1) * dx / half_length);
+        vertices[4 * i + 5]
+          = vertices[4 * i + 1] +
+            Point<3> (dx, 0.5 * (radius_1 - radius_0) * dx / half_length, 0);
+        vertices[4 * i + 6]
+          = vertices[4 * i + 2] +
+            Point<3> (dx, 0.5 * (radius_0 - radius_1) * dx / half_length, 0);
+        vertices[4 * i + 7]
+          = vertices[4 * i + 3] +
+            Point<3> (dx, 0, 0.5 * (radius_1 - radius_0) * dx / half_length);
+      }
+
+    // Cell i is spanned by ring i (local vertices 0-3) and ring i+1
+    // (local vertices 4-7).
+    std::vector<CellData<3> > cells (n_cells, CellData<3> ());
+    for (unsigned int i = 0; i < n_cells; ++i)
+      {
+        for (unsigned int j = 0; j < GeometryInfo<3>::vertices_per_cell; ++j)
+          cells[i].vertices[j] = 4 * i + j;
+
+        cells[i].material_id = 0;
+      }
+
+    triangulation.create_triangulation (vertices, cells, SubCellData ());
+
+    // The x-coordinates of the rings are accumulated sums of dx, so
+    // the last ring is in general not located at exactly half_length
+    // (for half_length = 1 and three cells it ends up at 1 - 2^-53).
+    // Identify the caps with a tolerance of half a cell length instead
+    // of testing for exact equality; otherwise the right cap may
+    // silently be left without its boundary id.
+    const double cap_tolerance = 0.5 * dx;
+
+    for (Triangulation<3>::cell_iterator cell = triangulation.begin ();
+         cell != triangulation.end (); ++cell)
+      {
+        if (cell->vertex (0) (0) < -half_length + cap_tolerance)
+          {
+            cell->face (4)->set_boundary_id (1);
+
+            for (unsigned int i = 0; i < 4; ++i)
+              cell->line (i)->set_boundary_id (0);
+          }
+
+        if (cell->vertex (4) (0) > half_length - cap_tolerance)
+          {
+            cell->face (5)->set_boundary_id (2);
+
+            for (unsigned int i = 4; i < 8; ++i)
+              cell->line (i)->set_boundary_id (0);
+          }
+
+        // faces 0-3 form the hull
+        for (unsigned int i = 0; i < 4; ++i)
+          cell->face (i)->set_boundary_id (0);
+      }
+  }
+
+
+// Implementation for 3D only
+  // 3D specialization: mesh the L-shaped domain obtained from the cube
+  // [a,b]^3 by cutting it into 2x2x2 sub-cubes and leaving out the one in
+  // the top back right corner. The result consists of 7 cells with material
+  // id 0; no boundary information is passed to the triangulation.
+  template <>
+  void
+  hyper_L (Triangulation<3> &tria,
+           const double      a,
+           const double      b)
+  {
+    // coordinates of the 3x3x3 lattice along each axis
+    const double lattice[3] = { a, (a+b)/2, b };
+
+    // enumerate the lattice points with x running fastest, then z, then y
+    // (i.e. front, middle and back face of the big cube). the very last
+    // point (b,b,b) only belongs to the sub-cube that is sliced out and
+    // is therefore not generated
+    const unsigned int n_points = 26;
+    std::vector<Point<3> > points;
+    points.reserve (n_points);
+    for (unsigned int iy=0; iy<3; ++iy)
+      for (unsigned int iz=0; iz<3; ++iz)
+        for (unsigned int ix=0; ix<3; ++ix)
+          if (points.size() < n_points)
+            points.push_back (Point<3> (lattice[ix],
+                                        lattice[iy],
+                                        lattice[iz]));
+
+    // vertex indices of the seven remaining sub-cubes
+    const unsigned int n_cells = 7;
+    const int connectivity[n_cells][8] =
+    {
+      {0, 1, 9, 10, 3, 4, 12, 13},
+      {1, 2, 10, 11, 4, 5, 13, 14},
+      {3, 4, 12, 13, 6, 7, 15, 16},
+      {4, 5, 13, 14, 7, 8, 16, 17},
+      {9, 10, 18, 19, 12, 13, 21, 22},
+      {10, 11, 19, 20, 13, 14, 22, 23},
+      {12, 13, 21, 22, 15, 16, 24, 25}
+    };
+
+    std::vector<CellData<3> > cells (n_cells, CellData<3>());
+    for (unsigned int c=0; c<n_cells; ++c)
+      {
+        for (unsigned int v=0; v<8; ++v)
+          cells[c].vertices[v] = connectivity[c][v];
+        cells[c].material_id = 0;
+      }
+
+    // no boundary information
+    tria.create_triangulation (points, cells, SubCellData());
+  }
+
+
+
+// Implementation for 3D only
+  // 3D specialization: coarse mesh of the ball of given radius around p.
+  // The mesh consists of one cube in the center surrounded by six cells that
+  // connect the faces of that cube with the eight outer vertices, which lie
+  // at distance radius from p. All cells get material id 0; no boundary
+  // information is passed to the triangulation.
+  template <>
+  void
+  hyper_ball (Triangulation<3> &tria,
+              const Point<3>   &p,
+              const double radius)
+  {
+    // equilibrate cell sizes at transition from the inner part to the
+    // radial cells
+    const double a = 1./(1+std::sqrt(3.0));
+
+    // scaling of the corner directions for the outer vertices and for the
+    // vertices of the inner cell
+    const double outer_scale = radius/std::sqrt(3.0);
+    const double inner_scale = radius/std::sqrt(3.0)*a;
+
+    // directions to the eight corners, in the order in which the cell
+    // table below refers to them
+    const double corner[8][3] =
+    {
+      {-1,-1,-1},
+      {+1,-1,-1},
+      {+1,-1,+1},
+      {-1,-1,+1},
+      {-1,+1,-1},
+      {+1,+1,-1},
+      {+1,+1,+1},
+      {-1,+1,+1}
+    };
+
+    const unsigned int n_vertices = 16;
+    std::vector<Point<3> > vertices;
+    vertices.reserve (n_vertices);
+
+    // first the vertices of the inner cell ...
+    for (unsigned int k=0; k<8; ++k)
+      vertices.push_back (p+Point<3>(corner[k][0], corner[k][1], corner[k][2])
+                          *inner_scale);
+    // ... then the eight vertices at the outer sphere
+    for (unsigned int k=0; k<8; ++k)
+      vertices.push_back (p+Point<3>(corner[k][0], corner[k][1], corner[k][2])
+                          *outer_scale);
+
+    // one needs to draw the seven cubes to understand what's going on here
+    const unsigned int n_cells = 7;
+    const int connectivity[n_cells][8] =
+    {
+      {0, 1, 4, 5, 3, 2, 7, 6},      // center
+      {8, 9, 12, 13, 0, 1, 4, 5},    // bottom
+      {9, 13, 1, 5, 10, 14, 2, 6},   // right
+      {11, 10, 3, 2, 15, 14, 7, 6},  // top
+      {8, 0, 12, 4, 11, 3, 15, 7},   // left
+      {8, 9, 0, 1, 11, 10, 3, 2},    // front
+      {12, 4, 13, 5, 15, 7, 14, 6}   // back
+    };
+
+    std::vector<CellData<3> > cells (n_cells, CellData<3>());
+    for (unsigned int c=0; c<n_cells; ++c)
+      {
+        for (unsigned int v=0; v<GeometryInfo<3>::vertices_per_cell; ++v)
+          cells[c].vertices[v] = connectivity[c][v];
+        cells[c].material_id = 0;
+      }
+
+    // no boundary information
+    tria.create_triangulation (vertices, cells, SubCellData());
+  }
+
+  // Create a surface mesh in tria by first meshing the ball of given radius
+  // around p as a volume in spacedim dimensions, and then extracting those
+  // boundary faces of the volume mesh that carry boundary indicator 0.
+  template <int dim, int spacedim>
+  void
+  hyper_sphere (Triangulation<dim,spacedim> &tria,
+                const Point<spacedim>   &p,
+                const double      radius)
+  {
+    // the volume mesh whose boundary we are after
+    Triangulation<spacedim> ball;
+    GridGenerator::hyper_ball (ball, p, radius);
+
+    // restrict the extraction to the faces with boundary indicator 0
+    const types::boundary_id surface_id = 0;
+    std::set<types::boundary_id> ids_to_extract (&surface_id,
+                                                 &surface_id + 1);
+
+    GridGenerator::extract_boundary_mesh (ball, tria, ids_to_extract);
+  }
+
+
+
+// Implementation for 3D only
+  // 3D specialization: coarse mesh of a cylinder of given radius whose axis
+  // is the x-axis and which extends from x=-half_length to x=+half_length.
+  // Each cross section consists of a square with half width a surrounded by
+  // four cells that reach out to the corner points (+-d,+-d), which lie at
+  // distance radius from the axis. Two such layers of five cells each are
+  // stacked along the axis, giving ten cells with material id 0.
+  //
+  // Boundary faces at the left end get boundary indicator 1, those at the
+  // right end get 2. The same indicator is given to those lines of an end
+  // face that have a vertex with a y- or z-coordinate of +-a.
+  template <>
+  void
+  cylinder (Triangulation<3> &tria,
+            const double radius,
+            const double half_length)
+  {
+    const double d = radius/std::sqrt(2.0);
+    const double a = d/(1+std::sqrt(2.0));
+
+    // the eight points of a cross section, given in a plane with
+    // coordinates (u,z)
+    const double cross_section[8][2] =
+    {
+      {-d,-d},
+      { d,-d},
+      {-a,-a},
+      { a,-a},
+      {-a, a},
+      { a, a},
+      {-d, d},
+      { d, d}
+    };
+    // positions of the three cross sections along the axis
+    const double axial[3] = { -half_length, 0, half_length };
+
+    // generate the vertices with the cylinder already turned so that its
+    // axis is the x-axis: a point (u, h, z) of a cylinder along the y-axis
+    // ends up at (h, -u, z)
+    std::vector<Point<3> > vertices;
+    vertices.reserve (24);
+    for (unsigned int s=0; s<3; ++s)
+      for (unsigned int k=0; k<8; ++k)
+        vertices.push_back (Point<3>(axial[s],
+                                     -cross_section[k][0],
+                                     cross_section[k][1]));
+
+    // connectivity of the five cells of the first layer; the second layer
+    // uses the same pattern with all vertex indices shifted by 8
+    const int layer_cells[5][8] =
+    {
+      {0, 1, 8, 9, 2, 3, 10, 11},
+      {0, 2, 8, 10, 6, 4, 14, 12},
+      {2, 3, 10, 11, 4, 5, 12, 13},
+      {1, 7, 9, 15, 3, 5, 11, 13},
+      {6, 4, 14, 12, 7, 5, 15, 13}
+    };
+
+    std::vector<CellData<3> > cells (10, CellData<3>());
+    for (unsigned int layer=0; layer<2; ++layer)
+      for (unsigned int c=0; c<5; ++c)
+        {
+          for (unsigned int v=0; v<8; ++v)
+            cells[5*layer+c].vertices[v] = layer_cells[c][v] + 8*layer;
+          cells[5*layer+c].material_id = 0;
+        }
+
+    // no boundary information
+    tria.create_triangulation (vertices, cells, SubCellData());
+
+    // set boundary indicators for the faces at the ends to 1 and 2,
+    // respectively. note that we also have to deal with those lines that
+    // are purely in the interior of the ends. we determine whether an edge
+    // is purely in the interior if one of its vertices is at coordinates
+    // '+-a' as set above
+    for (Triangulation<3>::cell_iterator cell = tria.begin();
+         cell != tria.end(); ++cell)
+      for (unsigned int f=0; f<GeometryInfo<3>::faces_per_cell; ++f)
+        {
+          if (!cell->at_boundary(f))
+            continue;
+
+          const Triangulation<3>::face_iterator face = cell->face(f);
+          const double x_center = face->center()(0);
+
+          // find out which end of the cylinder, if any, this face is on
+          types::boundary_id end_id;
+          if (x_center > half_length-1.e-5)
+            end_id = 2;
+          else if (x_center < -half_length+1.e-5)
+            end_id = 1;
+          else
+            continue;
+
+          face->set_boundary_id(end_id);
+
+          for (unsigned int e=0; e<GeometryInfo<3>::lines_per_face; ++e)
+            {
+              // check the y- and z-coordinates of both end points
+              bool has_vertex_at_a = false;
+              for (unsigned int v=0; v<2; ++v)
+                for (unsigned int c=1; c<3; ++c)
+                  if (std::fabs(face->line(e)->vertex(v)[c]) == a)
+                    has_vertex_at_a = true;
+
+              if (has_vertex_at_a)
+                face->line(e)->set_boundary_id(end_id);
+            }
+        }
+  }
+
+
+
+// Implementation for 3D only
+  // 3D specialization: coarse mesh of the half of the ball of given radius
+  // around center for which x >= center(0). The mesh consists of six cells
+  // with material id 0.
+  //
+  // Boundary faces in the plane x=center(0) get boundary indicator 1, and
+  // so do their lines unless both end points of a line are at distance
+  // radius from center. Everything else keeps its indicator.
+  template <>
+  void
+  half_hyper_ball (Triangulation<3> &tria,
+                   const Point<3> &center,
+                   const double radius)
+  {
+    // half widths of the outer and inner square in the flat face
+    const double d = radius/std::sqrt(2.0);
+    const double a = d/(1+std::sqrt(2.0));
+    // half widths of the outer and inner square further up ...
+    const double c = d/2.0;
+    const double b = a/2.0;
+    // ... and the distances of these two squares from the flat face
+    const double hc = radius*std::sqrt(3.0)/2.0;
+    const double hb = radius*std::sqrt(3.0)/4.0;
+
+    const unsigned int n_vertices = 16;
+    const Point<3> vertices[n_vertices] =
+    {
+      // the two squares in the flat face
+      center+Point<3>( 0,  d, -d),
+      center+Point<3>( 0, -d, -d),
+      center+Point<3>( 0,  a, -a),
+      center+Point<3>( 0, -a, -a),
+      center+Point<3>( 0,  a,  a),
+      center+Point<3>( 0, -a,  a),
+      center+Point<3>( 0,  d,  d),
+      center+Point<3>( 0, -d,  d),
+      // the two squares above it
+      center+Point<3>(hc,  c, -c),
+      center+Point<3>(hc, -c, -c),
+      center+Point<3>(hb,  b, -b),
+      center+Point<3>(hb, -b, -b),
+      center+Point<3>(hb,  b,  b),
+      center+Point<3>(hb, -b,  b),
+      center+Point<3>(hc,  c,  c),
+      center+Point<3>(hc, -c,  c)
+    };
+
+    const unsigned int n_cells = 6;
+    const int connectivity[n_cells][8] =
+    {
+      {0, 1, 8, 9, 2, 3, 10, 11},
+      {0, 2, 8, 10, 6, 4, 14, 12},
+      {2, 3, 10, 11, 4, 5, 12, 13},
+      {1, 7, 9, 15, 3, 5, 11, 13},
+      {6, 4, 14, 12, 7, 5, 15, 13},
+      {8, 10, 9, 11, 14, 12, 15, 13}
+    };
+
+    std::vector<CellData<3> > cells (n_cells, CellData<3>());
+    for (unsigned int k=0; k<n_cells; ++k)
+      {
+        for (unsigned int v=0; v<8; ++v)
+          cells[k].vertices[v] = connectivity[k][v];
+        cells[k].material_id = 0;
+      }
+
+    // no boundary information
+    tria.create_triangulation (
+      std::vector<Point<3> >(vertices, vertices+n_vertices),
+      cells,
+      SubCellData());
+
+    // go over all boundary faces. for the ones on the flat face, set the
+    // boundary indicator of the face and of its edges to one; the rest
+    // remains untouched. we have to pay attention to those edges that are
+    // at the perimeter of the flat face since they should not be set to
+    // one
+    const double tolerance = 1e-5*radius;
+    for (Triangulation<3>::cell_iterator cell = tria.begin();
+         cell != tria.end(); ++cell)
+      for (unsigned int f=0; f<GeometryInfo<3>::faces_per_cell; ++f)
+        {
+          if (!cell->at_boundary(f))
+            continue;
+
+          const Triangulation<3>::face_iterator face = cell->face(f);
+
+          // a face is on the flat part if its center is in the plane
+          // x=center(0)
+          if (!(face->center()(0) < center(0)+1.e-5*radius))
+            continue;
+
+          face->set_boundary_id(1);
+
+          // a line is at the perimeter only if both of its end points are
+          // at distance radius from the center; all other lines of this
+          // face get the indicator as well
+          for (unsigned int l=0; l<GeometryInfo<3>::lines_per_face; ++l)
+            {
+              const bool first_off_perimeter
+                = (std::fabs(face->line(l)->vertex(0).distance(center)-radius)
+                   > tolerance);
+              const bool second_off_perimeter
+                = (std::fabs(face->line(l)->vertex(1).distance(center)-radius)
+                   > tolerance);
+
+              if (first_off_perimeter || second_off_perimeter)
+                face->line(l)->set_boundary_id(1);
+            }
+        }
+  }
+
+
+  // 3D specialization: create a coarse mesh of the spherical shell around p
+  // that is bounded by the spheres with radii inner_radius and
+  // outer_radius. The argument n_cells selects the coarse mesh: 6 cells
+  // (also used if n_cells is zero), 12 cells, or 96 cells; any other value
+  // triggers an assertion. All cells get material id 0. If colorize is set,
+  // colorize_hyper_shell() is called on the resulting triangulation.
+  template <>
+  void
+  hyper_shell (Triangulation<3> &tria,
+               const Point<3> &p,
+               const double inner_radius,
+               const double outer_radius,
+               const unsigned int n_cells,
+               const bool colorize)
+  {
+    Assert ((inner_radius > 0) && (inner_radius < outer_radius),
+            ExcInvalidRadii ());
+
+    // zero means "use the default", which is the 6-cell mesh
+    const unsigned int n = (n_cells==0) ? 6 : n_cells;
+
+    // scaling factors applied to the entries of the 'hexahedron' table so
+    // that the resulting vertices end up on the inner and outer sphere
+    // (assumes the table holds the corners (+-1,+-1,+-1) -- the table is
+    // defined elsewhere in this file)
+    const double irad = inner_radius/std::sqrt(3.0);
+    const double orad = outer_radius/std::sqrt(3.0);
+    std::vector<Point<3> > vertices;
+    std::vector<CellData<3> > cells;
+
+    // Start with the shell bounded by
+    // two nested cubes
+    if (n == 6)
+      {
+        // vertices 0..7 are on the inner, vertices 8..15 on the outer
+        // sphere
+        for (unsigned int i=0; i<8; ++i)
+          vertices.push_back(p+hexahedron[i]*irad);
+        for (unsigned int i=0; i<8; ++i)
+          vertices.push_back(p+hexahedron[i]*orad);
+
+        // note: this local constant shadows the function argument of the
+        // same name within this branch
+        const unsigned int n_cells = 6;
+        const int cell_vertices[n_cells][8] =
+        {
+          {8, 9, 10, 11, 0, 1, 2, 3}, // bottom
+          {9, 11, 1, 3, 13, 15, 5, 7}, // right
+          {12, 13, 4, 5, 14, 15, 6, 7}, // top
+          {8, 0, 10, 2, 12, 4, 14, 6}, // left
+          {8, 9, 0, 1, 12, 13, 4, 5}, // front
+          {10, 2, 11, 3, 14, 6, 15, 7}
+        }; // back
+
+        cells.resize(n_cells, CellData<3>());
+
+        for (unsigned int i=0; i<n_cells; ++i)
+          {
+            for (unsigned int j=0; j<GeometryInfo<3>::vertices_per_cell; ++j)
+              cells[i].vertices[j] = cell_vertices[i][j];
+            cells[i].material_id = 0;
+          }
+
+        tria.create_triangulation (vertices, cells, SubCellData());
+      }
+    // A more regular subdivision can
+    // be obtained by two nested
+    // rhombic dodecahedra
+    else if (n == 12)
+      {
+        // vertices 0..13 belong to the inner, vertices 14..27 to the
+        // outer surface; in each group the first 8 come from the
+        // 'hexahedron' table and the remaining 6 from the 'octahedron'
+        // table
+        for (unsigned int i=0; i<8; ++i)
+          vertices.push_back(p+hexahedron[i]*irad);
+        for (unsigned int i=0; i<6; ++i)
+          vertices.push_back(p+octahedron[i]*inner_radius);
+        for (unsigned int i=0; i<8; ++i)
+          vertices.push_back(p+hexahedron[i]*orad);
+        for (unsigned int i=0; i<6; ++i)
+          vertices.push_back(p+octahedron[i]*outer_radius);
+
+        // note: this local constant shadows the function argument of the
+        // same name within this branch. each row lists the four inner
+        // vertices of one cell
+        const unsigned int n_cells = 12;
+        const unsigned int rhombi[n_cells][4] =
+        {
+          { 10,  4,  0,  8},
+          {  4, 13,  8,  6},
+          { 10,  5,  4, 13},
+          {  1,  9, 10,  5},
+          {  9,  7,  5, 13},
+          {  7, 11, 13,  6},
+          {  9,  3,  7, 11},
+          {  1, 12,  9,  3},
+          { 12,  2,  3, 11},
+          {  2,  8, 11,  6},
+          { 12,  0,  2,  8},
+          {  1, 10, 12,  0}
+        };
+
+        cells.resize(n_cells, CellData<3>());
+
+        for (unsigned int i=0; i<n_cells; ++i)
+          {
+            // the first four vertices of a cell are the inner ones, the
+            // last four are the corresponding outer ones (same index,
+            // shifted by the 14 vertices of the inner surface)
+            for (unsigned int j=0; j<4; ++j)
+              {
+                cells[i].vertices[j  ] = rhombi[i][j];
+                cells[i].vertices[j+4] = rhombi[i][j] + 14;
+              }
+            cells[i].material_id = 0;
+          }
+
+        tria.create_triangulation (vertices, cells, SubCellData());
+      }
+    else if (n == 96)
+      {
+        // create a triangulation based on the
+        // 12-cell one where we refine the mesh
+        // once and then re-arrange all
+        // interior nodes so that the mesh is
+        // the least distorted
+        HyperShellBoundary<3> boundary (p);
+        Triangulation<3> tmp;
+        hyper_shell (tmp, p, inner_radius, outer_radius, 12);
+        tmp.set_boundary(0, boundary);
+        tmp.set_boundary(1, boundary);
+        tmp.refine_global (1);
+
+        // let's determine the distance at
+        // which the interior nodes should be
+        // from the center. let's say we
+        // measure distances in multiples of
+        // outer_radius and call
+        // r=inner_radius.
+        //
+        // then note
+        // that we now have 48 faces on the
+        // inner and 48 on the outer sphere,
+        // each with an area of approximately
+        // 4*pi/48*r^2 and 4*pi/48, for
+        // a face edge length of approximately
+        // sqrt(pi/12)*r and sqrt(pi/12)
+        //
+        // let's say we put the interior nodes
+        // at a distance rho, then a measure of
+        // deformation for the inner cells
+        // would be
+        //   di=max(sqrt(pi/12)*r/(rho-r),
+        //          (rho-r)/sqrt(pi/12)/r)
+        // and for the outer cells
+        //   do=max(sqrt(pi/12)/(1-rho),
+        //          (1-rho)/sqrt(pi/12))
+        //
+        // we now seek a rho so that the
+        // deformation of cells on the inside
+        // and outside is equal. there are in
+        // principle four possibilities for one
+        // of the branches of do== one of the
+        // branches of di, though not all of
+        // them satisfy do==di, of
+        // course. however, we are not
+        // interested in cases where the inner
+        // cell is long and skinny and the
+        // outer one tall -- yes, they have the
+        // same aspect ratio, but in different
+        // space directions.
+        //
+        // so it only boils down to the
+        // following two possibilities: the
+        // first branches of the two max(.,.)
+        // functions are equal, or the second
+        // ones are. on the other hand, since
+        // the two branches are reciprocals of
+        // each other, if one pair of branches
+        // is equal, so is the other
+        //
+        // this yields the following equation
+        // for rho:
+        //   sqrt(pi/12)*r/(rho-r)
+        //   == sqrt(pi/12)/(1-rho)
+        // with solution rho=2r/(1+r)
+        const double r = inner_radius / outer_radius;
+        const double rho = 2*r/(1+r);
+
+        // then this is the distance of the
+        // interior nodes from the center:
+        const double middle_radius = rho * outer_radius;
+
+        // mark vertices we've already moved or
+        // that we want to ignore: we don't
+        // want to move vertices at the inner
+        // or outer boundaries
+        std::vector<bool> vertex_already_treated (tmp.n_vertices(), false);
+        for (Triangulation<3>::active_cell_iterator cell = tmp.begin_active();
+             cell != tmp.end(); ++cell)
+          for (unsigned int f=0; f<GeometryInfo<3>::faces_per_cell; ++f)
+            if (cell->at_boundary(f))
+              for (unsigned int v=0; v<GeometryInfo<3>::vertices_per_face; ++v)
+                vertex_already_treated[cell->face(f)->vertex_index(v)] = true;
+
+        // now move the remaining vertices
+        for (Triangulation<3>::active_cell_iterator cell = tmp.begin_active();
+             cell != tmp.end(); ++cell)
+          for (unsigned int v=0; v<GeometryInfo<3>::vertices_per_cell; ++v)
+            if (vertex_already_treated[cell->vertex_index(v)] == false)
+              {
+                // this is a new interior
+                // vertex. mesh refinement may
+                // have placed it at a number
+                // of places in radial
+                // direction and oftentimes not
+                // in a particularly good
+                // one. keep its direction as
+                // seen from the center but
+                // move it to the distance
+                // middle_radius computed above
+                const Tensor<1,3> old_distance = cell->vertex(v) - p;
+                const double old_radius = cell->vertex(v).distance(p);
+                cell->vertex(v) = p + old_distance * (middle_radius / old_radius);
+
+                // a vertex is shared between several cells; make sure
+                // we move it only once
+                vertex_already_treated[cell->vertex_index(v)] = true;
+              }
+
+        // now copy the resulting level 1 cells
+        // into the new triangulation,
+        cells.resize(tmp.n_active_cells(), CellData<3>());
+        for (Triangulation<3>::active_cell_iterator cell = tmp.begin_active();
+             cell != tmp.end(); ++cell)
+          {
+            const unsigned int cell_index = cell->active_cell_index();
+            for (unsigned int v=0; v<GeometryInfo<3>::vertices_per_cell; ++v)
+              cells[cell_index].vertices[v] = cell->vertex_index(v);
+            cells[cell_index].material_id = 0;
+          }
+
+        // the vertex indices stored above refer to the vertex array of
+        // the temporary triangulation, so hand over that array as a whole
+        tria.create_triangulation (tmp.get_vertices(), cells, SubCellData());
+      }
+    else
+      {
+        Assert(false, ExcMessage ("Invalid number of coarse mesh cells."));
+      }
+
+    if (colorize)
+      colorize_hyper_shell(tria, p, inner_radius, outer_radius);
+  }
+
+
+
+
+// Implementation for 3D only
+  // 3D specialization: coarse mesh of the half of a spherical shell around
+  // center, between inner_radius and outer_radius, for which
+  // x >= center(0). Only a mesh with five cells is available; it is created
+  // for every n up to 5, larger values trigger an assertion. All cells get
+  // material id 0.
+  //
+  // If colorize is set, boundary faces (including their lines) get
+  // boundary indicator 0 on the inner curved surface, 1 on the outer curved
+  // surface, and 2 on the flat part.
+  template <>
+  void
+  half_hyper_shell (Triangulation<3> &tria,
+                    const Point<3> &center,
+                    const double inner_radius,
+                    const double outer_radius,
+                    const unsigned int n,
+                    const bool colorize)
+  {
+    Assert ((inner_radius > 0) && (inner_radius < outer_radius),
+            ExcInvalidRadii ());
+    Assert (n <= 5, ExcIndexRange(n, 0, 5));
+
+    if (n <= 5)
+      {
+        // half widths of the outer and inner square in the flat face
+        const double d = outer_radius/std::sqrt(2.0);
+        const double a = inner_radius/std::sqrt(2.0);
+        // half widths of the outer and inner square further up ...
+        const double c = d/2.0;
+        const double b = a/2.0;
+        // ... and the distances of these two squares from the flat face
+        const double hc = outer_radius*std::sqrt(3.0)/2.0;
+        const double hb = inner_radius*std::sqrt(3.0)/2.0;
+
+        const unsigned int n_vertices = 16;
+        const Point<3> vertices[n_vertices] =
+        {
+          // the two squares in the flat face
+          center+Point<3>( 0,  d, -d),
+          center+Point<3>( 0, -d, -d),
+          center+Point<3>( 0,  a, -a),
+          center+Point<3>( 0, -a, -a),
+          center+Point<3>( 0,  a,  a),
+          center+Point<3>( 0, -a,  a),
+          center+Point<3>( 0,  d,  d),
+          center+Point<3>( 0, -d,  d),
+          // the two squares above it
+          center+Point<3>(hc,  c, -c),
+          center+Point<3>(hc, -c, -c),
+          center+Point<3>(hb,  b, -b),
+          center+Point<3>(hb, -b, -b),
+          center+Point<3>(hb,  b,  b),
+          center+Point<3>(hb, -b,  b),
+          center+Point<3>(hc,  c,  c),
+          center+Point<3>(hc, -c,  c)
+        };
+
+        const unsigned int n_cells = 5;
+        const int connectivity[n_cells][8] =
+        {
+          {0, 1, 8, 9, 2, 3, 10, 11},
+          {0, 2, 8, 10, 6, 4, 14, 12},
+          {1, 7, 9, 15, 3, 5, 11, 13},
+          {6, 4, 14, 12, 7, 5, 15, 13},
+          {8, 10, 9, 11, 14, 12, 15, 13}
+        };
+
+        std::vector<CellData<3> > cells (n_cells, CellData<3>());
+        for (unsigned int k=0; k<n_cells; ++k)
+          {
+            for (unsigned int v=0; v<8; ++v)
+              cells[k].vertices[v] = connectivity[k][v];
+            cells[k].material_id = 0;
+          }
+
+        // no boundary information
+        tria.create_triangulation (
+          std::vector<Point<3> >(vertices, vertices+n_vertices),
+          cells,
+          SubCellData());
+      }
+
+    if (!colorize)
+      return;
+
+    // We want to use a standard boundary description where the boundary is
+    // not curved. Hence, in a first step, give boundary id 2 to all
+    // boundary faces and their lines. This has to be a separate pass over
+    // the whole mesh: lines shared between a flat and a curved face must
+    // end up with the indicator of the curved face set in the second pass.
+    for (Triangulation<3>::cell_iterator cell = tria.begin();
+         cell != tria.end(); ++cell)
+      for (unsigned int f=0; f<GeometryInfo<3>::faces_per_cell; ++f)
+        if (cell->at_boundary(f))
+          cell->face(f)->set_all_boundary_ids(2);
+
+    // Next look for the curved boundaries. If the x value of the center of
+    // the face is not equal to center(0), we're on a curved boundary. Then
+    // decide whether the face center is nearer to the inner or to the outer
+    // radius to set the correct boundary id.
+    for (Triangulation<3>::cell_iterator cell = tria.begin();
+         cell != tria.end(); ++cell)
+      for (unsigned int f=0; f<GeometryInfo<3>::faces_per_cell; ++f)
+        {
+          if (!cell->at_boundary(f))
+            continue;
+
+          const Triangulation<3>::face_iterator face = cell->face(f);
+          const Point<3> face_center (face->center());
+
+          // faces in the plane x=center(0) keep the id set above
+          if (!(std::abs(face_center(0)-center(0)) > 1.e-6 * face_center.norm()))
+            continue;
+
+          const double distance = (face_center-center).norm();
+          const bool nearer_to_inner
+            = (std::abs(distance-inner_radius) <
+               std::abs(distance-outer_radius));
+
+          if (nearer_to_inner)
+            face->set_all_boundary_ids(0);
+          else
+            face->set_all_boundary_ids(1);
+        }
+  }
+
+
+// Implementation for 3D only
+  // 3D specialization: coarse mesh of a part of a spherical shell around
+  // center, between inner_radius and outer_radius, all of whose vertices
+  // have non-negative coordinates relative to center. Only a mesh with
+  // three cells is implemented; it is selected by n==3 or by n==0, any
+  // other value raises ExcNotImplemented. All cells get material id 0. If
+  // colorize is set, colorize_quarter_hyper_shell() is called on the
+  // result.
+  template <>
+  void quarter_hyper_shell (Triangulation<3> &tria,
+                            const Point<3> &center,
+                            const double inner_radius,
+                            const double outer_radius,
+                            const unsigned int n,
+                            const bool colorize)
+  {
+    Assert ((inner_radius > 0) && (inner_radius < outer_radius),
+            ExcInvalidRadii ());
+    AssertThrow (n == 0 || n == 3, ExcNotImplemented());
+
+    // coordinates of the vertices that are not on a coordinate axis:
+    // a and b belong to the points in the coordinate planes, (c,c,h) and
+    // (d,d,e) to the two points in the interior, each time for the inner
+    // and the outer surface, respectively
+    const double a = inner_radius*std::sqrt(2.0)/2e0;
+    const double b = outer_radius*std::sqrt(2.0)/2e0;
+    const double c = a*std::sqrt(3.0)/2e0;
+    const double d = b*std::sqrt(3.0)/2e0;
+    const double h = inner_radius/2e0;
+    const double e = outer_radius/2e0;
+
+    const unsigned int n_vertices = 14;
+    const Point<3> vertex_list[n_vertices] =
+    {
+      center+Point<3>( 0,  inner_radius, 0),   //  0
+      center+Point<3>( a,  a, 0),              //  1
+      center+Point<3>( b,  b, 0),              //  2
+      center+Point<3>( 0, outer_radius, 0),    //  3
+      center+Point<3>( 0, a , a),              //  4
+      center+Point<3>( c, c , h),              //  5
+      center+Point<3>( d, d , e),              //  6
+      center+Point<3>( 0, b , b),              //  7
+      center+Point<3>( inner_radius, 0 , 0),   //  8
+      center+Point<3>( outer_radius, 0 , 0),   //  9
+      center+Point<3>( a, 0 , a),              // 10
+      center+Point<3>( b, 0 , b),              // 11
+      center+Point<3>( 0, 0 , inner_radius),   // 12
+      center+Point<3>( 0, 0 , outer_radius)    // 13
+    };
+    const std::vector<Point<3> > vertices (vertex_list,
+                                           vertex_list+n_vertices);
+
+    const unsigned int n_cells = 3;
+    const int connectivity[n_cells][8] =
+    {
+      {0, 1, 3, 2, 4, 5, 7, 6},
+      {1, 8, 2, 9, 5, 10, 6, 11},
+      {4, 5, 7, 6, 12, 10, 13, 11}
+    };
+
+    std::vector<CellData<3> > cells(n_cells);
+    for (unsigned int k=0; k<n_cells; ++k)
+      {
+        for (unsigned int v=0; v<8; ++v)
+          cells[k].vertices[v] = connectivity[k][v];
+        cells[k].material_id = 0;
+      }
+
+    // no boundary information
+    tria.create_triangulation (vertices, cells, SubCellData());
+
+    if (colorize)
+      colorize_quarter_hyper_shell(tria, center, inner_radius, outer_radius);
+  }
+
+
+// Implementation for 3D only
+  // 3D specialization: coarse mesh of a cylinder shell, i.e. the region
+  // between two cylinders with radii inner_radius and outer_radius around
+  // the z-axis, extending from z=0 to z=length. The mesh has one layer of
+  // cells in radial direction, n_radial_cells cells around the
+  // circumference and n_axial_cells cells along the axis. If either number
+  // is zero, it is chosen such that the cells have roughly the same extent
+  // in all directions. All cells get material id 0; no boundary
+  // information is passed to the triangulation.
+  template <>
+  void cylinder_shell (Triangulation<3> &tria,
+                       const double      length,
+                       const double      inner_radius,
+                       const double      outer_radius,
+                       const unsigned int   n_radial_cells,
+                       const unsigned int   n_axial_cells)
+  {
+    Assert ((inner_radius > 0) && (inner_radius < outer_radius),
+            ExcInvalidRadii ());
+
+    const double pi = numbers::PI;
+
+    // length of the circle in the middle between the inner and the outer
+    // circle
+    const double median_circumference
+      = 2*pi* (outer_radius + inner_radius)/2;
+
+    // determine the number of cells for the grid. if not provided by the
+    // user, choose the number of cells around the circumference such that
+    // the length of each cell on the median is equal to its radial extent
+    // (which is the difference between the two radii) ...
+    unsigned int N_r = n_radial_cells;
+    if (N_r == 0)
+      N_r = static_cast<unsigned int>
+            (std::ceil(median_circumference /
+                       (outer_radius - inner_radius)));
+
+    // ... and the number of cells along the axis such that the axial
+    // extent of a cell is equal to its length on the median
+    unsigned int N_z = n_axial_cells;
+    if (N_z == 0)
+      N_z = static_cast<unsigned int>
+            (std::ceil (length / (median_circumference/N_r)));
+
+    // set up the vertices of one cross section: entries 0..N_r-1 are on
+    // the outer circle, entries N_r..2*N_r-1 are the corresponding points
+    // on the inner circle. both are numbered counter-clockwise
+    std::vector<Point<2> > ring(2*N_r);
+    for (unsigned int i=0; i<N_r; ++i)
+      {
+        const double angle = 2*pi * i/N_r;
+        ring[i]     = Point<2>( std::cos(angle), std::sin(angle)) * outer_radius;
+        ring[i+N_r] = ring[i] * (inner_radius/outer_radius);
+      }
+
+    // stack N_z+1 copies of the cross section on top of each other
+    std::vector<Point<3> > vertices_3d;
+    vertices_3d.reserve (2*N_r*(N_z+1));
+    for (unsigned int j=0; j<=N_z; ++j)
+      {
+        const double z = j*length/N_z;
+        for (unsigned int i=0; i<2*N_r; ++i)
+          vertices_3d.push_back (Point<3> (ring[i][0], ring[i][1], z));
+      }
+
+    // cell (i,j) connects the i-th segment of cross section j with the same
+    // segment of cross section j+1
+    std::vector<CellData<3> > cells (N_r*N_z, CellData<3>());
+    for (unsigned int j=0; j<N_z; ++j)
+      for (unsigned int i=0; i<N_r; ++i)
+        {
+          // index of the first vertex of the lower and of the upper cross
+          // section, and of the neighbor of i around the circle
+          const unsigned int lower  = j*2*N_r;
+          const unsigned int upper  = (j+1)*2*N_r;
+          const unsigned int i_next = (i+1)%N_r;
+
+          CellData<3> &cell = cells[i+j*N_r];
+
+          // vertices on the outer circle
+          cell.vertices[0] = i + upper;
+          cell.vertices[1] = i_next + upper;
+          cell.vertices[2] = i + lower;
+          cell.vertices[3] = i_next + lower;
+
+          // vertices on the inner circle
+          cell.vertices[4] = N_r+i + upper;
+          cell.vertices[5] = N_r+i_next + upper;
+          cell.vertices[6] = N_r+i + lower;
+          cell.vertices[7] = N_r+i_next + lower;
+
+          cell.material_id = 0;
+        }
+
+    // no boundary information
+    tria.create_triangulation (vertices_3d, cells, SubCellData());
+  }
+
+
+
+  // Combine two unrefined meshes into a single coarse mesh stored in
+  // @p result. The vertex lists of both inputs are concatenated, the cells
+  // of both inputs are copied (vertex indices and material ids only),
+  // duplicated vertices are removed and the cells are reordered before the
+  // new triangulation is built. Any previous content of @p result is
+  // discarded.
+  template <int dim, int spacedim>
+  void
+  merge_triangulations (const Triangulation<dim, spacedim> &triangulation_1,
+                        const Triangulation<dim, spacedim> &triangulation_2,
+                        Triangulation<dim, spacedim>       &result)
+  {
+    Assert (triangulation_1.n_levels() == 1,
+            ExcMessage ("The input triangulations must be coarse meshes."));
+    Assert (triangulation_2.n_levels() == 1,
+            ExcMessage ("The input triangulations must be coarse meshes."));
+
+    // both inputs are handled by the same loop further down. the vertices
+    // of the second mesh are appended behind those of the first one, so the
+    // vertex indices of its cells have to be shifted by the number of
+    // vertices of the first mesh
+    const Triangulation<dim, spacedim> *const inputs[2]
+      = { &triangulation_1, &triangulation_2 };
+    const unsigned int vertex_shift[2]
+      = { 0, triangulation_1.n_vertices() };
+
+    // union of the two vertex sets (duplicates are removed later)
+    std::vector<Point<spacedim> > all_vertices = triangulation_1.get_vertices();
+    const std::vector<Point<spacedim> > &second_vertices
+      = triangulation_2.get_vertices();
+    all_vertices.insert (all_vertices.end(),
+                         second_vertices.begin(),
+                         second_vertices.end());
+
+    // union of the two cell sets, first all cells of mesh 1, then all
+    // cells of mesh 2
+    std::vector<CellData<dim> > all_cells;
+    all_cells.reserve (triangulation_1.n_cells() + triangulation_2.n_cells());
+    for (unsigned int t=0; t<2; ++t)
+      {
+        typename Triangulation<dim,spacedim>::cell_iterator
+        cell = inputs[t]->begin(),
+        endc = inputs[t]->end();
+        for (; cell != endc; ++cell)
+          {
+            CellData<dim> cell_data;
+            for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+              cell_data.vertices[v] = cell->vertex_index(v) + vertex_shift[t];
+            cell_data.material_id = cell->material_id();
+            all_cells.push_back (cell_data);
+          }
+      }
+
+    // vertices shared by the two meshes now appear twice: collapse them and
+    // let the helper renumber the vertex indices stored in the cells
+    SubCellData subcell_data;
+    std::vector<unsigned int> considered_vertices;
+    GridTools::delete_duplicated_vertices (all_vertices, all_cells,
+                                           subcell_data,
+                                           considered_vertices);
+
+    // bring the cells into an order that satisfies the convention for
+    // edge and face directions, then build the output mesh from scratch
+    GridReordering<dim, spacedim>::reorder_cells(all_cells, true);
+    result.clear ();
+    result.create_triangulation (all_vertices, all_cells, subcell_data);
+  }
+
+
+  // Store in @p result a copy of @p triangulation_1 in which, in addition,
+  // every cell is refined whose counterpart in @p triangulation_2 is
+  // refined. Both inputs must stem from the same coarse mesh and must not
+  // be parallel::distributed triangulations.
+  template <int dim, int spacedim>
+  void
+  create_union_triangulation (const Triangulation<dim, spacedim> &triangulation_1,
+                              const Triangulation<dim, spacedim> &triangulation_2,
+                              Triangulation<dim, spacedim>       &result)
+  {
+    Assert (GridTools::have_same_coarse_mesh (triangulation_1, triangulation_2),
+            ExcMessage ("The two input triangulations are not derived from "
+                        "the same coarse mesh as required."));
+    Assert ((dynamic_cast<const parallel::distributed::Triangulation<dim,spacedim>*>(&triangulation_1) == 0)
+            &&
+            (dynamic_cast<const parallel::distributed::Triangulation<dim,spacedim>*>(&triangulation_2) == 0),
+            ExcMessage ("The source triangulations for this function must both "
+                        "be available entirely locally, and not be distributed "
+                        "triangulations."));
+
+    // start from a copy of the first mesh
+    result.clear ();
+    result.copy_triangulation (triangulation_1);
+
+    // the number of levels of triangulation_2 bounds the number of
+    // refinement steps needed to get from the coarse grid to
+    // triangulation_2, and therefore also from any cell of
+    // triangulation_1 to triangulation_2. so that many sweeps suffice
+    for (unsigned int sweep=0; sweep<triangulation_2.n_levels(); ++sweep)
+      {
+        // the mapping has to be rebuilt in every sweep because result
+        // has changed since the previous one
+        InterGridMap<Triangulation<dim, spacedim> > result_to_second;
+        result_to_second.make_mapping (result, triangulation_2);
+
+        // flag every active cell whose partner in triangulation_2 has
+        // children
+        unsigned int n_flagged_cells = 0;
+        typename Triangulation<dim, spacedim>::active_cell_iterator
+        cell = result.begin_active(),
+        endc = result.end();
+        for (; cell != endc; ++cell)
+          if (result_to_second[cell]->has_children())
+            {
+              cell->set_refine_flag ();
+              ++n_flagged_cells;
+            }
+
+        // nothing left to refine: we are done early
+        if (n_flagged_cells == 0)
+          break;
+
+        result.execute_coarsening_and_refinement();
+      }
+  }
+
+
+
+  // Build in @p result a mesh consisting of all active cells of
+  // @p input_triangulation except those listed in @p cells_to_remove.
+  // Only vertex indices and material ids of the kept cells are copied.
+  // In debug mode, every kept cell is checked to live on the finest level
+  // of the input, i.e. the input must not be adaptively refined.
+  template <int dim, int spacedim>
+  void
+  create_triangulation_with_removed_cells (const Triangulation<dim, spacedim> &input_triangulation,
+                                           const std::set<typename Triangulation<dim, spacedim>::active_cell_iterator> &cells_to_remove,
+                                           Triangulation<dim, spacedim>       &result)
+  {
+    // take over the complete vertex list of the input; the cell
+    // descriptions below keep referring to these indices
+    std::vector<Point<spacedim> > all_vertices = input_triangulation.get_vertices();
+
+    // describe every active cell that is not scheduled for removal
+    std::vector<CellData<dim> > kept_cells;
+    typename Triangulation<dim,spacedim>::active_cell_iterator
+    cell = input_triangulation.begin_active(),
+    endc = input_triangulation.end();
+    for (; cell != endc; ++cell)
+      {
+        if (cells_to_remove.find(cell) != cells_to_remove.end())
+          continue;
+
+        Assert (static_cast<unsigned int>(cell->level()) == input_triangulation.n_levels()-1,
+                ExcMessage ("Your input triangulation appears to have "
+                            "adaptively refined cells. This is not allowed. You can "
+                            "only call this function on a triangulation in which "
+                            "all cells are on the same refinement level."));
+
+        CellData<dim> cell_data;
+        for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+          cell_data.vertices[v] = cell->vertex_index(v);
+        cell_data.material_id = cell->material_id();
+        kept_cells.push_back (cell_data);
+      }
+
+    // run the vertex list and the cell descriptions through the
+    // duplicate-vertex cleanup, which renumbers vertices as necessary
+    SubCellData subcell_data;
+    std::vector<unsigned int> considered_vertices;
+    GridTools::delete_duplicated_vertices (all_vertices, kept_cells,
+                                           subcell_data,
+                                           considered_vertices);
+
+    // finally replace whatever result held before by the new mesh
+    result.clear ();
+    result.create_triangulation (all_vertices, kept_cells, subcell_data);
+  }
+
+
+
+  // Extrude the unrefined 2d mesh @p input in z-direction into the (empty)
+  // 3d mesh @p result.
+  //
+  // @p n_slices copies of the 2d vertices are stacked at equidistant
+  // z-values between 0 and @p height, yielding n_slices-1 layers of cells.
+  // Material ids are inherited from the 2d cells. Lateral boundary faces
+  // inherit the nonzero boundary ids of the 2d boundary lines they were
+  // extruded from; bottom and top faces get max_boundary_id+1 and
+  // max_boundary_id+2, where max_boundary_id is the largest nonzero
+  // boundary id found on the 2d boundary (0 if there is none).
+  void
+  extrude_triangulation(const Triangulation<2, 2> &input,
+                        const unsigned int n_slices,
+                        const double height,
+                        Triangulation<3,3> &result)
+  {
+    Assert (input.n_levels() == 1,
+            ExcMessage ("The input triangulation must be a coarse mesh, i.e., it must "
+                        "not have been refined."));
+    Assert(result.n_cells()==0,
+           ExcMessage("The output triangulation object needs to be empty."));
+    Assert(height>0,
+           ExcMessage("The given height for extrusion must be positive."));
+    Assert(n_slices>=2,
+           ExcMessage("The number of slices for extrusion must be at least 2."));
+
+    // vertex k of slice s gets the 3d index s*n_vertices_2d + k
+    const unsigned int n_vertices_2d = input.n_vertices();
+    const std::vector<Point<2> > &vertices_2d = input.get_vertices();
+
+    std::vector<Point<3> > points(n_slices*n_vertices_2d);
+    std::vector<CellData<3> > cells;
+    cells.reserve((n_slices-1)*input.n_active_cells());
+
+    // copy the array of points as many times as there will be slices,
+    // one slice at a time
+    for (unsigned int slice=0; slice<n_slices; ++slice)
+      for (unsigned int i=0; i<n_vertices_2d; ++i)
+        {
+          const Point<2> &v = vertices_2d[i];
+          Point<3> &p = points[slice*n_vertices_2d+i];
+          p(0) = v(0);
+          p(1) = v(1);
+          p(2) = height * slice / (n_slices-1);
+        }
+
+    // then create the cells of each of the slices, one stack at a
+    // time. the first four vertices of a 3d cell are the 2d cell's
+    // vertices in the lower slice, the last four those in the upper one
+    for (Triangulation<2,2>::cell_iterator
+         cell = input.begin(); cell != input.end(); ++cell)
+      for (unsigned int slice=0; slice<n_slices-1; ++slice)
+        {
+          CellData<3> this_cell;
+          for (unsigned int v=0; v<GeometryInfo<2>::vertices_per_cell; ++v)
+            {
+              this_cell.vertices[v]
+                = cell->vertex_index(v)+slice*n_vertices_2d;
+              this_cell.vertices[v+GeometryInfo<2>::vertices_per_cell]
+                = cell->vertex_index(v)+(slice+1)*n_vertices_2d;
+            }
+
+          this_cell.material_id = cell->material_id();
+          cells.push_back(this_cell);
+        }
+
+    // next, create face data for all of the outer faces for which the
+    // boundary indicator will not be equal to zero (where we would
+    // explicitly set it to something that is already the default --
+    // no need to do that)
+    SubCellData s;
+    types::boundary_id max_boundary_id=0;
+    s.boundary_quads.reserve(input.n_active_lines()*(n_slices-1) + input.n_active_cells()*2);
+    for (Triangulation<2,2>::cell_iterator
+         cell = input.begin(); cell != input.end(); ++cell)
+      {
+        CellData<2> quad;
+        for (unsigned int f=0; f<GeometryInfo<2>::faces_per_cell; ++f)
+          if (cell->at_boundary(f)
+              &&
+              (cell->face(f)->boundary_id() != 0))
+            {
+              quad.boundary_id = cell->face(f)->boundary_id();
+              max_boundary_id = std::max(max_boundary_id, quad.boundary_id);
+              for (unsigned int slice=0; slice<n_slices-1; ++slice)
+                {
+                  quad.vertices[0] = cell->face(f)->vertex_index(0)+slice*n_vertices_2d;
+                  quad.vertices[1] = cell->face(f)->vertex_index(1)+slice*n_vertices_2d;
+                  quad.vertices[2] = cell->face(f)->vertex_index(0)+(slice+1)*n_vertices_2d;
+                  quad.vertices[3] = cell->face(f)->vertex_index(1)+(slice+1)*n_vertices_2d;
+                  s.boundary_quads.push_back(quad);
+                }
+            }
+      }
+
+    // then mark the bottom and top boundaries of the extruded mesh
+    // with max_boundary_id+1 and max_boundary_id+2. check that this
+    // remains valid
+    Assert ((max_boundary_id != numbers::invalid_boundary_id) &&
+            (max_boundary_id+1 != numbers::invalid_boundary_id) &&
+            (max_boundary_id+2 != numbers::invalid_boundary_id),
+            ExcMessage ("The input triangulation to this function is using boundary "
+                        "indicators in a range that do not allow using "
+                        "max_boundary_id+1 and max_boundary_id+2 as boundary "
+                        "indicators for the bottom and top faces of the "
+                        "extruded triangulation."));
+    for (Triangulation<2,2>::cell_iterator
+         cell = input.begin(); cell != input.end(); ++cell)
+      {
+        // bottom face: the 2d cell itself, located in slice 0
+        CellData<2> quad;
+        quad.boundary_id = max_boundary_id + 1;
+        for (unsigned int v=0; v<GeometryInfo<2>::vertices_per_cell; ++v)
+          quad.vertices[v] = cell->vertex_index(v);
+        s.boundary_quads.push_back(quad);
+
+        // top face: the same vertices, shifted into the last slice
+        quad.boundary_id = max_boundary_id + 2;
+        for (unsigned int v=0; v<GeometryInfo<2>::vertices_per_cell; ++v)
+          quad.vertices[v] += (n_slices-1)*n_vertices_2d;
+        s.boundary_quads.push_back(quad);
+      }
+
+    // use all of this to finally create the extruded 3d triangulation
+    result.create_triangulation (points,
+                                 cells,
+                                 s);
+  }
+
+
+  // One-dimensional specialization: there is no implementation for
+  // dim == 1, so calling this triggers an ExcNotImplemented assertion.
+  // The argument names given in the comments are those used by the
+  // three-dimensional specialization.
+  template <>
+  void
+  hyper_cube_with_cylindrical_hole (Triangulation<1> &,
+                                    const double,       // inner_radius
+                                    const double,       // outer_radius
+                                    const double,       // L
+                                    const unsigned int, // Nz
+                                    bool)               // colorize
+  {
+    Assert(false, ExcNotImplemented());
+  }
+
+
+
+  // Two-dimensional specialization: a square of half edge length
+  // @p outer_radius around the origin with a circular hole of radius
+  // @p inner_radius.
+  //
+  // The mesh is obtained from a hyper_shell around the origin by moving
+  // the vertices with indices 1, 3, 5 and 7 into the four corners of the
+  // square. The third and fourth arguments are unused in 2d.
+  //
+  // Boundary ids: if @p colorize is true, the faces at x=-outer_radius,
+  // x=+outer_radius, y=-outer_radius, y=+outer_radius get ids 0, 1, 2, 3
+  // and all remaining boundary faces (the hole) get id 4. Otherwise
+  // boundary faces whose center is closer to the origin than
+  // @p inner_radius get id 1 and all other boundary faces get id 0.
+  template <>
+  void
+  hyper_cube_with_cylindrical_hole (Triangulation<2> &triangulation,
+                                    const double inner_radius,
+                                    const double outer_radius,
+                                    const double, // width,
+                                    const unsigned int, // width_repetition,
+                                    bool colorize)
+  {
+    const int dim = 2;
+
+    Assert(inner_radius < outer_radius,
+           ExcMessage("outer_radius has to be bigger than inner_radius."));
+
+    Point<dim> center;
+    // We create an hyper_shell in two dimensions, and then we modify it.
+    // NOTE(review): the trailing 8 is presumably the number of cells of
+    // the shell, with the odd-numbered vertices 1,3,5,7 lying on the
+    // diagonals of the outer circle -- confirm against hyper_shell.
+    hyper_shell (triangulation,
+                 center, inner_radius, outer_radius,
+                 8);
+    Triangulation<dim>::active_cell_iterator
+    cell = triangulation.begin_active(),
+    endc = triangulation.end();
+
+    // first pass: move selected boundary vertices into the corners of the
+    // square. every vertex is looked at only once
+    std::vector<bool> treated_vertices(triangulation.n_vertices(), false);
+    for (; cell != endc; ++cell)
+      {
+        for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+          if (cell->face(f)->at_boundary())
+            {
+              for (unsigned int v=0; v < GeometryInfo<dim>::vertices_per_face; ++v)
+                {
+                  unsigned int vv = cell->face(f)->vertex_index(v);
+                  if (treated_vertices[vv] == false)
+                    {
+                      treated_vertices[vv] = true;
+                      switch (vv)
+                        {
+                        case 1:
+                          cell->face(f)->vertex(v) = center+Point<dim>(outer_radius,outer_radius);
+                          break;
+                        case 3:
+                          cell->face(f)->vertex(v) = center+Point<dim>(-outer_radius,outer_radius);
+                          break;
+                        case 5:
+                          cell->face(f)->vertex(v) = center+Point<dim>(-outer_radius,-outer_radius);
+                          break;
+                        case 7:
+                          cell->face(f)->vertex(v) = center+Point<dim>(outer_radius,-outer_radius);
+                          break;
+                        default:
+                          // all other vertices stay where they are
+                          break;
+                        }
+                    }
+                }
+            }
+      }
+
+    // second pass: assign boundary ids based on the location of the
+    // center of each boundary face. eps is the tolerance used when
+    // testing whether a face lies on one of the sides of the square
+    const double eps = 1e-3 * outer_radius;
+    cell = triangulation.begin_active();
+    for (; cell != endc; ++cell)
+      {
+        for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+          if (cell->face(f)->at_boundary())
+            {
+              double dx = cell->face(f)->center()(0) - center(0);
+              double dy = cell->face(f)->center()(1) - center(1);
+              if (colorize)
+                {
+                  if (std::abs(dx + outer_radius) < eps)
+                    cell->face(f)->set_boundary_id(0);
+                  else if (std::abs(dx - outer_radius) < eps)
+                    cell->face(f)->set_boundary_id(1);
+                  else if (std::abs(dy + outer_radius) < eps)
+                    cell->face(f)->set_boundary_id(2);
+                  else if (std::abs(dy - outer_radius) < eps)
+                    cell->face(f)->set_boundary_id(3);
+                  else
+                    cell->face(f)->set_boundary_id(4);
+                }
+              else
+                {
+                  double d = (cell->face(f)->center() - center).norm();
+                  if (d-inner_radius < 0)
+                    cell->face(f)->set_boundary_id(1);
+                  else
+                    cell->face(f)->set_boundary_id(0);
+                }
+            }
+      }
+  }
+
+
+
+  // Three-dimensional specialization: a box with square cross section of
+  // half edge length @p outer_radius and extent @p L in z-direction,
+  // pierced along the z-axis by a cylindrical hole of radius
+  // @p inner_radius. @p Nz is the number of cell layers in z-direction.
+  //
+  // The mesh is a cylinder_shell with 8 cells around the circumference
+  // whose outer vertices on the diagonals are moved into the corners of
+  // the square.
+  //
+  // Boundary ids: if @p colorize is true, the faces at x=-outer_radius,
+  // x=+outer_radius, y=-outer_radius, y=+outer_radius, z=0 and z=L get
+  // ids 0 to 5 and the remaining boundary faces (the hole) get id 6, which
+  // is also set on their lines. Otherwise boundary faces whose center,
+  // projected to z=0, is closer to the axis than @p inner_radius get id 1
+  // (also set on their lines) and all other boundary faces get id 0.
+  template <>
+  void hyper_cube_with_cylindrical_hole(Triangulation<3> &triangulation,
+                                        const double inner_radius,
+                                        const double outer_radius,
+                                        const double L,
+                                        const unsigned int Nz,
+                                        bool colorize)
+  {
+    const int dim = 3;
+
+    Assert(inner_radius < outer_radius,
+           ExcMessage("outer_radius has to be bigger than inner_radius."));
+    Assert(L > 0,
+           ExcMessage("Must give positive extension L"));
+    Assert(Nz >= 1, ExcLowerRange(1, Nz));
+
+    cylinder_shell (triangulation,
+                    L, inner_radius, outer_radius,
+                    8,
+                    Nz);
+
+    // first pass: relocate the corner vertices. each vertex is visited
+    // at most once
+    std::vector<bool> vertex_done(triangulation.n_vertices(), false);
+    for (Triangulation<dim>::active_cell_iterator
+         cell = triangulation.begin_active();
+         cell != triangulation.end(); ++cell)
+      for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+        {
+          if (!cell->face(f)->at_boundary())
+            continue;
+
+          for (unsigned int v=0; v < GeometryInfo<dim>::vertices_per_face; ++v)
+            {
+              const unsigned int global_index = cell->face(f)->vertex_index(v);
+              if (vertex_done[global_index])
+                continue;
+              vertex_done[global_index] = true;
+
+              // with 8 cells around the circumference, cylinder_shell
+              // creates 16 vertices per layer, the first 8 of which lie
+              // on the outer circle. find the layer this vertex belongs
+              // to; in-layer indices 1, 3, 5, 7 are the ones to move
+              for (unsigned int layer=0; layer<=Nz; ++layer)
+                {
+                  const double z
+                    = static_cast<double>(layer)*L/static_cast<double>(Nz);
+                  switch (global_index-layer*16)
+                    {
+                    case 1:
+                      cell->face(f)->vertex(v) = Point<dim>(outer_radius,outer_radius,z);
+                      break;
+                    case 3:
+                      cell->face(f)->vertex(v) = Point<dim>(-outer_radius,outer_radius,z);
+                      break;
+                    case 5:
+                      cell->face(f)->vertex(v) = Point<dim>(-outer_radius,-outer_radius,z);
+                      break;
+                    case 7:
+                      cell->face(f)->vertex(v) = Point<dim>(outer_radius,-outer_radius,z);
+                      break;
+                    default:
+                      break;
+                    }
+                }
+            }
+        }
+
+    // second pass: set boundary ids depending on where the center of
+    // each boundary face is located
+    const double tolerance = 1e-3 * outer_radius;
+    for (Triangulation<dim>::active_cell_iterator
+         cell = triangulation.begin_active();
+         cell != triangulation.end(); ++cell)
+      for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+        {
+          if (!cell->face(f)->at_boundary())
+            continue;
+
+          const Point<dim> face_center = cell->face(f)->center();
+          const double x = face_center(0);
+          const double y = face_center(1);
+          const double z = face_center(2);
+
+          if (colorize)
+            {
+              if (std::abs(x + outer_radius) < tolerance)
+                cell->face(f)->set_boundary_id(0);
+              else if (std::abs(x - outer_radius) < tolerance)
+                cell->face(f)->set_boundary_id(1);
+              else if (std::abs(y + outer_radius) < tolerance)
+                cell->face(f)->set_boundary_id(2);
+              else if (std::abs(y - outer_radius) < tolerance)
+                cell->face(f)->set_boundary_id(3);
+              else if (std::abs(z) < tolerance)
+                cell->face(f)->set_boundary_id(4);
+              else if (std::abs(z - L) < tolerance)
+                cell->face(f)->set_boundary_id(5);
+              else
+                {
+                  // none of the six outer sides: this face belongs to
+                  // the hole
+                  cell->face(f)->set_boundary_id(6);
+                  for (unsigned int l=0; l<GeometryInfo<dim>::lines_per_face; ++l)
+                    cell->face(f)->line(l)->set_boundary_id(6);
+                }
+            }
+          else
+            {
+              // distance of the face center from the z-axis
+              Point<dim> projected_center = face_center;
+              projected_center(2) = 0;
+              const double d = projected_center.norm();
+              if (d-inner_radius < 0)
+                {
+                  cell->face(f)->set_boundary_id(1);
+                  for (unsigned int l=0; l<GeometryInfo<dim>::lines_per_face; ++l)
+                    cell->face(f)->line(l)->set_boundary_id(1);
+                }
+              else
+                cell->face(f)->set_boundary_id(0);
+            }
+        }
+  }
+
+  // Copy the active cells of @p in_tria into @p out_tria, which lives in
+  // a space of possibly different dimension. Only the first
+  // min(spacedim1,spacedim2) coordinates of each vertex are copied; any
+  // further coordinates of the output vertices keep the value given by
+  // the default constructor of Point. Material and manifold ids of cells
+  // as well as boundary and manifold ids of boundary faces are
+  // transferred. Not usable with parallel::distributed::Triangulation.
+  template <int dim, int spacedim1, int spacedim2>
+  void flatten_triangulation(const Triangulation<dim, spacedim1> &in_tria,
+                             Triangulation<dim,spacedim2> &out_tria)
+  {
+    const parallel::distributed::Triangulation<dim, spacedim1> *pt =
+      dynamic_cast<const parallel::distributed::Triangulation<dim, spacedim1> *>(&in_tria);
+
+    // the cast silences the unused-variable warning when Assert
+    // compiles to nothing
+    (void)pt;
+    Assert (pt == NULL,
+            ExcMessage("Cannot use this function on parallel::distributed::Triangulation."));
+
+    // vertices: copy as many coordinates as both spaces have in common
+    const unsigned int n_common_coordinates = std::min(spacedim1,spacedim2);
+    const std::vector<Point<spacedim1> > &source_vertices = in_tria.get_vertices();
+
+    std::vector<Point<spacedim2> > target_vertices (source_vertices.size());
+    for (unsigned int i=0; i<source_vertices.size(); ++i)
+      for (unsigned int d=0; d<n_common_coordinates; ++d)
+        target_vertices[i][d] = source_vertices[i][d];
+
+    // cells: one description per active cell, in iteration order
+    std::vector<CellData<dim> > cell_data (in_tria.n_active_cells());
+    {
+      unsigned int index = 0;
+      for (typename Triangulation<dim,spacedim1>::active_cell_iterator
+           cell = in_tria.begin_active();
+           cell != in_tria.end(); ++cell, ++index)
+        {
+          for (unsigned int i=0; i<GeometryInfo<dim>::vertices_per_cell; ++i)
+            cell_data[index].vertices[i] = cell->vertex_index(i);
+          cell_data[index].material_id = cell->material_id();
+          cell_data[index].manifold_id = cell->manifold_id();
+        }
+    }
+
+    // boundary faces: lines in 2d, quads in 3d. the respective array is
+    // first sized for all active faces and shrunk to the number of
+    // boundary faces actually found afterwards
+    SubCellData subcelldata;
+    if (dim>1)
+      {
+        typename Triangulation<dim,spacedim1>::active_face_iterator
+        face = in_tria.begin_active_face(),
+        endf = in_tria.end_face();
+
+        unsigned int n_boundary_faces = 0;
+        if (dim == 2)
+          {
+            subcelldata.boundary_lines.resize(in_tria.n_active_faces());
+            for (; face != endf; ++face)
+              {
+                if (!face->at_boundary())
+                  continue;
+
+                for (unsigned int i=0; i<GeometryInfo<dim>::vertices_per_face; ++i)
+                  subcelldata.boundary_lines[n_boundary_faces].vertices[i] = face->vertex_index(i);
+                subcelldata.boundary_lines[n_boundary_faces].boundary_id = face->boundary_id();
+                subcelldata.boundary_lines[n_boundary_faces].manifold_id = face->manifold_id();
+                ++n_boundary_faces;
+              }
+            subcelldata.boundary_lines.resize(n_boundary_faces);
+          }
+        else if (dim == 3)
+          {
+            subcelldata.boundary_quads.resize(in_tria.n_active_faces());
+            for (; face != endf; ++face)
+              {
+                if (!face->at_boundary())
+                  continue;
+
+                for (unsigned int i=0; i<GeometryInfo<dim>::vertices_per_face; ++i)
+                  subcelldata.boundary_quads[n_boundary_faces].vertices[i] = face->vertex_index(i);
+                subcelldata.boundary_quads[n_boundary_faces].boundary_id = face->boundary_id();
+                subcelldata.boundary_quads[n_boundary_faces].manifold_id = face->manifold_id();
+                ++n_boundary_faces;
+              }
+            subcelldata.boundary_quads.resize(n_boundary_faces);
+          }
+        else
+          {
+            Assert(false, ExcInternalError());
+          }
+      }
+
+    out_tria.create_triangulation(target_vertices, cell_data, subcelldata);
+  }
+
+
+
+  // Build in @p surface_mesh a mesh of codimension one from the boundary
+  // faces of @p volume_mesh and return a map from each cell of the surface
+  // mesh to the face of the volume mesh it was created from.
+  //
+  // Only boundary faces whose boundary id is contained in @p boundary_ids
+  // are used; an empty set selects all boundary faces. The surface mesh is
+  // first created from the faces of the level-0 cells of the volume mesh
+  // and then refined until it matches the refinement of the corresponding
+  // volume faces. The material id of each surface cell is set to the
+  // boundary id of the volume face.
+  //
+  // The two alternative spellings of the return type below are selected
+  // by the _MSC_VER preprocessor check.
+  template <template <int,int> class MeshType, int dim, int spacedim>
+#ifndef _MSC_VER
+  std::map<typename MeshType<dim-1,spacedim>::cell_iterator,
+      typename MeshType<dim,spacedim>::face_iterator>
+#else
+  typename ExtractBoundaryMesh<MeshType,dim,spacedim>::return_type
+#endif
+      extract_boundary_mesh (const MeshType<dim,spacedim>       &volume_mesh,
+                             MeshType<dim-1,spacedim>           &surface_mesh,
+                             const std::set<types::boundary_id> &boundary_ids)
+  {
+// This function works using the following assumption:
+//    Triangulation::create_triangulation(...) will create cells that preserve
+//    the order of cells passed in using the CellData argument; also,
+//    that it will not reorder the vertices.
+
+    std::map<typename MeshType<dim-1,spacedim>::cell_iterator,
+        typename MeshType<dim,spacedim>::face_iterator>
+        surface_to_volume_mapping;
+
+    const unsigned int boundary_dim = dim-1; //dimension of the boundary mesh
+
+    // First create surface mesh and mapping
+    // from only level(0) cells of volume_mesh
+    std::vector<typename MeshType<dim,spacedim>::face_iterator>
+    mapping;  // temporary map for level==0
+
+
+    // touched[v] records whether volume vertex v has already been copied
+    // into the vertex list of the surface mesh
+    std::vector< bool > touched (volume_mesh.get_triangulation().n_vertices(), false);
+    std::vector< CellData< boundary_dim > > cells;
+    SubCellData                             subcell_data;
+    std::vector< Point<spacedim> >          vertices;
+
+    std::map<unsigned int,unsigned int> map_vert_index; //volume vertex indices to surf ones
+
+    for (typename MeshType<dim,spacedim>::cell_iterator
+         cell = volume_mesh.begin(0);
+         cell != volume_mesh.end(0);
+         ++cell)
+      for (unsigned int i=0; i < GeometryInfo<dim>::faces_per_cell; ++i)
+        {
+          const typename MeshType<dim,spacedim>::face_iterator
+          face = cell->face(i);
+
+          // consider only boundary faces, and among those only the ones
+          // selected by boundary_ids (an empty set selects all of them)
+          if ( face->at_boundary()
+               &&
+               (boundary_ids.empty() ||
+                ( boundary_ids.find(face->boundary_id()) != boundary_ids.end())) )
+            {
+              CellData< boundary_dim > c_data;
+
+              for (unsigned int j=0;
+                   j<GeometryInfo<boundary_dim>::vertices_per_cell; ++j)
+                {
+                  const unsigned int v_index = face->vertex_index(j);
+
+                  // first time we see this vertex: append it to the
+                  // surface vertex list and remember its new index
+                  if ( !touched[v_index] )
+                    {
+                      vertices.push_back(face->vertex(j));
+                      map_vert_index[v_index] = vertices.size() - 1;
+                      touched[v_index] = true;
+                    }
+
+                  c_data.vertices[j] = map_vert_index[v_index];
+                  // the boundary id of the volume face becomes the
+                  // material id of the surface cell (this assignment is
+                  // repeated for every vertex, always with the same value)
+                  c_data.material_id = static_cast<types::material_id>(face->boundary_id());
+                }
+
+              // if we start from a 3d mesh, then we have copied the
+              // vertex information in the same order in which they
+              // appear in the face; however, this means that we
+              // impart a coordinate system that is right-handed when
+              // looked at *from the outside* of the cell if the
+              // current face has index 0, 2, 4 within a 3d cell, but
+              // right-handed when looked at *from the inside* for the
+              // other faces. we fix this by flipping opposite
+              // vertices if we are on a face 1, 3, 5
+              if (dim == 3)
+                if (i % 2 == 1)
+                  std::swap (c_data.vertices[1], c_data.vertices[2]);
+
+              // in 3d, we also need to make sure we copy the manifold
+              // indicators from the edges of the volume mesh to the
+              // edges of the surface mesh
+              //
+              // one might think that we we can also prescribe
+              // boundary indicators for edges, but this is only
+              // possible for edges that aren't just on the boundary
+              // of the domain (all of the edges we consider are!) but
+              // that would actually end up at the boundary of the
+              // surface mesh. there is no easy way to check this, so
+              // we simply don't do it and instead set it to an
+              // invalid value that makes sure
+              // Triangulation::create_triangulation doesn't copy it
+              if (dim == 3)
+                for (unsigned int e=0; e<4; ++e)
+                  {
+                    // see if we already saw this edge from a
+                    // neighboring face, either in this or the reverse
+                    // orientation. if so, skip it.
+                    //
+                    // note: the loop index i below shadows the face
+                    // index i of the enclosing loop; the face index is
+                    // not used inside this inner scope. the search is
+                    // linear in the number of edges stored so far
+                    {
+                      bool edge_found = false;
+                      for (unsigned int i=0; i<subcell_data.boundary_lines.size(); ++i)
+                        if (((subcell_data.boundary_lines[i].vertices[0]
+                              == map_vert_index[face->line(e)->vertex_index(0)])
+                             &&
+                             (subcell_data.boundary_lines[i].vertices[1]
+                              == map_vert_index[face->line(e)->vertex_index(1)]))
+                            ||
+                            ((subcell_data.boundary_lines[i].vertices[0]
+                              == map_vert_index[face->line(e)->vertex_index(1)])
+                             &&
+                             (subcell_data.boundary_lines[i].vertices[1]
+                              == map_vert_index[face->line(e)->vertex_index(0)])))
+                          {
+                            edge_found = true;
+                            break;
+                          }
+                      if (edge_found == true)
+                        continue;   // try next edge of current face
+                    }
+
+                    // new edge: store it with surface vertex indices and
+                    // the manifold id of the volume edge
+                    CellData<1> edge;
+                    edge.vertices[0] = map_vert_index[face->line(e)->vertex_index(0)];
+                    edge.vertices[1] = map_vert_index[face->line(e)->vertex_index(1)];
+                    edge.boundary_id = numbers::internal_face_boundary_id;
+                    edge.manifold_id = face->line(e)->manifold_id();
+
+                    subcell_data.boundary_lines.push_back (edge);
+                  }
+
+
+              // cells[k] and mapping[k] are filled in lockstep, so the
+              // k-th surface cell description belongs to the k-th face
+              cells.push_back(c_data);
+              mapping.push_back(face);
+            }
+        }
+
+    // create level 0 surface triangulation
+    Assert (cells.size() > 0, ExcMessage ("No boundary faces selected"));
+    const_cast<Triangulation<dim-1,spacedim>&>(surface_mesh.get_triangulation())
+    .create_triangulation (vertices, cells, subcell_data);
+
+    // Make the actual mapping
+    // (this relies on the assumption stated at the top of the function:
+    // the index of a level-0 surface cell equals the position of its
+    // description in the cells array)
+    for (typename MeshType<dim-1,spacedim>::active_cell_iterator
+         cell = surface_mesh.begin(0);
+         cell!=surface_mesh.end(0); ++cell)
+      surface_to_volume_mapping[cell] = mapping.at(cell->index());
+
+    // refine the surface mesh until no active surface cell is mapped to
+    // a volume face that has children
+    do
+      {
+        bool changed = false;
+
+        for (typename MeshType<dim-1,spacedim>::active_cell_iterator
+             cell = surface_mesh.begin_active(); cell!=surface_mesh.end(); ++cell)
+          if (surface_to_volume_mapping[cell]->has_children() == true )
+            {
+              cell->set_refine_flag ();
+              changed = true;
+            }
+
+        if (changed)
+          {
+            const_cast<Triangulation<dim-1,spacedim>&>(surface_mesh.get_triangulation())
+            .execute_coarsening_and_refinement();
+
+            // extend the mapping to the newly created children: child c
+            // of a surface cell is associated with child c of the volume
+            // face its parent is mapped to
+            for (typename MeshType<dim-1,spacedim>::cell_iterator
+                 surface_cell = surface_mesh.begin(); surface_cell!=surface_mesh.end(); ++surface_cell)
+              for (unsigned int c=0; c<surface_cell->n_children(); c++)
+                if (surface_to_volume_mapping.find(surface_cell->child(c)) == surface_to_volume_mapping.end())
+                  surface_to_volume_mapping[surface_cell->child(c)]
+                    = surface_to_volume_mapping[surface_cell]->child(c);
+          }
+        else
+          break;
+      }
+    while (true);
+
+    return surface_to_volume_mapping;
+  }
+
+}
+
+// Explicit instantiations of hyper_sphere for the two cases in which the
+// mesh has one dimension less than the space it is embedded in
+// (<1,2> and <2,3>).
+namespace GridGenerator
+{
+  template void
+  hyper_sphere<1,2> (Triangulation<1,2> &,
+                     const Point<2>     &center,
+                     const double        radius);
+
+  template void
+  hyper_sphere<2,3> (Triangulation<2,3> &,
+                     const Point<3>     &center,
+                     const double        radius);
+}
+#include "grid_generator.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/grid/grid_generator.inst.in b/source/grid/grid_generator.inst.in
new file mode 100644
index 0000000..77b852f
--- /dev/null
+++ b/source/grid/grid_generator.inst.in
@@ -0,0 +1,175 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+    // GridGenerator functions that depend on both the dimension and the
+    // space dimension. The preprocessor guard below limits the
+    // instantiations to pairs with dim <= spacedim.
+    namespace GridGenerator
+    \{
+#if deal_II_dimension <= deal_II_space_dimension
+      template void
+      hyper_rectangle<deal_II_dimension, deal_II_space_dimension>
+      (Triangulation<deal_II_dimension, deal_II_space_dimension> &,
+       const Point<deal_II_dimension> &,
+       const Point<deal_II_dimension> &,
+       const bool);
+
+      template void
+      hyper_cube<deal_II_dimension, deal_II_space_dimension>
+      (Triangulation<deal_II_dimension, deal_II_space_dimension> &,
+       const double,
+       const double,
+       const bool);
+
+      template void
+      subdivided_hyper_cube<deal_II_dimension, deal_II_space_dimension>
+      (Triangulation<deal_II_dimension, deal_II_space_dimension> &,
+       const unsigned int,
+       const double,
+       const double);
+
+      template void
+      subdivided_hyper_rectangle<deal_II_dimension, deal_II_space_dimension>
+      (Triangulation<deal_II_dimension, deal_II_space_dimension> &,
+       const std::vector<unsigned int> &,
+       const Point<deal_II_dimension> &,
+       const Point<deal_II_dimension> &,
+       const bool);
+
+      template void
+      subdivided_parallelepiped<deal_II_dimension, deal_II_space_dimension>
+      (Triangulation<deal_II_dimension, deal_II_space_dimension> &,
+       const Point<deal_II_space_dimension> &,
+       const std_cxx11::array<Tensor<1,deal_II_space_dimension>,deal_II_dimension> &,
+       const std::vector<unsigned int> &,
+       const bool colorize);
+
+      template void
+      hyper_cross<deal_II_dimension, deal_II_space_dimension>
+      (Triangulation<deal_II_dimension, deal_II_space_dimension> &,
+       const std::vector<unsigned int> &,
+       const bool);
+
+      template void
+      cheese<deal_II_dimension, deal_II_space_dimension>
+      (Triangulation<deal_II_dimension, deal_II_space_dimension> &,
+       const std::vector<unsigned int> &);
+
+      template void
+      merge_triangulations
+      (const Triangulation<deal_II_dimension,deal_II_space_dimension> &triangulation_1,
+       const Triangulation<deal_II_dimension,deal_II_space_dimension> &triangulation_2,
+       Triangulation<deal_II_dimension,deal_II_space_dimension>       &result);
+
+      template void
+      create_union_triangulation
+      (const Triangulation<deal_II_dimension, deal_II_space_dimension> &triangulation_1,
+       const Triangulation<deal_II_dimension, deal_II_space_dimension> &triangulation_2,
+       Triangulation<deal_II_dimension, deal_II_space_dimension>       &result);
+
+      template void
+      create_triangulation_with_removed_cells
+      (const Triangulation<deal_II_dimension, deal_II_space_dimension> &input_triangulation,
+       const std::set<Triangulation<deal_II_dimension, deal_II_space_dimension>::active_cell_iterator> &cells_to_remove,
+       Triangulation<deal_II_dimension, deal_II_space_dimension>       &result);
+#endif
+    \}
+  }
+
+
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS; Container : TRIANGULATION_AND_DOFHANDLER_TEMPLATES)
+  {
+#if deal_II_dimension <= deal_II_space_dimension
+    namespace GridGenerator
+    \{
+      // extract_boundary_mesh fills a mesh of dimension dim-1 and is
+      // therefore skipped for dim equal to 1. The return type is spelled
+      // differently when compiling with MSVC.
+#if deal_II_dimension != 1
+      template
+#ifndef _MSC_VER
+      std::map<Container<deal_II_dimension-1,deal_II_space_dimension>::cell_iterator,
+               Container<deal_II_dimension,deal_II_space_dimension>::face_iterator>
+#else
+      ExtractBoundaryMesh<Container,deal_II_dimension,deal_II_space_dimension>::return_type
+#endif
+      extract_boundary_mesh
+      (const Container<deal_II_dimension, deal_II_space_dimension> &mesh,
+       Container<deal_II_dimension-1,deal_II_space_dimension>      &boundary_mesh,
+       const std::set<types::boundary_id>                          &boundary_ids);
+#endif
+    \}
+#endif
+  }
+
+
+for (deal_II_dimension : DIMENSIONS)
+{
+  // GridGenerator functions that are instantiated once per dimension
+  namespace GridGenerator
+  \{
+    template void
+    subdivided_hyper_rectangle
+    (Triangulation<deal_II_dimension> &,
+     const std::vector<std::vector<double> > &,
+     const Point<deal_II_dimension> &,
+     const Point<deal_II_dimension> &,
+     const bool);
+
+    template void
+    simplex<deal_II_dimension>
+    (Triangulation<deal_II_dimension, deal_II_dimension> &,
+     const std::vector<Point<deal_II_dimension> > &);
+
+    template void
+    parallelepiped<deal_II_dimension>
+    (Triangulation<deal_II_dimension> &,
+     const Point<deal_II_dimension> (&) [deal_II_dimension],
+     const bool);
+
+    template void
+    subdivided_parallelepiped<deal_II_dimension>
+    (Triangulation<deal_II_dimension> &,
+     const unsigned int,
+     const Point<deal_II_dimension> (&) [deal_II_dimension],
+     const bool);
+
+    // the overload taking one subdivision count per direction is declared
+    // with a pointer argument instead of an array reference under MSVC
+#ifndef _MSC_VER
+    template void
+    subdivided_parallelepiped<deal_II_dimension>
+    (Triangulation<deal_II_dimension> &,
+     const unsigned int (&) [deal_II_dimension],
+     const Point<deal_II_dimension> (&) [deal_II_dimension],
+     const bool);
+#else
+    template void
+    subdivided_parallelepiped<deal_II_dimension>
+    (Triangulation<deal_II_dimension> &,
+     const unsigned int *,
+     const Point<deal_II_dimension> (&) [deal_II_dimension],
+     const bool);
+#endif
+  \}
+}
+
+
+for (deal_II_dimension : DIMENSIONS ; deal_II_space_dimension : SPACE_DIMENSIONS; deal_II_space_dimension_2 : SPACE_DIMENSIONS)
+{
+  // flatten_triangulation maps between two space dimensions and is
+  // instantiated whenever dim does not exceed either of them
+  namespace GridGenerator
+  \{
+#if (deal_II_dimension <= deal_II_space_dimension) && (deal_II_dimension <= deal_II_space_dimension_2)
+    template void
+    flatten_triangulation<>
+    (const Triangulation<deal_II_dimension,deal_II_space_dimension> &,
+     Triangulation<deal_II_dimension,deal_II_space_dimension_2> &);
+#endif
+  \}
+}
+
diff --git a/source/grid/grid_in.cc b/source/grid/grid_in.cc
new file mode 100644
index 0000000..478895e
--- /dev/null
+++ b/source/grid/grid_in.cc
@@ -0,0 +1,3432 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/path_search.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/exceptions.h>
+
+#include <deal.II/grid/grid_in.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_reordering.h>
+#include <deal.II/grid/grid_tools.h>
+
+#include <map>
+#include <algorithm>
+#include <fstream>
+#include <functional>
+#include <cctype>
+
+
+#ifdef DEAL_II_WITH_NETCDF
+#include <netcdfcpp.h>
+#endif
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace
+{
+  /**
+   * Apply the boundary ids in @p boundary_ids, which are keyed by vertex
+   * index, to the faces (i.e., vertices) of the active cells of a 1d
+   * triangulation. In 1d, boundary indicators are associated with vertices,
+   * but this is not currently passed through the SubcellData structure, so
+   * the ids are set here after the triangulation has already been created.
+   *
+   * An exception is thrown if an id is prescribed for a vertex that is not
+   * at the boundary of the mesh.
+   *
+   * TODO: Fix this properly via SubcellData
+   */
+  template <int spacedim>
+  void
+  assign_1d_boundary_indicators (const std::map<unsigned int, types::boundary_id> &boundary_ids,
+                                 Triangulation<1,spacedim>                        &triangulation)
+  {
+    // nothing to do if no ids were prescribed
+    if (boundary_ids.empty())
+      return;
+
+    typedef std::map<unsigned int, types::boundary_id>::const_iterator id_iterator;
+
+    typename Triangulation<1,spacedim>::active_cell_iterator
+    cell = triangulation.begin_active();
+    for (; cell != triangulation.end(); ++cell)
+      for (unsigned int f=0; f<GeometryInfo<1>::faces_per_cell; ++f)
+        {
+          // look the vertex up once and reuse the result below
+          const id_iterator entry = boundary_ids.find(cell->vertex_index(f));
+          if (entry == boundary_ids.end())
+            continue;
+
+          AssertThrow (cell->at_boundary(f),
+                       ExcMessage ("You are trying to prescribe boundary ids on the face "
+                                   "of a 1d cell (i.e., on a vertex), but this face is not actually at "
+                                   "the boundary of the mesh. This is not allowed."));
+          cell->face(f)->set_boundary_id (entry->second);
+        }
+  }
+
+
+  /**
+   * Overload for general dimensions. Boundary ids are only attached to
+   * vertices in 1d, so this version does nothing apart from asserting that
+   * it has not been called with dim equal to 1.
+   */
+  template <int dim, int spacedim>
+  void
+  assign_1d_boundary_indicators (const std::map<unsigned int, types::boundary_id> &,
+                                 Triangulation<dim,spacedim> &)
+  {
+    Assert (dim != 1, ExcInternalError());
+  }
+}
+
+// Default constructor: no triangulation is attached yet (the pointer is
+// initialized with zero), and ucd is selected as the default format.
+template <int dim, int spacedim>
+GridIn<dim, spacedim>::GridIn ()
+  :
+  tria (0, typeid(*this).name()),
+  default_format (ucd)
+{}
+
+
+// Store the address of @p t in the member pointer tria, which the read
+// functions check and then use to create the triangulation.
+template <int dim, int spacedim>
+void
+GridIn<dim, spacedim>::attach_triangulation (Triangulation<dim, spacedim> &t)
+{
+  tria = &t;
+}
+
+
+
+// Read a mesh in ASCII legacy VTK format (DATASET UNSTRUCTURED_GRID) from
+// @p in and create the attached triangulation from it.
+//
+// The expected layout is: four header lines, a POINTS section, a CELLS
+// section and, optionally, CELL_TYPES and CELL_DATA sections. In the CELLS
+// section, entries with 2^dim vertices are cells and entries with 2^(dim-1)
+// vertices are boundary faces. If a CELL_DATA section is present, its values
+// are assigned first to the cells and then to the boundary faces, each in
+// the order in which they were read, i.e., cells are expected to be listed
+// before boundary faces.
+//
+// Throws ExcIO if a section count or the vertex count of an entry cannot be
+// read, and ExcMessage if a header line or section is missing or an entry
+// has an unsupported number of vertices.
+template<int dim, int spacedim>
+void GridIn<dim, spacedim>::read_vtk(std::istream &in)
+{
+  // like the other readers, require an attached triangulation up front
+  // instead of dereferencing a null pointer at the very end
+  Assert(tria != 0, ExcNoTriangulationSelected());
+  Assert((dim == 2)||(dim == 3), ExcNotImplemented());
+  std::string line;
+
+  // verify that the first, third and fourth lines match
+  // expectations. the second line of the file may essentially be
+  // anything the author of the file chose to identify what's in
+  // there, so we just ensure that we can read it
+  {
+    std::string text[4];
+    text[0] = "# vtk DataFile Version 3.0";
+    text[1] = "****";
+    text[2] = "ASCII";
+    text[3] = "DATASET UNSTRUCTURED_GRID";
+
+    for (unsigned int i = 0; i < 4; ++i)
+      {
+        getline(in,line);
+        if (i != 1)
+          AssertThrow (line.compare(text[i]) == 0,
+                       ExcMessage(std::string("While reading VTK file, failed to find a header line with text <") +
+                                  text[i] + ">"));
+      }
+  }
+
+  ///////////////////Declaring storage and mappings//////////////////
+
+  std::vector< Point<spacedim> > vertices;//vector of vertices
+  std::vector< CellData<dim> > cells;//vector of cells
+  SubCellData subcelldata;//boundary faces and their ids
+  std::map<int, int> vertex_indices; // # vert in vtk (key) ---> # vert in deal.II (value)
+
+  // cells, boundary quads and boundary lines are stored in the order in
+  // which they are read, so the i-th id of each kind in the CELL_DATA
+  // section belongs to element i of the respective vector and no separate
+  // index maps are needed
+  unsigned int no_vertices = 0, no_quads = 0, no_lines = 0;
+
+  std::string keyword;
+
+  in >> keyword;
+
+  //////////////////Processing the POINTS section///////////////
+
+  if (keyword == "POINTS")
+    {
+      in>>no_vertices;// taking the no. of vertices
+      AssertThrow (in, ExcIO());
+      in.ignore(256, '\n');//skip whatever follows the count on the same line
+
+      for (unsigned int count = 0; count < no_vertices; count++) //loop to read three values till the no . vertices is satisfied
+        {
+          // VTK format always specifies vertex coordinates with 3 components
+          Point<3> x;
+          in >> x(0) >> x(1) >> x(2);
+
+          vertices.push_back(Point<spacedim>());
+          for (unsigned int d=0; d<spacedim; ++d)
+            vertices.back()(d) = x(d);
+
+          vertex_indices[count] = count;
+        }
+    }
+
+  else
+    AssertThrow (false,
+                 ExcMessage ("While reading VTK file, failed to find POINTS section"));
+
+
+  //////////////////ignoring space between points and cells sections////////////////////
+  std::string checkline;
+  in.ignore(256, '\n');//move to the start of the next line
+  // remember the position as a stream position (not as an int, which
+  // could overflow) so that a non-empty line can be re-read below
+  const std::streampos position = in.tellg();
+  getline(in,checkline);
+  if (checkline.compare("") != 0)
+    {
+      in.seekg(position);
+    }
+
+  in >> keyword;
+
+  // total_cells: number of entries announced by the CELLS header
+  // no_cells:    number of entries that turned out to be cells
+  // type:        number of vertices of the entry currently being read
+  unsigned int total_cells = 0, no_cells = 0, type = 0;
+
+  ///////////////////Processing the CELLS section that contains cells(cells) and bound_quads(subcelldata)///////////////////////
+
+  if (keyword == "CELLS")
+    {
+      in>>total_cells;
+      AssertThrow (in, ExcIO());
+      in.ignore(256,'\n');
+
+      if (dim == 3)
+        {
+          for (unsigned int count = 0; count < total_cells; count++)
+            {
+              in>>type;
+              AssertThrow (in, ExcIO());
+
+              if (type == 8)
+                {
+                  // a hexahedral cell
+                  cells.push_back(CellData<dim>());
+
+                  for (unsigned int j = 0; j < type; j++) //loop to feed data
+                    in >> cells.back().vertices[j];
+
+                  cells.back().material_id = 0;
+
+                  for (unsigned int j = 0; j < type; j++) //translate to deal.II vertex numbers
+                    {
+                      cells.back().vertices[j] = vertex_indices[cells.back().vertices[j]];
+                    }
+                  no_cells++;
+                }
+
+              else if ( type == 4)
+                {
+                  // a boundary quad
+                  subcelldata.boundary_quads.push_back(CellData<2>());
+
+                  for (unsigned int j = 0; j < type; j++) //loop to feed the data to the boundary
+                    {
+                      in >> subcelldata.boundary_quads.back().vertices[j];
+                    }
+                  subcelldata.boundary_quads.back().material_id = 0;
+                  for (unsigned int j = 0; j < type; j++)
+                    {
+                      subcelldata.boundary_quads.back().vertices[j] = vertex_indices[subcelldata.boundary_quads.back().vertices[j]];
+                    }
+                  no_quads++;
+                }
+
+              else
+                AssertThrow (false,
+                             ExcMessage ("While reading VTK file, unknown file type encountered"));
+            }
+        }
+
+      else if (dim == 2)
+        {
+          for (unsigned int count = 0; count < total_cells; count++)
+            {
+              in>>type;
+              AssertThrow (in, ExcIO());
+
+              if (type == 4)
+                {
+                  // a quadrilateral cell
+                  cells.push_back(CellData<dim>());
+
+                  for (unsigned int j = 0; j < type; j++) //loop to feed data
+                    in >> cells.back().vertices[j];
+
+                  cells.back().material_id = 0;
+
+                  for (unsigned int j = 0; j < type; j++) //translate to deal.II vertex numbers
+                    {
+                      cells.back().vertices[j] = vertex_indices[cells.back().vertices[j]];
+                    }
+                  no_cells++;
+                }
+
+              else if (type == 2)
+                {
+                  // a boundary line
+                  subcelldata.boundary_lines.push_back(CellData<1>());
+
+                  for (unsigned int j = 0; j < type; j++) //loop to feed the data to the boundary
+                    {
+                      in >> subcelldata.boundary_lines.back().vertices[j];
+                    }
+                  subcelldata.boundary_lines.back().material_id = 0;
+                  for (unsigned int j = 0; j < type; j++)
+                    {
+                      subcelldata.boundary_lines.back().vertices[j] = vertex_indices[subcelldata.boundary_lines.back().vertices[j]];
+                    }
+                  no_lines++;
+                }
+
+              else
+                AssertThrow (false,
+                             ExcMessage ("While reading VTK file, unknown file type encountered"));
+            }
+        }
+      else
+        // the CELLS section was found, the dimension is what is unsupported
+        AssertThrow (false,
+                     ExcMessage ("While reading VTK file, only dim=2 and dim=3 are supported"));
+
+      /////////////////////Processing the CELL_TYPES section////////////////////////
+
+      in >> keyword;
+
+      if (keyword == "CELL_TYPES")//Entering the cell_types section and ignoring data.
+        {
+          in.ignore(256, '\n');
+
+          // skip the VTK type ids of the entries read above: 12 is a
+          // hexahedron, 9 a quadrilateral and 3 a line. The latter is the
+          // type of boundary lines in 2d and must be skipped as well, since
+          // otherwise a following CELL_DATA section would not be found.
+          while (!in.eof())
+            {
+              in>>keyword;
+              if (keyword != "12" && keyword != "9" && keyword != "3")
+                {
+                  break;
+                }
+            }
+        }
+
+      ////////////////////////Processing the CELL_DATA section/////////////////////////////
+
+      if (keyword == "CELL_DATA")
+        {
+          // the announced number of ids is read but not used
+          int no_ids;
+          in>>no_ids;
+
+          std::string linenew;
+          std::string textnew[2];
+          textnew[0] = "SCALARS MaterialID double";
+          textnew[1] = "LOOKUP_TABLE default";
+
+          in.ignore(256, '\n');
+
+          for (unsigned int i = 0; i < 2; i++)
+            {
+              getline(in, linenew);
+              // only the beginning of the SCALARS line has to match
+              if (i == 0)
+                if (linenew.size() > textnew[0].size())
+                  linenew.resize(textnew[0].size());
+
+              AssertThrow (linenew.compare(textnew[i]) == 0,
+                           ExcMessage (std::string("While reading VTK file, failed to find <") +
+                                       textnew[i] + "> section"));
+            }
+
+          for (unsigned int i = 0; i < no_cells; i++) //assigning IDs to cells.
+            {
+              int id = 0;
+              in>>id;
+              cells[i].material_id = id;
+            }
+
+          // boundary faces are indexed from zero, exactly like the vectors
+          // they were appended to
+          if (dim == 3)
+            {
+              for (unsigned int i = 0; i < no_quads; i++) //assigning IDs to bounds.
+                {
+                  int id = 0;
+                  in>>id;
+                  subcelldata.boundary_quads[i].material_id = id;
+                }
+            }
+          else if (dim == 2)
+            {
+              for (unsigned int i = 0; i < no_lines; i++) //assigning IDs to bounds.
+                {
+                  int id = 0;
+                  in>>id;
+                  subcelldata.boundary_lines[i].material_id = id;
+                }
+            }
+        }
+
+      Assert(subcelldata.check_consistency(dim), ExcInternalError());
+
+      GridTools::delete_unused_vertices(vertices,
+                                        cells,
+                                        subcelldata);
+
+      if (dim == spacedim)
+        GridReordering<dim, spacedim>::invert_all_cells_of_negative_grid(vertices,
+            cells);
+
+      GridReordering<dim, spacedim>::reorder_cells(cells);
+      tria->create_triangulation_compatibility(vertices,
+                                               cells,
+                                               subcelldata);
+
+      return;
+    }
+  else
+    AssertThrow (false,
+                 ExcMessage ("While reading VTK file, failed to find CELLS section"));
+}
+
+// Reads a mesh in UNV format from @p in: a vertex section (2411), an element section (2412) and, if the stream holds
+// more data, a group section (2467 or 2477) that assigns ids. The result is passed to the attached triangulation.
+template<int dim, int spacedim>
+void GridIn<dim, spacedim>::read_unv(std::istream &in)
+{
+  Assert(tria != 0, ExcNoTriangulationSelected());
+  Assert((dim == 2)||(dim == 3), ExcNotImplemented());
+
+  AssertThrow(in, ExcIO());
+  skip_comment_lines(in, '#'); // skip comments (if any) at beginning of file
+
+  int tmp;
+  // two integers precede the vertex data; the first one is discarded (presumably the -1 that opens a section - TODO confirm)
+  AssertThrow(in, ExcIO());
+  in >> tmp;
+  AssertThrow(in, ExcIO());
+  in >> tmp;
+
+  AssertThrow(tmp == 2411, ExcUnknownSectionType(tmp)); // section 2411 describes vertices http://www.sdrl.uc.edu/universal-file-formats-for-modal-analysis-testing-1/file-format-storehouse/unv_2411.htm/
+
+  std::vector< Point<spacedim> > vertices; // vector of vertex coordinates
+  std::map<int, int> vertex_indices; // # vert in unv (key) ---> # vert in deal.II (value)
+
+  int no_vertex = 0; // deal.II index that the next vertex read will get
+
+  while (tmp != -1) // loop until a record starts with -1, which ends section 2411
+    {
+      int no; // label of the vertex in the unv file
+      int dummy;
+      double x[3];
+
+      AssertThrow(in, ExcIO());
+      in >> no;
+
+      tmp = no;
+      if (tmp == -1)
+        break;
+      // three more integers of the vertex record are read and ignored
+      in >> dummy >> dummy >> dummy;
+
+      AssertThrow(in, ExcIO());
+      in >> x[0] >> x[1] >> x[2];
+
+      vertices.push_back(Point<spacedim>());
+      // three coordinates are always read, only the first spacedim ones are kept
+      for (unsigned int d = 0; d < spacedim; d++)
+        vertices.back()(d) = x[d];
+
+      vertex_indices[no] = no_vertex;
+
+      no_vertex++;
+    }
+  // again discard one integer, then expect the number of the next section
+  AssertThrow(in, ExcIO());
+  in >> tmp;
+  AssertThrow(in, ExcIO());
+  in >> tmp;
+
+  AssertThrow(tmp == 2412, ExcUnknownSectionType(tmp)); // section 2412 describes elements http://www.sdrl.uc.edu/universal-file-formats-for-modal-analysis-testing-1/file-format-storehouse/unv_2412.htm/
+
+  std::vector< CellData<dim> > cells; // vector of cells
+  SubCellData subcelldata;
+  // the three maps below are used by the group section to find the element a label refers to
+  std::map<int, int> cell_indices; // # cell in unv (key) ---> # cell in deal.II (value)
+  std::map<int, int> line_indices; // # line in unv (key) ---> # line in deal.II (value)
+  std::map<int, int> quad_indices; // # quad in unv (key) ---> # quad in deal.II (value)
+
+  int no_cell = 0; // deal.II index of the next cell
+  int no_line = 0; // deal.II index of the next boundary line
+  int no_quad = 0; // deal.II index of the next boundary quad
+
+  while (tmp != -1) // loop until a record starts with -1, which ends section 2412
+    {
+      int no; // label of the element in the unv file
+      int type;
+      int dummy;
+
+      AssertThrow(in, ExcIO());
+      in >> no;
+
+      tmp = no;
+      if (tmp == -1)
+        break;
+      // the element type follows the label; four more integers are read and ignored
+      in >> type >> dummy >> dummy >> dummy >> dummy;
+
+      AssertThrow((type == 11)||(type == 44)||(type == 94)||(type == 115), ExcUnknownElementType(type));
+      // types 44 and 94 are cells in 2d and boundary quads in 3d, type 115 is a cell in 3d, type 11 is a boundary line
+      if ( (((type == 44)||(type == 94))&&(dim == 2)) || ((type == 115)&&(dim == 3)) ) // cell
+        {
+          cells.push_back(CellData<dim>());
+
+          AssertThrow(in, ExcIO());
+          for (unsigned int v = 0; v < GeometryInfo<dim>::vertices_per_cell; v++)
+            in >> cells.back().vertices[v];
+
+          cells.back().material_id = 0;
+          // translate unv vertex labels into deal.II vertex numbers
+          for (unsigned int v = 0; v < GeometryInfo<dim>::vertices_per_cell; v++)
+            cells.back().vertices[v] = vertex_indices[cells.back().vertices[v]];
+
+          cell_indices[no] = no_cell;
+
+          no_cell++;
+        }
+      else if ( ((type == 11)&&(dim == 2)) || ((type == 11)&&(dim == 3)) ) // boundary line
+        {
+          AssertThrow(in, ExcIO());
+          in >> dummy >> dummy >> dummy; // three integers precede the vertices of a line and are ignored
+
+          subcelldata.boundary_lines.push_back(CellData<1>());
+
+          AssertThrow(in, ExcIO());
+          for (unsigned int v = 0; v < 2; v++)
+            in >> subcelldata.boundary_lines.back().vertices[v];
+
+          subcelldata.boundary_lines.back().material_id = 0;
+          // translate unv vertex labels into deal.II vertex numbers
+          for (unsigned int v = 0; v < 2; v++)
+            subcelldata.boundary_lines.back().vertices[v] = vertex_indices[subcelldata.boundary_lines.back().vertices[v]];
+
+          line_indices[no] = no_line;
+
+          no_line++;
+        }
+      else if ( ((type == 44)||(type == 94)) && (dim == 3) ) // boundary quad
+        {
+          subcelldata.boundary_quads.push_back(CellData<2>());
+
+          AssertThrow(in, ExcIO());
+          for (unsigned int v = 0; v < 4; v++)
+            in >> subcelldata.boundary_quads.back().vertices[v];
+
+          subcelldata.boundary_quads.back().material_id = 0;
+          // translate unv vertex labels into deal.II vertex numbers
+          for (unsigned int v = 0; v < 4; v++)
+            subcelldata.boundary_quads.back().vertices[v] = vertex_indices[subcelldata.boundary_quads.back().vertices[v]];
+
+          quad_indices[no] = no_quad;
+
+          no_quad++;
+        }
+      else
+        AssertThrow (false,
+                     ExcMessage ("Unknown element label <"
+                                 + Utilities::int_to_string(type)
+                                 + "> when running in dim="
+                                 + Utilities::int_to_string(dim)));
+    }
+
+// note that so far all materials and bcs are explicitly set to 0
+// if we do not need more info on materials and bcs - this is end of file
+// if we do - section 2467 or 2477 comes
+
+  in >> tmp; // tmp can be either -1 or end-of-file
+
+  if ( !in.eof() )
+    {
+      AssertThrow(in, ExcIO());
+      in >> tmp;
+
+      AssertThrow((tmp == 2467)||(tmp == 2477), ExcUnknownSectionType(tmp)); // section 2467 (2477) describes (materials - first and bcs - second) or (bcs - first and materials - second) - sequence depends on which group is created first
+      // http://www.sdrl.uc.edu/universal-file-formats-for-modal-analysis-testing-1/file-format-storehouse/unv_2467.htm/
+
+      while (tmp != -1) // loop until a group starts with -1, which ends section 2467 or 2477
+        {
+          int n_entities; // number of entities in group
+          int id;         // id is either material or bc
+          int no;         // label of an entity in the unv file
+          int dummy;
+
+          AssertThrow(in, ExcIO());
+          in >> dummy;
+
+          tmp = dummy;
+          if (tmp == -1)
+            break;
+          // the group header holds seven more integers, of which only the last one (the entity count) is used
+          in >> dummy >> dummy >> dummy >> dummy >> dummy >> dummy >> n_entities;
+
+          AssertThrow(in, ExcIO());
+          in >> id; // the next token is read as an integer and used as the id of all entities in the group
+          // entities are read two per line, so an odd count leaves a single entity on the last line
+          const unsigned int n_lines = (n_entities%2 == 0)?(n_entities/2):((n_entities+1)/2);
+
+          for (unsigned int line = 0; line < n_lines; line++)
+            {
+              unsigned int n_fragments;
+
+              if (line == n_lines-1)
+                n_fragments = (n_entities%2 == 0)?(2):(1);
+              else
+                n_fragments = 2;
+
+              for (unsigned int no_fragment = 0; no_fragment < n_fragments; no_fragment++)
+                {
+                  AssertThrow(in, ExcIO());
+                  in >> dummy >> no >> dummy >> dummy; // each entity consists of four integers, the second one is its label
+                  // the label is looked up in all three maps, and labels found in none of them are ignored
+                  if ( cell_indices.count(no) > 0 ) // cell - material
+                    cells[cell_indices[no]].material_id = id;
+
+                  if ( line_indices.count(no) > 0 ) // boundary line - bc
+                    subcelldata.boundary_lines[line_indices[no]].material_id = id;
+
+                  if ( quad_indices.count(no) > 0 ) // boundary quad - bc
+                    subcelldata.boundary_quads[quad_indices[no]].material_id = id;
+                }
+            }
+        }
+    }
+
+  Assert(subcelldata.check_consistency(dim), ExcInternalError());
+
+  GridTools::delete_unused_vertices(vertices,
+                                    cells,
+                                    subcelldata);
+  // inverting cells of a negative grid is only done when dim equals spacedim
+  if (dim == spacedim)
+    GridReordering<dim, spacedim>::invert_all_cells_of_negative_grid(vertices,
+        cells);
+
+  GridReordering<dim, spacedim>::reorder_cells(cells);
+
+  tria->create_triangulation_compatibility(vertices,
+                                           cells,
+                                           subcelldata);
+}
+
+
+
+template <int dim, int spacedim>
+void GridIn<dim, spacedim>::read_ucd (std::istream &in)
+{
+  Assert (tria != 0, ExcNoTriangulationSelected());
+  AssertThrow (in, ExcIO());
+
+  // skip comments at start of file
+  skip_comment_lines (in, '#');
+
+
+  unsigned int n_vertices;
+  unsigned int n_cells;
+  int dummy;
+
+  in >> n_vertices
+     >> n_cells
+     >> dummy         // number of data vectors
+     >> dummy         // cell data
+     >> dummy;        // model data
+  AssertThrow (in, ExcIO());
+
+  // set up array of vertices
+  std::vector<Point<spacedim> >     vertices (n_vertices);
+  // set up mapping between numbering
+  // in ucd-file (key) and in the
+  // vertices vector
+  std::map<int,int> vertex_indices;
+
+  for (unsigned int vertex=0; vertex<n_vertices; ++vertex)
+    {
+      int vertex_number;
+      double x[3];
+
+      // read vertex
+      AssertThrow (in, ExcIO());
+      in >> vertex_number
+         >> x[0] >> x[1] >> x[2];
+
+      // store vertex
+      for (unsigned int d=0; d<spacedim; ++d)
+        vertices[vertex](d) = x[d];
+      // store mapping; note that
+      // vertices_indices[i] is automatically
+      // created upon first usage
+      vertex_indices[vertex_number] = vertex;
+    };
+
+  // set up array of cells
+  std::vector<CellData<dim> > cells;
+  SubCellData                 subcelldata;
+
+  for (unsigned int cell=0; cell<n_cells; ++cell)
+    {
+      // note that since in the input
+      // file we found the number of
+      // cells at the top, there
+      // should still be input here,
+      // so check this:
+      AssertThrow (in, ExcIO());
+
+      std::string cell_type;
+
+      // we use an unsigned int because we
+      // fill this variable through an read-in process
+      unsigned int material_id;
+
+      in >> dummy          // cell number
+         >> material_id;
+      in >> cell_type;
+
+      if (((cell_type == "line") && (dim == 1)) ||
+          ((cell_type == "quad") && (dim == 2)) ||
+          ((cell_type == "hex" ) && (dim == 3)))
+        // found a cell
+        {
+          // allocate and read indices
+          cells.push_back (CellData<dim>());
+          for (unsigned int i=0; i<GeometryInfo<dim>::vertices_per_cell; ++i)
+            in >> cells.back().vertices[i];
+
+          // to make sure that the cast wont fail
+          Assert(material_id<= std::numeric_limits<types::material_id>::max(),
+                 ExcIndexRange(material_id,0,std::numeric_limits<types::material_id>::max()));
+          // we use only material_ids in the range from 0 to numbers::invalid_material_id-1
+          Assert(material_id < numbers::invalid_material_id,
+                 ExcIndexRange(material_id,0,numbers::invalid_material_id));
+
+          cells.back().material_id = static_cast<types::material_id>(material_id);
+
+          // transform from ucd to
+          // consecutive numbering
+          for (unsigned int i=0; i<GeometryInfo<dim>::vertices_per_cell; ++i)
+            if (vertex_indices.find (cells.back().vertices[i]) != vertex_indices.end())
+              // vertex with this index exists
+              cells.back().vertices[i] = vertex_indices[cells.back().vertices[i]];
+            else
+              {
+                // no such vertex index
+                AssertThrow (false,
+                             ExcInvalidVertexIndex(cell, cells.back().vertices[i]));
+
+                cells.back().vertices[i] = numbers::invalid_unsigned_int;
+              }
+        }
+      else if ((cell_type == "line") && ((dim == 2) || (dim == 3)))
+        // boundary info
+        {
+          subcelldata.boundary_lines.push_back (CellData<1>());
+          in >> subcelldata.boundary_lines.back().vertices[0]
+             >> subcelldata.boundary_lines.back().vertices[1];
+
+          // to make sure that the cast wont fail
+          Assert(material_id<= std::numeric_limits<types::boundary_id>::max(),
+                 ExcIndexRange(material_id,0,std::numeric_limits<types::boundary_id>::max()));
+          // we use only boundary_ids in the range from 0 to numbers::internal_face_boundary_id-1
+          Assert(material_id < numbers::internal_face_boundary_id,
+                 ExcIndexRange(material_id,0,numbers::internal_face_boundary_id));
+
+          subcelldata.boundary_lines.back().boundary_id
+            = static_cast<types::boundary_id>(material_id);
+
+          // transform from ucd to
+          // consecutive numbering
+          for (unsigned int i=0; i<2; ++i)
+            if (vertex_indices.find (subcelldata.boundary_lines.back().vertices[i]) !=
+                vertex_indices.end())
+              // vertex with this index exists
+              subcelldata.boundary_lines.back().vertices[i]
+                = vertex_indices[subcelldata.boundary_lines.back().vertices[i]];
+            else
+              {
+                // no such vertex index
+                AssertThrow (false,
+                             ExcInvalidVertexIndex(cell,
+                                                   subcelldata.boundary_lines.back().vertices[i]));
+                subcelldata.boundary_lines.back().vertices[i]
+                  = numbers::invalid_unsigned_int;
+              };
+        }
+      else if ((cell_type == "quad") && (dim == 3))
+        // boundary info
+        {
+          subcelldata.boundary_quads.push_back (CellData<2>());
+          in >> subcelldata.boundary_quads.back().vertices[0]
+             >> subcelldata.boundary_quads.back().vertices[1]
+             >> subcelldata.boundary_quads.back().vertices[2]
+             >> subcelldata.boundary_quads.back().vertices[3];
+
+          // to make sure that the cast wont fail
+          Assert(material_id<= std::numeric_limits<types::boundary_id>::max(),
+                 ExcIndexRange(material_id,0,std::numeric_limits<types::boundary_id>::max()));
+          // we use only boundary_ids in the range from 0 to numbers::internal_face_boundary_id-1
+          Assert(material_id < numbers::internal_face_boundary_id,
+                 ExcIndexRange(material_id,0,numbers::internal_face_boundary_id));
+
+          subcelldata.boundary_quads.back().boundary_id
+            = static_cast<types::boundary_id>(material_id);
+
+          // transform from ucd to
+          // consecutive numbering
+          for (unsigned int i=0; i<4; ++i)
+            if (vertex_indices.find (subcelldata.boundary_quads.back().vertices[i]) !=
+                vertex_indices.end())
+              // vertex with this index exists
+              subcelldata.boundary_quads.back().vertices[i]
+                = vertex_indices[subcelldata.boundary_quads.back().vertices[i]];
+            else
+              {
+                // no such vertex index
+                Assert (false,
+                        ExcInvalidVertexIndex(cell,
+                                              subcelldata.boundary_quads.back().vertices[i]));
+                subcelldata.boundary_quads.back().vertices[i] =
+                  numbers::invalid_unsigned_int;
+              };
+
+        }
+      else
+        // cannot read this
+        AssertThrow (false, ExcUnknownIdentifier(cell_type));
+    };
+
+
+  // check that no forbidden arrays are used
+  Assert (subcelldata.check_consistency(dim), ExcInternalError());
+
+  AssertThrow (in, ExcIO());
+
+  // do some clean-up on vertices...
+  GridTools::delete_unused_vertices (vertices, cells, subcelldata);
+  // ... and cells
+  if (dim==spacedim)
+    GridReordering<dim,spacedim>::invert_all_cells_of_negative_grid (vertices, cells);
+  GridReordering<dim,spacedim>::reorder_cells (cells);
+  tria->create_triangulation_compatibility (vertices, cells, subcelldata);
+}
+
+namespace  // file-local helper used by GridIn::read_abaqus() to turn Abaqus input into UCD text
+{
+  template <int dim>
+  class Abaqus_to_UCD
+  {
+  public:
+    Abaqus_to_UCD ();
+    // read_abaqus() calls read_in_abaqus() first and write_out_avs_ucd() afterwards
+    void read_in_abaqus (std::istream &in);
+    void write_out_avs_ucd (std::ostream &out) const;
+
+  private:
+    const double tolerance;  // NOTE(review): value and use are set outside this view (constructor)
+    // NOTE(review): returns doubles although the name speaks of node numbers; same int-in-double storage as below
+    std::vector<double> get_global_node_numbers (const int face_cell_no,
+                                                 const int face_cell_face_no) const;
+
+    // Node list, one row per node: [ global node-id (int), x-coord, y-coord, z-coord ]
+    std::vector< std::vector<double> >  node_list;
+    // Cell list, one row per cell: [ material-id (int), node1, node2, node3, node4, node5, node6, node7, node8 ]
+    std::vector< std::vector<double> >  cell_list;
+    // Face list, one row per face: [ sideset-id (int), node1, node2, node3, node4 ]
+    std::vector< std::vector<double> >  face_list;
+    // Element sets: maps the elset name (std::string) to the vector of cell numbers it contains
+    std::map< std::string, std::vector<int> > elsets_list;
+  };
+}
+
+template <int dim, int spacedim>
+void GridIn<dim, spacedim>::read_abaqus (std::istream &in)
+{
+  // Reads an Abaqus mesh from 'in' by converting it to UCD text in memory and passing that to read_ucd().
+  Assert (tria != 0, ExcNoTriangulationSelected());
+  Assert (dim==2 || dim==3, ExcNotImplemented());
+  AssertThrow (in, ExcIO());
+
+  // Parse the Abaqus file into an intermediate object that can emit UCD
+  Abaqus_to_UCD<dim> abaqus_to_ucd;
+  abaqus_to_ucd.read_in_abaqus(in);
+
+  std::stringstream in_ucd;
+  abaqus_to_ucd.write_out_avs_ucd(in_ucd);
+
+  // The UCD read is wrapped in a try-catch so that, if the Abaqus mesh was
+  // read in but the converted data is rejected, the user is pointed at the
+  // Abaqus input rather than at read_ucd(). Whatever was thrown is replaced
+  // by the ExcMessage below (adjacent literals: no stray indentation inside).
+  try
+    {
+      read_ucd(in_ucd);
+    }
+  catch (...)
+    {
+      AssertThrow(false, ExcMessage("Internal conversion from ABAQUS file to UCD format was unsuccessful. "
+                                   "Are you sure that your ABAQUS mesh file conforms with the requirements "
+                                   "listed in the documentation?"));
+    }
+}
+
+
+template <int dim, int spacedim>
+void GridIn<dim, spacedim>::read_dbmesh (std::istream &in)
+{
+  // Reads a 2d DB Mesh file from 'in' (vertices and quadrilaterals; edge blocks are read and discarded).
+  Assert (tria != 0, ExcNoTriangulationSelected());
+  Assert (dim==2, ExcNotImplemented());
+  AssertThrow (in, ExcIO());
+
+  // skip comments at start of file
+  skip_comment_lines (in, '#');
+
+  // first read in identifier string
+  std::string line;
+  getline (in, line);
+
+  AssertThrow (line=="MeshVersionFormatted 0",
+               ExcInvalidDBMESHInput(line));
+
+  skip_empty_lines (in);
+
+  // next read dimension (initialized so a failed read compares as 0)
+  getline (in, line);
+  AssertThrow (line=="Dimension", ExcInvalidDBMESHInput(line));
+  unsigned int dimension = 0;
+  in >> dimension;
+  AssertThrow (dimension == dim, ExcDBMESHWrongDimension(dimension));
+  skip_empty_lines (in);
+
+  // now there are a lot of fields of
+  // which we don't know the exact
+  // meaning and which are far from
+  // being properly documented in the
+  // manual. we skip everything until
+  // we find a comment line with the
+  // string "# END". the stream state
+  // is part of the loop condition:
+  // once the input is exhausted,
+  // 'line' no longer changes and the
+  // loop would otherwise never end
+  while (getline(in,line), ((line.find("# END")==std::string::npos) && (in)))
+    ;
+  AssertThrow (in, ExcInvalidDBMeshFormat());
+  skip_empty_lines (in);
+
+  // now read vertices
+  getline (in, line);
+  AssertThrow (line=="Vertices", ExcInvalidDBMESHInput(line));
+
+  unsigned int n_vertices = 0;
+  double dummy;
+
+  in >> n_vertices;
+  std::vector<Point<spacedim> >     vertices (n_vertices);
+  for (unsigned int vertex=0; vertex<n_vertices; ++vertex)
+    {
+      // read vertex coordinates
+      for (unsigned int d=0; d<dim; ++d)
+        in >> vertices[vertex][d];
+      // read Ref phi_i, whatever that may be
+      in >> dummy;
+    };
+  AssertThrow (in, ExcInvalidDBMeshFormat());
+
+  skip_empty_lines(in);
+
+  // read edges. we ignore them at
+  // present, so just read them and
+  // discard the input
+  getline (in, line);
+  AssertThrow (line=="Edges", ExcInvalidDBMESHInput(line));
+
+  unsigned int n_edges = 0;
+  in >> n_edges;
+  for (unsigned int edge=0; edge<n_edges; ++edge)
+    {
+      // read vertex indices
+      in >> dummy >> dummy;
+      // read Ref phi_i, whatever that may be
+      in >> dummy;
+    };
+  AssertThrow (in, ExcInvalidDBMeshFormat());
+
+  skip_empty_lines(in);
+
+
+
+  // read cracked edges (whatever
+  // that may be). we ignore them at
+  // present, so just read them and
+  // discard the input
+  getline (in, line);
+  AssertThrow (line=="CrackedEdges", ExcInvalidDBMESHInput(line));
+
+  in >> n_edges;
+  for (unsigned int edge=0; edge<n_edges; ++edge)
+    {
+      // read vertex indices
+      in >> dummy >> dummy;
+      // read Ref phi_i, whatever that may be
+      in >> dummy;
+    };
+  AssertThrow (in, ExcInvalidDBMeshFormat());
+
+  skip_empty_lines(in);
+
+
+  // now read cells.
+  // set up array of cells
+  getline (in, line);
+  AssertThrow (line=="Quadrilaterals", ExcInvalidDBMESHInput(line));
+
+  std::vector<CellData<dim> > cells;
+  SubCellData            subcelldata;
+  unsigned int n_cells = 0;
+  in >> n_cells;
+  for (unsigned int cell=0; cell<n_cells; ++cell)
+    {
+      // read in vertex numbers. they
+      // are 1-based, so subtract one
+      cells.push_back (CellData<dim>());
+      for (unsigned int i=0; i<GeometryInfo<dim>::vertices_per_cell; ++i)
+        {
+          in >> cells.back().vertices[i];
+
+          AssertThrow ((cells.back().vertices[i] >= 1)
+                       &&
+                       (static_cast<unsigned int>(cells.back().vertices[i]) <= vertices.size()),
+                       ExcInvalidVertexIndex(cell, cells.back().vertices[i]));
+
+          --cells.back().vertices[i];
+        };
+
+      // read and discard Ref phi_i
+      in >> dummy;
+    };
+  AssertThrow (in, ExcInvalidDBMeshFormat());
+
+  skip_empty_lines(in);
+
+
+  // then there are again a whole lot
+  // of fields of which I have no
+  // clue what they mean. skip them
+  // all and leave the interpretation
+  // to other implementors...
+  while (getline(in,line), ((line.find("End")==std::string::npos) && (in)))
+    ;
+  // ok, so we are not at the end of
+  // the file, that's it, mostly
+
+
+  // check that no forbidden arrays are used
+  Assert (subcelldata.check_consistency(dim), ExcInternalError());
+
+  AssertThrow (in, ExcIO());
+
+  // do some clean-up on vertices...
+  GridTools::delete_unused_vertices (vertices, cells, subcelldata);
+  // ...and cells
+  GridReordering<dim,spacedim>::invert_all_cells_of_negative_grid (vertices, cells);
+  GridReordering<dim,spacedim>::reorder_cells (cells);
+  tria->create_triangulation_compatibility (vertices, cells, subcelldata);
+}
+
+
+
+template <int dim, int spacedim>
+void GridIn<dim, spacedim>::read_xda (std::istream &)
+{
+  Assert (false, ExcNotImplemented());  // generic case has no XDA reader; explicit specializations for GridIn<2> and GridIn<3> follow
+}
+
+
+
+template <>
+void GridIn<2>::read_xda (std::istream &in)
+{
+  // Reads a 2d XDA mesh from 'in': header with the counts, 4 vertex indices per cell, then 3 coordinates per vertex.
+  Assert (tria != 0, ExcNoTriangulationSelected());
+  AssertThrow (in, ExcIO());
+
+  std::string line;
+  // skip the first line of the file
+  getline (in, line);
+
+  // zero-initialized so that a failed read cannot size the arrays below with garbage
+  unsigned int n_vertices = 0;
+  unsigned int n_cells = 0;
+
+  // read cells, throw away rest of line
+  in >> n_cells;
+  getline (in, line);
+
+  in >> n_vertices;
+  getline (in, line);
+
+  // ignore following 8 lines
+  for (unsigned int i=0; i<8; ++i)
+    getline (in, line);
+
+  // set up array of cells
+  std::vector<CellData<2> > cells (n_cells);
+  SubCellData subcelldata;
+
+  for (unsigned int cell=0; cell<n_cells; ++cell)
+    {
+      // note that since in the input
+      // file we found the number of
+      // cells at the top, there
+      // should still be input here,
+      // so check this:
+      AssertThrow (in, ExcIO());
+      Assert (GeometryInfo<2>::vertices_per_cell == 4,
+              ExcInternalError());
+
+      for (unsigned int i=0; i<4; ++i)
+        in >> cells[cell].vertices[i];
+    };
+
+
+
+  // set up array of vertices
+  std::vector<Point<2> > vertices (n_vertices);
+  for (unsigned int vertex=0; vertex<n_vertices; ++vertex)
+    {
+      double x[3];
+
+      // read vertex (three coordinates are stored in the file)
+      in >> x[0] >> x[1] >> x[2];
+
+      // store vertex; only the first two coordinates are kept
+      for (unsigned int d=0; d<2; ++d)
+        vertices[vertex](d) = x[d];
+    };
+  AssertThrow (in, ExcIO());
+
+  // do some clean-up on vertices...
+  GridTools::delete_unused_vertices (vertices, cells, subcelldata);
+  // ... and cells
+  GridReordering<2>::invert_all_cells_of_negative_grid (vertices, cells);
+  GridReordering<2>::reorder_cells (cells);
+  tria->create_triangulation_compatibility (vertices, cells, subcelldata);
+}
+
+
+
+template <>
+void GridIn<3>::read_xda (std::istream &in)
+{
+  // Reads a 3d XDA mesh from 'in': header with the counts, 8 vertex indices per cell, then 3 coordinates per vertex.
+  Assert (tria != 0, ExcNoTriangulationSelected());
+  AssertThrow (in, ExcIO());
+
+  // position i of a deal.II cell takes the xda node at index xda_to_dealII_map[i]
+  static const unsigned int xda_to_dealII_map[] = {0,1,5,4,3,2,6,7};
+
+  std::string line;
+  // skip the first line of the file
+  getline (in, line);
+
+  // zero-initialized so that a failed read cannot size the arrays below with garbage
+  unsigned int n_vertices = 0;
+  unsigned int n_cells = 0;
+
+  // read cells, throw away rest of line
+  in >> n_cells;
+  getline (in, line);
+
+  in >> n_vertices;
+  getline (in, line);
+
+  // ignore following 8 lines
+  for (unsigned int i=0; i<8; ++i)
+    getline (in, line);
+
+  // set up array of cells
+  std::vector<CellData<3> > cells (n_cells);
+  SubCellData subcelldata;
+
+  for (unsigned int cell=0; cell<n_cells; ++cell)
+    {
+      // note that since in the input
+      // file we found the number of
+      // cells at the top, there
+      // should still be input here,
+      // so check this:
+      AssertThrow (in, ExcIO());
+      Assert(GeometryInfo<3>::vertices_per_cell == 8,
+             ExcInternalError());
+
+      unsigned int xda_ordered_nodes[8];
+
+      for (unsigned int i=0; i<8; ++i)
+        in >> xda_ordered_nodes[i];
+
+      for (unsigned int i=0; i<8; i++)
+        cells[cell].vertices[i] = xda_ordered_nodes[xda_to_dealII_map[i]];
+    };
+
+
+
+  // set up array of vertices
+  std::vector<Point<3> > vertices (n_vertices);
+  for (unsigned int vertex=0; vertex<n_vertices; ++vertex)
+    {
+      double x[3];
+
+      // read vertex
+      in >> x[0] >> x[1] >> x[2];
+
+      // store vertex
+      for (unsigned int d=0; d<3; ++d)
+        vertices[vertex](d) = x[d];
+    };
+  AssertThrow (in, ExcIO());
+
+  // do some clean-up on vertices...
+  GridTools::delete_unused_vertices (vertices, cells, subcelldata);
+  // ... and cells
+  GridReordering<3>::invert_all_cells_of_negative_grid (vertices, cells);
+  GridReordering<3>::reorder_cells (cells);
+  tria->create_triangulation_compatibility (vertices, cells, subcelldata);
+}
+
+
+
+
+template <int dim, int spacedim>
+void GridIn<dim, spacedim>::read_msh (std::istream &in)
+{
+  // Reads a gmsh mesh (format 1 starting with "$NOD", or format 2 starting with "$MeshFormat") from 'in'.
+  Assert (tria != 0, ExcNoTriangulationSelected());
+  AssertThrow (in, ExcIO());
+  // counts are zero-initialized so that a failed read cannot leave them indeterminate
+  unsigned int n_vertices = 0;
+  unsigned int n_cells = 0;
+  unsigned int dummy;
+  std::string line;
+
+  in >> line;
+
+  // first determine file format
+  unsigned int gmsh_file_format = 0;
+  if (line == "$NOD")
+    gmsh_file_format = 1;
+  else if (line == "$MeshFormat")
+    gmsh_file_format = 2;
+  else
+    AssertThrow (false, ExcInvalidGMSHInput(line));
+
+  // if file format is 2 or greater
+  // then we also have to read the
+  // rest of the header
+  if (gmsh_file_format == 2)
+    {
+      double version;
+      unsigned int file_type, data_size;
+
+      in >> version >> file_type >> data_size;
+
+      Assert ( (version >= 2.0) &&
+               (version <= 2.2), ExcNotImplemented());
+      Assert (file_type == 0, ExcNotImplemented());
+      Assert (data_size == sizeof(double), ExcNotImplemented());
+
+      // read the end of the header
+      // and the first line of the
+      // nodes description to synch
+      // ourselves with the format 1
+      // handling above
+      in >> line;
+      AssertThrow (line == "$EndMeshFormat",
+                   ExcInvalidGMSHInput(line));
+
+      in >> line;
+      // if the next block is of kind
+      // $PhysicalNames, ignore it
+      if (line == "$PhysicalNames")
+        {
+          do
+            {
+              in >> line;
+            }
+          while ((line != "$EndPhysicalNames") && in);  // stop at end of input too; 'line' stops changing then
+          in >> line;
+        }
+
+      // but the next thing should,
+      // in any case, be the list of
+      // nodes:
+      AssertThrow (line == "$Nodes",
+                   ExcInvalidGMSHInput(line));
+    }
+
+  // now read the nodes list
+  in >> n_vertices;
+  std::vector<Point<spacedim> >     vertices (n_vertices);
+  // set up mapping between numbering
+  // in msh-file (nod) and in the
+  // vertices vector
+  std::map<int,int> vertex_indices;
+
+  for (unsigned int vertex=0; vertex<n_vertices; ++vertex)
+    {
+      int vertex_number;
+      double x[3];
+
+      // read vertex
+      in >> vertex_number
+         >> x[0] >> x[1] >> x[2];
+
+      for (unsigned int d=0; d<spacedim; ++d)
+        vertices[vertex](d) = x[d];
+      // store mapping
+      vertex_indices[vertex_number] = vertex;
+    }
+
+  // Assert we reached the end of the block
+  in >> line;
+  static const std::string end_nodes_marker[] = {"$ENDNOD", "$EndNodes" };
+  AssertThrow (line==end_nodes_marker[gmsh_file_format-1],
+               ExcInvalidGMSHInput(line));
+
+  // Now read in next bit
+  in >> line;
+  static const std::string begin_elements_marker[] = {"$ELM", "$Elements" };
+  AssertThrow (line==begin_elements_marker[gmsh_file_format-1],
+               ExcInvalidGMSHInput(line));
+
+  in >> n_cells;
+
+  // set up array of cells and subcells (faces). In 1d, there is currently no
+  // standard way in deal.II to pass boundary indicators attached to individual
+  // vertices, so do this by hand via the boundary_ids_1d array
+  std::vector<CellData<dim> >                cells;
+  SubCellData                                subcelldata;
+  std::map<unsigned int, types::boundary_id> boundary_ids_1d;
+
+  for (unsigned int cell=0; cell<n_cells; ++cell)
+    {
+      // note that since in the input
+      // file we found the number of
+      // cells at the top, there
+      // should still be input here,
+      // so check this:
+      AssertThrow (in, ExcIO());
+
+      unsigned int cell_type;
+      unsigned int material_id;
+      unsigned int nod_num;
+
+      /*
+        For file format version 1, the format of each cell is as follows:
+          elm-number elm-type reg-phys reg-elem number-of-nodes node-number-list
+
+        However, for version 2, the format reads like this:
+          elm-number elm-type number-of-tags < tag > ... node-number-list
+
+        In the following, we will ignore the element number (we simply enumerate
+        them in the order in which we read them, and we will take reg-phys
+        (version 1) or the first tag (version 2, if any tag is given at all) as
+        material id.
+      */
+
+      in >> dummy          // ELM-NUMBER
+         >> cell_type;     // ELM-TYPE
+
+      switch (gmsh_file_format)
+        {
+        case 1:
+        {
+          in >> material_id  // REG-PHYS
+             >> dummy        // reg_elm
+             >> nod_num;
+          break;
+        }
+
+        case 2:
+        {
+          // read the tags; ignore all but the first one which we will
+          // interpret as the material_id (for cells) or boundary_id
+          // (for faces)
+          unsigned int n_tags;
+          in >> n_tags;
+          if (n_tags > 0)
+            in >> material_id;
+          else
+            material_id = 0;
+
+          for (unsigned int i=1; i<n_tags; ++i)
+            in >> dummy;
+
+          nod_num = GeometryInfo<dim>::vertices_per_cell;
+
+          break;
+        }
+
+        default:
+          AssertThrow (false, ExcNotImplemented());
+        }
+
+
+      /*       `ELM-TYPE'
+               defines the geometrical type of the N-th element:
+               `1'
+               Line (2 nodes, 1 edge).
+
+               `3'
+               Quadrangle (4 nodes, 4 edges).
+
+               `5'
+               Hexahedron (8 nodes, 12 edges, 6 faces).
+
+               `15'
+               Point (1 node).
+      */
+
+      if (((cell_type == 1) && (dim == 1)) ||
+          ((cell_type == 3) && (dim == 2)) ||
+          ((cell_type == 5) && (dim == 3)))
+        // found a cell
+        {
+          AssertThrow (nod_num == GeometryInfo<dim>::vertices_per_cell,
+                       ExcMessage ("Number of nodes does not coincide with the "
+                                   "number required for this object"));
+
+          // allocate and read indices
+          cells.push_back (CellData<dim>());
+          for (unsigned int i=0; i<GeometryInfo<dim>::vertices_per_cell; ++i)
+            in >> cells.back().vertices[i];
+
+          // to make sure that the cast wont fail
+          Assert(material_id<= std::numeric_limits<types::material_id>::max(),
+                 ExcIndexRange(material_id,0,std::numeric_limits<types::material_id>::max()));
+          // we use only material_ids in the range from 0 to numbers::invalid_material_id-1
+          Assert(material_id < numbers::invalid_material_id,
+                 ExcIndexRange(material_id,0,numbers::invalid_material_id));
+
+          cells.back().material_id = static_cast<types::material_id>(material_id);
+
+          // transform from gmsh to
+          // consecutive numbering
+          for (unsigned int i=0; i<GeometryInfo<dim>::vertices_per_cell; ++i)
+            {
+              AssertThrow (vertex_indices.find (cells.back().vertices[i]) !=
+                           vertex_indices.end(),
+                           ExcInvalidVertexIndex(cell, cells.back().vertices[i]));
+
+              // vertex with this index exists
+              cells.back().vertices[i] = vertex_indices[cells.back().vertices[i]];
+            }
+        }
+      else if ((cell_type == 1) && ((dim == 2) || (dim == 3)))
+        // boundary info
+        {
+          subcelldata.boundary_lines.push_back (CellData<1>());
+          in >> subcelldata.boundary_lines.back().vertices[0]
+             >> subcelldata.boundary_lines.back().vertices[1];
+
+          // to make sure that the cast wont fail
+          Assert(material_id<= std::numeric_limits<types::boundary_id>::max(),
+                 ExcIndexRange(material_id,0,std::numeric_limits<types::boundary_id>::max()));
+          // we use only boundary_ids in the range from 0 to numbers::internal_face_boundary_id-1
+          Assert(material_id < numbers::internal_face_boundary_id,
+                 ExcIndexRange(material_id,0,numbers::internal_face_boundary_id));
+
+          subcelldata.boundary_lines.back().boundary_id
+            = static_cast<types::boundary_id>(material_id);
+
+          // transform from gmsh to
+          // consecutive numbering
+          for (unsigned int i=0; i<2; ++i)
+            if (vertex_indices.find (subcelldata.boundary_lines.back().vertices[i]) !=
+                vertex_indices.end())
+              // vertex with this index exists
+              subcelldata.boundary_lines.back().vertices[i]
+                = vertex_indices[subcelldata.boundary_lines.back().vertices[i]];
+            else
+              {
+                // no such vertex index
+                AssertThrow (false,
+                             ExcInvalidVertexIndex(cell,
+                                                   subcelldata.boundary_lines.back().vertices[i]));
+                subcelldata.boundary_lines.back().vertices[i] =
+                  numbers::invalid_unsigned_int;
+              };
+        }
+      else if ((cell_type == 3) && (dim == 3))
+        // boundary info
+        {
+          subcelldata.boundary_quads.push_back (CellData<2>());
+          in >> subcelldata.boundary_quads.back().vertices[0]
+             >> subcelldata.boundary_quads.back().vertices[1]
+             >> subcelldata.boundary_quads.back().vertices[2]
+             >> subcelldata.boundary_quads.back().vertices[3];
+
+          // to make sure that the cast wont fail
+          Assert(material_id<= std::numeric_limits<types::boundary_id>::max(),
+                 ExcIndexRange(material_id,0,std::numeric_limits<types::boundary_id>::max()));
+          // we use only boundary_ids in the range from 0 to numbers::internal_face_boundary_id-1
+          Assert(material_id < numbers::internal_face_boundary_id,
+                 ExcIndexRange(material_id,0,numbers::internal_face_boundary_id));
+
+          subcelldata.boundary_quads.back().boundary_id
+            = static_cast<types::boundary_id>(material_id);
+
+          // transform from gmsh to
+          // consecutive numbering
+          for (unsigned int i=0; i<4; ++i)
+            if (vertex_indices.find (subcelldata.boundary_quads.back().vertices[i]) !=
+                vertex_indices.end())
+              // vertex with this index exists
+              subcelldata.boundary_quads.back().vertices[i]
+                = vertex_indices[subcelldata.boundary_quads.back().vertices[i]];
+            else
+              {
+                // no such vertex index: reject the file, as done for boundary lines above
+                AssertThrow (false,
+                             ExcInvalidVertexIndex(cell,
+                                                   subcelldata.boundary_quads.back().vertices[i]));
+                subcelldata.boundary_quads.back().vertices[i] =
+                  numbers::invalid_unsigned_int;
+              }
+
+        }
+      else if (cell_type == 15)
+        {
+          // read the indices of nodes given
+          unsigned int node_index;
+          switch (gmsh_file_format)
+            {
+            case 1:
+            {
+              for (unsigned int i=0; i<nod_num; ++i)
+                in >> node_index;
+              break;
+            }
+            case 2:
+            {
+              in >> node_index;
+              break;
+            }
+            }
+
+          // we only care about boundary indicators assigned to individual
+          // vertices in 1d (because otherwise the vertices are not faces)
+          if (dim == 1)
+            boundary_ids_1d[vertex_indices[node_index]] = material_id;
+        }
+      else
+        // cannot read this, so throw
+        // an exception. treat
+        // triangles and tetrahedra
+        // specially since this
+        // deserves a more explicit
+        // error message
+        {
+          AssertThrow (cell_type != 2,
+                       ExcMessage("Found triangles while reading a file "
+                                  "in gmsh format. deal.II does not "
+                                  "support triangles"));
+          AssertThrow (cell_type != 11,
+                       ExcMessage("Found tetrahedra while reading a file "
+                                  "in gmsh format. deal.II does not "
+                                  "support tetrahedra"));
+
+          AssertThrow (false, ExcGmshUnsupportedGeometry(cell_type));
+        }
+    }
+
+  // Assert we reached the end of the block
+  in >> line;
+  static const std::string end_elements_marker[] = {"$ENDELM", "$EndElements" };
+  AssertThrow (line==end_elements_marker[gmsh_file_format-1],
+               ExcInvalidGMSHInput(line));
+
+  // check that no forbidden arrays are used
+  Assert (subcelldata.check_consistency(dim), ExcInternalError());
+
+  AssertThrow (in, ExcIO());
+
+  // check that we actually read some
+  // cells.
+  AssertThrow(cells.size() > 0, ExcGmshNoCellInformation());
+
+  // do some clean-up on
+  // vertices...
+  GridTools::delete_unused_vertices (vertices, cells, subcelldata);
+  // ... and cells
+  if (dim==spacedim)
+    GridReordering<dim,spacedim>::invert_all_cells_of_negative_grid (vertices, cells);
+  GridReordering<dim,spacedim>::reorder_cells (cells);
+  tria->create_triangulation_compatibility (vertices, cells, subcelldata);
+
+  // in 1d, we also have to attach boundary ids to vertices, which does not
+  // currently work through the call above
+  if (dim == 1)
+    assign_1d_boundary_indicators (boundary_ids_1d, *tria);
+}
+
+
+template <>
+void GridIn<1>::read_netcdf (const std::string &)
+{
+  AssertThrow(false, ExcImpossibleInDim(1));  // always rejected for dim=1; the file name is not looked at
+}
+
+template <>
+void GridIn<1,2>::read_netcdf (const std::string &)
+{
+  AssertThrow(false, ExcImpossibleInDim(1));  // always rejected for dim=1 (spacedim=2); the file name is not looked at
+}
+
+
+template <>
+void GridIn<1,3>::read_netcdf (const std::string &)
+{
+  AssertThrow(false, ExcImpossibleInDim(1));  // always rejected for dim=1 (spacedim=3); the file name is not looked at
+}
+
+
+template <>
+void GridIn<2, 3>::read_netcdf (const std::string &)
+{
+  Assert(false, ExcNotImplemented());  // no reader for dim=2, spacedim=3; NOTE(review): Assert here vs. AssertThrow in the dim=1 variants - confirm intended
+}
+
+template <>
+void GridIn<2>::read_netcdf (const std::string &filename)
+{
+#ifndef DEAL_II_WITH_NETCDF
+  (void)filename;
+  AssertThrow(false, ExcNeedsNetCDF());
+#else
+  const unsigned int dim=2;
+  const unsigned int spacedim=2;
+  const bool output=false;
+  Assert (tria != 0, ExcNoTriangulationSelected());
+  // this function assumes the TAU
+  // grid format.
+  //
+  // This format stores 2d grids as
+  // 3d grids. In particular, a 2d
+  // grid of n_cells quadrilaterals
+  // in the y=0 plane is duplicated
+  // to y=1 to build n_cells
+  // hexaeders.  The surface
+  // quadrilaterals of this 3d grid
+  // are marked with boundary
+  // marker. In the following we read
+  // in all data required, find the
+  // boundary marker associated with
+  // the plane y=0, and extract the
+  // corresponding 2d data to build a
+  // Triangulation<2>.
+
+  // In the following, we assume that
+  // the 2d grid lies in the x-z
+  // plane (y=0). I.e. we choose:
+  // point[coord]=0, with coord=1
+  const unsigned int coord=1;
+  // Also x-y-z (0-1-2) point
+  // coordinates will be transformed
+  // to x-y (x2d-y2d) coordinates.
+  // With coord=1 as above, we have
+  // x-z (0-2) -> (x2d-y2d)
+  const unsigned int x2d=0;
+  const unsigned int y2d=2;
+  // For the case, the 2d grid lies
+  // in x-y or y-z plane instead, the
+  // following code must be extended
+  // to find the right value for
+  // coord, and setting x2d and y2d
+  // accordingly.
+
+  // First, open the file
+  NcFile nc (filename.c_str());
+  AssertThrow(nc.is_valid(), ExcIO());
+
+  // then read n_cells
+  NcDim *elements_dim=nc.get_dim("no_of_elements");
+  AssertThrow(elements_dim->is_valid(), ExcIO());
+  const unsigned int n_cells=elements_dim->size();
+
+  // then we read
+  //   int marker(no_of_markers)
+  NcDim *marker_dim=nc.get_dim("no_of_markers");
+  AssertThrow(marker_dim->is_valid(), ExcIO());
+  const unsigned int n_markers=marker_dim->size();
+
+  NcVar *marker_var=nc.get_var("marker");
+  AssertThrow(marker_var->is_valid(), ExcIO());
+  AssertThrow(marker_var->num_dims()==1, ExcIO());
+  AssertThrow(static_cast<unsigned int>(
+                marker_var->get_dim(0)->size())==n_markers, ExcIO());
+
+  std::vector<int> marker(n_markers);
+  // use &* to convert
+  // vector<int>::iterator to int *
+  marker_var->get(&*marker.begin(), n_markers);
+
+  if (output)
+    {
+      std::cout << "n_cell=" << n_cells << std::endl;
+      std::cout << "marker: ";
+      for (unsigned int i=0; i<n_markers; ++i)
+        std::cout << marker[i] << " ";
+      std::cout << std::endl;
+    }
+
+  // next we read
+  // int boundarymarker_of_surfaces(
+  //   no_of_surfaceelements)
+  NcDim *bquads_dim=nc.get_dim("no_of_surfacequadrilaterals");
+  AssertThrow(bquads_dim->is_valid(), ExcIO());
+  const unsigned int n_bquads=bquads_dim->size();
+
+  NcVar *bmarker_var=nc.get_var("boundarymarker_of_surfaces");
+  AssertThrow(bmarker_var->is_valid(), ExcIO());
+  AssertThrow(bmarker_var->num_dims()==1, ExcIO());
+  AssertThrow(static_cast<unsigned int>(
+                bmarker_var->get_dim(0)->size())==n_bquads, ExcIO());
+
+  std::vector<int> bmarker(n_bquads);
+  bmarker_var->get(&*bmarker.begin(), n_bquads);
+
+  // for each marker count the
+  // number of boundary quads
+  // which carry this marker
+  std::map<int, unsigned int> n_bquads_per_bmarker;
+  for (unsigned int i=0; i<n_markers; ++i)
+    {
+      // the markers should all be
+      // different
+      AssertThrow(n_bquads_per_bmarker.find(marker[i])==
+                  n_bquads_per_bmarker.end(), ExcIO());
+
+      n_bquads_per_bmarker[marker[i]]=
+        count(bmarker.begin(), bmarker.end(), marker[i]);
+    }
+  // Note: the n_bquads_per_bmarker
+  // map could be used to find the
+  // right coord by finding the
+  // marker0 such that
+  // a/ n_bquads_per_bmarker[marker0]==n_cells
+  // b/ point[coord]==0,
+  // Condition a/ would hold for at
+  // least two markers, marker0 and
+  // marker1, whereas b/ would hold
+  // for marker0 only. For marker1 we
+  // then had point[coord]=constant
+  // with e.g. constant=1 or -1
+  if (output)
+    {
+      std::cout << "n_bquads_per_bmarker: " << std::endl;
+      std::map<int, unsigned int>::const_iterator
+      iter=n_bquads_per_bmarker.begin();
+      for (; iter!=n_bquads_per_bmarker.end(); ++iter)
+        std::cout << "  n_bquads_per_bmarker[" << iter->first
+                  << "]=" << iter->second << std::endl;
+    }
+
+  // next we read
+  // int points_of_surfacequadrilaterals(
+  //   no_of_surfacequadrilaterals,
+  //   points_per_surfacequadrilateral)
+  NcDim *quad_vertices_dim=nc.get_dim("points_per_surfacequadrilateral");
+  AssertThrow(quad_vertices_dim->is_valid(), ExcIO());
+  const unsigned int vertices_per_quad=quad_vertices_dim->size();
+  AssertThrow(vertices_per_quad==GeometryInfo<dim>::vertices_per_cell, ExcIO());
+
+  NcVar *vertex_indices_var=nc.get_var("points_of_surfacequadrilaterals");
+  AssertThrow(vertex_indices_var->is_valid(), ExcIO());
+  AssertThrow(vertex_indices_var->num_dims()==2, ExcIO());
+  AssertThrow(static_cast<unsigned int>(
+                vertex_indices_var->get_dim(0)->size())==n_bquads, ExcIO());
+  AssertThrow(static_cast<unsigned int>(
+                vertex_indices_var->get_dim(1)->size())==vertices_per_quad, ExcIO());
+
+  std::vector<int> vertex_indices(n_bquads*vertices_per_quad);
+  vertex_indices_var->get(&*vertex_indices.begin(), n_bquads, vertices_per_quad);
+
+  for (unsigned int i=0; i<vertex_indices.size(); ++i)
+    AssertThrow(vertex_indices[i]>=0, ExcInternalError());
+
+  if (output)
+    {
+      std::cout << "vertex_indices:" << std::endl;
+      for (unsigned int i=0, v=0; i<n_bquads; ++i)
+        {
+          for (unsigned int j=0; j<vertices_per_quad; ++j)
+            std::cout << vertex_indices[v++] << " ";
+          std::cout << std::endl;
+        }
+    }
+
+  // next we read
+  //   double points_xc(no_of_points)
+  //   double points_yc(no_of_points)
+  //   double points_zc(no_of_points)
+  NcDim *vertices_dim=nc.get_dim("no_of_points");
+  AssertThrow(vertices_dim->is_valid(), ExcIO());
+  const unsigned int n_vertices=vertices_dim->size();
+  if (output)
+    std::cout << "n_vertices=" << n_vertices << std::endl;
+
+  NcVar *points_xc=nc.get_var("points_xc");
+  NcVar *points_yc=nc.get_var("points_yc");
+  NcVar *points_zc=nc.get_var("points_zc");
+  AssertThrow(points_xc->is_valid(), ExcIO());
+  AssertThrow(points_yc->is_valid(), ExcIO());
+  AssertThrow(points_zc->is_valid(), ExcIO());
+  AssertThrow(points_xc->num_dims()==1, ExcIO());
+  AssertThrow(points_yc->num_dims()==1, ExcIO());
+  AssertThrow(points_zc->num_dims()==1, ExcIO());
+  AssertThrow(points_yc->get_dim(0)->size()==
+              static_cast<int>(n_vertices), ExcIO());
+  AssertThrow(points_zc->get_dim(0)->size()==
+              static_cast<int>(n_vertices), ExcIO());
+  AssertThrow(points_xc->get_dim(0)->size()==
+              static_cast<int>(n_vertices), ExcIO());
+  std::vector<std::vector<double> > point_values(
+    3, std::vector<double> (n_vertices));
+  points_xc->get(&*point_values[0].begin(), n_vertices);
+  points_yc->get(&*point_values[1].begin(), n_vertices);
+  points_zc->get(&*point_values[2].begin(), n_vertices);
+
+  // and fill the vertices
+  std::vector<Point<spacedim> > vertices (n_vertices);
+  for (unsigned int i=0; i<n_vertices; ++i)
+    {
+      vertices[i](0)=point_values[x2d][i];
+      vertices[i](1)=point_values[y2d][i];
+    }
+
+  // For all boundary quads in the
+  // point[coord]=0 plane add the
+  // bmarker to zero_plane_markers
+  std::map<int, bool> zero_plane_markers;
+  for (unsigned int quad=0; quad<n_bquads; ++quad)
+    {
+      bool zero_plane=true;
+      for (unsigned int i=0; i<vertices_per_quad; ++i)
+        if (point_values[coord][vertex_indices[quad*vertices_per_quad+i]]!=0)
+          {
+            zero_plane=false;
+            break;
+          }
+
+      if (zero_plane)
+        zero_plane_markers[bmarker[quad]]=true;
+    }
+  unsigned int sum_of_zero_plane_cells=0;
+  for (std::map<int, bool>::const_iterator iter=zero_plane_markers.begin();
+       iter != zero_plane_markers.end(); ++iter)
+    {
+      sum_of_zero_plane_cells+=n_bquads_per_bmarker[iter->first];
+      if (output)
+        std::cout << "bmarker=" << iter->first << std::endl;
+    }
+  AssertThrow(sum_of_zero_plane_cells==n_cells, ExcIO());
+
+  // fill cells with all quads
+  // associated with
+  // zero_plane_markers
+  std::vector<CellData<dim> > cells(n_cells);
+  for (unsigned int quad=0, cell=0; quad<n_bquads; ++quad)
+    {
+      bool zero_plane=false;
+      for (std::map<int, bool>::const_iterator iter=zero_plane_markers.begin();
+           iter != zero_plane_markers.end(); ++iter)
+        if (bmarker[quad]==iter->first)
+          {
+            zero_plane=true;
+            break;
+          }
+
+      if (zero_plane)
+        {
+          for (unsigned int i=0; i<vertices_per_quad; ++i)
+            {
+              Assert(point_values[coord][vertex_indices[
+                                           quad*vertices_per_quad+i]]==0, ExcNotImplemented());
+              cells[cell].vertices[i]=vertex_indices[quad*vertices_per_quad+i];
+            }
+          ++cell;
+        }
+    }
+
+  SubCellData subcelldata;
+  GridTools::delete_unused_vertices(vertices, cells, subcelldata);
+  GridReordering<dim,spacedim>::reorder_cells (cells);
+  tria->create_triangulation_compatibility (vertices, cells, subcelldata);
+#endif
+}
+
+
+// Read a three-dimensional, purely hexahedral mesh in the TAU grid format
+// from the NetCDF file @p filename and create the attached triangulation
+// from it. The surface quadrilaterals stored in the file, together with
+// their markers, are handed on as boundary quads with boundary ids.
+//
+// If DEAL_II_WITH_NETCDF is not defined the function only throws
+// ExcNeedsNetCDF. Inconsistencies found in the file are reported through
+// AssertThrow so that they are also detected in optimized builds.
+template <>
+void GridIn<3>::read_netcdf (const std::string &filename)
+{
+#ifndef DEAL_II_WITH_NETCDF
+  // do something with the function argument
+  // to make sure it at least looks used,
+  // even if it is not
+  (void)filename;
+  AssertThrow(false, ExcNeedsNetCDF());
+#else
+  const unsigned int dim=3;
+  const unsigned int spacedim=3;
+  // compile-time switch for diagnostic output on std::cout
+  const bool output=false;
+  Assert (tria != 0, ExcNoTriangulationSelected());
+  // this function assumes the TAU
+  // grid format.
+
+  // First, open the file
+  NcFile nc (filename.c_str());
+  AssertThrow(nc.is_valid(), ExcIO());
+
+  // then read n_cells
+  NcDim *elements_dim=nc.get_dim("no_of_elements");
+  AssertThrow(elements_dim->is_valid(), ExcIO());
+  const unsigned int n_cells=elements_dim->size();
+  if (output)
+    std::cout << "n_cell=" << n_cells << std::endl;
+  // and n_hexes
+  NcDim *hexes_dim=nc.get_dim("no_of_hexaeders");
+  AssertThrow(hexes_dim->is_valid(), ExcIO());
+  const unsigned int n_hexes=hexes_dim->size();
+  AssertThrow(n_hexes==n_cells,
+              ExcMessage("deal.II can handle purely hexaedral grids, only."));
+
+  // next we read
+  // int points_of_hexaeders(
+  //   no_of_hexaeders,
+  //   points_per_hexaeder)
+  NcDim *hex_vertices_dim=nc.get_dim("points_per_hexaeder");
+  AssertThrow(hex_vertices_dim->is_valid(), ExcIO());
+  const unsigned int vertices_per_hex=hex_vertices_dim->size();
+  AssertThrow(vertices_per_hex==GeometryInfo<dim>::vertices_per_cell, ExcIO());
+
+  NcVar *vertex_indices_var=nc.get_var("points_of_hexaeders");
+  AssertThrow(vertex_indices_var->is_valid(), ExcIO());
+  AssertThrow(vertex_indices_var->num_dims()==2, ExcIO());
+  AssertThrow(static_cast<unsigned int>(
+                vertex_indices_var->get_dim(0)->size())==n_cells, ExcIO());
+  AssertThrow(static_cast<unsigned int>(
+                vertex_indices_var->get_dim(1)->size())==vertices_per_hex, ExcIO());
+
+  std::vector<int> vertex_indices(n_cells*vertices_per_hex);
+  // use &* to convert
+  // vector<int>::iterator to int *
+  vertex_indices_var->get(&*vertex_indices.begin(), n_cells, vertices_per_hex);
+
+  for (unsigned int i=0; i<vertex_indices.size(); ++i)
+    AssertThrow(vertex_indices[i]>=0, ExcInternalError());
+
+  if (output)
+    {
+      std::cout << "vertex_indices:" << std::endl;
+      for (unsigned int cell=0, v=0; cell<n_cells; ++cell)
+        {
+          for (unsigned int i=0; i<vertices_per_hex; ++i)
+            std::cout << vertex_indices[v++] << " ";
+          std::cout << std::endl;
+        }
+    }
+
+  // next we read
+  //   double points_xc(no_of_points)
+  //   double points_yc(no_of_points)
+  //   double points_zc(no_of_points)
+  NcDim *vertices_dim=nc.get_dim("no_of_points");
+  AssertThrow(vertices_dim->is_valid(), ExcIO());
+  const unsigned int n_vertices=vertices_dim->size();
+  if (output)
+    std::cout << "n_vertices=" << n_vertices << std::endl;
+
+  // the cell vertex indices are used below as
+  // positions in the list of n_vertices points,
+  // so reject anything beyond its end (they are
+  // already known to be non-negative)
+  for (unsigned int i=0; i<vertex_indices.size(); ++i)
+    AssertThrow(static_cast<unsigned int>(vertex_indices[i])<n_vertices,
+                ExcIO());
+
+  NcVar *points_xc=nc.get_var("points_xc");
+  NcVar *points_yc=nc.get_var("points_yc");
+  NcVar *points_zc=nc.get_var("points_zc");
+  AssertThrow(points_xc->is_valid(), ExcIO());
+  AssertThrow(points_yc->is_valid(), ExcIO());
+  AssertThrow(points_zc->is_valid(), ExcIO());
+  AssertThrow(points_xc->num_dims()==1, ExcIO());
+  AssertThrow(points_yc->num_dims()==1, ExcIO());
+  AssertThrow(points_zc->num_dims()==1, ExcIO());
+  AssertThrow(points_yc->get_dim(0)->size()==
+              static_cast<int>(n_vertices), ExcIO());
+  AssertThrow(points_zc->get_dim(0)->size()==
+              static_cast<int>(n_vertices), ExcIO());
+  AssertThrow(points_xc->get_dim(0)->size()==
+              static_cast<int>(n_vertices), ExcIO());
+  std::vector<std::vector<double> > point_values(
+    3, std::vector<double> (n_vertices));
+  // compile-time switch: if set, the y and z
+  // coordinates read from the file are exchanged
+  const bool switch_y_z=false;
+  points_xc->get(&*point_values[0].begin(), n_vertices);
+  if (switch_y_z)
+    {
+      points_yc->get(&*point_values[2].begin(), n_vertices);
+      points_zc->get(&*point_values[1].begin(), n_vertices);
+    }
+  else
+    {
+      points_yc->get(&*point_values[1].begin(), n_vertices);
+      points_zc->get(&*point_values[2].begin(), n_vertices);
+    }
+
+  // and fill the vertices
+  std::vector<Point<spacedim> > vertices (n_vertices);
+  for (unsigned int i=0; i<n_vertices; ++i)
+    {
+      vertices[i](0)=point_values[0][i];
+      vertices[i](1)=point_values[1][i];
+      vertices[i](2)=point_values[2][i];
+    }
+
+  // and cells
+  std::vector<CellData<dim> > cells(n_cells);
+  for (unsigned int cell=0; cell<n_cells; ++cell)
+    for (unsigned int i=0; i<vertices_per_hex; ++i)
+      cells[cell].vertices[i]=vertex_indices[cell*vertices_per_hex+i];
+
+  // for setting up the SubCellData
+  // we read the vertex indices of
+  // the boundary quadrilaterals and
+  // their boundary markers
+
+  // first we read
+  // int points_of_surfacequadrilaterals(
+  //   no_of_surfacequadrilaterals,
+  //   points_per_surfacequadrilateral)
+  NcDim *quad_vertices_dim=nc.get_dim("points_per_surfacequadrilateral");
+  AssertThrow(quad_vertices_dim->is_valid(), ExcIO());
+  const unsigned int vertices_per_quad=quad_vertices_dim->size();
+  AssertThrow(vertices_per_quad==GeometryInfo<dim>::vertices_per_face, ExcIO());
+
+  NcVar *bvertex_indices_var=nc.get_var("points_of_surfacequadrilaterals");
+  AssertThrow(bvertex_indices_var->is_valid(), ExcIO());
+  AssertThrow(bvertex_indices_var->num_dims()==2, ExcIO());
+  const unsigned int n_bquads=bvertex_indices_var->get_dim(0)->size();
+  AssertThrow(static_cast<unsigned int>(
+                bvertex_indices_var->get_dim(1)->size())==
+              GeometryInfo<dim>::vertices_per_face, ExcIO());
+
+  std::vector<int> bvertex_indices(n_bquads*vertices_per_quad);
+  bvertex_indices_var->get(&*bvertex_indices.begin(), n_bquads, vertices_per_quad);
+
+  // the boundary quads refer to the same list of
+  // points as the cells, so their indices have to
+  // lie in [0,n_vertices) as well
+  for (unsigned int i=0; i<bvertex_indices.size(); ++i)
+    AssertThrow(bvertex_indices[i]>=0 &&
+                static_cast<unsigned int>(bvertex_indices[i])<n_vertices,
+                ExcIO());
+
+  if (output)
+    {
+      std::cout << "bquads: ";
+      for (unsigned int i=0; i<n_bquads; ++i)
+        {
+          for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_face; ++v)
+            std::cout << bvertex_indices[
+                        i*GeometryInfo<dim>::vertices_per_face+v] << " ";
+          std::cout << std::endl;
+        }
+    }
+
+  // next we read
+  // int boundarymarker_of_surfaces(
+  //   no_of_surfaceelements)
+  NcDim *bquads_dim=nc.get_dim("no_of_surfacequadrilaterals");
+  AssertThrow(bquads_dim->is_valid(), ExcIO());
+  AssertThrow(static_cast<unsigned int>(
+                bquads_dim->size())==n_bquads, ExcIO());
+
+  NcVar *bmarker_var=nc.get_var("boundarymarker_of_surfaces");
+  AssertThrow(bmarker_var->is_valid(), ExcIO());
+  AssertThrow(bmarker_var->num_dims()==1, ExcIO());
+  AssertThrow(static_cast<unsigned int>(
+                bmarker_var->get_dim(0)->size())==n_bquads, ExcIO());
+
+  std::vector<int> bmarker(n_bquads);
+  bmarker_var->get(&*bmarker.begin(), n_bquads);
+  // we only handle boundary
+  // indicators that fit into an
+  // types::boundary_id. Also, we don't
+  // take numbers::internal_face_boundary_id
+  // as it denotes an internal face.
+  // The markers come straight from the file and
+  // are narrowed by a static_cast below, so this
+  // has to be checked in optimized mode as well
+  for (unsigned int i=0; i<bmarker.size(); ++i)
+    AssertThrow(0<=bmarker[i] && bmarker[i]<numbers::internal_face_boundary_id, ExcIO());
+
+  // finally we setup the boundary
+  // information
+  SubCellData subcelldata;
+  subcelldata.boundary_quads.resize(n_bquads);
+  for (unsigned int i=0; i<n_bquads; ++i)
+    {
+      for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_face; ++v)
+        subcelldata.boundary_quads[i].vertices[v]=bvertex_indices[
+                                                    i*GeometryInfo<dim>::vertices_per_face+v];
+      subcelldata.boundary_quads[i].boundary_id
+        = static_cast<types::boundary_id>(bmarker[i]);
+    }
+
+  GridTools::delete_unused_vertices(vertices, cells, subcelldata);
+  GridReordering<dim,spacedim>::invert_all_cells_of_negative_grid (vertices, cells);
+  GridReordering<dim,spacedim>::reorder_cells (cells);
+  tria->create_triangulation_compatibility (vertices, cells, subcelldata);
+#endif
+}
+
+
+// Extract the mesh description from the concatenated header lines of an
+// ASCII Tecplot file.
+//
+// @p header is modified in place: it is converted to upper case, tabs,
+// commas and newlines are replaced by blanks, and blanks around '=' are
+// removed. The remaining arguments are outputs:
+//  - @p tecplot2deal[d]: position of coordinate d (X, Y, Z) in the list
+//    of variables; must have size dim on entry
+//  - @p n_vars: number of entries in the VARIABLES list
+//  - @p n_vertices, @p n_cells: taken from N= and E= for unstructured
+//    data, computed from I, J, K for structured data
+//  - @p IJK: values of I=, J=, K= (zero if absent); must have size dim
+//  - @p structured, @p blocked: data layout as announced by ZONETYPE=,
+//    DATAPACKING=, F= and ET=
+//
+// Unsupported or inconsistent headers are reported through AssertThrow.
+template <int dim, int spacedim>
+void GridIn<dim, spacedim>::parse_tecplot_header(std::string &header,
+                                                 std::vector<unsigned int> &tecplot2deal,
+                                                 unsigned int &n_vars,
+                                                 unsigned int &n_vertices,
+                                                 unsigned int &n_cells,
+                                                 std::vector<unsigned int> &IJK,
+                                                 bool &structured,
+                                                 bool &blocked)
+{
+  Assert(tecplot2deal.size()==dim, ExcInternalError());
+  Assert(IJK.size()==dim, ExcInternalError());
+  // initialize the output variables
+  n_vars=0;
+  n_vertices=0;
+  n_cells=0;
+  switch (dim)
+    {
+    case 3:
+      IJK[2]=0;
+    // intentionally no break: also reset the
+    // lower-dimensional entries
+    case 2:
+      IJK[1]=0;
+    // intentionally no break
+    case 1:
+      IJK[0]=0;
+    }
+  structured=true;
+  blocked=false;
+
+  // convert the string to upper case
+  std::transform(header.begin(),header.end(),header.begin(),::toupper);
+
+  // replace all tabs, commas, newlines by
+  // whitespaces
+  std::replace(header.begin(),header.end(),'\t',' ');
+  std::replace(header.begin(),header.end(),',',' ');
+  std::replace(header.begin(),header.end(),'\n',' ');
+
+  // now remove whitespace in front of and
+  // after '='. take care not to look beyond
+  // either end of the string if the '=' is
+  // its first or last character
+  std::string::size_type pos=header.find("=");
+
+  while (pos!=static_cast<std::string::size_type>(std::string::npos))
+    if (pos+1<header.size() && header[pos+1]==' ')
+      header.erase(pos+1,1);
+    else if (pos>0 && header[pos-1]==' ')
+      {
+        header.erase(pos-1,1);
+        --pos;
+      }
+    else
+      pos=header.find("=",++pos);
+
+  // split the string into individual entries
+  std::vector<std::string> entries=Utilities::break_text_into_lines(header,1,' ');
+
+  // now go through the list and try to extract
+  for (unsigned int i=0; i<entries.size(); ++i)
+    {
+      if (Utilities::match_at_string_start(entries[i],"VARIABLES=\""))
+        {
+          ++n_vars;
+          // we assume, that the first variable
+          // is x or no coordinate at all (not y or z)
+          if (Utilities::match_at_string_start(entries[i],"VARIABLES=\"X\""))
+            {
+              tecplot2deal[0]=0;
+            }
+          ++i;
+          // all following entries starting with a
+          // quote are further variable names. the
+          // list may be the very last thing in the
+          // header, so stop at the end of entries
+          while (i<entries.size() &&
+                 !entries[i].empty() &&
+                 entries[i][0]=='"')
+            {
+              if (entries[i]=="\"X\"")
+                tecplot2deal[0]=n_vars;
+              else if (entries[i]=="\"Y\"")
+                {
+                  // we assume, that y contains
+                  // zero data in 1d, so do
+                  // nothing
+                  if (dim>1)
+                    tecplot2deal[1]=n_vars;
+                }
+              else if (entries[i]=="\"Z\"")
+                {
+                  // we assume, that z contains
+                  // zero data in 1d and 2d, so
+                  // do nothing
+                  if (dim>2)
+                    tecplot2deal[2]=n_vars;
+                }
+              ++n_vars;
+              ++i;
+            }
+          // set i back, so that the next
+          // string is treated correctly
+          --i;
+
+          AssertThrow(n_vars>=dim,
+                      ExcMessage("Tecplot file must contain at least one variable for each dimension"));
+          for (unsigned int d=1; d<dim; ++d)
+            AssertThrow(tecplot2deal[d]>0,
+                        ExcMessage("Tecplot file must contain at least one variable for each dimension."));
+        }
+      else if (Utilities::match_at_string_start(entries[i],"ZONETYPE=ORDERED"))
+        structured=true;
+      else if (Utilities::match_at_string_start(entries[i],"ZONETYPE=FELINESEG") && dim==1)
+        structured=false;
+      else if (Utilities::match_at_string_start(entries[i],"ZONETYPE=FEQUADRILATERAL") && dim==2)
+        structured=false;
+      else if (Utilities::match_at_string_start(entries[i],"ZONETYPE=FEBRICK") && dim==3)
+        structured=false;
+      else if (Utilities::match_at_string_start(entries[i],"ZONETYPE="))
+        // unsupported ZONETYPE
+        {
+          AssertThrow(false,ExcMessage("The tecplot file contains an unsupported ZONETYPE."));
+        }
+      else if (Utilities::match_at_string_start(entries[i],"DATAPACKING=POINT"))
+        blocked=false;
+      else if (Utilities::match_at_string_start(entries[i],"DATAPACKING=BLOCK"))
+        blocked=true;
+      else if (Utilities::match_at_string_start(entries[i],"F=POINT"))
+        {
+          structured=true;
+          blocked=false;
+        }
+      else if (Utilities::match_at_string_start(entries[i],"F=BLOCK"))
+        {
+          structured=true;
+          blocked=true;
+        }
+      else if (Utilities::match_at_string_start(entries[i],"F=FEPOINT"))
+        {
+          structured=false;
+          blocked=false;
+        }
+      else if (Utilities::match_at_string_start(entries[i],"F=FEBLOCK"))
+        {
+          structured=false;
+          blocked=true;
+        }
+      else if (Utilities::match_at_string_start(entries[i],"ET=QUADRILATERAL") && dim==2)
+        structured=false;
+      else if (Utilities::match_at_string_start(entries[i],"ET=BRICK") && dim==3)
+        structured=false;
+      else if (Utilities::match_at_string_start(entries[i],"ET="))
+        // unsupported ElementType
+        {
+          AssertThrow(false,ExcMessage("The tecplot file contains an unsupported ElementType."));
+        }
+      else if (Utilities::match_at_string_start(entries[i],"I="))
+        IJK[0]=Utilities::get_integer_at_position(entries[i],2).first;
+      else if (Utilities::match_at_string_start(entries[i],"J="))
+        {
+          // IJK has only dim entries, so the
+          // value may be stored only if dim>1;
+          // otherwise it is merely checked
+          const unsigned int n_j=Utilities::get_integer_at_position(entries[i],2).first;
+          AssertThrow(dim>1 || n_j==1,
+                      ExcMessage("Parameter 'J=' found in tecplot, although this is only possible for dimensions greater than 1."));
+          if (dim>1)
+            IJK[1]=n_j;
+        }
+      else if (Utilities::match_at_string_start(entries[i],"K="))
+        {
+          // same as for 'J=': store only if IJK
+          // has a third entry
+          const unsigned int n_k=Utilities::get_integer_at_position(entries[i],2).first;
+          AssertThrow(dim>2 || n_k==1,
+                      ExcMessage("Parameter 'K=' found in tecplot, although this is only possible for dimensions greater than 2."));
+          if (dim>2)
+            IJK[2]=n_k;
+        }
+      else if (Utilities::match_at_string_start(entries[i],"N="))
+        n_vertices=Utilities::get_integer_at_position(entries[i],2).first;
+      else if (Utilities::match_at_string_start(entries[i],"E="))
+        n_cells=Utilities::get_integer_at_position(entries[i],2).first;
+    }
+
+  // now we have read all the fields we are
+  // interested in. do some checks and
+  // calculate the variables
+  if (structured)
+    {
+      n_vertices=1;
+      n_cells=1;
+      for (unsigned int d=0; d<dim; ++d)
+        {
+          AssertThrow(IJK[d]>0,
+                      ExcMessage("Tecplot file does not contain a complete and consistent set of parameters"));
+          n_vertices*=IJK[d];
+          n_cells*=(IJK[d]-1);
+        }
+    }
+  else
+    {
+      AssertThrow(n_vertices>0,
+                  ExcMessage("Tecplot file does not contain a complete and consistent set of parameters"));
+      if (n_cells==0)
+        // this means an error, although
+        // tecplot itself accepts entries like
+        // 'J=20' instead of 'E=20'. therefore,
+        // take the max of IJK
+        n_cells=*std::max_element(IJK.begin(),IJK.end());
+      AssertThrow(n_cells>0,
+                  ExcMessage("Tecplot file does not contain a complete and consistent set of parameters"));
+    }
+}
+
+
+
+
+// Read a two-dimensional mesh from an ASCII Tecplot stream @p in and
+// create the attached triangulation from it.
+//
+// Leading '#' comment lines are skipped, all following lines containing
+// letters (other than e/E) are treated as header and passed to
+// parse_tecplot_header(). Vertex coordinates are then read in blocked or
+// point layout, and the cells are either generated from the I x J
+// structure or read from the connectivity list.
+template <>
+void GridIn<2>::read_tecplot (std::istream &in)
+{
+  const unsigned int dim=2;
+  const unsigned int spacedim=2;
+  Assert (tria != 0, ExcNoTriangulationSelected());
+  AssertThrow (in, ExcIO());
+
+  // skip comments at start of file
+  skip_comment_lines (in, '#');
+
+  // some strings for parsing the header
+  std::string line, header;
+
+  // first, concatenate all header lines
+  // create a searchstring with almost all
+  // letters. exclude e and E from the letters
+  // to search, as they might appear in
+  // exponential notation
+  std::string letters ="abcdfghijklmnopqrstuvwxyzABCDFGHIJKLMNOPQRSTUVWXYZ";
+
+  getline(in,line);
+  while (line.find_first_of(letters)!=std::string::npos)
+    {
+      header+=" "+line;
+      getline(in,line);
+    }
+
+  // now create some variables holding
+  // important information on the mesh, get
+  // this information from the header string
+  std::vector<unsigned int> tecplot2deal(dim);
+  std::vector<unsigned int> IJK(dim);
+  unsigned int n_vars,
+           n_vertices,
+           n_cells;
+  bool structured,
+       blocked;
+
+  parse_tecplot_header(header,
+                       tecplot2deal,n_vars,n_vertices,n_cells,IJK,
+                       structured,blocked);
+
+  // reserve space for vertices. note, that in
+  // tecplot vertices are ordered beginning
+  // with 1, whereas in deal all indices start
+  // with 0. in order not to use -1 for all the
+  // connectivity information, a 0th vertex
+  // (unused) is inserted at the origin.
+  std::vector<Point<spacedim> > vertices(n_vertices+1);
+  vertices[0]=Point<spacedim>();
+  // reserve space for cells
+  std::vector<CellData<dim> > cells(n_cells);
+  SubCellData                 subcelldata;
+
+  if (blocked)
+    {
+      // blocked data format. first we get all
+      // the values of the first variable for
+      // all points, after that we get all
+      // values for the second variable and so
+      // on.
+
+      // dummy variable to read in all the info
+      // we do not want to use
+      double dummy;
+      // which is the first index to read in
+      // the loop (see below)
+      unsigned int next_index=0;
+
+      // note, that we have already read the
+      // first line containing the first variable
+      if (tecplot2deal[0]==0)
+        {
+          // we need the information in this
+          // line, so extract it
+          std::vector<std::string> first_var=Utilities::break_text_into_lines(line,1);
+          // the line belongs to a single
+          // variable, so it cannot hold more
+          // values than there are vertices
+          AssertThrow(first_var.size()<=n_vertices,
+                      ExcMessage("The first data line of the tecplot file contains more values than there are vertices."));
+          char *endptr;
+          for (unsigned int i=1; i<first_var.size()+1; ++i)
+            vertices[i](0) = std::strtod (first_var[i-1].c_str(), &endptr);
+
+          // if there are many points, the data
+          // for this var might continue in the
+          // next line(s)
+          for (unsigned int j=first_var.size()+1; j<n_vertices+1; ++j)
+            in>>vertices[j](next_index);
+          // now we got all values of the first
+          // variable, so increase the counter
+          next_index=1;
+        }
+
+      // main loop over all variables
+      for (unsigned int i=1; i<n_vars; ++i)
+        {
+          // if we read all the important
+          // variables and do not want to
+          // read further, because we are
+          // using a structured grid, we can
+          // stop here (and skip, for
+          // example, a whole lot of solution
+          // variables)
+          if (next_index==dim && structured)
+            break;
+
+          if ((next_index<dim) && (i==tecplot2deal[next_index]))
+            {
+              // we need this line, read it in
+              for (unsigned int j=1; j<n_vertices+1; ++j)
+                in>>vertices[j](next_index);
+              ++next_index;
+            }
+          else
+            {
+              // we do not need this line, read
+              // it in and discard it
+              for (unsigned int j=1; j<n_vertices+1; ++j)
+                in>>dummy;
+            }
+        }
+      Assert(next_index==dim, ExcInternalError());
+    }
+  else
+    {
+      // the data is not blocked, so we get all
+      // the variables for one point, then the
+      // next and so on. create a vector to
+      // hold these components
+      std::vector<double> vars(n_vars);
+
+      // now fill the first vertex. note, that we
+      // have already read the first line
+      // containing the first vertex
+      std::vector<std::string> first_vertex=Utilities::break_text_into_lines(line,1);
+      char *endptr;
+      for (unsigned int d=0; d<dim; ++d)
+        {
+          // the coordinate has to be present
+          // in the line we have already read
+          AssertThrow(tecplot2deal[d]<first_vertex.size(),
+                      ExcMessage("The first data line of the tecplot file does not contain all coordinates of the first vertex."));
+          vertices[1](d) = std::strtod (first_vertex[tecplot2deal[d]].c_str(), &endptr);
+        }
+
+      // read the remaining vertices from the
+      // list
+      for (unsigned int v=2; v<n_vertices+1; ++v)
+        {
+          for (unsigned int i=0; i<n_vars; ++i)
+            in>>vars[i];
+          // fill the vertex
+          // coordinates. respect the position
+          // of coordinates in the list of
+          // variables
+          for (unsigned int i=0; i<dim; ++i)
+            vertices[v](i)=vars[tecplot2deal[i]];
+        }
+    }
+
+  if (structured)
+    {
+      // this is the part of the code that only
+      // works in 2d
+      unsigned int I=IJK[0],
+                   J=IJK[1];
+
+      unsigned int cell=0;
+      // set up array of cells
+      for (unsigned int j=0; j<J-1; ++j)
+        for (unsigned int i=1; i<I; ++i)
+          {
+            cells[cell].vertices[0]=i+  j    *I;
+            cells[cell].vertices[1]=i+1+j    *I;
+            cells[cell].vertices[2]=i+1+(j+1)*I;
+            cells[cell].vertices[3]=i  +(j+1)*I;
+            ++cell;
+          }
+      // all cells announced by the header must
+      // have been generated (this is a
+      // comparison, not an assignment)
+      Assert(cell==n_cells, ExcInternalError());
+      std::vector<unsigned int> boundary_vertices(2*I+2*J-4);
+      unsigned int k=0;
+      for (unsigned int i=1; i<I+1; ++i)
+        {
+          boundary_vertices[k]=i;
+          ++k;
+          boundary_vertices[k]=i+(J-1)*I;
+          ++k;
+        }
+      for (unsigned int j=1; j<J-1; ++j)
+        {
+          boundary_vertices[k]=1+j*I;
+          ++k;
+          boundary_vertices[k]=I+j*I;
+          ++k;
+        }
+      Assert(k==boundary_vertices.size(), ExcInternalError());
+      // delete the duplicated vertices at the
+      // boundary, which occur, e.g. in c-type
+      // or o-type grids around a body
+      // (airfoil). this automatically deletes
+      // unused vertices as well.
+      GridTools::delete_duplicated_vertices(vertices,cells,subcelldata,boundary_vertices);
+    }
+  else
+    {
+      // set up array of cells, unstructured
+      // mode, so the connectivity is
+      // explicitly given
+      for (unsigned int i=0; i<n_cells; ++i)
+        {
+          // note that since in the input file
+          // we found the number of cells at
+          // the top, there should still be
+          // input here, so check this:
+          AssertThrow (in, ExcIO());
+
+          // get the connectivity from the
+          // input file. the vertices are
+          // ordered like in the ucd format
+          for (unsigned int j=0; j<GeometryInfo<dim>::vertices_per_cell; ++j)
+            in>>cells[i].vertices[j];
+        }
+      // do some clean-up on vertices
+      GridTools::delete_unused_vertices (vertices, cells, subcelldata);
+    }
+
+  // check that no forbidden arrays are
+  // used. as we do not read in any
+  // subcelldata, nothing should happen here.
+  Assert (subcelldata.check_consistency(dim), ExcInternalError());
+  AssertThrow (in, ExcIO());
+
+  // do some cleanup on cells
+  GridReordering<dim,spacedim>::invert_all_cells_of_negative_grid (vertices, cells);
+  GridReordering<dim,spacedim>::reorder_cells (cells);
+  tria->create_triangulation_compatibility (vertices, cells, subcelldata);
+}
+
+
+
+// Generic Tecplot reader: no implementation is provided here, the input
+// stream is left untouched and ExcNotImplemented is raised via Assert.
+// A dimension-specific specialization such as GridIn<2>::read_tecplot
+// takes precedence over this template where one exists.
+template <int dim, int spacedim>
+void
+GridIn<dim, spacedim>::read_tecplot (std::istream &/*in*/)
+{
+  Assert (false, ExcNotImplemented());
+}
+
+
+// Consume lines from @p in for as long as they are empty or consist of
+// blanks only. The first line containing any other character is pushed
+// back onto the stream (followed by a newline) so that the caller can
+// read it again. Returns when such a line was found or the stream is no
+// longer readable.
+template <int dim, int spacedim>
+void GridIn<dim, spacedim>::skip_empty_lines (std::istream &in)
+{
+  std::string current;
+  while (in)
+    {
+      getline (in, current);
+
+      // nothing but blanks (or nothing at all)
+      // in this line: try the next one
+      if (current.find_first_not_of (' ') == std::string::npos)
+        continue;
+
+      // this line carries content. put it back
+      // in reverse order, line terminator
+      // first, and stop
+      in.putback ('\n');
+      for (std::string::const_reverse_iterator ch=current.rbegin();
+           ch!=current.rend(); ++ch)
+        in.putback (*ch);
+      return;
+    }
+}
+
+
+
+// Skip all leading lines of @p in whose first character is
+// @p comment_start, then skip any empty lines that follow. The first
+// character of the first non-comment line is put back so that the caller
+// sees that line in full.
+//
+// Reaching the end of the input, even in the middle of a comment line,
+// ends the function instead of being read over.
+template <int dim, int spacedim>
+void GridIn<dim, spacedim>::skip_comment_lines (std::istream &in,
+                                                const char    comment_start)
+{
+  char c;
+  // loop over the following comment
+  // lines. get(c) fails at the end of the
+  // input, which ends the loop as well
+  while (in.get(c) && c == comment_start)
+    {
+      // loop over the characters after
+      // the comment starter, up to and
+      // including the end of the line or
+      // until there is no more input
+      while (in.get(c) && c != '\n')
+        ;
+    }
+
+  // put back first character of
+  // first non-comment line, if one
+  // was actually read
+  if (in)
+    in.putback (c);
+
+  // at last: skip additional empty lines, if present
+  skip_empty_lines(in);
+}
+
+
+
+// Generic debug output of a cell/vertex list: not implemented, none of
+// the arguments is used and ExcNotImplemented is raised via Assert.
+// The specializations for GridIn<2> and GridIn<3> provide actual output.
+template <int dim, int spacedim>
+void
+GridIn<dim, spacedim>::debug_output_grid (const std::vector<CellData<dim> >   &/*cells*/,
+                                          const std::vector<Point<spacedim> > &/*vertices*/,
+                                          std::ostream                        &/*out*/)
+{
+  Assert (false, ExcNotImplemented());
+}
+
+
+
+// Write a gnuplot script to @p out that visualizes the 2d cells in
+// @p cells: every cell is labelled with its index at the mean of its four
+// vertices and its four edges are drawn as arrows, the first two in the
+// direction of increasing vertex number and the other two reversed. The
+// script ends with a plot command spanning the bounding box of all cell
+// vertices and a pause.
+//
+// Nothing is written if @p cells is empty.
+template <>
+void
+GridIn<2>::debug_output_grid (const std::vector<CellData<2> > &cells,
+                              const std::vector<Point<2> >    &vertices,
+                              std::ostream                    &out)
+{
+  // without cells there is no first vertex to
+  // start the bounding box from, and nothing
+  // to draw either
+  if (cells.empty())
+    return;
+
+  // bounding box, started at the first vertex
+  // of the first cell
+  double min_x = vertices[cells[0].vertices[0]](0),
+         max_x = vertices[cells[0].vertices[0]](0),
+         min_y = vertices[cells[0].vertices[0]](1),
+         max_y = vertices[cells[0].vertices[0]](1);
+
+  for (unsigned int i=0; i<cells.size(); ++i)
+    {
+      // extend the bounding box by the vertices
+      // of this cell
+      for (unsigned int v=0; v<4; ++v)
+        {
+          const Point<2> &p = vertices[cells[i].vertices[v]];
+
+          if (p(0) < min_x)
+            min_x = p(0);
+          if (p(0) > max_x)
+            max_x = p(0);
+          if (p(1) < min_y)
+            min_y = p(1);
+          if (p(1) > max_y)
+            max_y = p(1);
+        }
+
+      // label the cell with its index at the
+      // mean of its vertices
+      out << "# cell " << i << std::endl;
+      Point<2> center;
+      for (unsigned int f=0; f<4; ++f)
+        center += vertices[cells[i].vertices[f]];
+      center /= 4;
+
+      out << "set label \"" << i << "\" at "
+          << center(0) << ',' << center(1)
+          << " center"
+          << std::endl;
+
+      // first two line right direction
+      for (unsigned int f=0; f<2; ++f)
+        out << "set arrow from "
+            << vertices[cells[i].vertices[f]](0) << ','
+            << vertices[cells[i].vertices[f]](1)
+            << " to "
+            << vertices[cells[i].vertices[(f+1)%4]](0) << ','
+            << vertices[cells[i].vertices[(f+1)%4]](1)
+            << std::endl;
+      // other two lines reverse direction
+      for (unsigned int f=2; f<4; ++f)
+        out << "set arrow from "
+            << vertices[cells[i].vertices[(f+1)%4]](0) << ','
+            << vertices[cells[i].vertices[(f+1)%4]](1)
+            << " to "
+            << vertices[cells[i].vertices[f]](0) << ','
+            << vertices[cells[i].vertices[f]](1)
+            << std::endl;
+      out << std::endl;
+    }
+
+
+  // plot command over the bounding box
+  out << std::endl
+      << "set nokey" << std::endl
+      << "pl [" << min_x << ':' << max_x << "]["
+      << min_y << ':' << max_y <<  "] "
+      << min_y << std::endl
+      << "pause -1" << std::endl;
+}
+
+
+
+// Write the twelve lines of every 3d cell to @p out. Each line is printed
+// as its two end points, one per output line, followed by two empty
+// lines.
+template <>
+void
+GridIn<3>::debug_output_grid (const std::vector<CellData<3> > &cells,
+                              const std::vector<Point<3> >    &vertices,
+                              std::ostream                    &out)
+{
+  // Local vertex indices (first point, second point) of lines 0 to 11
+  // of a cell, in the order in which the lines are written
+  static const unsigned int line_vertices[12][2] =
+  {
+    {0, 1}, {1, 2}, {3, 2}, {0, 3},
+    {4, 5}, {5, 6}, {7, 6}, {4, 7},
+    {0, 4}, {1, 5}, {2, 6}, {3, 7}
+  };
+
+  for (unsigned int cell=0; cell<cells.size(); ++cell)
+    for (unsigned int line=0; line<12; ++line)
+      out << vertices[cells[cell].vertices[line_vertices[line][0]]]
+          << std::endl
+          << vertices[cells[cell].vertices[line_vertices[line][1]]]
+          << std::endl << std::endl << std::endl;
+}
+
+
+
+// Read a mesh from the file @p filename. The file is looked up through
+// the "MESH" PathSearch class; if @p format is not Default, the default
+// suffix of that format is passed to the search as well. If @p format is
+// Default, the format is deduced from the extension of the file that was
+// found, provided it has one.
+template <int dim, int spacedim>
+void GridIn<dim, spacedim>::read (const std::string &filename,
+                                  Format format)
+{
+  // Search file class for meshes
+  PathSearch search("MESH");
+  std::string name;
+  // Locate the file and remember its name
+  if (format == Default)
+    name = search.find(filename);
+  else
+    name = search.find(filename, default_suffix(format));
+
+  if (format == Default)
+    {
+      // Only a dot after the last slash (if any) separates an extension;
+      // a dot inside a directory name does not count
+      const std::string::size_type slashpos = name.find_last_of('/');
+      const std::string::size_type dotpos = name.find_last_of('.');
+      if (dotpos < name.length()
+          && (dotpos > slashpos || slashpos == std::string::npos))
+        {
+          std::string ext = name.substr(dotpos+1);
+          format = parse_format(ext);
+        }
+    }
+
+  if (format == netcdf)
+    // NetCDF files are read by name, not through a stream. Hand over the
+    // name that the search above produced rather than the name given by
+    // the caller, so that the file found there is the one that is read.
+    read_netcdf(name);
+  else
+    {
+      std::ifstream in(name.c_str());
+      read(in, format);
+    }
+}
+
+
+// Read a mesh from the stream @p in by dispatching to the reader function
+// of the given @p format. A @p format of Default is replaced by
+// default_format first.
+template <int dim, int spacedim>
+void GridIn<dim, spacedim>::read (std::istream &in,
+                                  Format format)
+{
+  const Format actual_format = (format == Default ? default_format : format);
+
+  switch (actual_format)
+    {
+    case dbmesh:
+      read_dbmesh (in);
+      break;
+
+    case msh:
+      read_msh (in);
+      break;
+
+    case vtk:
+      read_vtk (in);
+      break;
+
+    case unv:
+      read_unv (in);
+      break;
+
+    case ucd:
+      read_ucd (in);
+      break;
+
+    case abaqus:
+      read_abaqus(in);
+      break;
+
+    case xda:
+      read_xda (in);
+      break;
+
+    case netcdf:
+      // NetCDF cannot be read from a stream
+      Assert(false, ExcMessage("There is no read_netcdf(istream &) function. "
+                               "Use the read(_netcdf)(string &filename) "
+                               "functions, instead."));
+      break;
+
+    case tecplot:
+      read_tecplot (in);
+      break;
+
+    case Default:
+    default:
+      // Either default_format is itself Default, or the value is not a
+      // known format
+      Assert (false, ExcInternalError());
+      break;
+    }
+}
+
+
+
+// Return the file name suffix (including the leading dot) associated
+// with the given @p format. For a format without an entry below, the
+// ExcNotImplemented assertion is raised and ".unknown_format" returned.
+template <int dim, int spacedim>
+std::string
+GridIn<dim, spacedim>::default_suffix (const Format format)
+{
+  std::string suffix;
+
+  switch (format)
+    {
+    case dbmesh:
+      suffix = ".dbmesh";
+      break;
+    case msh:
+      suffix = ".msh";
+      break;
+    case vtk:
+      suffix = ".vtk";
+      break;
+    case unv:
+      suffix = ".unv";
+      break;
+    case ucd:
+      suffix = ".inp";
+      break;
+    case abaqus:
+      // Typical suffix for Abaqus mesh files conflicts with UCD.
+      suffix = ".inp";
+      break;
+    case xda:
+      suffix = ".xda";
+      break;
+    case netcdf:
+      suffix = ".nc";
+      break;
+    case tecplot:
+      suffix = ".dat";
+      break;
+    default:
+      Assert (false, ExcNotImplemented());
+      suffix = ".unknown_format";
+      break;
+    }
+
+  return suffix;
+}
+
+
+
+// Translate a format name or file extension (without the dot) into the
+// corresponding Format value. Every name listed by get_format_names() is
+// accepted, as well as the extensions "inp", "nc", "dat" and "plt".
+// Throws ExcInvalidState if @p format_name is not recognized.
+template <int dim, int spacedim>
+typename GridIn<dim, spacedim>::Format
+GridIn<dim, spacedim>::parse_format (const std::string &format_name)
+{
+  if (format_name == "dbmesh")
+    return dbmesh;
+
+  if (format_name == "msh")
+    return msh;
+
+  if (format_name == "unv")
+    return unv;
+
+  if (format_name == "vtk")
+    return vtk;
+
+  // This is also the typical extension of Abaqus input files, but the
+  // extension alone cannot tell the two formats apart; it is mapped to
+  // UCD.
+  if (format_name == "inp")
+    return ucd;
+
+  if (format_name == "ucd")
+    return ucd;
+
+  // The name "abaqus" is advertised by get_format_names() and therefore
+  // has to be understood here as well
+  if (format_name == "abaqus")
+    return abaqus;
+
+  if (format_name == "xda")
+    return xda;
+
+  if (format_name == "netcdf")
+    return netcdf;
+
+  if (format_name == "nc")
+    return netcdf;
+
+  if (format_name == "tecplot")
+    return tecplot;
+
+  if (format_name == "dat")
+    return tecplot;
+
+  if (format_name == "plt")
+    // Actually, this is the extension for the
+    // tecplot binary format, which we do not
+    // support right now. However, some people
+    // tend to create tecplot ascii files with
+    // the extension 'plt' instead of
+    // 'dat'. Thus, include this extension
+    // here. If it actually is a binary file,
+    // the read_tecplot() function will fail
+    // and throw an exception, anyway.
+    return tecplot;
+
+  AssertThrow (false, ExcInvalidState ());
+  // return something weird
+  return Format(Default);
+}
+
+
+
+// Return the names of the supported formats as a single string in which
+// the individual names are separated by '|'.
+template <int dim, int spacedim>
+std::string GridIn<dim, spacedim>::get_format_names ()
+{
+  std::string format_names ("dbmesh|msh|unv|vtk|ucd|abaqus|xda|netcdf|tecplot");
+  return format_names;
+}
+
+namespace
+{
+  // Constructor. Only dim==2 and dim==3 are supported; any other
+  // dimension is rejected with ExcNotImplemented.
+  template <int dim>
+  Abaqus_to_UCD<dim>::Abaqus_to_UCD ()
+    :
+    // Used to offset Cubit tolerance error when outputting value close
+    // to zero
+    tolerance (5e-16)
+  {
+    AssertThrow(dim==2 || dim==3, ExcNotImplemented());
+  }
+
+  // Parse the contents of the string @p s into @p t, applying the stream
+  // manipulator @p f (for example std::dec or std::hex) beforehand.
+  // Returns true if the extraction succeeded and false otherwise.
+  // Reference: http://www.codeguru.com/forum/showthread.php?t=231054
+  template <class T> bool
+  from_string (T &t,
+               const std::string &s,
+               std::ios_base& (*f) (std::ios_base &))
+  {
+    std::istringstream stream (s);
+    stream >> f;
+    stream >> t;
+    return !stream.fail();
+  }
+
+  // Extract an integer from a string: all decimal digits found in @p s
+  // are concatenated in the order in which they appear, and the result is
+  // parsed as a decimal number. All other characters are ignored, so that
+  // for example "SURF-12" yields 12. Returns 0 if @p s contains no digit
+  // or if the digits cannot be parsed.
+  int
+  extract_int (const std::string &s)
+  {
+    std::string tmp;
+    for (unsigned int i = 0; i<s.size(); ++i)
+      {
+        // isdigit() must not be handed a negative value (other than
+        // EOF), which a plain char can be; go through unsigned char
+        if (isdigit(static_cast<unsigned char>(s[i])))
+          {
+            tmp += s[i];
+          }
+      }
+
+    // number keeps its initial value if tmp is empty
+    int number = 0;
+    from_string(number, tmp, std::dec);
+    return number;
+  }
+
+  // Parse an Abaqus input file from @p input_stream. The file is processed
+  // keyword block by keyword block:
+  // - *HEADING, *PART, comment lines (**) and *NSET blocks are skipped,
+  // - *NODE blocks are appended to node_list,
+  // - *ELEMENT blocks are appended to cell_list,
+  // - *SURFACE blocks are appended to face_list,
+  // - *ELSET blocks are stored in elsets_list,
+  // - *SOLID SECTION lines assign a material id to the cells of an ELSET.
+  // All other lines are ignored. Keyword lines are compared in upper case.
+  //
+  // Throws ExcIO if the stream is not in a good state on entry, and an
+  // ExcMessage exception if a *SOLID SECTION refers to a cell number for
+  // which there is no entry in cell_list.
+  template <int dim>
+  void
+  Abaqus_to_UCD<dim>::read_in_abaqus (std::istream &input_stream)
+  {
+    // References:
+    // http://www.egr.msu.edu/software/abaqus/Documentation/docs/v6.7/books/usb/default.htm?startat=pt01ch02.html
+    // http://www.cprogramming.com/tutorial/string.html
+
+    AssertThrow (input_stream, ExcIO());
+    std::string line;
+    std::getline (input_stream, line);
+
+    // Invariant of this loop: at the top of each iteration, 'line' holds
+    // the next line that has not been interpreted yet. Branches that have
+    // already read such a line jump to the 'cont' label at the end of the
+    // loop body in order to skip the getline() call in front of it.
+    while (!input_stream.eof())
+      {
+        std::transform(line.begin(), line.end(), line.begin(), ::toupper);
+
+        if (line.compare ("*HEADING") == 0 ||
+            line.compare (0, 2, "**") == 0 ||
+            line.compare (0, 5, "*PART") == 0)
+          {
+            // Skip header and comments
+            while (!input_stream.eof())
+              {
+                std::getline (input_stream, line);
+                if (line[0] == '*')
+                  goto cont; // My eyes, they burn!
+              }
+          }
+        else if (line.compare (0, 5, "*NODE") == 0)
+          {
+            // Extract list of vertices
+            // Header line might be:
+            // *NODE, NSET=ALLNODES
+            // *NODE
+
+            // Contains lines in the form:
+            // Index, x, y, z
+            while (!input_stream.eof())
+              {
+                std::getline (input_stream, line);
+                if (line[0] == '*')
+                  goto cont;
+
+                // Entry 0 is the node index, followed by dim coordinates
+                std::vector <double> node (dim+1);
+
+                std::istringstream iss (line);
+                char comma;
+                for (unsigned int i = 0; i < dim+1; ++i)
+                  iss >> node[i] >> comma;
+
+                node_list.push_back (node);
+              }
+          }
+        else if (line.compare (0, 8, "*ELEMENT") == 0)
+          {
+            // Element construction.
+            // There are different header formats, the details
+            // of which we're not particularly interested in except
+            // whether they represent quads or hexahedrals.
+            // *ELEMENT, TYPE=S4R, ELSET=EB<material id>
+            // *ELEMENT, TYPE=C3D8R, ELSET=EB<material id>
+            // *ELEMENT, TYPE=C3D8
+            // Elements itself (n=4 or n=8):
+            // Index, i[0], ..., i[n]
+
+            int material = 0;
+            // Scan for material id
+            {
+              const std::string before_material = "ELSET=EB";
+              const std::size_t idx = line.find (before_material);
+              if (idx != std::string::npos)
+                {
+                  from_string (material, line.substr (idx + before_material.size()), std::dec);
+                }
+            }
+
+            // Read ELEMENT definition
+            std::getline (input_stream, line);
+            while (!input_stream.eof())
+              {
+                if (line[0] == '*')
+                  goto cont;
+
+                std::istringstream iss (line);
+                char comma;
+
+                // We will store the material id in the zeroth entry of the
+                // vector and the rest of the elements represent the global
+                // node numbers
+                const unsigned int n_data_per_cell = 1+GeometryInfo<dim>::vertices_per_cell;
+                std::vector <double> cell (n_data_per_cell);
+                for (unsigned int i = 0; i < n_data_per_cell; ++i)
+                  iss >> cell[i] >> comma;
+
+                // Overwrite cell index from file by material
+                cell[0] = static_cast<double> (material);
+                cell_list.push_back (cell);
+
+                std::getline (input_stream, line);
+              }
+          }
+        else if (line.compare (0, 8, "*SURFACE") == 0)
+          {
+            // Extract the definitions of boundary surfaces
+            // Old format from Cubit:
+            // *SURFACE, NAME=SS<boundary indicator>
+            //    <element index>,     S<face number>
+            // Abaqus default format:
+            // *SURFACE, TYPE=ELEMENT, NAME=SURF-<indicator>
+
+            // Get name of the surface and extract id from it;
+            // this will be the boundary indicator
+            const std::string name_key = "NAME=";
+            const std::size_t name_idx_start = line.find(name_key) + name_key.size();
+            std::size_t name_idx_end = line.find(',', name_idx_start);
+            if (name_idx_end == std::string::npos)
+              {
+                name_idx_end = line.size();
+              }
+            const int b_indicator = extract_int(line.substr(name_idx_start, name_idx_end - name_idx_start));
+
+            // Read SURFACE definition
+            // Note that the orientation of the faces is embedded within the
+            // definition of each "set" of faces that comprise the surface
+            // These are either marked by an "S" or "E" in 3d or 2d respectively.
+            std::getline (input_stream, line);
+            while (!input_stream.eof())
+              {
+                if (line[0] == '*')
+                  goto cont;
+
+                // Change all characters to upper case
+                std::transform(line.begin(), line.end(), line.begin(), ::toupper);
+
+                // Surface can be created from ELSET, or directly from cells
+                // If elsets_list contains a key with specific name - refers to that ELSET, otherwise refers to cell
+                std::istringstream iss (line);
+                char comma;
+                int el_idx;
+                int face_number;
+                char temp;
+
+                // Get relevant faces, taking into account the element orientation
+                std::vector <double> quad_node_list;
+                const std::string elset_name = line.substr(0, line.find(','));
+                if (elsets_list.count(elset_name) != 0)
+                  {
+                    // Surface refers to ELSET
+                    std::string stmp;
+                    iss >> stmp >> temp >> face_number;
+
+                    const std::vector<int> cells = elsets_list[elset_name];
+                    for (unsigned int i = 0; i <cells.size(); ++i)
+                      {
+                        el_idx = cells[i];
+                        quad_node_list = get_global_node_numbers (el_idx, face_number);
+                        // The boundary indicator goes in front of the node numbers
+                        quad_node_list.insert (quad_node_list.begin(), b_indicator);
+
+                        face_list.push_back (quad_node_list);
+                      }
+                  }
+                else
+                  {
+                    // Surface refers directly to elements
+                    iss >> el_idx >> comma >> temp >> face_number;
+                    quad_node_list = get_global_node_numbers (el_idx, face_number);
+                    // The boundary indicator goes in front of the node numbers
+                    quad_node_list.insert (quad_node_list.begin(), b_indicator);
+
+                    face_list.push_back (quad_node_list);
+                  }
+
+                std::getline (input_stream, line);
+              }
+          }
+        else if (line.compare (0, 6, "*ELSET") == 0)
+          {
+            // Get ELSET name.
+            // Materials are attached to elsets with specific name
+            std::string elset_name;
+            {
+              const std::string elset_key = "*ELSET, ELSET=";
+              const std::size_t idx = line.find(elset_key);
+              if (idx != std::string::npos)
+                {
+                  const std::string comma = ",";
+                  const std::size_t first_comma = line.find(comma);
+                  const std::size_t second_comma = line.find(comma, first_comma+1);
+                  const std::size_t elset_name_start = line.find(elset_key) + elset_key.size();
+                  elset_name = line.substr(elset_name_start, second_comma-elset_name_start);
+                }
+
+            }
+
+            // There are two possibilities of storing cells numbers in ELSET:
+            // 1. If the header contains the 'GENERATE' keyword, then the next line describes range of cells as:
+            //    cell_id_start, cell_id_end, cell_step
+            // 2. If the header does not contain the 'GENERATE' keyword, then the next lines contain cells numbers
+            std::vector<int> elements;
+            const std::size_t generate_idx = line.find("GENERATE");
+            if (generate_idx != std::string::npos)
+              {
+                // Option (1)
+                std::getline (input_stream, line);
+                std::istringstream iss (line);
+                char comma;
+                int elid_start = 0;
+                int elid_end = 0;
+                int elis_step = 1; // Default value set in case stride not provided
+                // Some files don't have the stride size
+                // Compare mesh test cases ./grids/abaqus/3d/other_simple.inp to ./grids/abaqus/2d/2d_test_abaqus.inp
+                iss >> elid_start >> comma >> elid_end;
+                // If the two bounds could not be read, the set is left empty
+                // instead of being built from unspecified values
+                const bool range_is_valid = !iss.fail();
+                // https://stackoverflow.com/questions/8046357/how-do-i-check-if-a-stringstream-variable-is-empty-null
+                if (range_is_valid && iss.rdbuf()->in_avail() != 0)
+                  {
+                    // Only accept a stride that was actually read and that
+                    // is positive: trailing characters without a number
+                    // (such as a lone comma) must not overwrite the default,
+                    // and a stride of zero or less would never let the loop
+                    // below reach elid_end
+                    int stride_from_file = 0;
+                    iss >> comma >> stride_from_file;
+                    if (!iss.fail() && stride_from_file > 0)
+                      elis_step = stride_from_file;
+                  }
+                if (range_is_valid)
+                  for (int i = elid_start; i <= elid_end; i+= elis_step)
+                    {
+                      elements.push_back(i);
+                    }
+                elsets_list[elset_name] = elements;
+
+                std::getline (input_stream, line);
+              }
+            else
+              {
+                // Option (2)
+                std::getline (input_stream, line);
+                while (!input_stream.eof())
+                  {
+                    if (line[0] == '*')
+                      break;
+
+                    std::istringstream iss (line);
+                    char comma;
+                    int elid;
+                    // Store only ids that were actually extracted: a
+                    // trailing comma, trailing whitespace or an empty line
+                    // must not add an entry to the set
+                    while (iss >> elid)
+                      {
+                        elements.push_back (elid);
+                        iss >> comma;
+                      }
+
+                    std::getline (input_stream, line);
+                  }
+
+                elsets_list[elset_name] = elements;
+              }
+
+            goto cont;
+          }
+        else if (line.compare (0, 5, "*NSET") == 0)
+          {
+            // Skip nodesets; we have no use for them
+            while (!input_stream.eof())
+              {
+                std::getline (input_stream, line);
+                if (line[0] == '*')
+                  goto cont;
+              }
+          }
+        else if (line.compare(0, 14, "*SOLID SECTION") == 0)
+          {
+            // The ELSET name, which describes a section for particular material
+            const std::string elset_key = "ELSET=";
+            const std::size_t elset_start = line.find("ELSET=") + elset_key.size();
+            const std::size_t elset_end = line.find(',', elset_start+1);
+            const std::string elset_name = line.substr(elset_start, elset_end-elset_start);
+
+            // Solid material definition.
+            // We assume that material id is taken from material name,
+            // eg. "Material-1" -> ID=1
+            const std::string material_key = "MATERIAL=";
+            const std::size_t last_equal = line.find("MATERIAL=") + material_key.size();
+            const std::size_t material_id_start = line.find('-', last_equal);
+            int material_id = 0;
+            from_string(material_id, line.substr(material_id_start+1), std::dec);
+
+            // Assign material id to cells
+            const std::vector<int> &elset_cells = elsets_list[elset_name];
+            for (unsigned int i = 0; i < elset_cells.size(); ++i)
+              {
+                // Cell numbers in the file start at one
+                const int cell_id = elset_cells[i] - 1;
+                AssertThrow (cell_id >= 0 &&
+                             static_cast<std::size_t>(cell_id) < cell_list.size(),
+                             ExcMessage ("A SOLID SECTION refers to a cell number "
+                                         "for which no cell has been read."));
+                cell_list[cell_id][0] = material_id;
+              }
+          }
+        // Note: All other lines / entries are ignored
+
+        std::getline (input_stream, line);
+
+cont:
+        (void) 0;
+      }
+  }
+
+  // Return the global node numbers of one face of one cell.
+  //
+  // @p face_cell_no is the number of the cell, counted from one, so that
+  // it refers to entry face_cell_no-1 of cell_list. @p face_cell_face_no
+  // is the number of the face within the cell, also counted from one
+  // (1 to 4 in 2d, 1 to 6 in 3d). The returned vector has
+  // GeometryInfo<dim>::vertices_per_face entries.
+  //
+  // Throws an ExcMessage exception if either number is out of range, and
+  // ExcNotImplemented if dim is neither 2 nor 3.
+  template <int dim>
+  std::vector<double>
+  Abaqus_to_UCD<dim>::get_global_node_numbers (const int face_cell_no,
+                                               const int face_cell_face_no) const
+  {
+    std::vector<double> quad_node_list (GeometryInfo<dim>::vertices_per_face);
+
+    AssertThrow (face_cell_no >= 1 &&
+                 static_cast<std::size_t>(face_cell_no) <= cell_list.size(),
+                 ExcMessage("Invalid cell number in surface definition"));
+
+    // Entry 0 of a cell holds the material id, entries 1, 2, ... hold the
+    // global node numbers of the cell
+    const std::vector<double> &cell = cell_list[face_cell_no - 1];
+
+    // These orderings were reverse engineered by hand and may
+    // conceivably be erroneous.
+    // TODO: Currently one test (2d unstructured mesh) in the test
+    // suite fails, presumably because of an ordering issue.
+    if (dim == 2)
+      {
+        AssertThrow(face_cell_face_no >= 1 && face_cell_face_no <= 4,
+                    ExcMessage("Invalid face number in 2d"));
+
+        // Face f connects node f with its cyclic successor:
+        // (1,2), (2,3), (3,4), (4,1)
+        quad_node_list[0] = cell[face_cell_face_no];
+        quad_node_list[1] = cell[face_cell_face_no % 4 + 1];
+      }
+    else if (dim == 3)
+      {
+        AssertThrow(face_cell_face_no >= 1 && face_cell_face_no <= 6,
+                    ExcMessage("Invalid face number in 3d"));
+
+        // Entries of the cell that make up faces 1 to 6
+        static const unsigned int face_nodes[6][4] =
+        {
+          {1, 4, 3, 2},
+          {5, 8, 7, 6},
+          {1, 2, 6, 5},
+          {2, 3, 7, 6},
+          {3, 4, 8, 7},
+          {1, 5, 8, 4}
+        };
+
+        for (unsigned int i = 0; i < 4; ++i)
+          quad_node_list[i] = cell[face_nodes[face_cell_face_no - 1][i]];
+      }
+    else
+      {
+        AssertThrow(dim==2 || dim==3, ExcNotImplemented());
+      }
+
+    return quad_node_list;
+  }
+
+  // Write the contents of node_list, cell_list and face_list to @p output
+  // in the AVS UCD format: two title lines, one header line with the
+  // number of nodes and the number of cells plus faces, then one line per
+  // node, per cell and per face. Throws ExcIO if @p output is not in a
+  // good state on entry.
+  template <int dim>
+  void
+  Abaqus_to_UCD<dim>::write_out_avs_ucd (std::ostream &output) const
+  {
+    // References:
+    // http://www.dealii.org/developer/doxygen/deal.II/structGeometryInfo.html
+    // http://people.scs.fsu.edu/~burkardt/data/ucd/ucd.html
+
+    AssertThrow (output, ExcIO());
+
+    // Write out title - Note: No other commented text can be inserted below the
+    // title in a UCD file
+    output << "# Abaqus to UCD mesh conversion" << std::endl;
+    output << "# Mesh type: AVS UCD" << std::endl;
+
+    // ========================================================
+    // ASCII UCD File Format
+    // The input file cannot contain blank lines or lines with leading blanks.
+    // Comments, if present, must precede all data in the file.
+    // Comments within the data will cause read errors.
+    // The general order of the data is as follows:
+    // 1. Numbers defining the overall structure, including the number of nodes,
+    //    the number of cells, and the length of the vector of data associated
+    //    with the nodes, cells, and the model.
+    //     e.g. 1:
+    //        <num_nodes> <num_cells> <num_ndata> <num_cdata> <num_mdata>
+    //     e.g. 2:
+    //        n_elements = n_hex_cells + n_bc_quads + n_quad_cells + n_bc_edges
+    //        outfile.write(str(n_nodes) + " " + str(n_elements) + " 0 0 0\n")
+    // 2. For each node, its node id and the coordinates of that node in space.
+    //    Node-ids must be integers, but any number including non sequential
+    //    numbers can be used. Mid-edge nodes are treated like any other node.
+    // 3. For each cell: its cell-id, material, cell type (hexahedral, pyramid,
+    //    etc.), and the list of node-ids that correspond to each of the cell's
+    //    vertices. The below table specifies the different cell types and the
+    //    keyword used to represent them in the file.
+
+    // Header: number of nodes, number of cells and faces, and three zeros
+    output << node_list.size() << "\t";
+    output << (cell_list.size() + face_list.size()) << "\t0\t0\t0";
+    output << std::endl;
+
+    // One line per node: node number followed by the coordinates
+    for (unsigned int node = 0; node < node_list.size(); ++node)
+      {
+        // Node number
+        output << node_list[node][0] << "\t";
+
+        // Node coordinates, in scientific notation
+        for (unsigned int d = 1; d < dim + 1; ++d)
+          {
+            output.width (16);
+            output.setf (std::ios::scientific,
+                         std::ios::floatfield);
+            output.precision (8);
+
+            // invoke tolerance -> set points close to zero equal to zero
+            const bool exceeds_tolerance = (std::abs (node_list[node][d]) > tolerance);
+            const double coordinate = (exceeds_tolerance
+                                       ?
+                                       static_cast<double> (node_list[node][d])
+                                       :
+                                       0.0);
+            output << coordinate << "\t";
+          }
+
+        // In 2d, a zero is written in place of the third coordinate
+        if (dim == 2)
+          output << 0.0 << "\t";
+
+        output << std::endl;
+
+        // Leave scientific notation again before the next node number
+        output.unsetf (std::ios::floatfield);
+      }
+
+    // One line per cell: running number, material id (entry 0), cell
+    // type, and the node numbers of the cell
+    const char *const cell_type = (dim == 2 ? "quad" : "hex");
+    for (unsigned int cell = 0; cell < cell_list.size(); ++cell)
+      {
+        output << cell + 1 << "\t"
+               << cell_list[cell][0] << "\t"
+               << cell_type << "\t";
+
+        for (unsigned int v = 1; v < GeometryInfo<dim>::vertices_per_cell + 1; ++v)
+          output << cell_list[cell][v] << "\t";
+
+        output << std::endl;
+      }
+
+    // One line per face: running number, entry 0 of the face, face type,
+    // and the node numbers of the face
+    const char *const face_type = (dim == 2 ? "line" : "quad");
+    for (unsigned int face = 0; face < face_list.size(); ++face)
+      {
+        output << face + 1 << "\t"
+               << face_list[face][0] << "\t"
+               << face_type << "\t";
+
+        for (unsigned int v = 1; v < GeometryInfo<dim>::vertices_per_face + 1; ++v)
+          output << face_list[face][v] << "\t";
+
+        output << std::endl;
+      }
+  }
+}
+
+
+//explicit instantiations
+#include "grid_in.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/grid/grid_in.inst.in b/source/grid/grid_in.inst.in
new file mode 100644
index 0000000..4a17b02
--- /dev/null
+++ b/source/grid/grid_in.inst.in
@@ -0,0 +1,23 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+// Explicitly instantiate GridIn for every pair of dimension and space
+// dimension taken from DIMENSIONS, restricted to those pairs in which
+// the dimension does not exceed the space dimension.
+for (deal_II_dimension, deal_II_space_dimension : DIMENSIONS)
+  {
+#if deal_II_dimension <= deal_II_space_dimension
+    template class GridIn<deal_II_dimension, deal_II_space_dimension>;
+#endif
+  }
+
diff --git a/source/grid/grid_out.cc b/source/grid/grid_out.cc
new file mode 100644
index 0000000..324b6ea
--- /dev/null
+++ b/source/grid/grid_out.cc
@@ -0,0 +1,3977 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/grid/grid_out.h>
+#include <deal.II/base/parameter_handler.h>
+#include <deal.II/base/exceptions.h>
+#include <deal.II/base/point.h>
+#include <deal.II/base/quadrature.h>
+#include <deal.II/base/qprojector.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/fe/mapping.h>
+
+#include <cstring>
+#include <iomanip>
+#include <algorithm>
+#include <list>
+#include <set>
+#include <ctime>
+#include <cmath>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace GridOutFlags
+{
+  DX::DX (const bool write_cells,      // Constructor: every argument initializes
+          const bool write_faces,      // the member flag of the same name.
+          const bool write_diameter,
+          const bool write_measure,
+          const bool write_all_faces) :
+    write_cells (write_cells),
+    write_faces (write_faces),
+    write_diameter (write_diameter),
+    write_measure (write_measure),
+    write_all_faces (write_all_faces)
+  {} // nothing to do beyond member initialization
+
+  void DX::declare_parameters (ParameterHandler &param)
+  { // Declares the five boolean DX entries in param, together with their defaults and help texts.
+    param.declare_entry("Write cells", "true", Patterns::Bool(),
+                        "Write the mesh connectivity as DX grid cells");
+    param.declare_entry("Write faces", "false", Patterns::Bool(),
+                        "Write faces of cells. These may be boundary faces "
+                        "or all faces between mesh cells, according to "
+                        "\"Write all faces\"");
+    param.declare_entry("Write diameter", "false", Patterns::Bool(),
+                        "If cells are written, additionally write their"
+                        " diameter as data for visualization");
+    param.declare_entry("Write measure", "false", Patterns::Bool(),
+                        "Write the volume of each cell as data");
+    param.declare_entry("Write all faces", "true", Patterns::Bool(),
+                        "Write all faces, not only boundary");
+  }
+
+  void DX::parse_parameters (ParameterHandler &param)
+  { // Copies the five boolean DX entries from param into the members of this object.
+    write_cells = param.get_bool("Write cells");
+    write_faces = param.get_bool("Write faces");
+    write_diameter = param.get_bool("Write diameter");
+    write_measure = param.get_bool("Write measure");
+    write_all_faces = param.get_bool("Write all faces");
+  }
+
+
+  Msh::Msh (const bool write_faces,     // Constructor: each argument initializes
+            const bool write_lines) :   // the member flag of the same name.
+    write_faces (write_faces),
+    write_lines (write_lines)
+  {}
+
+  void Msh::declare_parameters (ParameterHandler &param)
+  { // Declares two boolean entries, both defaulting to "false"; no help text is supplied.
+    param.declare_entry("Write faces", "false", Patterns::Bool());
+    param.declare_entry("Write lines", "false", Patterns::Bool());
+  }
+
+
+  void Msh::parse_parameters (ParameterHandler &param)
+  { // Copies "Write faces" and "Write lines" from param into the members of this object.
+    write_faces = param.get_bool("Write faces");
+    write_lines = param.get_bool("Write lines");
+  }
+
+
+  Ucd::Ucd (const bool write_preamble,  // Constructor: each argument initializes
+            const bool write_faces,     // the member flag of the same name.
+            const bool write_lines) :
+    write_preamble (write_preamble),
+    write_faces (write_faces),
+    write_lines (write_lines)
+  {}
+
+
+
+  void Ucd::declare_parameters (ParameterHandler &param)
+  { // Declares three boolean entries; only "Write preamble" defaults to "true".
+    param.declare_entry("Write preamble", "true", Patterns::Bool());
+    param.declare_entry("Write faces", "false", Patterns::Bool());
+    param.declare_entry("Write lines", "false", Patterns::Bool());
+  }
+
+
+  void Ucd::parse_parameters (ParameterHandler &param)
+  { // Copies the three boolean UCD entries from param into the members of this object.
+    write_preamble = param.get_bool("Write preamble");
+    write_faces = param.get_bool("Write faces");
+    write_lines = param.get_bool("Write lines");
+  }
+
+
+  Gnuplot::Gnuplot (const bool write_cell_numbers,             // Constructor: each argument
+                    const unsigned int n_boundary_face_points, // initializes the member of
+                    const bool         curved_inner_cells) :   // the same name.
+    write_cell_numbers (write_cell_numbers),
+    n_boundary_face_points(n_boundary_face_points),
+    curved_inner_cells(curved_inner_cells)
+  {}
+
+
+
+  void Gnuplot::declare_parameters (ParameterHandler &param)
+  { // Declares "Cell number" and "Boundary points"; no entry is declared for curved_inner_cells.
+    param.declare_entry("Cell number", "false", Patterns::Bool());
+    param.declare_entry("Boundary points", "2", Patterns::Integer());
+  }
+
+
+  void Gnuplot::parse_parameters (ParameterHandler &param)
+  { // Reads the two declared entries; curved_inner_cells is not modified here.
+    write_cell_numbers = param.get_bool("Cell number");
+    n_boundary_face_points = param.get_integer("Boundary points"); // NOTE(review): integer result stored in an unsigned member
+  }
+
+
+  EpsFlagsBase::EpsFlagsBase (const SizeType     size_type,   // Constructor of the flags shared by
+                              const unsigned int size,        // all Eps<dim> flag classes: each
+                              const double       line_width,  // argument initializes the member
+                              const bool color_lines_on_user_flag, // of the same name.
+                              const unsigned int n_boundary_face_points,
+                              const bool color_lines_level) :
+    size_type (size_type),
+    size (size),
+    line_width (line_width),
+    color_lines_on_user_flag(color_lines_on_user_flag),
+    n_boundary_face_points(n_boundary_face_points),
+    color_lines_level(color_lines_level)
+  {}
+
+
+  void EpsFlagsBase::declare_parameters (ParameterHandler &param)
+  { // Declares the entries shared by all Eps<dim> flag classes, with defaults and help texts.
+    param.declare_entry("Size by", "width",
+                        Patterns::Selection("width|height"),
+                        "Depending on this parameter, either the " // trailing blank keeps "the width" apart
+                        "width or height "
+                        "of the eps is scaled to \"Size\"");
+    param.declare_entry("Size", "300", Patterns::Integer(),
+                        "Size of the output in points");
+    param.declare_entry("Line width", "0.5", Patterns::Double(),
+                        "Width of the lines drawn in points");
+    param.declare_entry("Color by flag", "false", Patterns::Bool(),
+                        "Draw lines with user flag set in different color");
+    param.declare_entry("Boundary points", "2", Patterns::Integer(),
+                        "Number of points on boundary edges. "
+                        "Increase this beyond 2 to see curved boundaries.");
+    param.declare_entry("Color by level", "false", Patterns::Bool(),
+                        "Draw different colors according to grid level.");
+  }
+
+
+  void EpsFlagsBase::parse_parameters (ParameterHandler &param)
+  {
+    // Only "width" and "height" change size_type; any other text leaves it as it was.
+    const std::string size_by = param.get("Size by");
+    if (size_by == "width") size_type = width;
+    else if (size_by == "height") size_type = height;
+    size                     = param.get_integer("Size");
+    line_width               = param.get_double("Line width");
+    color_lines_on_user_flag = param.get_bool("Color by flag");
+    n_boundary_face_points   = param.get_integer("Boundary points");
+    color_lines_level        = param.get_bool("Color by level");
+  }
+
+
+
+  Eps<1>::Eps (const SizeType     size_type,     // Constructor for the 1d EPS flags: all
+               const unsigned int size,          // five arguments are forwarded unchanged
+               const double       line_width,    // to the EpsFlagsBase constructor.
+               const bool color_lines_on_user_flag,
+               const unsigned int n_boundary_face_points)
+    :
+    EpsFlagsBase(size_type, size, line_width,
+                 color_lines_on_user_flag,
+                 n_boundary_face_points) // color_lines_level is not passed; presumably the base declaration defaults it
+  {}
+
+
+  void Eps<1>::declare_parameters (ParameterHandler &) // the 1d flags declare no entries of their own
+  {}
+
+
+  void Eps<1>::parse_parameters (ParameterHandler &param)
+  { // Only the entries handled by the base class are read for 1d.
+    EpsFlagsBase::parse_parameters(param);
+  }
+
+
+
+  Eps<2>::Eps (const SizeType     size_type,     // Constructor for the 2d EPS flags.
+               const unsigned int size,
+               const double       line_width,
+               const bool color_lines_on_user_flag,
+               const unsigned int n_boundary_face_points,
+               const bool         write_cell_numbers,
+               const bool         write_cell_number_level,
+               const bool         write_vertex_numbers,
+               const bool         color_lines_level
+              )
+    :
+    EpsFlagsBase(size_type, size, line_width,    // six arguments go to the base class ...
+                 color_lines_on_user_flag,
+                 n_boundary_face_points,
+                 color_lines_level),
+    write_cell_numbers (write_cell_numbers),     // ... the remaining three are 2d-specific members
+    write_cell_number_level (write_cell_number_level),
+    write_vertex_numbers (write_vertex_numbers)
+  {}
+
+
+  void Eps<2>::declare_parameters (ParameterHandler &param)
+  { // Declares the three boolean entries specific to 2d EPS output; all default to "false".
+    param.declare_entry("Cell number", "false", Patterns::Bool(),
+                        "(2D only) Write cell numbers"
+                        " into the centers of cells");
+    param.declare_entry("Level number", "false", Patterns::Bool(),
+                        "(2D only) if \"Cell number\" is true, write " // trailing blank keeps "write numbers" apart
+                        "numbers in the form level.number");
+    param.declare_entry("Vertex number", "false", Patterns::Bool(),
+                        "Write numbers for each vertex");
+  }
+
+
+  void Eps<2>::parse_parameters (ParameterHandler &param)
+  { // Reads the shared entries through the base class, then the three 2d-specific booleans.
+    EpsFlagsBase::parse_parameters(param);
+    write_cell_numbers = param.get_bool("Cell number");
+    write_cell_number_level = param.get_bool("Level number");
+    write_vertex_numbers = param.get_bool("Vertex number");
+  }
+
+
+
+  Eps<3>::Eps (const SizeType     size_type,     // Constructor for the 3d EPS flags: the
+               const unsigned int size,          // first five arguments go to EpsFlagsBase,
+               const double       line_width,    // the two angles initialize the members
+               const bool color_lines_on_user_flag, // of the same name.
+               const unsigned int n_boundary_face_points,
+               const double        azimut_angle,
+               const double        turn_angle)
+    :
+    EpsFlagsBase(size_type, size, line_width,
+                 color_lines_on_user_flag,
+                 n_boundary_face_points), // color_lines_level is not passed; presumably the base declaration defaults it
+    azimut_angle (azimut_angle),
+    turn_angle (turn_angle)
+  {}
+
+
+  void Eps<3>::declare_parameters (ParameterHandler &param)
+  { // Declares the two view-angle entries of 3d EPS output; both default to "30".
+    param.declare_entry("Azimuth", "30", Patterns::Double(),
+                        "Azimuth of the view point, that is, the angle "
+                        "in the plane from the x-axis.");
+    param.declare_entry("Elevation", "30", Patterns::Double(),
+                        "Elevation of the view point above the xy-plane.");
+  }
+
+
+  void Eps<3>::parse_parameters (ParameterHandler &param)
+  { // Reads the shared entries through the base class, then converts the two view angles.
+    EpsFlagsBase::parse_parameters(param);
+    azimut_angle = 90- param.get_double("Elevation"); // member stores 90 minus the "Elevation" entry
+    turn_angle = param.get_double("Azimuth");          // member stores the "Azimuth" entry unchanged
+  }
+
+
+
+  XFig::XFig ()   // Default constructor: sets every XFig flag to a fixed initial value.
+    :
+    draw_boundary(true),
+    color_by(material_id),
+    level_depth(true),
+    n_boundary_face_points(0),
+    scaling(1.,1.),
+    fill_style (20),
+    line_style(0),
+    line_thickness(1),
+    boundary_style(0),
+    boundary_thickness(3)
+  {}
+
+
+  void XFig::declare_parameters (ParameterHandler &param)
+  { // Declares the XFig entries; none of them carries a help text.
+    param.declare_entry("Boundary", "true", Patterns::Bool());
+    param.declare_entry("Level color", "false", Patterns::Bool());
+    param.declare_entry("Level depth", "true", Patterns::Bool());
+//TODO: Unify this number with other output formats
+    param.declare_entry("Boundary points", "0", Patterns::Integer());
+    param.declare_entry("Fill style", "20", Patterns::Integer());
+    param.declare_entry("Line style", "0", Patterns::Integer());
+    param.declare_entry("Line width", "1", Patterns::Integer());
+    param.declare_entry("Boundary style", "0", Patterns::Integer());
+    param.declare_entry("Boundary width", "3", Patterns::Integer());
+  }
+
+
+  void XFig::parse_parameters (ParameterHandler &param)
+  { // NOTE(review): "Level color" is never read here, so color_by and scaling keep their values.
+    draw_boundary = param.get_bool("Boundary");
+    level_depth = param.get_bool("Level depth");
+    n_boundary_face_points = param.get_integer("Boundary points");
+    fill_style = param.get_integer("Fill style");
+    line_style = param.get_integer("Line style");
+    line_thickness = param.get_integer("Line width");       // entry "Line width" feeds line_thickness
+    boundary_style = param.get_integer("Boundary style");
+    boundary_thickness = param.get_integer("Boundary width"); // entry "Boundary width" feeds boundary_thickness
+  }
+
+  Svg::Svg(const unsigned int line_thickness,      // Constructor: arguments initialize the
+           const unsigned int boundary_line_thickness, // members of the same name; a few
+           bool margin,                            // members get fixed values instead
+           const Background background,            // (marked below).
+           const int azimuth_angle,
+           const int polar_angle,
+           const Coloring coloring,
+           const bool convert_level_number_to_height,
+           const bool label_level_number,
+           const bool label_cell_index,
+           const bool label_material_id,
+           const bool label_subdomain_id,
+           const bool draw_colorbar,
+           const bool draw_legend)
+    :
+    height(1000),                // fixed value, not a constructor argument
+    width(0),                    // fixed value, not a constructor argument
+    line_thickness(line_thickness),
+    boundary_line_thickness(boundary_line_thickness),
+    margin(margin),
+    background(background),
+    azimuth_angle(azimuth_angle),
+    polar_angle(polar_angle),
+    coloring(coloring),
+    convert_level_number_to_height(convert_level_number_to_height),
+    level_height_factor(0.3f),   // fixed value, not a constructor argument
+    cell_font_scaling(1.f),      // fixed value, not a constructor argument
+    label_level_number(label_level_number),
+    label_cell_index(label_cell_index),
+    label_material_id(label_material_id),
+    label_subdomain_id(label_subdomain_id),
+    label_level_subdomain_id(false), // fixed value, not a constructor argument
+    draw_colorbar(draw_colorbar),
+    draw_legend(draw_legend)
+  {}
+
+  MathGL::MathGL ()   // Default constructor: the bounding box flag starts out as false.
+    :
+    draw_bounding_box (false) // box
+  {}
+
+  void MathGL::declare_parameters (ParameterHandler &param)
+  { // Declares the single boolean entry "Draw bounding box", defaulting to "false".
+    param.declare_entry ("Draw bounding box", "false", Patterns::Bool ());
+  }
+
+  void MathGL::parse_parameters (ParameterHandler &param)
+  { // Copies "Draw bounding box" from param into draw_bounding_box.
+    draw_bounding_box = param.get_bool ("Draw bounding box");
+  }
+}  // end namespace GridOutFlags
+
+
+
+GridOut::GridOut ()   // Default constructor: no output format is selected initially.
+  :
+  default_format (none)
+{}
+
+
+void GridOut::set_flags (const GridOutFlags::DX &flags)
+{ // Stores a copy of the given DX flags in this object.
+  dx_flags = flags;
+}
+
+
+
+void GridOut::set_flags (const GridOutFlags::Msh &flags)
+{ // Stores a copy of the given Msh flags in this object.
+  msh_flags = flags;
+}
+
+
+void GridOut::set_flags (const GridOutFlags::Ucd &flags)
+{ // Stores a copy of the given Ucd flags in this object.
+  ucd_flags = flags;
+}
+
+
+
+void GridOut::set_flags (const GridOutFlags::Gnuplot &flags)
+{ // Stores a copy of the given Gnuplot flags in this object.
+  gnuplot_flags = flags;
+}
+
+
+
+void GridOut::set_flags (const GridOutFlags::Eps<1> &flags)
+{ // Stores a copy of the given 1d EPS flags in this object.
+  eps_flags_1 = flags;
+}
+
+
+
+void GridOut::set_flags (const GridOutFlags::Eps<2> &flags)
+{ // Stores a copy of the given 2d EPS flags in this object.
+  eps_flags_2 = flags;
+}
+
+
+
+void GridOut::set_flags (const GridOutFlags::Eps<3> &flags)
+{ // Stores a copy of the given 3d EPS flags in this object.
+  eps_flags_3 = flags;
+}
+
+
+
+void GridOut::set_flags (const GridOutFlags::XFig &flags)
+{ // Stores a copy of the given XFig flags in this object.
+  xfig_flags = flags;
+}
+
+
+void GridOut::set_flags (const GridOutFlags::Svg &flags)
+{ // Stores a copy of the given Svg flags in this object.
+  svg_flags = flags;
+}
+
+
+void GridOut::set_flags (const GridOutFlags::MathGL &flags)
+{ // Stores a copy of the given MathGL flags in this object.
+  mathgl_flags = flags;
+}
+
+void GridOut::set_flags (const GridOutFlags::Vtk &flags)
+{ // Stores a copy of the given Vtk flags in this object.
+  vtk_flags = flags;
+}
+
+void GridOut::set_flags (const GridOutFlags::Vtu &flags)
+{ // Stores a copy of the given Vtu flags in this object.
+  vtu_flags = flags;
+}
+
+std::string
+GridOut::default_suffix (const OutputFormat output_format)
+{ // Returns the file name suffix (including the leading dot) used for the given format.
+  switch (output_format)
+    {
+    case none:
+      return "";         // no format selected: empty suffix
+    case dx:
+      return ".dx";
+    case gnuplot:
+      return ".gnuplot";
+    case ucd:
+      return ".inp";     // the UCD suffix differs from the format name
+    case eps:
+      return ".eps";
+    case xfig:
+      return ".fig";     // the XFig suffix differs from the format name
+    case msh:
+      return ".msh";
+    case svg:
+      return ".svg";
+    case mathgl:
+      return ".mathgl";
+    case vtk:
+      return ".vtk";
+    case vtu:
+      return ".vtu";
+    default:
+      Assert (false, ExcNotImplemented()); // a value not listed above
+      return "";                           // returned if the Assert does not abort
+    }
+}
+
+
+
+std::string GridOut::default_suffix () const
+{
+  // Suffix belonging to the stored default_format, via the one-argument overload.
+  return this->default_suffix (this->default_format);
+}
+
+
+
+GridOut::OutputFormat
+GridOut::parse_output_format (const std::string &format_name)
+{
+  // Every accepted spelling paired with the enumerator it selects. Note
+  // that "false" is a second spelling of "none". Names are compared
+  // exactly (case-sensitive), and each name appears only once, so the
+  // order of the rows does not influence the result.
+  static const struct
+  {
+    const char   *name;
+    OutputFormat  format;
+  } known_formats[] =
+  {
+    { "none",    none    },
+    { "false",   none    },
+    { "dx",      dx      },
+    { "ucd",     ucd     },
+    { "gnuplot", gnuplot },
+    { "eps",     eps     },
+    { "xfig",    xfig    },
+    { "msh",     msh     },
+    { "svg",     svg     },
+    { "mathgl",  mathgl  },
+    { "vtk",     vtk     },
+    { "vtu",     vtu     }
+  };
+  const unsigned int n_known = sizeof(known_formats) / sizeof(known_formats[0]);
+
+  // Hand back the enumerator belonging to the matching row, if any.
+  for (unsigned int row = 0; row < n_known; ++row)
+    if (format_name == known_formats[row].name)
+      return known_formats[row].format;
+
+  // No row matched: report ExcInvalidState through AssertThrow. The
+  // return statement below only exists so that every path through the
+  // function ends in a return.
+  AssertThrow (false, ExcInvalidState ());
+  // return something weird
+  return OutputFormat(-1);
+}
+
+
+
+std::string GridOut::get_output_format_names ()
+{ // Format names joined by '|'; this string is handed to Patterns::Selection in declare_parameters.
+  return "none|dx|gnuplot|eps|ucd|xfig|msh|svg|mathgl|vtk|vtu";
+}
+
+
+void
+GridOut::declare_parameters(ParameterHandler &param)
+{ // Declares "Format" plus one subsection of entries per flag class.
+  param.declare_entry("Format", "none",
+                      Patterns::Selection(get_output_format_names()));
+
+  param.enter_subsection("DX");
+  GridOutFlags::DX::declare_parameters(param);
+  param.leave_subsection();
+
+  param.enter_subsection("Msh");
+  GridOutFlags::Msh::declare_parameters(param);
+  param.leave_subsection();
+
+  param.enter_subsection("Ucd");
+  GridOutFlags::Ucd::declare_parameters(param);
+  param.leave_subsection();
+
+  param.enter_subsection("Gnuplot");
+  GridOutFlags::Gnuplot::declare_parameters(param);
+  param.leave_subsection();
+
+  param.enter_subsection("Eps"); // shared and per-dimension EPS entries all live in one subsection
+  GridOutFlags::EpsFlagsBase::declare_parameters(param);
+  GridOutFlags::Eps<1>::declare_parameters(param);
+  GridOutFlags::Eps<2>::declare_parameters(param);
+  GridOutFlags::Eps<3>::declare_parameters(param);
+  param.leave_subsection();
+
+  param.enter_subsection("XFig");
+  GridOutFlags::XFig::declare_parameters(param);
+  param.leave_subsection();
+
+  param.enter_subsection("MathGL");
+  GridOutFlags::MathGL::declare_parameters(param);
+  param.leave_subsection();
+
+  param.enter_subsection("Vtk");
+  GridOutFlags::Vtk::declare_parameters(param);
+  param.leave_subsection();
+
+  param.enter_subsection("Vtu");
+  GridOutFlags::Vtu::declare_parameters(param);
+  param.leave_subsection(); // note: no subsection is declared for the Svg flags
+}
+
+
+
+void
+GridOut::parse_parameters(ParameterHandler &param)
+{ // Reads "Format" into default_format, then lets each flag object read its own subsection.
+  default_format = parse_output_format(param.get("Format"));
+
+  param.enter_subsection("DX");
+  dx_flags.parse_parameters(param);
+  param.leave_subsection();
+
+  param.enter_subsection("Msh");
+  msh_flags.parse_parameters(param);
+  param.leave_subsection();
+
+  param.enter_subsection("Ucd");
+  ucd_flags.parse_parameters(param);
+  param.leave_subsection();
+
+  param.enter_subsection("Gnuplot");
+  gnuplot_flags.parse_parameters(param);
+  param.leave_subsection();
+
+  param.enter_subsection("Eps"); // all three EPS flag objects read from the same subsection
+  eps_flags_1.parse_parameters(param);
+  eps_flags_2.parse_parameters(param);
+  eps_flags_3.parse_parameters(param);
+  param.leave_subsection();
+
+  param.enter_subsection("XFig");
+  xfig_flags.parse_parameters(param);
+  param.leave_subsection();
+
+  param.enter_subsection("MathGL");
+  mathgl_flags.parse_parameters(param);
+  param.leave_subsection();
+
+  param.enter_subsection("Vtk");
+  vtk_flags.parse_parameters(param);
+  param.leave_subsection();
+
+  param.enter_subsection("Vtu");
+  vtu_flags.parse_parameters(param);
+  param.leave_subsection(); // note: svg_flags is not touched by this function
+}
+
+
+
+std::size_t
+GridOut::memory_consumption () const
+{ // Sum of sizeof() over the twelve flag members; default_format is not part of the sum.
+  return (sizeof(dx_flags)      +
+          sizeof(msh_flags)     +
+          sizeof(ucd_flags)     +
+          sizeof(gnuplot_flags) +
+          sizeof(eps_flags_1)   +
+          sizeof(eps_flags_2)   +
+          sizeof(eps_flags_3)   +
+          sizeof(xfig_flags)    +
+          sizeof(svg_flags)     +
+          sizeof(mathgl_flags)  +
+          sizeof(vtk_flags)     +
+          sizeof(vtu_flags));
+}
+
+
+
+template <>
+void GridOut::write_dx (const Triangulation<1> &,   // DX output is not implemented for 1d
+                        std::ostream &) const       // meshes: both arguments are ignored.
+{
+  Assert (false, ExcNotImplemented());
+}
+
+template <>
+void GridOut::write_dx (const Triangulation<1,2> &, // DX output is not implemented for 1d
+                        std::ostream &) const       // meshes in 2d space: arguments are ignored.
+{
+  Assert (false, ExcNotImplemented());
+}
+
+template <>
+void GridOut::write_dx (const Triangulation<1,3> &, // DX output is not implemented for 1d
+                        std::ostream &) const       // meshes in 3d space: arguments are ignored.
+{
+  Assert (false, ExcNotImplemented());
+}
+
+
+
+template <int dim, int spacedim>   // Writes the active cells (and optionally their faces) of tria
+void GridOut::write_dx (const Triangulation<dim, spacedim> &tria,   // to out as OpenDX objects,
+                        std::ostream             &out) const        // controlled by dx_flags.
+{
+//TODO:[GK] allow for boundary faces only
+  Assert(dx_flags.write_all_faces, ExcNotImplemented());
+  AssertThrow (out, ExcIO());
+  // Copied and adapted from write_ucd
+  const std::vector<Point<spacedim> > &vertices    = tria.get_vertices();
+  const std::vector<bool>        &vertex_used = tria.get_used_vertices();
+
+  const unsigned int n_vertices = tria.n_used_vertices();
+
+  // vertices are implicitly numbered from 0 to
+  // n_vertices-1. we have to renumber the
+  // vertices, because otherwise we would end
+  // up with wrong results, if there are unused
+  // vertices
+  std::vector<unsigned int> renumber(vertices.size());
+  // fill this vector with new vertex numbers
+  // ranging from 0 to n_vertices-1
+  unsigned int new_number=0;
+  for (unsigned int i=0; i<vertices.size(); ++i)
+    if (vertex_used[i])
+      renumber[i]=new_number++;
+  Assert(new_number==n_vertices, ExcInternalError());
+
+  typename Triangulation<dim, spacedim>::active_cell_iterator       cell;
+  const typename Triangulation<dim, spacedim>::active_cell_iterator endc=tria.end();
+
+  // write the vertices. every item written below is a Point<spacedim>,
+  // so the declared shape has to be spacedim (it equals dim only if dim==spacedim)
+  out << "object \"vertices\" class array type float rank 1 shape " << spacedim
+      << " items " << n_vertices << " data follows"
+      << '\n';
+
+  for (unsigned int i=0; i<vertices.size(); ++i)
+    if (vertex_used[i])
+      out << '\t' << vertices[i] << '\n';
+
+  // write cells or faces
+  const bool write_cells = dx_flags.write_cells;
+  const bool write_faces = (dim>1) ? dx_flags.write_faces : false;
+
+  const unsigned int n_cells = tria.n_active_cells();
+  const unsigned int n_faces = tria.n_active_cells()
+                               * GeometryInfo<dim>::faces_per_cell; // faces are listed once per adjacent cell
+
+  const unsigned int n_vertices_per_cell = GeometryInfo<dim>::vertices_per_cell;
+  const unsigned int n_vertices_per_face = GeometryInfo<dim>::vertices_per_face;
+
+  if (write_cells)
+    {
+      out << "object \"cells\" class array type int rank 1 shape "
+          << n_vertices_per_cell
+          << " items " << n_cells << " data follows" << '\n';
+
+      for (cell = tria.begin_active(); cell != endc; ++cell)
+        {
+          for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+            out << '\t' << renumber[cell->vertex_index(GeometryInfo<dim>::dx_to_deal[v])];
+          out << '\n';
+        }
+      out << "attribute \"element type\" string \"";
+      if (dim==1) out << "lines";
+      if (dim==2) out << "quads";
+      if (dim==3) out << "cubes";
+      out << "\"" << '\n'
+          << "attribute \"ref\" string \"positions\"" << '\n' << '\n';
+
+      // Additional cell information
+
+      out << "object \"material\" class array type int rank 0 items "
+          << n_cells << " data follows" << '\n';
+      for (cell = tria.begin_active(); cell != endc; ++cell)
+        out << ' ' << (unsigned int)cell->material_id();
+      out  << '\n'
+           << "attribute \"dep\" string \"connections\"" << '\n' << '\n';
+
+      out << "object \"level\" class array type int rank 0 items "
+          << n_cells << " data follows" << '\n';
+      for (cell = tria.begin_active(); cell != endc; ++cell)
+        out << ' ' << cell->level();
+      out  << '\n'
+           << "attribute \"dep\" string \"connections\"" << '\n' << '\n';
+
+      if (dx_flags.write_measure)
+        {
+          out << "object \"measure\" class array type float rank 0 items "
+              << n_cells << " data follows" << '\n';
+          for (cell = tria.begin_active(); cell != endc; ++cell)
+            out << '\t' << cell->measure();
+          out  << '\n'
+               << "attribute \"dep\" string \"connections\"" << '\n' << '\n';
+        }
+
+      if (dx_flags.write_diameter)
+        {
+          out << "object \"diameter\" class array type float rank 0 items "
+              << n_cells << " data follows" << '\n';
+          for (cell = tria.begin_active(); cell != endc; ++cell)
+            out << '\t' << cell->diameter();
+          out  << '\n'
+               << "attribute \"dep\" string \"connections\"" << '\n' << '\n';
+        }
+    }
+
+  if (write_faces)
+    {
+      out << "object \"faces\" class array type int rank 1 shape "
+          << n_vertices_per_face
+          << " items " << n_faces << " data follows"
+          << '\n';
+
+      for (cell = tria.begin_active(); cell != endc; ++cell)
+        {
+          for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+            {
+              typename Triangulation<dim, spacedim>::face_iterator face = cell->face(f);
+
+              for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_face; ++v)
+                out << '\t' << renumber[face->vertex_index(GeometryInfo<dim-1>::dx_to_deal[v])];
+              out << '\n';
+            }
+        }
+      out << "attribute \"element type\" string \"";
+      if (dim==2) out << "lines";
+      if (dim==3) out << "quads";
+      out << "\"" << '\n'
+          << "attribute \"ref\" string \"positions\"" << '\n' << '\n';
+
+
+      // Additional face information
+
+      out << "object \"boundary\" class array type int rank 0 items "
+          << n_faces << " data follows" << '\n';
+      for (cell = tria.begin_active(); cell != endc; ++cell)
+        {
+          // Little trick to get -1
+          // for the interior
+          for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+            out << ' ' << (int)(signed char)cell->face(f)->boundary_id();
+          out << '\n';
+        }
+      out << "attribute \"dep\" string \"connections\"" << '\n' << '\n';
+
+      if (dx_flags.write_measure)
+        {
+          out << "object \"face measure\" class array type float rank 0 items "
+              << n_faces << " data follows" << '\n';
+          for (cell = tria.begin_active(); cell != endc; ++cell)
+            {
+              for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+                out << ' ' << cell->face(f)->measure();
+              out << '\n';
+            }
+          out << "attribute \"dep\" string \"connections\"" << '\n' << '\n';
+        }
+
+      if (dx_flags.write_diameter)
+        {
+          out << "object \"face diameter\" class array type float rank 0 items "
+              << n_faces << " data follows" << '\n';
+          for (cell = tria.begin_active(); cell != endc; ++cell)
+            {
+              for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+                out << ' ' << cell->face(f)->diameter();
+              out << '\n';
+            }
+          out << "attribute \"dep\" string \"connections\"" << '\n' << '\n';
+        }
+
+    }
+
+
+  // Write additional face information
+  // (placeholder: both branches below are empty, so nothing is written here)
+  if (write_faces)
+    {
+
+    }
+  else
+    {
+    }
+
+  // The wrapper
+  out << "object \"deal data\" class field" << '\n'
+      << "component \"positions\" value \"vertices\"" << '\n'
+      << "component \"connections\" value \"cells\"" << '\n'; // NOTE(review): emitted even when write_cells is false
+
+  if (write_cells)
+    {
+      out << "object \"cell data\" class field" << '\n'
+          << "component \"positions\" value \"vertices\"" << '\n'
+          << "component \"connections\" value \"cells\"" << '\n';
+      out << "component \"material\" value \"material\"" << '\n';
+      out << "component \"level\" value \"level\"" << '\n';
+      if (dx_flags.write_measure)
+        out << "component \"measure\" value \"measure\"" << '\n';
+      if (dx_flags.write_diameter)
+        out << "component \"diameter\" value \"diameter\"" << '\n';
+    }
+
+  if (write_faces)
+    {
+      out << "object \"face data\" class field" << '\n'
+          << "component \"positions\" value \"vertices\"" << '\n'
+          << "component \"connections\" value \"faces\"" << '\n';
+      out << "component \"boundary\" value \"boundary\"" << '\n';
+      if (dx_flags.write_measure)
+        out << "component \"measure\" value \"face measure\"" << '\n';
+      if (dx_flags.write_diameter)
+        out << "component \"diameter\" value \"face diameter\"" << '\n';
+    }
+
+  out << '\n'
+      << "object \"grid data\" class group" << '\n';
+  if (write_cells)
+    out << "member \"cells\" value \"cell data\"" << '\n';
+  if (write_faces)
+    out << "member \"faces\" value \"face data\"" << '\n';
+  out << "end" << '\n';
+
+  // make sure everything now gets to
+  // disk
+  out.flush ();
+
+  AssertThrow (out, ExcIO());
+}
+
+
+
+
+template <int dim, int spacedim>   // Writes the active cells of tria (and, depending on msh_flags,
+void GridOut::write_msh (const Triangulation<dim, spacedim> &tria,   // faces and lines) to out in
+                         std::ostream             &out) const        // the $NOD/$ELM format below.
+{
+  AssertThrow (out, ExcIO());
+
+  // get the positions of the
+  // vertices and whether they are
+  // used.
+  const std::vector<Point<spacedim> > &vertices    = tria.get_vertices();
+  const std::vector<bool>        &vertex_used = tria.get_used_vertices();
+
+  const unsigned int n_vertices = tria.n_used_vertices();
+
+  typename Triangulation<dim,spacedim>::active_cell_iterator       cell=tria.begin_active();
+  const typename Triangulation<dim,spacedim>::active_cell_iterator endc=tria.end();
+
+  // Write Header
+  // The file format is:
+  /*
+
+
+  $NOD
+  number-of-nodes
+  node-number x-coord y-coord z-coord
+  ...
+  $ENDNOD
+  $ELM
+  number-of-elements
+  elm-number elm-type reg-phys reg-elem number-of-nodes node-number-list
+  ...
+  $ENDELM
+  */
+  out << "$NOD" << '\n'
+      << n_vertices << '\n';
+
+  // actually write the vertices.
+  // note that we shall number them
+  // with first index 1 instead of 0
+  for (unsigned int i=0; i<vertices.size(); ++i)
+    if (vertex_used[i])
+      {
+        out << i+1                 // vertex index
+            << "  "
+            << vertices[i];
+        for (unsigned int d=spacedim+1; d<=3; ++d)
+          out << " 0";             // fill with zeroes
+        out << '\n';
+      }
+
+  // Write cells preamble
+  out << "$ENDNOD" << '\n'
+      << "$ELM" << '\n'
+      << tria.n_active_cells() + ((msh_flags.write_faces ?
+                                   n_boundary_faces(tria) : 0) +
+                                  (msh_flags.write_lines ?
+                                   n_boundary_lines(tria) : 0)) << '\n';
+
+  /*
+    elm-type
+    defines the geometrical type of the n-th element:
+    1
+    Line (2 nodes).
+    2
+    Triangle (3 nodes).
+    3
+    Quadrangle (4 nodes).
+    4
+    Tetrahedron (4 nodes).
+    5
+    Hexahedron (8 nodes).
+    6
+    Prism (6 nodes).
+    7
+    Pyramid (5 nodes).
+    8
+    Second order line (3 nodes: 2 associated with the vertices and 1 with the edge).
+    9
+    Second order triangle (6 nodes: 3 associated with the vertices and 3 with the edges).
+    10
+    Second order quadrangle (9 nodes: 4 associated with the vertices, 4 with the edges and 1 with the face).
+    11
+    Second order tetrahedron (10 nodes: 4 associated with the vertices and 6 with the edges).
+    12
+    Second order hexahedron (27 nodes: 8 associated with the vertices, 12 with the edges, 6 with the faces and 1 with the volume).
+    13
+    Second order prism (18 nodes: 6 associated with the vertices, 9 with the edges and 3 with the quadrangular faces).
+    14
+    Second order pyramid (14 nodes: 5 associated with the vertices, 8 with the edges and 1 with the quadrangular face).
+    15
+    Point (1 node).
+  */
+  unsigned int elm_type = 0; // initialized so the default branch below never leaves it indeterminate
+  switch (dim)
+    {
+    case 1:
+      elm_type = 1;
+      break;
+    case 2:
+      elm_type = 3;
+      break;
+    case 3:
+      elm_type = 5;
+      break;
+    default:
+      Assert(false, ExcNotImplemented());
+    }
+
+  // write cells. Enumerate cells
+  // consecutively, starting with 1
+  for (cell=tria.begin_active(); cell!=endc; ++cell)
+    {
+      out << cell->active_cell_index()+1 << ' ' << elm_type << ' '
+          << static_cast<unsigned int>(cell->material_id()) << ' '
+          << cell->subdomain_id() << ' '
+          << GeometryInfo<dim>::vertices_per_cell << ' ';
+
+      // Vertex numbering follows UCD conventions.
+
+      for (unsigned int vertex=0; vertex<GeometryInfo<dim>::vertices_per_cell;
+           ++vertex)
+        out << cell->vertex_index(GeometryInfo<dim>::ucd_to_deal[vertex])+1 << ' ';
+      out << '\n';
+    }
+
+  // write faces and lines with non-zero boundary indicator
+  unsigned int next_element_index = tria.n_active_cells()+1;
+  if (msh_flags.write_faces)
+    {
+      next_element_index = write_msh_faces (tria, next_element_index, out);
+    }
+  if (msh_flags.write_lines)
+    {
+      next_element_index = write_msh_lines (tria, next_element_index, out);
+    }
+
+  out << "$ENDELM\n";
+
+  // make sure everything now gets to
+  // disk
+  out.flush ();
+
+  AssertThrow (out, ExcIO());
+}
+
+
+// Write the active cells of @p tria to @p out in AVS UCD format.
+//
+// The output consists of an optional comment preamble (controlled by
+// ucd_flags.write_preamble), one header line with the number of used
+// vertices and the number of elements, the used vertices, the active
+// cells, and finally -- if requested through ucd_flags.write_faces and
+// ucd_flags.write_lines -- the faces and lines with non-zero boundary
+// indicator. All vertex and element indices in the file are 1-based.
+//
+// Throws ExcIO (via AssertThrow) if @p out is not in a good state on
+// entry or after the final flush.
+template <int dim, int spacedim>
+void GridOut::write_ucd (const Triangulation<dim,spacedim> &tria,
+                         std::ostream             &out) const
+{
+  AssertThrow (out, ExcIO());
+
+  // get the positions of the
+  // vertices and whether they are
+  // used.
+  const std::vector<Point<spacedim> > &vertices    = tria.get_vertices();
+  const std::vector<bool>             &vertex_used = tria.get_used_vertices();
+
+  const unsigned int n_vertices = tria.n_used_vertices();
+
+  typename Triangulation<dim,spacedim>::active_cell_iterator       cell=tria.begin_active();
+  const typename Triangulation<dim,spacedim>::active_cell_iterator endc=tria.end();
+
+  // write preamble
+  if (ucd_flags.write_preamble)
+    {
+      // block this to have local
+      // variables destroyed after
+      // use
+      std::time_t  time1= std::time (0);
+      std::tm     *time = std::localtime(&time1);
+      out << "# This file was generated by the deal.II library." << '\n'
+          << "# Date =  "
+          << time->tm_year+1900 << "/"
+          << time->tm_mon+1 << "/"
+          << time->tm_mday << '\n';
+
+      // std::setw(2) alone pads with the stream's current fill
+      // character (a blank by default), which would print e.g.
+      // "12: 5: 7". Zero-pad minutes and seconds instead, and put
+      // the caller's fill character back afterwards because the
+      // fill setting is sticky.
+      const char previous_fill = out.fill('0');
+      out << "# Time =  "
+          << time->tm_hour << ":"
+          << std::setw(2) << time->tm_min << ":"
+          << std::setw(2) << time->tm_sec << '\n';
+      out.fill(previous_fill);
+
+      out << "#" << '\n'
+          << "# For a description of the UCD format see the AVS Developer's guide."
+          << '\n'
+          << "#" << '\n';
+    }
+
+  // start with ucd data: number of vertices, number of elements
+  // (cells plus optionally boundary faces and lines), and three
+  // zeros because no data is attached
+  out << n_vertices << ' '
+      << tria.n_active_cells() + ( (ucd_flags.write_faces ?
+                                    n_boundary_faces(tria) : 0) +
+                                   (ucd_flags.write_lines ?
+                                    n_boundary_lines(tria) : 0) )
+      << " 0 0 0"                  // no data
+      << '\n';
+
+  // actually write the vertices.
+  // note that we shall number them
+  // with first index 1 instead of 0
+  for (unsigned int i=0; i<vertices.size(); ++i)
+    if (vertex_used[i])
+      {
+        out << i+1                 // vertex index
+            << "  "
+            << vertices[i];
+        // every vertex line carries three coordinates, so pad
+        // the missing ones for spacedim < 3
+        for (unsigned int d=spacedim+1; d<=3; ++d)
+          out << " 0";             // fill with zeroes
+        out << '\n';
+      }
+
+  // write cells. Enumerate cells
+  // consecutively, starting with 1
+  for (cell=tria.begin_active();  cell!=endc; ++cell)
+    {
+      out << cell->active_cell_index()+1 << ' '
+          << static_cast<unsigned int>(cell->material_id())
+          << ' ';
+      switch (dim)
+        {
+        case 1:
+          out << "line    ";
+          break;
+        case 2:
+          out << "quad    ";
+          break;
+        case 3:
+          out << "hex     ";
+          break;
+        default:
+          Assert (false, ExcNotImplemented());
+        }
+
+      // it follows a list of the
+      // vertices of each cell. in 1d
+      // this is simply a list of the
+      // two vertices, in 2d it is counter
+      // clockwise, as usual in this
+      // library. in 3d, the same applies
+      // (special thanks to AVS for
+      // numbering their vertices in a
+      // way compatible to deal.II!)
+      //
+      // technical reference:
+      // AVS Developer's Guide, Release 4,
+      // May, 1992, p. E6
+      //
+      // note: vertex numbers are 1-based
+      for (unsigned int vertex=0; vertex<GeometryInfo<dim>::vertices_per_cell;
+           ++vertex)
+        out << cell->vertex_index(GeometryInfo<dim>::ucd_to_deal[vertex])+1 << ' ';
+      out << '\n';
+    }
+
+  // write faces and lines with non-zero boundary indicator; their
+  // element numbers continue after those of the cells
+  unsigned int next_element_index = tria.n_active_cells()+1;
+  if (ucd_flags.write_faces)
+    {
+      next_element_index = write_ucd_faces (tria, next_element_index, out);
+    }
+  if (ucd_flags.write_lines)
+    {
+      next_element_index = write_ucd_lines (tria, next_element_index, out);
+    }
+
+  // make sure everything now gets to
+  // disk
+  out.flush ();
+
+  AssertThrow (out, ExcIO());
+}
+
+
+
+template <int dim, int spacedim>          // generic version: XFig output is not implemented here
+void GridOut::write_xfig (
+  const Triangulation<dim, spacedim> &,   /* triangulation, unused */
+  std::ostream &,                         /* output stream, unused */
+  const Mapping<dim, spacedim> *) const   /* mapping, unused */
+{
+  Assert (false, ExcNotImplemented());    // always fails the assertion; nothing is written
+}
+
+
+// Write the two-dimensional triangulation @p tria to @p out in XFig
+// (format 3.2) syntax. Every cell is written as a filled polygon whose
+// fill colour is selected by xfig_flags.color_by; if
+// xfig_flags.draw_boundary is set, each face with a boundary indicator
+// is additionally written as a polyline. The mapping argument is
+// ignored.
+//
+// Records are terminated with '\n' and the stream is flushed once at
+// the end, as in the other writers of this file. Throws ExcIO (via
+// AssertThrow) if @p out is in a failed state after the flush.
+//
+//TODO:[GK] Obey parameters
+template <>
+void GridOut::write_xfig (
+  const Triangulation<2> &tria,
+  std::ostream             &out,
+  const Mapping<2> *       /*mapping*/) const
+{
+  const int dim = 2;
+  const int spacedim = 2;
+
+  const unsigned int nv = GeometryInfo<dim>::vertices_per_cell;
+  const unsigned int nf = GeometryInfo<dim>::faces_per_cell;
+  const unsigned int nvf = GeometryInfo<dim>::vertices_per_face;
+
+  // The following text was copied
+  // from an existing XFig file.
+  out << "#FIG 3.2\nLandscape\nCenter\nInches" << '\n'
+      << "A4\n100.00\nSingle" << '\n'
+      // Background is transparent
+      << "-3" << '\n'
+      << "# generated by deal.II GridOut class" << '\n'
+      << "# reduce first number to scale up image" << '\n'
+      << "1200 2" << '\n';
+
+  // Write custom palette. User defined colours start at index 32.
+  // The first 18 entries form a rainbow; the id-based fill colours
+  // below are computed as "id + 32" and therefore index into this
+  // palette starting at its first entry.
+  static const char *const rainbow_palette[] =
+  {
+    "#ff0000", "#ff8000", "#ffd000", "#ffff00", "#c0ff00", "#80ff00",
+    "#00f000", "#00f0c0", "#00f0ff", "#00c0ff", "#0080ff", "#0040ff",
+    "#0000c0", "#5000ff", "#8000ff", "#b000ff", "#ff00ff", "#ff80ff"
+  };
+  const unsigned int n_rainbow_colors
+    = sizeof(rainbow_palette) / sizeof(rainbow_palette[0]);
+
+  unsigned int colno = 32;
+  for (unsigned int i=0; i<n_rainbow_colors; ++i)
+    out << "0 " << colno++ << ' ' << rainbow_palette[i] << '\n';
+
+  // The colour ramps below print each channel as a hexadecimal
+  // number; all values used are >= 0x1f, so every channel has
+  // exactly two digits.
+  // grey
+  for (unsigned int i=0; i<8; ++i)
+    out << "0 " << colno++ << " #" << std::hex << 32*i+31 << 32*i+31 << 32*i+31 << std::dec << '\n';
+  // green
+  for (unsigned int i=1; i<16; ++i)
+    out << "0 " << colno++ << " #00" << std::hex << 16*i+15 << std::dec << "00" << '\n';
+  // yellow
+  for (unsigned int i=1; i<16; ++i)
+    out << "0 " << colno++ << " #" << std::hex << 16*i+15 << 16*i+15 << std::dec << "00" << '\n';
+  // red
+  for (unsigned int i=1; i<16; ++i)
+    out << "0 " << colno++ << " #" << std::hex << 16*i+15 << std::dec << "0000" << '\n';
+  // purple
+  for (unsigned int i=1; i<16; ++i)
+    out << "0 " << colno++ << " #" << std::hex << 16*i+15 << "00" << 16*i+15 << std::dec << '\n';
+  // blue
+  for (unsigned int i=1; i<16; ++i)
+    out << "0 " << colno++ << " #0000" << std::hex << 16*i+15 << std::dec << '\n';
+  // cyan
+  for (unsigned int i=1; i<16; ++i)
+    out << "0 " << colno++ << " #00" << std::hex << 16*i+15 << 16*i+15 << std::dec << '\n';
+
+  // We write all cells and cells on
+  // coarser levels are behind cells
+  // on finer levels. Level 0
+  // corresponds to a depth of 900,
+  // each level subtracting 1
+  Triangulation<dim, spacedim>::cell_iterator cell = tria.begin();
+  const Triangulation<dim, spacedim>::cell_iterator end = tria.end();
+
+  for (; cell != end; ++cell)
+    {
+      // If depth is not encoded, write finest level only
+      if (!xfig_flags.level_depth && !cell->active())
+        continue;
+      // Code for polygon
+      out << "2 3  "
+          << xfig_flags.line_style << ' '
+          << xfig_flags.line_thickness
+          // with black line
+          << " 0 ";
+      // Fill color
+      switch (xfig_flags.color_by)
+        {
+//TODO[GK]: Simplify after deprecation period is over
+        case GridOutFlags::XFig::material_id:
+          out << static_cast<unsigned int>(cell->material_id()) + 32;
+          break;
+        case GridOutFlags::XFig::level_number:
+          out << cell->level() + 8;
+          break;
+        case GridOutFlags::XFig::subdomain_id:
+          out << cell->subdomain_id() + 32;
+          break;
+        case GridOutFlags::XFig::level_subdomain_id:
+          out << cell->level_subdomain_id() + 32;
+          break;
+        default:
+          Assert(false, ExcInternalError());
+        }
+
+      // Depth, unused, fill
+      out << ' '
+          << (xfig_flags.level_depth
+              ? (900-cell->level())
+              : (900+cell->material_id()))
+          << " 0 "
+          << xfig_flags.fill_style << " 0.0 "
+          // some style parameters
+          << " 0 0 -1 0 0 "
+          // number of points: the first vertex is repeated to
+          // close the polygon
+          << nv+1 << '\n';
+
+      // For each point, write scaled
+      // and shifted coordinates
+      // multiplied by 1200
+      // (dots/inch). The second coordinate is written with
+      // flipped sign.
+      for (unsigned int k=0; k<=nv; ++k)
+        {
+          const Point<dim> &p = cell->vertex(
+                                  GeometryInfo<dim>::ucd_to_deal[k % nv]);
+          for (unsigned int d=0; d<static_cast<unsigned int>(dim); ++d)
+            {
+              const int val = static_cast<int>(1200 * xfig_flags.scaling(d) *
+                                               (p(d)-xfig_flags.offset(d)));
+              out << '\t' << ((d==0) ? val : -val);
+            }
+          out << '\n';
+        }
+      // Now write boundary edges
+      static const unsigned int face_reorder[4]= {2,1,3,0};
+      if (xfig_flags.draw_boundary)
+        for (unsigned int f=0; f<nf; ++f)
+          {
+            Triangulation<dim, spacedim>::face_iterator
+            face = cell->face(face_reorder[f]);
+            const types::boundary_id bi = face->boundary_id();
+            if (bi != numbers::internal_face_boundary_id)
+              {
+                // Code for polyline
+                out << "2 1 "
+                    // with line style and thickness
+                    << xfig_flags.boundary_style << ' '
+                    << xfig_flags.boundary_thickness << ' '
+                    // line colour derived from the boundary indicator
+                    << (1 + static_cast<unsigned int>(bi));
+                // Fill color
+                out << " -1 ";
+                // Depth 100 less than cells
+                out << (xfig_flags.level_depth
+                        ? (800-cell->level())
+                        : 800+bi)
+                    // unused, no fill
+                    << " 0 -1 0.0 "
+                    // some style parameters
+                    << " 0 0 -1 0 0 "
+                    // number of points
+                    << nvf << '\n';
+
+                // For each point, write scaled
+                // and shifted coordinates
+                // multiplied by 1200
+                // (dots/inch)
+
+                for (unsigned int k=0; k<nvf; ++k)
+                  {
+                    const Point<dim> &p = face->vertex(k);
+                    for (unsigned int d=0; d<static_cast<unsigned int>(dim); ++d)
+                      {
+                        const int val = static_cast<int>(1200 * xfig_flags.scaling(d) *
+                                                         (p(d)-xfig_flags.offset(d)));
+                        out << '\t' << ((d==0) ? val : -val);
+                      }
+                    out << '\n';
+                  }
+              }
+          }
+    }
+
+  // make sure everything now gets to
+  // disk
+  out.flush ();
+
+  AssertThrow (out, ExcIO());
+}
+
+
+
+template <int dim, int spacedim>                                // generic version: SVG output is not implemented here
+void GridOut::write_svg (const Triangulation<dim,spacedim> &,   /* triangulation, unused */
+                         std::ostream &/*out*/) const
+{
+  Assert(false, ExcNotImplemented());                           // always fails the assertion; nothing is written
+}
+
+
+void GridOut::write_svg(const Triangulation<2,2> &tria, std::ostream &out) const
+{
+
+  unsigned int n_materials = 0;
+  unsigned int n_levels = 0;
+  unsigned int n_subdomains = 0;
+  unsigned int n_level_subdomains = 0;
+
+  unsigned int n = 0;
+
+  unsigned int min_level, max_level;
+
+  // Svg files require an underlying drawing grid. The size of this
+  // grid is provided in the parameters height and width. Each of them
+  // may be zero, such that it is computed from the other. Obviously,
+  // both of them zero does not produce reasonable output.
+  unsigned int height = svg_flags.height;
+  unsigned int width = svg_flags.width;
+  Assert (height != 0 || width != 0, ExcMessage("You have to set at least one of width and height"));
+
+  unsigned int margin_in_percent = 0;
+  if (svg_flags.margin || svg_flags.background == GridOutFlags::Svg::dealii)
+    margin_in_percent = 8;
+
+  // initial font size for cell labels
+  unsigned int cell_label_font_size;
+
+  // get date and time
+  // time_t time_stamp;
+  // tm *now;
+  // time_stamp = time(0);
+  // now = localtime(&time_stamp);
+
+  // vectors and variables for the perspective view
+  Point<3> camera_position;
+  Point<3> camera_direction;
+  Point<3> camera_horizontal;
+  float  camera_focus;
+
+  Point<3> point;
+  Point<2> projection_decomposition;
+
+  float x_max_perspective, x_min_perspective;
+  float y_max_perspective, y_min_perspective;
+
+  float x_dimension_perspective, y_dimension_perspective;
+
+
+  // auxiliary variables for the bounding box and the range of cell levels
+  double x_min = tria.begin()->vertex(0)[0];
+  double x_max = tria.begin()->vertex(0)[0];
+  double y_min = tria.begin()->vertex(0)[1];
+  double y_max = tria.begin()->vertex(0)[1];
+
+  double x_dimension, y_dimension;
+
+  min_level = max_level = tria.begin()->level();
+
+  // auxiliary array for the materials being used (material ids 255 max.)
+  unsigned int materials[256];
+  for (unsigned int material_index = 0; material_index < 256; material_index++)
+    materials[material_index] = 0;
+
+  // auxiliary array for the levels being used (level number 255 max.)
+  unsigned int levels[256];
+  for (unsigned int level_index = 0; level_index < 256; level_index++)
+    levels[level_index] = 0;
+
+  // auxiliary array for the subdomains being used (subdomain id 255 max.)
+  unsigned int subdomains[256];
+  for (unsigned int subdomain_index = 0; subdomain_index < 256; subdomain_index++)
+    subdomains[subdomain_index] = 0;
+
+  // auxiliary array for the level subdomains being used
+  int level_subdomains[256];
+  for (int level_subdomain_index = 0; level_subdomain_index < 256; level_subdomain_index++)
+    level_subdomains[level_subdomain_index] = 0;
+
+  // We use an active cell iterator to determine the
+  // bounding box of the given triangulation and check
+  // the cells for material id, level number, subdomain id
+  // (, and level subdomain id).
+  for (Triangulation<2,2>::cell_iterator cell = tria.begin(); cell != tria.end(); ++cell)
+    {
+      for (unsigned int vertex_index = 0; vertex_index < 4; vertex_index++)
+        {
+          if (cell->vertex(vertex_index)[0] < x_min) x_min = cell->vertex(vertex_index)[0];
+          if (cell->vertex(vertex_index)[0] > x_max) x_max = cell->vertex(vertex_index)[0];
+
+          if (cell->vertex(vertex_index)[1] < y_min) y_min = cell->vertex(vertex_index)[1];
+          if (cell->vertex(vertex_index)[1] > y_max) y_max = cell->vertex(vertex_index)[1];
+        }
+
+      if ((unsigned int)cell->level() < min_level) min_level = cell->level();
+      if ((unsigned int)cell->level() > max_level) max_level = cell->level();
+
+      materials[(unsigned int)cell->material_id()] = 1;
+      levels[(unsigned int)cell->level()] = 1;
+      if (cell->active())
+        subdomains[cell->subdomain_id()+2] = 1;
+      level_subdomains[cell->level_subdomain_id()+2] = 1;
+    }
+
+  x_dimension = x_max - x_min;
+  y_dimension = y_max - y_min;
+
+  // count the materials being used
+  for (unsigned int material_index = 0; material_index < 256; material_index++)
+    {
+      if (materials[material_index]) n_materials++;
+    }
+
+  // count the levels being used
+  for (unsigned int level_index = 0; level_index < 256; level_index++)
+    {
+      if (levels[level_index]) n_levels++;
+    }
+
+  // count the subdomains being used
+  for (unsigned int subdomain_index = 0; subdomain_index < 256; subdomain_index++)
+    {
+      if (subdomains[subdomain_index]) n_subdomains++;
+    }
+
+  // count the level subdomains being used
+  for (int level_subdomain_index = 0; level_subdomain_index < 256; level_subdomain_index++)
+    {
+      if (level_subdomains[level_subdomain_index]) n_level_subdomains++;
+    }
+
+  switch (svg_flags.coloring)
+    {
+    case GridOutFlags::Svg::material_id:
+      n = n_materials;
+      break;
+    case GridOutFlags::Svg::level_number:
+      n = n_levels;
+      break;
+    case GridOutFlags::Svg::subdomain_id:
+      n = n_subdomains;
+      break;
+    case GridOutFlags::Svg::level_subdomain_id:
+      n = n_level_subdomains;
+      break;
+    default:
+      break;
+    }
+
+  // set the camera position to top view, targeting at the origin
+  camera_position[0] = 0;
+  camera_position[1] = 0;
+  camera_position[2] = 2. * std::max(x_dimension, y_dimension);
+
+  camera_direction[0] = 0;
+  camera_direction[1] = 0;
+  camera_direction[2] = -1;
+
+  camera_horizontal[0] = 1;
+  camera_horizontal[1] = 0;
+  camera_horizontal[2] = 0;
+
+  camera_focus = .5 * std::max(x_dimension, y_dimension);
+
+  Point<3> camera_position_temp;
+  Point<3> camera_direction_temp;
+  Point<3> camera_horizontal_temp;
+
+  const double angle_factor = 3.14159265 / 180.;
+
+  // (I) rotate the camera to the chosen polar angle
+  camera_position_temp[1] = cos(angle_factor * svg_flags.polar_angle) * camera_position[1] - sin(angle_factor * svg_flags.polar_angle) * camera_position[2];
+  camera_position_temp[2] = sin(angle_factor * svg_flags.polar_angle) * camera_position[1] + cos(angle_factor * svg_flags.polar_angle) * camera_position[2];
+
+  camera_direction_temp[1] = cos(angle_factor * svg_flags.polar_angle) * camera_direction[1] - sin(angle_factor * svg_flags.polar_angle) * camera_direction[2];
+  camera_direction_temp[2] = sin(angle_factor * svg_flags.polar_angle) * camera_direction[1] + cos(angle_factor * svg_flags.polar_angle) * camera_direction[2];
+
+  camera_horizontal_temp[1] = cos(angle_factor * svg_flags.polar_angle) * camera_horizontal[1] - sin(angle_factor * svg_flags.polar_angle) * camera_horizontal[2];
+  camera_horizontal_temp[2] = sin(angle_factor * svg_flags.polar_angle) * camera_horizontal[1] + cos(angle_factor * svg_flags.polar_angle) * camera_horizontal[2];
+
+  camera_position[1] = camera_position_temp[1];
+  camera_position[2] = camera_position_temp[2];
+
+  camera_direction[1] = camera_direction_temp[1];
+  camera_direction[2] = camera_direction_temp[2];
+
+  camera_horizontal[1] = camera_horizontal_temp[1];
+  camera_horizontal[2] = camera_horizontal_temp[2];
+
+  // (II) rotate the camera to the chosen azimuth angle
+  camera_position_temp[0] = cos(angle_factor * svg_flags.azimuth_angle) * camera_position[0] - sin(angle_factor * svg_flags.azimuth_angle) * camera_position[1];
+  camera_position_temp[1] = sin(angle_factor * svg_flags.azimuth_angle) * camera_position[0] + cos(angle_factor * svg_flags.azimuth_angle) * camera_position[1];
+
+  camera_direction_temp[0] = cos(angle_factor * svg_flags.azimuth_angle) * camera_direction[0] - sin(angle_factor * svg_flags.azimuth_angle) * camera_direction[1];
+  camera_direction_temp[1] = sin(angle_factor * svg_flags.azimuth_angle) * camera_direction[0] + cos(angle_factor * svg_flags.azimuth_angle) * camera_direction[1];
+
+  camera_horizontal_temp[0] = cos(angle_factor * svg_flags.azimuth_angle) * camera_horizontal[0] - sin(angle_factor * svg_flags.azimuth_angle) * camera_horizontal[1];
+  camera_horizontal_temp[1] = sin(angle_factor * svg_flags.azimuth_angle) * camera_horizontal[0] + cos(angle_factor * svg_flags.azimuth_angle) * camera_horizontal[1];
+
+  camera_position[0] = camera_position_temp[0];
+  camera_position[1] = camera_position_temp[1];
+
+  camera_direction[0] = camera_direction_temp[0];
+  camera_direction[1] = camera_direction_temp[1];
+
+  camera_horizontal[0] = camera_horizontal_temp[0];
+  camera_horizontal[1] = camera_horizontal_temp[1];
+
+  // translate the camera to the given triangulation
+  camera_position[0] = x_min + .5 * x_dimension;
+  camera_position[1] = y_min + .5 * y_dimension;
+
+  camera_position[0] += 2. * std::max(x_dimension, y_dimension) * sin(angle_factor * svg_flags.polar_angle) * sin(angle_factor * svg_flags.azimuth_angle);
+  camera_position[1] -= 2. * std::max(x_dimension, y_dimension) * sin(angle_factor * svg_flags.polar_angle) * cos(angle_factor * svg_flags.azimuth_angle);
+
+
+  // determine the bounding box of the given triangulation on the projection plane of the camera viewing system
+  point[0] = tria.begin()->vertex(0)[0];
+  point[1] = tria.begin()->vertex(0)[1];
+  point[2] = 0;
+
+  float min_level_min_vertex_distance = 0;
+
+  if (svg_flags.convert_level_number_to_height)
+    {
+      point[2] = svg_flags.level_height_factor * ((float)tria.begin()->level() / (float)n_levels) * std::max(x_dimension, y_dimension);
+    }
+
+  projection_decomposition = GridOut::svg_project_point(point, camera_position, camera_direction, camera_horizontal, camera_focus);
+
+  x_max_perspective = projection_decomposition[0];
+  x_min_perspective = projection_decomposition[0];
+
+  y_max_perspective = projection_decomposition[1];
+  y_min_perspective = projection_decomposition[1];
+
+  for (Triangulation<2,2>::cell_iterator cell = tria.begin(); cell != tria.end(); ++cell)
+    {
+      point[0] = cell->vertex(0)[0];
+      point[1] = cell->vertex(0)[1];
+      point[2] = 0;
+
+      if (svg_flags.convert_level_number_to_height)
+        {
+          point[2] = svg_flags.level_height_factor * ((float)cell->level() / (float)n_levels) * std::max(x_dimension, y_dimension);
+        }
+
+      projection_decomposition = GridOut::svg_project_point(point, camera_position, camera_direction, camera_horizontal, camera_focus);
+
+      if (x_max_perspective < projection_decomposition[0]) x_max_perspective = projection_decomposition[0];
+      if (x_min_perspective > projection_decomposition[0]) x_min_perspective = projection_decomposition[0];
+
+      if (y_max_perspective < projection_decomposition[1]) y_max_perspective = projection_decomposition[1];
+      if (y_min_perspective > projection_decomposition[1]) y_min_perspective = projection_decomposition[1];
+
+      point[0] = cell->vertex(1)[0];
+      point[1] = cell->vertex(1)[1];
+
+      projection_decomposition = GridOut::svg_project_point(point, camera_position, camera_direction, camera_horizontal, camera_focus);
+
+      if (x_max_perspective < projection_decomposition[0]) x_max_perspective = projection_decomposition[0];
+      if (x_min_perspective > projection_decomposition[0]) x_min_perspective = projection_decomposition[0];
+
+      if (y_max_perspective < projection_decomposition[1]) y_max_perspective = projection_decomposition[1];
+      if (y_min_perspective > projection_decomposition[1]) y_min_perspective = projection_decomposition[1];
+
+      point[0] = cell->vertex(2)[0];
+      point[1] = cell->vertex(2)[1];
+
+      projection_decomposition = GridOut::svg_project_point(point, camera_position, camera_direction, camera_horizontal, camera_focus);
+
+      if (x_max_perspective < projection_decomposition[0]) x_max_perspective = projection_decomposition[0];
+      if (x_min_perspective > projection_decomposition[0]) x_min_perspective = projection_decomposition[0];
+
+      if (y_max_perspective < projection_decomposition[1]) y_max_perspective = projection_decomposition[1];
+      if (y_min_perspective > projection_decomposition[1]) y_min_perspective = projection_decomposition[1];
+
+      point[0] = cell->vertex(3)[0];
+      point[1] = cell->vertex(3)[1];
+
+      projection_decomposition = GridOut::svg_project_point(point, camera_position, camera_direction, camera_horizontal, camera_focus);
+
+      if (x_max_perspective < projection_decomposition[0]) x_max_perspective = projection_decomposition[0];
+      if (x_min_perspective > projection_decomposition[0]) x_min_perspective = projection_decomposition[0];
+
+      if (y_max_perspective < projection_decomposition[1]) y_max_perspective = projection_decomposition[1];
+      if (y_min_perspective > projection_decomposition[1]) y_min_perspective = projection_decomposition[1];
+
+      if ((unsigned int)cell->level() == min_level) min_level_min_vertex_distance = cell->minimum_vertex_distance();
+    }
+
+  x_dimension_perspective = x_max_perspective - x_min_perspective;
+  y_dimension_perspective = y_max_perspective - y_min_perspective;
+
+// create the svg file with an internal style sheet
+  if (width == 0)
+    width = static_cast<unsigned int>(.5 + height * (x_dimension_perspective / y_dimension_perspective));
+  else if (height == 0)
+    height = static_cast<unsigned int>(.5 + width * (y_dimension_perspective / x_dimension_perspective));
+  unsigned int additional_width = 0;
+  // font size for date, time, legend, and colorbar
+  unsigned int font_size = static_cast<unsigned int>(.5 + (height/100.) * 1.75);
+  cell_label_font_size = static_cast<unsigned int>(.5 +
+                                                   (height * .15
+                                                    * svg_flags.cell_font_scaling
+                                                    * min_level_min_vertex_distance
+                                                    / std::min(x_dimension, y_dimension)));
+
+  if (svg_flags.draw_legend && (svg_flags.label_level_number || svg_flags.label_cell_index || svg_flags.label_material_id || svg_flags.label_subdomain_id || svg_flags.label_level_subdomain_id ))
+    {
+      additional_width = static_cast<unsigned int>(.5 + height * .4); // additional width for legend
+    }
+  else if (svg_flags.draw_colorbar && svg_flags.coloring)
+    {
+      additional_width = static_cast<unsigned int>(.5 + height * .175); // additional width for colorbar
+    }
+
+  //out << "<!-- deal.ii GridOut " << now->tm_mday << '/' << now->tm_mon + 1 << '/' << now->tm_year + 1900
+  //    << ' ' << now->tm_hour << ':';
+  //
+  //if (now->tm_min < 10) out << '0';
+  //
+  //out << now->tm_min << " -->" << '\n';
+
+  // basic svg header
+  out << "<svg width=\"" << width + additional_width << "\" height=\"" << height << "\" xmlns=\"http://www.w3.org/2000/svg\" version=\"1.1\">"
+      << '\n' << '\n';
+
+
+  if (svg_flags.background == GridOutFlags::Svg::dealii)
+    {
+      out << " <linearGradient id=\"background_gradient\" gradientUnits=\"userSpaceOnUse\" x1=\"0\" y1=\"0\" x2=\"0\" y2=\"" << height << "\">" << '\n'
+          << "  <stop offset=\"0\" style=\"stop-color:white\"/>" << '\n'
+          << "  <stop offset=\"1\" style=\"stop-color:lightsteelblue\"/>" << '\n'
+          << " </linearGradient>" << '\n';
+    }
+
+  out << '\n';
+
+  // header for the internal style sheet
+  out << "<!-- internal style sheet -->" << '\n'
+      << "<style type=\"text/css\"><![CDATA[" << '\n';
+
+  // set the background of the output graphic
+  if (svg_flags.background == GridOutFlags::Svg::dealii) out << " rect.background{fill:url(#background_gradient)}" << '\n';
+  else if (svg_flags.background == GridOutFlags::Svg::white) out << " rect.background{fill:white}" << '\n';
+  else out << " rect.background{fill:none}" << '\n';
+
+  // basic svg graphic element styles
+  out << " rect{fill:none; stroke:rgb(25,25,25); stroke-width:" << svg_flags.line_thickness << '}' << '\n'
+      << " text{font-family:Helvetica; text-anchor:middle; fill:rgb(25,25,25)}" << '\n'
+      << " line{stroke:rgb(25,25,25); stroke-width:" << svg_flags.boundary_line_thickness << '}' << '\n'
+      << " path{fill:none; stroke:rgb(25,25,25); stroke-width:" << svg_flags.line_thickness << '}' << '\n'
+      << '\n';
+
+  // polygon styles with respect to the chosen cell coloring
+  if (svg_flags.coloring)
+    {
+      unsigned int labeling_index = 0;
+
+      for (unsigned int index = 0; index < n; index++)
+        {
+          double h;
+
+          if (n != 1)  h = .6 - (index / (n-1.)) * .6;
+          else h = .6;
+
+          unsigned int  r = 0;
+          unsigned int  g = 0;
+          unsigned int  b = 0;
+
+          unsigned int  i = static_cast<unsigned int>(h * 6);
+
+          double f = h * 6 - i;
+          double q = 1 - f;
+          double t = f;
+
+          switch (i % 6)
+            {
+            case 0:
+              r = 255, g = static_cast<unsigned int>(.5 + 255*t);
+              break;
+            case 1:
+              r = static_cast<unsigned int>(.5 + 255*q), g = 255;
+              break;
+            case 2:
+              g = 255, b = static_cast<unsigned int>(.5 + 255*t);
+              break;
+            case 3:
+              g = static_cast<unsigned int>(.5 + 255*q), b = 255;
+              break;
+            case 4:
+              r = static_cast<unsigned int>(.5 + 255*t), b = 255;
+              break;
+            case 5:
+              r = 255, b = static_cast<unsigned int>(.5 + 255*q);
+              break;
+            default:
+              break;
+            }
+
+          switch (svg_flags.coloring)
+            {
+            case GridOutFlags::Svg::material_id:
+              while (!materials[labeling_index]) labeling_index++;
+              break;
+            case GridOutFlags::Svg::level_number:
+              while (!levels[labeling_index]) labeling_index++;
+              break;
+            case GridOutFlags::Svg::subdomain_id:
+              while (!subdomains[labeling_index]) labeling_index++;
+              break;
+            case GridOutFlags::Svg::level_subdomain_id:
+              while (!level_subdomains[labeling_index]) labeling_index++;
+              break;
+            default:
+              break;
+            }
+
+          out << " path.p" << labeling_index
+              << "{fill:rgb(" << r << ',' << g << ',' << b << "); "
+              << "stroke:rgb(25,25,25); stroke-width:" << svg_flags.line_thickness << '}' << '\n';
+
+          out << " path.ps" << labeling_index
+              << "{fill:rgb(" << static_cast<unsigned int>(.5 + .75 * r) << ',' << static_cast<unsigned int>(.5 + .75 * g) << ',' << static_cast<unsigned int>(.5 + .75 * b) << "); "
+              << "stroke:rgb(20,20,20); stroke-width:" << svg_flags.line_thickness << '}' << '\n';
+
+          out << " rect.r" << labeling_index
+              << "{fill:rgb(" << r << ',' << g << ',' << b << "); "
+              << "stroke:rgb(25,25,25); stroke-width:" << svg_flags.line_thickness << '}' << '\n';
+
+          labeling_index++;
+        }
+    }
+
+  out << "]]></style>" << '\n' << '\n';
+
+  // background rectangle
+  out << " <rect class=\"background\" width=\"" << width << "\" height=\"" << height << "\"/>" << '\n';
+
+  if (svg_flags.background == GridOutFlags::Svg::dealii)
+    {
+      unsigned int x_offset = 0;
+
+      if (svg_flags.margin) x_offset = static_cast<unsigned int>(.5 + (height/100.) * (margin_in_percent/2.));
+      else x_offset = static_cast<unsigned int>(.5 + height * .025);
+
+      out << " <text x=\"" << x_offset << "\" y=\"" << static_cast<unsigned int>(.5 + height * .0525) << '\"'
+          << " style=\"font-weight:100; fill:lightsteelblue; text-anchor:start; font-family:Courier; font-size:" << static_cast<unsigned int>(.5 + height * .045) << "px\">"
+          << "deal.II" << "</text>" << '\n';
+
+      // out << " <text x=\"" << x_offset + static_cast<unsigned int>(.5 + height * .045 * 4.75) << "\" y=\"" << static_cast<unsigned int>(.5 + height * .0525) << '\"'
+      //     << " style=\"fill:lightsteelblue; text-anchor:start; font-size:" << font_size << "\">"
+      //     << now->tm_mday << '/' << now->tm_mon + 1 << '/' << now->tm_year + 1900
+      //     << " - " << now->tm_hour << ':';
+      //
+      // if(now->tm_min < 10) out << '0';
+      //
+      // out << now->tm_min
+      //     << "</text>"<< '\n' << '\n';
+    }
+
+// draw the cells, starting out from the minimal level (in order to guaranty a correct perspective view)
+  out << "  <!-- cells -->" << '\n';
+
+  for (unsigned int level_index = min_level; level_index <= max_level; level_index++)
+    {
+      Triangulation<2,2>::cell_iterator
+      cell = tria.begin(level_index),
+      endc = tria.end(level_index);
+
+      for (; cell != endc; ++cell)
+        {
+          if (!svg_flags.convert_level_number_to_height && !cell->active()) continue;
+
+          // draw the current cell
+          out << "  <path";
+
+          if (svg_flags.coloring)
+            {
+              out << " class=\"p";
+
+              if (!cell->active() && svg_flags.convert_level_number_to_height) out << 's';
+
+              switch (svg_flags.coloring)
+                {
+                case GridOutFlags::Svg::material_id:
+                  out << (unsigned int)cell->material_id();
+                  break;
+                case GridOutFlags::Svg::level_number:
+                  out << (unsigned int)cell->level();
+                  break;
+                case GridOutFlags::Svg::subdomain_id:
+                  if (cell->active())
+                    out << cell->subdomain_id() + 2;
+                  else
+                    out << 'X';
+                  break;
+                case GridOutFlags::Svg::level_subdomain_id:
+                  out << cell->level_subdomain_id() + 2;
+                  break;
+                default:
+                  break;
+                }
+
+              out << '\"';
+            }
+
+          out << " d=\"M ";
+
+          point[0] = cell->vertex(0)[0];
+          point[1] = cell->vertex(0)[1];
+          point[2] = 0;
+
+          if (svg_flags.convert_level_number_to_height)
+            {
+              point[2] = svg_flags.level_height_factor * ((float)cell->level() / (float)n_levels) * std::max(x_dimension, y_dimension);
+            }
+
+          projection_decomposition = GridOut::svg_project_point(point, camera_position, camera_direction, camera_horizontal, camera_focus);
+
+          out << static_cast<unsigned int>(.5 + ((projection_decomposition[0] - x_min_perspective) / x_dimension_perspective) * (width - (width/100.) * 2. * margin_in_percent) + ((width/100.) * margin_in_percent)) << ' '
+              << static_cast<unsigned int>(.5 + height - (height/100.) * margin_in_percent - ((projection_decomposition[1] - y_min_perspective) / y_dimension_perspective) * (height - (height/100.) * 2. * margin_in_percent));
+
+          out << " L ";
+
+          point[0] = cell->vertex(1)[0];
+          point[1] = cell->vertex(1)[1];
+
+          projection_decomposition = GridOut::svg_project_point(point, camera_position, camera_direction, camera_horizontal, camera_focus);
+
+          out << static_cast<unsigned int>(.5 + ((projection_decomposition[0] - x_min_perspective) / x_dimension_perspective) * (width - (width/100.) * 2. * margin_in_percent) + ((width/100.) * margin_in_percent)) << ' '
+              << static_cast<unsigned int>(.5 + height - (height/100.) * margin_in_percent - ((projection_decomposition[1] - y_min_perspective) / y_dimension_perspective) * (height - (height/100.) * 2. * margin_in_percent));
+
+          out << " L ";
+
+          point[0] = cell->vertex(3)[0];
+          point[1] = cell->vertex(3)[1];
+
+          projection_decomposition = GridOut::svg_project_point(point, camera_position, camera_direction, camera_horizontal, camera_focus);
+
+          out << static_cast<unsigned int>(.5 + ((projection_decomposition[0] - x_min_perspective) / x_dimension_perspective) * (width - (width/100.) * 2. * margin_in_percent) + ((width/100.) * margin_in_percent)) << ' '
+              << static_cast<unsigned int>(.5 + height - (height/100.) * margin_in_percent - ((projection_decomposition[1] - y_min_perspective) / y_dimension_perspective) * (height - (height/100.) * 2. * margin_in_percent));
+
+          out << " L ";
+
+          point[0] = cell->vertex(2)[0];
+          point[1] = cell->vertex(2)[1];
+
+          projection_decomposition = GridOut::svg_project_point(point, camera_position, camera_direction, camera_horizontal, camera_focus);
+
+          out << static_cast<unsigned int>(.5 + ((projection_decomposition[0] - x_min_perspective) / x_dimension_perspective) * (width - (width/100.) * 2. * margin_in_percent) + ((width/100.) * margin_in_percent)) << ' '
+              << static_cast<unsigned int>(.5 + height - (height/100.) * margin_in_percent - ((projection_decomposition[1] - y_min_perspective) / y_dimension_perspective) * (height - (height/100.) * 2. * margin_in_percent));
+
+          out << " L ";
+
+          point[0] = cell->vertex(0)[0];
+          point[1] = cell->vertex(0)[1];
+
+          projection_decomposition = GridOut::svg_project_point(point, camera_position, camera_direction, camera_horizontal, camera_focus);
+
+          out << static_cast<unsigned int>(.5 + ((projection_decomposition[0] - x_min_perspective) / x_dimension_perspective) * (width - (width/100.) * 2. * margin_in_percent) + ((width/100.) * margin_in_percent)) << ' '
+              << static_cast<unsigned int>(.5 + height - (height/100.) * margin_in_percent - ((projection_decomposition[1] - y_min_perspective) / y_dimension_perspective) * (height - (height/100.) * 2. * margin_in_percent));
+
+          out << "\"/>" << '\n';
+
+          // label the current cell
+          if (svg_flags.label_level_number || svg_flags.label_cell_index || svg_flags.label_material_id || svg_flags.label_subdomain_id || svg_flags.label_level_subdomain_id)
+            {
+              point[0] = cell->center()[0];
+              point[1] = cell->center()[1];
+              point[2] = 0;
+
+              if (svg_flags.convert_level_number_to_height)
+                {
+                  point[2] = svg_flags.level_height_factor * ((float)cell->level() / (float)n_levels) * std::max(x_dimension, y_dimension);
+                }
+
+              float distance_to_camera = sqrt(pow(point[0] - camera_position[0], 2.) + pow(point[1] - camera_position[1], 2.) + pow(point[2] - camera_position[2], 2.));
+              float distance_factor = distance_to_camera / (2. * std::max(x_dimension, y_dimension));
+
+              projection_decomposition = GridOut::svg_project_point(point, camera_position, camera_direction, camera_horizontal, camera_focus);
+
+              const unsigned int font_size_this_cell = static_cast<unsigned int>(.5 + cell_label_font_size * pow(.5, (float)cell->level() - 4. + 3.5 * distance_factor));
+
+              out << "  <text"
+                  << " x=\"" << static_cast<unsigned int>(.5 + ((projection_decomposition[0] - x_min_perspective) / x_dimension_perspective) * (width - (width/100.) * 2. * margin_in_percent) + ((width/100.) * margin_in_percent))
+                  << "\" y=\"" << static_cast<unsigned int>(.5 + height - (height/100.) * margin_in_percent - ((projection_decomposition[1] - y_min_perspective) / y_dimension_perspective) * (height - (height/100.) * 2. * margin_in_percent) + 0.5 * font_size_this_cell)
+                  << "\" style=\"font-size:" << font_size_this_cell
+                  << "px\">";
+
+              if (svg_flags.label_level_number)
+                {
+                  out << cell->level();
+                }
+
+              if (svg_flags.label_cell_index)
+                {
+                  if (svg_flags.label_level_number) out << ',';
+                  out << cell->index();
+                }
+
+              if (svg_flags.label_material_id)
+                {
+                  if (svg_flags.label_level_number || svg_flags.label_cell_index) out << ',';
+                  out << (int)cell->material_id();
+                }
+
+              if (svg_flags.label_subdomain_id)
+                {
+                  if (svg_flags.label_level_number
+                      || svg_flags.label_cell_index
+                      || svg_flags.label_material_id)
+                    out << ',';
+                  if (cell->active())
+                    out << static_cast<int>(cell->subdomain_id());
+                  else
+                    out << 'X';
+                }
+
+              if (svg_flags.label_level_subdomain_id)
+                {
+                  if (svg_flags.label_level_number
+                      || svg_flags.label_cell_index
+                      || svg_flags.label_material_id
+                      || svg_flags.label_subdomain_id)
+                    out << ',';
+                  out << static_cast<int>(cell->level_subdomain_id());
+                }
+
+              out << "</text>" << '\n';
+            }
+
+          // if the current cell lies at the boundary of the triangulation, draw the additional boundary line
+          if (svg_flags.boundary_line_thickness)
+            {
+              for (unsigned int faceIndex = 0; faceIndex < 4; faceIndex++)
+                {
+                  if (cell->at_boundary(faceIndex))
+                    {
+
+                      point[0] = cell->face(faceIndex)->vertex(0)[0];
+                      point[1] = cell->face(faceIndex)->vertex(0)[1];
+                      point[2] = 0;
+
+                      if (svg_flags.convert_level_number_to_height)
+                        {
+                          point[2] = svg_flags.level_height_factor * ((float)cell->level() / (float)n_levels) * std::max(x_dimension, y_dimension);
+                        }
+
+                      projection_decomposition = GridOut::svg_project_point(point, camera_position, camera_direction, camera_horizontal, camera_focus);
+
+                      out << "  <line x1=\""
+                          << static_cast<unsigned int>(.5 + ((projection_decomposition[0] - x_min_perspective) / x_dimension_perspective) * (width - (width/100.) * 2. * margin_in_percent) + ((width/100.) * margin_in_percent))
+                          << "\" y1=\""
+                          << static_cast<unsigned int>(.5 + height - (height/100.) * margin_in_percent - ((projection_decomposition[1] - y_min_perspective) / y_dimension_perspective) * (height - (height/100.) * 2. * margin_in_percent));
+
+                      point[0] = cell->face(faceIndex)->vertex(1)[0];
+                      point[1] = cell->face(faceIndex)->vertex(1)[1];
+                      point[2] = 0;
+
+                      if (svg_flags.convert_level_number_to_height)
+                        {
+                          point[2] = svg_flags.level_height_factor * ((float)cell->level() / (float)n_levels) * std::max(x_dimension, y_dimension);
+                        }
+
+                      projection_decomposition = GridOut::svg_project_point(point, camera_position, camera_direction, camera_horizontal, camera_focus);
+
+                      out << "\" x2=\""
+                          << static_cast<unsigned int>(.5 + ((projection_decomposition[0] - x_min_perspective) / x_dimension_perspective) * (width - (width/100.) * 2. * margin_in_percent) + ((width/100.) * margin_in_percent))
+                          << "\" y2=\""
+                          << static_cast<unsigned int>(.5 + height - (height/100.) * margin_in_percent - ((projection_decomposition[1] - y_min_perspective) / y_dimension_perspective) * (height - (height/100.) * 2. * margin_in_percent))
+                          << "\"/>" << '\n';
+                    }
+                }
+            }
+        }
+    }
+
+
+// draw the legend
+  if (svg_flags.draw_legend) out << '\n' << " <!-- legend -->" << '\n';
+
+  unsigned int line_offset = 0;
+
+  additional_width = 0;
+  if (!svg_flags.margin) additional_width = static_cast<unsigned int>(.5 + (height/100.) * 2.5);
+
+  // explanation of the cell labeling
+  if (svg_flags.draw_legend && (svg_flags.label_level_number || svg_flags.label_cell_index || svg_flags.label_material_id || svg_flags.label_subdomain_id || svg_flags.label_level_subdomain_id ))
+    {
+      out << " <rect x=\"" << width + additional_width << "\" y=\"" << static_cast<unsigned int>(.5 + (height/100.) * margin_in_percent)
+          << "\" width=\"" << static_cast<unsigned int>(.5 + (height/100.) * (40. - margin_in_percent)) << "\" height=\"" << static_cast<unsigned int>(.5 + height * .165) << "\"/>" << '\n';
+
+      out << " <text x=\"" << width + additional_width + static_cast<unsigned int>(.5 + (height/100.) * 1.25)
+          << "\" y=\"" << static_cast<unsigned int>(.5 + (height/100.) * margin_in_percent + (++line_offset) * 1.5 * font_size)
+          << "\" style=\"text-anchor:start; font-weight:bold; font-size:" << font_size
+          << "px\">" << "cell label"
+          << "</text>" << '\n';
+
+      if (svg_flags.label_level_number)
+        {
+          out << "  <text x=\"" << width + additional_width + static_cast<unsigned int>(.5 + (height/100.) * 2.)
+              << "\" y=\"" << static_cast<unsigned int>(.5 + (height/100.) * margin_in_percent + (++line_offset) * 1.5 * font_size)
+              << "\" style=\"text-anchor:start; font-style:oblique; font-size:" << font_size
+              << "px\">" << "level_number";
+
+          if (svg_flags.label_cell_index || svg_flags.label_material_id || svg_flags.label_subdomain_id || svg_flags.label_level_subdomain_id)
+            out << ',';
+
+          out << "</text>" << '\n';
+        }
+
+      if (svg_flags.label_cell_index)
+        {
+          out << "  <text x=\"" << width + additional_width + static_cast<unsigned int>(.5 + (height/100.) * 2.)
+              << "\" y=\"" << static_cast<unsigned int>(.5 + (height/100.) * margin_in_percent + (++line_offset) * 1.5 * font_size )
+              << "\" style=\"text-anchor:start; font-style:oblique; font-size:" << font_size
+              << "px\">"
+              << "cell_index";
+
+          if (svg_flags.label_material_id || svg_flags.label_subdomain_id || svg_flags.label_level_subdomain_id)
+            out << ',';
+
+          out << "</text>" << '\n';
+        }
+
+      if (svg_flags.label_material_id)
+        {
+          out << "  <text x=\"" << width + additional_width + static_cast<unsigned int>(.5 + (height/100.) * 2.)
+              << "\" y=\"" << static_cast<unsigned int>(.5 + (height/100.) * margin_in_percent + (++line_offset) * 1.5 * font_size )
+              << "\" style=\"text-anchor:start; font-style:oblique; font-size:" << font_size
+              << "px\">"
+              << "material_id";
+
+          if (svg_flags.label_subdomain_id || svg_flags.label_level_subdomain_id)
+            out << ',';
+
+          out << "</text>" << '\n';
+        }
+
+      if (svg_flags.label_subdomain_id)
+        {
+          out << "  <text x= \"" << width + additional_width + static_cast<unsigned int>(.5 + (height/100.) * 2.)
+              << "\" y=\"" << static_cast<unsigned int>(.5 + (height/100.) * margin_in_percent + (++line_offset) * 1.5 * font_size )
+              << "\" style=\"text-anchor:start; font-style:oblique; font-size:" << font_size
+              << "px\">"
+              << "subdomain_id";
+
+          if (svg_flags.label_level_subdomain_id)
+            out << ',';
+
+          out << "</text>" << '\n';
+        }
+
+      if (svg_flags.label_level_subdomain_id)
+        {
+          out << "  <text x= \"" << width + additional_width + static_cast<unsigned int>(.5 + (height/100.) * 2.)
+              << "\" y=\""       << static_cast<unsigned int>(.5 + (height/100.) * margin_in_percent + (++line_offset) * 1.5 * font_size )
+              << "\" style=\"text-anchor:start; font-style:oblique; font-size:" << font_size
+              << "px\">"
+              << "level_subdomain_id"
+              << "</text>" << '\n';
+        }
+    }
+
+  // show azimuth angle and polar angle as text below the explanation of the cell labeling
+  if (svg_flags.draw_legend)
+    {
+      out << "  <text x=\"" << width + additional_width
+          << "\" y=\"" << static_cast<unsigned int>(.5 + (height/100.) * margin_in_percent + 10.75 * font_size)
+          << "\" style=\"text-anchor:start; font-size:" << font_size << "px\">"
+          << "azimuth: " << svg_flags.azimuth_angle << "°, polar: " << svg_flags.polar_angle << "°</text>" << '\n';
+    }
+
+
+// draw the colorbar
+  if (svg_flags.draw_colorbar && svg_flags.coloring)
+    {
+      out << '\n' << " <!-- colorbar -->" << '\n';
+
+      out << " <text x=\"" << width + additional_width
+          << "\" y=\""     << static_cast<unsigned int>(.5 + (height/100.) * (margin_in_percent + 29.) - (font_size/1.25))
+          << "\" style=\"text-anchor:start; font-weight:bold; font-size:" << font_size << "px\">";
+
+      switch (svg_flags.coloring)
+        {
+        case 1:
+          out << "material_id";
+          break;
+        case 2:
+          out << "level_number";
+          break;
+        case 3:
+          out << "subdomain_id";
+          break;
+        case 4:
+          out << "level_subdomain_id";
+          break;
+        default:
+          break;
+        }
+
+      out << "</text>" << '\n';
+
+      unsigned int element_height = static_cast<unsigned int>(((height/100.) * (71. - 2.*margin_in_percent)) / n);
+      unsigned int element_width = static_cast<unsigned int>(.5 + (height/100.) * 2.5);
+
+      int labeling_index = 0;
+
+      for (unsigned int index = 0; index < n; index++)
+        {
+          switch (svg_flags.coloring)
+            {
+            case GridOutFlags::Svg::material_id:
+              while (!materials[labeling_index]) labeling_index++;
+              break;
+            case GridOutFlags::Svg::level_number:
+              while (!levels[labeling_index]) labeling_index++;
+              break;
+            case GridOutFlags::Svg::subdomain_id:
+              while (!subdomains[labeling_index]) labeling_index++;
+              break;
+            case GridOutFlags::Svg::level_subdomain_id:
+              while (!level_subdomains[labeling_index]) labeling_index++;
+              break;
+            default:
+              break;
+            }
+
+          out << "  <rect class=\"r" << labeling_index
+              << "\" x=\"" << width + additional_width
+              << "\" y=\"" << static_cast<unsigned int>(.5 + (height/100.) * (margin_in_percent + 29)) + (n-index-1) * element_height
+              << "\" width=\"" << element_width
+              << "\" height=\"" << element_height
+              << "\"/>" << '\n';
+
+          out << "  <text x=\"" << width + additional_width + 1.5 * element_width
+              << "\" y=\"" << static_cast<unsigned int>(.5 + (height/100.) * (margin_in_percent + 29)) + (n-index-1 + .5) * element_height + static_cast<unsigned int>(.5 + font_size * .35) << "\""
+              << " style=\"text-anchor:start; font-size:" << static_cast<unsigned int>(.5 + font_size) << "px";
+
+          if (index == 0 || index == n-1) out << "; font-weight:bold";
+
+          out << "\">" << labeling_index;
+
+          if (index == n-1) out << " max";
+          if (index == 0) out << " min";
+
+          out << "</text>" << '\n';
+
+          labeling_index++;
+        }
+    }
+
+
+// finalize the svg file
+  out << '\n' << "</svg>";
+  out.flush();
+
+}
+
+
+// Explicit specialization of write_mathgl() for Triangulation<1>.
+// Both arguments are unused: nothing is written to the stream and the
+// body consists solely of a failing assertion with ExcNotImplemented.
+template <>
+void GridOut::write_mathgl (const Triangulation<1> &,
+                            std::ostream &) const
+{
+  // 1d specialization not done yet
+  Assert (false, ExcNotImplemented());
+}
+
+
+// Write the active cells of a triangulation to a stream as a MathGL
+// script.
+//
+// The output consists of six parts, written in this order:
+//   (i)   a comment header containing the current local date and time,
+//   (ii)  a preamble (optional "box" command, "setsize" and "rotate"),
+//   (iii) a "list f ..." line describing the vertex ordering,
+//   (iv)  for every active cell, one "list" line per coordinate
+//         direction holding that coordinate of all cell vertices,
+//   (v)   one "quadplot" command per active cell,
+//   (vi)  a comment footer.
+//
+// Parameters:
+//   tria  triangulation whose active cells are written
+//   out   stream that receives the script
+//
+// Only dim==2 and dim==3 are handled by the dimension-dependent parts;
+// any other value runs into Assert (false, ExcNotImplemented ()).
+// The stream is checked with AssertThrow (out, ExcIO ()) on entry and
+// again after the final flush.
+//
+// The header uses std::setfill('0'), which permanently changes the fill
+// character of the stream. The caller's fill character is therefore
+// saved before and restored after the header so that this function does
+// not leak formatting state into the caller's stream.
+template <int dim, int spacedim>
+void GridOut::write_mathgl (const Triangulation<dim, spacedim> &tria,
+                            std::ostream             &out) const
+{
+  AssertThrow (out, ExcIO ());
+
+  // (i) write header
+  if (true)
+    {
+      // block this to have local variables destroyed after use
+      const std::time_t  time1 = std::time (0);
+      const std::tm     *time  = std::localtime (&time1);
+
+      // unlike std::setw, std::setfill is sticky: remember the fill
+      // character the caller had configured so it can be put back below
+      const char previous_fill = out.fill ();
+
+      out << "\n#"
+          << "\n# This file was generated by the deal.II library."
+          << "\n#   Date =  "
+          << time->tm_year+1900 << "/"
+          << std::setfill('0') << std::setw (2) << time->tm_mon+1 << "/"
+          << std::setfill('0') << std::setw (2) << time->tm_mday
+          << "\n#   Time =  "
+          << std::setfill('0') << std::setw (2) << time->tm_hour << ":"
+          << std::setfill('0') << std::setw (2) << time->tm_min  << ":"
+          << std::setfill('0') << std::setw (2) << time->tm_sec
+          << "\n#"
+          << "\n# For a description of the MathGL script format see the MathGL manual.  "
+          << "\n#"
+          << "\n# Note: This file is understood by MathGL v2.1 and higher only, and can "
+          << "\n#       be quickly viewed in a graphical environment using \'mglview\'. "
+          << "\n#" << "\n"
+          ;
+
+      // undo the effect of std::setfill('0') on the caller's stream
+      out.fill (previous_fill);
+    }
+
+  // define a helper to keep loops approximately dim-independent
+  // since MathGL labels axes as x, y, z
+  const std::string axes = "xyz";
+
+  // (ii) write preamble and graphing tweaks
+  out << "\n#"
+      << "\n#   Preamble."
+      << "\n#" << "\n";
+
+  if (mathgl_flags.draw_bounding_box)
+    out << "\nbox";
+
+  // deal with dimension dependent preamble; eg. default sizes and
+  // views for MathGL (cf. gnuplot).
+  switch (dim)
+    {
+    case 2:
+      out << "\nsetsize 800 800";
+      out << "\nrotate 0 0";
+      break;
+    case 3:
+      out << "\nsetsize 800 800";
+      out << "\nrotate 60 40";
+      break;
+    default:
+      Assert (false, ExcNotImplemented ());
+    }
+  out << "\n";
+
+  // (iii) write vertex ordering
+  out << "\n#"
+      << "\n#   Vertex ordering."
+      << "\n#   list <vertex order> <vertex indices>"
+      << "\n#" << "\n";
+
+  // todo: This denotes the natural ordering of vertices, but it needs
+  // to check this is really always true for a given grid (it's not
+  // true in step-1 grid-2 for instance).
+  switch (dim)
+    {
+    case 2:
+      out << "\nlist f 0 1 2 3"
+          << "\n";
+      break;
+    case 3:
+      out << "\nlist f 0 2 4 6 | 1 3 5 7 | 0 4 1 5 | 2 6 3 7 | 0 1 2 3 | 4 5 6 7"
+          << "\n";
+      break;
+    default:
+      Assert (false, ExcNotImplemented ());
+    }
+
+  // (iv) write a list of vertices of cells
+  out << "\n#"
+      << "\n#   List of vertices."
+      << "\n#   list <id> <vertices>"
+      << "\n#" << "\n";
+
+  // run over all active cells and write out a list of
+  // xyz-coordinates that correspond to vertices
+  typename dealii::Triangulation<dim, spacedim>::active_cell_iterator
+  cell=tria.begin_active (),
+  endc=tria.end ();
+
+  // No global indices in deal.II, so we make one up here: each list is
+  // named by its axis letter followed by the active cell index.
+  for (; cell!=endc; ++cell)
+    {
+      for (unsigned int i=0; i<dim; ++i)
+        {
+          // if (cell->direction_flag ()==true)
+          //   out << "\ntrue";
+          // else
+          //   out << "\nfalse";
+
+          out << "\nlist " << axes[i] << cell->active_cell_index() << " ";
+          for (unsigned int j=0; j<GeometryInfo<dim>::vertices_per_cell; ++j)
+            out << cell->vertex(j)[i] << " ";
+        }
+      out << '\n';
+    }
+
+  // (v) write out cells to plot as quadplot objects; the list names
+  // referenced here are the ones generated in step (iv)
+  out << "\n#"
+      << "\n#   List of cells to quadplot."
+      << "\n#   quadplot <vertex order> <id> <style>"
+      << "\n#" << "\n";
+  for (unsigned int i=0; i<tria.n_active_cells (); ++i)
+    {
+      out << "\nquadplot f ";
+      for (unsigned int j=0; j<dim; ++j)
+        out << axes[j] << i << " ";
+      out << "\'k#\'";
+    }
+  out << "\n";
+
+  // (vi) write footer
+  out << "\n#"
+      << "\n#"
+      << "\n#" << "\n";
+
+  // make sure everything now gets to the output stream
+  out.flush ();
+  AssertThrow (out, ExcIO ());
+}
+
+
+
+namespace
+{
+  /**
+   * Append one DataOutBase::Patch to @p patches for every cell in the
+   * iterator range [@p cell, @p end).
+   *
+   * Each patch has a single subdivision and carries, per vertex, the
+   * vertex location plus five data rows that are constant over the cell:
+   * row 0 the level, row 1 the manifold id, row 2 the material id,
+   * row 3 the subdomain id (or -1 if the cell has children) and
+   * row 4 the level subdomain id. The row order matches the names
+   * returned by triangulation_patch_data_names().
+   */
+  template <int dim, int spacedim, typename ITERATOR, typename END>
+  void
+  generate_triangulation_patches (std::vector<DataOutBase::Patch<dim,spacedim> > &patches,
+                                  ITERATOR cell, END end)
+  {
+    const unsigned int n_vertices = GeometryInfo<dim>::vertices_per_cell;
+
+    while (cell != end)
+      {
+        DataOutBase::Patch<dim,spacedim> cell_patch;
+        cell_patch.n_subdivisions = 1;
+        cell_patch.data.reinit (5, n_vertices);
+
+        for (unsigned int vertex=0; vertex<n_vertices; ++vertex)
+          {
+            cell_patch.vertices[vertex] = cell->vertex(vertex);
+
+            cell_patch.data(0,vertex) = cell->level();
+            cell_patch.data(1,vertex) = static_cast<int>(cell->manifold_id());
+            cell_patch.data(2,vertex) = cell->material_id();
+            // the subdomain id is only queried for cells without children;
+            // all other cells are marked with -1
+            cell_patch.data(3,vertex) = (cell->has_children()
+                                         ?
+                                         -1
+                                         :
+                                         static_cast<int>(cell->subdomain_id()));
+            cell_patch.data(4,vertex) = static_cast<int>(cell->level_subdomain_id());
+          }
+
+        patches.push_back (cell_patch);
+        ++cell;
+      }
+  }
+
+  /**
+   * Return the names of the five data rows stored in each patch by
+   * generate_triangulation_patches(), in row order.
+   */
+  std::vector<std::string> triangulation_patch_data_names ()
+  {
+    std::vector<std::string> names;
+    names.reserve (5);
+    names.push_back ("level");
+    names.push_back ("manifold");
+    names.push_back ("material");
+    names.push_back ("subdomain");
+    names.push_back ("level_subdomain");
+    return names;
+  }
+}
+
+
+
+// Write the active cells of @p tria to @p out through
+// DataOutBase::write_vtk, using the member vtk_flags.
+// The stream state is checked with AssertThrow before and after writing.
+template <int dim, int spacedim>
+void GridOut::write_vtk (const Triangulation<dim,spacedim> &tria,
+                         std::ostream             &out) const
+{
+  AssertThrow (out, ExcIO ());
+
+  // turn every active cell into one patch; the per-cell properties
+  // stored in the patches are labeled by triangulation_patch_data_names()
+  std::vector<DataOutBase::Patch<dim,spacedim> > cell_patches;
+  cell_patches.reserve (tria.n_active_cells());
+  generate_triangulation_patches (cell_patches,
+                                  tria.begin_active(),
+                                  tria.end());
+
+  // the tuple list expected by the writer is left empty
+  const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> >
+  empty_tuple_list;
+
+  DataOutBase::write_vtk (cell_patches,
+                          triangulation_patch_data_names(),
+                          empty_tuple_list,
+                          vtk_flags,
+                          out);
+
+  AssertThrow (out, ExcIO ());
+}
+
+
+
+// Write the active cells of @p tria to @p out through
+// DataOutBase::write_vtu, using the member vtu_flags.
+// The stream state is checked with AssertThrow before and after writing.
+template <int dim, int spacedim>
+void GridOut::write_vtu (const Triangulation<dim,spacedim> &tria,
+                         std::ostream             &out) const
+{
+  AssertThrow (out, ExcIO ());
+
+  // turn every active cell into one patch; the per-cell properties
+  // stored in the patches are labeled by triangulation_patch_data_names()
+  std::vector<DataOutBase::Patch<dim,spacedim> > cell_patches;
+  cell_patches.reserve (tria.n_active_cells());
+  generate_triangulation_patches (cell_patches,
+                                  tria.begin_active(),
+                                  tria.end());
+
+  // the tuple list expected by the writer is left empty
+  const std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> >
+  empty_tuple_list;
+
+  DataOutBase::write_vtu (cell_patches,
+                          triangulation_patch_data_names(),
+                          empty_tuple_list,
+                          vtu_flags,
+                          out);
+
+  AssertThrow (out, ExcIO ());
+}
+
+
+
+// Overload for Triangulation<1>: the argument is unused and the
+// function always returns 0.
+unsigned int GridOut::n_boundary_faces (const Triangulation<1> &) const
+{
+  return 0;
+}
+
+// Overload for Triangulation<1>: the argument is unused and the
+// function always returns 0.
+unsigned int GridOut::n_boundary_lines (const Triangulation<1> &) const
+{
+  return 0;
+}
+
+
+// Overload for Triangulation<1,2>: the argument is unused and the
+// function always returns 0.
+unsigned int GridOut::n_boundary_faces (const Triangulation<1,2> &) const
+{
+  return 0;
+}
+
+// Overload for Triangulation<1,2>: the argument is unused and the
+// function always returns 0.
+unsigned int GridOut::n_boundary_lines (const Triangulation<1,2> &) const
+{
+  return 0;
+}
+
+// Overload for Triangulation<1,3>: the argument is unused and the
+// function always returns 0.
+unsigned int GridOut::n_boundary_faces (const Triangulation<1,3> &) const
+{
+  return 0;
+}
+
+// Overload for Triangulation<1,3>: the argument is unused and the
+// function always returns 0.
+unsigned int GridOut::n_boundary_lines (const Triangulation<1,3> &) const
+{
+  return 0;
+}
+
+// Overload for Triangulation<2>: the argument is unused and the
+// function always returns 0.
+// NOTE(review): presumably lines of a 2d mesh are already accounted for
+// as faces by n_boundary_faces() -- confirm against the callers.
+unsigned int GridOut::n_boundary_lines (const Triangulation<2> &) const
+{
+  return 0;
+}
+
+// Overload for Triangulation<2,3>: the argument is unused and the
+// function always returns 0.
+// NOTE(review): presumably lines of a 2d mesh are already accounted for
+// as faces by n_boundary_faces() -- confirm against the callers.
+unsigned int GridOut::n_boundary_lines (const Triangulation<2,3> &) const
+{
+  return 0;
+}
+
+
+
+// Count the active faces of @p tria that lie on the boundary and carry
+// a boundary id different from zero.
+template <int dim, int spacedim>
+unsigned int GridOut::n_boundary_faces (const Triangulation<dim,spacedim> &tria) const
+{
+  unsigned int counted = 0;
+
+  const typename Triangulation<dim,spacedim>::active_face_iterator
+  past_last_face = tria.end_face();
+
+  for (typename Triangulation<dim,spacedim>::active_face_iterator
+       current = tria.begin_active_face();
+       current != past_last_face; ++current)
+    {
+      // interior faces do not count
+      if (!current->at_boundary())
+        continue;
+
+      // neither do boundary faces with boundary id zero
+      if (current->boundary_id() == 0)
+        continue;
+
+      ++counted;
+    }
+
+  return counted;
+}
+
+
+
+// Count the distinct lines of active cells of @p tria that lie on the
+// boundary and carry a boundary id different from zero.
+//
+// A line can belong to several cells. To count it only once, the line
+// user flags of the triangulation are used as "already counted" markers.
+// The flags found on entry are saved first and loaded back before
+// returning; this is why constness of @p tria is cast away.
+template <int dim, int spacedim>
+unsigned int GridOut::n_boundary_lines (const Triangulation<dim, spacedim> &tria) const
+{
+  dealii::Triangulation<dim,spacedim> &writable_tria
+    = const_cast<dealii::Triangulation<dim,spacedim>&>(tria);
+
+  // stash the current line flags and start from a clean slate
+  std::vector<bool> saved_line_flags;
+  writable_tria.save_user_flags_line (saved_line_flags);
+  writable_tria.clear_user_flags_line ();
+
+  unsigned int counted = 0;
+
+  const typename Triangulation<dim, spacedim>::active_cell_iterator
+  past_last_cell = tria.end();
+
+  for (typename Triangulation<dim, spacedim>::active_cell_iterator
+       current = tria.begin_active();
+       current != past_last_cell; ++current)
+    for (unsigned int line_no=0; line_no<GeometryInfo<dim>::lines_per_cell; ++line_no)
+      {
+        if (!current->line(line_no)->at_boundary())
+          continue;
+        if (current->line(line_no)->boundary_id() == 0)
+          continue;
+        // a set flag means another cell already contributed this line
+        if (current->line(line_no)->user_flag_set())
+          continue;
+
+        ++counted;
+        current->line(line_no)->set_user_flag();
+      }
+
+  // put the caller's line flags back in place
+  writable_tria.load_user_flags_line (saved_line_flags);
+
+  return counted;
+}
+
+
+
+
+// Overload for Triangulation<1>: writes nothing to the stream and
+// returns next_element_index unchanged.
+unsigned int
+GridOut::write_msh_faces (const Triangulation<1> &,
+                          const unsigned int next_element_index,
+                          std::ostream &) const
+{
+  return next_element_index;
+}
+
+
+// Overload for Triangulation<1,2>: writes nothing to the stream and
+// returns next_element_index unchanged.
+unsigned int
+GridOut::write_msh_faces (const Triangulation<1,2> &,
+                          const unsigned int next_element_index,
+                          std::ostream &) const
+{
+  return next_element_index;
+}
+
+// Overload for Triangulation<1,3>: writes nothing to the stream and
+// returns next_element_index unchanged.
+unsigned int
+GridOut::write_msh_faces (const Triangulation<1,3> &,
+                          const unsigned int next_element_index,
+                          std::ostream &) const
+{
+  return next_element_index;
+}
+
+
+// Overload for Triangulation<1>: writes nothing to the stream and
+// returns next_element_index unchanged.
+unsigned int
+GridOut::write_msh_lines (const Triangulation<1> &,
+                          const unsigned int next_element_index,
+                          std::ostream &) const
+{
+  return next_element_index;
+}
+
+// Overload for Triangulation<1,2>: writes nothing to the stream and
+// returns next_element_index unchanged.
+unsigned int
+GridOut::write_msh_lines (const Triangulation<1,2> &,
+                          const unsigned int next_element_index,
+                          std::ostream &) const
+{
+  return next_element_index;
+}
+
+
+// Overload for Triangulation<1,3>: writes nothing to the stream and
+// returns next_element_index unchanged.
+unsigned int
+GridOut::write_msh_lines (const Triangulation<1,3> &,
+                          const unsigned int next_element_index,
+                          std::ostream &) const
+{
+  return next_element_index;
+}
+
+
+unsigned int
+GridOut::write_msh_lines (const Triangulation<2> &,
+                          const unsigned int next_element_index,
+                          std::ostream &) const
+{
+  return next_element_index; // Triangulation<2> overload: no line records are written, index is returned unchanged
+}
+
+unsigned int
+GridOut::write_msh_lines (const Triangulation<2,3> &,
+                          const unsigned int next_element_index,
+                          std::ostream &) const
+{
+  return next_element_index; // Triangulation<2,3> overload: no line records are written, index is returned unchanged
+}
+
+
+// Writes one record per active boundary face with nonzero boundary id to out, numbering the records consecutively
+// from next_element_index. Record: "<index> <type> <bid> <bid> <n vertices> <v...>". Returns the next unused index.
+template <int dim, int spacedim>
+unsigned int
+GridOut::write_msh_faces (const Triangulation<dim,spacedim> &tria,
+                          const unsigned int                 next_element_index,
+                          std::ostream                      &out) const
+{
+  unsigned int current_element_index = next_element_index; // running index of the record being written
+  typename Triangulation<dim,spacedim>::active_face_iterator face, endf;
+
+  for (face=tria.begin_active_face(), endf=tria.end_face();
+       face != endf; ++face)
+    if (face->at_boundary() &&
+        (face->boundary_id() != 0)) // faces with boundary id 0 are not written
+      {
+        out << current_element_index << ' ';
+        switch (dim) // type field: 1 for dim==2, 3 for dim==3 (presumably the MSH line/quad codes -- confirm against the format spec)
+          {
+          case 2:
+            out << 1 << ' ';
+            break;
+          case 3:
+            out << 3 << ' ';
+            break;
+          default:
+            Assert (false, ExcNotImplemented());
+          }
+        out << static_cast<unsigned int>(face->boundary_id()) // the boundary id is written twice in a row
+            << ' '
+            << static_cast<unsigned int>(face->boundary_id())
+            << ' ' << GeometryInfo<dim>::vertices_per_face;
+        // vertex numbers are written 1-based, hence the +1
+        for (unsigned int vertex=0; vertex<GeometryInfo<dim>::vertices_per_face; ++vertex)
+          out << ' '
+              << face->vertex_index(GeometryInfo<dim-1>::ucd_to_deal[vertex])+1;
+        out << '\n';
+
+        ++current_element_index;
+      }
+  return current_element_index;
+}
+
+// Writes one "<index> 1 <bid> <bid> 2  <v0> <v1>" record per distinct boundary line with nonzero boundary id; returns the next unused index.
+template <int dim, int spacedim>
+unsigned int
+GridOut::write_msh_lines (const Triangulation<dim,spacedim> &tria,
+                          const unsigned int                 next_element_index,
+                          std::ostream                      &out) const
+{
+  unsigned int current_element_index = next_element_index; // running index of the record being written
+  // Line user flags mark lines already written (the same line can be
+  // reached from more than one cell). Save the current flags and clear
+  // them; they are restored before returning. The const_cast is needed
+  // because tria is const while the flag functions are called on it.
+  std::vector<bool> line_flags;
+  const_cast<dealii::Triangulation<dim,spacedim>&>(tria)
+  .save_user_flags_line (line_flags);
+  const_cast<dealii::Triangulation<dim,spacedim>&>(tria)
+  .clear_user_flags_line ();
+
+  typename Triangulation<dim, spacedim>::active_cell_iterator cell, endc;
+
+  for (cell=tria.begin_active(), endc=tria.end();
+       cell != endc; ++cell)
+    for (unsigned int l=0; l<GeometryInfo<dim>::lines_per_cell; ++l)
+      if (cell->line(l)->at_boundary()
+          &&
+          (cell->line(l)->boundary_id() != 0)
+          &&
+          (cell->line(l)->user_flag_set() == false))
+        {
+          out << current_element_index << " 1 "; // each record gets its own running index, not the start value
+          out << static_cast<unsigned int>(cell->line(l)->boundary_id())
+              << ' '
+              << static_cast<unsigned int>(cell->line(l)->boundary_id())
+              << " 2 ";
+          // vertex numbers are written 1-based, hence the +1
+          for (unsigned int vertex=0; vertex<2; ++vertex)
+            out << ' '
+                << cell->line(l)->vertex_index(GeometryInfo<dim-2>::ucd_to_deal[vertex])+1;
+          out << '\n';
+
+          // advance the running index and
+          // flag this line so that it is
+          // not written a second time
+          ++current_element_index;
+          cell->line(l)->set_user_flag();
+        }
+
+  // put back the user flags that were
+  // saved at the top of the function
+  const_cast<dealii::Triangulation<dim,spacedim>&>(tria)
+  .load_user_flags_line (line_flags);
+
+  return current_element_index;
+}
+
+
+
+
+unsigned int
+GridOut::write_ucd_faces (const Triangulation<1> &,
+                          const unsigned int next_element_index,
+                          std::ostream &) const
+{
+  return next_element_index; // Triangulation<1> overload: mesh and stream are ignored (unnamed), index is returned unchanged
+}
+
+unsigned int
+GridOut::write_ucd_faces (const Triangulation<1,2> &,
+                          const unsigned int next_element_index,
+                          std::ostream &) const
+{
+  return next_element_index; // Triangulation<1,2> overload: mesh and stream are ignored (unnamed), index is returned unchanged
+}
+
+unsigned int
+GridOut::write_ucd_faces (const Triangulation<1,3> &,
+                          const unsigned int next_element_index,
+                          std::ostream &) const
+{
+  return next_element_index; // Triangulation<1,3> overload: mesh and stream are ignored (unnamed), index is returned unchanged
+}
+
+unsigned int
+GridOut::write_ucd_lines (const Triangulation<1> &,
+                          const unsigned int next_element_index,
+                          std::ostream &) const
+{
+  return next_element_index; // Triangulation<1> overload: no line records are written, index is returned unchanged
+}
+
+unsigned int
+GridOut::write_ucd_lines (const Triangulation<1,2> &,
+                          const unsigned int next_element_index,
+                          std::ostream &) const
+{
+  return next_element_index; // Triangulation<1,2> overload: no line records are written, index is returned unchanged
+}
+
+
+unsigned int
+GridOut::write_ucd_lines (const Triangulation<1,3> &,
+                          const unsigned int next_element_index,
+                          std::ostream &) const
+{
+  return next_element_index; // Triangulation<1,3> overload: no line records are written, index is returned unchanged
+}
+
+
+unsigned int
+GridOut::write_ucd_lines (const Triangulation<2> &,
+                          const unsigned int next_element_index,
+                          std::ostream &) const
+{
+  return next_element_index; // Triangulation<2> overload: no line records are written, index is returned unchanged
+}
+
+unsigned int
+GridOut::write_ucd_lines (const Triangulation<2,3> &,
+                          const unsigned int next_element_index,
+                          std::ostream &) const
+{
+  return next_element_index; // Triangulation<2,3> overload: no line records are written, index is returned unchanged
+}
+
+// Writes one record per active boundary face with nonzero boundary id to out, in the form
+// "<index>  <boundary id>  line|quad    <vertex numbers>", numbering from next_element_index. Returns the next unused index.
+template <int dim, int spacedim>
+unsigned int
+GridOut::write_ucd_faces (const Triangulation<dim,spacedim> &tria,
+                          const unsigned int                 next_element_index,
+                          std::ostream                      &out) const
+{
+  unsigned int current_element_index = next_element_index; // running index of the record being written
+  typename Triangulation<dim,spacedim>::active_face_iterator face, endf;
+
+  for (face=tria.begin_active_face(), endf=tria.end_face();
+       face != endf; ++face)
+    if (face->at_boundary() &&
+        (face->boundary_id() != 0)) // faces with boundary id 0 are not written
+      {
+        out << current_element_index << "  "
+            << static_cast<unsigned int>(face->boundary_id())
+            << "  ";
+        switch (dim) // face type keyword: "line" when dim==2, "quad" when dim==3
+          {
+          case 2:
+            out << "line    ";
+            break;
+          case 3:
+            out << "quad    ";
+            break;
+          default:
+            Assert (false, ExcNotImplemented());
+          }
+        // vertex numbers are written 1-based, hence the +1
+        for (unsigned int vertex=0; vertex<GeometryInfo<dim>::vertices_per_face; ++vertex)
+          out << face->vertex_index(GeometryInfo<dim-1>::ucd_to_deal[vertex])+1 << ' ';
+        out << '\n';
+
+        ++current_element_index;
+      }
+  return current_element_index;
+}
+
+// Writes one "<index>  <boundary id>  line    <v0> <v1> " record to out for every distinct cell line that is at the
+// boundary and has a nonzero boundary id, numbering from next_element_index. Returns the next unused index.
+template <int dim, int spacedim>
+unsigned int
+GridOut::write_ucd_lines(const Triangulation<dim, spacedim> &tria,
+                         const unsigned int                  next_element_index,
+                         std::ostream                       &out) const
+{
+  unsigned int current_element_index = next_element_index; // running index of the record being written
+  // Line user flags mark lines already written (the same line can be
+  // reached from more than one cell). Save the current flags and clear
+  // them; they are restored before returning. The const_cast is needed
+  // because tria is const while the flag functions are called on it.
+  std::vector<bool> line_flags;
+  const_cast<dealii::Triangulation<dim,spacedim>&>(tria)
+  .save_user_flags_line (line_flags);
+  const_cast<dealii::Triangulation<dim,spacedim>&>(tria)
+  .clear_user_flags_line ();
+
+  typename Triangulation<dim, spacedim>::active_cell_iterator cell, endc;
+
+  for (cell=tria.begin_active(), endc=tria.end();
+       cell != endc; ++cell)
+    for (unsigned int l=0; l<GeometryInfo<dim>::lines_per_cell; ++l)
+      if (cell->line(l)->at_boundary()
+          &&
+          (cell->line(l)->boundary_id() != 0)
+          &&
+          (cell->line(l)->user_flag_set() == false))
+        {
+          out << current_element_index << "  "
+              << static_cast<unsigned int>(cell->line(l)->boundary_id())
+              << "  line    ";
+          // vertex numbers are written 1-based, hence the +1
+          for (unsigned int vertex=0; vertex<2; ++vertex)
+            out << cell->line(l)->vertex_index(GeometryInfo<dim-2>::ucd_to_deal[vertex])+1
+                << ' ';
+          out << '\n';
+
+          // advance the running index and
+          // flag this line so that it is
+          // not written a second time
+          ++current_element_index;
+          cell->line(l)->set_user_flag();
+        }
+
+  // put back the user flags that were
+  // saved at the top of the function
+  const_cast<dealii::Triangulation<dim,spacedim>&>(tria)
+  .load_user_flags_line (line_flags);
+  return current_element_index;
+}
+
+// Central projection of point as seen from camera_position; returns its coordinates along camera_horizontal and the derived vertical axis.
+Point<2> GridOut::svg_project_point(Point<3> point, Point<3> camera_position, Point<3> camera_direction, Point<3> camera_horizontal, float camera_focus)
+{
+  // second image axis: cross product camera_horizontal x camera_direction
+  Point<3> camera_vertical;
+  camera_vertical[0] = camera_horizontal[1] * camera_direction[2] - camera_horizontal[2] * camera_direction[1];
+  camera_vertical[1] = camera_horizontal[2] * camera_direction[0] - camera_horizontal[0] * camera_direction[2];
+  camera_vertical[2] = camera_horizontal[0] * camera_direction[1] - camera_horizontal[1] * camera_direction[0];
+
+  float phi; // scaling along the ray camera_position -> point; stored as float, so the quotient below is rounded to float
+  phi  = camera_focus;
+  phi /= (point[0] - camera_position[0]) * camera_direction[0] + (point[1] - camera_position[1]) * camera_direction[1] + (point[2] - camera_position[2]) * camera_direction[2]; // (point - camera_position) . camera_direction; no check for zero
+
+  Point<3> projection; // camera_position + phi * (point - camera_position)
+  projection[0] = camera_position[0] + phi * (point[0] - camera_position[0]);
+  projection[1] = camera_position[1] + phi * (point[1] - camera_position[1]);
+  projection[2] = camera_position[2] + phi * (point[2] - camera_position[2]);
+
+  Point<2> projection_decomposition; // components of (projection - camera_position - camera_focus*camera_direction) along the two image axes
+  projection_decomposition[0]  = (projection[0] - camera_position[0] - camera_focus * camera_direction[0]) * camera_horizontal[0];
+  projection_decomposition[0] += (projection[1] - camera_position[1] - camera_focus * camera_direction[1]) * camera_horizontal[1];
+  projection_decomposition[0] += (projection[2] - camera_position[2] - camera_focus * camera_direction[2]) * camera_horizontal[2];
+
+  projection_decomposition[1]  = (projection[0] - camera_position[0] - camera_focus * camera_direction[0]) * camera_vertical[0];
+  projection_decomposition[1] += (projection[1] - camera_position[1] - camera_focus * camera_direction[1]) * camera_vertical[1];
+  projection_decomposition[1] += (projection[2] - camera_position[2] - camera_focus * camera_direction[2]) * camera_vertical[2];
+
+  return projection_decomposition;
+}
+
+
+
+namespace internal
+{
+  namespace
+  {
+    template <int spacedim>
+    void write_gnuplot (const dealii::Triangulation<1,spacedim> &tria, // 1d overload: one two-point data block per active cell
+                        std::ostream             &out,
+                        const Mapping<1,spacedim> *, // mapping is not used in 1d
+                        const GridOutFlags::Gnuplot &gnuplot_flags)
+    {
+      AssertThrow (out, ExcIO()); // the stream must be in a good state before writing
+
+      const int dim = 1;
+
+      typename dealii::Triangulation<dim,spacedim>::active_cell_iterator
+      cell=tria.begin_active();
+      const typename dealii::Triangulation<dim,spacedim>::active_cell_iterator
+      endc=tria.end();
+      for (; cell!=endc; ++cell)
+        {
+          if (gnuplot_flags.write_cell_numbers)
+            out << "# cell " << cell << '\n';
+
+          out << cell->vertex(0) // each data line: vertex coordinates, cell level, material id
+              << ' ' << cell->level()
+              << ' ' << static_cast<unsigned int>(cell->material_id()) << '\n'
+              << cell->vertex(1)
+              << ' ' << cell->level()
+              << ' ' << static_cast<unsigned int>(cell->material_id()) << '\n'
+              << "\n\n";
+        }
+
+      // flush the stream, then check once
+      // more that no write has failed
+      out.flush ();
+
+      AssertThrow (out, ExcIO());
+    }
+
+
+    // 2d overload: writes each active cell's outline as gnuplot data lines "<point> <level> <material id>"; faces may be drawn curved via mapping.
+    template <int spacedim>
+    void write_gnuplot (const dealii::Triangulation<2,spacedim> &tria,
+                        std::ostream           &out,
+                        const Mapping<2,spacedim>       *mapping,
+                        const GridOutFlags::Gnuplot &gnuplot_flags)
+    {
+      AssertThrow (out, ExcIO()); // the stream must be in a good state before writing
+
+      const int dim = 2;
+
+      const unsigned int n_additional_points=
+        gnuplot_flags.n_boundary_face_points;
+      const unsigned int n_points=2+n_additional_points; // both end points plus the requested interior points
+
+      typename dealii::Triangulation<dim,spacedim>::active_cell_iterator
+      cell=tria.begin_active();
+      const typename dealii::Triangulation<dim,spacedim>::active_cell_iterator
+      endc=tria.end();
+
+      // if a mapping is given, build
+      // n_points equidistant points on
+      // [0,1] and project them onto all
+      // faces of the unit cell; they are
+      // mapped to real space further down
+      Quadrature<dim> *q_projector=0; // allocated below only if mapping!=0, deleted at the end of the function
+      std::vector<Point<dim-1> > boundary_points;
+      if (mapping!=0)
+        {
+          boundary_points.resize(n_points);
+          boundary_points[0][0]=0;
+          boundary_points[n_points-1][0]=1;
+          for (unsigned int i=1; i<n_points-1; ++i)
+            boundary_points[i](0)= 1.*i/(n_points-1);
+
+          std::vector<double> dummy_weights(n_points, 1./n_points);
+          Quadrature<dim-1> quadrature(boundary_points, dummy_weights);
+
+          q_projector = new Quadrature<dim> (QProjector<dim>::project_to_all_faces(quadrature));
+        }
+
+      for (; cell!=endc; ++cell)
+        {
+          if (gnuplot_flags.write_cell_numbers)
+            out << "# cell " << cell << '\n';
+
+          if (mapping==0 ||
+              (!cell->at_boundary() && !gnuplot_flags.curved_inner_cells))
+            {
+              // write out the four sides
+              // of this cell by putting
+              // the four points (+ the
+              // initial point again) in
+              // a row and lifting the
+              // drawing pencil at the
+              // end
+              for (unsigned int i=0; i<GeometryInfo<dim>::vertices_per_cell; ++i)
+                out << cell->vertex(GeometryInfo<dim>::ucd_to_deal[i])
+                    << ' ' << cell->level()
+                    << ' ' << static_cast<unsigned int>(cell->material_id()) << '\n';
+              out << cell->vertex(0)
+                  << ' ' << cell->level()
+                  << ' ' << static_cast<unsigned int>(cell->material_id()) << '\n'
+                  << '\n'  // double new line for gnuplot 3d plots
+                  << '\n';
+            }
+          else
+            // a mapping is given and the
+            // cell is at the boundary or
+            // curved_inner_cells is set:
+            // loop over all faces and draw
+            // them as small pieces of lines
+            {
+              for (unsigned int face_no=0;
+                   face_no<GeometryInfo<dim>::faces_per_cell; ++face_no)
+                {
+                  const typename dealii::Triangulation<dim,spacedim>::face_iterator
+                  face = cell->face(face_no);
+                  if (face->at_boundary() || gnuplot_flags.curved_inner_cells)
+                    {
+                      // compute offset
+                      // of quadrature
+                      // points within
+                      // set of projected
+                      // points
+                      const unsigned int offset=face_no*n_points;
+                      for (unsigned int i=0; i<n_points; ++i)
+                        out << (mapping->transform_unit_to_real_cell
+                                (cell, q_projector->point(offset+i)))
+                            << ' ' << cell->level()
+                            << ' ' << static_cast<unsigned int>(cell->material_id())
+                            << '\n';
+
+                      out << '\n'
+                          << '\n';
+                    }
+                  else
+                    {
+                      // if, however, the
+                      // face is not at
+                      // the boundary,
+                      // then draw it as
+                      // usual
+                      out << face->vertex(0)
+                          << ' ' << cell->level()
+                          << ' ' << static_cast<unsigned int>(cell->material_id())
+                          << '\n'
+                          << face->vertex(1)
+                          << ' ' << cell->level()
+                          << ' ' << static_cast<unsigned int>(cell->material_id())
+                          << '\n'
+                          << '\n'
+                          << '\n';
+                    }
+                }
+            }
+        }
+
+      if (q_projector != 0) // NOTE(review): not reached if an exception propagates out of the loop above
+        delete q_projector;
+
+      // flush the stream, then check once
+      // more that no write has failed
+      out.flush ();
+
+      AssertThrow (out, ExcIO());
+    }
+
+
+    // 3d overload: writes each active cell's edges (or curved face patches) as gnuplot data lines "<point> <level> <material id>".
+    template <int spacedim>
+    void write_gnuplot (const dealii::Triangulation<3,spacedim> &tria,
+                        std::ostream           &out,
+                        const Mapping<3,spacedim>       *mapping,
+                        const GridOutFlags::Gnuplot &gnuplot_flags)
+    {
+      AssertThrow (out, ExcIO()); // the stream must be in a good state before writing
+
+      const int dim = 3;
+
+      const unsigned int n_additional_points=
+        gnuplot_flags.n_boundary_face_points;
+      const unsigned int n_points=2+n_additional_points; // both end points plus the requested interior points
+
+      typename dealii::Triangulation<dim,spacedim>::active_cell_iterator
+      cell=tria.begin_active();
+      const typename dealii::Triangulation<dim,spacedim>::active_cell_iterator
+      endc=tria.end();
+
+      // if a mapping is given, build
+      // n_points equidistant points on
+      // [0,1], form their tensor product
+      // and project it onto all faces of
+      // the unit cell
+      Quadrature<dim> *q_projector=0; // allocated below only if mapping!=0, deleted at the end of the function
+      std::vector<Point<1> > boundary_points;
+      if (mapping!=0)
+        {
+          boundary_points.resize(n_points);
+          boundary_points[0][0]=0;
+          boundary_points[n_points-1][0]=1;
+          for (unsigned int i=1; i<n_points-1; ++i)
+            boundary_points[i](0)= 1.*i/(n_points-1);
+
+          std::vector<double> dummy_weights(n_points, 1./n_points);
+          Quadrature<1> quadrature1d(boundary_points, dummy_weights);
+
+          // tensor product of points,
+          // only one copy
+          QIterated<dim-1> quadrature(quadrature1d, 1);
+          q_projector = new Quadrature<dim> (QProjector<dim>::project_to_all_faces(quadrature));
+        }
+
+      for (; cell!=endc; ++cell)
+        {
+          if (gnuplot_flags.write_cell_numbers)
+            out << "# cell " << cell << '\n';
+
+          if (mapping==0 || n_points==2 ||
+              (!cell->has_boundary_lines() && !gnuplot_flags.curved_inner_cells)) // straight-edge case: 12 edges drawn from the 8 vertices
+            {
+              // front face
+              out << cell->vertex(0)
+                  << ' ' << cell->level()
+                  << ' ' << static_cast<unsigned int>(cell->material_id()) << '\n'
+                  << cell->vertex(1)
+                  << ' ' << cell->level()
+                  << ' ' << static_cast<unsigned int>(cell->material_id()) << '\n'
+                  << cell->vertex(5)
+                  << ' ' << cell->level()
+                  << ' ' << static_cast<unsigned int>(cell->material_id()) << '\n'
+                  << cell->vertex(4)
+                  << ' ' << cell->level()
+                  << ' ' << static_cast<unsigned int>(cell->material_id()) << '\n'
+                  << cell->vertex(0)
+                  << ' ' << cell->level()
+                  << ' ' << static_cast<unsigned int>(cell->material_id()) << '\n'
+                  << '\n';
+              // back face
+              out << cell->vertex(2)
+                  << ' ' << cell->level()
+                  << ' ' << static_cast<unsigned int>(cell->material_id()) << '\n'
+                  << cell->vertex(3)
+                  << ' ' << cell->level()
+                  << ' ' << static_cast<unsigned int>(cell->material_id()) << '\n'
+                  << cell->vertex(7)
+                  << ' ' << cell->level()
+                  << ' ' << static_cast<unsigned int>(cell->material_id()) << '\n'
+                  << cell->vertex(6)
+                  << ' ' << cell->level()
+                  << ' ' << static_cast<unsigned int>(cell->material_id()) << '\n'
+                  << cell->vertex(2)
+                  << ' ' << cell->level()
+                  << ' ' << static_cast<unsigned int>(cell->material_id()) << '\n'
+                  << '\n';
+
+              // now for the four connecting lines
+              out << cell->vertex(0)
+                  << ' ' << cell->level()
+                  << ' ' << static_cast<unsigned int>(cell->material_id()) << '\n'
+                  << cell->vertex(2)
+                  << ' ' << cell->level()
+                  << ' ' << static_cast<unsigned int>(cell->material_id()) << '\n'
+                  << '\n';
+              out << cell->vertex(1)
+                  << ' ' << cell->level()
+                  << ' ' << static_cast<unsigned int>(cell->material_id()) << '\n'
+                  << cell->vertex(3)
+                  << ' ' << cell->level()
+                  << ' ' << static_cast<unsigned int>(cell->material_id()) << '\n'
+                  << '\n';
+              out << cell->vertex(5)
+                  << ' ' << cell->level()
+                  << ' ' << static_cast<unsigned int>(cell->material_id()) << '\n'
+                  << cell->vertex(7)
+                  << ' ' << cell->level()
+                  << ' ' << static_cast<unsigned int>(cell->material_id()) << '\n'
+                  << '\n';
+              out << cell->vertex(4)
+                  << ' ' << cell->level()
+                  << ' ' << static_cast<unsigned int>(cell->material_id()) << '\n'
+                  << cell->vertex(6)
+                  << ' ' << cell->level()
+                  << ' ' << static_cast<unsigned int>(cell->material_id()) << '\n'
+                  << '\n';
+            }
+          else
+            {
+              for (unsigned int face_no=0; face_no<GeometryInfo<dim>::faces_per_cell; ++face_no)
+                {
+                  const typename dealii::Triangulation<dim,spacedim>::face_iterator
+                  face = cell->face(face_no);
+
+                  if (face->at_boundary()) // boundary face: each of the (n_points-1)^2 patches is drawn as a closed loop of 5 points
+                    {
+                      const unsigned int offset=face_no*n_points*n_points; // the indexing treats each face as owning n_points*n_points projected points
+                      for (unsigned int i=0; i<n_points-1; ++i)
+                        for (unsigned int j=0; j<n_points-1; ++j)
+                          {
+                            const Point<spacedim> p0=mapping->transform_unit_to_real_cell(
+                                                       cell, q_projector->point(offset+i*n_points+j));
+                            out << p0
+                                << ' ' << cell->level()
+                                << ' ' << static_cast<unsigned int>(cell->material_id()) << '\n';
+                            out << (mapping->transform_unit_to_real_cell(
+                                      cell, q_projector->point(offset+(i+1)*n_points+j)))
+                                << ' ' << cell->level()
+                                << ' ' << static_cast<unsigned int>(cell->material_id()) << '\n';
+                            out << (mapping->transform_unit_to_real_cell(
+                                      cell, q_projector->point(offset+(i+1)*n_points+j+1)))
+                                << ' ' << cell->level()
+                                << ' ' << static_cast<unsigned int>(cell->material_id()) << '\n';
+                            out << (mapping->transform_unit_to_real_cell(
+                                      cell, q_projector->point(offset+i*n_points+j+1)))
+                                << ' ' << cell->level()
+                                << ' ' << static_cast<unsigned int>(cell->material_id()) << '\n';
+                            // and the
+                            // first
+                            // point
+                            // again
+                            out << p0
+                                << ' ' << cell->level()
+                                << ' ' << static_cast<unsigned int>(cell->material_id()) << '\n';
+                            out << '\n' << '\n';
+                          }
+                    }
+                  else
+                    {
+                      for (unsigned int l=0; l<GeometryInfo<dim>::lines_per_face; ++l) // interior face: draw its lines one by one
+                        {
+                          const typename dealii::Triangulation<dim,spacedim>::line_iterator
+                          line=face->line(l);
+
+                          const Point<spacedim> &v0=line->vertex(0),
+                                                 &v1=line->vertex(1);
+                          if (line->at_boundary() || gnuplot_flags.curved_inner_cells)
+                            {
+                              // transform_real_to_unit_cell
+                              // could be
+                              // replaced
+                              // by using
+                              // QProjector<dim>::project_to_line
+                              // which is
+                              // not yet
+                              // implemented
+                              const Point<spacedim> u0=mapping->transform_real_to_unit_cell(cell, v0),
+                                                    u1=mapping->transform_real_to_unit_cell(cell, v1);
+
+                              for (unsigned int i=0; i<n_points; ++i) // interpolate between u0 and u1 on the unit cell, then map back
+                                out << (mapping->transform_unit_to_real_cell
+                                        (cell, (1-boundary_points[i][0])*u0+boundary_points[i][0]*u1))
+                                    << ' ' << cell->level()
+                                    << ' ' << static_cast<unsigned int>(cell->material_id()) << '\n';
+                            }
+                          else
+                            out << v0
+                                << ' ' << cell->level()
+                                << ' ' << static_cast<unsigned int>(cell->material_id()) << '\n'
+                                << v1
+                                << ' ' << cell->level()
+                                << ' ' << static_cast<unsigned int>(cell->material_id()) << '\n';
+
+                          out << '\n' << '\n';
+                        }
+                    }
+                }
+            }
+        }
+
+      if (q_projector != 0) // NOTE(review): not reached if an exception propagates out of the loop above
+        delete q_projector;
+
+
+      // flush the stream, then check once
+      // more that no write has failed
+      out.flush ();
+
+      AssertThrow (out, ExcIO());
+    }
+  }
+}
+
+
+// Member entry point: forwards tria, out and mapping, together with gnuplot_flags, to the matching internal::write_gnuplot overload.
+template <int dim, int spacedim>
+void GridOut::write_gnuplot (
+  const Triangulation<dim,spacedim> &tria,
+  std::ostream             &out,
+  const Mapping<dim,spacedim>       *mapping) const
+{
+  internal::write_gnuplot (tria, out, mapping, gnuplot_flags); // the overload is selected by the dimension of tria
+}
+
+
+
+namespace internal
+{
+  namespace
+  {
+    struct LineEntry // one 2d line segment collected by write_eps for later output
+    {
+      Point<2> first; // start point of the segment
+      Point<2> second; // end point of the segment
+      bool colorize; // write_eps passes the line's user flag here
+      unsigned int level; // write_eps passes the level of the owning cell here
+      LineEntry (const Point<2>    &f,
+                 const Point<2>    &s,
+                 const bool         c,
+                 const unsigned int l)
+        :
+        first(f), second(s), // copies the arguments into the members one to one
+        colorize(c), level(l)
+      {}
+    };
+
+
+    void write_eps (const dealii::Triangulation<1> &,
+                    std::ostream &,
+                    const Mapping<1> *,
+                    const GridOutFlags::Eps<2> &,
+                    const GridOutFlags::Eps<3> &)
+    {
+      Assert(false, ExcNotImplemented()); // Triangulation<1> overload: all arguments are ignored, only this assertion is raised
+    }
+
+    void write_eps (const dealii::Triangulation<1,2> &,
+                    std::ostream &,
+                    const Mapping<1,2> *,
+                    const GridOutFlags::Eps<2> &,
+                    const GridOutFlags::Eps<3> &)
+    {
+      Assert(false, ExcNotImplemented()); // Triangulation<1,2> overload: all arguments are ignored, only this assertion is raised
+    }
+
+    void write_eps (const dealii::Triangulation<1,3> &,
+                    std::ostream &,
+                    const Mapping<1,3> *,
+                    const GridOutFlags::Eps<2> &,
+                    const GridOutFlags::Eps<3> &)
+    {
+      Assert(false, ExcNotImplemented()); // Triangulation<1,3> overload: all arguments are ignored, only this assertion is raised
+    }
+
+    void write_eps (const dealii::Triangulation<2,3> &,
+                    std::ostream &,
+                    const Mapping<2,3> *,
+                    const GridOutFlags::Eps<2> &,
+                    const GridOutFlags::Eps<3> &)
+    {
+      Assert(false, ExcNotImplemented()); // Triangulation<2,3> overload: all arguments are ignored, only this assertion is raised
+    }
+
+
+
+    template <int dim, int spacedim>
+    void write_eps (const dealii::Triangulation<dim, spacedim> &tria,
+                    std::ostream             &out,
+                    const Mapping<dim,spacedim>       *mapping,
+                    const GridOutFlags::Eps<2> &eps_flags_2,
+                    const GridOutFlags::Eps<3> &eps_flags_3)
+    {
+      typedef std::list<LineEntry> LineList;
+
+      // get a pointer to the flags
+      // common to all dimensions, in
+      // order to avoid the recurring
+      // distinctions between
+      // eps_flags_1, eps_flags_2, ...
+      const GridOutFlags::EpsFlagsBase
+      &eps_flags_base = (dim==2 ?
+                         static_cast<const GridOutFlags::EpsFlagsBase &>(eps_flags_2) :
+                         (dim==3 ?
+                          static_cast<const GridOutFlags::EpsFlagsBase &>(eps_flags_3) :
+                          *static_cast<const GridOutFlags::EpsFlagsBase *>(0)));
+
+      AssertThrow (out, ExcIO());
+      const unsigned int n_points = eps_flags_base.n_boundary_face_points;
+
+      // make up a list of lines by which
+      // we will construct the triangulation
+      //
+      // this part unfortunately is a bit
+      // dimension dependent, so we have to
+      // treat every dimension different.
+      // however, by directly producing
+      // the lines to be printed, i.e. their
+      // 2d images, we can later do the
+      // actual output dimension independent
+      // again
+      LineList line_list;
+
+      switch (dim)
+        {
+        case 1:
+        {
+          Assert(false, ExcInternalError());
+          break;
+        }
+
+        case 2:
+        {
+          for (typename dealii::Triangulation<dim, spacedim>::active_cell_iterator
+               cell=tria.begin_active();
+               cell!=tria.end(); ++cell)
+            for (unsigned int line_no=0;
+                 line_no<GeometryInfo<dim>::lines_per_cell; ++line_no)
+              {
+                typename dealii::Triangulation<dim, spacedim>::line_iterator
+                line=cell->line(line_no);
+
+                // first treat all
+                // interior lines and
+                // make up a list of
+                // them. if curved
+                // lines shall not be
+                // supported (i.e. no
+                // mapping is
+                // provided), then also
+                // treat all other
+                // lines
+                if (!line->has_children() &&
+                    (mapping==0 || !line->at_boundary()))
+                  // one would expect
+                  // make_pair(line->vertex(0),
+                  //           line->vertex(1))
+                  // here, but that is
+                  // not dimension
+                  // independent, since
+                  // vertex(i) is
+                  // Point<dim>, but we
+                  // want a Point<2>.
+                  // in fact, whenever
+                  // we're here, the
+                  // vertex is a
+                  // Point<dim>, but
+                  // the compiler does
+                  // not know
+                  // this. hopefully,
+                  // the compiler will
+                  // optimize away this
+                  // little kludge
+                  line_list.push_back (LineEntry(Point<2>(line->vertex(0)(0),
+                                                          line->vertex(0)(1)),
+                                                 Point<2>(line->vertex(1)(0),
+                                                          line->vertex(1)(1)),
+                                                 line->user_flag_set(),
+                                                 cell->level()));
+              }
+
+          // next if we are to treat
+          // curved boundaries
+          // specially, then add lines
+          // to the list consisting of
+          // pieces of the boundary
+          // lines
+          if (mapping!=0)
+            {
+              // to do so, first
+              // generate a sequence of
+              // points on a face and
+              // project them onto the
+              // faces of a unit cell
+              std::vector<Point<dim-1> > boundary_points (n_points);
+
+              for (unsigned int i=0; i<n_points; ++i)
+                boundary_points[i](0) = 1.*(i+1)/(n_points+1);
+
+              Quadrature<dim-1> quadrature (boundary_points);
+              Quadrature<dim>   q_projector (QProjector<dim>::project_to_all_faces(quadrature));
+
+              // next loop over all
+              // boundary faces and
+              // generate the info from
+              // them
+              for (typename dealii::Triangulation<dim, spacedim>::active_cell_iterator
+                   cell=tria.begin_active();
+                   cell!=tria.end(); ++cell)
+                for (unsigned int face_no=0; face_no<GeometryInfo<dim>::faces_per_cell; ++face_no)
+                  {
+                    const typename dealii::Triangulation<dim, spacedim>::face_iterator
+                    face = cell->face(face_no);
+
+                    if (face->at_boundary())
+                      {
+                        Point<dim> p0_dim(face->vertex(0));
+                        Point<2>   p0    (p0_dim(0), p0_dim(1));
+
+                        // loop over
+                        // all pieces
+                        // of the line
+                        // and generate
+                        // line-lets
+                        const unsigned int offset=face_no*n_points;
+                        for (unsigned int i=0; i<n_points; ++i)
+                          {
+                            const Point<dim> p1_dim (mapping->transform_unit_to_real_cell
+                                                     (cell, q_projector.point(offset+i)));
+                            const Point<2>   p1     (p1_dim(0), p1_dim(1));
+
+                            line_list.push_back (LineEntry(p0, p1,
+                                                           face->user_flag_set(),
+                                                           cell->level() ));
+                            p0=p1;
+                          }
+
+                        // generate last piece
+                        const Point<dim> p1_dim (face->vertex(1));
+                        const Point<2>   p1     (p1_dim(0), p1_dim(1));
+                        line_list.push_back (LineEntry(p0, p1,
+                                                       face->user_flag_set(),
+                                                       cell->level()));
+                      }
+                  }
+            }
+
+          break;
+        }
+
+        case 3:
+        {
+          // curved boundary output
+          // presently not supported
+          Assert (mapping == 0, ExcNotImplemented());
+
+          typename dealii::Triangulation<dim, spacedim>::active_cell_iterator
+          cell=tria.begin_active(),
+          endc=tria.end();
+
+          // loop over all lines and compute their
+          // projection on the plane perpendicular
+          // to the direction of sight
+
+          // direction of view equals the unit
+          // vector of the position of the
+          // spectator to the origin.
+          //
+          // we chose here the viewpoint as in
+          // gnuplot as default.
+          //
+          //TODO:[WB] Fix a potential problem with viewing angles in 3d Eps GridOut
+          // note: the following might be wrong
+          // if one of the base vectors below
+          // is in direction of the viewer, but
+          // I am too tired at present to fix
+          // this
+          const double pi = numbers::PI;
+          const double z_angle    = eps_flags_3.azimut_angle;
+          const double turn_angle = eps_flags_3.turn_angle;
+          const Point<dim> view_direction(-std::sin(z_angle * 2.*pi / 360.) * std::sin(turn_angle * 2.*pi / 360.),
+                                          +std::sin(z_angle * 2.*pi / 360.) * std::cos(turn_angle * 2.*pi / 360.),
+                                          -std::cos(z_angle * 2.*pi / 360.));
+
+          // decide about the two unit vectors
+          // in this plane. we chose the first one
+          // to be the projection of the z-axis
+          // to this plane
+          const Tensor<1,dim> vector1
+            = Point<dim>(0,0,1) - ((Point<dim>(0,0,1) * view_direction) * view_direction);
+          const Tensor<1,dim> unit_vector1 = vector1 / vector1.norm();
+
+          // now the third vector is fixed. we
+          // chose the projection of a more or
+          // less arbitrary vector to the plane
+          // perpendicular to the first one
+          const Tensor<1,dim> vector2
+            = (Point<dim>(1,0,0)
+               - ((Point<dim>(1,0,0) * view_direction) * view_direction)
+               - ((Point<dim>(1,0,0) * unit_vector1)   * unit_vector1));
+          const Tensor<1,dim> unit_vector2 = vector2 / vector2.norm();
+
+
+          for (; cell!=endc; ++cell)
+            for (unsigned int line_no=0;
+                 line_no<GeometryInfo<dim>::lines_per_cell; ++line_no)
+              {
+                typename dealii::Triangulation<dim, spacedim>::line_iterator
+                line=cell->line(line_no);
+                line_list.push_back (LineEntry(Point<2>(line->vertex(0) * unit_vector2,
+                                                        line->vertex(0) * unit_vector1),
+                                               Point<2>(line->vertex(1) * unit_vector2,
+                                                        line->vertex(1) * unit_vector1),
+                                               line->user_flag_set(),
+                                               cell->level()));
+              }
+
+          break;
+        }
+
+        default:
+          Assert (false, ExcNotImplemented());
+        }
+
+
+
+      // find out minimum and maximum x and
+      // y coordinates to compute offsets
+      // and scaling factors
+      double x_min = tria.begin_active()->vertex(0)(0);
+      double x_max = x_min;
+      double y_min = tria.begin_active()->vertex(0)(1);
+      double y_max = y_min;
+      unsigned int  max_level = line_list.begin()->level;
+
+      for (LineList::const_iterator line=line_list.begin();
+           line!=line_list.end(); ++line)
+        {
+          x_min = std::min (x_min, line->first(0));
+          x_min = std::min (x_min, line->second(0));
+
+          x_max = std::max (x_max, line->first(0));
+          x_max = std::max (x_max, line->second(0));
+
+          y_min = std::min (y_min, line->first(1));
+          y_min = std::min (y_min, line->second(1));
+
+          y_max = std::max (y_max, line->first(1));
+          y_max = std::max (y_max, line->second(1));
+
+          max_level = std::max (max_level,  line->level);
+        }
+
+      // scale in x-direction such that
+      // in the output 0 <= x <= 300.
+      // don't scale in y-direction to
+      // preserve the shape of the
+      // triangulation
+      const double scale = (eps_flags_base.size /
+                            (eps_flags_base.size_type==GridOutFlags::EpsFlagsBase::width ?
+                             x_max - x_min :
+                             y_min - y_max));
+
+
+      // now write preamble
+      if (true)
+        {
+          // block this to have local
+          // variables destroyed after
+          // use
+          std::time_t  time1= std::time (0);
+          std::tm     *time = std::localtime(&time1);
+          out << "%!PS-Adobe-2.0 EPSF-1.2" << '\n'
+              << "%%Title: deal.II Output" << '\n'
+              << "%%Creator: the deal.II library" << '\n'
+              << "%%Creation Date: "
+              << time->tm_year+1900 << "/"
+              << time->tm_mon+1 << "/"
+              << time->tm_mday << " - "
+              << time->tm_hour << ":"
+              << std::setw(2) << time->tm_min << ":"
+              << std::setw(2) << time->tm_sec << '\n'
+              << "%%BoundingBox: "
+              // lower left corner
+              << "0 0 "
+              // upper right corner
+              << static_cast<unsigned int>(std::floor(( (x_max-x_min) * scale )+1))
+              << ' '
+              << static_cast<unsigned int>(std::floor(( (y_max-y_min) * scale )+1))
+              << '\n';
+
+          // define some abbreviations to keep
+          // the output small:
+          // m=move turtle to
+          // x=execute line stroke
+          // b=black pen
+          // r=red pen
+          out << "/m {moveto} bind def" << '\n'
+              << "/x {lineto stroke} bind def" << '\n'
+              << "/b {0 0 0 setrgbcolor} def" << '\n'
+              << "/r {1 0 0 setrgbcolor} def" << '\n';
+
+          // calculate colors for level
+          // coloring; level 0 is black,
+          // other levels are blue
+          // ... red
+          if (eps_flags_base.color_lines_level)
+            out  << "/l  { neg "
+                 << (max_level)
+                 << " add "
+                 << (0.66666/std::max(1U,(max_level-1)))
+                 << " mul 1 0.8 sethsbcolor} def" << '\n';
+
+          // in 2d, we can also plot cell
+          // and vertex numbers, but this
+          // requires a somewhat more
+          // lengthy preamble. please
+          // don't ask me what most of
+          // this means, it is reverse
+          // engineered from what GNUPLOT
+          // uses in its output
+          if ((dim == 2) && (eps_flags_2.write_cell_numbers ||
+                             eps_flags_2.write_vertex_numbers))
+            {
+              out << ("/R {rmoveto} bind def\n"
+                      "/Symbol-Oblique /Symbol findfont [1 0 .167 1 0 0] makefont\n"
+                      "dup length dict begin {1 index /FID eq {pop pop} {def} ifelse} forall\n"
+                      "currentdict end definefont\n"
+                      "/MFshow {{dup dup 0 get findfont exch 1 get scalefont setfont\n"
+                      "[ currentpoint ] exch dup 2 get 0 exch rmoveto dup dup 5 get exch 4 get\n"
+                      "{show} {stringwidth pop 0 rmoveto}ifelse dup 3 get\n"
+                      "{2 get neg 0 exch rmoveto pop} {pop aload pop moveto}ifelse} forall} bind def\n"
+                      "/MFwidth {0 exch {dup 3 get{dup dup 0 get findfont exch 1 get scalefont setfont\n"
+                      "5 get stringwidth pop add}\n"
+                      "{pop} ifelse} forall} bind def\n"
+                      "/MCshow { currentpoint stroke m\n"
+                      "exch dup MFwidth -2 div 3 -1 roll R MFshow } def\n")
+                  << '\n';
+            }
+
+          out << "%%EndProlog" << '\n'
+              << '\n';
+
+          // set fine lines
+          out << eps_flags_base.line_width << " setlinewidth" << '\n';
+        }
+
+      // now write the lines
+      const Point<2> offset(x_min, y_min);
+
+      for (LineList::const_iterator line=line_list.begin();
+           line!=line_list.end(); ++line)
+        if (eps_flags_base.color_lines_level && (line->level > 0))
+          // lines colored according to
+          // refinement level,
+          // contributed by Jörg
+          // R. Weimar
+          out << line->level
+              << " l "
+              << (line->first  - offset) * scale << " m "
+              << (line->second - offset) * scale << " x" << '\n';
+        else
+          out << ((line->colorize && eps_flags_base.color_lines_on_user_flag) ? "r " : "b ")
+              << (line->first  - offset) * scale << " m "
+              << (line->second - offset) * scale << " x" << '\n';
+
+      // finally write the cell numbers
+      // in 2d, if that is desired
+      if ((dim == 2) && (eps_flags_2.write_cell_numbers == true))
+        {
+          out << "(Helvetica) findfont 140 scalefont setfont"
+              << '\n';
+
+          typename dealii::Triangulation<dim, spacedim>::active_cell_iterator
+          cell = tria.begin_active (),
+          endc = tria.end ();
+          for (; cell!=endc; ++cell)
+            {
+              out << (cell->center()(0)-offset(0))*scale << ' '
+                  << (cell->center()(1)-offset(1))*scale
+                  << " m" << '\n'
+                  << "[ [(Helvetica) 12.0 0.0 true true (";
+              if (eps_flags_2.write_cell_number_level)
+                out << cell;
+              else
+                out << cell->index();
+
+              out << ")] "
+                  << "] -6 MCshow"
+                  << '\n';
+            }
+        }
+
+      // and the vertex numbers
+      if ((dim == 2) && (eps_flags_2.write_vertex_numbers == true))
+        {
+          out << "(Helvetica) findfont 140 scalefont setfont"
+              << '\n';
+
+          // have a list of those
+          // vertices which we have
+          // already tracked, to avoid
+          // doing this multiply
+          std::set<unsigned int> treated_vertices;
+          typename dealii::Triangulation<dim, spacedim>::active_cell_iterator
+          cell = tria.begin_active (),
+          endc = tria.end ();
+          for (; cell!=endc; ++cell)
+            for (unsigned int vertex=0;
+                 vertex<GeometryInfo<dim>::vertices_per_cell;
+                 ++vertex)
+              if (treated_vertices.find(cell->vertex_index(vertex))
+                  ==
+                  treated_vertices.end())
+                {
+                  treated_vertices.insert (cell->vertex_index(vertex));
+
+                  out << (cell->vertex(vertex)(0)-offset(0))*scale << ' '
+                      << (cell->vertex(vertex)(1)-offset(1))*scale
+                      << " m" << '\n'
+                      << "[ [(Helvetica) 10.0 0.0 true true ("
+                      << cell->vertex_index(vertex)
+                      << ")] "
+                      << "] -6 MCshow"
+                      << '\n';
+                }
+        }
+
+      out << "showpage" << '\n';
+
+      // make sure everything now gets to
+      // disk
+      out.flush ();
+
+      AssertThrow (out, ExcIO());
+    }
+  }
+}
+
+
+/**
+ * Write @p tria to @p out in EPS format. This function only forwards to
+ * internal::write_eps(), handing on the (possibly null) @p mapping
+ * together with the eps_flags_2 and eps_flags_3 settings.
+ */
+template <int dim, int spacedim>
+void
+GridOut::write_eps (const Triangulation<dim, spacedim> &tria,
+                    std::ostream                       &out,
+                    const Mapping<dim,spacedim>        *mapping) const
+{
+  // all of the actual work is done by the implementation function
+  internal::write_eps (tria, out, mapping, eps_flags_2, eps_flags_3);
+}
+
+
+/**
+ * Write @p tria to @p out in the format selected by @p output_format by
+ * dispatching to the matching write_*() function. The @p mapping pointer
+ * is handed on only to the gnuplot, eps and xfig writers; nothing is
+ * written for the format @p none.
+ */
+template <int dim, int spacedim>
+void
+GridOut::write (const Triangulation<dim, spacedim> &tria,
+                std::ostream                       &out,
+                const OutputFormat                  output_format,
+                const Mapping<dim,spacedim>        *mapping) const
+{
+  // every format handled below returns from inside the switch, so
+  // falling out of it means we were given a value without a writer
+  switch (output_format)
+    {
+    // no output requested
+    case none:
+      return;
+
+    // writers that take the mapping as an additional argument
+    case gnuplot:
+      write_gnuplot (tria, out, mapping);
+      return;
+
+    case eps:
+      write_eps (tria, out, mapping);
+      return;
+
+    case xfig:
+      write_xfig (tria, out, mapping);
+      return;
+
+    // writers that only need the triangulation and the stream
+    case dx:
+      write_dx (tria, out);
+      return;
+
+    case ucd:
+      write_ucd (tria, out);
+      return;
+
+    case msh:
+      write_msh (tria, out);
+      return;
+
+    case svg:
+      write_svg (tria, out);
+      return;
+
+    case mathgl:
+      write_mathgl (tria, out);
+      return;
+
+    case vtk:
+      write_vtk (tria, out);
+      return;
+
+    case vtu:
+      write_vtu (tria, out);
+      return;
+    }
+
+  Assert (false, ExcInternalError());
+}
+
+
+/**
+ * Write @p tria to @p out in the format stored in default_format. This
+ * is the same as calling the four-argument write() with that format.
+ */
+template <int dim, int spacedim>
+void
+GridOut::write (const Triangulation<dim, spacedim> &tria,
+                std::ostream                       &out,
+                const Mapping<dim,spacedim>        *mapping) const
+{
+  write (tria, out, default_format, mapping);
+}
+
+
+// explicit instantiations
+#include "grid_out.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/grid/grid_out.inst.in b/source/grid/grid_out.inst.in
new file mode 100644
index 0000000..420ba1d
--- /dev/null
+++ b/source/grid/grid_out.inst.in
@@ -0,0 +1,97 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+// Explicit instantiations of the GridOut output functions for
+// Triangulation<deal_II_dimension>, repeated for every entry of
+// DIMENSIONS. The preprocessor guards below restrict write_dx and
+// write_mathgl to dimensions greater than one, and write_xfig to
+// dimensions other than two.
+// NOTE(review): the xfig guard presumably exists because the 2d case is
+// provided elsewhere (e.g. by an explicit specialization) -- confirm
+// against grid_out.cc.
+for (deal_II_dimension : DIMENSIONS)
+  {
+#if deal_II_dimension > 1
+    template void GridOut::write_dx
+      (const Triangulation<deal_II_dimension>&,
+       std::ostream&) const;
+    template void GridOut::write_mathgl
+      (const Triangulation<deal_II_dimension>&,
+       std::ostream&) const;
+#endif
+
+    template void GridOut::write_msh
+      (const Triangulation<deal_II_dimension>&,
+       std::ostream&) const;
+
+#if deal_II_dimension != 2
+    template void GridOut::write_xfig
+      (const Triangulation<deal_II_dimension>&,
+       std::ostream&,
+       const Mapping<deal_II_dimension,deal_II_dimension>*) const;
+#endif
+
+    template void GridOut::write_gnuplot
+      (const Triangulation<deal_II_dimension>&,
+       std::ostream&,
+       const Mapping<deal_II_dimension,deal_II_dimension>*) const;
+    template void GridOut::write_ucd<deal_II_dimension>
+      (const Triangulation<deal_II_dimension> &,
+       std::ostream &) const;
+    template void GridOut::write_eps<deal_II_dimension>
+      (const Triangulation<deal_II_dimension> &,
+       std::ostream &,
+       const Mapping<deal_II_dimension,deal_II_dimension> *) const;
+    template void GridOut::write_vtk
+      (const Triangulation<deal_II_dimension>&,
+       std::ostream&) const;       
+    template void GridOut::write_vtu
+      (const Triangulation<deal_II_dimension>&,
+       std::ostream&) const;       
+       
+    template void GridOut::write<deal_II_dimension>
+      (const Triangulation<deal_II_dimension> &,
+       std::ostream &, const OutputFormat,
+       const Mapping<deal_II_dimension,deal_II_dimension> *) const;
+    template void GridOut::write<deal_II_dimension>
+      (const Triangulation<deal_II_dimension> &,
+       std::ostream &, const Mapping<deal_II_dimension,deal_II_dimension> *) const;
+}
+
+// Explicit instantiations for triangulations whose dimension is smaller
+// than the space dimension: the loop runs over all combinations of
+// DIMENSIONS and SPACE_DIMENSIONS, and the #if keeps only those with
+// deal_II_dimension < deal_II_space_dimension. Only write_msh,
+// write_ucd, write_gnuplot, write_vtk, write_vtu and the two write()
+// overloads are instantiated here; no dx, mathgl, xfig or eps writer
+// appears in this list.
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+#if deal_II_dimension < deal_II_space_dimension
+    template void GridOut::write_msh
+      (const Triangulation<deal_II_dimension, deal_II_space_dimension>&,
+       std::ostream&) const;
+    template void GridOut::write_ucd
+      (const Triangulation<deal_II_dimension, deal_II_space_dimension> &,
+       std::ostream &) const;
+   template void GridOut::write_gnuplot
+      (const Triangulation<deal_II_dimension,deal_II_space_dimension>&,
+       std::ostream&,
+       const Mapping<deal_II_dimension,deal_II_space_dimension>*) const;
+   template void GridOut::write_vtk
+      (const Triangulation<deal_II_dimension,deal_II_space_dimension>&,
+       std::ostream&) const;
+   template void GridOut::write_vtu
+      (const Triangulation<deal_II_dimension,deal_II_space_dimension>&,
+       std::ostream&) const;
+
+    template void GridOut::write<deal_II_dimension,deal_II_space_dimension>
+      (const Triangulation<deal_II_dimension,deal_II_space_dimension> &,
+       std::ostream &, const OutputFormat,
+       const Mapping<deal_II_dimension,deal_II_space_dimension> *) const;
+    template void GridOut::write<deal_II_dimension,deal_II_space_dimension>
+      (const Triangulation<deal_II_dimension,deal_II_space_dimension> &,
+       std::ostream &, const Mapping<deal_II_dimension,deal_II_space_dimension> *) const;
+
+#endif
+  }
+
diff --git a/source/grid/grid_refinement.cc b/source/grid/grid_refinement.cc
new file mode 100644
index 0000000..629e6ed
--- /dev/null
+++ b/source/grid/grid_refinement.cc
@@ -0,0 +1,588 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/template_constraints.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/block_vector_base.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/petsc_vector.h>
+#include <deal.II/lac/petsc_block_vector.h>
+#include <deal.II/lac/trilinos_vector.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+
+#include <deal.II/grid/grid_refinement.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/tria.h>
+
+#include <numeric>
+#include <algorithm>
+#include <cmath>
+#include <functional>
+#include <fstream>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace
+{
+  namespace internal
+  {
+    /**
+     * Return the largest entry of @p criteria, as found by
+     * std::max_element over the range [begin(), end()).
+     */
+    template <typename number>
+    inline
+    number
+    max_element (const Vector<number> &criteria)
+    {
+      const number largest = *std::max_element(criteria.begin(),
+                                               criteria.end());
+      return largest;
+    }
+
+
+    /**
+     * Return the smallest entry of @p criteria, as found by
+     * std::min_element over the range [begin(), end()).
+     */
+    template <typename number>
+    inline
+    number
+    min_element (const Vector<number> &criteria)
+    {
+      const number smallest = *std::min_element(criteria.begin(),
+                                                criteria.end());
+      return smallest;
+    }
+
+    // Silence a (bogus) warning in clang-3.6 about the following four
+    // functions being unused:
+    DEAL_II_DISABLE_EXTRA_DIAGNOSTICS
+
+#ifdef DEAL_II_WITH_PETSC
+    /**
+     * Return the largest entry of the PETSc vector @p criteria.
+     *
+     * The running maximum is seeded with the first entry rather than
+     * with zero, so that the result is also correct if all entries are
+     * negative; this mirrors what the PETSc min_element() variant does.
+     * For an empty vector, zero is returned.
+     */
+    inline
+    PetscScalar
+    max_element (const PETScWrappers::Vector &criteria)
+    {
+      // nothing to compare: keep returning zero for an empty vector
+      if (criteria.size() == 0)
+        return 0;
+
+      // this is horribly slow (since we have
+      // to get the array of values from PETSc
+      // in every iteration), but works
+      PetscScalar m = criteria(0);
+      for (unsigned int i=1; i<criteria.size(); ++i)
+        m = std::max (m, criteria(i));
+      return m;
+    }
+
+
+    /**
+     * Return the smallest entry of the PETSc vector @p criteria. The
+     * search starts from the first entry, so the vector is expected to
+     * have at least one element.
+     */
+    inline
+    PetscScalar
+    min_element (const PETScWrappers::Vector &criteria)
+    {
+      // element-by-element access is horribly slow (the array of values
+      // has to be obtained from PETSc in every iteration), but it works
+      PetscScalar smallest = criteria(0);
+      for (unsigned int entry=1; entry<criteria.size(); ++entry)
+        {
+          const PetscScalar value = criteria(entry);
+          if (value < smallest)
+            smallest = value;
+        }
+
+      return smallest;
+    }
+#endif
+
+
+#ifdef DEAL_II_WITH_TRILINOS
+    /**
+     * Return the value that the underlying Trilinos vector of
+     * @p criteria reports through its MaxValue() function.
+     */
+    inline
+    TrilinosScalar
+    max_element (const TrilinosWrappers::Vector &criteria)
+    {
+      // MaxValue() stores its result in the variable we point it to
+      TrilinosScalar largest = 0;
+      criteria.trilinos_vector().MaxValue(&largest);
+
+      return largest;
+    }
+
+
+    /**
+     * Return the value that the underlying Trilinos vector of
+     * @p criteria reports through its MinValue() function.
+     */
+    inline
+    TrilinosScalar
+    min_element (const TrilinosWrappers::Vector &criteria)
+    {
+      // MinValue() stores its result in the variable we point it to
+      TrilinosScalar smallest = 0;
+      criteria.trilinos_vector().MinValue(&smallest);
+
+      return smallest;
+    }
+#endif
+
+    DEAL_II_ENABLE_EXTRA_DIAGNOSTICS
+
+  } /* namespace internal */
+
+
+  /**
+   * Smallest entry of @p criteria for vector types for which
+   * IsBlockVector<VectorType>::value is false: simply forwards to the
+   * matching internal::min_element() overload.
+   */
+  template <typename VectorType>
+  typename constraint_and_return_value<!IsBlockVector<VectorType>::value,
+           typename VectorType::value_type>::type
+           min_element (const VectorType &criteria)
+  {
+    const typename VectorType::value_type smallest
+      = internal::min_element (criteria);
+    return smallest;
+  }
+
+
+  /**
+   * Largest entry of @p criteria for vector types for which
+   * IsBlockVector<VectorType>::value is false: simply forwards to the
+   * matching internal::max_element() overload.
+   */
+  template <typename VectorType>
+  typename constraint_and_return_value<!IsBlockVector<VectorType>::value,
+           typename VectorType::value_type>::type
+           max_element (const VectorType &criteria)
+  {
+    const typename VectorType::value_type largest
+      = internal::max_element (criteria);
+    return largest;
+  }
+
+
+  /**
+   * Smallest entry of @p criteria for vector types for which
+   * IsBlockVector<VectorType>::value is true: take the minimum of
+   * block 0 and then fold in the minima of all remaining blocks.
+   */
+  template <typename VectorType>
+  typename constraint_and_return_value<IsBlockVector<VectorType>::value,
+           typename VectorType::value_type>::type
+           min_element (const VectorType &criteria)
+  {
+    typedef typename VectorType::value_type value_type;
+
+    value_type smallest = internal::min_element(criteria.block(0));
+
+    const unsigned int n_blocks = criteria.n_blocks();
+    for (unsigned int block=1; block<n_blocks; ++block)
+      {
+        const value_type candidate
+          = internal::min_element(criteria.block(block));
+        if (candidate < smallest)
+          smallest = candidate;
+      }
+
+    return smallest;
+  }
+
+
+  /**
+   * Largest entry of @p criteria for vector types for which
+   * IsBlockVector<VectorType>::value is true: take the maximum of
+   * block 0 and then fold in the maxima of all remaining blocks.
+   */
+  template <typename VectorType>
+  typename constraint_and_return_value<IsBlockVector<VectorType>::value,
+           typename VectorType::value_type>::type
+           max_element (const VectorType &criteria)
+  {
+    typedef typename VectorType::value_type value_type;
+
+    value_type largest = internal::max_element(criteria.block(0));
+
+    const unsigned int n_blocks = criteria.n_blocks();
+    for (unsigned int block=1; block<n_blocks; ++block)
+      {
+        const value_type candidate
+          = internal::max_element(criteria.block(block));
+        if (largest < candidate)
+          largest = candidate;
+      }
+
+    return largest;
+  }
+
+}
+
+
+namespace
+{
+  /**
+   * Sort the entries ind[l], ..., ind[r] (both bounds inclusive) of the
+   * index vector @p ind in place with a recursive quicksort, ordering
+   * the indices by the values a(ind[k]) they refer to. The vector @p a
+   * is only read.
+   *
+   * NOTE(review): this comment used to promise "increasing order", but
+   * the partition loop below moves entries that are larger than the
+   * pivot towards the front of the range, which suggests that @p ind
+   * ends up ordering @p a from largest to smallest -- confirm against
+   * the caller, @p refine_and_coarsen_optimize.
+   *
+   * This implementation of quicksort seems to be faster than the standard
+   * library version and is needed in @p refine_and_coarsen_optimize.
+   *
+   * @param a   values by which the indices are compared
+   * @param ind index vector that is reordered
+   * @param l   first position of the range to sort
+   * @param r   last position of the range to sort
+   */
+
+  template <class VectorType>
+  void qsort_index (const VectorType          &a,
+                    std::vector<unsigned int> &ind,
+                    int                        l,
+                    int                        r)
+  {
+    int i,j;
+    typename VectorType::value_type v;
+
+    // ranges with fewer than two entries are already sorted
+    if (r<=l)
+      return;
+
+    // use the value belonging to the last index of the range as pivot
+    v = a(ind[r]);
+    i = l-1;
+    j = r;
+    do
+      {
+        // advance i from the left past all entries larger than the
+        // pivot; the scan stops at the latest at i==r, where the pivot
+        // itself is found
+        do
+          {
+            ++i;
+          }
+        while ((a(ind[i])>v) && (i<r));
+        // move j down from the right past all entries smaller than the
+        // pivot.
+        // NOTE(review): the guard is j>0 rather than j>l, so for l>0
+        // this scan may read (but never write) entries below l
+        do
+          {
+            --j;
+          }
+        while ((a(ind[j])<v) && (j>0));
+
+        // as long as the two scans have not met, ind[i] and ind[j] are
+        // on the wrong sides and are exchanged; once they have met, the
+        // pivot is moved to its final position i
+        if (i<j)
+          std::swap (ind[i], ind[j]);
+        else
+          std::swap (ind[i], ind[r]);
+      }
+    while (i<j);
+    // sort the parts to the left and to the right of the pivot
+    qsort_index(a,ind,l,i-1);
+    qsort_index(a,ind,i+1,r);
+  }
+}
+
+
+
+
+/**
+ * Set the refine flag on every active cell of @p tria whose entry in
+ * @p criteria (looked up through the cell's active_cell_index()) is at
+ * least @p threshold. Cells are visited in iteration order, and flagging
+ * stops once @p max_to_mark cells have been marked; passing
+ * numbers::invalid_unsigned_int as @p max_to_mark disables this limit.
+ *
+ * Nothing is flagged if all entries of @p criteria are zero. If
+ * @p threshold is exactly zero, the smallest strictly positive entry of
+ * @p criteria is used as threshold instead, so that cells with a zero
+ * indicator are not refined.
+ *
+ * @p criteria must have one entry per active cell and must not contain
+ * negative entries; both conditions are checked with Assert.
+ */
+template <int dim, class VectorType, int spacedim>
+void GridRefinement::refine (Triangulation<dim,spacedim> &tria,
+                             const VectorType   &criteria,
+                             const double        threshold,
+                             const unsigned int max_to_mark)
+{
+  Assert (criteria.size() == tria.n_active_cells(),
+          ExcDimensionMismatch(criteria.size(), tria.n_active_cells()));
+  Assert (criteria.is_non_negative (), ExcNegativeCriteria());
+
+  // when all indicators are zero we
+  // do not need to refine but only
+  // to coarsen
+  if (criteria.all_zero())
+    return;
+
+  const unsigned int n_cells = criteria.size();
+
+//TODO: This is undocumented, looks fishy and seems unnecessary
+
+  double new_threshold=threshold;
+  // when threshold==0 find the
+  // smallest value in criteria
+  // greater 0. the search has to
+  // look at every entry, including
+  // the first one: seeding it with
+  // criteria(0) would leave the
+  // threshold at zero (and hence
+  // flag all cells) whenever that
+  // entry happens to be zero
+  if (new_threshold==0)
+    for (unsigned int index=0; index<n_cells; ++index)
+      if (criteria(index)>0
+          && (new_threshold==0 || criteria(index)<new_threshold))
+        new_threshold=criteria(index);
+
+  unsigned int marked=0;
+  for (typename Triangulation<dim,spacedim>::active_cell_iterator cell = tria.begin_active();
+       cell != tria.end(); ++cell)
+    if (std::fabs(criteria(cell->active_cell_index())) >= new_threshold)
+      {
+        // stop as soon as the allowed number of cells has been flagged
+        if (max_to_mark!=numbers::invalid_unsigned_int && marked>=max_to_mark)
+          break;
+        ++marked;
+        cell->set_refine_flag();
+      }
+}
+
+
+
+/**
+ * Set the coarsen flag on every active cell of @p tria whose entry in
+ * @p criteria (looked up through the cell's active_cell_index()) is at
+ * most @p threshold, except for cells that already carry a refine flag.
+ *
+ * @p criteria must have one entry per active cell and must not contain
+ * negative entries; both conditions are checked with Assert.
+ */
+template <int dim, class VectorType, int spacedim>
+void
+GridRefinement::coarsen (Triangulation<dim,spacedim> &tria,
+                         const VectorType            &criteria,
+                         const double                 threshold)
+{
+  Assert (criteria.size() == tria.n_active_cells(),
+          ExcDimensionMismatch(criteria.size(), tria.n_active_cells()));
+  Assert (criteria.is_non_negative (), ExcNegativeCriteria());
+
+  typedef typename Triangulation<dim,spacedim>::active_cell_iterator
+  active_cell_iterator;
+
+  for (active_cell_iterator cell = tria.begin_active();
+       cell != tria.end(); ++cell)
+    {
+      const bool at_or_below_threshold
+        = (std::fabs(criteria(cell->active_cell_index())) <= threshold);
+
+      // a cell that is to be refined is never coarsened
+      if (at_or_below_threshold && !cell->refine_flag_set())
+        cell->set_coarsen_flag();
+    }
+}
+
+/**
+ * Scale the refinement and coarsening fractions so that the number of
+ * cells expected after refinement and coarsening does not exceed
+ * @p max_n_cells.
+ *
+ * The estimate assumes isotropic refinement: with
+ * C=GeometryInfo<dim>::max_children_per_cell, every refined cell adds
+ * C-1 cells and every coarsened cell removes 1-1/C cells.
+ *
+ * @param current_n_cells number of cells at present
+ * @param max_n_cells     number of cells that should not be exceeded
+ * @param top_fraction    requested fraction of cells to refine
+ * @param bottom_fraction requested fraction of cells to coarsen
+ * @return the pair (refine fraction, coarsen fraction); it equals the
+ * input fractions if no adjustment is necessary
+ */
+template <int dim>
+std::pair<double, double>
+GridRefinement::adjust_refine_and_coarsen_number_fraction (const unsigned int  current_n_cells,
+                                                           const unsigned int  max_n_cells,
+                                                           const double        top_fraction,
+                                                           const double        bottom_fraction)
+{
+  Assert (top_fraction>=0, ExcInvalidParameterValue());
+  Assert (top_fraction<=1, ExcInvalidParameterValue());
+  Assert (bottom_fraction>=0, ExcInvalidParameterValue());
+  Assert (bottom_fraction<=1, ExcInvalidParameterValue());
+  Assert (top_fraction+bottom_fraction <= 1, ExcInvalidParameterValue());
+
+  // net change in the number of cells per refined and per coarsened
+  // cell. we have no information on whether cells are refined
+  // isotropically or anisotropically, so assume the isotropic case,
+  // though that may give a worse approximation for mixed refinement
+  const double cell_increase_on_refine  = GeometryInfo<dim>::max_children_per_cell - 1.0;
+  const double cell_decrease_on_coarsen = 1.0 - 1.0/GeometryInfo<dim>::max_children_per_cell;
+
+  // case 1: the target number of cells is already reached or exceeded.
+  // stop refining altogether and coarsen only as many cells as it takes
+  // to get back to the target, but never more than all of them
+  if (current_n_cells >= max_n_cells)
+    {
+      const double cells_to_coarsen = (current_n_cells - max_n_cells) /
+                                      cell_decrease_on_coarsen;
+
+      return std::pair<double, double> (0.,
+                                        std::min(cells_to_coarsen/current_n_cells, 1.0));
+    }
+
+  // number of cells the requested fractions correspond to
+  const double refine_cells  = current_n_cells * top_fraction;
+  const double coarsen_cells = current_n_cells * bottom_fraction;
+
+  // case 2: applying the requested fractions would take us beyond the
+  // target. scale both fractions by a common factor alpha, chosen such
+  // that the expected number of cells equals max_n_cells
+  if (static_cast<unsigned int>
+      (current_n_cells
+       + refine_cells * cell_increase_on_refine
+       - coarsen_cells * cell_decrease_on_coarsen)
+      >
+      max_n_cells)
+    {
+      const double expected_growth
+        = (refine_cells * cell_increase_on_refine
+           - coarsen_cells * cell_decrease_on_coarsen);
+      const double alpha
+        = static_cast<double>(max_n_cells - current_n_cells) / expected_growth;
+
+      return std::pair<double, double> (alpha * top_fraction,
+                                        alpha * bottom_fraction);
+    }
+
+  // case 3: the requested fractions can be used as they are
+  return std::pair<double, double> (top_fraction, bottom_fraction);
+}
+
+template <int dim, class VectorType, int spacedim>
+void
+GridRefinement::refine_and_coarsen_fixed_number (Triangulation<dim,spacedim> &tria,
+                                                 const VectorType            &criteria,
+                                                 const double                 top_fraction,
+                                                 const double                 bottom_fraction,
+                                                 const unsigned int           max_n_cells)
+{
+  // Read the thresholds handed to refine() and coarsen() off the sorted
+  // criteria, at the positions given by top_fraction and bottom_fraction
+  // once both have been adjusted to respect max_n_cells. The original
+  // note here says the number of cells is checked in @p{refine}.
+  Assert ((top_fraction>=0) && (top_fraction<=1), ExcInvalidParameterValue());
+  Assert ((bottom_fraction>=0) && (bottom_fraction<=1), ExcInvalidParameterValue());
+  Assert (top_fraction+bottom_fraction <= 1, ExcInvalidParameterValue());
+  Assert (criteria.is_non_negative (), ExcNegativeCriteria());
+
+  const std::pair<double, double> adjusted_fractions =
+    adjust_refine_and_coarsen_number_fraction<dim> (criteria.size(), max_n_cells,
+                                                    top_fraction, bottom_fraction);
+
+  const int refine_cells  = static_cast<int>(adjusted_fractions.first  * criteria.size());
+  const int coarsen_cells = static_cast<int>(adjusted_fractions.second * criteria.size());
+
+  if (refine_cells || coarsen_cells)
+    {
+      Vector<typename VectorType::value_type> tmp (criteria);
+      // if all cells are to be refined, position refine_cells is tmp.end() and
+      // must not be dereferenced; pass a threshold below every criterion instead
+      if (static_cast<std::size_t>(refine_cells) == tmp.size())
+        refine (tria, criteria, -std::numeric_limits<double>::max());
+      else if (refine_cells)
+        {
+          std::nth_element (tmp.begin(), tmp.begin()+refine_cells, tmp.end(),
+                            std::greater<double>());
+          refine (tria, criteria, *(tmp.begin() + refine_cells));
+        }
+      if (coarsen_cells)
+        {
+          std::nth_element (tmp.begin(), tmp.begin()+tmp.size()-coarsen_cells,
+                            tmp.end(), std::greater<double>());
+          coarsen (tria, criteria, *(tmp.begin() + tmp.size() - coarsen_cells));
+        }
+    }
+}
+
+
+
+template <int dim, typename VectorType, int spacedim>
+void
+GridRefinement::refine_and_coarsen_fixed_fraction (Triangulation<dim,spacedim> &tria,
+                                                   const VectorType   &criteria,
+                                                   const double        top_fraction,
+                                                   const double        bottom_fraction,
+                                                   const unsigned int  max_n_cells)
+{
+  // correct number of cells is
+  // checked in @p{refine}
+  Assert ((top_fraction>=0) && (top_fraction<=1), ExcInvalidParameterValue());
+  Assert ((bottom_fraction>=0) && (bottom_fraction<=1), ExcInvalidParameterValue());
+  Assert (top_fraction+bottom_fraction <= 1, ExcInvalidParameterValue());
+  Assert (criteria.is_non_negative (), ExcNegativeCriteria());
+
+  // let tmp be a copy of the criteria; its
+  // l1 norm is the total error that the sums
+  // accumulated below are compared against
+  // (scaled by top_fraction/bottom_fraction).
+  Vector<typename VectorType::value_type> tmp;
+  tmp = criteria;
+  const double total_error = tmp.l1_norm();
+
+  // sort the largest criteria to the
+  // beginning of the vector
+  std::sort (tmp.begin(), tmp.end(), std::greater<double>());
+  // compute thresholds: pp walks down from the largest and qq up from the
+  // smallest entry until the summed entries reach the requested share
+  typename Vector<typename VectorType::value_type>::const_iterator
+  pp=tmp.begin();
+  for (double sum=0;
+       (sum<top_fraction*total_error) && (pp!=(tmp.end()-1));
+       ++pp)
+    sum += *pp;
+  double top_threshold = ( pp != tmp.begin () ?
+                           (*pp+*(pp-1))/2 :
+                           *pp );
+  typename Vector<typename VectorType::value_type>::const_iterator
+  qq=(tmp.end()-1);
+  for (double sum=0;
+       (sum<bottom_fraction*total_error) && (qq!=tmp.begin());
+       --qq)
+    sum += *qq;
+  double bottom_threshold = ( qq != (tmp.end()-1) ?
+                              (*qq + *(qq+1))/2 :
+                              0);
+
+  // we now have an idea how many cells we
+  // are going to refine and coarsen. we use
+  // this information to see whether we are
+  // over the limit and if so use a function
+  // that knows how to deal with this
+  // situation
+
+  // note, that at this point, we have no
+  // information about anisotropically refined
+  // cells, thus use the situation of purely
+  // isotropic refinement as guess for a mixed
+  // refinement as well.
+  {
+    const unsigned int refine_cells  = pp - tmp.begin(),
+                       coarsen_cells = tmp.end() - qq;
+
+    if (static_cast<unsigned int>
+        (tria.n_active_cells()
+         + refine_cells * (GeometryInfo<dim>::max_children_per_cell - 1)
+         - (coarsen_cells *
+            (GeometryInfo<dim>::max_children_per_cell - 1) /
+            GeometryInfo<dim>::max_children_per_cell))
+        >
+        max_n_cells)
+      {
+        refine_and_coarsen_fixed_number (tria,
+                                         criteria,
+                                         1.*refine_cells/criteria.size(),
+                                         1.*coarsen_cells/criteria.size(),
+                                         max_n_cells);
+        return;
+      }
+  }
+
+
+  // in some rare cases it may happen that
+  // both thresholds are the same (e.g. if
+  // there are many cells with the same
+  // error indicator). That would mean that
+  // all cells will be flagged for
+  // refinement or coarsening, but some will
+  // be flagged for both, namely those for
+  // which the indicator equals the
+  // thresholds. This is forbidden, however.
+  //
+  // In some rare cases with very few cells
+  // we also could get integer round off
+  // errors and get problems with
+  // the top and bottom fractions.
+  //
+  // In these cases we arbitrarily reduce the
+  // bottom threshold by one permille below
+  // the top threshold
+  //
+  // Finally, in some cases
+  // (especially involving symmetric
+  // solutions) there are many cells
+  // with the same error indicator
+  // values. if there are many with
+  // indicator equal to the top
+  // threshold, no refinement will
+  // take place below; to avoid this
+  // case, we also lower the top
+  // threshold if it equals the
+  // largest indicator and the
+  // top_fraction!=1
+  if ((top_threshold == max_element(criteria)) &&
+      (top_fraction != 1))
+    top_threshold *= 0.999;
+
+  if (bottom_threshold>=top_threshold)
+    bottom_threshold = 0.999*top_threshold;
+
+  // actually flag cells
+  if (top_threshold < max_element(criteria))
+    refine (tria, criteria, top_threshold, pp - tmp.begin());
+
+  if (bottom_threshold > min_element(criteria))
+    coarsen (tria, criteria, bottom_threshold);
+}
+
+
+
+template <int dim, typename VectorType, int spacedim>
+void
+GridRefinement::refine_and_coarsen_optimize (Triangulation<dim,spacedim> &tria,
+                                             const VectorType            &criteria,
+                                             const unsigned int           order)
+{
+  Assert (criteria.size() == tria.n_active_cells(),
+          ExcDimensionMismatch(criteria.size(), tria.n_active_cells()));
+  Assert (criteria.is_non_negative (), ExcNegativeCriteria());
+  // tmp starts as the identity permutation of the cell indices and is
+  // then ordered by qsort_index() according to the error indicator
+  // (NOTE(review): described as increasing order; confirm in qsort_index)
+  std::vector<unsigned int> tmp(criteria.size());
+  for (unsigned int i=0; i<criteria.size(); ++i)
+    tmp[i] = i;
+
+  qsort_index (criteria, tmp, 0, criteria.size()-1);
+
+  double expected_error_reduction = 0;
+  const double original_error     = criteria.l1_norm();
+
+  const unsigned int N = criteria.size();
+
+  // minimize the cost functional discussed in the documentation
+  double min_cost = std::numeric_limits<double>::max();
+  unsigned int min_arg = 0;
+
+  for (unsigned int M = 0; M<criteria.size(); ++M)
+    {
+      expected_error_reduction += (1-std::pow(2.,-1.*order)) * criteria(tmp[M]);
+      // cost: ((2^dim-1)*(M+1)+N)^(order/dim) times the error remaining
+      const double cost = std::pow(((std::pow(2.,dim)-1)*(1+M)+N),
+                                   (double)order/dim) *
+                          (original_error-expected_error_reduction);
+      if (cost <= min_cost)
+        {
+          min_cost = cost;
+          min_arg = M;
+        }
+    }
+  // min_arg is the last M that attains the minimum (ties resolved by <=)
+  refine (tria, criteria, criteria(tmp[min_arg]));
+}
+
+
+// explicit instantiations
+#include "grid_refinement.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/grid/grid_refinement.inst.in b/source/grid/grid_refinement.inst.in
new file mode 100644
index 0000000..c442c29
--- /dev/null
+++ b/source/grid/grid_refinement.inst.in
@@ -0,0 +1,125 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+// explicit instantiations for each scalar S and dimension (presumably expanded by the build system)
+for (S : REAL_SCALARS; deal_II_dimension : DIMENSIONS)
+{
+  template
+  void
+  GridRefinement::
+  refine<deal_II_dimension,dealii::Vector<S>,deal_II_dimension>
+         (Triangulation<deal_II_dimension> &,
+          const dealii::Vector<S> &,
+          const double,
+          const unsigned int);
+
+  template
+  void
+  GridRefinement::
+  coarsen<deal_II_dimension,dealii::Vector<S>,deal_II_dimension>
+         (Triangulation<deal_II_dimension> &,
+          const dealii::Vector<S> &,
+          const double);
+
+  template
+  void
+  GridRefinement::
+  refine_and_coarsen_fixed_number<deal_II_dimension,dealii::Vector<S>,deal_II_dimension>
+         (Triangulation<deal_II_dimension> &,
+          const dealii::Vector<S> &,
+          const double,
+          const double,
+          const unsigned int);
+
+  template
+  void
+  GridRefinement::
+  refine_and_coarsen_fixed_fraction<deal_II_dimension,dealii::Vector<S>,deal_II_dimension>
+         (Triangulation<deal_II_dimension> &,
+          const dealii::Vector<S> &,
+          const double,
+          const double,
+          const unsigned int);
+
+  template
+  void
+  GridRefinement::
+  refine_and_coarsen_optimize<deal_II_dimension,dealii::Vector<S>,deal_II_dimension>
+  (Triangulation<deal_II_dimension> &,
+   const dealii::Vector<S> &,
+   const unsigned int);
+  // the same five functions once more for spacedim = dim+1 (only for dim < 3)
+#if deal_II_dimension < 3
+  template
+  void
+  GridRefinement::
+  refine<deal_II_dimension,dealii::Vector<S>,deal_II_dimension+1>
+         (Triangulation<deal_II_dimension,deal_II_dimension+1> &,
+          const dealii::Vector<S> &,
+          const double,
+          const unsigned int);
+
+  template
+  void
+  GridRefinement::
+  coarsen<deal_II_dimension,dealii::Vector<S>,deal_II_dimension+1>
+         (Triangulation<deal_II_dimension,deal_II_dimension+1> &,
+          const dealii::Vector<S> &,
+          const double);
+
+  template
+  void
+  GridRefinement::
+  refine_and_coarsen_fixed_number<deal_II_dimension,dealii::Vector<S>,deal_II_dimension+1>
+         (Triangulation<deal_II_dimension,deal_II_dimension+1> &,
+          const dealii::Vector<S> &,
+          const double,
+          const double,
+          const unsigned int);
+
+  template
+  void
+  GridRefinement::
+  refine_and_coarsen_fixed_fraction<deal_II_dimension,dealii::Vector<S>,deal_II_dimension+1>
+         (Triangulation<deal_II_dimension,deal_II_dimension+1> &,
+          const dealii::Vector<S> &,
+          const double,
+          const double,
+          const unsigned int);
+
+  template
+  void
+  GridRefinement::
+  refine_and_coarsen_optimize<deal_II_dimension,dealii::Vector<S>,deal_II_dimension+1>
+  (Triangulation<deal_II_dimension,deal_II_dimension+1> &,
+   const dealii::Vector<S> &,
+   const unsigned int);
+#endif
+}
+// the fraction adjustment helper depends on the dimension only, not on a vector type
+for (deal_II_dimension : DIMENSIONS)
+{
+  template
+  std::pair<double, double>
+  GridRefinement::
+  adjust_refine_and_coarsen_number_fraction <deal_II_dimension>
+                                            (const unsigned int,
+                                             const unsigned int,
+                                             const double,
+                                             const double);
+}
+
diff --git a/source/grid/grid_reordering.cc b/source/grid/grid_reordering.cc
new file mode 100644
index 0000000..37fd5f1
--- /dev/null
+++ b/source/grid/grid_reordering.cc
@@ -0,0 +1,1718 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/grid/grid_reordering.h>
+#include <deal.II/grid/grid_reordering_internal.h>
+#include <deal.II/grid/grid_tools.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/std_cxx11/bind.h>
+
+#include <algorithm>
+#include <set>
+#include <iostream>
+#include <fstream>
+#include <functional>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+template<>
+void
+GridReordering<1>::reorder_cells (std::vector<CellData<1> > &,
+                                  const bool)
+{
+  // intentionally a no-op: both arguments are ignored, so the
+  // 1d cell list is left exactly as it was passed in
+}
+
+
+template<>
+void
+GridReordering<1>::invert_all_cells_of_negative_grid(const std::vector<Point<1> > &,
+                                                     std::vector<CellData<1> > &)
+{
+  // intentionally a no-op: both arguments are ignored in 1d
+}
+
+template<>
+void
+GridReordering<1,2>::reorder_cells (std::vector<CellData<1> > &,
+                                    const bool)
+{
+  // intentionally a no-op: both arguments are ignored, so the
+  // cell list of a 1d mesh in 2d space is left as it was passed in
+}
+
+
+template<>
+void
+GridReordering<1,2>::invert_all_cells_of_negative_grid(const std::vector<Point<2> > &,
+                                                       std::vector<CellData<1> > &)
+{
+  // intentionally a no-op: both arguments are ignored for 1d cells
+}
+
+
+template<>
+void
+GridReordering<1,3>::reorder_cells (std::vector<CellData<1> > &,
+                                    const bool)
+{
+  // intentionally a no-op: both arguments are ignored, so the
+  // cell list of a 1d mesh in 3d space is left as it was passed in
+}
+
+
+template<>
+void
+GridReordering<1,3>::invert_all_cells_of_negative_grid(const std::vector<Point<3> > &,
+                                                       std::vector<CellData<1> > &)
+{
+  // intentionally a no-op: both arguments are ignored for 1d cells
+}
+
+
+namespace internal
+{
+  namespace GridReordering2d
+  {
+// Connectivity tables. EdgeToNode[e]: the two local vertices of edge e
+    const int ConnectGlobals::EdgeToNode[4][2] =
+    { {0,1},{1,2},{2,3},{3,0} };
+    // NodeToEdge[v]: the two local edges that contain local vertex v
+    const int ConnectGlobals::NodeToEdge[4][2] =
+    { {3,0},{0,1},{1,2},{2,3} };
+    // DefaultOrientation[e]: (first, second) local vertex of edge e by default
+    const int ConnectGlobals::DefaultOrientation[4][2] =
+    {{0,1},{1,2},{3,2},{0,3}};
+
+
+    /**
+     * Simple data structure denoting an edge, i.e. the ordered
+     * pair of its vertices (v0 first, then v1). operator< orders
+     * edges lexicographically by (v0, v1) so that they can be
+     * stored in a std::set. This is only used in the
+     * is_consistent function.
+     */
+    struct Edge
+    {
+      Edge (const unsigned int v0,
+            const unsigned int v1)
+        :
+        v0(v0), v1(v1)
+      {}
+
+      const unsigned int v0, v1;
+      bool operator < (const Edge &e) const
+      {
+        return ((v0 < e.v0) || ((v0 == e.v0) && (v1 < e.v1)));
+      }
+    };
+
+
+    bool
+    is_consistent  (const std::vector<CellData<2> > &cells)
+    {
+      std::set<Edge> edges;
+      // edges collects the directed edges of all cells visited so far
+      std::vector<CellData<2> >::const_iterator c = cells.begin();
+      for (; c != cells.end(); ++c)
+        {
+          // the four edges of this cell, each with
+          // its two vertices in reverse order
+          const Edge reverse_edges[4] = { Edge (c->vertices[1],
+                                                c->vertices[0]),
+                                          Edge (c->vertices[2],
+                                                c->vertices[1]),
+                                          Edge (c->vertices[2],
+                                                c->vertices[3]),
+                                          Edge (c->vertices[3],
+                                                c->vertices[0])
+                                        };
+          // if a reversed edge is already in the
+          // set, an earlier cell uses that edge in
+          // the opposite direction: not consistent
+          if ((edges.find (reverse_edges[0]) != edges.end()) ||
+              (edges.find (reverse_edges[1]) != edges.end()) ||
+              (edges.find (reverse_edges[2]) != edges.end()) ||
+              (edges.find (reverse_edges[3]) != edges.end()))
+            return false;
+          // ok, not. insert them
+          // in the order in which
+          // we want them
+          // (std::set eliminates
+          // duplicates by itself)
+          for (unsigned int i = 0; i<4; ++i)
+            {
+              const Edge e(reverse_edges[i].v1, reverse_edges[i].v0);
+              edges.insert (e);
+            }
+          // then go on with next
+          // cell
+        }
+      // no conflicts found, so
+      // return true
+      return true;
+    }
+
+
+
+    struct MSide::SideRectify : public std::unary_function<MSide,void>
+    {
+      // put the two vertex indices of a side in order, so that v0<=v1
+      void operator() (MSide &s) const
+      {
+        if (s.v1 < s.v0) std::swap (s.v0, s.v1);
+      }
+    };
+
+
+    struct MSide::SideSortLess : public std::binary_function<MSide,MSide,bool>
+    {
+      /**
+       * Ordering of sides that ignores their direction: each
+       * side is reduced to the pair (smaller vertex, larger
+       * vertex) and these pairs are compared lexicographically.
+       * Two sides that differ only by a swap of v0 and v1 are
+       * therefore equivalent under this ordering. The vertex
+       * numbers are compared as int.
+       */
+      bool operator()(const MSide &s1, const MSide &s2) const
+      {
+        // smaller and larger vertex of the first side
+        int lo1 = s1.v0;
+        int hi1 = s1.v1;
+        if (!(s1.v0<s1.v1))
+          std::swap (lo1, hi1);
+
+        // smaller and larger vertex of the second side
+        int lo2 = s2.v0;
+        int hi2 = s2.v1;
+        if (!(s2.v0<s2.v1))
+          std::swap (lo2, hi2);
+
+        // the smaller vertices decide unless they coincide, in
+        // which case the larger vertices break the tie
+        if (lo1 < lo2)
+          return true;
+        else if (lo2 < lo1)
+          return false;
+        else
+          return hi1 < hi2;
+      }
+    };
+
+
+    /**
+     * Returns the MSide joining the two vertices of side i (0<=i<4, see
+     * ConnectGlobals::EdgeToNode) of the deal.II CellData<2> object q.
+     */
+    MSide quadside(const CellData<2> &q, unsigned int i)
+    {
+      Assert (i<4, ExcInternalError());
+      return MSide(q.vertices[ConnectGlobals::EdgeToNode[i][0]],
+                   q.vertices[ConnectGlobals::EdgeToNode[i][1]]);
+    }
+
+
+    /**
+     * Functor wrapper for quadside(), so that it can be used with std::bind2nd
+     */
+    struct QuadSide: public std::binary_function<CellData<2>,int,MSide>
+    {
+      MSide operator()(const CellData<2> &q, int i) const
+      {
+        return quadside(q,i);
+      }
+    };
+
+
+
+    MQuad::MQuad (const unsigned int v0,
+                  const unsigned int v1,
+                  const unsigned int v2,
+                  const unsigned int v3,
+                  const unsigned int s0,
+                  const unsigned int s1,
+                  const unsigned int s2,
+                  const unsigned int s3,
+                  const CellData<2>  &cd)
+      :
+      original_cell_data (cd)
+    {
+      // store the four vertex and the four side indices in local order
+      const unsigned int vertex_indices[4] = {v0, v1, v2, v3};
+      const unsigned int side_indices[4]   = {s0, s1, s2, s3};
+      for (unsigned int i=0; i<4; ++i)
+        {
+          v[i]    = vertex_indices[i];
+          side[i] = side_indices[i];
+        }
+    }
+
+
+    MSide::MSide (const unsigned int initv0,
+                  const unsigned int initv1)
+      :
+      // a new side only knows its two vertices: the adjacent quads and
+      // the local side numbers in them are invalid, and it is unoriented
+      v0(initv0), v1(initv1),
+      Q0(numbers::invalid_unsigned_int), Q1(numbers::invalid_unsigned_int),
+      lsn0(numbers::invalid_unsigned_int), lsn1(numbers::invalid_unsigned_int),
+      Oriented(false)
+    {}
+
+
+
+    bool
+    MSide::operator == (const MSide &s2) const
+    {
+      // sides are undirected for the purpose of comparison:
+      // they are equal if they join the same two vertices,
+      // no matter in which order these are stored
+      const bool same_direction
+        = ((v0 == s2.v0) && (v1 == s2.v1));
+      const bool opposite_direction
+        = ((v0 == s2.v1) && (v1 == s2.v0));
+
+      return (same_direction || opposite_direction);
+    }
+
+    // exact negation of operator==
+    bool
+    MSide::operator != (const MSide &s2) const
+    {
+      return !(*this == s2);
+    }
+
+
+    namespace
+    {
+      /**
+       * Create an MQuad object from a CellData<2>: the four vertex
+       * indices are copied from q, and the four side indices are the
+       * positions of its sides in elist, which has to be sorted with
+       * MSide::SideSortLess.
+       */
+      MQuad build_quad_from_vertices(const CellData<2> &q,
+                                     const std::vector<MSide> &elist)
+      {
+        // position of each of the four sides in elist, found by
+        // binary search with the ordering elist is sorted by.
+        // NOTE(review): the result of lower_bound is not compared
+        // with the side searched for, so every side of q is
+        // assumed to be present in elist
+        unsigned int edges[4] = { numbers::invalid_unsigned_int,
+                                  numbers::invalid_unsigned_int,
+                                  numbers::invalid_unsigned_int,
+                                  numbers::invalid_unsigned_int
+                                };
+
+        for (unsigned int i=0; i<4; ++i)
+          edges[i] = (Utilities::lower_bound (elist.begin(),
+                                              elist.end(),
+                                              quadside(q,i),
+                                              MSide::SideSortLess())
+                      -
+                      elist.begin());
+
+        return MQuad(q.vertices[0],q.vertices[1], q.vertices[2], q.vertices[3],
+                     edges[0], edges[1], edges[2], edges[3],
+                     q);
+      }
+    }
+
+
+    // three steps: build the side/quad graph, orient it, write the quads back
+    void
+    GridReordering::reorient(std::vector<CellData<2> > &quads)
+    {
+      build_graph(quads);
+      orient();
+      get_quads(quads);
+    }
+
+
+    void
+    GridReordering::build_graph (const std::vector<CellData<2> > &inquads)
+    {
+      // every quad contributes four sides (before duplicates are removed)
+      sides.reserve(4*inquads.size());
+
+      // append side i of every quad to the side vector, for i=0,...,3
+      for (int i = 0; i<4; ++i)
+        {
+          std::transform(inquads.begin(),inquads.end(),
+                         std::back_inserter(sides), std::bind2nd(QuadSide(),i));
+        }
+
+      // swap the vertices of each side where needed so that v0<=v1
+      std::for_each(sides.begin(),sides.end(),
+                    MSide::SideRectify() );
+
+      // sort them by (smaller vertex, larger vertex)
+      std::sort(sides.begin(),sides.end(),
+                MSide::SideSortLess());
+
+      // remove duplicates, so a side shared by two quads is stored once
+      sides.erase(std::unique(sides.begin(),sides.end()),
+                  sides.end());
+
+      // Swap trick: swap with a temporary copy so
+      // that capacity no longer needed can be released
+      std::vector<MSide>(sides).swap(sides);
+
+      // Now build one MQuad per input quad, looking up
+      // the indices of its sides in the sorted side list
+      mquads.reserve(inquads.size());
+      std::transform(inquads.begin(),
+                     inquads.end(),
+                     std::back_inserter(mquads),
+                     std_cxx11::bind(build_quad_from_vertices,
+                                     std_cxx11::_1,
+                                     std_cxx11::cref(sides)) );
+      // Record in each side the (at most two) quads that use it and its
+      // local number in each; a third quad on a side throws.
+      int qctr = 0;
+      for (std::vector<MQuad>::iterator it = mquads.begin(); it != mquads.end(); ++it)
+        {
+          for (unsigned int i = 0; i<4; ++i)
+            {
+              MSide &ss = sides[(*it).side[i]];
+              if (ss.Q0 == numbers::invalid_unsigned_int)
+                {
+                  ss.Q0 = qctr;
+                  ss.lsn0 = i;
+                }
+              else if (ss.Q1 == numbers::invalid_unsigned_int)
+                {
+                  ss.Q1 = qctr;
+                  ss.lsn1 = i;
+                }
+              else
+                AssertThrow (false, ExcInternalError());
+            }
+          qctr++;
+        }
+    }
+
+
+    void GridReordering::orient()
+    {
+      // take an unoriented side of a not yet fully oriented quad, orient
+      // it, and carry on across the opposite sides of the neighbors
+      unsigned int qnum = 0;
+      while (get_unoriented_quad(qnum))
+        {
+          unsigned int lsn = 0;
+          while (get_unoriented_side(qnum,lsn))
+            {
+              orient_side(qnum,lsn);
+              unsigned int qqnum = qnum;
+              while (side_hop(qqnum,lsn))
+                {
+                  // go to the side opposite the one we entered through
+                  lsn = (lsn+2)%4;
+                  if (!is_oriented_side(qqnum,lsn))
+                    orient_side(qqnum,lsn);
+                  else
+                    // this side is already
+                    // oriented: we've found a
+                    // cycle and oriented all
+                    // quads in it.
+                    break;
+                }
+            }
+        }
+    }
+
+
+    void
+    GridReordering::orient_side(const unsigned int quadnum,
+                                const unsigned int localsidenum)
+    {
+      MQuad &quad = mquads[quadnum];
+      int op_side_l = (localsidenum+2)%4;
+      MSide &side = sides[mquads[quadnum].side[localsidenum]];
+      const MSide &op_side = sides[mquads[quadnum].side[op_side_l]];
+      // op_side is the side of this quad opposite to the one to be oriented
+      //is the opposite side oriented?
+      if (op_side.Oriented)
+        {
+          //YES - Make the orientations match
+          //Is op side in default orientation?
+          if (op_side.v0 == quad.v[ConnectGlobals::DefaultOrientation[op_side_l][0]])
+            {
+              //YES, so use the default orientation here as well
+              side.v0 = quad.v[ConnectGlobals::DefaultOrientation[localsidenum][0]];
+              side.v1 = quad.v[ConnectGlobals::DefaultOrientation[localsidenum][1]];
+            }
+          else
+            {
+              //NO, it's reversed, so reverse this side as well
+              side.v0 = quad.v[ConnectGlobals::DefaultOrientation[localsidenum][1]];
+              side.v1 = quad.v[ConnectGlobals::DefaultOrientation[localsidenum][0]];
+            }
+        }
+      else
+        {
+          //NO
+          //Just use the default orientation
+          side.v0 = quad.v[ConnectGlobals::DefaultOrientation[localsidenum][0]];
+          side.v1 = quad.v[ConnectGlobals::DefaultOrientation[localsidenum][1]];
+        }
+      side.Oriented = true;
+    }
+
+
+    // true if all four sides of quad quadnum carry the Oriented flag
+    bool
+    GridReordering::is_fully_oriented_quad(const unsigned int quadnum) const
+    {
+      return (
+               (sides[mquads[quadnum].side[0]].Oriented)&&
+               (sides[mquads[quadnum].side[1]].Oriented)&&
+               (sides[mquads[quadnum].side[2]].Oriented)&&
+               (sides[mquads[quadnum].side[3]].Oriented)
+             );
+    }
+
+
+    // true if local side lsn of quad quadnum carries the Oriented flag
+    bool
+    GridReordering::is_oriented_side(const unsigned int quadnum,
+                                     const unsigned int lsn) const
+    {
+      return (sides[mquads[quadnum].side[lsn]].Oriented);
+    }
+
+
+
+    // advance UnOrQLoc to the first quad at or after it that is not fully oriented; false if none
+    bool
+    GridReordering::get_unoriented_quad(unsigned int &UnOrQLoc) const
+    {
+      while ( (UnOrQLoc<mquads.size()) &&
+              is_fully_oriented_quad(UnOrQLoc) )
+        UnOrQLoc++;
+      return (UnOrQLoc != mquads.size());
+    }
+
+
+    // set lsn to the lowest unoriented local side of quad quadnum; false (lsn untouched) if none
+    bool
+    GridReordering::get_unoriented_side (const unsigned int quadnum,
+                                         unsigned int &lsn) const
+    {
+      const MQuad &mq = mquads[quadnum];
+      if (!sides[mq.side[0]].Oriented)
+        {
+          lsn = 0;
+          return true;
+        }
+      if (!sides[mq.side[1]].Oriented)
+        {
+          lsn = 1;
+          return true;
+        }
+      if (!sides[mq.side[2]].Oriented)
+        {
+          lsn = 2;
+          return true;
+        }
+      if (!sides[mq.side[3]].Oriented)
+        {
+          lsn = 3;
+          return true;
+        }
+      return false;
+    }
+
+    // step from quad qnum across its local side lsn to the other quad recorded for that side
+    bool
+    GridReordering::side_hop (unsigned int &qnum, unsigned int &lsn) const
+    {
+      const MQuad &mq = mquads[qnum];
+      const MSide &s = sides[mq.side[lsn]];
+      unsigned int opquad = 0;
+      if (s.Q0 == qnum)
+        {
+          opquad = s.Q1;
+          lsn = s.lsn1;
+        }
+      else
+        {
+          opquad = s.Q0;
+          lsn = s.lsn0;
+        }
+      // note that lsn has been overwritten by now even if there is no other quad
+      if (opquad != numbers::invalid_unsigned_int)
+        {
+          qnum = opquad;
+          return true;
+        }
+      // no second quad recorded for this side: qnum stays as it was
+      return false;
+    }
+
+
+    void
+    GridReordering::get_quads (std::vector<CellData<2> > &outquads) const
+    {
+      outquads.clear();
+      outquads.reserve(mquads.size());
+      for (unsigned int qn = 0; qn<mquads.size(); ++qn)
+        {
+          // initialize CellData object with
+          // previous contents, and then
+          // overwrite all the fields that
+          // might have changed in the
+          // process of rotating things
+          CellData<2> q = mquads[qn].original_cell_data;
+
+          // Are the sides oriented?
+          Assert (is_fully_oriented_quad(qn), ExcInternalError());
+          bool s[4]; // whether sides 0, 1, 2, 3 are in the default orientation
+          for (int sn = 0; sn<4; sn++)
+            {
+              s[sn] = is_side_default_oriented(qn,sn);
+            }
+          // Are they oriented in the "deal way"?
+          Assert (s[0] == s[2], ExcInternalError());
+          Assert (s[1] == s[3], ExcInternalError());
+          // By how many positions (0 to 3) the vertices are rotated.
+          int rotn = 2*(s[0]?1:0)+ ((s[0]^s[1])?1:0);
+          // vertex i of the internal quad goes to position (i+rotn)%4
+          for (int i = 0; i<4; ++i)
+            {
+              q.vertices[(i+rotn)%4] = mquads[qn].v[i];
+            }
+          outquads.push_back(q);
+        }
+
+    }
+    // true if the side's v0 is the vertex that DefaultOrientation lists first for local side lsn
+    bool
+    GridReordering::is_side_default_oriented (const unsigned int qnum,
+                                              const unsigned int lsn) const
+    {
+      return (sides[mquads[qnum].side[lsn]].v0 ==
+              mquads[qnum].v[ConnectGlobals::DefaultOrientation[lsn][0]]);
+    }
+  } // namespace GridReordering2d
+} // namespace internal
+
+
+// anonymous namespace for internal helper functions
+namespace
+{
+  /**
+   * A set of functions that
+   * reorder the data from the
+   * "current" to the "classic"
+   * format of vertex numbering of
+   * cells. These functions reorder
+   * their argument in-place. In 2d,
+   * vertices 2 and 3 are swapped.
+   */
+  void
+  reorder_new_to_old_style (std::vector<CellData<2> > &cells)
+  {
+    for (unsigned int cell=0; cell<cells.size(); ++cell)
+      std::swap(cells[cell].vertices[2], cells[cell].vertices[3]);
+  }
+
+  // 3d version: new vertex i of each cell is its old vertex GeometryInfo<3>::ucd_to_deal[i]
+  void
+  reorder_new_to_old_style (std::vector<CellData<3> > &cells)
+  {
+    unsigned int tmp[GeometryInfo<3>::vertices_per_cell];
+    for (unsigned int cell=0; cell<cells.size(); ++cell)
+      {
+        for (unsigned int i=0; i<GeometryInfo<3>::vertices_per_cell; ++i)
+          tmp[i] = cells[cell].vertices[i];
+        for (unsigned int i=0; i<GeometryInfo<3>::vertices_per_cell; ++i)
+          cells[cell].vertices[i] = tmp[GeometryInfo<3>::ucd_to_deal[i]];
+      }
+  }
+
+
+  /**
+   * And now also in the opposite direction, in-place as well.
+   */
+  void
+  reorder_old_to_new_style (std::vector<CellData<2> > &cells)
+  {
+    // swapping vertices 2 and 3 is its own inverse, so reuse it:
+    reorder_new_to_old_style(cells);
+  }
+
+
+  void
+  reorder_old_to_new_style (std::vector<CellData<3> > &cells)
+  {
+    // undo the ordering above: write to index ucd_to_deal[i] instead of reading from it
+    unsigned int tmp[GeometryInfo<3>::vertices_per_cell];
+    for (unsigned int cell=0; cell<cells.size(); ++cell)
+      {
+        for (unsigned int i=0; i<GeometryInfo<3>::vertices_per_cell; ++i)
+          tmp[i] = cells[cell].vertices[i];
+        for (unsigned int i=0; i<GeometryInfo<3>::vertices_per_cell; ++i)
+          cells[cell].vertices[GeometryInfo<3>::ucd_to_deal[i]] = tmp[i];
+      }
+  }
+}
+
+
+/**
+ * Reorient the given 2d cells unless they are found to be consistent
+ * already. If @p use_new_style_ordering is set, the cells are
+ * converted to the old-style vertex numbering before the check and
+ * converted back at the end.
+ */
+template<>
+void
+GridReordering<2>::reorder_cells (std::vector<CellData<2> > &cells,
+                                  const bool use_new_style_ordering)
+{
+  // bring the cells into old-style format first, if necessary
+  if (use_new_style_ordering)
+    reorder_new_to_old_style(cells);
+
+  // a grid that is consistent already is left untouched; only
+  // otherwise do the reordering
+  const bool already_consistent
+    = internal::GridReordering2d::is_consistent (cells);
+  if (already_consistent == false)
+    internal::GridReordering2d::GridReordering().reorient(cells);
+
+  // hand the cells back in the format the caller uses
+  if (use_new_style_ordering)
+    reorder_old_to_new_style(cells);
+}
+
+
+/**
+ * Specialization for <2,3>: convert the cells to old-style format if
+ * requested, delegate the reordering to
+ * GridReordering<2>::reorder_cells, and convert back.
+ */
+template<>
+void
+GridReordering<2,3>::reorder_cells (std::vector<CellData<2> > &cells,
+                                    const bool use_new_style_ordering)
+{
+  // switch to old-style format first if the caller uses the new one
+  if (use_new_style_ordering)
+    {
+      reorder_new_to_old_style(cells);
+    }
+
+  // no second argument is passed here, so the default value that
+  // function declares for use_new_style_ordering applies
+  GridReordering<2>::reorder_cells(cells);
+
+  // ... and switch back afterwards
+  if (use_new_style_ordering)
+    {
+      reorder_old_to_new_style(cells);
+    }
+}
+
+
+
+/**
+ * Compute the measure of every 2d cell and invert each cell whose
+ * measure is negative by exchanging its vertices 1 and 3. An exception
+ * is thrown if an inverted cell still does not have positive measure,
+ * or if some but not all cells had negative measure.
+ */
+template<>
+void
+GridReordering<2>::invert_all_cells_of_negative_grid(const std::vector<Point<2> > &all_vertices,
+                                                     std::vector<CellData<2> >    &cells)
+{
+  unsigned int vertices_lex[GeometryInfo<2>::vertices_per_cell];
+  unsigned int n_negative_cells=0;
+  for (unsigned int cell_no=0; cell_no<cells.size(); ++cell_no)
+    {
+      CellData<2> &cell = cells[cell_no];
+
+      // GridTools::cell_measure requires the vertices to be in
+      // lexicographic ordering
+      for (unsigned int i=0; i<GeometryInfo<2>::vertices_per_cell; ++i)
+        vertices_lex[GeometryInfo<2>::ucd_to_deal[i]]=cell.vertices[i];
+
+      // nothing to do for cells whose measure is not negative
+      if (!(GridTools::cell_measure<2>(all_vertices, vertices_lex) < 0))
+        continue;
+
+      ++n_negative_cells;
+      std::swap(cell.vertices[1], cell.vertices[3]);
+
+      // check whether the resulting cell is now ok. if not, then the
+      // grid is seriously broken and cannot be used
+      for (unsigned int i=0; i<GeometryInfo<2>::vertices_per_cell; ++i)
+        vertices_lex[GeometryInfo<2>::ucd_to_deal[i]]=cell.vertices[i];
+      AssertThrow(GridTools::cell_measure<2>(all_vertices, vertices_lex) > 0,
+                  ExcInternalError());
+    }
+
+  // We assume that all cells of a grid have either positive or
+  // negative volumes but not both mixed. Although the reordering above
+  // might also work on single cells, grids with both kinds of cells
+  // are very likely to be broken. Check for this here.
+  AssertThrow(n_negative_cells==0 || n_negative_cells==cells.size(),
+              ExcMessage(std::string("This class assumes that either all cells have positive "
+                                     "volume, or that all cells have been specified in an "
+                                     "inverted vertex order so that their volume is negative. "
+                                     "(In the latter case, this class automatically inverts "
+                                     "every cell.) However, the mesh you have specified "
+                                     "appears to have both cells with positive and cells with "
+                                     "negative volume. You need to check your mesh which "
+                                     "cells these are and how they got there.\n"
+                                     "As a hint, of the total ")
+                         + Utilities::to_string (cells.size())
+                         + " cells in the mesh, "
+                         + Utilities::to_string (n_negative_cells)
+                         + " appear to have a negative volume."));
+}
+
+
+
+template<>
+void
+GridReordering<2,3>::invert_all_cells_of_negative_grid(const std::vector<Point<3> > &,
+                                                       std::vector<CellData<2> > &)
+{
+  // Not implemented for this specialization: both arguments are left
+  // unnamed and unused, and the only statement is an assertion that
+  // always fails with ExcNotImplemented.
+  Assert(false, ExcNotImplemented());
+}
+
+
+
+namespace internal
+{
+  namespace GridReordering3d
+  {
+    // Exception used by the 3d reordering code below. It is declared
+    // with one argument, a C string describing the problem, which is
+    // written after the fixed text "Grid Orientation Error: ".
+    DeclException1 (ExcGridOrientError,
+                    char *,
+                    <<  "Grid Orientation Error: " << arg1);
+
+    // The three EdgeOrientation values used in this file, initialized
+    // with the characters 'u', 'f' and 'b' respectively.
+    const EdgeOrientation unoriented_edge = {'u'};
+    const EdgeOrientation forward_edge    = {'f'};
+    const EdgeOrientation backward_edge   = {'b'};
+
+
+    // Two EdgeOrientation objects compare equal if they store the same
+    // orientation character.
+    inline
+    bool
+    EdgeOrientation::
+    operator == (const EdgeOrientation &edge_orientation) const
+    {
+      // the left hand operand must hold one of the characters 'u', 'f'
+      // or 'b'; the right hand operand is not checked here
+      Assert ((orientation == 'u') || (orientation == 'f') || (orientation == 'b'),
+              ExcInternalError());
+      return orientation == edge_orientation.orientation;
+    }
+
+
+
+    // Inequality is defined as the negation of operator==.
+    inline
+    bool
+    EdgeOrientation::
+    operator != (const EdgeOrientation &edge_orientation) const
+    {
+      if (*this == edge_orientation)
+        return false;
+
+      return true;
+    }
+
+
+
+    // Lookup tables describing the local connectivity of the eight
+    // nodes and twelve edges of one hexahedral cell.
+    namespace ElementInfo
+    {
+      /**
+       * The local numbers of the three edges that meet at local node
+       * i are given by edge_to_node[i][k] where k=0,1,2.
+       */
+      static const unsigned int edge_to_node[8][3] =
+      {
+        {0,4,8},
+        {0,5,9},
+        {3,5,10},
+        {3,4,11},
+        {1,7,8},
+        {1,6,9},
+        {2,6,10},
+        {2,7,11}
+      };
+
+
+      /**
+       * The orientation of edge edge_to_node[i][k] relative to node i
+       * is given by edge_to_node_orient[i][k] where k=0,1,2:
+       * forward_edge means that node i is the start of that edge,
+       * backward_edge means that it is the end of that edge (compare
+       * with nodes_on_edge below).
+       */
+      static const EdgeOrientation edge_to_node_orient[8][3] =
+      {
+        {forward_edge,  forward_edge,  forward_edge},
+        {backward_edge, forward_edge,  forward_edge},
+        {backward_edge, backward_edge, forward_edge},
+        {forward_edge,  backward_edge, forward_edge},
+        {forward_edge,  forward_edge,  backward_edge},
+        {backward_edge, forward_edge,  backward_edge},
+        {backward_edge, backward_edge, backward_edge},
+        {forward_edge,  backward_edge, backward_edge}
+      };
+
+      /**
+       * nodes_on_edge[i][0] is the local start node of local edge i,
+       * nodes_on_edge[i][1] is its local end node.
+       */
+      static const unsigned int nodes_on_edge[12][2] =
+      {
+        {0,1},
+        {4,5},
+        {7,6},
+        {3,2},
+        {0,3},
+        {1,2},
+        {5,6},
+        {4,7},
+        {0,4},
+        {1,5},
+        {2,6},
+        {3,7}
+      };
+    }
+
+
+    /**
+     * Store the two node numbers in ascending order, so that the
+     * resulting object does not depend on the order in which the two
+     * nodes are given.
+     */
+    CheapEdge::CheapEdge (const unsigned int n0,
+                          const unsigned int n1)
+      :
+      // the smaller of the two numbers goes first ...
+      node0(n1 < n0 ? n1 : n0),
+      // ... and the larger one second
+      node1(n0 < n1 ? n1 : n0)
+    {}
+
+
+
+    // Lexicographic ordering: compare node0 first and use node1 only
+    // to break ties.
+    bool CheapEdge::operator< (const CheapEdge &e2) const
+    {
+      if (node0 != e2.node0)
+        return node0 < e2.node0;
+
+      return node1 < e2.node1;
+    }
+
+
+    // Create an edge from node n0 to node n1. Unlike in CheapEdge, the
+    // nodes are stored in the order given. The edge starts out
+    // unoriented and with an invalid group number.
+    Edge::Edge (const unsigned int n0,
+                const unsigned int n1)
+      :
+      orientation_flag (unoriented_edge),
+      group (numbers::invalid_unsigned_int)
+    {
+      nodes[0] = n0;
+      nodes[1] = n1;
+    }
+
+
+
+    // Create a cell that is not yet connected to anything: all node
+    // and edge numbers are invalid, every local edge orientation is
+    // forward_edge, and the cell is not waiting to be processed.
+    Cell::Cell ()
+    {
+      waiting_to_be_processed = false;
+
+      for (unsigned int node=0; node<GeometryInfo<3>::vertices_per_cell; ++node)
+        nodes[node] = numbers::invalid_unsigned_int;
+
+      for (unsigned int edge=0; edge<GeometryInfo<3>::lines_per_cell; ++edge)
+        {
+          edges[edge] = numbers::invalid_unsigned_int;
+          local_orientation_flags[edge] = forward_edge;
+        }
+    }
+
+
+
+    // Build the internal mesh representation from the given cells:
+    // copy the vertex numbers of every cell, then set up the edges and
+    // the connectivity information.
+    Mesh::Mesh (const std::vector<CellData<3> > &incubes)
+    {
+      // translate every input cell into our own internal data format;
+      // only the vertex numbers are taken over
+      for (std::vector<CellData<3> >::const_iterator cube=incubes.begin();
+           cube!=incubes.end(); ++cube)
+        {
+          Cell the_cell;
+          for (unsigned int v=0; v<GeometryInfo<3>::vertices_per_cell; ++v)
+            the_cell.nodes[v] = cube->vertices[v];
+
+          cell_list.push_back(the_cell);
+        }
+
+      // everything else is derived from the node numbers
+      build_connectivity ();
+    }
+
+
+
+    // Run sanity_check_node on each of the eight local nodes of every
+    // cell of the mesh.
+    void
+    Mesh::sanity_check () const
+    {
+      const unsigned int n_cells = cell_list.size();
+      for (unsigned int cell=0; cell<n_cells; ++cell)
+        {
+          for (unsigned int node=0; node<8; ++node)
+            sanity_check_node (cell_list[cell], node);
+        }
+    }
+
+
+
+    /**
+     * Check, for local node @p local_node_num of cell @p c, that the
+     * three edges meeting at this node agree on the global node they
+     * meet in. The check is only compiled if DEBUG is defined;
+     * otherwise the function does nothing.
+     */
+    void
+    Mesh::sanity_check_node (const Cell         &c,
+                             const unsigned int local_node_num) const
+    {
+#ifdef DEBUG
+      // check that every edge
+      // coming into a node has the
+      // same node value
+
+      // Get the local edge numbers
+      // of the edges meeting at the node
+      const unsigned int e0 = ElementInfo::edge_to_node[local_node_num][0];
+      const unsigned int e1 = ElementInfo::edge_to_node[local_node_num][1];
+      const unsigned int e2 = ElementInfo::edge_to_node[local_node_num][2];
+
+      // Global Edge Numbers
+      const unsigned int ge0 = c.edges[e0];
+      const unsigned int ge1 = c.edges[e1];
+      const unsigned int ge2 = c.edges[e2];
+
+      // For each of the three edges: forward_edge if the table entry
+      // for this node equals the cell's local orientation flag of the
+      // edge, backward_edge otherwise. This decides below whether
+      // nodes[0] or nodes[1] of the global edge is looked at.
+      const EdgeOrientation or0 = ElementInfo::edge_to_node_orient[local_node_num][0] ==
+                                  c.local_orientation_flags[e0] ?
+                                  forward_edge : backward_edge;
+      const EdgeOrientation or1 = ElementInfo::edge_to_node_orient[local_node_num][1] ==
+                                  c.local_orientation_flags[e1] ?
+                                  forward_edge : backward_edge;
+      const EdgeOrientation or2 = ElementInfo::edge_to_node_orient[local_node_num][2] ==
+                                  c.local_orientation_flags[e2] ?
+                                  forward_edge : backward_edge;
+
+      // Make sure that edges agree
+      // what the current node should
+      // be.
+      Assert ((edge_list[ge0].nodes[or0 == forward_edge ? 0 : 1] ==
+               edge_list[ge1].nodes[or1 == forward_edge ? 0 : 1])
+              &&
+              (edge_list[ge1].nodes[or1 == forward_edge ? 0 : 1] ==
+               edge_list[ge2].nodes[or2 == forward_edge ? 0 : 1]),
+              ExcMessage ("This mesh does not satisfy the internal "
+                          "consistency check"));
+#else
+      // avoid warnings about unused arguments
+      (void)c;
+      (void)local_node_num;
+#endif
+    }
+
+
+
+    // Build the global edge list and the connectivity between cells
+    // and edges from the node numbers stored in cell_list.
+    void Mesh::build_connectivity ()
+    {
+      const unsigned int n_cells = cell_list.size();
+
+      unsigned int n_edges = 0;
+      // First pass: number the distinct edges and record, for every
+      // cell, the global number and relative direction of its edges
+      {
+        // edge_map stores the
+        // edge_number associated
+        // with a given CheapEdge
+        std::map<CheapEdge,unsigned int> edge_map;
+        unsigned int ctr = 0;
+        for (unsigned int cur_cell_id = 0;
+             cur_cell_id<n_cells;
+             ++cur_cell_id)
+          {
+            // the cell whose twelve
+            // edges are looked at
+            // next
+            const Cell &cur_cell = cell_list[cur_cell_id];
+
+            for (unsigned short int edge_num = 0;
+                 edge_num<12;
+                 ++edge_num)
+              {
+                unsigned int gl_edge_num = 0;
+                EdgeOrientation l_edge_orient = forward_edge;
+
+                // Construct the CheapEdge from the global numbers of
+                // the two end nodes of local edge edge_num
+                const unsigned int
+                node0 = cur_cell.nodes[ElementInfo::nodes_on_edge[edge_num][0]],
+                node1 = cur_cell.nodes[ElementInfo::nodes_on_edge[edge_num][1]];
+                const CheapEdge cur_edge (node0, node1);
+
+                if (edge_map.count(cur_edge) == 0)
+                  // Edge not in map
+                  {
+                    // put edge in
+                    // the map with
+                    // ctr value;
+                    edge_map[cur_edge] = ctr;
+                    gl_edge_num = ctr;
+
+                    // put the edge into the global edge list; its
+                    // nodes are stored in the order in which this
+                    // cell lists them
+                    edge_list.push_back(Edge(node0,node1));
+                    ctr++;
+                  }
+                else
+                  {
+                    // get the global edge
+                    // number from the map
+                    gl_edge_num = edge_map[cur_edge];
+                    // the stored edge starts at the other node, so
+                    // this cell sees it in backward direction
+                    if (edge_list[gl_edge_num].nodes[0] != node0)
+                      l_edge_orient = backward_edge;
+                  }
+                // store the global edge number and the relative
+                // direction in the cell
+                cell_list[cur_cell_id].edges[edge_num] = gl_edge_num;
+                cell_list[cur_cell_id].local_orientation_flags[edge_num]
+                  = l_edge_orient;
+              }
+          }
+        n_edges = ctr;
+      }
+
+      // Second pass: store, for every edge, the cells it belongs to.
+      {
+        std::vector<int> edge_count(n_edges,0);
+
+
+        // Count every time an edge
+        // occurs in a cube.
+        for (unsigned int cur_cell_id=0; cur_cell_id<n_cells; ++cur_cell_id)
+          for (unsigned short int edge_num = 0; edge_num<12; ++edge_num)
+            ++edge_count[cell_list[cur_cell_id].edges[edge_num]];
+
+        // So we now know how many
+        // cubes contain a given
+        // edge. Just need to store
+        // the list of cubes in the
+        // edge
+
+        // Allocate the space for the
+        // neighbor list
+        for (unsigned int cur_edge_id=0; cur_edge_id<n_edges; ++cur_edge_id)
+          edge_list[cur_edge_id].neighboring_cubes
+          .resize (edge_count[cur_edge_id]);
+
+        // Store the position of the
+        // next free entry in each
+        // edge's neighbor list
+        std::vector<int> cur_cell_edge_list_posn(n_edges,0);
+        for (unsigned int cur_cell_id=0; cur_cell_id<n_cells; ++cur_cell_id)
+          for (unsigned short int edge_num=0; edge_num<12; ++edge_num)
+            {
+              const unsigned int
+              gl_edge_id = cell_list[cur_cell_id].edges[edge_num];
+              Edge &cur_edge = edge_list[gl_edge_id];
+              cur_edge.neighboring_cubes[cur_cell_edge_list_posn[gl_edge_id]]
+                = cur_cell_id;
+              cur_cell_edge_list_posn[gl_edge_id]++;
+            }
+      }
+    }
+
+
+
+    // Write the node numbers of all cells of this mesh into the
+    // vertex arrays of @p outcubes, which must already have one entry
+    // per cell.
+    void
+    Mesh::export_to_deal_format (std::vector<CellData<3> > &outcubes) const
+    {
+      Assert (outcubes.size() == cell_list.size(),
+              ExcInternalError());
+
+      // only the vertex numbers of the output cells are overwritten
+      for (unsigned int cell=0; cell<cell_list.size(); ++cell)
+        {
+          for (unsigned int v=0; v<GeometryInfo<3>::vertices_per_cell; ++v)
+            outcubes[cell].vertices[v] = cell_list[cell].nodes[v];
+        }
+    }
+
+
+
+    // Set up the orienter for the given cells: build the internal mesh
+    // from them and start with position, marker cube and edge group
+    // all at zero.
+    Orienter::Orienter (const std::vector<CellData<3> > &incubes)
+      :
+      mesh (incubes),
+      cur_posn (0),
+      marker_cube (0),
+      cur_edge_group  (0)
+    {
+      // no edge of the current cube has been oriented yet
+      for (unsigned int i = 0; i<12; ++i)
+        edge_orient_array[i] = false;
+    }
+
+
+
+    // Try to orient the given cells consistently. On success the
+    // reoriented cells are written back into @p incubes and true is
+    // returned; if the edges cannot be oriented, false is returned and
+    // @p incubes is not written to.
+    bool Orienter::orient_mesh (std::vector<CellData<3> > &incubes)
+    {
+      Orienter orienter (incubes);
+
+      // First check that the mesh is sensible
+      orienter.mesh.sanity_check ();
+
+      // Orient the edges of the mesh; this is the step that can fail
+      const bool edges_oriented = orienter.orient_edges ();
+      if (edges_oriented)
+        {
+          // Now we have a bunch of oriented edges in the structure.
+          // We only have to turn the cubes so they match the edge
+          // orientation ...
+          orienter.orient_cubes ();
+
+          // ... and copy the elements from our internal structure
+          // back into their original location.
+          orienter.mesh.export_to_deal_format (incubes);
+        }
+
+      return edges_oriented;
+    }
+
+    /**
+     * This assigns an orientation to each edge so that every cube is
+     * a rotated Deal.II cube. Returns false as soon as a cube with
+     * contradicting edge orientations is found, and true otherwise.
+     */
+    bool Orienter::orient_edges ()
+    {
+      // While there are still cubes
+      // to orient
+      while (get_next_unoriented_cube())
+        // And there are edges in
+        // the cube to orient
+        while (orient_next_unoriented_edge())
+          {
+            // Make all the sides
+            // in the current set
+            // match
+            orient_edges_in_current_cube();
+
+            // Add the adjacent
+            // cubes to the list
+            // for processing
+            get_adjacent_cubes();
+            // Start working on
+            // this list of cubes
+            while (get_next_active_cube())
+              {
+                // Make sure the
+                // cube doesn't
+                // have a
+                // contradiction
+                if (!cell_is_consistent(cur_posn))
+                  return false;
+
+                // If we needed to
+                // orient any edges
+                // in the current
+                // cube then we may
+                // have to process
+                // the neighbors.
+                if (orient_edges_in_current_cube())
+                  get_adjacent_cubes();
+              }
+
+            // start the next sheet
+            // (equivalence class
+            // of edges)
+            ++cur_edge_group;
+          }
+      return true;
+    }
+
+
+
+    // Advance marker_cube to the first cube, starting from its current
+    // value, that is not fully oriented, and make it the current
+    // position. Returns false if there is no such cube.
+    bool Orienter::get_next_unoriented_cube ()
+    {
+      const unsigned int n_cubes = mesh.cell_list.size();
+
+      // skip over all cubes that are fully oriented already
+      for (; marker_cube<n_cubes; ++marker_cube)
+        if (!is_oriented(marker_cube))
+          break;
+
+      cur_posn = marker_cube;
+
+      // we point at a valid cube only if we did not run off the end
+      // of the list
+      return (cur_posn < n_cubes);
+    }
+
+
+
+    // A cube counts as oriented if none of its twelve edges is still
+    // flagged as unoriented in the global edge list.
+    bool Orienter::is_oriented (const unsigned int cell_num) const
+    {
+      const Cell &cell = mesh.cell_list[cell_num];
+
+      for (unsigned int edge=0; edge<12; ++edge)
+        {
+          if (mesh.edge_list[cell.edges[edge]].orientation_flag
+              == unoriented_edge)
+            return false;
+        }
+
+      return true;
+    }
+
+
+
+    // Check, separately for each of the three groups of four edges of
+    // the given cell (local edges 0-3, 4-7 and 8-11), that all edges
+    // of the group that are oriented already point in the same
+    // direction relative to the cell.
+    bool
+    Orienter::cell_is_consistent(const unsigned int cell_num) const
+    {
+      const Cell &c = mesh.cell_list[cell_num];
+
+      for (unsigned int group=0; group<3; ++group)
+        {
+          // direction of the first oriented edge encountered in this
+          // group; unoriented_edge as long as there was none
+          EdgeOrientation value = unoriented_edge;
+
+          // Loop over the four edges of the group
+          for (unsigned int i=4*group; i<4*(group+1); ++i)
+            {
+              // edges without orientation have no say
+              if ((c.local_orientation_flags[i] == unoriented_edge)
+                  ||
+                  (mesh.edge_list[c.edges[i]].orientation_flag ==
+                   unoriented_edge))
+                continue;
+
+              const EdgeOrientation this_edge_direction
+                = (c.local_orientation_flags[i]
+                   == mesh.edge_list[c.edges[i]].orientation_flag  ?
+                   forward_edge : backward_edge);
+
+              // the first oriented edge of the group sets the
+              // reference direction ...
+              if (value == unoriented_edge)
+                {
+                  value = this_edge_direction;
+                  continue;
+                }
+
+              // ... and every later one has to agree with it
+              if (value != this_edge_direction)
+                return false;
+            }
+        }
+
+      return true;
+    }
+
+
+
+    // Go back to the marker cube and give its first unoriented edge
+    // the direction in which this cube sees it. Returns false if the
+    // marker cube has no unoriented edge left.
+    bool Orienter::orient_next_unoriented_edge ()
+    {
+      cur_posn = marker_cube;
+      const Cell &c = mesh.cell_list[cur_posn];
+
+      // search for the first edge that is still unoriented
+      unsigned int edge = 0;
+      for (; edge<12; ++edge)
+        if (mesh.edge_list[c.edges[edge]].orientation_flag ==
+            unoriented_edge)
+          break;
+
+      // if we found none then there is nothing to do
+      if (edge == 12)
+        return false;
+
+      // Which edge group we're in.
+      const unsigned int edge_group = edge/4;
+
+      // A sanity check: none of the other edges in the group may have
+      // been oriented yet, i.e. each of the edges in the group should
+      // be un-oriented
+      for (unsigned int j = edge_group*4; j<edge_group*4+4; ++j)
+        Assert (mesh.edge_list[c.edges[j]].orientation_flag ==
+                unoriented_edge,
+                ExcGridOrientError("Tried to orient edge when other edges "
+                                   "in group are already oriented!"));
+
+      // Make the edge alignment match that of the local cube and
+      // assign the edge to the current group.
+      Edge &global_edge = mesh.edge_list[c.edges[edge]];
+      global_edge.orientation_flag = c.local_orientation_flags[edge];
+      global_edge.group = cur_edge_group;
+
+      // Remember that we have oriented this edge in the current cell.
+      edge_orient_array[edge] = true;
+
+      return true;
+    }
+
+
+
+    // Try the three edge groups of the current cube in turn and return
+    // true as soon as one of them had edges oriented; return false if
+    // none had.
+    // NOTE(review): after the first group that changes, the remaining
+    // groups are not visited in this call -- confirm this is intended.
+    bool Orienter::orient_edges_in_current_cube ()
+    {
+      unsigned int edge_group = 0;
+      while (edge_group<3)
+        {
+          if (orient_edge_set_in_current_cube(edge_group) == true)
+            return true;
+          ++edge_group;
+        }
+
+      return false;
+    }
+
+
+
+    // Look at edge group n (local edges 4n ... 4n+3) of the current
+    // cube. If some but not all of these edges are oriented, orient
+    // the remaining ones in the same direction and return true;
+    // otherwise change nothing and return false.
+    bool
+    Orienter::orient_edge_set_in_current_cube (const unsigned int n)
+    {
+      const Cell &c = mesh.cell_list[cur_posn];
+
+      // Check if any edge is
+      // oriented
+      unsigned int n_oriented = 0;
+      // common direction of the oriented edges relative to the cube
+      EdgeOrientation glorient   = unoriented_edge;
+      // bit k is set if the k-th edge of the group still has to be
+      // oriented; cur_flag is the bit of the edge currently visited
+      unsigned int edge_flags = 0;
+      unsigned int cur_flag   = 1;
+      for (unsigned int i = 4*n; i<4*(n+1); ++i, cur_flag<<=1)
+        {
+          if ((mesh.edge_list[c.edges[i]].orientation_flag !=
+               unoriented_edge)
+              &&
+              (c.local_orientation_flags[i] !=
+               unoriented_edge))
+            {
+              ++n_oriented;
+
+              const EdgeOrientation orient
+                = (mesh.edge_list[c.edges[i]].orientation_flag ==
+                   c.local_orientation_flags[i] ?
+                   forward_edge : backward_edge);
+
+              // the first oriented edge sets the direction, all later
+              // ones have to agree with it
+              if (glorient == unoriented_edge)
+                glorient = orient;
+              else
+                AssertThrow(orient == glorient,
+                            ExcGridOrientError("Attempted to Orient Misaligned cube"));
+            }
+          else
+            edge_flags |= cur_flag;
+        }
+
+      // nothing to do if none of
+      // the edges was oriented, or
+      // if all of them already were
+      if ((glorient == unoriented_edge) || (n_oriented == 4))
+        return false;
+
+      // Otherwise orient the
+      // remaining edges consistently.
+      cur_flag = 1;
+      for (unsigned int i=4*n; i<4*(n+1); ++i, cur_flag<<=1)
+        if ((edge_flags & cur_flag) != 0)
+          {
+            mesh.edge_list[c.edges[i]].orientation_flag
+              = (c.local_orientation_flags[i] == glorient ?
+                 forward_edge : backward_edge);
+
+            mesh.edge_list[c.edges[i]].group = cur_edge_group;
+            // Remember that we have oriented
+            // this edge in the current cell.
+            edge_orient_array[i] = true;
+          }
+
+      return true;
+    }
+
+
+
+    // Append to sheet_to_process every cube that shares one of the
+    // recently oriented edges of the current cube and is not waiting
+    // in that list already; then clear the record of recently
+    // oriented edges.
+    void Orienter::get_adjacent_cubes ()
+    {
+      const Cell &c = mesh.cell_list[cur_posn];
+      for (unsigned int e=0; e<12; ++e)
+        {
+          // Only edges we recently oriented are of interest
+          if (edge_orient_array[e] != true)
+            continue;
+
+          const Edge &the_edge = mesh.edge_list[c.edges[e]];
+          for (unsigned int local_cube_num = 0;
+               local_cube_num < the_edge.neighboring_cubes.size();
+               ++local_cube_num)
+            {
+              const unsigned int
+              global_cell_num = the_edge.neighboring_cubes[local_cube_num];
+              Cell &ncell = mesh.cell_list[global_cell_num];
+
+              // A cell that is waiting to be processed already must
+              // not be added to the list a second time.
+              if (ncell.waiting_to_be_processed)
+                continue;
+
+              sheet_to_process.push_back(global_cell_num);
+              ncell.waiting_to_be_processed = true;
+            }
+        }
+
+      // we're done with this cube so clear its processing flags.
+      for (unsigned int e=0; e<12; ++e)
+        edge_orient_array[e] = false;
+    }
+
+
+
+    // Mark the current cube as finished and make the cube at the back
+    // of sheet_to_process the new current one. Returns false if that
+    // list is empty, in which case the current position is unchanged.
+    bool Orienter::get_next_active_cube ()
+    {
+      // The current cube is finished with.
+      mesh.cell_list[cur_posn].waiting_to_be_processed = false;
+
+      if (sheet_to_process.empty())
+        return false;
+
+      cur_posn = sheet_to_process.back();
+      sheet_to_process.pop_back();
+      return true;
+    }
+
+
+    // Renumber the nodes of every cell so that the cell matches the
+    // orientation its edges were given before.
+    void Orienter::orient_cubes ()
+    {
+      // We assume that the mesh has
+      // all edges oriented already.
+
+      // This is a list of
+      // permutations that take node
+      // 0 to node i but only rotate
+      // the cube.  (This set is far
+      // from unique: there are 3 for
+      // each node - for our
+      // algorithm it doesn't matter
+      // which of the three we use.)
+      static const unsigned int CubePermutations[8][8] =
+      {
+        {0,1,2,3,4,5,6,7},
+        {1,2,3,0,5,6,7,4},
+        {2,3,0,1,6,7,4,5},
+        {3,0,1,2,7,4,5,6},
+        {4,7,6,5,0,3,2,1},
+        {5,4,7,6,1,0,3,2},
+        {6,5,4,7,2,1,0,3},
+        {7,6,5,4,3,2,1,0}
+      };
+
+      // So now we need to work out
+      // which node needs to be
+      // mapped to the zero node.
+      // The trick is that the node
+      // that should be the local
+      // zero node has three edges
+      // coming into it.
+      for (unsigned int i=0; i<mesh.cell_list.size(); ++i)
+        {
+          Cell &the_cell = mesh.cell_list[i];
+
+          // This stores whether the
+          // global oriented edge
+          // points in the same
+          // direction as its local
+          // edge on the current
+          // cube. (for each edge on
+          // the current cube)
+          EdgeOrientation local_edge_orientation[12];
+          for (unsigned int j = 0; j<12; ++j)
+            {
+              // get the global edge
+              const Edge &the_edge = mesh.edge_list[the_cell.edges[j]];
+              // All edges should be
+              // oriented at this
+              // stage..
+              Assert (the_edge.orientation_flag != unoriented_edge,
+                      ExcGridOrientError ("Unoriented edge encountered"));
+              // calculate whether it
+              // points the right way
+              // or not
+              local_edge_orientation[j] = (the_cell.local_orientation_flags[j] ==
+                                           the_edge.orientation_flag ?
+                                           forward_edge : backward_edge);
+            }
+
+          // Here the number of
+          // matching edges is
+          // tallied for each node.
+          unsigned int perm_num = numbers::invalid_unsigned_int;
+          for (unsigned int node_num=0; node_num<8; ++node_num)
+            {
+              // The local numbers of
+              // the edges meeting at
+              // the node
+              const unsigned int e0 = ElementInfo::edge_to_node[node_num][0];
+              const unsigned int e1 = ElementInfo::edge_to_node[node_num][1];
+              const unsigned int e2 = ElementInfo::edge_to_node[node_num][2];
+
+              // The table entries for
+              // the orientation of
+              // these edges relative
+              // to the node.
+              const EdgeOrientation sign0 = ElementInfo::edge_to_node_orient[node_num][0];
+              const EdgeOrientation sign1 = ElementInfo::edge_to_node_orient[node_num][1];
+              const EdgeOrientation sign2 = ElementInfo::edge_to_node_orient[node_num][2];
+
+              // Add one to the total
+              // for each edge whose
+              // orientation matches
+              Assert (local_edge_orientation[e0] != unoriented_edge,
+                      ExcInternalError());
+              Assert (local_edge_orientation[e1] != unoriented_edge,
+                      ExcInternalError());
+              Assert (local_edge_orientation[e2] != unoriented_edge,
+                      ExcInternalError());
+
+              const unsigned int
+              total  = (((local_edge_orientation[e0] == sign0) ? 1 : 0)
+                        +((local_edge_orientation[e1] == sign1) ? 1 : 0)
+                        +((local_edge_orientation[e2] == sign2) ? 1 : 0));
+
+              // all three edges match: this node becomes the new
+              // local node zero
+              if (total == 3)
+                {
+                  Assert (perm_num == numbers::invalid_unsigned_int,
+                          ExcGridOrientError("More than one node with 3 incoming "
+                                             "edges found in curent hex."));
+                  perm_num = node_num;
+                }
+            }
+          // We should now have a
+          // valid permutation number
+          Assert (perm_num != numbers::invalid_unsigned_int,
+                  ExcGridOrientError("No node having 3 incoming edges found in curent hex."));
+
+          // So use the appropriate
+          // rotation to get the new
+          // cube
+          unsigned int temp[8];
+          for (unsigned int v=0; v<8; ++v)
+            temp[v] = the_cell.nodes[CubePermutations[perm_num][v]];
+          for (unsigned int v=0; v<8; ++v)
+            the_cell.nodes[v] = temp[v];
+        }
+    }
+  } // namespace GridReordering3d
+} // namespace internal
+
+
+
+/**
+ * Try to reorient the given 3d cells. If the reorientation does not
+ * succeed, the cells are restored to the connectivity they came in
+ * with. If @p use_new_style_ordering is set, the cells are converted
+ * to the old-style vertex numbering first and converted back at the
+ * end.
+ */
+template<>
+void
+GridReordering<3>::reorder_cells (std::vector<CellData<3> > &cells,
+                                  const bool use_new_style_ordering)
+{
+  Assert (cells.size() != 0,
+          ExcMessage("List of elements to orient must have at least one cell"));
+
+  // bring the cells into old-style format first, if necessary
+  if (use_new_style_ordering)
+    reorder_new_to_old_style(cells);
+
+  // keep a copy to fall back to in case the reordering fails
+  const std::vector<CellData<3> > backup (cells);
+
+  // This does the real work. If it was not successful, go back to the
+  // original connectivity; otherwise keep the reordered one.
+  if (internal::GridReordering3d::Orienter::orient_mesh (cells) == false)
+    cells=backup;
+
+  // hand the cells back in the format the caller uses
+  if (use_new_style_ordering)
+    reorder_old_to_new_style(cells);
+}
+
+
+
+/**
+ * Compute the measure of every 3d cell and invert each cell whose
+ * measure is negative by exchanging its first four vertices with its
+ * last four. An exception is thrown if an inverted cell still does
+ * not have positive measure, or if some but not all cells had
+ * negative measure.
+ */
+template<>
+void
+GridReordering<3>::invert_all_cells_of_negative_grid(
+  const std::vector<Point<3> > &all_vertices,
+  std::vector<CellData<3> > &cells)
+{
+  unsigned int vertices_lex[GeometryInfo<3>::vertices_per_cell];
+  unsigned int n_negative_cells=0;
+  for (unsigned int cell_no=0; cell_no<cells.size(); ++cell_no)
+    {
+      CellData<3> &cell = cells[cell_no];
+
+      // GridTools::cell_measure requires the vertices to be in
+      // lexicographic ordering
+      for (unsigned int i=0; i<GeometryInfo<3>::vertices_per_cell; ++i)
+        vertices_lex[GeometryInfo<3>::ucd_to_deal[i]]=cell.vertices[i];
+
+      // nothing to do for cells whose measure is not negative
+      if (!(GridTools::cell_measure<3>(all_vertices, vertices_lex) < 0))
+        continue;
+
+      ++n_negative_cells;
+      // reorder vertices: swap front and back face
+      for (unsigned int i=0; i<4; ++i)
+        std::swap(cell.vertices[i], cell.vertices[i+4]);
+
+      // check whether the resulting cell is now ok. if not, then the
+      // grid is seriously broken and cannot be used
+      for (unsigned int i=0; i<GeometryInfo<3>::vertices_per_cell; ++i)
+        vertices_lex[GeometryInfo<3>::ucd_to_deal[i]]=cell.vertices[i];
+      AssertThrow(GridTools::cell_measure<3>(all_vertices, vertices_lex) > 0,
+                  ExcInternalError());
+    }
+
+  // We assume that all cells of a grid have either positive or
+  // negative volumes but not both mixed. Although the reordering above
+  // might also work on single cells, grids with both kinds of cells
+  // are very likely to be broken. Check for this here.
+  AssertThrow(n_negative_cells==0 || n_negative_cells==cells.size(), ExcInternalError());
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
diff --git a/source/grid/grid_tools.cc b/source/grid/grid_tools.cc
new file mode 100644
index 0000000..eabb5ff
--- /dev/null
+++ b/source/grid/grid_tools.cc
@@ -0,0 +1,3769 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/std_cxx11/array.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/thread_management.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/vector_memory.h>
+#include <deal.II/lac/filtered_matrix.h>
+#include <deal.II/lac/precondition.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/lac/sparsity_pattern.h>
+#include <deal.II/lac/sparsity_tools.h>
+#include <deal.II/grid/filtered_iterator.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/distributed/tria.h>
+#include <deal.II/distributed/shared_tria.h>
+#include <deal.II/distributed/tria_base.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/tria_boundary.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/grid_tools.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/fe/fe_nothing.h>
+#include <deal.II/fe/mapping_q1.h>
+#include <deal.II/fe/mapping_q.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/hp/mapping_collection.h>
+#include <deal.II/numerics/matrix_tools.h>
+
+#include <boost/random/uniform_real_distribution.hpp>
+#include <boost/random/mersenne_twister.hpp>
+
+#include <cmath>
+#include <numeric>
+#include <list>
+#include <set>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace GridTools
+{
+
+  template <int dim, int spacedim>
+  double
+  diameter (const Triangulation<dim, spacedim> &tria)
+  {
+    // Returns the largest distance between any two boundary vertices of the
+    // triangulation.
+    //
+    // we can't deal with distributed meshes since we don't have all
+    // vertices locally. the one exception is a mesh that has never been
+    // refined. testing tria.n_levels()==1 is not the way to check this, since
+    // that can be true on one processor without being true on all; the
+    // global number of active cells, however, can be used for the test
+#if defined(DEAL_II_WITH_P4EST) && defined(DEBUG)
+    if (const parallel::distributed::Triangulation<dim,spacedim> *p_tria
+        = dynamic_cast<const parallel::distributed::Triangulation<dim,spacedim>*>(&tria))
+      Assert (p_tria->n_global_active_cells() == tria.n_cells(0),
+              ExcNotImplemented());
+#endif
+
+    // step 1: flag every vertex that belongs to a boundary face. this is
+    // O(N) in the number of cells/vertices, whereas the distance search
+    // below is O(N*N), so restricting the search to boundary vertices pays
+    // off as the mesh is refined
+    const std::vector<Point<spacedim> > &vertices = tria.get_vertices ();
+    std::vector<bool> on_boundary (vertices.size(), false);
+
+    for (typename Triangulation<dim,spacedim>::active_cell_iterator
+         cell = tria.begin_active(); cell != tria.end(); ++cell)
+      for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+        {
+          if (!cell->face(f)->at_boundary ())
+            continue;
+          for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_face; ++v)
+            on_boundary[cell->face(f)->vertex_index(v)] = true;
+        }
+
+    // step 2: compare all pairs of flagged vertices. distances are
+    // symmetric, so only pairs with j>i have to be looked at
+    double max_distance_sqr = 0;
+    const unsigned int n_vertices = on_boundary.size();
+    for (unsigned int i=0; i<n_vertices; ++i)
+      for (unsigned int j=i+1; j<n_vertices; ++j)
+        {
+          if (!(on_boundary[i] && on_boundary[j]))
+            continue;
+          const double distance_sqr = (vertices[i]-vertices[j]).norm_square();
+          if (distance_sqr > max_distance_sqr)
+            max_distance_sqr = distance_sqr;
+        }
+
+    return std::sqrt(max_distance_sqr);
+  }
+
+
+
+  template <int dim, int spacedim>
+  double
+  volume (const Triangulation<dim, spacedim> &triangulation,
+          const Mapping<dim,spacedim> &mapping)
+  {
+    // Integrates the constant function 1 over all locally owned active cells
+    // by summing up the JxW values of a Gauss formula, and adds up the
+    // per-process results if the triangulation is a parallel one.
+
+    // use the polynomial degree of the mapping if it is a MappingQ; for any
+    // other mapping class simply assume degree 1
+    const MappingQ<dim,spacedim> *const mapping_q
+      = dynamic_cast<const MappingQ<dim,spacedim>*>(&mapping);
+    unsigned int mapping_degree = 1;
+    if (mapping_q != 0)
+      mapping_degree = mapping_q->get_degree();
+
+    // then initialize a Gauss formula constructed with degree+1
+    const QGauss<dim> quadrature_formula (mapping_degree + 1);
+
+    // FEValues is only needed for its JxW values, but it insists on being
+    // given a finite element. FE_Nothing serves as a cheap dummy
+    FE_Nothing<dim,spacedim> dummy_fe;
+    FEValues<dim,spacedim> fe_values (mapping, dummy_fe, quadrature_formula,
+                                      update_JxW_values);
+
+    // accumulate the quadrature weights cell by cell
+    double local_volume = 0;
+    for (typename Triangulation<dim,spacedim>::active_cell_iterator
+         cell = triangulation.begin_active();
+         cell != triangulation.end(); ++cell)
+      {
+        if (!cell->is_locally_owned())
+          continue;
+
+        fe_values.reinit (cell);
+        for (unsigned int q=0; q<quadrature_formula.size(); ++q)
+          local_volume += fe_values.JxW(q);
+      }
+
+#ifdef DEAL_II_WITH_MPI
+    // parallel triangulation: sum the local values over its communicator
+    if (const parallel::Triangulation<dim,spacedim> *p_tria
+        = dynamic_cast<const parallel::Triangulation<dim,spacedim>*>(&triangulation))
+      return Utilities::MPI::sum (local_volume, p_tria->get_communicator());
+#endif
+
+    return local_volume;
+  }
+
+
+  template <>
+  double
+  cell_measure<3>(const std::vector<Point<3> > &all_vertices,
+                  const unsigned int (&vertex_indices)[GeometryInfo<3>::vertices_per_cell])
+  {
+    // Computes the signed measure (volume) of the hexahedron whose eight
+    // corners are all_vertices[vertex_indices[0..7]]. note that the vertex
+    // indices have to be given in the new (lexicographic) deal.II numbering:
+    // when calling this function from inside GridReordering, reorder the
+    // vertex_indices before. the sign of the result is not normalized, so
+    // callers can use a negative value to detect an inverted cell
+    const double x[8] = { all_vertices[vertex_indices[0]](0),
+                          all_vertices[vertex_indices[1]](0),
+                          all_vertices[vertex_indices[2]](0),
+                          all_vertices[vertex_indices[3]](0),
+                          all_vertices[vertex_indices[4]](0),
+                          all_vertices[vertex_indices[5]](0),
+                          all_vertices[vertex_indices[6]](0),
+                          all_vertices[vertex_indices[7]](0)
+                        };
+    const double y[8] = { all_vertices[vertex_indices[0]](1),
+                          all_vertices[vertex_indices[1]](1),
+                          all_vertices[vertex_indices[2]](1),
+                          all_vertices[vertex_indices[3]](1),
+                          all_vertices[vertex_indices[4]](1),
+                          all_vertices[vertex_indices[5]](1),
+                          all_vertices[vertex_indices[6]](1),
+                          all_vertices[vertex_indices[7]](1)
+                        };
+    const double z[8] = { all_vertices[vertex_indices[0]](2),
+                          all_vertices[vertex_indices[1]](2),
+                          all_vertices[vertex_indices[2]](2),
+                          all_vertices[vertex_indices[3]](2),
+                          all_vertices[vertex_indices[4]](2),
+                          all_vertices[vertex_indices[5]](2),
+                          all_vertices[vertex_indices[6]](2),
+                          all_vertices[vertex_indices[7]](2)
+                        };
+
+    /*
+      This is the same Maple script as in the barycenter method above,
+      except that here the shape functions tphi[0]-tphi[7] are ordered
+      according to the lexicographic numbering.
+
+      x := array(0..7):
+      y := array(0..7):
+      z := array(0..7):
+      tphi[0] := (1-xi)*(1-eta)*(1-zeta):
+      tphi[1] :=     xi*(1-eta)*(1-zeta):
+      tphi[2] := (1-xi)*    eta*(1-zeta):
+      tphi[3] :=     xi*    eta*(1-zeta):
+      tphi[4] := (1-xi)*(1-eta)*zeta:
+      tphi[5] :=     xi*(1-eta)*zeta:
+      tphi[6] := (1-xi)*    eta*zeta:
+      tphi[7] :=     xi*    eta*zeta:
+      x_real := sum(x[s]*tphi[s], s=0..7):
+      y_real := sum(y[s]*tphi[s], s=0..7):
+      z_real := sum(z[s]*tphi[s], s=0..7):
+      with (linalg):
+      J := matrix(3,3, [[diff(x_real, xi), diff(x_real, eta), diff(x_real, zeta)],
+      [diff(y_real, xi), diff(y_real, eta), diff(y_real, zeta)],
+      [diff(z_real, xi), diff(z_real, eta), diff(z_real, zeta)]]):
+      detJ := det (J):
+
+      measure := simplify ( int ( int ( int (detJ, xi=0..1), eta=0..1), zeta=0..1)):
+
+      readlib(C):
+
+      C(measure, optimized);
+
+      The C code produced by this Maple script is further optimized by
+      hand. In particular, division by 12 is performed only once, not
+      hundreds of times.
+    */
+
+    const double t3 = y[3]*x[2];
+    const double t5 = z[1]*x[5];
+    const double t9 = z[3]*x[2];
+    const double t11 = x[1]*y[0];
+    const double t14 = x[4]*y[0];
+    const double t18 = x[5]*y[7];
+    const double t20 = y[1]*x[3];
+    const double t22 = y[5]*x[4];
+    const double t26 = z[7]*x[6];
+    const double t28 = x[0]*y[4];
+    const double t34 = z[3]*x[1]*y[2]+t3*z[1]-t5*y[7]+y[7]*x[4]*z[6]+t9*y[6]-t11*z[4]-t5*y[3]-t14*z[2]+z[1]*x[4]*y[0]-t18*z[3]+t20*z[0]-t22*z[0]-y[0]*x[5]*z[4]-t26*y[3]+t28*z[2]-t9*y[1]-y[1]*x[4]*z[0]-t11*z[5];
+    const double t37 = y[1]*x[0];
+    const double t44 = x[1]*y[5];
+    const double t46 = z[1]*x[0];
+    const double t49 = x[0]*y[2];
+    const double t52 = y[5]*x[7];
+    const double t54 = x[3]*y[7];
+    const double t56 = x[2]*z[0];
+    const double t58 = x[3]*y[2];
+    const double t64 = -x[6]*y[4]*z[2]-t37*z[2]+t18*z[6]-x[3]*y[6]*z[2]+t11*z[2]+t5*y[0]+t44*z[4]-t46*y[4]-t20*z[7]-t49*z[6]-t22*z[1]+t52*z[3]-t54*z[2]-t56*y[4]-t58*z[0]+y[1]*x[2]*z[0]+t9*y[7]+t37*z[4];
+    const double t66 = x[1]*y[7];
+    const double t68 = y[0]*x[6];
+    const double t70 = x[7]*y[6];
+    const double t73 = z[5]*x[4];
+    const double t76 = x[6]*y[7];
+    const double t90 = x[4]*z[0];
+    const double t92 = x[1]*y[3];
+    const double t95 = -t66*z[3]-t68*z[2]-t70*z[2]+t26*y[5]-t73*y[6]-t14*z[6]+t76*z[2]-t3*z[6]+x[6]*y[2]*z[4]-z[3]*x[6]*y[2]+t26*y[4]-t44*z[3]-x[1]*y[2]*z[0]+x[5]*y[6]*z[4]+t54*z[5]+t90*y[2]-t92*z[2]+t46*y[2];
+    const double t102 = x[2]*y[0];
+    const double t107 = y[3]*x[7];
+    const double t114 = x[0]*y[6];
+    const double t125 = y[0]*x[3]*z[2]-z[7]*x[5]*y[6]-x[2]*y[6]*z[4]+t102*z[6]-t52*z[6]+x[2]*y[4]*z[6]-t107*z[5]-t54*z[6]+t58*z[6]-x[7]*y[4]*z[6]+t37*z[5]-t114*z[4]+t102*z[4]-z[1]*x[2]*y[0]+t28*z[6]-y[5]*x[6]*z[4]-z[5]*x[1]*y[4]-t73*y[7];
+    const double t129 = z[0]*x[6];
+    const double t133 = y[1]*x[7];
+    const double t145 = y[1]*x[5];
+    const double t156 = t90*y[6]-t129*y[4]+z[7]*x[2]*y[6]-t133*z[5]+x[5]*y[3]*z[7]-t26*y[2]-t70*z[3]+t46*y[3]+z[5]*x[7]*y[4]+z[7]*x[3]*y[6]-t49*z[4]+t145*z[7]-x[2]*y[7]*z[6]+t70*z[5]+t66*z[5]-z[7]*x[4]*y[6]+t18*z[4]+x[1]*y[4]*z[0];
+    const double t160 = x[5]*y[4];
+    const double t165 = z[1]*x[7];
+    const double t178 = z[1]*x[3];
+    const double t181 = t107*z[6]+t22*z[7]+t76*z[3]+t160*z[1]-x[4]*y[2]*z[6]+t70*z[4]+t165*y[5]+x[7]*y[2]*z[6]-t76*z[5]-t76*z[4]+t133*z[3]-t58*z[1]+y[5]*x[0]*z[4]+t114*z[2]-t3*z[7]+t20*z[2]+t178*y[7]+t129*y[2];
+    const double t207 = t92*z[7]+t22*z[6]+z[3]*x[0]*y[2]-x[0]*y[3]*z[2]-z[3]*x[7]*y[2]-t165*y[3]-t9*y[0]+t58*z[7]+y[3]*x[6]*z[2]+t107*z[2]+t73*y[0]-x[3]*y[5]*z[7]+t3*z[0]-t56*y[6]-z[5]*x[0]*y[4]+t73*y[1]-t160*z[6]+t160*z[0];
+    const double t228 = -t44*z[7]+z[5]*x[6]*y[4]-t52*z[4]-t145*z[4]+t68*z[4]+t92*z[5]-t92*z[0]+t11*z[3]+t44*z[0]+t178*y[5]-t46*y[5]-t178*y[0]-t145*z[0]-t20*z[5]-t37*z[3]-t160*z[7]+t145*z[3]+x[4]*y[6]*z[2];
+
+    return (t34+t64+t95+t125+t156+t181+t207+t228)/12.;
+  }
+
+
+
+  template <>
+  double
+  cell_measure(const std::vector<Point<2> > &all_vertices,
+               const unsigned int (&vertex_indices) [GeometryInfo<2>::vertices_per_cell])
+  {
+    /*
+      Get the computation of the measure by this little Maple script. We
+      use the bilinear mapping of the unit quad to the real quad. However,
+      every transformation mapping the unit faces to straight lines should
+      do.
+
+      Remember that the area of the quad is given by
+      \int_K 1 dx dy  = \int_{\hat K} |det J| d(xi) d(eta)
+
+      # x and y are arrays holding the x- and y-values of the four vertices
+      # of this cell in real space.
+      x := array(0..3);
+      y := array(0..3);
+      z := array(0..3);
+      tphi[0] := (1-xi)*(1-eta):
+      tphi[1] :=     xi*(1-eta):
+      tphi[2] := (1-xi)*eta:
+      tphi[3] :=     xi*eta:
+      x_real := sum(x[s]*tphi[s], s=0..3):
+      y_real := sum(y[s]*tphi[s], s=0..3):
+      z_real := sum(z[s]*tphi[s], s=0..3):
+
+      Jxi := <diff(x_real,xi)  | diff(y_real,xi) | diff(z_real,xi)>;
+      Jeta := <diff(x_real,eta)| diff(y_real,eta)| diff(z_real,eta)>;
+      with(VectorCalculus):
+      J := CrossProduct(Jxi, Jeta);
+      detJ := sqrt(J[1]^2 + J[2]^2 +J[3]^2);
+
+      # measure := evalf (Int (Int (detJ, xi=0..1, method = _NCrule ) , eta=0..1, method = _NCrule  ) ):
+      # readlib(C):
+
+      # C(measure, optimized);
+
+      additional optimization: divide by 2 only once
+    */
+
+    // collect the coordinates of the four corners of the cell
+    double x[4], y[4];
+    for (unsigned int v=0; v<4; ++v)
+      {
+        const Point<2> &corner = all_vertices[vertex_indices[v]];
+        x[v] = corner(0);
+        y[v] = corner(1);
+      }
+
+    // note that the formula below takes no absolute value, so the result
+    // is a signed area: it changes sign if the vertices are given in the
+    // opposite orientation
+    return (-x[1]*y[0]+x[1]*y[3]+y[0]*x[2]+x[0]*y[1]-x[0]*y[2]-y[1]*x[3]-x[2]*y[3]+x[3]*y[2])/2;
+
+  }
+
+
+
+  // Generic version, used where no specialization exists: not implemented, asserts and returns 0.
+  template <int dim>
+  double
+  cell_measure(const std::vector<Point<dim> > &,
+               const unsigned int ( &) [GeometryInfo<dim>::vertices_per_cell])
+  {
+    Assert(false, ExcNotImplemented());
+    return 0.;
+  }
+
+
+
+  template <int dim, int spacedim>
+  void
+  delete_unused_vertices (std::vector<Point<spacedim> >    &vertices,
+                          std::vector<CellData<dim> > &cells,
+                          SubCellData                 &subcelldata)
+  {
+    // Removes every vertex that no cell refers to and rewrites the vertex
+    // indices stored in cells and in subcelldata (boundary lines and quads)
+    // to the new, compacted numbering. The vertices that survive keep their
+    // relative order.
+
+    // step 1: flag the vertices that at least one cell refers to
+    std::vector<bool> is_used (vertices.size(), false);
+    for (unsigned int c=0; c<cells.size(); ++c)
+      {
+        const CellData<dim> &cell = cells[c];
+        for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+          is_used[cell.vertices[v]] = true;
+      }
+
+    // step 2: copy the flagged vertices, in their old order, into a new
+    // array and record the position each of them ends up at. entries of
+    // vertices that are dropped keep the invalid index
+    std::vector<unsigned int> renumbering (vertices.size(),
+                                           numbers::invalid_unsigned_int);
+    std::vector<Point<spacedim> > kept_vertices;
+    kept_vertices.reserve (std::count(is_used.begin(), is_used.end(), true));
+    for (unsigned int i=0; i<vertices.size(); ++i)
+      if (is_used[i])
+        {
+          renumbering[i] = static_cast<unsigned int>(kept_vertices.size());
+          kept_vertices.push_back (vertices[i]);
+        }
+
+    // step 3: replace old vertex numbers by the new ones, first in the
+    // cells ...
+    for (unsigned int c=0; c<cells.size(); ++c)
+      for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+        cells[c].vertices[v] = renumbering[cells[c].vertices[v]];
+
+    // ... and then in the boundary data
+    for (unsigned int l=0; l<subcelldata.boundary_lines.size(); ++l)
+      for (unsigned int v=0; v<GeometryInfo<1>::vertices_per_cell; ++v)
+        subcelldata.boundary_lines[l].vertices[v]
+          = renumbering[subcelldata.boundary_lines[l].vertices[v]];
+    for (unsigned int q=0; q<subcelldata.boundary_quads.size(); ++q)
+      for (unsigned int v=0; v<GeometryInfo<2>::vertices_per_cell; ++v)
+        subcelldata.boundary_quads[q].vertices[v]
+          = renumbering[subcelldata.boundary_quads[q].vertices[v]];
+
+    // step 4: let the compacted array take the place of the old one
+    vertices.swap (kept_vertices);
+  }
+
+
+
+  template <int dim, int spacedim>
+  void
+  delete_duplicated_vertices (std::vector<Point<spacedim> >    &vertices,
+                              std::vector<CellData<dim> > &cells,
+                              SubCellData                 &subcelldata,
+                              std::vector<unsigned int>   &considered_vertices,
+                              double                       tol)
+  {
+    // Identifies vertices that coincide up to tol with a vertex listed
+    // earlier, redirects all references to that one and deletes the duplicates.
+    // considered_vertices lists the vertices to compare; if it is empty it is
+    // overwritten with the indices of all vertices. two vertices count as
+    // equal if each of their coordinates differs by less than tol. vertices,
+    // cells and subcelldata are updated in place.
+
+    // start from the identity renumbering and change it where necessary
+    std::vector<unsigned int> new_vertex_numbers(vertices.size());
+    for (unsigned int i=0; i<vertices.size(); ++i)
+      new_vertex_numbers[i]=i;
+
+    if (considered_vertices.size()==0)
+      considered_vertices=new_vertex_numbers;
+
+    // for each considered vertex that has not been identified with another
+    // one yet, search the remaining considered vertices for a duplicate and
+    // map the duplicate's index to this vertex' index
+    for (unsigned int i=0; i<considered_vertices.size(); ++i)
+      {
+        if (new_vertex_numbers[considered_vertices[i]]!=considered_vertices[i])
+          continue;
+
+        for (unsigned int j=i+1; j<considered_vertices.size(); ++j)
+          {
+            bool equal=true;
+            for (unsigned int d=0; d<spacedim; ++d)
+              equal &= (std::fabs(vertices[considered_vertices[j]](d)-vertices[considered_vertices[i]](d))<tol);
+            if (equal)
+              {
+                new_vertex_numbers[considered_vertices[j]]=considered_vertices[i];
+                // we do not suppose, that there might be another duplicate
+                // vertex, so break here
+                break;
+              }
+          }
+      }
+
+    // apply the renumbering (non-duplicates map to themselves). boundary data
+    // has to be renumbered too: delete_unused_vertices() gives the invalid
+    // index to every vertex no cell refers to, so boundary lines or quads
+    // still pointing at a duplicate would end up with invalid vertex indices
+    for (unsigned int c=0; c<cells.size(); ++c)
+      for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+        cells[c].vertices[v]=new_vertex_numbers[cells[c].vertices[v]];
+    for (unsigned int c=0; c<subcelldata.boundary_lines.size(); ++c)
+      for (unsigned int v=0; v<GeometryInfo<1>::vertices_per_cell; ++v)
+        subcelldata.boundary_lines[c].vertices[v]
+          =new_vertex_numbers[subcelldata.boundary_lines[c].vertices[v]];
+    for (unsigned int c=0; c<subcelldata.boundary_quads.size(); ++c)
+      for (unsigned int v=0; v<GeometryInfo<2>::vertices_per_cell; ++v)
+        subcelldata.boundary_quads[c].vertices[v]
+          =new_vertex_numbers[subcelldata.boundary_quads[c].vertices[v]];
+
+    delete_unused_vertices(vertices,cells,subcelldata);
+  }
+
+
+
+// file-local vertex transformations, kept in an anonymous namespace
+  namespace
+  {
+    // Functor that translates a point by a fixed vector.
+    template <int spacedim>
+    class Shift
+    {
+    public:
+      Shift (const Tensor<1,spacedim> &shift)
+        :
+        shift(shift)
+      {}
+      Point<spacedim> operator() (const Point<spacedim> &p) const
+      {
+        return p+shift;
+      }
+    private:
+      const Tensor<1,spacedim> shift;
+    };
+
+    // Functor that rotates a point about the origin by a fixed angle (given
+    // in radians, counterclockwise for positive values). only needed in 2d,
+    // so it is not a template, which avoids trouble with compiler warnings
+    class Rotate2d
+    {
+    public:
+      Rotate2d (const double angle)
+        :
+        angle(angle)
+      {}
+      Point<2> operator() (const Point<2> &p) const
+      {
+        return Point<2> (std::cos(angle)*p(0) - std::sin(angle) * p(1),
+                         std::sin(angle)*p(0) + std::cos(angle) * p(1));
+      }
+    private:
+      const double angle;
+    };
+
+    // Functor that multiplies all coordinates of a point by a fixed factor.
+    template <int spacedim>
+    class Scale
+    {
+    public:
+      Scale (const double factor)
+        :
+        factor(factor)
+      {}
+      Point<spacedim> operator() (const Point<spacedim> &p) const
+      {
+        return p*factor;
+      }
+    private:
+      const double factor;
+    };
+  }
+
+  // Applies the Shift functor (p -> p+shift_vector) to the triangulation through transform().
+  template <int dim, int spacedim>
+  void
+  shift (const Tensor<1,spacedim>   &shift_vector,
+         Triangulation<dim, spacedim> &triangulation)
+  {
+    transform (Shift<spacedim>(shift_vector), triangulation);
+  }
+
+
+  // Applies the Rotate2d functor for the given angle to a 2d triangulation through transform().
+  void
+  rotate (const double      angle,
+          Triangulation<2> &triangulation)
+  {
+    transform (Rotate2d(angle), triangulation);
+  }
+
+
+  // Applies the Scale functor (p -> p*scaling_factor) through transform(); the factor must be positive.
+  template <int dim, int spacedim>
+  void
+  scale (const double        scaling_factor,
+         Triangulation<dim, spacedim> &triangulation)
+  {
+    Assert (scaling_factor>0, ExcScalingFactorNotPositive (scaling_factor));
+    transform (Scale<spacedim>(scaling_factor), triangulation);
+  }
+
+
+  namespace
+  {
+    /**
+     * Solve the Laplace equation for the @p laplace_transform function for one
+     * of the @p dim space dimensions: CG on the filtered matrix @p S with the
+     * constraints @p m, result in @p u. A function of its own to allow parallel execution.
+     */
+    void laplace_solve (const SparseMatrix<double> &S,
+                        const std::map<unsigned int,double> &m,
+                        Vector<double> &u)
+    {
+      const unsigned int n_dofs=S.n();
+      FilteredMatrix<Vector<double> > SF (S);
+      PreconditionJacobi<SparseMatrix<double> > prec;
+      prec.initialize(S, 1.2);
+      FilteredMatrix<Vector<double> > PF (prec);
+      // NOTE(review): presumably n_dofs is the iteration limit and 1.e-10 the tolerance -- confirm
+      SolverControl control (n_dofs, 1.e-10, false, false);
+      GrowingVectorMemory<Vector<double> > mem;
+      SolverCG<Vector<double> > solver (control, mem);
+      // right hand side of size n_dofs, handed to apply_constraints() below
+      Vector<double> f(n_dofs);
+      // NOTE(review): the constraints in m are added to SF only, not to PF -- confirm this is intended
+      SF.add_constraints(m);
+      SF.apply_constraints (f, true);
+      solver.solve(SF, u, f, PF);
+    }
+  }
+
+
+
+  // Specialization for 1D only: not implemented, the body just asserts with ExcNotImplemented
+  template <>
+  void laplace_transform (const std::map<unsigned int,Point<1> > &,
+                          Triangulation<1> &,
+                          const Function<1> *)
+  {
+    Assert(false, ExcNotImplemented());
+  }
+
+
+  // Implementation for dimensions except 1
+  template <int dim>
+  void
+  laplace_transform (const std::map<unsigned int,Point<dim> > &new_points,
+                     Triangulation<dim> &triangulation,
+                     const Function<dim> *coefficient)
+  {
+    // new_points maps vertex indices to the locations these vertices shall
+    // be moved to. Their coordinates enter as constraints into dim Laplace
+    // problems, one per coordinate direction, whose solutions then provide
+    // the new location of every vertex. coefficient is handed on unchanged
+    // to the function that assembles the Laplace matrix.
+
+    // discretize with Q1 elements
+    FE_Q<dim> q1(1);
+    DoFHandler<dim> dof_handler(triangulation);
+    dof_handler.distribute_dofs(q1);
+
+    // build the sparsity pattern ...
+    DynamicSparsityPattern dsp (dof_handler.n_dofs (),
+                                dof_handler.n_dofs ());
+    DoFTools::make_sparsity_pattern (dof_handler, dsp);
+    dsp.compress ();
+
+    SparsityPattern sparsity_pattern;
+    sparsity_pattern.copy_from (dsp);
+    sparsity_pattern.compress ();
+
+    // ... and assemble the Laplace matrix on it
+    SparseMatrix<double> S(sparsity_pattern);
+
+    QGauss<dim> quadrature(4);
+    MatrixCreator::create_laplace_matrix(StaticMappingQ1<dim>::mapping, dof_handler, quadrature, S, coefficient);
+
+    // the boundary values of the Laplace problems: for each coordinate
+    // direction one map from dof index to prescribed coordinate
+    std::vector<std::map<unsigned int,double> > m(dim);
+
+    // fill these maps using the data given by new_points. every face of
+    // every active cell is visited, so a vertex is in general encountered
+    // several times; insert() leaves an already existing entry alone
+    typedef typename std::map<unsigned int,Point<dim> >::const_iterator
+    new_point_iterator;
+    typename DoFHandler<dim>::cell_iterator cell=dof_handler.begin_active(),
+                                            endc=dof_handler.end();
+    for (; cell!=endc; ++cell)
+      for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+        {
+          const typename DoFHandler<dim>::face_iterator face=cell->face(f);
+
+          for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_face; ++v)
+            {
+              // skip vertices for which no new location was given
+              const new_point_iterator entry
+                = new_points.find(face->vertex_index(v));
+              if (entry==new_points.end())
+                continue;
+
+              const unsigned int dof_index = face->vertex_dof_index(v, 0);
+              for (unsigned int d=0; d<dim; ++d)
+                m[d].insert(std::pair<unsigned int,double> (
+                              dof_index, entry->second(d)));
+            }
+        }
+
+    // one solution vector per coordinate direction
+    Vector<double> us[dim];
+    for (unsigned int d=0; d<dim; ++d)
+      us[d].reinit (dof_handler.n_dofs());
+
+    // the dim problems only differ in their constraints and are independent
+    // of each other, so solve them as parallel tasks
+    Threads::TaskGroup<> tasks;
+    for (unsigned int d=0; d<dim; ++d)
+      tasks += Threads::new_task (&laplace_solve,
+                                  S, m[d], us[d]);
+    tasks.join_all ();
+
+    // finally move each vertex to the location given by the solutions
+    for (cell=dof_handler.begin_active(); cell!=endc; ++cell)
+      for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+        {
+          const unsigned int dof_index=cell->vertex_dof_index(v, 0);
+          Point<dim> &vertex=cell->vertex(v);
+          for (unsigned int d=0; d<dim; ++d)
+            vertex(d)=us[d](dof_index);
+        }
+  }
+
+  template <int dim, int spacedim>
+  std::map<unsigned int, Point<spacedim> >
+  get_all_vertices_at_boundary (const Triangulation<dim, spacedim> &tria)
+  {
+    // Returns a map from vertex index to vertex location that contains the
+    // vertices of all boundary faces of the active cells of tria.
+    std::map<unsigned int, Point<spacedim> > boundary_vertices;
+    for (typename Triangulation<dim,spacedim>::active_cell_iterator
+         cell = tria.begin_active(); cell != tria.end(); ++cell)
+      for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+        {
+          const typename Triangulation<dim, spacedim>::face_iterator
+          face = cell->face(f);
+          if (!face->at_boundary())
+            continue;
+
+          // a vertex that belongs to several boundary faces is simply
+          // stored again under the same index
+          for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_face; ++v)
+            {
+              const unsigned int vertex_index = face->vertex_index(v);
+              boundary_vertices[vertex_index] = face->vertex(v);
+            }
+        }
+
+    return boundary_vertices;
+  }
+
+  /**
+    * Distort a triangulation in
+    * some random way.
+    */
+  template <int dim, int spacedim>
+  void
+  distort_random (const double                 factor,
+                  Triangulation<dim,spacedim> &triangulation,
+                  const bool                   keep_boundary)
+  {
+    // if spacedim>dim we need to make sure that we perturb
+    // points but keep them on
+    // the manifold. however, this isn't implemented right now
+    Assert (spacedim == dim, ExcNotImplemented());
+
+
+    // find the smallest length of the
+    // lines adjacent to the
+    // vertex. take the initial value
+    // to be larger than anything that
+    // might be found: the diameter of
+    // the triangulation, here
+    // estimated by adding up the
+    // diameters of the coarse grid
+    // cells.
+    double almost_infinite_length = 0;
+    for (typename Triangulation<dim,spacedim>::cell_iterator
+         cell=triangulation.begin(0); cell!=triangulation.end(0); ++cell)
+      almost_infinite_length += cell->diameter();
+
+    std::vector<double> minimal_length (triangulation.n_vertices(),
+                                        almost_infinite_length);
+
+    // also note if a vertex is at the boundary
+    std::vector<bool>   at_boundary (triangulation.n_vertices(), false);
+    for (typename Triangulation<dim,spacedim>::active_cell_iterator
+         cell=triangulation.begin_active(); cell!=triangulation.end(); ++cell)
+      if (cell->is_locally_owned())
+        {
+          if (dim>1)
+            {
+              for (unsigned int i=0; i<GeometryInfo<dim>::lines_per_cell; ++i)
+                {
+                  const typename Triangulation<dim,spacedim>::line_iterator line
+                    = cell->line(i);
+
+                  if (keep_boundary && line->at_boundary())
+                    {
+                      at_boundary[line->vertex_index(0)] = true;
+                      at_boundary[line->vertex_index(1)] = true;
+                    }
+
+                  minimal_length[line->vertex_index(0)]
+                    = std::min(line->diameter(),
+                               minimal_length[line->vertex_index(0)]);
+                  minimal_length[line->vertex_index(1)]
+                    = std::min(line->diameter(),
+                               minimal_length[line->vertex_index(1)]);
+                }
+            }
+          else //dim==1
+            {
+              if (keep_boundary)
+                for (unsigned int vertex=0; vertex<2; ++vertex)
+                  if (cell->at_boundary(vertex) == true)
+                    at_boundary[cell->vertex_index(vertex)] = true;
+
+              minimal_length[cell->vertex_index(0)]
+                = std::min(cell->diameter(),
+                           minimal_length[cell->vertex_index(0)]);
+              minimal_length[cell->vertex_index(1)]
+                = std::min(cell->diameter(),
+                           minimal_length[cell->vertex_index(1)]);
+            }
+        }
+
+    // create a random number generator for the interval [-1,1]. we use
+    // this to make sure the distribution we get is repeatable, i.e.,
+    // if you call the function twice on the same mesh, then you will
+    // get the same mesh. this would not be the case if you used
+    // the rand() function, which carries around some internal state
+    boost::random::mt19937 rng;
+    boost::random::uniform_real_distribution<> uniform_distribution(-1,1);
+
+    // If the triangulation is distributed, we need to
+    // exchange the moved vertices across mpi processes
+    if (parallel::distributed::Triangulation< dim, spacedim > *distributed_triangulation
+        = dynamic_cast<parallel::distributed::Triangulation<dim,spacedim>*> (&triangulation))
+      {
+        const std::vector<bool> locally_owned_vertices = get_locally_owned_vertices(triangulation);
+        std::vector<bool>       vertex_moved (triangulation.n_vertices(), false);
+
+        // Next move vertices on locally owned cells
+        for (typename Triangulation<dim,spacedim>::active_cell_iterator
+             cell=triangulation.begin_active(); cell!=triangulation.end(); ++cell)
+          if (cell->is_locally_owned())
+            {
+              for (unsigned int vertex_no=0; vertex_no<GeometryInfo<dim>::vertices_per_cell;
+                   ++vertex_no)
+                {
+                  const unsigned global_vertex_no = cell->vertex_index(vertex_no);
+
+                  // ignore this vertex if we shall keep the boundary and
+                  // this vertex *is* at the boundary, if it is already moved
+                  // or if another process moves this vertex
+                  if ((keep_boundary && at_boundary[global_vertex_no])
+                      || vertex_moved[global_vertex_no]
+                      || !locally_owned_vertices[global_vertex_no])
+                    continue;
+
+                  // first compute a random shift vector
+                  Point<spacedim> shift_vector;
+                  for (unsigned int d=0; d<spacedim; ++d)
+                    shift_vector(d) = uniform_distribution(rng);
+
+                  shift_vector *= factor * minimal_length[global_vertex_no] /
+                                  std::sqrt(shift_vector.square());
+
+                  // finally move the vertex
+                  cell->vertex(vertex_no) += shift_vector;
+                  vertex_moved[global_vertex_no] = true;
+                }
+            }
+
+#ifdef DEAL_II_WITH_P4EST
+        distributed_triangulation
+        ->communicate_locally_moved_vertices(locally_owned_vertices);
+#else
+        (void)distributed_triangulation;
+        Assert (false, ExcInternalError());
+#endif
+      }
+    else
+      // if this is a sequential triangulation, we could in principle
+      // use the algorithm above, but we'll use an algorithm that we used
+      // before the parallel::distributed::Triangulation was introduced
+      // in order to preserve backward compatibility
+      {
+        // loop over all vertices and compute their new locations
+        const unsigned int n_vertices = triangulation.n_vertices();
+        std::vector<Point<spacedim> > new_vertex_locations (n_vertices);
+        const std::vector<Point<spacedim> > &old_vertex_locations
+          = triangulation.get_vertices();
+
+        for (unsigned int vertex=0; vertex<n_vertices; ++vertex)
+          {
+            // ignore this vertex if we will keep the boundary and
+            // this vertex *is* at the boundary
+            if (keep_boundary && at_boundary[vertex])
+              new_vertex_locations[vertex] = old_vertex_locations[vertex];
+            else
+              {
+                // compute a random shift vector
+                Point<spacedim> shift_vector;
+                for (unsigned int d=0; d<spacedim; ++d)
+                  shift_vector(d) = uniform_distribution(rng);
+
+                shift_vector *= factor * minimal_length[vertex] /
+                                std::sqrt(shift_vector.square());
+
+                // record new vertex location
+                new_vertex_locations[vertex] = old_vertex_locations[vertex] + shift_vector;
+              }
+          }
+
+        // now do the actual move of the vertices
+        for (typename Triangulation<dim,spacedim>::active_cell_iterator
+             cell=triangulation.begin_active(); cell!=triangulation.end(); ++cell)
+          for (unsigned int vertex_no=0;
+               vertex_no<GeometryInfo<dim>::vertices_per_cell; ++vertex_no)
+            cell->vertex(vertex_no) = new_vertex_locations[cell->vertex_index(vertex_no)];
+      }
+
+    // Correct hanging nodes if necessary
+    if (dim>=2)
+      {
+        // We do the same as in GridTools::transform
+        //
+        // exclude hanging nodes at the boundaries of artificial cells:
+        // these may belong to ghost cells for which we know the exact
+        // location of vertices, whereas the artificial cell may or may
+        // not be further refined, and so we cannot know whether
+        // the location of the hanging node is correct or not
+        typename Triangulation<dim,spacedim>::active_cell_iterator
+        cell = triangulation.begin_active(),
+        endc = triangulation.end();
+        for (; cell!=endc; ++cell)
+          if (!cell->is_artificial())
+            for (unsigned int face=0;
+                 face<GeometryInfo<dim>::faces_per_cell; ++face)
+              if (cell->face(face)->has_children() &&
+                  !cell->face(face)->at_boundary())
+                {
+                  // this face has hanging nodes
+                  if (dim==2)
+                    cell->face(face)->child(0)->vertex(1)
+                      = (cell->face(face)->vertex(0) +
+                         cell->face(face)->vertex(1)) / 2;
+                  else if (dim==3)
+                    {
+                      cell->face(face)->child(0)->vertex(1)
+                        = .5*(cell->face(face)->vertex(0)
+                              +cell->face(face)->vertex(1));
+                      cell->face(face)->child(0)->vertex(2)
+                        = .5*(cell->face(face)->vertex(0)
+                              +cell->face(face)->vertex(2));
+                      cell->face(face)->child(1)->vertex(3)
+                        = .5*(cell->face(face)->vertex(1)
+                              +cell->face(face)->vertex(3));
+                      cell->face(face)->child(2)->vertex(3)
+                        = .5*(cell->face(face)->vertex(2)
+                              +cell->face(face)->vertex(3));
+
+                      // center of the face
+                      cell->face(face)->child(0)->vertex(3)
+                        = .25*(cell->face(face)->vertex(0)
+                               +cell->face(face)->vertex(1)
+                               +cell->face(face)->vertex(2)
+                               +cell->face(face)->vertex(3));
+                    }
+                }
+      }
+  }
+
+
+
+  template <int dim, template <int, int> class MeshType, int spacedim>
+  unsigned int
+  find_closest_vertex (const MeshType<dim,spacedim> &mesh,
+                       const Point<spacedim>        &p)
+  {
+    // Return the index of the used vertex of the
+    // triangulation underlying @p mesh that is closest
+    // to @p p. First fetch the vertex locations and
+    // the flags telling which vertices are used
+    const Triangulation<dim, spacedim> &tria = mesh.get_triangulation();
+
+    const std::vector< Point<spacedim> > &vertices = tria.get_vertices();
+    const std::vector< bool       > &used     = tria.get_used_vertices();
+
+    // Seed the search with the first used vertex;
+    // until we look further it is the closest one
+    std::vector<bool>::const_iterator first =
+      std::find(used.begin(), used.end(), true);
+
+    // Assert that at least one vertex
+    // is actually used
+    Assert(first != used.end(), ExcInternalError());
+
+    unsigned int best_vertex = std::distance(used.begin(), first);
+    double       best_dist   = (p - vertices[best_vertex]).norm_square();
+
+    // Test all remaining used vertices; squared distances are
+    // compared and, on a tie, the lower vertex index is kept
+    for (unsigned int j = best_vertex+1; j < vertices.size(); j++)
+      if (used[j])
+        {
+          double dist = (p - vertices[j]).norm_square();
+          if (dist < best_dist)
+            {
+              best_vertex = j;
+              best_dist   = dist;
+            }
+        }
+
+    return best_vertex;
+  }
+
+
+  template<int dim, template<int, int> class MeshType, int spacedim>
+#ifndef _MSC_VER
+  std::vector<typename MeshType<dim, spacedim>::active_cell_iterator>
+#else
+  std::vector<typename dealii::internal::ActiveCellIterator<dim, spacedim, MeshType<dim, spacedim> >::type>
+#endif
+  find_cells_adjacent_to_vertex(const MeshType<dim,spacedim> &mesh,
+                                const unsigned int            vertex)
+  {
+    // collect all active cells of @p mesh that touch
+    // vertex number @p vertex; first check that the
+    // index is in range and refers to a used vertex
+    Assert(vertex < mesh.get_triangulation().n_vertices(),
+           ExcIndexRange(0,mesh.get_triangulation().n_vertices(),vertex));
+    Assert(mesh.get_triangulation().get_used_vertices()[vertex],
+           ExcVertexNotUsed(vertex));
+
+    // use a set instead of a vector
+    // to ensure that cells are inserted only
+    // once
+    std::set<typename dealii::internal::ActiveCellIterator<dim, spacedim, MeshType<dim, spacedim> >::type> adjacent_cells;
+
+    typename dealii::internal::ActiveCellIterator<dim, spacedim, MeshType<dim, spacedim> >::type
+    cell = mesh.begin_active(),
+    endc = mesh.end();
+
+    // go through all active cells and check whether the vertex is part of that cell
+    //
+    // in 1d, this is all we need to care about. in 2d/3d we also need to worry
+    // that the vertex might be a hanging node on a face or edge of a cell; in
+    // this case, we would want to add those cells as well on whose faces the
+    // vertex is located but for which it is not a vertex itself.
+    //
+    // getting this right is a lot simpler in 2d than in 3d. in 2d, a hanging
+    // node can only be in the middle of a face and we can query the neighboring
+    // cell from the current cell. on the other hand, in 3d a hanging node
+    // vertex can also be on an edge but there can be many other cells on
+    // this edge and we cannot access them from the cell we are currently
+    // on.
+    //
+    // so, in the 3d case, if we run the algorithm as in 2d, we catch all
+    // those cells for which the vertex we seek is on a *subface*, but we
+    // miss the case of cells for which the vertex we seek is on a
+    // sub-edge for which there is no corresponding sub-face (because the
+    // immediate neighbor behind this face is not refined), see for example
+    // the bits/find_cells_adjacent_to_vertex_6 testcase. thus, if we
+    // haven't yet found the vertex for the current cell we also need to
+    // look at the mid-points of edges
+    //
+    // as a final note, deciding whether a neighbor is actually coarser is
+    // simple in the case of isotropic refinement (we just need to look at
+    // the level of the current and the neighboring cell). however, this
+    // isn't so simple if we have used anisotropic refinement since then
+    // the level of a cell is not indicative of whether it is coarser or
+    // not than the current cell. ultimately, we want to add all cells on
+    // which the vertex is, independent of whether they are coarser or
+    // finer and so in the 2d case below we simply add *any* *active* neighbor.
+    // in the worst case, we add cells multiple times to the adjacent_cells
+    // list, but std::set throws out those cells already entered
+    for (; cell != endc; ++cell)
+      {
+        for (unsigned int v = 0; v < GeometryInfo<dim>::vertices_per_cell; v++)
+          if (cell->vertex_index(v) == vertex)
+            {
+              // OK, we found a cell that contains
+              // the given vertex. We add it
+              // to the list.
+              adjacent_cells.insert(cell);
+
+              // as explained above, in 2+d we need to check whether
+              // this vertex is on a face behind which there is a
+              // (possibly) coarser neighbor. if this is the case,
+              // then we need to also add this neighbor
+              if (dim >= 2)
+                for (unsigned int vface = 0; vface < dim; vface++)
+                  {
+                    const unsigned int face =
+                      GeometryInfo<dim>::vertex_to_face[v][vface];
+
+                    if (!cell->at_boundary(face)
+                        &&
+                        cell->neighbor(face)->active())
+                      {
+                        // there is a (possibly) coarser cell behind a
+                        // face to which the vertex belongs. the
+                        // vertex we are looking at is then either a
+                        // vertex of that coarser neighbor, or it is a
+                        // hanging node on one of the faces of that
+                        // cell. in either case, it is adjacent to the
+                        // vertex, so add it to the list as well (if
+                        // the cell was already in the list then the
+                        // std::set makes sure that we get it only
+                        // once)
+                        adjacent_cells.insert (cell->neighbor(face));
+                      }
+                  }
+
+              // in any case, we have found a cell, so go to the next cell
+              goto next_cell;
+            }
+
+        // in 3d also loop over the edges
+        if (dim >= 3)
+          {
+            for (unsigned int e=0; e<GeometryInfo<dim>::lines_per_cell; ++e)
+              if (cell->line(e)->has_children())
+                // the only place where this vertex could have been
+                // hiding is on the mid-edge point of the edge we
+                // are looking at
+                if (cell->line(e)->child(0)->vertex_index(1) == vertex)
+                  {
+                    adjacent_cells.insert(cell);
+
+                    // jump out of this tangle of nested loops
+                    goto next_cell;
+                  }
+          }
+
+        // in more than 3d we would probably have to do the same as
+        // above also for even lower-dimensional objects
+        Assert (dim <= 3, ExcNotImplemented());
+
+        // move on to the next cell if we have found the
+        // vertex on the current one
+next_cell:
+        ;
+      }
+
+    // if this was an active vertex then there needs to have been
+    // at least one cell to which it is adjacent!
+    Assert (adjacent_cells.size() > 0, ExcInternalError());
+
+    // return the result as a vector, rather than the set we built above
+    return
+      std::vector<typename dealii::internal::ActiveCellIterator<dim, spacedim, MeshType<dim, spacedim> >::type>
+      (adjacent_cells.begin(), adjacent_cells.end());
+  }
+
+
+
+  namespace
+  {
+    template <int dim, template<int, int> class MeshType, int spacedim>
+    void find_active_cell_around_point_internal
+    (const MeshType<dim,spacedim> &mesh,
+#ifndef _MSC_VER
+     std::set<typename MeshType<dim, spacedim>::active_cell_iterator> &searched_cells,
+     std::set<typename MeshType<dim, spacedim>::active_cell_iterator> &adjacent_cells)
+#else
+     std::set<typename dealii::internal::ActiveCellIterator<dim, spacedim, MeshType<dim, spacedim> >::type> &searched_cells,
+     std::set<typename dealii::internal::ActiveCellIterator<dim, spacedim, MeshType<dim, spacedim> >::type> &adjacent_cells)
+#endif
+    {
+#ifndef _MSC_VER
+      typedef typename MeshType<dim, spacedim>::active_cell_iterator cell_iterator;
+#else
+      typedef typename dealii::internal::ActiveCellIterator<dim, spacedim, MeshType<dim, spacedim> >::type cell_iterator;
+#endif
+
+      // everything in adjacent_cells now counts as searched
+      searched_cells.insert(adjacent_cells.begin(), adjacent_cells.end());
+      // now we need to collect all active neighbors
+      // of the cells in adjacent_cells that we
+      // have not yet searched.
+      std::set<cell_iterator> adjacent_cells_new;
+
+      typename std::set<cell_iterator>::const_iterator
+      cell = adjacent_cells.begin(),
+      endc = adjacent_cells.end();
+      for (; cell != endc; ++cell)
+        {
+          std::vector<cell_iterator> active_neighbors;
+          get_active_neighbors<MeshType<dim, spacedim> >(*cell, active_neighbors);
+          for (unsigned int i=0; i<active_neighbors.size(); ++i)
+            if (searched_cells.find(active_neighbors[i]) == searched_cells.end())
+              adjacent_cells_new.insert(active_neighbors[i]);
+        }
+      adjacent_cells.clear();
+      adjacent_cells.insert(adjacent_cells_new.begin(), adjacent_cells_new.end());
+      if (adjacent_cells.size() == 0)
+        {
+          // we haven't found any other cell that would be a
+          // neighbor of a previously found cell, but we know
+          // that we haven't checked all cells yet. that means
+          // that the domain is disconnected. in that case,
+          // choose the first previously untouched cell we
+          // can find (if none is left, the set stays empty)
+          cell_iterator it = mesh.begin_active();
+          for ( ; it!=mesh.end(); ++it)
+            if (searched_cells.find(it) == searched_cells.end())
+              {
+                adjacent_cells.insert(it);
+                break;
+              }
+        }
+    }
+  }
+
+  template <int dim, template<int, int> class MeshType, int spacedim>
+#ifndef _MSC_VER
+  typename MeshType<dim, spacedim>::active_cell_iterator
+#else
+  typename dealii::internal::ActiveCellIterator<dim, spacedim, MeshType<dim, spacedim> >::type
+#endif
+  find_active_cell_around_point (const MeshType<dim,spacedim> &mesh,
+                                 const Point<spacedim>        &p)
+  {
+    return // forward to the Mapping overload with the static Q1 mapping; keep only the cell
+      find_active_cell_around_point<dim,MeshType,spacedim>
+      (StaticMappingQ1<dim,spacedim>::mapping,
+       mesh, p).first;
+  }
+
+
+  template <int dim, template <int, int> class MeshType, int spacedim>
+#ifndef _MSC_VER
+  std::pair<typename MeshType<dim, spacedim>::active_cell_iterator, Point<dim> >
+#else
+  std::pair<typename dealii::internal::ActiveCellIterator<dim, spacedim, MeshType<dim, spacedim> >::type, Point<dim> >
+#endif
+  find_active_cell_around_point (const Mapping<dim,spacedim>  &mapping,
+                                 const MeshType<dim,spacedim> &mesh,
+                                 const Point<spacedim>        &p)
+  {
+    typedef typename dealii::internal::ActiveCellIterator<dim, spacedim, MeshType<dim, spacedim> >::type active_cell_iterator;
+
+    // Start with the largest distance from
+    // the unit cell that we still accept: a
+    // point may lie at most 1e-10 outside
+    // a cell to count as being inside it
+    double best_distance = 1e-10;
+    int    best_level = -1;
+    std::pair<active_cell_iterator, Point<dim> > best_cell;
+
+    // Find closest vertex and determine
+    // all adjacent cells
+    std::vector<active_cell_iterator> adjacent_cells_tmp
+      = find_cells_adjacent_to_vertex(mesh,
+                                      find_closest_vertex(mesh, p));
+
+    // Make sure that we have found
+    // at least one cell adjacent to vertex.
+    Assert(adjacent_cells_tmp.size()>0, ExcInternalError());
+
+    // Copy all the cells into a std::set
+    std::set<active_cell_iterator> adjacent_cells (adjacent_cells_tmp.begin(),
+                                                   adjacent_cells_tmp.end());
+    std::set<active_cell_iterator> searched_cells;
+
+    // Determine the number of active cells
+    // in the grid.
+    // As long as we have not found
+    // the cell and have not searched
+    // every active cell in the triangulation,
+    // we keep on looking.
+    const unsigned int n_active_cells = mesh.get_triangulation().n_active_cells();
+    bool found = false;
+    unsigned int cells_searched = 0;
+    while (!found && cells_searched < n_active_cells)
+      {
+        typename std::set<active_cell_iterator>::const_iterator
+        cell = adjacent_cells.begin(),
+        endc = adjacent_cells.end();
+        for (; cell != endc; ++cell)
+          {
+            try
+              {
+                const Point<dim> p_cell = mapping.transform_real_to_unit_cell(*cell, p);
+
+                // calculate the infinity norm of
+                // the distance vector to the unit cell.
+                const double dist = GeometryInfo<dim>::distance_to_unit_cell(p_cell);
+
+                // Accept the cell if the point is closer to
+                // its unit cell than the best candidate so
+                // far (or at least within tolerance). On equal
+                // distance, the cell on the finer level wins
+                if ((dist < best_distance)
+                    ||
+                    ((dist == best_distance)
+                     &&
+                     ((*cell)->level() > best_level)))
+                  {
+                    found         = true;
+                    best_distance = dist;
+                    best_level    = (*cell)->level();
+                    best_cell     = std::make_pair(*cell, p_cell);
+                  }
+              }
+            catch (typename MappingQGeneric<dim,spacedim>::ExcTransformationFailed &)
+              {
+                // ok, the transformation
+                // failed presumably
+                // because the point we
+                // are looking for lies
+                // outside the current
+                // cell. this means that
+                // the current cell can't
+                // be the cell around the
+                // point, so just ignore
+                // this cell and move on
+                // to the next
+              }
+          }
+
+        // update the number of cells searched
+        cells_searched += adjacent_cells.size();
+
+        // if we have not found the cell in
+        // question and have not yet searched every
+        // cell, we expand our search to
+        // all the not already searched neighbors of
+        // the cells in adjacent_cells. This is
+        // what find_active_cell_around_point_internal
+        // is for.
+        if (!found && cells_searched < n_active_cells)
+          {
+            find_active_cell_around_point_internal<dim,MeshType,spacedim>
+            (mesh, searched_cells, adjacent_cells);
+          }
+      }
+
+    AssertThrow (best_cell.first.state() == IteratorState::valid,
+                 ExcPointNotFound<spacedim>(p));
+
+    return best_cell;
+  }
+
+
+
+  template <int dim, int spacedim>
+  std::pair<typename hp::DoFHandler<dim,spacedim>::active_cell_iterator, Point<dim> >
+  find_active_cell_around_point (const hp::MappingCollection<dim,spacedim>   &mapping,
+                                 const hp::DoFHandler<dim,spacedim> &mesh,
+                                 const Point<spacedim>     &p)
+  {
+    Assert ((mapping.size() == 1) ||
+            (mapping.size() == mesh.get_fe().size()),
+            ExcMessage ("Mapping collection needs to have either size 1 "
+                        "or size equal to the number of elements in "
+                        "the FECollection."));
+
+    typedef typename hp::DoFHandler<dim,spacedim>::active_cell_iterator cell_iterator;
+
+    std::pair<cell_iterator, Point<dim> > best_cell;
+    // If we have only one element in the MappingCollection,
+    // we use find_active_cell_around_point using only one
+    // mapping.
+    if (mapping.size() == 1)
+      best_cell = find_active_cell_around_point(mapping[0], mesh, p);
+    else
+      {
+        // otherwise use, for every cell, the mapping selected by its active_fe_index()
+
+        // The best distance is set to the
+        // maximum allowable distance from
+        // the unit cell; we assume a
+        // max. deviation of 1e-10
+        double best_distance = 1e-10;
+        int    best_level = -1;
+
+
+        // Find closest vertex and determine
+        // all adjacent cells
+        unsigned int vertex = find_closest_vertex(mesh, p);
+
+        std::vector<cell_iterator> adjacent_cells_tmp =
+          find_cells_adjacent_to_vertex(mesh, vertex);
+
+        // Make sure that we have found
+        // at least one cell adjacent to vertex.
+        Assert(adjacent_cells_tmp.size()>0, ExcInternalError());
+
+        // Copy all the cells into a std::set
+        std::set<cell_iterator> adjacent_cells(adjacent_cells_tmp.begin(), adjacent_cells_tmp.end());
+        std::set<cell_iterator> searched_cells;
+
+        // Determine the number of active cells in the
+        // grid. The search below only ever visits active
+        // cells, so this (and not the number of cells on
+        // all levels) is the bound at which we must stop:
+        // we keep on looking until we have found the cell
+        // or have tested every active cell.
+        const unsigned int n_active_cells = mesh.get_triangulation().n_active_cells();
+        bool found = false;
+        unsigned int cells_searched = 0;
+        while (!found && cells_searched < n_active_cells)
+          {
+            typename std::set<cell_iterator>::const_iterator
+            cell = adjacent_cells.begin(),
+            endc = adjacent_cells.end();
+            for (; cell != endc; ++cell)
+              {
+                try
+                  {
+                    const Point<dim> p_cell = mapping[(*cell)->active_fe_index()].transform_real_to_unit_cell(*cell, p);
+
+
+                    // calculate the infinity norm of
+                    // the distance vector to the unit cell.
+                    const double dist = GeometryInfo<dim>::distance_to_unit_cell(p_cell);
+
+                    // We compare if the point is inside the
+                    // unit cell (or at least not too far
+                    // outside). If it is, it is also checked
+                    // that the cell has a more refined state
+                    if (dist < best_distance ||
+                        (dist == best_distance && (*cell)->level() > best_level))
+                      {
+                        found       = true;
+                        best_distance = dist;
+                        best_level    = (*cell)->level();
+                        best_cell     = std::make_pair(*cell, p_cell);
+                      }
+                  }
+                catch (typename MappingQGeneric<dim,spacedim>::ExcTransformationFailed &)
+                  {
+                    // ok, the transformation
+                    // failed presumably
+                    // because the point we
+                    // are looking for lies
+                    // outside the current
+                    // cell. this means that
+                    // the current cell can't
+                    // be the cell around the
+                    // point, so just ignore
+                    // this cell and move on
+                    // to the next
+                  }
+              }
+            // update the number of cells searched
+            cells_searched += adjacent_cells.size();
+            // if we have not found the cell in
+            // question and have not yet searched every
+            // cell, we expand our search to
+            // all the not already searched neighbors of
+            // the cells in adjacent_cells.
+            if (!found && cells_searched < n_active_cells)
+              {
+                find_active_cell_around_point_internal<dim,hp::DoFHandler,spacedim>
+                (mesh, searched_cells, adjacent_cells);
+              }
+
+          }
+      }
+
+    AssertThrow (best_cell.first.state() == IteratorState::valid,
+                 ExcPointNotFound<spacedim>(p));
+
+    return best_cell;
+  }
+
+
+  namespace
+  {
+    // Return whether at least one cell in @p cells is locally owned.
+    template<class MeshType>
+    bool
+    contains_locally_owned_cells (const std::vector<typename MeshType::active_cell_iterator> &cells)
+    {
+      for (typename std::vector<typename MeshType::active_cell_iterator>::const_iterator
+           it = cells.begin(); it != cells.end(); ++it)
+        {
+          if ((*it)->is_locally_owned())
+            return true;
+        }
+      return false;
+    }
+    // Return whether at least one cell in @p cells is artificial.
+    template<class MeshType>
+    bool
+    contains_artificial_cells (const std::vector<typename MeshType::active_cell_iterator> &cells)
+    {
+      for (typename std::vector<typename MeshType::active_cell_iterator>::const_iterator
+           it = cells.begin(); it != cells.end(); ++it)
+        {
+          if ((*it)->is_artificial())
+            return true;
+        }
+      return false;
+    }
+
+  }
+
+
+
+  template <class MeshType>
+  std::vector<typename MeshType::active_cell_iterator>
+  compute_active_cell_halo_layer
+  (const MeshType                                                                    &mesh,
+   const std_cxx11::function<bool (const typename MeshType::active_cell_iterator &)> &predicate)
+  {
+    std::vector<typename MeshType::active_cell_iterator> active_halo_layer;
+    std::vector<bool> locally_active_vertices_on_subdomain (mesh.get_triangulation().n_vertices(),
+                                                            false);
+
+    // Find the cells for which the predicate is true.
+    // These are the cells around which we wish to construct
+    // the halo layer; flag all of their vertices.
+    for (typename MeshType::active_cell_iterator
+         cell = mesh.begin_active();
+         cell != mesh.end(); ++cell)
+      if (predicate(cell)) // True predicate --> Part of subdomain
+        for (unsigned int v=0; v<GeometryInfo<MeshType::dimension>::vertices_per_cell; ++v)
+          locally_active_vertices_on_subdomain[cell->vertex_index(v)] = true;
+
+    // Find the cells that do not conform to the predicate
+    // but share a vertex with the selected subdomain.
+    // These comprise the halo layer (each cell is added once).
+    for (typename MeshType::active_cell_iterator
+         cell = mesh.begin_active();
+         cell != mesh.end(); ++cell)
+      if (!predicate(cell)) // False predicate --> Potential halo cell
+        for (unsigned int v=0; v<GeometryInfo<MeshType::dimension>::vertices_per_cell; ++v)
+          if (locally_active_vertices_on_subdomain[cell->vertex_index(v)] == true)
+            {
+              active_halo_layer.push_back(cell);
+              break;
+            }
+
+    return active_halo_layer;
+  }
+
+
+
+  template <class MeshType>
+  std::vector<typename MeshType::active_cell_iterator>
+  compute_ghost_cell_halo_layer (const MeshType &mesh)
+  {
+    std_cxx11::function<bool (const typename MeshType::active_cell_iterator &)> predicate
+      = IteratorFilters::LocallyOwnedCell();
+    // build the halo layer around the cells selected by the LocallyOwnedCell filter
+    const std::vector<typename MeshType::active_cell_iterator>
+    active_halo_layer = compute_active_cell_halo_layer (mesh, predicate);
+
+    // Check that we never return locally owned or artificial cells.
+    // What is left should only be the ghost cells.
+    Assert(contains_locally_owned_cells<MeshType>(active_halo_layer) == false,
+           ExcMessage("Halo layer contains locally owned cells"));
+    Assert(contains_artificial_cells<MeshType>(active_halo_layer) == false,
+           ExcMessage("Halo layer contains artificial cells"));
+
+    return active_halo_layer;
+  }
+
+
+
+  template <int dim, int spacedim>
+  std::vector<std::set<typename Triangulation<dim,spacedim>::active_cell_iterator> >
+  vertex_to_cell_map(const Triangulation<dim,spacedim> &triangulation)
+  {
+    std::vector<std::set<typename Triangulation<dim,spacedim>::active_cell_iterator> >
+    vertex_to_cell_map(triangulation.n_vertices());
+    typename Triangulation<dim,spacedim>::active_cell_iterator cell = triangulation.begin_active(),
+                                                               endc = triangulation.end();
+    for (; cell!=endc; ++cell)
+      for (unsigned int i=0; i<GeometryInfo<dim>::vertices_per_cell; ++i)
+        vertex_to_cell_map[cell->vertex_index(i)].insert(cell);
+
+    // Take care of hanging nodes: register each active neighbor at the vertices of the shared face
+    cell = triangulation.begin_active();
+    for (; cell!=endc; ++cell)
+      {
+        for (unsigned int i=0; i<GeometryInfo<dim>::faces_per_cell; ++i)
+          {
+            if ((cell->at_boundary(i)==false) && (cell->neighbor(i)->active()))
+              {
+                typename Triangulation<dim,spacedim>::active_cell_iterator adjacent_cell =
+                  cell->neighbor(i);
+                for (unsigned int j=0; j<GeometryInfo<dim>::vertices_per_face; ++j)
+                  vertex_to_cell_map[cell->face(i)->vertex_index(j)].insert(adjacent_cell);
+              }
+          }
+
+        // in 3d also loop over the edges
+        if (dim==3)
+          {
+            for (unsigned int i=0; i<GeometryInfo<dim>::lines_per_cell; ++i)
+              if (cell->line(i)->has_children())
+                // the only place where this vertex could have been
+                // hiding is on the mid-edge point of the edge we
+                // are looking at
+                vertex_to_cell_map[cell->line(i)->child(0)->vertex_index(1)].insert(cell);
+          }
+      }
+
+    return vertex_to_cell_map;
+  }
+
+
+
+  // Compute a map from the local vertex indices of a
+  // parallel::distributed::Triangulation to globally unique vertex indices.
+  //
+  // A vertex is "owned" by the process with the lowest subdomain id among
+  // all active cells adjacent to the vertex (as reported by
+  // vertex_to_cell_map()). Every process numbers the vertices it owns
+  // starting at zero, the numbers are then shifted by the number of
+  // vertices owned by all processes with a lower rank, and finally the
+  // owners send the global indices of shared vertices to the other
+  // processes adjacent to these vertices.
+  //
+  // This function involves collective and point-to-point MPI communication
+  // on the communicator of the triangulation. Without MPI support it only
+  // triggers an assertion and returns an empty map.
+  template <int dim, int spacedim>
+  std::map<unsigned int,types::global_vertex_index>
+  compute_local_to_global_vertex_index_map(
+    const parallel::distributed::Triangulation<dim,spacedim> &triangulation)
+  {
+    std::map<unsigned int,types::global_vertex_index> local_to_global_vertex_index;
+
+#ifndef DEAL_II_WITH_MPI
+
+    // without MPI, this function doesn't make sense because one cannot
+    // use parallel::distributed::Triangulation in any meaningful
+    // way
+    (void)triangulation;
+    Assert (false, ExcMessage ("This function does not make any sense "
+                               "for parallel::distributed::Triangulation "
+                               "objects if you do not have MPI enabled."));
+
+#else
+
+    typedef typename Triangulation<dim,spacedim>::active_cell_iterator active_cell_iterator;
+    const std::vector<std::set<active_cell_iterator> > vertex_to_cell =
+      vertex_to_cell_map(triangulation);
+
+    // Create a local index for the locally "owned" vertices
+    types::global_vertex_index next_index = 0;
+    unsigned int max_cellid_size = 0;
+    std::set<std::pair<types::subdomain_id,types::global_vertex_index> > vertices_added;
+    std::map<types::subdomain_id,std::set<unsigned int> > vertices_to_recv;
+    std::map<types::subdomain_id,std::vector<std_cxx11::tuple<types::global_vertex_index,
+        types::global_vertex_index,std::string> > > vertices_to_send;
+    active_cell_iterator cell = triangulation.begin_active(),
+                         endc = triangulation.end();
+    std::set<active_cell_iterator> missing_vert_cells;
+    std::set<unsigned int> used_vertex_index;
+    for (; cell!=endc; ++cell)
+      {
+        if (cell->is_locally_owned())
+          {
+            for (unsigned int i=0; i<GeometryInfo<dim>::vertices_per_cell; ++i)
+              {
+                types::subdomain_id lowest_subdomain_id = cell->subdomain_id();
+                typename std::set<active_cell_iterator>::iterator
+                adjacent_cell = vertex_to_cell[cell->vertex_index(i)].begin(),
+                end_adj_cell = vertex_to_cell[cell->vertex_index(i)].end();
+                for (; adjacent_cell!=end_adj_cell; ++adjacent_cell)
+                  lowest_subdomain_id = std::min(lowest_subdomain_id,
+                                                 (*adjacent_cell)->subdomain_id());
+
+                // See if I "own" this vertex
+                if (lowest_subdomain_id==cell->subdomain_id())
+                  {
+                    // Check that the vertex we are working on is a vertex that
+                    // has not been dealt with yet
+                    if (used_vertex_index.find(cell->vertex_index(i))==used_vertex_index.end())
+                      {
+                        // Set the local index
+                        local_to_global_vertex_index[cell->vertex_index(i)] = next_index++;
+
+                        // Store the information that will be sent to the adjacent cells
+                        // on other subdomains
+                        adjacent_cell = vertex_to_cell[cell->vertex_index(i)].begin();
+                        for (; adjacent_cell!=end_adj_cell; ++adjacent_cell)
+                          if ((*adjacent_cell)->subdomain_id()!=cell->subdomain_id())
+                            {
+                              std::pair<types::subdomain_id,types::global_vertex_index>
+                              tmp((*adjacent_cell)->subdomain_id(), cell->vertex_index(i));
+                              if (vertices_added.find(tmp)==vertices_added.end())
+                                {
+                                  // convert the cell id to a string only once
+                                  const std::string cell_id_string = cell->id().to_string();
+                                  vertices_to_send[(*adjacent_cell)->subdomain_id()].push_back(
+                                    std_cxx11::tuple<types::global_vertex_index,types::global_vertex_index,
+                                    std::string> (i,cell->vertex_index(i),
+                                                  cell_id_string));
+                                  if (cell_id_string.size() > max_cellid_size)
+                                    max_cellid_size = cell_id_string.size();
+                                  vertices_added.insert(tmp);
+                                }
+                            }
+                        used_vertex_index.insert(cell->vertex_index(i));
+                      }
+                  }
+                else
+                  {
+                    // We don't own the vertex so we will receive its global index
+                    vertices_to_recv[lowest_subdomain_id].insert(cell->vertex_index(i));
+                    missing_vert_cells.insert(cell);
+                  }
+              }
+          }
+
+        // Some hanging nodes are vertices of ghost cells. They need to be
+        // received.
+        if (cell->is_ghost())
+          {
+            for (unsigned int i=0; i<GeometryInfo<dim>::faces_per_cell; ++i)
+              {
+                if (cell->at_boundary(i)==false)
+                  {
+                    if (cell->neighbor(i)->active())
+                      {
+                        typename Triangulation<dim,spacedim>::active_cell_iterator adjacent_cell =
+                          cell->neighbor(i);
+                        if ((adjacent_cell->is_locally_owned()))
+                          {
+                            types::subdomain_id adj_subdomain_id = adjacent_cell->subdomain_id();
+                            if (cell->subdomain_id()<adj_subdomain_id)
+                              for (unsigned int j=0; j<GeometryInfo<dim>::vertices_per_face; ++j)
+                                {
+                                  vertices_to_recv[cell->subdomain_id()].insert(cell->face(i)->vertex_index(j));
+                                  missing_vert_cells.insert(cell);
+                                }
+                          }
+                      }
+                  }
+              }
+          }
+      }
+
+    // Get the size of the largest CellID string
+    max_cellid_size = Utilities::MPI::max(max_cellid_size, triangulation.get_communicator());
+
+    // Make indices global by getting the number of vertices owned by each
+    // processors and shifting the indices accordingly. Every process
+    // contributes exactly one element, so the receive count (which is the
+    // number of elements received from *each* process) is one as well; the
+    // MPI data type has to be the one that matches
+    // types::global_vertex_index.
+    const unsigned int n_cpu = Utilities::MPI::n_mpi_processes(triangulation.get_communicator());
+    std::vector<types::global_vertex_index> indices(n_cpu);
+    MPI_Allgather(&next_index, 1, DEAL_II_VERTEX_INDEX_MPI_TYPE, &indices[0],
+                  1, DEAL_II_VERTEX_INDEX_MPI_TYPE, triangulation.get_communicator());
+    // the initial value determines the type in which std::accumulate sums
+    // up, so it has to be a types::global_vertex_index rather than an int
+    const types::global_vertex_index shift = std::accumulate(&indices[0],
+                                                             &indices[0]+triangulation.locally_owned_subdomain(),
+                                                             types::global_vertex_index(0));
+
+    std::map<unsigned int,types::global_vertex_index>::iterator
+    global_index_it = local_to_global_vertex_index.begin(),
+    global_index_end = local_to_global_vertex_index.end();
+    for (; global_index_it!=global_index_end; ++global_index_it)
+      global_index_it->second += shift;
+
+    // In a first message, send the global ID of the vertices and the local
+    // positions in the cells. In a second message, send the cell ID as a
+    // resized string. This is done in two messages so that types are not mixed
+
+    // Send the first message
+    std::vector<std::vector<types::global_vertex_index> > vertices_send_buffers(
+      vertices_to_send.size());
+    std::vector<MPI_Request> first_requests(vertices_to_send.size());
+    typename std::map<types::subdomain_id,
+             std::vector<std_cxx11::tuple<types::global_vertex_index,
+             types::global_vertex_index,std::string> > >::iterator
+             vert_to_send_it = vertices_to_send.begin(),
+             vert_to_send_end = vertices_to_send.end();
+    for (unsigned int i=0; vert_to_send_it!=vert_to_send_end;
+         ++vert_to_send_it, ++i)
+      {
+        int destination = vert_to_send_it->first;
+        const unsigned int n_vertices = vert_to_send_it->second.size();
+        const int buffer_size = 2*n_vertices;
+        vertices_send_buffers[i].resize(buffer_size);
+
+        // fill the buffer
+        for (unsigned int j=0; j<n_vertices; ++j)
+          {
+            vertices_send_buffers[i][2*j] = std_cxx11::get<0>(vert_to_send_it->second[j]);
+            vertices_send_buffers[i][2*j+1] =
+              local_to_global_vertex_index[std_cxx11::get<1>(vert_to_send_it->second[j])];
+          }
+
+        // Send the message
+        MPI_Isend(&vertices_send_buffers[i][0],buffer_size,DEAL_II_VERTEX_INDEX_MPI_TYPE,
+                  destination, 0, triangulation.get_communicator(), &first_requests[i]);
+      }
+
+    // Receive the first message
+    std::vector<std::vector<types::global_vertex_index> > vertices_recv_buffers(
+      vertices_to_recv.size());
+    typename std::map<types::subdomain_id,std::set<unsigned int> >::iterator
+    vert_to_recv_it = vertices_to_recv.begin(),
+    vert_to_recv_end = vertices_to_recv.end();
+    for (unsigned int i=0; vert_to_recv_it!=vert_to_recv_end; ++vert_to_recv_it, ++i)
+      {
+        int source = vert_to_recv_it->first;
+        const unsigned int n_vertices = vert_to_recv_it->second.size();
+        const int buffer_size = 2*n_vertices;
+        vertices_recv_buffers[i].resize(buffer_size);
+
+        // Receive the message
+        MPI_Recv(&vertices_recv_buffers[i][0],buffer_size,DEAL_II_VERTEX_INDEX_MPI_TYPE,
+                 source, 0, triangulation.get_communicator(), MPI_STATUS_IGNORE);
+      }
+
+    // A nonblocking send has to be completed before its buffer may be
+    // released, so wait for all sends of the first message to finish
+    if (first_requests.size() > 0)
+      MPI_Waitall(first_requests.size(), &first_requests[0], MPI_STATUSES_IGNORE);
+
+
+    // Send second message
+    std::vector<std::vector<char> > cellids_send_buffers(vertices_to_send.size());
+    std::vector<MPI_Request> second_requests(vertices_to_send.size());
+    vert_to_send_it = vertices_to_send.begin();
+    for (unsigned int i=0; vert_to_send_it!=vert_to_send_end;
+         ++vert_to_send_it, ++i)
+      {
+        int destination = vert_to_send_it->first;
+        const unsigned int n_vertices = vert_to_send_it->second.size();
+        const int buffer_size = max_cellid_size*n_vertices;
+        cellids_send_buffers[i].resize(buffer_size);
+
+        // fill the buffer
+        unsigned int pos = 0;
+        for (unsigned int j=0; j<n_vertices; ++j)
+          {
+            std::string cell_id = std_cxx11::get<2>(vert_to_send_it->second[j]);
+            for (unsigned int k=0; k<max_cellid_size; ++k, ++pos)
+              {
+                if (k<cell_id.size())
+                  cellids_send_buffers[i][pos] = cell_id[k];
+                // if necessary fill up the reserved part of the buffer with an
+                // invalid value
+                else
+                  cellids_send_buffers[i][pos] = '-';
+              }
+          }
+
+        // Send the message
+        MPI_Isend(&cellids_send_buffers[i][0], buffer_size, MPI_CHAR,
+                  destination, 0, triangulation.get_communicator(), &second_requests[i]);
+      }
+
+    // Receive the second message
+    std::vector<std::vector<char> > cellids_recv_buffers(vertices_to_recv.size());
+    vert_to_recv_it = vertices_to_recv.begin();
+    for (unsigned int i=0; vert_to_recv_it!=vert_to_recv_end; ++vert_to_recv_it, ++i)
+      {
+        int source = vert_to_recv_it->first;
+        const unsigned int n_vertices = vert_to_recv_it->second.size();
+        const int buffer_size = max_cellid_size*n_vertices;
+        cellids_recv_buffers[i].resize(buffer_size);
+
+        // Receive the message
+        MPI_Recv(&cellids_recv_buffers[i][0],buffer_size, MPI_CHAR,
+                 source, 0, triangulation.get_communicator(), MPI_STATUS_IGNORE);
+      }
+
+    // Same as above: complete the sends of the second message before the
+    // send buffers go out of scope
+    if (second_requests.size() > 0)
+      MPI_Waitall(second_requests.size(), &second_requests[0], MPI_STATUSES_IGNORE);
+
+
+    // Match the data received with the required vertices
+    vert_to_recv_it = vertices_to_recv.begin();
+    for (unsigned int i=0; vert_to_recv_it!=vert_to_recv_end; ++i, ++vert_to_recv_it)
+      {
+        for (unsigned int j=0; j<vert_to_recv_it->second.size(); ++j)
+          {
+            const unsigned int local_pos_recv = vertices_recv_buffers[i][2*j];
+            const types::global_vertex_index global_id_recv = vertices_recv_buffers[i][2*j+1];
+            const std::string cellid_recv(&cellids_recv_buffers[i][max_cellid_size*j],
+                                          &cellids_recv_buffers[i][max_cellid_size*(j+1)]);
+            bool found = false;
+            typename std::set<active_cell_iterator>::iterator
+            cell_set_it = missing_vert_cells.begin(),
+            end_cell_set = missing_vert_cells.end();
+            for (; (found==false) && (cell_set_it!=end_cell_set); ++cell_set_it)
+              {
+                // Look for the cell with the received id among the cells
+                // adjacent to any of the vertices of the current cell. (The
+                // index i counts the processes we receive from and must not
+                // be used as a vertex number here.)
+                for (unsigned int v=0;
+                     (found==false) && (v<GeometryInfo<dim>::vertices_per_cell); ++v)
+                  {
+                    typename std::set<active_cell_iterator>::iterator
+                    candidate_cell = vertex_to_cell[(*cell_set_it)->vertex_index(v)].begin(),
+                    end_cell = vertex_to_cell[(*cell_set_it)->vertex_index(v)].end();
+                    for (; candidate_cell!=end_cell; ++candidate_cell)
+                      {
+                        // pad the id in the same way the sender did
+                        std::string current_cellid = (*candidate_cell)->id().to_string();
+                        current_cellid.resize(max_cellid_size,'-');
+                        if (current_cellid.compare(cellid_recv)==0)
+                          {
+                            local_to_global_vertex_index[(*candidate_cell)->vertex_index(local_pos_recv)] =
+                              global_id_recv;
+                            found = true;
+
+                            break;
+                          }
+                      }
+                  }
+              }
+          }
+      }
+#endif
+
+    return local_to_global_vertex_index;
+  }
+
+
+
+  // Build the sparsity pattern that describes which active cells of the
+  // triangulation are connected through a common face.
+  //
+  // On return, cell_connectivity has n_active_cells() rows and columns
+  // indexed by the active cell index. It contains the diagonal entries and,
+  // for every pair of face neighbors, entries in both directions.
+  template <int dim, int spacedim>
+  void
+  get_face_connectivity_of_cells (const Triangulation<dim,spacedim> &triangulation,
+                                  DynamicSparsityPattern            &cell_connectivity)
+  {
+    cell_connectivity.reinit (triangulation.n_active_cells(),
+                              triangulation.n_active_cells());
+
+    // loop over all cells and their neighbors to build the sparsity
+    // pattern. note that it's a bit hard to enter all the connections when a
+    // neighbor has children since we would need to find out which of its
+    // children is adjacent to the current cell. this problem can be omitted
+    // if we only do something if the neighbor has no children -- in that case
+    // it is either on the same or a coarser level than we are. in return, we
+    // have to add entries in both directions for both cells
+    for (typename dealii::internal::ActiveCellIterator<dim, spacedim, Triangulation<dim, spacedim> >::type
+         cell = triangulation.begin_active();
+         cell != triangulation.end(); ++cell)
+      {
+        const unsigned int index = cell->active_cell_index();
+        cell_connectivity.add (index, index);
+        for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+          if ((cell->at_boundary(f) == false)
+              &&
+              (cell->neighbor(f)->has_children() == false))
+            {
+              // a neighbor without children is an active cell, so we can
+              // ask it for its active cell index directly instead of
+              // looking it up in a separate (level,index) map
+              const unsigned int other_index = cell->neighbor(f)->active_cell_index();
+              cell_connectivity.add (index, other_index);
+              cell_connectivity.add (other_index, index);
+            }
+      }
+  }
+
+
+
+  template <int dim, int spacedim>
+  void
+  get_face_connectivity_of_cells (const Triangulation<dim,spacedim> &triangulation,
+                                  SparsityPattern                   &cell_connectivity)
+  {
+    // let the overload for dynamic sparsity patterns do the actual work
+    // and then copy its result into the output object
+    DynamicSparsityPattern dynamic_connectivity;
+    get_face_connectivity_of_cells (triangulation, dynamic_connectivity);
+
+    cell_connectivity.copy_from (dynamic_connectivity);
+  }
+
+
+
+  template <int dim, int spacedim>
+  void
+  get_vertex_connectivity_of_cells (const Triangulation<dim,spacedim> &triangulation,
+                                    DynamicSparsityPattern            &cell_connectivity)
+  {
+    typedef typename Triangulation<dim,spacedim>::active_cell_iterator cell_iterator_type;
+
+    // for every vertex, collect the active cell indices of all cells that
+    // have this vertex as one of their own vertices
+    std::vector<std::vector<unsigned int> > cells_at_vertex (triangulation.n_vertices());
+    for (cell_iterator_type cell = triangulation.begin_active();
+         cell != triangulation.end(); ++cell)
+      {
+        const unsigned int this_index = cell->active_cell_index();
+        for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+          cells_at_vertex[cell->vertex_index(v)].push_back (this_index);
+      }
+
+    // a cell is connected to every cell found at any of its vertices (which
+    // includes the cell itself)
+    cell_connectivity.reinit (triangulation.n_active_cells(),
+                              triangulation.n_active_cells());
+    for (cell_iterator_type cell = triangulation.begin_active();
+         cell != triangulation.end(); ++cell)
+      {
+        for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+          {
+            const std::vector<unsigned int> &adjacent_cells
+              = cells_at_vertex[cell->vertex_index(v)];
+            for (unsigned int n=0; n<adjacent_cells.size(); ++n)
+              cell_connectivity.add (cell->active_cell_index(), adjacent_cells[n]);
+          }
+      }
+  }
+
+
+
+  template <int dim, int spacedim>
+  void
+  partition_triangulation (const unsigned int           n_partitions,
+                           Triangulation<dim,spacedim> &triangulation)
+  {
+    Assert ((dynamic_cast<parallel::distributed::Triangulation<dim,spacedim>*>
+             (&triangulation)
+             == 0),
+            ExcMessage ("Objects of type parallel::distributed::Triangulation "
+                        "are already partitioned implicitly and can not be "
+                        "partitioned again explicitly."));
+    Assert (n_partitions > 0, ExcInvalidNumberOfPartitions(n_partitions));
+
+    if (n_partitions != 1)
+      {
+        // the general case: set up the graph that connects every cell
+        // with its face neighbors and hand it to the overload that takes
+        // a connectivity graph; that function partitions the graph and
+        // assigns the subdomain ids
+        SparsityPattern cell_connectivity;
+        get_face_connectivity_of_cells (triangulation, cell_connectivity);
+
+        partition_triangulation (n_partitions,
+                                 cell_connectivity,
+                                 triangulation);
+      }
+    else
+      {
+        // the trivial case: with a single partition, all cells belong to
+        // subdomain zero
+        typedef
+        typename dealii::internal::ActiveCellIterator<dim, spacedim, Triangulation<dim, spacedim> >::type
+        cell_iterator_type;
+
+        for (cell_iterator_type cell = triangulation.begin_active();
+             cell != triangulation.end(); ++cell)
+          cell->set_subdomain_id (0);
+      }
+  }
+
+
+
+  template <int dim, int spacedim>
+  void
+  partition_triangulation (const unsigned int           n_partitions,
+                           const SparsityPattern        &cell_connection_graph,
+                           Triangulation<dim,spacedim>  &triangulation)
+  {
+    Assert ((dynamic_cast<parallel::distributed::Triangulation<dim,spacedim>*>
+             (&triangulation)
+             == 0),
+            ExcMessage ("Objects of type parallel::distributed::Triangulation "
+                        "are already partitioned implicitly and can not be "
+                        "partitioned again explicitly."));
+    Assert (n_partitions > 0, ExcInvalidNumberOfPartitions(n_partitions));
+    Assert (cell_connection_graph.n_rows() == triangulation.n_active_cells(),
+            ExcMessage ("Connectivity graph has wrong size"));
+    Assert (cell_connection_graph.n_cols() == triangulation.n_active_cells(),
+            ExcMessage ("Connectivity graph has wrong size"));
+
+    typedef
+    typename dealii::internal::ActiveCellIterator<dim, spacedim, Triangulation<dim, spacedim> >::type
+    cell_iterator_type;
+
+    if (n_partitions != 1)
+      {
+        // let SparsityTools partition the connection graph; this yields
+        // one partition number per row of the graph, i.e., per active cell
+        std::vector<unsigned int> cell_partition (triangulation.n_active_cells());
+        SparsityTools::partition (cell_connection_graph, n_partitions,  cell_partition);
+
+        // use the partition number of each cell as its subdomain id
+        for (cell_iterator_type cell = triangulation.begin_active();
+             cell != triangulation.end(); ++cell)
+          cell->set_subdomain_id (cell_partition[cell->active_cell_index()]);
+      }
+    else
+      {
+        // nothing to compute for a single partition: everything is
+        // subdomain zero
+        for (cell_iterator_type cell = triangulation.begin_active();
+             cell != triangulation.end(); ++cell)
+          cell->set_subdomain_id (0);
+      }
+  }
+
+
+
+  template <int dim, int spacedim>
+  void
+  get_subdomain_association (const Triangulation<dim, spacedim>  &triangulation,
+                             std::vector<types::subdomain_id> &subdomain)
+  {
+    // the output vector must already have one entry per active cell
+    Assert (subdomain.size() == triangulation.n_active_cells(),
+            ExcDimensionMismatch (subdomain.size(),
+                                  triangulation.n_active_cells()));
+
+    // store the subdomain id of each active cell at the position given by
+    // the cell's active cell index
+    typename Triangulation<dim, spacedim>::active_cell_iterator
+    cell = triangulation.begin_active();
+    while (cell != triangulation.end())
+      {
+        subdomain[cell->active_cell_index()] = cell->subdomain_id();
+        ++cell;
+      }
+  }
+
+
+
+  template <int dim, int spacedim>
+  unsigned int
+  count_cells_with_subdomain_association (const Triangulation<dim, spacedim> &triangulation,
+                                          const types::subdomain_id       subdomain)
+  {
+    // walk over all active cells and count those whose subdomain id equals
+    // the given one
+    unsigned int n_matching_cells = 0;
+
+    typename Triangulation<dim, spacedim>::active_cell_iterator
+    cell = triangulation.begin_active();
+    while (cell != triangulation.end())
+      {
+        if (cell->subdomain_id() == subdomain)
+          n_matching_cells += 1;
+        ++cell;
+      }
+
+    return n_matching_cells;
+  }
+
+
+
+  template <int dim, int spacedim>
+  std::vector<bool>
+  get_locally_owned_vertices (const Triangulation<dim,spacedim> &triangulation)
+  {
+    // initially, consider every used vertex as locally owned
+    std::vector<bool> owned_vertices = triangulation.get_used_vertices();
+
+    // only distributed triangulations can have vertices that belong to
+    // somebody else; for all other triangulations we are done
+    const parallel::distributed::Triangulation<dim,spacedim> *distributed_tria
+      = dynamic_cast<const parallel::distributed::Triangulation<dim,spacedim> *>
+        (&triangulation);
+    if (distributed_tria == 0)
+      return owned_vertices;
+
+    typedef
+    typename dealii::internal::ActiveCellIterator<dim, spacedim, Triangulation<dim, spacedim> >::type
+    cell_iterator_type;
+
+    for (cell_iterator_type cell = triangulation.begin_active();
+         cell != triangulation.end(); ++cell)
+      {
+        // the vertices of a cell are not ours if the cell is artificial, or
+        // if it is a ghost cell whose subdomain id is smaller than the id of
+        // the locally owned subdomain
+        const bool owned_by_other_process
+          = (cell->is_artificial()
+             ||
+             (cell->is_ghost() &&
+              (cell->subdomain_id() < distributed_tria->locally_owned_subdomain())));
+        if (!owned_by_other_process)
+          continue;
+
+        for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+          owned_vertices[cell->vertex_index(v)] = false;
+      }
+
+    return owned_vertices;
+  }
+
+
+
+  template <typename MeshType>
+  std::list<std::pair<typename MeshType::cell_iterator,
+      typename MeshType::cell_iterator> >
+      get_finest_common_cells (const MeshType &mesh_1,
+                               const MeshType &mesh_2)
+  {
+    Assert (have_same_coarse_mesh (mesh_1, mesh_2),
+            ExcMessage ("The two meshes must be represent triangulations that "
+                        "have the same coarse meshes"));
+
+    // outline of the algorithm: start with a list that pairs up the cells
+    // of the two meshes on the coarsest level. then walk through the list;
+    // whenever both cells of a pair have children, replace the pair by the
+    // pairs of corresponding children, which are appended to the end of
+    // the list and are therefore visited (and possibly replaced) later on
+    typedef typename MeshType::cell_iterator mesh_cell_iterator;
+    typedef std::list<std::pair<mesh_cell_iterator, mesh_cell_iterator> > CellList;
+
+    CellList cell_list;
+
+    // seed the list with the level-0 cells
+    {
+      mesh_cell_iterator coarse_1 = mesh_1.begin(0),
+                         coarse_2 = mesh_2.begin(0);
+      while (coarse_1 != mesh_1.end(0))
+        {
+          cell_list.push_back (std::make_pair (coarse_1, coarse_2));
+          ++coarse_1;
+          ++coarse_2;
+        }
+    }
+
+    // now replace pairs by their children wherever possible
+    typename CellList::iterator cell_pair = cell_list.begin();
+    while (cell_pair != cell_list.end())
+      {
+        const bool both_refined = (cell_pair->first->has_children()
+                                   &&
+                                   cell_pair->second->has_children());
+        if (!both_refined)
+          {
+            // at least one of the two cells has no children, so this pair
+            // stays in the list
+            ++cell_pair;
+            continue;
+          }
+
+        Assert(cell_pair->first->refinement_case()==
+               cell_pair->second->refinement_case(), ExcNotImplemented());
+
+        const unsigned int n_children = cell_pair->first->n_children();
+        for (unsigned int c=0; c<n_children; ++c)
+          cell_list.push_back (std::make_pair (cell_pair->first->child(c),
+                                               cell_pair->second->child(c)));
+
+        // remove the pair we have just expanded. erasing an element of a
+        // list leaves all other iterators valid and returns the iterator
+        // to the element following the erased one, which is where we want
+        // to continue
+        cell_pair = cell_list.erase (cell_pair);
+      }
+
+    // sanity check: every remaining pair must contain at least one active
+    // cell, or the two cells must have different refinement cases
+    for (cell_pair = cell_list.begin(); cell_pair != cell_list.end(); ++cell_pair)
+      Assert (cell_pair->first->active()
+              ||
+              cell_pair->second->active()
+              ||
+              (cell_pair->first->refinement_case()
+               != cell_pair->second->refinement_case()),
+              ExcInternalError());
+
+    return cell_list;
+  }
+
+  template <int dim, int spacedim>
+  bool
+  have_same_coarse_mesh (const Triangulation<dim, spacedim> &mesh_1,
+                         const Triangulation<dim, spacedim> &mesh_2)
+  {
+    // two meshes with a different number of coarse cells can not have the
+    // same coarse mesh
+    if (mesh_1.n_cells (0) != mesh_2.n_cells (0))
+      return false;
+
+    // the numbers agree, so walk over the level-0 cells of both meshes in
+    // lockstep and compare the locations of their vertices
+    typedef typename Triangulation<dim, spacedim>::cell_iterator cell_iterator_type;
+
+    const cell_iterator_type end_coarse_1 = mesh_1.end(0);
+    cell_iterator_type       coarse_1     = mesh_1.begin(0);
+    cell_iterator_type       coarse_2     = mesh_2.begin(0);
+    while (coarse_1 != end_coarse_1)
+      {
+        for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+          if (cell_1_vertex_differs: false) {}
+        ++coarse_1;
+        ++coarse_2;
+      }
+
+    // no difference found: the meshes appear to share their coarse mesh
+    return true;
+  }
+
+
+
+  // Variant of have_same_coarse_mesh() for mesh classes that provide a
+  // get_triangulation() member function: the question is simply forwarded
+  // to the function of the same name that is called with the two objects
+  // returned by get_triangulation().
+  template <typename MeshType>
+  bool
+  have_same_coarse_mesh (const MeshType &mesh_1,
+                         const MeshType &mesh_2)
+  {
+    return have_same_coarse_mesh (mesh_1.get_triangulation(),
+                                  mesh_2.get_triangulation());
+  }
+
+
+
+  template <int dim, int spacedim>
+  double
+  minimal_cell_diameter (const Triangulation<dim, spacedim> &triangulation)
+  {
+    typedef typename Triangulation<dim, spacedim>::active_cell_iterator cell_iterator_type;
+
+    // start with the diameter of the first active cell and then replace
+    // it whenever an active cell with a smaller diameter is found
+    double smallest_diameter = triangulation.begin_active()->diameter();
+
+    cell_iterator_type cell = triangulation.begin_active();
+    while (cell != triangulation.end())
+      {
+        const double this_diameter = cell->diameter();
+        if (this_diameter < smallest_diameter)
+          smallest_diameter = this_diameter;
+        ++cell;
+      }
+
+    return smallest_diameter;
+  }
+
+
+
+  template <int dim, int spacedim>
+  double
+  maximal_cell_diameter (const Triangulation<dim, spacedim> &triangulation)
+  {
+    typedef typename Triangulation<dim, spacedim>::active_cell_iterator cell_iterator_type;
+
+    // start with the diameter of the first active cell and then replace
+    // it whenever an active cell with a larger diameter is found
+    double largest_diameter = triangulation.begin_active()->diameter();
+
+    cell_iterator_type cell = triangulation.begin_active();
+    while (cell != triangulation.end())
+      {
+        const double this_diameter = cell->diameter();
+        if (largest_diameter < this_diameter)
+          largest_diameter = this_diameter;
+        ++cell;
+      }
+
+    return largest_diameter;
+  }
+
+
+
+  namespace internal
+  {
+    namespace FixUpDistortedChildCells
+    {
+      // compute the mean square
+      // deviation of the alternating
+      // forms of the children of the
+      // given object from that of
+      // the object itself. for
+      // objects with
+      // structdim==spacedim, the
+      // alternating form is the
+      // determinant of the jacobian,
+      // whereas for faces with
+      // structdim==spacedim-1, the
+      // alternating form is the
+      // (signed and scaled) normal
+      // vector
+      //
+      // this average square
+      // deviation is computed for an
+      // object where the center node
+      // has been replaced by the
+      // second argument to this
+      // function
+      template <typename Iterator, int spacedim>
+      double
+      objective_function (const Iterator &object,
+                          const Point<spacedim> &object_mid_point)
+      {
+        const unsigned int structdim = Iterator::AccessorType::structure_dimension;
+        Assert (spacedim == Iterator::AccessorType::dimension,
+                ExcInternalError());
+
+        // everything below is wrong
+        // if not for the following
+        // condition
+        Assert (object->refinement_case() == RefinementCase<structdim>::isotropic_refinement,
+                ExcNotImplemented());
+        // first calculate the
+        // average alternating form
+        // for the parent cell/face
+        Point<spacedim> parent_vertices
+        [GeometryInfo<structdim>::vertices_per_cell];
+        Tensor<spacedim-structdim,spacedim> parent_alternating_forms
+        [GeometryInfo<structdim>::vertices_per_cell];
+
+        for (unsigned int i=0; i<GeometryInfo<structdim>::vertices_per_cell; ++i)
+          parent_vertices[i] = object->vertex(i);
+
+        GeometryInfo<structdim>::alternating_form_at_vertices (parent_vertices,
+                                                               parent_alternating_forms);
+
+        const Tensor<spacedim-structdim,spacedim>
+        average_parent_alternating_form
+          = std::accumulate (&parent_alternating_forms[0],
+                             &parent_alternating_forms[GeometryInfo<structdim>::vertices_per_cell],
+                             Tensor<spacedim-structdim,spacedim>());
+
+        // now do the same
+        // computation for the
+        // children where we use the
+        // given location for the
+        // object mid point instead of
+        // the one the triangulation
+        // currently reports
+        Point<spacedim> child_vertices
+        [GeometryInfo<structdim>::max_children_per_cell]
+        [GeometryInfo<structdim>::vertices_per_cell];
+        Tensor<spacedim-structdim,spacedim> child_alternating_forms
+        [GeometryInfo<structdim>::max_children_per_cell]
+        [GeometryInfo<structdim>::vertices_per_cell];
+
+        for (unsigned int c=0; c<object->n_children(); ++c)
+          for (unsigned int i=0; i<GeometryInfo<structdim>::vertices_per_cell; ++i)
+            child_vertices[c][i] = object->child(c)->vertex(i);
+
+        // replace mid-object
+        // vertex. note that for
+        // child i, the mid-object
+        // vertex happens to have the
+        // number
+        // max_children_per_cell-i-1
+        for (unsigned int c=0; c<object->n_children(); ++c)
+          child_vertices[c][GeometryInfo<structdim>::max_children_per_cell-c-1]
+            = object_mid_point;
+
+        for (unsigned int c=0; c<object->n_children(); ++c)
+          GeometryInfo<structdim>::alternating_form_at_vertices (child_vertices[c],
+                                                                 child_alternating_forms[c]);
+
+        // on a uniformly refined
+        // hypercube object, the child
+        // alternating forms should
+        // all be smaller by a factor
+        // of 2^structdim than the
+        // ones of the parent. as a
+        // consequence, we'll use the
+        // squared deviation from
+        // this ideal value as an
+        // objective function
+        double objective = 0;
+        for (unsigned int c=0; c<object->n_children(); ++c)
+          for (unsigned int i=0; i<GeometryInfo<structdim>::vertices_per_cell; ++i)
+            objective += (child_alternating_forms[c][i] -
+                          average_parent_alternating_form/std::pow(2.,1.*structdim))
+                         .norm_square();
+
+        return objective;
+      }
+
+
+      /**
+       * Midpoint of face number 'f' of a 1d
+       * object. The face is a vertex here, so
+       * the location of vertex 'f' is returned.
+       */
+      template <typename Iterator>
+      Point<Iterator::AccessorType::space_dimension>
+      get_face_midpoint (const Iterator &object,
+                         const unsigned int f,
+                         dealii::internal::int2type<1>)
+      {
+        const Point<Iterator::AccessorType::space_dimension>
+        vertex_location = object->vertex(f);
+
+        return vertex_location;
+      }
+
+
+
+      /**
+       * Midpoint of face number 'f' of a 2d
+       * object. The face is a line here, so
+       * the center of line 'f' is returned.
+       */
+      template <typename Iterator>
+      Point<Iterator::AccessorType::space_dimension>
+      get_face_midpoint (const Iterator &object,
+                         const unsigned int f,
+                         dealii::internal::int2type<2>)
+      {
+        const Point<Iterator::AccessorType::space_dimension>
+        line_center = object->line(f)->center();
+
+        return line_center;
+      }
+
+
+
+      /**
+       * Midpoint of face number 'f' of a 3d
+       * object. The face is a quad here, so
+       * the center of face 'f' is returned.
+       */
+      template <typename Iterator>
+      Point<Iterator::AccessorType::space_dimension>
+      get_face_midpoint (const Iterator &object,
+                         const unsigned int f,
+                         dealii::internal::int2type<3>)
+      {
+        const Point<Iterator::AccessorType::space_dimension>
+        quad_center = object->face(f)->center();
+
+        return quad_center;
+      }
+
+
+
+
+      /**
+       * Return the "minimal diameter" of an
+       * object: the smallest distance found
+       * between the mid-points of any two of
+       * its faces, but never more than the
+       * object's ordinary diameter.
+       *
+       * The grid cell improvement algorithm
+       * uses this value to choose its step
+       * length. The step has to be small
+       * enough that the moved point stays
+       * within the cell even if the cell is
+       * highly elongated, which is why the
+       * ordinary diameter is not a good
+       * measure. Because all pairs of faces
+       * are compared, this works both for
+       * long rectangles with parallel sides
+       * (nearest distance between opposite
+       * edges) and for highly slanted
+       * parallelograms (nearest distance
+       * between neighboring edges).
+       */
+      template <typename Iterator>
+      double
+      minimal_diameter (const Iterator &object)
+      {
+        const unsigned int
+        structdim = Iterator::AccessorType::structure_dimension;
+        const unsigned int
+        n_faces = GeometryInfo<structdim>::faces_per_cell;
+
+        // start with the ordinary diameter
+        // and reduce it whenever a pair of
+        // face mid-points is closer together
+        double smallest_distance = object->diameter();
+        for (unsigned int first=0; first<n_faces; ++first)
+          for (unsigned int second=first+1; second<n_faces; ++second)
+            {
+              const double midpoint_distance
+                = get_face_midpoint (object,
+                                     first,
+                                     dealii::internal::int2type<structdim>())
+                  .distance (get_face_midpoint (object,
+                                                second,
+                                                dealii::internal::int2type<structdim>()));
+
+              if (midpoint_distance < smallest_distance)
+                smallest_distance = midpoint_distance;
+            }
+
+        return smallest_distance;
+      }
+
+
+
+      /**
+       * Try to fix up a single object (a cell
+       * or a face). Return whether we succeeded.
+       *
+       * The second argument indicates
+       * whether we need to respect the
+       * manifold/boundary on which this
+       * object lies when moving around its
+       * mid-point.
+       */
+      template <typename Iterator>
+      bool
+      fix_up_object (const Iterator &object,
+                     const bool respect_manifold)
+      {
+        const Boundary<Iterator::AccessorType::dimension,
+              Iterator::AccessorType::space_dimension>
+              *manifold = (respect_manifold ?
+                           &object->get_boundary() :
+                           0);
+
+        const unsigned int structdim = Iterator::AccessorType::structure_dimension;
+        const unsigned int spacedim  = Iterator::AccessorType::space_dimension;
+
+        // right now we can only deal
+        // with objects that have been
+        // refined isotropically
+        // because that is the only
+        // case where we have a
+        // mid-point that can be moved
+        // around without having to
+        // consider boundary
+        // information
+        Assert (object->has_children(), ExcInternalError());
+        Assert (object->refinement_case() == RefinementCase<structdim>::isotropic_refinement,
+                ExcNotImplemented());
+
+        // get the current location of
+        // the object mid-vertex:
+        Point<spacedim> object_mid_point
+          = object->child(0)->vertex (GeometryInfo<structdim>::max_children_per_cell-1);
+
+        // now do a few steepest descent
+        // steps to reduce the objective
+        // function. compute the diameter in
+        // the helper function above
+        unsigned int iteration = 0;
+        const double diameter = minimal_diameter (object);
+
+        // current value of objective
+        // function and initial delta
+        double current_value = objective_function (object, object_mid_point);
+        double initial_delta = 0;
+
+        do
+          {
+            // choose a step length
+            // that is initially 1/4
+            // of the object's minimal
+            // diameter, and a sequence
+            // whose sum does not
+            // converge (to avoid
+            // premature termination of
+            // the iteration)
+            const double step_length = diameter / 4 / (iteration + 1);
+
+            // compute the objective
+            // function's derivative using a
+            // two-sided difference formula
+            // with eps=step_length/10
+            Tensor<1,spacedim> gradient;
+            for (unsigned int d=0; d<spacedim; ++d)
+              {
+                const double eps = step_length/10;
+
+                Tensor<1,spacedim> h;
+                h[d] = eps/2;
+
+                if (respect_manifold == false)
+                  gradient[d]
+                    = ((objective_function (object, object_mid_point + h)
+                        -
+                        objective_function (object, object_mid_point - h))
+                       /
+                       eps);
+                else
+                  gradient[d]
+                    = ((objective_function (object,
+                                            manifold->project_to_surface(object,
+                                                                         object_mid_point + h))
+                        -
+                        objective_function (object,
+                                            manifold->project_to_surface(object,
+                                                                         object_mid_point - h)))
+                       /
+                       eps);
+              }
+
+            // sometimes, the
+            // (unprojected) gradient
+            // is perpendicular to
+            // the manifold, but we
+            // can't go there if
+            // respect_manifold==true. in
+            // that case, gradient=0,
+            // and we simply need to
+            // quit the loop here
+            if (gradient.norm() == 0)
+              break;
+
+            // so we need to go in
+            // direction -gradient. the
+            // optimal value of the
+            // objective function is
+            // zero, so assuming that
+            // the model is quadratic
+            // we would have to go
+            // -2*val/||gradient|| in
+            // this direction, make
+            // sure we go at most
+            // step_length into this
+            // direction
+            object_mid_point -= std::min(2 * current_value / (gradient*gradient),
+                                         step_length / gradient.norm()) *
+                                gradient;
+
+            if (respect_manifold == true)
+              object_mid_point = manifold->project_to_surface(object,
+                                                              object_mid_point);
+
+            // compute current value of the
+            // objective function
+            const double previous_value = current_value;
+            current_value = objective_function (object, object_mid_point);
+
+            if (iteration == 0)
+              initial_delta = (previous_value - current_value);
+
+            // stop if the objective got worse
+            // or we aren't moving much any more
+            if ((iteration >= 1) &&
+                ((previous_value - current_value < 0)
+                 ||
+                 (std::fabs (previous_value - current_value)
+                  <
+                  0.001 * initial_delta)))
+              break;
+
+            ++iteration;
+          }
+        while (iteration < 20);
+
+        // verify that the new
+        // location is indeed better
+        // than the one before. check
+        // this by comparing whether
+        // the minimum value of the
+        // products of parent and
+        // child alternating forms is
+        // positive. for cells this
+        // means that the
+        // determinants have the same
+        // sign, for faces that the
+        // face normals of parent and
+        // children point in the same
+        // general direction
+        double old_min_product, new_min_product;
+
+        Point<spacedim> parent_vertices
+        [GeometryInfo<structdim>::vertices_per_cell];
+        for (unsigned int i=0; i<GeometryInfo<structdim>::vertices_per_cell; ++i)
+          parent_vertices[i] = object->vertex(i);
+
+        Tensor<spacedim-structdim,spacedim> parent_alternating_forms
+        [GeometryInfo<structdim>::vertices_per_cell];
+        GeometryInfo<structdim>::alternating_form_at_vertices (parent_vertices,
+                                                               parent_alternating_forms);
+
+        Point<spacedim> child_vertices
+        [GeometryInfo<structdim>::max_children_per_cell]
+        [GeometryInfo<structdim>::vertices_per_cell];
+
+        for (unsigned int c=0; c<object->n_children(); ++c)
+          for (unsigned int i=0; i<GeometryInfo<structdim>::vertices_per_cell; ++i)
+            child_vertices[c][i] = object->child(c)->vertex(i);
+
+        Tensor<spacedim-structdim,spacedim> child_alternating_forms
+        [GeometryInfo<structdim>::max_children_per_cell]
+        [GeometryInfo<structdim>::vertices_per_cell];
+
+        for (unsigned int c=0; c<object->n_children(); ++c)
+          GeometryInfo<structdim>::alternating_form_at_vertices (child_vertices[c],
+                                                                 child_alternating_forms[c]);
+
+        old_min_product = child_alternating_forms[0][0] * parent_alternating_forms[0];
+        for (unsigned int c=0; c<object->n_children(); ++c)
+          for (unsigned int i=0; i<GeometryInfo<structdim>::vertices_per_cell; ++i)
+            for (unsigned int j=0; j<GeometryInfo<structdim>::vertices_per_cell; ++j)
+              old_min_product =
+                std::min<double> (old_min_product,
+                                  child_alternating_forms[c][i] *
+                                  parent_alternating_forms[j]);
+
+        // for the new minimum value,
+        // replace mid-object
+        // vertex. note that for child
+        // i, the mid-object vertex
+        // happens to have the number
+        // max_children_per_cell-i-1
+        for (unsigned int c=0; c<object->n_children(); ++c)
+          child_vertices[c][GeometryInfo<structdim>::max_children_per_cell-c-1]
+            = object_mid_point;
+
+        for (unsigned int c=0; c<object->n_children(); ++c)
+          GeometryInfo<structdim>::alternating_form_at_vertices (child_vertices[c],
+                                                                 child_alternating_forms[c]);
+
+        new_min_product = child_alternating_forms[0][0] * parent_alternating_forms[0];
+        for (unsigned int c=0; c<object->n_children(); ++c)
+          for (unsigned int i=0; i<GeometryInfo<structdim>::vertices_per_cell; ++i)
+            for (unsigned int j=0; j<GeometryInfo<structdim>::vertices_per_cell; ++j)
+              new_min_product =
+                std::min<double> (new_min_product,
+                                  child_alternating_forms[c][i] *
+                                  parent_alternating_forms[j]);
+
+        // if new minimum value is
+        // at least as good as before,
+        // then set the new mid point.
+        // otherwise leave the old one
+        // in place and let the return
+        // value report the outcome
+        if (new_min_product >= old_min_product)
+          object->child(0)->vertex (GeometryInfo<structdim>::max_children_per_cell-1)
+            = object_mid_point;
+
+        // return whether after this
+        // operation we have an object that
+        // is well oriented
+        return (std::max (new_min_product, old_min_product) > 0);
+      }
+
+
+
+      void fix_up_faces (const dealii::Triangulation<1,1>::cell_iterator &,
+                         dealii::internal::int2type<1>,
+                         dealii::internal::int2type<1>)
+      {
+        // intentionally empty: there is nothing
+        // to do for the faces of cells in 1d
+      }
+
+
+
+      // Try to repair the faces of a
+      // cell by relocating their
+      // mid-points
+      template <int structdim, int spacedim>
+      void fix_up_faces (const typename dealii::Triangulation<structdim,spacedim>::cell_iterator &cell,
+                         dealii::internal::int2type<structdim>,
+                         dealii::internal::int2type<spacedim>)
+      {
+        // visit every face of the cell
+        // and attempt to fix it up before
+        // the cell itself is dealt with.
+        // a face is left alone if one of
+        // its sub-faces has children of
+        // its own, i.e. if the neighbor
+        // is even more refined than we
+        // are: those grand-children can
+        // not be moved around any more.
+        // that case should not really
+        // occur anyway: if the face is
+        // distorted but the neighbor is
+        // more refined, then the face
+        // was already deformed earlier
+        // and was ignored back then, so
+        // ignoring it again is fine.
+        //
+        // for a face at the boundary,
+        // fix_up_object is asked to
+        // respect the manifold when it
+        // moves the face mid-point
+        for (unsigned int face_no=0; face_no<GeometryInfo<structdim>::faces_per_cell; ++face_no)
+          {
+            Assert (cell->face(face_no)->has_children(), ExcInternalError());
+            Assert (cell->face(face_no)->refinement_case() ==
+                    RefinementCase<structdim-1>::isotropic_refinement,
+                    ExcInternalError());
+
+            // search for the first sub-face
+            // that is itself refined
+            unsigned int subface = 0;
+            while ((subface < GeometryInfo<structdim>::max_children_per_face)
+                   &&
+                   (cell->face(face_no)->child(subface)->has_children() == false))
+              ++subface;
+
+            // only if no such sub-face was
+            // found can we do something
+            // about this face
+            if (subface == GeometryInfo<structdim>::max_children_per_face)
+              fix_up_object (cell->face(face_no), cell->at_boundary(face_no));
+          }
+      }
+
+
+    } /* namespace FixUpDistortedChildCells */
+  } /* namespace internal */
+
+
+  template <int dim, int spacedim>
+  typename Triangulation<dim,spacedim>::DistortedCellList
+  fix_up_distorted_child_cells (const typename Triangulation<dim,spacedim>::DistortedCellList &distorted_cells,
+                                Triangulation<dim,spacedim> &/*triangulation*/)
+  {
+    typedef typename Triangulation<dim,spacedim>::cell_iterator cell_iterator;
+    typedef typename std::list<cell_iterator>::const_iterator   list_iterator;
+
+    // collects the cells for which the
+    // repair attempt below did not succeed
+    typename Triangulation<dim,spacedim>::DistortedCellList still_distorted;
+
+    // the manifold has to be respected if
+    // the cell is embedded in a higher
+    // dimensional space; otherwise, like
+    // for a hex in 3d, every point within
+    // the cell interior is fair game
+    const bool respect_manifold = (dim < spacedim);
+
+    // visit each cell we were asked to fix
+    const list_iterator end_of_list = distorted_cells.distorted_cells.end();
+    for (list_iterator entry = distorted_cells.distorted_cells.begin();
+         entry != end_of_list; ++entry)
+      {
+        const cell_iterator cell = *entry;
+
+        // first the faces ...
+        internal::FixUpDistortedChildCells
+        ::fix_up_faces (cell,
+                        dealii::internal::int2type<dim>(),
+                        dealii::internal::int2type<spacedim>());
+
+        // ... then the cell itself
+        const bool cell_is_fixed
+          = internal::FixUpDistortedChildCells::fix_up_object (cell,
+                                                               respect_manifold);
+        if (cell_is_fixed == false)
+          still_distorted.distorted_cells.push_back (cell);
+      }
+
+    return still_distorted;
+  }
+
+
+
+  template <class MeshType>
+  std::vector<typename MeshType::active_cell_iterator>
+  get_patch_around_cell(const typename MeshType::active_cell_iterator &cell)
+  {
+    Assert (cell->is_locally_owned(),
+            ExcMessage ("This function only makes sense if the cell for "
+                        "which you are asking for a patch, is locally "
+                        "owned."));
+
+    // the patch starts with the cell itself, followed by the active
+    // cells found behind each of its interior faces
+    std::vector<typename MeshType::active_cell_iterator> patch;
+    patch.push_back (cell);
+
+    const unsigned int n_faces = GeometryInfo<MeshType::dimension>::faces_per_cell;
+    for (unsigned int f=0; f<n_faces; ++f)
+      {
+        // no neighbor behind a boundary face
+        if (cell->face(f)->at_boundary())
+          continue;
+
+        // neighbor without children: take it as is
+        if (cell->neighbor(f)->has_children() == false)
+          {
+            patch.push_back (cell->neighbor(f));
+            continue;
+          }
+
+        // from here on the neighbor is refined
+        if (MeshType::dimension > 1)
+          {
+            // in 2d/3d, the children of the neighbor behind this face
+            // can not be refined further, so they are active and can
+            // be added directly
+            const unsigned int n_subfaces = cell->face(f)->n_children();
+            for (unsigned int sf=0; sf<n_subfaces; ++sf)
+              patch.push_back (cell->neighbor_child_on_subface (f, sf));
+          }
+        else
+          {
+            // in 1d, descend from the neighbor through the children
+            // on the side facing us until a cell without children
+            // is reached
+            typename MeshType::cell_iterator descendant = cell->neighbor (f);
+            while (descendant->has_children())
+              descendant = descendant->child(1-f);
+
+            Assert (descendant->neighbor(1-f) == cell, ExcInternalError());
+            patch.push_back (descendant);
+          }
+      }
+
+    return patch;
+  }
+
+
+
+  template <class Container>
+  std::vector<typename Container::cell_iterator>
+  get_cells_at_coarsest_common_level (
+    const std::vector<typename Container::active_cell_iterator> &patch)
+  {
+    Assert (patch.size() > 0, ExcMessage("Vector containing patch cells should not be an empty vector!"));
+
+    // Step 1: find the smallest refinement level that occurs among
+    // the cells of the patch
+    int coarsest_level = patch[0]->level();
+    for (unsigned int p=1; p<patch.size(); ++p)
+      {
+        const int this_level = patch[p]->level();
+        if (this_level < coarsest_level)
+          coarsest_level = this_level;
+      }
+
+    // Step 2: replace every patch cell by its ancestor on that level.
+    // A cell that already lives on the coarsest level is its own
+    // "ancestor" and is taken unchanged. Collecting the results in a
+    // std::set makes sure that a cell reached from several patch
+    // cells is stored only once.
+    std::set<typename Container::cell_iterator> coarse_cells;
+    for (unsigned int p=0; p<patch.size(); ++p)
+      {
+        typename Container::cell_iterator ancestor = patch[p];
+        while (ancestor->level() > coarsest_level)
+          ancestor = ancestor->parent();
+
+        coarse_cells.insert (ancestor);
+      }
+
+    return std::vector<typename Container::cell_iterator> (coarse_cells.begin(),
+                                                           coarse_cells.end());
+  }
+
+
+
+  template <class Container>
+  void build_triangulation_from_patch(const std::vector<typename Container::active_cell_iterator> &patch,
+                                      Triangulation<Container::dimension,Container::space_dimension> &local_triangulation,
+                                      std::map<typename Triangulation<Container::dimension,Container::space_dimension>::active_cell_iterator,
+                                      typename Container::active_cell_iterator> &patch_to_global_tria_map)
+
+  {
+    const std::vector<typename Container::cell_iterator> uniform_cells =
+      get_cells_at_coarsest_common_level <Container> (patch);
+    // First, create a coarse triangulation from the cells in "uniform_cells"
+    local_triangulation.clear();
+    std::vector<Point<Container::space_dimension> > vertices;
+    const unsigned int n_uniform_cells=uniform_cells.size();
+    std::vector<CellData<Container::dimension> > cells(n_uniform_cells);
+    unsigned int k=0;// index of the current cell in "cells"
+    unsigned int i=0;// number of distinct vertices stored in "vertices" so far
+    typename std::vector<typename Container::cell_iterator>::const_iterator uniform_cell;
+    for (uniform_cell=uniform_cells.begin(); uniform_cell!=uniform_cells.end(); ++uniform_cell)
+      {
+        bool repeat_vertex;
+        for (unsigned int j=0;  j< GeometryInfo<Container::dimension>::vertices_per_cell; ++j)
+          {
+            Point<Container::space_dimension> position=(*uniform_cell)->vertex (j);
+            repeat_vertex=false;
+
+            for (unsigned int m=0; m<i; ++m)
+              {
+                if (position == vertices[m])
+                  {
+                    repeat_vertex=true;
+                    cells[k].vertices[j]=m;
+                    break;
+                  }
+              }
+            if (repeat_vertex==false)
+              {
+                vertices.push_back(position);
+                cells[k].vertices[j]=i;
+                i=i+1;
+              }
+
+          }//for vertices_per_cell
+        k=k+1;
+      }
+    local_triangulation.create_triangulation(vertices,cells,SubCellData());
+    Assert (local_triangulation.n_active_cells() == uniform_cells.size(), ExcInternalError());
+    local_triangulation.clear_user_flags ();
+    unsigned int index=0;
+    // Map each cell of local_triangulation to the entry of "uniform_cells" with the same index
+    std::map<typename Triangulation<Container::dimension,Container::space_dimension>::cell_iterator,
+        typename Container::cell_iterator> patch_to_global_tria_map_tmp;
+    for (typename Triangulation<Container::dimension,Container::space_dimension>::cell_iterator coarse_cell = local_triangulation.begin();
+         coarse_cell != local_triangulation.end(); ++coarse_cell, ++index)
+      {
+        patch_to_global_tria_map_tmp.insert (std::make_pair(coarse_cell, uniform_cells[index]));
+        // Check that the two cells paired above coincide geometrically (here, we compare their centers).
+
+        Assert(coarse_cell->center().distance( uniform_cells[index]->center())<=1e-15*coarse_cell->diameter(),
+               ExcInternalError());
+      }
+    bool refinement_necessary;
+    // Repeatedly refine the coarse triangulation until none of the cells mapped to
+    // its active cells has children; active cells that map to a patch cell get their user flag set
+    do
+      {
+        refinement_necessary = false;
+        for (typename Triangulation<Container::dimension,Container::space_dimension>::active_cell_iterator
+             active_tria_cell = local_triangulation.begin_active();
+             active_tria_cell != local_triangulation.end(); ++active_tria_cell)
+          {
+            if (patch_to_global_tria_map_tmp[active_tria_cell]->has_children())
+              {
+                active_tria_cell -> set_refine_flag();
+                refinement_necessary = true;
+              }
+            else for (unsigned int i=0; i<patch.size(); ++i)
+                {
+                  if (patch_to_global_tria_map_tmp[active_tria_cell]==patch[i])
+                    {
+                      active_tria_cell->set_user_flag();
+                      break;
+                    }
+                }
+          }
+
+        if (refinement_necessary)
+          {
+            local_triangulation.execute_coarsening_and_refinement ();
+
+            for (typename Triangulation<Container::dimension,Container::space_dimension>::cell_iterator
+                 cell = local_triangulation.begin();
+                 cell != local_triangulation.end(); ++cell)
+              {
+
+                if (patch_to_global_tria_map_tmp.find(cell)!=patch_to_global_tria_map_tmp.end())
+                  {
+                    if (cell-> has_children())
+                      {
+                        // Note: since this cell now has children, it should no longer be in the map;
+                        // its children are added to the map instead
+
+                        // these children may not yet be in the map
+                        for (unsigned int c=0; c< cell ->n_children(); ++c)
+                          {
+                            if (patch_to_global_tria_map_tmp.find(cell->child(c)) ==
+                                patch_to_global_tria_map_tmp.end())
+                              {
+                                patch_to_global_tria_map_tmp.insert (std::make_pair(cell ->child(c),
+                                                                                    patch_to_global_tria_map_tmp[cell]->child(c)));
+
+                                Assert(cell->child(c)->center().distance( patch_to_global_tria_map_tmp[cell]->child(c)->center())
+                                       <=1e-15*cell->child(c)->diameter(),
+                                       ExcInternalError());
+                              }
+                          }
+                        // The parent cell whose children were added
+                        // to the map is now removed from the map
+                        patch_to_global_tria_map_tmp.erase(cell);
+                      }
+                  }
+              }
+          }
+
+      }
+    while (refinement_necessary);
+    typename std::map<typename Triangulation<Container::dimension,Container::space_dimension>::cell_iterator,
+             typename Container::cell_iterator>::iterator map_tmp_it =
+               patch_to_global_tria_map_tmp.begin(),map_tmp_end = patch_to_global_tria_map_tmp.end();
+    // Finally, copy the entries of the temporary map of cell_iterators, "patch_to_global_tria_map_tmp",
+    // into the output map "patch_to_global_tria_map", whose keys and values are active_cell_iterators
+    for (; map_tmp_it!=map_tmp_end; ++map_tmp_it)
+      patch_to_global_tria_map[map_tmp_it->first] = map_tmp_it->second;
+  }
+
+
+
+
+  template <class DoFHandlerType>
+  std::map< types::global_dof_index,std::vector<typename DoFHandlerType::active_cell_iterator> >
+  get_dof_to_support_patch_map(DoFHandlerType &dof_handler)
+  {
+
+    // This is the map from global_dof_index to
+    // a set of cells on patch.  We first map into
+    // a set because it is very likely that we
+    // will attempt to add a cell more than once
+    // to a particular patch and we want to preserve
+    // uniqueness of cell iterators. std::set does this
+    // automatically for us.  Later after it is all
+    // constructed, we will copy to a map of vectors
+    // since that is the prefered output for other
+    // functions.
+    std::map< types::global_dof_index,std::set<typename DoFHandlerType::active_cell_iterator> > dof_to_set_of_cells_map;
+
+    std::vector<types::global_dof_index> local_dof_indices;
+    std::vector<types::global_dof_index> local_face_dof_indices;
+    std::vector<types::global_dof_index> local_line_dof_indices;
+
+    // a place to save the dof_handler user flags and restore them later
+    // to maintain const of dof_handler.
+    std::vector<bool> user_flags;
+
+
+    // in 3d, we need pointers from active lines to the
+    // active parent lines, so we construct it as needed.
+    std::map<typename DoFHandlerType::active_line_iterator, typename DoFHandlerType::line_iterator > lines_to_parent_lines_map;
+    if (DoFHandlerType::dimension == 3)
+      {
+
+        // save user flags as they will be modified and then later restored
+        dof_handler.get_triangulation().save_user_flags(user_flags);
+        const_cast<dealii::Triangulation<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &>(dof_handler.get_triangulation()).clear_user_flags ();
+
+
+        typename DoFHandlerType::active_cell_iterator cell = dof_handler.begin_active(),
+                                                      endc = dof_handler.end();
+        for (; cell!=endc; ++cell)
+          {
+            // We only want lines that are locally_relevant
+            // although it doesn't hurt to have lines that
+            // are children of ghost cells since there are
+            // few and we don't have to use them.
+            if (cell->is_artificial() == false)
+              {
+                for (unsigned int l=0; l<GeometryInfo<DoFHandlerType::dimension>::lines_per_cell; ++l)
+                  if (cell->line(l)->has_children())
+                    for (unsigned int c=0; c<cell->line(l)->n_children(); ++c)
+                      {
+                        lines_to_parent_lines_map[cell->line(l)->child(c)] = cell->line(l);
+                        // set flags to know that child
+                        // line has an active parent.
+                        cell->line(l)->child(c)->set_user_flag();
+                      }
+              }
+          }
+      }
+
+
+    // We loop through all cells and add cell to the
+    // map for the dofs that it immediately touches
+    // and then account for all the other dofs of
+    // which it is a part, mainly the ones that must
+    // be added on account of adaptivity hanging node
+    // constraints.
+    typename DoFHandlerType::active_cell_iterator cell = dof_handler.begin_active(),
+                                                  endc = dof_handler.end();
+    for (; cell!=endc; ++cell)
+      {
+        // Need to loop through all cells that could
+        // be in the patch of dofs on locally_owned
+        // cells including ghost cells
+        if (cell->is_artificial() == false)
+          {
+            const unsigned int n_dofs_per_cell = cell->get_fe().dofs_per_cell;
+            local_dof_indices.resize(n_dofs_per_cell);
+
+            // Take care of adding cell pointer to each
+            // dofs that exists on cell.
+            cell->get_dof_indices(local_dof_indices);
+            for (unsigned int i=0; i< n_dofs_per_cell; ++i )
+              dof_to_set_of_cells_map[local_dof_indices[i]].insert(cell);
+
+            // In the case of the adjacent cell (over
+            // faces or edges) being more refined, we
+            // want to add all of the children to the
+            // patch since the support function at that
+            // dof could be non-zero along that entire
+            // face (or line).
+
+            // Take care of dofs on neighbor faces
+            for (unsigned int f=0; f<GeometryInfo<DoFHandlerType::dimension>::faces_per_cell; ++f)
+              {
+                if (cell->face(f)->has_children())
+                  {
+                    for (unsigned int c=0; c<cell->face(f)->n_children(); ++c)
+                      {
+                        //  Add cell to dofs of all subfaces
+                        //
+                        //   *-------------------*----------*---------*
+                        //   |                   | add cell |         |
+                        //   |                   |<- to dofs|         |
+                        //   |                   |of subface|         |
+                        //   |        cell       *----------*---------*
+                        //   |                   | add cell |         |
+                        //   |                   |<- to dofs|         |
+                        //   |                   |of subface|         |
+                        //   *-------------------*----------*---------*
+                        //
+                        Assert (cell->face(f)->child(c)->has_children() == false, ExcInternalError());
+
+                        const unsigned int n_dofs_per_face = cell->get_fe().dofs_per_face;
+                        local_face_dof_indices.resize(n_dofs_per_face);
+
+                        cell->face(f)->child(c)->get_dof_indices(local_face_dof_indices);
+                        for (unsigned int i=0; i< n_dofs_per_face; ++i )
+                          dof_to_set_of_cells_map[local_face_dof_indices[i]].insert(cell);
+                      }
+                  }
+                else if ((cell->face(f)->at_boundary() == false) && (cell->neighbor_is_coarser(f)))
+                  {
+
+                    // Add cell to dofs of parent face and all
+                    // child faces of parent face
+                    //
+                    //   *-------------------*----------*---------*
+                    //   |                   |          |         |
+                    //   |                   |   cell   |         |
+                    //   |      add cell     |          |         |
+                    //   |      to dofs   -> *----------*---------*
+                    //   |      of parent    | add cell |         |
+                    //   |       face        |<- to dofs|         |
+                    //   |                   |of subface|         |
+                    //   *-------------------*----------*---------*
+                    //
+
+                    // Add cell to all dofs of parent face
+                    std::pair<unsigned int, unsigned int> neighbor_face_no_subface_no = cell->neighbor_of_coarser_neighbor(f);
+                    unsigned int face_no = neighbor_face_no_subface_no.first;
+                    unsigned int subface = neighbor_face_no_subface_no.second;
+
+                    const unsigned int n_dofs_per_face = cell->get_fe().dofs_per_face;
+                    local_face_dof_indices.resize(n_dofs_per_face);
+
+                    cell->neighbor(f)->face(face_no)->get_dof_indices(local_face_dof_indices);
+                    for (unsigned int i=0; i< n_dofs_per_face; ++i )
+                      dof_to_set_of_cells_map[local_face_dof_indices[i]].insert(cell);
+
+                    // Add cell to all dofs of children of
+                    // parent face
+                    for (unsigned int c=0; c<cell->neighbor(f)->face(face_no)->n_children(); ++c)
+                      {
+                        if (c != subface) // don't repeat work on dofs of original cell
+                          {
+                            const unsigned int n_dofs_per_face = cell->get_fe().dofs_per_face;
+                            local_face_dof_indices.resize(n_dofs_per_face);
+
+                            Assert (cell->neighbor(f)->face(face_no)->child(c)->has_children() == false, ExcInternalError());
+                            cell->neighbor(f)->face(face_no)->child(c)->get_dof_indices(local_face_dof_indices);
+                            for (unsigned int i=0; i<n_dofs_per_face; ++i )
+                              dof_to_set_of_cells_map[local_face_dof_indices[i]].insert(cell);
+                          }
+                      }
+                  }
+              }
+
+
+            // If 3d, take care of dofs on lines in the
+            // same pattern as faces above. That is, if
+            // a cell's line has children, distribute
+            // cell to dofs of children of line,  and
+            // if cell's line has an active parent, then
+            // distribute cell to dofs on parent line
+            // and dofs on all children of parent line.
+            if (DoFHandlerType::dimension == 3)
+              {
+                for (unsigned int l=0; l<GeometryInfo<DoFHandlerType::dimension>::lines_per_cell; ++l)
+                  {
+                    if (cell->line(l)->has_children())
+                      {
+                        for (unsigned int c=0; c<cell->line(l)->n_children(); ++c)
+                          {
+                            Assert (cell->line(l)->child(c)->has_children() == false, ExcInternalError());
+
+                            // dofs_per_line returns number of dofs
+                            // on line not including the vertices of the line.
+                            const unsigned int n_dofs_per_line = 2*cell->get_fe().dofs_per_vertex
+                                                                 + cell->get_fe().dofs_per_line;
+                            local_line_dof_indices.resize(n_dofs_per_line);
+
+                            cell->line(l)->child(c)->get_dof_indices(local_line_dof_indices);
+                            for (unsigned int i=0; i<n_dofs_per_line; ++i )
+                              dof_to_set_of_cells_map[local_line_dof_indices[i]].insert(cell);
+                          }
+                      }
+                    // user flag was set above to denote that
+                    // an active parent line exists so add
+                    // cell to dofs of parent and all its
+                    // children
+                    else if (cell->line(l)->user_flag_set() == true)
+                      {
+                        typename DoFHandlerType::line_iterator parent_line = lines_to_parent_lines_map[cell->line(l)];
+                        Assert (parent_line->has_children(), ExcInternalError() );
+
+                        // dofs_per_line returns number of dofs
+                        // on line not including the vertices of the line.
+                        const unsigned int n_dofs_per_line = 2*cell->get_fe().dofs_per_vertex
+                                                             + cell->get_fe().dofs_per_line;
+                        local_line_dof_indices.resize(n_dofs_per_line);
+
+                        parent_line->get_dof_indices(local_line_dof_indices);
+                        for (unsigned int i=0; i<n_dofs_per_line; ++i )
+                          dof_to_set_of_cells_map[local_line_dof_indices[i]].insert(cell);
+
+                        for (unsigned int c=0; c<parent_line->n_children(); ++c)
+                          {
+                            Assert (parent_line->child(c)->has_children() == false, ExcInternalError());
+
+                            const unsigned int n_dofs_per_line = 2*cell->get_fe().dofs_per_vertex
+                                                                 + cell->get_fe().dofs_per_line;
+                            local_line_dof_indices.resize(n_dofs_per_line);
+
+                            parent_line->child(c)->get_dof_indices(local_line_dof_indices);
+                            for (unsigned int i=0; i<n_dofs_per_line; ++i )
+                              dof_to_set_of_cells_map[local_line_dof_indices[i]].insert(cell);
+                          }
+
+
+                      }
+                  } // for lines l
+              }// if DoFHandlerType::dimension == 3
+          }// if cell->is_artificial() == false
+      }// for cells
+
+
+    if (DoFHandlerType::dimension == 3)
+      {
+        // finally, restore the user flags that were modified above
+        // when we constructed the pointers to the parents of lines.
+        // Since dof_handler is const, we must leave it unchanged.
+        const_cast<dealii::Triangulation<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &>(dof_handler.get_triangulation()).load_user_flags (user_flags);
+      }
+
+    // Finally, we copy map of sets to
+    // map of vectors using the std::vector::assign() function
+    std::map< types::global_dof_index, std::vector<typename DoFHandlerType::active_cell_iterator> > dof_to_cell_patches;
+
+    typename std::map<types::global_dof_index, std::set< typename DoFHandlerType::active_cell_iterator> >::iterator
+    it = dof_to_set_of_cells_map.begin(),
+    it_end = dof_to_set_of_cells_map.end();
+    for ( ; it!=it_end; ++it)
+      dof_to_cell_patches[it->first].assign( it->second.begin(), it->second.end() );
+
+    return dof_to_cell_patches;
+  }
+
+
+
+  /*
+   * Orthogonal equality test for two points; the face-based
+   * orthogonal_equality overloads call this for pairs of vertices.
+   *
+   * point1 and point2 are considered equal, if every component of
+   *   matrix.point1 + offset - point2
+   * other than the one in <direction> is at most 1.e-10 in absolute value,
+   * i.e. if that vector is parallel to the unit vector in <direction>.
+   */
+  template<int spacedim>
+  inline bool orthogonal_equality (const Point<spacedim>    &point1,
+                                   const Point<spacedim>    &point2,
+                                   const int                 direction,
+                                   const Tensor<1,spacedim> &offset,
+                                   const FullMatrix<double> &matrix)
+  {
+    Assert (0<=direction && direction<spacedim,
+            ExcIndexRange (direction, 0, spacedim));
+    Assert(matrix.m() == matrix.n(), ExcInternalError());
+
+    // Build matrix.point1 by accumulating into a default-constructed
+    // point. A matrix whose size differs from spacedim is ignored and
+    // point1 is taken as is.
+    Point<spacedim> residual;
+    if (matrix.m() != spacedim)
+      residual = point1;
+    else
+      for (int row = 0; row < spacedim; ++row)
+        for (int col = 0; col < spacedim; ++col)
+          residual(row) += matrix(row,col) * point1(col);
+
+    residual += offset - point2;
+
+    // The component along <direction> is deliberately excluded from
+    // the comparison; all other components have to vanish up to the
+    // absolute tolerance.
+    for (int component = 0; component < spacedim; ++component)
+      {
+        if (component != direction && fabs(residual(component)) > 1.e-10)
+          return false;
+      }
+
+    return true;
+  }
+
+
+  /*
+   * Internally used in orthogonal_equality
+   *
+   * A lookup table to transform vertex matchings to orientation flags of
+   * the form (face_orientation, face_flip, face_rotation), returned as a
+   * std::bitset<3> with face_orientation in bit 0, face_flip in bit 1 and
+   * face_rotation in bit 2. This primary template is empty; the lookup
+   * is provided by the specializations for dim = 1, 2 and 3. See also the
+   * documentation of make_periodicity_constraints and collect_periodic_faces.
+   */
+  template<int dim> struct OrientationLookupTable {};
+
+  template<> struct OrientationLookupTable<1>
+  {
+    typedef std_cxx11::array<unsigned int, GeometryInfo<1>::vertices_per_face> MATCH_T;
+    // 1D: the matching is ignored, the result is always [true ,false,false]
+    static inline std::bitset<3> lookup (const MATCH_T &)
+    {
+      return std::bitset<3>(1);
+    }
+  };
+
+  template<> struct OrientationLookupTable<2>
+  {
+    typedef std_cxx11::array<unsigned int, GeometryInfo<2>::vertices_per_face> MATCH_T;
+    static inline std::bitset<3> lookup (const MATCH_T &matching)
+    {
+      // Two matched faces (=lines) in 2D are either aligned or flipped.
+      // This "line_flip" property is stored, somewhat sloppily, in the
+      // face_flip bit; face_orientation is always true and face_rotation
+      // always false.
+      static const MATCH_T aligned = {{ 0 , 1 }};
+      static const MATCH_T flipped = {{ 1 , 0 }};
+      if (matching == aligned)
+        return 1;                                // [true ,false,false]
+      if (matching == flipped)
+        return 3;                                // [true ,true ,false]
+      // no other matching is expected here
+      AssertThrow(false, ExcInternalError());
+      return 0; // dead code, silences warnings about a missing return value
+    }
+  };
+
+  template<> struct OrientationLookupTable<3>
+  {
+    typedef std_cxx11::array<unsigned int, GeometryInfo<3>::vertices_per_face> MATCH_T;
+    static inline std::bitset<3> lookup (const MATCH_T &matching)
+    {
+      // The full fledged 3D case.
+      // See the documentation in include/deal.II/base/geometry_info.h
+      // as well as the actual implementation in source/grid/tria.cc
+      // for more details...
+      //
+      // Row k lists the vertex matching that is translated to the bitmask
+      // k = face_orientation + 2*face_flip + 4*face_rotation.
+      static const MATCH_T known_matchings[8] =
+      {
+        {{ 0 , 2 , 1 , 3 }},                     // 0: [false,false,false]
+        {{ 0 , 1 , 2 , 3 }},                     // 1: [true ,false,false]
+        {{ 3 , 1 , 2 , 0 }},                     // 2: [false,true ,false]
+        {{ 3 , 2 , 1 , 0 }},                     // 3: [true ,true ,false]
+        {{ 2 , 3 , 0 , 1 }},                     // 4: [false,false,true ]
+        {{ 1 , 3 , 0 , 2 }},                     // 5: [true ,false,true ]
+        {{ 1 , 0 , 3 , 2 }},                     // 6: [false,true ,true ]
+        {{ 2 , 0 , 3 , 1 }}                      // 7: [true ,true ,true ]
+      };
+
+      for (unsigned int bitmask = 0; bitmask < 8; ++bitmask)
+        if (matching == known_matchings[bitmask])
+          return bitmask;
+
+      AssertThrow(false, ExcInternalError());
+      return 0; // dead code, silences warnings about a missing return value
+    }
+  };
+
+
+
+  // Orthogonal equality test for two faces: every vertex of face1 has to
+  // match (in the sense of the point-wise orthogonal_equality) a distinct
+  // vertex of face2. Only if that succeeds is orientation overwritten,
+  // namely with the bitmask looked up for the vertex matching found.
+  template<typename FaceIterator>
+  inline bool
+  orthogonal_equality (std::bitset<3>     &orientation,
+                       const FaceIterator &face1,
+                       const FaceIterator &face2,
+                       const int          direction,
+                       const Tensor<1,FaceIterator::AccessorType::space_dimension> &offset,
+                       const FullMatrix<double> &matrix)
+  {
+    Assert(matrix.m() == matrix.n(),
+           ExcMessage("The supplied matrix must be a square matrix"));
+
+    static const int dim = FaceIterator::AccessorType::dimension;
+    const unsigned int n_vertices = GeometryInfo<dim>::vertices_per_face;
+
+    // matching[i] is the vertex of face2 assigned to vertex i of face1;
+    // taken[j] records that vertex j of face2 has been assigned already
+    std_cxx11::array<unsigned int, GeometryInfo<dim>::vertices_per_face> matching;
+    std::vector<bool> taken (n_vertices, false);
+    unsigned int n_unmatched = n_vertices;
+
+    for (unsigned int i = 0; i < n_vertices; ++i)
+      for (unsigned int j = 0; j < n_vertices; ++j)
+        if (!taken[j] &&
+            orthogonal_equality(face1->vertex(i), face2->vertex(j),
+                                direction, offset, matrix))
+          {
+            // the lowest-numbered free vertex of face2 that fits wins
+            matching[i] = j;
+            taken[j] = true;
+            --n_unmatched;
+            break;
+          }
+
+    // And finally, a lookup to determine the ordering bitmask:
+    const bool all_matched = (n_unmatched == 0);
+    if (all_matched)
+      orientation = OrientationLookupTable<dim>::lookup(matching);
+
+    return all_matched;
+  }
+
+
+
+  template<typename FaceIterator>
+  inline bool
+  orthogonal_equality (const FaceIterator &face1,
+                       const FaceIterator &face2,
+                       const int          direction,
+                       const Tensor<1,FaceIterator::AccessorType::space_dimension> &offset,
+                       const FullMatrix<double> &matrix)
+  {
+    // Delegate to the overload above and discard the orientation it computes
+    std::bitset<3> ignored_orientation;
+    return orthogonal_equality (ignored_orientation, face1, face2, direction, offset, matrix);
+  }
+
+
+
+  /*
+   * Internally used in collect_periodic_faces: pairs each entry of pairs1
+   * with an orthogonally equal entry of pairs2, appends the pair to
+   * matched_pairs and removes the matched entry from pairs2.
+   */
+  template<typename CellIterator>
+  void
+  match_periodic_face_pairs
+  (std::set<std::pair<CellIterator, unsigned int> > &pairs1,
+   std::set<std::pair<typename identity<CellIterator>::type, unsigned int> > &pairs2,
+   const int                                        direction,
+   std::vector<PeriodicFacePair<CellIterator> >     &matched_pairs,
+   const dealii::Tensor<1,CellIterator::AccessorType::space_dimension> &offset,
+   const FullMatrix<double>                         &matrix)
+  {
+    static const int space_dim = CellIterator::AccessorType::space_dimension;
+    (void)space_dim;
+    Assert (0<=direction && direction<space_dim,
+            ExcIndexRange (direction, 0, space_dim));
+    Assert (pairs1.size() == pairs2.size(),
+            ExcMessage ("Unmatched faces on periodic boundaries"));
+
+    typedef typename std::set
+    <std::pair<CellIterator, unsigned int> >::const_iterator PairIterator;
+    unsigned int n_found = 0;
+    std::bitset<3> orientation;
+
+    // Match with a complexity of O(n^2). This could be improved...
+    for (PairIterator first = pairs1.begin(); first != pairs1.end(); ++first)
+      {
+        const CellIterator cell1     = first->first;
+        const unsigned int face_idx1 = first->second;
+        for (PairIterator second = pairs2.begin(); second != pairs2.end(); ++second)
+          {
+            const CellIterator cell2     = second->first;
+            const unsigned int face_idx2 = second->second;
+            if (!GridTools::orthogonal_equality(orientation,
+                                                cell1->face(face_idx1),
+                                                cell2->face(face_idx2),
+                                                direction, offset,
+                                                matrix))
+              continue;
+
+            // We have a match: record it, drop the partner from pairs2 to
+            // speed up later searches and leave the loop right after erasing
+            const PeriodicFacePair<CellIterator> matched_face =
+            {
+              {cell1, cell2},
+              {face_idx1, face_idx2},
+              orientation,
+              matrix
+            };
+            matched_pairs.push_back(matched_face);
+            pairs2.erase(second);
+            ++n_found;
+            break;
+          }
+      }
+
+    //Assure that all faces are matched
+    AssertThrow (n_found == pairs1.size() && pairs2.empty(),
+                 ExcMessage ("Unmatched faces on periodic boundaries"));
+  }
+
+
+
+  /*
+   * Collects the boundary faces of the level 0 cells of mesh that carry
+   * the boundary ids b_id1 and b_id2, and appends the periodic face pairs
+   * found by match_periodic_face_pairs to matched_pairs.
+   */
+  template<typename MeshType>
+  void
+  collect_periodic_faces
+  (const MeshType                        &mesh,
+   const types::boundary_id               b_id1,
+   const types::boundary_id               b_id2,
+   const int                              direction,
+   std::vector<PeriodicFacePair<typename MeshType::cell_iterator> > &matched_pairs,
+   const Tensor<1,MeshType::space_dimension> &offset,
+   const FullMatrix<double>              &matrix)
+  {
+    static const int dim = MeshType::dimension;
+    static const int space_dim = MeshType::space_dimension;
+    (void)dim;
+    (void)space_dim;
+    Assert (0<=direction && direction<space_dim,
+            ExcIndexRange (direction, 0, space_dim));
+
+    typedef std::pair<typename MeshType::cell_iterator, unsigned int> CellFace;
+
+    // Walk over the cells of level 0 and sort their boundary faces into
+    // two sets of (cell, face number): those carrying b_id1 and those
+    // carrying b_id2. A face is entered into both sets if the two ids
+    // coincide.
+    std::set<CellFace> pairs1;
+    std::set<CellFace> pairs2;
+
+    for (typename MeshType::cell_iterator cell = mesh.begin(0);
+         cell != mesh.end(0); ++cell)
+      for (unsigned int face_no = 0; face_no < GeometryInfo<dim>::faces_per_cell; ++face_no)
+        {
+          const typename MeshType::face_iterator face = cell->face(face_no);
+          if (!face->at_boundary())
+            continue;
+
+          if (face->boundary_id() == b_id1)
+            pairs1.insert(CellFace(cell, face_no));
+          if (face->boundary_id() == b_id2)
+            pairs2.insert(CellFace(cell, face_no));
+        }
+
+    Assert (pairs1.size() == pairs2.size(),
+            ExcMessage ("Unmatched faces on periodic boundaries"));
+
+    // the actual matching is done by match_periodic_face_pairs:
+    match_periodic_face_pairs(pairs1, pairs2, direction, matched_pairs, offset,
+                              matrix);
+  }
+
+
+
+  /*
+   * Variant with a single boundary id: on the level 0 cells of mesh, faces
+   * 2*direction and 2*direction+1 that carry b_id are collected and
+   * matched. In debug mode only matches in standard orientation pass.
+   */
+  template<typename MeshType>
+  void
+  collect_periodic_faces
+  (const MeshType                        &mesh,
+   const types::boundary_id               b_id,
+   const int                              direction,
+   std::vector<PeriodicFacePair<typename MeshType::cell_iterator> > &matched_pairs,
+   const Tensor<1,MeshType::space_dimension> &offset,
+   const FullMatrix<double>              &matrix)
+  {
+    static const int dim = MeshType::dimension;
+    static const int space_dim = MeshType::space_dimension;
+    (void)dim;
+    (void)space_dim;
+    Assert (0<=direction && direction<space_dim,
+            ExcIndexRange (direction, 0, space_dim));
+
+    Assert(dim == space_dim,
+           ExcNotImplemented());
+
+    typedef std::pair<typename MeshType::cell_iterator, unsigned int> CellFace;
+    // local numbers of the two faces of a cell that are considered,
+    // namely 2*direction and 2*direction+1:
+    const unsigned int face_no_1 = 2*direction;
+    const unsigned int face_no_2 = 2*direction+1;
+
+    // Loop over all cells on level 0 and collect those of the two faces
+    // that are at the boundary and carry b_id:
+    std::set<CellFace> pairs1;
+    std::set<CellFace> pairs2;
+
+    for (typename MeshType::cell_iterator cell = mesh.begin(0);
+         cell != mesh.end(0); ++cell)
+      {
+        const typename MeshType::face_iterator face_1 = cell->face(face_no_1);
+        const typename MeshType::face_iterator face_2 = cell->face(face_no_2);
+
+        if (face_1->at_boundary() && face_1->boundary_id() == b_id)
+          pairs1.insert(CellFace(cell, face_no_1));
+        if (face_2->at_boundary() && face_2->boundary_id() == b_id)
+          pairs2.insert(CellFace(cell, face_no_2));
+      }
+
+    Assert (pairs1.size() == pairs2.size(),
+            ExcMessage ("Unmatched faces on periodic boundaries"));
+
+#ifdef DEBUG
+    const unsigned int n_pairs_before = matched_pairs.size();
+#endif
+
+    // the actual matching is done by match_periodic_face_pairs:
+    match_periodic_face_pairs(pairs1, pairs2, direction, matched_pairs, offset,
+                              matrix);
+
+#ifdef DEBUG
+    // every pair appended by the call above must be in standard orientation
+    const unsigned int n_pairs_after = matched_pairs.size();
+    for (unsigned int p = n_pairs_before; p < n_pairs_after; ++p)
+      {
+        Assert(matched_pairs[p].orientation == 1,
+               ExcMessage("Found a face match with non standard orientation. "
+                          "This function is only suitable for meshes with cells "
+                          "in default orientation"));
+      }
+#endif
+  }
+
+
+
+  template <int dim, int spacedim>
+  void copy_boundary_to_manifold_id(Triangulation<dim, spacedim> &tria,
+                                    const bool reset_boundary_ids)
+  {
+    typedef typename Triangulation<dim,spacedim>::active_cell_iterator CellIt;
+
+    // First pass (3d only): the edges of boundary faces. An edge is seen
+    // twice (once from each adjacent boundary face), so its boundary id
+    // cannot be reset right away. Thus only copy here; the reset, if
+    // requested, happens in the second pass.
+    if (dim >= 3)
+      for (CellIt cell=tria.begin_active(); cell != tria.end(); ++cell)
+        for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+          {
+            if (!cell->face(f)->at_boundary())
+              continue;
+            for (signed int e=0; e<static_cast<signed int>(GeometryInfo<dim>::lines_per_face); ++e)
+              cell->face(f)->line(e)->set_manifold_id
+              (static_cast<types::manifold_id>(cell->face(f)->line(e)->boundary_id()));
+          }
+
+    // Second pass: the boundary faces themselves
+    for (CellIt cell=tria.begin_active(); cell != tria.end(); ++cell)
+      for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+        {
+          if (!cell->face(f)->at_boundary())
+            continue;
+          // copy boundary to manifold ids
+          cell->face(f)->set_manifold_id
+          (static_cast<types::manifold_id>(cell->face(f)->boundary_id()));
+
+          // then reset boundary ids if so desired, and in 3d also that of edges
+          if (!reset_boundary_ids)
+            continue;
+          cell->face(f)->set_boundary_id(0);
+          if (dim >= 3)
+            for (signed int e=0; e<static_cast<signed int>(GeometryInfo<dim>::lines_per_face); ++e)
+              cell->face(f)->line(e)->set_boundary_id(0);
+        }
+  }
+
+
+
+  template <int dim, int spacedim>
+  void copy_material_to_manifold_id(Triangulation<dim, spacedim> &tria,
+                                    const bool compute_face_ids)
+  {
+    // Every active cell gets its material id as manifold id. If
+    // compute_face_ids is set, its faces are treated as well: a boundary
+    // face gets the material id of the cell, any other face the smaller
+    // of the material ids of the cell and of its neighbor behind the face.
+    for (typename Triangulation<dim,spacedim>::active_cell_iterator
+         cell=tria.begin_active(); cell != tria.end(); ++cell)
+      {
+        cell->set_manifold_id(cell->material_id());
+        if (!compute_face_ids)
+          continue;
+
+        for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+          if (cell->at_boundary(f))
+            cell->face(f)->set_manifold_id(cell->material_id());
+          else
+            cell->face(f)->set_manifold_id
+            (std::min(cell->material_id(),
+                      cell->neighbor(f)->material_id()));
+      }
+  }
+
+} /* namespace GridTools */
+
+
+// explicit instantiations
+#include "grid_tools.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/grid/grid_tools.inst.in b/source/grid/grid_tools.inst.in
new file mode 100644
index 0000000..cce84f3
--- /dev/null
+++ b/source/grid/grid_tools.inst.in
@@ -0,0 +1,374 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+for (X : TRIANGULATION_AND_DOFHANDLERS; deal_II_dimension : DIMENSIONS ; deal_II_space_dimension : SPACE_DIMENSIONS)
+{
+
+#if deal_II_dimension <= deal_II_space_dimension
+  namespace GridTools \{
+
+  template
+    unsigned int
+    find_closest_vertex (const X &,
+                         const Point<deal_II_space_dimension> &);
+
+  template
+    std::vector<dealii::internal::ActiveCellIterator<deal_II_dimension, deal_II_space_dimension, X>::type>
+    find_cells_adjacent_to_vertex(const X &, const unsigned int);
+
+  template
+    dealii::internal::ActiveCellIterator<deal_II_dimension, deal_II_space_dimension, X>::type
+    find_active_cell_around_point (const X &, const Point<deal_II_space_dimension> &p);
+
+  template
+    std::pair<dealii::internal::ActiveCellIterator<deal_II_dimension, deal_II_space_dimension, X>::type, Point<deal_II_dimension> >
+    find_active_cell_around_point (const Mapping<deal_II_dimension, deal_II_space_dimension> &,
+                                   const X &,
+                                   const Point<deal_II_space_dimension> &);
+
+  template
+    std::vector<dealii::internal::ActiveCellIterator<deal_II_dimension, deal_II_space_dimension, X>::type>
+    compute_active_cell_halo_layer (const X &,
+                                    const std_cxx11::function<bool (const dealii::internal::ActiveCellIterator<deal_II_dimension, deal_II_space_dimension, X>::type&)> &);
+
+  template
+    std::vector<dealii::internal::ActiveCellIterator<deal_II_dimension, deal_II_space_dimension, X>::type>
+    compute_ghost_cell_halo_layer (const X &);
+
+  template
+    std::list<std::pair<X::cell_iterator, X::cell_iterator> >
+    get_finest_common_cells (const X &mesh_1,
+                             const X &mesh_2);
+
+
+  template
+    bool
+    have_same_coarse_mesh (const X &mesh_1,
+                           const X &mesh_2);
+
+  \}
+
+  #endif
+}
+
+// now also instantiate a few additional functions for parallel::distributed::Triangulation
+for (deal_II_dimension : DIMENSIONS ; deal_II_space_dimension : SPACE_DIMENSIONS)
+{
+
+#if deal_II_dimension <= deal_II_space_dimension
+  namespace GridTools \{
+
+  template
+  std::map<unsigned int,types::global_vertex_index>
+  compute_local_to_global_vertex_index_map(const parallel::distributed::Triangulation<deal_II_dimension,deal_II_space_dimension> &triangulation);
+  \}
+
+  #endif
+}
+
+
+
+// NOTE(review): unlike every other block in this file, the two declarations
+// below are not introduced by the `template' keyword and are not wrapped in
+// namespace GridTools. As written they read as plain function declarations
+// for parallel::distributed::Triangulation rather than as explicit
+// instantiations -- confirm whether this is intentional before changing it.
+for (deal_II_space_dimension : SPACE_DIMENSIONS)
+{
+
+    dealii::internal::ActiveCellIterator<deal_II_space_dimension, deal_II_space_dimension, parallel::distributed::Triangulation<deal_II_space_dimension, deal_II_space_dimension> >::type
+    find_active_cell_around_point (const parallel::distributed::Triangulation<deal_II_space_dimension> &,
+                                   const Point<deal_II_space_dimension> &p);
+
+
+    std::pair<dealii::internal::ActiveCellIterator<deal_II_space_dimension, deal_II_space_dimension, parallel::distributed::Triangulation<deal_II_space_dimension, deal_II_space_dimension> >::type, Point<deal_II_space_dimension> >
+    find_active_cell_around_point (const Mapping<deal_II_space_dimension> &,
+                                   const parallel::distributed::Triangulation<deal_II_space_dimension> &,
+                                   const Point<deal_II_space_dimension> &);
+}
+
+
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+#if deal_II_dimension <= deal_II_space_dimension
+    namespace GridTools \{
+
+    template
+      double
+      diameter
+      (const Triangulation<deal_II_dimension, deal_II_space_dimension> &);
+
+    template
+      double
+      volume
+      (const Triangulation<deal_II_dimension, deal_II_space_dimension> &,
+       const Mapping<deal_II_dimension, deal_II_space_dimension> &);
+
+    template
+      void delete_unused_vertices (std::vector<Point<deal_II_space_dimension> > &,
+                                   std::vector<CellData<deal_II_dimension> > &,
+                                   SubCellData &);
+
+    template
+      void delete_duplicated_vertices (std::vector<Point<deal_II_space_dimension> > &,
+                                       std::vector<CellData<deal_II_dimension> > &,
+                                       SubCellData &,
+                                       std::vector<unsigned int> &,
+                                       double);
+
+    template
+      void shift<deal_II_dimension> (const Tensor<1,deal_II_space_dimension> &,
+                                                Triangulation<deal_II_dimension, deal_II_space_dimension> &);
+
+    template
+      void scale<deal_II_dimension> (const double,
+                                     Triangulation<deal_II_dimension, deal_II_space_dimension> &);
+
+    template
+      void distort_random<deal_II_dimension> (const double,
+                                     Triangulation<deal_II_dimension, deal_II_space_dimension> &,
+                                     const bool);
+
+    template
+      void get_face_connectivity_of_cells
+      (const Triangulation<deal_II_dimension, deal_II_space_dimension> &triangulation,
+       DynamicSparsityPattern   &cell_connectivity);
+
+    template
+      void get_face_connectivity_of_cells
+      (const Triangulation<deal_II_dimension, deal_II_space_dimension> &triangulation,
+       SparsityPattern          &cell_connectivity);
+
+    template
+      void get_vertex_connectivity_of_cells
+      (const Triangulation<deal_II_dimension, deal_II_space_dimension> &triangulation,
+       DynamicSparsityPattern            &cell_connectivity);
+
+    template
+      void partition_triangulation (const unsigned int,
+                               Triangulation<deal_II_dimension, deal_II_space_dimension> &);
+
+    template
+      void partition_triangulation (const unsigned int,
+                                    const SparsityPattern &,
+                                    Triangulation<deal_II_dimension, deal_II_space_dimension> &);
+
+    template
+      std::pair<hp::DoFHandler<deal_II_dimension, deal_II_space_dimension>::active_cell_iterator,
+                Point<deal_II_dimension> >
+      find_active_cell_around_point
+      (const hp::MappingCollection<deal_II_dimension, deal_II_space_dimension> &,
+       const hp::DoFHandler<deal_II_dimension, deal_II_space_dimension> &,
+       const Point<deal_II_space_dimension> &);
+
+    template
+      void get_subdomain_association (const Triangulation<deal_II_dimension, deal_II_space_dimension>  &,
+                                      std::vector<types::subdomain_id> &);
+
+    template
+    unsigned int count_cells_with_subdomain_association(
+      const Triangulation<deal_II_dimension, deal_II_space_dimension> &,
+      const types::subdomain_id);
+
+    template
+    std::vector<bool>
+    get_locally_owned_vertices (const Triangulation<deal_II_dimension, deal_II_space_dimension> &);
+
+    template
+      double
+      minimal_cell_diameter (const Triangulation<deal_II_dimension, deal_II_space_dimension> &triangulation);
+
+    template
+      double
+      maximal_cell_diameter (const Triangulation<deal_II_dimension, deal_II_space_dimension> &triangulation);
+
+    template
+    std::map<unsigned int,Point<deal_II_space_dimension> >
+    get_all_vertices_at_boundary (const Triangulation<deal_II_dimension,deal_II_space_dimension> &tria);
+
+    template
+    std::vector<std::set<Triangulation<deal_II_dimension,deal_II_space_dimension>::active_cell_iterator> >
+    vertex_to_cell_map(const Triangulation<deal_II_dimension,deal_II_space_dimension> &triangulation);
+
+#if deal_II_dimension == deal_II_space_dimension
+#  if deal_II_dimension > 1
+    template
+    void
+    laplace_transform (const std::map<unsigned int,Point<deal_II_dimension> > &new_points,
+                       Triangulation<deal_II_dimension> &triangulation,
+                       const Function<deal_II_dimension> *coefficient);
+
+#  endif
+
+    template
+      Triangulation<deal_II_dimension,deal_II_space_dimension>::DistortedCellList
+      fix_up_distorted_child_cells
+      (const Triangulation<deal_II_dimension,deal_II_space_dimension>::DistortedCellList &distorted_cells,
+       Triangulation<deal_II_dimension,deal_II_space_dimension> &triangulation);
+
+#endif
+
+    \}
+#endif
+
+
+
+  }
+
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS; Container : DOFHANDLER_TEMPLATES)
+  {
+#if deal_II_dimension <= deal_II_space_dimension
+    namespace GridTools \{
+
+      template
+      std::map< types::global_dof_index,std::vector<Container<deal_II_dimension,deal_II_space_dimension>::active_cell_iterator> >
+      get_dof_to_support_patch_map<Container<deal_II_dimension,deal_II_space_dimension> >
+      (Container<deal_II_dimension,deal_II_space_dimension> &dof_handler);
+      
+      
+    \}
+#endif
+  }
+
+
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS; Container : TRIANGULATION_AND_DOFHANDLER_TEMPLATES)
+  {
+#if deal_II_dimension <= deal_II_space_dimension
+    namespace GridTools \{
+
+      template
+      std::vector<Container<deal_II_dimension,deal_II_space_dimension>::active_cell_iterator>
+      get_patch_around_cell<Container<deal_II_dimension,deal_II_space_dimension> >
+      (const Container<deal_II_dimension,deal_II_space_dimension>::active_cell_iterator &cell);
+      
+      template
+      std::vector< Container<deal_II_dimension,deal_II_space_dimension>::cell_iterator> 
+      get_cells_at_coarsest_common_level <Container<deal_II_dimension,deal_II_space_dimension> > (
+      const std::vector< Container<deal_II_dimension,deal_II_space_dimension>::active_cell_iterator> & patch_cells);
+      
+      template
+      void build_triangulation_from_patch <Container<deal_II_dimension,deal_II_space_dimension> > (
+      const std::vector<Container<deal_II_dimension,deal_II_space_dimension>::active_cell_iterator>  &patch,
+      Triangulation<Container<deal_II_dimension,deal_II_space_dimension>::dimension,Container<deal_II_dimension,deal_II_space_dimension>::space_dimension> &local_triangulation,
+      std::map<Triangulation<deal_II_dimension,deal_II_space_dimension>::active_cell_iterator,
+      Container<deal_II_dimension,deal_II_space_dimension>::active_cell_iterator > &patch_to_global_tria_map);
+      
+    \}
+#endif
+  }
+
+
+// instantiate the following functions only for the "sequential" containers. this
+// is a misnomer here, however: the point is simply that we only instantiate
+// these functions for certain *iterator* types, and the iterator types are
+// the same for sequential and parallel containers; consequently, we get duplicate
+// instantiation errors if we instantiate for *all* container types, rather than
+// only the sequential ones
+for (X : SEQUENTIAL_TRIANGULATION_AND_DOFHANDLERS; deal_II_dimension : DIMENSIONS ; deal_II_space_dimension : SPACE_DIMENSIONS)
+{
+#if deal_II_dimension <= deal_II_space_dimension
+  namespace GridTools \{
+
+    template
+    bool orthogonal_equality<X::active_face_iterator> (std::bitset<3> &,
+                                                       const X::active_face_iterator&,
+                                                       const X::active_face_iterator&,
+                                                       const int,
+                                                       const Tensor<1,deal_II_space_dimension> &,
+                                                       const FullMatrix<double> &);
+
+    template
+    bool orthogonal_equality<X::face_iterator> (std::bitset<3> &,
+                                                const X::face_iterator&,
+                                                const X::face_iterator&,
+                                                const int,
+                                                const Tensor<1,deal_II_space_dimension> &,
+                                                const FullMatrix<double> &);
+
+    template
+    bool orthogonal_equality<X::active_face_iterator> (const X::active_face_iterator&,
+                                                       const X::active_face_iterator&,
+                                                       const int,
+                                                       const Tensor<1,deal_II_space_dimension> &,
+                                                       const FullMatrix<double> &);
+
+    template
+    bool orthogonal_equality<X::face_iterator> (const X::face_iterator&,
+                                                const X::face_iterator&,
+                                                const int,
+                                                const Tensor<1,deal_II_space_dimension> &,
+                                                const FullMatrix<double> &);
+
+    template
+    void collect_periodic_faces<X> (const X &,
+                                    const types::boundary_id,
+                                    const types::boundary_id,
+                                    const int,
+                                    std::vector<PeriodicFacePair<X::cell_iterator> > &,
+                                    const Tensor<1,X::space_dimension> &,
+                                    const FullMatrix<double> &);
+
+    template
+    void collect_periodic_faces<X> (const X &,
+                                    const types::boundary_id,
+                                    const int,
+                                    std::vector<PeriodicFacePair<X::cell_iterator> > &,
+                                    const Tensor<1,X::space_dimension> &,
+                                    const FullMatrix<double> &);
+
+  \}
+#endif
+}
+
+for (deal_II_dimension : DIMENSIONS ; deal_II_space_dimension : SPACE_DIMENSIONS)
+{
+#if deal_II_dimension <= deal_II_space_dimension
+   #if deal_II_dimension >= 2
+
+     namespace GridTools \{
+      template
+      void
+      collect_periodic_faces<parallel::distributed::Triangulation<deal_II_dimension, deal_II_space_dimension> >
+                                 (const parallel::distributed::Triangulation<deal_II_dimension, deal_II_space_dimension> &,
+                                  const types::boundary_id,
+                                  const types::boundary_id,
+                                  const int,
+                                  std::vector<PeriodicFacePair<parallel::distributed::Triangulation<deal_II_dimension, deal_II_space_dimension>::cell_iterator> > &,
+                                  const Tensor<1,parallel::distributed::Triangulation<deal_II_dimension, deal_II_space_dimension>::space_dimension> &,
+                                  const FullMatrix<double> &);
+
+      template
+      void
+      collect_periodic_faces<parallel::distributed::Triangulation<deal_II_dimension, deal_II_space_dimension> >
+                                 (const parallel::distributed::Triangulation<deal_II_dimension, deal_II_space_dimension> &,
+                                  const types::boundary_id,
+                                  const int,
+                                  std::vector<PeriodicFacePair<parallel::distributed::Triangulation<deal_II_dimension, deal_II_space_dimension>::cell_iterator> > &,
+                                  const Tensor<1,parallel::distributed::Triangulation<deal_II_dimension, deal_II_space_dimension>::space_dimension> &,
+                                  const FullMatrix<double> &);
+     \}
+   #endif
+#endif
+}
+
+for (deal_II_dimension : DIMENSIONS ; deal_II_space_dimension : SPACE_DIMENSIONS)
+{
+#if deal_II_space_dimension >= deal_II_dimension
+ namespace GridTools \{
+      template void copy_boundary_to_manifold_id<deal_II_dimension, deal_II_space_dimension>
+      (Triangulation<deal_II_dimension, deal_II_space_dimension> &, const bool);
+      template void copy_material_to_manifold_id<deal_II_dimension, deal_II_space_dimension>
+      (Triangulation<deal_II_dimension, deal_II_space_dimension> &, const bool);
+
+\}
+#endif
+}
diff --git a/source/grid/intergrid_map.cc b/source/grid/intergrid_map.cc
new file mode 100644
index 0000000..bd86dfd
--- /dev/null
+++ b/source/grid/intergrid_map.cc
@@ -0,0 +1,215 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/base/smartpointer.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/intergrid_map.h>
+#include <deal.II/distributed/tria.h>
+#include <deal.II/distributed/shared_tria.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// Default constructor. Both grid pointers are initialized with a null
+// pointer; the second argument passed to each of them is the name of this
+// class as reported by typeid.
+template <class MeshType>
+InterGridMap<MeshType>::InterGridMap ()
+  : source_grid      (0, typeid(*this).name()),
+    destination_grid (0, typeid(*this).name())
+{}
+
+
+
+
+// Build the map from the cells of @p source_grid to the cells of
+// @p destination_grid. Whatever mapping was stored before is discarded.
+// The two grids are expected to have the same number of cells on level 0;
+// this is checked by the assertion at the end.
+template <class MeshType>
+void InterGridMap<MeshType>::make_mapping (const MeshType &source_grid,
+                                           const MeshType &destination_grid)
+{
+  // start from a clean slate and remember which grids we are working on
+  clear ();
+  this->source_grid      = &source_grid;
+  this->destination_grid = &destination_grid;
+
+  // set up one array per level of the source grid and fill each of them
+  // with end-iterators of the destination grid
+  const unsigned int n_levels = source_grid.get_triangulation().n_levels();
+  mapping.resize (n_levels);
+  for (unsigned int level=0; level<n_levels; ++level)
+    {
+      // find the largest cell index in use on this level by walking over
+      // the cells. the triangulation could in principle tell us, but that
+      // would require knowledge of its internal data structures which we
+      // would rather not depend on here
+      unsigned int max_index = 0;
+      cell_iterator       cell      = source_grid.begin(level);
+      const cell_iterator level_end = source_grid.end(level);
+      for (; cell!=level_end; ++cell)
+        if (static_cast<unsigned int>(cell->index()) > max_index)
+          max_index = cell->index();
+
+      // indices are zero-based, so the number of entries needed is one
+      // larger than the largest index
+      mapping[level].resize (max_index+1, destination_grid.end());
+    }
+
+  // now fill in the mapping: walk over the level-0 cells of both grids in
+  // lockstep; set_mapping() takes care of the children of each pair
+  cell_iterator       src_cell = source_grid.begin(0);
+  cell_iterator       dst_cell = destination_grid.begin(0);
+  const cell_iterator src_end  = source_grid.end(0);
+  for (; src_cell!=src_end; ++src_cell, ++dst_cell)
+    set_mapping (src_cell, dst_cell);
+
+  // small sanity check that the two grids are indeed related: the
+  // destination grid must have run out of level-0 cells at the same time
+  Assert (dst_cell == destination_grid.end(0),
+          ExcIncompatibleGrids ());
+}
+
+
+
+// Store @p dst_cell as the image of @p src_cell and continue with the
+// children of @p src_cell, if it has any:
+// - if both cells are refined, the children are paired up one by one;
+// - if only the source cell is refined, all of its descendants are mapped
+//   to @p dst_cell;
+// - if the source cell is not refined, nothing further is stored.
+template <class MeshType>
+void
+InterGridMap<MeshType>::set_mapping (const cell_iterator &src_cell,
+                                     const cell_iterator &dst_cell)
+{
+  // the entry for the cell itself is always set
+  mapping[src_cell->level()][src_cell->index()] = dst_cell;
+
+  // an unrefined source cell has no descendants that need an entry,
+  // regardless of whether the destination cell is refined or not
+  if (!src_cell->has_children())
+    return;
+
+  if (dst_cell->has_children())
+    {
+      // both cells are refined, so we can descend on both sides. this is
+      // only implemented for cells that have the maximal number of
+      // children and that are refined in the same way
+      const unsigned int n_children
+        = GeometryInfo<MeshType::dimension>::max_children_per_cell;
+
+      Assert(src_cell->n_children()==n_children,
+             ExcNotImplemented());
+      Assert(dst_cell->n_children()==n_children,
+             ExcNotImplemented());
+      Assert(src_cell->refinement_case()==dst_cell->refinement_case(),
+             ExcNotImplemented());
+
+      for (unsigned int child=0; child<n_children; ++child)
+        set_mapping (src_cell->child(child),
+                     dst_cell->child(child));
+    }
+  else
+    {
+      // the source grid is finer here: point all descendants of the
+      // source cell to the one destination cell
+      for (unsigned int child=0; child<src_cell->n_children(); ++child)
+        set_entries_to_cell (src_cell->child(child),
+                             dst_cell);
+    }
+}
+
+
+
+// Map @p src_cell and, recursively, all of its descendants to the single
+// cell @p dst_cell.
+template <class MeshType>
+void
+InterGridMap<MeshType>::set_entries_to_cell (const cell_iterator &src_cell,
+                                             const cell_iterator &dst_cell)
+{
+  // entry for the cell itself
+  mapping[src_cell->level()][src_cell->index()] = dst_cell;
+
+  // without children the recursion ends here
+  if (!src_cell->has_children())
+    return;
+
+  // otherwise every child gets the same destination cell
+  for (unsigned int child=0; child<src_cell->n_children(); ++child)
+    set_entries_to_cell (src_cell->child(child),
+                         dst_cell);
+}
+
+
+// Return the destination cell that is stored for @p source_cell.
+//
+// The assertions check that the iterator is in a valid state and that its
+// level and index address an existing entry of the mapping table; a
+// violation is reported as ExcInvalidKey.
+template <class MeshType>
+typename InterGridMap<MeshType>::cell_iterator
+InterGridMap<MeshType>::operator [] (const cell_iterator &source_cell) const
+{
+  Assert (source_cell.state() == IteratorState::valid,
+          ExcInvalidKey (source_cell));
+  // levels and indices are zero-based, so they have to be strictly smaller
+  // than the sizes of the arrays they index into. (a comparison with <=
+  // would let level == mapping.size() pass and then read mapping[level]
+  // out of bounds in the check that follows.)
+  Assert (source_cell->level() < static_cast<int>(mapping.size()),
+          ExcInvalidKey (source_cell));
+  Assert (source_cell->index() < static_cast<int>(mapping[source_cell->level()].size()),
+          ExcInvalidKey (source_cell));
+
+  return mapping[source_cell->level()][source_cell->index()];
+}
+
+
+
+// Reset the object: both grid pointers are set back to null and all
+// stored mapping entries are removed.
+template <class MeshType>
+void InterGridMap<MeshType>::clear ()
+{
+  source_grid      = 0;
+  destination_grid = 0;
+  mapping.clear ();
+}
+
+
+
+// Return a reference to the grid that the source_grid pointer currently
+// points to.
+template <class MeshType>
+const MeshType &
+InterGridMap<MeshType>::get_source_grid () const
+{
+  const MeshType &grid = *source_grid;
+  return grid;
+}
+
+
+
+// Return a reference to the grid that the destination_grid pointer
+// currently points to.
+template <class MeshType>
+const MeshType &
+InterGridMap<MeshType>::get_destination_grid () const
+{
+  const MeshType &grid = *destination_grid;
+  return grid;
+}
+
+
+
+// Return the sum of what MemoryConsumption::memory_consumption() reports
+// for the mapping table and for the two grid pointers.
+template <class MeshType>
+std::size_t
+InterGridMap<MeshType>::memory_consumption () const
+{
+  std::size_t total = MemoryConsumption::memory_consumption (mapping);
+  total += MemoryConsumption::memory_consumption (source_grid);
+  total += MemoryConsumption::memory_consumption (destination_grid);
+  return total;
+}
+
+
+
+// explicit instantiations
+#include "intergrid_map.inst"
+
+DEAL_II_NAMESPACE_CLOSE
+
diff --git a/source/grid/intergrid_map.inst.in b/source/grid/intergrid_map.inst.in
new file mode 100644
index 0000000..7cda634
--- /dev/null
+++ b/source/grid/intergrid_map.inst.in
@@ -0,0 +1,23 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+for (X : TRIANGULATION_AND_DOFHANDLERS; deal_II_dimension : DIMENSIONS ; deal_II_space_dimension : SPACE_DIMENSIONS)
+  {
+#if deal_II_dimension <= deal_II_space_dimension
+    template class InterGridMap<X>;
+#endif
+  }
diff --git a/source/grid/manifold.cc b/source/grid/manifold.cc
new file mode 100644
index 0000000..57a8a52
--- /dev/null
+++ b/source/grid/manifold.cc
@@ -0,0 +1,318 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/tensor.h>
+#include <deal.II/grid/manifold.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/fe/fe_q.h>
+#include <cmath>
+
+DEAL_II_NAMESPACE_OPEN
+
+using namespace Manifolds;
+
+/* -------------------------- Manifold --------------------- */
+
+
+// Destructor. There is nothing to do here.
+template <int dim, int spacedim>
+Manifold<dim, spacedim>::~Manifold ()
+{
+}
+
+
+
+// Base class version of the projection. It does not compute anything:
+// calling it trips the assertion with ExcPureFunctionCalled, and the
+// default-constructed point is returned only so that the function has a
+// return value.
+template <int dim, int spacedim>
+Point<spacedim>
+Manifold<dim, spacedim>::project_to_manifold (const std::vector<Point<spacedim> > &/*surrounding_points*/,
+                                              const Point<spacedim> &/*candidate*/) const
+{
+  Assert (false, ExcPureFunctionCalled());
+  return Point<spacedim>();
+}
+
+
+
+// Compute a new point from the points and weights stored in @p quad: form
+// the weighted sum of the points and hand it, together with the points
+// themselves, to project_to_manifold().
+template <int dim, int spacedim>
+Point<spacedim>
+Manifold<dim, spacedim>::
+get_new_point (const Quadrature<spacedim> &quad) const
+{
+  const std::vector<Point<spacedim> > &points  = quad.get_points();
+  const std::vector<double>           &weights = quad.get_weights();
+
+#ifdef DEBUG
+  // the weights are expected to add up to one
+  double weight_sum=0;
+  for (unsigned int q=0; q<weights.size(); ++q)
+    weight_sum += weights[q];
+  Assert(std::abs(weight_sum-1.0) < 1e-10, ExcMessage("Weights should sum to 1!"));
+#endif
+
+  // weighted sum of the given points
+  Point<spacedim> average;
+  for (unsigned int q=0; q<points.size(); ++q)
+    average += points[q]*weights[q];
+
+  return project_to_manifold(points, average);
+}
+
+
+// Return the new point for @p line: get_new_point() applied to the
+// quadrature that get_default_quadrature() produces for the line.
+template <int dim, int spacedim>
+Point<spacedim>
+Manifold<dim, spacedim>::
+get_new_point_on_line (const typename Triangulation<dim, spacedim>::line_iterator &line) const
+{
+  const Quadrature<spacedim> line_quadrature = get_default_quadrature(line);
+  return get_new_point (line_quadrature);
+}
+
+
+
+// Return the new point for @p quad: get_new_point() applied to the
+// quadrature that get_default_quadrature() produces for the quad.
+template <int dim, int spacedim>
+Point<spacedim>
+Manifold<dim, spacedim>::
+get_new_point_on_quad (const typename Triangulation<dim, spacedim>::quad_iterator &quad) const
+{
+  const Quadrature<spacedim> quad_quadrature = get_default_quadrature(quad);
+  return get_new_point (quad_quadrature);
+}
+
+
+// Return the new point for @p face by dispatching on the dimension: in 2d
+// a face is treated as a line, in 3d as a quad. For dim<=1 the assertion
+// fails; for any dimension not handled a default-constructed point is
+// returned.
+template <int dim, int spacedim>
+Point<spacedim>
+Manifold<dim,spacedim>::
+get_new_point_on_face (const typename Triangulation<dim,spacedim>::face_iterator &face) const
+{
+  Assert (dim>1, ExcImpossibleInDim(dim));
+
+  if (dim == 2)
+    return get_new_point_on_line (face);
+  if (dim == 3)
+    return get_new_point_on_quad (face);
+
+  return Point<spacedim>();
+}
+
+
+
+// Return the new point for @p cell by dispatching on the dimension: a cell
+// is treated as a line in 1d, as a quad in 2d and as a hex in 3d. For any
+// other dimension a default-constructed point is returned.
+template <int dim, int spacedim>
+Point<spacedim>
+Manifold<dim,spacedim>::
+get_new_point_on_cell (const typename Triangulation<dim,spacedim>::cell_iterator &cell) const
+{
+  if (dim == 1)
+    return get_new_point_on_line (cell);
+  if (dim == 2)
+    return get_new_point_on_quad (cell);
+  if (dim == 3)
+    return get_new_point_on_hex (cell);
+
+  return Point<spacedim>();
+}
+
+
+// Specializations of get_new_point_on_face() for dim==1 and spacedim equal
+// to 1, 2 and 3. None of them computes anything: each one trips the
+// assertion with ExcImpossibleInDim(1) and returns a default-constructed
+// point.
+template <>
+Point<1>
+Manifold<1,1>::get_new_point_on_face (const Triangulation<1,1>::face_iterator &/*face*/) const
+{
+  Assert (false, ExcImpossibleInDim(1));
+  return Point<1>();
+}
+
+
+template <>
+Point<2>
+Manifold<1,2>::get_new_point_on_face (const Triangulation<1,2>::face_iterator &/*face*/) const
+{
+  Assert (false, ExcImpossibleInDim(1));
+  return Point<2>();
+}
+
+
+
+template <>
+Point<3>
+Manifold<1,3>::get_new_point_on_face (const Triangulation<1,3>::face_iterator &/*face*/) const
+{
+  Assert (false, ExcImpossibleInDim(1));
+  return Point<3>();
+}
+
+
+// Specializations of get_new_point_on_quad() for dim==1 and spacedim equal
+// to 1, 2 and 3. None of them computes anything: each one trips the
+// assertion with ExcImpossibleInDim(1) and returns a default-constructed
+// point.
+template <>
+Point<1>
+Manifold<1,1>::get_new_point_on_quad (const Triangulation<1,1>::quad_iterator &/*quad*/) const
+{
+  Assert (false, ExcImpossibleInDim(1));
+  return Point<1>();
+}
+
+template <>
+Point<2>
+Manifold<1,2>::get_new_point_on_quad (const Triangulation<1,2>::quad_iterator &/*quad*/) const
+{
+  Assert (false, ExcImpossibleInDim(1));
+  return Point<2>();
+}
+
+
+template <>
+Point<3>
+Manifold<1,3>::get_new_point_on_quad (const Triangulation<1,3>::quad_iterator &/*quad*/) const
+{
+  Assert (false, ExcImpossibleInDim(1));
+  return Point<3>();
+}
+
+// Generic version of get_new_point_on_hex(). It trips the assertion with
+// ExcImpossibleInDim(dim) and returns a default-constructed point; the
+// <3,3> case has its own specialization.
+template <int dim, int spacedim>
+Point<spacedim>
+Manifold<dim, spacedim>::get_new_point_on_hex (const typename Triangulation<dim, spacedim>::hex_iterator &/*hex*/) const
+{
+  Assert (false, ExcImpossibleInDim(dim));
+  return Point<spacedim>();
+}
+
+// Specialization for dim==spacedim==3: return get_new_point() applied to
+// the quadrature that get_default_quadrature() produces for @p hex.
+// NOTE(review): the second argument `true' presumably selects a different
+// set of weights than the default -- confirm against the declaration of
+// get_default_quadrature().
+template <>
+Point<3>
+Manifold<3,3>::
+get_new_point_on_hex (const Triangulation<3, 3>::hex_iterator &hex) const
+{
+  const Quadrature<3> hex_quadrature = get_default_quadrature(hex, true);
+  return get_new_point(hex_quadrature);
+}
+
+
+/* -------------------------- FlatManifold --------------------- */
+
+
+// Constructor. Stores the given periodicity and tolerance in the members
+// of the same names.
+template <int dim, int spacedim>
+FlatManifold<dim,spacedim>::FlatManifold (const Point<spacedim> periodicity,
+                                          const double tolerance)
+  : periodicity (periodicity),
+    tolerance   (tolerance)
+{}
+
+// Compute the weighted sum of the points stored in @p quad, taking the
+// periodicity of this manifold into account, and pass the result through
+// project_to_manifold().
+//
+// Periodicity is only considered if the norm of the periodicity vector
+// exceeds the tolerance, and then only in those coordinate directions d for
+// which periodicity[d] is positive. In such a direction, every point whose
+// distance from the smallest coordinate among all points is larger than
+// half a period is shifted down by one period before averaging, and a
+// negative result is shifted back up by one period afterwards.
+template <int dim, int spacedim>
+Point<spacedim>
+FlatManifold<dim, spacedim>::
+get_new_point (const Quadrature<spacedim> &quad) const
+{
+  const std::vector<Point<spacedim> > &surrounding_points = quad.get_points();
+  const std::vector<double> &weights = quad.get_weights();
+
+#ifdef DEBUG
+  double sum=0;
+  for (unsigned int i=0; i<weights.size(); ++i)
+    sum+= weights[i];
+  // Here it is correct to use tolerance as an absolute one, since
+  // this should be relative to unity.
+  Assert(std::abs(sum-1.0) < tolerance, ExcMessage("Weights should sum to 1!"));
+#endif
+
+
+  Point<spacedim> p;
+  Point<spacedim> dp;
+  Point<spacedim> minP = periodicity;
+  const bool check_period = (periodicity.norm() > tolerance);
+  if (check_period)
+    for (unsigned int i=0; i<surrounding_points.size(); ++i)
+      for (unsigned int d=0; d<spacedim; ++d)
+        {
+          // component-wise minimum over all points (and the period itself)
+          minP[d] = std::min(minP[d], surrounding_points[i][d]);
+          if (periodicity[d] > 0)
+            {
+              // the coordinate has to lie inside the periodic box, i.e.
+              // it has to satisfy the upper AND the lower bound. (joining
+              // the two conditions with || would make the check hold for
+              // essentially every value and thus render it useless.)
+              Assert( (surrounding_points[i][d] < periodicity[d]+tolerance*periodicity.norm()) &&
+                      (surrounding_points[i][d] >= -tolerance*periodicity.norm()),
+                      ExcPeriodicBox(d, surrounding_points[i], periodicity, tolerance*periodicity.norm()));
+            }
+        }
+
+  for (unsigned int i=0; i<surrounding_points.size(); ++i)
+    {
+      // dp is the shift applied to point i; it stays zero unless the
+      // point is more than half a period away from the minimum
+      dp = Point<spacedim>();
+      if (check_period)
+        {
+          for (unsigned int d=0; d<spacedim; ++d)
+            if (periodicity[d] > 0)
+              dp[d] = ( (surrounding_points[i][d]-minP[d]) > periodicity[d]/2.0 ?
+                        -periodicity[d] : 0.0 );
+        }
+      p += (surrounding_points[i]+dp)*weights[i];
+    }
+
+  // averaging shifted points may have produced a negative coordinate;
+  // move it back up by one period
+  if (check_period)
+    for (unsigned int d=0; d<spacedim; ++d)
+      if (periodicity[d] > 0)
+        p[d] = (p[d] < 0 ? p[d] + periodicity[d] : p[d]);
+
+  return project_to_manifold(surrounding_points, p);
+}
+
+// Projection for the flat manifold: the candidate point is returned
+// unchanged and the surrounding points are not looked at.
+template <int dim, int spacedim>
+Point<spacedim>
+FlatManifold<dim, spacedim>::
+project_to_manifold (const std::vector<Point<spacedim> > &/*vertices*/,
+                     const Point<spacedim> &candidate) const
+{
+  return candidate;
+}
+
+
+/* -------------------------- ChartManifold --------------------- */
+
+// Destructor. There is nothing to do here.
+template <int dim, int spacedim, int chartdim>
+ChartManifold<dim,spacedim,chartdim>::~ChartManifold ()
+{
+}
+
+// Constructor. The given periodicity is passed on to the sub_manifold
+// member.
+template <int dim, int spacedim, int chartdim>
+ChartManifold<dim,spacedim,chartdim>::ChartManifold (const Point<chartdim> periodicity)
+  : sub_manifold (periodicity)
+{}
+
+
+// Compute a new point from the points and weights stored in @p quad by
+// going through the chart: every point is mapped with pull_back(), the
+// sub_manifold member computes a new point from the pulled-back points and
+// the original weights, and that point is mapped back with push_forward().
+template <int dim, int spacedim, int chartdim>
+Point<spacedim>
+ChartManifold<dim,spacedim,chartdim>::
+get_new_point (const Quadrature<spacedim> &quad) const
+{
+  const std::vector<Point<spacedim> > &space_points = quad.get_points();
+  const std::vector<double>           &weights      = quad.get_weights();
+
+  // images of the given points under pull_back()
+  std::vector<Point<chartdim> > chart_points(space_points.size());
+  for (unsigned int q=0; q<space_points.size(); ++q)
+    chart_points[q] = pull_back(space_points[q]);
+
+  // same weights, but pulled-back points
+  const Quadrature<chartdim> chart_quad(chart_points, weights);
+
+  return push_forward(sub_manifold.get_new_point(chart_quad));
+}
+
+
+
+
+
+
+// explicit instantiations
+#include "manifold.inst"
+
+DEAL_II_NAMESPACE_CLOSE
+
diff --git a/source/grid/manifold.inst.in b/source/grid/manifold.inst.in
new file mode 100644
index 0000000..1a7962e
--- /dev/null
+++ b/source/grid/manifold.inst.in
@@ -0,0 +1,31 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+// Explicit instantiations for all (dimension, space dimension) pairs with dim <= spacedim.
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+#if deal_II_dimension <= deal_II_space_dimension
+    template class Manifold<deal_II_dimension, deal_II_space_dimension>;
+    template class FlatManifold<deal_II_dimension, deal_II_space_dimension>;
+
+    template class ChartManifold<deal_II_dimension, deal_II_space_dimension, 1>;
+    template class ChartManifold<deal_II_dimension, deal_II_space_dimension, 2>;
+    template class ChartManifold<deal_II_dimension, deal_II_space_dimension, 3>;
+#endif
+  }
+
+
+
diff --git a/source/grid/manifold_lib.cc b/source/grid/manifold_lib.cc
new file mode 100644
index 0000000..8f3cd38
--- /dev/null
+++ b/source/grid/manifold_lib.cc
@@ -0,0 +1,297 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/manifold_lib.h>
+#include <deal.II/base/tensor.h>
+#include <deal.II/lac/vector.h>
+#include <cmath>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <int dim, int spacedim>
+SphericalManifold<dim,spacedim>::SphericalManifold(const Point<spacedim> center):
+  ChartManifold<dim,spacedim,spacedim>(SphericalManifold<dim,spacedim>::get_periodicity()),
+  center(center) // stores the given center; the base class receives the result of get_periodicity()
+{}
+
+
+// Return the periodicity that the constructor passes to the ChartManifold base class.
+template <int dim, int spacedim>
+Point<spacedim>
+SphericalManifold<dim,spacedim>::get_periodicity()
+{
+  Point<spacedim> periodicity;
+  periodicity[spacedim-1] = 2*numbers::PI; // only the last chart coordinate is set (theta in 2d, phi in 3d); the others keep their default-constructed value
+  return periodicity;
+}
+
+// Return the new point for the given quadrature rule: chart average in 2d, rescaled mean otherwise.
+template <int dim, int spacedim>
+Point<spacedim>
+SphericalManifold<dim,spacedim>::get_new_point(const Quadrature<spacedim> &quad) const
+{
+  if (spacedim == 2)
+    return ChartManifold<dim,spacedim,spacedim>::get_new_point(quad);
+  else
+    {
+      double rho_average = 0;
+      Point<spacedim> mid_point;
+      for (unsigned int i=0; i<quad.size(); ++i)
+        {
+          rho_average += quad.weight(i)*(quad.point(i)-center).norm();
+          mid_point += quad.weight(i)*quad.point(i);
+        }
+      // direction from the center to the weighted mean; a vanishing one would make the scaling below divide by zero
+      Tensor<1,spacedim> R = mid_point-center;
+      const double R_norm = R.norm();
+      Assert(R_norm > 0, ExcMessage("The weighted average of the given points coincides with the center of the sphere; no direction for the new point can be computed."));
+      R *= rho_average/R_norm; // scale to the averaged radius
+      return center+R;
+    }
+}
+
+
+// Map a point given in polar (2d) or spherical (3d) coordinates to real space.
+template <int dim, int spacedim>
+Point<spacedim>
+SphericalManifold<dim,spacedim>::push_forward(const Point<spacedim> &spherical_point) const
+{
+  Assert(spherical_point[0] >=0.0,
+         ExcMessage("Negative radius for given point."));
+  const double radius = spherical_point[0];
+  const double theta  = spherical_point[1];
+
+  // offset from the center; it stays at its initial value for a radius of at most 1e-10
+  Point<spacedim> offset;
+  if (radius > 1e-10)
+    {
+      if (spacedim == 2)
+        {
+          offset[0] = radius*cos(theta);
+          offset[1] = radius*sin(theta);
+        }
+      else if (spacedim == 3)
+        {
+          const double azimuth = spherical_point[2];
+          offset[0] = radius*sin(theta)*cos(azimuth);
+          offset[1] = radius*sin(theta)*sin(azimuth);
+          offset[2] = radius*cos(theta);
+        }
+      else
+        Assert(false, ExcInternalError());
+    }
+  return offset+center;
+}
+
+template <int dim, int spacedim>
+Point<spacedim>
+SphericalManifold<dim,spacedim>::pull_back(const Point<spacedim> &space_point) const
+{
+  // chart coordinates of space_point relative to the center: the
+  // radius comes first, the angle(s) follow
+  const Tensor<1,spacedim> R = space_point-center;
+
+  Point<spacedim> chart_point;
+  chart_point[0] = R.norm();
+
+  if (spacedim == 2)
+    {
+      // angle in the plane; negative results of atan2 are
+      // shifted by 2*pi
+      const double theta = atan2(R[1],R[0]);
+      chart_point[1] = (theta < 0 ? theta + 2*numbers::PI : theta);
+    }
+  else if (spacedim == 3)
+    {
+      // phi is treated like the angle of the 2d case ...
+      const double phi = atan2(R[1],R[0]);
+      chart_point[2] = (phi < 0 ? phi + 2*numbers::PI : phi); // phi is periodic
+      // ... and theta is the angle measured from the z-axis
+      chart_point[1] = atan2(sqrt(R[0]*R[0]+R[1]*R[1]),R[2]);  // theta
+    }
+  else
+    Assert(false, ExcInternalError());
+
+  return chart_point;
+}
+
+
+// ============================================================
+// CylindricalManifold
+// ============================================================
+
+template <int dim, int spacedim>
+CylindricalManifold<dim,spacedim>::CylindricalManifold(const unsigned int axis,
+                                                       const double tolerance) :
+  direction (Point<spacedim>::unit_vector(axis)), // the axis points along coordinate direction number axis
+  point_on_axis (Point<spacedim>()), // default-constructed point (presumably the origin)
+  tolerance(tolerance)
+{
+  Assert(spacedim > 1, ExcImpossibleInDim(1)); // not meaningful for spacedim == 1
+}
+
+// Cylinder around the axis through point_on_axis along direction; direction may have any nonzero length.
+template <int dim, int spacedim>
+CylindricalManifold<dim,spacedim>::CylindricalManifold(const Point<spacedim> &direction,
+                                                       const Point<spacedim> &point_on_axis,
+                                                       const double tolerance) :
+  direction (direction / direction.norm()), // normalized: get_new_point() projects with (v*direction)*direction, which requires a unit vector
+  point_on_axis (point_on_axis),
+  tolerance(tolerance)
+{
+  Assert(spacedim > 2, ExcImpossibleInDim(spacedim));
+}
+
+
+// Return the flat-manifold average of the quadrature points, moved radially so that
+// its distance from the axis equals the weighted average distance of those points.
+template <int dim, int spacedim>
+Point<spacedim>
+CylindricalManifold<dim,spacedim>::
+get_new_point (const Quadrature<spacedim> &quad) const
+{
+  const std::vector<Point<spacedim> > &points  = quad.get_points();
+  const std::vector<double>           &weights = quad.get_weights();
+
+  // candidate point as computed by the flat manifold
+  const Point<spacedim> middle = flat_manifold.get_new_point(quad);
+
+  // weighted average of the distances of the given points from the axis
+  double radius = 0;
+  for (unsigned int q=0; q<points.size(); ++q)
+    {
+      const Tensor<1,spacedim> offset = points[q]-point_on_axis;
+      const Tensor<1,spacedim> radial = offset - (offset*direction) * direction;
+      radius += weights[q]*radial.norm();
+    }
+
+  // split the candidate's offset from point_on_axis into the part along
+  // the axis and the part perpendicular to it
+  const Tensor<1,spacedim> middle_offset = middle-point_on_axis;
+  const Tensor<1,spacedim> vector_from_axis = middle_offset -
+                                              (middle_offset * direction) * direction;
+  const double distance = vector_from_axis.norm();
+
+  // a candidate that lies on the axis (up to the tolerance) has no
+  // radial direction that could be rescaled
+  if (distance <= tolerance * middle.norm())
+    return middle;
+
+  // otherwise rescale the perpendicular part to the averaged radius
+  return Point<spacedim>((vector_from_axis / distance * radius +
+                          (middle_offset * direction) * direction +
+                          point_on_axis));
+}
+
+
+// ============================================================
+// FunctionManifold
+// ============================================================
+template <int dim, int spacedim, int chartdim>
+FunctionManifold<dim,spacedim,chartdim>::FunctionManifold
+(const Function<chartdim> &push_forward_function,
+ const Function<spacedim> &pull_back_function,
+ const Point<chartdim> periodicity,
+ const double tolerance):
+  ChartManifold<dim,spacedim,chartdim>(periodicity),
+  push_forward_function(&push_forward_function), // only the addresses are stored; the functions are not copied
+  pull_back_function(&pull_back_function),
+  tolerance(tolerance),
+  owns_pointers(false) // the destructor therefore leaves both functions alone
+{
+  AssertDimension(push_forward_function.n_components, spacedim); // one component per space coordinate
+  AssertDimension(pull_back_function.n_components, chartdim); // one component per chart coordinate
+}
+
+template <int dim, int spacedim, int chartdim>
+FunctionManifold<dim,spacedim,chartdim>::FunctionManifold
+(const std::string push_forward_expression,
+ const std::string pull_back_expression,
+ const Point<chartdim> periodicity,
+ const typename FunctionParser<spacedim>::ConstMap const_map,
+ const std::string chart_vars,
+ const std::string space_vars,
+ const double tolerance) :
+  ChartManifold<dim,spacedim,chartdim>(periodicity),
+  const_map(const_map),
+  tolerance(tolerance),
+  owns_pointers(true) // the two parsers allocated below are deleted by the destructor
+{
+  FunctionParser<chartdim> *pf = new FunctionParser<chartdim>(spacedim); // chart -> space, spacedim components
+  FunctionParser<spacedim> *pb = new FunctionParser<spacedim>(chartdim); // space -> chart, chartdim components
+  pf->initialize(chart_vars, push_forward_expression, const_map); // both parsers share the same constants
+  pb->initialize(space_vars, pull_back_expression, const_map);
+  push_forward_function = pf; // hand the initialized parsers over to the members
+  pull_back_function = pb;
+}
+
+template <int dim, int spacedim, int chartdim>
+FunctionManifold<dim,spacedim,chartdim>::~FunctionManifold()
+{
+  // functions that were handed in from outside are not ours to delete
+  if (!owns_pointers)
+    return;
+  // reset each member first, then delete the object it pointed to
+  const Function<chartdim> *forward = push_forward_function;
+  push_forward_function = 0;
+  delete forward;
+  const Function<spacedim> *backward = pull_back_function;
+  pull_back_function = 0;
+  delete backward;
+}
+
+template <int dim, int spacedim, int chartdim>
+Point<spacedim>
+FunctionManifold<dim,spacedim,chartdim>::push_forward(const Point<chartdim> &chart_point) const
+{
+  Vector<double> pf(spacedim);
+  Point<spacedim> result;
+  push_forward_function->vector_value(chart_point, pf); // evaluate all spacedim components at once
+  for (unsigned int i=0; i<spacedim; ++i)
+    result[i] = pf[i];
+  // debug builds only: pull the result back and compare it with chart_point
+#ifdef DEBUG
+  Vector<double> pb(chartdim);
+  pull_back_function->vector_value(result, pb);
+  for (unsigned int i=0; i<chartdim; ++i) // each component must match within tolerance, relatively or absolutely
+    Assert((chart_point.norm() > tolerance &&
+            (std::abs(pb[i]-chart_point[i]) < tolerance*chart_point.norm())) ||
+           (std::abs(pb[i]-chart_point[i]) < tolerance),
+           ExcMessage("The push forward is not the inverse of the pull back! Bailing out."));
+#endif
+
+  return result;
+}
+
+// Evaluate the pull-back function at a point in real space and return the chart point.
+template <int dim, int spacedim, int chartdim>
+Point<chartdim>
+FunctionManifold<dim,spacedim,chartdim>::pull_back(const Point<spacedim> &space_point) const
+{
+  Vector<double> values(chartdim);
+  pull_back_function->vector_value(space_point, values);
+  Point<chartdim> chart_point;
+  for (unsigned int c=0; c<chartdim; ++c)
+    chart_point[c] = values[c];
+  return chart_point;
+}
+
+// explicit instantiations
+#include "manifold_lib.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/grid/manifold_lib.inst.in b/source/grid/manifold_lib.inst.in
new file mode 100644
index 0000000..eaaf53d
--- /dev/null
+++ b/source/grid/manifold_lib.inst.in
@@ -0,0 +1,30 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// Explicit instantiations for all (dimension, space dimension) pairs with dim <= spacedim.
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+#if deal_II_dimension <= deal_II_space_dimension
+    template class SphericalManifold<deal_II_dimension, deal_II_space_dimension>;
+#endif
+#if deal_II_dimension <= deal_II_space_dimension
+    template class CylindricalManifold<deal_II_dimension, deal_II_space_dimension>;
+    template class FunctionManifold<deal_II_dimension, deal_II_space_dimension, 1>;			  
+    template class FunctionManifold<deal_II_dimension, deal_II_space_dimension, 2>;
+    template class FunctionManifold<deal_II_dimension, deal_II_space_dimension, 3>;
+#endif
+  }
+
+
diff --git a/source/grid/persistent_tria.cc b/source/grid/persistent_tria.cc
new file mode 100644
index 0000000..b9c7098
--- /dev/null
+++ b/source/grid/persistent_tria.cc
@@ -0,0 +1,255 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/grid/persistent_tria.h>
+#include <deal.II/grid/magic_numbers.h>
+#include <iostream>
+
+DEAL_II_NAMESPACE_OPEN
+
+// Out-of-class definitions of the static constants dimension and spacedimension.
+template <int dim, int spacedim>
+const unsigned int
+PersistentTriangulation<dim,spacedim>::dimension;
+
+template <int dim, int spacedim>
+const unsigned int
+PersistentTriangulation<dim,spacedim>::spacedimension;
+
+// Construct from a coarse grid; only a pointer to that grid is stored here.
+template <int dim, int spacedim>
+PersistentTriangulation<dim,spacedim>::
+PersistentTriangulation (const Triangulation<dim,spacedim> &coarse_grid)
+  :
+  coarse_grid (&coarse_grid, typeid(*this).name())
+{}
+
+
+// Copy constructor: takes over the coarse grid pointer and the stored flags of old_tria.
+template <int dim, int spacedim>
+PersistentTriangulation<dim,spacedim>::
+PersistentTriangulation (const PersistentTriangulation<dim,spacedim> &old_tria)
+  :
+  // default initialize
+  // tria, i.e. it will be
+  // empty on first use
+  Triangulation<dim,spacedim> (),
+  coarse_grid (old_tria.coarse_grid),
+  refine_flags (old_tria.refine_flags),
+  coarsen_flags (old_tria.coarsen_flags)
+{
+  Assert (old_tria.n_levels() == 0, ExcTriaNotEmpty ()); // old_tria itself must not hold any levels
+}
+
+
+// Destructor. The body is empty.
+template <int dim, int spacedim>
+PersistentTriangulation<dim,spacedim>::~PersistentTriangulation ()
+{}
+
+
+// Record the current refine and coarsen flags, then carry out the refinement step.
+template <int dim, int spacedim>
+void
+PersistentTriangulation<dim,spacedim>::execute_coarsening_and_refinement ()
+{
+  // append a new history entry and fill it with the refine flags ...
+  refine_flags.push_back (std::vector<bool>());
+  this->save_refine_flags (refine_flags.back());
+
+  // ... and do the same with the coarsen flags
+  coarsen_flags.push_back (std::vector<bool>());
+  this->save_coarsen_flags (coarsen_flags.back());
+
+  // the base class does the actual work. it is called last, so an
+  // exception thrown there is simply passed on
+  Triangulation<dim,spacedim>::execute_coarsening_and_refinement ();
+}
+
+
+// Replay the complete history by calling restore(step) for every step.
+template <int dim, int spacedim>
+void
+PersistentTriangulation<dim,spacedim>::restore ()
+{
+  // step 0 plus one step for each
+  // stored refinement sweep
+  for (unsigned int step=0; step<=refine_flags.size(); ++step)
+    restore(step);
+}
+
+
+// Perform one step of the replay: step 0 copies the coarse grid, step n>0 replays stored sweep n-1.
+template <int dim, int spacedim>
+void
+PersistentTriangulation<dim,spacedim>::restore (const unsigned int step)
+{
+  if (step==0)
+    {
+      // start from a copy of the coarse grid. this yields an error
+      // if the underlying triangulation was not empty
+      Triangulation<dim,spacedim>::copy_triangulation (*coarse_grid);
+      return;
+    }
+
+  // all later steps load the flags that were saved
+  // for the sweep with index step-1 and run the
+  // refinement of the base class with them
+  Assert(step<refine_flags.size()+1,
+         ExcDimensionMismatch(step, refine_flags.size()+1));
+
+  this->load_refine_flags  (refine_flags[step-1]);
+  this->load_coarsen_flags (coarsen_flags[step-1]);
+
+  // base class version, so that nothing is recorded again
+  Triangulation<dim,spacedim>::execute_coarsening_and_refinement ();
+}
+
+
+// Return the number of stored refinement sweeps, i.e. the size of refine_flags.
+template <int dim, int spacedim>
+unsigned int
+PersistentTriangulation<dim,spacedim>::n_refinement_steps() const
+{
+  return refine_flags.size();
+}
+
+
+// Clear this object, use old_grid as the new coarse grid and drop all stored flags.
+template <int dim, int spacedim>
+void
+PersistentTriangulation<dim,spacedim>::copy_triangulation (const Triangulation<dim,spacedim> &old_grid)
+{
+  this->clear ();
+  coarse_grid  = &old_grid; // only the address is stored; old_grid is not copied here
+  refine_flags.clear ();
+  coarsen_flags.clear ();
+}
+
+
+// Not supported by this class: all arguments are ignored and the body is a failing Assert.
+template <int dim, int spacedim>
+void
+PersistentTriangulation<dim,spacedim>::create_triangulation (const std::vector<Point<spacedim> > &,
+    const std::vector<CellData<dim> > &,
+    const SubCellData &)
+{
+  Assert (false, ExcImpossibleInDim(dim));
+}
+
+
+// Not supported by this class: all arguments are ignored and the body is a failing Assert.
+template <int dim, int spacedim>
+void
+PersistentTriangulation<dim,spacedim>::create_triangulation_compatibility (
+  const std::vector<Point<spacedim> > &,
+  const std::vector<CellData<dim> > &,
+  const SubCellData &)
+{
+  Assert (false, ExcImpossibleInDim(dim));
+}
+
+
+// Write the stored history of refine and coarsen flags to out.
+template <int dim, int spacedim>
+void
+PersistentTriangulation<dim,spacedim>::write_flags(std::ostream &out) const
+{
+  AssertThrow (out, ExcIO());
+
+  // opening magic number, followed by the number of stored sweeps
+  const unsigned int n_sweeps=refine_flags.size();
+  out << mn_persistent_tria_flags_begin << ' ' << n_sweeps << std::endl;
+
+  // per sweep: the refine flags first, then the coarsen flags
+  for (unsigned int sweep=0; sweep<n_sweeps; ++sweep)
+    {
+      this->write_bool_vector (mn_tria_refine_flags_begin, refine_flags[sweep],
+                               mn_tria_refine_flags_end, out);
+      this->write_bool_vector (mn_tria_coarsen_flags_begin, coarsen_flags[sweep],
+                               mn_tria_coarsen_flags_end, out);
+    }
+
+  out << mn_persistent_tria_flags_end << std::endl;
+  AssertThrow (out, ExcIO());
+}
+
+
+// Read a flag history in the format produced by write_flags() from in; the stored flags must be empty.
+template <int dim, int spacedim>
+void
+PersistentTriangulation<dim,spacedim>::read_flags(std::istream &in)
+{
+  Assert(refine_flags.size()==0 && coarsen_flags.size()==0,
+         ExcFlagsNotCleared());
+  AssertThrow (in, ExcIO());
+
+  // zero-initialized: a failed extraction must not leave an indeterminate value to be compared below
+  unsigned int magic_number = 0;
+  in >> magic_number;
+  AssertThrow(magic_number==mn_persistent_tria_flags_begin,
+              typename Triangulation<dim>::ExcGridReadError());
+
+  unsigned int n_flag_levels = 0; // likewise, so that a failed read cannot produce an arbitrary loop bound
+  in >> n_flag_levels;
+  for (unsigned int i=0; i<n_flag_levels; ++i)
+    {
+      refine_flags.push_back (std::vector<bool>());
+      coarsen_flags.push_back (std::vector<bool>());
+      this->read_bool_vector (mn_tria_refine_flags_begin, refine_flags.back(),
+                              mn_tria_refine_flags_end, in);
+      this->read_bool_vector (mn_tria_coarsen_flags_begin, coarsen_flags.back(),
+                              mn_tria_coarsen_flags_end, in);
+    }
+
+  in >> magic_number;
+  AssertThrow(magic_number==mn_persistent_tria_flags_end,
+              typename Triangulation<dim>::ExcGridReadError());
+  AssertThrow (in, ExcIO());
+}
+
+
+// Discard the stored history of refine and coarsen flags.
+template <int dim, int spacedim>
+void
+PersistentTriangulation<dim,spacedim>::clear_flags()
+{
+  refine_flags.clear();
+  coarsen_flags.clear();
+}
+
+
+// Return the sum of the base class's memory consumption and that of the three members of this class.
+template <int dim, int spacedim>
+std::size_t
+PersistentTriangulation<dim,spacedim>::memory_consumption () const
+{
+  return (Triangulation<dim,spacedim>::memory_consumption () +
+          MemoryConsumption::memory_consumption (coarse_grid) +
+          MemoryConsumption::memory_consumption (refine_flags) +
+          MemoryConsumption::memory_consumption (coarsen_flags));
+}
+
+
+// explicit instantiations: three with a single template argument, plus <1,2> and <2,3>
+template class PersistentTriangulation<1>;
+template class PersistentTriangulation<2>;
+template class PersistentTriangulation<3>;
+template class PersistentTriangulation<1,2>;
+template class PersistentTriangulation<2,3>;
+
+DEAL_II_NAMESPACE_CLOSE
+
diff --git a/source/grid/tria.cc b/source/grid/tria.cc
new file mode 100644
index 0000000..3147a4d
--- /dev/null
+++ b/source/grid/tria.cc
@@ -0,0 +1,13297 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/base/table.h>
+#include <deal.II/base/geometry_info.h>
+#include <deal.II/base/std_cxx11/bind.h>
+
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_levels.h>
+#include <deal.II/grid/tria_faces.h>
+#include <deal.II/grid/manifold.h>
+#include <deal.II/grid/tria_boundary.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/grid_tools.h>
+#include <deal.II/grid/magic_numbers.h>
+#include <deal.II/fe/mapping_q1.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/full_matrix.h>
+
+#include <algorithm>
+#include <numeric>
+#include <map>
+#include <list>
+#include <cmath>
+#include <functional>
+
+#include <deal.II/base/std_cxx11/array.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+bool
+SubCellData::check_consistency (const unsigned int dim) const
+{
+  // quads must not be listed for dim 1 and dim 2
+  if ((dim == 1) || (dim == 2))
+    if (boundary_quads.size() != 0)
+      return false;
+
+  // for dim 1, lines must not be listed either
+  if (dim == 1)
+    return (boundary_lines.size() == 0);
+  return true;
+}
+
+
+namespace internal
+{
+  namespace Triangulation
+  {
+    // constructors and memory_consumption() functions of the NumberCache<1>, <2> and <3> specializations
+    NumberCache<1>::NumberCache ()
+      :
+      n_levels (0),
+      n_lines (0),
+      n_active_lines (0)
+      // all other fields are
+      // default constructed
+    {}
+
+
+    // sum of MemoryConsumption::memory_consumption() over the five fields listed
+    std::size_t
+    NumberCache<1>::memory_consumption () const
+    {
+      return (MemoryConsumption::memory_consumption (n_levels) +
+              MemoryConsumption::memory_consumption (n_lines) +
+              MemoryConsumption::memory_consumption (n_lines_level) +
+              MemoryConsumption::memory_consumption (n_active_lines) +
+              MemoryConsumption::memory_consumption (n_active_lines_level));
+    }
+
+    // zeroes the two quad counters only
+    NumberCache<2>::NumberCache () :
+      n_quads (0),
+      n_active_quads (0)
+      // all other fields are
+      // default constructed
+    {}
+
+
+    // NumberCache<1> part (presumably the base class) plus the four quad fields
+    std::size_t
+    NumberCache<2>::memory_consumption () const
+    {
+      return (NumberCache<1>::memory_consumption () +
+              MemoryConsumption::memory_consumption (n_quads) +
+              MemoryConsumption::memory_consumption (n_quads_level) +
+              MemoryConsumption::memory_consumption (n_active_quads) +
+              MemoryConsumption::memory_consumption (n_active_quads_level));
+    }
+
+
+    // zeroes the two hex counters only
+    NumberCache<3>::NumberCache () :
+      n_hexes (0),
+      n_active_hexes (0)
+      // all other fields are
+      // default constructed
+    {}
+
+
+    // NumberCache<2> part (presumably the base class) plus the four hex fields
+    std::size_t
+    NumberCache<3>::memory_consumption () const
+    {
+      return (NumberCache<2>::memory_consumption () +
+              MemoryConsumption::memory_consumption (n_hexes) +
+              MemoryConsumption::memory_consumption (n_hexes_level) +
+              MemoryConsumption::memory_consumption (n_active_hexes) +
+              MemoryConsumption::memory_consumption (n_active_hexes_level));
+    }
+  }
+}
+
+// anonymous namespace for internal helper functions
+namespace
+{
+  // a cell is patch_level_1 if its
+  // children are either all active
+  // or all non-active. only cells
+  // that are not active themselves
+  // may be passed in here.
+  template <int dim, int spacedim>
+  bool cell_is_patch_level_1 (const TriaIterator<dealii::CellAccessor<dim, spacedim> > &cell)
+  {
+    Assert (cell->active() == false, ExcInternalError());
+
+    // count the children that are not active
+    const unsigned int n_children = cell->n_children();
+    unsigned int n_refined_children = 0;
+    for (unsigned int c=0; c<n_children; ++c)
+      if (!cell->child(c)->active())
+        ++n_refined_children;
+    return (n_refined_children == n_children) || (n_refined_children == 0);
+  }
+
+
+
+  // decide whether @p cell will be coarsened, i.e.
+  // whether all of its children are active and
+  // flagged for coarsening. if only some of them
+  // qualify, the coarsen flags of the active
+  // children are cleared.
+  template <int dim, int spacedim>
+  bool cell_will_be_coarsened (const TriaIterator<dealii::CellAccessor<dim,spacedim> > &cell)
+  {
+    // a cell without children is in the same state as
+    // a coarsened one, so neither answer would be
+    // obviously right. callers must not ask ...
+    Assert(cell->has_children(), ExcInternalError());
+    // ... and if they do anyway, the answer is false
+    if (!cell->has_children())
+      return false;
+
+    // count the children that are active and flagged
+    const unsigned int n_children=cell->n_children();
+    unsigned int n_flagged_children=0;
+    for (unsigned int c=0; c<n_children; ++c)
+      {
+        const bool flagged = (cell->child(c)->active() &&
+                              cell->child(c)->coarsen_flag_set());
+        if (flagged)
+          ++n_flagged_children;
+      }
+
+    // every child qualifies: the cell will be coarsened
+    if (n_flagged_children==n_children)
+      return true;
+
+    // otherwise remove the flags from all active
+    // children
+    for (unsigned int c=0; c<n_children; ++c)
+      if (cell->child(c)->active())
+        cell->child(c)->clear_coarsen_flag();
+
+    return false;
+  }
+
+
+  // return, whether the face @p face_no of the
+  // given @p cell will be refined after the
+  // current refinement step, considering
+  // refine and coarsen flags and considering
+  // only those refinements that will be caused
+  // by the neighboring cell.
+
+  // this function is used on both active cells
+  // and cells with children. on cells with
+  // children it is also of interest to know 'how'
+  // the face will be refined. thus there is an
+  // additional third argument @p
+  // expected_face_ref_case returning just
+  // that. be aware, that this variable will
+  // only contain useful information if this
+  // function is called for an active cell.
+  //
+  // thus, this is an internal function, users
+  // should call one of the two alternatives
+  // following below.
+  template <int dim, int spacedim>
+  bool
+  face_will_be_refined_by_neighbor_internal(const TriaIterator<dealii::CellAccessor<dim,spacedim> > &cell,
+                                            const unsigned int                                   face_no,
+                                            RefinementCase<dim-1>                                    &expected_face_ref_case)
+  {
+    // first of all: set the default value for
+    // expected_face_ref_case, which is no
+    // refinement at all
+    expected_face_ref_case=RefinementCase<dim-1>::no_refinement;
+
+    const typename Triangulation<dim,spacedim>::cell_iterator neighbor=cell->neighbor(face_no);
+
+    // If we are at the boundary, there is no
+    // neighbor which could refine the face
+    if (neighbor.state()!=IteratorState::valid)
+      return false;
+
+    if (neighbor->has_children())
+      {
+        // if the neighbor is refined, it may be
+        // coarsened. if so, then it won't refine
+        // the face, no matter what else happens
+        if (cell_will_be_coarsened(neighbor))
+          return false;
+        else
+          // if the neighbor is refined, then he
+          // is also refined at our current
+          // face. He will stay so without
+          // coarsening, so return true in that
+          // case.
+          {
+            expected_face_ref_case=cell->face(face_no)->refinement_case();
+            return true;
+          }
+      }
+
+    // now, the neighbor is not refined, but
+    // perhaps he will be
+    const RefinementCase<dim> nb_ref_flag=neighbor->refine_flag_set();
+    if (nb_ref_flag != RefinementCase<dim>::no_refinement)
+      {
+        // now we need to know, which of the
+        // neighbors faces points towards us
+        const unsigned int neighbor_neighbor=cell->neighbor_face_no(face_no);
+        // check, whether the cell will be
+        // refined in a way that refines our
+        // face
+        const RefinementCase<dim-1> face_ref_case=
+          GeometryInfo<dim>::face_refinement_case(nb_ref_flag,
+                                                  neighbor_neighbor,
+                                                  neighbor->face_orientation(neighbor_neighbor),
+                                                  neighbor->face_flip(neighbor_neighbor),
+                                                  neighbor->face_rotation(neighbor_neighbor));
+        if (face_ref_case != RefinementCase<dim-1>::no_refinement)
+          {
+            const typename Triangulation<dim,spacedim>::face_iterator neighbor_face=neighbor->face(neighbor_neighbor);
+            const int this_face_index=cell->face_index(face_no);
+
+            // there are still two basic
+            // possibilities here: the neighbor
+            // might be coarser or as coarse
+            // as we are
+            if (neighbor_face->index()==this_face_index)
+              // the neighbor is as coarse as
+              // we are and will be refined at
+              // the face of consideration, so
+              // return true
+              {
+                expected_face_ref_case = face_ref_case;
+                return true;
+              }
+            else
+              {
+
+                // the neighbor is coarser.
+                // this is the most complicated
+                // case. It might be, that the
+                // neighbor's face will be
+                // refined, but that we will
+                // not see this, as we are
+                // refined in a similar way.
+
+                // so, the neighbor's face must
+                // have children. check, if our
+                // cell's face is one of these
+                // (it could also be a
+                // grand_child)
+                for (unsigned int c=0; c<neighbor_face->n_children(); ++c)
+                  if (neighbor_face->child_index(c)==this_face_index)
+                    {
+                      // if the flagged refine
+                      // case of the face is a
+                      // subset or the same as
+                      // the current refine case,
+                      // then the face, as seen
+                      // from our cell, won't be
+                      // refined by the neighbor
+                      if ((neighbor_face->refinement_case() | face_ref_case)
+                          == neighbor_face->refinement_case())
+                        return false;
+                      else
+                        {
+                          // if we are active, we
+                          // must be an
+                          // anisotropic child
+                          // and the coming
+                          // face_ref_case is
+                          // isotropic. Thus,
+                          // from our cell we
+                          // will see exactly the
+                          // opposite refine case
+                          // that the face has
+                          // now...
+                          Assert(face_ref_case==RefinementCase<dim-1>::isotropic_refinement, ExcInternalError());
+                          expected_face_ref_case = ~neighbor_face->refinement_case();
+                          return true;
+                        }
+                    }
+
+                // so, obviously we were not
+                // one of the children, but a
+                // grandchild. This is only
+                // possible in 3d.
+                Assert(dim==3, ExcInternalError());
+                // In that case, however, no
+                // matter what the neighbor
+                // does, he won't be finer
+                // after the next refinement
+                // step.
+                return false;
+              }
+          }// if face will be refined
+      }// if neighbor is flagged for refinement
+
+    // no cases left, so the neighbor will not
+    // refine the face
+    return false;
+  }
+
+  // Overload accepting active as well as non-active
+  // cells; the face refinement case computed along the way is dropped.
+  template <int dim, int spacedim>
+  bool
+  face_will_be_refined_by_neighbor(const TriaIterator<dealii::CellAccessor<dim, spacedim> > &cell,
+                                   const unsigned int                                   face_no)
+  {
+    RefinementCase<dim-1> discarded_face_ref_case = RefinementCase<dim-1>::no_refinement;
+    return face_will_be_refined_by_neighbor_internal(cell, face_no, discarded_face_ref_case);
+  }
+
+  // Overload for active cells only. In addition to the
+  // boolean result, the refinement case the face is going to have
+  // is passed back through expected_face_ref_case.
+  template <int dim, int spacedim>
+  bool
+  face_will_be_refined_by_neighbor(const TriaActiveIterator<dealii::CellAccessor<dim,spacedim> > &cell,
+                                   const unsigned int                                         face_no,
+                                   RefinementCase<dim-1>                                          &expected_face_ref_case)
+  {
+    const bool refined_by_neighbor
+      = face_will_be_refined_by_neighbor_internal(cell, face_no, expected_face_ref_case);
+    return refined_by_neighbor;
+  }
+
+
+
+  // Return whether, at every used vertex, the levels of the active
+  // cells adjacent to that vertex differ by at most one.
+  template <int dim, int spacedim>
+  bool
+  satisfies_level1_at_vertex_rule (const Triangulation<dim,spacedim> &triangulation)
+  {
+    std::vector<unsigned int> min_adjacent_cell_level (triangulation.n_vertices(),
+                                                       triangulation.n_levels());
+    std::vector<unsigned int> max_adjacent_cell_level (triangulation.n_vertices(), 0);
+
+    for (typename Triangulation<dim,spacedim>::active_cell_iterator
+         cell = triangulation.begin_active();
+         cell != triangulation.end(); ++cell)
+      for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+        {
+          const unsigned int vertex = cell->vertex_index(v);
+          min_adjacent_cell_level[vertex]
+            = std::min<unsigned int> (min_adjacent_cell_level[vertex], cell->level());
+          // compare against the running maximum (not the minimum), so the
+          // result does not depend on the order in which cells are visited
+          max_adjacent_cell_level[vertex]
+            = std::max<unsigned int> (max_adjacent_cell_level[vertex], cell->level());
+        }
+
+    for (unsigned int k=0; k<triangulation.n_vertices(); ++k)
+      if (triangulation.vertex_used(k) &&
+          max_adjacent_cell_level[k] - min_adjacent_cell_level[k] > 1)
+        return false;
+    return true;
+  }
+
+
+
+  /**
+   * Count, for every line of the
+   * triangulation, how many cells it bounds.
+   * The result is the @p line_cell_count
+   * vector needed by @p delete_children.
+   */
+  template <int dim, int spacedim>
+  std::vector<unsigned int>
+  count_cells_bounded_by_line (const Triangulation<dim,spacedim> &triangulation)
+  {
+    // for dim<2 an empty vector is returned
+    if (dim < 2)
+      return std::vector<unsigned int>();
+
+    // one counter per raw line, bumped once for each line of each cell
+    std::vector<unsigned int> cells_per_line(triangulation.n_raw_lines(),0);
+    const typename Triangulation<dim,spacedim>::cell_iterator
+    end_cell=triangulation.end();
+    for (typename Triangulation<dim,spacedim>::cell_iterator
+         c=triangulation.begin(); c!=end_cell; ++c)
+      for (unsigned int line=0; line<GeometryInfo<dim>::lines_per_cell; ++line)
+        ++cells_per_line[c->line_index(line)];
+    return cells_per_line;
+  }
+
+
+
+
+  /**
+   * Count, for every quad of the
+   * triangulation, how many cells it bounds.
+   * The result is the @p quad_cell_count
+   * vector needed by @p delete_children.
+   */
+  template <int dim, int spacedim>
+  std::vector<unsigned int>
+  count_cells_bounded_by_quad (const Triangulation<dim,spacedim> &triangulation)
+  {
+    // for dim<3 an empty vector is returned
+    if (dim < 3)
+      return std::vector<unsigned int>();
+
+    // one counter per raw quad, bumped once for each face of each cell
+    std::vector<unsigned int> cells_per_quad (triangulation.n_raw_quads(),0);
+    const typename Triangulation<dim,spacedim>::cell_iterator
+    end_cell=triangulation.end();
+    for (typename Triangulation<dim,spacedim>::cell_iterator
+         c=triangulation.begin(); c!=end_cell; ++c)
+      for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+        ++cells_per_quad[c->quad_index(face)];
+    return cells_per_quad;
+  }
+
+
+
+  /**
+   * A set of three functions that
+   * reorder the data given to
+   * create_triangulation_compatibility
+   * from the "classic" to the
+   * "current" format of vertex
+   * numbering of cells and faces,
+   * operating on their arguments
+   * in-place. This overload handles
+   * 1d and deliberately does nothing.
+   */
+  void
+  reorder_compatibility (const std::vector<CellData<1> > &,
+                         const SubCellData &)
+  {
+    // nothing to do here: the format
+    // hasn't changed for 1d
+  }
+
+
+  void
+  reorder_compatibility (std::vector<CellData<2> > &cells, const SubCellData &)
+  {
+    // exchange the last two vertex indices of every cell, in place
+    for (std::vector<CellData<2> >::iterator c=cells.begin(); c!=cells.end(); ++c)
+      std::swap(c->vertices[2], c->vertices[3]);
+  }
+
+
+  void
+  reorder_compatibility (std::vector<CellData<3> > &cells,
+                         SubCellData               &subcelldata)
+  {
+    // permute the vertex indices of every cell: the index stored at
+    // position i moves to position GeometryInfo<3>::ucd_to_deal[i]
+    unsigned int old_vertices[GeometryInfo<3>::vertices_per_cell];
+    for (std::vector<CellData<3> >::iterator c=cells.begin(); c!=cells.end(); ++c)
+      {
+        for (unsigned int i=0; i<GeometryInfo<3>::vertices_per_cell; ++i)
+          old_vertices[i] = c->vertices[i];
+        for (unsigned int i=0; i<GeometryInfo<3>::vertices_per_cell; ++i)
+          c->vertices[GeometryInfo<3>::ucd_to_deal[i]] = old_vertices[i];
+      }
+
+    // now the boundary quads: exchange their last two vertex
+    // indices (no running quad counter is needed for this)
+    for (std::vector<CellData<2> >::iterator quad=subcelldata.boundary_quads.begin();
+         quad!=subcelldata.boundary_quads.end(); ++quad)
+      std::swap(quad->vertices[2], quad->vertices[3]);
+  }
+
+
+
+  /**
+   * Return the index of the vertex
+   * located in the middle of the
+   * given object, provided such a
+   * vertex exists. That requires
+   * the object to be refined; in
+   * 2D and 3D the refinement must
+   * be isotropic, or else the
+   * anisotropic children must have
+   * been refined once more. When
+   * there is no middle vertex,
+   * <tt>numbers::invalid_unsigned_int</tt>
+   * is returned.
+   *
+   * This function should not really be
+   * used in application programs.
+   */
+  template <int dim, int spacedim>
+  unsigned int
+  middle_vertex_index(const typename Triangulation<dim,spacedim>::line_iterator &line)
+  {
+    return (line->has_children() ?
+            line->child(0)->vertex_index(1) :
+            numbers::invalid_unsigned_int);
+  }
+
+
+  template <int dim, int spacedim>
+  unsigned int
+  middle_vertex_index(const typename Triangulation<dim,spacedim>::quad_iterator &quad)
+  {
+    // cut_x and cut_y are answered by the line overload, cut_xy directly
+    const unsigned char ref_case = static_cast<unsigned char> (quad->refinement_case());
+    switch (ref_case)
+      {
+      case RefinementCase<2>::cut_x:
+        return middle_vertex_index<dim,spacedim>(quad->child(0)->line(1));
+      case RefinementCase<2>::cut_y:
+        return middle_vertex_index<dim,spacedim>(quad->child(0)->line(3));
+      case RefinementCase<2>::cut_xy:
+        return quad->child(0)->vertex_index(3);
+      default:
+        // any other refinement case: fall through to the invalid index
+        break;
+      }
+    return numbers::invalid_unsigned_int;
+  }
+
+
+  template <int dim, int spacedim>
+  unsigned int
+  middle_vertex_index(const typename Triangulation<dim,spacedim>::hex_iterator &hex)
+  {
+    // single cuts are delegated to the quad overload, double cuts to the
+    // line overload; only the isotropic cut reads the vertex directly
+    const unsigned char ref_case = static_cast<unsigned char> (hex->refinement_case());
+    switch (ref_case)
+      {
+      // cut in one direction
+      case RefinementCase<3>::cut_x:
+        return middle_vertex_index<dim,spacedim>(hex->child(0)->quad(1));
+      case RefinementCase<3>::cut_y:
+        return middle_vertex_index<dim,spacedim>(hex->child(0)->quad(3));
+      case RefinementCase<3>::cut_z:
+        return middle_vertex_index<dim,spacedim>(hex->child(0)->quad(5));
+      // cut in two directions
+      case RefinementCase<3>::cut_xy:
+        return middle_vertex_index<dim,spacedim>(hex->child(0)->line(11));
+      case RefinementCase<3>::cut_xz:
+        return middle_vertex_index<dim,spacedim>(hex->child(0)->line(5));
+      case RefinementCase<3>::cut_yz:
+        return middle_vertex_index<dim,spacedim>(hex->child(0)->line(7));
+      // cut in all three directions
+      case RefinementCase<3>::cut_xyz:
+        return hex->child(0)->vertex_index(7);
+      default:
+        // any other refinement case: fall through to the invalid index
+        break;
+      }
+    return numbers::invalid_unsigned_int;
+  }
+
+
+  /**
+   * Collect all coarse mesh cells
+   * with at least one vertex at
+   * which the determinant of the
+   * Jacobian is zero or negative.
+   * This overload covers
+   * dim!=spacedim, where we cannot
+   * tell whether a cell is twisted
+   * (it may discretize a manifold
+   * with a twist): the list returned is always empty.
+   */
+  template <class TRIANGULATION>
+  inline
+  typename TRIANGULATION::DistortedCellList
+  collect_distorted_coarse_cells (const TRIANGULATION &)
+  {
+    typename TRIANGULATION::DistortedCellList no_distorted_cells;
+    return no_distorted_cells;
+  }
+
+
+
+  /**
+   * Collect all coarse mesh cells
+   * with at least one vertex at
+   * which the determinant of the
+   * Jacobian is zero or
+   * negative. This overload is
+   * used when dim==spacedim.
+   */
+  template <int dim>
+  inline
+  typename Triangulation<dim,dim>::DistortedCellList
+  collect_distorted_coarse_cells (const Triangulation<dim,dim> &triangulation)
+  {
+    typename Triangulation<dim,dim>::DistortedCellList result;
+    for (typename Triangulation<dim,dim>::cell_iterator
+         coarse_cell = triangulation.begin(0); coarse_cell != triangulation.end(0);
+         ++coarse_cell)
+      {
+        // evaluate the alternating form at the vertices of this cell
+        Point<dim> cell_vertices[GeometryInfo<dim>::vertices_per_cell];
+        for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+          cell_vertices[v] = coarse_cell->vertex(v);
+        Tensor<0,dim> forms[GeometryInfo<dim>::vertices_per_cell];
+        GeometryInfo<dim>::alternating_form_at_vertices (cell_vertices, forms);
+
+        // record the cell once a value does not exceed the scaled tolerance
+        for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+          if (forms[v] <= 1e-9 * std::pow (coarse_cell->diameter(), 1.*dim))
+            {
+              result.distorted_cells.push_back (coarse_cell);
+              break;
+            }
+      }
+
+    return result;
+  }
+
+
+  /**
+   * Return whether at least one
+   * child of the given cell is
+   * distorted. This overload is
+   * used when dim==spacedim.
+   */
+  template <int dim>
+  bool
+  has_distorted_children (const typename Triangulation<dim,dim>::cell_iterator &cell,
+                          internal::int2type<dim>,
+                          internal::int2type<dim>)
+  {
+    Assert (cell->has_children(), ExcInternalError());
+
+    for (unsigned int child_no=0; child_no<cell->n_children(); ++child_no)
+      {
+        const typename Triangulation<dim,dim>::cell_iterator child = cell->child(child_no);
+        // evaluate the alternating form at the vertices of this child
+        Point<dim> child_vertices[GeometryInfo<dim>::vertices_per_cell];
+        for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+          child_vertices[v] = child->vertex(v);
+        Tensor<0,dim> forms[GeometryInfo<dim>::vertices_per_cell];
+        GeometryInfo<dim>::alternating_form_at_vertices (child_vertices, forms);
+
+        // one value not exceeding the diameter-scaled tolerance suffices
+        for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+          if (forms[v] <= 1e-9 * std::pow (child->diameter(), 1.*dim))
+            return true;
+      }
+
+    return false;
+  }
+
+
+  /**
+   * Function for dim!=spacedim. As
+   * for
+   * collect_distorted_coarse_cells,
+   * there is nothing that we can do
+   * in this case, so the answer is always false.
+   */
+  template <int dim, int spacedim>
+  bool
+  has_distorted_children (const typename Triangulation<dim,spacedim>::cell_iterator &,
+                          internal::int2type<dim>,
+                          internal::int2type<spacedim>)
+  {
+    return false;
+  }
+
+
+
+  /**
+   * For a given triangulation: set up the neighbor information
+   * on all cells. This overload for dim==1 has an empty body.
+   */
+  template <int spacedim>
+  void
+  update_neighbors (Triangulation<1,spacedim> &)
+  {
+  }
+
+
+  template <int dim, int spacedim>
+  void
+  update_neighbors (Triangulation<dim,spacedim> &triangulation)
+  {
+    // Summary: a first loop over all cells records, for each
+    // face, the cell found on either side of it; a second loop
+    // then hands every cell, via set_neighbor(), the cell
+    // recorded on the opposite side of each of its faces.
+    //
+    // each face can be neighbored on two sides by cells.
+    // according to the face's intrinsic normal we define
+    // the left neighbor as the one for which the face
+    // normal points outward, and store that one first;
+    // the second one is then the right neighbor for which
+    // the face normal points inward. This information
+    // depends on the type of cell and local number of
+    // face for the 'standard ordering and orientation' of
+    // faces and then on the face_orientation information
+    // for the real mesh. Set up a table to have fast
+    // access to those offsets (0 for left and 1 for
+    // right). Some of the values are invalid as they
+    // reference too large face numbers, but we just leave
+    // them at a zero value.
+    //
+    // Note, that in 2d for lines as faces the
+    // normal direction given in the
+    // GeometryInfo class is not consistent. We
+    // thus define here that the normal for a
+    // line points to the right if the line
+    // points upwards.
+    //
+    // There is one more point to
+    // consider, however: if we have
+    // dim<spacedim, then we may have
+    // cases where cells are
+    // inverted. In effect, both
+    // cells think they are the left
+    // neighbor of an edge, for
+    // example, which leads us to
+    // forget neighborship
+    // information (a case that shows
+    // this is
+    // codim_one/hanging_nodes_02). We
+    // store whether a cell is
+    // inverted using the
+    // direction_flag, so if a cell
+    // has a false direction_flag,
+    // then we need to invert our
+    // selection whether we are a
+    // left or right neighbor in all
+    // following computations.
+    //
+    // first index:  dimension (minus 2)
+    // second index: local face index
+    // third index:  face_orientation (false and true)
+    static const unsigned int left_right_offset[2][6][2] =
+    {
+      // quadrilateral
+      { {0,1}, // face 0, face_orientation = false and true
+        {1,0}, // face 1, face_orientation = false and true
+        {1,0}, // face 2, face_orientation = false and true
+        {0,1}, // face 3, face_orientation = false and true
+        {0,0}, // face 4, invalid face
+        {0,0}  // face 5, invalid face
+      },
+      // hexahedron
+      { {0,1},
+        {1,0},
+        {0,1},
+        {1,0},
+        {0,1},
+        {1,0}
+      }
+    };
+
+    // now create a vector of the two active
+    // neighbors (left and right) for each face
+    // and fill it by looping over all cells. For
+    // cases with anisotropic refinement and more
+    // than one cell neighboring at a given side
+    // of the face we will automatically get the
+    // active one on the highest level as we loop
+    // over cells from lower levels first.
+    const typename Triangulation<dim,spacedim>::cell_iterator dummy;
+    std::vector<typename Triangulation<dim,spacedim>::cell_iterator>
+    adjacent_cells(2*triangulation.n_raw_faces(), dummy);
+
+    typename Triangulation<dim,spacedim>::cell_iterator
+    cell = triangulation.begin(),
+    endc = triangulation.end();
+    for (; cell != endc; ++cell)
+      for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+        {
+          const typename Triangulation<dim,spacedim>::face_iterator
+          face=cell->face(f);
+
+          const unsigned int
+          offset = (cell->direction_flag()
+                    ?
+                    left_right_offset[dim-2][f][cell->face_orientation(f)]
+                    :
+                    1-left_right_offset[dim-2][f][cell->face_orientation(f)]);
+
+          adjacent_cells[2*face->index() + offset] = cell;
+
+          // if this cell is not refined, but the
+          // face is, then we'll have to set our
+          // cell as neighbor for the child faces
+          // as well. Fortunately the normal
+          // orientation of children will be just
+          // the same.
+          if (dim==2)
+            {
+              if (cell->active() && face->has_children())
+                {
+                  adjacent_cells[2*face->child(0)->index() + offset] = cell;
+                  adjacent_cells[2*face->child(1)->index() + offset] = cell;
+                }
+            }
+          else // -> dim == 3
+            {
+              // We need the same as in 2d
+              // here. Furthermore, if the face is
+              // refined with cut_x or cut_y then
+              // those children again in the other
+              // direction, and if this cell is
+              // refined isotropically (along the
+              // face) then the neighbor will
+              // (probably) be refined as cut_x or
+              // cut_y along the face. For those
+              // neighboring children cells, their
+              // neighbor will be the current,
+              // inactive cell, as our children are
+              // too fine to be neighbors. Catch that
+              // case by also acting on inactive
+              // cells with isotropic refinement
+              // along the face. If the situation
+              // described is not present, the data
+              // will be overwritten later on when we
+              // visit cells on finer levels, so no
+              // harm will be done.
+              if (face->has_children() &&
+                  (cell->active() ||
+                   GeometryInfo<dim>::face_refinement_case(cell->refinement_case(),f) == RefinementCase<dim-1>::isotropic_refinement))
+                {
+
+                  for (unsigned int c=0; c<face->n_children(); ++c)
+                    adjacent_cells[2*face->child(c)->index() + offset] = cell;
+                  if (face->child(0)->has_children())
+                    {
+                      adjacent_cells[2*face->child(0)->child(0)->index() + offset] = cell;
+                      adjacent_cells[2*face->child(0)->child(1)->index() + offset] = cell;
+                    }
+                  if (face->child(1)->has_children())
+                    {
+                      adjacent_cells[2*face->child(1)->child(0)->index() + offset] = cell;
+                      adjacent_cells[2*face->child(1)->child(1)->index() + offset] = cell;
+                    }
+                } // if cell active and face refined
+            } // else -> dim==3
+        } // for all faces of all cells
+
+    // now loop again over all cells and set the
+    // corresponding neighbor cell. Note, that we
+    // have to use the opposite of the
+    // left_right_offset in this case as we want
+    // the offset of the neighbor, not our own.
+    for (cell=triangulation.begin(); cell != endc; ++cell)
+      for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+        {
+          const unsigned int
+          offset = (cell->direction_flag()
+                    ?
+                    left_right_offset[dim-2][f][cell->face_orientation(f)]
+                    :
+                    1-left_right_offset[dim-2][f][cell->face_orientation(f)]);
+          cell->set_neighbor(f,
+                             adjacent_cells[2*cell->face(f)->index() + 1 - offset]);
+        }
+  }
+
+}// end of anonymous namespace
+
+
+namespace internal
+{
+  namespace Triangulation
+  {
+    // make sure that if in the following we
+    // write Triangulation<dim,spacedim>
+    // we mean the *class*
+    // dealii::Triangulation, not the
+    // enclosing namespace
+    // internal::Triangulation
+    using dealii::Triangulation;
+
+    /**
+     * Reports the number of a cell whose creation went wrong.
+     * @ingroup Exceptions
+     */
+    DeclException1 (ExcGridHasInvalidCell,
+                    int,
+                    << "Something went wrong when making cell " << arg1
+                    << ". Read the docs and the source code "
+                    << "for more information.");
+    /**
+     * Reports the number of a cell during whose construction something went wrong.
+     * @ingroup Exceptions
+     */
+    DeclException1 (ExcInternalErrorOnCell,
+                    int,
+                    << "Something went wrong upon construction of cell "
+                    << arg1);
+    /**
+     * A cell was entered which has
+     * negative measure. In most
+     * cases, this is due to a wrong
+     * order of the vertices of the
+     * cell. The argument is the cell's number.
+     *
+     * @ingroup Exceptions
+     */
+    DeclException1 (ExcCellHasNegativeMeasure,
+                    int,
+                    << "Cell " << arg1 << " has negative measure. This typically "
+                    << "indicates some distortion in the cell, or a mistakenly "
+                    << "swapped pair of vertices in the input to "
+                    << "Triangulation::create_triangulation().");
+    /**
+     * A cell is created with a
+     * vertex number exceeding the
+     * vertex array. Arguments: cell number, vertex index, upper bound.
+     *
+     * @ingroup Exceptions
+     */
+    DeclException3 (ExcInvalidVertexIndex,
+                    int, int, int,
+                    << "Error while creating cell " << arg1
+                    << ": the vertex index " << arg2 << " must be between 0 and "
+                    << arg3 << ".");
+    /**
+     * A boundary indicator was to be assigned to a line that does not exist; the arguments are its end vertices.
+     * @ingroup Exceptions
+     */
+    DeclException2 (ExcLineInexistant,
+                    int, int,
+                    << "While trying to assign a boundary indicator to a line: "
+                    << "the line with end vertices " << arg1 << " and "
+                    << arg2 << " does not exist.");
+    /**
+     * A boundary indicator was to be assigned to a quad that does not exist; the arguments are its bounding lines.
+     * @ingroup Exceptions
+     */
+    DeclException4 (ExcQuadInexistant,
+                    int, int, int, int,
+                    << "While trying to assign a boundary indicator to a quad: "
+                    << "the quad with bounding lines " << arg1 << ", " << arg2
+                    << ", " << arg3 << ", " << arg4 << " does not exist.");
+    /**
+     * The input assigns a boundary indicator to an interior line. Arguments: the line's two indices and the indicator.
+     * @ingroup Exceptions
+     */
+    DeclException3 (ExcInteriorLineCantBeBoundary,
+                    int, int,
+                    types::boundary_id,
+                    << "The input data for creating a triangulation contained "
+                    << "information about a line with indices "
+                    << arg1 << " and " << arg2
+                    << " that is described to have boundary indicator "
+                    << (int)arg3
+                    << ". However, this is an internal line not located on the "
+                    << "boundary. You cannot assign a boundary indicator to it."
+                    << std::endl
+                    << std::endl
+                    << "If this happened at a place where you call "
+                    << "Triangulation::create_triangulation() yourself, you need "
+                    << "to check the SubCellData object you pass to this function."
+                    << std::endl
+                    << std::endl
+                    << "If this happened in a place where you are reading a mesh "
+                    << "from a file, then you need to investigate why such a line "
+                    << "ended up in the input file. A typical case is a geometry "
+                    << "that consisted of multiple parts and for which the mesh "
+                    << "generator program assumes that the interface between "
+                    << "two parts is a boundary when that isn't supposed to be "
+                    << "the case, or where the mesh generator simply assigns "
+                    << "'geometry indicators' to lines at the perimeter of "
+                    << "a part that are not supposed to be interpreted as "
+                    << "'boundary indicators'.");
+    /**
+     * The input assigns a boundary indicator to an interior quad. Arguments: the quad's four indices and the indicator.
+     * @ingroup Exceptions
+     */
+    DeclException5 (ExcInteriorQuadCantBeBoundary,
+                    int, int, int, int,
+                    types::boundary_id,
+                    << "The input data for creating a triangulation contained "
+                    << "information about a quad with indices "
+                    << arg1 << ", " << arg2 << ", " << arg3 << ", and " << arg4
+                    << " that is described to have boundary indicator "
+                    << (int)arg5
+                    << ". However, this is an internal quad not located on the "
+                    << "boundary. You cannot assign a boundary indicator to it."
+                    << std::endl
+                    << std::endl
+                    << "If this happened at a place where you call "
+                    << "Triangulation::create_triangulation() yourself, you need "
+                    << "to check the SubCellData object you pass to this function."
+                    << std::endl
+                    << std::endl
+                    << "If this happened in a place where you are reading a mesh "
+                    << "from a file, then you need to investigate why such a quad "
+                    << "ended up in the input file. A typical case is a geometry "
+                    << "that consisted of multiple parts and for which the mesh "
+                    << "generator program assumes that the interface between "
+                    << "two parts is a boundary when that isn't supposed to be "
+                    << "the case, or where the mesh generator simply assigns "
+                    << "'geometry indicators' to quads at the surface of "
+                    << "a part that are not supposed to be interpreted as "
+                    << "'boundary indicators'.");
+    /**
+     * SubCellData contains the line info of one line more than once; the arguments are the line's vertex indices.
+     * @ingroup Exceptions
+     */
+    DeclException2 (ExcMultiplySetLineInfoOfLine,
+                    int, int,
+                    << "In SubCellData the line info of the line with vertex indices "
+                    << arg1 << " and " << arg2 << " appears more than once. "
+                    << "This is not allowed.");
+
+
+    /**
+     * A class into which we put many of the functions that implement
+     * functionality of the Triangulation class. The main reason for this
+     * class is as follows: the majority of the functions in Triangulation
+     * need to be implemented differently for dim==1, dim==2, and
+     * dim==3. However, their implementation is largely independent of the
+     * spacedim template parameter. So we would like to write things like
+     *
+     * @code
+     * template <int spacedim>
+     * void Triangulation<1,spacedim>::create_triangulation (...) {...}
+     * @endcode
+     *
+     * Unfortunately, C++ doesn't allow this: member functions of class
+     * templates have to be either not specialized at all, or fully
+     * specialized. No partial specialization is allowed. One possible
+     * solution would be to just duplicate the bodies of the functions and
+     * have equally implemented functions
+     *
+     * @code
+     * template <>
+     * void Triangulation<1,1>::create_triangulation (...) {...}
+     *
+     * template <>
+     * void Triangulation<1,2>::create_triangulation (...) {...}
+     * @endcode
+     *
+     * but that is clearly an unsatisfactory solution. Rather, what we do
+     * is introduce the current Implementation class in which we can write
+     * these functions as member templates over spacedim, i.e. we can have
+     *
+     * @code
+     * template <int dim_, int spacedim_>
+     * template <int spacedim>
+     * void Triangulation<dim_,spacedim_>::Implementation::
+     *            create_triangulation (...,
+     *                                  Triangulation<1,spacedim> &tria ) {...}
+     * @endcode
+     *
+     * The outer template parameters are here unused, only the inner
+     * ones are of real interest.
+     *
+     * One may ask why we put these functions into a class rather
+     * than an anonymous namespace, for example?
+     *
+     * First, these implementation functions need to be friends of the
+     * Triangulation class. It is simpler to make the entire class a friend
+     * rather than listing all members of an implementation namespace as
+     * friends of the Triangulation class (there is no such thing as a "friend
+     * namespace XXX" directive).
+     *
+     * Ideally, we would make this class a member class of the
+     * Triangulation<dim,spacedim> class, since then our implementation functions
+     * have immediate access to the typedefs and static functions of the
+     * surrounding Triangulation class. I.e., we do not have to write "typename
+     * Triangulation<dim,spacedim>::active_cell_iterator" but can write
+     * "active_cell_iterator" right away. This is, in fact, the way it was
+     * implemented first, but we ran into a bug in gcc4.0:
+     * @code
+     *  class Triangulation {
+     *    struct Implementation;
+     *    friend class TriaAccessor;
+     *  };
+     *
+     *  class TriaAccessor {
+     *    struct Implementation;
+     *    friend class Triangulation;
+     *  };
+     * @endcode
+     *
+     * Here, friendship (per C++ standard) is supposed to extend to all members of
+     * the befriended class, including its 'Implementation' member class. But gcc4.0
+     * gets this wrong: the members of Triangulation::Implementation are not friends
+     * of TriaAccessor and the other way around. Ideally, one would fix this by
+     * saying
+     * @code
+     *  class Triangulation {
+     *    struct Implementation;
+     *    friend class TriaAccessor;
+     *    friend class TriaAccessor::Implementation;   // **
+     *  };
+     *
+     *  class TriaAccessor {
+     *    struct Implementation;
+     *    friend class Triangulation;
+     *    friend class Triangulation::Implementation;
+     *  };
+     * @endcode
+     * but that's not legal because in ** we don't know yet that TriaAccessor has
+     * a member class Implementation and so we can't make it a friend. The only
+     * way forward at this point was to make Implementation a class in the
+     * internal namespace so that we can forward declare it and make it a friend
+     * of the respective other outer class -- not quite what we wanted but the
+     * only way I could see to make it work...
+     */
+    struct Implementation
+    {
+      /**
+       * Fill the line-related part of the number cache of
+       * @p triangulation: the number of levels as well as the
+       * number of all lines and of all active lines.
+       *
+       * For dim==1 the lines are counted level by level, so the
+       * per-level arrays are filled in addition to the global
+       * counters. For any other dim, only the global counters
+       * are accumulated and the per-level arrays are merely
+       * resized to the number of levels.
+       *
+       * @p level_objects is the number of levels for which the
+       * triangulation has objects. The highest of these levels
+       * need not contain cells any more if they have all been
+       * coarsened away, which is why the number of levels
+       * stored in the cache is determined here from the last
+       * level that still has cells.
+       */
+      template <int dim, int spacedim>
+      static
+      void compute_number_cache (const Triangulation<dim,spacedim>       &triangulation,
+                                 const unsigned int                       level_objects,
+                                 internal::Triangulation::NumberCache<1> &number_cache)
+      {
+        typedef
+        typename Triangulation<dim,spacedim>::active_line_iterator active_line_iterator;
+        typedef
+        typename Triangulation<dim,spacedim>::line_iterator line_iterator;
+
+        // the number of levels is one more than the index of
+        // the last level on which there are cells; if there
+        // is no such level (or no level at all) it stays zero
+        number_cache.n_levels = 0;
+        for (unsigned int l=0; l<level_objects; ++l)
+          if (triangulation.begin(l) != triangulation.end(l))
+            number_cache.n_levels = l+1;
+
+        // a triangulation without any cells is not expected here
+        Assert (number_cache.n_levels > 0, ExcInternalError());
+
+        // size the per-level arrays and reset the global counters
+        number_cache.n_lines_level.resize (number_cache.n_levels);
+        number_cache.n_active_lines_level.resize (number_cache.n_levels);
+        number_cache.n_lines = 0;
+        number_cache.n_active_lines = 0;
+
+        if (dim != 1)
+          {
+            // here lines have no levels, so simply walk over
+            // all of them, first over all lines and then
+            // over the active ones
+            const line_iterator lines_end = triangulation.end_line();
+            for (line_iterator l = triangulation.begin_line (); l != lines_end; ++l)
+              ++number_cache.n_lines;
+
+            const active_line_iterator active_lines_end = triangulation.end_line();
+            for (active_line_iterator l = triangulation.begin_active_line ();
+                 l != active_lines_end; ++l)
+              ++number_cache.n_active_lines;
+          }
+        else
+          {
+            // in 1d lines have levels: count per level and
+            // add up the global numbers as we go
+            for (unsigned int l=0; l<number_cache.n_levels; ++l)
+              {
+                // the lines of this level end where the next
+                // level begins or, on the last level, at the
+                // overall end
+                const line_iterator level_end = (l+1 < number_cache.n_levels ?
+                                                 triangulation.begin_line (l+1) :
+                                                 line_iterator(triangulation.end_line()));
+
+                number_cache.n_lines_level[l] = 0;
+                for (line_iterator line = triangulation.begin_line (l);
+                     line != level_end; ++line)
+                  ++number_cache.n_lines_level[l];
+                number_cache.n_lines += number_cache.n_lines_level[l];
+
+                // active lines: start at the first active
+                // line of this level and stop as soon as we
+                // run off the end or leave the level
+                const active_line_iterator lines_end = triangulation.end_line ();
+
+                number_cache.n_active_lines_level[l] = 0;
+                for (active_line_iterator line = triangulation.begin_active_line (l);
+                     (line != lines_end) && (line->level() == static_cast<signed int>(l));
+                     ++line)
+                  ++number_cache.n_active_lines_level[l];
+                number_cache.n_active_lines += number_cache.n_active_lines_level[l];
+              }
+          }
+      }
+
+      /**
+       * Fill the quad-related part of the number cache of
+       * @p triangulation: the number of all quads and of all
+       * active quads. Before doing so, this function calls the
+       * overload for NumberCache<1>, which fills the
+       * line-related part and the number of levels.
+       *
+       * For dim==2 the quads are counted level by level, so the
+       * per-level arrays are filled in addition to the global
+       * counters. For any other dim, only the global counters
+       * are accumulated and the per-level arrays are merely
+       * resized to the number of levels.
+       *
+       * @p level_objects is the number of levels for which the
+       * triangulation has objects; it is passed on unchanged
+       * to the overload for lines.
+       */
+      template <int dim, int spacedim>
+      static
+      void compute_number_cache (const Triangulation<dim,spacedim>       &triangulation,
+                                 const unsigned int                       level_objects,
+                                 internal::Triangulation::NumberCache<2> &number_cache)
+      {
+        typedef
+        typename Triangulation<dim,spacedim>::active_quad_iterator active_quad_iterator;
+        typedef
+        typename Triangulation<dim,spacedim>::quad_iterator quad_iterator;
+
+        // first let the overload for lines do its work on
+        // the base class part of the cache; this also sets
+        // n_levels, which is used below
+        internal::Triangulation::NumberCache<1> &line_cache
+          = static_cast<internal::Triangulation::NumberCache<1>&> (number_cache);
+        compute_number_cache (triangulation, level_objects, line_cache);
+
+        // size the per-level arrays and reset the global counters
+        number_cache.n_quads_level.resize (number_cache.n_levels);
+        number_cache.n_active_quads_level.resize (number_cache.n_levels);
+        number_cache.n_quads = 0;
+        number_cache.n_active_quads = 0;
+
+        if (dim != 2)
+          {
+            // here quads have no levels, so simply walk over
+            // all of them, first over all quads and then
+            // over the active ones
+            const quad_iterator quads_end = triangulation.end_quad();
+            for (quad_iterator q = triangulation.begin_quad (); q != quads_end; ++q)
+              ++number_cache.n_quads;
+
+            const active_quad_iterator active_quads_end = triangulation.end_quad();
+            for (active_quad_iterator q = triangulation.begin_active_quad ();
+                 q != active_quads_end; ++q)
+              ++number_cache.n_active_quads;
+          }
+        else
+          {
+            // in 2d quads have levels: count per level and
+            // add up the global numbers as we go
+            for (unsigned int l=0; l<number_cache.n_levels; ++l)
+              {
+                // the quads of this level end where the next
+                // level begins or, on the last level, at the
+                // overall end
+                const quad_iterator level_end = (l+1 < number_cache.n_levels ?
+                                                 triangulation.begin_quad (l+1) :
+                                                 quad_iterator(triangulation.end_quad()));
+
+                number_cache.n_quads_level[l] = 0;
+                for (quad_iterator quad = triangulation.begin_quad (l);
+                     quad != level_end; ++quad)
+                  ++number_cache.n_quads_level[l];
+                number_cache.n_quads += number_cache.n_quads_level[l];
+
+                // active quads: start at the first active
+                // quad of this level and stop as soon as we
+                // run off the end or leave the level
+                const active_quad_iterator quads_end = triangulation.end_quad ();
+
+                number_cache.n_active_quads_level[l] = 0;
+                for (active_quad_iterator quad = triangulation.begin_active_quad (l);
+                     (quad != quads_end) && (quad->level() == static_cast<signed int>(l));
+                     ++quad)
+                  ++number_cache.n_active_quads_level[l];
+                number_cache.n_active_quads += number_cache.n_active_quads_level[l];
+              }
+          }
+      }
+
+      /**
+       * Fill the hex-related part of the number cache of
+       * @p triangulation: the number of all hexes and of all
+       * active hexes. Before doing so, i.e. at the beginning of
+       * this function, the overload for NumberCache<2> is
+       * called; it fills the quad-related part and in turn
+       * calls the overload for lines, which also determines
+       * the number of levels.
+       *
+       * For dim==3 the hexes are counted level by level, so the
+       * per-level arrays are filled in addition to the global
+       * counters. For any other dim, only the global counters
+       * are accumulated and the per-level arrays are merely
+       * resized to the number of levels.
+       *
+       * @p level_objects is the number of levels for which the
+       * triangulation has objects; it is passed on unchanged
+       * to the overload for quads.
+       */
+      template <int dim, int spacedim>
+      static
+      void compute_number_cache (const Triangulation<dim,spacedim>       &triangulation,
+                                 const unsigned int                       level_objects,
+                                 internal::Triangulation::NumberCache<3> &number_cache)
+      {
+        typedef
+        typename Triangulation<dim,spacedim>::active_hex_iterator active_hex_iterator;
+        typedef
+        typename Triangulation<dim,spacedim>::hex_iterator hex_iterator;
+
+        // first let the overload for quads (and, through it,
+        // the one for lines) do its work on the base class
+        // part of the cache; this also sets n_levels, which
+        // is used below
+        internal::Triangulation::NumberCache<2> &quad_cache
+          = static_cast<internal::Triangulation::NumberCache<2>&> (number_cache);
+        compute_number_cache (triangulation, level_objects, quad_cache);
+
+        // size the per-level arrays and reset the global counters
+        number_cache.n_hexes_level.resize (number_cache.n_levels);
+        number_cache.n_active_hexes_level.resize (number_cache.n_levels);
+        number_cache.n_hexes = 0;
+        number_cache.n_active_hexes = 0;
+
+        if (dim != 3)
+          {
+            // here hexes have no levels, so simply walk over
+            // all of them, first over all hexes and then
+            // over the active ones
+            const hex_iterator hexes_end = triangulation.end_hex();
+            for (hex_iterator h = triangulation.begin_hex (); h != hexes_end; ++h)
+              ++number_cache.n_hexes;
+
+            const active_hex_iterator active_hexes_end = triangulation.end_hex();
+            for (active_hex_iterator h = triangulation.begin_active_hex ();
+                 h != active_hexes_end; ++h)
+              ++number_cache.n_active_hexes;
+          }
+        else
+          {
+            // in 3d hexes have levels: count per level and
+            // add up the global numbers as we go
+            for (unsigned int l=0; l<number_cache.n_levels; ++l)
+              {
+                // the hexes of this level end where the next
+                // level begins or, on the last level, at the
+                // overall end
+                const hex_iterator level_end = (l+1 < number_cache.n_levels ?
+                                                triangulation.begin_hex (l+1) :
+                                                hex_iterator(triangulation.end_hex()));
+
+                number_cache.n_hexes_level[l] = 0;
+                for (hex_iterator hex = triangulation.begin_hex (l);
+                     hex != level_end; ++hex)
+                  ++number_cache.n_hexes_level[l];
+                number_cache.n_hexes += number_cache.n_hexes_level[l];
+
+                // active hexes: start at the first active
+                // hex of this level and stop as soon as we
+                // run off the end or leave the level
+                const active_hex_iterator hexes_end = triangulation.end_hex ();
+
+                number_cache.n_active_hexes_level[l] = 0;
+                for (active_hex_iterator hex = triangulation.begin_active_hex (l);
+                     (hex != hexes_end) && (hex->level() == static_cast<signed int>(l));
+                     ++hex)
+                  ++number_cache.n_active_hexes_level[l];
+                number_cache.n_active_hexes += number_cache.n_active_hexes_level[l];
+              }
+          }
+      }
+
+
+      /**
+       * Create a triangulation from given data. This function
+       * does this work for 1-dimensional triangulations
+       * independently of the actual space dimension.
+       *
+       * On return, level 0 of @p triangulation holds one used
+       * line per entry of @p cells (material and manifold ids
+       * are copied from the cell data, the subdomain id is set
+       * to 0), the neighbors of every line are set for both of
+       * its vertices, and the vertex_to_boundary_id_map_1d and
+       * vertex_to_manifold_id_map_1d maps are rebuilt: every
+       * vertex gets the invalid manifold id, and a vertex at
+       * the boundary gets as boundary id the number of the
+       * face (0 or 1) through which its cell sees it.
+       *
+       * @param v             The vertices; must not be empty.
+       * @param cells         The cells, each given by two
+       *                      indices into @p v; must not be
+       *                      empty.
+       * @param triangulation The triangulation to be filled.
+       *
+       * The third, unnamed argument (the SubCellData object) is
+       * ignored.
+       *
+       * Errors are reported through AssertThrow: if @p v or
+       * @p cells is empty, if a cell refers to a vertex index
+       * that is not less than v.size() (ExcInvalidVertexIndex),
+       * if the number of lines adjacent to a vertex is neither
+       * one nor two, and, for spacedim==1, if the number of
+       * vertices with exactly one adjacent line is not two.
+       */
+      template <int spacedim>
+      static
+      void
+      create_triangulation (const std::vector<Point<spacedim> > &v,
+                            const std::vector<CellData<1> >     &cells,
+                            const SubCellData                   &/*subcelldata*/,
+                            Triangulation<1,spacedim>           &triangulation)
+      {
+        AssertThrow (v.size() > 0, ExcMessage ("No vertices given"));
+        AssertThrow (cells.size() > 0, ExcMessage ("No cells given"));
+
+        // note: the subcelldata argument is not looked at in
+        // this function
+        const unsigned int dim=1;
+
+        // make sure every cell only refers to vertices that
+        // exist. the vertex indices are used below to index
+        // into an array with one entry per vertex, so do this
+        // check before anything is modified
+        for (unsigned int cell=0; cell<cells.size(); ++cell)
+          for (unsigned int vertex=0; vertex<GeometryInfo<dim>::vertices_per_cell; ++vertex)
+            AssertThrow (cells[cell].vertices[vertex] < v.size(),
+                         ExcInvalidVertexIndex (cell, cells[cell].vertices[vertex],
+                                                v.size()));
+
+        // copy vertices
+        triangulation.vertices = v;
+        triangulation.vertices_used = std::vector<bool> (v.size(), true);
+
+        // store the indices of the lines
+        // which are adjacent to a given
+        // vertex
+        std::vector<std::vector<int> > lines_at_vertex (v.size());
+
+        // reserve enough space
+        triangulation.levels.push_back (new internal::Triangulation::TriaLevel<dim>);
+        triangulation.levels[0]->reserve_space (cells.size(), dim, spacedim);
+        triangulation.levels[0]->cells.reserve_space (0,cells.size());
+
+        // make up cells
+        typename Triangulation<dim,spacedim>::raw_line_iterator
+        next_free_line = triangulation.begin_raw_line ();
+        for (unsigned int cell=0; cell<cells.size(); ++cell)
+          {
+            // skip over lines that are already in use
+            while (next_free_line->used())
+              ++next_free_line;
+
+            next_free_line->set (internal::Triangulation
+                                 ::TriaObject<1> (cells[cell].vertices[0],
+                                                  cells[cell].vertices[1]));
+            next_free_line->set_used_flag ();
+            next_free_line->set_material_id (cells[cell].material_id);
+            next_free_line->set_manifold_id (cells[cell].manifold_id);
+            next_free_line->clear_user_data ();
+            next_free_line->set_subdomain_id (0);
+
+            // note that this cell is
+            // adjacent to these vertices
+            lines_at_vertex[cells[cell].vertices[0]].push_back (cell);
+            lines_at_vertex[cells[cell].vertices[1]].push_back (cell);
+          }
+
+
+        // some security tests
+        {
+          unsigned int boundary_nodes = 0;
+          for (unsigned int i=0; i<lines_at_vertex.size(); ++i)
+            switch (lines_at_vertex[i].size())
+              {
+              case 1:
+                // this vertex has only
+                // one adjacent line
+                ++boundary_nodes;
+                break;
+              case 2:
+                break;
+              default:
+                // in 1d, a node must have one or two adjacent
+                // lines. note that we get here both for
+                // vertices with more than two adjacent lines
+                // and for vertices without any adjacent line
+                if (spacedim==1)
+                  {
+                    AssertThrow (false, ExcInternalError());
+                  }
+                else
+                  {
+                    AssertThrow (false,
+                                 ExcMessage ("You have a vertex in your triangulation "
+                                             "at which more than two cells come together. "
+                                             "(For one dimensional triangulation, cells are "
+                                             "line segments.)"
+                                             "\n\n"
+                                             "This is not currently supported because the "
+                                             "Triangulation class makes the assumption that "
+                                             "every cell has zero or one neighbors behind "
+                                             "each face (here, behind each vertex), but in your "
+                                             "situation there would be more than one."
+                                             "\n\n"
+                                             "Support for this is not currently implemented. "
+                                             "If you need to work with triangulations where "
+                                             "more than two cells come together at a vertex, "
+                                             "duplicate the vertices once per cell (i.e., put "
+                                             "multiple vertices at the same physical location, "
+                                             "but using different vertex indices for each) "
+                                             "and then ensure continuity of the solution by "
+                                             "explicitly creating constraints that the degrees "
+                                             "of freedom at these vertices have the same "
+                                             "value, using the ConstraintMatrix class."));
+                  }
+              }
+
+          // for spacedim==1, assert that there
+          // are exactly two boundary
+          // nodes. note that if the
+          // space dimension is
+          // bigger than 1, then we
+          // can have any number of
+          // them (for example a
+          // ring of cells -- no end
+          // points at all), so nothing
+          // is checked in that case
+          AssertThrow (((spacedim == 1) && (boundary_nodes == 2))
+                       ||
+                       (spacedim > 1),
+                       ExcMessage("The Triangulation has too many end points"));
+        }
+
+
+
+        // update neighborship info
+        typename Triangulation<dim,spacedim>::active_line_iterator
+        line = triangulation.begin_active_line ();
+        // for all lines
+        for (; line!=triangulation.end(); ++line)
+          // for each of the two vertices
+          for (unsigned int vertex=0; vertex<GeometryInfo<dim>::vertices_per_cell; ++vertex)
+            {
+              // if first cell adjacent to
+              // this vertex is the present
+              // one, then the neighbor is
+              // the second adjacent cell and
+              // vice versa
+              if (lines_at_vertex[line->vertex_index(vertex)][0] == line->index())
+                {
+                  if (lines_at_vertex[line->vertex_index(vertex)].size() == 2)
+                    {
+                      const typename Triangulation<dim,spacedim>::cell_iterator
+                      neighbor (&triangulation,
+                                0,              // level
+                                lines_at_vertex[line->vertex_index(vertex)][1]);
+                      line->set_neighbor (vertex, neighbor);
+                    }
+                  else
+                    // no second adjacent cell
+                    // entered -> cell at
+                    // boundary
+                    line->set_neighbor (vertex, triangulation.end());
+                }
+              else
+                // present line is not first
+                // adjacent one -> first
+                // adjacent one is neighbor
+                {
+                  const typename Triangulation<dim,spacedim>::cell_iterator
+                  neighbor (&triangulation,
+                            0,              // level
+                            lines_at_vertex[line->vertex_index(vertex)][0]);
+                  line->set_neighbor (vertex, neighbor);
+                }
+            }
+
+        // finally set the
+        // vertex_to_boundary_id_map_1d
+        // and vertex_to_manifold_id_map_1d
+        // maps
+        triangulation.vertex_to_boundary_id_map_1d->clear();
+        triangulation.vertex_to_manifold_id_map_1d->clear();
+        for (typename Triangulation<dim,spacedim>::active_cell_iterator
+             cell = triangulation.begin_active();
+             cell != triangulation.end(); ++cell)
+          for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+            {
+              // every vertex starts out without a manifold id
+              (*triangulation
+               .vertex_to_manifold_id_map_1d)[cell->face(f)->vertex_index()]
+                = numbers::invalid_manifold_id;
+
+              // a boundary vertex gets the number of the face
+              // it belongs to as its boundary id
+              if (cell->at_boundary(f))
+                (*triangulation
+                 .vertex_to_boundary_id_map_1d)[cell->face(f)->vertex_index()]
+                  = f;
+            }
+      }
+
+
+      /**
+       * Create a triangulation from
+       * given data. This function does
+       * this work for 2-dimensional
+       * triangulations independently
+       * of the actual space dimension.
+       */
+      template <int spacedim>
+      static
+      void
+      create_triangulation (const std::vector<Point<spacedim> > &v,
+                            const std::vector<CellData<2> >     &cells,
+                            const SubCellData                   &subcelldata,
+                            Triangulation<2,spacedim>           &triangulation)
+      {
+        AssertThrow (v.size() > 0, ExcMessage ("No vertices given"));
+        AssertThrow (cells.size() > 0, ExcMessage ("No cells given"));
+
+        const unsigned int dim=2;
+
+        // copy vertices
+        triangulation.vertices = v;
+        triangulation.vertices_used = std::vector<bool> (v.size(), true);
+
+        // make up a list of the needed
+        // lines each line is a pair of
+        // vertices. The list is kept
+        // sorted and it is guaranteed that
+        // each line is inserted only once.
+        // While the key of such an entry
+        // is the pair of vertices, the
+        // thing it points to is an
+        // iterator pointing to the line
+        // object itself. In the first run,
+        // these iterators are all invalid
+        // ones, but they are filled
+        // afterwards
+        std::map<std::pair<int,int>,
+            typename Triangulation<dim,spacedim>::line_iterator> needed_lines;
+        for (unsigned int cell=0; cell<cells.size(); ++cell)
+          {
+            for (unsigned int vertex=0; vertex<4; ++vertex)
+              AssertThrow (cells[cell].vertices[vertex] < triangulation.vertices.size(),
+                           ExcInvalidVertexIndex (cell, cells[cell].vertices[vertex],
+                                                  triangulation.vertices.size()));
+
+            for (unsigned int line=0; line<GeometryInfo<dim>::faces_per_cell; ++line)
+              {
+                // given a line vertex number
+                // (0,1) on a specific line we
+                // get the cell vertex number
+                // (0-4) through the
+                // line_to_cell_vertices
+                // function
+                std::pair<int,int> line_vertices(
+                  cells[cell].vertices[GeometryInfo<dim>::line_to_cell_vertices(line, 0)],
+                  cells[cell].vertices[GeometryInfo<dim>::line_to_cell_vertices(line, 1)]);
+
+                // assert that the line was
+                // not already inserted in
+                // reverse order. This
+                // happens in spite of the
+                // vertex rotation above,
+                // if the sense of the cell
+                // was incorrect.
+                //
+                // Here is what usually
+                // happened when this
+                // exception is thrown:
+                // consider these two cells
+                // and the vertices
+                //  3---4---5
+                //  |   |   |
+                //  0---1---2
+                // If in the input vector
+                // the two cells are given
+                // with vertices <0 1 4 3>
+                // and <4 1 2 5>, in the
+                // first cell the middle
+                // line would have
+                // direction 1->4, while in
+                // the second it would be
+                // 4->1.  This will cause
+                // the exception.
+                AssertThrow (needed_lines.find(std::make_pair(line_vertices.second,
+                                                              line_vertices.first))
+                             ==
+                             needed_lines.end(),
+                             ExcGridHasInvalidCell(cell));
+
+                // insert line, with
+                // invalid iterator if line
+                // already exists, then
+                // nothing bad happens here
+                needed_lines[line_vertices] = triangulation.end_line();
+              }
+          }
+
+
+        // check that every vertex has at
+        // least two adjacent lines
+        {
+          std::vector<unsigned short int> vertex_touch_count (v.size(), 0);
+          typename std::map<std::pair<int,int>,
+                   typename Triangulation<dim,spacedim>::line_iterator>::iterator i;
+          for (i=needed_lines.begin(); i!=needed_lines.end(); i++)
+            {
+              // touch the vertices of
+              // this line
+              ++vertex_touch_count[i->first.first];
+              ++vertex_touch_count[i->first.second];
+            }
+
+          // assert minimum touch count
+          // is at least two. if not so,
+          // then clean triangulation and
+          // exit with an exception
+          AssertThrow (* (std::min_element(vertex_touch_count.begin(),
+                                           vertex_touch_count.end())) >= 2,
+                       ExcMessage("During creation of a triangulation, a part of the "
+                                  "algorithm encountered a vertex that is part of only "
+                                  "a single adjacent line. However, in 2d, every vertex "
+                                  "needs to be at least part of two lines."));
+        }
+
+        // reserve enough space
+        triangulation.levels.push_back (new internal::Triangulation::TriaLevel<dim>);
+        triangulation.faces = new internal::Triangulation::TriaFaces<dim>;
+        triangulation.levels[0]->reserve_space (cells.size(), dim, spacedim);
+        triangulation.faces->lines.reserve_space (0,needed_lines.size());
+        triangulation.levels[0]->cells.reserve_space (0,cells.size());
+
+        // make up lines
+        {
+          typename Triangulation<dim,spacedim>::raw_line_iterator
+          line = triangulation.begin_raw_line();
+          typename std::map<std::pair<int,int>,
+                   typename Triangulation<dim,spacedim>::line_iterator>::iterator i;
+          for (i = needed_lines.begin();
+               line!=triangulation.end_line(); ++line, ++i)
+            {
+              line->set (internal::Triangulation::TriaObject<1>(i->first.first,
+                                                                i->first.second));
+              line->set_used_flag ();
+              line->clear_user_flag ();
+              line->clear_user_data ();
+              i->second = line;
+            }
+        }
+
+
+        // store for each line index
+        // the adjacent cells
+        std::map<int,std::vector<typename Triangulation<dim,spacedim>::cell_iterator> >
+        adjacent_cells;
+
+        // finally make up cells
+        {
+          typename Triangulation<dim,spacedim>::raw_cell_iterator
+          cell = triangulation.begin_raw_quad();
+          for (unsigned int c=0; c<cells.size(); ++c, ++cell)
+            {
+              typename Triangulation<dim,spacedim>::line_iterator
+              lines[GeometryInfo<dim>::lines_per_cell];
+              for (unsigned int line=0; line<GeometryInfo<dim>::lines_per_cell; ++line)
+                lines[line]=needed_lines[std::make_pair(
+                                           cells[c].vertices[GeometryInfo<dim>::line_to_cell_vertices(line, 0)],
+                                           cells[c].vertices[GeometryInfo<dim>::line_to_cell_vertices(line, 1)])];
+
+              cell->set (internal::Triangulation::TriaObject<2> (lines[0]->index(),
+                                                                 lines[1]->index(),
+                                                                 lines[2]->index(),
+                                                                 lines[3]->index()));
+
+              cell->set_used_flag ();
+              cell->set_material_id (cells[c].material_id);
+              cell->set_manifold_id (cells[c].manifold_id);
+              cell->clear_user_data ();
+              cell->set_subdomain_id (0);
+
+              // note that this cell is
+              // adjacent to the four
+              // lines
+              for (unsigned int line=0; line<GeometryInfo<dim>::lines_per_cell; ++line)
+                adjacent_cells[lines[line]->index()].push_back (cell);
+            }
+        }
+
+
+        for (typename Triangulation<dim,spacedim>::line_iterator
+             line=triangulation.begin_line();
+             line!=triangulation.end_line(); ++line)
+          {
+            const unsigned int n_adj_cells = adjacent_cells[line->index()].size();
+
+            // assert that every line has one or two adjacent cells.
+            // this has to be the case for 2d triangulations in 2d.
+            // in higher dimensions, this may happen but is not
+            // implemented
+            if (spacedim==2)
+              AssertThrow ((n_adj_cells >= 1) &&
+                           (n_adj_cells <= 2),
+                           ExcInternalError())
+              else
+                AssertThrow ((n_adj_cells >= 1) &&
+                             (n_adj_cells <= 2),
+                             ExcMessage ("You have a line in your triangulation "
+                                         "at which more than two cells come together. "
+                                         "\n\n"
+                                         "This is not currently supported because the "
+                                         "Triangulation class makes the assumption that "
+                                         "every cell has zero or one neighbors behind "
+                                         "each face (here, behind each line), but in your "
+                                         "situation there would be more than one."
+                                         "\n\n"
+                                         "Support for this is not currently implemented. "
+                                         "If you need to work with triangulations where "
+                                         "more than two cells come together at a line, "
+                                         "duplicate the vertices once per cell (i.e., put "
+                                         "multiple vertices at the same physical location, "
+                                         "but using different vertex indices for each) "
+                                         "and then ensure continuity of the solution by "
+                                         "explicitly creating constraints that the degrees "
+                                         "of freedom at these lines have the same "
+                                         "value, using the ConstraintMatrix class."));
+
+            // if only one cell: line is at
+            // boundary -> give it the
+            // boundary indicator zero by
+            // default
+            if (n_adj_cells == 1)
+              line->set_boundary_id (0);
+            else
+              // interior line -> numbers::internal_face_boundary_id
+              line->set_boundary_id (numbers::internal_face_boundary_id);
+            line->set_manifold_id(numbers::flat_manifold_id);
+          }
+
+        // set boundary indicators where given
+        std::vector<CellData<1> >::const_iterator boundary_line
+          = subcelldata.boundary_lines.begin();
+        std::vector<CellData<1> >::const_iterator end_boundary_line
+          = subcelldata.boundary_lines.end();
+        for (; boundary_line!=end_boundary_line; ++boundary_line)
+          {
+            typename Triangulation<dim,spacedim>::line_iterator line;
+            std::pair<int,int> line_vertices(std::make_pair(boundary_line->vertices[0],
+                                                            boundary_line->vertices[1]));
+            if (needed_lines.find(line_vertices) != needed_lines.end())
+              // line found in this direction
+              line = needed_lines[line_vertices];
+            else
+              {
+                // look whether it exists in reverse direction
+                std::swap (line_vertices.first, line_vertices.second);
+                if (needed_lines.find(line_vertices) != needed_lines.end())
+                  line = needed_lines[line_vertices];
+                else
+                  // line does not exist
+                  AssertThrow (false, ExcLineInexistant(line_vertices.first,
+                                                        line_vertices.second));
+              }
+
+            // assert that we only set boundary info once
+            AssertThrow (! (line->boundary_id() != 0 &&
+                            line->boundary_id() != numbers::internal_face_boundary_id),
+                         ExcMultiplySetLineInfoOfLine(line_vertices.first,
+                                                      line_vertices.second));
+
+            // Assert that only exterior lines are given a boundary
+            // indicator; however, it is possible that someone may
+            // want to give an interior line a manifold id (and thus
+            // lists this line in the subcell_data structure), and we
+            // need to allow that
+            if (boundary_line->boundary_id != numbers::internal_face_boundary_id)
+              {
+                AssertThrow (! (line->boundary_id() == numbers::internal_face_boundary_id),
+                             ExcInteriorLineCantBeBoundary(line->vertex_index(0),
+                                                           line->vertex_index(1),
+                                                           boundary_line->boundary_id));
+                line->set_boundary_id (boundary_line->boundary_id);
+              }
+
+            line->set_manifold_id (boundary_line->manifold_id);
+          }
+
+
+        // finally update neighborship info
+        for (typename Triangulation<dim,spacedim>::cell_iterator
+             cell=triangulation.begin(); cell!=triangulation.end(); ++cell)
+          for (unsigned int side=0; side<4; ++side)
+            if (adjacent_cells[cell->line(side)->index()][0] == cell)
+              // first adjacent cell is
+              // this one
+              {
+                if (adjacent_cells[cell->line(side)->index()].size() == 2)
+                  // there is another
+                  // adjacent cell
+                  cell->set_neighbor (side,
+                                      adjacent_cells[cell->line(side)->index()][1]);
+              }
+        // first adjacent cell is not this
+        // one, -> it must be the neighbor
+        // we are looking for
+            else
+              cell->set_neighbor (side,
+                                  adjacent_cells[cell->line(side)->index()][0]);
+      }
+
+
+      /**
+       * Strict ordering functor for internal::Triangulation::TriaObject<2>
+       * objects, i.e. quads described by the indices of their four bounding
+       * lines. It exists so that such objects can serve as keys of the
+       * std::map of needed quads that the three-dimensional
+       * create_triangulation function builds.
+       *
+       * The ordering is lexicographic in face(0), face(1), face(2), face(3).
+       * Because this choice is arbitrary rather than canonical, it is kept
+       * here instead of being made part of TriaObject<2> itself.
+       */
+      struct QuadComparator
+      {
+        inline bool operator () (const internal::Triangulation::TriaObject<2> &q1,
+                                 const internal::Triangulation::TriaObject<2> &q2) const
+        {
+          // walk over the four line
+          // indices in order; the
+          // first position at which
+          // the two quads differ
+          // decides the comparison
+          for (unsigned int i=0; i<4; ++i)
+            {
+              if (q1.face(i) < q2.face(i))
+                return true;
+              if (!(q1.face(i) == q2.face(i)))
+                return false;
+            }
+
+          // all four indices agree,
+          // so the quads are equivalent
+          // and neither one precedes
+          // the other
+          return false;
+        }
+      };
+
+
+      /**
+      * Create a triangulation from
+      * given data. This function does
+      * this work for 3-dimensional
+      * triangulations independently
+      * of the actual space dimension.
+      */
+      template <int spacedim>
+      static
+      void
+      create_triangulation (const std::vector<Point<spacedim> > &v,
+                            const std::vector<CellData<3> >     &cells,
+                            const SubCellData                   &subcelldata,
+                            Triangulation<3,spacedim>           &triangulation)
+      {
+        AssertThrow (v.size() > 0, ExcMessage ("No vertices given"));
+        AssertThrow (cells.size() > 0, ExcMessage ("No cells given"));
+
+        const unsigned int dim=3;
+
+        // copy vertices
+        triangulation.vertices = v;
+        triangulation.vertices_used = std::vector<bool> (v.size(), true);
+
+        // check that all cells have
+        // positive volume. if not call the
+        // invert_all_cells_of_negative_grid
+        // and reorder_cells function of
+        // GridReordering before creating
+        // the triangulation
+#ifndef _MSC_VER
+        //TODO: The following code does not compile with MSVC. Find a way around it
+        for (unsigned int cell_no = 0; cell_no<cells.size(); ++cell_no)
+          AssertThrow(dealii::GridTools::cell_measure(triangulation.vertices,
+                                                      cells[cell_no].vertices) >= 0,
+                      ExcGridHasInvalidCell(cell_no));
+#endif
+
+        ///////////////////////////////////////
+        // first set up some collections of data
+        //
+        // make up a list of the needed
+        // lines
+        //
+        // each line is a pair of
+        // vertices. The list is kept
+        // sorted and it is guaranteed that
+        // each line is inserted only once.
+        // While the key of such an entry
+        // is the pair of vertices, the
+        // thing it points to is an
+        // iterator pointing to the line
+        // object itself. In the first run,
+        // these iterators are all invalid
+        // ones, but they are filled
+        // afterwards same applies for the
+        // quads
+        typename std::map<std::pair<int,int>,
+                 typename Triangulation<dim,spacedim>::line_iterator> needed_lines;
+        for (unsigned int cell=0; cell<cells.size(); ++cell)
+          {
+            // check whether vertex indices
+            // are valid ones
+            for (unsigned int vertex=0; vertex<GeometryInfo<dim>::vertices_per_cell; ++vertex)
+              AssertThrow (cells[cell].vertices[vertex] < triangulation.vertices.size(),
+                           ExcInvalidVertexIndex (cell, cells[cell].vertices[vertex],
+                                                  triangulation.vertices.size()));
+
+            for (unsigned int line=0; line<GeometryInfo<dim>::lines_per_cell; ++line)
+              {
+                // given a line vertex number
+                // (0,1) on a specific line we
+                // get the cell vertex number
+                // (0-7) through the
+                // line_to_cell_vertices
+                // function
+                std::pair<int,int> line_vertices(
+                  cells[cell].vertices[GeometryInfo<dim>::line_to_cell_vertices(line, 0)],
+                  cells[cell].vertices[GeometryInfo<dim>::line_to_cell_vertices(line, 1)]);
+
+                // if that line was already inserted
+                // in reverse order do nothing, else
+                // insert the line
+                if ( (needed_lines.find(std::make_pair(line_vertices.second,
+                                                       line_vertices.first))
+                      ==
+                      needed_lines.end()))
+                  {
+                    // insert line, with
+                    // invalid iterator. if line
+                    // already exists, then
+                    // nothing bad happens here
+                    needed_lines[line_vertices] = triangulation.end_line();
+                  }
+              }
+          }
+
+
+        /////////////////////////////////
+        // now for some sanity-checks:
+        //
+        // check that every vertex has at
+        // least three adjacent lines
+        {
+          std::vector<unsigned short int> vertex_touch_count (v.size(), 0);
+          typename std::map<std::pair<int,int>,
+                   typename Triangulation<dim,spacedim>::line_iterator>::iterator i;
+          for (i=needed_lines.begin(); i!=needed_lines.end(); i++)
+            {
+              // touch the vertices of
+              // this line
+              ++vertex_touch_count[i->first.first];
+              ++vertex_touch_count[i->first.second];
+            }
+
+          // assert minimum touch count
+          // is at least three. if not so,
+          // then clean triangulation and
+          // exit with an exception
+          AssertThrow (* (std::min_element(vertex_touch_count.begin(),
+                                           vertex_touch_count.end())) >= 3,
+                       ExcMessage("During creation of a triangulation, a part of the "
+                                  "algorithm encountered a vertex that is part of only "
+                                  "one or two adjacent lines. However, in 3d, every vertex "
+                                  "needs to be at least part of three lines."));
+        }
+
+
+        ///////////////////////////////////
+        // actually set up data structures
+        // for the lines
+        // reserve enough space
+        triangulation.levels.push_back (new internal::Triangulation::TriaLevel<dim>);
+        triangulation.faces = new internal::Triangulation::TriaFaces<dim>;
+        triangulation.levels[0]->reserve_space (cells.size(), dim, spacedim);
+        triangulation.faces->lines.reserve_space (0,needed_lines.size());
+
+        // make up lines
+        {
+          typename Triangulation<dim,spacedim>::raw_line_iterator
+          line = triangulation.begin_raw_line();
+          typename std::map<std::pair<int,int>,
+                   typename Triangulation<dim,spacedim>::line_iterator>::iterator i;
+          for (i = needed_lines.begin(); line!=triangulation.end_line(); ++line, ++i)
+            {
+              line->set (internal::Triangulation::TriaObject<1>(i->first.first,
+                                                                i->first.second));
+              line->set_used_flag ();
+              line->clear_user_flag ();
+              line->clear_user_data ();
+
+              // now set the iterator for
+              // this line
+              i->second = line;
+            }
+        }
+
+
+        ///////////////////////////////////////////
+        // make up the quads of this triangulation
+        //
+        // same thing: the iterators are
+        // set to the invalid value at
+        // first, we only collect the data
+        // now
+
+        // the bool array stores, whether the lines
+        // are in the standard orientation or not
+
+        // note that QuadComparator is a
+        // class declared and defined in
+        // this file
+        std::map<internal::Triangulation::TriaObject<2>,
+            std::pair<typename Triangulation<dim,spacedim>::quad_iterator,
+            std_cxx11::array<bool,GeometryInfo<dim>::lines_per_face> >,
+            QuadComparator>
+            needed_quads;
+        for (unsigned int cell=0; cell<cells.size(); ++cell)
+          {
+            // the faces are quads which
+            // consist of four numbers
+            // denoting the index of the
+            // four lines bounding the
+            // quad. we can get this index
+            // by asking @p{needed_lines}
+            // for an iterator to this
+            // line, dereferencing it and
+            // thus return an iterator into
+            // the @p{lines} array of the
+            // triangulation, which is
+            // already set up. we can then
+            // ask this iterator for its
+            // index within the present
+            // level (the level is zero, of
+            // course)
+            //
+            // to make things easier, we
+            // don't create the lines
+            // (pairs of their vertex
+            // indices) in place, but
+            // before they are really
+            // needed.
+            std::pair<int,int> line_list[GeometryInfo<dim>::lines_per_cell],
+                inverse_line_list[GeometryInfo<dim>::lines_per_cell];
+            unsigned int face_line_list[GeometryInfo<dim>::lines_per_face];
+            std_cxx11::array<bool,GeometryInfo<dim>::lines_per_face> orientation;
+
+            for (unsigned int line=0; line<GeometryInfo<dim>::lines_per_cell; ++line)
+              {
+                line_list[line]=std::pair<int,int> (
+                                  cells[cell].vertices[GeometryInfo<dim>::line_to_cell_vertices(line, 0)],
+                                  cells[cell].vertices[GeometryInfo<dim>::line_to_cell_vertices(line, 1)]);
+                inverse_line_list[line]=std::pair<int,int> (
+                                          cells[cell].vertices[GeometryInfo<dim>::line_to_cell_vertices(line, 1)],
+                                          cells[cell].vertices[GeometryInfo<dim>::line_to_cell_vertices(line, 0)]);
+              }
+
+            for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+              {
+                // set up a list of the lines to be
+                // used for this face. check the
+                // direction for each line
+                //
+                // given a face line number (0-3) on
+                // a specific face we get the cell
+                // line number (0-11) through the
+                // face_to_cell_lines function
+                for (unsigned int l=0; l<GeometryInfo<dim>::lines_per_face; ++l)
+                  if (needed_lines.find (inverse_line_list[GeometryInfo<dim>::
+                                                           face_to_cell_lines(face,l)]) == needed_lines.end())
+                    {
+                      face_line_list[l]=needed_lines[line_list[GeometryInfo<dim>::
+                                                               face_to_cell_lines(face,l)]]->index();
+                      orientation[l]=true;
+                    }
+                  else
+                    {
+                      face_line_list[l]=needed_lines[inverse_line_list[GeometryInfo<dim>::
+                                                                       face_to_cell_lines(face,l)]]->index();
+                      orientation[l]=false;
+                    }
+
+
+                internal::Triangulation::TriaObject<2>
+                quad(face_line_list[0],
+                     face_line_list[1],
+                     face_line_list[2],
+                     face_line_list[3]);
+
+                // insert quad, with
+                // invalid iterator
+                //
+                // if quad already exists,
+                // then nothing bad happens
+                // here, as this will then
+                // simply become an
+                // interior face of the
+                // triangulation. however,
+                // we will run into major
+                // trouble if the face was
+                // already inserted in the
+                // opposite
+                // direction. there are
+                // really only two
+                // orientations for a face
+                // to be in, since the edge
+                // directions are already
+                // set. thus, vertex 0 is
+                // the one from which two
+                // edges originate, and
+                // vertex 3 is the one to
+                // which they converge. we
+                // are then left with
+                // orientations 0-1-2-3 and
+                // 2-3-0-1 for the order of
+                // lines. the
+                // corresponding quad can
+                // be easily constructed by
+                // exchanging lines. we do
+                // so here, just to check
+                // that that flipped quad
+                // isn't already in the
+                // triangulation. if it is,
+                // then don't insert the
+                // new one and instead
+                // later set the
+                // face_orientation flag
+                const internal::Triangulation::TriaObject<2>
+                test_quad_1(quad.face(2), quad.face(3),
+                            quad.face(0), quad.face(1)),//face_orientation=false, face_flip=false, face_rotation=false
+                                      test_quad_2(quad.face(0), quad.face(1),
+                                                  quad.face(3), quad.face(2)),//face_orientation=false, face_flip=false, face_rotation=true
+                                      test_quad_3(quad.face(3), quad.face(2),
+                                                  quad.face(1), quad.face(0)),//face_orientation=false, face_flip=true,  face_rotation=false
+                                      test_quad_4(quad.face(1), quad.face(0),
+                                                  quad.face(2), quad.face(3)),//face_orientation=false, face_flip=true,  face_rotation=true
+                                      test_quad_5(quad.face(2), quad.face(3),
+                                                  quad.face(1), quad.face(0)),//face_orientation=true,  face_flip=false, face_rotation=true
+                                      test_quad_6(quad.face(1), quad.face(0),
+                                                  quad.face(3), quad.face(2)),//face_orientation=true,  face_flip=true,  face_rotation=false
+                                      test_quad_7(quad.face(3), quad.face(2),
+                                                  quad.face(0), quad.face(1));//face_orientation=true,  face_flip=true,  face_rotation=true
+                if (needed_quads.find (test_quad_1) == needed_quads.end() &&
+                    needed_quads.find (test_quad_2) == needed_quads.end() &&
+                    needed_quads.find (test_quad_3) == needed_quads.end() &&
+                    needed_quads.find (test_quad_4) == needed_quads.end() &&
+                    needed_quads.find (test_quad_5) == needed_quads.end() &&
+                    needed_quads.find (test_quad_6) == needed_quads.end() &&
+                    needed_quads.find (test_quad_7) == needed_quads.end())
+                  needed_quads[quad] = std::make_pair(triangulation.end_quad(),orientation);
+              }
+          }
+
+
+        /////////////////////////////////
+        // enter the resulting quads into
+        // the arrays of the Triangulation
+        //
+        // first reserve enough space
+        triangulation.faces->quads.reserve_space (0,needed_quads.size());
+
+        {
+          typename Triangulation<dim,spacedim>::raw_quad_iterator
+          quad = triangulation.begin_raw_quad();
+          typename std::map<internal::Triangulation::TriaObject<2>,
+                   std::pair<typename Triangulation<dim,spacedim>::quad_iterator,
+                   std_cxx11::array<bool,GeometryInfo<dim>::lines_per_face> >,
+                   QuadComparator>
+                   ::iterator q;
+          for (q = needed_quads.begin(); quad!=triangulation.end_quad(); ++quad, ++q)
+            {
+              quad->set (q->first);
+              quad->set_used_flag ();
+              quad->clear_user_flag ();
+              quad->clear_user_data ();
+              // set the line orientation
+              quad->set_line_orientation(0,q->second.second[0]);
+              quad->set_line_orientation(1,q->second.second[1]);
+              quad->set_line_orientation(2,q->second.second[2]);
+              quad->set_line_orientation(3,q->second.second[3]);
+
+
+              // now set the iterator for
+              // this quad
+              q->second.first = quad;
+            }
+        }
+
+        /////////////////////////////////
+        // finally create the cells
+        triangulation.levels[0]->cells.reserve_space (cells.size());
+
+        // store for each quad index the
+        // adjacent cells
+        std::map<int,std::vector<typename Triangulation<dim,spacedim>::cell_iterator> >
+        adjacent_cells;
+
+        // finally make up cells
+        {
+          typename Triangulation<dim,spacedim>::raw_cell_iterator
+          cell = triangulation.begin_raw_hex();
+          for (unsigned int c=0; c<cells.size(); ++c, ++cell)
+            {
+              // first find for each of
+              // the cells the quad
+              // iterator of the
+              // respective faces.
+              //
+              // to this end, set up the
+              // lines of this cell and
+              // find the quads that are
+              // bounded by these lines;
+              // these are then the faces
+              // of the present cell
+              std::pair<int,int> line_list[GeometryInfo<dim>::lines_per_cell],
+                  inverse_line_list[GeometryInfo<dim>::lines_per_cell];
+              unsigned int face_line_list[4];
+              for (unsigned int line=0; line<GeometryInfo<dim>::lines_per_cell; ++line)
+                {
+                  line_list[line]=std::make_pair(
+                                    cells[c].vertices[GeometryInfo<dim>::line_to_cell_vertices(line, 0)],
+                                    cells[c].vertices[GeometryInfo<dim>::line_to_cell_vertices(line, 1)]);
+                  inverse_line_list[line]=std::pair<int,int> (
+                                            cells[c].vertices[GeometryInfo<dim>::line_to_cell_vertices(line, 1)],
+                                            cells[c].vertices[GeometryInfo<dim>::line_to_cell_vertices(line, 0)]);
+                }
+
+              // get the iterators
+              // corresponding to the
+              // faces. also store
+              // whether they are
+              // reversed or not
+              typename Triangulation<dim,spacedim>::quad_iterator
+              face_iterator[GeometryInfo<dim>::faces_per_cell];
+              bool face_orientation[GeometryInfo<dim>::faces_per_cell];
+              bool face_flip[GeometryInfo<dim>::faces_per_cell];
+              bool face_rotation[GeometryInfo<dim>::faces_per_cell];
+              for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+                {
+                  for (unsigned int l=0; l<GeometryInfo<dim>::lines_per_face; ++l)
+                    if (needed_lines.find (inverse_line_list[GeometryInfo<dim>::
+                                                             face_to_cell_lines(face,l)]) == needed_lines.end())
+                      face_line_list[l]=needed_lines[line_list[GeometryInfo<dim>::
+                                                               face_to_cell_lines(face,l)]]->index();
+                    else
+                      face_line_list[l]=needed_lines[inverse_line_list[GeometryInfo<dim>::
+                                                                       face_to_cell_lines(face,l)]]->index();
+
+                  internal::Triangulation::TriaObject<2>
+                  quad(face_line_list[0],
+                       face_line_list[1],
+                       face_line_list[2],
+                       face_line_list[3]);
+
+                  if (needed_quads.find (quad) != needed_quads.end())
+                    {
+                      // face is in standard
+                      // orientation (and not
+                      // flipped or rotated). this
+                      // must be true for at least
+                      // one of the two cells
+                      // containing this face
+                      // (i.e. for the cell which
+                      // originally inserted the
+                      // face)
+                      face_iterator[face] = needed_quads[quad].first;
+                      face_orientation[face] = true;
+                      face_flip[face]=false;
+                      face_rotation[face]=false;
+                    }
+                  else
+                    {
+                      // face must be available in
+                      // reverse order
+                      // then. construct all
+                      // possibilities and check
+                      // them one after the other
+                      const internal::Triangulation::TriaObject<2>
+                      test_quad_1(quad.face(2), quad.face(3),
+                                  quad.face(0), quad.face(1)),//face_orientation=false, face_flip=false, face_rotation=false
+                                            test_quad_2(quad.face(0), quad.face(1),
+                                                        quad.face(3), quad.face(2)),//face_orientation=false, face_flip=false, face_rotation=true
+                                            test_quad_3(quad.face(3), quad.face(2),
+                                                        quad.face(1), quad.face(0)),//face_orientation=false, face_flip=true,  face_rotation=false
+                                            test_quad_4(quad.face(1), quad.face(0),
+                                                        quad.face(2), quad.face(3)),//face_orientation=false, face_flip=true,  face_rotation=true
+                                            test_quad_5(quad.face(2), quad.face(3),
+                                                        quad.face(1), quad.face(0)),//face_orientation=true,  face_flip=false, face_rotation=true
+                                            test_quad_6(quad.face(1), quad.face(0),
+                                                        quad.face(3), quad.face(2)),//face_orientation=true,  face_flip=true,  face_rotation=false
+                                            test_quad_7(quad.face(3), quad.face(2),
+                                                        quad.face(0), quad.face(1));//face_orientation=true,  face_flip=true,  face_rotation=true
+                      if (needed_quads.find (test_quad_1) != needed_quads.end())
+                        {
+                          face_iterator[face] = needed_quads[test_quad_1].first;
+                          face_orientation[face] = false;
+                          face_flip[face]=false;
+                          face_rotation[face]=false;
+                        }
+                      else if (needed_quads.find (test_quad_2) != needed_quads.end())
+                        {
+                          face_iterator[face] = needed_quads[test_quad_2].first;
+                          face_orientation[face] = false;
+                          face_flip[face]=false;
+                          face_rotation[face]=true;
+                        }
+                      else if (needed_quads.find (test_quad_3) != needed_quads.end())
+                        {
+                          face_iterator[face] = needed_quads[test_quad_3].first;
+                          face_orientation[face] = false;
+                          face_flip[face]=true;
+                          face_rotation[face]=false;
+                        }
+                      else if (needed_quads.find (test_quad_4) != needed_quads.end())
+                        {
+                          face_iterator[face] = needed_quads[test_quad_4].first;
+                          face_orientation[face] = false;
+                          face_flip[face]=true;
+                          face_rotation[face]=true;
+                        }
+                      else if (needed_quads.find (test_quad_5) != needed_quads.end())
+                        {
+                          face_iterator[face] = needed_quads[test_quad_5].first;
+                          face_orientation[face] = true;
+                          face_flip[face]=false;
+                          face_rotation[face]=true;
+                        }
+                      else if (needed_quads.find (test_quad_6) != needed_quads.end())
+                        {
+                          face_iterator[face] = needed_quads[test_quad_6].first;
+                          face_orientation[face] = true;
+                          face_flip[face]=true;
+                          face_rotation[face]=false;
+                        }
+                      else if (needed_quads.find (test_quad_7) != needed_quads.end())
+                        {
+                          face_iterator[face] = needed_quads[test_quad_7].first;
+                          face_orientation[face] = true;
+                          face_flip[face]=true;
+                          face_rotation[face]=true;
+                        }
+
+                      else
+                        // we didn't find the
+                        // face in any direction,
+                        // so something went
+                        // wrong above
+                        Assert(false,ExcInternalError());
+
+                    }
+                }// for all faces
+
+              // make the cell out of
+              // these iterators
+              cell->set (internal::Triangulation
+                         ::TriaObject<3> (face_iterator[0]->index(),
+                                          face_iterator[1]->index(),
+                                          face_iterator[2]->index(),
+                                          face_iterator[3]->index(),
+                                          face_iterator[4]->index(),
+                                          face_iterator[5]->index()));
+
+              cell->set_used_flag ();
+              cell->set_material_id (cells[c].material_id);
+              cell->set_manifold_id (cells[c].manifold_id);
+              cell->clear_user_flag ();
+              cell->clear_user_data ();
+              cell->set_subdomain_id (0);
+
+              // set orientation flag for
+              // each of the faces
+              for (unsigned int quad=0; quad<GeometryInfo<dim>::faces_per_cell; ++quad)
+                {
+                  cell->set_face_orientation (quad, face_orientation[quad]);
+                  cell->set_face_flip (quad, face_flip[quad]);
+                  cell->set_face_rotation (quad, face_rotation[quad]);
+                }
+
+
+              // note that this cell is
+              // adjacent to the six
+              // quads
+              for (unsigned int quad=0; quad<GeometryInfo<dim>::faces_per_cell; ++quad)
+                adjacent_cells[face_iterator[quad]->index()].push_back (cell);
+
+#ifdef DEBUG
+              // make some checks on the
+              // lines and their
+              // ordering
+
+              // first map all cell lines
+              // to the two face lines
+              // which should
+              // coincide. all face lines
+              // are included with a cell
+              // line number (0-11)
+              // key. At the end all keys
+              // will be included twice
+              // (for each of the two
+              // coinciding lines once)
+              std::multimap<unsigned int, std::pair<unsigned int, unsigned int> >
+              cell_to_face_lines;
+              for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+                for (unsigned int line=0; line<GeometryInfo<dim>::lines_per_face; ++line)
+                  cell_to_face_lines.insert(
+                    std::pair<unsigned int, std::pair<unsigned int, unsigned int> > (
+                      GeometryInfo<dim>::face_to_cell_lines(face,line),
+                      std::pair<unsigned int, unsigned int> (face,line)));
+              std::multimap<unsigned int, std::pair<unsigned int, unsigned int> >::const_iterator
+              map_iter=cell_to_face_lines.begin();
+
+              for (; map_iter!=cell_to_face_lines.end(); ++map_iter)
+                {
+                  const unsigned int cell_line=map_iter->first;
+                  const unsigned int face1=map_iter->second.first;
+                  const unsigned int line1=map_iter->second.second;
+                  ++map_iter;
+                  Assert(map_iter!=cell_to_face_lines.end(), ExcInternalErrorOnCell(c));
+                  Assert(map_iter->first==cell_line, ExcInternalErrorOnCell(c));
+                  const unsigned int face2=map_iter->second.first;
+                  const unsigned int line2=map_iter->second.second;
+
+                  // check that the pair
+                  // of lines really
+                  // coincide. Take care
+                  // about the face
+                  // orientation;
+                  Assert (face_iterator[face1]->line(GeometryInfo<dim>::standard_to_real_face_line(
+                                                       line1,
+                                                       face_orientation[face1],
+                                                       face_flip[face1],
+                                                       face_rotation[face1])) ==
+                          face_iterator[face2]->line(GeometryInfo<dim>::standard_to_real_face_line(
+                                                       line2,
+                                                       face_orientation[face2],
+                                                       face_flip[face2],
+                                                       face_rotation[face2])),
+                          ExcInternalErrorOnCell(c));
+                }
+#endif
+            }
+        }
+
+
+        /////////////////////////////////////////
+        // find those quads which are at the
+        // boundary and mark them appropriately
+        for (typename Triangulation<dim,spacedim>::quad_iterator
+             quad=triangulation.begin_quad(); quad!=triangulation.end_quad(); ++quad)
+          {
+            const unsigned int n_adj_cells = adjacent_cells[quad->index()].size();
+            // assert that every quad has
+            // one or two adjacent cells
+            AssertThrow ((n_adj_cells >= 1) &&
+                         (n_adj_cells <= 2),
+                         ExcInternalError());
+
+            // if only one cell: quad is at
+            // boundary -> give it the
+            // boundary indicator zero by
+            // default
+            if (n_adj_cells == 1)
+              quad->set_boundary_id (0);
+            else
+              // interior quad -> numbers::internal_face_boundary_id
+              quad->set_boundary_id (numbers::internal_face_boundary_id);
+            // Manifold ids are set
+            // independently of where
+            // they are
+            quad->set_manifold_id(numbers::flat_manifold_id);
+          }
+
+        /////////////////////////////////////////
+        // next find those lines which are at
+        // the boundary and mark all others as
+        // interior ones
+        //
+        // for this: first mark all lines as interior. use this loop
+        // to also set all manifold ids of all lines
+        for (typename Triangulation<dim,spacedim>::line_iterator
+             line=triangulation.begin_line(); line!=triangulation.end_line(); ++line)
+          {
+            line->set_boundary_id (numbers::internal_face_boundary_id);
+            line->set_manifold_id(numbers::flat_manifold_id);
+          }
+
+        // next reset all lines bounding
+        // boundary quads as on the
+        // boundary also. note that since
+        // we are in 3d, there are cases
+        // where one or more lines of a
+        // quad that is not on the
+        // boundary, are actually boundary
+        // lines. they will not be marked
+        // when visiting this
+        // face. however, since we do not
+        // support dim-2 dimensional
+        // boundaries (i.e. internal lines
+        // constituting boundaries), every
+        // such line is also part of a face
+        // that is actually on the
+        // boundary, so sooner or later we
+        // get to mark that line for being
+        // on the boundary
+        for (typename Triangulation<dim,spacedim>::quad_iterator
+             quad=triangulation.begin_quad(); quad!=triangulation.end_quad(); ++quad)
+          if (quad->at_boundary())
+            for (unsigned int l=0; l<4; ++l)
+              quad->line(l)->set_boundary_id (0);
+
+        ///////////////////////////////////////
+        // now set boundary indicators
+        // where given
+        //
+        // first do so for lines
+        std::vector<CellData<1> >::const_iterator boundary_line
+          = subcelldata.boundary_lines.begin();
+        std::vector<CellData<1> >::const_iterator end_boundary_line
+          = subcelldata.boundary_lines.end();
+        for (; boundary_line!=end_boundary_line; ++boundary_line)
+          {
+            typename Triangulation<dim,spacedim>::line_iterator line;
+            std::pair <int, int> line_vertices(std::make_pair(boundary_line->vertices[0],
+                                                              boundary_line->vertices[1]));
+            if (needed_lines.find(line_vertices) != needed_lines.end())
+              // line found in this
+              // direction
+              line = needed_lines[line_vertices];
+
+            else
+              {
+                // look whether it exists in
+                // reverse direction
+                std::swap (line_vertices.first, line_vertices.second);
+                if (needed_lines.find(line_vertices) != needed_lines.end())
+                  line = needed_lines[line_vertices];
+                else
+                  // line does not exist
+                  AssertThrow (false, ExcLineInexistant(line_vertices.first,
+                                                        line_vertices.second));
+              }
+            // Assert that only exterior
+            // lines are given a boundary
+            // indicator
+            AssertThrow (line->at_boundary(),
+                         ExcInteriorLineCantBeBoundary(line->vertex_index(0),
+                                                       line->vertex_index(1),
+                                                       boundary_line->boundary_id));
+
+            // and make sure that we don't
+            // attempt to reset the
+            // boundary indicator to a
+            // different than the
+            // previously set value
+            if (line->boundary_id() != 0)
+              AssertThrow (line->boundary_id() == boundary_line->boundary_id,
+                           ExcMessage ("Duplicate boundary lines are only allowed "
+                                       "if they carry the same boundary indicator."));
+
+            line->set_boundary_id (boundary_line->boundary_id);
+            // Set manifold id if given
+            line->set_manifold_id(boundary_line->manifold_id);
+          }
+
+
+        // now go on with boundary faces
+        std::vector<CellData<2> >::const_iterator boundary_quad
+          = subcelldata.boundary_quads.begin();
+        std::vector<CellData<2> >::const_iterator end_boundary_quad
+          = subcelldata.boundary_quads.end();
+        for (; boundary_quad!=end_boundary_quad; ++boundary_quad)
+          {
+            typename Triangulation<dim,spacedim>::quad_iterator quad;
+            typename Triangulation<dim,spacedim>::line_iterator line[4];
+
+            // first find the lines that
+            // are made up of the given
+            // vertices, then build up a
+            // quad from these lines
+            // finally use the find
+            // function of the map template
+            // to find the quad
+            for (unsigned int i=0; i<4; ++i)
+              {
+                std::pair<int, int> line_vertices(
+                  boundary_quad->vertices[GeometryInfo<dim-1>::line_to_cell_vertices(i,0)],
+                  boundary_quad->vertices[GeometryInfo<dim-1>::line_to_cell_vertices(i,1)]);
+
+                // check whether line
+                // already exists
+                if (needed_lines.find(line_vertices) != needed_lines.end())
+                  line[i] = needed_lines[line_vertices];
+                else
+                  // look whether it exists
+                  // in reverse direction
+                  {
+                    std::swap (line_vertices.first, line_vertices.second);
+                    if (needed_lines.find(line_vertices) != needed_lines.end())
+                      line[i] = needed_lines[line_vertices];
+                    else
+                      // line does
+                      // not exist
+                      AssertThrow (false, ExcLineInexistant(line_vertices.first,
+                                                            line_vertices.second));
+                  }
+              }
+
+
+            // Set up 2 quads that are
+            // built up from the lines for
+            // reasons of comparison to
+            // needed_quads.  The second
+            // quad is the reversed version
+            // of the first quad in order
+            // find the quad regardless of
+            // its orientation.  This is
+            // introduced for convenience
+            // and because boundary quad
+            // orientation does not carry
+            // any information.
+            internal::Triangulation::TriaObject<2>
+            quad_compare_1(line[0]->index(), line[1]->index(),
+                           line[2]->index(), line[3]->index());
+            internal::Triangulation::TriaObject<2>
+            quad_compare_2(line[2]->index(), line[3]->index(),
+                           line[0]->index(), line[1]->index());
+
+            // try to find the quad with
+            // lines situated as
+            // constructed above.  if it
+            // could not be found, rotate
+            // the boundary lines 3 times
+            // until it is found or it does
+            // not exist.
+
+            // mapping from counterclock to
+            // lexicographic ordering of
+            // quad lines
+            static const unsigned int lex2cclock[4]= {3,1,0,2};
+            // copy lines from
+            // lexicographic to
+            // counterclock ordering, as
+            // rotation is much simpler in
+            // counterclock ordering
+            typename Triangulation<dim,spacedim>::line_iterator
+            line_counterclock[4];
+            for (unsigned int i=0; i<4; ++i)
+              line_counterclock[lex2cclock[i]]=line[i];
+            unsigned int n_rotations=0;
+            bool not_found_quad_1;
+            while ( (not_found_quad_1=(needed_quads.find(quad_compare_1) == needed_quads.end())) &&
+                    (                  needed_quads.find(quad_compare_2) == needed_quads.end()) &&
+                    (n_rotations<4))
+              {
+                // use the rotate defined
+                // in <algorithm>
+                rotate(line_counterclock, line_counterclock+1, line_counterclock+4);
+                // update the quads with
+                // rotated lines (i runs in
+                // lexicographic ordering)
+                for (unsigned int i=0; i<4; ++i)
+                  {
+                    quad_compare_1.set_face(i,       line_counterclock[lex2cclock[i]]->index());
+                    quad_compare_2.set_face((i+2)%4, line_counterclock[lex2cclock[i]]->index());
+                  }
+
+                ++n_rotations;
+              }
+
+            AssertThrow (n_rotations!=4,
+                         ExcQuadInexistant(line[0]->index(), line[1]->index(),
+                                           line[2]->index(), line[3]->index()));
+
+            if (not_found_quad_1)
+              quad = needed_quads[quad_compare_2].first;
+            else
+              quad = needed_quads[quad_compare_1].first;
+
+            // check whether this face is
+            // really an exterior one
+            AssertThrow (quad->at_boundary(),
+                         ExcInteriorQuadCantBeBoundary(quad->vertex_index(0),
+                                                       quad->vertex_index(1),
+                                                       quad->vertex_index(2),
+                                                       quad->vertex_index(3),
+                                                       boundary_quad->boundary_id));
+
+            // and make sure that we don't
+            // attempt to reset the
+            // boundary indicator to a
+            // different than the
+            // previously set value
+            if (quad->boundary_id() != 0)
+              AssertThrow (quad->boundary_id() == boundary_quad->boundary_id,
+                           ExcMessage ("Duplicate boundary quads are only allowed "
+                                       "if they carry the same boundary indicator."));
+
+            quad->set_boundary_id (boundary_quad->boundary_id);
+            quad->set_manifold_id (boundary_quad->manifold_id);
+          }
+
+
+        /////////////////////////////////////////
+        // finally update neighborship info
+        for (typename Triangulation<dim,spacedim>::cell_iterator
+             cell=triangulation.begin(); cell!=triangulation.end(); ++cell)
+          for (unsigned int face=0; face<6; ++face)
+            if (adjacent_cells[cell->quad(face)->index()][0] == cell)
+              // first adjacent cell is
+              // this one
+              {
+                if (adjacent_cells[cell->quad(face)->index()].size() == 2)
+                  // there is another
+                  // adjacent cell
+                  cell->set_neighbor (face,
+                                      adjacent_cells[cell->quad(face)->index()][1]);
+              }
+        // first adjacent cell is not this
+        // one, -> it must be the neighbor
+        // we are looking for
+            else
+              cell->set_neighbor (face,
+                                  adjacent_cells[cell->quad(face)->index()][0]);
+      }
+
+
+      /**
+       * Remove the children of @p cell from the
+       * triangulation. This is the main step of
+       * the coarsening process and forms the
+       * dimension dependent part of @p
+       * execute_coarsening.
+       *
+       * The third argument gives, for each line
+       * index, the number of cells containing
+       * that line; the fourth argument gives the
+       * same information for quads. These
+       * counters are used to decide whether a
+       * refined line or quad may be coarsened,
+       * too. This 1D overload needs neither of
+       * them and ignores both; the 2D overload
+       * uses the line counters only, and the 3D
+       * overload uses both.
+       */
+      template <int spacedim>
+      static
+      void
+      delete_children (Triangulation<1,spacedim> &triangulation,
+                       typename Triangulation<1,spacedim>::cell_iterator &cell,
+                       std::vector<unsigned int> &,
+                       std::vector<unsigned int> &)
+      {
+        typedef typename Triangulation<1,spacedim>::cell_iterator cell_iterator;
+
+        // the children that are about to go
+        // away must not be refined themselves
+        Assert (!cell->child(0)->has_children() && !cell->child(1)->has_children(),
+                ExcInternalError());
+
+        // Cells bordering the children currently
+        // point to one of the children as their
+        // neighbor; redirect them to this cell.
+        // In 1d the neighbor may be refined more
+        // than one level deeper than the
+        // children, so the whole chain of
+        // descendants touching the common vertex
+        // has to be walked. Side 0 is the left
+        // end of the cell, side 1 the right end;
+        // both are treated the same way with the
+        // roles of the two directions swapped.
+        for (unsigned int side=0; side<2; ++side)
+          {
+            const unsigned int opposite = 1-side;
+
+            // nothing to redirect if there is no
+            // neighbor on this side or if it is
+            // not refined
+            if ((cell->neighbor(side).state() != IteratorState::valid) ||
+                !cell->neighbor(side)->has_children())
+              continue;
+
+            cell_iterator adjacent = cell->neighbor(side);
+            Assert (adjacent->level() == cell->level(), ExcInternalError());
+
+            // start with the child of the
+            // neighbor that faces this cell and
+            // descend along the shared vertex
+            adjacent = adjacent->child(opposite);
+            for (;;)
+              {
+                Assert (adjacent->neighbor(opposite) == cell->child(side),
+                        ExcInternalError());
+                adjacent->set_neighbor (opposite, cell);
+
+                if (!adjacent->has_children())
+                  break;
+                adjacent = adjacent->child(opposite);
+              }
+          }
+
+
+        // the vertex in the middle of the cell,
+        // i.e. the second vertex of the first
+        // child, is no longer needed
+        triangulation.vertices_used[cell->child(0)->vertex_index(1)] = false;
+
+        // mark the children as unused; also
+        // wipe their user data and user flags so
+        // that these do not show up at unwanted
+        // places later on
+        for (unsigned int c=0; c<cell->n_children(); ++c)
+          {
+            cell_iterator kid = cell->child(c);
+            kid->clear_user_data();
+            kid->clear_user_flag();
+            kid->clear_used_flag();
+          }
+
+
+        // finally forget about the children
+        cell->clear_children ();
+        cell->clear_user_flag();
+      }
+
+
+
+      /**
+       * 2D version of delete_children: remove
+       * the children of @p cell together with
+       * all lines and vertices that are no
+       * longer referenced afterwards.
+       *
+       * @p line_cell_count holds, for each line
+       * index, the number of cells containing
+       * that line. The entries belonging to the
+       * lines of the children are decremented
+       * here, and children of the cell's outer
+       * lines are only removed once their
+       * counter has dropped to zero. The last
+       * argument is ignored.
+       */
+      template <int spacedim>
+      static
+      void
+      delete_children (Triangulation<2,spacedim> &triangulation,
+                       typename Triangulation<2,spacedim>::cell_iterator &cell,
+                       std::vector<unsigned int> &line_cell_count,
+                       std::vector<unsigned int> &)
+      {
+        const unsigned int dim=2;
+        typedef typename Triangulation<dim,spacedim>::cell_iterator cell_iterator;
+        typedef typename Triangulation<dim,spacedim>::line_iterator line_iterator;
+
+        // remember how the cell was refined;
+        // this information is cleared below
+        const RefinementCase<dim> ref_case=cell->refinement_case();
+
+        Assert(line_cell_count.size()==triangulation.n_raw_lines(), ExcInternalError());
+
+        // collects all lines that turn out to be
+        // unused: at most two children for each
+        // of the four outer lines plus four
+        // interior lines
+        std::vector<line_iterator> obsolete_lines;
+        obsolete_lines.reserve(4*2+4);
+
+        // the children no longer count as users
+        // of their lines
+        for (unsigned int c=0; c<cell->n_children(); ++c)
+          {
+            cell_iterator kid=cell->child(c);
+            for (unsigned int l=0; l<GeometryInfo<dim>::lines_per_cell; ++l)
+              --line_cell_count[kid->line_index(l)];
+          }
+
+
+        // schedule the interior lines for
+        // deletion. only a cell refined with
+        // cut_xy has an interior vertex (the
+        // second vertex of line 1 of the first
+        // child), which is released as well
+        if (ref_case==RefinementCase<dim>::cut_xy)
+          {
+            cell_iterator first_child=cell->child(0);
+            cell_iterator last_child=cell->child(3);
+
+            triangulation.vertices_used[first_child->line(1)->vertex_index(1)] = false;
+
+            obsolete_lines.push_back(first_child->line(1));
+            obsolete_lines.push_back(first_child->line(3));
+            obsolete_lines.push_back(last_child->line(0));
+            obsolete_lines.push_back(last_child->line(2));
+          }
+        else
+          {
+            // anisotropic refinement: there is
+            // exactly one interior line, found
+            // as line 1 (cut_x) or line 3
+            // (otherwise) of the first child
+            unsigned int interior_line=3;
+            if (ref_case==RefinementCase<dim>::cut_x)
+              interior_line=1;
+
+            obsolete_lines.push_back(cell->child(0)->line(interior_line));
+          }
+
+        // mark the children as unused and wipe
+        // their user data and user flags
+        for (unsigned int c=0; c<cell->n_children(); ++c)
+          {
+            cell_iterator kid=cell->child(c);
+            kid->clear_user_data();
+            kid->clear_user_flag();
+            kid->clear_used_flag();
+          }
+
+
+        // the cell itself is unrefined from now
+        // on
+        cell->clear_children ();
+        cell->clear_refinement_case();
+        cell->clear_user_flag();
+
+        // now turn to the outer lines of the
+        // cell: if a line is refined but no cell
+        // uses its children any more, the
+        // children can go, too
+        for (unsigned int line_no=0; line_no<GeometryInfo<dim>::lines_per_cell; ++line_no)
+          {
+            line_iterator outer_line=cell->line(line_no);
+
+            if (!outer_line->has_children())
+              continue;
+
+            // either both children are still in
+            // use or none of them is
+            Assert((line_cell_count[outer_line->child_index(0)] == 0 &&
+                    line_cell_count[outer_line->child_index(1)] == 0) ||
+                   (line_cell_count[outer_line->child_index(0)] > 0 &&
+                    line_cell_count[outer_line->child_index(1)] > 0),
+                   ExcInternalError());
+
+            if (line_cell_count[outer_line->child_index(0)]!=0)
+              continue;
+
+            for (unsigned int c=0; c<2; ++c)
+              Assert (!outer_line->child(c)->has_children(),
+                      ExcInternalError());
+
+            // no cell references the children
+            // of this line any more, so release
+            // the vertex in the middle of the
+            // line and schedule both children
+            // for deletion
+            triangulation.vertices_used[outer_line->child(0)->vertex_index(1)] = false;
+
+            obsolete_lines.push_back(outer_line->child(0));
+            obsolete_lines.push_back(outer_line->child(1));
+
+            outer_line->clear_children();
+          }
+
+        // finally get rid of all collected
+        // lines: wipe user data and user flags
+        // so that they do not show up at
+        // unwanted places later on, then mark
+        // the lines as unused
+        for (unsigned int i=0; i<obsolete_lines.size(); ++i)
+          {
+            obsolete_lines[i]->clear_user_data();
+            obsolete_lines[i]->clear_user_flag();
+            obsolete_lines[i]->clear_used_flag();
+          }
+      }
+
+
+
+      template <int spacedim>
+      static
+      void
+      delete_children (Triangulation<3,spacedim> &triangulation,
+                       typename Triangulation<3,spacedim>::cell_iterator &cell,
+                       std::vector<unsigned int> &line_cell_count,
+                       std::vector<unsigned int> &quad_cell_count)
+      {
+        const unsigned int dim=3;
+
+        Assert(line_cell_count.size()==triangulation.n_raw_lines(), ExcInternalError());
+        Assert(quad_cell_count.size()==triangulation.n_raw_quads(), ExcInternalError());
+
+        // first of all, we store the RefineCase of
+        // this cell
+        const RefinementCase<dim> ref_case=cell->refinement_case();
+        // vectors to hold all lines and quads which
+        // may be deleted
+        std::vector<typename Triangulation<dim,spacedim>::line_iterator>
+        lines_to_delete(0);
+        std::vector<typename Triangulation<dim,spacedim>::quad_iterator>
+        quads_to_delete(0);
+
+        lines_to_delete.reserve(12*2+6*4+6);
+        quads_to_delete.reserve(6*4+12);
+
+        // now we decrease the counters for lines and
+        // quads contained in the child cells
+        for (unsigned int c=0; c<cell->n_children(); ++c)
+          {
+            typename Triangulation<dim,spacedim>::cell_iterator
+            child=cell->child(c);
+            for (unsigned int l=0; l<GeometryInfo<dim>::lines_per_cell; ++l)
+              --line_cell_count[child->line_index(l)];
+            for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+              --quad_cell_count[child->quad_index(f)];
+          }
+
+        ///////////////////////////////////////
+        // delete interior quads and lines and the
+        // interior vertex, depending on the
+        // refinement case of the cell
+        //
+        // for append quads and lines: only append
+        // them to the list of objects to be deleted
+
+        switch (ref_case)
+          {
+          case RefinementCase<dim>::cut_x:
+            quads_to_delete.push_back(cell->child(0)->face(1));
+            break;
+          case RefinementCase<dim>::cut_y:
+            quads_to_delete.push_back(cell->child(0)->face(3));
+            break;
+          case RefinementCase<dim>::cut_z:
+            quads_to_delete.push_back(cell->child(0)->face(5));
+            break;
+          case RefinementCase<dim>::cut_xy:
+            quads_to_delete.push_back(cell->child(0)->face(1));
+            quads_to_delete.push_back(cell->child(0)->face(3));
+            quads_to_delete.push_back(cell->child(3)->face(0));
+            quads_to_delete.push_back(cell->child(3)->face(2));
+
+            lines_to_delete.push_back(cell->child(0)->line(11));
+            break;
+          case RefinementCase<dim>::cut_xz:
+            quads_to_delete.push_back(cell->child(0)->face(1));
+            quads_to_delete.push_back(cell->child(0)->face(5));
+            quads_to_delete.push_back(cell->child(3)->face(0));
+            quads_to_delete.push_back(cell->child(3)->face(4));
+
+            lines_to_delete.push_back(cell->child(0)->line(5));
+            break;
+          case RefinementCase<dim>::cut_yz:
+            quads_to_delete.push_back(cell->child(0)->face(3));
+            quads_to_delete.push_back(cell->child(0)->face(5));
+            quads_to_delete.push_back(cell->child(3)->face(2));
+            quads_to_delete.push_back(cell->child(3)->face(4));
+
+            lines_to_delete.push_back(cell->child(0)->line(7));
+            break;
+          case RefinementCase<dim>::cut_xyz:
+            quads_to_delete.push_back(cell->child(0)->face(1));
+            quads_to_delete.push_back(cell->child(2)->face(1));
+            quads_to_delete.push_back(cell->child(4)->face(1));
+            quads_to_delete.push_back(cell->child(6)->face(1));
+
+            quads_to_delete.push_back(cell->child(0)->face(3));
+            quads_to_delete.push_back(cell->child(1)->face(3));
+            quads_to_delete.push_back(cell->child(4)->face(3));
+            quads_to_delete.push_back(cell->child(5)->face(3));
+
+            quads_to_delete.push_back(cell->child(0)->face(5));
+            quads_to_delete.push_back(cell->child(1)->face(5));
+            quads_to_delete.push_back(cell->child(2)->face(5));
+            quads_to_delete.push_back(cell->child(3)->face(5));
+
+            lines_to_delete.push_back(cell->child(0)->line(5));
+            lines_to_delete.push_back(cell->child(0)->line(7));
+            lines_to_delete.push_back(cell->child(0)->line(11));
+            lines_to_delete.push_back(cell->child(7)->line(0));
+            lines_to_delete.push_back(cell->child(7)->line(2));
+            lines_to_delete.push_back(cell->child(7)->line(8));
+            // delete the vertex which will not
+            // be needed anymore. This vertex
+            // is the vertex at the heart of
+            // this cell, which is the sixth of
+            // the first child
+            triangulation.vertices_used[cell->child(0)->vertex_index(7)] = false;
+            break;
+          default:
+            // only remaining case is
+            // no_refinement, thus an error
+            Assert(false, ExcInternalError());
+            break;
+          }
+
+
+        // invalidate children
+        for (unsigned int child=0; child<cell->n_children(); ++child)
+          {
+            cell->child(child)->clear_user_data();
+            cell->child(child)->clear_user_flag();
+
+            for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+              {
+                // set flags denoting deviations from
+                // standard orientation of faces back
+                // to initialization values
+                cell->child(child)->set_face_orientation (f, true);
+                cell->child(child)->set_face_flip(f,false);
+                cell->child(child)->set_face_rotation(f,false);
+              }
+
+            cell->child(child)->clear_used_flag();
+          }
+
+
+        // delete pointer to children
+        cell->clear_children ();
+        cell->clear_refinement_case ();
+        cell->clear_user_flag();
+
+        // so far we only looked at inner quads,
+        // lines and vertices. Now we have to
+        // consider outer ones as well. here, we have
+        // to check, whether there are other cells
+        // still needing these objects. otherwise we
+        // can delete them. first for quads (and
+        // their inner lines).
+
+        for (unsigned int quad_no=0; quad_no<GeometryInfo<dim>::faces_per_cell; ++quad_no)
+          {
+            typename Triangulation<dim,spacedim>::quad_iterator
+            quad=cell->face(quad_no);
+
+            Assert((GeometryInfo<dim>::face_refinement_case(ref_case,quad_no) && quad->has_children()) ||
+                   GeometryInfo<dim>::face_refinement_case(ref_case,quad_no)==RefinementCase<dim-1>::no_refinement,
+                   ExcInternalError());
+
+            switch (quad->refinement_case())
+              {
+              case RefinementCase<dim-1>::no_refinement:
+                // nothing to do as the quad
+                // is not refined
+                break;
+              case RefinementCase<dim-1>::cut_x:
+              case RefinementCase<dim-1>::cut_y:
+              {
+                // if one of the cell counters is
+                // zero, the other has to be as
+                // well
+                Assert((quad_cell_count[quad->child_index(0)] == 0 &&
+                        quad_cell_count[quad->child_index(1)] == 0) ||
+                       (quad_cell_count[quad->child_index(0)] > 0 &&
+                        quad_cell_count[quad->child_index(1)] > 0),
+                       ExcInternalError());
+                // it might be, that the quad is
+                // refined twice anisotropically,
+                // first check, whether we may
+                // delete possible grand_children
+                unsigned int deleted_grandchildren=0;
+                unsigned int number_of_child_refinements=0;
+
+                for (unsigned int c=0; c<2; ++c)
+                  if (quad->child(c)->has_children())
+                    {
+                      ++number_of_child_refinements;
+                      // if one of the cell counters is
+                      // zero, the other has to be as
+                      // well
+                      Assert((quad_cell_count[quad->child(c)->child_index(0)] == 0 &&
+                              quad_cell_count[quad->child(c)->child_index(1)] == 0) ||
+                             (quad_cell_count[quad->child(c)->child_index(0)] > 0 &&
+                              quad_cell_count[quad->child(c)->child_index(1)] > 0),
+                             ExcInternalError());
+                      if (quad_cell_count[quad->child(c)->child_index(0)]==0)
+                        {
+                          // Assert, that the two
+                          // anisotropic
+                          // refinements add up to
+                          // isotropic refinement
+                          Assert(quad->refinement_case()+quad->child(c)->refinement_case()==RefinementCase<dim>::cut_xy,
+                                 ExcInternalError());
+                          // we may delete the
+                          // quad's children and
+                          // the inner line as no
+                          // cell references them
+                          // anymore
+                          quads_to_delete.push_back(quad->child(c)->child(0));
+                          quads_to_delete.push_back(quad->child(c)->child(1));
+                          if (quad->child(c)->refinement_case()==RefinementCase<2>::cut_x)
+                            lines_to_delete.push_back(quad->child(c)->child(0)->line(1));
+                          else
+                            lines_to_delete.push_back(quad->child(c)->child(0)->line(3));
+                          quad->child(c)->clear_children();
+                          quad->child(c)->clear_refinement_case();
+                          ++deleted_grandchildren;
+                        }
+                    }
+                // if no grandchildren are left, we
+                // may as well delete the
+                // refinement of the inner line
+                // between our children and the
+                // corresponding vertex
+                if (number_of_child_refinements>0 &&
+                    deleted_grandchildren==number_of_child_refinements)
+                  {
+                    typename Triangulation<dim,spacedim>::line_iterator
+                    middle_line;
+                    if (quad->refinement_case()==RefinementCase<2>::cut_x)
+                      middle_line=quad->child(0)->line(1);
+                    else
+                      middle_line=quad->child(0)->line(3);
+
+                    lines_to_delete.push_back(middle_line->child(0));
+                    lines_to_delete.push_back(middle_line->child(1));
+                    triangulation.vertices_used[middle_vertex_index<dim,spacedim>(middle_line)]
+                      = false;
+                    middle_line->clear_children();
+                  }
+
+                // now consider the direct children
+                // of the given quad
+                if (quad_cell_count[quad->child_index(0)]==0)
+                  {
+                    // we may delete the quad's
+                    // children and the inner line
+                    // as no cell references them
+                    // anymore
+                    quads_to_delete.push_back(quad->child(0));
+                    quads_to_delete.push_back(quad->child(1));
+                    if (quad->refinement_case()==RefinementCase<2>::cut_x)
+                      lines_to_delete.push_back(quad->child(0)->line(1));
+                    else
+                      lines_to_delete.push_back(quad->child(0)->line(3));
+
+                    // if the counters just dropped
+                    // to zero, otherwise the
+                    // children would have been
+                    // deleted earlier, then this
+                    // cell's children must have
+                    // contained the anisotropic
+                    // quad children. thus, if
+                    // those have again anisotropic
+                    // children, which are in
+                    // effect isotropic children of
+                    // the original quad, those are
+                    // still needed by a
+                    // neighboring cell and we
+                    // cannot delete them. instead,
+                    // we have to reset this quad's
+                    // refine case to isotropic and
+                    // set the children
+                    // accordingly.
+                    if (quad->child(0)->has_children())
+                      if (quad->refinement_case()==RefinementCase<2>::cut_x)
+                        {
+                          // now everything is
+                          // quite complicated. we
+                          // have the children
+                          // numbered according to
+                          //
+                          // *---*---*
+                          // |n+1|m+1|
+                          // *---*---*
+                          // | n | m |
+                          // *---*---*
+                          //
+                          // from the original
+                          // anisotropic
+                          // refinement. we have to
+                          // reorder them as
+                          //
+                          // *---*---*
+                          // | m |m+1|
+                          // *---*---*
+                          // | n |n+1|
+                          // *---*---*
+                          //
+                          // for isotropic refinement.
+                          //
+                          // this is a bit ugly, of
+                          // course: loop over all
+                          // cells on all levels
+                          // and look for faces n+1
+                          // (switch_1) and m
+                          // (switch_2).
+                          const typename Triangulation<dim,spacedim>::quad_iterator
+                          switch_1=quad->child(0)->child(1),
+                          switch_2=quad->child(1)->child(0);
+
+                          Assert(!switch_1->has_children(), ExcInternalError());
+                          Assert(!switch_2->has_children(), ExcInternalError());
+
+                          const int switch_1_index=switch_1->index();
+                          const int switch_2_index=switch_2->index();
+                          for (unsigned int l=0; l<triangulation.levels.size(); ++l)
+                            for (unsigned int h=0; h<triangulation.levels[l]->cells.cells.size(); ++h)
+                              for (unsigned int q=0; q<GeometryInfo<dim>::faces_per_cell; ++q)
+                                {
+                                  const int index=triangulation.levels[l]->cells.cells[h].face(q);
+                                  if (index==switch_1_index)
+                                    triangulation.levels[l]->cells.cells[h].set_face(q,switch_2_index);
+                                  else if (index==switch_2_index)
+                                    triangulation.levels[l]->cells.cells[h].set_face(q,switch_1_index);
+                                }
+                          // now we have to copy
+                          // all information of the
+                          // two quads
+                          const int switch_1_lines[4]=
+                          {
+                            static_cast<signed int>(switch_1->line_index(0)),
+                            static_cast<signed int>(switch_1->line_index(1)),
+                            static_cast<signed int>(switch_1->line_index(2)),
+                            static_cast<signed int>(switch_1->line_index(3))
+                          };
+                          const bool switch_1_line_orientations[4]=
+                          {
+                            switch_1->line_orientation(0),
+                            switch_1->line_orientation(1),
+                            switch_1->line_orientation(2),
+                            switch_1->line_orientation(3)
+                          };
+                          const types::boundary_id switch_1_boundary_id=switch_1->boundary_id();
+                          const unsigned int switch_1_user_index=switch_1->user_index();
+                          const bool switch_1_user_flag=switch_1->user_flag_set();
+
+                          switch_1->set(internal::Triangulation::TriaObject<2>(switch_2->line_index(0),
+                                                                               switch_2->line_index(1),
+                                                                               switch_2->line_index(2),
+                                                                               switch_2->line_index(3)));
+                          switch_1->set_line_orientation(0, switch_2->line_orientation(0));
+                          switch_1->set_line_orientation(1, switch_2->line_orientation(1));
+                          switch_1->set_line_orientation(2, switch_2->line_orientation(2));
+                          switch_1->set_line_orientation(3, switch_2->line_orientation(3));
+                          switch_1->set_boundary_id(switch_2->boundary_id());
+                          switch_1->set_manifold_id(switch_2->manifold_id());
+                          switch_1->set_user_index(switch_2->user_index());
+                          if (switch_2->user_flag_set())
+                            switch_1->set_user_flag();
+                          else
+                            switch_1->clear_user_flag();
+
+                          switch_2->set(internal::Triangulation::TriaObject<2>(switch_1_lines[0],
+                                                                               switch_1_lines[1],
+                                                                               switch_1_lines[2],
+                                                                               switch_1_lines[3]));
+                          switch_2->set_line_orientation(0, switch_1_line_orientations[0]);
+                          switch_2->set_line_orientation(1, switch_1_line_orientations[1]);
+                          switch_2->set_line_orientation(2, switch_1_line_orientations[2]);
+                          switch_2->set_line_orientation(3, switch_1_line_orientations[3]);
+                          switch_2->set_boundary_id(switch_1_boundary_id);
+                          switch_2->set_manifold_id(switch_1->manifold_id());
+                          switch_2->set_user_index(switch_1_user_index);
+                          if (switch_1_user_flag)
+                            switch_2->set_user_flag();
+                          else
+                            switch_2->clear_user_flag();
+
+                          const unsigned int child_0=quad->child(0)->child_index(0);
+                          const unsigned int child_2=quad->child(1)->child_index(0);
+                          quad->clear_children();
+                          quad->clear_refinement_case();
+                          quad->set_refinement_case(RefinementCase<2>::cut_xy);
+                          quad->set_children(0,child_0);
+                          quad->set_children(2,child_2);
+                          std::swap(quad_cell_count[child_0+1],quad_cell_count[child_2]);
+                        }
+                      else
+                        {
+                          // the face was refined
+                          // with cut_y, thus the
+                          // children are already
+                          // in correct order. we
+                          // only have to set them
+                          // correctly, deleting
+                          // the indirection of two
+                          // anisotropic refinement
+                          // and going directly
+                          // from the quad to
+                          // isotropic children
+                          const unsigned int child_0=quad->child(0)->child_index(0);
+                          const unsigned int child_2=quad->child(1)->child_index(0);
+                          quad->clear_children();
+                          quad->clear_refinement_case();
+                          quad->set_refinement_case(RefinementCase<2>::cut_xy);
+                          quad->set_children(0,child_0);
+                          quad->set_children(2,child_2);
+                        }
+                    else
+                      {
+                        quad->clear_children();
+                        quad->clear_refinement_case();
+                      }
+
+
+                  }
+                break;
+              }
+              case RefinementCase<dim-1>::cut_xy:
+              {
+                // if one of the cell counters is
+                // zero, the others have to be as
+                // well
+
+                Assert((quad_cell_count[quad->child_index(0)] == 0 &&
+                        quad_cell_count[quad->child_index(1)] == 0 &&
+                        quad_cell_count[quad->child_index(2)] == 0 &&
+                        quad_cell_count[quad->child_index(3)] == 0) ||
+                       (quad_cell_count[quad->child_index(0)] > 0 &&
+                        quad_cell_count[quad->child_index(1)] > 0 &&
+                        quad_cell_count[quad->child_index(2)] > 0 &&
+                        quad_cell_count[quad->child_index(3)] > 0),
+                       ExcInternalError());
+
+                if (quad_cell_count[quad->child_index(0)]==0)
+                  {
+                    // we may delete the quad's
+                    // children, the inner lines
+                    // and the middle vertex as no
+                    // cell references them anymore
+                    lines_to_delete.push_back(quad->child(0)->line(1));
+                    lines_to_delete.push_back(quad->child(3)->line(0));
+                    lines_to_delete.push_back(quad->child(0)->line(3));
+                    lines_to_delete.push_back(quad->child(3)->line(2));
+
+                    for (unsigned int child=0; child<quad->n_children(); ++child)
+                      quads_to_delete.push_back(quad->child(child));
+
+                    triangulation.vertices_used[quad->child(0)->vertex_index(3)] = false;
+
+                    quad->clear_children();
+                    quad->clear_refinement_case();
+                  }
+              }
+              break;
+
+              default:
+                Assert(false, ExcInternalError());
+                break;
+              }
+
+          }
+
+        // now we repeat a similar procedure
+        // for the outer lines of this cell.
+
+        // if in debug mode: check that each
+        // of the lines for which we consider
+        // deleting the children in fact has
+        // children (the bits/coarsening_3d
+        // test tripped over this initially)
+        for (unsigned int line_no=0; line_no<GeometryInfo<dim>::lines_per_cell; ++line_no)
+          {
+            typename Triangulation<dim,spacedim>::line_iterator
+            line=cell->line(line_no);
+
+            Assert((GeometryInfo<dim>::line_refinement_case(ref_case,line_no) && line->has_children()) ||
+                   GeometryInfo<dim>::line_refinement_case(ref_case,line_no)==RefinementCase<1>::no_refinement,
+                   ExcInternalError());
+
+            if (line->has_children())
+              {
+                // if one of the cell counters is
+                // zero, the other has to be as well
+
+                Assert((line_cell_count[line->child_index(0)] == 0 &&
+                        line_cell_count[line->child_index(1)] == 0) ||
+                       (line_cell_count[line->child_index(0)] > 0 &&
+                        line_cell_count[line->child_index(1)] > 0),
+                       ExcInternalError());
+
+                if (line_cell_count[line->child_index(0)]==0)
+                  {
+                    for (unsigned int c=0; c<2; ++c)
+                      Assert (!line->child(c)->has_children(),
+                              ExcInternalError());
+
+                    // we may delete the line's
+                    // children and the middle vertex
+                    // as no cell references them
+                    // anymore
+                    triangulation.vertices_used[line->child(0)->vertex_index(1)] = false;
+
+                    lines_to_delete.push_back(line->child(0));
+                    lines_to_delete.push_back(line->child(1));
+
+                    line->clear_children();
+                  }
+              }
+          }
+
+        // finally, delete unneeded quads and lines
+
+        // clear user pointers, to avoid that
+        // they may appear at unwanted places
+        // later on...
+        // same for user flags, then finally
+        // delete the quads and lines
+        typename std::vector<typename Triangulation<dim,spacedim>::line_iterator>::iterator
+        line=lines_to_delete.begin(),
+        endline=lines_to_delete.end();
+        for (; line!=endline; ++line)
+          {
+            (*line)->clear_user_data();
+            (*line)->clear_user_flag();
+            (*line)->clear_used_flag();
+          }
+
+        typename std::vector<typename Triangulation<dim,spacedim>::quad_iterator>::iterator
+        quad=quads_to_delete.begin(),
+        endquad=quads_to_delete.end();
+        for (; quad!=endquad; ++quad)
+          {
+            (*quad)->clear_user_data();
+            (*quad)->clear_children();
+            (*quad)->clear_refinement_case();
+            (*quad)->clear_user_flag();
+            (*quad)->clear_used_flag();
+          }
+      }
+
+
+      /**
+       * Create the children of a 2d
+       * cell. The arguments indicate
+       * the next free spots in the
+       * vertices, lines, and cells
+       * arrays.
+       *
+       * The faces of the cell have to
+       * be refined already, whereas
+       * the inner lines in 2D are
+       * created in this function.
+       * Therefore, the vertex index
+       * and the iterators into the
+       * line and cell arrays have to
+       * be passed; they point at (or
+       * "before") the reserved space.
+       */
+      template <int spacedim>
+      static
+      void
+      create_children (Triangulation<2,spacedim> &triangulation,
+                       unsigned int &next_unused_vertex,
+                       typename Triangulation<2,spacedim>::raw_line_iterator &next_unused_line,
+                       typename Triangulation<2,spacedim>::raw_cell_iterator &next_unused_cell,
+                       typename Triangulation<2,spacedim>::cell_iterator &cell)
+      {
+        const unsigned int dim=2;
+        // clear refinement flag
+        const RefinementCase<dim> ref_case=cell->refine_flag_set();
+        cell->clear_refine_flag ();
+
+        /* For the refinement process: since we go the levels up from the lowest, there
+           are (unlike above) only two possibilities: a neighbor cell is on the same
+           level or one level up (in both cases, it may or may not be refined later on,
+           but we don't care here).
+
+           First:
+           Set up an array of the 3x3 vertices, which are distributed on the cell
+           (the array consists of indices into the @p{vertices} std::vector)
+
+           2--7--3
+           |  |  |
+           4--8--5
+           |  |  |
+           0--6--1
+
+           note: in case of cut_x or cut_y not all these vertices are needed for the new
+           cells
+
+           Second:
+           Set up an array of the new lines (the array consists of iterator pointers
+           into the lines arrays)
+
+           .-6-.-7-.         The directions are:  .->-.->-.
+           1   9   3                              ^   ^   ^
+           .-10.11-.                             .->-.->-.
+           0   8   2                              ^   ^   ^
+           .-4-.-5-.                              .->-.->-.
+
+           cut_x:
+           .-4-.-5-.
+           |   |   |
+           0   6   1
+           |   |   |
+           .-2-.-3-.
+
+           cut_y:
+           .---5---.
+           1       3
+           .---6---.
+           0       2
+           .---4---.
+
+
+           Third:
+           Set up an array of neighbors:
+
+           6  7
+           .--.--.
+           1|  |  |3
+           .--.--.
+           0|  |  |2
+           .--.--.
+           4   5
+
+           We need this array for two reasons: first to get the lines which will
+           bound the four subcells (if the neighboring cell is refined, these
+           lines already exist), and second to update neighborship information.
+           Since if a neighbor is not refined, its neighborship record only
+           points to the present, unrefined, cell rather than the children we
+           are presently creating, we only need the neighborship information
+           if the neighbor cells are refined. In all other cases, we store
+           the unrefined neighbor address
+
+           We also need for every neighbor (if refined) which number among its
+           neighbors the present (unrefined) cell has, since that number is to
+           be replaced and because that also is the number of the subline which
+           will be the interface between that neighbor and the to be created cell.
+           We will store this number (between 0 and 3) in the field
+           @p{neighbors_neighbor}.
+
+           It would be sufficient to use the children of the common line to the
+           neighbor, if we only wanted to get the new sublines and the new vertex,
+           but because we need to update the neighborship information of the
+           two refined subcells of the neighbor, we need to search these anyway.
+
+           Convention:
+           The created children are numbered like this:
+
+           .--.--.
+           |2 . 3|
+           .--.--.
+           |0 | 1|
+           .--.--.
+        */
+        // collect the
+        // indices of the
+        // eight
+        // surrounding
+        // vertices
+        //   2--7--3
+        //   |  |  |
+        //   4--8--5
+        //   |  |  |
+        //   0--6--1
+        int new_vertices[9];
+        for (unsigned int vertex_no=0; vertex_no<4; ++vertex_no)
+          new_vertices[vertex_no]=cell->vertex_index(vertex_no);
+        for (unsigned int line_no=0; line_no<4; ++line_no)
+          if (cell->line(line_no)->has_children())
+            new_vertices[4+line_no]=cell->line(line_no)->child(0)->vertex_index(1);
+
+        if (ref_case==RefinementCase<dim>::cut_xy)
+          {
+
+            // find the next
+            // unused vertex and
+            // allocate it for
+            // the new vertex we
+            // need here
+            while (triangulation.vertices_used[next_unused_vertex] == true)
+              ++next_unused_vertex;
+            Assert (next_unused_vertex < triangulation.vertices.size(),
+                    ExcMessage("Internal error: During refinement, the triangulation wants to access an element of the 'vertices' array but it turns out that the array is not large enough."));
+            triangulation.vertices_used[next_unused_vertex] = true;
+
+            new_vertices[8] = next_unused_vertex;
+
+            // if this quad lives
+            // in 2d, then we can
+            // compute the new
+            // central vertex
+            // location just from
+            // the surrounding
+            // ones. If this is
+            // not the case, then
+            // we need to ask a
+            // boundary object
+            if (dim == spacedim)
+              {
+                // triangulation.vertices[next_unused_vertex] = new_point;
+                triangulation.vertices[next_unused_vertex] = cell->center(true);
+
+                // if the user_flag is set, i.e. if the
+                // cell is at the boundary, use a
+                // different calculation of the middle
+                // vertex here. this is of advantage, if
+                // the boundary is strongly curved and
+                // the cell has a high aspect ratio. this
+                // can happen for example, if it was
+                // refined anisotropically before.
+                if (cell->user_flag_set())
+                  {
+                    // first reset the user_flag
+                    cell->clear_user_flag();
+                    // the user flag indicates: at least
+                    // one face is at the boundary. if it
+                    // is only one, set the new middle
+                    // vertex in a different way to avoid
+                    // some mis-shaped elements if the
+                    // new point on the boundary is not
+                    // where we expect it, especially if
+                    // it is too far inside the current
+                    // cell
+                    unsigned int boundary_face=GeometryInfo<dim>::faces_per_cell;
+                    for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+                      if (cell->face(face)->at_boundary())
+                        {
+                          if (boundary_face == GeometryInfo<dim>::faces_per_cell)
+                            // no boundary face found so
+                            // far, so set it now
+                            boundary_face=face;
+                          else
+                            // there is another boundary
+                            // face, so reset boundary_face to
+                            // invalid value as a flag to
+                            // do nothing in the following
+                            boundary_face=GeometryInfo<dim>::faces_per_cell+1;
+                        }
+
+                    if (boundary_face<GeometryInfo<dim>::faces_per_cell)
+                      // reset the cell's middle vertex to the middle
+                      // of the straight connection between the new
+                      // points on this face and on the opposite face,
+                      // as returned by the underlying manifold
+                      // object.
+                      {
+                        std::vector<Point<spacedim> > ps(2);
+                        std::vector<double> ws(2, 0.5);
+                        ps[0] = cell->face(boundary_face)
+                                ->child(0)->vertex(1);
+                        ps[1] = cell->face(GeometryInfo<dim>
+                                           ::opposite_face[boundary_face])
+                                ->child(0)->vertex(1);
+                        Quadrature<spacedim> qs(ps,ws);
+                        triangulation.vertices[next_unused_vertex]
+                          = cell->get_manifold().get_new_point(qs);
+                      }
+                  }
+              }
+            else
+              {
+                // if this quad lives in a higher dimensional space
+                // then we don't need to worry if it is at the
+                // boundary of the manifold -- we always have to use
+                // the boundary object anyway; so ignore whether the
+                // user flag is set or not
+                cell->clear_user_flag();
+
+                // An assert to make sure that the static_cast in the
+                // next line has the chance to give reasonable
+                // results.
+                Assert(cell->material_id()<= std::numeric_limits<types::material_id>::max(),
+                       ExcIndexRange(cell->material_id(),0,std::numeric_limits<types::material_id>::max()));
+
+                // new vertex is placed on the surface according to
+                // the information stored in the boundary class
+                triangulation.vertices[next_unused_vertex] =
+                  cell->center(true);
+              }
+          }
+
+
+        // Now the lines:
+        typename Triangulation<dim,spacedim>::raw_line_iterator new_lines[12];
+        unsigned int lmin=8;
+        unsigned int lmax=12;
+        if (ref_case!=RefinementCase<dim>::cut_xy)
+          {
+            lmin=6;
+            lmax=7;
+          }
+
+        for (unsigned int l=lmin; l<lmax; ++l)
+          {
+            while (next_unused_line->used() == true)
+              ++next_unused_line;
+            new_lines[l] = next_unused_line;
+            ++next_unused_line;
+
+            Assert (new_lines[l]->used() == false,
+                    ExcMessage("Internal error: We want to use a cell during refinement that should be unused, but turns out not to be."));
+          }
+
+        if (ref_case==RefinementCase<dim>::cut_xy)
+          {
+            //   .-6-.-7-.
+            //   1   9   3
+            //   .-10.11-.
+            //   0   8   2
+            //   .-4-.-5-.
+
+            // lines 0-7 already exist, create only the four interior
+            // lines 8-11
+            unsigned int l=0;
+            for (unsigned int face_no=0; face_no<GeometryInfo<dim>::faces_per_cell; ++face_no)
+              for (unsigned int c=0; c<2; ++c, ++l)
+                new_lines[l]=cell->line(face_no)->child(c);
+            Assert(l==8, ExcInternalError());
+
+            new_lines[8] ->set (internal::Triangulation::
+                                TriaObject<1>(new_vertices[6], new_vertices[8]));
+            new_lines[9] ->set (internal::Triangulation::
+                                TriaObject<1>(new_vertices[8], new_vertices[7]));
+            new_lines[10]->set (internal::Triangulation::
+                                TriaObject<1>(new_vertices[4], new_vertices[8]));
+            new_lines[11]->set (internal::Triangulation::
+                                TriaObject<1>(new_vertices[8], new_vertices[5]));
+          }
+        else if (ref_case==RefinementCase<dim>::cut_x)
+          {
+            //   .-4-.-5-.
+            //   |   |   |
+            //   0   6   1
+            //   |   |   |
+            //   .-2-.-3-.
+            new_lines[0]=cell->line(0);
+            new_lines[1]=cell->line(1);
+            new_lines[2]=cell->line(2)->child(0);
+            new_lines[3]=cell->line(2)->child(1);
+            new_lines[4]=cell->line(3)->child(0);
+            new_lines[5]=cell->line(3)->child(1);
+            new_lines[6]->set (internal::Triangulation::
+                               TriaObject<1>(new_vertices[6], new_vertices[7]));
+          }
+        else
+          {
+            Assert(ref_case==RefinementCase<dim>::cut_y, ExcInternalError());
+            //   .---5---.
+            //   1       3
+            //   .---6---.
+            //   0       2
+            //   .---4---.
+            new_lines[0]=cell->line(0)->child(0);
+            new_lines[1]=cell->line(0)->child(1);
+            new_lines[2]=cell->line(1)->child(0);
+            new_lines[3]=cell->line(1)->child(1);
+            new_lines[4]=cell->line(2);
+            new_lines[5]=cell->line(3);
+            new_lines[6]->set (internal::Triangulation::
+                               TriaObject<1>(new_vertices[4], new_vertices[5]));
+          }
+
+        for (unsigned int l=lmin; l<lmax; ++l)
+          {
+            new_lines[l]->set_used_flag();
+            new_lines[l]->clear_user_flag();
+            new_lines[l]->clear_user_data();
+            new_lines[l]->clear_children();
+            // interior line
+            new_lines[l]->set_boundary_id(numbers::internal_face_boundary_id);
+            new_lines[l]->set_manifold_id(cell->manifold_id());
+          }
+
+        // Now add the four (two)
+        // new cells!
+        typename Triangulation<dim,spacedim>::raw_cell_iterator
+        subcells[GeometryInfo<dim>::max_children_per_cell];
+        while (next_unused_cell->used() == true)
+          ++next_unused_cell;
+
+        const unsigned int n_children=
+          GeometryInfo<dim>::n_children(ref_case);
+        for (unsigned int i=0; i<n_children; ++i)
+          {
+            Assert (next_unused_cell->used() == false,
+                    ExcMessage("Internal error: We want to use a cell during refinement that should be unused, but turns out not to be."));
+            subcells[i] = next_unused_cell;
+            ++next_unused_cell;
+            if (i%2==1 && i<n_children-1)
+              while (next_unused_cell->used() == true)
+                ++next_unused_cell;
+          }
+
+        if (ref_case==RefinementCase<dim>::cut_xy)
+          {
+            // children:
+            //   .--.--.
+            //   |2 . 3|
+            //   .--.--.
+            //   |0 | 1|
+            //   .--.--.
+            // lines:
+            //   .-6-.-7-.
+            //   1   9   3
+            //   .-10.11-.
+            //   0   8   2
+            //   .-4-.-5-.
+            subcells[0]->set (internal::Triangulation::
+                              TriaObject<2>(new_lines[0]->index(),
+                                            new_lines[8]->index(),
+                                            new_lines[4]->index(),
+                                            new_lines[10]->index()));
+            subcells[1]->set (internal::Triangulation::
+                              TriaObject<2>(new_lines[8]->index(),
+                                            new_lines[2]->index(),
+                                            new_lines[5]->index(),
+                                            new_lines[11]->index()));
+            subcells[2]->set (internal::Triangulation::
+                              TriaObject<2>(new_lines[1]->index(),
+                                            new_lines[9]->index(),
+                                            new_lines[10]->index(),
+                                            new_lines[6]->index()));
+            subcells[3]->set (internal::Triangulation::
+                              TriaObject<2>(new_lines[9]->index(),
+                                            new_lines[3]->index(),
+                                            new_lines[11]->index(),
+                                            new_lines[7]->index()));
+          }
+        else if (ref_case==RefinementCase<dim>::cut_x)
+          {
+            // children:
+            //   .--.--.
+            //   |  .  |
+            //   .0 . 1.
+            //   |  |  |
+            //   .--.--.
+            // lines:
+            //   .-4-.-5-.
+            //   |   |   |
+            //   0   6   1
+            //   |   |   |
+            //   .-2-.-3-.
+            subcells[0]->set (internal::Triangulation::
+                              TriaObject<2>(new_lines[0]->index(),
+                                            new_lines[6]->index(),
+                                            new_lines[2]->index(),
+                                            new_lines[4]->index()));
+            subcells[1]->set (internal::Triangulation::
+                              TriaObject<2>(new_lines[6]->index(),
+                                            new_lines[1]->index(),
+                                            new_lines[3]->index(),
+                                            new_lines[5]->index()));
+          }
+        else
+          {
+            Assert(ref_case==RefinementCase<dim>::cut_y, ExcInternalError());
+            // children:
+            //   .-----.
+            //   |  1  |
+            //   .-----.
+            //   |  0  |
+            //   .-----.
+            // lines:
+            //   .---5---.
+            //   1       3
+            //   .---6---.
+            //   0       2
+            //   .---4---.
+            subcells[0]->set (internal::Triangulation::
+                              TriaObject<2>(new_lines[0]->index(),
+                                            new_lines[2]->index(),
+                                            new_lines[4]->index(),
+                                            new_lines[6]->index()));
+            subcells[1]->set (internal::Triangulation::
+                              TriaObject<2>(new_lines[1]->index(),
+                                            new_lines[3]->index(),
+                                            new_lines[6]->index(),
+                                            new_lines[5]->index()));
+          }
+
+        types::subdomain_id subdomainid = cell->subdomain_id();
+
+        for (unsigned int i=0; i<n_children; ++i)
+          {
+            subcells[i]->set_used_flag();
+            subcells[i]->clear_refine_flag();
+            subcells[i]->clear_user_flag();
+            subcells[i]->clear_user_data();
+            subcells[i]->clear_children();
+            // inherit material
+            // properties
+            subcells[i]->set_material_id (cell->material_id());
+            subcells[i]->set_manifold_id (cell->manifold_id());
+            subcells[i]->set_subdomain_id (subdomainid);
+
+            if (i%2==0)
+              subcells[i]->set_parent (cell->index ());
+          }
+
+
+
+        // set child index for
+        // even children
+        // i=0,2 (0)
+        for (unsigned int i=0; i<n_children/2; ++i)
+          cell->set_children (2*i, subcells[2*i]->index());
+        // set the refine case
+        cell->set_refinement_case(ref_case);
+
+        // note that the
+        // refinement flag was
+        // already cleared at the
+        // beginning of this function
+
+        if (dim < spacedim)
+          for (unsigned int c=0; c<n_children; ++c)
+            cell->child(c)->set_direction_flag (cell->direction_flag());
+
+      }
+
+
+
+      /**
+       * Perform the refinement of a triangulation in 1d: every active cell whose refine flag is set gets one new vertex (placed at cell->center(true)) and two child cells on the next level.
+       * The second argument is unused here, and the returned DistortedCellList is always empty.
+       */
+      template <int spacedim>
+      static
+      typename Triangulation<1,spacedim>::DistortedCellList
+      execute_refinement (Triangulation<1,spacedim> &triangulation,
+                          const bool /*check_for_distorted_cells*/)
+      {
+        const unsigned int dim = 1;
+
+        // check whether a new level is needed. we have to check for
+        // this on the highest level only (there, all used cells are
+        // also active): if one is flagged, append a new, empty level
+        {
+          typename Triangulation<dim,spacedim>::raw_cell_iterator
+          cell = triangulation.begin_active (triangulation.levels.size()-1),
+          endc = triangulation.end();
+          for (; cell != endc; ++cell)
+            if (cell->used())
+              if (cell->refine_flag_set())
+                {
+                  triangulation.levels
+                  .push_back (new internal::Triangulation::TriaLevel<dim>);
+                  break;
+                }
+        }
+
+
+        // check how much space is needed on every level. we need not
+        // check the highest level since either (i) no cells on it are
+        // flagged for refinement, or (ii) there are, but the check
+        // above appended another empty level
+        unsigned int needed_vertices = 0;
+        for (int level=triangulation.levels.size()-2; level>=0; --level)
+          {
+            // count the number of active cells on this
+            // level that are flagged for refinement
+            unsigned int flagged_cells = 0;
+            typename Triangulation<dim,spacedim>::active_cell_iterator
+            acell = triangulation.begin_active(level),
+            aendc = triangulation.begin_active(level+1);
+            for (; acell!=aendc; ++acell)
+              if (acell->refine_flag_set())
+                ++flagged_cells;
+
+            // count the number of cells already marked
+            // as used on the next higher level
+            const unsigned int used_cells
+              =  std::count_if (triangulation.levels[level+1]->cells.used.begin(),
+                                triangulation.levels[level+1]->cells.used.end(),
+                                std::bind2nd (std::equal_to<bool>(), true));
+
+            // reserve space for the used_cells cells already existing
+            // on the next higher level as well as for the
+            // 2*flagged_cells children that will be created on that level
+            triangulation.levels[level+1]
+            ->reserve_space(used_cells+
+                            GeometryInfo<1>::max_children_per_cell *
+                            flagged_cells,
+                            1,
+                            spacedim);
+            // reserve space for 2*flagged_cells new lines on the next
+            // higher level (in 1d the lines are the cells themselves)
+            triangulation.levels[level+1]->cells
+            .reserve_space (GeometryInfo<1>::max_children_per_cell *
+                            flagged_cells,
+                            0);
+
+            needed_vertices += flagged_cells; // one new vertex per flagged cell
+          }
+
+        // add to needed_vertices the number of
+        // vertices that are already in use
+        needed_vertices += std::count_if (triangulation.vertices_used.begin(),
+                                          triangulation.vertices_used.end(),
+                                          std::bind2nd (std::equal_to<bool>(),
+                                                        true));
+        // if we need more vertices: create them; if not: leave the
+        // array as is, since shrinking is not really possible because
+        // some of the vertices at the end may be in use
+        if (needed_vertices > triangulation.vertices.size())
+          {
+            triangulation.vertices.resize (needed_vertices,
+                                           Point<spacedim>());
+            triangulation.vertices_used.resize (needed_vertices, false);
+          }
+
+
+        // Do REFINEMENT on every level; exclude the highest level as
+        // above. Levels are visited from finest to coarsest.
+
+        // index of next unused vertex (only ever advanced, never reset)
+        unsigned int next_unused_vertex = 0;
+
+        for (int level=triangulation.levels.size()-2; level>=0; --level)
+          {
+            typename Triangulation<dim,spacedim>::active_cell_iterator
+            cell = triangulation.begin_active(level),
+            endc = triangulation.begin_active(level+1);
+
+            typename Triangulation<dim,spacedim>::raw_cell_iterator
+            next_unused_cell = triangulation.begin_raw (level+1);
+
+            for (; (cell!=endc) && (cell->level()==level); ++cell)
+              if (cell->refine_flag_set())
+                {
+                  // clear refinement flag
+                  cell->clear_refine_flag ();
+
+                  // search for next unused vertex. NOTE(review): the bounds
+                  // Assert below only runs after vertices_used[] was indexed
+                  while (triangulation.vertices_used[next_unused_vertex] == true)
+                    ++next_unused_vertex;
+                  Assert (next_unused_vertex < triangulation.vertices.size(),
+                          ExcMessage("Internal error: During refinement, the triangulation wants to access an element of the 'vertices' array but it turns out that the array is not large enough."));
+
+                  // Now we always ask the cell itself where to put
+                  // the new point. The cell in turn will query the
+                  // manifold object internally.
+                  triangulation.vertices[next_unused_vertex] =
+                    cell->center(true);
+
+                  triangulation.vertices_used[next_unused_vertex] = true;
+
+                  // search for the next two unused cells (++ takes care of
+                  // the end of the vector); they must be consecutive slots
+                  typename Triangulation<dim,spacedim>::raw_cell_iterator
+                  first_child,
+                  second_child;
+                  while (next_unused_cell->used() == true)
+                    ++next_unused_cell;
+                  first_child = next_unused_cell;
+                  first_child->set_used_flag ();
+                  first_child->clear_user_data ();
+                  ++next_unused_cell;
+                  Assert (next_unused_cell->used() == false,
+                          ExcMessage("Internal error: We want to use a cell during refinement that should be unused, but turns out not to be."));
+                  second_child = next_unused_cell;
+                  second_child->set_used_flag ();
+                  second_child->clear_user_data ();
+
+                  types::subdomain_id subdomainid = cell->subdomain_id();
+
+                  // insert first child: it spans vertex 0 of the parent to the new vertex and inherits the parent's ids and direction flag
+                  cell->set_children (0, first_child->index());
+                  first_child->clear_children ();
+                  first_child->set (internal::Triangulation
+                                    ::TriaObject<1> (cell->vertex_index(0),
+                                                     next_unused_vertex));
+                  first_child->set_material_id (cell->material_id());
+                  first_child->set_manifold_id (cell->manifold_id());
+                  first_child->set_subdomain_id (subdomainid);
+                  first_child->set_direction_flag (cell->direction_flag());
+
+                  first_child->set_parent (cell->index ());
+
+                  // Set manifold id of the right face (the new vertex). Only
+                  // do this on the first child.
+                  first_child->face(1)->set_manifold_id(cell->manifold_id());
+
+                  // reset neighborship info (refer to
+                  // internal::Triangulation::TriaLevel<0> for
+                  // details)
+                  first_child->set_neighbor (1, second_child);
+                  if (cell->neighbor(0).state() != IteratorState::valid)
+                    first_child->set_neighbor (0, cell->neighbor(0));
+                  else if (cell->neighbor(0)->active())
+                    {
+                      // since the neighbor's level is always <=level,
+                      // if the neighbor is active, then there are no
+                      // cells to the left which may want to know
+                      // about this new child cell.
+                      Assert (cell->neighbor (0)->level () <= cell->level (),
+                              ExcInternalError ());
+                      first_child->set_neighbor (0, cell->neighbor(0));
+                    }
+                  else
+                    // left neighbor is refined
+                    {
+                      // set neighbor to cell on same level
+                      const unsigned int nbnb = cell->neighbor_of_neighbor (0);
+                      first_child->set_neighbor (0, cell->neighbor(0)->child(nbnb));
+
+                      // reset neighbor info of all right descendants
+                      // of the left neighbor of cell
+                      typename Triangulation<dim,spacedim>::cell_iterator
+                      left_neighbor = cell->neighbor(0);
+                      while (left_neighbor->has_children())
+                        {
+                          left_neighbor = left_neighbor->child(nbnb);
+                          left_neighbor->set_neighbor (nbnb, first_child);
+                        }
+                    }
+
+                  // insert second child: it spans the new vertex to vertex 1 of the parent and inherits the parent's ids and direction flag
+                  second_child->clear_children ();
+                  second_child->set (internal::Triangulation
+                                     ::TriaObject<1>(next_unused_vertex,
+                                                     cell->vertex_index(1)));
+                  second_child->set_neighbor (0, first_child);
+                  second_child->set_material_id (cell->material_id());
+                  second_child->set_manifold_id (cell->manifold_id());
+                  second_child->set_subdomain_id (subdomainid);
+                  second_child->set_direction_flag (cell->direction_flag());
+
+                  if (cell->neighbor(1).state() != IteratorState::valid)
+                    second_child->set_neighbor (1, cell->neighbor(1));
+                  else if (cell->neighbor(1)->active())
+                    {
+                      Assert (cell->neighbor (1)->level () <= cell->level (),
+                              ExcInternalError ());
+                      second_child->set_neighbor (1, cell->neighbor(1));
+                    }
+                  else
+                    // right neighbor is refined; same as above, mirrored
+                    {
+                      const unsigned int nbnb = cell->neighbor_of_neighbor (1);
+                      second_child->set_neighbor (1, cell->neighbor(1)->child(nbnb));
+
+                      typename Triangulation<dim,spacedim>::cell_iterator
+                      right_neighbor = cell->neighbor(1);
+                      while (right_neighbor->has_children())
+                        {
+                          right_neighbor = right_neighbor->child(nbnb);
+                          right_neighbor->set_neighbor (nbnb, second_child);
+                        }
+                    }
+                  // inform all listeners that refinement of this cell is done
+                  triangulation.signals.post_refinement_on_cell(cell);
+                }
+          }
+
+        // in 1d, we can not have distorted children unless the parent
+        // was already distorted (that is because we don't use
+        // boundary information for 1d triangulations). so return an
+        // empty list
+        return typename Triangulation<1,spacedim>::DistortedCellList();
+      }
+
+
+      /**
+       * A function that performs the refinement of a triangulation in
+       * 2d.
+       */
+      template <int spacedim>
+      static
+      typename Triangulation<2,spacedim>::DistortedCellList
+      execute_refinement (Triangulation<2,spacedim> &triangulation,
+                          const bool check_for_distorted_cells)
+      {
+        const unsigned int dim = 2;
+
+        // check whether a new level is needed we have to check for
+        // this on the highest level only (on this, all used cells are
+        // also active, so we only have to check for this)
+        if (true)
+          {
+            typename Triangulation<dim,spacedim>::raw_cell_iterator
+            cell = triangulation.begin_active (triangulation.levels.size()-1),
+            endc = triangulation.end();
+            for (; cell != endc; ++cell)
+              if (cell->used())
+                if (cell->refine_flag_set())
+                  {
+                    triangulation.levels.push_back (new internal::Triangulation::TriaLevel<dim>);
+                    break;
+                  }
+          }
+
+
+        // first clear user flags and pointers of lines; we're going
+        // to use them to flag which lines need refinement
+        for (typename Triangulation<dim,spacedim>::line_iterator
+             line=triangulation.begin_line(); line!=triangulation.end_line(); ++line)
+          {
+            line->clear_user_flag();
+            line->clear_user_data();
+          }
+        // running over all cells and lines count the number
+        // n_single_lines of lines which can be stored as single
+        // lines, e.g. inner lines
+        unsigned int n_single_lines=0;
+
+        // New lines to be created: number lines which are stored in
+        // pairs (the children of lines must be stored in pairs)
+        unsigned int n_lines_in_pairs = 0;
+
+        // check how much space is needed on every level we need not
+        // check the highest level since either - on the highest level
+        // no cells are flagged for refinement - there are, but
+        // prepare_refinement added another empty level
+        unsigned int needed_vertices = 0;
+        for (int level=triangulation.levels.size()-2; level>=0; --level)
+          {
+            // count number of flagged cells on this level and compute
+            // how many new vertices and new lines will be needed
+            unsigned int needed_cells = 0;
+
+            typename Triangulation<dim,spacedim>::active_cell_iterator
+            cell = triangulation.begin_active(level),
+            endc = triangulation.begin_active(level+1);
+            for (; cell!=endc; ++cell)
+              if (cell->refine_flag_set())
+                {
+                  if (cell->refine_flag_set()==RefinementCase<dim>::cut_xy)
+                    {
+                      needed_cells += 4;
+
+                      // new vertex at center of cell is needed in any
+                      // case
+                      ++needed_vertices;
+
+                      // the four inner lines can be stored as singles
+                      n_single_lines += 4;
+                    }
+                  else // cut_x || cut_y
+                    {
+                      // set the flag showing that anisotropic
+                      // refinement is used for at least one cell
+                      triangulation.anisotropic_refinement = true;
+
+                      needed_cells += 2;
+                      // no vertex at center
+
+                      // the inner line can be stored as single
+                      n_single_lines += 1;
+
+                    }
+
+                  // mark all faces (lines) for refinement; checking
+                  // locally whether the neighbor would also like to
+                  // refine them is rather difficult for lines so we
+                  // only flag them and after visiting all cells, we
+                  // decide which lines need refinement;
+                  for (unsigned int line_no=0; line_no<GeometryInfo<dim>::faces_per_cell;
+                       ++line_no)
+                    {
+                      if (GeometryInfo<dim>::face_refinement_case(
+                            cell->refine_flag_set(), line_no)==RefinementCase<1>::cut_x)
+                        {
+                          typename Triangulation<dim,spacedim>::line_iterator
+                          line = cell->line(line_no);
+                          if (line->has_children() == false)
+                            {
+                              line->set_user_flag ();
+//TODO[WB]: we overwrite the user_index here because we later on need
+// to find out which boundary object we have to ask to refine this
+// line. we can't use the boundary_id field because that can
+// only be used for lines at the boundary of the domain, but we also
+// need a domain description for interior lines in the codim-1 case
+                              if (spacedim > dim)
+                                {
+                                  if (line->at_boundary())
+                                    // if possible honor boundary
+                                    // indicator
+                                    line->set_user_index(line->boundary_id());
+                                  else
+                                    // otherwise take manifold
+                                    // description from the adjacent
+                                    // cell
+                                    line->set_user_index(cell->material_id());
+                                }
+                            }
+                        }
+                    }
+                }
+
+
+            // count number of used cells on the next higher level
+            const unsigned int used_cells
+              = std::count_if (triangulation.levels[level+1]->cells.used.begin(),
+                               triangulation.levels[level+1]->cells.used.end(),
+                               std::bind2nd (std::equal_to<bool>(), true));
+
+
+            // reserve space for the used_cells cells already existing
+            // on the next higher level as well as for the
+            // needed_cells that will be created on that level
+            triangulation.levels[level+1]
+            ->reserve_space (used_cells+needed_cells, 2, spacedim);
+
+            // reserve space for needed_cells new quads on the next
+            // higher level
+            triangulation.levels[level+1]->cells.
+            reserve_space (needed_cells,0);
+          }
+
+        // now count the lines which were flagged for refinement
+        for (typename Triangulation<dim,spacedim>::line_iterator
+             line=triangulation.begin_line(); line!=triangulation.end_line(); ++line)
+          if (line->user_flag_set())
+            {
+              Assert (line->has_children() == false, ExcInternalError());
+              n_lines_in_pairs += 2;
+              needed_vertices  += 1;
+            }
+        // reserve space for n_lines_in_pairs new lines.  note, that
+        // we can't reserve space for the single lines here as well,
+        // as all the space reserved for lines in pairs would be
+        // counted as unused and we would end up with too little space
+        // to store all lines. memory reservation for n_single_lines
+        // can only be done AFTER we refined the lines of the current
+        // cells
+        triangulation.faces->lines.
+        reserve_space (n_lines_in_pairs, 0);
+
+        // add to needed vertices how many vertices are already in use
+        needed_vertices += std::count_if (triangulation.vertices_used.begin(), triangulation.vertices_used.end(),
+                                          std::bind2nd (std::equal_to<bool>(), true));
+        // if we need more vertices: create them, if not: leave the
+        // array as is, since shrinking is not really possible because
+        // some of the vertices at the end may be in use
+        if (needed_vertices > triangulation.vertices.size())
+          {
+            triangulation.vertices.resize (needed_vertices, Point<spacedim>());
+            triangulation.vertices_used.resize (needed_vertices, false);
+          }
+
+
+        // Do REFINEMENT on every level; exclude highest level as
+        // above
+
+        //  index of next unused vertex
+        unsigned int next_unused_vertex = 0;
+
+        // first the refinement of lines.  children are stored
+        // pairwise
+        if (true)
+          {
+            // only active objects can be refined further
+            typename Triangulation<dim,spacedim>::active_line_iterator
+            line = triangulation.begin_active_line(),
+            endl = triangulation.end_line();
+            typename Triangulation<dim,spacedim>::raw_line_iterator
+            next_unused_line = triangulation.begin_raw_line ();
+
+            for (; line!=endl; ++line)
+              if (line->user_flag_set())
+                {
+                  // this line needs to be refined
+
+                  // find the next unused vertex and set it
+                  // appropriately
+                  while (triangulation.vertices_used[next_unused_vertex] == true)
+                    ++next_unused_vertex;
+                  Assert (next_unused_vertex < triangulation.vertices.size(),
+                          ExcMessage("Internal error: During refinement, the triangulation wants to access an element of the 'vertices' array but it turns out that the array is not large enough."));
+                  triangulation.vertices_used[next_unused_vertex] = true;
+
+                  if (spacedim == dim)
+                    {
+                      // for the case of a domain in an
+                      // equal-dimensional space we only have to treat
+                      // boundary lines differently; for interior
+                      // lines we can compute the midpoint as the mean
+                      // of the two vertices: if (line->at_boundary())
+                      triangulation.vertices[next_unused_vertex]
+                        = line->center(true);
+                    }
+                  else
+                    // however, if spacedim>dim, we always have to ask
+                    // the boundary object for its answer. We use the
+                    // same object of the cell (which was stored in
+                    // line->user_index() before) unless a manifold_id
+                    // has been set on this very line.
+                    if (line->manifold_id() == numbers::invalid_manifold_id)
+                      triangulation.vertices[next_unused_vertex]
+                        = triangulation.get_manifold(line->user_index()).get_new_point_on_line (line);
+                    else
+                      triangulation.vertices[next_unused_vertex]
+                        = line->center(true);
+
+                  // now that we created the right point, make up the
+                  // two child lines.  To this end, find a pair of
+                  // unused lines
+                  bool pair_found=false;
+                  (void)pair_found;
+                  for (; next_unused_line!=endl; ++next_unused_line)
+                    if (!next_unused_line->used() &&
+                        !(++next_unused_line)->used())
+                      {
+                        // go back to the first of the two unused
+                        // lines
+                        --next_unused_line;
+                        pair_found=true;
+                        break;
+                      }
+                  Assert (pair_found, ExcInternalError());
+
+                  // there are now two consecutive unused lines, such
+                  // that the children of a line will be consecutive.
+                  // then set the child pointer of the present line
+                  line->set_children (0, next_unused_line->index());
+
+                  // set the two new lines
+                  const typename Triangulation<dim,spacedim>::raw_line_iterator
+                  children[2] = { next_unused_line,
+                                  ++next_unused_line
+                                };
+                  // some tests; if any of the iterators should be
+                  // invalid, then already dereferencing will fail
+                  Assert (children[0]->used() == false, ExcMessage("Internal error: We want to use a cell during refinement that should be unused, but turns out not to be."));
+                  Assert (children[1]->used() == false, ExcMessage("Internal error: We want to use a cell during refinement that should be unused, but turns out not to be."));
+
+                  children[0]->set (internal::Triangulation
+                                    ::TriaObject<1>(line->vertex_index(0),
+                                                    next_unused_vertex));
+                  children[1]->set (internal::Triangulation
+                                    ::TriaObject<1>(next_unused_vertex,
+                                                    line->vertex_index(1)));
+
+                  children[0]->set_used_flag();
+                  children[1]->set_used_flag();
+                  children[0]->clear_children();
+                  children[1]->clear_children();
+                  children[0]->clear_user_data();
+                  children[1]->clear_user_data();
+                  children[0]->clear_user_flag();
+                  children[1]->clear_user_flag();
+
+                  children[0]->set_boundary_id (line->boundary_id());
+                  children[1]->set_boundary_id (line->boundary_id());
+
+                  children[0]->set_manifold_id (line->manifold_id());
+                  children[1]->set_manifold_id (line->manifold_id());
+
+                  // finally clear flag indicating the need for
+                  // refinement
+                  line->clear_user_flag ();
+                }
+          }
+
+
+        // Now set up the new cells
+
+        // reserve space for inner lines (can be stored as single
+        // lines)
+        triangulation.faces->lines.
+        reserve_space (0,n_single_lines);
+
+        typename Triangulation<2,spacedim>::DistortedCellList
+        cells_with_distorted_children;
+
+        // reset next_unused_line, as now also single empty places in
+        // the vector can be used
+        typename Triangulation<dim,spacedim>::raw_line_iterator
+        next_unused_line = triangulation.begin_raw_line ();
+
+        for (int level=0; level<static_cast<int>(triangulation.levels.size())-1; ++level)
+          {
+
+            // Remember: as we don't operate on the finest level,
+            // begin_*(level+1) is allowed
+            typename Triangulation<dim,spacedim>::active_cell_iterator
+            cell = triangulation.begin_active(level),
+            endc = triangulation.begin_active(level+1);
+
+            typename Triangulation<dim,spacedim>::raw_cell_iterator
+            next_unused_cell = triangulation.begin_raw (level+1);
+
+            for (; cell!=endc; ++cell)
+              if (cell->refine_flag_set())
+                {
+                  // set the user flag to indicate, that at least one
+                  // line is at the boundary
+
+                  // TODO[Tobias Leicht] find a better place to set
+                  // this flag, so that we do not need so much time to
+                  // check each cell here
+                  if (cell->at_boundary())
+                    cell->set_user_flag();
+
+                  // actually set up the children and update neighbor
+                  // information
+                  create_children (triangulation,
+                                   next_unused_vertex,
+                                   next_unused_line,
+                                   next_unused_cell,
+                                   cell);
+
+                  if ((check_for_distorted_cells == true)
+                      &&
+                      has_distorted_children (cell,
+                                              internal::int2type<dim>(),
+                                              internal::int2type<spacedim>()))
+                    cells_with_distorted_children.distorted_cells.push_back (cell);
+                  // inform all listeners that cell refinement is done
+                  triangulation.signals.post_refinement_on_cell(cell);
+                }
+          }
+
+        return cells_with_distorted_children;
+      }
+
+
+      /**
+       * A function that performs the refinement of a triangulation in
+       * 3d.
+       */
+      template <int spacedim>
+      static
+      typename Triangulation<3,spacedim>::DistortedCellList
+      execute_refinement (Triangulation<3,spacedim> &triangulation,
+                          const bool check_for_distorted_cells)
+      {
+        const unsigned int dim = 3;
+
+        // this function probably also works for spacedim>3 but it
+        // isn't tested. it will probably be necessary to pull new
+        // vertices onto the manifold just as we do for the other
+        // functions above.
+        Assert (spacedim == 3, ExcNotImplemented());
+
+        // check whether a new level is needed. we have to check for
+        // this on the highest level only (on this, all used cells are
+        // also active, so we only have to check for this)
+        if (true)
+          {
+            typename Triangulation<dim,spacedim>::raw_cell_iterator
+            cell = triangulation.begin_active (triangulation.levels.size()-1),
+            endc = triangulation.end();
+            for (; cell != endc; ++cell)
+              if (cell->used())
+                if (cell->refine_flag_set())
+                  {
+                    triangulation.levels.push_back (new internal::Triangulation::TriaLevel<dim>);
+                    break;
+                  }
+          }
+
+
+        // first clear user flags for quads and lines; we're going to
+        // use them to flag which lines and quads need refinement
+        triangulation.faces->quads.clear_user_data();
+
+        for (typename Triangulation<dim,spacedim>::line_iterator
+             line=triangulation.begin_line(); line!=triangulation.end_line(); ++line)
+          line->clear_user_flag();
+        for (typename Triangulation<dim,spacedim>::quad_iterator
+             quad=triangulation.begin_quad(); quad!=triangulation.end_quad(); ++quad)
+          quad->clear_user_flag();
+
+        // create an array of face refine cases. User indices of faces
+        // will be set to values corresponding with indices in this
+        // array.
+        const RefinementCase<dim-1>  face_refinement_cases[4]=
+        {
+          RefinementCase<dim-1>::no_refinement,
+          RefinementCase<dim-1>::cut_x,
+          RefinementCase<dim-1>::cut_y,
+          RefinementCase<dim-1>::cut_xy
+        };
+
+        // check how much space is needed on every level. we need not
+        // check the highest level since either
+        // - on the highest level no cells are flagged for refinement
+        // - there are, but prepare_refinement added another empty
+        // level which then is the highest level
+
+        // variables to hold the number of newly to be created
+        // vertices, lines and quads. as these are stored globally,
+        // declare them outside the loop over all levels. we need lines
+        // and quads in pairs for refinement of old ones and lines and
+        // quads, that can be stored as single ones, as they are newly
+        // created in the inside of an existing cell
+        unsigned int needed_vertices = 0;
+        unsigned int needed_lines_single  = 0;
+        unsigned int needed_quads_single  = 0;
+        unsigned int needed_lines_pair  = 0;
+        unsigned int needed_quads_pair  = 0;
+        for (int level=triangulation.levels.size()-2; level>=0; --level)
+          {
+            // count number of flagged cells on this level and compute
+            // how many new vertices and new lines will be needed
+            unsigned int new_cells = 0;
+
+            typename Triangulation<dim,spacedim>::active_cell_iterator
+            acell = triangulation.begin_active(level),
+            aendc = triangulation.begin_active(level+1);
+            for (; acell!=aendc; ++acell)
+              if (acell->refine_flag_set())
+                {
+                  RefinementCase<dim> ref_case=acell->refine_flag_set();
+
+                  // now for interior vertices, lines and quads, which
+                  // are needed in any case
+                  if (ref_case==RefinementCase<dim>::cut_x ||
+                      ref_case==RefinementCase<dim>::cut_y ||
+                      ref_case==RefinementCase<dim>::cut_z)
+                    {
+                      ++needed_quads_single;
+                      new_cells+=2;
+                      triangulation.anisotropic_refinement=true;
+                    }
+                  else if (ref_case==RefinementCase<dim>::cut_xy ||
+                           ref_case==RefinementCase<dim>::cut_xz ||
+                           ref_case==RefinementCase<dim>::cut_yz)
+                    {
+                      ++needed_lines_single;
+                      needed_quads_single += 4;
+                      new_cells+=4;
+                      triangulation.anisotropic_refinement=true;
+                    }
+                  else if  (ref_case==RefinementCase<dim>::cut_xyz)
+                    {
+                      ++needed_vertices;
+                      needed_lines_single += 6;
+                      needed_quads_single += 12;
+                      new_cells+=8;
+                    }
+                  else
+                    {
+                      // we should never get here
+                      Assert(false, ExcInternalError());
+                    }
+
+                  // mark all faces for refinement; checking locally
+                  // if and how the neighbor would like to refine
+                  // these is difficult so we only flag them and after
+                  // visiting all cells, we decide which faces need
+                  // which refinement;
+                  for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell;
+                       ++face)
+                    {
+                      typename Triangulation<dim,spacedim>::face_iterator
+                      aface = acell->face(face);
+                      // get the RefineCase this faces has for the
+                      // given RefineCase of the cell
+                      RefinementCase<dim-1> face_ref_case=
+                        GeometryInfo<dim>::face_refinement_case(ref_case,
+                                                                face,
+                                                                acell->face_orientation(face),
+                                                                acell->face_flip(face),
+                                                                acell->face_rotation(face));
+                      // only do something, if this face has to be
+                      // refined
+                      if (face_ref_case)
+                        {
+                          if (face_ref_case==RefinementCase<dim-1>::isotropic_refinement)
+                            {
+                              if (aface->number_of_children()<4)
+                                // we use user_flags to denote needed
+                                // isotropic refinement
+                                aface->set_user_flag();
+                            }
+                          else if (aface->refinement_case()!=face_ref_case)
+                            // we use user_indices to denote needed
+                            // anisotropic refinement. note, that we
+                            // can have at most one anisotropic
+                            // refinement case for this face, as
+                            // otherwise prepare_refinement() would
+                            // have changed one of the cells to yield
+                            // isotropic refinement at this
+                            // face. therefore we set the user_index
+                            // uniquely
+                            {
+                              Assert(aface->refinement_case()==RefinementCase<dim-1>::isotropic_refinement ||
+                                     aface->refinement_case()==RefinementCase<dim-1>::no_refinement,
+                                     ExcInternalError());
+                              aface->set_user_index(face_ref_case);
+                            }
+                        }
+                    }// for all faces
+
+                  // flag all lines, that have to be refined
+                  for (unsigned int line=0; line<GeometryInfo<dim>::lines_per_cell; ++line)
+                    if (GeometryInfo<dim>::line_refinement_case(ref_case,line) &&
+                        !acell->line(line)->has_children())
+                      acell->line(line)->set_user_flag();
+
+                }// if refine_flag set and for all cells on this level
+
+
+            // count number of used cells on the next higher level
+            const unsigned int used_cells
+              = std::count_if (triangulation.levels[level+1]->cells.used.begin(),
+                               triangulation.levels[level+1]->cells.used.end(),
+                               std::bind2nd (std::equal_to<bool>(), true));
+
+
+            // reserve space for the used_cells cells already existing
+            // on the next higher level as well as for the new_cells
+            // children (2, 4 or 8 per flagged cell) created on that level
+            triangulation.levels[level+1]
+            ->reserve_space (used_cells+new_cells, 3, spacedim);
+            // reserve space for the new_cells new hexes on the next
+            // higher level
+            triangulation.levels[level+1]->cells.reserve_space (new_cells);
+          }// for all levels
+        // now count the quads and lines which were flagged for
+        // refinement
+        for (typename Triangulation<dim,spacedim>::quad_iterator
+             quad=triangulation.begin_quad(); quad!=triangulation.end_quad(); ++quad)
+          {
+            if (quad->user_flag_set())
+              {
+                // isotropic refinement: 1 interior vertex, 4 quads
+                // and 4 interior lines. we store the interior lines
+                // in pairs in case the face is already or will be
+                // refined anisotropically
+                needed_quads_pair += 4;
+                needed_lines_pair += 4;
+                needed_vertices += 1;
+              }
+            if (quad->user_index())
+              {
+                // anisotropic refinement: 1 interior
+                // line and two quads
+                needed_quads_pair += 2;
+                needed_lines_single += 1;
+                // there is a kind of complicated situation here which
+                // requires our attention. if the quad is refined
+                // isotropically, two of the interior lines will get a
+                // new mother line - the interior line of our
+                // anisotropically refined quad. if those two lines
+                // are not consecutive, we cannot do so and have to
+                // replace them by two lines that are consecutive. we
+                // try to avoid that situation, but it may happen
+                // nevertheless through repeated refinement and
+                // coarsening. thus we have to check here, as we will
+                // need some additional space to store those new lines
+                // in case we need them...
+                if (quad->has_children())
+                  {
+                    Assert(quad->refinement_case()==RefinementCase<dim-1>::isotropic_refinement, ExcInternalError());
+                    if ((face_refinement_cases[quad->user_index()]==RefinementCase<dim-1>::cut_x
+                         && (quad->child(0)->line_index(1)+1!=quad->child(2)->line_index(1))) ||
+                        (face_refinement_cases[quad->user_index()]==RefinementCase<dim-1>::cut_y
+                         && (quad->child(0)->line_index(3)+1!=quad->child(1)->line_index(3))))
+                      needed_lines_pair +=2;
+                  }
+              }
+          }
+
+        for (typename Triangulation<dim,spacedim>::line_iterator
+             line=triangulation.begin_line(); line!=triangulation.end_line(); ++line)
+          if (line->user_flag_set())
+            {
+              needed_lines_pair += 2;
+              needed_vertices += 1;
+            }
+
+        // reserve space for needed_lines new lines stored in pairs
+        triangulation.faces->lines.
+        reserve_space (needed_lines_pair,needed_lines_single);
+        // reserve space for needed_quads new quads stored in pairs
+        triangulation.faces->quads.
+        reserve_space (needed_quads_pair,needed_quads_single);
+
+
+        // add to needed vertices how many vertices are already in use
+        needed_vertices += std::count_if (triangulation.vertices_used.begin(), triangulation.vertices_used.end(),
+                                          std::bind2nd (std::equal_to<bool>(), true));
+        // if we need more vertices: create them, if not: leave the
+        // array as is, since shrinking is not really possible because
+        // some of the vertices at the end may be in use
+        if (needed_vertices > triangulation.vertices.size())
+          {
+            triangulation.vertices.resize (needed_vertices, Point<spacedim>());
+            triangulation.vertices_used.resize (needed_vertices, false);
+          }
+
+
+        ///////////////////////////////////////////
+        // Before we start with the actual refinement, we do some
+        // sanity checks if in debug mode. especially, we try to catch
+        // the notorious problem with lines being twice refined,
+        // i.e. there are cells adjacent at one line ("around the
+        // edge", but not at a face), with two cells differing by more
+        // than one refinement level
+        //
+        // this check is very simple to implement here, since we have
+        // all lines flagged if they shall be refined
+#ifdef DEBUG
+        for (typename Triangulation<dim,spacedim>::active_cell_iterator
+             cell=triangulation.begin_active(); cell!=triangulation.end(); ++cell)
+          if (!cell->refine_flag_set())
+            for (unsigned int line=0; line<GeometryInfo<dim>::lines_per_cell; ++line)
+              if (cell->line(line)->has_children())
+                for (unsigned int c=0; c<2; ++c)
+                  Assert (cell->line(line)->child(c)->user_flag_set() == false,
+                          ExcInternalError());
+#endif
+
+        ///////////////////////////////////////////
+        // Do refinement on every level
+        //
+        // To make life a bit easier, we first refine those lines and
+        // quads that were flagged for refinement and then compose the
+        // newly to be created cells.
+        //
+        // index of next unused vertex
+        unsigned int next_unused_vertex = 0;
+
+        // first for lines
+        if (true)
+          {
+            // only active objects can be refined further
+            typename Triangulation<dim,spacedim>::active_line_iterator
+            line = triangulation.begin_active_line(),
+            endl = triangulation.end_line();
+            typename Triangulation<dim,spacedim>::raw_line_iterator
+            next_unused_line = triangulation.begin_raw_line ();
+
+            for (; line!=endl; ++line)
+              if (line->user_flag_set())
+                {
+                  // this line needs to be refined
+
+                  // find the next unused vertex and set it
+                  // appropriately
+                  while (triangulation.vertices_used[next_unused_vertex] == true)
+                    ++next_unused_vertex;
+                  Assert (next_unused_vertex < triangulation.vertices.size(),
+                          ExcMessage("Internal error: During refinement, the triangulation wants to access an element of the 'vertices' array but it turns out that the array is not large enough."));
+                  triangulation.vertices_used[next_unused_vertex] = true;
+
+                  triangulation.vertices[next_unused_vertex]
+                    = line->center(true);
+
+                  // now that we created the right point, make up the
+                  // two child lines (++ takes care of the end of the
+                  // vector)
+                  next_unused_line=triangulation.faces->lines.next_free_pair_object(triangulation);
+                  Assert(next_unused_line.state() == IteratorState::valid,
+                         ExcInternalError());
+
+                  // now we found two consecutive unused lines, such
+                  // that the children of a line will be consecutive.
+                  // then set the child pointer of the present line
+                  line->set_children (0, next_unused_line->index());
+
+                  // set the two new lines
+                  const typename Triangulation<dim,spacedim>::raw_line_iterator
+                  children[2] = { next_unused_line,
+                                  ++next_unused_line
+                                };
+
+                  // some tests; if any of the iterators should be
+                  // invalid, then already dereferencing will fail
+                  Assert (children[0]->used() == false, ExcMessage("Internal error: We want to use a cell during refinement that should be unused, but turns out not to be."));
+                  Assert (children[1]->used() == false, ExcMessage("Internal error: We want to use a cell during refinement that should be unused, but turns out not to be."));
+
+                  children[0]->set (internal::Triangulation
+                                    ::TriaObject<1>(line->vertex_index(0),
+                                                    next_unused_vertex));
+                  children[1]->set (internal::Triangulation
+                                    ::TriaObject<1>(next_unused_vertex,
+                                                    line->vertex_index(1)));
+
+                  children[0]->set_used_flag();
+                  children[1]->set_used_flag();
+                  children[0]->clear_children();
+                  children[1]->clear_children();
+                  children[0]->clear_user_data();
+                  children[1]->clear_user_data();
+                  children[0]->clear_user_flag();
+                  children[1]->clear_user_flag();
+
+                  children[0]->set_boundary_id (line->boundary_id());
+                  children[1]->set_boundary_id (line->boundary_id());
+
+                  children[0]->set_manifold_id (line->manifold_id());
+                  children[1]->set_manifold_id (line->manifold_id());
+
+                  // finally clear flag
+                  // indicating the need
+                  // for refinement
+                  line->clear_user_flag ();
+                }
+          }
+
+
+        ///////////////////////////////////////
+        // now refine marked quads
+        ///////////////////////////////////////
+
+        // here we encounter several cases:
+
+        // a) the quad is unrefined and shall be refined isotropically
+
+        // b) the quad is unrefined and shall be refined
+        // anisotropically
+
+        // c) the quad is unrefined and shall be refined both
+        // anisotropically and isotropically (this is reduced to case
+        // b) and then case b) for the children again)
+
+        // d) the quad is refined anisotropically and shall be refined
+        // isotropically (this is reduced to case b) for the
+        // anisotropic children)
+
+        // e) the quad is refined isotropically and shall be refined
+        // anisotropically (this is transformed to case c), however we
+        // might have to renumber/rename children...)
+
+        // we need a loop in cases c) and d), as the anisotropic
+        // children might have a lower index than the mother quad
+        for (unsigned int loop=0; loop<2; ++loop)
+          {
+            // usually, only active objects can be refined
+            // further. however, in cases d) and e) that is not true,
+            // so we have to use 'normal' iterators here
+            typename Triangulation<dim,spacedim>::quad_iterator
+            quad = triangulation.begin_quad(),
+            endq = triangulation.end_quad();
+            typename Triangulation<dim,spacedim>::raw_line_iterator
+            next_unused_line = triangulation.begin_raw_line ();
+            typename Triangulation<dim,spacedim>::raw_quad_iterator
+            next_unused_quad = triangulation.begin_raw_quad ();
+
+            for (; quad!=endq; ++quad)
+              {
+                if (quad->user_index())
+                  {
+                    RefinementCase<dim-1> aniso_quad_ref_case=face_refinement_cases[quad->user_index()];
+                    // there is one unlikely event here, where we
+                    // already have refined the face: if the face was
+                    // refined anisotropically and we want to refine
+                    // it isotropically, both children are flagged for
+                    // anisotropic refinement. however, if those
+                    // children were already flagged for anisotropic
+                    // refinement, they might already be processed and
+                    // refined.
+                    if (aniso_quad_ref_case == quad->refinement_case())
+                      continue;
+
+                    Assert(quad->refinement_case()==RefinementCase<dim-1>::cut_xy ||
+                           quad->refinement_case()==RefinementCase<dim-1>::no_refinement,
+                           ExcInternalError());
+
+                    // this quad needs to be refined anisotropically
+                    Assert(quad->user_index() == RefinementCase<dim-1>::cut_x ||
+                           quad->user_index() == RefinementCase<dim-1>::cut_y,
+                           ExcInternalError());
+
+                    // make the new line interior to the quad
+                    typename Triangulation<dim,spacedim>::raw_line_iterator new_line;
+
+                    new_line=triangulation.faces->lines.next_free_single_object(triangulation);
+                    Assert (new_line->used() == false,
+                            ExcMessage("Internal error: We want to use a cell during refinement that should be unused, but turns out not to be."));
+
+                    // first collect the
+                    // indices of the vertices:
+                    // *--1--*
+                    // |  |  |
+                    // |  |  |    cut_x
+                    // |  |  |
+                    // *--0--*
+                    //
+                    // *-----*
+                    // |     |
+                    // 0-----1    cut_y
+                    // |     |
+                    // *-----*
+                    unsigned int vertex_indices[2];
+                    if (aniso_quad_ref_case==RefinementCase<dim-1>::cut_x)
+                      {
+                        vertex_indices[0]=quad->line(2)->child(0)->vertex_index(1);
+                        vertex_indices[1]=quad->line(3)->child(0)->vertex_index(1);
+                      }
+                    else
+                      {
+                        vertex_indices[0]=quad->line(0)->child(0)->vertex_index(1);
+                        vertex_indices[1]=quad->line(1)->child(0)->vertex_index(1);
+                      }
+
+                    new_line->set (internal::Triangulation::
+                                   TriaObject<1>(vertex_indices[0], vertex_indices[1]));
+                    new_line->set_used_flag();
+                    new_line->clear_user_flag();
+                    new_line->clear_user_data();
+                    new_line->clear_children();
+                    new_line->set_boundary_id(quad->boundary_id());
+                    new_line->set_manifold_id(quad->manifold_id());
+
+                    // child 0 and 1 of a line are switched if the
+                    // line orientation is false. set up a miniature
+                    // table, indicating which child to take for line
+                    // orientations false and true. first index: child
+                    // index in standard orientation, second index:
+                    // line orientation
+                    const unsigned int index[2][2]=
+                    {
+                      {1,0},   // child 0, line_orientation=false and true
+                      {0,1}
+                    };  // child 1, line_orientation=false and true
+
+                    // find some space (consecutive) for the two newly
+                    // to be created quads.
+                    typename Triangulation<dim,spacedim>::raw_quad_iterator new_quads[2];
+
+                    next_unused_quad=triangulation.faces->quads.next_free_pair_object(triangulation);
+                    new_quads[0] = next_unused_quad;
+                    Assert (new_quads[0]->used() == false, ExcMessage("Internal error: We want to use a cell during refinement that should be unused, but turns out not to be."));
+
+                    ++next_unused_quad;
+                    new_quads[1] = next_unused_quad;
+                    Assert (new_quads[1]->used() == false, ExcMessage("Internal error: We want to use a cell during refinement that should be unused, but turns out not to be."));
+
+
+                    if (aniso_quad_ref_case==RefinementCase<dim-1>::cut_x)
+                      {
+                        new_quads[0]->set (internal::Triangulation
+                                           ::TriaObject<2>(quad->line_index(0),
+                                                           new_line->index(),
+                                                           quad->line(2)->child(index[0][quad->line_orientation(2)])->index(),
+                                                           quad->line(3)->child(index[0][quad->line_orientation(3)])->index()));
+                        new_quads[1]->set (internal::Triangulation
+                                           ::TriaObject<2>(new_line->index(),
+                                                           quad->line_index(1),
+                                                           quad->line(2)->child(index[1][quad->line_orientation(2)])->index(),
+                                                           quad->line(3)->child(index[1][quad->line_orientation(3)])->index()));
+                      }
+                    else
+                      {
+                        new_quads[0]->set (internal::Triangulation
+                                           ::TriaObject<2>(quad->line(0)->child(index[0][quad->line_orientation(0)])->index(),
+                                                           quad->line(1)->child(index[0][quad->line_orientation(1)])->index(),
+                                                           quad->line_index(2),
+                                                           new_line->index()));
+                        new_quads[1]->set (internal::Triangulation
+                                           ::TriaObject<2>(quad->line(0)->child(index[1][quad->line_orientation(0)])->index(),
+                                                           quad->line(1)->child(index[1][quad->line_orientation(1)])->index(),
+                                                           new_line->index(),
+                                                           quad->line_index(3)));
+                      }
+
+                    for (unsigned int i=0; i<2; ++i)
+                      {
+                        new_quads[i]->set_used_flag();
+                        new_quads[i]->clear_user_flag();
+                        new_quads[i]->clear_user_data();
+                        new_quads[i]->clear_children();
+                        new_quads[i]->set_boundary_id (quad->boundary_id());
+                        new_quads[i]->set_manifold_id (quad->manifold_id());
+                        // set all line orientations to true, change
+                        // this after the loop, as we have to consider
+                        // different lines for each child
+                        for (unsigned int j=0; j<GeometryInfo<dim>::lines_per_face; ++j)
+                          new_quads[i]->set_line_orientation(j,true);
+                      }
+                    // now set the line orientation of children of
+                    // outer lines correctly, the lines in the
+                    // interior of the refined quad are automatically
+                    // oriented conforming to the standard
+                    new_quads[0]->set_line_orientation(0,quad->line_orientation(0));
+                    new_quads[0]->set_line_orientation(2,quad->line_orientation(2));
+                    new_quads[1]->set_line_orientation(1,quad->line_orientation(1));
+                    new_quads[1]->set_line_orientation(3,quad->line_orientation(3));
+                    if (aniso_quad_ref_case==RefinementCase<dim-1>::cut_x)
+                      {
+                        new_quads[0]->set_line_orientation(3,quad->line_orientation(3));
+                        new_quads[1]->set_line_orientation(2,quad->line_orientation(2));
+                      }
+                    else
+                      {
+                        new_quads[0]->set_line_orientation(1,quad->line_orientation(1));
+                        new_quads[1]->set_line_orientation(0,quad->line_orientation(0));
+                      }
+
+                    // test, whether this face is refined
+                    // isotropically already. if so, set the correct
+                    // children pointers.
+                    if (quad->refinement_case()==RefinementCase<dim-1>::cut_xy)
+                      {
+                        // we will put a new refinement level of
+                        // anisotropic refinement between the
+                        // unrefined and isotropically refined quad
+                        // ending up with the same fine quads but
+                        // introducing anisotropically refined ones as
+                        // children of the unrefined quad and mother
+                        // cells of the original fine ones.
+
+                        // this process includes the creation of a new
+                        // middle line which we will assign as the
+                        // mother line of two of the existing inner
+                        // lines. If those inner lines are not
+                        // consecutive in memory, we won't find them
+                        // later on, so we have to create new ones
+                        // instead and replace all occurrences of the
+                        // old ones with those new ones. As this is
+                        // kind of ugly, we hope we don't have to do
+                        // it often...
+                        typename Triangulation<dim,spacedim>::line_iterator old_child[2];
+                        if (aniso_quad_ref_case==RefinementCase<dim-1>::cut_x)
+                          {
+                            old_child[0]=quad->child(0)->line(1);
+                            old_child[1]=quad->child(2)->line(1);
+                          }
+                        else
+                          {
+                            Assert(aniso_quad_ref_case==RefinementCase<dim-1>::cut_y, ExcInternalError());
+
+                            old_child[0]=quad->child(0)->line(3);
+                            old_child[1]=quad->child(1)->line(3);
+                          }
+
+                        if (old_child[0]->index()+1 != old_child[1]->index())
+                          {
+                            // this is exactly the ugly case we talked
+                            // about. so, no complaining, let's get
+                            // two new lines and copy all info
+                            typename Triangulation<dim,spacedim>::raw_line_iterator new_child[2];
+
+                            new_child[0]=new_child[1]=triangulation.faces->lines.next_free_pair_object(triangulation);
+                            ++new_child[1];
+
+                            new_child[0]->set_used_flag();
+                            new_child[1]->set_used_flag();
+
+                            const int old_index_0=old_child[0]->index(),
+                                      old_index_1=old_child[1]->index(),
+                                      new_index_0=new_child[0]->index(),
+                                      new_index_1=new_child[1]->index();
+
+                            // loop over all quads and replace the old
+                            // lines
+                            for (unsigned int q=0; q<triangulation.faces->quads.cells.size(); ++q)
+                              for (unsigned int l=0; l<GeometryInfo<dim>::lines_per_face; ++l)
+                                {
+                                  const int this_index=triangulation.faces->quads.cells[q].face(l);
+                                  if (this_index==old_index_0)
+                                    triangulation.faces->quads.cells[q].set_face(l,new_index_0);
+                                  else if (this_index==old_index_1)
+                                    triangulation.faces->quads.cells[q].set_face(l,new_index_1);
+                                }
+                            // now we have to copy all information of
+                            // the two lines
+                            for (unsigned int i=0; i<2; ++i)
+                              {
+                                Assert(!old_child[i]->has_children(), ExcInternalError());
+
+                                new_child[i]->set(internal::Triangulation::TriaObject<1>(old_child[i]->vertex_index(0),
+                                                                                         old_child[i]->vertex_index(1)));
+                                new_child[i]->set_boundary_id(old_child[i]->boundary_id());
+                                new_child[i]->set_manifold_id(old_child[i]->manifold_id());
+                                new_child[i]->set_user_index(old_child[i]->user_index());
+                                if (old_child[i]->user_flag_set())
+                                  new_child[i]->set_user_flag();
+                                else
+                                  new_child[i]->clear_user_flag();
+
+                                new_child[i]->clear_children();
+
+                                old_child[i]->clear_user_flag();
+                                old_child[i]->clear_user_index();
+                                old_child[i]->clear_used_flag();
+                              }
+                          }
+                        // now that we cared about the lines, go on
+                        // with the quads themselves, where we might
+                        // encounter similar situations...
+                        if (aniso_quad_ref_case==RefinementCase<dim-1>::cut_x)
+                          {
+                            new_line->set_children(0, quad->child(0)->line_index(1));
+                            Assert(new_line->child(1)==quad->child(2)->line(1),
+                                   ExcInternalError());
+                            // now everything is quite
+                            // complicated. we have the children
+                            // numbered according to
+                            //
+                            // *---*---*
+                            // |n+2|n+3|
+                            // *---*---*
+                            // | n |n+1|
+                            // *---*---*
+                            //
+                            // from the original isotropic
+                            // refinement. we have to reorder them as
+                            //
+                            // *---*---*
+                            // |n+1|n+3|
+                            // *---*---*
+                            // | n |n+2|
+                            // *---*---*
+                            //
+                            // such that n and n+1 are consecutive
+                            // children of m and n+2 and n+3 are
+                            // consecutive children of m+1, where m
+                            // and m+1 are given as in
+                            //
+                            // *---*---*
+                            // |   |   |
+                            // | m |m+1|
+                            // |   |   |
+                            // *---*---*
+                            //
+                            // this is a bit ugly, of course: loop
+                            // over all cells on all levels and look
+                            // for faces n+1 (switch_1) and n+2
+                            // (switch_2).
+                            const typename Triangulation<dim,spacedim>::quad_iterator
+                            switch_1=quad->child(1),
+                            switch_2=quad->child(2);
+                            const int switch_1_index=switch_1->index();
+                            const int switch_2_index=switch_2->index();
+                            for (unsigned int l=0; l<triangulation.levels.size(); ++l)
+                              for (unsigned int h=0; h<triangulation.levels[l]->cells.cells.size(); ++h)
+                                for (unsigned int q=0; q<GeometryInfo<dim>::faces_per_cell; ++q)
+                                  {
+                                    const int face_index=triangulation.levels[l]->cells.cells[h].face(q);
+                                    if (face_index==switch_1_index)
+                                      triangulation.levels[l]->cells.cells[h].set_face(q,switch_2_index);
+                                    else if (face_index==switch_2_index)
+                                      triangulation.levels[l]->cells.cells[h].set_face(q,switch_1_index);
+                                  }
+                            // now we have to copy all information of
+                            // the two quads
+                            const unsigned int switch_1_lines[4]=
+                            {
+                              switch_1->line_index(0),
+                              switch_1->line_index(1),
+                              switch_1->line_index(2),
+                              switch_1->line_index(3)
+                            };
+                            const bool switch_1_line_orientations[4]=
+                            {
+                              switch_1->line_orientation(0),
+                              switch_1->line_orientation(1),
+                              switch_1->line_orientation(2),
+                              switch_1->line_orientation(3)
+                            };
+                            const types::boundary_id switch_1_boundary_id=switch_1->boundary_id();
+                            const unsigned int switch_1_user_index=switch_1->user_index();
+                            const bool switch_1_user_flag=switch_1->user_flag_set();
+                            const RefinementCase<dim-1> switch_1_refinement_case=switch_1->refinement_case();
+                            const int switch_1_first_child_pair=(switch_1_refinement_case ? switch_1->child_index(0) : -1);
+                            const int switch_1_second_child_pair=(switch_1_refinement_case==RefinementCase<dim-1>::cut_xy ? switch_1->child_index(2) : -1);
+
+                            switch_1->set(internal::Triangulation::TriaObject<2>(switch_2->line_index(0),
+                                                                                 switch_2->line_index(1),
+                                                                                 switch_2->line_index(2),
+                                                                                 switch_2->line_index(3)));
+                            switch_1->set_line_orientation(0, switch_2->line_orientation(0));
+                            switch_1->set_line_orientation(1, switch_2->line_orientation(1));
+                            switch_1->set_line_orientation(2, switch_2->line_orientation(2));
+                            switch_1->set_line_orientation(3, switch_2->line_orientation(3));
+                            switch_1->set_boundary_id(switch_2->boundary_id());
+                            switch_1->set_manifold_id(switch_2->manifold_id());
+                            switch_1->set_user_index(switch_2->user_index());
+                            if (switch_2->user_flag_set())
+                              switch_1->set_user_flag();
+                            else
+                              switch_1->clear_user_flag();
+                            switch_1->clear_refinement_case();
+                            switch_1->set_refinement_case(switch_2->refinement_case());
+                            switch_1->clear_children();
+                            if (switch_2->refinement_case())
+                              switch_1->set_children(0, switch_2->child_index(0));
+                            if (switch_2->refinement_case()==RefinementCase<dim-1>::cut_xy)
+                              switch_1->set_children(2, switch_2->child_index(2));
+
+                            switch_2->set(internal::Triangulation::TriaObject<2>(switch_1_lines[0],
+                                                                                 switch_1_lines[1],
+                                                                                 switch_1_lines[2],
+                                                                                 switch_1_lines[3]));
+                            switch_2->set_line_orientation(0, switch_1_line_orientations[0]);
+                            switch_2->set_line_orientation(1, switch_1_line_orientations[1]);
+                            switch_2->set_line_orientation(2, switch_1_line_orientations[2]);
+                            switch_2->set_line_orientation(3, switch_1_line_orientations[3]);
+                            switch_2->set_boundary_id(switch_1_boundary_id);
+                            switch_2->set_manifold_id(switch_1->manifold_id());
+                            switch_2->set_user_index(switch_1_user_index);
+                            if (switch_1_user_flag)
+                              switch_2->set_user_flag();
+                            else
+                              switch_2->clear_user_flag();
+                            switch_2->clear_refinement_case();
+                            switch_2->set_refinement_case(switch_1_refinement_case);
+                            switch_2->clear_children();
+                            switch_2->set_children(0, switch_1_first_child_pair);
+                            switch_2->set_children(2, switch_1_second_child_pair);
+
+                            new_quads[0]->set_refinement_case(RefinementCase<2>::cut_y);
+                            new_quads[0]->set_children(0, quad->child_index(0));
+                            new_quads[1]->set_refinement_case(RefinementCase<2>::cut_y);
+                            new_quads[1]->set_children(0, quad->child_index(2));
+                          }
+                        else
+                          {
+                            new_quads[0]->set_refinement_case(RefinementCase<2>::cut_x);
+                            new_quads[0]->set_children(0, quad->child_index(0));
+                            new_quads[1]->set_refinement_case(RefinementCase<2>::cut_x);
+                            new_quads[1]->set_children(0, quad->child_index(2));
+                            new_line->set_children(0, quad->child(0)->line_index(3));
+                            Assert(new_line->child(1)==quad->child(1)->line(3),
+                                   ExcInternalError());
+                          }
+                        quad->clear_children();
+                      }
+
+                    // note these quads as children to the present one
+                    quad->set_children (0, new_quads[0]->index());
+
+                    quad->set_refinement_case(aniso_quad_ref_case);
+
+                    // finally clear flag indicating the need for
+                    // refinement
+                    quad->clear_user_data ();
+                  } // if (anisotropic refinement)
+
+                if (quad->user_flag_set())
+                  {
+                    // this quad needs to be refined isotropically
+
+                    // first of all: we only get here in the first run
+                    // of the loop
+                    Assert(loop==0,ExcInternalError());
+
+                    // find the next unused vertex. we'll need this in
+                    // any case
+                    while (triangulation.vertices_used[next_unused_vertex] == true)
+                      ++next_unused_vertex;
+                    Assert (next_unused_vertex < triangulation.vertices.size(),
+                            ExcMessage("Internal error: During refinement, the triangulation wants to access an element of the 'vertices' array but it turns out that the array is not large enough."));
+
+                    // now: if the quad is refined anisotropically
+                    // already, set the anisotropic refinement flag
+                    // for both children. Additionally, we have to
+                    // refine the inner line, as it is an outer line
+                    // of the two (anisotropic) children
+                    const RefinementCase<dim-1> quad_ref_case=quad->refinement_case();
+
+                    if (quad_ref_case==RefinementCase<dim-1>::cut_x ||
+                        quad_ref_case==RefinementCase<dim-1>::cut_y)
+                      {
+                        // set the 'opposite' refine case for children
+                        quad->child(0)->set_user_index(RefinementCase<dim-1>::cut_xy-quad_ref_case);
+                        quad->child(1)->set_user_index(RefinementCase<dim-1>::cut_xy-quad_ref_case);
+                        // refine the inner line
+                        typename Triangulation<dim,spacedim>::line_iterator middle_line;
+                        if (quad_ref_case==RefinementCase<dim-1>::cut_x)
+                          middle_line=quad->child(0)->line(1);
+                        else
+                          middle_line=quad->child(0)->line(3);
+
+                        // if the face has been refined
+                        // anisotropically in the last refinement step
+                        // it might be, that it is flagged already and
+                        // that the middle line is thus refined
+                        // already. if not create children.
+                        if (!middle_line->has_children())
+                          {
+                            // set the middle vertex
+                            // appropriately. double refinement of
+                            // quads can only happen in the interior
+                            // of the domain, so we need not care
+                            // about boundary quads here
+                            triangulation.vertices[next_unused_vertex]
+                              = middle_line->center(true);
+                            triangulation.vertices_used[next_unused_vertex] = true;
+
+                            // now search a slot for the two
+                            // child lines
+                            next_unused_line=triangulation.faces->lines.next_free_pair_object(triangulation);
+
+                            // set the child pointer of the present
+                            // line
+                            middle_line->set_children (0, next_unused_line->index());
+
+                            // set the two new lines
+                            const typename Triangulation<dim,spacedim>::raw_line_iterator
+                            children[2] = { next_unused_line,
+                                            ++next_unused_line
+                                          };
+
+                            // some tests; if any of the iterators
+                            // should be invalid, then already
+                            // dereferencing will fail
+                            Assert (children[0]->used() == false, ExcMessage("Internal error: We want to use a cell during refinement that should be unused, but turns out not to be."));
+                            Assert (children[1]->used() == false, ExcMessage("Internal error: We want to use a cell during refinement that should be unused, but turns out not to be."));
+
+                            children[0]->set (internal::Triangulation::
+                                              TriaObject<1>(middle_line->vertex_index(0),
+                                                            next_unused_vertex));
+                            children[1]->set (internal::Triangulation::
+                                              TriaObject<1>(next_unused_vertex,
+                                                            middle_line->vertex_index(1)));
+
+                            children[0]->set_used_flag();
+                            children[1]->set_used_flag();
+                            children[0]->clear_children();
+                            children[1]->clear_children();
+                            children[0]->clear_user_data();
+                            children[1]->clear_user_data();
+                            children[0]->clear_user_flag();
+                            children[1]->clear_user_flag();
+
+                            children[0]->set_boundary_id (middle_line->boundary_id());
+                            children[1]->set_boundary_id (middle_line->boundary_id());
+
+                            children[0]->set_manifold_id (middle_line->manifold_id());
+                            children[1]->set_manifold_id (middle_line->manifold_id());
+                          }
+                        // now remove the flag from the quad and go to
+                        // the next quad, the actual refinement of the
+                        // quad takes place later on in this pass of
+                        // the loop or in the next one
+                        quad->clear_user_flag();
+                        continue;
+                      } // if (several refinement cases)
+
+                    // if we got here, we have an unrefined quad and
+                    // have to do the usual work like in a purely
+                    // isotropic refinement
+                    Assert(quad_ref_case==RefinementCase<dim-1>::no_refinement, ExcInternalError());
+
+                    // set the middle vertex
+                    // appropriately
+                    if (quad->at_boundary() ||
+                        (quad->manifold_id() != numbers::invalid_manifold_id) )
+                      triangulation.vertices[next_unused_vertex]
+                        = quad->center(true);
+                    else
+                      {
+                        // it might be that the quad itself is not at
+                        // the boundary, but that one of its lines
+                        // actually is. in this case, the newly
+                        // created vertices at the centers of the
+                        // lines are not necessarily the mean values
+                        // of the adjacent vertices, so do not compute
+                        // the new vertex as the mean value of the 4
+                        // vertices of the face, but rather as a
+                        // weighted mean value of the 8 vertices which
+                        // we already have (the four old ones, and the
+                        // four ones inserted as middle points for the
+                        // four lines). summing up some more points is
+                        // generally cheaper than first asking whether
+                        // one of the lines is at the boundary
+                        //
+                        // note that the exact weights are chosen such
+                        // as to minimize the distortion of the four
+                        // new quads from the optimal shape; their
+                        // derivation and values are copied over from
+                        // the @p{MappingQ::set_laplace_on_vector}
+                        // function
+                        triangulation.vertices[next_unused_vertex] =
+                          quad->center(true, true);
+                      }
+                    triangulation.vertices_used[next_unused_vertex] = true;
+                    // now that we created the right point, make up
+                    // the four lines interior to the quad (++ takes
+                    // care of the end of the vector)
+                    typename Triangulation<dim,spacedim>::raw_line_iterator new_lines[4];
+
+                    for (unsigned int i=0; i<4; ++i)
+                      {
+                        if (i%2==0)
+                          // search a free pair of lines for 0. and
+                          // 2. line, so that two of them end up
+                          // together, which is necessary if later on
+                          // we want to refine the quad
+                          // anisotropically and the two lines end up
+                          // as children of new line
+                          next_unused_line=triangulation.faces->lines.next_free_pair_object(triangulation);
+
+                        new_lines[i] = next_unused_line;
+                        ++next_unused_line;
+
+                        Assert (new_lines[i]->used() == false,
+                                ExcMessage("Internal error: We want to use a cell during refinement that should be unused, but turns out not to be."));
+                      }
+
+                    // set the data of the four lines.  first collect
+                    // the indices of the five vertices:
+                    //
+                    // *--3--*
+                    // |  |  |
+                    // 0--4--1
+                    // |  |  |
+                    // *--2--*
+                    //
+                    // the lines are numbered as follows:
+                    //
+                    // *--*--*
+                    // |  1  |
+                    // *2-*-3*
+                    // |  0  |
+                    // *--*--*
+
+                    const unsigned int vertex_indices[5]
+                      = { quad->line(0)->child(0)->vertex_index(1),
+                          quad->line(1)->child(0)->vertex_index(1),
+                          quad->line(2)->child(0)->vertex_index(1),
+                          quad->line(3)->child(0)->vertex_index(1),
+                          next_unused_vertex
+                        };
+
+                    new_lines[0]->set (internal::Triangulation::
+                                       TriaObject<1>(vertex_indices[2], vertex_indices[4]));
+                    new_lines[1]->set (internal::Triangulation::
+                                       TriaObject<1>(vertex_indices[4], vertex_indices[3]));
+                    new_lines[2]->set (internal::Triangulation::
+                                       TriaObject<1>(vertex_indices[0], vertex_indices[4]));
+                    new_lines[3]->set (internal::Triangulation::
+                                       TriaObject<1>(vertex_indices[4], vertex_indices[1]));
+
+                    for (unsigned int i=0; i<4; ++i)
+                      {
+                        new_lines[i]->set_used_flag();
+                        new_lines[i]->clear_user_flag();
+                        new_lines[i]->clear_user_data();
+                        new_lines[i]->clear_children();
+                        new_lines[i]->set_boundary_id(quad->boundary_id());
+                        new_lines[i]->set_manifold_id(quad->manifold_id());
+                      }
+
+                    // now for the quads. again, first collect some
+                    // data about the indices of the lines, with the
+                    // following numbering:
+                    //
+                    //   .-6-.-7-.
+                    //   1   9   3
+                    //   .-10.11-.
+                    //   0   8   2
+                    //   .-4-.-5-.
+
+                    // child 0 and 1 of a line are switched if the
+                    // line orientation is false. set up a miniature
+                    // table, indicating which child to take for line
+                    // orientations false and true. first index: child
+                    // index in standard orientation, second index:
+                    // line orientation
+                    const unsigned int index[2][2]=
+                    {
+                      {1,0},   // child 0, line_orientation=false and true
+                      {0,1}
+                    };  // child 1, line_orientation=false and true
+
+                    const int line_indices[12]
+                      = { quad->line(0)->child(index[0][quad->line_orientation(0)])->index(),
+                          quad->line(0)->child(index[1][quad->line_orientation(0)])->index(),
+                          quad->line(1)->child(index[0][quad->line_orientation(1)])->index(),
+                          quad->line(1)->child(index[1][quad->line_orientation(1)])->index(),
+                          quad->line(2)->child(index[0][quad->line_orientation(2)])->index(),
+                          quad->line(2)->child(index[1][quad->line_orientation(2)])->index(),
+                          quad->line(3)->child(index[0][quad->line_orientation(3)])->index(),
+                          quad->line(3)->child(index[1][quad->line_orientation(3)])->index(),
+                          new_lines[0]->index(),
+                          new_lines[1]->index(),
+                          new_lines[2]->index(),
+                          new_lines[3]->index()
+                        };
+
+                    // find some space (consecutive)
+                    // for the first two newly to be
+                    // created quads.
+                    typename Triangulation<dim,spacedim>::raw_quad_iterator new_quads[4];
+
+                    next_unused_quad=triangulation.faces->quads.next_free_pair_object(triangulation);
+
+                    new_quads[0] = next_unused_quad;
+                    Assert (new_quads[0]->used() == false, ExcMessage("Internal error: We want to use a cell during refinement that should be unused, but turns out not to be."));
+
+                    ++next_unused_quad;
+                    new_quads[1] = next_unused_quad;
+                    Assert (new_quads[1]->used() == false, ExcMessage("Internal error: We want to use a cell during refinement that should be unused, but turns out not to be."));
+
+                    next_unused_quad=triangulation.faces->quads.next_free_pair_object(triangulation);
+                    new_quads[2] = next_unused_quad;
+                    Assert (new_quads[2]->used() == false, ExcMessage("Internal error: We want to use a cell during refinement that should be unused, but turns out not to be."));
+
+                    ++next_unused_quad;
+                    new_quads[3] = next_unused_quad;
+                    Assert (new_quads[3]->used() == false, ExcMessage("Internal error: We want to use a cell during refinement that should be unused, but turns out not to be."));
+
+                    // note these quads as children to the present one
+                    quad->set_children (0, new_quads[0]->index());
+                    quad->set_children (2, new_quads[2]->index());
+                    new_quads[0]->set (internal::Triangulation
+                                       ::TriaObject<2>(line_indices[0],
+                                                       line_indices[8],
+                                                       line_indices[4],
+                                                       line_indices[10]));
+
+                    quad->set_refinement_case(RefinementCase<2>::cut_xy);
+
+                    new_quads[0]->set (internal::Triangulation
+                                       ::TriaObject<2>(line_indices[0],
+                                                       line_indices[8],
+                                                       line_indices[4],
+                                                       line_indices[10]));
+                    new_quads[1]->set (internal::Triangulation
+                                       ::TriaObject<2>(line_indices[8],
+                                                       line_indices[2],
+                                                       line_indices[5],
+                                                       line_indices[11]));
+                    new_quads[2]->set (internal::Triangulation
+                                       ::TriaObject<2>(line_indices[1],
+                                                       line_indices[9],
+                                                       line_indices[10],
+                                                       line_indices[6]));
+                    new_quads[3]->set (internal::Triangulation
+                                       ::TriaObject<2>(line_indices[9],
+                                                       line_indices[3],
+                                                       line_indices[11],
+                                                       line_indices[7]));
+                    for (unsigned int i=0; i<4; ++i)
+                      {
+                        new_quads[i]->set_used_flag();
+                        new_quads[i]->clear_user_flag();
+                        new_quads[i]->clear_user_data();
+                        new_quads[i]->clear_children();
+                        new_quads[i]->set_boundary_id (quad->boundary_id());
+                        new_quads[i]->set_manifold_id (quad->manifold_id());
+                        // set all line orientations to true, change
+                        // this after the loop, as we have to consider
+                        // different lines for each child
+                        for (unsigned int j=0; j<GeometryInfo<dim>::lines_per_face; ++j)
+                          new_quads[i]->set_line_orientation(j,true);
+                      }
+                    // now set the line orientation of children of
+                    // outer lines correctly, the lines in the
+                    // interior of the refined quad are automatically
+                    // oriented conforming to the standard
+                    new_quads[0]->set_line_orientation(0,quad->line_orientation(0));
+                    new_quads[0]->set_line_orientation(2,quad->line_orientation(2));
+                    new_quads[1]->set_line_orientation(1,quad->line_orientation(1));
+                    new_quads[1]->set_line_orientation(2,quad->line_orientation(2));
+                    new_quads[2]->set_line_orientation(0,quad->line_orientation(0));
+                    new_quads[2]->set_line_orientation(3,quad->line_orientation(3));
+                    new_quads[3]->set_line_orientation(1,quad->line_orientation(1));
+                    new_quads[3]->set_line_orientation(3,quad->line_orientation(3));
+
+                    // finally clear flag indicating the need for
+                    // refinement
+                    quad->clear_user_flag ();
+                  } // if (isotropic refinement)
+              } // for all quads
+          } // looped two times over all quads, all quads refined now
+
+        ///////////////////////////////////
+        // Now, finally, set up the new
+        // cells
+        ///////////////////////////////////
+
+        typename Triangulation<3,spacedim>::DistortedCellList
+        cells_with_distorted_children;
+
+        for (unsigned int level=0; level!=triangulation.levels.size()-1; ++level)
+          {
+            // only active objects can be refined further; remember
+            // that we won't operate on the finest level, so
+            // triangulation.begin_*(level+1) is allowed
+            typename Triangulation<dim,spacedim>::active_hex_iterator
+            hex  = triangulation.begin_active_hex(level),
+            endh = triangulation.begin_active_hex(level+1);
+            typename Triangulation<dim,spacedim>::raw_hex_iterator
+            next_unused_hex  = triangulation.begin_raw_hex (level+1);
+
+            for (; hex!=endh; ++hex)
+              if (hex->refine_flag_set())
+                {
+                  // this hex needs to be refined
+
+                  // clear flag indicating the need for refinement. do
+                  // it here already, since we can't do it anymore
+                  // once the cell has children
+                  const RefinementCase<dim> ref_case=hex->refine_flag_set();
+                  hex->clear_refine_flag ();
+                  hex->set_refinement_case(ref_case);
+
+                  // depending on the refine case we might have to
+                  // create additional vertices, lines and quads
+                  // interior of the hex before the actual children
+                  // can be set up.
+
+                  // in a first step: reserve the needed space for
+                  // lines, quads and hexes and initialize them
+                  // correctly
+
+                  unsigned int n_new_lines=0;
+                  unsigned int n_new_quads=0;
+                  unsigned int n_new_hexes=0;
+                  switch (ref_case)
+                    {
+                    case RefinementCase<dim>::cut_x:
+                    case RefinementCase<dim>::cut_y:
+                    case RefinementCase<dim>::cut_z:
+                      n_new_lines=0;
+                      n_new_quads=1;
+                      n_new_hexes=2;
+                      break;
+                    case RefinementCase<dim>::cut_xy:
+                    case RefinementCase<dim>::cut_xz:
+                    case RefinementCase<dim>::cut_yz:
+                      n_new_lines=1;
+                      n_new_quads=4;
+                      n_new_hexes=4;
+                      break;
+                    case RefinementCase<dim>::cut_xyz:
+                      n_new_lines=6;
+                      n_new_quads=12;
+                      n_new_hexes=8;
+                      break;
+                    default:
+                      Assert(false, ExcInternalError());
+                      break;
+                    }
+
+                  // find some space for the newly to be created
+                  // interior lines and initialize them.
+                  std::vector<typename Triangulation<dim,spacedim>::raw_line_iterator>
+                  new_lines(n_new_lines);
+                  for (unsigned int i=0; i<n_new_lines; ++i)
+                    {
+                      new_lines[i] = triangulation.faces->lines.next_free_single_object(triangulation);
+
+                      Assert (new_lines[i]->used() == false,
+                              ExcMessage("Internal error: We want to use a cell during refinement that should be unused, but turns out not to be."));
+                      new_lines[i]->set_used_flag();
+                      new_lines[i]->clear_user_flag();
+                      new_lines[i]->clear_user_data();
+                      new_lines[i]->clear_children();
+                      // interior line
+                      new_lines[i]->set_boundary_id(numbers::internal_face_boundary_id);
+                      // they inherit geometry description of the hex they belong to
+                      new_lines[i]->set_manifold_id(hex->manifold_id());
+                    }
+
+                  // find some space for the newly to be created
+                  // interior quads and initialize them.
+                  std::vector<typename Triangulation<dim,spacedim>::raw_quad_iterator>
+                  new_quads(n_new_quads);
+                  for (unsigned int i=0; i<n_new_quads; ++i)
+                    {
+                      new_quads[i] = triangulation.faces->quads.next_free_single_object(triangulation);
+
+                      Assert (new_quads[i]->used() == false,
+                              ExcMessage("Internal error: We want to use a cell during refinement that should be unused, but turns out not to be."));
+                      new_quads[i]->set_used_flag();
+                      new_quads[i]->clear_user_flag();
+                      new_quads[i]->clear_user_data();
+                      new_quads[i]->clear_children();
+                      // interior quad
+                      new_quads[i]->set_boundary_id (numbers::internal_face_boundary_id);
+                      // they inherit geometry description of the hex they belong to
+                      new_quads[i]->set_manifold_id (hex->manifold_id());
+                      // set all line orientation flags to true by
+                      // default, change this afterwards, if necessary
+                      for (unsigned int j=0; j<GeometryInfo<dim>::lines_per_face; ++j)
+                        new_quads[i]->set_line_orientation(j,true);
+                    }
+
+                  types::subdomain_id subdomainid = hex->subdomain_id();
+
+                  // find some space for the newly to be created hexes
+                  // and initialize them.
+                  std::vector<typename Triangulation<dim,spacedim>::raw_hex_iterator>
+                  new_hexes(n_new_hexes);
+                  for (unsigned int i=0; i<n_new_hexes; ++i)
+                    {
+                      if (i%2==0)
+                        next_unused_hex=triangulation.levels[level+1]->cells.next_free_hex(triangulation,level+1);
+                      else
+                        ++next_unused_hex;
+
+                      new_hexes[i]=next_unused_hex;
+
+                      Assert (new_hexes[i]->used() == false,
+                              ExcMessage("Internal error: We want to use a cell during refinement that should be unused, but turns out not to be."));
+                      new_hexes[i]->set_used_flag();
+                      new_hexes[i]->clear_user_flag();
+                      new_hexes[i]->clear_user_data();
+                      new_hexes[i]->clear_children();
+                      // inherit material
+                      // properties
+                      new_hexes[i]->set_material_id (hex->material_id());
+                      new_hexes[i]->set_manifold_id (hex->manifold_id());
+                      new_hexes[i]->set_subdomain_id (subdomainid);
+
+                      if (i%2)
+                        new_hexes[i]->set_parent (hex->index ());
+                      // set the face_orientation flag to true for all
+                      // faces initially, as this is the default value
+                      // which is true for all faces interior to the
+                      // hex. later on go the other way round and
+                      // reset faces that are at the boundary of the
+                      // mother cube
+                      //
+                      // the same is true for the face_flip and
+                      // face_rotation flags. however, the latter two
+                      // are set to false by default as this is the
+                      // standard value
+                      for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+                        {
+                          new_hexes[i]->set_face_orientation(f, true);
+                          new_hexes[i]->set_face_flip(f, false);
+                          new_hexes[i]->set_face_rotation(f, false);
+                        }
+                    }
+                  // note these hexes as children to the present cell
+                  for (unsigned int i=0; i<n_new_hexes/2; ++i)
+                    hex->set_children (2*i, new_hexes[2*i]->index());
+
+                  // we have to take into account whether the
+                  // different faces are oriented correctly or in the
+                  // opposite direction, so store that up front
+
+                  // face_orientation
+                  const bool f_or[6]
+                    = { hex->face_orientation (0),
+                        hex->face_orientation (1),
+                        hex->face_orientation (2),
+                        hex->face_orientation (3),
+                        hex->face_orientation (4),
+                        hex->face_orientation (5)
+                      };
+
+                  // face_flip
+                  const bool f_fl[6]
+                    = { hex->face_flip (0),
+                        hex->face_flip (1),
+                        hex->face_flip (2),
+                        hex->face_flip (3),
+                        hex->face_flip (4),
+                        hex->face_flip (5)
+                      };
+
+                  // face_rotation
+                  const bool f_ro[6]
+                    = { hex->face_rotation (0),
+                        hex->face_rotation (1),
+                        hex->face_rotation (2),
+                        hex->face_rotation (3),
+                        hex->face_rotation (4),
+                        hex->face_rotation (5)
+                      };
+
+                  // some commonly used fields which
+                  // have varying size
+                  const unsigned int *vertex_indices=0;
+                  const typename Triangulation<dim,spacedim>::raw_line_iterator
+                  *lines=0;
+                  const unsigned int *line_indices=0;
+                  const bool *line_orientation=0;
+                  const int *quad_indices=0;
+
+                  // little helper table indicating whether the child
+                  // with index 0 or with index 1 can be found at the
+                  // standard origin of an anisotropically refined
+                  // quad in real orientation.
+                  // index 1: (RefinementCase - 1)
+                  // index 2: face_flip
+                  // index 3: face_rotation
+                  // note: face orientation has no influence
+                  const unsigned int child_at_origin[2][2][2]=
+                  {
+                    { { 0, 0 },  // RefinementCase<dim>::cut_x, face_flip=false, face_rotation=false and true
+                      { 1, 1 }
+                    }, // RefinementCase<dim>::cut_x, face_flip=true,  face_rotation=false and true
+                    { { 0, 1 },  // RefinementCase<dim>::cut_y, face_flip=false, face_rotation=false and true
+                      { 1, 0 }
+                    }
+                  };// RefinementCase<dim>::cut_y, face_flip=true,  face_rotation=false and true
+
+                  ///////////////////////////////////////
+                  //
+                  // in the following we will do the same thing for
+                  // each refinement case: create a new vertex (if
+                  // needed), create new interior lines (if needed),
+                  // create new interior quads and afterwards build
+                  // the children hexes out of these and the existing
+                  // subfaces of the outer quads (which have been
+                  // created above). However, even if the steps are
+                  // quite similar, the actual work strongly depends
+                  // on the actual refinement case. therefore, we use
+                  // separate blocks of code for each of these cases,
+                  // which hopefully increases the readability to some
+                  // extent.
+
+                  switch (ref_case)
+                    {
+                    case RefinementCase<dim>::cut_x:
+                    {
+                      //////////////////////////////
+                      //
+                      //     RefinementCase<dim>::cut_x
+                      //
+                      // the refined cube will look
+                      // like this:
+                      //
+                      //        *----*----*
+                      //       /    /    /|
+                      //      /    /    / |
+                      //     /    /    /  |
+                      //    *----*----*   |
+                      //    |    |    |   |
+                      //    |    |    |   *
+                      //    |    |    |  /
+                      //    |    |    | /
+                      //    |    |    |/
+                      //    *----*----*
+                      //
+                      // again, first collect some data about the
+                      // indices of the lines, with the following
+                      // numbering:
+
+                      // face 2: front plane
+                      //   (note: x,y exchanged)
+                      //   *---*---*
+                      //   |   |   |
+                      //   |   0   |
+                      //   |   |   |
+                      //   *---*---*
+                      //       m0
+                      // face 3: back plane
+                      //   (note: x,y exchanged)
+                      //       m1
+                      //   *---*---*
+                      //   |   |   |
+                      //   |   1   |
+                      //   |   |   |
+                      //   *---*---*
+                      // face 4: bottom plane
+                      //       *---*---*
+                      //      /   /   /
+                      //     /   2   /
+                      //    /   /   /
+                      //   *---*---*
+                      //       m0
+                      // face 5: top plane
+                      //           m1
+                      //       *---*---*
+                      //      /   /   /
+                      //     /   3   /
+                      //    /   /   /
+                      //   *---*---*
+
+                      // set up a list of line iterators first. from
+                      // this, construct lists of line_indices and
+                      // line orientations later on
+                      const typename Triangulation<dim,spacedim>::raw_line_iterator
+                      lines_x[4]
+                      =
+                      {
+                        hex->face(2)->child(0)
+                        ->line((hex->face(2)->refinement_case() == RefinementCase<2>::cut_x) ? 1 : 3),        //0
+                        hex->face(3)->child(0)
+                        ->line((hex->face(3)->refinement_case() == RefinementCase<2>::cut_x) ? 1 : 3),        //1
+                        hex->face(4)->child(0)
+                        ->line((hex->face(4)->refinement_case() == RefinementCase<2>::cut_x) ? 1 : 3),        //2
+                        hex->face(5)->child(0)
+                        ->line((hex->face(5)->refinement_case() == RefinementCase<2>::cut_x) ? 1 : 3)         //3
+                      };
+
+                      lines=&lines_x[0];
+
+                      unsigned int line_indices_x[4];
+
+                      for (unsigned int i=0; i<4; ++i)
+                        line_indices_x[i]=lines[i]->index();
+                      line_indices=&line_indices_x[0];
+
+                      // the orientation of lines for the inner quads
+                      // is quite tricky. as these lines are newly
+                      // created ones and thus have no parents, they
+                      // cannot inherit this property. set up an array
+                      // and fill it with the respective values
+                      bool line_orientation_x[4];
+
+                      // the middle vertex marked as m0 above is the
+                      // start vertex for lines 0 and 2 in standard
+                      // orientation, whereas m1 is the end vertex of
+                      // lines 1 and 3 in standard orientation
+                      const unsigned int middle_vertices[2]=
+                      {
+                        hex->line(2)->child(0)->vertex_index(1),
+                        hex->line(7)->child(0)->vertex_index(1)
+                      };
+
+                      for (unsigned int i=0; i<4; ++i)
+                        if (lines[i]->vertex_index(i%2)==middle_vertices[i%2])
+                          line_orientation_x[i]=true;
+                        else
+                          {
+                            // it must be the other
+                            // way round then
+                            Assert(lines[i]->vertex_index((i+1)%2)==middle_vertices[i%2],
+                                   ExcInternalError());
+                            line_orientation_x[i]=false;
+                          }
+
+                      line_orientation=&line_orientation_x[0];
+
+                      // set up the new quad, line numbering is as
+                      // indicated above
+                      new_quads[0]->set (internal::Triangulation
+                                         ::TriaObject<2>(line_indices[0],
+                                                         line_indices[1],
+                                                         line_indices[2],
+                                                         line_indices[3]));
+
+                      new_quads[0]->set_line_orientation(0,line_orientation[0]);
+                      new_quads[0]->set_line_orientation(1,line_orientation[1]);
+                      new_quads[0]->set_line_orientation(2,line_orientation[2]);
+                      new_quads[0]->set_line_orientation(3,line_orientation[3]);
+
+                      // the quads are numbered as follows:
+                      //
+                      // planes in the interior of the old hex:
+                      //
+                      //      *
+                      //     /|
+                      //    / | x
+                      //   /  | *-------*      *---------*
+                      //  *   | |       |     /         /
+                      //  | 0 | |       |    /         /
+                      //  |   * |       |   /         /
+                      //  |  /  *-------*y *---------*x
+                      //  | /
+                      //  |/
+                      //  *
+                      //
+                      // children of the faces of the old hex
+                      //
+                      //      *---*---*        *---*---*
+                      //     /|   |   |       /   /   /|
+                      //    / |   |   |      / 9 / 10/ |
+                      //   /  | 5 | 6 |     /   /   /  |
+                      //  *   |   |   |    *---*---*   |
+                      //  | 1 *---*---*    |   |   | 2 *
+                      //  |  /   /   /     |   |   |  /
+                      //  | / 7 / 8 /      | 3 | 4 | /
+                      //  |/   /   /       |   |   |/
+                      //  *---*---*        *---*---*
+                      //
+                      // note that we have to take care of the
+                      // orientation of faces.
+                      const int quad_indices_x[11]
+                      =
+                      {
+                        new_quads[0]->index(),     //0
+
+                        hex->face(0)->index(),     //1
+
+                        hex->face(1)->index(),     //2
+
+                        hex->face(2)->child_index(  child_at_origin[hex->face(2)->refinement_case()-1][f_fl[2]][f_ro[2]]),  //3
+                        hex->face(2)->child_index(1-child_at_origin[hex->face(2)->refinement_case()-1][f_fl[2]][f_ro[2]]),
+
+                        hex->face(3)->child_index(  child_at_origin[hex->face(3)->refinement_case()-1][f_fl[3]][f_ro[3]]),  //5
+                        hex->face(3)->child_index(1-child_at_origin[hex->face(3)->refinement_case()-1][f_fl[3]][f_ro[3]]),
+
+                        hex->face(4)->child_index(  child_at_origin[hex->face(4)->refinement_case()-1][f_fl[4]][f_ro[4]]),  //7
+                        hex->face(4)->child_index(1-child_at_origin[hex->face(4)->refinement_case()-1][f_fl[4]][f_ro[4]]),
+
+                        hex->face(5)->child_index(  child_at_origin[hex->face(5)->refinement_case()-1][f_fl[5]][f_ro[5]]),  //9
+                        hex->face(5)->child_index(1-child_at_origin[hex->face(5)->refinement_case()-1][f_fl[5]][f_ro[5]])
+
+                      };
+                      quad_indices=&quad_indices_x[0];
+
+                      new_hexes[0]->set (internal::Triangulation
+                                         ::TriaObject<3>(quad_indices[1],
+                                                         quad_indices[0],
+                                                         quad_indices[3],
+                                                         quad_indices[5],
+                                                         quad_indices[7],
+                                                         quad_indices[9]));
+                      new_hexes[1]->set (internal::Triangulation
+                                         ::TriaObject<3>(quad_indices[0],
+                                                         quad_indices[2],
+                                                         quad_indices[4],
+                                                         quad_indices[6],
+                                                         quad_indices[8],
+                                                         quad_indices[10]));
+                      break;
+                    }
+                    case RefinementCase<dim>::cut_y:
+                    {
+                      //////////////////////////////
+                      //
+                      //     RefinementCase<dim>::cut_y
+                      //
+                      // the refined cube will look like this:
+                      //
+                      //        *---------*
+                      //       /         /|
+                      //      *---------* |
+                      //     /         /| |
+                      //    *---------* | |
+                      //    |         | | |
+                      //    |         | | *
+                      //    |         | |/
+                      //    |         | *
+                      //    |         |/
+                      //    *---------*
+                      //
+                      // again, first collect some data about the
+                      // indices of the lines, with the following
+                      // numbering:
+
+                      // face 0: left plane
+                      //       *
+                      //      /|
+                      //     * |
+                      //    /| |
+                      //   * | |
+                      //   | 0 |
+                      //   | | *
+                      //   | |/
+                      //   | *m0
+                      //   |/
+                      //   *
+                      // face 1: right plane
+                      //       *
+                      //      /|
+                      //   m1* |
+                      //    /| |
+                      //   * | |
+                      //   | 1 |
+                      //   | | *
+                      //   | |/
+                      //   | *
+                      //   |/
+                      //   *
+                      // face 4: bottom plane
+                      //       *-------*
+                      //      /       /
+                      //   m0*---2---*
+                      //    /       /
+                      //   *-------*
+                      // face 5: top plane
+                      //       *-------*
+                      //      /       /
+                      //     *---3---*m1
+                      //    /       /
+                      //   *-------*
+
+                      // set up a list of line iterators first. from
+                      // this, construct lists of line_indices and
+                      // line orientations later on
+                      const typename Triangulation<dim,spacedim>::raw_line_iterator
+                      lines_y[4]
+                      =
+                      {
+                        hex->face(0)->child(0)
+                        ->line((hex->face(0)->refinement_case() == RefinementCase<2>::cut_x) ? 1 : 3),        //0
+                        hex->face(1)->child(0)
+                        ->line((hex->face(1)->refinement_case() == RefinementCase<2>::cut_x) ? 1 : 3),        //1
+                        hex->face(4)->child(0)
+                        ->line((hex->face(4)->refinement_case() == RefinementCase<2>::cut_x) ? 1 : 3),        //2
+                        hex->face(5)->child(0)
+                        ->line((hex->face(5)->refinement_case() == RefinementCase<2>::cut_x) ? 1 : 3)         //3
+                      };
+
+                      lines=&lines_y[0];
+
+                      unsigned int line_indices_y[4];
+
+                      for (unsigned int i=0; i<4; ++i)
+                        line_indices_y[i]=lines[i]->index();
+                      line_indices=&line_indices_y[0];
+
+                      // the orientation of lines for the inner quads
+                      // is quite tricky. as these lines are newly
+                      // created ones and thus have no parents, they
+                      // cannot inherit this property. set up an array
+                      // and fill it with the respective values
+                      bool line_orientation_y[4];
+
+                      // the middle vertex marked as m0 above is the
+                      // start vertex for lines 0 and 2 in standard
+                      // orientation, whereas m1 is the end vertex of
+                      // lines 1 and 3 in standard orientation
+                      const unsigned int middle_vertices[2]=
+                      {
+                        hex->line(0)->child(0)->vertex_index(1),
+                        hex->line(5)->child(0)->vertex_index(1)
+                      };
+
+                      for (unsigned int i=0; i<4; ++i)
+                        if (lines[i]->vertex_index(i%2)==middle_vertices[i%2])
+                          line_orientation_y[i]=true;
+                        else
+                          {
+                            // it must be the other way round then
+                            Assert(lines[i]->vertex_index((i+1)%2)==middle_vertices[i%2],
+                                   ExcInternalError());
+                            line_orientation_y[i]=false;
+                          }
+
+                      line_orientation=&line_orientation_y[0];
+
+                      // set up the new quad, line numbering is as
+                      // indicated above
+                      new_quads[0]->set (internal::Triangulation
+                                         ::TriaObject<2>(line_indices[2],
+                                                         line_indices[3],
+                                                         line_indices[0],
+                                                         line_indices[1]));
+
+                      new_quads[0]->set_line_orientation(0,line_orientation[2]);
+                      new_quads[0]->set_line_orientation(1,line_orientation[3]);
+                      new_quads[0]->set_line_orientation(2,line_orientation[0]);
+                      new_quads[0]->set_line_orientation(3,line_orientation[1]);
+
+                      // the quads are numbered as follows:
+                      //
+                      // planes in the interior of the old hex:
+                      //
+                      //      *
+                      //     /|
+                      //    / | x
+                      //   /  | *-------*      *---------*
+                      //  *   | |       |     /         /
+                      //  |   | |   0   |    /         /
+                      //  |   * |       |   /         /
+                      //  |  /  *-------*y *---------*x
+                      //  | /
+                      //  |/
+                      //  *
+                      //
+                      // children of the faces of the old hex
+                      //
+                      //      *-------*        *-------*
+                      //     /|       |       /   10  /|
+                      //    * |       |      *-------* |
+                      //   /| |   6   |     /   9   /| |
+                      //  * |2|       |    *-------* |4|
+                      //  | | *-------*    |       | | *
+                      //  |1|/   8   /     |       |3|/
+                      //  | *-------*      |   5   | *
+                      //  |/   7   /       |       |/
+                      //  *-------*        *-------*
+                      //
+                      // note that we have to take care of the
+                      // orientation of faces.
+                      const int quad_indices_y[11]
+                      =
+                      {
+                        new_quads[0]->index(),     //0
+
+                        hex->face(0)->child_index(  child_at_origin[hex->face(0)->refinement_case()-1][f_fl[0]][f_ro[0]]),  //1
+                        hex->face(0)->child_index(1-child_at_origin[hex->face(0)->refinement_case()-1][f_fl[0]][f_ro[0]]),
+
+                        hex->face(1)->child_index(  child_at_origin[hex->face(1)->refinement_case()-1][f_fl[1]][f_ro[1]]),  //3
+                        hex->face(1)->child_index(1-child_at_origin[hex->face(1)->refinement_case()-1][f_fl[1]][f_ro[1]]),
+
+                        hex->face(2)->index(),     //5
+
+                        hex->face(3)->index(),     //6
+
+                        hex->face(4)->child_index(  child_at_origin[hex->face(4)->refinement_case()-1][f_fl[4]][f_ro[4]]),  //7
+                        hex->face(4)->child_index(1-child_at_origin[hex->face(4)->refinement_case()-1][f_fl[4]][f_ro[4]]),
+
+                        hex->face(5)->child_index(  child_at_origin[hex->face(5)->refinement_case()-1][f_fl[5]][f_ro[5]]),  //9
+                        hex->face(5)->child_index(1-child_at_origin[hex->face(5)->refinement_case()-1][f_fl[5]][f_ro[5]])
+
+                      };
+                      quad_indices=&quad_indices_y[0];
+
+                      new_hexes[0]->set (internal::Triangulation
+                                         ::TriaObject<3>(quad_indices[1],
+                                                         quad_indices[3],
+                                                         quad_indices[5],
+                                                         quad_indices[0],
+                                                         quad_indices[7],
+                                                         quad_indices[9]));
+                      new_hexes[1]->set (internal::Triangulation
+                                         ::TriaObject<3>(quad_indices[2],
+                                                         quad_indices[4],
+                                                         quad_indices[0],
+                                                         quad_indices[6],
+                                                         quad_indices[8],
+                                                         quad_indices[10]));
+                      break;
+                    }
+                    case RefinementCase<dim>::cut_z:
+                    {
+                      //////////////////////////////
+                      //
+                      //     RefinementCase<dim>::cut_z
+                      //
+                      // the refined cube will look like this:
+                      //
+                      //        *---------*
+                      //       /         /|
+                      //      /         / |
+                      //     /         /  *
+                      //    *---------*  /|
+                      //    |         | / |
+                      //    |         |/  *
+                      //    *---------*  /
+                      //    |         | /
+                      //    |         |/
+                      //    *---------*
+                      //
+                      // again, first collect some data about the
+                      // indices of the lines, with the following
+                      // numbering:
+
+                      // face 0: left plane
+                      //       *
+                      //      /|
+                      //     / |
+                      //    /  *
+                      //   *  /|
+                      //   | 0 |
+                      //   |/  *
+                      // m0*  /
+                      //   | /
+                      //   |/
+                      //   *
+                      // face 1: right plane
+                      //       *
+                      //      /|
+                      //     / |
+                      //    /  *m1
+                      //   *  /|
+                      //   | 1 |
+                      //   |/  *
+                      //   *  /
+                      //   | /
+                      //   |/
+                      //   *
+                      // face 2: front plane
+                      //   (note: x,y exchanged)
+                      //   *-------*
+                      //   |       |
+                      // m0*---2---*
+                      //   |       |
+                      //   *-------*
+                      // face 3: back plane
+                      //   (note: x,y exchanged)
+                      //   *-------*
+                      //   |       |
+                      //   *---3---*m1
+                      //   |       |
+                      //   *-------*
+
+                      // set up a list of line iterators first. from
+                      // this, construct lists of line_indices and
+                      // line orientations later on
+                      const typename Triangulation<dim,spacedim>::raw_line_iterator
+                      lines_z[4]
+                      =
+                      {
+                        hex->face(0)->child(0)
+                        ->line((hex->face(0)->refinement_case() == RefinementCase<2>::cut_x) ? 1 : 3),        //0
+                        hex->face(1)->child(0)
+                        ->line((hex->face(1)->refinement_case() == RefinementCase<2>::cut_x) ? 1 : 3),        //1
+                        hex->face(2)->child(0)
+                        ->line((hex->face(2)->refinement_case() == RefinementCase<2>::cut_x) ? 1 : 3),        //2
+                        hex->face(3)->child(0)
+                        ->line((hex->face(3)->refinement_case() == RefinementCase<2>::cut_x) ? 1 : 3)         //3
+                      };
+
+                      lines=&lines_z[0];
+
+                      unsigned int line_indices_z[4];
+
+                      for (unsigned int i=0; i<4; ++i)
+                        line_indices_z[i]=lines[i]->index();
+                      line_indices=&line_indices_z[0];
+
+                      // the orientation of lines for the inner quads
+                      // is quite tricky. as these lines are newly
+                      // created ones and thus have no parents, they
+                      // cannot inherit this property. set up an array
+                      // and fill it with the respective values
+                      bool line_orientation_z[4];
+
+                      // the middle vertex marked as m0 above is the
+                      // start vertex for lines 0 and 2 in standard
+                      // orientation, whereas m1 is the end vertex of
+                      // lines 1 and 3 in standard orientation
+                      const unsigned int middle_vertices[2]=
+                      {
+                        middle_vertex_index<dim,spacedim>(hex->line(8)),
+                        middle_vertex_index<dim,spacedim>(hex->line(11))
+                      };
+
+                      for (unsigned int i=0; i<4; ++i)
+                        if (lines[i]->vertex_index(i%2)==middle_vertices[i%2])
+                          line_orientation_z[i]=true;
+                        else
+                          {
+                            // it must be the other way round then
+                            Assert(lines[i]->vertex_index((i+1)%2)==middle_vertices[i%2],
+                                   ExcInternalError());
+                            line_orientation_z[i]=false;
+                          }
+
+                      line_orientation=&line_orientation_z[0];
+
+                      // set up the new quad, line numbering is as
+                      // indicated above
+                      new_quads[0]->set (internal::Triangulation
+                                         ::TriaObject<2>(line_indices[0],
+                                                         line_indices[1],
+                                                         line_indices[2],
+                                                         line_indices[3]));
+
+                      new_quads[0]->set_line_orientation(0,line_orientation[0]);
+                      new_quads[0]->set_line_orientation(1,line_orientation[1]);
+                      new_quads[0]->set_line_orientation(2,line_orientation[2]);
+                      new_quads[0]->set_line_orientation(3,line_orientation[3]);
+
+                      // the quads are numbered as follows:
+                      //
+                      // planes in the interior of the old hex:
+                      //
+                      //      *
+                      //     /|
+                      //    / | x
+                      //   /  | *-------*      *---------*
+                      //  *   | |       |     /         /
+                      //  |   | |       |    /    0    /
+                      //  |   * |       |   /         /
+                      //  |  /  *-------*y *---------*x
+                      //  | /
+                      //  |/
+                      //  *
+                      //
+                      // children of the faces of the old hex
+                      //
+                      //      *-------*        *-------*
+                      //     /|   8   |       /       /|
+                      //    / |       |      /   10  / |
+                      //   /  *-------*     /       /  *
+                      //  * 2/|       |    *-------* 4/|
+                      //  | / |   7   |    |   6   | / |
+                      //  |/1 *-------*    |       |/3 *
+                      //  *  /       /     *-------*  /
+                      //  | /   9   /      |       | /
+                      //  |/       /       |   5   |/
+                      //  *-------*        *-------*
+                      //
+                      // note that we have to take care of the
+                      // orientation of faces.
+                      const int quad_indices_z[11]
+                      =
+                      {
+                        new_quads[0]->index(),     //0
+
+                        hex->face(0)->child_index(  child_at_origin[hex->face(0)->refinement_case()-1][f_fl[0]][f_ro[0]]),  //1
+                        hex->face(0)->child_index(1-child_at_origin[hex->face(0)->refinement_case()-1][f_fl[0]][f_ro[0]]),
+
+                        hex->face(1)->child_index(  child_at_origin[hex->face(1)->refinement_case()-1][f_fl[1]][f_ro[1]]),  //3
+                        hex->face(1)->child_index(1-child_at_origin[hex->face(1)->refinement_case()-1][f_fl[1]][f_ro[1]]),
+
+                        hex->face(2)->child_index(  child_at_origin[hex->face(2)->refinement_case()-1][f_fl[2]][f_ro[2]]),  //5
+                        hex->face(2)->child_index(1-child_at_origin[hex->face(2)->refinement_case()-1][f_fl[2]][f_ro[2]]),
+
+                        hex->face(3)->child_index(  child_at_origin[hex->face(3)->refinement_case()-1][f_fl[3]][f_ro[3]]),  //7
+                        hex->face(3)->child_index(1-child_at_origin[hex->face(3)->refinement_case()-1][f_fl[3]][f_ro[3]]),
+
+                        hex->face(4)->index(),     //9
+
+                        hex->face(5)->index()      //10
+                      };
+                      quad_indices=&quad_indices_z[0];
+
+                      new_hexes[0]->set (internal::Triangulation
+                                         ::TriaObject<3>(quad_indices[1],
+                                                         quad_indices[3],
+                                                         quad_indices[5],
+                                                         quad_indices[7],
+                                                         quad_indices[9],
+                                                         quad_indices[0]));
+                      new_hexes[1]->set (internal::Triangulation
+                                         ::TriaObject<3>(quad_indices[2],
+                                                         quad_indices[4],
+                                                         quad_indices[6],
+                                                         quad_indices[8],
+                                                         quad_indices[0],
+                                                         quad_indices[10]));
+                      break;
+                    }
+                    case RefinementCase<dim>::cut_xy:
+                    {
+                      //////////////////////////////
+                      //
+                      //     RefinementCase<dim>::cut_xy
+                      //
+                      // the refined cube will look like this:
+                      //
+                      //        *----*----*
+                      //       /    /    /|
+                      //      *----*----* |
+                      //     /    /    /| |
+                      //    *----*----* | |
+                      //    |    |    | | |
+                      //    |    |    | | *
+                      //    |    |    | |/
+                      //    |    |    | *
+                      //    |    |    |/
+                      //    *----*----*
+                      //
+
+                      // first, create the new internal line
+                      new_lines[0]->set (internal::Triangulation::
+                                         TriaObject<1>(middle_vertex_index<dim,spacedim>(hex->face(4)),
+                                                       middle_vertex_index<dim,spacedim>(hex->face(5))));
+
+                      // again, first collect some data about the
+                      // indices of the lines, with the following
+                      // numbering:
+
+                      // face 0: left plane
+                      //       *
+                      //      /|
+                      //     * |
+                      //    /| |
+                      //   * | |
+                      //   | 0 |
+                      //   | | *
+                      //   | |/
+                      //   | *
+                      //   |/
+                      //   *
+                      // face 1: right plane
+                      //       *
+                      //      /|
+                      //     * |
+                      //    /| |
+                      //   * | |
+                      //   | 1 |
+                      //   | | *
+                      //   | |/
+                      //   | *
+                      //   |/
+                      //   *
+                      // face 2: front plane
+                      //   (note: x,y exchanged)
+                      //   *---*---*
+                      //   |   |   |
+                      //   |   2   |
+                      //   |   |   |
+                      //   *---*---*
+                      // face 3: back plane
+                      //   (note: x,y exchanged)
+                      //   *---*---*
+                      //   |   |   |
+                      //   |   3   |
+                      //   |   |   |
+                      //   *---*---*
+                      // face 4: bottom plane
+                      //       *---*---*
+                      //      /   5   /
+                      //     *-6-*-7-*
+                      //    /   4   /
+                      //   *---*---*
+                      // face 5: top plane
+                      //       *---*---*
+                      //      /   9   /
+                      //     *10-*-11*
+                      //    /   8   /
+                      //   *---*---*
+                      // middle planes
+                      //     *-------*   *---*---*
+                      //    /       /    |   |   |
+                      //   /       /     |   12  |
+                      //  /       /      |   |   |
+                      // *-------*       *---*---*
+
+                      // set up a list of line iterators first. from
+                      // this, construct lists of line_indices and
+                      // line orientations later on
+                      const typename Triangulation<dim,spacedim>::raw_line_iterator
+                      lines_xy[13]
+                      =
+                      {
+                        hex->face(0)->child(0)
+                        ->line((hex->face(0)->refinement_case() == RefinementCase<2>::cut_x) ? 1 : 3),        //0
+                        hex->face(1)->child(0)
+                        ->line((hex->face(1)->refinement_case() == RefinementCase<2>::cut_x) ? 1 : 3),        //1
+                        hex->face(2)->child(0)
+                        ->line((hex->face(2)->refinement_case() == RefinementCase<2>::cut_x) ? 1 : 3),        //2
+                        hex->face(3)->child(0)
+                        ->line((hex->face(3)->refinement_case() == RefinementCase<2>::cut_x) ? 1 : 3),        //3
+
+                        hex->face(4)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(0,f_or[4],f_fl[4],f_ro[4]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(1,f_or[4],f_fl[4],f_ro[4])),        //4
+                        hex->face(4)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(3,f_or[4],f_fl[4],f_ro[4]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(0,f_or[4],f_fl[4],f_ro[4])),        //5
+                        hex->face(4)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(0,f_or[4],f_fl[4],f_ro[4]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(3,f_or[4],f_fl[4],f_ro[4])),        //6
+                        hex->face(4)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(3,f_or[4],f_fl[4],f_ro[4]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(2,f_or[4],f_fl[4],f_ro[4])),        //7
+
+                        hex->face(5)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(0,f_or[5],f_fl[5],f_ro[5]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(1,f_or[5],f_fl[5],f_ro[5])),        //8
+                        hex->face(5)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(3,f_or[5],f_fl[5],f_ro[5]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(0,f_or[5],f_fl[5],f_ro[5])),        //9
+                        hex->face(5)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(0,f_or[5],f_fl[5],f_ro[5]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(3,f_or[5],f_fl[5],f_ro[5])),        //10
+                        hex->face(5)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(3,f_or[5],f_fl[5],f_ro[5]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(2,f_or[5],f_fl[5],f_ro[5])),        //11
+
+                        new_lines[0]                        //12
+                      };
+
+                      lines=&lines_xy[0];
+
+                      unsigned int line_indices_xy[13];
+
+                      for (unsigned int i=0; i<13; ++i)
+                        line_indices_xy[i]=lines[i]->index();
+                      line_indices=&line_indices_xy[0];
+
+                      // the orientation of lines for the inner quads
+                      // is quite tricky. as these lines are newly
+                      // created ones and thus have no parents, they
+                      // cannot inherit this property. set up an array
+                      // and fill it with the respective values
+                      bool line_orientation_xy[13];
+
+                      // the middle vertices of the lines of our
+                      // bottom face
+                      const unsigned int middle_vertices[4]=
+                      {
+                        hex->line(0)->child(0)->vertex_index(1),
+                        hex->line(1)->child(0)->vertex_index(1),
+                        hex->line(2)->child(0)->vertex_index(1),
+                        hex->line(3)->child(0)->vertex_index(1),
+                      };
+
+                      // note: for lines 0 to 3 the orientation of the
+                      // line is 'true', if vertex 0 is on the bottom
+                      // face
+                      for (unsigned int i=0; i<4; ++i)
+                        if (lines[i]->vertex_index(0)==middle_vertices[i])
+                          line_orientation_xy[i]=true;
+                        else
+                          {
+                            // it must be the other way round then
+                            Assert(lines[i]->vertex_index(1)==middle_vertices[i],
+                                   ExcInternalError());
+                            line_orientation_xy[i]=false;
+                          }
+
+                      // note: for lines 4 to 11 (inner lines of the
+                      // outer quads) the following holds: the second
+                      // vertex of the even lines in standard
+                      // orientation is the vertex in the middle of
+                      // the quad, whereas for odd lines the first
+                      // vertex is the same middle vertex.
+                      for (unsigned int i=4; i<12; ++i)
+                        if (lines[i]->vertex_index((i+1)%2) ==
+                            middle_vertex_index<dim,spacedim>(hex->face(3+i/4)))
+                          line_orientation_xy[i]=true;
+                        else
+                          {
+                            // it must be the other way
+                            // round then
+                            Assert(lines[i]->vertex_index(i%2) ==
+                                   (middle_vertex_index<dim,spacedim>(hex->face(3+i/4))),
+                                   ExcInternalError());
+                            line_orientation_xy[i]=false;
+                          }
+                      // for the last line the line orientation is
+                      // always true, since it was just constructed
+                      // that way
+
+                      line_orientation_xy[12]=true;
+                      line_orientation=&line_orientation_xy[0];
+
+                      // set up the 4 quads, numbered as follows (left
+                      // quad numbering, right line numbering
+                      // extracted from above)
+                      //
+                      //      *          *
+                      //     /|         9|
+                      //    * |        * |
+                      //  y/| |       8| 3
+                      //  * |1|      * | |
+                      //  | | |x     | 12|
+                      //  |0| *      | | *
+                      //  | |/       2 |5
+                      //  | *        | *
+                      //  |/         |4
+                      //  *          *
+                      //
+                      //  x
+                      //  *---*---*      *10-*-11*
+                      //  |   |   |      |   |   |
+                      //  | 2 | 3 |      0   12  1
+                      //  |   |   |      |   |   |
+                      //  *---*---*y     *-6-*-7-*
+
+                      new_quads[0]->set (internal::Triangulation
+                                         ::TriaObject<2>(line_indices[2],
+                                                         line_indices[12],
+                                                         line_indices[4],
+                                                         line_indices[8]));
+                      new_quads[1]->set (internal::Triangulation
+                                         ::TriaObject<2>(line_indices[12],
+                                                         line_indices[3],
+                                                         line_indices[5],
+                                                         line_indices[9]));
+                      new_quads[2]->set (internal::Triangulation
+                                         ::TriaObject<2>(line_indices[6],
+                                                         line_indices[10],
+                                                         line_indices[0],
+                                                         line_indices[12]));
+                      new_quads[3]->set (internal::Triangulation
+                                         ::TriaObject<2>(line_indices[7],
+                                                         line_indices[11],
+                                                         line_indices[12],
+                                                         line_indices[1]));
+
+                      new_quads[0]->set_line_orientation(0,line_orientation[2]);
+                      new_quads[0]->set_line_orientation(2,line_orientation[4]);
+                      new_quads[0]->set_line_orientation(3,line_orientation[8]);
+
+                      new_quads[1]->set_line_orientation(1,line_orientation[3]);
+                      new_quads[1]->set_line_orientation(2,line_orientation[5]);
+                      new_quads[1]->set_line_orientation(3,line_orientation[9]);
+
+                      new_quads[2]->set_line_orientation(0,line_orientation[6]);
+                      new_quads[2]->set_line_orientation(1,line_orientation[10]);
+                      new_quads[2]->set_line_orientation(2,line_orientation[0]);
+
+                      new_quads[3]->set_line_orientation(0,line_orientation[7]);
+                      new_quads[3]->set_line_orientation(1,line_orientation[11]);
+                      new_quads[3]->set_line_orientation(3,line_orientation[1]);
+
+                      // the quads are numbered as follows:
+                      //
+                      // planes in the interior of the old hex:
+                      //
+                      //      *
+                      //     /|
+                      //    * | x
+                      //   /| | *---*---*      *---------*
+                      //  * |1| |   |   |     /         /
+                      //  | | | | 2 | 3 |    /         /
+                      //  |0| * |   |   |   /         /
+                      //  | |/  *---*---*y *---------*x
+                      //  | *
+                      //  |/
+                      //  *
+                      //
+                      // children of the faces of the old hex
+                      //
+                      //      *---*---*        *---*---*
+                      //     /|   |   |       /18 / 19/|
+                      //    * |10 | 11|      /---/---* |
+                      //   /| |   |   |     /16 / 17/| |
+                      //  * |5|   |   |    *---*---* |7|
+                      //  | | *---*---*    |   |   | | *
+                      //  |4|/14 / 15/     |   |   |6|/
+                      //  | *---/---/      | 8 | 9 | *
+                      //  |/12 / 13/       |   |   |/
+                      //  *---*---*        *---*---*
+                      //
+                      // note that we have to take care of the
+                      // orientation of faces.
+                      const int quad_indices_xy[20]
+                      =
+                      {
+                        new_quads[0]->index(),     //0
+                        new_quads[1]->index(),
+                        new_quads[2]->index(),
+                        new_quads[3]->index(),
+
+                        hex->face(0)->child_index(  child_at_origin[hex->face(0)->refinement_case()-1][f_fl[0]][f_ro[0]]),  //4
+                        hex->face(0)->child_index(1-child_at_origin[hex->face(0)->refinement_case()-1][f_fl[0]][f_ro[0]]),
+
+                        hex->face(1)->child_index(  child_at_origin[hex->face(1)->refinement_case()-1][f_fl[1]][f_ro[1]]),  //6
+                        hex->face(1)->child_index(1-child_at_origin[hex->face(1)->refinement_case()-1][f_fl[1]][f_ro[1]]),
+
+                        hex->face(2)->child_index(  child_at_origin[hex->face(2)->refinement_case()-1][f_fl[2]][f_ro[2]]),  //8
+                        hex->face(2)->child_index(1-child_at_origin[hex->face(2)->refinement_case()-1][f_fl[2]][f_ro[2]]),
+
+                        hex->face(3)->child_index(  child_at_origin[hex->face(3)->refinement_case()-1][f_fl[3]][f_ro[3]]),  //10
+                        hex->face(3)->child_index(1-child_at_origin[hex->face(3)->refinement_case()-1][f_fl[3]][f_ro[3]]),
+
+                        hex->face(4)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(0,f_or[4],f_fl[4],f_ro[4])),  //12
+                        hex->face(4)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(1,f_or[4],f_fl[4],f_ro[4])),
+                        hex->face(4)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(2,f_or[4],f_fl[4],f_ro[4])),
+                        hex->face(4)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(3,f_or[4],f_fl[4],f_ro[4])),
+
+                        hex->face(5)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(0,f_or[5],f_fl[5],f_ro[5])),  //16
+                        hex->face(5)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(1,f_or[5],f_fl[5],f_ro[5])),
+                        hex->face(5)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(2,f_or[5],f_fl[5],f_ro[5])),
+                        hex->face(5)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(3,f_or[5],f_fl[5],f_ro[5]))
+                      };
+                      quad_indices=&quad_indices_xy[0];
+
+                      new_hexes[0]->set (internal::Triangulation
+                                         ::TriaObject<3>(quad_indices[4],
+                                                         quad_indices[0],
+                                                         quad_indices[8],
+                                                         quad_indices[2],
+                                                         quad_indices[12],
+                                                         quad_indices[16]));
+                      new_hexes[1]->set (internal::Triangulation
+                                         ::TriaObject<3>(quad_indices[0],
+                                                         quad_indices[6],
+                                                         quad_indices[9],
+                                                         quad_indices[3],
+                                                         quad_indices[13],
+                                                         quad_indices[17]));
+                      new_hexes[2]->set (internal::Triangulation
+                                         ::TriaObject<3>(quad_indices[5],
+                                                         quad_indices[1],
+                                                         quad_indices[2],
+                                                         quad_indices[10],
+                                                         quad_indices[14],
+                                                         quad_indices[18]));
+                      new_hexes[3]->set (internal::Triangulation
+                                         ::TriaObject<3>(quad_indices[1],
+                                                         quad_indices[7],
+                                                         quad_indices[3],
+                                                         quad_indices[11],
+                                                         quad_indices[15],
+                                                         quad_indices[19]));
+                      break;
+                    }
+                    case RefinementCase<dim>::cut_xz:
+                    {
+                      //////////////////////////////
+                      //
+                      //     RefinementCase<dim>::cut_xz
+                      //
+                      // the refined cube will look like this:
+                      //
+                      //        *----*----*
+                      //       /    /    /|
+                      //      /    /    / |
+                      //     /    /    /  *
+                      //    *----*----*  /|
+                      //    |    |    | / |
+                      //    |    |    |/  *
+                      //    *----*----*  /
+                      //    |    |    | /
+                      //    |    |    |/
+                      //    *----*----*
+                      //
+
+                      // first, create the new internal line
+                      new_lines[0]->set (internal::Triangulation::
+                                         TriaObject<1>(middle_vertex_index<dim,spacedim>(hex->face(2)),
+                                                       middle_vertex_index<dim,spacedim>(hex->face(3))));
+
+                      // again, first collect some data about the
+                      // indices of the lines, with the following
+                      // numbering:
+
+                      // face 0: left plane
+                      //       *
+                      //      /|
+                      //     / |
+                      //    /  *
+                      //   *  /|
+                      //   | 0 |
+                      //   |/  *
+                      //   *  /
+                      //   | /
+                      //   |/
+                      //   *
+                      // face 1: right plane
+                      //       *
+                      //      /|
+                      //     / |
+                      //    /  *
+                      //   *  /|
+                      //   | 1 |
+                      //   |/  *
+                      //   *  /
+                      //   | /
+                      //   |/
+                      //   *
+                      // face 2: front plane
+                      //   (note: x,y exchanged)
+                      //   *---*---*
+                      //   |   5   |
+                      //   *-6-*-7-*
+                      //   |   4   |
+                      //   *---*---*
+                      // face 3: back plane
+                      //   (note: x,y exchanged)
+                      //   *---*---*
+                      //   |   9   |
+                      //   *10-*-11*
+                      //   |   8   |
+                      //   *---*---*
+                      // face 4: bottom plane
+                      //       *---*---*
+                      //      /   /   /
+                      //     /   2   /
+                      //    /   /   /
+                      //   *---*---*
+                      // face 5: top plane
+                      //       *---*---*
+                      //      /   /   /
+                      //     /   3   /
+                      //    /   /   /
+                      //   *---*---*
+                      // middle planes
+                      //     *---*---*   *-------*
+                      //    /   /   /    |       |
+                      //   /   12  /     |       |
+                      //  /   /   /      |       |
+                      // *---*---*       *-------*
+
+                      // set up a list of line iterators first. from
+                      // this, construct lists of line_indices and
+                      // line orientations later on
+                      const typename Triangulation<dim,spacedim>::raw_line_iterator
+                      lines_xz[13]
+                      =
+                      {
+                        hex->face(0)->child(0)
+                        ->line((hex->face(0)->refinement_case() == RefinementCase<2>::cut_x) ? 1 : 3),        //0
+                        hex->face(1)->child(0)
+                        ->line((hex->face(1)->refinement_case() == RefinementCase<2>::cut_x) ? 1 : 3),        //1
+                        hex->face(4)->child(0)
+                        ->line((hex->face(4)->refinement_case() == RefinementCase<2>::cut_x) ? 1 : 3),        //2
+                        hex->face(5)->child(0)
+                        ->line((hex->face(5)->refinement_case() == RefinementCase<2>::cut_x) ? 1 : 3),        //3
+
+                        hex->face(2)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(0,f_or[2],f_fl[2],f_ro[2]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(3,f_or[2],f_fl[2],f_ro[2])),        //4
+                        hex->face(2)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(3,f_or[2],f_fl[2],f_ro[2]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(2,f_or[2],f_fl[2],f_ro[2])),        //5
+                        hex->face(2)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(0,f_or[2],f_fl[2],f_ro[2]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(1,f_or[2],f_fl[2],f_ro[2])),        //6
+                        hex->face(2)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(3,f_or[2],f_fl[2],f_ro[2]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(0,f_or[2],f_fl[2],f_ro[2])),        //7
+
+                        hex->face(3)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(0,f_or[3],f_fl[3],f_ro[3]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(3,f_or[3],f_fl[3],f_ro[3])),        //8
+                        hex->face(3)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(3,f_or[3],f_fl[3],f_ro[3]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(2,f_or[3],f_fl[3],f_ro[3])),        //9
+                        hex->face(3)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(0,f_or[3],f_fl[3],f_ro[3]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(1,f_or[3],f_fl[3],f_ro[3])),        //10
+                        hex->face(3)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(3,f_or[3],f_fl[3],f_ro[3]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(0,f_or[3],f_fl[3],f_ro[3])),        //11
+
+                        new_lines[0]                        //12
+                      };
+
+                      lines=&lines_xz[0];
+
+                      unsigned int line_indices_xz[13];
+
+                      for (unsigned int i=0; i<13; ++i)
+                        line_indices_xz[i]=lines[i]->index();
+                      line_indices=&line_indices_xz[0];
+
+                      // the orientation of lines for the inner quads
+                      // is quite tricky. as these lines are newly
+                      // created ones and thus have no parents, they
+                      // cannot inherit this property. set up an array
+                      // and fill it with the respective values
+                      bool line_orientation_xz[13];
+
+                      // the middle vertices of the
+                      // lines of our front face
+                      const unsigned int middle_vertices[4]=
+                      {
+                        hex->line(8)->child(0)->vertex_index(1),
+                        hex->line(9)->child(0)->vertex_index(1),
+                        hex->line(2)->child(0)->vertex_index(1),
+                        hex->line(6)->child(0)->vertex_index(1),
+                      };
+
+                      // note: for lines 0 to 3 the orientation of the
+                      // line is 'true', if vertex 0 is on the front
+                      for (unsigned int i=0; i<4; ++i)
+                        if (lines[i]->vertex_index(0)==middle_vertices[i])
+                          line_orientation_xz[i]=true;
+                        else
+                          {
+                            // it must be the other way round then
+                            Assert(lines[i]->vertex_index(1)==middle_vertices[i],
+                                   ExcInternalError());
+                            line_orientation_xz[i]=false;
+                          }
+
+                      // note: for lines 4 to 11 (inner lines of the
+                      // outer quads) the following holds: the second
+                      // vertex of the even lines in standard
+                      // orientation is the vertex in the middle of
+                      // the quad, whereas for odd lines the first
+                      // vertex is the same middle vertex.
+                      for (unsigned int i=4; i<12; ++i)
+                        if (lines[i]->vertex_index((i+1)%2) ==
+                            middle_vertex_index<dim,spacedim>(hex->face(1+i/4)))
+                          line_orientation_xz[i]=true;
+                        else
+                          {
+                            // it must be the other way
+                            // round then
+                            Assert(lines[i]->vertex_index(i%2) ==
+                                   (middle_vertex_index<dim,spacedim>(hex->face(1+i/4))),
+                                   ExcInternalError());
+                            line_orientation_xz[i]=false;
+                          }
+                      // for the last line the line orientation is
+                      // always true, since it was just constructed
+                      // that way
+
+                      line_orientation_xz[12]=true;
+                      line_orientation=&line_orientation_xz[0];
+
+                      // set up the 4 quads, numbered as follows (left
+                      // quad numbering, right line numbering
+                      // extracted from above), the drawings denote
+                      // middle planes
+                      //
+                      //      *          *
+                      //     /|         /|
+                      //    / |        3 9
+                      //  y/  *       /  *
+                      //  * 3/|      *  /|
+                      //  | / |x     5 12|8
+                      //  |/  *      |/  *
+                      //  * 2/       *  /
+                      //  | /        4 2
+                      //  |/         |/
+                      //  *          *
+                      //
+                      //       y
+                      //      *----*----*      *-10-*-11-*
+                      //     /    /    /      /    /    /
+                      //    / 0  /  1 /      0    12   1
+                      //   /    /    /      /    /    /
+                      //  *----*----*x     *--6-*--7-*
+
+                      new_quads[0]->set (internal::Triangulation
+                                         ::TriaObject<2>(line_indices[0],
+                                                         line_indices[12],
+                                                         line_indices[6],
+                                                         line_indices[10]));
+                      new_quads[1]->set (internal::Triangulation
+                                         ::TriaObject<2>(line_indices[12],
+                                                         line_indices[1],
+                                                         line_indices[7],
+                                                         line_indices[11]));
+                      new_quads[2]->set (internal::Triangulation
+                                         ::TriaObject<2>(line_indices[4],
+                                                         line_indices[8],
+                                                         line_indices[2],
+                                                         line_indices[12]));
+                      new_quads[3]->set (internal::Triangulation
+                                         ::TriaObject<2>(line_indices[5],
+                                                         line_indices[9],
+                                                         line_indices[12],
+                                                         line_indices[3]));
+
+                      new_quads[0]->set_line_orientation(0,line_orientation[0]);
+                      new_quads[0]->set_line_orientation(2,line_orientation[6]);
+                      new_quads[0]->set_line_orientation(3,line_orientation[10]);
+
+                      new_quads[1]->set_line_orientation(1,line_orientation[1]);
+                      new_quads[1]->set_line_orientation(2,line_orientation[7]);
+                      new_quads[1]->set_line_orientation(3,line_orientation[11]);
+
+                      new_quads[2]->set_line_orientation(0,line_orientation[4]);
+                      new_quads[2]->set_line_orientation(1,line_orientation[8]);
+                      new_quads[2]->set_line_orientation(2,line_orientation[2]);
+
+                      new_quads[3]->set_line_orientation(0,line_orientation[5]);
+                      new_quads[3]->set_line_orientation(1,line_orientation[9]);
+                      new_quads[3]->set_line_orientation(3,line_orientation[3]);
+
+                      // the quads are numbered as follows:
+                      //
+                      // planes in the interior of the old hex:
+                      //
+                      //      *
+                      //     /|
+                      //    / | x
+                      //   /3 * *-------*      *----*----*
+                      //  *  /| |       |     /    /    /
+                      //  | / | |       |    /  0 /  1 /
+                      //  |/  * |       |   /    /    /
+                      //  * 2/  *-------*y *----*----*x
+                      //  | /
+                      //  |/
+                      //  *
+                      //
+                      // children of the faces
+                      // of the old hex
+                      //      *---*---*        *---*---*
+                      //     /|13 | 15|       /   /   /|
+                      //    / |   |   |      /18 / 19/ |
+                      //   /  *---*---*     /   /   /  *
+                      //  * 5/|   |   |    *---*---* 7/|
+                      //  | / |12 | 14|    | 9 | 11| / |
+                      //  |/4 *---*---*    |   |   |/6 *
+                      //  *  /   /   /     *---*---*  /
+                      //  | /16 / 17/      |   |   | /
+                      //  |/   /   /       | 8 | 10|/
+                      //  *---*---*        *---*---*
+                      //
+                      // note that we have to take care of the
+                      // orientation of faces.
+                      const int quad_indices_xz[20]
+                      =
+                      {
+                        new_quads[0]->index(),     //0
+                        new_quads[1]->index(),
+                        new_quads[2]->index(),
+                        new_quads[3]->index(),
+
+                        hex->face(0)->child_index(  child_at_origin[hex->face(0)->refinement_case()-1][f_fl[0]][f_ro[0]]),  //4
+                        hex->face(0)->child_index(1-child_at_origin[hex->face(0)->refinement_case()-1][f_fl[0]][f_ro[0]]),
+
+                        hex->face(1)->child_index(  child_at_origin[hex->face(1)->refinement_case()-1][f_fl[1]][f_ro[1]]),  //6
+                        hex->face(1)->child_index(1-child_at_origin[hex->face(1)->refinement_case()-1][f_fl[1]][f_ro[1]]),
+
+                        hex->face(2)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(0,f_or[2],f_fl[2],f_ro[2])),  //8
+                        hex->face(2)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(1,f_or[2],f_fl[2],f_ro[2])),
+                        hex->face(2)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(2,f_or[2],f_fl[2],f_ro[2])),
+                        hex->face(2)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(3,f_or[2],f_fl[2],f_ro[2])),
+
+                        hex->face(3)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(0,f_or[3],f_fl[3],f_ro[3])),  //12
+                        hex->face(3)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(1,f_or[3],f_fl[3],f_ro[3])),
+                        hex->face(3)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(2,f_or[3],f_fl[3],f_ro[3])),
+                        hex->face(3)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(3,f_or[3],f_fl[3],f_ro[3])),
+
+                        hex->face(4)->child_index(  child_at_origin[hex->face(4)->refinement_case()-1][f_fl[4]][f_ro[4]]),  //16
+                        hex->face(4)->child_index(1-child_at_origin[hex->face(4)->refinement_case()-1][f_fl[4]][f_ro[4]]),
+
+                        hex->face(5)->child_index(  child_at_origin[hex->face(5)->refinement_case()-1][f_fl[5]][f_ro[5]]),  //18
+                        hex->face(5)->child_index(1-child_at_origin[hex->face(5)->refinement_case()-1][f_fl[5]][f_ro[5]])
+                      };
+                      quad_indices=&quad_indices_xz[0];
+
+                      // due to the exchange of x and y for the front
+                      // and back face, we order the children
+                      // according to
+                      //
+                      // *---*---*
+                      // | 1 | 3 |
+                      // *---*---*
+                      // | 0 | 2 |
+                      // *---*---*
+                      new_hexes[0]->set (internal::Triangulation
+                                         ::TriaObject<3>(quad_indices[4],
+                                                         quad_indices[2],
+                                                         quad_indices[8],
+                                                         quad_indices[12],
+                                                         quad_indices[16],
+                                                         quad_indices[0]));
+                      new_hexes[1]->set (internal::Triangulation
+                                         ::TriaObject<3>(quad_indices[5],
+                                                         quad_indices[3],
+                                                         quad_indices[9],
+                                                         quad_indices[13],
+                                                         quad_indices[0],
+                                                         quad_indices[18]));
+                      new_hexes[2]->set (internal::Triangulation
+                                         ::TriaObject<3>(quad_indices[2],
+                                                         quad_indices[6],
+                                                         quad_indices[10],
+                                                         quad_indices[14],
+                                                         quad_indices[17],
+                                                         quad_indices[1]));
+                      new_hexes[3]->set (internal::Triangulation
+                                         ::TriaObject<3>(quad_indices[3],
+                                                         quad_indices[7],
+                                                         quad_indices[11],
+                                                         quad_indices[15],
+                                                         quad_indices[1],
+                                                         quad_indices[19]));
+                      break;
+                    }
+                    case RefinementCase<dim>::cut_yz:
+                    {
+                      //////////////////////////////
+                      //
+                      //     RefinementCase<dim>::cut_yz
+                      //
+                      // the refined cube will look like this:
+                      //
+                      //        *---------*
+                      //       /         /|
+                      //      *---------* |
+                      //     /         /| |
+                      //    *---------* |/|
+                      //    |         | * |
+                      //    |         |/| *
+                      //    *---------* |/
+                      //    |         | *
+                      //    |         |/
+                      //    *---------*
+                      //
+
+                      // first, create the new
+                      // internal line
+                      new_lines[0]->set (internal::Triangulation::
+                                         TriaObject<1>(middle_vertex_index<dim,spacedim>(hex->face(0)),
+                                                       middle_vertex_index<dim,spacedim>(hex->face(1))));
+
+                      // again, first collect some data about the
+                      // indices of the lines, with the following
+                      // numbering: (note that face 0 and 1 each are
+                      // shown twice for better readability)
+
+                      // face 0: left plane
+                      //       *            *
+                      //      /|           /|
+                      //     * |          * |
+                      //    /| *         /| *
+                      //   * 5/|        * |7|
+                      //   | * |        | * |
+                      //   |/| *        |6| *
+                      //   * 4/         * |/
+                      //   | *          | *
+                      //   |/           |/
+                      //   *            *
+                      // face 1: right plane
+                      //       *            *
+                      //      /|           /|
+                      //     * |          * |
+                      //    /| *         /| *
+                      //   * 9/|        * |11
+                      //   | * |        | * |
+                      //   |/| *        |10 *
+                      //   * 8/         * |/
+                      //   | *          | *
+                      //   |/           |/
+                      //   *            *
+                      // face 2: front plane
+                      //   (note: x,y exchanged)
+                      //   *-------*
+                      //   |       |
+                      //   *---0---*
+                      //   |       |
+                      //   *-------*
+                      // face 3: back plane
+                      //   (note: x,y exchanged)
+                      //   *-------*
+                      //   |       |
+                      //   *---1---*
+                      //   |       |
+                      //   *-------*
+                      // face 4: bottom plane
+                      //       *-------*
+                      //      /       /
+                      //     *---2---*
+                      //    /       /
+                      //   *-------*
+                      // face 5: top plane
+                      //       *-------*
+                      //      /       /
+                      //     *---3---*
+                      //    /       /
+                      //   *-------*
+                      // middle planes
+                      //     *-------*   *-------*
+                      //    /       /    |       |
+                      //   *---12--*     |       |
+                      //  /       /      |       |
+                      // *-------*       *-------*
+
+                      // set up a list of line iterators first. from
+                      // this, construct lists of line_indices and
+                      // line orientations later on
+                      const typename Triangulation<dim,spacedim>::raw_line_iterator
+                      lines_yz[13]
+                      =
+                      {
+                        hex->face(2)->child(0)
+                        ->line((hex->face(2)->refinement_case() == RefinementCase<2>::cut_x) ? 1 : 3),        //0
+                        hex->face(3)->child(0)
+                        ->line((hex->face(3)->refinement_case() == RefinementCase<2>::cut_x) ? 1 : 3),        //1
+                        hex->face(4)->child(0)
+                        ->line((hex->face(4)->refinement_case() == RefinementCase<2>::cut_x) ? 1 : 3),        //2
+                        hex->face(5)->child(0)
+                        ->line((hex->face(5)->refinement_case() == RefinementCase<2>::cut_x) ? 1 : 3),        //3
+
+                        hex->face(0)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(0,f_or[0],f_fl[0],f_ro[0]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(1,f_or[0],f_fl[0],f_ro[0])),        //4
+                        hex->face(0)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(3,f_or[0],f_fl[0],f_ro[0]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(0,f_or[0],f_fl[0],f_ro[0])),        //5
+                        hex->face(0)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(0,f_or[0],f_fl[0],f_ro[0]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(3,f_or[0],f_fl[0],f_ro[0])),        //6
+                        hex->face(0)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(3,f_or[0],f_fl[0],f_ro[0]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(2,f_or[0],f_fl[0],f_ro[0])),        //7
+
+                        hex->face(1)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(0,f_or[1],f_fl[1],f_ro[1]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(1,f_or[1],f_fl[1],f_ro[1])),        //8
+                        hex->face(1)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(3,f_or[1],f_fl[1],f_ro[1]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(0,f_or[1],f_fl[1],f_ro[1])),        //9
+                        hex->face(1)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(0,f_or[1],f_fl[1],f_ro[1]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(3,f_or[1],f_fl[1],f_ro[1])),        //10
+                        hex->face(1)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(3,f_or[1],f_fl[1],f_ro[1]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(2,f_or[1],f_fl[1],f_ro[1])),        //11
+
+                        new_lines[0]                        //12
+                      };
+
+                      lines=&lines_yz[0];
+
+                      unsigned int line_indices_yz[13];
+
+                      for (unsigned int i=0; i<13; ++i)
+                        line_indices_yz[i]=lines[i]->index();
+                      line_indices=&line_indices_yz[0];
+
+                      // the orientation of lines for the inner quads
+                      // is quite tricky. as these lines are newly
+                      // created ones and thus have no parents, they
+                      // cannot inherit this property. set up an array
+                      // and fill it with the respective values
+                      bool line_orientation_yz[13];
+
+                      // the middle vertices of the lines of our left
+                      // face (face 0)
+                      const unsigned int middle_vertices[4]=
+                      {
+                        hex->line(8)->child(0)->vertex_index(1),
+                        hex->line(10)->child(0)->vertex_index(1),
+                        hex->line(0)->child(0)->vertex_index(1),
+                        hex->line(4)->child(0)->vertex_index(1),
+                      };
+
+                      // note: for lines 0 to 3 the orientation of the
+                      // line is 'true', if vertex 0 is on the left face
+                      for (unsigned int i=0; i<4; ++i)
+                        if (lines[i]->vertex_index(0)==middle_vertices[i])
+                          line_orientation_yz[i]=true;
+                        else
+                          {
+                            // it must be the other way round then
+                            Assert(lines[i]->vertex_index(1)==middle_vertices[i],
+                                   ExcInternalError());
+                            line_orientation_yz[i]=false;
+                          }
+
+                      // note: for lines 4 to 11 (inner lines of the
+                      // outer quads) the following holds: the second
+                      // vertex of the even lines in standard
+                      // orientation is the vertex in the middle of
+                      // the quad, whereas for odd lines the first
+                      // vertex is the same middle vertex.
+                      for (unsigned int i=4; i<12; ++i)
+                        if (lines[i]->vertex_index((i+1)%2) ==
+                            middle_vertex_index<dim,spacedim>(hex->face(i/4-1)))
+                          line_orientation_yz[i]=true;
+                        else
+                          {
+                            // it must be the other way
+                            // round then
+                            Assert(lines[i]->vertex_index(i%2) ==
+                                   (middle_vertex_index<dim,spacedim>(hex->face(i/4-1))),
+                                   ExcInternalError());
+                            line_orientation_yz[i]=false;
+                          }
+                      // for the last line the line orientation is
+                      // always true, since it was just constructed
+                      // that way
+
+                      line_orientation_yz[12]=true;
+                      line_orientation=&line_orientation_yz[0];
+
+                      // set up the 4 quads, numbered as follows (left
+                      // quad numbering, right line numbering
+                      // extracted from above)
+                      //
+                      //  x
+                      //  *-------*      *---3---*
+                      //  |   3   |      5       9
+                      //  *-------*      *---12--*
+                      //  |   2   |      4       8
+                      //  *-------*y     *---2---*
+                      //
+                      //       y
+                      //      *---------*      *----1----*
+                      //     /    1    /      7         11
+                      //    *---------*      *----12---*
+                      //   /    0    /      6         10
+                      //  *---------*x     *----0----*
+
+                      new_quads[0]->set (internal::Triangulation
+                                         ::TriaObject<2>(line_indices[6],
+                                                         line_indices[10],
+                                                         line_indices[0],
+                                                         line_indices[12]));
+                      new_quads[1]->set (internal::Triangulation
+                                         ::TriaObject<2>(line_indices[7],
+                                                         line_indices[11],
+                                                         line_indices[12],
+                                                         line_indices[1]));
+                      new_quads[2]->set (internal::Triangulation
+                                         ::TriaObject<2>(line_indices[2],
+                                                         line_indices[12],
+                                                         line_indices[4],
+                                                         line_indices[8]));
+                      new_quads[3]->set (internal::Triangulation
+                                         ::TriaObject<2>(line_indices[12],
+                                                         line_indices[3],
+                                                         line_indices[5],
+                                                         line_indices[9]));
+
+                      new_quads[0]->set_line_orientation(0,line_orientation[6]);
+                      new_quads[0]->set_line_orientation(1,line_orientation[10]);
+                      new_quads[0]->set_line_orientation(2,line_orientation[0]);
+
+                      new_quads[1]->set_line_orientation(0,line_orientation[7]);
+                      new_quads[1]->set_line_orientation(1,line_orientation[11]);
+                      new_quads[1]->set_line_orientation(3,line_orientation[1]);
+
+                      new_quads[2]->set_line_orientation(0,line_orientation[2]);
+                      new_quads[2]->set_line_orientation(2,line_orientation[4]);
+                      new_quads[2]->set_line_orientation(3,line_orientation[8]);
+
+                      new_quads[3]->set_line_orientation(1,line_orientation[3]);
+                      new_quads[3]->set_line_orientation(2,line_orientation[5]);
+                      new_quads[3]->set_line_orientation(3,line_orientation[9]);
+
+                      // the quads are numbered as follows:
+                      //
+                      // planes in the interior of the old hex:
+                      //
+                      //      *
+                      //     /|
+                      //    / | x
+                      //   /  | *-------*      *---------*
+                      //  *   | |   3   |     /    1    /
+                      //  |   | *-------*    *---------*
+                      //  |   * |   2   |   /    0    /
+                      //  |  /  *-------*y *---------*x
+                      //  | /
+                      //  |/
+                      //  *
+                      //
+                      // children of the faces
+                      // of the old hex
+                      //      *-------*        *-------*
+                      //     /|       |       /  19   /|
+                      //    * |  15   |      *-------* |
+                      //   /|7*-------*     /  18   /|11
+                      //  * |/|       |    *-------* |/|
+                      //  |6* |  14   |    |       10* |
+                      //  |/|5*-------*    |  13   |/|9*
+                      //  * |/  17   /     *-------* |/
+                      //  |4*-------*      |       |8*
+                      //  |/  16   /       |  12   |/
+                      //  *-------*        *-------*
+                      //
+                      // note that we have to take care of the
+                      // orientation of faces.
+                      const int quad_indices_yz[20]
+                      =
+                      {
+                        new_quads[0]->index(),     //0
+                        new_quads[1]->index(),
+                        new_quads[2]->index(),
+                        new_quads[3]->index(),
+
+                        hex->face(0)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(0,f_or[0],f_fl[0],f_ro[0])),  //4
+                        hex->face(0)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(1,f_or[0],f_fl[0],f_ro[0])),
+                        hex->face(0)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(2,f_or[0],f_fl[0],f_ro[0])),
+                        hex->face(0)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(3,f_or[0],f_fl[0],f_ro[0])),
+
+                        hex->face(1)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(0,f_or[1],f_fl[1],f_ro[1])),  //8
+                        hex->face(1)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(1,f_or[1],f_fl[1],f_ro[1])),
+                        hex->face(1)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(2,f_or[1],f_fl[1],f_ro[1])),
+                        hex->face(1)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(3,f_or[1],f_fl[1],f_ro[1])),
+
+                        hex->face(2)->child_index(  child_at_origin[hex->face(2)->refinement_case()-1][f_fl[2]][f_ro[2]]),  //12
+                        hex->face(2)->child_index(1-child_at_origin[hex->face(2)->refinement_case()-1][f_fl[2]][f_ro[2]]),
+
+                        hex->face(3)->child_index(  child_at_origin[hex->face(3)->refinement_case()-1][f_fl[3]][f_ro[3]]),  //14
+                        hex->face(3)->child_index(1-child_at_origin[hex->face(3)->refinement_case()-1][f_fl[3]][f_ro[3]]),
+
+                        hex->face(4)->child_index(  child_at_origin[hex->face(4)->refinement_case()-1][f_fl[4]][f_ro[4]]),  //16
+                        hex->face(4)->child_index(1-child_at_origin[hex->face(4)->refinement_case()-1][f_fl[4]][f_ro[4]]),
+
+                        hex->face(5)->child_index(  child_at_origin[hex->face(5)->refinement_case()-1][f_fl[5]][f_ro[5]]),  //18
+                        hex->face(5)->child_index(1-child_at_origin[hex->face(5)->refinement_case()-1][f_fl[5]][f_ro[5]])
+                      };
+                      quad_indices=&quad_indices_yz[0];
+
+                      new_hexes[0]->set (internal::Triangulation
+                                         ::TriaObject<3>(quad_indices[4],
+                                                         quad_indices[8],
+                                                         quad_indices[12],
+                                                         quad_indices[2],
+                                                         quad_indices[16],
+                                                         quad_indices[0]));
+                      new_hexes[1]->set (internal::Triangulation
+                                         ::TriaObject<3>(quad_indices[5],
+                                                         quad_indices[9],
+                                                         quad_indices[2],
+                                                         quad_indices[14],
+                                                         quad_indices[17],
+                                                         quad_indices[1]));
+                      new_hexes[2]->set (internal::Triangulation
+                                         ::TriaObject<3>(quad_indices[6],
+                                                         quad_indices[10],
+                                                         quad_indices[13],
+                                                         quad_indices[3],
+                                                         quad_indices[0],
+                                                         quad_indices[18]));
+                      new_hexes[3]->set (internal::Triangulation
+                                         ::TriaObject<3>(quad_indices[7],
+                                                         quad_indices[11],
+                                                         quad_indices[3],
+                                                         quad_indices[15],
+                                                         quad_indices[1],
+                                                         quad_indices[19]));
+                      break;
+                    }
+                    case RefinementCase<dim>::cut_xyz:
+                    {
+                      //////////////////////////////
+                      //
+                      //     RefinementCase<dim>::cut_xyz
+                      //     isotropic refinement
+                      //
+                      // the refined cube will look
+                      // like this:
+                      //
+                      //        *----*----*
+                      //       /    /    /|
+                      //      *----*----* |
+                      //     /    /    /| *
+                      //    *----*----* |/|
+                      //    |    |    | * |
+                      //    |    |    |/| *
+                      //    *----*----* |/
+                      //    |    |    | *
+                      //    |    |    |/
+                      //    *----*----*
+                      //
+
+                      // find the next unused vertex and set it
+                      // appropriately
+                      while (triangulation.vertices_used[next_unused_vertex] == true)
+                        ++next_unused_vertex;
+                      Assert (next_unused_vertex < triangulation.vertices.size(),
+                              ExcMessage("Internal error: During refinement, the triangulation wants to access an element of the 'vertices' array but it turns out that the array is not large enough."));
+                      triangulation.vertices_used[next_unused_vertex] = true;
+
+                      // the new vertex is definitely in the interior,
+                      // so we need not worry about the
+                      // boundary. However we need to worry about
+                      // Manifolds. Let the cell compute its own
+                      // center, by querying the underlying manifold
+                      // object.
+                      triangulation.vertices[next_unused_vertex] =
+                        hex->center(true, true);
+
+                      // set the data of the six new lines.  first
+                      // collect the indices of the seven vertices
+                      // involved: the middle vertices of the six
+                      // faces and the newly created center vertex
+                      // (think of the two planes sketched below as
+                      // crossing each other; they cut the hex in two
+                      // vertically and horizontally)
+                      //
+                      //     *--3--*   *--5--*
+                      //    /  /  /    |  |  |
+                      //   0--6--1     0--6--1
+                      //  /  /  /      |  |  |
+                      // *--2--*       *--4--*
+                      // the lines are numbered
+                      // as follows:
+                      //     *--*--*   *--*--*
+                      //    /  1  /    |  5  |
+                      //   *2-*-3*     *2-*-3*
+                      //  /  0  /      |  4  |
+                      // *--*--*       *--*--*
+                      //
+                      const unsigned int vertex_indices_xyz[7]
+                        = { middle_vertex_index<dim,spacedim>(hex->face(0)),
+                            middle_vertex_index<dim,spacedim>(hex->face(1)),
+                            middle_vertex_index<dim,spacedim>(hex->face(2)),
+                            middle_vertex_index<dim,spacedim>(hex->face(3)),
+                            middle_vertex_index<dim,spacedim>(hex->face(4)),
+                            middle_vertex_index<dim,spacedim>(hex->face(5)),
+                            next_unused_vertex
+                          };
+                      vertex_indices=&vertex_indices_xyz[0];
+
+                      new_lines[0]->set (internal::Triangulation::
+                                         TriaObject<1>(vertex_indices[2], vertex_indices[6]));
+                      new_lines[1]->set (internal::Triangulation::
+                                         TriaObject<1>(vertex_indices[6], vertex_indices[3]));
+                      new_lines[2]->set (internal::Triangulation::
+                                         TriaObject<1>(vertex_indices[0], vertex_indices[6]));
+                      new_lines[3]->set (internal::Triangulation::
+                                         TriaObject<1>(vertex_indices[6], vertex_indices[1]));
+                      new_lines[4]->set (internal::Triangulation::
+                                         TriaObject<1>(vertex_indices[4], vertex_indices[6]));
+                      new_lines[5]->set (internal::Triangulation::
+                                         TriaObject<1>(vertex_indices[6], vertex_indices[5]));
+
+                      // again, first collect some data about the
+                      // indices of the lines, with the following
+                      // numbering: (note that face 0 and 1 each are
+                      // shown twice for better readability)
+
+                      // face 0: left plane
+                      //       *            *
+                      //      /|           /|
+                      //     * |          * |
+                      //    /| *         /| *
+                      //   * 1/|        * |3|
+                      //   | * |        | * |
+                      //   |/| *        |2| *
+                      //   * 0/         * |/
+                      //   | *          | *
+                      //   |/           |/
+                      //   *            *
+                      // face 1: right plane
+                      //       *            *
+                      //      /|           /|
+                      //     * |          * |
+                      //    /| *         /| *
+                      //   * 5/|        * |7|
+                      //   | * |        | * |
+                      //   |/| *        |6| *
+                      //   * 4/         * |/
+                      //   | *          | *
+                      //   |/           |/
+                      //   *            *
+                      // face 2: front plane
+                      //   (note: x,y exchanged)
+                      //   *---*---*
+                      //   |   11  |
+                      //   *-8-*-9-*
+                      //   |   10  |
+                      //   *---*---*
+                      // face 3: back plane
+                      //   (note: x,y exchanged)
+                      //   *---*---*
+                      //   |   15  |
+                      //   *12-*-13*
+                      //   |   14  |
+                      //   *---*---*
+                      // face 4: bottom plane
+                      //       *---*---*
+                      //      /  17   /
+                      //     *18-*-19*
+                      //    /   16  /
+                      //   *---*---*
+                      // face 5: top plane
+                      //       *---*---*
+                      //      /  21   /
+                      //     *22-*-23*
+                      //    /   20  /
+                      //   *---*---*
+                      // middle planes
+                      //     *---*---*   *---*---*
+                      //    /  25   /    |   29  |
+                      //   *26-*-27*     *26-*-27*
+                      //  /   24  /      |   28  |
+                      // *---*---*       *---*---*
+
+                      // set up a list of line iterators first. from
+                      // this, construct lists of line_indices and
+                      // line orientations later on
+                      const typename Triangulation<dim,spacedim>::raw_line_iterator
+                      lines_xyz[30]
+                      =
+                      {
+                        hex->face(0)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(0,f_or[0],f_fl[0],f_ro[0]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(1,f_or[0],f_fl[0],f_ro[0])),        //0
+                        hex->face(0)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(3,f_or[0],f_fl[0],f_ro[0]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(0,f_or[0],f_fl[0],f_ro[0])),        //1
+                        hex->face(0)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(0,f_or[0],f_fl[0],f_ro[0]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(3,f_or[0],f_fl[0],f_ro[0])),        //2
+                        hex->face(0)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(3,f_or[0],f_fl[0],f_ro[0]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(2,f_or[0],f_fl[0],f_ro[0])),        //3
+
+                        hex->face(1)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(0,f_or[1],f_fl[1],f_ro[1]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(1,f_or[1],f_fl[1],f_ro[1])),        //4
+                        hex->face(1)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(3,f_or[1],f_fl[1],f_ro[1]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(0,f_or[1],f_fl[1],f_ro[1])),        //5
+                        hex->face(1)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(0,f_or[1],f_fl[1],f_ro[1]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(3,f_or[1],f_fl[1],f_ro[1])),        //6
+                        hex->face(1)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(3,f_or[1],f_fl[1],f_ro[1]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(2,f_or[1],f_fl[1],f_ro[1])),        //7
+
+                        hex->face(2)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(0,f_or[2],f_fl[2],f_ro[2]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(1,f_or[2],f_fl[2],f_ro[2])),        //8
+                        hex->face(2)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(3,f_or[2],f_fl[2],f_ro[2]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(0,f_or[2],f_fl[2],f_ro[2])),        //9
+                        hex->face(2)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(0,f_or[2],f_fl[2],f_ro[2]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(3,f_or[2],f_fl[2],f_ro[2])),        //10
+                        hex->face(2)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(3,f_or[2],f_fl[2],f_ro[2]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(2,f_or[2],f_fl[2],f_ro[2])),        //11
+
+                        hex->face(3)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(0,f_or[3],f_fl[3],f_ro[3]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(1,f_or[3],f_fl[3],f_ro[3])),        //12
+                        hex->face(3)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(3,f_or[3],f_fl[3],f_ro[3]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(0,f_or[3],f_fl[3],f_ro[3])),        //13
+                        hex->face(3)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(0,f_or[3],f_fl[3],f_ro[3]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(3,f_or[3],f_fl[3],f_ro[3])),        //14
+                        hex->face(3)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(3,f_or[3],f_fl[3],f_ro[3]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(2,f_or[3],f_fl[3],f_ro[3])),        //15
+
+                        hex->face(4)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(0,f_or[4],f_fl[4],f_ro[4]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(1,f_or[4],f_fl[4],f_ro[4])),        //16
+                        hex->face(4)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(3,f_or[4],f_fl[4],f_ro[4]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(0,f_or[4],f_fl[4],f_ro[4])),        //17
+                        hex->face(4)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(0,f_or[4],f_fl[4],f_ro[4]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(3,f_or[4],f_fl[4],f_ro[4])),        //18
+                        hex->face(4)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(3,f_or[4],f_fl[4],f_ro[4]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(2,f_or[4],f_fl[4],f_ro[4])),        //19
+
+                        hex->face(5)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(0,f_or[5],f_fl[5],f_ro[5]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(1,f_or[5],f_fl[5],f_ro[5])),        //20
+                        hex->face(5)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(3,f_or[5],f_fl[5],f_ro[5]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(0,f_or[5],f_fl[5],f_ro[5])),        //21
+                        hex->face(5)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(0,f_or[5],f_fl[5],f_ro[5]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(3,f_or[5],f_fl[5],f_ro[5])),        //22
+                        hex->face(5)->isotropic_child(GeometryInfo<dim>::standard_to_real_face_vertex(3,f_or[5],f_fl[5],f_ro[5]))
+                        ->line(GeometryInfo<dim>::standard_to_real_face_line(2,f_or[5],f_fl[5],f_ro[5])),        //23
+
+                        new_lines[0],                       //24
+                        new_lines[1],                       //25
+                        new_lines[2],                       //26
+                        new_lines[3],                       //27
+                        new_lines[4],                       //28
+                        new_lines[5]                        //29
+                      };
+
+                      lines=&lines_xyz[0];
+
+                      unsigned int line_indices_xyz[30];
+                      for (unsigned int i=0; i<30; ++i)
+                        line_indices_xyz[i]=lines[i]->index();
+                      line_indices=&line_indices_xyz[0];
+
+                      // the orientation of lines for the inner quads
+                      // is quite tricky. as these lines are newly
+                      // created ones and thus have no parents, they
+                      // cannot inherit this property. set up an array
+                      // and fill it with the respective values
+                      bool line_orientation_xyz[30];
+
+                      // note: for the first 24 lines (inner lines of
+                      // the outer quads) the following holds: the
+                      // second vertex of the even lines in standard
+                      // orientation is the vertex in the middle of
+                      // the quad, whereas for odd lines the first
+                      // vertex is the same middle vertex.
+                      for (unsigned int i=0; i<24; ++i)
+                        if (lines[i]->vertex_index((i+1)%2)==vertex_indices[i/4])
+                          line_orientation_xyz[i]=true;
+                        else
+                          {
+                            // it must be the other way
+                            // round then
+                            Assert(lines[i]->vertex_index(i%2)==vertex_indices[i/4],
+                                   ExcInternalError());
+                            line_orientation_xyz[i]=false;
+                          }
+                      // for the last 6 lines the line orientation is
+                      // always true, since they were just constructed
+                      // that way
+                      for (unsigned int i=24; i<30; ++i)
+                        line_orientation_xyz[i]=true;
+                      line_orientation=&line_orientation_xyz[0];
+
+                      // set up the 12 quads, numbered as follows
+                      // (left quad numbering, right line numbering
+                      // extracted from above)
+                      //
+                      //      *          *
+                      //     /|        21|
+                      //    * |        * 15
+                      //  y/|3*      20| *
+                      //  * |/|      * |/|
+                      //  |2* |x    11 * 14
+                      //  |/|1*      |/| *
+                      //  * |/       * |17
+                      //  |0*       10 *
+                      //  |/         |16
+                      //  *          *
+                      //
+                      //  x
+                      //  *---*---*      *22-*-23*
+                      //  | 5 | 7 |      1  29   5
+                      //  *---*---*      *26-*-27*
+                      //  | 4 | 6 |      0  28   4
+                      //  *---*---*y     *18-*-19*
+                      //
+                      //       y
+                      //      *----*----*      *-12-*-13-*
+                      //     / 10 / 11 /      3   25    7
+                      //    *----*----*      *-26-*-27-*
+                      //   / 8  / 9  /      2   24    6
+                      //  *----*----*x     *--8-*--9-*
+
+                      new_quads[0]->set (internal::Triangulation
+                                         ::TriaObject<2>(line_indices[10],
+                                                         line_indices[28],
+                                                         line_indices[16],
+                                                         line_indices[24]));
+                      new_quads[1]->set (internal::Triangulation
+                                         ::TriaObject<2>(line_indices[28],
+                                                         line_indices[14],
+                                                         line_indices[17],
+                                                         line_indices[25]));
+                      new_quads[2]->set (internal::Triangulation
+                                         ::TriaObject<2>(line_indices[11],
+                                                         line_indices[29],
+                                                         line_indices[24],
+                                                         line_indices[20]));
+                      new_quads[3]->set (internal::Triangulation
+                                         ::TriaObject<2>(line_indices[29],
+                                                         line_indices[15],
+                                                         line_indices[25],
+                                                         line_indices[21]));
+                      new_quads[4]->set (internal::Triangulation
+                                         ::TriaObject<2>(line_indices[18],
+                                                         line_indices[26],
+                                                         line_indices[0],
+                                                         line_indices[28]));
+                      new_quads[5]->set (internal::Triangulation
+                                         ::TriaObject<2>(line_indices[26],
+                                                         line_indices[22],
+                                                         line_indices[1],
+                                                         line_indices[29]));
+                      new_quads[6]->set (internal::Triangulation
+                                         ::TriaObject<2>(line_indices[19],
+                                                         line_indices[27],
+                                                         line_indices[28],
+                                                         line_indices[4]));
+                      new_quads[7]->set (internal::Triangulation
+                                         ::TriaObject<2>(line_indices[27],
+                                                         line_indices[23],
+                                                         line_indices[29],
+                                                         line_indices[5]));
+                      new_quads[8]->set (internal::Triangulation
+                                         ::TriaObject<2>(line_indices[2],
+                                                         line_indices[24],
+                                                         line_indices[8],
+                                                         line_indices[26]));
+                      new_quads[9]->set (internal::Triangulation
+                                         ::TriaObject<2>(line_indices[24],
+                                                         line_indices[6],
+                                                         line_indices[9],
+                                                         line_indices[27]));
+                      new_quads[10]->set (internal::Triangulation
+                                          ::TriaObject<2>(line_indices[3],
+                                                          line_indices[25],
+                                                          line_indices[26],
+                                                          line_indices[12]));
+                      new_quads[11]->set (internal::Triangulation
+                                          ::TriaObject<2>(line_indices[25],
+                                                          line_indices[7],
+                                                          line_indices[27],
+                                                          line_indices[13]));
+
+                      // now reset the line_orientation flags of outer
+                      // lines as they cannot be set in a loop (at
+                      // least not easily)
+                      new_quads[0]->set_line_orientation(0,line_orientation[10]);
+                      new_quads[0]->set_line_orientation(2,line_orientation[16]);
+
+                      new_quads[1]->set_line_orientation(1,line_orientation[14]);
+                      new_quads[1]->set_line_orientation(2,line_orientation[17]);
+
+                      new_quads[2]->set_line_orientation(0,line_orientation[11]);
+                      new_quads[2]->set_line_orientation(3,line_orientation[20]);
+
+                      new_quads[3]->set_line_orientation(1,line_orientation[15]);
+                      new_quads[3]->set_line_orientation(3,line_orientation[21]);
+
+                      new_quads[4]->set_line_orientation(0,line_orientation[18]);
+                      new_quads[4]->set_line_orientation(2,line_orientation[0]);
+
+                      new_quads[5]->set_line_orientation(1,line_orientation[22]);
+                      new_quads[5]->set_line_orientation(2,line_orientation[1]);
+
+                      new_quads[6]->set_line_orientation(0,line_orientation[19]);
+                      new_quads[6]->set_line_orientation(3,line_orientation[4]);
+
+                      new_quads[7]->set_line_orientation(1,line_orientation[23]);
+                      new_quads[7]->set_line_orientation(3,line_orientation[5]);
+
+                      new_quads[8]->set_line_orientation(0,line_orientation[2]);
+                      new_quads[8]->set_line_orientation(2,line_orientation[8]);
+
+                      new_quads[9]->set_line_orientation(1,line_orientation[6]);
+                      new_quads[9]->set_line_orientation(2,line_orientation[9]);
+
+                      new_quads[10]->set_line_orientation(0,line_orientation[3]);
+                      new_quads[10]->set_line_orientation(3,line_orientation[12]);
+
+                      new_quads[11]->set_line_orientation(1,line_orientation[7]);
+                      new_quads[11]->set_line_orientation(3,line_orientation[13]);
+
+                      /////////////////////////////////
+                      // create the eight new hexes
+                      //
+                      // again first collect some data.  here, we need
+                      // the indices of a whole lotta quads.
+
+                      // the quads are numbered as follows:
+                      //
+                      // planes in the interior of the old hex:
+                      //
+                      //      *
+                      //     /|
+                      //    * |
+                      //   /|3*  *---*---*      *----*----*
+                      //  * |/|  | 5 | 7 |     / 10 / 11 /
+                      //  |2* |  *---*---*    *----*----*
+                      //  |/|1*  | 4 | 6 |   / 8  / 9  /
+                      //  * |/   *---*---*y *----*----*x
+                      //  |0*
+                      //  |/
+                      //  *
+                      //
+                      // children of the faces
+                      // of the old hex
+                      //      *-------*        *-------*
+                      //     /|25   27|       /34   35/|
+                      //    15|       |      /       /19
+                      //   /  |       |     /32   33/  |
+                      //  *   |24   26|    *-------*18 |
+                      //  1413*-------*    |21   23| 17*
+                      //  |  /30   31/     |       |  /
+                      //  12/       /      |       |16
+                      //  |/28   29/       |20   22|/
+                      //  *-------*        *-------*
+                      //
+                      // note that we have to
+                      // take care of the
+                      // orientation of
+                      // faces.
+                      const int quad_indices_xyz[36]
+                      =
+                      {
+                        new_quads[0]->index(),     //0
+                        new_quads[1]->index(),
+                        new_quads[2]->index(),
+                        new_quads[3]->index(),
+                        new_quads[4]->index(),
+                        new_quads[5]->index(),
+                        new_quads[6]->index(),
+                        new_quads[7]->index(),
+                        new_quads[8]->index(),
+                        new_quads[9]->index(),
+                        new_quads[10]->index(),
+                        new_quads[11]->index(),    //11
+
+                        hex->face(0)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(0,f_or[0],f_fl[0],f_ro[0])),  //12
+                        hex->face(0)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(1,f_or[0],f_fl[0],f_ro[0])),
+                        hex->face(0)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(2,f_or[0],f_fl[0],f_ro[0])),
+                        hex->face(0)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(3,f_or[0],f_fl[0],f_ro[0])),
+
+                        hex->face(1)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(0,f_or[1],f_fl[1],f_ro[1])),  //16
+                        hex->face(1)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(1,f_or[1],f_fl[1],f_ro[1])),
+                        hex->face(1)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(2,f_or[1],f_fl[1],f_ro[1])),
+                        hex->face(1)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(3,f_or[1],f_fl[1],f_ro[1])),
+
+                        hex->face(2)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(0,f_or[2],f_fl[2],f_ro[2])),  //20
+                        hex->face(2)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(1,f_or[2],f_fl[2],f_ro[2])),
+                        hex->face(2)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(2,f_or[2],f_fl[2],f_ro[2])),
+                        hex->face(2)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(3,f_or[2],f_fl[2],f_ro[2])),
+
+                        hex->face(3)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(0,f_or[3],f_fl[3],f_ro[3])),  //24
+                        hex->face(3)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(1,f_or[3],f_fl[3],f_ro[3])),
+                        hex->face(3)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(2,f_or[3],f_fl[3],f_ro[3])),
+                        hex->face(3)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(3,f_or[3],f_fl[3],f_ro[3])),
+
+                        hex->face(4)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(0,f_or[4],f_fl[4],f_ro[4])),  //28
+                        hex->face(4)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(1,f_or[4],f_fl[4],f_ro[4])),
+                        hex->face(4)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(2,f_or[4],f_fl[4],f_ro[4])),
+                        hex->face(4)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(3,f_or[4],f_fl[4],f_ro[4])),
+
+                        hex->face(5)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(0,f_or[5],f_fl[5],f_ro[5])),  //32
+                        hex->face(5)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(1,f_or[5],f_fl[5],f_ro[5])),
+                        hex->face(5)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(2,f_or[5],f_fl[5],f_ro[5])),
+                        hex->face(5)->isotropic_child_index(GeometryInfo<dim>::standard_to_real_face_vertex(3,f_or[5],f_fl[5],f_ro[5]))
+                      };
+                      quad_indices=&quad_indices_xyz[0];
+
+                      // bottom children
+                      new_hexes[0]->set (internal::Triangulation
+                                         ::TriaObject<3>(quad_indices[12],
+                                                         quad_indices[0],
+                                                         quad_indices[20],
+                                                         quad_indices[4],
+                                                         quad_indices[28],
+                                                         quad_indices[8]));
+                      new_hexes[1]->set (internal::Triangulation
+                                         ::TriaObject<3>(quad_indices[0],
+                                                         quad_indices[16],
+                                                         quad_indices[22],
+                                                         quad_indices[6],
+                                                         quad_indices[29],
+                                                         quad_indices[9]));
+                      new_hexes[2]->set (internal::Triangulation
+                                         ::TriaObject<3>(quad_indices[13],
+                                                         quad_indices[1],
+                                                         quad_indices[4],
+                                                         quad_indices[24],
+                                                         quad_indices[30],
+                                                         quad_indices[10]));
+                      new_hexes[3]->set (internal::Triangulation
+                                         ::TriaObject<3>(quad_indices[1],
+                                                         quad_indices[17],
+                                                         quad_indices[6],
+                                                         quad_indices[26],
+                                                         quad_indices[31],
+                                                         quad_indices[11]));
+
+                      // top children
+                      new_hexes[4]->set (internal::Triangulation
+                                         ::TriaObject<3>(quad_indices[14],
+                                                         quad_indices[2],
+                                                         quad_indices[21],
+                                                         quad_indices[5],
+                                                         quad_indices[8],
+                                                         quad_indices[32]));
+                      new_hexes[5]->set (internal::Triangulation
+                                         ::TriaObject<3>(quad_indices[2],
+                                                         quad_indices[18],
+                                                         quad_indices[23],
+                                                         quad_indices[7],
+                                                         quad_indices[9],
+                                                         quad_indices[33]));
+                      new_hexes[6]->set (internal::Triangulation
+                                         ::TriaObject<3>(quad_indices[15],
+                                                         quad_indices[3],
+                                                         quad_indices[5],
+                                                         quad_indices[25],
+                                                         quad_indices[10],
+                                                         quad_indices[34]));
+                      new_hexes[7]->set (internal::Triangulation
+                                         ::TriaObject<3>(quad_indices[3],
+                                                         quad_indices[19],
+                                                         quad_indices[7],
+                                                         quad_indices[27],
+                                                         quad_indices[11],
+                                                         quad_indices[35]));
+                      break;
+                    }
+                    default:
+                      // all refinement cases have been treated, there
+                      // only remains
+                      // RefinementCase<dim>::no_refinement as
+                      // untreated enumeration value. However, in that
+                      // case we should have aborted much
+                      // earlier. thus we should never get here
+                      Assert(false, ExcInternalError());
+                      break;
+                    }//switch (ref_case)
+
+                  // and set face orientation flags. note that new
+                  // faces in the interior of the mother cell always
+                  // have a correctly oriented face, but the ones on
+                  // the outer faces will inherit this flag
+                  //
+                  // the flags have been set to true for all faces
+                  // initially, now go the other way round and reset
+                  // faces that are at the boundary of the mother cube
+                  //
+                  // the same is true for the face_flip and
+                  // face_rotation flags. however, the latter two are
+                  // set to false by default as this is the standard
+                  // value
+
+                  // loop over all faces and all (relevant) subfaces
+                  // of that in order to set the correct values for
+                  // face_orientation, face_flip and face_rotation,
+                  // which are inherited from the corresponding face
+                  // of the mother cube
+                  for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+                    for (unsigned int s=0;
+                         s<std::max(GeometryInfo<dim-1>::n_children(GeometryInfo<dim>::face_refinement_case(ref_case,f)),
+                                    1U);
+                         ++s)
+                      {
+                        const unsigned int current_child
+                          =GeometryInfo<dim>::child_cell_on_face(ref_case,
+                                                                 f,
+                                                                 s,
+                                                                 f_or[f],
+                                                                 f_fl[f],
+                                                                 f_ro[f],
+                                                                 GeometryInfo<dim>::face_refinement_case(ref_case,
+                                                                     f,
+                                                                     f_or[f],
+                                                                     f_fl[f],
+                                                                     f_ro[f]));
+                        new_hexes[current_child]->set_face_orientation (f, f_or[f]);
+                        new_hexes[current_child]->set_face_flip        (f, f_fl[f]);
+                        new_hexes[current_child]->set_face_rotation    (f, f_ro[f]);
+                      }
+
+                  // now see if we have created cells that are
+                  // distorted and if so add them to our list
+                  if ((check_for_distorted_cells == true)
+                      &&
+                      has_distorted_children (hex,
+                                              internal::int2type<dim>(),
+                                              internal::int2type<spacedim>()))
+                    cells_with_distorted_children.distorted_cells.push_back (hex);
+
+                  // note that the refinement flag was already cleared
+                  // at the beginning of this loop
+
+                  // inform all listeners that cell refinement is done
+                  triangulation.signals.post_refinement_on_cell(hex);
+                }
+          }
+
+        // clear user data on quads. we used some of this data to
+        // indicate anisotropic refinement cases on faces. all data
+        // should be cleared by now, but the information whether we
+        // used indices or pointers is still present. reset it now to
+        // enable the user to use whichever he likes later on.
+        triangulation.faces->quads.clear_user_data();
+
+        // return the list with distorted children
+        return cells_with_distorted_children;
+      }
+
+
+      /**
+       * At the boundary of the domain, the new point on the face may
+       * be far inside the current cell, if the boundary has a strong
+       * curvature. If we allow anisotropic refinement here, the
+       * resulting cell may be strongly distorted. To prevent this,
+       * this function flags such cells for isotropic refinement. It
+       * is called automatically from
+       * prepare_coarsening_and_refinement().
+       *
+       * This function does nothing in 1d (therefore the
+       * specialization).
+       *
+       * Note: what follows is only the declaration of the 1d
+       * overload (it takes a Triangulation<1,spacedim> by const
+       * reference); its definition is not located next to this
+       * declaration.
+       */
+      template <int spacedim>
+      static
+      void
+      prevent_distorted_boundary_cells (const Triangulation<1,spacedim> &);
+
+
+      /**
+       * Generic (non-1d) version: walk over all cells that sit at
+       * the boundary and carry an anisotropic refinement flag. For
+       * every boundary face of such a cell that would not be
+       * refined isotropically, compute where the new boundary point
+       * of that face would end up in unit-cell coordinates. If it
+       * lies too far away from the face, extend the refinement flag
+       * of the cell so that the face gets refined as well.
+       */
+      template <int dim, int spacedim>
+      static
+      void
+      prevent_distorted_boundary_cells (Triangulation<dim,spacedim> &triangulation)
+      {
+        // For codimension one this check is not available yet.
+        if (spacedim>dim) return;
+
+        typedef typename Triangulation<dim,spacedim>::cell_iterator cell_iterator;
+        typedef typename Triangulation<dim,spacedim>::face_iterator face_iterator;
+
+        // empirical limit for the unit-cell distance between the new
+        // boundary point and the face it belongs to
+        const double allowed=0.25;
+
+        for (cell_iterator cell=triangulation.begin();
+             cell!=triangulation.end(); ++cell)
+          {
+            // only boundary cells that are flagged for an
+            // anisotropic refinement are of interest
+            const bool anisotropic_at_boundary
+              = (cell->at_boundary() &&
+                 cell->refine_flag_set() &&
+                 cell->refine_flag_set()!=RefinementCase<dim>::isotropic_refinement);
+            if (!anisotropic_at_boundary)
+              continue;
+
+            // remember the flag as it was when we started with this
+            // cell
+            const RefinementCase<dim> ref_case=cell->refine_flag_set();
+
+            for (unsigned int face_no=0;
+                 face_no<GeometryInfo<dim>::faces_per_cell;
+                 ++face_no)
+              {
+                // interior faces are never critical
+                if (!cell->face(face_no)->at_boundary())
+                  continue;
+
+                // a face that is going to be refined isotropically
+                // anyway needs no further attention
+                if (GeometryInfo<dim>::face_refinement_case(ref_case,face_no)
+                    ==RefinementCase<dim-1>::isotropic_refinement)
+                  continue;
+
+                // so far this cell is not going to be refined along
+                // the boundary face under consideration
+                const face_iterator face = cell->face(face_no);
+
+                // the point that would be created on the boundary
+                const Point<spacedim> new_bound = face->center(true);
+
+                // pull that point back to the unit cell using the
+                // Q1 mapping
+                const Point<dim> new_unit
+                  = StaticMappingQ1<dim,spacedim>::mapping.
+                    transform_real_to_unit_cell(cell, new_bound);
+
+                // distance from the face in the unit cell: the
+                // relevant coordinate direction is 0 for faces 0
+                // and 1, 1 for faces 2 and 3, 2 for faces 4 and 5;
+                // the face itself sits at 0 for even and at 1 for
+                // odd face numbers
+                const double dist = std::fabs(new_unit[face_no/2] - face_no%2);
+
+                // too far away: refine along this face as well
+                if (dist>allowed)
+                  cell->flag_for_face_refinement(face_no);
+              }
+          }
+      }
+
+
+      /**
+       * Dimension dependent part of mesh smoothing.
+       *
+       * This generic version performs no work; it only asserts that
+       * it is not used for dim==3, for which a specialization
+       * exists. In 3d, that specialization makes sure that no two
+       * cells with a level difference greater than one share a
+       * line. This is required for the interpolation of hanging
+       * nodes, since otherwise two steps of interpolation would be
+       * necessary. That would make the processes implemented in the
+       * @p ConstraintMatrix class much more complex, because these
+       * two interpolation steps do not commute.
+       */
+      template <int dim, int spacedim>
+      static void
+      prepare_refinement_dim_dependent (const Triangulation<dim,spacedim> &)
+      {
+        // reaching this point with dim==3 means overload resolution
+        // picked the wrong function
+        Assert (dim < 3,
+                ExcMessage ("Wrong function called -- there should "
+                            "be a specialization."));
+      }
+
+
+      /**
+       * Specialization of the dimension dependent part of mesh
+       * smoothing for 3d.
+       *
+       * The function uses the user flags of lines to mark lines
+       * that are refined (and stay so) or that are going to be
+       * refined, and the user flags of hexes to mark cells whose
+       * children are going to be coarsened. Both kinds of user flags
+       * are cleared at the beginning, i.e. whatever they contained
+       * before is lost.
+       *
+       * It then repeats the following until nothing changes any
+       * more: active cells having a refined line one of whose
+       * children is marked, while the cell itself would not refine
+       * that line, get their refine flag set or extended; cells
+       * marked for coarsening that have a refined line with a marked
+       * child get the coarsen flags of their children removed.
+       */
+      template <int spacedim>
+      static
+      void
+      prepare_refinement_dim_dependent (Triangulation<3,spacedim> &triangulation)
+      {
+        const unsigned int dim = 3;
+
+        // first clear flags on lines, since we need them to determine
+        // which lines will be refined
+        triangulation.clear_user_flags_line();
+
+        // also clear flags on hexes, since we need them to mark those
+        // cells which are to be coarsened
+        triangulation.clear_user_flags_hex();
+
+        // variable to store whether the mesh was changed in the
+        // present loop and in the whole process
+        bool mesh_changed      = false;
+
+        do
+          {
+            mesh_changed = false;
+
+            // for the following, we need to know which cells are
+            // going to be coarsened, if we had to make a
+            // decision. the following function sets these flags:
+            triangulation.fix_coarsen_flags ();
+
+
+            // flag those lines that are refined and will not be
+            // coarsened and those that will be refined
+            for (typename Triangulation<dim,spacedim>::cell_iterator
+                 cell=triangulation.begin(); cell!=triangulation.end(); ++cell)
+              if (cell->refine_flag_set())
+                {
+                  for (unsigned int line=0; line<GeometryInfo<dim>::lines_per_cell; ++line)
+                    if (GeometryInfo<dim>::line_refinement_case(cell->refine_flag_set(), line)
+                        ==RefinementCase<1>::cut_x)
+                      // flag a line, that will be
+                      // refined
+                      cell->line(line)->set_user_flag();
+                }
+              else if (cell->has_children() && !cell->child(0)->coarsen_flag_set())
+                {
+                  for (unsigned int line=0; line<GeometryInfo<dim>::lines_per_cell; ++line)
+                    if (GeometryInfo<dim>::line_refinement_case(cell->refinement_case(), line)
+                        ==RefinementCase<1>::cut_x)
+                      // flag a line, that is refined
+                      // and will stay so
+                      cell->line(line)->set_user_flag();
+                }
+              else if (cell->has_children() && cell->child(0)->coarsen_flag_set())
+                // children of this cell are going to be coarsened
+                // away; remember that on the cell itself
+                cell->set_user_flag();
+
+
+            // now check whether there are cells with lines that are
+            // more than once refined or that will be more than once
+            // refined. The first thing should never be the case, in
+            // the second case we flag the cell for refinement
+            for (typename Triangulation<dim,spacedim>::active_cell_iterator
+                 cell=triangulation.last_active(); cell!=triangulation.end(); --cell)
+              for (unsigned int line=0; line<GeometryInfo<dim>::lines_per_cell; ++line)
+                {
+                  if (cell->line(line)->has_children())
+                    {
+                      // if this line is refined, its children should
+                      // not have further children
+                      //
+                      // however, if any of the children is flagged
+                      // for further refinement, we need to refine
+                      // this cell also (at least, if the cell is not
+                      // already flagged)
+                      bool offending_line_found = false;
+
+                      for (unsigned int c=0; c<2; ++c)
+                        {
+                          Assert (cell->line(line)->child(c)->has_children() == false,
+                                  ExcInternalError());
+
+                          if (cell->line(line)->child(c)->user_flag_set () &&
+                              (GeometryInfo<dim>::line_refinement_case(cell->refine_flag_set(),
+                                                                       line)
+                               ==RefinementCase<1>::no_refinement))
+                            {
+                              // tag this cell for refinement
+                              cell->clear_coarsen_flag ();
+                              // if anisotropic smoothing is allowed:
+                              // extend the refine_flag in the needed
+                              // direction, else set refine_flag
+                              // (isotropic)
+                              if (triangulation.smooth_grid &
+                                  Triangulation<dim,spacedim>::allow_anisotropic_smoothing)
+                                cell->flag_for_line_refinement(line);
+                              else
+                                cell->set_refine_flag();
+
+                              // it may save us several loop
+                              // iterations if we flag the lines of
+                              // this cell that will be refined right
+                              // now (and not at the outset of the
+                              // next iteration).
+                              //
+                              // the refinement case has to be
+                              // queried for each line @p l
+                              // individually: asking for the
+                              // offending @p line instead yields the
+                              // same answer for every l and would
+                              // mark lines this cell is not going
+                              // to refine at all if the refine flag
+                              // is anisotropic
+                              for (unsigned int l=0; l<GeometryInfo<dim>::lines_per_cell; ++l)
+                                if (GeometryInfo<dim>::line_refinement_case(cell->refine_flag_set(), l)
+                                    ==RefinementCase<1>::cut_x)
+                                  // flag a line, that will be refined
+                                  cell->line(l)->set_user_flag();
+
+                              // note that we have changed the grid
+                              offending_line_found = true;
+
+                              break;
+                            }
+                        }
+
+                      if (offending_line_found)
+                        {
+                          mesh_changed = true;
+                          break;
+                        }
+                    }
+                }
+
+
+            // there is another thing here: if any of the lines will
+            // be refined, then we may not coarsen the present cell
+            // similarly, if any of the lines *is* already refined, we
+            // may not coarsen the current cell. however, there's a
+            // catch: if the line is refined, but the cell behind it
+            // is going to be coarsened, then the situation
+            // changes. if we forget this second condition, the
+            // refine_and_coarsen_3d test will start to fail. note
+            // that to know which cells are going to be coarsened, the
+            // call for fix_coarsen_flags above is necessary
+            for (typename Triangulation<dim,spacedim>::cell_iterator
+                 cell=triangulation.last(); cell!=triangulation.end(); --cell)
+              {
+                if (cell->user_flag_set())
+                  for (unsigned int line=0; line<GeometryInfo<dim>::lines_per_cell; ++line)
+                    if (cell->line(line)->has_children() &&
+                        (cell->line(line)->child(0)->user_flag_set() ||
+                         cell->line(line)->child(1)->user_flag_set()))
+                      {
+                        // this cell must stay refined: withdraw the
+                        // coarsen flags of all its children and
+                        // remove the mark from the cell
+                        for (unsigned int c=0; c<cell->n_children(); ++c)
+                          cell->child(c)->clear_coarsen_flag ();
+                        cell->clear_user_flag();
+                        for (unsigned int l=0; l<GeometryInfo<dim>::lines_per_cell; ++l)
+                          if (GeometryInfo<dim>::line_refinement_case(cell->refinement_case(), l)
+                              ==RefinementCase<1>::cut_x)
+                            // flag a line, that is refined
+                            // and will stay so
+                            cell->line(l)->set_user_flag();
+                        mesh_changed = true;
+                        break;
+                      }
+              }
+          }
+        while (mesh_changed == true);
+      }
+
+
+
+      /**
+       * Helper function for @p fix_coarsen_flags. Tell whether the
+       * children of the given cell may be removed.
+       *
+       * The answer is @p false as soon as, across one of the faces
+       * along which this cell is refined, a neighbor of one of the
+       * children either has children itself without having its user
+       * flag set, or is flagged for refinement. In 1d the answer is
+       * always @p true.
+       */
+      template <int dim, int spacedim>
+      static
+      bool
+      coarsening_allowed (const typename Triangulation<dim,spacedim>::cell_iterator &cell)
+      {
+        // no 2:1 constraint is enforced in 1d, hence there is
+        // nothing that could forbid coarsening
+        if (dim == 1)
+          return true;
+
+        typedef typename Triangulation<dim,spacedim>::cell_iterator cell_iterator;
+
+        const RefinementCase<dim> ref_case = cell->refinement_case();
+
+        for (unsigned int face_no=0; face_no<GeometryInfo<dim>::faces_per_cell; ++face_no)
+          {
+            // how is this face refined as a consequence of the
+            // refinement of the cell, and how many children of the
+            // face does that amount to?
+            const RefinementCase<dim-1> face_ref_case
+              = GeometryInfo<dim>::face_refinement_case(ref_case, face_no);
+            const unsigned int n_subfaces
+              = GeometryInfo<dim-1>::n_children(face_ref_case);
+
+            // faces the cell is not refined along are unaffected by
+            // coarsening; boundary faces have no neighbor to worry
+            // about
+            if (n_subfaces == 0 || cell->at_boundary(face_no))
+              continue;
+
+            for (unsigned int subface=0; subface<n_subfaces; ++subface)
+              {
+                const cell_iterator child
+                  = cell->child(GeometryInfo<dim>::child_cell_on_face(ref_case,
+                                                                      face_no,
+                                                                      subface));
+                const cell_iterator child_neighbor = child->neighbor(face_no);
+
+                // in 2d a coarser neighbor of the child has no
+                // children, but in 3d it may: if our face is
+                // refined with cut_x while the neighbor is refined
+                // with cut_xy at that face, the neighbor pointers
+                // of our children refer to the common neighbor
+                // cell and not to its children. what we want to
+                // find out is whether the neighbor is refined twice
+                // with respect to our cell, and that question only
+                // arises if the child's neighbor is not a coarser
+                // one
+                if (child->neighbor_is_coarser(face_no))
+                  continue;
+
+                // a neighbor with children is refined further along
+                // the face; this is acceptable only if it carries
+                // the user flag. a neighbor that is flagged for
+                // refinement forbids coarsening as well
+                if ((child_neighbor->has_children() &&
+                     !child_neighbor->user_flag_set())
+                    ||
+                    child_neighbor->refine_flag_set())
+                  return false;
+              }
+          }
+
+        return true;
+      }
+    };
+  }
+}
+
+
+// Out-of-class definition of the static member straight_boundary: a
+// single default-constructed, constant StraightBoundary object per
+// instantiation of the Triangulation class template.
+template <int dim, int spacedim>
+const StraightBoundary<dim,spacedim>
+Triangulation<dim, spacedim>::straight_boundary = StraightBoundary<dim,spacedim>();
+
+
+
+// Out-of-class definition of the static constant Triangulation::dimension.
+// No initializer is given at this point.
+template <int dim, int spacedim>
+const unsigned int
+Triangulation<dim, spacedim>::dimension;
+
+
+
+// Constructor. Stores the smoothing flags and the distorted-cell
+// switch, starts without face storage and, in 1d only, allocates the
+// two per-vertex id maps.
+template <int dim, int spacedim>
+Triangulation<dim, spacedim>::
+Triangulation (const MeshSmoothing smooth_grid,
+               const bool check_for_distorted_cells)
+  :
+  smooth_grid(smooth_grid),
+  faces(NULL),
+  anisotropic_refinement(false),
+  check_for_distorted_cells(check_for_distorted_cells),
+  vertex_to_boundary_id_map_1d (NULL),
+  vertex_to_manifold_id_map_1d (NULL)
+{
+  // the per-vertex maps are only allocated for one-dimensional
+  // triangulations; otherwise both pointers stay null
+  if (dim == 1)
+    {
+      typedef std::map<unsigned int, types::boundary_id> BoundaryIdMap;
+      typedef std::map<unsigned int, types::manifold_id> ManifoldIdMap;
+
+      vertex_to_boundary_id_map_1d = new BoundaryIdMap();
+      vertex_to_manifold_id_map_1d = new ManifoldIdMap();
+    }
+
+  // hook any_change up to each of the other top level signals
+  signals.create.connect (signals.any_change);
+  signals.post_refinement.connect (signals.any_change);
+  signals.clear.connect (signals.any_change);
+}
+
+
+template <int dim, int spacedim>
+Triangulation<dim, spacedim>::
+Triangulation (const Triangulation<dim, spacedim> &other)
+// do not set any subscriptors;
+// anyway, calling this constructor
+// is an error!
+  :
+  Subscriptor(),
+  check_for_distorted_cells(other.check_for_distorted_cells),
+  vertex_to_boundary_id_map_1d (0),
+  vertex_to_manifold_id_map_1d (0)
+{
+  Assert (false, ExcMessage ("You are not allowed to call this constructor "
+                             "because copying Triangulation objects is not "
+                             "allowed. Use Triangulation::copy_from() instead."));
+}
+
+
+
+// Destructor. Frees the level objects, the face object and the two
+// per-vertex id maps.
+template <int dim, int spacedim>
+Triangulation<dim, spacedim>::~Triangulation ()
+{
+  // release the level objects one after the other, then the faces
+  unsigned int level = 0;
+  while (level < levels.size())
+    {
+      delete levels[level];
+      ++level;
+    }
+  levels.clear ();
+  delete faces;
+
+  // outside of 1d the boundary id map must never have been allocated
+  Assert ((dim == 1)
+          ||
+          (vertex_to_boundary_id_map_1d == 0),
+          ExcInternalError());
+  delete vertex_to_boundary_id_map_1d;
+
+  // the same holds for the manifold id map
+  Assert ((dim == 1)
+          ||
+          (vertex_to_manifold_id_map_1d == 0),
+          ExcInternalError());
+  delete vertex_to_manifold_id_map_1d;
+}
+
+
+
+// Calls clear_despite_subscriptions() and afterwards triggers the
+// clear signal.
+template <int dim, int spacedim>
+void
+Triangulation<dim, spacedim>::clear ()
+{
+  this->clear_despite_subscriptions();
+  this->signals.clear();
+}
+
+
+
+// Replace the stored mesh smoothing flags. Asserts that the
+// triangulation has no levels yet.
+template <int dim, int spacedim>
+void
+Triangulation<dim, spacedim>::set_mesh_smoothing(const MeshSmoothing mesh_smoothing)
+{
+  Assert (n_levels() == 0,
+          ExcTriangulationNotEmpty (vertices.size(), levels.size()));
+
+  this->smooth_grid = mesh_smoothing;
+}
+
+
+
+// Forwards both arguments unchanged to set_manifold().
+template <int dim, int spacedim>
+void
+Triangulation<dim, spacedim>::set_boundary (const types::manifold_id m_number,
+                                            const Boundary<dim, spacedim> &boundary_object)
+{
+  this->set_manifold (m_number, boundary_object);
+}
+
+// Store a pointer to manifold_object under the id m_number, replacing
+// any entry already present for that id. The id must be smaller than
+// numbers::invalid_manifold_id.
+template <int dim, int spacedim>
+void
+Triangulation<dim, spacedim>::set_manifold (const types::manifold_id m_number,
+                                            const Manifold<dim, spacedim> &manifold_object)
+{
+  Assert(m_number < numbers::invalid_manifold_id,
+         ExcIndexRange(m_number,0,numbers::invalid_manifold_id));
+
+  // only the address is kept; the object itself is not copied
+  const Manifold<dim, spacedim> *const manifold_address = &manifold_object;
+  manifold[m_number] = manifold_address;
+}
+
+
+// Forwards to the one-argument set_manifold().
+template <int dim, int spacedim>
+void
+Triangulation<dim, spacedim>::set_boundary (const types::manifold_id m_number)
+{
+  this->set_manifold (m_number);
+}
+
+
+// Remove whatever is stored under the id m_number from the manifold
+// map. The id must be smaller than numbers::invalid_manifold_id.
+template <int dim, int spacedim>
+void
+Triangulation<dim, spacedim>::set_manifold (const types::manifold_id m_number)
+{
+  Assert(m_number < numbers::invalid_manifold_id,
+         ExcIndexRange(m_number,0,numbers::invalid_manifold_id));
+
+  // erasing by key; nothing happens if there is no such entry
+  this->manifold.erase(m_number);
+}
+
+// Call set_all_manifold_ids(m_number) on every active cell.
+template <int dim, int spacedim>
+void
+Triangulation<dim, spacedim>::set_all_manifold_ids (const types::manifold_id m_number)
+{
+  typename Triangulation<dim,spacedim>::active_cell_iterator
+  current=this->begin_active(), past_the_end=this->end();
+
+  while (current != past_the_end)
+    {
+      current->set_all_manifold_ids(m_number);
+      ++current;
+    }
+}
+
+
+// Call set_all_manifold_ids(m_number) on every face of an active cell
+// that reports being at the boundary.
+template <int dim, int spacedim>
+void
+Triangulation<dim, spacedim>::set_all_manifold_ids_on_boundary (const types::manifold_id m_number)
+{
+  typename Triangulation<dim,spacedim>::active_cell_iterator
+  current=this->begin_active(), past_the_end=this->end();
+
+  for (; current != past_the_end; ++current)
+    for (unsigned int face_no=0; face_no<GeometryInfo<dim>::faces_per_cell; ++face_no)
+      {
+        // interior faces are left alone
+        if (!current->face(face_no)->at_boundary())
+          continue;
+
+        current->face(face_no)->set_all_manifold_ids(m_number);
+      }
+}
+
+
+// Set the manifold id m_number on all boundary faces (and, for
+// dim >= 3, boundary lines) of active cells whose boundary id equals
+// b_id. Asserts that at least one such object was found.
+template <int dim, int spacedim>
+void
+Triangulation<dim, spacedim>::set_all_manifold_ids_on_boundary (const types::boundary_id b_id,
+    const types::manifold_id m_number)
+{
+  // becomes true as soon as one matching boundary object is seen
+  bool boundary_found = false;
+
+  typename Triangulation<dim,spacedim>::active_cell_iterator
+  cell=this->begin_active(), endc=this->end();
+
+  while (cell != endc)
+    {
+      // faces first
+      for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+        {
+          if (!cell->face(f)->at_boundary())
+            continue;
+          if (!(cell->face(f)->boundary_id()==b_id))
+            continue;
+
+          boundary_found = true;
+          cell->face(f)->set_manifold_id(m_number);
+        }
+
+      // lines are only treated separately if dim >= 3
+      if (dim>=3)
+        {
+          for (unsigned int e=0; e<GeometryInfo<dim>::lines_per_cell; ++e)
+            {
+              if (!cell->line(e)->at_boundary())
+                continue;
+              if (!(cell->line(e)->boundary_id()==b_id))
+                continue;
+
+              boundary_found = true;
+              cell->line(e)->set_manifold_id(m_number);
+            }
+        }
+
+      ++cell;
+    }
+
+  // the cast keeps the variable "used" when Assert expands to nothing
+  (void)boundary_found;
+  Assert(boundary_found, ExcBoundaryIdNotFound(b_id));
+}
+
+
+// Return the object get_manifold(m_number) yields, viewed as a
+// Boundary. Asserts that the dynamic cast to Boundary succeeds.
+template <int dim, int spacedim>
+const Boundary<dim,spacedim> &
+Triangulation<dim, spacedim>::get_boundary (const types::manifold_id m_number) const
+{
+  // look up the manifold first, then try to view it as a Boundary
+  const Manifold<dim, spacedim> &stored_manifold = get_manifold(m_number);
+  const Boundary<dim, spacedim> *man =
+    dynamic_cast<const Boundary<dim, spacedim> *>(&stored_manifold);
+
+  Assert(man != NULL,
+         ExcMessage("You tried to get a Boundary, but I only have a Manifold."));
+
+  return *man;
+}
+
+
+// Return the manifold stored under the id m_number, or
+// straight_boundary if the map holds no entry for that id.
+template <int dim, int spacedim>
+const Manifold<dim,spacedim> &
+Triangulation<dim, spacedim>::get_manifold (const types::manifold_id m_number) const
+{
+  typedef std::map<types::manifold_id,
+          SmartPointer<const Manifold<dim,spacedim>, Triangulation<dim, spacedim> > >
+          ManifoldMap;
+
+  const typename ManifoldMap::const_iterator entry = manifold.find(m_number);
+
+  // nothing registered for this id: fall back to straight_boundary
+  if (entry == manifold.end())
+    return straight_boundary;
+
+  // otherwise hand out the registered object
+  return *(entry->second);
+}
+
+
+
+
+// Return the boundary ids in use, each one exactly once and in
+// ascending order.
+//
+// In 1d the ids are taken from vertex_to_boundary_id_map_1d, in all
+// other dimensions from the boundary faces of the active cells. Both
+// paths collect into a std::set, so that an id shared by several
+// vertices or faces shows up only once in the result.
+template <int dim, int spacedim>
+std::vector<types::boundary_id>
+Triangulation<dim, spacedim>::get_boundary_ids () const
+{
+  std::set<types::boundary_id> b_ids;
+
+  if (dim == 1)
+    {
+      // in 1d, we store a map of all used boundary indicators. use it
+      // for our purposes; several vertices may carry the same id, which
+      // the set reduces to a single entry
+      for (std::map<unsigned int, types::boundary_id>::const_iterator
+           p = vertex_to_boundary_id_map_1d->begin();
+           p !=  vertex_to_boundary_id_map_1d->end();
+           ++p)
+        b_ids.insert (p->second);
+    }
+  else
+    {
+      // otherwise look at every boundary face of every active cell
+      active_cell_iterator cell=begin_active();
+      for (; cell!=end(); ++cell)
+        for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+          if (cell->at_boundary(face))
+            b_ids.insert(cell->face(face)->boundary_id());
+    }
+
+  std::vector<types::boundary_id> boundary_ids(b_ids.begin(), b_ids.end());
+  return boundary_ids;
+}
+
+
+
+// Returns exactly what get_boundary_ids() returns.
+template <int dim, int spacedim>
+std::vector<types::boundary_id>
+Triangulation<dim, spacedim>::get_boundary_indicators () const
+{
+  return this->get_boundary_ids();
+}
+
+
+
+// Return the manifold ids found on the active cells and, for dim > 1,
+// on their boundary faces. Each id appears once, in ascending order.
+template <int dim, int spacedim>
+std::vector<types::manifold_id>
+Triangulation<dim, spacedim>::get_manifold_ids () const
+{
+  // the set takes care of duplicates and of the ordering
+  std::set<types::manifold_id> m_ids;
+
+  for (active_cell_iterator cell=begin_active(); cell!=end(); ++cell)
+    {
+      // the id of the cell itself
+      m_ids.insert(cell->manifold_id());
+
+      // faces are not looked at in 1d
+      if (!(dim>1))
+        continue;
+
+      for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+        if (cell->at_boundary(face))
+          m_ids.insert(cell->face(face)->manifold_id());
+    }
+
+  return std::vector<types::manifold_id>(m_ids.begin(), m_ids.end());
+}
+
+/*-----------------------------------------------------------------*/
+
+
+// Make this (empty) triangulation a copy of old_tria.
+//
+// Copies vertices, flags, face and level storage, the manifold map,
+// the number cache and (in 1d) the per-vertex id maps. Afterwards the
+// copy signal of old_tria and the create signal of this object are
+// triggered.
+//
+// Preconditions (checked by assertions): this object holds no
+// vertices, levels or faces; old_tria holds vertices and levels and,
+// unless dim == 1, a face object.
+template <int dim, int spacedim>
+void
+Triangulation<dim, spacedim>::
+copy_triangulation (const Triangulation<dim, spacedim> &old_tria)
+{
+  Assert ((vertices.size() == 0) &&
+          (levels.size () == 0) &&
+          (faces == NULL),
+          ExcTriangulationNotEmpty(vertices.size(), levels.size()));
+  Assert ((old_tria.levels.size() != 0) &&
+          (old_tria.vertices.size() != 0) &&
+          (dim == 1 || old_tria.faces != NULL),
+          ExcMessage("When calling Triangulation::copy_triangulation(), "
+                     "the target triangulation must be empty but the source "
+                     "triangulation (the argument to this function) must contain "
+                     "something. Here, it seems like the source does not "
+                     "contain anything at all."));
+
+
+  // copy normal elements
+  vertices               = old_tria.vertices;
+  vertices_used          = old_tria.vertices_used;
+  anisotropic_refinement = old_tria.anisotropic_refinement;
+  smooth_grid            = old_tria.smooth_grid;
+
+  // the precondition above permits a source without face object in 1d;
+  // only dereference the pointer if there is something to copy and
+  // otherwise leave our own pointer null
+  if (old_tria.faces != NULL)
+    faces = new internal::Triangulation::TriaFaces<dim>(*old_tria.faces);
+
+  // take over all registered manifold objects, id by id
+  typename std::map<types::manifold_id,
+           SmartPointer<const Manifold<dim,spacedim> , Triangulation<dim, spacedim> > >::const_iterator
+           bdry_iterator = old_tria.manifold.begin();
+  for (; bdry_iterator != old_tria.manifold.end() ; ++bdry_iterator)
+    manifold[bdry_iterator->first] = bdry_iterator->second;
+
+
+  // deep-copy every level object
+  levels.reserve (old_tria.levels.size());
+  for (unsigned int level=0; level<old_tria.levels.size(); ++level)
+    levels.push_back (new
+                      internal::Triangulation::
+                      TriaLevel<dim>(*old_tria.levels[level]));
+
+  number_cache = old_tria.number_cache;
+
+  if (dim == 1)
+    {
+      // replace our own (constructor-allocated) maps by copies of the
+      // source's maps
+      delete vertex_to_boundary_id_map_1d;
+      vertex_to_boundary_id_map_1d
+        = (new std::map<unsigned int, types::boundary_id>
+           (*old_tria.vertex_to_boundary_id_map_1d));
+
+      delete vertex_to_manifold_id_map_1d;
+      vertex_to_manifold_id_map_1d
+        = (new std::map<unsigned int, types::manifold_id>
+           (*old_tria.vertex_to_manifold_id_map_1d));
+    }
+
+  // inform those who are listening on old_tria of the copy operation
+  old_tria.signals.copy (*this);
+  // also inform all listeners of the current triangulation that the
+  // triangulation has been created
+  signals.create();
+
+  // note that we need not copy the
+  // subscriptor!
+}
+
+
+
+// Run reorder_compatibility() on copies of the cell and subcell data
+// and pass the result on to create_triangulation().
+template <int dim, int spacedim>
+void
+Triangulation<dim,spacedim>::
+create_triangulation_compatibility (const std::vector<Point<spacedim> > &v,
+                                    const std::vector<CellData<dim> >   &cells,
+                                    const SubCellData                   &subcelldata)
+{
+  // the arguments are const, so the reordering is done on copies
+  std::vector<CellData<dim> > cells_new_ordering (cells);
+  SubCellData                 subcelldata_new_ordering (subcelldata);
+
+  reorder_compatibility (cells_new_ordering, subcelldata_new_ordering);
+
+  // the vertices are handed through unchanged
+  create_triangulation(v, cells_new_ordering, subcelldata_new_ordering);
+}
+
+
+
+// Build the triangulation from the given vertices, cells and subcell
+// data.
+//
+// Steps, in order:
+//  - assert that this object is empty and that subcelldata is
+//    consistent with dim;
+//  - delegate the construction to
+//    internal::Triangulation::Implementation::create_triangulation();
+//    if that throws, clear_despite_subscriptions() is called and the
+//    exception is rethrown;
+//  - recompute the number cache and reset the active cell indices;
+//  - if check_for_distorted_cells is set, collect distorted coarse
+//    cells and throw the resulting list if it is not empty;
+//  - if dim < spacedim, assign direction flags to all active cells;
+//  - trigger the create signal.
+template <int dim, int spacedim>
+void
+Triangulation<dim,spacedim>::
+create_triangulation (const std::vector<Point<spacedim> >    &v,
+                      const std::vector<CellData<dim> > &cells,
+                      const SubCellData &subcelldata)
+{
+  Assert ((vertices.size() == 0) &&
+          (levels.size () == 0) &&
+          (faces == NULL),
+          ExcTriangulationNotEmpty(vertices.size(), levels.size()));
+  // check that no forbidden arrays
+  // are used
+  Assert (subcelldata.check_consistency(dim), ExcInternalError());
+
+  // try to create a triangulation; if this fails, we still want to
+  // throw an exception but if we just do so we'll get into trouble
+  // because sometimes other objects are already attached to it:
+  try
+    {
+      internal::Triangulation::Implementation::create_triangulation (v, cells, subcelldata, *this);
+    }
+  catch (...)
+    {
+      clear_despite_subscriptions();
+      throw;
+    }
+
+  // update our counts of the various elements of a triangulation, and set
+  // active_cell_indices of all cells
+  internal::Triangulation::Implementation
+  ::compute_number_cache (*this, levels.size(), number_cache);
+  reset_active_cell_indices ();
+
+  // now verify that there are indeed no distorted cells. as per the
+  // documentation of this class, we first collect all distorted cells
+  // and then throw an exception if there are any
+  if (check_for_distorted_cells == true)
+    {
+      DistortedCellList distorted_cells = collect_distorted_coarse_cells (*this);
+      // throw the array (and fill the various location fields) if
+      // there are distorted cells. otherwise, just fall off the end
+      // of the function
+      AssertThrow (distorted_cells.distorted_cells.size() == 0,
+                   distorted_cells);
+    }
+
+
+  /*
+      When the triangulation is a manifold (dim < spacedim), the normal field
+      provided from the map class depends on the order of the vertices.
+      It may happen that this normal field is discontinuous.
+      The following code takes care that this is not the case by setting the
+      cell direction flag on those cells that produce the wrong orientation.
+
+      To determine if 2 neighbours have the same or opposite orientation
+      we use a table of truth.
+      Its entries are indexed by the local indices of the common face.
+      For example if two elements share a face, and this face is
+      face 0 for element 0 and face 1 for element 1, then
+      table(0,1) will tell whether the orientations are the same (true) or
+      opposite (false).
+
+      Even though there may be a combinatorial/graph theory argument to get
+      this table in any dimension, I tested by hand all the different possible
+      cases in 1D and 2D to generate the table.
+
+      Assuming that a surface respects the standard orientation for 2d meshes,
+      the tables of truth are symmetric and their true values are the following
+      1D curves:  (0,1)
+      2D surface: (0,1),(0,2),(1,3),(2,3)
+
+      We store this data using an n_faces x n_faces full matrix, which is actually
+      much bigger than the minimal data required, but it makes the code more readable.
+
+    */
+  if (dim < spacedim)
+    {
+      // correct(i,j): truth table described above, filled from the
+      // hard-coded arrays below for dim == 1 and dim == 2
+      Table<2,bool> correct(GeometryInfo< dim >::faces_per_cell,
+                            GeometryInfo< dim >::faces_per_cell);
+      switch (dim)
+        {
+        case 1:
+        {
+          bool values [][2] = {{false,true},
+            {true,false}
+          };
+          for (unsigned int i=0; i< GeometryInfo< dim >::faces_per_cell; ++i)
+            for (unsigned int j=0; j< GeometryInfo< dim >::faces_per_cell; ++j)
+              correct(i,j) = ( values[i][j]);
+          break;
+        }
+        case 2:
+        {
+          bool values [][4]= {{false,true ,true , false},
+            {true ,false,false, true },
+            {true ,false,false, true },
+            {false,true ,true , false}
+          };
+          for (unsigned int i=0; i< GeometryInfo< dim >::faces_per_cell; ++i)
+            for (unsigned int j=0; j< GeometryInfo< dim >::faces_per_cell; ++j)
+              correct(i,j) = ( values[i][j]);
+          break;
+        }
+        default:
+          Assert (false, ExcNotImplemented());
+        }
+
+
+      // the cells are visited in rounds: this_round holds the cells
+      // whose neighbors are examined now, next_round collects the
+      // neighbors that get flagged while doing so. user flags mark the
+      // cells that already have been assigned a direction flag.
+      // NOTE(review): the user flags set below are not cleared again
+      // within this function -- confirm that callers expect this.
+      std::list<active_cell_iterator> this_round, next_round;
+      active_cell_iterator neighbor;
+
+      // start with the first active cell and give it direction 'true'
+      this_round.push_back (begin_active());
+      begin_active()->set_direction_flag (true);
+      begin_active()->set_user_flag ();
+
+      while (this_round.size() > 0)
+        {
+          for ( typename std::list<active_cell_iterator>::iterator cell = this_round.begin();
+                cell != this_round.end(); ++cell)
+            {
+              for (unsigned int i = 0; i < GeometryInfo< dim >::faces_per_cell; ++i)
+                {
+                  if ( !((*cell)->face(i)->at_boundary()) )
+                    {
+                      neighbor = (*cell)->neighbor(i);
+
+                      // find the local number j under which the
+                      // neighbor sees the common face. the search has
+                      // no upper bound on j; it relies on the neighbor
+                      // actually having a face with this index
+                      unsigned int cf = (*cell)->face_index(i);
+                      unsigned int j = 0;
+                      while (neighbor->face_index(j) != cf)
+                        {
+                          ++j;
+                        }
+
+                      // this condition holds exactly if correct(i,j)
+                      // differs from the direction flag of the current
+                      // cell; in that case the neighbor gets (or must
+                      // already have) direction 'false'
+                      if ( (correct(i,j) && !(*cell)->direction_flag())
+                           ||
+                           (!correct(i,j) && (*cell)->direction_flag()) )
+                        {
+                          if (neighbor->user_flag_set() == false)
+                            {
+                              neighbor->set_direction_flag (false);
+                              neighbor->set_user_flag ();
+                              next_round.push_back (neighbor);
+                            }
+                          else
+                            Assert (neighbor->direction_flag() == false,
+                                    ExcNonOrientableTriangulation());
+
+                        }
+                    }
+                }
+            }
+
+          // Before we quit let's check
+          // that if the triangulation
+          // is disconnected that we
+          // still get all cells
+          if (next_round.size() == 0)
+            for (active_cell_iterator cell = begin_active();
+                 cell != end(); ++cell)
+              if (cell->user_flag_set() == false)
+                {
+                  next_round.push_back (cell);
+                  cell->set_direction_flag (true);
+                  cell->set_user_flag ();
+                  break;
+                }
+
+          // continue with the cells collected during this round
+          this_round = next_round;
+          next_round.clear();
+        }
+    }
+
+  // inform all listeners that the triangulation has been created
+  signals.create();
+}
+
+
+
+
+// Invert the direction flag of every active cell. Throws unless
+// dim+1 == spacedim.
+template <int dim, int spacedim>
+void
+Triangulation<dim,spacedim>::
+flip_all_direction_flags()
+{
+  AssertThrow (dim+1 == spacedim, ExcMessage ("Only works for dim == spacedim-1"));
+
+  active_cell_iterator cell = begin_active();
+  while (cell != end())
+    {
+      // read the current flag, then store its negation
+      const bool flipped = !cell->direction_flag();
+      cell->set_direction_flag (flipped);
+      ++cell;
+    }
+}
+
+
+
+// On every active cell, clear the coarsen flag and set the refine
+// flag. Asserts that the triangulation has at least one cell.
+template <int dim, int spacedim>
+void
+Triangulation<dim, spacedim>::set_all_refine_flags ()
+{
+  Assert(n_cells()>0, ExcMessage("Error: An empty Triangulation can not be refined."));
+
+  const active_cell_iterator past_the_end = end();
+  active_cell_iterator       current      = begin_active();
+
+  while (current != past_the_end)
+    {
+      current->clear_coarsen_flag();
+      current->set_refine_flag ();
+      ++current;
+    }
+}
+
+
+
+// Repeat "flag all cells for refinement, then execute" the given
+// number of times.
+template <int dim, int spacedim>
+void
+Triangulation<dim, spacedim>::refine_global (const unsigned int times)
+{
+  unsigned int remaining = times;
+  while (remaining > 0)
+    {
+      set_all_refine_flags();
+      execute_coarsening_and_refinement ();
+      --remaining;
+    }
+}
+
+
+
+/*-------------------- refine/coarsen flags -------------------------*/
+
+
+
+// Store the refine flags of all active cells in v, using dim entries
+// per cell: entry j of a cell is true exactly if bit j of the cell's
+// refine flag is set.
+//
+// v is resized to dim*n_active_cells() and every entry is written, so
+// the result does not depend on what v contained on entry.
+template <int dim, int spacedim>
+void Triangulation<dim, spacedim>::save_refine_flags (std::vector<bool> &v) const
+{
+  // note that resize() only initializes newly added entries; entries
+  // that already exist keep their value
+  v.resize (dim*n_active_cells(), false);
+  std::vector<bool>::iterator  i = v.begin();
+  active_cell_iterator cell = begin_active(),
+                       endc = end();
+  for (; cell!=endc; ++cell)
+    for (unsigned int j=0; j<dim; ++j,++i)
+      {
+        // write both outcomes explicitly so that stale 'true' entries
+        // of a reused vector are overwritten as well
+        if (cell->refine_flag_set() & (1<<j) )
+          *i = true;
+        else
+          *i = false;
+      }
+
+  Assert (i == v.end(), ExcInternalError());
+}
+
+
+
+// Collect the refine flags in a vector and write that vector to out,
+// framed by the refine-flag magic numbers.
+template <int dim, int spacedim>
+void
+Triangulation<dim, spacedim>::save_refine_flags (std::ostream &out) const
+{
+  std::vector<bool> flags;
+  save_refine_flags (flags);
+
+  write_bool_vector (mn_tria_refine_flags_begin,
+                     flags,
+                     mn_tria_refine_flags_end,
+                     out);
+}
+
+
+
+// Read a vector of flags from in, framed by the refine-flag magic
+// numbers, and apply it through the vector overload.
+template <int dim, int spacedim>
+void
+Triangulation<dim, spacedim>::load_refine_flags (std::istream &in)
+{
+  std::vector<bool> flags;
+  read_bool_vector (mn_tria_refine_flags_begin,
+                    flags,
+                    mn_tria_refine_flags_end,
+                    in);
+
+  load_refine_flags (flags);
+}
+
+
+
+// Set or clear the refine flag of every active cell from v, which
+// holds dim entries per cell (entry j is bit j of the refinement
+// case). Throws ExcGridReadError if v has the wrong size.
+template <int dim, int spacedim>
+void
+Triangulation<dim, spacedim>::load_refine_flags (const std::vector<bool> &v)
+{
+  AssertThrow (v.size() == dim*n_active_cells(), ExcGridReadError());
+
+  std::vector<bool>::const_iterator i = v.begin();
+  for (active_cell_iterator cell = begin_active(); cell != end(); ++cell)
+    {
+      // put the dim bits of this cell back together, lowest bit first
+      unsigned int ref_case=0;
+      for (unsigned int j=0; j<dim; ++j, ++i)
+        if (*i)
+          ref_case+=1<<j;
+
+      Assert(ref_case<RefinementCase<dim>::isotropic_refinement+1,
+             ExcGridReadError());
+
+      // zero means "no refinement", everything else is a refinement case
+      if (ref_case == 0)
+        cell->clear_refine_flag();
+      else
+        cell->set_refine_flag(RefinementCase<dim>(ref_case));
+    }
+
+  Assert (i == v.end(), ExcInternalError());
+}
+
+
+
+// Store the coarsen flag of every active cell in v, one entry per
+// cell, in the order of the active cell iterators.
+template <int dim, int spacedim>
+void
+Triangulation<dim, spacedim>::save_coarsen_flags (std::vector<bool> &v) const
+{
+  v.resize (n_active_cells(), false);
+
+  std::vector<bool>::iterator i = v.begin();
+  for (active_cell_iterator cell = begin_active(); cell != end(); ++cell)
+    {
+      *i = cell->coarsen_flag_set();
+      ++i;
+    }
+
+  Assert (i == v.end(), ExcInternalError());
+}
+
+
+
+// Collect the coarsen flags in a vector and write that vector to out,
+// framed by the coarsen-flag magic numbers.
+template <int dim, int spacedim>
+void
+Triangulation<dim, spacedim>::save_coarsen_flags (std::ostream &out) const
+{
+  std::vector<bool> flags;
+  save_coarsen_flags (flags);
+
+  write_bool_vector (mn_tria_coarsen_flags_begin,
+                     flags,
+                     mn_tria_coarsen_flags_end,
+                     out);
+}
+
+
+
+// Read a vector of flags from in, framed by the coarsen-flag magic
+// numbers, and apply it through the vector overload.
+template <int dim, int spacedim>
+void
+Triangulation<dim, spacedim>::load_coarsen_flags (std::istream &in)
+{
+  std::vector<bool> flags;
+  read_bool_vector (mn_tria_coarsen_flags_begin,
+                    flags,
+                    mn_tria_coarsen_flags_end,
+                    in);
+
+  load_coarsen_flags (flags);
+}
+
+
+
+// Set or clear the coarsen flag of every active cell from v, which
+// must hold exactly one entry per active cell.
+//
+// Throws ExcGridReadError if v has the wrong size. The size is checked
+// with AssertThrow, as load_refine_flags() does, because v may stem
+// from a stream and walking a vector that is too short would read
+// past its end.
+template <int dim, int spacedim>
+void Triangulation<dim, spacedim>::load_coarsen_flags (const std::vector<bool> &v)
+{
+  AssertThrow (v.size() == n_active_cells(), ExcGridReadError());
+
+  active_cell_iterator cell = begin_active(),
+                       endc = end();
+  std::vector<bool>::const_iterator i = v.begin();
+  for (; cell!=endc; ++cell, ++i)
+    if (*i == true)
+      cell->set_coarsen_flag();
+    else
+      cell->clear_coarsen_flag();
+
+  Assert (i == v.end(), ExcInternalError());
+}
+
+
+// Return the value of the anisotropic_refinement member.
+template <int dim, int spacedim>
+bool
+Triangulation<dim,spacedim>::get_anisotropic_refinement_flag() const
+{
+  return this->anisotropic_refinement;
+}
+
+
+
+/*-------------------- user data/flags -------------------------*/
+
+
+namespace
+{
+  // cells: clear the user data stored with the cells of each level
+  template <int dim>
+  void clear_user_data (std::vector<internal::Triangulation::TriaLevel<dim>*> &levels)
+  {
+    typename std::vector<internal::Triangulation::TriaLevel<dim>*>::iterator
+    level = levels.begin();
+    for (; level != levels.end(); ++level)
+      (*level)->cells.clear_user_data();
+  }
+
+
+  // faces in 1d: there is nothing to clear
+  void clear_user_data (internal::Triangulation::TriaFaces<1> *)
+  {
+  }
+
+
+  // faces in 2d: only lines
+  void clear_user_data (internal::Triangulation::TriaFaces<2> *faces)
+  {
+    faces->lines.clear_user_data();
+  }
+
+
+  // faces in 3d: lines as well as quads
+  void clear_user_data (internal::Triangulation::TriaFaces<3> *faces)
+  {
+    faces->lines.clear_user_data();
+    faces->quads.clear_user_data();
+  }
+}
+
+
+// Clear the user data of the cells and of the faces by calling the
+// helper functions of the same name.
+template <int dim, int spacedim>
+void
+Triangulation<dim,spacedim>::clear_user_data ()
+{
+  // cells first ...
+  dealii::clear_user_data (levels);
+  // ... then lines and quads that are stored as faces
+  dealii::clear_user_data (faces);
+}
+
+
+
+namespace
+{
+  // 1d: lines are the cells, so go through the levels
+  void clear_user_flags_line (std::vector<internal::Triangulation::TriaLevel<1>*> &levels,
+                              internal::Triangulation::TriaFaces<1> *)
+  {
+    std::vector<internal::Triangulation::TriaLevel<1>*>::iterator
+    level = levels.begin();
+    for (; level != levels.end(); ++level)
+      (*level)->cells.clear_user_flags();
+  }
+
+  // all other dimensions: the lines are stored in the face object
+  template <int dim>
+  void clear_user_flags_line (std::vector<internal::Triangulation::TriaLevel<dim>*> &,
+                              internal::Triangulation::TriaFaces<dim> *faces)
+  {
+    faces->lines.clear_user_flags();
+  }
+}
+
+
+// Clear the user flags of all lines; the helper selected by overload
+// resolution does the work.
+template <int dim, int spacedim>
+void
+Triangulation<dim,spacedim>::clear_user_flags_line ()
+{
+  dealii::clear_user_flags_line (this->levels, this->faces);
+}
+
+
+
+namespace
+{
+  // 1d: no quads, hence nothing to do
+  void clear_user_flags_quad (std::vector<internal::Triangulation::TriaLevel<1>*> &,
+                              internal::Triangulation::TriaFaces<1> *)
+  {
+  }
+
+  // 2d: quads are the cells, so go through the levels
+  void clear_user_flags_quad (std::vector<internal::Triangulation::TriaLevel<2>*> &levels,
+                              internal::Triangulation::TriaFaces<2> *)
+  {
+    std::vector<internal::Triangulation::TriaLevel<2>*>::iterator
+    level = levels.begin();
+    for (; level != levels.end(); ++level)
+      (*level)->cells.clear_user_flags();
+  }
+
+  // all other dimensions: the quads are stored in the face object
+  template <int dim>
+  void clear_user_flags_quad (std::vector<internal::Triangulation::TriaLevel<dim>*> &,
+                              internal::Triangulation::TriaFaces<dim> *faces)
+  {
+    faces->quads.clear_user_flags();
+  }
+}
+
+
+// Clear the user flags of all quads; the helper selected by overload
+// resolution does the work.
+template <int dim, int spacedim>
+void
+Triangulation<dim,spacedim>::clear_user_flags_quad ()
+{
+  dealii::clear_user_flags_quad (this->levels, this->faces);
+}
+
+
+
+namespace
+{
+  // 1d: no hexes, hence nothing to do
+  void clear_user_flags_hex (std::vector<internal::Triangulation::TriaLevel<1>*> &,
+                             internal::Triangulation::TriaFaces<1> *)
+  {
+  }
+
+
+  // 2d: no hexes either
+  void clear_user_flags_hex (std::vector<internal::Triangulation::TriaLevel<2>*> &,
+                             internal::Triangulation::TriaFaces<2> *)
+  {
+  }
+
+  // 3d: hexes are the cells, so go through the levels
+  void clear_user_flags_hex (std::vector<internal::Triangulation::TriaLevel<3>*> &levels,
+                             internal::Triangulation::TriaFaces<3> *)
+  {
+    std::vector<internal::Triangulation::TriaLevel<3>*>::iterator
+    level = levels.begin();
+    for (; level != levels.end(); ++level)
+      (*level)->cells.clear_user_flags();
+  }
+}
+
+
+// Clear the user flags of all hexes; the helper selected by overload
+// resolution does the work.
+template <int dim, int spacedim>
+void
+Triangulation<dim,spacedim>::clear_user_flags_hex ()
+{
+  dealii::clear_user_flags_hex (this->levels, this->faces);
+}
+
+
+
+// Clear the user flags of lines, quads and hexes, in this order.
+template <int dim, int spacedim>
+void
+Triangulation<dim,spacedim>::clear_user_flags ()
+{
+  this->clear_user_flags_line ();
+  this->clear_user_flags_quad ();
+  this->clear_user_flags_hex ();
+}
+
+
+
+// Write the user flags to out: lines always, quads for dim >= 2 and
+// hexes for dim >= 3. Higher dimensions are not implemented.
+template <int dim, int spacedim>
+void
+Triangulation<dim, spacedim>::save_user_flags (std::ostream &out) const
+{
+  // lines exist in every dimension
+  save_user_flags_line (out);
+
+  if (dim>=2)
+    {
+      save_user_flags_quad (out);
+    }
+
+  if (dim>=3)
+    {
+      save_user_flags_hex (out);
+    }
+
+  if (dim >= 4)
+    {
+      Assert (false, ExcNotImplemented());
+    }
+}
+
+
+
+// Fill v with the user flags of all lines, followed (for dim >= 2) by
+// those of all quads and (for dim >= 3) by those of all hexes.
+// Whatever v contained before is discarded.
+template <int dim, int spacedim>
+void
+Triangulation<dim, spacedim>::save_user_flags (std::vector<bool> &v) const
+{
+  // start out with the line flags ...
+  std::vector<bool> line_flags;
+  save_user_flags_line (line_flags);
+  v.assign (line_flags.begin(), line_flags.end());
+
+  // ... and append the flags of the higher dimensional objects
+  if (dim >= 2)
+    {
+      std::vector<bool> quad_flags;
+      save_user_flags_quad (quad_flags);
+      v.insert (v.end(), quad_flags.begin(), quad_flags.end());
+    }
+
+  if (dim >= 3)
+    {
+      std::vector<bool> hex_flags;
+      save_user_flags_hex (hex_flags);
+      v.insert (v.end(), hex_flags.begin(), hex_flags.end());
+    }
+
+  if (dim >= 4)
+    {
+      Assert (false, ExcNotImplemented());
+    }
+}
+
+
+
+// Read the user flags from in: lines always, quads for dim >= 2 and
+// hexes for dim >= 3. Higher dimensions are not implemented.
+template <int dim, int spacedim>
+void
+Triangulation<dim, spacedim>::load_user_flags (std::istream &in)
+{
+  // lines exist in every dimension
+  load_user_flags_line (in);
+
+  if (dim>=2)
+    {
+      load_user_flags_quad (in);
+    }
+
+  if (dim>=3)
+    {
+      load_user_flags_hex (in);
+    }
+
+  if (dim >= 4)
+    {
+      Assert (false, ExcNotImplemented());
+    }
+}
+
+
+
+// Set the user flags from v, which is expected to hold the flags of
+// all lines, then all quads, then all hexes. Each slice is passed to
+// the matching load_user_flags_* function.
+template <int dim, int spacedim>
+void
+Triangulation<dim, spacedim>::load_user_flags (const std::vector<bool> &v)
+{
+  Assert (v.size() == n_lines()+n_quads()+n_hexs(), ExcInternalError());
+
+  // holds the slice of v that is currently being processed
+  std::vector<bool> tmp;
+
+  // the leading n_lines() entries belong to the lines
+  tmp.assign (v.begin(), v.begin()+n_lines());
+  load_user_flags_line (tmp);
+
+  // next come n_quads() entries for the quads
+  if (dim >= 2)
+    {
+      tmp.assign (v.begin()+n_lines(), v.begin()+n_lines()+n_quads());
+      load_user_flags_quad (tmp);
+    }
+
+  // and finally n_hexs() entries for the hexes
+  if (dim >= 3)
+    {
+      tmp.assign (v.begin()+n_lines()+n_quads(),
+                  v.begin()+n_lines()+n_quads()+n_hexs());
+      load_user_flags_hex (tmp);
+    }
+
+  if (dim >= 4)
+    {
+      Assert (false, ExcNotImplemented());
+    }
+}
+
+
+
+// Store the user flag of every line in v, one entry per line, in the
+// order of the line iterators.
+template <int dim, int spacedim>
+void
+Triangulation<dim, spacedim>::save_user_flags_line (std::vector<bool> &v) const
+{
+  v.resize (n_lines(), false);
+
+  std::vector<bool>::iterator i = v.begin();
+  const line_iterator past_the_end = end_line();
+  for (line_iterator current = begin_line(); current != past_the_end; ++current)
+    {
+      *i = current->user_flag_set();
+      ++i;
+    }
+
+  Assert (i == v.end(), ExcInternalError());
+}
+
+
+
+// Collect the user flags of the lines in a vector and write that
+// vector to out, framed by the line-user-flag magic numbers.
+template <int dim, int spacedim>
+void
+Triangulation<dim, spacedim>::save_user_flags_line (std::ostream &out) const
+{
+  std::vector<bool> flags;
+  save_user_flags_line (flags);
+
+  write_bool_vector (mn_tria_line_user_flags_begin,
+                     flags,
+                     mn_tria_line_user_flags_end,
+                     out);
+}
+
+
+
+// Read a vector of flags from in, framed by the line-user-flag magic
+// numbers, and apply it through the vector overload.
+template <int dim, int spacedim>
+void
+Triangulation<dim, spacedim>::load_user_flags_line (std::istream &in)
+{
+  std::vector<bool> flags;
+  read_bool_vector (mn_tria_line_user_flags_begin,
+                    flags,
+                    mn_tria_line_user_flags_end,
+                    in);
+
+  load_user_flags_line (flags);
+}
+
+
+
+// Set or clear the user flag of every line according to v, which is
+// expected to hold one entry per line.
+template <int dim, int spacedim>
+void
+Triangulation<dim, spacedim>::load_user_flags_line (const std::vector<bool> &v)
+{
+  Assert (v.size() == n_lines(), ExcGridReadError());
+
+  std::vector<bool>::const_iterator i = v.begin();
+  const line_iterator past_the_end = end_line();
+  for (line_iterator current = begin_line(); current != past_the_end; ++current, ++i)
+    {
+      if (*i)
+        current->set_user_flag();
+      else
+        current->clear_user_flag();
+    }
+
+  Assert (i == v.end(), ExcInternalError());
+}
+
+
+namespace
+{
+  // Query the user flag of the object an iterator points to.
+  template <typename Iterator>
+  bool get_user_flag (const Iterator &i)
+  {
+    return (*i).user_flag_set();
+  }
+
+
+
+  // Overload for iterators with an InvalidAccessor: calling it is an
+  // internal error.
+  template <int structdim, int dim, int spacedim>
+  bool get_user_flag (const TriaIterator<InvalidAccessor<structdim,dim,spacedim> > &)
+  {
+    Assert (false, ExcInternalError());
+    return false;
+  }
+
+
+
+  // Set the user flag of the object an iterator points to.
+  template <typename Iterator>
+  void set_user_flag (const Iterator &i)
+  {
+    (*i).set_user_flag();
+  }
+
+
+
+  // Overload for iterators with an InvalidAccessor: calling it is an
+  // internal error.
+  template <int structdim, int dim, int spacedim>
+  void set_user_flag (const TriaIterator<InvalidAccessor<structdim,dim,spacedim> > &)
+  {
+    Assert (false, ExcInternalError());
+  }
+
+
+
+  // Clear the user flag of the object an iterator points to.
+  template <typename Iterator>
+  void clear_user_flag (const Iterator &i)
+  {
+    (*i).clear_user_flag();
+  }
+
+
+
+  // Overload for iterators with an InvalidAccessor: calling it is an
+  // internal error.
+  template <int structdim, int dim, int spacedim>
+  void clear_user_flag (const TriaIterator<InvalidAccessor<structdim,dim,spacedim> > &)
+  {
+    Assert (false, ExcInternalError());
+  }
+}
+
+
+// Store the user flag of every quad in v, one entry per quad. For
+// dim < 2 the vector is only resized.
+template <int dim, int spacedim>
+void
+Triangulation<dim, spacedim>::save_user_flags_quad (std::vector<bool> &v) const
+{
+  v.resize (n_quads(), false);
+
+  // the quads are only walked over for dim >= 2
+  if (dim < 2)
+    return;
+
+  std::vector<bool>::iterator i = v.begin();
+  const quad_iterator past_the_end = end_quad();
+  for (quad_iterator current = begin_quad(); current != past_the_end; ++current)
+    {
+      *i = get_user_flag (current);
+      ++i;
+    }
+
+  Assert (i == v.end(), ExcInternalError());
+}
+
+
+
+// Collect the user flags of the quads in a vector and write that
+// vector to out, framed by the quad-user-flag magic numbers.
+template <int dim, int spacedim>
+void
+Triangulation<dim, spacedim>::save_user_flags_quad (std::ostream &out) const
+{
+  std::vector<bool> flags;
+  save_user_flags_quad (flags);
+
+  write_bool_vector (mn_tria_quad_user_flags_begin,
+                     flags,
+                     mn_tria_quad_user_flags_end,
+                     out);
+}
+
+
+
+template <int dim, int spacedim>
+void Triangulation<dim, spacedim>::load_user_flags_quad (std::istream &in)
+{
+  // read the flag vector, framed by the quad magic numbers ...
+  std::vector<bool> quad_flags;
+  read_bool_vector (mn_tria_quad_user_flags_begin,
+                    quad_flags,
+                    mn_tria_quad_user_flags_end,
+                    in);
+
+  // ... and apply it to the quads
+  load_user_flags_quad (quad_flags);
+}
+
+
+
+template <int dim, int spacedim>
+void Triangulation<dim, spacedim>::load_user_flags_quad (const std::vector<bool> &v)
+{
+  // the vector must hold exactly one flag per quad
+  Assert (v.size() == n_quads(), ExcGridReadError());
+
+  // below dim==2 the quads are not visited at all
+  if (dim < 2)
+    return;
+
+  // walk over all quads and the input vector in lockstep
+  quad_iterator q = begin_quad();
+  const quad_iterator past_the_end = end_quad();
+  std::vector<bool>::const_iterator flag = v.begin();
+  while (q != past_the_end)
+    {
+      if (*flag)
+        set_user_flag(q);
+      else
+        clear_user_flag(q);
+
+      ++q;
+      ++flag;
+    }
+
+  // all entries of the vector must have been consumed
+  Assert (flag == v.end(), ExcInternalError());
+}
+
+
+
+template <int dim, int spacedim>
+void Triangulation<dim, spacedim>::save_user_flags_hex (std::vector<bool> &v) const
+{
+  // make room for one flag per hex
+  v.resize (n_hexs(), false);
+
+  // below dim==3 the hexes are not visited at all
+  if (dim < 3)
+    return;
+
+  // walk over all hexes and the output vector in lockstep
+  std::vector<bool>::iterator slot = v.begin();
+  hex_iterator h = begin_hex();
+  const hex_iterator past_the_end = end_hex();
+  while (h != past_the_end)
+    {
+      *slot = get_user_flag (h);
+      ++h;
+      ++slot;
+    }
+
+  // the number of hexes visited must match the vector size
+  Assert (slot == v.end(), ExcInternalError());
+}
+
+
+
+template <int dim, int spacedim>
+void Triangulation<dim, spacedim>::save_user_flags_hex (std::ostream &out) const
+{
+  // gather the flags into a vector first ...
+  std::vector<bool> hex_flags;
+  save_user_flags_hex (hex_flags);
+
+  // ... then write it to the stream, framed by the hex magic numbers
+  write_bool_vector (mn_tria_hex_user_flags_begin,
+                     hex_flags,
+                     mn_tria_hex_user_flags_end,
+                     out);
+}
+
+
+
+template <int dim, int spacedim>
+void Triangulation<dim, spacedim>::load_user_flags_hex (std::istream &in)
+{
+  // read the flag vector, framed by the hex magic numbers ...
+  std::vector<bool> hex_flags;
+  read_bool_vector (mn_tria_hex_user_flags_begin,
+                    hex_flags,
+                    mn_tria_hex_user_flags_end,
+                    in);
+
+  // ... and apply it to the hexes
+  load_user_flags_hex (hex_flags);
+}
+
+
+
+template <int dim, int spacedim>
+void Triangulation<dim, spacedim>::load_user_flags_hex (const std::vector<bool> &v)
+{
+  // the vector must hold exactly one flag per hex
+  Assert (v.size() == n_hexs(), ExcGridReadError());
+
+  // below dim==3 the hexes are not visited at all
+  if (dim < 3)
+    return;
+
+  // walk over all hexes and the input vector in lockstep
+  hex_iterator h = begin_hex();
+  const hex_iterator past_the_end = end_hex();
+  std::vector<bool>::const_iterator flag = v.begin();
+  while (h != past_the_end)
+    {
+      if (*flag)
+        set_user_flag(h);
+      else
+        clear_user_flag(h);
+
+      ++h;
+      ++flag;
+    }
+
+  // all entries of the vector must have been consumed
+  Assert (flag == v.end(), ExcInternalError());
+}
+
+
+
+template <int dim, int spacedim>
+void Triangulation<dim, spacedim>::save_user_indices (std::vector<unsigned int> &v) const
+{
+  // start from an empty vector; the indices of lines, quads and hexes
+  // are appended one group after the other
+  v.clear ();
+
+  // scratch vector that is refilled for every kind of object
+  std::vector<unsigned int> chunk;
+
+  // lines exist in every dimension
+  save_user_indices_line (chunk);
+  v.insert (v.end(), chunk.begin(), chunk.end());
+
+  if (dim >= 2)
+    {
+      save_user_indices_quad (chunk);
+      v.insert (v.end(), chunk.begin(), chunk.end());
+    }
+
+  if (dim >= 3)
+    {
+      save_user_indices_hex (chunk);
+      v.insert (v.end(), chunk.begin(), chunk.end());
+    }
+
+  // nothing beyond hexes is implemented
+  if (dim >= 4)
+    Assert (false, ExcNotImplemented());
+}
+
+
+
+template <int dim, int spacedim>
+void Triangulation<dim, spacedim>::load_user_indices (const std::vector<unsigned int> &v)
+{
+  // the vector holds the indices of all lines, then all quads, then
+  // all hexes
+  Assert (v.size() == n_lines()+n_quads()+n_hexs(), ExcInternalError());
+
+  // scratch vector that receives one group of indices at a time
+  std::vector<unsigned int> chunk;
+
+  // the leading n_lines() entries belong to the lines
+  chunk.assign (v.begin(), v.begin()+n_lines());
+  load_user_indices_line (chunk);
+
+  if (dim >= 2)
+    {
+      // next come the n_quads() entries for the quads
+      chunk.assign (v.begin()+n_lines(),
+                    v.begin()+n_lines()+n_quads());
+      load_user_indices_quad (chunk);
+    }
+
+  if (dim >= 3)
+    {
+      // and finally the n_hexs() entries for the hexes
+      chunk.assign (v.begin()+n_lines()+n_quads(),
+                    v.begin()+n_lines()+n_quads()+n_hexs());
+      load_user_indices_hex (chunk);
+    }
+
+  // nothing beyond hexes is implemented
+  if (dim >= 4)
+    Assert (false, ExcNotImplemented());
+}
+
+
+
+namespace
+{
+  // Thin wrappers around the user-index functions of the object an
+  // iterator points to. Both operations have a second overload that
+  // takes an iterator over InvalidAccessor; those overloads do nothing
+  // except trigger an ExcInternalError assertion.
+
+  // Read the user index of the pointed-to object.
+  template <typename Iterator>
+  unsigned int get_user_index (const Iterator &object)
+  {
+    const unsigned int index = object->user_index();
+    return index;
+  }
+
+
+
+  // Overload for iterators over InvalidAccessor.
+  template <int structdim, int dim, int spacedim>
+  unsigned int get_user_index (const TriaIterator<InvalidAccessor<structdim,dim,spacedim> > &)
+  {
+    Assert (false, ExcInternalError());
+    // dummy return value
+    return numbers::invalid_unsigned_int;
+  }
+
+
+
+  // Store the value x as user index of the pointed-to object.
+  template <typename Iterator>
+  void set_user_index (const Iterator &object,
+                       const unsigned int x)
+  {
+    object->set_user_index(x);
+  }
+
+
+
+  // Overload for iterators over InvalidAccessor.
+  template <int structdim, int dim, int spacedim>
+  void set_user_index (const TriaIterator<InvalidAccessor<structdim,dim,spacedim> > &,
+                       const unsigned int)
+  {
+    Assert (false, ExcInternalError());
+  }
+}
+
+
+template <int dim, int spacedim>
+void Triangulation<dim, spacedim>::save_user_indices_line (std::vector<unsigned int> &v) const
+{
+  // make room for one index per line
+  v.resize (n_lines(), 0);
+
+  // walk over all lines and the output vector in lockstep
+  std::vector<unsigned int>::iterator slot = v.begin();
+  line_iterator l = begin_line();
+  const line_iterator past_the_end = end_line();
+  while (l != past_the_end)
+    {
+      *slot = l->user_index();
+      ++l;
+      ++slot;
+    }
+}
+
+
+
+template <int dim, int spacedim>
+void Triangulation<dim, spacedim>::load_user_indices_line (const std::vector<unsigned int> &v)
+{
+  // the vector must hold exactly one index per line
+  Assert (v.size() == n_lines(), ExcGridReadError());
+
+  // walk over all lines and the input vector in lockstep
+  line_iterator l = begin_line();
+  const line_iterator past_the_end = end_line();
+  std::vector<unsigned int>::const_iterator value = v.begin();
+  while (l != past_the_end)
+    {
+      l->set_user_index(*value);
+      ++l;
+      ++value;
+    }
+}
+
+
+template <int dim, int spacedim>
+void Triangulation<dim, spacedim>::save_user_indices_quad (std::vector<unsigned int> &v) const
+{
+  // make room for one index per quad
+  v.resize (n_quads(), 0);
+
+  // below dim==2 the quads are not visited at all
+  if (dim < 2)
+    return;
+
+  // walk over all quads and the output vector in lockstep
+  std::vector<unsigned int>::iterator slot = v.begin();
+  quad_iterator q = begin_quad();
+  const quad_iterator past_the_end = end_quad();
+  while (q != past_the_end)
+    {
+      *slot = get_user_index(q);
+      ++q;
+      ++slot;
+    }
+}
+
+
+
+template <int dim, int spacedim>
+void Triangulation<dim, spacedim>::load_user_indices_quad (const std::vector<unsigned int> &v)
+{
+  // the vector must hold exactly one index per quad
+  Assert (v.size() == n_quads(), ExcGridReadError());
+
+  // below dim==2 the quads are not visited at all
+  if (dim < 2)
+    return;
+
+  // walk over all quads and the input vector in lockstep
+  quad_iterator q = begin_quad();
+  const quad_iterator past_the_end = end_quad();
+  std::vector<unsigned int>::const_iterator value = v.begin();
+  while (q != past_the_end)
+    {
+      set_user_index(q, *value);
+      ++q;
+      ++value;
+    }
+}
+
+
+template <int dim, int spacedim>
+void Triangulation<dim, spacedim>::save_user_indices_hex (std::vector<unsigned int> &v) const
+{
+  // make room for one index per hex
+  v.resize (n_hexs(), 0);
+
+  // below dim==3 the hexes are not visited at all
+  if (dim < 3)
+    return;
+
+  // walk over all hexes and the output vector in lockstep
+  std::vector<unsigned int>::iterator slot = v.begin();
+  hex_iterator h = begin_hex();
+  const hex_iterator past_the_end = end_hex();
+  while (h != past_the_end)
+    {
+      *slot = get_user_index(h);
+      ++h;
+      ++slot;
+    }
+}
+
+
+
+template <int dim, int spacedim>
+void Triangulation<dim, spacedim>::load_user_indices_hex (const std::vector<unsigned int> &v)
+{
+  // the vector must hold exactly one index per hex
+  Assert (v.size() == n_hexs(), ExcGridReadError());
+
+  // below dim==3 the hexes are not visited at all
+  if (dim < 3)
+    return;
+
+  // walk over all hexes and the input vector in lockstep
+  hex_iterator h = begin_hex();
+  const hex_iterator past_the_end = end_hex();
+  std::vector<unsigned int>::const_iterator value = v.begin();
+  while (h != past_the_end)
+    {
+      set_user_index(h, *value);
+      ++h;
+      ++value;
+    }
+}
+
+
+
+//---------------- user pointers ----------------------------------------//
+
+
+namespace
+{
+  // Thin wrappers around the user-pointer functions of the object an
+  // iterator points to. Both operations have a second overload that
+  // takes an iterator over InvalidAccessor; those overloads do nothing
+  // except trigger an ExcInternalError assertion.
+
+  // Read the user pointer of the pointed-to object.
+  template <typename Iterator>
+  void *get_user_pointer (const Iterator &object)
+  {
+    void *const pointer = object->user_pointer();
+    return pointer;
+  }
+
+
+
+  // Overload for iterators over InvalidAccessor.
+  template <int structdim, int dim, int spacedim>
+  void *get_user_pointer (const TriaIterator<InvalidAccessor<structdim,dim,spacedim> > &)
+  {
+    Assert (false, ExcInternalError());
+    // dummy return value
+    return 0;
+  }
+
+
+
+  // Store x as user pointer of the pointed-to object.
+  template <typename Iterator>
+  void set_user_pointer (const Iterator &object,
+                         void *x)
+  {
+    object->set_user_pointer(x);
+  }
+
+
+
+  // Overload for iterators over InvalidAccessor.
+  template <int structdim, int dim, int spacedim>
+  void set_user_pointer (const TriaIterator<InvalidAccessor<structdim,dim,spacedim> > &,
+                         void *)
+  {
+    Assert (false, ExcInternalError());
+  }
+}
+
+
+template <int dim, int spacedim>
+void Triangulation<dim, spacedim>::save_user_pointers (std::vector<void *> &v) const
+{
+  // start from an empty vector; the pointers of lines, quads and hexes
+  // are appended one group after the other
+  v.clear ();
+
+  // scratch vector that is refilled for every kind of object
+  std::vector<void *> chunk;
+
+  // lines exist in every dimension
+  save_user_pointers_line (chunk);
+  v.insert (v.end(), chunk.begin(), chunk.end());
+
+  if (dim >= 2)
+    {
+      save_user_pointers_quad (chunk);
+      v.insert (v.end(), chunk.begin(), chunk.end());
+    }
+
+  if (dim >= 3)
+    {
+      save_user_pointers_hex (chunk);
+      v.insert (v.end(), chunk.begin(), chunk.end());
+    }
+
+  // nothing beyond hexes is implemented
+  if (dim >= 4)
+    Assert (false, ExcNotImplemented());
+}
+
+
+
+template <int dim, int spacedim>
+void Triangulation<dim, spacedim>::load_user_pointers (const std::vector<void *> &v)
+{
+  // the vector holds the pointers of all lines, then all quads, then
+  // all hexes
+  Assert (v.size() == n_lines()+n_quads()+n_hexs(), ExcInternalError());
+
+  // scratch vector that receives one group of pointers at a time
+  std::vector<void *> chunk;
+
+  // the leading n_lines() entries belong to the lines
+  chunk.assign (v.begin(), v.begin()+n_lines());
+  load_user_pointers_line (chunk);
+
+  if (dim >= 2)
+    {
+      // next come the n_quads() entries for the quads
+      chunk.assign (v.begin()+n_lines(),
+                    v.begin()+n_lines()+n_quads());
+      load_user_pointers_quad (chunk);
+    }
+
+  if (dim >= 3)
+    {
+      // and finally the n_hexs() entries for the hexes
+      chunk.assign (v.begin()+n_lines()+n_quads(),
+                    v.begin()+n_lines()+n_quads()+n_hexs());
+      load_user_pointers_hex (chunk);
+    }
+
+  // nothing beyond hexes is implemented
+  if (dim >= 4)
+    Assert (false, ExcNotImplemented());
+}
+
+
+
+template <int dim, int spacedim>
+void Triangulation<dim, spacedim>::save_user_pointers_line (std::vector<void *> &v) const
+{
+  // make room for one pointer per line
+  v.resize (n_lines(), 0);
+
+  // walk over all lines and the output vector in lockstep
+  std::vector<void *>::iterator slot = v.begin();
+  line_iterator l = begin_line();
+  const line_iterator past_the_end = end_line();
+  while (l != past_the_end)
+    {
+      *slot = l->user_pointer();
+      ++l;
+      ++slot;
+    }
+}
+
+
+
+template <int dim, int spacedim>
+void Triangulation<dim, spacedim>::load_user_pointers_line (const std::vector<void *> &v)
+{
+  // the vector must hold exactly one pointer per line
+  Assert (v.size() == n_lines(), ExcGridReadError());
+
+  // walk over all lines and the input vector in lockstep
+  line_iterator l = begin_line();
+  const line_iterator past_the_end = end_line();
+  std::vector<void *>::const_iterator value = v.begin();
+  while (l != past_the_end)
+    {
+      l->set_user_pointer(*value);
+      ++l;
+      ++value;
+    }
+}
+
+
+
+template <int dim, int spacedim>
+void Triangulation<dim, spacedim>::save_user_pointers_quad (std::vector<void *> &v) const
+{
+  // make room for one pointer per quad
+  v.resize (n_quads(), 0);
+
+  // below dim==2 the quads are not visited at all
+  if (dim < 2)
+    return;
+
+  // walk over all quads and the output vector in lockstep
+  std::vector<void *>::iterator slot = v.begin();
+  quad_iterator q = begin_quad();
+  const quad_iterator past_the_end = end_quad();
+  while (q != past_the_end)
+    {
+      *slot = get_user_pointer(q);
+      ++q;
+      ++slot;
+    }
+}
+
+
+
+template <int dim, int spacedim>
+void Triangulation<dim, spacedim>::load_user_pointers_quad (const std::vector<void *> &v)
+{
+  // the vector must hold exactly one pointer per quad
+  Assert (v.size() == n_quads(), ExcGridReadError());
+
+  // below dim==2 the quads are not visited at all
+  if (dim < 2)
+    return;
+
+  // walk over all quads and the input vector in lockstep
+  quad_iterator q = begin_quad();
+  const quad_iterator past_the_end = end_quad();
+  std::vector<void *>::const_iterator value = v.begin();
+  while (q != past_the_end)
+    {
+      set_user_pointer(q, *value);
+      ++q;
+      ++value;
+    }
+}
+
+
+template <int dim, int spacedim>
+void Triangulation<dim, spacedim>::save_user_pointers_hex (std::vector<void *> &v) const
+{
+  // make room for one pointer per hex
+  v.resize (n_hexs(), 0);
+
+  // below dim==3 the hexes are not visited at all
+  if (dim < 3)
+    return;
+
+  // walk over all hexes and the output vector in lockstep
+  std::vector<void *>::iterator slot = v.begin();
+  hex_iterator h = begin_hex();
+  const hex_iterator past_the_end = end_hex();
+  while (h != past_the_end)
+    {
+      *slot = get_user_pointer(h);
+      ++h;
+      ++slot;
+    }
+}
+
+
+
+template <int dim, int spacedim>
+void Triangulation<dim, spacedim>::load_user_pointers_hex (const std::vector<void *> &v)
+{
+  // the vector must hold exactly one pointer per hex
+  Assert (v.size() == n_hexs(), ExcGridReadError());
+
+  // below dim==3 the hexes are not visited at all
+  if (dim < 3)
+    return;
+
+  // walk over all hexes and the input vector in lockstep
+  hex_iterator h = begin_hex();
+  const hex_iterator past_the_end = end_hex();
+  std::vector<void *>::const_iterator value = v.begin();
+  while (h != past_the_end)
+    {
+      set_user_pointer(h, *value);
+      ++h;
+      ++value;
+    }
+}
+
+
+
+/*------------------------ Cell iterator functions ------------------------*/
+
+
+template <int dim, int spacedim>
+typename Triangulation<dim,spacedim>::raw_cell_iterator
+Triangulation<dim,spacedim>::begin_raw (const unsigned int level) const
+{
+  // cells are lines, quads or hexes, depending on the dimension
+  if (dim == 1)
+    return begin_raw_line (level);
+  if (dim == 2)
+    return begin_raw_quad (level);
+  if (dim == 3)
+    return begin_raw_hex (level);
+
+  // any other dimension is not implemented
+  Assert (false, ExcNotImplemented());
+  return raw_cell_iterator();
+}
+
+
+
+template <int dim, int spacedim>
+typename Triangulation<dim,spacedim>::cell_iterator
+Triangulation<dim,spacedim>::begin (const unsigned int level) const
+{
+  // cells are lines, quads or hexes, depending on the dimension
+  if (dim == 1)
+    return begin_line (level);
+  if (dim == 2)
+    return begin_quad (level);
+  if (dim == 3)
+    return begin_hex (level);
+
+  // any other dimension is rejected
+  Assert (false, ExcImpossibleInDim(dim));
+  return cell_iterator();
+}
+
+
+
+template <int dim, int spacedim>
+typename Triangulation<dim,spacedim>::active_cell_iterator
+Triangulation<dim,spacedim>::begin_active (const unsigned int level) const
+{
+  // cells are lines, quads or hexes, depending on the dimension
+  if (dim == 1)
+    return begin_active_line (level);
+  if (dim == 2)
+    return begin_active_quad (level);
+  if (dim == 3)
+    return begin_active_hex (level);
+
+  // any other dimension is not implemented
+  Assert (false, ExcNotImplemented());
+  return active_cell_iterator();
+}
+
+
+
+template <int dim, int spacedim>
+typename Triangulation<dim,spacedim>::cell_iterator
+Triangulation<dim,spacedim>::last () const
+{
+  // the search starts on the last level stored in 'levels'
+  const unsigned int finest = levels.size()-1;
+
+  Assert (finest<n_global_levels() || finest<levels.size(), ExcInvalidLevel(finest));
+
+  // no cells stored on that level at all
+  if (levels[finest]->cells.cells.size() ==0)
+    return end(finest);
+
+  // start at the last raw cell of this level ...
+  raw_cell_iterator ri (const_cast<Triangulation<dim,spacedim>*>(this),
+                        finest,
+                        levels[finest]->cells.cells.size()-1);
+
+  // ... and step backwards until a used cell is found or the iterator
+  // leaves the valid range
+  while (ri->used() == false)
+    if ((--ri).state() != IteratorState::valid)
+      break;
+
+  return ri;
+}
+
+
+
+template <int dim, int spacedim>
+typename Triangulation<dim,spacedim>::active_cell_iterator
+Triangulation<dim,spacedim>::last_active () const
+{
+  // start from the last used cell
+  cell_iterator cell = last();
+
+  // if there is none, hand back the end iterator as is
+  if (cell == end())
+    return cell;
+
+  // otherwise step backwards until an active cell is found or the
+  // iterator leaves the valid range
+  while (cell->active() == false)
+    if ((--cell).state() != IteratorState::valid)
+      break;
+
+  return cell;
+}
+
+
+
+template <int dim, int spacedim>
+typename Triangulation<dim,spacedim>::cell_iterator
+Triangulation<dim,spacedim>::end () const
+{
+  // the iterator constructor takes a non-const triangulation pointer
+  Triangulation<dim, spacedim> *const self
+    = const_cast<Triangulation<dim, spacedim>*>(this);
+
+  // the end iterator uses level -1 and index -1
+  return cell_iterator (self, -1, -1);
+}
+
+
+
+template <int dim, int spacedim>
+typename Triangulation<dim, spacedim>::raw_cell_iterator
+Triangulation<dim, spacedim>::end_raw (const unsigned int level) const
+{
+  Assert (level<n_global_levels(), ExcInvalidLevel(level));
+
+  // the end of one level is the beginning of the next one, if a next
+  // level is stored; otherwise it is the global end iterator
+  const bool has_finer_level = (level < levels.size()-1);
+  if (has_finer_level)
+    return begin_raw (level+1);
+
+  return end();
+}
+
+
+template <int dim, int spacedim>
+typename Triangulation<dim, spacedim>::cell_iterator
+Triangulation<dim, spacedim>::end (const unsigned int level) const
+{
+  // the end of one level is the beginning of the next one, if a next
+  // level is stored
+  const bool has_finer_level = (level < levels.size()-1);
+  if (has_finer_level)
+    return begin (level+1);
+
+  // otherwise check the level and return the global end iterator
+  Assert (level<n_global_levels() || level<levels.size(), ExcInvalidLevel(level));
+  return end();
+}
+
+
+template <int dim, int spacedim>
+typename Triangulation<dim, spacedim>::active_cell_iterator
+Triangulation<dim, spacedim>::end_active (const unsigned int level) const
+{
+  Assert (level<n_global_levels() || level < levels.size(), ExcInvalidLevel(level));
+
+  // on the last stored level (or beyond) the global end iterator is
+  // returned
+  if (level >= levels.size()-1)
+    return active_cell_iterator(end());
+
+  // otherwise the end is the first active cell of the next level
+  return begin_active (level+1);
+}
+
+
+
+template <int dim, int spacedim>
+IteratorRange<typename Triangulation<dim, spacedim>::cell_iterator>
+Triangulation<dim, spacedim>::cell_iterators () const
+{
+  // range spanning begin() to end()
+  typedef
+  IteratorRange<typename Triangulation<dim, spacedim>::cell_iterator>
+  range_type;
+
+  return range_type (begin(), end());
+}
+
+
+template <int dim, int spacedim>
+IteratorRange<typename Triangulation<dim, spacedim>::active_cell_iterator>
+Triangulation<dim, spacedim>::active_cell_iterators () const
+{
+  // range spanning begin_active() to end()
+  typedef
+  IteratorRange<typename Triangulation<dim, spacedim>::active_cell_iterator>
+  range_type;
+
+  return range_type (begin_active(), end());
+}
+
+
+
+template <int dim, int spacedim>
+IteratorRange<typename Triangulation<dim, spacedim>::cell_iterator>
+Triangulation<dim, spacedim>::cell_iterators_on_level (const unsigned int level) const
+{
+  // range spanning begin(level) to end(level)
+  typedef
+  IteratorRange<typename Triangulation<dim, spacedim>::cell_iterator>
+  range_type;
+
+  return range_type (begin(level), end(level));
+}
+
+
+
+template <int dim, int spacedim>
+IteratorRange<typename Triangulation<dim, spacedim>::active_cell_iterator>
+Triangulation<dim, spacedim>::active_cell_iterators_on_level (const unsigned int level) const
+{
+  // range spanning begin_active(level) to end_active(level)
+  typedef
+  IteratorRange<typename Triangulation<dim, spacedim>::active_cell_iterator>
+  range_type;
+
+  return range_type (begin_active(level), end_active(level));
+}
+
+
+/*------------------------ Face iterator functions ------------------------*/
+
+
+template <int dim, int spacedim>
+typename Triangulation<dim,spacedim>::face_iterator
+Triangulation<dim,spacedim>::begin_face () const
+{
+  // this function is rejected for dim==1
+  if (dim == 1)
+    {
+      Assert (false, ExcImpossibleInDim(1));
+      return raw_face_iterator();
+    }
+
+  // faces are lines for dim==2 and quads for dim==3
+  if (dim == 2)
+    return begin_line ();
+  if (dim == 3)
+    return begin_quad ();
+
+  // any other dimension is not implemented
+  Assert (false, ExcNotImplemented());
+  return face_iterator ();
+}
+
+
+
+template <int dim, int spacedim>
+typename Triangulation<dim,spacedim>::active_face_iterator
+Triangulation<dim,spacedim>::begin_active_face () const
+{
+  // this function is rejected for dim==1
+  if (dim == 1)
+    {
+      Assert (false, ExcImpossibleInDim(1));
+      return raw_face_iterator();
+    }
+
+  // faces are lines for dim==2 and quads for dim==3
+  if (dim == 2)
+    return begin_active_line ();
+  if (dim == 3)
+    return begin_active_quad ();
+
+  // any other dimension is not implemented
+  Assert (false, ExcNotImplemented());
+  return active_face_iterator ();
+}
+
+
+
+template <int dim, int spacedim>
+typename Triangulation<dim,spacedim>::face_iterator
+Triangulation<dim,spacedim>::end_face () const
+{
+  // this function is rejected for dim==1
+  if (dim == 1)
+    {
+      Assert (false, ExcImpossibleInDim(1));
+      return raw_face_iterator();
+    }
+
+  // faces are lines for dim==2 and quads for dim==3
+  if (dim == 2)
+    return end_line ();
+  if (dim == 3)
+    return end_quad ();
+
+  // any other dimension is not implemented
+  Assert (false, ExcNotImplemented());
+  return raw_face_iterator ();
+}
+
+
+/*------------------------ Vertex iterator functions ------------------------*/
+
+
+template <int dim, int spacedim>
+typename Triangulation<dim,spacedim>::vertex_iterator
+Triangulation<dim,spacedim>::begin_vertex() const
+{
+  if (dim==1)
+    {
+      // Not available for dim==1: TriaAccessor<0,1,spacedim> does not
+      // implement operator++, which the search below needs
+      Assert(false, ExcNotImplemented());
+      return raw_vertex_iterator();
+    }
+
+  // start at the very first vertex slot
+  vertex_iterator vertex = raw_vertex_iterator(const_cast<Triangulation<dim, spacedim>*>(this),
+                                               0,
+                                               0);
+  if (vertex.state() != IteratorState::valid)
+    return vertex;
+
+  // Skip over unused vertices. This loop will end because every
+  // triangulation has used vertices.
+  while (vertex->used() == false)
+    if ((++vertex).state() != IteratorState::valid)
+      break;
+
+  return vertex;
+}
+
+
+
+template <int dim, int spacedim>
+typename Triangulation<dim,spacedim>::active_vertex_iterator
+Triangulation<dim,spacedim>::begin_active_vertex() const
+{
+  // simply forwards to begin_vertex(); the result is converted to the
+  // active iterator type on return
+  const vertex_iterator first_vertex = begin_vertex();
+  return first_vertex;
+}
+
+
+
+template <int dim, int spacedim>
+typename Triangulation<dim,spacedim>::vertex_iterator
+Triangulation<dim,spacedim>::end_vertex() const
+{
+  // not implemented for dim==1
+  if (dim==1)
+    {
+      Assert(false, ExcNotImplemented());
+      return raw_vertex_iterator();
+    }
+
+  // the end iterator uses level -1 and an invalid index
+  Triangulation<dim, spacedim> *const self
+    = const_cast<Triangulation<dim, spacedim>*>(this);
+  return raw_vertex_iterator(self,
+                             -1,
+                             numbers::invalid_unsigned_int);
+}
+
+
+
+
+/*------------------------ Line iterator functions ------------------------*/
+
+
+
+template <int dim, int spacedim>
+typename Triangulation<dim, spacedim>::raw_line_iterator
+Triangulation<dim, spacedim>::begin_raw_line (const unsigned int level) const
+{
+  // the iterator constructor takes a non-const triangulation pointer
+  Triangulation<dim, spacedim> *const self
+    = const_cast<Triangulation<dim, spacedim>*>(this);
+
+  if (dim == 1)
+    {
+      // for dim==1 the level argument is meaningful and gets checked
+      Assert (level<n_global_levels() || level<levels.size(), ExcInvalidLevel(level));
+
+      // a level that is not stored or holds no cells has no first line
+      const bool level_is_empty
+        = (level >= levels.size() || levels[level]->cells.cells.size() == 0);
+      if (level_is_empty)
+        return end_line();
+
+      return raw_line_iterator (self, level, 0);
+    }
+
+  // in all other dimensions only level 0 is accepted
+  Assert (level == 0, ExcFacesHaveNoLevel());
+  return raw_line_iterator (self, 0, 0);
+}
+
+
+template <int dim, int spacedim>
+typename Triangulation<dim, spacedim>::line_iterator
+Triangulation<dim, spacedim>::begin_line (const unsigned int level) const
+{
+  // the level argument is checked inside begin_raw_line
+  raw_line_iterator ri = begin_raw_line (level);
+
+  // advance past unused lines; stop as soon as the iterator is no
+  // longer valid
+  while (ri.state() == IteratorState::valid
+         &&
+         ri->used() == false)
+    ++ri;
+
+  return ri;
+}
+
+
+
+template <int dim, int spacedim>
+typename Triangulation<dim, spacedim>::active_line_iterator
+Triangulation<dim, spacedim>::begin_active_line (const unsigned int level) const
+{
+  // the level argument is checked inside begin_raw_line
+  line_iterator line = begin_line (level);
+
+  // advance past lines that have children; stop as soon as the iterator
+  // is no longer valid
+  while (line.state() == IteratorState::valid
+         &&
+         line->has_children())
+    ++line;
+
+  return line;
+}
+
+
+
+template <int dim, int spacedim>
+typename Triangulation<dim, spacedim>::line_iterator
+Triangulation<dim, spacedim>::end_line () const
+{
+  // the iterator constructor takes a non-const triangulation pointer
+  Triangulation<dim, spacedim> *const self
+    = const_cast<Triangulation<dim, spacedim>*>(this);
+
+  // the end iterator uses level -1 and index -1
+  return raw_line_iterator (self, -1, -1);
+}
+
+
+
+/*------------------------ Quad iterator functions ------------------------*/
+
+
+// Return a raw iterator to the first quad. For dim==2 this is the first
+// quad slot on the given level (or end_quad() if that level is not
+// stored or holds no cells); for dim==3 only level 0 is accepted. The
+// call is rejected by an assertion for dim==1 and for any other
+// dimension.
+template <int dim, int spacedim>
+typename Triangulation<dim,spacedim>::raw_quad_iterator
+Triangulation<dim,spacedim>::begin_raw_quad (const unsigned int level) const
+{
+  switch (dim)
+    {
+    case 1:
+      Assert (false, ExcImpossibleInDim(1));
+      // return an object of the declared return type instead of relying
+      // on a conversion from raw_hex_iterator
+      return raw_quad_iterator();
+    case 2:
+    {
+      Assert (level<n_global_levels() || level<levels.size(), ExcInvalidLevel(level));
+
+      if (level >= levels.size() || levels[level]->cells.cells.size() == 0)
+        return end_quad();
+
+      return raw_quad_iterator (const_cast<Triangulation<dim,spacedim>*>(this),
+                                level,
+                                0);
+    }
+
+    case 3:
+    {
+      Assert (level == 0, ExcFacesHaveNoLevel());
+
+      return raw_quad_iterator (const_cast<Triangulation<dim,spacedim>*>(this),
+                                0,
+                                0);
+    }
+
+
+    default:
+      Assert (false, ExcNotImplemented());
+      // same as above: use the declared return type
+      return raw_quad_iterator();
+    }
+}
+
+
+
+template <int dim, int spacedim>
+typename Triangulation<dim,spacedim>::quad_iterator
+Triangulation<dim,spacedim>::begin_quad (const unsigned int level) const
+{
+  // the level argument is checked inside begin_raw_quad
+  raw_quad_iterator ri = begin_raw_quad (level);
+
+  // advance past unused quads; stop as soon as the iterator is no
+  // longer valid
+  while (ri.state() == IteratorState::valid
+         &&
+         ri->used() == false)
+    ++ri;
+
+  return ri;
+}
+
+
+
+template <int dim, int spacedim>
+typename Triangulation<dim,spacedim>::active_quad_iterator
+Triangulation<dim,spacedim>::begin_active_quad (const unsigned int level) const
+{
+  // the level argument is checked inside begin_raw_quad
+  quad_iterator quad = begin_quad (level);
+
+  // advance past quads that have children; stop as soon as the iterator
+  // is no longer valid
+  while (quad.state() == IteratorState::valid
+         &&
+         quad->has_children())
+    ++quad;
+
+  return quad;
+}
+
+
+
+template <int dim, int spacedim>
+typename Triangulation<dim,spacedim>::quad_iterator
+Triangulation<dim,spacedim>::end_quad () const
+{
+  // the iterator constructor takes a non-const triangulation pointer
+  Triangulation<dim, spacedim> *const self
+    = const_cast<Triangulation<dim, spacedim>*>(this);
+
+  // the end iterator uses level -1 and index -1
+  return raw_quad_iterator (self, -1, -1);
+}
+
+
+/*------------------------ Hex iterator functions ------------------------*/
+
+
+// Return a raw iterator to the first hex on the given level. This is
+// only possible for dim==3; there, end_hex() is returned if the level is
+// not stored or holds no cells. For dim==1 and dim==2 the call is
+// rejected by an assertion, as is any other dimension.
+template <int dim, int spacedim>
+typename Triangulation<dim,spacedim>::raw_hex_iterator
+Triangulation<dim,spacedim>::begin_raw_hex (const unsigned int level) const
+{
+  switch (dim)
+    {
+    case 1:
+    case 2:
+      // this branch serves both dim==1 and dim==2, so report the actual
+      // dimension rather than a hard-coded 1
+      Assert (false, ExcImpossibleInDim(dim));
+      return raw_hex_iterator();
+    case 3:
+    {
+      Assert (level<n_global_levels() || level<levels.size(), ExcInvalidLevel(level));
+
+      if (level >= levels.size() || levels[level]->cells.cells.size() == 0)
+        return end_hex();
+
+      return raw_hex_iterator (const_cast<Triangulation<dim,spacedim>*>(this),
+                               level,
+                               0);
+    }
+
+    default:
+      Assert (false, ExcNotImplemented());
+      return raw_hex_iterator();
+    }
+}
+
+
+
+template <int dim, int spacedim>
+typename Triangulation<dim,spacedim>::hex_iterator
+Triangulation<dim,spacedim>::begin_hex (const unsigned int level) const
+{
+  // the level argument is checked inside begin_raw_hex
+  raw_hex_iterator ri = begin_raw_hex (level);
+
+  // advance past unused hexes; stop as soon as the iterator is no
+  // longer valid
+  while (ri.state() == IteratorState::valid
+         &&
+         ri->used() == false)
+    ++ri;
+
+  return ri;
+}
+
+
+
+template <int dim, int spacedim>
+typename Triangulation<dim, spacedim>::active_hex_iterator
+Triangulation<dim, spacedim>::begin_active_hex (const unsigned int level) const
+{
+  // the level argument is checked inside begin_raw_hex
+  hex_iterator hex = begin_hex (level);
+
+  // advance past hexes that have children; stop as soon as the iterator
+  // is no longer valid
+  while (hex.state() == IteratorState::valid
+         &&
+         hex->has_children())
+    ++hex;
+
+  return hex;
+}
+
+
+
+template <int dim, int spacedim>
+typename Triangulation<dim, spacedim>::hex_iterator
+Triangulation<dim, spacedim>::end_hex () const
+{
+  // the iterator constructor takes a non-const triangulation pointer
+  Triangulation<dim,spacedim> *const self
+    = const_cast<Triangulation<dim,spacedim>*>(this);
+
+  // the end iterator uses level -1 and index -1
+  return raw_hex_iterator (self, -1, -1);
+}
+
+
+
+
+// -------------------------------- number of cells etc ---------------
+
+
+namespace internal
+{
+  namespace Triangulation
+  {
+    // Overloads that pick the cell counter matching the dimension of
+    // the number cache: lines for NumberCache<1>, quads for
+    // NumberCache<2>, hexes for NumberCache<3>.
+
+    // total number of cells
+    inline
+    unsigned int
+    n_cells (const internal::Triangulation::NumberCache<1> &cache)
+    {
+      return cache.n_lines;
+    }
+
+
+    inline
+    unsigned int
+    n_cells (const internal::Triangulation::NumberCache<2> &cache)
+    {
+      return cache.n_quads;
+    }
+
+
+    inline
+    unsigned int
+    n_cells (const internal::Triangulation::NumberCache<3> &cache)
+    {
+      return cache.n_hexes;
+    }
+
+
+    // number of active cells
+    inline
+    unsigned int
+    n_active_cells (const internal::Triangulation::NumberCache<1> &cache)
+    {
+      return cache.n_active_lines;
+    }
+
+
+    inline
+    unsigned int
+    n_active_cells (const internal::Triangulation::NumberCache<2> &cache)
+    {
+      return cache.n_active_quads;
+    }
+
+
+    inline
+    unsigned int
+    n_active_cells (const internal::Triangulation::NumberCache<3> &cache)
+    {
+      return cache.n_active_hexes;
+    }
+  }
+}
+
+
+
+template <int dim, int spacedim>
+unsigned int Triangulation<dim, spacedim>::n_cells () const
+{
+  // the overload chosen by the type of number_cache picks the counter
+  const unsigned int count = internal::Triangulation::n_cells (number_cache);
+  return count;
+}
+
+
+template <int dim, int spacedim>
+unsigned int Triangulation<dim, spacedim>::n_active_cells () const
+{
+  // the overload chosen by the type of number_cache picks the counter
+  const unsigned int count = internal::Triangulation::n_active_cells (number_cache);
+  return count;
+}
+
+template <int dim, int spacedim>
+types::global_dof_index Triangulation<dim, spacedim>::n_global_active_cells () const
+{
+  // in this class the global count is just n_active_cells()
+  const types::global_dof_index count = n_active_cells();
+  return count;
+}
+
+
+
+template <int dim, int spacedim>
+unsigned int Triangulation<dim, spacedim>::n_faces () const
+{
+  // zero is reported for dim==1
+  if (dim == 1)
+    return 0;
+
+  // faces are lines for dim==2 and quads for dim==3
+  if (dim == 2)
+    return n_lines();
+  if (dim == 3)
+    return n_quads();
+
+  // any other dimension is not implemented
+  Assert (false, ExcNotImplemented());
+  return 0;
+}
+
+
+// Return the number of raw faces: raw lines for dim==2, raw quads for
+// dim==3, and zero for dim==1. Any other dimension triggers an
+// ExcNotImplemented assertion.
+template <int dim, int spacedim>
+unsigned int Triangulation<dim, spacedim>::n_raw_faces () const
+{
+  switch (dim)
+    {
+    case 1:
+      // report zero, in line with n_faces() and n_active_faces(),
+      // instead of running into the assertion below
+      return 0;
+    case 2:
+      return n_raw_lines();
+    case 3:
+      return n_raw_quads();
+    default:
+      Assert (false, ExcNotImplemented());
+    }
+  return 0;
+}
+
+
+template <int dim, int spacedim>
+unsigned int Triangulation<dim, spacedim>::n_active_faces () const
+{
+  // zero is reported for dim==1
+  if (dim == 1)
+    return 0;
+
+  // faces are lines for dim==2 and quads for dim==3
+  if (dim == 2)
+    return n_active_lines();
+  if (dim == 3)
+    return n_active_quads();
+
+  // any other dimension is not implemented
+  Assert (false, ExcNotImplemented());
+  return 0;
+}
+
+
+template <int dim, int spacedim>
+unsigned int Triangulation<dim, spacedim>::n_raw_cells (const unsigned int level) const
+{
+  // cells are lines, quads or hexes, depending on the dimension
+  if (dim == 1)
+    return n_raw_lines(level);
+  if (dim == 2)
+    return n_raw_quads(level);
+  if (dim == 3)
+    return n_raw_hexs(level);
+
+  // any other dimension is not implemented
+  Assert (false, ExcNotImplemented());
+  return 0;
+}
+
+
+
+template <int dim, int spacedim>
+unsigned int Triangulation<dim, spacedim>::n_cells (const unsigned int level) const
+{
+  // cells are lines, quads or hexes, depending on the dimension
+  if (dim == 1)
+    return n_lines(level);
+  if (dim == 2)
+    return n_quads(level);
+  if (dim == 3)
+    return n_hexs(level);
+
+  // any other dimension is not implemented
+  Assert (false, ExcNotImplemented());
+  return 0;
+}
+
+
+
+template <int dim, int spacedim>
+unsigned int Triangulation<dim, spacedim>::n_active_cells (const unsigned int level) const
+{
+  // cells are lines, quads or hexes, depending on the dimension
+  if (dim == 1)
+    return n_active_lines(level);
+  if (dim == 2)
+    return n_active_quads(level);
+  if (dim == 3)
+    return n_active_hexs(level);
+
+  // any other dimension is not implemented
+  Assert (false, ExcNotImplemented());
+  return 0;
+}
+
+
+// Return true if any level other than the last one counted by
+// n_global_levels() contains active cells, and false otherwise.
+template <int dim, int spacedim>
+bool Triangulation<dim, spacedim>::has_hanging_nodes () const
+{
+  // The bound is written as lvl+1 < n_global_levels() instead of
+  // lvl < n_global_levels()-1 so that the unsigned subtraction cannot
+  // wrap around when n_global_levels() returns zero.
+  for (unsigned int lvl = 0; lvl+1<n_global_levels(); ++lvl)
+    if (n_active_cells(lvl) != 0)
+      return true;
+
+  return false;
+}
+
+
+template <int dim, int spacedim>
+unsigned int Triangulation<dim, spacedim>::n_lines () const
+{
+  // the count is kept in the number cache
+  const unsigned int count = number_cache.n_lines;
+  return count;
+}
+
+
+//TODO: Merge the following 6 functions somehow
+
+// Specializations of n_raw_lines() for dim==1 and spacedim==1,2,3. The
+// per-level variant returns the size of the cell storage of that level;
+// the variant without a level argument is not implemented.
+
+template <>
+unsigned int Triangulation<1,1>::n_raw_lines (const unsigned int level) const
+{
+  Assert(level < n_levels(), ExcIndexRange(level,0,n_levels()));
+  const unsigned int n_slots = levels[level]->cells.cells.size();
+  return n_slots;
+}
+
+
+template <>
+unsigned int Triangulation<1,1>::n_raw_lines () const
+{
+  Assert(false, ExcNotImplemented());
+  // dummy return value
+  return 0;
+}
+
+
+
+template <>
+unsigned int Triangulation<1,2>::n_raw_lines (const unsigned int level) const
+{
+  Assert(level < n_levels(), ExcIndexRange(level,0,n_levels()));
+  const unsigned int n_slots = levels[level]->cells.cells.size();
+  return n_slots;
+}
+
+
+template <>
+unsigned int Triangulation<1,2>::n_raw_lines () const
+{
+  Assert(false, ExcNotImplemented());
+  // dummy return value
+  return 0;
+}
+
+
+template <>
+unsigned int Triangulation<1,3>::n_raw_lines (const unsigned int level) const
+{
+  Assert(level < n_levels(), ExcIndexRange(level,0,n_levels()));
+  const unsigned int n_slots = levels[level]->cells.cells.size();
+  return n_slots;
+}
+
+template <>
+unsigned int Triangulation<1,3>::n_raw_lines () const
+{
+  Assert(false, ExcNotImplemented());
+  // dummy return value
+  return 0;
+}
+
+
+
+// Generic per-level variant of n_raw_lines(): always triggers an
+// ExcFacesHaveNoLevel assertion and returns 0 as a dummy value.
+template <int dim, int spacedim>
+unsigned int Triangulation<dim, spacedim>::n_raw_lines (const unsigned int) const
+{
+  Assert(false, ExcFacesHaveNoLevel());
+  return 0;
+}
+
+
+template <int dim, int spacedim>
+unsigned int Triangulation<dim, spacedim>::n_raw_lines () const
+{
+  // in the generic case the lines are stored in the 'faces' object
+  const unsigned int n_slots = faces->lines.cells.size();
+  return n_slots;
+}
+
+
+template <int dim, int spacedim>
+unsigned int Triangulation<dim, spacedim>::n_lines (const unsigned int level) const  // cached line count of one level
+{
+  Assert (level < number_cache.n_lines_level.size(),
+          ExcIndexRange (level, 0, number_cache.n_lines_level.size()));  // level has to index n_lines_level
+  Assert (dim == 1, ExcFacesHaveNoLevel());  // per-level counts are only offered for dim == 1
+  return number_cache.n_lines_level[level];
+}
+
+
+template <int dim, int spacedim>
+unsigned int Triangulation<dim, spacedim>::n_active_lines () const  // active line count as stored in number_cache
+{
+  return number_cache.n_active_lines;  // read from the cache, nothing is counted here
+}
+
+
+template <int dim, int spacedim>
+unsigned int Triangulation<dim, spacedim>::n_active_lines (const unsigned int level) const
+{
+  // range-check against the vector that is actually indexed below
+  Assert (level < number_cache.n_active_lines_level.size(),
+          ExcIndexRange (level, 0, number_cache.n_active_lines_level.size()));
+  Assert (dim == 1, ExcFacesHaveNoLevel());  // per-level counts are only offered for dim == 1
+  return number_cache.n_active_lines_level[level];
+}
+
+
+template <>
+unsigned int Triangulation<1,1>::n_quads () const  // <1,1>: constant result
+{
+  return 0;
+}
+
+
+template <>
+unsigned int Triangulation<1,1>::n_quads (const unsigned int) const  // <1,1>: the level argument is ignored
+{
+  return 0;
+}
+
+
+template <>
+unsigned int Triangulation<1,1>::n_raw_quads (const unsigned int) const  // <1,1>: the level argument is ignored
+{
+  return 0;
+}
+
+
+template <>
+unsigned int Triangulation<1,1>::n_raw_hexs (const unsigned int) const  // <1,1>: the level argument is ignored
+{
+  return 0;
+}
+
+
+template <>
+unsigned int Triangulation<1,1>::n_active_quads (const unsigned int) const  // <1,1>: the level argument is ignored
+{
+  return 0;
+}
+
+
+template <>
+unsigned int Triangulation<1,1>::n_active_quads () const  // <1,1>: constant result
+{
+  return 0;
+}
+
+
+
+
+template <>
+unsigned int Triangulation<1,2>::n_quads () const  // <1,2>: constant result
+{
+  return 0;
+}
+
+
+template <>
+unsigned int Triangulation<1,2>::n_quads (const unsigned int) const  // <1,2>: the level argument is ignored
+{
+  return 0;
+}
+
+
+template <>
+unsigned int Triangulation<1,2>::n_raw_quads (const unsigned int) const  // <1,2>: the level argument is ignored
+{
+  return 0;
+}
+
+
+template <>
+unsigned int Triangulation<1,2>::n_raw_hexs (const unsigned int) const  // <1,2>: the level argument is ignored
+{
+  return 0;
+}
+
+
+template <>
+unsigned int Triangulation<1,2>::n_active_quads (const unsigned int) const  // <1,2>: the level argument is ignored
+{
+  return 0;
+}
+
+
+template <>
+unsigned int Triangulation<1,2>::n_active_quads () const  // <1,2>: constant result
+{
+  return 0;
+}
+
+
+template <>
+unsigned int Triangulation<1,3>::n_quads () const  // <1,3>: constant result
+{
+  return 0;
+}
+
+
+template <>
+unsigned int Triangulation<1,3>::n_quads (const unsigned int) const  // <1,3>: the level argument is ignored
+{
+  return 0;
+}
+
+
+template <>
+unsigned int Triangulation<1,3>::n_raw_quads (const unsigned int) const  // <1,3>: the level argument is ignored
+{
+  return 0;
+}
+
+
+template <>
+unsigned int Triangulation<1,3>::n_raw_hexs (const unsigned int) const  // <1,3>: the level argument is ignored
+{
+  return 0;
+}
+
+
+template <>
+unsigned int Triangulation<1,3>::n_active_quads (const unsigned int) const  // <1,3>: the level argument is ignored
+{
+  return 0;
+}
+
+
+template <>
+unsigned int Triangulation<1,3>::n_active_quads () const  // <1,3>: constant result
+{
+  return 0;
+}
+
+
+
+template <int dim, int spacedim>
+unsigned int Triangulation<dim, spacedim>::n_quads () const  // quad count as stored in number_cache
+{
+  return number_cache.n_quads;  // read from the cache, nothing is counted here
+}
+
+
+template <int dim, int spacedim>
+unsigned int Triangulation<dim, spacedim>::n_quads (const unsigned int level) const  // cached quad count of one level
+{
+  Assert (dim == 2, ExcFacesHaveNoLevel());  // per-level counts are only offered for dim == 2
+  Assert (level < number_cache.n_quads_level.size(),
+          ExcIndexRange (level, 0, number_cache.n_quads_level.size()));  // level has to index n_quads_level
+  return number_cache.n_quads_level[level];
+}
+
+
+
+template <>
+unsigned int Triangulation<2,2>::n_raw_quads (const unsigned int level) const  // <2,2>: per-level overload
+{
+  Assert(level < n_levels(), ExcIndexRange(level,0,n_levels()));  // level has to be below n_levels()
+  return levels[level]->cells.cells.size();  // size of the cell storage of that level
+}
+
+
+
+template <>
+unsigned int Triangulation<2,3>::n_raw_quads (const unsigned int level) const  // <2,3>: per-level overload
+{
+  Assert(level < n_levels(), ExcIndexRange(level,0,n_levels()));  // level has to be below n_levels()
+  return levels[level]->cells.cells.size();  // size of the cell storage of that level
+}
+
+
+template <>
+unsigned int Triangulation<3,3>::n_raw_quads (const unsigned int) const  // <3,3>: the level argument is ignored
+{
+  Assert(false, ExcFacesHaveNoLevel());  // unconditional assertion: no per-level answer for <3,3>
+  return 0;  // value returned if the assertion above does not abort
+}
+
+
+
+
+
+template <int dim, int spacedim>
+unsigned int Triangulation<dim, spacedim>::n_raw_quads () const  // generic overload without a level, not implemented
+{
+  Assert (false, ExcNotImplemented());  // unconditional assertion
+  return 0;  // value returned if the assertion above does not abort
+}
+
+
+
+template <>
+unsigned int Triangulation<3,3>::n_raw_quads () const  // <3,3>: the one case with an actual answer
+{
+  return faces->quads.cells.size();  // size of the quad storage held by faces
+}
+
+
+
+template <int dim, int spacedim>
+unsigned int Triangulation<dim, spacedim>::n_active_quads () const  // active quad count as stored in number_cache
+{
+  return number_cache.n_active_quads;  // read from the cache, nothing is counted here
+}
+
+
+template <int dim, int spacedim>
+unsigned int Triangulation<dim, spacedim>::n_active_quads (const unsigned int level) const
+{
+  // range-check against the vector that is actually indexed below
+  Assert (level < number_cache.n_active_quads_level.size(),
+          ExcIndexRange (level, 0, number_cache.n_active_quads_level.size()));
+  Assert (dim == 2, ExcFacesHaveNoLevel());  // per-level counts are only offered for dim == 2
+  return number_cache.n_active_quads_level[level];
+}
+
+
+template <int dim, int spacedim>
+unsigned int Triangulation<dim, spacedim>::n_hexs () const  // generic case: constant result
+{
+  return 0;
+}
+
+
+
+template <int dim, int spacedim>
+unsigned int Triangulation<dim, spacedim>::n_hexs (const unsigned int) const  // generic case: the level argument is ignored
+{
+  return 0;
+}
+
+
+
+template <int dim, int spacedim>
+unsigned int Triangulation<dim, spacedim>::n_raw_hexs (const unsigned int) const  // generic case: the level argument is ignored
+{
+  return 0;
+}
+
+
+template <int dim, int spacedim>
+unsigned int Triangulation<dim, spacedim>::n_active_hexs () const  // generic case: constant result
+{
+  return 0;
+}
+
+
+
+template <int dim, int spacedim>
+unsigned int Triangulation<dim, spacedim>::n_active_hexs (const unsigned int) const  // generic case: the level argument is ignored
+{
+  return 0;
+}
+
+
+template <>
+unsigned int Triangulation<3,3>::n_hexs () const  // <3,3>: hex count as stored in number_cache
+{
+  return number_cache.n_hexes;  // read from the cache, nothing is counted here
+}
+
+
+
+template <>
+unsigned int Triangulation<3,3>::n_hexs (const unsigned int level) const  // <3,3>: cached hex count of one level
+{
+  Assert (level < number_cache.n_hexes_level.size(),
+          ExcIndexRange (level, 0, number_cache.n_hexes_level.size()));  // level has to index n_hexes_level
+
+  return number_cache.n_hexes_level[level];
+}
+
+
+
+template <>
+unsigned int Triangulation<3,3>::n_raw_hexs (const unsigned int level) const  // <3,3>: per-level overload
+{
+  Assert(level < n_levels(), ExcIndexRange(level,0,n_levels()));  // level has to be below n_levels()
+  return levels[level]->cells.cells.size();  // size of the cell storage of that level
+}
+
+
+template <>
+unsigned int Triangulation<3,3>::n_active_hexs () const  // <3,3>: active hex count as stored in number_cache
+{
+  return number_cache.n_active_hexes;  // read from the cache, nothing is counted here
+}
+
+
+
+template <>
+unsigned int Triangulation<3,3>::n_active_hexs (const unsigned int level) const
+{
+  // range-check against the vector that is actually indexed below
+  Assert (level < number_cache.n_active_hexes_level.size(),
+          ExcIndexRange (level, 0, number_cache.n_active_hexes_level.size()));
+  return number_cache.n_active_hexes_level[level];
+}
+
+
+
+template <int dim, int spacedim>
+unsigned int Triangulation<dim, spacedim>::n_used_vertices () const
+{
+  // number of true entries in vertices_used; std::count does this directly,
+  // without the deprecated std::bind2nd adaptor the predicate form needed
+  return std::count (vertices_used.begin(), vertices_used.end(), true);
+}
+
+
+
+template <int dim, int spacedim>
+const std::vector<bool> &
+Triangulation<dim, spacedim>::get_used_vertices () const  // read-only access to the vertices_used flags
+{
+  return vertices_used;  // handed out by reference, no copy is made
+}
+
+
+
+
+template <>
+unsigned int Triangulation<1,1>::max_adjacent_cells () const  // <1,1>: constant result
+{
+  return 2;
+}
+
+
+
+template <>
+unsigned int Triangulation<1,2>::max_adjacent_cells () const  // <1,2>: constant result
+{
+  return 2;
+}
+
+
+template <>
+unsigned int Triangulation<1,3>::max_adjacent_cells () const  // <1,3>: constant result
+{
+  return 2;
+}
+
+
+template <int dim, int spacedim>
+unsigned int Triangulation<dim, spacedim>::max_adjacent_cells () const
+{
+  // the coarse range ends where level 1 starts or, if there is no
+  // level 1, at end()
+  cell_iterator coarse_cell = begin(0);
+  const cell_iterator coarse_end
+    = (n_levels() > 1 ? begin(1) : cell_iterator(end()));
+
+  // pass 1: largest vertex index referenced by any cell in that range
+  unsigned int highest_vertex = 0;
+  for (; coarse_cell!=coarse_end; ++coarse_cell)
+    for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+      if (highest_vertex < coarse_cell->vertex_index(v))
+        highest_vertex = coarse_cell->vertex_index(v);
+
+  // pass 2: restart at begin() and, up to the same end, count for
+  // every vertex the cells that touch it. the counters are unsigned
+  // short ints, which is taken to be wide enough
+  std::vector<unsigned short int> cells_at_vertex (highest_vertex+1, 0);
+  for (coarse_cell=begin(); coarse_cell!=coarse_end; ++coarse_cell)
+    for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+      ++cells_at_vertex[coarse_cell->vertex_index(v)];
+
+  // never report less than the number of vertices per cell
+  const unsigned int busiest_vertex
+    = *std::max_element (cells_at_vertex.begin(), cells_at_vertex.end());
+  return std::max (GeometryInfo<dim>::vertices_per_cell, busiest_vertex);
+}
+
+
+
+template <int dim, int spacedim>
+types::subdomain_id
+Triangulation<dim,spacedim>::locally_owned_subdomain () const  // this class always answers with the invalid id
+{
+  return numbers::invalid_subdomain_id;
+}
+
+
+
+template <int dim, int spacedim>
+Triangulation<dim,spacedim> &
+Triangulation<dim,spacedim>::get_triangulation ()  // non-const overload
+{
+  return *this;  // the object itself is the triangulation asked for
+}
+
+
+
+template <int dim, int spacedim>
+const Triangulation<dim,spacedim> &
+Triangulation<dim,spacedim>::get_triangulation () const  // const overload
+{
+  return *this;  // the object itself is the triangulation asked for
+}
+
+
+
+template <int dim, int spacedim>
+void
+Triangulation<dim, spacedim>::execute_coarsening_and_refinement ()  // coarsen, then refine, then rebuild derived data
+{
+  prepare_coarsening_and_refinement ();  // runs first, before any check or signal
+
+  // verify a case with which we have had
+  // some difficulty in the past (see the
+  // deal.II/coarsening_* tests)
+  if (smooth_grid & limit_level_difference_at_vertices)  // bitwise test of one smoothing flag
+    Assert (satisfies_level1_at_vertex_rule (*this) == true,
+            ExcInternalError());
+
+  // Inform all listeners about beginning of refinement.
+  signals.pre_refinement();
+
+  execute_coarsening();  // coarsening is carried out before refinement
+
+  const DistortedCellList
+  cells_with_distorted_children = execute_refinement();  // kept until the very end of this function
+
+  // same check as before the mesh was changed: verify a case with
+  // which we have had some difficulty in the past (see the
+  // deal.II/coarsening_* tests)
+  if (smooth_grid & limit_level_difference_at_vertices)
+    Assert (satisfies_level1_at_vertex_rule (*this) == true,
+            ExcInternalError());
+
+  // finally build up neighbor connectivity information, and set
+  // active cell indices
+  update_neighbors(*this);
+  reset_active_cell_indices ();
+
+  // Inform all listeners about end of refinement.
+  signals.post_refinement();
+
+  AssertThrow (cells_with_distorted_children.distorted_cells.size() == 0,
+               cells_with_distorted_children);  // checked only after post_refinement() has been signalled
+}
+
+
+
+template <int dim, int spacedim>
+void
+Triangulation<dim,spacedim>::reset_active_cell_indices ()
+{
+  // number the used, childless cells consecutively in raw iteration
+  // order; every other raw cell gets the invalid index
+  unsigned int next_index = 0;
+  for (raw_cell_iterator c=begin_raw(); c!=end(); ++c)
+    {
+      const bool gets_index = c->used() && !c->has_children();
+      c->set_active_cell_index (gets_index ? next_index++ : numbers::invalid_unsigned_int);
+    }
+
+  // the counter has to agree with what n_active_cells() reports
+  Assert (next_index == n_active_cells(), ExcInternalError());
+}
+
+
+
+template<int dim, int spacedim>
+void
+Triangulation<dim, spacedim>::clear_despite_subscriptions()
+{
+  // This is the former function clear without the assertion in the
+  // beginning: it releases the level and face objects and resets
+  // the remaining containers and the number cache.
+  for (unsigned int i=0; i<levels.size(); ++i)
+    delete levels[i];  // the level objects are deleted here, one by one
+  levels.clear ();  // then the now dangling pointers are dropped
+
+  delete faces;
+  faces = NULL;  // leave no dangling pointer behind
+
+  vertices.clear ();
+  vertices_used.clear ();
+
+  manifold.clear();
+
+  number_cache = internal::Triangulation::NumberCache<dim>();  // overwrite with a default-constructed cache
+}
+
+
+template <int dim, int spacedim>
+typename Triangulation<dim,spacedim>::DistortedCellList
+Triangulation<dim,spacedim>::execute_refinement ()
+{
+  // the refinement itself is delegated to the Implementation class;
+  // what it returns is handed back to the caller unchanged at the end
+  const DistortedCellList
+  cells_with_distorted_children
+    =
+      internal::Triangulation::Implementation::
+      execute_refinement (*this,check_for_distorted_cells);
+
+  // re-compute the cached numbers of objects
+  internal::Triangulation::Implementation
+  ::compute_number_cache (*this, levels.size(), number_cache);
+
+#ifdef DEBUG
+  // debug-only bookkeeping and sanity checks
+  for (unsigned int level=0; level<levels.size(); ++level)
+    levels[level]->cells.monitor_memory (dim);
+
+  // check whether really all refinement flags are reset (also of
+  // previously non-active cells which we may not have touched). If
+  // the refinement flag of a non-active cell is set, something went
+  // wrong since the cell-accessors should have caught this.
+  //
+  // the iterator is advanced in the loop header, not inside the
+  // argument of Assert, so that walking over the cells does not rely
+  // on the macro evaluating its condition
+  cell_iterator cell = begin();
+  const cell_iterator endc = end();
+  for (; cell != endc; ++cell)
+    Assert (!cell->refine_flag_set(), ExcInternalError ());
+#endif
+
+  return cells_with_distorted_children;
+}
+
+
+
+template <int dim, int spacedim>
+void Triangulation<dim, spacedim>::execute_coarsening ()
+{
+  // delete the children of all cells whose children are flagged for
+  // coarsening. to begin with, count for each line and for each quad
+  // how many cells are bounded by it; both counts are handed to
+  // delete_children() below (in 3D the line count is used to decide
+  // which lines can be deleted after coarsening a cell)
+  std::vector<unsigned int> line_cell_count = count_cells_bounded_by_line (*this);
+  std::vector<unsigned int> quad_cell_count = count_cells_bounded_by_quad (*this);
+
+  // loop over all cells. Flag all
+  // cells of which all children are
+  // flagged for
+  // coarsening and delete the childrens'
+  // flags. In effect, only those
+  // cells are flagged of which originally
+  // all children were flagged and for which
+  // all children are on the same refinement
+  // level. For flagging, the user flags are
+  // used, to avoid confusion and because
+  // non-active cells can't be flagged for
+  // coarsening. Note that because of the
+  // effects of @p{fix_coarsen_flags}, of a
+  // cell either all or no children must
+  // be flagged for coarsening, so it is
+  // ok to only check the first child
+  clear_user_flags ();
+
+  cell_iterator cell = begin(),
+                endc = end();
+  for (; cell!=endc; ++cell)
+    if (!cell->active())
+      if (cell->child(0)->coarsen_flag_set())
+        {
+          cell->set_user_flag();
+          for (unsigned int child=0; child<cell->n_children(); ++child)
+            {
+              Assert (cell->child(child)->coarsen_flag_set(),
+                      ExcInternalError());
+              cell->child(child)->clear_coarsen_flag();
+            }
+        }
+
+
+  // now do the actual coarsening
+  // step. Since the loop goes over
+  // used cells we need not
+  // worry about deleting some cells
+  // since the ++operator will then
+  // just hop over them if we should
+  // hit one. Do the loop in the
+  // reverse way since we may only
+  // delete some cells if their
+  // neighbors have already been
+  // deleted (if the latter are on a
+  // higher level for example)
+  //
+  // since we delete the *children* of cells, we can ignore cells
+  // on the highest level, i.e., level must be less than or equal
+  // to n_levels()-2.
+  if (levels.size() >= 2)
+    for (cell = last(); cell!=endc; --cell)
+      if (cell->level()<=static_cast<int>(levels.size()-2) && cell->user_flag_set())
+        {
+          // inform all listeners that cell coarsening is going to happen
+          signals.pre_coarsening_on_cell(cell);
+          // use a separate function,
+          // since this is dimension
+          // specific
+          internal::Triangulation::Implementation
+          ::delete_children (*this, cell, line_cell_count, quad_cell_count);
+        }
+
+  // re-compute the cached numbers of
+  // objects
+  internal::Triangulation::Implementation
+  ::compute_number_cache (*this, levels.size(), number_cache);
+
+  // in principle no user flags should be set any more at this
+  // point. the guard is #ifdef, like the other debug-only block of
+  // this file, so that it also works if DEBUG is defined empty
+#ifdef DEBUG
+  for (cell=begin(); cell!=endc; ++cell)
+    Assert (cell->user_flag_set() == false, ExcInternalError());
+#endif
+}
+
+
+
+template <int dim, int spacedim>
+void Triangulation<dim, spacedim>::fix_coarsen_flags ()
+{
+  // make the coarsen flags consistent: on return the children of a
+  // cell are either all flagged for coarsening or none of them is.
+  //
+  // the first part is a copy of a piece of code from
+  // prepare_coarsening_and_refinement that ensures that the level
+  // difference at vertices is limited if so desired. we need it here
+  // since at least in 1d we don't call the dimension-independent
+  // version of that function.
+  //
+  // the main difference to the code in p_c_and_r is that here we
+  // absolutely have to get things right, in particular if
+  // limit_level_difference_at_vertices is set. to do so we iterate
+  // until the flags don't change any more
+  std::vector<bool> previous_coarsen_flags (n_active_cells());
+  save_coarsen_flags (previous_coarsen_flags);
+
+  std::vector<int> vertex_level (vertices.size(), 0);
+
+  bool continue_iterating = true;
+
+  do
+    {
+      if (smooth_grid & limit_level_difference_at_vertices)
+        {
+          Assert(!anisotropic_refinement,
+                 ExcMessage("In case of anisotropic refinement the "
+                            "limit_level_difference_at_vertices flag for "
+                            "mesh smoothing must not be set!"));
+
+          // store highest level one of the cells adjacent to a vertex
+          // belongs to
+          std::fill (vertex_level.begin(), vertex_level.end(), 0);
+          active_cell_iterator cell = begin_active(),
+                               endc = end();
+          for (; cell!=endc; ++cell)
+            {
+              if (cell->refine_flag_set())
+                for (unsigned int vertex=0; vertex<GeometryInfo<dim>::vertices_per_cell;
+                     ++vertex)
+                  vertex_level[cell->vertex_index(vertex)]
+                    = std::max (vertex_level[cell->vertex_index(vertex)],
+                                cell->level()+1);
+              else if (!cell->coarsen_flag_set())
+                for (unsigned int vertex=0; vertex<GeometryInfo<dim>::vertices_per_cell;
+                     ++vertex)
+                  vertex_level[cell->vertex_index(vertex)]
+                    = std::max (vertex_level[cell->vertex_index(vertex)],
+                                cell->level());
+              else
+                {
+                  // if coarsen flag is set then tentatively assume
+                  // that the cell will be coarsened. this isn't
+                  // always true (the coarsen flag could be removed
+                  // again) and so we may make an error here. we try
+                  // to correct this by iterating over the entire
+                  // process until we are converged
+                  Assert (cell->coarsen_flag_set(), ExcInternalError());
+                  for (unsigned int vertex=0; vertex<GeometryInfo<dim>::vertices_per_cell;
+                       ++vertex)
+                    vertex_level[cell->vertex_index(vertex)]
+                      = std::max (vertex_level[cell->vertex_index(vertex)],
+                                  cell->level()-1);
+                }
+            }
+
+
+          // loop over all cells in reverse order. do so because we
+          // can then update the vertex levels on the adjacent
+          // vertices and maybe already flag additional cells in this
+          // loop
+          //
+          // note that not only may we have to add additional
+          // refinement flags, but we will also have to remove
+          // coarsening flags on cells adjacent to vertices that will
+          // see refinement
+          for (cell=last_active(); cell != endc; --cell)
+            if (cell->refine_flag_set() == false)
+              {
+                for (unsigned int vertex=0;
+                     vertex<GeometryInfo<dim>::vertices_per_cell; ++vertex)
+                  if (vertex_level[cell->vertex_index(vertex)] >=
+                      cell->level()+1)
+                    {
+                      // remove coarsen flag...
+                      cell->clear_coarsen_flag();
+
+                      // ...and if necessary also refine the current
+                      // cell, at the same time updating the level
+                      // information about vertices
+                      if (vertex_level[cell->vertex_index(vertex)] >
+                          cell->level()+1)
+                        {
+                          cell->set_refine_flag();
+
+                          for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell;
+                               ++v)
+                            vertex_level[cell->vertex_index(v)]
+                              = std::max (vertex_level[cell->vertex_index(v)],
+                                          cell->level()+1);
+                        }
+
+                      // continue and see whether we may, for example,
+                      // go into the inner 'if' above based on a
+                      // different vertex
+                    }
+              }
+        }
+
+      // loop over all cells. Flag all cells of which all children are
+      // flagged for coarsening and delete the childrens' flags. Also
+      // delete all flags of cells for which not all children of a
+      // cell are flagged. In effect, only those cells are flagged of
+      // which originally all children were flagged and for which all
+      // children are on the same refinement level. For flagging, the
+      // user flags are used, to avoid confusion and because
+      // non-active cells can't be flagged for coarsening
+      //
+      // In effect, all coarsen flags are turned into user flags of
+      // the mother cell if coarsening is possible or deleted
+      // otherwise.
+      clear_user_flags ();
+      // Coarsen flags of cells with no mother cell, i.e. on the
+      // coarsest level are deleted explicitly.
+      active_cell_iterator acell  = begin_active(0),
+                           end_ac = end_active(0);
+      for (; acell!=end_ac; ++acell)
+        acell->clear_coarsen_flag();
+
+      cell_iterator cell = begin(),
+                    endc = end();
+      for (; cell!=endc; ++cell)
+        {
+          // nothing to do if we are already on the finest level
+          if (cell->active())
+            continue;
+
+          const unsigned int n_children=cell->n_children();
+          unsigned int flagged_children=0;
+          for (unsigned int child=0; child<n_children; ++child)
+            if (cell->child(child)->active() &&
+                cell->child(child)->coarsen_flag_set())
+              {
+                ++flagged_children;
+                // clear flag since we don't need it anymore
+                cell->child(child)->clear_coarsen_flag();
+              }
+
+          // flag this cell for coarsening if all children were
+          // flagged
+          if (flagged_children == n_children)
+            cell->set_user_flag();
+        }
+
+      // in principle no coarsen flags should be set any more at this
+      // point (checked with #ifdef, like the other debug-only blocks)
+#ifdef DEBUG
+      for (cell=begin(); cell!=endc; ++cell)
+        Assert (cell->coarsen_flag_set() == false, ExcInternalError());
+#endif
+
+      // now loop over all cells which have the user flag set. their
+      // children were flagged for coarsening. set the coarsen flag
+      // again if we are sure that none of the neighbors of these
+      // children are refined, or will be refined, since then we would
+      // get a two-level jump in refinement. on the other hand, if one
+      // of the children's neighbors has their user flag set, then we
+      // know that its children will go away by coarsening, and we
+      // will be ok.
+      //
+      // note on the other hand that we do allow level-2 jumps in
+      // refinement between neighbors in 1d, so this whole procedure
+      // is only necessary if we are not in 1d
+      //
+      // since we remove some coarsening/user flags in the process, we
+      // have to work from the finest level to the coarsest one, since
+      // we occasionally inspect user flags of cells on finer levels
+      // and need to be sure that these flags are final
+      for (cell=last(); cell!=endc; --cell)
+        if (cell->user_flag_set())
+          // if allowed: flag the
+          // children for coarsening
+          if (internal::Triangulation::Implementation::template coarsening_allowed<dim,spacedim>(cell))
+            for (unsigned int c=0; c<cell->n_children(); ++c)
+              {
+                Assert (cell->child(c)->refine_flag_set()==false,
+                        ExcInternalError());
+
+                cell->child(c)->set_coarsen_flag();
+              }
+
+      // clear all user flags again, now that we don't need them any
+      // more
+      clear_user_flags ();
+
+
+      // now see if anything has changed in the last iteration of this
+      // function
+      std::vector<bool> current_coarsen_flags (n_active_cells());
+      save_coarsen_flags (current_coarsen_flags);
+
+      continue_iterating = (current_coarsen_flags != previous_coarsen_flags);
+      previous_coarsen_flags.swap (current_coarsen_flags);  // current_* dies right after, so swap instead of copy
+    }
+  while (continue_iterating == true);
+}
+
+
+//TODO: merge the following 3 functions since they are the same
+template <>
+bool Triangulation<1,1>::prepare_coarsening_and_refinement ()
+{
+  // snapshot of the coarsen flags on entry; comparing against it at
+  // the end tells the caller whether this function changed anything
+  std::vector<bool> coarsen_flags_on_entry;
+  save_coarsen_flags (coarsen_flags_on_entry);
+
+  // in 1d the only preparation is to set the coarsen flags correctly
+  fix_coarsen_flags ();
+
+  std::vector<bool> coarsen_flags_on_exit;
+  save_coarsen_flags (coarsen_flags_on_exit);
+
+  return !(coarsen_flags_on_entry == coarsen_flags_on_exit);
+}
+
+
+template <>
+bool Triangulation<1,2>::prepare_coarsening_and_refinement ()
+{
+  // snapshot of the coarsen flags on entry; comparing against it at
+  // the end tells the caller whether this function changed anything
+  std::vector<bool> coarsen_flags_on_entry;
+  save_coarsen_flags (coarsen_flags_on_entry);
+
+  // in 1d the only preparation is to set the coarsen flags correctly
+  fix_coarsen_flags ();
+
+  std::vector<bool> coarsen_flags_on_exit;
+  save_coarsen_flags (coarsen_flags_on_exit);
+
+  return !(coarsen_flags_on_entry == coarsen_flags_on_exit);
+}
+
+
+template <>
+bool Triangulation<1,3>::prepare_coarsening_and_refinement ()
+{
+  // snapshot of the coarsen flags on entry; comparing against it at
+  // the end tells the caller whether this function changed anything
+  std::vector<bool> coarsen_flags_on_entry;
+  save_coarsen_flags (coarsen_flags_on_entry);
+
+  // in 1d the only preparation is to set the coarsen flags correctly
+  fix_coarsen_flags ();
+
+  std::vector<bool> coarsen_flags_on_exit;
+  save_coarsen_flags (coarsen_flags_on_exit);
+
+  return !(coarsen_flags_on_entry == coarsen_flags_on_exit);
+}
+
+
+
+
+namespace
+{
+
+  // check if the given @param cell marked for coarsening would
+  // produce an unrefined island. To break up long chains of these
+  // cells we recursively check our neighbors in case we change this
+  // cell. This reduces the number of outer iterations dramatically.
+  template <int dim, int spacedim>
+  void
+  possibly_do_not_produce_unrefined_islands(
+    const typename Triangulation<dim,spacedim>::cell_iterator &cell)
+  {
+    Assert (cell->has_children(), ExcInternalError());
+
+    // tally the faces that have a valid neighbor and, among those,
+    // the ones face_will_be_refined_by_neighbor() answers yes for
+    unsigned int existing_neighbors = 0,
+                 refining_neighbors = 0;
+    for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+      if (cell->neighbor(f).state() == IteratorState::valid)
+        {
+          ++existing_neighbors;
+          if (face_will_be_refined_by_neighbor(cell,f))
+            ++refining_neighbors;
+        }
+
+    // the coarsen flags stay untouched unless all existing neighbors
+    // will be refined, or all but one will be and there is a neighbor
+    // behind every face
+    const bool clear_flags
+      = (refining_neighbors==existing_neighbors ||
+         (refining_neighbors>=existing_neighbors-1 &&
+          existing_neighbors == GeometryInfo<dim>::faces_per_cell));
+    if (!clear_flags)
+      return;
+
+    for (unsigned int child=0; child<cell->n_children(); ++child)
+      cell->child(child)->clear_coarsen_flag();
+
+    // repeat the check for every non-active neighbor that will be coarsened
+    for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+      if (!(cell->at_boundary(f) || cell->neighbor(f)->active())
+          &&
+          cell_will_be_coarsened(cell->neighbor(f)))
+        possibly_do_not_produce_unrefined_islands<dim,spacedim>( cell->neighbor(f) );
+  }
+
+
+  // see if the current cell needs to be refined to avoid unrefined
+  // islands.
+  //
+  // there are sometimes chains of cells that induce refinement of
+  // each other. to avoid running the loop in
+  // prepare_coarsening_and_refinement over and over again for each
+  // one of them, at least for the isotropic refinement case we seek
+  // to flag neighboring elements as well as necessary. this takes
+  // care of (slightly pathological) cases like
+  // deal.II/mesh_smoothing_03
+  template <int dim, int spacedim>
+  void
+  possibly_refine_unrefined_island
+  (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+   const bool allow_anisotropic_smoothing)
+  {
+    Assert (cell->has_children() == false, ExcInternalError());
+    Assert (cell->refine_flag_set() == false, ExcInternalError());
+
+
+    // now we provide two algorithms. the first one is the standard
+    // one, coming from the time, where only isotropic refinement was
+    // possible. it simply counts the neighbors that are or will be
+    // refined and compares to the number of other ones. the second
+    // one does this check independently for each direction: if all
+    // neighbors in one direction (normally two, at the boundary only
+    // one) are refined, the current cell is flagged to be refined in
+    // an according direction.
+
+    if (allow_anisotropic_smoothing == false)
+      {
+        // use first algorithm
+        unsigned int refined_neighbors = 0,
+                     unrefined_neighbors = 0;
+        for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+          if (!cell->at_boundary(face))
+            {
+              if (face_will_be_refined_by_neighbor(cell,face))
+                ++refined_neighbors;
+              else
+                ++unrefined_neighbors;
+            }
+
+        if (unrefined_neighbors < refined_neighbors)
+          {
+            cell->clear_coarsen_flag();
+            cell->set_refine_flag ();
+
+            // ok, so now we have flagged this cell. if we know that
+            // there were any unrefined neighbors at all, see if any
+            // of those will have to be refined as well
+            if (unrefined_neighbors > 0)
+              for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+                if (!cell->at_boundary(face)
+                    &&
+                    (face_will_be_refined_by_neighbor(cell,face) == false)
+                    &&
+                    (cell->neighbor(face)->has_children() == false)
+                    &&
+                    (cell->neighbor(face)->refine_flag_set() == false))
+                  possibly_refine_unrefined_island<dim,spacedim>
+                  (cell->neighbor(face),
+                   allow_anisotropic_smoothing);
+          }
+      }
+    else
+      {
+        // variable to store the cell refine case needed to fulfill
+        // all smoothing requirements
+        RefinementCase<dim> smoothing_cell_refinement_case
+          = RefinementCase<dim>::no_refinement;
+
+        // use second algorithm, do the check individually for each
+        // direction
+        for (unsigned int face_pair=0;
+             face_pair<GeometryInfo<dim>::faces_per_cell/2; ++face_pair)
+          {
+            // variable to store the cell refine case needed to refine
+            // at the current face pair in the same way as the
+            // neighbors do...
+            RefinementCase<dim> directional_cell_refinement_case
+              = RefinementCase<dim>::isotropic_refinement;
+
+            for (unsigned int face_index=0; face_index<2; ++face_index)
+              {
+                unsigned int face=2*face_pair+face_index;
+                // variable to store the refine case (to come) of the
+                // face under consideration
+                RefinementCase<dim-1> expected_face_ref_case
+                  = RefinementCase<dim-1>::no_refinement;
+
+                if (cell->neighbor(face).state() == IteratorState::valid)
+                  face_will_be_refined_by_neighbor<dim,spacedim>(cell,face,expected_face_ref_case);
+                // now extract which refine case would be necessary to
+                // achieve the same face refinement. set the
+                // intersection with other requirements for the same
+                // direction.
+
+                // note: using the intersection is not an obvious
+                // decision, we could also argue that it is more
+                // natural to use the union. however, intersection is
+                // the less aggressive tactic and favours a smaller
+                // number of refined cells over an intensive
+                // smoothing. this way we try not to lose too much of
+                // the effort we put in anisotropic refinement
+                // indicators due to overly aggressive smoothing...
+                directional_cell_refinement_case
+                  = (directional_cell_refinement_case &
+                     GeometryInfo<dim>::min_cell_refinement_case_for_face_refinement(
+                       expected_face_ref_case,
+                       face,
+                       cell->face_orientation(face),
+                       cell->face_flip(face),
+                       cell->face_rotation(face)));
+              }//for both face indices
+            // if both requirements sum up to something useful, add
+            // this to the refine case for smoothing. note: if
+            // directional_cell_refinement_case is isotropic still,
+            // then something went wrong...
+            Assert(directional_cell_refinement_case <
+                   RefinementCase<dim>::isotropic_refinement,
+                   ExcInternalError());
+            smoothing_cell_refinement_case = smoothing_cell_refinement_case |
+                                             directional_cell_refinement_case;
+          }//for all face_pairs
+        // now we collected contributions from all directions. combine
+        // the new flags with the existing refine case, but only if
+        // smoothing is required
+        if (smoothing_cell_refinement_case)
+          {
+            cell->clear_coarsen_flag();
+            cell->set_refine_flag(cell->refine_flag_set() |
+                                  smoothing_cell_refinement_case);
+          }
+      }
+  }
+}
+
+
+template <int dim, int spacedim>
+bool Triangulation<dim,spacedim>::prepare_coarsening_and_refinement ()
+{
+  // save the flags to determine whether something was changed in the
+  // course of this function
+  std::vector<bool> flags_before[2];
+  save_coarsen_flags (flags_before[0]);
+  save_refine_flags (flags_before[1]);
+
+  // save the flags at the outset of each loop. we do so in order to
+  // find out whether something was changed in the present loop, in
+  // which case we would have to re-run the loop. the other
+  // possibility to find this out would be to set a flag
+  // @p{something_changed} to true each time we change something.
+  // however, sometimes one change in one of the parts of the loop is
+  // undone by another one, so we might end up in an endless loop. we
+  // could be tempted to break this loop at an arbitrary number of
+  // runs, but that would not be a clean solution, since we would
+  // either have to 1/ break the loop too early, in which case the
+  // promise that a second call to this function immediately after the
+  // first one does not change anything, would be broken, or 2/ we do
+  // as many loops as there are levels. we know that information is
+  // transported over one level in each run of the loop, so this is
+  // enough. Unfortunately, each loop is rather expensive, so we chose
+  // the way presented here
+  std::vector<bool> flags_before_loop[2] = {flags_before[0],
+                                            flags_before[1]
+                                           };
+
+  // now for what is done in each loop: we have to fulfill several
+  // tasks at the same time, namely several mesh smoothing algorithms
+  // and mesh regularisation, by which we mean that the next mesh
+  // fulfills several requirements such as no double refinement at
+  // each face or line, etc.
+  //
+  // since doing these things at once seems almost impossible (in the
+  // first year of this library, they were done in two functions, one
+  // for refinement and one for coarsening, and most things within
+  // these were done at once, so the code was rather impossible to
+  // join into this, only, function), we do them one after each
+  // other. the order in which we do them is such that the important
+  // tasks, namely regularisation, are done last and the least
+  // important things are done the first. the following order is
+  // chosen:
+  //
+  // 0/ Only if coarsest_level_1 or patch_level_1 is set: clear all
+  //    coarsen flags on level 1 to avoid level 0 cells being created
+  //    by coarsening.  As coarsen flags will never be added, this can
+  //    be done once and for all before the actual loop starts.
+  //
+  // 1/ do not coarsen a cell if 'most of the neighbors' will be
+  //    refined after the step. This is to prevent occurrence of
+  //    unrefined islands.
+  //
+  // 2/ eliminate refined islands in the interior and at the
+  //    boundary. since they don't do much harm besides increasing the
+  //    number of degrees of freedom, doing this has a rather low
+  //    priority.
+  //
+  // 3/ limit the level difference of neighboring cells at each
+  //    vertex.
+  //
+  // 4/ eliminate unrefined islands. this has higher priority since
+  //    this diminishes the approximation properties not only of the
+  //    unrefined island, but also of the surrounding patch.
+  //
+  // 5/ ensure patch level 1. Then the triangulation consists of
+  //    patches, i.e. of cells that are refined once. It follows that
+  //    if at least one of the children of a cell is or will be
+  //    refined then all children need to be refined. This step only
+  //    sets refinement flags and does not set coarsening flags.  If
+  //    the patch_level_1 flag is set, then
+  //    eliminate_unrefined_islands, eliminate_refined_inner_islands
+  //    and eliminate_refined_boundary_islands will be fulfilled
+  //    automatically and do not need to be enforced separately.
+  //
+  // 6/ take care of the requirement that no double refinement is done
+  //    at each face
+  //
+  // 7/ take care that no double refinement is done at each line in 3d
+  //    or higher dimensions.
+  //
+  // 8/ make sure that all children of each cell are either flagged
+  //    for coarsening or none of the children is
+  //
+  // For some of these steps, it is known that they interact. Namely,
+  // it is not possible to guarantee that after step 6 another step 5
+  // would have no effect; the same holds for the opposite order and
+  // also when taking into account step 7. however, it is important to
+  // guarantee that step five or six do not undo something that step 5
+  // did, and step 7 not something of step 6, otherwise the
+  // requirements will not be satisfied even if the loop
+  // terminates. this is accomplished by the fact that steps 5 and 6
+  // only *add* refinement flags and delete coarsening flags
+  // (therefore, step 6 can't undo something that step 4 already did),
+  // and step 7 only deletes coarsening flags, never adds some. step 7
+  // needs also take care that it won't tag cells for refinement for
+  // which some neighbors are more refined or will be refined.
+
+  //////////////////////////////////////
+  // STEP 0:
+  //    Only if coarsest_level_1 or patch_level_1 is set: clear all
+  //    coarsen flags on level 1 to avoid level 0 cells being created
+  //    by coarsening.
+  if (((smooth_grid & coarsest_level_1) ||
+       (smooth_grid & patch_level_1)) && n_levels()>=2)
+    {
+      active_cell_iterator
+      cell=begin_active(1),
+      endc=end_active(1);
+
+      for (; cell!=endc; ++cell)
+        cell->clear_coarsen_flag();
+    }
+
+  bool mesh_changed_in_this_loop = false;
+  do
+    {
+      //////////////////////////////////////
+      // STEP 1:
+      //    do not coarsen a cell if 'most of the neighbors' will be
+      //    refined after the step. This is to prevent the occurrence
+      //    of unrefined islands.  If patch_level_1 is set, this will
+      //    be automatically fulfilled.
+      if (smooth_grid & do_not_produce_unrefined_islands &&
+          !(smooth_grid & patch_level_1))
+        {
+          cell_iterator       cell;
+          const cell_iterator endc = end();
+
+          for (cell=begin(); cell!=endc; ++cell)
+            {
+              // only do something if this
+              // cell will be coarsened
+              if (!cell->active() && cell_will_be_coarsened(cell))
+                possibly_do_not_produce_unrefined_islands<dim,spacedim>(cell);
+            }
+        }
+
+
+      //////////////////////////////////////
+      // STEP 2:
+      //    eliminate refined islands in the interior and at the
+      //    boundary. since they don't do much harm besides increasing
+      //    the number of degrees of freedom, doing this has a rather
+      //    low priority.  If patch_level_1 is set, this will be
+      //    automatically fulfilled.
+      //
+      //    there is one corner case to consider: if this is a
+      //    distributed triangulation, there may be refined islands on
+      //    the boundary of which we own only part (e.g. a single cell
+      //    in the corner of a domain). the rest of the island is
+      //    ghost cells and it *looks* like the area around it
+      //    (artificial cells) are coarser but this is only because
+      //    they may actually be equally fine on other
+      //    processors. it's hard to detect this case but we can do
+      //    the following: only set coarsen flags to remove this
+      //    refined island if all cells we want to set flags on are
+      //    locally owned
+      if (smooth_grid & (eliminate_refined_inner_islands |
+                         eliminate_refined_boundary_islands) &&
+          !(smooth_grid & patch_level_1))
+        {
+          cell_iterator       cell;
+          const cell_iterator endc = end();
+
+          for (cell=begin(); cell!=endc; ++cell)
+            if (!cell->active() ||
+                (cell->active() &&
+                 cell->refine_flag_set() &&
+                 cell->is_locally_owned()))
+              {
+                // check whether all children are active, i.e. not
+                // refined themselves. This is a precondition that the
+                // children may be coarsened away. If the cell is only
+                // flagged for refinement, then all future children
+                // will be active
+                bool all_children_active = true;
+                if (!cell->active())
+                  for (unsigned int c=0; c<cell->n_children(); ++c)
+                    if (!cell->child(c)->active() ||
+                        cell->child(c)->is_ghost() ||
+                        cell->child(c)->is_artificial())
+                      {
+                        all_children_active = false;
+                        break;
+                      }
+
+                if (all_children_active)
+                  {
+                    // count number of refined and unrefined neighbors
+                    // of cell.  neighbors on lower levels are counted
+                    // as unrefined since they can only get to the
+                    // same level as this cell by the next refinement
+                    // cycle
+                    unsigned int unrefined_neighbors = 0,
+                                 total_neighbors = 0;
+
+                    for (unsigned int n=0; n<GeometryInfo<dim>::faces_per_cell; ++n)
+                      {
+                        const cell_iterator neighbor = cell->neighbor(n);
+                        if (neighbor.state() == IteratorState::valid)
+                          {
+                            ++total_neighbors;
+
+                            if (!face_will_be_refined_by_neighbor(cell,n))
+                              ++unrefined_neighbors;
+                          }
+
+                      }
+
+                    // if all neighbors unrefined: mark this cell for
+                    // coarsening or don't refine if marked for that
+                    //
+                    // also do the distinction between the two
+                    // versions of the eliminate_refined_*_islands
+                    // flag
+                    //
+                    // the last check is whether there are any
+                    // neighbors at all. if not so, then we are (e.g.)
+                    // on the coarsest grid with one cell, for which,
+                    // of course, we do not remove the refine flag.
+                    if ((unrefined_neighbors == total_neighbors)
+                        &&
+                        (((unrefined_neighbors==GeometryInfo<dim>::faces_per_cell) &&
+                          (smooth_grid & eliminate_refined_inner_islands)) ||
+                         ((unrefined_neighbors<GeometryInfo<dim>::faces_per_cell) &&
+                          (smooth_grid & eliminate_refined_boundary_islands)) )
+                        &&
+                        (total_neighbors != 0))
+                      {
+                        if (!cell->active())
+                          for (unsigned int c=0; c<cell->n_children(); ++c)
+                            {
+                              cell->child(c)->clear_refine_flag ();
+                              cell->child(c)->set_coarsen_flag ();
+                            }
+                        else
+                          cell->clear_refine_flag();
+                      }
+                  }
+              }
+        }
+
+      //////////////////////////////////////
+      // STEP 3:
+      //    limit the level difference of neighboring cells at each
+      //    vertex.
+      //
+      //    in case of anisotropic refinement this does not make
+      //    sense. as soon as one cell is anisotropically refined, an
+      //    Assertion is thrown. therefore we can ignore this problem
+      //    later on
+      if (smooth_grid & limit_level_difference_at_vertices)
+        {
+          Assert(!anisotropic_refinement,
+                 ExcMessage("In case of anisotropic refinement the "
+                            "limit_level_difference_at_vertices flag for "
+                            "mesh smoothing must not be set!"));
+
+          // store highest level one of the cells adjacent to a vertex
+          // belongs to
+          std::vector<int> vertex_level (vertices.size(), 0);
+          active_cell_iterator cell = begin_active(),
+                               endc = end();
+          for (; cell!=endc; ++cell)
+            {
+              if (cell->refine_flag_set())
+                for (unsigned int vertex=0; vertex<GeometryInfo<dim>::vertices_per_cell;
+                     ++vertex)
+                  vertex_level[cell->vertex_index(vertex)]
+                    = std::max (vertex_level[cell->vertex_index(vertex)],
+                                cell->level()+1);
+              else if (!cell->coarsen_flag_set())
+                for (unsigned int vertex=0; vertex<GeometryInfo<dim>::vertices_per_cell;
+                     ++vertex)
+                  vertex_level[cell->vertex_index(vertex)]
+                    = std::max (vertex_level[cell->vertex_index(vertex)],
+                                cell->level());
+              else
+                {
+                  // if coarsen flag is set then tentatively assume
+                  // that the cell will be coarsened. this isn't
+                  // always true (the coarsen flag could be removed
+                  // again) and so we may make an error here
+                  Assert (cell->coarsen_flag_set(), ExcInternalError());
+                  for (unsigned int vertex=0; vertex<GeometryInfo<dim>::vertices_per_cell;
+                       ++vertex)
+                    vertex_level[cell->vertex_index(vertex)]
+                      = std::max (vertex_level[cell->vertex_index(vertex)],
+                                  cell->level()-1);
+                }
+            }
+
+
+          // loop over all cells in reverse order. do so because we
+          // can then update the vertex levels on the adjacent
+          // vertices and maybe already flag additional cells in this
+          // loop
+          //
+          // note that not only may we have to add additional
+          // refinement flags, but we will also have to remove
+          // coarsening flags on cells adjacent to vertices that will
+          // see refinement
+          for (cell=last_active(); cell != endc; --cell)
+            if (cell->refine_flag_set() == false)
+              {
+                for (unsigned int vertex=0;
+                     vertex<GeometryInfo<dim>::vertices_per_cell; ++vertex)
+                  if (vertex_level[cell->vertex_index(vertex)] >=
+                      cell->level()+1)
+                    {
+                      // remove coarsen flag...
+                      cell->clear_coarsen_flag();
+
+                      // ...and if necessary also refine the current
+                      // cell, at the same time updating the level
+                      // information about vertices
+                      if (vertex_level[cell->vertex_index(vertex)] >
+                          cell->level()+1)
+                        {
+                          cell->set_refine_flag();
+
+                          for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell;
+                               ++v)
+                            vertex_level[cell->vertex_index(v)]
+                              = std::max (vertex_level[cell->vertex_index(v)],
+                                          cell->level()+1);
+                        }
+
+                      // continue and see whether we may, for example,
+                      // go into the inner 'if'
+                      // above based on a
+                      // different vertex
+                    }
+              }
+        }
+
+      /////////////////////////////////////
+      // STEP 4:
+      //    eliminate unrefined islands. this has higher priority
+      //    since this diminishes the approximation properties not
+      //    only of the unrefined island, but also of the surrounding
+      //    patch.
+      //
+      //    do the loop from finest to coarsest cells since we may
+      //    trigger a cascade by marking cells for refinement which
+      //    may trigger more cells further down below
+      if (smooth_grid & eliminate_unrefined_islands)
+        {
+          active_cell_iterator cell=last_active(),
+                               endc=end();
+
+          for (; cell != endc; --cell)
+            // only do something if cell is not already flagged for
+            // (isotropic) refinement
+            if (cell->refine_flag_set() != RefinementCase<dim>::isotropic_refinement)
+              possibly_refine_unrefined_island<dim,spacedim>
+              (cell,
+               (smooth_grid & allow_anisotropic_smoothing) != 0);
+        }
+
+      /////////////////////////////////
+      // STEP 5:
+      //    ensure patch level 1.
+      //
+      //    Introduce some terminology:
+      //    - a cell that is refined
+      //      once is a patch of
+      //      level 1 simply called patch.
+      //    - a cell that is globally
+      //      refined twice is called
+      //      a patch of level 2.
+      //    - patch level n says that
+      //      the triangulation consists
+      //      of patches of level n.
+      //      This makes sense only
+      //      if the grid is already at
+      //      least n times globally
+      //      refined.
+      //
+      //    E.g. from patch level 1 follows: if at least one of the
+      //    children of a cell is or will be refined then enforce all
+      //    children to be refined.
+
+      //    This step only sets refinement flags and does not set
+      //    coarsening flags.
+      if (smooth_grid & patch_level_1)
+        {
+
+          // An important assumption (A) is that before calling this
+          // function the grid was already of patch level 1.
+
+          // loop over all cells whose children are all active.  (By
+          // assumption (A) either all or none of the children are
+          // active).  If the refine flag of at least one of the
+          // children is set then set_refine_flag and
+          // clear_coarsen_flag of all children.
+          for (cell_iterator cell = begin(); cell != end(); ++cell)
+            if (!cell->active())
+              {
+                // ensure the invariant. we can then check whether all
+                // of its children are further refined or not by
+                // simply looking at the first child
+                Assert (cell_is_patch_level_1(cell),
+                        ExcInternalError());
+                if (cell->child(0)->has_children() == true)
+                  continue;
+
+                // cell is found to be a patch.  combine the refine
+                // cases of all children
+                RefinementCase<dim> combined_ref_case = RefinementCase<dim>::no_refinement;
+                for (unsigned int i=0; i<cell->n_children(); ++i)
+                  combined_ref_case = combined_ref_case |
+                                      cell->child(i)->refine_flag_set();
+                if (combined_ref_case != RefinementCase<dim>::no_refinement)
+                  for (unsigned int i=0; i<cell->n_children(); ++i)
+                    {
+                      cell_iterator child = cell->child(i);
+
+                      child->clear_coarsen_flag();
+                      child->set_refine_flag(combined_ref_case);
+                    }
+              }
+
+          // The code above dealt with the case where we may get a
+          // non-patch_level_1 mesh from refinement. Now also deal
+          // with the case where we could get such a mesh by
+          // coarsening.  Coarsen the children (and remove the
+          // grandchildren) only if all cell->grandchild(i)
+          // ->coarsen_flag_set() are set.
+          //
+          // for a case where this is a bit tricky, take a look at the
+          // mesh_smoothing_0[12] testcases
+          for (cell_iterator cell = begin(); cell != end(); ++cell)
+            {
+              // check if this cell has active grandchildren. note
+              // that we know that it is patch_level_1, i.e. if one of
+              // its children is active then so are all, and it isn't
+              // going to have any grandchildren at all:
+              if (cell->active()
+                  ||
+                  cell->child(0)->active())
+                continue;
+
+              // cell is not active, and neither are any of its
+              // children. check the grandchildren. note that the
+              // children are also patch_level_1, and so we only ever
+              // need to check their first child
+              const unsigned int n_children=cell->n_children();
+              bool has_active_grandchildren = false;
+
+              for (unsigned int i=0; i<n_children; ++i)
+                if (cell->child(i)->child(0)->active())
+                  {
+                    has_active_grandchildren = true;
+                    break;
+                  }
+
+              if (has_active_grandchildren == false)
+                continue;
+
+
+              // ok, there are active grandchildren. see if either all
+              // or none of them are flagged for coarsening
+              unsigned int n_grandchildren=0;
+
+              // count all coarsen flags of the grandchildren.
+              unsigned int n_coarsen_flags=0;
+
+              // cell is not a patch (of level 1) as it has a
+              // grandchild.  Is cell a patch of level 2??  Therefore:
+              // find out whether all cell->child(i) are patches
+              for (unsigned int c=0; c<n_children; ++c)
+                {
+                  // get at the child. by assumption (A), and the
+                  // check by which we got here, the child is not
+                  // active
+                  cell_iterator child=cell->child(c);
+
+                  const unsigned int nn_children=child->n_children();
+                  n_grandchildren += nn_children;
+
+                  // if child is found to be a patch of active cells
+                  // itself, then add up how many of its children are
+                  // supposed to be coarsened
+                  if (child->child(0)->active())
+                    for (unsigned int cc=0; cc<nn_children; ++cc)
+                      if (child->child(cc)->coarsen_flag_set())
+                        ++n_coarsen_flags;
+                }
+
+              // if not all grandchildren are supposed to be coarsened
+              // (e.g. because some simply don't have the flag set, or
+              // because they are not active and therefore cannot
+              // carry the flag), then remove the coarsen flag from
+              // all of the active grandchildren. note that there may
+              // be coarsen flags on the grandgrandchildren -- we
+              // don't clear them here, but we'll get to them in later
+              // iterations if necessary
+              //
+              // there is nothing we have to do if no coarsen flags
+              // have been set at all
+              if ((n_coarsen_flags != n_grandchildren)
+                  &&
+                  (n_coarsen_flags > 0))
+                for (unsigned int c=0; c<n_children; ++c)
+                  {
+                    const cell_iterator child = cell->child(c);
+                    if (child->child(0)->active())
+                      for (unsigned int cc=0; cc<child->n_children(); ++cc)
+                        child->child(cc)->clear_coarsen_flag();
+                  }
+            }
+        }
+
+      //////////////////////////////////
+      //
+      //  at the boundary we could end up with cells with negative
+      //  volume or at least with a part, that is negative, if the
+      //  cell is refined anisotropically. we have to check, whether
+      //  that can happen
+      internal::Triangulation::Implementation::prevent_distorted_boundary_cells(*this);
+
+      /////////////////////////////////
+      // STEP 6:
+      //    take care of the requirement that no
+      //    double refinement is done at each face
+      //
+      //    in case of anisotropic refinement it is only likely, but
+      //    not sure, that the cells, which are more refined along a
+      //    certain face common to two cells are on a higher
+      //    level. therefore we cannot be sure, that the requirement
+      //    of no double refinement is fulfilled after a single pass
+      //    of the following actions. We could just wait for the next
+      //    global loop. when this function terminates, the
+      //    requirement will be fulfilled. However, it might be faster
+      //    to insert an inner loop here.
+      bool changed = true;
+      while (changed)
+        {
+          changed=false;
+          active_cell_iterator cell=last_active(),
+                               endc=end();
+
+          for (; cell != endc; --cell)
+            if (cell->refine_flag_set())
+              {
+                // loop over neighbors of cell
+                for (unsigned int i=0; i<GeometryInfo<dim>::faces_per_cell; ++i)
+                  {
+                    // only do something if the face is not at the
+                    // boundary and if the face will be refined with
+                    // the RefineCase currently flagged for
+                    if (cell->neighbor(i).state() == IteratorState::valid &&
+                        (GeometryInfo<dim>::face_refinement_case(cell->refine_flag_set(),
+                                                                 i)
+                         != RefinementCase<dim-1>::no_refinement))
+                      {
+                        // 1) if the neighbor has children: nothing to
+                        // worry about.  2) if the neighbor is active
+                        // and a coarser one, ensure, that its
+                        // refine_flag is set 3) if the neighbor is
+                        // active and as refined along the face as our
+                        // current cell, make sure, that no
+                        // coarsen_flag is set. if we remove the
+                        // coarsen flag of our neighbor,
+                        // fix_coarsen_flags() makes sure, that the
+                        // mother cell will not be coarsened
+                        if (cell->neighbor(i)->active())
+                          {
+                            if (cell->neighbor_is_coarser(i))
+                              {
+                                if (cell->neighbor(i)->coarsen_flag_set())
+                                  cell->neighbor(i)->clear_coarsen_flag();
+                                // we'll set the refine flag for this
+                                // neighbor below. we note, that we
+                                // have changed something by setting
+                                // the changed flag to true. We do not
+                                // need to do so, if we just removed
+                                // the coarsen flag, as the changed
+                                // flag only indicates the need to
+                                // re-run the inner loop. however, we
+                                // only loop over cells flagged for
+                                // refinement here, so nothing to
+                                // worry about if we remove coarsen
+                                // flags
+
+                                if (dim==2)
+                                  {
+                                    if (smooth_grid & allow_anisotropic_smoothing)
+                                      changed=cell->neighbor(i)->flag_for_face_refinement(cell->neighbor_of_coarser_neighbor(i).first,
+                                                                                          RefinementCase<dim-1>::cut_x);
+                                    else
+                                      {
+                                        if (!cell->neighbor(i)->refine_flag_set())
+                                          changed=true;
+                                        cell->neighbor(i)->set_refine_flag();
+                                      }
+                                  }
+                                else //i.e. if (dim==3)
+                                  {
+// ugly situations might arise here, consider the following situation, which
+// shows neighboring cells at the common face, where the upper right element is
+// coarser at the given face. Now the upper child element of the lower left
+// wants to refine according to cut_z, such that there is a 'horizontal'
+// refinement of the face marked with #####
+//
+//                            /               /
+//                           /               /
+//                          *---------------*
+//                          |               |
+//                          |               |
+//                          |               |
+//                          |               |
+//                          |               |
+//                          |               | /
+//                          |               |/
+//                          *---------------*
+//
+//
+//     *---------------*
+//    /|              /|
+//   / |     #####   / |
+//     |               |
+//     *---------------*
+//    /|              /|
+//   / |             / |
+//     |               |
+//     *---------------*
+//    /               /
+//   /               /
+//
+// this introduces too many hanging nodes and the neighboring (coarser) cell
+// (upper right) has to be refined. If it is only refined according to cut_z,
+// then everything is ok:
+//
+//                            /               /
+//                           /               /
+//                          *---------------*
+//                          |               |
+//                          |               | /
+//                          |               |/
+//                          *---------------*
+//                          |               |
+//                          |               | /
+//                          |               |/
+//                          *---------------*
+//
+//
+//     *---------------*
+//    /|              /|
+//   / *---------------*
+//    /|              /|
+//     *---------------*
+//    /|              /|
+//   / |             / |
+//     |               |
+//     *---------------*
+//    /               /
+//   /               /
+//
+// if however the cell wants to refine itself in an other way, or if we disallow
+// anisotropic smoothing, then simply refining the neighbor isotropically is not
+// going to work, since this introduces a refinement of face ##### with both
+// cut_x and cut_y, which is not possible:
+//
+//                            /       /       /
+//                           /       /       /
+//                          *-------*-------*
+//                          |       |       |
+//                          |       |       | /
+//                          |       |       |/
+//                          *-------*-------*
+//                          |       |       |
+//                          |       |       | /
+//                          |       |       |/
+//                          *-------*-------*
+//
+//
+//     *---------------*
+//    /|              /|
+//   / *---------------*
+//    /|              /|
+//     *---------------*
+//    /|              /|
+//   / |             / |
+//     |               |
+//     *---------------*
+//    /               /
+//   /               /
+//
+// thus, in this case we also need to refine our current cell in the new
+// direction:
+//
+//                            /       /       /
+//                           /       /       /
+//                          *-------*-------*
+//                          |       |       |
+//                          |       |       | /
+//                          |       |       |/
+//                          *-------*-------*
+//                          |       |       |
+//                          |       |       | /
+//                          |       |       |/
+//                          *-------*-------*
+//
+//
+//     *-------*-------*
+//    /|      /|      /|
+//   / *-------*-------*
+//    /|      /|      /|
+//     *-------*-------*
+//    /|      /       /|
+//   / |             / |
+//     |               |
+//     *---------------*
+//    /               /
+//   /               /
+
+                                    std::pair<unsigned int, unsigned int> nb_indices
+                                      =cell->neighbor_of_coarser_neighbor(i);
+                                    unsigned int refined_along_x=0,
+                                                 refined_along_y=0,
+                                                 to_be_refined_along_x=0,
+                                                 to_be_refined_along_y=0;
+
+                                    const int this_face_index=cell->face_index(i);
+
+// step 1: detect, along which axis the face is currently refined
+                                    if ((this_face_index
+                                         == cell->neighbor(i)->face(nb_indices.first)->child_index(0)) ||
+                                        (this_face_index
+                                         == cell->neighbor(i)->face(nb_indices.first)->child_index(1)))
+                                      {
+                                        // this might be an
+                                        // anisotropic child. get the
+                                        // face refine case of the
+                                        // neighbors face and count
+                                        // refinements in x and y
+                                        // direction.
+                                        RefinementCase<dim-1> frc=cell->neighbor(i)->face(nb_indices.first)->refinement_case();
+                                        if (frc & RefinementCase<dim>::cut_x)
+                                          ++refined_along_x;
+                                        if (frc & RefinementCase<dim>::cut_y)
+                                          ++refined_along_y;
+                                      }
+                                    else
+                                      // this has to be an isotropic
+                                      // child
+                                      {
+                                        ++refined_along_x;
+                                        ++refined_along_y;
+                                      }
+// step 2: detect, along which axis the face has to be refined given the current
+// refine flag
+                                    RefinementCase<dim-1> flagged_frc=
+                                      GeometryInfo<dim>::face_refinement_case(cell->refine_flag_set(),
+                                                                              i,
+                                                                              cell->face_orientation(i),
+                                                                              cell->face_flip(i),
+                                                                              cell->face_rotation(i));
+                                    if (flagged_frc & RefinementCase<dim>::cut_x)
+                                      ++to_be_refined_along_x;
+                                    if (flagged_frc & RefinementCase<dim>::cut_y)
+                                      ++to_be_refined_along_y;
+
+// step 3: set the refine flag of the (coarser and active) neighbor.
+                                    if ((smooth_grid & allow_anisotropic_smoothing) ||
+                                        cell->neighbor(i)->refine_flag_set())
+                                      {
+                                        if (refined_along_x + to_be_refined_along_x > 1)
+                                          changed |= cell->neighbor(i)->flag_for_face_refinement(nb_indices.first,
+                                                                                                 RefinementCase<dim-1>::cut_axis(0));
+                                        if (refined_along_y + to_be_refined_along_y > 1)
+                                          changed |= cell->neighbor(i)->flag_for_face_refinement(nb_indices.first,
+                                                                                                 RefinementCase<dim-1>::cut_axis(1));
+                                      }
+                                    else
+                                      {
+                                        if (cell->neighbor(i)->refine_flag_set()!=RefinementCase<dim>::isotropic_refinement)
+                                          changed=true;
+                                        cell->neighbor(i)->set_refine_flag();
+                                      }
+
+// step 4: if necessary (see above) add to the refine flag of the current cell
+                                    cell_iterator nb=cell->neighbor(i);
+                                    RefinementCase<dim-1> nb_frc
+                                      = GeometryInfo<dim>::face_refinement_case(nb->refine_flag_set(),
+                                                                                nb_indices.first,
+                                                                                nb->face_orientation(nb_indices.first),
+                                                                                nb->face_flip(nb_indices.first),
+                                                                                nb->face_rotation(nb_indices.first));
+                                    if ((nb_frc & RefinementCase<dim>::cut_x) &&
+                                        !(refined_along_x || to_be_refined_along_x))
+                                      changed |= cell->flag_for_face_refinement(i,RefinementCase<dim-1>::cut_axis(0));
+                                    if ((nb_frc & RefinementCase<dim>::cut_y) &&
+                                        !(refined_along_y || to_be_refined_along_y))
+                                      changed |= cell->flag_for_face_refinement(i,RefinementCase<dim-1>::cut_axis(1));
+                                  }
+                              }// if neighbor is coarser
+                            else // -> now the neighbor is not coarser
+                              {
+                                cell->neighbor(i)->clear_coarsen_flag();
+                                const unsigned int nb_nb=cell->neighbor_of_neighbor(i);
+                                const cell_iterator neighbor=cell->neighbor(i);
+                                RefinementCase<dim-1> face_ref_case=
+                                  GeometryInfo<dim>::face_refinement_case(neighbor->refine_flag_set(),
+                                                                          nb_nb,
+                                                                          neighbor->face_orientation(nb_nb),
+                                                                          neighbor->face_flip(nb_nb),
+                                                                          neighbor->face_rotation(nb_nb));
+                                RefinementCase<dim-1> needed_face_ref_case
+                                  =GeometryInfo<dim>::face_refinement_case(cell->refine_flag_set(),
+                                                                           i,
+                                                                           cell->face_orientation(i),
+                                                                           cell->face_flip(i),
+                                                                           cell->face_rotation(i));
+                                // if the neighbor wants to refine the
+                                // face with cut_x and we want cut_y
+                                // or vice versa, we have to refine
+                                // isotropically at the given face
+                                if ((face_ref_case==RefinementCase<dim>::cut_x && needed_face_ref_case==RefinementCase<dim>::cut_y) ||
+                                    (face_ref_case==RefinementCase<dim>::cut_y && needed_face_ref_case==RefinementCase<dim>::cut_x))
+                                  {
+                                    changed=cell->flag_for_face_refinement(i, face_ref_case);
+                                    neighbor->flag_for_face_refinement(nb_nb, needed_face_ref_case);
+                                  }
+                              }
+                          }
+                        else //-> the neighbor is not active
+                          {
+                            RefinementCase<dim-1> face_ref_case = cell->face(i)->refinement_case(),
+                                                  needed_face_ref_case = GeometryInfo<dim>::face_refinement_case(cell->refine_flag_set(),
+                                                                         i,
+                                                                         cell->face_orientation(i),
+                                                                         cell->face_flip(i),
+                                                                         cell->face_rotation(i));
+                            // if the face is refined with cut_x and
+                            // we want cut_y or vice versa, we have to
+                            // refine isotropically at the given face
+                            if ((face_ref_case==RefinementCase<dim>::cut_x && needed_face_ref_case==RefinementCase<dim>::cut_y) ||
+                                (face_ref_case==RefinementCase<dim>::cut_y && needed_face_ref_case==RefinementCase<dim>::cut_x))
+                              changed=cell->flag_for_face_refinement(i, face_ref_case);
+                          }
+                      }
+                  }
+              }
+        }
+
+      //////////////////////////////////////
+      // STEP 7:
+      //    take care that no double refinement
+      //    is done at each line in 3d or higher
+      //    dimensions.
+      internal::Triangulation::Implementation::prepare_refinement_dim_dependent (*this);
+
+      //////////////////////////////////////
+      // STEP 8:
+      //    make sure that all children of each
+      //    cell are either flagged for coarsening
+      //    or none of the children is
+      fix_coarsen_flags ();
+      // get the refinement and coarsening
+      // flags
+      std::vector<bool> flags_after_loop[2];
+      save_coarsen_flags (flags_after_loop[0]);
+      save_refine_flags (flags_after_loop[1]);
+
+      // find out whether something was
+      // changed in this loop
+      mesh_changed_in_this_loop
+        = ((flags_before_loop[0] != flags_after_loop[0]) ||
+           (flags_before_loop[1] != flags_after_loop[1]));
+
+      // set the flags for the next loop
+      // already
+      flags_before_loop[0].swap(flags_after_loop[0]);
+      flags_before_loop[1].swap(flags_after_loop[1]);
+    }
+  while (mesh_changed_in_this_loop);
+
+
+  // find out whether something was really changed in this
+  // function. Note that @p{flags_before_loop} represents the state
+  // after the last loop, i.e.  the present state
+  return ((flags_before[0] != flags_before_loop[0]) ||
+          (flags_before[1] != flags_before_loop[1]));
+}
+
+
+
+
+// Write the vector of flags @p v to @p out in a simple text format:
+//   0. @p magic_number1
+//   1. the number of flags, N
+//   2. the flags, packed eight to a byte (bit position%8 of byte
+//      position/8), written as N/8+1 unsigned integers
+//   3. @p magic_number2
+//
+// The state of @p out is checked with AssertThrow/ExcIO before and after
+// writing.
+template <int dim, int spacedim>
+void Triangulation<dim, spacedim>::write_bool_vector (const unsigned int  magic_number1,
+                                                      const std::vector<bool> &v,
+                                                      const unsigned int  magic_number2,
+                                                      std::ostream            &out)
+{
+  const unsigned int N = v.size();
+  const unsigned int n_bytes = N/8+1;
+
+  // use a std::vector rather than new[]/delete[] so that the buffer is
+  // released even if one of the checks below leaves this function early
+  std::vector<unsigned char> flags (n_bytes, 0);
+
+  for (unsigned int position=0; position<N; ++position)
+    if (v[position])
+      flags[position/8] |= static_cast<unsigned char>(1u<<(position%8));
+
+  AssertThrow (out, ExcIO());
+
+  out << magic_number1 << ' ' << N << std::endl;
+  for (unsigned int i=0; i<n_bytes; ++i)
+    out << static_cast<unsigned int>(flags[i]) << ' ';
+
+  out << std::endl << magic_number2 << std::endl;
+
+  AssertThrow (out, ExcIO());
+}
+
+
+// Read a vector of flags from @p in, expecting the text format produced by
+// write_bool_vector(): @p magic_number1, the number of flags N, N/8+1
+// unsigned integers holding the flags packed eight to a byte, and finally
+// @p magic_number2.
+//
+// @p v is resized to N and overwritten. A mismatch of either magic number
+// is reported with ExcGridReadError, a bad stream state at entry or exit
+// with ExcIO (both through AssertThrow).
+template <int dim, int spacedim>
+void Triangulation<dim, spacedim>::read_bool_vector (const unsigned int  magic_number1,
+                                                     std::vector<bool>       &v,
+                                                     const unsigned int  magic_number2,
+                                                     std::istream            &in)
+{
+  AssertThrow (in, ExcIO());
+
+  // initialize everything we extract into, so that a failed extraction
+  // never leaves us comparing or copying an indeterminate value
+  unsigned int magic_number = 0;
+  in >> magic_number;
+  AssertThrow (magic_number==magic_number1, ExcGridReadError());
+
+  unsigned int N = 0;
+  in >> N;
+  v.resize (N);
+
+  // use a std::vector rather than new[]/delete[] so that the buffer is
+  // released even if one of the checks below leaves this function early
+  const unsigned int n_bytes = N/8+1;
+  std::vector<unsigned char> flags (n_bytes, 0);
+
+  // the bytes were written as integers, so read them as integers (reading
+  // into an unsigned char would extract single characters instead)
+  unsigned short int tmp = 0;
+  for (unsigned int i=0; i<n_bytes; ++i)
+    {
+      in >> tmp;
+      flags[i] = static_cast<unsigned char>(tmp);
+    }
+
+  for (unsigned int position=0; position!=N; ++position)
+    v[position] = ((flags[position/8] & (1u<<(position%8))) != 0);
+
+  in >> magic_number;
+  AssertThrow (magic_number==magic_number2, ExcGridReadError());
+
+  AssertThrow (in, ExcIO());
+}
+
+
+
+// Return an estimate of the memory used by this object: the sum of what
+// MemoryConsumption::memory_consumption() reports for the level array, each
+// level object, the vertices, the vertices_used flags, the number cache and
+// the object pointed to by faces, plus the sizes of the manifold,
+// smooth_grid and faces members themselves.
+template <int dim, int spacedim>
+std::size_t
+Triangulation<dim, spacedim>::memory_consumption () const
+{
+  // the array of levels itself, followed by every level it refers to
+  std::size_t total = MemoryConsumption::memory_consumption(levels);
+  for (unsigned int level=0; level<levels.size(); ++level)
+    total += MemoryConsumption::memory_consumption (*levels[level]);
+
+  // vertex data
+  total += MemoryConsumption::memory_consumption (vertices);
+  total += MemoryConsumption::memory_consumption (vertices_used);
+
+  // members that are only counted with their own size
+  total += sizeof(manifold) + sizeof(smooth_grid);
+
+  total += MemoryConsumption::memory_consumption (number_cache);
+
+  // the faces member plus the object it refers to
+  total += sizeof (faces);
+  total += MemoryConsumption::memory_consumption (*faces);
+
+  return total;
+}
+
+
+
+
+template<int dim, int spacedim>
+Triangulation<dim, spacedim>::DistortedCellList::~DistortedCellList () throw ()
+{
+  // intentionally empty. the throw() specification promises that no
+  // exception leaves this destructor; should the destructor of a member
+  // variable throw anyway, the program is terminated instead
+}
+
+
+template <>
+const Manifold<2,1> &Triangulation<2, 1>::get_manifold(const types::manifold_id) const
+{
+  Assert(false, ExcImpossibleInDim(1));
+  // We cannot simply create a temporary Manifold<2,1> because it is not
+  // instantiated and would lead to unresolved symbols. Given the fact that
+  // this function should be unreachable anyway, just dereference a
+  // null pointer:
+  return *static_cast<FlatManifold<2,1>*>(0);
+}
+
+template <>
+const Manifold<3,1> &Triangulation<3, 1>::get_manifold(const types::manifold_id) const
+{
+  Assert(false, ExcImpossibleInDim(1));
+  // We cannot simply create a temporary Manifold<3,1> because it is not
+  // instantiated and would lead to unresolved symbols. Given the fact that
+  // this function should be unreachable anyway, just dereference a
+  // null pointer:
+  return *static_cast<FlatManifold<3,1>*>(0);
+}
+
+template <>
+const Manifold<3,2> &Triangulation<3, 2>::get_manifold(const types::manifold_id) const
+{
+  Assert(false, ExcImpossibleInDim(2));
+  // We cannot simply create a temporary Manifold<3,2> because it is not
+  // instantiated and would lead to unresolved symbols. Given the fact that
+  // this function should be unreachable anyway, just dereference a
+  // null pointer:
+  return *static_cast<FlatManifold<3,2>*>(0);
+}
+
+// explicit instantiations
+#include "tria.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/grid/tria.inst.in b/source/grid/tria.inst.in
new file mode 100644
index 0000000..817fb1d
--- /dev/null
+++ b/source/grid/tria.inst.in
@@ -0,0 +1,26 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// For every (dimension, space dimension) pair: instantiate Triangulation if
+// dim <= spacedim, and CellData if dim == spacedim.
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+#if deal_II_dimension <= deal_II_space_dimension
+    template class Triangulation<deal_II_dimension, deal_II_space_dimension>;
+#endif
+#if deal_II_dimension == deal_II_space_dimension
+    template struct CellData<deal_II_dimension>;
+#endif
+  }
diff --git a/source/grid/tria_accessor.cc b/source/grid/tria_accessor.cc
new file mode 100644
index 0000000..bb7f9ba
--- /dev/null
+++ b/source/grid/tria_accessor.cc
@@ -0,0 +1,2281 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_levels.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_accessor.templates.h>
+#include <deal.II/grid/tria_iterator.templates.h>
+#include <deal.II/base/geometry_info.h>
+#include <deal.II/base/quadrature.h>
+#include <deal.II/grid/grid_tools.h>
+#include <deal.II/grid/manifold.h>
+#include <deal.II/fe/mapping_q1.h>
+#include <deal.II/fe/fe_q.h>
+
+#include <cmath>
+
+DEAL_II_NAMESPACE_OPEN
+
+// anonymous namespace for helper functions
+namespace
+{
+  // Translate the index of a child of a refined face (subface_no) into the
+  // number of the corresponding subface with respect to the refinement
+  // case of the face.
+  //
+  // The face must have children and subface_no must be a valid child
+  // index; both conditions are checked with Assert.
+  inline
+  unsigned int translate_subface_no(const TriaIterator<TriaAccessor<2, 3, 3> > &face,
+                                    const unsigned int                           subface_no)
+  {
+    Assert(face->has_children(), ExcInternalError());
+    Assert(subface_no<face->n_children(), ExcInternalError());
+
+    // the requested subface is refined itself, but it still matches the
+    // face of the cell that called neighbor_of_coarser_neighbor: we are
+    // looking from an anisotropic child at a coarser neighbor that is
+    // refined more strongly (isotropically) than we are. that neighbor is
+    // not active, so neighbor_child_on_subface cannot be used anyway and
+    // the number is handed back unchanged
+    if (face->child(subface_no)->has_children())
+      return subface_no;
+
+    // if the first child of the face has children (FaceRefineCase case_x1y
+    // or case_y1x), subface_no has to be 1 here and the result is 2. in all
+    // other cases the number is returned as is: 0 or 1 for the anisotropic
+    // cases (case_x, case_y, case_x2y, case_y2x), 0...3 for the isotropic
+    // case (case_xy)
+    const unsigned int shift = (face->child(0)->has_children() ? 1u : 0u);
+    return subface_no + shift;
+  }
+
+
+
+  // Translate the pair (subface_no, subsubface_no), i.e. the index of a
+  // child of a refined face and the index of a child of that child, into
+  // the number of the corresponding subface with respect to the refinement
+  // case of the face.
+  //
+  // Only valid for anisotropically refined faces whose child subface_no is
+  // refined as well; these conditions are checked with Assert.
+  inline
+  unsigned int translate_subface_no(const TriaIterator<TriaAccessor<2, 3, 3> > &face,
+                                    const unsigned int                           subface_no,
+                                    const unsigned int                           subsubface_no)
+  {
+    Assert(face->has_children(), ExcInternalError());
+    // the subface has to be refined; for an unrefined subface the overload
+    // without the subsubface_no argument is the one to call
+    Assert(face->child(subface_no)->has_children(), ExcInternalError());
+    Assert(subsubface_no<face->child(subface_no)->n_children(), ExcInternalError());
+    // only anisotropic refinement cases of the face can lead here
+    Assert(face->refinement_case() < RefinementCase<2>::isotropic_refinement,
+           ExcInternalError());
+
+    // third table index: 0 if the first subface is not refined, 1 if it is
+    const unsigned int first_refined = (face->child(0)->has_children() ? 1 : 0);
+
+    // marker for combinations that must not occur
+    static const unsigned int invalid = numbers::invalid_unsigned_int;
+
+    // translation table, indexed by
+    //   [subface_no][subsubface_no][first subface has children? no/yes]
+    static const unsigned int lookup[2][2][2]
+    =
+    {
+      // first subface
+      {
+        {invalid, 0},   // first  subsubface
+        {invalid, 1}    // second subsubface
+      },
+      // second subface
+      {
+        {1, 2},         // first  subsubface
+        {2, 3}          // second subsubface
+      }
+    };
+
+    const unsigned int result = lookup[subface_no][subsubface_no][first_refined];
+    Assert(result!=invalid, ExcInternalError());
+
+    return result;
+  }
+
+
+  // barycenter of a line: the arithmetic mean of its two vertices
+  template <int dim, int spacedim>
+  Point<spacedim>
+  barycenter (const TriaAccessor<1, dim, spacedim> &accessor)
+  {
+    const Point<spacedim> vertex_sum = accessor.vertex(1)+accessor.vertex(0);
+    return vertex_sum/2.;
+  }
+
+
+  Point<2>
+  barycenter (const TriaAccessor<2, 2, 2> &accessor)
+  {
+    // barycenter of a quadrilateral cell in 2d. the evaluation of the
+    // formulae is a bit tricky when done dimension independently, so this
+    // function is written separately for 2D and 3D. the expressions below
+    // stem from the Maple script quoted in the following comment
+    /*
+      Get the computation of the barycenter by this little Maple script. We
+      use the bilinear mapping of the unit quad to the real quad. However,
+      every transformation mapping the unit faces to straight lines should
+      do.
+
+      Remember that the area of the quad is given by
+      |K| = \int_K 1 dx dy  = \int_{\hat K} |det J| d(xi) d(eta)
+      and that the barycenter is given by
+      \vec x_s = 1/|K| \int_K \vec x dx dy
+      = 1/|K| \int_{\hat K} \vec x(xi,eta) |det J| d(xi) d(eta)
+
+      # x and y are arrays holding the x- and y-values of the four vertices
+      # of this cell in real space.
+      x := array(0..3);
+      y := array(0..3);
+      tphi[0] := (1-xi)*(1-eta):
+      tphi[1] :=     xi*(1-eta):
+      tphi[2] := (1-xi)*eta:
+      tphi[3] :=     xi*eta:
+      x_real := sum(x[s]*tphi[s], s=0..3):
+      y_real := sum(y[s]*tphi[s], s=0..3):
+      detJ := diff(x_real,xi)*diff(y_real,eta) - diff(x_real,eta)*diff(y_real,xi):
+
+      measure := simplify ( int ( int (detJ, xi=0..1), eta=0..1)):
+
+      xs := simplify (1/measure * int ( int (x_real * detJ, xi=0..1), eta=0..1)):
+      ys := simplify (1/measure * int ( int (y_real * detJ, xi=0..1), eta=0..1)):
+      readlib(C):
+
+      C(array(1..2, [xs, ys]), optimized);
+    */
+
+    const double x[4] = { accessor.vertex(0)(0),
+                          accessor.vertex(1)(0),
+                          accessor.vertex(2)(0),
+                          accessor.vertex(3)(0)
+                        };
+    const double y[4] = { accessor.vertex(0)(1),
+                          accessor.vertex(1)(1),
+                          accessor.vertex(2)(1),
+                          accessor.vertex(3)(1)
+                        };
+    const double t1 = x[0]*x[1];
+    const double t3 = x[0]*x[0];
+    const double t5 = x[1]*x[1];
+    const double t9 = y[0]*x[0];
+    const double t11 = y[1]*x[1];
+    const double t14 = x[2]*x[2];
+    const double t16 = x[3]*x[3];
+    const double t20 = x[2]*x[3];
+    const double t27 = t1*y[1]+t3*y[1]-t5*y[0]-t3*y[2]+t5*y[3]+t9*x[2]-t11*x[3]-t1*y[0]-t14*y[3]+t16*y[2]-t16*y[1]+t14*y[0]-t20*y[3]-x[0]*x[2]*y[2]+x[1]*x[3]*y[3]+t20*y[2];
+    const double t37 = 1/(-x[1]*y[0]+x[1]*y[3]+y[0]*x[2]+x[0]*y[1]-x[0]*y[2]-y[1]*x[3]-x[2]*y[3]+x[3]*y[2]);
+    const double t39 = y[2]*y[2];
+    const double t51 = y[0]*y[0];
+    const double t53 = y[1]*y[1];
+    const double t59 = y[3]*y[3];
+    const double t63 = t39*x[3]+y[2]*y[0]*x[2]+y[3]*x[3]*y[2]-y[2]*x[2]*y[3]-y[3]*y[1]*x[3]-t9*y[2]+t11*y[3]+t51*x[2]-t53*x[3]-x[1]*t51+t9*y[1]-t11*y[0]+x[0]*t53-t59*x[2]+t59*x[1]-t39*x[0];
+
+    return Point<2> (t27*t37/3, t63*t37/3);
+  }
+
+
+
+  Point<3>
+  barycenter (const TriaAccessor<3,3,3> &accessor)
+  {
+    /*
+      Get the computation of the barycenter by this little Maple script. We
+      use the trilinear mapping of the unit hex to the real hex.
+
+      Remember that the area of the hex is given by
+      |K| = \int_K 1 dx dy dz = \int_{\hat K} |det J| d(xi) d(eta) d(zeta)
+      and that the barycenter is given by
+      \vec x_s = 1/|K| \int_K \vec x dx dy dz
+      = 1/|K| \int_{\hat K} \vec x(xi,eta,zeta) |det J| d(xi) d(eta) d(zeta)
+
+      Note, that in the ordering of the shape functions tphi[0]-tphi[7]
+      below, eta and zeta have been exchanged (zeta belongs to the y, and
+      eta to the z direction). However, the resulting Jacobian determinant
+      detJ should be the same, as a matrix and the matrix created from it
+      by exchanging two consecutive lines and two neighboring columns have
+      the same determinant.
+
+      # x, y and z are arrays holding the x-, y- and z-values of the four vertices
+      # of this cell in real space.
+      x := array(0..7):
+      y := array(0..7):
+      z := array(0..7):
+      tphi[0] := (1-xi)*(1-eta)*(1-zeta):
+      tphi[1] := xi*(1-eta)*(1-zeta):
+      tphi[2] := xi*eta*(1-zeta):
+      tphi[3] := (1-xi)*eta*(1-zeta):
+      tphi[4] := (1-xi)*(1-eta)*zeta:
+      tphi[5] := xi*(1-eta)*zeta:
+      tphi[6] := xi*eta*zeta:
+      tphi[7] := (1-xi)*eta*zeta:
+      x_real := sum(x[s]*tphi[s], s=0..7):
+      y_real := sum(y[s]*tphi[s], s=0..7):
+      z_real := sum(z[s]*tphi[s], s=0..7):
+      with (linalg):
+      J := matrix(3,3, [[diff(x_real, xi), diff(x_real, eta), diff(x_real, zeta)],
+      [diff(y_real, xi), diff(y_real, eta), diff(y_real, zeta)],
+      [diff(z_real, xi), diff(z_real, eta), diff(z_real, zeta)]]):
+      detJ := det (J):
+
+      measure := simplify ( int ( int ( int (detJ, xi=0..1), eta=0..1), zeta=0..1)):
+
+      xs := simplify (1/measure * int ( int ( int (x_real * detJ, xi=0..1), eta=0..1), zeta=0..1)):
+      ys := simplify (1/measure * int ( int ( int (y_real * detJ, xi=0..1), eta=0..1), zeta=0..1)):
+      zs := simplify (1/measure * int ( int ( int (z_real * detJ, xi=0..1), eta=0..1), zeta=0..1)):
+
+      readlib(C):
+
+      C(array(1..3, [xs, ys, zs]));
+
+
+      This script takes more than several hours when using an old version
+      of maple on an old and slow computer. Therefore, when changing to
+      the new deal.II numbering scheme (lexicographic numbering) the code
+      lines below have not been reproduced with maple but only the
+      ordering of points in the definitions of x[], y[] and z[] have been
+      changed.
+
+      For the case, someone is willing to rerun the maple script, he/she
+      should use following ordering of shape functions:
+
+      tphi[0] := (1-xi)*(1-eta)*(1-zeta):
+      tphi[1] :=     xi*(1-eta)*(1-zeta):
+      tphi[2] := (1-xi)*    eta*(1-zeta):
+      tphi[3] :=     xi*    eta*(1-zeta):
+      tphi[4] := (1-xi)*(1-eta)*zeta:
+      tphi[5] :=     xi*(1-eta)*zeta:
+      tphi[6] := (1-xi)*    eta*zeta:
+      tphi[7] :=     xi*    eta*zeta:
+
+      and change the ordering of points in the definitions of x[], y[] and
+      z[] back to the standard ordering.
+    */
+
+    const double x[8] = { accessor.vertex(0)(0),
+                          accessor.vertex(1)(0),
+                          accessor.vertex(5)(0),
+                          accessor.vertex(4)(0),
+                          accessor.vertex(2)(0),
+                          accessor.vertex(3)(0),
+                          accessor.vertex(7)(0),
+                          accessor.vertex(6)(0)
+                        };
+    const double y[8] = { accessor.vertex(0)(1),
+                          accessor.vertex(1)(1),
+                          accessor.vertex(5)(1),
+                          accessor.vertex(4)(1),
+                          accessor.vertex(2)(1),
+                          accessor.vertex(3)(1),
+                          accessor.vertex(7)(1),
+                          accessor.vertex(6)(1)
+                        };
+    const double z[8] = { accessor.vertex(0)(2),
+                          accessor.vertex(1)(2),
+                          accessor.vertex(5)(2),
+                          accessor.vertex(4)(2),
+                          accessor.vertex(2)(2),
+                          accessor.vertex(3)(2),
+                          accessor.vertex(7)(2),
+                          accessor.vertex(6)(2)
+                        };
+
+    double s1, s2, s3, s4, s5, s6, s7, s8;
+
+    s1 = 1.0/6.0;
+    s8 = -x[2]*x[2]*y[0]*z[3]-2.0*z[6]*x[7]*x[7]*y[4]-z[5]*x[7]*x[7]*y[4]-z
+         [6]*x[7]*x[7]*y[5]+2.0*y[6]*x[7]*x[7]*z[4]-z[5]*x[6]*x[6]*y[4]+x[6]*x[6]*y[4]*z
+         [7]-z[1]*x[0]*x[0]*y[2]-x[6]*x[6]*y[7]*z[4]+2.0*x[6]*x[6]*y[5]*z[7]-2.0*x[6]*x
+         [6]*y[7]*z[5]+y[5]*x[6]*x[6]*z[4]+2.0*x[5]*x[5]*y[4]*z[6]+x[0]*x[0]*y[7]*z[4]
+         -2.0*x[5]*x[5]*y[6]*z[4];
+    s7 = s8-y[6]*x[5]*x[5]*z[7]+z[6]*x[5]*x[5]*y[7]-y[1]*x[0]*x[0]*z[5]+x[7]*
+         z[5]*x[4]*y[7]-x[7]*y[6]*x[5]*z[7]-2.0*x[7]*x[6]*y[7]*z[4]+2.0*x[7]*x[6]*y[4]*z
+         [7]-x[7]*x[5]*y[7]*z[4]-2.0*x[7]*y[6]*x[4]*z[7]-x[7]*y[5]*x[4]*z[7]+x[2]*x[2]*y
+         [3]*z[0]-x[7]*x[6]*y[7]*z[5]+x[7]*x[6]*y[5]*z[7]+2.0*x[1]*x[1]*y[0]*z[5]+x[7]*z
+         [6]*x[5]*y[7];
+    s8 = -2.0*x[1]*x[1]*y[5]*z[0]+z[1]*x[0]*x[0]*y[5]+2.0*x[2]*x[2]*y[3]*z[1]
+         -z[5]*x[4]*x[4]*y[1]+y[5]*x[4]*x[4]*z[1]-2.0*x[5]*x[5]*y[4]*z[1]+2.0*x[5]*x[5]*
+         y[1]*z[4]-2.0*x[2]*x[2]*y[1]*z[3]-y[1]*x[2]*x[2]*z[0]+x[7]*y[2]*x[3]*z[7]+x[7]*
+         z[2]*x[6]*y[3]+2.0*x[7]*z[6]*x[4]*y[7]+z[5]*x[1]*x[1]*y[4]+z[1]*x[2]*x[2]*y[0]
+         -2.0*y[0]*x[3]*x[3]*z[7];
+    s6 = s8+2.0*z[0]*x[3]*x[3]*y[7]-x[7]*x[2]*y[3]*z[7]-x[7]*z[2]*x[3]*y[7]+x
+         [7]*x[2]*y[7]*z[3]-x[7]*y[2]*x[6]*z[3]+x[4]*x[5]*y[1]*z[4]-x[4]*x[5]*y[4]*z[1]+
+         x[4]*z[5]*x[1]*y[4]-x[4]*y[5]*x[1]*z[4]-2.0*x[5]*z[5]*x[4]*y[1]-2.0*x[5]*y[5]*x
+         [1]*z[4]+2.0*x[5]*z[5]*x[1]*y[4]+2.0*x[5]*y[5]*x[4]*z[1]-x[6]*z[5]*x[7]*y[4]-z
+         [2]*x[3]*x[3]*y[6]+s7;
+    s8 = -2.0*x[6]*z[6]*x[7]*y[5]-x[6]*y[6]*x[4]*z[7]+y[2]*x[3]*x[3]*z[6]+x
+         [6]*y[6]*x[7]*z[4]+2.0*y[2]*x[3]*x[3]*z[7]+x[0]*x[1]*y[0]*z[5]+x[0]*y[1]*x[5]*z
+         [0]-x[0]*z[1]*x[5]*y[0]-2.0*z[2]*x[3]*x[3]*y[7]+2.0*x[6]*z[6]*x[5]*y[7]-x[0]*x
+         [1]*y[5]*z[0]-x[6]*y[5]*x[4]*z[6]-2.0*x[3]*z[0]*x[7]*y[3]-x[6]*z[6]*x[7]*y[4]
+         -2.0*x[1]*z[1]*x[5]*y[0];
+    s7 = s8+2.0*x[1]*y[1]*x[5]*z[0]+2.0*x[1]*z[1]*x[0]*y[5]+2.0*x[3]*y[0]*x
+         [7]*z[3]+2.0*x[3]*x[0]*y[3]*z[7]-2.0*x[3]*x[0]*y[7]*z[3]-2.0*x[1]*y[1]*x[0]*z
+         [5]-2.0*x[6]*y[6]*x[5]*z[7]+s6-y[5]*x[1]*x[1]*z[4]+x[6]*z[6]*x[4]*y[7]-2.0*x[2]
+         *y[2]*x[3]*z[1]+x[6]*z[5]*x[4]*y[6]+x[6]*x[5]*y[4]*z[6]-y[6]*x[7]*x[7]*z[2]-x
+         [6]*x[5]*y[6]*z[4];
+    s8 = x[3]*x[3]*y[7]*z[4]-2.0*y[6]*x[7]*x[7]*z[3]+z[6]*x[7]*x[7]*y[2]+2.0*
+         z[6]*x[7]*x[7]*y[3]+2.0*y[1]*x[0]*x[0]*z[3]+2.0*x[0]*x[1]*y[3]*z[0]-2.0*x[0]*y
+         [0]*x[3]*z[4]-2.0*x[0]*z[1]*x[4]*y[0]-2.0*x[0]*y[1]*x[3]*z[0]+2.0*x[0]*y[0]*x
+         [4]*z[3]-2.0*x[0]*z[0]*x[4]*y[3]+2.0*x[0]*x[1]*y[0]*z[4]+2.0*x[0]*z[1]*x[3]*y
+         [0]-2.0*x[0]*x[1]*y[0]*z[3]-2.0*x[0]*x[1]*y[4]*z[0]+2.0*x[0]*y[1]*x[4]*z[0];
+    s5 = s8+2.0*x[0]*z[0]*x[3]*y[4]+x[1]*y[1]*x[0]*z[3]-x[1]*z[1]*x[4]*y[0]-x
+         [1]*y[1]*x[0]*z[4]+x[1]*z[1]*x[0]*y[4]-x[1]*y[1]*x[3]*z[0]-x[1]*z[1]*x[0]*y[3]-
+         x[0]*z[5]*x[4]*y[1]+x[0]*y[5]*x[4]*z[1]-2.0*x[4]*x[0]*y[4]*z[7]-2.0*x[4]*y[5]*x
+         [0]*z[4]+2.0*x[4]*z[5]*x[0]*y[4]-2.0*x[4]*x[5]*y[4]*z[0]-2.0*x[4]*y[0]*x[7]*z
+         [4]-x[5]*y[5]*x[0]*z[4]+s7;
+    s8 = x[5]*z[5]*x[0]*y[4]-x[5]*z[5]*x[4]*y[0]+x[1]*z[5]*x[0]*y[4]+x[5]*y
+         [5]*x[4]*z[0]-x[0]*y[0]*x[7]*z[4]-x[0]*z[5]*x[4]*y[0]-x[1]*y[5]*x[0]*z[4]+x[0]*
+         z[0]*x[7]*y[4]+x[0]*y[5]*x[4]*z[0]-x[0]*z[0]*x[4]*y[7]+x[0]*x[5]*y[0]*z[4]+x[0]
+         *y[0]*x[4]*z[7]-x[0]*x[5]*y[4]*z[0]-x[3]*x[3]*y[4]*z[7]+2.0*x[2]*z[2]*x[3]*y[1]
+         ;
+    s7 = s8-x[5]*x[5]*y[4]*z[0]+2.0*y[5]*x[4]*x[4]*z[0]-2.0*z[0]*x[4]*x[4]*y
+         [7]+2.0*y[0]*x[4]*x[4]*z[7]-2.0*z[5]*x[4]*x[4]*y[0]+x[5]*x[5]*y[4]*z[7]-x[5]*x
+         [5]*y[7]*z[4]-2.0*y[5]*x[4]*x[4]*z[7]+2.0*z[5]*x[4]*x[4]*y[7]-x[0]*x[0]*y[7]*z
+         [3]+y[2]*x[0]*x[0]*z[3]+x[0]*x[0]*y[3]*z[7]-x[5]*x[1]*y[4]*z[0]+x[5]*y[1]*x[4]*
+         z[0]-x[4]*y[0]*x[3]*z[4];
+    s8 = -x[4]*y[1]*x[0]*z[4]+x[4]*z[1]*x[0]*y[4]+x[4]*x[0]*y[3]*z[4]-x[4]*x
+         [0]*y[4]*z[3]+x[4]*x[1]*y[0]*z[4]-x[4]*x[1]*y[4]*z[0]+x[4]*z[0]*x[3]*y[4]+x[5]*
+         x[1]*y[0]*z[4]+x[1]*z[1]*x[3]*y[0]+x[1]*y[1]*x[4]*z[0]-x[5]*z[1]*x[4]*y[0]-2.0*
+         y[1]*x[0]*x[0]*z[4]+2.0*z[1]*x[0]*x[0]*y[4]+2.0*x[0]*x[0]*y[3]*z[4]-2.0*z[1]*x
+         [0]*x[0]*y[3];
+    s6 = s8-2.0*x[0]*x[0]*y[4]*z[3]+x[1]*x[1]*y[3]*z[0]+x[1]*x[1]*y[0]*z[4]-x
+         [1]*x[1]*y[0]*z[3]-x[1]*x[1]*y[4]*z[0]-z[1]*x[4]*x[4]*y[0]+y[0]*x[4]*x[4]*z[3]-
+         z[0]*x[4]*x[4]*y[3]+y[1]*x[4]*x[4]*z[0]-x[0]*x[0]*y[4]*z[7]-y[5]*x[0]*x[0]*z[4]
+         +z[5]*x[0]*x[0]*y[4]+x[5]*x[5]*y[0]*z[4]-x[0]*y[0]*x[3]*z[7]+x[0]*z[0]*x[3]*y
+         [7]+s7;
+    s8 = s6+x[0]*x[2]*y[3]*z[0]-x[0]*x[2]*y[0]*z[3]+x[0]*y[0]*x[7]*z[3]-x[0]*
+         y[2]*x[3]*z[0]+x[0]*z[2]*x[3]*y[0]-x[0]*z[0]*x[7]*y[3]+x[1]*x[2]*y[3]*z[0]-z[2]
+         *x[0]*x[0]*y[3]+x[3]*z[2]*x[6]*y[3]-x[3]*x[2]*y[3]*z[6]+x[3]*x[2]*y[6]*z[3]-x
+         [3]*y[2]*x[6]*z[3]-2.0*x[3]*y[2]*x[7]*z[3]+2.0*x[3]*z[2]*x[7]*y[3];
+    s7 = s8+2.0*x[4]*y[5]*x[7]*z[4]+2.0*x[4]*x[5]*y[4]*z[7]-2.0*x[4]*z[5]*x
+         [7]*y[4]-2.0*x[4]*x[5]*y[7]*z[4]+x[5]*y[5]*x[7]*z[4]-x[5]*z[5]*x[7]*y[4]-x[5]*y
+         [5]*x[4]*z[7]+x[5]*z[5]*x[4]*y[7]+2.0*x[3]*x[2]*y[7]*z[3]-2.0*x[2]*z[2]*x[1]*y
+         [3]+2.0*x[4]*z[0]*x[7]*y[4]+2.0*x[4]*x[0]*y[7]*z[4]+2.0*x[4]*x[5]*y[0]*z[4]-x
+         [7]*x[6]*y[2]*z[7]-2.0*x[3]*x[2]*y[3]*z[7]-x[0]*x[4]*y[7]*z[3];
+    s8 = x[0]*x[3]*y[7]*z[4]-x[0]*x[3]*y[4]*z[7]+x[0]*x[4]*y[3]*z[7]-2.0*x[7]
+         *z[6]*x[3]*y[7]+x[3]*x[7]*y[4]*z[3]-x[3]*x[4]*y[7]*z[3]-x[3]*x[7]*y[3]*z[4]+x
+         [3]*x[4]*y[3]*z[7]+2.0*x[2]*y[2]*x[1]*z[3]+y[6]*x[3]*x[3]*z[7]-z[6]*x[3]*x[3]*y
+         [7]-x[1]*z[5]*x[4]*y[1]-x[1]*x[5]*y[4]*z[1]-x[1]*z[2]*x[0]*y[3]-x[1]*x[2]*y[0]*
+         z[3]+x[1]*y[2]*x[0]*z[3];
+    s4 = s8+x[1]*x[5]*y[1]*z[4]+x[1]*y[5]*x[4]*z[1]+x[4]*y[0]*x[7]*z[3]-x[4]*
+         z[0]*x[7]*y[3]-x[4]*x[4]*y[7]*z[3]+x[4]*x[4]*y[3]*z[7]+x[3]*z[6]*x[7]*y[3]-x[3]
+         *x[6]*y[3]*z[7]+x[3]*x[6]*y[7]*z[3]-x[3]*z[6]*x[2]*y[7]-x[3]*y[6]*x[7]*z[3]+x
+         [3]*z[6]*x[7]*y[2]+x[3]*y[6]*x[2]*z[7]+2.0*x[5]*z[5]*x[4]*y[6]+s5+s7;
+    s8 = s4-2.0*x[5]*z[5]*x[6]*y[4]-x[5]*z[6]*x[7]*y[5]+x[5]*x[6]*y[5]*z[7]-x
+         [5]*x[6]*y[7]*z[5]-2.0*x[5]*y[5]*x[4]*z[6]+2.0*x[5]*y[5]*x[6]*z[4]-x[3]*y[6]*x
+         [7]*z[2]+x[4]*x[7]*y[4]*z[3]+x[4]*x[3]*y[7]*z[4]-x[4]*x[7]*y[3]*z[4]-x[4]*x[3]*
+         y[4]*z[7]-z[1]*x[5]*x[5]*y[0]+y[1]*x[5]*x[5]*z[0]+x[4]*y[6]*x[7]*z[4];
+    s7 = s8-x[4]*x[6]*y[7]*z[4]+x[4]*x[6]*y[4]*z[7]-x[4]*z[6]*x[7]*y[4]-x[5]*
+         y[6]*x[4]*z[7]-x[5]*x[6]*y[7]*z[4]+x[5]*x[6]*y[4]*z[7]+x[5]*z[6]*x[4]*y[7]-y[6]
+         *x[4]*x[4]*z[7]+z[6]*x[4]*x[4]*y[7]+x[7]*x[5]*y[4]*z[7]-y[2]*x[7]*x[7]*z[3]+z
+         [2]*x[7]*x[7]*y[3]-y[0]*x[3]*x[3]*z[4]-y[1]*x[3]*x[3]*z[0]+z[1]*x[3]*x[3]*y[0];
+    s8 = z[0]*x[3]*x[3]*y[4]-x[2]*y[1]*x[3]*z[0]+x[2]*z[1]*x[3]*y[0]+x[3]*y
+         [1]*x[0]*z[3]+x[3]*x[1]*y[3]*z[0]+x[3]*x[0]*y[3]*z[4]-x[3]*z[1]*x[0]*y[3]-x[3]*
+         x[0]*y[4]*z[3]+x[3]*y[0]*x[4]*z[3]-x[3]*z[0]*x[4]*y[3]-x[3]*x[1]*y[0]*z[3]+x[3]
+         *z[0]*x[7]*y[4]-x[3]*y[0]*x[7]*z[4]+z[0]*x[7]*x[7]*y[4]-y[0]*x[7]*x[7]*z[4];
+    s6 = s8+y[1]*x[0]*x[0]*z[2]-2.0*y[2]*x[3]*x[3]*z[0]+2.0*z[2]*x[3]*x[3]*y
+         [0]-2.0*x[1]*x[1]*y[0]*z[2]+2.0*x[1]*x[1]*y[2]*z[0]-y[2]*x[3]*x[3]*z[1]+z[2]*x
+         [3]*x[3]*y[1]-y[5]*x[4]*x[4]*z[6]+z[5]*x[4]*x[4]*y[6]+x[7]*x[0]*y[7]*z[4]-x[7]*
+         z[0]*x[4]*y[7]-x[7]*x[0]*y[4]*z[7]+x[7]*y[0]*x[4]*z[7]-x[0]*x[1]*y[0]*z[2]+x[0]
+         *z[1]*x[2]*y[0]+s7;
+    s8 = s6+x[0]*x[1]*y[2]*z[0]-x[0]*y[1]*x[2]*z[0]-x[3]*z[1]*x[0]*y[2]+2.0*x
+         [3]*x[2]*y[3]*z[0]+y[0]*x[7]*x[7]*z[3]-z[0]*x[7]*x[7]*y[3]-2.0*x[3]*z[2]*x[0]*y
+         [3]-2.0*x[3]*x[2]*y[0]*z[3]+2.0*x[3]*y[2]*x[0]*z[3]+x[3]*x[2]*y[3]*z[1]-x[3]*x
+         [2]*y[1]*z[3]-x[5]*y[1]*x[0]*z[5]+x[3]*y[1]*x[0]*z[2]+x[4]*y[6]*x[7]*z[5];
+    s7 = s8-x[5]*x[1]*y[5]*z[0]+2.0*x[1]*z[1]*x[2]*y[0]-2.0*x[1]*z[1]*x[0]*y
+         [2]+x[1]*x[2]*y[3]*z[1]-x[1]*x[2]*y[1]*z[3]+2.0*x[1]*y[1]*x[0]*z[2]-2.0*x[1]*y
+         [1]*x[2]*z[0]-z[2]*x[1]*x[1]*y[3]+y[2]*x[1]*x[1]*z[3]+y[5]*x[7]*x[7]*z[4]+y[6]*
+         x[7]*x[7]*z[5]+x[7]*x[6]*y[7]*z[2]+x[7]*y[6]*x[2]*z[7]-x[7]*z[6]*x[2]*y[7]-2.0*
+         x[7]*x[6]*y[3]*z[7];
+    s8 = s7+2.0*x[7]*x[6]*y[7]*z[3]+2.0*x[7]*y[6]*x[3]*z[7]-x[3]*z[2]*x[1]*y
+         [3]+x[3]*y[2]*x[1]*z[3]+x[5]*x[1]*y[0]*z[5]+x[4]*y[5]*x[6]*z[4]+x[5]*z[1]*x[0]*
+         y[5]-x[4]*z[6]*x[7]*y[5]-x[4]*x[5]*y[6]*z[4]+x[4]*x[5]*y[4]*z[6]-x[4]*z[5]*x[6]
+         *y[4]-x[1]*y[2]*x[3]*z[1]+x[1]*z[2]*x[3]*y[1]-x[2]*x[1]*y[0]*z[2]-x[2]*z[1]*x
+         [0]*y[2];
+    s5 = s8+x[2]*x[1]*y[2]*z[0]-x[2]*z[2]*x[0]*y[3]+x[2]*y[2]*x[0]*z[3]-x[2]*
+         y[2]*x[3]*z[0]+x[2]*z[2]*x[3]*y[0]+x[2]*y[1]*x[0]*z[2]+x[5]*y[6]*x[7]*z[5]+x[6]
+         *y[5]*x[7]*z[4]+2.0*x[6]*y[6]*x[7]*z[5]-x[7]*y[0]*x[3]*z[7]+x[7]*z[0]*x[3]*y[7]
+         -x[7]*x[0]*y[7]*z[3]+x[7]*x[0]*y[3]*z[7]+2.0*x[7]*x[7]*y[4]*z[3]-2.0*x[7]*x[7]*
+         y[3]*z[4]-2.0*x[1]*x[1]*y[2]*z[5];
+    s8 = s5-2.0*x[7]*x[4]*y[7]*z[3]+2.0*x[7]*x[3]*y[7]*z[4]-2.0*x[7]*x[3]*y
+         [4]*z[7]+2.0*x[7]*x[4]*y[3]*z[7]+2.0*x[1]*x[1]*y[5]*z[2]-x[1]*x[1]*y[2]*z[6]+x
+         [1]*x[1]*y[6]*z[2]+z[1]*x[5]*x[5]*y[2]-y[1]*x[5]*x[5]*z[2]-x[1]*x[1]*y[6]*z[5]+
+         x[1]*x[1]*y[5]*z[6]+x[5]*x[5]*y[6]*z[2]-x[5]*x[5]*y[2]*z[6]-2.0*y[1]*x[5]*x[5]*
+         z[6];
+    s7 = s8+2.0*z[1]*x[5]*x[5]*y[6]+2.0*x[1]*z[1]*x[5]*y[2]+2.0*x[1]*y[1]*x
+         [2]*z[5]-2.0*x[1]*z[1]*x[2]*y[5]-2.0*x[1]*y[1]*x[5]*z[2]-x[1]*y[1]*x[6]*z[2]-x
+         [1]*z[1]*x[2]*y[6]+x[1]*z[1]*x[6]*y[2]+x[1]*y[1]*x[2]*z[6]-x[5]*x[1]*y[2]*z[5]+
+         x[5]*y[1]*x[2]*z[5]-x[5]*z[1]*x[2]*y[5]+x[5]*x[1]*y[5]*z[2]-x[5]*y[1]*x[6]*z[2]
+         -x[5]*x[1]*y[2]*z[6];
+    s8 = s7+x[5]*x[1]*y[6]*z[2]+x[5]*z[1]*x[6]*y[2]+x[1]*x[2]*y[5]*z[6]-x[1]*
+         x[2]*y[6]*z[5]-x[1]*z[1]*x[6]*y[5]-x[1]*y[1]*x[5]*z[6]+x[1]*z[1]*x[5]*y[6]+x[1]
+         *y[1]*x[6]*z[5]-x[5]*x[6]*y[5]*z[2]+x[5]*x[2]*y[5]*z[6]-x[5]*x[2]*y[6]*z[5]+x
+         [5]*x[6]*y[2]*z[5]-2.0*x[5]*z[1]*x[6]*y[5]-2.0*x[5]*x[1]*y[6]*z[5]+2.0*x[5]*x
+         [1]*y[5]*z[6];
+    s6 = s8+2.0*x[5]*y[1]*x[6]*z[5]+2.0*x[2]*x[1]*y[6]*z[2]+2.0*x[2]*z[1]*x
+         [6]*y[2]-2.0*x[2]*x[1]*y[2]*z[6]+x[2]*x[5]*y[6]*z[2]+x[2]*x[6]*y[2]*z[5]-x[2]*x
+         [5]*y[2]*z[6]+y[1]*x[2]*x[2]*z[5]-z[1]*x[2]*x[2]*y[5]-2.0*x[2]*y[1]*x[6]*z[2]-x
+         [2]*x[6]*y[5]*z[2]-2.0*z[1]*x[2]*x[2]*y[6]+x[2]*x[2]*y[5]*z[6]-x[2]*x[2]*y[6]*z
+         [5]+2.0*y[1]*x[2]*x[2]*z[6]+x[2]*z[1]*x[5]*y[2];
+    s8 = s6-x[2]*x[1]*y[2]*z[5]+x[2]*x[1]*y[5]*z[2]-x[2]*y[1]*x[5]*z[2]+x[6]*
+         y[1]*x[2]*z[5]-x[6]*z[1]*x[2]*y[5]-z[1]*x[6]*x[6]*y[5]+y[1]*x[6]*x[6]*z[5]-y[1]
+         *x[6]*x[6]*z[2]-2.0*x[6]*x[6]*y[5]*z[2]+2.0*x[6]*x[6]*y[2]*z[5]+z[1]*x[6]*x[6]*
+         y[2]-x[6]*x[1]*y[6]*z[5]-x[6]*y[1]*x[5]*z[6]+x[6]*x[1]*y[5]*z[6];
+    s7 = s8+x[6]*z[1]*x[5]*y[6]-x[6]*z[1]*x[2]*y[6]-x[6]*x[1]*y[2]*z[6]+2.0*x
+         [6]*x[5]*y[6]*z[2]+2.0*x[6]*x[2]*y[5]*z[6]-2.0*x[6]*x[2]*y[6]*z[5]-2.0*x[6]*x
+         [5]*y[2]*z[6]+x[6]*x[1]*y[6]*z[2]+x[6]*y[1]*x[2]*z[6]-x[2]*x[2]*y[3]*z[7]+x[2]*
+         x[2]*y[7]*z[3]-x[2]*z[2]*x[3]*y[7]-x[2]*y[2]*x[7]*z[3]+x[2]*z[2]*x[7]*y[3]+x[2]
+         *y[2]*x[3]*z[7]-x[6]*x[6]*y[3]*z[7];
+    s8 = s7+x[6]*x[6]*y[7]*z[3]-x[6]*x[2]*y[3]*z[7]+x[6]*x[2]*y[7]*z[3]-x[6]*
+         y[6]*x[7]*z[3]+x[6]*y[6]*x[3]*z[7]-x[6]*z[6]*x[3]*y[7]+x[6]*z[6]*x[7]*y[3]+y[6]
+         *x[2]*x[2]*z[7]-z[6]*x[2]*x[2]*y[7]+2.0*x[2]*x[2]*y[6]*z[3]-x[2]*y[6]*x[7]*z[2]
+         -2.0*x[2]*y[2]*x[6]*z[3]-2.0*x[2]*x[2]*y[3]*z[6]+2.0*x[2]*y[2]*x[3]*z[6]-x[2]*x
+         [6]*y[2]*z[7];
+    s3 = s8+x[2]*x[6]*y[7]*z[2]+x[2]*z[6]*x[7]*y[2]+2.0*x[2]*z[2]*x[6]*y[3]
+         -2.0*x[2]*z[2]*x[3]*y[6]-y[2]*x[6]*x[6]*z[3]-2.0*x[6]*x[6]*y[2]*z[7]+2.0*x[6]*x
+         [6]*y[7]*z[2]+z[2]*x[6]*x[6]*y[3]-2.0*x[6]*y[6]*x[7]*z[2]+x[6]*y[2]*x[3]*z[6]-x
+         [6]*x[2]*y[3]*z[6]+2.0*x[6]*z[6]*x[7]*y[2]+2.0*x[6]*y[6]*x[2]*z[7]-2.0*x[6]*z
+         [6]*x[2]*y[7]+x[6]*x[2]*y[6]*z[3]-x[6]*z[2]*x[3]*y[6];
+    s8 = y[1]*x[0]*z[3]+x[1]*y[3]*z[0]-y[0]*x[3]*z[7]-x[1]*y[5]*z[0]-y[0]*x
+         [3]*z[4]-x[1]*y[0]*z[2]+z[1]*x[2]*y[0]-y[1]*x[0]*z[5]-z[1]*x[0]*y[2]-y[1]*x[0]*
+         z[4]+z[1]*x[5]*y[2]+z[0]*x[7]*y[4]+z[0]*x[3]*y[7]+z[1]*x[0]*y[4]-x[1]*y[2]*z[5]
+         +x[2]*y[3]*z[0]+y[1]*x[2]*z[5]-x[2]*y[3]*z[7];
+    s7 = s8-z[1]*x[2]*y[5]-y[1]*x[3]*z[0]-x[0]*y[7]*z[3]-z[1]*x[0]*y[3]+y[5]*
+         x[4]*z[0]-x[0]*y[4]*z[3]+y[5]*x[7]*z[4]-z[0]*x[4]*y[3]+x[1]*y[0]*z[4]-z[2]*x[3]
+         *y[7]-y[6]*x[7]*z[2]+x[1]*y[5]*z[2]+y[6]*x[7]*z[5]+x[0]*y[7]*z[4]+x[1]*y[2]*z
+         [0]-z[1]*x[4]*y[0]-z[0]*x[4]*y[7]-z[2]*x[0]*y[3];
+    s8 = x[5]*y[0]*z[4]+z[1]*x[0]*y[5]-x[2]*y[0]*z[3]-z[1]*x[5]*y[0]+y[1]*x
+         [5]*z[0]-x[1]*y[0]*z[3]-x[1]*y[4]*z[0]-y[1]*x[5]*z[2]+x[2]*y[7]*z[3]+y[0]*x[4]*
+         z[3]-x[0]*y[4]*z[7]+x[1]*y[0]*z[5]-y[1]*x[6]*z[2]-y[2]*x[6]*z[3]+y[0]*x[7]*z[3]
+         -y[2]*x[7]*z[3]+z[2]*x[7]*y[3]+y[2]*x[0]*z[3];
+    s6 = s8+y[2]*x[3]*z[7]-y[2]*x[3]*z[0]-x[6]*y[5]*z[2]-y[5]*x[0]*z[4]+z[2]*
+         x[3]*y[0]+x[2]*y[3]*z[1]+x[0]*y[3]*z[7]-x[2]*y[1]*z[3]+y[1]*x[4]*z[0]+y[1]*x[0]
+         *z[2]-z[1]*x[2]*y[6]+y[2]*x[3]*z[6]-y[1]*x[2]*z[0]+z[1]*x[3]*y[0]-x[1]*y[2]*z
+         [6]-x[2]*y[3]*z[6]+x[0]*y[3]*z[4]+z[0]*x[3]*y[4]+s7;
+    s8 = x[5]*y[4]*z[7]+s6+y[5]*x[6]*z[4]-y[5]*x[4]*z[6]+z[6]*x[5]*y[7]-x[6]*
+         y[2]*z[7]-x[6]*y[7]*z[5]+x[5]*y[6]*z[2]+x[6]*y[5]*z[7]+x[6]*y[7]*z[2]+y[6]*x[7]
+         *z[4]-y[6]*x[4]*z[7]-y[6]*x[7]*z[3]+z[6]*x[7]*y[2]+x[2]*y[5]*z[6]-x[2]*y[6]*z
+         [5]+y[6]*x[2]*z[7]+x[6]*y[2]*z[5];
+    s7 = s8-x[5]*y[2]*z[6]-z[6]*x[7]*y[5]-z[5]*x[7]*y[4]+z[5]*x[0]*y[4]-y[5]*
+         x[4]*z[7]+y[0]*x[4]*z[7]-z[6]*x[2]*y[7]-x[5]*y[4]*z[0]-x[5]*y[7]*z[4]-y[0]*x[7]
+         *z[4]+y[5]*x[4]*z[1]-x[6]*y[7]*z[4]+x[7]*y[4]*z[3]-x[4]*y[7]*z[3]+x[3]*y[7]*z
+         [4]-x[7]*y[3]*z[4]-x[6]*y[3]*z[7]+x[6]*y[4]*z[7];
+    s8 = -x[3]*y[4]*z[7]+x[4]*y[3]*z[7]-z[6]*x[7]*y[4]-z[1]*x[6]*y[5]+x[6]*y
+         [7]*z[3]-x[1]*y[6]*z[5]-y[1]*x[5]*z[6]+z[5]*x[4]*y[7]-z[5]*x[4]*y[0]+x[1]*y[5]*
+         z[6]-y[6]*x[5]*z[7]-y[2]*x[3]*z[1]+z[1]*x[5]*y[6]-y[5]*x[1]*z[4]+z[6]*x[4]*y[7]
+         +x[5]*y[1]*z[4]-x[5]*y[6]*z[4]+y[6]*x[3]*z[7]-x[5]*y[4]*z[1];
+    s5 = s8+x[5]*y[4]*z[6]+z[5]*x[1]*y[4]+y[1]*x[6]*z[5]-z[6]*x[3]*y[7]+z[6]*
+         x[7]*y[3]-z[5]*x[6]*y[4]-z[5]*x[4]*y[1]+z[5]*x[4]*y[6]+x[1]*y[6]*z[2]+x[2]*y[6]
+         *z[3]+z[2]*x[6]*y[3]+z[1]*x[6]*y[2]+z[2]*x[3]*y[1]-z[2]*x[1]*y[3]-z[2]*x[3]*y
+         [6]+y[2]*x[1]*z[3]+y[1]*x[2]*z[6]-z[0]*x[7]*y[3]+s7;
+    s4 = 1/s5;
+    s2 = s3*s4;
+    const double unknown0 = s1*s2;
+    s1 = 1.0/6.0;
+    s8 = 2.0*x[1]*y[0]*y[0]*z[4]+x[5]*y[0]*y[0]*z[4]-x[1]*y[4]*y[4]*z[0]+z[1]
+         *x[0]*y[4]*y[4]+x[1]*y[0]*y[0]*z[5]-z[1]*x[5]*y[0]*y[0]-2.0*z[1]*x[4]*y[0]*y[0]
+         +2.0*z[1]*x[3]*y[0]*y[0]+z[2]*x[3]*y[0]*y[0]+y[0]*y[0]*x[7]*z[3]+2.0*y[0]*y[0]*
+         x[4]*z[3]-2.0*x[1]*y[0]*y[0]*z[3]-2.0*x[5]*y[4]*y[4]*z[0]+2.0*z[5]*x[0]*y[4]*y
+         [4]+2.0*y[4]*y[5]*x[7]*z[4];
+    s7 = s8-x[3]*y[4]*y[4]*z[7]+x[7]*y[4]*y[4]*z[3]+z[0]*x[3]*y[4]*y[4]-2.0*x
+         [0]*y[4]*y[4]*z[7]-y[1]*x[1]*y[4]*z[0]-x[0]*y[4]*y[4]*z[3]+2.0*z[0]*x[7]*y[4]*y
+         [4]+y[4]*z[6]*x[4]*y[7]-y[0]*y[0]*x[7]*z[4]+y[0]*y[0]*x[4]*z[7]+2.0*y[4]*z[5]*x
+         [4]*y[7]-2.0*y[4]*x[5]*y[7]*z[4]-y[4]*x[6]*y[7]*z[4]-y[4]*y[6]*x[4]*z[7]-2.0*y
+         [4]*y[5]*x[4]*z[7];
+    s8 = y[4]*y[6]*x[7]*z[4]-y[7]*y[2]*x[7]*z[3]+y[7]*z[2]*x[7]*y[3]+y[7]*y
+         [2]*x[3]*z[7]+2.0*x[5]*y[4]*y[4]*z[7]-y[7]*x[2]*y[3]*z[7]-y[0]*z[0]*x[4]*y[7]+z
+         [6]*x[7]*y[3]*y[3]-y[0]*x[0]*y[4]*z[7]+y[0]*x[0]*y[7]*z[4]-2.0*x[2]*y[3]*y[3]*z
+         [7]-z[5]*x[4]*y[0]*y[0]+y[0]*z[0]*x[7]*y[4]-2.0*z[6]*x[3]*y[7]*y[7]+z[1]*x[2]*y
+         [0]*y[0];
+    s6 = s8+y[4]*y[0]*x[4]*z[3]-2.0*y[4]*z[0]*x[4]*y[7]+2.0*y[4]*x[0]*y[7]*z
+         [4]-y[4]*z[0]*x[4]*y[3]-y[4]*x[0]*y[7]*z[3]+y[4]*z[0]*x[3]*y[7]-y[4]*y[0]*x[3]*
+         z[4]+y[0]*x[4]*y[3]*z[7]-y[0]*x[7]*y[3]*z[4]-y[0]*x[3]*y[4]*z[7]+y[0]*x[7]*y[4]
+         *z[3]+x[2]*y[7]*y[7]*z[3]-z[2]*x[3]*y[7]*y[7]-2.0*z[2]*x[0]*y[3]*y[3]+2.0*y[0]*
+         z[1]*x[0]*y[4]+s7;
+    s8 = -2.0*y[0]*y[1]*x[0]*z[4]-y[0]*y[1]*x[0]*z[5]-y[0]*y[0]*x[3]*z[7]-z
+         [1]*x[0]*y[3]*y[3]-y[0]*x[1]*y[5]*z[0]-2.0*z[0]*x[7]*y[3]*y[3]+x[0]*y[3]*y[3]*z
+         [4]+2.0*x[0]*y[3]*y[3]*z[7]-z[0]*x[4]*y[3]*y[3]+2.0*x[2]*y[3]*y[3]*z[0]+x[1]*y
+         [3]*y[3]*z[0]+2.0*y[7]*z[6]*x[7]*y[3]+2.0*y[7]*y[6]*x[3]*z[7]-2.0*y[7]*y[6]*x
+         [7]*z[3]-2.0*y[7]*x[6]*y[3]*z[7];
+    s7 = s8+y[4]*x[4]*y[3]*z[7]-y[4]*x[4]*y[7]*z[3]+y[4]*x[3]*y[7]*z[4]-y[4]*
+         x[7]*y[3]*z[4]+2.0*y[4]*y[0]*x[4]*z[7]-2.0*y[4]*y[0]*x[7]*z[4]+2.0*x[6]*y[7]*y
+         [7]*z[3]+y[4]*x[0]*y[3]*z[4]+y[0]*y[1]*x[5]*z[0]+y[0]*z[1]*x[0]*y[5]-x[2]*y[0]*
+         y[0]*z[3]+x[4]*y[3]*y[3]*z[7]-x[7]*y[3]*y[3]*z[4]-x[5]*y[4]*y[4]*z[1]+y[3]*z[0]
+         *x[3]*y[4];
+    s8 = y[3]*y[0]*x[4]*z[3]+2.0*y[3]*y[0]*x[7]*z[3]+2.0*y[3]*y[2]*x[0]*z[3]
+         -2.0*y[3]*y[2]*x[3]*z[0]+2.0*y[3]*z[2]*x[3]*y[0]+y[3]*z[1]*x[3]*y[0]-2.0*y[3]*x
+         [2]*y[0]*z[3]-y[3]*x[1]*y[0]*z[3]-y[3]*y[1]*x[3]*z[0]-2.0*y[3]*x[0]*y[7]*z[3]-y
+         [3]*x[0]*y[4]*z[3]-2.0*y[3]*y[0]*x[3]*z[7]-y[3]*y[0]*x[3]*z[4]+2.0*y[3]*z[0]*x
+         [3]*y[7]+y[3]*y[1]*x[0]*z[3]+z[5]*x[1]*y[4]*y[4];
+    s5 = s8-2.0*y[0]*y[0]*x[3]*z[4]-2.0*y[0]*x[1]*y[4]*z[0]+y[3]*x[7]*y[4]*z
+         [3]-y[3]*x[4]*y[7]*z[3]+y[3]*x[3]*y[7]*z[4]-y[3]*x[3]*y[4]*z[7]+y[3]*x[0]*y[7]*
+         z[4]-y[3]*z[0]*x[4]*y[7]-2.0*y[4]*y[5]*x[0]*z[4]+s6+y[7]*x[0]*y[3]*z[7]-y[7]*z
+         [0]*x[7]*y[3]+y[7]*y[0]*x[7]*z[3]-y[7]*y[0]*x[3]*z[7]+2.0*y[0]*y[1]*x[4]*z[0]+
+         s7;
+    s8 = -2.0*y[7]*x[7]*y[3]*z[4]-2.0*y[7]*x[3]*y[4]*z[7]+2.0*y[7]*x[4]*y[3]*
+         z[7]+y[7]*y[0]*x[4]*z[7]-y[7]*y[0]*x[7]*z[4]+2.0*y[7]*x[7]*y[4]*z[3]-y[7]*x[0]*
+         y[4]*z[7]+y[7]*z[0]*x[7]*y[4]+z[5]*x[4]*y[7]*y[7]+2.0*z[6]*x[4]*y[7]*y[7]-x[5]*
+         y[7]*y[7]*z[4]-2.0*x[6]*y[7]*y[7]*z[4]+2.0*y[7]*x[6]*y[4]*z[7]-2.0*y[7]*z[6]*x
+         [7]*y[4]+2.0*y[7]*y[6]*x[7]*z[4];
+    s7 = s8-2.0*y[7]*y[6]*x[4]*z[7]-y[7]*z[5]*x[7]*y[4]-y[7]*y[5]*x[4]*z[7]-x
+         [0]*y[7]*y[7]*z[3]+z[0]*x[3]*y[7]*y[7]+y[7]*x[5]*y[4]*z[7]+y[7]*y[5]*x[7]*z[4]-
+         y[4]*x[1]*y[5]*z[0]-x[1]*y[0]*y[0]*z[2]-y[4]*y[5]*x[1]*z[4]-2.0*y[4]*z[5]*x[4]*
+         y[0]-y[4]*y[1]*x[0]*z[4]+y[4]*y[5]*x[4]*z[1]+y[0]*z[0]*x[3]*y[7]-y[0]*z[1]*x[0]
+         *y[2];
+    s8 = 2.0*y[0]*x[1]*y[3]*z[0]+y[4]*y[1]*x[4]*z[0]+2.0*y[0]*y[1]*x[0]*z[3]+
+         y[4]*x[1]*y[0]*z[5]-y[4]*z[1]*x[5]*y[0]+y[4]*z[1]*x[0]*y[5]-y[4]*z[1]*x[4]*y[0]
+         +y[4]*x[1]*y[0]*z[4]-y[4]*z[5]*x[4]*y[1]+x[5]*y[4]*y[4]*z[6]-z[5]*x[6]*y[4]*y
+         [4]+y[4]*x[5]*y[1]*z[4]-y[0]*z[2]*x[0]*y[3]+y[0]*y[5]*x[4]*z[0]+y[0]*x[1]*y[2]*
+         z[0];
+    s6 = s8-2.0*y[0]*z[0]*x[4]*y[3]-2.0*y[0]*x[0]*y[4]*z[3]-2.0*y[0]*z[1]*x
+         [0]*y[3]-y[0]*x[0]*y[7]*z[3]-2.0*y[0]*y[1]*x[3]*z[0]+y[0]*x[2]*y[3]*z[0]-y[0]*y
+         [1]*x[2]*z[0]+y[0]*y[1]*x[0]*z[2]-y[0]*x[2]*y[1]*z[3]+y[0]*x[0]*y[3]*z[7]+y[0]*
+         x[2]*y[3]*z[1]-y[0]*y[2]*x[3]*z[0]+y[0]*y[2]*x[0]*z[3]-y[0]*y[5]*x[0]*z[4]-y[4]
+         *y[5]*x[4]*z[6]+s7;
+    s8 = s6+y[4]*z[6]*x[5]*y[7]-y[4]*x[6]*y[7]*z[5]+y[4]*x[6]*y[5]*z[7]-y[4]*
+         z[6]*x[7]*y[5]-y[4]*x[5]*y[6]*z[4]+y[4]*z[5]*x[4]*y[6]+y[4]*y[5]*x[6]*z[4]-2.0*
+         y[1]*y[1]*x[0]*z[5]+2.0*y[1]*y[1]*x[5]*z[0]-2.0*y[2]*y[2]*x[6]*z[3]+x[5]*y[1]*y
+         [1]*z[4]-z[5]*x[4]*y[1]*y[1]-x[6]*y[2]*y[2]*z[7]+z[6]*x[7]*y[2]*y[2];
+    s7 = s8-x[1]*y[5]*y[5]*z[0]+z[1]*x[0]*y[5]*y[5]+y[1]*y[5]*x[4]*z[1]-y[1]*
+         y[5]*x[1]*z[4]-2.0*y[2]*z[2]*x[3]*y[6]+2.0*y[1]*z[1]*x[0]*y[5]-2.0*y[1]*z[1]*x
+         [5]*y[0]+2.0*y[1]*x[1]*y[0]*z[5]-y[2]*x[2]*y[3]*z[7]-y[2]*z[2]*x[3]*y[7]+y[2]*x
+         [2]*y[7]*z[3]+y[2]*z[2]*x[7]*y[3]-2.0*y[2]*x[2]*y[3]*z[6]+2.0*y[2]*x[2]*y[6]*z
+         [3]+2.0*y[2]*z[2]*x[6]*y[3]-y[3]*y[2]*x[6]*z[3];
+    s8 = y[3]*y[2]*x[3]*z[6]+y[3]*x[2]*y[6]*z[3]-y[3]*z[2]*x[3]*y[6]-y[2]*y
+         [2]*x[7]*z[3]+2.0*y[2]*y[2]*x[3]*z[6]+y[2]*y[2]*x[3]*z[7]-2.0*y[1]*x[1]*y[5]*z
+         [0]-x[2]*y[3]*y[3]*z[6]+z[2]*x[6]*y[3]*y[3]+2.0*y[6]*x[2]*y[5]*z[6]+2.0*y[6]*x
+         [6]*y[2]*z[5]-2.0*y[6]*x[5]*y[2]*z[6]+2.0*y[3]*x[2]*y[7]*z[3]-2.0*y[3]*z[2]*x
+         [3]*y[7]-y[0]*z[0]*x[7]*y[3]-y[0]*z[2]*x[1]*y[3];
+    s4 = s8-y[2]*y[6]*x[7]*z[2]+y[0]*z[2]*x[3]*y[1]+y[1]*z[5]*x[1]*y[4]-y[1]*
+         x[5]*y[4]*z[1]+2.0*y[0]*z[0]*x[3]*y[4]+2.0*y[0]*x[0]*y[3]*z[4]+2.0*z[2]*x[7]*y
+         [3]*y[3]-2.0*z[5]*x[7]*y[4]*y[4]+x[6]*y[4]*y[4]*z[7]-z[6]*x[7]*y[4]*y[4]+y[1]*y
+         [1]*x[0]*z[3]+y[3]*x[6]*y[7]*z[2]-y[3]*z[6]*x[2]*y[7]+2.0*y[3]*y[2]*x[3]*z[7]+
+         s5+s7;
+    s8 = s4+y[2]*x[6]*y[7]*z[2]-y[2]*y[6]*x[7]*z[3]+y[2]*y[6]*x[2]*z[7]-y[2]*
+         z[6]*x[2]*y[7]-y[2]*x[6]*y[3]*z[7]+y[2]*y[6]*x[3]*z[7]+y[2]*z[6]*x[7]*y[3]-2.0*
+         y[3]*y[2]*x[7]*z[3]-x[6]*y[3]*y[3]*z[7]+y[1]*y[1]*x[4]*z[0]-y[1]*y[1]*x[3]*z[0]
+         +x[2]*y[6]*y[6]*z[3]-z[2]*x[3]*y[6]*y[6]-y[1]*y[1]*x[0]*z[4];
+    s7 = s8+y[5]*x[1]*y[0]*z[5]+y[6]*x[2]*y[7]*z[3]-y[6]*y[2]*x[6]*z[3]+y[6]*
+         y[2]*x[3]*z[6]-y[6]*x[2]*y[3]*z[6]+y[6]*z[2]*x[6]*y[3]-y[5]*y[1]*x[0]*z[5]-y[5]
+         *z[1]*x[5]*y[0]+y[5]*y[1]*x[5]*z[0]-y[6]*z[2]*x[3]*y[7]-y[7]*y[6]*x[7]*z[2]+2.0
+         *y[6]*y[6]*x[2]*z[7]+y[6]*y[6]*x[3]*z[7]+x[6]*y[7]*y[7]*z[2]-z[6]*x[2]*y[7]*y
+         [7];
+    s8 = -x[2]*y[1]*y[1]*z[3]+2.0*y[1]*y[1]*x[0]*z[2]-2.0*y[1]*y[1]*x[2]*z[0]
+         +z[2]*x[3]*y[1]*y[1]-z[1]*x[0]*y[2]*y[2]+x[1]*y[2]*y[2]*z[0]+y[2]*y[2]*x[0]*z
+         [3]-y[2]*y[2]*x[3]*z[0]-2.0*y[2]*y[2]*x[3]*z[1]+y[1]*x[1]*y[3]*z[0]-2.0*y[6]*y
+         [6]*x[7]*z[2]+2.0*y[5]*y[5]*x[4]*z[1]-2.0*y[5]*y[5]*x[1]*z[4]-y[6]*y[6]*x[7]*z
+         [3]-2.0*y[1]*x[1]*y[0]*z[2];
+    s6 = s8+2.0*y[1]*z[1]*x[2]*y[0]-2.0*y[1]*z[1]*x[0]*y[2]+2.0*y[1]*x[1]*y
+         [2]*z[0]+y[1]*x[2]*y[3]*z[1]-y[1]*y[2]*x[3]*z[1]-y[1]*z[2]*x[1]*y[3]+y[1]*y[2]*
+         x[1]*z[3]-y[2]*x[1]*y[0]*z[2]+y[2]*z[1]*x[2]*y[0]+y[2]*x[2]*y[3]*z[0]-y[7]*x[6]
+         *y[2]*z[7]+y[7]*z[6]*x[7]*y[2]+y[7]*y[6]*x[2]*z[7]-y[6]*x[6]*y[3]*z[7]+y[6]*x
+         [6]*y[7]*z[3]+s7;
+    s8 = s6-y[6]*z[6]*x[3]*y[7]+y[6]*z[6]*x[7]*y[3]+2.0*y[2]*y[2]*x[1]*z[3]+x
+         [2]*y[3]*y[3]*z[1]-z[2]*x[1]*y[3]*y[3]+y[1]*x[1]*y[0]*z[4]+y[1]*z[1]*x[3]*y[0]-
+         y[1]*x[1]*y[0]*z[3]+2.0*y[5]*x[5]*y[1]*z[4]-2.0*y[5]*x[5]*y[4]*z[1]+2.0*y[5]*z
+         [5]*x[1]*y[4]-2.0*y[5]*z[5]*x[4]*y[1]-2.0*y[6]*x[6]*y[2]*z[7]+2.0*y[6]*x[6]*y
+         [7]*z[2];
+    s7 = s8+2.0*y[6]*z[6]*x[7]*y[2]-2.0*y[6]*z[6]*x[2]*y[7]-y[1]*z[1]*x[4]*y
+         [0]+y[1]*z[1]*x[0]*y[4]-y[1]*z[1]*x[0]*y[3]+2.0*y[6]*y[6]*x[7]*z[5]+2.0*y[5]*y
+         [5]*x[6]*z[4]-2.0*y[5]*y[5]*x[4]*z[6]+x[6]*y[5]*y[5]*z[7]-y[3]*x[2]*y[1]*z[3]-y
+         [3]*y[2]*x[3]*z[1]+y[3]*z[2]*x[3]*y[1]+y[3]*y[2]*x[1]*z[3]-y[2]*x[2]*y[0]*z[3]+
+         y[2]*z[2]*x[3]*y[0];
+    s8 = s7+2.0*y[2]*x[2]*y[3]*z[1]-2.0*y[2]*x[2]*y[1]*z[3]+y[2]*y[1]*x[0]*z
+         [2]-y[2]*y[1]*x[2]*z[0]+2.0*y[2]*z[2]*x[3]*y[1]-2.0*y[2]*z[2]*x[1]*y[3]-y[2]*z
+         [2]*x[0]*y[3]+y[5]*z[6]*x[5]*y[7]-y[5]*x[6]*y[7]*z[5]-y[5]*y[6]*x[4]*z[7]-y[5]*
+         y[6]*x[5]*z[7]-2.0*y[5]*x[5]*y[6]*z[4]+2.0*y[5]*x[5]*y[4]*z[6]-2.0*y[5]*z[5]*x
+         [6]*y[4]+2.0*y[5]*z[5]*x[4]*y[6];
+    s5 = s8-y[1]*y[5]*x[0]*z[4]-z[6]*x[7]*y[5]*y[5]+y[6]*y[6]*x[7]*z[4]-y[6]*
+         y[6]*x[4]*z[7]-2.0*y[6]*y[6]*x[5]*z[7]-x[5]*y[6]*y[6]*z[4]+z[5]*x[4]*y[6]*y[6]+
+         z[6]*x[5]*y[7]*y[7]-x[6]*y[7]*y[7]*z[5]+y[1]*y[5]*x[4]*z[0]+y[7]*y[6]*x[7]*z[5]
+         +y[6]*y[5]*x[7]*z[4]+y[5]*y[6]*x[7]*z[5]+y[6]*y[5]*x[6]*z[4]-y[6]*y[5]*x[4]*z
+         [6]+2.0*y[6]*z[6]*x[5]*y[7];
+    s8 = s5-2.0*y[6]*x[6]*y[7]*z[5]+2.0*y[6]*x[6]*y[5]*z[7]-2.0*y[6]*z[6]*x
+         [7]*y[5]-y[6]*x[5]*y[7]*z[4]-y[6]*x[6]*y[7]*z[4]+y[6]*x[6]*y[4]*z[7]-y[6]*z[6]*
+         x[7]*y[4]+y[6]*z[5]*x[4]*y[7]+y[6]*z[6]*x[4]*y[7]+y[6]*x[5]*y[4]*z[6]-y[6]*z[5]
+         *x[6]*y[4]+y[7]*x[6]*y[5]*z[7]-y[7]*z[6]*x[7]*y[5]-2.0*y[6]*x[6]*y[5]*z[2];
+    s7 = s8-y[7]*y[6]*x[5]*z[7]+2.0*y[4]*y[5]*x[4]*z[0]+2.0*x[3]*y[7]*y[7]*z
+         [4]-2.0*x[4]*y[7]*y[7]*z[3]-z[0]*x[4]*y[7]*y[7]+x[0]*y[7]*y[7]*z[4]-y[0]*z[5]*x
+         [4]*y[1]+y[0]*x[5]*y[1]*z[4]-y[0]*x[5]*y[4]*z[0]+y[0]*z[5]*x[0]*y[4]-y[5]*y[5]*
+         x[0]*z[4]+y[5]*y[5]*x[4]*z[0]+2.0*y[1]*y[1]*x[2]*z[5]-2.0*y[1]*y[1]*x[5]*z[2]+z
+         [1]*x[5]*y[2]*y[2];
+    s8 = s7-x[1]*y[2]*y[2]*z[5]-y[5]*z[5]*x[4]*y[0]+y[5]*z[5]*x[0]*y[4]-y[5]*
+         x[5]*y[4]*z[0]-y[2]*x[1]*y[6]*z[5]-y[2]*y[1]*x[5]*z[6]+y[2]*z[1]*x[5]*y[6]+y[2]
+         *y[1]*x[6]*z[5]-y[1]*z[1]*x[6]*y[5]-y[1]*x[1]*y[6]*z[5]+y[1]*x[1]*y[5]*z[6]+y
+         [1]*z[1]*x[5]*y[6]+y[5]*x[5]*y[0]*z[4]+y[2]*y[1]*x[2]*z[5]-y[2]*z[1]*x[2]*y[5];
+    s6 = s8+y[2]*x[1]*y[5]*z[2]-y[2]*y[1]*x[5]*z[2]-y[1]*y[1]*x[5]*z[6]+y[1]*
+         y[1]*x[6]*z[5]-z[1]*x[2]*y[5]*y[5]+x[1]*y[5]*y[5]*z[2]+2.0*y[1]*z[1]*x[5]*y[2]
+         -2.0*y[1]*x[1]*y[2]*z[5]-2.0*y[1]*z[1]*x[2]*y[5]+2.0*y[1]*x[1]*y[5]*z[2]-y[1]*y
+         [1]*x[6]*z[2]+y[1]*y[1]*x[2]*z[6]-2.0*y[5]*x[1]*y[6]*z[5]-2.0*y[5]*y[1]*x[5]*z
+         [6]+2.0*y[5]*z[1]*x[5]*y[6]+2.0*y[5]*y[1]*x[6]*z[5];
+    s8 = s6-y[6]*z[1]*x[6]*y[5]-y[6]*y[1]*x[5]*z[6]+y[6]*x[1]*y[5]*z[6]+y[6]*
+         y[1]*x[6]*z[5]-2.0*z[1]*x[6]*y[5]*y[5]+2.0*x[1]*y[5]*y[5]*z[6]-x[1]*y[6]*y[6]*z
+         [5]+z[1]*x[5]*y[6]*y[6]+y[5]*z[1]*x[5]*y[2]-y[5]*x[1]*y[2]*z[5]+y[5]*y[1]*x[2]*
+         z[5]-y[5]*y[1]*x[5]*z[2]-y[6]*z[1]*x[2]*y[5]+y[6]*x[1]*y[5]*z[2];
+    s7 = s8-y[1]*z[1]*x[2]*y[6]-y[1]*x[1]*y[2]*z[6]+y[1]*x[1]*y[6]*z[2]+y[1]*
+         z[1]*x[6]*y[2]+y[5]*x[5]*y[6]*z[2]-y[5]*x[2]*y[6]*z[5]+y[5]*x[6]*y[2]*z[5]-y[5]
+         *x[5]*y[2]*z[6]-x[6]*y[5]*y[5]*z[2]+x[2]*y[5]*y[5]*z[6]-y[5]*y[5]*x[4]*z[7]+y
+         [5]*y[5]*x[7]*z[4]-y[1]*x[6]*y[5]*z[2]+y[1]*x[2]*y[5]*z[6]-y[2]*x[6]*y[5]*z[2]
+         -2.0*y[2]*y[1]*x[6]*z[2];
+    s8 = s7-2.0*y[2]*z[1]*x[2]*y[6]+2.0*y[2]*x[1]*y[6]*z[2]+2.0*y[2]*y[1]*x
+         [2]*z[6]-2.0*x[1]*y[2]*y[2]*z[6]+2.0*z[1]*x[6]*y[2]*y[2]+x[6]*y[2]*y[2]*z[5]-x
+         [5]*y[2]*y[2]*z[6]+2.0*x[5]*y[6]*y[6]*z[2]-2.0*x[2]*y[6]*y[6]*z[5]-z[1]*x[2]*y
+         [6]*y[6]-y[6]*y[1]*x[6]*z[2]-y[6]*x[1]*y[2]*z[6]+y[6]*z[1]*x[6]*y[2]+y[6]*y[1]*
+         x[2]*z[6]+x[1]*y[6]*y[6]*z[2];
+    s3 = s8+y[2]*x[5]*y[6]*z[2]+y[2]*x[2]*y[5]*z[6]-y[2]*x[2]*y[6]*z[5]+y[5]*
+         z[5]*x[4]*y[7]+y[5]*x[5]*y[4]*z[7]-y[5]*z[5]*x[7]*y[4]-y[5]*x[5]*y[7]*z[4]+2.0*
+         y[4]*x[5]*y[0]*z[4]-y[3]*z[6]*x[3]*y[7]+y[3]*y[6]*x[3]*z[7]+y[3]*x[6]*y[7]*z[3]
+         -y[3]*y[6]*x[7]*z[3]-y[2]*y[1]*x[3]*z[0]-y[2]*z[1]*x[0]*y[3]+y[2]*y[1]*x[0]*z
+         [3]+y[2]*x[1]*y[3]*z[0];
+    s8 = y[1]*x[0]*z[3]+x[1]*y[3]*z[0]-y[0]*x[3]*z[7]-x[1]*y[5]*z[0]-y[0]*x
+         [3]*z[4]-x[1]*y[0]*z[2]+z[1]*x[2]*y[0]-y[1]*x[0]*z[5]-z[1]*x[0]*y[2]-y[1]*x[0]*
+         z[4]+z[1]*x[5]*y[2]+z[0]*x[7]*y[4]+z[0]*x[3]*y[7]+z[1]*x[0]*y[4]-x[1]*y[2]*z[5]
+         +x[2]*y[3]*z[0]+y[1]*x[2]*z[5]-x[2]*y[3]*z[7];
+    s7 = s8-z[1]*x[2]*y[5]-y[1]*x[3]*z[0]-x[0]*y[7]*z[3]-z[1]*x[0]*y[3]+y[5]*
+         x[4]*z[0]-x[0]*y[4]*z[3]+y[5]*x[7]*z[4]-z[0]*x[4]*y[3]+x[1]*y[0]*z[4]-z[2]*x[3]
+         *y[7]-y[6]*x[7]*z[2]+x[1]*y[5]*z[2]+y[6]*x[7]*z[5]+x[0]*y[7]*z[4]+x[1]*y[2]*z
+         [0]-z[1]*x[4]*y[0]-z[0]*x[4]*y[7]-z[2]*x[0]*y[3];
+    s8 = x[5]*y[0]*z[4]+z[1]*x[0]*y[5]-x[2]*y[0]*z[3]-z[1]*x[5]*y[0]+y[1]*x
+         [5]*z[0]-x[1]*y[0]*z[3]-x[1]*y[4]*z[0]-y[1]*x[5]*z[2]+x[2]*y[7]*z[3]+y[0]*x[4]*
+         z[3]-x[0]*y[4]*z[7]+x[1]*y[0]*z[5]-y[1]*x[6]*z[2]-y[2]*x[6]*z[3]+y[0]*x[7]*z[3]
+         -y[2]*x[7]*z[3]+z[2]*x[7]*y[3]+y[2]*x[0]*z[3];
+    s6 = s8+y[2]*x[3]*z[7]-y[2]*x[3]*z[0]-x[6]*y[5]*z[2]-y[5]*x[0]*z[4]+z[2]*
+         x[3]*y[0]+x[2]*y[3]*z[1]+x[0]*y[3]*z[7]-x[2]*y[1]*z[3]+y[1]*x[4]*z[0]+y[1]*x[0]
+         *z[2]-z[1]*x[2]*y[6]+y[2]*x[3]*z[6]-y[1]*x[2]*z[0]+z[1]*x[3]*y[0]-x[1]*y[2]*z
+         [6]-x[2]*y[3]*z[6]+x[0]*y[3]*z[4]+z[0]*x[3]*y[4]+s7;
+    s8 = x[5]*y[4]*z[7]+s6+y[5]*x[6]*z[4]-y[5]*x[4]*z[6]+z[6]*x[5]*y[7]-x[6]*
+         y[2]*z[7]-x[6]*y[7]*z[5]+x[5]*y[6]*z[2]+x[6]*y[5]*z[7]+x[6]*y[7]*z[2]+y[6]*x[7]
+         *z[4]-y[6]*x[4]*z[7]-y[6]*x[7]*z[3]+z[6]*x[7]*y[2]+x[2]*y[5]*z[6]-x[2]*y[6]*z
+         [5]+y[6]*x[2]*z[7]+x[6]*y[2]*z[5];
+    s7 = s8-x[5]*y[2]*z[6]-z[6]*x[7]*y[5]-z[5]*x[7]*y[4]+z[5]*x[0]*y[4]-y[5]*
+         x[4]*z[7]+y[0]*x[4]*z[7]-z[6]*x[2]*y[7]-x[5]*y[4]*z[0]-x[5]*y[7]*z[4]-y[0]*x[7]
+         *z[4]+y[5]*x[4]*z[1]-x[6]*y[7]*z[4]+x[7]*y[4]*z[3]-x[4]*y[7]*z[3]+x[3]*y[7]*z
+         [4]-x[7]*y[3]*z[4]-x[6]*y[3]*z[7]+x[6]*y[4]*z[7];
+    s8 = -x[3]*y[4]*z[7]+x[4]*y[3]*z[7]-z[6]*x[7]*y[4]-z[1]*x[6]*y[5]+x[6]*y
+         [7]*z[3]-x[1]*y[6]*z[5]-y[1]*x[5]*z[6]+z[5]*x[4]*y[7]-z[5]*x[4]*y[0]+x[1]*y[5]*
+         z[6]-y[6]*x[5]*z[7]-y[2]*x[3]*z[1]+z[1]*x[5]*y[6]-y[5]*x[1]*z[4]+z[6]*x[4]*y[7]
+         +x[5]*y[1]*z[4]-x[5]*y[6]*z[4]+y[6]*x[3]*z[7]-x[5]*y[4]*z[1];
+    s5 = s8+x[5]*y[4]*z[6]+z[5]*x[1]*y[4]+y[1]*x[6]*z[5]-z[6]*x[3]*y[7]+z[6]*
+         x[7]*y[3]-z[5]*x[6]*y[4]-z[5]*x[4]*y[1]+z[5]*x[4]*y[6]+x[1]*y[6]*z[2]+x[2]*y[6]
+         *z[3]+z[2]*x[6]*y[3]+z[1]*x[6]*y[2]+z[2]*x[3]*y[1]-z[2]*x[1]*y[3]-z[2]*x[3]*y
+         [6]+y[2]*x[1]*z[3]+y[1]*x[2]*z[6]-z[0]*x[7]*y[3]+s7;
+    s4 = 1/s5;
+    s2 = s3*s4;
+    const double unknown1 = s1*s2;
+    s1 = 1.0/6.0;
+    s8 = -z[2]*x[1]*y[2]*z[5]+z[2]*y[1]*x[2]*z[5]-z[2]*z[1]*x[2]*y[5]+z[2]*z
+         [1]*x[5]*y[2]+2.0*y[5]*x[7]*z[4]*z[4]-y[1]*x[2]*z[0]*z[0]+x[0]*y[3]*z[7]*z[7]
+         -2.0*z[5]*z[5]*x[4]*y[1]+2.0*z[5]*z[5]*x[1]*y[4]+z[5]*z[5]*x[0]*y[4]-2.0*z[2]*z
+         [2]*x[1]*y[3]+2.0*z[2]*z[2]*x[3]*y[1]-x[0]*y[4]*z[7]*z[7]-y[0]*x[3]*z[7]*z[7]+x
+         [1]*y[0]*z[5]*z[5];
+    s7 = s8-y[1]*x[0]*z[5]*z[5]+z[1]*y[1]*x[2]*z[6]+y[1]*x[0]*z[2]*z[2]+z[2]*
+         z[2]*x[3]*y[0]-z[2]*z[2]*x[0]*y[3]-x[1]*y[0]*z[2]*z[2]+2.0*z[5]*z[5]*x[4]*y[6]
+         -2.0*z[5]*z[5]*x[6]*y[4]-z[5]*z[5]*x[7]*y[4]-x[6]*y[7]*z[5]*z[5]+2.0*z[2]*y[1]*
+         x[2]*z[6]-2.0*z[2]*x[1]*y[2]*z[6]+2.0*z[2]*z[1]*x[6]*y[2]-y[6]*x[5]*z[7]*z[7]+
+         2.0*x[6]*y[4]*z[7]*z[7];
+    s8 = -2.0*y[6]*x[4]*z[7]*z[7]+x[6]*y[5]*z[7]*z[7]-2.0*z[2]*z[1]*x[2]*y[6]
+         +z[4]*y[6]*x[7]*z[5]+x[5]*y[4]*z[6]*z[6]+z[6]*z[6]*x[4]*y[7]-z[6]*z[6]*x[7]*y
+         [4]-2.0*z[6]*z[6]*x[7]*y[5]+2.0*z[6]*z[6]*x[5]*y[7]-y[5]*x[4]*z[6]*z[6]+2.0*z
+         [0]*z[0]*x[3]*y[4]-x[6]*y[5]*z[2]*z[2]+z[1]*z[1]*x[5]*y[6]-z[1]*z[1]*x[6]*y[5]-
+         z[5]*z[5]*x[4]*y[0];
+    s6 = s8+2.0*x[1]*y[3]*z[0]*z[0]+2.0*x[1]*y[6]*z[2]*z[2]-2.0*y[1]*x[6]*z
+         [2]*z[2]-y[1]*x[5]*z[2]*z[2]-z[1]*z[1]*x[2]*y[6]-2.0*z[1]*z[1]*x[2]*y[5]+2.0*z
+         [1]*z[1]*x[5]*y[2]+z[1]*y[1]*x[6]*z[5]+y[1]*x[2]*z[5]*z[5]+z[2]*z[1]*x[2]*y[0]+
+         z[1]*x[1]*y[5]*z[6]-z[1]*x[1]*y[6]*z[5]-z[1]*y[1]*x[5]*z[6]-z[1]*x[2]*y[6]*z[5]
+         +z[1]*x[6]*y[2]*z[5]+s7;
+    s8 = -x[1]*y[2]*z[5]*z[5]+z[1]*x[5]*y[6]*z[2]-2.0*z[2]*z[2]*x[3]*y[6]+2.0
+         *z[2]*z[2]*x[6]*y[3]+z[2]*z[2]*x[7]*y[3]-z[2]*z[2]*x[3]*y[7]-z[1]*x[6]*y[5]*z
+         [2]+2.0*z[1]*x[1]*y[5]*z[2]-2.0*x[3]*y[4]*z[7]*z[7]+2.0*x[4]*y[3]*z[7]*z[7]+x
+         [5]*y[6]*z[2]*z[2]+y[1]*x[2]*z[6]*z[6]+y[0]*x[4]*z[7]*z[7]+z[2]*x[2]*y[3]*z[0]-
+         x[1]*y[2]*z[6]*z[6];
+    s7 = s8-z[7]*z[2]*x[3]*y[7]+x[2]*y[6]*z[3]*z[3]-y[2]*x[6]*z[3]*z[3]-z[6]*
+         x[2]*y[3]*z[7]-z[2]*z[1]*x[0]*y[2]+z[6]*z[2]*x[6]*y[3]-z[6]*z[2]*x[3]*y[6]+z[6]
+         *x[2]*y[6]*z[3]+z[2]*x[1]*y[2]*z[0]+z[6]*y[2]*x[3]*z[7]-z[4]*z[5]*x[6]*y[4]+z
+         [4]*z[5]*x[4]*y[6]-z[4]*y[6]*x[5]*z[7]+z[4]*z[6]*x[4]*y[7]+z[4]*x[5]*y[4]*z[6];
+    s8 = -z[6]*y[2]*x[6]*z[3]-z[4]*y[5]*x[4]*z[6]-z[2]*y[1]*x[5]*z[6]+z[2]*x
+         [1]*y[5]*z[6]+z[4]*x[6]*y[4]*z[7]+2.0*z[4]*z[5]*x[4]*y[7]-z[4]*z[6]*x[7]*y[4]+x
+         [6]*y[7]*z[3]*z[3]-2.0*z[4]*z[5]*x[7]*y[4]-2.0*z[4]*y[5]*x[4]*z[7]-z[4]*y[6]*x
+         [4]*z[7]+z[4]*x[6]*y[5]*z[7]-z[4]*x[6]*y[7]*z[5]+2.0*z[4]*x[5]*y[4]*z[7]+z[2]*x
+         [2]*y[5]*z[6]-z[2]*x[2]*y[6]*z[5];
+    s5 = s8+z[2]*x[6]*y[2]*z[5]-z[2]*x[5]*y[2]*z[6]-z[2]*x[2]*y[3]*z[7]-x[2]*
+         y[3]*z[7]*z[7]+2.0*z[2]*x[2]*y[3]*z[1]-z[2]*y[2]*x[3]*z[0]+z[2]*y[2]*x[0]*z[3]-
+         z[2]*x[2]*y[0]*z[3]-z[7]*y[2]*x[7]*z[3]+z[7]*z[2]*x[7]*y[3]+z[7]*x[2]*y[7]*z[3]
+         +z[6]*y[1]*x[2]*z[5]-z[6]*x[1]*y[2]*z[5]+z[5]*x[1]*y[5]*z[2]+s6+s7;
+    s8 = z[5]*z[1]*x[5]*y[2]-z[5]*z[1]*x[2]*y[5]-y[6]*x[7]*z[2]*z[2]+2.0*z[2]
+         *x[2]*y[6]*z[3]-2.0*z[2]*x[2]*y[3]*z[6]+2.0*z[2]*y[2]*x[3]*z[6]+y[2]*x[3]*z[6]*
+         z[6]+y[6]*x[7]*z[5]*z[5]+z[2]*y[2]*x[3]*z[7]-z[2]*y[2]*x[7]*z[3]-2.0*z[2]*y[2]*
+         x[6]*z[3]+z[2]*x[2]*y[7]*z[3]+x[6]*y[2]*z[5]*z[5]-2.0*z[2]*x[2]*y[1]*z[3]-x[2]*
+         y[6]*z[5]*z[5];
+    s7 = s8-y[1]*x[5]*z[6]*z[6]+z[6]*x[1]*y[6]*z[2]-z[3]*z[2]*x[3]*y[6]+z[6]*
+         z[1]*x[6]*y[2]-z[6]*z[1]*x[2]*y[6]-z[6]*y[1]*x[6]*z[2]-2.0*x[5]*y[2]*z[6]*z[6]+
+         z[4]*z[1]*x[0]*y[4]-z[3]*x[2]*y[3]*z[6]-z[5]*y[1]*x[5]*z[2]+z[3]*y[2]*x[3]*z[6]
+         +2.0*x[2]*y[5]*z[6]*z[6]-z[5]*x[1]*y[5]*z[0]+y[2]*x[3]*z[7]*z[7]-x[2]*y[3]*z[6]
+         *z[6];
+    s8 = z[5]*y[5]*x[4]*z[0]+z[3]*z[2]*x[6]*y[3]+x[1]*y[5]*z[6]*z[6]+z[5]*y
+         [5]*x[7]*z[4]-z[1]*x[1]*y[2]*z[6]+z[1]*x[1]*y[6]*z[2]+2.0*z[6]*y[6]*x[7]*z[5]-z
+         [7]*y[6]*x[7]*z[2]-z[3]*y[6]*x[7]*z[2]+x[6]*y[7]*z[2]*z[2]-2.0*z[6]*y[6]*x[7]*z
+         [2]-2.0*x[6]*y[3]*z[7]*z[7]-x[6]*y[2]*z[7]*z[7]-z[5]*x[6]*y[5]*z[2]+y[6]*x[2]*z
+         [7]*z[7];
+    s6 = s8+2.0*y[6]*x[3]*z[7]*z[7]+z[6]*z[6]*x[7]*y[3]-y[6]*x[7]*z[3]*z[3]+z
+         [5]*x[5]*y[0]*z[4]+2.0*z[6]*z[6]*x[7]*y[2]-2.0*z[6]*z[6]*x[2]*y[7]-z[6]*z[6]*x
+         [3]*y[7]+z[7]*y[6]*x[7]*z[5]+z[7]*y[5]*x[7]*z[4]-2.0*z[7]*x[7]*y[3]*z[4]+2.0*z
+         [7]*x[3]*y[7]*z[4]-2.0*z[7]*x[4]*y[7]*z[3]+2.0*z[7]*x[7]*y[4]*z[3]-z[7]*y[0]*x
+         [7]*z[4]-2.0*z[7]*z[6]*x[3]*y[7]+s7;
+    s8 = s6+2.0*z[7]*z[6]*x[7]*y[3]+2.0*z[7]*x[6]*y[7]*z[3]+z[7]*x[6]*y[7]*z
+         [2]-2.0*z[7]*y[6]*x[7]*z[3]+z[7]*z[6]*x[7]*y[2]-z[7]*z[6]*x[2]*y[7]+z[5]*y[1]*x
+         [5]*z[0]-z[5]*z[1]*x[5]*y[0]+2.0*y[1]*x[6]*z[5]*z[5]-2.0*x[1]*y[6]*z[5]*z[5]+z
+         [5]*z[1]*x[0]*y[5]+z[6]*y[6]*x[3]*z[7]+2.0*z[6]*x[6]*y[7]*z[2]-z[6]*y[6]*x[7]*z
+         [3];
+    s7 = s8+2.0*z[6]*y[6]*x[2]*z[7]-z[6]*x[6]*y[3]*z[7]+z[6]*x[6]*y[7]*z[3]
+         -2.0*z[6]*x[6]*y[2]*z[7]-2.0*z[1]*y[1]*x[5]*z[2]-z[1]*y[1]*x[6]*z[2]-z[7]*z[0]*
+         x[7]*y[3]-2.0*z[6]*x[6]*y[5]*z[2]-z[2]*z[6]*x[3]*y[7]+z[2]*x[6]*y[7]*z[3]-z[2]*
+         z[6]*x[2]*y[7]+y[5]*x[6]*z[4]*z[4]+z[2]*y[6]*x[2]*z[7]+y[6]*x[7]*z[4]*z[4]+z[2]
+         *z[6]*x[7]*y[2]-2.0*x[5]*y[7]*z[4]*z[4];
+    s8 = -x[6]*y[7]*z[4]*z[4]-z[5]*y[5]*x[0]*z[4]-z[2]*x[6]*y[2]*z[7]-x[5]*y
+         [6]*z[4]*z[4]-2.0*z[5]*y[1]*x[5]*z[6]+2.0*z[5]*z[1]*x[5]*y[6]+2.0*z[5]*x[1]*y
+         [5]*z[6]-2.0*z[5]*z[1]*x[6]*y[5]-z[5]*x[5]*y[2]*z[6]+z[5]*x[5]*y[6]*z[2]+z[5]*x
+         [2]*y[5]*z[6]+z[5]*z[5]*x[4]*y[7]-y[5]*x[4]*z[7]*z[7]+x[5]*y[4]*z[7]*z[7]+z[6]*
+         z[1]*x[5]*y[6]+z[6]*y[1]*x[6]*z[5];
+    s4 = s8-z[6]*z[1]*x[6]*y[5]-z[6]*x[1]*y[6]*z[5]+z[2]*z[6]*x[7]*y[3]+2.0*z
+         [6]*x[6]*y[2]*z[5]+2.0*z[6]*x[5]*y[6]*z[2]-2.0*z[6]*x[2]*y[6]*z[5]+z[7]*z[0]*x
+         [3]*y[7]+z[7]*z[0]*x[7]*y[4]+z[3]*z[6]*x[7]*y[3]-z[3]*z[6]*x[3]*y[7]-z[3]*x[6]*
+         y[3]*z[7]+z[3]*y[6]*x[2]*z[7]-z[3]*x[6]*y[2]*z[7]+z[5]*x[5]*y[4]*z[7]+s5+s7;
+    s8 = s4+z[3]*y[6]*x[3]*z[7]-z[7]*x[0]*y[7]*z[3]+z[6]*x[5]*y[4]*z[7]+z[7]*
+         y[0]*x[7]*z[3]+z[5]*z[6]*x[4]*y[7]-2.0*z[5]*x[5]*y[6]*z[4]+2.0*z[5]*x[5]*y[4]*z
+         [6]-z[5]*x[5]*y[7]*z[4]-z[5]*y[6]*x[5]*z[7]-z[5]*z[6]*x[7]*y[4]-z[7]*z[0]*x[4]*
+         y[7]-z[5]*z[6]*x[7]*y[5]-z[5]*y[5]*x[4]*z[7]+z[7]*x[0]*y[7]*z[4];
+    s7 = s8-2.0*z[5]*y[5]*x[4]*z[6]+z[5]*z[6]*x[5]*y[7]+z[5]*x[6]*y[5]*z[7]+
+         2.0*z[5]*y[5]*x[6]*z[4]+z[6]*z[5]*x[4]*y[6]-z[6]*x[5]*y[6]*z[4]-z[6]*z[5]*x[6]*
+         y[4]-z[6]*x[6]*y[7]*z[4]-2.0*z[6]*y[6]*x[5]*z[7]+z[6]*x[6]*y[4]*z[7]-z[6]*y[5]*
+         x[4]*z[7]-z[6]*y[6]*x[4]*z[7]+z[6]*y[6]*x[7]*z[4]+z[6]*y[5]*x[6]*z[4]+2.0*z[6]*
+         x[6]*y[5]*z[7];
+    s8 = -2.0*z[6]*x[6]*y[7]*z[5]-z[2]*y[1]*x[2]*z[0]+2.0*z[7]*z[6]*x[4]*y[7]
+         -2.0*z[7]*x[6]*y[7]*z[4]-2.0*z[7]*z[6]*x[7]*y[4]+z[7]*z[5]*x[4]*y[7]-z[7]*z[5]*
+         x[7]*y[4]-z[7]*x[5]*y[7]*z[4]+2.0*z[7]*y[6]*x[7]*z[4]-z[7]*z[6]*x[7]*y[5]+z[7]*
+         z[6]*x[5]*y[7]-z[7]*x[6]*y[7]*z[5]+z[1]*z[1]*x[6]*y[2]+s7+x[1]*y[5]*z[2]*z[2];
+    s6 = s8+2.0*z[2]*y[2]*x[1]*z[3]-2.0*z[2]*y[2]*x[3]*z[1]-2.0*x[1]*y[4]*z
+         [0]*z[0]+2.0*y[1]*x[4]*z[0]*z[0]+2.0*x[2]*y[7]*z[3]*z[3]-2.0*y[2]*x[7]*z[3]*z
+         [3]-x[1]*y[5]*z[0]*z[0]+z[0]*z[0]*x[7]*y[4]+z[0]*z[0]*x[3]*y[7]+x[2]*y[3]*z[0]*
+         z[0]-2.0*y[1]*x[3]*z[0]*z[0]+y[5]*x[4]*z[0]*z[0]-2.0*z[0]*z[0]*x[4]*y[3]+x[1]*y
+         [2]*z[0]*z[0]-z[0]*z[0]*x[4]*y[7]+y[1]*x[5]*z[0]*z[0];
+    s8 = s6-y[2]*x[3]*z[0]*z[0]+y[1]*x[0]*z[3]*z[3]-2.0*x[0]*y[7]*z[3]*z[3]-x
+         [0]*y[4]*z[3]*z[3]-2.0*x[2]*y[0]*z[3]*z[3]-x[1]*y[0]*z[3]*z[3]+y[0]*x[4]*z[3]*z
+         [3]-2.0*z[0]*y[1]*x[0]*z[4]+2.0*z[0]*z[1]*x[0]*y[4]+2.0*z[0]*x[1]*y[0]*z[4]-2.0
+         *z[0]*z[1]*x[4]*y[0]-2.0*z[3]*x[2]*y[3]*z[7]-2.0*z[3]*z[2]*x[3]*y[7]+2.0*z[3]*z
+         [2]*x[7]*y[3];
+    s7 = s8+2.0*z[3]*y[2]*x[3]*z[7]+2.0*z[5]*y[5]*x[4]*z[1]+2.0*z[0]*y[1]*x
+         [0]*z[3]-z[0]*y[0]*x[3]*z[7]-2.0*z[0]*y[0]*x[3]*z[4]-z[0]*x[1]*y[0]*z[2]+z[0]*z
+         [1]*x[2]*y[0]-z[0]*y[1]*x[0]*z[5]-z[0]*z[1]*x[0]*y[2]-z[0]*x[0]*y[7]*z[3]-2.0*z
+         [0]*z[1]*x[0]*y[3]-z[5]*x[5]*y[4]*z[0]-2.0*z[0]*x[0]*y[4]*z[3]+z[0]*x[0]*y[7]*z
+         [4]-z[0]*z[2]*x[0]*y[3];
+    s8 = s7+z[0]*x[5]*y[0]*z[4]+z[0]*z[1]*x[0]*y[5]-z[0]*x[2]*y[0]*z[3]-z[0]*
+         z[1]*x[5]*y[0]-2.0*z[0]*x[1]*y[0]*z[3]+2.0*z[0]*y[0]*x[4]*z[3]-z[0]*x[0]*y[4]*z
+         [7]+z[0]*x[1]*y[0]*z[5]+z[0]*y[0]*x[7]*z[3]+z[0]*y[2]*x[0]*z[3]-z[0]*y[5]*x[0]*
+         z[4]+z[0]*z[2]*x[3]*y[0]+z[0]*x[2]*y[3]*z[1]+z[0]*x[0]*y[3]*z[7]-z[0]*x[2]*y[1]
+         *z[3];
+    s5 = s8+z[0]*y[1]*x[0]*z[2]+z[3]*x[1]*y[3]*z[0]-2.0*z[3]*y[0]*x[3]*z[7]-z
+         [3]*y[0]*x[3]*z[4]-z[3]*x[1]*y[0]*z[2]+z[3]*z[0]*x[7]*y[4]+2.0*z[3]*z[0]*x[3]*y
+         [7]+2.0*z[3]*x[2]*y[3]*z[0]-z[3]*y[1]*x[3]*z[0]-z[3]*z[1]*x[0]*y[3]-z[3]*z[0]*x
+         [4]*y[3]+z[3]*x[1]*y[2]*z[0]-z[3]*z[0]*x[4]*y[7]-2.0*z[3]*z[2]*x[0]*y[3]-z[3]*x
+         [0]*y[4]*z[7]-2.0*z[3]*y[2]*x[3]*z[0];
+    s8 = s5+2.0*z[3]*z[2]*x[3]*y[0]+z[3]*x[2]*y[3]*z[1]+2.0*z[3]*x[0]*y[3]*z
+         [7]+z[3]*y[1]*x[0]*z[2]-z[4]*y[0]*x[3]*z[7]-z[4]*x[1]*y[5]*z[0]-z[4]*y[1]*x[0]*
+         z[5]+2.0*z[4]*z[0]*x[7]*y[4]+z[4]*z[0]*x[3]*y[7]+2.0*z[4]*y[5]*x[4]*z[0]+2.0*y
+         [0]*x[7]*z[3]*z[3]+2.0*y[2]*x[0]*z[3]*z[3]-x[2]*y[1]*z[3]*z[3]-y[0]*x[3]*z[4]*z
+         [4];
+    s7 = s8-y[1]*x[0]*z[4]*z[4]+x[1]*y[0]*z[4]*z[4]+2.0*x[0]*y[7]*z[4]*z[4]+
+         2.0*x[5]*y[0]*z[4]*z[4]-2.0*y[5]*x[0]*z[4]*z[4]+2.0*z[1]*z[1]*x[2]*y[0]-2.0*z
+         [1]*z[1]*x[0]*y[2]+z[1]*z[1]*x[0]*y[4]-z[1]*z[1]*x[0]*y[3]-z[1]*z[1]*x[4]*y[0]+
+         2.0*z[1]*z[1]*x[0]*y[5]-2.0*z[1]*z[1]*x[5]*y[0]+x[2]*y[3]*z[1]*z[1]-x[5]*y[4]*z
+         [0]*z[0]-z[0]*z[0]*x[7]*y[3];
+    s8 = s7+x[7]*y[4]*z[3]*z[3]-x[4]*y[7]*z[3]*z[3]+y[2]*x[1]*z[3]*z[3]+x[0]*
+         y[3]*z[4]*z[4]-2.0*y[0]*x[7]*z[4]*z[4]+x[3]*y[7]*z[4]*z[4]-x[7]*y[3]*z[4]*z[4]-
+         y[5]*x[1]*z[4]*z[4]+x[5]*y[1]*z[4]*z[4]+z[1]*z[1]*x[3]*y[0]+y[5]*x[4]*z[1]*z[1]
+         -y[2]*x[3]*z[1]*z[1]-x[5]*y[4]*z[1]*z[1]-z[4]*x[0]*y[4]*z[3]-z[4]*z[0]*x[4]*y
+         [3];
+    s6 = s8-z[4]*z[1]*x[4]*y[0]-2.0*z[4]*z[0]*x[4]*y[7]+z[4]*y[1]*x[5]*z[0]
+         -2.0*z[5]*x[5]*y[4]*z[1]-z[4]*x[1]*y[4]*z[0]+z[4]*y[0]*x[4]*z[3]-2.0*z[4]*x[0]*
+         y[4]*z[7]+z[4]*x[1]*y[0]*z[5]-2.0*z[1]*x[1]*y[2]*z[5]+z[4]*x[0]*y[3]*z[7]+2.0*z
+         [5]*x[5]*y[1]*z[4]+z[4]*y[1]*x[4]*z[0]+z[1]*y[1]*x[0]*z[3]+z[1]*x[1]*y[3]*z[0]
+         -2.0*z[1]*x[1]*y[5]*z[0]-2.0*z[1]*x[1]*y[0]*z[2];
+    s8 = s6-2.0*z[1]*y[1]*x[0]*z[5]-z[1]*y[1]*x[0]*z[4]+2.0*z[1]*y[1]*x[2]*z
+         [5]-z[1]*y[1]*x[3]*z[0]-2.0*z[5]*y[5]*x[1]*z[4]+z[1]*y[5]*x[4]*z[0]+z[1]*x[1]*y
+         [0]*z[4]+2.0*z[1]*x[1]*y[2]*z[0]-z[1]*z[2]*x[0]*y[3]+2.0*z[1]*y[1]*x[5]*z[0]-z
+         [1]*x[1]*y[0]*z[3]-z[1]*x[1]*y[4]*z[0]+2.0*z[1]*x[1]*y[0]*z[5]-z[1]*y[2]*x[3]*z
+         [0];
+    s7 = s8+z[1]*z[2]*x[3]*y[0]-z[1]*x[2]*y[1]*z[3]+z[1]*y[1]*x[4]*z[0]+2.0*z
+         [1]*y[1]*x[0]*z[2]+2.0*z[0]*z[1]*x[3]*y[0]+2.0*z[0]*x[0]*y[3]*z[4]+z[0]*z[5]*x
+         [0]*y[4]+z[0]*y[0]*x[4]*z[7]-z[0]*y[0]*x[7]*z[4]-z[0]*x[7]*y[3]*z[4]-z[0]*z[5]*
+         x[4]*y[0]-z[0]*x[5]*y[4]*z[1]+z[3]*z[1]*x[3]*y[0]+z[3]*x[0]*y[3]*z[4]+z[3]*z[0]
+         *x[3]*y[4]+z[3]*y[0]*x[4]*z[7];
+    s8 = s7+z[3]*x[3]*y[7]*z[4]-z[3]*x[7]*y[3]*z[4]-z[3]*x[3]*y[4]*z[7]+z[3]*
+         x[4]*y[3]*z[7]-z[3]*y[2]*x[3]*z[1]+z[3]*z[2]*x[3]*y[1]-z[3]*z[2]*x[1]*y[3]-2.0*
+         z[3]*z[0]*x[7]*y[3]+z[4]*z[0]*x[3]*y[4]+2.0*z[4]*z[5]*x[0]*y[4]+2.0*z[4]*y[0]*x
+         [4]*z[7]-2.0*z[4]*x[5]*y[4]*z[0]+z[4]*y[5]*x[4]*z[1]+z[4]*x[7]*y[4]*z[3]-z[4]*x
+         [4]*y[7]*z[3];
+    s3 = s8-z[4]*x[3]*y[4]*z[7]+z[4]*x[4]*y[3]*z[7]-2.0*z[4]*z[5]*x[4]*y[0]-z
+         [4]*x[5]*y[4]*z[1]+z[4]*z[5]*x[1]*y[4]-z[4]*z[5]*x[4]*y[1]-2.0*z[1]*y[1]*x[2]*z
+         [0]+z[1]*z[5]*x[0]*y[4]-z[1]*z[5]*x[4]*y[0]-z[1]*y[5]*x[1]*z[4]+z[1]*x[5]*y[1]*
+         z[4]+z[1]*z[5]*x[1]*y[4]-z[1]*z[5]*x[4]*y[1]+z[1]*z[2]*x[3]*y[1]-z[1]*z[2]*x[1]
+         *y[3]+z[1]*y[2]*x[1]*z[3];
+    s8 = y[1]*x[0]*z[3]+x[1]*y[3]*z[0]-y[0]*x[3]*z[7]-x[1]*y[5]*z[0]-y[0]*x
+         [3]*z[4]-x[1]*y[0]*z[2]+z[1]*x[2]*y[0]-y[1]*x[0]*z[5]-z[1]*x[0]*y[2]-y[1]*x[0]*
+         z[4]+z[1]*x[5]*y[2]+z[0]*x[7]*y[4]+z[0]*x[3]*y[7]+z[1]*x[0]*y[4]-x[1]*y[2]*z[5]
+         +x[2]*y[3]*z[0]+y[1]*x[2]*z[5]-x[2]*y[3]*z[7];
+    s7 = s8-z[1]*x[2]*y[5]-y[1]*x[3]*z[0]-x[0]*y[7]*z[3]-z[1]*x[0]*y[3]+y[5]*
+         x[4]*z[0]-x[0]*y[4]*z[3]+y[5]*x[7]*z[4]-z[0]*x[4]*y[3]+x[1]*y[0]*z[4]-z[2]*x[3]
+         *y[7]-y[6]*x[7]*z[2]+x[1]*y[5]*z[2]+y[6]*x[7]*z[5]+x[0]*y[7]*z[4]+x[1]*y[2]*z
+         [0]-z[1]*x[4]*y[0]-z[0]*x[4]*y[7]-z[2]*x[0]*y[3];
+    s8 = x[5]*y[0]*z[4]+z[1]*x[0]*y[5]-x[2]*y[0]*z[3]-z[1]*x[5]*y[0]+y[1]*x
+         [5]*z[0]-x[1]*y[0]*z[3]-x[1]*y[4]*z[0]-y[1]*x[5]*z[2]+x[2]*y[7]*z[3]+y[0]*x[4]*
+         z[3]-x[0]*y[4]*z[7]+x[1]*y[0]*z[5]-y[1]*x[6]*z[2]-y[2]*x[6]*z[3]+y[0]*x[7]*z[3]
+         -y[2]*x[7]*z[3]+z[2]*x[7]*y[3]+y[2]*x[0]*z[3];
+    s6 = s8+y[2]*x[3]*z[7]-y[2]*x[3]*z[0]-x[6]*y[5]*z[2]-y[5]*x[0]*z[4]+z[2]*
+         x[3]*y[0]+x[2]*y[3]*z[1]+x[0]*y[3]*z[7]-x[2]*y[1]*z[3]+y[1]*x[4]*z[0]+y[1]*x[0]
+         *z[2]-z[1]*x[2]*y[6]+y[2]*x[3]*z[6]-y[1]*x[2]*z[0]+z[1]*x[3]*y[0]-x[1]*y[2]*z
+         [6]-x[2]*y[3]*z[6]+x[0]*y[3]*z[4]+z[0]*x[3]*y[4]+s7;
+    s8 = x[5]*y[4]*z[7]+s6+y[5]*x[6]*z[4]-y[5]*x[4]*z[6]+z[6]*x[5]*y[7]-x[6]*
+         y[2]*z[7]-x[6]*y[7]*z[5]+x[5]*y[6]*z[2]+x[6]*y[5]*z[7]+x[6]*y[7]*z[2]+y[6]*x[7]
+         *z[4]-y[6]*x[4]*z[7]-y[6]*x[7]*z[3]+z[6]*x[7]*y[2]+x[2]*y[5]*z[6]-x[2]*y[6]*z
+         [5]+y[6]*x[2]*z[7]+x[6]*y[2]*z[5];
+    s7 = s8-x[5]*y[2]*z[6]-z[6]*x[7]*y[5]-z[5]*x[7]*y[4]+z[5]*x[0]*y[4]-y[5]*
+         x[4]*z[7]+y[0]*x[4]*z[7]-z[6]*x[2]*y[7]-x[5]*y[4]*z[0]-x[5]*y[7]*z[4]-y[0]*x[7]
+         *z[4]+y[5]*x[4]*z[1]-x[6]*y[7]*z[4]+x[7]*y[4]*z[3]-x[4]*y[7]*z[3]+x[3]*y[7]*z
+         [4]-x[7]*y[3]*z[4]-x[6]*y[3]*z[7]+x[6]*y[4]*z[7];
+    s8 = -x[3]*y[4]*z[7]+x[4]*y[3]*z[7]-z[6]*x[7]*y[4]-z[1]*x[6]*y[5]+x[6]*y
+         [7]*z[3]-x[1]*y[6]*z[5]-y[1]*x[5]*z[6]+z[5]*x[4]*y[7]-z[5]*x[4]*y[0]+x[1]*y[5]*
+         z[6]-y[6]*x[5]*z[7]-y[2]*x[3]*z[1]+z[1]*x[5]*y[6]-y[5]*x[1]*z[4]+z[6]*x[4]*y[7]
+         +x[5]*y[1]*z[4]-x[5]*y[6]*z[4]+y[6]*x[3]*z[7]-x[5]*y[4]*z[1];
+    s5 = s8+x[5]*y[4]*z[6]+z[5]*x[1]*y[4]+y[1]*x[6]*z[5]-z[6]*x[3]*y[7]+z[6]*
+         x[7]*y[3]-z[5]*x[6]*y[4]-z[5]*x[4]*y[1]+z[5]*x[4]*y[6]+x[1]*y[6]*z[2]+x[2]*y[6]
+         *z[3]+z[2]*x[6]*y[3]+z[1]*x[6]*y[2]+z[2]*x[3]*y[1]-z[2]*x[1]*y[3]-z[2]*x[3]*y
+         [6]+y[2]*x[1]*z[3]+y[1]*x[2]*z[6]-z[0]*x[7]*y[3]+s7;
+    s4 = 1/s5;
+    s2 = s3*s4;
+    const double unknown2 = s1*s2;
+
+    return Point<3> (unknown0, unknown1, unknown2);
+  }
+
+
+
+  template <int structdim, int dim, int spacedim>
+  Point<spacedim>
+  barycenter (const TriaAccessor<structdim, dim, spacedim> &)
+  {
+    // generic fallback for every (structdim, dim, spacedim) combination
+    // not explicitly handled above; the accessor argument is unnamed and unused
+    Assert (false, ExcNotImplemented()); // reaching this overload is always flagged as "not implemented"
+    return Point<spacedim>(); // default-constructed point, reached only if the Assert does not stop execution
+  }
+
+
+
+
+  template <int dim, int spacedim>
+  double
+  measure (const TriaAccessor<1, dim, spacedim> &accessor)
+  {
+    // measure of a line: remember that we use (dim-)linear mappings, so
+    // the length is simply the distance between the line's two vertices
+    return (accessor.vertex(1)-accessor.vertex(0)).norm();
+  }
+
+
+
+  double
+  measure (const TriaAccessor<2,2,2> &accessor)
+  {
+    // a dimension-independent formula is awkward, so 2D and 3D get
+    // separate functions. note that the expression returned below is
+    // the integral of det J without an absolute value, so its sign
+    // flips when, e.g., vertices 1 and 2 are exchanged
+    /*
+      Get the computation of the measure by this little Maple script. We
+      use the bilinear mapping of the unit quad to the real quad. However,
+      every transformation mapping the unit faces to straight lines should
+      do.
+
+      Remember that the area of the quad is given by
+      \int_K 1 dx dy  = \int_{\hat K} |det J| d(xi) d(eta)
+
+      # x and y are arrays holding the x- and y-values of the four vertices
+      # of this cell in real space.
+      x := array(0..3);
+      y := array(0..3);
+      tphi[0] := (1-xi)*(1-eta):
+      tphi[1] :=     xi*(1-eta):
+      tphi[2] := (1-xi)*eta:
+      tphi[3] :=     xi*eta:
+      x_real := sum(x[s]*tphi[s], s=0..3):
+      y_real := sum(y[s]*tphi[s], s=0..3):
+      detJ := diff(x_real,xi)*diff(y_real,eta) - diff(x_real,eta)*diff(y_real,xi):
+
+      measure := simplify ( int ( int (detJ, xi=0..1), eta=0..1)):
+      readlib(C):
+
+      C(measure, optimized);
+
+      additional optimization: divide by 2 only one time
+    */
+
+    const double x[4] = { accessor.vertex(0)(0), // first coordinate of each of the four vertices
+                          accessor.vertex(1)(0),
+                          accessor.vertex(2)(0),
+                          accessor.vertex(3)(0)
+                        };
+    const double y[4] = { accessor.vertex(0)(1), // second coordinate of each of the four vertices
+                          accessor.vertex(1)(1),
+                          accessor.vertex(2)(1),
+                          accessor.vertex(3)(1)
+                        };
+
+    return (-x[1]*y[0]+x[1]*y[3]+y[0]*x[2]+x[0]*y[1]-x[0]*y[2]-y[1]*x[3]-x[2]*y[3]+x[3]*y[2])/2;
+  }
+
+
+  double
+  measure (const TriaAccessor<3, 3, 3> &accessor)
+  {
+    unsigned int vertex_indices[GeometryInfo<3>::vertices_per_cell]; // one entry per vertex of the cell
+    for (unsigned int i=0; i<GeometryInfo<3>::vertices_per_cell; ++i)
+      vertex_indices[i] = accessor.vertex_index(i);
+    // the measure itself is not computed here but delegated to GridTools::cell_measure()
+    return GridTools::cell_measure<3>(accessor.get_triangulation().get_vertices(),
+                                      vertex_indices);
+  }
+
+
+  // measure (area) of a 2d face in 3d space; only planar faces are handled
+  double measure (const dealii::TriaAccessor<2,3,3> &accessor)
+  {
+    // If the face is planar, the diagonal from vertex 0 to vertex 3,
+    // v_03, should be in the plane P_012 of vertices 0, 1 and 2.  Get
+    // the normal vector of P_012 and test if v_03 is orthogonal to
+    // that. If so, the face is planar and computing its area is simple.
+    const Tensor<1,3> v01 = accessor.vertex(1) - accessor.vertex(0);
+    const Tensor<1,3> v02 = accessor.vertex(2) - accessor.vertex(0);
+
+    Tensor<1,3> normal = cross_product_3d(v01, v02); // not normalized: its length is |v01 x v02|
+
+    const Tensor<1,3> v03 = accessor.vertex(3) - accessor.vertex(0);
+
+    // check whether v03 does not lie in the plane of v01 and v02
+    // (i.e., whether the face is not planar). we do so by comparing
+    // the triple product (v01 x v02) * v03 against the product
+    // |v01|*|v02|*|v03|. the test uses the squares of both
+    // quantities to avoid taking norms/square roots:
+    if (std::abs((v03 * normal) * (v03 * normal) /
+                 ((v03 * v03) * (v01 * v01) * (v02 * v02)))
+        >=
+        1e-24) // tolerance on the squared ratio, i.e. 1e-12 on the ratio itself
+      {
+        Assert (false,
+                ExcMessage("Computing the measure of a nonplanar face is not implemented!"));
+        return std::numeric_limits<double>::quiet_NaN(); // result for nonplanar faces if the Assert does not stop execution
+      }
+
+    // the face is planar. then its area is 1/2 of the norm of the
+    // cross product of the two diagonals v03 and v12
+    const Tensor<1,3> v12 = accessor.vertex(2) - accessor.vertex(1);
+    Tensor<1,3> twice_area = cross_product_3d(v03, v12);
+    return 0.5 * twice_area.norm();
+  }
+
+
+
+  template <int structdim, int dim, int spacedim>
+  double
+  measure (const TriaAccessor<structdim, dim, spacedim> &)
+  {
+    // generic fallback for all template argument combinations not
+    // explicitly listed above; the accessor argument is unnamed and unused
+    Assert (false, ExcNotImplemented()); // reaching this overload is always flagged as "not implemented"
+    return std::numeric_limits<double>::quiet_NaN(); // NaN result, reached only if the Assert does not stop execution
+  }
+
+
+  template <int dim, int spacedim>
+  Point<spacedim> get_new_point_on_object(const TriaAccessor<1, dim, spacedim> &obj)
+  {
+    TriaIterator<TriaAccessor<1,dim,spacedim> > it(obj); // the manifold call below is handed an iterator, not the accessor
+    return obj.get_manifold().get_new_point_on_line(it); // structdim 1: ask the object's manifold for the new point on a line
+  }
+
+  template <int dim, int spacedim>
+  Point<spacedim> get_new_point_on_object(const TriaAccessor<2, dim, spacedim> &obj)
+  {
+    TriaIterator<TriaAccessor<2,dim,spacedim> > it(obj); // the manifold call below is handed an iterator, not the accessor
+    return obj.get_manifold().get_new_point_on_quad(it); // structdim 2: ask the object's manifold for the new point on a quad
+  }
+
+  template <int dim, int spacedim>
+  Point<spacedim> get_new_point_on_object(const TriaAccessor<3, dim, spacedim> &obj)
+  {
+    TriaIterator<TriaAccessor<3,dim,spacedim> > it(obj); // the manifold call below is handed an iterator, not the accessor
+    return obj.get_manifold().get_new_point_on_hex(it); // structdim 3: ask the object's manifold for the new point on a hex
+  }
+
+  template <int structdim, int dim, int spacedim>
+  Point<spacedim> get_new_point_on_object(const TriaAccessor<structdim, dim, spacedim> &obj,
+                                          const bool use_laplace)
+  {
+    if (use_laplace == false)
+      return get_new_point_on_object(obj); // forwards to the one-argument overload matching structdim
+    else
+      {
+        TriaRawIterator<TriaAccessor<structdim, dim, spacedim> > it(obj);
+        Quadrature<spacedim> quadrature = Manifolds::get_default_quadrature(it, use_laplace); // use_laplace is true on this path
+        return obj.get_manifold().get_new_point(quadrature); // the manifold computes the point from that quadrature
+      }
+  }
+}
+
+
+
+/*------------------------ Static variables: TriaAccessorBase ---------------------------*/
+// Namespace-scope definitions of three static constant members; no initializer is given here.
+template <int structdim, int dim, int spacedim>
+const unsigned int TriaAccessorBase<structdim, dim, spacedim>::dimension;
+
+template <int structdim, int dim, int spacedim>
+const unsigned int TriaAccessorBase<structdim, dim, spacedim>::space_dimension;
+
+template <int structdim, int dim, int spacedim>
+const unsigned int TriaAccessorBase<structdim, dim, spacedim>::structure_dimension;
+
+
+/*------------------------ Functions: TriaAccessor ---------------------------*/
+
+template <int structdim, int dim, int spacedim>
+void
+TriaAccessor<structdim, dim, spacedim>::
+set (const internal::Triangulation::TriaObject<structdim> &object) const
+{
+  this->objects().cells[this->present_index] = object; // overwrite the entry stored at this accessor's index
+}
+
+
+
+template <int structdim, int dim, int spacedim>
+Point<spacedim>
+TriaAccessor<structdim, dim, spacedim>::
+barycenter () const
+{
+  // forward to the free barycenter() helper functions defined above;
+  // overload resolution on the type of *this picks the implementation
+  return dealii::barycenter (*this);
+}
+
+
+
+template <int structdim, int dim, int spacedim>
+double
+TriaAccessor<structdim, dim, spacedim>::
+measure () const
+{
+  // forward to the free measure() helper functions defined above;
+  // overload resolution on the type of *this picks the implementation
+  return dealii::measure (*this);
+}
+
+
+
+template <>
+double TriaAccessor<1,1,1>::extent_in_direction(const unsigned int axis) const
+{
+  (void)axis; // axis is otherwise used only inside the Assert; presumably this avoids an unused-parameter warning
+  Assert (axis == 0, ExcIndexRange (axis, 0, 1)); // 0 is the only axis accepted for a line
+
+  return this->diameter(); // the extent of a line is its diameter
+}
+
+
+template <>
+double TriaAccessor<1,1,2>::extent_in_direction(const unsigned int axis) const
+{
+  (void)axis; // axis is otherwise used only inside the Assert; presumably this avoids an unused-parameter warning
+  Assert (axis == 0, ExcIndexRange (axis, 0, 1)); // 0 is the only axis accepted for a line
+
+  return this->diameter(); // the extent of a line is its diameter
+}
+
+
+template <>
+double TriaAccessor<2,2,2>::extent_in_direction(const unsigned int axis) const
+{
+  const unsigned int lines[2][2] = {{2,3}, /// Lines along x-axis, see GeometryInfo
+    {0,1} /// Lines along y-axis
+  };
+
+  Assert (axis < 2, ExcIndexRange (axis, 0, 2));
+
+  return std::max(this->line(lines[axis][0])->diameter(), // the extent is the larger diameter of the
+                  this->line(lines[axis][1])->diameter()); // two lines listed for the requested axis
+}
+
+template <>
+double TriaAccessor<2,2,3>::extent_in_direction(const unsigned int axis) const
+{
+  const unsigned int lines[2][2] = {{2,3}, /// Lines along x-axis, see GeometryInfo
+    {0,1} /// Lines along y-axis
+  };
+
+  Assert (axis < 2, ExcIndexRange (axis, 0, 2));
+
+  return std::max(this->line(lines[axis][0])->diameter(), // the extent is the larger diameter of the
+                  this->line(lines[axis][1])->diameter()); // two lines listed for the requested axis
+}
+
+
+template <>
+double TriaAccessor<3,3,3>::extent_in_direction(const unsigned int axis) const
+{
+  const unsigned int lines[3][4] = {{2,3,6,7},     /// Lines along x-axis, see GeometryInfo
+    {0,1,4,5},    /// Lines along y-axis
+    {8,9,10,11}   /// Lines along z-axis
+  };
+
+  Assert (axis < 3, ExcIndexRange (axis, 0, 3));
+
+  double lengths[4] = { this->line(lines[axis][0])->diameter(), // diameters of the four lines listed for the requested axis
+                        this->line(lines[axis][1])->diameter(),
+                        this->line(lines[axis][2])->diameter(),
+                        this->line(lines[axis][3])->diameter()
+                      };
+
+  return std::max(std::max(lengths[0], lengths[1]), // the extent is the largest of the four
+                  std::max(lengths[2], lengths[3]));
+}
+
+
+// Set the manifold id on this hex, recursively on all its children, and on its bounding quads and lines.
+template <>
+void
+TriaAccessor<3,3,3>::
+set_all_manifold_ids (const types::manifold_id manifold_ind) const
+{
+  set_manifold_id (manifold_ind); // the hex itself
+
+  if (this->has_children())
+    for (unsigned int c=0; c<this->n_children(); ++c)
+      this->child(c)->set_all_manifold_ids (manifold_ind); // each child repeats this whole function
+
+  // for hexes also set manifold_id
+  // of bounding quads and lines
+
+  // Six bounding quads
+  for (unsigned int i=0; i<6; ++i)
+    this->quad(i)->set_manifold_id(manifold_ind);
+  // Twelve bounding lines
+  for (unsigned int i=0; i<12; ++i)
+    this->line(i)->set_manifold_id (manifold_ind);
+}
+
+
+template <int structdim, int dim, int spacedim>
+Point<spacedim>
+TriaAccessor<structdim, dim, spacedim>::intermediate_point (const Point<structdim> &coordinates) const
+{
+  // We use an FE_Q<structdim>(1) to extract the "weights" of each
+  // vertex, used to get a point from the manifold.
+  static FE_Q<structdim> fe(1); // function-local static: constructed once and reused by later calls
+
+  // Surrounding points and weights.
+  std::vector<Point<spacedim> > p(GeometryInfo<structdim>::vertices_per_cell);
+  std::vector<double>   w(GeometryInfo<structdim>::vertices_per_cell);
+
+  for (unsigned int i=0; i<GeometryInfo<structdim>::vertices_per_cell; ++i)
+    {
+      p[i] = this->vertex(i); // i-th vertex of this object
+      w[i] = fe.shape_value(i, coordinates); // value of the i-th shape function at the given coordinates
+    }
+
+  Quadrature<spacedim> quadrature(p, w); // pairs each vertex with its weight
+  return this->get_manifold().get_new_point(quadrature); // the manifold computes the point from that quadrature
+}
+
+
+template <int structdim, int dim, int spacedim>
+Point<spacedim>
+TriaAccessor<structdim, dim, spacedim>::center (const bool respect_manifold,
+                                                const bool use_laplace) const
+{
+  if (respect_manifold == false) // manifold ignored: plain arithmetic mean of the vertices
+    {
+      Assert(use_laplace == false, ExcNotImplemented()); // use_laplace is only supported together with respect_manifold
+      Point<spacedim> p;
+      for (unsigned int v=0; v<GeometryInfo<structdim>::vertices_per_cell; ++v)
+        p += vertex(v);
+      return p/GeometryInfo<structdim>::vertices_per_cell;
+    }
+  else
+    return get_new_point_on_object(*this, use_laplace); // two-argument helper defined earlier in this file
+}
+
+
+/*------------------------ Functions: CellAccessor<1> -----------------------*/
+
+
+
+template <>
+bool CellAccessor<1>::point_inside (const Point<1> &p) const
+{
+  return (this->vertex(0)[0] <= p[0]) && (p[0] <= this->vertex(1)[0]); // closed interval; always false if vertex(0)[0] > vertex(1)[0]
+}
+
+
+
+
+
+
+/*------------------------ Functions: CellAccessor<2> -----------------------*/
+
+
+
+template <>
+bool CellAccessor<2>::point_inside (const Point<2> &p) const
+{
+  // we check whether the point is
+  // inside the cell by making sure
+  // that it is on the inner side of
+  // each line defined by the faces,
+  // i.e. for each of the four faces
+  // we take the line that connects
+  // the two vertices and subdivide
+  // the whole domain by that in two
+  // and check whether the point is
+  // on the `cell-side' (rather than
+  // the `out-side') of this line. if
+  // the point is on the `cell-side'
+  // for all four faces, it must be
+  // inside the cell.
+
+  // we want the faces in counter clockwise orientation:
+  // the face vectors of faces 0 and 3 get their sign flipped
+  static const int direction[4]= {-1,1,1,-1};
+  for (unsigned int f=0; f<4; ++f)
+    {
+      // vector from the first vertex
+      // of the line to the point
+      const Tensor<1,2> to_p = p-this->vertex(
+                                 GeometryInfo<2>::face_to_cell_vertices(f,0));
+      // vector describing the line, with the sign from direction[f] applied
+      const Tensor<1,2> face = direction[f]*(
+                                 this->vertex(GeometryInfo<2>::face_to_cell_vertices(f,1)) -
+                                 this->vertex(GeometryInfo<2>::face_to_cell_vertices(f,0)));
+
+      // if we rotate the face vector
+      // by 90 degrees to the left
+      // (i.e. it points to the
+      // inside) and take the scalar
+      // product with the vector from
+      // the vertex to the point,
+      // then the point is on the
+      // `cell-side' if the scalar
+      // product is positive. only a
+      // strictly negative product
+      // rejects the point, so a zero
+      // product (on the line) passes
+      if ((-face[1]*to_p[0]+face[0]*to_p[1])<0)
+        return false;
+    };
+
+  // if we arrived here, then the
+  // point passed the test for all
+  // four faces, and is thus inside
+  return true;
+}
+
+
+
+
+
+
+
+/*------------------------ Functions: CellAccessor<3> -----------------------*/
+
+
+
+template <>
+bool CellAccessor<3>::point_inside (const Point<3> &p) const
+{
+  // original implementation by Joerg
+  // Weimar
+
+  // we first eliminate points based
+  // on the maximum and minimum of
+  // the corner coordinates, then
+  // transform to the unit cell, and
+  // check there.
+  const unsigned int dim = 3;
+  const unsigned int spacedim = 3;
+  Point<spacedim> maxp = this->vertex(0); // both corners of the bounding box start at vertex 0
+  Point<spacedim> minp = this->vertex(0);
+
+  for (unsigned int v=1; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+    for (unsigned int d=0; d<dim; ++d)
+      {
+        maxp[d] = std::max (maxp[d],this->vertex(v)[d]); // componentwise maximum over the vertices
+        minp[d] = std::min (minp[d],this->vertex(v)[d]); // componentwise minimum over the vertices
+      }
+
+  // rule out points outside the
+  // bounding box of this cell
+  for (unsigned int d=0; d<dim; d++)
+    if ((p[d] < minp[d]) || (p[d] > maxp[d]))
+      return false;
+
+  // now we need to check more carefully: transform to the
+  // unit cube and check there. unfortunately, this isn't
+  // completely trivial since the transform_real_to_unit_cell
+  // function may throw an exception that indicates that the
+  // point given could not be inverted. we take this as a sign
+  // that the point actually lies outside, as also documented
+  // for that function
+  try
+    {
+      const TriaRawIterator< CellAccessor<dim,spacedim> > cell_iterator (*this);
+      return (GeometryInfo<dim>::is_inside_unit_cell
+              (StaticMappingQ1<dim,spacedim>::mapping.transform_real_to_unit_cell(cell_iterator, p)));
+    }
+  catch (const Mapping<dim,spacedim>::ExcTransformationFailed &)
+    {
+      return false; // the transformation could not be inverted: treat the point as outside
+    }
+}
+
+
+
+
+
+/*------------------------ Functions: CellAccessor<dim,spacedim> -----------------------*/
+
+// For codim>0 we proceed as follows:
+// 1) project point onto manifold and (NOTE(review): no separate projection call is visible here; presumably done inside the mapping call - confirm)
+// 2) transform to the unit cell with a Q1 mapping
+// 3) then check if inside unit cell
+template<int dim, int spacedim>
+template<int dim_,int spacedim_ >
+bool CellAccessor<dim,spacedim>::
+point_inside_codim(const Point<spacedim_> &p) const
+{
+  const TriaRawIterator< CellAccessor<dim_,spacedim_> > cell_iterator (*this);
+  const Point< dim_ > p_unit =
+    StaticMappingQ1<dim_,spacedim_>::mapping.transform_real_to_unit_cell(cell_iterator, p);
+  // unlike CellAccessor<3>::point_inside, an exception thrown by the call above is not caught here
+  return GeometryInfo< dim_ >::is_inside_unit_cell(p_unit);
+
+}
+
+
+
+template <>
+bool CellAccessor<1,2>::point_inside (const Point<2> &p) const
+{
+  return point_inside_codim<1,2>(p); // 1d cell in 2d space: use the shared codimension helper
+}
+
+
+template <>
+bool CellAccessor<1,3>::point_inside (const Point<3> &p) const
+{
+  return point_inside_codim<1,3>(p); // 1d cell in 3d space: use the shared codimension helper
+}
+
+
+template <>
+bool CellAccessor<2,3>::point_inside (const Point<3> &p) const
+{
+  return point_inside_codim<2,3>(p); // 2d cell in 3d space: use the shared codimension helper
+}
+
+
+
+template <int dim, int spacedim>
+bool CellAccessor<dim, spacedim>::at_boundary () const
+{
+  switch (dim) // true as soon as any one of the cell's 2*dim faces is at the boundary
+    {
+    case 1: // faces 0 and 1
+      return at_boundary(0) || at_boundary(1);
+    case 2: // faces 0 to 3
+      return (at_boundary(0) || at_boundary(1) ||
+              at_boundary(2) || at_boundary(3));
+    case 3: // faces 0 to 5
+      return (at_boundary(0) || at_boundary(1) ||
+              at_boundary(2) || at_boundary(3) ||
+              at_boundary(4) || at_boundary(5));
+    default: // any other dimension is not implemented
+      Assert (false, ExcNotImplemented());
+      return false;
+    }
+}
+
+
+
+template <int dim, int spacedim>
+types::material_id CellAccessor<dim, spacedim>::material_id () const
+{
+  Assert (this->used(), TriaAccessorExceptions::ExcCellNotUsed()); // querying an unused cell is flagged as an error
+  return this->tria->levels[this->present_level]->cells.boundary_or_material_id[this->present_index].material_id;
+}
+
+
+
+template <int dim, int spacedim>
+void CellAccessor<dim, spacedim>::set_material_id (const types::material_id mat_id) const
+{
+  Assert (this->used(), TriaAccessorExceptions::ExcCellNotUsed());
+  Assert ( mat_id < numbers::invalid_material_id, ExcIndexRange(mat_id,0,numbers::invalid_material_id)); // strict: invalid_material_id itself is rejected
+  this->tria->levels[this->present_level]->cells.boundary_or_material_id[this->present_index].material_id = mat_id;
+}
+
+
+
+template <int dim, int spacedim>
+void CellAccessor<dim, spacedim>::recursively_set_material_id (const types::material_id mat_id) const
+{
+  set_material_id (mat_id); // this cell first
+
+  if (this->has_children())
+    for (unsigned int c=0; c<this->n_children(); ++c)
+      this->child(c)->recursively_set_material_id (mat_id); // then each child, which in turn handles its own children
+}
+
+
+
+template <int dim, int spacedim>
+void
+CellAccessor<dim, spacedim>::set_subdomain_id (const types::subdomain_id new_subdomain_id) const
+{
+  Assert (this->used(), TriaAccessorExceptions::ExcCellNotUsed());
+  Assert (this->active(), // unlike set_level_subdomain_id(), this setter also requires an active cell
+          ExcMessage("set_subdomain_id() can only be called on active cells!"));
+  this->tria->levels[this->present_level]->subdomain_ids[this->present_index]
+    = new_subdomain_id;
+}
+
+
+template <int dim, int spacedim>
+types::subdomain_id CellAccessor<dim, spacedim>::level_subdomain_id () const
+{
+  Assert (this->used(), TriaAccessorExceptions::ExcCellNotUsed()); // querying an unused cell is flagged as an error
+  return this->tria->levels[this->present_level]->level_subdomain_ids[this->present_index]; // stored per level, indexed by cell
+}
+
+
+
+template <int dim, int spacedim>
+void
+CellAccessor<dim, spacedim>::set_level_subdomain_id (const types::subdomain_id new_level_subdomain_id) const
+{
+  Assert (this->used(), TriaAccessorExceptions::ExcCellNotUsed()); // only "used" is checked; the cell need not be active
+  this->tria->levels[this->present_level]->level_subdomain_ids[this->present_index]
+    = new_level_subdomain_id;
+}
+
+
+template <int dim, int spacedim>
+bool CellAccessor<dim, spacedim>::direction_flag () const
+{
+  Assert (this->used(), TriaAccessorExceptions::ExcCellNotUsed());
+  if (dim==spacedim) // in this case the flag is always true and nothing is looked up
+    return true;
+  else
+    return this->tria->levels[this->present_level]->direction_flags[this->present_index]; // otherwise return the stored flag
+}
+
+
+
+template <int dim, int spacedim>
+void
+CellAccessor<dim, spacedim>::set_direction_flag (const bool new_direction_flag) const
+{
+  Assert (this->used(), TriaAccessorExceptions::ExcCellNotUsed());
+  if (dim<spacedim) // a flag is stored only when dim<spacedim
+    this->tria->levels[this->present_level]->direction_flags[this->present_index]
+      = new_direction_flag;
+  else // nothing is stored in this branch; only the value true is accepted
+    Assert (new_direction_flag == true,
+            ExcMessage ("If dim==spacedim, direction flags are always true and "
+                        "can not be set to anything else."));
+}
+
+
+
+template <int dim, int spacedim>
+void
+CellAccessor<dim, spacedim>::set_active_cell_index (const unsigned int active_cell_index)
+{
+  // deliberately no used()/active() assertions here: the index must also be
+  // writable on non-active (and unused) cells so that it can be reset after
+  // refinement
+  this->tria->levels[this->present_level]->active_cell_indices[this->present_index] = active_cell_index;
+}
+
+
+
+template <int dim, int spacedim>
+void
+CellAccessor<dim, spacedim>::set_parent (const unsigned int parent_index)
+{
+  Assert (this->used(), TriaAccessorExceptions::ExcCellNotUsed());
+  Assert (this->present_level > 0, TriaAccessorExceptions::ExcCellHasNoParent ());
+  // two consecutive cells share one parent entry, hence the halved index
+  this->tria->levels[this->present_level]->parents[this->present_index / 2] = parent_index;
+}
+
+
+
+template <int dim, int spacedim>
+int
+CellAccessor<dim, spacedim>::
+parent_index () const
+{
+  Assert (this->present_level > 0, TriaAccessorExceptions::ExcCellHasNoParent ());
+
+  // two consecutive cells always have the same parent, so only one
+  // entry per pair is stored; halve the cell index to find it
+  const int stored_slot = this->present_index / 2;
+  return this->tria->levels[this->present_level]->parents[stored_slot];
+}
+
+
+
+template <int dim, int spacedim>
+unsigned int
+CellAccessor<dim, spacedim>::
+active_cell_index () const
+{
+  Assert (this->has_children()==false, TriaAccessorExceptions::ExcCellNotActive()); // only cells without children may be queried
+  return this->tria->levels[this->present_level]->active_cell_indices[this->present_index]; // entry stored for this cell on its own level
+}
+
+
+
+template <int dim, int spacedim>
+TriaIterator<CellAccessor<dim,spacedim> >
+CellAccessor<dim, spacedim>::parent () const
+{
+  Assert (this->used(), TriaAccessorExceptions::ExcCellNotUsed());
+  Assert (this->present_level > 0, TriaAccessorExceptions::ExcCellHasNoParent ());
+
+  // the parent lives one level up, at the index recorded for this cell
+  return TriaIterator<CellAccessor<dim,spacedim> > (this->tria,
+                                                    this->present_level-1,
+                                                    parent_index ());
+}
+
+
+template <int dim, int spacedim>
+void
+CellAccessor<dim, spacedim>::
+recursively_set_subdomain_id (const types::subdomain_id new_subdomain_id) const
+{
+  // only cells without children take the id; all others just forward it
+  if (!this->has_children())
+    set_subdomain_id (new_subdomain_id);
+  else
+    for (unsigned int child_no=0; child_no<this->n_children(); ++child_no)
+      this->child(child_no)->recursively_set_subdomain_id (new_subdomain_id);
+}
+
+
+
+template <int dim, int spacedim>
+void CellAccessor<dim, spacedim>::set_neighbor (const unsigned int i,
+                                                const TriaIterator<CellAccessor<dim, spacedim> > &pointer) const
+{
+  AssertIndexRange (i, GeometryInfo<dim>::faces_per_cell);
+
+  // every cell owns faces_per_cell consecutive (level,index) pairs in the
+  // neighbors array of its level; locate the pair that belongs to face i
+  const unsigned int slot
+    = this->present_index*GeometryInfo<dim>::faces_per_cell+i;
+
+  // an iterator that is not in the valid state is recorded as (-1,-1)
+  int neighbor_level = -1;
+  int neighbor_index = -1;
+  if (pointer.state() == IteratorState::valid)
+    {
+      neighbor_level = pointer->present_level;
+      neighbor_index = pointer->present_index;
+    }
+
+  this->tria->levels[this->present_level]->neighbors[slot].first
+    = neighbor_level;
+  this->tria->levels[this->present_level]->neighbors[slot].second
+    = neighbor_index;
+}
+
+
+
+template <int dim, int spacedim>
+unsigned int CellAccessor<dim,spacedim>::neighbor_of_neighbor_internal (const unsigned int neighbor) const
+{
+  AssertIndexRange (neighbor, GeometryInfo<dim>::faces_per_cell);
+
+  // if we have a 1d mesh in 1d, we
+  // can assume that the left
+  // neighbor of the right neighbor is
+  // the current cell. but that is an
+  // invariant that isn't true if the
+  // mesh is embedded in a higher
+  // dimensional space, so we have to
+  // fall back onto the generic code
+  // below
+  if ((dim==1) && (spacedim==dim))
+    return GeometryInfo<dim>::opposite_face[neighbor];
+
+  const TriaIterator<CellAccessor<dim, spacedim> > neighbor_cell = this->neighbor(neighbor);
+
+  // usually, on regular patches of
+  // the grid, this cell is just on
+  // the opposite side of the
+  // neighbor that the neighbor is of
+  // this cell. for example in 2d, if
+  // we want to know the
+  // neighbor_of_neighbor if
+  // neighbor==1 (the right
+  // neighbor), then we will get 0
+  // (the left neighbor) in most
+  // cases. look up this relationship
+  // in the table provided by
+  // GeometryInfo and try it
+  const unsigned int this_face_index=face_index(neighbor);
+
+  const unsigned int neighbor_guess
+    = GeometryInfo<dim>::opposite_face[neighbor];
+
+  if (neighbor_cell->face_index (neighbor_guess) == this_face_index)
+    return neighbor_guess;
+  else
+    // if the guess was false, then
+    // we need to loop over all
+    // neighbors and find the number
+    // the hard way
+    {
+      for (unsigned int face_no=0; face_no<GeometryInfo<dim>::faces_per_cell; ++face_no)
+        if (neighbor_cell->face_index (face_no) == this_face_index)
+          return face_no;
+
+      // running over all the neighbor's
+      // faces we did not find the
+      // present face. Thereby the
+      // neighbor must be coarser
+      // than the present
+      // cell. Return an invalid
+      // unsigned int in this case.
+      return numbers::invalid_unsigned_int;
+    }
+}
+
+
+
+template <int dim, int spacedim>
+unsigned int CellAccessor<dim,spacedim>::neighbor_of_neighbor (const unsigned int neighbor) const
+{
+  const unsigned int n2=neighbor_of_neighbor_internal(neighbor); // the lookup itself is done by the internal helper
+  Assert (n2!=numbers::invalid_unsigned_int,
+          TriaAccessorExceptions::ExcNeighborIsCoarser()); // an invalid result is reported as "neighbor is coarser"
+
+  return n2;
+}
+
+
+
+template <int dim, int spacedim>
+bool
+CellAccessor<dim,spacedim>::neighbor_is_coarser (const unsigned int neighbor) const
+{
+  return neighbor_of_neighbor_internal(neighbor)==numbers::invalid_unsigned_int; // true exactly when the internal lookup returns the invalid marker
+}
+
+
+
+template <int dim, int spacedim>
+std::pair<unsigned int, unsigned int>
+CellAccessor<dim, spacedim>::neighbor_of_coarser_neighbor (const unsigned int neighbor) const
+{
+  AssertIndexRange (neighbor, GeometryInfo<dim>::faces_per_cell);
+  // make sure that the neighbor is
+  // on a coarser level
+  Assert (neighbor_is_coarser(neighbor),
+          TriaAccessorExceptions::ExcNeighborIsNotCoarser());
+
+  switch (dim) // dim is a template parameter; only the cases 2 and 3 are implemented
+    {
+    case 2:
+    {
+      const int this_face_index=face_index(neighbor); // index of this cell's face number 'neighbor'
+      const TriaIterator<CellAccessor<2,spacedim> > neighbor_cell = this->neighbor(neighbor);
+
+      // usually, on regular patches of
+      // the grid, this cell is just on
+      // the opposite side of the
+      // neighbor that the neighbor is of
+      // this cell. for example in 2d, if
+      // we want to know the
+      // neighbor_of_neighbor if
+      // neighbor==1 (the right
+      // neighbor), then we will get 0
+      // (the left neighbor) in most
+      // cases. look up this relationship
+      // in the table provided by
+      // GeometryInfo and try it
+      const unsigned int face_no_guess
+        = GeometryInfo<2>::opposite_face[neighbor];
+
+      const TriaIterator<TriaAccessor<1, 2, spacedim> > face_guess
+        =neighbor_cell->face(face_no_guess);
+
+      if (face_guess->has_children())
+        for (unsigned int subface_no=0; subface_no<face_guess->n_children(); ++subface_no)
+          if (face_guess->child_index(subface_no)==this_face_index)
+            return std::make_pair (face_no_guess, subface_no); // result is (face number, subface number) on the neighbor
+
+      // if the guess was false, then
+      // we need to loop over all faces
+      // and subfaces and find the
+      // number the hard way
+      for (unsigned int face_no=0; face_no<GeometryInfo<2>::faces_per_cell; ++face_no)
+        {
+          if (face_no!=face_no_guess)
+            {
+              const TriaIterator<TriaAccessor<1, 2, spacedim> > face
+                =neighbor_cell->face(face_no);
+              if (face->has_children())
+                for (unsigned int subface_no=0; subface_no<face->n_children(); ++subface_no)
+                  if (face->child_index(subface_no)==this_face_index)
+                    return std::make_pair (face_no, subface_no);
+            }
+        }
+
+      // we should never get here,
+      // since then we did not find
+      // our way back...
+      Assert (false, ExcInternalError());
+      return std::make_pair (numbers::invalid_unsigned_int,
+                             numbers::invalid_unsigned_int);
+    }
+
+    case 3:
+    {
+      const int this_face_index=face_index(neighbor); // index of this cell's face number 'neighbor'
+      const TriaIterator<CellAccessor<3, spacedim> >
+      neighbor_cell = this->neighbor(neighbor);
+
+      // usually, on regular patches of the grid, this cell is just on the
+      // opposite side of the neighbor that the neighbor is of this cell.
+      // for example in 2d, if we want to know the neighbor_of_neighbor if
+      // neighbor==1 (the right neighbor), then we will get 0 (the left
+      // neighbor) in most cases. look up this relationship in the table
+      // provided by GeometryInfo and try it
+      const unsigned int face_no_guess
+        = GeometryInfo<3>::opposite_face[neighbor];
+
+      const TriaIterator<TriaAccessor<3-1, 3, spacedim> > face_guess
+        =neighbor_cell->face(face_no_guess);
+
+      if (face_guess->has_children())
+        for (unsigned int subface_no=0; subface_no<face_guess->n_children(); ++subface_no)
+          {
+            if (face_guess->child_index(subface_no)==this_face_index)
+              // call a helper function, that translates the current
+              // subface number to a subface number for the current
+              // FaceRefineCase
+              return std::make_pair (face_no_guess, translate_subface_no(face_guess, subface_no));
+
+            if (face_guess->child(subface_no)->has_children())
+              for (unsigned int subsub_no=0; subsub_no<face_guess->child(subface_no)->n_children(); ++subsub_no)
+                if (face_guess->child(subface_no)->child_index(subsub_no)==this_face_index)
+                  // call a helper function, that translates the current
+                  // subface number and subsubface number to a subface
+                  // number for the current FaceRefineCase
+                  return std::make_pair (face_no_guess, translate_subface_no(face_guess, subface_no, subsub_no));
+          }
+
+      // if the guess was false, then we need to loop over all faces and
+      // subfaces and find the number the hard way
+      for (unsigned int face_no=0; face_no<GeometryInfo<3>::faces_per_cell; ++face_no)
+        {
+          if (face_no==face_no_guess)
+            continue;
+
+          const TriaIterator<TriaAccessor<3-1, 3, spacedim> > face
+            =neighbor_cell->face(face_no);
+
+          if (!face->has_children())
+            continue;
+
+          for (unsigned int subface_no=0; subface_no<face->n_children(); ++subface_no)
+            {
+              if (face->child_index(subface_no)==this_face_index)
+                // call a helper function, that translates the current
+                // subface number to a subface number for the current
+                // FaceRefineCase
+                return std::make_pair (face_no, translate_subface_no(face, subface_no));
+
+              if (face->child(subface_no)->has_children())
+                for (unsigned int subsub_no=0; subsub_no<face->child(subface_no)->n_children(); ++subsub_no)
+                  if (face->child(subface_no)->child_index(subsub_no)==this_face_index)
+                    // call a helper function, that translates the current
+                    // subface number and subsubface number to a subface
+                    // number for the current FaceRefineCase
+                    return std::make_pair (face_no, translate_subface_no(face, subface_no, subsub_no));
+            }
+        }
+
+      // we should never get here, since then we did not find our way
+      // back...
+      Assert (false, ExcInternalError());
+      return std::make_pair (numbers::invalid_unsigned_int,
+                             numbers::invalid_unsigned_int);
+    }
+
+    default:
+    {
+      Assert(false, ExcImpossibleInDim(1)); // any dim other than 2 or 3 ends up here
+      return std::make_pair (numbers::invalid_unsigned_int,
+                             numbers::invalid_unsigned_int);
+    }
+    }
+}
+
+
+
+template <int dim, int spacedim>
+bool CellAccessor<dim, spacedim>::at_boundary (const unsigned int i) const
+{
+  Assert (this->used(), TriaAccessorExceptions::ExcCellNotUsed());
+  Assert (i<GeometryInfo<dim>::faces_per_cell,
+          ExcIndexRange (i,0,GeometryInfo<dim>::faces_per_cell));
+
+  return (neighbor_index(i) == -1); // a neighbor index of -1 on face i means the face is at the boundary
+}
+
+
+
+template <int dim, int spacedim>
+bool CellAccessor<dim, spacedim>::has_boundary_lines () const
+{
+  // in 1d, query the cell itself rather than its lines
+  if (dim == 1)
+    return at_boundary ();
+  // otherwise a single boundary line among the cell's lines is enough
+  for (unsigned int line_no=0; line_no<GeometryInfo<dim>::lines_per_cell; ++line_no)
+    {
+      if (this->line(line_no)->at_boundary())
+        return true;
+    }
+  return false;
+}
+
+
+
+template <int dim, int spacedim>
+TriaIterator<CellAccessor<dim,spacedim> >
+CellAccessor<dim, spacedim>::
+neighbor_child_on_subface (const unsigned int face,
+                           const unsigned int subface) const
+{
+  Assert (!this->has_children(),
+          ExcMessage ("The present cell must not have children!"));
+  Assert (!this->at_boundary(face),
+          ExcMessage ("The present cell must have a valid neighbor!"));
+  Assert (this->neighbor(face)->has_children() == true,
+          ExcMessage ("The neighbor must have children!"));
+
+  switch (dim) // dim is a template parameter; only the cases 2 and 3 are implemented
+    {
+    case 2:
+    {
+      const unsigned int neighbor_neighbor
+        = this->neighbor_of_neighbor (face);
+      const unsigned int neighbor_child_index
+        = GeometryInfo<dim>::child_cell_on_face(
+            this->neighbor(face)->refinement_case(),neighbor_neighbor,subface);
+
+      TriaIterator<CellAccessor<dim,spacedim> > sub_neighbor
+        = this->neighbor(face)->child(neighbor_child_index);
+      // the neighbor's child can have children,
+      // which are not further refined along the
+      // face under consideration. as we are
+      // normally interested in one of this
+      // child's children, search for the right one.
+      while (sub_neighbor->has_children())
+        {
+          Assert ((GeometryInfo<dim>::face_refinement_case(sub_neighbor->refinement_case(),
+                                                           neighbor_neighbor) ==
+                   RefinementCase<dim>::no_refinement),
+                  ExcInternalError());
+          sub_neighbor = sub_neighbor->child(GeometryInfo<dim>::child_cell_on_face(
+                                               sub_neighbor->refinement_case(),neighbor_neighbor,0));
+
+        }
+
+      return sub_neighbor;
+    }
+
+
+    case 3:
+    {
+      // this function returns the neighbor's
+      // child on a given face and
+      // subface.
+
+      // we have to consider one other aspect here:
+      // The face might be refined
+      // anisotropically. In this case, the subface
+      // number refers to the following, where we
+      // look at the face from the current cell,
+      // thus the subfaces are in standard
+      // orientation concerning the cell
+      //
+      // for isotropic refinement
+      //
+      // *---*---*
+      // | 2 | 3 |
+      // *---*---*
+      // | 0 | 1 |
+      // *---*---*
+      //
+      // for 2*anisotropic refinement
+      // (first cut_y, then cut_x)
+      //
+      // *---*---*
+      // | 2 | 3 |
+      // *---*---*
+      // | 0 | 1 |
+      // *---*---*
+      //
+      // for 2*anisotropic refinement
+      // (first cut_x, then cut_y)
+      //
+      // *---*---*
+      // | 1 | 3 |
+      // *---*---*
+      // | 0 | 2 |
+      // *---*---*
+      //
+      // for purely anisotropic refinement:
+      //
+      // *---*---*      *-------*
+      // |   |   |      |   1   |
+      // | 0 | 1 |  or  *-------*
+      // |   |   |      |   0   |
+      // *---*---*      *-------*
+      //
+      // for "mixed" refinement:
+      //
+      // *---*---*      *---*---*      *---*---*      *-------*
+      // |   | 2 |      | 1 |   |      | 1 | 2 |      |   2   |
+      // | 0 *---*  or  *---* 2 |  or  *---*---*  or  *---*---*
+      // |   | 1 |      | 0 |   |      |   0   |      | 0 | 1 |
+      // *---*---*      *---*---*      *-------*      *---*---*
+
+      const typename Triangulation<3,spacedim>::face_iterator
+      mother_face = this->face(face);
+      const unsigned int total_children=mother_face->number_of_children();
+      Assert (subface<total_children,ExcIndexRange(subface,0,total_children));
+      Assert (total_children<=GeometryInfo<3>::max_children_per_face, ExcInternalError());
+
+      unsigned int neighbor_neighbor;
+      TriaIterator<CellAccessor<3,spacedim> > neighbor_child;
+      const TriaIterator<CellAccessor<3,spacedim> > neighbor
+        = this->neighbor(face);
+
+
+      const RefinementCase<2> mother_face_ref_case
+        = mother_face->refinement_case();
+      if (mother_face_ref_case==RefinementCase<2>::cut_xy) // total_children==4
+        {
+          // this case is quite easy. we are sure,
+          // that the neighbor is not coarser.
+
+          // get the neighbor's number for the given
+          // face and the neighbor
+          neighbor_neighbor
+            = this->neighbor_of_neighbor (face);
+
+          // now use the info provided by GeometryInfo
+          // to extract the neighbor's child number
+          const unsigned int neighbor_child_index
+            = GeometryInfo<3>::child_cell_on_face(neighbor->refinement_case(),
+                                                  neighbor_neighbor, subface,
+                                                  neighbor->face_orientation(neighbor_neighbor),
+                                                  neighbor->face_flip(neighbor_neighbor),
+                                                  neighbor->face_rotation(neighbor_neighbor));
+          neighbor_child = neighbor->child(neighbor_child_index);
+
+          // make sure that the neighbor child cell we
+          // have found shares the desired subface.
+          Assert((this->face(face)->child(subface) ==
+                  neighbor_child->face(neighbor_neighbor)),
+                 ExcInternalError());
+        }
+      else //-> the face is refined anisotropically
+        {
+          // first of all, we have to find the
+          // neighbor at one of the anisotropic
+          // children of the
+          // mother_face. determine, which of
+          // these we need.
+          unsigned int first_child_to_find;
+          unsigned int neighbor_child_index;
+          if (total_children==2)
+            first_child_to_find=subface;
+          else
+            {
+              first_child_to_find=subface/2;
+              if (total_children==3 &&
+                  subface==1 &&
+                  !mother_face->child(0)->has_children())
+                first_child_to_find=1;
+            }
+          if (neighbor_is_coarser(face))
+            {
+              std::pair<unsigned int, unsigned int> indices=neighbor_of_coarser_neighbor(face);
+              neighbor_neighbor=indices.first;
+
+
+              // we have to translate our
+              // subface_index according to the
+              // RefineCase and subface index of
+              // the coarser face (our face is an
+              // anisotropic child of the coarser
+              // face), 'a' denotes our
+              // subface_index 0 and 'b' denotes
+              // our subface_index 1, whereas 0...3
+              // denote isotropic subfaces of the
+              // coarser face
+              //
+              // cut_x and coarser_subface_index=0
+              //
+              // *---*---*
+              // |b=2|   |
+              // |   |   |
+              // |a=0|   |
+              // *---*---*
+              //
+              // cut_x and coarser_subface_index=1
+              //
+              // *---*---*
+              // |   |b=3|
+              // |   |   |
+              // |   |a=1|
+              // *---*---*
+              //
+              // cut_y and coarser_subface_index=0
+              //
+              // *-------*
+              // |       |
+              // *-------*
+              // |a=0 b=1|
+              // *-------*
+              //
+              // cut_y and coarser_subface_index=1
+              //
+              // *-------*
+              // |a=2 b=3|
+              // *-------*
+              // |       |
+              // *-------*
+              unsigned int iso_subface;
+              if (neighbor->face(neighbor_neighbor)->refinement_case()==RefinementCase<2>::cut_x)
+                iso_subface=2*first_child_to_find + indices.second;
+              else
+                {
+                  Assert(neighbor->face(neighbor_neighbor)->refinement_case()==RefinementCase<2>::cut_y,
+                         ExcInternalError());
+                  iso_subface=first_child_to_find + 2*indices.second;
+                }
+              neighbor_child_index
+                = GeometryInfo<3>::child_cell_on_face(neighbor->refinement_case(),
+                                                      neighbor_neighbor,
+                                                      iso_subface,
+                                                      neighbor->face_orientation(neighbor_neighbor),
+                                                      neighbor->face_flip(neighbor_neighbor),
+                                                      neighbor->face_rotation(neighbor_neighbor));
+            }
+          else //neighbor is not coarser
+            {
+              neighbor_neighbor=neighbor_of_neighbor(face);
+              neighbor_child_index
+                = GeometryInfo<3>::child_cell_on_face(neighbor->refinement_case(),
+                                                      neighbor_neighbor,
+                                                      first_child_to_find,
+                                                      neighbor->face_orientation(neighbor_neighbor),
+                                                      neighbor->face_flip(neighbor_neighbor),
+                                                      neighbor->face_rotation(neighbor_neighbor),
+                                                      mother_face_ref_case);
+            }
+
+          neighbor_child=neighbor->child(neighbor_child_index);
+          // it might be, that the neighbor_child
+          // has children, which are not refined
+          // along the given subface. go down that
+          // list and deliver the last of those.
+          while (neighbor_child->has_children() &&
+                 GeometryInfo<3>::face_refinement_case(neighbor_child->refinement_case(),
+                                                       neighbor_neighbor)
+                 == RefinementCase<2>::no_refinement)
+            neighbor_child =
+              neighbor_child->child(GeometryInfo<3>::
+                                    child_cell_on_face(neighbor_child->refinement_case(),
+                                                       neighbor_neighbor,
+                                                       0));
+
+          // if there are two total subfaces, we
+          // are finished. if there are four we
+          // have to get a child of our current
+          // neighbor_child. If there are three,
+          // we have to check which of the two
+          // possibilities applies.
+          if (total_children==3)
+            {
+              if (mother_face->child(0)->has_children())
+                {
+                  if (subface<2)
+                    neighbor_child =
+                      neighbor_child->child(GeometryInfo<3>::
+                                            child_cell_on_face(neighbor_child->refinement_case(),
+                                                               neighbor_neighbor,subface,
+                                                               neighbor_child->face_orientation(neighbor_neighbor),
+                                                               neighbor_child->face_flip(neighbor_neighbor),
+                                                               neighbor_child->face_rotation(neighbor_neighbor),
+                                                               mother_face->child(0)->refinement_case()));
+                }
+              else
+                {
+                  Assert(mother_face->child(1)->has_children(), ExcInternalError());
+                  if (subface>0)
+                    neighbor_child =
+                      neighbor_child->child(GeometryInfo<3>::
+                                            child_cell_on_face(neighbor_child->refinement_case(),
+                                                               neighbor_neighbor,subface-1,
+                                                               neighbor_child->face_orientation(neighbor_neighbor),
+                                                               neighbor_child->face_flip(neighbor_neighbor),
+                                                               neighbor_child->face_rotation(neighbor_neighbor),
+                                                               mother_face->child(1)->refinement_case()));
+                }
+            }
+          else if (total_children==4)
+            {
+              neighbor_child =
+                neighbor_child->child(GeometryInfo<3>::
+                                      child_cell_on_face(neighbor_child->refinement_case(),
+                                                         neighbor_neighbor,subface%2,
+                                                         neighbor_child->face_orientation(neighbor_neighbor),
+                                                         neighbor_child->face_flip(neighbor_neighbor),
+                                                         neighbor_child->face_rotation(neighbor_neighbor),
+                                                         mother_face->child(subface/2)->refinement_case()));
+            }
+        }
+
+      // it might be, that the neighbor_child has
+      // children, which are not refined along the
+      // given subface. go down that list and
+      // deliver the last of those.
+      while (neighbor_child->has_children())
+        neighbor_child
+          = neighbor_child->child(GeometryInfo<3>::
+                                  child_cell_on_face(neighbor_child->refinement_case(),
+                                                     neighbor_neighbor,
+                                                     0));
+
+#ifdef DEBUG
+      // check, whether the face neighbor_child
+      // matches the requested subface
+      typename Triangulation<3,spacedim>::face_iterator requested;
+      switch (this->subface_case(face))
+        {
+        case internal::SubfaceCase<3>::case_x:
+        case internal::SubfaceCase<3>::case_y:
+        case internal::SubfaceCase<3>::case_xy:
+          requested = mother_face->child(subface);
+          break;
+        case internal::SubfaceCase<3>::case_x1y2y:
+        case internal::SubfaceCase<3>::case_y1x2x:
+          requested = mother_face->child(subface/2)->child(subface%2);
+          break;
+
+        case internal::SubfaceCase<3>::case_x1y:
+        case internal::SubfaceCase<3>::case_y1x:
+          switch (subface)
+            {
+            case 0:
+            case 1:
+              requested = mother_face->child(0)->child(subface);
+              break;
+            case 2:
+              requested = mother_face->child(1);
+              break;
+            default:
+              Assert(false, ExcInternalError());
+            }
+          break;
+        case internal::SubfaceCase<3>::case_x2y:
+        case internal::SubfaceCase<3>::case_y2x:
+          switch (subface)
+            {
+            case 0:
+              requested=mother_face->child(0);
+              break;
+            case 1:
+            case 2:
+              requested=mother_face->child(1)->child(subface-1);
+              break;
+            default:
+              Assert(false, ExcInternalError());
+            }
+          break;
+        default:
+          Assert(false, ExcInternalError());
+          break;
+        }
+      Assert (requested==neighbor_child->face(neighbor_neighbor),
+              ExcInternalError());
+#endif
+
+      return neighbor_child;
+
+    }
+
+    default:
+      // 1d or more than 3d
+      Assert (false, ExcNotImplemented());
+      return TriaIterator<CellAccessor<dim,spacedim> >();
+    }
+}
+
+
+
+// explicit instantiations
+#include "tria_accessor.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/grid/tria_accessor.inst.in b/source/grid/tria_accessor.inst.in
new file mode 100644
index 0000000..e3ed445
--- /dev/null
+++ b/source/grid/tria_accessor.inst.in
@@ -0,0 +1,93 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+for (deal_II_dimension : DIMENSIONS)
+  {
+    template class TriaAccessorBase<1,deal_II_dimension>;
+#if deal_II_dimension >= 2
+    template class TriaAccessorBase<2,deal_II_dimension>;
+#endif
+#if deal_II_dimension >= 3
+    template class TriaAccessorBase<3,deal_II_dimension>;
+#endif
+
+    template class TriaAccessor<1,deal_II_dimension,deal_II_dimension>;
+#if deal_II_dimension >= 2
+    template class TriaAccessor<2,deal_II_dimension,deal_II_dimension>;
+#endif
+#if deal_II_dimension >= 3
+    template class TriaAccessor<3,deal_II_dimension,deal_II_dimension>;
+#endif
+
+
+    template class CellAccessor<deal_II_dimension>;
+    template class TriaRawIterator<TriaAccessor<1, deal_II_dimension, deal_II_dimension> >;
+    template class TriaRawIterator<CellAccessor<deal_II_dimension> >;
+    template class TriaIterator<TriaAccessor<1, deal_II_dimension, deal_II_dimension> >;
+    template class TriaIterator<CellAccessor<deal_II_dimension> >;
+    template class TriaActiveIterator<TriaAccessor<1, deal_II_dimension, deal_II_dimension> >;
+    template class TriaActiveIterator<CellAccessor<deal_II_dimension> >;
+
+
+#if deal_II_dimension >= 2
+    template class TriaRawIterator<TriaAccessor<2, deal_II_dimension, deal_II_dimension> >;
+    template class TriaIterator<TriaAccessor<2, deal_II_dimension, deal_II_dimension> >;
+    template class TriaActiveIterator<TriaAccessor<2, deal_II_dimension, deal_II_dimension> >;
+#endif
+
+#if deal_II_dimension >= 3
+    template class TriaRawIterator<TriaAccessor<3, deal_II_dimension, deal_II_dimension> >;
+    template class TriaIterator<TriaAccessor<3, deal_II_dimension, deal_II_dimension> >;
+    template class TriaActiveIterator<TriaAccessor<3, deal_II_dimension, deal_II_dimension> >;
+    template class CellAccessor<1, 3>;
+#endif
+
+#if deal_II_dimension == 1
+    template class TriaAccessorBase<1,deal_II_dimension,2>;
+    template class TriaAccessorBase<1,deal_II_dimension,3>;
+
+    template class TriaAccessor<1,deal_II_dimension,2>;
+    template class TriaAccessor<1,deal_II_dimension,3>;
+
+
+#endif
+#if deal_II_dimension == 2
+    template class TriaAccessorBase<1,deal_II_dimension,3>;
+    template class TriaAccessorBase<2,deal_II_dimension,3>;
+
+    template class TriaAccessor<1,deal_II_dimension,3>;
+    template class TriaAccessor<2,deal_II_dimension,3>;
+#endif
+
+#if deal_II_dimension != 3
+    template class CellAccessor<deal_II_dimension, deal_II_dimension+1>;
+    template class TriaRawIterator<TriaAccessor<1, deal_II_dimension, deal_II_dimension+1> >;
+    template class TriaRawIterator<CellAccessor<deal_II_dimension, deal_II_dimension+1> >;
+    template class TriaIterator<TriaAccessor<1, deal_II_dimension, deal_II_dimension+1> >;
+    template class TriaIterator<CellAccessor<deal_II_dimension, deal_II_dimension+1> >;
+    template class TriaActiveIterator<TriaAccessor<1, deal_II_dimension, deal_II_dimension+1> >;
+    template class TriaActiveIterator<CellAccessor<deal_II_dimension, deal_II_dimension+1> >;
+
+
+#if deal_II_dimension == 2
+    template class TriaRawIterator<TriaAccessor<2, deal_II_dimension, deal_II_dimension+1> >;
+    template class TriaIterator<TriaAccessor<2, deal_II_dimension, deal_II_dimension+1> >;
+    template class TriaActiveIterator<TriaAccessor<2, deal_II_dimension, deal_II_dimension+1> >;
+#endif
+
+#endif
+  }
diff --git a/source/grid/tria_boundary.cc b/source/grid/tria_boundary.cc
new file mode 100644
index 0000000..b1731fc
--- /dev/null
+++ b/source/grid/tria_boundary.cc
@@ -0,0 +1,907 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/tensor.h>
+#include <deal.II/grid/tria_boundary.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/fe/fe_q.h>
+#include <cmath>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+/* -------------------------- Boundary --------------------- */
+
+
+// Out-of-line definition of the destructor; its body is empty.
+template <int dim, int spacedim> Boundary<dim, spacedim>::~Boundary ()
+{}
+
+
+// Placeholder body: asserts ExcPureFunctionCalled and ignores both arguments.
+template <int dim, int spacedim>
+void Boundary<dim, spacedim>::get_intermediate_points_on_line (
+  const typename Triangulation<dim, spacedim>::line_iterator & /*line*/,
+  std::vector<Point<spacedim> > & /*points*/) const
+{
+  Assert (false, ExcPureFunctionCalled());
+}
+
+
+
+// Placeholder body: asserts ExcPureFunctionCalled and ignores both arguments.
+template <int dim, int spacedim>
+void Boundary<dim, spacedim>::get_intermediate_points_on_quad (
+  const typename Triangulation<dim, spacedim>::quad_iterator & /*quad*/,
+  std::vector<Point<spacedim> > & /*points*/) const
+{
+  Assert (false, ExcPureFunctionCalled());
+}
+
+
+// Forward a face request to the routine for the matching object type:
+// dim==2 goes to the line routine, dim==3 to the quad routine.
+template <int dim, int spacedim>
+void Boundary<dim,spacedim>::get_intermediate_points_on_face (
+  const typename Triangulation<dim,spacedim>::face_iterator &face,
+  std::vector<Point<spacedim> >                             &points) const
+{
+  // dim==1 is rejected here
+  Assert (dim>1, ExcImpossibleInDim(dim));
+
+  if (dim == 2)
+    get_intermediate_points_on_line (face, points);
+  else if (dim == 3)
+    get_intermediate_points_on_quad (face, points);
+  else
+    {
+      // no dispatch target exists for any other dimension
+      Assert (false, ExcNotImplemented());
+    }
+}
+
+
+// <1,1> specialization: asserts ExcImpossibleInDim(1); arguments are unused.
+template <>
+void Boundary<1,1>::get_intermediate_points_on_face (
+  const Triangulation<1,1>::face_iterator & /*face*/,
+  std::vector<Point<1> > & /*points*/) const
+{
+  Assert (false, ExcImpossibleInDim(1));
+}
+
+
+// <1,2> specialization: asserts ExcImpossibleInDim(1); arguments are unused.
+template <>
+void Boundary<1,2>::get_intermediate_points_on_face (
+  const Triangulation<1,2>::face_iterator & /*face*/,
+  std::vector<Point<2> > & /*points*/) const
+{
+  Assert (false, ExcImpossibleInDim(1));
+}
+
+
+// <1,3> specialization: asserts ExcImpossibleInDim(1); arguments are unused.
+template <>
+void Boundary<1,3>::get_intermediate_points_on_face (
+  const Triangulation<1,3>::face_iterator & /*face*/,
+  std::vector<Point<3> > & /*points*/) const
+{
+  Assert (false, ExcImpossibleInDim(1));
+}
+
+
+
+
+// Placeholder: asserts ExcPureFunctionCalled, then returns a default tensor.
+template <int dim, int spacedim>
+Tensor<1,spacedim> Boundary<dim, spacedim>::normal_vector (
+  const typename Triangulation<dim, spacedim>::face_iterator & /*face*/,
+  const Point<spacedim> & /*p*/) const
+{
+  Assert (false, ExcPureFunctionCalled());
+  return Tensor<1,spacedim>();
+}
+
+
+
+// Placeholder body: asserts ExcPureFunctionCalled; the output is not written.
+template <int dim, int spacedim>
+void Boundary<dim, spacedim>::get_normals_at_vertices (
+  const typename Triangulation<dim, spacedim>::face_iterator & /*face*/,
+  FaceVertexNormals & /*face_vertex_normals*/) const
+{
+  Assert (false, ExcPureFunctionCalled());
+}
+
+
+
+// Default line projection: identity when spacedim<=1; otherwise this base
+// version asserts ExcPureFunctionCalled and returns a default point.
+template <int dim, int spacedim>
+Point<spacedim> Boundary<dim, spacedim>::project_to_surface (
+  const typename Triangulation<dim, spacedim>::line_iterator & /*line*/,
+  const Point<spacedim> &trial_point) const
+{
+  if (spacedim > 1)
+    {
+      Assert (false, ExcPureFunctionCalled());
+      return Point<spacedim>();
+    }
+  return trial_point;
+}
+
+
+
+// Default quad projection: identity when spacedim<=2; otherwise this base
+// version asserts ExcPureFunctionCalled and returns a default point.
+template <int dim, int spacedim>
+Point<spacedim> Boundary<dim, spacedim>::project_to_surface (
+  const typename Triangulation<dim, spacedim>::quad_iterator & /*quad*/,
+  const Point<spacedim> &trial_point) const
+{
+  if (spacedim > 2)
+    {
+      Assert (false, ExcPureFunctionCalled());
+      return Point<spacedim>();
+    }
+  return trial_point;
+}
+
+
+
+// Default hex projection: identity when spacedim<=3; otherwise this base
+// version asserts ExcPureFunctionCalled and returns a default point.
+template <int dim, int spacedim>
+Point<spacedim> Boundary<dim, spacedim>::project_to_surface (
+  const typename Triangulation<dim, spacedim>::hex_iterator & /*hex*/,
+  const Point<spacedim> &trial_point) const
+{
+  if (spacedim > 3)
+    {
+      Assert (false, ExcPureFunctionCalled());
+      return Point<spacedim>();
+    }
+  return trial_point;
+}
+
+
+
+// Return the support points of the cached QGaussLobatto<1> formula with
+// n_intermediate_points+2 points, creating the formula on first request.
+//
+// `points` is shared, mutable state, so it is only ever touched while
+// holding `mutex`. The earlier unlocked pre-check read the vector while
+// another thread could be resizing it. The returned reference outlives
+// the lock safely: the formula is heap-allocated and owned by the cache.
+template <int dim, int spacedim>
+const std::vector<Point<1> > &
+Boundary<dim,spacedim>::
+get_line_support_points (const unsigned int n_intermediate_points) const
+{
+  Threads::Mutex::ScopedLock lock(mutex);
+
+  if (points.size() <= n_intermediate_points)
+    points.resize(n_intermediate_points+1);
+
+  if (points[n_intermediate_points].get() == 0)
+    points[n_intermediate_points]
+      = std_cxx11::shared_ptr<QGaussLobatto<1> >(new QGaussLobatto<1>(n_intermediate_points+2));
+  return points[n_intermediate_points]->get_points();
+}
+
+
+
+
+/* -------------------------- StraightBoundary --------------------- */
+
+
+// Default constructor; the body is empty.
+template <int dim, int spacedim> StraightBoundary<dim, spacedim>::StraightBoundary ()
+{}
+
+
+// New point on a straight line: the arithmetic mean of its two end vertices.
+template <int dim, int spacedim>
+Point<spacedim> StraightBoundary<dim, spacedim>::get_new_point_on_line (
+  const typename Triangulation<dim, spacedim>::line_iterator &line) const
+{
+  return (line->vertex(0) + line->vertex(1)) / 2;
+}
+
+
+namespace
+{
+  // Midpoint of a quad embedded in 3d: the quad is either a cell of a 2d
+  // triangulation on a manifold in 3d, or a face of a 3d triangulation
+  // in 3d.
+  template <int dim>
+  Point<3>
+  compute_new_point_on_quad (const typename Triangulation<dim, 3>::quad_iterator &quad)
+  {
+    // The new point is assembled from the
+    // four vertices of the quad and one
+    // point on each of its four edges.
+    //
+    // Choosing the edge point takes some
+    // care. Picture a face that lies on a
+    // straight boundary while one of its
+    // edges, and the face behind that edge,
+    // do not. If that other face is refined
+    // first, the new vertex in the middle
+    // of the shared edge is moved onto the
+    // non-straight boundary and no longer
+    // coincides with what
+    // quad->line(.)->center() would have
+    // returned. We therefore prefer an edge
+    // midpoint that already exists and fall
+    // back to center() only if there is
+    // none.
+    //
+    // This cannot go wrong during mesh
+    // refinement: there, edges are refined
+    // first and faces only afterwards, so
+    // testing whether a line has children
+    // never makes the face midpoint depend
+    // on which of the two adjacent cells is
+    // refined first.
+    //
+    // Edges without children do occur,
+    // however, when a higher order MappingQ
+    // requests the midpoint of a face, and
+    // it is for those calls that the
+    // fallback has to be available.
+    //
+    // The weight 1/8 for each of the 8
+    // points is not arbitrary either. Ask
+    // where the harmonic map would place
+    // the point (0,0) when the square
+    // [-1,1]^2 is mapped onto the domain
+    // described by the 4 vertices and the 4
+    // edge points of this quad. Discretize
+    // that map with four cells and Q1
+    // elements, one cell per quadrant of
+    // [-1,1]^2 (the procedure chosen, for
+    // example, in
+    // GridGenerator::laplace_solve), and
+    // the midpoint lands at the mean of the
+    // 8 surrounding points. Whether four
+    // cells discretize the harmonic map
+    // adequately is, of course, a different
+    // question altogether.
+    //
+    // The terms are added strictly in the
+    // order vertices 0..3, then edges 0..3,
+    // as one left-to-right sum would do.
+    Point<3> sum = quad->vertex(0);
+    for (unsigned int v=1; v<4; ++v)
+      sum += quad->vertex(v);
+
+    // existing midpoint vertex if the edge
+    // has children, its center otherwise
+    for (unsigned int l=0; l<4; ++l)
+      sum += (quad->line(l)->has_children() ?
+              quad->line(l)->child(0)->vertex(1) :
+              quad->line(l)->center());
+
+    // mean of the 8 points
+    return sum / 8;
+  }
+}
+
+
+
+// Generic case: the quad midpoint is whatever FlatManifold computes for it.
+template <int dim, int spacedim>
+Point<spacedim> StraightBoundary<dim, spacedim>::get_new_point_on_quad (
+  const typename Triangulation<dim, spacedim>::quad_iterator &quad) const
+{
+  return FlatManifold<dim,spacedim>::get_new_point_on_quad(quad);
+}
+
+
+// <2,3> specialization: delegates to compute_new_point_on_quad<2>.
+template <>
+Point<3> StraightBoundary<2,3>::get_new_point_on_quad (
+  const Triangulation<2,3>::quad_iterator &quad) const
+{
+  return compute_new_point_on_quad<2> (quad);
+}
+
+
+
+// 3d specialization: delegates to compute_new_point_on_quad<3>.
+template <>
+Point<3> StraightBoundary<3>::get_new_point_on_quad (
+  const Triangulation<3>::quad_iterator &quad) const
+{
+  return compute_new_point_on_quad<3> (quad);
+}
+
+
+
+// Overwrite every entry of `points` with a point on the straight segment
+// between the two vertices of `line`. The positions along the segment are
+// the interior support points of get_line_support_points(), which keeps
+// the result consistent with MappingQ.
+template <int dim, int spacedim>
+void StraightBoundary<dim, spacedim>::get_intermediate_points_on_line (
+  const typename Triangulation<dim, spacedim>::line_iterator &line,
+  std::vector<Point<spacedim> >                              &points) const
+{
+  const unsigned int n_points = points.size();
+  Assert(n_points>0, ExcInternalError());
+
+  const std::vector<Point<1> > &support = this->get_line_support_points(n_points);
+  const Point<spacedim> first_vertex  = line->vertex(0);
+  const Point<spacedim> second_vertex = line->vertex(1);
+
+  // support[0] is skipped: output q is placed at support[q+1]
+  for (unsigned int q=0; q<n_points; ++q)
+    {
+      const double x = support[1+q][0];
+      points[q] = (1-x)*first_vertex + x*second_vertex;
+    }
+}
+
+
+
+
+// Generic quad overload: asserts ExcImpossibleInDim(dim); arguments are unused.
+template <int dim, int spacedim>
+void StraightBoundary<dim, spacedim>::get_intermediate_points_on_quad (
+  const typename Triangulation<dim, spacedim>::quad_iterator & /*quad*/,
+  std::vector<Point<spacedim> > & /*points*/) const
+{
+  Assert(false, ExcImpossibleInDim(dim));
+}
+
+
+
+// 3d specialization. `points` is expected to hold m*m entries; entry i*m+j
+// is overwritten with the bilinear blend of the four quad vertices at
+// (x,y) = (s[1+j], s[1+i]), where s are the line support points obtained
+// for m intermediate points.
+template <>
+void StraightBoundary<3>::get_intermediate_points_on_quad (
+  const Triangulation<3>::quad_iterator &quad,
+  std::vector<Point<3> >                &points) const
+{
+  const unsigned int n_points = points.size();
+  const unsigned int m
+    = static_cast<unsigned int>(std::sqrt(static_cast<double>(n_points)));
+
+  // the number of requested points has to be a perfect square
+  Assert(m*m==n_points, ExcInternalError());
+
+  const std::vector<Point<1> > &support = this->get_line_support_points(m);
+
+  const Point<3> v0 = quad->vertex(0);
+  const Point<3> v1 = quad->vertex(1);
+  const Point<3> v2 = quad->vertex(2);
+  const Point<3> v3 = quad->vertex(3);
+
+  for (unsigned int row=0; row<m; ++row)
+    for (unsigned int col=0; col<m; ++col)
+      {
+        const double x = support[1+col][0];
+        const double y = support[1+row][0];
+
+        // interpolate along x on both vertex pairs, then blend along y
+        points[row*m+col] = ((1-x) * v0 + x * v1) * (1-y) +
+                            ((1-x) * v2 + x * v3) * y;
+      }
+}
+
+
+
+// <2,3> specialization. `points` is expected to hold m*m entries; entry
+// i*m+j is overwritten with the bilinear blend of the four quad vertices
+// at (x,y) = (s[1+j], s[1+i]), where s are the line support points
+// obtained for m intermediate points.
+template <>
+void StraightBoundary<2,3>::get_intermediate_points_on_quad (
+  const Triangulation<2,3>::quad_iterator &quad,
+  std::vector<Point<3> >                  &points) const
+{
+  const unsigned int n_points = points.size();
+  const unsigned int m
+    = static_cast<unsigned int>(std::sqrt(static_cast<double>(n_points)));
+
+  // the number of requested points has to be a perfect square
+  Assert(m*m==n_points, ExcInternalError());
+
+  const std::vector<Point<1> > &support = this->get_line_support_points(m);
+
+  const Point<3> v0 = quad->vertex(0);
+  const Point<3> v1 = quad->vertex(1);
+  const Point<3> v2 = quad->vertex(2);
+  const Point<3> v3 = quad->vertex(3);
+
+  for (unsigned int row=0; row<m; ++row)
+    for (unsigned int col=0; col<m; ++col)
+      {
+        const double x = support[1+col][0];
+        const double y = support[1+row][0];
+
+        // interpolate along x on both vertex pairs, then blend along y
+        points[row*m+col] = ((1-x) * v0 + x * v1) * (1-y) +
+                            ((1-x) * v2 + x * v3) * y;
+      }
+}
+
+
+
+// <1,1> specialization: asserts ExcNotImplemented, returns a default tensor.
+template <>
+Tensor<1,1> StraightBoundary<1,1>::normal_vector (
+  const Triangulation<1,1>::face_iterator & /*face*/,
+  const Point<1> & /*p*/) const
+{
+  Assert (false, ExcNotImplemented());
+  return Tensor<1,1>();
+}
+
+
+// <1,2> specialization: asserts ExcNotImplemented, returns a default tensor.
+template <>
+Tensor<1,2> StraightBoundary<1,2>::normal_vector (
+  const Triangulation<1,2>::face_iterator & /*face*/,
+  const Point<2> & /*p*/) const
+{
+  Assert (false, ExcNotImplemented());
+  return Tensor<1,2>();
+}
+
+
+// <1,3> specialization: asserts ExcNotImplemented, returns a default tensor.
+template <>
+Tensor<1,3> StraightBoundary<1,3>::normal_vector (
+  const Triangulation<1,3>::face_iterator & /*face*/,
+  const Point<3> & /*p*/) const
+{
+  Assert (false, ExcNotImplemented());
+  return Tensor<1,3>();
+}
+
+
+namespace internal
+{
+  namespace
+  {
+    /**
+     * Unit vector from the alternating (cross) product of the dim-1
+     * tangent vectors passed in; this overload takes one vector in 2d.
+     */
+    Tensor<1,2>
+    normalized_alternating_product (const Tensor<1,2> (&basis_vectors)[1])
+    {
+      const Tensor<1,2> normal = cross_product_2d (basis_vectors[0]);
+      return normal/normal.norm();
+    }
+
+    /**
+     * One vector in 3d: not implemented, asserts ExcNotImplemented. Reached
+     * from StraightBoundary<2,3>::normal_vector, whose implementation is
+     * bogus for that case anyway (see the assertion at its beginning).
+     */
+    Tensor<1,3>
+    normalized_alternating_product (const Tensor<1,3> ( &)[1])
+    {
+      Assert (false, ExcNotImplemented());
+      return Tensor<1,3>();
+    }
+
+    /**
+     * Two vectors in 3d: the normalized cross product of the pair.
+     */
+    Tensor<1,3>
+    normalized_alternating_product (const Tensor<1,3> (&basis_vectors)[2])
+    {
+      const Tensor<1,3> normal = cross_product_3d (basis_vectors[0], basis_vectors[1]);
+      return normal/normal.norm();
+    }
+  }
+}
+
+
+// Normal to `face` at the point of the face closest to `p`, computed as the
+// normalized alternating product of the face's tangent vectors at that point.
+template <int dim, int spacedim>
+Tensor<1,spacedim>
+StraightBoundary<dim,spacedim>::
+normal_vector (const typename Triangulation<dim,spacedim>::face_iterator &face,
+               const Point<spacedim> &p) const
+{
+  // I don't think the implementation below will work when dim!=spacedim;
+  // in fact, I believe that we don't even have enough information here,
+  // because we would need to know not only about the tangent vectors
+  // of the face, but also of the cell, to compute the normal vector.
+  // Someone will have to think about this some more.
+  Assert (dim == spacedim, ExcNotImplemented());
+
+  // in order to find out what the normal vector is, we first need to
+  // find the reference coordinates of the point p on the given face,
+  // or at least the reference coordinates of the closest point on the
+  // face
+  //
+  // in other words, we need to find a point xi so that f(xi)=||F(xi)-p||^2->min
+  // where F(xi) is the mapping. this algorithm is implemented in
+  // MappingQ1<dim,spacedim>::transform_real_to_unit_cell but only for cells,
+  // while we need it for faces here. it's also implemented in somewhat
+  // more generality there using the machinery of the MappingQ1 class
+  // while we really only need it for a specific case here
+  //
+  // in any case, the iteration we use here is a Gauss-Newton's iteration with
+  //   xi^{n+1} = xi^n - H(xi^n)^{-1} J(xi^n)
+  // where
+  //   J(xi) = (grad F(xi))^T (F(xi)-p)
+  // and
+  //   H(xi) = [grad F(xi)]^T [grad F(xi)]
+  // In all this,
+  //   F(xi) = sum_v vertex[v] phi_v(xi)
+  // We get the shape functions phi_v from an object of type FE_Q<dim-1>(1)
+  //
+  // we start with the point xi=1/2, xi=(1/2,1/2), ...
+  const unsigned int facedim = dim-1;
+  Point<facedim> xi;
+  for (unsigned int i=0; i<facedim; ++i)
+    xi[i] = 1./2;
+
+  FE_Q<facedim> linear_fe(1);
+  const double eps = 1e-12;
+  Tensor<1,spacedim> grad_F[facedim];
+  unsigned int iteration = 0;
+  while (true)
+    {
+      Point<spacedim> F;
+      for (unsigned int v=0; v<GeometryInfo<facedim>::vertices_per_cell; ++v)
+        F += face->vertex(v) * linear_fe.shape_value(v, xi);
+
+      for (unsigned int i=0; i<facedim; ++i)
+        {
+          grad_F[i] = 0;
+          for (unsigned int v=0; v<GeometryInfo<facedim>::vertices_per_cell; ++v)
+            grad_F[i] += face->vertex(v) * linear_fe.shape_grad(v, xi)[i];
+        }
+
+      Tensor<1,facedim> J;
+      for (unsigned int i=0; i<facedim; ++i)
+        for (unsigned int j=0; j<spacedim; ++j)
+          J[i] += grad_F[i][j] * (F-p)[j];
+
+      Tensor<2,facedim> H;
+      for (unsigned int i=0; i<facedim; ++i)
+        for (unsigned int j=0; j<facedim; ++j)
+          for (unsigned int k=0; k<spacedim; ++k)
+            H[i][j] += grad_F[i][k] * grad_F[j][k];
+
+      const Tensor<1,facedim> delta_xi = -invert(H) * J;
+      xi += delta_xi;
+      ++iteration;
+
+      // test for convergence first, so that a step that converges in the
+      // 10th iteration is not reported as a failure
+      if (delta_xi.norm() < eps)
+        break;
+
+      Assert (iteration<10,
+              ExcMessage("The Newton iteration to find the reference point "
+                         "did not converge in 10 iterations. Do you have a "
+                         "deformed cell? (See the glossary for a definition "
+                         "of what a deformed cell is.) You may want to output "
+                         "the vertices of your cell."));
+    }
+
+  // with the reference coordinates xi of p known, the normal vector is the
+  // normalized alternating product of the tangent vectors stored in grad_F
+  return internal::normalized_alternating_product(grad_F);
+}
+
+
+
+// 1d specialization: asserts ExcImpossibleInDim(1); arguments are unused.
+template <>
+void StraightBoundary<1>::get_normals_at_vertices (
+  const Triangulation<1>::face_iterator & /*face*/,
+  Boundary<1,1>::FaceVertexNormals & /*face_vertex_normals*/) const
+{
+  Assert (false, ExcImpossibleInDim(1));
+}
+
+// <1,2> specialization: asserts ExcNotImplemented; arguments are unused.
+template <>
+void StraightBoundary<1,2>::get_normals_at_vertices (
+  const Triangulation<1,2>::face_iterator & /*face*/,
+  Boundary<1,2>::FaceVertexNormals & /*face_vertex_normals*/) const
+{
+  Assert (false, ExcNotImplemented());
+}
+
+
+// <1,3> specialization: asserts ExcNotImplemented; arguments are unused.
+template <>
+void StraightBoundary<1,3>::get_normals_at_vertices (
+  const Triangulation<1,3>::face_iterator & /*face*/,
+  Boundary<1,3>::FaceVertexNormals & /*face_vertex_normals*/) const
+{
+  Assert (false, ExcNotImplemented());
+}
+
+
+
+// Every face vertex receives the same (unnormalized) normal: the tangent
+// t = vertex(1)-vertex(0) turned into (t[1], -t[0]).
+template <>
+void StraightBoundary<2>::get_normals_at_vertices (
+  const Triangulation<2>::face_iterator &face,
+  Boundary<2,2>::FaceVertexNormals      &face_vertex_normals) const
+{
+  const Tensor<1,2> tangent = face->vertex(1) - face->vertex(0);
+  const Point<2>    normal (tangent[1], -tangent[0]);
+  for (unsigned int v=0; v<GeometryInfo<2>::vertices_per_face; ++v)
+    face_vertex_normals[v] = normal;
+}
+
+// <2,3> specialization, flagged as not implemented: the values written are
+// (t[1], -t[0], 0) for the tangent t, and the function ends in an assertion.
+template <>
+void StraightBoundary<2,3>::get_normals_at_vertices (
+  const Triangulation<2,3>::face_iterator &face,
+  Boundary<2,3>::FaceVertexNormals        &face_vertex_normals) const
+{
+  const Tensor<1,3> tangent = face->vertex(1) - face->vertex(0);
+  const Point<3>    normal (tangent[1], -tangent[0], 0);
+  for (unsigned int v=0; v<GeometryInfo<2>::vertices_per_face; ++v)
+    face_vertex_normals[v] = normal;
+  Assert(false, ExcNotImplemented());
+}
+
+
+
+
+// 3d case: at each of the four face vertices the normal is the cross
+// product of the two edge vectors leaving that vertex. The result is not
+// normalized.
+template <>
+void StraightBoundary<3>::get_normals_at_vertices (
+  const Triangulation<3>::face_iterator &face,
+  Boundary<3,3>::FaceVertexNormals      &face_vertex_normals) const
+{
+  // for each face vertex, the two vertices used to form its tangents
+  static const unsigned int neighboring_vertices[4][2]=
+  { {1,2},{3,0},{0,3},{2,1}};
+
+  const unsigned int vertices_per_face = GeometryInfo<3>::vertices_per_face;
+  for (unsigned int v=0; v<vertices_per_face; ++v)
+    {
+      const Point<3> &here = face->vertex(v);
+
+      // tangents along the two lines radiating away from this vertex
+      const Tensor<1,3> first_tangent
+        = face->vertex(neighboring_vertices[v][0]) - here;
+      const Tensor<1,3> second_tangent
+        = face->vertex(neighboring_vertices[v][1]) - here;
+
+      face_vertex_normals[v] = cross_product_3d(first_tangent, second_tangent);
+    }
+}
+
+
+
+// Orthogonal projection of `trial_point` onto the straight line through
+// the two vertices of `line`. Because the mapping to the line is linear,
+// the formulas reduce to a single closed-form step. For spacedim<=1 the
+// point is returned unchanged.
+template <int dim, int spacedim>
+Point<spacedim> StraightBoundary<dim, spacedim>::project_to_surface (
+  const typename Triangulation<dim, spacedim>::line_iterator &line,
+  const Point<spacedim>                                      &trial_point) const
+{
+  if (spacedim <= 1)
+    return trial_point;
+
+  const Point<spacedim> p1 = line->vertex(0);
+  const Point<spacedim> p2 = line->vertex(1);
+
+  // direction of the line; s is the coordinate of the projected point
+  // along it, measured from p1 in units of the vector p2-p1
+  const Tensor<1,spacedim> direction = p2-p1;
+  const double s = (trial_point-p1)*direction / (direction*direction);
+
+  return p1 + s*direction;
+}
+
+
+
+namespace internal
+{
+  template <typename Iterator, int spacedim, int dim>
+  Point<spacedim>
+  compute_projection (const Iterator        &object,
+                      const Point<spacedim> &y,
+                      internal::int2type<dim>)
+  {
+    // find the point closest to y on the
+    // object; for simplicity, look at a quad
+    // (dim==2) in a space with spacedim>2:
+
+    // all points on the surface are given by
+    //   x(\xi) = sum_i v_i phi_i(\xi)
+    // where v_i are the vertices of the quad,
+    // and \xi=(\xi_1,\xi_2) are the reference
+    // coordinates of the quad. so what we are
+    // trying to do is find a point x on
+    // the surface that is closest to the point
+    // y. there are different ways
+    // to solve this problem, but in the end
+    // it's a nonlinear problem and we have to
+    // find reference coordinates \xi so that
+    //   J(\xi) = 1/2 || x(\xi)-y ||^2
+    // is minimal. x(\xi) is a function that
+    // is dim-linear in \xi, so J(\xi) is
+    // a polynomial of degree 2*dim that
+    // we'd like to minimize. unless dim==1,
+    // we'll have to use a Newton
+    // method to find the
+    // answer. This leads to the
+    // following formulation of
+    // Newton steps:
+    //
+    // Given \xi_k, find \delta\xi_k so that
+    //   H_k \delta\xi_k = - F_k
+    // where H_k is an approximation to the
+    // second derivatives of J at \xi_k, and
+    // F_k is the first derivative of J.
+    // We'll iterate this until the update
+    // is small: the loop below stops once
+    // ||\delta\xi||<1e-5. NOTE(review): it
+    // has no cap on the iteration count.
+    //
+    // As for the Hessian, the best choice
+    // would be
+    //   H_k = J''(\xi_k)
+    // but we'll opt for the simpler
+    // Gauss-Newton form
+    //   H_k = A^T A
+    // i.e.
+    //   (H_k)_{nm} = \sum_{i,j} v_i*v_j *
+    //                   \partial_n phi_i *
+    //                   \partial_m phi_j
+    // we start at xi=(0.5,0.5).
+    Point<dim> xi;
+    for (unsigned int d=0; d<dim; ++d)
+      xi[d] = 0.5;
+
+    Point<spacedim> x_k;
+    for (unsigned int i=0; i<GeometryInfo<dim>::vertices_per_cell; ++i)
+      x_k += object->vertex(i) *
+             GeometryInfo<dim>::d_linear_shape_function (xi, i);
+
+    do
+      {
+        Tensor<1,dim> F_k;
+        for (unsigned int i=0; i<GeometryInfo<dim>::vertices_per_cell; ++i)
+          F_k += (x_k-y)*object->vertex(i) *
+                 GeometryInfo<dim>::d_linear_shape_function_gradient (xi, i);
+
+        Tensor<2,dim> H_k;
+        for (unsigned int i=0; i<GeometryInfo<dim>::vertices_per_cell; ++i)
+          for (unsigned int j=0; j<GeometryInfo<dim>::vertices_per_cell; ++j)
+            {
+              Tensor<2, dim> tmp = outer_product(
+                                     GeometryInfo<dim>::d_linear_shape_function_gradient(xi, i),
+                                     GeometryInfo<dim>::d_linear_shape_function_gradient(xi, j));
+              H_k += (object->vertex(i) * object->vertex(j)) * tmp;
+            }
+
+        const Tensor<1,dim> delta_xi = - invert(H_k) * F_k;
+        xi += delta_xi;
+
+        x_k = Point<spacedim>();
+        for (unsigned int i=0; i<GeometryInfo<dim>::vertices_per_cell; ++i)
+          x_k += object->vertex(i) *
+                 GeometryInfo<dim>::d_linear_shape_function (xi, i);
+
+        if (delta_xi.norm() < 1e-5)
+          break;
+      }
+    while (true);
+
+    return x_k;
+  }
+
+
+  // specialization for a quad in 1d: y is returned unchanged
+  template <typename Iterator>
+  Point<1>
+  compute_projection (const Iterator &,
+                      const Point<1> &y,
+                      /* it's a quad: */internal::int2type<2>)
+  {
+    return y;
+  }
+
+  // specialization for a quad in 2d: y is returned unchanged
+  template <typename Iterator>
+  Point<2>
+  compute_projection (const Iterator &,
+                      const Point<2> &y,
+                      /* it's a quad: */internal::int2type<2>)
+  {
+    return y;
+  }
+}
+
+
+
+
+
+// <1,3> specialization for quads: y is returned unchanged, iterator unused.
+template <>
+Point<3> StraightBoundary<1,3>::project_to_surface (
+  const Triangulation<1, 3>::quad_iterator & /*quad*/,
+  const Point<3> &y) const
+{
+  return y;
+}
+
+//TODO[SP]: This is just a horrible way out to make it compile in codim 2.
+// Quad projection: identity for spacedim<=2, otherwise delegated to
+// internal::compute_projection with the tag for a two-dimensional object.
+template <int dim, int spacedim>
+Point<spacedim> StraightBoundary<dim, spacedim>::project_to_surface (
+  const typename Triangulation<dim, spacedim>::quad_iterator &quad,
+  const Point<spacedim>                                      &y) const
+{
+  if (spacedim > 2)
+    return internal::compute_projection (quad, y,
+                                         /* it's a quad */internal::int2type<2>());
+  return y;
+}
+
+
+
+// Hex projection: identity for spacedim<=3. Anything beyond that is not
+// implemented and asserts ExcNotImplemented before returning a default
+// point.
+template <int dim, int spacedim>
+Point<spacedim> StraightBoundary<dim, spacedim>::project_to_surface (
+  const typename Triangulation<dim, spacedim>::hex_iterator & /*hex*/,
+  const Point<spacedim> &trial_point) const
+{
+  if (spacedim > 3)
+    {
+      // the generic projection routine used for quads could presumably
+      // be called here as well, since it is written in a generic way --
+      // but someone needs to check whether that actually yields the
+      // correct result
+      Assert (false, ExcNotImplemented());
+      return Point<spacedim>();
+    }
+
+  return trial_point;
+}
+
+
+
+// explicit instantiations
+#include "tria_boundary.inst"
+
+DEAL_II_NAMESPACE_CLOSE
+
diff --git a/source/grid/tria_boundary.inst.in b/source/grid/tria_boundary.inst.in
new file mode 100644
index 0000000..1472e48
--- /dev/null
+++ b/source/grid/tria_boundary.inst.in
@@ -0,0 +1,27 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// For each listed combination of dimension and space dimension, instantiate
+// Boundary and StraightBoundary if the dimension is <= the space dimension.
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+#if deal_II_dimension <= deal_II_space_dimension 	
+    template class Boundary<deal_II_dimension, deal_II_space_dimension>;
+    template class StraightBoundary<deal_II_dimension, deal_II_space_dimension>;
+#endif
+  }
+
+
+
diff --git a/source/grid/tria_boundary_lib.cc b/source/grid/tria_boundary_lib.cc
new file mode 100644
index 0000000..268c7c1
--- /dev/null
+++ b/source/grid/tria_boundary_lib.cc
@@ -0,0 +1,1493 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/grid/tria_boundary_lib.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/base/tensor.h>
+#include <cmath>
+
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+// Constructor for a cylinder whose axis is parallel to a coordinate axis.
+// Stores @p radius, uses the unit vector returned by get_axis_vector() for
+// coordinate index @p axis as the axis direction, and takes a
+// default-constructed point as the point on the axis.
+template <int dim, int spacedim>
+CylinderBoundary<dim,spacedim>::CylinderBoundary (const double       radius,
+                                                  const unsigned int axis)
+  : radius (radius),
+    direction (get_axis_vector (axis)),
+    point_on_axis (Point<spacedim>())
+{
+}
+
+
+// Constructor for a cylinder with an arbitrary axis. Stores @p radius and
+// @p point_on_axis as given; @p direction is divided by its own norm before
+// it is stored, so the stored direction has unit length.
+template <int dim, int spacedim>
+CylinderBoundary<dim,spacedim>::CylinderBoundary (const double           radius,
+                                                  const Point<spacedim> &direction,
+                                                  const Point<spacedim> &point_on_axis)
+  : radius (radius),
+    direction (direction / direction.norm()),
+    point_on_axis (point_on_axis)
+{
+}
+
+
+// Return a point whose component with index @p axis is set to one; all
+// other components keep the value a default-constructed Point gives them.
+// Asserts that @p axis is a valid coordinate index.
+template <int dim, int spacedim>
+Point<spacedim>
+CylinderBoundary<dim,spacedim>::get_axis_vector (const unsigned int axis)
+{
+  Assert (axis < spacedim, ExcIndexRange (axis, 0, spacedim));
+
+  Point<spacedim> unit_vector;
+  unit_vector[axis] = 1;
+
+  return unit_vector;
+}
+
+
+
+// Return the new point for @p line: the point proposed by StraightBoundary,
+// moved radially so that its distance from the cylinder axis equals the
+// stored radius. A proposal that lies (up to a relative tolerance) on the
+// axis itself has no radial direction and is returned unchanged.
+template <int dim, int spacedim>
+Point<spacedim>
+CylinderBoundary<dim,spacedim>::
+get_new_point_on_line (const typename Triangulation<dim,spacedim>::line_iterator &line) const
+{
+  // start from the proposal of the straight boundary
+  const Point<spacedim> midpoint = StraightBoundary<dim,spacedim>::get_new_point_on_line (line);
+
+  // split the offset from the reference point on the axis into its
+  // component along the axis and the remainder perpendicular to it
+  const Tensor<1,spacedim> offset           = midpoint - point_on_axis;
+  const double             axial_coordinate = offset * direction;
+  const Tensor<1,spacedim> radial_part      = offset - axial_coordinate * direction;
+  const double             radial_distance  = radial_part.norm();
+
+  // a point on the axis cannot be pushed outward in a unique direction
+  if (radial_distance <= 1e-10 * midpoint.norm())
+    return midpoint;
+
+  // rescale the radial part to the cylinder radius and reassemble the point
+  return Point<spacedim>(radial_part / radial_distance * radius +
+                         axial_coordinate * direction +
+                         point_on_axis);
+}
+
+
+
+// Specialization for dim=3: return the StraightBoundary proposal for
+// @p quad, moved radially onto the cylinder surface. The projection is the
+// same as in get_new_point_on_line(); a proposal on the axis is returned
+// unchanged.
+template<>
+Point<3>
+CylinderBoundary<3>::
+get_new_point_on_quad (const Triangulation<3>::quad_iterator &quad) const
+{
+  const unsigned int spacedim = 3;
+
+  const Point<3> midpoint = StraightBoundary<3,3>::get_new_point_on_quad (quad);
+
+  // decompose the offset from the axis reference point
+  const Tensor<1,spacedim> offset           = midpoint - point_on_axis;
+  const double             axial_coordinate = offset * direction;
+  const Tensor<1,spacedim> radial_part      = offset - axial_coordinate * direction;
+  const double             radial_distance  = radial_part.norm();
+
+  if (radial_distance <= 1e-10 * midpoint.norm())
+    return midpoint;
+
+  return Point<3>(radial_part / radial_distance * radius +
+                  axial_coordinate * direction +
+                  point_on_axis);
+}
+
+// Specialization for a two-dimensional mesh embedded in three space
+// dimensions: return the StraightBoundary proposal for @p quad, moved
+// radially onto the cylinder surface. A proposal on the axis is returned
+// unchanged.
+template<>
+Point<3>
+CylinderBoundary<2,3>::
+get_new_point_on_quad (const Triangulation<2,3>::quad_iterator &quad) const
+{
+  const unsigned int spacedim = 3;
+
+  const Point<3> midpoint = StraightBoundary<2,3>::get_new_point_on_quad (quad);
+
+  // decompose the offset from the axis reference point
+  const Tensor<1,spacedim> offset           = midpoint - point_on_axis;
+  const double             axial_coordinate = offset * direction;
+  const Tensor<1,spacedim> radial_part      = offset - axial_coordinate * direction;
+  const double             radial_distance  = radial_part.norm();
+
+  if (radial_distance <= 1e-10 * midpoint.norm())
+    return midpoint;
+
+  return Point<3>(radial_part / radial_distance * radius +
+                  axial_coordinate * direction +
+                  point_on_axis);
+}
+
+
+// Generic version, used for all template arguments that have no explicit
+// specialization: raises ExcImpossibleInDim through Assert and returns a
+// default-constructed point.
+template <int dim, int spacedim>
+Point<spacedim>
+CylinderBoundary<dim,spacedim>::get_new_point_on_quad (
+  const typename Triangulation<dim,spacedim>::quad_iterator &) const
+{
+  Assert (false, ExcImpossibleInDim(dim));
+
+  return Point<spacedim>();
+}
+
+
+
+// Fill @p points with points on the cylinder surface along @p line. The
+// number of points requested is the size of @p points on entry. A single
+// point is obtained from get_new_point_on_line(); several points are
+// interpolated between the two end vertices of the line.
+template <int dim, int spacedim>
+void
+CylinderBoundary<dim,spacedim>::get_intermediate_points_on_line (
+  const typename Triangulation<dim,spacedim>::line_iterator &line,
+  std::vector<Point<spacedim> > &points) const
+{
+  if (points.size() != 1)
+    {
+      get_intermediate_points_between_points (line->vertex(0), line->vertex(1), points);
+      return;
+    }
+
+  points[0] = get_new_point_on_line (line);
+}
+
+
+// Fill @p points with points on the cylinder surface between @p v0 and
+// @p v1. Each point is first interpolated linearly between the two end
+// points, at the parameter given by the line support points, and then moved
+// radially onto the cylinder. An interpolated point on the axis is kept as
+// it is.
+template <int dim, int spacedim>
+void
+CylinderBoundary<dim,spacedim>::get_intermediate_points_between_points (
+  const Point<spacedim> &v0,
+  const Point<spacedim> &v1,
+  std::vector<Point<spacedim> > &points) const
+{
+  const unsigned int n_points = points.size();
+  Assert(n_points>0, ExcInternalError());
+
+  // entry i+1 of the support points is used for the i-th interior point
+  const std::vector<Point<1> > &support_points = this->get_line_support_points(n_points);
+
+  for (unsigned int p=0; p<n_points; ++p)
+    {
+      const double t = support_points[p+1][0];
+      const Point<spacedim> candidate = (1-t)*v0 + t*v1;
+
+      // same projection as in get_new_point_on_line()
+      const Tensor<1,spacedim> offset           = candidate - point_on_axis;
+      const double             axial_coordinate = offset * direction;
+      const Tensor<1,spacedim> radial_part      = offset - axial_coordinate * direction;
+      const double             radial_distance  = radial_part.norm();
+
+      if (radial_distance <= 1e-10 * candidate.norm())
+        {
+          points[p] = candidate;
+          continue;
+        }
+
+      points[p] = Point<spacedim>(radial_part / radial_distance * radius +
+                                  axial_coordinate * direction +
+                                  point_on_axis);
+    }
+}
+
+
+
+// Specialization for dim=3: fill @p points with an m x m array of points on
+// the cylinder surface inside @p quad, where m*m is the size of @p points
+// on entry. Intermediate points are generated on lines 0 and 1 of the quad;
+// matching pairs are then connected, and the points found on each
+// connection make up one row of the result.
+template <>
+void
+CylinderBoundary<3>::get_intermediate_points_on_quad (
+  const Triangulation<3>::quad_iterator &quad,
+  std::vector<Point<3> > &points) const
+{
+  if (points.size()==1)
+    {
+      points[0] = get_new_point_on_quad (quad);
+      return;
+    }
+
+  // the number of points must be a perfect square
+  const unsigned int n_per_direction
+    = static_cast<unsigned int> (std::sqrt(static_cast<double>(points.size())));
+  Assert(points.size()==n_per_direction*n_per_direction, ExcInternalError());
+
+  std::vector<Point<3> > on_line_0 (n_per_direction);
+  std::vector<Point<3> > on_line_1 (n_per_direction);
+
+  get_intermediate_points_on_line (quad->line(0), on_line_0);
+  get_intermediate_points_on_line (quad->line(1), on_line_1);
+
+  std::vector<Point<3> > row (n_per_direction);
+  for (unsigned int i=0; i<n_per_direction; ++i)
+    {
+      get_intermediate_points_between_points (on_line_0[i], on_line_1[i], row);
+
+      for (unsigned int j=0; j<n_per_direction; ++j)
+        points[i*n_per_direction+j] = row[j];
+    }
+}
+
+
+
+// Generic version, used for all template arguments that have no explicit
+// specialization: raises ExcImpossibleInDim through Assert and leaves the
+// output vector untouched.
+template <int dim, int spacedim>
+void
+CylinderBoundary<dim,spacedim>::get_intermediate_points_on_quad (
+  const typename Triangulation<dim,spacedim>::quad_iterator &,
+  std::vector<Point<spacedim> >                             &) const
+{
+  Assert (false, ExcImpossibleInDim(dim));
+}
+
+
+
+
+// Specialization for dim=1: raises ExcImpossibleInDim(1) through Assert and
+// does not touch the output argument.
+template <>
+void
+CylinderBoundary<1>::get_normals_at_vertices (
+  const Triangulation<1>::face_iterator &,
+  Boundary<1,1>::FaceVertexNormals      &) const
+{
+  Assert (false, ExcImpossibleInDim(1));
+}
+
+
+
+
+// Compute, for each vertex of @p face, the unit vector pointing from the
+// cylinder axis to that vertex, and store it in @p face_vertex_normals.
+//
+// NOTE(review): a vertex lying exactly on the axis leads to a division by a
+// zero norm here; no guard exists for that case.
+template <int dim, int spacedim>
+void
+CylinderBoundary<dim,spacedim>::
+get_normals_at_vertices (const typename Triangulation<dim,spacedim>::face_iterator &face,
+                         typename Boundary<dim,spacedim>::FaceVertexNormals &face_vertex_normals) const
+{
+  for (unsigned int i=0; i<GeometryInfo<dim>::vertices_per_face; ++i)
+    {
+      const Point<spacedim> position = face->vertex(i);
+
+      // remove the component along the axis; what is left is radial
+      const Tensor<1,spacedim> offset      = position - point_on_axis;
+      const double             axial_part  = offset * direction;
+      const Tensor<1,spacedim> radial_part = offset - axial_part * direction;
+
+      face_vertex_normals[i] = (radial_part / radial_part.norm());
+    }
+}
+
+
+
+// Return the radius stored in this object.
+template <int dim, int spacedim>
+double
+CylinderBoundary<dim,spacedim>::get_radius () const
+{
+  return this->radius;
+}
+
+
+//======================================================================//
+
+// Constructor. Stores the two radii @p radius_0 and @p radius_1 together
+// with the two points @p x_0 and @p x_1; the other member functions use
+// x_1 - x_0 as the axis of the cone.
+template<int dim>
+ConeBoundary<dim>::ConeBoundary (const double     radius_0,
+                                 const double     radius_1,
+                                 const Point<dim> x_0,
+                                 const Point<dim> x_1)
+  : radius_0 (radius_0),
+    radius_1 (radius_1),
+    x_0 (x_0),
+    x_1 (x_1)
+{
+}
+
+
+
+// Return the radius of the cone at the point @p x, obtained by linear
+// interpolation between radius_0 (at x_0) and radius_1 (at x_1).
+//
+// The interpolation parameter is read off a single coordinate, so @p x is
+// expected to lie on the line through x_0 and x_1 (all callers in this file
+// pass the projection of a point onto the axis). The coordinate used is the
+// one in which x_1 - x_0 has the largest magnitude. Taking merely the first
+// non-zero coordinate is ill-conditioned when that coordinate is of
+// round-off size (for instance an axis component computed as cos(pi/2)),
+// because the quotient below then amplifies the round-off in @p x. For an
+// axis with a single non-zero component both choices coincide.
+//
+// Returns 0 if x_0 and x_1 coincide.
+template<int dim>
+double ConeBoundary<dim>::get_radius (Point<dim> x) const
+{
+  // find the coordinate direction in which the axis extends furthest
+  unsigned int dominant        = 0;
+  double       dominant_extent = std::fabs (x_1 (0) - x_0 (0));
+  for (unsigned int i = 1; i < dim; ++i)
+    {
+      const double extent = std::fabs (x_1 (i) - x_0 (i));
+      if (extent > dominant_extent)
+        {
+          dominant        = i;
+          dominant_extent = extent;
+        }
+    }
+
+  // degenerate axis: no coordinate can be used for interpolation
+  if (dominant_extent == 0)
+    return 0;
+
+  return (radius_1 - radius_0) * (x (dominant) - x_0 (dominant))
+         / (x_1 (dominant) - x_0 (dominant)) + radius_0;
+}
+
+
+
+// Fill @p points with points on the cone surface between @p p0 and @p p1.
+// Each point is interpolated linearly between the end points, at the
+// parameter given by the line support points, and is then moved radially,
+// as seen from the cone axis, to the cone radius at its axial position.
+template<int dim>
+void
+ConeBoundary<dim>::
+get_intermediate_points_between_points (const Point<dim> &p0,
+                                        const Point<dim> &p1,
+                                        std::vector<Point<dim> > &points) const
+{
+  const unsigned int  n_points = points.size ();
+  const Tensor<1,dim> axis     = x_1 - x_0;
+
+  Assert (n_points > 0, ExcInternalError ());
+
+  // entry k+1 of the support points is used for the k-th interior point
+  const std::vector<Point<1> > &support_points = this->get_line_support_points(n_points);
+
+  for (unsigned int k=0; k<n_points; ++k)
+    {
+      const double t = support_points[k+1][0];
+
+      // linear interpolation between the two end points
+      const Point<dim> candidate = (1-t)*p0 + t*p1;
+
+      // orthogonal projection of the candidate onto the axis of the cone
+      const double     c       = (candidate - x_0) * axis / (axis*axis);
+      const Point<dim> on_axis = x_0 + c * axis;
+
+      // go from the axis outward, toward the candidate, by the local radius
+      points[k] = on_axis + get_radius (on_axis) *  (candidate - on_axis) / (candidate - on_axis).norm ();
+    }
+}
+
+// Return the new point for @p line: the StraightBoundary proposal, moved
+// radially, as seen from the cone axis, to the cone radius at its axial
+// position.
+template<int dim>
+Point<dim>
+ConeBoundary<dim>::
+get_new_point_on_line (const typename Triangulation<dim>::line_iterator &line) const
+{
+  const Tensor<1,dim> axis = x_1 - x_0;
+
+  // proposal of the straight boundary
+  const Point<dim> candidate = StraightBoundary<dim>::get_new_point_on_line (line);
+
+  // orthogonal projection of the proposal onto the axis of the cone
+  const double     c       = (candidate - x_0) * axis / (axis*axis);
+  const Point<dim> on_axis = x_0 + c * axis;
+
+  // go from the axis outward, toward the proposal, by the local radius
+  return on_axis + get_radius (on_axis) * (candidate - on_axis) / (candidate - on_axis).norm ();
+}
+
+
+
+// Specialization for dim=3: return the StraightBoundary proposal for
+// @p quad, moved radially, as seen from the cone axis, to the cone radius
+// at its axial position.
+template <>
+Point<3>
+ConeBoundary<3>::
+get_new_point_on_quad (const Triangulation<3>::quad_iterator &quad) const
+{
+  const int dim = 3;
+
+  const Tensor<1,dim> axis = x_1 - x_0;
+
+  // proposal of the straight boundary
+  const Point<dim> candidate = StraightBoundary<3,3>::get_new_point_on_quad (quad);
+
+  // orthogonal projection of the proposal onto the axis of the cone
+  const double     c       = (candidate - x_0) * axis / (axis*axis);
+  const Point<dim> on_axis = x_0 + c * axis;
+
+  // go from the axis outward, toward the proposal, by the local radius
+  return on_axis + get_radius (on_axis) * (candidate - on_axis) / (candidate - on_axis).norm ();
+}
+
+
+
+// Generic version, used for all dimensions that have no explicit
+// specialization: raises ExcImpossibleInDim through Assert and returns a
+// default-constructed point.
+template<int dim>
+Point<dim>
+ConeBoundary<dim>::get_new_point_on_quad (
+  const typename Triangulation<dim>::quad_iterator &) const
+{
+  Assert (false, ExcImpossibleInDim (dim));
+  return Point<dim>();
+}
+
+
+
+// Fill @p points with points on the cone surface along @p line. A single
+// requested point is obtained from get_new_point_on_line(); several points
+// are interpolated between the two end vertices of the line.
+template<int dim>
+void
+ConeBoundary<dim>::
+get_intermediate_points_on_line (const typename Triangulation<dim>::line_iterator &line,
+                                 std::vector<Point<dim> > &points) const
+{
+  if (points.size () != 1)
+    {
+      get_intermediate_points_between_points (line->vertex (0), line->vertex (1), points);
+      return;
+    }
+
+  points[0] = get_new_point_on_line (line);
+}
+
+
+
+
+// Specialization for dim=3: fill @p points with an n x n array of points on
+// the cone surface inside @p quad, where n*n is the size of @p points on
+// entry. Intermediate points are generated on lines 0 and 1 of the quad;
+// matching pairs are then connected, and the points found on each
+// connection make up one row of the result.
+template<>
+void
+ConeBoundary<3>::
+get_intermediate_points_on_quad (const Triangulation<3>::quad_iterator &quad,
+                                 std::vector<Point<3> > &points) const
+{
+  if (points.size () == 1)
+    {
+      points[0] = get_new_point_on_quad (quad);
+      return;
+    }
+
+  // the number of points must be a perfect square
+  const unsigned int n_per_direction
+    = static_cast<unsigned int> (std::sqrt (static_cast<double> (points.size ())));
+
+  Assert (points.size () == n_per_direction * n_per_direction, ExcInternalError ());
+
+  std::vector<Point<3> > on_line_0 (n_per_direction);
+  std::vector<Point<3> > on_line_1 (n_per_direction);
+
+  get_intermediate_points_on_line (quad->line (0), on_line_0);
+  get_intermediate_points_on_line (quad->line (1), on_line_1);
+
+  std::vector<Point<3> > row (n_per_direction);
+
+  for (unsigned int r = 0; r < n_per_direction; ++r)
+    {
+      get_intermediate_points_between_points (on_line_0[r],
+                                              on_line_1[r],
+                                              row);
+
+      for (unsigned int c = 0; c < n_per_direction; ++c)
+        points[r * n_per_direction + c] = row[c];
+    }
+}
+
+
+
+// Generic version, used for all dimensions that have no explicit
+// specialization: raises ExcImpossibleInDim through Assert and leaves the
+// output vector untouched.
+template <int dim>
+void
+ConeBoundary<dim>::get_intermediate_points_on_quad (
+  const typename Triangulation<dim>::quad_iterator &,
+  std::vector<Point<dim> >                         &) const
+{
+  Assert (false, ExcImpossibleInDim (dim));
+}
+
+
+
+
+// Specialization for dim=1: raises ExcImpossibleInDim(1) through Assert and
+// does not touch the output argument.
+template<>
+void
+ConeBoundary<1>::get_normals_at_vertices (
+  const Triangulation<1>::face_iterator &,
+  Boundary<1,1>::FaceVertexNormals      &) const
+{
+  Assert (false, ExcImpossibleInDim (1));
+}
+
+
+
+// Compute, for each vertex of @p face, the unit vector pointing from the
+// axis of the cone to that vertex, and store it in @p face_vertex_normals.
+template<int dim>
+void
+ConeBoundary<dim>::
+get_normals_at_vertices (const typename Triangulation<dim>::face_iterator &face,
+                         typename Boundary<dim>::FaceVertexNormals &face_vertex_normals) const
+{
+  const Tensor<1,dim> axis = x_1 - x_0;
+
+  for (unsigned int v = 0; v < GeometryInfo<dim>::vertices_per_face; ++v)
+    {
+      // orthogonal projection of the vertex onto the axis of the cone
+      const double     c       = (face->vertex (v) - x_0) * axis / (axis*axis);
+      const Point<dim> on_axis = x_0 + c * axis;
+
+      // vector from the projected point on the axis to the vertex
+      const Tensor<1,dim> outward = face->vertex (v) - on_axis;
+
+      face_vertex_normals[v] = outward / outward.norm ();
+    }
+}
+
+
+//======================================================================//
+
+// Constructor. Stores the center @p p and the given @p radius, and selects
+// the mode in which the stored radius is used rather than one computed from
+// the vertices.
+template <int dim, int spacedim>
+HyperBallBoundary<dim,spacedim>::HyperBallBoundary (const Point<spacedim> p,
+                                                    const double          radius)
+  : center (p),
+    radius (radius),
+    compute_radius_automatically (false)
+{
+}
+
+
+
+// Return the new point for @p line: the StraightBoundary proposal, moved
+// along the ray from the center so that its distance from the center equals
+// the radius. The radius is either the stored one or, in automatic mode,
+// the distance of the first vertex of the line from the center.
+template <int dim, int spacedim>
+Point<spacedim>
+HyperBallBoundary<dim,spacedim>::get_new_point_on_line (const typename Triangulation<dim,spacedim>::line_iterator &line) const
+{
+  // work relative to the center of the ball
+  Point<spacedim> candidate = StraightBoundary<dim,spacedim>::get_new_point_on_line (line);
+  candidate -= center;
+
+  const double target_radius = (compute_radius_automatically
+                                ?
+                                (line->vertex(0) - center).norm()
+                                :
+                                radius);
+
+  // rescale to the target radius, then shift back
+  candidate *= target_radius / std::sqrt(candidate.square());
+  candidate += center;
+
+  return candidate;
+}
+
+
+
+// Specialization for dim=1, spacedim=1: raises ExcInternalError through
+// Assert and returns a default-constructed point.
+template <>
+Point<1>
+HyperBallBoundary<1,1>::get_new_point_on_quad (
+  const Triangulation<1,1>::quad_iterator &) const
+{
+  Assert (false, ExcInternalError());
+
+  return Point<1>();
+}
+
+
+// Specialization for dim=1, spacedim=2: raises ExcInternalError through
+// Assert and returns a default-constructed point.
+template <>
+Point<2>
+HyperBallBoundary<1,2>::get_new_point_on_quad (
+  const Triangulation<1,2>::quad_iterator &) const
+{
+  Assert (false, ExcInternalError());
+
+  return Point<2>();
+}
+
+
+
+// Return the new point for @p quad: the StraightBoundary proposal, moved
+// along the ray from the center so that its distance from the center equals
+// the radius. The radius is either the stored one or, in automatic mode,
+// the distance of the first vertex of the quad from the center.
+template <int dim, int spacedim>
+Point<spacedim>
+HyperBallBoundary<dim,spacedim>::
+get_new_point_on_quad (const typename Triangulation<dim,spacedim>::quad_iterator &quad) const
+{
+  // work relative to the center of the ball
+  Point<spacedim> candidate = StraightBoundary<dim,spacedim>::get_new_point_on_quad (quad);
+  candidate -= center;
+
+  const double target_radius = (compute_radius_automatically
+                                ?
+                                (quad->vertex(0) - center).norm()
+                                :
+                                radius);
+
+  // rescale to the target radius, then shift back
+  candidate *= target_radius / std::sqrt(candidate.square());
+  candidate += center;
+
+  return candidate;
+}
+
+
+
+// Specialization for dim=1: raises ExcImpossibleInDim(1) through Assert and
+// leaves the output vector untouched.
+template <>
+void
+HyperBallBoundary<1>::get_intermediate_points_on_line (
+  const Triangulation<1>::line_iterator &,
+  std::vector<Point<1> >                &) const
+{
+  Assert (false, ExcImpossibleInDim(1));
+}
+
+
+
+// Fill @p points with points on the sphere along @p line. A single
+// requested point is obtained from get_new_point_on_line(); several points
+// are distributed between the two end vertices of the line.
+template <int dim, int spacedim>
+void
+HyperBallBoundary<dim,spacedim>::get_intermediate_points_on_line (
+  const typename Triangulation<dim,spacedim>::line_iterator &line,
+  std::vector<Point<spacedim> > &points) const
+{
+  if (points.size() != 1)
+    {
+      get_intermediate_points_between_points (line->vertex(0), line->vertex(1), points);
+      return;
+    }
+
+  points[0] = get_new_point_on_line (line);
+}
+
+
+
+// Fill @p points with points on the sphere between @p p0 and @p p1.
+//
+// Both end points are expected to lie on the sphere (checked by assertions).
+// The points are first placed on the straight chord from p0 to p1, at a
+// signed distance h*tan(beta) from the midpoint of the chord. Here h is the
+// distance of the chord midpoint from the center, and beta is the angle,
+// measured at the center, between the chord midpoint and the wanted point;
+// it is the full opening angle alpha scaled by the offset of the line
+// support point from 0.5. All points are then projected radially onto the
+// sphere.
+//
+// The radius is either the stored one or, in automatic mode, the distance
+// of @p p0 from the center.
+template <int dim, int spacedim>
+void
+HyperBallBoundary<dim,spacedim>::get_intermediate_points_between_points (
+  const Point<spacedim> &p0, const Point<spacedim> &p1,
+  std::vector<Point<spacedim> > &points) const
+{
+  const unsigned int n=points.size();
+  Assert(n>0, ExcInternalError());
+
+  // end points relative to the center, and the length of the chord
+  const Tensor<1,spacedim> v0=p0-center,
+                           v1=p1-center;
+  const double length=(v1-v0).norm();
+
+  // eps is only used inside the assertions below; the cast silences the
+  // unused-variable warning when they are compiled out
+  double eps=1e-12;
+  (void)eps;
+  double r=0;
+  if (compute_radius_automatically)
+    r = (p0 - center).norm();
+  else
+    r = radius;
+
+  Assert(std::fabs(v0*v0-r*r)<eps*r*r, ExcInternalError());
+  Assert(std::fabs(v1*v1-r*r)<eps*r*r, ExcInternalError());
+
+  // cosine of the angle between v0 and v1. For nearly parallel vectors
+  // round-off can push the quotient slightly outside [-1,1], for which
+  // std::acos returns NaN; clamp it to the valid range first.
+  double cos_alpha = (v0*v1)/std::sqrt((v0*v0)*(v1*v1));
+  if (cos_alpha > 1.)
+    cos_alpha = 1.;
+  else if (cos_alpha < -1.)
+    cos_alpha = -1.;
+
+  const double alpha=std::acos(cos_alpha);
+
+  // midpoint of the chord (relative to the center) and its distance from
+  // the center
+  const Tensor<1,spacedim> pm=0.5*(v0+v1);
+
+  const double h=pm.norm();
+
+  // n even:  m=n/2,
+  // n odd:   m=(n-1)/2
+  const std::vector<Point<1> > &line_points = this->get_line_support_points(n);
+  const unsigned int m=n/2;
+  for (unsigned int i=0; i<m ; ++i)
+    {
+      // place point i and its mirror image with respect to the chord
+      // midpoint in one go
+      const double beta = alpha * (line_points[i+1][0]-0.5);
+      const double d = h*std::tan(beta);
+      points[i]      = Point<spacedim>(pm+d/length*(v1-v0));
+      points[n-1-i]  = Point<spacedim>(pm-d/length*(v1-v0));
+    }
+
+  if ((n+1)%2==0)
+    // if the number of parts is even insert the midpoint
+    points[(n-1)/2] = Point<spacedim>(pm);
+
+
+  // project the points from the straight line to the HyperBallBoundary
+  for (unsigned int i=0; i<n; ++i)
+    {
+      points[i] *= r / std::sqrt(points[i].square());
+      points[i] += center;
+    }
+}
+
+
+
+// Specialization for dim=3: fill @p points with an m x m array of points on
+// the sphere inside @p quad, where m*m is the size of @p points on entry.
+// Intermediate points are generated on lines 0 and 1 of the quad; matching
+// pairs are then connected, and the points found on each connection make up
+// one row of the result.
+template <>
+void
+HyperBallBoundary<3>::get_intermediate_points_on_quad (
+  const Triangulation<3>::quad_iterator &quad,
+  std::vector<Point<3> > &points) const
+{
+  if (points.size()==1)
+    {
+      points[0] = get_new_point_on_quad (quad);
+      return;
+    }
+
+  // the number of points must be a perfect square
+  const unsigned int n_per_direction
+    = static_cast<unsigned int> (std::sqrt(static_cast<double>(points.size())));
+  Assert(points.size()==n_per_direction*n_per_direction, ExcInternalError());
+
+  std::vector<Point<3> > on_line_0 (n_per_direction);
+  std::vector<Point<3> > on_line_1 (n_per_direction);
+
+  get_intermediate_points_on_line (quad->line(0), on_line_0);
+  get_intermediate_points_on_line (quad->line(1), on_line_1);
+
+  std::vector<Point<3> > row (n_per_direction);
+  for (unsigned int i=0; i<n_per_direction; ++i)
+    {
+      get_intermediate_points_between_points (on_line_0[i], on_line_1[i], row);
+
+      for (unsigned int j=0; j<n_per_direction; ++j)
+        points[i*n_per_direction+j] = row[j];
+    }
+}
+
+
+
+// Specialization for a two-dimensional mesh embedded in three space
+// dimensions: fill @p points with an m x m array of points on the sphere
+// inside @p quad, where m*m is the size of @p points on entry. The
+// procedure is the same as in the dim=3 specialization.
+template <>
+void
+HyperBallBoundary<2,3>::get_intermediate_points_on_quad (
+  const Triangulation<2,3>::quad_iterator &quad,
+  std::vector<Point<3> > &points) const
+{
+  if (points.size()==1)
+    {
+      points[0] = get_new_point_on_quad (quad);
+      return;
+    }
+
+  // the number of points must be a perfect square
+  const unsigned int n_per_direction
+    = static_cast<unsigned int> (std::sqrt(static_cast<double>(points.size())));
+  Assert(points.size()==n_per_direction*n_per_direction, ExcInternalError());
+
+  std::vector<Point<3> > on_line_0 (n_per_direction);
+  std::vector<Point<3> > on_line_1 (n_per_direction);
+
+  get_intermediate_points_on_line (quad->line(0), on_line_0);
+  get_intermediate_points_on_line (quad->line(1), on_line_1);
+
+  std::vector<Point<3> > row (n_per_direction);
+  for (unsigned int i=0; i<n_per_direction; ++i)
+    {
+      get_intermediate_points_between_points (on_line_0[i], on_line_1[i], row);
+
+      for (unsigned int j=0; j<n_per_direction; ++j)
+        points[i*n_per_direction+j] = row[j];
+    }
+}
+
+
+
+// Generic version, used for all template arguments that have no explicit
+// specialization: raises ExcImpossibleInDim through Assert and leaves the
+// output vector untouched.
+template <int dim, int spacedim>
+void
+HyperBallBoundary<dim,spacedim>::get_intermediate_points_on_quad (
+  const typename Triangulation<dim,spacedim>::quad_iterator &,
+  std::vector<Point<spacedim> >                             &) const
+{
+  Assert (false, ExcImpossibleInDim(dim));
+}
+
+
+
+// Return the unit vector pointing from the center of the ball to @p p. The
+// face argument is not used.
+template <int dim, int spacedim>
+Tensor<1,spacedim>
+HyperBallBoundary<dim,spacedim>::
+normal_vector (const typename Triangulation<dim,spacedim>::face_iterator &,
+               const Point<spacedim> &p) const
+{
+  const Tensor<1,spacedim> radial = p-center;
+
+  return radial/radial.norm();
+}
+
+
+
+// Specialization for dim=1: raises ExcImpossibleInDim(1) through Assert and
+// does not touch the output argument.
+template <>
+void
+HyperBallBoundary<1>::get_normals_at_vertices (
+  const Triangulation<1>::face_iterator &,
+  Boundary<1,1>::FaceVertexNormals      &) const
+{
+  Assert (false, ExcImpossibleInDim(1));
+}
+
+// Specialization for dim=1, spacedim=2: raises ExcImpossibleInDim(1)
+// through Assert and does not touch the output argument.
+template <>
+void
+HyperBallBoundary<1,2>::get_normals_at_vertices (
+  const Triangulation<1,2>::face_iterator &,
+  Boundary<1,2>::FaceVertexNormals        &) const
+{
+  Assert (false, ExcImpossibleInDim(1));
+}
+
+
+
+// Store, for each vertex of @p face, the vector from the center of the ball
+// to that vertex in @p face_vertex_normals. The vectors are not scaled to
+// unit length here.
+template <int dim, int spacedim>
+void
+HyperBallBoundary<dim,spacedim>::
+get_normals_at_vertices (const typename Triangulation<dim,spacedim>::face_iterator &face,
+                         typename Boundary<dim,spacedim>::FaceVertexNormals &face_vertex_normals) const
+{
+  for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_face; ++v)
+    {
+      face_vertex_normals[v] = face->vertex(v)-center;
+    }
+}
+
+
+
+// Return the center stored in this object.
+template <int dim, int spacedim>
+Point<spacedim>
+HyperBallBoundary<dim,spacedim>::get_center () const
+{
+  return this->center;
+}
+
+
+
+// Return the radius stored in this object. Asserts that the object is not
+// in the mode in which the radius is computed automatically, since the
+// stored value is not the one in use then.
+template <int dim, int spacedim>
+double
+HyperBallBoundary<dim,spacedim>::get_radius () const
+{
+  Assert (!compute_radius_automatically, ExcRadiusNotSet());
+
+  return this->radius;
+}
+
+
+/* ---------------------------------------------------------------------- */
+
+
+// Constructor. Forwards @p center and @p radius to the HyperBallBoundary
+// base class.
+template <int dim>
+HalfHyperBallBoundary<dim>::HalfHyperBallBoundary (const Point<dim> center,
+                                                   const double     radius)
+  : HyperBallBoundary<dim> (center, radius)
+{
+}
+
+
+
+// Return the new point for @p line. A line is treated as part of the flat
+// side of the half ball, and its plain center is returned, if the x
+// coordinate of its center equals that of the ball center and at least one
+// of its vertices is not on the sphere (within a relative tolerance of
+// 1e-5). All other lines are handled by HyperBallBoundary.
+template <int dim>
+Point<dim>
+HalfHyperBallBoundary<dim>::
+get_new_point_on_line (const typename Triangulation<dim>::line_iterator &line) const
+{
+  const Point<dim> midpoint = line->center();
+
+  // not in the plane x==x_center: the line is on the curved part
+  if (!(midpoint(0) == this->center(0)))
+    return HyperBallBoundary<dim>::get_new_point_on_line (line);
+
+  // in the plane, but a line with both end points on the outer perimeter
+  // still belongs to the curved part
+  const double tolerance = 1e-5*this->radius;
+
+  const bool first_vertex_off_sphere
+    = (std::fabs(line->vertex(0).distance(this->center)-this->radius) > tolerance);
+  const bool second_vertex_off_sphere
+    = (std::fabs(line->vertex(1).distance(this->center)-this->radius) > tolerance);
+
+  if (first_vertex_off_sphere || second_vertex_off_sphere)
+    return midpoint;
+
+  return HyperBallBoundary<dim>::get_new_point_on_line (line);
+}
+
+
+
+// Specialization for dim=1: raises ExcInternalError through Assert and
+// returns a default-constructed point.
+template <>
+Point<1>
+HalfHyperBallBoundary<1>::get_new_point_on_quad (
+  const Triangulation<1>::quad_iterator &) const
+{
+  Assert (false, ExcInternalError());
+
+  return Point<1>();
+}
+
+
+
+// Return the new point for @p quad. If the x coordinate of the quad center
+// equals that of the ball center, the quad is treated as part of the flat
+// side and its plain center is returned; otherwise HyperBallBoundary
+// computes the point.
+template <int dim>
+Point<dim>
+HalfHyperBallBoundary<dim>::
+get_new_point_on_quad (const typename Triangulation<dim>::quad_iterator &quad) const
+{
+  const Point<dim> midpoint = quad->center();
+
+  const bool on_flat_part = (midpoint(0) == this->center(0));
+  if (on_flat_part)
+    return midpoint;
+
+  return HyperBallBoundary<dim>::get_new_point_on_quad (quad);
+}
+
+
+
+// Fill @p points with intermediate points on @p line. If the x coordinate
+// of the line center equals that of the ball center, the line is treated as
+// part of the flat side and StraightBoundary is used; otherwise
+// HyperBallBoundary is used.
+template <int dim>
+void
+HalfHyperBallBoundary<dim>::
+get_intermediate_points_on_line (const typename Triangulation<dim>::line_iterator &line,
+                                 std::vector<Point<dim> > &points) const
+{
+  const Point<dim> midpoint = line->center();
+
+  // lines in the plane x==x_center belong to the flat part of the boundary
+  const bool on_flat_part = (midpoint(0) == this->center(0));
+
+  if (on_flat_part)
+    StraightBoundary<dim>::get_intermediate_points_on_line (line, points);
+  else
+    HyperBallBoundary<dim>::get_intermediate_points_on_line (line, points);
+}
+
+
+
+// Fill @p points with intermediate points on @p quad. A single requested
+// point is obtained from get_new_point_on_quad(). Otherwise, if the x
+// coordinate of the quad center equals that of the ball center, the quad is
+// treated as part of the flat side and StraightBoundary is used; all other
+// quads are handled by HyperBallBoundary.
+template <int dim>
+void
+HalfHyperBallBoundary<dim>::
+get_intermediate_points_on_quad (const typename Triangulation<dim>::quad_iterator &quad,
+                                 std::vector<Point<dim> > &points) const
+{
+  if (points.size()==1)
+    {
+      points[0] = get_new_point_on_quad (quad);
+      return;
+    }
+
+  // quads in the plane x==x_center belong to the flat part of the boundary
+  const Point<dim> midpoint     = quad->center();
+  const bool       on_flat_part = (midpoint(0) == this->center(0));
+
+  if (on_flat_part)
+    StraightBoundary<dim>::get_intermediate_points_on_quad (quad, points);
+  else
+    HyperBallBoundary<dim>::get_intermediate_points_on_quad (quad, points);
+}
+
+
+
+// Specialization for dim=1: raises ExcInternalError through Assert and
+// leaves the output vector untouched.
+template <>
+void
+HalfHyperBallBoundary<1>::get_intermediate_points_on_quad (
+  const Triangulation<1>::quad_iterator &,
+  std::vector<Point<1> >                &) const
+{
+  Assert (false, ExcInternalError());
+}
+
+
+
+// Specialization for dim=1: raises ExcImpossibleInDim(1) through Assert and
+// does not touch the output argument.
+template <>
+void
+HalfHyperBallBoundary<1>::get_normals_at_vertices (
+  const Triangulation<1>::face_iterator &,
+  Boundary<1,1>::FaceVertexNormals      &) const
+{
+  Assert (false, ExcImpossibleInDim(1));
+}
+
+
+
+// Compute the normals at the vertices of @p face. If the x coordinate of
+// the face center equals that of the ball center, the face is treated as
+// part of the flat side and StraightBoundary computes the normals;
+// otherwise HyperBallBoundary does.
+template <int dim>
+void
+HalfHyperBallBoundary<dim>::
+get_normals_at_vertices (const typename Triangulation<dim>::face_iterator &face,
+                         typename Boundary<dim>::FaceVertexNormals &face_vertex_normals) const
+{
+  // faces in the plane x==x_center belong to the flat part of the boundary
+  const Point<dim> face_center  = face->center();
+  const bool       on_flat_part = (face_center(0) == this->center(0));
+
+  if (on_flat_part)
+    StraightBoundary<dim>::get_normals_at_vertices (face, face_vertex_normals);
+  else
+    HyperBallBoundary<dim>::get_normals_at_vertices (face, face_vertex_normals);
+}
+
+
+/* ---------------------------------------------------------------------- */
+
+
+
+// Constructor. Initializes the HyperBallBoundary base class with the given
+// @p center and a placeholder radius of zero, then switches the base class
+// to the mode in which the radius is computed automatically.
+template <int dim>
+HyperShellBoundary<dim>::HyperShellBoundary (const Point<dim> &center)
+  : HyperBallBoundary<dim> (center, 0.)
+{
+  this->compute_radius_automatically = true;
+}
+
+
+/* ---------------------------------------------------------------------- */
+
+
+
+
+// Constructor. Passes @p center to the HyperShellBoundary base class and
+// stores the inner and outer radii. For dim > 2 it asserts that the inner
+// radius is non-negative, the outer radius is positive, and the outer
+// radius exceeds the inner one.
+template <int dim>
+HalfHyperShellBoundary<dim>::HalfHyperShellBoundary (const Point<dim> &center,
+                                                     const double      inner_radius,
+                                                     const double      outer_radius)
+  : HyperShellBoundary<dim> (center),
+    inner_radius (inner_radius),
+    outer_radius (outer_radius)
+{
+  if (dim > 2)
+    Assert ((inner_radius >= 0) &&
+            (outer_radius > 0) &&
+            (outer_radius > inner_radius),
+            ExcMessage ("Inner and outer radii must be specified explicitly in 3d."));
+}
+
+
+
+// Return the new point for @p line.
+//
+// In 2d, a line whose two end points both have the x coordinate of the
+// center lies on the axis of symmetry, and the mean of its end points is
+// returned. In 3d the same holds for lines in the symmetry plane, unless
+// both end points lie on the inner sphere or both on the outer sphere
+// (within a tolerance of 1e-12 times the outer radius). All other lines
+// are handled by HyperShellBoundary. For any other dimension
+// ExcNotImplemented is raised through Assert and a default-constructed
+// point is returned.
+template <int dim>
+Point<dim>
+HalfHyperShellBoundary<dim>::
+get_new_point_on_line (const typename Triangulation<dim>::line_iterator &line) const
+{
+  switch (dim)
+    {
+    case 2:
+    {
+      // both end points on the axis of symmetry?
+      const bool on_symmetry_axis = ((line->vertex(0)(0) == this->center(0))
+                                     &&
+                                     (line->vertex(1)(0) == this->center(0)));
+
+      if (on_symmetry_axis)
+        return (line->vertex(0) + line->vertex(1))/2;
+
+      // otherwise the line is on the inner or outer part of the shell
+      return HyperShellBoundary<dim>::get_new_point_on_line (line);
+    }
+
+    case 3:
+    {
+      // both end points in the symmetry plane?
+      const bool on_symmetry_plane = ((line->vertex(0)(0) == this->center(0))
+                                      &&
+                                      (line->vertex(1)(0) == this->center(0)));
+
+      const double tolerance  = 1e-12 * outer_radius;
+      const double distance_0 = line->vertex(0).distance (this->center);
+      const double distance_1 = line->vertex(1).distance (this->center);
+
+      // a line with both end points on the same sphere is curved even if it
+      // lies in the symmetry plane
+      const bool both_on_inner_sphere
+        = ((std::fabs (distance_0 - inner_radius) < tolerance)
+           &&
+           (std::fabs (distance_1 - inner_radius) < tolerance));
+      const bool both_on_outer_sphere
+        = ((std::fabs (distance_0 - outer_radius) < tolerance)
+           &&
+           (std::fabs (distance_1 - outer_radius) < tolerance));
+
+      if (on_symmetry_plane && !(both_on_inner_sphere || both_on_outer_sphere))
+        return (line->vertex(0) + line->vertex(1))/2;
+
+      // otherwise the line is on the inner or outer part of the shell
+      return HyperShellBoundary<dim>::get_new_point_on_line (line);
+    }
+
+    default:
+      Assert (false, ExcNotImplemented());
+    }
+
+  return Point<dim>();
+}
+
+
+
+// Specialization for dim=1: raises ExcInternalError through Assert and
+// returns a default-constructed point.
+template <>
+Point<1>
+HalfHyperShellBoundary<1>::get_new_point_on_quad (
+  const Triangulation<1>::quad_iterator &) const
+{
+  Assert (false, ExcInternalError());
+
+  return Point<1>();
+}
+
+
+
+
+// Return the new point for @p quad.
+//
+// A quad whose four vertices all have the x coordinate of the center lies
+// in the symmetry plane. For such a quad the mean of the four vertices is
+// moved along the ray from the center to the radius at which the centers of
+// one pair of opposite lines (0 and 1, or else 2 and 3) lie. All other quads
+// are handled by HyperShellBoundary.
+template <int dim>
+Point<dim>
+HalfHyperShellBoundary<dim>::
+get_new_point_on_quad (const typename Triangulation<dim>::quad_iterator &quad) const
+{
+  // if this quad is on the symmetry plane, take the center point and project
+  // it outward to the same radius as the centers of the two radial lines
+  if ((quad->vertex(0)(0) == this->center(0)) &&
+      (quad->vertex(1)(0) == this->center(0)) &&
+      (quad->vertex(2)(0) == this->center(0)) &&
+      (quad->vertex(3)(0) == this->center(0)))
+    {
+      const Point<dim> quad_center = (quad->vertex(0) + quad->vertex(1) +
+                                      quad->vertex(2) + quad->vertex(3)   )/4;
+      const Tensor<1,dim> quad_center_offset = quad_center - this->center;
+
+
+      // the pair of opposite lines whose centers are equally far from the
+      // center (up to 1e-12 times the outer radius) is taken as the radial
+      // pair
+      if (std::fabs (quad->line(0)->center().distance(this->center) -
+                     quad->line(1)->center().distance(this->center))
+          < 1e-12 * outer_radius)
+        {
+          // lines 0 and 1 are radial
+          const double needed_radius
+            = quad->line(0)->center().distance(this->center);
+
+          return (this->center +
+                  quad_center_offset/quad_center_offset.norm() * needed_radius);
+        }
+      else if (std::fabs (quad->line(2)->center().distance(this->center) -
+                          quad->line(3)->center().distance(this->center))
+               < 1e-12 * outer_radius)
+        {
+          // lines 2 and 3 are radial
+          const double needed_radius
+            = quad->line(2)->center().distance(this->center);
+
+          return (this->center +
+                  quad_center_offset/quad_center_offset.norm() * needed_radius);
+        }
+      else
+        // NOTE(review): if this Assert does not stop execution, control
+        // falls through to the base-class call below
+        Assert (false, ExcInternalError());
+    }
+
+  // otherwise we are on the outer or inner part of the shell. proceed as in
+  // the base class
+  return HyperShellBoundary<dim>::get_new_point_on_quad (quad);
+}
+
+
+
+// Fill @p points with intermediate points on @p line.
+//
+// The decision between a straight and a curved line is the same as in
+// get_new_point_on_line(): straight lines are handled by StraightBoundary,
+// curved ones by HyperShellBoundary. For dimensions other than 2 and 3
+// ExcNotImplemented is raised through Assert.
+template <int dim>
+void
+HalfHyperShellBoundary<dim>::
+get_intermediate_points_on_line (const typename Triangulation<dim>::line_iterator &line,
+                                 std::vector<Point<dim> > &points) const
+{
+  switch (dim)
+    {
+    // in 2d, first check whether the two end points of the line are on the
+    // axis of symmetry. if so, the line is straight and the intermediate
+    // points are taken from the straight boundary
+    case 2:
+    {
+      if ((line->vertex(0)(0) == this->center(0))
+          &&
+          (line->vertex(1)(0) == this->center(0)))
+        StraightBoundary<dim>::get_intermediate_points_on_line (line, points);
+      else
+        // otherwise we are on the outer or inner part of the shell. proceed
+        // as in the base class
+        HyperShellBoundary<dim>::get_intermediate_points_on_line (line, points);
+      break;
+    }
+
+    // in 3d, a line is a straight line if it is on the symmetry plane and if
+    // not both of its end points are on either the inner or outer sphere
+    case 3:
+    {
+      if (((line->vertex(0)(0) == this->center(0))
+           &&
+           (line->vertex(1)(0) == this->center(0)))
+          &&
+          !(((std::fabs (line->vertex(0).distance (this->center)
+                         - inner_radius) < 1e-12 * outer_radius)
+             &&
+             (std::fabs (line->vertex(1).distance (this->center)
+                         - inner_radius) < 1e-12 * outer_radius))
+            ||
+            ((std::fabs (line->vertex(0).distance (this->center)
+                         - outer_radius) < 1e-12 * outer_radius)
+             &&
+             (std::fabs (line->vertex(1).distance (this->center)
+                         - outer_radius) < 1e-12 * outer_radius))))
+        StraightBoundary<dim>::get_intermediate_points_on_line (line, points);
+      else
+        // otherwise we are on the outer or inner part of the shell. proceed
+        // as in the base class
+        HyperShellBoundary<dim>::get_intermediate_points_on_line (line, points);
+
+      break;
+    }
+
+    default:
+      Assert (false, ExcNotImplemented());
+    }
+}
+
+
+
+// Compute the intermediate points on a quad of the half shell. Asserts
+// dim<3. A quad whose center has the x-coordinate of the shell center is
+// handled by StraightBoundary, any other quad by HyperShellBoundary.
+template <int dim>
+void
+HalfHyperShellBoundary<dim>::
+get_intermediate_points_on_quad (const typename Triangulation<dim>::quad_iterator &quad,
+                                 std::vector<Point<dim> > &points) const
+{
+  Assert (dim < 3, ExcNotImplemented());
+
+  // a quad centered at x==center(0) belongs to the plane part of the
+  // boundary
+  const bool on_plane_part = (quad->center()(0) == this->center(0));
+
+  if (!on_plane_part)
+    {
+      HyperShellBoundary<dim>::get_intermediate_points_on_quad (quad, points);
+      return;
+    }
+
+  StraightBoundary<dim>::get_intermediate_points_on_quad (quad, points);
+}
+
+
+
+// Specialization for dim==1. Both arguments are ignored; the body consists
+// solely of a failing Assert that reports ExcInternalError.
+template <>
+void
+HalfHyperShellBoundary<1>::
+get_intermediate_points_on_quad (const Triangulation<1>::quad_iterator &,
+                                 std::vector<Point<1> > &) const
+{
+  Assert (false, ExcInternalError());
+}
+
+
+
+// Specialization for dim==1. Both arguments are ignored; the body consists
+// solely of a failing Assert that reports ExcImpossibleInDim(1).
+template <>
+void
+HalfHyperShellBoundary<1>::
+get_normals_at_vertices (const Triangulation<1>::face_iterator &,
+                         Boundary<1,1>::FaceVertexNormals &) const
+{
+  Assert (false, ExcImpossibleInDim(1));
+}
+
+
+
+
+
+// Compute the normal vectors at the vertices of a boundary face. A face
+// whose center has the x-coordinate of the shell center is handled by
+// StraightBoundary, any other face by HyperShellBoundary.
+template <int dim>
+void
+HalfHyperShellBoundary<dim>::
+get_normals_at_vertices (const typename Triangulation<dim>::face_iterator &face,
+                         typename Boundary<dim>::FaceVertexNormals &face_vertex_normals) const
+{
+  const bool on_plane_part = (face->center()(0) == this->center(0));
+
+  if (!on_plane_part)
+    {
+      HyperShellBoundary<dim>::get_normals_at_vertices (face, face_vertex_normals);
+      return;
+    }
+
+  StraightBoundary<dim>::get_normals_at_vertices (face, face_vertex_normals);
+}
+
+
+
+
+// Generic constructor: stores the two radii in R and r, then runs into a
+// failing Assert reporting ExcNotImplemented. The <2,3> specialization
+// that follows is the one that performs an actual check.
+template <int dim, int spacedim>
+TorusBoundary<dim,spacedim>::TorusBoundary (const double R__,
+                                            const double r__)
+  :
+  R(R__),
+  r(r__)
+{
+  Assert (false, ExcNotImplemented());
+}
+
+
+
+// Constructor for a two-dimensional torus surface in 3d: stores the two
+// radii in R and r and asserts that R is strictly larger than r.
+template <>
+TorusBoundary<2,3>::TorusBoundary (const double R__,
+                                   const double r__)
+  :
+  R(R__),
+  r(r__)
+{
+  Assert (R>r, ExcMessage("Outer radius must be greater than inner radius."));
+}
+
+
+
+// Map @p angle into the quadrant selected by the signs of @p x and @p y:
+//   y>=0, x>=0 :  angle
+//   y>=0, x<0  :  Pi - angle
+//   y<0,  x<=0 :  Pi + angle
+//   y<0,  x>0  :  2*Pi - angle
+template <int dim, int spacedim>
+double
+TorusBoundary<dim,spacedim>::get_correct_angle(const double angle,
+                                               const double x,
+                                               const double y) const
+{
+  const bool upper_half = (y>=0);
+
+  if (upper_half)
+    return ((x >=0) ? angle : numbers::PI-angle);
+
+  return ((x <=0) ? numbers::PI+angle : 2.0*numbers::PI-angle);
+}
+
+
+
+// Convert the surface coordinates (theta, phi) stored in @p surfP into a
+// point in 3d:
+//   x = (R + r cos(phi)) cos(theta)
+//   y = r sin(phi)
+//   z = (R + r cos(phi)) sin(theta)
+template <>
+Point<3>
+TorusBoundary<2,3>::get_real_coord (const Point<2> &surfP) const
+{
+  const double theta = surfP(0);
+  const double phi   = surfP(1);
+
+  // distance of the point from the y-axis
+  const double axis_distance = R+r*std::cos(phi);
+
+  const double x = axis_distance*std::cos(theta);
+  const double y = r*std::sin(phi);
+  const double z = axis_distance*std::sin(theta);
+
+  return Point<3> (x, y, z);
+}
+
+
+
+// Convert a point @p p in 3d into surface coordinates (theta, phi), the
+// inverse direction of get_real_coord(). Both angles are first computed
+// from absolute values and then moved into the right quadrant by
+// get_correct_angle().
+template <>
+Point<2>
+TorusBoundary<2,3>::get_surf_coord(const Point<3> &p) const
+{
+  Point<2> surface_point;
+
+  // phi: the quadrant is chosen by whether the squared distance from the
+  // y-axis is larger or smaller than R^2, and by the sign of p(1)
+  const double raw_phi               = std::asin(std::abs(p(1))/r);
+  const double squared_axis_distance = p(0)*p(0)+p(2)*p(2);
+  surface_point(1) = get_correct_angle(raw_phi,
+                                       squared_axis_distance-R*R,
+                                       p(1));
+
+  // theta: if p(0) is (almost) zero, avoid the division and use +-Pi/2
+  // directly
+  if (std::abs(p(0))<1.E-5)
+    surface_point(0) = ((p(2)>=0) ? numbers::PI*0.5 : -numbers::PI*0.5);
+  else
+    {
+      const double raw_theta = std::atan(std::abs(p(2)/p(0)));
+      surface_point(0) = get_correct_angle(raw_theta,p(0),p(2));
+    }
+
+  return surface_point;
+}
+
+
+
+// Return the new point on @p line: the average of the surface coordinates
+// of the two end points, mapped back to 3d by get_real_coord().
+template <>
+Point<3>
+TorusBoundary<2,3>::get_new_point_on_line (const Triangulation<2,3>::line_iterator &line) const
+{
+  const Point<2> first  = get_surf_coord(line->vertex(0));
+  const Point<2> second = get_surf_coord(line->vertex(1));
+
+  Point<2> midpoint;
+  for (unsigned int d=0; d<2; ++d)
+    {
+      // Take care for periodic conditions: if the two angles are more than
+      // Pi apart, add 2*Pi before averaging. For instance phi0= 0,
+      // phi1= 3/2*Pi: the middle has to be 7/4*Pi, not 3/4*Pi.
+      const double periodic_shift
+        = ((std::abs(first(d)-second(d))> numbers::PI) ? 2*numbers::PI : 0.);
+
+      midpoint(d) = (periodic_shift + (first(d) + second(d))) * 0.5;
+    }
+
+  return get_real_coord(midpoint);
+}
+
+
+
+// Return the new point on @p quad: the average of the surface coordinates
+// of the four vertices, mapped back to 3d by get_real_coord().
+template <>
+Point<3>
+TorusBoundary<2,3>::get_new_point_on_quad (const Triangulation<2,3>::quad_iterator &quad) const
+{
+  Point<2> corners[4];
+  for (unsigned int v=0; v<4; ++v)
+    corners[v] = get_surf_coord(quad->vertex(v));
+
+  Point<2> midpoint;
+  for (unsigned int d=0; d<2; ++d)
+    {
+      double sum = 0.;
+
+      // Take care for periodic conditions, see get_new_point_on_line():
+      // add 2*Pi once for every vertex whose angle is more than Pi away
+      // from the angle of vertex 0
+      for (unsigned int v=1; v<4; ++v)
+        if (std::abs(corners[0](d)-corners[v](d))> numbers::PI)
+          sum += 2*numbers::PI;
+
+      for (unsigned int v=0; v<4; ++v)
+        sum += corners[v](d);
+
+      midpoint(d) = sum * 0.25;
+    }
+
+  return get_real_coord(midpoint);
+}
+
+
+
+// Normal vector (not scaled to unit length) at the surface point with
+// coordinates (theta, phi) given by @p surfP.
+template <>
+Point<3>
+TorusBoundary<2,3>:: get_surf_norm_from_sp(const Point<2> &surfP) const
+{
+  const double theta = surfP[0];
+  const double phi   = surfP[1];
+
+  // common factor of all three components
+  const double scaling = R+r*std::cos(phi);
+
+  const double normal_x = r*std::cos(phi)*std::cos(theta)*scaling;
+  const double normal_y = r*std::sin(phi)*scaling;
+  const double normal_z = r*std::sin(theta)*std::cos(phi)*scaling;
+
+  return Point<3> (normal_x, normal_y, normal_z);
+}
+
+
+
+// Normal vector (not scaled to unit length) at the point @p p given in 3d
+// coordinates: convert to surface coordinates and evaluate
+// get_surf_norm_from_sp() there.
+template <>
+Point<3>
+TorusBoundary<2,3>::get_surf_norm(const Point<3> &p) const
+{
+  return get_surf_norm_from_sp(get_surf_coord(p));
+}
+
+
+
+// Fill @p points with points between the two vertices of @p line. The
+// vertices are converted to surface coordinates with get_surf_coord(), the
+// interpolation is linear in these coordinates with weights taken from
+// get_line_support_points(), and every interpolated point is mapped back
+// to 3d with get_real_coord(). Does nothing if @p points is empty.
+template<>
+void
+TorusBoundary<2,3>::
+get_intermediate_points_on_line (const Triangulation<2, 3>::line_iterator   &line,
+                                 std::vector< Point< 3 > > &points) const
+{
+  //Almost the same implementation as StraightBoundary<2,3>
+  unsigned int npoints=points.size();
+  if (npoints==0) return;
+
+  Point<2> p[2];
+
+  for (unsigned int i=0; i<2; i++)
+    p[i]=get_surf_coord(line->vertex(i));
+
+  // offset[i] becomes 1 if the two vertices are more than Pi apart in
+  // surface coordinate i, and stays 0 otherwise
+  unsigned int offset[2];
+  offset[0]=0;
+  offset[1]=0;
+
+  //Take care for periodic conditions & negative angles, see
+  //get_new_point_on_line() above. Because we don't have a symmetric
+  //interpolation (just the middle) we need to add 2*Pi to each almost zero
+  //and negative angles.
+  for (unsigned int i=0; i<2; i++)
+    for (unsigned int j=1; j<2; j++)
+      {
+        if (std::abs(p[0](i)-p[j](i))> numbers::PI)
+          {
+            offset[i]++;
+            break;
+          }
+      }
+
+  // shift every angle below 1e-12 by 2*Pi in the coordinates flagged above
+  for (unsigned int i=0; i<2; i++)
+    for (unsigned int j=0; j<2; j++)
+      if (p[j](i)<1.E-12 ) //Take care for periodic conditions & negative angles
+        p[j](i)+=2*numbers::PI*offset[i];
+
+
+  Point<2>  target;
+  const std::vector<Point<1> > &line_points = this->get_line_support_points(npoints);
+  for (unsigned int i=0; i<npoints; i++)
+    {
+      // entry 0 is skipped; presumably it is the support point at the
+      // first vertex of the line -- TODO confirm against
+      // get_line_support_points()
+      const double x = line_points[i+1][0];
+      target=  (1-x)*p[0] + x*p[1];
+      points[i]=get_real_coord(target);
+    }
+}
+
+
+
+// Fill @p points with m*m points in the interior of @p quad, where
+// m*m == points.size() (asserted). The vertices are converted to surface
+// coordinates with get_surf_coord(), the interpolation is bilinear in
+// these coordinates with weights taken from get_line_support_points(), and
+// every interpolated point is mapped back to 3d with get_real_coord().
+template<>
+void
+TorusBoundary<2,3>::
+get_intermediate_points_on_quad (const Triangulation< 2, 3 >::quad_iterator &quad,
+                                 std::vector< Point< 3 > > &points )const
+{
+  //Almost the same implementation as  StraightBoundary<2,3>
+  const unsigned int n=points.size(),
+                     m=static_cast<unsigned int>(std::sqrt(static_cast<double>(n)));
+  // is n a square number
+  Assert(m*m==n, ExcInternalError());
+
+  Point<2>  p[4];
+
+  for (unsigned int i=0; i<4; i++)
+    p[i]=get_surf_coord(quad->vertex(i));
+
+  Point<2>  target;
+  // offset[i] becomes 1 if any of the vertices 1..3 is more than Pi away
+  // from vertex 0 in surface coordinate i, and stays 0 otherwise
+  unsigned int offset[2];
+  offset[0]=0;
+  offset[1]=0;
+
+  //Take care for periodic conditions & negative angles, see
+  //get_new_point_on_line() above.  Because we don't have a symmetric
+  //interpolation (just the middle) we need to add 2*Pi to each almost zero
+  //and negative angles.
+  for (unsigned int i=0; i<2; i++)
+    for (unsigned int j=1; j<4; j++)
+      {
+        if (std::abs(p[0](i)-p[j](i))> numbers::PI)
+          {
+            offset[i]++;
+            break;
+          }
+      }
+
+  // shift every angle below 1e-12 by 2*Pi in the coordinates flagged above
+  for (unsigned int i=0; i<2; i++)
+    for (unsigned int j=0; j<4; j++)
+      if (p[j](i)<1.E-12 ) //Take care for periodic conditions & negative angles
+        p[j](i)+=2*numbers::PI*offset[i];
+
+  // the points are stored row by row: index i selects the weight y between
+  // the vertex pairs (0,1) and (2,3), index j the weight x within a pair
+  const std::vector<Point<1> > &line_points = this->get_line_support_points(m);
+  for (unsigned int i=0; i<m; ++i)
+    {
+      const double y=line_points[i+1][0];
+      for (unsigned int j=0; j<m; ++j)
+        {
+          const double x=line_points[j+1][0];
+          target=((1-x) * p[0] +
+                  x     * p[1]) * (1-y) +
+                 ((1-x) * p[2] +
+                  x     * p[3]) * y;
+
+          points[i*m+j]=get_real_coord(target);
+        }
+    }
+}
+
+
+
+// Store in @p face_vertex_normals the (not normalized) surface normal
+// returned by get_surf_norm() for every vertex of @p face.
+template<>
+void
+TorusBoundary<2,3>::
+get_normals_at_vertices (const Triangulation<2,3 >::face_iterator &face,
+                         Boundary<2,3>::FaceVertexNormals &face_vertex_normals) const
+{
+  const unsigned int n_vertices = GeometryInfo<2>::vertices_per_face;
+
+  for (unsigned int v=0; v<n_vertices; ++v)
+    {
+      const Point<3> &vertex = face->vertex(v);
+      face_vertex_normals[v] = get_surf_norm(vertex);
+    }
+}
+
+
+
+// explicit instantiations
+#include "tria_boundary_lib.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/grid/tria_boundary_lib.inst.in b/source/grid/tria_boundary_lib.inst.in
new file mode 100644
index 0000000..6951701
--- /dev/null
+++ b/source/grid/tria_boundary_lib.inst.in
@@ -0,0 +1,34 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+// Explicit instantiations of the boundary classes, listed once for every
+// value that deal_II_dimension takes from DIMENSIONS. In addition,
+// HyperBallBoundary is instantiated for <dim,dim+1> whenever dim != 3, and
+// CylinderBoundary for <dim-1,dim> when dim == 3.
+for (deal_II_dimension : DIMENSIONS)
+  {
+    template class CylinderBoundary<deal_II_dimension>;
+    template class ConeBoundary<deal_II_dimension>;
+    template class HyperBallBoundary<deal_II_dimension>;
+    template class HalfHyperBallBoundary<deal_II_dimension>;
+    template class HyperShellBoundary<deal_II_dimension>;
+    template class HalfHyperShellBoundary<deal_II_dimension>;
+
+#if deal_II_dimension != 3
+    template class HyperBallBoundary<deal_II_dimension,deal_II_dimension+1>;
+#endif
+#if deal_II_dimension == 3
+   template class CylinderBoundary<deal_II_dimension-1,deal_II_dimension>;
+#endif
+  }
+
+
diff --git a/source/grid/tria_faces.cc b/source/grid/tria_faces.cc
new file mode 100644
index 0000000..fef213c
--- /dev/null
+++ b/source/grid/tria_faces.cc
@@ -0,0 +1,52 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/grid/tria_faces.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace internal
+{
+  namespace Triangulation
+  {
+
+    // Memory estimate for the faces in 1d: always zero, no member is
+    // counted here.
+    std::size_t
+    TriaFaces<1>::memory_consumption () const
+    {
+      return 0;
+    }
+
+
+    // Memory estimate for the faces in 2d: whatever
+    // MemoryConsumption::memory_consumption() reports for the lines member.
+    std::size_t
+    TriaFaces<2>::memory_consumption () const
+    {
+      return MemoryConsumption::memory_consumption (lines);
+    }
+
+
+    // Memory estimate for the faces in 3d: the sum of what
+    // MemoryConsumption::memory_consumption() reports for the quads and
+    // the lines members.
+    std::size_t
+    TriaFaces<3>::memory_consumption () const
+    {
+      const std::size_t quad_memory = MemoryConsumption::memory_consumption (quads);
+      const std::size_t line_memory = MemoryConsumption::memory_consumption (lines);
+
+      return quad_memory + line_memory;
+    }
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
diff --git a/source/grid/tria_levels.cc b/source/grid/tria_levels.cc
new file mode 100644
index 0000000..2f843d6
--- /dev/null
+++ b/source/grid/tria_levels.cc
@@ -0,0 +1,211 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/grid/tria_levels.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace internal
+{
+  namespace Triangulation
+  {
+    // Grow the per-cell arrays of this level so that they can hold
+    // @p total_cells cells. Nothing happens if refine_flags already has at
+    // least that many entries. New entries are filled with default values
+    // (no refinement, no coarsening, invalid active cell index, subdomain
+    // id 0, parent -1, neighbor (-1,-1)). direction_flags is only filled if
+    // @p dimension is smaller than @p space_dimension and is cleared
+    // otherwise.
+    template <int dim>
+    void
+    TriaLevel<dim>::reserve_space (const unsigned int total_cells,
+                                   const unsigned int dimension,
+                                   const unsigned int space_dimension)
+    {
+      // we need space for total_cells cells. Maybe we have more already
+      // with those cells which are unused, so only allocate new space if
+      // needed.
+      //
+      // note that all arrays should have equal sizes (checked by
+      // @p{monitor_memory})
+      if (total_cells > refine_flags.size())
+        {
+          refine_flags.reserve (total_cells);
+          refine_flags.insert (refine_flags.end(),
+                               total_cells - refine_flags.size(),
+                               RefinementCase<dim>::no_refinement);
+
+          coarsen_flags.reserve (total_cells);
+          coarsen_flags.insert (coarsen_flags.end(),
+                                total_cells - coarsen_flags.size(),
+                                false);
+
+          active_cell_indices.reserve (total_cells);
+          active_cell_indices.insert (active_cell_indices.end(),
+                                      total_cells - active_cell_indices.size(),
+                                      numbers::invalid_unsigned_int);
+
+          subdomain_ids.reserve (total_cells);
+          subdomain_ids.insert (subdomain_ids.end(),
+                                total_cells - subdomain_ids.size(),
+                                0);
+
+          level_subdomain_ids.reserve (total_cells);
+          level_subdomain_ids.insert (level_subdomain_ids.end(),
+                                      total_cells - level_subdomain_ids.size(),
+                                      0);
+
+          if (dimension < space_dimension)
+            {
+              direction_flags.reserve (total_cells);
+              direction_flags.insert (direction_flags.end(),
+                                      total_cells - direction_flags.size(),
+                                      true);
+            }
+          else
+            direction_flags.clear ();
+
+          // parents holds one entry per two cells (rounded up)
+          parents.reserve ((int) (total_cells + 1) / 2);
+          parents.insert (parents.end (),
+                          (total_cells + 1) / 2 - parents.size (),
+                          -1);
+
+          // neighbors holds 2*dimension entries per cell
+          neighbors.reserve (total_cells*(2*dimension));
+          neighbors.insert (neighbors.end(),
+                            total_cells*(2*dimension) - neighbors.size(),
+                            std::make_pair(-1,-1));
+        };
+    }
+
+
+    // Consistency check of the array sizes on this level: neighbors must
+    // have exactly 2*true_dimension entries for every entry of refine_flags
+    // and of coarsen_flags. A mismatch is reported as ExcMemoryInexact.
+    template <int dim>
+    void
+    TriaLevel<dim>::monitor_memory (const unsigned int true_dimension) const
+    {
+      // presumably avoids an unused-parameter warning in builds where
+      // Assert expands to nothing -- TODO confirm
+      (void)true_dimension;
+      Assert (2*true_dimension*refine_flags.size() == neighbors.size(),
+              ExcMemoryInexact (refine_flags.size(), neighbors.size()));
+      Assert (2*true_dimension*coarsen_flags.size() == neighbors.size(),
+              ExcMemoryInexact (coarsen_flags.size(), neighbors.size()));
+    }
+
+
+    // Memory estimate for this level: the sum of what
+    // MemoryConsumption::memory_consumption() reports for each of the
+    // member arrays and for the cells member.
+    template <int dim>
+    std::size_t
+    TriaLevel<dim>::memory_consumption () const
+    {
+      std::size_t total = 0;
+
+      total += MemoryConsumption::memory_consumption (refine_flags);
+      total += MemoryConsumption::memory_consumption (coarsen_flags);
+      total += MemoryConsumption::memory_consumption (active_cell_indices);
+      total += MemoryConsumption::memory_consumption (neighbors);
+      total += MemoryConsumption::memory_consumption (subdomain_ids);
+      total += MemoryConsumption::memory_consumption (level_subdomain_ids);
+      total += MemoryConsumption::memory_consumption (parents);
+      total += MemoryConsumption::memory_consumption (direction_flags);
+      total += MemoryConsumption::memory_consumption (cells);
+
+      return total;
+    }
+
+// This specialization should be only temporary, until the TriaObjects
+// classes are straightened out.
+
+    // Grow the per-cell arrays of this level so that they can hold
+    // @p total_cells cells; same statements as in the generic
+    // TriaLevel<dim>::reserve_space, with RefinementCase<3>. Nothing
+    // happens if refine_flags already has at least that many entries.
+    // direction_flags is only filled if @p dimension is smaller than
+    // @p space_dimension and is cleared otherwise.
+    void
+    TriaLevel<3>::reserve_space (const unsigned int total_cells,
+                                 const unsigned int dimension,
+                                 const unsigned int space_dimension)
+    {
+      // we need space for total_cells
+      // cells. Maybe we have more already
+      // with those cells which are unused,
+      // so only allocate new space if needed.
+      //
+      // note that all arrays should have equal
+      // sizes (checked by @p{monitor_memory})
+      if (total_cells > refine_flags.size())
+        {
+          refine_flags.reserve (total_cells);
+          refine_flags.insert (refine_flags.end(),
+                               total_cells - refine_flags.size(),
+                               RefinementCase<3>::no_refinement);
+
+          coarsen_flags.reserve (total_cells);
+          coarsen_flags.insert (coarsen_flags.end(),
+                                total_cells - coarsen_flags.size(),
+                                false);
+
+          active_cell_indices.reserve (total_cells);
+          active_cell_indices.insert (active_cell_indices.end(),
+                                      total_cells - active_cell_indices.size(),
+                                      numbers::invalid_unsigned_int);
+
+          subdomain_ids.reserve (total_cells);
+          subdomain_ids.insert (subdomain_ids.end(),
+                                total_cells - subdomain_ids.size(),
+                                0);
+
+          level_subdomain_ids.reserve (total_cells);
+          level_subdomain_ids.insert (level_subdomain_ids.end(),
+                                      total_cells - level_subdomain_ids.size(),
+                                      0);
+
+          if (dimension < space_dimension)
+            {
+              direction_flags.reserve (total_cells);
+              direction_flags.insert (direction_flags.end(),
+                                      total_cells - direction_flags.size(),
+                                      true);
+            }
+          else
+            direction_flags.clear ();
+
+          // parents holds one entry per two cells (rounded up)
+          parents.reserve ((int) (total_cells + 1) / 2);
+          parents.insert (parents.end (),
+                          (total_cells + 1) / 2 - parents.size (),
+                          -1);
+
+          // neighbors holds 2*dimension entries per cell
+          neighbors.reserve (total_cells*(2*dimension));
+          neighbors.insert (neighbors.end(),
+                            total_cells*(2*dimension) - neighbors.size(),
+                            std::make_pair(-1,-1));
+        };
+    }
+
+
+    // Consistency check of the array sizes on this level: neighbors must
+    // have exactly 2*true_dimension entries for every entry of refine_flags
+    // and of coarsen_flags. A mismatch is reported as ExcMemoryInexact.
+    void
+    TriaLevel<3>::monitor_memory (const unsigned int true_dimension) const
+    {
+      // presumably avoids an unused-parameter warning in builds where
+      // Assert expands to nothing -- TODO confirm
+      (void)true_dimension;
+      Assert (2*true_dimension*refine_flags.size() == neighbors.size(),
+              ExcMemoryInexact (refine_flags.size(), neighbors.size()));
+      Assert (2*true_dimension*coarsen_flags.size() == neighbors.size(),
+              ExcMemoryInexact (coarsen_flags.size(), neighbors.size()));
+    }
+
+
+    // Memory estimate for this level: the sum of what
+    // MemoryConsumption::memory_consumption() reports for each of the
+    // member arrays and for the cells member.
+    std::size_t
+    TriaLevel<3>::memory_consumption () const
+    {
+      return (MemoryConsumption::memory_consumption (refine_flags) +
+              MemoryConsumption::memory_consumption (coarsen_flags) +
+              MemoryConsumption::memory_consumption (active_cell_indices) +
+              MemoryConsumption::memory_consumption (neighbors) +
+              MemoryConsumption::memory_consumption (subdomain_ids) +
+              // level_subdomain_ids is filled by reserve_space() alongside
+              // subdomain_ids and is counted by the generic
+              // TriaLevel<dim>::memory_consumption, so count it here too
+              MemoryConsumption::memory_consumption (level_subdomain_ids) +
+              MemoryConsumption::memory_consumption (parents) +
+              MemoryConsumption::memory_consumption (direction_flags) +
+              MemoryConsumption::memory_consumption (cells));
+    }
+  }
+}
+
+
+template class internal::Triangulation::TriaLevel<1>;
+template class internal::Triangulation::TriaLevel<2>;
+
+DEAL_II_NAMESPACE_CLOSE
+
diff --git a/source/grid/tria_objects.cc b/source/grid/tria_objects.cc
new file mode 100644
index 0000000..0beb8f7
--- /dev/null
+++ b/source/grid/tria_objects.cc
@@ -0,0 +1,472 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/grid/tria_objects.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/tria_accessor.h>
+
+#include <algorithm>
+#include <functional>
+
+
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace internal
+{
+  namespace Triangulation
+  {
+    // Make room for @p new_objects_in_pairs objects that are stored as
+    // pairs of consecutive slots (must be even, asserted) and for
+    // @p new_objects_single objects stored in single slots. Existing unused
+    // slots are counted first and reused; the arrays are only grown if the
+    // resulting size exceeds cells.size(). As a side effect next_free_single,
+    // next_free_pair and reverse_order_next_free_single are reset.
+    template<class G>
+    void
+    TriaObjects<G>::reserve_space (const unsigned int new_objects_in_pairs,
+                                   const unsigned int new_objects_single)
+    {
+      Assert(new_objects_in_pairs%2==0, ExcInternalError());
+
+      next_free_single=0;
+      next_free_pair=0;
+      reverse_order_next_free_single=false;
+
+      // count the number of objects, of unused single objects and of
+      // unused pairs of objects
+      unsigned int n_objects=0;
+      unsigned int n_unused_pairs=0;
+      unsigned int n_unused_singles=0;
+      for (unsigned int i=0; i<used.size(); ++i)
+        {
+          if (used[i])
+            ++n_objects;
+          else if (i+1<used.size())
+            {
+              if (used[i+1])
+                {
+                  // unused slot followed by a used one: a free single slot
+                  ++n_unused_singles;
+                  if (next_free_single==0)
+                    next_free_single=i;
+                }
+              else
+                {
+                  // two unused slots in a row: a free pair; the extra ++i
+                  // skips the second slot of the pair
+                  ++n_unused_pairs;
+                  if (next_free_pair==0)
+                    next_free_pair=i;
+                  ++i;
+                }
+            }
+          else
+            // unused slot at the very end of the array
+            ++n_unused_singles;
+        }
+      Assert(n_objects+2*n_unused_pairs+n_unused_singles==used.size(),
+             ExcInternalError());
+
+      // how many single objects are needed in addition to
+      // n_unused_singles? (signed, since the difference may be negative)
+      const int additional_single_objects=
+        new_objects_single-n_unused_singles;
+
+      unsigned int new_size=
+        used.size() + new_objects_in_pairs - 2*n_unused_pairs;
+      if (additional_single_objects>0)
+        new_size+=additional_single_objects;
+
+      // only allocate space if necessary
+      if (new_size>cells.size())
+        {
+          cells.reserve (new_size);
+          cells.insert (cells.end(),
+                        new_size-cells.size(),
+                        G ());
+
+          used.reserve (new_size);
+          used.insert (used.end(),
+                       new_size-used.size(),
+                       false);
+
+          user_flags.reserve (new_size);
+          user_flags.insert (user_flags.end(),
+                             new_size-user_flags.size(),
+                             false);
+
+          // children holds max_children_per_cell/2 entries per object
+          const unsigned int factor = GeometryInfo<G::dimension>::max_children_per_cell / 2;
+          children.reserve (factor*new_size);
+          children.insert (children.end(),
+                           factor*new_size-children.size(),
+                           -1);
+
+          if (G::dimension > 1)
+            {
+              refinement_cases.reserve (new_size);
+              refinement_cases.insert (refinement_cases.end(),
+                                       new_size - refinement_cases.size(),
+                                       RefinementCase<G::dimension>::no_refinement);
+            }
+
+          // first reserve, then resize. Otherwise the std library can decide to allocate
+          // more entries.
+          boundary_or_material_id.reserve (new_size);
+          boundary_or_material_id.resize (new_size);
+
+          user_data.reserve (new_size);
+          user_data.resize (new_size);
+
+          manifold_id.reserve (new_size);
+          manifold_id.insert (manifold_id.end(),
+                              new_size-manifold_id.size(),
+                              numbers::flat_manifold_id);
+
+        }
+
+      // no free single slot was found: point at the last slot and flag
+      // reverse order
+      if (n_unused_singles==0)
+        {
+          next_free_single=new_size-1;
+          reverse_order_next_free_single=true;
+        }
+    }
+
+
+    // Return an iterator to the next free slot for a hex on @p level of
+    // @p tria, searching the used array from next_free_pair onwards. On
+    // success next_free_pair is advanced by two past the slot found; if no
+    // free slot exists before the last entry, tria.end_hex() is returned.
+    template <>
+    template <int dim, int spacedim>
+    typename dealii::Triangulation<dim,spacedim>::raw_hex_iterator
+    TriaObjects<TriaObject<3> >::next_free_hex (const dealii::Triangulation<dim,spacedim> &tria,
+                                                const unsigned int               level)
+    {
+      // TODO: Think of a way to ensure that we are using the correct triangulation, i.e. the one containing *this.
+
+      const int last = used.size()-1;
+
+      // skip over all used slots
+      int pos = next_free_pair;
+      while ((pos<last) && used[pos])
+        ++pos;
+
+      // no free slot
+      if (pos>=last)
+        return tria.end_hex();
+
+      // this should be a pair slot
+      Assert(!used[pos+1], ExcInternalError());
+      next_free_pair=pos+2;
+
+      return typename dealii::Triangulation<dim,spacedim>::raw_hex_iterator(&tria,level,pos);
+    }
+
+
+    // Make room for @p new_hexes additional hexes. The required size is
+    // the number of slots currently marked as used plus @p new_hexes; the
+    // arrays are only grown if that exceeds cells.size(). In every case
+    // next_free_single and next_free_pair are reset to zero.
+    void
+    TriaObjectsHex::reserve_space (const unsigned int new_hexes)
+    {
+      // count the used slots. std::count replaces the former
+      // std::count_if/std::bind2nd combination: std::bind2nd is deprecated
+      // since C++11 and no longer part of C++17
+      const unsigned int new_size = new_hexes +
+                                    std::count (used.begin(),
+                                                used.end(),
+                                                true);
+
+      // only allocate space if necessary
+      if (new_size>cells.size())
+        {
+          cells.reserve (new_size);
+          cells.insert (cells.end(),
+                        new_size-cells.size(),
+                        TriaObject<3> ());
+
+          used.reserve (new_size);
+          used.insert (used.end(),
+                       new_size-used.size(),
+                       false);
+
+          user_flags.reserve (new_size);
+          user_flags.insert (user_flags.end(),
+                             new_size-user_flags.size(),
+                             false);
+
+          // four children entries per hex
+          children.reserve (4*new_size);
+          children.insert (children.end(),
+                           4*new_size-children.size(),
+                           -1);
+
+          // for the following two fields, we know exactly how many elements
+          // we need, so first reserve then resize (resize itself, at least
+          // with some compiler libraries, appears to round up the size it
+          // actually reserves)
+          boundary_or_material_id.reserve (new_size);
+          boundary_or_material_id.resize (new_size);
+
+          manifold_id.reserve (new_size);
+          manifold_id.insert (manifold_id.end(),
+                              new_size-manifold_id.size(),
+                              numbers::flat_manifold_id);
+
+          user_data.reserve (new_size);
+          user_data.resize (new_size);
+
+          // the face orientation, flip and rotation flags are stored once
+          // per face of each hex
+          face_orientations.reserve (new_size * GeometryInfo<3>::faces_per_cell);
+          face_orientations.insert (face_orientations.end(),
+                                    new_size * GeometryInfo<3>::faces_per_cell
+                                    - face_orientations.size(),
+                                    true);
+
+          refinement_cases.reserve (new_size);
+          refinement_cases.insert (refinement_cases.end(),
+                                   new_size-refinement_cases.size(),
+                                   RefinementCase<3>::no_refinement);
+
+          face_flips.reserve (new_size * GeometryInfo<3>::faces_per_cell);
+          face_flips.insert (face_flips.end(),
+                             new_size * GeometryInfo<3>::faces_per_cell
+                             - face_flips.size(),
+                             false);
+          face_rotations.reserve (new_size * GeometryInfo<3>::faces_per_cell);
+          face_rotations.insert (face_rotations.end(),
+                                 new_size * GeometryInfo<3>::faces_per_cell
+                                 - face_rotations.size(),
+                                 false);
+        }
+      next_free_single=next_free_pair=0;
+    }
+
+
+    // Make room for @p new_quads_in_pairs quads that are stored as pairs of
+    // consecutive slots (must be even, asserted) and for
+    // @p new_quads_single quads stored in single slots. The counting of
+    // unused slots mirrors TriaObjects<G>::reserve_space; if more room is
+    // needed, the base class function is called and line_orientations is
+    // grown to lines_per_cell entries per quad.
+    void
+    TriaObjectsQuad3D::reserve_space (const unsigned int new_quads_in_pairs,
+                                      const unsigned int new_quads_single)
+    {
+      Assert(new_quads_in_pairs%2==0, ExcInternalError());
+
+      next_free_single=0;
+      next_free_pair=0;
+      reverse_order_next_free_single=false;
+
+      // count the number of objects, of unused single objects and of
+      // unused pairs of objects
+      unsigned int n_quads=0;
+      unsigned int n_unused_pairs=0;
+      unsigned int n_unused_singles=0;
+      for (unsigned int i=0; i<used.size(); ++i)
+        {
+          if (used[i])
+            ++n_quads;
+          else if (i+1<used.size())
+            {
+              if (used[i+1])
+                {
+                  // unused slot followed by a used one: a free single slot
+                  ++n_unused_singles;
+                  if (next_free_single==0)
+                    next_free_single=i;
+                }
+              else
+                {
+                  // two unused slots in a row: a free pair; the extra ++i
+                  // skips the second slot of the pair
+                  ++n_unused_pairs;
+                  if (next_free_pair==0)
+                    next_free_pair=i;
+                  ++i;
+                }
+            }
+          else
+            // unused slot at the very end of the array
+            ++n_unused_singles;
+        }
+      Assert(n_quads+2*n_unused_pairs+n_unused_singles==used.size(),
+             ExcInternalError());
+
+      // how many single quads are needed in addition to n_unused_singles?
+      // (signed, since the difference may be negative)
+      const int additional_single_quads=
+        new_quads_single-n_unused_singles;
+
+      unsigned int new_size=
+        used.size() + new_quads_in_pairs - 2*n_unused_pairs;
+      if (additional_single_quads>0)
+        new_size+=additional_single_quads;
+
+      // only allocate space if necessary
+      if (new_size>cells.size())
+        {
+          // reserve space for the base class
+          TriaObjects<TriaObject<2> >::reserve_space(new_quads_in_pairs,new_quads_single);
+          // reserve the field of the derived class
+          line_orientations.reserve (new_size * GeometryInfo<2>::lines_per_cell);
+          line_orientations.insert (line_orientations.end(),
+                                    new_size * GeometryInfo<2>::lines_per_cell
+                                    - line_orientations.size(),
+                                    true);
+        }
+
+      // no free single slot was found: point at the last slot and flag
+      // reverse order
+      if (n_unused_singles==0)
+        {
+          next_free_single=new_size-1;
+          reverse_order_next_free_single=true;
+        }
+    }
+
+
+    template<>
+    void
+    TriaObjects<TriaObject<1> >::monitor_memory (const unsigned int) const
+    { // consistency check: every per-object array must have exactly one entry per cell
+      Assert (cells.size() == used.size(),
+              ExcMemoryInexact (cells.size(), used.size()));
+      Assert (cells.size() == user_flags.size(),
+              ExcMemoryInexact (cells.size(), user_flags.size()));
+      Assert (cells.size() == children.size(),
+              ExcMemoryInexact (cells.size(), children.size()));
+      Assert (cells.size() == boundary_or_material_id.size(),
+              ExcMemoryInexact (cells.size(), boundary_or_material_id.size()));
+      Assert (cells.size() == manifold_id.size(),
+              ExcMemoryInexact (cells.size(), manifold_id.size()));
+      Assert (cells.size() == user_data.size(),
+              ExcMemoryInexact (cells.size(), user_data.size()));
+    }
+
+
+    template<>
+    void
+    TriaObjects<TriaObject<2> >::monitor_memory (const unsigned int) const
+    { // consistency check: one entry per cell in every array, except children which holds two per cell
+      Assert (cells.size() == used.size(),
+              ExcMemoryInexact (cells.size(), used.size()));
+      Assert (cells.size() == user_flags.size(),
+              ExcMemoryInexact (cells.size(), user_flags.size()));
+      Assert (2*cells.size() == children.size(),
+              ExcMemoryInexact (cells.size(), children.size())); // NOTE(review): reports cells.size() although 2*cells.size() is what is compared
+      Assert (cells.size() == refinement_cases.size(),
+              ExcMemoryInexact (cells.size(), refinement_cases.size()));
+      Assert (cells.size() == boundary_or_material_id.size(),
+              ExcMemoryInexact (cells.size(), boundary_or_material_id.size()));
+      Assert (cells.size() == manifold_id.size(),
+              ExcMemoryInexact (cells.size(), manifold_id.size()));
+      Assert (cells.size() == user_data.size(),
+              ExcMemoryInexact (cells.size(), user_data.size()));
+    }
+
+
+    void
+    TriaObjectsHex::monitor_memory (const unsigned int) const
+    { // consistency check: one entry per cell, four children entries and faces_per_cell face flags per cell
+      Assert (cells.size() == used.size(),
+              ExcMemoryInexact (cells.size(), used.size()));
+      Assert (cells.size() == user_flags.size(),
+              ExcMemoryInexact (cells.size(), user_flags.size()));
+      Assert (4*cells.size() == children.size(),
+              ExcMemoryInexact (cells.size(), children.size())); // NOTE(review): reports cells.size() although 4*cells.size() is what is compared
+      Assert (cells.size() == boundary_or_material_id.size(),
+              ExcMemoryInexact (cells.size(), boundary_or_material_id.size()));
+      Assert (cells.size() == manifold_id.size(),
+              ExcMemoryInexact (cells.size(), manifold_id.size()));
+      Assert (cells.size() == user_data.size(),
+              ExcMemoryInexact (cells.size(), user_data.size()));
+      Assert (cells.size() * GeometryInfo<3>::faces_per_cell
+              == face_orientations.size(),
+              ExcMemoryInexact (cells.size() * GeometryInfo<3>::faces_per_cell,
+                                face_orientations.size()));
+      Assert (cells.size() * GeometryInfo<3>::faces_per_cell
+              == face_flips.size(),
+              ExcMemoryInexact (cells.size() * GeometryInfo<3>::faces_per_cell,
+                                face_flips.size()));
+      Assert (cells.size() * GeometryInfo<3>::faces_per_cell
+              == face_rotations.size(),
+              ExcMemoryInexact (cells.size() * GeometryInfo<3>::faces_per_cell,
+                                face_rotations.size()));
+    }
+
+
+    void
+    TriaObjectsQuad3D::monitor_memory (const unsigned int) const
+    {
+      // line_orientations must hold exactly lines_per_cell entries per quad.
+      // sizes rather than capacities are compared, so over-allocation inside the
+      // container (e.g. bool vectors growing in whole-integer chunks) is not an issue
+      Assert (cells.size() * GeometryInfo<2>::lines_per_cell
+              == line_orientations.size(),
+              ExcMemoryInexact (cells.size() * GeometryInfo<2>::lines_per_cell,
+                                line_orientations.size()));
+      TriaObjects<TriaObject<2> >::monitor_memory (3); // base class checks; the argument is unnamed and unused there
+
+    }
+
+
+    template <typename G>
+    void
+    TriaObjects<G>::clear()
+    { // empty every array of this object and reset the user data type tag
+      cells.clear();
+      children.clear();
+      refinement_cases.clear();
+      used.clear();
+      user_flags.clear();
+      boundary_or_material_id.clear();
+      manifold_id.clear();
+      user_data.clear();
+      user_data_type = data_unknown;
+    }
+
+
+    void
+    TriaObjectsHex::clear()
+    { // clear the base class arrays first, then the three per-face arrays of this class
+      TriaObjects<TriaObject<3> >::clear();
+      face_orientations.clear();
+      face_flips.clear();
+      face_rotations.clear();
+    }
+
+
+    void
+    TriaObjectsQuad3D::clear()
+    { // clear the base class arrays first, then the line orientation flags of this class
+      TriaObjects<TriaObject<2> >::clear();
+      line_orientations.clear();
+    }
+
+
+    template<typename G>
+    std::size_t
+    TriaObjects<G>::memory_consumption () const
+    { // sum over all member arrays; user_data is counted by hand as capacity times element size plus the container itself
+      return (MemoryConsumption::memory_consumption (cells) +
+              MemoryConsumption::memory_consumption (children) +
+              MemoryConsumption::memory_consumption (used) +
+              MemoryConsumption::memory_consumption (user_flags) +
+              MemoryConsumption::memory_consumption (boundary_or_material_id) +
+              MemoryConsumption::memory_consumption (manifold_id) +
+              MemoryConsumption::memory_consumption (refinement_cases) +
+              user_data.capacity() * sizeof(UserData) + sizeof(user_data));
+    }
+
+
+    std::size_t
+    TriaObjectsHex::memory_consumption () const
+    { // the three per-face arrays of this class plus whatever the base class reports
+      return (MemoryConsumption::memory_consumption (face_orientations) +
+              MemoryConsumption::memory_consumption (face_flips) +
+              MemoryConsumption::memory_consumption (face_rotations) +
+              TriaObjects<TriaObject<3> >::memory_consumption() );
+    }
+
+
+    std::size_t
+    TriaObjectsQuad3D::memory_consumption () const
+    { // the line orientation flags of this class plus whatever the base class reports
+      return (MemoryConsumption::memory_consumption (line_orientations) +
+              this->TriaObjects<TriaObject<2> >::memory_consumption() );
+    }
+
+
+
+// explicit instantiations of the class template for TriaObject<1> and TriaObject<2>
+    template class TriaObjects<TriaObject<1> >;
+    template class TriaObjects<TriaObject<2> >;
+
+#include "tria_objects.inst"
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
diff --git a/source/grid/tria_objects.inst.in b/source/grid/tria_objects.inst.in
new file mode 100644
index 0000000..7df5d1f
--- /dev/null
+++ b/source/grid/tria_objects.inst.in
@@ -0,0 +1,23 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+for (deal_II_dimension : DIMENSIONS)
+  {
+#if deal_II_dimension >= 3
+    template dealii::Triangulation<deal_II_dimension>::raw_hex_iterator
+    TriaObjects<TriaObject<3> >::next_free_hex(const dealii::Triangulation<deal_II_dimension> &, const unsigned int);
+#endif
+  }
diff --git a/source/hp/CMakeLists.txt b/source/hp/CMakeLists.txt
new file mode 100644
index 0000000..8013a22
--- /dev/null
+++ b/source/hp/CMakeLists.txt
@@ -0,0 +1,39 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_BINARY_DIR})
+
+SET(_src
+  dof_faces.cc
+  dof_handler.cc
+  dof_level.cc
+  fe_collection.cc
+  fe_values.cc
+  mapping_collection.cc
+  )
+
+SET(_inst
+  dof_handler.inst.in
+  fe_collection.inst.in
+  fe_values.inst.in
+  mapping_collection.inst.in
+  )
+
+FILE(GLOB _header
+  ${CMAKE_SOURCE_DIR}/include/deal.II/hp/*.h
+  )
+
+DEAL_II_ADD_LIBRARY(obj_hp OBJECT ${_src} ${_header} ${_inst})
+EXPAND_INSTANTIATIONS(obj_hp "${_inst}")
diff --git a/source/hp/dof_faces.cc b/source/hp/dof_faces.cc
new file mode 100644
index 0000000..2007208
--- /dev/null
+++ b/source/hp/dof_faces.cc
@@ -0,0 +1,78 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/hp/dof_faces.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace internal
+{
+  namespace hp
+  {
+// ---------------------- DoFIndicesOnFacesOrEdges ----------------------------
+
+    template <int structdim>
+    std::size_t
+    DoFIndicesOnFacesOrEdges<structdim>::memory_consumption () const
+    { // sum of the memory reported for the dofs and dof_offsets members
+      return (MemoryConsumption::memory_consumption (dofs) +
+              MemoryConsumption::memory_consumption (dof_offsets));
+    }
+
+
+    // explicit instantiations of memory_consumption() for structdim = 1, 2 and 3
+    template
+    std::size_t
+    DoFIndicesOnFacesOrEdges<1>::memory_consumption () const;
+
+    template
+    std::size_t
+    DoFIndicesOnFacesOrEdges<2>::memory_consumption () const;
+
+    template
+    std::size_t
+    DoFIndicesOnFacesOrEdges<3>::memory_consumption () const;
+
+
+// ---------------------- DoFIndicesOnFaces ----------------------------
+
+    std::size_t
+    DoFIndicesOnFaces<1>::memory_consumption () const
+    {
+      return 0; // this specialization accounts for no members and always reports zero
+    }
+
+
+
+    std::size_t
+    DoFIndicesOnFaces<2>::memory_consumption () const
+    {
+      return MemoryConsumption::memory_consumption (lines); // only the lines member is accounted for here
+    }
+
+
+
+    std::size_t
+    DoFIndicesOnFaces<3>::memory_consumption () const
+    { // the lines and the quads members are both accounted for here
+      return (MemoryConsumption::memory_consumption (lines) +
+              MemoryConsumption::memory_consumption (quads) );
+    }
+
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/hp/dof_handler.cc b/source/hp/dof_handler.cc
new file mode 100644
index 0000000..3279b07
--- /dev/null
+++ b/source/hp/dof_handler.cc
@@ -0,0 +1,3389 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/base/geometry_info.h>
+#include <deal.II/base/thread_management.h>
+#include <deal.II/base/std_cxx11/bind.h>
+#include <deal.II/hp/dof_handler.h>
+#include <deal.II/hp/dof_level.h>
+#include <deal.II/hp/dof_faces.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/tria_levels.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/distributed/shared_tria.h>
+#include <deal.II/distributed/tria.h>
+
+#include <set>
+#include <algorithm>
+#include <functional>
+
+DEAL_II_NAMESPACE_OPEN
+
+// The following is necessary for compilation under Visual Studio which is unable to correctly
+// distinguish between dealii::DoFHandler and dealii::hp::DoFHandler.
+// Plus it makes code in dof_handler.cc easier to read.
+// Requires C++11 support which is in Visual Studio 2013 and newer.
+#if _MSC_VER >= 1800
+template <int dim, int spacedim> using HpDoFHandler = ::dealii::hp::DoFHandler<dim, spacedim>;
+#else
+// When using older Visual Studio or a different compiler just fall back.
+#define HpDoFHandler DoFHandler
+#endif
+
+namespace parallel
+{
+  namespace distributed
+  {
+    template <int, int> class Triangulation;
+  }
+}
+
+
+namespace internal
+{
+  namespace hp
+  {
+    typedef
+    std::vector<std::pair<unsigned int, unsigned int> > DoFIdentities;
+
+
+    /**
+     * Make sure that the given @p identities pointer points to a valid
+     * array. If the pointer is zero beforehand, create an entry with the
+     * correct data; if it is nonzero, don't touch it.
+     *
+     * @p structdim denotes the dimension of the objects on which
+     * identities are to be represented, i.e. zero for vertices, one for
+     * lines, and two for quads; any other value triggers the
+     * ExcNotImplemented assertion.
+     *
+     * @param fe1 element whose hp_*_dof_identities() function is queried
+     * @param fe2 element passed to that function as the other element
+     * @param identities cached list of index pairs, filled on first use;
+     *        pair.first is checked against fe1, pair.second against fe2
+     */
+    template <int structdim, int dim, int spacedim>
+    void
+    ensure_existence_of_dof_identities (const FiniteElement<dim,spacedim> &fe1,
+                                        const FiniteElement<dim,spacedim> &fe2,
+                                        std_cxx11::shared_ptr<DoFIdentities> &identities)
+    {
+      // only fill the entry if the
+      // shared pointer is still empty;
+      // an existing one is left alone
+      if (identities.get() == 0)
+        {
+          switch (structdim)
+            {
+            case 0:
+            {
+              identities =
+                std_cxx11::shared_ptr<DoFIdentities>
+                (new DoFIdentities(fe1.hp_vertex_dof_identities(fe2)));
+              break;
+            }
+
+            case 1:
+            {
+              identities =
+                std_cxx11::shared_ptr<DoFIdentities>
+                (new DoFIdentities(fe1.hp_line_dof_identities(fe2)));
+              break;
+            }
+
+            case 2:
+            {
+              identities =
+                std_cxx11::shared_ptr<DoFIdentities>
+                (new DoFIdentities(fe1.hp_quad_dof_identities(fe2)));
+              break;
+            }
+
+            default:
+              Assert (false, ExcNotImplemented());
+            }
+
+          // double check that each new
+          // pair indexes valid dofs on an
+          // object of dimension structdim
+          for (unsigned int i=0; i<identities->size(); ++i)
+            {
+              Assert ((*identities)[i].first < fe1.template n_dofs_per_object<structdim>(),
+                      ExcInternalError());
+              Assert ((*identities)[i].second < fe2.template n_dofs_per_object<structdim>(),
+                      ExcInternalError());
+            }
+        }
+    }
+
+
+
+    /**
+     * For an object, such as a line or a quad iterator, determine the
+     * fe_index of the most dominating finite element that lives on this
+     * object: the first active element whose compare_for_face_domination()
+     * results, combined over all other active elements, yield
+     * this_element_dominates, either_element_can_dominate or no_requirements.
+     *
+     * Return numbers::invalid_unsigned_int if we couldn't find one.
+     */
+    template <int dim, int spacedim, typename iterator>
+    unsigned int
+    get_most_dominating_fe_index (const iterator &object)
+    {
+      unsigned int dominating_fe_index = 0;
+      for (; dominating_fe_index<object->n_active_fe_indices();
+           ++dominating_fe_index)
+        {
+          const FiniteElement<dim, spacedim> &this_fe
+            = object->get_fe (object->nth_active_fe_index(dominating_fe_index));
+
+          FiniteElementDomination::Domination
+          domination = FiniteElementDomination::either_element_can_dominate;
+          for (unsigned int other_fe_index=0;
+               other_fe_index<object->n_active_fe_indices();
+               ++other_fe_index)
+            if (other_fe_index != dominating_fe_index)
+              {
+                const FiniteElement<dim, spacedim>
+                &that_fe
+                  = object->get_fe (object->nth_active_fe_index(other_fe_index));
+
+                domination = domination &
+                             this_fe.compare_for_face_domination(that_fe);
+              }
+
+          // see if this element is
+          // able to dominate all the
+          // other ones, and if so
+          // stop searching and take it
+          if ((domination == FiniteElementDomination::this_element_dominates)
+              ||
+              (domination == FiniteElementDomination::either_element_can_dominate)
+              ||
+              (domination == FiniteElementDomination::no_requirements))
+            break;
+        }
+
+      // the loop ran to its end without a
+      // break iff no such fe was found
+      if (dominating_fe_index != object->n_active_fe_indices())
+        {
+          // translate the position in
+          // this object's list of active
+          // fe indices into the fe_index
+          // stored there and return it
+          return object->nth_active_fe_index(dominating_fe_index);
+        }
+      else
+        {
+          // no active element dominates all the others
+          return numbers::invalid_unsigned_int;
+        }
+    }
+  }
+}
+
+
+
+namespace internal
+{
+  namespace hp
+  {
+    namespace DoFHandler
+    {
+      // access class
+      // dealii::hp::DoFHandler instead of
+      // namespace internal::hp::DoFHandler, etc
+      using dealii::hp::DoFHandler;
+
+      /**
+       * A class with the same purpose as the similarly named class of the
+       * Triangulation class. See there for more information.
+       */
+      struct Implementation
+      {
+        /**
+         * Do that part of reserving
+         * space that pertains to
+         * vertices, since this is the
+         * same in all space
+         * dimensions.
+         */
+        template<int dim, int spacedim>
+        static
+        void
+        reserve_space_vertices (DoFHandler<dim,spacedim> &dof_handler)
+        {
+          // The final step in allocating
+          // memory is to set up vertex dof
+          // information. since vertices
+          // are sequentially numbered,
+          // what we do first is to set up
+          // an array in which we record
+          // whether a vertex is associated
+          // with any of the given fe's, by
+          // setting a bit. in a later
+          // step, we then actually
+          // allocate memory for the
+          // required dofs
+          std::vector<std::vector<bool> >
+          vertex_fe_association (dof_handler.finite_elements->size(),
+                                 std::vector<bool> (dof_handler.tria->n_vertices(), false));
+
+          for (typename HpDoFHandler<dim,spacedim>::active_cell_iterator
+               cell=dof_handler.begin_active(); cell!=dof_handler.end(); ++cell)
+            for (unsigned int v=0; v<GeometryInfo<dim>::vertices_per_cell; ++v)
+              vertex_fe_association[cell->active_fe_index()][cell->vertex_index(v)]
+                = true;
+
+          // in debug mode, make sure
+          // that each vertex is
+          // associated with at least one
+          // fe (note that except for
+          // unused vertices, all
+          // vertices are actually
+          // active)
+#ifdef DEBUG
+          for (unsigned int v=0; v<dof_handler.tria->n_vertices(); ++v)
+            if (dof_handler.tria->vertex_used(v) == true)
+              {
+                unsigned int fe=0;
+                for (; fe<dof_handler.finite_elements->size(); ++fe)
+                  if (vertex_fe_association[fe][v] == true)
+                    break;
+                Assert (fe != dof_handler.finite_elements->size(), ExcInternalError());
+              }
+#endif
+
+          // next count how much memory
+          // we actually need. for each
+          // vertex, we need one slot per
+          // fe to store the fe_index,
+          // plus dofs_per_vertex for
+          // this fe. in addition, we
+          // need one slot as the end
+          // marker for the
+          // fe_indices. at the same time
+          // already fill the
+          // vertex_dofs_offsets field
+          dof_handler.vertex_dofs_offsets.resize (dof_handler.tria->n_vertices(),
+                                                  numbers::invalid_dof_index);
+
+          unsigned int vertex_slots_needed = 0;
+          for (unsigned int v=0; v<dof_handler.tria->n_vertices(); ++v)
+            if (dof_handler.tria->vertex_used(v) == true)
+              {
+                dof_handler.vertex_dofs_offsets[v] = vertex_slots_needed;
+
+                for (unsigned int fe=0; fe<dof_handler.finite_elements->size(); ++fe)
+                  if (vertex_fe_association[fe][v] == true)
+                    vertex_slots_needed += (*dof_handler.finite_elements)[fe].dofs_per_vertex + 1;
+                ++vertex_slots_needed; // one more slot for the end marker of this vertex
+              }
+
+          // now allocate the space we
+          // have determined we need, and
+          // set up the linked lists for
+          // each of the vertices
+          dof_handler.vertex_dofs.resize (vertex_slots_needed,
+                                          DoFHandler<dim,spacedim>::invalid_dof_index);
+          for (unsigned int v=0; v<dof_handler.tria->n_vertices(); ++v)
+            if (dof_handler.tria->vertex_used(v) == true)
+              {
+                types::global_dof_index pointer = dof_handler.vertex_dofs_offsets[v];
+                for (unsigned int fe=0; fe<dof_handler.finite_elements->size(); ++fe)
+                  if (vertex_fe_association[fe][v] == true)
+                    {
+                      // if this vertex
+                      // uses this fe,
+                      // then set the
+                      // fe_index and
+                      // move the pointer
+                      // ahead
+                      dof_handler.vertex_dofs[pointer] = fe;
+                      pointer += (*dof_handler.finite_elements)[fe].dofs_per_vertex + 1;
+                    }
+                // finally place the end
+                // marker
+                dof_handler.vertex_dofs[pointer] = numbers::invalid_dof_index;
+              }
+        }
+
+
+
+        /**
+         * Distribute dofs on the given cell,
+         * with new dofs starting with index
+         * @p next_free_dof. Return the next
+         * unused index number. The finite
+         * element used is the one active on
+         * the cell, i.e. cell->get_fe(); the
+         * cell's user flag is set at the end.
+         *
+         * This function is excluded from the
+         * @p distribute_dofs function since
+         * it can not be implemented dimension
+         * independent.
+         */
+        template <int spacedim>
+        static
+        types::global_dof_index
+        distribute_dofs_on_cell (const typename dealii::hp::DoFHandler<1,spacedim>::active_cell_iterator &cell,
+                                 types::global_dof_index                                                 next_free_dof)
+        {
+          const unsigned int dim = 1;
+
+          const FiniteElement<dim,spacedim> &fe       = cell->get_fe();
+          const unsigned int                 fe_index = cell->active_fe_index ();
+
+          // number dofs on vertices. to do
+          // so, check whether dofs for
+          // this vertex have been
+          // distributed and for the
+          // present fe (only check the
+          // first dof), and if this isn't
+          // the case distribute new ones
+          // there
+          if (fe.dofs_per_vertex > 0)
+            for (unsigned int vertex=0; vertex<GeometryInfo<1>::vertices_per_cell; ++vertex)
+              if (cell->vertex_dof_index(vertex, 0, fe_index) ==
+                  DoFHandler<dim,spacedim>::invalid_dof_index)
+                for (unsigned int d=0; d<fe.dofs_per_vertex; ++d, ++next_free_dof)
+                  cell->set_vertex_dof_index (vertex, d, next_free_dof, fe_index);
+
+          // finally for the line. this one
+          // shouldn't be numbered yet
+          if (fe.dofs_per_line > 0)
+            {
+              Assert ((cell->dof_index(0, fe_index) ==
+                       DoFHandler<dim,spacedim>::invalid_dof_index),
+                      ExcInternalError());
+
+              for (unsigned int d=0; d<fe.dofs_per_line; ++d, ++next_free_dof)
+                cell->set_dof_index (d, next_free_dof, fe_index);
+            }
+
+          // note that this cell has been processed
+          cell->set_user_flag ();
+
+          return next_free_dof;
+        }
+
+
+        template <int spacedim>
+        static
+        types::global_dof_index
+        distribute_dofs_on_cell (const typename dealii::hp::DoFHandler<2,spacedim>::active_cell_iterator &cell,
+                                 types::global_dof_index                                                 next_free_dof)
+        {
+          const unsigned int dim = 2; // overload for 2d cells: numbers vertex, line and quad dofs and returns the next unused index
+
+          const FiniteElement<dim,spacedim> &fe       = cell->get_fe();
+          const unsigned int                 fe_index = cell->active_fe_index ();
+
+          // number dofs on vertices. to do
+          // so, check whether dofs for
+          // this vertex have been
+          // distributed and for the
+          // present fe (only check the
+          // first dof), and if this isn't
+          // the case distribute new ones
+          // there
+          if (fe.dofs_per_vertex > 0)
+            for (unsigned int vertex=0; vertex<GeometryInfo<2>::vertices_per_cell; ++vertex)
+              if (cell->vertex_dof_index(vertex, 0, fe_index) ==
+                  DoFHandler<dim,spacedim>::invalid_dof_index)
+                for (unsigned int d=0; d<fe.dofs_per_vertex; ++d, ++next_free_dof)
+                  cell->set_vertex_dof_index (vertex, d, next_free_dof, fe_index);
+
+          // next the lines bounding the
+          // cell. do the same as above:
+          // check whether the line is already
+          // numbered for the present fe_index
+          // (first dof only), and if not do it
+          if (fe.dofs_per_line > 0)
+            for (unsigned int l=0; l<GeometryInfo<2>::lines_per_cell; ++l)
+              {
+                typename HpDoFHandler<dim,spacedim>::line_iterator
+                line = cell->line(l);
+
+                if (line->dof_index(0,fe_index) ==
+                    DoFHandler<dim,spacedim>::invalid_dof_index)
+                  for (unsigned int d=0; d<fe.dofs_per_line; ++d, ++next_free_dof)
+                    line->set_dof_index (d, next_free_dof, fe_index);
+              }
+
+
+          // finally for the quad. this one
+          // shouldn't be numbered yet
+          if (fe.dofs_per_quad > 0)
+            {
+              Assert ((cell->dof_index(0, fe_index) ==
+                       DoFHandler<dim,spacedim>::invalid_dof_index),
+                      ExcInternalError());
+
+              for (unsigned int d=0; d<fe.dofs_per_quad; ++d, ++next_free_dof)
+                cell->set_dof_index (d, next_free_dof, fe_index);
+            }
+
+          // note that this cell has been processed
+          cell->set_user_flag ();
+
+          return next_free_dof;
+        }
+
+
+        template <int spacedim>
+        static
+        types::global_dof_index
+        distribute_dofs_on_cell (const typename dealii::hp::DoFHandler<3,spacedim>::active_cell_iterator &cell,
+                                 types::global_dof_index                                                 next_free_dof)
+        {
+          const unsigned int dim = 3; // overload for 3d cells: numbers vertex, line, quad and hex dofs and returns the next unused index
+
+          const FiniteElement<dim,spacedim> &fe       = cell->get_fe();
+          const unsigned int                 fe_index = cell->active_fe_index ();
+
+          // number dofs on vertices. to do
+          // so, check whether dofs for
+          // this vertex have been
+          // distributed and for the
+          // present fe (only check the
+          // first dof), and if this isn't
+          // the case distribute new ones
+          // there
+          if (fe.dofs_per_vertex > 0)
+            for (unsigned int vertex=0; vertex<GeometryInfo<3>::vertices_per_cell; ++vertex)
+              if (cell->vertex_dof_index(vertex, 0, fe_index) ==
+                  DoFHandler<dim,spacedim>::invalid_dof_index)
+                for (unsigned int d=0; d<fe.dofs_per_vertex; ++d, ++next_free_dof)
+                  cell->set_vertex_dof_index (vertex, d, next_free_dof, fe_index);
+
+          // next the lines of the cell. do
+          // the same as above: check whether
+          // the line is already numbered
+          // for the present fe_index, and
+          // if not do it
+          if (fe.dofs_per_line > 0)
+            for (unsigned int l=0; l<GeometryInfo<3>::lines_per_cell; ++l)
+              {
+                typename HpDoFHandler<dim,spacedim>::line_iterator
+                line = cell->line(l);
+
+                if (line->dof_index(0,fe_index) ==
+                    DoFHandler<dim,spacedim>::invalid_dof_index)
+                  for (unsigned int d=0; d<fe.dofs_per_line; ++d, ++next_free_dof)
+                    line->set_dof_index (d, next_free_dof, fe_index);
+              }
+
+          // same for the quads bounding the cell
+          if (fe.dofs_per_quad > 0)
+            for (unsigned int q=0; q<GeometryInfo<3>::quads_per_cell; ++q)
+              {
+                typename HpDoFHandler<dim,spacedim>::quad_iterator
+                quad = cell->quad(q);
+
+                if (quad->dof_index(0,fe_index) ==
+                    DoFHandler<dim,spacedim>::invalid_dof_index)
+                  for (unsigned int d=0; d<fe.dofs_per_quad; ++d, ++next_free_dof)
+                    quad->set_dof_index (d, next_free_dof, fe_index);
+              }
+
+
+          // finally for the hex. this one
+          // shouldn't be numbered yet
+          if (fe.dofs_per_hex > 0)
+            {
+              Assert ((cell->dof_index(0, fe_index) ==
+                       DoFHandler<dim,spacedim>::invalid_dof_index),
+                      ExcInternalError());
+
+              for (unsigned int d=0; d<fe.dofs_per_hex; ++d, ++next_free_dof)
+                cell->set_dof_index (d, next_free_dof, fe_index);
+            }
+
+          // note that this cell has been processed
+          cell->set_user_flag ();
+
+          return next_free_dof;
+        }
+
+
+        /**
+         * Set up the storage of a one-dimensional hp
+         * DoFHandler.
+         *
+         * Everything allocated so far is dropped through
+         * clear_space(); only the <tt>active_fe_indices</tt>
+         * array of each level is carried over. Afterwards
+         * each level receives its offset tables and index
+         * arrays for the DoFs located on lines (the cells
+         * in 1d), with all index entries preset to
+         * <tt>invalid_dof_index</tt>. Storage for vertex
+         * DoFs is delegated to reserve_space_vertices().
+         */
+        template <int spacedim>
+        static
+        void
+        reserve_space (DoFHandler<1,spacedim> &dof_handler)
+        {
+          const unsigned int dim = 1;
+
+          typedef DoFHandler<dim,spacedim> BaseClass;
+          typedef DoFLevel::offset_type    offset_type;
+
+          // value stored in the offset tables for cells
+          // that have not been assigned a position
+          const offset_type unset_offset = static_cast<offset_type>(-1);
+
+          Assert (dof_handler.finite_elements != 0, typename BaseClass::ExcNoFESelected());
+          Assert (dof_handler.finite_elements->size() > 0, typename BaseClass::ExcNoFESelected());
+          Assert (dof_handler.tria->n_levels() > 0, typename BaseClass::ExcInvalidTriangulation());
+          Assert (dof_handler.tria->n_levels() == dof_handler.levels.size (), ExcInternalError ());
+
+          const unsigned int n_levels = dof_handler.tria->n_levels();
+
+          // Throw away the old level objects, but keep
+          // the active_fe_indices arrays: move them out
+          // of the way first and hand them to the newly
+          // created levels afterwards
+          {
+            std::vector<std::vector<DoFLevel::active_fe_index_type> >
+            saved_fe_indices (dof_handler.levels.size ());
+            for (unsigned int lvl=0; lvl<dof_handler.levels.size (); ++lvl)
+              saved_fe_indices[lvl].swap (dof_handler.levels[lvl]->active_fe_indices);
+
+            // rebuild the level objects from scratch
+            // instead of resizing the existing vectors
+            dof_handler.clear_space ();
+
+            for (unsigned int lvl=0; lvl<n_levels; ++lvl)
+              {
+                dof_handler.levels.push_back (new internal::hp::DoFLevel);
+                dof_handler.levels[lvl]->active_fe_indices.swap (saved_fe_indices[lvl]);
+              }
+          }
+
+          // LINE (CELL) DOFs
+          //
+          // For every level, size the two offset tables
+          // to the number of raw lines and fill them with
+          // the "unset" marker. Then walk over the active
+          // cells, record where each one's block starts
+          // and accumulate the total lengths of the DoF
+          // index array and of the cell DoF cache. A cell
+          // carries exactly one finite element, so one
+          // block per cell suffices.
+          for (unsigned int lvl=0; lvl<n_levels; ++lvl)
+            {
+              internal::hp::DoFLevel &level_data = *dof_handler.levels[lvl];
+              const unsigned int n_raw_cells = dof_handler.tria->n_raw_lines(lvl);
+
+              level_data.dof_offsets.assign (n_raw_cells, unset_offset);
+              level_data.cell_cache_offsets.assign (n_raw_cells, unset_offset);
+
+              types::global_dof_index n_dof_slots   = 0;
+              types::global_dof_index n_cache_slots = 0;
+              for (typename HpDoFHandler<dim,spacedim>::active_cell_iterator
+                   cell = dof_handler.begin_active(lvl),
+                   endc = dof_handler.end_active(lvl);
+                   cell != endc; ++cell)
+                {
+                  if (cell->has_children())
+                    continue;
+
+                  // this cell's blocks start at the
+                  // current running totals ...
+                  level_data.dof_offsets[cell->index()]        = n_dof_slots;
+                  level_data.cell_cache_offsets[cell->index()] = n_cache_slots;
+
+                  // ... which then grow by what the
+                  // cell's element needs
+                  n_dof_slots   += cell->get_fe().dofs_per_line;
+                  n_cache_slots += cell->get_fe().dofs_per_cell;
+                }
+
+              level_data.dof_indices.assign (n_dof_slots,
+                                             BaseClass::invalid_dof_index);
+              level_data.cell_dof_indices_cache.assign (n_cache_slots,
+                                                        BaseClass::invalid_dof_index);
+            }
+
+          // Consistency check (debug mode only): recount
+          // the line DoFs per level independently of the
+          // loop above and compare against the allocated
+          // array; also verify that the number of offsets
+          // still unset equals the number of raw lines
+          // minus the number of active lines.
+          //
+          // NOTE(review): the iterator below is a
+          // cell_iterator rather than an
+          // active_cell_iterator; the has_children()
+          // test presumably filters out non-active cells
+          // it may pass over -- confirm before changing.
+#ifdef DEBUG
+          for (unsigned int level=0; level<dof_handler.tria->n_levels(); ++level)
+            {
+              types::global_dof_index counter = 0;
+              for (typename HpDoFHandler<dim,spacedim>::cell_iterator
+                   cell=dof_handler.begin_active(level);
+                   cell!=dof_handler.end_active(level); ++cell)
+                if (!cell->has_children())
+                  counter += cell->get_fe().dofs_per_line;
+
+              Assert (dof_handler.levels[level]->dof_indices.size() == counter,
+                      ExcInternalError());
+              Assert (static_cast<unsigned int>
+                      (std::count (dof_handler.levels[level]->dof_offsets.begin(),
+                                   dof_handler.levels[level]->dof_offsets.end(),
+                                   (DoFLevel::offset_type)(-1)))
+                      ==
+                      dof_handler.tria->n_raw_lines(level) - dof_handler.tria->n_active_lines(level),
+                      ExcInternalError());
+            }
+#endif
+
+
+          // VERTEX DOFS
+          reserve_space_vertices (dof_handler);
+        }
+
+
+        /**
+         * Set up the storage of a two-dimensional hp
+         * DoFHandler.
+         *
+         * Everything allocated so far is dropped through
+         * clear_space(); only the <tt>active_fe_indices</tt>
+         * array of each level is carried over, and a new
+         * faces object is created. The function then
+         * allocates, in this order, the per-level storage
+         * for DoFs on quads (the cells in 2d) and the
+         * storage for DoFs on lines (the faces in 2d),
+         * and finally calls reserve_space_vertices().
+         *
+         * For a line whose two adjacent cells use different
+         * active_fe_index values (and which is neither
+         * refined nor adjacent to a coarser neighbor) two
+         * sets of line DoFs are reserved; all other lines
+         * get a single set.
+         *
+         * The line user flags of the triangulation are used
+         * as scratch space and restored before returning.
+         */
+        template <int spacedim>
+        static
+        void
+        reserve_space (DoFHandler<2,spacedim> &dof_handler)
+        {
+          const unsigned int dim = 2;
+
+          typedef DoFHandler<dim,spacedim> BaseClass;
+          typedef DoFLevel::offset_type    offset_type;
+
+          // value stored in the offset tables for cells
+          // that have not been assigned a position
+          const offset_type unset_offset = static_cast<offset_type>(-1);
+
+          Assert (dof_handler.finite_elements != 0, typename BaseClass::ExcNoFESelected());
+          Assert (dof_handler.finite_elements->size() > 0, typename BaseClass::ExcNoFESelected());
+          Assert (dof_handler.tria->n_levels() > 0, typename BaseClass::ExcInvalidTriangulation());
+          Assert (dof_handler.tria->n_levels() == dof_handler.levels.size (), ExcInternalError ());
+
+          const unsigned int n_levels = dof_handler.tria->n_levels();
+
+          // Throw away the old level objects, but keep
+          // the active_fe_indices arrays: move them out
+          // of the way first and hand them to the newly
+          // created levels afterwards
+          {
+            std::vector<std::vector<DoFLevel::active_fe_index_type> >
+            saved_fe_indices (dof_handler.levels.size ());
+            for (unsigned int lvl=0; lvl<dof_handler.levels.size (); ++lvl)
+              saved_fe_indices[lvl].swap (dof_handler.levels[lvl]->active_fe_indices);
+
+            // rebuild the level objects from scratch
+            // instead of resizing the existing vectors
+            dof_handler.clear_space ();
+
+            for (unsigned int lvl=0; lvl<n_levels; ++lvl)
+              {
+                dof_handler.levels.push_back (new internal::hp::DoFLevel);
+                dof_handler.levels[lvl]->active_fe_indices.swap (saved_fe_indices[lvl]);
+              }
+            dof_handler.faces = new internal::hp::DoFIndicesOnFaces<2>;
+          }
+
+
+          // QUAD (CELL) DOFs
+          //
+          // For every level, size the two offset tables
+          // to the number of raw quads and fill them with
+          // the "unset" marker. Then walk over the active
+          // cells, record where each one's block starts
+          // and accumulate the total lengths of the DoF
+          // index array and of the cell DoF cache. A cell
+          // carries exactly one finite element, so one
+          // block per cell suffices.
+          for (unsigned int lvl=0; lvl<n_levels; ++lvl)
+            {
+              internal::hp::DoFLevel &level_data = *dof_handler.levels[lvl];
+              const unsigned int n_raw_cells = dof_handler.tria->n_raw_quads(lvl);
+
+              level_data.dof_offsets.assign (n_raw_cells, unset_offset);
+              level_data.cell_cache_offsets.assign (n_raw_cells, unset_offset);
+
+              types::global_dof_index n_dof_slots   = 0;
+              types::global_dof_index n_cache_slots = 0;
+              for (typename HpDoFHandler<dim,spacedim>::active_cell_iterator
+                   cell = dof_handler.begin_active(lvl),
+                   endc = dof_handler.end_active(lvl);
+                   cell != endc; ++cell)
+                {
+                  if (cell->has_children())
+                    continue;
+
+                  // this cell's blocks start at the
+                  // current running totals ...
+                  level_data.dof_offsets[cell->index()]        = n_dof_slots;
+                  level_data.cell_cache_offsets[cell->index()] = n_cache_slots;
+
+                  // ... which then grow by what the
+                  // cell's element needs
+                  n_dof_slots   += cell->get_fe().dofs_per_quad;
+                  n_cache_slots += cell->get_fe().dofs_per_cell;
+                }
+
+              level_data.dof_indices.assign (n_dof_slots,
+                                             BaseClass::invalid_dof_index);
+              level_data.cell_dof_indices_cache.assign (n_cache_slots,
+                                                        BaseClass::invalid_dof_index);
+            }
+
+          // Consistency check (debug mode only): recount
+          // the quad DoFs per level independently of the
+          // loop above and compare against the allocated
+          // array; also verify that the number of offsets
+          // still unset equals the number of raw quads
+          // minus the number of active quads.
+          //
+          // NOTE(review): the iterator below is a
+          // cell_iterator rather than an
+          // active_cell_iterator; the has_children()
+          // test presumably filters out non-active cells
+          // it may pass over -- confirm before changing.
+#ifdef DEBUG
+          for (unsigned int level=0; level<dof_handler.tria->n_levels(); ++level)
+            {
+              types::global_dof_index counter = 0;
+              for (typename HpDoFHandler<dim,spacedim>::cell_iterator
+                   cell=dof_handler.begin_active(level);
+                   cell!=dof_handler.end_active(level); ++cell)
+                if (!cell->has_children())
+                  counter += cell->get_fe().dofs_per_quad;
+
+              Assert (dof_handler.levels[level]->dof_indices.size() == counter,
+                      ExcInternalError());
+              Assert (static_cast<unsigned int>
+                      (std::count (dof_handler.levels[level]->dof_offsets.begin(),
+                                   dof_handler.levels[level]->dof_offsets.end(),
+                                   (DoFLevel::offset_type)(-1)))
+                      ==
+                      dof_handler.tria->n_raw_quads(level) - dof_handler.tria->n_active_quads(level),
+                      ExcInternalError());
+            }
+#endif
+
+
+          // LINE DOFS
+          //
+          // Lines are handled in two passes over all
+          // faces of all active cells. The first pass
+          // only counts how many slots the line storage
+          // needs, the second one fills in the offsets
+          // and the fe_index entries. Each line must be
+          // treated once per pass even though it can be
+          // reached from two cells; the line user flags
+          // serve as "already visited" markers, which is
+          // why they are saved here and restored at the
+          // end.
+          //
+          // Layout of the record of one line: the
+          // fe_index, then dofs_per_line entries for that
+          // element, optionally the same again for the
+          // neighbor's element, and finally one stop
+          // entry.
+          {
+            dealii::Triangulation<dim,spacedim> &mutable_tria
+              = const_cast<dealii::Triangulation<dim,spacedim>&>(*dof_handler.tria);
+
+            std::vector<bool> saved_line_user_flags;
+            mutable_tria.save_user_flags_line (saved_line_user_flags);
+            mutable_tria.clear_user_flags_line ();
+
+            const typename HpDoFHandler<dim,spacedim>::active_cell_iterator
+            endc = dof_handler.end();
+
+            // pass 1: total number of slots needed for
+            // all lines together
+            unsigned int n_line_slots = 0;
+
+            for (typename HpDoFHandler<dim,spacedim>::active_cell_iterator
+                 cell = dof_handler.begin_active(); cell != endc; ++cell)
+              for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+                {
+                  if (cell->face(f)->user_flag_set())
+                    continue;
+
+                  const unsigned int this_fe = cell->active_fe_index();
+
+                  // a single set of DoFs is enough if
+                  // a) there is no neighbor behind this
+                  // face, b) the face is refined or the
+                  // neighbor is coarser, or c) the
+                  // neighbor uses the same
+                  // active_fe_index as this cell
+                  const bool one_set_suffices
+                    = (cell->at_boundary(f)
+                       ||
+                       cell->face(f)->has_children()
+                       ||
+                       cell->neighbor_is_coarser(f)
+                       ||
+                       (!cell->at_boundary(f)
+                        &&
+                        (this_fe == cell->neighbor(f)->active_fe_index())));
+
+                  // fe_index + DoFs of this cell's
+                  // element + stop entry
+                  unsigned int slots_for_line
+                    = (*dof_handler.finite_elements)[this_fe].dofs_per_line + 2;
+
+                  // second set: one more fe_index plus
+                  // the DoFs of the neighbor's element
+                  if (!one_set_suffices)
+                    slots_for_line
+                    += (*dof_handler.finite_elements)[cell->neighbor(f)->active_fe_index()]
+                       .dofs_per_line
+                       + 1;
+
+                  n_line_slots += slots_for_line;
+
+                  cell->face(f)->set_user_flag ();
+                }
+
+            // Allocate. There is one offset per raw
+            // line, all initially unset; the loop below
+            // assigns offsets only to the lines it
+            // visits. All slots start out as
+            // invalid_dof_index.
+            dof_handler.faces->lines.dof_offsets.assign (dof_handler.tria->n_raw_lines(),
+                                                         static_cast<unsigned int>(-1));
+            dof_handler.faces->lines.dofs.assign (n_line_slots,
+                                                  BaseClass::invalid_dof_index);
+
+            // pass 2: visit the same lines in the same
+            // order and write the offset of each line's
+            // record as well as the fe_index entries
+            // inside it
+            mutable_tria.clear_user_flags_line ();
+
+            unsigned int next_free_line_slot = 0;
+
+            for (typename HpDoFHandler<dim,spacedim>::active_cell_iterator
+                 cell = dof_handler.begin_active(); cell != endc; ++cell)
+              for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+                {
+                  if (cell->face(f)->user_flag_set())
+                    continue;
+
+                  const unsigned int this_fe = cell->active_fe_index();
+
+                  // same criterion as in pass 1
+                  const bool one_set_suffices
+                    = (cell->at_boundary(f)
+                       ||
+                       cell->face(f)->has_children()
+                       ||
+                       cell->neighbor_is_coarser(f)
+                       ||
+                       (!cell->at_boundary(f)
+                        &&
+                        (this_fe == cell->neighbor(f)->active_fe_index())));
+
+                  // the record of this line starts at
+                  // the cursor; its first entry is the
+                  // fe_index of the present cell
+                  dof_handler.faces->lines.dof_offsets[cell->face(f)->index()]
+                    = next_free_line_slot;
+                  dof_handler.faces->lines.dofs[next_free_line_slot]
+                    = this_fe;
+
+                  // skip the fe_index and the DoF
+                  // entries; the latter stay at
+                  // invalid_dof_index for now
+                  next_free_line_slot
+                  += (*dof_handler.finite_elements)[this_fe].dofs_per_line + 1;
+
+                  if (!one_set_suffices)
+                    {
+                      // second set: the neighbor's
+                      // fe_index goes here, followed
+                      // by its (still unset) DoFs
+                      const unsigned int neighbor_fe
+                        = cell->neighbor(f)->active_fe_index();
+
+                      dof_handler.faces->lines.dofs[next_free_line_slot]
+                        = neighbor_fe;
+                      next_free_line_slot
+                      += (*dof_handler.finite_elements)[neighbor_fe].dofs_per_line + 1;
+                    }
+
+                  // the stop entry is
+                  // invalid_dof_index as well, so it
+                  // needs no explicit write; just
+                  // step over it
+                  next_free_line_slot += 1;
+
+                  cell->face(f)->set_user_flag ();
+                }
+
+            // the second pass must have consumed exactly
+            // the number of slots counted in the first
+            Assert (next_free_line_slot == n_line_slots,
+                    ExcInternalError());
+
+            // put the line user flags back the way we
+            // found them
+            mutable_tria.load_user_flags_line (saved_line_user_flags);
+          }
+
+
+          // VERTEX DOFS
+          reserve_space_vertices (dof_handler);
+        }
+
+
+        template <int spacedim>
+        static
+        void
+        reserve_space (DoFHandler<3,spacedim> &dof_handler)
+        {
+          const unsigned int dim = 3;
+
+          typedef DoFHandler<dim,spacedim> BaseClass;
+
+          Assert (dof_handler.finite_elements != 0,
+                  typename BaseClass::ExcNoFESelected());
+          Assert (dof_handler.finite_elements->size() > 0,
+                  typename BaseClass::ExcNoFESelected());
+          Assert (dof_handler.tria->n_levels() > 0,
+                  typename BaseClass::ExcInvalidTriangulation());
+          Assert (dof_handler.tria->n_levels() == dof_handler.levels.size (),
+                  ExcInternalError ());
+
+          // Release all space except the
+          // active_fe_indices field which
+          // we have to backup before
+          {
+            std::vector<std::vector<DoFLevel::active_fe_index_type> >
+            active_fe_backup(dof_handler.levels.size ());
+            for (unsigned int level = 0; level<dof_handler.levels.size (); ++level)
+              std::swap (dof_handler.levels[level]->active_fe_indices,
+                         active_fe_backup[level]);
+
+            // delete all levels and set them up
+            // newly, since vectors are
+            // troublesome if you want to change
+            // their size
+            dof_handler.clear_space ();
+
+            for (unsigned int level=0; level<dof_handler.tria->n_levels(); ++level)
+              {
+                dof_handler.levels.push_back (new internal::hp::DoFLevel);
+                std::swap (active_fe_backup[level],
+                           dof_handler.levels[level]->active_fe_indices);
+              }
+            dof_handler.faces = new internal::hp::DoFIndicesOnFaces<3>;
+          }
+
+
+          // HEX (CELL) DOFs
+
+          // count how much space we need
+          // on each level for the cell
+          // dofs and set the
+          // dof_*_offsets
+          // data. initially set the latter
+          // to an invalid index, and only
+          // later set it to something
+          // reasonable for active dof_handler.cells
+          //
+          // note that for dof_handler.cells, the
+          // situation is simpler than for
+          // other (lower dimensional)
+          // objects since exactly one
+          // finite element is used for it
+          for (unsigned int level=0; level<dof_handler.tria->n_levels(); ++level)
+            {
+              dof_handler.levels[level]->dof_offsets
+                = std::vector<DoFLevel::offset_type> (
+                    dof_handler.tria->n_raw_hexs(level),
+                    (DoFLevel::offset_type)(-1));
+              dof_handler.levels[level]->cell_cache_offsets
+                = std::vector<DoFLevel::offset_type> (
+                    dof_handler.tria->n_raw_hexs(level),
+                    (DoFLevel::offset_type)(-1));
+
+              types::global_dof_index next_free_dof = 0;
+              types::global_dof_index cache_size = 0;
+              for (typename HpDoFHandler<dim,spacedim>::active_cell_iterator
+                   cell=dof_handler.begin_active(level);
+                   cell!=dof_handler.end_active(level); ++cell)
+                if (!cell->has_children())
+                  {
+                    dof_handler.levels[level]->dof_offsets[cell->index()] = next_free_dof;
+                    next_free_dof += cell->get_fe().dofs_per_hex;
+
+                    dof_handler.levels[level]->cell_cache_offsets[cell->index()] = cache_size;
+                    cache_size += cell->get_fe().dofs_per_cell;
+                  }
+
+              dof_handler.levels[level]->dof_indices
+                = std::vector<types::global_dof_index> (next_free_dof,
+                                                        DoFHandler<dim,spacedim>::invalid_dof_index);
+              dof_handler.levels[level]->cell_dof_indices_cache
+                = std::vector<types::global_dof_index> (cache_size,
+                                                        DoFHandler<dim,spacedim>::invalid_dof_index);
+            }
+
+          // safety check: make sure that
+          // the number of DoFs we
+          // allocated is actually correct
+          // (above we have also set the
+          // dof_*_offsets field, so
+          // we couldn't use this simpler
+          // algorithm)
+#ifdef DEBUG
+          for (unsigned int level=0; level<dof_handler.tria->n_levels(); ++level)
+            {
+              types::global_dof_index counter = 0;
+              for (typename HpDoFHandler<dim,spacedim>::cell_iterator
+                   cell=dof_handler.begin_active(level);
+                   cell!=dof_handler.end_active(level); ++cell)
+                if (!cell->has_children())
+                  counter += cell->get_fe().dofs_per_hex;
+
+              Assert (dof_handler.levels[level]->dof_indices.size() == counter,
+                      ExcInternalError());
+              Assert (static_cast<unsigned int>
+                      (std::count (dof_handler.levels[level]->dof_offsets.begin(),
+                                   dof_handler.levels[level]->dof_offsets.end(),
+                                   (DoFLevel::offset_type)(-1)))
+                      ==
+                      dof_handler.tria->n_raw_hexs(level) - dof_handler.tria->n_active_hexs(level),
+                      ExcInternalError());
+            }
+#endif
+
+
+          // QUAD DOFS
+          //
+          // same here: count quad dofs,
+          // then allocate as much space as
+          // we need and prime the linked
+          // list for quad (see the
+          // description in hp::DoFLevel)
+          // with the indices we will
+          // need. note that our task is
+          // more complicated since two
+          // adjacent dof_handler.cells may have
+          // different active_fe_indices,
+          // in which case we need to
+          // allocate *two* sets of line
+          // dofs for the same line
+          //
+          // the way we do things is that
+          // we loop over all active dof_handler.cells
+          // (these are the ones that have
+          // DoFs only anyway) and all
+          // their dof_handler.faces. We note in the
+          // user flags whether we have
+          // previously visited a face and
+          // if so skip it (consequently,
+          // we have to save and later
+          // restore the line flags)
+          {
+            std::vector<bool> saved_quad_user_flags;
+            const_cast<dealii::Triangulation<dim,spacedim>&>(*dof_handler.tria)
+            .save_user_flags_quad (saved_quad_user_flags);
+            const_cast<dealii::Triangulation<dim,spacedim>&>(*dof_handler.tria)
+            .clear_user_flags_quad ();
+
+            // examine, how how many
+            // slots (see the hp::DoFLevel
+            // class) we will have to store
+            unsigned int n_quad_slots = 0;
+
+            for (typename HpDoFHandler<dim,spacedim>::active_cell_iterator
+                 cell=dof_handler.begin_active(); cell!=dof_handler.end(); ++cell)
+              for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+                if (! cell->face(face)->user_flag_set())
+                  {
+                    // ok, face has not been
+                    // visited. so we need to
+                    // allocate space for
+                    // it. let's see how much
+                    // we need: we need one
+                    // set if a) there is no
+                    // neighbor behind this
+                    // face, or b) the
+                    // neighbor is not on the
+                    // same level or further
+                    // refined, or c) the
+                    // neighbor is on the
+                    // same level, but
+                    // happens to have the
+                    // same active_fe_index:
+                    if (cell->at_boundary(face)
+                        ||
+                        cell->face(face)->has_children()
+                        ||
+                        cell->neighbor_is_coarser(face)
+                        ||
+                        (!cell->at_boundary(face)
+                         &&
+                         (cell->active_fe_index() == cell->neighbor(face)->active_fe_index())))
+                      // ok, one set of
+                      // dofs. that makes
+                      // one index, 1 times
+                      // dofs_per_quad
+                      // dofs, and one stop
+                      // index
+                      n_quad_slots
+                      += (*dof_handler.finite_elements)[cell->active_fe_index()].dofs_per_quad + 2;
+
+                    // otherwise we do
+                    // indeed need two
+                    // sets, i.e. two
+                    // indices, two sets of
+                    // dofs, and one stop
+                    // index:
+                    else
+                      n_quad_slots
+                      += ((*dof_handler.finite_elements)[cell->active_fe_index()].dofs_per_quad
+                          +
+                          (*dof_handler.finite_elements)[cell->neighbor(face)->active_fe_index()]
+                          .dofs_per_quad
+                          +
+                          3);
+
+                    // mark this face as
+                    // visited
+                    cell->face(face)->set_user_flag ();
+                  }
+
+            // now that we know how many
+            // quad dofs we will have to
+            // have,  allocate
+            // the memory. note that we
+            // allocate offsets for all
+            // quads, though only the
+            // active ones will have a
+            // non-invalid value later on
+            if (true)
+              {
+                dof_handler.faces->quads.dof_offsets
+                  = std::vector<unsigned int>
+                    (dof_handler.tria->n_raw_quads(),
+                     (unsigned int)(-1));
+                dof_handler.faces->quads.dofs
+                  = std::vector<types::global_dof_index> (n_quad_slots,
+                                                          DoFHandler<dim,spacedim>::invalid_dof_index);
+              }
+
+            // with the memory now
+            // allocated, loop over the
+            // dof_handler.cells again and prime the
+            // _offset values as well as
+            // the fe_index fields
+            const_cast<dealii::Triangulation<dim,spacedim>&>(*dof_handler.tria)
+            .clear_user_flags_quad ();
+
+            unsigned int next_free_quad_slot = 0;
+
+            for (typename HpDoFHandler<dim,spacedim>::active_cell_iterator
+                 cell=dof_handler.begin_active(); cell!=dof_handler.end(); ++cell)
+              for (unsigned int face=0; face<GeometryInfo<dim>::faces_per_cell; ++face)
+                if (! cell->face(face)->user_flag_set())
+                  {
+                    // same decision tree
+                    // as before
+                    if (cell->at_boundary(face)
+                        ||
+                        cell->face(face)->has_children()
+                        ||
+                        cell->neighbor_is_coarser(face)
+                        ||
+                        (!cell->at_boundary(face)
+                         &&
+                         (cell->active_fe_index() == cell->neighbor(face)->active_fe_index())))
+                      {
+                        dof_handler.faces
+                        ->quads.dof_offsets[cell->face(face)->index()]
+                          = next_free_quad_slot;
+
+                        // set first slot
+                        // for this quad to
+                        // active_fe_index
+                        // of this face
+                        dof_handler.faces
+                        ->quads.dofs[next_free_quad_slot]
+                          = cell->active_fe_index();
+
+                        // the next
+                        // dofs_per_quad
+                        // indices remain
+                        // unset for the
+                        // moment (i.e. at
+                        // invalid_dof_index).
+                        // following this
+                        // comes the stop
+                        // index, which
+                        // also is
+                        // invalid_dof_index
+                        // and therefore
+                        // does not have to
+                        // be explicitly
+                        // set
+
+                        // finally, mark
+                        // those slots as
+                        // used
+                        next_free_quad_slot
+                        += (*dof_handler.finite_elements)[cell->active_fe_index()].dofs_per_quad + 2;
+                      }
+                    else
+                      {
+                        dof_handler.faces
+                        ->quads.dof_offsets[cell->face(face)->index()]
+                          = next_free_quad_slot;
+
+                        // set first slot
+                        // for this quad to
+                        // active_fe_index
+                        // of this face
+                        dof_handler.faces
+                        ->quads.dofs[next_free_quad_slot]
+                          = cell->active_fe_index();
+
+                        // the next
+                        // dofs_per_quad
+                        // indices remain
+                        // unset for the
+                        // moment (i.e. at
+                        // invalid_dof_index).
+                        //
+                        // then comes the
+                        // fe_index for the
+                        // neighboring
+                        // cell:
+                        dof_handler.faces
+                        ->quads.dofs[next_free_quad_slot
+                                     +
+                                     (*dof_handler.finite_elements)[cell->active_fe_index()].dofs_per_quad
+                                     +
+                                     1]
+                          = cell->neighbor(face)->active_fe_index();
+                        // then again a set
+                        // of dofs that we
+                        // need not set
+                        // right now
+                        //
+                        // following this
+                        // comes the stop
+                        // index, which
+                        // also is
+                        // invalid_dof_index
+                        // and therefore
+                        // does not have to
+                        // be explicitly
+                        // set
+
+                        // finally, mark
+                        // those slots as
+                        // used
+                        next_free_quad_slot
+                        += ((*dof_handler.finite_elements)[cell->active_fe_index()].dofs_per_quad
+                            +
+                            (*dof_handler.finite_elements)[cell->neighbor(face)->active_fe_index()]
+                            .dofs_per_quad
+                            +
+                            3);
+                      }
+
+                    // mark this face as
+                    // visited
+                    cell->face(face)->set_user_flag ();
+                  }
+
+            // we should have moved the
+            // cursor to the total number
+            // of dofs. check that
+            Assert (next_free_quad_slot == n_quad_slots,
+                    ExcInternalError());
+
+            // at the end, restore the user
+            // flags for the quads
+            const_cast<dealii::Triangulation<dim,spacedim>&>(*dof_handler.tria)
+            .load_user_flags_quad (saved_quad_user_flags);
+          }
+
+
+          // LINE DOFS
+
+          // the situation here is pretty
+          // much like with vertices: there
+          // can be an arbitrary number of
+          // finite elements associated
+          // with each line.
+          //
+          // the algorithm we use is
+          // somewhat similar to what we do
+          // in reserve_space_vertices()
+          if (true)
+            {
+              // what we do first is to set up
+              // an array in which we record
+              // whether a line is associated
+              // with any of the given fe's, by
+              // setting a bit. in a later
+              // step, we then actually
+              // allocate memory for the
+              // required dofs
+              std::vector<std::vector<bool> >
+              line_fe_association (dof_handler.finite_elements->size(),
+                                   std::vector<bool> (dof_handler.tria->n_raw_lines(),
+                                                      false));
+
+              for (typename HpDoFHandler<dim,spacedim>::active_cell_iterator
+                   cell=dof_handler.begin_active();
+                   cell!=dof_handler.end(); ++cell)
+                for (unsigned int l=0; l<GeometryInfo<dim>::lines_per_cell; ++l)
+                  line_fe_association[cell->active_fe_index()][cell->line_index(l)]
+                    = true;
+
+              // first check which of the
+              // lines is used at all,
+              // i.e. is associated with a
+              // finite element. we do this
+              // since not all lines may
+              // actually be used, in which
+              // case we do not have to
+              // allocate any memory at
+              // all
+              std::vector<bool> line_is_used (dof_handler.tria->n_raw_lines(), false);
+              for (unsigned int line=0; line<dof_handler.tria->n_raw_lines(); ++line)
+                for (unsigned int fe=0; fe<dof_handler.finite_elements->size(); ++fe)
+                  if (line_fe_association[fe][line] == true)
+                    {
+                      line_is_used[line] = true;
+                      break;
+                    }
+
+              // next count how much memory
+              // we actually need. for each
+              // line, we need one slot per
+              // fe to store the fe_index,
+              // plus dofs_per_line for
+              // this fe. in addition, we
+              // need one slot as the end
+              // marker for the
+              // fe_indices. at the same
+              // time already fill the
+              // line_dofs_offsets field
+              dof_handler.faces->lines.dof_offsets
+              .resize (dof_handler.tria->n_raw_lines(),
+                       numbers::invalid_unsigned_int);
+
+              unsigned int line_slots_needed = 0;
+              for (unsigned int line=0; line<dof_handler.tria->n_raw_lines(); ++line)
+                if (line_is_used[line] == true)
+                  {
+                    dof_handler.faces->lines.dof_offsets[line] = line_slots_needed;
+
+                    for (unsigned int fe=0; fe<dof_handler.finite_elements->size(); ++fe)
+                      if (line_fe_association[fe][line] == true)
+                        line_slots_needed += (*dof_handler.finite_elements)[fe].dofs_per_line + 1;
+                    ++line_slots_needed;
+                  }
+
+              // now allocate the space we
+              // have determined we need, and
+              // set up the linked lists for
+              // each of the lines
+              dof_handler.faces->lines.dofs.resize (line_slots_needed,
+                                                    DoFHandler<dim,spacedim>::invalid_dof_index);
+              for (unsigned int line=0; line<dof_handler.tria->n_raw_lines(); ++line)
+                if (line_is_used[line] == true)
+                  {
+                    unsigned int pointer = dof_handler.faces->lines.dof_offsets[line];
+                    for (unsigned int fe=0; fe<dof_handler.finite_elements->size(); ++fe)
+                      if (line_fe_association[fe][line] == true)
+                        {
+                          // if this line
+                          // uses this fe,
+                          // then set the
+                          // fe_index and
+                          // move the
+                          // pointer ahead
+                          dof_handler.faces->lines.dofs[pointer] = fe;
+                          pointer += (*dof_handler.finite_elements)[fe].dofs_per_line + 1;
+                        }
+                    // finally place the end
+                    // marker
+                    dof_handler.faces->lines.dofs[pointer] = numbers::invalid_dof_index;
+                  }
+            }
+
+
+
+          // VERTEX DOFS
+          reserve_space_vertices (dof_handler);
+        }
+
+
+        /**
+         * One-dimensional implementation of the function of the same
+         * name in the mother class.
+         *
+         * The estimate is three times max_dofs_per_vertex() plus
+         * twice max_dofs_per_line() of the attached finite elements,
+         * capped by the total number of degrees of freedom.
+         */
+        template <int spacedim>
+        static
+        unsigned int
+        max_couplings_between_dofs (const DoFHandler<1,spacedim> &dof_handler)
+        {
+          const types::global_dof_index estimate
+            = static_cast<types::global_dof_index>
+              (3*dof_handler.finite_elements->max_dofs_per_vertex() +
+               2*dof_handler.finite_elements->max_dofs_per_line());
+
+          // never report more couplings than there are dofs
+          return std::min(estimate, dof_handler.n_dofs());
+        }
+
+
+
+        /**
+         * Two-dimensional version of the coupling estimate. The
+         * result depends on the maximal number of cells meeting at a
+         * vertex and is capped by the total number of degrees of
+         * freedom.
+         */
+        template <int spacedim>
+        static
+        unsigned int
+        max_couplings_between_dofs (const DoFHandler<2,spacedim> &dof_handler)
+        {
+          // the multipliers used below were found by drawing
+          // pictures and counting. example:
+          //   |     |     |
+          // --x-----x--x--X--
+          //   |     |  |  |
+          //   |     x--x--x
+          //   |     |  |  |
+          // --x--x--*--x--x--
+          //   |  |  |     |
+          //   x--x--x     |
+          //   |  |  |     |
+          // --X--x--x-----x--
+          //   |     |     |
+          // x = vertices connected with center vertex *;
+          //   = total of 19
+          // (the X vertices are connected with * if
+          // the vertices adjacent to X are hanging
+          // nodes)
+          // count lines -> 28 (don't forget to count
+          // mother and children separately!)
+          unsigned int vertex_factor = 0;
+          unsigned int line_factor   = 0;
+          unsigned int quad_factor   = 0;
+
+          switch (dof_handler.tria->max_adjacent_cells())
+            {
+            case 4:
+              vertex_factor = 19;
+              line_factor   = 28;
+              quad_factor   = 8;
+              break;
+            case 5:
+              vertex_factor = 21;
+              line_factor   = 31;
+              quad_factor   = 9;
+              break;
+            case 6:
+              vertex_factor = 28;
+              line_factor   = 42;
+              quad_factor   = 12;
+              break;
+            case 7:
+              vertex_factor = 30;
+              line_factor   = 45;
+              quad_factor   = 13;
+              break;
+            case 8:
+              vertex_factor = 37;
+              line_factor   = 56;
+              quad_factor   = 16;
+              break;
+            default:
+            {
+              // no estimate available for this number of
+              // adjacent cells
+              Assert (false, ExcNotImplemented());
+              const types::global_dof_index no_couplings = 0;
+              return std::min(no_couplings, dof_handler.n_dofs());
+            }
+            }
+
+          const types::global_dof_index max_couplings
+            = vertex_factor * dof_handler.finite_elements->max_dofs_per_vertex() +
+              line_factor   * dof_handler.finite_elements->max_dofs_per_line() +
+              quad_factor   * dof_handler.finite_elements->max_dofs_per_quad();
+
+          return std::min(max_couplings, dof_handler.n_dofs());
+        }
+
+
+        /**
+         * Three-dimensional version of the coupling estimate. Only
+         * the case of at most 8 cells meeting at a vertex is
+         * handled; the result is capped by the total number of
+         * degrees of freedom.
+         */
+        template <int spacedim>
+        static
+        unsigned int
+        max_couplings_between_dofs (const DoFHandler<3,spacedim> &dof_handler)
+        {
+//TODO:[?] Invent significantly better estimates than the ones in this function
+          // counting couplings by drawing pictures, as done in
+          // 2d, is hardly feasible with several refined
+          // hexahedra. therefore only a coarse estimate is
+          // given, valid if at most 8 hexes meet at each vertex
+          //
+          // can anyone give better estimate here?
+          if (dof_handler.tria->max_adjacent_cells() > 8)
+            {
+              Assert (false, ExcNotImplemented());
+              const types::global_dof_index no_couplings = 0;
+              return std::min(no_couplings, dof_handler.n_dofs());
+            }
+
+          const types::global_dof_index max_couplings
+            = 7*7*7*dof_handler.finite_elements->max_dofs_per_vertex() +
+              7*6*7*3*dof_handler.finite_elements->max_dofs_per_line() +
+              9*4*7*3*dof_handler.finite_elements->max_dofs_per_quad() +
+              27*dof_handler.finite_elements->max_dofs_per_hex();
+
+          return std::min(max_couplings, dof_handler.n_dofs());
+        }
+      };
+    }
+  }
+}
+
+
+namespace hp
+{
+  // Out-of-class definitions of the static constant data members of
+  // DoFHandler. No initializers are given here.
+  template<int dim, int spacedim>
+  const unsigned int DoFHandler<dim,spacedim>::dimension;
+
+  template<int dim, int spacedim>
+  const types::global_dof_index DoFHandler<dim,spacedim>::invalid_dof_index;
+
+  template<int dim, int spacedim>
+  const unsigned int DoFHandler<dim,spacedim>::default_fe_index;
+
+
+
+  /**
+   * Constructor. Stores a pointer to @p tria, leaves the @p faces
+   * pointer at NULL, calls create_active_fe_table() and connects
+   * this object to three signals of the triangulation. The
+   * connections are kept in @p tria_listeners so that the
+   * destructor can disconnect them again.
+   */
+  template<int dim, int spacedim>
+  DoFHandler<dim,spacedim>::DoFHandler (const Triangulation<dim,spacedim> &tria)
+    :
+    tria(&tria, typeid(*this).name()),
+    faces (NULL)
+  {
+    // reject parallel distributed triangulations (checked only
+    // where Assert is active)
+    Assert ((dynamic_cast<const parallel::distributed::Triangulation< dim, spacedim >*>
+             (&tria)
+             == 0),
+            ExcMessage ("The given triangulation is parallel distributed but "
+                        "this class does not currently support this."));
+
+    create_active_fe_table ();
+
+    // pre_refinement signal -> pre_refinement_action()
+    tria_listeners.push_back
+    (tria.signals.pre_refinement
+     .connect (std_cxx11::bind (&DoFHandler<dim,spacedim>::pre_refinement_action,
+                                std_cxx11::ref(*this))));
+    // post_refinement signal -> post_refinement_action()
+    tria_listeners.push_back
+    (tria.signals.post_refinement
+     .connect (std_cxx11::bind (&DoFHandler<dim,spacedim>::post_refinement_action,
+                                std_cxx11::ref(*this))));
+    // the create signal is also routed to post_refinement_action()
+    tria_listeners.push_back
+    (tria.signals.create
+     .connect (std_cxx11::bind (&DoFHandler<dim,spacedim>::post_refinement_action,
+                                std_cxx11::ref(*this))));
+  }
+
+
+  /**
+   * Destructor. Disconnects all signal connections recorded in
+   * @p tria_listeners and then calls clear().
+   */
+  template<int dim, int spacedim>
+  DoFHandler<dim,spacedim>::~DoFHandler ()
+  {
+    // stop listening to the underlying triangulation: drop every
+    // connection we have recorded
+    for (unsigned int listener=0; listener!=tria_listeners.size(); ++listener)
+      tria_listeners[listener].disconnect ();
+    tria_listeners.clear ();
+
+    // finally free the memory held by this object
+    clear ();
+  }
+
+
+  /*------------------------ Cell iterator functions ------------------------*/
+
+  /**
+   * Return a cell iterator built from the object that the
+   * triangulation's begin(level) iterator points to, with this
+   * DoFHandler attached.
+   */
+  template <int dim, int spacedim>
+  typename DoFHandler<dim,spacedim>::cell_iterator
+  DoFHandler<dim, spacedim>::begin(const unsigned int level) const
+  {
+    return cell_iterator (*this->get_triangulation().begin(level),
+                          this);
+  }
+
+
+
+  /**
+   * Starting from begin(level), advance until a cell without
+   * children is found or the iterator leaves the valid state, and
+   * return the resulting iterator.
+   */
+  template <int dim, int spacedim>
+  typename DoFHandler<dim,spacedim>::active_cell_iterator
+  DoFHandler<dim,spacedim>::begin_active (const unsigned int level) const
+  {
+    // the level argument is validated inside begin()
+    cell_iterator cell = begin (level);
+
+    // skip cells that have children; stop as soon as the iterator
+    // is no longer valid
+    while ((cell.state() == IteratorState::valid)
+           &&
+           cell->has_children())
+      ++cell;
+
+    return cell;
+  }
+
+
+
+  /**
+   * Return a cell iterator constructed with both integer arguments
+   * set to -1 (presumably the level/index pair marking the
+   * past-the-end state; confirm against the iterator constructor).
+   */
+  template <int dim, int spacedim>
+  typename DoFHandler<dim,spacedim>::cell_iterator
+  DoFHandler<dim,spacedim>::end () const
+  {
+    return cell_iterator (&this->get_triangulation(),
+                          -1,
+                          -1,
+                          this);
+  }
+
+
+  /**
+   * Return the iterator one past the last cell of the given level:
+   * end() on the finest level, otherwise the first cell of the
+   * next level.
+   */
+  template <int dim, int spacedim>
+  typename DoFHandler<dim,spacedim>::cell_iterator
+  DoFHandler<dim,spacedim>::end (const unsigned int level) const
+  {
+    if (level == this->get_triangulation().n_levels()-1)
+      return end();
+
+    return begin (level+1);
+  }
+
+
+  /**
+   * Active-cell counterpart of end(level): end() converted to an
+   * active iterator on the finest level, otherwise the first
+   * active cell of the next level.
+   */
+  template <int dim, int spacedim>
+  typename DoFHandler<dim, spacedim>::active_cell_iterator
+  DoFHandler<dim, spacedim>::end_active (const unsigned int level) const
+  {
+    if (level == this->get_triangulation().n_levels()-1)
+      return active_cell_iterator(end());
+
+    return begin_active (level+1);
+  }
+
+
+
+  /**
+   * Return the range [begin(), end()) of cell iterators.
+   */
+  template <int dim, int spacedim>
+  IteratorRange<typename DoFHandler<dim, spacedim>::cell_iterator>
+  DoFHandler<dim, spacedim>::cell_iterators () const
+  {
+    typedef
+    IteratorRange<typename DoFHandler<dim, spacedim>::cell_iterator>
+    range_type;
+
+    return range_type (begin(), end());
+  }
+
+
+  /**
+   * Return the range [begin_active(), end()) of active cell
+   * iterators.
+   */
+  template <int dim, int spacedim>
+  IteratorRange<typename DoFHandler<dim, spacedim>::active_cell_iterator>
+  DoFHandler<dim, spacedim>::active_cell_iterators () const
+  {
+    typedef
+    IteratorRange<typename DoFHandler<dim, spacedim>::active_cell_iterator>
+    range_type;
+
+    return range_type (begin_active(), end());
+  }
+
+
+
+  /**
+   * Return the range [begin(level), end(level)) of cell iterators
+   * on the given level.
+   */
+  template <int dim, int spacedim>
+  IteratorRange<typename DoFHandler<dim, spacedim>::cell_iterator>
+  DoFHandler<dim, spacedim>::cell_iterators_on_level (const unsigned int level) const
+  {
+    typedef
+    IteratorRange<typename DoFHandler<dim, spacedim>::cell_iterator>
+    range_type;
+
+    return range_type (begin(level), end(level));
+  }
+
+
+
+  /**
+   * Return the range [begin_active(level), end_active(level)) of
+   * active cell iterators on the given level.
+   */
+  template <int dim, int spacedim>
+  IteratorRange<typename DoFHandler<dim, spacedim>::active_cell_iterator>
+  DoFHandler<dim, spacedim>::active_cell_iterators_on_level (const unsigned int level) const
+  {
+    typedef
+    IteratorRange<typename DoFHandler<dim, spacedim>::active_cell_iterator>
+    range_type;
+
+    return range_type (begin_active(level), end_active(level));
+  }
+
+
+
+
+//------------------------------------------------------------------
+
+
+  /**
+   * 1d specialization: sum the vertex dofs of the finite elements
+   * used on the left-most and on the right-most cell.
+   */
+  template <>
+  types::global_dof_index DoFHandler<1>::n_boundary_dofs () const
+  {
+    Assert (finite_elements != 0, ExcNoFESelected());
+
+    types::global_dof_index n_dofs_on_boundary = 0;
+
+    // side 0 is the left end, side 1 the right end. for each of
+    // them walk from the first active cell through the neighbors
+    // until a cell at the boundary is reached
+    for (unsigned int side=0; side<2; ++side)
+      {
+        DoFHandler<1,1>::cell_iterator cell;
+        cell = this->begin_active();
+        while (!cell->at_boundary(side))
+          cell = cell->neighbor(side);
+
+        n_dofs_on_boundary += cell->get_fe().dofs_per_vertex;
+      }
+
+    return n_dofs_on_boundary;
+  }
+
+
+
+  /**
+   * 1d specialization: as the argument-free version, but an end
+   * point only contributes if its indicator (0 for the left end, 1
+   * for the right end) is a key of @p boundary_ids.
+   */
+  template <>
+  types::global_dof_index DoFHandler<1>::n_boundary_dofs (const FunctionMap &boundary_ids) const
+  {
+    Assert (finite_elements != 0, ExcNoFESelected());
+
+    // in 1d, 0 and 1 are the only admissible boundary indicators
+    for (FunctionMap::const_iterator entry=boundary_ids.begin();
+         entry!=boundary_ids.end(); ++entry)
+      Assert ((entry->first == 0) || (entry->first == 1),
+              ExcInvalidBoundaryIndicator());
+
+    types::global_dof_index n_dofs_on_boundary = 0;
+
+    // treat the left end (side 0) and the right end (side 1) alike:
+    // if requested, walk to the outermost cell on that side
+    for (unsigned int side=0; side<2; ++side)
+      {
+        if (boundary_ids.find (side) == boundary_ids.end())
+          continue;
+
+        DoFHandler<1,1>::active_cell_iterator cell;
+        cell = this->begin_active();
+        while (!cell->at_boundary(side))
+          cell = cell->neighbor(side);
+
+        n_dofs_on_boundary += cell->get_fe().dofs_per_vertex;
+      }
+
+    return n_dofs_on_boundary;
+  }
+
+
+
+  /**
+   * 1d specialization: as the argument-free version, but an end
+   * point only contributes if its indicator (0 for the left end, 1
+   * for the right end) is contained in @p boundary_ids.
+   */
+  template <>
+  types::global_dof_index DoFHandler<1>::n_boundary_dofs (const std::set<types::boundary_id> &boundary_ids) const
+  {
+    Assert (finite_elements != 0, ExcNoFESelected());
+
+    // in 1d, 0 and 1 are the only admissible boundary indicators
+    for (std::set<types::boundary_id>::const_iterator id=boundary_ids.begin();
+         id!=boundary_ids.end(); ++id)
+      Assert ((*id == 0) || (*id == 1),
+              ExcInvalidBoundaryIndicator());
+
+    types::global_dof_index n_dofs_on_boundary = 0;
+
+    // treat the left end (side 0) and the right end (side 1) alike:
+    // if requested, walk to the outermost cell on that side
+    for (unsigned int side=0; side<2; ++side)
+      {
+        if (boundary_ids.find (side) == boundary_ids.end())
+          continue;
+
+        DoFHandler<1,1>::active_cell_iterator cell;
+        cell = this->begin_active();
+        while (!cell->at_boundary(side))
+          cell = cell->neighbor(side);
+
+        n_dofs_on_boundary += cell->get_fe().dofs_per_vertex;
+      }
+
+    return n_dofs_on_boundary;
+  }
+
+
+  // The three n_boundary_dofs() overloads are not implemented for
+  // dim=1, spacedim=2: each one raises ExcNotImplemented through
+  // Assert and returns 0.
+  template <>
+  types::global_dof_index DoFHandler<1,2>::n_boundary_dofs () const
+  {
+    Assert(false,ExcNotImplemented());
+    return 0;
+  }
+
+  // FunctionMap overload; the argument is ignored
+  template <>
+  types::global_dof_index DoFHandler<1,2>::n_boundary_dofs (const FunctionMap &) const
+  {
+    Assert(false,ExcNotImplemented());
+    return 0;
+  }
+
+  // std::set overload; the argument is ignored
+  template <>
+  types::global_dof_index DoFHandler<1,2>::n_boundary_dofs (const std::set<types::boundary_id> &) const
+  {
+    Assert(false,ExcNotImplemented());
+    return 0;
+  }
+
+
+
+  // The three n_boundary_dofs() overloads are not implemented for
+  // dim=1, spacedim=3: each one raises ExcNotImplemented through
+  // Assert and returns 0.
+  template <>
+  types::global_dof_index DoFHandler<1,3>::n_boundary_dofs () const
+  {
+    Assert(false,ExcNotImplemented());
+    return 0;
+  }
+
+  // FunctionMap overload; the argument is ignored
+  template <>
+  types::global_dof_index DoFHandler<1,3>::n_boundary_dofs (const FunctionMap &) const
+  {
+    Assert(false,ExcNotImplemented());
+    return 0;
+  }
+
+  // std::set overload; the argument is ignored
+  template <>
+  types::global_dof_index DoFHandler<1,3>::n_boundary_dofs (const std::set<types::boundary_id> &) const
+  {
+    Assert(false,ExcNotImplemented());
+    return 0;
+  }
+
+
+  /**
+   * Return the number of distinct degrees of freedom located on
+   * faces of active cells that are at the boundary.
+   */
+  template<int dim, int spacedim>
+  types::global_dof_index DoFHandler<dim,spacedim>::n_boundary_dofs () const
+  {
+    Assert (finite_elements != 0, ExcNoFESelected());
+
+    // a set is used so that indices seen on several faces are
+    // only counted once
+    std::set<types::global_dof_index> indices_on_boundary;
+    std::vector<types::global_dof_index> face_dof_indices;
+    face_dof_indices.reserve (this->get_fe ().max_dofs_per_face());
+
+    // visit every face of every active cell and pick those at the
+    // boundary. single boundary lines in 3d (see
+    // @p{cell->has_boundary_lines}) need no special treatment:
+    // boundaries of dimension dim-2 are not supported, so each
+    // boundary line also belongs to a boundary face.
+    const typename HpDoFHandler<dim,spacedim>::active_cell_iterator
+    endc = this->end();
+    for (typename HpDoFHandler<dim,spacedim>::active_cell_iterator
+         cell = this->begin_active (); cell!=endc; ++cell)
+      for (unsigned int face_no=0; face_no<GeometryInfo<dim>::faces_per_cell; ++face_no)
+        {
+          if (!cell->at_boundary(face_no))
+            continue;
+
+          // fetch the indices for the fe active on this cell and
+          // merge them into the set
+          face_dof_indices.resize (cell->get_fe().dofs_per_face);
+          cell->face(face_no)->get_dof_indices (face_dof_indices,
+                                                cell->active_fe_index());
+          indices_on_boundary.insert (face_dof_indices.begin(),
+                                      face_dof_indices.end());
+        }
+
+    return indices_on_boundary.size();
+  }
+
+
+
+  /**
+   * Return the number of distinct degrees of freedom located on
+   * those boundary faces of active cells whose boundary indicator
+   * is a key of @p boundary_ids.
+   */
+  template<int dim, int spacedim>
+  types::global_dof_index
+  DoFHandler<dim,spacedim>::n_boundary_dofs (const FunctionMap &boundary_ids) const
+  {
+    Assert (finite_elements != 0, ExcNoFESelected());
+    Assert (boundary_ids.find(numbers::internal_face_boundary_id) == boundary_ids.end(),
+            ExcInvalidBoundaryIndicator());
+
+    // a set is used so that indices seen on several faces are
+    // only counted once
+    std::set<types::global_dof_index> indices_on_boundary;
+    std::vector<types::global_dof_index> face_dof_indices;
+    face_dof_indices.reserve (this->get_fe ().max_dofs_per_face());
+
+    const typename HpDoFHandler<dim,spacedim>::active_cell_iterator
+    endc = this->end();
+    for (typename HpDoFHandler<dim,spacedim>::active_cell_iterator
+         cell = this->begin_active (); cell!=endc; ++cell)
+      for (unsigned int face_no=0; face_no<GeometryInfo<dim>::faces_per_cell; ++face_no)
+        {
+          // only boundary faces are of interest...
+          if (!cell->at_boundary(face_no))
+            continue;
+
+          // ...and among them only those with a selected indicator
+          if (boundary_ids.find(cell->face(face_no)->boundary_id()) ==
+              boundary_ids.end())
+            continue;
+
+          face_dof_indices.resize (cell->get_fe().dofs_per_face);
+          cell->face(face_no)->get_dof_indices (face_dof_indices,
+                                                cell->active_fe_index());
+          indices_on_boundary.insert (face_dof_indices.begin(),
+                                      face_dof_indices.end());
+        }
+
+    return indices_on_boundary.size();
+  }
+
+
+
+  /**
+   * Return the number of distinct degrees of freedom located on
+   * those boundary faces of active cells whose boundary indicator
+   * is contained in @p boundary_ids.
+   */
+  template<int dim, int spacedim>
+  types::global_dof_index
+  DoFHandler<dim,spacedim>::n_boundary_dofs (const std::set<types::boundary_id> &boundary_ids) const
+  {
+    Assert (finite_elements != 0, ExcNoFESelected());
+    Assert (boundary_ids.find (numbers::internal_face_boundary_id) == boundary_ids.end(),
+            ExcInvalidBoundaryIndicator());
+
+    // a set is used so that indices seen on several faces are
+    // only counted once
+    std::set<types::global_dof_index> indices_on_boundary;
+    std::vector<types::global_dof_index> face_dof_indices;
+    face_dof_indices.reserve (this->get_fe ().max_dofs_per_face());
+
+    const typename HpDoFHandler<dim,spacedim>::active_cell_iterator
+    endc = this->end();
+    for (typename HpDoFHandler<dim,spacedim>::active_cell_iterator
+         cell = this->begin_active (); cell!=endc; ++cell)
+      for (unsigned int face_no=0; face_no<GeometryInfo<dim>::faces_per_cell; ++face_no)
+        {
+          // only boundary faces are of interest...
+          if (!cell->at_boundary(face_no))
+            continue;
+
+          // ...and among them only those with a selected indicator
+          if (boundary_ids.find(cell->face(face_no)->boundary_id()) ==
+              boundary_ids.end())
+            continue;
+
+          face_dof_indices.resize (cell->get_fe().dofs_per_face);
+          cell->face(face_no)->get_dof_indices (face_dof_indices,
+                                                cell->active_fe_index());
+          indices_on_boundary.insert (face_dof_indices.begin(),
+                                      face_dof_indices.end());
+        }
+
+    return indices_on_boundary.size();
+  }
+
+
+
+  // The three n_boundary_dofs() overloads are not implemented for
+  // dim=2, spacedim=3: each one raises ExcNotImplemented through
+  // Assert and returns 0.
+  template <>
+  types::global_dof_index DoFHandler<2,3>::n_boundary_dofs () const
+  {
+    Assert(false,ExcNotImplemented());
+    return 0;
+  }
+
+
+
+  // FunctionMap overload; the argument is ignored
+  template <>
+  types::global_dof_index DoFHandler<2,3>::n_boundary_dofs (const FunctionMap &) const
+  {
+    Assert(false,ExcNotImplemented());
+    return 0;
+  }
+
+
+
+  // std::set overload; the argument is ignored
+  template <>
+  types::global_dof_index DoFHandler<2,3>::n_boundary_dofs (const std::set<types::boundary_id> &) const
+  {
+    Assert(false,ExcNotImplemented());
+    return 0;
+  }
+
+
+
+  /**
+   * Return an estimate of the memory (in bytes) used by this
+   * object: the member variables listed below, the objects pointed
+   * to by the entries of @p levels, and the object pointed to by
+   * @p faces if one has been allocated.
+   *
+   * Each contribution is counted exactly once.
+   */
+  template<int dim, int spacedim>
+  std::size_t
+  DoFHandler<dim,spacedim>::memory_consumption () const
+  {
+    std::size_t mem = (MemoryConsumption::memory_consumption (tria) +
+                       MemoryConsumption::memory_consumption (finite_elements) +
+                       MemoryConsumption::memory_consumption (levels) +
+                       MemoryConsumption::memory_consumption (number_cache) +
+                       MemoryConsumption::memory_consumption (vertex_dofs) +
+                       MemoryConsumption::memory_consumption (vertex_dofs_offsets) +
+                       MemoryConsumption::memory_consumption (has_children));
+
+    // the objects the level pointers refer to
+    for (unsigned int i=0; i<levels.size(); ++i)
+      mem += MemoryConsumption::memory_consumption (*levels[i]);
+
+    // the constructor initializes faces to NULL, so the pointer
+    // must not be dereferenced unconditionally
+    if (faces != NULL)
+      mem += MemoryConsumption::memory_consumption (*faces);
+
+    return mem;
+  }
+
+
+
+  template<int dim, int spacedim>
+  void
+  DoFHandler<dim,spacedim>::
+  compute_vertex_dof_identities (std::vector<types::global_dof_index> &new_dof_indices) const
+  {
+    // For each vertex on which more than one
+    // finite element is active, identify the
+    // vertex dofs of the other elements with
+    // those of the first one: new_dof_indices[j]
+    // is set to the dof that dof j is merged into.
+    //
+    // Note: we may wish to do what we do for lines
+    // and quads, namely identify dofs only towards
+    // the most dominating fe. however, it is not
+    // clear whether this is necessary for vertices
+    // at all; no fe comes to mind that requires it
+    Table<2,std_cxx11::shared_ptr<dealii::internal::hp::DoFIdentities> >
+    vertex_dof_identities (get_fe().size(),
+                           get_fe().size());
+
+    // loop over all vertices; only those
+    // with more than one active finite
+    // element need any work
+    for (unsigned int vertex_index=0; vertex_index<get_tria().n_vertices();
+         ++vertex_index)
+      {
+        const unsigned int n_active_fe_indices
+          = dealii::internal::DoFAccessor::Implementation::
+            n_active_vertex_fe_indices (*this, vertex_index);
+        if (n_active_fe_indices > 1)
+          {
+            const unsigned int
+            first_fe_index
+              = dealii::internal::DoFAccessor::Implementation::
+                nth_active_vertex_fe_index (*this, vertex_index, 0);
+
+            // loop over all the
+            // other FEs active on
+            // this vertex, whose DoF
+            // indices we identify with
+            // those of the first FE
+            for (unsigned int f=1; f<n_active_fe_indices; ++f)
+              {
+                const unsigned int
+                other_fe_index
+                  = dealii::internal::DoFAccessor::Implementation::
+                    nth_active_vertex_fe_index (*this, vertex_index, f);
+
+                // make sure the
+                // entry in the
+                // equivalence
+                // table exists
+                dealii::internal::hp::ensure_existence_of_dof_identities<0>
+                (get_fe()[first_fe_index],
+                 get_fe()[other_fe_index],
+                 vertex_dof_identities[first_fe_index][other_fe_index]);
+
+                // then loop
+                // through the
+                // identities we
+                // have. first get
+                // the global
+                // numbers of the
+                // dofs we want to
+                // identify and
+                // make sure they
+                // are not yet
+                // constrained to
+                // anything else,
+                // except for to
+                // each other. use
+                // the rule that we
+                // will always
+                // constrain the
+                // dof with the
+                // higher fe
+                // index to the
+                // one with the
+                // lower, to avoid
+                // circular
+                // reasoning.
+                dealii::internal::hp::DoFIdentities &identities
+                  = *vertex_dof_identities[first_fe_index][other_fe_index];
+                for (unsigned int i=0; i<identities.size(); ++i)
+                  {
+                    const types::global_dof_index lower_dof_index
+                      = dealii::internal::DoFAccessor::Implementation::
+                        get_vertex_dof_index (*this,
+                                              vertex_index,
+                                              first_fe_index,
+                                              identities[i].first);
+                    const types::global_dof_index higher_dof_index
+                      = dealii::internal::DoFAccessor::Implementation::
+                        get_vertex_dof_index (*this,
+                                              vertex_index,
+                                              other_fe_index,
+                                              identities[i].second);
+
+                    Assert ((new_dof_indices[higher_dof_index] ==
+                             numbers::invalid_dof_index)
+                            ||
+                            (new_dof_indices[higher_dof_index] ==
+                             lower_dof_index),
+                            ExcInternalError());
+
+                    new_dof_indices[higher_dof_index] = lower_dof_index;
+                  }
+              }
+          }
+      }
+  }
+
+
+  template <>
+  void
+  DoFHandler<1,1>::
+  compute_line_dof_identities (std::vector<types::global_dof_index> &) const
+  {}  // deliberately a no-op for dim==1 (presumably because lines are the cells themselves there -- confirm)
+
+
+
+  template <>
+  void
+  DoFHandler<1,2>::
+  compute_line_dof_identities (std::vector<types::global_dof_index> &) const
+  {}  // deliberately a no-op for dim==1 (presumably because lines are the cells themselves there -- confirm)
+
+  template <>
+  void
+  DoFHandler<1,3>::
+  compute_line_dof_identities (std::vector<types::global_dof_index> &) const
+  {}  // deliberately a no-op for dim==1 (presumably because lines are the cells themselves there -- confirm)
+
+
+  template<int dim, int spacedim>
+  void
+  DoFHandler<dim,spacedim>::
+  compute_line_dof_identities (std::vector<types::global_dof_index> &new_dof_indices) const
+  {
+    // identify dofs on lines on which several finite elements are active. we mark lines
+    // already treated with user flags, so save and clear these first and restore them later
+    std::vector<bool> user_flags;
+    this->get_triangulation().save_user_flags_line(user_flags);
+    const_cast<Triangulation<dim,spacedim> &>(this->get_triangulation()).clear_user_flags_line ();
+
+    // An implementation of the algorithm described in the hp paper, including
+    // the modification mentioned later in the "complications in 3-d" subsections
+    //
+    // as explained there, we do something only if there are exactly 2 finite
+    // elements associated with an object. if there is only one, then there is
+    // nothing to do anyway, and if there are 3 or more, then we can get into
+    // trouble. note that this only happens for lines in 3d and higher, and for
+    // quads only in 4d and higher, so this isn't a particularly frequent case
+    //
+    // there is one case, however, that we would like to handle (see, for
+    // example, the hp/crash_15 testcase): if we have FESystem(FE_Q(2),FE_DGQ(i))
+    // elements for a bunch of values 'i', then we should be able to handle this
+    // because we can simply unify *all* dofs, not only some. so what we do
+    // is to first treat all pairs of finite elements that have *identical* dofs,
+    // and then only deal with those that are not identical of which we can
+    // handle at most 2
+    Table<2,std_cxx11::shared_ptr<internal::hp::DoFIdentities> >
+    line_dof_identities (finite_elements->size(),
+                         finite_elements->size());
+
+    for (active_cell_iterator cell=begin_active(); cell!=end(); ++cell)
+      for (unsigned int l=0; l<GeometryInfo<dim>::lines_per_cell; ++l)
+        if (cell->line(l)->user_flag_set() == false)
+          {
+            const line_iterator line = cell->line(l);
+            line->set_user_flag ();
+
+            unsigned int unique_sets_of_dofs
+              = line->n_active_fe_indices();
+
+            // do a first loop over all sets of dofs and do identity
+            // uniquification
+            for (unsigned int f=0; f<line->n_active_fe_indices(); ++f)
+              for (unsigned int g=f+1; g<line->n_active_fe_indices(); ++g)
+                {
+                  const unsigned int fe_index_1 = line->nth_active_fe_index (f),
+                                     fe_index_2 = line->nth_active_fe_index (g);
+
+                  if (((*finite_elements)[fe_index_1].dofs_per_line
+                       ==
+                       (*finite_elements)[fe_index_2].dofs_per_line)
+                      &&
+                      ((*finite_elements)[fe_index_1].dofs_per_line > 0))
+                    {
+                      internal::hp::ensure_existence_of_dof_identities<1>
+                      ((*finite_elements)[fe_index_1],
+                       (*finite_elements)[fe_index_2],
+                       line_dof_identities[fe_index_1][fe_index_2]);
+                      // see if these sets of dofs are identical. the first
+                      // condition for this is that indeed there are n identities
+                      if (line_dof_identities[fe_index_1][fe_index_2]->size()
+                          ==
+                          (*finite_elements)[fe_index_1].dofs_per_line)
+                        {
+                          unsigned int i=0;
+                          for (; i<(*finite_elements)[fe_index_1].dofs_per_line; ++i)
+                            if (((*(line_dof_identities[fe_index_1][fe_index_2]))[i].first != i)
+                                &&
+                                ((*(line_dof_identities[fe_index_1][fe_index_2]))[i].second != i))
+                              // not an identity. NOTE(review): with '&&' we only get here if *both* entries differ from i; '||' may be intended -- confirm
+                              break;
+
+                          if (i == (*finite_elements)[fe_index_1].dofs_per_line)
+                            {
+                              // The line dofs (i.e., the ones interior to a line) of these two finite elements are identical.
+                              // Note that there could be situations when one element still dominates another, e.g.:
+                              // FE_Q(2) x FE_Nothing(dominate) vs
+                              // FE_Q(2) x FE_Q(1)
+
+                              --unique_sets_of_dofs;
+
+                              for (unsigned int j=0; j<(*finite_elements)[fe_index_1].dofs_per_line; ++j)
+                                {
+                                  const types::global_dof_index master_dof_index
+                                    = line->dof_index (j, fe_index_1);
+                                  const types::global_dof_index slave_dof_index
+                                    = line->dof_index (j, fe_index_2);
+
+                                  // if master dof was already constrained,
+                                  // constrain to that one, otherwise constrain
+                                  // slave to master
+                                  if (new_dof_indices[master_dof_index] !=
+                                      numbers::invalid_dof_index)
+                                    {
+                                      Assert (new_dof_indices[new_dof_indices[master_dof_index]] ==
+                                              numbers::invalid_dof_index,
+                                              ExcInternalError());
+
+                                      new_dof_indices[slave_dof_index]
+                                        = new_dof_indices[master_dof_index];
+                                    }
+                                  else
+                                    {
+                                      Assert ((new_dof_indices[master_dof_index] ==
+                                               numbers::invalid_dof_index)
+                                              ||
+                                              (new_dof_indices[slave_dof_index] ==
+                                               master_dof_index),
+                                              ExcInternalError());
+                                      // NOTE(review): the first alternative above always holds in this branch, so this Assert cannot fail
+                                      new_dof_indices[slave_dof_index] = master_dof_index;
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+
+            // if at this point, there is only one unique set of dofs left, then
+            // we have taken care of everything above. if there are two, then we
+            // need to deal with them here. if there are more, then we punt, as
+            // described in the paper (and mentioned above)
+//TODO: The check for 'dim==2' was inserted by intuition. It fixes
+// the previous problems with step-27 in 3D. But an explanation
+// for this is still required, and what we do here is not what we
+// describe in the paper!.
+            if ((unique_sets_of_dofs == 2) && (dim == 2))
+              {
+                // find out which is the most dominating finite element of the
+                // ones that are used on this line
+                const unsigned int most_dominating_fe_index
+                  = internal::hp::get_most_dominating_fe_index<dim,spacedim> (line);
+
+                // if we found the most dominating element, then use this to eliminate some of
+                // the degrees of freedom by identification. otherwise, the code that computes
+                // hanging node constraints will have to deal with it by computing
+                // appropriate constraints along this face/edge
+                if (most_dominating_fe_index != numbers::invalid_unsigned_int)
+                  {
+                    const unsigned int n_active_fe_indices
+                      = line->n_active_fe_indices ();
+
+                    // loop over the indices of all the finite elements that are not
+                    // dominating, and identify their dofs to the most dominating
+                    // one
+                    for (unsigned int f=0; f<n_active_fe_indices; ++f)
+                      if (line->nth_active_fe_index (f) !=
+                          most_dominating_fe_index)
+                        {
+                          const unsigned int
+                          other_fe_index = line->nth_active_fe_index (f);
+
+                          internal::hp::ensure_existence_of_dof_identities<1>
+                          ((*finite_elements)[most_dominating_fe_index],
+                           (*finite_elements)[other_fe_index],
+                           line_dof_identities[most_dominating_fe_index][other_fe_index]);
+
+                          internal::hp::DoFIdentities &identities
+                            = *line_dof_identities[most_dominating_fe_index][other_fe_index];
+                          for (unsigned int i=0; i<identities.size(); ++i)
+                            {
+                              const types::global_dof_index master_dof_index
+                                = line->dof_index (identities[i].first, most_dominating_fe_index);
+                              const types::global_dof_index slave_dof_index
+                                = line->dof_index (identities[i].second, other_fe_index);
+
+                              Assert ((new_dof_indices[master_dof_index] ==
+                                       numbers::invalid_dof_index)
+                                      ||
+                                      (new_dof_indices[slave_dof_index] ==
+                                       master_dof_index),
+                                      ExcInternalError());
+
+                              new_dof_indices[slave_dof_index] = master_dof_index;
+                            }
+                        }
+                  }
+              }
+          }
+
+    // finally restore the user flags
+    const_cast<Triangulation<dim,spacedim> &>(this->get_triangulation())
+    .load_user_flags_line(user_flags);
+  }
+
+
+
+  template <int dim, int spacedim>
+  void
+  DoFHandler<dim,spacedim>::
+  compute_quad_dof_identities (std::vector<types::global_dof_index> &) const
+  {
+    // this generic version does nothing. it should only be called for
+    // dim<3, where there are no quad dof identities. for dim>=3, the
+    // specialization below should take care of it
+    Assert (dim < 3, ExcInternalError());
+  }
+
+
+  template <>
+  void
+  DoFHandler<3,3>::
+  compute_quad_dof_identities (std::vector<types::global_dof_index> &new_dof_indices) const
+  {
+    const int dim = 3;
+    const int spacedim = 3;
+
+    // identify dofs on quads on which two
+    // finite elements are active. quads
+    // already treated are marked via user
+    // flags, so first save and clear them
+    // here and restore them at the end
+    std::vector<bool> user_flags;
+    this->get_triangulation().save_user_flags_quad(user_flags);
+    const_cast<Triangulation<dim,spacedim> &>(this->get_triangulation()).clear_user_flags_quad ();
+
+    // An implementation of the
+    // algorithm described in the hp
+    // paper, including the
+    // modification mentioned later
+    // in the "complications in 3-d"
+    // subsections
+    //
+    // as explained there, we do
+    // something only if there are
+    // exactly 2 finite elements
+    // associated with an object. if
+    // there is only one, then there
+    // is nothing to do anyway, and
+    // if there are 3 or more, then
+    // we can get into trouble. note
+    // that this only happens for
+    // lines in 3d and higher, and
+    // for quads only in 4d and
+    // higher, so this isn't a
+    // particularly frequent case
+    Table<2,std_cxx11::shared_ptr<internal::hp::DoFIdentities> >
+    quad_dof_identities (finite_elements->size(),
+                         finite_elements->size());
+
+    for (active_cell_iterator cell=begin_active(); cell!=end(); ++cell)
+      for (unsigned int q=0; q<GeometryInfo<dim>::quads_per_cell; ++q)
+        if ((cell->quad(q)->user_flag_set() == false)
+            &&
+            (cell->quad(q)->n_active_fe_indices() == 2))
+          {
+            const quad_iterator quad = cell->quad(q);
+            quad->set_user_flag ();
+
+            // find out which is the
+            // most dominating finite
+            // element of the ones that
+            // are used on this quad
+            const unsigned int most_dominating_fe_index
+              = internal::hp::get_most_dominating_fe_index<dim,spacedim> (quad);
+
+            // if we found the most dominating element, then use this to eliminate some of
+            // the degrees of freedom by identification. otherwise, the code that computes
+            // hanging node constraints will have to deal with it by computing
+            // appropriate constraints along this face/edge
+            if (most_dominating_fe_index != numbers::invalid_unsigned_int)
+              {
+                const unsigned int n_active_fe_indices
+                  = quad->n_active_fe_indices ();
+
+                // loop over the indices of
+                // all the finite elements
+                // that are not dominating,
+                // and identify their dofs
+                // to the most dominating
+                // one
+                for (unsigned int f=0; f<n_active_fe_indices; ++f)
+                  if (quad->nth_active_fe_index (f) !=
+                      most_dominating_fe_index)
+                    {
+                      const unsigned int
+                      other_fe_index = quad->nth_active_fe_index (f);
+
+                      internal::hp::ensure_existence_of_dof_identities<2>
+                      ((*finite_elements)[most_dominating_fe_index],
+                       (*finite_elements)[other_fe_index],
+                       quad_dof_identities[most_dominating_fe_index][other_fe_index]);
+
+                      internal::hp::DoFIdentities &identities
+                        = *quad_dof_identities[most_dominating_fe_index][other_fe_index];
+                      for (unsigned int i=0; i<identities.size(); ++i)
+                        {
+                          const types::global_dof_index master_dof_index
+                            = quad->dof_index (identities[i].first, most_dominating_fe_index);
+                          const types::global_dof_index slave_dof_index
+                            = quad->dof_index (identities[i].second, other_fe_index);
+
+                          Assert ((new_dof_indices[master_dof_index] ==
+                                   numbers::invalid_dof_index)
+                                  ||
+                                  (new_dof_indices[slave_dof_index] ==
+                                   master_dof_index),
+                                  ExcInternalError());
+
+                          new_dof_indices[slave_dof_index] = master_dof_index;
+                        }
+                    }
+              }
+          }
+
+    // finally restore the user flags
+    const_cast<Triangulation<dim,spacedim> &>(this->get_triangulation())
+    .load_user_flags_quad(user_flags);
+  }
+
+
+
+  // Set the active finite element index of every active cell: the i-th
+  // active cell, in the order in which the active cell iterators visit
+  // them, is assigned active_fe_indices[i].
+  template <int dim, int spacedim>
+  void DoFHandler<dim,spacedim>::set_active_fe_indices (const std::vector<unsigned int> &active_fe_indices)
+  {
+    Assert(active_fe_indices.size()==get_tria().n_active_cells(),
+           ExcDimensionMismatch(active_fe_indices.size(), get_tria().n_active_cells()));
+
+    create_active_fe_table ();
+
+    // The values are protected data of this object and could be written
+    // directly; going through the cell accessors is simpler and also runs
+    // their debug-mode checks, which we would otherwise have to duplicate.
+    active_cell_iterator past_the_end = end();
+    unsigned int position = 0;
+    for (active_cell_iterator c=begin_active(); c!=past_the_end; ++c, ++position)
+      c->set_active_fe_index(active_fe_indices[position]);
+  }
+
+
+
+  // Store in active_fe_indices the active finite element index of every
+  // active cell, in the order in which the active cells are traversed.
+  template <int dim, int spacedim>
+  void DoFHandler<dim,spacedim>::get_active_fe_indices (std::vector<unsigned int> &active_fe_indices) const
+  {
+    active_fe_indices.resize(get_tria().n_active_cells());
+
+    // The values could be read from this object's protected data directly;
+    // going through the cell accessors is simply the easier way.
+    active_cell_iterator past_the_end = end();
+    unsigned int position = 0;
+    for (active_cell_iterator c=begin_active(); c!=past_the_end; ++c, ++position)
+      active_fe_indices[position] = c->active_fe_index();
+  }
+
+
+
+  template<int dim, int spacedim>
+  void DoFHandler<dim,spacedim>::distribute_dofs (const hp::FECollection<dim,spacedim> &ff)
+  {
+    Assert (tria->n_levels() > 0, ExcInvalidTriangulation());
+    // number the dofs for the elements in ff: enumerate them cell by cell, merge identified dofs, renumber, update caches
+    finite_elements = &ff;
+
+    // This call ensures that the
+    // active_fe_indices vectors are
+    // initialized correctly.
+    create_active_fe_table ();
+
+    // up front make sure that the fe
+    // collection is large enough to
+    // cover all fe indices presently
+    // in use on the mesh
+    for (active_cell_iterator cell = begin_active(); cell != end(); ++cell)
+      Assert (cell->active_fe_index() < finite_elements->size(),
+              ExcInvalidFEIndex (cell->active_fe_index(),
+                                 finite_elements->size()));
+
+
+    // then allocate space for all
+    // the other tables
+    dealii::internal::hp::DoFHandler::Implementation::reserve_space (*this);
+
+    // Clear user flags because we will
+    // need them. But first we save
+    // them and make sure that we
+    // restore them later such that at
+    // the end of this function the
+    // Triangulation will be in the
+    // same state as it was at the
+    // beginning of this function.
+    std::vector<bool> user_flags;
+    tria->save_user_flags(user_flags);
+    const_cast<Triangulation<dim,spacedim> &>(*tria).clear_user_flags ();
+
+
+    /////////////////////////////////
+
+    // Step 1: distribute DoFs on all
+    // active entities
+    {
+      types::global_dof_index next_free_dof = 0;
+      active_cell_iterator cell = begin_active(),
+                           endc = end();
+
+      for (; cell != endc; ++cell)
+        next_free_dof
+          = dealii::internal::hp::DoFHandler::Implementation::distribute_dofs_on_cell<spacedim> (cell,
+              next_free_dof);
+
+      number_cache.n_global_dofs = next_free_dof;
+    }
+
+
+    /////////////////////////////////
+
+    // Step 2: identify certain dofs
+    // if the finite element tells us
+    // that they should have the same
+    // value. only pertinent for
+    // faces and other
+    // lower-dimensional objects
+    // where elements come together
+    std::vector<types::global_dof_index>
+    constrained_indices (number_cache.n_global_dofs, numbers::invalid_dof_index);
+    compute_vertex_dof_identities (constrained_indices);
+    compute_line_dof_identities (constrained_indices);
+    compute_quad_dof_identities (constrained_indices);
+
+    // loop over all dofs and assign
+    // new numbers to those which are
+    // not constrained
+    std::vector<types::global_dof_index>
+    new_dof_indices (number_cache.n_global_dofs, numbers::invalid_dof_index);
+    types::global_dof_index next_free_dof = 0;
+    for (types::global_dof_index i=0; i<number_cache.n_global_dofs; ++i)
+      if (constrained_indices[i] == numbers::invalid_dof_index)
+        {
+          new_dof_indices[i] = next_free_dof;
+          ++next_free_dof;
+        }
+
+    // then loop over all those that
+    // are constrained and record the
+    // new dof number for those:
+    for (types::global_dof_index i=0; i<number_cache.n_global_dofs; ++i)
+      if (constrained_indices[i] != numbers::invalid_dof_index)
+        {
+          Assert (new_dof_indices[constrained_indices[i]] !=
+                  numbers::invalid_dof_index,
+                  ExcInternalError());
+
+          new_dof_indices[i] = new_dof_indices[constrained_indices[i]];
+        }
+
+    for (types::global_dof_index i=0; i<number_cache.n_global_dofs; ++i)
+      {
+        Assert (new_dof_indices[i] != numbers::invalid_dof_index,
+                ExcInternalError());
+        Assert (new_dof_indices[i] < next_free_dof,
+                ExcInternalError());
+      }
+
+    // finally, do the renumbering
+    // and set the number of actually
+    // used dof indices
+    renumber_dofs_internal (new_dof_indices, dealii::internal::int2type<dim>());
+
+    // now set the elements of the
+    // number cache appropriately
+    number_cache.n_global_dofs        = next_free_dof;
+    number_cache.n_locally_owned_dofs = number_cache.n_global_dofs;
+
+    if (dynamic_cast<const parallel::shared::Triangulation< dim, spacedim >*>
+        (&this->get_triangulation())
+        == 0)
+      {
+        number_cache.locally_owned_dofs
+          = IndexSet (number_cache.n_global_dofs);
+        number_cache.locally_owned_dofs.add_range (0,
+                                                   number_cache.n_global_dofs);
+        Assert (number_cache.n_global_dofs < std::numeric_limits<unsigned int>::max (),
+                ExcMessage ("Global number of degrees of freedom is too large."));
+        number_cache.n_locally_owned_dofs_per_processor
+          = std::vector<types::global_dof_index> (1,
+                                                  (types::global_dof_index) number_cache.n_global_dofs);
+      }
+    else
+      {
+        AssertThrow(false, ExcNotImplemented() );  // a parallel::shared::Triangulation is not supported here
+        //number_cache.locally_owned_dofs = dealii::DoFTools::locally_owned_dofs_with_subdomain(this,tria->locally_owned_subdomain() );
+        //TODO: update n_locally_owned_dofs_per_processor as well
+      }
+
+    number_cache.locally_owned_dofs_per_processor
+      = std::vector<IndexSet> (1,
+                               number_cache.locally_owned_dofs);
+
+    // update the cache used for cell dof indices and compress the data on the levels. do
+    // the latter on separate tasks to gain parallelism, starting with the highest
+    // level (there is most to do there, so start it first)
+    for (active_cell_iterator cell = begin_active();
+         cell != end(); ++cell)
+      cell->update_cell_dof_indices_cache ();
+
+    {
+      Threads::TaskGroup<> tg;
+      for (int level=levels.size()-1; level>=0; --level)
+        tg += Threads::new_task (&dealii::internal::hp::DoFLevel::compress_data<dim,spacedim>,
+                                 *levels[level], *finite_elements);
+      tg.join_all ();
+    }
+
+    // finally restore the user flags
+    const_cast<Triangulation<dim,spacedim> &>(*tria).load_user_flags(user_flags);
+  }
+
+
+
+  template<int dim, int spacedim>
+  void DoFHandler<dim,spacedim>::clear ()
+  {
+    // reset this object: first let go of the finite element collection used so far
+    finite_elements = 0;
+
+    // then release the memory this object holds (done by clear_space())
+    clear_space ();
+  }
+
+
+
+  // Renumber all degrees of freedom: the dof that presently has index i is
+  // given the index new_numbers[i]. The compressed per-level storage is
+  // expanded for the duration of the operation and re-compressed at the
+  // end; the cell dof index caches are refreshed in between.
+  template<int dim, int spacedim>
+  void DoFHandler<dim,spacedim>::renumber_dofs (const std::vector<types::global_dof_index> &new_numbers)
+  {
+    Assert (new_numbers.size() == n_dofs(), ExcRenumberingIncomplete());
+#ifdef DEBUG
+    // in debug mode, check that the new indices are exactly the
+    // consecutive numbers 0,1,...,n-1 once they are sorted
+    {
+      std::vector<types::global_dof_index> sorted_numbers (new_numbers);
+      std::sort (sorted_numbers.begin(), sorted_numbers.end());
+      for (types::global_dof_index k=0; k<sorted_numbers.size(); ++k)
+        Assert (sorted_numbers[k] == k, ExcNewNumbersNotConsecutive(k));
+    }
+#endif
+
+    // Expand the internal storage scheme of dofs on cells so that dofs can
+    // be accessed individually. This is done with one task per level,
+    // starting with the most expensive levels (the highest ones).
+    {
+      Threads::TaskGroup<> uncompress_tasks;
+      for (int level=levels.size()-1; level>=0; --level)
+        uncompress_tasks += Threads::new_task (&dealii::internal::hp::DoFLevel::uncompress_data<dim,spacedim>,
+                                               *levels[level], *finite_elements);
+      uncompress_tasks.join_all ();
+    }
+
+    // the renumbering proper, dispatched on the dimension
+    renumber_dofs_internal (new_numbers, dealii::internal::int2type<dim>());
+
+    // refresh the per-cell cache of dof indices
+    for (active_cell_iterator cell = begin_active(); cell != end(); ++cell)
+      cell->update_cell_dof_indices_cache ();
+
+    // finally compress the dof indices again, level by level as above
+    {
+      Threads::TaskGroup<> compress_tasks;
+      for (int level=levels.size()-1; level>=0; --level)
+        compress_tasks += Threads::new_task (&dealii::internal::hp::DoFLevel::compress_data<dim,spacedim>,
+                                             *levels[level], *finite_elements);
+      compress_tasks.join_all ();
+    }
+  }
+
+
+
+  // Renumber the dofs located on vertices: for every vertex and every
+  // finite element active on it, each stored dof index i is replaced by
+  // new_numbers[i].
+  template<int dim, int spacedim>
+  void
+  DoFHandler<dim,spacedim>::
+  renumber_dofs_internal (const std::vector<types::global_dof_index> &new_numbers,
+                          dealii::internal::int2type<0>)
+  {
+    Assert (new_numbers.size() == n_dofs(), ExcRenumberingIncomplete());
+
+    for (unsigned int v=0; v<get_tria().n_vertices(); ++v)
+      {
+        const unsigned int n_fes_here
+          = dealii::internal::DoFAccessor::Implementation::
+            n_active_vertex_fe_indices (*this, v);
+
+        for (unsigned int k=0; k<n_fes_here; ++k)
+          {
+            const unsigned int fe
+              = dealii::internal::DoFAccessor::Implementation::
+                nth_active_vertex_fe_index (*this, v, k);
+            const unsigned int n_vertex_dofs
+              = (*finite_elements)[fe].dofs_per_vertex;
+
+            for (unsigned int d=0; d<n_vertex_dofs; ++d)
+              {
+                // read the present number, then store its replacement
+                const types::global_dof_index old_number
+                  = dealii::internal::DoFAccessor::Implementation::
+                    get_vertex_dof_index (*this, v, fe, d);
+
+                dealii::internal::DoFAccessor::Implementation::
+                set_vertex_dof_index (*this, v, fe, d,
+                                      new_numbers[old_number]);
+              }
+          }
+      }
+  }
+
+
+
+  // Renumber the dofs on vertices and lines: after delegating the vertex
+  // dofs, each dof index i stored on a line of an active cell is replaced
+  // by new_numbers[i]. A line shared by several cells is treated only once.
+  template<int dim, int spacedim>
+  void
+  DoFHandler<dim,spacedim>::
+  renumber_dofs_internal (const std::vector<types::global_dof_index> &new_numbers,
+                          dealii::internal::int2type<1>)
+  {
+    Assert (new_numbers.size() == n_dofs(), ExcRenumberingIncomplete());
+
+    renumber_dofs_internal (new_numbers, internal::int2type<0>());
+
+    // The line user flags serve as "already renumbered" markers below, so
+    // stash their current state and start from a clean slate.
+    dealii::Triangulation<dim,spacedim> &mutable_tria
+      = const_cast<dealii::Triangulation<dim,spacedim>&>(*tria);
+    std::vector<bool> saved_line_user_flags;
+    mutable_tria.save_user_flags_line (saved_line_user_flags);
+    mutable_tria.clear_user_flags_line ();
+
+    for (active_cell_iterator cell = begin_active(); cell!=end(); ++cell)
+      for (unsigned int l=0; l<GeometryInfo<dim>::lines_per_cell; ++l)
+        {
+          const line_iterator line = cell->line(l);
+          if (line->user_flag_set())
+            continue;
+          line->set_user_flag();
+
+          const unsigned int n_fes_on_line = line->n_active_fe_indices ();
+          for (unsigned int k=0; k<n_fes_on_line; ++k)
+            {
+              const unsigned int fe = line->nth_active_fe_index (k);
+              const unsigned int n_line_dofs
+                = (*finite_elements)[fe].dofs_per_line;
+
+              for (unsigned int d=0; d<n_line_dofs; ++d)
+                line->set_dof_index (d,
+                                     new_numbers[line->dof_index(d,fe)],
+                                     fe);
+            }
+        }
+
+    // put the line user flags back the way we found them
+    mutable_tria.load_user_flags_line (saved_line_user_flags);
+  }
+
+
+
+//TODO: Merge the following three functions -- they are identical
+  template<>
+  void
+  DoFHandler<2,2>::
+  renumber_dofs_internal (const std::vector<types::global_dof_index> &new_numbers,
+                          dealii::internal::int2type<2>)
+  {
+    const unsigned int dim = 2;  // local stand-ins for the template arguments of this specialization
+    const unsigned int spacedim = 2;
+
+    Assert (new_numbers.size() == n_dofs(), ExcRenumberingIncomplete());
+
+    renumber_dofs_internal (new_numbers, internal::int2type<1>());  // run the int2type<1> overload first
+
+    // now replace every dof index i stored on a quad by new_numbers[i].
+    // save the quad user flags so we can use them to mark quads we've
+    // already treated: renumbering the same quad twice would be wrong
+    std::vector<bool> saved_quad_user_flags;
+    const_cast<dealii::Triangulation<dim,spacedim>&>(*tria)
+    .save_user_flags_quad (saved_quad_user_flags);
+    const_cast<dealii::Triangulation<dim,spacedim>&>(*tria)
+    .clear_user_flags_quad ();
+
+    for (active_cell_iterator cell = begin_active(); cell!=end(); ++cell)
+      for (unsigned int q=0; q<GeometryInfo<dim>::quads_per_cell; ++q)
+        if (cell->quad(q)->user_flag_set() == false)
+          {
+            const quad_iterator quad = cell->quad(q);
+            quad->set_user_flag();
+
+            const unsigned int n_active_fe_indices
+              = quad->n_active_fe_indices ();
+
+            for (unsigned int f=0; f<n_active_fe_indices; ++f)
+              {
+                const unsigned int fe_index
+                  = quad->nth_active_fe_index (f);
+
+                for (unsigned int d=0; d<(*finite_elements)[fe_index].dofs_per_quad; ++d)
+                  quad->set_dof_index (d,
+                                       new_numbers[quad->dof_index(d,fe_index)],  // old index -> new index
+                                       fe_index);
+              }
+          }
+
+    // at the end, restore the quad user flags
+    // to the state saved at the top
+    const_cast<dealii::Triangulation<dim,spacedim>&>(*tria)
+    .load_user_flags_quad (saved_quad_user_flags);
+  }
+
+
+
+  template<>
+  void
+  DoFHandler<2,3>::
+  renumber_dofs_internal (const std::vector<types::global_dof_index> &new_numbers,
+                          dealii::internal::int2type<2>)
+  {
+    const unsigned int dim = 2;  // local stand-ins for the template arguments of this specialization
+    const unsigned int spacedim = 3;
+
+    Assert (new_numbers.size() == n_dofs(), ExcRenumberingIncomplete());
+
+    renumber_dofs_internal (new_numbers, internal::int2type<1>());  // run the int2type<1> overload first
+
+    // now replace every dof index i stored on a quad by new_numbers[i].
+    // save the quad user flags so we can use them to mark quads we've
+    // already treated: renumbering the same quad twice would be wrong
+    std::vector<bool> saved_quad_user_flags;
+    const_cast<dealii::Triangulation<dim,spacedim>&>(*tria)
+    .save_user_flags_quad (saved_quad_user_flags);
+    const_cast<dealii::Triangulation<dim,spacedim>&>(*tria)
+    .clear_user_flags_quad ();
+
+    for (active_cell_iterator cell = begin_active(); cell!=end(); ++cell)
+      for (unsigned int q=0; q<GeometryInfo<dim>::quads_per_cell; ++q)
+        if (cell->quad(q)->user_flag_set() == false)
+          {
+            const quad_iterator quad = cell->quad(q);
+            quad->set_user_flag();
+
+            const unsigned int n_active_fe_indices
+              = quad->n_active_fe_indices ();
+
+            for (unsigned int f=0; f<n_active_fe_indices; ++f)
+              {
+                const unsigned int fe_index
+                  = quad->nth_active_fe_index (f);
+
+                for (unsigned int d=0; d<(*finite_elements)[fe_index].dofs_per_quad; ++d)
+                  quad->set_dof_index (d,
+                                       new_numbers[quad->dof_index(d,fe_index)],  // old index -> new index
+                                       fe_index);
+              }
+          }
+
+    // at the end, restore the quad user flags
+    // to the state saved at the top
+    const_cast<dealii::Triangulation<dim,spacedim>&>(*tria)
+    .load_user_flags_quad (saved_quad_user_flags);
+  }
+
+
+  template<>
+  void
+  DoFHandler<3,3>::
+  renumber_dofs_internal (const std::vector<types::global_dof_index> &new_numbers,
+                          dealii::internal::int2type<2>)
+  {
+    const unsigned int dim = 3;  // local stand-ins for the template arguments of this specialization
+    const unsigned int spacedim = 3;
+
+    Assert (new_numbers.size() == n_dofs(), ExcRenumberingIncomplete());
+
+    renumber_dofs_internal (new_numbers, internal::int2type<1>());  // run the int2type<1> overload first
+
+    // now replace every dof index i stored on a quad by new_numbers[i].
+    // save the quad user flags so we can use them to mark quads we've
+    // already treated: renumbering the same quad twice would be wrong
+    std::vector<bool> saved_quad_user_flags;
+    const_cast<dealii::Triangulation<dim,spacedim>&>(*tria)
+    .save_user_flags_quad (saved_quad_user_flags);
+    const_cast<dealii::Triangulation<dim,spacedim>&>(*tria)
+    .clear_user_flags_quad ();
+
+    for (active_cell_iterator cell = begin_active(); cell!=end(); ++cell)
+      for (unsigned int q=0; q<GeometryInfo<dim>::quads_per_cell; ++q)
+        if (cell->quad(q)->user_flag_set() == false)
+          {
+            const quad_iterator quad = cell->quad(q);
+            quad->set_user_flag();
+
+            const unsigned int n_active_fe_indices
+              = quad->n_active_fe_indices ();
+
+            for (unsigned int f=0; f<n_active_fe_indices; ++f)
+              {
+                const unsigned int fe_index
+                  = quad->nth_active_fe_index (f);
+
+                for (unsigned int d=0; d<(*finite_elements)[fe_index].dofs_per_quad; ++d)
+                  quad->set_dof_index (d,
+                                       new_numbers[quad->dof_index(d,fe_index)],  // old index -> new index
+                                       fe_index);
+              }
+          }
+
+    // at the end, restore the quad user flags
+    // to the state saved at the top
+    const_cast<dealii::Triangulation<dim,spacedim>&>(*tria)
+    .load_user_flags_quad (saved_quad_user_flags);
+  }
+
+
+  template<>
+  void
+  DoFHandler<3,3>::
+  renumber_dofs_internal (const std::vector<types::global_dof_index> &new_numbers,
+                          dealii::internal::int2type<3>)
+  {
+    const unsigned int dim = 3;  // local stand-ins for the template arguments of this specialization
+    const unsigned int spacedim = 3;
+
+    Assert (new_numbers.size() == n_dofs(), ExcRenumberingIncomplete());
+
+    renumber_dofs_internal (new_numbers, internal::int2type<2>());  // run the int2type<2> overload first
+
+    // now replace every dof index i stored on a hex by new_numbers[i].
+    // save the hex user flags so we can use them to mark hexes we've
+    // already treated
+    std::vector<bool> saved_hex_user_flags;
+    const_cast<dealii::Triangulation<dim,spacedim>&>(*tria)
+    .save_user_flags_hex (saved_hex_user_flags);
+    const_cast<dealii::Triangulation<dim,spacedim>&>(*tria)
+    .clear_user_flags_hex ();
+
+    // we're in 3d, so hexes are also
+    // cells. stick with the same
+    // kind of notation as in the
+    // previous functions, though
+    for (active_cell_iterator cell = begin_active(); cell!=end(); ++cell)
+      if (cell->user_flag_set() == false)
+        {
+          const hex_iterator hex = cell;
+          hex->set_user_flag();
+
+          const unsigned int n_active_fe_indices
+            = hex->n_active_fe_indices ();
+
+          for (unsigned int f=0; f<n_active_fe_indices; ++f)
+            {
+              const unsigned int fe_index
+                = hex->nth_active_fe_index (f);
+
+              for (unsigned int d=0; d<(*finite_elements)[fe_index].dofs_per_hex; ++d)
+                hex->set_dof_index (d,
+                                    new_numbers[hex->dof_index(d,fe_index)],  // old index -> new index
+                                    fe_index);
+            }
+        }
+
+    // at the end, restore the hex user flags
+    // to the state saved at the top
+    const_cast<dealii::Triangulation<dim,spacedim>&>(*tria)
+    .load_user_flags_hex (saved_hex_user_flags);
+  }
+
+
+
+  template <int dim, int spacedim>
+  unsigned int
+  DoFHandler<dim, spacedim>::max_couplings_between_dofs () const
+  {
+    Assert (finite_elements != 0, ExcNoFESelected());  // needs a finite element collection to be set
+    return dealii::internal::hp::DoFHandler::Implementation::max_couplings_between_dofs (*this);  // all the work is done in the Implementation helper
+  }
+
+
+
+  template <int dim, int spacedim>
+  unsigned int
+  DoFHandler<dim,spacedim>::max_couplings_between_boundary_dofs () const
+  {
+    Assert (finite_elements != 0, ExcNoFESelected());  // needs a finite element collection to be set
+
+    switch (dim)
+      {
+      case 1:
+        return finite_elements->max_dofs_per_vertex();
+      case 2:
+        return (3*finite_elements->max_dofs_per_vertex()
+                +
+                2*finite_elements->max_dofs_per_line());
+      case 3:
+        // we need to take refinement of
+        // one boundary face into consideration
+        // here; in fact, this function returns
+        // what #max_coupling_between_dofs<2>
+        // returns
+        //
+        // we assume here that only four faces
+        // meet at the boundary; this assumption
+        // is not justified and needs to be
+        // fixed at some point. fortunately,
+        // leaving it as is does no harm since the
+        // matrix will complain if its requirements
+        // are not satisfied
+        return (19*finite_elements->max_dofs_per_vertex() +
+                28*finite_elements->max_dofs_per_line() +
+                8*finite_elements->max_dofs_per_quad());
+      default:
+        Assert (false, ExcNotImplemented());  // no estimate available for other dimensions
+        return 0;
+      }
+  }
+
+
+
+  template<int dim, int spacedim>
+  void DoFHandler<dim,spacedim>::create_active_fe_table ()
+  {
+    // Create one hp::DoFLevel for each
+    // level of the triangulation.
+    while (levels.size () < tria->n_levels ())
+      levels.push_back (new dealii::internal::hp::DoFLevel);
+
+    // then make sure that on each
+    // level active_fe_indices has one
+    // entry per raw cell; new entries
+    // are preset to zero, i.e. the
+    // default FE
+    for (unsigned int level=0; level<levels.size(); ++level)
+      {
+        if (levels[level]->active_fe_indices.size () == 0)
+          levels[level]->active_fe_indices.resize (tria->n_raw_cells(level),
+                                                   0);
+        else
+          {
+            // Either the
+            // active_fe_indices have
+            // size zero because they
+            // were just created, or
+            // they already have the
+            // correct size. Any other
+            // size indicates that
+            // something went wrong.
+            Assert (levels[level]->active_fe_indices.size () ==
+                    tria->n_raw_cells(level),
+                    ExcInternalError ());
+          }
+      }
+  }
+
+
+  template <int dim, int spacedim>
+  void DoFHandler<dim,spacedim>::pre_refinement_action ()
+  {
+    create_active_fe_table ();
+
+    // Record, per level, which cells already
+    // have children. That will make the transfer
+    // of the active_fe_index to the finer
+    // levels easier in post_refinement_action().
+    Assert (has_children.size () == 0, ExcInternalError ());
+    for (unsigned int i=0; i<levels.size(); ++i)
+      {
+        const unsigned int cells_on_level = tria->n_raw_cells(i);
+        std::vector<bool> *has_children_level =
+          new std::vector<bool> (cells_on_level);  // stored in has_children below; deleted in post_refinement_action()
+
+        // Check for each cell whether it has children. in 1d,
+        // we don't store refinement cases, so use the 'children'
+        // vector instead
+        if (dim == 1)
+          std::transform (tria->levels[i]->cells.children.begin (),
+                          tria->levels[i]->cells.children.end (),
+                          has_children_level->begin (),
+                          std::bind2nd (std::not_equal_to<int>(), -1));  // true iff the children entry is not -1
+        else
+          std::transform (tria->levels[i]->cells.refinement_cases.begin (),
+                          tria->levels[i]->cells.refinement_cases.end (),
+                          has_children_level->begin (),
+                          std::bind2nd (std::not_equal_to<unsigned char>(),
+                                        static_cast<unsigned char>(RefinementCase<dim>::no_refinement)));  // true iff the entry differs from no_refinement
+
+        has_children.push_back (has_children_level);
+      }
+  }
+
+
+
+  template<int dim, int spacedim>
+  void
+  DoFHandler<dim,spacedim>::post_refinement_action ()
+  {
+    Assert (has_children.size () == levels.size (), ExcInternalError ());
+
+    // Normally only one level is added, but if this Triangulation
+    // is created by copy_triangulation, it can be more than one level.
+    while (levels.size () < tria->n_levels ())
+      levels.push_back (new dealii::internal::hp::DoFLevel);
+
+    // Coarsening can lead to the loss
+    // of levels. Hence remove them.
+    while (levels.size () > tria->n_levels ())
+      {
+        delete levels[levels.size ()-1];
+        levels.pop_back ();
+      }
+
+    Assert(levels.size () == tria->n_levels (), ExcInternalError());
+
+    // Resize the active_fe_indices
+    // vectors; new entries are set to
+    // zero
+    for (unsigned int i=0; i<levels.size(); ++i)
+      levels[i]->active_fe_indices.resize (tria->n_raw_cells(i), 0);
+
+    // if a finite element collection
+    // has already been set, then
+    // actually try to set
+    // active_fe_indices for child
+    // cells of refined cells to the
+    // active_fe_index of the mother
+    // cell. if no finite element
+    // collection has been assigned
+    // yet, then all indicators are
+    // zero anyway, and there is no
+    // point trying to set anything
+    // (besides, we would trip over
+    // an assertion in
+    // set_active_fe_index)
+    if (finite_elements != 0)
+      {
+        cell_iterator cell = begin(),
+                      endc = end ();
+        for (; cell != endc; ++cell)
+          {
+            // Look if the cell got children during refinement by
+            // checking whether it has children now but didn't have
+            // children before refinement (the has_children array is
+            // set in pre-refinement action)
+            //
+            // Note: Although a level is added to
+            // the DoFHandler levels whenever the
+            // triangulation gained one, the buffer
+            // has_children does not need an entry
+            // for this new level, because the cells
+            // on the finest level never have
+            // children. Hence cell->has_children ()
+            // always returns false on that level,
+            // which short-circuits the following
+            // expression. Thus an index error in
+            // has_children should never occur.
+            if (cell->has_children () &&
+                !(*has_children [cell->level ()])[cell->index ()])
+              {
+                // Set active_fe_index in children to the same value
+                // as in the parent cell. we can't access the
+                // active_fe_index in the parent cell any more through
+                // cell->active_fe_index() since that function is not
+                // allowed for inactive cells, but we can access this
+                // information from the DoFLevels directly
+                for (unsigned int i = 0; i < cell->n_children(); ++i)
+                  cell->child (i)->set_active_fe_index
+                  (levels[cell->level()]->active_fe_index (cell->index()));
+              }
+          }
+      }
+
+    // Free the per-level buffers allocated in pre_refinement_action()
+    std::vector<std::vector<bool> *>::iterator children_level;
+    for (children_level = has_children.begin ();
+         children_level != has_children.end ();
+         ++children_level)
+      delete (*children_level);
+    has_children.clear ();
+  }
+
+
+  template <int dim, int spacedim>
+  template <int structdim>
+  types::global_dof_index
+  DoFHandler<dim,spacedim>::get_dof_index (const unsigned int,
+                                           const unsigned int,
+                                           const unsigned int,
+                                           const unsigned int) const
+  {
+    Assert (false, ExcNotImplemented());  // generic template: all arguments ignored, always asserts
+    return numbers::invalid_dof_index;  // value returned if the Assert above does not abort
+  }
+
+
+  template <int dim, int spacedim>
+  template <int structdim>
+  void
+  DoFHandler<dim,spacedim>::set_dof_index (const unsigned int,
+                                           const unsigned int,
+                                           const unsigned int,
+                                           const unsigned int,
+                                           const types::global_dof_index) const
+  {
+    Assert (false, ExcNotImplemented());  // generic template: all arguments ignored, nothing is stored
+  }
+
+
+  template<int dim, int spacedim>
+  void DoFHandler<dim,spacedim>::clear_space ()
+  {
+    for (unsigned int i=0; i<levels.size(); ++i)
+      delete levels[i];  // the level objects were allocated with new
+    levels.resize (0);
+    delete faces;
+    faces = NULL;  // reset so the deleted pointer is not used or deleted again
+
+    {
+      std::vector<types::global_dof_index> tmp;
+      std::swap (vertex_dofs, tmp);  // swap with an empty vector; the old storage dies with tmp at the closing brace
+    }
+
+    {
+      std::vector<types::global_dof_index> tmp;
+      std::swap (vertex_dofs_offsets, tmp);  // same swap-with-empty for the offsets array
+    }
+  }
+}
+
+
+
+/*-------------- Explicit Instantiations -------------------------------*/
+#include "dof_handler.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/hp/dof_handler.inst.in b/source/hp/dof_handler.inst.in
new file mode 100644
index 0000000..a872a73
--- /dev/null
+++ b/source/hp/dof_handler.inst.in
@@ -0,0 +1,89 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+for (deal_II_dimension : DIMENSIONS)
+{
+  namespace hp
+  \{
+    template class DoFHandler<deal_II_dimension>;
+
+#if deal_II_dimension != 3
+    template class DoFHandler<deal_II_dimension, deal_II_dimension+1>;
+    
+    template
+    types::global_dof_index
+    DoFHandler<deal_II_dimension, deal_II_dimension+1>::get_dof_index<1> (const unsigned int, const unsigned int, const unsigned int, const unsigned int) const;
+    
+    template
+    void
+    DoFHandler<deal_II_dimension, deal_II_dimension+1>::set_dof_index<1> (const unsigned int, const unsigned int, const unsigned int, const unsigned int, const types::global_dof_index) const;
+    
+    #if deal_II_dimension >= 2
+    template
+    types::global_dof_index
+    DoFHandler<deal_II_dimension, deal_II_dimension+1>::get_dof_index<2> (const unsigned int, const unsigned int, const unsigned int, const unsigned int) const;
+    
+    template
+    void
+    DoFHandler<deal_II_dimension, deal_II_dimension+1>::set_dof_index<2> (const unsigned int, const unsigned int, const unsigned int, const unsigned int, const types::global_dof_index) const;
+    #endif // deal_II_dimension >= 2
+#endif // deal_II_dimension != 3
+
+#if deal_II_dimension == 3
+   template class DoFHandler<1, 3>;  // the <1,3> case is emitted once, in the pass with deal_II_dimension == 3
+
+template
+types::global_dof_index
+DoFHandler<1,3>::get_dof_index<1> (const unsigned int, const unsigned int, const unsigned int, const unsigned int) const;
+
+template
+void
+DoFHandler<1,3>::set_dof_index<1> (const unsigned int, const unsigned int, const unsigned int, const unsigned int, const types::global_dof_index) const;
+#endif // deal_II_dimension == 3
+
+template
+types::global_dof_index
+DoFHandler<deal_II_dimension>::get_dof_index<1> (const unsigned int, const unsigned int, const unsigned int, const unsigned int) const;
+
+#if deal_II_dimension >= 2
+template
+types::global_dof_index
+DoFHandler<deal_II_dimension>::get_dof_index<2> (const unsigned int, const unsigned int, const unsigned int, const unsigned int) const;
+
+#if deal_II_dimension >= 3
+template
+types::global_dof_index
+DoFHandler<deal_II_dimension>::get_dof_index<3> (const unsigned int, const unsigned int, const unsigned int, const unsigned int) const;
+#endif // deal_II_dimension >= 3
+#endif // deal_II_dimension >= 2
+
+template
+void
+DoFHandler<deal_II_dimension>::set_dof_index<1> (const unsigned int, const unsigned int, const unsigned int, const unsigned int, const types::global_dof_index) const;
+
+#if deal_II_dimension >= 2
+template
+void
+DoFHandler<deal_II_dimension>::set_dof_index<2> (const unsigned int, const unsigned int, const unsigned int, const unsigned int, const types::global_dof_index) const;
+
+#if deal_II_dimension >= 3
+template
+void
+DoFHandler<deal_II_dimension>::set_dof_index<3> (const unsigned int, const unsigned int, const unsigned int, const unsigned int, const types::global_dof_index) const;
+#endif // deal_II_dimension >= 3
+#endif // deal_II_dimension >= 2
+  \}
+}
diff --git a/source/hp/dof_level.cc b/source/hp/dof_level.cc
new file mode 100644
index 0000000..595d16f
--- /dev/null
+++ b/source/hp/dof_level.cc
@@ -0,0 +1,244 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/hp/dof_level.h>
+#include <deal.II/hp/fe_collection.h>
+#include <iostream>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace internal
+{
+  namespace hp
+  {
+    template <int dim, int spacedim>
+    void
+    DoFLevel::compress_data (const dealii::hp::FECollection<dim,spacedim> &fe_collection)
+    {
+      (void)fe_collection;  // below, fe_collection is only referenced inside Assert()
+      return;  // NOTE(review): unconditional return -- everything below is currently unreachable
+
+      if (dof_offsets.size() == 0 || dof_indices.size()==0)
+        return;
+
+      // in a first run through, count how many slots we need in the
+      // dof_indices array after compression. note that the 'cell'
+      // counter is advanced inside the loop body, not in the for header
+      unsigned int new_size = 0;
+      for (unsigned int cell=0; cell<dof_offsets.size(); )
+        // see if this cell is active on the current level
+        if (dof_offsets[cell] != (offset_type)(-1))
+          {
+            // find the next cell active on this level
+            unsigned int next_cell = cell+1;
+            while ((next_cell<dof_offsets.size()) &&
+                   (dof_offsets[next_cell] == (offset_type)(-1)))
+              ++next_cell;
+
+            const unsigned int next_offset = (next_cell < dof_offsets.size() ?
+                                              dof_offsets[next_cell] :
+                                              dof_indices.size());
+
+            Assert (next_offset-dof_offsets[cell] == fe_collection[active_fe_indices[cell]].template n_dofs_per_object<dim>(),
+                    ExcInternalError());
+
+            // see if the range of dofs for this cell can be compressed (i.e. the
+            // indices are consecutive) and count how many slots it then needs
+            if (next_offset > dof_offsets[cell])
+              {
+                bool compressible = true;
+                for (unsigned int j=dof_offsets[cell]+1; j<next_offset; ++j)
+                  if (dof_indices[j] != dof_indices[j-1]+1)
+                    {
+                      compressible = false;
+                      break;
+                    }
+                if (compressible == true)
+                  new_size += 1;
+                else
+                  new_size += (next_offset-dof_offsets[cell]);
+              }
+
+            // then move on to the next cell
+            cell = next_cell;
+          }
+        else
+          ++cell;
+
+      // now allocate the new array and copy into it whatever we need
+      std::vector<types::global_dof_index> new_dof_indices;
+      new_dof_indices.reserve(new_size);
+      for (unsigned int cell=0; cell<dof_offsets.size(); )
+        // see if this cell is active on the current level
+        if (dof_offsets[cell] != (offset_type)(-1))
+          {
+            // find the next cell active on this level
+            unsigned int next_cell = cell+1;
+            while ((next_cell<dof_offsets.size()) &&
+                   (dof_offsets[next_cell] == (offset_type)(-1)))
+              ++next_cell;
+
+            const unsigned int next_offset = (next_cell < dof_offsets.size() ?
+                                              dof_offsets[next_cell] :
+                                              dof_indices.size());
+
+            Assert (next_offset-dof_offsets[cell] == fe_collection[active_fe_indices[cell]].template n_dofs_per_object<dim>(),
+                    ExcInternalError());
+
+            // same consecutive-indices test as in the counting pass above;
+            // this time the indices are actually copied
+            if (next_offset > dof_offsets[cell])
+              {
+                bool compressible = true;
+                for (unsigned int j=dof_offsets[cell]+1; j<next_offset; ++j)
+                  if (dof_indices[j] != dof_indices[j-1]+1)
+                    {
+                      compressible = false;
+                      break;
+                    }
+
+                // if this cell is compressible, then copy only the first index and
+                // mark the compression in the active_fe_indices array
+                if (compressible == true)
+                  {
+                    new_dof_indices.push_back (dof_indices[dof_offsets[cell]]);
+
+                    // make sure that the current active_fe_index indicates
+                    // that this entry hasn't been compressed yet
+                    Assert ((signed_active_fe_index_type)active_fe_indices[cell] >= 0, ExcInternalError());
+
+                    // then mark the compression by storing the bitwise complement
+                    active_fe_indices[cell] = (active_fe_index_type)~(signed_active_fe_index_type)active_fe_indices[cell];
+                  }
+                else
+                  for (unsigned int i=dof_offsets[cell]; i<next_offset; ++i)
+                    new_dof_indices.push_back (dof_indices[i]);
+              }
+
+            // then move on to the next cell
+            cell = next_cell;
+          }
+        else
+          ++cell;
+
+      // finally swap old and new content
+      Assert (new_dof_indices.size() == new_size, ExcInternalError());
+      dof_indices.swap (new_dof_indices);
+    }
+
+
+
+    template <int dim, int spacedim>
+    void
+    DoFLevel::uncompress_data(const dealii::hp::FECollection<dim,spacedim> &fe_collection)
+    {
+      return;  // NOTE(review): unconditional return -- everything below is currently unreachable
+
+      if (dof_offsets.size() == 0 || dof_indices.size()==0)
+        return;
+
+      // in a first run through, count how many slots we need in the
+      // dof_indices array after uncompression.
+      unsigned int new_size = 0;
+      for (unsigned int cell=0; cell<dof_offsets.size(); ++cell)
+        if (dof_offsets[cell] != (offset_type)(-1))
+          {
+            // we know now that the slot for this cell is used. extract the
+            // active_fe_index for it and see how many entries we need
+            new_size += fe_collection[active_fe_index(cell)].template n_dofs_per_object<dim>();
+          }
+
+      // now allocate the new arrays and copy into them whatever we need
+      std::vector<types::global_dof_index> new_dof_indices;
+      new_dof_indices.reserve(new_size);
+      std::vector<offset_type> new_dof_offsets (dof_offsets.size(), (offset_type)(-1));
+      for (unsigned int cell=0; cell<dof_offsets.size(); )
+        // see if this cell is active on the current level
+        if (dof_offsets[cell] != (offset_type)(-1))
+          {
+            // find the next cell active on this level
+            unsigned int next_cell = cell+1;
+            while ((next_cell<dof_offsets.size()) &&
+                   (dof_offsets[next_cell] == (offset_type)(-1)))
+              ++next_cell;
+
+            const unsigned int next_offset = (next_cell < dof_offsets.size() ?
+                                              dof_offsets[next_cell] :
+                                              dof_indices.size());
+
+            // set offset for this cell
+            new_dof_offsets[cell] = new_dof_indices.size();
+
+            // see if we need to uncompress this set of dofs
+            if ((signed_active_fe_index_type)active_fe_indices[cell]>=0)
+              {
+                // apparently not. simply copy them
+                Assert (next_offset-dof_offsets[cell] == fe_collection[active_fe_indices[cell]].template n_dofs_per_object<dim>(),
+                        ExcInternalError());
+                for (unsigned int i=dof_offsets[cell]; i<next_offset; ++i)
+                  new_dof_indices.push_back (dof_indices[i]);
+              }
+            else
+              {
+                // apparently so. expand the single stored index into a consecutive run. NOTE(review): active_fe_indices[cell] is negative as a signed value in this branch, yet it is used to index fe_collection below and is never reset; active_fe_index(cell), as used in the counting pass, may be what is intended -- confirm
+                Assert (next_offset-dof_offsets[cell] == 1,
+                        ExcInternalError());
+                for (unsigned int i=0; i<fe_collection[active_fe_indices[cell]].template n_dofs_per_object<dim>(); ++i)
+                  new_dof_indices.push_back (dof_indices[dof_offsets[cell]]+i);
+              }
+
+            // then move on to the next cell
+            cell = next_cell;
+          }
+        else
+          ++cell;
+
+      // verify correct size, then swap arrays
+      Assert (new_dof_indices.size() == new_size, ExcInternalError());
+      dof_indices.swap (new_dof_indices);
+      dof_offsets.swap (new_dof_offsets);
+    }
+
+
+    std::size_t
+    DoFLevel::memory_consumption () const
+    {
+      return (MemoryConsumption::memory_consumption (active_fe_indices) +  // sum over the five member arrays
+              MemoryConsumption::memory_consumption (dof_indices) +
+              MemoryConsumption::memory_consumption (dof_offsets) +
+              MemoryConsumption::memory_consumption (cell_cache_offsets) +
+              MemoryConsumption::memory_consumption(cell_dof_indices_cache));
+    }
+
+
+    // explicit instantiations for all <dim,spacedim> pairs with 1 <= dim <= spacedim <= 3
+    template void DoFLevel::compress_data(const dealii::hp::FECollection<1,1> &);
+    template void DoFLevel::compress_data(const dealii::hp::FECollection<1,2> &);
+    template void DoFLevel::compress_data(const dealii::hp::FECollection<1,3> &);
+    template void DoFLevel::compress_data(const dealii::hp::FECollection<2,2> &);
+    template void DoFLevel::compress_data(const dealii::hp::FECollection<2,3> &);
+    template void DoFLevel::compress_data(const dealii::hp::FECollection<3,3> &);
+
+    template void DoFLevel::uncompress_data(const dealii::hp::FECollection<1,1> &);
+    template void DoFLevel::uncompress_data(const dealii::hp::FECollection<1,2> &);
+    template void DoFLevel::uncompress_data(const dealii::hp::FECollection<1,3> &);
+    template void DoFLevel::uncompress_data(const dealii::hp::FECollection<2,2> &);
+    template void DoFLevel::uncompress_data(const dealii::hp::FECollection<2,3> &);
+    template void DoFLevel::uncompress_data(const dealii::hp::FECollection<3,3> &);
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/hp/fe_collection.cc b/source/hp/fe_collection.cc
new file mode 100644
index 0000000..5d3ba28
--- /dev/null
+++ b/source/hp/fe_collection.cc
@@ -0,0 +1,420 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/hp/fe_collection.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace hp
+{
+  template <int dim, int spacedim>
+  unsigned int
+  FECollection<dim,spacedim>::find_least_face_dominating_fe (const std::set<unsigned int> &fes) const
+  {
+    // Return the index of an element of this collection that dominates all
+    // elements listed in @p fes on faces and that is itself dominated by all
+    // other such elements, or numbers::invalid_unsigned_int if none exists.
+    //
+    // Shortcut: if @p fes holds a single element X, then X is in the set of
+    // dominating elements (each element can dominate itself), and nothing in
+    // that set can be weaker than X, since anything weaker would not dominate
+    // X. As we only promise to return one of the weakest dominating elements,
+    // we may as well return X right away.
+    if (fes.size()==1)
+      return *fes.begin();
+
+    const hp::FECollection<dim,spacedim> &fe_collection = *this;
+    std::set<unsigned int> candidate_fes;
+
+    // first pass: find every element of the collection that can dominate all of @p fes:
+    for (unsigned int cur_fe = 0; cur_fe < fe_collection.size(); cur_fe++)
+      {
+        FiniteElementDomination::Domination domination = FiniteElementDomination::no_requirements;
+        // combine the comparisons of cur_fe against each FE in @p fes:
+        for (std::set<unsigned int>::const_iterator it = fes.begin();
+             it!=fes.end(); ++it)
+          {
+            Assert (*it < fe_collection.size(),
+                    ExcIndexRangeType<unsigned int> (*it, 0, fe_collection.size()));
+            domination = domination &
+                         fe_collection[cur_fe].compare_for_face_domination
+                         (fe_collection[*it]);
+          }
+
+        // cur_fe dominates all requested elements: remember it as a candidate.
+        if (domination == FiniteElementDomination::this_element_dominates ||
+            domination == FiniteElementDomination::either_element_can_dominate /*covers cases like {Q2,Q3,Q1,Q1} with fes={2,3}*/)
+          candidate_fes.insert(cur_fe);
+      }
+
+    // second pass: among the candidates, return the first one (in index order)
+    // that is dominated by all others and thus should represent the largest FE space.
+    if (candidate_fes.size() == 1)
+      {
+        return *candidate_fes.begin();
+      }
+    else
+      for (std::set<unsigned int>::const_iterator it = candidate_fes.begin(); it!=candidate_fes.end(); ++it)
+        {
+          FiniteElementDomination::Domination domination = FiniteElementDomination::no_requirements;
+          for (std::set<unsigned int>::const_iterator ito = candidate_fes.begin(); ito!=candidate_fes.end(); ++ito)
+            if (it != ito)
+              {
+                domination = domination &
+                             fe_collection[*it].compare_for_face_domination(fe_collection[*ito]);
+              }
+
+          if (domination == FiniteElementDomination::other_element_dominates ||
+              domination == FiniteElementDomination::either_element_can_dominate /*covers cases like candidate_fes={Q1,Q1}*/)
+            return *it;
+        }
+    // no suitable element was found: report that with invalid_unsigned_int
+    return numbers::invalid_unsigned_int;
+  }
+
+  template <int dim, int spacedim>
+  FECollection<dim,spacedim>::FECollection ()
+  {}
+
+
+
+  template <int dim, int spacedim>
+  FECollection<dim,spacedim>::FECollection (const FiniteElement<dim,spacedim> &fe)
+  {
+    push_back (fe);
+  }
+
+
+
+  template <int dim, int spacedim>
+  FECollection<dim,spacedim>::FECollection (const FiniteElement<dim,spacedim> &fe1,
+                                            const FiniteElement<dim,spacedim> &fe2)
+  {
+    push_back(fe1);
+    push_back(fe2);
+  }
+
+
+
+  template <int dim, int spacedim>
+  FECollection<dim,spacedim>::FECollection (const FiniteElement<dim,spacedim> &fe1,
+                                            const FiniteElement<dim,spacedim> &fe2,
+                                            const FiniteElement<dim,spacedim> &fe3)
+  {
+    push_back(fe1);
+    push_back(fe2);
+    push_back(fe3);
+  }
+
+
+
+  template <int dim, int spacedim>
+  FECollection<dim,spacedim>::FECollection (const FiniteElement<dim,spacedim> &fe1,
+                                            const FiniteElement<dim,spacedim> &fe2,
+                                            const FiniteElement<dim,spacedim> &fe3,
+                                            const FiniteElement<dim,spacedim> &fe4)
+  {
+    push_back(fe1);
+    push_back(fe2);
+    push_back(fe3);
+    push_back(fe4);
+  }
+
+
+
+  template <int dim, int spacedim>
+  FECollection<dim,spacedim>::FECollection (const FiniteElement<dim,spacedim> &fe1,
+                                            const FiniteElement<dim,spacedim> &fe2,
+                                            const FiniteElement<dim,spacedim> &fe3,
+                                            const FiniteElement<dim,spacedim> &fe4,
+                                            const FiniteElement<dim,spacedim> &fe5)
+  {
+    push_back(fe1);
+    push_back(fe2);
+    push_back(fe3);
+    push_back(fe4);
+    push_back(fe5);
+  }
+
+
+
+  template <int dim, int spacedim>
+  FECollection<dim,spacedim>::
+  FECollection (const std::vector<const FiniteElement<dim,spacedim>*>  &fes)
+  {
+    Assert (fes.size() > 0,
+            ExcMessage ("Need to pass at least one finite element."));
+    typedef typename std::vector<const FiniteElement<dim,spacedim>*>::const_iterator fe_iterator;
+    for (fe_iterator fe = fes.begin(); fe != fes.end(); ++fe)
+      push_back (**fe); // append the pointed-to elements in the order given
+  }
+
+
+
+  template <int dim, int spacedim>
+  FECollection<dim,spacedim>::
+  FECollection (const FECollection<dim,spacedim> &fe_collection)
+    :
+    Subscriptor (),
+    // copy the array of shared pointers; the finite
+    // elements themselves are not duplicated.
+    // nothing bad should happen -- the two
+    // collections simply point to the same
+    // finite element objects, and the last
+    // shared pointer to die will delete them.
+    //
+    // the Subscriptor base is default-constructed
+    // explicitly rather than copied from
+    // fe_collection (presumably so that no
+    // subscriptions are inherited -- TODO confirm)
+    finite_elements (fe_collection.finite_elements)
+  {}
+
+
+
+  template <int dim, int spacedim>
+  void FECollection<dim,spacedim>::push_back (const FiniteElement<dim,spacedim> &new_fe)
+  {
+    // Every element of a collection must have the same number of vector
+    // components. Comparing against the first stored element suffices:
+    // all later ones passed this very test when they were added.
+    if (!finite_elements.empty())
+      Assert (new_fe.n_components() == finite_elements[0]->n_components(),
+              ExcMessage ("All elements inside a collection need to have the "
+                          "same number of vector components!"));
+
+    // store a clone of the element, owned through a shared pointer
+    const std_cxx11::shared_ptr<const FiniteElement<dim,spacedim> >
+    owned_copy (new_fe.clone());
+    finite_elements.push_back (owned_copy);
+  }
+
+
+
+  template <int dim, int spacedim>
+  ComponentMask
+  FECollection<dim,spacedim>::
+  component_mask (const FEValuesExtractors::Scalar &scalar) const
+  {
+    Assert (size() > 0,
+            ExcMessage ("This collection contains no finite element."));
+
+    // get the mask from the first element of the collection
+    const ComponentMask mask = (*this)[0].component_mask(scalar);
+
+    // but then also verify that the other elements of the collection
+    // would return the same mask
+    for (unsigned int c=1; c<size(); ++c)
+      Assert (mask == (*this)[c].component_mask(scalar),
+              ExcInternalError());
+
+    return mask;
+  }
+
+
+  template <int dim, int spacedim>
+  ComponentMask
+  FECollection<dim,spacedim>::
+  component_mask (const FEValuesExtractors::Vector &vector) const
+  {
+    Assert (size() > 0,
+            ExcMessage ("This collection contains no finite element."));
+
+    // get the mask from the first element of the collection
+    const ComponentMask mask = (*this)[0].component_mask(vector);
+
+    // but then also verify that the other elements of the collection
+    // would return the same mask
+    for (unsigned int c=1; c<size(); ++c)
+      Assert (mask == (*this)[c].component_mask(vector),
+              ExcInternalError());
+
+    return mask;
+  }
+
+
+  template <int dim, int spacedim>
+  ComponentMask
+  FECollection<dim,spacedim>::
+  component_mask (const FEValuesExtractors::SymmetricTensor<2> &sym_tensor) const
+  {
+    Assert (size() > 0,
+            ExcMessage ("This collection contains no finite element."));
+
+    // get the mask from the first element of the collection
+    const ComponentMask mask = (*this)[0].component_mask(sym_tensor);
+
+    // but then also verify that the other elements of the collection
+    // would return the same mask
+    for (unsigned int c=1; c<size(); ++c)
+      Assert (mask == (*this)[c].component_mask(sym_tensor),
+              ExcInternalError());
+
+    return mask;
+  }
+
+
+  template <int dim, int spacedim>
+  ComponentMask
+  FECollection<dim,spacedim>::
+  component_mask (const BlockMask &block_mask) const
+  {
+    Assert (size() > 0,
+            ExcMessage ("This collection contains no finite element."));
+
+    // get the mask from the first element of the collection
+    const ComponentMask mask = (*this)[0].component_mask(block_mask);
+
+    // but then also verify that the other elements of the collection
+    // would return the same mask
+    for (unsigned int c=1; c<size(); ++c)
+      Assert (mask == (*this)[c].component_mask(block_mask),
+              ExcMessage ("Not all elements of this collection agree on what "
+                          "the appropriate mask should be."));
+
+    return mask;
+  }
+
+
+  template <int dim, int spacedim>
+  BlockMask
+  FECollection<dim,spacedim>::
+  block_mask (const FEValuesExtractors::Scalar &scalar) const
+  {
+    Assert (size() > 0,
+            ExcMessage ("This collection contains no finite element."));
+
+    // get the mask from the first element of the collection
+    const BlockMask mask = (*this)[0].block_mask(scalar);
+
+    // but then also verify that the other elements of the collection
+    // would return the same mask
+    for (unsigned int c=1; c<size(); ++c)
+      Assert (mask == (*this)[c].block_mask(scalar),
+              ExcMessage ("Not all elements of this collection agree on what "
+                          "the appropriate mask should be."));
+
+    return mask;
+  }
+
+
+  template <int dim, int spacedim>
+  BlockMask
+  FECollection<dim,spacedim>::
+  block_mask (const FEValuesExtractors::Vector &vector) const
+  {
+    Assert (size() > 0,
+            ExcMessage ("This collection contains no finite element."));
+
+    // get the mask from the first element of the collection
+    const BlockMask mask = (*this)[0].block_mask(vector);
+
+    // but then also verify that the other elements of the collection
+    // would return the same mask
+    for (unsigned int c=1; c<size(); ++c)
+      Assert (mask == (*this)[c].block_mask(vector),
+              ExcMessage ("Not all elements of this collection agree on what "
+                          "the appropriate mask should be."));
+
+    return mask;
+  }
+
+
+  template <int dim, int spacedim>
+  BlockMask
+  FECollection<dim,spacedim>::
+  block_mask (const FEValuesExtractors::SymmetricTensor<2> &sym_tensor) const
+  {
+    Assert (size() > 0,
+            ExcMessage ("This collection contains no finite element."));
+
+    // get the mask from the first element of the collection
+    const BlockMask mask = (*this)[0].block_mask(sym_tensor);
+
+    // but then also verify that the other elements of the collection
+    // would return the same mask
+    for (unsigned int c=1; c<size(); ++c)
+      Assert (mask == (*this)[c].block_mask(sym_tensor),
+              ExcMessage ("Not all elements of this collection agree on what "
+                          "the appropriate mask should be."));
+
+    return mask;
+  }
+
+
+
+  template <int dim, int spacedim>
+  BlockMask
+  FECollection<dim,spacedim>::
+  block_mask (const ComponentMask &component_mask) const
+  {
+    Assert (size() > 0,
+            ExcMessage ("This collection contains no finite element."));
+
+    // get the mask from the first element of the collection
+    const BlockMask mask = (*this)[0].block_mask(component_mask);
+
+    // but then also verify that the other elements of the collection
+    // would return the same mask
+    for (unsigned int c=1; c<size(); ++c)
+      Assert (mask == (*this)[c].block_mask(component_mask),
+              ExcMessage ("Not all elements of this collection agree on what "
+                          "the appropriate mask should be."));
+
+    return mask;
+  }
+
+
+
+  template <int dim, int spacedim>
+  unsigned int
+  FECollection<dim,spacedim>::n_blocks () const
+  {
+    // the block count of the first element is returned; all others must agree
+    Assert (finite_elements.size () > 0, ExcNoFiniteElements());
+    const unsigned int nb = finite_elements[0]->n_blocks ();
+    for (unsigned int i=1; i<finite_elements.size(); ++i)
+      Assert (finite_elements[i]->n_blocks() == nb,
+              ExcMessage ("Not all finite elements in this collection have "
+                          "the same number of blocks."));
+
+    return nb;
+  }
+
+
+
+  template <int dim, int spacedim>
+  std::size_t
+  FECollection<dim,spacedim>::memory_consumption () const
+  {
+    // the collection object itself plus the array of shared pointers ...
+    std::size_t total = sizeof(*this);
+    total += MemoryConsumption::memory_consumption (finite_elements);
+    // ... plus every finite element object the pointers refer to
+    for (unsigned int fe=0; fe<finite_elements.size(); ++fe)
+      total += finite_elements[fe]->memory_consumption();
+    return total;
+  }
+}
+
+
+
+// explicit instantiations
+#include "fe_collection.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/hp/fe_collection.inst.in b/source/hp/fe_collection.inst.in
new file mode 100644
index 0000000..4a5b383
--- /dev/null
+++ b/source/hp/fe_collection.inst.in
@@ -0,0 +1,30 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+for (deal_II_dimension : DIMENSIONS)
+{
+  namespace hp
+  \{
+    template class FECollection<deal_II_dimension>;
+
+#if deal_II_dimension != 3
+    template class FECollection<deal_II_dimension, deal_II_dimension+1>;
+#endif
+#if deal_II_dimension == 3
+    template class FECollection<1,3>;
+#endif
+  \}
+}
diff --git a/source/hp/fe_values.cc b/source/hp/fe_values.cc
new file mode 100644
index 0000000..940df00
--- /dev/null
+++ b/source/hp/fe_values.cc
@@ -0,0 +1,476 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/hp/fe_values.h>
+#include <deal.II/fe/mapping_q1.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace internal
+{
+
+  namespace hp
+  {
+// -------------------------- FEValuesBase -------------------------
+
+    template <int dim, int q_dim, class FEValuesType>
+    FEValuesBase<dim,q_dim,FEValuesType>::FEValuesBase
+    (const dealii::hp::MappingCollection<dim,FEValuesType::space_dimension> &mapping_collection,
+     const dealii::hp::FECollection<dim,FEValuesType::space_dimension>      &fe_collection,
+     const dealii::hp::QCollection<q_dim>                                   &q_collection,
+     const UpdateFlags                                                       update_flags)
+      :
+      fe_collection (&fe_collection),
+      mapping_collection (&mapping_collection),
+      q_collection (q_collection),
+      fe_values_table (fe_collection.size(),
+                       mapping_collection.size(),
+                       q_collection.size()),
+      present_fe_values_index (numbers::invalid_unsigned_int,
+                               numbers::invalid_unsigned_int,
+                               numbers::invalid_unsigned_int),
+      update_flags (update_flags)
+    {}
+
+
+    template <int dim, int q_dim, class FEValuesType>
+    FEValuesBase<dim,q_dim,FEValuesType>::FEValuesBase
+    (const dealii::hp::FECollection<dim,FEValuesType::space_dimension> &fe_collection,
+     const dealii::hp::QCollection<q_dim>                              &q_collection,
+     const UpdateFlags                                                  update_flags)
+      :
+      fe_collection (&fe_collection),
+      mapping_collection (&dealii::hp::StaticMappingQ1<dim,FEValuesType::space_dimension>::
+                          mapping_collection),
+      q_collection (q_collection),
+      fe_values_table (fe_collection.size(),
+                       1,
+                       q_collection.size()),
+      present_fe_values_index (numbers::invalid_unsigned_int,
+                               numbers::invalid_unsigned_int,
+                               numbers::invalid_unsigned_int),
+      update_flags (update_flags)
+    {}
+
+
+
+    template <int dim, int q_dim, class FEValuesType>
+    FEValuesType &
+    FEValuesBase<dim,q_dim,FEValuesType>::select_fe_values
+    (const unsigned int fe_index,
+     const unsigned int mapping_index,
+     const unsigned int q_index)
+    {
+      // all three indices must refer to existing entries
+      Assert (fe_index < fe_collection->size(),
+              ExcIndexRange (fe_index, 0, fe_collection->size()));
+      Assert (mapping_index < mapping_collection->size(),
+              ExcIndexRange (mapping_index, 0, mapping_collection->size()));
+      Assert (q_index < q_collection.size(),
+              ExcIndexRange (q_index, 0, q_collection.size()));
+
+      // record the requested triple of indices as
+      // the currently selected combination
+      const TableIndices<3> requested (fe_index, mapping_index, q_index);
+      present_fe_values_index = requested;
+
+      // the objects in the table are created
+      // lazily: build one for this combination
+      // only if its slot is still empty
+      if (fe_values_table(requested).get() == 0)
+        {
+          fe_values_table(requested).reset
+          (new FEValuesType ((*mapping_collection)[mapping_index],
+                             (*fe_collection)[fe_index],
+                             q_collection[q_index],
+                             update_flags));
+        }
+
+      // at this point the slot is guaranteed
+      // to be filled, so hand out the object
+      // it holds
+      return *fe_values_table(requested);
+    }
+  }
+}
+
+
+
+namespace hp
+{
+
+// -------------------------- FEValues -------------------------
+
+
+  template <int dim, int spacedim>
+  FEValues<dim,spacedim>::FEValues (const hp::MappingCollection<dim,spacedim> &mapping,
+                                    const hp::FECollection<dim,spacedim>      &fe_collection,
+                                    const hp::QCollection<dim>       &q_collection,
+                                    const UpdateFlags                 update_flags)
+    :
+    internal::hp::FEValuesBase<dim,dim,dealii::FEValues<dim,spacedim> > (mapping,
+        fe_collection,
+        q_collection,
+        update_flags)
+  {}
+
+
+  template <int dim, int spacedim>
+  FEValues<dim,spacedim>::FEValues (const hp::FECollection<dim,spacedim> &fe_collection,
+                                    const hp::QCollection<dim>      &q_collection,
+                                    const UpdateFlags            update_flags)
+    :
+    internal::hp::FEValuesBase<dim,dim,dealii::FEValues<dim,spacedim> > (fe_collection,
+        q_collection,
+        update_flags)
+  {}
+
+
+  template <int dim, int spacedim>
+  template <typename DoFHandlerType, bool lda>
+  void
+  FEValues<dim,spacedim>::reinit (const TriaIterator<DoFCellAccessor<DoFHandlerType,lda> > cell,
+                                  const unsigned int q_index,
+                                  const unsigned int mapping_index,
+                                  const unsigned int fe_index)
+  {
+    // Resolve the indices to be used. An argument left at
+    // numbers::invalid_unsigned_int means "not given": the finite
+    // element index then defaults to the cell's active_fe_index(),
+    // and so do the quadrature and mapping indices unless the
+    // respective collection holds at most one object, in which
+    // case index 0 is used.
+    const unsigned int not_given = numbers::invalid_unsigned_int;
+
+    const unsigned int real_q_index
+      = (q_index != not_given
+         ?
+         q_index
+         :
+         (this->q_collection.size() > 1 ? cell->active_fe_index() : 0));
+
+    const unsigned int real_mapping_index
+      = (mapping_index != not_given
+         ?
+         mapping_index
+         :
+         (this->mapping_collection->size() > 1 ? cell->active_fe_index() : 0));
+
+    const unsigned int real_fe_index
+      = (fe_index != not_given ? fe_index : cell->active_fe_index());
+
+    // make sure each resolved index addresses an existing entry
+    Assert (real_q_index < this->q_collection.size(),
+            ExcIndexRange (real_q_index, 0, this->q_collection.size()));
+    Assert (real_mapping_index < this->mapping_collection->size(),
+            ExcIndexRange (real_mapping_index, 0, this->mapping_collection->size()));
+    Assert (real_fe_index < this->fe_collection->size(),
+            ExcIndexRange (real_fe_index, 0, this->fe_collection->size()));
+
+    // select the object that belongs to this
+    // combination of indices and initialize
+    // it for the given cell
+    this->select_fe_values (real_fe_index,
+                            real_mapping_index,
+                            real_q_index).reinit (cell);
+  }
+
+
+
+  template <int dim, int spacedim>
+  void
+  FEValues<dim,spacedim>::reinit (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                                  const unsigned int q_index,
+                                  const unsigned int mapping_index,
+                                  const unsigned int fe_index)
+  {
+    // Resolve the indices to be used. Any argument left at
+    // numbers::invalid_unsigned_int is replaced by 0, i.e. the
+    // first object of the respective collection is selected;
+    // the cell is not consulted for this.
+    const unsigned int not_given = numbers::invalid_unsigned_int;
+
+    const unsigned int real_q_index
+      = (q_index != not_given ? q_index : 0);
+
+    const unsigned int real_mapping_index
+      = (mapping_index != not_given ? mapping_index : 0);
+
+    const unsigned int real_fe_index
+      = (fe_index != not_given ? fe_index : 0);
+
+    // make sure each resolved index addresses an existing entry
+    Assert (real_q_index < this->q_collection.size(),
+            ExcIndexRange (real_q_index, 0, this->q_collection.size()));
+    Assert (real_mapping_index < this->mapping_collection->size(),
+            ExcIndexRange (real_mapping_index, 0, this->mapping_collection->size()));
+    Assert (real_fe_index < this->fe_collection->size(),
+            ExcIndexRange (real_fe_index, 0, this->fe_collection->size()));
+
+    // select the object that belongs to this
+    // combination of indices and initialize
+    // it for the given cell
+    this->select_fe_values (real_fe_index,
+                            real_mapping_index,
+                            real_q_index).reinit (cell);
+  }
+
+
+// -------------------------- FEFaceValues -------------------------
+
+
+  template <int dim, int spacedim>
+  FEFaceValues<dim,spacedim>::FEFaceValues (const hp::MappingCollection<dim,spacedim> &mapping,
+                                            const hp::FECollection<dim,spacedim>  &fe_collection,
+                                            const hp::QCollection<dim-1> &q_collection,
+                                            const UpdateFlags         update_flags)
+    :
+    internal::hp::FEValuesBase<dim,dim-1,dealii::FEFaceValues<dim,spacedim> > (mapping,
+        fe_collection,
+        q_collection,
+        update_flags)
+  {}
+
+
+  template <int dim, int spacedim>
+  FEFaceValues<dim,spacedim>::FEFaceValues (const hp::FECollection<dim,spacedim>  &fe_collection,
+                                            const hp::QCollection<dim-1> &q_collection,
+                                            const UpdateFlags         update_flags)
+    :
+    internal::hp::FEValuesBase<dim,dim-1,dealii::FEFaceValues<dim,spacedim> > (fe_collection,
+        q_collection,
+        update_flags)
+  {}
+
+
+  template <int dim, int spacedim>
+  template <typename DoFHandlerType, bool lda>
+  void
+  FEFaceValues<dim,spacedim>::reinit (const TriaIterator<DoFCellAccessor<DoFHandlerType,lda> > cell,
+                                      const unsigned int face_no,
+                                      const unsigned int q_index,
+                                      const unsigned int mapping_index,
+                                      const unsigned int fe_index)
+  {
+    // Resolve the indices to be used. An argument left at
+    // numbers::invalid_unsigned_int means "not given": the finite
+    // element index then defaults to the cell's active_fe_index(),
+    // and so do the quadrature and mapping indices unless the
+    // respective collection holds at most one object, in which
+    // case index 0 is used.
+    const unsigned int not_given = numbers::invalid_unsigned_int;
+
+    const unsigned int real_q_index
+      = (q_index != not_given
+         ?
+         q_index
+         :
+         (this->q_collection.size() > 1 ? cell->active_fe_index() : 0));
+
+    const unsigned int real_mapping_index
+      = (mapping_index != not_given
+         ?
+         mapping_index
+         :
+         (this->mapping_collection->size() > 1 ? cell->active_fe_index() : 0));
+
+    const unsigned int real_fe_index
+      = (fe_index != not_given ? fe_index : cell->active_fe_index());
+
+    // make sure each resolved index addresses an existing entry
+    Assert (real_q_index < this->q_collection.size(),
+            ExcIndexRange (real_q_index, 0, this->q_collection.size()));
+    Assert (real_mapping_index < this->mapping_collection->size(),
+            ExcIndexRange (real_mapping_index, 0, this->mapping_collection->size()));
+    Assert (real_fe_index < this->fe_collection->size(),
+            ExcIndexRange (real_fe_index, 0, this->fe_collection->size()));
+
+    // select the object that belongs to this
+    // combination of indices and initialize
+    // it for the given face of the cell
+    this->select_fe_values (real_fe_index,
+                            real_mapping_index,
+                            real_q_index).reinit (cell, face_no);
+  }
+
+
+
+  template <int dim, int spacedim>
+  void
+  FEFaceValues<dim,spacedim>::reinit (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                                      const unsigned int face_no,
+                                      const unsigned int q_index,
+                                      const unsigned int mapping_index,
+                                      const unsigned int fe_index)
+  {
+    // Resolve the indices to be used. Any argument left at
+    // numbers::invalid_unsigned_int is replaced by 0, i.e. the
+    // first object of the respective collection is selected;
+    // the cell is not consulted for this.
+    const unsigned int not_given = numbers::invalid_unsigned_int;
+
+    const unsigned int real_q_index
+      = (q_index != not_given ? q_index : 0);
+
+    const unsigned int real_mapping_index
+      = (mapping_index != not_given ? mapping_index : 0);
+
+    const unsigned int real_fe_index
+      = (fe_index != not_given ? fe_index : 0);
+
+    // make sure each resolved index addresses an existing entry
+    Assert (real_q_index < this->q_collection.size(),
+            ExcIndexRange (real_q_index, 0, this->q_collection.size()));
+    Assert (real_mapping_index < this->mapping_collection->size(),
+            ExcIndexRange (real_mapping_index, 0, this->mapping_collection->size()));
+    Assert (real_fe_index < this->fe_collection->size(),
+            ExcIndexRange (real_fe_index, 0, this->fe_collection->size()));
+
+    // select the object that belongs to this
+    // combination of indices and initialize
+    // it for the given face of the cell
+    this->select_fe_values (real_fe_index,
+                            real_mapping_index,
+                            real_q_index).reinit (cell, face_no);
+  }
+
+
+// -------------------------- FESubfaceValues -------------------------
+
+
+  template <int dim, int spacedim>
+  FESubfaceValues<dim,spacedim>::FESubfaceValues (const hp::MappingCollection<dim,spacedim> &mapping,
+                                                  const hp::FECollection<dim,spacedim>  &fe_collection,
+                                                  const hp::QCollection<dim-1> &q_collection,
+                                                  const UpdateFlags         update_flags)
+    :
+    internal::hp::FEValuesBase<dim,dim-1,dealii::FESubfaceValues<dim,spacedim> > (mapping,
+        fe_collection,
+        q_collection,
+        update_flags)
+  {}
+
+
+  template <int dim, int spacedim>
+  FESubfaceValues<dim,spacedim>::FESubfaceValues (const hp::FECollection<dim,spacedim>  &fe_collection,
+                                                  const hp::QCollection<dim-1> &q_collection,
+                                                  const UpdateFlags         update_flags)
+    :
+    internal::hp::FEValuesBase<dim,dim-1,dealii::FESubfaceValues<dim,spacedim> > (fe_collection,
+        q_collection,
+        update_flags)
+  {}
+
+
+  template <int dim, int spacedim>
+  template <typename DoFHandlerType, bool lda>
+  void
+  FESubfaceValues<dim,spacedim>::reinit (const TriaIterator<DoFCellAccessor<DoFHandlerType,lda> > cell,
+                                         const unsigned int face_no,
+                                         const unsigned int subface_no,
+                                         const unsigned int q_index,
+                                         const unsigned int mapping_index,
+                                         const unsigned int fe_index)
+  {
+    // Resolve the indices to be used. An argument left at
+    // numbers::invalid_unsigned_int means "not given": the finite
+    // element index then defaults to the cell's active_fe_index(),
+    // and so do the quadrature and mapping indices unless the
+    // respective collection holds at most one object, in which
+    // case index 0 is used.
+    const unsigned int not_given = numbers::invalid_unsigned_int;
+
+    const unsigned int real_q_index
+      = (q_index != not_given
+         ?
+         q_index
+         :
+         (this->q_collection.size() > 1 ? cell->active_fe_index() : 0));
+
+    const unsigned int real_mapping_index
+      = (mapping_index != not_given
+         ?
+         mapping_index
+         :
+         (this->mapping_collection->size() > 1 ? cell->active_fe_index() : 0));
+
+    const unsigned int real_fe_index
+      = (fe_index != not_given ? fe_index : cell->active_fe_index());
+
+    // make sure each resolved index addresses an existing entry
+    Assert (real_q_index < this->q_collection.size(),
+            ExcIndexRange (real_q_index, 0, this->q_collection.size()));
+    Assert (real_mapping_index < this->mapping_collection->size(),
+            ExcIndexRange (real_mapping_index, 0, this->mapping_collection->size()));
+    Assert (real_fe_index < this->fe_collection->size(),
+            ExcIndexRange (real_fe_index, 0, this->fe_collection->size()));
+
+    // select the object that belongs to this
+    // combination of indices and initialize
+    // it for the given subface of the cell
+    this->select_fe_values (real_fe_index,
+                            real_mapping_index,
+                            real_q_index).reinit (cell, face_no, subface_no);
+  }
+
+
+
+  template <int dim, int spacedim>
+  void
+  FESubfaceValues<dim,spacedim>::reinit (const typename Triangulation<dim,spacedim>::cell_iterator &cell,
+                                         const unsigned int face_no,
+                                         const unsigned int subface_no,
+                                         const unsigned int q_index,
+                                         const unsigned int mapping_index,
+                                         const unsigned int fe_index)
+  {
+    // Resolve the indices to be used. Any argument left at
+    // numbers::invalid_unsigned_int is replaced by 0, i.e. the
+    // first object of the respective collection is selected;
+    // the cell is not consulted for this.
+    const unsigned int not_given = numbers::invalid_unsigned_int;
+
+    const unsigned int real_q_index
+      = (q_index != not_given ? q_index : 0);
+
+    const unsigned int real_mapping_index
+      = (mapping_index != not_given ? mapping_index : 0);
+
+    const unsigned int real_fe_index
+      = (fe_index != not_given ? fe_index : 0);
+
+    // make sure each resolved index addresses an existing entry
+    Assert (real_q_index < this->q_collection.size(),
+            ExcIndexRange (real_q_index, 0, this->q_collection.size()));
+    Assert (real_mapping_index < this->mapping_collection->size(),
+            ExcIndexRange (real_mapping_index, 0, this->mapping_collection->size()));
+    Assert (real_fe_index < this->fe_collection->size(),
+            ExcIndexRange (real_fe_index, 0, this->fe_collection->size()));
+
+    // select the object that belongs to this
+    // combination of indices and initialize
+    // it for the given subface of the cell
+    this->select_fe_values (real_fe_index,
+                            real_mapping_index,
+                            real_q_index).reinit (cell, face_no, subface_no);
+  }
+}
+
+
+// explicit instantiations
+#include "fe_values.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/hp/fe_values.inst.in b/source/hp/fe_values.inst.in
new file mode 100644
index 0000000..57c8a5a
--- /dev/null
+++ b/source/hp/fe_values.inst.in
@@ -0,0 +1,102 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+for (deal_II_dimension : DIMENSIONS)
+{
+namespace internal  // class instantiations for spacedim == dim
+\{
+  namespace hp
+  \{
+    template class FEValuesBase<deal_II_dimension,deal_II_dimension,
+                                dealii::FEValues<deal_II_dimension> >;
+    template class FEValuesBase<deal_II_dimension,deal_II_dimension-1,
+                                dealii::FEFaceValues<deal_II_dimension> >;
+    template class FEValuesBase<deal_II_dimension,deal_II_dimension-1,
+                                dealii::FESubfaceValues<deal_II_dimension> >;
+  \}
+\}
+
+namespace hp
+\{
+  template class FEValues<deal_II_dimension>;
+  template class FEFaceValues<deal_II_dimension, deal_II_dimension>;
+  template class FESubfaceValues<deal_II_dimension, deal_II_dimension>;
+\}
+
+
+
+#if deal_II_dimension != 3  // same classes for spacedim == dim+1
+
+namespace internal
+\{
+  namespace hp
+  \{
+    template class FEValuesBase<deal_II_dimension,deal_II_dimension,
+                                dealii::FEValues<deal_II_dimension,deal_II_dimension+1> >;
+    template class FEValuesBase<deal_II_dimension,deal_II_dimension-1,
+                                dealii::FEFaceValues<deal_II_dimension,deal_II_dimension+1> >;
+    template class FEValuesBase<deal_II_dimension,deal_II_dimension-1,
+                                dealii::FESubfaceValues<deal_II_dimension,deal_II_dimension+1> >;
+  \}
+\}
+
+namespace hp
+\{
+  template class FEValues<deal_II_dimension, deal_II_dimension+1>;
+  template class FEFaceValues<deal_II_dimension, deal_II_dimension+1>;
+  template class FESubfaceValues<deal_II_dimension, deal_II_dimension+1>;
+\}
+#endif  // deal_II_dimension != 3
+
+#if deal_II_dimension == 3  // dim = 1, spacedim = 3: emitted in the deal_II_dimension == 3 pass
+
+namespace internal
+\{
+  namespace hp
+  \{
+    template class FEValuesBase<1,1,
+                                dealii::FEValues<1,3> >;
+    template class FEValuesBase<1,1-1,
+                                dealii::FEFaceValues<1,3> >;
+    template class FEValuesBase<1,1-1,
+                                dealii::FESubfaceValues<1,3> >;
+  \}
+\}
+
+namespace hp
+\{
+  template class FEValues<1, 3>;
+  template class FEFaceValues<1, 3>;
+  template class FESubfaceValues<1, 3>;
+\}
+#endif  // deal_II_dimension == 3
+}
+
+for (dof_handler : DOFHANDLER_TEMPLATES; deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS; lda : BOOL)
+{
+namespace hp  // reinit() instantiations for DoFCellAccessor-based cell iterators
+\{
+#if deal_II_dimension <= deal_II_space_dimension  // no instantiation when dim > spacedim
+
+    template void FEValues<deal_II_dimension,deal_II_space_dimension>::reinit(
+    TriaIterator<DoFCellAccessor<dealii::dof_handler<deal_II_dimension,deal_II_space_dimension>, lda> >, unsigned int, unsigned int, unsigned int);
+    template void FEFaceValues<deal_II_dimension,deal_II_space_dimension>::reinit(
+    TriaIterator<DoFCellAccessor<dealii::dof_handler<deal_II_dimension,deal_II_space_dimension>, lda> >, unsigned int, unsigned int, unsigned int, unsigned int);
+    template void FESubfaceValues<deal_II_dimension,deal_II_space_dimension>::reinit(
+    TriaIterator<DoFCellAccessor<dealii::dof_handler<deal_II_dimension,deal_II_space_dimension>, lda> >, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int);
+#endif  // deal_II_dimension <= deal_II_space_dimension
+\}
+}
diff --git a/source/hp/mapping_collection.cc b/source/hp/mapping_collection.cc
new file mode 100644
index 0000000..b1bc5f0
--- /dev/null
+++ b/source/hp/mapping_collection.cc
@@ -0,0 +1,117 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/hp/mapping_collection.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace hp
+{
+
+  template<int dim, int spacedim>
+  MappingCollection<dim,spacedim>::MappingCollection ()
+  {}  // default constructor: no mapping is added here
+
+
+
+  // Construct a collection whose only element is a clone of the given mapping.
+  template<int dim, int spacedim>
+  MappingCollection<dim,spacedim>::
+  MappingCollection (const Mapping<dim,spacedim> &mapping)
+  {
+    this->push_back (mapping);
+  }
+
+
+
+  template<int dim, int spacedim>
+  MappingCollection<dim,spacedim>::
+  MappingCollection (const MappingCollection<dim,spacedim> &mapping_collection)
+    :
+    Subscriptor (),
+    // Shallow copy: only the array of
+    // shared pointers is duplicated, the
+    // mappings themselves are not cloned.
+    // Nothing bad should happen -- both
+    // collections simply point to the
+    // same mapping objects, and the last
+    // shared pointer to die will delete
+    // them. The Subscriptor base above is
+    // default-constructed instead of
+    // being copied from
+    // mapping_collection.
+    mappings (mapping_collection.mappings)
+  {}
+
+
+
+  // Memory estimate: size of this object plus what the mappings array reports.
+  template<int dim, int spacedim>
+  std::size_t
+  MappingCollection<dim,spacedim>::memory_consumption () const
+  {
+    return sizeof(*this) + MemoryConsumption::memory_consumption (mappings);
+  }
+
+
+
+  // Append a clone of new_mapping; the collection holds it through a shared pointer.
+  template<int dim, int spacedim>
+  void MappingCollection<dim,spacedim>::push_back (const Mapping<dim,spacedim> &new_mapping)
+  {
+    const std_cxx11::shared_ptr<const Mapping<dim,spacedim> > owned_clone (new_mapping.clone());
+    mappings.push_back (owned_clone);
+  }
+
+//---------------------------------------------------------------------------
+
+
+  namespace
+  {
+    /**
+     * Return a reference to a function-local static MappingQ1 object (handed
+     * out through a MappingQGeneric reference). We can't use the one in
+     * ::StaticMappingQ1 to initialize the static mapping_collection member
+     * below, since we can't make sure that the constructor for that object
+     * has run by the time mapping_collection is constructed. The helper
+     * sidesteps this: its static object is constructed the first time the
+     * function is called.
+     */
+    template<int dim, int spacedim>
+    MappingQGeneric<dim,spacedim> &
+    get_static_mapping_q1()
+    {
+      static MappingQ1<dim,spacedim> mapping;
+      return mapping;
+    }
+  }
+
+  template<int dim, int spacedim>
+  MappingCollection<dim,spacedim>
+  StaticMappingQ1<dim,spacedim>::mapping_collection
+    = MappingCollection<dim,spacedim>(get_static_mapping_q1<dim,spacedim>());
+
+}
+
+
+
+// explicit instantiations
+#include "mapping_collection.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/hp/mapping_collection.inst.in b/source/hp/mapping_collection.inst.in
new file mode 100644
index 0000000..fba1a1e
--- /dev/null
+++ b/source/hp/mapping_collection.inst.in
@@ -0,0 +1,34 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+for (deal_II_dimension : DIMENSIONS)
+{
+  namespace hp  // first the spacedim == dim instantiations
+  \{
+    template class MappingCollection<deal_II_dimension>;
+    template struct StaticMappingQ1<deal_II_dimension>;
+
+#if deal_II_dimension != 3  // spacedim == dim+1
+    template class MappingCollection<deal_II_dimension,deal_II_dimension+1>;
+    template struct StaticMappingQ1<deal_II_dimension,deal_II_dimension+1>;
+#endif
+
+#if deal_II_dimension == 3  // dim = 1, spacedim = 3: emitted in the deal_II_dimension == 3 pass
+    template class MappingCollection<1,3>;
+    template struct StaticMappingQ1<1,3>;
+#endif
+  \}
+}
diff --git a/source/integrators/CMakeLists.txt b/source/integrators/CMakeLists.txt
new file mode 100644
index 0000000..b303065
--- /dev/null
+++ b/source/integrators/CMakeLists.txt
@@ -0,0 +1,25 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2013 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_BINARY_DIR})
+
+SET(_src  # no source files are listed for this component
+  )
+
+FILE(GLOB _header  # all headers of the integrators include directory
+  ${CMAKE_SOURCE_DIR}/include/deal.II/integrators/*.h
+  )
+
+DEAL_II_ADD_LIBRARY(obj_integrators OBJECT ${_src} ${_header})
diff --git a/source/lac/CMakeLists.txt b/source/lac/CMakeLists.txt
new file mode 100644
index 0000000..3290ed8
--- /dev/null
+++ b/source/lac/CMakeLists.txt
@@ -0,0 +1,134 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_BINARY_DIR})
+
+SET(_src  # sources that are always part of obj_lac; optional wrappers are appended further down
+  block_matrix_array.cc
+  block_sparse_matrix.cc
+  block_sparse_matrix_ez.cc
+  block_sparsity_pattern.cc
+  block_vector.cc
+  chunk_sparse_matrix.cc
+  chunk_sparsity_pattern.cc
+  dynamic_sparsity_pattern.cc
+  constraint_matrix.cc
+  full_matrix.cc
+  lapack_full_matrix.cc
+  matrix_lib.cc
+  matrix_out.cc
+  parallel_vector.cc
+  precondition_block.cc
+  precondition_block_ez.cc
+  relaxation_block.cc
+  solver.cc
+  solver_control.cc
+  sparse_decomposition.cc
+  sparse_direct.cc
+  sparse_ilu.cc
+  sparse_matrix.cc
+  sparse_matrix_inst2.cc
+  sparse_matrix_ez.cc
+  sparse_mic.cc
+  sparse_vanka.cc
+  sparsity_pattern.cc
+  sparsity_tools.cc
+  swappable_vector.cc
+  tridiagonal_matrix.cc
+  vector.cc
+  vector_memory.cc
+  vector_view.cc
+  )
+
+SET(_inst  # instantiation templates, later handed to EXPAND_INSTANTIATIONS
+  block_sparse_matrix.inst.in
+  block_vector.inst.in
+  chunk_sparse_matrix.inst.in
+  constraint_matrix.inst.in
+  full_matrix.inst.in
+  lapack_full_matrix.inst.in
+  parallel_vector.inst.in
+  precondition_block.inst.in
+  relaxation_block.inst.in
+  solver.inst.in
+  sparse_matrix_ez.inst.in
+  sparse_matrix.inst.in
+  vector.inst.in
+  vector_memory.inst.in
+  vector_view.inst.in
+  )
+
+# Append the PETSc wrapper sources, but only when PETSc support
+# is enabled: without PETSc these files would be empty, so there
+# is no need to even look at them.
+IF(DEAL_II_WITH_PETSC)
+  SET(_src
+    ${_src}
+    petsc_block_sparse_matrix.cc
+    petsc_full_matrix.cc
+    petsc_matrix_base.cc
+    petsc_matrix_free.cc
+    petsc_parallel_block_sparse_matrix.cc
+    petsc_parallel_block_vector.cc
+    petsc_parallel_sparse_matrix.cc
+    petsc_parallel_vector.cc
+    petsc_precondition.cc
+    petsc_solver.cc
+    petsc_sparse_matrix.cc
+    petsc_vector_base.cc
+    petsc_vector.cc
+  )
+ENDIF()
+
+# Same for SLEPc: its wrapper sources are appended only when enabled
+IF(DEAL_II_WITH_SLEPC)
+  SET(_src
+    ${_src}
+    slepc_solver.cc
+    slepc_spectral_transformation.cc
+  )
+ENDIF()
+
+# Also add the Trilinos wrapper sources and their instantiation templates
+IF(DEAL_II_WITH_TRILINOS)
+  SET(_src
+    ${_src}
+    trilinos_block_sparse_matrix.cc
+    trilinos_block_vector.cc
+    trilinos_parallel_block_vector.cc
+    trilinos_precondition.cc
+    trilinos_precondition_ml.cc
+    trilinos_precondition_muelu.cc
+    trilinos_solver.cc
+    trilinos_sparse_matrix.cc
+    trilinos_sparsity_pattern.cc
+    trilinos_vector_base.cc
+    trilinos_vector.cc
+  )
+  SET(_inst
+    ${_inst}
+    trilinos_sparse_matrix.inst.in
+    trilinos_vector_base.inst.in
+  )
+ENDIF()
+
+
+
+FILE(GLOB _header  # all headers of the lac include directory
+  ${CMAKE_SOURCE_DIR}/include/deal.II/lac/*.h
+  )
+
+DEAL_II_ADD_LIBRARY(obj_lac OBJECT ${_src} ${_header} ${_inst})
+EXPAND_INSTANTIATIONS(obj_lac "${_inst}")  # processes the .inst.in templates collected in _inst
diff --git a/source/lac/block_matrix_array.cc b/source/lac/block_matrix_array.cc
new file mode 100644
index 0000000..58c4746
--- /dev/null
+++ b/source/lac/block_matrix_array.cc
@@ -0,0 +1,447 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/lac/block_matrix_array.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+#include <deal.II/lac/trilinos_parallel_block_vector.h>
+
+#include <deal.II/lac/petsc_block_vector.h>
+#include <deal.II/lac/petsc_parallel_block_vector.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// Copy constructor with transfer semantics: the new entry takes over the
+// matrix pointer of e, whose own pointer is reset to zero.
+template <typename number, typename BlockVectorType>
+BlockMatrixArray<number,BlockVectorType>::Entry::Entry (const Entry &e)
+  : row(e.row),
+    col(e.col),
+    prefix(e.prefix),
+    transpose(e.transpose),
+    matrix(e.matrix)
+{
+  const_cast<Entry &>(e).matrix = 0;
+}
+
+
+
+// Delete the matrix pointer; deleting a zero pointer is a no-op.
+template <typename number, typename BlockVectorType>
+BlockMatrixArray<number,BlockVectorType>::Entry::~Entry ()
+{
+  delete matrix;
+}
+
+
+template <typename number, typename BlockVectorType>
+BlockMatrixArray<number,BlockVectorType>::BlockMatrixArray ()
+  : block_rows (0),
+    block_cols (0)
+{}  // default constructor: zero block rows and zero block columns
+
+
+
+template <typename number, typename BlockVectorType>
+BlockMatrixArray<number,BlockVectorType>::BlockMatrixArray
+(const unsigned int n_block_rows,
+ const unsigned int n_block_cols)
+  : block_rows (n_block_rows),
+    block_cols (n_block_cols)
+{}  // only the block dimensions are stored; no entry is added
+
+
+template <typename number, typename BlockVectorType>
+void
+BlockMatrixArray<number,BlockVectorType>::initialize
+(const unsigned int n_block_rows,
+ const unsigned int n_block_cols)
+{
+  block_rows = n_block_rows;  // set the block dimensions only; the list
+  block_cols = n_block_cols;  // of entries is not touched here
+}
+
+
+
+template <typename number, typename BlockVectorType>
+void
+BlockMatrixArray<number,BlockVectorType>::reinit
+(const unsigned int n_block_rows,
+ const unsigned int n_block_cols)
+{
+  clear();                    // drop all stored entries first
+  block_rows = n_block_rows;  // then adopt the new block dimensions
+  block_cols = n_block_cols;
+}
+
+
+
+template <typename number, typename BlockVectorType>
+void
+BlockMatrixArray<number,BlockVectorType>::clear ()
+{
+  entries.clear();  // removes every entry; block_rows and block_cols are kept
+}
+
+
+// Add A*src to dst, where every entry contributes prefix * M (or
+// prefix * M^T if its transpose flag is set) to block (row, col) of A.
+template <typename number, typename BlockVectorType>
+void
+BlockMatrixArray<number,BlockVectorType>::vmult_add (BlockVectorType &dst,
+                                                     const BlockVectorType &src) const
+{
+  GrowingVectorMemory<typename BlockVectorType::BlockType > mem;
+  Assert (dst.n_blocks() == block_rows,
+          ExcDimensionMismatch(dst.n_blocks(), block_rows));
+  Assert (src.n_blocks() == block_cols,
+          ExcDimensionMismatch(src.n_blocks(), block_cols));
+
+  typename VectorMemory<typename BlockVectorType::BlockType >::Pointer p_aux(mem);
+  typename BlockVectorType::BlockType &aux = *p_aux;
+
+  typedef typename std::vector<Entry>::const_iterator entry_iterator;
+  for (entry_iterator m = entries.begin(); m != entries.end(); ++m)
+    {
+      aux.reinit(dst.block(m->row));
+      if (m->transpose)
+        m->matrix->Tvmult(aux, src.block(m->col));
+      else
+        m->matrix->vmult(aux, src.block(m->col));
+      dst.block(m->row).add (m->prefix, aux);
+    }
+}
+
+
+
+
+template <typename number, typename BlockVectorType>
+void
+BlockMatrixArray<number,BlockVectorType>::vmult (BlockVectorType &dst,
+                                                 const BlockVectorType &src) const
+{
+  dst = 0.;              // start from zero so that the accumulating
+  vmult_add (dst, src);  // version leaves just the product in dst
+}
+
+
+
+
+// Add A^T*src to dst: every entry contributes prefix * M^T (or prefix * M
+// if its transpose flag is set), reading block row and writing block col.
+template <typename number, typename BlockVectorType>
+void
+BlockMatrixArray<number,BlockVectorType>::Tvmult_add (BlockVectorType &dst,
+                                                      const BlockVectorType &src) const
+{
+  GrowingVectorMemory<typename BlockVectorType::BlockType > mem;
+  Assert (dst.n_blocks() == block_cols,
+          ExcDimensionMismatch(dst.n_blocks(), block_cols));
+  Assert (src.n_blocks() == block_rows,
+          ExcDimensionMismatch(src.n_blocks(), block_rows));
+
+  typename VectorMemory<typename BlockVectorType::BlockType >::Pointer p_aux(mem);
+  typename BlockVectorType::BlockType &aux = *p_aux;
+
+  typedef typename std::vector<Entry>::const_iterator entry_iterator;
+  for (entry_iterator m = entries.begin(); m != entries.end(); ++m)
+    {
+      aux.reinit(dst.block(m->col));
+      if (m->transpose)
+        m->matrix->vmult(aux, src.block(m->row));
+      else
+        m->matrix->Tvmult(aux, src.block(m->row));
+      dst.block(m->col).add (m->prefix, aux);
+    }
+}
+
+
+
+template <typename number, typename BlockVectorType>
+void
+BlockMatrixArray<number,BlockVectorType>::Tvmult (BlockVectorType &dst,
+                                                  const BlockVectorType &src) const
+{
+  dst = 0.;               // start from zero so that the accumulating
+  Tvmult_add (dst, src);  // version leaves just the transposed product in dst
+}
+
+
+
+
+// Compute the bilinear form u^T A v, where A is the block matrix described
+// by the list of entries, i.e. the sum of prefix * M (or prefix * M^T) over
+// all entries. Every entry is applied into a scratch vector and weighted by
+// its prefix, consistent with vmult_add(); several entries in one block row
+// therefore accumulate instead of overwriting each other.
+template <typename number, typename BlockVectorType>
+number
+BlockMatrixArray<number,BlockVectorType>::matrix_scalar_product
+(const BlockVectorType &u,
+ const BlockVectorType &v) const
+{
+  GrowingVectorMemory<typename BlockVectorType::BlockType > mem;
+  Assert (u.n_blocks() == block_rows,
+          ExcDimensionMismatch(u.n_blocks(), block_rows));
+  Assert (v.n_blocks() == block_cols,
+          ExcDimensionMismatch(v.n_blocks(), block_cols));
+
+  typename VectorMemory<typename BlockVectorType::BlockType >::Pointer p_aux(mem);
+  typename BlockVectorType::BlockType &aux = *p_aux;
+
+  typename std::vector<Entry>::const_iterator m = entries.begin();
+  typename std::vector<Entry>::const_iterator end = entries.end();
+
+  number result = 0.;
+  for (; m != end ; ++m)
+    {
+      // aux is overwritten by (T)vmult; reinit only gives it the right size
+      aux.reinit(u.block(m->row));
+      if (m->transpose)
+        m->matrix->Tvmult(aux, v.block(m->col));
+      else
+        m->matrix->vmult(aux, v.block(m->col));
+      // contribution of this entry: prefix * (u_row . M v_col)
+      result += m->prefix * (u.block(m->row)*aux);
+    }
+  return result;
+}
+
+
+
+template <typename number, typename BlockVectorType>
+number
+BlockMatrixArray<number,BlockVectorType>::matrix_norm_square
+(const BlockVectorType &u) const
+{
+  return matrix_scalar_product(u,u);  // the scalar product with both arguments equal to u
+}
+
+
+
+template <typename number, typename BlockVectorType>
+unsigned int
+BlockMatrixArray<number,BlockVectorType>::n_block_rows () const
+{
+  return block_rows;  // as set by a constructor, initialize() or reinit()
+}
+
+
+
+template <typename number, typename BlockVectorType>
+unsigned int
+BlockMatrixArray<number,BlockVectorType>::n_block_cols () const
+{
+  return block_cols;  // as set by a constructor, initialize() or reinit()
+}
+
+
+
+//---------------------------------------------------------------------------
+
+template <typename number, typename BlockVectorType>
+BlockTrianglePrecondition<number,BlockVectorType>::BlockTrianglePrecondition()
+  : BlockMatrixArray<number,BlockVectorType> (),
+    backward(false)
+{}  // default: empty block layout, forward substitution (backward == false)
+
+
+template <typename number, typename BlockVectorType>
+BlockTrianglePrecondition<number,BlockVectorType>::BlockTrianglePrecondition
+(const unsigned int block_rows)
+  :
+  BlockMatrixArray<number,BlockVectorType> (block_rows, block_rows),
+  backward(false)
+{}  // square block_rows x block_rows layout, forward substitution (backward == false)
+
+
+template <typename number, typename BlockVectorType>
+void
+BlockTrianglePrecondition<number,BlockVectorType>::reinit
+(const unsigned int n)
+{
+  BlockMatrixArray<number,BlockVectorType>::reinit(n,n);  // square n x n layout; backward is left unchanged
+}
+
+
+template <typename number, typename BlockVectorType>
+void
+BlockTrianglePrecondition<number,BlockVectorType>::do_row
+(BlockVectorType &dst,
+ size_type        row_num) const
+{
+  GrowingVectorMemory<typename BlockVectorType::BlockType > mem;
+  typename std::vector<typename BlockMatrixArray<number,BlockVectorType>::Entry>::const_iterator
+  m = this->entries.begin();
+  typename std::vector<typename BlockMatrixArray<number,BlockVectorType>::Entry>::const_iterator
+  end = this->entries.end();
+  std::vector<typename std::vector<typename BlockMatrixArray<number,BlockVectorType>::Entry>::const_iterator>
+  diagonals;  // iterators to the entries found on the diagonal of this row
+
+  typename VectorMemory<typename BlockVectorType::BlockType >::Pointer p_aux(mem);
+  typename BlockVectorType::BlockType &aux = *p_aux;
+
+  aux.reinit(dst.block(row_num), true);
+
+  // Visit every entry: the list is not
+  // sorted by row, so all must be checked.
+  for (; m != end ; ++m)
+    {
+      const size_type i=m->row;
+      // Skip entries that belong to a
+      // different block row
+      if (i != row_num)
+        continue;
+      const size_type j=m->col;
+      // Forward substitution uses only the
+      // lower triangle (j <= i), backward
+      // substitution only the upper one
+      if (((j > i) && !backward) || ((j < i) && backward))
+        continue;
+      if (j == i)
+        {
+          diagonals.push_back(m);  // handled after the loop
+        }
+      else
+        {
+          if (m->transpose)
+            m->matrix->Tvmult(aux, dst.block(j));
+          else
+            m->matrix->vmult(aux, dst.block(j));
+          dst.block(i).add (-1 * m->prefix, aux);  // subtract the off-diagonal contribution
+        }
+    }
+  Assert (diagonals.size() != 0, ExcNoDiagonal(row_num));
+
+  // With exactly one diagonal entry it
+  // is applied directly to the updated
+  // row and scaled by its prefix
+  if (diagonals.size() == 1)
+    {
+      if (diagonals[0]->transpose)
+        diagonals[0]->matrix->Tvmult(aux, dst.block(row_num));
+      else
+        diagonals[0]->matrix->vmult(aux, dst.block(row_num));
+      dst.block(row_num).equ (diagonals[0]->prefix, aux);
+    }
+  else
+    {
+      aux = 0.;  // running sum over all diagonal entries
+      for (size_type i=0; i<diagonals.size(); ++i)
+        {
+          m = diagonals[i];
+          // Divide the running sum by the prefix
+          // first; multiplying afterwards then
+          // scales only the newly added product.
+          aux /= m->prefix;
+          if (m->transpose)
+            m->matrix->Tvmult_add(aux, dst.block(row_num));
+          else
+            m->matrix->vmult_add(aux, dst.block(row_num));
+          aux *= m->prefix;
+        }
+      dst.block(row_num) = aux;
+    }
+}
+
+
+
+template <typename number, typename BlockVectorType>
+void
+BlockTrianglePrecondition<number,BlockVectorType>::vmult_add
+(BlockVectorType       &dst,
+ const BlockVectorType &src) const
+{
+  Assert (dst.n_blocks() == n_block_rows(),
+          ExcDimensionMismatch(dst.n_blocks(), n_block_rows()));
+  Assert (src.n_blocks() == n_block_cols(),
+          ExcDimensionMismatch(src.n_blocks(), n_block_cols()));
+
+  BlockVectorType aux;
+  aux.reinit(dst);   // temporary shaped like dst
+  vmult(aux, src);   // aux receives the result of vmult() applied to src
+  dst.add(aux);      // which is then added to dst
+}
+
+
+
+// Apply the block-triangular sweep: dst is first set to src and then
+// processed in place, one block row at a time, by do_row().
+template <typename number, typename BlockVectorType>
+void
+BlockTrianglePrecondition<number,BlockVectorType>::vmult
+(BlockVectorType       &dst,
+ const BlockVectorType &src) const
+{
+  Assert (dst.n_blocks() == n_block_rows(),
+          ExcDimensionMismatch(dst.n_blocks(), n_block_rows()));
+  Assert (src.n_blocks() == n_block_cols(),
+          ExcDimensionMismatch(src.n_blocks(), n_block_cols()));
+
+  // do_row() works in place on dst
+  dst.equ(1., src);
+
+  if (!backward)
+    // forward: rows 0, 1, ..., n-1
+    for (unsigned int i=0; i<n_block_rows(); ++i)
+      do_row(dst, i);
+  else
+    // backward: rows n-1, ..., 1, 0
+    for (unsigned int i=n_block_rows(); i>0; --i)
+      do_row(dst, i-1);
+}
+
+template <typename number, typename BlockVectorType>
+void
+BlockTrianglePrecondition<number,BlockVectorType>::Tvmult
+(BlockVectorType &,
+ const BlockVectorType &) const
+{
+  Assert (false, ExcNotImplemented());  // not implemented: both arguments are ignored
+}
+
+
+template <typename number, typename BlockVectorType>
+void
+BlockTrianglePrecondition<number,BlockVectorType>::Tvmult_add
+(BlockVectorType &,
+ const BlockVectorType &) const
+{
+  Assert (false, ExcNotImplemented());  // not implemented: both arguments are ignored
+}
+
+template class BlockMatrixArray<float>;  // instantiations with the default BlockVectorType
+template class BlockMatrixArray<double>;
+template class BlockTrianglePrecondition<float>;
+template class BlockTrianglePrecondition<double>;
+
+#ifdef DEAL_II_WITH_TRILINOS  // same classes for Trilinos block vectors
+template class BlockMatrixArray<float, TrilinosWrappers::MPI::BlockVector>;
+template class BlockMatrixArray<double, TrilinosWrappers::MPI::BlockVector>;
+template class BlockTrianglePrecondition<float, TrilinosWrappers::MPI::BlockVector>;
+template class BlockTrianglePrecondition<double, TrilinosWrappers::MPI::BlockVector>;
+#endif  // DEAL_II_WITH_TRILINOS
+
+#ifdef DEAL_II_WITH_PETSC  // same classes for PETSc block vectors
+template class BlockMatrixArray<float, PETScWrappers::MPI::BlockVector>;
+template class BlockMatrixArray<double, PETScWrappers::MPI::BlockVector>;
+template class BlockTrianglePrecondition<float, PETScWrappers::MPI::BlockVector>;
+template class BlockTrianglePrecondition<double, PETScWrappers::MPI::BlockVector>;
+#endif  // DEAL_II_WITH_PETSC
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/lac/block_sparse_matrix.cc b/source/lac/block_sparse_matrix.cc
new file mode 100644
index 0000000..4b83e78
--- /dev/null
+++ b/source/lac/block_sparse_matrix.cc
@@ -0,0 +1,23 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/lac/block_sparse_matrix.templates.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+#include "block_sparse_matrix.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/lac/block_sparse_matrix.inst.in b/source/lac/block_sparse_matrix.inst.in
new file mode 100644
index 0000000..7f2cbc9
--- /dev/null
+++ b/source/lac/block_sparse_matrix.inst.in
@@ -0,0 +1,19 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+for (S : REAL_SCALARS)
+  {
+    template class BlockSparseMatrix<S>;  // one instantiation per scalar type S
+  }
diff --git a/source/lac/block_sparse_matrix_ez.cc b/source/lac/block_sparse_matrix_ez.cc
new file mode 100644
index 0000000..9f4b860
--- /dev/null
+++ b/source/lac/block_sparse_matrix_ez.cc
@@ -0,0 +1,26 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/lac/block_sparse_matrix_ez.h>
+#include <deal.II/lac/block_sparse_matrix_ez.templates.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+// explicit instantiations for the scalar types double and float
+template class BlockSparseMatrixEZ<double>;
+template class BlockSparseMatrixEZ<float>;
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/lac/block_sparsity_pattern.cc b/source/lac/block_sparsity_pattern.cc
new file mode 100644
index 0000000..62bb232
--- /dev/null
+++ b/source/lac/block_sparsity_pattern.cc
@@ -0,0 +1,695 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/vector_slice.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/lac/block_sparsity_pattern.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// Default constructor: creates an object with an empty (0 x 0) block layout.
+template <class SparsityPatternBase>
+BlockSparsityPatternBase<SparsityPatternBase>::BlockSparsityPatternBase ()
+  : rows (0),
+    columns (0)
+{}
+
+
+
+// Construct with the given number of block rows/columns. The counters start
+// at zero so that reinit() finds nothing to delete before it allocates.
+template <class SparsityPatternBase>
+BlockSparsityPatternBase<SparsityPatternBase>::
+BlockSparsityPatternBase (const size_type n_block_rows,
+                          const size_type n_block_columns)
+  : rows (0), columns (0)
+{
+  this->reinit (n_block_rows, n_block_columns);
+}
+
+
+
+// Copy constructor. Only empty (0 x 0) objects may be copied; this is
+// checked by the assertions below, and the copy is created empty as well.
+template <class SparsityPatternBase>
+BlockSparsityPatternBase<SparsityPatternBase>::
+BlockSparsityPatternBase (const BlockSparsityPatternBase &s)
+  : Subscriptor (),
+    rows (0),
+    columns (0)
+{
+  (void)s;  // presumably avoids an unused-parameter warning without Assert
+  Assert (s.rows == 0, ExcInvalidConstructorCall());
+  Assert (s.columns == 0, ExcInvalidConstructorCall());
+}
+
+
+
+template <class SparsityPatternBase>
+BlockSparsityPatternBase<SparsityPatternBase>::~BlockSparsityPatternBase ()
+{
+  // release all memory: reinit() to a 0x0 layout deletes every sub-object
+  reinit (0,0);
+}
+
+
+
+// Resize to n_block_rows x n_block_columns blocks. All previously owned
+// sub-patterns are deleted and fresh default-constructed ones allocated.
+template <class SparsityPatternBase>
+void
+BlockSparsityPatternBase<SparsityPatternBase>::
+reinit (const size_type n_block_rows,
+        const size_type n_block_columns)
+{
+  // release the old blocks; each table entry is reset before the object it
+  // referred to is deleted
+  for (size_type r=0; r<rows; ++r)
+    for (size_type c=0; c<columns; ++c)
+      {
+        SparsityPatternBase *old_block = sub_objects[r][c];
+        sub_objects[r][c] = 0;
+        delete old_block;
+      }
+  sub_objects.reinit (0,0);
+
+  // adopt the new layout
+  rows = n_block_rows;
+  columns = n_block_columns;
+  sub_objects.reinit (rows, columns);
+
+  // and populate it with newly allocated, default-constructed blocks
+  for (size_type r=0; r<rows; ++r)
+    for (size_type c=0; c<columns; ++c)
+      {
+        sub_objects[r][c] = new SparsityPatternBase;
+      }
+}
+
+
+// Block-wise copy assignment; both operands must already have the same
+// number of block rows and columns (checked by the assertions).
+template <class SparsityPatternBase>
+BlockSparsityPatternBase<SparsityPatternBase> &
+BlockSparsityPatternBase<SparsityPatternBase>::
+operator = (const BlockSparsityPatternBase<SparsityPatternBase> &bsp)
+{
+  Assert (rows == bsp.rows, ExcDimensionMismatch(rows, bsp.rows));
+  Assert (columns == bsp.columns, ExcDimensionMismatch(columns, bsp.columns));
+  for (size_type r=0; r<rows; ++r)
+    for (size_type c=0; c<columns; ++c)
+      *sub_objects[r][c] = *bsp.sub_objects[r][c];
+  // the block sizes may have changed, so rebuild the index objects
+  this->collect_sizes ();
+  return *this;
+}
+
+
+
+// Query every block for its size and rebuild row_indices/column_indices
+// from these sizes. The Assert checks verify that all blocks within one
+// block row agree on their number of rows, and that all blocks within one
+// block column agree on their number of columns.
+template <class SparsityPatternBase>
+void
+BlockSparsityPatternBase<SparsityPatternBase>::collect_sizes ()
+{
+  std::vector<size_type > row_sizes (rows);
+  std::vector<size_type > col_sizes (columns);
+
+  // take the row sizes from the first block column ...
+  for (size_type r=0; r<rows; ++r)
+    row_sizes[r] = sub_objects[r][0]->n_rows();
+  // ... and check that the remaining block columns agree with them
+  for (size_type c=1; c<columns; ++c)
+    for (size_type r=0; r<rows; ++r)
+      Assert (row_sizes[r] == sub_objects[r][c]->n_rows(),
+              ExcIncompatibleRowNumbers (r,0,r,c));
+
+  // finally initialize the row indices with this array
+  row_indices.reinit (row_sizes);
+
+
+  // then do the same with the columns, using the first block row as the
+  // reference; a mismatch here is a column mismatch and reported as such
+  for (size_type c=0; c<columns; ++c)
+    col_sizes[c] = sub_objects[0][c]->n_cols();
+  for (size_type r=1; r<rows; ++r)
+    for (size_type c=0; c<columns; ++c)
+      Assert (col_sizes[c] == sub_objects[r][c]->n_cols(),
+              ExcIncompatibleColNumbers (0,c,r,c));
+
+  // finally initialize the column indices with this array
+  column_indices.reinit (col_sizes);
+}
+
+
+
+// Forward compress() to every block of the pattern.
+template <class SparsityPatternBase>
+void BlockSparsityPatternBase<SparsityPatternBase>::compress ()
+{
+  for (size_type r=0; r<rows; ++r)
+    for (size_type c=0; c<columns; ++c)
+      sub_objects[r][c]->compress ();
+}
+
+
+
+// True iff every block reports empty(); stops at the first non-empty block.
+template <class SparsityPatternBase>
+bool BlockSparsityPatternBase<SparsityPatternBase>::empty () const
+{
+  for (size_type r=0; r<rows; ++r)
+    for (size_type c=0; c<columns; ++c)
+      if (!sub_objects[r][c]->empty ())
+        return false;
+  return true;
+}
+
+
+
+// For each block row, sum max_entries_per_row() of its blocks over all
+// block columns; return the largest of these sums (0 if there are no blocks).
+template <class SparsityPatternBase>
+typename BlockSparsityPatternBase<SparsityPatternBase>::size_type
+BlockSparsityPatternBase<SparsityPatternBase>::max_entries_per_row () const
+{
+  size_type result = 0;
+  for (size_type r=0; r<rows; ++r)
+    {
+      size_type row_total = 0;
+      for (size_type c=0; c<columns; ++c)
+        row_total += sub_objects[r][c]->max_entries_per_row ();
+      result = (row_total > result ? row_total : result);
+    }
+  return result;
+}
+
+
+
+template <class SparsityPatternBase>
+typename BlockSparsityPatternBase<SparsityPatternBase>::size_type
+BlockSparsityPatternBase<SparsityPatternBase>::n_rows () const
+{
+  // Total number of rows: add up the first block column only, since all
+  // blocks of one block row are expected to have the same number of rows.
+  size_type total = 0;
+  for (size_type r=0; r<rows; ++r)
+    total += sub_objects[r][0]->n_rows();
+  return total;
+}
+
+
+
+template <class SparsityPatternBase>
+typename BlockSparsityPatternBase<SparsityPatternBase>::size_type
+BlockSparsityPatternBase<SparsityPatternBase>::n_cols () const
+{
+  // Total number of columns: add up the first block row only, since all
+  // blocks of one block column are expected to have the same column count.
+  size_type total = 0;
+  for (size_type c=0; c<columns; ++c)
+    total += sub_objects[0][c]->n_cols();
+  return total;
+}
+
+
+
+template <class SparsityPatternBase>
+typename BlockSparsityPatternBase<SparsityPatternBase>::size_type
+BlockSparsityPatternBase<SparsityPatternBase>::n_nonzero_elements () const
+{
+  size_type total = 0;  // accumulates n_nonzero_elements() of every block
+  for (size_type r=0; r<rows; ++r)
+    for (size_type c=0; c<columns; ++c)
+      total += sub_objects[r][c]->n_nonzero_elements ();
+  return total;
+}
+
+
+
+// Print one line "[row,col,col,...]" per row, in global row/column indices.
+template <class SparsityPatternBase>
+void BlockSparsityPatternBase<SparsityPatternBase>::print(std::ostream &out) const
+{
+  size_type row_offset=0;
+  for (size_type br=0; br<n_block_rows(); ++br)
+    {
+      for (size_type i=0; i<block(br,0).n_rows(); ++i)
+        {
+          out << '[' << i+row_offset;
+          size_type col_offset=0;
+          for (size_type bc=0; bc<n_block_cols(); ++bc)
+            {
+              const SparsityPatternBase &blk = block(br,bc);
+              for (size_type j=0; j<blk.n_cols(); ++j)
+                if (blk.exists(i,j))
+                  out << ',' << col_offset+j;
+              col_offset += blk.n_cols();
+            }
+          out << ']' << std::endl;
+        }
+      row_offset += block(br,0).n_rows();
+    }
+}
+
+
+// As the generic print(), but a block is only queried for row i if its
+// row_index_set() has size 0 or contains i.
+template <>
+void BlockSparsityPatternBase<DynamicSparsityPattern>::print(std::ostream &out) const
+{
+  size_type row_offset=0;
+  for (size_type br=0; br<n_block_rows(); ++br)
+    {
+      for (size_type i=0; i<block(br,0).n_rows(); ++i)
+        {
+          out << '[' << i+row_offset;
+          size_type col_offset=0;
+          for (size_type bc=0; bc<n_block_cols(); ++bc)
+            {
+              const DynamicSparsityPattern &blk = block(br,bc);
+              if (blk.row_index_set().size()==0 || blk.row_index_set().is_element(i))
+                for (size_type j=0; j<blk.n_cols(); ++j)
+                  if (blk.exists(i,j)) out << ',' << col_offset+j;
+              col_offset += blk.n_cols();
+            }
+          out << ']' << std::endl;
+        }
+      row_offset += block(br,0).n_rows();
+    }
+}
+
+
+// Write one "column -row" line per entry, using global row/column indices.
+template <class SparsityPatternBase>
+void BlockSparsityPatternBase<SparsityPatternBase>::print_gnuplot(std::ostream &out) const
+{
+  size_type row_offset=0;
+  for (size_type br=0; br<n_block_rows(); ++br)
+    {
+      for (size_type i=0; i<block(br,0).n_rows(); ++i)
+        {
+          size_type col_offset=0;
+          for (size_type bc=0; bc<n_block_cols(); ++bc)
+            {
+              const SparsityPatternBase &blk = block(br,bc);
+              for (size_type j=0; j<blk.n_cols(); ++j)
+                if (blk.exists(i,j))
+                  out << col_offset+j << " " << -static_cast<signed int>(i+row_offset) << std::endl;
+              col_offset += blk.n_cols();
+            }
+        }
+      row_offset += block(br,0).n_rows();
+    }
+}
+
+
+
+// Default constructor; everything is set up by the base class constructor.
+BlockSparsityPattern::BlockSparsityPattern () {}
+
+
+
+// Create an n_rows x n_columns array of blocks; the allocation itself is
+// done by the base class constructor.
+BlockSparsityPattern::BlockSparsityPattern (const size_type n_rows,
+                                            const size_type n_columns)
+  : BlockSparsityPatternBase<SparsityPattern>(n_rows, n_columns)
+{}
+
+
+// Set up all blocks from the block structures rows/cols. row_lengths[j] is
+// used for block column j: either one value for all rows, or one per row.
+void
+BlockSparsityPattern::reinit(
+  const BlockIndices &rows,
+  const BlockIndices &cols,
+  const std::vector<std::vector<unsigned int> > &row_lengths)
+{
+  AssertDimension (row_lengths.size(), cols.size());
+  this->reinit(rows.size(), cols.size());
+  for (size_type bc=0; bc<cols.size(); ++bc)
+    for (size_type br=0; br<rows.size(); ++br)
+      {
+        const size_type first_row = rows.local_to_global(br, 0);
+        const size_type blk_rows  = rows.block_size(br);
+        const size_type blk_cols  = cols.block_size(bc);
+        const std::vector<unsigned int> &lengths = row_lengths[bc];
+
+        if (lengths.size()==1)
+          block(br,bc).reinit(blk_rows, blk_cols, lengths[0]);
+        else
+          {
+            VectorSlice<const std::vector<unsigned int> >
+            block_rows(lengths, first_row, blk_rows);
+            block(br,bc).reinit(blk_rows, blk_cols, block_rows);
+          }
+      }
+  this->collect_sizes();
+  Assert (this->row_indices == rows, ExcInternalError());
+  Assert (this->column_indices == cols, ExcInternalError());
+}
+
+
+// True iff every block is compressed; stops at the first one that is not.
+bool BlockSparsityPattern::is_compressed () const
+{
+  for (size_type r=0; r<rows; ++r)
+    for (size_type c=0; c<columns; ++c)
+      if (!sub_objects[r][c]->is_compressed ())
+        return false;
+  return true;
+}
+
+
+// Memory estimate: the bookkeeping members of this object plus all blocks.
+std::size_t
+BlockSparsityPattern::memory_consumption () const
+{
+  std::size_t total = MemoryConsumption::memory_consumption (rows);
+  total += MemoryConsumption::memory_consumption (columns);
+  total += MemoryConsumption::memory_consumption (sub_objects);
+  total += MemoryConsumption::memory_consumption (row_indices);
+  total += MemoryConsumption::memory_consumption (column_indices);
+  // add the blocks themselves, which the table entries point to
+  for (size_type r=0; r<rows; ++r)
+    for (size_type c=0; c<columns; ++c)
+      total += MemoryConsumption::memory_consumption (*sub_objects[r][c]);
+  return total;
+}
+
+
+
+// Rebuild this object as a block-by-block copy of the dynamic pattern dsp.
+void
+BlockSparsityPattern::copy_from  (const BlockDynamicSparsityPattern &dsp)
+{
+  // discard the old blocks and adopt the block layout of dsp
+  this->reinit (dsp.n_block_rows(), dsp.n_block_cols());
+
+  // transfer the entries of each block
+  for (size_type br=0; br<n_block_rows(); ++br)
+    for (size_type bc=0; bc<n_block_cols(); ++bc)
+      block(br,bc).copy_from (dsp.block(br,bc));
+
+  // now that the blocks have their final sizes, update the index objects
+  this->collect_sizes();
+}
+
+
+
+
+
+// Default constructor; everything is set up by the base class constructor.
+BlockDynamicSparsityPattern::BlockDynamicSparsityPattern () {}
+
+
+
+// Create an n_rows x n_columns array of blocks; the allocation itself is
+// done by the base class constructor.
+BlockDynamicSparsityPattern::
+BlockDynamicSparsityPattern (const size_type n_rows,
+                             const size_type n_columns)
+  : BlockSparsityPatternBase<DynamicSparsityPattern>(n_rows, n_columns)
+{}
+
+
+
+// Block (i,j) is given the dimensions row_indices[i] x col_indices[j].
+BlockDynamicSparsityPattern::
+BlockDynamicSparsityPattern (const std::vector<size_type> &row_indices,
+                             const std::vector<size_type> &col_indices)
+  : BlockSparsityPatternBase<DynamicSparsityPattern>(row_indices.size(),
+                                                     col_indices.size())
+{
+  for (size_type br=0; br<row_indices.size(); ++br)
+    for (size_type bc=0; bc<col_indices.size(); ++bc)
+      this->block(br,bc).reinit(row_indices[br], col_indices[bc]);
+  this->collect_sizes();
+}
+
+
+// Square block layout: block (i,j) is sized from partitioning[i]/[j].
+BlockDynamicSparsityPattern::
+BlockDynamicSparsityPattern (const std::vector<IndexSet> &partitioning)
+  : BlockSparsityPatternBase<DynamicSparsityPattern>(partitioning.size(),
+                                                     partitioning.size())
+{
+  for (size_type br=0; br<partitioning.size(); ++br)
+    for (size_type bc=0; bc<partitioning.size(); ++bc)
+      this->block(br,bc).reinit(partitioning[br].size(),
+                                partitioning[bc].size(),
+                                partitioning[br]);
+  this->collect_sizes();
+}
+
+
+BlockDynamicSparsityPattern::
+BlockDynamicSparsityPattern (const BlockIndices &row_indices,
+                             const BlockIndices &col_indices)
+{
+  this->reinit(row_indices, col_indices);  // the reinit() overload does the work
+}
+
+
+// Resize, then size block (i,j) as row_block_sizes[i] x col_block_sizes[j].
+void BlockDynamicSparsityPattern::reinit (
+  const std::vector<size_type> &row_block_sizes,
+  const std::vector<size_type> &col_block_sizes)
+{
+  BlockSparsityPatternBase<DynamicSparsityPattern>::
+  reinit(row_block_sizes.size(), col_block_sizes.size());
+  for (size_type br=0; br<row_block_sizes.size(); ++br)
+    for (size_type bc=0; bc<col_block_sizes.size(); ++bc)
+      this->block(br,bc).reinit(row_block_sizes[br], col_block_sizes[bc]);
+  this->collect_sizes();
+}
+
+// Resize to a square block layout sized from the entries of partitioning.
+void BlockDynamicSparsityPattern::reinit (
+  const std::vector< IndexSet > &partitioning)
+{
+  BlockSparsityPatternBase<DynamicSparsityPattern>::
+  reinit(partitioning.size(), partitioning.size());
+  for (size_type br=0; br<partitioning.size(); ++br)
+    for (size_type bc=0; bc<partitioning.size(); ++bc)
+      this->block(br,bc).reinit(partitioning[br].size(),
+                                partitioning[bc].size(),
+                                partitioning[br]);
+  this->collect_sizes();
+}
+
+// Resize, then size block (i,j) from the block sizes of the two arguments.
+void BlockDynamicSparsityPattern::reinit (
+  const BlockIndices &row_indices,
+  const BlockIndices &col_indices)
+{
+  BlockSparsityPatternBase<DynamicSparsityPattern>::reinit(row_indices.size(),
+                                                           col_indices.size());
+  for (size_type br=0; br<row_indices.size(); ++br)
+    for (size_type bc=0; bc<col_indices.size(); ++bc)
+      this->block(br,bc).reinit(row_indices.block_size(br),
+                                col_indices.block_size(bc));
+  this->collect_sizes();
+}
+
+
+#ifdef DEAL_II_WITH_TRILINOS
+namespace TrilinosWrappers
+{
+
+  // Default constructor; everything is set up by the base class constructor.
+  BlockSparsityPattern::BlockSparsityPattern () {}
+
+
+
+  // Create an n_rows x n_columns array of blocks; the allocation itself is
+  // done by the base class constructor.
+  BlockSparsityPattern::
+  BlockSparsityPattern (const size_type n_rows,
+                        const size_type n_columns)
+    : dealii::BlockSparsityPatternBase<SparsityPattern>(n_rows, n_columns)
+  {}
+
+
+
+  // Block (i,j) is given the dimensions row_indices[i] x col_indices[j].
+  BlockSparsityPattern::
+  BlockSparsityPattern (const std::vector<size_type> &row_indices,
+                        const std::vector<size_type> &col_indices)
+    : BlockSparsityPatternBase<SparsityPattern>(row_indices.size(),
+                                                col_indices.size())
+  {
+    for (size_type br=0; br<row_indices.size(); ++br)
+      for (size_type bc=0; bc<col_indices.size(); ++bc)
+        this->block(br,bc).reinit(row_indices[br], col_indices[bc]);
+    this->collect_sizes();
+  }
+
+
+
+  // Square block layout: block (i,j) is initialized from the maps
+  // parallel_partitioning[i] and parallel_partitioning[j].
+  BlockSparsityPattern::
+  BlockSparsityPattern (const std::vector<Epetra_Map> &parallel_partitioning)
+    : BlockSparsityPatternBase<SparsityPattern>(parallel_partitioning.size(),
+                                                parallel_partitioning.size())
+  {
+    for (size_type br=0; br<parallel_partitioning.size(); ++br)
+      for (size_type bc=0; bc<parallel_partitioning.size(); ++bc)
+        this->block(br,bc).reinit(parallel_partitioning[br],
+                                  parallel_partitioning[bc]);
+    this->collect_sizes();
+  }
+
+
+
+  // Square block layout: block (i,j) is initialized from the index sets
+  // parallel_partitioning[i] and parallel_partitioning[j] and communicator.
+  BlockSparsityPattern::
+  BlockSparsityPattern (const std::vector<IndexSet> &parallel_partitioning,
+                        const MPI_Comm              &communicator)
+    : BlockSparsityPatternBase<SparsityPattern>(parallel_partitioning.size(),
+                                                parallel_partitioning.size())
+  {
+    for (size_type br=0; br<parallel_partitioning.size(); ++br)
+      for (size_type bc=0; bc<parallel_partitioning.size(); ++bc)
+        this->block(br,bc).reinit(parallel_partitioning[br],
+                                  parallel_partitioning[bc],
+                                  communicator);
+    this->collect_sizes();
+  }
+
+
+
+  // Construct one block per (row, column) partition pair; block (i,j) also
+  // receives writable_rows[i], so there must be one entry per block row.
+  BlockSparsityPattern::
+  BlockSparsityPattern (const std::vector<IndexSet> &row_parallel_partitioning,
+                        const std::vector<IndexSet> &col_parallel_partitioning,
+                        const std::vector<IndexSet> &writable_rows,
+                        const MPI_Comm              &communicator)
+    : BlockSparsityPatternBase<SparsityPattern>(row_parallel_partitioning.size(),
+                                                col_parallel_partitioning.size())
+  {
+    AssertDimension(writable_rows.size(), row_parallel_partitioning.size());
+    for (size_type i=0; i<row_parallel_partitioning.size(); ++i)
+      for (size_type j=0; j<col_parallel_partitioning.size(); ++j)
+        this->block(i,j).reinit(row_parallel_partitioning[i],
+                                col_parallel_partitioning[j],
+                                writable_rows[i], communicator);
+    this->collect_sizes();
+  }
+
+
+
+  // Resize, then size block (i,j) as row_block_sizes[i] x col_block_sizes[j].
+  void BlockSparsityPattern::reinit (const std::vector<size_type> &row_block_sizes,
+                                     const std::vector<size_type> &col_block_sizes)
+  {
+    dealii::BlockSparsityPatternBase<SparsityPattern>::
+    reinit(row_block_sizes.size(), col_block_sizes.size());
+    for (size_type br=0; br<row_block_sizes.size(); ++br)
+      for (size_type bc=0; bc<col_block_sizes.size(); ++bc)
+        this->block(br,bc).reinit(row_block_sizes[br], col_block_sizes[bc]);
+    this->collect_sizes();
+  }
+
+
+
+  // Resize to a square layout; block (i,j) uses the maps with index i and j.
+  void
+  BlockSparsityPattern::reinit (const std::vector<Epetra_Map> &parallel_partitioning)
+  {
+    dealii::BlockSparsityPatternBase<SparsityPattern>::
+    reinit(parallel_partitioning.size(), parallel_partitioning.size());
+    for (size_type br=0; br<parallel_partitioning.size(); ++br)
+      for (size_type bc=0; bc<parallel_partitioning.size(); ++bc)
+        this->block(br,bc).reinit(parallel_partitioning[br],
+                                  parallel_partitioning[bc]);
+    this->collect_sizes();
+  }
+
+
+
+  // Resize to a square layout; block (i,j) uses index sets i and j.
+  void
+  BlockSparsityPattern::reinit (const std::vector<IndexSet> &parallel_partitioning,
+                                const MPI_Comm &communicator)
+  {
+    dealii::BlockSparsityPatternBase<SparsityPattern>::
+    reinit(parallel_partitioning.size(), parallel_partitioning.size());
+    for (size_type br=0; br<parallel_partitioning.size(); ++br)
+      for (size_type bc=0; bc<parallel_partitioning.size(); ++bc)
+        this->block(br,bc).reinit(parallel_partitioning[br],
+                                  parallel_partitioning[bc],
+                                  communicator);
+    this->collect_sizes();
+  }
+
+
+
+  // Resize; block (i,j) uses row partition i and column partition j.
+  void
+  BlockSparsityPattern::reinit (const std::vector<IndexSet> &row_parallel_partitioning,
+                                const std::vector<IndexSet> &col_parallel_partitioning,
+                                const MPI_Comm &communicator)
+  {
+    dealii::BlockSparsityPatternBase<SparsityPattern>::
+    reinit(row_parallel_partitioning.size(), col_parallel_partitioning.size());
+    for (size_type br=0; br<row_parallel_partitioning.size(); ++br)
+      for (size_type bc=0; bc<col_parallel_partitioning.size(); ++bc)
+        this->block(br,bc).reinit(row_parallel_partitioning[br],
+                                  col_parallel_partitioning[bc],
+                                  communicator);
+    this->collect_sizes();
+  }
+
+
+
+  // As the overload without writable_rows; block (i,j) also gets writable_rows[i].
+  void
+  BlockSparsityPattern::reinit (const std::vector<IndexSet> &row_parallel_partitioning,
+                                const std::vector<IndexSet> &col_parallel_partitioning,
+                                const std::vector<IndexSet> &writable_rows,
+                                const MPI_Comm &communicator)
+  {
+    AssertDimension(writable_rows.size(), row_parallel_partitioning.size());
+    dealii::BlockSparsityPatternBase<SparsityPattern>::
+    reinit(row_parallel_partitioning.size(), col_parallel_partitioning.size());
+    for (size_type br=0; br<row_parallel_partitioning.size(); ++br)
+      for (size_type bc=0; bc<col_parallel_partitioning.size(); ++bc)
+        this->block(br,bc).reinit(row_parallel_partitioning[br],
+                                  col_parallel_partitioning[bc],
+                                  writable_rows[br],
+                                  communicator);
+    this->collect_sizes();
+  }
+
+}
+
+#endif
+
+template class BlockSparsityPatternBase<SparsityPattern>;
+template class BlockSparsityPatternBase<DynamicSparsityPattern>;
+#ifdef DEAL_II_WITH_TRILINOS
+template class BlockSparsityPatternBase<TrilinosWrappers::SparsityPattern>;
+#endif // DEAL_II_WITH_TRILINOS
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/lac/block_vector.cc b/source/lac/block_vector.cc
new file mode 100644
index 0000000..d1a440f
--- /dev/null
+++ b/source/lac/block_vector.cc
@@ -0,0 +1,33 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/lac/block_vector.templates.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+#include "block_vector.inst"
+
+// these functions can't be generated by the preprocessor since
+// the template arguments need to be different
+#ifndef DEAL_II_EXPLICIT_CONSTRUCTOR_BUG
+template BlockVector<double>::BlockVector (const BlockVector<float> &);
+template BlockVector<float>::BlockVector (const BlockVector<double> &);
+
+template BlockVector<std::complex<double> >::BlockVector (const BlockVector<std::complex<float> > &);
+template BlockVector<std::complex<float> >::BlockVector (const BlockVector<std::complex<double> > &);
+#endif // !DEAL_II_EXPLICIT_CONSTRUCTOR_BUG
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/lac/block_vector.inst.in b/source/lac/block_vector.inst.in
new file mode 100644
index 0000000..4e69f2d
--- /dev/null
+++ b/source/lac/block_vector.inst.in
@@ -0,0 +1,39 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+for (S : REAL_SCALARS)   // expanded once for each S in REAL_SCALARS
+  {
+    template class BlockVector<S>;
+  }
+
+for (S1, S2 : REAL_SCALARS)   // S1 and S2 each range over REAL_SCALARS
+  {
+    template void BlockVector<S1>::reinit<S2>(const BlockVector<S2>&,
+                                              const bool);
+  }
+
+
+for (S : COMPLEX_SCALARS)   // expanded once for each S in COMPLEX_SCALARS
+  {
+    template class BlockVector<S>;
+  }
+
+for (S1, S2 : COMPLEX_SCALARS)   // S1 and S2 each range over COMPLEX_SCALARS
+  {
+    template void BlockVector<S1>::reinit<S2>(const BlockVector<S2>&,
+                                              const bool);
+  }
diff --git a/source/lac/chunk_sparse_matrix.cc b/source/lac/chunk_sparse_matrix.cc
new file mode 100644
index 0000000..cc3d321
--- /dev/null
+++ b/source/lac/chunk_sparse_matrix.cc
@@ -0,0 +1,21 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/chunk_sparse_matrix.templates.h>
+#include <deal.II/lac/block_vector.h>
+
+DEAL_II_NAMESPACE_OPEN
+#include "chunk_sparse_matrix.inst"
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/lac/chunk_sparse_matrix.inst.in b/source/lac/chunk_sparse_matrix.inst.in
new file mode 100644
index 0000000..b951fc0
--- /dev/null
+++ b/source/lac/chunk_sparse_matrix.inst.in
@@ -0,0 +1,228 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+// real instantiations
+
+for (S : REAL_SCALARS)   // expanded once for each S in REAL_SCALARS
+  {
+    template class ChunkSparseMatrix<S>;
+  }
+
+
+
+for (S1, S2 : REAL_SCALARS)   // S1 and S2 each range over REAL_SCALARS
+  {
+    template ChunkSparseMatrix<S1> &
+    ChunkSparseMatrix<S1>::copy_from<S2> (const ChunkSparseMatrix<S2> &);
+
+    template void
+    ChunkSparseMatrix<S1>::copy_from<S2> (const FullMatrix<S2> &);
+
+    template void
+    ChunkSparseMatrix<S1>::add<S2> (const S1, const ChunkSparseMatrix<S2> &);
+  }
+
+
+for (S1, S2 : REAL_SCALARS)   // S1 and S2 each range over REAL_SCALARS
+  {
+    template S2
+      ChunkSparseMatrix<S1>::
+      matrix_norm_square<S2> (const Vector<S2> &) const;
+
+    template S2
+      ChunkSparseMatrix<S1>::
+      matrix_scalar_product<S2> (const Vector<S2> &,
+                                 const Vector<S2> &) const;
+
+    template S2 ChunkSparseMatrix<S1>::
+      residual<S2> (Vector<S2> &,
+                    const Vector<S2> &,
+                    const Vector<S2> &) const;
+
+    template void ChunkSparseMatrix<S1>::
+      precondition_SSOR<S2> (Vector<S2> &,
+                             const Vector<S2> &,
+                             const S1) const;
+
+    template void ChunkSparseMatrix<S1>::
+      precondition_SOR<S2> (Vector<S2> &,
+                            const Vector<S2> &,
+                            const S1) const;
+
+    template void ChunkSparseMatrix<S1>::
+      precondition_TSOR<S2> (Vector<S2> &,
+                             const Vector<S2> &,
+                             const S1) const;
+
+    template void ChunkSparseMatrix<S1>::
+      precondition_Jacobi<S2> (Vector<S2> &,
+                               const Vector<S2> &,
+                               const S1) const;
+
+    template void ChunkSparseMatrix<S1>::
+      SOR<S2> (Vector<S2> &,
+               const S1) const;
+    template void ChunkSparseMatrix<S1>::
+      TSOR<S2> (Vector<S2> &,
+                const S1) const;
+    template void ChunkSparseMatrix<S1>::
+      SSOR<S2> (Vector<S2> &,
+                const S1) const;
+    template void ChunkSparseMatrix<S1>::
+      PSOR<S2> (Vector<S2> &,
+                const std::vector<types::global_dof_index>&,
+                const std::vector<types::global_dof_index>&,
+                const S1) const;
+    template void ChunkSparseMatrix<S1>::
+      TPSOR<S2> (Vector<S2> &,
+                 const std::vector<types::global_dof_index>&,
+                 const std::vector<types::global_dof_index>&,
+                 const S1) const;
+    template void ChunkSparseMatrix<S1>::
+      SOR_step<S2> (Vector<S2> &,
+                    const Vector<S2> &,
+                    const S1) const;
+    template void ChunkSparseMatrix<S1>::
+      TSOR_step<S2> (Vector<S2> &,
+                     const Vector<S2> &,
+                     const S1) const;
+    template void ChunkSparseMatrix<S1>::
+      SSOR_step<S2> (Vector<S2> &,
+                     const Vector<S2> &,
+                     const S1) const;
+  }
+
+
+for (S1, S2, S3 : REAL_SCALARS;   // matrix, destination and source scalars
+     V1, V2     : DEAL_II_VEC_TEMPLATES)   // destination and source vectors
+  {
+    template void ChunkSparseMatrix<S1>::
+      vmult (V1<S2> &, const V2<S3> &) const;
+    template void ChunkSparseMatrix<S1>::
+      Tvmult (V1<S2> &, const V2<S3> &) const;
+    template void ChunkSparseMatrix<S1>::
+      vmult_add (V1<S2> &, const V2<S3> &) const;
+    template void ChunkSparseMatrix<S1>::
+      Tvmult_add (V1<S2> &, const V2<S3> &) const;
+  }
+
+
+
+// complex instantiations (currently disabled: all of them are commented out)
+
+// for (S : COMPLEX_SCALARS)
+//   {
+//     template class ChunkSparseMatrix<S>;
+//   }
+
+
+
+// for (S1, S2 : COMPLEX_SCALARS)
+//   {
+//     template ChunkSparseMatrix<S1> &
+//       ChunkSparseMatrix<S1>::copy_from<S2> (const ChunkSparseMatrix<S2> &);
+
+//     template 
+//       void ChunkSparseMatrix<S1>::copy_from<S2> (const FullMatrix<S2> &);
+
+//     template void ChunkSparseMatrix<S1>::add<S2> (const S1,
+// 					     const ChunkSparseMatrix<S2> &);
+//   }
+
+
+// for (S1, S2 : COMPLEX_SCALARS)
+//   {
+//     template S2
+//       ChunkSparseMatrix<S1>::
+//       matrix_norm_square<S2> (const Vector<S2> &) const;
+
+//     template S2
+//       ChunkSparseMatrix<S1>::
+//       matrix_scalar_product<S2> (const Vector<S2> &,
+// 				 const Vector<S2> &) const;
+
+//     template S2 ChunkSparseMatrix<S1>::
+//       residual<S2> (Vector<S2> &,
+// 		    const Vector<S2> &,
+// 		    const Vector<S2> &) const;
+
+//     template void ChunkSparseMatrix<S1>::
+//       precondition_SSOR<S2> (Vector<S2> &,
+// 			     const Vector<S2> &,
+// 			     const S1) const;
+
+//     template void ChunkSparseMatrix<S1>::
+//       precondition_SOR<S2> (Vector<S2> &,
+// 			    const Vector<S2> &,
+// 			    const S1) const;
+
+//     template void ChunkSparseMatrix<S1>::
+//       precondition_TSOR<S2> (Vector<S2> &,
+// 			     const Vector<S2> &,
+// 			     const S1) const;
+
+//     template void ChunkSparseMatrix<S1>::
+//       precondition_Jacobi<S2> (Vector<S2> &,
+// 			       const Vector<S2> &,
+// 			       const S1) const;
+
+//     template void ChunkSparseMatrix<S1>::
+//       SOR<S2> (Vector<S2> &,
+// 	       const S1) const;
+//     template void ChunkSparseMatrix<S1>::
+//       TSOR<S2> (Vector<S2> &,
+// 		const S1) const;
+//     template void ChunkSparseMatrix<S1>::
+//       SSOR<S2> (Vector<S2> &,
+// 		const S1) const;
+//     template void ChunkSparseMatrix<S1>::
+//       PSOR<S2> (Vector<S2> &,
+// 		const std::vector<types::global_dof_index>&,
+// 		const std::vector<types::global_dof_index>&,
+// 		const S1) const;
+//     template void ChunkSparseMatrix<S1>::
+//       TPSOR<S2> (Vector<S2> &,
+// 		 const std::vector<types::global_dof_index>&,
+// 		 const std::vector<types::global_dof_index>&,
+// 		 const S1) const;
+//     template void ChunkSparseMatrix<S1>::
+//       SOR_step<S2> (Vector<S2> &,
+// 		    const Vector<S2> &,
+// 		    const S1) const;
+//     template void ChunkSparseMatrix<S1>::
+//       TSOR_step<S2> (Vector<S2> &,
+// 		     const Vector<S2> &,
+// 		     const S1) const;
+//     template void ChunkSparseMatrix<S1>::
+//       SSOR_step<S2> (Vector<S2> &,
+// 		     const Vector<S2> &, 
+// 		     const S1) const;
+//   }
+
+
+// for (S1, S2, S3 : COMPLEX_SCALARS;
+//      V1, V2     : DEAL_II_VEC_TEMPLATES)
+//   {
+//     template void ChunkSparseMatrix<S1>::
+//       vmult (V1<S2> &, const V2<S3> &) const;
+//     template void ChunkSparseMatrix<S1>::
+//       Tvmult (V1<S2> &, const V2<S3> &) const;
+//     template void ChunkSparseMatrix<S1>::
+//       vmult_add (V1<S2> &, const V2<S3> &) const;
+//     template void ChunkSparseMatrix<S1>::
+//       Tvmult_add (V1<S2> &, const V2<S3> &) const;
+//   }
diff --git a/source/lac/chunk_sparsity_pattern.cc b/source/lac/chunk_sparsity_pattern.cc
new file mode 100644
index 0000000..1f31479
--- /dev/null
+++ b/source/lac/chunk_sparsity_pattern.cc
@@ -0,0 +1,641 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/lac/chunk_sparsity_pattern.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/lac/full_matrix.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+ChunkSparsityPattern::ChunkSparsityPattern ()
+{
+  reinit (0,0,0,1); // m=0, n=0, max_per_row=0, chunk_size=1: an empty pattern
+}
+
+
+
+ChunkSparsityPattern::ChunkSparsityPattern (const ChunkSparsityPattern &s)
+  :
+  Subscriptor(),
+  chunk_size (s.chunk_size),
+  sparsity_pattern(s.sparsity_pattern)
+{
+  Assert (s.rows == 0, ExcInvalidConstructorCall()); // only empty (0x0)
+  Assert (s.cols == 0, ExcInvalidConstructorCall()); // patterns are copyable
+  // a chunk size of 0 is invalid: reinit() asserts on it and divides by it
+  reinit (0,0,0,1);
+}
+
+
+
+ChunkSparsityPattern::ChunkSparsityPattern (const size_type m,
+                                            const size_type n,
+                                            const size_type max_per_row,
+                                            const size_type chunk_size)
+{
+  Assert (chunk_size > 0, ExcInvalidNumber (chunk_size));
+  // all of the setup is done by the reinit() overload with the same arguments
+  reinit (m,n,max_per_row, chunk_size);
+}
+
+
+
+ChunkSparsityPattern::ChunkSparsityPattern (
+  const size_type m,
+  const size_type n,
+  const std::vector<size_type> &row_lengths,
+  const size_type chunk_size)
+{
+  Assert (chunk_size > 0, ExcInvalidNumber (chunk_size));
+  // forward to reinit(), which expects one length per row in row_lengths
+  reinit (m, n, row_lengths, chunk_size);
+}
+
+
+
+ChunkSparsityPattern::ChunkSparsityPattern (const size_type n,
+                                            const size_type max_per_row,
+                                            const size_type chunk_size)
+{
+  reinit (n, n, max_per_row, chunk_size); // square n x n; reinit() checks chunk_size > 0
+}
+
+
+
+ChunkSparsityPattern::ChunkSparsityPattern (
+  const size_type                m,
+  const std::vector<size_type > &row_lengths,
+  const size_type chunk_size)
+{
+  Assert (chunk_size > 0, ExcInvalidNumber (chunk_size));
+  // square case: m is used as number of rows and as number of columns
+  reinit (m, m, row_lengths, chunk_size);
+}
+
+
+
+ChunkSparsityPattern::~ChunkSparsityPattern ()
+{} // empty body: nothing is done here explicitly
+
+
+
+ChunkSparsityPattern &
+ChunkSparsityPattern::operator = (const ChunkSparsityPattern &s)
+{
+  Assert (s.rows == 0, ExcInvalidConstructorCall());
+  Assert (s.cols == 0, ExcInvalidConstructorCall());
+  // i.e. only a pattern with zero rows and columns may be assigned from
+  // perform the checks in the underlying object as well
+  sparsity_pattern = s.sparsity_pattern;
+  // NOTE(review): rows, cols and chunk_size of *this are left untouched
+  return *this;
+}
+
+
+
+void
+ChunkSparsityPattern::reinit (const size_type m,
+                              const size_type n,
+                              const size_type max_per_row,
+                              const size_type chunk_size)
+{
+  Assert (chunk_size > 0, ExcInvalidNumber (chunk_size));
+  // give each of the m rows the same length max_per_row and let the
+  // reinit() overload that takes a vector of row lengths do the work
+  const std::vector<size_type> uniform_lengths (m, max_per_row);
+  reinit (m, n, uniform_lengths, chunk_size);
+}
+
+
+
+void
+ChunkSparsityPattern::reinit (
+  const size_type m,
+  const size_type n,
+  const VectorSlice<const std::vector<size_type> > &row_lengths,
+  const size_type chunk_size)
+{
+  // Set up an m x n pattern stored in chunks of chunk_size x chunk_size
+  // entries; row_lengths[i] is the number of entries to allow in row i.
+  Assert (row_lengths.size() == m, ExcInvalidNumber (m));
+  Assert (chunk_size > 0, ExcInvalidNumber (chunk_size));
+
+  this->chunk_size = chunk_size;
+  rows             = m;
+  cols             = n;
+
+  // the underlying object has one row/column per chunk row/column, so
+  // m and n are divided by chunk_size and rounded up. in integer
+  // arithmetic, rounding up m/chunk_size equals (m+chunk_size-1)/chunk_size
+  const size_type m_chunks = (m+chunk_size-1) / chunk_size;
+  const size_type n_chunks = (n+chunk_size-1) / chunk_size;
+
+  // maximum number of chunks in each chunk row. in the worst case all
+  // entries of the rows forming a chunk row lie in different chunks, so
+  // the lengths of these rows are added up (example: chunk_size==2,
+  // entries of row zero at columns {0,2} and of row one at {4,6} -->
+  // 4 chunks are needed for the first chunk row)
+  std::vector<unsigned int> chunk_row_lengths (m_chunks, 0);
+  for (size_type row=0; row<m; ++row)
+    chunk_row_lengths[row/chunk_size] += row_lengths[row];
+
+  // for the case that the reduced sparsity pattern optimizes the diagonal
+  // but the actual sparsity pattern does not, take one more entry per
+  // chunk row to fit the user-required entry
+  const bool only_reduced_is_square = (m != n) && (m_chunks == n_chunks);
+  if (only_reduced_is_square)
+    for (std::vector<unsigned int>::iterator length = chunk_row_lengths.begin();
+         length != chunk_row_lengths.end(); ++length)
+      ++(*length);
+
+  // pass the chunk-level sizes and row lengths down to the underlying object
+  sparsity_pattern.reinit (m_chunks, n_chunks, chunk_row_lengths);
+}
+
+
+
+void
+ChunkSparsityPattern::compress ()
+{
+  sparsity_pattern.compress (); // simply forwarded to the underlying pattern
+}
+
+
+
+template <typename SparsityPatternType>
+void
+ChunkSparsityPattern::copy_from (const SparsityPatternType &dsp,
+                                 const size_type            chunk_size)
+{
+  // Build this pattern from the entry-wise pattern dsp: a chunk is stored
+  // whenever dsp has at least one entry inside it.
+  Assert (chunk_size > 0, ExcInvalidNumber (chunk_size));
+
+  const size_type n_source_rows = dsp.n_rows();
+  rows = n_source_rows;
+  cols = dsp.n_cols();
+  this->chunk_size = chunk_size;
+
+  if (chunk_size == 1)
+    // simple case: chunks and entries coincide, so just use the given
+    // sparsity pattern
+    sparsity_pattern.copy_from (dsp);
+  else
+    {
+      // number of chunk rows/columns: sizes over chunk_size, rounded up
+      const size_type m_chunks = (n_source_rows+chunk_size-1) / chunk_size,
+                      n_chunks = (dsp.n_cols()+chunk_size-1) / chunk_size;
+
+      // collect all chunk-level entries in a temporary dynamic pattern
+      DynamicSparsityPattern chunk_entries (m_chunks, n_chunks);
+      for (size_type row = 0; row<n_source_rows; ++row)
+        // TODO: This could be made more efficient if we cached the
+        // previous column and only called add() if the previous and the
+        // current column lead to different chunk columns
+        for (typename SparsityPatternType::iterator entry = dsp.begin(row);
+             entry != dsp.end(row); ++entry)
+          chunk_entries.add (row/chunk_size, entry->column()/chunk_size);
+
+      // ... and then initialize the underlying small sparsity pattern
+      sparsity_pattern.copy_from (chunk_entries);
+    }
+}
+
+
+
+
+template <typename number>
+void ChunkSparsityPattern::copy_from (const FullMatrix<number> &matrix,
+                                      const size_type chunk_size)
+{
+  // Make this the pattern of the nonzero entries of the given full matrix.
+  Assert (chunk_size > 0, ExcInvalidNumber (chunk_size));
+
+  const size_type n_matrix_rows = matrix.m();
+  const size_type n_matrix_cols = matrix.n();
+  const bool      is_square     = (n_matrix_rows == n_matrix_cols);
+
+  // step 1: count the nonzero entries of each row. if m==n, also make
+  // room for the diagonal entry in case it is zero, since that entry is
+  // always allocated then
+  std::vector<size_type> entries_per_row (n_matrix_rows, 0);
+  for (size_type row=0; row<n_matrix_rows; ++row)
+    {
+      size_type &count = entries_per_row[row];
+      for (size_type col=0; col<n_matrix_cols; ++col)
+        if (matrix(row,col) != 0)
+          ++count;
+      if (is_square && (matrix(row,row) == 0))
+        ++count;
+    }
+
+  // step 2: allocate the pattern with these row lengths
+  reinit (n_matrix_rows, n_matrix_cols, entries_per_row, chunk_size);
+
+  // step 3: enter the nonzero entries, then compress
+  for (size_type row=0; row<n_matrix_rows; ++row)
+    for (size_type col=0; col<n_matrix_cols; ++col)
+      if (matrix(row,col) != 0)
+        add (row,col);
+
+  compress ();
+}
+
+
+
+void
+ChunkSparsityPattern::reinit (
+  const size_type m,
+  const size_type n,
+  const std::vector<size_type> &row_lengths,
+  const size_type chunk_size)
+{
+  Assert (chunk_size > 0, ExcInvalidNumber (chunk_size));
+  // wrap the vector in a slice and let the VectorSlice overload do the work
+  reinit(m, n, make_slice(row_lengths), chunk_size);
+}
+
+
+
+namespace internal
+{
+  namespace // file-local helpers, called from create_from() in this file
+  {
+    template <typename SparsityPatternType> // generic source type
+    void copy_sparsity (const SparsityPatternType &src,
+                        SparsityPattern           &dst)
+    {
+      dst.copy_from(src);
+    }
+    // source is a SparsityPattern itself: assign instead of copy_from()
+    void copy_sparsity (const SparsityPattern &src,
+                        SparsityPattern       &dst)
+    {
+      dst = src;
+    }
+  }
+}
+
+
+
+template <typename Sparsity>
+void
+ChunkSparsityPattern::create_from
+(const unsigned int m,
+ const unsigned int n,
+ const Sparsity    &sparsity_pattern_for_chunks,
+ const unsigned int chunk_size_in,
+ const bool) // unnamed parameter, not used in this function
+{
+  Assert (m > (sparsity_pattern_for_chunks.n_rows()-1) * chunk_size_in &&
+          m <= sparsity_pattern_for_chunks.n_rows() * chunk_size_in,
+          ExcMessage("Number of rows m is not compatible with chunk size "
+                     "and number of rows in sparsity pattern for the chunks."));
+  Assert (n > (sparsity_pattern_for_chunks.n_cols()-1) * chunk_size_in &&
+          n <= sparsity_pattern_for_chunks.n_cols() * chunk_size_in,
+          ExcMessage("Number of columns n is not compatible with chunk size "
+                     "and number of columns in sparsity pattern for the chunks."));
+  // take over the given chunk-level pattern, then record the true sizes
+  internal::copy_sparsity(sparsity_pattern_for_chunks, sparsity_pattern);
+  chunk_size = chunk_size_in;
+  rows = m;
+  cols = n;
+}
+
+
+
+bool
+ChunkSparsityPattern::empty () const
+{
+  return sparsity_pattern.empty(); // decided by the underlying pattern alone
+}
+
+
+
+ChunkSparsityPattern::size_type
+ChunkSparsityPattern::max_entries_per_row () const
+{
+  return sparsity_pattern.max_entries_per_row() * chunk_size; // chunks -> entries
+}
+
+
+
+void
+ChunkSparsityPattern::add (const size_type i,
+                           const size_type j)
+{
+  Assert (i<rows, ExcInvalidIndex(i,rows));
+  Assert (j<cols, ExcInvalidIndex(j,cols));
+  // entry (i,j) lies in chunk (i/chunk_size, j/chunk_size); add that chunk
+  sparsity_pattern.add (i/chunk_size, j/chunk_size);
+}
+
+
+bool
+ChunkSparsityPattern::exists (const size_type i,
+                              const size_type j) const
+{
+  Assert (i<rows, ExcIndexRange(i,0,rows));
+  Assert (j<cols, ExcIndexRange(j,0,cols));
+  // only whole chunks are stored, so ask for the chunk that contains (i,j)
+  return sparsity_pattern.exists (i/chunk_size,
+                                  j/chunk_size);
+}
+
+
+
+void
+ChunkSparsityPattern::symmetrize ()
+{
+  // matrix must be square. note that for some matrix sizes, the current
+  // sparsity pattern may not be square even if the underlying sparsity
+  // pattern is (e.g. a 10x11 matrix with chunk_size 4)
+  Assert (rows==cols, ExcNotQuadratic());
+  // the actual work is done on the chunk-level pattern
+  sparsity_pattern.symmetrize ();
+}
+
+
+
+ChunkSparsityPattern::size_type
+ChunkSparsityPattern::row_length (const size_type i) const
+{
+  Assert (i<rows, ExcIndexRange(i,0,rows));
+
+  const size_type chunk_row = i/chunk_size;
+  // number of columns actually present in the last chunk column; zero
+  // means that the columns fill all chunks completely (no padding)
+  const size_type last_chunk_width = n_cols() % chunk_size;
+
+  // without padding in the columns every chunk contributes chunk_size
+  if (last_chunk_width == 0)
+    return sparsity_pattern.row_length (chunk_row) * chunk_size;
+
+  // otherwise walk over the chunks of this chunk row and count only
+  // last_chunk_width columns for a chunk in the last chunk column
+  const size_type last_chunk_column = sparsity_pattern.n_cols() - 1;
+  unsigned int n = 0;
+  for (SparsityPattern::const_iterator p = sparsity_pattern.begin(chunk_row);
+       p != sparsity_pattern.end(chunk_row); ++p)
+    n += (p->column() != last_chunk_column ? chunk_size : last_chunk_width);
+  return n;
+}
+
+
+
+ChunkSparsityPattern::size_type
+ChunkSparsityPattern::n_nonzero_elements () const
+{
+  // Count the entries of the n_rows() x n_cols() pattern that are covered
+  // by the chunks stored in the underlying pattern. Chunks in the last
+  // chunk row/column may reach beyond the matrix; the part that sticks
+  // out must not be counted.
+  const size_type row_remainder = n_rows() % chunk_size;
+  const size_type col_remainder = n_cols() % chunk_size;
+
+  if ((row_remainder == 0) && (col_remainder == 0))
+    // all chunks lie completely inside the matrix
+    return (sparsity_pattern.n_nonzero_elements() *
+            chunk_size *
+            chunk_size);
+
+  if (col_remainder == 0)
+    {
+      // columns align with chunks, but not rows: start from the full
+      // count and subtract the rows that the chunks in the last chunk
+      // row have too many
+      size_type n = sparsity_pattern.n_nonzero_elements() *
+                    chunk_size *
+                    chunk_size;
+      n -= (sparsity_pattern.n_rows() * chunk_size - n_rows()) *
+           sparsity_pattern.row_length(sparsity_pattern.n_rows()-1) *
+           chunk_size;
+      return n;
+    }
+
+  // columns don't align: iterate over all chunks and add up their true
+  // extents. follow the advice in the documentation of the sparsity
+  // pattern iterators to do the loop over individual rows, rather than
+  // all elements.
+  //
+  // number of rows (columns) of the matrix inside the last chunk row
+  // (column). a remainder of zero means that the last chunk row is
+  // complete and has chunk_size rows, not zero rows -- using the bare
+  // remainder here would drop the whole last chunk row from the count
+  // whenever the rows align but the columns do not
+  const size_type rows_in_last_chunk_row
+    = (row_remainder == 0 ? chunk_size : row_remainder);
+  const size_type cols_in_last_chunk_column = col_remainder;
+
+  size_type n = 0;
+  for (size_type row = 0; row < sparsity_pattern.n_rows(); ++row)
+    {
+      const size_type chunk_height
+        = (row != sparsity_pattern.n_rows() - 1
+           ?
+           chunk_size
+           :
+           rows_in_last_chunk_row);
+
+      SparsityPattern::const_iterator p = sparsity_pattern.begin(row);
+      for (; p!=sparsity_pattern.end(row); ++p)
+        if (p->column() != sparsity_pattern.n_cols() - 1)
+          n += chunk_height * chunk_size;
+        else
+          // chunk in the last chunk column
+          n += chunk_height * cols_in_last_chunk_column;
+    }
+
+  return n;
+}
+
+
+
+void
+ChunkSparsityPattern::print (std::ostream &out) const
+{
+  // Write one line of the form [row,col,col,...] for each row.
+  Assert ((sparsity_pattern.rowstart!=0) && (sparsity_pattern.colnums!=0),
+          ExcEmptyObject());
+  AssertThrow (out, ExcIO());
+
+  for (size_type block_row=0; block_row<sparsity_pattern.rows; ++block_row)
+    for (size_type r=0;
+         (r<chunk_size) && (block_row*chunk_size + r < n_rows()); ++r)
+      {
+        out << '[' << block_row*chunk_size + r;
+        for (size_type k=sparsity_pattern.rowstart[block_row];
+             k<sparsity_pattern.rowstart[block_row+1]; ++k)
+          {
+            const size_type block_col = sparsity_pattern.colnums[k];
+            if (block_col == sparsity_pattern.invalid_entry)
+              continue;
+            for (size_type c=0;
+                 (c<chunk_size) && (block_col*chunk_size + c < n_cols()); ++c)
+              out << ',' << block_col*chunk_size + c;
+          }
+        out << ']' << std::endl;
+      }
+  AssertThrow (out, ExcIO());
+}
+
+
+
+void
+ChunkSparsityPattern::print_gnuplot (std::ostream &out) const
+{
+  // Write one "x y" line per entry, with x the column and y minus the row.
+  Assert ((sparsity_pattern.rowstart!=0) &&
+          (sparsity_pattern.colnums!=0), ExcEmptyObject());
+  AssertThrow (out, ExcIO());
+
+  // each entry of the underlying pattern stands for a chunk_size x
+  // chunk_size block, cut off at the n_rows() x n_cols() extent
+  for (size_type block_row=0; block_row<sparsity_pattern.rows; ++block_row)
+    for (size_type k=sparsity_pattern.rowstart[block_row];
+         k<sparsity_pattern.rowstart[block_row+1]; ++k)
+      {
+        const size_type block_col = sparsity_pattern.colnums[k];
+        if (block_col == sparsity_pattern.invalid_entry)
+          continue;
+        for (size_type c=0;
+             (c<chunk_size) && (block_col*chunk_size + c < n_cols()); ++c)
+          for (size_type r=0;
+               (r<chunk_size) && (block_row*chunk_size + r < n_rows()); ++r)
+            // matrix entries are usually written (i,j) with i vertical and
+            // j horizontal, but gnuplot wants x-y, so swap the order
+            out << block_col*chunk_size + c << " "
+                << -static_cast<signed int>(block_row*chunk_size + r)
+                << std::endl;
+      }
+
+  AssertThrow (out, ExcIO());
+}
+
+
+
+ChunkSparsityPattern::size_type
+ChunkSparsityPattern::bandwidth () const
+{
+  // a bandwidth b of the underlying pattern turns into
+  // b*chunk_size+(chunk_size-1) for the chunky pattern: zero becomes
+  // chunk_size-1, one becomes chunk_size+(chunk_size-1), etc
+  const size_type from_chunks = sparsity_pattern.bandwidth()*chunk_size
+                                + (chunk_size-1);
+  // ... but the result is cut off at max(n(),m())
+  const size_type upper_limit = std::max(n_rows(), n_cols());
+
+  return std::min (from_chunks, upper_limit);
+}
+
+
+
+bool
+ChunkSparsityPattern::stores_only_added_elements () const
+{
+  // add() always stores the whole chunk around an entry, so the answer of
+  // the underlying pattern is only passed on for chunk_size==1
+  return (chunk_size == 1) &&
+         sparsity_pattern.stores_only_added_elements ();
+}
+
+
+
+void
+ChunkSparsityPattern::block_write (std::ostream &out) const
+{
+  AssertThrow (out, ExcIO());
+  // output has the form "[rows cols chunk_size ][<underlying pattern>]"
+  // first the simple objects, bracketed in [...]
+  out << '['
+      << rows << ' '
+      << cols << ' '
+      << chunk_size << ' '
+      << "][";
+  // then the underlying sparsity pattern
+  sparsity_pattern.block_write (out);
+  out << ']';
+  // check the stream once more after everything has been written
+  AssertThrow (out, ExcIO());
+}
+
+
+
+void
+ChunkSparsityPattern::block_read (std::istream &in)
+{
+  AssertThrow (in, ExcIO());
+  // expects the layout that block_write() produces
+  char c;
+  // NOTE(review): the stream state is not re-checked after the reads below
+  // first read in simple data
+  in >> c;
+  AssertThrow (c == '[', ExcIO());
+  in >> rows
+     >> cols
+     >> chunk_size;
+  // closing bracket of the simple data, opening bracket of the pattern
+  in >> c;
+  AssertThrow (c == ']', ExcIO());
+  in >> c;
+  AssertThrow (c == '[', ExcIO());
+
+  // then read the underlying sparsity pattern
+  sparsity_pattern.block_read (in);
+  // finally the closing bracket
+  in >> c;
+  AssertThrow (c == ']', ExcIO());
+}
+
+
+
+std::size_t
+ChunkSparsityPattern::memory_consumption () const
+{
+  return (sizeof(*this) + // size of this object itself, plus ...
+          sparsity_pattern.memory_consumption()); // ... what the underlying pattern reports
+}
+
+
+
+// explicit instantiations of the member function templates defined above
+template
+void ChunkSparsityPattern::copy_from<DynamicSparsityPattern> (const DynamicSparsityPattern &,
+    const size_type);
+template
+void ChunkSparsityPattern::create_from<SparsityPattern>
+(const unsigned int,
+ const unsigned int,
+ const SparsityPattern &,
+ const unsigned int,
+ const bool);
+template
+void ChunkSparsityPattern::create_from<DynamicSparsityPattern>
+(const unsigned int,
+ const unsigned int,
+ const DynamicSparsityPattern &,
+ const unsigned int,
+ const bool);
+template
+void ChunkSparsityPattern::copy_from<float> (const FullMatrix<float> &,
+                                             const size_type);
+template
+void ChunkSparsityPattern::copy_from<double> (const FullMatrix<double> &,
+                                              const size_type);
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/lac/constraint_matrix.cc b/source/lac/constraint_matrix.cc
new file mode 100644
index 0000000..d105f48
--- /dev/null
+++ b/source/lac/constraint_matrix.cc
@@ -0,0 +1,1414 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/lac/constraint_matrix.templates.h>
+
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/block_sparse_matrix.h>
+#include <deal.II/lac/sparse_matrix_ez.h>
+#include <deal.II/lac/chunk_sparse_matrix.h>
+#include <deal.II/lac/block_sparse_matrix_ez.h>
+#include <deal.II/lac/parallel_vector.h>
+#include <deal.II/lac/parallel_block_vector.h>
+#include <deal.II/lac/petsc_vector.h>
+#include <deal.II/lac/petsc_block_vector.h>
+#include <deal.II/lac/petsc_sparse_matrix.h>
+#include <deal.II/lac/petsc_block_sparse_matrix.h>
+#include <deal.II/lac/petsc_parallel_vector.h>
+#include <deal.II/lac/petsc_parallel_block_vector.h>
+#include <deal.II/lac/petsc_parallel_sparse_matrix.h>
+#include <deal.II/lac/petsc_parallel_block_sparse_matrix.h>
+#include <deal.II/lac/trilinos_vector.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+#include <deal.II/lac/trilinos_sparse_matrix.h>
+#include <deal.II/lac/trilinos_block_sparse_matrix.h>
+#include <deal.II/lac/matrix_block.h>
+
+#include <algorithm>
+#include <numeric>
+#include <set>
+#include <ostream>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+// Definition of the static member variable, a default-constructed table
+const Table<2,bool> ConstraintMatrix::default_empty_table = Table<2,bool>();
+
+
+
+bool
+ConstraintMatrix::check_zero_weight (const std::pair<size_type, double> &p)
+{
+  return (p.second == 0); // predicate handed to std::remove_if in close()
+}
+
+
+
+bool
+ConstraintMatrix::ConstraintLine::operator < (const ConstraintLine &a) const
+{
+  return line < a.line; // order by line index only; close() sorts with this
+}
+
+
+
+bool
+ConstraintMatrix::ConstraintLine::operator == (const ConstraintLine &a) const
+{
+  return line == a.line; // equality, too, looks at the line index only
+}
+
+
+
+std::size_t
+ConstraintMatrix::ConstraintLine::memory_consumption () const
+{ // sum of what line, entries and inhomogeneity are reported to occupy
+  return (MemoryConsumption::memory_consumption (line) +
+          MemoryConsumption::memory_consumption (entries) +
+          MemoryConsumption::memory_consumption (inhomogeneity));
+}
+
+
+
+void
+ConstraintMatrix::add_lines (const std::set<size_type> &lines)
+{
+  std::set<size_type>::const_iterator index = lines.begin();
+  for (; index != lines.end(); ++index)
+    add_line (*index); // one add_line() call per index in the set
+}
+
+
+
+void
+ConstraintMatrix::add_lines (const std::vector<bool> &lines)
+{
+  for (size_type index=0; index<lines.size(); ++index)
+    if (lines[index]) // flag set: add a line for this index
+      add_line (index);
+}
+
+
+
+void
+ConstraintMatrix::add_lines (const IndexSet &lines)
+{
+  for (size_type i=0; i<lines.n_elements(); ++i)
+    add_line (lines.nth_index_in_set(i)); // i-th index stored in the set
+}
+
+
+
+void
+ConstraintMatrix::add_entries
+(const size_type                                  line,
+ const std::vector<std::pair<size_type,double> > &col_val_pairs)
+{
+  // Append the given (column, weight) pairs to the constraint for 'line'.
+  Assert (sorted==false, ExcMatrixIsClosed());
+  Assert (is_constrained(line), ExcLineInexistant(line));
+
+  ConstraintLine *line_ptr = &lines[lines_cache[calculate_line_index(line)]];
+  Assert (line_ptr->line == line, ExcInternalError());
+
+  for (std::vector<std::pair<size_type,double> >::const_iterator
+       col_val_pair = col_val_pairs.begin();
+       col_val_pair!=col_val_pairs.end(); ++col_val_pair)
+    {
+      Assert (line != col_val_pair->first,
+              ExcMessage ("Can't constrain a degree of freedom to itself"));
+
+      // look for an existing entry for this column. in debug mode, also
+      // check that it is the same as the one entered at present
+      bool entry_exists = false;
+      for (ConstraintLine::Entries::const_iterator
+           p=line_ptr->entries.begin();
+           p != line_ptr->entries.end(); ++p)
+        if (p->first == col_val_pair->first)
+          {
+            Assert (p->second == col_val_pair->second,
+                    ExcEntryAlreadyExists(line, col_val_pair->first,
+                                          p->second, col_val_pair->second));
+            entry_exists = true;
+            break;
+          }
+      // in any case: skip this pair if an entry for its column already exists
+      if (entry_exists == false)
+        line_ptr->entries.push_back (*col_val_pair);
+    }
+}
+
+
+
+void ConstraintMatrix::add_selected_constraints
+(const ConstraintMatrix &constraints,
+ const IndexSet         &filter)
+{
+  // take over constraints on elements of 'filter', renumbered within it
+  if (constraints.n_constraints() == 0)
+    return;
+  Assert (filter.size() > constraints.lines.back().line,
+          ExcMessage ("Filter needs to be larger than constraint matrix size."));
+  for (std::vector<ConstraintLine>::const_iterator src=constraints.lines.begin();
+       src!=constraints.lines.end(); ++src)
+    if (filter.is_element(src->line))
+      {
+        const size_type new_line = filter.index_within_set (src->line);
+        add_line (new_line);
+        set_inhomogeneity (new_line, src->inhomogeneity);
+        for (size_type k=0; k<src->entries.size(); ++k)
+          if (filter.is_element(src->entries[k].first)) // others are dropped
+            add_entry (new_line, filter.index_within_set (src->entries[k].first),
+                       src->entries[k].second);
+      }
+}
+
+
+
+void ConstraintMatrix::close ()
+{
+  if (sorted == true)
+    return;
+
+  // sort the lines
+  std::sort (lines.begin(), lines.end());
+
+  // update list of pointers and give the vector a sharp size since we
+  // won't modify the size any more after this point.
+  {
+    std::vector<size_type> new_lines (lines_cache.size(),
+                                      numbers::invalid_size_type);
+    size_type counter = 0;
+    for (std::vector<ConstraintLine>::const_iterator line=lines.begin();
+         line!=lines.end(); ++line, ++counter)
+      new_lines[calculate_line_index(line->line)] = counter;
+    std::swap (lines_cache, new_lines);
+  }
+
+  // in debug mode: check whether we really set the pointers correctly.
+  for (size_type i=0; i<lines_cache.size(); ++i)
+    if (lines_cache[i] != numbers::invalid_size_type)
+      Assert (i == calculate_line_index(lines[lines_cache[i]].line),
+              ExcInternalError());
+
+  // first, strip zero entries, as we have to do that only once
+  for (std::vector<ConstraintLine>::iterator line = lines.begin();
+       line!=lines.end(); ++line)
+    // first remove zero entries. that would mean that in the linear
+    // constraint for a node, x_i = ax_1 + bx_2 + ..., another node times 0
+    // appears. obviously, 0*something can be omitted
+    line->entries.erase (std::remove_if (line->entries.begin(),
+                                         line->entries.end(),
+                                         &check_zero_weight),
+                         line->entries.end());
+
+
+
+#ifdef DEBUG
+  // In debug mode we are computing an estimate for the maximum number
+  // of constraints so that we can bail out if there is a cycle in the
+  // constraints (which is easier than searching for cycles in the graph).
+  //
+  // Let us figure out the largest dof index. This is an upper bound for the
+  // number of constraints because it is an approximation for the number of dofs
+  // in our system.
+  size_type largest_idx = 0;
+  for (std::vector<ConstraintLine>::iterator line = lines.begin();
+       line!=lines.end(); ++line)
+    {
+      for (ConstraintLine::Entries::iterator it = line->entries.begin(); it!=line->entries.end(); ++it)
+        {
+          largest_idx=std::max(largest_idx, it->first);
+        }
+    }
+#endif
+
+  // replace references to dofs that are themselves constrained. note that
+  // because we may replace references to other dofs that may themselves be
+  // constrained to third ones, we have to iterate over all this until we
+  // replace no chains of constraints any more
+  //
+  // the iteration replaces references to constrained degrees of freedom by
+  // second-order references. for example if x3=x0/2+x2/2 and x2=x0/2+x1/2,
+  // then the new list will be x3=x0/2+x0/4+x1/4. note that x0 appear
+  // twice. we will throw this duplicate out in the following step, where
+  // we sort the list so that throwing out duplicates becomes much more
+  // efficient. also, we have to do it only once, rather than in each
+  // iteration
+  size_type iteration = 0;
+  while (true)
+    {
+      bool chained_constraint_replaced = false;
+
+      for (std::vector<ConstraintLine>::iterator line = lines.begin();
+           line!=lines.end(); ++line)
+        {
+#ifdef DEBUG
+          // we need to keep track of how many replacements we do in this line, because we can
+          // end up in a cycle A->B->C->A without the number of entries growing.
+          size_type n_replacements = 0;
+#endif
+
+          // loop over all entries of this line (including ones that we
+          // have appended in this go around) and see whether they are
+          // further constrained. ignore elements that we don't store on
+          // the current processor
+          size_type entry = 0;
+          while (entry < line->entries.size())
+            if (((local_lines.size() == 0)
+                 ||
+                 (local_lines.is_element(line->entries[entry].first)))
+                &&
+                is_constrained (line->entries[entry].first))
+              {
+                // ok, this entry is further constrained:
+                chained_constraint_replaced = true;
+
+                // look up the chain of constraints for this entry
+                const size_type  dof_index = line->entries[entry].first;
+                const double     weight = line->entries[entry].second;
+
+                Assert (dof_index != line->line,
+                        ExcMessage ("Cycle in constraints detected!"));
+
+                const ConstraintLine *constrained_line =
+                  &lines[lines_cache[calculate_line_index(dof_index)]];
+                Assert (constrained_line->line == dof_index,
+                        ExcInternalError());
+
+                // now we have to replace an entry by its expansion. we do
+                // that by overwriting the entry by the first entry of the
+                // expansion and adding the remaining ones to the end,
+                // where we will later process them once more
+                //
+                // we can of course only do that if the DoF that we are
+                // currently handle is constrained by a linear combination
+                // of other dofs:
+                if (constrained_line->entries.size() > 0)
+                  {
+                    for (size_type i=0; i<constrained_line->entries.size(); ++i)
+                      Assert (dof_index != constrained_line->entries[i].first,
+                              ExcMessage ("Cycle in constraints detected!"));
+
+                    // replace first entry, then tack the rest to the end
+                    // of the list
+                    line->entries[entry] =
+                      std::make_pair (constrained_line->entries[0].first,
+                                      constrained_line->entries[0].second *
+                                      weight);
+
+                    for (size_type i=1; i<constrained_line->entries.size(); ++i)
+                      line->entries
+                      .push_back (std::make_pair (constrained_line->entries[i].first,
+                                                  constrained_line->entries[i].second *
+                                                  weight));
+
+#ifdef DEBUG
+                    // keep track of how many entries we replace in this
+                    // line. If we do more than there are constraints or
+                    // dofs in our system, we must have a cycle.
+                    ++n_replacements;
+                    Assert(n_replacements/2<largest_idx, ExcMessage("Cycle in constraints detected!"));
+                    if (n_replacements/2>=largest_idx)
+                      return; // this enables us to test for this Exception.
+#endif
+                  }
+                else
+                  // the DoF that we encountered is not constrained by a
+                  // linear combination of other dofs but is equal to just
+                  // the inhomogeneity (i.e. its chain of entries is
+                  // empty). in that case, we can't just overwrite the
+                  // current entry, but we have to actually eliminate it
+                  {
+                    line->entries.erase (line->entries.begin()+entry);
+                  }
+
+                line->inhomogeneity += constrained_line->inhomogeneity *
+                                       weight;
+
+                // now that we're here, do not increase index by one but
+                // rather make another pass for the present entry because
+                // we have replaced the present entry by another one, or
+                // because we have deleted it and shifted all following
+                // ones one forward
+              }
+            else
+              // entry not further constrained. just move ahead by one
+              ++entry;
+        }
+
+      // if we didn't do anything in this round, then quit the loop
+      if (chained_constraint_replaced == false)
+        break;
+
+      // increase iteration count. note that we should not iterate more
+      // times than there are constraints, since this puts a natural upper
+      // bound on the length of constraint chains
+      ++iteration;
+      Assert (iteration <= lines.size(), ExcInternalError());
+    }
+
+  // finally sort the entries and re-scale them if necessary. in this step,
+  // we also throw out duplicates as mentioned above. moreover, as some
+  // entries might have had zero weights, we replace them by a vector with
+  // sharp sizes.
+  for (std::vector<ConstraintLine>::iterator line = lines.begin();
+       line!=lines.end(); ++line)
+    {
+      std::sort (line->entries.begin(), line->entries.end());
+
+      // loop over the now sorted list and see whether any of the entries
+      // references the same dofs more than once in order to find how many
+      // non-duplicate entries we have. This lets us allocate the correct
+      // amount of memory for the constraint entries.
+      size_type duplicates = 0;
+      for (size_type i=1; i<line->entries.size(); ++i)
+        if (line->entries[i].first == line->entries[i-1].first)
+          duplicates++;
+
+      if (duplicates > 0 || line->entries.size() < line->entries.capacity())
+        {
+          ConstraintLine::Entries new_entries;
+
+          // if we have no duplicates, copy verbatim the entries. this way,
+          // the final size of the vector is correct.
+          if (duplicates == 0)
+            new_entries = line->entries;
+          else
+            {
+              // otherwise, we need to go through the list by hand and
+              // resolve the duplicates
+              new_entries.reserve (line->entries.size() - duplicates);
+              new_entries.push_back(line->entries[0]);
+              for (size_type j=1; j<line->entries.size(); ++j)
+                if (line->entries[j].first == line->entries[j-1].first)
+                  {
+                    Assert (new_entries.back().first == line->entries[j].first,
+                            ExcInternalError());
+                    new_entries.back().second += line->entries[j].second;
+                  }
+                else
+                  new_entries.push_back (line->entries[j]);
+
+              Assert (new_entries.size() == line->entries.size() - duplicates,
+                      ExcInternalError());
+
+              // make sure there are really no duplicates left and that the
+              // list is still sorted
+              for (size_type j=1; j<new_entries.size(); ++j)
+                {
+                  Assert (new_entries[j].first != new_entries[j-1].first,
+                          ExcInternalError());
+                  Assert (new_entries[j].first > new_entries[j-1].first,
+                          ExcInternalError());
+                }
+            }
+
+          // replace old list of constraints for this dof by the new one
+          line->entries.swap (new_entries);
+        }
+
+      // finally do the following check: if the sum of weights for the
+      // constraints is close to one, but not exactly one, then rescale all
+      // the weights so that they sum up to 1. this adds a little numerical
+      // stability and avoids all sorts of problems where the actual value
+      // is close to, but not quite what we expected
+      //
+      // the case where the weights don't quite sum up happens when we
+      // compute the interpolation weights "on the fly", i.e. not from
+      // precomputed tables. in this case, the interpolation weights are
+      // also subject to round-off
+      double sum = 0;
+      for (size_type i=0; i<line->entries.size(); ++i)
+        sum += line->entries[i].second;
+      if ((sum != 1.0) && (std::fabs (sum-1.) < 1.e-13))
+        {
+          for (size_type i=0; i<line->entries.size(); ++i)
+            line->entries[i].second /= sum;
+          line->inhomogeneity /= sum;
+        }
+    } // end of loop over all constraint lines
+
+#ifdef DEBUG
+  // if in debug mode: check that no dof is constrained to another dof that
+  // is also constrained. exclude dofs from this check whose constraint
+  // lines are not stored on the local processor
+  for (std::vector<ConstraintLine>::const_iterator line=lines.begin();
+       line!=lines.end(); ++line)
+    for (ConstraintLine::Entries::const_iterator
+         entry=line->entries.begin();
+         entry!=line->entries.end(); ++entry)
+      if ((local_lines.size() == 0)
+          ||
+          (local_lines.is_element(entry->first)))
+        {
+          // make sure that entry->first is not the index of a line itself
+          const bool is_circle = is_constrained(entry->first);
+          Assert (is_circle == false,
+                  ExcDoFConstrainedToConstrainedDoF(line->line, entry->first));
+        }
+#endif
+
+  sorted = true;
+}
+
+
+
+void
+ConstraintMatrix::merge (const ConstraintMatrix &other_constraints,
+                         const MergeConflictBehavior merge_conflict_behavior)
+{
+  AssertThrow(local_lines == other_constraints.local_lines,
+              ExcNotImplemented());
+
+  // remember whether we were sorted so that this can be restored at the end
+  const bool object_was_sorted = sorted;
+  sorted = false;
+
+  if (other_constraints.lines_cache.size() > lines_cache.size())
+    lines_cache.resize(other_constraints.lines_cache.size(),
+                       numbers::invalid_size_type);
+
+  // first action is to fold into the present object possible constraints
+  // in the second object. we don't strictly need to do this any more since
+  // the ConstraintMatrix has learned to deal with chains of constraints in
+  // the close() function, but we have traditionally done this and it's not
+  // overly hard to do.
+  //
+  // for this, loop over all constraints and replace the constraint lines
+  // with a new one where constraints are replaced if necessary.
+  ConstraintLine::Entries tmp;
+  for (std::vector<ConstraintLine>::iterator line=lines.begin();
+       line!=lines.end(); ++line)
+    {
+      tmp.clear ();
+      for (size_type i=0; i<line->entries.size(); ++i)
+        {
+          // if the present dof is not constrained in the other object, or if we
+          // won't take the constraint from that object, then simply copy it over
+          if (other_constraints.is_constrained(line->entries[i].first) == false
+              ||
+              ((merge_conflict_behavior != right_object_wins)
+               &&
+               other_constraints.is_constrained(line->entries[i].first)
+               &&
+               this->is_constrained(line->entries[i].first)))
+            tmp.push_back(line->entries[i]);
+          else
+            // otherwise resolve further constraints by replacing the old
+            // entry by a sequence of new entries taken from the other
+            // object, but with multiplied weights
+            {
+              const ConstraintLine::Entries *other_line
+                = other_constraints.get_constraint_entries (line->entries[i].first);
+              Assert (other_line != 0,
+                      ExcInternalError());
+
+              const double weight = line->entries[i].second;
+
+              for (ConstraintLine::Entries::const_iterator j=other_line->begin();
+                   j!=other_line->end(); ++j)
+                tmp.push_back (std::pair<size_type,double>(j->first,
+                                                           j->second*weight));
+
+              line->inhomogeneity += other_constraints.get_inhomogeneity(line->entries[i].first) *
+                                     weight;
+            }
+        }
+      // finally exchange old and newly resolved line
+      line->entries.swap (tmp);
+    }
+
+
+
+  // next action: append those lines at the end that we want to add
+  for (std::vector<ConstraintLine>::const_iterator
+       line=other_constraints.lines.begin();
+       line!=other_constraints.lines.end(); ++line)
+    if (is_constrained(line->line) == false)
+      lines.push_back (*line);
+    else
+      {
+        // the constrained dof we want to copy from the other object is
+        // also constrained here. let's see what we should do with that
+        switch (merge_conflict_behavior)
+          {
+          case no_conflicts_allowed:
+            AssertThrow (false,
+                         ExcDoFIsConstrainedFromBothObjects (line->line));
+            break;
+
+          case left_object_wins:
+            // keep our own constraint and ignore the one from the other object
+            break;
+
+          case right_object_wins:
+            // we need to replace the existing constraint by the one from
+            // the other object
+            lines[lines_cache[calculate_line_index(line->line)]].entries
+              = line->entries;
+            lines[lines_cache[calculate_line_index(line->line)]].inhomogeneity
+              = line->inhomogeneity;
+            break;
+
+          default:
+            Assert (false, ExcNotImplemented());
+          }
+      }
+
+  // update the lines cache: record for each line its position in 'lines'
+  size_type counter = 0;
+  for (std::vector<ConstraintLine>::const_iterator line=lines.begin();
+       line!=lines.end(); ++line, ++counter)
+    lines_cache[calculate_line_index(line->line)] = counter;
+
+  // if the object was sorted before, then make sure it is so afterward as
+  // well. otherwise leave everything in the unsorted state
+  if (object_was_sorted == true)
+    close ();
+}
+
+
+
+void ConstraintMatrix::shift (const size_type offset)
+{
+  //TODO: this doesn't work with IndexSets yet. [TH]
+  AssertThrow(local_lines.size()==0, ExcNotImplemented());
+
+  lines_cache.insert (lines_cache.begin(), offset,
+                      numbers::invalid_size_type);
+
+  for (size_type l=0; l<lines.size(); ++l)
+    {
+      ConstraintLine &current = lines[l];
+      current.line += offset;
+
+      // the dofs this line is constrained to move by the same amount
+      for (size_type e=0; e<current.entries.size(); ++e)
+        current.entries[e].first += offset;
+    }
+}
+
+
+
+void ConstraintMatrix::clear ()
+{
+  // swap the members with empty vectors instead of calling clear() on
+  // them, so that the memory they hold is released once the temporaries
+  // below go out of scope
+  std::vector<ConstraintLine> empty_lines;
+  std::vector<size_type>      empty_cache;
+
+  lines.swap (empty_lines);
+  lines_cache.swap (empty_cache);
+
+  // without constraint lines the object no longer counts as sorted
+  sorted = false;
+}
+
+
+
+void ConstraintMatrix::reinit (const IndexSet &local_constraints)
+{
+  local_lines = local_constraints;
+
+  // compress our copy of the IndexSet right away. Otherwise this can lead
+  // to crashes that are hard to find (they only happen in release mode),
+  // see tests/mpi/constraint_matrix_crash_01
+  local_lines.compress();
+
+  clear();
+}
+
+
+
+void ConstraintMatrix::condense (SparsityPattern &sparsity) const
+{
+  Assert (sorted == true, ExcMatrixNotClosed());
+  Assert (sparsity.is_compressed() == false, ExcMatrixIsClosed());
+  Assert (sparsity.n_rows() == sparsity.n_cols(), ExcNotQuadratic());
+
+  // store for each index whether it must be distributed or not. If entry
+  // is numbers::invalid_size_type, no distribution is necessary.
+  // otherwise, the number states which line in the constraint matrix
+  // handles this index
+  std::vector<size_type> distribute(sparsity.n_rows(),
+                                    numbers::invalid_size_type);
+
+  for (size_type c=0; c<lines.size(); ++c)
+    distribute[lines[c].line] = c;
+
+  const size_type n_rows = sparsity.n_rows();
+  for (size_type row=0; row<n_rows; ++row)
+    {
+      if (distribute[row] == numbers::invalid_size_type)
+        {
+          // regular line. loop over all valid cols. note that this
+          // changes the line we are presently working on: we add additional
+          // entries. these are put to the end of the row. however, as
+          // constrained nodes cannot be constrained to other constrained
+          // nodes, nothing will happen if we run into these added nodes, as
+          // they can't be distributed further. we might store the position of
+          // the last old entry and stop work there, but since operating on
+          // the newly added ones only takes two comparisons (column index
+          // valid, distribute[column] necessarily
+          // ==numbers::invalid_size_type), it is cheaper to not do so and
+          // run right until the end of the line
+          for (SparsityPattern::iterator entry = sparsity.begin(row);
+               ((entry != sparsity.end(row)) &&
+                entry->is_valid_entry());
+               ++entry)
+            {
+              const size_type column = entry->column();
+
+              if (distribute[column] != numbers::invalid_size_type)
+                {
+                  // distribute entry at regular row @p{row} and irregular
+                  // column @p{column}
+                  for (size_type q=0;
+                       q!=lines[distribute[column]].entries.size();
+                       ++q)
+                    sparsity.add (row,
+                                  lines[distribute[column]].entries[q].first);
+                }
+            }
+        }
+      else
+        // row must be distributed. note that here the present row is not
+        // touched (unlike above)
+        {
+          for (SparsityPattern::iterator entry = sparsity.begin(row);
+               (entry != sparsity.end(row)) && entry->is_valid_entry(); ++entry)
+            {
+              const size_type column = entry->column();
+              if (distribute[column] == numbers::invalid_size_type)
+                // distribute entry at irregular row @p{row} and regular
+                // column @p{column}
+                for (size_type q=0;
+                     q!=lines[distribute[row]].entries.size(); ++q)
+                  sparsity.add (lines[distribute[row]].entries[q].first,
+                                column);
+              else
+                // distribute entry at irregular row @p{row} and irregular
+                // column @p{column}
+                for (size_type p=0; p!=lines[distribute[row]].entries.size(); ++p)
+                  for (size_type q=0;
+                       q!=lines[distribute[column]].entries.size(); ++q)
+                    sparsity.add (lines[distribute[row]].entries[p].first,
+                                  lines[distribute[column]].entries[q].first);
+            }
+        }
+    }
+
+  sparsity.compress();
+}
+
+
+
+
+void ConstraintMatrix::condense (DynamicSparsityPattern &sparsity) const
+{
+  Assert (sorted == true, ExcMatrixNotClosed());
+  Assert (sparsity.n_rows() == sparsity.n_cols(),
+          ExcNotQuadratic());
+
+  // store for each index whether it must be distributed or not. If entry
+  // is numbers::invalid_size_type, no distribution is necessary.
+  // otherwise, the number states which line in the constraint matrix
+  // handles this index
+  std::vector<size_type> distribute(sparsity.n_rows(),
+                                    numbers::invalid_size_type);
+
+  for (size_type c=0; c<lines.size(); ++c)
+    distribute[lines[c].line] = c;
+
+  const size_type n_rows = sparsity.n_rows();
+  for (size_type row=0; row<n_rows; ++row)
+    {
+      if (distribute[row] == numbers::invalid_size_type)
+        // regular line. loop over cols. note that as we proceed to
+        // distribute cols, the loop may get longer
+        for (size_type j=0; j<sparsity.row_length(row); ++j)
+          {
+            const size_type column = sparsity.column_number(row,j);
+
+            if (distribute[column] != numbers::invalid_size_type)
+              {
+                // distribute entry at regular row @p{row} and irregular
+                // column @p{column}. note that this changes the line we are
+                // presently working on: we add additional entries. if we
+                // add another entry at a column behind the present one, we
+                // will encounter it later on (but since it can't be
+                // further constrained, won't have to do anything about
+                // it). if we add it up front of the present column, we
+                // will find the present column later on again as it was
+                // shifted back (again nothing happens, in particular no
+                // endless loop, as when we encounter it the second time we
+                // won't be able to add more entries as they all already
+                // exist, but we do the same work more often than
+                // necessary, and the loop gets longer), so move the cursor
+                // one to the right in the case that we add an entry up
+                // front that did not exist before. check whether it
+                // existed before by tracking the length of this row
+                size_type old_rowlength = sparsity.row_length(row);
+                for (size_type q=0;
+                     q!=lines[distribute[column]].entries.size();
+                     ++q)
+                  {
+                    const size_type
+                    new_col = lines[distribute[column]].entries[q].first;
+
+                    sparsity.add (row, new_col);
+
+                    const size_type new_rowlength = sparsity.row_length(row);
+                    if ((new_col < column) && (old_rowlength != new_rowlength))
+                      ++j;
+                    old_rowlength = new_rowlength;
+                  };
+              };
+          }
+      else
+        // row must be distributed
+        for (size_type j=0; j<sparsity.row_length(row); ++j)
+          {
+            const size_type column = sparsity.column_number(row,j);
+
+            if (distribute[column] == numbers::invalid_size_type)
+              // distribute entry at irregular row @p{row} and regular
+              // column @p{column}
+              for (size_type q=0;
+                   q!=lines[distribute[row]].entries.size(); ++q)
+                sparsity.add (lines[distribute[row]].entries[q].first,
+                              column);
+            else
+              // distribute entry at irregular row @p{row} and irregular
+              // column sparsity.column_number(row,j)
+              for (size_type p=0; p!=lines[distribute[row]].entries.size(); ++p)
+                for (size_type q=0;
+                     q!=lines[distribute[sparsity.column_number(row,j)]]
+                     .entries.size(); ++q)
+                  sparsity.add (lines[distribute[row]].entries[p].first,
+                                lines[distribute[sparsity.column_number(row,j)]]
+                                .entries[q].first);
+          };
+    };
+}
+
+
+
+void ConstraintMatrix::condense (BlockSparsityPattern &sparsity) const
+{
+  Assert (sorted == true, ExcMatrixNotClosed());
+  Assert (sparsity.is_compressed() == false, ExcMatrixIsClosed());
+  Assert (sparsity.n_rows() == sparsity.n_cols(),
+          ExcNotQuadratic());
+  Assert (sparsity.n_block_rows() == sparsity.n_block_cols(),
+          ExcNotQuadratic());
+  Assert (sparsity.get_column_indices() == sparsity.get_row_indices(),
+          ExcNotQuadratic());
+
+  const BlockIndices &
+  index_mapping = sparsity.get_column_indices();
+
+  const size_type n_blocks = sparsity.n_block_rows();
+
+  // store for each index whether it must be distributed or not. If entry
+  // is numbers::invalid_size_type, no distribution is necessary.
+  // otherwise, the number states which line in the constraint matrix
+  // handles this index
+  std::vector<size_type> distribute (sparsity.n_rows(),
+                                     numbers::invalid_size_type);
+
+  for (size_type c=0; c<lines.size(); ++c)
+    distribute[lines[c].line] = c;
+
+  const size_type n_rows = sparsity.n_rows();
+  for (size_type row=0; row<n_rows; ++row)
+    {
+      // get index of this row within the blocks
+      const std::pair<size_type,size_type>
+      block_index = index_mapping.global_to_local(row);
+      const size_type block_row = block_index.first;
+
+      if (distribute[row] == numbers::invalid_size_type)
+        // regular line. loop over all columns and see whether this column
+        // must be distributed
+        {
+
+          // to loop over all entries in this row, we have to loop over all
+          // blocks in this blockrow and the corresponding row therein
+          for (size_type block_col=0; block_col<n_blocks; ++block_col)
+            {
+              const SparsityPattern &
+              block_sparsity = sparsity.block(block_row, block_col);
+
+              for (SparsityPattern::const_iterator
+                   entry = block_sparsity.begin(block_index.second);
+                   (entry != block_sparsity.end(block_index.second)) &&
+                   entry->is_valid_entry();
+                   ++entry)
+                {
+                  const size_type global_col
+                    = index_mapping.local_to_global(block_col, entry->column());
+
+                  if (distribute[global_col] != numbers::invalid_size_type)
+                    // distribute entry at regular row @p{row} and
+                    // irregular column @p{global_col}
+                    {
+                      for (size_type q=0;
+                           q!=lines[distribute[global_col]].entries.size(); ++q)
+                        sparsity.add (row,
+                                      lines[distribute[global_col]].entries[q].first);
+                    }
+                }
+            }
+        }
+      else
+        {
+          // row must be distributed. split the whole row into the chunks
+          // defined by the blocks
+          for (size_type block_col=0; block_col<n_blocks; ++block_col)
+            {
+              const SparsityPattern &
+              block_sparsity = sparsity.block(block_row,block_col);
+
+              for (SparsityPattern::const_iterator
+                   entry = block_sparsity.begin(block_index.second);
+                   (entry != block_sparsity.end(block_index.second)) &&
+                   entry->is_valid_entry();
+                   ++entry)
+                {
+                  const size_type global_col
+                    = index_mapping.local_to_global (block_col, entry->column());
+
+                  if (distribute[global_col] == numbers::invalid_size_type)
+                    // distribute entry at irregular row @p{row} and
+                    // regular column @p{global_col}.
+                    {
+                      for (size_type q=0; q!=lines[distribute[row]].entries.size(); ++q)
+                        sparsity.add (lines[distribute[row]].entries[q].first, global_col);
+                    }
+                  else
+                    // distribute entry at irregular row @p{row} and
+                    // irregular column @p{global_col}
+                    {
+                      for (size_type p=0; p!=lines[distribute[row]].entries.size(); ++p)
+                        for (size_type q=0; q!=lines[distribute[global_col]].entries.size(); ++q)
+                          sparsity.add (lines[distribute[row]].entries[p].first,
+                                        lines[distribute[global_col]].entries[q].first);
+                    }
+                }
+            }
+        }
+    }
+
+  sparsity.compress();
+}
+
+
+
+
+void ConstraintMatrix::condense (BlockDynamicSparsityPattern &sparsity) const
+{
+  Assert (sorted == true, ExcMatrixNotClosed());
+  Assert (sparsity.n_rows() == sparsity.n_cols(),
+          ExcNotQuadratic());
+  Assert (sparsity.n_block_rows() == sparsity.n_block_cols(),
+          ExcNotQuadratic());
+  Assert (sparsity.get_column_indices() == sparsity.get_row_indices(),
+          ExcNotQuadratic());
+
+  const BlockIndices &
+  index_mapping = sparsity.get_column_indices();
+
+  const size_type n_blocks = sparsity.n_block_rows();
+
+  // store for each index whether it must be distributed or not. If entry
+  // is numbers::invalid_size_type, no distribution is necessary.
+  // otherwise, the number states which line in the constraint matrix
+  // handles this index. store it as a size_type, without narrowing.
+  std::vector<size_type> distribute (sparsity.n_rows(),
+                                     numbers::invalid_size_type);
+
+  for (size_type c=0; c<lines.size(); ++c)
+    distribute[lines[c].line] = c;
+
+  const size_type n_rows = sparsity.n_rows();
+  for (size_type row=0; row<n_rows; ++row)
+    {
+      // get index of this row within the blocks
+      const std::pair<size_type,size_type>
+      block_index = index_mapping.global_to_local(row);
+      const size_type block_row = block_index.first;
+      const size_type local_row = block_index.second;
+
+      if (distribute[row] == numbers::invalid_size_type)
+        // regular line. loop over all columns and see whether this column
+        // must be distributed. note that as we proceed to distribute cols,
+        // the loop over cols may get longer.
+        //
+        // don't try to be clever here as in the algorithm for the
+        // DynamicSparsityPattern, as that would be much more
+        // complicated here. after all, we know that compressed patterns
+        // are inefficient...
+        {
+
+          // to loop over all entries in this row, we have to loop over all
+          // blocks in this blockrow and the corresponding row therein
+          for (size_type block_col=0; block_col<n_blocks; ++block_col)
+            {
+              const DynamicSparsityPattern &
+              block_sparsity = sparsity.block(block_row, block_col);
+
+              for (size_type j=0; j<block_sparsity.row_length(local_row); ++j)
+                {
+                  const size_type global_col
+                    = index_mapping.local_to_global(block_col,
+                                                    block_sparsity.column_number(local_row,j));
+
+                  if (distribute[global_col] != numbers::invalid_size_type)
+                    // distribute entry at regular row @p{row} and
+                    // irregular column @p{global_col}
+                    {
+                      for (size_type q=0;
+                           q!=lines[distribute[global_col]]
+                           .entries.size(); ++q)
+                        sparsity.add (row,
+                                      lines[distribute[global_col]].entries[q].first);
+                    };
+                };
+            };
+        }
+      else
+        {
+          // row must be distributed. split the whole row into the chunks
+          // defined by the blocks
+          for (size_type block_col=0; block_col<n_blocks; ++block_col)
+            {
+              const DynamicSparsityPattern &
+              block_sparsity = sparsity.block(block_row,block_col);
+
+              for (size_type j=0; j<block_sparsity.row_length(local_row); ++j)
+                {
+                  const size_type global_col
+                    = index_mapping.local_to_global (block_col,
+                                                     block_sparsity.column_number(local_row,j));
+
+                  if (distribute[global_col] == numbers::invalid_size_type)
+                    // distribute entry at irregular row @p{row} and
+                    // regular column @p{global_col}.
+                    {
+                      for (size_type q=0;
+                           q!=lines[distribute[row]].entries.size(); ++q)
+                        sparsity.add (lines[distribute[row]].entries[q].first,
+                                      global_col);
+                    }
+                  else
+                    // distribute entry at irregular row @p{row} and
+                    // irregular column @p{global_col}
+                    {
+                      for (size_type p=0;
+                           p!=lines[distribute[row]].entries.size(); ++p)
+                        for (size_type q=0; q!=lines[distribute[global_col]].entries.size(); ++q)
+                          sparsity.add (lines[distribute[row]].entries[p].first,
+                                        lines[distribute[global_col]].entries[q].first);
+                    };
+                };
+            };
+        };
+    };
+}
+
+
+
+bool ConstraintMatrix::is_identity_constrained (const size_type index) const
+{
+  // a dof without any constraint cannot be identity constrained
+  if (!is_constrained(index))
+    return false;
+
+  const ConstraintLine &p = lines[lines_cache[calculate_line_index(index)]];
+  Assert (p.line == index, ExcInternalError());
+
+  // identity constrained means: exactly one entry, and its weight is 1.0
+  const bool single_entry = (p.entries.size() == 1);
+  return single_entry && (p.entries[0].second == 1.0);
+}
+
+
+bool ConstraintMatrix::are_identity_constrained (const size_type index1,
+                                                 const size_type index2) const
+{
+  // if index1 is constrained, its line alone decides the answer
+  if (is_constrained(index1))
+    {
+      const ConstraintLine &p = lines[lines_cache[calculate_line_index(index1)]];
+      Assert (p.line == index1, ExcInternalError());
+
+      // true only if the line consists of the single entry (index2, 1.0)
+      if (p.entries.size() != 1)
+        return false;
+      return (p.entries[0].first == index2) && (p.entries[0].second == 1.0);
+    }
+
+  // neither of the two dofs is constrained: no identity constraint
+  if (!is_constrained(index2))
+    return false;
+
+  // otherwise the line of index2 decides, with the roles swapped
+  const ConstraintLine &p = lines[lines_cache[calculate_line_index(index2)]];
+  Assert (p.line == index2, ExcInternalError());
+
+  // true only if the line consists of the single entry (index1, 1.0)
+  if (p.entries.size() != 1)
+    return false;
+  return (p.entries[0].first == index1) && (p.entries[0].second == 1.0);
+}
+
+
+
+ConstraintMatrix::size_type
+ConstraintMatrix::max_constraint_indirections () const
+{
+  // determine the largest number of entries stored in any constraint line
+  size_type longest = 0;
+
+  for (size_type l=0; l<lines.size(); ++l)
+    // explicit cast: entries.size() is a std::size_t, which need not be size_type
+    if (static_cast<size_type>(lines[l].entries.size()) > longest)
+      longest = static_cast<size_type>(lines[l].entries.size());
+
+  return longest;
+}
+
+
+
+bool ConstraintMatrix::has_inhomogeneities () const
+{
+  // a single line with nonzero inhomogeneity settles the question
+  for (size_type l=0; l<lines.size(); ++l)
+    if (lines[l].inhomogeneity != 0.)
+      return true;
+
+  return false;
+}
+
+
+void ConstraintMatrix::print (std::ostream &out) const
+{
+  for (size_type i=0; i!=lines.size(); ++i)
+    {
+      const ConstraintLine &current = lines[i];
+
+      if (current.entries.empty())
+        {
+          // the constraint has no linear combination of other dofs on
+          // its right hand side, e.g. x[13]=0. still output something
+          if (current.inhomogeneity != 0)
+            out << "    " << current.line
+                << " = " << current.inhomogeneity
+                << "\n";
+          else
+            out << "    " << current.line << " = 0\n";
+          continue;
+        }
+
+      // output the list of constraints as pairs of dofs and their weights
+      for (size_type j=0; j<current.entries.size(); ++j)
+        out << "    " << current.line
+            << " " << current.entries[j].first
+            << ":  " << current.entries[j].second << "\n";
+
+      // print out inhomogeneity.
+      if (current.inhomogeneity != 0)
+        out << "    " << current.line
+            << ": " << current.inhomogeneity << "\n";
+    }
+
+  AssertThrow (out, ExcIO());
+}
+
+
+
+void
+ConstraintMatrix::write_dot (std::ostream &out) const
+{
+  // Emit the constraints as a "digraph": one edge per entry, annotated
+  // with its weight, and a bare node for lines without entries.
+  out << "digraph constraints {" << std::endl;
+  for (std::vector<ConstraintLine>::const_iterator line=lines.begin();
+       line!=lines.end(); ++line)
+    {
+      if (line->entries.size() == 0)
+        out << "  " << line->line << "\n";
+      else
+        for (size_type j=0; j<line->entries.size(); ++j)
+          out << "  " << line->line << "->" << line->entries[j].first
+              << "; // weight: "
+              << line->entries[j].second << "\n";
+    }
+  out << "}" << std::endl;
+}
+
+
+
+std::size_t
+ConstraintMatrix::memory_consumption () const
+{
+  return (MemoryConsumption::memory_consumption (lines) +
+          MemoryConsumption::memory_consumption (lines_cache) +
+          MemoryConsumption::memory_consumption (sorted) +
+          MemoryConsumption::memory_consumption (local_lines));
+}
+
+
+
+void
+ConstraintMatrix::resolve_indices (std::vector<types::global_dof_index> &indices) const
+{
+  // Extend @p indices by the dofs that its constrained members depend on,
+  // then sort the result and drop duplicates. Only the indices present on
+  // entry are examined; the ones appended below are not resolved again.
+  // Sizes are std::size_t so that long vectors are not truncated.
+  const std::size_t n_input_indices = indices.size();
+  for (std::size_t i=0; i<n_input_indices; ++i)
+    {
+      // look up by index, not by reference: push_back may reallocate indices
+      const std::vector<std::pair<types::global_dof_index,double> > *line_ptr
+        = get_constraint_entries(indices[i]);
+      // if the index is constrained, the indices it is constrained to are
+      // appended to the indices vector
+      if (line_ptr!=NULL)
+        for (std::size_t j=0; j<line_ptr->size(); ++j)
+          indices.push_back((*line_ptr)[j].first);
+    }
+
+  // keep only the unique elements, in ascending order
+  std::sort(indices.begin(),indices.end());
+  indices.erase(std::unique(indices.begin(),indices.end()), indices.end());
+}
+
+
+
+// explicit instantiations
+//
+// define a list of functions for vectors and matrices, respectively, where
+// the vector/matrix can be replaced using a preprocessor variable
+// VectorType/MatrixType. note that we need a space between "VectorType" and
+// ">" to disambiguate ">>" when VectorType trails in an angle bracket
+
+// TODO: The way we define all the instantiations is probably not the very
+// best one. Try to find a better description.
+
+#define VECTOR_FUNCTIONS(VectorType) \
+  template void ConstraintMatrix::condense<VectorType >(const VectorType &uncondensed,\
+                                                        VectorType       &condensed) const;\
+  template void ConstraintMatrix::condense<VectorType >(VectorType &vec) const;\
+  template void ConstraintMatrix:: \
+  distribute_local_to_global<VectorType > (const Vector<double>            &, \
+                                           const std::vector<ConstraintMatrix::size_type>  &, \
+                                           VectorType                      &, \
+                                           const FullMatrix<double>        &) const
+
+#define PARALLEL_VECTOR_FUNCTIONS(VectorType) \
+  template void ConstraintMatrix:: \
+  distribute_local_to_global<VectorType > (const Vector<double>            &, \
+                                           const std::vector<ConstraintMatrix::size_type>  &, \
+                                           VectorType                      &, \
+                                           const FullMatrix<double>        &) const
+
+
+#ifdef DEAL_II_WITH_PETSC
+VECTOR_FUNCTIONS(PETScWrappers::MPI::Vector);
+VECTOR_FUNCTIONS(PETScWrappers::MPI::BlockVector);
+#endif
+
+#ifdef DEAL_II_WITH_TRILINOS
+PARALLEL_VECTOR_FUNCTIONS(TrilinosWrappers::MPI::Vector);
+PARALLEL_VECTOR_FUNCTIONS(TrilinosWrappers::MPI::BlockVector);
+#endif
+
+#define MATRIX_VECTOR_FUNCTIONS(MatrixType, VectorType) \
+  template void ConstraintMatrix:: \
+  distribute_local_to_global<MatrixType,VectorType > (const FullMatrix<MatrixType::value_type>        &, \
+                                                      const Vector<VectorType::value_type>            &, \
+                                                      const std::vector<ConstraintMatrix::size_type> &, \
+                                                      MatrixType                      &, \
+                                                      VectorType                      &, \
+                                                      bool                             , \
+                                                      internal::bool2type<false>) const
+#define MATRIX_FUNCTIONS(MatrixType) \
+  template void ConstraintMatrix:: \
+  distribute_local_to_global<MatrixType,Vector<MatrixType::value_type> > (const FullMatrix<MatrixType::value_type>        &, \
+      const Vector<MatrixType::value_type>            &, \
+      const std::vector<ConstraintMatrix::size_type> &, \
+      MatrixType                      &, \
+      Vector<MatrixType::value_type>                  &, \
+      bool                             , \
+      internal::bool2type<false>) const
+#define BLOCK_MATRIX_VECTOR_FUNCTIONS(MatrixType, VectorType)   \
+  template void ConstraintMatrix:: \
+  distribute_local_to_global<MatrixType,VectorType > (const FullMatrix<MatrixType::value_type>        &, \
+                                                      const Vector<VectorType::value_type>            &, \
+                                                      const std::vector<ConstraintMatrix::size_type> &, \
+                                                      MatrixType                      &, \
+                                                      VectorType                      &, \
+                                                      bool                             , \
+                                                      internal::bool2type<true>) const
+#define BLOCK_MATRIX_FUNCTIONS(MatrixType)      \
+  template void ConstraintMatrix:: \
+  distribute_local_to_global<MatrixType,Vector<MatrixType::value_type> > (const FullMatrix<MatrixType::value_type>        &, \
+      const Vector<MatrixType::value_type>            &, \
+      const std::vector<ConstraintMatrix::size_type> &, \
+      MatrixType                      &, \
+      Vector<MatrixType::value_type>                  &, \
+      bool                             , \
+      internal::bool2type<true>) const
+
+MATRIX_FUNCTIONS(SparseMatrix<double>);
+MATRIX_FUNCTIONS(SparseMatrix<float>);
+MATRIX_FUNCTIONS(FullMatrix<double>);
+MATRIX_FUNCTIONS(FullMatrix<float>);
+MATRIX_FUNCTIONS(FullMatrix<std::complex<double> >);
+
+BLOCK_MATRIX_FUNCTIONS(BlockSparseMatrix<double>);
+BLOCK_MATRIX_FUNCTIONS(BlockSparseMatrix<float>);
+BLOCK_MATRIX_VECTOR_FUNCTIONS(BlockSparseMatrix<double>, BlockVector<double>);
+BLOCK_MATRIX_VECTOR_FUNCTIONS(BlockSparseMatrix<float>,  BlockVector<float>);
+
+MATRIX_FUNCTIONS(SparseMatrixEZ<double>);
+MATRIX_FUNCTIONS(SparseMatrixEZ<float>);
+MATRIX_FUNCTIONS(ChunkSparseMatrix<double>);
+MATRIX_FUNCTIONS(ChunkSparseMatrix<float>);
+
+// BLOCK_MATRIX_FUNCTIONS(BlockSparseMatrixEZ<double>);
+// BLOCK_MATRIX_VECTOR_FUNCTIONS(BlockSparseMatrixEZ<float>,  Vector<float>);
+
+#ifdef DEAL_II_WITH_PETSC
+MATRIX_FUNCTIONS(PETScWrappers::SparseMatrix);
+BLOCK_MATRIX_FUNCTIONS(PETScWrappers::BlockSparseMatrix);
+MATRIX_FUNCTIONS(PETScWrappers::MPI::SparseMatrix);
+BLOCK_MATRIX_FUNCTIONS(PETScWrappers::MPI::BlockSparseMatrix);
+MATRIX_VECTOR_FUNCTIONS(PETScWrappers::SparseMatrix, PETScWrappers::Vector);
+BLOCK_MATRIX_VECTOR_FUNCTIONS(PETScWrappers::BlockSparseMatrix, PETScWrappers::BlockVector);
+MATRIX_VECTOR_FUNCTIONS(PETScWrappers::MPI::SparseMatrix, PETScWrappers::MPI::Vector);
+BLOCK_MATRIX_VECTOR_FUNCTIONS(PETScWrappers::MPI::BlockSparseMatrix ,PETScWrappers::MPI::BlockVector);
+#endif
+
+#ifdef DEAL_II_WITH_TRILINOS
+MATRIX_FUNCTIONS(TrilinosWrappers::SparseMatrix);
+BLOCK_MATRIX_FUNCTIONS(TrilinosWrappers::BlockSparseMatrix);
+MATRIX_VECTOR_FUNCTIONS(TrilinosWrappers::SparseMatrix, TrilinosWrappers::Vector);
+BLOCK_MATRIX_VECTOR_FUNCTIONS(TrilinosWrappers::BlockSparseMatrix, TrilinosWrappers::BlockVector);
+MATRIX_VECTOR_FUNCTIONS(TrilinosWrappers::SparseMatrix, TrilinosWrappers::MPI::Vector);
+BLOCK_MATRIX_VECTOR_FUNCTIONS(TrilinosWrappers::BlockSparseMatrix, TrilinosWrappers::MPI::BlockVector);
+#endif
+
+
+#define SPARSITY_FUNCTIONS(SparsityPatternType)                                      \
+  template void ConstraintMatrix::add_entries_local_to_global<SparsityPatternType> ( \
+      const std::vector<ConstraintMatrix::size_type> &,                              \
+      SparsityPatternType &,                                                         \
+      const bool,                                                                    \
+      const Table<2,bool> &,                                                         \
+      internal::bool2type<false>) const;                                             \
+  template void ConstraintMatrix::add_entries_local_to_global<SparsityPatternType> ( \
+      const std::vector<ConstraintMatrix::size_type> &,                              \
+      const std::vector<ConstraintMatrix::size_type> &,                              \
+      SparsityPatternType &,                                                         \
+      const bool,                                                                    \
+      const Table<2,bool> &) const
+#define BLOCK_SPARSITY_FUNCTIONS(SparsityPatternType)                                \
+  template void ConstraintMatrix::add_entries_local_to_global<SparsityPatternType> ( \
+      const std::vector<ConstraintMatrix::size_type> &,                              \
+      SparsityPatternType &,                                                         \
+      const bool,                                                                    \
+      const Table<2,bool> &,                                                         \
+      internal::bool2type<true>) const;                                              \
+  template void ConstraintMatrix::add_entries_local_to_global<SparsityPatternType> ( \
+      const std::vector<ConstraintMatrix::size_type> &,                              \
+      const std::vector<ConstraintMatrix::size_type> &,                              \
+      SparsityPatternType &,                                                         \
+      const bool,                                                                    \
+      const Table<2,bool> &) const
+
+SPARSITY_FUNCTIONS(SparsityPattern);
+SPARSITY_FUNCTIONS(DynamicSparsityPattern);
+BLOCK_SPARSITY_FUNCTIONS(BlockSparsityPattern);
+BLOCK_SPARSITY_FUNCTIONS(BlockDynamicSparsityPattern);
+
+#ifdef DEAL_II_WITH_TRILINOS
+SPARSITY_FUNCTIONS(TrilinosWrappers::SparsityPattern);
+BLOCK_SPARSITY_FUNCTIONS(TrilinosWrappers::BlockSparsityPattern);
+#endif
+
+
+#define ONLY_MATRIX_FUNCTIONS(MatrixType) \
+  template void ConstraintMatrix::distribute_local_to_global<MatrixType > (\
+      const FullMatrix<MatrixType::value_type>        &, \
+      const std::vector<ConstraintMatrix::size_type> &, \
+      const std::vector<ConstraintMatrix::size_type> &, \
+      MatrixType                      &) const
+
+ONLY_MATRIX_FUNCTIONS(FullMatrix<float>);
+ONLY_MATRIX_FUNCTIONS(FullMatrix<double>);
+ONLY_MATRIX_FUNCTIONS(SparseMatrix<float>);
+ONLY_MATRIX_FUNCTIONS(SparseMatrix<double>);
+ONLY_MATRIX_FUNCTIONS(MatrixBlock<SparseMatrix<float> >);
+ONLY_MATRIX_FUNCTIONS(MatrixBlock<SparseMatrix<double> >);
+ONLY_MATRIX_FUNCTIONS(BlockSparseMatrix<float>);
+ONLY_MATRIX_FUNCTIONS(BlockSparseMatrix<double>);
+
+#ifdef DEAL_II_WITH_TRILINOS
+ONLY_MATRIX_FUNCTIONS(TrilinosWrappers::SparseMatrix);
+ONLY_MATRIX_FUNCTIONS(TrilinosWrappers::BlockSparseMatrix);
+#endif
+
+#ifdef DEAL_II_WITH_PETSC
+ONLY_MATRIX_FUNCTIONS(PETScWrappers::SparseMatrix);
+ONLY_MATRIX_FUNCTIONS(PETScWrappers::BlockSparseMatrix);
+ONLY_MATRIX_FUNCTIONS(PETScWrappers::MPI::SparseMatrix);
+ONLY_MATRIX_FUNCTIONS(PETScWrappers::MPI::BlockSparseMatrix);
+#endif
+
+#include "constraint_matrix.inst"
+
+// allocate scratch data. Cannot use the generic template instantiation
+// because we need to provide an initializer object of type
+// internals::ConstraintMatrixData<Number> that can be passed to the
+// constructor of scratch_data (it won't allow one to be constructed in place).
+namespace internals
+{
+#define SCRATCH_INITIALIZER(Number,Name)                                \
+  ConstraintMatrixData<Number>::ScratchData scratch_data_initializer_##Name; \
+  template<> Threads::ThreadLocalStorage<ConstraintMatrixData<Number>::ScratchData> \
+  ConstraintMatrixData<Number>::scratch_data(scratch_data_initializer_##Name)
+
+  SCRATCH_INITIALIZER(double,double);
+  SCRATCH_INITIALIZER(float,float);
+  SCRATCH_INITIALIZER(long double,ldouble);
+  SCRATCH_INITIALIZER(std::complex<double>,cdouble);
+  SCRATCH_INITIALIZER(std::complex<float>,cfloat);
+  SCRATCH_INITIALIZER(std::complex<long double>,cldouble);
+#undef SCRATCH_INITIALIZER
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/lac/constraint_matrix.inst.in b/source/lac/constraint_matrix.inst.in
new file mode 100644
index 0000000..11418f9
--- /dev/null
+++ b/source/lac/constraint_matrix.inst.in
@@ -0,0 +1,69 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+for (S: REAL_SCALARS; T : DEAL_II_VEC_TEMPLATES)
+  {
+    template void ConstraintMatrix::condense<T<S> >(const T<S> &, T<S> &) const;
+    template void ConstraintMatrix::condense<T<S> >(T<S> &vec) const;
+    template void ConstraintMatrix::distribute_local_to_global<T<S> > (
+      const Vector<double>&, const std::vector<types::global_dof_index> &, T<S> &, const FullMatrix<double>&) const;
+    template void ConstraintMatrix::set_zero<T<S> >(T<S> &) const;
+  }
+
+
+for (S: REAL_SCALARS; T : DEAL_II_VEC_TEMPLATES)
+  {
+    template void ConstraintMatrix::condense<parallel::distributed::T<S> >(const parallel::distributed::T<S> &, parallel::distributed::T<S> &) const;
+    template void ConstraintMatrix::condense<parallel::distributed::T<S> >(parallel::distributed::T<S> &vec) const;
+    template void ConstraintMatrix::distribute_local_to_global<parallel::distributed::T<S> > (
+      const Vector<double>&, const std::vector<types::global_dof_index> &, parallel::distributed::T<S> &, const FullMatrix<double>&) const;
+    template void ConstraintMatrix::set_zero<parallel::distributed::T<S> >(parallel::distributed::T<S> &) const;
+  }
+
+
+for (V: EXTERNAL_SEQUENTIAL_VECTORS)
+  {
+    template void ConstraintMatrix::condense<V >(const V&, V&) const;
+    template void ConstraintMatrix::condense<V >(V&vec) const;
+    template void ConstraintMatrix::distribute_local_to_global<V > (
+      const Vector<double>&, const std::vector<types::global_dof_index> &, V&, const FullMatrix<double>&) const;
+    template void ConstraintMatrix::set_zero<V >(V&) const;
+  }
+
+
+for (V: EXTERNAL_PARALLEL_VECTORS)
+  {
+    template void ConstraintMatrix::set_zero<V >(V&) const;
+  }
+
+
+for (S : REAL_SCALARS)
+  {
+    template void ConstraintMatrix::condense<S>(SparseMatrix<S>&) const;
+    template void ConstraintMatrix::condense<S>(BlockSparseMatrix<S>&) const;
+  }
+
+
+for (S1 : REAL_SCALARS; S2 : REAL_SCALARS)
+  {
+    template void ConstraintMatrix::condense<S1,Vector<S2> >(SparseMatrix<S1>&, Vector<S2>&) const;
+    template void ConstraintMatrix::condense<S1,BlockVector<S2> >(BlockSparseMatrix<S1>&, BlockVector<S2>&) const;
+  }
+
+
+for (Vec : SERIAL_VECTORS)
+  {
+    template void ConstraintMatrix::distribute<Vec>(Vec &) const;
+  }
diff --git a/source/lac/dynamic_sparsity_pattern.cc b/source/lac/dynamic_sparsity_pattern.cc
new file mode 100644
index 0000000..94c3be4
--- /dev/null
+++ b/source/lac/dynamic_sparsity_pattern.cc
@@ -0,0 +1,494 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/base/memory_consumption.h>
+
+#include <algorithm>
+#include <cmath>
+#include <numeric>
+#include <functional>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+// Insert the column indices in [begin,end) into this line such that
+// @p entries stays sorted and free of duplicates. If @p indices_are_sorted
+// is true and more than three indices are given, the input is expected to
+// be strictly increasing (checked in debug mode) and is merged into
+// @p entries in a single pass; otherwise the indices are inserted one at a
+// time. The iterators must support subtraction (end - begin).
+template <typename ForwardIterator>
+void
+DynamicSparsityPattern::Line::add_entries (ForwardIterator begin,
+                                           ForwardIterator end,
+                                           const bool      indices_are_sorted)
+{
+  // an empty (or reversed) range leaves the line untouched.
+  // NOTE(review): the count is held in an int, so the number of indices per
+  // call is limited to what fits into an int.
+  const int n_elements = end - begin;
+  if (n_elements <= 0)
+    return;
+
+  // upper bound for the size of entries after the insertion; duplicates
+  // can make the final size smaller
+  const size_type stop_size = entries.size() + n_elements;
+
+  if (indices_are_sorted == true && n_elements > 3)
+    {
+      // in debug mode, check whether the indices really are sorted.
+#ifdef DEBUG
+      {
+        ForwardIterator test = begin, test1 = begin;
+        ++test1;
+        for ( ; test1 != end; ++test, ++test1)
+          Assert (*test1 > *test, ExcInternalError());
+      }
+#endif
+
+      // fast path: everything new lies behind the current last entry, so
+      // the input can simply be appended
+      if (entries.size() == 0 || entries.back() < *begin)
+        {
+          entries.insert(entries.end(), begin, end);
+          return;
+        }
+
+      // find a possible insertion point for the first entry. check whether
+      // the first entry is a duplicate before actually doing something.
+      // (entries.back() >= *begin holds here, so a lower bound exists)
+      ForwardIterator my_it = begin;
+      size_type col = *my_it;
+      std::vector<size_type>::iterator it =
+        Utilities::lower_bound(entries.begin(), entries.end(), col);
+      while (*it == col)
+        {
+          ++my_it;
+          if (my_it == end)
+            break;
+          col = *my_it;
+          // check the very next entry in the current array
+          ++it;
+          if (it == entries.end())
+            break;
+          if (*it > col)
+            break;
+          if (*it == col)
+            continue;
+          // ok, it wasn't the very next one, do a binary search to find
+          // the insert point
+          it = Utilities::lower_bound(it, entries.end(), col);
+          if (it == entries.end())
+            break;
+        }
+      // all input entries were duplicates.
+      if (my_it == end)
+        return;
+
+      // make room by inserting the remaining input at the insertion point;
+      // the insert may invalidate it, so restore it from its position
+      const size_type pos1 = it - entries.begin();
+      Assert (pos1 <= entries.size(), ExcInternalError());
+      entries.insert (it, my_it, end);
+      it = entries.begin() + pos1;
+      Assert (entries.size() >= (size_type)(it-entries.begin()), ExcInternalError());
+
+      // now merge the two lists. it2 points at the old tail of entries that
+      // was shifted back by the insertion, it at the position to write to.
+      std::vector<size_type>::iterator it2 = it + (end-my_it);
+
+      // as long as there are indices both in the end of the entries list
+      // and in the input list
+      while (my_it != end && it2 != entries.end())
+        {
+          if (*my_it < *it2)
+            *it++ = *my_it++;
+          else if (*my_it == *it2)
+            {
+              *it++ = *it2++;
+              ++my_it;
+            }
+          else
+            *it++ = *it2++;
+        }
+      // in case there are indices left in the input list
+      while (my_it != end)
+        *it++ = *my_it++;
+
+      // in case there are indices left in the end of entries
+      while (it2 != entries.end())
+        *it++ = *it2++;
+
+      // resize (duplicates may have made the result shorter) and return
+      const size_type new_size = it - entries.begin();
+      Assert (new_size <= stop_size, ExcInternalError());
+      entries.resize (new_size);
+      return;
+    }
+
+  // unsorted case or case with too few elements
+  ForwardIterator my_it = begin;
+
+  // If necessary, increase the size of the array.
+  if (stop_size > entries.capacity())
+    entries.reserve (stop_size);
+
+  size_type col = *my_it;
+  std::vector<size_type>::iterator it, it2;
+  // insert the first element as for one entry only: first check the last
+  // element (or if line is still empty)
+  if ( (entries.size()==0) || ( entries.back() < col) )
+    {
+      entries.push_back(col);
+      it = entries.end()-1;
+    }
+  else
+    {
+      // do a binary search to find the place where to insert:
+      it2 = Utilities::lower_bound(entries.begin(), entries.end(), col);
+
+      // If this entry is a duplicate, there is nothing to insert and we
+      // only remember its position. Otherwise insert it at the right place
+      // in the vector, which grows automatically to fit the new element.
+      if (*it2 != col)
+        it = entries.insert(it2, col);
+      else
+        it = it2;
+    }
+
+  ++my_it;
+  // Now try to be smart and insert with bias in the direction we are
+  // walking. This has the advantage that for sorted lists, we always
+  // search in the right direction, which should decrease the work needed
+  // in here.
+  for ( ; my_it != end; ++my_it)
+    {
+      col = *my_it;
+      // need a special insertion command when we're at the end of the list
+      if (col > entries.back())
+        {
+          entries.push_back(col);
+          it = entries.end()-1;
+        }
+      // search to the right (preferred search direction)
+      // NOTE(review): it++ passes the old value of it as the start of the
+      // search and then advances it by one; if col turns out to be a
+      // duplicate, it is left at that advanced position.
+      else if (col > *it)
+        {
+          it2 = Utilities::lower_bound(it++, entries.end(), col);
+          if (*it2 != col)
+            it = entries.insert(it2, col);
+        }
+      // search to the left
+      else if (col < *it)
+        {
+          it2 = Utilities::lower_bound(entries.begin(), it, col);
+          if (*it2 != col)
+            it = entries.insert(it2, col);
+        }
+      // if we're neither larger nor smaller, then this was a duplicate and
+      // we can just continue.
+    }
+}
+
+
+DynamicSparsityPattern::size_type
+DynamicSparsityPattern::Line::memory_consumption () const
+{
+  return entries.capacity()*sizeof(size_type)+sizeof(Line);
+}
+
+
+DynamicSparsityPattern::DynamicSparsityPattern ()
+  :
+  rows(0),
+  cols(0),
+  rowset(0)
+{}
+
+
+
+DynamicSparsityPattern::
+DynamicSparsityPattern (const DynamicSparsityPattern &s)
+  :
+  Subscriptor(),
+  rows(0),
+  cols(0),
+  rowset(0)
+{
+  (void)s;
+  Assert (s.rows == 0, ExcInvalidConstructorCall());
+  Assert (s.cols == 0, ExcInvalidConstructorCall());
+}
+
+
+
+DynamicSparsityPattern::DynamicSparsityPattern (const size_type m,
+                                                const size_type n,
+                                                const IndexSet &rowset_
+                                               )
+  :
+  rows(0),
+  cols(0),
+  rowset(0)
+{
+  reinit (m,n, rowset_);
+}
+
+
+DynamicSparsityPattern::DynamicSparsityPattern (const IndexSet &rowset_)
+  :
+  rows(0),
+  cols(0),
+  rowset(0)
+{
+  reinit (rowset_.size(), rowset_.size(), rowset_);
+}
+
+
+DynamicSparsityPattern::DynamicSparsityPattern (const size_type n)
+  :
+  rows(0),
+  cols(0),
+  rowset(0)
+{
+  reinit (n,n);
+}
+
+
+
+DynamicSparsityPattern &
+DynamicSparsityPattern::operator = (const DynamicSparsityPattern &s)
+{
+  (void)s;
+  Assert (s.rows == 0, ExcInvalidConstructorCall());
+  Assert (s.cols == 0, ExcInvalidConstructorCall());
+
+  Assert (rows == 0, ExcInvalidConstructorCall());
+  Assert (cols == 0, ExcInvalidConstructorCall());
+
+  return *this;
+}
+
+
+
+void
+DynamicSparsityPattern::reinit (const size_type m,
+                                const size_type n,
+                                const IndexSet &rowset_)
+{
+  rows = m;
+  cols = n;
+  rowset=rowset_;
+
+  Assert(rowset.size()==0 || rowset.size() == m, ExcInvalidConstructorCall());
+
+  std::vector<Line> new_lines (rowset.size()==0 ? rows : rowset.n_elements());
+  lines.swap (new_lines);
+}
+
+
+
+void
+DynamicSparsityPattern::compress ()
+{}
+
+
+
+bool
+DynamicSparsityPattern::empty () const
+{
+  return ((rows==0) && (cols==0));
+}
+
+
+
+DynamicSparsityPattern::size_type
+DynamicSparsityPattern::max_entries_per_row () const
+{
+  // Length of the longest line; remains 0 if there are no lines at all.
+  size_type longest = 0;
+  for (std::vector<Line>::const_iterator line=lines.begin();
+       line!=lines.end(); ++line)
+    longest = std::max (longest,
+                        static_cast<size_type>(line->entries.size()));
+  return longest;
+}
+
+
+
+bool
+DynamicSparsityPattern::exists (const size_type i,
+                                const size_type j) const
+{
+  Assert (i<rows, ExcIndexRange(i, 0, rows));
+  Assert (j<cols, ExcIndexRange(j, 0, cols));
+  Assert( rowset.size()==0 || rowset.is_element(i), ExcInternalError());
+
+  // With a nonempty rowset the lines are addressed by position within the
+  // set, so translate the global row number first.
+  const size_type local_row = (rowset.size()!=0) ? rowset.index_within_set(i) : i;
+  const std::vector<size_type> &columns = lines[local_row].entries;
+  // look for column j in the entries of that line by binary search
+  return std::binary_search (columns.begin(), columns.end(), j);
+}
+
+
+
+void
+DynamicSparsityPattern::symmetrize ()
+{
+  Assert (rows==cols, ExcNotQuadratic());
+
+  // Loop over all elements presently in the sparsity pattern and add the
+  // transpose element. Note:
+  //
+  // 1. that the sparsity pattern we work on changes while we loop, but the
+  // row presently being traversed does not, as only transposes of
+  // off-diagonal entries are added
+  //
+  // 2. that the add() function can be called on elements that already
+  // exist without any harm
+  //
+  // NOTE(review): add(*j, rowindex) targets row *j; with a nonempty rowset
+  // this presumably requires *j to be part of rowset -- confirm.
+  for (size_type row=0; row<lines.size(); ++row)
+    {
+      const size_type rowindex =
+        rowset.size()==0 ? row : rowset.nth_index_in_set(row);
+
+      for (std::vector<size_type>::const_iterator
+           j=lines[row].entries.begin();
+           j != lines[row].entries.end();
+           ++j)
+        // add the transpose entry if this is not the diagonal
+        if (rowindex != *j)
+          add (*j, rowindex);
+    }
+}
+
+
+
+void
+DynamicSparsityPattern::print (std::ostream &out) const
+{
+  // Write one bracketed line "[row,col,col,...]" per stored row and raise
+  // ExcIO if the stream is not in a good state afterwards.
+  for (size_type row=0; row<lines.size(); ++row)
+    {
+      const std::vector<size_type> &columns = lines[row].entries;
+
+      out << '[' << (rowset.size()==0 ? row : rowset.nth_index_in_set(row));
+      for (size_type c=0; c<columns.size(); ++c)
+        out << ',' << columns[c];
+      out << ']' << std::endl;
+    }
+
+  AssertThrow (out, ExcIO());
+}
+
+
+
+void
+DynamicSparsityPattern::print_gnuplot (std::ostream &out) const
+{
+  // Write one "column -row" coordinate pair per stored entry, each on its
+  // own line; raises ExcIO if the stream is not good afterwards.
+  for (size_type row=0; row<lines.size(); ++row)
+    {
+      const size_type rowindex =
+        rowset.size()==0 ? row : rowset.nth_index_in_set(row);
+
+      for (std::vector<size_type >::const_iterator
+           j=lines[row].entries.begin();
+           j != lines[row].entries.end(); ++j)
+        // matrix entries are written (i,j) with i vertical, but gnuplot
+        // output is x-y, so column and (negated) row are exchanged.
+        // NOTE(review): the cast to signed int truncates very large rows.
+        out << *j << " "
+            << -static_cast<signed int>(rowindex)
+            << std::endl;
+    }
+
+
+  AssertThrow (out, ExcIO());
+}
+
+
+
+DynamicSparsityPattern::size_type
+DynamicSparsityPattern::bandwidth () const
+{
+  // Returns the largest |row - column| over all stored entries (0 if none).
+  size_type b=0;
+  for (size_type row=0; row<lines.size(); ++row)
+    {
+      // global index of this line (differs from row if rowset is nonempty)
+      const size_type rowindex =
+        rowset.size()==0 ? row : rowset.nth_index_in_set(row);
+      for (std::vector<size_type>::const_iterator j=lines[row].entries.begin();
+           j != lines[row].entries.end(); ++j)
+        // compare in size_type; a cast to int would truncate large distances
+        b = std::max(b, rowindex > *j ? rowindex - *j : *j - rowindex);
+    }
+
+  return b;
+}
+
+
+
+DynamicSparsityPattern::size_type
+DynamicSparsityPattern::n_nonzero_elements () const
+{
+  // Every stored column index is one nonzero entry, so the total is the
+  // sum of the lengths of all lines.
+  size_type total = 0;
+  for (std::vector<Line>::const_iterator line=lines.begin();
+       line!=lines.end(); ++line)
+    total += line->entries.size();
+  return total;
+}
+
+
+DynamicSparsityPattern::size_type
+DynamicSparsityPattern::memory_consumption () const
+{
+  // Estimate of the memory used: the object itself plus each of its lines.
+  // TODO: memory allocated by rowset (IndexSet) is presumably not counted.
+  size_type mem = sizeof(DynamicSparsityPattern);
+  for (size_type i=0; i<lines.size(); ++i)
+    mem += MemoryConsumption::memory_consumption (lines[i]);
+  return mem;
+}
+
+
+// explicit instantiations
+template void DynamicSparsityPattern::Line::add_entries(size_type *,
+                                                        size_type *,
+                                                        const bool);
+template void DynamicSparsityPattern::Line::add_entries(const size_type *,
+                                                        const size_type *,
+                                                        const bool);
+#ifndef DEAL_II_VECTOR_ITERATOR_IS_POINTER
+template void DynamicSparsityPattern::Line::
+add_entries(std::vector<size_type>::iterator,
+            std::vector<size_type>::iterator,
+            const bool);
+#endif
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/lac/full_matrix.cc b/source/lac/full_matrix.cc
new file mode 100644
index 0000000..7994f58
--- /dev/null
+++ b/source/lac/full_matrix.cc
@@ -0,0 +1,57 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/logstream.h>
+#include <deal.II/lac/full_matrix.templates.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+#include "full_matrix.inst"
+
+// This is needed if PETSc was compiled with complex, though, it may
+// be used elsewhere too.
+template void dealii::FullMatrix<double>::vmult<std::complex<double> >(dealii::Vector<std::complex<double> > &, dealii::Vector<std::complex<double> > const &, bool) const;
+
+// do a few functions that currently don't fit the scheme because they have
+// two template arguments that need to be different (the case of same
+// arguments is covered by the default copy constructor and copy operator that
+// is declared separately)
+
+#define TEMPL_OP_EQ(S1,S2)                            \
+  template FullMatrix<S1>& FullMatrix<S1>::operator = \
+  (const FullMatrix<S2>&)
+
+TEMPL_OP_EQ(double,float);
+TEMPL_OP_EQ(float,double);
+
+TEMPL_OP_EQ(long double,double);
+TEMPL_OP_EQ(double,long double);
+
+TEMPL_OP_EQ(long double,float);
+TEMPL_OP_EQ(float,long double);
+
+
+TEMPL_OP_EQ(std::complex<double>,std::complex<float>);
+TEMPL_OP_EQ(std::complex<float>,std::complex<double>);
+
+TEMPL_OP_EQ(std::complex<long double>,std::complex<double>);
+TEMPL_OP_EQ(std::complex<double>,std::complex<long double>);
+
+TEMPL_OP_EQ(std::complex<long double>,std::complex<float>);
+TEMPL_OP_EQ(std::complex<float>,std::complex<long double>);
+
+#undef TEMPL_OP_EQ
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/lac/full_matrix.inst.in b/source/lac/full_matrix.inst.in
new file mode 100644
index 0000000..fee41fc
--- /dev/null
+++ b/source/lac/full_matrix.inst.in
@@ -0,0 +1,237 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+// Instantiations depending on a single real scalar type S (the list
+// REAL_SCALARS is defined outside this file): the class FullMatrix<S>
+// itself, both print() overloads (LogStream and std::ostream), and the
+// copy_from()/copy_to() member templates for rank-2 tensors in space
+// dimensions 1, 2 and 3.
+for (S : REAL_SCALARS)
+  {
+    template class FullMatrix<S>;
+
+    template void FullMatrix<S>::print(
+      LogStream&, const unsigned int, const unsigned int) const;
+    template void FullMatrix<S>::print(
+      std::ostream&, const unsigned int, const unsigned int) const;    
+
+    template void FullMatrix<S>::copy_from<1>(
+      const Tensor<2,1>&, const size_type, const size_type, const size_type, const size_type, const size_type, const size_type);
+
+    template void FullMatrix<S>::copy_from<2>(
+      const Tensor<2,2>&, const size_type, const size_type, const size_type, const size_type, const size_type, const size_type);
+
+    template void FullMatrix<S>::copy_from<3>(
+      const Tensor<2,3>&, const size_type, const size_type, const size_type, const size_type, const size_type, const size_type);
+
+    template void FullMatrix<S>::copy_to<1>(
+      Tensor<2,1>&, const size_type, const size_type, const size_type, const size_type, const size_type, const size_type) const;
+
+    template void FullMatrix<S>::copy_to<2>(
+      Tensor<2,2>&, const size_type, const size_type, const size_type, const size_type, const size_type, const size_type) const;
+
+    template void FullMatrix<S>::copy_to<3>(
+      Tensor<2,3>&, const size_type, const size_type, const size_type, const size_type, const size_type, const size_type) const;
+  }
+
+// Instantiations of member templates of FullMatrix<S1> whose template
+// argument is a second real scalar type S2: assignment from
+// LAPACKFullMatrix<S2>, the fill/add/Tadd/equ families, the matrix-matrix
+// products (mmult, Tmmult, mTmult, TmTmult), the inversion routines, the
+// matrix-vector operations, precondition_Jacobi, cholesky and
+// outer_product.
+for (S1, S2 : REAL_SCALARS)
+  {
+    template
+      FullMatrix<S1>& FullMatrix<S1>::operator = (const LAPACKFullMatrix<S2>&);
+
+    template
+      void FullMatrix<S1>::fill<S2> (
+	const FullMatrix<S2>&, size_type, size_type, size_type, size_type);
+    template
+      void FullMatrix<S1>::add<S2> (const S1, const FullMatrix<S2>&);
+    template
+      void FullMatrix<S1>::add<S2> (const S1, const FullMatrix<S2>&,
+				    const S1, const FullMatrix<S2>&);
+    template
+      void FullMatrix<S1>::add<S2> (const S1, const FullMatrix<S2>&,
+				    const S1, const FullMatrix<S2>&,
+				    const S1, const FullMatrix<S2>&);
+    template
+      void FullMatrix<S1>::add<S2> (
+	const FullMatrix<S2>&, S1, size_type, size_type, size_type, size_type);
+    template
+      void FullMatrix<S1>::Tadd<S2> (const S1, const FullMatrix<S2>&);
+    template
+      void FullMatrix<S1>::Tadd<S2> (
+	const FullMatrix<S2>&, S1, size_type, size_type, size_type, size_type);
+    template
+      void FullMatrix<S1>::equ<S2> (const S1, const FullMatrix<S2>&);
+    template
+      void FullMatrix<S1>::equ<S2> (const S1, const FullMatrix<S2>&,
+				    const S1, const FullMatrix<S2>&);
+    template
+      void FullMatrix<S1>::equ<S2> (const S1, const FullMatrix<S2>&,
+				    const S1, const FullMatrix<S2>&,
+				    const S1, const FullMatrix<S2>&);
+    template
+      void FullMatrix<S1>::mmult<S2> (FullMatrix<S2>&, const FullMatrix<S2>&, const bool) const;
+    template
+      void FullMatrix<S1>::Tmmult<S2> (FullMatrix<S2>&, const FullMatrix<S2>&, const bool) const;
+    template
+      void FullMatrix<S1>::mTmult<S2> (FullMatrix<S2>&, const FullMatrix<S2>&, const bool) const;
+    template
+      void FullMatrix<S1>::TmTmult<S2> (FullMatrix<S2>&, const FullMatrix<S2>&, const bool) const;
+    template
+      void FullMatrix<S1>::invert<S2> (const FullMatrix<S2>&);
+ 
+    template 
+      void FullMatrix<S1>::left_invert<S2> (const FullMatrix<S2> &);
+    template 
+      void FullMatrix<S1>::right_invert<S2> (const FullMatrix<S2> &);
+
+    template
+      void FullMatrix<S1>::fill_permutation<S2> (
+	const FullMatrix<S2>&,
+	const std::vector<size_type>&,
+	const std::vector<size_type>&);
+    template
+      void FullMatrix<S1>::vmult<S2>(
+      Vector<S2>&, const Vector<S2>&, bool) const;
+    template
+      void FullMatrix<S1>::Tvmult<S2>(
+      Vector<S2>&, const Vector<S2>&, bool) const;
+    template
+      S2 FullMatrix<S1>::matrix_norm_square<S2> (
+      const Vector<S2> &) const;
+    template
+      S2 FullMatrix<S1>::matrix_scalar_product<S2>(
+      const Vector<S2>&, const Vector<S2>&) const;
+    template
+      void FullMatrix<S1>::forward<S2>(
+      Vector<S2>&, const Vector<S2>&) const;
+    template
+      void FullMatrix<S1>::backward<S2>(
+      Vector<S2>&, const Vector<S2>&) const;
+
+    template
+      void FullMatrix<S1>::precondition_Jacobi<S2> (
+	Vector<S2> &, const Vector<S2> &, const S1) const;
+
+    template
+      void FullMatrix<S1>::cholesky<S2> (const FullMatrix<S2>&);
+
+    template
+      void FullMatrix<S1>::outer_product<S2> (const Vector<S2>&,
+                                              const Vector<S2>&);
+  }
+
+// residual() is the only member template with two independent scalar
+// template arguments (S2 and S3) in addition to the matrix scalar S1, so it
+// is instantiated in a separate three-type loop.
+for (S1, S2, S3 : REAL_SCALARS)
+  {
+    template
+      S1
+      FullMatrix<S1>::residual<S2,S3>(Vector<S2>&,
+				      const Vector<S2>&,
+				      const Vector<S3>&) const;
+  }
+
+
+
+
+
+// same for complex scalars
+
+// Instantiations depending on a single complex scalar type S (the list
+// COMPLEX_SCALARS is defined outside this file): the class FullMatrix<S>
+// and both print() overloads. Unlike the real-valued loop, no
+// copy_from()/copy_to() tensor instantiations are listed here.
+for (S : COMPLEX_SCALARS)
+  {
+    template class FullMatrix<S>;
+
+    template void FullMatrix<S>::print(
+      LogStream&, const unsigned int, const unsigned int) const;
+    template void FullMatrix<S>::print(
+      std::ostream&, const unsigned int, const unsigned int) const;
+  }
+
+// Instantiations of member templates of FullMatrix<S1> for a second complex
+// scalar type S2. Compared with the real-valued loop, this list does not
+// contain the assignment from LAPACKFullMatrix, mTmult, TmTmult, cholesky
+// or outer_product.
+for (S1, S2 : COMPLEX_SCALARS)
+  {
+    template
+      void FullMatrix<S1>::fill<S2> (
+	const FullMatrix<S2>&, size_type, size_type, size_type, size_type);
+    template
+      void FullMatrix<S1>::add<S2> (const S1, const FullMatrix<S2>&);
+    template
+      void FullMatrix<S1>::add<S2> (const S1, const FullMatrix<S2>&,
+				    const S1, const FullMatrix<S2>&);
+    template
+      void FullMatrix<S1>::add<S2> (const S1, const FullMatrix<S2>&,
+				    const S1, const FullMatrix<S2>&,
+				    const S1, const FullMatrix<S2>&);
+    template
+      void FullMatrix<S1>::add<S2> (
+	const FullMatrix<S2>&, S1, size_type, size_type, size_type, size_type);
+    template
+      void FullMatrix<S1>::Tadd<S2> (const S1, const FullMatrix<S2>&);
+    template
+      void FullMatrix<S1>::Tadd<S2> (
+	const FullMatrix<S2>&, S1, size_type, size_type,
+  size_type, size_type);
+    template
+      void FullMatrix<S1>::equ<S2> (const S1, const FullMatrix<S2>&);
+    template
+      void FullMatrix<S1>::equ<S2> (const S1, const FullMatrix<S2>&,
+				    const S1, const FullMatrix<S2>&);
+    template
+      void FullMatrix<S1>::equ<S2> (const S1, const FullMatrix<S2>&,
+				    const S1, const FullMatrix<S2>&,
+				    const S1, const FullMatrix<S2>&);
+    template
+      void FullMatrix<S1>::mmult<S2> (FullMatrix<S2>&, const FullMatrix<S2>&, const bool) const;
+    template
+      void FullMatrix<S1>::Tmmult<S2> (FullMatrix<S2>&, const FullMatrix<S2>&, const bool) const;
+    template
+      void FullMatrix<S1>::invert<S2> (const FullMatrix<S2>&);
+
+    template 
+      void FullMatrix<S1>::left_invert<S2> (const FullMatrix<S2> &);
+    template 
+      void FullMatrix<S1>::right_invert<S2> (const FullMatrix<S2> &);
+
+    template
+      void FullMatrix<S1>::fill_permutation<S2> (
+	const FullMatrix<S2>&,
+	const std::vector<size_type>&,
+	const std::vector<size_type>&);
+    template
+      void FullMatrix<S1>::vmult<S2>(
+      Vector<S2>&, const Vector<S2>&, bool) const;
+    template
+      void FullMatrix<S1>::Tvmult<S2>(
+      Vector<S2>&, const Vector<S2>&, bool) const;
+    template
+      S2 FullMatrix<S1>::matrix_norm_square<S2> (
+      const Vector<S2> &) const;
+    template
+      S2 FullMatrix<S1>::matrix_scalar_product<S2>(
+      const Vector<S2>&, const Vector<S2>&) const;
+    template
+      void FullMatrix<S1>::forward<S2>(
+      Vector<S2>&, const Vector<S2>&) const;
+    template
+      void FullMatrix<S1>::backward<S2>(
+      Vector<S2>&, const Vector<S2>&) const;
+
+    template
+      void FullMatrix<S1>::precondition_Jacobi<S2> (
+	Vector<S2> &, const Vector<S2> &, const S1) const;
+  }
+
+// residual() for complex scalars: as in the real-valued case it takes two
+// independent vector scalar types S2 and S3 besides the matrix scalar S1.
+for (S1, S2, S3 : COMPLEX_SCALARS)
+  {
+    template
+      S1
+      FullMatrix<S1>::residual<S2,S3>(Vector<S2>&,
+				      const Vector<S2>&,
+				      const Vector<S3>&) const;
+  }
diff --git a/source/lac/lapack_full_matrix.cc b/source/lac/lapack_full_matrix.cc
new file mode 100644
index 0000000..c6f79ba
--- /dev/null
+++ b/source/lac/lapack_full_matrix.cc
@@ -0,0 +1,1057 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/lac/lapack_full_matrix.h>
+#include <deal.II/lac/lapack_templates.h>
+#include <deal.II/lac/lapack_support.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/block_vector.h>
+
+#include <iostream>
+#include <iomanip>
+
+DEAL_II_NAMESPACE_OPEN
+
+using namespace LAPACKSupport;
+
+// Construct a square matrix with n rows and n columns. The new object
+// starts out in state LAPACKSupport::matrix.
+template <typename number>
+LAPACKFullMatrix<number>::LAPACKFullMatrix (const size_type n)
+  : TransposeTable<number> (n, n),
+    state (LAPACKSupport::matrix)
+{
+}
+
+
+// Construct a rectangular matrix with m rows and n columns. The new object
+// starts out in state LAPACKSupport::matrix.
+template <typename number>
+LAPACKFullMatrix<number>::LAPACKFullMatrix (const size_type m,
+                                            const size_type n)
+  : TransposeTable<number> (m, n),
+    state (LAPACKSupport::matrix)
+{
+}
+
+
+// Copy constructor. The table of M is copied through the base class; the
+// state of the new object is always set to LAPACKSupport::matrix,
+// irrespective of the state stored in M.
+template <typename number>
+LAPACKFullMatrix<number>::LAPACKFullMatrix (const LAPACKFullMatrix &M)
+  : TransposeTable<number> (M),
+    state (LAPACKSupport::matrix)
+{
+}
+
+
+// Copy assignment. Delegates the copy of the table to the base class and
+// then marks this object as a plain matrix, irrespective of the state
+// stored in M.
+template <typename number>
+LAPACKFullMatrix<number> &
+LAPACKFullMatrix<number>::operator = (const LAPACKFullMatrix<number> &M)
+{
+  this->TransposeTable<number>::operator= (M);
+
+  state = LAPACKSupport::matrix;
+
+  return *this;
+}
+
+
+// Reinitialize to a square n x n matrix through the base class and put the
+// object back into state LAPACKSupport::matrix.
+template <typename number>
+void
+LAPACKFullMatrix<number>::reinit (const size_type n)
+{
+  TransposeTable<number>::reinit (n, n);
+
+  state = LAPACKSupport::matrix;
+}
+
+
+// Reinitialize to an m x n matrix through the base class and put the
+// object back into state LAPACKSupport::matrix.
+template <typename number>
+void
+LAPACKFullMatrix<number>::reinit (const size_type m,
+                                  const size_type n)
+{
+  TransposeTable<number>::reinit (m, n);
+
+  state = LAPACKSupport::matrix;
+}
+
+
+// Copy all entries of the FullMatrix M into this matrix, element by
+// element. The sizes of both matrices have to match already; this function
+// does not resize. Afterwards the object is in state LAPACKSupport::matrix.
+template <typename number>
+template <typename number2>
+LAPACKFullMatrix<number> &
+LAPACKFullMatrix<number>::operator = (const FullMatrix<number2> &M)
+{
+  Assert (this->n_rows() == M.n_rows(),
+          ExcDimensionMismatch(this->n_rows(), M.n_rows()));
+  Assert (this->n_cols() == M.n(),
+          ExcDimensionMismatch(this->n_cols(), M.n()));
+
+  for (size_type row=0; row<this->n_rows(); ++row)
+    {
+      for (size_type col=0; col<this->n_cols(); ++col)
+        (*this)(row,col) = M(row,col);
+    }
+
+  state = LAPACKSupport::matrix;
+  return *this;
+}
+
+
+// Copy all entries of the sparse matrix M into this (dense) matrix. Entries
+// are read through M.el(i,j). The sizes of both matrices have to match
+// already; this function does not resize. Afterwards the object is in state
+// LAPACKSupport::matrix.
+template <typename number>
+template <typename number2>
+LAPACKFullMatrix<number> &
+LAPACKFullMatrix<number>::operator = (const SparseMatrix<number2> &M)
+{
+  // The copy below is not transposed, so rows must be compared with rows
+  // (M.m()) and columns with columns (M.n()). For square matrices this is
+  // the same check as before; for rectangular ones the previous check had
+  // the two dimensions swapped.
+  Assert (this->n_rows() == M.m(), ExcDimensionMismatch(this->n_rows(), M.m()));
+  Assert (this->n_cols() == M.n(), ExcDimensionMismatch(this->n_cols(), M.n()));
+  for (size_type i=0; i<this->n_rows(); ++i)
+    for (size_type j=0; j<this->n_cols(); ++j)
+      (*this)(i,j) = M.el(i,j);
+
+  state = LAPACKSupport::matrix;
+  return *this;
+}
+
+
+// Assignment from a scalar. Only the value zero is allowed (checked by an
+// assertion); the operator then resets all stored values and puts the
+// object into state LAPACKSupport::matrix.
+template <typename number>
+LAPACKFullMatrix<number> &
+LAPACKFullMatrix<number>::operator = (const double d)
+{
+  // d is only used inside the assertion; the cast avoids an unused-variable
+  // warning when assertions are compiled out
+  (void)d;
+  Assert (d==0, ExcScalarAssignmentOnlyForZeroValue());
+
+  // nothing to reset for an empty table
+  if (this->n_elements() != 0)
+    {
+      this->reset_values();
+    }
+
+  state = LAPACKSupport::matrix;
+
+  return *this;
+}
+
+
+// Matrix-vector product: w = A*v, or w += A*v if adding is true. What "A"
+// means depends on the current state:
+//  - matrix / inverse_matrix: the stored values are applied with one gemv
+//    call;
+//  - svd: the product is assembled from the stored factors as
+//    U * diag(wr) * V^T * v;
+//  - inverse_svd: the product is assembled as V * diag(wr) * U^T * v, where
+//    wr has been modified by compute_inverse_svd().
+// Any other state triggers an assertion.
+template <typename number>
+void
+LAPACKFullMatrix<number>::vmult (
+  Vector<number>       &w,
+  const Vector<number> &v,
+  const bool            adding) const
+{
+  const int n_rows_A = this->n_rows();
+  const int n_cols_A = this->n_cols();
+
+  // scaling factors handed to gemv
+  const number unit_scale = 1.;
+  const number overwrite  = 0.;
+  const number accumulate = (adding ? 1. : 0.);
+
+  switch (state)
+    {
+    case matrix:
+    case inverse_matrix:
+    {
+      AssertDimension(v.size(), this->n_cols());
+      AssertDimension(w.size(), this->n_rows());
+
+      gemv("N", &n_rows_A, &n_cols_A, &unit_scale, &this->values[0], &n_rows_A,
+           v.val, &one, &accumulate, w.val, &one);
+      break;
+    }
+    case svd:
+    {
+      AssertDimension(v.size(), this->n_cols());
+      AssertDimension(w.size(), this->n_rows());
+
+      // work <- V^T v
+      work.resize(std::max(n_rows_A,n_cols_A));
+      gemv("N", &n_cols_A, &n_cols_A, &unit_scale, &svd_vt->values[0], &n_cols_A,
+           v.val, &one, &overwrite, &work[0], &one);
+
+      // scale by the singular values
+      for (size_type k=0; k<wr.size(); ++k)
+        work[k] *= wr[k];
+
+      // w <- U work (+ w)
+      gemv("N", &n_rows_A, &n_rows_A, &unit_scale, &svd_u->values[0], &n_rows_A,
+           &work[0], &one, &accumulate, w.val, &one);
+      break;
+    }
+    case inverse_svd:
+    {
+      AssertDimension(w.size(), this->n_cols());
+      AssertDimension(v.size(), this->n_rows());
+
+      // work <- U^T v
+      work.resize(std::max(n_rows_A,n_cols_A));
+      gemv("T", &n_rows_A, &n_rows_A, &unit_scale, &svd_u->values[0], &n_rows_A,
+           v.val, &one, &overwrite, &work[0], &one);
+
+      // scale by the (already inverted) singular values
+      for (size_type k=0; k<wr.size(); ++k)
+        work[k] *= wr[k];
+
+      // w <- V work (+ w)
+      gemv("T", &n_cols_A, &n_cols_A, &unit_scale, &svd_vt->values[0], &n_cols_A,
+           &work[0], &one, &accumulate, w.val, &one);
+      break;
+    }
+    default:
+      Assert (false, ExcState(state));
+    }
+}
+
+
+// Transposed matrix-vector product: w = A^T*v, or w += A^T*v if adding is
+// true. As in vmult(), the meaning of "A" depends on the current state:
+//  - matrix / inverse_matrix: the stored values are applied with one
+//    transposed gemv call;
+//  - svd: the product is assembled as V * diag(wr) * U^T * v;
+//  - inverse_svd: the product is assembled as U * diag(wr) * V^T * v, where
+//    wr has been modified by compute_inverse_svd().
+// Any other state triggers an assertion.
+template <typename number>
+void
+LAPACKFullMatrix<number>::Tvmult (
+  Vector<number>       &w,
+  const Vector<number> &v,
+  const bool            adding) const
+{
+  const int mm = this->n_rows();
+  const int nn = this->n_cols();
+  const number alpha = 1.;
+  const number beta = (adding ? 1. : 0.);
+  const number null = 0.;
+
+  switch (state)
+    {
+    case matrix:
+    case inverse_matrix:
+    {
+      AssertDimension(w.size(), this->n_cols());
+      AssertDimension(v.size(), this->n_rows());
+
+      gemv("T", &mm, &nn, &alpha, &this->values[0], &mm, v.val, &one, &beta, w.val, &one);
+      break;
+    }
+    case svd:
+    {
+      AssertDimension(w.size(), this->n_cols());
+      AssertDimension(v.size(), this->n_rows());
+
+      // Compute U^T v
+      work.resize(std::max(mm,nn));
+      gemv("T", &mm, &mm, &alpha, &svd_u->values[0], &mm, v.val, &one, &null, &work[0], &one);
+      // Multiply by singular values
+      for (size_type i=0; i<wr.size(); ++i)
+        work[i] *= wr[i];
+      // Multiply with V
+      gemv("T", &nn, &nn, &alpha, &svd_vt->values[0], &nn, &work[0], &one, &beta, w.val, &one);
+      break;
+    }
+    // This case label used to sit inside the braces of the svd case. That
+    // compiled and behaved the same, but it is now a sibling case as in
+    // vmult().
+    case inverse_svd:
+    {
+      AssertDimension(v.size(), this->n_cols());
+      AssertDimension(w.size(), this->n_rows());
+
+      // Compute V^T v
+      work.resize(std::max(mm,nn));
+      gemv("N", &nn, &nn, &alpha, &svd_vt->values[0], &nn, v.val, &one, &null, &work[0], &one);
+      // Multiply by singular values
+      for (size_type i=0; i<wr.size(); ++i)
+        work[i] *= wr[i];
+      // Multiply with U
+      gemv("N", &mm, &mm, &alpha, &svd_u->values[0], &mm, &work[0], &one, &beta, w.val, &one);
+      break;
+    }
+    default:
+      Assert (false, ExcState(state));
+    }
+}
+
+
+// Adding matrix-vector product, w += A*v. Forwards to vmult() with the
+// adding flag set.
+template <typename number>
+void
+LAPACKFullMatrix<number>::vmult_add (Vector<number>       &w,
+                                     const Vector<number> &v) const
+{
+  const bool adding = true;
+  this->vmult(w, v, adding);
+}
+
+
+// Adding transposed matrix-vector product, w += A^T*v. Forwards to Tvmult()
+// with the adding flag set.
+template <typename number>
+void
+LAPACKFullMatrix<number>::Tvmult_add (Vector<number>       &w,
+                                      const Vector<number> &v) const
+{
+  const bool adding = true;
+  this->Tvmult(w, v, adding);
+}
+
+
+// Matrix-matrix product C = A*B (or C += A*B if adding is true), where A is
+// this matrix. The product is computed by one call to gemm. All three
+// matrices have to be in state matrix or inverse_matrix and C must already
+// have the right size.
+template <typename number>
+void
+LAPACKFullMatrix<number>::mmult(LAPACKFullMatrix<number>       &C,
+                                const LAPACKFullMatrix<number> &B,
+                                const bool                      adding) const
+{
+  // Each assertion reports the state of the matrix it actually checks.
+  Assert(state == matrix || state == inverse_matrix, ExcState(state));
+  Assert(B.state == matrix || B.state == inverse_matrix, ExcState(B.state));
+  Assert(C.state == matrix || C.state == inverse_matrix, ExcState(C.state));
+  Assert (this->n_cols() == B.n_rows(), ExcDimensionMismatch(this->n_cols(), B.n_rows()));
+  Assert (C.n_cols() == B.n_cols(), ExcDimensionMismatch(C.n_cols(), B.n_cols()));
+  Assert (C.n_rows() == this->n_rows(), ExcDimensionMismatch(this->n_rows(), C.n_rows()));
+  const int mm = this->n_rows();
+  const int nn = B.n_cols();
+  const int kk = this->n_cols();
+  const number alpha = 1.;
+  // beta = 1 keeps the previous content of C, beta = 0 overwrites it
+  const number beta = (adding ? 1. : 0.);
+
+  gemm("N", "N", &mm, &nn, &kk, &alpha, &this->values[0], &mm, &B.values[0],
+       &kk, &beta, &C.values[0], &mm);
+}
+
+
+// Matrix-matrix product C = A*B (or C += A*B if adding is true), where A is
+// this matrix and the result is written into a FullMatrix. This matrix and
+// B have to be in state matrix or inverse_matrix and C must already have
+// the right size.
+template <typename number>
+void
+LAPACKFullMatrix<number>::mmult(FullMatrix<number>             &C,
+                                const LAPACKFullMatrix<number> &B,
+                                const bool                      adding) const
+{
+  // Each assertion reports the state of the matrix it actually checks.
+  Assert(state == matrix || state == inverse_matrix, ExcState(state));
+  Assert(B.state == matrix || B.state == inverse_matrix, ExcState(B.state));
+  Assert (this->n_cols() == B.n_rows(), ExcDimensionMismatch(this->n_cols(), B.n_rows()));
+  Assert (C.n_cols() == B.n_cols(), ExcDimensionMismatch(C.n_cols(), B.n_cols()));
+  Assert (C.n_rows() == this->n_rows(), ExcDimensionMismatch(this->n_rows(), C.n_rows()));
+  const int mm = this->n_rows();
+  const int nn = B.n_cols();
+  const int kk = this->n_cols();
+  const number alpha = 1.;
+  // beta = 1 keeps the previous content of C, beta = 0 overwrites it
+  const number beta = (adding ? 1. : 0.);
+
+  // since FullMatrix stores the matrix in transposed order compared to this
+  // matrix, compute B^T * A^T = (A * B)^T
+  gemm("T", "T", &nn, &mm, &kk, &alpha, &B.values[0], &kk, &this->values[0],
+       &mm, &beta, &C(0,0), &nn);
+}
+
+
+
+// Matrix-matrix product with the transpose of this matrix: C = A^T*B (or
+// C += A^T*B if adding is true). All three matrices have to be in state
+// matrix or inverse_matrix and C must already have the right size.
+template <typename number>
+void
+LAPACKFullMatrix<number>::Tmmult(LAPACKFullMatrix<number>       &C,
+                                 const LAPACKFullMatrix<number> &B,
+                                 const bool                      adding) const
+{
+  // Each assertion reports the state of the matrix it actually checks.
+  Assert(state == matrix || state == inverse_matrix, ExcState(state));
+  Assert(B.state == matrix || B.state == inverse_matrix, ExcState(B.state));
+  Assert(C.state == matrix || C.state == inverse_matrix, ExcState(C.state));
+  Assert (this->n_rows() == B.n_rows(), ExcDimensionMismatch(this->n_rows(), B.n_rows()));
+  Assert (C.n_cols() == B.n_cols(), ExcDimensionMismatch(C.n_cols(), B.n_cols()));
+  Assert (C.n_rows() == this->n_cols(), ExcDimensionMismatch(this->n_cols(), C.n_rows()));
+  const int mm = this->n_cols();
+  const int nn = B.n_cols();
+  const int kk = B.n_rows();
+  const number alpha = 1.;
+  // beta = 1 keeps the previous content of C, beta = 0 overwrites it
+  const number beta = (adding ? 1. : 0.);
+
+  gemm("T", "N", &mm, &nn, &kk, &alpha, &this->values[0], &kk, &B.values[0],
+       &kk, &beta, &C.values[0], &mm);
+}
+
+
+// Matrix-matrix product with the transpose of this matrix, C = A^T*B (or
+// C += A^T*B if adding is true), with the result written into a FullMatrix.
+// This matrix and B have to be in state matrix or inverse_matrix and C must
+// already have the right size.
+template <typename number>
+void
+LAPACKFullMatrix<number>::Tmmult(FullMatrix<number>             &C,
+                                 const LAPACKFullMatrix<number> &B,
+                                 const bool                      adding) const
+{
+  // Each assertion reports the state of the matrix it actually checks.
+  Assert(state == matrix || state == inverse_matrix, ExcState(state));
+  Assert(B.state == matrix || B.state == inverse_matrix, ExcState(B.state));
+  Assert (this->n_rows() == B.n_rows(), ExcDimensionMismatch(this->n_rows(), B.n_rows()));
+  Assert (C.n_cols() == B.n_cols(), ExcDimensionMismatch(C.n_cols(), B.n_cols()));
+  Assert (C.n_rows() == this->n_cols(), ExcDimensionMismatch(this->n_cols(), C.n_rows()));
+  const int mm = this->n_cols();
+  const int nn = B.n_cols();
+  const int kk = B.n_rows();
+  const number alpha = 1.;
+  // beta = 1 keeps the previous content of C, beta = 0 overwrites it
+  const number beta = (adding ? 1. : 0.);
+
+  // since FullMatrix stores the matrix in transposed order compared to this
+  // matrix, compute B^T * A = (A^T * B)^T
+  gemm("T", "N", &nn, &mm, &kk, &alpha, &B.values[0], &kk, &this->values[0],
+       &kk, &beta, &C(0,0), &nn);
+}
+
+
+// Matrix-matrix product with the transpose of B: C = A*B^T (or C += A*B^T
+// if adding is true), where A is this matrix. All three matrices have to be
+// in state matrix or inverse_matrix and C must already have the right size.
+template <typename number>
+void
+LAPACKFullMatrix<number>::mTmult(LAPACKFullMatrix<number>       &C,
+                                 const LAPACKFullMatrix<number> &B,
+                                 const bool                      adding) const
+{
+  // Each assertion reports the state of the matrix it actually checks.
+  Assert(state == matrix || state == inverse_matrix, ExcState(state));
+  Assert(B.state == matrix || B.state == inverse_matrix, ExcState(B.state));
+  Assert(C.state == matrix || C.state == inverse_matrix, ExcState(C.state));
+  Assert (this->n_cols() == B.n_cols(), ExcDimensionMismatch(this->n_cols(), B.n_cols()));
+  Assert (C.n_cols() == B.n_rows(), ExcDimensionMismatch(C.n_cols(), B.n_rows()));
+  Assert (C.n_rows() == this->n_rows(), ExcDimensionMismatch(this->n_rows(), C.n_rows()));
+  const int mm = this->n_rows();
+  const int nn = B.n_rows();
+  const int kk = B.n_cols();
+  const number alpha = 1.;
+  // beta = 1 keeps the previous content of C, beta = 0 overwrites it
+  const number beta = (adding ? 1. : 0.);
+
+  gemm("N", "T", &mm, &nn, &kk, &alpha, &this->values[0], &mm, &B.values[0],
+       &nn, &beta, &C.values[0], &mm);
+}
+
+
+
+// Matrix-matrix product with the transpose of B, C = A*B^T (or C += A*B^T
+// if adding is true), with the result written into a FullMatrix. This
+// matrix and B have to be in state matrix or inverse_matrix and C must
+// already have the right size.
+template <typename number>
+void
+LAPACKFullMatrix<number>::mTmult(FullMatrix<number>             &C,
+                                 const LAPACKFullMatrix<number> &B,
+                                 const bool                      adding) const
+{
+  // Each assertion reports the state of the matrix it actually checks.
+  Assert(state == matrix || state == inverse_matrix, ExcState(state));
+  Assert(B.state == matrix || B.state == inverse_matrix, ExcState(B.state));
+  Assert (this->n_cols() == B.n_cols(), ExcDimensionMismatch(this->n_cols(), B.n_cols()));
+  Assert (C.n_cols() == B.n_rows(), ExcDimensionMismatch(C.n_cols(), B.n_rows()));
+  Assert (C.n_rows() == this->n_rows(), ExcDimensionMismatch(this->n_rows(), C.n_rows()));
+  const int mm = this->n_rows();
+  const int nn = B.n_rows();
+  const int kk = B.n_cols();
+  const number alpha = 1.;
+  // beta = 1 keeps the previous content of C, beta = 0 overwrites it
+  const number beta = (adding ? 1. : 0.);
+
+  // since FullMatrix stores the matrix in transposed order compared to this
+  // matrix, compute B * A^T = (A * B^T)^T
+  gemm("N", "T", &nn, &mm, &kk, &alpha, &B.values[0], &nn, &this->values[0],
+       &mm, &beta, &C(0,0), &nn);
+}
+
+
+// Matrix-matrix product of the two transposes: C = A^T*B^T (or
+// C += A^T*B^T if adding is true), where A is this matrix. All three
+// matrices have to be in state matrix or inverse_matrix and C must already
+// have the right size.
+template <typename number>
+void
+LAPACKFullMatrix<number>::TmTmult(LAPACKFullMatrix<number>       &C,
+                                  const LAPACKFullMatrix<number> &B,
+                                  const bool                      adding) const
+{
+  // Each assertion reports the state of the matrix it actually checks.
+  Assert(state == matrix || state == inverse_matrix, ExcState(state));
+  Assert(B.state == matrix || B.state == inverse_matrix, ExcState(B.state));
+  Assert(C.state == matrix || C.state == inverse_matrix, ExcState(C.state));
+  Assert (this->n_rows() == B.n_cols(), ExcDimensionMismatch(this->n_rows(), B.n_cols()));
+  Assert (C.n_cols() == B.n_rows(), ExcDimensionMismatch(C.n_cols(), B.n_rows()));
+  Assert (C.n_rows() == this->n_cols(), ExcDimensionMismatch(this->n_cols(), C.n_rows()));
+  const int mm = this->n_cols();
+  const int nn = B.n_rows();
+  const int kk = B.n_cols();
+  const number alpha = 1.;
+  // beta = 1 keeps the previous content of C, beta = 0 overwrites it
+  const number beta = (adding ? 1. : 0.);
+
+  gemm("T", "T", &mm, &nn, &kk, &alpha, &this->values[0], &kk, &B.values[0],
+       &nn, &beta, &C.values[0], &mm);
+}
+
+
+// Matrix-matrix product of the two transposes, C = A^T*B^T (or
+// C += A^T*B^T if adding is true), with the result written into a
+// FullMatrix. This matrix and B have to be in state matrix or
+// inverse_matrix and C must already have the right size.
+template <typename number>
+void
+LAPACKFullMatrix<number>::TmTmult(FullMatrix<number>             &C,
+                                  const LAPACKFullMatrix<number> &B,
+                                  const bool                      adding) const
+{
+  // Each assertion reports the state of the matrix it actually checks.
+  Assert(state == matrix || state == inverse_matrix, ExcState(state));
+  Assert(B.state == matrix || B.state == inverse_matrix, ExcState(B.state));
+  Assert (this->n_rows() == B.n_cols(), ExcDimensionMismatch(this->n_rows(), B.n_cols()));
+  Assert (C.n_cols() == B.n_rows(), ExcDimensionMismatch(C.n_cols(), B.n_rows()));
+  Assert (C.n_rows() == this->n_cols(), ExcDimensionMismatch(this->n_cols(), C.n_rows()));
+  const int mm = this->n_cols();
+  const int nn = B.n_rows();
+  const int kk = B.n_cols();
+  const number alpha = 1.;
+  // beta = 1 keeps the previous content of C, beta = 0 overwrites it
+  const number beta = (adding ? 1. : 0.);
+
+  // since FullMatrix stores the matrix in transposed order compared to this
+  // matrix, compute B * A = (A^T * B^T)^T
+  gemm("N", "N", &nn, &mm, &kk, &alpha, &B.values[0], &nn, &this->values[0],
+       &kk, &beta, &C(0,0), &nn);
+}
+
+
+// Replace the stored values by their LU factorization, computed in place by
+// getrf; the pivot indices are kept in ipiv. Requires state matrix and
+// leaves the object in state lu. Throws ExcInternalError for a negative
+// getrf return code and LACExceptions::ExcSingular for a positive one.
+template <typename number>
+void
+LAPACKFullMatrix<number>::compute_lu_factorization()
+{
+  Assert(state == matrix, ExcState(state));
+
+  const int n_rows_A = this->n_rows();
+  const int n_cols_A = this->n_cols();
+
+  ipiv.resize(n_rows_A);
+  number *const matrix_data = const_cast<number *> (&this->values[0]);
+
+  int return_code = 0;
+  getrf(&n_rows_A, &n_cols_A, matrix_data, &n_rows_A, &ipiv[0], &return_code);
+
+  AssertThrow(return_code >= 0, ExcInternalError());
+  AssertThrow(return_code == 0, LACExceptions::ExcSingular());
+
+  state = lu;
+}
+
+
+// Compute the singular value decomposition of this matrix with gesdd.
+// Requires state matrix. On success the factors are stored in svd_u (mm x
+// mm) and svd_vt (nn x nn), the singular values in wr, and the state is set
+// to svd. Throws LAPACKSupport::ExcErrorCode if gesdd reports an error.
+template <typename number>
+void
+LAPACKFullMatrix<number>::compute_svd()
+{
+  Assert(state == matrix, ExcState(state));
+  // Mark the object unusable up front; the state only becomes svd at the
+  // very end, so an exception thrown below leaves it flagged as unusable.
+  state = LAPACKSupport::unusable;
+
+  const int mm = this->n_rows();
+  const int nn = this->n_cols();
+  number *values = const_cast<number *> (&this->values[0]);
+  // wr receives the singular values; it is zero-filled before the call
+  wr.resize(std::max(mm,nn));
+  std::fill(wr.begin(), wr.end(), 0.);
+  // ipiv is handed to gesdd after the work array, presumably as its integer
+  // workspace -- TODO confirm against the gesdd wrapper
+  ipiv.resize(8*mm);
+
+  svd_u.reset (new LAPACKFullMatrix<number>(mm,mm));
+  svd_vt.reset (new LAPACKFullMatrix<number>(nn,nn));
+  number *mu  = const_cast<number *> (&svd_u->values[0]);
+  number *mvt = const_cast<number *> (&svd_vt->values[0]);
+  int info = 0;
+
+  // First determine optimal workspace size
+  // (lwork = -1 requests a workspace query; the result is read from work[0])
+  work.resize(1);
+  int lwork = -1;
+  gesdd(&LAPACKSupport::A, &mm, &nn, values, &mm,
+        &wr[0], mu, &mm, mvt, &nn,
+        &work[0], &lwork, &ipiv[0], &info);
+  AssertThrow (info==0, LAPACKSupport::ExcErrorCode("gesdd", info));
+  // Resize the work array. Add one to the size computed by LAPACK to be on
+  // the safe side.
+  lwork = static_cast<int>(work[0] + 1);
+
+  work.resize(lwork);
+  // Do the actual SVD.
+  gesdd(&LAPACKSupport::A, &mm, &nn, values, &mm,
+        &wr[0], mu, &mm, mvt, &nn,
+        &work[0], &lwork, &ipiv[0], &info);
+  AssertThrow (info==0, LAPACKSupport::ExcErrorCode("gesdd", info));
+
+  // The scratch arrays are no longer needed once the decomposition is done.
+  work.resize(0);
+  ipiv.resize(0);
+
+  state = LAPACKSupport::svd;
+}
+
+
+// Turn the singular value decomposition into that of a (pseudo-)inverse:
+// every singular value larger than threshold times the first singular value
+// is replaced by its reciprocal, all others are set to zero. If the object
+// is still in state matrix, the SVD is computed first. Leaves the object in
+// state inverse_svd.
+template <typename number>
+void
+LAPACKFullMatrix<number>::compute_inverse_svd(const double threshold)
+{
+  if (state == LAPACKSupport::matrix)
+    compute_svd();
+
+  Assert (state==LAPACKSupport::svd, ExcState(state));
+
+  // cut-off relative to the first stored singular value
+  const double cutoff = wr[0]*threshold;
+
+  for (size_type k=0; k<wr.size(); ++k)
+    wr[k] = (wr[k] > cutoff) ? 1./wr[k] : 0.;
+
+  state = LAPACKSupport::inverse_svd;
+}
+
+
+// Invert the matrix in place. The object has to be in state matrix or lu;
+// in the former case the LU factorization is computed first with getrf. The
+// inverse is then formed by getri and the object ends up in state
+// inverse_matrix. Throws ExcInternalError for a negative LAPACK return code
+// and LACExceptions::ExcSingular for a positive one.
+template <typename number>
+void
+LAPACKFullMatrix<number>::invert()
+{
+  Assert(state == matrix || state == lu,
+         ExcState(state));
+
+  const int n_rows_A = this->n_rows();
+  const int n_cols_A = this->n_cols();
+  Assert (n_cols_A == n_rows_A, ExcNotQuadratic());
+
+  number *const matrix_data = const_cast<number *> (&this->values[0]);
+  ipiv.resize(n_rows_A);
+  int return_code = 0;
+
+  // factorize first unless that has been done already
+  if (state == matrix)
+    {
+      getrf(&n_rows_A, &n_cols_A, matrix_data, &n_rows_A, &ipiv[0], &return_code);
+
+      AssertThrow(return_code >= 0, ExcInternalError());
+      AssertThrow(return_code == 0, LACExceptions::ExcSingular());
+    }
+
+  // workspace for getri; its length is passed as n_rows_A as well
+  inv_work.resize (n_rows_A);
+  getri(&n_rows_A, matrix_data, &n_rows_A, &ipiv[0], &inv_work[0], &n_rows_A,
+        &return_code);
+
+  AssertThrow(return_code >= 0, ExcInternalError());
+  AssertThrow(return_code == 0, LACExceptions::ExcSingular());
+
+  state = inverse_matrix;
+}
+
+
+// Solve a linear system with the stored LU factorization by calling getrs
+// with a single right hand side. The vector v is passed to getrs as the
+// right hand side array. If transposed is true, the transposed system is
+// solved. Requires state lu and a square matrix whose size matches v.
+template <typename number>
+void
+LAPACKFullMatrix<number>::apply_lu_factorization(Vector<number> &v,
+                                                 const bool transposed) const
+{
+  Assert(state == lu, ExcState(state));
+  Assert(this->n_rows() == this->n_cols(),
+         LACExceptions::ExcNotQuadratic());
+  AssertDimension(this->n_rows(), v.size());
+
+  // select the "transpose" flag handed to getrs
+  const char *trans_flag = &N;
+  if (transposed)
+    trans_flag = &T;
+
+  const int dim = this->n_cols();
+  const number *const lu_data = &this->values[0];
+  int return_code = 0;
+
+  getrs(trans_flag, &dim, &one, lu_data, &dim, &ipiv[0],
+        v.begin(), &dim, &return_code);
+
+  AssertThrow(return_code == 0, ExcInternalError());
+}
+
+
+// Solve linear systems with the stored LU factorization by calling getrs,
+// using the columns of B as right hand sides. The values of B are passed to
+// getrs as the right hand side array. If transposed is true, the transposed
+// system is solved. Requires this matrix to be square and in state lu, and
+// B to be in state matrix with as many rows as this matrix.
+template <typename number>
+void
+LAPACKFullMatrix<number>::apply_lu_factorization(LAPACKFullMatrix<number> &B,
+                                                 const bool transposed) const
+{
+  Assert(state == lu, ExcState(state));
+  // Report the state of B, which is the matrix this assertion checks.
+  Assert(B.state == matrix, ExcState(B.state));
+  Assert(this->n_rows() == this->n_cols(), LACExceptions::ExcNotQuadratic());
+  AssertDimension(this->n_rows(), B.n_rows());
+
+  const char *trans = transposed ? &T : &N;
+  const int nn = this->n_cols();
+  // number of right hand sides
+  const int kk = B.n_cols();
+  const number *values = &this->values[0];
+  int info = 0;
+
+  getrs(trans, &nn, &kk, values, &nn, &ipiv[0], &B.values[0], &nn, &info);
+
+  AssertThrow(info == 0, ExcInternalError());
+}
+
+
+// Compute the eigenvalues of this matrix with geev and, if requested, the
+// right and/or left eigenvectors. Requires state matrix. The results are
+// stored in wr and wi (presumably real and imaginary parts -- TODO
+// confirm) and in vr/vl. Afterwards the state is the combination of
+// eigenvalues and unusable.
+template <typename number>
+void
+LAPACKFullMatrix<number>::compute_eigenvalues(const bool right,
+                                              const bool left)
+{
+  Assert(state == matrix, ExcState(state));
+  const int nn = this->n_cols();
+  wr.resize(nn);
+  wi.resize(nn);
+  // storage for eigenvectors is only (re)sized when they are requested
+  if (right) vr.resize(nn*nn);
+  if (left)  vl.resize(nn*nn);
+
+  number *values = const_cast<number *> (&this->values[0]);
+
+  int info  = 0;
+  int lwork = 1;
+  // job flags: V if the respective eigenvectors are requested, N otherwise
+  const char *const jobvr = (right) ? (&V) : (&N);
+  const char *const jobvl = (left)  ? (&V) : (&N);
+
+  /*
+   * The LAPACK routine xGEEV requires a sufficiently large work array; the
+   * minimum requirement is
+   *
+   * work.size >= 4*nn.
+   *
+   * However, for better performance, a larger work array may be needed. The
+   * first call determines the optimal work size and the second does the work.
+   */
+  lwork = -1;
+  work.resize(1);
+
+  // NOTE(review): &vl[0] and &vr[0] are taken even if the corresponding
+  // vector was not resized above; if it is still empty at this point,
+  // indexing element 0 is formally invalid -- confirm whether this can
+  // happen.
+  geev(jobvl, jobvr, &nn, values, &nn,
+       &wr[0], &wi[0],
+       &vl[0], &nn, &vr[0], &nn,
+       &work[0], &lwork, &info);
+  // geev returns info=0 on success. Since we only queried the optimal size
+  // for work, everything else would not be acceptable.
+  Assert (info == 0, ExcInternalError());
+  // Allocate working array according to suggestion (same strategy as was
+  // noted in compute_svd).
+  lwork = static_cast<int>(work[0] + 1);
+
+  // resize workspace array
+  work.resize((size_type ) lwork);
+
+  // Finally compute the eigenvalues.
+  geev(jobvl, jobvr, &nn, values, &nn,
+       &wr[0], &wi[0],
+       &vl[0], &nn, &vr[0], &nn,
+       &work[0], &lwork, &info);
+  // Negative return value implies a wrong argument. This should be internal.
+
+  Assert (info >=0, ExcInternalError());
+//TODO:[GK] What if the QR method fails?
+  // A positive return code is only reported on std::cerr; execution
+  // continues and the state is still updated below.
+  if (info != 0)
+    std::cerr << "LAPACK error in geev" << std::endl;
+
+  state = LAPACKSupport::State(eigenvalues | unusable);
+}
+
+
+/**
+ * Compute eigenvalues and eigenvectors of this matrix through the LAPACK
+ * wrapper syevx, passing @p lower_bound and @p upper_bound as the interval
+ * bounds and @p abs_accuracy as the requested tolerance.
+ *
+ * On return, @p eigenvalues holds the values that were found and column i
+ * of @p eigenvectors holds the vector belonging to eigenvalues(i). The
+ * entries of this matrix are handed to LAPACK as a writable array and the
+ * state of the object is set to unusable afterwards.
+ */
+template <typename number>
+void
+LAPACKFullMatrix<number>::compute_eigenvalues_symmetric(const number        lower_bound,
+                                                        const number        upper_bound,
+                                                        const number        abs_accuracy,
+                                                        Vector<number>     &eigenvalues,
+                                                        FullMatrix<number> &eigenvectors)
+{
+  Assert(state == matrix, ExcState(state));
+  const int dim = (this->n_cols() > 0 ? this->n_cols() : 1);
+  Assert(static_cast<size_type>(dim) == this->n_rows(), ExcNotQuadratic());
+
+  // Output buffers: wr receives the eigenvalues, eigenvector_storage
+  // receives the eigenvectors, one after the other in blocks of dim entries.
+  wr.resize(dim);
+  LAPACKFullMatrix<number> eigenvector_storage(dim, dim);
+
+  number *a_data = const_cast<number *> (&this->values[0]);
+  number *z_data = const_cast<number *> (&eigenvector_storage.values[0]);
+
+  int info         = 0;
+  int lwork        = -1;
+  int n_eigenpairs = 0;
+
+  const char *const jobz  = &V;
+  const char *const uplo  = &U;
+  const char *const range = &V;
+  const int *const  dummy = &one;
+
+  std::vector<int> iwork(static_cast<size_type> (5*dim));
+  std::vector<int> ifail(static_cast<size_type> (dim));
+
+  /*
+   * Step 1: workspace query. xSYEVX needs a work array of at least 8*dim
+   * entries, but a larger one may perform better. Calling it with
+   * lwork == -1 only reports the optimal size in work[0].
+   */
+  work.resize(1);
+
+  syevx (jobz, range,
+         uplo, &dim, a_data, &dim,
+         &lower_bound, &upper_bound,
+         dummy, dummy, &abs_accuracy,
+         &n_eigenpairs, &wr[0], z_data,
+         &dim, &work[0], &lwork, &iwork[0],
+         &ifail[0], &info);
+  // A pure size query has no reason to fail, so anything but info == 0 is
+  // an internal error.
+  Assert (info == 0, ExcInternalError());
+
+  // Step 2: allocate the suggested workspace (same strategy as was noted
+  // in compute_svd).
+  lwork = static_cast<int>(work[0] + 1);
+  work.resize(static_cast<size_type> (lwork));
+
+  // Step 3: the actual eigenvalue computation.
+  syevx (jobz, range,
+         uplo, &dim, a_data, &dim,
+         &lower_bound, &upper_bound,
+         dummy, dummy, &abs_accuracy,
+         &n_eigenpairs, &wr[0], z_data,
+         &dim, &work[0], &lwork, &iwork[0],
+         &ifail[0], &info);
+
+  // A negative info flags an invalid argument, which would be our fault.
+  Assert (info >=0, ExcInternalError());
+  if (info != 0)
+    std::cerr << "LAPACK error in syevx" << std::endl;
+
+  // Step 4: copy the results into the output objects.
+  eigenvalues.reinit(n_eigenpairs);
+  eigenvectors.reinit(dim, n_eigenpairs, true);
+
+  const size_type n_entries = static_cast<size_type> (dim);
+  const size_type n_pairs   = static_cast<size_type> (n_eigenpairs);
+  for (size_type pair=0; pair<n_pairs; ++pair)
+    {
+      eigenvalues(pair) = wr[pair];
+
+      const number *const column = z_data + pair*n_entries;
+      for (size_type row=0; row<n_entries; ++row)
+        eigenvectors(row,pair) = column[row];
+    }
+
+  state = LAPACKSupport::State(unusable);
+}
+
+
+/**
+ * Compute eigenvalues and eigenvectors of the generalized problem defined
+ * by this matrix and @p B through the LAPACK wrapper sygvx. @p itype is
+ * forwarded to LAPACK unchanged, @p lower_bound and @p upper_bound are
+ * passed as the interval bounds and @p abs_accuracy as the tolerance.
+ *
+ * On return, @p eigenvalues holds the values that were found and
+ * eigenvectors[i] the vector belonging to eigenvalues(i). Both this matrix
+ * and @p B are handed to LAPACK as writable arrays; the state of this
+ * object is set to unusable afterwards.
+ */
+template <typename number>
+void
+LAPACKFullMatrix<number>::compute_generalized_eigenvalues_symmetric(
+  LAPACKFullMatrix<number> &B,
+  const number lower_bound,
+  const number upper_bound,
+  const number abs_accuracy,
+  Vector<number> &eigenvalues,
+  std::vector<Vector<number> > &eigenvectors,
+  const int itype)
+{
+  Assert(state == matrix, ExcState(state));
+  const int dim = (this->n_cols() > 0 ? this->n_cols() : 1);
+  Assert(static_cast<size_type>(dim) == this->n_rows(), ExcNotQuadratic());
+  Assert(B.n_rows() == B.n_cols(), ExcNotQuadratic());
+  Assert(static_cast<size_type>(dim) == B.n_cols(),
+         ExcDimensionMismatch (dim, B.n_cols()));
+
+  // Output buffers: wr receives the eigenvalues, eigenvector_storage
+  // receives the eigenvectors, one after the other in blocks of dim entries.
+  wr.resize(dim);
+  LAPACKFullMatrix<number> eigenvector_storage(dim, dim);
+
+  number *a_data = const_cast<number *> (&this->values[0]);
+  number *b_data = const_cast<number *> (&B.values[0]);
+  number *z_data = const_cast<number *> (&eigenvector_storage.values[0]);
+
+  int info         = 0;
+  int lwork        = -1;
+  int n_eigenpairs = 0;
+
+  const char *const jobz  = &V;
+  const char *const uplo  = &U;
+  const char *const range = &V;
+  const int *const  dummy = &one;
+
+  std::vector<int> iwork(static_cast<size_type> (5*dim));
+  std::vector<int> ifail(static_cast<size_type> (dim));
+
+  /*
+   * Step 1: workspace query. xSYGVX needs a work array of at least 8*dim
+   * entries, but a larger one may perform better. Calling it with
+   * lwork == -1 only reports the optimal size in work[0].
+   */
+  work.resize(1);
+
+  sygvx (&itype, jobz, range, uplo, &dim, a_data, &dim,
+         b_data, &dim, &lower_bound, &upper_bound,
+         dummy, dummy, &abs_accuracy, &n_eigenpairs,
+         &wr[0], z_data, &dim, &work[0],
+         &lwork, &iwork[0], &ifail[0], &info);
+  // A pure size query has no reason to fail, so anything but info == 0 is
+  // an internal error.
+  Assert (info == 0, ExcInternalError());
+
+  // Step 2: allocate the suggested workspace (same strategy as was noted
+  // in compute_svd).
+  lwork = static_cast<int>(work[0] + 1);
+  work.resize(static_cast<size_type> (lwork));
+
+  // Step 3: the actual computation of the generalized eigenvalues.
+  sygvx (&itype, jobz, range, uplo, &dim, a_data, &dim,
+         b_data, &dim, &lower_bound, &upper_bound,
+         dummy, dummy, &abs_accuracy, &n_eigenpairs,
+         &wr[0], z_data, &dim, &work[0],
+         &lwork, &iwork[0], &ifail[0], &info);
+
+  // A negative info flags an invalid argument, which would be our fault.
+  Assert (info >=0, ExcInternalError());
+  if (info != 0)
+    std::cerr << "LAPACK error in sygvx" << std::endl;
+
+  // Step 4: copy the results into the output objects.
+  eigenvalues.reinit(n_eigenpairs);
+  eigenvectors.resize(n_eigenpairs);
+
+  const size_type n_entries = static_cast<size_type> (dim);
+  const size_type n_pairs   = static_cast<size_type> (n_eigenpairs);
+  for (size_type pair=0; pair<n_pairs; ++pair)
+    {
+      eigenvalues(pair) = wr[pair];
+
+      const number *const column = z_data + pair*n_entries;
+      eigenvectors[pair].reinit(dim, true);
+      for (size_type row=0; row<n_entries; ++row)
+        eigenvectors[pair](row) = column[row];
+    }
+
+  state = LAPACKSupport::State(unusable);
+}
+
+
+/**
+ * Solve the generalized eigenvalue problem defined by this matrix and
+ * @p B through the LAPACK wrapper sygv. @p itype is forwarded to LAPACK
+ * unchanged.
+ *
+ * The eigenvalues are left in the internal array wr. Eigenvectors are only
+ * requested from LAPACK if @p eigenvectors is non-empty; in that case
+ * eigenvectors[i] is filled from the i-th block of dim entries of this
+ * matrix' own value array, which sygv receives as a writable argument.
+ * @p eigenvectors may not have more entries than the matrix has columns.
+ */
+template <typename number>
+void
+LAPACKFullMatrix<number>::compute_generalized_eigenvalues_symmetric (
+  LAPACKFullMatrix<number> &B,
+  std::vector<Vector<number> > &eigenvectors,
+  const int itype)
+{
+  Assert(state == matrix, ExcState(state));
+  const int dim = this->n_cols();
+  Assert(static_cast<size_type>(dim) == this->n_rows(), ExcNotQuadratic());
+  Assert(B.n_rows() == B.n_cols(), ExcNotQuadratic());
+  Assert(static_cast<size_type>(dim) == B.n_cols(),
+         ExcDimensionMismatch (dim, B.n_cols()));
+  Assert(eigenvectors.size() <= static_cast<size_type>(dim),
+         ExcMessage ("eigenvectors.size() > matrix.n_cols()"));
+
+  wr.resize(dim);
+  // wi is resized purely for consistency with the eigenvalues() function.
+  wi.resize(dim);
+
+  number *a_data = const_cast<number *> (&this->values[0]);
+  number *b_data = const_cast<number *> (&B.values[0]);
+
+  int info  = 0;
+  int lwork = -1;
+  const char *const jobz = (!eigenvectors.empty()) ? (&V) : (&N);
+  const char *const uplo = (&U);
+
+  /*
+   * Step 1: workspace query. xSYGV needs a work array of at least
+   * 3*dim - 1 entries, but a larger one may perform better. Calling it
+   * with lwork == -1 only reports the optimal size in work[0].
+   */
+  work.resize(1);
+
+  sygv (&itype, jobz, uplo, &dim, a_data, &dim,
+        b_data, &dim,
+        &wr[0], &work[0], &lwork, &info);
+  // A pure size query has no reason to fail, so anything but info == 0 is
+  // an internal error.
+  Assert (info == 0, ExcInternalError());
+
+  // Step 2: allocate the suggested workspace (same strategy as was noted
+  // in compute_svd).
+  lwork = static_cast<int>(work[0] + 1);
+  work.resize(static_cast<size_type> (lwork));
+
+  // Step 3: the actual computation of the generalized eigenvalues.
+  sygv (&itype, jobz, uplo, &dim, a_data, &dim,
+        b_data, &dim,
+        &wr[0], &work[0], &lwork, &info);
+
+  // A negative info flags an invalid argument, which would be our fault.
+  Assert (info >=0, ExcInternalError());
+  if (info != 0)
+    std::cerr << "LAPACK error in sygv" << std::endl;
+
+  // Step 4: copy the requested eigenvectors out of the matrix array.
+  const size_type n_entries = static_cast<size_type>(dim);
+  for (size_type vec=0; vec<eigenvectors.size(); ++vec)
+    {
+      const number *const column = a_data + vec*n_entries;
+      eigenvectors[vec].reinit(dim, true);
+      for (size_type row=0; row<n_entries; ++row)
+        eigenvectors[vec](row) = column[row];
+    }
+  state = LAPACKSupport::State(eigenvalues | unusable);
+}
+
+
+/**
+ * Write the matrix to @p out, one line per row.
+ *
+ * Entries whose absolute value exceeds @p threshold are printed multiplied
+ * by @p denominator; all other entries are replaced by @p zero_string.
+ * Numbers are written with @p precision digits in scientific or fixed
+ * notation depending on @p scientific. If @p width_ is zero, the field
+ * width defaults to precision+7 (scientific) or precision+2 (fixed).
+ *
+ * The format flags and precision of @p out are restored before the stream
+ * state is checked, so they are also restored if ExcIO is thrown.
+ */
+template <typename number>
+void
+LAPACKFullMatrix<number>::print_formatted (
+  std::ostream       &out,
+  const unsigned int  precision,
+  const bool          scientific,
+  const unsigned int  width_,
+  const char         *zero_string,
+  const double        denominator,
+  const double        threshold) const
+{
+  unsigned int width = width_;
+
+  Assert ((!this->empty()) || (this->n_cols()+this->n_rows()==0),
+          ExcInternalError());
+
+  // set output format, but store old
+  // state
+  std::ios::fmtflags old_flags = out.flags();
+  unsigned int old_precision = out.precision (precision);
+
+  if (scientific)
+    {
+      out.setf (std::ios::scientific, std::ios::floatfield);
+      if (!width)
+        width = precision+7;
+    }
+  else
+    {
+      out.setf (std::ios::fixed, std::ios::floatfield);
+      if (!width)
+        width = precision+2;
+    }
+
+  for (size_type i=0; i<this->n_rows(); ++i)
+    {
+      for (size_type j=0; j<this->n_cols(); ++j)
+        if (std::fabs(this->el(i,j)) > threshold)
+          out << std::setw(width)
+              << this->el(i,j) * denominator << ' ';
+        else
+          out << std::setw(width) << zero_string << ' ';
+      out << std::endl;
+    }
+
+  // reset output format first, so that the caller's settings are not left
+  // modified if the stream check below throws
+  out.flags (old_flags);
+  out.precision(old_precision);
+
+  AssertThrow (out, ExcIO());
+}
+
+
+//----------------------------------------------------------------------//
+
+/**
+ * Store a pointer to the matrix @p M. No vector memory pool is attached;
+ * the stored pool pointer is reset to zero.
+ */
+template <typename number>
+void
+PreconditionLU<number>::initialize(const LAPACKFullMatrix<number> &M)
+{
+  mem    = 0;
+  matrix = &M;
+}
+
+
+/**
+ * Store pointers to the matrix @p M and to the vector memory pool @p V.
+ * The BlockVector versions of vmult() and Tvmult() require the pool.
+ */
+template <typename number>
+void
+PreconditionLU<number>::initialize(const LAPACKFullMatrix<number> &M,
+                                   VectorMemory<Vector<number> > &V)
+{
+  mem    = &V;
+  matrix = &M;
+}
+
+
+/**
+ * Copy @p src into @p dst and apply the LU factorization of the stored
+ * matrix to @p dst in place (non-transposed).
+ */
+template <typename number>
+void
+PreconditionLU<number>::vmult(Vector<number> &dst,
+                              const Vector<number> &src) const
+{
+  const bool transposed = false;
+
+  dst = src;
+  matrix->apply_lu_factorization(dst, transposed);
+}
+
+
+/**
+ * Copy @p src into @p dst and apply the LU factorization of the stored
+ * matrix to @p dst in place, with the transpose flag set.
+ */
+template <typename number>
+void
+PreconditionLU<number>::Tvmult(Vector<number> &dst,
+                               const Vector<number> &src) const
+{
+  const bool transposed = true;
+
+  dst = src;
+  matrix->apply_lu_factorization(dst, transposed);
+}
+
+
+/**
+ * Apply the LU factorization of the stored matrix to the block vector
+ * @p src and write the result into @p dst.
+ *
+ * The work is done on an auxiliary Vector taken from the VectorMemory
+ * object given to initialize(); that vector is returned to the pool before
+ * this function exits.
+ */
+template <typename number>
+void
+PreconditionLU<number>::vmult(BlockVector<number> &dst,
+                              const BlockVector<number> &src) const
+{
+  Assert(mem != 0, ExcNotInitialized());
+  Vector<number> *aux = mem->alloc();
+  *aux = src;
+  matrix->apply_lu_factorization(*aux, false);
+  dst = *aux;
+  // Return the auxiliary vector to the pool; otherwise every call would
+  // leave one more vector allocated.
+  mem->free(aux);
+}
+
+
+/**
+ * Apply the LU factorization of the stored matrix, with the transpose flag
+ * set, to the block vector @p src and write the result into @p dst.
+ *
+ * The work is done on an auxiliary Vector taken from the VectorMemory
+ * object given to initialize(); that vector is returned to the pool before
+ * this function exits.
+ */
+template <typename number>
+void
+PreconditionLU<number>::Tvmult(BlockVector<number> &dst,
+                               const BlockVector<number> &src) const
+{
+  Assert(mem != 0, ExcNotInitialized());
+  Vector<number> *aux = mem->alloc();
+  *aux = src;
+  matrix->apply_lu_factorization(*aux, true);
+  dst = *aux;
+  // Return the auxiliary vector to the pool; otherwise every call would
+  // leave one more vector allocated.
+  mem->free(aux);
+}
+
+
+
+#include "lapack_full_matrix.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/lac/lapack_full_matrix.inst.in b/source/lac/lapack_full_matrix.inst.in
new file mode 100644
index 0000000..1b2958d
--- /dev/null
+++ b/source/lac/lapack_full_matrix.inst.in
@@ -0,0 +1,31 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+// Explicitly instantiate both class templates once for every type in the
+// REAL_SCALARS list.
+for (S : REAL_SCALARS)
+  {
+    template class LAPACKFullMatrix<S>;
+    template class PreconditionLU<S>;
+  }
+
+// Explicitly instantiate the assignment operators from FullMatrix and
+// SparseMatrix for every pair (S1, S2) of types in the REAL_SCALARS list.
+for (S1, S2 : REAL_SCALARS)
+  {
+    template LAPACKFullMatrix<S1> &
+    LAPACKFullMatrix<S1>::operator = (const FullMatrix<S2> &M);
+
+    template LAPACKFullMatrix<S1> &
+    LAPACKFullMatrix<S1>::operator = (const SparseMatrix<S2> &M);
+  }
diff --git a/source/lac/matrix_lib.cc b/source/lac/matrix_lib.cc
new file mode 100644
index 0000000..b8ea151
--- /dev/null
+++ b/source/lac/matrix_lib.cc
@@ -0,0 +1,169 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2002 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/matrix_lib.templates.h>
+#include <deal.II/lac/sparse_matrix.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+/**
+ * Constructor. Stores @p component in the member of the same name.
+ */
+MeanValueFilter::MeanValueFilter(size_type component)
+  : component(component)
+{
+}
+
+
+/**
+ * Constructor. Stores pointers to the two factors @p mat1 and @p mat2 and
+ * to the vector memory pool @p mem. The number of columns of @p mat1 must
+ * equal the number of rows of @p mat2.
+ */
+template<typename number, typename vnumber>
+ProductSparseMatrix<number, vnumber>::ProductSparseMatrix(
+  const MatrixType &mat1,
+  const MatrixType &mat2,
+  VectorMemory<VectorType> &mem)
+  : m1(&mat1, typeid(*this).name()),
+    m2(&mat2, typeid(*this).name()),
+    mem(&mem, typeid(*this).name())
+{
+  // the product mat1*mat2 is only defined for matching inner dimensions
+  Assert(mat1.n() == mat2.m(), ExcDimensionMismatch(mat1.n(),mat2.m()));
+}
+
+
+/**
+ * Default constructor. All three stored pointers start out as zero, so
+ * initialize() must be called before the object is used.
+ */
+template<typename number, typename vnumber>
+ProductSparseMatrix<number, vnumber>::ProductSparseMatrix()
+  : m1(0, typeid(*this).name()),
+    m2(0, typeid(*this).name()),
+    mem(0, typeid(*this).name())
+{
+}
+
+
+/**
+ * Set the two factors @p mat1 and @p mat2 and the vector memory pool
+ * @p memory used for intermediate vectors. The number of columns of
+ * @p mat1 must equal the number of rows of @p mat2.
+ */
+template<typename number, typename vnumber>
+void
+ProductSparseMatrix<number, vnumber>::initialize(
+  const MatrixType &mat1,
+  const MatrixType &mat2,
+  VectorMemory<VectorType> &memory)
+{
+  // the product mat1*mat2 is only defined for matching inner dimensions
+  Assert(mat1.n() == mat2.m(), ExcDimensionMismatch(mat1.n(),mat2.m()));
+
+  m1  = &mat1;
+  m2  = &mat2;
+  mem = &memory;
+}
+
+
+/**
+ * Reset the pointers to both factor matrices to zero. The pointer to the
+ * vector memory pool is left untouched.
+ */
+template<typename number, typename vnumber>
+void
+ProductSparseMatrix<number, vnumber>::clear()
+{
+  m2 = 0;
+  m1 = 0;
+}
+
+
+/**
+ * Compute dst = m1 * (m2 * src), using an intermediate vector taken from
+ * and returned to the memory pool.
+ */
+template<typename number, typename vnumber>
+void
+ProductSparseMatrix<number, vnumber>::vmult (VectorType &dst, const VectorType &src) const
+{
+  Assert(mem != 0, ExcNotInitialized());
+  Assert(m1 != 0, ExcNotInitialized());
+  Assert(m2 != 0, ExcNotInitialized());
+
+  // intermediate result m2*src, sized to the number of columns of m1
+  VectorType *intermediate = mem->alloc();
+  intermediate->reinit(m1->n());
+
+  m2->vmult (*intermediate, src);
+  m1->vmult (dst, *intermediate);
+
+  mem->free(intermediate);
+}
+
+
+/**
+ * Compute dst += m1 * (m2 * src), using an intermediate vector taken from
+ * and returned to the memory pool.
+ */
+template<typename number, typename vnumber>
+void
+ProductSparseMatrix<number, vnumber>::vmult_add (VectorType &dst, const VectorType &src) const
+{
+  Assert(mem != 0, ExcNotInitialized());
+  Assert(m1 != 0, ExcNotInitialized());
+  Assert(m2 != 0, ExcNotInitialized());
+
+  // intermediate result m2*src, sized to the number of columns of m1
+  VectorType *intermediate = mem->alloc();
+  intermediate->reinit(m1->n());
+
+  m2->vmult (*intermediate, src);
+  m1->vmult_add (dst, *intermediate);
+
+  mem->free(intermediate);
+}
+
+
+/**
+ * Apply the transpose of the product: dst = m2^T * (m1^T * src), using an
+ * intermediate vector taken from and returned to the memory pool.
+ */
+template<typename number, typename vnumber>
+void
+ProductSparseMatrix<number, vnumber>::Tvmult (VectorType &dst, const VectorType &src) const
+{
+  Assert(mem != 0, ExcNotInitialized());
+  Assert(m1 != 0, ExcNotInitialized());
+  Assert(m2 != 0, ExcNotInitialized());
+
+  // intermediate result m1^T*src, sized to the number of columns of m1
+  VectorType *intermediate = mem->alloc();
+  intermediate->reinit(m1->n());
+
+  m1->Tvmult (*intermediate, src);
+  m2->Tvmult (dst, *intermediate);
+
+  mem->free(intermediate);
+}
+
+
+/**
+ * Apply the transpose of the product and add: dst += m2^T * (m1^T * src),
+ * using an intermediate vector taken from and returned to the memory pool.
+ */
+template<typename number, typename vnumber>
+void
+ProductSparseMatrix<number, vnumber>::Tvmult_add (VectorType &dst, const VectorType &src) const
+{
+  Assert(mem != 0, ExcNotInitialized());
+  Assert(m1 != 0, ExcNotInitialized());
+  Assert(m2 != 0, ExcNotInitialized());
+
+  // intermediate result m1^T*src, sized to the number of columns of m1
+  VectorType *intermediate = mem->alloc();
+  intermediate->reinit(m1->n());
+
+  m1->Tvmult (*intermediate, src);
+  m2->Tvmult_add (dst, *intermediate);
+
+  mem->free(intermediate);
+}
+
+
+// Explicit instantiations of ProductSparseMatrix for all combinations of
+// float and double as matrix and vector number types.
+template class ProductSparseMatrix<double, double>;
+template class ProductSparseMatrix<double, float>;
+template class ProductSparseMatrix<float, double>;
+template class ProductSparseMatrix<float, float>;
+
+// Explicit instantiations of the MeanValueFilter member templates filter(),
+// vmult() and vmult_add() for Vector and BlockVector over float and double.
+template void MeanValueFilter::filter(Vector<float> &) const;
+template void MeanValueFilter::filter(Vector<double> &) const;
+template void MeanValueFilter::filter(BlockVector<float> &) const;
+template void MeanValueFilter::filter(BlockVector<double> &) const;
+template void MeanValueFilter::vmult(Vector<float> &,
+                                     const Vector<float> &) const;
+template void MeanValueFilter::vmult(Vector<double> &,
+                                     const Vector<double> &) const;
+template void MeanValueFilter::vmult(BlockVector<float> &,
+                                     const BlockVector<float> &) const;
+template void MeanValueFilter::vmult(BlockVector<double> &,
+                                     const BlockVector<double> &) const;
+
+template void MeanValueFilter::vmult_add(Vector<float> &,
+                                         const Vector<float> &) const;
+template void MeanValueFilter::vmult_add(Vector<double> &,
+                                         const Vector<double> &) const;
+template void MeanValueFilter::vmult_add(BlockVector<float> &,
+                                         const BlockVector<float> &) const;
+template void MeanValueFilter::vmult_add(BlockVector<double> &,
+                                         const BlockVector<double> &) const;
+
+// Explicit instantiations of InverseMatrixRichardson for the same four
+// vector types.
+template class InverseMatrixRichardson<Vector<float> >;
+template class InverseMatrixRichardson<Vector<double> >;
+template class InverseMatrixRichardson<BlockVector<float> >;
+template class InverseMatrixRichardson<BlockVector<double> >;
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/lac/matrix_out.cc b/source/lac/matrix_out.cc
new file mode 100644
index 0000000..99a0037
--- /dev/null
+++ b/source/lac/matrix_out.cc
@@ -0,0 +1,51 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/matrix_out.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+/**
+ * Constructor. Copies each argument into the member variable of the same
+ * name.
+ */
+MatrixOut::Options::Options (const bool         show_absolute_values,
+                             const unsigned int block_size,
+                             const bool         discontinuous)
+  : show_absolute_values (show_absolute_values),
+    block_size (block_size),
+    discontinuous (discontinuous)
+{
+}
+
+
+
+/**
+ * Destructor. The body is empty.
+ */
+MatrixOut::~MatrixOut ()
+{
+}
+
+
+
+/**
+ * Return a constant reference to the stored list of patches.
+ */
+const std::vector<MatrixOut::Patch> &
+MatrixOut::get_patches () const
+{
+  return this->patches;
+}
+
+
+
+/**
+ * Return the list of data set names, which consists of exactly one entry:
+ * the stored name.
+ */
+std::vector<std::string>
+MatrixOut::get_dataset_names () const
+{
+  std::vector<std::string> dataset_names;
+  dataset_names.push_back (name);
+  return dataset_names;
+}
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/lac/parallel_vector.cc b/source/lac/parallel_vector.cc
new file mode 100644
index 0000000..fd01c6d
--- /dev/null
+++ b/source/lac/parallel_vector.cc
@@ -0,0 +1,24 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/parallel_vector.h>
+#include <deal.II/lac/parallel_vector.templates.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+#include "parallel_vector.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/lac/parallel_vector.inst.in b/source/lac/parallel_vector.inst.in
new file mode 100644
index 0000000..1291f9e
--- /dev/null
+++ b/source/lac/parallel_vector.inst.in
@@ -0,0 +1,39 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2011 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+// Explicitly instantiate parallel::distributed::Vector once for every type
+// in the REAL_SCALARS list.
+for (SCALAR : REAL_SCALARS)
+{
+  namespace parallel
+  \{
+    namespace distributed
+    \{
+      template class Vector<SCALAR>;
+    \}
+  \}
+}
+
+// Explicitly instantiate the member template reinit() for every pair
+// (S1, S2) of types in the REAL_SCALARS list.
+for (S1, S2 : REAL_SCALARS)
+{
+  namespace parallel
+  \{
+    namespace distributed
+    \{
+      template void Vector<S1>::reinit<S2> (const Vector<S2>&,
+                                            const bool);
+    \}
+  \}
+}
diff --git a/source/lac/petsc_block_sparse_matrix.cc b/source/lac/petsc_block_sparse_matrix.cc
new file mode 100644
index 0000000..bbe6817
--- /dev/null
+++ b/source/lac/petsc_block_sparse_matrix.cc
@@ -0,0 +1,82 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/petsc_block_sparse_matrix.h>
+
+#ifdef DEAL_II_WITH_PETSC
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace PETScWrappers
+{
+  /**
+   * Default constructor. The body is empty.
+   */
+  BlockSparseMatrix::BlockSparseMatrix ()
+  {
+  }
+
+
+
+  /**
+   * Destructor. The body is empty.
+   */
+  BlockSparseMatrix::~BlockSparseMatrix ()
+  {
+  }
+
+
+
+  /**
+   * Copy assignment; forwards to the assignment operator of the base class.
+   */
+  BlockSparseMatrix &
+  BlockSparseMatrix::operator = (const BlockSparseMatrix &m)
+  {
+    BaseClass::operator = (m);
+    return *this;
+  }
+
+
+  /**
+   * Discard the current blocks and set up a new block structure with
+   * @p n_block_rows times @p n_block_columns newly created blocks. The
+   * block index objects are initialized with block size zero, so
+   * collect_sizes() has to be called later on.
+   */
+  void
+  BlockSparseMatrix::
+  reinit (const size_type n_block_rows,
+          const size_type n_block_columns)
+  {
+    // drop whatever the subobjects array held so far
+    clear ();
+
+    // resize the table of blocks; all block sizes are zero for now
+    this->sub_objects.reinit (n_block_rows,
+                              n_block_columns);
+    this->row_block_indices.reinit (n_block_rows, 0);
+    this->column_block_indices.reinit (n_block_columns, 0);
+
+    // fill every slot of the table with a fresh block
+    for (size_type block_row=0; block_row<this->n_block_rows(); ++block_row)
+      for (size_type block_col=0; block_col<this->n_block_cols(); ++block_col)
+        this->sub_objects[block_row][block_col] = new BlockType();
+  }
+
+
+
+  /**
+   * Forward to the collect_sizes() function of the base class.
+   */
+  void
+  BlockSparseMatrix::collect_sizes ()
+  {
+    BaseClass::collect_sizes ();
+  }
+
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/source/lac/petsc_full_matrix.cc b/source/lac/petsc_full_matrix.cc
new file mode 100644
index 0000000..6ff013a
--- /dev/null
+++ b/source/lac/petsc_full_matrix.cc
@@ -0,0 +1,80 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/petsc_full_matrix.h>
+
+#ifdef DEAL_II_WITH_PETSC
+
+#  include <deal.II/lac/petsc_vector.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace PETScWrappers
+{
+
+  /**
+   * Default constructor: create an empty (0 x 0) matrix.
+   */
+  FullMatrix::FullMatrix ()
+  {
+    do_reinit (0, 0);
+  }
+
+  /**
+   * Create a matrix with @p m rows and @p n columns.
+   */
+  FullMatrix::FullMatrix (const size_type m,
+                          const size_type n)
+  {
+    do_reinit (m, n);
+  }
+
+  /**
+   * Destroy the current PETSc matrix and replace it by a new one with
+   * @p m rows and @p n columns.
+   */
+  void
+  FullMatrix::reinit (const size_type m,
+                      const size_type n)
+  {
+    // throw away the old matrix first ...
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    const int destroy_result = MatDestroy (matrix);
+#else
+    const int destroy_result = MatDestroy (&matrix);
+#endif
+    AssertThrow (destroy_result == 0, ExcPETScError(destroy_result));
+
+    // ... then build its replacement
+    do_reinit (m, n);
+  }
+
+  /**
+   * Create the underlying PETSc object: a sequential dense matrix with
+   * @p m rows and @p n columns on PETSC_COMM_SELF.
+   */
+  void
+  FullMatrix::do_reinit (const size_type m,
+                         const size_type n)
+  {
+    // PETSC_NULL is passed as the data argument of MatCreateSeqDense
+    const int create_result
+      = MatCreateSeqDense (PETSC_COMM_SELF, m, n, PETSC_NULL,
+                           &matrix);
+
+    AssertThrow (create_result == 0, ExcPETScError(create_result));
+  }
+
+
+  /**
+   * Return a reference to a function-local static communicator that is
+   * initialized with MPI_COMM_SELF.
+   */
+  const MPI_Comm &
+  FullMatrix::get_mpi_communicator () const
+  {
+    static const MPI_Comm self_communicator = MPI_COMM_SELF;
+    return self_communicator;
+  }
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_PETSC
diff --git a/source/lac/petsc_matrix_base.cc b/source/lac/petsc_matrix_base.cc
new file mode 100644
index 0000000..7f5b0d7
--- /dev/null
+++ b/source/lac/petsc_matrix_base.cc
@@ -0,0 +1,663 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/petsc_matrix_base.h>
+
+#ifdef DEAL_II_WITH_PETSC
+
+#  include <deal.II/lac/petsc_full_matrix.h>
+#  include <deal.II/lac/petsc_sparse_matrix.h>
+#  include <deal.II/lac/petsc_parallel_sparse_matrix.h>
+#  include <deal.II/lac/petsc_vector.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace PETScWrappers
+{
+  namespace MatrixIterators
+  {
+    /**
+     * Refresh the cached column indices and values for the row this
+     * accessor currently points to. For the past-the-end row both caches
+     * are released instead.
+     */
+    void
+    MatrixBase::const_iterator::Accessor::
+    visit_present_row ()
+    {
+      // the past-the-end row has nothing to cache: drop both caches and
+      // leave
+      if (this->a_row == matrix->m())
+        {
+          colnum_cache.reset ();
+          value_cache.reset ();
+
+          return;
+        }
+
+      // ask PETSc for a view of the present row
+      PetscInt           n_entries;
+      const PetscInt    *column_indices;
+      const PetscScalar *entry_values;
+
+      int ierr;
+      (void)ierr;
+      ierr = MatGetRow(*matrix, this->a_row,
+                       &n_entries, &column_indices, &entry_values);
+      AssertThrow (ierr == 0, MatrixBase::ExcPETScError(ierr));
+
+      // an iterator should never have been created for an empty row
+      // (what would it point to?), so an empty row here is a bug on our
+      // side; otherwise copy the row into the caches
+      Assert (n_entries != 0, ExcInternalError());
+      colnum_cache.reset (new std::vector<size_type> (column_indices,
+                                                      column_indices+n_entries));
+      value_cache.reset (new std::vector<PetscScalar> (entry_values,
+                                                       entry_values+n_entries));
+
+      // hand the row view back to PETSc
+      ierr = MatRestoreRow(*matrix, this->a_row,
+                           &n_entries, &column_indices, &entry_values);
+      AssertThrow (ierr == 0, MatrixBase::ExcPETScError(ierr));
+    }
+  }
+
+
+
+  /**
+   * Default constructor. Marks the last action as unknown.
+   */
+  MatrixBase::MatrixBase ()
+    : last_action (VectorOperation::unknown)
+  {
+  }
+
+
+
+  /**
+   * Destructor. Destroys the PETSc matrix object held by this wrapper.
+   */
+  MatrixBase::~MatrixBase ()
+  {
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    const int destroy_result = MatDestroy (matrix);
+#else
+    const int destroy_result = MatDestroy (&matrix);
+#endif
+    AssertThrow (destroy_result == 0, ExcPETScError(destroy_result));
+  }
+
+
+
+  /**
+   * Destroy the current PETSc matrix and replace it by an empty (0 x 0)
+   * sequential matrix.
+   */
+  void
+  MatrixBase::clear ()
+  {
+    // get rid of the present matrix
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    int ierr = MatDestroy (matrix);
+#else
+    int ierr = MatDestroy (&matrix);
+#endif
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // and put an empty sequential matrix in its place
+    const int n_rows            = 0;
+    const int n_columns         = 0;
+    const int n_nonzero_per_row = 0;
+    ierr = MatCreateSeqAIJ(PETSC_COMM_SELF, n_rows, n_columns,
+                           n_nonzero_per_row, 0, &matrix);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+
+  /**
+   * Set all entries of the matrix to zero. The only value accepted for
+   * @p d is zero; the matrix must be in compressed state.
+   */
+  MatrixBase &
+  MatrixBase::operator = (const value_type d)
+  {
+    // d is only looked at inside the assertion
+    (void)d;
+    Assert (d==value_type(), ExcScalarAssignmentOnlyForZeroValue());
+
+    assert_is_compressed ();
+
+    const int zero_result = MatZeroEntries (matrix);
+    AssertThrow (zero_result == 0, ExcPETScError(zero_result));
+
+    return *this;
+  }
+
+
+
+  /**
+   * Zero out row @p row of the matrix through MatZeroRowsIS, passing
+   * @p new_diag_value as the new diagonal value. The matrix must be in
+   * compressed state.
+   *
+   * The temporary PETSc index set is destroyed before the result of
+   * MatZeroRowsIS is checked, so it is not leaked if that check throws.
+   */
+  void
+  MatrixBase::clear_row (const size_type   row,
+                         const PetscScalar new_diag_value)
+  {
+    assert_is_compressed ();
+
+    // now set all the entries of this row to zero
+    const PetscInt petsc_row = row;
+
+    IS index_set;
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    ISCreateGeneral (get_mpi_communicator(), 1, &petsc_row, &index_set);
+#else
+    ISCreateGeneral (get_mpi_communicator(), 1, &petsc_row, PETSC_COPY_VALUES, &index_set);
+#endif
+
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    const int ierr
+      = MatZeroRowsIS(matrix, index_set, new_diag_value);
+#else
+    const int ierr
+      = MatZeroRowsIS(matrix, index_set, new_diag_value, PETSC_NULL, PETSC_NULL);
+#endif
+
+    // release the index set first, so that it is freed on the error path
+    // as well
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    ISDestroy (index_set);
+#else
+    ISDestroy (&index_set);
+#endif
+
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+
+  /**
+   * Zero out all rows listed in @p rows through MatZeroRowsIS, passing
+   * @p new_diag_value as the new diagonal value. The matrix must be in
+   * compressed state.
+   *
+   * The PETSc calls are made even if @p rows is empty, since this is a
+   * collective operation. In that case a null pointer is passed as the
+   * index array instead of taking the address of the first element of an
+   * empty vector. The temporary index set is destroyed before the result
+   * of MatZeroRowsIS is checked, so it is not leaked if that check throws.
+   */
+  void
+  MatrixBase::clear_rows (const std::vector<size_type> &rows,
+                          const PetscScalar             new_diag_value)
+  {
+    assert_is_compressed ();
+
+    // now set all the entries of these rows
+    // to zero
+    const std::vector<PetscInt> petsc_rows (rows.begin(), rows.end());
+
+    // element access on an empty vector is undefined, so only take the
+    // address of the first entry if there is one
+    const PetscInt *const petsc_rows_ptr
+      = (petsc_rows.empty() ? 0 : &petsc_rows[0]);
+
+    // call the functions. note that we have
+    // to call them even if #rows is empty,
+    // since this is a collective operation
+    IS index_set;
+
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    ISCreateGeneral (get_mpi_communicator(), rows.size(),
+                     petsc_rows_ptr, &index_set);
+#else
+    ISCreateGeneral (get_mpi_communicator(), rows.size(),
+                     petsc_rows_ptr, PETSC_COPY_VALUES, &index_set);
+#endif
+
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    const int ierr
+      = MatZeroRowsIS(matrix, index_set, new_diag_value);
+#else
+    const int ierr
+      = MatZeroRowsIS(matrix, index_set, new_diag_value, PETSC_NULL, PETSC_NULL);
+#endif
+
+    // release the index set first, so that it is freed on the error path
+    // as well
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    ISDestroy (index_set);
+#else
+    ISDestroy (&index_set);
+#endif
+
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+
+  /**
+   * Return the value of the entry in row @p i and column @p j, obtained
+   * through MatGetValues.
+   */
+  PetscScalar
+  MatrixBase::el (const size_type i,
+                  const size_type j) const
+  {
+    PetscInt row_index    = i;
+    PetscInt column_index = j;
+
+    PetscScalar entry;
+
+    const int ierr
+      = MatGetValues (matrix, 1, &row_index, 1, &column_index,
+                      &entry);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return entry;
+  }
+
+
+
+  // Return the i-th diagonal entry. In debug mode it is checked that the
+  // matrix is square.
+  PetscScalar
+  MatrixBase::diag_element (const size_type i) const
+  {
+    Assert (m() == n(), ExcNotQuadratic());
+
+    // diagonal entries are read just like any other element
+    return this->el (i, i);
+  }
+
+
+
+  // Finish assembly of the matrix: verify that the pending operation matches
+  // the one given by the caller, run PETSc's final assembly and reset
+  // last_action to VectorOperation::unknown.
+  void
+  MatrixBase::compress (const VectorOperation::values operation)
+  {
+#if defined(DEBUG) && defined(DEAL_II_WITH_MPI)
+    // Check that all processors agree that last_action is the same (or none!):
+    // the bitwise OR over all processes must not combine add and insert.
+    int local_last_action = last_action;
+    int all_int_last_action;
+    MPI_Allreduce(&local_last_action, &all_int_last_action, 1, MPI_INT,
+                  MPI_BOR, get_mpi_communicator());
+
+    AssertThrow(all_int_last_action != (VectorOperation::add | VectorOperation::insert),
+                ExcMessage("Error: not all processors agree on the last VectorOperation before this compress() call."));
+#endif
+
+    // either nothing is pending, or the caller has to name the pending action
+    AssertThrow(last_action == VectorOperation::unknown
+                || last_action == operation,
+                ExcMessage("Missing compress() or calling with wrong VectorOperation argument."));
+
+    // flush buffers: the final assembly is a begin/end pair of calls
+    int ierr = MatAssemblyBegin (matrix, MAT_FINAL_ASSEMBLY);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    ierr = MatAssemblyEnd (matrix, MAT_FINAL_ASSEMBLY);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // nothing is pending any more
+    last_action = VectorOperation::unknown;
+  }
+
+
+
+  // Return the number of rows that MatGetSize reports for the matrix.
+  MatrixBase::size_type
+  MatrixBase::m () const
+  {
+    // MatGetSize always delivers both dimensions; only the first is used
+    PetscInt row_count = 0;
+    PetscInt col_count = 0;
+    const int ierr = MatGetSize (matrix, &row_count, &col_count);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return row_count;
+  }
+
+
+
+  // Return the number of columns that MatGetSize reports for the matrix.
+  MatrixBase::size_type
+  MatrixBase::n () const
+  {
+    // MatGetSize always delivers both dimensions; only the second is used
+    PetscInt row_count = 0;
+    PetscInt col_count = 0;
+    const int ierr = MatGetSize (matrix, &row_count, &col_count);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return col_count;
+  }
+
+
+
+  // Return the number of rows that MatGetLocalSize reports for this process.
+  MatrixBase::size_type
+  MatrixBase::local_size () const
+  {
+    // the local column count is delivered as well but not needed here
+    PetscInt local_rows = 0;
+    PetscInt local_cols = 0;
+    const int ierr = MatGetLocalSize (matrix, &local_rows, &local_cols);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return local_rows;
+  }
+
+
+
+  // Return the pair (begin, end) of row indices that MatGetOwnershipRange
+  // reports for this process.
+  std::pair<MatrixBase::size_type, MatrixBase::size_type>
+  MatrixBase::local_range () const
+  {
+    PetscInt first_row = 0, end_row = 0;
+    const int ierr = MatGetOwnershipRange (static_cast<const Mat &>(matrix),
+                                           &first_row, &end_row);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // convert the PetscInt values to the index type of this class
+    return std::pair<size_type, size_type> (first_row, end_row);
+  }
+
+
+
+  // Return the nz_used field of the matrix information that MatGetInfo
+  // delivers for MAT_GLOBAL_SUM.
+  MatrixBase::size_type
+  MatrixBase::n_nonzero_elements () const
+  {
+    MatInfo info;
+    const int ierr = MatGetInfo (matrix, MAT_GLOBAL_SUM, &info);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // nz_used is not stored as an integer, hence the explicit conversion
+    return static_cast<size_type>(info.nz_used);
+  }
+
+
+
+  // Return the number of entries PETSc delivers for the given row. The row
+  // is fetched with MatGetRow and handed back with MatRestoreRow right away.
+  MatrixBase::size_type
+  MatrixBase::row_length (const size_type row) const
+  {
+//TODO: this function will probably only work if compress() was called on the
+//matrix previously. however, we can't do this here, since it would impose
+//global communication and one would have to make sure that this function is
+//called the same number of times from all processors, something that is
+//unreasonable. there should simply be a way in PETSc to query the number of
+//entries in a row bypassing the call to compress(), but I can't find one
+    Assert (row < m(), ExcInternalError());
+
+    // outputs of MatGetRow: the entry count plus the column indices and
+    // values of the row
+    PetscInt           ncols;
+    const PetscInt    *colnums;
+    const PetscScalar *values;
+
+//TODO: this is probably horribly inefficient; we should lobby for a way to
+//query this information from PETSc
+    int ierr = MatGetRow(*this, row, &ncols, &colnums, &values);
+    AssertThrow (ierr == 0, MatrixBase::ExcPETScError(ierr));
+
+    // Starting with PETSc 3.4, MatRestoreRow resets its last three arguments
+    // to zero/NULL so that the now dangling pointers cannot be abused. The
+    // entry count therefore has to be remembered before the row is given
+    // back.
+    const PetscInt n_entries_in_row = ncols;
+
+    ierr = MatRestoreRow(*this, row, &ncols, &colnums, &values);
+    AssertThrow (ierr == 0, MatrixBase::ExcPETScError(ierr));
+
+    return n_entries_in_row;
+  }
+
+
+  // Return the norm that MatNorm computes for NORM_1.
+  PetscReal
+  MatrixBase::l1_norm () const
+  {
+    PetscReal norm_value = 0;
+
+    const int ierr = MatNorm (matrix, NORM_1, &norm_value);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return norm_value;
+  }
+
+
+
+  // Return the norm that MatNorm computes for NORM_INFINITY.
+  PetscReal
+  MatrixBase::linfty_norm () const
+  {
+    PetscReal norm_value = 0;
+
+    const int ierr = MatNorm (matrix, NORM_INFINITY, &norm_value);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return norm_value;
+  }
+
+
+
+  // Return the norm that MatNorm computes for NORM_FROBENIUS.
+  PetscReal
+  MatrixBase::frobenius_norm () const
+  {
+    PetscReal norm_value = 0;
+
+    const int ierr = MatNorm (matrix, NORM_FROBENIUS, &norm_value);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return norm_value;
+  }
+
+
+  // Return (M*v)*v, where the product M*v is formed in a temporary vector
+  // with v.size() entries.
+  PetscScalar MatrixBase::matrix_norm_square (const VectorBase &v) const
+  {
+    Vector product (v.size());
+    this->vmult (product, v);
+
+    return product * v;
+  }
+
+
+  // Return u*(M*v), where the product M*v is formed in a temporary vector.
+  PetscScalar
+  MatrixBase::matrix_scalar_product (const VectorBase &u,
+                                     const VectorBase &v) const
+  {
+    // M*v has one entry per matrix row and is multiplied with u afterwards,
+    // so the temporary must be as long as u. Sizing it like v, as was done
+    // before, only works for square matrices.
+    Vector tmp(u.size());
+    vmult (tmp, v);
+    return u*tmp;
+  }
+
+
+#if DEAL_II_PETSC_VERSION_GTE(3,1,0)
+  // Return the trace as computed by MatGetTrace. Only compiled for PETSc
+  // 3.1.0 and later.
+  PetscScalar
+  MatrixBase::trace () const
+  {
+    PetscScalar trace_value;
+
+    const int ierr = MatGetTrace (matrix, &trace_value);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return trace_value;
+  }
+#endif
+
+
+
+  // Scale the matrix by the factor a (MatScale) and return a reference to
+  // this object.
+  MatrixBase &MatrixBase::operator *= (const PetscScalar a)
+  {
+    const int ierr = MatScale (this->matrix, a);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return *this;
+  }
+
+
+
+  // Divide the matrix by a, implemented as a MatScale call with the
+  // reciprocal 1/a, and return a reference to this object.
+  MatrixBase &MatrixBase::operator /= (const PetscScalar a)
+  {
+    const PetscScalar reciprocal = 1./a;
+
+    const int ierr = MatScale (this->matrix, reciprocal);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return *this;
+  }
+
+
+  // Add factor*other to this matrix (MatAXPY, called with
+  // DIFFERENT_NONZERO_PATTERN) and return a reference to this object.
+  // Throws ExcPETScError if PETSc reports a failure.
+  MatrixBase &
+  MatrixBase::add (const MatrixBase &other,
+                   const PetscScalar factor)
+  {
+    const int ierr = MatAXPY (matrix, factor,
+                              other, DIFFERENT_NONZERO_PATTERN);
+
+    // use AssertThrow like the other PETSc calls of this class: with a plain
+    // Assert a failure would go unnoticed in optimized builds
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return *this;
+  }
+
+
+  // Compute dst = M*src with MatMult. In debug mode it is checked that src
+  // and dst are different objects.
+  void
+  MatrixBase::vmult (VectorBase       &dst,
+                     const VectorBase &src) const
+  {
+    Assert (&src != &dst, ExcSourceEqualsDestination());
+
+    // AssertThrow evaluates ierr in every build mode
+    const int ierr = MatMult (this->matrix, src, dst);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+
+  // Compute dst = M^T*src with MatMultTranspose. In debug mode it is checked
+  // that src and dst are different objects.
+  void
+  MatrixBase::Tvmult (VectorBase       &dst,
+                      const VectorBase &src) const
+  {
+    Assert (&src != &dst, ExcSourceEqualsDestination());
+
+    // AssertThrow evaluates ierr in every build mode
+    const int ierr = MatMultTranspose (this->matrix, src, dst);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+
+  // Compute dst += M*src. MatMultAdd is called with dst both as the vector
+  // to be added and as the result. In debug mode it is checked that src and
+  // dst are different objects.
+  void
+  MatrixBase::vmult_add (VectorBase       &dst,
+                         const VectorBase &src) const
+  {
+    Assert (&src != &dst, ExcSourceEqualsDestination());
+
+    // AssertThrow evaluates ierr in every build mode
+    const int ierr = MatMultAdd (this->matrix, src, dst, dst);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+
+  // Compute dst += M^T*src. MatMultTransposeAdd is called with dst both as
+  // the vector to be added and as the result. In debug mode it is checked
+  // that src and dst are different objects.
+  void
+  MatrixBase::Tvmult_add (VectorBase       &dst,
+                          const VectorBase &src) const
+  {
+    Assert (&src != &dst, ExcSourceEqualsDestination());
+
+    // AssertThrow evaluates ierr in every build mode
+    const int ierr = MatMultTransposeAdd (this->matrix, src, dst, dst);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+  // Store the residual b - M*x in dst and return its l2 norm.
+  PetscScalar
+  MatrixBase::residual (VectorBase       &dst,
+                        const VectorBase &x,
+                        const VectorBase &b) const
+  {
+    // Build the result in dst itself rather than in a temporary: first form
+    // M*x - b, then flip the sign. This costs one negation pass more than
+    // strictly necessary.
+    this->vmult (dst, x);
+    dst -= b;
+    dst *= -1;
+
+    return dst.l2_norm();
+  }
+
+
+
+  // Conversion operator: give access to the PETSc matrix object held in the
+  // member variable 'matrix'.
+  MatrixBase::operator Mat () const
+  {
+    return this->matrix;
+  }
+
+  // Transpose the matrix. MatTranspose is handed the matrix itself as the
+  // output argument, so the result replaces the current content.
+  void
+  MatrixBase::transpose ()
+  {
+    const int ierr = MatTranspose (matrix, MAT_REUSE_MATRIX, &matrix);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+  // Return the answer of MatIsSymmetric for the given tolerance.
+  // assert_is_compressed() is checked first. The return type is PetscTruth
+  // before PETSc 3.2.0 and PetscBool from then on. Throws ExcPETScError if
+  // PETSc reports a failure.
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+  PetscTruth
+#else
+  PetscBool
+#endif
+  MatrixBase::is_symmetric (const double tolerance)
+  {
+    assert_is_compressed ();
+
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    PetscTruth truth;
+#else
+    PetscBool truth;
+#endif
+
+    // the return code used to be ignored. check it: if the call fails,
+    // 'truth' may never have been written and must not be returned
+    const int ierr = MatIsSymmetric (matrix, tolerance, &truth);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return truth;
+  }
+
+  // Return the answer of MatIsHermitian for the given tolerance.
+  // assert_is_compressed() is checked first. The return type is PetscTruth
+  // before PETSc 3.2.0 and PetscBool from then on. Throws ExcPETScError if
+  // PETSc reports a failure.
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+  PetscTruth
+#else
+  PetscBool
+#endif
+  MatrixBase::is_hermitian (const double tolerance)
+  {
+    assert_is_compressed ();
+
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    PetscTruth truth;
+#else
+    PetscBool truth;
+#endif
+
+    // the return code used to be ignored. check it: if the call fails,
+    // 'truth' may never have been written and must not be returned
+    const int ierr = MatIsHermitian (matrix, tolerance, &truth);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return truth;
+  }
+
+  // Write the matrix to PETSC_VIEWER_STDOUT_WORLD in the given viewer
+  // format. assert_is_compressed() is checked first. Throws ExcPETScError
+  // if one of the PETSc calls reports a failure.
+  void
+  MatrixBase::write_ascii (const PetscViewerFormat format)
+  {
+    assert_is_compressed ();
+
+    // Set options. The return codes of both PETSc calls used to be ignored;
+    // check them like everywhere else in this class.
+    int ierr = PetscViewerSetFormat (PETSC_VIEWER_STDOUT_WORLD,
+                                     format);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // Write to screen
+    ierr = MatView (matrix, PETSC_VIEWER_STDOUT_WORLD);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+  // Write the rows in local_range() to out, one line per entry delivered by
+  // MatGetRow, in the form "(row,column) value". The second argument is
+  // ignored. Throws ExcIO if the stream is in a failed state at the end.
+  void
+  MatrixBase::print (std::ostream &out,
+                     const bool    /*alternative_output*/) const
+  {
+    const std::pair<MatrixBase::size_type, MatrixBase::size_type>
+    owned_rows = local_range();
+
+    for (MatrixBase::size_type row = owned_rows.first;
+         row < owned_rows.second; ++row)
+      {
+        // borrow the row from PETSc ...
+        PetscInt           ncols;
+        const PetscInt    *colnums;
+        const PetscScalar *values;
+
+        int ierr = MatGetRow(*this, row, &ncols, &colnums, &values);
+        AssertThrow (ierr == 0, MatrixBase::ExcPETScError(ierr));
+
+        // ... print its entries ...
+        for (PetscInt k = 0; k < ncols; ++k)
+          out << "(" << row << "," << colnums[k] << ") " << values[k] << std::endl;
+
+        // ... and hand it back
+        ierr = MatRestoreRow(*this, row, &ncols, &colnums, &values);
+        AssertThrow (ierr == 0, MatrixBase::ExcPETScError(ierr));
+      }
+
+    AssertThrow (out, ExcIO());
+  }
+
+
+
+  // Return an estimate of the memory used: the size of this object plus the
+  // 'memory' field that MatGetInfo delivers for MAT_LOCAL. Throws
+  // ExcPETScError if PETSc reports a failure.
+  std::size_t
+  MatrixBase::memory_consumption() const
+  {
+    MatInfo info;
+
+    // the return code used to be ignored. check it: if the call fails,
+    // info.memory may never have been written
+    const int ierr = MatGetInfo(matrix, MAT_LOCAL, &info);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return sizeof(*this) + static_cast<size_type>(info.memory);
+  }
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_PETSC
diff --git a/source/lac/petsc_matrix_free.cc b/source/lac/petsc_matrix_free.cc
new file mode 100644
index 0000000..9f2af4c
--- /dev/null
+++ b/source/lac/petsc_matrix_free.cc
@@ -0,0 +1,291 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2012 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/lac/petsc_matrix_free.h>
+
+#ifdef DEAL_II_WITH_PETSC
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace PETScWrappers
+{
+  // Default constructor: set up an empty (0 x 0) shell matrix on
+  // PETSC_COMM_SELF.
+  MatrixFree::MatrixFree ()
+    : communicator (PETSC_COMM_SELF)
+  {
+    const int m=0;
+    do_reinit (m, m, m, m);
+  }
+
+
+
+  // Set up an m x n shell matrix on the given communicator whose local
+  // sizes on this process are local_rows x local_columns.
+  MatrixFree::MatrixFree (const MPI_Comm     &communicator,
+                          const unsigned int  m,
+                          const unsigned int  n,
+                          const unsigned int  local_rows,
+                          const unsigned int  local_columns)
+    : communicator (communicator)
+  {
+    do_reinit (m, n, local_rows, local_columns);
+  }
+
+
+
+  // Same as above, but the local sizes are looked up at position
+  // this_process in the two per-process arrays.
+  MatrixFree::MatrixFree (const MPI_Comm     &communicator,
+                          const unsigned int  m,
+                          const unsigned int  n,
+                          const std::vector<unsigned int> &local_rows_per_process,
+                          const std::vector<unsigned int> &local_columns_per_process,
+                          const unsigned int  this_process)
+    : communicator (communicator)
+  {
+    Assert (local_rows_per_process.size() == local_columns_per_process.size(),
+            ExcDimensionMismatch (local_rows_per_process.size(),
+                                  local_columns_per_process.size()));
+    Assert (this_process < local_rows_per_process.size(),
+            ExcInternalError());
+
+    do_reinit (m, n,
+               local_rows_per_process[this_process],
+               local_columns_per_process[this_process]);
+  }
+
+
+
+  // Set up an m x n shell matrix with the given local sizes on
+  // MPI_COMM_WORLD.
+  MatrixFree::MatrixFree (const unsigned int  m,
+                          const unsigned int  n,
+                          const unsigned int  local_rows,
+                          const unsigned int  local_columns)
+    : communicator (MPI_COMM_WORLD)
+  {
+    do_reinit (m, n, local_rows, local_columns);
+  }
+
+
+
+  // Set up an m x n shell matrix on MPI_COMM_WORLD; the local sizes are
+  // looked up at position this_process in the two per-process arrays.
+  MatrixFree::MatrixFree (const unsigned int  m,
+                          const unsigned int  n,
+                          const std::vector<unsigned int> &local_rows_per_process,
+                          const std::vector<unsigned int> &local_columns_per_process,
+                          const unsigned int  this_process)
+    : communicator (MPI_COMM_WORLD)
+  {
+    Assert (local_rows_per_process.size() == local_columns_per_process.size(),
+            ExcDimensionMismatch (local_rows_per_process.size(),
+                                  local_columns_per_process.size()));
+    Assert (this_process < local_rows_per_process.size(),
+            ExcInternalError());
+
+    do_reinit (m, n,
+               local_rows_per_process[this_process],
+               local_columns_per_process[this_process]);
+  }
+
+
+
+  // Destroy the current PETSc matrix and create a new m x n shell matrix on
+  // the given communicator with local sizes local_rows x local_columns.
+  void MatrixFree::reinit (const MPI_Comm     &communicator,
+                           const unsigned int  m,
+                           const unsigned int  n,
+                           const unsigned int  local_rows,
+                           const unsigned int  local_columns)
+  {
+    this->communicator = communicator;
+
+    // destroy the matrix and
+    // generate a new one
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    int ierr = MatDestroy (matrix);
+#else
+    int ierr = MatDestroy (&matrix);
+#endif
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    do_reinit (m, n, local_rows, local_columns);
+  }
+
+
+
+  // Same as above, but the local sizes are looked up at position
+  // this_process in the two per-process arrays.
+  void MatrixFree::reinit (const MPI_Comm     &communicator,
+                           const unsigned int  m,
+                           const unsigned int  n,
+                           const std::vector<unsigned int> &local_rows_per_process,
+                           const std::vector<unsigned int> &local_columns_per_process,
+                           const unsigned int  this_process)
+  {
+    Assert (local_rows_per_process.size() == local_columns_per_process.size(),
+            ExcDimensionMismatch (local_rows_per_process.size(),
+                                  local_columns_per_process.size()));
+    Assert (this_process < local_rows_per_process.size(),
+            ExcInternalError());
+
+    this->communicator = communicator;
+
+    // destroy the matrix and generate a new one
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    int ierr = MatDestroy (matrix);
+#else
+    int ierr = MatDestroy (&matrix);
+#endif
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    do_reinit (m, n,
+               local_rows_per_process[this_process],
+               local_columns_per_process[this_process]);
+  }
+
+
+
+  // Convenience overload: reinit on MPI_COMM_WORLD.
+  void MatrixFree::reinit (const unsigned int  m,
+                           const unsigned int  n,
+                           const unsigned int  local_rows,
+                           const unsigned int  local_columns)
+  {
+    reinit (MPI_COMM_WORLD, m, n, local_rows, local_columns);
+  }
+
+
+
+  // Convenience overload: reinit on MPI_COMM_WORLD with per-process arrays
+  // of local sizes.
+  void MatrixFree::reinit (const unsigned int  m,
+                           const unsigned int  n,
+                           const std::vector<unsigned int> &local_rows_per_process,
+                           const std::vector<unsigned int> &local_columns_per_process,
+                           const unsigned int  this_process)
+  {
+    reinit (MPI_COMM_WORLD, m, n, local_rows_per_process, local_columns_per_process, this_process);
+  }
+
+
+
+  // Destroy the current PETSc matrix and replace it by an empty (0 x 0)
+  // shell matrix. The communicator is left as it is.
+  void MatrixFree::clear ()
+  {
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    int ierr = MatDestroy (matrix);
+#else
+    int ierr = MatDestroy (&matrix);
+#endif
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    const int m=0;
+    do_reinit (m, m, m, m);
+  }
+
+
+
+  // Matrix-vector product on raw PETSc vectors: copy src into a deal.II
+  // wrapper vector, call the vmult() taking wrapper vectors, and copy the
+  // result into dst. Only the PETSc vector types "mpi" and "seq" are
+  // supported; anything else raises an exception.
+  void MatrixFree::vmult (Vec  &dst, const Vec  &src) const
+  {
+
+//TODO: Translate the given PETSc Vec* vector into a deal.II
+// vector so we can call the vmult function with the usual
+// interface; then convert back. This could be much more
+// efficient, if the PETScWrappers::*::Vector classes
+// had a way to simply generate such a vector object from
+// a given PETSc Vec* object without allocating new memory
+// and without taking ownership of the Vec*
+
+    // because we do not know,
+    // if dst and src are sequential
+    // or distributed vectors,
+    // we ask for the vector-type
+    // and reinit x and y with
+    // dealii::PETScWrappers::*::Vector:
+    const char  *vec_type;
+    int ierr = VecGetType (src, &vec_type);
+    // this return code used to be overwritten without being looked at. if
+    // the call fails, vec_type must not be handed to strcmp below
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    PetscInt  local_size;
+    ierr = VecGetLocalSize (src, &local_size);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    VectorBase  *x = 0;
+    VectorBase  *y = 0;
+
+    // everything from the first allocation on can throw (operator new and
+    // every AssertThrow), so release x and y on that path as well instead
+    // of leaking them
+    try
+      {
+        if (strcmp(vec_type,"mpi") == 0)
+          {
+            PetscInt  size;
+            ierr = VecGetSize (src, &size);
+            AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+            x = new PETScWrappers::MPI::Vector (this->get_mpi_communicator (), size, local_size);
+            y = new PETScWrappers::MPI::Vector (this->get_mpi_communicator (), size, local_size);
+          }
+        else if (strcmp(vec_type,"seq") == 0)
+          {
+            x = new PETScWrappers::Vector (local_size);
+            y = new PETScWrappers::Vector (local_size);
+          }
+        else
+          AssertThrow (false, ExcMessage("PETScWrappers::MPI::MatrixFree::do_matrix_vector_action: "
+                                         "This only works for Petsc Vec Type = VECMPI | VECSEQ"));
+
+        // copy src to x
+        x->equ(1., PETScWrappers::VectorBase(src));
+        // and call vmult(x,y) which must
+        // be reimplemented in derived classes
+        vmult (*y, *x);
+
+        // copy the result back to dst
+        ierr = VecCopy (static_cast<const Vec &>(*y), dst);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+      }
+    catch (...)
+      {
+        // deleting a null pointer is fine, so no need to check which of
+        // the two has been allocated already
+        delete x;
+        delete y;
+        throw;
+      }
+
+    delete x;
+    delete y;
+  }
+
+
+
+  // Function registered with PETSc as the MATOP_MULT operation of the shell
+  // matrix (see do_reinit). It recovers the MatrixFree object from the
+  // shell context of A and forwards to that object's vmult(). Returns 0.
+  int MatrixFree::matrix_free_mult (Mat  A, Vec  src, Vec  dst)
+  {
+    // create a pointer to this MatrixFree
+    // object and link the given matrix A
+    // to the matrix-vector multiplication
+    // of this MatrixFree object,
+    void  *this_object;
+    const int ierr = MatShellGetContext (A, &this_object);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // call vmult of this object. the context is the 'this' pointer that
+    // do_reinit handed to MatCreateShell as a void*, so a static_cast
+    // brings it back
+    static_cast<MatrixFree *>(this_object)->vmult (dst, src);
+
+    return (0);
+  }
+
+
+
+  // Create the PETSc shell matrix of global size m x n and local size
+  // local_rows x local_columns on the stored communicator, with this
+  // object as its context and matrix_free_mult as its multiplication.
+  // A previously existing matrix is not destroyed here; callers do that.
+  void MatrixFree::do_reinit (const unsigned int  m,
+                              const unsigned int  n,
+                              const unsigned int  local_rows,
+                              const unsigned int  local_columns)
+  {
+    Assert (local_rows <= m, ExcDimensionMismatch (local_rows, m));
+    Assert (local_columns <= n, ExcDimensionMismatch (local_columns, n));
+
+    int ierr;
+    // create a PETSc MatShell matrix-type
+    // object of dimension m x n and local size
+    // local_rows x local_columns
+    ierr = MatCreateShell(communicator, local_rows, local_columns, m, n,
+                          static_cast<void *>(this), &matrix);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+    // register the MatrixFree::matrix_free_mult function
+    // as the matrix multiplication used by this matrix
+    ierr = MatShellSetOperation (matrix, MATOP_MULT,
+                                 reinterpret_cast<void( *)(void)>(&dealii::PETScWrappers::MatrixFree::matrix_free_mult));
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    ierr = MatSetFromOptions (matrix);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_PETSC
diff --git a/source/lac/petsc_parallel_block_sparse_matrix.cc b/source/lac/petsc_parallel_block_sparse_matrix.cc
new file mode 100644
index 0000000..3c3555f
--- /dev/null
+++ b/source/lac/petsc_parallel_block_sparse_matrix.cc
@@ -0,0 +1,144 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/petsc_parallel_block_sparse_matrix.h>
+
+#ifdef DEAL_II_WITH_PETSC
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace PETScWrappers
+{
+  namespace MPI
+  {
+
+    // Default constructor; there is nothing to do here.
+    BlockSparseMatrix::BlockSparseMatrix ()
+    {}
+
+
+    // Destructor; there is nothing to do here.
+    BlockSparseMatrix::~BlockSparseMatrix ()
+    {}
+
+
+    // Copy assignment: all the work is done by the operator of the base
+    // class.
+    BlockSparseMatrix &
+    BlockSparseMatrix::operator = (const BlockSparseMatrix &m)
+    {
+      BaseClass::operator = (m);
+      return *this;
+    }
+
+
+    // Resize the matrix to n_block_rows x n_block_columns blocks, each of
+    // them a default-constructed BlockType.
+    void
+    BlockSparseMatrix::reinit (const size_type n_block_rows,
+                               const size_type n_block_columns)
+    {
+      // get rid of whatever the subobjects array held before
+      clear ();
+
+      // resize the block table. all block sizes are set to zero for now;
+      // the user will later have to call collect_sizes for this
+      this->sub_objects.reinit (n_block_rows, n_block_columns);
+      this->row_block_indices.reinit (n_block_rows, 0);
+      this->column_block_indices.reinit (n_block_columns, 0);
+
+      // finally put a fresh block into every slot of the table
+      const size_type block_row_count = this->n_block_rows();
+      const size_type block_col_count = this->n_block_cols();
+      for (size_type row=0; row<block_row_count; ++row)
+        for (size_type col=0; col<block_col_count; ++col)
+          this->sub_objects[row][col] = new BlockType();
+    }
+
+    // Set up the blocks from a block sparsity pattern: block (r,c) is
+    // initialized with rows[r], cols[c], the matching block of bdsp and the
+    // communicator com. collect_sizes() is called at the end.
+    void
+    BlockSparseMatrix::reinit (const std::vector<IndexSet> &rows,
+                               const std::vector<IndexSet> &cols,
+                               const BlockDynamicSparsityPattern &bdsp,
+                               const MPI_Comm &com)
+    {
+      Assert(rows.size() == bdsp.n_block_rows(), ExcMessage("invalid size"));
+      Assert(cols.size() == bdsp.n_block_cols(), ExcMessage("invalid size"));
+
+      // start from scratch with a block table of the right shape
+      clear();
+      this->sub_objects.reinit (bdsp.n_block_rows(), bdsp.n_block_cols());
+
+      // the size of block row i is taken from the block in column 0 ...
+      std::vector<types::global_dof_index> row_sizes;
+      for (unsigned int i=0; i<bdsp.n_block_rows(); ++i)
+        row_sizes.push_back (bdsp.block(i,0).n_rows());
+      this->row_block_indices.reinit (row_sizes);
+
+      // ... and the size of block column j from the block in row 0
+      std::vector<types::global_dof_index> col_sizes;
+      for (unsigned int j=0; j<bdsp.n_block_cols(); ++j)
+        col_sizes.push_back (bdsp.block(0,j).n_cols());
+      this->column_block_indices.reinit (col_sizes);
+
+      // now create the blocks one by one
+      for (unsigned int r=0; r<this->n_block_rows(); ++r)
+        for (unsigned int c=0; c<this->n_block_cols(); ++c)
+          {
+            Assert(rows[r].size() == bdsp.block(r,c).n_rows(), ExcMessage("invalid size"));
+            Assert(cols[c].size() == bdsp.block(r,c).n_cols(), ExcMessage("invalid size"));
+
+            // initialize the block first, then store it in the table
+            BlockType *new_block = new BlockType();
+            new_block->reinit (rows[r], cols[c], bdsp.block(r,c), com);
+            this->sub_objects[r][c] = new_block;
+          }
+
+      collect_sizes();
+    }
+
+    // Same as above, using the same index sets for rows and columns.
+    void
+    BlockSparseMatrix::reinit (const std::vector<IndexSet> &sizes,
+                               const BlockDynamicSparsityPattern &bdsp,
+                               const MPI_Comm &com)
+    {
+      reinit(sizes, sizes, bdsp, com);
+    }
+
+
+
+    // Forward to the implementation in the base class.
+    void
+    BlockSparseMatrix::collect_sizes ()
+    {
+      BaseClass::collect_sizes ();
+    }
+
+
+
+    // Return the communicator of block (0,0).
+    const MPI_Comm &
+    BlockSparseMatrix::get_mpi_communicator () const
+    {
+      return this->block(0,0).get_mpi_communicator();
+    }
+
+  }
+}
+
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/source/lac/petsc_parallel_block_vector.cc b/source/lac/petsc_parallel_block_vector.cc
new file mode 100644
index 0000000..5c66680
--- /dev/null
+++ b/source/lac/petsc_parallel_block_vector.cc
@@ -0,0 +1,64 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/petsc_parallel_block_vector.h>
+
+#ifdef DEAL_II_WITH_PETSC
+
+#  include <deal.II/lac/petsc_block_vector.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace PETScWrappers
+{
+  namespace MPI
+  {
+    typedef types::global_dof_index size_type;
+
+    // Assign a PETScWrappers::BlockVector (the class outside the MPI
+    // namespace) to this vector: take over its block structure and copy it
+    // block by block.
+    BlockVector &
+    BlockVector::operator = (const PETScWrappers::BlockVector &v)
+    {
+      // take over the block structure of v ...
+      this->block_indices = v.get_block_indices();
+
+      // ... provide the matching number of component vectors ...
+      const unsigned int block_count = this->n_blocks();
+      if (this->components.size() != block_count)
+        this->components.resize(block_count);
+
+      // ... and assign the blocks one after the other
+      for (unsigned int b=0; b<block_count; ++b)
+        this->block(b) = v.block(b);
+
+      return *this;
+    }
+
+
+    // Reinitialize to num_blocks blocks, each of size zero and living on
+    // MPI_COMM_SELF.
+    void
+    BlockVector::reinit (const unsigned int num_blocks)
+    {
+      // all blocks have size zero
+      const std::vector<size_type> zero_sizes (num_blocks, 0);
+      this->block_indices.reinit (zero_sizes);
+
+      const unsigned int block_count = this->n_blocks();
+      if (this->components.size() != block_count)
+        this->components.resize(block_count);
+
+      for (unsigned int b=0; b<block_count; ++b)
+        components[b].reinit (MPI_COMM_SELF, 0, 0);
+
+      collect_sizes();
+    }
+  }
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_PETSC
diff --git a/source/lac/petsc_parallel_sparse_matrix.cc b/source/lac/petsc_parallel_sparse_matrix.cc
new file mode 100644
index 0000000..6aff059
--- /dev/null
+++ b/source/lac/petsc_parallel_sparse_matrix.cc
@@ -0,0 +1,899 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/petsc_parallel_sparse_matrix.h>
+
+#ifdef DEAL_II_WITH_PETSC
+
+#  include <deal.II/lac/petsc_vector.h>
+#  include <deal.II/lac/sparsity_pattern.h>
+#  include <deal.II/lac/dynamic_sparsity_pattern.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace PETScWrappers
+{
+  namespace MPI
+  {
+
+    // Default constructor: creates an empty 0x0 matrix. As for vectors, an
+    // empty object might as well be sequential, hence PETSC_COMM_SELF.
+    SparseMatrix::SparseMatrix ()
+    {
+      const int n_rows            = 0;
+      const int n_columns         = 0;
+      const int n_nonzero_per_row = 0;
+      const int ierr = MatCreateSeqAIJ(PETSC_COMM_SELF, n_rows, n_columns,
+                                       n_nonzero_per_row, 0, &matrix);
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+    }
+
+
+    // Destructor: releases the PETSc matrix. A destructor must not throw
+    // (it may run during stack unwinding), so a failure is only reported.
+    SparseMatrix::~SparseMatrix ()
+    {
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+      const int ierr = MatDestroy (matrix);
+#else
+      const int ierr = MatDestroy (&matrix);
+#endif
+      AssertNothrow (ierr == 0, ExcPETScError(ierr)); (void) ierr;
+    }
+
+    // Store the communicator, then create the matrix by forwarding all
+    // sizes and the two uniform row-length estimates to do_reinit().
+    SparseMatrix::SparseMatrix (const MPI_Comm  &communicator,
+                                const size_type  m,
+                                const size_type  n,
+                                const size_type  local_rows,
+                                const size_type  local_columns,
+                                const size_type  n_nonzero_per_row,
+                                const bool       is_symmetric,
+                                const size_type  n_offdiag_nonzero_per_row)
+      : communicator (communicator)
+    {
+      do_reinit (m, n, local_rows, local_columns, n_nonzero_per_row,
+                 is_symmetric, n_offdiag_nonzero_per_row);
+    }
+
+
+
+    // Store the communicator; do_reinit() builds the matrix from per-row lengths.
+    SparseMatrix::SparseMatrix (const MPI_Comm               &communicator,
+                                const size_type               m,
+                                const size_type               n,
+                                const size_type               local_rows,
+                                const size_type               local_columns,
+                                const std::vector<size_type> &row_lengths,
+                                const bool                    is_symmetric,
+                                const std::vector<size_type> &offdiag_row_lengths)
+      : communicator (communicator)
+    {
+      do_reinit (m, n, local_rows, local_columns, row_lengths,
+                 is_symmetric, offdiag_row_lengths);
+    }
+
+
+
+    // Store the communicator and build the matrix from a sparsity pattern
+    // plus the per-process row/column counts via do_reinit().
+    template <typename SparsityPatternType>
+    SparseMatrix::
+    SparseMatrix (const MPI_Comm               &communicator,
+                  const SparsityPatternType    &sparsity_pattern,
+                  const std::vector<size_type> &local_rows_per_process,
+                  const std::vector<size_type> &local_columns_per_process,
+                  const unsigned int            this_process,
+                  const bool                    preset_nonzero_locations)
+      : communicator (communicator)
+    {
+      do_reinit (sparsity_pattern, local_rows_per_process, local_columns_per_process,
+                 this_process, preset_nonzero_locations);
+    }
+
+
+    // Re-create this matrix from `other`: adopt its communicator, destroy
+    // the current PETSc matrix and duplicate other.matrix with
+    // MAT_DO_NOT_COPY_VALUES.
+    void
+    SparseMatrix::reinit (const SparseMatrix &other)
+    {
+      // nothing to do when reinitializing from ourselves
+      if (this == &other)
+        return;
+      communicator = other.communicator;
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+      int ierr = MatDestroy (matrix);
+#else
+      int ierr = MatDestroy (&matrix);
+#endif
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+      ierr = MatDuplicate(other.matrix, MAT_DO_NOT_COPY_VALUES, &matrix);
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+    }
+
+
+    // Forward the scalar assignment to MatrixBase::operator= and return *this.
+    SparseMatrix &SparseMatrix::operator = (const value_type d)
+    {
+      this->MatrixBase::operator = (d);
+      return *this;
+    }
+
+    // Copy communicator and entries of `other` via MatCopy; self-copy is a no-op.
+    void
+    SparseMatrix::copy_from (const SparseMatrix &other)
+    {
+      if (this != &other)
+        {
+          communicator = other.communicator;
+          const int ierr = MatCopy(other.matrix, matrix, SAME_NONZERO_PATTERN);
+          AssertThrow (ierr == 0, ExcPETScError(ierr));
+        }
+    }
+
+    // Discard the current matrix and create a new one on the given
+    // communicator; sizes and row-length estimates go to do_reinit().
+    void
+    SparseMatrix::reinit (const MPI_Comm  &communicator,
+                          const size_type  m,
+                          const size_type  n,
+                          const size_type  local_rows,
+                          const size_type  local_columns,
+                          const size_type  n_nonzero_per_row,
+                          const bool       is_symmetric,
+                          const size_type  n_offdiag_nonzero_per_row)
+    {
+      this->communicator = communicator;
+
+      // destroy the old PETSc matrix before do_reinit() creates a new one
+      int ierr;
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+      ierr = MatDestroy (matrix);
+#else
+      ierr = MatDestroy (&matrix);
+#endif
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+      do_reinit (m, n, local_rows, local_columns, n_nonzero_per_row,
+                 is_symmetric, n_offdiag_nonzero_per_row);
+    }
+
+
+
+    // Discard the current matrix and create a new one on the given
+    // communicator from per-row length vectors, through do_reinit().
+    void
+    SparseMatrix::reinit (const MPI_Comm               &communicator,
+                          const size_type               m,
+                          const size_type               n,
+                          const size_type               local_rows,
+                          const size_type               local_columns,
+                          const std::vector<size_type> &row_lengths,
+                          const bool                    is_symmetric,
+                          const std::vector<size_type> &offdiag_row_lengths)
+    {
+      this->communicator = communicator;
+      // destroy the old PETSc matrix before do_reinit() creates a new one
+      int ierr;
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+      ierr = MatDestroy (matrix);
+#else
+      ierr = MatDestroy (&matrix);
+#endif
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+      do_reinit (m, n, local_rows, local_columns, row_lengths,
+                 is_symmetric, offdiag_row_lengths);
+    }
+
+
+
+    // Discard the current matrix and rebuild it on the given communicator
+    // from a sparsity pattern and per-process row/column counts.
+    template <typename SparsityPatternType>
+    void
+    SparseMatrix::
+    reinit (const MPI_Comm               &communicator,
+            const SparsityPatternType    &sparsity_pattern,
+            const std::vector<size_type> &local_rows_per_process,
+            const std::vector<size_type> &local_columns_per_process,
+            const unsigned int            this_process,
+            const bool                    preset_nonzero_locations)
+    {
+      this->communicator = communicator;
+
+      // destroy the old PETSc matrix before do_reinit() creates a new one
+      int ierr;
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+      ierr = MatDestroy (matrix);
+#else
+      ierr = MatDestroy (&matrix);
+#endif
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+      do_reinit (sparsity_pattern, local_rows_per_process, local_columns_per_process,
+                 this_process, preset_nonzero_locations);
+    }
+
+    // Discard the current matrix and rebuild it on the given communicator
+    // from the index sets of local rows/columns and a sparsity pattern.
+    template <typename SparsityPatternType>
+    void
+    SparseMatrix::
+    reinit (const IndexSet            &local_rows,
+            const IndexSet            &local_columns,
+            const SparsityPatternType &sparsity_pattern,
+            const MPI_Comm            &communicator)
+    {
+      this->communicator = communicator;
+      // destroy the old PETSc matrix before do_reinit() creates a new one
+      int ierr;
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+      ierr = MatDestroy (matrix);
+#else
+      ierr = MatDestroy (&matrix);
+#endif
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+      do_reinit (local_rows, local_columns, sparsity_pattern);
+    }
+
+    // Create the parallel AIJ matrix using one row-length estimate for
+    // all rows.
+    //
+    // m, n                       sizes passed to PETSc as global dimensions
+    // local_rows, local_columns  sizes passed to PETSc as local dimensions
+    // n_nonzero_per_row, n_offdiag_nonzero_per_row
+    //                            per-row preallocation counts (d_nz, o_nz)
+    // is_symmetric               if true, set the MAT_SYMMETRIC option
+    void
+    SparseMatrix::do_reinit (const size_type m,
+                             const size_type n,
+                             const size_type local_rows,
+                             const size_type local_columns,
+                             const size_type n_nonzero_per_row,
+                             const bool      is_symmetric,
+                             const size_type n_offdiag_nonzero_per_row)
+    {
+      Assert (local_rows <= m, ExcLocalRowsTooLarge (local_rows, m));
+
+      // only maximal row lengths valid for every row are known here, so
+      // the per-row array arguments of the create call are passed as 0
+      int ierr;
+#if DEAL_II_PETSC_VERSION_LT(3,3,0)
+      ierr = MatCreateMPIAIJ (communicator,
+                              local_rows, local_columns, m, n,
+                              n_nonzero_per_row, 0,
+                              n_offdiag_nonzero_per_row, 0,
+                              &matrix);
+#else
+      ierr = MatCreateAIJ (communicator,
+                           local_rows, local_columns, m, n,
+                           n_nonzero_per_row, 0,
+                           n_offdiag_nonzero_per_row, 0,
+                           &matrix);
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+      // switch off the error for allocations beyond the preallocation
+      ierr = MatSetOption (matrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
+#endif
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+      // the symmetry flag is only set on request
+      if (is_symmetric == false)
+        return;
+
+#if DEAL_II_PETSC_VERSION_LT(3,0,0)
+      ierr = MatSetOption (matrix, MAT_SYMMETRIC);
+#else
+      ierr = MatSetOption (matrix, MAT_SYMMETRIC, PETSC_TRUE);
+#endif
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+    }
+
+
+
+    // Create the parallel AIJ matrix with an individual preallocation
+    // count for every row.
+    //
+    // row_lengths          per-row counts passed to PETSc as d_nnz
+    // offdiag_row_lengths  per-row counts passed as o_nnz; may be empty
+    // is_symmetric         if true, set the MAT_SYMMETRIC option
+    // The remaining arguments are the global (m, n) and local sizes.
+    void
+    SparseMatrix::do_reinit (const size_type m,
+                             const size_type n,
+                             const size_type local_rows,
+                             const size_type local_columns,
+                             const std::vector<size_type> &row_lengths,
+                             const bool      is_symmetric,
+                             const std::vector<size_type> &offdiag_row_lengths)
+    {
+      Assert (local_rows <= m, ExcLocalRowsTooLarge (local_rows, m));
+
+      Assert (row_lengths.size() == m,
+              ExcDimensionMismatch (row_lengths.size(), m));
+
+      // For the case that local_columns is smaller than one of the row
+      // lengths MatCreateMPIAIJ throws an error. In this case use a
+      // PETScWrappers::SparseMatrix
+      for (size_type i=0; i<row_lengths.size(); ++i)
+        Assert(row_lengths[i]<=local_columns,
+               ExcIndexRange(row_lengths[i], 1, local_columns+1));
+
+      // use the call sequence indicating a maximal number of elements for
+      // each row individually. annoyingly, we always use unsigned ints for
+      // cases like this, while PETSc wants to see signed integers. so we
+      // have to convert, unless we want to play dirty tricks with
+      // conversions of pointers
+      const std::vector<PetscInt> int_row_lengths (row_lengths.begin(),
+                                                   row_lengths.end());
+      const std::vector<PetscInt> int_offdiag_row_lengths (offdiag_row_lengths.begin(),
+                                                           offdiag_row_lengths.end());
+
+      // taking &v[0] of an empty vector is undefined behavior, so pass a
+      // null pointer for either array when there is nothing to point to
+      const PetscInt *const diag_nnz
+        = int_row_lengths.empty() ? 0 : &int_row_lengths[0];
+      const PetscInt *const offdiag_nnz
+        = int_offdiag_row_lengths.empty() ? 0 : &int_offdiag_row_lengths[0];
+
+//TODO: There must be a significantly better way to provide information about the off-diagonal blocks of the matrix. this way, petsc keeps allocating tiny chunks of memory, and gets completely hung up over this
+
+      int ierr;
+#if DEAL_II_PETSC_VERSION_LT(3,3,0)
+      ierr = MatCreateMPIAIJ (communicator,
+                              local_rows, local_columns, m, n,
+                              0, diag_nnz,
+                              0, offdiag_nnz,
+                              &matrix);
+#else
+      ierr = MatCreateAIJ (communicator,
+                           local_rows, local_columns, m, n,
+                           0, diag_nnz,
+                           0, offdiag_nnz,
+                           &matrix);
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+//TODO: Sometimes the actual number of nonzero entries allocated is greater than the number of nonzero entries, which petsc will complain about unless explicitly disabled with MatSetOption. There is probably a way to prevent a different number nonzero elements being allocated in the first place. (See also previous TODO).
+
+      ierr = MatSetOption (matrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
+#endif
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+      // set symmetric flag, if so requested
+      if (is_symmetric == true)
+        {
+#if DEAL_II_PETSC_VERSION_LT(3,0,0)
+          ierr = MatSetOption (matrix, MAT_SYMMETRIC);
+#else
+          ierr = MatSetOption (matrix, MAT_SYMMETRIC, PETSC_TRUE);
+#endif
+          AssertThrow (ierr == 0, ExcPETScError(ierr));
+        }
+    }
+
+
+    // Create the matrix on the stored communicator from a sparsity pattern
+    // and the (contiguous) index sets of locally owned rows and columns.
+    // The pattern of the locally owned rows is passed to PETSc in CSR form
+    // through MatMPIAIJSetPreallocationCSR; the return code of every PETSc
+    // call, including that one, is checked with AssertThrow.
+    template <typename SparsityPatternType>
+    void
+    SparseMatrix::
+    do_reinit (const IndexSet            &local_rows,
+               const IndexSet            &local_columns,
+               const SparsityPatternType &sparsity_pattern)
+    {
+      Assert(sparsity_pattern.n_rows()==local_rows.size(),
+             ExcMessage("SparsityPattern and IndexSet have different number of rows"));
+      Assert(sparsity_pattern.n_cols()==local_columns.size(),
+             ExcMessage("SparsityPattern and IndexSet have different number of columns"));
+      Assert(local_rows.is_contiguous() && local_columns.is_contiguous(),
+             ExcMessage("PETSc only supports contiguous row/column ranges"));
+
+#ifdef DEBUG
+      {
+        // check indexsets
+        types::global_dof_index row_owners = Utilities::MPI::sum(local_rows.n_elements(), communicator);
+        types::global_dof_index col_owners = Utilities::MPI::sum(local_columns.n_elements(), communicator);
+        Assert(row_owners == sparsity_pattern.n_rows(),
+               ExcMessage(std::string("Each row has to be owned by exactly one owner (n_rows()=")
+                          + Utilities::to_string(sparsity_pattern.n_rows())
+                          + " but sum(local_rows.n_elements())="
+                          + Utilities::to_string(row_owners)
+                          + ")"));
+        Assert(col_owners == sparsity_pattern.n_cols(),
+               ExcMessage(std::string("Each column has to be owned by exactly one owner (n_cols()=")
+                          + Utilities::to_string(sparsity_pattern.n_cols())
+                          + " but sum(local_columns.n_elements())="
+                          + Utilities::to_string(col_owners)
+                          + ")"));
+      }
+#endif
+
+      // create the matrix. We do not set row length but set the
+      // correct SparsityPattern later.
+      int ierr;
+
+      ierr = MatCreate(communicator,&matrix);
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+      ierr = MatSetSizes(matrix,
+                         local_rows.n_elements(),
+                         local_columns.n_elements(),
+                         sparsity_pattern.n_rows(),
+                         sparsity_pattern.n_cols());
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+      ierr = MatSetType(matrix,MATMPIAIJ);
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+      // next preset the exact given matrix
+      // entries with zeros. this doesn't avoid any
+      // memory allocations, but it at least
+      // avoids some searches later on. the
+      // key here is that we can use the
+      // matrix set routines that set an
+      // entire row at once, not a single
+      // entry at a time
+      //
+      // for the usefulness of this option
+      // read the documentation of this
+      // class.
+      //if (preset_nonzero_locations == true)
+      if (local_rows.n_elements()>0)
+        {
+          Assert(local_columns.n_elements()>0, ExcInternalError());
+          // MatMPIAIJSetPreallocationCSR
+          // can be used to allocate the sparsity
+          // pattern of a matrix
+
+          const PetscInt local_row_start = local_rows.nth_index_in_set(0);
+          const PetscInt
+          local_row_end = local_row_start + local_rows.n_elements();
+
+          // first set up the column number
+          // array for the rows to be stored
+          // on the local processor. have one
+          // dummy entry at the end to make
+          // sure petsc doesn't read past the
+          // end
+          std::vector<PetscInt>
+
+          rowstart_in_window (local_row_end - local_row_start + 1, 0),
+                             colnums_in_window;
+          {
+            unsigned int n_cols = 0;
+            for (PetscInt i=local_row_start; i<local_row_end; ++i)
+              {
+                const PetscInt row_length = sparsity_pattern.row_length(i);
+                rowstart_in_window[i+1-local_row_start]
+                  = rowstart_in_window[i-local_row_start] + row_length;
+                n_cols += row_length;
+              }
+            colnums_in_window.resize (n_cols+1, -1);
+          }
+
+          // now copy over the information
+          // from the sparsity pattern.
+          {
+            PetscInt *ptr = & colnums_in_window[0];
+            for (PetscInt i=local_row_start; i<local_row_end; ++i)
+              for (typename SparsityPatternType::iterator p=sparsity_pattern.begin(i);
+                   p != sparsity_pattern.end(i); ++p, ++ptr)
+                *ptr = p->column();
+          }
+
+          // then call the petsc function
+          // that summarily allocates these
+          // entries, and check its return code:
+          ierr = MatMPIAIJSetPreallocationCSR (matrix,
+                                               &rowstart_in_window[0],
+                                               &colnums_in_window[0],
+                                               0);
+          AssertThrow (ierr == 0, ExcPETScError(ierr));
+        }
+      else
+        {
+          // no locally owned rows: both CSR arrays are a single zero
+          PetscInt i=0;
+          ierr = MatMPIAIJSetPreallocationCSR (matrix,
+                                               &i,
+                                               &i,
+                                               0);
+          AssertThrow (ierr == 0, ExcPETScError(ierr));
+        }
+      compress (dealii::VectorOperation::insert);
+
+      {
+        // Tell PETSc that we are not
+        // planning on adding new entries
+        // to the matrix. Generate errors
+        // in debug mode.
+#if DEAL_II_PETSC_VERSION_LT(3,0,0)
+#ifdef DEBUG
+        ierr = MatSetOption (matrix, MAT_NEW_NONZERO_LOCATION_ERR);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+#else
+        ierr = MatSetOption (matrix, MAT_NO_NEW_NONZERO_LOCATIONS);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+#endif
+#else
+#ifdef DEBUG
+        ierr = MatSetOption (matrix, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+#else
+        ierr = MatSetOption (matrix, MAT_NEW_NONZERO_LOCATIONS, PETSC_FALSE);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+#endif
+#endif
+
+        // Tell PETSc to keep the
+        // SparsityPattern entries even if
+        // we delete a row with
+        // clear_rows() which calls
+        // MatZeroRows(). Otherwise one can
+        // not write into that row
+        // afterwards.
+#if DEAL_II_PETSC_VERSION_LT(3,0,0)
+        ierr = MatSetOption (matrix, MAT_KEEP_ZEROED_ROWS);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+#elif DEAL_II_PETSC_VERSION_LT(3,1,0)
+        ierr = MatSetOption (matrix, MAT_KEEP_ZEROED_ROWS, PETSC_TRUE);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+#else
+        ierr = MatSetOption (matrix, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+#endif
+
+      }
+
+    }
+
+
+    // Create the matrix on the stored communicator from a sparsity pattern
+    // and the number of rows/columns owned by each process; this_process
+    // selects our own slice. If preset_nonzero_locations is set, the local
+    // rows of the pattern are passed to PETSc in CSR form. The return codes
+    // of the PETSc calls made for that are checked like all others.
+    template <typename SparsityPatternType>
+    void
+    SparseMatrix::
+    do_reinit (const SparsityPatternType    &sparsity_pattern,
+               const std::vector<size_type> &local_rows_per_process,
+               const std::vector<size_type> &local_columns_per_process,
+               const unsigned int            this_process,
+               const bool                    preset_nonzero_locations)
+    {
+      Assert (local_rows_per_process.size() == local_columns_per_process.size(),
+              ExcDimensionMismatch (local_rows_per_process.size(),
+                                    local_columns_per_process.size()));
+      Assert (this_process < local_rows_per_process.size(),
+              ExcInternalError());
+      assert_is_compressed ();
+
+      // for each row that we own locally, we
+      // have to count how many of the
+      // entries in the sparsity pattern lie
+      // in the column area we have locally,
+      // and how many aren't. for this, we
+      // first have to know which areas are
+      // ours
+      size_type local_row_start = 0;
+      size_type local_col_start = 0;
+      for (unsigned int p=0; p<this_process; ++p)
+        {
+          local_row_start += local_rows_per_process[p];
+          local_col_start += local_columns_per_process[p];
+        }
+      const size_type
+      local_row_end = local_row_start + local_rows_per_process[this_process];
+
+#if DEAL_II_PETSC_VERSION_LT(2,3,3)
+      //old version to create the matrix, we
+      //can skip calculating the row length
+      //at least starting from 2.3.3 (tested,
+      //see below)
+
+      const size_type
+      local_col_end = local_col_start + local_columns_per_process[this_process];
+
+      // then count the elements in- and
+      // out-of-window for the rows we own
+      std::vector<PetscInt>
+
+      row_lengths_in_window (local_row_end - local_row_start),
+                            row_lengths_out_of_window (local_row_end - local_row_start);
+      for (size_type row = local_row_start; row<local_row_end; ++row)
+        for (size_type c=0; c<sparsity_pattern.row_length(row); ++c)
+          {
+            const size_type column = sparsity_pattern.column_number(row,c);
+
+            if ((column >= local_col_start) &&
+                (column < local_col_end))
+              ++row_lengths_in_window[row-local_row_start];
+            else
+              ++row_lengths_out_of_window[row-local_row_start];
+          }
+
+      // create the matrix. completely
+      // confusingly, PETSc wants us to pass
+      // arrays for the local number of
+      // elements that starts with zero for
+      // the first _local_ row, i.e. it
+      // doesn't index into an array for
+      // _all_ rows.
+      const int ierr
+        = MatCreateMPIAIJ(communicator,
+                          local_rows_per_process[this_process],
+                          local_columns_per_process[this_process],
+                          sparsity_pattern.n_rows(),
+                          sparsity_pattern.n_cols(),
+                          0, &row_lengths_in_window[0],
+                          0, &row_lengths_out_of_window[0],
+                          &matrix);
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+#else //PETSC_VERSION>=2.3.3
+      // create the matrix. We
+      // do not set row length but set the
+      // correct SparsityPattern later.
+      int ierr;
+
+      ierr = MatCreate(communicator,&matrix);
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+      ierr = MatSetSizes(matrix,
+                         local_rows_per_process[this_process],
+                         local_columns_per_process[this_process],
+                         sparsity_pattern.n_rows(),
+                         sparsity_pattern.n_cols());
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+      ierr = MatSetType(matrix,MATMPIAIJ);
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+#endif
+
+      // next preset the exact given matrix
+      // entries with zeros, if the user
+      // requested so. this doesn't avoid any
+      // memory allocations, but it at least
+      // avoids some searches later on. the
+      // key here is that we can use the
+      // matrix set routines that set an
+      // entire row at once, not a single
+      // entry at a time
+      //
+      // for the usefulness of this option
+      // read the documentation of this
+      // class.
+      if (preset_nonzero_locations == true)
+        {
+          // MatMPIAIJSetPreallocationCSR
+          // can be used to allocate the sparsity
+          // pattern of a matrix if it is already
+          // available:
+          // first set up the column number
+          // array for the rows to be stored
+          // on the local processor. have one
+          // dummy entry at the end to make
+          // sure petsc doesn't read past the
+          // end
+          std::vector<PetscInt>
+
+          rowstart_in_window (local_row_end - local_row_start + 1, 0),
+                             colnums_in_window;
+          {
+            size_type n_cols = 0;
+            for (size_type i=local_row_start; i<local_row_end; ++i)
+              {
+                const size_type row_length = sparsity_pattern.row_length(i);
+                rowstart_in_window[i+1-local_row_start]
+                  = rowstart_in_window[i-local_row_start] + row_length;
+                n_cols += row_length;
+              }
+            colnums_in_window.resize (n_cols+1, -1);
+          }
+
+          // now copy over the information
+          // from the sparsity pattern.
+          {
+            PetscInt *ptr = & colnums_in_window[0];
+            for (size_type i=local_row_start; i<local_row_end; ++i)
+              for (typename SparsityPatternType::iterator p=sparsity_pattern.begin(i);
+                   p != sparsity_pattern.end(i); ++p, ++ptr)
+                *ptr = p->column();
+          }
+
+          // then call the petsc function
+          // that summarily allocates these
+          // entries, and check its return code:
+          int ierr = MatMPIAIJSetPreallocationCSR (matrix,
+                                                   &rowstart_in_window[0],
+                                                   &colnums_in_window[0],
+                                                   0);
+          AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+#if DEAL_II_PETSC_VERSION_LT(2,3,3)
+          // this is only needed for old
+          // PETSc versions:
+          // for some reason, it does not
+          // seem to be possible to force
+          // actual allocation of actual
+          // entries by using the last
+          // arguments to the call above. if
+          // we don't initialize the entries
+          // like in the following loop, then
+          // the program is unbearably slow
+          // because elements are allocated
+          // and accessed in random order,
+          // which is not what PETSc likes
+          //
+          // note that we actually have to
+          // set the entries to something
+          // non-zero! do the allocation one
+          // row at a time
+          {
+            const std::vector<PetscScalar>
+            values (sparsity_pattern.max_entries_per_row(),
+                    1.);
+
+            for (size_type i=local_row_start; i<local_row_end; ++i)
+              {
+                PetscInt petsc_i = i;
+                ierr = MatSetValues (matrix, 1, &petsc_i,
+                                     sparsity_pattern.row_length(i),
+                                     &colnums_in_window[rowstart_in_window[i-local_row_start]],
+                                     &values[0], INSERT_VALUES);
+                AssertThrow (ierr == 0, ExcPETScError(ierr));
+              }
+          }
+
+          compress (VectorOperation::insert);
+
+          // set the dummy entries set above
+          // back to zero
+          *this = 0;
+#endif // version <=2.3.3
+
+          // Tell PETSc that we are not
+          // planning on adding new entries
+          // to the matrix. Generate errors
+          // in debug mode.
+#if DEAL_II_PETSC_VERSION_LT(3,0,0)
+#ifdef DEBUG
+          ierr = MatSetOption (matrix, MAT_NEW_NONZERO_LOCATION_ERR);
+          AssertThrow (ierr == 0, ExcPETScError(ierr));
+#else
+          ierr = MatSetOption (matrix, MAT_NO_NEW_NONZERO_LOCATIONS);
+          AssertThrow (ierr == 0, ExcPETScError(ierr));
+#endif
+#else
+#ifdef DEBUG
+          ierr = MatSetOption (matrix, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
+          AssertThrow (ierr == 0, ExcPETScError(ierr));
+#else
+          ierr = MatSetOption (matrix, MAT_NEW_NONZERO_LOCATIONS, PETSC_FALSE);
+          AssertThrow (ierr == 0, ExcPETScError(ierr));
+#endif
+#endif
+
+          // Tell PETSc to keep the
+          // SparsityPattern entries even if
+          // we delete a row with
+          // clear_rows() which calls
+          // MatZeroRows(). Otherwise one can
+          // not write into that row
+          // afterwards.
+#if DEAL_II_PETSC_VERSION_LT(3,0,0)
+          ierr = MatSetOption (matrix, MAT_KEEP_ZEROED_ROWS);
+          AssertThrow (ierr == 0, ExcPETScError(ierr));
+#elif DEAL_II_PETSC_VERSION_LT(3,1,0)
+          ierr = MatSetOption (matrix, MAT_KEEP_ZEROED_ROWS, PETSC_TRUE);
+          AssertThrow (ierr == 0, ExcPETScError(ierr));
+#else
+          ierr = MatSetOption (matrix, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE);
+          AssertThrow (ierr == 0, ExcPETScError(ierr));
+#endif
+
+        }
+    }
+
+    // Explicit instantiations of the member templates of SparseMatrix for
+    // SparsityPattern and DynamicSparsityPattern: first the constructors.
+    template
+    SparseMatrix::SparseMatrix (const MPI_Comm &,
+                                const SparsityPattern &,
+                                const std::vector<size_type> &,
+                                const std::vector<size_type> &,
+                                const unsigned int,
+                                const bool);
+    template
+    SparseMatrix::SparseMatrix (const MPI_Comm &,
+                                const DynamicSparsityPattern &,
+                                const std::vector<size_type> &,
+                                const std::vector<size_type> &,
+                                const unsigned int,
+                                const bool);
+    // reinit() taking per-process row/column counts, both pattern types
+    template void
+    SparseMatrix::reinit (const MPI_Comm &,
+                          const SparsityPattern &,
+                          const std::vector<size_type> &,
+                          const std::vector<size_type> &,
+                          const unsigned int,
+                          const bool);
+    template void
+    SparseMatrix::reinit (const MPI_Comm &,
+                          const DynamicSparsityPattern &,
+                          const std::vector<size_type> &,
+                          const std::vector<size_type> &,
+                          const unsigned int,
+                          const bool);
+    // reinit() taking IndexSets: instantiated for DynamicSparsityPattern only
+    template void
+    SparseMatrix::
+    reinit (const IndexSet &,
+            const IndexSet &,
+            const DynamicSparsityPattern &,
+            const MPI_Comm &);
+    // do_reinit() taking per-process row/column counts, both pattern types
+    template void
+    SparseMatrix::do_reinit (const SparsityPattern &,
+                             const std::vector<size_type> &,
+                             const std::vector<size_type> &,
+                             const unsigned int ,
+                             const bool);
+    template void
+    SparseMatrix::do_reinit (const DynamicSparsityPattern &,
+                             const std::vector<size_type> &,
+                             const std::vector<size_type> &,
+                             const unsigned int ,
+                             const bool);
+    // do_reinit() taking IndexSets: instantiated for DynamicSparsityPattern only
+    template void
+    SparseMatrix::
+    do_reinit (const IndexSet &,
+               const IndexSet &,
+               const DynamicSparsityPattern &);
+
+
+    // Return v * (A v), where A is this matrix.
+    PetscScalar
+    SparseMatrix::matrix_norm_square (const Vector &v) const
+    {
+      Vector matrix_times_v (v);   // starts as a copy of v, then passed to vmult as destination
+      vmult (matrix_times_v, v);
+      return v*matrix_times_v;     // i.e. sum_i conjugate(v)_i * (A v)_i
+    }
+
+    // Return u * (A v), where A is this matrix.
+    PetscScalar
+    SparseMatrix::matrix_scalar_product (const Vector &u,
+                                         const Vector &v) const
+    {
+      Vector matrix_times_v (v);   // starts as a copy of v, then passed to vmult as destination
+      vmult (matrix_times_v, v);
+      return u*matrix_times_v;     // i.e. sum_i conjugate(u)_i * (A v)_i
+    }
+
+  }
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_PETSC
diff --git a/source/lac/petsc_parallel_vector.cc b/source/lac/petsc_parallel_vector.cc
new file mode 100644
index 0000000..11e5e8c
--- /dev/null
+++ b/source/lac/petsc_parallel_vector.cc
@@ -0,0 +1,450 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/petsc_parallel_vector.h>
+
+#ifdef DEAL_II_WITH_PETSC
+
+#  include <deal.II/lac/petsc_vector.h>
+#  include <cmath>
+#  include <algorithm>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace PETScWrappers
+{
+  namespace MPI
+  {
+
+    Vector::Vector ()
+      :
+      communicator (MPI_COMM_SELF)
+    {
+      // empty, invalid vector: create a cheap sequential one and set the
+      // communicator to match (it would otherwise be left uninitialized)
+      const int ierr = VecCreateSeq (PETSC_COMM_SELF, 0, &vector);
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+      ghosted = false;
+    }
+
+
+
+    Vector::Vector (const MPI_Comm &communicator,   // communicator the vector is created on
+                    const size_type n,              // global number of elements
+                    const size_type local_size)     // number of elements stored on this process
+      :
+      communicator (communicator)
+    {
+      Vector::create_vector (n, local_size);   // non-ghosted vector
+    }
+
+
+
+    Vector::Vector (const MPI_Comm   &communicator,   // communicator the vector is created on
+                    const VectorBase  &v,             // source of the global size and of the values
+                    const size_type   local_size)     // number of elements stored on this process
+      :
+      communicator (communicator)
+    {
+      Vector::create_vector (v.size(), local_size);
+      // then assign v through the base class assignment operator
+      VectorBase::operator = (v);
+    }
+
+
+
+    Vector::Vector (const IndexSet   &local,          // locally owned indices (must be contiguous)
+                    const IndexSet &ghost,            // ghost indices; may overlap with local
+                    const MPI_Comm     &communicator)
+      :
+      communicator (communicator)
+    {
+      Assert(local.is_contiguous(), ExcNotImplemented());
+      // locally owned indices are never treated as ghosts: drop them from the set
+      IndexSet ghost_set = ghost;
+      ghost_set.subtract_set(local);
+      // global size is local.size(), local size is the number of owned indices
+      Vector::create_vector(local.size(), local.n_elements(), ghost_set);
+    }
+
+
+
+    Vector::Vector (const IndexSet   &local,          // locally owned indices (must be contiguous)
+                    const MPI_Comm     &communicator)
+      :
+      communicator (communicator)
+    {
+      Assert(local.is_contiguous(), ExcNotImplemented());
+      Vector::create_vector(local.size(), local.n_elements());   // non-ghosted vector
+    }
+
+
+
+    void
+    Vector::clear ()
+    {
+      // let the base class release the current PETSc Vec, then replace it
+      // by an invalid empty vector. since it is empty anyway, a sequential
+      // one is created to avoid all the overhead incurred by parallelism
+      attained_ownership = true;
+      VectorBase::clear ();
+
+      const int n = 0;
+      int ierr = VecCreateSeq (PETSC_COMM_SELF, n, &vector);
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+    }
+
+
+
+    void
+    Vector::reinit (const MPI_Comm  &comm,
+                    const size_type  n,
+                    const size_type  local_sz,
+                    const bool       omit_zeroing_entries)
+    {
+      communicator = comm;   // adopt the new communicator before the collective call below
+
+      // rebuild the vector only if the global or local size differs on at
+      // least one process (the flags are combined with a logical OR below),
+      // or if the current vector has ghost elements
+      int k_global, k = ((size() != n) || (local_size() != local_sz));
+      MPI_Allreduce (&k, &k_global, 1,
+                     MPI_INT, MPI_LOR, communicator);
+
+      if (k_global || has_ghost_elements())
+        {
+          // FIXME: I'd like to use this here,
+          // but somehow it leads to odd errors
+          // somewhere down the line in some of
+          // the tests:
+//         const int ierr = VecSetSizes (vector, n, n);
+//         AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+          // so let's go the slow way: destroy the Vec and create a new one
+          int ierr;
+
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+          ierr = VecDestroy (vector);
+#else
+          ierr = VecDestroy (&vector);
+#endif
+
+          AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+          create_vector (n, local_sz);
+        }
+
+      // finally assign zero to the vector unless
+      // the caller asked to omit this step
+      if (omit_zeroing_entries == false)
+        *this = 0;
+    }
+
+
+
+    void
+    Vector::reinit (const Vector &v,
+                    const bool    omit_zeroing_entries)
+    {
+      if (v.has_ghost_elements())   // ghosted source: rebuild from its index sets
+        {
+          reinit (v.locally_owned_elements(), v.ghost_indices, v.communicator);
+          if (!omit_zeroing_entries)
+            {
+              int ierr = VecSet(vector, 0.0);   // the index-set overload takes no zeroing flag
+              AssertThrow (ierr == 0, ExcPETScError(ierr));
+            }
+        }
+      else   // non-ghosted source: the size-based overload handles the zeroing itself
+        reinit (v.communicator, v.size(), v.local_size(), omit_zeroing_entries);
+    }
+
+
+
+    void
+    Vector::reinit (const IndexSet   &local,   // locally owned indices (must be contiguous)
+                    const IndexSet &ghost,     // ghost indices; may overlap with local
+                    const MPI_Comm     &comm)
+    {
+      int ierr;   // the old Vec is always destroyed, whatever its size
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+      ierr = VecDestroy (vector);
+#else
+      ierr = VecDestroy (&vector);
+#endif
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+      communicator = comm;
+
+      Assert(local.is_contiguous(), ExcNotImplemented());
+      // locally owned indices are never treated as ghosts: drop them from the set
+      IndexSet ghost_set = ghost;
+      ghost_set.subtract_set(local);
+
+      create_vector(local.size(), local.n_elements(), ghost_set);
+    }
+
+    void
+    Vector::reinit (const IndexSet &local,   // locally owned indices (must be contiguous)
+                    const MPI_Comm &comm)
+    {
+      int ierr;   // the old Vec is always destroyed, whatever its size
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+      ierr = VecDestroy (vector);
+#else
+      ierr = VecDestroy (&vector);
+#endif
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+      communicator = comm;
+
+      Assert(local.is_contiguous(), ExcNotImplemented());
+      Assert(local.size()>0, ExcMessage("can not create vector of size 0."));
+      create_vector(local.size(), local.n_elements());   // non-ghosted vector
+    }
+
+
+    Vector &
+    Vector::operator = (const PETScWrappers::Vector &v)
+    {
+      Assert(last_action==VectorOperation::unknown,
+             ExcMessage("Call to compress() required before calling operator=."));
+      //TODO [TH]: can not access v.last_action here. Implement is_compressed()?
+      //Assert(v.last_action==VectorOperation::unknown,
+      //    ExcMessage("Call to compress() required before calling operator=."));
+      int ierr;
+
+      // get a pointer to the local memory of
+      // this vector
+      PetscScalar *dest_array;
+      ierr = VecGetArray (vector, &dest_array);
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+      // then also a pointer to the source
+      // vector
+      PetscScalar *src_array;
+      ierr = VecGetArray (static_cast<const Vec &>(v), &src_array);
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+      // then copy the slice of the source given by this vector's local
+      // range; the source array is indexed with global indices here
+      const std::pair<size_type, size_type>
+      local_elements = local_range ();
+      std::copy (src_array + local_elements.first,
+                 src_array + local_elements.second,
+                 dest_array);
+
+      // finally restore the arrays
+      ierr = VecRestoreArray (vector, &dest_array);
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+      ierr = VecRestoreArray (static_cast<const Vec &>(v), &src_array);
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+      if (has_ghost_elements())   // bring the ghost entries in line with the new owned values
+        {
+          ierr = VecGhostUpdateBegin(vector, INSERT_VALUES, SCATTER_FORWARD);
+          AssertThrow (ierr == 0, ExcPETScError(ierr));
+          ierr = VecGhostUpdateEnd(vector, INSERT_VALUES, SCATTER_FORWARD);
+          AssertThrow (ierr == 0, ExcPETScError(ierr));
+        }
+      return *this;
+    }
+
+
+    void
+    Vector::create_vector (const size_type n,            // expected global size
+                           const size_type local_size)   // number of elements stored on this process
+    {
+      (void)n;   // n is only used inside Assert; presumably unused when assertions are disabled
+      Assert (local_size <= n, ExcIndexRange (local_size, 0, n));
+      ghosted = false;
+      // only the local size is passed on; PETSc determines the global size
+      const int ierr
+        = VecCreateMPI (communicator, local_size, PETSC_DETERMINE,
+                        &vector);
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+      // the global size PETSc arrived at must match what the caller expected
+      Assert (size() == n,
+              ExcDimensionMismatch (size(), n));
+    }
+
+
+
+    void
+    Vector::create_vector (const size_type n,            // expected global size
+                           const size_type local_size,   // number of elements stored on this process
+                           const IndexSet &ghostnodes)   // indices of the ghost elements
+    {
+      (void)n;   // n is only used inside Assert; presumably unused when assertions are disabled
+      Assert (local_size <= n, ExcIndexRange (local_size, 0, n));
+      ghosted = true;
+      ghost_indices = ghostnodes;
+      // flatten the ghost index set into a plain array that can be handed to PETSc
+      std::vector<size_type> ghostindices;
+      ghostnodes.fill_index_vector(ghostindices);
+      // NOTE(review): the cast reinterprets size_type storage as PetscInt -- confirm both have the same width
+      const PetscInt *ptr
+        = (ghostindices.size() > 0
+           ?
+           (const PetscInt *)(&(ghostindices[0]))
+           :
+           0);
+
+      int ierr
+        = VecCreateGhost(communicator,
+                         local_size,
+                         PETSC_DETERMINE,
+                         ghostindices.size(),
+                         ptr,
+                         &vector);
+
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+      Assert (size() == n,
+              ExcDimensionMismatch (size(), n));
+
+#if DEBUG
+      {
+        // test ghost allocation in debug mode
+        PetscInt begin, end;
+
+        ierr = VecGetOwnershipRange (vector, &begin, &end);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+        Assert(local_size==(size_type)(end-begin), ExcInternalError());
+
+        Vec l;
+        ierr = VecGhostGetLocalForm(vector, &l);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+        PetscInt lsize;
+        ierr = VecGetSize(l, &lsize);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+        ierr = VecGhostRestoreLocalForm(vector, &l);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+        // the local form must hold the owned elements followed by the ghosts
+        Assert (lsize==end-begin+(PetscInt)ghost_indices.n_elements(),
+                ExcInternalError());
+      }
+#endif
+
+
+      // in PETSc versions up to 3.5, VecCreateGhost zeroed out the locally
+      // owned vector elements but forgot about the ghost elements. we need to
+      // do this ourselves
+      //
+      // see https://code.google.com/p/dealii/issues/detail?id=233
+#if DEAL_II_PETSC_VERSION_LT(3,6,0)
+      PETScWrappers::MPI::Vector zero;
+      zero.reinit (communicator, this->size(), local_size);
+      *this = zero;
+#endif
+
+    }
+
+
+
+    bool
+    Vector::all_zero() const
+    {
+      // 1 if this process stores a nonzero entry, 0 otherwise
+      const unsigned int local_flag = (VectorBase::all_zero() ? 0 : 1);
+#ifdef DEAL_II_WITH_MPI
+      // add up the flags of all processes: the vector is
+      // zero only if nobody reported a nonzero entry
+      return Utilities::MPI::sum(local_flag, communicator) == 0;
+#else
+      return local_flag == 0;
+#endif
+    }
+
+
+    void
+    Vector::print (std::ostream      &out,          // stream to write to; must be in a good state
+                   const unsigned int precision,    // precision set on the stream while printing
+                   const bool         scientific,   // scientific notation if true, fixed otherwise
+                   const bool         across) const // one line per process if true, one line per element otherwise
+    {
+      AssertThrow (out, ExcIO());
+
+      // get a representation of the locally stored
+      // elements and the index range they cover
+      PetscScalar *val;
+      PetscInt    nlocal, istart, iend;
+
+      int ierr = VecGetArray (vector, &val);
+
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+      ierr = VecGetLocalSize (vector, &nlocal);
+
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+      ierr = VecGetOwnershipRange (vector, &istart, &iend);
+
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+      // save the state of the out stream; precision() installs the new
+      // precision and returns the old one as a std::streamsize
+      const std::ios::fmtflags old_flags     = out.flags();
+      const std::streamsize    old_precision = out.precision (precision);
+
+      if (scientific)
+        out.setf (std::ios::scientific, std::ios::floatfield);
+      else
+        out.setf (std::ios::fixed, std::ios::floatfield);
+
+      for ( unsigned int i = 0;
+            i < Utilities::MPI::n_mpi_processes(communicator);
+            i++)
+        {
+          // This is slow, but most likely only used to debug.
+          MPI_Barrier(communicator);
+          if (i == Utilities::MPI::this_mpi_process(communicator))
+            {
+              if (across)
+                {
+                  out << "[Proc" << i << " " << istart << "-" << iend-1 << "]" << ' ';
+                  for (PetscInt j=0; j<nlocal; ++j)
+                    out << val[j] << ' ';
+                }
+              else
+                {
+                  out << "[Proc " << i << " " << istart << "-" << iend-1 << "]" << std::endl;
+                  for (PetscInt j=0; j<nlocal; ++j)
+                    out << val[j] << std::endl;
+                }
+              out << std::endl;
+            }
+        }
+      // reset output format
+      out.flags (old_flags);
+      out.precision(old_precision);
+
+      // restore the representation of the
+      // vector
+      ierr = VecRestoreArray (vector, &val);
+      AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+      AssertThrow (out, ExcIO());
+    }
+
+  }
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_PETSC
diff --git a/source/lac/petsc_precondition.cc b/source/lac/petsc_precondition.cc
new file mode 100644
index 0000000..8906e0c
--- /dev/null
+++ b/source/lac/petsc_precondition.cc
@@ -0,0 +1,755 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/petsc_precondition.h>
+
+#ifdef DEAL_II_WITH_PETSC
+
+#  include <deal.II/base/utilities.h>
+#  include <deal.II/lac/petsc_matrix_base.h>
+#  include <deal.II/lac/petsc_vector_base.h>
+#  include <deal.II/lac/petsc_solver.h>
+#  include <petscconf.h>
+#  include <cmath>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace PETScWrappers
+{
+  PreconditionerBase::PreconditionerBase ()
+    :
+    pc(NULL), matrix(NULL)
+  {}
+
+
+  PreconditionerBase::~PreconditionerBase ()
+  {
+    if (pc!=NULL)   // nothing to release if no PC object was ever created
+      {
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+        int ierr = PCDestroy(pc);    // this PETSc range takes the handle by value
+#else
+        int ierr = PCDestroy(&pc);   // this PETSc range takes a pointer to the handle
+#endif
+        AssertThrow (ierr == 0, ExcPETScError(ierr));   // NOTE(review): can throw from a destructor -- confirm intended
+      }
+  }
+
+
+  void
+  PreconditionerBase::vmult (VectorBase       &dst,        // receives the result of applying the preconditioner
+                             const VectorBase &src) const  // vector the preconditioner is applied to
+  {
+    AssertThrow (pc != NULL, StandardExceptions::ExcInvalidState ());   // a PC object must exist
+
+    int ierr;
+    ierr = PCApply(pc, src, dst);   // note the argument order: input first, output second
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+  void
+  PreconditionerBase::create_pc ()
+  {
+    // only allow the creation of the
+    // preconditioner once
+    AssertThrow (pc == NULL, StandardExceptions::ExcInvalidState ());
+
+    MPI_Comm comm;
+    int ierr;
+    // query the communicator of the stored matrix. this ugly
+    // cast is necessary because the types Mat and PetscObject
+    // are unrelated.
+    ierr = PetscObjectGetComm(reinterpret_cast<PetscObject>(matrix), &comm);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+    // create the PC object on that communicator
+    ierr = PCCreate(comm, &pc);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+    // the same matrix is passed for both matrix arguments; the call lost its last argument in PETSc 3.5
+#if DEAL_II_PETSC_VERSION_LT(3, 5, 0)
+    ierr = PCSetOperators(pc , matrix, matrix, SAME_PRECONDITIONER);
+#else
+    ierr = PCSetOperators(pc , matrix, matrix);
+#endif
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+  const PC &
+  PreconditionerBase::get_pc () const
+  {
+    return pc;
+  }
+
+
+  PreconditionerBase::operator Mat () const
+  {
+    return matrix;
+  }
+
+
+  /* ----------------- PreconditionJacobi -------------------- */
+  PreconditionJacobi::PreconditionJacobi (const MPI_Comm comm,   // communicator the PC object is created on
+                                          const AdditionalData &additional_data_)
+  {
+    additional_data = additional_data_;
+    // create a PC object without attaching a matrix to it
+    int ierr = PCCreate(comm, &pc);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    initialize();   // select the Jacobi type and read runtime options
+  }
+
+
+  PreconditionJacobi::PreconditionJacobi ()
+  {}
+
+
+  PreconditionJacobi::PreconditionJacobi (const MatrixBase     &matrix,
+                                          const AdditionalData &additional_data)
+  {
+    initialize(matrix, additional_data);
+  }
+
+  void
+  PreconditionJacobi::initialize()   // expects pc to exist already; it is not created here
+  {
+    int ierr;
+    ierr = PCSetType (pc, const_cast<char *>(PCJACOBI));   // select the Jacobi preconditioner type
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    ierr = PCSetFromOptions (pc);   // let PETSc runtime options adjust the PC
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+  void
+  PreconditionJacobi::initialize (const MatrixBase     &matrix_,           // matrix the preconditioner is built for
+                                  const AdditionalData &additional_data_)  // stored in additional_data
+  {
+    matrix = static_cast<Mat>(matrix_);   // keep the underlying PETSc matrix handle
+    additional_data = additional_data_;
+
+    create_pc();    // create the PC object and attach the matrix
+    initialize();   // select the Jacobi type and read runtime options
+
+    int ierr = PCSetUp (pc);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+  /* ----------------- PreconditionBlockJacobi -------------------- */
+  PreconditionBlockJacobi::PreconditionBlockJacobi (const MPI_Comm comm,   // communicator the PC object is created on
+                                                    const AdditionalData &additional_data_)
+  {
+    additional_data = additional_data_;
+    // create a PC object without attaching a matrix to it
+    int ierr = PCCreate(comm, &pc);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    initialize();   // select the block Jacobi type and read runtime options
+  }
+
+
+  PreconditionBlockJacobi::PreconditionBlockJacobi ()
+  {}
+
+
+  PreconditionBlockJacobi::
+  PreconditionBlockJacobi (const MatrixBase     &matrix,
+                           const AdditionalData &additional_data)
+  {
+    initialize(matrix, additional_data);
+  }
+
+  void
+  PreconditionBlockJacobi::initialize()   // expects pc to exist already; it is not created here
+  {
+    int ierr;
+    ierr = PCSetType (pc, const_cast<char *>(PCBJACOBI));   // select the block Jacobi preconditioner type
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    ierr = PCSetFromOptions (pc);   // let PETSc runtime options adjust the PC
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+  void
+  PreconditionBlockJacobi::initialize (const MatrixBase     &matrix_,           // matrix the preconditioner is built for
+                                       const AdditionalData &additional_data_)  // stored in additional_data
+  {
+    matrix = static_cast<Mat>(matrix_);   // keep the underlying PETSc matrix handle
+    additional_data = additional_data_;
+
+    create_pc();    // create the PC object and attach the matrix
+    initialize();   // select the block Jacobi type and read runtime options
+
+    int ierr = PCSetUp (pc);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+  /* ----------------- PreconditionSOR -------------------- */
+
+  PreconditionSOR::AdditionalData::
+  AdditionalData (const double omega)
+    :
+    omega (omega)
+  {}
+
+
+  PreconditionSOR::PreconditionSOR ()
+  {}
+
+
+  PreconditionSOR::PreconditionSOR (const MatrixBase     &matrix,
+                                    const AdditionalData &additional_data)
+  {
+    initialize(matrix, additional_data);
+  }
+
+
+  void
+  PreconditionSOR::initialize (const MatrixBase     &matrix_,           // matrix the preconditioner is built for
+                               const AdditionalData &additional_data_)  // supplies omega
+  {
+    matrix = static_cast<Mat>(matrix_);   // keep the underlying PETSc matrix handle
+    additional_data = additional_data_;
+
+    create_pc();   // create the PC object and attach the matrix
+
+    int ierr;
+    ierr = PCSetType (pc, const_cast<char *>(PCSOR));
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // then pass on the omega value given in additional_data
+    ierr = PCSORSetOmega (pc, additional_data.omega);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+    // runtime options are read after the explicit settings above
+    ierr = PCSetFromOptions (pc);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    ierr = PCSetUp (pc);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+  /* ----------------- PreconditionSSOR -------------------- */
+
+  PreconditionSSOR::AdditionalData::
+  AdditionalData (const double omega)
+    :
+    omega (omega)
+  {}
+
+
+  PreconditionSSOR::PreconditionSSOR ()
+  {}
+
+
+  PreconditionSSOR::PreconditionSSOR (const MatrixBase     &matrix,
+                                      const AdditionalData &additional_data)
+  {
+    initialize(matrix, additional_data);
+  }
+
+
+  void
+  PreconditionSSOR::initialize (const MatrixBase     &matrix_,           // matrix the preconditioner is built for
+                                const AdditionalData &additional_data_)  // supplies omega
+  {
+    matrix = static_cast<Mat>(matrix_);   // keep the underlying PETSc matrix handle
+    additional_data = additional_data_;
+
+    create_pc();   // create the PC object and attach the matrix
+
+    int ierr;
+    ierr = PCSetType (pc, const_cast<char *>(PCSOR));   // SSOR is configured as SOR plus a symmetric sweep
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // then pass on the omega value given in additional_data
+    ierr = PCSORSetOmega (pc, additional_data.omega);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // convert SOR to SSOR
+    ierr = PCSORSetSymmetric (pc, SOR_SYMMETRIC_SWEEP);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+    // runtime options are read after the explicit settings above
+    ierr = PCSetFromOptions (pc);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    ierr = PCSetUp (pc);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+  /* ----------------- PreconditionEisenstat -------------------- */
+
+  PreconditionEisenstat::AdditionalData::
+  AdditionalData (const double omega)
+    :
+    omega (omega)
+  {}
+
+
+  PreconditionEisenstat::PreconditionEisenstat ()
+  {}
+
+
+  PreconditionEisenstat::PreconditionEisenstat (const MatrixBase     &matrix,
+                                                const AdditionalData &additional_data)
+  {
+    initialize(matrix, additional_data);
+  }
+
+
+  void
+  PreconditionEisenstat::initialize (const MatrixBase     &matrix_,           // matrix the preconditioner is built for
+                                     const AdditionalData &additional_data_)  // supplies omega
+  {
+    matrix = static_cast<Mat>(matrix_);   // keep the underlying PETSc matrix handle
+    additional_data = additional_data_;
+
+    create_pc();   // create the PC object and attach the matrix
+
+    int ierr;
+    ierr = PCSetType (pc, const_cast<char *>(PCEISENSTAT));
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // then pass on the omega value given in additional_data
+    ierr = PCEisenstatSetOmega (pc, additional_data.omega);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+    // runtime options are read after the explicit settings above
+    ierr = PCSetFromOptions (pc);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    ierr = PCSetUp (pc);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+  /* ----------------- PreconditionICC -------------------- */
+
+
+  PreconditionICC::AdditionalData::
+  AdditionalData (const unsigned int levels)
+    :
+    levels (levels)
+  {}
+
+
+  PreconditionICC::PreconditionICC ()
+  {}
+
+
+  PreconditionICC::PreconditionICC (const MatrixBase     &matrix,
+                                    const AdditionalData &additional_data)
+  {
+    initialize(matrix, additional_data);
+  }
+
+
+  void
+  PreconditionICC::initialize (const MatrixBase     &matrix_,           // matrix the preconditioner is built for
+                               const AdditionalData &additional_data_)  // supplies the number of levels
+  {
+    matrix = static_cast<Mat>(matrix_);   // keep the underlying PETSc matrix handle
+    additional_data = additional_data_;
+
+    create_pc();   // create the PC object and attach the matrix
+
+    int ierr;
+    ierr = PCSetType (pc, const_cast<char *>(PCICC));
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // then set the levels; keep the return code so that the check below tests this call
+    ierr = PCFactorSetLevels (pc, additional_data.levels);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+    // runtime options are read after the explicit settings above
+    ierr = PCSetFromOptions (pc);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    ierr = PCSetUp (pc);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+  /* ----------------- PreconditionILU -------------------- */
+
+  PreconditionILU::AdditionalData::
+  AdditionalData (const unsigned int levels)
+    :
+    levels (levels)
+  {}
+
+
+  PreconditionILU::PreconditionILU ()
+  {}
+
+
+  PreconditionILU::PreconditionILU (const MatrixBase     &matrix,
+                                    const AdditionalData &additional_data)
+  {
+    initialize(matrix, additional_data);
+  }
+
+
+  void
+  PreconditionILU::initialize (const MatrixBase     &matrix_,           // matrix the preconditioner is built for
+                               const AdditionalData &additional_data_)  // supplies the number of levels
+  {
+    matrix = static_cast<Mat>(matrix_);   // keep the underlying PETSc matrix handle
+    additional_data = additional_data_;
+
+    create_pc();   // create the PC object and attach the matrix
+
+    int ierr;
+    ierr = PCSetType (pc, const_cast<char *>(PCILU));
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // then set the levels; keep the return code so that the check below tests this call
+    ierr = PCFactorSetLevels (pc, additional_data.levels);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+    // runtime options are read after the explicit settings above
+    ierr = PCSetFromOptions (pc);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    ierr = PCSetUp (pc);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+  /* ----------------- PreconditionBoomerAMG -------------------- */
+
+  PreconditionBoomerAMG::AdditionalData::
+  AdditionalData(const bool symmetric_operator,
+                 const double strong_threshold,
+                 const double max_row_sum,
+                 const unsigned int aggressive_coarsening_num_levels,
+                 const bool output_details
+                )
+    :
+    symmetric_operator(symmetric_operator),
+    strong_threshold(strong_threshold),
+    max_row_sum(max_row_sum),
+    aggressive_coarsening_num_levels(aggressive_coarsening_num_levels),
+    output_details(output_details)
+  {}
+
+
+  PreconditionBoomerAMG::PreconditionBoomerAMG ()
+  {}
+
+  PreconditionBoomerAMG::PreconditionBoomerAMG (const MPI_Comm comm,   // communicator the PC object is created on
+                                                const AdditionalData &additional_data_)
+  {
+    additional_data = additional_data_;
+    // the PC object is created whether or not hypre is available
+    int ierr = PCCreate(comm, &pc);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+#ifdef PETSC_HAVE_HYPRE
+    initialize();
+#else // PETSC_HAVE_HYPRE
+    (void)pc;
+    Assert (false,
+            ExcMessage ("Your PETSc installation does not include a copy of "
+                        "the hypre package necessary for this preconditioner."));
+#endif
+  }
+
+
+  PreconditionBoomerAMG::PreconditionBoomerAMG (const MatrixBase     &matrix,
+                                                const AdditionalData &additional_data)
+  {
+    initialize(matrix, additional_data);
+  }
+
+  void
+  PreconditionBoomerAMG::initialize ()   // expects pc to exist already; it is not created here
+  {
+    int ierr;
+    ierr = PCSetType (pc, const_cast<char *>(PCHYPRE));
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    ierr = PCHYPRESetType(pc, "boomeramg");
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+    // the settings below are handed over as PETSc options; NOTE(review): their return codes are not checked
+    if (additional_data.output_details)
+      PetscOptionsSetValue("-pc_hypre_boomeramg_print_statistics","1");
+
+    PetscOptionsSetValue("-pc_hypre_boomeramg_agg_nl",
+                         Utilities::int_to_string(
+                           additional_data.aggressive_coarsening_num_levels
+                         ).c_str());
+    // floating point values are converted to text through a string stream
+    std::stringstream ssStream;
+    ssStream << additional_data.max_row_sum;
+    PetscOptionsSetValue("-pc_hypre_boomeramg_max_row_sum", ssStream.str().c_str());
+
+    ssStream.str(""); // empty the stringstream
+    ssStream << additional_data.strong_threshold;
+    PetscOptionsSetValue("-pc_hypre_boomeramg_strong_threshold", ssStream.str().c_str());
+    // the two branches differ only in the up/down relaxation type
+    if (additional_data.symmetric_operator)
+      {
+        PetscOptionsSetValue("-pc_hypre_boomeramg_relax_type_up", "symmetric-SOR/Jacobi");
+        PetscOptionsSetValue("-pc_hypre_boomeramg_relax_type_down", "symmetric-SOR/Jacobi");
+        PetscOptionsSetValue("-pc_hypre_boomeramg_relax_type_coarse", "Gaussian-elimination");
+      }
+    else
+      {
+        PetscOptionsSetValue("-pc_hypre_boomeramg_relax_type_up", "SOR/Jacobi");
+        PetscOptionsSetValue("-pc_hypre_boomeramg_relax_type_down", "SOR/Jacobi");
+        PetscOptionsSetValue("-pc_hypre_boomeramg_relax_type_coarse", "Gaussian-elimination");
+      }
+    // finally let the PC pick up the options set above
+    ierr = PCSetFromOptions (pc);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+  void
+  PreconditionBoomerAMG::initialize (const MatrixBase     &matrix_,           // matrix the preconditioner is built for
+                                     const AdditionalData &additional_data_)  // stored in additional_data
+  {
+    matrix = static_cast<Mat>(matrix_);   // keep the underlying PETSc matrix handle
+    additional_data = additional_data_;
+
+#ifdef PETSC_HAVE_HYPRE
+    create_pc();     // create the PC object and attach the matrix
+    initialize ();   // select hypre/boomeramg and pass on the options
+
+    int ierr = PCSetUp (pc);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+#else // PETSC_HAVE_HYPRE
+    (void)pc;
+    Assert (false,
+            ExcMessage ("Your PETSc installation does not include a copy of "
+                        "the hypre package necessary for this preconditioner."));
+#endif
+  }
+
+
+  /* ----------------- PreconditionParaSails -------------------- */
+
+  PreconditionParaSails::AdditionalData::
+  AdditionalData(const unsigned int symmetric,
+                 const unsigned int n_levels,
+                 const double threshold,
+                 const double filter,
+                 const bool output_details)
+    :
+    symmetric(symmetric),
+    n_levels(n_levels),
+    threshold(threshold),
+    filter(filter),
+    output_details(output_details)
+  {}
+
+
+  PreconditionParaSails::PreconditionParaSails ()
+  {}
+
+
+  PreconditionParaSails::PreconditionParaSails (const MatrixBase     &matrix,
+                                                const AdditionalData &additional_data)
+  {
+    initialize(matrix, additional_data);
+  }
+
+
+  void
+  PreconditionParaSails::initialize (const MatrixBase     &matrix_,           // matrix the preconditioner is built for
+                                     const AdditionalData &additional_data_)  // stored in additional_data
+  {
+    matrix = static_cast<Mat>(matrix_);   // keep the underlying PETSc matrix handle
+    additional_data = additional_data_;
+
+#ifdef PETSC_HAVE_HYPRE
+    create_pc();   // create the PC object and attach the matrix
+
+    int ierr;
+    ierr = PCSetType (pc, const_cast<char *>(PCHYPRE));
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    ierr = PCHYPRESetType(pc, "parasails");
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+    // the settings below are handed over as PETSc options; NOTE(review): their return codes are not checked
+    if (additional_data.output_details)
+      PetscOptionsSetValue("-pc_hypre_parasails_logging","1");
+
+    Assert ((additional_data.symmetric == 0 ||
+             additional_data.symmetric == 1 ||
+             additional_data.symmetric == 2),
+            ExcMessage("ParaSails parameter symmetric can only be equal to 0, 1, 2!"));
+
+    std::stringstream ssStream;
+    // translate the numeric symmetric flag into the option string
+    switch (additional_data.symmetric)
+      {
+      case 0:
+      {
+        ssStream << "nonsymmetric";
+        break;
+      }
+
+      case 1:
+      {
+        ssStream << "SPD";
+        break;
+      }
+
+      case 2:
+      {
+        ssStream << "nonsymmetric,SPD";
+        break;
+      }
+
+      default:
+        Assert (false,
+                ExcMessage("ParaSails parameter symmetric can only be equal to 0, 1, 2!"));
+      };
+
+    PetscOptionsSetValue("-pc_hypre_parasails_sym",ssStream.str().c_str());
+
+    PetscOptionsSetValue("-pc_hypre_parasails_nlevels",
+                         Utilities::int_to_string(
+                           additional_data.n_levels
+                         ).c_str());
+
+    ssStream.str(""); // empty the stringstream
+    ssStream << additional_data.threshold;
+    PetscOptionsSetValue("-pc_hypre_parasails_thresh", ssStream.str().c_str());
+
+    ssStream.str(""); // empty the stringstream
+    ssStream << additional_data.filter;
+    PetscOptionsSetValue("-pc_hypre_parasails_filter", ssStream.str().c_str());
+    // finally let the PC pick up the options set above
+    ierr = PCSetFromOptions (pc);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    ierr = PCSetUp (pc);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+#else // PETSC_HAVE_HYPRE
+    (void)pc;
+    Assert (false,
+            ExcMessage ("Your PETSc installation does not include a copy of "
+                        "the hypre package necessary for this preconditioner."));
+#endif
+  }
+
+
+  /* ----------------- PreconditionNone ------------------------- */
+
+  PreconditionNone::PreconditionNone ()
+  {}
+
+
+  PreconditionNone::PreconditionNone (const MatrixBase     &matrix,
+                                      const AdditionalData &additional_data)
+  {
+    initialize (matrix, additional_data);
+  }
+
+
+  void
+  PreconditionNone::initialize (const MatrixBase     &matrix_,           // matrix the PC object is attached to
+                                const AdditionalData &additional_data_)  // stored in additional_data
+  {
+    matrix = static_cast<Mat>(matrix_);   // keep the underlying PETSc matrix handle
+    additional_data = additional_data_;
+
+    create_pc();   // create the PC object and attach the matrix
+
+    int ierr;
+    ierr = PCSetType (pc, const_cast<char *>(PCNONE));
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    ierr = PCSetFromOptions (pc);   // let PETSc runtime options adjust the PC
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    ierr = PCSetUp (pc);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+  /* ----------------- PreconditionLU -------------------- */
+
+  PreconditionLU::AdditionalData::
+  AdditionalData (const double pivoting,
+                  const double zero_pivot,
+                  const double damping)
+    :
+    pivoting (pivoting),
+    zero_pivot (zero_pivot),
+    damping (damping)
+  {}
+
+
+  PreconditionLU::PreconditionLU ()
+  {}
+
+
+  PreconditionLU::PreconditionLU (const MatrixBase     &matrix,
+                                  const AdditionalData &additional_data)
+  {
+    initialize(matrix, additional_data);
+  }
+
+
+  void
+  PreconditionLU::initialize (const MatrixBase     &matrix_,           // matrix the preconditioner is built for
+                              const AdditionalData &additional_data_)  // supplies pivoting, zero_pivot and damping
+  {
+    matrix = static_cast<Mat>(matrix_);   // keep the underlying PETSc matrix handle
+    additional_data = additional_data_;
+
+    create_pc();   // create the PC object and attach the matrix
+
+    int ierr;
+    ierr = PCSetType (pc, const_cast<char *>(PCLU));
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // set flags as given; the pivoting call is named differently before PETSc 3.0.1
+#if DEAL_II_PETSC_VERSION_LT(3,0,1)
+    ierr = PCFactorSetPivoting (pc, additional_data.pivoting);
+#else
+    ierr = PCFactorSetColumnPivot (pc, additional_data.pivoting);
+#endif
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    ierr = PCFactorSetZeroPivot (pc, additional_data.zero_pivot);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+    // the call that receives the damping value is also named differently before PETSc 3.0.1
+#if DEAL_II_PETSC_VERSION_LT(3,0,1)
+    ierr = PCFactorSetShiftNonzero (pc, additional_data.damping);
+#else
+    ierr = PCFactorSetShiftAmount (pc, additional_data.damping);
+#endif
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+    // runtime options are read after the explicit settings above
+    ierr = PCSetFromOptions (pc);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    ierr = PCSetUp (pc);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_PETSC
diff --git a/source/lac/petsc_solver.cc b/source/lac/petsc_solver.cc
new file mode 100644
index 0000000..c12e7f2
--- /dev/null
+++ b/source/lac/petsc_solver.cc
@@ -0,0 +1,917 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/logstream.h>
+#include <deal.II/lac/petsc_solver.h>
+
+#ifdef DEAL_II_WITH_PETSC
+
+#  include <deal.II/lac/petsc_matrix_base.h>
+#  include <deal.II/lac/petsc_vector_base.h>
+#  include <deal.II/lac/petsc_precondition.h>
+#  include <cmath>
+
+#include <petscversion.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace PETScWrappers
+{
+
+  // Release the PETSc KSP object owned by this structure, if there is one.
+  SolverBase::SolverData::~SolverData ()
+  {
+    // nothing to release if no solver object is stored
+    if (ksp == NULL)
+      return;
+
+    // KSPDestroy takes the object by value before PETSc 3.2 and by
+    // pointer from 3.2 on
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    const int ierr = KSPDestroy (ksp);
+#else
+    const int ierr = KSPDestroy (&ksp);
+#endif
+
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+
+  // Store the solver control object and the MPI communicator; the PETSc
+  // solver object itself is not created here.
+  SolverBase::SolverBase (SolverControl  &cn,
+                          const MPI_Comm &mpi_communicator)
+    : solver_control (cn), mpi_communicator (mpi_communicator)
+  {}
+
+
+
+  // Destructor: empty body; members clean up through their own destructors.
+  SolverBase::~SolverBase () {}
+
+
+
+  // Solve the linear system A x = b with the given preconditioner.
+  //
+  // On the first call (when no solver data exists yet) a KSP object is
+  // created on the stored communicator, its type is chosen by the derived
+  // class through set_solver_type(), the preconditioner and operators are
+  // attached, and convergence_test() is installed as the convergence
+  // criterion. On every call the options prefix and command line options
+  // are (re)applied before KSPSetUp/KSPSolve are run.
+  //
+  // Throws ExcPETScError if a PETSc call returns a nonzero error code,
+  // ExcMessage if the preconditioner has no matrix attached, and
+  // SolverControl::NoConvergence if the solver control object does not
+  // report success after the solve.
+  void
+  SolverBase::solve (const MatrixBase         &A,
+                     VectorBase               &x,
+                     const VectorBase         &b,
+                     const PreconditionerBase &preconditioner)
+  {
+    int ierr;
+
+    /*
+      TODO: PETSc duplicates communicators, so this does not work (you put
+      MPI_COMM_SELF in, but get something other out when you ask PETSc for
+      the communicator). This mainly fails due to the MatrixFree classes,
+      that can not ask PETSc for a communicator. //Timo Heister
+    Assert(A.get_mpi_communicator()==mpi_communicator, ExcMessage("PETSc Solver and Matrix need to use the same MPI_Comm."));
+    Assert(x.get_mpi_communicator()==mpi_communicator, ExcMessage("PETSc Solver and Vector need to use the same MPI_Comm."));
+    Assert(b.get_mpi_communicator()==mpi_communicator, ExcMessage("PETSc Solver and Vector need to use the same MPI_Comm."));
+    */
+
+    // first create a solver object if this
+    // is necessary
+    if (solver_data.get() == 0)
+      {
+        solver_data.reset (new SolverData());
+
+        ierr = KSPCreate (mpi_communicator, &solver_data->ksp);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+        // let derived classes set the solver
+        // type, and the preconditioning
+        // object set the type of
+        // preconditioner
+        set_solver_type (solver_data->ksp);
+
+        ierr = KSPSetPC (solver_data->ksp, preconditioner.get_pc());
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+        // make sure the preconditioner has an associated matrix set
+        // (note the trailing space: the two literals are concatenated)
+        const Mat B = preconditioner;
+        AssertThrow (B != NULL,
+                     ExcMessage("PETSc preconditioner should have an "
+                                "associated matrix set to be used in solver."));
+
+        // setting the preconditioner overwrites the used matrices.
+        // hence, we need to set the matrices after the preconditioner.
+#if DEAL_II_PETSC_VERSION_LT(3, 5, 0)
+        // the last argument is irrelevant here,
+        // since we use the solver only once anyway
+        ierr = KSPSetOperators (solver_data->ksp, A, preconditioner,
+                                SAME_PRECONDITIONER);
+#else
+        ierr = KSPSetOperators (solver_data->ksp, A, preconditioner);
+#endif
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+        // then a convergence monitor
+        // function. that function simply
+        // checks with the solver_control
+        // object we have in this object for
+        // convergence
+        ierr = KSPSetConvergenceTest (solver_data->ksp, &convergence_test,
+                                      reinterpret_cast<void *>(&solver_control),
+                                      PETSC_NULL);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+      }
+
+    // set the command line option prefix name
+    ierr = KSPSetOptionsPrefix(solver_data->ksp, prefix_name.c_str());
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // set the command line options provided
+    // by the user to override the defaults
+    ierr = KSPSetFromOptions (solver_data->ksp);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // then do the real work: set up solver
+    // internal data and solve the
+    // system.
+    ierr = KSPSetUp (solver_data->ksp);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    ierr = KSPSolve (solver_data->ksp, b, x);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // do not destroy solver object
+//    solver_data.reset ();
+
+    // in case of failure: throw
+    // exception
+    if (solver_control.last_check() != SolverControl::success)
+      AssertThrow(false, SolverControl::NoConvergence (solver_control.last_step(),
+                                                       solver_control.last_value()));
+    // otherwise exit as normal
+  }
+
+
+  // Remember the options prefix; solve() hands it to KSPSetOptionsPrefix().
+  void
+  SolverBase::set_prefix(const std::string &prefix)
+  {
+    this->prefix_name = prefix;
+  }
+
+
+  // Drop the stored solver data, so that the next solve() creates a fresh
+  // KSP object.
+  void
+  SolverBase::reset()
+  {
+    this->solver_data.reset ();
+  }
+
+
+  // Give access to the solver control object passed to the constructor.
+  SolverControl &
+  SolverBase::control() const
+  {
+    return this->solver_control;
+  }
+
+
+  // Convergence callback registered with PETSc through
+  // KSPSetConvergenceTest(). solver_control_x is the SolverControl object
+  // that was registered as the callback context; its verdict for the
+  // current iteration and residual norm is translated into a
+  // KSPConvergedReason. Always returns 0.
+  int
+  SolverBase::convergence_test (KSP                 /*ksp*/,
+                                const PetscInt      iteration,
+                                const PetscReal     residual_norm,
+                                KSPConvergedReason *reason,
+                                void               *solver_control_x)
+  {
+    SolverControl *const control
+      = reinterpret_cast<SolverControl *>(solver_control_x);
+
+    const SolverControl::State state
+      = control->check (iteration, residual_norm);
+
+    if (state == ::dealii::SolverControl::iterate)
+      {
+        // keep going
+        *reason = KSP_CONVERGED_ITERATING;
+      }
+    else if (state == ::dealii::SolverControl::success)
+      {
+        // any positive reason value signals convergence to PETSc
+        *reason = static_cast<KSPConvergedReason>(1);
+      }
+    else if (state == ::dealii::SolverControl::failure)
+      {
+        // distinguish "too many iterations" from other failures
+        *reason = (control->last_step() > control->max_steps())
+                  ? KSP_DIVERGED_ITS
+                  : KSP_DIVERGED_DTOL;
+      }
+    else
+      {
+        Assert (false, ExcNotImplemented());
+      }
+
+    // return without failure
+    return 0;
+  }
+
+  // (Re)create the KSP solver object and attach the given preconditioner,
+  // without setting any operators or solving anything. Any previously
+  // stored solver data is replaced. Throws ExcPETScError if a PETSc call
+  // returns a nonzero error code.
+  void
+  SolverBase::initialize(const PreconditionerBase &preconditioner)
+  {
+    int ierr;
+
+    solver_data.reset (new SolverData());
+
+    ierr = KSPCreate (mpi_communicator, &solver_data->ksp);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // let derived classes set the solver
+    // type, and the preconditioning
+    // object set the type of
+    // preconditioner
+    set_solver_type (solver_data->ksp);
+
+    ierr = KSPSetPC (solver_data->ksp, preconditioner.get_pc());
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // then a convergence monitor
+    // function. that function simply
+    // checks with the solver_control
+    // object we have in this object for
+    // convergence
+    ierr = KSPSetConvergenceTest (solver_data->ksp, &convergence_test,
+                                  reinterpret_cast<void *>(&solver_control),
+                                  PETSC_NULL);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // set the command line options provided
+    // by the user to override the defaults
+    ierr = KSPSetFromOptions (solver_data->ksp);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+
+  /* ---------------------- SolverRichardson ------------------------ */
+
+  // Store the parameter omega; set_solver_type() passes it to
+  // KSPRichardsonSetScale().
+  SolverRichardson::AdditionalData::AdditionalData (const double omega)
+    : omega (omega)
+  {}
+
+
+
+  // Forward control object and communicator to the base class and keep a
+  // copy of the additional data.
+  SolverRichardson::SolverRichardson (SolverControl        &cn,
+                                      const MPI_Comm       &mpi_communicator,
+                                      const AdditionalData &data)
+    : SolverBase (cn, mpi_communicator), additional_data (data)
+  {}
+
+
+  // Configure ksp as a Richardson iteration with the stored damping
+  // factor, a nonzero initial guess, and tolerances taken from the solver
+  // control object. Throws ExcPETScError if a PETSc call fails.
+  void
+  SolverRichardson::set_solver_type (KSP &ksp) const
+  {
+    int ierr;
+    ierr = KSPSetType (ksp, KSPRICHARDSON);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // set the damping factor from the data
+    ierr = KSPRichardsonSetScale (ksp, additional_data.omega);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // in the deal.II solvers, we always
+    // honor the initial guess in the
+    // solution vector. do so here as well:
+    ierr = KSPSetInitialGuessNonzero (ksp, PETSC_TRUE);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // Hand over the absolute
+    // tolerance and the maximum
+    // iteration number to the PETSc
+    // convergence criterion. The
+    // custom deal.II SolverControl
+    // object is ignored by the PETSc
+    // Richardson method (when no
+    // PETSc monitoring is present),
+    // since in this case PETSc
+    // uses a faster version of
+    // the Richardson iteration,
+    // where no residual is
+    // available.
+    ierr = KSPSetTolerances(ksp, PETSC_DEFAULT, this->solver_control.tolerance(),
+                            PETSC_DEFAULT, this->solver_control.max_steps()+1);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+  /* ---------------------- SolverChebychev ------------------------ */
+
+  // Forward control object and communicator to the base class and keep a
+  // copy of the additional data.
+  SolverChebychev::SolverChebychev (SolverControl        &cn,
+                                    const MPI_Comm       &mpi_communicator,
+                                    const AdditionalData &data)
+    : SolverBase (cn, mpi_communicator), additional_data (data)
+  {}
+
+
+  // Configure ksp as a Chebyshev solver that honors a nonzero initial
+  // guess. Throws ExcPETScError if a PETSc call fails.
+  void
+  SolverChebychev::set_solver_type (KSP &ksp) const
+  {
+    // set the type of solver. note the
+    // completely pointless change in
+    // spelling Chebyshev between PETSc 3.2
+    // and 3.3...
+    int ierr;
+
+#if DEAL_II_PETSC_VERSION_LT(3,3,0)
+    ierr = KSPSetType (ksp, KSPCHEBYCHEV);
+#else
+    ierr = KSPSetType (ksp, KSPCHEBYSHEV);
+#endif
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // in the deal.II solvers, we always
+    // honor the initial guess in the
+    // solution vector. do so here as well:
+    ierr = KSPSetInitialGuessNonzero (ksp, PETSC_TRUE);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+  /* ---------------------- SolverCG ------------------------ */
+
+  // Forward control object and communicator to the base class and keep a
+  // copy of the additional data.
+  SolverCG::SolverCG (SolverControl        &cn,
+                      const MPI_Comm       &mpi_communicator,
+                      const AdditionalData &data)
+    : SolverBase (cn, mpi_communicator), additional_data (data)
+  {}
+
+
+  // Configure ksp as a CG solver that honors a nonzero initial guess.
+  // Throws ExcPETScError if a PETSc call fails.
+  void
+  SolverCG::set_solver_type (KSP &ksp) const
+  {
+    int ierr;
+    ierr = KSPSetType (ksp, KSPCG);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // in the deal.II solvers, we always
+    // honor the initial guess in the
+    // solution vector. do so here as well:
+    ierr = KSPSetInitialGuessNonzero (ksp, PETSC_TRUE);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+  /* ---------------------- SolverBiCG ------------------------ */
+
+  // Forward control object and communicator to the base class and keep a
+  // copy of the additional data.
+  SolverBiCG::SolverBiCG (SolverControl        &cn,
+                          const MPI_Comm       &mpi_communicator,
+                          const AdditionalData &data)
+    : SolverBase (cn, mpi_communicator), additional_data (data)
+  {}
+
+
+  // Configure ksp as a BiCG solver that honors a nonzero initial guess.
+  // Throws ExcPETScError if a PETSc call fails.
+  void
+  SolverBiCG::set_solver_type (KSP &ksp) const
+  {
+    int ierr;
+    ierr = KSPSetType (ksp, KSPBICG);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // in the deal.II solvers, we always
+    // honor the initial guess in the
+    // solution vector. do so here as well:
+    ierr = KSPSetInitialGuessNonzero (ksp, PETSC_TRUE);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+  /* ---------------------- SolverGMRES ------------------------ */
+
+  // Store the restart length and the flag selecting right
+  // preconditioning; both are read by set_solver_type().
+  SolverGMRES::AdditionalData::AdditionalData (const unsigned int restart_parameter,
+                                               const bool right_preconditioning)
+    : restart_parameter (restart_parameter),
+      right_preconditioning (right_preconditioning)
+  {}
+
+
+
+  // Forward control object and communicator to the base class and keep a
+  // copy of the additional data.
+  SolverGMRES::SolverGMRES (SolverControl        &cn,
+                            const MPI_Comm       &mpi_communicator,
+                            const AdditionalData &data)
+    : SolverBase (cn, mpi_communicator), additional_data (data)
+  {}
+
+
+  // Configure ksp as a GMRES solver: set the restart length, optionally
+  // switch to right preconditioning, and honor a nonzero initial guess.
+  // Throws ExcPETScError if a PETSc call fails and ExcMessage if the
+  // restart function cannot be looked up on the KSP object.
+  void
+  SolverGMRES::set_solver_type (KSP &ksp) const
+  {
+    int ierr;
+    ierr = KSPSetType (ksp, KSPGMRES);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // set the restart parameter from the
+    // data. we would like to use the simple
+    // code that is commented out, but this
+    // leads to nasty warning and error
+    // messages due to some stupidity on
+    // PETSc's side: KSPGMRESSetRestart is
+    // implemented as a macro in which return
+    // statements are hidden. This may work
+    // if people strictly follow the PETSc
+    // coding style of always having
+    // functions return an integer error
+    // code, but the present function isn't
+    // like this.
+    /*
+        ierr = KSPGMRESSetRestart (ksp, additional_data.restart_parameter);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+    */
+    // so rather expand their macros by hand,
+    // and do some equally nasty stuff that at
+    // least doesn't yield warnings...
+    //
+    // the pointer is declared with PETSc's own types so that the call
+    // below also matches when PetscInt is wider than int, and it starts
+    // out as NULL so that a failed lookup can be detected
+    PetscErrorCode (*fun_ptr)(KSP,PetscInt) = NULL;
+    ierr = PetscObjectQueryFunction((PetscObject)(ksp),
+                                    "KSPGMRESSetRestart_C",
+                                    (void (* *)())&fun_ptr);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // never call through a null pointer if the function was not found
+    AssertThrow (fun_ptr != NULL,
+                 ExcMessage("PETSc did not provide the function "
+                            "KSPGMRESSetRestart_C for this solver object."));
+
+    ierr = (*fun_ptr)(ksp,additional_data.restart_parameter);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // Set preconditioning side to
+    // right
+    if (additional_data.right_preconditioning)
+      {
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+        ierr = KSPSetPreconditionerSide(ksp, PC_RIGHT);
+#else
+        ierr = KSPSetPCSide(ksp, PC_RIGHT);
+#endif
+
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+      }
+
+    // in the deal.II solvers, we always
+    // honor the initial guess in the
+    // solution vector. do so here as well:
+    ierr = KSPSetInitialGuessNonzero (ksp, PETSC_TRUE);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+  /* ---------------------- SolverBicgstab ------------------------ */
+
+  // Forward control object and communicator to the base class and keep a
+  // copy of the additional data.
+  SolverBicgstab::SolverBicgstab (SolverControl        &cn,
+                                  const MPI_Comm       &mpi_communicator,
+                                  const AdditionalData &data)
+    : SolverBase (cn, mpi_communicator), additional_data (data)
+  {}
+
+
+  // Configure ksp as a BiCGStab solver that honors a nonzero initial
+  // guess. Throws ExcPETScError if a PETSc call fails.
+  void
+  SolverBicgstab::set_solver_type (KSP &ksp) const
+  {
+    int ierr;
+    ierr = KSPSetType (ksp, KSPBCGS);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // in the deal.II solvers, we always
+    // honor the initial guess in the
+    // solution vector. do so here as well:
+    ierr = KSPSetInitialGuessNonzero (ksp, PETSC_TRUE);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+  /* ---------------------- SolverCGS ------------------------ */
+
+  // Forward control object and communicator to the base class and keep a
+  // copy of the additional data.
+  SolverCGS::SolverCGS (SolverControl        &cn,
+                        const MPI_Comm       &mpi_communicator,
+                        const AdditionalData &data)
+    : SolverBase (cn, mpi_communicator), additional_data (data)
+  {}
+
+
+  // Configure ksp as a CGS solver that honors a nonzero initial guess.
+  // Throws ExcPETScError if a PETSc call fails.
+  void
+  SolverCGS::set_solver_type (KSP &ksp) const
+  {
+    int ierr;
+    ierr = KSPSetType (ksp, KSPCGS);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // in the deal.II solvers, we always
+    // honor the initial guess in the
+    // solution vector. do so here as well:
+    ierr = KSPSetInitialGuessNonzero (ksp, PETSC_TRUE);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+  /* ---------------------- SolverTFQMR ------------------------ */
+
+  // Forward control object and communicator to the base class and keep a
+  // copy of the additional data.
+  SolverTFQMR::SolverTFQMR (SolverControl        &cn,
+                            const MPI_Comm       &mpi_communicator,
+                            const AdditionalData &data)
+    : SolverBase (cn, mpi_communicator), additional_data (data)
+  {}
+
+
+  // Configure ksp as a TFQMR solver that honors a nonzero initial guess.
+  // Throws ExcPETScError if a PETSc call fails.
+  void
+  SolverTFQMR::set_solver_type (KSP &ksp) const
+  {
+    int ierr;
+    ierr = KSPSetType (ksp, KSPTFQMR);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // in the deal.II solvers, we always
+    // honor the initial guess in the
+    // solution vector. do so here as well:
+    ierr = KSPSetInitialGuessNonzero (ksp, PETSC_TRUE);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+  /* ---------------------- SolverTCQMR ------------------------ */
+
+  // Forward control object and communicator to the base class and keep a
+  // copy of the additional data.
+  SolverTCQMR::SolverTCQMR (SolverControl        &cn,
+                            const MPI_Comm       &mpi_communicator,
+                            const AdditionalData &data)
+    : SolverBase (cn, mpi_communicator), additional_data (data)
+  {}
+
+
+  // Configure ksp as a TCQMR solver that honors a nonzero initial guess.
+  // Throws ExcPETScError if a PETSc call fails.
+  void
+  SolverTCQMR::set_solver_type (KSP &ksp) const
+  {
+    int ierr;
+    ierr = KSPSetType (ksp, KSPTCQMR);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // in the deal.II solvers, we always
+    // honor the initial guess in the
+    // solution vector. do so here as well:
+    ierr = KSPSetInitialGuessNonzero (ksp, PETSC_TRUE);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+  /* ---------------------- SolverCR ------------------------ */
+
+  // Forward control object and communicator to the base class and keep a
+  // copy of the additional data.
+  SolverCR::SolverCR (SolverControl        &cn,
+                      const MPI_Comm       &mpi_communicator,
+                      const AdditionalData &data)
+    : SolverBase (cn, mpi_communicator), additional_data (data)
+  {}
+
+
+  // Configure ksp as a CR solver that honors a nonzero initial guess.
+  // Throws ExcPETScError if a PETSc call fails.
+  void
+  SolverCR::set_solver_type (KSP &ksp) const
+  {
+    int ierr;
+    ierr = KSPSetType (ksp, KSPCR);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // in the deal.II solvers, we always
+    // honor the initial guess in the
+    // solution vector. do so here as well:
+    ierr = KSPSetInitialGuessNonzero (ksp, PETSC_TRUE);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+  /* ---------------------- SolverLSQR ------------------------ */
+
+  // Forward control object and communicator to the base class and keep a
+  // copy of the additional data.
+  SolverLSQR::SolverLSQR (SolverControl        &cn,
+                          const MPI_Comm       &mpi_communicator,
+                          const AdditionalData &data)
+    : SolverBase (cn, mpi_communicator), additional_data (data)
+  {}
+
+
+  // Configure ksp as an LSQR solver that honors a nonzero initial guess.
+  // Throws ExcPETScError if a PETSc call fails.
+  void
+  SolverLSQR::set_solver_type (KSP &ksp) const
+  {
+    int ierr;
+    ierr = KSPSetType (ksp, KSPLSQR);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // in the deal.II solvers, we always
+    // honor the initial guess in the
+    // solution vector. do so here as well:
+    ierr = KSPSetInitialGuessNonzero (ksp, PETSC_TRUE);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+  /* ---------------------- SolverPreOnly ------------------------ */
+
+  // Forward control object and communicator to the base class and keep a
+  // copy of the additional data.
+  SolverPreOnly::SolverPreOnly (SolverControl        &cn,
+                                const MPI_Comm       &mpi_communicator,
+                                const AdditionalData &data)
+    : SolverBase (cn, mpi_communicator), additional_data (data)
+  {}
+
+
+  // Configure ksp as a "preconditioner only" solver with a zero initial
+  // guess, and pre-set the solver control state. Throws ExcPETScError if
+  // a PETSc call fails.
+  void
+  SolverPreOnly::set_solver_type (KSP &ksp) const
+  {
+    int ierr;
+    ierr = KSPSetType (ksp, KSPPREONLY);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // The KSPPREONLY solver of
+    // PETSc never calls the convergence
+    // monitor, which leads to failure
+    // even when everything was ok.
+    // Therefore the SolverControl status
+    // is set to some nice values, which
+    // guarantee a nice result at the end
+    // of the solution process.
+    solver_control.check (1, 0.0);
+
+    // Using the PREONLY solver with
+    // a nonzero initial guess leads
+    // PETSc to produce some error messages.
+    ierr = KSPSetInitialGuessNonzero (ksp, PETSC_FALSE);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+  /* ---------------------- SparseDirectMUMPS------------------------ */
+
+  // Release the PETSc KSP object owned by this structure, if there is one.
+  // The null check mirrors SolverBase::SolverData::~SolverData, so that a
+  // structure whose KSP object was never created is not handed to
+  // KSPDestroy.
+  // NOTE(review): this assumes ksp is null until KSPCreate succeeds (the
+  // structure is created with `new SolverDataMUMPS ()`) -- confirm against
+  // the declaration in the header.
+  SparseDirectMUMPS::SolverDataMUMPS::~SolverDataMUMPS ()
+  {
+    if (ksp != NULL)
+      {
+        // destroy the solver object
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+        int ierr = KSPDestroy (ksp);
+#else
+        int ierr = KSPDestroy (&ksp);
+#endif
+
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+      }
+  }
+
+
+  // Forward control object and communicator to the base class, keep a copy
+  // of the additional data, and start out in non-symmetric mode.
+  SparseDirectMUMPS::SparseDirectMUMPS (SolverControl        &cn,
+                                        const MPI_Comm       &mpi_communicator,
+                                        const AdditionalData &data)
+    : SolverBase (cn, mpi_communicator),
+      additional_data (data),
+      symmetric_mode(false)
+  {}
+
+
+  // Configure ksp as a "preconditioner only" solver with a zero initial
+  // guess, and pre-set the solver control state. Throws ExcPETScError if
+  // a PETSc call fails.
+  void
+  SparseDirectMUMPS::set_solver_type (KSP &ksp) const
+  {
+    /**
+    * KSPPREONLY implements a stub method that applies only the
+    * preconditioner.  Its use is due to SparseDirectMUMPS being a direct
+    * (rather than iterative) solver
+    */
+    int ierr;
+    ierr = KSPSetType (ksp, KSPPREONLY);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    /**
+     * The KSPPREONLY solver of PETSc never calls the convergence monitor,
+     * which leads to failure even when everything was ok. Therefore, the
+     * SolverControl status is set to some nice values, which guarantee a
+     * nice result at the end of the solution process.
+     */
+    solver_control.check (1, 0.0);
+
+    /**
+     * Using a PREONLY solver with a nonzero initial guess leads PETSc to
+     * produce some error messages.
+     */
+    ierr = KSPSetInitialGuessNonzero (ksp, PETSC_FALSE);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+  // Solve A x = b with the MUMPS direct solver through PETSc.
+  //
+  // On the first call the KSP object is created, A is set as both
+  // operator and preconditioning matrix, an LU (or Cholesky, in symmetric
+  // mode) preconditioner backed by MUMPS is configured, and the options
+  // prefix and command line options are applied. Every call then runs
+  // KSPSolve.
+  //
+  // Throws ExcPETScError if a PETSc call returns a nonzero error code and
+  // SolverControl::NoConvergence if the solver control object does not
+  // report success. Without PETSC_HAVE_MUMPS the function only triggers
+  // an assertion explaining that MUMPS is missing.
+  void
+  SparseDirectMUMPS::solve (const MatrixBase &A,
+                            VectorBase       &x,
+                            const VectorBase &b)
+  {
+#ifdef PETSC_HAVE_MUMPS
+    int ierr;
+
+    /**
+     * factorization matrix to be obtained from MUMPS
+     */
+    Mat F;
+
+    /**
+     * setting MUMPS integer control parameters ICNTL to be passed to
+     * MUMPS.  Setting entry 7 of MUMPS ICNTL array (of size 40) to a value
+     * of 2. This sets use of Approximate Minimum Fill (AMF)
+     */
+    PetscInt ival=2, icntl=7;
+    /**
+     * number of iterations to solution (should be 1) for a direct solver
+     */
+    PetscInt its;
+    /**
+     * norm of residual
+     */
+    PetscReal rnorm;
+
+    /**
+     * creating a solver object if this is necessary
+     */
+    if (solver_data.get() == 0)
+      {
+        solver_data.reset (new SolverDataMUMPS ());
+
+        /**
+         * creates the default KSP context and puts it in the location
+         * solver_data->ksp
+         */
+        ierr = KSPCreate (mpi_communicator, &solver_data->ksp);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+        /**
+         * set the matrices involved. the last argument is irrelevant here,
+         * since we use the solver only once anyway
+         */
+#if DEAL_II_PETSC_VERSION_LT(3, 5, 0)
+        ierr = KSPSetOperators (solver_data->ksp, A, A,
+                                DIFFERENT_NONZERO_PATTERN);
+#else
+        ierr = KSPSetOperators (solver_data->ksp, A, A);
+#endif
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+        /**
+         * setting the solver type
+         */
+        set_solver_type (solver_data->ksp);
+
+        /**
+        * getting the associated preconditioner context
+        */
+        ierr = KSPGetPC (solver_data->ksp, & solver_data->pc);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+        /**
+         * build PETSc PC for particular PCLU or PCCHOLESKY preconditioner
+         * depending on whether the symmetric mode has been set
+         */
+        if (symmetric_mode)
+          ierr = PCSetType (solver_data->pc, PCCHOLESKY);
+        else
+          ierr = PCSetType (solver_data->pc, PCLU);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+        /**
+         * convergence monitor function that checks with the solver_control
+         * object for convergence
+         */
+        ierr = KSPSetConvergenceTest (solver_data->ksp, &convergence_test,
+                                      reinterpret_cast<void *>(&solver_control),
+                                      PETSC_NULL);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+        /**
+         * set the software that is to be used to perform the lu
+         * factorization here we start to see differences with the base
+         * class solve function
+         */
+#if DEAL_II_PETSC_VERSION_GTE(3,2,0)
+        ierr = PCFactorSetMatSolverPackage (solver_data->pc, MATSOLVERMUMPS);
+#else
+        ierr = PCFactorSetMatSolverPackage (solver_data->pc, MAT_SOLVER_MUMPS);
+#endif
+        AssertThrow (ierr == 0, ExcPETScError (ierr));
+
+        /**
+         * set up the package to call for the factorization
+         */
+        ierr = PCFactorSetUpMatSolverPackage (solver_data->pc);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+        /**
+         * get the factored matrix F from the preconditioner context.  This
+         * routine is valid only for LU, ILU, Cholesky, and incomplete
+         * Cholesky
+         */
+        ierr = PCFactorGetMatrix(solver_data->pc, &F);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+        /**
+         * Passing the control parameters to MUMPS
+         */
+        ierr = MatMumpsSetIcntl (F, icntl, ival);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+        /**
+         * set the command line option prefix name
+         */
+        ierr = KSPSetOptionsPrefix(solver_data->ksp, prefix_name.c_str());
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+        /**
+         * set the command line options provided by the user to override
+         * the defaults
+         */
+        ierr = KSPSetFromOptions (solver_data->ksp);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+      }
+
+    /**
+     * solve the linear system
+     */
+    ierr = KSPSolve (solver_data->ksp, b, x);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    /**
+    * in case of failure throw exception
+    */
+    if (solver_control.last_check() != SolverControl::success)
+      {
+        AssertThrow(false, SolverControl::NoConvergence (solver_control.last_step(),
+                                                         solver_control.last_value()));
+      }
+    else
+      {
+        /**
+         * obtain convergence information. obtain the number of iterations
+         * and residual norm
+         */
+        ierr = KSPGetIterationNumber (solver_data->ksp, &its);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+        ierr = KSPGetResidualNorm (solver_data->ksp, &rnorm);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+      }
+
+#else  // PETSC_HAVE_MUMPS
+    Assert (false,
+            ExcMessage ("Your PETSc installation does not include a copy of "
+                        "the MUMPS package necessary for this solver. You will need to configure "
+                        "PETSc so that it includes MUMPS, recompile it, and then re-configure "
+                        "and recompile deal.II as well."));
+
+    // Cast to void to silence compiler warnings
+    (void) A;
+    (void) x;
+    (void) b;
+#endif
+
+  }
+
+  // Convergence callback registered with PETSc through
+  // KSPSetConvergenceTest(). solver_control_x is the SolverControl object
+  // that was registered as the callback context; its verdict for the
+  // current iteration and residual norm is translated into a
+  // KSPConvergedReason. Always returns 0.
+  PetscErrorCode SparseDirectMUMPS::convergence_test (KSP               /*ksp*/,
+                                                      const PetscInt      iteration,
+                                                      const PetscReal     residual_norm,
+                                                      KSPConvergedReason *reason,
+                                                      void               *solver_control_x)
+  {
+    SolverControl *const control
+      = reinterpret_cast<SolverControl *>(solver_control_x);
+
+    const SolverControl::State state
+      = control->check (iteration, residual_norm);
+
+    if (state == ::dealii::SolverControl::iterate)
+      {
+        // keep going
+        *reason = KSP_CONVERGED_ITERATING;
+      }
+    else if (state == ::dealii::SolverControl::success)
+      {
+        // any positive reason value signals convergence to PETSc
+        *reason = static_cast<KSPConvergedReason>(1);
+      }
+    else if (state == ::dealii::SolverControl::failure)
+      {
+        // distinguish "too many iterations" from other failures
+        *reason = (control->last_step() > control->max_steps())
+                  ? KSP_DIVERGED_ITS
+                  : KSP_DIVERGED_DTOL;
+      }
+    else
+      {
+        Assert (false, ExcNotImplemented());
+      }
+
+    return 0;
+  }
+
+  // Record whether solve() should select a Cholesky (true) or an LU
+  // (false) factorization when it sets up the solver.
+  void
+  SparseDirectMUMPS::set_symmetric_mode(const bool flag)
+  {
+    this->symmetric_mode = flag;
+  }
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_PETSC
diff --git a/source/lac/petsc_sparse_matrix.cc b/source/lac/petsc_sparse_matrix.cc
new file mode 100644
index 0000000..22c5772
--- /dev/null
+++ b/source/lac/petsc_sparse_matrix.cc
@@ -0,0 +1,356 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/petsc_sparse_matrix.h>
+
+#ifdef DEAL_II_WITH_PETSC
+
+#  include <deal.II/lac/petsc_vector.h>
+#  include <deal.II/lac/sparsity_pattern.h>
+#  include <deal.II/lac/dynamic_sparsity_pattern.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace PETScWrappers
+{
+
+  // Default constructor: create a sequential AIJ matrix with zero rows,
+  // zero columns and no preallocated entries.
+  SparseMatrix::SparseMatrix ()
+  {
+    const int creation_status
+      = MatCreateSeqAIJ(PETSC_COMM_SELF,
+                        /*m=*/0, /*n=*/0, /*n_nonzero_per_row=*/0,
+                        0, &matrix);
+    AssertThrow (creation_status == 0, ExcPETScError(creation_status));
+  }
+
+
+
+  // Create an m x n matrix with the same number of preallocated entries
+  // in every row; all work is delegated to do_reinit().
+  SparseMatrix::SparseMatrix (const size_type m,
+                              const size_type n,
+                              const size_type n_nonzero_per_row,
+                              const bool      is_symmetric)
+  {
+    this->do_reinit (m, n, n_nonzero_per_row, is_symmetric);
+  }
+
+
+
+  // Create an m x n matrix with an individual number of preallocated
+  // entries per row; all work is delegated to do_reinit().
+  SparseMatrix::SparseMatrix (const size_type               m,
+                              const size_type               n,
+                              const std::vector<size_type> &row_lengths,
+                              const bool                    is_symmetric)
+  {
+    this->do_reinit (m, n, row_lengths, is_symmetric);
+  }
+
+
+
+  // Create a matrix whose dimensions and row lengths are taken from the
+  // given sparsity pattern; all work is delegated to do_reinit().
+  template <typename SparsityPatternType>
+  SparseMatrix::
+  SparseMatrix (const SparsityPatternType &sparsity_pattern,
+                const bool                 preset_nonzero_locations)
+  {
+    this->do_reinit (sparsity_pattern, preset_nonzero_locations);
+  }
+
+
+
+  // Assign a scalar to the matrix by forwarding to the base class
+  // operator; returns a reference to this object.
+  SparseMatrix &
+  SparseMatrix::operator = (const double d)
+  {
+    this->MatrixBase::operator = (d);
+    return *this;
+  }
+
+
+
+  // Throw away the current matrix and create a new m x n one with the
+  // same number of preallocated entries in every row.
+  void
+  SparseMatrix::reinit (const size_type m,
+                        const size_type n,
+                        const size_type n_nonzero_per_row,
+                        const bool      is_symmetric)
+  {
+    // release the matrix currently held; MatDestroy() takes the handle
+    // by value before PETSc 3.2 and by address from 3.2 on
+    int destroy_status;
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    destroy_status = MatDestroy (matrix);
+#else
+    destroy_status = MatDestroy (&matrix);
+#endif
+    AssertThrow (destroy_status == 0, ExcPETScError(destroy_status));
+
+    // ...and build its replacement
+    do_reinit (m, n, n_nonzero_per_row, is_symmetric);
+  }
+
+
+
+  // Throw away the current matrix and create a new m x n one with an
+  // individual number of preallocated entries per row.
+  void
+  SparseMatrix::reinit (const size_type               m,
+                        const size_type               n,
+                        const std::vector<size_type> &row_lengths,
+                        const bool                    is_symmetric)
+  {
+    // release the matrix currently held; MatDestroy() takes the handle
+    // by value before PETSc 3.2 and by address from 3.2 on
+    int destroy_status;
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    destroy_status = MatDestroy (matrix);
+#else
+    destroy_status = MatDestroy (&matrix);
+#endif
+    AssertThrow (destroy_status == 0, ExcPETScError(destroy_status));
+
+    // ...and build its replacement
+    do_reinit (m, n, row_lengths, is_symmetric);
+  }
+
+
+
+  // Throw away the current matrix and create a new one shaped after the
+  // given sparsity pattern.
+  template <typename SparsityPatternType>
+  void
+  SparseMatrix::
+  reinit (const SparsityPatternType &sparsity_pattern,
+          const bool                 preset_nonzero_locations)
+  {
+    // release the matrix currently held; MatDestroy() takes the handle
+    // by value before PETSc 3.2 and by address from 3.2 on
+    int destroy_status;
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    destroy_status = MatDestroy (matrix);
+#else
+    destroy_status = MatDestroy (&matrix);
+#endif
+    AssertThrow (destroy_status == 0, ExcPETScError(destroy_status));
+
+    // ...and build its replacement
+    do_reinit (sparsity_pattern, preset_nonzero_locations);
+  }
+
+
+
+  // Return the MPI communicator PETSc associates with the matrix object.
+  //
+  // Because a reference is returned, the communicator is stored in a
+  // function-local static variable. That variable is shared by all
+  // SparseMatrix objects and overwritten by every call, so the returned
+  // reference should be used (or copied) before the function is called
+  // again.
+  const MPI_Comm &
+  SparseMatrix::get_mpi_communicator () const
+  {
+    static MPI_Comm comm;
+
+    // check the return code like for every other PETSc call in this file
+    // instead of silently handing back a possibly stale communicator
+    const int ierr = PetscObjectGetComm((PetscObject)matrix, &comm);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return comm;
+  }
+
+
+
+  // Create a sequential AIJ matrix of size m x n in which every row is
+  // preallocated with n_nonzero_per_row entries, and mark it symmetric
+  // if requested. Does not release a previously held matrix.
+  void
+  SparseMatrix::do_reinit (const size_type m,
+                           const size_type n,
+                           const size_type n_nonzero_per_row,
+                           const bool      is_symmetric)
+  {
+    // PETSc call variant that takes one row length for all rows (the
+    // per-row array argument is left null)
+    const int create_status
+      = MatCreateSeqAIJ(PETSC_COMM_SELF, m, n, n_nonzero_per_row,
+                        0, &matrix);
+    AssertThrow (create_status == 0, ExcPETScError(create_status));
+
+    // nothing else to do unless symmetry was requested
+    if (!is_symmetric)
+      return;
+
+    // MatSetOption() has no value argument before PETSc 3.0
+#if DEAL_II_PETSC_VERSION_LT(3,0,0)
+    const int option_status
+      = MatSetOption (matrix, MAT_SYMMETRIC);
+#else
+    const int option_status
+      = MatSetOption (matrix, MAT_SYMMETRIC, PETSC_TRUE);
+#endif
+
+    AssertThrow (option_status == 0, ExcPETScError(option_status));
+  }
+
+
+
+  // Create a sequential AIJ matrix of size m x n in which row i is
+  // preallocated with row_lengths[i] entries, and mark it symmetric if
+  // requested. row_lengths must have exactly m elements. Does not
+  // release a previously held matrix.
+  void
+  SparseMatrix::do_reinit (const size_type               m,
+                           const size_type               n,
+                           const std::vector<size_type> &row_lengths,
+                           const bool                    is_symmetric)
+  {
+    Assert (row_lengths.size() == m,
+            ExcDimensionMismatch (row_lengths.size(), m));
+
+    // use the call sequence indicating a
+    // maximal number of elements for each
+    // row individually. annoyingly, we
+    // always use unsigned ints for cases
+    // like this, while PETSc wants to see
+    // signed integers. so we have to
+    // convert, unless we want to play dirty
+    // tricks with conversions of pointers
+    const std::vector<PetscInt>
+    int_row_lengths (row_lengths.begin(), row_lengths.end());
+
+    // taking the address of element zero is undefined for an empty
+    // vector (m == 0). pass a null array in that case, exactly as the
+    // default constructor does
+    const PetscInt *const row_length_array
+      = (int_row_lengths.empty()
+         ?
+         static_cast<const PetscInt *>(0)
+         :
+         &int_row_lengths[0]);
+
+    int ierr
+      = MatCreateSeqAIJ(PETSC_COMM_SELF, m, n, 0,
+                        row_length_array, &matrix);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // set symmetric flag, if so requested. MatSetOption() has no value
+    // argument before PETSc 3.0
+    if (is_symmetric == true)
+      {
+#if DEAL_II_PETSC_VERSION_LT(3,0,0)
+        ierr = MatSetOption (matrix, MAT_SYMMETRIC);
+#else
+        ierr = MatSetOption (matrix, MAT_SYMMETRIC, PETSC_TRUE);
+#endif
+
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+      }
+  }
+
+
+
+  // Create a matrix with the dimensions and row lengths of the given
+  // sparsity pattern (never marked symmetric). If
+  // preset_nonzero_locations is true, explicit zeros are additionally
+  // written to every position of the pattern and PETSc is told not to
+  // expect new entries and to keep the pattern of zeroed rows. Does not
+  // release a previously held matrix.
+  template <typename SparsityPatternType>
+  void
+  SparseMatrix::do_reinit (const SparsityPatternType &sparsity_pattern,
+                           const bool                 preset_nonzero_locations)
+  {
+    std::vector<size_type> row_lengths (sparsity_pattern.n_rows());
+    for (size_type i=0; i<sparsity_pattern.n_rows(); ++i)
+      row_lengths[i] = sparsity_pattern.row_length (i);
+
+    do_reinit (sparsity_pattern.n_rows(),
+               sparsity_pattern.n_cols(),
+               row_lengths, false);
+
+    // next preset the exact given matrix
+    // entries with zeros, if the user
+    // requested so. this doesn't avoid any
+    // memory allocations, but it at least
+    // avoids some searches later on. the
+    // key here is that we can use the
+    // matrix set routines that set an
+    // entire row at once, not a single
+    // entry at a time
+    //
+    // for the usefulness of this option
+    // read the documentation of this
+    // class.
+    if (preset_nonzero_locations == true)
+      {
+        int ierr;
+
+        std::vector<PetscInt>    row_entries;
+        std::vector<PetscScalar> row_values;
+        for (size_type i=0; i<sparsity_pattern.n_rows(); ++i)
+          {
+            // an empty row has nothing to insert. skipping it also
+            // avoids taking the address of element zero of an empty
+            // vector below, which is undefined
+            if (row_lengths[i] == 0)
+              continue;
+
+            row_entries.resize (row_lengths[i]);
+            row_values.resize (row_lengths[i], 0.0);
+            for (size_type j=0; j<row_lengths[i]; ++j)
+              row_entries[j] = sparsity_pattern.column_number (i,j);
+
+            // do not silently ignore a failed insertion
+            const PetscInt int_row = i;
+            ierr = MatSetValues (matrix, 1, &int_row,
+                                 row_lengths[i], &row_entries[0],
+                                 &row_values[0], INSERT_VALUES);
+            AssertThrow (ierr == 0, ExcPETScError(ierr));
+          }
+        compress (VectorOperation::insert);
+
+
+        // Tell PETSc that we are not
+        // planning on adding new entries
+        // to the matrix. Generate errors
+        // in debug mode.
+#if DEAL_II_PETSC_VERSION_LT(3,0,0)
+#ifdef DEBUG
+        ierr = MatSetOption (matrix, MAT_NEW_NONZERO_LOCATION_ERR);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+#else
+        ierr = MatSetOption (matrix, MAT_NO_NEW_NONZERO_LOCATIONS);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+#endif
+#else
+#ifdef DEBUG
+        ierr = MatSetOption (matrix, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+#else
+        ierr = MatSetOption (matrix, MAT_NEW_NONZERO_LOCATIONS, PETSC_FALSE);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+#endif
+#endif
+
+        // Tell PETSc to keep the
+        // SparsityPattern entries even if
+        // we delete a row with
+        // clear_rows() which calls
+        // MatZeroRows(). Otherwise one can
+        // not write into that row
+        // afterwards.
+#if DEAL_II_PETSC_VERSION_LT(3,0,0)
+        ierr = MatSetOption (matrix, MAT_KEEP_ZEROED_ROWS);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+#elif DEAL_II_PETSC_VERSION_LT(3,1,0)
+        ierr = MatSetOption (matrix, MAT_KEEP_ZEROED_ROWS, PETSC_TRUE);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+#else
+        ierr = MatSetOption (matrix, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+#endif
+
+      }
+  }
+
+
+  // Explicit instantiations of the member templates defined above
+  // (constructor, reinit() and do_reinit()), one each for
+  // SparsityPattern and DynamicSparsityPattern.
+  //
+  template
+  SparseMatrix::SparseMatrix (const SparsityPattern &,
+                              const bool);
+  template
+  SparseMatrix::SparseMatrix (const DynamicSparsityPattern &,
+                              const bool);
+
+  template void
+  SparseMatrix::reinit (const SparsityPattern &,
+                        const bool);
+  template void
+  SparseMatrix::reinit (const DynamicSparsityPattern &,
+                        const bool);
+
+  template void
+  SparseMatrix::do_reinit (const SparsityPattern &,
+                           const bool);
+  template void
+  SparseMatrix::do_reinit (const DynamicSparsityPattern &,
+                           const bool);
+
+  // Return (M*v) * v, where M is this matrix. The product M*v is
+  // stored in a temporary vector of the size of v.
+  PetscScalar
+  SparseMatrix::matrix_norm_square (const VectorBase &v) const
+  {
+    Vector matrix_times_v (v.size());
+    vmult (matrix_times_v, v);
+
+    const PetscScalar result = matrix_times_v*v;
+    return result;
+  }
+
+  // Return u * (M*v), where M is this matrix. The product M*v is
+  // stored in a temporary vector of the size of v.
+  PetscScalar
+  SparseMatrix::matrix_scalar_product (const VectorBase &u,
+                                       const VectorBase &v) const
+  {
+    Vector matrix_times_v (v.size());
+    vmult (matrix_times_v, v);
+
+    const PetscScalar result = u*matrix_times_v;
+    return result;
+  }
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_PETSC
diff --git a/source/lac/petsc_vector.cc b/source/lac/petsc_vector.cc
new file mode 100644
index 0000000..3541efa
--- /dev/null
+++ b/source/lac/petsc_vector.cc
@@ -0,0 +1,131 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/petsc_vector.h>
+
+#ifdef DEAL_II_WITH_PETSC
+
+#  include <cmath>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace PETScWrappers
+{
+
+
+  // Default constructor: create a sequential vector of length zero.
+  Vector::Vector ()
+    :
+    VectorBase ()
+  {
+    Vector::create_vector (0);
+  }
+
+
+
+  // Create a sequential vector of length n.
+  Vector::Vector (const size_type n)
+    :
+    VectorBase ()
+  {
+    Vector::create_vector (n);
+  }
+
+
+
+  // Copy constructor. The base class is default-constructed; the
+  // contents of v are then taken over through the assignment operator.
+  Vector::Vector (const Vector &v)
+    :
+    VectorBase ()
+  {
+    // the assignment operator needs an existing vector to work on, so
+    // start out with a one-element placeholder
+    this->Vector::create_vector (1);
+
+    // now copy v into it
+    this->Vector::operator = (v);
+  }
+
+
+
+  // Construct a sequential vector from a parallel one. The contents of
+  // v are taken over through the assignment operator.
+  Vector::Vector (const MPI::Vector &v)
+    :
+    VectorBase ()
+  {
+    // the assignment operator needs an existing vector to work on, so
+    // start out with a one-element placeholder
+    this->Vector::create_vector (1);
+
+    // now copy v into it
+    this->Vector::operator = (v);
+  }
+
+
+
+  // Reset the object: let the base class release its state, then
+  // create a fresh vector of length zero.
+  void
+  Vector::clear ()
+  {
+    this->VectorBase::clear ();
+    this->Vector::create_vector (0);
+  }
+
+
+
+  // Resize the vector to n elements. The underlying PETSc vector is
+  // only replaced if the size actually changes. Unless
+  // omit_zeroing_entries is set, all entries are set to zero afterwards.
+  void
+  Vector::reinit (const size_type n,
+                  const bool      omit_zeroing_entries)
+  {
+    const bool size_changes = (size() != n);
+    if (size_changes)
+      {
+        // FIXME: I'd like to use this here,
+        // but somehow it leads to odd errors
+        // somewhere down the line in some of
+        // the tests:
+//         const int ierr = VecSetSizes (vector, n, n);
+//         AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+        // so go the slow way instead: destroy the old vector (if it is
+        // ours to destroy) and create a new one. VecDestroy() takes
+        // the handle by value before PETSc 3.2 and by address from
+        // 3.2 on
+        if (attained_ownership)
+          {
+            int destroy_status;
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+            destroy_status = VecDestroy (vector);
+#else
+            destroy_status = VecDestroy (&vector);
+#endif
+            AssertThrow (destroy_status == 0, ExcPETScError(destroy_status));
+          }
+
+        create_vector (n);
+      }
+
+    // zero out the entries unless the caller opted out
+    if (!omit_zeroing_entries)
+      *this = 0;
+  }
+
+
+
+  // Resize this vector to the size of v; only the size of v is used,
+  // not its contents.
+  void
+  Vector::reinit (const Vector &v,
+                  const bool    omit_zeroing_entries)
+  {
+    const size_type new_size = v.size();
+    reinit (new_size, omit_zeroing_entries);
+  }
+
+
+
+  // Create a sequential PETSc vector with n elements, store its handle
+  // in the vector member, and record that this object owns it. A
+  // handle previously stored there is not released by this function.
+  void
+  Vector::create_vector (const size_type n)
+  {
+    const int creation_status
+      = VecCreateSeq (PETSC_COMM_SELF, n, &vector);
+    AssertThrow (creation_status == 0, ExcPETScError(creation_status));
+
+    attained_ownership = true;
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_PETSC
diff --git a/source/lac/petsc_vector_base.cc b/source/lac/petsc_vector_base.cc
new file mode 100644
index 0000000..e3191cd
--- /dev/null
+++ b/source/lac/petsc_vector_base.cc
@@ -0,0 +1,1203 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2004 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/petsc_vector_base.h>
+
+#ifdef DEAL_II_WITH_PETSC
+
+#  include <deal.II/base/memory_consumption.h>
+#  include <deal.II/lac/petsc_vector.h>
+#  include <deal.II/lac/petsc_parallel_vector.h>
+#  include <cmath>
+#  include <deal.II/base/multithread_info.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace PETScWrappers
+{
+  namespace internal
+  {
+    // Read the value of the vector element this reference points to.
+    //
+    // Three cases are handled: a sequential vector (direct lookup), a
+    // ghosted parallel vector (lookup in the local form, which also
+    // stores the ghost entries), and a non-ghosted parallel vector (the
+    // element must be locally owned, otherwise
+    // ExcAccessToNonlocalElement is thrown). PETSc errors are reported
+    // through ExcPETScError.
+    VectorReference::operator PetscScalar () const
+    {
+      Assert (index < vector.size(),
+              ExcIndexRange (index, 0, vector.size()));
+
+      // if the vector is local, then
+      // simply access the element we
+      // are interested in
+      if (dynamic_cast<const PETScWrappers::Vector *>(&vector) != 0)
+        {
+          PetscInt idx = index;
+          PetscScalar value;
+          int ierr = VecGetValues(vector.vector, 1, &idx, &value);
+          AssertThrow (ierr == 0, ExcPETScError(ierr));
+          return value;
+        }
+      // else see if we are dealing
+      // with a parallel vector
+      else if (dynamic_cast<const PETScWrappers::MPI::Vector *>(&vector) != 0)
+        {
+          int ierr;
+
+          // there is the possibility
+          // that the vector has
+          // ghost elements. in that
+          // case, we first need to
+          // figure out which
+          // elements we own locally,
+          // then get a pointer to
+          // the elements that are
+          // stored here (both the
+          // ones we own as well as
+          // the ghost elements). in
+          // this array, the locally
+          // owned elements come
+          // first followed by the
+          // ghost elements whose
+          // position we can get from
+          // an index set
+          if (vector.ghosted)
+            {
+              PetscInt begin, end;
+              ierr = VecGetOwnershipRange (vector.vector, &begin, &end);
+              AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+              Vec locally_stored_elements = PETSC_NULL;
+              ierr = VecGhostGetLocalForm(vector.vector, &locally_stored_elements);
+              AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+              // the size of the local form is only needed for the
+              // bounds check on ghost entries below
+              PetscInt lsize;
+              ierr = VecGetSize(locally_stored_elements, &lsize);
+              AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+              PetscScalar *ptr;
+              ierr = VecGetArray(locally_stored_elements, &ptr);
+              AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+              PetscScalar value;
+
+              if ( index>=static_cast<size_type>(begin)
+                   && index<static_cast<size_type>(end) )
+                {
+                  //local entry: offset relative to the first owned index
+                  value = *(ptr+index-begin);
+                }
+              else
+                {
+                  //ghost entry: stored behind the (end-begin) owned
+                  //entries, at its position within the ghost index set
+                  const size_type ghostidx
+                    = vector.ghost_indices.index_within_set(index);
+
+                  Assert(ghostidx+end-begin<(size_type)lsize, ExcInternalError());
+                  value = *(ptr+ghostidx+end-begin);
+                }
+
+              // hand the array and the local form back to PETSc, in
+              // reverse order of acquisition
+              ierr = VecRestoreArray(locally_stored_elements, &ptr);
+              AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+              ierr = VecGhostRestoreLocalForm(vector.vector, &locally_stored_elements);
+              AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+              return value;
+            }
+
+
+          // first verify that the requested
+          // element is actually locally
+          // available
+          PetscInt begin, end;
+
+          ierr = VecGetOwnershipRange (vector.vector, &begin, &end);
+          AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+
+
+          AssertThrow ((index >= static_cast<size_type>(begin)) &&
+                       (index < static_cast<size_type>(end)),
+                       ExcAccessToNonlocalElement (index, begin, end-1));
+
+          // the element is locally owned, so it can be
+          // read through VecGetValues()
+          PetscInt idx = index;
+          PetscScalar value;
+          ierr = VecGetValues(vector.vector, 1, &idx, &value);
+          AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+          return value;
+        }
+      else
+        // what? what other kind of vector
+        // exists there?
+        Assert (false, ExcInternalError());
+
+      // only reached for an unknown vector type (after the assertion
+      // above, which is active in debug mode); return a dummy value
+      return -1e20;
+    }
+  }
+
+  // Default constructor. Marks the object as non-ghosted, with no
+  // pending operation, and as owning its PETSc vector. The vector
+  // member itself is not initialized here.
+  // NOTE(review): presumably derived classes are expected to create
+  // the PETSc vector (as Vector::create_vector() does) -- confirm.
+  VectorBase::VectorBase ()
+    :
+    ghosted(false),
+    last_action (::dealii::VectorOperation::unknown),
+    attained_ownership(true)
+  {
+    // debug-mode check that we are running single threaded
+    Assert( MultithreadInfo::is_running_single_threaded(),
+            ExcMessage("PETSc does not support multi-threaded access, set "
+                       "the thread limit to 1 in MPI_InitFinalize()."));
+  }
+
+
+
+  // Copy constructor. Takes over the ghost information of v, then
+  // creates a new PETSc vector with the layout of v.vector and copies
+  // the values into it. The new object owns its PETSc vector.
+  VectorBase::VectorBase (const VectorBase &v)
+    :
+    Subscriptor (),
+    ghosted(v.ghosted),
+    ghost_indices(v.ghost_indices),
+    last_action (::dealii::VectorOperation::unknown),
+    attained_ownership(true)
+  {
+    // debug-mode check that we are running single threaded
+    Assert( MultithreadInfo::is_running_single_threaded(),
+            ExcMessage("PETSc does not support multi-threaded access, set "
+                       "the thread limit to 1 in MPI_InitFinalize()."));
+
+    // step 1: allocate a vector of the same kind and size
+    const int duplicate_status = VecDuplicate (v.vector, &vector);
+    AssertThrow (duplicate_status == 0, ExcPETScError(duplicate_status));
+
+    // step 2: transfer the values
+    const int copy_status = VecCopy (v.vector, vector);
+    AssertThrow (copy_status == 0, ExcPETScError(copy_status));
+  }
+
+
+
+  // Wrap an existing PETSc vector handle. The handle is copied, not
+  // the vector data, and attained_ownership is set to false so that
+  // the destructor and clear() of this class do not destroy the PETSc
+  // vector.
+  VectorBase::VectorBase (const Vec &v)
+    :
+    Subscriptor (),
+    vector(v),
+    ghosted(false),
+    last_action (::dealii::VectorOperation::unknown),
+    attained_ownership(false)
+  {
+    // debug-mode check that we are running single threaded
+    Assert( MultithreadInfo::is_running_single_threaded(),
+            ExcMessage("PETSc does not support multi-threaded access, set "
+                       "the thread limit to 1 in MPI_InitFinalize()."));
+  }
+
+
+
+  // Destructor. Destroys the PETSc vector if this object owns it;
+  // a vector that was merely wrapped (attained_ownership == false) is
+  // left alone.
+  // NOTE(review): AssertThrow can throw from within a destructor,
+  // which is hazardous during stack unwinding -- consider a
+  // non-throwing check here.
+  VectorBase::~VectorBase ()
+  {
+    if (attained_ownership)
+      {
+        // VecDestroy() takes the handle by value before PETSc 3.2 and
+        // by address from 3.2 on
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+        const int ierr = VecDestroy (vector);
+#else
+        const int ierr = VecDestroy (&vector);
+#endif
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+      }
+  }
+
+
+
+  // Destroy the PETSc vector (if owned) and reset the bookkeeping
+  // members to the values the default constructor sets. No new PETSc
+  // vector is created here.
+  void
+  VectorBase::clear ()
+  {
+    if (attained_ownership)
+      {
+        // VecDestroy() takes the handle by value before PETSc 3.2 and
+        // by address from 3.2 on
+        int destroy_status;
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+        destroy_status = VecDestroy (vector);
+#else
+        destroy_status = VecDestroy (&vector);
+#endif
+        AssertThrow (destroy_status == 0, ExcPETScError(destroy_status));
+      }
+
+    // back to the default-constructed state
+    ghosted            = false;
+    ghost_indices.clear ();
+    last_action        = ::dealii::VectorOperation::unknown;
+    attained_ownership = true;
+  }
+
+
+
+  // Set all elements of the vector to the (finite) scalar s. For
+  // vectors with ghost elements, the local form is set to s as well.
+  // Returns a reference to this object.
+  VectorBase &
+  VectorBase::operator = (const PetscScalar s)
+  {
+    AssertIsFinite(s);
+
+    //TODO[TH]: assert(is_compressed())
+
+    int ierr = VecSet (vector, s);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // without ghost elements we are done
+    if (!has_ghost_elements())
+      return *this;
+
+    // otherwise also write s into the local form of the vector
+    Vec local_form = PETSC_NULL;
+    ierr = VecGhostGetLocalForm(vector, &local_form);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    ierr = VecSet (local_form, s);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    ierr = VecGhostRestoreLocalForm(vector, &local_form);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return *this;
+  }
+
+
+
+  // Return whether PETSc's VecEqual() considers this vector and v
+  // equal. Both vectors must have the same size (checked in debug
+  // mode).
+  bool
+  VectorBase::operator == (const VectorBase &v) const
+  {
+    Assert (size() == v.size(),
+            ExcDimensionMismatch(size(), v.size()));
+
+    // PETSc's boolean type was renamed in version 3.2
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    PetscTruth
+#else
+    PetscBool
+#endif
+    vectors_are_equal;
+
+    const int comparison_status = VecEqual (vector, v.vector, &vectors_are_equal);
+    AssertThrow (comparison_status == 0, ExcPETScError(comparison_status));
+
+    return (vectors_are_equal == PETSC_TRUE);
+  }
+
+
+
+  // Return whether PETSc's VecEqual() considers this vector and v
+  // different. Both vectors must have the same size (checked in debug
+  // mode).
+  bool
+  VectorBase::operator != (const VectorBase &v) const
+  {
+    Assert (size() == v.size(),
+            ExcDimensionMismatch(size(), v.size()));
+
+    // PETSc's boolean type was renamed in version 3.2
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    PetscTruth
+#else
+    PetscBool
+#endif
+    vectors_are_equal;
+
+    const int comparison_status = VecEqual (vector, v.vector, &vectors_are_equal);
+    AssertThrow (comparison_status == 0, ExcPETScError(comparison_status));
+
+    return (vectors_are_equal == PETSC_FALSE);
+  }
+
+
+
+  // Return the size of the vector as reported by VecGetSize().
+  VectorBase::size_type
+  VectorBase::size () const
+  {
+    PetscInt global_size;
+    const int query_status = VecGetSize (vector, &global_size);
+    AssertThrow (query_status == 0, ExcPETScError(query_status));
+
+    return global_size;
+  }
+
+
+
+  // Return the local size of the vector as reported by
+  // VecGetLocalSize().
+  VectorBase::size_type
+  VectorBase::local_size () const
+  {
+    PetscInt n_local;
+    const int query_status = VecGetLocalSize (vector, &n_local);
+    AssertThrow (query_status == 0, ExcPETScError(query_status));
+
+    return n_local;
+  }
+
+
+
+  // Return the pair (begin, end) reported by VecGetOwnershipRange()
+  // for this vector.
+  std::pair<VectorBase::size_type, VectorBase::size_type>
+  VectorBase::local_range () const
+  {
+    PetscInt first_owned, past_last_owned;
+    const int query_status
+      = VecGetOwnershipRange (vector, &first_owned, &past_last_owned);
+    AssertThrow (query_status == 0, ExcPETScError(query_status));
+
+    return std::make_pair (first_owned, past_last_owned);
+  }
+
+
+
+  // Overwrite the elements listed in indices with the corresponding
+  // entries of values. Both arrays must have the same length (checked
+  // in debug mode).
+  void
+  VectorBase::set (const std::vector<size_type> &indices,
+                   const std::vector<PetscScalar>  &values)
+  {
+    Assert (indices.size() == values.size(),
+            ExcMessage ("Function called with arguments of different sizes"));
+
+    const bool add_values = false;
+    do_set_add_operation(indices.size(), &indices[0], &values[0], add_values);
+  }
+
+
+
+  // Add the entries of values to the elements listed in indices. Both
+  // arrays must have the same length (checked in debug mode).
+  void
+  VectorBase::add (const std::vector<size_type> &indices,
+                   const std::vector<PetscScalar>  &values)
+  {
+    Assert (indices.size() == values.size(),
+            ExcMessage ("Function called with arguments of different sizes"));
+
+    const bool add_values = true;
+    do_set_add_operation(indices.size(), &indices[0], &values[0], add_values);
+  }
+
+
+
+  // Add the entries of a deal.II vector to the elements listed in
+  // indices. Both containers must have the same length (checked in
+  // debug mode).
+  void
+  VectorBase::add (const std::vector<size_type>    &indices,
+                   const ::dealii::Vector<PetscScalar> &values)
+  {
+    Assert (indices.size() == values.size(),
+            ExcMessage ("Function called with arguments of different sizes"));
+
+    const bool add_values = true;
+    do_set_add_operation(indices.size(), &indices[0], values.begin(), add_values);
+  }
+
+
+
+  // Add n_elements values, given as plain arrays of indices and
+  // values, to the vector.
+  void
+  VectorBase::add (const size_type    n_elements,
+                   const size_type   *indices,
+                   const PetscScalar *values)
+  {
+    const bool add_values = true;
+    do_set_add_operation(n_elements, indices, values, add_values);
+  }
+
+
+
+  // Scalar product of this vector with vec. Both vectors must have the
+  // same size (checked in debug mode).
+  PetscScalar
+  VectorBase::operator * (const VectorBase &vec) const
+  {
+    Assert (size() == vec.size(),
+            ExcDimensionMismatch(size(), vec.size()));
+
+    // For complex vectors, VecDot() computes
+    //    val = (x,y) = y^H x,
+    // where y^H denotes the conjugate transpose of y. This is the
+    // usual "mathematicians" complex inner product in which the
+    // SECOND argument gets the complex conjugate; hence vec is passed
+    // first and this vector second.
+    PetscScalar dot_product;
+    const int dot_status = VecDot (vec.vector, vector, &dot_product);
+    AssertThrow (dot_status == 0, ExcPETScError(dot_status));
+
+    return dot_product;
+  }
+
+
+
+  // First perform add(a, V) on this vector, then return the scalar
+  // product of the updated vector with W.
+  PetscScalar
+  VectorBase::add_and_dot (const PetscScalar a,
+                           const VectorBase &V,
+                           const VectorBase &W)
+  {
+    add (a, V);
+
+    const PetscScalar dot_product = (*this) * W;
+    return dot_product;
+  }
+
+
+
+  // Finish pending set/add operations by running PETSc's vector
+  // assembly, then reset last_action. The given operation must match
+  // the kind of operation performed since the last compress(), unless
+  // none was performed.
+  void
+  VectorBase::compress (const VectorOperation::values operation)
+  {
+#if defined(DEBUG) && defined(DEAL_II_WITH_MPI)
+    // Check that all processors agree that last_action is the same (or
+    // none!): combine the values of all processors with a bitwise OR
+    // and make sure the result is not the union of add and insert.
+    int local_action = last_action;
+    int combined_action;
+
+    MPI_Allreduce(&local_action, &combined_action, 1, MPI_INT,
+                  MPI_BOR, get_mpi_communicator());
+
+    AssertThrow(combined_action != (::dealii::VectorOperation::add | ::dealii::VectorOperation::insert),
+                ExcMessage("Error: not all processors agree on the last VectorOperation before this compress() call."));
+#endif
+
+    AssertThrow(last_action == ::dealii::VectorOperation::unknown
+                || last_action == operation,
+                ExcMessage("Missing compress() or calling with wrong VectorOperation argument."));
+
+    // note that one may think that we only need to do something if in
+    // fact the state is anything but last_action::unknown. but that's
+    // not true: one frequently gets into situations where only one
+    // processor (or a subset of processors) actually writes something
+    // into a vector, but we still need to call VecAssemblyBegin/End on
+    // all processors.
+    const int begin_status = VecAssemblyBegin(vector);
+    AssertThrow (begin_status == 0, ExcPETScError(begin_status));
+
+    const int end_status = VecAssemblyEnd(vector);
+    AssertThrow (end_status == 0, ExcPETScError(end_status));
+
+    // reset the last action field to indicate that we're back to a
+    // pristine state
+    last_action = ::dealii::VectorOperation::unknown;
+  }
+
+
+
+  // Return the square of the l2 norm of the vector.
+  VectorBase::real_type
+  VectorBase::norm_sqr () const
+  {
+    const real_type norm = l2_norm();
+    const real_type squared_norm = norm*norm;
+    return squared_norm;
+  }
+
+
+
+  // Return the sum of all vector elements divided by size().
+  //
+  // For parallel (MPI) vectors, the sum is computed by VecSum(). For
+  // all other vectors, the elements are summed by hand through direct
+  // access to the data array.
+  PetscScalar
+  VectorBase::mean_value () const
+  {
+    int ierr;
+
+    // The hand-written summation below can only be used in the serial
+    // case; let PETSc do the work for parallel vectors.
+    if (dynamic_cast<const PETScWrappers::MPI::Vector *>(this) != 0)
+      {
+        PetscScalar sum;
+        ierr = VecSum(vector, &sum);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+        return sum/static_cast<PetscReal>(size());
+      }
+
+    // size() is a PETSc call plus an error check; query it only once
+    // rather than in every iteration of the loops below
+    const size_type n_elements = size();
+
+    // get a representation of the vector and
+    // loop over all the elements
+    PetscScalar *start_ptr;
+    ierr = VecGetArray (vector, &start_ptr);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    PetscScalar mean = 0;
+    {
+      PetscScalar sum0 = 0,
+                  sum1 = 0,
+                  sum2 = 0,
+                  sum3 = 0;
+
+      // use modern processors better by
+      // allowing pipelined commands to be
+      // executed in parallel: four independent
+      // partial sums over the largest multiple
+      // of four elements
+      const PetscScalar       *ptr     = start_ptr;
+      const PetscScalar *const eptr    = start_ptr + (n_elements/4)*4;
+      const PetscScalar *const end_ptr = start_ptr + n_elements;
+      while (ptr!=eptr)
+        {
+          sum0 += *ptr++;
+          sum1 += *ptr++;
+          sum2 += *ptr++;
+          sum3 += *ptr++;
+        }
+      // add up remaining elements
+      while (ptr != end_ptr)
+        sum0 += *ptr++;
+
+      mean = (sum0+sum1+sum2+sum3)/static_cast<PetscReal>(n_elements);
+    }
+
+    // restore the representation of the
+    // vector
+    ierr = VecRestoreArray (vector, &start_ptr);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return mean;
+  }
+
+
+  // Return the l1 norm of the vector, computed by VecNorm() with
+  // NORM_1.
+  VectorBase::real_type
+  VectorBase::l1_norm () const
+  {
+    real_type norm;
+
+    const int norm_status = VecNorm (vector, NORM_1, &norm);
+    AssertThrow (norm_status == 0, ExcPETScError(norm_status));
+
+    return norm;
+  }
+
+
+
+  // Return the l2 norm of the vector, computed by VecNorm() with
+  // NORM_2.
+  VectorBase::real_type
+  VectorBase::l2_norm () const
+  {
+    real_type norm;
+
+    const int norm_status = VecNorm (vector, NORM_2, &norm);
+    AssertThrow (norm_status == 0, ExcPETScError(norm_status));
+
+    return norm;
+  }
+
+
+
+  // Return the lp norm, i.e. the p-th root of the sum of the p-th
+  // powers of the absolute values of the elements, computed by hand
+  // through direct access to the data array.
+  //
+  // Only the local_size() elements of the array are visited, the same
+  // bound all_zero() uses for the array returned by VecGetArray().
+  // NOTE(review): for a vector distributed over several processes
+  // this yields the norm of the locally stored part only; a global
+  // reduction over all processes would be needed for the norm of the
+  // whole vector -- confirm the intended semantics.
+  VectorBase::real_type
+  VectorBase::lp_norm (const real_type p) const
+  {
+    // number of elements in the local array. query it once, and do
+    // not use size() here: the global size can exceed the length of
+    // the array we get below
+    const size_type n_local_elements = local_size();
+
+    // get a representation of the vector and
+    // loop over all the elements
+    PetscScalar *start_ptr;
+    int ierr = VecGetArray (vector, &start_ptr);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    real_type norm = 0;
+    {
+      real_type sum0 = 0,
+                sum1 = 0,
+                sum2 = 0,
+                sum3 = 0;
+
+      // use modern processors better by
+      // allowing pipelined commands to be
+      // executed in parallel: four independent
+      // partial sums over the largest multiple
+      // of four elements
+      const PetscScalar       *ptr     = start_ptr;
+      const PetscScalar *const eptr    = start_ptr + (n_local_elements/4)*4;
+      const PetscScalar *const end_ptr = start_ptr + n_local_elements;
+      while (ptr!=eptr)
+        {
+          sum0 += std::pow(numbers::NumberTraits<value_type>::abs(*ptr++), p);
+          sum1 += std::pow(numbers::NumberTraits<value_type>::abs(*ptr++), p);
+          sum2 += std::pow(numbers::NumberTraits<value_type>::abs(*ptr++), p);
+          sum3 += std::pow(numbers::NumberTraits<value_type>::abs(*ptr++), p);
+        }
+      // add up remaining elements
+      while (ptr != end_ptr)
+        sum0 += std::pow(numbers::NumberTraits<value_type>::abs(*ptr++), p);
+
+      norm = std::pow(sum0+sum1+sum2+sum3,
+                      1./p);
+    }
+
+    // restore the representation of the
+    // vector
+    ierr = VecRestoreArray (vector, &start_ptr);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return norm;
+  }
+
+
+
+  // Return the maximum norm of the vector, computed by VecNorm() with
+  // NORM_INFINITY.
+  VectorBase::real_type
+  VectorBase::linfty_norm () const
+  {
+    real_type norm;
+
+    const int norm_status = VecNorm (vector, NORM_INFINITY, &norm);
+    AssertThrow (norm_status == 0, ExcPETScError(norm_status));
+
+    return norm;
+  }
+
+
+
+  // Call VecNormalize() on the vector and return the value it reports.
+  // Not allowed for vectors with ghost elements (checked in debug
+  // mode).
+  VectorBase::real_type
+  VectorBase::normalize () const
+  {
+    real_type reported_norm;
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+    const int normalize_status = VecNormalize (vector, &reported_norm);
+    AssertThrow (normalize_status == 0, ExcPETScError(normalize_status));
+
+    return reported_norm;
+  }
+
+
+  // Return the value reported by VecMin(); the position it reports is
+  // discarded.
+  VectorBase::real_type
+  VectorBase::min ()  const
+  {
+    PetscInt  position;
+    real_type value;
+
+    const int min_status = VecMin (vector, &position, &value);
+    AssertThrow (min_status == 0, ExcPETScError(min_status));
+
+    return value;
+  }
+
+
+  // Return the value reported by VecMax(); the position it reports is
+  // discarded.
+  VectorBase::real_type
+  VectorBase::max ()  const
+  {
+    PetscInt  position;
+    real_type value;
+
+    const int max_status = VecMax (vector, &position, &value);
+    AssertThrow (max_status == 0, ExcPETScError(max_status));
+
+    return value;
+  }
+
+
+  // Apply VecAbs() to the vector in place and return a reference to
+  // this object. Not allowed for vectors with ghost elements (checked
+  // in debug mode).
+  VectorBase &
+  VectorBase::abs ()
+  {
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+    const int abs_status = VecAbs (vector);
+    AssertThrow (abs_status == 0, ExcPETScError(abs_status));
+
+    return *this;
+  }
+
+
+
+  // Apply VecConjugate() to the vector in place and return a reference
+  // to this object. Not allowed for vectors with ghost elements
+  // (checked in debug mode).
+  VectorBase &
+  VectorBase::conjugate ()
+  {
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+    const int conjugate_status = VecConjugate (vector);
+    AssertThrow (conjugate_status == 0, ExcPETScError(conjugate_status));
+
+    return *this;
+  }
+
+
+
+  // Pointwise-multiply this vector with itself: VecPointwiseMult is
+  // called with this vector as the result and as both factors.
+  // Returns a reference to this object. Asserts that no ghost
+  // elements are present; throws ExcPETScError on a PETSc failure.
+  VectorBase &
+  VectorBase::mult ()
+  {
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+    const PetscErrorCode ierr = VecPointwiseMult (vector, vector, vector);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return *this;
+  }
+
+
+  // Pointwise-multiply this vector by v: VecPointwiseMult is called
+  // with this vector as the result and with this vector and v as the
+  // two factors. Returns a reference to this object. Asserts that no
+  // ghost elements are present; throws ExcPETScError on a PETSc
+  // failure.
+  VectorBase &
+  VectorBase::mult (const VectorBase &v)
+  {
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+    const PetscErrorCode ierr = VecPointwiseMult (vector, vector, v);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return *this;
+  }
+
+
+  // Store the pointwise product of u and v in this vector by calling
+  // VecPointwiseMult with this vector as the result. Returns a
+  // reference to this object. Asserts that no ghost elements are
+  // present; throws ExcPETScError on a PETSc failure.
+  VectorBase &
+  VectorBase::mult (const VectorBase &u,
+                    const VectorBase &v)
+  {
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+    const PetscErrorCode ierr = VecPointwiseMult (vector, u, v);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return *this;
+  }
+
+  // Return whether every entry in the array obtained from VecGetArray,
+  // over the range [start, start + local_size()), compares equal to a
+  // value-initialized value_type. The array is handed back to PETSc
+  // with VecRestoreArray before returning. Throws ExcPETScError if
+  // either PETSc call fails.
+  bool
+  VectorBase::all_zero () const
+  {
+    // borrow the raw array behind the vector
+    PetscScalar *start_ptr;
+    PetscErrorCode ierr = VecGetArray (vector, &start_ptr);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    const PetscScalar *const end_of_range = start_ptr + local_size();
+
+    // scan until the first entry that differs from zero
+    bool only_zeros = true;
+    for (const PetscScalar *entry = start_ptr; entry != end_of_range; ++entry)
+      if (*entry != value_type())
+        {
+          only_zeros = false;
+          break;
+        }
+
+    // give the array back to PETSc
+    ierr = VecRestoreArray (vector, &start_ptr);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return only_zeros;
+  }
+
+
+  namespace internal
+  {
+    // Generic case: a value counts as non-negative if it compares
+    // greater than or equal to zero.
+    template <typename T>
+    bool is_non_negative (const T &t)
+    {
+      return t >= 0;
+    }
+
+
+
+    // Overload for complex numbers, for which the question makes no
+    // sense: the assertion below is written to always fail. If
+    // execution continues past it (presumably when assertions are
+    // compiled out), true is returned.
+    template <typename T>
+    bool is_non_negative (const std::complex<T> &)
+    {
+      // terminate the Assert statement explicitly instead of relying
+      // on the macro expanding to a braced block
+      Assert (false,
+              ExcMessage ("You can't ask a complex value "
+                          "whether it is non-negative."));
+      return true;
+    }
+  }
+
+
+
+  // Return whether internal::is_non_negative() holds for every entry
+  // in the array obtained from VecGetArray, over the range
+  // [start, start + local_size()). The array is handed back to PETSc
+  // with VecRestoreArray before returning. Throws ExcPETScError if
+  // either PETSc call fails.
+  bool
+  VectorBase::is_non_negative () const
+  {
+    // borrow the raw array behind the vector
+    PetscScalar *start_ptr;
+    PetscErrorCode ierr = VecGetArray (vector, &start_ptr);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    const PetscScalar *const end_of_range = start_ptr + local_size();
+
+    // scan until the first entry that fails the check
+    bool all_non_negative = true;
+    for (const PetscScalar *entry = start_ptr; entry != end_of_range; ++entry)
+      if (! internal::is_non_negative(*entry))
+        {
+          all_non_negative = false;
+          break;
+        }
+
+    // give the array back to PETSc
+    ierr = VecRestoreArray (vector, &start_ptr);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return all_non_negative;
+  }
+
+
+
+  // Scale this vector by a using PETSc's VecScale and return a
+  // reference to this object. Asserts that no ghost elements are
+  // present and that a is finite; throws ExcPETScError on a PETSc
+  // failure.
+  VectorBase &
+  VectorBase::operator *= (const PetscScalar a)
+  {
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+    AssertIsFinite(a);
+
+    const PetscErrorCode ierr = VecScale (vector, a);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return *this;
+  }
+
+
+
+  // Divide this vector by a: the reciprocal 1/a is formed once and
+  // passed to PETSc's VecScale. Returns a reference to this object.
+  // Asserts that no ghost elements are present and that both a and
+  // its reciprocal are finite; throws ExcPETScError on a PETSc
+  // failure.
+  VectorBase &
+  VectorBase::operator /= (const PetscScalar a)
+  {
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+    AssertIsFinite(a);
+
+    // the second check catches reciprocals that are not finite
+    const PetscScalar factor = 1./a;
+    AssertIsFinite(factor);
+
+    const PetscErrorCode ierr = VecScale (vector, factor);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return *this;
+  }
+
+
+
+  // Add v to this vector by calling PETSc's VecAXPY with a scaling
+  // factor of 1, and return a reference to this object. Asserts that
+  // no ghost elements are present; throws ExcPETScError on a PETSc
+  // failure.
+  VectorBase &
+  VectorBase::operator += (const VectorBase &v)
+  {
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+    const PetscErrorCode ierr = VecAXPY (vector, 1, v);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return *this;
+  }
+
+
+
+  // Subtract v from this vector by calling PETSc's VecAXPY with a
+  // scaling factor of -1, and return a reference to this object.
+  // Asserts that no ghost elements are present; throws ExcPETScError
+  // on a PETSc failure.
+  VectorBase &
+  VectorBase::operator -= (const VectorBase &v)
+  {
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+    const PetscErrorCode ierr = VecAXPY (vector, -1, v);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return *this;
+  }
+
+
+
+  // Shift the vector by the scalar s using PETSc's VecShift. Asserts
+  // that no ghost elements are present and that s is finite; throws
+  // ExcPETScError on a PETSc failure.
+  void
+  VectorBase::add (const PetscScalar s)
+  {
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+    AssertIsFinite(s);
+
+    const PetscErrorCode ierr = VecShift (vector, s);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+
+  // Add v to this vector; simply forwards to operator+=.
+  void
+  VectorBase::add (const VectorBase &v)
+  {
+    this->operator += (v);
+  }
+
+
+
+  // Add a*v to this vector using PETSc's VecAXPY. Asserts that no
+  // ghost elements are present and that a is finite; throws
+  // ExcPETScError on a PETSc failure.
+  void
+  VectorBase::add (const PetscScalar a,
+                   const VectorBase &v)
+  {
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+    AssertIsFinite(a);
+
+    const PetscErrorCode ierr = VecAXPY (vector, a, v);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+
+  // Add a*v + b*w to this vector with a single VecMAXPY call over the
+  // two (coefficient, vector) pairs. Asserts that no ghost elements
+  // are present and that a and b are finite; throws ExcPETScError on
+  // a PETSc failure.
+  void
+  VectorBase::add (const PetscScalar a,
+                   const VectorBase &v,
+                   const PetscScalar b,
+                   const VectorBase &w)
+  {
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+    AssertIsFinite(a);
+    AssertIsFinite(b);
+
+    // pair up coefficients and vectors in the layout VecMAXPY expects
+    const PetscScalar coefficients[2] = {a, b};
+    Vec               summands[2]     = {v.vector, w.vector};
+
+    const PetscErrorCode ierr = VecMAXPY (vector, 2, coefficients, summands);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+
+  // Scale this vector by s and add v, done in one call to PETSc's
+  // VecAYPX. Asserts that no ghost elements are present and that s is
+  // finite; throws ExcPETScError on a PETSc failure.
+  void
+  VectorBase::sadd (const PetscScalar s,
+                    const VectorBase &v)
+  {
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+    AssertIsFinite(s);
+
+    const PetscErrorCode ierr = VecAYPX (vector, s, v);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+
+  // Scale this vector by s and then add a*v. Asserts that no ghost
+  // elements are present and that s and a are finite.
+  void
+  VectorBase::sadd (const PetscScalar s,
+                    const PetscScalar a,
+                    const VectorBase     &v)
+  {
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+    AssertIsFinite(s);
+    AssertIsFinite(a);
+
+    // done in two steps: first the scaling through operator*=, then
+    // the scaled addition through add(a,v)
+    this->operator *= (s);
+    this->add (a, v);
+  }
+
+
+
+  // Scale this vector by s and then add a*v + b*w. Asserts that no
+  // ghost elements are present and that s, a and b are finite; throws
+  // ExcPETScError if the VecMAXPY call fails.
+  void
+  VectorBase::sadd (const PetscScalar s,
+                    const PetscScalar a,
+                    const VectorBase     &v,
+                    const PetscScalar b,
+                    const VectorBase     &w)
+  {
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+    AssertIsFinite(s);
+    AssertIsFinite(a);
+    AssertIsFinite(b);
+
+    // step one: scale this vector
+    this->operator *= (s);
+
+    // step two: add both scaled vectors with one VecMAXPY call
+    const PetscScalar coefficients[2] = {a, b};
+    Vec               summands[2]     = {v.vector, w.vector};
+
+    const PetscErrorCode ierr = VecMAXPY (vector, 2, coefficients, summands);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+
+  // Scale this vector by s and then add a*v + b*w + c*x. Asserts that
+  // no ghost elements are present and that s, a, b and c are finite;
+  // throws ExcPETScError if the VecMAXPY call fails.
+  void
+  VectorBase::sadd (const PetscScalar s,
+                    const PetscScalar a,
+                    const VectorBase     &v,
+                    const PetscScalar b,
+                    const VectorBase     &w,
+                    const PetscScalar c,
+                    const VectorBase     &x)
+  {
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+    AssertIsFinite(s);
+    AssertIsFinite(a);
+    AssertIsFinite(b);
+    AssertIsFinite(c);
+
+    // step one: scale this vector
+    this->operator *= (s);
+
+    // step two: add the three scaled vectors with one VecMAXPY call
+    const PetscScalar coefficients[3] = {a, b, c};
+    Vec               summands[3]     = {v.vector, w.vector, x.vector};
+
+    const PetscErrorCode ierr = VecMAXPY (vector, 3, coefficients, summands);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+
+  // Multiply this vector pointwise by the entries of factors:
+  // VecPointwiseMult is called with this vector as the result and
+  // with factors and this vector as the two inputs. Asserts that no
+  // ghost elements are present; throws ExcPETScError on a PETSc
+  // failure.
+  void
+  VectorBase::scale (const VectorBase &factors)
+  {
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+    const PetscErrorCode ierr = VecPointwiseMult (vector, factors, vector);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+
+  // Assign a*v to this vector. Asserts that no ghost elements are
+  // present, that a is finite and that v has the same size as this
+  // vector; throws ExcPETScError if the copy fails.
+  void
+  VectorBase::equ (const PetscScalar a,
+                   const VectorBase &v)
+  {
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+    AssertIsFinite(a);
+
+    Assert (size() == v.size(),
+            ExcDimensionMismatch (size(), v.size()));
+
+    // emulated in two steps: copy v into this vector with VecCopy ...
+    const PetscErrorCode ierr = VecCopy (v.vector, vector);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // ... and then scale the copy by a
+    this->operator *= (a);
+  }
+
+
+
+  // Assign a*v + b*w to this vector. Asserts that no ghost elements
+  // are present, that a and b are finite, and that both v and w have
+  // the same size as this vector; throws ExcPETScError if the copy
+  // fails.
+  void
+  VectorBase::equ (const PetscScalar a,
+                   const VectorBase &v,
+                   const PetscScalar b,
+                   const VectorBase &w)
+  {
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+    AssertIsFinite(a);
+    AssertIsFinite(b);
+
+    Assert (size() == v.size(),
+            ExcDimensionMismatch (size(), v.size()));
+    // w takes part in the sum as well, so it has to match too
+    Assert (size() == w.size(),
+            ExcDimensionMismatch (size(), w.size()));
+
+    // there is no simple operation for this
+    // in PETSc. there are multiple ways to
+    // emulate it, we choose this one:
+    // first copy v into this vector ...
+    const int ierr = VecCopy (v.vector, vector);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // ... then scale the copy by a and add b*w
+    sadd (a, b, w);
+  }
+
+
+
+  // Store the pointwise quotient of a and b in this vector using
+  // PETSc's VecPointwiseDivide, with a as the numerator argument and
+  // b as the denominator argument. Asserts that no ghost elements are
+  // present; throws ExcPETScError on a PETSc failure.
+  void
+  VectorBase::ratio (const VectorBase &a,
+                     const VectorBase &b)
+  {
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+    const PetscErrorCode ierr = VecPointwiseDivide (vector, a, b);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+
+  // Print the vector through PETSc's PETSC_VIEWER_STDOUT_WORLD viewer
+  // after switching that viewer to the given format. Throws
+  // ExcPETScError if either PETSc call reports a nonzero error code.
+  void
+  VectorBase::write_ascii (const PetscViewerFormat format)
+  {
+    //TODO[TH]:assert(is_compressed())
+
+    // Set options
+    int ierr = PetscViewerSetFormat (PETSC_VIEWER_STDOUT_WORLD,
+                                     format);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // Write to screen
+    ierr = VecView (vector, PETSC_VIEWER_STDOUT_WORLD);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+
+  // Write the entries of the array obtained from VecGetArray (indices
+  // 0 .. local_size()-1) to out.
+  //
+  // precision   number of digits set on the stream while printing
+  // scientific  if true use scientific notation, otherwise fixed
+  // across      if true separate entries by a blank, otherwise put
+  //             each entry on its own line
+  //
+  // The stream's format flags and precision are put back to their
+  // previous values afterwards. Throws ExcIO if the stream is in a
+  // failed state before or after writing, and ExcPETScError if a
+  // PETSc call fails.
+  void
+  VectorBase::print (std::ostream      &out,
+                     const unsigned int precision,
+                     const bool         scientific,
+                     const bool         across) const
+  {
+    AssertThrow (out, ExcIO());
+
+    // get a representation of the vector and
+    // loop over all the elements
+    PetscScalar *val;
+    int ierr = VecGetArray (vector, &val);
+
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // save the state of out stream; precision() both installs the new
+    // value and returns the previous one, which is a std::streamsize
+    const std::ios::fmtflags old_flags     = out.flags();
+    const std::streamsize    old_precision = out.precision (precision);
+
+    if (scientific)
+      out.setf (std::ios::scientific, std::ios::floatfield);
+    else
+      out.setf (std::ios::fixed, std::ios::floatfield);
+
+    // the number of local entries does not change while printing
+    const size_type n_local_entries = local_size();
+    if (across)
+      for (size_type i=0; i<n_local_entries; ++i)
+        out << val[i] << ' ';
+    else
+      for (size_type i=0; i<n_local_entries; ++i)
+        out << val[i] << std::endl;
+    out << std::endl;
+
+    // reset output format
+    out.flags (old_flags);
+    out.precision(old_precision);
+
+    // restore the representation of the
+    // vector
+    ierr = VecRestoreArray (vector, &val);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    AssertThrow (out, ExcIO());
+  }
+
+
+
+  // Exchange the contents of this vector and v using PETSc's VecSwap.
+  // Throws ExcPETScError on a PETSc failure.
+  void
+  VectorBase::swap (VectorBase &v)
+  {
+    const PetscErrorCode ierr = VecSwap (vector, v.vector);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+
+  // Conversion operator that exposes the wrapped PETSc Vec handle by
+  // const reference, so that a VectorBase can be passed directly to
+  // PETSc functions.
+  VectorBase::operator const Vec &() const
+  {
+    return this->vector;
+  }
+
+
+  // Return an estimate, in bytes, of the memory used by this object.
+  // The estimate is the sum of three parts: the members of this
+  // class, one PetscScalar per locally stored element, and (only if
+  // the vector is ghosted) one PetscScalar plus one int per ghost
+  // index.
+  std::size_t
+  VectorBase::memory_consumption () const
+  {
+    // members of this class
+    const std::size_t member_bytes
+      = sizeof(Vec)+sizeof(last_action)
+        +MemoryConsumption::memory_consumption(ghosted)
+        +MemoryConsumption::memory_consumption(ghost_indices);
+
+    // TH: I am relatively sure that PETSc is
+    // storing the local data in a contiguous
+    // block without indices:
+    const std::size_t local_bytes = local_size()*sizeof(PetscScalar);
+
+    // assume that PETSc is storing one index
+    // and one double per ghost element
+    const std::size_t ghost_bytes
+      = (ghosted
+         ?
+         ghost_indices.n_elements()*(sizeof(PetscScalar)+sizeof(int))
+         :
+         0);
+
+    //TODO[TH]: size of constant memory for PETSc?
+    return member_bytes + local_bytes + ghost_bytes;
+  }
+
+
+
+  // Write n_elements values into the vector at the given indices,
+  // either adding to (add_values == true) or overwriting
+  // (add_values == false) the existing entries, by means of
+  // VecSetValues.
+  //
+  // Asserts that the requested operation matches the previous one (or
+  // that the previous one is unknown) and that the vector has no
+  // ghost elements. Throws ExcPETScError if VecSetValues fails.
+  // last_action is updated even if n_elements is zero.
+  void
+  VectorBase::do_set_add_operation (const size_type    n_elements,
+                                    const size_type   *indices,
+                                    const PetscScalar *values,
+                                    const bool         add_values)
+  {
+    ::dealii::VectorOperation::values action = (add_values ?
+                                                ::dealii::VectorOperation::add :
+                                                ::dealii::VectorOperation::insert);
+    Assert ((last_action == action)
+            ||
+            (last_action == ::dealii::VectorOperation::unknown),
+            internal::VectorReference::ExcWrongMode (action,
+                                                     last_action));
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+    // VecSetValues complains if we
+    // come with an empty
+    // vector. however, it is not a
+    // collective operation, so we
+    // can skip the call if necessary
+    if (n_elements != 0)
+      {
+#ifdef PETSC_USE_64BIT_INDICES
+        // the index types differ, so convert into a temporary array
+        std::vector<PetscInt> petsc_ind (n_elements);
+        for (size_type i=0; i<n_elements; ++i)
+          petsc_ind[i] = indices[i];
+        const PetscInt *petsc_indices = &petsc_ind[0];
+#else
+        // reinterpret the caller's index array in place instead of
+        // copying it; this assumes size_type and PetscInt have the
+        // same size in this configuration
+        const PetscInt *petsc_indices
+          = reinterpret_cast<const PetscInt *>(indices);
+#endif
+
+        const InsertMode mode = (add_values ? ADD_VALUES : INSERT_VALUES);
+        const int ierr
+          = VecSetValues (vector, n_elements, petsc_indices, values,
+                          mode);
+        AssertThrow (ierr == 0, ExcPETScError(ierr));
+      }
+
+    // set the mode here, independent of whether we have actually
+    // written elements or whether the list was empty
+    last_action = action;
+  }
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_PETSC
diff --git a/source/lac/precondition_block.cc b/source/lac/precondition_block.cc
new file mode 100644
index 0000000..252b159
--- /dev/null
+++ b/source/lac/precondition_block.cc
@@ -0,0 +1,21 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/precondition_block.templates.h>
+#include <deal.II/lac/sparse_matrix.h>
+
+DEAL_II_NAMESPACE_OPEN
+#include "precondition_block.inst"
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/lac/precondition_block.inst.in b/source/lac/precondition_block.inst.in
new file mode 100644
index 0000000..9e44a18
--- /dev/null
+++ b/source/lac/precondition_block.inst.in
@@ -0,0 +1,89 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+for (S : REAL_SCALARS)
+  {
+    template class PreconditionBlockBase<S>;
+  }
+
+
+for (S1, S2 : REAL_SCALARS)
+  {
+    template class PreconditionBlock<SparseMatrix<S1>, S2>;
+    template class PreconditionBlockJacobi<SparseMatrix<S1>, S2>;
+    template class PreconditionBlockSOR<SparseMatrix<S1>, S2>;
+    template class PreconditionBlockSSOR<SparseMatrix<S1>, S2>;
+  }
+
+
+for (S1, S2, S3 : REAL_SCALARS)
+  {
+// ------------ PreconditionBlockJacobi -----------------
+    template
+      void PreconditionBlockJacobi<SparseMatrix<S1>, S2>::vmult<S3>
+      (Vector<S3> &, const Vector<S3> &) const;
+    template
+      void PreconditionBlockJacobi<SparseMatrix<S1>, S2>::Tvmult<S3>
+      (Vector<S3> &, const Vector<S3> &) const;
+    template
+      void PreconditionBlockJacobi<SparseMatrix<S1>, S2>::vmult_add<S3>
+      (Vector<S3> &, const Vector<S3> &) const;
+    template
+      void PreconditionBlockJacobi<SparseMatrix<S1>, S2>::Tvmult_add<S3>
+      (Vector<S3> &, const Vector<S3> &) const;
+    template
+      void PreconditionBlockJacobi<SparseMatrix<S1>, S2>::step<S3>
+      (Vector<S3> &, const Vector<S3> &) const;
+    template
+      void PreconditionBlockJacobi<SparseMatrix<S1>, S2>::Tstep<S3>
+      (Vector<S3> &, const Vector<S3> &) const;
+
+// ------------ PreconditionBlockSOR -----------------
+    template
+      void PreconditionBlockSOR<SparseMatrix<S1>, S2>::vmult<S3>
+      (Vector<S3> &, const Vector<S3> &) const;
+    template
+      void PreconditionBlockSOR<SparseMatrix<S1>, S2>::Tvmult<S3>
+      (Vector<S3> &, const Vector<S3> &) const;
+    template
+      void PreconditionBlockSOR<SparseMatrix<S1>, S2>::vmult_add<S3>
+      (Vector<S3> &, const Vector<S3> &) const;
+    template
+      void PreconditionBlockSOR<SparseMatrix<S1>, S2>::Tvmult_add<S3>
+      (Vector<S3> &, const Vector<S3> &) const;
+    template
+      void PreconditionBlockSOR<SparseMatrix<S1>, S2>::step<S3>
+      (Vector<S3> &, const Vector<S3> &) const;
+    template
+      void PreconditionBlockSOR<SparseMatrix<S1>, S2>::Tstep<S3>
+      (Vector<S3> &, const Vector<S3> &) const;
+
+// ------------ PreconditionBlockSSOR -----------------
+    template
+      void PreconditionBlockSSOR<SparseMatrix<S1>, S2>::vmult<S3>
+      (Vector<S3> &, const Vector<S3> &) const;
+    template
+      void PreconditionBlockSSOR<SparseMatrix<S1>, S2>::Tvmult<S3>
+      (Vector<S3> &, const Vector<S3> &) const;
+    template
+      void PreconditionBlockSSOR<SparseMatrix<S1>, S2>::step<S3>
+      (Vector<S3> &, const Vector<S3> &) const;
+    template
+      void PreconditionBlockSSOR<SparseMatrix<S1>, S2>::Tstep<S3>
+      (Vector<S3> &, const Vector<S3> &) const;
+  }
+
diff --git a/source/lac/precondition_block_ez.cc b/source/lac/precondition_block_ez.cc
new file mode 100644
index 0000000..9f39bb5
--- /dev/null
+++ b/source/lac/precondition_block_ez.cc
@@ -0,0 +1,204 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/precondition_block.templates.h>
+#include <deal.II/lac/sparse_matrix_ez.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// explicit instantiations for "float" PreconditionBlock
+template class PreconditionBlock<SparseMatrixEZ<float>, float>;
+
+// the instantiation for class PreconditionBlock<SparseMatrixEZ<float>, double> is skipped
+// because it does not make sense to have inverse block matrices with
+// higher precision than the matrix itself
+
+
+// explicit instantiations for "double" PreconditionBlock
+template class PreconditionBlock<SparseMatrixEZ<double>, float>;
+
+template class PreconditionBlock<SparseMatrixEZ<double>, double>;
+
+
+/*--------------------- PreconditionBlockJacobi -----------------------*/
+
+
+// explicit instantiations for "float" PreconditionBlockJacobi
+template class PreconditionBlockJacobi<SparseMatrixEZ<float>, float>;
+
+template void PreconditionBlockJacobi<SparseMatrixEZ<float>, float>::vmult<float>
+(Vector<float> &, const Vector<float> &) const;
+template void PreconditionBlockJacobi<SparseMatrixEZ<float>, float>::vmult<double>
+(Vector<double> &, const Vector<double> &) const;
+template void PreconditionBlockJacobi<SparseMatrixEZ<float>, float>::Tvmult<float>
+(Vector<float> &, const Vector<float> &) const;
+template void PreconditionBlockJacobi<SparseMatrixEZ<float>, float>::Tvmult<double>
+(Vector<double> &, const Vector<double> &) const;
+template void PreconditionBlockJacobi<SparseMatrixEZ<float>, float>::vmult_add<float>
+(Vector<float> &, const Vector<float> &) const;
+template void PreconditionBlockJacobi<SparseMatrixEZ<float>, float>::vmult_add<double>
+(Vector<double> &, const Vector<double> &) const;
+template void PreconditionBlockJacobi<SparseMatrixEZ<float>, float>::Tvmult_add<float>
+(Vector<float> &, const Vector<float> &) const;
+template void PreconditionBlockJacobi<SparseMatrixEZ<float>, float>::Tvmult_add<double>
+(Vector<double> &, const Vector<double> &) const;
+
+template class PreconditionBlockJacobi<SparseMatrixEZ<double>, float>;
+
+template void PreconditionBlockJacobi<SparseMatrixEZ<double>, float>::vmult<float>
+(Vector<float> &, const Vector<float> &) const;
+template void PreconditionBlockJacobi<SparseMatrixEZ<double>, float>::vmult<double>
+(Vector<double> &, const Vector<double> &) const;
+template void PreconditionBlockJacobi<SparseMatrixEZ<double>, float>::Tvmult<float>
+(Vector<float> &, const Vector<float> &) const;
+template void PreconditionBlockJacobi<SparseMatrixEZ<double>, float>::Tvmult<double>
+(Vector<double> &, const Vector<double> &) const;
+template void PreconditionBlockJacobi<SparseMatrixEZ<double>, float>::vmult_add<float>
+(Vector<float> &, const Vector<float> &) const;
+template void PreconditionBlockJacobi<SparseMatrixEZ<double>, float>::vmult_add<double>
+(Vector<double> &, const Vector<double> &) const;
+template void PreconditionBlockJacobi<SparseMatrixEZ<double>, float>::Tvmult_add<float>
+(Vector<float> &, const Vector<float> &) const;
+template void PreconditionBlockJacobi<SparseMatrixEZ<double>, float>::Tvmult_add<double>
+(Vector<double> &, const Vector<double> &) const;
+
+template class PreconditionBlockJacobi<SparseMatrixEZ<double>, double>;
+
+template void PreconditionBlockJacobi<SparseMatrixEZ<double>, double>::vmult<float>
+(Vector<float> &, const Vector<float> &) const;
+template void PreconditionBlockJacobi<SparseMatrixEZ<double>, double>::vmult<double>
+(Vector<double> &, const Vector<double> &) const;
+template void PreconditionBlockJacobi<SparseMatrixEZ<double>, double>::Tvmult<float>
+(Vector<float> &, const Vector<float> &) const;
+template void PreconditionBlockJacobi<SparseMatrixEZ<double>, double>::Tvmult<double>
+(Vector<double> &, const Vector<double> &) const;
+template void PreconditionBlockJacobi<SparseMatrixEZ<double>, double>::vmult_add<float>
+(Vector<float> &, const Vector<float> &) const;
+template void PreconditionBlockJacobi<SparseMatrixEZ<double>, double>::vmult_add<double>
+(Vector<double> &, const Vector<double> &) const;
+template void PreconditionBlockJacobi<SparseMatrixEZ<double>, double>::Tvmult_add<float>
+(Vector<float> &, const Vector<float> &) const;
+template void PreconditionBlockJacobi<SparseMatrixEZ<double>, double>::Tvmult_add<double>
+(Vector<double> &, const Vector<double> &) const;
+
+/*--------------------- PreconditionBlockSOR -----------------------*/
+
+
+// explicit instantiations for "float" PreconditionBlockSOR
+template class PreconditionBlockSOR<SparseMatrixEZ<float>, float>;
+
+template void PreconditionBlockSOR<SparseMatrixEZ<float>, float>::vmult<float> (
+  Vector<float> &, const Vector<float> &) const;
+template void PreconditionBlockSOR<SparseMatrixEZ<float>, float>::vmult<double> (
+  Vector<double> &, const Vector<double> &) const;
+template void PreconditionBlockSOR<SparseMatrixEZ<float>, float>::Tvmult<float> (
+  Vector<float> &, const Vector<float> &) const;
+template void PreconditionBlockSOR<SparseMatrixEZ<float>, float>::Tvmult<double> (
+  Vector<double> &, const Vector<double> &) const;
+
+
+// the instantiation for class PreconditionBlockSOR<SparseMatrixEZ<float>, double> is skipped
+// because it does not make sense to have inverse block matrices with
+// higher precision than the matrix itself
+
+
+// explicit instantiations for "double" PreconditionBlockSOR
+template class PreconditionBlockSOR<SparseMatrixEZ<double>, float>;
+
+
+template void PreconditionBlockSOR<SparseMatrixEZ<double>, float>::vmult<float> (
+  Vector<float> &, const Vector<float> &) const;
+template void PreconditionBlockSOR<SparseMatrixEZ<double>, float>::vmult<double> (
+  Vector<double> &, const Vector<double> &) const;
+template void PreconditionBlockSOR<SparseMatrixEZ<double>, float>::Tvmult<float> (
+  Vector<float> &, const Vector<float> &) const;
+template void PreconditionBlockSOR<SparseMatrixEZ<double>, float>::Tvmult<double> (
+  Vector<double> &, const Vector<double> &) const;
+template void PreconditionBlockSOR<SparseMatrixEZ<double>, float>::vmult_add<float> (
+  Vector<float> &, const Vector<float> &) const;
+template void PreconditionBlockSOR<SparseMatrixEZ<double>, float>::vmult_add<double> (
+  Vector<double> &, const Vector<double> &) const;
+template void PreconditionBlockSOR<SparseMatrixEZ<double>, float>::Tvmult_add<float> (
+  Vector<float> &, const Vector<float> &) const;
+template void PreconditionBlockSOR<SparseMatrixEZ<double>, float>::Tvmult_add<double> (
+  Vector<double> &, const Vector<double> &) const;
+
+template class PreconditionBlockSOR<SparseMatrixEZ<double>, double>;
+
+template void PreconditionBlockSOR<SparseMatrixEZ<double>, double>::vmult<float> (
+  Vector<float> &, const Vector<float> &) const;
+template void PreconditionBlockSOR<SparseMatrixEZ<double>, double>::vmult<double> (
+  Vector<double> &, const Vector<double> &) const;
+template void PreconditionBlockSOR<SparseMatrixEZ<double>, double>::Tvmult<float> (
+  Vector<float> &, const Vector<float> &) const;
+template void PreconditionBlockSOR<SparseMatrixEZ<double>, double>::Tvmult<double> (
+  Vector<double> &, const Vector<double> &) const;
+template void PreconditionBlockSOR<SparseMatrixEZ<double>, double>::vmult_add<float> (
+  Vector<float> &, const Vector<float> &) const;
+template void PreconditionBlockSOR<SparseMatrixEZ<double>, double>::vmult_add<double> (
+  Vector<double> &, const Vector<double> &) const;
+template void PreconditionBlockSOR<SparseMatrixEZ<double>, double>::Tvmult_add<float> (
+  Vector<float> &, const Vector<float> &) const;
+template void PreconditionBlockSOR<SparseMatrixEZ<double>, double>::Tvmult_add<double> (
+  Vector<double> &, const Vector<double> &) const;
+
+
+/*--------------------- PreconditionBlockSSOR -----------------------*/
+
+
+// explicit instantiations for "float" PreconditionBlockSSOR
+template class PreconditionBlockSSOR<SparseMatrixEZ<float>, float>;
+
+template void PreconditionBlockSSOR<SparseMatrixEZ<float>, float>::vmult<float> (
+  Vector<float> &, const Vector<float> &) const;
+template void PreconditionBlockSSOR<SparseMatrixEZ<float>, float>::vmult<double> (
+  Vector<double> &, const Vector<double> &) const;
+template void PreconditionBlockSSOR<SparseMatrixEZ<float>, float>::Tvmult<float> (
+  Vector<float> &, const Vector<float> &) const;
+template void PreconditionBlockSSOR<SparseMatrixEZ<float>, float>::Tvmult<double> (
+  Vector<double> &, const Vector<double> &) const;
+
+
+// the instantiation for class PreconditionBlockSSOR<SparseMatrixEZ<float>, double> is skipped
+// because it does not make sense to have inverse block matrices with
+// higher precision than the matrix itself
+
+
+// explicit instantiations for "double" PreconditionBlockSSOR
+template class PreconditionBlockSSOR<SparseMatrixEZ<double>, float>;
+
+
+template void PreconditionBlockSSOR<SparseMatrixEZ<double>, float>::vmult<float> (
+  Vector<float> &, const Vector<float> &) const;
+template void PreconditionBlockSSOR<SparseMatrixEZ<double>, float>::vmult<double> (
+  Vector<double> &, const Vector<double> &) const;
+template void PreconditionBlockSSOR<SparseMatrixEZ<double>, float>::Tvmult<float> (
+  Vector<float> &, const Vector<float> &) const;
+template void PreconditionBlockSSOR<SparseMatrixEZ<double>, float>::Tvmult<double> (
+  Vector<double> &, const Vector<double> &) const;
+
+template class PreconditionBlockSSOR<SparseMatrixEZ<double>, double>;
+
+template void PreconditionBlockSSOR<SparseMatrixEZ<double>, double>::vmult<float> (
+  Vector<float> &, const Vector<float> &) const;
+template void PreconditionBlockSSOR<SparseMatrixEZ<double>, double>::vmult<double> (
+  Vector<double> &, const Vector<double> &) const;
+template void PreconditionBlockSSOR<SparseMatrixEZ<double>, double>::Tvmult<float> (
+  Vector<float> &, const Vector<float> &) const;
+template void PreconditionBlockSSOR<SparseMatrixEZ<double>, double>::Tvmult<double> (
+  Vector<double> &, const Vector<double> &) const;
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/lac/relaxation_block.cc b/source/lac/relaxation_block.cc
new file mode 100644
index 0000000..3ab3951
--- /dev/null
+++ b/source/lac/relaxation_block.cc
@@ -0,0 +1,21 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/relaxation_block.templates.h>
+#include <deal.II/lac/sparse_matrix.h>
+
+DEAL_II_NAMESPACE_OPEN
+#include "relaxation_block.inst"
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/lac/relaxation_block.inst.in b/source/lac/relaxation_block.inst.in
new file mode 100644
index 0000000..dbd6b9b
--- /dev/null
+++ b/source/lac/relaxation_block.inst.in
@@ -0,0 +1,53 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+for (S1, S2 : REAL_SCALARS)
+  {
+    template class RelaxationBlock<SparseMatrix<S1>, S2>;
+    template class RelaxationBlockJacobi<SparseMatrix<S1>, S2>;
+    template class RelaxationBlockSOR<SparseMatrix<S1>, S2>;
+    template class RelaxationBlockSSOR<SparseMatrix<S1>, S2>;
+  }
+
+
+for (S1, S2, S3 : REAL_SCALARS)
+  {
+// ------------ RelaxationBlockJacobi -----------------
+    template
+      void RelaxationBlockJacobi<SparseMatrix<S1>, S2>::step<S3>
+      (Vector<S3> &, const Vector<S3> &) const;
+    template
+      void RelaxationBlockJacobi<SparseMatrix<S1>, S2>::Tstep<S3>
+      (Vector<S3> &, const Vector<S3> &) const;
+
+// ------------ RelaxationBlockSOR -----------------
+    template
+      void RelaxationBlockSOR<SparseMatrix<S1>, S2>::step<S3>
+      (Vector<S3> &, const Vector<S3> &) const;
+    template
+      void RelaxationBlockSOR<SparseMatrix<S1>, S2>::Tstep<S3>
+      (Vector<S3> &, const Vector<S3> &) const;
+
+// ------------ RelaxationBlockSSOR -----------------
+    template
+      void RelaxationBlockSSOR<SparseMatrix<S1>, S2>::step<S3>
+      (Vector<S3> &, const Vector<S3> &) const;
+    template
+      void RelaxationBlockSSOR<SparseMatrix<S1>, S2>::Tstep<S3>
+      (Vector<S3> &, const Vector<S3> &) const;
+  }
+
diff --git a/source/lac/slepc_solver.cc b/source/lac/slepc_solver.cc
new file mode 100644
index 0000000..59003b0
--- /dev/null
+++ b/source/lac/slepc_solver.cc
@@ -0,0 +1,457 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/slepc_solver.h>
+
+#ifdef DEAL_II_WITH_SLEPC
+
+#  include <deal.II/lac/petsc_matrix_base.h>
+#  include <deal.II/lac/petsc_vector_base.h>
+#  include <deal.II/lac/petsc_vector.h>
+#  include <deal.II/lac/slepc_spectral_transformation.h>
+
+#  include <cmath>
+#  include <vector>
+
+#  include <petscversion.h>
+#  include <slepcversion.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace SLEPcWrappers
+{
+
+  SolverBase::SolverBase (SolverControl  &cn,
+                          const MPI_Comm &mpi_communicator)
+    :
+    solver_control (cn),
+    mpi_communicator (mpi_communicator)
+  {
+    // Set up the SLEPc eigensolver context on the given communicator.
+    int error_code = EPSCreate (mpi_communicator, &eps);
+    AssertThrow (error_code == 0, ExcSLEPcError(error_code));
+
+    // Pass the tolerance and the iteration limit stored in the
+    // SolverControl object on to SLEPc.
+    error_code = EPSSetTolerances(eps,
+                                  this->solver_control.tolerance(),
+                                  this->solver_control.max_steps());
+    AssertThrow (error_code == 0, ExcSLEPcError(error_code));
+
+    // Defaults: request the eigenpairs selected by EPS_LARGEST_MAGNITUDE
+    // for a problem of type EPS_GNHEP.
+    set_which_eigenpairs(EPS_LARGEST_MAGNITUDE);
+    set_problem_type(EPS_GNHEP);
+
+    // TODO:
+    // By default, EPS initializes the starting vector or the initial subspace randomly.
+  }
+
+  SolverBase::~SolverBase ()
+  {
+    // Release the SLEPc eigensolver context, if one was created.
+    if (eps != NULL)
+      {
+        // The signature of EPSDestroy changed from taking the object to
+        // taking a pointer to it.
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+        const int ierr = EPSDestroy (eps);
+#else
+        const int ierr = EPSDestroy (&eps);
+#endif
+        // A destructor must not throw: an exception escaping while the
+        // stack is already being unwound would terminate the program.
+        // Report the error without throwing instead.
+        (void) ierr;
+        AssertNothrow (ierr == 0, ExcSLEPcError(ierr));
+      }
+  }
+
+  void
+  SolverBase::set_matrices (const PETScWrappers::MatrixBase &A)
+  {
+    // Standard eigenproblem: only the operator A is given, the second
+    // operator is left unset.
+    const int error_code = EPSSetOperators (eps, A, PETSC_NULL);
+    AssertThrow (error_code == 0, ExcSLEPcError(error_code));
+  }
+
+  void
+  SolverBase::set_matrices (const PETScWrappers::MatrixBase &A,
+                            const PETScWrappers::MatrixBase &B)
+  {
+    // Generalized eigenproblem: hand both operators to SLEPc.
+    const int error_code = EPSSetOperators (eps, A, B);
+    AssertThrow (error_code == 0, ExcSLEPcError(error_code));
+  }
+
+  void
+  SolverBase::set_transformation (SLEPcWrappers::TransformationBase &transformation)
+  {
+    // Attach the spectral transformation object to the eigensolver.
+    // STSetShift is called inside.
+    const int error_code = EPSSetST(eps,transformation.st);
+    AssertThrow (error_code == 0, SolverBase::ExcSLEPcError(error_code));
+  }
+
+  void
+  SolverBase::set_initial_vector (const PETScWrappers::VectorBase &this_initial_vector)
+  {
+    // A zero vector is rejected as a starting guess.
+    Assert(this_initial_vector.l2_norm()>0.0,
+           ExcMessage("Initial vector should be nonzero."));
+
+    // Obtain the PETSc handle of the vector.
+    Vec petsc_vec = this_initial_vector;
+
+    // The SLEPc entry point for setting the start vector depends on the
+    // version: older releases take a single vector, newer ones a space
+    // (here of dimension one).
+#if DEAL_II_PETSC_VERSION_LT(3,1,0)
+    const int error_code = EPSSetInitialVector (eps, &petsc_vec);
+#else
+    const int error_code = EPSSetInitialSpace (eps, 1, &petsc_vec);
+#endif
+    AssertThrow (error_code == 0, ExcSLEPcError(error_code));
+  }
+
+  void
+  SolverBase::set_target_eigenvalue (const PetscScalar &this_target)
+  {
+    // Tell SLEPc around which value eigenvalues are sought. For every
+    // transformation except STSHIFT the target and the shift are
+    // directly connected, see p41 of the SLEPc manual.
+    const int error_code = EPSSetTarget (eps, this_target );
+    AssertThrow (error_code == 0, ExcSLEPcError(error_code));
+  }
+
+  void
+  SolverBase::set_which_eigenpairs (const EPSWhich eps_which)
+  {
+    // Select the part of the spectrum the solver should compute.
+    const int error_code = EPSSetWhichEigenpairs (eps, eps_which);
+    AssertThrow (error_code == 0, ExcSLEPcError(error_code));
+  }
+
+  void
+  SolverBase::set_problem_type (const EPSProblemType eps_problem)
+  {
+    // Forward the problem type to the SLEPc eigensolver context.
+    const int error_code = EPSSetProblemType (eps, eps_problem);
+    AssertThrow (error_code == 0, ExcSLEPcError(error_code));
+  }
+
+  /*
+   * Configure the SLEPc eigensolver, run it, and report the result.
+   *
+   * n_eigenpairs: number of eigenpairs requested from SLEPc.
+   * n_converged:  output; receives the number of converged eigenpairs.
+   *               Must point to a valid object.
+   *
+   * After the solve, the iteration count and the largest residual norm
+   * among the converged eigenpairs are passed to the SolverControl
+   * object, and the resulting state is recorded via get_solver_state().
+   * Throws ExcSLEPcError if any SLEPc call returns a nonzero error code.
+   */
+  void
+  SolverBase::solve (const unsigned int  n_eigenpairs,
+                     unsigned int       *n_converged)
+  {
+    int ierr;
+
+    // set number of eigenvectors to compute
+    ierr = EPSSetDimensions (eps, n_eigenpairs,
+                             PETSC_DECIDE, PETSC_DECIDE);
+    AssertThrow (ierr == 0, ExcSLEPcError(ierr));
+
+    // set the solve options to the eigenvalue problem solver context
+    ierr = EPSSetFromOptions (eps);
+    AssertThrow (ierr == 0, ExcSLEPcError(ierr));
+
+    // TODO breaks step-36
+    // force Krylov solver to use true residual instead of an estimate.
+    //EPSSetTrueResidual(solver_data->eps, PETSC_TRUE);
+    //AssertThrow (ierr == 0, ExcSLEPcError(ierr));
+
+    // Set convergence test to be absolute
+    ierr = EPSSetConvergenceTest (eps, EPS_CONV_ABS);
+    AssertThrow (ierr == 0, ExcSLEPcError(ierr));
+
+    // TODO Set the convergence test function
+    // ierr = EPSSetConvergenceTestFunction (solver_data->eps, &convergence_test,
+    //              reinterpret_cast<void *>(&solver_control));
+    // AssertThrow (ierr == 0, ExcSLEPcError(ierr));
+
+    // solve the eigensystem
+    ierr = EPSSolve (eps);
+    AssertThrow (ierr == 0, ExcSLEPcError(ierr));
+
+    // Get the number of converged eigenstates. SLEPc writes a PetscInt,
+    // whose size need not match that of unsigned int (e.g. with 64-bit
+    // indices), so receive it in a variable of the right type and
+    // convert afterwards instead of reinterpreting the caller's pointer.
+    PetscInt n_converged_petsc = 0;
+    ierr = EPSGetConverged (eps, &n_converged_petsc);
+    AssertThrow (ierr == 0, ExcSLEPcError(ierr));
+    *n_converged = static_cast<unsigned int>(n_converged_petsc);
+
+    PetscInt n_iterations   = 0;
+    PetscReal residual_norm = 0;
+
+    // @todo Investigate elaborating on some of this to act on the
+    // complete eigenspectrum
+    {
+      // get the number of solver iterations
+      ierr = EPSGetIterationNumber (eps, &n_iterations);
+      AssertThrow (ierr == 0, ExcSLEPcError(ierr));
+
+      // get the maximum of residual norm among converged eigenvectors.
+      for (PetscInt i = 0; i < n_converged_petsc; ++i)
+        {
+          // Use PetscReal (not double) so that the pointer passed to
+          // SLEPc and the arguments of std::max have matching types
+          // regardless of the precision PETSc was configured with.
+          PetscReal residual_norm_i = 0.0;
+
+          // EPSComputeResidualNorm is L2-norm and is not consistent with the stopping criteria
+          // used during the solution process.
+          // Yet, this is the norm which gives error bounds (Saad, 1992, ch3):
+          //   | \lambda - \widehat\lambda | <= ||r||_2
+          ierr = EPSComputeResidualNorm (eps, i, &residual_norm_i);
+
+          // EPSComputeRelativeError may not be consistent with the stopping criteria
+          // used during the solution process. Given EPS_CONV_ABS set above,
+          // this can be either the l2 norm or the mass-matrix induced norm
+          // when EPS_GHEP is set.
+          // ierr = EPSComputeRelativeError (solver_data->eps, i, &residual_norm_i);
+
+          // EPSGetErrorEstimate is consistent with the residual norm
+          // used during the solution process. However, it is not guaranteed to
+          // be derived from the residual even when EPSSetTrueResidual is set.
+          // ierr = EPSGetErrorEstimate (solver_data->eps, i, &residual_norm_i);
+
+          AssertThrow (ierr == 0, ExcSLEPcError(ierr));
+          residual_norm = std::max (residual_norm, residual_norm_i);
+        }
+
+      // check the solver state
+      const SolverControl::State state
+        = solver_control.check (n_iterations, residual_norm);
+
+      // get the solver state according to SLEPc
+      get_solver_state (state);
+
+      // as SLEPc uses different stopping criteria, we have to omit this step.
+      // This can be checked only in conjunction with EPSGetErrorEstimate.
+      // and in case of failure: throw exception
+      // if (solver_control.last_check () != SolverControl::success)
+      //   AssertThrow(false, SolverControl::NoConvergence (solver_control.last_step(),
+      //                                                    solver_control.last_value()));
+    }
+  }
+
+  void
+  SolverBase::get_eigenpair (const unsigned int            index,
+                             PetscScalar               &eigenvalues,
+                             PETScWrappers::VectorBase &eigenvectors)
+  {
+    // Retrieve eigenpair number 'index'. The second eigenvalue and
+    // eigenvector slots of the SLEPc call are not requested.
+    const int error_code = EPSGetEigenpair (eps, index,
+                                            &eigenvalues, PETSC_NULL,
+                                            eigenvectors, PETSC_NULL);
+    AssertThrow (error_code == 0, ExcSLEPcError(error_code));
+  }
+
+
+  void
+  SolverBase::get_eigenpair (const unsigned int         index,
+                             double                    &real_eigenvalues,
+                             double                    &imag_eigenvalues,
+                             PETScWrappers::VectorBase &real_eigenvectors,
+                             PETScWrappers::VectorBase &imag_eigenvectors)
+  {
+#ifdef PETSC_USE_COMPLEX
+    // With complex scalars this overload is not available.
+    Assert ((false),
+            ExcMessage ("Your PETSc/SLEPc installation was configured with scalar-type complex "
+                        "but this function is not defined for complex types."));
+#else
+    // Real scalars: fetch the real and imaginary parts of eigenpair
+    // number 'index' separately.
+    const int error_code = EPSGetEigenpair (eps, index,
+                                            &real_eigenvalues, &imag_eigenvalues,
+                                            real_eigenvectors, imag_eigenvectors);
+    AssertThrow (error_code == 0, ExcSLEPcError(error_code));
+#endif
+  }
+
+  /*
+   * Translate a SolverControl state into an EPSConvergedReason and
+   * store it in 'reason'.
+   */
+  void
+  SolverBase::get_solver_state (const SolverControl::State state)
+  {
+    switch (state)
+      {
+      case ::dealii::SolverControl::iterate:
+        reason = EPS_CONVERGED_ITERATING;
+        break;
+
+      case ::dealii::SolverControl::success:
+        // NOTE(review): the numeric value 1 presumably corresponds to
+        // SLEPc's "converged to tolerance" reason -- confirm against
+        // the EPSConvergedReason enum of the SLEPc version in use.
+        reason = static_cast<EPSConvergedReason>(1);
+        break;
+
+      case ::dealii::SolverControl::failure:
+        // SolverControl::check() reports failure as soon as the step
+        // count reaches max_steps, so the comparison has to include
+        // equality; with a strict '>' an exhausted iteration budget
+        // would be misreported as a breakdown.
+        if (solver_control.last_step() >= solver_control.max_steps())
+          reason = EPS_DIVERGED_ITS;
+        else
+          reason = EPS_DIVERGED_BREAKDOWN;
+        break;
+
+      default:
+        Assert (false, ExcNotImplemented());
+      }
+  }
+
+  /* ---------------------- SolverControls ----------------------- */
+  SolverControl &
+  SolverBase::control () const
+  {
+    // Give access to the SolverControl object this solver was set up with.
+    SolverControl &control_object = solver_control;
+    return control_object;
+  }
+
+  int
+  SolverBase::convergence_test (EPS          /*eps             */,
+                                PetscScalar  /*real_eigenvalue */,
+                                PetscScalar  /*imag_eigenvalue */,
+                                PetscReal    /*residual norm associated to the eigenpair   */,
+                                PetscReal *  /*(output) computed error estimate */,
+                                void *       /*solver_control_x*/)
+  {
+    // Placeholder kept for future reference only: a convergence test
+    // function is expected to return an error estimate, and an
+    // eigenvalue counts as converged once that estimate drops below the
+    // tolerance. None of the arguments are used here.
+    const int no_error = 0;
+
+    // Report success unconditionally.
+    return no_error;
+  }
+
+  /* ---------------------- SolverKrylovSchur ------------------------ */
+  SolverKrylovSchur::SolverKrylovSchur (SolverControl        &cn,
+                                        const MPI_Comm       &mpi_communicator,
+                                        const AdditionalData &data)
+    :
+    SolverBase (cn, mpi_communicator),
+    additional_data (data)
+  {
+    // Select the Krylov-Schur method in the context created by the base class.
+    const int error_code = EPSSetType (eps, const_cast<char *>(EPSKRYLOVSCHUR));
+    AssertThrow (error_code == 0, ExcSLEPcError(error_code));
+  }
+
+  /* ---------------------- SolverArnoldi ------------------------ */
+  SolverArnoldi::AdditionalData::
+  AdditionalData (const bool delayed_reorthogonalization)
+    : delayed_reorthogonalization (delayed_reorthogonalization)
+  {
+    // Nothing to do beyond storing the flag.
+  }
+
+  SolverArnoldi::SolverArnoldi (SolverControl        &cn,
+                                const MPI_Comm       &mpi_communicator,
+                                const AdditionalData &data)
+    :
+    SolverBase (cn, mpi_communicator),
+    additional_data (data)
+  {
+    // Select the Arnoldi method.
+    int error_code = EPSSetType (eps, const_cast<char *>(EPSARNOLDI));
+    AssertThrow (error_code == 0, ExcSLEPcError(error_code));
+
+    // Nothing more to configure unless delayed reorthogonalization
+    // was asked for.
+    if (!additional_data.delayed_reorthogonalization)
+      return;
+
+    error_code = EPSArnoldiSetDelayed (eps, PETSC_TRUE);
+    AssertThrow (error_code == 0, ExcSLEPcError(error_code));
+  }
+
+
+  /* ---------------------- Lanczos ------------------------ */
+  SolverLanczos::AdditionalData::
+  AdditionalData(const EPSLanczosReorthogType r)
+    :
+    reorthog(r)
+  {
+    // Only stores the requested reorthogonalization type.
+  }
+
+  SolverLanczos::SolverLanczos (SolverControl        &cn,
+                                const MPI_Comm       &mpi_communicator,
+                                const AdditionalData &data)
+    :
+    SolverBase (cn, mpi_communicator),
+    additional_data (data)
+  {
+    // Select the Lanczos method ...
+    int error_code = EPSSetType (eps, const_cast<char *>(EPSLANCZOS));
+    AssertThrow (error_code == 0, ExcSLEPcError(error_code));
+
+    // ... and apply the reorthogonalization type from the additional data.
+    error_code = EPSLanczosSetReorthog(eps,additional_data.reorthog);
+    AssertThrow (error_code == 0, ExcSLEPcError(error_code));
+  }
+
+  /* ----------------------- Power ------------------------- */
+  SolverPower::SolverPower (SolverControl        &cn,
+                            const MPI_Comm       &mpi_communicator,
+                            const AdditionalData &data)
+    :
+    SolverBase (cn, mpi_communicator),
+    additional_data (data)
+  {
+    // Select the power iteration method.
+    const int error_code = EPSSetType (eps, const_cast<char *>(EPSPOWER));
+    AssertThrow (error_code == 0, ExcSLEPcError(error_code));
+  }
+
+  /* ---------------- Generalized Davidson ----------------- */
+  SolverGeneralizedDavidson::AdditionalData::
+  AdditionalData(bool double_expansion)
+    :
+    double_expansion(double_expansion)
+  {
+    // Only stores the double-expansion flag.
+  }
+
+  /*
+   * Set up a SLEPc eigensolver using the Generalized Davidson method.
+   * If requested in 'data', the double expansion variant is enabled.
+   * For PETSc/SLEPc versions before 3.1.0 the solver is not available
+   * and an assertion is raised instead.
+   */
+  SolverGeneralizedDavidson::SolverGeneralizedDavidson (SolverControl        &cn,
+                                                        const MPI_Comm       &mpi_communicator,
+                                                        const AdditionalData &data)
+    :
+    SolverBase (cn, mpi_communicator),
+    additional_data (data)
+  {
+#if DEAL_II_PETSC_VERSION_GTE(3,1,0)
+    int ierr = EPSSetType (eps, const_cast<char *>(EPSGD));
+    AssertThrow (ierr == 0, ExcSLEPcError(ierr));
+
+    if (additional_data.double_expansion)
+      {
+        ierr = EPSGDSetDoubleExpansion (eps, PETSC_TRUE);
+        AssertThrow (ierr == 0, ExcSLEPcError(ierr));
+      }
+#else
+    // PETSc/SLEPc version must be >= 3.1.0. The message states the same
+    // bound as the preprocessor guard above (version 3.1.0 itself is
+    // accepted).
+    Assert ((false),
+            ExcMessage ("Your SLEPc installation does not include a copy of the "
+                        "Generalized Davidson solver. A SLEPc version >= 3.1.0 is required."));
+#endif
+  }
+
+  /* ------------------ Jacobi Davidson -------------------- */
+  /*
+   * Set up a SLEPc eigensolver using the Jacobi-Davidson method. For
+   * PETSc/SLEPc versions before 3.1.0 the solver is not available and
+   * an assertion is raised instead.
+   */
+  SolverJacobiDavidson::SolverJacobiDavidson (SolverControl        &cn,
+                                              const MPI_Comm       &mpi_communicator,
+                                              const AdditionalData &data)
+    :
+    SolverBase (cn, mpi_communicator),
+    additional_data (data)
+  {
+#if DEAL_II_PETSC_VERSION_GTE(3,1,0)
+    const int ierr = EPSSetType (eps, const_cast<char *>(EPSJD));
+    AssertThrow (ierr == 0, ExcSLEPcError(ierr));
+#else
+    // PETSc/SLEPc version must be >= 3.1.0. The message states the same
+    // bound as the preprocessor guard above (version 3.1.0 itself is
+    // accepted).
+    Assert ((false),
+            ExcMessage ("Your SLEPc installation does not include a copy of the "
+                        "Jacobi-Davidson solver. A SLEPc version >= 3.1.0 is required."));
+#endif
+  }
+
+  /* ---------------------- LAPACK ------------------------- */
+  SolverLAPACK::SolverLAPACK (SolverControl        &cn,
+                              const MPI_Comm       &mpi_communicator,
+                              const AdditionalData &data)
+    :
+    SolverBase (cn, mpi_communicator),
+    additional_data (data)
+  {
+    // PETSc/SLEPc is expected to have BLAS/LAPACK practically always;
+    // the guard below is purely defensive.
+#if PETSC_HAVE_BLASLAPACK
+    const int error_code = EPSSetType (eps, const_cast<char *>(EPSLAPACK));
+    AssertThrow (error_code == 0, ExcSLEPcError(error_code));
+#else
+    Assert ((false),
+            ExcMessage ("Your PETSc/SLEPc installation was not configured with BLAS/LAPACK "
+                        "but this is needed to use the LAPACK solver."));
+#endif
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_SLEPC
+
diff --git a/source/lac/slepc_spectral_transformation.cc b/source/lac/slepc_spectral_transformation.cc
new file mode 100644
index 0000000..6e9805a
--- /dev/null
+++ b/source/lac/slepc_spectral_transformation.cc
@@ -0,0 +1,172 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/slepc_spectral_transformation.h>
+
+#ifdef DEAL_II_WITH_SLEPC
+
+#  include <deal.II/lac/slepc_solver.h>
+#  include <deal.II/lac/petsc_matrix_base.h>
+#  include <deal.II/lac/petsc_vector_base.h>
+#  include <deal.II/lac/petsc_vector.h>
+
+#  include <cmath>
+#  include <vector>
+
+#  include <petscversion.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace SLEPcWrappers
+{
+  TransformationBase::TransformationBase (const MPI_Comm &mpi_communicator)
+  {
+    // Create the SLEPc spectral transformation object on the given
+    // communicator.
+    const int error_code = STCreate(mpi_communicator, &st);
+    AssertThrow (error_code == 0, SolverBase::ExcSLEPcError(error_code));
+  }
+
+  TransformationBase::~TransformationBase ()
+  {
+    // Release the SLEPc spectral transformation object, if any.
+    if (st!=NULL)
+      {
+        const int ierr = STDestroy(&st);
+        // A destructor must not throw: an exception escaping while the
+        // stack is already being unwound would terminate the program.
+        // Report the error without throwing instead.
+        (void) ierr;
+        AssertNothrow (ierr == 0, SolverBase::ExcSLEPcError(ierr));
+      }
+  }
+
+  void TransformationBase::set_matrix_mode(const STMatMode mode)
+  {
+    // Forward the matrix mode to the SLEPc transformation object.
+    const int error_code = STSetMatMode(st,mode);
+    AssertThrow (error_code == 0, SolverBase::ExcSLEPcError(error_code));
+  }
+
+  void TransformationBase::set_solver(const PETScWrappers::SolverBase &solver)
+  {
+    // Make the transformation use the KSP object held by the given
+    // PETSc solver wrapper.
+    const int error_code = STSetKSP(st,solver.solver_data->ksp);
+    AssertThrow (error_code == 0, SolverBase::ExcSLEPcError(error_code));
+  }
+
+  /* ------------------- TransformationShift --------------------- */
+
+  TransformationShift::AdditionalData::
+  AdditionalData (const double shift_parameter)
+    : shift_parameter (shift_parameter)
+  {
+    // Only stores the shift.
+  }
+
+  TransformationShift::TransformationShift (const MPI_Comm &mpi_communicator,
+                                            const AdditionalData &data)
+    :
+    TransformationBase(mpi_communicator),
+    additional_data (data)
+  {
+    // Select the shift transformation ...
+    int error_code = STSetType (st, const_cast<char *>(STSHIFT));
+    AssertThrow (error_code == 0, SolverBase::ExcSLEPcError(error_code));
+
+    // ... and apply the shift value from the additional data.
+    error_code = STSetShift (st, additional_data.shift_parameter);
+    AssertThrow (error_code == 0, SolverBase::ExcSLEPcError(error_code));
+  }
+
+  /* ---------------- TransformationShiftInvert ------------------ */
+
+  TransformationShiftInvert::AdditionalData::
+  AdditionalData (const double shift_parameter)
+    : shift_parameter (shift_parameter)
+  {
+    // Only stores the shift.
+  }
+
+  TransformationShiftInvert::TransformationShiftInvert (const MPI_Comm &mpi_communicator,
+                                                        const AdditionalData &data)
+    :
+    TransformationBase(mpi_communicator),
+    additional_data (data)
+  {
+    // Select the shift-and-invert transformation; the name of the type
+    // constant depends on the PETSc/SLEPc version.
+#if DEAL_II_PETSC_VERSION_LT(3,1,0)
+    int error_code = STSetType (st, const_cast<char *>(STSINV));
+#else
+    int error_code = STSetType (st, const_cast<char *>(STSINVERT));
+#endif
+    AssertThrow (error_code == 0, SolverBase::ExcSLEPcError(error_code));
+
+    // Apply the shift value from the additional data.
+    error_code = STSetShift (st, additional_data.shift_parameter);
+    AssertThrow (error_code == 0, SolverBase::ExcSLEPcError(error_code));
+  }
+
+  /* --------------- TransformationSpectrumFolding ----------------- */
+
+  TransformationSpectrumFolding::AdditionalData::
+  AdditionalData (const double shift_parameter)
+    : shift_parameter (shift_parameter)
+  {
+    // Only stores the shift.
+  }
+
+  /*
+   * Set up a spectrum folding transformation with the shift given in
+   * 'data'. This transformation only exists for PETSc/SLEPc versions
+   * before 3.5.0; with newer versions an assertion is raised instead.
+   */
+  TransformationSpectrumFolding::TransformationSpectrumFolding (const MPI_Comm &mpi_communicator,
+      const AdditionalData &data)
+    :
+    TransformationBase(mpi_communicator),
+    additional_data (data)
+  {
+#if DEAL_II_PETSC_VERSION_LT(3,5,0)
+    int ierr;
+    ierr = STSetType (st, const_cast<char *>(STFOLD));
+    (void)ierr;
+    AssertThrow (ierr == 0, SolverBase::ExcSLEPcError(ierr));
+
+    ierr = STSetShift (st, additional_data.shift_parameter);
+    (void)ierr;
+    AssertThrow (ierr == 0, SolverBase::ExcSLEPcError(ierr));
+#else
+    // PETSc/SLEPc version must be < 3.5.0.
+    (void)st;
+    // Note the space after the first sentence: the two literals are
+    // concatenated into a single message.
+    Assert ((false),
+            ExcMessage ("Folding transformation has been removed in SLEPc 3.5.0 and newer. "
+                        "You cannot use this transformation anymore."));
+#endif
+  }
+
+  /* ------------------- TransformationCayley --------------------- */
+
+  TransformationCayley::AdditionalData::
+  AdditionalData (const double shift_parameter,
+                  const double antishift_parameter)
+    : shift_parameter (shift_parameter),
+      antishift_parameter (antishift_parameter)
+  {
+    // Only stores the shift and the antishift.
+  }
+
+  TransformationCayley::TransformationCayley (const MPI_Comm &mpi_communicator,
+                                              const AdditionalData &data)
+    :
+    TransformationBase(mpi_communicator),
+    additional_data (data)
+  {
+    // Select the Cayley transformation.
+    int error_code = STSetType (st, const_cast<char *>(STCAYLEY));
+    AssertThrow (error_code == 0, SolverBase::ExcSLEPcError(error_code));
+
+    // Apply the shift first ...
+    error_code = STSetShift (st, additional_data.shift_parameter);
+    AssertThrow (error_code == 0, SolverBase::ExcSLEPcError(error_code));
+
+    // ... then the antishift.
+    error_code = STCayleySetAntishift (st, additional_data.antishift_parameter);
+    AssertThrow (error_code == 0, SolverBase::ExcSLEPcError(error_code));
+  }
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_SLEPC
diff --git a/source/lac/solver.cc b/source/lac/solver.cc
new file mode 100644
index 0000000..2819e22
--- /dev/null
+++ b/source/lac/solver.cc
@@ -0,0 +1,33 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/lac/solver.h>
+#include <deal.II/lac/vector_memory.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/petsc_vector.h>
+#include <deal.II/lac/petsc_block_vector.h>
+#include <deal.II/lac/trilinos_vector.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+
+#include <deal.II/lac/parallel_vector.h>
+#include <deal.II/lac/parallel_block_vector.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+// Pull in the explicit instantiations; they are included here so that
+// they end up between the namespace open/close macros.
+#include "solver.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/lac/solver.inst.in b/source/lac/solver.inst.in
new file mode 100644
index 0000000..36650c9
--- /dev/null
+++ b/source/lac/solver.inst.in
@@ -0,0 +1,21 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+// Explicitly instantiate the Solver class template once for every type
+// in the SERIAL_VECTORS list.
+for (S : SERIAL_VECTORS)
+  {
+    template class Solver<S>;
+  }
diff --git a/source/lac/solver_control.cc b/source/lac/solver_control.cc
new file mode 100644
index 0000000..1977817
--- /dev/null
+++ b/source/lac/solver_control.cc
@@ -0,0 +1,334 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/parameter_handler.h>
+#include <deal.II/lac/solver_control.h>
+
+#include <cmath>
+#include <sstream>
+
+DEAL_II_NAMESPACE_OPEN
+
+/*----------------------- SolverControl ---------------------------------*/
+
+
+/*
+ * Create a control object with the given iteration limit and tolerance.
+ * The two flags select whether every check and/or the final result is
+ * written to deallog. Failure checking and history recording start out
+ * disabled, and the log frequency is one.
+ */
+SolverControl::SolverControl (const unsigned int maxiter,
+                              const double tolerance,
+                              const bool m_log_history,
+                              const bool m_log_result)
+  :
+  maxsteps(maxiter),
+  tol(tolerance),
+  lvalue(1.e300),
+  lstep(0),
+  check_failure(false),
+  relative_failure_residual(0),
+  failure_residual(0),
+  m_log_history(m_log_history),
+  m_log_frequency(1),
+  m_log_result(m_log_result),
+  history_data_enabled(false)
+{
+  // last_check() and initial_value() can be called before the first
+  // call to check(); give the members they return a defined value so
+  // that such a call does not read uninitialized memory. They are
+  // assigned here rather than in the initializer list because the
+  // member declaration order is not visible from this file.
+  lcheck      = failure;
+  initial_val = lvalue;
+}
+
+
+
+SolverControl::~SolverControl()
+{
+  // Nothing to release.
+}
+
+
+
+/*
+ * Decide whether an iteration should continue.
+ *
+ * step:        number of the current iteration step; step 0 marks the
+ *              start of a new iteration.
+ * check_value: value (typically a residual norm) compared against the
+ *              tolerance.
+ *
+ * Returns success if check_value <= tol; failure if the step limit is
+ * reached, check_value is NaN, or (with failure checking enabled)
+ * check_value exceeds the failure residual; iterate otherwise. The
+ * step, value and returned state are stored for later queries.
+ */
+SolverControl::State
+SolverControl::check (const unsigned int step,
+                      const double check_value)
+{
+  // if this is the first time we
+  // come here, then store the
+  // residual for later comparisons
+  if (step==0)
+    {
+      initial_val = check_value;
+      if (history_data_enabled)
+        history_data.resize(maxsteps);
+    }
+
+  if (m_log_history && ((step % m_log_frequency) == 0))
+    deallog << "Check " << step << "\t" << check_value << std::endl;
+
+  lstep  = step;
+  lvalue = check_value;
+
+  if (step==0)
+    {
+      if (check_failure)
+        failure_residual=relative_failure_residual*check_value;
+
+      if (m_log_result)
+        deallog << "Starting value " << check_value << std::endl;
+    }
+
+  if (history_data_enabled)
+    {
+      // The history array is sized to maxsteps at step 0, but this
+      // function may legitimately be called with step == maxsteps (the
+      // failure test below uses >=), with maxsteps == 0, or without a
+      // preceding call for step 0. Grow the array as needed so the
+      // write below never goes out of bounds.
+      if (step >= history_data.size())
+        history_data.resize(step+1);
+      history_data[step] = check_value;
+    }
+
+  if (check_value <= tol)
+    {
+      if (m_log_result)
+        deallog << "Convergence step " << step
+                << " value " << check_value << std::endl;
+      lcheck = success;
+      return success;
+    }
+
+  if ((step >= maxsteps) ||
+      numbers::is_nan(check_value) ||
+      (check_failure && (check_value > failure_residual))
+     )
+    {
+      if (m_log_result)
+        deallog << "Failure step " << step
+                << " value " << check_value << std::endl;
+      lcheck = failure;
+      return failure;
+    }
+
+  lcheck = iterate;
+  return iterate;
+}
+
+
+
+SolverControl::State
+SolverControl::last_check() const
+{
+  // State stored by the most recent call to check().
+  const State most_recent_state = lcheck;
+  return most_recent_state;
+}
+
+
+double
+SolverControl::initial_value() const
+{
+  // Value passed to check() for step 0.
+  const double first_value = initial_val;
+  return first_value;
+}
+
+
+double
+SolverControl::last_value() const
+{
+  // Value passed to the most recent call to check().
+  const double most_recent_value = lvalue;
+  return most_recent_value;
+}
+
+
+unsigned int
+SolverControl::last_step() const
+{
+  // Step number passed to the most recent call to check().
+  const unsigned int most_recent_step = lstep;
+  return most_recent_step;
+}
+
+
+unsigned int
+SolverControl::log_frequency (unsigned int f)
+{
+  // Remember the current setting so it can be handed back.
+  const unsigned int previous = m_log_frequency;
+
+  // A frequency of zero is replaced by one.
+  m_log_frequency = (f == 0) ? 1 : f;
+
+  return previous;
+}
+
+
+void
+SolverControl::enable_history_data ()
+{
+  // From now on check() records every value it is given.
+  this->history_data_enabled = true;
+}
+
+
+double
+SolverControl::average_reduction() const
+{
+  // No step has been taken yet, hence there is nothing to average.
+  if (lstep == 0)
+    return 0.;
+
+  Assert (history_data_enabled, ExcHistoryDataRequired());
+  Assert (history_data.size() > lstep, ExcInternalError());
+  Assert (history_data[0] > 0., ExcInternalError());
+  Assert (history_data[lstep] > 0., ExcInternalError());
+
+  // Geometric mean of the per-step reduction: the lstep-th root of the
+  // ratio between the last and the first recorded value.
+  const double total_reduction = history_data[lstep]/history_data[0];
+  const double inverse_steps   = 1./lstep;
+  return std::pow(total_reduction, inverse_steps);
+}
+
+
+
+double
+SolverControl::step_reduction(unsigned int step) const
+{
+  Assert (history_data_enabled, ExcHistoryDataRequired());
+  Assert (history_data.size() > lstep, ExcInternalError());
+  Assert (step <=lstep, ExcIndexRange(step,1,lstep+1));
+  Assert (step>0, ExcIndexRange(step,1,lstep+1));
+
+  // Ratio of the value recorded in this step to the one recorded in the
+  // step before.
+  const double current_value  = history_data[step];
+  const double previous_value = history_data[step-1];
+  return current_value/previous_value;
+}
+
+
+double
+SolverControl::final_reduction() const
+{
+  // Reduction achieved in the last recorded step.
+  const unsigned int final_step = lstep;
+  return step_reduction(final_step);
+}
+
+
+void
+SolverControl::declare_parameters (ParameterHandler &param)
+{
+  // Stopping criteria.
+  param.declare_entry ("Max steps", "100", Patterns::Integer());
+  param.declare_entry ("Tolerance", "1.e-10", Patterns::Double());
+
+  // Logging behavior.
+  param.declare_entry ("Log history", "false", Patterns::Bool());
+  param.declare_entry ("Log frequency", "1", Patterns::Integer());
+  param.declare_entry ("Log result", "true", Patterns::Bool());
+}
+
+
+void SolverControl::parse_parameters (ParameterHandler &param)
+{
+  // Stopping criteria.
+  set_max_steps (param.get_integer("Max steps"));
+  set_tolerance (param.get_double("Tolerance"));
+
+  // Logging behavior.
+  log_history (param.get_bool("Log history"));
+  log_result (param.get_bool("Log result"));
+  log_frequency (param.get_integer("Log frequency"));
+}
+
+/*----------------------- ReductionControl ---------------------------------*/
+
+
+ReductionControl::ReductionControl(const unsigned int n,
+                                   const double tol,
+                                   const double red,
+                                   const bool m_log_history,
+                                   const bool m_log_result)
+  : SolverControl (n, tol, m_log_history, m_log_result),
+    reduce(red)
+{
+  // Everything but the reduction factor is handled by the base class.
+}
+
+
+ReductionControl::ReductionControl (const SolverControl &c)
+  : SolverControl(c)
+{
+  // A plain SolverControl carries no reduction factor; use zero.
+  set_reduction(0.);
+}
+
+
+ReductionControl &
+ReductionControl::operator= (const SolverControl &c)
+{
+  // Copy the base class part ...
+  SolverControl::operator=(c);
+
+  // ... and reset the reduction factor, which a plain SolverControl
+  // does not carry, to zero.
+  set_reduction(0.);
+
+  return *this;
+}
+
+
+ReductionControl::~ReductionControl()
+{
+  // Nothing to release.
+}
+
+
+/*
+ * Like SolverControl::check(), but additionally report success as soon
+ * as check_value has dropped to the initial value (the one given for
+ * step 0) multiplied by the reduction factor.
+ *
+ * step:        number of the current iteration step.
+ * check_value: value compared against the reduced tolerance.
+ */
+SolverControl::State
+ReductionControl::check (const unsigned int step,
+                         const double check_value)
+{
+  // if this is the first time we
+  // come here, then store the
+  // residual for later comparisons
+  if (step==0)
+    {
+      initial_val = check_value;
+      reduced_tol = check_value * reduce;
+    }
+
+  // check whether desired reduction
+  // has been achieved. also check
+  // for equality in case initial
+  // residual already was zero
+  if (check_value <= reduced_tol)
+    {
+      if (m_log_result)
+        deallog << "Convergence step " << step
+                << " value " << check_value << std::endl;
+      lstep  = step;
+      lvalue = check_value;
+
+      // This path bypasses the base class, so record the value in the
+      // history here; otherwise the entry for the final step would be
+      // missing when the reduction is queried afterwards. Grow the
+      // array if necessary (the base class may not have sized it yet).
+      if (history_data_enabled)
+        {
+          if (step >= history_data.size())
+            history_data.resize(step+1);
+          history_data[step] = check_value;
+        }
+
+      lcheck = success;
+      return success;
+    }
+  else
+    return SolverControl::check(step, check_value);
+}
+
+
+
+void
+ReductionControl::declare_parameters (ParameterHandler &param)
+{
+  // Entries of the base class first, then the one specific to this class.
+  SolverControl::declare_parameters (param);
+
+  param.declare_entry("Reduction", "1.e-2", Patterns::Double());
+}
+
+
+void
+ReductionControl::parse_parameters (ParameterHandler &param)
+{
+  // Let the base class read its entries first.
+  SolverControl::parse_parameters (param);
+
+  // Then read the reduction factor.
+  const double reduction = param.get_double("Reduction");
+  set_reduction (reduction);
+}
+
+
+/*---------------------- IterationNumberControl -----------------------------*/
+
+
+IterationNumberControl::IterationNumberControl(const unsigned int n,
+                                               const double       tolerance,
+                                               const bool m_log_history,
+                                               const bool m_log_result)
+  : SolverControl (n, tolerance, m_log_history, m_log_result)
+{
+  // All arguments are simply handed to the base class.
+}
+
+
+IterationNumberControl::~IterationNumberControl()
+{
+  // Nothing to release.
+}
+
+
+/*
+ * Like SolverControl::check(), except that reaching the maximum number
+ * of steps counts as success rather than failure.
+ *
+ * step:        number of the current iteration step.
+ * check_value: value handed on to the base class check if the step
+ *              limit has not been reached yet.
+ */
+SolverControl::State
+IterationNumberControl::check (const unsigned int step,
+                               const double check_value)
+{
+  // check whether the given number of iterations was reached, and return
+  // success in that case. Otherwise, go on to the check of the base class.
+  if (step >= this->maxsteps)
+    {
+      if (m_log_result)
+        deallog << "Convergence step " << step
+                << " value " << check_value << std::endl;
+      lstep  = step;
+      lvalue = check_value;
+
+      // This path bypasses the base class, so record the value in the
+      // history here; otherwise the entry for the final step would be
+      // missing when the reduction is queried afterwards. The array is
+      // grown if necessary since step may equal or exceed maxsteps.
+      if (history_data_enabled)
+        {
+          if (step >= history_data.size())
+            history_data.resize(step+1);
+          history_data[step] = check_value;
+        }
+
+      lcheck = success;
+      return success;
+    }
+  else
+    return SolverControl::check(step, check_value);
+}
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/lac/sparse_decomposition.cc b/source/lac/sparse_decomposition.cc
new file mode 100644
index 0000000..256183f
--- /dev/null
+++ b/source/lac/sparse_decomposition.cc
@@ -0,0 +1,41 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2002 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/lac/sparse_decomposition.templates.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+template class SparseLUDecomposition<double>;
+template void SparseLUDecomposition<double>::initialize<double> (const SparseMatrix<double> &,
+    const AdditionalData data);
+template void SparseLUDecomposition<double>::initialize<float> (const SparseMatrix<float> &,
+    const AdditionalData data);
+
+template void SparseLUDecomposition<double>::copy_from<double> (const SparseMatrix<double> &);
+template void SparseLUDecomposition<double>::copy_from<float> (const SparseMatrix<float> &);
+
+
+template class SparseLUDecomposition<float>;
+template void SparseLUDecomposition<float>::initialize<double> (const SparseMatrix<double> &,
+    const AdditionalData data);
+template void SparseLUDecomposition<float>::initialize<float> (const SparseMatrix<float> &,
+    const AdditionalData data);
+
+template void SparseLUDecomposition<float>::copy_from<double> (const SparseMatrix<double> &);
+template void SparseLUDecomposition<float>::copy_from<float> (const SparseMatrix<float> &);
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/lac/sparse_direct.cc b/source/lac/sparse_direct.cc
new file mode 100644
index 0000000..225b8d8
--- /dev/null
+++ b/source/lac/sparse_direct.cc
@@ -0,0 +1,514 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2001 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/sparse_direct.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/base/thread_management.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/block_sparse_matrix.h>
+#include <deal.II/lac/vector.h>
+
+#include <cerrno>
+#include <iostream>
+#include <list>
+#include <typeinfo>
+#include <vector>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// include UMFPACK file.
+#ifdef DEAL_II_WITH_UMFPACK
+#  include <umfpack.h>
+#endif
+
+
+
+SparseDirectUMFPACK::~SparseDirectUMFPACK ()
+{
+  this->clear (); // all cleanup is delegated to clear()
+}
+
+
+void SparseDirectUMFPACK::initialize (const SparsityPattern &)
+{
+  // intentionally empty: the sparsity pattern argument is ignored
+}
+
+
+#ifdef DEAL_II_WITH_UMFPACK
+
+SparseDirectUMFPACK::SparseDirectUMFPACK ()
+  : _m (0),
+    _n (0),
+    symbolic_decomposition (0),
+    numeric_decomposition (0),
+    control (UMFPACK_CONTROL)
+{
+  // start out with UMFPACK's default control parameters
+  umfpack_dl_defaults (&control[0]);
+}
+
+
+
+void
+SparseDirectUMFPACK::clear ()
+{
+  // free the symbolic factorization, if one is still around, and zero
+  // the handle so that it cannot be freed a second time
+  if (symbolic_decomposition != 0)
+    {
+      umfpack_dl_free_symbolic (&symbolic_decomposition);
+      symbolic_decomposition = 0;
+    }
+
+  // likewise for the numeric factorization
+  if (numeric_decomposition != 0)
+    {
+      umfpack_dl_free_numeric (&numeric_decomposition);
+      numeric_decomposition = 0;
+    }
+
+  // empty the three arrays holding the matrix. each one is swapped with
+  // an empty temporary that is destroyed at the end of its statement, so
+  // the storage is handed back as well (a plain clear() on a vector is
+  // allowed to keep its capacity)
+  //
+  // row start indices
+  std::vector<long int>().swap (Ap);
+  // column indices
+  std::vector<long int>().swap (Ai);
+  // matrix values
+  std::vector<double>().swap (Ax);
+
+  // finally, put the control parameters back to UMFPACK's defaults
+  umfpack_dl_defaults (&control[0]);
+}
+
+
+
+template <typename number>
+void
+SparseDirectUMFPACK::
+sort_arrays (const SparseMatrix<number> &matrix)
+{
+  // move the diagonal entry of every row to the place where it belongs.
+  // since all other entries of a row are already sorted by column, the
+  // diagonal entry, stored first, is misplaced exactly if its column
+  // index exceeds the column index of the entry that follows it
+  //
+  // rows with fewer than two entries need no work at all
+  const size_type n_rows = matrix.m();
+  for (size_type row=0; row<n_rows; ++row)
+    {
+      // index of the last entry of this row; an entry stored there has
+      // no successor it could be swapped with
+      const long int last = Ap[row+1]-1;
+
+      // entries left of the diagonal currently sit behind the diagonal
+      // entry and have to move one slot to the left, while the diagonal
+      // entry travels to the right. rather than computing the final
+      // position up front, push the diagonal entry along by swapping it
+      // with its right neighbor for as long as that neighbor has the
+      // smaller column index (a single bubble-sort pass, which is enough
+      // for an array that is already "almost" sorted). the loop
+      // condition also covers rows that are too short or already sorted
+      for (long int pos = Ap[row];
+           (pos < last) && (Ai[pos] > Ai[pos+1]);
+           ++pos)
+        {
+          std::swap (Ai[pos], Ai[pos+1]);
+          std::swap (Ax[pos], Ax[pos+1]);
+        }
+    }
+}
+
+
+
+template <typename number>
+void
+SparseDirectUMFPACK::
+sort_arrays (const SparseMatrixEZ<number> &matrix)
+{
+  // identical treatment for SparseMatrixEZ: in every row, bubble the
+  // first entry to the right while its right neighbor has the smaller
+  // column index
+  const size_type n_rows = matrix.m();
+  for (size_type row=0; row<n_rows; ++row)
+    for (long int pos = Ap[row];
+         (pos < Ap[row+1]-1) && (Ai[pos] > Ai[pos+1]);
+         ++pos)
+      {
+        std::swap (Ai[pos], Ai[pos+1]);
+        std::swap (Ax[pos], Ax[pos+1]);
+      }
+}
+
+
+
+template <typename number>
+void
+SparseDirectUMFPACK::
+sort_arrays (const BlockSparseMatrix<number> &matrix)
+{
+  // block matrices need more care: all we know is that *within each
+  // block* the diagonal entry of that block may be stored first. a row
+  // can therefore contain up to one misplaced entry per block column, so
+  // the single bubble pass used for non-block matrices is repeated up to
+  // n_block_cols() times per row
+  for (size_type row=0; row<matrix.m(); ++row)
+    {
+      long int cursor = Ap[row];
+      for (size_type block=0; block<matrix.n_block_cols(); ++block)
+        {
+          // advance to the next entry that is larger than its successor
+          while ((cursor < Ap[row+1]-1) &&
+                 (Ai[cursor] < Ai[cursor+1]))
+            ++cursor;
+
+          // reached the last entry: the row is sorted, leave the block loop
+          if (cursor == Ap[row+1]-1)
+            break;
+
+          // otherwise move that entry to the right by swapping it with its
+          // successors for as long as they have the smaller column index
+          long int element = cursor;
+          while ((element < Ap[row+1]-1) &&
+                 (Ai[element] > Ai[element+1]))
+            {
+              std::swap (Ai[element], Ai[element+1]);
+              std::swap (Ax[element], Ax[element+1]);
+              ++element;
+            }
+        }
+    }
+}
+
+
+
+template <class Matrix>
+void
+SparseDirectUMFPACK::
+factorize (const Matrix &matrix)
+{
+  Assert (matrix.m() == matrix.n(), ExcNotQuadratic());
+
+  clear ();
+
+  _m = matrix.m();
+  _n = matrix.n();
+
+  const size_type N = matrix.m();
+
+  // copy over the data from the matrix to the data structures UMFPACK
+  // wants. note two things: first, UMFPACK wants compressed column storage
+  // whereas we always do compressed row storage; we work around this by,
+  // rather than shuffling things around, copy over the data we have, but
+  // then call the umfpack_dl_solve function with the UMFPACK_At argument,
+  // meaning that we want to solve for the transpose system
+  //
+  // second: the data we have in the sparse matrices is "almost" right
+  // already; UMFPACK wants the entries in each row (i.e. really: column)
+  // to be sorted in ascending order. we almost have that, except that we
+  // usually store the diagonal first in each row to allow for some
+  // optimizations. thus, we have to resort things a little bit, but only
+  // within each row
+  //
+  // final note: if the matrix has entries in the sparsity pattern that are
+  // actually occupied by entries that have a zero numerical value, then we
+  // keep them anyway. people are supposed to provide accurate sparsity
+  // patterns.
+  Ap.resize (N+1);
+  Ai.resize (matrix.n_nonzero_elements());
+  Ax.resize (matrix.n_nonzero_elements());
+
+  // first fill row lengths array
+  Ap[0] = 0;
+  for (size_type row=1; row<=N; ++row)
+    Ap[row] = Ap[row-1] + matrix.get_row_length(row-1);
+  Assert (static_cast<size_type>(Ap.back()) == Ai.size(),
+          ExcInternalError());
+
+  // then copy over matrix elements. note that for sparse matrices,
+  // iterators are sorted so that they traverse each row from start to end
+  // before moving on to the next row. however, this isn't true for block
+  // matrices, so we have to do a bit of book keeping
+  {
+    // have an array that for each row points to the first entry not yet
+    // written to
+    std::vector<long int> row_pointers = Ap;
+
+    // loop over the elements of the matrix row by row, as suggested in the
+    // documentation of the sparse matrix iterator class
+    for (size_type row = 0; row < matrix.m(); ++row)
+      {
+        for (typename Matrix::const_iterator p=matrix.begin(row);
+             p!=matrix.end(row); ++p)
+          {
+            // write entry into the first free one for this row
+            Ai[row_pointers[row]] = p->column();
+            Ax[row_pointers[row]] = p->value();
+
+            // then move pointer ahead
+            ++row_pointers[row];
+          }
+      }
+
+    // at the end, we should have written all rows completely
+    for (size_type i=0; i<Ap.size()-1; ++i)
+      Assert (row_pointers[i] == Ap[i+1], ExcInternalError());
+  }
+
+  // make sure that the elements in each row are sorted. we have to be more
+  // careful for block sparse matrices, so ship this task out to a
+  // different function
+  sort_arrays (matrix);
+
+  int status = umfpack_dl_symbolic (N, N,
+                                    &Ap[0], &Ai[0], &Ax[0],
+                                    &symbolic_decomposition,
+                                    &control[0], 0);
+  AssertThrow (status == UMFPACK_OK,
+               ExcUMFPACKError("umfpack_dl_symbolic", status));
+
+  status = umfpack_dl_numeric (&Ap[0], &Ai[0], &Ax[0],
+                               symbolic_decomposition,
+                               &numeric_decomposition,
+                               &control[0], 0);
+  // the symbolic object is not needed beyond this point. release it before
+  // looking at the status so that it does not stay allocated if the
+  // numeric factorization failed and the check below throws
+  umfpack_dl_free_symbolic (&symbolic_decomposition);
+  AssertThrow (status == UMFPACK_OK,
+               ExcUMFPACKError("umfpack_dl_numeric", status));
+}
+
+
+
+void
+SparseDirectUMFPACK::solve (Vector<double> &rhs_and_solution,
+                            bool            transpose /*=false*/) const
+{
+  // the matrix arrays are filled by factorize(), so empty or mismatched
+  // arrays mean that no factorization is available
+  Assert (Ap.size() != 0, ExcNotInitialized());
+  Assert (Ai.size() != 0, ExcNotInitialized());
+  Assert (Ai.size() == Ax.size(), ExcNotInitialized());
+
+  // keep a copy of the input: it serves as the right hand side below
+  Vector<double> rhs_copy (rhs_and_solution.size());
+  rhs_copy = rhs_and_solution;
+
+  // UMFPACK interprets the arrays as compressed column storage, whereas
+  // they were filled row by row. what UMFPACK sees is therefore the
+  // transpose of our matrix: solving with our matrix means requesting
+  // UMFPACK_At, and solving with our transpose means requesting UMFPACK_A
+  const int which_system = (transpose ? UMFPACK_A : UMFPACK_At);
+  const int status = umfpack_dl_solve (which_system,
+                                       &Ap[0], &Ai[0], &Ax[0],
+                                       rhs_and_solution.begin(), rhs_copy.begin(),
+                                       numeric_decomposition,
+                                       &control[0], 0);
+  AssertThrow (status == UMFPACK_OK, ExcUMFPACKError("umfpack_dl_solve", status));
+}
+
+
+void
+SparseDirectUMFPACK::solve (BlockVector<double> &rhs_and_solution,
+                            bool                 transpose /*=false*/) const
+{
+  // UMFPACK needs one contiguous array of elements. copy the blocks into
+  // an ordinary vector, solve there, and copy the result back
+  Vector<double> flat (rhs_and_solution.size());
+  flat = rhs_and_solution;
+
+  this->solve (flat, transpose);
+  rhs_and_solution = flat;
+}
+
+
+
+template <class Matrix>
+void SparseDirectUMFPACK::solve (const Matrix   &matrix,
+                                 Vector<double> &rhs_and_solution,
+                                 bool            transpose /*=false*/)
+{
+  // factorize first, then solve in place with the new factorization
+  this->factorize (matrix);
+  this->solve (rhs_and_solution, transpose);
+}
+
+
+template <class Matrix>
+void SparseDirectUMFPACK::solve (const Matrix        &matrix,
+                                 BlockVector<double> &rhs_and_solution,
+                                 bool                 transpose /*=false*/)
+{
+  // factorize first, then solve in place with the new factorization
+  this->factorize (matrix);
+  this->solve (rhs_and_solution, transpose);
+}
+
+
+#else
+
+
+SparseDirectUMFPACK::SparseDirectUMFPACK ()
+  : _m (0), _n (0), // zero sizes make the checks in m() and n() fire
+    symbolic_decomposition (0),
+    numeric_decomposition (0),
+    control (0)
+{}
+
+
+// this variant of clear() for builds without UMFPACK does nothing
+void SparseDirectUMFPACK::clear ()
+{}
+
+
+template <class Matrix> void SparseDirectUMFPACK::factorize (const Matrix &)
+{
+  // placeholder for builds without UMFPACK: every call throws
+  AssertThrow(false, ExcMessage("To call this function you need UMFPACK, but you configured deal.II without passing the necessary switch to 'cmake'. Please consult the installation instructions in doc/readme.html."));
+}
+
+
+void SparseDirectUMFPACK::solve (Vector<double> &, bool) const
+{
+  // placeholder for builds without UMFPACK: every call throws
+  AssertThrow(false, ExcMessage("To call this function you need UMFPACK, but you configured deal.II without passing the necessary switch to 'cmake'. Please consult the installation instructions in doc/readme.html."));
+}
+
+
+
+void SparseDirectUMFPACK::solve (BlockVector<double> &, bool) const
+{
+  // placeholder for builds without UMFPACK: every call throws
+  AssertThrow(false, ExcMessage("To call this function you need UMFPACK, but you configured deal.II without passing the necessary switch to 'cmake'. Please consult the installation instructions in doc/readme.html."));
+}
+
+
+template <class Matrix>
+void SparseDirectUMFPACK::solve (const Matrix &, Vector<double> &, bool)
+{
+  // placeholder for builds without UMFPACK: the arguments are ignored
+  // and every call throws
+  AssertThrow(false,
+              ExcMessage("To call this function you need UMFPACK, but you configured deal.II without passing the necessary switch to 'cmake'. Please consult the installation instructions in doc/readme.html."));
+}
+
+
+
+template <class Matrix>
+void SparseDirectUMFPACK::solve (const Matrix &, BlockVector<double> &, bool)
+{
+  // placeholder for builds without UMFPACK: the arguments are ignored
+  // and every call throws
+  AssertThrow(false,
+              ExcMessage("To call this function you need UMFPACK, but you configured deal.II without passing the necessary switch to 'cmake'. Please consult the installation instructions in doc/readme.html."));
+}
+
+#endif
+
+
+template <class Matrix>
+void SparseDirectUMFPACK::initialize (const Matrix &M,
+                                      const AdditionalData)
+{
+  // the additional data argument is unused; only factorize the matrix
+  factorize (M);
+}
+
+
+void
+SparseDirectUMFPACK::vmult (Vector<double>       &dst,
+                            const Vector<double> &src) const
+{
+  // solve() works in place: seed dst with the right hand side first
+  dst = src;
+  solve (dst, /*transpose=*/ false);
+}
+
+
+
+void
+SparseDirectUMFPACK::vmult (BlockVector<double>       &dst,
+                            const BlockVector<double> &src) const
+{
+  // solve() works in place: seed dst with the right hand side first
+  dst = src;
+  solve (dst, /*transpose=*/ false);
+}
+
+
+void
+SparseDirectUMFPACK::Tvmult (Vector<double>       &dst,
+                             const Vector<double> &src) const
+{
+  // same as vmult(), but request the solve with the transpose
+  dst = src;
+  solve (dst, true);
+}
+
+
+
+void
+SparseDirectUMFPACK::Tvmult (BlockVector<double>       &dst,
+                             const BlockVector<double> &src) const
+{
+  // same as vmult(), but request the solve with the transpose
+  dst = src;
+  solve (dst, true);
+}
+
+SparseDirectUMFPACK::size_type SparseDirectUMFPACK::m () const
+{
+  // _m is set by factorize(); a value of zero is treated as
+  // "not initialized"
+  Assert (_m!=0, ExcNotInitialized());
+  return _m;
+}
+
+SparseDirectUMFPACK::size_type SparseDirectUMFPACK::n () const
+{
+  // _n is set by factorize(); a value of zero is treated as
+  // "not initialized"
+  Assert (_n!=0, ExcNotInitialized());
+  return _n;
+}
+
+
+// explicit instantiations of the SparseDirectUMFPACK member templates
+#define InstantiateUMFPACK(MatrixType)                      \
+  template                                                  \
+  void SparseDirectUMFPACK::factorize (const MatrixType &); \
+  template                                                  \
+  void SparseDirectUMFPACK::solve (const MatrixType &,      \
+                                   Vector<double> &,        \
+                                   bool);                   \
+  template                                                  \
+  void SparseDirectUMFPACK::solve (const MatrixType &,      \
+                                   BlockVector<double> &,   \
+                                   bool);                   \
+  template                                                  \
+  void SparseDirectUMFPACK::initialize (const MatrixType &, \
+                                        const AdditionalData);
+
+InstantiateUMFPACK(SparseMatrix<double>)
+InstantiateUMFPACK(SparseMatrix<float>)
+InstantiateUMFPACK(SparseMatrixEZ<double>)
+InstantiateUMFPACK(SparseMatrixEZ<float>)
+InstantiateUMFPACK(BlockSparseMatrix<double>)
+InstantiateUMFPACK(BlockSparseMatrix<float>)
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/lac/sparse_ilu.cc b/source/lac/sparse_ilu.cc
new file mode 100644
index 0000000..035e0a9
--- /dev/null
+++ b/source/lac/sparse_ilu.cc
@@ -0,0 +1,51 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/sparse_ilu.templates.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// explicit instantiations
+template class SparseILU<double>;
+template void SparseILU<double>::initialize<double> (const SparseMatrix<double> &,
+                                                     const AdditionalData &data);
+template void SparseILU<double>::vmult <double> (Vector<double> &,
+                                                 const Vector<double> &) const;
+template void SparseILU<double>::Tvmult <double> (Vector<double> &,
+                                                  const Vector<double> &) const;
+template void SparseILU<double>::initialize<float> (const SparseMatrix<float> &,
+                                                    const AdditionalData &data);
+template void SparseILU<double>::vmult<float> (Vector<float> &,
+                                               const Vector<float> &) const;
+template void SparseILU<double>::Tvmult<float> (Vector<float> &,
+                                                const Vector<float> &) const;
+
+
+template class SparseILU<float>;
+template void SparseILU<float>::initialize<double> (const SparseMatrix<double> &,
+                                                    const AdditionalData &data);
+template void SparseILU<float>::vmult<double> (Vector<double> &,
+                                               const Vector<double> &) const;
+template void SparseILU<float>::Tvmult<double> (Vector<double> &,
+                                                const Vector<double> &) const;
+template void SparseILU<float>::initialize<float> (const SparseMatrix<float> &,
+                                                   const AdditionalData &data);
+template void SparseILU<float>::vmult<float> (Vector<float> &,
+                                              const Vector<float> &) const;
+template void SparseILU<float>::Tvmult<float> (Vector<float> &,
+                                               const Vector<float> &) const;
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/lac/sparse_matrix.cc b/source/lac/sparse_matrix.cc
new file mode 100644
index 0000000..21f590a
--- /dev/null
+++ b/source/lac/sparse_matrix.cc
@@ -0,0 +1,26 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/sparse_matrix.templates.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/parallel_vector.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+#define SPLIT_INSTANTIATIONS_COUNT 2
+#define SPLIT_INSTANTIATIONS_INDEX 0
+#include "sparse_matrix.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/lac/sparse_matrix.inst.in b/source/lac/sparse_matrix.inst.in
new file mode 100644
index 0000000..dee0b31
--- /dev/null
+++ b/source/lac/sparse_matrix.inst.in
@@ -0,0 +1,294 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+// real instantiations
+
+for (S : REAL_SCALARS)
+  {
+    template class SparseMatrix<S>;
+  }
+
+
+
+for (S1, S2 : REAL_SCALARS)
+  {
+    template SparseMatrix<S1> &
+      SparseMatrix<S1>::copy_from<S2> (const SparseMatrix<S2> &);
+
+    template
+      void SparseMatrix<S1>::copy_from<S2> (const FullMatrix<S2> &);
+
+    template void SparseMatrix<S1>::add<S2> (const S1,
+                                             const SparseMatrix<S2> &);
+
+    template void SparseMatrix<S1>::add<S2> (const size_type,
+                                             const size_type,
+                                             const size_type *,
+                                             const S2 *,
+                                             const bool,
+                                             const bool);
+
+    template void SparseMatrix<S1>::set<S2> (const size_type,
+                                             const size_type,
+                                             const size_type *,
+                                             const S2 *,
+                                             const bool);
+  }
+
+
+for (S1, S2 : REAL_SCALARS)
+  {
+    template S2
+      SparseMatrix<S1>::
+      matrix_norm_square<S2> (const Vector<S2> &) const;
+
+    template S2
+      SparseMatrix<S1>::
+      matrix_scalar_product<S2> (const Vector<S2> &,
+                                 const Vector<S2> &) const;
+
+    template S2 SparseMatrix<S1>::
+      residual<S2> (Vector<S2> &,
+                    const Vector<S2> &,
+                    const Vector<S2> &) const;
+
+    template void SparseMatrix<S1>::
+      precondition_SSOR<S2> (Vector<S2> &,
+                             const Vector<S2> &,
+                             const S1,
+                             const std::vector<std::size_t>&) const;
+
+    template void SparseMatrix<S1>::
+      precondition_SOR<S2> (Vector<S2> &,
+                            const Vector<S2> &,
+                            const S1) const;
+
+    template void SparseMatrix<S1>::
+      precondition_TSOR<S2> (Vector<S2> &,
+                             const Vector<S2> &,
+                             const S1) const;
+
+    template void SparseMatrix<S1>::
+      precondition_Jacobi<S2> (Vector<S2> &,
+                               const Vector<S2> &,
+                               const S1) const;
+
+    template void SparseMatrix<S1>::
+      SOR<S2> (Vector<S2> &,
+               const S1) const;
+    template void SparseMatrix<S1>::
+      TSOR<S2> (Vector<S2> &,
+                const S1) const;
+    template void SparseMatrix<S1>::
+      SSOR<S2> (Vector<S2> &,
+                const S1) const;
+    template void SparseMatrix<S1>::
+      PSOR<S2> (Vector<S2> &,
+                const std::vector<size_type>&,
+                const std::vector<size_type>&,
+                const S1) const;
+    template void SparseMatrix<S1>::
+      TPSOR<S2> (Vector<S2> &,
+                 const std::vector<size_type>&,
+                 const std::vector<size_type>&,
+                 const S1) const;
+    template void SparseMatrix<S1>::
+      Jacobi_step<S2> (Vector<S2> &,
+                       const Vector<S2> &,
+                       const S1) const;
+    template void SparseMatrix<S1>::
+      SOR_step<S2> (Vector<S2> &,
+                    const Vector<S2> &,
+                    const S1) const;
+    template void SparseMatrix<S1>::
+      TSOR_step<S2> (Vector<S2> &,
+                     const Vector<S2> &,
+                     const S1) const;
+    template void SparseMatrix<S1>::
+      SSOR_step<S2> (Vector<S2> &,
+                     const Vector<S2> &,
+                     const S1) const;
+  }
+
+for (S1, S2, S3 : REAL_SCALARS;
+     V1, V2     : DEAL_II_VEC_TEMPLATES)
+  {
+    template void SparseMatrix<S1>::
+      vmult (V1<S2> &, const V2<S3> &) const;
+    template void SparseMatrix<S1>::
+      Tvmult (V1<S2> &, const V2<S3> &) const;
+    template void SparseMatrix<S1>::
+      vmult_add (V1<S2> &, const V2<S3> &) const;
+    template void SparseMatrix<S1>::
+      Tvmult_add (V1<S2> &, const V2<S3> &) const;
+  }
+
+for (S1 : REAL_SCALARS)
+  {
+    template void SparseMatrix<S1>::
+      vmult (parallel::distributed::Vector<S1> &, const parallel::distributed::Vector<S1> &) const;
+    template void SparseMatrix<S1>::
+      Tvmult (parallel::distributed::Vector<S1> &, const parallel::distributed::Vector<S1> &) const;
+    template void SparseMatrix<S1>::
+      vmult_add (parallel::distributed::Vector<S1> &, const parallel::distributed::Vector<S1> &) const;
+    template void SparseMatrix<S1>::
+      Tvmult_add (parallel::distributed::Vector<S1> &, const parallel::distributed::Vector<S1> &) const;
+  }
+
+for (S1, S2, S3: REAL_SCALARS)
+  {
+    template void SparseMatrix<S1>::
+      mmult (SparseMatrix<S2> &, const SparseMatrix<S3> &, const Vector<S1>&,
+             const bool) const;
+    template void SparseMatrix<S1>::
+      Tmmult (SparseMatrix<S2> &, const SparseMatrix<S3> &, const Vector<S1>&,
+              const bool) const;
+  }
+
+
+
+// complex instantiations
+
+for (S : COMPLEX_SCALARS)
+  {
+    template class SparseMatrix<S>;
+  }
+
+
+
+for (S1, S2 : COMPLEX_SCALARS)
+  {
+    template SparseMatrix<S1> &
+      SparseMatrix<S1>::copy_from<S2> (const SparseMatrix<S2> &);
+
+    template
+      void SparseMatrix<S1>::copy_from<S2> (const FullMatrix<S2> &);
+
+    template void SparseMatrix<S1>::add<S2> (const S1,
+                                             const SparseMatrix<S2> &);
+
+    template void SparseMatrix<S1>::add<S2> (const size_type,
+                                             const size_type,
+                                             const size_type *,
+                                             const S2 *,
+                                             const bool,
+                                             const bool);
+
+    template void SparseMatrix<S1>::set<S2> (const size_type,
+                                             const size_type,
+                                             const size_type *,
+                                             const S2 *,
+                                             const bool);
+  }
+
+
+for (S1, S2 : COMPLEX_SCALARS)
+  {
+    template S2
+      SparseMatrix<S1>::
+      matrix_norm_square<S2> (const Vector<S2> &) const;
+
+    template S2
+      SparseMatrix<S1>::
+      matrix_scalar_product<S2> (const Vector<S2> &,
+                                 const Vector<S2> &) const;
+
+    template S2 SparseMatrix<S1>::
+      residual<S2> (Vector<S2> &,
+                    const Vector<S2> &,
+                    const Vector<S2> &) const;
+
+    template void SparseMatrix<S1>::
+      precondition_SSOR<S2> (Vector<S2> &,
+                             const Vector<S2> &,
+                             const S1,
+                             const std::vector<std::size_t>&) const;
+
+    template void SparseMatrix<S1>::
+      precondition_SOR<S2> (Vector<S2> &,
+                            const Vector<S2> &,
+                            const S1) const;
+
+    template void SparseMatrix<S1>::
+      precondition_TSOR<S2> (Vector<S2> &,
+                             const Vector<S2> &,
+                             const S1) const;
+
+    template void SparseMatrix<S1>::
+      precondition_Jacobi<S2> (Vector<S2> &,
+                               const Vector<S2> &,
+                               const S1) const;
+
+    template void SparseMatrix<S1>::
+      SOR<S2> (Vector<S2> &,
+               const S1) const;
+    template void SparseMatrix<S1>::
+      TSOR<S2> (Vector<S2> &,
+                const S1) const;
+    template void SparseMatrix<S1>::
+      SSOR<S2> (Vector<S2> &,
+                const S1) const;
+    template void SparseMatrix<S1>::
+      PSOR<S2> (Vector<S2> &,
+                const std::vector<size_type>&,
+                const std::vector<size_type>&,
+                const S1) const;
+    template void SparseMatrix<S1>::
+      TPSOR<S2> (Vector<S2> &,
+                 const std::vector<size_type>&,
+                 const std::vector<size_type>&,
+                 const S1) const;
+    template void SparseMatrix<S1>::
+      Jacobi_step<S2> (Vector<S2> &,
+                       const Vector<S2> &,
+                       const S1) const;
+    template void SparseMatrix<S1>::
+      SOR_step<S2> (Vector<S2> &,
+                    const Vector<S2> &,
+                    const S1) const;
+    template void SparseMatrix<S1>::
+      TSOR_step<S2> (Vector<S2> &,
+                     const Vector<S2> &,
+                     const S1) const;
+    template void SparseMatrix<S1>::
+      SSOR_step<S2> (Vector<S2> &,
+                     const Vector<S2> &,
+                     const S1) const;
+  }
+
+for (S1, S2, S3 : COMPLEX_SCALARS;
+     V1, V2     : DEAL_II_VEC_TEMPLATES)
+  {
+    template void SparseMatrix<S1>::
+      vmult (V1<S2> &, const V2<S3> &) const;
+    template void SparseMatrix<S1>::
+      Tvmult (V1<S2> &, const V2<S3> &) const;
+    template void SparseMatrix<S1>::
+      vmult_add (V1<S2> &, const V2<S3> &) const;
+    template void SparseMatrix<S1>::
+      Tvmult_add (V1<S2> &, const V2<S3> &) const;
+  }
+
+for (S1, S2, S3: COMPLEX_SCALARS)
+  {
+    template void SparseMatrix<S1>::
+      mmult (SparseMatrix<S2> &, const SparseMatrix<S3> &, const Vector<S1>&,
+             const bool) const;
+    template void SparseMatrix<S1>::
+      Tmmult (SparseMatrix<S2> &, const SparseMatrix<S3> &, const Vector<S1>&,
+              const bool) const;
+  }
diff --git a/source/lac/sparse_matrix_ez.cc b/source/lac/sparse_matrix_ez.cc
new file mode 100644
index 0000000..9db5ae1
--- /dev/null
+++ b/source/lac/sparse_matrix_ez.cc
@@ -0,0 +1,21 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2007 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/sparse_matrix_ez.templates.h>
+#include <deal.II/lac/block_vector.h>
+
+DEAL_II_NAMESPACE_OPEN
+#include "sparse_matrix_ez.inst" // instantiations are pulled in between the namespace macros; presumably generated from sparse_matrix_ez.inst.in
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/lac/sparse_matrix_ez.inst.in b/source/lac/sparse_matrix_ez.inst.in
new file mode 100644
index 0000000..9827253
--- /dev/null
+++ b/source/lac/sparse_matrix_ez.inst.in
@@ -0,0 +1,57 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2002 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+for (S : REAL_SCALARS)
+  { // explicit instantiation of the class template itself, one per scalar type S
+    template class SparseMatrixEZ<S>;
+  }
+
+
+for (S1, S2 : REAL_SCALARS)
+  { // member templates of SparseMatrixEZ<S1> taking Vector<S2> arguments: first the matrix-vector products
+    template
+      void SparseMatrixEZ<S1>::vmult<S2> (Vector<S2> &,
+					  const Vector<S2> &) const;
+    template
+      void SparseMatrixEZ<S1>::Tvmult<S2> (Vector<S2> &,
+					   const Vector<S2> &) const;
+    template
+      void SparseMatrixEZ<S1>::vmult_add<S2> (Vector<S2> &,
+					      const Vector<S2> &) const;
+    template
+      void SparseMatrixEZ<S1>::Tvmult_add<S2> (Vector<S2> &,
+					       const Vector<S2> &) const;
+    // ... then the precondition_* member templates; their scalar argument has the matrix type S1
+    template
+      void SparseMatrixEZ<S1>::precondition_SSOR<S2> (Vector<S2> &,
+						      const Vector<S2> &,
+						      const S1,
+						      const std::vector<std::size_t>&) const;
+    template
+      void SparseMatrixEZ<S1>::precondition_SOR<S2> (Vector<S2> &,
+						     const Vector<S2> &,
+						     const S1) const;
+    template
+      void SparseMatrixEZ<S1>::precondition_TSOR<S2> (Vector<S2> &,
+						      const Vector<S2> &,
+						      const S1) const;
+    template
+      void SparseMatrixEZ<S1>::precondition_Jacobi<S2> (Vector<S2> &,
+							const Vector<S2> &,
+							const S1) const;
+  }
+
diff --git a/source/lac/sparse_matrix_inst2.cc b/source/lac/sparse_matrix_inst2.cc
new file mode 100644
index 0000000..2452756
--- /dev/null
+++ b/source/lac/sparse_matrix_inst2.cc
@@ -0,0 +1,27 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/lac/sparse_matrix.templates.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/parallel_vector.h>
+
+DEAL_II_NAMESPACE_OPEN
+// NOTE(review): the two macros presumably select part 1 of 2 of the instantiations in sparse_matrix.inst -- confirm
+#define SPLIT_INSTANTIATIONS_COUNT 2
+#define SPLIT_INSTANTIATIONS_INDEX 1
+#include "sparse_matrix.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/lac/sparse_mic.cc b/source/lac/sparse_mic.cc
new file mode 100644
index 0000000..0fb5ca5
--- /dev/null
+++ b/source/lac/sparse_mic.cc
@@ -0,0 +1,53 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2002 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/lac/sparse_mic.templates.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// explicit instantiations of SparseMIC<double> and of its member templates for double and float arguments
+template class SparseMIC<double>;
+template void SparseMIC<double>::initialize<double> (const SparseMatrix<double> &,
+                                                     const AdditionalData &data);
+template void SparseMIC<double>::vmult<double> (Vector<double> &,
+                                                const Vector<double> &) const;
+template void SparseMIC<double>::Tvmult<double> (Vector<double> &,
+                                                 const Vector<double> &) const;
+template void SparseMIC<double>::initialize<float> (const SparseMatrix<float> &,
+                                                    const AdditionalData &data);
+template void SparseMIC<double>::vmult<float> (Vector<float> &,
+                                               const Vector<float> &) const;
+template void SparseMIC<double>::Tvmult<float> (Vector<float> &,
+                                                const Vector<float> &) const;
+// the same set of instantiations for SparseMIC<float>
+template class SparseMIC<float>;
+template void SparseMIC<float>::initialize<double> (const SparseMatrix<double> &,
+                                                    const AdditionalData &data);
+template void SparseMIC<float>::vmult<double> (Vector<double> &,
+                                               const Vector<double> &) const;
+template void SparseMIC<float>::Tvmult<double> (Vector<double> &,
+                                                const Vector<double> &) const;
+template void SparseMIC<float>::initialize<float> (const SparseMatrix<float> &,
+                                                   const AdditionalData &data);
+template void SparseMIC<float>::vmult<float> (Vector<float> &,
+                                              const Vector<float> &) const;
+template void SparseMIC<float>::Tvmult<float> (Vector<float> &,
+                                               const Vector<float> &) const;
+
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/lac/sparse_vanka.cc b/source/lac/sparse_vanka.cc
new file mode 100644
index 0000000..9f4397d
--- /dev/null
+++ b/source/lac/sparse_vanka.cc
@@ -0,0 +1,39 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/sparse_vanka.templates.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// explicit instantiations of the class templates for float and double
+template class SparseVanka<float>;
+template class SparseVanka<double>;
+// vmult member template: instantiated in this file only for SparseVanka<double>
+template void SparseVanka<double>::vmult<float> (Vector<float>       &dst,
+                                                 const Vector<float> &src) const;
+template void SparseVanka<double>::vmult<double> (Vector<double>       &dst,
+                                                  const Vector<double> &src) const;
+
+
+template class SparseBlockVanka<float>;
+template class SparseBlockVanka<double>;
+// vmult member template: instantiated in this file only for SparseBlockVanka<double>
+template void SparseBlockVanka<double>::vmult<float> (Vector<float>       &dst,
+                                                      const Vector<float> &src) const;
+template void SparseBlockVanka<double>::vmult<double> (Vector<double>       &dst,
+                                                       const Vector<double> &src) const;
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/lac/sparsity_pattern.cc b/source/lac/sparsity_pattern.cc
new file mode 100644
index 0000000..070a846
--- /dev/null
+++ b/source/lac/sparsity_pattern.cc
@@ -0,0 +1,1023 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/vector_slice.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/lac/sparsity_pattern.h>
+#include <deal.II/lac/sparsity_tools.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+
+#include <iostream>
+#include <iomanip>
+#include <algorithm>
+#include <cmath>
+#include <numeric>
+#include <functional>
+
+DEAL_II_NAMESPACE_OPEN
+
+#ifdef DEAL_II_MSVC
+__declspec(selectany) // MSVC only: weak binding, works around multiple-definition link errors
+#endif
+const SparsityPattern::size_type SparsityPattern::invalid_entry; // out-of-class definition; marks unused colnums slots in this file
+
+
+
+// Default constructor. All capacities and both array pointers start out as
+// zero; reinit() with zero dimensions then establishes the empty state.
+SparsityPattern::SparsityPattern ()
+  : max_dim (0), max_vec_len (0),
+    rowstart (0), colnums (0),
+    compressed (false),
+    store_diagonal_first_in_row (false)
+{
+  // zero rows, zero columns, zero entries per row
+  reinit (0, 0, 0);
+}
+
+
+
+// Copy constructor. The assertions below only admit empty source patterns,
+// so the new object is simply set up as an empty pattern itself.
+SparsityPattern::SparsityPattern (const SparsityPattern &s)
+  : Subscriptor (),
+    max_dim (0), max_vec_len (0),
+    rowstart (0), colnums (0),
+    compressed (false),
+    store_diagonal_first_in_row (false)
+{
+  // presumably avoids an unused-parameter warning if Assert expands to nothing
+  (void)s;
+  Assert (s.rowstart == 0, ExcInvalidConstructorCall());
+  Assert (s.colnums == 0, ExcInvalidConstructorCall());
+  Assert (s.rows == 0, ExcInvalidConstructorCall());
+  Assert (s.cols == 0, ExcInvalidConstructorCall());
+
+  reinit (0, 0, 0);
+}
+
+
+
+// Construct an m x n pattern in which every row gets room for up to
+// max_per_row entries; the actual allocation is delegated to reinit().
+SparsityPattern::SparsityPattern (const size_type m,
+                                  const size_type n,
+                                  const unsigned int max_per_row)
+  : max_dim (0), max_vec_len (0),
+    rowstart (0), colnums (0),
+    compressed (false),
+    // square patterns keep the diagonal entry first in each row
+    store_diagonal_first_in_row (m == n)
+{
+  reinit (m, n, max_per_row);
+}
+
+
+
+// Construct an m x n pattern where row i gets room for row_lengths[i]
+// entries. compressed is initialized explicitly, as in the sibling
+// constructors, so that no flag is indeterminate before reinit() runs.
+SparsityPattern::SparsityPattern (const size_type m,
+                                  const size_type n,
+                                  const std::vector<unsigned int> &row_lengths)
+  : max_dim (0), max_vec_len (0),
+    rowstart (0), colnums (0),
+    compressed (false), store_diagonal_first_in_row (m == n)
+{
+  reinit (m, n, row_lengths);
+}
+
+
+
+// Construct a square m x m pattern with room for max_per_row entries per row.
+// reinit() returns early for m==0 without assigning the diagonal flag, so
+// both flags are initialized here to avoid indeterminate members.
+SparsityPattern::SparsityPattern (const size_type m,
+                                  const unsigned int max_per_row)
+  : max_dim (0), max_vec_len (0), rowstart (0), colnums (0),
+    compressed (false), store_diagonal_first_in_row (true)
+{
+  reinit (m, m, max_per_row);
+}
+
+
+
+// Construct a square m x m pattern where row i has room for row_lengths[i]
+// entries. reinit() returns early for m==0 without assigning the diagonal
+// flag, so both flags are initialized here to avoid indeterminate members.
+SparsityPattern::SparsityPattern (const size_type               m,
+                                  const std::vector<unsigned int> &row_lengths)
+  : max_dim (0), max_vec_len (0), rowstart (0), colnums (0),
+    compressed (false), store_diagonal_first_in_row (true)
+{
+  reinit (m, m, row_lengths);
+}
+
+
+
+// Build a new square pattern from the compressed square pattern original,
+// adding extra_off_diagonals side-diagonals on each side of the main
+// diagonal; every row gets max_per_row slots. Both flags are initialized
+// explicitly because reinit() returns early for an empty pattern without
+// assigning the diagonal flag.
+SparsityPattern::SparsityPattern (const SparsityPattern &original,
+                                  const unsigned int        max_per_row,
+                                  const size_type        extra_off_diagonals)
+  : max_dim (0), max_vec_len (0), rowstart (0), colnums (0),
+    compressed (false), store_diagonal_first_in_row (true)
+{
+  Assert (original.rows==original.cols, ExcNotQuadratic());
+  Assert (original.is_compressed(), ExcNotCompressed());
+
+  reinit (original.rows, original.cols, max_per_row);
+
+  // now copy the entries from the other object
+  for (size_type row=0; row<original.rows; ++row)
+    {
+      // copy the elements of this row of the other object. note that the
+      // first element actually is the main-diagonal element, which we need
+      // not copy. we do the copying in two steps: first we note that the
+      // elements in @p{original} are sorted, so we may first copy all the
+      // elements up to the first side-diagonal one which is to be filled in.
+      // then we insert the side-diagonals, finally copy the rest from that
+      // element onwards which is not a side-diagonal any more.
+      const size_type *const
+      original_row_start = &original.colnums[original.rowstart[row]] + 1;
+      // the following requires that @p{original} be compressed since
+      // otherwise there might be invalid_entry's
+      const size_type *const
+      original_row_end   = &original.colnums[original.rowstart[row+1]];
+
+      // find pointers before and after extra off-diagonals. if at top or
+      // bottom of matrix, then set these pointers such that no copying is
+      // necessary (see the @p{copy} commands). the second condition is
+      // written as rows-row-1 so that the unsigned subtraction cannot wrap
+      const size_type *const
+      original_last_before_side_diagonals
+        = (row > extra_off_diagonals ?
+           Utilities::lower_bound (original_row_start,
+                                   original_row_end,
+                                   row-extra_off_diagonals) :
+           original_row_start);
+
+      const size_type *const
+      original_first_after_side_diagonals
+        = (extra_off_diagonals < rows-row-1 ?
+           std::upper_bound (original_row_start,
+                             original_row_end,
+                             row+extra_off_diagonals) :
+           original_row_end);
+
+      // find first free slot. the first slot in each row is the diagonal
+      // element
+      size_type *next_free_slot = &colnums[rowstart[row]] + 1;
+
+      // copy elements before side-diagonals
+      next_free_slot = std::copy (original_row_start,
+                                  original_last_before_side_diagonals,
+                                  next_free_slot);
+
+      // insert left and right side-diagonals
+      for (size_type i=1; i<=std::min(row,extra_off_diagonals);
+           ++i, ++next_free_slot)
+        *next_free_slot = row-i;
+      for (size_type i=1; i<=std::min(extra_off_diagonals, rows-row-1);
+           ++i, ++next_free_slot)
+        *next_free_slot = row+i;
+
+      // copy rest
+      next_free_slot = std::copy (original_first_after_side_diagonals,
+                                  original_row_end,
+                                  next_free_slot);
+
+      // this error may happen if the sum of previous elements per row and
+      // those of the new diagonals exceeds the maximum number of elements per
+      // row given to this constructor
+      Assert (next_free_slot <= &colnums[rowstart[row+1]],
+              ExcNotEnoughSpace (0,rowstart[row+1]-rowstart[row]));
+    }
+}
+
+
+
+SparsityPattern::~SparsityPattern ()
+{ // release both index arrays; delete[] on a null pointer does nothing
+  delete[] rowstart;
+  delete[] colnums;
+}
+
+
+
+// Assignment copies nothing: the assertions require both operands to be empty
+SparsityPattern &SparsityPattern::operator = (const SparsityPattern &s)
+{
+  (void)s;
+  // the right hand side must be an empty pattern ...
+  Assert (s.rowstart == 0, ExcInvalidConstructorCall());
+  Assert (s.colnums == 0, ExcInvalidConstructorCall());
+  Assert (s.rows == 0, ExcInvalidConstructorCall());
+  Assert (s.cols == 0, ExcInvalidConstructorCall());
+  // ... and so must the object that is assigned to
+  Assert (rowstart == 0, ExcInvalidConstructorCall());
+  Assert (colnums == 0, ExcInvalidConstructorCall());
+  Assert (rows == 0, ExcInvalidConstructorCall());
+  Assert (cols == 0, ExcInvalidConstructorCall());
+  return *this;
+}
+
+
+
+// Reinitialize as an m x n pattern in which every one of the m rows may
+// hold up to max_per_row entries.
+void SparsityPattern::reinit (const size_type m,
+                              const size_type n,
+                              const unsigned int max_per_row)
+{
+  // uniform row lengths: forward to the overload taking one length per row
+  reinit (m, n, std::vector<unsigned int> (m, max_per_row));
+}
+
+// Reinitialize as an m x n pattern in which row i has room for row_lengths[i]
+// entries (at least one per row if m==n, and never more than n).
+void
+SparsityPattern::reinit (const size_type m,
+                         const size_type n,
+                         const VectorSlice<const std::vector<unsigned int> > &row_lengths)
+{
+  AssertDimension (row_lengths.size(), m);
+
+  rows = m;
+  cols = n;
+
+  // empty pattern requested: release all memory and reset the bookkeeping
+  if ((m==0) || (n==0))
+    {
+      if (rowstart)  delete[] rowstart;
+      if (colnums)   delete[] colnums;
+      rowstart = 0;
+      colnums = 0;
+      max_vec_len = max_dim = rows = cols = 0;
+      // if dimension is zero: ignore the requested row lengths
+      max_row_length = 0;
+      compressed = false;
+      return; // NOTE(review): store_diagonal_first_in_row keeps its previous value on this path
+    }
+
+  // first, if the matrix is square, we will have to make sure that each
+  // row has at least one entry for the diagonal element. make this more
+  // obvious by having a variable which we can query
+  store_diagonal_first_in_row = (m == n);
+
+  // find out how many entries we need in the @p{colnums} array. if this
+  // number is larger than @p{max_vec_len}, then we will need to reallocate
+  // memory
+  //
+  // note that the number of elements per row is bounded by the number of
+  // columns
+  //
+  std::size_t vec_len = 0;
+  for (size_type i=0; i<m; ++i)
+    vec_len += std::min(static_cast<size_type>(store_diagonal_first_in_row ?
+                                               std::max(row_lengths[i], 1U) :
+                                               row_lengths[i]),
+                        n);
+
+  // sometimes, no entries are requested in the matrix (this most often
+  // happens when blocks in a block matrix are simply zero). in that case,
+  // allocate exactly one element, to have a valid pointer to some memory
+  if (vec_len == 0)
+    {
+      vec_len = 1;
+      if (colnums)
+        {
+          delete[] colnums;
+          colnums = 0;
+        }
+
+      max_vec_len = vec_len;
+      colnums = new size_type[max_vec_len];
+    }
+
+  max_row_length = (row_lengths.size() == 0 ?
+                    0 :
+                    std::min (static_cast<size_type>(*std::max_element(row_lengths.begin(),
+                                                     row_lengths.end())),
+                              n));
+
+  if (store_diagonal_first_in_row && (max_row_length==0) && (m!=0))
+    max_row_length = 1;
+
+  // allocate memory for the rowstart values, if necessary. even though we
+  // re-set the pointers again immediately after deleting their old content,
+  // set them to zero in between because the allocation might fail, in which
+  // case we get an exception and the destructor of this object will be called
+  // -- where we look at the non-nullness of the (now invalid) pointer again
+  // and try to delete the memory a second time.
+  if (rows > max_dim)
+    {
+      if (rowstart)
+        {
+          delete[] rowstart;
+          rowstart = 0;
+        }
+
+      max_dim = rows;
+      rowstart = new std::size_t[max_dim+1];
+    }
+
+  // allocate memory for the column numbers if necessary
+  if (vec_len > max_vec_len)
+    {
+      if (colnums)
+        {
+          delete[] colnums;
+          colnums = 0;
+        }
+
+      max_vec_len = vec_len;
+      colnums = new size_type[max_vec_len];
+    }
+
+  // set the rowstart array: entry i is the sum of the lengths of rows 0..i-1
+  rowstart[0] = 0;
+  for (size_type i=1; i<=rows; ++i)
+    rowstart[i] = rowstart[i-1] +
+                  (store_diagonal_first_in_row ?
+                   std::max(std::min(static_cast<size_type>(row_lengths[i-1]),n),
+                            static_cast<size_type> (1U)) :
+                   std::min(static_cast<size_type>(row_lengths[i-1]),n));
+  Assert ((rowstart[rows]==vec_len)
+          ||
+          ((vec_len == 1) && (rowstart[rows] == 0)),
+          ExcInternalError());
+
+  // preset the column numbers by a value indicating it is not in use
+  std::fill_n (&colnums[0], vec_len, invalid_entry);
+
+  // if diagonal elements are special: let the first entry in each row be the
+  // diagonal value
+  if (store_diagonal_first_in_row)
+    for (size_type i=0; i<rows; i++)
+      colnums[rowstart[i]] = i;
+
+  compressed = false;
+}
+
+
+
+// Compress the pattern: drop all unused slots, sort the column indices of
+// each row (leaving a leading diagonal entry in place) and shrink colnums to
+// exactly the number of entries in use. Does nothing if already compressed.
+void SparsityPattern::compress ()
+{
+  Assert ((rowstart!=0) && (colnums!=0), ExcEmptyObject());
+
+  // do nothing if already compressed
+  if (compressed)
+    return;
+
+  size_type next_free_entry = 0,
+            next_row_start  = 0,
+            row_length      = 0;
+
+  // first find out how many non-zero elements there are, in order to allocate
+  // the right amount of memory: all slots minus the unused ones. std::count
+  // is used rather than std::bind2nd, which is no longer available in C++17
+  const std::size_t nonzero_elements
+    = (rowstart[rows] - rowstart[0])
+      - std::count (&colnums[rowstart[0]], &colnums[rowstart[rows]],
+                    invalid_entry);
+  // reserve temporary storage to store the entries of one row. this is done
+  // before the raw allocation below so that a failure here cannot leak it
+  std::vector<size_type> tmp_entries (max_row_length);
+
+  // now allocate the respective memory
+  size_type *new_colnums = new size_type[nonzero_elements];
+
+  // Traverse all rows
+  for (size_type line=0; line<rows; ++line)
+    {
+      // copy used entries, break if first unused entry is reached
+      row_length = 0;
+      for (size_type j=rowstart[line]; j<rowstart[line+1]; ++j,++row_length)
+        if (colnums[j] != invalid_entry)
+          tmp_entries[row_length] = colnums[j];
+        else
+          break;
+      // now @p{row_length} is the number of entries in this line. sort them,
+      // but only beginning at the second entry if the diagonal is stored
+      // first. rows with at most one entry need no sorting
+      if (row_length > 1)
+        std::sort ((store_diagonal_first_in_row)
+                   ? tmp_entries.begin()+1
+                   : tmp_entries.begin(),
+                   tmp_entries.begin()+row_length);
+
+      // insert column numbers into the new field
+      for (size_type j=0; j<row_length; ++j)
+        new_colnums[next_free_entry++] = tmp_entries[j];
+
+      // note new start of this and the next row
+      rowstart[line] = next_row_start;
+      next_row_start = next_free_entry;
+
+      // some internal checks: either the matrix is not quadratic, or if it
+      // is, then the first element of this row must be the diagonal element
+      // (i.e. with column index==line number)
+      Assert ((!store_diagonal_first_in_row) ||
+              (new_colnums[rowstart[line]] == line),
+              ExcInternalError());
+      // assert that the first entry does not show up in the remaining ones
+      // and that the remaining ones are unique among themselves (this handles
+      // both cases, quadratic and rectangular matrices)
+      //
+      // the only exception here is if the row contains no entries at all
+      Assert ((rowstart[line] == next_row_start)
+              ||
+              (std::find (&new_colnums[rowstart[line]+1],
+                          &new_colnums[next_row_start],
+                          new_colnums[rowstart[line]]) ==
+               &new_colnums[next_row_start]),
+              ExcInternalError());
+      Assert ((rowstart[line] == next_row_start)
+              ||
+              (std::adjacent_find(&new_colnums[rowstart[line]+1],
+                                  &new_colnums[next_row_start]) ==
+               &new_colnums[next_row_start]),
+              ExcInternalError());
+    }
+
+  // assert that we have used all allocated space, no more and no less
+  Assert (next_free_entry == nonzero_elements,
+          ExcInternalError());
+
+  // set iterator-past-the-end
+  rowstart[rows] = next_row_start;
+
+  // set colnums to the newly allocated array and delete the old one
+  delete[] colnums;
+  colnums = new_colnums;
+
+  // store the size
+  max_vec_len = nonzero_elements;
+
+  compressed = true;
+}
+
+// Copy the entries of dsp (any type offering n_rows(), n_cols(), row_length(),
+// exists() and row iterators with column()) and mark the result compressed.
+template <typename SparsityPatternType>
+void
+SparsityPattern::copy_from (const SparsityPatternType &dsp)
+{
+  // first determine row lengths for each row. if the matrix is square, then
+  // we have to add an additional entry for the diagonal wherever dsp does
+  // not contain it yet, because reinit() always reserves the first slot of
+  // each row of a square pattern for the diagonal
+  const bool do_diag_optimize = (dsp.n_rows() == dsp.n_cols());
+  std::vector<unsigned int> row_lengths (dsp.n_rows());
+  for (size_type i=0; i<dsp.n_rows(); ++i)
+    {
+      row_lengths[i] = dsp.row_length(i);
+      if (do_diag_optimize && !dsp.exists(i,i))
+        ++row_lengths[i];
+    }
+  reinit (dsp.n_rows(), dsp.n_cols(), row_lengths);
+
+  // now enter all the elements into the matrix, if there are any. note that
+  // if the matrix is square, then we already have the diagonal element
+  // preallocated, so writing starts one slot later and skips column==row
+  if (n_rows() != 0 && n_cols() != 0)
+    for (size_type row = 0; row<dsp.n_rows(); ++row)
+      {
+        size_type *cols = &colnums[rowstart[row]] + (do_diag_optimize ? 1 : 0);
+        typename SparsityPatternType::iterator col_num = dsp.begin (row),
+                                               end_row = dsp.end (row);
+
+        for (; col_num != end_row; ++col_num)
+          {
+            const size_type col = col_num->column();
+            if ((col!=row) || !do_diag_optimize)
+              *cols++ = col;
+          }
+      }
+
+  // compress() is not called: exactly the required amount of data was
+  // allocated above, and this relies on the SparsityPatternType iterators
+  // delivering the columns of each row in sorted order.
+  compressed = true;
+}
+
+
+
+// Set this pattern to the nonzero structure of the given full matrix; for a
+// square matrix the diagonal is always included.
+template <typename number>
+void SparsityPattern::copy_from (const FullMatrix<number> &matrix)
+{
+  // first init with the number of entries per row. if this matrix is square
+  // then we also have to allocate memory for the diagonal entry, unless we
+  // have already counted it
+  const bool matrix_is_square = (matrix.m() == matrix.n());
+  std::vector<unsigned int> entries_per_row (matrix.m(), 0);
+  for (size_type row=0; row<matrix.m(); ++row)
+    {
+      for (size_type col=0; col<matrix.n(); ++col)
+        if (matrix(row,col) != 0)
+          ++entries_per_row[row];
+      // a zero diagonal entry was not counted above: reserve its slot here
+      if (matrix_is_square && (matrix(row,row) == 0))
+        ++entries_per_row[row];
+    }
+
+  reinit (matrix.m(), matrix.n(), entries_per_row);
+
+  // now set entries
+  for (size_type row=0; row<matrix.m(); ++row)
+    for (size_type col=0; col<matrix.n(); ++col)
+      if (matrix(row,col) != 0)
+        add (row,col);
+
+  // finally compress
+  compress ();
+}
+
+
+void SparsityPattern::reinit (const size_type m,
+                              const size_type n,
+                              const std::vector<unsigned int> &row_lengths)
+{
+  // wrap the vector with make_slice() and forward to the VectorSlice overload
+  reinit (m, n, make_slice (row_lengths));
+}
+
+
+
+
+// Return whether this pattern is empty, i.e. has no row start array or a
+// zero dimension.
+bool SparsityPattern::empty () const
+{
+  const bool nothing_stored = (rowstart==0) || (rows==0) || (cols==0);
+  if (!nothing_stored)
+    return false;
+
+  // be on the safe side: if any one of the indicators above says "empty",
+  // then all of the bookkeeping members have to agree. (emptying patterns
+  // and freeing memory was added after the original implementation, hence
+  // the cautious cross-checks. wb)
+  Assert (rowstart==0, ExcInternalError());
+  Assert (rows==0, ExcInternalError());
+  Assert (cols==0, ExcInternalError());
+  Assert (colnums==0, ExcInternalError());
+  Assert (max_vec_len==0, ExcInternalError());
+
+  return true;
+}
+
+
+
+// Return the maximum number of entries per row: the stored bound before
+// compress() has been called, the length of the longest row afterwards.
+SparsityPattern::size_type
+SparsityPattern::max_entries_per_row () const
+{
+  if (!compressed)
+    return max_row_length;
+
+  // after compression, scan the row start array for the longest row
+  size_type longest_row = 0;
+  for (size_type row=0; row<rows; ++row)
+    longest_row = std::max (longest_row,
+                            static_cast<size_type>(rowstart[row+1]-rowstart[row]));
+
+  return longest_row;
+}
+
+
+
+// Return the index into colnums at which entry (i,j) is stored, or
+// invalid_entry if the entry is not part of the (compressed) pattern.
+SparsityPattern::size_type
+SparsityPattern::operator () (const size_type i,
+                              const size_type j) const
+{
+  Assert ((rowstart!=0) && (colnums!=0), ExcEmptyObject());
+  Assert (i<rows, ExcIndexRange(i,0,rows));
+  Assert (j<cols, ExcIndexRange(j,0,cols));
+  Assert (compressed, ExcNotCompressed());
+
+  const std::size_t row_begin = rowstart[i],
+                    row_end   = rowstart[i+1];
+
+  // nothing stored in this row at all
+  if (row_begin == row_end)
+    return invalid_entry;
+
+  // with special storage of diagonals, the diagonal entry is the first one
+  // of its row and needs no search
+  if (store_diagonal_first_in_row && (i==j))
+    return row_begin;
+
+  // all other entries are sorted (this is what compression guarantees, and
+  // why it is asserted above), so a binary search finds the column. skip
+  // the leading diagonal entry if there is one
+  const size_type *const search_end   = &colnums[row_end];
+  const size_type *const search_begin = &colnums[row_begin +
+                                                 (store_diagonal_first_in_row ? 1 : 0)];
+  const size_type *const hit
+    = Utilities::lower_bound<const size_type *> (search_begin, search_end, j);
+
+  if ((hit == search_end) || (*hit != j))
+    return invalid_entry;
+  return (hit - &colnums[0]);
+}
+
+
+
+// Add entry (i,j) to the not yet compressed pattern: a no-op if it is
+// already there, otherwise it goes into the first unused slot of row i.
+void SparsityPattern::add (const size_type i,
+                           const size_type j)
+{
+  Assert ((rowstart!=0) && (colnums!=0), ExcEmptyObject());
+  Assert (i<rows, ExcIndexRange(i,0,rows));
+  Assert (j<cols, ExcIndexRange(j,0,cols));
+  Assert (compressed==false, ExcMatrixIsCompressed());
+
+  const std::size_t row_end = rowstart[i+1];
+  for (std::size_t slot=rowstart[i]; slot<row_end; ++slot)
+    {
+      if (colnums[slot] == j)
+        return;                 // entry already exists
+      if (colnums[slot] == invalid_entry)
+        {
+          colnums[slot] = j;    // empty slot found, put new entry here
+          return;
+        }
+    }
+
+  // reaching this point means there was no free slot left in this row
+  Assert (false, ExcNotEnoughSpace(i, rowstart[i+1]-rowstart[i]));
+}
+
+
+
+// Add the column indices [begin,end) to the given row. If the indices are
+// sorted and the row holds no entry >= the first new index, they are
+// appended directly; otherwise each index goes through add().
+template <typename ForwardIterator>
+void SparsityPattern::add_entries (const size_type row,
+                                   ForwardIterator begin,
+                                   ForwardIterator end,
+                                   const bool      indices_are_sorted)
+{
+  if (indices_are_sorted == true)
+    {
+      if (begin != end)
+        {
+          ForwardIterator it = begin;
+          bool has_larger_entries = false;
+          // skip diagonal
+          std::size_t k=rowstart[row]+store_diagonal_first_in_row;
+          for ( ; k<rowstart[row+1]; k++)
+            if (colnums[k] == invalid_entry)
+              break;
+            else if (colnums[k] >= *it)
+              {
+                has_larger_entries = true;
+                break;
+              }
+          if (has_larger_entries == false)
+            for ( ; it != end; ++it)
+              {
+                if (store_diagonal_first_in_row && *it == row)
+                  continue;
+                // slot k is written below, so it has to lie inside the row
+                Assert (k < rowstart[row+1],
+                        ExcNotEnoughSpace(row, rowstart[row+1]-rowstart[row]));
+                colnums[k++] = *it;
+              }
+          else
+            // cannot just append the new range at the end: use add() instead
+            for (ForwardIterator p = begin; p != end; ++p)
+              add (row, *p);
+        }
+    }
+  else
+    // indices are not sorted: forward each one to add()
+    for (ForwardIterator it = begin; it != end; ++it)
+      add (row, *it);
+}
+
+
+// Return whether the entry (i,j) is stored in the sparsity pattern.
+bool
+SparsityPattern::exists (const size_type i, const size_type j) const
+{
+  Assert ((rowstart!=0) && (colnums!=0), ExcEmptyObject());
+  Assert (i<rows, ExcIndexRange(i,0,rows));
+  Assert (j<cols, ExcIndexRange(j,0,cols));
+
+  // linear search through all slots that belong to row i
+  const size_type *const row_begin = colnums + rowstart[i];
+  const size_type *const row_end   = colnums + rowstart[i+1];
+
+  return (std::find (row_begin, row_end, j) != row_end);
+}
+
+
+
+// Return the position of column j within the slots of row i, or
+// numbers::invalid_size_type if the entry (i,j) is not stored.
+SparsityPattern::size_type
+SparsityPattern::row_position (const size_type i, const size_type j) const
+{
+  Assert ((rowstart!=0) && (colnums!=0), ExcEmptyObject());
+  Assert (i<rows, ExcIndexRange(i,0,rows));
+  Assert (j<cols, ExcIndexRange(j,0,cols));
+
+  const size_type *const row_begin = colnums + rowstart[i];
+  const size_type *const row_end   = colnums + rowstart[i+1];
+
+  // the first slot holding j determines the position within the row
+  for (const size_type *slot = row_begin; slot < row_end; ++slot)
+    if (*slot == j)
+      return (slot - row_begin);
+
+  return numbers::invalid_size_type;
+}
+
+
+
+// Translate a global index into the colnums array of a compressed pattern
+// into the (row,column) pair of the matrix entry it represents.
+std::pair<SparsityPattern::size_type, SparsityPattern::size_type>
+SparsityPattern::matrix_position (const size_type global_index) const
+{
+  Assert (compressed == true, ExcNotCompressed());
+  Assert (global_index < n_nonzero_elements(),
+          ExcIndexRange (global_index, 0, n_nonzero_elements()));
+
+  // rowstart is sorted and stores, for every row, the global index at which
+  // that row begins. the first element that is larger than global_index
+  // therefore belongs to the row following the one we are looking for, and
+  // a bisection search finds it
+  const std::size_t *const first_row_start = &rowstart[0];
+  const std::size_t *const next_row_start
+    = std::upper_bound (first_row_start, &rowstart[rows], global_index);
+  const size_type row = (next_row_start - first_row_start - 1);
+
+  // the column is what colnums stores at this global index
+  return std::pair<size_type,size_type> (row, colnums[global_index]);
+}
+
+
+
+// Make a not yet compressed, quadratic sparsity pattern symmetric by adding
+// the transpose entry (j,i) for every stored off-diagonal entry (i,j).
+void
+SparsityPattern::symmetrize ()
+{
+  Assert ((rowstart!=0) && (colnums!=0), ExcEmptyObject());
+  Assert (compressed==false, ExcMatrixIsCompressed());
+  // Only a quadratic matrix is required here; diagonals get no special
+  // treatment
+  Assert (rows==cols, ExcNotQuadratic());
+
+  // visit every entry presently stored and add its transpose. two remarks:
+  //
+  // 1. add() changes the pattern while we traverse it, but it never touches
+  // the row we are currently looking at since diagonal entries are skipped
+  //
+  // 2. calling add() for an entry that already exists is harmless
+  for (size_type row=0; row<rows; ++row)
+    {
+      size_type k = rowstart[row];
+
+      // the used entries of a row come first; the first invalid entry
+      // marks the end of what is stored in this row
+      while ((k<rowstart[row+1]) && (colnums[k] != invalid_entry))
+        {
+          const size_type column = colnums[k];
+
+          // the diagonal is its own transpose, so skip the lookup for it
+          if (column != row)
+            add (column, row);
+
+          k++;
+        }
+    }
+}
+
+
+
+// Write the pattern to the stream, one line per row, in the form
+// "[row,col1,col2,...]". Throws ExcIO if the stream is in a bad state
+// before or after writing.
+void
+SparsityPattern::print (std::ostream &out) const
+{
+  Assert ((rowstart!=0) && (colnums!=0), ExcEmptyObject());
+
+  AssertThrow (out, ExcIO());
+
+  for (size_type row=0; row<rows; ++row)
+    {
+      out << '[' << row;
+
+      // list every used slot of this row, skipping unused ones
+      for (size_type slot=rowstart[row]; slot<rowstart[row+1]; ++slot)
+        {
+          if (colnums[slot] == invalid_entry)
+            continue;
+          out << ',' << colnums[slot];
+        }
+
+      out << ']' << std::endl;
+    }
+
+  AssertThrow (out, ExcIO());
+}
+
+
+
+// Write one line "column -row" per stored entry, a format gnuplot can plot
+// directly. Throws ExcIO if the stream is in a bad state before or after
+// writing.
+void
+SparsityPattern::print_gnuplot (std::ostream &out) const
+{
+  Assert ((rowstart!=0) && (colnums!=0), ExcEmptyObject());
+
+  AssertThrow (out, ExcIO());
+
+  for (size_type row=0; row<rows; ++row)
+    {
+      for (size_type slot=rowstart[row]; slot<rowstart[row+1]; ++slot)
+        {
+          if (colnums[slot] == invalid_entry)
+            continue;
+
+          // matrix entries are usually written (i,j) with i running
+          // vertically and j horizontally, whereas gnuplot expects x-y
+          // pairs. hence the column is printed first and the row is negated
+          out << colnums[slot] << " " << -static_cast<signed int>(row) << std::endl;
+        }
+    }
+
+  AssertThrow (out, ExcIO());
+}
+
+// Write the pattern as an SVG image: a gray frame, a white background of
+// n_cols() x n_rows() units, and one red square per stored entry.
+void
+SparsityPattern::print_svg (std::ostream &out) const
+{
+  const unsigned int n_matrix_rows = this->n_rows();
+  const unsigned int n_matrix_cols = this->n_cols();
+
+  // header, style sheet and the two background rectangles; the frame is one
+  // unit wide on each side, hence the "+2"
+  out << "<svg xmlns=\"http://www.w3.org/2000/svg\" version=\"1.1\" viewBox=\"0 0 " << n_matrix_cols+2
+      << " " << n_matrix_rows+2 << " \">\n"
+      "<style type=\"text/css\" >\n"
+      "     <![CDATA[\n"
+      "      rect.pixel {\n"
+      "          fill:   #ff0000;\n"
+      "      }\n"
+      "    ]]>\n"
+      "  </style>\n\n"
+      "   <rect width=\"" << n_matrix_cols+2 << "\" height=\"" << n_matrix_rows+2 << "\" fill=\"rgb(128, 128, 128)\"/>\n"
+      "   <rect x=\"1\" y=\"1\" width=\"" << n_matrix_cols << "\" height=\"" << n_matrix_rows
+      << "\" fill=\"rgb(255, 255, 255)\"/>\n\n";
+
+  // one pixel per entry, shifted by one unit because of the frame
+  const SparsityPattern::iterator last_entry = this->end();
+  for (SparsityPattern::iterator entry = this->begin(); entry!=last_entry; ++entry)
+    out << "  <rect class=\"pixel\" x=\"" << entry->column()+1
+        << "\" y=\"" << entry->row()+1
+        << "\" width=\".9\" height=\".9\"/>\n";
+
+  out << "</svg>" << std::endl;
+}
+
+
+
+
+// Return the bandwidth of the pattern, i.e. the maximum of |i-j| over all
+// stored entries (i,j).
+SparsityPattern::size_type
+SparsityPattern::bandwidth () const
+{
+  Assert ((rowstart!=0) && (colnums!=0), ExcEmptyObject());
+  size_type b=0;
+  for (size_type i=0; i<rows; ++i)
+    for (size_type j=rowstart[i]; j<rowstart[i+1]; ++j)
+      {
+        // leave if at the end of the entries of this line
+        if (colnums[j] == invalid_entry)
+          break;
+
+        // compute |i-column| directly in unsigned arithmetic. going through
+        // a signed int would rely on wrap-around of the unsigned difference
+        // and gives wrong results for distances beyond the range of int
+        const size_type column   = colnums[j];
+        const size_type distance = (i > column ? i-column : column-i);
+        if (distance > b)
+          b = distance;
+      }
+  return b;
+}
+
+
+// Write the complete object to the stream: first the scalar members as text
+// in brackets, then the raw bytes of the rowstart and colnums arrays, each
+// in its own pair of brackets. Throws ExcIO if the stream is in a bad state
+// before or after writing.
+void
+SparsityPattern::block_write (std::ostream &out) const
+{
+  AssertThrow (out, ExcIO());
+
+  // number of bytes occupied by the two arrays
+  const std::streamsize rowstart_bytes
+    = reinterpret_cast<const char *>(&rowstart[max_dim+1])
+      - reinterpret_cast<const char *>(&rowstart[0]);
+  const std::streamsize colnums_bytes
+    = reinterpret_cast<const char *>(&colnums[max_vec_len])
+      - reinterpret_cast<const char *>(&colnums[0]);
+
+  // header with the simple objects, bracketed in [...]
+  out << '[' << max_dim << ' '
+      << rows << ' '
+      << cols << ' '
+      << max_vec_len << ' '
+      << max_row_length << ' '
+      << compressed << ' '
+      << store_diagonal_first_in_row << "][";
+
+  // binary dump of the row start indices
+  out.write (reinterpret_cast<const char *>(&rowstart[0]), rowstart_bytes);
+  out << "][";
+
+  // binary dump of the column indices
+  out.write (reinterpret_cast<const char *>(&colnums[0]), colnums_bytes);
+  out << ']';
+
+  AssertThrow (out, ExcIO());
+}
+
+
+
+// Read an object in the format produced by block_write(): bracketed scalar
+// members as text, followed by the bracketed raw bytes of the rowstart and
+// colnums arrays. The previous arrays are released and replaced. Throws
+// ExcIO if the stream is bad on entry or a bracket is not found where
+// expected.
+void
+SparsityPattern::block_read (std::istream &in)
+{
+  AssertThrow (in, ExcIO());
+
+  char bracket;
+
+  // header: the simple members, enclosed in [...]
+  in >> bracket;
+  AssertThrow (bracket == '[', ExcIO());
+  in >> max_dim
+     >> rows
+     >> cols
+     >> max_vec_len
+     >> max_row_length
+     >> compressed
+     >> store_diagonal_first_in_row;
+
+  in >> bracket;
+  AssertThrow (bracket == ']', ExcIO());
+  in >> bracket;
+  AssertThrow (bracket == '[', ExcIO());
+
+  // throw away the old arrays (deleting a null pointer is a no-op) and
+  // allocate new ones of the sizes just read
+  delete[] rowstart;
+  delete[] colnums;
+
+  rowstart = new std::size_t[max_dim+1];
+  colnums  = new size_type[max_vec_len];
+
+  // binary block with the row start indices
+  char *const rowstart_first = reinterpret_cast<char *>(&rowstart[0]);
+  char *const rowstart_last  = reinterpret_cast<char *>(&rowstart[max_dim+1]);
+  in.read (rowstart_first, rowstart_last - rowstart_first);
+
+  in >> bracket;
+  AssertThrow (bracket == ']', ExcIO());
+  in >> bracket;
+  AssertThrow (bracket == '[', ExcIO());
+
+  // binary block with the column indices
+  char *const colnums_first = reinterpret_cast<char *>(&colnums[0]);
+  char *const colnums_last  = reinterpret_cast<char *>(&colnums[max_vec_len]);
+  in.read (colnums_first, colnums_last - colnums_first);
+
+  in >> bracket;
+  AssertThrow (bracket == ']', ExcIO());
+}
+
+
+
+// Return an estimate of the memory consumed by this object in bytes: the
+// object itself plus the rowstart and colnums arrays.
+std::size_t
+SparsityPattern::memory_consumption () const
+{
+  // rowstart holds max_dim+1 elements of type std::size_t (one per row plus
+  // the one-past-the-end marker), which is in general not the same size as
+  // size_type. nothing is counted if the array has not been allocated
+  const std::size_t rowstart_bytes
+    = (rowstart != 0
+       ?
+       (static_cast<std::size_t>(max_dim) + 1) * sizeof(*rowstart)
+       :
+       0);
+
+  return (rowstart_bytes +
+          sizeof(*this) +
+          max_vec_len * sizeof(size_type));
+}
+
+
+
+// explicit instantiations
+//
+// copy_from() is instantiated for the two sparsity pattern classes and for
+// full matrices with float and double entries.
+template void SparsityPattern::copy_from<SparsityPattern> (const SparsityPattern &);
+template void SparsityPattern::copy_from<DynamicSparsityPattern> (const DynamicSparsityPattern &);
+template void SparsityPattern::copy_from<float> (const FullMatrix<float> &);
+template void SparsityPattern::copy_from<double> (const FullMatrix<double> &);
+
+// add_entries() is instantiated for plain pointers and for the const and
+// non-const iterators of std::vector<size_type>.
+template void SparsityPattern::add_entries<const SparsityPattern::size_type *> (const size_type ,
+    const size_type *,
+    const size_type *,
+    const bool);
+// NOTE(review): presumably this guard avoids instantiating the same
+// specialization twice on platforms where std::vector's const_iterator is a
+// plain pointer -- confirm against the definition of the macro.
+#ifndef DEAL_II_VECTOR_ITERATOR_IS_POINTER
+template void SparsityPattern::add_entries<std::vector<SparsityPattern::size_type>::const_iterator>
+(const size_type,
+ std::vector<size_type>::const_iterator,
+ std::vector<size_type>::const_iterator,
+ const bool);
+#endif
+template void SparsityPattern::add_entries<std::vector<SparsityPattern::size_type>::iterator>
+(const size_type,
+ std::vector<size_type>::iterator,
+ std::vector<size_type>::iterator,
+ const bool);
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/lac/sparsity_tools.cc b/source/lac/sparsity_tools.cc
new file mode 100644
index 0000000..a274d91
--- /dev/null
+++ b/source/lac/sparsity_tools.cc
@@ -0,0 +1,776 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/exceptions.h>
+#include <deal.II/lac/exceptions.h>
+#include <deal.II/lac/sparsity_pattern.h>
+#include <deal.II/lac/sparsity_tools.h>
+
+#include <algorithm>
+#include <functional>
+#include <set>
+
+#ifdef DEAL_II_WITH_MPI
+#include <deal.II/base/utilities.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/lac/block_sparsity_pattern.h>
+#endif
+
+#ifdef DEAL_II_WITH_METIS
+DEAL_II_DISABLE_EXTRA_DIAGNOSTICS
+extern "C"
+{
+#include <metis.h>
+}
+DEAL_II_ENABLE_EXTRA_DIAGNOSTICS
+#endif
+
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace SparsityTools
+{
+
+  /**
+   * Partition the graph described by a compressed, quadratic sparsity
+   * pattern into @p n_partitions parts using METIS and store the number of
+   * the part each row belongs to in @p partition_indices, which must
+   * already have one element per row.
+   *
+   * If only one partition is requested or the pattern has a single row,
+   * all indices are set to zero without calling METIS. Otherwise, if
+   * deal.II was configured without METIS, ExcMETISNotInstalled is thrown.
+   * ExcMETISError is thrown if METIS returns anything but 1.
+   */
+  void partition (const SparsityPattern     &sparsity_pattern,
+                  const unsigned int         n_partitions,
+                  std::vector<unsigned int> &partition_indices)
+  {
+    Assert (sparsity_pattern.n_rows()==sparsity_pattern.n_cols(),
+            ExcNotQuadratic());
+    Assert (sparsity_pattern.is_compressed(),
+            SparsityPattern::ExcNotCompressed());
+
+    Assert (n_partitions > 0, ExcInvalidNumberOfPartitions(n_partitions));
+    Assert (partition_indices.size() == sparsity_pattern.n_rows(),
+            ExcInvalidArraySize (partition_indices.size(),
+                                 sparsity_pattern.n_rows()));
+
+    // check for an easy return
+    if (n_partitions == 1 || (sparsity_pattern.n_rows()==1))
+      {
+        std::fill_n (partition_indices.begin(), partition_indices.size(), 0U);
+        return;
+      }
+
+    // Make sure that METIS is actually
+    // installed and detected
+#ifndef DEAL_II_WITH_METIS
+    // the cast only marks the argument as used in this configuration
+    (void)sparsity_pattern;
+    AssertThrow (false, ExcMETISNotInstalled());
+#else
+
+    // generate the data structures for
+    // METIS. Note that this is particularly
+    // simple, since METIS wants exactly our
+    // compressed row storage format. we only
+    // have to set up a few auxiliary arrays
+    idx_t
+    n       = static_cast<signed int>(sparsity_pattern.n_rows()),
+    ncon    = 1,                              // number of balancing constraints (should be >0)
+    nparts  = static_cast<int>(n_partitions), // number of subdomains to create
+    dummy;                                    // the numbers of edges cut by the
+    // resulting partition
+
+    // We can not partition n items into more than n parts. METIS will
+    // generate non-sensical output (everything is owned by a single process)
+    // and complain with a message (but won't return an error code!):
+    // ***Cannot bisect a graph with 0 vertices!
+    // ***You are trying to partition a graph into too many parts!
+    nparts = std::min(n, nparts);
+
+    // use default options for METIS
+    idx_t options[METIS_NOPTIONS];
+    METIS_SetDefaultOptions (options);
+
+    // one more nuisance: we have to copy our own data to arrays that store
+    // signed integers :-(
+    //
+    // int_rowstart starts out with a single zero-initialized element, the
+    // start of the first row; one further element is appended per row below
+    std::vector<idx_t> int_rowstart(1);
+    int_rowstart.reserve(sparsity_pattern.n_rows()+1);
+    std::vector<idx_t> int_colnums;
+    int_colnums.reserve(sparsity_pattern.n_nonzero_elements());
+    for (SparsityPattern::size_type row=0; row<sparsity_pattern.n_rows(); ++row)
+      {
+        for (SparsityPattern::iterator col=sparsity_pattern.begin(row);
+             col < sparsity_pattern.end(row); ++col)
+          int_colnums.push_back(col->column());
+        // the number of columns collected so far is where the next row starts
+        int_rowstart.push_back(int_colnums.size());
+      }
+
+    std::vector<idx_t> int_partition_indices (sparsity_pattern.n_rows());
+
+    // Make use of METIS' error code.
+    int ierr;
+
+    // Select which type of partitioning to create
+
+    // Use recursive if the number of partitions is less than or equal to 8
+    if (nparts <= 8)
+      ierr = METIS_PartGraphRecursive(&n, &ncon, &int_rowstart[0], &int_colnums[0],
+                                      NULL, NULL, NULL,
+                                      &nparts,NULL,NULL,&options[0],
+                                      &dummy,&int_partition_indices[0]);
+
+    // Otherwise use kway
+    else
+      ierr = METIS_PartGraphKway(&n, &ncon, &int_rowstart[0], &int_colnums[0],
+                                 NULL, NULL, NULL,
+                                 &nparts,NULL,NULL,&options[0],
+                                 &dummy,&int_partition_indices[0]);
+
+    // If metis returns normally, an error code METIS_OK=1 is returned from
+    // the above functions (see metish.h)
+    AssertThrow (ierr == 1, ExcMETISError (ierr));
+
+    // now copy back generated indices into the output array
+    std::copy (int_partition_indices.begin(),
+               int_partition_indices.end(),
+               partition_indices.begin());
+#endif
+  }
+
+
+  namespace internal
+  {
+    /**
+     * Pick a starting index among the nodes of a connectivity graph that
+     * have not been numbered yet, i.e. whose entry in @p new_indices still
+     * is invalid_size_type. The unnumbered node with the fewest neighbors
+     * is preferred; the first unnumbered node serves as a fallback.
+     */
+    DynamicSparsityPattern::size_type
+    find_unnumbered_starting_index (const DynamicSparsityPattern &sparsity,
+                                    const std::vector<DynamicSparsityPattern::size_type> &new_indices)
+    {
+      typedef DynamicSparsityPattern::size_type index_type;
+
+      index_type best_row    = numbers::invalid_size_type;
+      index_type best_length = sparsity.n_rows();
+
+      // among all as-yet unnumbered rows, remember the first one whose
+      // coordination number is strictly smaller than anything seen so far
+      for (index_type row=0; row<sparsity.n_rows(); ++row)
+        {
+          if (new_indices[row] != numbers::invalid_size_type)
+            continue;
+
+          const index_type length = sparsity.row_length(row);
+          if (length < best_length)
+            {
+              best_length = length;
+              best_row    = row;
+            }
+        }
+
+      if (best_row != numbers::invalid_size_type)
+        return best_row;
+
+      // we get here if no unnumbered dof has a coordination number less
+      // than sparsity.n_rows(). this rather exotic case seems to happen
+      // only if there is a single cell, but there may be other situations
+      // as well. any unnumbered dof is as good a starting point as any
+      // other then, so take the first one
+      const std::vector<index_type>::const_iterator first_unnumbered
+        = std::find (new_indices.begin(), new_indices.end(),
+                     numbers::invalid_size_type);
+      if (first_unnumbered != new_indices.end())
+        best_row = first_unnumbered - new_indices.begin();
+
+      Assert (best_row != numbers::invalid_size_type,
+              ExcInternalError());
+
+      return best_row;
+    }
+  }
+
+
+
+  /**
+   * Compute a Cuthill-McKee renumbering of the nodes of the graph described
+   * by @p sparsity. On return, new_indices[i] is the new number of node i.
+   *
+   * @p starting_indices lists the nodes that get the first numbers;
+   * entries that are invalid_size_type or not smaller than the number of
+   * rows are ignored. If no valid starting index remains, the unnumbered
+   * node with the lowest coordination number is used. If the graph has
+   * several components, the remaining ones are started the same way, which
+   * is only permitted if no starting indices were given.
+   */
+  void
+  reorder_Cuthill_McKee (const DynamicSparsityPattern                         &sparsity,
+                         std::vector<DynamicSparsityPattern::size_type>       &new_indices,
+                         const std::vector<DynamicSparsityPattern::size_type> &starting_indices)
+  {
+    Assert (sparsity.n_rows() == sparsity.n_cols(),
+            ExcDimensionMismatch (sparsity.n_rows(), sparsity.n_cols()));
+    Assert (sparsity.n_rows() == new_indices.size(),
+            ExcDimensionMismatch (sparsity.n_rows(), new_indices.size()));
+    Assert (starting_indices.size() <= sparsity.n_rows(),
+            ExcMessage ("You can't specify more starting indices than there are rows"));
+    Assert (sparsity.row_index_set().size() == 0 ||
+            sparsity.row_index_set().size() == sparsity.n_rows(),
+            ExcMessage("Only valid for sparsity patterns which store all rows."));
+    for (SparsityPattern::size_type i=0; i<starting_indices.size(); ++i)
+      Assert (starting_indices[i] < sparsity.n_rows(),
+              ExcMessage ("Invalid starting index"));
+
+    // store the indices of the dofs renumbered in the last round. Default to
+    // starting points
+    std::vector<DynamicSparsityPattern::size_type> last_round_dofs (starting_indices);
+
+    // initialize the new_indices array with invalid values
+    std::fill (new_indices.begin(), new_indices.end(),
+               numbers::invalid_size_type);
+
+    // delete disallowed elements: first mark them as invalid...
+    for (DynamicSparsityPattern::size_type i=0; i<last_round_dofs.size(); ++i)
+      if ((last_round_dofs[i]==numbers::invalid_size_type) ||
+          (last_round_dofs[i]>=sparsity.n_rows()))
+        last_round_dofs[i] = numbers::invalid_size_type;
+
+    // ...then really remove them. std::remove only moves the elements to be
+    // kept to the front and returns the new logical end; the vector shrinks
+    // only through the subsequent erase. without it the invalid entries
+    // stay in the vector and are used as array indices further down
+    last_round_dofs.erase (std::remove (last_round_dofs.begin(),
+                                        last_round_dofs.end(),
+                                        numbers::invalid_size_type),
+                           last_round_dofs.end());
+
+    // now if no valid points remain: find dof with lowest coordination number
+    if (last_round_dofs.empty())
+      last_round_dofs
+      .push_back (internal::find_unnumbered_starting_index (sparsity,
+                                                            new_indices));
+
+    // store next free dof index
+    DynamicSparsityPattern::size_type next_free_number = 0;
+
+    // enumerate the first round dofs
+    for (DynamicSparsityPattern::size_type i=0; i!=last_round_dofs.size(); ++i)
+      new_indices[last_round_dofs[i]] = next_free_number++;
+
+    // now do as many steps as needed to renumber all dofs
+    while (true)
+      {
+        // store the indices of the dofs to be renumbered in the next round
+        std::vector<DynamicSparsityPattern::size_type> next_round_dofs;
+
+        // find all neighbors of the dofs numbered in the last round
+        for (DynamicSparsityPattern::size_type i=0; i<last_round_dofs.size(); ++i)
+          for (DynamicSparsityPattern::iterator j=sparsity.begin(last_round_dofs[i]);
+               j<sparsity.end(last_round_dofs[i]); ++j)
+            next_round_dofs.push_back (j->column());
+
+        // sort dof numbers
+        std::sort (next_round_dofs.begin(), next_round_dofs.end());
+
+        // delete multiple entries
+        std::vector<DynamicSparsityPattern::size_type>::iterator end_sorted;
+        end_sorted = std::unique (next_round_dofs.begin(), next_round_dofs.end());
+        next_round_dofs.erase (end_sorted, next_round_dofs.end());
+
+        // eliminate dofs which are already numbered: compact the still
+        // unnumbered ones to the front, preserving their order, and cut off
+        // the rest in one go
+        std::vector<DynamicSparsityPattern::size_type>::iterator
+        end_unnumbered = next_round_dofs.begin();
+        for (std::vector<DynamicSparsityPattern::size_type>::iterator
+             s=next_round_dofs.begin(); s!=next_round_dofs.end(); ++s)
+          if (new_indices[*s] == numbers::invalid_size_type)
+            *end_unnumbered++ = *s;
+        next_round_dofs.erase (end_unnumbered, next_round_dofs.end());
+
+        // check whether there are any new dofs in the list. if there are
+        // none, then we have completely numbered the current component of the
+        // graph. check if there are as yet unnumbered components of the graph
+        // that we would then have to do next
+        if (next_round_dofs.empty())
+          {
+            if (std::find (new_indices.begin(), new_indices.end(),
+                           numbers::invalid_size_type)
+                ==
+                new_indices.end())
+              // no unnumbered indices, so we can leave now
+              break;
+
+            // otherwise find a valid starting point for the next component of
+            // the graph and continue with numbering that one. we only do so
+            // if no starting indices were provided by the user (see the
+            // documentation of this function) so produce an error if we got
+            // here and starting indices were given
+            Assert (starting_indices.empty(),
+                    ExcMessage ("The input graph appears to have more than one "
+                                "component, but as stated in the documentation "
+                                "we only want to reorder such graphs if no "
+                                "starting indices are given. The function was "
+                                "called with starting indices, however."));
+
+            next_round_dofs
+            .push_back (internal::find_unnumbered_starting_index (sparsity,
+                                                                  new_indices));
+          }
+
+
+
+        // store for each coordination number the dofs with these coordination
+        // number. the dof index is stored with its full type so that large
+        // indices are not truncated
+        std::multimap<DynamicSparsityPattern::size_type,
+            DynamicSparsityPattern::size_type> dofs_by_coordination;
+
+        // find coordination number for each of these dofs
+        for (std::vector<DynamicSparsityPattern::size_type>::iterator s=next_round_dofs.begin();
+             s!=next_round_dofs.end(); ++s)
+          {
+            const DynamicSparsityPattern::size_type coordination = sparsity.row_length(*s);
+
+            // insert this dof at its coordination number
+            const std::pair<const DynamicSparsityPattern::size_type,
+                  DynamicSparsityPattern::size_type> new_entry (coordination, *s);
+            dofs_by_coordination.insert (new_entry);
+          }
+
+        // assign new DoF numbers to the elements of the present front, in
+        // the order of increasing coordination number:
+        std::multimap<DynamicSparsityPattern::size_type,
+            DynamicSparsityPattern::size_type>::iterator i;
+        for (i = dofs_by_coordination.begin(); i!=dofs_by_coordination.end(); ++i)
+          new_indices[i->second] = next_free_number++;
+
+        // after that: copy this round's dofs for the next round
+        last_round_dofs = next_round_dofs;
+      }
+
+    // test for all indices numbered. this mostly tests whether the
+    // front-marching-algorithm (which Cuthill-McKee actually is) has reached
+    // all points.
+    Assert ((std::find (new_indices.begin(), new_indices.end(), numbers::invalid_size_type)
+             ==
+             new_indices.end())
+            &&
+            (next_free_number == sparsity.n_rows()),
+            ExcInternalError());
+  }
+
+
+
+  /**
+   * Variant of the Cuthill-McKee reordering for a SparsityPattern: the
+   * valid entries of @p sparsity are copied into a DynamicSparsityPattern
+   * and the work is delegated to the function operating on that class.
+   */
+  void
+  reorder_Cuthill_McKee (const SparsityPattern                   &sparsity,
+                         std::vector<SparsityPattern::size_type> &new_indices,
+                         const std::vector<SparsityPattern::size_type> &starting_indices)
+  {
+    DynamicSparsityPattern dsp(sparsity.n_rows(), sparsity.n_cols());
+
+    for (unsigned int row=0; row<sparsity.n_rows(); ++row)
+      {
+        const SparsityPattern::iterator row_end = sparsity.end(row);
+
+        // copy the entries of this row up to, but not including, the first
+        // one that is not a valid entry
+        SparsityPattern::iterator entry = sparsity.begin(row);
+        while ((entry != row_end) && entry->is_valid_entry())
+          {
+            dsp.add(row, entry->column());
+            ++entry;
+          }
+      }
+
+    reorder_Cuthill_McKee(dsp, new_indices, starting_indices);
+  }
+
+
+
+  namespace internal
+  {
+    /**
+     * Recursive worker for the hierarchical reordering of the nodes of the
+     * graph @p connectivity, which must be quadratic and store all rows.
+     *
+     * The nodes are collected into groups, each consisting of a pivot node
+     * and those of its direct neighbors that do not belong to a group yet.
+     * If this produced fewer groups than nodes, the groups themselves are
+     * reordered by a recursive call on the group connectivity. On return,
+     * @p renumbering lists the node indices group by group; the caller
+     * outside this namespace inverts this permutation.
+     */
+    void
+    reorder_hierarchical (const DynamicSparsityPattern                   &connectivity,
+                          std::vector<DynamicSparsityPattern::size_type> &renumbering)
+    {
+      AssertDimension (connectivity.n_rows(), connectivity.n_cols());
+      AssertDimension (connectivity.n_rows(), renumbering.size());
+      Assert (connectivity.row_index_set().size() == 0 ||
+              connectivity.row_index_set().size() == connectivity.n_rows(),
+              ExcMessage("Only valid for sparsity patterns which store all rows."));
+
+      // touched_nodes[i] is the index of the group node i was put into, or
+      // invalid_dof_index as long as the node has not been assigned
+      std::vector<types::global_dof_index> touched_nodes(connectivity.n_rows(),
+                                                         numbers::invalid_dof_index);
+      std::vector<unsigned int> row_lengths(connectivity.n_rows());
+      // candidates for the next pivot node
+      std::set<types::global_dof_index> current_neighbors;
+      // groups[g] lists the nodes of group g, the pivot first
+      std::vector<std::vector<types::global_dof_index> > groups;
+
+      // First collect the number of neighbors for each node. We use this
+      // field to find next nodes with the minimum number of non-touched
+      // neighbors in the field n_remaining_neighbors, so we will count down
+      // on this field. We also cache the row lengths because we need this
+      // data frequently and getting it from the sparsity pattern is more
+      // expensive.
+      for (types::global_dof_index row=0; row<connectivity.n_rows(); ++row)
+        {
+          row_lengths[row] = connectivity.row_length(row);
+          Assert(row_lengths[row] > 0, ExcInternalError());
+        }
+      std::vector<unsigned int> n_remaining_neighbors(row_lengths);
+
+      // This outer loop is typically traversed only once, unless the global
+      // graph is not connected
+      while (true)
+        {
+          // Find cell with the minimal number of neighbors (typically a
+          // corner node when based on FEM meshes). If no cell is left, we are
+          // done. Together with the outer while loop, this loop can possibly
+          // be of quadratic complexity in the number of disconnected
+          // partitions, i.e. up to connectivity.n_rows() in the worst case,
+          // but that is not the usual use case of this loop and thus not
+          // optimized for.
+          //
+          // NOTE(review): the comparison below uses row_lengths[i] while the
+          // value stored in the pair is n_remaining_neighbors[i] -- confirm
+          // that mixing the two quantities is intended.
+          std::pair<types::global_dof_index,types::global_dof_index> min_neighbors
+          (numbers::invalid_dof_index, numbers::invalid_dof_index);
+          for (types::global_dof_index i=0; i<touched_nodes.size(); ++i)
+            if (touched_nodes[i] == numbers::invalid_dof_index)
+              if (row_lengths[i] < min_neighbors.second)
+                {
+                  min_neighbors = std::make_pair(i, n_remaining_neighbors[i]);
+                  if (n_remaining_neighbors[i] <= 1)
+                    break;
+                }
+          if (min_neighbors.first == numbers::invalid_dof_index)
+            break;
+
+          Assert(min_neighbors.second > 0, ExcInternalError());
+
+          // seed the set of pivot candidates with the node just found and
+          // process candidates until none is left
+          current_neighbors.clear();
+          current_neighbors.insert(min_neighbors.first);
+          while (!current_neighbors.empty())
+            {
+              // Find node with minimum number of untouched neighbors among the
+              // next set of possible neighbors
+              min_neighbors = std::make_pair (numbers::invalid_dof_index,
+                                              numbers::invalid_dof_index);
+              for (std::set<types::global_dof_index>::iterator it=current_neighbors.begin();
+                   it != current_neighbors.end(); ++it)
+                {
+                  Assert (touched_nodes[*it] == numbers::invalid_dof_index,
+                          ExcInternalError());
+                  if (n_remaining_neighbors[*it] < min_neighbors.second)
+                    min_neighbors = std::make_pair(*it, n_remaining_neighbors[*it]);
+                }
+
+              // Among the set of nodes with the minimal number of neighbors,
+              // choose the one with the largest number of touched neighbors,
+              // i.e., the one with the largest row length
+              //
+              // (from here on min_neighbors.second holds a row length rather
+              // than a number of remaining neighbors)
+              const types::global_dof_index best_row_length = min_neighbors.second;
+              for (std::set<types::global_dof_index>::iterator it=current_neighbors.begin();
+                   it != current_neighbors.end(); ++it)
+                if (n_remaining_neighbors[*it] == best_row_length)
+                  if (row_lengths[*it] > min_neighbors.second)
+                    min_neighbors = std::make_pair(*it, row_lengths[*it]);
+
+              // Add the pivot and all direct neighbors of the pivot node not
+              // yet touched to the list of new entries.
+              groups.push_back(std::vector<types::global_dof_index>());
+              std::vector<types::global_dof_index> &next_group = groups.back();
+
+              next_group.push_back(min_neighbors.first);
+              touched_nodes[min_neighbors.first] = groups.size()-1;
+              for (DynamicSparsityPattern::iterator it
+                   = connectivity.begin(min_neighbors.first);
+                   it != connectivity.end(min_neighbors.first); ++it)
+                if (touched_nodes[it->column()] == numbers::invalid_dof_index)
+                  {
+                    next_group.push_back(it->column());
+                    touched_nodes[it->column()] = groups.size()-1;
+                  }
+
+              // Add all neighbors of the current list not yet touched to the
+              // set of possible next pivots. The added node is no longer a
+              // valid neighbor (here we assume symmetry of the
+              // connectivity). Delete the entries of the current list from
+              // the set of possible next pivots.
+              for (unsigned int i=0; i<next_group.size(); ++i)
+                {
+                  for (DynamicSparsityPattern::iterator it
+                       = connectivity.begin(next_group[i]);
+                       it != connectivity.end(next_group[i]); ++it)
+                    {
+                      if (touched_nodes[it->column()] == numbers::invalid_dof_index)
+                        current_neighbors.insert(it->column());
+                      n_remaining_neighbors[it->column()]--;
+                    }
+                  current_neighbors.erase(next_group[i]);
+                }
+            }
+        }
+
+      // Sanity check: for all nodes, there should not be any neighbors left
+      for (types::global_dof_index row=0; row<connectivity.n_rows(); ++row)
+        Assert(n_remaining_neighbors[row] == 0, ExcInternalError());
+
+      // If the number of groups is smaller than the number of nodes, we
+      // continue by recursively calling this method
+      if (groups.size() < connectivity.n_rows())
+        {
+          // Form the connectivity of the groups: group i is connected to
+          // every group that contains a neighbor of one of its nodes
+          DynamicSparsityPattern connectivity_next(groups.size(),
+                                                   groups.size());
+          for (types::global_dof_index i=0; i<groups.size(); ++i)
+            for (types::global_dof_index col=0; col<groups[i].size(); ++col)
+              for (DynamicSparsityPattern::iterator it
+                   = connectivity.begin(groups[i][col]);
+                   it != connectivity.end(groups[i][col]); ++it)
+                connectivity_next.add(i, touched_nodes[it->column()]);
+
+          // Recursively call the reordering
+          std::vector<types::global_dof_index> renumbering_next(groups.size());
+          reorder_hierarchical(connectivity_next, renumbering_next);
+
+          // Renumber the indices group by group according to the incoming
+          // ordering for the groups
+          for (types::global_dof_index i=0,count=0; i<groups.size(); ++i)
+            for (types::global_dof_index col=0; col<groups[renumbering_next[i]].size(); ++col, ++count)
+              renumbering[count] = groups[renumbering_next[i]][col];
+        }
+      else
+        {
+          // All groups should have size one and no more recursion is possible,
+          // so use the numbering of the groups
+          for (types::global_dof_index i=0,count=0; i<groups.size(); ++i)
+            for (types::global_dof_index col=0; col<groups[i].size(); ++col, ++count)
+              renumbering[count] = groups[i][col];
+        }
+    }
+  }
+
+  void
+  reorder_hierarchical (const DynamicSparsityPattern                   &connectivity,
+                        std::vector<DynamicSparsityPattern::size_type> &renumbering)
+  {
+    // internal::reorder_hierarchical() is used recursively and therefore
+    // cannot flip its own result; it leaves the numbering the wrong way
+    // around, so the permutation is inverted once here
+    internal::reorder_hierarchical(connectivity, renumbering);
+    Utilities::invert_permutation(renumbering).swap(renumbering);
+  }
+
+
+
+#ifdef DEAL_II_WITH_MPI
+  // Ship every row of @p dsp that is listed in @p myrange but owned by
+  // another process to its owner, and merge the rows received from other
+  // processes into @p dsp. Ownership is contiguous and ordered by rank:
+  // rank p owns rows_per_cpu[p] consecutive rows. Each message is a flat
+  // list of records [n_entries, row, column_0, ..., column_{n_entries-1}].
+  void distribute_sparsity_pattern
+  (DynamicSparsityPattern                               &dsp,
+   const std::vector<DynamicSparsityPattern::size_type> &rows_per_cpu,
+   const MPI_Comm                                       &mpi_comm,
+   const IndexSet                                       &myrange)
+  {
+    const unsigned int myid = Utilities::MPI::this_mpi_process(mpi_comm);
+    std::vector<DynamicSparsityPattern::size_type> start_index(rows_per_cpu.size()+1);
+    start_index[0]=0;
+    for (DynamicSparsityPattern::size_type i=0; i<rows_per_cpu.size(); ++i)
+      start_index[i+1]=start_index[i]+rows_per_cpu[i];
+
+    typedef std::map<DynamicSparsityPattern::size_type,
+            std::vector<DynamicSparsityPattern::size_type> >
+            map_vec_t;
+
+    map_vec_t send_data;
+
+    {
+      unsigned int dest_cpu=0;
+
+      DynamicSparsityPattern::size_type n_local_rel_rows = myrange.n_elements();
+      for (DynamicSparsityPattern::size_type row_idx=0; row_idx<n_local_rel_rows; ++row_idx)
+        {
+          DynamicSparsityPattern::size_type row=myrange.nth_index_in_set(row_idx);
+
+          //calculate destination CPU
+          while (row>=start_index[dest_cpu+1])
+            ++dest_cpu;
+
+          //skip myself: jump over rows_per_cpu[myid] positions of myrange at
+          //once (the loop increment supplies the last step)
+          if (dest_cpu==myid)
+            {
+              row_idx+=rows_per_cpu[myid]-1;
+              continue;
+            }
+
+          DynamicSparsityPattern::size_type rlen = dsp.row_length(row);
+
+          //skip empty lines
+          if (!rlen)
+            continue;
+
+          //save entries
+          std::vector<DynamicSparsityPattern::size_type> &dst = send_data[dest_cpu];
+
+          dst.push_back(rlen); // number of entries
+          dst.push_back(row); // row index
+          for (DynamicSparsityPattern::size_type c=0; c<rlen; ++c)
+            dst.push_back(dsp.column_number(row, c)); // columns
+        }
+    }
+
+    unsigned int num_receive=0;
+    {
+      std::vector<unsigned int> send_to;
+      send_to.reserve(send_data.size());
+      for (map_vec_t::iterator it=send_data.begin(); it!=send_data.end(); ++it)
+        send_to.push_back(it->first);
+
+      num_receive =
+        Utilities::MPI::
+        compute_point_to_point_communication_pattern(mpi_comm, send_to).size();
+    }
+
+    std::vector<MPI_Request> requests(send_data.size());
+    const int mpi_tag = 124; // tag shared by the sends and the matching probes
+
+    // send data
+    {
+      unsigned int idx=0;
+      for (map_vec_t::iterator it=send_data.begin(); it!=send_data.end(); ++it, ++idx)
+        MPI_Isend(&(it->second[0]),
+                  it->second.size(),
+                  DEAL_II_DOF_INDEX_MPI_TYPE,
+                  it->first,
+                  mpi_tag,
+                  mpi_comm,
+                  &requests[idx]);
+    }
+
+    {
+      //receive
+      std::vector<DynamicSparsityPattern::size_type> recv_buf;
+      for (unsigned int index=0; index<num_receive; ++index)
+        {
+          MPI_Status status;
+          int len;
+          // probe for our tag only: other traffic on mpi_comm must not be consumed
+          MPI_Probe(MPI_ANY_SOURCE, mpi_tag, mpi_comm, &status);
+          Assert (status.MPI_TAG==mpi_tag, ExcInternalError());
+          MPI_Get_count(&status, DEAL_II_DOF_INDEX_MPI_TYPE, &len);
+          recv_buf.resize(len);
+          MPI_Recv(&recv_buf[0], len, DEAL_II_DOF_INDEX_MPI_TYPE, status.MPI_SOURCE,
+                   status.MPI_TAG, mpi_comm, &status);
+
+          std::vector<DynamicSparsityPattern::size_type>::const_iterator ptr = recv_buf.begin();
+          std::vector<DynamicSparsityPattern::size_type>::const_iterator end = recv_buf.end();
+          while (ptr!=end)
+            {
+              DynamicSparsityPattern::size_type num=*(ptr++);
+              Assert(ptr!=end, ExcInternalError());
+              DynamicSparsityPattern::size_type row=*(ptr++);
+              for (unsigned int c=0; c<num; ++c)
+                {
+                  Assert(ptr!=end, ExcInternalError());
+                  dsp.add(row, *ptr);
+                  ptr++;
+                }
+            }
+          Assert(ptr==end, ExcInternalError());
+        }
+    }
+
+    // complete all sends, so that we can safely destroy the buffers.
+    if (requests.size())
+      MPI_Waitall(requests.size(), &requests[0], MPI_STATUSES_IGNORE);
+  }
+
+  // Variant for block patterns in which ownership is described by one
+  // IndexSet per rank (@p owned_set_per_cpu) instead of contiguous ranges.
+  // Rows of @p myrange owned by other ranks are sent to their owner and the
+  // rows received are merged into @p dsp. NOTE(review): the owner search
+  // below does not terminate for a row that no index set contains.
+  void distribute_sparsity_pattern(BlockDynamicSparsityPattern &dsp,
+                                   const std::vector<IndexSet> &owned_set_per_cpu,
+                                   const MPI_Comm              &mpi_comm,
+                                   const IndexSet              &myrange)
+  {
+    const unsigned int myid = Utilities::MPI::this_mpi_process(mpi_comm);
+
+    typedef std::map<BlockDynamicSparsityPattern::size_type,
+            std::vector<BlockDynamicSparsityPattern::size_type> >
+            map_vec_t;
+    map_vec_t send_data;
+
+    {
+      unsigned int dest_cpu=0;
+
+      BlockDynamicSparsityPattern::size_type n_local_rel_rows = myrange.n_elements();
+      for (BlockDynamicSparsityPattern::size_type row_idx=0; row_idx<n_local_rel_rows; ++row_idx)
+        {
+          BlockDynamicSparsityPattern::size_type row=myrange.nth_index_in_set(row_idx);
+
+          // calculate destination CPU, note that we start the search
+          // at last destination cpu, because even if the owned ranges
+          // are not contiguous, they hopefully consist of large blocks
+          while (!owned_set_per_cpu[dest_cpu].is_element(row))
+            {
+              ++dest_cpu;
+              if (dest_cpu==owned_set_per_cpu.size()) // wrap around
+                dest_cpu=0;
+            }
+
+          //skip myself
+          if (dest_cpu==myid)
+            continue;
+
+          BlockDynamicSparsityPattern::size_type rlen = dsp.row_length(row);
+
+          //skip empty lines
+          if (!rlen)
+            continue;
+
+          //save entries
+          std::vector<BlockDynamicSparsityPattern::size_type> &dst = send_data[dest_cpu];
+
+          dst.push_back(rlen); // number of entries
+          dst.push_back(row); // row index
+          for (BlockDynamicSparsityPattern::size_type c=0; c<rlen; ++c)
+            dst.push_back(dsp.column_number(row, c)); // columns
+        }
+    }
+
+    unsigned int num_receive=0;
+    {
+      std::vector<unsigned int> send_to;
+      send_to.reserve(send_data.size());
+      for (map_vec_t::iterator it=send_data.begin(); it!=send_data.end(); ++it)
+        send_to.push_back(it->first);
+
+      num_receive =
+        Utilities::MPI::
+        compute_point_to_point_communication_pattern(mpi_comm, send_to).size();
+    }
+
+    std::vector<MPI_Request> requests(send_data.size());
+    const int mpi_tag = 124; // tag shared by the sends and the matching probes
+
+    // send data
+    {
+      unsigned int idx=0;
+      for (map_vec_t::iterator it=send_data.begin(); it!=send_data.end(); ++it, ++idx)
+        MPI_Isend(&(it->second[0]),
+                  it->second.size(),
+                  DEAL_II_DOF_INDEX_MPI_TYPE,
+                  it->first,
+                  mpi_tag,
+                  mpi_comm,
+                  &requests[idx]);
+    }
+
+    {
+      //receive
+      std::vector<BlockDynamicSparsityPattern::size_type> recv_buf;
+      for (unsigned int index=0; index<num_receive; ++index)
+        {
+          MPI_Status status;
+          int len;
+          // probe for our tag only: other traffic on mpi_comm must not be consumed
+          MPI_Probe(MPI_ANY_SOURCE, mpi_tag, mpi_comm, &status);
+          Assert (status.MPI_TAG==mpi_tag, ExcInternalError());
+          MPI_Get_count(&status, DEAL_II_DOF_INDEX_MPI_TYPE, &len);
+          recv_buf.resize(len);
+          MPI_Recv(&recv_buf[0], len, DEAL_II_DOF_INDEX_MPI_TYPE, status.MPI_SOURCE,
+                   status.MPI_TAG, mpi_comm, &status);
+
+          std::vector<BlockDynamicSparsityPattern::size_type>::const_iterator ptr = recv_buf.begin();
+          std::vector<BlockDynamicSparsityPattern::size_type>::const_iterator end = recv_buf.end();
+          while (ptr!=end)
+            {
+              BlockDynamicSparsityPattern::size_type num=*(ptr++);
+              Assert(ptr!=end, ExcInternalError());
+              BlockDynamicSparsityPattern::size_type row=*(ptr++);
+              for (unsigned int c=0; c<num; ++c)
+                {
+                  Assert(ptr!=end, ExcInternalError());
+                  dsp.add(row, *ptr);
+                  ptr++;
+                }
+            }
+          Assert(ptr==end, ExcInternalError());
+        }
+    }
+
+    // complete all sends, so that we can safely destroy the buffers.
+    if (requests.size())
+      MPI_Waitall(requests.size(), &requests[0], MPI_STATUSES_IGNORE);
+  }
+#endif
+}
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/lac/swappable_vector.cc b/source/lac/swappable_vector.cc
new file mode 100644
index 0000000..f1579e0
--- /dev/null
+++ b/source/lac/swappable_vector.cc
@@ -0,0 +1,24 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/swappable_vector.templates.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+// explicit instantiations
+template class SwappableVector<double>;
+template class SwappableVector<float>;
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/lac/tridiagonal_matrix.cc b/source/lac/tridiagonal_matrix.cc
new file mode 100644
index 0000000..a152648
--- /dev/null
+++ b/source/lac/tridiagonal_matrix.cc
@@ -0,0 +1,272 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/lac/tridiagonal_matrix.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/lapack_templates.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+using namespace LAPACKSupport;
+
+// Create a matrix with @p size diagonal entries, all stored values set to
+// zero. With symmetric storage the vector @p left is kept empty.
+template<typename number>
+TridiagonalMatrix<number>::TridiagonalMatrix(size_type size, bool symmetric)
+  :
+  diagonal(size, 0.),
+  left((symmetric ? 0 : size), 0.),
+  right(size, 0.),
+  is_symmetric(symmetric),
+  state(matrix)
+{}
+
+
+// Resize the matrix to @p size rows and reset every stored entry to zero.
+// resize() alone would keep the old values of the surviving entries.
+template<typename number>
+void
+TridiagonalMatrix<number>::reinit(size_type size, bool symmetric)
+{
+  is_symmetric = symmetric;
+  diagonal.assign(size, 0.);
+  right.assign(size, 0.);
+  left.assign(symmetric ? 0 : size, 0.);
+  state = matrix;
+}
+
+
+// Return whether every stored entry on the three diagonals equals zero.
+// Only allowed while the object is in the matrix state.
+template<typename number>
+bool
+TridiagonalMatrix<number>::all_zero() const
+{
+  Assert(state == matrix, ExcState(state));
+
+  // check the main diagonal first, then the two off-diagonals
+  for (typename std::vector<number>::size_type k=0; k<diagonal.size(); ++k)
+    if (diagonal[k] != 0.)
+      return false;
+
+  for (typename std::vector<number>::size_type k=0; k<left.size(); ++k)
+    if (left[k] != 0.)
+      return false;
+
+  for (typename std::vector<number>::size_type k=0; k<right.size(); ++k)
+    if (right[k] != 0.)
+      return false;
+  return true;
+}
+
+
+// Compute w = A*v, or w += A*v if @p adding is true. With symmetric storage
+// the sub-diagonal is read from the vector that holds the super-diagonal.
+template<typename number>
+void
+TridiagonalMatrix<number>::vmult (
+  Vector<number>       &w,
+  const Vector<number> &v,
+  const bool            adding) const
+{
+  Assert(state == matrix, ExcState(state));
+
+  Assert(w.size() == n(), ExcDimensionMismatch(w.size(), n()));
+  Assert(v.size() == n(), ExcDimensionMismatch(v.size(), n()));
+
+  if (n()==0) return;
+
+  // A 1x1 matrix only has its diagonal entry; the general code below would
+  // read v(1) and v(e-1) with e==0, i.e. out of range
+  if (n()==1)
+    {
+      w(0) = (adding ? w(0) + diagonal[0] * v(0) : diagonal[0] * v(0));
+      return;
+    }
+
+  // The actual loop skips the first and last row
+  const size_type e=n()-1;
+  // Let iterators point to the first entry of each diagonal; the left
+  // diagonal starts one later or aliases the right one (symmetric storage)
+  typename std::vector<number>::const_iterator d = diagonal.begin();
+  typename std::vector<number>::const_iterator r = right.begin();
+  typename std::vector<number>::const_iterator l = is_symmetric ? r : left.begin()+1;
+
+  if (adding)
+    {
+      // Treat first row separately
+      w(0) += (*d) * v(0) + (*r) * v(1);
+      ++d; ++r;
+      // All rows with three entries
+      for (size_type i=1; i<e; ++i,++d,++r,++l)
+        w(i) += (*l) * v(i-1) + (*d) * v(i) + (*r) * v(i+1);
+      // Last row is special again
+      w(e) += (*l) * v(e-1) + (*d) * v(e);
+    }
+  else
+    {
+      w(0) = (*d) * v(0) + (*r) * v(1);
+      ++d; ++r;
+      for (size_type i=1; i<e; ++i,++d,++r,++l)
+        w(i) = (*l) * v(i-1) + (*d) * v(i) + (*r) * v(i+1);
+      w(e) = (*l) * v(e-1) + (*d) * v(e);
+    }
+}
+
+
+// Add A*v to @p w; thin wrapper around vmult() with adding switched on.
+template<typename number>
+void
+TridiagonalMatrix<number>::vmult_add (Vector<number> &w, const Vector<number> &v) const
+{
+  const bool accumulate = true;
+  vmult(w, v, accumulate);
+}
+
+
+// Compute w = A^T*v, or w += A^T*v if @p adding is true.
+template<typename number>
+void
+TridiagonalMatrix<number>::Tvmult (
+  Vector<number>       &w,
+  const Vector<number> &v,
+  const bool            adding) const
+{
+  Assert(state == matrix, ExcState(state));
+  Assert(w.size() == n(), ExcDimensionMismatch(w.size(), n()));
+  Assert(v.size() == n(), ExcDimensionMismatch(v.size(), n()));
+
+  if (n()==0) return;
+  // 1x1: only the diagonal entry exists; the code below needs n()>=2
+  if (n()==1)
+    {
+      w(0) = (adding ? w(0) + diagonal[0] * v(0) : diagonal[0] * v(0));
+      return;
+    }
+
+  const size_type e=n()-1;
+  typename std::vector<number>::const_iterator d = diagonal.begin();
+  typename std::vector<number>::const_iterator r = right.begin();
+  typename std::vector<number>::const_iterator l = is_symmetric ? r : left.begin()+1;
+
+  if (adding)
+    {
+      w(0) += (*d) * v(0) + (*l) * v(1);
+      ++d; ++l;
+      for (size_type i=1; i<e; ++i,++d,++r,++l)
+        w(i) += (*l) * v(i+1) + (*d) * v(i) + (*r) * v(i-1);
+      w(e) += (*d) * v(e) + (*r) * v(e-1);
+    }
+  else
+    {
+      w(0) = (*d) * v(0) + (*l) * v(1);
+      ++d; ++l;
+      for (size_type i=1; i<e; ++i,++d,++r,++l)
+        w(i) = (*l) * v(i+1) + (*d) * v(i) + (*r) * v(i-1);
+      w(e) = (*d) * v(e) + (*r) * v(e-1);
+    }
+}
+
+
+// Add A^T*v to @p w; thin wrapper around Tvmult() with adding switched on.
+template<typename number>
+void
+TridiagonalMatrix<number>::Tvmult_add (Vector<number> &w, const Vector<number> &v) const
+{
+  const bool accumulate = true;
+  Tvmult(w, v, accumulate);
+}
+
+
+// Return w^T A v. Empty and 1x1 matrices are handled up front because the
+// general code below assumes that the matrix has at least two rows.
+template<typename number>
+number
+TridiagonalMatrix<number>::matrix_scalar_product(
+  const Vector<number> &w,
+  const Vector<number> &v) const
+{
+  Assert(state == matrix, ExcState(state));
+  Assert(w.size() == n(), ExcDimensionMismatch(w.size(), n()));
+  Assert(v.size() == n(), ExcDimensionMismatch(v.size(), n()));
+  if (n()==0) return number();
+  if (n()==1) return w(0) * (diagonal[0] * v(0));
+
+  const size_type e=n()-1;
+  typename std::vector<number>::const_iterator d = diagonal.begin();
+  typename std::vector<number>::const_iterator r = right.begin();
+  typename std::vector<number>::const_iterator l = is_symmetric ? r : left.begin()+1;
+  number result = w(0) * ((*d) * v(0) + (*r) * v(1));
+  ++d; ++r;
+  for (size_type i=1; i<e; ++i,++d,++r,++l)
+    result += w(i) * ((*l) * v(i-1)+ (*d) * v(i)+ (*r) * v(i+1));
+  result += w(e) * ((*l) * v(e-1) + (*d) * v(e));
+  return result;
+}
+
+
+// Return v^T A v by evaluating the matrix scalar product with w = v.
+template<typename number>
+number
+TridiagonalMatrix<number>::matrix_norm_square(const Vector<number> &v) const
+{
+  return this->matrix_scalar_product(v, v);
+}
+
+
+// Compute the eigenvalues of a symmetric matrix through the stev wrapper;
+// afterwards the object is in the eigenvalues state.
+template<>
+void
+TridiagonalMatrix<double>::compute_eigenvalues()
+{
+#ifdef DEAL_II_WITH_LAPACK
+  Assert(state == matrix, ExcState(state));
+  Assert(is_symmetric, ExcNotImplemented());
+  const int dimension = n();
+  int lapack_status;
+  stev (&N, &dimension, &diagonal[0], &right[0], 0, &one, 0, &lapack_status);
+  Assert(lapack_status == 0, ExcInternalError());
+  state = eigenvalues;
+#else
+  Assert(false, ExcNeedsLAPACK());
+#endif
+}
+
+
+// Return the i-th eigenvalue; they are read from the diagonal vector.
+template<typename number>
+number TridiagonalMatrix<number>::eigenvalue(const size_type i) const
+{
+  Assert(state == eigenvalues, ExcState(state));
+  Assert(i<n(), ExcIndexRange(i,0,n()));
+  return *(diagonal.begin() + i);
+}
+
+
+/*
+template<typename number>
+TridiagonalMatrix<number>::
+{
+}
+
+
+*/
+
+template class TridiagonalMatrix<float>;
+template class TridiagonalMatrix<double>;
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/lac/trilinos_block_sparse_matrix.cc b/source/lac/trilinos_block_sparse_matrix.cc
new file mode 100644
index 0000000..1b6d801
--- /dev/null
+++ b/source/lac/trilinos_block_sparse_matrix.cc
@@ -0,0 +1,444 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/trilinos_block_sparse_matrix.h>
+
+#ifdef DEAL_II_WITH_TRILINOS
+
+#  include <deal.II/lac/block_sparse_matrix.h>
+#  include <deal.II/lac/block_sparsity_pattern.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace TrilinosWrappers
+{
+  BlockSparseMatrix::BlockSparseMatrix ()
+  {} // nothing to set up here; the reinit() functions create the blocks
+
+
+
+  BlockSparseMatrix::~BlockSparseMatrix ()
+  {
+    // get rid of the content of the subobjects array, with the same call
+    // that reinit() issues before it rebuilds the blocks
+    this->clear ();
+  }
+
+
+
+  BlockSparseMatrix &
+  BlockSparseMatrix::operator = (const BlockSparseMatrix &m)
+  {
+    // all the copying is delegated to the base class assignment operator
+    this->BaseClass::operator = (m);
+    return *this;
+  }
+
+
+
+  void
+  BlockSparseMatrix::
+  reinit (const size_type n_block_rows,
+          const size_type n_block_columns)
+  {
+    // Throw away whatever blocks are currently stored.
+    this->clear ();
+
+    // Resize the table of blocks and reset the row/column index
+    // bookkeeping so that every block has size zero. The caller has to
+    // run collect_sizes() once the blocks have been given their sizes.
+    this->sub_objects.reinit (n_block_rows, n_block_columns);
+    this->row_block_indices.reinit (n_block_rows, 0);
+    this->column_block_indices.reinit (n_block_columns, 0);
+
+    // Put a freshly allocated block into every slot of the table.
+    const size_type n_rows = this->n_block_rows();
+    const size_type n_cols = this->n_block_cols();
+
+    for (size_type row=0; row<n_rows; ++row)
+      for (size_type col=0; col<n_cols; ++col)
+        {
+          // each slot is expected to be empty after the resize above
+          Assert (this->sub_objects[row][col] == 0,
+                  ExcInternalError());
+          this->sub_objects[row][col] = new BlockType();
+        }
+  }
+
+
+
+  // Set up the matrix from a block sparsity pattern and one Epetra map per
+  // block row/column. @p exchange_data is passed on unchanged to the
+  // reinit() of every block.
+  template <typename BlockSparsityPatternType>
+  void
+  BlockSparseMatrix::
+  reinit (const std::vector<Epetra_Map>  &parallel_partitioning,
+          const BlockSparsityPatternType &block_sparsity_pattern,
+          const bool                      exchange_data)
+  {
+    // The block structure has to be square, with exactly one map for each
+    // block row (and hence each block column).
+    const size_type n_maps = parallel_partitioning.size();
+    (void)n_maps;
+
+    Assert (n_maps == block_sparsity_pattern.n_block_rows(),
+            ExcDimensionMismatch (n_maps,
+                                  block_sparsity_pattern.n_block_rows()));
+    Assert (n_maps == block_sparsity_pattern.n_block_cols(),
+            ExcDimensionMismatch (n_maps,
+                                  block_sparsity_pattern.n_block_cols()));
+
+    // Step 1: create empty blocks in a table of the right shape.
+    this->reinit (block_sparsity_pattern.n_block_rows(),
+                  block_sparsity_pattern.n_block_cols());
+
+    // Step 2: take over the block sizes from the sparsity pattern.
+    this->row_block_indices    = block_sparsity_pattern.get_row_indices();
+    this->column_block_indices = block_sparsity_pattern.get_column_indices();
+
+    // Step 3: build each block from its part of the sparsity pattern, using
+    // the map of its block row and the map of its block column.
+    const size_type n_rows = this->n_block_rows();
+    const size_type n_cols = this->n_block_cols();
+
+    for (size_type row=0; row<n_rows; ++row)
+      for (size_type col=0; col<n_cols; ++col)
+        {
+          this->sub_objects[row][col]->reinit (parallel_partitioning[row],
+                                               parallel_partitioning[col],
+                                               block_sparsity_pattern.block(row,col),
+                                               exchange_data);
+        }
+  }
+
+
+
+  template <typename BlockSparsityPatternType>
+  void
+  BlockSparseMatrix::
+  reinit (const std::vector<IndexSet>    &parallel_partitioning,
+          const BlockSparsityPatternType &block_sparsity_pattern,
+          const MPI_Comm                 &communicator,
+          const bool                      exchange_data)
+  {
+    // turn every index set into an Epetra map with make_trilinos_map() and
+    // hand the maps to the Epetra_Map-based reinit()
+    std::vector<Epetra_Map> maps_per_block;
+    for (size_type block=0; block<block_sparsity_pattern.n_block_rows(); ++block)
+      maps_per_block.push_back
+      (parallel_partitioning[block].make_trilinos_map(communicator, false));
+    this->reinit (maps_per_block, block_sparsity_pattern, exchange_data);
+  }
+
+
+
+  template <typename BlockSparsityPatternType>
+  void
+  BlockSparseMatrix::
+  reinit (const BlockSparsityPatternType &block_sparsity_pattern)
+  {
+    // one map per block row, sized like the first block of that row
+    typedef TrilinosWrappers::types::int_type int_type;
+    std::vector<Epetra_Map> row_maps;
+    for (size_type row=0; row<block_sparsity_pattern.n_block_rows(); ++row)
+      row_maps.push_back
+      (Epetra_Map(static_cast<int_type>(block_sparsity_pattern.block(row,0).n_rows()),
+                  0, Utilities::Trilinos::comm_self()));
+    reinit (row_maps, block_sparsity_pattern);
+  }
+
+
+
+  template <>
+  void
+  BlockSparseMatrix::
+  reinit (const BlockSparsityPattern    &block_sparsity_pattern)
+  {
+    // Specialization for BlockSparsityPattern: no Epetra maps are set up
+    // here, every block is built from its part of the pattern alone.
+    // Create the table of empty blocks ...
+    this->reinit (block_sparsity_pattern.n_block_rows(),
+                  block_sparsity_pattern.n_block_cols());
+
+    // ... copy the block sizes from the sparsity pattern ...
+    this->row_block_indices    = block_sparsity_pattern.get_row_indices();
+    this->column_block_indices = block_sparsity_pattern.get_column_indices();
+
+    // ... and initialize each block with the matching sub-pattern.
+    const size_type n_rows = this->n_block_rows();
+    const size_type n_cols = this->n_block_cols();
+    for (size_type row=0; row<n_rows; ++row)
+      for (size_type col=0; col<n_cols; ++col)
+        this->sub_objects[row][col]->reinit (block_sparsity_pattern.block(row,col));
+  }
+
+
+
+  void
+  BlockSparseMatrix::
+  reinit (const std::vector<Epetra_Map>             &parallel_partitioning,
+          const ::dealii::BlockSparseMatrix<double> &dealii_block_sparse_matrix,
+          const double                               drop_tolerance)
+  {
+    // one map per block row/column: the block structure must be square
+    const size_type n_blocks = parallel_partitioning.size();
+
+    Assert (n_blocks == dealii_block_sparse_matrix.n_block_rows(),
+            ExcDimensionMismatch (n_blocks,
+                                  dealii_block_sparse_matrix.n_block_rows()));
+    Assert (n_blocks == dealii_block_sparse_matrix.n_block_cols(),
+            ExcDimensionMismatch (n_blocks,
+                                  dealii_block_sparse_matrix.n_block_cols()));
+
+    // create an n_blocks x n_blocks table of empty blocks ...
+    this->reinit (n_blocks, n_blocks);
+
+    // ... fill each block from the corresponding deal.II block, passing
+    // the drop tolerance through ...
+    for (size_type row=0; row<this->n_block_rows(); ++row)
+      for (size_type col=0; col<this->n_block_cols(); ++col)
+        this->sub_objects[row][col]->reinit(parallel_partitioning[row],
+                                            parallel_partitioning[col],
+                                            dealii_block_sparse_matrix.block(row,col),
+                                            drop_tolerance);
+
+    // ... and let collect_sizes() update the block size bookkeeping
+    this->collect_sizes();
+  }
+
+
+
+  void
+  BlockSparseMatrix::
+  reinit (const ::dealii::BlockSparseMatrix<double> &dealii_block_sparse_matrix,
+          const double                               drop_tolerance)
+  {
+    // only matrices that are square, both block-wise and entry-wise, are
+    // accepted
+    Assert (dealii_block_sparse_matrix.n_block_rows() ==
+            dealii_block_sparse_matrix.n_block_cols(),
+            ExcDimensionMismatch (dealii_block_sparse_matrix.n_block_rows(),
+                                  dealii_block_sparse_matrix.n_block_cols()));
+    Assert (dealii_block_sparse_matrix.m() == dealii_block_sparse_matrix.n(),
+            ExcDimensionMismatch (dealii_block_sparse_matrix.m(),
+                                  dealii_block_sparse_matrix.n()));
+
+    // build purely local maps (self communicator with MPI, serial
+    // communicator without) and defer to the map-based reinit()
+#ifdef DEAL_II_WITH_MPI
+    Epetra_MpiComm    local_communicator (MPI_COMM_SELF);
+#else
+    Epetra_SerialComm local_communicator;
+#endif
+
+    typedef TrilinosWrappers::types::int_type int_type;
+    std::vector<Epetra_Map> local_maps;
+    for (size_type row=0; row<dealii_block_sparse_matrix.n_block_rows(); ++row)
+      local_maps.push_back (Epetra_Map(static_cast<int_type>(dealii_block_sparse_matrix.block(row,0).m()),
+                                       0, local_communicator));
+    reinit (local_maps, dealii_block_sparse_matrix, drop_tolerance);
+  }
+
+
+
+
+
+  void
+  BlockSparseMatrix::collect_sizes ()
+  {
+    // forwarding wrapper: the base class function of this name is not public
+    this->BaseClass::collect_sizes ();
+  }
+
+
+
+  // Return the sum of the nonzero counts reported by all blocks.
+  BlockSparseMatrix::size_type
+  BlockSparseMatrix::n_nonzero_elements () const
+  {
+    size_type total = 0;
+    for (size_type row = 0; row<this->n_block_rows(); ++row)
+      for (size_type col = 0; col<this->n_block_cols(); ++col)
+        total += this->block(row,col).n_nonzero_elements();
+    return total;
+  }
+
+
+
+  // Store the residual b - A*x in @p dst and return its l2 norm.
+  TrilinosScalar
+  BlockSparseMatrix::residual (MPI::BlockVector       &dst,
+                               const MPI::BlockVector &x,
+                               const MPI::BlockVector &b) const
+  {
+    this->vmult (dst, x);  // dst = A*x
+    dst -= b;              // dst = A*x - b
+    dst *= -1.;            // dst = b - A*x
+    return dst.l2_norm();
+  }
+
+
+
+  // TODO: In the following we
+  // use the same code as just
+  // above six more times. Use
+  // templates.
+  // Store the residual b - A*x in @p dst and return its l2 norm.
+  TrilinosScalar
+  BlockSparseMatrix::residual (BlockVector       &dst,
+                               const BlockVector &x,
+                               const BlockVector &b) const
+  {
+    this->vmult (dst, x);  // dst = A*x
+    dst -= b;              // dst = A*x - b
+    dst *= -1.;            // dst = b - A*x
+    return dst.l2_norm();
+  }
+
+
+
+  // Store the residual b - A*x in @p dst and return its l2 norm.
+  TrilinosScalar
+  BlockSparseMatrix::residual (MPI::BlockVector       &dst,
+                               const MPI::Vector      &x,
+                               const MPI::BlockVector &b) const
+  {
+    this->vmult (dst, x);  // dst = A*x
+    dst -= b;              // dst = A*x - b
+    dst *= -1.;            // dst = b - A*x
+    return dst.l2_norm();
+  }
+
+
+
+  // Store the residual b - A*x in @p dst and return its l2 norm.
+  TrilinosScalar
+  BlockSparseMatrix::residual (BlockVector       &dst,
+                               const Vector      &x,
+                               const BlockVector &b) const
+  {
+    this->vmult (dst, x);  // dst = A*x
+    dst -= b;              // dst = A*x - b
+    dst *= -1.;            // dst = b - A*x
+    return dst.l2_norm();
+  }
+
+
+
+  // Store the residual b - A*x in @p dst and return its l2 norm.
+  TrilinosScalar
+  BlockSparseMatrix::residual (MPI::Vector            &dst,
+                               const MPI::BlockVector &x,
+                               const MPI::Vector      &b) const
+  {
+    this->vmult (dst, x);  // dst = A*x
+    dst -= b;              // dst = A*x - b
+    dst *= -1.;            // dst = b - A*x
+    return dst.l2_norm();
+  }
+
+
+
+  // Store the residual b - A*x in @p dst and return its l2 norm.
+  TrilinosScalar
+  BlockSparseMatrix::residual (Vector            &dst,
+                               const BlockVector &x,
+                               const Vector      &b) const
+  {
+    this->vmult (dst, x);  // dst = A*x
+    dst -= b;              // dst = A*x - b
+    dst *= -1.;            // dst = b - A*x
+    return dst.l2_norm();
+  }
+
+
+
+  // Store the residual b - A*x in @p dst and return its l2 norm.
+  TrilinosScalar
+  BlockSparseMatrix::residual (VectorBase       &dst,
+                               const VectorBase &x,
+                               const VectorBase &b) const
+  {
+    this->vmult (dst, x);  // dst = A*x
+    dst -= b;              // dst = A*x - b
+    dst *= -1.;            // dst = b - A*x
+    return dst.l2_norm();
+  }
+
+
+
+  // Collect the domain maps of the blocks in the first block row, one per
+  // block column.
+  std::vector<Epetra_Map>
+  BlockSparseMatrix::domain_partitioner () const
+  {
+    Assert (this->n_block_cols() != 0, ExcNotInitialized());
+    Assert (this->n_block_rows() != 0, ExcNotInitialized());
+    std::vector<Epetra_Map> column_maps;
+    for (size_type col = 0; col < this->n_block_cols(); ++col)
+      column_maps.push_back(this->sub_objects[0][col]->domain_partitioner());
+    return column_maps;
+  }
+
+
+
+  // Collect the range maps of the blocks in the first block column, one per
+  // block row.
+  std::vector<Epetra_Map>
+  BlockSparseMatrix::range_partitioner () const
+  {
+    Assert (this->n_block_cols() != 0, ExcNotInitialized());
+    Assert (this->n_block_rows() != 0, ExcNotInitialized());
+    std::vector<Epetra_Map> row_maps;
+    for (size_type row = 0; row < this->n_block_rows(); ++row)
+      row_maps.push_back(this->sub_objects[row][0]->range_partitioner());
+    return row_maps;
+  }
+
+
+
+
+
+
+
+  // -------------------- explicit instantiations -----------------------
+  //
+  template void
+  BlockSparseMatrix::reinit (const dealii::BlockSparsityPattern &);
+  template void
+  BlockSparseMatrix::reinit (const dealii::BlockDynamicSparsityPattern &);
+
+  template void
+  BlockSparseMatrix::reinit (const std::vector<Epetra_Map> &,
+                             const dealii::BlockSparsityPattern &,
+                             const bool);
+  template void
+  BlockSparseMatrix::reinit (const std::vector<Epetra_Map> &,
+                             const dealii::BlockDynamicSparsityPattern &,
+                             const bool);
+
+  template void
+  BlockSparseMatrix::reinit (const std::vector<IndexSet> &,
+                             const dealii::BlockDynamicSparsityPattern &,
+                             const MPI_Comm &,
+                             const bool);
+
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/source/lac/trilinos_block_vector.cc b/source/lac/trilinos_block_vector.cc
new file mode 100644
index 0000000..811dc94
--- /dev/null
+++ b/source/lac/trilinos_block_vector.cc
@@ -0,0 +1,428 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/trilinos_block_vector.h>
+
+#ifdef DEAL_II_WITH_TRILINOS
+
+#  include <deal.II/lac/trilinos_block_sparse_matrix.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace TrilinosWrappers
+{
+  namespace
+  {
+    // Helper returning the global number of elements of an Epetra_BlockMap.
+    // Depending on DEAL_II_WITH_64BIT_INDICES it calls the 32- or the 64-bit
+    // Epetra query and returns the matching index type, so that the result can
+    // be used in calling Epetra member functions overloaded by index type.
+#ifndef DEAL_II_WITH_64BIT_INDICES
+    int n_global_elements (const Epetra_BlockMap &map)
+    {
+      return map.NumGlobalElements();  // 32-bit index variant
+    }
+#else
+    long long int n_global_elements (const Epetra_BlockMap &map)
+    {
+      return map.NumGlobalElements64();  // 64-bit index variant
+    }
+#endif
+  }
+
+
+  namespace MPI
+  {
+    BlockVector &
+    BlockVector::operator = (const value_type s)
+    {
+      BaseClass::operator = (s);  // the scalar assignment itself is done by the base class
+      return *this;
+    }
+
+
+
+    BlockVector &
+    BlockVector::operator = (const BlockVector &v)
+    {
+      // assignment is permitted only if this vector has no blocks yet or
+      // already has the same number of blocks as the source
+      Assert (n_blocks() == 0 || n_blocks() == v.n_blocks(),
+              ExcDimensionMismatch(n_blocks(), v.n_blocks()));
+
+      // if the block counts differ, resize to the block count of the source
+      if (v.n_blocks() != this->n_blocks())
+        reinit(v.n_blocks());
+
+      // copy the blocks one by one before calling collect_sizes()
+      for (size_type b=0; b<this->n_blocks(); ++b)
+        this->components[b] = v.block(b);
+      collect_sizes();
+      return *this;
+    }
+
+
+
+#ifdef DEAL_II_WITH_CXX11
+    BlockVector &
+    BlockVector::operator= (BlockVector &&v)
+    {
+      swap(v);  // move assignment is implemented by swapping with v, not by copying
+      return *this;
+    }
+#endif
+
+
+
+    BlockVector &
+    BlockVector::operator = (const ::dealii::TrilinosWrappers::BlockVector &v)
+    {
+      // no resizing happens here: both vectors must agree in their block count
+      Assert (n_blocks() == v.n_blocks(),
+              ExcDimensionMismatch(n_blocks(),v.n_blocks()));
+      // assign the blocks one at a time
+      for (size_type b=0; b!=n_blocks(); ++b)
+        components[b] = v.block(b);
+      return *this;
+    }
+
+
+
+    BlockVector::~BlockVector ()
+    {}  // empty body: no explicit cleanup is written here
+
+
+
+    void
+    BlockVector::reinit (const std::vector<Epetra_Map> &input_maps,
+                         const bool                     omit_zeroing_entries)
+    {
+      // the global element count of every map becomes the matching block size
+      const size_type n_maps = input_maps.size();
+      std::vector<size_type> sizes_of_blocks (n_maps);
+      for (size_type m=0; m<n_maps; ++m)
+        sizes_of_blocks[m] = n_global_elements(input_maps[m]);
+
+      this->block_indices.reinit (sizes_of_blocks);
+      // resize the block array only if its length no longer matches
+      if (this->components.size() != this->n_blocks())
+        this->components.resize(this->n_blocks());
+
+      // reinitialize block b from map b
+      for (size_type b=0; b<this->n_blocks(); ++b)
+        this->components[b].reinit(input_maps[b], omit_zeroing_entries);
+
+      this->collect_sizes();
+    }
+
+
+
+    void
+    BlockVector::reinit (const std::vector<IndexSet> &parallel_partitioning,
+                         const MPI_Comm              &communicator,
+                         const bool                   omit_zeroing_entries)
+    {
+      // the size() of every index set becomes the matching block size
+      const size_type n_index_sets = parallel_partitioning.size();
+      std::vector<size_type> sizes_of_blocks (n_index_sets);
+      for (size_type s=0; s<n_index_sets; ++s)
+        sizes_of_blocks[s] = parallel_partitioning[s].size();
+
+      this->block_indices.reinit (sizes_of_blocks);
+      // resize the block array only if its length no longer matches
+      if (this->components.size() != this->n_blocks())
+        this->components.resize(this->n_blocks());
+
+      // reinitialize block b from index set b on the given communicator
+      for (size_type b=0; b<this->n_blocks(); ++b)
+        this->components[b].reinit(parallel_partitioning[b], communicator, omit_zeroing_entries);
+
+      this->collect_sizes();
+    }
+
+    void
+    BlockVector::reinit (const std::vector<IndexSet> &parallel_partitioning,
+                         const std::vector<IndexSet> &ghost_values,
+                         const MPI_Comm              &communicator,
+                         const bool                   vector_writable)
+    {
+      // One block is set up per entry of parallel_partitioning, and block i also
+      // reads ghost_values[i]. Both lists must therefore have the same length;
+      // otherwise ghost_values would be indexed out of range below.
+      Assert (ghost_values.size() == parallel_partitioning.size(),
+              ExcDimensionMismatch(ghost_values.size(), parallel_partitioning.size()));
+
+      const size_type no_blocks = parallel_partitioning.size();
+      std::vector<size_type> block_sizes (no_blocks);
+      for (size_type i=0; i<no_blocks; ++i)
+        block_sizes[i] = parallel_partitioning[i].size();
+      this->block_indices.reinit (block_sizes);
+      if (components.size() != n_blocks())
+        components.resize(n_blocks());
+      for (size_type i=0; i<n_blocks(); ++i)
+        components[i].reinit(parallel_partitioning[i], ghost_values[i],
+                             communicator, vector_writable);
+      collect_sizes();
+    }
+
+
+    void
+    BlockVector::reinit (const BlockVector &v,
+                         const bool omit_zeroing_entries)
+    {
+      // take over the block structure of v and size the block array to match
+      this->block_indices = v.get_block_indices();
+      if (this->components.size() != this->n_blocks())
+        this->components.resize(this->n_blocks());
+      // reinitialize every block from its counterpart in v
+      for (size_type b=0; b<this->n_blocks(); ++b)
+        this->components[b].reinit(v.block(b), omit_zeroing_entries, false);
+      this->collect_sizes();
+    }
+
+
+
+    void
+    BlockVector::reinit (const size_type num_blocks)
+    {
+      // describe num_blocks blocks, each of size zero
+      std::vector<size_type> zero_sizes (num_blocks, 0);
+      block_indices.reinit (zero_sizes);
+      if (components.size() != n_blocks())
+        components.resize(n_blocks());
+      // clear every block, including blocks that existed before the call
+      for (size_type b=0; b<n_blocks(); ++b)
+        components[b].clear();
+      collect_sizes();
+    }
+
+
+
+    void
+    BlockVector::import_nonlocal_data_for_fe
+    (const TrilinosWrappers::BlockSparseMatrix &m,
+     const BlockVector                         &v)
+    {
+      // m must have as many block rows and block columns as v has blocks
+      Assert (m.n_block_rows() == v.n_blocks(),
+              ExcDimensionMismatch(m.n_block_rows(),v.n_blocks()));
+      Assert (m.n_block_cols() == v.n_blocks(),
+              ExcDimensionMismatch(m.n_block_cols(),v.n_blocks()));
+      // take over the block structure of v if the block counts differ
+      if (this->n_blocks() != v.n_blocks())
+        {
+          this->block_indices = v.get_block_indices();
+          this->components.resize(v.n_blocks());
+        }
+      // block b imports from block b of v, using the diagonal block (b,b) of m
+      for (size_type b=0; b<this->n_blocks(); ++b)
+        this->components[b].import_nonlocal_data_for_fe(m.block(b,b), v.block(b));
+      this->collect_sizes();
+    }
+
+
+
+    void BlockVector::print (std::ostream       &out,
+                             const unsigned int  precision,
+                             const bool          scientific,
+                             const bool          across) const
+    {
+      for (size_type block_no=0; block_no<this->n_blocks(); ++block_no)
+        {
+          if (!across)  // label on a line of its own
+            out << "Component " << block_no << std::endl;
+          else          // compact inline label
+            out << 'C' << block_no << ':';
+          components[block_no].print(out, precision, scientific, across);
+        }
+    }
+
+  } /* end of namespace MPI */
+
+
+
+
+
+
+  BlockVector &
+  BlockVector::operator = (const value_type s)
+  {
+    BaseClass::operator = (s);  // the scalar assignment itself is done by the base class
+    return *this;
+  }
+
+
+
+  void
+  BlockVector::reinit (const std::vector<Epetra_Map> &input_maps,
+                       const bool                     omit_zeroing_entries)
+  {
+    // the global element count of every map becomes the matching block size
+    std::vector<size_type> global_sizes (input_maps.size());
+    for (size_type m=0; m<global_sizes.size(); ++m)
+      global_sizes[m] = n_global_elements(input_maps[m]);
+
+    // rebuild the block structure and match the length of the block array
+    block_indices.reinit (global_sizes);
+    if (this->components.size() != this->n_blocks())
+      this->components.resize(this->n_blocks());
+
+    // reinitialize block b from map b
+    for (size_type b=0; b<this->n_blocks(); ++b)
+      this->components[b].reinit(input_maps[b], omit_zeroing_entries);
+
+    this->collect_sizes();
+  }
+
+
+
+  void
+  BlockVector::reinit (const std::vector<IndexSet> &partitioning,
+                       const MPI_Comm              &communicator,
+                       const bool                   omit_zeroing_entries)
+  {
+    // the size() of every index set becomes the matching block size
+    std::vector<size_type> set_sizes (partitioning.size());
+    for (size_type s=0; s<set_sizes.size(); ++s)
+      set_sizes[s] = partitioning[s].size();
+
+    // rebuild the block structure and match the length of the block array
+    block_indices.reinit (set_sizes);
+    if (this->components.size() != this->n_blocks())
+      this->components.resize(this->n_blocks());
+
+    // reinitialize block b from index set b on the given communicator
+    for (size_type b=0; b<this->n_blocks(); ++b)
+      this->components[b].reinit(partitioning[b], communicator, omit_zeroing_entries);
+
+    this->collect_sizes();
+  }
+
+
+
+  void
+  BlockVector::reinit (const std::vector<size_type> &block_sizes,
+                       const bool                    omit_zeroing_entries)
+  {
+    // the requested sizes define the new block structure
+    block_indices.reinit (block_sizes);
+    if (this->components.size() != this->n_blocks())
+      this->components.resize(this->n_blocks());
+    // reinitialize block b with block_sizes[b]
+    for (size_type b=0; b<this->n_blocks(); ++b)
+      this->components[b].reinit(block_sizes[b], omit_zeroing_entries);
+    this->collect_sizes();
+  }
+
+
+
+  void
+  BlockVector::reinit (const MPI::BlockVector &v)
+  {
+    // mirror the block structure of v, then assign its blocks one by one
+    this->block_indices = v.get_block_indices();
+    if (this->components.size() != this->n_blocks())
+      this->components.resize(this->n_blocks());
+    for (size_type b=0; b<this->n_blocks(); ++b)
+      this->components[b] = v.block(b);
+  }
+
+
+
+  void
+  BlockVector::reinit (const size_type num_blocks)
+  {
+    // describe num_blocks blocks, each of size zero
+    std::vector<size_type> zero_sizes (num_blocks, 0);
+    this->block_indices.reinit (zero_sizes);
+    if (this->components.size() != this->n_blocks())
+      this->components.resize(this->n_blocks());
+    // clear every block, including blocks that existed before the call
+    for (size_type b=0; b<this->n_blocks(); ++b)
+      this->block(b).clear();
+    this->collect_sizes();
+  }
+
+
+
+  void
+  BlockVector::reinit (const BlockVector &v,
+                       const bool         omit_zeroing_entries)
+  {
+    // take over the block structure of v and size the block array to match
+    this->block_indices = v.get_block_indices();
+    if (this->components.size() != this->n_blocks())
+      this->components.resize(this->n_blocks());
+    // reinitialize every block from its counterpart in v
+    for (size_type b=0; b<this->n_blocks(); ++b)
+      this->components[b].reinit(v.block(b), omit_zeroing_entries);
+    this->collect_sizes();
+  }
+
+
+
+  BlockVector &
+  BlockVector::operator = (const MPI::BlockVector &v)
+  {
+    reinit (v);  // reinit(const MPI::BlockVector &) adopts v's block structure and assigns its blocks
+
+    return *this;
+  }
+
+
+
+  BlockVector &
+  BlockVector::operator = (const BlockVector &v)
+  {
+    // on a block count mismatch, first adopt v's block count with zero sizes
+    if (v.n_blocks() != this->n_blocks())
+      {
+        std::vector<size_type> zero_sizes (v.n_blocks(), 0);
+        this->block_indices.reinit (zero_sizes);
+        if (this->components.size() != this->n_blocks())
+          this->components.resize(this->n_blocks());
+      }
+
+    // copy the blocks one by one before calling collect_sizes()
+    for (size_type b=0; b<this->n_blocks(); ++b)
+      components[b] = v.block(b);
+    this->collect_sizes();
+    return *this;
+  }
+
+
+
+  void BlockVector::print (std::ostream       &out,
+                           const unsigned int  precision,
+                           const bool          scientific,
+                           const bool          across) const
+  {
+    for (size_type block_no=0; block_no<this->n_blocks(); ++block_no)
+      {
+        if (!across)  // label on a line of its own
+          out << "Component " << block_no << std::endl;
+        else          // compact inline label
+          out << 'C' << block_no << ':';
+        components[block_no].print(out, precision, scientific, across);
+      }
+  }
+
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/source/lac/trilinos_parallel_block_vector.cc b/source/lac/trilinos_parallel_block_vector.cc
new file mode 100644
index 0000000..1017f44
--- /dev/null
+++ b/source/lac/trilinos_parallel_block_vector.cc
@@ -0,0 +1,52 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/lac/trilinos_parallel_block_vector.h>
+
+#ifdef DEAL_II_WITH_TRILINOS
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace TrilinosWrappers
+{
+  namespace MPI
+  {
+    BlockVector::BlockVector (const std::vector<Epetra_Map> &parallel_partitioning)
+    {
+      reinit (parallel_partitioning, false);  // false is passed as reinit()'s omit_zeroing_entries
+    }
+
+
+
+    bool
+    BlockVector::is_compressed () const
+    {
+      // the block vector counts as compressed only if every block does;
+      // stop at the first block that reports otherwise
+      for (unsigned int b=0; b<n_blocks(); ++b)
+        {
+          if (!block(b).is_compressed())
+            return false;
+        }
+      // no uncompressed block was found
+      return true;
+    }
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_TRILINOS
diff --git a/source/lac/trilinos_precondition.cc b/source/lac/trilinos_precondition.cc
new file mode 100644
index 0000000..09457ef
--- /dev/null
+++ b/source/lac/trilinos_precondition.cc
@@ -0,0 +1,722 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/trilinos_precondition.h>
+
+#ifdef DEAL_II_WITH_TRILINOS
+
+#  include <deal.II/lac/vector.h>
+#  include <deal.II/lac/sparse_matrix.h>
+#  include <deal.II/lac/trilinos_sparse_matrix.h>
+
+DEAL_II_DISABLE_EXTRA_DIAGNOSTICS
+#  include <Ifpack.h>
+#  include <Ifpack_Chebyshev.h>
+#  include <Teuchos_ParameterList.hpp>
+#  include <Teuchos_RCP.hpp>
+#  include <Epetra_MultiVector.h>
+DEAL_II_ENABLE_EXTRA_DIAGNOSTICS
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace TrilinosWrappers
+{
+  // Default constructor; with MPI enabled the communicator starts as MPI_COMM_SELF.
+  PreconditionBase::PreconditionBase()
+#ifdef DEAL_II_WITH_MPI
+    :
+    communicator (MPI_COMM_SELF)
+#endif
+  {}
+
+  // Copy constructor: copies base's preconditioner pointer, duplicates its Epetra_Map.
+  PreconditionBase::PreconditionBase(const PreconditionBase &base)
+    :
+    Subscriptor (),
+    preconditioner (base.preconditioner),
+#ifdef DEAL_II_WITH_MPI
+    communicator (base.communicator),
+#endif
+    vector_distributor (base.vector_distributor.get() != 0 ?  // no map before setup or after clear()
+                        new Epetra_Map(*base.vector_distributor) : 0)
+  {}
+
+
+
+  PreconditionBase::~PreconditionBase()
+  {}  // empty body: no explicit cleanup is written here
+
+
+  // Reset this object: drop the preconditioner and the map, restore MPI_COMM_SELF.
+  void PreconditionBase::clear ()
+  {
+    preconditioner.reset();
+#ifdef DEAL_II_WITH_MPI
+    communicator = MPI_COMM_SELF;  // same value the default constructor uses
+#endif
+    vector_distributor.reset();
+  }
+
+
+
+  /* -------------------------- PreconditionJacobi -------------------------- */
+  // Store the user-chosen settings; initialize() later forwards them to Ifpack.
+  PreconditionJacobi::AdditionalData::
+  AdditionalData (const double omega,
+                  const double min_diagonal,
+                  const unsigned int n_sweeps)
+    :
+    omega (omega),                // "relaxation: damping factor"
+    min_diagonal (min_diagonal),  // "relaxation: min diagonal value"
+    n_sweeps     (n_sweeps)       // "relaxation: sweeps"
+  {}
+
+
+
+  void
+  PreconditionJacobi::initialize (const SparseMatrix   &matrix,
+                                  const AdditionalData &additional_data)
+  {
+    // free the old preconditioner, then have Ifpack create a new one (overlap 0)
+    preconditioner.reset ();
+    preconditioner.reset (Ifpack().Create
+                          ("point relaxation",
+                           const_cast<Epetra_CrsMatrix *>(&matrix.trilinos_matrix()),
+                           0));
+
+    Ifpack_Preconditioner *const ifpack
+      = static_cast<Ifpack_Preconditioner *>(preconditioner.get());
+    Assert (ifpack != 0, ExcMessage ("Trilinos could not create this "
+                                     "preconditioner"));
+
+    // map the settings in additional_data onto Ifpack's parameter names
+    Teuchos::ParameterList settings;
+    settings.set ("relaxation: sweeps", static_cast<int>(additional_data.n_sweeps));
+    settings.set ("relaxation: type", "Jacobi");
+    settings.set ("relaxation: damping factor", additional_data.omega);
+    settings.set ("relaxation: min diagonal value",
+                  additional_data.min_diagonal);
+
+    // configure, initialize, compute: a nonzero return code raises ExcTrilinosError
+    int ierr = ifpack->SetParameters(settings);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    ierr = ifpack->Initialize();
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    ierr = ifpack->Compute();
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+  }
+
+
+
+  /* -------------------------- PreconditionSSOR -------------------------- */
+  // Store the user-chosen settings; initialize() later forwards them to Ifpack.
+  PreconditionSSOR::AdditionalData::
+  AdditionalData (const double       omega,
+                  const double       min_diagonal,
+                  const unsigned int overlap,
+                  const unsigned int n_sweeps)
+    :
+    omega        (omega),         // "relaxation: damping factor"
+    min_diagonal (min_diagonal),  // "relaxation: min diagonal value"
+    overlap      (overlap),       // last argument of Ifpack().Create()
+    n_sweeps     (n_sweeps)       // "relaxation: sweeps"
+  {}
+
+
+
+  void
+  PreconditionSSOR::initialize (const SparseMatrix   &matrix,
+                                const AdditionalData &additional_data)
+  {
+    // free the old preconditioner, then have Ifpack create a point relaxation
+    preconditioner.reset ();
+    preconditioner.reset (Ifpack().Create
+                          ("point relaxation",
+                           const_cast<Epetra_CrsMatrix *>(&matrix.trilinos_matrix()),
+                           additional_data.overlap));
+
+    Ifpack_Preconditioner *const ifpack
+      = static_cast<Ifpack_Preconditioner *>(preconditioner.get());
+    Assert (ifpack != 0, ExcMessage ("Trilinos could not create this "
+                                     "preconditioner"));
+
+    // map the settings in additional_data onto Ifpack's parameter names
+    Teuchos::ParameterList settings;
+    settings.set ("relaxation: sweeps", static_cast<int>(additional_data.n_sweeps));
+    settings.set ("relaxation: type", "symmetric Gauss-Seidel");
+    settings.set ("relaxation: damping factor", additional_data.omega);
+    settings.set ("relaxation: min diagonal value",
+                  additional_data.min_diagonal);
+    settings.set ("schwarz: combine mode", "Add");
+
+    int ierr = ifpack->SetParameters(settings);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    ierr = ifpack->Initialize();
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    ierr = ifpack->Compute();
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+  }
+
+
+
+  /* -------------------------- PreconditionSOR -------------------------- */
+  // Store the user-chosen settings; initialize() later forwards them to Ifpack.
+  PreconditionSOR::AdditionalData::
+  AdditionalData (const double       omega,
+                  const double       min_diagonal,
+                  const unsigned int overlap,
+                  const unsigned int n_sweeps)
+    :
+    omega        (omega),         // "relaxation: damping factor"
+    min_diagonal (min_diagonal),  // "relaxation: min diagonal value"
+    overlap      (overlap),       // last argument of Ifpack().Create()
+    n_sweeps     (n_sweeps)       // "relaxation: sweeps"
+  {}
+
+
+
+  void
+  PreconditionSOR::initialize (const SparseMatrix   &matrix,
+                               const AdditionalData &additional_data)
+  {
+    // free the old preconditioner, then have Ifpack create a point relaxation
+    preconditioner.reset ();
+    preconditioner.reset (Ifpack().Create
+                          ("point relaxation",
+                           const_cast<Epetra_CrsMatrix *>(&matrix.trilinos_matrix()),
+                           additional_data.overlap));
+
+    Ifpack_Preconditioner *const ifpack
+      = static_cast<Ifpack_Preconditioner *>(preconditioner.get());
+    Assert (ifpack != 0, ExcMessage ("Trilinos could not create this "
+                                     "preconditioner"));
+
+    // map the settings in additional_data onto Ifpack's parameter names
+    Teuchos::ParameterList settings;
+    settings.set ("relaxation: sweeps", static_cast<int>(additional_data.n_sweeps));
+    settings.set ("relaxation: type", "Gauss-Seidel");
+    settings.set ("relaxation: damping factor", additional_data.omega);
+    settings.set ("relaxation: min diagonal value",
+                  additional_data.min_diagonal);
+    settings.set ("schwarz: combine mode", "Add");
+
+    int ierr = ifpack->SetParameters(settings);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    ierr = ifpack->Initialize();
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    ierr = ifpack->Compute();
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+  }
+
+
+
+  /* ----------------------- PreconditionBlockJacobi ---------------------- */
+  // Store the user-chosen settings; initialize() later forwards them to Ifpack.
+  PreconditionBlockJacobi::AdditionalData::
+  AdditionalData (const unsigned int block_size,
+                  const std::string  block_creation_type,
+                  const double omega,
+                  const double min_diagonal,
+                  const unsigned int n_sweeps)
+    :
+    block_size(block_size),                    // divisor when computing "partitioner: local parts"
+    block_creation_type(block_creation_type),  // "partitioner: type"
+    omega (omega),                             // "relaxation: damping factor"
+    min_diagonal (min_diagonal),               // "relaxation: min diagonal value"
+    n_sweeps     (n_sweeps)                    // "relaxation: sweeps"
+  {}
+
+
+
+  void
+  PreconditionBlockJacobi::initialize (const SparseMatrix   &matrix,
+                                       const AdditionalData &additional_data)
+  {
+    // block_size is the divisor for the number of local blocks below; reject 0
+    AssertThrow (additional_data.block_size > 0,
+                 ExcMessage ("The block size must be positive."));
+
+    // release memory before reallocation
+    preconditioner.reset ();
+    preconditioner.reset (Ifpack().Create
+                          ("block relaxation",
+                           const_cast<Epetra_CrsMatrix *>(&matrix.trilinos_matrix()),
+                           0));
+
+    Ifpack_Preconditioner *ifpack = static_cast<Ifpack_Preconditioner *>
+                                    (preconditioner.get());
+    Assert (ifpack != 0, ExcMessage ("Trilinos could not create this "
+                                     "preconditioner"));
+    // map the settings in additional_data onto Ifpack's parameter names
+    Teuchos::ParameterList parameter_list;
+    parameter_list.set ("relaxation: sweeps", static_cast<int>(additional_data.n_sweeps));
+    parameter_list.set ("relaxation: type", "Jacobi");
+    parameter_list.set ("relaxation: damping factor", additional_data.omega);
+    parameter_list.set ("relaxation: min diagonal value",
+                        additional_data.min_diagonal);
+    parameter_list.set ("partitioner: type", additional_data.block_creation_type);
+    int n_local_parts = (matrix.trilinos_matrix().NumMyRows()+additional_data.
+                         block_size-1)/additional_data.block_size;  // rounds up
+    parameter_list.set ("partitioner: local parts", n_local_parts);
+    // configure, initialize, compute: a nonzero return code raises ExcTrilinosError
+    int ierr = ifpack->SetParameters(parameter_list);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+    ierr = ifpack->Initialize();
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+    ierr = ifpack->Compute();
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+  }
+
+
+
+  /* ----------------------- PreconditionBlockSSOR ------------------------ */
+  // Store the user-chosen settings; initialize() later forwards them to Ifpack.
+  PreconditionBlockSSOR::AdditionalData::
+  AdditionalData (const unsigned int block_size,
+                  const std::string  block_creation_type,
+                  const double       omega,
+                  const double       min_diagonal,
+                  const unsigned int overlap,
+                  const unsigned int n_sweeps)
+    :
+    block_size(block_size),                    // divisor when computing "partitioner: local parts"
+    block_creation_type(block_creation_type),  // "partitioner: type"
+    omega        (omega),                      // "relaxation: damping factor"
+    min_diagonal (min_diagonal),               // "relaxation: min diagonal value"
+    overlap      (overlap),                    // last argument of Ifpack().Create()
+    n_sweeps     (n_sweeps)                    // "relaxation: sweeps"
+  {}
+
+
+
+  void
+  PreconditionBlockSSOR::initialize (const SparseMatrix   &matrix,
+                                     const AdditionalData &additional_data)
+  {
+    // block_size is the divisor for the number of local blocks below; reject 0
+    AssertThrow (additional_data.block_size > 0,
+                 ExcMessage ("The block size must be positive."));
+
+    preconditioner.reset ();
+    preconditioner.reset (Ifpack().Create
+                          ("block relaxation",
+                           const_cast<Epetra_CrsMatrix *>(&matrix.trilinos_matrix()),
+                           additional_data.overlap));
+
+    Ifpack_Preconditioner *ifpack = static_cast<Ifpack_Preconditioner *>
+                                    (preconditioner.get());
+    Assert (ifpack != 0, ExcMessage ("Trilinos could not create this "
+                                     "preconditioner"));
+    // map the settings in additional_data onto Ifpack's parameter names
+    Teuchos::ParameterList parameter_list;
+    parameter_list.set ("relaxation: sweeps", static_cast<int>(additional_data.n_sweeps));
+    parameter_list.set ("relaxation: type", "symmetric Gauss-Seidel");
+    parameter_list.set ("relaxation: damping factor", additional_data.omega);
+    parameter_list.set ("relaxation: min diagonal value",
+                        additional_data.min_diagonal);
+    parameter_list.set ("schwarz: combine mode", "Add");
+    parameter_list.set ("partitioner: type", additional_data.block_creation_type);
+    int n_local_parts = (matrix.trilinos_matrix().NumMyRows()+additional_data.
+                         block_size-1)/additional_data.block_size;  // rounds up
+    parameter_list.set ("partitioner: local parts", n_local_parts);
+    // configure, initialize, compute: a nonzero return code raises ExcTrilinosError
+    int ierr = ifpack->SetParameters(parameter_list);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+    ierr = ifpack->Initialize();
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+    ierr = ifpack->Compute();
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+  }
+
+
+
+  /* ------------------------ PreconditionBlockSOR ------------------------ */
+  // Store the user-chosen settings; initialize() later forwards them to Ifpack.
+  PreconditionBlockSOR::AdditionalData::
+  AdditionalData (const unsigned int block_size,
+                  const std::string  block_creation_type,
+                  const double       omega,
+                  const double       min_diagonal,
+                  const unsigned int overlap,
+                  const unsigned int n_sweeps)
+    :
+    block_size(block_size),                    // divisor when computing "partitioner: local parts"
+    block_creation_type(block_creation_type),  // "partitioner: type"
+    omega        (omega),                      // "relaxation: damping factor"
+    min_diagonal (min_diagonal),               // "relaxation: min diagonal value"
+    overlap      (overlap),                    // last argument of Ifpack().Create()
+    n_sweeps     (n_sweeps)                    // "relaxation: sweeps"
+  {}
+
+
+
+  void
+  PreconditionBlockSOR::initialize (const SparseMatrix   &matrix,
+                                    const AdditionalData &additional_data)
+  {
+    // block_size is the divisor for the number of local blocks below; reject 0
+    AssertThrow (additional_data.block_size > 0,
+                 ExcMessage ("The block size must be positive."));
+
+    preconditioner.reset ();
+    preconditioner.reset (Ifpack().Create
+                          ("block relaxation",
+                           const_cast<Epetra_CrsMatrix *>(&matrix.trilinos_matrix()),
+                           additional_data.overlap));
+
+    Ifpack_Preconditioner *ifpack = static_cast<Ifpack_Preconditioner *>
+                                    (preconditioner.get());
+    Assert (ifpack != 0, ExcMessage ("Trilinos could not create this "
+                                     "preconditioner"));
+    // map the settings in additional_data onto Ifpack's parameter names
+    Teuchos::ParameterList parameter_list;
+    parameter_list.set ("relaxation: sweeps", static_cast<int>(additional_data.n_sweeps));
+    parameter_list.set ("relaxation: type", "Gauss-Seidel");
+    parameter_list.set ("relaxation: damping factor", additional_data.omega);
+    parameter_list.set ("relaxation: min diagonal value",
+                        additional_data.min_diagonal);
+    parameter_list.set ("schwarz: combine mode", "Add");
+    parameter_list.set ("partitioner: type", additional_data.block_creation_type);
+    int n_local_parts = (matrix.trilinos_matrix().NumMyRows()+additional_data.
+                         block_size-1)/additional_data.block_size;  // rounds up
+    parameter_list.set ("partitioner: local parts", n_local_parts);
+    // configure, initialize, compute: a nonzero return code raises ExcTrilinosError
+    int ierr = ifpack->SetParameters(parameter_list);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+    ierr = ifpack->Initialize();
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+    ierr = ifpack->Compute();
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+  }
+
+
+
+  /* -------------------------- PreconditionIC -------------------------- */
+  // Store the user-chosen settings; initialize() later forwards them to Ifpack.
+  PreconditionIC::AdditionalData::
+  AdditionalData (const unsigned int ic_fill,
+                  const double       ic_atol,
+                  const double       ic_rtol,
+                  const unsigned int overlap)
+    :
+    ic_fill (ic_fill),  // "fact: level-of-fill"
+    ic_atol (ic_atol),  // "fact: absolute threshold"
+    ic_rtol (ic_rtol),  // "fact: relative threshold"
+    overlap (overlap)   // last argument of Ifpack().Create()
+  {}
+
+
+
+  void
+  PreconditionIC::initialize (const SparseMatrix   &matrix,
+                              const AdditionalData &additional_data)
+  {
+    // free the old preconditioner, then have Ifpack create an "IC" object
+    preconditioner.reset ();
+    preconditioner.reset (Ifpack().Create
+                          ("IC",
+                           const_cast<Epetra_CrsMatrix *>(&matrix.trilinos_matrix()),
+                           additional_data.overlap));
+
+    Ifpack_Preconditioner *const ifpack
+      = static_cast<Ifpack_Preconditioner *>(preconditioner.get());
+    Assert (ifpack != 0, ExcMessage ("Trilinos could not create this "
+                                     "preconditioner"));
+
+    // map the settings in additional_data onto Ifpack's parameter names
+    Teuchos::ParameterList settings;
+    settings.set ("fact: level-of-fill", static_cast<int>(additional_data.ic_fill));
+    settings.set ("fact: absolute threshold", additional_data.ic_atol);
+    settings.set ("fact: relative threshold", additional_data.ic_rtol);
+    settings.set ("schwarz: combine mode", "Add");
+
+    int ierr = ifpack->SetParameters(settings);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    ierr = ifpack->Initialize();
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    ierr = ifpack->Compute();
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+  }
+
+
+
+  /* -------------------------- PreconditionILU -------------------------- */
+  // Store the user-chosen settings; initialize() later forwards them to Ifpack.
+  PreconditionILU::AdditionalData::
+  AdditionalData (const unsigned int ilu_fill,
+                  const double       ilu_atol,
+                  const double       ilu_rtol,
+                  const unsigned int overlap)
+    :
+    ilu_fill (ilu_fill),  // "fact: level-of-fill"
+    ilu_atol (ilu_atol),  // "fact: absolute threshold"
+    ilu_rtol (ilu_rtol),  // "fact: relative threshold"
+    overlap  (overlap)    // last argument of Ifpack().Create()
+  {}
+
+
+
+  void
+  PreconditionILU::initialize (const SparseMatrix   &matrix,
+                               const AdditionalData &additional_data)
+  {
+    // free the old preconditioner, then have Ifpack create an "ILU" object
+    preconditioner.reset ();
+    preconditioner.reset (Ifpack().Create
+                          ("ILU",
+                           const_cast<Epetra_CrsMatrix *>(&matrix.trilinos_matrix()),
+                           additional_data.overlap));
+
+    Ifpack_Preconditioner *const ifpack
+      = static_cast<Ifpack_Preconditioner *>(preconditioner.get());
+    Assert (ifpack != 0, ExcMessage ("Trilinos could not create this "
+                                     "preconditioner"));
+
+    // map the settings in additional_data onto Ifpack's parameter names
+    Teuchos::ParameterList settings;
+    settings.set ("fact: level-of-fill", static_cast<int>(additional_data.ilu_fill));
+    settings.set ("fact: absolute threshold", additional_data.ilu_atol);
+    settings.set ("fact: relative threshold", additional_data.ilu_rtol);
+    settings.set ("schwarz: combine mode", "Add");
+
+    int ierr = ifpack->SetParameters(settings);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    ierr = ifpack->Initialize();
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    ierr = ifpack->Compute();
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+  }
+
+
+
+  /* -------------------------- PreconditionILUT -------------------------- */
+  // Store the user-chosen settings; initialize() later forwards them to Ifpack.
+  PreconditionILUT::AdditionalData::
+  AdditionalData (const double       ilut_drop,
+                  const unsigned int ilut_fill,
+                  const double       ilut_atol,
+                  const double       ilut_rtol,
+                  const unsigned int overlap)
+    :
+    ilut_drop (ilut_drop),  // "fact: drop value"
+    ilut_fill (ilut_fill),  // "fact: level-of-fill"
+    ilut_atol (ilut_atol),  // "fact: absolute threshold"
+    ilut_rtol (ilut_rtol),  // "fact: relative threshold"
+    overlap  (overlap)      // last argument of Ifpack().Create()
+  {}
+
+
+
+  void
+  PreconditionILUT::initialize (const SparseMatrix   &matrix,
+                                const AdditionalData &additional_data)
+  {
+    // free the old preconditioner, then have Ifpack create an "ILUT" object
+    preconditioner.reset ();
+    preconditioner.reset (Ifpack().Create
+                          ("ILUT",
+                           const_cast<Epetra_CrsMatrix *>(&matrix.trilinos_matrix()),
+                           additional_data.overlap));
+
+    Ifpack_Preconditioner *const ifpack
+      = static_cast<Ifpack_Preconditioner *>(preconditioner.get());
+    Assert (ifpack != 0, ExcMessage ("Trilinos could not create this "
+                                     "preconditioner"));
+
+    // map the settings in additional_data onto Ifpack's parameter names
+    Teuchos::ParameterList settings;
+    settings.set ("fact: drop value", additional_data.ilut_drop);
+    settings.set ("fact: level-of-fill", static_cast<int>(additional_data.ilut_fill));
+    settings.set ("fact: absolute threshold", additional_data.ilut_atol);
+    settings.set ("fact: relative threshold", additional_data.ilut_rtol);
+    settings.set ("schwarz: combine mode", "Add");
+
+    int ierr = ifpack->SetParameters(settings);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    ierr = ifpack->Initialize();
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    ierr = ifpack->Compute();
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+  }
+
+
+
+  /* -------------------- PreconditionBlockwiseDirect ------------------- */
+
+  PreconditionBlockwiseDirect::AdditionalData::
+  AdditionalData (const unsigned int overlap)   // overlap argument later given to Ifpack().Create
+    :
+    overlap  (overlap)                          // stored in the member of the same name
+  {}
+
+
+
+  void
+  PreconditionBlockwiseDirect::initialize (const SparseMatrix   &matrix,
+                                           const AdditionalData &additional_data)
+  {
+    // Drop the old preconditioner, then let the Ifpack factory create an
+    // "Amesos" one for the Epetra matrix wrapped by `matrix`.
+    preconditioner.reset ();
+    Epetra_CrsMatrix *epetra_matrix =
+      const_cast<Epetra_CrsMatrix *>(&matrix.trilinos_matrix());
+    preconditioner.reset (Ifpack().Create ("Amesos", epetra_matrix,
+                                           additional_data.overlap));
+
+    Ifpack_Preconditioner *const ifpack =
+      static_cast<Ifpack_Preconditioner *> (preconditioner.get());
+    Assert (ifpack != 0, ExcMessage ("Trilinos could not create this "
+                                     "preconditioner"));
+
+    Teuchos::ParameterList parameter_list;
+    parameter_list.set ("schwarz: combine mode", "Add");
+
+    // A nonzero return code from any of the three stages is turned into an
+    // ExcTrilinosError exception.
+    int ierr = ifpack->SetParameters(parameter_list);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    ierr = ifpack->Initialize();
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    ierr = ifpack->Compute();
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+  }
+
+
+
+  /* ------------------------ PreconditionChebyshev --------------------- */
+
+  PreconditionChebyshev::AdditionalData::
+  AdditionalData (const unsigned int degree,            // later set as "chebyshev: degree"
+                  const double       max_eigenvalue,    // later set as "chebyshev: max eigenvalue"
+                  const double       eigenvalue_ratio,  // later set as "chebyshev: ratio eigenvalue"
+                  const double       min_eigenvalue,    // later set as "chebyshev: min eigenvalue"
+                  const double       min_diagonal,      // later set as "chebyshev: min diagonal value"
+                  const bool         nonzero_starting)  // negated into "chebyshev: zero starting solution"
+    :                                                   // every argument is stored in the member of the same name
+    degree  (degree),
+    max_eigenvalue (max_eigenvalue),
+    eigenvalue_ratio (eigenvalue_ratio),
+    min_eigenvalue (min_eigenvalue),
+    min_diagonal (min_diagonal),
+    nonzero_starting (nonzero_starting)
+  {}
+
+
+
+  void
+  PreconditionChebyshev::initialize (const SparseMatrix   &matrix,
+                                     const AdditionalData &additional_data)
+  {
+    // Replace whatever was stored so far by a fresh Ifpack_Chebyshev object
+    // attached to the Epetra matrix wrapped by `matrix`.
+    preconditioner.reset ();
+    preconditioner.reset (new Ifpack_Chebyshev (&matrix.trilinos_matrix()));
+
+    Ifpack_Chebyshev *const ifpack =
+      static_cast<Ifpack_Chebyshev *> (preconditioner.get());
+    Assert (ifpack != 0, ExcMessage ("Trilinos could not create this "
+                                     "preconditioner"));
+
+    // The Ifpack parameter is phrased as "zero starting solution", so the
+    // negation of our flag is passed; the degree is handed over as an int.
+    const bool zero_starting_solution = !additional_data.nonzero_starting;
+    const int  polynomial_degree      = static_cast<int>(additional_data.degree);
+
+    Teuchos::ParameterList parameter_list;
+    parameter_list.set ("chebyshev: ratio eigenvalue",
+                        additional_data.eigenvalue_ratio);
+    parameter_list.set ("chebyshev: min eigenvalue",
+                        additional_data.min_eigenvalue);
+    parameter_list.set ("chebyshev: max eigenvalue",
+                        additional_data.max_eigenvalue);
+    parameter_list.set ("chebyshev: degree", polynomial_degree);
+    parameter_list.set ("chebyshev: min diagonal value",
+                        additional_data.min_diagonal);
+    parameter_list.set ("chebyshev: zero starting solution",
+                        zero_starting_solution);
+
+    // A nonzero return code from any of the three stages is turned into an
+    // ExcTrilinosError exception.
+    int ierr = ifpack->SetParameters(parameter_list);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    ierr = ifpack->Initialize();
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    ierr = ifpack->Compute();
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+  }
+
+
+
+
+
+
+
+
+  /* -------------------------- PreconditionIdentity --------------------- */
+
+  void
+  PreconditionIdentity::vmult(VectorBase       &dst,
+                              const VectorBase &src) const
+  {
+    dst = src; // identity preconditioner: the result is simply a copy of src
+  }
+
+  void
+  PreconditionIdentity::Tvmult(VectorBase       &dst,
+                               const VectorBase &src) const
+  {
+    dst = src; // transposed application performs the same copy as vmult
+  }
+
+  void
+  PreconditionIdentity::vmult(dealii::Vector<double>       &dst,
+                              const dealii::Vector<double> &src) const
+  {
+    dst = src; // overload for deal.II's own Vector<double>: plain copy of src
+  }
+
+  void
+  PreconditionIdentity::Tvmult(dealii::Vector<double>       &dst,
+                               const dealii::Vector<double> &src) const
+  {
+    dst = src; // transposed application performs the same copy as vmult
+  }
+
+  void
+  PreconditionIdentity::vmult(parallel::distributed::Vector<double>       &dst,
+                              const parallel::distributed::Vector<double> &src) const
+  {
+    dst = src; // overload for parallel::distributed::Vector<double>: plain copy of src
+  }
+
+  void
+  PreconditionIdentity::Tvmult(parallel::distributed::Vector<double>       &dst,
+                               const parallel::distributed::Vector<double> &src) const
+  {
+    dst = src; // transposed application performs the same copy as vmult
+  }
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_TRILINOS
diff --git a/source/lac/trilinos_precondition_ml.cc b/source/lac/trilinos_precondition_ml.cc
new file mode 100644
index 0000000..070268d
--- /dev/null
+++ b/source/lac/trilinos_precondition_ml.cc
@@ -0,0 +1,345 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/trilinos_precondition.h>
+
+#ifdef DEAL_II_WITH_TRILINOS
+
+#  include <deal.II/lac/vector.h>
+#  include <deal.II/lac/sparse_matrix.h>
+#  include <deal.II/lac/trilinos_sparse_matrix.h>
+
+DEAL_II_DISABLE_EXTRA_DIAGNOSTICS
+#  include <Ifpack.h>
+#  include <Ifpack_Chebyshev.h>
+#  include <Teuchos_ParameterList.hpp>
+#  include <Teuchos_RCP.hpp>
+#  include <Epetra_MultiVector.h>
+#  include <ml_include.h>
+#  include <ml_MultiLevelPreconditioner.h>
+DEAL_II_ENABLE_EXTRA_DIAGNOSTICS
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace TrilinosWrappers
+{
+  namespace
+  { // file-local helpers that pick the 32-bit or 64-bit Epetra accessors at compile time
+#ifndef DEAL_II_WITH_64BIT_INDICES
+    int n_global_rows (const Epetra_RowMatrix &matrix)
+    {
+      return matrix.NumGlobalRows(); // int-valued accessor
+    }
+
+    int global_length (const Epetra_MultiVector &vector)
+    {
+      return vector.GlobalLength(); // int-valued accessor
+    }
+
+    int gid(const Epetra_Map &map, unsigned int i)
+    {
+      return map.GID(i); // GID of index i in map, via the int-valued accessor
+    }
+#else // DEAL_II_WITH_64BIT_INDICES is defined: same helpers, using the *64 accessors
+    long long int n_global_rows (const Epetra_RowMatrix &matrix)
+    {
+      return matrix.NumGlobalRows64();
+    }
+
+    long long int global_length (const Epetra_MultiVector &vector)
+    {
+      return vector.GlobalLength64();
+    }
+
+    long long int gid(const Epetra_Map &map, dealii::types::global_dof_index i)
+    {
+      return map.GID64(i);
+    }
+#endif
+  }
+
+
+
+  /* -------------------------- PreconditionAMG -------------------------- */
+
+  PreconditionAMG::AdditionalData::
+  AdditionalData (const bool                             elliptic,              // true: ML "SA" defaults, false: "NSSA" defaults
+                  const bool                             higher_order_elements, // only read in the elliptic branch of initialize()
+                  const unsigned int                     n_cycles,              // set as "cycle applications"
+                  const bool                             w_cycle,               // "prec type": "MGW" if true, "MGV" otherwise
+                  const double                           aggregation_threshold, // set as "aggregation: threshold"
+                  const std::vector<std::vector<bool> > &constant_modes,        // converted to ML's pre-computed null space vectors
+                  const unsigned int                     smoother_sweeps,       // set as "smoother: sweeps"
+                  const unsigned int                     smoother_overlap,      // set as "smoother: ifpack overlap"
+                  const bool                             output_details,        // "ML output": 10 if true, 0 otherwise
+                  const char                            *smoother_type,         // set as "smoother: type"
+                  const char                            *coarse_type)           // set as "coarse: type"
+    :                                                                           // every argument is stored in the member of the same name
+    elliptic (elliptic),
+    higher_order_elements (higher_order_elements),
+    n_cycles (n_cycles),
+    w_cycle (w_cycle),
+    aggregation_threshold (aggregation_threshold),
+    constant_modes (constant_modes),
+    smoother_sweeps (smoother_sweeps),
+    smoother_overlap (smoother_overlap),
+    output_details (output_details),
+    smoother_type (smoother_type),
+    coarse_type (coarse_type)
+  {}
+
+
+  PreconditionAMG::~PreconditionAMG()
+  {
+    preconditioner.reset();   // release the preconditioner object first ...
+    trilinos_matrix.reset();  // ... then the matrix this class may have created for it
+  }
+
+
+
+  void
+  PreconditionAMG:: initialize (const SparseMatrix   &matrix,
+                                const AdditionalData &additional_data)
+  {
+    initialize(matrix.trilinos_matrix(), additional_data); // forward the wrapped Trilinos matrix to another initialize() overload
+  }
+
+
+
+  void
+  PreconditionAMG:: initialize (const Epetra_RowMatrix &matrix,
+                                const AdditionalData   &additional_data)
+  {
+    // Build the AMG preconditioner: translate additional_data into an ML parameter list, then call the ParameterList overload.
+    Teuchos::ParameterList parameter_list;
+
+    if (additional_data.elliptic == true)
+      {
+        ML_Epetra::SetDefaults("SA",parameter_list);
+
+        // uncoupled mode can give a lot of warnings or even fail when there
+        // are too many entries per row and aggregation gets complicated, but
+        // MIS does not work if too few elements are located on one
+        // processor. work around these warnings by choosing the different
+        // strategies in different situations: for low order, always use the
+        // standard choice uncoupled. if higher order, right now we also just
+        // use Uncoupled, but we should be aware that maybe MIS might be
+        // needed
+        if (additional_data.higher_order_elements)
+          parameter_list.set("aggregation: type", "Uncoupled");
+      }
+    else
+      {
+        ML_Epetra::SetDefaults("NSSA",parameter_list);
+        parameter_list.set("aggregation: type", "Uncoupled");
+        parameter_list.set("aggregation: block scaling", true);
+      }
+
+    parameter_list.set("smoother: type", additional_data.smoother_type);
+    parameter_list.set("coarse: type", additional_data.coarse_type);
+
+    // Force re-initialization of the random seed to make ML deterministic
+    // (only supported in trilinos >12.2):
+#if DEAL_II_TRILINOS_VERSION_GTE(12,4,0)
+    parameter_list.set("initialize random seed", true);
+#endif
+
+    parameter_list.set("smoother: sweeps",
+                       static_cast<int>(additional_data.smoother_sweeps));
+    parameter_list.set("cycle applications",
+                       static_cast<int>(additional_data.n_cycles));
+    if (additional_data.w_cycle == true)
+      parameter_list.set("prec type", "MGW");
+    else
+      parameter_list.set("prec type", "MGV");
+
+    parameter_list.set("smoother: Chebyshev alpha",10.);
+    parameter_list.set("smoother: ifpack overlap",
+                       static_cast<int>(additional_data.smoother_overlap));
+    parameter_list.set("aggregation: threshold",
+                       additional_data.aggregation_threshold);
+    parameter_list.set("coarse: max size", 2000);
+
+    if (additional_data.output_details)
+      parameter_list.set("ML output", 10);
+    else
+      parameter_list.set("ML output", 0);
+
+    const Epetra_Map &domain_map = matrix.OperatorDomainMap();
+
+    const size_type constant_modes_dimension =
+      additional_data.constant_modes.size();
+    Epetra_MultiVector distributed_constant_modes (domain_map,              // at least one vector is allocated,
+                                                   constant_modes_dimension > 0 ?   // even if no constant modes were given
+                                                   constant_modes_dimension : 1);
+    std::vector<double> dummy (constant_modes_dimension); // stand-in buffer used below when this process owns no rows
+
+    if (constant_modes_dimension > 0)
+      {
+        const size_type global_size = n_global_rows(matrix);
+        (void)global_length; // work around compiler warning about unused function in release mode
+        Assert (global_size ==
+                static_cast<size_type>(global_length(distributed_constant_modes)),
+                ExcDimensionMismatch(global_size,
+                                     global_length(distributed_constant_modes)));
+        const bool constant_modes_are_global // modes cover either all global rows or only the local ones
+          = additional_data.constant_modes[0].size() == global_size;
+        const size_type my_size = domain_map.NumMyElements();
+
+        // Reshape null space as a contiguous vector of doubles so that
+        // Trilinos can read from it.
+        const size_type expected_mode_size =
+          constant_modes_are_global ? global_size : my_size;
+        for (size_type d=0; d<constant_modes_dimension; ++d)
+          {
+            Assert (additional_data.constant_modes[d].size() == expected_mode_size,
+                    ExcDimensionMismatch(additional_data.constant_modes[d].size(), expected_mode_size));
+            for (size_type row=0; row<my_size; ++row)
+              {
+                const TrilinosWrappers::types::int_type mode_index = // global modes are indexed by GID, local ones by local row
+                  constant_modes_are_global ? gid(domain_map,row) : row;
+                distributed_constant_modes[d][row] =
+                  additional_data.constant_modes[d][mode_index];
+              }
+          }
+        (void)expected_mode_size; // only read inside Assert above; avoids an unused-variable warning
+
+        parameter_list.set("null space: type", "pre-computed");
+        parameter_list.set("null space: dimension",
+                           distributed_constant_modes.NumVectors());
+        if (my_size > 0)
+          parameter_list.set("null space: vectors",
+                             distributed_constant_modes.Values());
+        // We need to set a valid pointer to data even if there is no data on
+        // the current processor. Therefore, pass a dummy in that case
+        else
+          parameter_list.set("null space: vectors",
+                             &dummy[0]);
+      }
+
+    initialize (matrix, parameter_list); // the ParameterList overload creates the ML preconditioner
+
+    if (additional_data.output_details)
+      {
+        ML_Epetra::MultiLevelPreconditioner *multilevel_operator =
+          dynamic_cast<ML_Epetra::MultiLevelPreconditioner *> (preconditioner.get());
+        Assert (multilevel_operator != 0,
+                ExcMessage ("Preconditioner setup failed."));
+        multilevel_operator->PrintUnused(0);
+      }
+  }
+
+
+
+  void
+  PreconditionAMG::initialize (const SparseMatrix           &matrix,
+                               const Teuchos::ParameterList &ml_parameters)
+  {
+    initialize(matrix.trilinos_matrix(), ml_parameters); // forward the wrapped Trilinos matrix together with the user's ML parameters
+  }
+
+
+
+  void
+  PreconditionAMG::initialize (const Epetra_RowMatrix       &matrix,
+                               const Teuchos::ParameterList &ml_parameters)
+  {
+    preconditioner.reset ();                                       // release the old preconditioner first
+    preconditioner.reset (new ML_Epetra::MultiLevelPreconditioner  // then create ML's multilevel preconditioner
+                          (matrix, ml_parameters));                // from the matrix and the given parameter list
+  }
+
+
+
+  template <typename number>
+  void
+  PreconditionAMG::
+  initialize (const ::dealii::SparseMatrix<number> &deal_ii_sparse_matrix, // deal.II matrix to copy into a Trilinos matrix
+              const AdditionalData                 &additional_data,       // AMG settings, passed on unchanged
+              const double                          drop_tolerance,        // passed on to SparseMatrix::reinit
+              const ::dealii::SparsityPattern      *use_this_sparsity)     // passed on to SparseMatrix::reinit
+  {
+    preconditioner.reset();
+    const size_type n_rows = deal_ii_sparse_matrix.m();
+
+    // Init Epetra Matrix using an
+    // equidistributed map; avoid
+    // storing the nonzero
+    // elements.
+    vector_distributor.reset (new Epetra_Map(static_cast<TrilinosWrappers::types::int_type>(n_rows),
+                                             0, communicator));
+
+    if (trilinos_matrix.get() == 0)                // create the owned matrix only once, reuse it afterwards
+      trilinos_matrix.reset (new SparseMatrix());
+
+    trilinos_matrix->reinit (*vector_distributor, *vector_distributor,
+                             deal_ii_sparse_matrix, drop_tolerance, true,
+                             use_this_sparsity);
+
+    initialize (*trilinos_matrix, additional_data); // continue with the overload for Trilinos matrices
+  }
+
+
+
+  void PreconditionAMG::reinit ()
+  {
+    // Recompute the existing ML preconditioner in place. This is only
+    // possible if one of the initialize() functions has created an
+    // ML_Epetra::MultiLevelPreconditioner before; otherwise the cast yields
+    // a null pointer, which must not be dereferenced.
+    ML_Epetra::MultiLevelPreconditioner *multilevel_operator =
+      dynamic_cast<ML_Epetra::MultiLevelPreconditioner *> (preconditioner.get());
+    AssertThrow (multilevel_operator != 0,
+                 ExcMessage ("reinit() can only be called after the "
+                             "preconditioner has been set up by initialize()."));
+    multilevel_operator->ReComputePreconditioner();
+  }
+
+
+
+  void PreconditionAMG::clear ()
+  {
+    PreconditionBase::clear();  // let the base class do its part of the cleanup first
+    trilinos_matrix.reset();    // then drop the matrix this class may have created
+  }
+
+
+
+  PreconditionAMG::size_type
+  PreconditionAMG::memory_consumption() const
+  {
+    // Size of the object itself (not of the `this` pointer), accumulated in size_type to avoid truncation.
+    size_type memory = sizeof(*this);
+
+    // todo: find a way to read out ML's data sizes
+    if (trilinos_matrix.get() != 0)
+      memory += trilinos_matrix->memory_consumption();
+    return memory;
+  }
+
+
+
+
+  // explicit instantiations
+  template void PreconditionAMG::initialize (const ::dealii::SparseMatrix<double> &,
+                                             const AdditionalData &, const double,
+                                             const ::dealii::SparsityPattern *);
+  template void PreconditionAMG::initialize (const ::dealii::SparseMatrix<float> &,
+                                             const AdditionalData &, const double,
+                                             const ::dealii::SparsityPattern *);
+
+
+
+
+
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_TRILINOS
diff --git a/source/lac/trilinos_precondition_muelu.cc b/source/lac/trilinos_precondition_muelu.cc
new file mode 100644
index 0000000..149783f
--- /dev/null
+++ b/source/lac/trilinos_precondition_muelu.cc
@@ -0,0 +1,330 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/trilinos_precondition.h>
+
+#ifdef DEAL_II_WITH_TRILINOS
+#if DEAL_II_TRILINOS_VERSION_GTE(11,14,0)
+
+#  include <deal.II/lac/vector.h>
+#  include <deal.II/lac/sparse_matrix.h>
+#  include <deal.II/lac/trilinos_sparse_matrix.h>
+
+DEAL_II_DISABLE_EXTRA_DIAGNOSTICS
+#  include <Teuchos_ParameterList.hpp>
+#  include <Teuchos_RCP.hpp>
+#  include <Epetra_MultiVector.h>
+#  include <ml_include.h>
+#  include <ml_MultiLevelPreconditioner.h>
+
+#  include <MueLu.hpp>
+#  include <MueLu_EpetraOperator.hpp>
+#  include <MueLu_MLParameterListInterpreter.hpp>
+DEAL_II_ENABLE_EXTRA_DIAGNOSTICS
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace TrilinosWrappers
+{
+  namespace
+  { // file-local helpers that pick the 32-bit or 64-bit Epetra accessors at compile time
+#ifndef DEAL_II_WITH_64BIT_INDICES
+    int n_global_rows (const Epetra_RowMatrix &matrix)
+    {
+      return matrix.NumGlobalRows(); // int-valued accessor
+    }
+
+    int global_length (const Epetra_MultiVector &vector)
+    {
+      return vector.GlobalLength(); // int-valued accessor
+    }
+
+    int gid(const Epetra_Map &map, unsigned int i)
+    {
+      return map.GID(i); // GID of index i in map, via the int-valued accessor
+    }
+#else // DEAL_II_WITH_64BIT_INDICES is defined: same helpers, using the *64 accessors
+    long long int n_global_rows (const Epetra_RowMatrix &matrix)
+    {
+      return matrix.NumGlobalRows64();
+    }
+
+    long long int global_length (const Epetra_MultiVector &vector)
+    {
+      return vector.GlobalLength64();
+    }
+
+    long long int gid(const Epetra_Map &map, dealii::types::global_dof_index i)
+    {
+      return map.GID64(i);
+    }
+#endif
+  }
+
+
+
+  PreconditionAMGMueLu::AdditionalData::
+  AdditionalData (const bool                             elliptic,              // true: ML "SA" defaults, false: "NSSA" defaults
+                  const unsigned int                     n_cycles,              // set as "cycle applications"
+                  const bool                             w_cycle,               // "prec type": "MGW" if true, "MGV" otherwise
+                  const double                           aggregation_threshold, // set as "aggregation: threshold"
+                  const std::vector<std::vector<bool> > &constant_modes,        // converted to pre-computed null space vectors
+                  const unsigned int                     smoother_sweeps,       // set as "smoother: sweeps"
+                  const unsigned int                     smoother_overlap,      // set as "smoother: ifpack overlap"
+                  const bool                             output_details,        // "ML output": 10 if true, 0 otherwise
+                  const char                            *smoother_type,         // set as "smoother: type"
+                  const char                            *coarse_type)           // set as "coarse: type"
+    :                                                                           // every argument is stored in the member of the same name
+    elliptic (elliptic),
+    n_cycles (n_cycles),
+    w_cycle (w_cycle),
+    aggregation_threshold (aggregation_threshold),
+    constant_modes (constant_modes),
+    smoother_sweeps (smoother_sweeps),
+    smoother_overlap (smoother_overlap),
+    output_details (output_details),
+    smoother_type (smoother_type),
+    coarse_type (coarse_type)
+  {}
+
+
+  PreconditionAMGMueLu::~PreconditionAMGMueLu()
+  {
+    preconditioner.reset();   // release the preconditioner object first ...
+    trilinos_matrix.reset();  // ... then the matrix this class may have created for it
+  }
+
+
+
+  void
+  PreconditionAMGMueLu::initialize (const SparseMatrix   &matrix,
+                                    const AdditionalData &additional_data)
+  {
+    initialize(matrix.trilinos_matrix(), additional_data); // forward the wrapped Trilinos matrix to another initialize() overload
+  }
+
+
+
+  void
+  PreconditionAMGMueLu::initialize (const Epetra_CrsMatrix &matrix,
+                                    const AdditionalData   &additional_data)
+  {
+    // Build the AMG preconditioner: translate additional_data into an ML-style parameter list, then call the ParameterList overload.
+    Teuchos::ParameterList parameter_list;
+
+    if (additional_data.elliptic == true)
+      ML_Epetra::SetDefaults("SA",parameter_list);
+    else
+      {
+        ML_Epetra::SetDefaults("NSSA",parameter_list);
+        parameter_list.set("aggregation: block scaling", true);
+      }
+    // MIS does not exist anymore; the only choices are uncoupled and coupled. When using
+    // uncoupled, aggregates cannot span multiple processes. When using coupled,
+    // aggregates can span multiple processes.
+    parameter_list.set("aggregation: type", "Uncoupled");
+
+    parameter_list.set("smoother: type", additional_data.smoother_type);
+    parameter_list.set("coarse: type", additional_data.coarse_type);
+
+    parameter_list.set("smoother: sweeps",
+                       static_cast<int>(additional_data.smoother_sweeps));
+    parameter_list.set("cycle applications",
+                       static_cast<int>(additional_data.n_cycles));
+    if (additional_data.w_cycle == true)
+      parameter_list.set("prec type", "MGW");
+    else
+      parameter_list.set("prec type", "MGV");
+
+    parameter_list.set("smoother: Chebyshev alpha",10.);
+    parameter_list.set("smoother: ifpack overlap",
+                       static_cast<int>(additional_data.smoother_overlap));
+    parameter_list.set("aggregation: threshold",
+                       additional_data.aggregation_threshold);
+    parameter_list.set("coarse: max size", 2000);
+
+    if (additional_data.output_details)
+      parameter_list.set("ML output", 10);
+    else
+      parameter_list.set("ML output", 0);
+
+    const Epetra_Map &domain_map = matrix.OperatorDomainMap();
+
+    const size_type constant_modes_dimension =
+      additional_data.constant_modes.size();
+    Epetra_MultiVector distributed_constant_modes (domain_map,              // at least one vector is allocated,
+                                                   constant_modes_dimension > 0 ?   // even if no constant modes were given
+                                                   constant_modes_dimension : 1);
+    std::vector<double> dummy (constant_modes_dimension); // stand-in buffer used below when this process owns no rows
+
+    if (constant_modes_dimension > 0)
+      {
+        const size_type n_rows = n_global_rows(matrix);
+        const bool constant_modes_are_global = // modes cover either all global rows or only the local ones
+          additional_data.constant_modes[0].size() == n_rows;
+        const size_type n_relevant_rows =
+          constant_modes_are_global ? n_rows : additional_data.constant_modes[0].size();
+        const size_type my_size = domain_map.NumMyElements();
+        if (constant_modes_are_global == false)
+          Assert (n_relevant_rows == my_size,
+                  ExcDimensionMismatch(n_relevant_rows, my_size));
+        Assert (n_rows ==
+                static_cast<size_type>(global_length(distributed_constant_modes)),
+                ExcDimensionMismatch(n_rows,
+                                     global_length(distributed_constant_modes)));
+
+        (void)n_relevant_rows; // both names are only read inside Assert above;
+        (void)global_length;   // the casts avoid unused warnings
+
+        // Reshape null space as a contiguous vector of doubles so that
+        // Trilinos can read from it.
+        for (size_type d=0; d<constant_modes_dimension; ++d)
+          for (size_type row=0; row<my_size; ++row)
+            {
+              TrilinosWrappers::types::int_type global_row_id = // global modes are indexed by GID, local ones by local row
+                constant_modes_are_global ? gid(domain_map,row) : row;
+              distributed_constant_modes[d][row] =
+                additional_data.constant_modes[d][global_row_id];
+            }
+
+        parameter_list.set("null space: type", "pre-computed");
+        parameter_list.set("null space: dimension",
+                           distributed_constant_modes.NumVectors());
+        if (my_size > 0)
+          parameter_list.set("null space: vectors",
+                             distributed_constant_modes.Values());
+        // We need to set a valid pointer to data even if there is no data on
+        // the current processor. Therefore, pass a dummy in that case
+        else
+          parameter_list.set("null space: vectors",
+                             &dummy[0]);
+      }
+
+    initialize (matrix, parameter_list); // the ParameterList overload builds the MueLu hierarchy
+  }
+
+
+
+  void
+  PreconditionAMGMueLu::initialize (const SparseMatrix           &matrix,
+                                    Teuchos::ParameterList &muelu_parameters)
+  {
+    initialize(matrix.trilinos_matrix(), muelu_parameters); // forward the wrapped Trilinos matrix together with the user's parameters
+  }
+
+
+
+  void
+  PreconditionAMGMueLu::initialize (const Epetra_CrsMatrix       &matrix,
+                                    Teuchos::ParameterList &muelu_parameters)
+  {
+    // We cannot use MueLu::CreateEpetraOperator directly because we cannot
+    // transfer ownership of MueLu::EpetraOperator from Teuchos::RCP to
+    // std::shared_ptr.
+
+    // For now, just use serial node, i.e. no multithreading or GPU.
+    typedef KokkosClassic::DefaultNode::DefaultNodeType node;
+    preconditioner.reset ();
+
+    // Cast matrix into a MueLu::Matrix. The constness needs to be cast away.
+    // MueLu uses Teuchos::RCP which are Trilinos version of std::shared_ptr.
+    Teuchos::RCP<Epetra_CrsMatrix> rcp_matrix = Teuchos::rcpFromRef(
+                                                  *(const_cast<Epetra_CrsMatrix *>(&matrix)));
+    Teuchos::RCP<Xpetra::CrsMatrix<double,int,int,node> > muelu_crs_matrix =
+      Teuchos::rcp(new Xpetra::EpetraCrsMatrix (rcp_matrix));
+    Teuchos::RCP<Xpetra::Matrix<double,int,int,node> > muelu_matrix =
+      Teuchos::rcp(new Xpetra::CrsMatrixWrap<double,int,int,node> (muelu_crs_matrix));
+
+    // Create the multigrid hierarchy using ML parameters.
+    Teuchos::RCP<MueLu::HierarchyManager<double,int,int,node> > hierarchy_factory;
+    hierarchy_factory = Teuchos::rcp(
+                          new MueLu::MLParameterListInterpreter<double,int,int,node> (muelu_parameters));
+    Teuchos::RCP<MueLu::Hierarchy<double,int,int,node> > hierarchy = hierarchy_factory->CreateHierarchy();
+    hierarchy->GetLevel(0)->Set("A",muelu_matrix);  // attach the wrapped matrix to level 0 under the name "A"
+    hierarchy_factory->SetupHierarchy(*hierarchy);
+
+    // MueLu::EpetraOperator is just a wrapper around a "standard"
+    // Epetra_Operator.
+    preconditioner.reset(new MueLu::EpetraOperator(hierarchy));
+  }
+
+
+
+  template <typename number>
+  void
+  PreconditionAMGMueLu::
+  initialize (const ::dealii::SparseMatrix<number> &deal_ii_sparse_matrix, // deal.II matrix to copy into a Trilinos matrix
+              const AdditionalData                 &additional_data,       // AMG settings, passed on unchanged
+              const double                          drop_tolerance,        // passed on to SparseMatrix::reinit
+              const ::dealii::SparsityPattern      *use_this_sparsity)     // passed on to SparseMatrix::reinit
+  {
+    preconditioner.reset();
+    const size_type n_rows = deal_ii_sparse_matrix.m();
+
+    // Init Epetra Matrix using an
+    // equidistributed map; avoid
+    // storing the nonzero
+    // elements.
+    vector_distributor.reset (new Epetra_Map(static_cast<TrilinosWrappers::types::int_type>(n_rows),
+                                             0, communicator));
+
+    if (trilinos_matrix.get() == 0)                // create the owned matrix only once, reuse it afterwards
+      trilinos_matrix.reset (new SparseMatrix());
+
+    trilinos_matrix->reinit (*vector_distributor, *vector_distributor,
+                             deal_ii_sparse_matrix, drop_tolerance, true,
+                             use_this_sparsity);
+
+    initialize (*trilinos_matrix, additional_data); // continue with the overload for Trilinos matrices
+  }
+
+
+
+  void PreconditionAMGMueLu::clear ()
+  {
+    PreconditionBase::clear();  // let the base class do its part of the cleanup first
+    trilinos_matrix.reset();    // then drop the matrix this class may have created
+  }
+
+
+
+  PreconditionAMGMueLu::size_type
+  PreconditionAMGMueLu::memory_consumption() const
+  {
+    // Size of the object itself (not of the `this` pointer), accumulated in size_type to avoid truncation.
+    size_type memory = sizeof(*this);
+
+    // todo: find a way to read out MueLu's data sizes
+    if (trilinos_matrix.get() != 0)
+      memory += trilinos_matrix->memory_consumption();
+    return memory;
+  }
+
+
+
+  // explicit instantiations
+  template void PreconditionAMGMueLu::initialize (const ::dealii::SparseMatrix<double> &,
+                                                  const AdditionalData &, const double,
+                                                  const ::dealii::SparsityPattern *);
+  template void PreconditionAMGMueLu::initialize (const ::dealii::SparseMatrix<float> &,
+                                                  const AdditionalData &, const double,
+                                                  const ::dealii::SparsityPattern *);
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_TRILINOS_VERSION_GTE(11,14,0)
+#endif // DEAL_II_WITH_TRILINOS
diff --git a/source/lac/trilinos_solver.cc b/source/lac/trilinos_solver.cc
new file mode 100644
index 0000000..6b53970
--- /dev/null
+++ b/source/lac/trilinos_solver.cc
@@ -0,0 +1,569 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/trilinos_solver.h>
+
+#ifdef DEAL_II_WITH_TRILINOS
+
+#  include <deal.II/base/conditional_ostream.h>
+#  include <deal.II/lac/trilinos_sparse_matrix.h>
+#  include <deal.II/lac/trilinos_vector_base.h>
+#  include <deal.II/lac/trilinos_precondition.h>
+
+#  include <cmath>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace TrilinosWrappers
+{
+
+  // Store the output flag and the GMRES restart parameter given by the caller.
+  SolverBase::AdditionalData::AdditionalData (const bool         output_solver_details,
+                                              const unsigned int gmres_restart_parameter)
+    : output_solver_details (output_solver_details),
+      gmres_restart_parameter (gmres_restart_parameter)
+  {}
+
+
+
+  // Without an explicitly named solver, GMRES is selected.
+  SolverBase::SolverBase (SolverControl &cn)
+    : solver_name (gmres),
+      solver_control (cn)
+  {}
+
+
+
+  // Use the solver selected by the caller together with the given SolverControl.
+  SolverBase::SolverBase (const enum SolverBase::SolverName  solver_name,
+                          SolverControl                     &cn)
+    : solver_name (solver_name),
+      solver_control (cn)
+  {}
+
+
+
+  // Empty: no explicit cleanup is performed here.
+  SolverBase::~SolverBase () {}
+
+
+
+  // Return the SolverControl object this solver was constructed with.
+  SolverControl &SolverBase::control() const
+  {
+    return solver_control;
+  }
+
+
+
+  void
+  SolverBase::solve (const SparseMatrix     &A,
+                     VectorBase             &x,
+                     const VectorBase       &b,
+                     const PreconditionBase &preconditioner)
+  {
+    // Forget whatever problem an earlier call may have left behind.
+    linear_problem.reset();
+
+    // AztecOO learns about matrix and vectors through an Epetra_LinearProblem,
+    // which takes non-const pointers; hence the two const_casts.
+    Epetra_CrsMatrix   *matrix_ptr = const_cast<Epetra_CrsMatrix *>(&A.trilinos_matrix());
+    Epetra_MultiVector *rhs_ptr    = const_cast<Epetra_MultiVector *>(&b.trilinos_vector());
+    linear_problem.reset (new Epetra_LinearProblem(matrix_ptr, &x.trilinos_vector(), rhs_ptr));
+
+    do_solve(preconditioner);
+  }
+
+
+
+  void
+  SolverBase::solve (Epetra_Operator        &A,
+                     VectorBase             &x,
+                     const VectorBase       &b,
+                     const PreconditionBase &preconditioner)
+  {
+    // Forget whatever problem an earlier call may have left behind.
+    linear_problem.reset();
+
+    // Epetra_LinearProblem takes a non-const pointer for the right hand side,
+    // hence the const_cast.
+    Epetra_MultiVector *rhs_ptr = const_cast<Epetra_MultiVector *>(&b.trilinos_vector());
+
+    linear_problem.reset (new Epetra_LinearProblem(&A, &x.trilinos_vector(), rhs_ptr));
+
+    do_solve(preconditioner);
+  }
+
+
+
+  void
+  SolverBase::solve (const SparseMatrix           &A,
+                     dealii::Vector<double>       &x,
+                     const dealii::Vector<double> &b,
+                     const PreconditionBase       &preconditioner)
+  {
+    linear_problem.reset();
+
+    // The deal.II vectors get wrapped as Epetra views below. Before doing so,
+    // check that the sizes fit, that the whole matrix is stored on this
+    // process, and that the matrix has been compressed.
+    AssertDimension (x.size(), A.n());
+    AssertDimension (b.size(), A.m());
+    Assert (A.local_range ().second == A.m(),
+            ExcMessage ("Can only work in serial when using deal.II vectors."));
+    Assert (A.trilinos_matrix().Filled(),
+            ExcMessage ("Matrix is not compressed. Call compress() method."));
+
+    // Views of the vectors in Epetra format.
+    Epetra_Vector ep_x (View, A.domain_partitioner(), x.begin());
+    Epetra_Vector ep_b (View, A.range_partitioner(), const_cast<double *>(b.begin()));
+
+    // AztecOO learns about the matrix and the vectors through an
+    // Epetra_LinearProblem object, which takes a non-const matrix pointer.
+    Epetra_CrsMatrix *matrix_ptr = const_cast<Epetra_CrsMatrix *>(&A.trilinos_matrix());
+
+    linear_problem.reset (new Epetra_LinearProblem(matrix_ptr, &ep_x, &ep_b));
+
+    do_solve(preconditioner);
+  }
+
+
+
+  void
+  SolverBase::solve (Epetra_Operator              &A,
+                     dealii::Vector<double>       &x,
+                     const dealii::Vector<double> &b,
+                     const PreconditionBase       &preconditioner)
+  {
+    linear_problem.reset();
+
+    // Wrap the deal.II vectors as Epetra views laid out like the operator's
+    // domain (solution) and range (right hand side) maps.
+    const Epetra_Map &domain_map = A.OperatorDomainMap();
+    const Epetra_Map &range_map  = A.OperatorRangeMap();
+    Epetra_Vector ep_x (View, domain_map, x.begin());
+    Epetra_Vector ep_b (View, range_map, const_cast<double *>(b.begin()));
+    linear_problem.reset (new Epetra_LinearProblem(&A, &ep_x, &ep_b));
+    do_solve(preconditioner);
+  }
+
+
+
+  void
+  SolverBase::solve (const SparseMatrix                                  &A,
+                     dealii::parallel::distributed::Vector<double>       &x,
+                     const dealii::parallel::distributed::Vector<double> &b,
+                     const PreconditionBase                              &preconditioner)
+  {
+    linear_problem.reset();
+
+    // The locally stored parts of x and b have to match the number of
+    // elements the matrix partitioners assign to this process.
+    const Epetra_Map &domain_map = A.domain_partitioner();
+    const Epetra_Map &range_map  = A.range_partitioner();
+    AssertDimension (static_cast<TrilinosWrappers::types::int_type>(x.local_size()),
+                     domain_map.NumMyElements());
+    AssertDimension (static_cast<TrilinosWrappers::types::int_type>(b.local_size()),
+                     range_map.NumMyElements());
+
+    // Views of the deal.II vectors in Epetra format.
+    Epetra_Vector ep_x (View, domain_map, x.begin());
+    Epetra_Vector ep_b (View, range_map, const_cast<double *>(b.begin()));
+
+    Epetra_CrsMatrix *matrix_ptr = const_cast<Epetra_CrsMatrix *>(&A.trilinos_matrix());
+    linear_problem.reset (new Epetra_LinearProblem(matrix_ptr, &ep_x, &ep_b));
+
+    do_solve(preconditioner);
+  }
+
+
+
+  void
+  SolverBase::solve (Epetra_Operator                                     &A,
+                     dealii::parallel::distributed::Vector<double>       &x,
+                     const dealii::parallel::distributed::Vector<double> &b,
+                     const PreconditionBase                              &preconditioner)
+  {
+    linear_problem.reset();
+
+    // The locally stored parts of x and b have to match the number of
+    // elements the operator's maps assign to this process.
+    const Epetra_Map &domain_map = A.OperatorDomainMap();
+    const Epetra_Map &range_map  = A.OperatorRangeMap();
+    AssertDimension (static_cast<TrilinosWrappers::types::int_type>(x.local_size()),
+                     domain_map.NumMyElements());
+    AssertDimension (static_cast<TrilinosWrappers::types::int_type>(b.local_size()),
+                     range_map.NumMyElements());
+
+    Epetra_Vector ep_x (View, domain_map, x.begin());
+    Epetra_Vector ep_b (View, range_map, const_cast<double *>(b.begin()));
+    linear_problem.reset (new Epetra_LinearProblem(&A, &ep_x, &ep_b));
+    do_solve(preconditioner);
+  }
+
+
+
+  void
+  SolverBase::do_solve(const PreconditionBase &preconditioner)
+  {
+    int ierr;
+
+    // Hand the current linear problem to the AztecOO solver object...
+    solver.SetProblem(*linear_problem);
+
+    // ... and we can specify the solver to be used.
+    switch (solver_name)
+      {
+      case cg:
+        solver.SetAztecOption(AZ_solver, AZ_cg);
+        break;
+      case cgs:
+        solver.SetAztecOption(AZ_solver, AZ_cgs);
+        break;
+      case gmres:
+        solver.SetAztecOption(AZ_solver, AZ_gmres);
+        solver.SetAztecOption(AZ_kspace, additional_data.gmres_restart_parameter);
+        break;
+      case bicgstab:
+        solver.SetAztecOption(AZ_solver, AZ_bicgstab);
+        break;
+      case tfqmr:
+        solver.SetAztecOption(AZ_solver, AZ_tfqmr);
+        break;
+      default:
+        Assert (false, ExcNotImplemented());
+      }
+
+    // Introduce the preconditioner; if no preconditioner object has been set
+    // up, AztecOO's own preconditioning is switched off, ...
+    if (preconditioner.preconditioner.use_count()!=0)
+      {
+        ierr = solver.SetPrecOperator (const_cast<Epetra_Operator *>
+                                       (preconditioner.preconditioner.get()));
+        AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+      }
+    else
+      solver.SetAztecOption(AZ_precond,AZ_none);
+
+    // ... set some options, ...
+    solver.SetAztecOption (AZ_output, additional_data.output_solver_details ?
+                           AZ_all : AZ_none);
+    solver.SetAztecOption (AZ_conv, AZ_noscaled);
+
+    // ... and then solve!
+    ierr = solver.Iterate (solver_control.max_steps(),
+                           solver_control.tolerance());
+
+    // report errors in more detail than just by checking whether the return
+    // status is zero or greater (strings taken from AztecOO::Iterate). Every
+    // AssertThrow(false, ...) throws, so the cases need no break statements.
+    switch (ierr)
+      {
+      case -1:
+        AssertThrow (false, ExcMessage("AztecOO::Iterate error code -1: "
+                                       "option not implemented"));
+      case -2:
+        AssertThrow (false, ExcMessage("AztecOO::Iterate error code -2: "
+                                       "numerical breakdown"));
+      case -3:
+        AssertThrow (false, ExcMessage("AztecOO::Iterate error code -3: "
+                                       "loss of precision"));
+      case -4:
+        AssertThrow (false, ExcMessage("AztecOO::Iterate error code -4: "
+                                       "GMRES Hessenberg ill-conditioned"));
+      default:
+        AssertThrow (ierr >= 0, ExcTrilinosError(ierr));
+      }
+
+    // Finally, let the deal.II SolverControl object know what has
+    // happened. If the solve succeeded, the status of the solver control will
+    // turn into SolverControl::success.
+    solver_control.check (solver.NumIters(), solver.TrueResidual());
+
+    if (solver_control.last_check() != SolverControl::success)
+      AssertThrow(false, SolverControl::NoConvergence (solver_control.last_step(),
+                                                       solver_control.last_value()));
+  }
+
+
+
+
+
+  /* ---------------------- SolverCG ------------------------ */
+
+  // Store the flag that requests solver output.
+  SolverCG::AdditionalData::AdditionalData (const bool output_solver_details)
+    : output_solver_details (output_solver_details)
+  {}
+
+
+
+  // Select the cg solver in the base class.
+  // NOTE(review): the settings go into SolverCG::additional_data, whereas
+  // SolverBase::do_solve() reads the base-class member of the same name --
+  // confirm that the flag chosen here actually reaches AztecOO.
+  SolverCG::SolverCG (SolverControl        &cn,
+                      const AdditionalData &data)
+    : SolverBase (cg, cn),
+      additional_data (data.output_solver_details)
+  {}
+
+
+  /* ---------------------- SolverGMRES ------------------------ */
+
+  // Store the output flag and the restart parameter requested for GMRES.
+  SolverGMRES::AdditionalData::
+  AdditionalData (const bool output_solver_details,
+                  const unsigned int restart_parameter)
+    : output_solver_details (output_solver_details),
+      restart_parameter (restart_parameter)
+  {}
+
+
+
+  // Select the gmres solver in the base class.
+  // NOTE(review): the settings go into SolverGMRES::additional_data, whereas
+  // SolverBase::do_solve() reads the base-class member of the same name --
+  // confirm that flag and restart parameter actually reach AztecOO.
+  SolverGMRES::SolverGMRES (SolverControl        &cn,
+                            const AdditionalData &data)
+    : SolverBase (gmres, cn),
+      additional_data (data.output_solver_details, data.restart_parameter)
+  {}
+
+
+  /* ---------------------- SolverBicgstab ------------------------ */
+
+  // Store the flag that requests solver output.
+  SolverBicgstab::AdditionalData::AdditionalData (const bool output_solver_details)
+    : output_solver_details (output_solver_details)
+  {}
+
+
+
+
+  // Select the bicgstab solver in the base class.
+  // NOTE(review): the settings go into SolverBicgstab::additional_data, whereas
+  // SolverBase::do_solve() reads the base-class member of the same name --
+  // confirm that the flag chosen here actually reaches AztecOO.
+  SolverBicgstab::SolverBicgstab (SolverControl        &cn,
+                                  const AdditionalData &data)
+    : SolverBase (bicgstab, cn),
+      additional_data (data.output_solver_details)
+  {}
+
+
+  /* ---------------------- SolverCGS ------------------------ */
+
+  // Store the flag that requests solver output.
+  SolverCGS::AdditionalData::AdditionalData (const bool output_solver_details)
+    : output_solver_details (output_solver_details)
+  {}
+
+
+
+
+  // Select the cgs solver in the base class.
+  // NOTE(review): the settings go into SolverCGS::additional_data, whereas
+  // SolverBase::do_solve() reads the base-class member of the same name --
+  // confirm that the flag chosen here actually reaches AztecOO.
+  SolverCGS::SolverCGS (SolverControl        &cn,
+                        const AdditionalData &data)
+    : SolverBase (cgs, cn),
+      additional_data (data.output_solver_details)
+  {}
+
+
+  /* ---------------------- SolverTFQMR ------------------------ */
+
+  // Store the flag that requests solver output.
+  SolverTFQMR::AdditionalData::AdditionalData (const bool output_solver_details)
+    : output_solver_details (output_solver_details)
+  {}
+
+
+
+  // Select the tfqmr solver in the base class.
+  // NOTE(review): the settings go into SolverTFQMR::additional_data, whereas
+  // SolverBase::do_solve() reads the base-class member of the same name --
+  // confirm that the flag chosen here actually reaches AztecOO.
+  SolverTFQMR::SolverTFQMR (SolverControl        &cn,
+                            const AdditionalData &data)
+    : SolverBase (tfqmr, cn),
+      additional_data (data.output_solver_details)
+  {}
+
+
+
+  /* ---------------------- SolverDirect ------------------------ */
+
+  // Store the output flag and the name of the Amesos solver to be used.
+  SolverDirect::AdditionalData::
+  AdditionalData (const bool output_solver_details,
+                  const std::string &solver_type)
+    : output_solver_details (output_solver_details),
+      solver_type (solver_type)
+  {}
+
+
+
+
+  // Remember the SolverControl object and take over the given settings.
+  SolverDirect::SolverDirect (SolverControl  &cn,
+                              const AdditionalData &data)
+    : solver_control (cn),
+      additional_data (data.output_solver_details, data.solver_type)
+  {}
+
+
+
+  // Empty: no explicit cleanup is performed here.
+  SolverDirect::~SolverDirect () {}
+
+
+
+  // Return the SolverControl object this solver was constructed with.
+  SolverControl &SolverDirect::control() const
+  {
+    return solver_control;
+  }
+
+
+
+  void
+  SolverDirect::do_solve()
+  {
+    // Fetch return value of Amesos Solver functions
+    int ierr;
+
+    // First set whether we want to print the solver information to screen or
+    // not.
+    ConditionalOStream  verbose_cout (std::cout,
+                                      additional_data.output_solver_details);
+
+    solver.reset();
+
+    // Next allocate the Amesos solver, this is done in two steps, first we
+    // create a solver Factory and generate with that the concrete Amesos
+    // solver, if possible.
+    Amesos Factory;
+
+    AssertThrow(
+      Factory.Query(additional_data.solver_type.c_str()),
+      ExcMessage (std::string ("You tried to select the solver type <") +
+                  additional_data.solver_type +
+                  "> but this solver is not supported by Trilinos either "
+                  "because it does not exist, or because Trilinos was not "
+                  "configured for its use.")
+    );
+
+    solver.reset (
+      Factory.Create(additional_data.solver_type.c_str(), *linear_problem)
+    );
+
+    verbose_cout << "Starting symbolic factorization" << std::endl;
+    ierr = solver->SymbolicFactorization();
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    verbose_cout << "Starting numeric factorization" << std::endl;
+    ierr = solver->NumericFactorization();
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    verbose_cout << "Starting solve" << std::endl;
+    ierr = solver->Solve();
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    // Finally, let the deal.II SolverControl object know what has
+    // happened. A direct solve has no iteration history, so the check is
+    // fed zero steps and a zero residual.
+    solver_control.check (0, 0);
+
+    if (solver_control.last_check() != SolverControl::success)
+      AssertThrow(false, SolverControl::NoConvergence (solver_control.last_step(),
+                                                       solver_control.last_value()));
+  }
+
+
+  void
+  SolverDirect::solve (const SparseMatrix     &A,
+                       VectorBase             &x,
+                       const VectorBase       &b)
+  {
+    // Amesos learns about matrix and vectors through an Epetra_LinearProblem,
+    // which takes non-const pointers; hence the two const_casts.
+    Epetra_CrsMatrix   *matrix_ptr = const_cast<Epetra_CrsMatrix *>(&A.trilinos_matrix());
+    Epetra_MultiVector *rhs_ptr    = const_cast<Epetra_MultiVector *>(&b.trilinos_vector());
+
+    linear_problem.reset (new Epetra_LinearProblem(matrix_ptr, &x.trilinos_vector(), rhs_ptr));
+
+    do_solve();
+  }
+
+
+
+  void
+  SolverDirect::solve (const SparseMatrix           &A,
+                       dealii::Vector<double>       &x,
+                       const dealii::Vector<double> &b)
+  {
+    // The deal.II vectors get wrapped as Epetra views below. Before doing so,
+    // check that the sizes fit and that the whole matrix is stored on this
+    // process.
+    AssertDimension (x.size(), A.n());
+    AssertDimension (b.size(), A.m());
+    Assert (A.local_range ().second == A.m(),
+            ExcMessage ("Can only work in serial when using deal.II vectors."));
+
+    // Views of the vectors in Epetra format.
+    Epetra_Vector ep_x (View, A.domain_partitioner(), x.begin());
+    Epetra_Vector ep_b (View, A.range_partitioner(), const_cast<double *>(b.begin()));
+
+    // Amesos learns about the matrix and the vectors through an
+    // Epetra_LinearProblem object, which takes a non-const matrix pointer.
+    Epetra_CrsMatrix *matrix_ptr = const_cast<Epetra_CrsMatrix *>(&A.trilinos_matrix());
+
+    linear_problem.reset (new Epetra_LinearProblem(matrix_ptr, &ep_x, &ep_b));
+
+    do_solve();
+  }
+
+
+
+  void
+  SolverDirect::solve (const SparseMatrix                                  &A,
+                       dealii::parallel::distributed::Vector<double>       &x,
+                       const dealii::parallel::distributed::Vector<double> &b)
+  {
+    // The locally stored parts of x and b have to match the number of
+    // elements the matrix partitioners assign to this process.
+    const Epetra_Map &domain_map = A.domain_partitioner();
+    const Epetra_Map &range_map  = A.range_partitioner();
+    AssertDimension (static_cast<TrilinosWrappers::types::int_type>(x.local_size()),
+                     domain_map.NumMyElements());
+    AssertDimension (static_cast<TrilinosWrappers::types::int_type>(b.local_size()),
+                     range_map.NumMyElements());
+    Epetra_Vector ep_x (View, domain_map, x.begin());
+    Epetra_Vector ep_b (View, range_map, const_cast<double *>(b.begin()));
+
+    Epetra_CrsMatrix *matrix_ptr = const_cast<Epetra_CrsMatrix *>(&A.trilinos_matrix());
+    linear_problem.reset (new Epetra_LinearProblem(matrix_ptr, &ep_x, &ep_b));
+    do_solve();
+  }
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_TRILINOS
diff --git a/source/lac/trilinos_sparse_matrix.cc b/source/lac/trilinos_sparse_matrix.cc
new file mode 100644
index 0000000..ba82179
--- /dev/null
+++ b/source/lac/trilinos_sparse_matrix.cc
@@ -0,0 +1,2546 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/trilinos_sparse_matrix.h>
+
+#ifdef DEAL_II_WITH_TRILINOS
+
+#  include <deal.II/base/utilities.h>
+#  include <deal.II/lac/sparse_matrix.h>
+#  include <deal.II/lac/trilinos_sparsity_pattern.h>
+#  include <deal.II/lac/sparsity_pattern.h>
+#  include <deal.II/lac/dynamic_sparsity_pattern.h>
+#  include <deal.II/lac/sparsity_tools.h>
+#  include <deal.II/lac/parallel_vector.h>
+
+DEAL_II_DISABLE_EXTRA_DIAGNOSTICS
+#  include <Epetra_Export.h>
+#  include <ml_epetra_utils.h>
+#  include <ml_struct.h>
+#  include <Teuchos_RCP.hpp>
+DEAL_II_ENABLE_EXTRA_DIAGNOSTICS
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace TrilinosWrappers
+{
+  namespace
+  {
+#ifndef DEAL_II_WITH_64BIT_INDICES
+    // helper functions that query sizes and global indices of Epetra objects
+    // through the plain (int) Epetra member functions. the 64-bit branch below
+    // offers the same names returning long long int, so that callers can use
+    // the results with Epetra member functions overloaded by index type
+    int n_global_elements (const Epetra_BlockMap &map)
+    {
+      return map.NumGlobalElements();
+    }
+
+    int min_my_gid(const Epetra_BlockMap &map)
+    {
+      return map.MinMyGID();
+    }
+
+    int max_my_gid(const Epetra_BlockMap &map)
+    {
+      return map.MaxMyGID();
+    }
+
+    int n_global_cols(const Epetra_CrsGraph &graph)
+    {
+      return graph.NumGlobalCols();
+    }
+
+    int global_column_index(const Epetra_CrsMatrix &matrix, int i)
+    {
+      return matrix.GCID(i);
+    }
+
+    int global_row_index(const Epetra_CrsMatrix &matrix, int i)
+    {
+      return matrix.GRID(i);
+    }
+#else
+    // helper functions that query sizes and global indices of Epetra objects
+    // through the ...64 Epetra member functions. the 32-bit branch above
+    // offers the same names returning int, so that callers can use the
+    // results with Epetra member functions overloaded by index type
+    long long int n_global_elements (const Epetra_BlockMap &map)
+    {
+      return map.NumGlobalElements64();
+    }
+
+    long long int min_my_gid(const Epetra_BlockMap &map)
+    {
+      return map.MinMyGID64();
+    }
+
+    long long int max_my_gid(const Epetra_BlockMap &map)
+    {
+      return map.MaxMyGID64();
+    }
+
+    long long int n_global_cols(const Epetra_CrsGraph &graph)
+    {
+      return graph.NumGlobalCols64();
+    }
+
+    long long int global_column_index(const Epetra_CrsMatrix &matrix, int i)
+    {
+      return matrix.GCID64(i);
+    }
+
+    long long int global_row_index(const Epetra_CrsMatrix &matrix, int i)
+    {
+      return matrix.GRID64(i);
+    }
+#endif
+  }
+
+
+  namespace SparseMatrixIterators
+  {
+    // Refresh the cached column indices and values for the row a_row.
+    void
+    AccessorBase::visit_present_row ()
+    {
+      // if we are asked to visit the past-the-end line, then simply
+      // release all our caches and go on with life.
+      //
+      // do the same if the row we're supposed to visit is not locally
+      // owned. this is simply going to make non-locally owned rows
+      // look like they're empty
+      if ((this->a_row == matrix->m())
+          ||
+          (matrix->in_local_range (this->a_row) == false))
+        {
+          colnum_cache.reset ();
+          value_cache.reset ();
+
+          return;
+        }
+
+      // make room for a full row: Trilinos is told below that each buffer
+      // can hold matrix->n() entries
+      int ncols = 0;
+      TrilinosWrappers::types::int_type colnums = matrix->n();
+      if (value_cache.get() == 0)
+        {
+          value_cache.reset (new std::vector<TrilinosScalar> (matrix->n()));
+          colnum_cache.reset (new std::vector<size_type> (matrix->n()));
+        }
+      else
+        {
+          value_cache->resize (matrix->n());
+          colnum_cache->resize (matrix->n());
+        }
+
+      int ierr = matrix->trilinos_matrix().
+                 ExtractGlobalRowCopy(static_cast<TrilinosWrappers::types::int_type>(this->a_row),
+                                      colnums,
+                                      ncols, &((*value_cache)[0]),
+                                      reinterpret_cast<TrilinosWrappers::types::int_type *>(&((*colnum_cache)[0])));
+      // check for failure before using ncols: if the call failed, ncols need
+      // not hold a meaningful row length
+      AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+      // the row was written straight into the caches, so all that is left
+      // to do is to cut them down to the number of entries actually present
+      value_cache->resize (ncols);
+      colnum_cache->resize (ncols);
+    }
+  }
+
+
+  // The constructor is actually the
+  // only point where we have to check
+  // whether we build a serial or a
+  // parallel Trilinos matrix.
+  // Actually, it does not even matter
+  // how many threads there are, but
+  // only if we use an MPI compiler or
+  // a standard compiler. So, even one
+  // thread on a configuration with
+  // MPI will still get a parallel
+  // interface.
+  SparseMatrix::SparseMatrix ()
+    : column_space_map (new Epetra_Map (0, 0, Utilities::Trilinos::comm_self())),
+      matrix (new Epetra_FECrsMatrix(View,
+                                     *column_space_map, *column_space_map,
+                                     0)),
+      last_action (Zero),
+      compressed (true)
+  {
+    // The map has zero elements, so the empty matrix is finalized right away.
+    matrix->FillComplete();
+  }
+
+
+
+  // A copy of input_map is kept as column space map and handed to Epetra.
+  SparseMatrix::SparseMatrix (const Epetra_Map  &input_map,
+                              const size_type n_max_entries_per_row)
+    : column_space_map (new Epetra_Map (input_map)),
+      matrix (new Epetra_FECrsMatrix(Copy, *column_space_map,
+                                     static_cast<TrilinosWrappers::types::int_type>(n_max_entries_per_row),
+                                     false)),
+      last_action (Zero), compressed (false)
+  {}
+
+
+
+  // Variant with one row length per entry of n_entries_per_row. Epetra takes
+  // the lengths as int*, so the unsigned int storage is reinterpreted.
+  SparseMatrix::SparseMatrix (const Epetra_Map                &input_map,
+                              const std::vector<unsigned int> &n_entries_per_row)
+    : column_space_map (new Epetra_Map (input_map)),
+      matrix (new Epetra_FECrsMatrix
+              (Copy, *column_space_map,
+               reinterpret_cast<int *>(const_cast<unsigned int *>(&n_entries_per_row[0])),
+               false)),
+      last_action (Zero), compressed (false)
+  {}
+
+
+
+  // Separate maps: input_row_map is handed to Epetra for the rows, while a
+  // copy of input_col_map is kept as the column space map.
+  SparseMatrix::SparseMatrix (const Epetra_Map  &input_row_map,
+                              const Epetra_Map  &input_col_map,
+                              const size_type n_max_entries_per_row)
+    : column_space_map (new Epetra_Map (input_col_map)),
+      matrix (new Epetra_FECrsMatrix(Copy, input_row_map,
+                                     static_cast<TrilinosWrappers::types::int_type>(n_max_entries_per_row), false)),
+      last_action (Zero), compressed (false)
+  {}
+
+
+
+  // Separate row and column maps with one row length per entry of
+  // n_entries_per_row; Epetra takes the lengths as int*.
+  SparseMatrix::SparseMatrix (const Epetra_Map                &input_row_map,
+                              const Epetra_Map                &input_col_map,
+                              const std::vector<unsigned int> &n_entries_per_row)
+    : column_space_map (new Epetra_Map (input_col_map)),
+      matrix (new Epetra_FECrsMatrix(Copy, input_row_map,
+                                     reinterpret_cast<int *>(const_cast<unsigned int *>(&n_entries_per_row[0])),
+                                     false)),
+      last_action (Zero), compressed (false)
+  {}
+
+
+
+  // Constructor for an m x n matrix whose maps are built on the
+  // communicator returned by Utilities::Trilinos::comm_self().
+  //
+  // On one processor only, we know how the columns of the matrix will be
+  // distributed (everything on one processor), so we can hand in this
+  // information to the Epetra_FECrsMatrix constructor. We can't do so in
+  // parallel, where the information from columns is only available when
+  // entries have been added.
+  SparseMatrix::SparseMatrix (const size_type m,
+                              const size_type n,
+                              const unsigned int n_max_entries_per_row)
+    : column_space_map (new Epetra_Map (static_cast<TrilinosWrappers::types::int_type>(n),
+                                        0,
+                                        Utilities::Trilinos::comm_self())),
+      matrix (new Epetra_FECrsMatrix(Copy,
+                                     Epetra_Map (static_cast<TrilinosWrappers::types::int_type>(m),
+                                                 0,
+                                                 Utilities::Trilinos::comm_self()),
+                                     *column_space_map,
+                                     n_max_entries_per_row,
+                                     false)),
+      last_action (Zero),
+      compressed (false)
+  {}
+
+
+
+  // m x n matrix on the comm_self() communicator with one row length per
+  // entry of n_entries_per_row; Epetra takes the lengths as int*.
+  SparseMatrix::SparseMatrix (const size_type                  m,
+                              const size_type                  n,
+                              const std::vector<unsigned int> &n_entries_per_row)
+    : column_space_map (new Epetra_Map (static_cast<TrilinosWrappers::types::int_type>(n), 0,
+                                        Utilities::Trilinos::comm_self())),
+      matrix (new Epetra_FECrsMatrix(Copy,
+                                     Epetra_Map (static_cast<TrilinosWrappers::types::int_type>(m), 0,
+                                                 Utilities::Trilinos::comm_self()),
+                                     *column_space_map,
+                                     reinterpret_cast<int *>(const_cast<unsigned int *>(&n_entries_per_row[0])),
+                                     false)),
+      last_action (Zero), compressed (false)
+  {}
+
+
+
+  // The map derived from parallel_partitioning is kept as column space map
+  // and is also the map handed to the Epetra matrix.
+  SparseMatrix::SparseMatrix (const IndexSet     &parallel_partitioning,
+                              const MPI_Comm     &communicator,
+                              const unsigned int  n_max_entries_per_row)
+    : column_space_map (new Epetra_Map(parallel_partitioning.make_trilinos_map(communicator, false))),
+      matrix (new Epetra_FECrsMatrix(Copy,
+                                     *column_space_map,
+                                     n_max_entries_per_row,
+                                     false)),
+      last_action (Zero),
+      compressed (false)
+  {}
+
+
+
+  // As for a single row length, but with one length per entry of
+  // n_entries_per_row; Epetra takes the lengths as int*.
+  SparseMatrix::SparseMatrix (const IndexSet                  &parallel_partitioning,
+                              const MPI_Comm                  &communicator,
+                              const std::vector<unsigned int> &n_entries_per_row)
+    : column_space_map (new Epetra_Map(parallel_partitioning.make_trilinos_map(communicator, false))),
+      matrix (new Epetra_FECrsMatrix(Copy,
+                                     *column_space_map,
+                                     reinterpret_cast<int *>(const_cast<unsigned int *>(&n_entries_per_row[0])),
+                                     false)),
+      last_action (Zero),
+      compressed (false)
+  {}
+
+
+
+  // Rows follow the map made from row_parallel_partitioning; the map made
+  // from col_parallel_partitioning is kept as the column space map.
+  SparseMatrix::SparseMatrix (const IndexSet  &row_parallel_partitioning,
+                              const IndexSet  &col_parallel_partitioning,
+                              const MPI_Comm  &communicator,
+                              const size_type  n_max_entries_per_row)
+    : column_space_map (new Epetra_Map(col_parallel_partitioning.make_trilinos_map(communicator, false))),
+      matrix (new Epetra_FECrsMatrix(Copy,
+                                     row_parallel_partitioning.make_trilinos_map(communicator, false),
+                                     n_max_entries_per_row,
+                                     false)),
+      last_action (Zero),
+      compressed (false)
+  {}
+
+
+
+
+  // Separate row and column partitionings with one row length per entry of
+  // n_entries_per_row; Epetra takes the lengths as int*.
+  SparseMatrix::SparseMatrix (const IndexSet                  &row_parallel_partitioning,
+                              const IndexSet                  &col_parallel_partitioning,
+                              const MPI_Comm                  &communicator,
+                              const std::vector<unsigned int> &n_entries_per_row)
+    : column_space_map (new Epetra_Map(col_parallel_partitioning.make_trilinos_map(communicator, false))),
+      matrix (new Epetra_FECrsMatrix(Copy,
+                                     row_parallel_partitioning.make_trilinos_map(communicator, false),
+                                     reinterpret_cast<int *>(const_cast<unsigned int *>(&n_entries_per_row[0])),
+                                     false)),
+      last_action (Zero),
+      compressed (false)
+  {}
+
+
+
+
+  // Build the matrix on top of an existing, already compressed sparsity pattern.
+  SparseMatrix::SparseMatrix (const SparsityPattern &sparsity_pattern)
+    : column_space_map (new Epetra_Map (sparsity_pattern.domain_partitioner())),
+      matrix (new Epetra_FECrsMatrix(Copy,
+                                     sparsity_pattern.trilinos_sparsity_pattern(),
+                                     false)),
+      last_action (Zero),
+      compressed (true)
+  {
+    Assert(sparsity_pattern.trilinos_sparsity_pattern().Filled(),
+           ExcMessage("The Trilinos sparsity pattern has not been compressed."));
+    compress(VectorOperation::insert);
+  }
+
+
+
+  // Destructor with an empty body; no explicit cleanup is performed here.
+  SparseMatrix::~SparseMatrix ()
+  {}
+
+
+
+  // Make this matrix a copy of rhs. If both matrices have the same row,
+  // column and domain maps, the same number of nonzeros and identical column
+  // indices in every row of rhs.local_range(), only the values are
+  // overwritten in place. Otherwise the Epetra matrix is replaced by a copy
+  // of rhs.matrix. In either case the non-local matrix is rebuilt from the
+  // graph of rhs.nonlocal_matrix if rhs has one, and dropped otherwise.
+  void
+  SparseMatrix::copy_from (const SparseMatrix &rhs)
+  {
+    // copying onto ourselves is a no-op
+    if (this == &rhs)
+      return;
+
+    nonlocal_matrix.reset();
+    nonlocal_matrix_exporter.reset();
+
+    // A value-only copy is only an option if all maps and the total number
+    // of nonzeros agree. The row-by-row comparison below may still find a
+    // row with different columns and request the deep copy.
+    const bool same_layout =
+      matrix->RowMap().SameAs(rhs.matrix->RowMap()) &&
+      matrix->ColMap().SameAs(rhs.matrix->ColMap()) &&
+      matrix->DomainMap().SameAs(rhs.matrix->DomainMap()) &&
+      n_nonzero_elements() == rhs.n_nonzero_elements();
+    bool needs_deep_copy = !same_layout;
+
+    if (same_layout)
+      {
+        const std::pair<size_type, size_type> rhs_rows = rhs.local_range();
+
+        // Copy the values row by row. Rows handled before a mismatch is
+        // detected have already been overwritten; that is harmless because
+        // the deep copy below replaces the whole matrix anyway.
+        for (size_type row=rhs_rows.first; row < rhs_rows.second; ++row)
+          {
+            const int row_local =
+              matrix->RowMap().LID(static_cast<TrilinosWrappers::types::int_type>(row));
+
+            // view into the source row
+            int             rhs_n_entries;
+            TrilinosScalar *rhs_value_ptr;
+            int            *rhs_index_ptr;
+            int ierr = rhs.matrix->ExtractMyRowView (row_local, rhs_n_entries,
+                                                     rhs_value_ptr, rhs_index_ptr);
+            (void)ierr;
+            Assert (ierr == 0, ExcTrilinosError(ierr));
+
+            // view into the destination row
+            int             n_entries;
+            TrilinosScalar *value_ptr;
+            int            *index_ptr;
+            ierr = matrix->ExtractMyRowView (row_local, n_entries, value_ptr,
+                                             index_ptr);
+            Assert (ierr == 0, ExcTrilinosError(ierr));
+
+            const bool columns_differ =
+              n_entries != rhs_n_entries ||
+              std::memcmp(static_cast<void *>(index_ptr),
+                          static_cast<void *>(rhs_index_ptr),
+                          sizeof(int)*n_entries) != 0;
+            if (columns_differ)
+              {
+                needs_deep_copy = true;
+                break;
+              }
+
+            std::copy (rhs_value_ptr, rhs_value_ptr + n_entries, value_ptr);
+          }
+      }
+
+    if (needs_deep_copy)
+      {
+        column_space_map.reset (new Epetra_Map (rhs.domain_partitioner()));
+
+        // drop the old matrix first so both are never alive at once
+        matrix.reset ();
+        matrix.reset (new Epetra_FECrsMatrix(*rhs.matrix));
+
+        matrix->FillComplete(*column_space_map, matrix->RowMap());
+      }
+
+    if (rhs.nonlocal_matrix.get() != 0)
+      nonlocal_matrix.reset(new Epetra_CrsMatrix(Copy, rhs.nonlocal_matrix->Graph()));
+  }
+
+
+
+  namespace
+  {
+    typedef SparseMatrix::size_type size_type;
+
+    // Generic worker behind the SparseMatrix::reinit() overloads that take a
+    // sparsity pattern. It releases the objects passed in by reference, stores
+    // a copy of input_col_map in column_space_map and creates a new
+    // Epetra_FECrsMatrix in matrix. nonlocal_matrix and
+    // nonlocal_matrix_exporter are only reset by this overload, never
+    // re-created.
+    //
+    // With exchange_data == true the work is delegated to a temporary
+    // Trilinos SparsityPattern. Otherwise an Epetra_CrsGraph is filled by hand
+    // for the rows between the smallest and largest global index that
+    // input_row_map holds on this process.
+    template <typename SparsityPatternType>
+    void
+    reinit_matrix (const Epetra_Map                          &input_row_map,
+                   const Epetra_Map                          &input_col_map,
+                   const SparsityPatternType                 &sparsity_pattern,
+                   const bool                                 exchange_data,
+                   std_cxx11::shared_ptr<Epetra_Map>         &column_space_map,
+                   std_cxx11::shared_ptr<Epetra_FECrsMatrix> &matrix,
+                   std_cxx11::shared_ptr<Epetra_CrsMatrix>   &nonlocal_matrix,
+                   std_cxx11::shared_ptr<Epetra_Export>      &nonlocal_matrix_exporter)
+    {
+      // release memory before reallocation
+      matrix.reset();
+      nonlocal_matrix.reset();
+      nonlocal_matrix_exporter.reset();
+
+      // the size check is only performed on the process with rank 0
+      if (input_row_map.Comm().MyPID() == 0)
+        {
+          AssertDimension (sparsity_pattern.n_rows(),
+                           static_cast<size_type>(n_global_elements(input_row_map)));
+          AssertDimension (sparsity_pattern.n_cols(),
+                           static_cast<size_type>(n_global_elements(input_col_map)));
+        }
+
+      column_space_map.reset (new Epetra_Map (input_col_map));
+
+      // if we want to exchange data, build a usual Trilinos sparsity pattern
+      // and let that handle the exchange. otherwise, manually create a
+      // CrsGraph, which consumes considerably less memory because it can set
+      // the correct number of indices right from the start
+      if (exchange_data)
+        {
+          SparsityPattern trilinos_sparsity;
+          trilinos_sparsity.reinit (input_row_map, input_col_map,
+                                    sparsity_pattern, exchange_data);
+          matrix.reset (new Epetra_FECrsMatrix
+                        (Copy, trilinos_sparsity.trilinos_sparsity_pattern(), false));
+
+          return;
+        }
+
+      // half-open range [first_row, last_row) of global rows handled here
+      const size_type first_row = min_my_gid(input_row_map),
+                      last_row = max_my_gid(input_row_map)+1;
+      std::vector<int> n_entries_per_row(last_row-first_row);
+
+      for (size_type row=first_row; row<last_row; ++row)
+        n_entries_per_row[row-first_row] = sparsity_pattern.row_length(row);
+
+      // The deal.II notion of a sparsity pattern corresponds to the Epetra
+      // concept of a Graph. Hence, we generate a graph by copying the
+      // sparsity pattern into it, and then build up the matrix from the
+      // graph. This is considerably faster than directly filling elements
+      // into the matrix. Moreover, it consumes less memory, since the
+      // internal reordering is done on ints only, and we can leave the
+      // doubles aside.
+
+      // for more than one processor, need to specify only row map first and
+      // let the matrix entries decide about the column map (which says which
+      // columns are present in the matrix, not to be confused with the
+      // col_map that tells how the domain dofs of the matrix will be
+      // distributed). for only one processor, we can directly assign the
+      // columns as well. Compare this with bug # 4123 in the Sandia Bugzilla.
+      std_cxx11::shared_ptr<Epetra_CrsGraph> graph;
+      if (input_row_map.Comm().NumProc() > 1)
+        graph.reset (new Epetra_CrsGraph (Copy, input_row_map,
+                                          &n_entries_per_row[0], true));
+      else
+        graph.reset (new Epetra_CrsGraph (Copy, input_row_map, input_col_map,
+                                          &n_entries_per_row[0], true));
+
+      // This function assumes that the sparsity pattern sits on all
+      // processors (completely). The parallel version uses an Epetra graph
+      // that is already distributed.
+
+      // now insert the indices
+      std::vector<TrilinosWrappers::types::int_type>   row_indices;
+
+      for (size_type row=first_row; row<last_row; ++row)
+        {
+          const int row_length = sparsity_pattern.row_length(row);
+          if (row_length == 0)
+            continue;
+
+          // scratch buffer reused across rows; every slot in [0,row_length)
+          // is overwritten by the loop below
+          row_indices.resize (row_length, -1);
+          {
+            typename SparsityPatternType::iterator p = sparsity_pattern.begin(row);
+            for (size_type col=0; p != sparsity_pattern.end(row); ++p, ++col)
+              row_indices[col] = p->column();
+          }
+          graph->Epetra_CrsGraph::InsertGlobalIndices (row, row_length,
+                                                       &row_indices[0]);
+        }
+
+      // Eventually, optimize the graph structure (sort indices, make memory
+      // contiguous, etc). note that the documentation of the function indeed
+      // states that we first need to provide the column (domain) map and then
+      // the row (range) map
+      graph->FillComplete(input_col_map, input_row_map);
+      graph->OptimizeStorage();
+
+      // check whether we got the number of columns right.
+      AssertDimension (sparsity_pattern.n_cols(),
+                       static_cast<size_type>(n_global_cols(*graph)));
+      // reference n_global_cols outside the assertion as well (presumably to
+      // avoid an unused-function warning when assertions are disabled)
+      (void)n_global_cols;
+
+      // And now finally generate the matrix.
+      matrix.reset (new Epetra_FECrsMatrix(Copy, *graph, false));
+    }
+
+
+
+    // Helper graph type used for the non-local rows. Some processors never
+    // add anything to the non-local graph, and inserting dummy elements on
+    // >5000 processors just to bring all of them into the same state gets
+    // very slow. Instead, the "indices are global" flag of Epetra_CrsGraph is
+    // set by hand. The setter in Epetra_CrsGraph is protected, so this
+    // derived class exists only to expose it while the data structure is
+    // being created.
+    class Epetra_CrsGraphMod : public Epetra_CrsGraph
+    {
+    public:
+      // Forward to the Epetra_CrsGraph constructor taking Copy, the row map,
+      // the per-row entry counts and 'true' as its last argument.
+      Epetra_CrsGraphMod (const Epetra_Map &row_map,
+                          const int        *n_entries_per_row)
+        :
+        Epetra_CrsGraph(Copy, row_map, n_entries_per_row, true)
+      {}
+
+      // Public, argument-free wrapper that calls the base class setter with
+      // 'true'.
+      void SetIndicesAreGlobal()
+      {
+        Epetra_CrsGraph::SetIndicesAreGlobal(true);
+      }
+    };
+
+
+    // specialization for DynamicSparsityPattern which can provide us with
+    // more information about the non-locally owned rows
+    //
+    // Like the generic version, this releases matrix, nonlocal_matrix and
+    // nonlocal_matrix_exporter, stores a copy of input_col_map in
+    // column_space_map and creates a new Epetra_FECrsMatrix in matrix. In
+    // addition, rows of the pattern's row index set that are not owned by
+    // input_row_map on this process ("ghost rows") and have at least one
+    // entry are collected in a separate non-local graph, from which
+    // nonlocal_matrix is created. If exchange_data is true, the content of
+    // the non-local graph is also exported into the main graph.
+    template <>
+    void
+    reinit_matrix (const Epetra_Map                          &input_row_map,
+                   const Epetra_Map                          &input_col_map,
+                   const DynamicSparsityPattern              &sparsity_pattern,
+                   const bool                                 exchange_data,
+                   std_cxx11::shared_ptr<Epetra_Map>         &column_space_map,
+                   std_cxx11::shared_ptr<Epetra_FECrsMatrix> &matrix,
+                   std_cxx11::shared_ptr<Epetra_CrsMatrix>   &nonlocal_matrix,
+                   std_cxx11::shared_ptr<Epetra_Export>      &nonlocal_matrix_exporter)
+    {
+      // release memory before reallocation
+      matrix.reset();
+      nonlocal_matrix.reset();
+      nonlocal_matrix_exporter.reset();
+
+      AssertDimension (sparsity_pattern.n_rows(),
+                       static_cast<size_type>(n_global_elements(input_row_map)));
+      AssertDimension (sparsity_pattern.n_cols(),
+                       static_cast<size_type>(n_global_elements(input_col_map)));
+
+      column_space_map.reset (new Epetra_Map (input_col_map));
+
+      IndexSet relevant_rows (sparsity_pattern.row_index_set());
+      // serial case: the pattern carries no row index set, so treat every
+      // global row as relevant
+      if (relevant_rows.size() == 0)
+        {
+          relevant_rows.set_size(n_global_elements(input_row_map));
+          relevant_rows.add_range(0, n_global_elements(input_row_map));
+        }
+      relevant_rows.compress();
+      Assert(relevant_rows.n_elements() >= static_cast<unsigned int>(input_row_map.NumMyElements()),
+             ExcMessage("Locally relevant rows of sparsity pattern must contain "
+                        "all locally owned rows"));
+
+      // check whether the relevant rows correspond to exactly the same map as
+      // the owned rows. In that case, do not create the nonlocal graph and
+      // fill the columns by demand
+      bool have_ghost_rows = false;
+      {
+        std::vector<dealii::types::global_dof_index> indices;
+        relevant_rows.fill_index_vector(indices);
+        Epetra_Map relevant_map (TrilinosWrappers::types::int_type(-1),
+                                 TrilinosWrappers::types::int_type(relevant_rows.n_elements()),
+                                 (indices.empty() ? 0 :
+                                  reinterpret_cast<TrilinosWrappers::types::int_type *>(&indices[0])),
+                                 0, input_row_map.Comm());
+        if (relevant_map.SameAs(input_row_map))
+          have_ghost_rows = false;
+        else
+          have_ghost_rows = true;
+      }
+
+      // Collect the row lengths: owned rows go into n_entries_per_row (in the
+      // order in which they appear in relevant_rows), non-empty rows that are
+      // not owned are recorded as ghost rows.
+      const unsigned int n_rows = relevant_rows.n_elements();
+      std::vector<TrilinosWrappers::types::int_type> ghost_rows;
+      std::vector<int> n_entries_per_row(input_row_map.NumMyElements());
+      std::vector<int> n_entries_per_ghost_row;
+      for (unsigned int i=0, own=0; i<n_rows; ++i)
+        {
+          const TrilinosWrappers::types::int_type global_row =
+            relevant_rows.nth_index_in_set(i);
+          if (input_row_map.MyGID(global_row))
+            n_entries_per_row[own++] = sparsity_pattern.row_length(global_row);
+          else if (sparsity_pattern.row_length(global_row) > 0)
+            {
+              ghost_rows.push_back(global_row);
+              n_entries_per_ghost_row.push_back(sparsity_pattern.row_length(global_row));
+            }
+        }
+
+      Epetra_Map off_processor_map(-1, ghost_rows.size(),
+                                   (ghost_rows.size()>0)?(&ghost_rows[0]):NULL,
+                                   0, input_row_map.Comm());
+
+      std_cxx11::shared_ptr<Epetra_CrsGraph> graph;
+      std_cxx11::shared_ptr<Epetra_CrsGraphMod> nonlocal_graph;
+      if (input_row_map.Comm().NumProc() > 1)
+        {
+          graph.reset (new Epetra_CrsGraph (Copy, input_row_map,
+                                            (n_entries_per_row.size()>0)?(&n_entries_per_row[0]):NULL,
+                                            exchange_data ? false : true));
+          // have_ghost_rows only says that the relevant rows differ from the
+          // owned rows; all of the extra rows may be empty, in which case
+          // n_entries_per_ghost_row has no elements and must not be indexed.
+          if (have_ghost_rows == true)
+            nonlocal_graph.reset (new Epetra_CrsGraphMod (off_processor_map,
+                                                          (n_entries_per_ghost_row.size()>0)?(&n_entries_per_ghost_row[0]):NULL));
+        }
+      else
+        graph.reset (new Epetra_CrsGraph (Copy, input_row_map, input_col_map,
+                                          (n_entries_per_row.size()>0)?(&n_entries_per_row[0]):NULL,
+                                          true));
+
+      // now insert the indices, select between the right matrix
+      std::vector<TrilinosWrappers::types::int_type> row_indices;
+
+      for (unsigned int i=0; i<n_rows; ++i)
+        {
+          const TrilinosWrappers::types::int_type global_row =
+            relevant_rows.nth_index_in_set(i);
+          const int row_length = sparsity_pattern.row_length(global_row);
+          if (row_length == 0)
+            continue;
+
+          // scratch buffer reused across rows; every slot in [0,row_length)
+          // is overwritten by the loop below
+          row_indices.resize (row_length, -1);
+          for (int col=0; col < row_length; ++col)
+            row_indices[col] = sparsity_pattern.column_number(global_row, col);
+
+          if (input_row_map.MyGID(global_row))
+            graph->InsertGlobalIndices (global_row, row_length, &row_indices[0]);
+          else
+            {
+              Assert(nonlocal_graph.get() != 0, ExcInternalError());
+              nonlocal_graph->InsertGlobalIndices (global_row, row_length,
+                                                   &row_indices[0]);
+            }
+        }
+
+      // finalize nonlocal graph and create nonlocal matrix
+      if (nonlocal_graph.get() != 0)
+        {
+          // must make sure the IndicesAreGlobal flag is set on all processors
+          // because some processors might not call InsertGlobalIndices (and
+          // we do not want to insert dummy indices on all processors for
+          // large-scale simulations due to the bad impact on performance)
+          nonlocal_graph->SetIndicesAreGlobal();
+          Assert(nonlocal_graph->IndicesAreGlobal() == true,
+                 ExcInternalError());
+          nonlocal_graph->FillComplete(input_col_map, input_row_map);
+          nonlocal_graph->OptimizeStorage();
+
+          // insert data from nonlocal graph into the final sparsity pattern
+          if (exchange_data)
+            {
+              Epetra_Export exporter(nonlocal_graph->RowMap(), input_row_map);
+              int ierr = graph->Export(*nonlocal_graph, exporter, Add);
+              (void)ierr;
+              Assert (ierr==0, ExcTrilinosError(ierr));
+            }
+
+          nonlocal_matrix.reset (new Epetra_CrsMatrix(Copy, *nonlocal_graph));
+        }
+
+      graph->FillComplete(input_col_map, input_row_map);
+      graph->OptimizeStorage();
+
+      AssertDimension (sparsity_pattern.n_cols(),static_cast<size_type>(
+                         n_global_cols(*graph)));
+
+      matrix.reset (new Epetra_FECrsMatrix(Copy, *graph, false));
+    }
+  }
+
+
+
+  // Reinitialize from a sparsity pattern alone. Row and column maps covering
+  // all rows and columns of the pattern are created on
+  // Utilities::Trilinos::comm_self(), and the matrix is rebuilt through
+  // reinit_matrix() without data exchange.
+  template <typename SparsityPatternType>
+  void
+  SparseMatrix::reinit (const SparsityPatternType &sparsity_pattern)
+  {
+    typedef TrilinosWrappers::types::int_type trilinos_int;
+
+    const trilinos_int n_pattern_rows =
+      static_cast<trilinos_int>(sparsity_pattern.n_rows());
+    const trilinos_int n_pattern_cols =
+      static_cast<trilinos_int>(sparsity_pattern.n_cols());
+
+    const Epetra_Map row_map (n_pattern_rows, 0,
+                              Utilities::Trilinos::comm_self());
+    const Epetra_Map col_map (n_pattern_cols, 0,
+                              Utilities::Trilinos::comm_self());
+
+    reinit_matrix (row_map, col_map, sparsity_pattern, false,
+                   column_space_map, matrix, nonlocal_matrix,
+                   nonlocal_matrix_exporter);
+  }
+
+
+
+  // Reinitialize using the same Epetra map for rows and columns. This
+  // overload only forwards to reinit_matrix(); it does not itself modify
+  // last_action or call compress().
+  template <typename SparsityPatternType>
+  void
+  SparseMatrix::reinit (const Epetra_Map          &input_map,
+                        const SparsityPatternType &sparsity_pattern,
+                        const bool                 exchange_data)
+  {
+    reinit_matrix (input_map, input_map, sparsity_pattern, exchange_data,
+                   column_space_map, matrix, nonlocal_matrix,
+                   nonlocal_matrix_exporter);
+  }
+
+
+
+
+
+
+  // Reinitialize from row/column index sets and a sparsity pattern. The index
+  // sets are converted to Epetra maps on the given communicator, the matrix
+  // is rebuilt through reinit_matrix(), and finally last_action is reset and
+  // the matrix is compressed.
+  template <typename SparsityPatternType>
+  inline
+  void SparseMatrix::reinit (const IndexSet            &row_parallel_partitioning,
+                             const IndexSet            &col_parallel_partitioning,
+                             const SparsityPatternType &sparsity_pattern,
+                             const MPI_Comm            &communicator,
+                             const bool                 exchange_data)
+  {
+    const Epetra_Map row_map
+    (row_parallel_partitioning.make_trilinos_map (communicator, false));
+    const Epetra_Map col_map
+    (col_parallel_partitioning.make_trilinos_map (communicator, false));
+
+    reinit_matrix (row_map, col_map, sparsity_pattern, exchange_data,
+                   column_space_map, matrix, nonlocal_matrix,
+                   nonlocal_matrix_exporter);
+
+    // the matrix only becomes usable once it has been compressed
+    last_action = Zero;
+    compress(VectorOperation::insert);
+  }
+
+
+
+  // Reinitialize from explicit row and column Epetra maps and a sparsity
+  // pattern. The matrix is rebuilt through reinit_matrix(); afterwards
+  // last_action is reset to Zero and compress(VectorOperation::insert) is
+  // called.
+  template <typename SparsityPatternType>
+  inline
+  void SparseMatrix::reinit (const Epetra_Map          &row_map,
+                             const Epetra_Map          &col_map,
+                             const SparsityPatternType &sparsity_pattern,
+                             const bool                 exchange_data)
+  {
+    reinit_matrix (row_map, col_map, sparsity_pattern, exchange_data,
+                   column_space_map, matrix, nonlocal_matrix,
+                   nonlocal_matrix_exporter);
+
+    // In the end, the matrix needs to be compressed in order to be really
+    // ready.
+    last_action = Zero;
+    compress(VectorOperation::insert);
+  }
+
+
+
+
+  // Reinitialize from a (parallel) Trilinos sparsity pattern. The column
+  // space map is copied from the pattern's domain partitioner and the matrix
+  // is created on the pattern's graph. A non-local matrix is created only if
+  // the pattern carries a non-local graph. The matrix is compressed at the
+  // end.
+  void
+  SparseMatrix::reinit (const SparsityPattern &sparsity_pattern)
+  {
+    // release the old objects before allocating the new ones
+    matrix.reset ();
+    nonlocal_matrix_exporter.reset();
+
+    column_space_map.reset
+    (new Epetra_Map (sparsity_pattern.domain_partitioner()));
+    matrix.reset
+    (new Epetra_FECrsMatrix (Copy,
+                             sparsity_pattern.trilinos_sparsity_pattern(),
+                             false));
+
+    if (sparsity_pattern.nonlocal_graph.get() == 0)
+      nonlocal_matrix.reset ();
+    else
+      nonlocal_matrix.reset
+      (new Epetra_CrsMatrix(Copy, *sparsity_pattern.nonlocal_graph));
+
+    last_action = Zero;
+    compress(VectorOperation::insert);
+  }
+
+
+
+  // Reinitialize with the layout of another matrix. The column space map is
+  // copied from sparse_matrix's domain partitioner and the new matrix is
+  // created on its Trilinos sparsity pattern; no values are copied here. A
+  // non-local matrix is created from the graph of sparse_matrix's non-local
+  // matrix if it has one. The matrix is compressed at the end.
+  void
+  SparseMatrix::reinit (const SparseMatrix &sparse_matrix)
+  {
+    // nothing to do when asked to take our own layout
+    if (this == &sparse_matrix)
+      return;
+
+    column_space_map.reset (new Epetra_Map (sparse_matrix.domain_partitioner()));
+
+    // release the old objects before allocating the new matrix
+    matrix.reset ();
+    nonlocal_matrix_exporter.reset();
+    matrix.reset
+    (new Epetra_FECrsMatrix (Copy,
+                             sparse_matrix.trilinos_sparsity_pattern(),
+                             false));
+
+    if (sparse_matrix.nonlocal_matrix.get() == 0)
+      nonlocal_matrix.reset();
+    else
+      nonlocal_matrix.reset
+      (new Epetra_CrsMatrix (Copy, sparse_matrix.nonlocal_matrix->Graph()));
+
+    last_action = Zero;
+    compress(VectorOperation::insert);
+  }
+
+
+
+  // Reinitialize from a deal.II SparseMatrix, optionally copying its values.
+  //
+  // If copy_values is false, only the layout is taken, either from
+  // use_this_sparsity (if non-null) or from the matrix's own sparsity
+  // pattern. Otherwise the layout is rebuilt only if this matrix is not yet
+  // allocated or differs in the number of rows or of nonzeros. Then, for
+  // every row contained in row_parallel_partitioning, those entries of
+  // dealii_sparse_matrix are copied whose column is also present in the
+  // selected sparsity pattern and whose absolute value exceeds
+  // drop_tolerance. The matrix is compressed at the end.
+  template <typename number>
+  inline
+  void SparseMatrix::reinit (const IndexSet      &row_parallel_partitioning,
+                             const IndexSet      &col_parallel_partitioning,
+                             const ::dealii::SparseMatrix<number> &dealii_sparse_matrix,
+                             const MPI_Comm      &communicator,
+                             const double         drop_tolerance,
+                             const bool           copy_values,
+                             const ::dealii::SparsityPattern *use_this_sparsity)
+  {
+    if (copy_values == false)
+      {
+        // in case we do not copy values, just
+        // call the other function.
+        if (use_this_sparsity == 0)
+          reinit (row_parallel_partitioning, col_parallel_partitioning,
+                  dealii_sparse_matrix.get_sparsity_pattern(),
+                  communicator, false);
+        else
+          reinit (row_parallel_partitioning, col_parallel_partitioning,
+                  *use_this_sparsity, communicator, false);
+        return;
+      }
+
+    const size_type n_rows = dealii_sparse_matrix.m();
+
+    AssertDimension (row_parallel_partitioning.size(), n_rows);
+    AssertDimension (col_parallel_partitioning.size(), dealii_sparse_matrix.n());
+
+    const ::dealii::SparsityPattern &sparsity_pattern =
+      (use_this_sparsity!=0)? *use_this_sparsity :
+      dealii_sparse_matrix.get_sparsity_pattern();
+
+    if (matrix.get() == 0 ||
+        m() != n_rows ||
+        n_nonzero_elements() != sparsity_pattern.n_nonzero_elements())
+      {
+        reinit (row_parallel_partitioning, col_parallel_partitioning,
+                sparsity_pattern, communicator, false);
+      }
+
+    // fill the values. the same as above: go through all rows of the
+    // matrix, and then all columns. since the sparsity patterns of the
+    // input matrix and the specified sparsity pattern might be different,
+    // need to go through the row for both these sparsity structures
+    // simultaneously in order to really set the correct values.
+    size_type maximum_row_length = matrix->MaxNumEntries();
+    std::vector<size_type> row_indices (maximum_row_length);
+    std::vector<TrilinosScalar> values (maximum_row_length);
+
+    for (size_type row=0; row<n_rows; ++row)
+      // see if the row is locally stored on this processor
+      if (row_parallel_partitioning.is_element(row) == true)
+        {
+          ::dealii::SparsityPattern::iterator select_index =
+            sparsity_pattern.begin(row);
+          const ::dealii::SparsityPattern::iterator select_end =
+            sparsity_pattern.end(row);
+          typename ::dealii::SparseMatrix<number>::const_iterator it =
+            dealii_sparse_matrix.begin(row);
+          const typename ::dealii::SparseMatrix<number>::const_iterator it_end =
+            dealii_sparse_matrix.end(row);
+          size_type col = 0;
+          if (sparsity_pattern.n_rows() == sparsity_pattern.n_cols())
+            {
+              // optimized diagonal: for square patterns the first entry of
+              // the row is expected to be the diagonal (checked by the
+              // assertion), so handle it separately before the merge below
+              AssertDimension(it->column(), row);
+              if (std::fabs(it->value()) > drop_tolerance)
+                {
+                  values[col] = it->value();
+                  row_indices[col++] = it->column();
+                }
+              ++select_index;
+              ++it;
+            }
+
+          // Intersect the remaining entries of the two rows. Only advance or
+          // dereference an iterator after checking that it has not reached
+          // the end of its row, and only copy an entry if both iterators
+          // point to the same column, i.e. if the column is really part of
+          // the selected sparsity pattern.
+          while (it != it_end && select_index != select_end)
+            {
+              if (select_index->column() < it->column())
+                ++select_index;
+              else if (it->column() < select_index->column())
+                ++it;
+              else
+                {
+                  if (std::fabs(it->value()) > drop_tolerance)
+                    {
+                      values[col] = it->value();
+                      row_indices[col++] = it->column();
+                    }
+                  ++select_index;
+                  ++it;
+                }
+            }
+          set (row, col, reinterpret_cast<size_type *>(&row_indices[0]),
+               &values[0], false);
+        }
+    compress(VectorOperation::insert);
+  }
+
+
+
+  // Reinitialize from a deal.II SparseMatrix without an explicit
+  // partitioning: forwards to the IndexSet-based overload with complete index
+  // sets of sizes m() and n() and MPI_COMM_SELF as communicator.
+  template <typename number>
+  void
+  SparseMatrix::reinit (const ::dealii::SparseMatrix<number> &dealii_sparse_matrix,
+                        const double                          drop_tolerance,
+                        const bool                            copy_values,
+                        const ::dealii::SparsityPattern      *use_this_sparsity)
+  {
+    reinit (complete_index_set(dealii_sparse_matrix.m()),
+            complete_index_set(dealii_sparse_matrix.n()),
+            dealii_sparse_matrix, MPI_COMM_SELF, drop_tolerance,
+            copy_values, use_this_sparsity);
+  }
+
+
+
+  // Reinitialize from a deal.II SparseMatrix using one Epetra map for both
+  // rows and columns. The map is converted to an IndexSet twice and the call
+  // is forwarded to the IndexSet-based overload.
+  // NOTE(review): MPI_COMM_SELF is passed on regardless of the communicator
+  // input_map lives on -- confirm that this is intended for distributed maps.
+  template <typename number>
+  void
+  SparseMatrix::reinit (const Epetra_Map                     &input_map,
+                        const ::dealii::SparseMatrix<number> &dealii_sparse_matrix,
+                        const double                          drop_tolerance,
+                        const bool                            copy_values,
+                        const ::dealii::SparsityPattern      *use_this_sparsity)
+  {
+    reinit (IndexSet(input_map), IndexSet(input_map), dealii_sparse_matrix,
+            MPI_COMM_SELF, drop_tolerance, copy_values, use_this_sparsity);
+  }
+
+
+
+  // Reinitialize from a deal.II SparseMatrix using separate Epetra maps for
+  // rows and columns. Both maps are converted to IndexSets and the call is
+  // forwarded to the IndexSet-based overload.
+  // NOTE(review): MPI_COMM_SELF is passed on regardless of the communicator
+  // the maps live on -- confirm that this is intended for distributed maps.
+  template <typename number>
+  void
+  SparseMatrix::reinit (const Epetra_Map                     &input_row_map,
+                        const Epetra_Map                     &input_col_map,
+                        const ::dealii::SparseMatrix<number> &dealii_sparse_matrix,
+                        const double                          drop_tolerance,
+                        const bool                            copy_values,
+                        const ::dealii::SparsityPattern      *use_this_sparsity)
+  {
+    reinit (IndexSet(input_row_map), IndexSet(input_col_map),
+            dealii_sparse_matrix, MPI_COMM_SELF,
+            drop_tolerance, copy_values, use_this_sparsity);
+  }
+
+
+
+  // Reinitialize from an Epetra_CrsMatrix that has already been filled. The
+  // column space map is copied from the input's domain map and the new
+  // matrix is created on the input's graph. If copy_values is set,
+  // NumMyNonzeros() scalars are copied from the value pointer of the input's
+  // first local row to that of the new matrix. The matrix is compressed at
+  // the end.
+  void
+  SparseMatrix::reinit (const Epetra_CrsMatrix &input_matrix,
+                        const bool              copy_values)
+  {
+    Assert (input_matrix.Filled()==true,
+            ExcMessage("Input CrsMatrix has not called FillComplete()!"));
+
+    column_space_map.reset (new Epetra_Map (input_matrix.DomainMap()));
+
+    const Epetra_CrsGraph &input_graph = input_matrix.Graph();
+
+    // release the old objects before allocating the new matrix
+    nonlocal_matrix.reset();
+    nonlocal_matrix_exporter.reset();
+    matrix.reset ();
+    matrix.reset (new Epetra_FECrsMatrix(Copy, input_graph, false));
+
+    matrix->FillComplete (*column_space_map, input_matrix.RangeMap(), true);
+
+    if (copy_values)
+      {
+        // start at the first data entry of each matrix and copy the values
+        // of all local nonzeros in one go
+        const TrilinosScalar *source      = input_matrix[0];
+        TrilinosScalar       *destination = (*matrix)[0];
+        const size_type n_local_nonzeros  = input_matrix.NumMyNonzeros();
+        std::memcpy (destination, source,
+                     n_local_nonzeros*sizeof (TrilinosScalar));
+      }
+
+    last_action = Zero;
+    compress(VectorOperation::insert);
+  }
+
+
+
+  // Flush pending modifications. The combine mode is taken from last_action,
+  // or, if that is Zero, derived from the operation argument (add/unknown ->
+  // Add, insert -> Insert). If a non-local matrix exists and the mode is Add,
+  // its content is exported into the main matrix and then zeroed; otherwise
+  // GlobalAssemble() is called. Finally storage is optimized, last_action is
+  // reset to Zero and the matrix is marked as compressed. Nonzero Trilinos
+  // error codes from the export, fill-complete/assemble and optimize steps
+  // raise ExcTrilinosError via AssertThrow.
+  void
+  SparseMatrix::compress (::dealii::VectorOperation::values operation)
+  {
+    Epetra_CombineMode mode = last_action;
+    if (last_action != Zero)
+      {
+        // something was written since the last compress(): the requested
+        // operation must not contradict it
+        Assert(
+          ((last_action == Add) && (operation!=::dealii::VectorOperation::insert))
+          ||
+          ((last_action == Insert) && (operation!=::dealii::VectorOperation::add)),
+          ExcMessage("Operation and argument to compress() do not match"));
+      }
+    else if ((operation==::dealii::VectorOperation::add) ||
+             (operation==::dealii::VectorOperation::unknown))
+      mode = Add;
+    else if (operation==::dealii::VectorOperation::insert)
+      mode = Insert;
+
+    // flush buffers
+    int ierr;
+    const bool export_nonlocal_rows =
+      (nonlocal_matrix.get() != 0 && mode == Add);
+    if (!export_nonlocal_rows)
+      ierr = matrix->GlobalAssemble (*column_space_map, matrix->RowMap(),
+                                     true, mode);
+    else
+      {
+        // do only export in case of an add() operation, otherwise the owning
+        // processor must have set the correct entry
+        nonlocal_matrix->FillComplete(*column_space_map, matrix->RowMap());
+
+        // the exporter is created on first use and kept for later calls
+        if (nonlocal_matrix_exporter.get() == 0)
+          nonlocal_matrix_exporter.reset
+          (new Epetra_Export(nonlocal_matrix->RowMap(), matrix->RowMap()));
+
+        ierr = matrix->Export(*nonlocal_matrix, *nonlocal_matrix_exporter, mode);
+        AssertThrow(ierr == 0, ExcTrilinosError(ierr));
+
+        ierr = matrix->FillComplete(*column_space_map, matrix->RowMap());
+        nonlocal_matrix->PutScalar(0);
+      }
+
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    ierr = matrix->OptimizeStorage ();
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    last_action = Zero;
+
+    compressed = true;
+  }
+
+
+
+  // Replace the matrix by an empty one: the column space map becomes a map
+  // with zero elements on Utilities::Trilinos::comm_self(), the matrix is
+  // re-created on that map, and the non-local matrix and its exporter are
+  // dropped. The new matrix is fill-completed and marked as compressed.
+  // last_action is not modified here.
+  void
+  SparseMatrix::clear ()
+  {
+    // When we clear the matrix, reset
+    // the pointer and generate an
+    // empty matrix.
+    column_space_map.reset (new Epetra_Map (0, 0,
+                                            Utilities::Trilinos::comm_self()));
+    matrix.reset (new Epetra_FECrsMatrix(View, *column_space_map, 0));
+    nonlocal_matrix.reset();
+    nonlocal_matrix_exporter.reset();
+
+    matrix->FillComplete();
+
+    compressed = true;
+  }
+
+
+
+  // Zero out one row of the matrix. Rows not stored on this processor are
+  // silently skipped. If new_diag_value is zero, every entry of the row is
+  // set to zero. Otherwise all off-diagonal entries are set to zero and the
+  // diagonal entry is left untouched, except that a diagonal entry which is
+  // exactly zero is replaced by new_diag_value. If the row has no diagonal
+  // entry in the sparsity pattern, only the zeroing takes place.
+  void
+  SparseMatrix::clear_row (const size_type      row,
+                           const TrilinosScalar new_diag_value)
+  {
+    Assert (matrix->Filled()==true, ExcMatrixNotCompressed());
+
+    // Only do this on the rows owned
+    // locally on this processor.
+    int local_row =
+      matrix->LRID(static_cast<TrilinosWrappers::types::int_type>(row));
+    if (local_row >= 0)
+      {
+        TrilinosScalar *values;
+        int *col_indices;
+        int num_entries;
+        const int ierr = matrix->ExtractMyRowView(local_row, num_entries,
+                                                  values, col_indices);
+        (void)ierr;
+
+        Assert (ierr == 0,
+                ExcTrilinosError(ierr));
+
+        // Locate the diagonal entry. std::find returns the end pointer (not
+        // a null pointer) when the value is absent, so "not found" shows up
+        // as diag_index == num_entries and must be tested for explicitly
+        // before values[diag_index] is touched.
+        const int *diag_find = std::find(col_indices,col_indices+num_entries,local_row);
+        const int diag_index = static_cast<int>(diag_find - col_indices);
+        const bool has_diagonal = (diag_index != num_entries);
+
+        for (TrilinosWrappers::types::int_type j=0; j<num_entries; ++j)
+          if (diag_index != j || new_diag_value == 0)
+            values[j] = 0.;
+
+        if (has_diagonal && std::fabs(values[diag_index]) == 0.0 &&
+            new_diag_value != 0.0)
+          values[diag_index] = new_diag_value;
+      }
+  }
+
+
+
+  // Apply clear_row() with the same new_diag_value to every row index in
+  // rows, in the order given.
+  void
+  SparseMatrix::clear_rows (const std::vector<size_type> &rows,
+                            const TrilinosScalar          new_diag_value)
+  {
+    for (std::vector<size_type>::const_iterator r = rows.begin();
+         r != rows.end(); ++r)
+      clear_row(*r, new_diag_value);
+  }
+
+
+
+  // Return the value of entry (i,j). The row must be stored on this
+  // processor and the entry must be part of the sparsity pattern; both
+  // conditions are checked with assertions. Where those assertions do not
+  // fire, zero is returned instead. According to the notes in this file,
+  // these two assertions are the only differences to el(i,j).
+  TrilinosScalar
+  SparseMatrix::operator() (const size_type i,
+                            const size_type j) const
+  {
+    // translate the global indices into local Trilinos indices
+    const int trilinos_i =
+      matrix->LRID(static_cast<TrilinosWrappers::types::int_type>(i));
+    const int trilinos_j =
+      matrix->LCID(static_cast<TrilinosWrappers::types::int_type>(j));
+
+    // the row is not available on the present processor
+    if (trilinos_i == -1)
+      {
+        Assert (false, ExcAccessToNonLocalElement(i, j, local_range().first,
+                                                  local_range().second));
+        return 0.;
+      }
+
+    // the matrix must already have been transformed to local indices
+    Assert (matrix->Filled(), ExcMatrixNotCompressed());
+
+    // get a view of the row and make sure that worked
+    // TODO Check that col_indices are int and not long long
+    int nnz_present = matrix->NumMyEntries(trilinos_i);
+    int nnz_extracted;
+    int *col_indices;
+    TrilinosScalar *values;
+    int ierr = matrix->ExtractMyRowView(trilinos_i, nnz_extracted,
+                                        values, col_indices);
+    (void)ierr;
+    Assert (ierr==0, ExcTrilinosError(ierr));
+
+    Assert (nnz_present == nnz_extracted,
+            ExcDimensionMismatch(nnz_present, nnz_extracted));
+
+    // look up the position of column j within the row
+    const int *position =
+      std::find(col_indices, col_indices + nnz_present, trilinos_j);
+    const int local_col_index = static_cast<int>(position - col_indices);
+
+    // the entry is not part of the sparsity pattern
+    if (local_col_index == nnz_present)
+      {
+        Assert (false, ExcInvalidIndex (i,j));
+        return 0.;
+      }
+
+    return values[local_col_index];
+  }
+
+
+
+  TrilinosScalar
+  SparseMatrix::el (const size_type i,
+                    const size_type j) const
+  {
+    // Map the global (i,j) pair to local row and column numbers.
+    const int trilinos_i = matrix->LRID(static_cast<TrilinosWrappers::types::int_type>(i));
+    const int trilinos_j = matrix->LCID(static_cast<TrilinosWrappers::types::int_type>(j));
+
+    // Data that is not available on this processor is reported as zero
+    // without any error; callers that want error checking should use
+    // operator() instead.
+    if (trilinos_i == -1 || trilinos_j == -1)
+      return 0.;
+
+    // The row view requested below needs a matrix that has already been
+    // transformed to local indices.
+    Assert (matrix->Filled(), ExcMatrixNotCompressed());
+
+    // Storage for the view of the row: number of entries, column indices
+    // and values.
+    const int nnz_present = matrix->NumMyEntries(trilinos_i);
+    int nnz_extracted;
+    int *col_indices;
+    TrilinosScalar *values;
+
+    // Ask Trilinos for the view and verify that it reported no error.
+    const int ierr = matrix->ExtractMyRowView(trilinos_i, nnz_extracted,
+                                              values, col_indices);
+    (void)ierr;
+    Assert (ierr==0, ExcTrilinosError(ierr));
+
+    Assert (nnz_present == nnz_extracted,
+            ExcDimensionMismatch(nnz_present, nnz_extracted));
+
+    // Look for the requested local column among the column indices of
+    // this row.
+    int *const row_end = col_indices + nnz_present;
+    int *const el_find = std::find(col_indices, row_end, trilinos_j);
+
+    // The only difference to operator(): an element outside the sparsity
+    // pattern simply yields zero rather than an exception.
+    if (el_find == row_end)
+      return 0;
+
+    return values[el_find - col_indices];
+  }
+
+
+
+  TrilinosScalar
+  SparseMatrix::diag_element (const size_type i) const
+  {
+    Assert (m() == n(), ExcNotQuadratic());
+
+    // With DEBUG defined the lookup goes through operator(), which checks
+    // that (i,i) is a valid element (in parallel). Otherwise el(i,j) is
+    // used, since Trilinos doesn't seem to offer a more efficient way to
+    // access the diagonal.
+#ifdef DEBUG
+    const TrilinosScalar diagonal_entry = operator()(i,i);
+#else
+    const TrilinosScalar diagonal_entry = el(i,i);
+#endif
+
+    return diagonal_entry;
+  }
+
+
+
+  unsigned int
+  SparseMatrix::row_length (const size_type row) const
+  {
+    Assert (row < m(), ExcInternalError());
+
+    // Local number of the row; it is non-negative on the processor that
+    // owns the row.
+    const int local_row = matrix->LRID(static_cast<TrilinosWrappers::types::int_type>(row));
+
+    // Remains at -1 if the row is not stored here; note that this value
+    // is then converted to the unsigned return type.
+    int n_entries = -1;
+    if (local_row >= 0)
+      {
+        const int ierr = matrix->NumMyRowEntries (local_row, n_entries);
+        AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+      }
+
+    return n_entries;
+  }
+
+
+
+  void
+  SparseMatrix::set (const std::vector<size_type>     &row_indices,
+                     const std::vector<size_type>     &col_indices,
+                     const FullMatrix<TrilinosScalar> &values,
+                     const bool                        elide_zero_values)
+  {
+    // The dense block must have one row per row index and one column per
+    // column index.
+    Assert (row_indices.size() == values.m(),
+            ExcDimensionMismatch(row_indices.size(), values.m()));
+    Assert (col_indices.size() == values.n(),
+            ExcDimensionMismatch(col_indices.size(), values.n()));
+
+    // Write the block one row at a time through the pointer-based set()
+    // overload; row r of 'values' goes to matrix row row_indices[r].
+    const size_type n_block_rows = row_indices.size();
+    const size_type n_block_cols = col_indices.size();
+    for (size_type r=0; r<n_block_rows; ++r)
+      set (row_indices[r], n_block_cols, &col_indices[0], &values(r,0),
+           elide_zero_values);
+  }
+
+
+
+  void
+  SparseMatrix::set (const size_type                    row,
+                     const std::vector<size_type>      &col_indices,
+                     const std::vector<TrilinosScalar> &values,
+                     const bool                         elide_zero_values)
+  {
+    // Exactly one value is expected per column index.
+    Assert (col_indices.size() == values.size(),
+            ExcDimensionMismatch(col_indices.size(), values.size()));
+
+    // Forward the raw arrays to the pointer-based set() overload.
+    const size_type n_entries = col_indices.size();
+    set (row, n_entries, &col_indices[0], &values[0], elide_zero_values);
+  }
+
+
+
+  // Write n_cols entries into global row 'row': entry k goes to column
+  // col_indices[k] and receives values[k]. If elide_zero_values is true,
+  // entries whose value is exactly zero are dropped before the data is
+  // handed to Trilinos. Rows owned by this processor are written through
+  // the Epetra_CrsMatrix interface (inserting if the matrix is not yet
+  // filled, replacing otherwise); all other rows go through the
+  // Epetra_FECrsMatrix interface and mark the matrix as not compressed.
+  void
+  SparseMatrix::set (const size_type       row,
+                     const size_type       n_cols,
+                     const size_type      *col_indices,
+                     const TrilinosScalar *values,
+                     const bool            elide_zero_values)
+  {
+    AssertIndexRange(row, this->m());
+
+    // If the previous operation on this matrix was an addition, finish it
+    // with a GlobalAssemble() call before switching to insertion.
+    int ierr;
+    if (last_action == Add)
+      {
+        ierr = matrix->GlobalAssemble (*column_space_map, matrix->RowMap(),
+                                       true);
+
+        Assert (ierr == 0, ExcTrilinosError(ierr));
+      }
+
+    last_action = Insert;
+
+    // Pointers to, and length of, the data that is finally passed to
+    // Trilinos. They refer either to the caller's arrays or to one of the
+    // scratch buffers declared below.
+    TrilinosWrappers::types::int_type *col_index_ptr;
+    TrilinosScalar *col_value_ptr;
+    TrilinosWrappers::types::int_type n_columns;
+
+    // Scratch space for the zero-elided copy: fixed-size arrays for rows
+    // with at most 100 entries, vectors (resized on demand) for longer rows.
+    TrilinosScalar short_val_array[100];
+    TrilinosWrappers::types::int_type short_index_array[100];
+    std::vector<TrilinosScalar> long_val_array;
+    std::vector<TrilinosWrappers::types::int_type> long_index_array;
+
+
+    // If we don't elide zeros, the pointers are already available... need to
+    // cast to non-const pointers as that is the format taken by Trilinos (but
+    // we will not modify const data)
+    if (elide_zero_values == false)
+      {
+        // NOTE(review): this cast reinterprets the size_type array as an
+        // int_type array, which presumably requires both types to have the
+        // same size -- confirm.
+        col_index_ptr = (TrilinosWrappers::types::int_type *)col_indices;
+        col_value_ptr = const_cast<TrilinosScalar *>(values);
+        n_columns = n_cols;
+      }
+    else
+      {
+        // Otherwise, extract nonzero values in each row and get the
+        // respective indices.
+        if (n_cols > 100)
+          {
+            long_val_array.resize(n_cols);
+            long_index_array.resize(n_cols);
+            col_index_ptr = &long_index_array[0];
+            col_value_ptr = &long_val_array[0];
+          }
+        else
+          {
+            col_index_ptr = &short_index_array[0];
+            col_value_ptr = &short_val_array[0];
+          }
+
+        // Copy only the nonzero entries; n_columns counts how many were
+        // kept.
+        n_columns = 0;
+        for (size_type j=0; j<n_cols; ++j)
+          {
+            const double value = values[j];
+            AssertIsFinite(value);
+            if (value != 0)
+              {
+                col_index_ptr[n_columns] = col_indices[j];
+                col_value_ptr[n_columns] = value;
+                n_columns++;
+              }
+          }
+
+        Assert(n_columns <= (TrilinosWrappers::types::int_type)n_cols, ExcInternalError());
+      }
+
+
+    // If the calling matrix owns the row to which we want to insert values,
+    // we can directly call the Epetra_CrsMatrix input function, which is much
+    // faster than the Epetra_FECrsMatrix function. We distinguish between two
+    // cases: the first one is when the matrix is not filled (i.e., it is
+    // possible to add new elements to the sparsity pattern), and the second
+    // one is when the pattern is already fixed. In the former case, we add
+    // the possibility to insert new values, and in the second we just replace
+    // data.
+    if (matrix->RowMap().MyGID(static_cast<TrilinosWrappers::types::int_type>(row)) == true)
+      {
+        if (matrix->Filled() == false)
+          {
+            ierr = matrix->Epetra_CrsMatrix::InsertGlobalValues(
+                     static_cast<TrilinosWrappers::types::int_type>(row),
+                     static_cast<int>(n_columns),const_cast<double *>(col_value_ptr),
+                     col_index_ptr);
+
+            // When inserting elements, we do not want to create exceptions in
+            // the case when inserting non-local data (since that's what we
+            // want to do right now).
+            if (ierr > 0)
+              ierr = 0;
+          }
+        else
+          ierr = matrix->Epetra_CrsMatrix::ReplaceGlobalValues(row, n_columns,
+                                                               col_value_ptr,
+                                                               col_index_ptr);
+      }
+    else
+      {
+        // When we're at off-processor data, we have to stick with the
+        // standard Insert/ReplaceGlobalValues function. Nevertheless, the way
+        // we call it is the fastest one (any other will lead to repeated
+        // allocation and deallocation of memory in order to call the function
+        // we already use, which is very inefficient if writing one element at
+        // a time).
+        compressed = false;
+
+        if (matrix->Filled() == false)
+          {
+            // One row (count 1, pointer to 'row') with n_columns entries,
+            // values passed as a one-element array of row pointers.
+            ierr = matrix->InsertGlobalValues (1,
+                                               (TrilinosWrappers::types::int_type *)&row,
+                                               n_columns, col_index_ptr,
+                                               &col_value_ptr,
+                                               Epetra_FECrsMatrix::ROW_MAJOR);
+            // As above, positive return codes are not treated as errors
+            // when inserting.
+            if (ierr > 0)
+              ierr = 0;
+          }
+        else
+          ierr = matrix->ReplaceGlobalValues (1,
+                                              (TrilinosWrappers::types::int_type *)&row,
+                                              n_columns, col_index_ptr,
+                                              &col_value_ptr,
+                                              Epetra_FECrsMatrix::ROW_MAJOR);
+        // use the FECrsMatrix facilities for set even in the case when we
+        // have explicitly set the off-processor rows because that only works
+        // properly when adding elements, not when setting them (since we want
+        // to only touch elements that have been set explicitly, and there is
+        // no way on the receiving processor to identify them otherwise)
+      }
+
+    // A positive code that is still left at this point (it can only come
+    // from one of the Replace calls) is reported as access to an element
+    // that is not present; a negative code is thrown as a Trilinos error.
+    Assert (ierr <= 0, ExcAccessToNonPresentElement(row, col_index_ptr[0]));
+    AssertThrow (ierr >= 0, ExcTrilinosError(ierr));
+  }
+
+
+
+  void
+  SparseMatrix::add (const std::vector<size_type>     &indices,
+                     const FullMatrix<TrilinosScalar> &values,
+                     const bool                        elide_zero_values)
+  {
+    // The same index set is used for rows and columns, hence the dense
+    // block has to be square and match the number of indices.
+    Assert (indices.size() == values.m(),
+            ExcDimensionMismatch(indices.size(), values.m()));
+    Assert (values.m() == values.n(), ExcNotQuadratic());
+
+    // Add the block one row at a time through the pointer-based add()
+    // overload; row r of 'values' goes to matrix row indices[r].
+    const size_type n_indices = indices.size();
+    for (size_type r=0; r<n_indices; ++r)
+      add (indices[r], n_indices, &indices[0], &values(r,0),
+           elide_zero_values);
+  }
+
+
+
+  void
+  SparseMatrix::add (const std::vector<size_type>  &row_indices,
+                     const std::vector<size_type>  &col_indices,
+                     const FullMatrix<TrilinosScalar> &values,
+                     const bool                        elide_zero_values)
+  {
+    // The dense block must have one row per row index and one column per
+    // column index.
+    Assert (row_indices.size() == values.m(),
+            ExcDimensionMismatch(row_indices.size(), values.m()));
+    Assert (col_indices.size() == values.n(),
+            ExcDimensionMismatch(col_indices.size(), values.n()));
+
+    // Add the block one row at a time through the pointer-based add()
+    // overload; row r of 'values' goes to matrix row row_indices[r].
+    const size_type n_block_rows = row_indices.size();
+    const size_type n_block_cols = col_indices.size();
+    for (size_type r=0; r<n_block_rows; ++r)
+      add (row_indices[r], n_block_cols, &col_indices[0],
+           &values(r,0), elide_zero_values);
+  }
+
+
+
+  void
+  SparseMatrix::add (const size_type                    row,
+                     const std::vector<size_type>      &col_indices,
+                     const std::vector<TrilinosScalar> &values,
+                     const bool                         elide_zero_values)
+  {
+    // Exactly one value is expected per column index.
+    Assert (col_indices.size() == values.size(),
+            ExcDimensionMismatch(col_indices.size(), values.size()));
+
+    // Forward the raw arrays to the pointer-based add() overload.
+    const size_type n_entries = col_indices.size();
+    add (row, n_entries, &col_indices[0], &values[0], elide_zero_values);
+  }
+
+
+
+  // Add n_cols values to global row 'row': values[k] is added to the entry
+  // in column col_indices[k]. If elide_zero_values is true, entries whose
+  // value is exactly zero are dropped before the data is handed to
+  // Trilinos. The last (unnamed) argument is not used by this
+  // implementation. Rows owned by this processor are updated through the
+  // Epetra_CrsMatrix interface; other rows are written either into
+  // nonlocal_matrix (if present) or through the Epetra_FECrsMatrix
+  // interface, and mark the matrix as not compressed.
+  void
+  SparseMatrix::add (const size_type       row,
+                     const size_type       n_cols,
+                     const size_type      *col_indices,
+                     const TrilinosScalar *values,
+                     const bool            elide_zero_values,
+                     const bool            /*col_indices_are_sorted*/)
+  {
+    AssertIndexRange(row, this->m());
+    // If the previous operation on this matrix was an insertion, finish it
+    // with a GlobalAssemble() call before switching to addition.
+    int ierr;
+    if (last_action == Insert)
+      {
+        // TODO: this could lead to a dead lock when only one processor
+        // calls GlobalAssemble.
+        ierr = matrix->GlobalAssemble(*column_space_map,
+                                      matrix->RowMap(), false);
+
+        AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+      }
+
+    last_action = Add;
+
+    // Pointers to, and length of, the data that is finally passed to
+    // Trilinos. They refer either to the caller's arrays or to one of the
+    // scratch buffers declared below.
+    TrilinosWrappers::types::int_type *col_index_ptr;
+    TrilinosScalar *col_value_ptr;
+    TrilinosWrappers::types::int_type n_columns;
+
+    // Scratch space for the zero-elided copy: fixed-size arrays for rows
+    // with at most 100 entries, vectors (resized on demand) for longer rows.
+    double short_val_array[100];
+    TrilinosWrappers::types::int_type short_index_array[100];
+    std::vector<TrilinosScalar> long_val_array;
+    std::vector<TrilinosWrappers::types::int_type> long_index_array;
+
+    // If we don't elide zeros, the pointers are already available... need to
+    // cast to non-const pointers as that is the format taken by Trilinos (but
+    // we will not modify const data)
+    if (elide_zero_values == false)
+      {
+        // NOTE(review): this cast reinterprets the size_type array as an
+        // int_type array, which presumably requires both types to have the
+        // same size -- confirm.
+        col_index_ptr = (TrilinosWrappers::types::int_type *)col_indices;
+        col_value_ptr = const_cast<TrilinosScalar *>(values);
+        n_columns = n_cols;
+#ifdef DEBUG
+        // With DEBUG defined, check every incoming value for finiteness.
+        for (size_type j=0; j<n_cols; ++j)
+          AssertIsFinite(values[j]);
+#endif
+      }
+    else
+      {
+        // Otherwise, extract nonzero values in each row and the corresponding
+        // index.
+        if (n_cols > 100)
+          {
+            long_val_array.resize(n_cols);
+            long_index_array.resize(n_cols);
+            col_index_ptr = &long_index_array[0];
+            col_value_ptr = &long_val_array[0];
+          }
+        else
+          {
+            col_index_ptr = &short_index_array[0];
+            col_value_ptr = &short_val_array[0];
+          }
+
+        // Copy only the nonzero entries; n_columns counts how many were
+        // kept.
+        n_columns = 0;
+        for (size_type j=0; j<n_cols; ++j)
+          {
+            const double value = values[j];
+
+            AssertIsFinite(value);
+            if (value != 0)
+              {
+                col_index_ptr[n_columns] = col_indices[j];
+                col_value_ptr[n_columns] = value;
+                n_columns++;
+              }
+          }
+
+        Assert(n_columns <= (TrilinosWrappers::types::int_type)n_cols, ExcInternalError());
+
+      }
+
+    // If the calling processor owns the row to which we want to add values, we
+    // can directly call the Epetra_CrsMatrix input function, which is much
+    // faster than the Epetra_FECrsMatrix function.
+    if (matrix->RowMap().MyGID(static_cast<TrilinosWrappers::types::int_type>(row)) == true)
+      {
+        ierr = matrix->Epetra_CrsMatrix::SumIntoGlobalValues(row, n_columns,
+                                                             col_value_ptr,
+                                                             col_index_ptr);
+      }
+    else if (nonlocal_matrix.get() != 0)
+      {
+        compressed = false;
+        // this is the case when we have explicitly set the off-processor rows
+        // and want to create a separate matrix object for them (to retain
+        // thread-safety)
+        Assert (nonlocal_matrix->RowMap().LID(static_cast<TrilinosWrappers::types::int_type>(row)) != -1,
+                ExcMessage("Attempted to write into off-processor matrix row "
+                           "that has not be specified as being writable upon "
+                           "initialization"));
+        ierr = nonlocal_matrix->SumIntoGlobalValues(row, n_columns,
+                                                    col_value_ptr,
+                                                    col_index_ptr);
+      }
+    else
+      {
+        // When we're at off-processor data, we have to stick with the
+        // standard SumIntoGlobalValues function. Nevertheless, the way we
+        // call it is the fastest one (any other will lead to repeated
+        // allocation and deallocation of memory in order to call the function
+        // we already use, which is very inefficient if writing one element at
+        // a time).
+        compressed = false;
+
+        ierr = matrix->SumIntoGlobalValues (1,
+                                            (TrilinosWrappers::types::int_type *)&row, n_columns,
+                                            col_index_ptr,
+                                            &col_value_ptr,
+                                            Epetra_FECrsMatrix::ROW_MAJOR);
+      }
+
+#ifdef DEBUG
+    // Diagnostics for a positive return code: print the columns we tried to
+    // add and the columns that the targeted row actually contains, then
+    // raise the assertion.
+    if (ierr > 0)
+      {
+        std::cout << "------------------------------------------"
+                  << std::endl;
+        std::cout << "Got error " << ierr << " in row " << row
+                  << " of proc " << matrix->RowMap().Comm().MyPID()
+                  << " when trying to add the columns:" << std::endl;
+        for (TrilinosWrappers::types::int_type i=0; i<n_columns; ++i)
+          std::cout << col_index_ptr[i] << " ";
+        std::cout << std::endl << std::endl;
+        std::cout << "Matrix row "
+                  << (matrix->RowMap().MyGID(static_cast<TrilinosWrappers::types::int_type>(row)) == false ? "(nonlocal part)" : "")
+                  << " has the following indices:" << std::endl;
+        std::vector<TrilinosWrappers::types::int_type> indices;
+        // Use the graph of the matrix object that the row was written to:
+        // nonlocal_matrix for rows not owned here (if that object exists),
+        // the main matrix otherwise.
+        const Epetra_CrsGraph *graph =
+          (nonlocal_matrix.get() != 0 &&
+           matrix->RowMap().MyGID(static_cast<TrilinosWrappers::types::int_type>(row)) == false) ?
+          &nonlocal_matrix->Graph() : &matrix->Graph();
+
+        indices.resize(graph->NumGlobalIndices(static_cast<TrilinosWrappers::types::int_type>(row)));
+        int n_indices = 0;
+        graph->ExtractGlobalRowCopy(static_cast<TrilinosWrappers::types::int_type>(row),
+                                    indices.size(), n_indices, &indices[0]);
+        AssertDimension(static_cast<unsigned int>(n_indices), indices.size());
+
+        for (TrilinosWrappers::types::int_type i=0; i<n_indices; ++i)
+          std::cout << indices[i] << " ";
+        std::cout << std::endl << std::endl;
+        Assert (ierr <= 0,
+                ExcAccessToNonPresentElement(row, col_index_ptr[0]));
+      }
+#endif
+    // Negative return codes are reported as Trilinos errors.
+    Assert (ierr >= 0, ExcTrilinosError(ierr));
+  }
+
+
+
+  // Set all stored entries of the matrix to the scalar d. Only d==0 is
+  // accepted (checked by an assertion). The matrix is compressed first, and
+  // the same value is then written into the main matrix and, if present,
+  // into nonlocal_matrix. Returns a reference to this object. Throws
+  // ExcTrilinosError if Trilinos reports a nonzero error code for either
+  // matrix object.
+  SparseMatrix &
+  SparseMatrix::operator = (const double d)
+  {
+    Assert (d==0, ExcScalarAssignmentOnlyForZeroValue());
+    compress (::dealii::VectorOperation::unknown); // TODO: why do we do this? Should we not check for is_compressed?
+
+    const int ierr = matrix->PutScalar(d);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    // The return code for the off-processor part is checked in the same
+    // way as for the main matrix rather than being dropped silently.
+    if (nonlocal_matrix.get() != 0)
+      {
+        const int nonlocal_ierr = nonlocal_matrix->PutScalar(d);
+        AssertThrow (nonlocal_ierr == 0, ExcTrilinosError(nonlocal_ierr));
+      }
+
+    return *this;
+  }
+
+
+
+  // Add factor*rhs to this matrix, row by row over the local row range of
+  // rhs. Both matrices must have the same size, the same local range, the
+  // same row map, and must be filled (all checked by the assertions
+  // below). Entries of rhs are added into existing entries of this matrix
+  // only; the sparsity pattern of this matrix is not changed.
+  void
+  SparseMatrix::add (const TrilinosScalar  factor,
+                     const SparseMatrix   &rhs)
+  {
+    AssertDimension (rhs.m(), m());
+    AssertDimension (rhs.n(), n());
+    AssertDimension (rhs.local_range().first, local_range().first);
+    AssertDimension (rhs.local_range().second, local_range().second);
+    Assert(matrix->RowMap().SameAs(rhs.matrix->RowMap()),
+           ExcMessage("Can only add matrices with same distribution of rows"));
+    Assert(matrix->Filled() && rhs.matrix->Filled(),
+           ExcMessage("Addition of matrices only allowed if matrices are "
+                      "filled, i.e., compress() has been called"));
+
+    // Note: from here on the local variable 'local_range' hides the member
+    // function of the same name.
+    const std::pair<size_type, size_type>
+    local_range = rhs.local_range();
+    // Local column indices of the two matrices can only be compared
+    // directly if both matrices use the same column map.
+    const bool same_col_map = matrix->ColMap().SameAs(rhs.matrix->ColMap());
+
+    int ierr;
+    for (size_type row=local_range.first; row < local_range.second; ++row)
+      {
+        const int row_local =
+          matrix->RowMap().LID(static_cast<TrilinosWrappers::types::int_type>(row));
+
+        // First get a view to the matrix columns of both matrices. Note that
+        // the data is in local index spaces, so we must compare column
+        // indices directly only in case they are derived from the same
+        // map.
+        int n_entries, rhs_n_entries;
+        TrilinosScalar *value_ptr, *rhs_value_ptr;
+        int *index_ptr, *rhs_index_ptr;
+        ierr = rhs.matrix->ExtractMyRowView (row_local, rhs_n_entries,
+                                             rhs_value_ptr, rhs_index_ptr);
+        (void)ierr;
+        Assert (ierr == 0, ExcTrilinosError(ierr));
+
+        ierr = matrix->ExtractMyRowView (row_local, n_entries, value_ptr,
+                                         index_ptr);
+        Assert (ierr == 0, ExcTrilinosError(ierr));
+        // The cheap path is only possible if both rows have the same number
+        // of entries and the column maps agree.
+        bool expensive_checks = (n_entries != rhs_n_entries || !same_col_map);
+        if (!expensive_checks)
+          {
+            // check if the column indices are the same. If yes, can simply
+            // copy over the data.
+            expensive_checks = std::memcmp(static_cast<void *>(index_ptr),
+                                           static_cast<void *>(rhs_index_ptr),
+                                           sizeof(int)*n_entries) != 0;
+            if (!expensive_checks)
+              for (int i=0; i<n_entries; ++i)
+                value_ptr[i] += rhs_value_ptr[i] * factor;
+          }
+        // Now to the expensive case where we need to check all column indices
+        // against each other (transformed into global index space) and where
+        // we need to make sure that all entries we are about to add into the
+        // lhs matrix actually exist
+        if (expensive_checks)
+          {
+            for (int i=0; i<rhs_n_entries; ++i)
+              {
+                // Zero entries of rhs contribute nothing and need not exist
+                // in this matrix.
+                if (rhs_value_ptr[i] == 0.)
+                  continue;
+                // Translate the rhs local column to a global column, and
+                // that global column to a local column of this matrix.
+                const TrilinosWrappers::types::int_type rhs_global_col =
+                  global_column_index(*rhs.matrix, rhs_index_ptr[i]);
+                int local_col = matrix->ColMap().LID(rhs_global_col);
+                // NOTE(review): Utilities::lower_bound presumably requires
+                // the local column indices of the row to be sorted --
+                // confirm.
+                int *local_index = Utilities::lower_bound(index_ptr,
+                                                          index_ptr+n_entries,
+                                                          local_col);
+                Assert(local_index != index_ptr + n_entries &&
+                       *local_index == local_col,
+                       ExcMessage("Adding the entries from the other matrix "
+                                  "failed, because the sparsity pattern "
+                                  "of that matrix includes more elements than the "
+                                  "calling matrix, which is not allowed."));
+                value_ptr[local_index-index_ptr] += factor * rhs_value_ptr[i];
+              }
+          }
+      }
+  }
+
+
+
+  void
+  SparseMatrix::transpose ()
+  {
+    // Nothing is moved in memory here: we merely toggle the flag that
+    // tells Trilinos whether vmult operations should be carried out with
+    // the transpose. The matrix structure is not reset.
+    const bool use_transpose_now = !matrix->UseTranspose();
+
+    const int ierr = matrix->SetUseTranspose (use_transpose_now);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+  }
+
+
+
+  SparseMatrix &
+  SparseMatrix::operator *= (const TrilinosScalar a)
+  {
+    // Let Trilinos scale the matrix by a; the return code is only looked
+    // at by the assertion.
+    const int ierr = matrix->Scale (a);
+    (void)ierr; // removes -Wunused-variable in optimized mode
+    Assert (ierr == 0, ExcTrilinosError(ierr));
+
+    return *this;
+  }
+
+
+
+  SparseMatrix &
+  SparseMatrix::operator /= (const TrilinosScalar a)
+  {
+    Assert (a !=0, ExcDivideByZero());
+
+    // Division is carried out as a scaling with the reciprocal of a; the
+    // return code is only looked at by the assertion.
+    const int ierr = matrix->Scale (1./a);
+    (void)ierr; // removes -Wunused-variable in optimized mode
+    Assert (ierr == 0, ExcTrilinosError(ierr));
+
+    return *this;
+  }
+
+
+
+  TrilinosScalar
+  SparseMatrix::l1_norm () const
+  {
+    // Requires a filled matrix; the norm itself is computed by Trilinos
+    // via NormOne().
+    Assert (matrix->Filled(), ExcMatrixNotCompressed());
+
+    const TrilinosScalar norm = matrix->NormOne();
+    return norm;
+  }
+
+
+
+  TrilinosScalar
+  SparseMatrix::linfty_norm () const
+  {
+    // Requires a filled matrix; the norm itself is computed by Trilinos
+    // via NormInf().
+    Assert (matrix->Filled(), ExcMatrixNotCompressed());
+
+    const TrilinosScalar norm = matrix->NormInf();
+    return norm;
+  }
+
+
+
+  TrilinosScalar
+  SparseMatrix::frobenius_norm () const
+  {
+    // Requires a filled matrix; the norm itself is computed by Trilinos
+    // via NormFrobenius().
+    Assert (matrix->Filled(), ExcMatrixNotCompressed());
+
+    const TrilinosScalar norm = matrix->NormFrobenius();
+    return norm;
+  }
+
+
+
+  namespace internal
+  {
+    namespace SparseMatrix
+    {
+      // Generic version, selected for every vector type without a more
+      // specific overload below: no check is performed.
+      template <typename VectorType>
+      inline
+      void check_vector_map_equality(const Epetra_CrsMatrix &,
+                                     const VectorType &,
+                                     const VectorType &)
+      {}
+
+      // Version for TrilinosWrappers::MPI::Vector: assert that the
+      // partitioner of 'in' is the same as the domain map of the matrix
+      // and that the partitioner of 'out' is the same as its range map.
+      inline
+      void check_vector_map_equality(const Epetra_CrsMatrix              &m,
+                                     const TrilinosWrappers::MPI::Vector &in,
+                                     const TrilinosWrappers::MPI::Vector &out)
+      {
+        // The arguments are only used inside the assertions.
+        (void)m;
+        (void)in;
+        (void)out;
+
+        Assert (in.vector_partitioner().SameAs(m.DomainMap()) == true,
+                ExcMessage ("Column map of matrix does not fit with vector map!"));
+        Assert (out.vector_partitioner().SameAs(m.RangeMap()) == true,
+                ExcMessage ("Row map of matrix does not fit with vector map!"));
+      }
+    }
+  }
+
+
+  template <typename VectorType>
+  void
+  SparseMatrix::vmult (VectorType       &dst,
+                       const VectorType &src) const
+  {
+    (void)src;
+    (void)dst;
+    Assert (&src != &dst, ExcSourceEqualsDestination());
+    Assert (matrix->Filled(), ExcMatrixNotCompressed());
+
+    // The locally stored part of dst must match the range map of the
+    // matrix, the one of src its domain map.
+    internal::SparseMatrix::check_vector_map_equality(*matrix, src, dst);
+    const size_type dst_local_size = dst.end() - dst.begin();
+    AssertDimension (dst_local_size, static_cast<size_type>(matrix->RangeMap().NumMyPoints()));
+    const size_type src_local_size = src.end() - src.begin();
+    AssertDimension (src_local_size, static_cast<size_type>(matrix->DomainMap().NumMyPoints()));
+
+    // Wrap the local data of both vectors into Epetra_MultiVector objects
+    // constructed in 'View' mode. Trilinos wants a non-const pointer for
+    // the source as well.
+    Epetra_MultiVector range_view (View, matrix->RangeMap(), dst.begin(),
+                                   dst_local_size, 1);
+    TrilinosScalar *const src_data = const_cast<TrilinosScalar *>(src.begin());
+    Epetra_MultiVector domain_view (View, matrix->DomainMap(), src_data,
+                                    src_local_size, 1);
+
+    // 'false' selects the product with the matrix itself.
+    const int ierr = matrix->Multiply (false, domain_view, range_view);
+    (void)ierr; // removes -Wunused-variable in optimized mode
+    Assert (ierr == 0, ExcTrilinosError(ierr));
+  }
+
+
+
+  template <typename VectorType>
+  void
+  SparseMatrix::Tvmult (VectorType       &dst,
+                        const VectorType &src) const
+  {
+    Assert (&src != &dst, ExcSourceEqualsDestination());
+    Assert (matrix->Filled(), ExcMatrixNotCompressed());
+
+    // For the transpose product the roles are swapped: the locally stored
+    // part of dst must match the domain map of the matrix, the one of src
+    // its range map.
+    internal::SparseMatrix::check_vector_map_equality(*matrix, dst, src);
+    const size_type dst_local_size = dst.end() - dst.begin();
+    AssertDimension (dst_local_size, static_cast<size_type>(matrix->DomainMap().NumMyPoints()));
+    const size_type src_local_size = src.end() - src.begin();
+    AssertDimension (src_local_size, static_cast<size_type>(matrix->RangeMap().NumMyPoints()));
+
+    // Wrap the local data of both vectors into Epetra_MultiVector objects
+    // constructed in 'View' mode. Trilinos wants a non-const pointer for
+    // the source as well.
+    Epetra_MultiVector domain_view (View, matrix->DomainMap(), dst.begin(),
+                                    dst_local_size, 1);
+    double *const src_data = const_cast<double *>(src.begin());
+    Epetra_MultiVector range_view (View, matrix->RangeMap(), src_data,
+                                   src_local_size, 1);
+
+    // 'true' selects the product with the transpose of the matrix.
+    const int ierr = matrix->Multiply (true, range_view, domain_view);
+    (void)ierr; // removes -Wunused-variable in optimized mode
+    Assert (ierr == 0, ExcTrilinosError(ierr));
+  }
+
+
+
+  template <typename VectorType>
+  void
+  SparseMatrix::vmult_add (VectorType       &dst,
+                           const VectorType &src) const
+  {
+    Assert (&src != &dst, ExcSourceEqualsDestination());
+
+    typedef dealii::Vector<TrilinosScalar>     LocalVector;
+    typedef dealii::VectorView<TrilinosScalar> LocalView;
+
+    // The product is first stored in a temporary, which is reinit'ed with
+    // the fast argument set so that its content is not overwritten (to
+    // save time). The TrilinosWrappers::Vector classes do not support
+    // this, hence a deal.II local vector is used; vmult accepts it because
+    // it only checks the local size.
+    LocalVector temp_vector;
+    temp_vector.reinit(dst.end()-dst.begin(), true);
+
+    // Views onto the local data of src and dst.
+    LocalView src_view(src.end()-src.begin(), src.begin());
+    LocalView dst_view(dst.end()-dst.begin(), dst.begin());
+
+    vmult (temp_vector, static_cast<const LocalVector &>(src_view));
+
+    // Accumulate the product into dst, unless there is no local data.
+    if (dst_view.size() > 0)
+      dst_view += temp_vector;
+  }
+
+
+
+  template <typename VectorType>
+  void
+  SparseMatrix::Tvmult_add (VectorType       &dst,
+                            const VectorType &src) const
+  {
+    Assert (&src != &dst, ExcSourceEqualsDestination());
+
+    typedef dealii::Vector<TrilinosScalar>     LocalVector;
+    typedef dealii::VectorView<TrilinosScalar> LocalView;
+
+    // The product is first stored in a temporary, which is reinit'ed with
+    // the fast argument set so that its content is not overwritten (to
+    // save time). The TrilinosWrappers::Vector classes do not support
+    // this, hence a deal.II local vector is used; Tvmult accepts it
+    // because it only checks the local size.
+    LocalVector temp_vector;
+    temp_vector.reinit(dst.end()-dst.begin(), true);
+
+    // Views onto the local data of src and dst.
+    LocalView src_view(src.end()-src.begin(), src.begin());
+    LocalView dst_view(dst.end()-dst.begin(), dst.begin());
+
+    Tvmult (temp_vector, static_cast<const LocalVector &>(src_view));
+
+    // Accumulate the product into dst, unless there is no local data.
+    if (dst_view.size() > 0)
+      dst_view += temp_vector;
+  }
+
+
+
+  TrilinosScalar
+  SparseMatrix::matrix_norm_square (const VectorBase &v) const
+  {
+    Assert (matrix->RowMap().SameAs(matrix->DomainMap()),
+            ExcNotQuadratic());
+
+    // Scratch vector laid out like v that receives the product of the
+    // matrix with v.
+    VectorBase temp_vector;
+    temp_vector.reinit(v, true);
+    vmult (temp_vector, v);
+
+    // Scalar product of the matrix-vector product with v.
+    const TrilinosScalar norm_square = temp_vector*v;
+    return norm_square;
+  }
+
+
+
+  TrilinosScalar
+  SparseMatrix::matrix_scalar_product (const VectorBase &u,
+                                       const VectorBase &v) const
+  {
+    Assert (matrix->RowMap().SameAs(matrix->DomainMap()),
+            ExcNotQuadratic());
+
+    // Scratch vector laid out like v that receives the product of the
+    // matrix with v.
+    VectorBase temp_vector;
+    temp_vector.reinit(v, true);
+    vmult (temp_vector, v);
+
+    // Scalar product of u with the matrix-vector product.
+    const TrilinosScalar product = u*temp_vector;
+    return product;
+  }
+
+
+
+  TrilinosScalar
+  SparseMatrix::residual (VectorBase       &dst,
+                          const VectorBase &x,
+                          const VectorBase &b) const
+  {
+    // dst = M*x
+    vmult (dst, x);
+
+    // dst = -(M*x - b) = b - M*x
+    dst -= b;
+    dst *= -1.;
+
+    // the l2 norm of the residual vector left in dst is returned
+    const TrilinosScalar residual_norm = dst.l2_norm();
+    return residual_norm;
+  }
+
+
+
+  namespace internals
+  {
+    typedef dealii::types::global_dof_index size_type;
+
+    // Compute the product of inputleft and inputright (or of the transpose
+    // of inputleft and inputright if transpose_left is true) and store it in
+    // result, which is cleared first. If the size of V equals the number of
+    // rows of inputright, each row of inputright is multiplied by the
+    // corresponding local entry of V before the product is formed; for any
+    // other size V is not used. The multiplication itself is carried out
+    // with ML routines operating on wrappers around the Epetra matrices.
+    void perform_mmult (const SparseMatrix &inputleft,
+                        const SparseMatrix &inputright,
+                        SparseMatrix       &result,
+                        const VectorBase   &V,
+                        const bool          transpose_left)
+    {
+      // not available when deal.II is configured with 64 bit indices
+#ifdef DEAL_II_WITH_64BIT_INDICES
+      Assert(false,ExcNotImplemented())
+#endif
+      // the vector is only taken into account if its size fits the number
+      // of rows of the right matrix
+      const bool use_vector = (V.size() == inputright.m() ? true : false);
+      // check that the dimensions and the parallel layout of the two
+      // factors are compatible
+      if (transpose_left == false)
+        {
+          Assert (inputleft.n() == inputright.m(),
+                  ExcDimensionMismatch(inputleft.n(), inputright.m()));
+          Assert (inputleft.domain_partitioner().SameAs(inputright.range_partitioner()),
+                  ExcMessage ("Parallel partitioning of A and B does not fit."));
+        }
+      else
+        {
+          Assert (inputleft.m() == inputright.m(),
+                  ExcDimensionMismatch(inputleft.m(), inputright.m()));
+          Assert (inputleft.range_partitioner().SameAs(inputright.range_partitioner()),
+                  ExcMessage ("Parallel partitioning of A and B does not fit."));
+        }
+
+      result.clear();
+
+      // create a suitable operator B: in case
+      // we do not use a vector, all we need to
+      // do is to set the pointer. Otherwise,
+      // we insert the data from B, but
+      // multiply each row with the respective
+      // vector element.
+      Teuchos::RCP<Epetra_CrsMatrix> mod_B;
+      if (use_vector == false)
+        {
+          // non-owning pointer (second argument false) to the matrix
+          // stored in inputright
+          mod_B = Teuchos::rcp(const_cast<Epetra_CrsMatrix *>
+                               (&inputright.trilinos_matrix()),
+                               false);
+        }
+      else
+        {
+          // owning pointer to a new matrix built on the sparsity pattern of
+          // inputright
+          mod_B = Teuchos::rcp(new Epetra_CrsMatrix
+                               (Copy, inputright.trilinos_sparsity_pattern()),
+                               true);
+          mod_B->FillComplete(inputright.domain_partitioner(),
+                              inputright.range_partitioner());
+          Assert (inputright.local_range() == V.local_range(),
+                  ExcMessage ("Parallel distribution of matrix B and vector V "
+                              "does not match."));
+
+          const int local_N = inputright.local_size();
+          for (int i=0; i<local_N; ++i)
+            {
+              // NOTE(review): both calls below write N_entries; since mod_B
+              // was built from the sparsity pattern of inputright the two
+              // row lengths are presumably identical -- TODO confirm
+              int N_entries = -1;
+              double *new_data, *B_data;
+              mod_B->ExtractMyRowView (i, N_entries, new_data);
+              inputright.trilinos_matrix().ExtractMyRowView (i, N_entries, B_data);
+              // scale local row i of B by local entry i of V
+              double value = V.trilinos_vector()[0][i];
+              for (TrilinosWrappers::types::int_type j=0; j<N_entries; ++j)
+                new_data[j] = value * B_data[j];
+            }
+        }
+
+      // use ML built-in method for performing
+      // the matrix-matrix product.
+      // create ML operators on top of the
+      // Epetra matrices. if we use a
+      // transposed matrix, let ML know it
+      ML_Comm *comm;
+      ML_Comm_Create(&comm);
+#ifdef ML_MPI
+      const Epetra_MpiComm *epcomm = dynamic_cast<const Epetra_MpiComm *>(&(inputleft.trilinos_matrix().Comm()));
+      // Get the MPI communicator, as it may not be MPI_COMM_WORLD, and update the ML comm object
+      if (epcomm) ML_Comm_Set_UsrComm(comm,epcomm->Comm());
+#endif
+      ML_Operator *A_ = ML_Operator_Create(comm);
+      ML_Operator *B_ = ML_Operator_Create(comm);
+      ML_Operator *C_ = ML_Operator_Create(comm);
+      // only filled in the transposed case; declared here so that it stays
+      // alive for as long as A_ wraps it
+      SparseMatrix transposed_mat;
+
+      if (transpose_left == false)
+        ML_Operator_WrapEpetraCrsMatrix
+        (const_cast<Epetra_CrsMatrix *>(&inputleft.trilinos_matrix()),A_,
+         false);
+      else
+        {
+          // create transposed matrix
+          // first pass: build the sparsity pattern of the transpose by
+          // adding entry (column, row) for every entry (row, column) of
+          // inputleft
+          SparsityPattern sparsity_transposed (inputleft.domain_partitioner(),
+                                               inputleft.range_partitioner());
+          Assert (inputleft.domain_partitioner().LinearMap() == true,
+                  ExcMessage("Matrix must be partitioned contiguously between procs."));
+          for (unsigned int i=0; i<inputleft.local_size(); ++i)
+            {
+              int num_entries, * indices;
+              inputleft.trilinos_sparsity_pattern().ExtractMyRowView(i, num_entries,
+                                                                     indices);
+              Assert (num_entries >= 0, ExcInternalError());
+#ifndef DEAL_II_WITH_64BIT_INDICES
+              const size_type GID = inputleft.trilinos_matrix().RowMap().GID(i);
+              for (TrilinosWrappers::types::int_type j=0; j<num_entries; ++j)
+                sparsity_transposed.add (inputleft.trilinos_matrix().ColMap().GID(indices[j]),
+                                         GID);
+#else
+              const size_type GID = inputleft.trilinos_matrix().RowMap().GID64(i);
+              for (TrilinosWrappers::types::int_type j=0; j<num_entries; ++j)
+                sparsity_transposed.add (inputleft.trilinos_matrix().ColMap().GID64(indices[j]),
+                                         GID);
+#endif
+            }
+
+          sparsity_transposed.compress();
+          transposed_mat.reinit (sparsity_transposed);
+          // second pass: copy the values into the transposed positions
+          for (unsigned int i=0; i<inputleft.local_size(); ++i)
+            {
+              int num_entries, * indices;
+              double *values;
+              inputleft.trilinos_matrix().ExtractMyRowView(i, num_entries,
+                                                           values, indices);
+              Assert (num_entries >= 0, ExcInternalError());
+#ifndef DEAL_II_WITH_64BIT_INDICES
+              const size_type GID = inputleft.trilinos_matrix().RowMap().GID(i);
+              for (TrilinosWrappers::types::int_type j=0; j<num_entries; ++j)
+                transposed_mat.set (inputleft.trilinos_matrix().ColMap().GID(indices[j]),
+                                    GID, values[j]);
+#else
+              const size_type GID = inputleft.trilinos_matrix().RowMap().GID64(i);
+              for (TrilinosWrappers::types::int_type j=0; j<num_entries; ++j)
+                transposed_mat.set (inputleft.trilinos_matrix().ColMap().GID64(indices[j]),
+                                    GID, values[j]);
+#endif
+            }
+          transposed_mat.compress(VectorOperation::insert);
+          ML_Operator_WrapEpetraCrsMatrix
+          (const_cast<Epetra_CrsMatrix *>(&transposed_mat.trilinos_matrix()),
+           A_,false);
+        }
+      ML_Operator_WrapEpetraCrsMatrix(mod_B.get(),B_,false);
+
+      // We implement the multiplication by
+      // hand in a similar way as is done in
+      // ml/src/Operator/ml_rap.c for a triple
+      // matrix product. This means that the
+      // code is very similar to the one found
+      // in ml/src/Operator/ml_rap.c
+
+      // import data if necessary
+      ML_Operator *Btmp, *Ctmp, *Ctmp2, *tptr;
+      ML_CommInfoOP *getrow_comm;
+      int max_per_proc;
+      TrilinosWrappers::types::int_type N_input_vector = B_->invec_leng;
+      getrow_comm = B_->getrow->pre_comm;
+      // sanity check: every index in the send lists has to be smaller than
+      // the input vector length of B_.
+      // NOTE(review): if this AssertThrow fires, the ML objects created
+      // above do not appear to be released -- TODO confirm
+      if ( getrow_comm != NULL)
+        for (TrilinosWrappers::types::int_type i = 0; i < getrow_comm->N_neighbors; i++)
+          for (TrilinosWrappers::types::int_type j = 0; j < getrow_comm->neighbors[i].N_send; j++)
+            AssertThrow (getrow_comm->neighbors[i].send_list[j] < N_input_vector,
+                         ExcInternalError());
+
+      ML_create_unique_col_id(N_input_vector, &(B_->getrow->loc_glob_map),
+                              getrow_comm, &max_per_proc, B_->comm);
+      B_->getrow->use_loc_glob_map = ML_YES;
+      if (A_->getrow->pre_comm != NULL)
+        ML_exchange_rows( B_, &Btmp, A_->getrow->pre_comm);
+      else Btmp = B_;
+
+      // perform matrix-matrix product
+      ML_matmat_mult(A_, Btmp , &Ctmp);
+
+      // release temporary structures we needed
+      // for multiplication
+      ML_free(B_->getrow->loc_glob_map);
+      B_->getrow->loc_glob_map = NULL;
+      B_->getrow->use_loc_glob_map = ML_NO;
+      if (A_->getrow->pre_comm != NULL)
+        {
+          // detach B_ from the sub_matrix chain of Btmp before Btmp is
+          // destroyed; B_ itself is destroyed at the end of this function
+          tptr = Btmp;
+          while ( (tptr!= NULL) && (tptr->sub_matrix != B_))
+            tptr = tptr->sub_matrix;
+          if (tptr != NULL) tptr->sub_matrix = NULL;
+          ML_RECUR_CSR_MSRdata_Destroy(Btmp);
+          ML_Operator_Destroy(&Btmp);
+        }
+
+      // make correct data structures
+      if (A_->getrow->post_comm != NULL)
+        ML_exchange_rows(Ctmp, &Ctmp2, A_->getrow->post_comm);
+      else
+        Ctmp2 = Ctmp;
+
+      ML_back_to_csrlocal(Ctmp2, C_, max_per_proc);
+
+      ML_RECUR_CSR_MSRdata_Destroy (Ctmp);
+      ML_Operator_Destroy (&Ctmp);
+
+      // Ctmp2 is a separate object only if rows were exchanged above
+      if (A_->getrow->post_comm != NULL)
+        {
+          ML_RECUR_CSR_MSRdata_Destroy(Ctmp2);
+          ML_Operator_Destroy (&Ctmp2);
+        }
+
+      // create an Epetra matrix from the ML
+      // matrix that we got as a result.
+      Epetra_CrsMatrix *C_mat;
+      ML_Operator2EpetraCrsMatrix(C_, C_mat);
+      C_mat->FillComplete();
+      C_mat->OptimizeStorage();
+      result.reinit (*C_mat);
+
+      // destroy allocated memory
+      delete C_mat;
+      ML_Operator_Destroy (&A_);
+      ML_Operator_Destroy (&B_);
+      ML_Operator_Destroy (&C_);
+      ML_Comm_Destroy (&comm);
+    }
+  }
+
+
+  void
+  SparseMatrix::mmult (SparseMatrix       &C,
+                       const SparseMatrix &B,
+                       const VectorBase   &V) const
+  {
+    // not implemented for 64 bit indices
+#ifdef DEAL_II_WITH_64BIT_INDICES
+    Assert (false, ExcNotImplemented());
+#endif
+
+    // the left factor (this matrix) is used as is, i.e. not transposed
+    const bool transpose_left = false;
+    internals::perform_mmult (*this, B, C, V, transpose_left);
+  }
+
+
+
+  void
+  SparseMatrix::Tmmult (SparseMatrix       &C,
+                        const SparseMatrix &B,
+                        const VectorBase   &V) const
+  {
+    // not implemented for 64 bit indices
+#ifdef DEAL_II_WITH_64BIT_INDICES
+    Assert (false, ExcNotImplemented());
+#endif
+
+    // the left factor (this matrix) is used in transposed form
+    const bool transpose_left = true;
+    internals::perform_mmult (*this, B, C, V, transpose_left);
+  }
+
+
+
+  void
+  SparseMatrix::write_ascii ()
+  {
+    // this function has no implementation; calling it triggers an assertion
+    Assert (false,
+            ExcNotImplemented());
+  }
+
+
+
+  // As of now, no particularly neat
+  // output is generated in case of
+  // multiple processors.
+  void
+  SparseMatrix::print (std::ostream &out,
+                       const bool    print_detailed_trilinos_information) const
+  {
+    if (!print_detailed_trilinos_information)
+      {
+        // write one line "(row,column) value" for every entry of every
+        // locally stored row, using global indices
+        double *row_values;
+        int    *row_columns;
+        int     row_length;
+
+        const int n_local_rows = matrix->NumMyRows();
+        for (int row=0; row<n_local_rows; ++row)
+          {
+            matrix->ExtractMyRowView (row, row_length, row_values, row_columns);
+            for (TrilinosWrappers::types::int_type k=0; k<row_length; ++k)
+              out << "(" << global_row_index(*matrix,row) << ","
+                  << global_column_index(*matrix,row_columns[k]) << ") "
+                  << row_values[k] << std::endl;
+          }
+      }
+    else
+      {
+        // let the Epetra matrix write itself to the stream
+        out << *matrix;
+      }
+
+    // the stream must still be in a good state after writing
+    AssertThrow (out, ExcIO());
+  }
+
+
+
+  SparseMatrix::size_type
+  SparseMatrix::memory_consumption () const
+  {
+    // fixed part: the size of this wrapper object itself (sizeof(*this);
+    // sizeof(this) would only be the size of a pointer), of the Epetra
+    // matrix object and of the data object of its graph
+    const size_type static_memory = sizeof(*this) + sizeof (*matrix)
+                                    + sizeof(*matrix->Graph().DataPtr());
+
+    // variable part: one scalar and one index for each of the
+    // NumMyNonzeros() entries, plus one int for each of the local_size()
+    // rows
+    return ((sizeof(TrilinosScalar)+sizeof(TrilinosWrappers::types::int_type))*
+            matrix->NumMyNonzeros() + sizeof(int)*local_size() + static_memory);
+  }
+
+
+
+  const Epetra_Map &
+  SparseMatrix::domain_partitioner () const
+  {
+    // hand out the domain map of the underlying Epetra matrix
+    const Epetra_Map &domain_map = matrix->DomainMap();
+    return domain_map;
+  }
+
+
+
+  const Epetra_Map &
+  SparseMatrix::range_partitioner () const
+  {
+    // hand out the range map of the underlying Epetra matrix
+    const Epetra_Map &range_map = matrix->RangeMap();
+    return range_map;
+  }
+
+
+
+  const Epetra_Map &
+  SparseMatrix::row_partitioner () const
+  {
+    // hand out the row map of the underlying Epetra matrix
+    const Epetra_Map &row_map = matrix->RowMap();
+    return row_map;
+  }
+
+
+
+  const Epetra_Map &
+  SparseMatrix::col_partitioner () const
+  {
+    // hand out the column map of the underlying Epetra matrix
+    const Epetra_Map &column_map = matrix->ColMap();
+    return column_map;
+  }
+
+
+
+  MPI_Comm SparseMatrix::get_mpi_communicator () const
+  {
+
+#ifdef DEAL_II_WITH_MPI
+
+    // with MPI, the communicator is taken from the range map of the
+    // matrix. The communicator object is expected to be an Epetra_MpiComm;
+    // make sure the cast succeeded before the pointer is dereferenced.
+    const Epetra_MpiComm *mpi_comm
+      = dynamic_cast<const Epetra_MpiComm *>(&matrix->RangeMap().Comm());
+    Assert (mpi_comm != 0, ExcInternalError());
+    return mpi_comm->Comm();
+#else
+
+    // without MPI there is only the calling process
+    return MPI_COMM_SELF;
+
+#endif
+
+  }
+}
+
+
+
+// explicit instantiations
+#include "trilinos_sparse_matrix.inst"
+
+
+// TODO: put these instantiations into generic file
+namespace TrilinosWrappers
+{
+  template void
+  SparseMatrix::reinit (const dealii::SparsityPattern &);
+
+  template void
+  SparseMatrix::reinit (const DynamicSparsityPattern &);
+
+  template void
+  SparseMatrix::reinit (const Epetra_Map &,
+                        const dealii::SparsityPattern &,
+                        const bool);
+  template void
+  SparseMatrix::reinit (const Epetra_Map &,
+                        const DynamicSparsityPattern &,
+                        const bool);
+  template void
+  SparseMatrix::reinit (const Epetra_Map &,
+                        const Epetra_Map &,
+                        const dealii::SparsityPattern &,
+                        const bool);
+  template void
+  SparseMatrix::reinit (const Epetra_Map &,
+                        const Epetra_Map &,
+                        const DynamicSparsityPattern &,
+                        const bool);
+  template void
+  SparseMatrix::reinit (const IndexSet &,
+                        const IndexSet &,
+                        const dealii::SparsityPattern &,
+                        const MPI_Comm &,
+                        const bool);
+  template void
+  SparseMatrix::reinit (const IndexSet &,
+                        const IndexSet &,
+                        const DynamicSparsityPattern &,
+                        const MPI_Comm &,
+                        const bool);
+
+  template void
+  SparseMatrix::vmult (VectorBase &,
+                       const VectorBase &) const;
+  template void
+  SparseMatrix::vmult (Vector &,
+                       const Vector &) const;
+  template void
+  SparseMatrix::vmult (MPI::Vector &,
+                       const MPI::Vector &) const;
+  template void
+  SparseMatrix::vmult (dealii::Vector<double> &,
+                       const dealii::Vector<double> &) const;
+  template void
+  SparseMatrix::vmult (dealii::parallel::distributed::Vector<double> &,
+                       const dealii::parallel::distributed::Vector<double> &) const;
+  template void
+  SparseMatrix::Tvmult (VectorBase &,
+                        const VectorBase &) const;
+  template void
+  SparseMatrix::Tvmult (Vector &,
+                        const Vector &) const;
+  template void
+  SparseMatrix::Tvmult (MPI::Vector &,
+                        const MPI::Vector &) const;
+  template void
+  SparseMatrix::Tvmult (dealii::Vector<double> &,
+                        const dealii::Vector<double> &) const;
+  template void
+  SparseMatrix::Tvmult (dealii::parallel::distributed::Vector<double> &,
+                        const dealii::parallel::distributed::Vector<double> &) const;
+  template void
+  SparseMatrix::vmult_add (VectorBase &,
+                           const VectorBase &) const;
+  template void
+  SparseMatrix::vmult_add (Vector &,
+                           const Vector &) const;
+  template void
+  SparseMatrix::vmult_add (MPI::Vector &,
+                           const MPI::Vector &) const;
+  template void
+  SparseMatrix::vmult_add (dealii::Vector<double> &,
+                           const dealii::Vector<double> &) const;
+  template void
+  SparseMatrix::vmult_add (dealii::parallel::distributed::Vector<double> &,
+                           const dealii::parallel::distributed::Vector<double> &) const;
+  template void
+  SparseMatrix::Tvmult_add (VectorBase &,
+                            const VectorBase &) const;
+  template void
+  SparseMatrix::Tvmult_add (Vector &,
+                            const Vector &) const;
+  template void
+  SparseMatrix::Tvmult_add (MPI::Vector &,
+                            const MPI::Vector &) const;
+  template void
+  SparseMatrix::Tvmult_add (dealii::Vector<double> &,
+                            const dealii::Vector<double> &) const;
+  template void
+  SparseMatrix::Tvmult_add (dealii::parallel::distributed::Vector<double> &,
+                            const dealii::parallel::distributed::Vector<double> &) const;
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_TRILINOS
diff --git a/source/lac/trilinos_sparse_matrix.inst.in b/source/lac/trilinos_sparse_matrix.inst.in
new file mode 100644
index 0000000..0a96b56
--- /dev/null
+++ b/source/lac/trilinos_sparse_matrix.inst.in
@@ -0,0 +1,49 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+for (S : REAL_SCALARS)
+  {
+    namespace TrilinosWrappers
+    \{
+      template void
+      SparseMatrix::reinit (const dealii::SparseMatrix<S> &,
+                            const double,
+                            const bool,
+                            const dealii::SparsityPattern *);
+      template void
+      SparseMatrix::reinit (const Epetra_Map &,
+                            const dealii::SparseMatrix<S> &,
+                            const double,
+                            const bool,
+                            const dealii::SparsityPattern *);
+      template void
+      SparseMatrix::reinit (const Epetra_Map &,
+                            const Epetra_Map &,
+                            const dealii::SparseMatrix<S> &,
+                            const double,
+                            const bool,
+                            const dealii::SparsityPattern *);
+      template void
+      SparseMatrix::reinit (const IndexSet &,
+                            const IndexSet &,
+                            const dealii::SparseMatrix<S> &,
+                            const MPI_Comm &,
+                            const double,
+                            const bool,
+                            const dealii::SparsityPattern *);
+    \}
+  }
diff --git a/source/lac/trilinos_sparsity_pattern.cc b/source/lac/trilinos_sparsity_pattern.cc
new file mode 100644
index 0000000..76386a4
--- /dev/null
+++ b/source/lac/trilinos_sparsity_pattern.cc
@@ -0,0 +1,1199 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/trilinos_sparsity_pattern.h>
+
+#ifdef DEAL_II_WITH_TRILINOS
+
+#  include <deal.II/base/utilities.h>
+#  include <deal.II/lac/sparsity_pattern.h>
+#  include <deal.II/lac/dynamic_sparsity_pattern.h>
+
+DEAL_II_DISABLE_EXTRA_DIAGNOSTICS
+#  include <Epetra_Export.h>
+DEAL_II_ENABLE_EXTRA_DIAGNOSTICS
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace TrilinosWrappers
+{
+  namespace
+  {
+    // Helper functions that query sizes and indices of Epetra objects by
+    // calling either the 32- or the 64-bit variant of the respective Epetra
+    // member function, and that return the result in the matching data type
+    // so that it can be used in calls to other Epetra member functions that
+    // are overloaded by index type.
+#ifndef DEAL_II_WITH_64BIT_INDICES
+    typedef int           epetra_index_type;
+#else
+    typedef long long int epetra_index_type;
+#endif
+
+    // number of global elements of the map
+    epetra_index_type n_global_elements (const Epetra_BlockMap &map)
+    {
+#ifndef DEAL_II_WITH_64BIT_INDICES
+      return map.NumGlobalElements();
+#else
+      return map.NumGlobalElements64();
+#endif
+    }
+
+    // result of MinMyGID()/MinMyGID64() of the map
+    epetra_index_type min_my_gid(const Epetra_BlockMap &map)
+    {
+#ifndef DEAL_II_WITH_64BIT_INDICES
+      return map.MinMyGID();
+#else
+      return map.MinMyGID64();
+#endif
+    }
+
+    // result of MaxMyGID()/MaxMyGID64() of the map
+    epetra_index_type max_my_gid(const Epetra_BlockMap &map)
+    {
+#ifndef DEAL_II_WITH_64BIT_INDICES
+      return map.MaxMyGID();
+#else
+      return map.MaxMyGID64();
+#endif
+    }
+
+    // number of global rows of the graph
+    epetra_index_type n_global_rows(const Epetra_CrsGraph &graph)
+    {
+#ifndef DEAL_II_WITH_64BIT_INDICES
+      return graph.NumGlobalRows();
+#else
+      return graph.NumGlobalRows64();
+#endif
+    }
+
+    // number of global columns of the graph
+    epetra_index_type n_global_cols(const Epetra_CrsGraph &graph)
+    {
+#ifndef DEAL_II_WITH_64BIT_INDICES
+      return graph.NumGlobalCols();
+#else
+      return graph.NumGlobalCols64();
+#endif
+    }
+
+    // number of global entries of the graph
+    epetra_index_type n_global_entries(const Epetra_CrsGraph &graph)
+    {
+#ifndef DEAL_II_WITH_64BIT_INDICES
+      return graph.NumGlobalEntries();
+#else
+      return graph.NumGlobalEntries64();
+#endif
+    }
+
+    // result of GRID(i)/GRID64(i) of the graph for the given row i
+    epetra_index_type global_row_index(const Epetra_CrsGraph &graph, int i)
+    {
+#ifndef DEAL_II_WITH_64BIT_INDICES
+      return graph.GRID(i);
+#else
+      return graph.GRID64(i);
+#endif
+    }
+  }
+
+  namespace SparsityPatternIterators
+  {
+    void
+    Accessor::visit_present_row ()
+    {
+      // if we are asked to visit the
+      // past-the-end line, then simply
+      // release all our caches and go on
+      // with life
+      if (this->a_row == sparsity_pattern->n_rows())
+        {
+          colnum_cache.reset ();
+
+          return;
+        }
+//TODO: Is this thread safe?
+
+      // otherwise first flush Trilinos caches
+      sparsity_pattern->compress ();
+
+      // A row can hold at most n_cols() entries, so a buffer of that length
+      // is large enough for any row.
+      // TODO: casting a size_type to an int, could be a problem
+      const int max_row_length = sparsity_pattern->n_cols();
+
+      // Extract the row into a local buffer rather than through
+      // colnum_cache: the cache may still be empty or may hold a shorter
+      // row from a previous visit, so writing up to max_row_length indices
+      // through it would access memory that was never allocated.
+      // NOTE(review): as before, the cast below assumes that size_type and
+      // TrilinosWrappers::types::int_type have the same size -- confirm
+      std::vector<size_type> row_buffer (max_row_length);
+      int ncols = 0;
+      const int ierr
+        = sparsity_pattern->graph->ExtractGlobalRowCopy((TrilinosWrappers::types::int_type)this->a_row,
+                                                        max_row_length,
+                                                        ncols,
+                                                        (TrilinosWrappers::types::int_type *)
+                                                        (row_buffer.empty() ? 0 : &row_buffer[0]));
+      AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+      // copy it into our caches if the
+      // line isn't empty. if it is, then
+      // we've done something wrong, since
+      // we shouldn't have initialized an
+      // iterator for an empty line (what
+      // would it point to?)
+      Assert (ncols != 0, ExcInternalError());
+
+      // keep exactly the ncols column indices Trilinos reported. The range
+      // is given by iterators: with two plain integers the vector
+      // constructor would create that many copies of a single value.
+      colnum_cache.reset (new std::vector<size_type> (row_buffer.begin(),
+                                                      row_buffer.begin()+ncols));
+    }
+  }
+
+
+  // The constructor is actually the
+  // only point where we have to check
+  // whether we build a serial or a
+  // parallel Trilinos matrix.
+  // Actually, it does not even matter
+  // how many threads there are, but
+  // only if we use an MPI compiler or
+  // a standard compiler. So, even one
+  // thread on a configuration with
+  // MPI will still get a parallel
+  // interface.
+  SparsityPattern::SparsityPattern ()
+  {
+    // a map with zero global elements and index base zero on the
+    // communicator returned by comm_self()
+    const TrilinosWrappers::types::int_type n_elements = 0;
+    const TrilinosWrappers::types::int_type index_base = 0;
+    column_space_map.reset(new Epetra_Map (n_elements,
+                                           index_base,
+                                           Utilities::Trilinos::comm_self()));
+
+    // a graph that uses this map for both rows and columns, with zero
+    // entries per row, finalized right away
+    graph.reset (new Epetra_FECrsGraph(View,
+                                       *column_space_map,
+                                       *column_space_map,
+                                       0));
+    graph->FillComplete();
+  }
+
+
+  SparsityPattern::SparsityPattern (const Epetra_Map  &input_map,
+                                    const size_type n_entries_per_row)
+  {
+    // the single map is used for rows as well as for columns
+    const Epetra_Map &row_and_column_map = input_map;
+    reinit (row_and_column_map, row_and_column_map, n_entries_per_row);
+  }
+
+
+
+  SparsityPattern::SparsityPattern (const Epetra_Map             &input_map,
+                                    const std::vector<size_type> &n_entries_per_row)
+  {
+    // the single map is used for rows as well as for columns
+    const Epetra_Map &row_and_column_map = input_map;
+    reinit (row_and_column_map, row_and_column_map, n_entries_per_row);
+  }
+
+
+
+  SparsityPattern::SparsityPattern (const Epetra_Map  &input_row_map,
+                                    const Epetra_Map  &input_col_map,
+                                    const size_type n_entries_per_row)
+  {
+    // all the work is done by the reinit() function with the same arguments
+    reinit (input_row_map,
+            input_col_map,
+            n_entries_per_row);
+  }
+
+
+
+  SparsityPattern::SparsityPattern (const Epetra_Map                &input_row_map,
+                                    const Epetra_Map                &input_col_map,
+                                    const std::vector<size_type> &n_entries_per_row)
+  {
+    // all the work is done by the reinit() function with the same arguments
+    reinit (input_row_map,
+            input_col_map,
+            n_entries_per_row);
+  }
+
+
+
+  SparsityPattern::SparsityPattern (const size_type m,
+                                    const size_type n,
+                                    const size_type n_entries_per_row)
+  {
+    // all the work is done by the reinit() function with the same arguments
+    reinit (m,
+            n,
+            n_entries_per_row);
+  }
+
+
+
+  SparsityPattern::SparsityPattern (const size_type               m,
+                                    const size_type               n,
+                                    const std::vector<size_type> &n_entries_per_row)
+  {
+    // all the work is done by the reinit() function with the same arguments
+    reinit (m,
+            n,
+            n_entries_per_row);
+  }
+
+
+  // Copy function only works if the
+  // sparsity pattern is empty.
+  SparsityPattern::SparsityPattern (const SparsityPattern &input_sparsity)
+    :
+    Subscriptor(),
+    // the new object is set up with an empty map and an empty graph and
+    // takes nothing over from the argument
+    column_space_map (new Epetra_Map(TrilinosWrappers::types::int_type(0),
+                                     TrilinosWrappers::types::int_type(0),
+                                     Utilities::Trilinos::comm_self())),
+    graph (new Epetra_FECrsGraph(View,
+                                 *column_space_map,
+                                 *column_space_map,
+                                 0))
+  {
+    // the argument is only looked at inside the assertion below; the cast
+    // to void marks it as used when the assertion is compiled out
+    (void)input_sparsity;
+    Assert (input_sparsity.n_rows() == 0,
+            ExcMessage ("Copy constructor only works for empty sparsity patterns."));
+  }
+
+
+
+  SparsityPattern::SparsityPattern  (const IndexSet  &parallel_partitioning,
+                                     const MPI_Comm  &communicator,
+                                     const size_type  n_entries_per_row)
+  {
+    // the single index set is used for rows as well as for columns
+    const IndexSet &row_and_column_partitioning = parallel_partitioning;
+    reinit (row_and_column_partitioning, row_and_column_partitioning,
+            communicator, n_entries_per_row);
+  }
+
+
+
+  SparsityPattern::SparsityPattern  (const IndexSet     &parallel_partitioning,
+                                     const MPI_Comm     &communicator,
+                                     const std::vector<size_type> &n_entries_per_row)
+  {
+    // the single index set is used for rows as well as for columns
+    const IndexSet &row_and_column_partitioning = parallel_partitioning;
+    reinit (row_and_column_partitioning, row_and_column_partitioning,
+            communicator, n_entries_per_row);
+  }
+
+
+
+  SparsityPattern::SparsityPattern  (const IndexSet  &row_parallel_partitioning,
+                                     const IndexSet  &col_parallel_partitioning,
+                                     const MPI_Comm  &communicator,
+                                     const size_type  n_entries_per_row)
+  {
+    // all the work is done by the reinit() function with the same arguments
+    reinit (row_parallel_partitioning,
+            col_parallel_partitioning,
+            communicator,
+            n_entries_per_row);
+  }
+
+
+
+  SparsityPattern::
+  SparsityPattern  (const IndexSet     &row_parallel_partitioning,
+                    const IndexSet     &col_parallel_partitioning,
+                    const MPI_Comm     &communicator,
+                    const std::vector<size_type> &n_entries_per_row)
+  {
+    // all the work is done by the reinit() function with the same arguments
+    reinit (row_parallel_partitioning,
+            col_parallel_partitioning,
+            communicator,
+            n_entries_per_row);
+  }
+
+
+
+  SparsityPattern::
+  SparsityPattern  (const IndexSet     &row_parallel_partitioning,
+                    const IndexSet     &col_parallel_partitioning,
+                    const IndexSet     &writable_rows,
+                    const MPI_Comm     &communicator,
+                    const size_type     n_max_entries_per_row)
+  {
+    // all the work is done by the reinit() function with the same arguments
+    reinit (row_parallel_partitioning,
+            col_parallel_partitioning,
+            writable_rows,
+            communicator,
+            n_max_entries_per_row);
+  }
+
+
+
+  SparsityPattern::~SparsityPattern ()
+  {
+    // nothing to do explicitly here
+  }
+
+
+
+  void
+  SparsityPattern::reinit (const size_type  m,
+                           const size_type  n,
+                           const size_type  n_entries_per_row)
+  {
+    // describe rows and columns by complete index sets of size m and n and
+    // use the communicator MPI_COMM_SELF
+    const IndexSet all_rows    = complete_index_set(m);
+    const IndexSet all_columns = complete_index_set(n);
+
+    reinit (all_rows, all_columns, MPI_COMM_SELF,
+            n_entries_per_row);
+  }
+
+
+
+  void
+  SparsityPattern::reinit (const size_type  m,
+                           const size_type  n,
+                           const std::vector<size_type> &n_entries_per_row)
+  {
+    // describe rows and columns by complete index sets of size m and n and
+    // use the communicator MPI_COMM_SELF
+    const IndexSet all_rows    = complete_index_set(m);
+    const IndexSet all_columns = complete_index_set(n);
+
+    reinit (all_rows, all_columns, MPI_COMM_SELF,
+            n_entries_per_row);
+  }
+
+
+
+  namespace
+  {
+    typedef SparsityPattern::size_type size_type;
+
+    void
+    reinit_sp (const Epetra_Map                         &row_map,
+               const Epetra_Map                         &col_map,
+               const size_type                           n_entries_per_row,
+               std_cxx11::shared_ptr<Epetra_Map>        &column_space_map,
+               std_cxx11::shared_ptr<Epetra_FECrsGraph> &graph,
+               std_cxx11::shared_ptr<Epetra_CrsGraph>   &nonlocal_graph)
+    {
+      // both maps have to be free of overlap between processors
+      Assert(row_map.IsOneToOne(),
+             ExcMessage("Row map must be 1-to-1, i.e., no overlap between "
+                        "the maps of different processors."));
+      Assert(col_map.IsOneToOne(),
+             ExcMessage("Column map must be 1-to-1, i.e., no overlap between "
+                        "the maps of different processors."));
+
+      // drop the old graphs, then store a copy of the column map
+      nonlocal_graph.reset();
+      graph.reset ();
+      column_space_map.reset (new Epetra_Map (col_map));
+
+      const bool more_than_one_processor = (row_map.Comm().NumProc() > 1);
+
+      // for only one processor, we can directly assign the columns as well
+      if (!more_than_one_processor)
+        {
+          graph.reset (new Epetra_FECrsGraph(Copy, row_map, col_map,
+                                             n_entries_per_row, false));
+          return;
+        }
+
+      // for more than one processor, need to specify only row map first and
+      // let the matrix entries decide about the column map (which says which
+      // columns are present in the matrix, not to be confused with the
+      // col_map that tells how the domain dofs of the matrix will be
+      // distributed). If we use a recent Trilinos version, we can also
+      // require building a non-local graph which gives us thread-safe
+      // initialization.
+      graph.reset (new Epetra_FECrsGraph(Copy, row_map,
+                                         n_entries_per_row, false
+                                         // TODO: Check which new Trilinos
+                                         // version supports this... Remember
+                                         // to change tests/trilinos/assemble_matrix_parallel_07
+                                         // too.
+                                         //#if DEAL_II_TRILINOS_VERSION_GTE(11,14,0)
+                                         //, true
+                                         //#endif
+                                        ));
+    }
+
+
+
+    // Reset @p graph to an empty Epetra_FECrsGraph built on @p row_map, with
+    // an individual number of entries for every row.
+    //
+    // @p n_entries_per_row is indexed by global row number and must hold one
+    // entry per global element of @p row_map (checked in debug mode). The
+    // entries starting at the first locally stored row are copied into an
+    // int array that is passed on to the graph constructor, i.e. the local
+    // rows are taken to be numbered contiguously. @p column_space_map
+    // receives a copy of @p col_map, and @p nonlocal_graph is left empty.
+    void
+    reinit_sp (const Epetra_Map                         &row_map,
+               const Epetra_Map                         &col_map,
+               const std::vector<size_type>             &n_entries_per_row,
+               std_cxx11::shared_ptr<Epetra_Map>        &column_space_map,
+               std_cxx11::shared_ptr<Epetra_FECrsGraph> &graph,
+               std_cxx11::shared_ptr<Epetra_CrsGraph>   &nonlocal_graph)
+    {
+      Assert(row_map.IsOneToOne(),
+             ExcMessage("Row map must be 1-to-1, i.e., no overlap between "
+                        "the maps of different processors."));
+      Assert(col_map.IsOneToOne(),
+             ExcMessage("Column map must be 1-to-1, i.e., no overlap between "
+                        "the maps of different processors."));
+
+      // release memory before reallocation
+      nonlocal_graph.reset();
+      graph.reset ();
+      AssertDimension (n_entries_per_row.size(),
+                       static_cast<size_type>(n_global_elements(row_map)));
+
+      column_space_map.reset (new Epetra_Map (col_map));
+
+      // The graph constructor needs one length per locally stored row.
+      // max_my_gid() is the last local row itself (other functions in this
+      // file add one to it to get the past-the-end row), so sizing the array
+      // with max-min left it one entry short. Ask the map for the number of
+      // local rows directly instead.
+      const size_type first_local_row = min_my_gid(row_map);
+      std::vector<int> local_entries_per_row(row_map.NumMyElements());
+      for (unsigned int i=0; i<local_entries_per_row.size(); ++i)
+        local_entries_per_row[i] =
+          static_cast<int>(n_entries_per_row[first_local_row+i]);
+
+      // A processor without any rows has no array element to point to;
+      // avoid indexing into an empty vector in that case.
+      int *const row_lengths = (local_entries_per_row.empty()
+                                ?
+                                0
+                                :
+                                &local_entries_per_row[0]);
+
+      if (row_map.Comm().NumProc() > 1)
+        graph.reset(new Epetra_FECrsGraph(Copy, row_map,
+                                          row_lengths,
+                                          false
+                                          // TODO: Check which new Trilinos
+                                          // version supports this... Remember
+                                          // to change tests/trilinos/assemble_matrix_parallel_07
+                                          // too.
+                                          //#if DEAL_II_TRILINOS_VERSION_GTE(11,14,0)
+                                          //, true
+                                          //#endif
+                                         ));
+      else
+        graph.reset(new Epetra_FECrsGraph(Copy, row_map, col_map,
+                                          row_lengths,
+                                          false));
+    }
+
+
+
+    // Rebuild @p graph from an existing sparsity pattern @p sp.
+    //
+    // @p graph and @p nonlocal_graph are released, @p column_space_map is
+    // replaced by a copy of @p col_map, and a new Epetra_FECrsGraph is
+    // created on @p row_map with the row lengths taken from @p sp. The
+    // column indices of @p sp are then inserted, and the graph is finalized
+    // with GlobalAssemble() and OptimizeStorage(); a nonzero return code of
+    // either call raises ExcTrilinosError.
+    //
+    // If @p exchange_data is false, only the rows between the first and the
+    // last row of @p row_map on this processor are inserted. Otherwise every
+    // row of @p sp is inserted through the Epetra_FECrsGraph interface.
+    template <typename SparsityPatternType>
+    void
+    reinit_sp (const Epetra_Map                         &row_map,
+               const Epetra_Map                         &col_map,
+               const SparsityPatternType                &sp,
+               const bool                                exchange_data,
+               std_cxx11::shared_ptr<Epetra_Map>        &column_space_map,
+               std_cxx11::shared_ptr<Epetra_FECrsGraph> &graph,
+               std_cxx11::shared_ptr<Epetra_CrsGraph>   &nonlocal_graph)
+    {
+      // release the old objects before allocating new ones
+      nonlocal_graph.reset ();
+      graph.reset ();
+
+      AssertDimension (sp.n_rows(),
+                       static_cast<size_type>(n_global_elements(row_map)));
+      AssertDimension (sp.n_cols(),
+                       static_cast<size_type>(n_global_elements(col_map)));
+
+      column_space_map.reset (new Epetra_Map (col_map));
+
+      Assert (row_map.LinearMap() == true,
+              ExcMessage ("This function only works if the row map is contiguous."));
+
+      // half-open range [first_row, last_row) of the rows of this processor
+      const size_type first_row = min_my_gid(row_map),
+                      last_row = max_my_gid(row_map)+1;
+      std::vector<int> n_entries_per_row(last_row - first_row);
+
+      // Trilinos wants the row lengths as ints; this is hopefully never
+      // going to be a problem.
+      for (size_type row=first_row; row<last_row; ++row)
+        n_entries_per_row[row-first_row] = static_cast<int>(sp.row_length(row));
+
+      // with more than one processor only the row map is given to the
+      // constructor; on a single processor the column map is passed as well
+      if (row_map.Comm().NumProc() > 1)
+        graph.reset(new Epetra_FECrsGraph(Copy, row_map,
+                                          &n_entries_per_row[0],
+                                          false));
+      else
+        graph.reset (new Epetra_FECrsGraph(Copy, row_map, col_map,
+                                           &n_entries_per_row[0],
+                                           false));
+
+      AssertDimension (sp.n_rows(),
+                       static_cast<size_type>(n_global_rows(*graph)));
+
+      // scratch array for the column indices of one row, reused across rows
+      std::vector<TrilinosWrappers::types::int_type> row_indices;
+
+      // Include possibility to exchange data since DynamicSparsityPattern is
+      // able to do so
+      if (exchange_data==false)
+        for (size_type row=first_row; row<last_row; ++row)
+          {
+            const TrilinosWrappers::types::int_type row_length = sp.row_length(row);
+            if (row_length == 0)
+              continue;
+
+            row_indices.resize (row_length, -1);
+            {
+              typename SparsityPatternType::iterator p = sp.begin(row);
+              // avoid incrementing p over the end of the current row because
+              // it is slow for DynamicSparsityPattern in parallel
+              for (int col=0; col<row_length; )
+                {
+                  row_indices[col++] = p->column();
+                  if (col < row_length)
+                    ++p;
+                }
+            }
+            // the base class function is called explicitly here, whereas the
+            // branch below goes through the Epetra_FECrsGraph overload
+            graph->Epetra_CrsGraph::InsertGlobalIndices (row, row_length,
+                                                         &row_indices[0]);
+          }
+      else
+        for (size_type row=0; row<sp.n_rows(); ++row)
+          {
+            const TrilinosWrappers::types::int_type row_length = sp.row_length(row);
+            if (row_length == 0)
+              continue;
+
+            row_indices.resize (row_length, -1);
+            {
+              typename SparsityPatternType::iterator p = sp.begin(row);
+              // avoid incrementing p over the end of the current row because
+              // it is slow for DynamicSparsityPattern in parallel
+              for (int col=0; col<row_length; )
+                {
+                  row_indices[col++] = p->column();
+                  if (col < row_length)
+                    ++p;
+                }
+            }
+            // NOTE(review): the unsigned row counter is reinterpreted as the
+            // signed Trilinos index type; this presumably relies on both
+            // types having the same size -- confirm.
+            graph->InsertGlobalIndices (1,
+                                        reinterpret_cast<TrilinosWrappers::types::int_type *>(&row),
+                                        row_length, &row_indices[0]);
+          }
+
+      // finalize the graph; failures are reported in release mode as well
+      int ierr =
+        graph->GlobalAssemble (*column_space_map,
+                               static_cast<const Epetra_Map &>(graph->RangeMap()),
+                               true);
+      AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+      ierr = graph->OptimizeStorage ();
+      AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+    }
+  }
+
+
+  // Reinitialize the pattern with @p input_map describing both the rows and
+  // the columns; @p n_entries_per_row is forwarded to reinit_sp().
+  void
+  SparsityPattern::reinit (const Epetra_Map  &input_map,
+                           const size_type    n_entries_per_row)
+  {
+    // the single map plays both roles
+    const Epetra_Map &row_map = input_map;
+    const Epetra_Map &col_map = input_map;
+
+    reinit_sp (row_map, col_map, n_entries_per_row,
+               column_space_map, graph, nonlocal_graph);
+  }
+
+
+
+  // Reinitialize the pattern with separate row and column maps;
+  // @p n_entries_per_row is forwarded to reinit_sp().
+  void
+  SparsityPattern::reinit (const Epetra_Map  &input_row_map,
+                           const Epetra_Map  &input_col_map,
+                           const size_type    n_entries_per_row)
+  {
+    reinit_sp (input_row_map,
+               input_col_map,
+               n_entries_per_row,
+               column_space_map,
+               graph,
+               nonlocal_graph);
+  }
+
+
+
+  // Reinitialize the pattern with @p input_map describing both the rows and
+  // the columns; the per-row entry counts are forwarded to reinit_sp().
+  void
+  SparsityPattern::reinit (const Epetra_Map   &input_map,
+                           const std::vector<size_type> &n_entries_per_row)
+  {
+    // the single map plays both roles
+    const Epetra_Map &row_map = input_map;
+    const Epetra_Map &col_map = input_map;
+
+    reinit_sp (row_map, col_map, n_entries_per_row,
+               column_space_map, graph, nonlocal_graph);
+  }
+
+
+
+  // Reinitialize the pattern with separate row and column maps; the per-row
+  // entry counts are forwarded to reinit_sp().
+  void
+  SparsityPattern::reinit (const Epetra_Map   &input_row_map,
+                           const Epetra_Map   &input_col_map,
+                           const std::vector<size_type> &n_entries_per_row)
+  {
+    reinit_sp (input_row_map,
+               input_col_map,
+               n_entries_per_row,
+               column_space_map,
+               graph,
+               nonlocal_graph);
+  }
+
+
+
+  // Reinitialize the pattern from an IndexSet: a Trilinos map is generated
+  // from @p parallel_partitioning and used for both rows and columns.
+  void
+  SparsityPattern::reinit (const IndexSet  &parallel_partitioning,
+                           const MPI_Comm  &communicator,
+                           const size_type  n_entries_per_row)
+  {
+    const Epetra_Map partition_map =
+      parallel_partitioning.make_trilinos_map (communicator, false);
+
+    reinit_sp (partition_map, partition_map, n_entries_per_row,
+               column_space_map, graph, nonlocal_graph);
+  }
+
+
+
+  // Reinitialize the pattern from an IndexSet and per-row entry counts: a
+  // Trilinos map is generated from @p parallel_partitioning and used for
+  // both rows and columns.
+  void SparsityPattern::reinit (const IndexSet     &parallel_partitioning,
+                                const MPI_Comm     &communicator,
+                                const std::vector<size_type> &n_entries_per_row)
+  {
+    const Epetra_Map partition_map =
+      parallel_partitioning.make_trilinos_map (communicator, false);
+
+    reinit_sp (partition_map, partition_map, n_entries_per_row,
+               column_space_map, graph, nonlocal_graph);
+  }
+
+
+
+  // Reinitialize the pattern from separate row and column IndexSets; each
+  // one is converted to a Trilinos map before calling reinit_sp().
+  void SparsityPattern::reinit (const IndexSet &row_parallel_partitioning,
+                                const IndexSet &col_parallel_partitioning,
+                                const MPI_Comm &communicator,
+                                const size_type  n_entries_per_row)
+  {
+    const Epetra_Map rows =
+      row_parallel_partitioning.make_trilinos_map (communicator, false);
+    const Epetra_Map columns =
+      col_parallel_partitioning.make_trilinos_map (communicator, false);
+
+    reinit_sp (rows, columns, n_entries_per_row,
+               column_space_map, graph, nonlocal_graph);
+  }
+
+
+
+  // Reinitialize the pattern from separate row and column IndexSets and
+  // per-row entry counts; each IndexSet is converted to a Trilinos map
+  // before calling reinit_sp().
+  void
+  SparsityPattern::reinit (const IndexSet     &row_parallel_partitioning,
+                           const IndexSet     &col_parallel_partitioning,
+                           const MPI_Comm     &communicator,
+                           const std::vector<size_type> &n_entries_per_row)
+  {
+    const Epetra_Map rows =
+      row_parallel_partitioning.make_trilinos_map (communicator, false);
+    const Epetra_Map columns =
+      col_parallel_partitioning.make_trilinos_map (communicator, false);
+
+    reinit_sp (rows, columns, n_entries_per_row,
+               column_space_map, graph, nonlocal_graph);
+  }
+
+
+
+  // Reinitialize the pattern from row and column IndexSets and additionally
+  // set up @p nonlocal_graph for the rows in @p writable_rows that are not
+  // contained in @p row_parallel_partitioning.
+  //
+  // @p writable_rows must have the same size as
+  // @p row_parallel_partitioning and, as checked in debug mode, contain all
+  // of its elements. The additional graph is only created when the
+  // communicator spans more than one process.
+  void
+  SparsityPattern::reinit (const IndexSet  &row_parallel_partitioning,
+                           const IndexSet  &col_parallel_partitioning,
+                           const IndexSet  &writable_rows,
+                           const MPI_Comm  &communicator,
+                           const size_type  n_entries_per_row)
+  {
+    Epetra_Map row_map =
+      row_parallel_partitioning.make_trilinos_map (communicator, false);
+    Epetra_Map col_map =
+      col_parallel_partitioning.make_trilinos_map (communicator, false);
+    // this also resets nonlocal_graph; it is recreated below if needed
+    reinit_sp (row_map, col_map, n_entries_per_row,
+               column_space_map, graph, nonlocal_graph);
+
+    IndexSet nonlocal_partitioner = writable_rows;
+    AssertDimension(nonlocal_partitioner.size(), row_parallel_partitioning.size());
+#ifdef DEBUG
+    {
+      // intersecting with the row partitioning must give back the row
+      // partitioning if the latter is a subset of the writable rows
+      IndexSet tmp = writable_rows & row_parallel_partitioning;
+      Assert (tmp == row_parallel_partitioning,
+              ExcMessage("The set of writable rows passed to this method does not "
+                         "contain the locally owned rows, which is not allowed."));
+    }
+#endif
+    // what remains are the writable rows outside the row partitioning
+    nonlocal_partitioner.subtract_set(row_parallel_partitioning);
+    if (Utilities::MPI::n_mpi_processes(communicator) > 1)
+      {
+        Epetra_Map nonlocal_map =
+          nonlocal_partitioner.make_trilinos_map(communicator, true);
+        nonlocal_graph.reset(new Epetra_CrsGraph(Copy, nonlocal_map, 0));
+      }
+    else
+      // with a single process there must not be any such rows
+      Assert(nonlocal_partitioner.n_elements() == 0, ExcInternalError());
+  }
+
+
+
+  // Reinitialize the pattern from row and column IndexSets and copy the
+  // entries of @p nontrilinos_sparsity_pattern via reinit_sp().
+  template<typename SparsityPatternType>
+  void
+  SparsityPattern::reinit (const IndexSet            &row_parallel_partitioning,
+                           const IndexSet            &col_parallel_partitioning,
+                           const SparsityPatternType &nontrilinos_sparsity_pattern,
+                           const MPI_Comm            &communicator,
+                           const bool                 exchange_data)
+  {
+    const Epetra_Map rows =
+      row_parallel_partitioning.make_trilinos_map (communicator, false);
+    const Epetra_Map columns =
+      col_parallel_partitioning.make_trilinos_map (communicator, false);
+
+    reinit_sp (rows, columns, nontrilinos_sparsity_pattern, exchange_data,
+               column_space_map, graph, nonlocal_graph);
+  }
+
+
+
+  // Reinitialize the pattern from a single IndexSet, used for both rows and
+  // columns, and copy the entries of @p nontrilinos_sparsity_pattern via
+  // reinit_sp().
+  template<typename SparsityPatternType>
+  void
+  SparsityPattern::reinit (const IndexSet            &parallel_partitioning,
+                           const SparsityPatternType &nontrilinos_sparsity_pattern,
+                           const MPI_Comm            &communicator,
+                           const bool                 exchange_data)
+  {
+    const Epetra_Map partition_map =
+      parallel_partitioning.make_trilinos_map (communicator, false);
+
+    reinit_sp (partition_map, partition_map,
+               nontrilinos_sparsity_pattern, exchange_data,
+               column_space_map, graph, nonlocal_graph);
+  }
+
+
+
+  // Reinitialize the pattern with @p input_map describing both the rows and
+  // the columns, and copy the entries of @p sp via reinit_sp().
+  template <typename SparsityPatternType>
+  void
+  SparsityPattern::reinit (const Epetra_Map          &input_map,
+                           const SparsityPatternType &sp,
+                           const bool                 exchange_data)
+  {
+    // the single map plays both roles
+    const Epetra_Map &row_map = input_map;
+    const Epetra_Map &col_map = input_map;
+
+    reinit_sp (row_map, col_map, sp, exchange_data,
+               column_space_map, graph, nonlocal_graph);
+  }
+
+
+
+  // Reinitialize the pattern with separate row and column maps, copy the
+  // entries of @p sp via reinit_sp(), and call compress() afterwards.
+  template <typename SparsityPatternType>
+  void
+  SparsityPattern::reinit (const Epetra_Map          &input_row_map,
+                           const Epetra_Map          &input_col_map,
+                           const SparsityPatternType &sp,
+                           const bool                 exchange_data)
+  {
+    reinit_sp (input_row_map,
+               input_col_map,
+               sp,
+               exchange_data,
+               column_space_map,
+               graph,
+               nonlocal_graph);
+
+    this->compress();
+  }
+
+
+
+  // Copy assignment is not implemented: in debug mode the assertion below
+  // always fails with ExcNotImplemented. The object itself is not modified.
+  SparsityPattern &
+  SparsityPattern::operator = (const SparsityPattern &)
+  {
+    Assert (false, ExcNotImplemented());
+    return *this;
+  }
+
+
+
+  // Copy the entries of @p sp into this object. Row and column maps with
+  // sp.n_rows() and sp.n_cols() elements are created on the communicator
+  // returned by Utilities::Trilinos::comm_self(), and no data exchange is
+  // requested from reinit_sp().
+  template <typename SparsityPatternType>
+  void
+  SparsityPattern::copy_from (const SparsityPatternType &sp)
+  {
+    const Epetra_Map row_map (TrilinosWrappers::types::int_type(sp.n_rows()),
+                              0,
+                              Utilities::Trilinos::comm_self());
+    const Epetra_Map col_map (TrilinosWrappers::types::int_type(sp.n_cols()),
+                              0,
+                              Utilities::Trilinos::comm_self());
+
+    const bool exchange_data = false;
+    reinit_sp (row_map, col_map, sp, exchange_data,
+               column_space_map, graph, nonlocal_graph);
+  }
+
+
+
+  // Replace the stored objects by an empty pattern: a map with zero
+  // elements, a completed graph built on that map, and no nonlocal graph.
+  void
+  SparsityPattern::clear ()
+  {
+    const TrilinosWrappers::types::int_type n_elements = 0;
+    const TrilinosWrappers::types::int_type index_base = 0;
+
+    // new, empty map
+    column_space_map.reset (new Epetra_Map (n_elements,
+                                            index_base,
+                                            Utilities::Trilinos::comm_self()));
+
+    // new, empty graph that uses the map for rows and columns
+    const Epetra_Map &empty_map = *column_space_map;
+    graph.reset (new Epetra_FECrsGraph(View, empty_map, empty_map, 0));
+    graph->FillComplete();
+
+    nonlocal_graph.reset();
+  }
+
+
+
+  // Finalize the graph. If a nonlocal graph exists, it is completed, its
+  // content is exported into the main graph and the main graph is completed
+  // with FillComplete(); otherwise GlobalAssemble() is called on the main
+  // graph. In both cases OptimizeStorage() is called at the end. Nonzero
+  // return codes of the checked Trilinos calls raise ExcTrilinosError.
+  void
+  SparsityPattern::compress ()
+  {
+    int ierr;
+    Assert (column_space_map.get() != 0, ExcInternalError());
+    if (nonlocal_graph.get() != 0)
+      {
+        if (nonlocal_graph->IndicesAreGlobal() == false &&
+            nonlocal_graph->RowMap().NumMyElements() > 0)
+          {
+            // insert dummy element
+            // NOTE(review): presumably a graph without any inserted entry
+            // does not report global indices yet, which the assertion
+            // below requires -- confirm against the Epetra documentation.
+            TrilinosWrappers::types::int_type row = nonlocal_graph->RowMap().MyGID(
+                                                      static_cast<TrilinosWrappers::types::int_type> (0));
+            nonlocal_graph->InsertGlobalIndices(row, 1, &row);
+          }
+        Assert(nonlocal_graph->RowMap().NumMyElements() == 0 ||
+               nonlocal_graph->IndicesAreGlobal() == true,
+               ExcInternalError());
+        // the return codes of these two calls are not checked
+        nonlocal_graph->FillComplete(*column_space_map,
+                                     static_cast<const Epetra_Map &>(graph->RangeMap()));
+        nonlocal_graph->OptimizeStorage();
+        // transfer the rows of the nonlocal graph to the row layout of the
+        // main graph
+        Epetra_Export exporter(nonlocal_graph->RowMap(), graph->RowMap());
+        ierr = graph->Export(*nonlocal_graph, exporter, Add);
+        AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+        ierr =
+          graph->FillComplete(*column_space_map,
+                              static_cast<const Epetra_Map &>(graph->RangeMap()));
+      }
+    else
+      ierr = graph->GlobalAssemble (*column_space_map,
+                                    static_cast<const Epetra_Map &>(graph->RangeMap()),
+                                    true);
+
+    // checks the result of FillComplete() or GlobalAssemble() from above
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    ierr = graph->OptimizeStorage ();
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+  }
+
+
+
+  // Return whether the entry with global row index @p i and global column
+  // index @p j is part of the pattern as stored on the calling processor.
+  // For rows that are not stored on this processor the result is false; no
+  // exception is raised in that case.
+  bool
+  SparsityPattern::exists (const size_type i,
+                           const size_type j) const
+  {
+    const TrilinosWrappers::types::int_type global_row =
+      static_cast<TrilinosWrappers::types::int_type>(i);
+    const TrilinosWrappers::types::int_type global_col =
+      static_cast<TrilinosWrappers::types::int_type>(j);
+
+    // A row that is not stored on the present processor cannot contain the
+    // entry.
+    const int local_row = graph->LRID(global_row);
+    if (local_row == -1)
+      return false;
+
+    const int nnz_present = graph->NumGlobalIndices(global_row);
+    int nnz_extracted;
+
+    // Check whether the graph has already been transformed to local
+    // indices.
+    if (graph->Filled() == false)
+      {
+        // The view of a global row is addressed by the global row number
+        // and contains global column indices. The row therefore has to be
+        // requested with the global row index (not the local one), and the
+        // global column index has to be searched for.
+        TrilinosWrappers::types::int_type *col_indices;
+        const int ierr = graph->ExtractGlobalRowView(global_row,
+                                                     nnz_extracted,
+                                                     col_indices);
+        (void)ierr;
+        Assert (ierr==0, ExcTrilinosError(ierr));
+        Assert (nnz_present == nnz_extracted,
+                ExcDimensionMismatch(nnz_present, nnz_extracted));
+
+        TrilinosWrappers::types::int_type *const row_end =
+          col_indices + nnz_present;
+        return std::find(col_indices, row_end, global_col) != row_end;
+      }
+
+    // The graph is filled: rows are viewed through local row indices and
+    // contain local column indices. A column that has no local index (-1)
+    // is never found in the row.
+    const int local_col = graph->LCID(global_col);
+    int *col_indices;
+    const int ierr = graph->ExtractMyRowView(local_row,
+                                             nnz_extracted, col_indices);
+    (void)ierr;
+    Assert (ierr==0, ExcTrilinosError(ierr));
+    Assert (nnz_present == nnz_extracted,
+            ExcDimensionMismatch(nnz_present, nnz_extracted));
+
+    int *const row_end = col_indices + nnz_present;
+    return std::find(col_indices, row_end, local_col) != row_end;
+  }
+
+
+
+  // Return the largest value of |i - indices[j]| over all locally stored
+  // rows i and the column indices of their row views, maximized over all
+  // processors with MaxAll(). The differences are formed from the local row
+  // number and the indices returned by ExtractMyRowView().
+  SparsityPattern::size_type
+  SparsityPattern::bandwidth () const
+  {
+    // Keep both values in the Trilinos index type so that they can be
+    // passed to MaxAll() directly, without casting a pointer to size_type
+    // into a pointer to a different integer type.
+    TrilinosWrappers::types::int_type local_b=0;
+    TrilinosWrappers::types::int_type global_b=0;
+
+    const int n_local_rows = static_cast<int>(local_size());
+    for (int i=0; i<n_local_rows; ++i)
+      {
+        // initialized so that a failing extraction contributes no entries
+        int *indices = 0;
+        int num_entries = 0;
+        graph->ExtractMyRowView(i, num_entries, indices);
+        for (int j=0; j<num_entries; ++j)
+          {
+            const TrilinosWrappers::types::int_type distance =
+              std::abs(static_cast<TrilinosWrappers::types::int_type>(i-indices[j]));
+            if (distance > local_b)
+              local_b = distance;
+          }
+      }
+    graph->Comm().MaxAll(&local_b, &global_b, 1);
+    return static_cast<size_type>(global_b);
+  }
+
+
+
+  // Return the global number of rows of the graph.
+  SparsityPattern::size_type
+  SparsityPattern::n_rows () const
+  {
+    const TrilinosWrappers::types::int_type global_row_count =
+      n_global_rows(*graph);
+
+    return static_cast<size_type>(global_row_count);
+  }
+
+
+
+  // Return the global number of columns. A filled graph is asked directly;
+  // before that, the size of the stored column space map is used.
+  SparsityPattern::size_type
+  SparsityPattern::n_cols () const
+  {
+    if (graph->Filled())
+      {
+        const TrilinosWrappers::types::int_type from_graph =
+          n_global_cols(*graph);
+        return from_graph;
+      }
+
+    const TrilinosWrappers::types::int_type from_map =
+      n_global_elements(*column_space_map);
+    return from_map;
+  }
+
+
+
+  // Return the number of rows of the graph stored on this processor.
+  unsigned int
+  SparsityPattern::local_size () const
+  {
+    const int local_row_count = graph->NumMyRows();
+
+    return static_cast<unsigned int>(local_row_count);
+  }
+
+
+
+  // Return the half-open range [first, last+1) spanned by the smallest and
+  // the largest global row index of the graph's row map on this processor.
+  std::pair<SparsityPattern::size_type, SparsityPattern::size_type>
+  SparsityPattern::local_range () const
+  {
+    const size_type first_row     = min_my_gid(graph->RowMap());
+    const size_type past_last_row = max_my_gid(graph->RowMap())+1;
+
+    return std::pair<size_type, size_type> (first_row, past_last_row);
+  }
+
+
+
+  // Return the global number of entries of the graph.
+  SparsityPattern::size_type
+  SparsityPattern::n_nonzero_elements () const
+  {
+    const TrilinosWrappers::types::int_type global_entry_count =
+      n_global_entries(*graph);
+
+    return static_cast<size_type>(global_entry_count);
+  }
+
+
+
+  // Return the value of MaxNumIndices() of the graph as an unsigned number.
+  unsigned int
+  SparsityPattern::max_entries_per_row () const
+  {
+    const int max_indices = graph->MaxNumIndices();
+
+    return static_cast<unsigned int>(max_indices);
+  }
+
+
+
+  // Return the number of entries in the global row @p row. If the row is
+  // not stored on this processor, the result is -1 converted to size_type.
+  SparsityPattern::size_type
+  SparsityPattern::row_length (const size_type row) const
+  {
+    Assert (row < n_rows(), ExcInternalError());
+
+    // translate the global row number into a local one; only the
+    // processor that stores the row gets a non-negative value
+    const TrilinosWrappers::types::int_type local_row =
+      graph->LRID(static_cast<TrilinosWrappers::types::int_type>(row));
+
+    if (local_row < 0)
+      {
+        const TrilinosWrappers::types::int_type not_stored_here = -1;
+        return static_cast<size_type>(not_stored_here);
+      }
+
+    const TrilinosWrappers::types::int_type n_indices =
+      graph->NumMyIndices (local_row);
+    return static_cast<size_type>(n_indices);
+  }
+
+
+
+  // Return the domain map of the graph, cast to an Epetra_Map.
+  const Epetra_Map &
+  SparsityPattern::domain_partitioner () const
+  {
+    const Epetra_Map &domain_map =
+      static_cast<const Epetra_Map &>(graph->DomainMap());
+    return domain_map;
+  }
+
+
+
+  // Return the range map of the graph, cast to an Epetra_Map.
+  const Epetra_Map &
+  SparsityPattern::range_partitioner () const
+  {
+    const Epetra_Map &range_map =
+      static_cast<const Epetra_Map &>(graph->RangeMap());
+    return range_map;
+  }
+
+
+
+  // Return the row map of the graph, cast to an Epetra_Map.
+  const Epetra_Map &
+  SparsityPattern::row_partitioner () const
+  {
+    const Epetra_Map &row_map =
+      static_cast<const Epetra_Map &>(graph->RowMap());
+    return row_map;
+  }
+
+
+
+  // Return the column map of the graph, cast to an Epetra_Map.
+  const Epetra_Map &
+  SparsityPattern::col_partitioner () const
+  {
+    const Epetra_Map &col_map =
+      static_cast<const Epetra_Map &>(graph->ColMap());
+    return col_map;
+  }
+
+
+
+  // Return the communicator of the graph's range map.
+  const Epetra_Comm &
+  SparsityPattern::trilinos_communicator () const
+  {
+    const Epetra_Comm &communicator = graph->RangeMap().Comm();
+    return communicator;
+  }
+
+
+
+  // Return the MPI communicator of the graph's range map. Without MPI
+  // support, MPI_COMM_SELF is returned.
+  MPI_Comm
+  SparsityPattern::get_mpi_communicator () const
+  {
+
+#ifdef DEAL_II_WITH_MPI
+
+    const Epetra_MpiComm *mpi_comm
+      = dynamic_cast<const Epetra_MpiComm *>(&graph->RangeMap().Comm());
+    // The cast yields a null pointer if the communicator is not an
+    // Epetra_MpiComm; report that in debug mode instead of dereferencing
+    // the null pointer without any diagnostic.
+    Assert (mpi_comm != 0, ExcInternalError());
+    return mpi_comm->Comm();
+#else
+
+    return MPI_COMM_SELF;
+
+#endif
+
+  }
+
+
+
+  // Not implemented: in debug mode the assertion below always fails with
+  // ExcNotImplemented; nothing is written.
+  void
+  SparsityPattern::write_ascii ()
+  {
+    Assert (false, ExcNotImplemented());
+  }
+
+
+
+  // As of now, no particularly neat
+  // output is generated in case of
+  // multiple processors.
+  // Write the pattern to @p out. With @p write_extended_trilinos_info the
+  // graph is streamed as a whole; otherwise one line "(i,c) " is written
+  // for every entry c of the row view of every locally stored row i.
+  // Throws ExcIO if the stream is in a failed state afterwards.
+  void
+  SparsityPattern::print (std::ostream &out,
+                          const bool    write_extended_trilinos_info) const
+  {
+    if (write_extended_trilinos_info)
+      out << *graph;
+    else
+      {
+        for (int i=0; i<graph->NumMyRows(); ++i)
+          {
+            // initialized so that a failing extraction prints nothing
+            int *indices = 0;
+            int num_entries = 0;
+            graph->ExtractMyRowView (i, num_entries, indices);
+
+            // The row view holds exactly num_entries entries, so it has to
+            // be indexed with the position j itself. Translating j through
+            // global_row_index() first could address memory outside of the
+            // view.
+            for (int j=0; j<num_entries; ++j)
+              out << "(" << i << "," << indices[j] << ") "
+                  << std::endl;
+          }
+      }
+
+    AssertThrow (out, ExcIO());
+  }
+
+
+
+  // Write one line "column -row" for every entry of the locally stored
+  // rows to @p out. The graph has to be filled (checked in debug mode).
+  // Throws ExcIO if the stream is in a failed state afterwards.
+  void
+  SparsityPattern::print_gnuplot (std::ostream &out) const
+  {
+    Assert (graph->Filled() == true, ExcInternalError());
+    for (unsigned int row=0; row<local_size(); ++row)
+      {
+        // initialized so that a failing extraction prints nothing
+        int *indices = 0;
+        int num_entries = 0;
+        graph->ExtractMyRowView (row, num_entries, indices);
+
+        // The row view holds exactly num_entries entries, so it has to be
+        // indexed with the position j itself. Translating j through
+        // global_row_index() first could address memory outside of the
+        // view.
+        for (int j=0; j<num_entries; ++j)
+          // while matrix entries are usually
+          // written (i,j), with i vertical and
+          // j horizontal, gnuplot output is
+          // x-y, that is we have to exchange
+          // the order of output
+          out << indices[j]
+              << " " << -static_cast<signed int>(row) << std::endl;
+      }
+
+    AssertThrow (out, ExcIO());
+  }
+
+//TODO: Implement!
+  // Not implemented: in debug mode the assertion below always fails with
+  // ExcNotImplemented; otherwise 0 is returned, which is not an actual
+  // measurement.
+  std::size_t
+  SparsityPattern::memory_consumption() const
+  {
+    Assert(false, ExcNotImplemented());
+    return 0;
+  }
+
+
+  // explicit instantiations
+  //
+  // Each member function template defined in this file is instantiated for
+  // dealii::SparsityPattern and dealii::DynamicSparsityPattern.
+
+  // copy_from (sparsity pattern)
+  template void
+  SparsityPattern::copy_from (const dealii::SparsityPattern &);
+  template void
+  SparsityPattern::copy_from (const dealii::DynamicSparsityPattern &);
+
+
+  // reinit (map, sparsity pattern, exchange_data)
+  template void
+  SparsityPattern::reinit (const Epetra_Map &,
+                           const dealii::SparsityPattern &,
+                           bool);
+  template void
+  SparsityPattern::reinit (const Epetra_Map &,
+                           const dealii::DynamicSparsityPattern &,
+                           bool);
+
+  // reinit (row map, column map, sparsity pattern, exchange_data)
+  template void
+  SparsityPattern::reinit (const Epetra_Map &,
+                           const Epetra_Map &,
+                           const dealii::SparsityPattern &,
+                           bool);
+  template void
+  SparsityPattern::reinit (const Epetra_Map &,
+                           const Epetra_Map &,
+                           const dealii::DynamicSparsityPattern &,
+                           bool);
+
+
+  // reinit (index set, sparsity pattern, communicator, exchange_data)
+  template void
+  SparsityPattern::reinit (const IndexSet &,
+                           const dealii::SparsityPattern &,
+                           const MPI_Comm &,
+                           bool);
+  template void
+  SparsityPattern::reinit (const IndexSet &,
+                           const dealii::DynamicSparsityPattern &,
+                           const MPI_Comm &,
+                           bool);
+
+
+  // reinit (row index set, column index set, sparsity pattern,
+  // communicator, exchange_data)
+  template void
+  SparsityPattern::reinit (const IndexSet &,
+                           const IndexSet &,
+                           const dealii::SparsityPattern &,
+                           const MPI_Comm &,
+                           bool);
+  template void
+  SparsityPattern::reinit (const IndexSet &,
+                           const IndexSet &,
+                           const dealii::DynamicSparsityPattern &,
+                           const MPI_Comm &,
+                           bool);
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_TRILINOS
diff --git a/source/lac/trilinos_vector.cc b/source/lac/trilinos_vector.cc
new file mode 100644
index 0000000..b0cbba0
--- /dev/null
+++ b/source/lac/trilinos_vector.cc
@@ -0,0 +1,757 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/trilinos_vector.h>
+
+#ifdef DEAL_II_WITH_TRILINOS
+
+#  include <deal.II/lac/trilinos_sparse_matrix.h>
+#  include <deal.II/lac/trilinos_block_vector.h>
+
+DEAL_II_DISABLE_EXTRA_DIAGNOSTICS
+#  include <Epetra_Import.h>
+#  include <Epetra_Vector.h>
+DEAL_II_ENABLE_EXTRA_DIAGNOSTICS
+
+#  include <cmath>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace TrilinosWrappers
+{
+  namespace
+  {
+    // The helpers below hide the difference between the 32-bit and the
+    // 64-bit Epetra interfaces. Depending on whether
+    // DEAL_II_WITH_64BIT_INDICES is defined, each helper forwards to the
+    // plain or to the '64' variant of an Epetra member function and returns
+    // the result in the matching integer type, so that it can be handed on
+    // to other Epetra member functions that are overloaded by index type.
+
+    // Query the size of an Epetra_BlockMap object.
+#ifndef DEAL_II_WITH_64BIT_INDICES
+    int n_global_elements (const Epetra_BlockMap &map)
+    {
+      return map.NumGlobalElements();
+    }
+#else
+    long long int n_global_elements (const Epetra_BlockMap &map)
+    {
+      return map.NumGlobalElements64();
+    }
+#endif
+
+    // Query the pointer to the internal array holding the list of global
+    // IDs assigned to the calling processor.
+#ifndef DEAL_II_WITH_64BIT_INDICES
+    int *my_global_elements(const Epetra_BlockMap &map)
+    {
+      return map.MyGlobalElements();
+    }
+#else
+    long long int *my_global_elements(const Epetra_BlockMap &map)
+    {
+      return map.MyGlobalElements64();
+    }
+#endif
+
+    // Query the global vector length of an Epetra_FEVector object.
+#ifndef DEAL_II_WITH_64BIT_INDICES
+    int global_length(const Epetra_FEVector &vector)
+    {
+      return vector.GlobalLength();
+    }
+#else
+    long long int global_length(const Epetra_FEVector &vector)
+    {
+      return vector.GlobalLength64();
+    }
+#endif
+  }
+
+  namespace MPI
+  {
+
+
+    // Default constructor. Sets up an Epetra_FEVector on a map built with
+    // all-zero size arguments on the communicator returned by
+    // Utilities::Trilinos::comm_self(), and records Zero as the last action.
+    Vector::Vector ()
+    {
+      const Epetra_Map empty_map (0, 0, 0, Utilities::Trilinos::comm_self());
+      vector.reset (new Epetra_FEVector(empty_map));
+      last_action = Zero;
+    }
+
+
+
+    // Construct a vector from an Epetra_Map. All work is delegated to the
+    // reinit() overload taking an Epetra_Map.
+    Vector::Vector (const Epetra_Map &parallel_partitioning)
+    {
+      this->reinit (parallel_partitioning);
+    }
+
+
+
+    // Construct a vector from an IndexSet and an MPI communicator. All work
+    // is delegated to the reinit() overload taking the same two arguments.
+    Vector::Vector (const IndexSet &parallel_partitioning,
+                    const MPI_Comm &communicator)
+    {
+      this->reinit (parallel_partitioning, communicator);
+    }
+
+
+
+    // Copy constructor. Creates a new Epetra_FEVector as a copy of the one
+    // held by v, takes over v's ghost flag and records Zero as the last
+    // action.
+    Vector::Vector (const Vector &v)
+      :
+      VectorBase()
+    {
+      vector.reset (new Epetra_FEVector(*v.vector));
+      has_ghosts  = v.has_ghosts;
+      last_action = Zero;
+    }
+
+
+
+#ifdef DEAL_II_WITH_CXX11
+    // Move constructor. First bring this object into the same minimal,
+    // valid state the default constructor produces, then exchange contents
+    // with v through swap().
+    Vector::Vector (Vector &&v)
+    {
+      const Epetra_Map empty_map (0, 0, 0, Utilities::Trilinos::comm_self());
+      vector.reset (new Epetra_FEVector(empty_map));
+      last_action = Zero;
+
+      swap(v);
+    }
+#endif
+
+
+
+    // Construct a vector with the layout given by input_map and fill it
+    // with the data of v. Throws ExcDimensionMismatch if the global sizes of
+    // input_map and of v's map differ. If both maps are the same, v's
+    // Epetra vector is simply copied; otherwise a vector on input_map is
+    // created and the data is imported through reinit().
+    //
+    // NOTE(review): has_ghosts is not assigned here and thus keeps the value
+    // set by VectorBase() -- confirm this is intended when input_map
+    // contains overlapping entries.
+    Vector::Vector (const Epetra_Map &input_map,
+                    const VectorBase &v)
+      :
+      VectorBase()
+    {
+      AssertThrow (n_global_elements(input_map) == n_global_elements(v.vector->Map()),
+                   ExcDimensionMismatch (n_global_elements(input_map),
+                                         n_global_elements(v.vector->Map())));
+
+      last_action = Zero;
+
+      if (input_map.SameAs(v.vector->Map()) == false)
+        {
+          // different layouts: allocate on the requested map and import
+          vector.reset (new Epetra_FEVector(input_map));
+          reinit (v, false, true);
+        }
+      else
+        vector.reset (new Epetra_FEVector(*v.vector));
+    }
+
+
+
+    // Construct a vector whose layout is derived from parallel_partitioner
+    // (through IndexSet::make_trilinos_map) and fill it with the data of v
+    // by importing through reinit(). Throws ExcDimensionMismatch if the size
+    // of the index set differs from the global size of v's map.
+    //
+    // NOTE(review): has_ghosts is not assigned here and thus keeps the value
+    // set by VectorBase() -- confirm this is intended.
+    Vector::Vector (const IndexSet   &parallel_partitioner,
+                    const VectorBase &v,
+                    const MPI_Comm   &communicator)
+      :
+      VectorBase()
+    {
+      AssertThrow (parallel_partitioner.size() ==
+                   static_cast<size_type>(n_global_elements(v.vector->Map())),
+                   ExcDimensionMismatch (parallel_partitioner.size(),
+                                         n_global_elements(v.vector->Map())));
+
+      last_action = Zero;
+
+      const Epetra_Map target_map
+      (parallel_partitioner.make_trilinos_map(communicator, true));
+      vector.reset (new Epetra_FEVector(target_map));
+
+      reinit (v, false, true);
+    }
+
+    // Construct a vector from two index sets: the union of 'local' and
+    // 'ghost' is formed and handed to reinit() together with the
+    // communicator.
+    Vector::Vector (const IndexSet &local,
+                    const IndexSet &ghost,
+                    const MPI_Comm &communicator)
+      :
+      VectorBase()
+    {
+      IndexSet local_and_ghost (local);
+      local_and_ghost.add_indices(ghost);
+
+      reinit(local_and_ghost, communicator);
+    }
+
+
+
+    // Destructor. The body is intentionally empty.
+    Vector::~Vector ()
+    {
+    }
+
+
+
+    // Reinitialize the vector to the layout given by input_map. The
+    // nonlocal_vector is dropped. If the current map differs from input_map
+    // a new Epetra_FEVector is allocated; otherwise the existing entries
+    // are set to zero unless omit_zeroing_entries is true. Afterwards
+    // has_ghosts is true exactly if the map does not have unique global
+    // IDs, and the last action is Zero.
+    void
+    Vector::reinit (const Epetra_Map &input_map,
+                    const bool        omit_zeroing_entries)
+    {
+      nonlocal_vector.reset();
+
+      const bool same_layout = vector->Map().SameAs(input_map);
+      if (!same_layout)
+        vector.reset (new Epetra_FEVector(input_map));
+      else if (!omit_zeroing_entries)
+        {
+          const int ierr = vector->PutScalar(0.);
+          (void)ierr;
+          Assert (ierr == 0, ExcTrilinosError(ierr));
+        }
+
+      last_action = Zero;
+      has_ghosts  = !vector->Map().UniqueGIDs();
+    }
+
+
+
+    // Reinitialize the vector from an IndexSet: the index set is converted
+    // to an Epetra_Map via make_trilinos_map() and the Epetra_Map overload
+    // of reinit() does the rest.
+    void
+    Vector::reinit (const IndexSet &parallel_partitioner,
+                    const MPI_Comm &communicator,
+                    const bool      omit_zeroing_entries)
+    {
+      nonlocal_vector.reset();
+
+      const Epetra_Map layout
+      (parallel_partitioner.make_trilinos_map (communicator, true));
+      reinit (layout, omit_zeroing_entries);
+    }
+
+
+
+    // Reinitialize this vector from another vector v. The nonlocal_vector
+    // is dropped in any case.
+    //
+    // With allow_different_maps == true, the layout of this vector is kept
+    // and the data of v is imported into it (Insert mode); the global sizes
+    // must agree, otherwise ExcDimensionMismatch is thrown.
+    //
+    // With allow_different_maps == false, this vector takes over the map of
+    // v if it differs from the current one; if the maps already agree, the
+    // entries are set to zero unless omit_zeroing_entries is true.
+    void
+    Vector::reinit (const VectorBase &v,
+                    const bool        omit_zeroing_entries,
+                    const bool        allow_different_maps)
+    {
+      nonlocal_vector.reset();
+
+      if (allow_different_maps)
+        {
+          // Data exchange between possibly different layouts. The first
+          // assertion only checks that the caller did not ask for a
+          // combination of options that makes no sense.
+          Assert (omit_zeroing_entries == false,
+                  ExcMessage ("It is not possible to exchange data with the "
+                              "option 'omit_zeroing_entries' set, which would not write "
+                              "elements."));
+
+          AssertThrow (size() == v.size(),
+                       ExcDimensionMismatch (size(), v.size()));
+
+          Epetra_Import importer (vector->Map(), v.vector->Map());
+
+          const int ierr = vector->Import(*v.vector, importer, Insert);
+          AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+          last_action = Insert;
+          return;
+        }
+
+      // From here on the layout of v is authoritative.
+      if (vector->Map().SameAs(v.vector->Map()) == false)
+        {
+          // layouts differ: allocate a fresh vector on the map of v
+          vector.reset (new Epetra_FEVector(v.vector->Map()));
+          has_ghosts = v.has_ghosts;
+          last_action = Zero;
+        }
+      else if (omit_zeroing_entries == false)
+        {
+          // same size and parallel distribution: finish pending
+          // operations, then clear the entries
+          int ierr = vector->GlobalAssemble (last_action);
+          (void)ierr;
+          Assert (ierr == 0, ExcTrilinosError(ierr));
+
+          ierr = vector->PutScalar(0.0);
+          Assert (ierr == 0, ExcTrilinosError(ierr));
+
+          last_action = Zero;
+        }
+    }
+
+
+
+    // Reinitialize this vector from a block vector v by concatenating the
+    // locally stored parts of all blocks.
+    //
+    // A new Epetra_Map is built in which the global indices of block b are
+    // shifted by the accumulated sizes of blocks 0..b-1. If import_data is
+    // false, this vector is replaced by a vector on that new map holding
+    // the block data. If import_data is true, the block data is first
+    // gathered in a temporary vector on the new map and then imported
+    // (Insert mode) into this vector, whose layout is kept; in that case
+    // ExcDimensionMismatch is thrown if the global sizes disagree.
+    void
+    Vector::reinit (const BlockVector &v,
+                    const bool         import_data)
+    {
+      nonlocal_vector.reset();
+
+      // Without any block there is no data to copy and no block from which
+      // the communicator could be taken, so there is nothing to do.
+      if (v.n_blocks() == 0)
+        return;
+
+      // create a vector that holds all the elements contained in the block
+      // vector. need to manually create an Epetra_Map.
+      size_type n_elements = 0, added_elements = 0, block_offset = 0;
+      for (size_type block=0; block<v.n_blocks(); ++block)
+        n_elements += v.block(block).local_size();
+      std::vector<TrilinosWrappers::types::int_type> global_ids (n_elements, -1);
+      for (size_type block=0; block<v.n_blocks(); ++block)
+        {
+          TrilinosWrappers::types::int_type *glob_elements =
+            my_global_elements(v.block(block).vector_partitioner());
+          for (size_type i=0; i<v.block(block).local_size(); ++i)
+            global_ids[added_elements++] = glob_elements[i] + block_offset;
+          block_offset += v.block(block).size();
+        }
+
+      Assert (n_elements == added_elements, ExcInternalError());
+
+      // global_ids is empty if n_elements == 0. Forming &global_ids[0]
+      // would then be undefined behavior, so pass a null pointer instead.
+      TrilinosWrappers::types::int_type *const global_ids_ptr =
+        (global_ids.empty() ? 0 : &global_ids[0]);
+      Epetra_Map new_map (v.size(), n_elements, global_ids_ptr, 0,
+                          v.block(0).vector_partitioner().Comm());
+
+      // When importing, the concatenated data lives in a temporary vector;
+      // otherwise it is written directly into the (new) vector of this
+      // object.
+      std_cxx11::shared_ptr<Epetra_FEVector> actual_vec;
+      if ( import_data == true )
+        actual_vec.reset (new Epetra_FEVector (new_map));
+      else
+        {
+          vector.reset (new Epetra_FEVector (new_map));
+          actual_vec = vector;
+        }
+
+      // copy the local entries block by block, advancing the write position
+      // by the local size of each block
+      TrilinosScalar *entries = (*actual_vec)[0];
+      for (size_type block=0; block<v.n_blocks(); ++block)
+        {
+          v.block(block).trilinos_vector().ExtractCopy (entries, 0);
+          entries += v.block(block).local_size();
+        }
+
+      if (import_data == true)
+        {
+          AssertThrow (static_cast<size_type>(global_length(*actual_vec))
+                       == v.size(),
+                       ExcDimensionMismatch (global_length(*actual_vec),
+                                             v.size()));
+
+          Epetra_Import data_exchange (vector->Map(), actual_vec->Map());
+
+          const int ierr = vector->Import(*actual_vec, data_exchange, Insert);
+          AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+          last_action = Insert;
+        }
+
+    }
+
+
+    // Reinitialize the vector from a set of locally owned and a set of
+    // ghost entries.
+    //
+    // If vector_writable is false, the vector is laid out on the union of
+    // both index sets. If it is true, the vector itself is laid out on the
+    // locally owned entries only; in addition, when running on more than
+    // one MPI process, nonlocal_vector is created on the ghost entries that
+    // are not locally owned.
+    void Vector::reinit(const IndexSet &locally_owned_entries,
+                        const IndexSet &ghost_entries,
+                        const MPI_Comm &communicator,
+                        const bool      vector_writable)
+    {
+      nonlocal_vector.reset();
+
+      if (vector_writable)
+        {
+          const Epetra_Map owned_map
+          (locally_owned_entries.make_trilinos_map (communicator, true));
+          Assert (owned_map.IsOneToOne(),
+                  ExcMessage("A writable vector must not have ghost entries in "
+                             "its parallel partitioning"));
+          reinit (owned_map);
+
+          // entries that are ghosted here but owned elsewhere
+          IndexSet nonlocal_entries(ghost_entries);
+          nonlocal_entries.subtract_set(locally_owned_entries);
+
+          if (Utilities::MPI::n_mpi_processes(communicator) > 1)
+            {
+              const Epetra_Map nonlocal_map
+              (nonlocal_entries.make_trilinos_map(communicator, true));
+              nonlocal_vector.reset(new Epetra_MultiVector(nonlocal_map, 1));
+            }
+        }
+      else
+        {
+          IndexSet owned_and_ghost (locally_owned_entries);
+          owned_and_ghost.add_indices(ghost_entries);
+          reinit(owned_and_ghost, communicator);
+        }
+    }
+
+
+    // Copy assignment. Three cases are distinguished:
+    //
+    // 1. Both vectors have the same layout: only the data is copied; the
+    //    memory and the underlying Epetra_Map are kept.
+    // 2. The vectors have the same global size but different parallel
+    //    layouts, and at least one of them has a one-to-one mapping: the
+    //    data is transferred through the import functionality of reinit().
+    // 3. Otherwise this vector is rebuilt as a copy of v.
+    //
+    // In all cases, if v has a nonlocal_vector, a new nonlocal_vector is
+    // created on the same map; this is done once, after the case
+    // distinction.
+    //
+    // NOTE(review): if v has no nonlocal_vector, an existing nonlocal_vector
+    // of this object is left untouched in cases 1 and 3 -- confirm that this
+    // is intended.
+    Vector &
+    Vector::operator = (const Vector &v)
+    {
+      if (vector->Map().SameAs(v.vector->Map()))
+        {
+          // First case: identical layout, copy the local data.
+          *vector = *v.vector;
+          last_action = Zero;
+        }
+      else if (size() == v.size() &&
+               (v.vector->Map().UniqueGIDs() || vector->Map().UniqueGIDs()))
+        {
+          // Second case: same global size, different layout.
+          reinit (v, false, true);
+        }
+      else
+        {
+          // Third case: rebuild the calling vector.
+          vector.reset (new Epetra_FEVector(*v.vector));
+          last_action = Zero;
+          has_ghosts = v.has_ghosts;
+        }
+
+      if (v.nonlocal_vector.get() != 0)
+        nonlocal_vector.reset(new Epetra_MultiVector(v.nonlocal_vector->Map(), 1));
+
+      return *this;
+    }
+
+
+
+#ifdef DEAL_II_WITH_CXX11
+    // Move assignment: exchange the contents of this vector and v through
+    // swap() and return this vector.
+    Vector &
+    Vector::operator= (Vector &&v)
+    {
+      swap(v);
+
+      return *this;
+    }
+#endif
+
+
+
+    // Assignment from a localized TrilinosWrappers::Vector. The layout of
+    // this vector is kept; the data of v is imported (Insert mode). The
+    // nonlocal_vector is dropped. Both vectors must have the same global
+    // size (checked by an assertion).
+    Vector &
+    Vector::operator = (const TrilinosWrappers::Vector &v)
+    {
+      nonlocal_vector.reset();
+
+      Assert (size() == v.size(), ExcDimensionMismatch(size(), v.size()));
+
+      Epetra_Import importer (vector->Map(), v.vector->Map());
+      const int ierr = vector->Import(*v.vector, importer, Insert);
+      AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+      last_action = Insert;
+      return *this;
+    }
+
+
+
+    // Make this vector live on the column map of the matrix m and import
+    // the data of v into it (Insert mode). The matrix must be filled and v
+    // must have unique global IDs; both conditions are checked by
+    // assertions. A new Epetra_FEVector is only allocated if the current
+    // map differs from the column map of m.
+    void
+    Vector::import_nonlocal_data_for_fe (const TrilinosWrappers::SparseMatrix &m,
+                                         const Vector                         &v)
+    {
+      Assert (m.trilinos_matrix().Filled() == true,
+              ExcMessage ("Matrix is not compressed. "
+                          "Cannot find exchange information!"));
+      Assert (v.vector->Map().UniqueGIDs() == true,
+              ExcMessage ("The input vector has overlapping data, "
+                          "which is not allowed."));
+
+      const bool has_column_layout =
+        vector->Map().SameAs(m.trilinos_matrix().ColMap());
+      if (!has_column_layout)
+        vector.reset (new Epetra_FEVector(m.trilinos_matrix().ColMap()));
+
+      Epetra_Import importer (vector->Map(), v.vector->Map());
+      const int ierr = vector->Import(*v.vector, importer, Insert);
+      AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+      last_action = Insert;
+    }
+
+  } /* end of namespace MPI */
+
+
+
+
+  // Default constructor of the localized vector. Sets up an
+  // Epetra_FEVector on an Epetra_LocalMap constructed with all-zero size
+  // arguments on the communicator returned by
+  // Utilities::Trilinos::comm_self().
+  Vector::Vector ()
+  {
+    const Epetra_LocalMap empty_map (0, 0, Utilities::Trilinos::comm_self());
+    vector.reset (new Epetra_FEVector(empty_map));
+    last_action = Zero;
+  }
+
+
+
+  // Construct a localized vector with n elements on the communicator
+  // returned by Utilities::Trilinos::comm_self().
+  Vector::Vector (const size_type n)
+  {
+    const Epetra_LocalMap local_map (static_cast<TrilinosWrappers::types::int_type>(n),
+                                     0,
+                                     Utilities::Trilinos::comm_self());
+    vector.reset (new Epetra_FEVector (local_map));
+    last_action = Zero;
+  }
+
+
+
+  // Construct a localized vector from an Epetra_Map. Only the global
+  // number of elements, the index base and the communicator of input_map
+  // are used to build the Epetra_LocalMap of this vector.
+  Vector::Vector (const Epetra_Map &input_map)
+  {
+    const Epetra_LocalMap local_map (n_global_elements(input_map),
+                                     input_map.IndexBase(),
+                                     input_map.Comm());
+    vector.reset (new Epetra_FEVector(local_map));
+    last_action = Zero;
+  }
+
+
+
+  // Construct a localized vector whose length is the size of the given
+  // index set. With MPI support the given communicator is wrapped in an
+  // Epetra_MpiComm; without MPI an Epetra_SerialComm is used and the
+  // communicator argument is ignored.
+  Vector::Vector (const IndexSet &partitioning,
+                  const MPI_Comm &communicator)
+  {
+    last_action = Zero;
+
+#ifdef DEAL_II_WITH_MPI
+    Epetra_MpiComm trilinos_communicator (communicator);
+#else
+    Epetra_SerialComm trilinos_communicator;
+    (void)communicator;
+#endif
+
+    const Epetra_LocalMap local_map
+    (static_cast<TrilinosWrappers::types::int_type>(partitioning.size()),
+     0,
+     trilinos_communicator);
+    vector.reset (new Epetra_FEVector(local_map));
+  }
+
+
+
+  // Construct a localized vector from an arbitrary VectorBase. A local map
+  // with the global size, index base and communicator of v's map is
+  // created. If that map is the same as v's map the values are copied with
+  // Update(); otherwise they are imported through reinit().
+  Vector::Vector (const VectorBase &v)
+  {
+    last_action = Zero;
+
+    const Epetra_LocalMap local_map (n_global_elements(v.vector->Map()),
+                                     v.vector->Map().IndexBase(),
+                                     v.vector->Map().Comm());
+    vector.reset (new Epetra_FEVector(local_map));
+
+    if (vector->Map().SameAs(v.vector->Map()) == false)
+      reinit (v, false, true);
+    else
+      {
+        const int ierr = vector->Update(1.0, *v.vector, 0.0);
+        AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+      }
+  }
+
+
+
+  // Reinitialize the localized vector to n elements. If the size changes,
+  // a new vector is allocated on the communicator returned by
+  // Utilities::Trilinos::comm_self(). If the size stays the same, the
+  // entries are set to zero unless omit_zeroing_entries is true. The last
+  // action is Zero afterwards.
+  void
+  Vector::reinit (const size_type n,
+                  const bool      omit_zeroing_entries)
+  {
+    if (size() == n)
+      {
+        if (omit_zeroing_entries == false)
+          {
+            // finish pending operations before clearing the entries
+            int ierr = vector->GlobalAssemble(last_action);
+            (void)ierr;
+            Assert (ierr == 0, ExcTrilinosError(ierr));
+
+            ierr = vector->PutScalar(0.0);
+            Assert (ierr == 0, ExcTrilinosError(ierr));
+          }
+      }
+    else
+      {
+        const Epetra_LocalMap local_map (static_cast<TrilinosWrappers::types::int_type>(n),
+                                         0,
+                                         Utilities::Trilinos::comm_self());
+        vector.reset (new Epetra_FEVector (local_map));
+      }
+
+    last_action = Zero;
+  }
+
+
+
+  // Reinitialize the localized vector from an Epetra_Map. If the global
+  // number of elements changes, a new vector is allocated on a local map
+  // with the size, index base and communicator of input_map. If it stays
+  // the same, the entries are set to zero unless omit_zeroing_entries is
+  // true. The last action is Zero afterwards.
+  void
+  Vector::reinit (const Epetra_Map &input_map,
+                  const bool        omit_zeroing_entries)
+  {
+    if (n_global_elements(vector->Map()) == n_global_elements(input_map))
+      {
+        if (omit_zeroing_entries == false)
+          {
+            // finish pending operations before clearing the entries
+            int ierr = vector->GlobalAssemble(last_action);
+            (void)ierr;
+            Assert (ierr == 0, ExcTrilinosError(ierr));
+
+            ierr = vector->PutScalar(0.0);
+            Assert (ierr == 0, ExcTrilinosError(ierr));
+          }
+      }
+    else
+      {
+        const Epetra_LocalMap local_map (n_global_elements(input_map),
+                                         input_map.IndexBase(),
+                                         input_map.Comm());
+        vector.reset (new Epetra_FEVector (local_map));
+      }
+
+    last_action = Zero;
+  }
+
+
+
+  // Reinitialize the localized vector to the size of the given index set.
+  // If the size changes, a new vector is allocated; with MPI support the
+  // given communicator is wrapped in an Epetra_MpiComm, otherwise an
+  // Epetra_SerialComm is used. If the size stays the same, the entries are
+  // set to zero unless omit_zeroing_entries is true. The last action is
+  // Zero afterwards.
+  void
+  Vector::reinit (const IndexSet &partitioning,
+                  const MPI_Comm &communicator,
+                  const bool      omit_zeroing_entries)
+  {
+    const bool size_changes =
+      (n_global_elements(vector->Map()) !=
+       static_cast<TrilinosWrappers::types::int_type>(partitioning.size()));
+
+    if (size_changes)
+      {
+#ifdef DEAL_II_WITH_MPI
+        Epetra_MpiComm trilinos_communicator (communicator);
+#else
+        Epetra_SerialComm trilinos_communicator;
+        (void)communicator;
+#endif
+        const Epetra_LocalMap local_map
+        (static_cast<TrilinosWrappers::types::int_type>(partitioning.size()),
+         0,
+         trilinos_communicator);
+        vector.reset (new Epetra_FEVector(local_map));
+      }
+    else if (omit_zeroing_entries == false)
+      {
+        // finish pending operations before clearing the entries
+        int ierr = vector->GlobalAssemble(last_action);
+        (void)ierr;
+        Assert (ierr == 0, ExcTrilinosError(ierr));
+
+        ierr = vector->PutScalar(0.0);
+        Assert (ierr == 0, ExcTrilinosError(ierr));
+      }
+
+    last_action = Zero;
+  }
+
+
+
+  // Reinitialize the localized vector from another vector v.
+  //
+  // With allow_different_maps == true, the layout of this vector is kept
+  // and the data of v is imported into it (Insert mode); the global sizes
+  // must agree, otherwise ExcDimensionMismatch is thrown.
+  //
+  // With allow_different_maps == false, a new vector with the global
+  // length of v is allocated if the local ranges differ; otherwise the
+  // entries of the existing vector are set to zero. Note that in this
+  // branch the entries are zeroed regardless of omit_zeroing_entries.
+  void
+  Vector::reinit (const VectorBase &v,
+                  const bool        omit_zeroing_entries,
+                  const bool        allow_different_maps)
+  {
+    (void)omit_zeroing_entries;
+
+    if (allow_different_maps)
+      {
+        // Data exchange: both vectors must already have the same size.
+        Assert (omit_zeroing_entries == false,
+                ExcMessage ("It is not possible to exchange data with the "
+                            "option 'omit_zeroing_entries' set, which would not write "
+                            "elements."));
+
+        AssertThrow (size() == v.size(),
+                     ExcDimensionMismatch (size(), v.size()));
+
+        Epetra_Import importer (vector->Map(), v.vector->Map());
+
+        const int ierr = vector->Import(*v.vector, importer, Insert);
+        AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+        last_action = Insert;
+        return;
+      }
+
+    // Different maps are not allowed: take over the size of v.
+    if (local_range() != v.local_range())
+      {
+        const Epetra_LocalMap local_map (global_length(*(v.vector)),
+                                         v.vector->Map().IndexBase(),
+                                         v.vector->Comm());
+        vector.reset (new Epetra_FEVector(local_map));
+      }
+    else
+      {
+        Assert (vector->Map().SameAs(v.vector->Map()) == true,
+                ExcMessage ("The Epetra maps in the assignment operator ="
+                            " do not match, even though the local_range "
+                            " seems to be the same. Check vector setup!"));
+
+        // finish pending operations before clearing the entries
+        int ierr = vector->GlobalAssemble(last_action);
+        (void)ierr;
+        Assert (ierr == 0, ExcTrilinosError(ierr));
+
+        ierr = vector->PutScalar(0.0);
+        Assert (ierr == 0, ExcTrilinosError(ierr));
+      }
+
+    last_action = Zero;
+  }
+
+
+
+  // Assignment from a distributed MPI::Vector. If the sizes differ, this
+  // vector is first re-allocated on a local map with the global size,
+  // index base and communicator of v. The data is then imported through
+  // reinit().
+  Vector &
+  Vector::operator = (const MPI::Vector &v)
+  {
+    const bool size_differs = (size() != v.size());
+    if (size_differs)
+      {
+        const Epetra_LocalMap local_map (n_global_elements(v.vector->Map()),
+                                         v.vector->Map().IndexBase(),
+                                         v.vector->Comm());
+        vector.reset (new Epetra_FEVector(local_map));
+      }
+
+    reinit (v, false, true);
+
+    return *this;
+  }
+
+
+
+  // Copy assignment between localized vectors. If the sizes differ, this
+  // vector is first re-allocated on a local map with the global size,
+  // index base and communicator of v. The values are then copied with
+  // Update().
+  Vector &
+  Vector::operator = (const Vector &v)
+  {
+    const bool size_differs = (size() != v.size());
+    if (size_differs)
+      {
+        const Epetra_LocalMap local_map (n_global_elements(v.vector->Map()),
+                                         v.vector->Map().IndexBase(),
+                                         v.vector->Comm());
+        vector.reset (new Epetra_FEVector(local_map));
+      }
+
+    // this = 1.0 * v + 0.0 * this
+    const int ierr = vector->Update(1.0, *v.vector, 0.0);
+    (void)ierr;
+    Assert (ierr == 0, ExcTrilinosError(ierr));
+
+    return *this;
+  }
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_TRILINOS
diff --git a/source/lac/trilinos_vector_base.cc b/source/lac/trilinos_vector_base.cc
new file mode 100644
index 0000000..d90e9c6
--- /dev/null
+++ b/source/lac/trilinos_vector_base.cc
@@ -0,0 +1,552 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/lac/trilinos_vector_base.h>
+
+#ifdef DEAL_II_WITH_TRILINOS
+
+#  include <cmath>
+
+DEAL_II_DISABLE_EXTRA_DIAGNOSTICS
+#  include <Epetra_Import.h>
+#  include <Epetra_Export.h>
+DEAL_II_ENABLE_EXTRA_DIAGNOSTICS
+
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace TrilinosWrappers
+{
+  namespace
+  {
+    // Helper function that queries the global vector length of an
+    // Epetra_FEVector object. Depending on whether
+    // DEAL_II_WITH_64BIT_INDICES is defined, it forwards to the 32- or the
+    // 64-bit Epetra function and returns the matching integer type.
+#ifdef DEAL_II_WITH_64BIT_INDICES
+    long long int global_length(const Epetra_FEVector &vector)
+    {
+      return vector.GlobalLength64();
+    }
+#else
+    int global_length(const Epetra_FEVector &vector)
+    {
+      return vector.GlobalLength();
+    }
+#endif
+  }
+
+  namespace internal
+  {
+    // Conversion of a vector element reference to its value. The global
+    // index is translated into a local index through the map of the
+    // underlying Epetra vector; both the global range and the local
+    // availability of the element are checked by assertions.
+    VectorReference::operator TrilinosScalar () const
+    {
+      Assert (index < vector.size(),
+              ExcIndexRange (index, 0, vector.size()));
+
+      // Trilinos offers element access through [] and (), of which only ()
+      // performs bounds checking. Since the bounds are checked here, the
+      // unchecked [] can be used. Only local values are accessible.
+      const Epetra_BlockMap &map = vector.vector->Map();
+
+      const TrilinosWrappers::types::int_type lid =
+        map.LID(static_cast<TrilinosWrappers::types::int_type>(index));
+      Assert (lid >= 0,
+              VectorBase::ExcAccessToNonLocalElement (index, vector.local_size(),
+                                                      map.MinMyGID(),
+                                                      map.MaxMyGID()));
+
+      return (*(vector.vector))[0][lid];
+    }
+  }
+
+
+
+  // Default constructor. The last action is Zero, the vector counts as
+  // compressed and has no ghosts. The Epetra vector is created on a map
+  // constructed with all-zero size arguments; the communicator is
+  // MPI_COMM_SELF with MPI support and a serial communicator otherwise.
+  VectorBase::VectorBase ()
+    :
+    last_action (Zero),
+    compressed  (true),
+    has_ghosts  (false),
+    vector(new Epetra_FEVector(
+#ifdef DEAL_II_WITH_MPI
+             Epetra_Map(0,0,Epetra_MpiComm(MPI_COMM_SELF))
+#else
+             Epetra_Map(0,0,Epetra_SerialComm())
+#endif
+           ))
+  {}
+
+
+
+  // Copy constructor. The Epetra vector is created as a copy of the one
+  // held by v and the ghost flag is taken over from v; the last action is
+  // Zero and the vector counts as compressed.
+  VectorBase::VectorBase (const VectorBase &v)
+    : Subscriptor(),
+      last_action (Zero),
+      compressed  (true),
+      has_ghosts  (v.has_ghosts),
+      vector      (new Epetra_FEVector(*v.vector))
+  {
+  }
+
+
+
+  // Destructor. The body is intentionally empty.
+  VectorBase::~VectorBase ()
+  {
+  }
+
+
+
+  // Clear the vector: replace the Epetra vector by a new, empty one, drop
+  // the ghost flag and set the last action to Zero. The communicator of
+  // the new map is MPI_COMM_SELF with MPI support and a serial
+  // communicator otherwise.
+  void
+  VectorBase::clear ()
+  {
+#ifdef DEAL_II_WITH_MPI
+    Epetra_MpiComm self_communicator (MPI_COMM_SELF);
+#else
+    Epetra_SerialComm self_communicator;
+#endif
+    const Epetra_Map empty_map (0, 0, self_communicator);
+
+    vector.reset (new Epetra_FEVector(empty_map));
+    has_ghosts  = false;
+    last_action = Zero;
+  }
+
+
+
+  // Copy assignment. If the local ranges of the two vectors agree, pending
+  // operations are finished and the values of v are copied with Update();
+  // the maps are expected to be the same in that case (checked by an
+  // assertion). Otherwise this vector is rebuilt as a copy of v, including
+  // the ghost flag. The last action is Zero afterwards.
+  VectorBase &
+  VectorBase::operator = (const VectorBase &v)
+  {
+    Assert (vector.get() != 0,
+            ExcMessage("Vector is not constructed properly."));
+
+    if (local_range() == v.local_range())
+      {
+        Assert (vector->Map().SameAs(v.vector->Map()) == true,
+                ExcMessage ("The Epetra maps in the assignment operator ="
+                            " do not match, even though the local_range "
+                            " seems to be the same. Check vector setup!"));
+
+        int ierr = vector->GlobalAssemble(last_action);
+        AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+        // this = 1.0 * v + 0.0 * this
+        ierr = vector->Update(1.0, *v.vector, 0.0);
+        AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+        last_action = Zero;
+      }
+    else
+      {
+        last_action = Zero;
+        vector.reset (new Epetra_FEVector(*v.vector));
+        has_ghosts = v.has_ghosts;
+      }
+
+    return *this;
+  }
+
+
+
+  // Assignment from a deal.II vector of the same size (checked by an
+  // assertion). Every element in the local range of this vector is copied
+  // individually from v.
+  //
+  // This is probably not the most efficient way: if number were known to
+  // be TrilinosScalar, the element-wise copy could be avoided. The
+  // operation is hopefully not a particularly frequent one.
+  template <typename number>
+  VectorBase &
+  VectorBase::operator = (const ::dealii::Vector<number> &v)
+  {
+    Assert (size() == v.size(),
+            ExcDimensionMismatch(size(), v.size()));
+
+    const std::pair<size_type, size_type> owned_range = this->local_range ();
+    TrilinosScalar *const local_values = (*vector)[0];
+
+    for (size_type global=owned_range.first; global<owned_range.second; ++global)
+      local_values[global-owned_range.first] = v(global);
+
+    return *this;
+  }
+
+
+
+  // Finish pending write operations on the vector.
+  //
+  // The combine mode handed to Trilinos is the locally recorded
+  // last_action; only if that is Zero, the mode is derived from
+  // given_last_action (add -> Add, insert -> Insert). If there is no
+  // nonlocal_vector, or the mode is not Add, the work is done by
+  // GlobalAssemble(); otherwise the content of nonlocal_vector is exported
+  // into the vector and nonlocal_vector is zeroed. A nonzero Trilinos error
+  // code raises ExcTrilinosError. Afterwards last_action is Zero and the
+  // vector is flagged as compressed.
+  void
+  VectorBase::compress (::dealii::VectorOperation::values given_last_action)
+  {
+    // Select which mode to send to Trilinos. Note that we use last_action if
+    // available and ignore what the user tells us to detect wrongly mixed
+    // operations. Typically given_last_action is only used on machines that do
+    // not execute an operation (because they have no own cells for example).
+    Epetra_CombineMode mode = last_action;
+    if (last_action == Zero)
+      {
+        if (given_last_action==::dealii::VectorOperation::add)
+          mode = Add;
+        else if (given_last_action==::dealii::VectorOperation::insert)
+          mode = Insert;
+      }
+
+#ifdef DEBUG
+#  ifdef DEAL_II_WITH_MPI
+    // check that every process has decided to use the same mode. This will
+    // otherwise result in undefined behaviour in the call to
+    // GlobalAssemble().
+    // The mode is converted to a double so that its minimum and maximum over
+    // all processes can be compared.
+    double double_mode = mode;
+    Utilities::MPI::MinMaxAvg result
+      = Utilities::MPI::min_max_avg (double_mode,
+                                     dynamic_cast<const Epetra_MpiComm *>
+                                     (&vector_partitioner().Comm())->GetMpiComm());
+    Assert(result.max-result.min<1e-5,
+           ExcMessage ("Not all processors agree whether the last operation on "
+                       "this vector was an addition or a set operation. This will "
+                       "prevent the compress() operation from succeeding."));
+
+#  endif
+#endif
+
+    // Now pass over the information about what we did last to the vector.
+    int ierr = 0;
+    if (nonlocal_vector.get() == 0 || mode != Add)
+      ierr = vector->GlobalAssemble(mode);
+    else
+      {
+        // Additions were collected in nonlocal_vector: export them into the
+        // vector and reset nonlocal_vector to zero.
+        Epetra_Export exporter(nonlocal_vector->Map(), vector->Map());
+        ierr = vector->Export(*nonlocal_vector, exporter, mode);
+        nonlocal_vector->PutScalar(0.);
+      }
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+    last_action = Zero;
+
+    compressed = true;
+  }
+
+
+
+  // Return the value of the element with the given global index. If the
+  // element is not present on the current processor, zero is returned;
+  // this is the difference to operator().
+  TrilinosScalar
+  VectorBase::el (const size_type index) const
+  {
+    // translate the global index into a local one
+    const TrilinosWrappers::types::int_type local_index =
+      vector->Map().LID(static_cast<TrilinosWrappers::types::int_type>(index));
+
+    return (local_index == -1) ? 0. : (*vector)[0][local_index];
+  }
+
+
+
+  // Return the value of the element with the given global index. If the
+  // element is not present on the current processor, an assertion
+  // (ExcAccessToNonLocalElement) is triggered; this is the main difference
+  // to the el() function. Should the assertion not stop execution, zero is
+  // returned.
+  TrilinosScalar
+  VectorBase::operator () (const size_type index) const
+  {
+    // translate the global index into a local one
+    const TrilinosWrappers::types::int_type local_index =
+      vector->Map().LID(static_cast<TrilinosWrappers::types::int_type>(index));
+
+    if (local_index != -1)
+      return (*vector)[0][local_index];
+
+    Assert (false, ExcAccessToNonLocalElement(index, local_size(),
+                                              vector->Map().MinMyGID(),
+                                              vector->Map().MaxMyGID()));
+    return 0.;
+  }
+
+  // Add v to this vector. If allow_different_maps is false this is just
+  // operator+=; otherwise the entries of v are imported into this map.
+  void
+  VectorBase::add (const VectorBase &v,
+                   const bool        allow_different_maps)
+  {
+    if (allow_different_maps == false)
+      *this += v;
+    else
+      {
+        Assert (!has_ghost_elements(), ExcGhostsPresent());
+        AssertThrow (size() == v.size(),
+                     ExcDimensionMismatch (size(), v.size()));
+        // Both branches move the data of v with an Epetra_Import object.
+#if DEAL_II_TRILINOS_VERSION_GTE(11,11,0)
+        Epetra_Import data_exchange (vector->Map(), v.vector->Map());
+        int ierr = vector->Import(*v.vector, data_exchange, Epetra_AddLocalAlso);
+        AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+        last_action = Add;
+#else
+        // In versions older than 11.11 the Import function is broken for
+        // adding. Hence, we provide a workaround in this case:
+        // a temporary vector that uses the map of this vector...
+        Epetra_MultiVector dummy(vector->Map(), 1, false);
+        Epetra_Import data_exchange (dummy.Map(), v.vector->Map());
+        // ...receives the entries of v in Insert mode...
+        int ierr = dummy.Import(*v.vector, data_exchange, Insert);
+        AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+        // ...and is then combined with this vector, both with factor 1.
+        ierr = vector->Update (1.0, dummy, 1.0);
+        AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+#endif
+      }
+  }
+
+
+
+  bool
+  VectorBase::operator == (const VectorBase &v) const
+  {
+    // Comparing vectors of different global size is a programming error.
+    Assert (size() == v.size(), ExcDimensionMismatch(size(), v.size()));
+    // Only the entries stored on this process are compared.
+    const size_type n_local = local_size();
+    if (n_local != v.local_size())
+      return false;
+    for (size_type k=0; k<n_local; ++k)
+      if ((*vector)[0][k] != (*(v.vector))[0][k])
+        return false;
+    return true;
+  }
+
+
+
+  bool
+  VectorBase::operator != (const VectorBase &v) const
+  {
+    // Both vectors are required to have the same global size.
+    Assert (size() == v.size(), ExcDimensionMismatch(size(), v.size()));
+    const bool vectors_match = (*this == v);
+    return !vectors_match;
+  }
+
+
+
+  bool
+  VectorBase::all_zero () const
+  {
+    // Scan the locally stored entries through the raw array returned by
+    // (*vector)[0], stopping at the first entry that differs from
+    // zero.
+    const TrilinosScalar *const local_begin = (*vector)[0];
+    const TrilinosScalar *const local_end   = local_begin + local_size();
+
+    // 0 if all local entries vanish, 1 otherwise; kept as an unsigned
+    // integer so that it can be summed over all processes below.
+    unsigned int found_nonzero = 0;
+    for (const TrilinosScalar *entry = local_begin; entry != local_end; ++entry)
+      if (*entry != 0)
+        {
+          found_nonzero = 1;
+          break;
+        }
+
+#ifdef DEAL_II_WITH_MPI
+    // The vector is zero only if no process saw a nonzero entry, so add
+    // up the per-process flags over the communicator of the vector's map.
+    const Epetra_MpiComm *mpi_comm
+      = dynamic_cast<const Epetra_MpiComm *>(&vector->Map().Comm());
+    const unsigned int n_flagged
+      = Utilities::MPI::sum(found_nonzero, mpi_comm->Comm());
+    return n_flagged == 0;
+#else
+    return found_nonzero == 0;
+#endif
+  }
+
+
+
+  bool
+  VectorBase::is_non_negative () const
+  {
+#ifdef DEAL_II_WITH_MPI
+    // Answering this question for a vector
+    // that is distributed over several
+    // processes requires communication,
+    // which still has to be implemented.
+    // Until then, only accept vectors whose
+    // entries are all stored locally.
+    AssertThrow(local_size() == size(), ExcNotImplemented());
+#endif
+
+    // Ask Trilinos for a view of the stored
+    // values.
+    TrilinosScalar *values;
+    int leading_dimension;
+    const int ierr = vector->ExtractView (&values, &leading_dimension);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    // TODO: This walks over size() entries
+    // of the local array and therefore
+    // won't work in parallel like this.
+    // Find a better way to do this in that
+    // case.
+    const TrilinosScalar *const values_end = values + size();
+    for (const TrilinosScalar *entry = values; entry != values_end; ++entry)
+      {
+        // The first negative entry settles
+        // the answer.
+        if (*entry < 0.0)
+          return false;
+      }
+
+    // No negative entry was found.
+    return true;
+  }
+
+
+
+  void
+  VectorBase::equ (const TrilinosScalar  a,
+                   const VectorBase     &v,
+                   const TrilinosScalar  b,
+                   const VectorBase     &w)
+  {
+    // Vectors with ghost elements must not be written to.
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+    Assert (v.local_size() == w.local_size(),
+            ExcDimensionMismatch (v.local_size(), w.local_size()));
+
+    // Both scaling factors have to be finite numbers.
+    AssertIsFinite(a);
+    AssertIsFinite(b);
+
+    const bool same_map_as_v = vector->Map().SameAs(v.vector->Map());
+    if (same_map_as_v)
+      {
+        // *this and v share a map; w has to share it as well so that all
+        // three vectors can be combined in a single Update() call, in
+        // which the previous content of *this gets the factor zero.
+        Assert (vector->Map().SameAs(w.vector->Map()),
+                ExcDifferentParallelPartitioning());
+        const int ierr = vector->Update(a, *v.vector, b, *w.vector, 0.0);
+        AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+        last_action = Zero;
+      }
+    else
+      {
+        // The maps differ: let sadd() do the work, with a factor of zero
+        // for the previous content of *this.
+        sadd(0., a, v, b, w);
+      }
+  }
+
+
+
+  // Print the locally stored entries to stdout, each formatted with
+  // format or, if format is null, with " %5.2f".
+  // TODO: up to now only local data is printed out! Find a way to
+  // neatly output distributed data...
+  void
+  VectorBase::print (const char *format) const
+  {
+    Assert (global_length(*vector)!=0, ExcEmptyObject());
+    (void)global_length;
+
+    // (*vector)[0] only holds the entries stored on this process, so the
+    // loop must stop at local_size(); running up to size() reads past the
+    // end of that array whenever the vector is distributed.
+    const size_type n_local = local_size();
+    for (size_type j=0; j<n_local; ++j)
+      {
+        const double t = (*vector)[0][j];
+        std::printf (format != 0 ? format : " %5.2f", t);
+      }
+    std::printf ("\n");
+  }
+
+
+
+  void
+  VectorBase::print (std::ostream      &out,
+                     const unsigned int precision,
+                     const bool         scientific,
+                     const bool         across) const
+  {
+    AssertThrow (out, ExcIO());
+
+    // Write the entries stored on this process to out, using the given
+    // precision and either scientific or fixed notation.
+    // TODO: up to now only local data is printed out! Find a way to
+    // neatly output distributed data...
+    TrilinosScalar *val;
+    int leading_dimension;
+    const int ierr = vector->ExtractView (&val, &leading_dimension);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    // The view only covers the locally stored entries, so iterate up to
+    // local_size(); running up to size() reads past the end of the view
+    // whenever the vector is distributed.
+    const size_type n_local = local_size();
+    out.precision (precision);
+    if (scientific)
+      out.setf (std::ios::scientific, std::ios::floatfield);
+    else
+      out.setf (std::ios::fixed, std::ios::floatfield);
+
+    // Either all entries on one line or one entry per line.
+    if (across)
+      for (size_type i=0; i<n_local; ++i)
+        out << static_cast<double>(val[i]) << ' ';
+    else
+      for (size_type i=0; i<n_local; ++i)
+        out << static_cast<double>(val[i]) << std::endl;
+    out << std::endl;
+    AssertThrow (out, ExcIO());
+  }
+
+  // Exchange the vector pointer, compressed and last_action with v.
+  // NOTE(review): nonlocal_vector is not exchanged -- confirm intended.
+  void
+  VectorBase::swap (VectorBase &v)
+  {
+    std::swap(vector, v.vector);
+    std::swap(compressed, v.compressed);
+    std::swap(last_action, v.last_action);
+  }
+
+
+
+  std::size_t
+  VectorBase::memory_consumption () const
+  {
+    // TODO[TH]: No accurate memory accounting exists for Trilinos vectors
+    // yet. As a rough approximation, charge the size of this object plus
+    // one value and one index per locally stored entry.
+    const std::size_t bytes_per_entry
+      = sizeof(double) + sizeof(TrilinosWrappers::types::int_type);
+    const std::size_t n_local_entries = this->local_size();
+
+    return sizeof(*this) + n_local_entries*bytes_per_entry;
+  }
+
+} /* end of namespace TrilinosWrappers */
+
+
+namespace TrilinosWrappers
+{
+#include "trilinos_vector_base.inst"
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_WITH_TRILINOS
diff --git a/source/lac/trilinos_vector_base.inst.in b/source/lac/trilinos_vector_base.inst.in
new file mode 100644
index 0000000..55898b4
--- /dev/null
+++ b/source/lac/trilinos_vector_base.inst.in
@@ -0,0 +1,23 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// Instantiate the templated assignment from a deal.II Vector<SCALAR> to
+// VectorBase once for every scalar type listed in REAL_SCALARS.
+for (SCALAR : REAL_SCALARS)
+  {
+    template
+      VectorBase &
+      VectorBase::operator = (const ::dealii::Vector<SCALAR> &v);
+  }
diff --git a/source/lac/vector.cc b/source/lac/vector.cc
new file mode 100644
index 0000000..9da88f6
--- /dev/null
+++ b/source/lac/vector.cc
@@ -0,0 +1,98 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/vector.templates.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+#include "vector.inst"
+
+// instantiate for integers (in addition to the included vector.inst):
+template class Vector<int>;
+namespace internal
+{
+  namespace Vector
+  {
+    template void copy_vector<int,double> (const dealii::Vector<int> &,
+                                           dealii::Vector<double> &);
+    // copy_vector between two integer vectors:
+    template void copy_vector<int,int> (const dealii::Vector<int> &,
+                                        dealii::Vector<int> &);
+  }
+}
+// reinit() of a Vector<int> from a Vector<double>:
+template
+void Vector<int>::reinit<double>(const Vector<double> &, const bool);
+
+
+// Instantiate a few functions that do not fit the scheme above because
+// their two template arguments have to be different. (The case of equal
+// arguments is covered by the default copy constructor and copy operator,
+// which are declared separately.)
+// The macro expands to one converting-constructor instantiation:
+#define TEMPL_COPY_CONSTRUCTOR(S1,S2)                   \
+  template Vector<S1>::Vector (const Vector<S2> &)
+// Skipped entirely if DEAL_II_EXPLICIT_CONSTRUCTOR_BUG is defined.
+#ifndef DEAL_II_EXPLICIT_CONSTRUCTOR_BUG
+TEMPL_COPY_CONSTRUCTOR(double,float);
+TEMPL_COPY_CONSTRUCTOR(float,double);
+
+TEMPL_COPY_CONSTRUCTOR(long double,double);
+TEMPL_COPY_CONSTRUCTOR(double,long double);
+
+TEMPL_COPY_CONSTRUCTOR(long double,float);
+TEMPL_COPY_CONSTRUCTOR(float,long double);
+
+// The same pairings for the complex-valued scalar types:
+TEMPL_COPY_CONSTRUCTOR(std::complex<double>,std::complex<float>);
+TEMPL_COPY_CONSTRUCTOR(std::complex<float>,std::complex<double>);
+
+TEMPL_COPY_CONSTRUCTOR(std::complex<long double>,std::complex<double>);
+TEMPL_COPY_CONSTRUCTOR(std::complex<double>,std::complex<long double>);
+
+TEMPL_COPY_CONSTRUCTOR(std::complex<long double>,std::complex<float>);
+TEMPL_COPY_CONSTRUCTOR(std::complex<float>,std::complex<long double>);
+
+#endif
+
+#undef TEMPL_COPY_CONSTRUCTOR
+// Likewise for scale() and equ() with two different scalar types; the
+// macro instantiates both member functions for one pair (S1,S2).
+#define TEMPL_OP_EQ(S1,S2) \
+  template void Vector<S1>::scale (const Vector<S2>&);  \
+  template void Vector<S1>::equ (const S1, const Vector<S2>&)
+
+TEMPL_OP_EQ(double,float);
+TEMPL_OP_EQ(float,double);
+
+TEMPL_OP_EQ(long double,double);
+TEMPL_OP_EQ(double,long double);
+
+TEMPL_OP_EQ(long double,float);
+TEMPL_OP_EQ(float,long double);
+
+// The same pairings for the complex-valued scalar types:
+TEMPL_OP_EQ(std::complex<double>,std::complex<float>);
+TEMPL_OP_EQ(std::complex<float>,std::complex<double>);
+
+TEMPL_OP_EQ(std::complex<long double>,std::complex<double>);
+TEMPL_OP_EQ(std::complex<double>,std::complex<long double>);
+
+TEMPL_OP_EQ(std::complex<long double>,std::complex<float>);
+TEMPL_OP_EQ(std::complex<float>,std::complex<long double>);
+
+#undef TEMPL_OP_EQ
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/lac/vector.inst.in b/source/lac/vector.inst.in
new file mode 100644
index 0000000..4a7b118
--- /dev/null
+++ b/source/lac/vector.inst.in
@@ -0,0 +1,79 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// Vector<SCALAR> for every real scalar type, followed by the functions
+// that take a second scalar type, for every pair (S1,S2) of real scalars.
+for (SCALAR : REAL_SCALARS)
+  {
+    template class Vector<SCALAR>;
+  }
+
+for (S1, S2 : REAL_SCALARS)
+  {
+    namespace internal
+    \{
+      namespace Vector
+      \{
+      template void copy_vector<S1,S2> (const dealii::Vector<S1>&,
+                                        dealii::Vector<S2>&);
+      \}
+    \}
+    template
+      bool
+      Vector<S1>::operator==<S2>(const Vector<S2>&) const;
+    template
+      S1
+      Vector<S1>::operator*<S2>(const Vector<S2>&) const;
+    template
+      void Vector<S1>::reinit<S2>(const Vector<S2>&, const bool);
+  }
+
+// The same set of instantiations as for the real scalars, now for every
+// complex scalar type and every pair (S1,S2) of complex scalar types.
+for (SCALAR : COMPLEX_SCALARS)
+  {
+    template class Vector<SCALAR>;
+  }
+
+for (S1, S2 : COMPLEX_SCALARS)
+  {
+    namespace internal
+    \{
+      namespace Vector
+      \{
+      template void copy_vector<S1,S2> (const dealii::Vector<S1>&,
+                                        dealii::Vector<S2>&);
+      \}
+    \}
+    template
+      bool
+      Vector<S1>::operator==<S2>(const Vector<S2>&) const;
+    template
+      S1
+      Vector<S1>::operator*<S2>(const Vector<S2>&) const;
+    template
+      void Vector<S1>::reinit<S2>(const Vector<S2>&, const bool);
+  }
+// Real-to-complex copy_vector. NOTE(review): loop's closing brace is absent.
+for (S1: REAL_SCALARS; S2: COMPLEX_SCALARS)
+  {
+    namespace internal
+    \{
+      namespace Vector
+      \{
+      template void copy_vector<S1,S2> (const dealii::Vector<S1>&,
+                                        dealii::Vector<S2>&);
+      \}
+    \}
diff --git a/source/lac/vector_memory.cc b/source/lac/vector_memory.cc
new file mode 100644
index 0000000..9e38ad9
--- /dev/null
+++ b/source/lac/vector_memory.cc
@@ -0,0 +1,34 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2007 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/parallel_vector.h>
+#include <deal.II/lac/parallel_block_vector.h>
+#include <deal.II/lac/petsc_vector.h>
+#include <deal.II/lac/petsc_block_vector.h>
+#include <deal.II/lac/petsc_parallel_vector.h>
+#include <deal.II/lac/petsc_parallel_block_vector.h>
+#include <deal.II/lac/trilinos_vector.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+
+#include <deal.II/lac/vector_memory.templates.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+#include "vector_memory.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/lac/vector_memory.inst.in b/source/lac/vector_memory.inst.in
new file mode 100644
index 0000000..37fc030
--- /dev/null
+++ b/source/lac/vector_memory.inst.in
@@ -0,0 +1,31 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// Instantiate VectorMemory and GrowingVectorMemory for every type in
+// SERIAL_VECTORS, and for Vector and BlockVector over COMPLEX_SCALARS.
+for (VECTOR : SERIAL_VECTORS)
+  {
+    template class VectorMemory<VECTOR>;
+    template class GrowingVectorMemory<VECTOR>;
+  }
+
+ for (SCALAR : COMPLEX_SCALARS)
+  {
+    template class VectorMemory<Vector<SCALAR> >;
+    template class GrowingVectorMemory<Vector<SCALAR> >;
+
+    template class VectorMemory<BlockVector<SCALAR> >;
+    template class GrowingVectorMemory<BlockVector<SCALAR> >;
+  }
diff --git a/source/lac/vector_view.cc b/source/lac/vector_view.cc
new file mode 100644
index 0000000..b69d661
--- /dev/null
+++ b/source/lac/vector_view.cc
@@ -0,0 +1,22 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/vector_view.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+#include "vector_view.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/lac/vector_view.inst.in b/source/lac/vector_view.inst.in
new file mode 100644
index 0000000..b7f376b
--- /dev/null
+++ b/source/lac/vector_view.inst.in
@@ -0,0 +1,26 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// Instantiate VectorView for every scalar type in REAL_SCALARS and in
+// COMPLEX_SCALARS.
+for (SCALAR : REAL_SCALARS)
+  {
+    template class VectorView<SCALAR>;
+  }
+
+for (SCALAR : COMPLEX_SCALARS)
+  {
+    template class VectorView<SCALAR>;
+  }
diff --git a/source/matrix_free/CMakeLists.txt b/source/matrix_free/CMakeLists.txt
new file mode 100644
index 0000000..1558ab4
--- /dev/null
+++ b/source/matrix_free/CMakeLists.txt
@@ -0,0 +1,31 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2013 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_BINARY_DIR})
+# Source files of the obj_matrix_free object library:
+SET(_src
+  matrix_free.cc
+  )
+# Instantiation templates, handed to EXPAND_INSTANTIATIONS below:
+SET(_inst
+  matrix_free.inst.in
+  )
+
+FILE(GLOB _header
+  ${CMAKE_SOURCE_DIR}/include/deal.II/matrix_free/*.h
+  )
+
+DEAL_II_ADD_LIBRARY(obj_matrix_free OBJECT ${_src} ${_header} ${_inst})
+EXPAND_INSTANTIATIONS(obj_matrix_free "${_inst}")
diff --git a/source/matrix_free/matrix_free.cc b/source/matrix_free/matrix_free.cc
new file mode 100644
index 0000000..21622d8
--- /dev/null
+++ b/source/matrix_free/matrix_free.cc
@@ -0,0 +1,31 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2011 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/matrix_free/matrix_free.templates.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/conditional_ostream.h>
+
+#include <iostream>
+
+DEAL_II_NAMESPACE_OPEN
+
+#include "matrix_free.inst"
+
+template struct internal::MatrixFreeFunctions::ShapeInfo<double>;
+template struct internal::MatrixFreeFunctions::ShapeInfo<float>;
+template struct internal::MatrixFreeFunctions::ShapeInfo<long double>;
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/matrix_free/matrix_free.inst.in b/source/matrix_free/matrix_free.inst.in
new file mode 100644
index 0000000..6e1115a
--- /dev/null
+++ b/source/matrix_free/matrix_free.inst.in
@@ -0,0 +1,46 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// Explicit instantiations for every dimension listed in DIMENSIONS.
+for (deal_II_dimension : DIMENSIONS)
+{
+  template class MatrixFree<deal_II_dimension,double>;
+  template class MatrixFree<deal_II_dimension,float>;
+  // print_memory_consumption for std::ostream and ConditionalOStream:
+  template void MatrixFree<deal_II_dimension,double>::
+  print_memory_consumption<std::ostream> (std::ostream &) const;
+  template void MatrixFree<deal_II_dimension,double>::
+  print_memory_consumption<ConditionalOStream> (ConditionalOStream &) const;
+
+  template void MatrixFree<deal_II_dimension,float>::
+  print_memory_consumption<std::ostream> (std::ostream &) const;
+  template void MatrixFree<deal_II_dimension,float>::
+  print_memory_consumption<ConditionalOStream> (ConditionalOStream &) const;
+
+  template struct internal::MatrixFreeFunctions::MappingInfo<deal_II_dimension,double>;
+  template struct internal::MatrixFreeFunctions::MappingInfo<deal_II_dimension,float>;
+  // ShapeInfo::reinit is left out when DEAL_II_MSVC is defined:
+#ifndef DEAL_II_MSVC
+  template void internal::MatrixFreeFunctions::ShapeInfo<double>::reinit
+  <deal_II_dimension>(const Quadrature<1> &, const FiniteElement
+                      <deal_II_dimension,deal_II_dimension> &, const unsigned int);
+  template void internal::MatrixFreeFunctions::ShapeInfo<float>::reinit
+  <deal_II_dimension>(const Quadrature<1> &, const FiniteElement
+                      <deal_II_dimension,deal_II_dimension> &, const unsigned int);
+  template void internal::MatrixFreeFunctions::ShapeInfo<long double>::reinit
+  <deal_II_dimension>(const Quadrature<1> &, const FiniteElement
+                      <deal_II_dimension,deal_II_dimension> &, const unsigned int);
+#endif
+}
diff --git a/source/meshworker/CMakeLists.txt b/source/meshworker/CMakeLists.txt
new file mode 100644
index 0000000..dbea07c
--- /dev/null
+++ b/source/meshworker/CMakeLists.txt
@@ -0,0 +1,34 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2013 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_BINARY_DIR})
+# Source files of the obj_meshworker object library:
+SET(_src
+  mesh_worker.cc
+  mesh_worker_info.cc
+  mesh_worker_vector_selector.cc
+  )
+# Instantiation templates, handed to EXPAND_INSTANTIATIONS below:
+SET(_inst
+  mesh_worker_info.inst.in
+  mesh_worker_vector_selector.inst.in
+  )
+
+FILE(GLOB _header
+  ${CMAKE_SOURCE_DIR}/include/deal.II/meshworker/*.h
+  )
+
+DEAL_II_ADD_LIBRARY(obj_meshworker OBJECT ${_src} ${_header} ${_inst})
+EXPAND_INSTANTIATIONS(obj_meshworker "${_inst}")
diff --git a/source/meshworker/mesh_worker.cc b/source/meshworker/mesh_worker.cc
new file mode 100644
index 0000000..5b50bc2
--- /dev/null
+++ b/source/meshworker/mesh_worker.cc
@@ -0,0 +1,123 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2006 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/meshworker/local_results.h>
+#include <deal.II/meshworker/local_integrator.h>
+#include <deal.II/lac/block_indices.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace MeshWorker
+{
+  template <typename number>
+  void
+  LocalResults<number>::reinit(const BlockIndices &bi)
+  {
+    // Zero the entries of J, reinitialize the vectors in R with bi and
+    // the matrices in M1 and M2 with the block sizes of their row/column.
+    for (unsigned int k=0; k<J.size(); ++k)
+      J[k] = 0.;
+    for (unsigned int k=0; k<R.size(); ++k)
+      R[k].reinit(bi);
+    for (unsigned int k=0; k<M1.size(); ++k)
+      M1[k].matrix.reinit(bi.block_size(M1[k].row), bi.block_size(M1[k].column));
+    for (unsigned int k=0; k<M2.size(); ++k)
+      M2[k].matrix.reinit(bi.block_size(M2[k].row), bi.block_size(M2[k].column));
+    quadrature_data.reset_values();
+  }
+
+
+  template <typename number>
+  std::size_t
+  LocalResults<number>::memory_consumption () const
+  {
+    std::size_t total = sizeof(*this); // the object itself, then each member
+    total += MemoryConsumption::memory_consumption(J);
+    total += MemoryConsumption::memory_consumption(R);
+    total += MemoryConsumption::memory_consumption(M1);
+    total += MemoryConsumption::memory_consumption(M2);
+    total += MemoryConsumption::memory_consumption(quadrature_data);
+    return total;
+  }
+
+
+  template class LocalResults<float>;
+  template class LocalResults<double>;
+  template class LocalResults<long double>;
+
+  template <int dim, int spacedim, typename number>
+  LocalIntegrator<dim, spacedim, number>::LocalIntegrator ()
+    // By default all three flags (cell, boundary, face) are set to true.
+    : use_cell(true), use_boundary(true), use_face(true)
+  {}
+
+
+  template <int dim, int spacedim, typename number>
+  LocalIntegrator<dim, spacedim, number>::LocalIntegrator (bool c, bool b, bool f)
+    // Set the cell, boundary and face flags from c, b and f, respectively.
+    : use_cell(c), use_boundary(b), use_face(f)
+  {}
+
+
+  template <int dim, int spacedim, typename number>
+  LocalIntegrator<dim, spacedim, number>::~LocalIntegrator ()
+  {}
+  // Default cell hook: does no work and fails Assert with ExcPureFunction.
+  template <int dim, int spacedim, typename number>
+  void
+  LocalIntegrator<dim, spacedim, number>::cell (DoFInfo<dim, spacedim, number> &,
+                                                IntegrationInfo<dim, spacedim> &) const
+  {
+    Assert(false, ExcPureFunction());
+  }
+
+  // Default boundary hook: likewise only fails Assert with ExcPureFunction.
+  template <int dim, int spacedim, typename number>
+  void
+  LocalIntegrator<dim, spacedim, number>::boundary (DoFInfo<dim, spacedim, number> &,
+                                                    IntegrationInfo<dim, spacedim> &) const
+  {
+    Assert(false, ExcPureFunction());
+  }
+
+  // Default face hook (two DoFInfo/IntegrationInfo pairs): same behavior.
+  template <int dim, int spacedim, typename number>
+  void
+  LocalIntegrator<dim, spacedim, number>::face (DoFInfo<dim, spacedim, number> &,
+                                                DoFInfo<dim, spacedim, number> &,
+                                                IntegrationInfo<dim, spacedim> &,
+                                                IntegrationInfo<dim, spacedim> &) const
+  {
+    Assert(false, ExcPureFunction());
+  }
+
+
+  template class LocalIntegrator<1,1,float>;
+  template class LocalIntegrator<1,1,double>;
+  template class LocalIntegrator<1,2,float>;
+  template class LocalIntegrator<1,2,double>;
+  template class LocalIntegrator<1,3,float>;
+  template class LocalIntegrator<1,3,double>;
+  template class LocalIntegrator<2,2,float>;
+  template class LocalIntegrator<2,2,double>;
+  template class LocalIntegrator<2,3,float>;
+  template class LocalIntegrator<2,3,double>;
+  template class LocalIntegrator<3,3,float>;
+  template class LocalIntegrator<3,3,double>;
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/meshworker/mesh_worker_info.cc b/source/meshworker/mesh_worker_info.cc
new file mode 100644
index 0000000..288946d
--- /dev/null
+++ b/source/meshworker/mesh_worker_info.cc
@@ -0,0 +1,34 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/petsc_vector.h>
+#include <deal.II/lac/petsc_block_vector.h>
+#include <deal.II/lac/trilinos_vector.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+
+#include <deal.II/meshworker/dof_info.templates.h>
+#include <deal.II/meshworker/integration_info.templates.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+// explicit instantiations
+#include "mesh_worker_info.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
+
diff --git a/source/meshworker/mesh_worker_info.inst.in b/source/meshworker/mesh_worker_info.inst.in
new file mode 100644
index 0000000..2cf38c8
--- /dev/null
+++ b/source/meshworker/mesh_worker_info.inst.in
@@ -0,0 +1,58 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+// For each entry of DIMENSIONS this emits explicit instantiations of the
+// MeshWorker info classes with spacedim equal to dim, once for each of the
+// scalar types float, double and long double.
+// NOTE(review): the escaped braces \{ and \} presumably become the literal
+// braces of the namespace in the expanded file -- confirm against the
+// instantiation expansion tool.
+for (deal_II_dimension : DIMENSIONS)
+{
+namespace MeshWorker
+\{
+  template class IntegrationInfo<deal_II_dimension, deal_II_dimension>;
+  template class IntegrationInfoBox<deal_II_dimension, deal_II_dimension>;
+
+  // scalar type float
+  template class DoFInfo<deal_II_dimension,deal_II_dimension,float>;
+  template class DoFInfoBox<deal_II_dimension,
+                            DoFInfo<deal_II_dimension,deal_II_dimension,float> >;
+
+  template void IntegrationInfo<deal_II_dimension>::fill_local_data(
+    const DoFInfo<deal_II_dimension, deal_II_dimension, float>&, bool);
+
+  // scalar type double
+  template class DoFInfo<deal_II_dimension,deal_II_dimension,double>;
+  template class DoFInfoBox<deal_II_dimension,
+                            DoFInfo<deal_II_dimension,deal_II_dimension,double> >;
+
+  template void IntegrationInfo<deal_II_dimension>::fill_local_data(
+    const DoFInfo<deal_II_dimension, deal_II_dimension, double>&, bool);
+
+  // scalar type long double
+  template class DoFInfo<deal_II_dimension,deal_II_dimension,long double>;
+  template class DoFInfoBox<deal_II_dimension,
+                            DoFInfo<deal_II_dimension,deal_II_dimension,long double> >;
+
+  template void IntegrationInfo<deal_II_dimension>::fill_local_data(
+    const DoFInfo<deal_II_dimension, deal_II_dimension, long double>&, bool);
+
+// The explicit instantiations of IntegrationInfo::initialize() for
+// FEValues, FEFaceValues and FESubfaceValues below are disabled.
+//   template void IntegrationInfo<deal_II_dimension>
+//   ::initialize<FEValues<deal_II_dimension> >(
+//     const FiniteElement<deal_II_dimension>&, const Mapping<deal_II_dimension>&,
+//     const Quadrature<FEValues<deal_II_dimension>::integral_dimension>&, const UpdateFlags, const BlockInfo*);
+//   template void IntegrationInfo<deal_II_dimension>
+//   ::initialize<FEFaceValues<deal_II_dimension> >(
+//     const FiniteElement<deal_II_dimension>&, const Mapping<deal_II_dimension>&,
+//     const Quadrature<FEFaceValues<deal_II_dimension>::integral_dimension>&, const UpdateFlags, const BlockInfo*);
+//   template void IntegrationInfo<deal_II_dimension>
+//   ::initialize<FESubfaceValues<deal_II_dimension> >(
+//     const FiniteElement<deal_II_dimension>&, const Mapping<deal_II_dimension>&,
+//     const Quadrature<FESubfaceValues<deal_II_dimension>::integral_dimension>&, const UpdateFlags, const BlockInfo*);
+\}
+}
diff --git a/source/meshworker/mesh_worker_vector_selector.cc b/source/meshworker/mesh_worker_vector_selector.cc
new file mode 100644
index 0000000..b73025c
--- /dev/null
+++ b/source/meshworker/mesh_worker_vector_selector.cc
@@ -0,0 +1,33 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/parallel_vector.h>
+#include <deal.II/lac/parallel_block_vector.h>
+#include <deal.II/lac/petsc_vector.h>
+#include <deal.II/lac/petsc_block_vector.h>
+#include <deal.II/lac/trilinos_vector.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+
+#include <deal.II/meshworker/vector_selector.templates.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+#include "mesh_worker_vector_selector.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/meshworker/mesh_worker_vector_selector.inst.in b/source/meshworker/mesh_worker_vector_selector.inst.in
new file mode 100644
index 0000000..7804841
--- /dev/null
+++ b/source/meshworker/mesh_worker_vector_selector.inst.in
@@ -0,0 +1,35 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+// The vector-independent base class is instantiated once per dimension.
+for (deal_II_dimension : DIMENSIONS)
+{
+  namespace MeshWorker
+  \{
+    template class VectorDataBase<deal_II_dimension>;
+  \}
+}
+
+// The vector-dependent classes are instantiated for every combination of an
+// entry of SERIAL_VECTORS and a dimension, each for both the non-const and
+// the const-qualified vector type.
+for (VECTOR : SERIAL_VECTORS ; deal_II_dimension : DIMENSIONS)
+{
+  namespace MeshWorker
+  \{
+    template class VectorData<VECTOR,deal_II_dimension>;
+    template class VectorData<const VECTOR,deal_II_dimension>;
+    template class MGVectorData<VECTOR,deal_II_dimension>;
+    template class MGVectorData<const VECTOR,deal_II_dimension>;
+  \}
+}
diff --git a/source/multigrid/CMakeLists.txt b/source/multigrid/CMakeLists.txt
new file mode 100644
index 0000000..2c3a298
--- /dev/null
+++ b/source/multigrid/CMakeLists.txt
@@ -0,0 +1,45 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2016 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+# Put the current binary directory first on the include path.
+# NOTE(review): presumably this is where the expanded *.inst files that the
+# sources #include are generated -- confirm against EXPAND_INSTANTIATIONS.
+INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_BINARY_DIR})
+
+# Translation units of the multigrid object library.
+SET(_src
+  mg_base.cc
+  mg_level_global_transfer.cc
+  mg_tools.cc
+  mg_transfer_block.cc
+  mg_transfer_component.cc
+  mg_transfer_matrix_free.cc
+  mg_transfer_prebuilt.cc
+  multigrid.cc
+  )
+
+# Instantiation templates; there is one *.inst.in per source file above.
+SET(_inst
+  mg_base.inst.in
+  mg_level_global_transfer.inst.in
+  mg_tools.inst.in
+  mg_transfer_block.inst.in
+  mg_transfer_component.inst.in
+  mg_transfer_matrix_free.inst.in
+  mg_transfer_prebuilt.inst.in
+  multigrid.inst.in
+  )
+
+# All public multigrid headers; they are listed as part of the target below.
+FILE(GLOB _header
+  ${CMAKE_SOURCE_DIR}/include/deal.II/multigrid/*.h
+  )
+
+# Define the object library and hand the instantiation templates to
+# EXPAND_INSTANTIATIONS for the same target.
+DEAL_II_ADD_LIBRARY(obj_multigrid OBJECT ${_src} ${_header} ${_inst})
+EXPAND_INSTANTIATIONS(obj_multigrid "${_inst}")
diff --git a/source/multigrid/mg_base.cc b/source/multigrid/mg_base.cc
new file mode 100644
index 0000000..f2dd74d
--- /dev/null
+++ b/source/multigrid/mg_base.cc
@@ -0,0 +1,54 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/parallel_vector.h>
+#include <deal.II/lac/parallel_block_vector.h>
+#include <deal.II/lac/petsc_vector.h>
+#include <deal.II/lac/petsc_block_vector.h>
+#include <deal.II/lac/trilinos_vector.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+#include <deal.II/multigrid/mg_base.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// Out-of-line definitions of the (empty) destructors of the multigrid base
+// class templates. They are instantiated together with their classes by the
+// explicit instantiations pulled in from mg_base.inst at the end of this
+// file.
+
+// Destructor of the transfer base class; nothing to release here.
+template <typename VectorType>
+MGTransferBase<VectorType>::~MGTransferBase()
+{}
+
+
+// Destructor of the level matrix base class; nothing to release here.
+template <typename VectorType>
+MGMatrixBase<VectorType>::~MGMatrixBase()
+{}
+
+
+// Destructor of the smoother base class; nothing to release here.
+template <typename VectorType>
+MGSmootherBase<VectorType>::~MGSmootherBase()
+{}
+
+
+// Destructor of the coarse grid solver base class; nothing to release here.
+template <typename VectorType>
+MGCoarseGridBase<VectorType>::~MGCoarseGridBase()
+{}
+
+
+// Explicit instantiations
+
+#include "mg_base.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/multigrid/mg_base.inst.in b/source/multigrid/mg_base.inst.in
new file mode 100644
index 0000000..a40aedb
--- /dev/null
+++ b/source/multigrid/mg_base.inst.in
@@ -0,0 +1,23 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2012 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+// Instantiate the four multigrid base classes for every vector type listed
+// in SERIAL_VECTORS.
+for (VEC : SERIAL_VECTORS)
+{
+  template class MGTransferBase< VEC >;
+  template class MGMatrixBase< VEC >;
+  template class MGSmootherBase< VEC >;
+  template class MGCoarseGridBase< VEC >;
+}
diff --git a/source/multigrid/mg_level_global_transfer.cc b/source/multigrid/mg_level_global_transfer.cc
new file mode 100644
index 0000000..936865a
--- /dev/null
+++ b/source/multigrid/mg_level_global_transfer.cc
@@ -0,0 +1,588 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/function.h>
+
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/parallel_vector.h>
+#include <deal.II/lac/parallel_block_vector.h>
+#include <deal.II/lac/petsc_vector.h>
+#include <deal.II/lac/petsc_block_vector.h>
+#include <deal.II/lac/trilinos_vector.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/multigrid/mg_tools.h>
+#include <deal.II/multigrid/mg_transfer.h>
+#include <deal.II/multigrid/mg_transfer.templates.h>
+
+#include <algorithm>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace
+{
+  /**
+   * Internal data structure that is used in the MPI communication in
+   * fill_copy_indices() (which is called from
+   * fill_and_communicate_copy_indices()). It represents an entry in the
+   * copy_indices* map, that associates a level dof index with a global dof
+   * index on a given level.
+   *
+   * Objects of this type are sent and received as raw bytes, so the struct
+   * must stay a plain aggregate of integer members.
+   */
+  struct DoFPair
+  {
+    /** Multigrid level the pair belongs to. */
+    unsigned int level;
+    /** Index of the DoF in the global (active) numbering. */
+    types::global_dof_index global_dof_index;
+    /** Index of the same DoF in the numbering of @p level. */
+    types::global_dof_index level_dof_index;
+
+    /** Create a fully specified entry. */
+    DoFPair(const unsigned int level,
+            const types::global_dof_index global_dof_index,
+            const types::global_dof_index level_dof_index)
+      :
+      level(level), global_dof_index(global_dof_index), level_dof_index(level_dof_index)
+    {}
+
+    /**
+     * Default constructor, needed to resize receive buffers. All members
+     * are set to the library's invalid markers so that an entry that was
+     * never overwritten is recognizable instead of holding indeterminate
+     * values.
+     */
+    DoFPair()
+      :
+      level(numbers::invalid_unsigned_int),
+      global_dof_index(numbers::invalid_dof_index),
+      level_dof_index(numbers::invalid_dof_index)
+    {}
+  };
+
+
+
+  /**
+   * Internal function for filling the copy indices from global to level indices
+   *
+   * All three output tables are resized to the number of global levels and
+   * hold, per level, pairs (global dof index, level dof index):
+   *
+   * @param mg_dof DoFHandler providing the global and level numberings.
+   * @param mg_constrained_dofs May be null. If given, DoFs reported by
+   *   at_refinement_edge() on a level are left out on that level.
+   * @param copy_indices Pairs for which both the global and the level
+   *   index are locally owned.
+   * @param copy_indices_global_mine Pairs for which the global index is
+   *   locally owned but the level index is not.
+   * @param copy_indices_level_mine Pairs received from other processes via
+   *   MPI (only filled when the triangulation is a
+   *   parallel::distributed::Triangulation and MPI is enabled).
+   *
+   * On return every per-level list is sorted. An exception is raised via
+   * AssertThrow if there is data to send but the triangulation is not a
+   * parallel::distributed::Triangulation.
+   */
+  template <int dim, int spacedim>
+  void fill_copy_indices(const DoFHandler<dim,spacedim> &mg_dof,
+                         const MGConstrainedDoFs        *mg_constrained_dofs,
+                         std::vector<std::vector<std::pair<types::global_dof_index, types::global_dof_index> > > &copy_indices,
+                         std::vector<std::vector<std::pair<types::global_dof_index, types::global_dof_index> > > &copy_indices_global_mine,
+                         std::vector<std::vector<std::pair<types::global_dof_index, types::global_dof_index> > > &copy_indices_level_mine)
+  {
+    // Now we are filling the variables copy_indices*, which are essentially
+    // maps from global to mgdof for each level stored as a std::vector of
+    // pairs. We need to split this map on each level depending on the ownership
+    // of the global and mgdof, so that we later not access non-local elements
+    // in copy_to/from_mg.
+    // We keep track in the bitfield dof_touched which global dof has
+    // been processed already (on the current level). This is the same as
+    // the multigrid running in serial.
+
+    // Flat list of all (level, global index, level index) entries whose
+    // global index is locally owned but whose level index is not. Further
+    // down it is split by destination process and sent out; the receiver
+    // stores the entries in its copy_indices_level_mine.
+    std::vector<DoFPair> send_data_temp;
+
+    const unsigned int n_levels = mg_dof.get_triangulation().n_global_levels();
+    copy_indices.resize(n_levels);
+    copy_indices_global_mine.resize(n_levels);
+    copy_indices_level_mine.resize(n_levels);
+    IndexSet globally_relevant;
+    DoFTools::extract_locally_relevant_dofs(mg_dof, globally_relevant);
+
+    const unsigned int dofs_per_cell = mg_dof.get_fe().dofs_per_cell;
+    std::vector<types::global_dof_index> global_dof_indices (dofs_per_cell);
+    std::vector<types::global_dof_index> level_dof_indices  (dofs_per_cell);
+
+    for (unsigned int level=0; level<n_levels; ++level)
+      {
+        // one flag per locally relevant global dof, reset for every level
+        std::vector<bool> dof_touched(globally_relevant.n_elements(), false);
+        copy_indices[level].clear();
+        copy_indices_level_mine[level].clear();
+        copy_indices_global_mine[level].clear();
+
+        typename DoFHandler<dim,spacedim>::active_cell_iterator
+        level_cell = mg_dof.begin_active(level);
+        const typename DoFHandler<dim,spacedim>::active_cell_iterator
+        level_end  = mg_dof.end_active(level);
+
+        for (; level_cell!=level_end; ++level_cell)
+          {
+            // in the parallel case, skip cells that are artificial either
+            // in the level or in the active sense
+            if (mg_dof.get_triangulation().locally_owned_subdomain()!=numbers::invalid_subdomain_id
+                &&  (level_cell->level_subdomain_id()==numbers::artificial_subdomain_id
+                     ||  level_cell->subdomain_id()==numbers::artificial_subdomain_id)
+               )
+              continue;
+
+            // get the dof numbers of this cell for the global and the level-wise
+            // numbering
+            level_cell->get_dof_indices (global_dof_indices);
+            level_cell->get_mg_dof_indices (level_dof_indices);
+
+            for (unsigned int i=0; i<dofs_per_cell; ++i)
+              {
+                // we need to ignore if the DoF is on a refinement edge (hanging node)
+                if (mg_constrained_dofs != 0
+                    && mg_constrained_dofs->at_refinement_edge(level, level_dof_indices[i]))
+                  continue;
+                // position of the global dof within the locally relevant
+                // set; used as index into dof_touched
+                types::global_dof_index global_idx = globally_relevant.index_within_set(global_dof_indices[i]);
+                // skip if we did this global dof already on this level
+                // (dof_touched is rebuilt for every level)
+                if (dof_touched[global_idx])
+                  continue;
+                bool global_mine = mg_dof.locally_owned_dofs().is_element(global_dof_indices[i]);
+                bool level_mine = mg_dof.locally_owned_mg_dofs(level).is_element(level_dof_indices[i]);
+
+
+                if (global_mine && level_mine)
+                  {
+                    copy_indices[level].push_back(
+                      std::make_pair (global_dof_indices[i], level_dof_indices[i]));
+                  }
+                else if (global_mine)
+                  {
+                    copy_indices_global_mine[level].push_back(
+                      std::make_pair (global_dof_indices[i], level_dof_indices[i]));
+
+                    //send this to the owner of the level_dof:
+                    send_data_temp.push_back(DoFPair(level, global_dof_indices[i], level_dof_indices[i]));
+                  }
+                else
+                  {
+                    // somebody will send those to me
+                  }
+
+                dof_touched[global_idx] = true;
+              }
+          }
+      }
+
+    // Having something to send is only legal on a distributed triangulation.
+    const dealii::parallel::distributed::Triangulation<dim,spacedim> *tria =
+      (dynamic_cast<const parallel::distributed::Triangulation<dim,spacedim>*>
+       (&mg_dof.get_triangulation()));
+    AssertThrow(send_data_temp.size()==0 || tria!=NULL, ExcMessage("parallel Multigrid only works with a distributed Triangulation!"));
+
+#ifdef DEAL_II_WITH_MPI
+    if (tria)
+      {
+        // TODO: Searching the owner for every single DoF becomes quite
+        // inefficient. Please fix this, Timo.
+        // The list of neighbors is symmetric (our neighbors have us as a neighbor),
+        // so we can use it to send and to know how many messages we will get.
+        std::set<unsigned int> neighbors = tria->level_ghost_owners();
+        std::map<int, std::vector<DoFPair> > send_data;
+
+        // * find owners of the level dofs and insert into send_data accordingly
+        for (typename std::vector<DoFPair>::iterator dofpair=send_data_temp.begin(); dofpair != send_data_temp.end(); ++dofpair)
+          {
+            std::set<unsigned int>::iterator it;
+            for (it = neighbors.begin(); it != neighbors.end(); ++it)
+              {
+                if (mg_dof.locally_owned_mg_dofs_per_processor(dofpair->level)[*it].is_element(dofpair->level_dof_index))
+                  {
+                    send_data[*it].push_back(*dofpair);
+                    break;
+                  }
+              }
+            // Is this level dof not owned by any of our neighbors? That
+            // would certainly be a bug!
+            Assert(it!=neighbors.end(), ExcMessage("could not find DoF owner."));
+          }
+
+        // * send
+        // (exactly one non-blocking message with tag 71 per neighbor; the
+        // entries are transferred as raw bytes)
+        std::vector<MPI_Request> requests;
+        {
+          for (std::set<unsigned int>::iterator it = neighbors.begin(); it != neighbors.end(); ++it)
+            {
+              requests.push_back(MPI_Request());
+              unsigned int dest = *it;
+              std::vector<DoFPair> &data = send_data[dest];
+              // If there is nothing to send, we still need to send a message, because
+              // the receiving end will be waiting. In that case we just send
+              // an empty message.
+              if (data.size())
+                MPI_Isend(&data[0], data.size()*sizeof(data[0]), MPI_BYTE, dest, 71, tria->get_communicator(), &*requests.rbegin());
+              else
+                MPI_Isend(NULL, 0, MPI_BYTE, dest, 71, tria->get_communicator(), &*requests.rbegin());
+            }
+        }
+
+        // * receive
+        {
+          // We should get one message from each of our neighbors
+          std::vector<DoFPair> receive_buffer;
+          for (unsigned int counter=0; counter<neighbors.size(); ++counter)
+            {
+              // probe first so that the buffer can be sized to the message
+              MPI_Status status;
+              int len;
+              MPI_Probe(MPI_ANY_SOURCE, 71, tria->get_communicator(), &status);
+              MPI_Get_count(&status, MPI_BYTE, &len);
+
+              // an empty message still has to be received to consume it
+              if (len==0)
+                {
+                  int err = MPI_Recv(NULL, 0, MPI_BYTE, status.MPI_SOURCE, status.MPI_TAG,
+                                     tria->get_communicator(), &status);
+                  AssertThrow(err==MPI_SUCCESS, ExcInternalError());
+                  continue;
+                }
+
+              // the byte count must be a whole number of DoFPair objects
+              int count = len / sizeof(DoFPair);
+              Assert(static_cast<int>(count * sizeof(DoFPair)) == len, ExcInternalError());
+              receive_buffer.resize(count);
+
+              void *ptr = &receive_buffer[0];
+              int err = MPI_Recv(ptr, len, MPI_BYTE, status.MPI_SOURCE, status.MPI_TAG,
+                                 tria->get_communicator(), &status);
+              AssertThrow(err==MPI_SUCCESS, ExcInternalError());
+
+              // file every received entry under its level
+              for (unsigned int i=0; i<receive_buffer.size(); ++i)
+                {
+                  copy_indices_level_mine[receive_buffer[i].level].push_back(
+                    std::make_pair (receive_buffer[i].global_dof_index, receive_buffer[i].level_dof_index)
+                  );
+                }
+            }
+        }
+
+        // * wait for all MPI_Isend to complete
+        if (requests.size() > 0)
+          {
+            MPI_Waitall(requests.size(), &requests[0], MPI_STATUSES_IGNORE);
+            requests.clear();
+          }
+#ifdef DEBUG
+        // Make sure in debug mode, that everybody sent/received all packages
+        // on this level. If a deadlock occurs here, the list of expected
+        // senders is not computed correctly.
+        MPI_Barrier(tria->get_communicator());
+#endif
+      }
+#endif
+
+    // Sort the indices. This will produce more reliable debug output for regression tests
+    // and likely won't hurt performance even in release mode.
+    std::less<std::pair<types::global_dof_index, types::global_dof_index> > compare;
+    for (unsigned int level=0; level<copy_indices.size(); ++level)
+      std::sort(copy_indices[level].begin(), copy_indices[level].end(), compare);
+    for (unsigned int level=0; level<copy_indices_level_mine.size(); ++level)
+      std::sort(copy_indices_level_mine[level].begin(), copy_indices_level_mine[level].end(), compare);
+    for (unsigned int level=0; level<copy_indices_global_mine.size(); ++level)
+      std::sort(copy_indices_global_mine[level].begin(), copy_indices_global_mine[level].end(), compare);
+  }
+}
+
+
+
+/* ------------------ MGLevelGlobalTransfer<VectorType> ----------------- */
+
+
+// Fill copy_indices, copy_indices_global_mine and copy_indices_level_mine
+// for the given DoFHandler and determine whether the transfer between the
+// global vector and the finest level can be done by a plain copy. The
+// decision is agreed on by all processes of the communicator.
+template <typename VectorType>
+template <int dim, int spacedim>
+void
+MGLevelGlobalTransfer<VectorType>::fill_and_communicate_copy_indices
+(const DoFHandler<dim,spacedim> &mg_dof)
+{
+  fill_copy_indices(mg_dof, mg_constrained_dofs, copy_indices,
+                    copy_indices_global_mine, copy_indices_level_mine);
+
+  // check if we can run a plain copy operation between the global DoFs and
+  // the finest level: every locally owned global DoF must have an
+  // owned-owned entry on the finest level, and the finest level must own
+  // exactly as many level DoFs as there are locally owned global DoFs.
+  perform_plain_copy =
+    (copy_indices.back().size() == mg_dof.locally_owned_dofs().n_elements())
+    &&
+    (mg_dof.locally_owned_dofs().n_elements() ==
+     mg_dof.locally_owned_mg_dofs(mg_dof.get_triangulation().n_global_levels()-1).n_elements());
+  if (perform_plain_copy)
+    {
+      AssertDimension(copy_indices_global_mine.back().size(), 0);
+      AssertDimension(copy_indices_level_mine.back().size(), 0);
+
+      // check whether there is a renumbering of degrees of freedom on
+      // either the finest level or the global dofs, which means that we
+      // cannot apply a plain copy
+      for (unsigned int i=0; i<copy_indices.back().size(); ++i)
+        if (copy_indices.back()[i].first != copy_indices.back()[i].second)
+          {
+            perform_plain_copy = false;
+            break;
+          }
+    }
+
+  // A plain copy is only used if it is possible on every process: take the
+  // minimum of the local decisions over the communicator of the
+  // triangulation (or over MPI_COMM_SELF for a non-parallel triangulation).
+  const parallel::Triangulation<dim, spacedim> *ptria =
+    dynamic_cast<const parallel::Triangulation<dim, spacedim> *>
+    (&mg_dof.get_triangulation());
+  const MPI_Comm mpi_communicator = ptria != 0 ? ptria->get_communicator() :
+                                    MPI_COMM_SELF;
+  perform_plain_copy =
+    Utilities::MPI::min(static_cast<int>(perform_plain_copy),
+                        mpi_communicator);
+}
+
+
+
+// Bring the object back to an empty state: drop the reference to the
+// constrained-DoF object and empty all index and size tables.
+template <typename VectorType>
+void
+MGLevelGlobalTransfer<VectorType>::clear()
+{
+  // no longer refer to any MGConstrainedDoFs object
+  mg_constrained_dofs = 0;
+
+  // forget the three global <-> level index tables
+  copy_indices_level_mine.clear();
+  copy_indices_global_mine.clear();
+  copy_indices.clear();
+
+  // and the remaining bookkeeping arrays
+  component_to_block_map.resize(0);
+  sizes.resize(0);
+}
+
+
+
+// Write the content of the three copy index tables to the given stream,
+// one line per entry in the form "<label>[<level>]\t<first>\t<second>".
+template <typename VectorType>
+void
+MGLevelGlobalTransfer<VectorType>::print_indices (std::ostream &os) const
+{
+  // table copy_indices
+  for (unsigned int lvl=0; lvl<copy_indices.size(); ++lvl)
+    for (unsigned int k=0; k<copy_indices[lvl].size(); ++k)
+      {
+        os << "copy_indices[" << lvl << "]\t"
+           << copy_indices[lvl][k].first << '\t'
+           << copy_indices[lvl][k].second << std::endl;
+      }
+
+  // table copy_indices_level_mine
+  for (unsigned int lvl=0; lvl<copy_indices_level_mine.size(); ++lvl)
+    for (unsigned int k=0; k<copy_indices_level_mine[lvl].size(); ++k)
+      {
+        os << "copy_ifrom  [" << lvl << "]\t"
+           << copy_indices_level_mine[lvl][k].first << '\t'
+           << copy_indices_level_mine[lvl][k].second << std::endl;
+      }
+
+  // table copy_indices_global_mine
+  for (unsigned int lvl=0; lvl<copy_indices_global_mine.size(); ++lvl)
+    for (unsigned int k=0; k<copy_indices_global_mine[lvl].size(); ++k)
+      {
+        os << "copy_ito    [" << lvl << "]\t"
+           << copy_indices_global_mine[lvl][k].first << '\t'
+           << copy_indices_global_mine[lvl][k].second << std::endl;
+      }
+}
+
+
+
+// Estimate of the memory used by this object: the size of the object
+// itself plus what MemoryConsumption reports for the sizes array and the
+// three copy index tables.
+template <typename VectorType>
+std::size_t
+MGLevelGlobalTransfer<VectorType>::memory_consumption () const
+{
+  return sizeof(*this)
+         + MemoryConsumption::memory_consumption(sizes)
+         + MemoryConsumption::memory_consumption(copy_indices)
+         + MemoryConsumption::memory_consumption(copy_indices_global_mine)
+         + MemoryConsumption::memory_consumption(copy_indices_level_mine);
+}
+
+
+
+/* ------------------ MGLevelGlobalTransfer<parallel::distributed::Vector<Number> > ----------------- */
+
+
+// Variant for parallel::distributed::Vector: build the copy index tables in
+// terms of global indices, set up ghosted helper vectors whose ghost
+// entries are exactly the remotely owned indices that are accessed, and
+// then store the tables translated to the local numbering of those
+// vectors. Finally decide, in agreement with all processes, whether a
+// plain copy between the global vector and the finest level is possible.
+template <typename Number>
+template <int dim, int spacedim>
+void
+MGLevelGlobalTransfer<parallel::distributed::Vector<Number> >::fill_and_communicate_copy_indices
+(const DoFHandler<dim,spacedim> &mg_dof)
+{
+  // first go to the usual routine... The three local tables hold global
+  // indices and deliberately shadow the members of the same name; the
+  // members are always addressed through 'this->' below.
+  std::vector<std::vector<std::pair<types::global_dof_index, types::global_dof_index> > >
+  copy_indices;
+  std::vector<std::vector<std::pair<types::global_dof_index, types::global_dof_index> > >
+  copy_indices_global_mine;
+  std::vector<std::vector<std::pair<types::global_dof_index, types::global_dof_index> > >
+  copy_indices_level_mine;
+
+  fill_copy_indices(mg_dof, mg_constrained_dofs, copy_indices,
+                    copy_indices_global_mine, copy_indices_level_mine);
+
+  // get all degrees of freedom that we need read access to in copy_to_mg
+  // and copy_from_mg, respectively. We fill an IndexSet once on each level
+  // (for the global_mine indices accessing remote level indices) and once
+  // globally (for the level_mine indices accessing remote global indices).
+
+  // the variables index_set and level_index_set are going to define the
+  // ghost indices of the respective vectors (due to construction, these are
+  // precisely the indices that we need)
+  const parallel::Triangulation<dim, spacedim> *ptria =
+    dynamic_cast<const parallel::Triangulation<dim, spacedim> *>
+    (&mg_dof.get_triangulation());
+  const MPI_Comm mpi_communicator = ptria != 0 ? ptria->get_communicator() :
+                                    MPI_COMM_SELF;
+
+  // the number of levels does not change within this function
+  const unsigned int n_levels = mg_dof.get_triangulation().n_global_levels();
+
+  IndexSet index_set(mg_dof.locally_owned_dofs().size());
+  std::vector<types::global_dof_index> accessed_indices;
+  ghosted_level_vector.resize(0, n_levels-1);
+  std::vector<IndexSet> level_index_set(n_levels);
+  for (unsigned int l=0; l<n_levels; ++l)
+    {
+      // remote global indices are collected over all levels ...
+      for (unsigned int i=0; i<copy_indices_level_mine[l].size(); ++i)
+        accessed_indices.push_back(copy_indices_level_mine[l][i].first);
+      // ... whereas remote level indices are collected level by level
+      std::vector<types::global_dof_index> accessed_level_indices;
+      for (unsigned int i=0; i<copy_indices_global_mine[l].size(); ++i)
+        accessed_level_indices.push_back(copy_indices_global_mine[l][i].second);
+      std::sort(accessed_level_indices.begin(), accessed_level_indices.end());
+      level_index_set[l].set_size(mg_dof.locally_owned_mg_dofs(l).size());
+      level_index_set[l].add_indices(accessed_level_indices.begin(),
+                                     accessed_level_indices.end());
+      level_index_set[l].compress();
+      ghosted_level_vector[l].reinit(mg_dof.locally_owned_mg_dofs(l),
+                                     level_index_set[l],
+                                     mpi_communicator);
+    }
+  std::sort(accessed_indices.begin(), accessed_indices.end());
+  index_set.add_indices(accessed_indices.begin(), accessed_indices.end());
+  index_set.compress();
+  ghosted_global_vector.reinit(mg_dof.locally_owned_dofs(),
+                               index_set,
+                               mpi_communicator);
+
+  // localize the copy indices for faster access. Since all access will be
+  // through the ghosted vector in 'data', we can use this (much faster)
+  // option
+  this->copy_indices.resize(n_levels);
+  this->copy_indices_level_mine.resize(n_levels);
+  this->copy_indices_global_mine.resize(n_levels);
+  for (unsigned int level=0; level<n_levels; ++level)
+    {
+      const Utilities::MPI::Partitioner &global_partitioner =
+        *ghosted_global_vector.get_partitioner();
+      const Utilities::MPI::Partitioner &level_partitioner =
+        *ghosted_level_vector[level].get_partitioner();
+      // owned-owned case: the locally owned indices are going to control
+      // the local index
+      this->copy_indices[level].resize(copy_indices[level].size());
+      for (unsigned int i=0; i<copy_indices[level].size(); ++i)
+        this->copy_indices[level][i] =
+          std::pair<unsigned int,unsigned int>
+          (global_partitioner.global_to_local(copy_indices[level][i].first),
+           level_partitioner.global_to_local(copy_indices[level][i].second));
+
+      // remote-owned case: the locally owned indices for the level and the
+      // ghost dofs for the global indices set the local index
+      this->copy_indices_level_mine[level].
+      resize(copy_indices_level_mine[level].size());
+      for (unsigned int i=0; i<copy_indices_level_mine[level].size(); ++i)
+        this->copy_indices_level_mine[level][i] =
+          std::pair<unsigned int,unsigned int>
+          (global_partitioner.global_to_local(copy_indices_level_mine[level][i].first),
+           level_partitioner.global_to_local(copy_indices_level_mine[level][i].second));
+
+      // owned-remote case: the locally owned indices for the global dofs
+      // and the ghost dofs for the level indices set the local index
+      this->copy_indices_global_mine[level].
+      resize(copy_indices_global_mine[level].size());
+      for (unsigned int i=0; i<copy_indices_global_mine[level].size(); ++i)
+        this->copy_indices_global_mine[level][i] =
+          std::pair<unsigned int,unsigned int>
+          (global_partitioner.global_to_local(copy_indices_global_mine[level][i].first),
+           level_partitioner.global_to_local(copy_indices_global_mine[level][i].second));
+    }
+
+  // a plain copy requires an owned-owned entry on the finest level for
+  // every locally owned global DoF ...
+  perform_plain_copy = this->copy_indices.back().size()
+                       == mg_dof.locally_owned_dofs().n_elements();
+  if (perform_plain_copy)
+    {
+      AssertDimension(this->copy_indices_global_mine.back().size(), 0);
+      AssertDimension(this->copy_indices_level_mine.back().size(), 0);
+
+      // check whether there is a renumbering of degrees of freedom on
+      // either the finest level or the global dofs, which means that we
+      // cannot apply a plain copy
+      for (unsigned int i=0; i<this->copy_indices.back().size(); ++i)
+        if (this->copy_indices.back()[i].first !=
+            this->copy_indices.back()[i].second)
+          {
+            perform_plain_copy = false;
+            break;
+          }
+    }
+  // ... and has to be possible on every process of the communicator
+  perform_plain_copy =
+    Utilities::MPI::min(static_cast<int>(perform_plain_copy),
+                        mpi_communicator);
+
+  // if we do a plain copy, no need to hold additional ghosted vectors
+  if (perform_plain_copy)
+    {
+      ghosted_global_vector.reinit(0);
+      ghosted_level_vector.resize(0, 0);
+    }
+}
+
+
+
+// Bring the object back to an empty state: drop the reference to the
+// constrained-DoF object, empty all index and size tables and shrink the
+// ghosted helper vectors.
+template <typename Number>
+void
+MGLevelGlobalTransfer<parallel::distributed::Vector<Number> >::clear()
+{
+  // no longer refer to any MGConstrainedDoFs object
+  mg_constrained_dofs = 0;
+
+  // forget the three global <-> level index tables
+  copy_indices_level_mine.clear();
+  copy_indices_global_mine.clear();
+  copy_indices.clear();
+
+  // the remaining bookkeeping arrays
+  component_to_block_map.resize(0);
+  sizes.resize(0);
+
+  // and finally the ghosted helper vectors
+  ghosted_level_vector.resize(0, 0);
+  ghosted_global_vector.reinit(0);
+}
+
+
+
+// Write the content of the three copy index tables to the given stream,
+// one line per entry in the form "<label>[<level>]\t<first>\t<second>".
+template <typename Number>
+void
+MGLevelGlobalTransfer<parallel::distributed::Vector<Number> >::print_indices (std::ostream &os) const
+{
+  // table copy_indices
+  for (unsigned int lvl=0; lvl<copy_indices.size(); ++lvl)
+    for (unsigned int k=0; k<copy_indices[lvl].size(); ++k)
+      {
+        os << "copy_indices[" << lvl << "]\t"
+           << copy_indices[lvl][k].first << '\t'
+           << copy_indices[lvl][k].second << std::endl;
+      }
+
+  // table copy_indices_level_mine
+  for (unsigned int lvl=0; lvl<copy_indices_level_mine.size(); ++lvl)
+    for (unsigned int k=0; k<copy_indices_level_mine[lvl].size(); ++k)
+      {
+        os << "copy_ifrom  [" << lvl << "]\t"
+           << copy_indices_level_mine[lvl][k].first << '\t'
+           << copy_indices_level_mine[lvl][k].second << std::endl;
+      }
+
+  // table copy_indices_global_mine
+  for (unsigned int lvl=0; lvl<copy_indices_global_mine.size(); ++lvl)
+    for (unsigned int k=0; k<copy_indices_global_mine[lvl].size(); ++k)
+      {
+        os << "copy_ito    [" << lvl << "]\t"
+           << copy_indices_global_mine[lvl][k].first << '\t'
+           << copy_indices_global_mine[lvl][k].second << std::endl;
+      }
+}
+
+
+
+// Estimate of the memory used by this object: the size of the object
+// itself, what MemoryConsumption reports for the sizes array and the three
+// copy index tables, and the memory of the ghosted global vector and of
+// every ghosted level vector.
+template <typename Number>
+std::size_t
+MGLevelGlobalTransfer<parallel::distributed::Vector<Number> >::memory_consumption () const
+{
+  std::size_t total = sizeof(*this)
+                      + MemoryConsumption::memory_consumption(sizes)
+                      + MemoryConsumption::memory_consumption(copy_indices)
+                      + MemoryConsumption::memory_consumption(copy_indices_global_mine)
+                      + MemoryConsumption::memory_consumption(copy_indices_level_mine)
+                      + ghosted_global_vector.memory_consumption();
+
+  // add the contribution of each level between min_level() and max_level()
+  for (unsigned int level=ghosted_level_vector.min_level();
+       level<=ghosted_level_vector.max_level(); ++level)
+    {
+      total += ghosted_level_vector[level].memory_consumption();
+    }
+
+  return total;
+}
+
+
+
+// explicit instantiation (the included file is generated from
+// mg_level_global_transfer.inst.in)
+#include "mg_level_global_transfer.inst"
+
+// create two additional instantiations currently not supported by the
+// automatic template instantiation scheme: the
+// parallel::distributed::Vector specialization for the scalar types float
+// and long double
+template class MGLevelGlobalTransfer<parallel::distributed::Vector<float> >;
+template class MGLevelGlobalTransfer<parallel::distributed::Vector<long double> >;
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/multigrid/mg_level_global_transfer.inst.in b/source/multigrid/mg_level_global_transfer.inst.in
new file mode 100644
index 0000000..8e7a292
--- /dev/null
+++ b/source/multigrid/mg_level_global_transfer.inst.in
@@ -0,0 +1,80 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+for (V1 : VECTORS_WITH_MATRIX)
+  { // instantiate the whole class once for each V1 in VECTORS_WITH_MATRIX
+    template class MGLevelGlobalTransfer< V1 >;
+  }
+
+for (deal_II_dimension : DIMENSIONS; V1 : VECTORS_WITH_MATRIX)
+  { // member template, dim == spacedim only, for each dimension and each V1
+    template
+      void MGLevelGlobalTransfer< V1 >::fill_and_communicate_copy_indices<deal_II_dimension,deal_II_dimension>(
+        const DoFHandler<deal_II_dimension,deal_II_dimension> &mg_dof);
+  }
+
+for (deal_II_dimension : DIMENSIONS; V1,V2 : DEAL_II_VEC_TEMPLATES; S1, S2 : REAL_SCALARS)
+  { // copy functions between MGLevelObject<V1<S1> > and V2<S2>, all listed combinations
+    template void
+      MGLevelGlobalTransfer<V1<S1> >::copy_to_mg (
+        const DoFHandler<deal_II_dimension>&, MGLevelObject<V1<S1> >&, const V2<S2>&) const;
+    template void
+      MGLevelGlobalTransfer<V1<S1> >::copy_from_mg (const DoFHandler<deal_II_dimension>&, V2<S2>&,
+                                                    const MGLevelObject<V1<S1> >&) const;
+    template void
+      MGLevelGlobalTransfer<V1<S1> >::copy_from_mg_add (const DoFHandler<deal_II_dimension>&, V2<S2>&,
+                                                        const MGLevelObject<V1<S1> >&) const;
+  }
+
+for (deal_II_dimension : DIMENSIONS)
+  { // float / long double parallel::distributed::Vector (presumably not in VECTORS_WITH_MATRIX)
+    template
+      void MGLevelGlobalTransfer< parallel::distributed::Vector<float> >::fill_and_communicate_copy_indices<deal_II_dimension,deal_II_dimension>(
+        const DoFHandler<deal_II_dimension,deal_II_dimension> &mg_dof);
+    template
+      void MGLevelGlobalTransfer< parallel::distributed::Vector<long double> >::fill_and_communicate_copy_indices<deal_II_dimension,deal_II_dimension>(
+        const DoFHandler<deal_II_dimension,deal_II_dimension> &mg_dof);
+  }
+
+for (deal_II_dimension : DIMENSIONS; S1, S2 : REAL_SCALARS)
+  { // copy functions for parallel::distributed::Vector, each (S1, S2) pair of REAL_SCALARS
+    template void
+      MGLevelGlobalTransfer<parallel::distributed::Vector<S1> >::copy_to_mg (
+        const DoFHandler<deal_II_dimension>&, MGLevelObject<parallel::distributed::Vector<S1> >&, const parallel::distributed::Vector<S2>&) const;
+    template void
+      MGLevelGlobalTransfer<parallel::distributed::Vector<S1> >::copy_from_mg (const DoFHandler<deal_II_dimension>&, parallel::distributed::Vector<S2>&,
+        const MGLevelObject<parallel::distributed::Vector<S1> >&) const;
+    template void
+      MGLevelGlobalTransfer<parallel::distributed::Vector<S1> >::copy_from_mg_add (const DoFHandler<deal_II_dimension>&, parallel::distributed::Vector<S2>&,
+        const MGLevelObject<parallel::distributed::Vector<S1> >&) const;
+  }
+
+for(deal_II_dimension : DIMENSIONS)
+  { // Trilinos vectors: emitted only when DEAL_II_WITH_TRILINOS is defined
+#ifdef DEAL_II_WITH_TRILINOS
+
+    template void
+      MGLevelGlobalTransfer<TrilinosWrappers::MPI::Vector>::copy_to_mg (
+        const DoFHandler<deal_II_dimension>&, MGLevelObject<TrilinosWrappers::MPI::Vector>&, const TrilinosWrappers::MPI::Vector&) const;
+    template void
+      MGLevelGlobalTransfer<TrilinosWrappers::MPI::Vector>::copy_from_mg (const DoFHandler<deal_II_dimension>&, TrilinosWrappers::MPI::Vector&,
+        const MGLevelObject<TrilinosWrappers::MPI::Vector>&) const;
+    template void
+      MGLevelGlobalTransfer<TrilinosWrappers::MPI::Vector>::copy_from_mg_add (const DoFHandler<deal_II_dimension>&, TrilinosWrappers::MPI::Vector&,
+        const MGLevelObject<TrilinosWrappers::MPI::Vector>&) const;
+#endif
+  }
diff --git a/source/multigrid/mg_tools.cc b/source/multigrid/mg_tools.cc
new file mode 100644
index 0000000..932429d
--- /dev/null
+++ b/source/multigrid/mg_tools.cc
@@ -0,0 +1,1575 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/thread_management.h>
+#include <deal.II/lac/sparsity_pattern.h>
+#include <deal.II/lac/block_sparsity_pattern.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/lac/sparsity_pattern.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/block_sparse_matrix.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/multigrid/mg_tools.h>
+#include <deal.II/multigrid/mg_base.h>
+#include <deal.II/base/mg_level_object.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/component_mask.h>
+#include <deal.II/numerics/matrix_tools.h>
+
+#include <vector>
+#include <algorithm>
+#include <numeric>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace MGTools
+{
+
+
+  // specializations for 1D
+  // DoFHandler<1,1>, single flux-coupling flag: not implemented.
+  template <>
+  void
+  compute_row_length_vector(const DoFHandler<1,1> &,
+                            const unsigned int,
+                            std::vector<unsigned int> &,
+                            const DoFTools::Coupling)
+  {
+    Assert(false, ExcNotImplemented());
+  }
+
+
+
+  // DoFHandler<1,1>, coupling tables: not implemented.
+  template <>
+  void
+  compute_row_length_vector(const DoFHandler<1,1> &,
+                            const unsigned int,
+                            std::vector<unsigned int> &,
+                            const Table<2,DoFTools::Coupling> &,
+                            const Table<2,DoFTools::Coupling> &)
+  {
+    Assert(false, ExcNotImplemented());
+  }
+
+
+
+  // DoFHandler<1,2>, single flux-coupling flag: not implemented.
+  template <>
+  void
+  compute_row_length_vector(const DoFHandler<1,2> &,
+                            const unsigned int,
+                            std::vector<unsigned int> &,
+                            const DoFTools::Coupling)
+  {
+    Assert(false, ExcNotImplemented());
+  }
+
+
+  // DoFHandler<1,2>, coupling tables: not implemented.
+  template <>
+  void
+  compute_row_length_vector(const DoFHandler<1,2> &,
+                            const unsigned int,
+                            std::vector<unsigned int> &,
+                            const Table<2,DoFTools::Coupling> &,
+                            const Table<2,DoFTools::Coupling> &)
+  {
+    Assert(false, ExcNotImplemented());
+  }
+
+
+
+// Generic (2D/3D) overload taking a single flux-coupling flag. For 1D see specialization above
+  template <int dim, int spacedim>
+  void
+  compute_row_length_vector(
+    const DoFHandler<dim,spacedim> &dofs,
+    const unsigned int level,
+    std::vector<unsigned int> &row_lengths,
+    const DoFTools::Coupling             flux_coupling)
+  {
+    Assert (row_lengths.size() == dofs.n_dofs(),
+            ExcDimensionMismatch(row_lengths.size(), dofs.n_dofs()));
+
+    // Reset the counters; everything below accumulates into them with +=.
+    // NOTE(review): size is checked against n_dofs(), not n_dofs(level) - confirm.
+    std::fill(row_lengths.begin(), row_lengths.end(), 0);
+    // Face user flags are used below to handle each face once, so we
+    // save the current flags for later restoration
+    std::vector<bool> old_flags;
+    // We need a non-constant
+    // triangulation for the user
+    // flags. Since we restore them in
+    // the end, this cast is safe.
+    Triangulation<dim,spacedim> &user_flags_triangulation =
+      const_cast<Triangulation<dim,spacedim>&> (dofs.get_triangulation());
+    user_flags_triangulation.save_user_flags(old_flags);
+    user_flags_triangulation.clear_user_flags();
+
+    const typename DoFHandler<dim,spacedim>::cell_iterator end = dofs.end(level);
+    typename DoFHandler<dim,spacedim>::active_cell_iterator cell;
+    std::vector<types::global_dof_index> cell_indices;
+    std::vector<types::global_dof_index> neighbor_indices;
+
+    // We loop over cells and go from cells to lower dimensional
+    // objects. This is the only way to cope with the fact that an
+    // unknown number of cells may share an object of dimension
+    // smaller than dim-1.
+    // NOTE(review): `cell` is declared as active_cell_iterator but is
+    // started at begin(level) - confirm which level cells are meant
+    // to be visited.
+    for (cell = dofs.begin(level); cell != end; ++cell)
+      {
+        const FiniteElement<dim> &fe = cell->get_fe();
+        cell_indices.resize(fe.dofs_per_cell);
+        cell->get_mg_dof_indices(cell_indices);
+        unsigned int i = 0;
+        // First, dofs on
+        // vertices. We assume that
+        // each vertex dof couples
+        // with all dofs on
+        // adjacent grid cells.
+
+        // Adding all dofs of the cells
+        // will add dofs of the faces
+        // of the cell adjacent to the
+        // vertex twice. Therefore, we
+        // subtract these here and add
+        // them in a loop over the
+        // faces below.
+
+        // In 1d, faces and vertices
+        // are identical. Nevertheless,
+        // this will only work if
+        // dofs_per_face is zero and
+        // dofs_per_vertex is
+        // arbitrary, not the other way
+        // round.
+//TODO: This assumes that even in hp context, the dofs per face coincide!
+        unsigned int increment = fe.dofs_per_cell - dim * fe.dofs_per_face;
+        while (i < fe.first_line_index)
+          row_lengths[cell_indices[i++]] += increment;
+        // From now on, if an object is
+        // a cell, its dofs only couple
+        // inside the cell. Since the
+        // faces are handled below, we
+        // have to subtract ALL faces
+        // in this case.
+
+        // In all other cases we
+        // subtract adjacent faces to be
+        // added in the loop below.
+        increment = (dim>1)
+                    ? fe.dofs_per_cell - (dim-1) * fe.dofs_per_face
+                    : fe.dofs_per_cell - GeometryInfo<dim>::faces_per_cell * fe.dofs_per_face;
+        while (i < fe.first_quad_index)
+          row_lengths[cell_indices[i++]] += increment;
+
+        // Now quads in 2D and 3D (indices up to first_hex_index)
+        increment = (dim>2)
+                    ? fe.dofs_per_cell - (dim-2) * fe.dofs_per_face
+                    : fe.dofs_per_cell - GeometryInfo<dim>::faces_per_cell * fe.dofs_per_face;
+        while (i < fe.first_hex_index)
+          row_lengths[cell_indices[i++]] += increment;
+        // Finally, cells in 3D (all remaining indices up to dofs_per_cell)
+        increment = fe.dofs_per_cell - GeometryInfo<dim>::faces_per_cell * fe.dofs_per_face;
+        while (i < fe.dofs_per_cell)
+          row_lengths[cell_indices[i++]] += increment;
+
+        // At this point, we have
+        // counted all dofs
+        // contributing from cells
+        // coupled topologically to the
+        // adjacent cells, but we
+        // subtracted some faces.
+
+        // Now, let's go by the faces
+        // and add the missing
+        // contribution as well as the
+        // flux contributions.
+        for (unsigned int iface=0; iface<GeometryInfo<dim>::faces_per_cell; ++iface)
+          {
+            bool level_boundary = cell->at_boundary(iface);
+            typename DoFHandler<dim,spacedim>::cell_iterator neighbor;
+            if (!level_boundary)
+              {
+                neighbor = cell->neighbor(iface);
+                if (static_cast<unsigned int>(neighbor->level()) != level)
+                  level_boundary = true;
+              }
+
+            if (level_boundary)
+              {
+                for (unsigned int local_dof=0; local_dof<fe.dofs_per_cell; ++local_dof)
+                  row_lengths[cell_indices[local_dof]] += fe.dofs_per_face;
+                continue;
+              }
+
+            const FiniteElement<dim> &nfe = neighbor->get_fe();
+            typename DoFHandler<dim,spacedim>::face_iterator face = cell->face(iface);
+
+            // Flux couplings are
+            // computed from both sides
+            // for simplicity.
+
+            // The dofs on the common face
+            // will be handled below,
+            // therefore, we subtract them
+            // here.
+            if (flux_coupling != DoFTools::none)
+              {
+                const unsigned int dof_increment = nfe.dofs_per_cell - nfe.dofs_per_face;
+                for (unsigned int local_dof=0; local_dof<fe.dofs_per_cell; ++local_dof)
+                  row_lengths[cell_indices[local_dof]] += dof_increment;
+              }
+
+            // Do this only once per face: the user flag
+            // marks faces that were already visited.
+            if (face->user_flag_set())
+              continue;
+            face->set_user_flag();
+            // At this point, we assume
+            // that each cell added its
+            // dofs minus the face to
+            // the couplings of the
+            // face dofs. Since we
+            // subtracted two faces, we
+            // have to re-add one.
+
+            // If one side of the face
+            // is refined, all the fine
+            // face dofs couple with
+            // the coarse one.
+            neighbor_indices.resize(nfe.dofs_per_cell);
+            neighbor->get_mg_dof_indices(neighbor_indices);
+            for (unsigned int local_dof=0; local_dof<fe.dofs_per_cell; ++local_dof)
+              row_lengths[cell_indices[local_dof]] += nfe.dofs_per_face;
+            for (unsigned int local_dof=0; local_dof<nfe.dofs_per_cell; ++local_dof)
+              row_lengths[neighbor_indices[local_dof]] += fe.dofs_per_face;
+          }
+      }
+    user_flags_triangulation.load_user_flags(old_flags);
+  }
+
+
+// Generic (2D/3D) overload with per-component coupling tables. See version for 1D above
+  template <int dim, int spacedim>
+  void
+  compute_row_length_vector(
+    const DoFHandler<dim,spacedim> &dofs,
+    const unsigned int level,
+    std::vector<unsigned int> &row_lengths,
+    const Table<2,DoFTools::Coupling> &couplings,
+    const Table<2,DoFTools::Coupling> &flux_couplings)
+  {
+    Assert (row_lengths.size() == dofs.n_dofs(),
+            ExcDimensionMismatch(row_lengths.size(), dofs.n_dofs()));
+
+    // Reset the counters; everything below accumulates into them with +=.
+    // NOTE(review): size is checked against n_dofs(), not n_dofs(level) - confirm.
+    std::fill(row_lengths.begin(), row_lengths.end(), 0);
+    // Face user flags are used below to handle each face once, so we
+    // save the current flags for later restoration
+    std::vector<bool> old_flags;
+    // We need a non-constant
+    // triangulation for the user
+    // flags. Since we restore them in
+    // the end, this cast is safe.
+    Triangulation<dim,spacedim> &user_flags_triangulation =
+      const_cast<Triangulation<dim,spacedim>&> (dofs.get_triangulation());
+    user_flags_triangulation.save_user_flags(old_flags);
+    user_flags_triangulation.clear_user_flags();
+
+    const typename DoFHandler<dim,spacedim>::cell_iterator end = dofs.end(level);
+    typename DoFHandler<dim,spacedim>::active_cell_iterator cell;
+    std::vector<types::global_dof_index> cell_indices;
+    std::vector<types::global_dof_index> neighbor_indices;
+
+    // We have to translate the
+    // couplings from components to
+    // blocks, so this works for
+    // nonprimitive elements as well.
+    std::vector<Table<2, DoFTools::Coupling> > couple_cell;
+    std::vector<Table<2, DoFTools::Coupling> > couple_face;
+    DoFTools::convert_couplings_to_blocks(dofs, couplings, couple_cell);
+    DoFTools::convert_couplings_to_blocks(dofs, flux_couplings, couple_face);
+
+    // We loop over cells and go from cells to lower dimensional
+    // objects. This is the only way to cope with the fact that an
+    // unknown number of cells may share an object of dimension
+    // smaller than dim-1.
+    // The loop starts at begin(level), like the single-flag overload
+    // above, so that it matches the end(level) bound (it used to
+    // start at begin_active(), which is not tied to `level`).
+    for (cell = dofs.begin(level); cell != end; ++cell)
+      {
+        const FiniteElement<dim> &fe = cell->get_fe();
+        const unsigned int fe_index = cell->active_fe_index();
+
+        Assert (couplings.n_rows()==fe.n_components(),
+                ExcDimensionMismatch(couplings.n_rows(), fe.n_components()));
+        Assert (couplings.n_cols()==fe.n_components(),
+                ExcDimensionMismatch(couplings.n_cols(), fe.n_components()));
+        Assert (flux_couplings.n_rows()==fe.n_components(),
+                ExcDimensionMismatch(flux_couplings.n_rows(), fe.n_components()));
+        Assert (flux_couplings.n_cols()==fe.n_components(),
+                ExcDimensionMismatch(flux_couplings.n_cols(), fe.n_components()));
+
+        cell_indices.resize(fe.dofs_per_cell);
+        cell->get_mg_dof_indices(cell_indices);
+        unsigned int i = 0;
+        // First, dofs on
+        // vertices. We assume that
+        // each vertex dof couples
+        // with all dofs on
+        // adjacent grid cells.
+
+        // Adding all dofs of the cells
+        // will add dofs of the faces
+        // of the cell adjacent to the
+        // vertex twice. Therefore, we
+        // subtract these here and add
+        // them in a loop over the
+        // faces below.
+
+        // In 1d, faces and vertices
+        // are identical. Nevertheless,
+        // this will only work if
+        // dofs_per_face is zero and
+        // dofs_per_vertex is
+        // arbitrary, not the other way
+        // round.
+        unsigned int increment;
+        while (i < fe.first_line_index)
+          {
+            for (unsigned int base=0; base<fe.n_base_elements(); ++base)
+              for (unsigned int mult=0; mult<fe.element_multiplicity(base); ++mult)
+                if (couple_cell[fe_index](fe.system_to_block_index(i).first,
+                                          fe.first_block_of_base(base) + mult) != DoFTools::none)
+                  {
+                    increment = fe.base_element(base).dofs_per_cell
+                                - dim * fe.base_element(base).dofs_per_face;
+                    row_lengths[cell_indices[i]] += increment;
+                  }
+            ++i;
+          }
+        // From now on, if an object is
+        // a cell, its dofs only couple
+        // inside the cell. Since the
+        // faces are handled below, we
+        // have to subtract ALL faces
+        // in this case.
+
+        // In all other cases we
+        // subtract adjacent faces to be
+        // added in the loop below.
+        while (i < fe.first_quad_index)
+          {
+            for (unsigned int base=0; base<fe.n_base_elements(); ++base)
+              for (unsigned int mult=0; mult<fe.element_multiplicity(base); ++mult)
+                if (couple_cell[fe_index](fe.system_to_block_index(i).first,
+                                          fe.first_block_of_base(base) + mult) != DoFTools::none)
+                  {
+                    increment = fe.base_element(base).dofs_per_cell
+                                - ((dim>1)
+                                   ? (dim-1)
+                                   : GeometryInfo<dim>::faces_per_cell)
+                                * fe.base_element(base).dofs_per_face;
+                    row_lengths[cell_indices[i]] += increment;
+                  }
+            ++i;
+          }
+
+        // Now quads in 2D and 3D (indices up to first_hex_index)
+        while (i < fe.first_hex_index)
+          {
+            for (unsigned int base=0; base<fe.n_base_elements(); ++base)
+              for (unsigned int mult=0; mult<fe.element_multiplicity(base); ++mult)
+                if (couple_cell[fe_index](fe.system_to_block_index(i).first,
+                                          fe.first_block_of_base(base) + mult) != DoFTools::none)
+                  {
+                    increment = fe.base_element(base).dofs_per_cell
+                                - ((dim>2)
+                                   ? (dim-2)
+                                   : GeometryInfo<dim>::faces_per_cell)
+                                * fe.base_element(base).dofs_per_face;
+                    row_lengths[cell_indices[i]] += increment;
+                  }
+            ++i;
+          }
+
+        // Finally, cells in 3D (all remaining indices up to dofs_per_cell)
+        while (i < fe.dofs_per_cell)
+          {
+            for (unsigned int base=0; base<fe.n_base_elements(); ++base)
+              for (unsigned int mult=0; mult<fe.element_multiplicity(base); ++mult)
+                if (couple_cell[fe_index](fe.system_to_block_index(i).first,
+                                          fe.first_block_of_base(base) + mult) != DoFTools::none)
+                  {
+                    increment = fe.base_element(base).dofs_per_cell
+                                - GeometryInfo<dim>::faces_per_cell
+                                * fe.base_element(base).dofs_per_face;
+                    row_lengths[cell_indices[i]] += increment;
+                  }
+            ++i;
+          }
+
+        // At this point, we have
+        // counted all dofs
+        // contributing from cells
+        // coupled topologically to the
+        // adjacent cells, but we
+        // subtracted some faces.
+
+        // Now, let's go by the faces
+        // and add the missing
+        // contribution as well as the
+        // flux contributions.
+        for (unsigned int iface=0; iface<GeometryInfo<dim>::faces_per_cell; ++iface)
+          {
+            bool level_boundary = cell->at_boundary(iface);
+            typename DoFHandler<dim,spacedim>::cell_iterator neighbor;
+            if (!level_boundary)
+              {
+                neighbor = cell->neighbor(iface);
+                if (static_cast<unsigned int>(neighbor->level()) != level)
+                  level_boundary = true;
+              }
+
+            if (level_boundary)
+              {
+                for (unsigned int local_dof=0; local_dof<fe.dofs_per_cell; ++local_dof)
+                  row_lengths[cell_indices[local_dof]] += fe.dofs_per_face;
+                continue;
+              }
+
+            const FiniteElement<dim> &nfe = neighbor->get_fe();
+            typename DoFHandler<dim,spacedim>::face_iterator face = cell->face(iface);
+
+            // Flux couplings are
+            // computed from both sides
+            // for simplicity.
+
+            // The dofs on the common face
+            // will be handled below,
+            // therefore, we subtract them
+            // here.
+            for (unsigned int base=0; base<nfe.n_base_elements(); ++base)
+              for (unsigned int mult=0; mult<nfe.element_multiplicity(base); ++mult)
+                for (unsigned int local_dof=0; local_dof<fe.dofs_per_cell; ++local_dof)
+                  if (couple_face[fe_index](fe.system_to_block_index(local_dof).first,
+                                            nfe.first_block_of_base(base) + mult) != DoFTools::none)
+                    {
+                      const unsigned int dof_increment = nfe.base_element(base).dofs_per_cell
+                                                         - nfe.base_element(base).dofs_per_face;
+                      row_lengths[cell_indices[local_dof]] += dof_increment;
+                    }
+
+            // Do this only once per
+            // face and not on the
+            // hanging faces.
+            if (face->user_flag_set())
+              continue;
+            face->set_user_flag();
+            // At this point, we assume
+            // that each cell added its
+            // dofs minus the face to
+            // the couplings of the
+            // face dofs. Since we
+            // subtracted two faces, we
+            // have to re-add one.
+
+            // If one side of the face
+            // is refined, all the fine
+            // face dofs couple with
+            // the coarse one.
+
+            // Wolfgang, do they couple
+            // with each other by
+            // constraints?
+
+            // This will not work with different couplings on different cells.
+            // NOTE(review): the lookups below use system_to_component_index while
+            // couple_cell was converted to blocks (block index is used above) - confirm.
+            neighbor_indices.resize(nfe.dofs_per_cell);
+            neighbor->get_mg_dof_indices(neighbor_indices);
+            for (unsigned int base=0; base<nfe.n_base_elements(); ++base)
+              for (unsigned int mult=0; mult<nfe.element_multiplicity(base); ++mult)
+                for (unsigned int local_dof=0; local_dof<fe.dofs_per_cell; ++local_dof)
+                  if (couple_cell[fe_index](fe.system_to_component_index(local_dof).first,
+                                            nfe.first_block_of_base(base) + mult) != DoFTools::none)
+                    row_lengths[cell_indices[local_dof]]
+                    += nfe.base_element(base).dofs_per_face;
+            for (unsigned int base=0; base<fe.n_base_elements(); ++base)
+              for (unsigned int mult=0; mult<fe.element_multiplicity(base); ++mult)
+                for (unsigned int local_dof=0; local_dof<nfe.dofs_per_cell; ++local_dof)
+                  if (couple_cell[fe_index](nfe.system_to_component_index(local_dof).first,
+                                            fe.first_block_of_base(base) + mult) != DoFTools::none)
+                    row_lengths[neighbor_indices[local_dof]]
+                    += fe.base_element(base).dofs_per_face;
+          }
+      }
+    user_flags_triangulation.load_user_flags(old_flags);
+  }
+
+
+
+  // Couples all level DoFs that share a cell; cells of other level subdomains are skipped.
+  template <typename DoFHandlerType, typename SparsityPatternType>
+  void make_sparsity_pattern (const DoFHandlerType &dof,
+                              SparsityPatternType  &sparsity,
+                              const unsigned int    level)
+  {
+    const types::global_dof_index n_dofs = dof.n_dofs(level);
+    (void)n_dofs;
+    Assert (sparsity.n_rows() == n_dofs,
+            ExcDimensionMismatch (sparsity.n_rows(), n_dofs));
+    Assert (sparsity.n_cols() == n_dofs,
+            ExcDimensionMismatch (sparsity.n_cols(), n_dofs));
+
+    const unsigned int n_local = dof.get_fe().dofs_per_cell;
+    std::vector<types::global_dof_index> level_indices(n_local);
+    const typename DoFHandlerType::cell_iterator last = dof.end(level);
+    for (typename DoFHandlerType::cell_iterator it = dof.begin(level); it!=last; ++it)
+      {
+        // Skip cells of other subdomains, unless no subdomain id is set at all.
+        if (dof.get_triangulation().locally_owned_subdomain()!=numbers::invalid_subdomain_id
+            && it->level_subdomain_id()!=dof.get_triangulation().locally_owned_subdomain())
+          continue;
+
+        it->get_mg_dof_indices (level_indices);
+        for (unsigned int row=0; row<n_local; ++row)
+          for (unsigned int col=0; col<n_local; ++col)
+            sparsity.add (level_indices[row], level_indices[col]);
+      }
+  }
+
+
+
+  // Level sparsity pattern with cell couplings plus couplings across interior
+  // faces whose neighbor lives on the same level.
+  template <int dim, typename SparsityPatternType, int spacedim>
+  void
+  make_flux_sparsity_pattern (const DoFHandler<dim,spacedim> &dof,
+                              SparsityPatternType            &sparsity,
+                              const unsigned int              level)
+  {
+    const types::global_dof_index n_dofs = dof.n_dofs(level);
+    (void)n_dofs;
+    Assert (sparsity.n_rows() == n_dofs,
+            ExcDimensionMismatch (sparsity.n_rows(), n_dofs));
+    Assert (sparsity.n_cols() == n_dofs,
+            ExcDimensionMismatch (sparsity.n_cols(), n_dofs));
+
+    typedef typename DoFHandler<dim,spacedim>::cell_iterator CellIt;
+
+    const unsigned int n_local = dof.get_fe().dofs_per_cell;
+    std::vector<types::global_dof_index> own_indices(n_local);
+    std::vector<types::global_dof_index> nbr_indices(n_local);
+
+    const CellIt last = dof.end(level);
+    for (CellIt it = dof.begin(level); it!=last; ++it)
+      {
+        // only cells that are locally owned on this level contribute
+        if (!it->is_locally_owned_on_level())
+          continue;
+
+        // couplings among the DoFs of the cell itself
+        it->get_mg_dof_indices (own_indices);
+        for (unsigned int r=0; r<n_local; ++r)
+          for (unsigned int c=0; c<n_local; ++c)
+            sparsity.add (own_indices[r], own_indices[c]);
+
+        // couplings across interior faces
+        for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+          {
+            // boundary faces have no neighbor
+            if (it->at_boundary(f))
+              continue;
+            // neighbors on another level are not handled by this function
+            if (static_cast<unsigned int>(it->neighbor_level(f)) != level)
+              continue;
+
+            const CellIt nbr = it->neighbor(f);
+            nbr->get_mg_dof_indices (nbr_indices);
+
+            // Add only the (this cell, neighbor) direction; the reverse
+            // direction is added when the neighbor itself is visited ...
+            for (unsigned int r=0; r<n_local; ++r)
+              for (unsigned int c=0; c<n_local; ++c)
+                sparsity.add (own_indices[r], nbr_indices[c]);
+
+            // ... except when the neighbor is not locally owned on this
+            // level; then its rows are added here as well.
+            if (nbr->is_locally_owned_on_level() == false)
+              for (unsigned int r=0; r<n_local; ++r)
+                for (unsigned int c=0; c<n_local; ++c)
+                  {
+                    sparsity.add (nbr_indices[r], nbr_indices[c]);
+                    sparsity.add (nbr_indices[r], own_indices[c]);
+                  }
+          }
+      }
+  }
+
+
+
+  // Adds couplings across interior faces between a locally owned cell on
+  // `level` and a neighbor on a different level. Rows of `sparsity` are sized
+  // by n_dofs(level-1), columns by n_dofs(level).
+  template <int dim, typename SparsityPatternType, int spacedim>
+  void
+  make_flux_sparsity_pattern_edge (const DoFHandler<dim,spacedim> &dof,
+                                   SparsityPatternType            &sparsity,
+                                   const unsigned int              level)
+  {
+    Assert ((level>=1) && (level<dof.get_triangulation().n_global_levels()),
+            ExcIndexRange(level, 1, dof.get_triangulation().n_global_levels()));
+
+    const types::global_dof_index fine_dofs = dof.n_dofs(level);
+    const types::global_dof_index coarse_dofs = dof.n_dofs(level-1);
+    (void)fine_dofs;
+    (void)coarse_dofs;
+
+    // Matrix maps from fine level to coarse level
+
+    Assert (sparsity.n_rows() == coarse_dofs,
+            ExcDimensionMismatch (sparsity.n_rows(), coarse_dofs));
+    Assert (sparsity.n_cols() == fine_dofs,
+            ExcDimensionMismatch (sparsity.n_cols(), fine_dofs));
+
+    const unsigned int dofs_per_cell = dof.get_fe().dofs_per_cell;
+    std::vector<types::global_dof_index> dofs_on_this_cell(dofs_per_cell);
+    std::vector<types::global_dof_index> dofs_on_other_cell(dofs_per_cell);
+    typename DoFHandler<dim,spacedim>::cell_iterator cell = dof.begin(level),
+                                                     endc = dof.end(level);
+    for (; cell!=endc; ++cell)
+      {
+        if (!cell->is_locally_owned_on_level()) continue;
+
+        cell->get_mg_dof_indices (dofs_on_this_cell);
+        // Loop over all interior neighbors
+        for (unsigned int face = 0;
+             face < GeometryInfo<dim>::faces_per_cell;
+             ++face)
+          {
+            // Neighbor is coarser
+
+            if ( (! cell->at_boundary(face)) &&
+                 (static_cast<unsigned int>(cell->neighbor_level(face)) != level) )
+              {
+                typename DoFHandler<dim,spacedim>::cell_iterator
+                neighbor = cell->neighbor(face);
+                neighbor->get_mg_dof_indices (dofs_on_other_cell);
+
+                // Rows come from the neighbor, columns from this cell. One add
+                // per (i,j) suffices: the full double loop already visits every
+                // pair, so the mirrored (j,i) add only repeated entries.
+                for (unsigned int i=0; i<dofs_per_cell; ++i)
+                  for (unsigned int j=0; j<dofs_per_cell; ++j)
+                    sparsity.add (dofs_on_other_cell[i],
+                                  dofs_on_this_cell[j]);
+              }
+          }
+      }
+  }
+
+
+
+  /**
+   * Add to @p sparsity the couplings of a flux-type discretization on
+   * multigrid level @p level of @p dof, restricted by coupling tables.
+   *
+   * For every cell of that level that is locally owned on the level, the
+   * function adds
+   * - the cell-internal entries (i,j) whose dof coupling derived from
+   *   @p int_mask is not DoFTools::none;
+   * - on boundary faces, cell-internal entries selected by the dof
+   *   couplings derived from @p flux_mask;
+   * - on interior faces whose neighbor is not on a coarser level, entries
+   *   coupling the dofs of the cell and of its neighbor selected by
+   *   @p flux_mask, evaluated both as (i,j) and as (j,i).
+   *
+   * A coupling of DoFTools::always adds the entries unconditionally; a
+   * coupling of DoFTools::nonzero adds an entry only if both shape
+   * functions involved have support on the respective face according to
+   * FiniteElement::has_support_on_face().
+   *
+   * The user flags of the triangulation are used to process each interior
+   * face only once; they are saved at the beginning and restored at the
+   * end of the function.
+   *
+   * @param dof       DoFHandler providing the level dof indices.
+   * @param sparsity  Pattern to add entries to; asserted to have
+   *                  dof.n_dofs(level) rows and columns.
+   * @param level     Multigrid level to work on.
+   * @param int_mask  Component coupling table for cell terms; asserted to
+   *                  be n_components x n_components.
+   * @param flux_mask Component coupling table for face terms; asserted to
+   *                  be n_components x n_components.
+   */
+  template <int dim, typename SparsityPatternType, int spacedim>
+  void
+  make_flux_sparsity_pattern (const DoFHandler<dim,spacedim>    &dof,
+                              SparsityPatternType               &sparsity,
+                              const unsigned int                 level,
+                              const Table<2,DoFTools::Coupling> &int_mask,
+                              const Table<2,DoFTools::Coupling> &flux_mask)
+  {
+    const FiniteElement<dim> &fe = dof.get_fe();
+    const types::global_dof_index n_dofs = dof.n_dofs(level);
+    const unsigned int n_comp = fe.n_components();
+    // n_dofs and n_comp are only read inside the Assert statements below;
+    // the casts presumably silence unused-variable warnings in builds
+    // where Assert expands to nothing.
+    (void)n_dofs;
+    (void)n_comp;
+
+    Assert (sparsity.n_rows() == n_dofs,
+            ExcDimensionMismatch (sparsity.n_rows(), n_dofs));
+    Assert (sparsity.n_cols() == n_dofs,
+            ExcDimensionMismatch (sparsity.n_cols(), n_dofs));
+    Assert (int_mask.n_rows() == n_comp,
+            ExcDimensionMismatch (int_mask.n_rows(), n_comp));
+    Assert (int_mask.n_cols() == n_comp,
+            ExcDimensionMismatch (int_mask.n_cols(), n_comp));
+    Assert (flux_mask.n_rows() == n_comp,
+            ExcDimensionMismatch (flux_mask.n_rows(), n_comp));
+    Assert (flux_mask.n_cols() == n_comp,
+            ExcDimensionMismatch (flux_mask.n_cols(), n_comp));
+
+    const unsigned int total_dofs = fe.dofs_per_cell;
+    std::vector<types::global_dof_index> dofs_on_this_cell(total_dofs);
+    std::vector<types::global_dof_index> dofs_on_other_cell(total_dofs);
+    // support_on_face(i,f): cached result of fe.has_support_on_face(i,f)
+    Table<2,bool> support_on_face(total_dofs, GeometryInfo<dim>::faces_per_cell);
+
+    typename DoFHandler<dim,spacedim>::cell_iterator cell = dof.begin(level),
+                                                     endc = dof.end(level);
+
+    // expand the component-wise coupling tables to dof-wise tables
+    const Table<2,DoFTools::Coupling>
+    int_dof_mask  = DoFTools::dof_couplings_from_component_couplings(fe, int_mask),
+    flux_dof_mask = DoFTools::dof_couplings_from_component_couplings(fe, flux_mask);
+
+    for (unsigned int i=0; i<total_dofs; ++i)
+      for (unsigned int f=0; f<GeometryInfo<dim>::faces_per_cell; ++f)
+        support_on_face(i,f) = fe.has_support_on_face(i,f);
+
+    // Clear user flags because we will need them. But first we save them
+    // and make sure that we restore them later such that at the end of
+    // this function the Triangulation will be in the same state as it was
+    // at the beginning of this function.
+    std::vector<bool> user_flags;
+    dof.get_triangulation().save_user_flags(user_flags);
+    const_cast<Triangulation<dim,spacedim> &>(dof.get_triangulation()).clear_user_flags ();
+
+    for (; cell!=endc; ++cell)
+      {
+        if (!cell->is_locally_owned_on_level()) continue;
+
+        cell->get_mg_dof_indices (dofs_on_this_cell);
+        // make sparsity pattern for this cell
+        for (unsigned int i=0; i<total_dofs; ++i)
+          for (unsigned int j=0; j<total_dofs; ++j)
+            if (int_dof_mask[i][j] != DoFTools::none)
+              sparsity.add (dofs_on_this_cell[i],
+                            dofs_on_this_cell[j]);
+
+        // Loop over all faces of this cell
+        for (unsigned int face = 0;
+             face < GeometryInfo<dim>::faces_per_cell;
+             ++face)
+          {
+            // a set user flag marks a face that was already handled from
+            // the cell on its other side (see set_user_flag() below)
+            typename DoFHandler<dim,spacedim>::face_iterator cell_face = cell->face(face);
+            if (cell_face->user_flag_set ())
+              continue;
+
+            if (cell->at_boundary (face) )
+              {
+                // boundary face: only couplings within this cell
+                for (unsigned int i=0; i<total_dofs; ++i)
+                  {
+                    const bool i_non_zero_i = support_on_face (i, face);
+                    for (unsigned int j=0; j<total_dofs; ++j)
+                      {
+                        const bool j_non_zero_i = support_on_face (j, face);
+
+                        if (flux_dof_mask(i,j) == DoFTools::always)
+                          sparsity.add (dofs_on_this_cell[i],
+                                        dofs_on_this_cell[j]);
+                        if (flux_dof_mask(i,j) == DoFTools::nonzero
+                            && i_non_zero_i && j_non_zero_i)
+                          sparsity.add (dofs_on_this_cell[i],
+                                        dofs_on_this_cell[j]);
+                      }
+                  }
+              }
+            else
+              {
+                typename DoFHandler<dim,spacedim>::cell_iterator
+                neighbor = cell->neighbor(face);
+
+                // faces towards a coarser-level neighbor are not handled
+                // by this function
+                if (neighbor->level() < cell->level())
+                  continue;
+
+                unsigned int neighbor_face = cell->neighbor_of_neighbor(face);
+
+                neighbor->get_mg_dof_indices (dofs_on_other_cell);
+                // suffix _i: support on this cell's face ("interior"),
+                // suffix _e: support on the neighbor's face ("exterior")
+                for (unsigned int i=0; i<total_dofs; ++i)
+                  {
+                    const bool i_non_zero_i = support_on_face (i, face);
+                    const bool i_non_zero_e = support_on_face (i, neighbor_face);
+                    for (unsigned int j=0; j<total_dofs; ++j)
+                      {
+                        const bool j_non_zero_i = support_on_face (j, face);
+                        const bool j_non_zero_e = support_on_face (j, neighbor_face);
+                        // coupling (i,j): all four this/other combinations
+                        if (flux_dof_mask(i,j) == DoFTools::always)
+                          {
+                            sparsity.add (dofs_on_this_cell[i],
+                                          dofs_on_other_cell[j]);
+                            sparsity.add (dofs_on_other_cell[i],
+                                          dofs_on_this_cell[j]);
+                            sparsity.add (dofs_on_this_cell[i],
+                                          dofs_on_this_cell[j]);
+                            sparsity.add (dofs_on_other_cell[i],
+                                          dofs_on_other_cell[j]);
+                          }
+                        if (flux_dof_mask(i,j) == DoFTools::nonzero)
+                          {
+                            if (i_non_zero_i && j_non_zero_e)
+                              sparsity.add (dofs_on_this_cell[i],
+                                            dofs_on_other_cell[j]);
+                            if (i_non_zero_e && j_non_zero_i)
+                              sparsity.add (dofs_on_other_cell[i],
+                                            dofs_on_this_cell[j]);
+                            if (i_non_zero_i && j_non_zero_i)
+                              sparsity.add (dofs_on_this_cell[i],
+                                            dofs_on_this_cell[j]);
+                            if (i_non_zero_e && j_non_zero_e)
+                              sparsity.add (dofs_on_other_cell[i],
+                                            dofs_on_other_cell[j]);
+                          }
+
+                        // same again for the transposed coupling (j,i)
+                        if (flux_dof_mask(j,i) == DoFTools::always)
+                          {
+                            sparsity.add (dofs_on_this_cell[j],
+                                          dofs_on_other_cell[i]);
+                            sparsity.add (dofs_on_other_cell[j],
+                                          dofs_on_this_cell[i]);
+                            sparsity.add (dofs_on_this_cell[j],
+                                          dofs_on_this_cell[i]);
+                            sparsity.add (dofs_on_other_cell[j],
+                                          dofs_on_other_cell[i]);
+                          }
+                        if (flux_dof_mask(j,i) == DoFTools::nonzero)
+                          {
+                            if (j_non_zero_i && i_non_zero_e)
+                              sparsity.add (dofs_on_this_cell[j],
+                                            dofs_on_other_cell[i]);
+                            if (j_non_zero_e && i_non_zero_i)
+                              sparsity.add (dofs_on_other_cell[j],
+                                            dofs_on_this_cell[i]);
+                            if (j_non_zero_i && i_non_zero_i)
+                              sparsity.add (dofs_on_this_cell[j],
+                                            dofs_on_this_cell[i]);
+                            if (j_non_zero_e && i_non_zero_e)
+                              sparsity.add (dofs_on_other_cell[j],
+                                            dofs_on_other_cell[i]);
+                          }
+                      }
+                  }
+                // mark the face as done so that it is skipped when the
+                // loop reaches it again from the neighboring cell
+                neighbor->face(neighbor_face)->set_user_flag ();
+              }
+          }
+      }
+
+    // finally restore the user flags
+    const_cast<Triangulation<dim,spacedim> &>(dof.get_triangulation()).load_user_flags(user_flags);
+  }
+
+
+
+  /**
+   * Add to @p sparsity the couplings across refinement edges between
+   * multigrid level @p level and level @p level - 1, restricted by a
+   * coupling table.
+   *
+   * For every cell of @p level that is locally owned on the level and
+   * every interior face whose neighbor is not on @p level, an entry
+   * (row = neighbor dof, column = cell dof) is added for each dof pair
+   * (i,j) whose coupling derived from @p flux_mask is not DoFTools::none;
+   * the transposed local pair (j,i) is added together with it.
+   *
+   * @param dof       DoFHandler providing the level dof indices.
+   * @param sparsity  Pattern to add entries to; asserted to have
+   *                  dof.n_dofs(level-1) rows and dof.n_dofs(level) columns.
+   * @param level     Fine level; asserted to satisfy
+   *                  1 <= level < n_global_levels().
+   * @param flux_mask Component coupling table; asserted to be
+   *                  n_components x n_components.
+   */
+  template <int dim, typename SparsityPatternType, int spacedim>
+  void
+  make_flux_sparsity_pattern_edge (const DoFHandler<dim,spacedim>    &dof,
+                                   SparsityPatternType               &sparsity,
+                                   const unsigned int                 level,
+                                   const Table<2,DoFTools::Coupling> &flux_mask)
+  {
+    // use the element type matching the handler, including spacedim
+    const FiniteElement<dim,spacedim> &fe = dof.get_fe();
+    const unsigned int n_comp = fe.n_components();
+    (void)n_comp;
+
+    Assert ((level>=1) && (level<dof.get_triangulation().n_global_levels()),
+            ExcIndexRange(level, 1, dof.get_triangulation().n_global_levels()));
+
+    const types::global_dof_index fine_dofs = dof.n_dofs(level);
+    const types::global_dof_index coarse_dofs = dof.n_dofs(level-1);
+    (void)fine_dofs;
+    (void)coarse_dofs;
+
+    // Matrix maps from fine level to coarse level
+
+    Assert (sparsity.n_rows() == coarse_dofs,
+            ExcDimensionMismatch (sparsity.n_rows(), coarse_dofs));
+    Assert (sparsity.n_cols() == fine_dofs,
+            ExcDimensionMismatch (sparsity.n_cols(), fine_dofs));
+    Assert (flux_mask.n_rows() == n_comp,
+            ExcDimensionMismatch (flux_mask.n_rows(), n_comp));
+    Assert (flux_mask.n_cols() == n_comp,
+            ExcDimensionMismatch (flux_mask.n_cols(), n_comp));
+
+    const unsigned int dofs_per_cell = fe.dofs_per_cell;
+    std::vector<types::global_dof_index> dofs_on_this_cell(dofs_per_cell);
+    std::vector<types::global_dof_index> dofs_on_other_cell(dofs_per_cell);
+
+    // expand the component-wise coupling table to a dof-wise table
+    const Table<2,DoFTools::Coupling> flux_dof_mask
+      = DoFTools::dof_couplings_from_component_couplings(fe, flux_mask);
+
+    typename DoFHandler<dim,spacedim>::cell_iterator cell = dof.begin(level),
+                                                     endc = dof.end(level);
+    for (; cell!=endc; ++cell)
+      {
+        if (!cell->is_locally_owned_on_level()) continue;
+
+        cell->get_mg_dof_indices (dofs_on_this_cell);
+        // Loop over all interior neighbors
+        for (unsigned int face = 0;
+             face < GeometryInfo<dim>::faces_per_cell;
+             ++face)
+          {
+            // only faces whose neighbor lives on a different level
+            // (i.e. the neighbor is coarser) contribute
+            if ( (! cell->at_boundary(face)) &&
+                 (static_cast<unsigned int>(cell->neighbor_level(face)) != level) )
+              {
+                typename DoFHandler<dim,spacedim>::cell_iterator
+                neighbor = cell->neighbor(face);
+                neighbor->get_mg_dof_indices (dofs_on_other_cell);
+
+                for (unsigned int i=0; i<dofs_per_cell; ++i)
+                  for (unsigned int j=0; j<dofs_per_cell; ++j)
+                    if (flux_dof_mask(i,j) != DoFTools::none)
+                      {
+                        // rows are coarse (neighbor) dofs, columns are
+                        // fine (this cell) dofs
+                        sparsity.add (dofs_on_other_cell[i],
+                                      dofs_on_this_cell[j]);
+                        sparsity.add (dofs_on_other_cell[j],
+                                      dofs_on_this_cell[i]);
+                      }
+              }
+          }
+      }
+  }
+
+
+
+  /**
+   * Count, for each multigrid level, how many level dofs belong to each
+   * vector component of the finite element.
+   *
+   * @param dof_handler      DoFHandler to query.
+   * @param result           One vector per level (size asserted to equal
+   *                         the number of global levels); each is resized
+   *                         to n_components and overwritten.
+   * @param only_once        If true, dofs of a non-primitive base element
+   *                         are counted only for its first component.
+   * @param target_component Maps component c to the slot of result[l]
+   *                         its count is added to. An empty vector means
+   *                         the identity map; otherwise the size is
+   *                         asserted to equal n_components.
+   */
+  template <int dim, int spacedim>
+  void
+  count_dofs_per_component (const DoFHandler<dim,spacedim> &dof_handler,
+                            std::vector<std::vector<types::global_dof_index> > &result,
+                            bool                              only_once,
+                            std::vector<unsigned int>         target_component)
+  {
+    // use the element type matching the handler, including spacedim
+    const FiniteElement<dim,spacedim> &fe = dof_handler.get_fe();
+    const unsigned int n_components = fe.n_components();
+    const unsigned int nlevels = dof_handler.get_triangulation().n_global_levels();
+
+    Assert (result.size() == nlevels,
+            ExcDimensionMismatch(result.size(), nlevels));
+
+    // an empty target vector stands for the identity mapping
+    if (target_component.size() == 0)
+      {
+        target_component.resize(n_components);
+        for (unsigned int i=0; i<n_components; ++i)
+          target_component[i] = i;
+      }
+
+    Assert(target_component.size() == n_components,
+           ExcDimensionMismatch(target_component.size(), n_components));
+
+    for (unsigned int l=0; l<nlevels; ++l)
+      {
+        result[l].resize (n_components);
+        std::fill (result[l].begin(),result[l].end(), 0U);
+
+        // special case for only one component. treat this first since
+        // it does not require any computations
+        if (n_components == 1)
+          {
+            result[l][0] = dof_handler.n_dofs(l);
+          }
+        else
+          {
+            // otherwise determine the number of dofs in each component
+            // separately. do so in parallel
+            std::vector<std::vector<bool> >
+            dofs_in_component (n_components,
+                               std::vector<bool>(dof_handler.n_dofs(l),
+                                                 false));
+            // the masks are kept in a vector that outlives the loop body
+            // because they are handed to the tasks below, which are only
+            // joined after the loop
+            std::vector<ComponentMask> component_select (n_components);
+            Threads::TaskGroup<> tasks;
+            for (unsigned int i=0; i<n_components; ++i)
+              {
+                // pick the ComponentMask overload of extract_level_dofs
+                void (*fun_ptr) (const unsigned int       level,
+                                 const DoFHandler<dim,spacedim> &,
+                                 const ComponentMask &,
+                                 std::vector<bool> &)
+                  = &DoFTools::extract_level_dofs<DoFHandler<dim,spacedim> >;
+
+                std::vector<bool> tmp(n_components, false);
+                tmp[i] = true;
+                component_select[i] = ComponentMask(tmp);
+
+                tasks += Threads::new_task (fun_ptr,
+                                            l, dof_handler,
+                                            component_select[i],
+                                            dofs_in_component[i]);
+              }
+            tasks.join_all();
+
+            // next count what we got
+            unsigned int component = 0;
+            for (unsigned int b=0; b<fe.n_base_elements(); ++b)
+              {
+                const FiniteElement<dim,spacedim> &base = fe.base_element(b);
+                // Dimension of base element
+                unsigned int d = base.n_components();
+
+                for (unsigned int m=0; m<fe.element_multiplicity(b); ++m)
+                  {
+                    for (unsigned int dd=0; dd<d; ++dd)
+                      {
+                        if (base.is_primitive() || (!only_once || dd==0))
+                          result[l][target_component[component]]
+                          += std::count(dofs_in_component[component].begin(),
+                                        dofs_in_component[component].end(),
+                                        true);
+                        ++component;
+                      }
+                  }
+              }
+            // finally sanity check. the sum is formed in
+            // types::global_dof_index: a plain 0U seed would make
+            // std::accumulate add up in unsigned int, which can wrap
+            // around when the dof index type is wider than that
+            Assert (!fe.is_primitive()
+                    ||
+                    std::accumulate (result[l].begin(),
+                                     result[l].end(),
+                                     types::global_dof_index(0))
+                    ==
+                    dof_handler.n_dofs(l),
+                    ExcInternalError());
+          }
+      }
+  }
+
+
+
+  /**
+   * Count, for each multigrid level, how many level dofs belong to each
+   * block of the finite element.
+   *
+   * @param dof_handler    DoFHandler to query.
+   * @param dofs_per_block One vector per level (outer size asserted to
+   *                       equal the number of global levels, inner sizes
+   *                       asserted to equal the largest target block
+   *                       index plus one); contents are overwritten.
+   * @param target_block   Maps block b to the slot its count is added
+   *                       to. An empty vector means the identity map;
+   *                       otherwise the size is asserted to equal the
+   *                       number of blocks.
+   */
+  template <typename DoFHandlerType>
+  void
+  count_dofs_per_block
+  (const DoFHandlerType                               &dof_handler,
+   std::vector<std::vector<types::global_dof_index> > &dofs_per_block,
+   std::vector<unsigned int>                           target_block)
+  {
+    const FiniteElement<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &fe = dof_handler.get_fe();
+    const unsigned int n_blocks = fe.n_blocks();
+    const unsigned int n_levels = dof_handler.get_triangulation().n_global_levels();
+
+    AssertDimension (dofs_per_block.size(), n_levels);
+
+    // start every level from zero counts
+    for (unsigned int lvl=0; lvl<n_levels; ++lvl)
+      std::fill (dofs_per_block[lvl].begin(), dofs_per_block[lvl].end(), 0U);
+
+    // an empty target vector (the default argument) stands for the
+    // identity mapping
+    if (target_block.empty())
+      {
+        target_block.resize(n_blocks);
+        for (unsigned int b=0; b<n_blocks; ++b)
+          target_block[b] = b;
+      }
+    Assert(target_block.size()==n_blocks,
+           ExcDimensionMismatch(target_block.size(),n_blocks));
+
+    // number of output slots implied by the mapping; only used for the
+    // size checks below
+    const unsigned int n_target_blocks
+      = *std::max_element (target_block.begin(), target_block.end()) + 1;
+    (void)n_target_blocks;
+
+    for (unsigned int lvl=0; lvl<n_levels; ++lvl)
+      AssertDimension (dofs_per_block[lvl].size(), n_target_blocks);
+
+    // with a single block every dof of a level belongs to it, so no
+    // extraction is needed
+    if (n_blocks == 1)
+      {
+        for (unsigned int lvl=0; lvl<n_levels; ++lvl)
+          dofs_per_block[lvl][0] = dof_handler.n_dofs(lvl);
+        return;
+      }
+
+    // general case: per level, extract the dofs of each block in its own
+    // task and count the selected entries afterwards
+    for (unsigned int lvl=0; lvl<n_levels; ++lvl)
+      {
+        std::vector<std::vector<bool> >
+        selected_dofs (n_blocks, std::vector<bool>(dof_handler.n_dofs(lvl), false));
+        std::vector<BlockMask> masks (n_blocks);
+        Threads::TaskGroup<> tasks;
+
+        for (unsigned int b=0; b<n_blocks; ++b)
+          {
+            // pick the BlockMask overload of extract_level_dofs
+            void (*extract) (const unsigned int level,
+                             const DoFHandlerType &,
+                             const BlockMask &,
+                             std::vector<bool> &)
+              = &DoFTools::extract_level_dofs<DoFHandlerType>;
+
+            std::vector<bool> only_this_block(n_blocks, false);
+            only_this_block[b] = true;
+            masks[b] = only_this_block;
+
+            tasks += Threads::new_task (extract,
+                                        lvl, dof_handler, masks[b],
+                                        selected_dofs[b]);
+          }
+        tasks.join_all ();
+
+        // accumulate the per-block counts into their target slots
+        for (unsigned int b=0; b<n_blocks; ++b)
+          {
+            const std::vector<bool> &flags = selected_dofs[b];
+            dofs_per_block[lvl][target_block[b]]
+            += std::count(flags.begin(), flags.end(), true);
+          }
+      }
+  }
+
+
+  /**
+   * Specialization for DoFHandler<1,1>: not implemented. All arguments
+   * are ignored; the body only consists of an Assert that always fails
+   * with ExcNotImplemented.
+   */
+  template <>
+  void
+  make_boundary_list (const DoFHandler<1,1> &,
+                      const FunctionMap<1>::type &,
+                      std::vector<std::set<types::global_dof_index> > &,
+                      const ComponentMask &)
+  {
+    Assert (false, ExcNotImplemented());
+  }
+
+
+
+  /**
+   * Specialization for DoFHandler<1,2>: not implemented. All arguments
+   * are ignored; the body only consists of an Assert that always fails
+   * with ExcNotImplemented.
+   */
+  template <>
+  void
+  make_boundary_list (const DoFHandler<1,2> &,
+                      const FunctionMap<1>::type &,
+                      std::vector<std::set<types::global_dof_index> > &,
+                      const ComponentMask &)
+  {
+    Assert (false, ExcNotImplemented());
+  }
+
+
+
+  /**
+   * Collect, separately for each multigrid level, the level dof indices
+   * located on boundary faces whose boundary id occurs as a key of
+   * @p function_map. Only the keys of the map are looked at here.
+   *
+   * If @p component_mask selects all components, every dof on a matching
+   * face is inserted. Otherwise only those dofs are inserted whose
+   * component is selected by the mask.
+   *
+   * @param dof              DoFHandler providing the level dof indices.
+   * @param function_map     Map whose keys are the boundary ids of
+   *                         interest; if empty the function returns
+   *                         immediately.
+   * @param boundary_indices One set per level (size asserted to equal the
+   *                         number of global levels); indices are
+   *                         inserted, existing content is kept.
+   * @param component_mask   Components whose boundary dofs are requested.
+   */
+  template <int dim, int spacedim>
+  void
+  make_boundary_list(
+    const DoFHandler<dim,spacedim> &dof,
+    const typename FunctionMap<dim>::type &function_map,
+    std::vector<std::set<types::global_dof_index> > &boundary_indices,
+    const ComponentMask &component_mask)
+  {
+    // if for whatever reason we were passed an empty map, return
+    // immediately
+    if (function_map.size() == 0)
+      return;
+
+    const unsigned int n_levels = dof.get_triangulation().n_global_levels();
+
+    // n_levels is only read by the AssertDimension below
+    (void)n_levels;
+
+
+    const unsigned int n_components = DoFTools::n_components(dof);
+    const bool          fe_is_system = (n_components != 1);
+
+    AssertDimension (boundary_indices.size(), n_levels);
+
+    std::vector<types::global_dof_index> local_dofs;
+    local_dofs.reserve (DoFTools::max_dofs_per_face(dof));
+    // NOTE(review): local_dofs is still empty at this point (reserve()
+    // does not change size()), so this fill touches no elements. The
+    // vector is resized before each use further down.
+    std::fill (local_dofs.begin (),
+               local_dofs.end (),
+               DoFHandler<dim,spacedim>::invalid_dof_index);
+
+    // First, deal with the simpler case when we have to identify all
+    // boundary dofs
+    if (component_mask.n_selected_components(n_components) == n_components)
+      {
+        typename DoFHandler<dim,spacedim>::cell_iterator
+        cell = dof.begin(),
+        endc = dof.end();
+        for (; cell!=endc; ++cell)
+          {
+            // if the triangulation has a valid locally owned subdomain,
+            // skip cells that are artificial on their level
+            if (dof.get_triangulation().locally_owned_subdomain()!=numbers::invalid_subdomain_id
+                && cell->level_subdomain_id()==numbers::artificial_subdomain_id)
+              continue;
+            const FiniteElement<dim> &fe = cell->get_fe();
+            const unsigned int level = cell->level();
+            local_dofs.resize(fe.dofs_per_face);
+
+            for (unsigned int face_no = 0; face_no < GeometryInfo<dim>::faces_per_cell;
+                 ++face_no)
+              if (cell->at_boundary(face_no) == true)
+                {
+                  const typename DoFHandler<dim,spacedim>::face_iterator
+                  face = cell->face(face_no);
+                  const types::boundary_id bi = face->boundary_id();
+                  // Face is listed in boundary map
+                  if (function_map.find(bi) != function_map.end())
+                    {
+                      face->get_mg_dof_indices(level, local_dofs);
+                      for (unsigned int i=0; i<fe.dofs_per_face; ++i)
+                        boundary_indices[level].insert(local_dofs[i]);
+                    }
+                }
+          }
+      }
+    else
+      {
+        Assert (component_mask.n_selected_components(n_components) > 0,
+                ExcMessage("It's probably worthwhile to select at least one component."));
+
+        typename DoFHandler<dim,spacedim>::cell_iterator
+        cell = dof.begin(),
+        endc = dof.end();
+        for (; cell!=endc; ++cell)
+          // same cell filter as in the branch above, written as the
+          // negated condition
+          if (dof.get_triangulation().locally_owned_subdomain()==numbers::invalid_subdomain_id
+              || cell->level_subdomain_id()!=numbers::artificial_subdomain_id)
+            for (unsigned int face_no = 0; face_no < GeometryInfo<dim>::faces_per_cell;
+                 ++face_no)
+              {
+                if (cell->at_boundary(face_no) == false)
+                  continue;
+
+                const FiniteElement<dim> &fe = cell->get_fe();
+                const unsigned int level = cell->level();
+
+                // we can presently deal only with primitive elements for
+                // boundary values. this does not preclude us using
+                // non-primitive elements in components that we aren't
+                // interested in, however. make sure that all shape
+                // functions that are non-zero for the components we are
+                // interested in, are in fact primitive
+                for (unsigned int i=0; i<cell->get_fe().dofs_per_cell; ++i)
+                  {
+                    const ComponentMask &nonzero_component_array
+                      = cell->get_fe().get_nonzero_components (i);
+                    for (unsigned int c=0; c<n_components; ++c)
+                      if ((nonzero_component_array[c] == true)
+                          &&
+                          (component_mask[c] == true))
+                        Assert (cell->get_fe().is_primitive (i),
+                                ExcMessage ("This function can only deal with requested boundary "
+                                            "values that correspond to primitive (scalar) base "
+                                            "elements"));
+                  }
+
+                typename DoFHandler<dim,spacedim>::face_iterator face = cell->face(face_no);
+                const types::boundary_id boundary_component = face->boundary_id();
+                if (function_map.find(boundary_component) != function_map.end())
+                  // face is of the right component
+                  {
+                    // get the level dof indices of the dofs on this face
+                    local_dofs.resize (fe.dofs_per_face);
+                    face->get_mg_dof_indices (level, local_dofs);
+                    if (fe_is_system)
+                      {
+                        // enter those dofs into the list that match the
+                        // component signature. avoid the usual
+                        // complication that we can't just use
+                        // *_system_to_component_index for non-primitive
+                        // FEs
+                        for (unsigned int i=0; i<local_dofs.size(); ++i)
+                          {
+                            unsigned int component;
+                            if (fe.is_primitive())
+                              component = fe.face_system_to_component_index(i).first;
+                            else
+                              {
+                                // non-primitive case. make sure that this
+                                // particular shape function _is_
+                                // primitive, and get at its component.
+                                // use usual trick to transfer face dof
+                                // index to cell dof index
+                                const unsigned int cell_i
+                                  = (dim == 1 ?
+                                     i
+                                     :
+                                     (dim == 2 ?
+                                      (i<2*fe.dofs_per_vertex ? i : i+2*fe.dofs_per_vertex)
+                                      :
+                                      (dim == 3 ?
+                                       (i<4*fe.dofs_per_vertex ?
+                                        i
+                                        :
+                                        (i<4*fe.dofs_per_vertex+4*fe.dofs_per_line ?
+                                         i+4*fe.dofs_per_vertex
+                                         :
+                                         i+4*fe.dofs_per_vertex+8*fe.dofs_per_line))
+                                       :
+                                       numbers::invalid_unsigned_int)));
+                                Assert (cell_i < fe.dofs_per_cell, ExcInternalError());
+
+                                // make sure that if this is not a
+                                // primitive shape function, then all the
+                                // corresponding components in the mask
+                                // are not set (check currently disabled)
+//                         if (!fe.is_primitive(cell_i))
+//                           for (unsigned int c=0; c<n_components; ++c)
+//                             if (fe.get_nonzero_components(cell_i)[c])
+//                               Assert (component_mask[c] == false,
+//                                       ExcFENotPrimitive());
+
+// let's pick the first of possibly more than one non-zero
+// components. if shape function is non-primitive, then we will ignore
+// the result in the following anyway, otherwise there's only one
+// non-zero component which we will use
+                                component = fe.get_nonzero_components(cell_i).first_selected_component();
+                              }
+
+                            if (component_mask[component] == true)
+                              boundary_indices[level].insert(local_dofs[i]);
+                          }
+                      }
+                    else
+                      // scalar element: every dof on the face is taken
+                      for (unsigned int i=0; i<local_dofs.size(); ++i)
+                        boundary_indices[level].insert(local_dofs[i]);
+                  }
+              }
+      }
+  }
+
+
+  /**
+   * IndexSet variant of make_boundary_list(): computes the per-level
+   * boundary dof sets with the std::set overload and converts each of
+   * them into an IndexSet of size dof.n_dofs(level).
+   *
+   * @param dof              DoFHandler providing the level dof indices.
+   * @param function_map     Forwarded unchanged to the std::set overload.
+   * @param boundary_indices One IndexSet per level (size asserted to
+   *                         equal the number of global levels); every
+   *                         entry is overwritten.
+   * @param component_mask   Forwarded unchanged to the std::set overload.
+   */
+  template <int dim, int spacedim>
+  void
+  make_boundary_list(const DoFHandler<dim,spacedim> &dof,
+                     const typename FunctionMap<dim>::type &function_map,
+                     std::vector<IndexSet> &boundary_indices,
+                     const ComponentMask &component_mask)
+  {
+    const unsigned int n_levels = dof.get_triangulation().n_global_levels();
+
+    Assert (boundary_indices.size() == n_levels,
+            ExcDimensionMismatch (boundary_indices.size(), n_levels));
+
+    // let the std::set overload do the actual work
+    std::vector<std::set<types::global_dof_index> > level_sets (n_levels);
+    make_boundary_list (dof, function_map, level_sets, component_mask);
+
+    // convert each level's set into a freshly sized IndexSet
+    for (unsigned int level=0; level<n_levels; ++level)
+      {
+        IndexSet &target = boundary_indices[level];
+        target = IndexSet (dof.n_dofs(level));
+        target.add_indices (level_sets[level].begin(),
+                            level_sets[level].end());
+      }
+  }
+
+
+  /**
+   * For each multigrid level, collect the level dofs that lie on at
+   * least one face of a cell which is not a refinement-edge face and on
+   * no face of that cell which is one.
+   *
+   * A face of a cell counts as a refinement-edge ("interface") face here
+   * if it is an interior face whose neighbor is on a coarser level than
+   * the cell. Boundary faces and faces towards neighbors that are not
+   * coarser count as non-interface faces. Dofs that are not located on
+   * any face of a cell are never inserted by this function.
+   *
+   * If the triangulation reports a valid locally owned subdomain, only
+   * cells whose level subdomain id equals it are visited.
+   *
+   * @param mg_dof_handler     DoFHandler providing the level dof indices.
+   * @param non_interface_dofs One set per level (size asserted to equal
+   *                           the number of global levels); indices are
+   *                           inserted, existing content is kept.
+   */
+  template <int dim, int spacedim>
+  void
+  extract_non_interface_dofs (const DoFHandler<dim,spacedim> &mg_dof_handler,
+                              std::vector<std::set<types::global_dof_index> >  &non_interface_dofs)
+  {
+    Assert (non_interface_dofs.size() == mg_dof_handler.get_triangulation().n_global_levels(),
+            ExcDimensionMismatch (non_interface_dofs.size(),
+                                  mg_dof_handler.get_triangulation().n_global_levels()));
+
+    const FiniteElement<dim,spacedim> &fe = mg_dof_handler.get_fe();
+
+    const unsigned int   dofs_per_cell   = fe.dofs_per_cell;
+    const unsigned int   dofs_per_face   = fe.dofs_per_face;
+
+    std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+    // per-cell scratch flags, indexed by cell-local dof number
+    std::vector<bool> cell_dofs(dofs_per_cell, false);
+    std::vector<bool> cell_dofs_interface(dofs_per_cell, false);
+
+    // iterator type matching the handler, including spacedim
+    typename DoFHandler<dim,spacedim>::cell_iterator cell = mg_dof_handler.begin(),
+                                                     endc = mg_dof_handler.end();
+
+
+    for (; cell!=endc; ++cell)
+      {
+        // skip cells that are not owned on their level
+        if (mg_dof_handler.get_triangulation().locally_owned_subdomain()!=numbers::invalid_subdomain_id
+            && cell->level_subdomain_id()!=mg_dof_handler.get_triangulation().locally_owned_subdomain())
+          continue;
+
+        std::fill (cell_dofs.begin(), cell_dofs.end(), false);
+        std::fill (cell_dofs_interface.begin(), cell_dofs_interface.end(), false);
+
+        for (unsigned int face_nr=0; face_nr<GeometryInfo<dim>::faces_per_cell; ++face_nr)
+          {
+            const typename DoFHandler<dim,spacedim>::face_iterator face = cell->face(face_nr);
+            if (!face->at_boundary())
+              {
+                //interior face
+                const typename DoFHandler<dim,spacedim>::cell_iterator
+                neighbor = cell->neighbor(face_nr);
+
+                if ((neighbor->level() < cell->level()))
+                  {
+                    // coarser neighbor: dofs on this face are interface dofs
+                    for (unsigned int j=0; j<dofs_per_face; ++j)
+                      cell_dofs_interface[fe.face_to_cell_index(j,face_nr)] = true;
+                  }
+                else
+                  {
+                    for (unsigned int j=0; j<dofs_per_face; ++j)
+                      cell_dofs[fe.face_to_cell_index(j,face_nr)] = true;
+                  }
+              }
+            else
+              {
+                //boundary face
+                for (unsigned int j=0; j<dofs_per_face; ++j)
+                  cell_dofs[fe.face_to_cell_index(j,face_nr)] = true;
+              }
+          }
+
+        const unsigned int level = cell->level();
+        cell->get_mg_dof_indices (local_dof_indices);
+
+        // keep dofs seen on a non-interface face unless they also sit on
+        // an interface face of this cell
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          if (cell_dofs[i] && !cell_dofs_interface[i])
+            non_interface_dofs[level].insert(local_dof_indices[i]);
+      }
+  }
+
+
+  // For every multigrid level, collect the level indices of the degrees of
+  // freedom located on faces whose neighbor lives on a coarser level, seen
+  // from the finer of the two cells. The previous content of each
+  // interface_dofs[level] is cleared before the new indices are added.
+  //
+  // interface_dofs must have one entry per global level of the
+  // triangulation (checked by the Assert below).
+  template <int dim, int spacedim>
+  void
+  extract_inner_interface_dofs (const DoFHandler<dim,spacedim> &mg_dof_handler,
+                                std::vector<IndexSet>  &interface_dofs)
+  {
+    Assert (interface_dofs.size() == mg_dof_handler.get_triangulation().n_global_levels(),
+            ExcDimensionMismatch (interface_dofs.size(),
+                                  mg_dof_handler.get_triangulation().n_global_levels()));
+
+    // indices are gathered unsorted and with duplicates first; they are
+    // sorted and made unique once per level at the end
+    std::vector<std::vector<types::global_dof_index> >
+    tmp_interface_dofs(interface_dofs.size());
+
+    const FiniteElement<dim,spacedim> &fe = mg_dof_handler.get_fe();
+
+    const unsigned int   dofs_per_cell   = fe.dofs_per_cell;
+    const unsigned int   dofs_per_face   = fe.dofs_per_face;
+
+    std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+
+    std::vector<bool> cell_dofs(dofs_per_cell, false);
+
+    // the iterator types have to carry spacedim as well, so that they
+    // match the handler type given as argument
+    typename DoFHandler<dim,spacedim>::cell_iterator cell = mg_dof_handler.begin(),
+                                                     endc = mg_dof_handler.end();
+
+    for (; cell!=endc; ++cell)
+      {
+        // Do not look at artificial level cells (in a serial computation we
+        // need to ignore the level_subdomain_id() because it is never set).
+        if (mg_dof_handler.get_triangulation().locally_owned_subdomain()!=numbers::invalid_subdomain_id
+            && cell->level_subdomain_id()==numbers::artificial_subdomain_id)
+          continue;
+
+        bool has_coarser_neighbor = false;
+
+        std::fill (cell_dofs.begin(), cell_dofs.end(), false);
+
+        for (unsigned int face_nr=0; face_nr<GeometryInfo<dim>::faces_per_cell; ++face_nr)
+          {
+            const typename DoFHandler<dim,spacedim>::face_iterator face = cell->face(face_nr);
+            if (!face->at_boundary())
+              {
+                //interior face
+                const typename DoFHandler<dim,spacedim>::cell_iterator
+                neighbor = cell->neighbor(face_nr);
+
+                // only process cell pairs if one or both of them are owned by me (ignore if running in serial)
+                if (mg_dof_handler.get_triangulation().locally_owned_subdomain()!=numbers::invalid_subdomain_id
+                    &&
+                    neighbor->level_subdomain_id()==numbers::artificial_subdomain_id)
+                  continue;
+
+                // The neighbor is coarser than this cell, i.e. the face is
+                // a refinement edge looked at from the finer cell
+                if (neighbor->level() < cell->level())
+                  {
+                    for (unsigned int j=0; j<dofs_per_face; ++j)
+                      cell_dofs[fe.face_to_cell_index(j,face_nr)] = true;
+
+                    has_coarser_neighbor = true;
+                  }
+              }
+          }
+
+        // nothing was flagged, so there is no need to query the dof indices
+        if (has_coarser_neighbor == false)
+          continue;
+
+        const unsigned int level = cell->level();
+        cell->get_mg_dof_indices (local_dof_indices);
+
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          {
+            if (cell_dofs[i])
+              tmp_interface_dofs[level].push_back(local_dof_indices[i]);
+          }
+      }
+
+    for (unsigned int l=0; l<mg_dof_handler.get_triangulation().n_global_levels(); ++l)
+      {
+        interface_dofs[l].clear();
+        std::sort(tmp_interface_dofs[l].begin(), tmp_interface_dofs[l].end());
+        // std::unique returns the end of the duplicate-free range, which is
+        // exactly the range handed to add_indices
+        interface_dofs[l].add_indices(tmp_interface_dofs[l].begin(),
+                                      std::unique(tmp_interface_dofs[l].begin(),
+                                                  tmp_interface_dofs[l].end()));
+        interface_dofs[l].compress();
+      }
+
+  }
+}
+
+
+// explicit instantiations
+#include "mg_tools.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/multigrid/mg_tools.inst.in b/source/multigrid/mg_tools.inst.in
new file mode 100644
index 0000000..329b7ce
--- /dev/null
+++ b/source/multigrid/mg_tools.inst.in
@@ -0,0 +1,128 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+// Explicit instantiations of the MGTools sparsity pattern builders, one set
+// for every combination of PATTERN, deal_II_dimension and
+// deal_II_space_dimension taken from the lists named in the loop header.
+// make_sparsity_pattern is instantiated whenever the dimension does not
+// exceed the space dimension; the flux variants only when both are equal,
+// and their overloads taking coupling tables only for dimensions above one.
+// NOTE(review): the backslash-escaped braces presumably keep the namespace
+// braces from being read as the end of the loop body by the tool that
+// expands *.inst.in files -- confirm against that tool.
+for (PATTERN : SPARSITY_PATTERNS; deal_II_dimension : DIMENSIONS;
+     deal_II_space_dimension: SPACE_DIMENSIONS)
+  {
+    namespace MGTools
+  \{
+
+#if deal_II_dimension <= deal_II_space_dimension
+      template void
+	make_sparsity_pattern<DoFHandler<deal_II_dimension, deal_II_space_dimension>, PATTERN> (
+	  const DoFHandler<deal_II_dimension, deal_II_space_dimension> &,
+	  PATTERN &,
+	  const unsigned int);
+#endif
+
+#if deal_II_dimension == deal_II_space_dimension
+    template void
+      make_flux_sparsity_pattern<deal_II_dimension> (
+	const DoFHandler<deal_II_dimension> &,
+	PATTERN &,
+	const unsigned int);
+
+    template void
+      make_flux_sparsity_pattern_edge<deal_II_dimension> (
+	const DoFHandler<deal_II_dimension> &,
+	PATTERN &,
+	const unsigned int);
+
+#if deal_II_dimension > 1
+
+    template void
+      make_flux_sparsity_pattern<deal_II_dimension> (
+	const DoFHandler<deal_II_dimension> &,
+	PATTERN &,
+	const unsigned int,
+	const Table<2,DoFTools::Coupling>&,
+	const Table<2,DoFTools::Coupling>&);
+
+    template void
+      make_flux_sparsity_pattern_edge<deal_II_dimension> (
+	const DoFHandler<deal_II_dimension> &,
+	PATTERN &,
+	const unsigned int,
+	const Table<2,DoFTools::Coupling>&);
+#endif
+#endif
+    \}
+}
+
+
+// Explicit instantiations of the remaining MGTools functions, once per
+// entry of DIMENSIONS. compute_row_length_vector and the std::set overload
+// of make_boundary_list are only instantiated for dimensions above one.
+// The two conditional groups at the end add count_dofs_per_block for
+// handlers whose space dimension is larger than their dimension:
+// (dim, dim+1) for dim < 3, and (1,3), which is emitted in the pass for
+// dimension 3.
+for (deal_II_dimension : DIMENSIONS)
+  {
+    namespace MGTools
+    \{
+
+#if deal_II_dimension > 1
+      template void
+	compute_row_length_vector(
+	  const DoFHandler<deal_II_dimension>&, unsigned int,
+	  std::vector<unsigned int>&, const DoFTools::Coupling);
+      template void
+	compute_row_length_vector(
+	  const DoFHandler<deal_II_dimension>&, unsigned int,
+	  std::vector<unsigned int>&,
+	  const Table<2,DoFTools::Coupling>&, const Table<2,DoFTools::Coupling>&);
+#endif
+
+      template void count_dofs_per_component (
+	const DoFHandler<deal_II_dimension>&, std::vector<std::vector<types::global_dof_index> >&,
+	bool, std::vector<unsigned int>);
+      template void count_dofs_per_block (
+	const DoFHandler<deal_II_dimension>&, std::vector<std::vector<types::global_dof_index> >&,
+	std::vector<unsigned int>);
+
+#if deal_II_dimension > 1
+      template void make_boundary_list(
+	const DoFHandler<deal_II_dimension>&,
+	const FunctionMap<deal_II_dimension>::type&,
+	std::vector<std::set<types::global_dof_index> >&,
+	const ComponentMask &);
+#endif
+
+    template void make_boundary_list(
+	const DoFHandler<deal_II_dimension>&,
+	const FunctionMap<deal_II_dimension>::type&,
+	std::vector<IndexSet>&,
+	const ComponentMask &);
+
+      template
+	void
+	extract_inner_interface_dofs (const DoFHandler<deal_II_dimension> &mg_dof_handler,
+				      std::vector<IndexSet>  &interface_dofs);
+
+      template
+        void
+        extract_non_interface_dofs (const DoFHandler<deal_II_dimension> & mg_dof_handler,
+                                    std::vector<std::set<types::global_dof_index> > &non_interface_dofs);
+
+#if deal_II_dimension < 3
+      template void count_dofs_per_block (
+	const DoFHandler<deal_II_dimension,deal_II_dimension+1>&,
+	std::vector<std::vector<types::global_dof_index> >&, std::vector<unsigned int>);
+#endif
+
+#if deal_II_dimension == 3
+      template void count_dofs_per_block (
+	const DoFHandler<1,3>&,
+	std::vector<std::vector<types::global_dof_index> >&, std::vector<unsigned int>);
+#endif
+    \}
+  }
+
diff --git a/source/multigrid/mg_transfer_block.cc b/source/multigrid/mg_transfer_block.cc
new file mode 100644
index 0000000..67a97e2
--- /dev/null
+++ b/source/multigrid/mg_transfer_block.cc
@@ -0,0 +1,630 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/logstream.h>
+
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/block_sparse_matrix.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/multigrid/mg_transfer_block.h>
+#include <deal.II/multigrid/mg_transfer_block.templates.h>
+#include <deal.II/multigrid/mg_tools.h>
+
+#include <algorithm>
+#include <numeric>
+#include <iostream>
+#include <utility>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace
+{
+  /**
+   * Adjust block vectors on all levels to correct size. The degrees of
+   * freedom on each level are counted by block, and each level vector gets
+   * one block for every entry of @p sel that is <tt>true</tt>, sized with
+   * the number of degrees of freedom of that block on that level.
+   *
+   * If @p ndofs is empty on entry, it is filled here with the result of
+   * MGTools::count_dofs_per_block(); otherwise it is used as given.
+   */
+  template <int dim, typename number, int spacedim>
+  void
+  reinit_vector_by_blocks (
+    const dealii::DoFHandler<dim,spacedim> &mg_dof,
+    MGLevelObject<BlockVector<number> > &v,
+    const std::vector<bool> &sel,
+    std::vector<std::vector<types::global_dof_index> > &ndofs)
+  {
+    // Compute the number of blocks needed
+    const unsigned int n_selected
+      = static_cast<unsigned int>(std::count(sel.begin(), sel.end(), true));
+
+    if (ndofs.size() == 0)
+      {
+        std::vector<std::vector<types::global_dof_index> >
+        new_dofs(mg_dof.get_triangulation().n_levels(),
+                 std::vector<types::global_dof_index>(sel.size()));
+        std::swap(ndofs, new_dofs);
+        MGTools::count_dofs_per_block (mg_dof, ndofs);
+      }
+
+    for (unsigned int level=v.min_level();
+         level<=v.max_level(); ++level)
+      {
+        v[level].reinit(n_selected, 0);
+
+        // k runs over the blocks of the level vector, i over all blocks
+        // of the finite element
+        unsigned int k=0;
+        for (unsigned int i=0; i<sel.size() && (k<v[level].n_blocks()); ++i)
+          if (sel[i])
+            v[level].block(k++).reinit(ndofs[level][i]);
+
+        // update the block structure once, after all blocks of this level
+        // have received their final size
+        v[level].collect_sizes();
+      }
+  }
+
+
+  /**
+   * Adjust vectors on all levels to correct size. The degrees of freedom
+   * on each level are counted by block and only the block
+   * @p selected_block is used: each level vector is resized to the number
+   * of degrees of freedom of that block on its level.
+   *
+   * If @p ndofs is empty on entry, it is filled here with the result of
+   * MGTools::count_dofs_per_block(); otherwise it is used as given.
+   */
+  template <int dim, typename number, int spacedim>
+  void
+  reinit_vector_by_blocks (
+    const dealii::DoFHandler<dim,spacedim> &mg_dof,
+    MGLevelObject<dealii::Vector<number> > &v,
+    const unsigned int selected_block,
+    std::vector<std::vector<types::global_dof_index> > &ndofs)
+  {
+    const unsigned int n_blocks = mg_dof.get_fe().n_blocks();
+    Assert(selected_block < n_blocks, ExcIndexRange(selected_block, 0, n_blocks));
+
+    if (ndofs.size() == 0)
+      {
+        // one row per level, one column per block of the finite element
+        std::vector<std::vector<types::global_dof_index> >
+        new_dofs(mg_dof.get_triangulation().n_levels(),
+                 std::vector<types::global_dof_index>(n_blocks));
+        std::swap(ndofs, new_dofs);
+        MGTools::count_dofs_per_block (mg_dof, ndofs);
+      }
+
+    for (unsigned int level=v.min_level();
+         level<=v.max_level(); ++level)
+      {
+        v[level].reinit(ndofs[level][selected_block]);
+      }
+  }
+}
+
+
+// Transfer the selected block of a global block vector to the level
+// vectors: resize dst, then go from the finest level down to level zero,
+// copying the entries listed in copy_indices on each level and adding the
+// restriction of the next finer level vector on all but the finest level.
+template <typename number>
+template <int dim, typename number2, int spacedim>
+void
+MGTransferBlockSelect<number>::copy_to_mg (
+  const DoFHandler<dim,spacedim>        &mg_dof_handler,
+  MGLevelObject<Vector<number> > &dst,
+  const BlockVector<number2>     &src) const
+{
+  reinit_vector_by_blocks(mg_dof_handler, dst, selected_block, sizes);
+
+  // Only one block is selected, so each level vector is a plain Vector
+  // that holds exactly that block.
+  const unsigned int n_levels = mg_dof_handler.get_triangulation().n_levels();
+  for (unsigned int level=n_levels; level-- != 0;)
+    {
+      const IT entries_end = copy_indices[selected_block][level].end();
+      for (IT entry = copy_indices[selected_block][level].begin();
+           entry != entries_end; ++entry)
+        dst[level](entry->second) = src.block(selected_block)(entry->first);
+
+      // there is nothing to restrict from on the finest level
+      if (level+1 != n_levels)
+        restrict_and_add (level+1, dst[level], dst[level+1]);
+    }
+}
+
+
+
+// Transfer a global vector that holds only the selected block to the level
+// vectors: resize dst, then go from the finest level down to level zero,
+// copying the entries listed in copy_indices on each level and adding the
+// restriction of the next finer level vector on all but the finest level.
+template <typename number>
+template <int dim, typename number2, int spacedim>
+void
+MGTransferBlockSelect<number>::copy_to_mg (
+  const DoFHandler<dim,spacedim>        &mg_dof_handler,
+  MGLevelObject<Vector<number> > &dst,
+  const Vector<number2>          &src) const
+{
+  reinit_vector_by_blocks(mg_dof_handler, dst, selected_block, sizes);
+
+  // Only one block is selected, so each level vector is a plain Vector
+  // that holds exactly that block.
+  const unsigned int n_levels = mg_dof_handler.get_triangulation().n_levels();
+  for (unsigned int level=n_levels; level-- != 0;)
+    {
+      const IT entries_end = copy_indices[selected_block][level].end();
+      for (IT entry = copy_indices[selected_block][level].begin();
+           entry != entries_end; ++entry)
+        dst[level](entry->second) = src(entry->first);
+
+      // there is nothing to restrict from on the finest level
+      if (level+1 != n_levels)
+        restrict_and_add (level+1, dst[level], dst[level+1]);
+    }
+}
+
+
+
+// Transfer all selected blocks of a global block vector to the level block
+// vectors: resize dst, then go from the finest level down to level zero,
+// copying the entries listed in copy_indices for each selected block and
+// adding the restriction of the next finer level vector on all but the
+// finest level.
+template <typename number>
+template <int dim, typename number2, int spacedim>
+void
+MGTransferBlock<number>::copy_to_mg (
+  const DoFHandler<dim,spacedim> &mg_dof_handler,
+  MGLevelObject<BlockVector<number> > &dst,
+  const BlockVector<number2> &src) const
+{
+  reinit_vector_by_blocks(mg_dof_handler, dst, selected, sizes);
+
+  const unsigned int n_levels = mg_dof_handler.get_triangulation().n_levels();
+  for (unsigned int level=n_levels; level-- != 0;)
+    {
+      for (unsigned int block=0; block<selected.size(); ++block)
+        {
+          if (!selected[block])
+            continue;
+
+          // mg_block maps the block of the global vector to the block of
+          // the level vector
+          for (IT entry = copy_indices[block][level].begin();
+               entry != copy_indices[block][level].end(); ++entry)
+            dst[level].block(mg_block[block])(entry->second)
+              = src.block(block)(entry->first);
+        }
+
+      // there is nothing to restrict from on the finest level
+      if (level+1 != n_levels)
+        restrict_and_add (level+1, dst[level], dst[level+1]);
+    }
+}
+
+
+
+// Set up the data shared by the block transfer classes:
+//  - mg_block / n_mg_blocks: map from blocks of the finite element to the
+//    blocks used in multigrid (invalid_unsigned_int if not selected),
+//  - sizes, mg_block_start, block_start: block sizes per level and the
+//    start indices of the blocks on each level and in the global numbering,
+//  - copy_indices: one (still empty) entry per level for selected blocks,
+//  - prolongation_sparsities / prolongation_matrices: one block matrix for
+//    each pair of consecutive levels, filled from the prolongation matrices
+//    of the finite element for the diagonal blocks that are selected.
+// If mg_constrained_dofs provides boundary indices, the matrix entries in
+// the corresponding columns are set to zero at the end.
+//
+// The member 'selected' has to have one entry per block of the finite
+// element before this function is called. The first argument is unused.
+template <int dim, int spacedim>
+void MGTransferBlockBase::build_matrices (
+  const DoFHandler<dim,spacedim> &,
+  const DoFHandler<dim,spacedim> &mg_dof)
+{
+  const FiniteElement<dim,spacedim> &fe = mg_dof.get_fe();
+  const unsigned int n_blocks  = fe.n_blocks();
+  const unsigned int dofs_per_cell = fe.dofs_per_cell;
+  const unsigned int n_levels      = mg_dof.get_triangulation().n_levels();
+
+  Assert (selected.size() == n_blocks,
+          ExcDimensionMismatch(selected.size(), n_blocks));
+
+  // Compute the mapping between real
+  // blocks and blocks used for
+  // multigrid computations.
+  mg_block.resize(n_blocks);
+  n_mg_blocks = 0;
+  for (unsigned int i=0; i<n_blocks; ++i)
+    if (selected[i])
+      mg_block[i] = n_mg_blocks++;
+    else
+      mg_block[i] = numbers::invalid_unsigned_int;
+
+  // Compute the lengths of all blocks
+  sizes.clear ();
+  sizes.resize(n_levels, std::vector<types::global_dof_index>(fe.n_blocks()));
+  MGTools::count_dofs_per_block(mg_dof, sizes);
+
+  // Fill some index vectors
+  // for later use.
+  mg_block_start = sizes;
+  // Compute start indices from sizes
+  // (exclusive prefix sum over the blocks of each level)
+  for (unsigned int l=0; l<mg_block_start.size(); ++l)
+    {
+      types::global_dof_index k=0;
+      for (unsigned int i=0; i<mg_block_start[l].size(); ++i)
+        {
+          const types::global_dof_index t=mg_block_start[l][i];
+          mg_block_start[l][i] = k;
+          k += t;
+        }
+    }
+
+  block_start.resize(n_blocks);
+  DoFTools::count_dofs_per_block (static_cast<const DoFHandler<dim,spacedim>&>(mg_dof),
+                                  block_start);
+
+  // same prefix sum for the global numbering
+  types::global_dof_index k=0;
+  for (unsigned int i=0; i<block_start.size(); ++i)
+    {
+      const types::global_dof_index t=block_start[i];
+      block_start[i] = k;
+      k += t;
+    }
+  // Build index vectors for
+  // copy_to_mg and
+  // copy_from_mg. These vectors must
+  // be prebuilt, since the
+  // get_dof_indices functions are
+  // too slow
+  copy_indices.resize(n_blocks);
+  for (unsigned int block=0; block<n_blocks; ++block)
+    if (selected[block])
+      copy_indices[block].resize(n_levels);
+
+// Building the prolongation matrices starts here!
+
+  // reset the size of the array of
+  // matrices. call resize(0) first,
+  // in order to delete all elements
+  // and clear their memory. then
+  // repopulate these arrays
+  //
+  // note that on resize(0), the
+  // shared_ptr class takes care of
+  // deleting the object it points to
+  // by itself
+  prolongation_matrices.resize (0);
+  prolongation_sparsities.resize (0);
+
+  // one matrix per pair of consecutive levels. the bound is written as
+  // i+1<n_levels so that it does not wrap around if there are no levels
+  for (unsigned int i=0; i+1<n_levels; ++i)
+    {
+      prolongation_sparsities
+      .push_back (std_cxx11::shared_ptr<BlockSparsityPattern> (new BlockSparsityPattern));
+      prolongation_matrices
+      .push_back (std_cxx11::shared_ptr<BlockSparseMatrix<double> > (new BlockSparseMatrix<double>));
+    }
+
+  // two fields which will store the
+  // indices of the multigrid dofs
+  // for a cell and one of its children
+  std::vector<types::global_dof_index> dof_indices_parent (dofs_per_cell);
+  std::vector<types::global_dof_index> dof_indices_child (dofs_per_cell);
+
+  // for each level: first build the
+  // sparsity pattern of the matrices
+  // and then build the matrices
+  // themselves. note that we only
+  // need to take care of cells on
+  // the coarser level which have
+  // children
+
+  for (unsigned int level=0; level+1<n_levels; ++level)
+    {
+      // reset the dimension of the
+      // structure.  note that for
+      // the number of entries per
+      // row, the number of parent
+      // dofs coupling to a child dof
+      // is necessary. this, is the
+      // number of degrees of freedom
+      // per cell
+      //
+      // rows are the dofs of the finer level, columns those of the
+      // coarser level; off-diagonal blocks stay empty
+      prolongation_sparsities[level]->reinit (n_blocks, n_blocks);
+      for (unsigned int i=0; i<n_blocks; ++i)
+        for (unsigned int j=0; j<n_blocks; ++j)
+          if (i==j)
+            prolongation_sparsities[level]->block(i,j)
+            .reinit(sizes[level+1][i],
+                    sizes[level][j],
+                    dofs_per_cell+1);
+          else
+            prolongation_sparsities[level]->block(i,j)
+            .reinit(sizes[level+1][i],
+                    sizes[level][j],
+                    0);
+
+      prolongation_sparsities[level]->collect_sizes();
+
+      for (typename DoFHandler<dim,spacedim>::cell_iterator cell=mg_dof.begin(level);
+           cell != mg_dof.end(level); ++cell)
+        if (cell->has_children())
+          {
+            cell->get_mg_dof_indices (dof_indices_parent);
+
+            Assert(cell->n_children()==GeometryInfo<dim>::max_children_per_cell,
+                   ExcNotImplemented());
+            for (unsigned int child=0; child<cell->n_children(); ++child)
+              {
+                // set an alias to the
+                // prolongation matrix for
+                // this child
+                const FullMatrix<double> &prolongation
+                  = fe.get_prolongation_matrix (child, cell->refinement_case());
+
+                cell->child(child)->get_mg_dof_indices (dof_indices_child);
+
+                // now tag the entries in the
+                // matrix which will be used
+                // for this pair of parent/child
+                for (unsigned int i=0; i<dofs_per_cell; ++i)
+                  for (unsigned int j=0; j<dofs_per_cell; ++j)
+                    if (prolongation(i,j) != 0)
+                      {
+                        const unsigned int icomp
+                          = fe.system_to_block_index(i).first;
+                        const unsigned int jcomp
+                          = fe.system_to_block_index(j).first;
+                        if ((icomp==jcomp) && selected[icomp])
+                          prolongation_sparsities[level]->add(dof_indices_child[i],
+                                                              dof_indices_parent[j]);
+                      }
+              }
+          }
+      prolongation_sparsities[level]->compress ();
+
+      prolongation_matrices[level]->reinit (*prolongation_sparsities[level]);
+      // now actually build the matrices
+      for (typename DoFHandler<dim,spacedim>::cell_iterator cell=mg_dof.begin(level);
+           cell != mg_dof.end(level); ++cell)
+        if (cell->has_children())
+          {
+            cell->get_mg_dof_indices (dof_indices_parent);
+
+            Assert(cell->n_children()==GeometryInfo<dim>::max_children_per_cell,
+                   ExcNotImplemented());
+            for (unsigned int child=0; child<cell->n_children(); ++child)
+              {
+                // set an alias to the
+                // prolongation matrix for
+                // this child
+                const FullMatrix<double> &prolongation
+                  = fe.get_prolongation_matrix (child, cell->refinement_case());
+
+                cell->child(child)->get_mg_dof_indices (dof_indices_child);
+
+                // now set the entries in the
+                // matrix
+                for (unsigned int i=0; i<dofs_per_cell; ++i)
+                  for (unsigned int j=0; j<dofs_per_cell; ++j)
+                    if (prolongation(i,j) != 0)
+                      {
+                        const unsigned int icomp = fe.system_to_block_index(i).first;
+                        const unsigned int jcomp = fe.system_to_block_index(j).first;
+                        if ((icomp==jcomp) && selected[icomp])
+                          prolongation_matrices[level]->set(dof_indices_child[i],
+                                                            dof_indices_parent[j],
+                                                            prolongation(i,j));
+                      }
+              }
+          }
+    }
+  // impose boundary conditions
+  // but only in the column of
+  // the prolongation matrix
+  if (mg_constrained_dofs != 0 && mg_constrained_dofs->have_boundary_indices())
+    {
+      std::vector<types::global_dof_index> constrain_indices;
+      std::vector<std::vector<bool> > constraints_per_block (n_blocks);
+      for (int level=n_levels-2; level>=0; --level)
+        {
+          if (mg_constrained_dofs->get_boundary_indices(level).n_elements() == 0)
+            continue;
+
+          // need to delete all the columns in the
+          // matrix that are on the boundary. to achieve
+          // this, create an array as long as there are
+          // matrix columns, and find which columns we
+          // need to filter away.
+          constrain_indices.resize (0);
+          constrain_indices.resize (prolongation_matrices[level]->n(), 0);
+          IndexSet::ElementIterator dof
+          = mg_constrained_dofs->get_boundary_indices(level).begin(),
+          endd = mg_constrained_dofs->get_boundary_indices(level).end();
+          for (; dof != endd; ++dof)
+            constrain_indices[*dof] = 1;
+
+          // constrain_indices is indexed by the columns of the whole block
+          // matrix. the flags of one block therefore start at the sum of
+          // the column counts n() of the preceding diagonal blocks and
+          // there are n() of them; the row count m() only bounds the loop
+          // over the rows below
+          types::global_dof_index column_offset = 0;
+          for (unsigned int block=0; block<n_blocks; ++block)
+            {
+              const types::global_dof_index n_rows = prolongation_matrices[level]->block(block, block).m();
+              const types::global_dof_index n_cols = prolongation_matrices[level]->block(block, block).n();
+              constraints_per_block[block].resize(0);
+              constraints_per_block[block].resize(n_cols, false);
+              for (types::global_dof_index i=0; i<n_cols; ++i)
+                constraints_per_block[block][i] = (constrain_indices[column_offset+i] == 1);
+              column_offset += n_cols;
+
+              for (types::global_dof_index i=0; i<n_rows; ++i)
+                {
+                  SparseMatrix<double>::iterator
+                  start_row = prolongation_matrices[level]->block(block, block).begin(i),
+                  end_row   = prolongation_matrices[level]->block(block, block).end(i);
+                  for (; start_row != end_row; ++start_row)
+                    {
+                      if (constraints_per_block[block][start_row->column()])
+                        start_row->value() = 0.;
+                    }
+                }
+            }
+        }
+    }
+}
+
+
+
+// Build the transfer data for the single block 'select': mark exactly this
+// block as selected, let MGTransferBlockBase::build_matrices() set up the
+// prolongation matrices and index offsets, and then fill copy_indices for
+// the selected block with pairs (index within the block in the global
+// numbering, index within the block on the level) for the degrees of
+// freedom of the active cells of each level. If mg_constrained_dofs is
+// set, degrees of freedom it reports as being at a refinement edge are
+// left out.
+template <typename number>
+template <int dim, int spacedim>
+void MGTransferBlockSelect<number>::build_matrices (
+  const DoFHandler<dim,spacedim> &dof,
+  const DoFHandler<dim,spacedim> &mg_dof,
+  unsigned int select)
+{
+  const FiniteElement<dim,spacedim> &fe = mg_dof.get_fe();
+  const unsigned int n_blocks = fe.n_blocks();
+
+  Assert (select < n_blocks, ExcIndexRange(select, 0, n_blocks));
+
+  // assign() rather than resize(): a block selected in a previous call
+  // must not stay selected
+  selected_block = select;
+  selected.assign(n_blocks, false);
+  selected[select] = true;
+
+  MGTransferBlockBase::build_matrices (dof, mg_dof);
+
+  std::vector<types::global_dof_index> temp_copy_indices;
+  std::vector<types::global_dof_index> global_dof_indices (fe.dofs_per_cell);
+  std::vector<types::global_dof_index> level_dof_indices  (fe.dofs_per_cell);
+
+  for (int level=dof.get_triangulation().n_levels()-1; level>=0; --level)
+    {
+      typename DoFHandler<dim,spacedim>::active_cell_iterator
+      level_cell = mg_dof.begin_active(level);
+      const typename DoFHandler<dim,spacedim>::active_cell_iterator
+      level_end  = mg_dof.end_active(level);
+
+      // one slot per level dof of the selected block, invalid until an
+      // active cell provides the matching global index
+      temp_copy_indices.resize (0);
+      temp_copy_indices.resize (sizes[level][selected_block],
+                                numbers::invalid_dof_index);
+
+      for (; level_cell!=level_end; ++level_cell)
+        {
+          // get the dof numbers of
+          // this cell for the global
+          // and the level-wise
+          // numbering
+          level_cell->get_dof_indices(global_dof_indices);
+          level_cell->get_mg_dof_indices (level_dof_indices);
+
+          for (unsigned int i=0; i<fe.dofs_per_cell; ++i)
+            {
+              const unsigned int block = fe.system_to_block_index(i).first;
+              if (!selected[block])
+                continue;
+
+              // dofs at a refinement edge are not copied
+              if (mg_constrained_dofs != 0
+                  && mg_constrained_dofs->at_refinement_edge(level,level_dof_indices[i]))
+                continue;
+
+              temp_copy_indices[level_dof_indices[i] - mg_block_start[level][block]]
+                = global_dof_indices[i] - block_start[block];
+            }
+        }
+
+      // now all the active dofs got a valid entry,
+      // the other ones have an invalid entry. Count
+      // the valid entries and then resize the
+      // copy_indices object. Then, insert the pairs
+      // of global index and level index into
+      // copy_indices.
+      const types::global_dof_index n_active_dofs =
+        temp_copy_indices.size() -
+        std::count (temp_copy_indices.begin(), temp_copy_indices.end(),
+                    numbers::invalid_dof_index);
+      copy_indices[selected_block][level].resize (n_active_dofs);
+      types::global_dof_index counter = 0;
+      for (types::global_dof_index i=0; i<temp_copy_indices.size(); ++i)
+        if (temp_copy_indices[i] != numbers::invalid_dof_index)
+          copy_indices[selected_block][level][counter++] =
+            std::pair<types::global_dof_index, unsigned int> (temp_copy_indices[i], i);
+      Assert (counter == n_active_dofs, ExcInternalError());
+    }
+}
+
+
+
+
+// Build the transfer data for all selected blocks. A non-empty 'sel' must
+// have one entry per block of the finite element and replaces the current
+// selection; if no selection exists afterwards, all blocks are selected.
+// MGTransferBlockBase::build_matrices() then sets up the prolongation
+// matrices and index offsets, and copy_indices is filled for each selected
+// block with pairs (index within the block in the global numbering, index
+// within the block on the level) for the degrees of freedom of the active
+// cells of each level.
+template <typename number>
+template <int dim, int spacedim>
+void MGTransferBlock<number>::build_matrices (
+  const DoFHandler<dim,spacedim> &dof,
+  const DoFHandler<dim,spacedim> &mg_dof,
+  const std::vector<bool> &sel)
+{
+  const FiniteElement<dim,spacedim> &fe = mg_dof.get_fe();
+  const unsigned int n_blocks = fe.n_blocks();
+
+  if (sel.size() != 0)
+    {
+      Assert(sel.size() == n_blocks,
+             ExcDimensionMismatch(sel.size(), n_blocks));
+      selected = sel;
+    }
+  if (selected.size() == 0)
+    selected = std::vector<bool> (n_blocks, true);
+
+  MGTransferBlockBase::build_matrices (dof, mg_dof);
+
+  std::vector<std::vector<types::global_dof_index> > temp_copy_indices (n_blocks);
+  std::vector<types::global_dof_index> global_dof_indices (fe.dofs_per_cell);
+  std::vector<types::global_dof_index> level_dof_indices  (fe.dofs_per_cell);
+  for (int level=dof.get_triangulation().n_levels()-1; level>=0; --level)
+    {
+      typename DoFHandler<dim,spacedim>::active_cell_iterator
+      level_cell = mg_dof.begin_active(level);
+      const typename DoFHandler<dim,spacedim>::active_cell_iterator
+      level_end  = mg_dof.end_active(level);
+
+      // one slot per level dof of each selected block, invalid until an
+      // active cell provides the matching global index
+      for (unsigned int block=0; block<n_blocks; ++block)
+        if (selected[block])
+          {
+            temp_copy_indices[block].resize (0);
+            temp_copy_indices[block].resize (sizes[level][block],
+                                             numbers::invalid_dof_index);
+          }
+
+      for (; level_cell!=level_end; ++level_cell)
+        {
+          // get the dof numbers of
+          // this cell for the global
+          // and the level-wise
+          // numbering
+          level_cell->get_dof_indices(global_dof_indices);
+          level_cell->get_mg_dof_indices (level_dof_indices);
+
+          for (unsigned int i=0; i<fe.dofs_per_cell; ++i)
+            {
+              const unsigned int block = fe.system_to_block_index(i).first;
+              if (selected[block])
+                temp_copy_indices[block][level_dof_indices[i] - mg_block_start[level][block]]
+                  = global_dof_indices[i] - block_start[block];
+            }
+        }
+
+      // keep only the slots that received a valid global index and store
+      // them as pairs of global and level index
+      for (unsigned int block=0; block<n_blocks; ++block)
+        if (selected[block])
+          {
+            const types::global_dof_index n_active_dofs =
+              temp_copy_indices[block].size() -
+              std::count (temp_copy_indices[block].begin(),
+                          temp_copy_indices[block].end(),
+                          numbers::invalid_dof_index);
+            copy_indices[block][level].resize (n_active_dofs);
+            types::global_dof_index counter = 0;
+            for (types::global_dof_index i=0; i<temp_copy_indices[block].size(); ++i)
+              if (temp_copy_indices[block][i] != numbers::invalid_dof_index)
+                copy_indices[block][level][counter++] =
+                  std::pair<types::global_dof_index, unsigned int>
+                  (temp_copy_indices[block][i], i);
+            Assert (counter == n_active_dofs, ExcInternalError());
+          }
+    }
+}
+
+
+
+// explicit instantiations
+#include "mg_transfer_block.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/multigrid/mg_transfer_block.inst.in b/source/multigrid/mg_transfer_block.inst.in
new file mode 100644
index 0000000..d44ef27
--- /dev/null
+++ b/source/multigrid/mg_transfer_block.inst.in
@@ -0,0 +1,134 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+// Explicit instantiations of the member function templates of
+// MGTransferBlock and MGTransferBlockSelect (build_matrices,
+// copy_to_mg, copy_from_mg and copy_from_mg_add) for the level number
+// types float and double. The global-side vectors are always
+// Vector<double> or BlockVector<double>.
+//
+// The body below is written once in terms of deal_II_dimension and is
+// repeated for each entry of DIMENSIONS (presumably by the
+// instantiation preprocessor of the build system -- confirm there).
+for (deal_II_dimension : DIMENSIONS)
+  {
+template
+void MGTransferBlock<float>::build_matrices<deal_II_dimension>
+(const DoFHandler<deal_II_dimension>&, const DoFHandler<deal_II_dimension>&,
+ const std::vector<bool>&);
+
+template
+void MGTransferBlock<double>::build_matrices<deal_II_dimension>
+(const DoFHandler<deal_II_dimension>&, const DoFHandler<deal_II_dimension>&,
+ const std::vector<bool>&);
+
+template
+void MGTransferBlockSelect<float>::build_matrices<deal_II_dimension>
+(const DoFHandler<deal_II_dimension>&, const DoFHandler<deal_II_dimension>&,
+ const unsigned int);
+
+template
+void MGTransferBlockSelect<double>::build_matrices<deal_II_dimension>
+(const DoFHandler<deal_II_dimension>&, const DoFHandler<deal_II_dimension>&,
+ const unsigned int);
+
+template void
+MGTransferBlock<float>::copy_to_mg (
+  const DoFHandler<deal_II_dimension>&,
+  MGLevelObject<BlockVector<float> >&,
+  const BlockVector<double>&) const;
+template void
+MGTransferBlock<float>::copy_from_mg (
+  const DoFHandler<deal_II_dimension>&,
+  BlockVector<double>&,
+  const MGLevelObject<BlockVector<float> >&) const;
+template void
+MGTransferBlock<float>::copy_from_mg_add (
+  const DoFHandler<deal_II_dimension>&,
+  BlockVector<double>&,
+  const MGLevelObject<BlockVector<float> >&) const;
+
+template void
+MGTransferBlock<double>::copy_to_mg (
+  const DoFHandler<deal_II_dimension>&,
+  MGLevelObject<BlockVector<double> >&,
+  const BlockVector<double>&) const;
+template void
+MGTransferBlock<double>::copy_from_mg (
+  const DoFHandler<deal_II_dimension>&,
+  BlockVector<double>&,
+  const MGLevelObject<BlockVector<double> >&) const;
+template void
+MGTransferBlock<double>::copy_from_mg_add (
+  const DoFHandler<deal_II_dimension>&,
+  BlockVector<double>&,
+  const MGLevelObject<BlockVector<double> >&) const;
+
+template void
+MGTransferBlockSelect<float>::copy_to_mg (
+  const DoFHandler<deal_II_dimension>&,
+  MGLevelObject<Vector<float> >&,
+  const Vector<double>&) const;
+template void
+MGTransferBlockSelect<float>::copy_to_mg (
+  const DoFHandler<deal_II_dimension>&,
+  MGLevelObject<Vector<float> >&,
+  const BlockVector<double>&) const;
+template void
+MGTransferBlockSelect<float>::copy_from_mg (
+  const DoFHandler<deal_II_dimension>&,
+  Vector<double>&,
+  const MGLevelObject<Vector<float> >&) const;
+template void
+MGTransferBlockSelect<float>::copy_from_mg (
+  const DoFHandler<deal_II_dimension>&,
+  BlockVector<double>&,
+  const MGLevelObject<Vector<float> >&) const;
+template void
+MGTransferBlockSelect<float>::copy_from_mg_add (
+  const DoFHandler<deal_II_dimension>&,
+  Vector<double>&,
+  const MGLevelObject<Vector<float> >&) const;
+template void
+MGTransferBlockSelect<float>::copy_from_mg_add (
+  const DoFHandler<deal_II_dimension>&,
+  BlockVector<double>&,
+  const MGLevelObject<Vector<float> >&) const;
+
+template void
+MGTransferBlockSelect<double>::copy_to_mg (
+  const DoFHandler<deal_II_dimension>&,
+  MGLevelObject<Vector<double> >&,
+  const Vector<double>&) const;
+template void
+MGTransferBlockSelect<double>::copy_to_mg (
+  const DoFHandler<deal_II_dimension>&,
+  MGLevelObject<Vector<double> >&,
+  const BlockVector<double>&) const;
+template void
+MGTransferBlockSelect<double>::copy_from_mg (
+  const DoFHandler<deal_II_dimension>&,
+  Vector<double>&,
+  const MGLevelObject<Vector<double> >&) const;
+template void
+MGTransferBlockSelect<double>::copy_from_mg (
+  const DoFHandler<deal_II_dimension>&,
+  BlockVector<double>&,
+  const MGLevelObject<Vector<double> >&) const;
+template void
+MGTransferBlockSelect<double>::copy_from_mg_add (
+  const DoFHandler<deal_II_dimension>&,
+  Vector<double>&,
+  const MGLevelObject<Vector<double> >&) const;
+template void
+MGTransferBlockSelect<double>::copy_from_mg_add (
+  const DoFHandler<deal_II_dimension>&,
+  BlockVector<double>&,
+  const MGLevelObject<Vector<double> >&) const;
+  }
+
diff --git a/source/multigrid/mg_transfer_component.cc b/source/multigrid/mg_transfer_component.cc
new file mode 100644
index 0000000..8f61f8f
--- /dev/null
+++ b/source/multigrid/mg_transfer_component.cc
@@ -0,0 +1,689 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/function.h>
+
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/block_indices.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/block_sparse_matrix.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/multigrid/mg_transfer_component.h>
+#include <deal.II/multigrid/mg_transfer_component.templates.h>
+#include <deal.II/multigrid/mg_tools.h>
+
+#include <algorithm>
+#include <numeric>
+#include <iostream>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace
+{
+  /**
+   * Resize the block vectors of @p v on every level between
+   * v.min_level() and v.max_level().
+   *
+   * @p sel flags, per target block, whether that block is part of the
+   * level vectors. Each level vector gets exactly one block per true
+   * entry (no blocks of length zero are padded in). If @p sel is
+   * empty, every block named in the target component list is
+   * selected.
+   *
+   * @p target_comp maps each component of the finite element to a
+   * target block and so allows to re-sort and group components as in
+   * DoFRenumbering::component_wise. An empty vector stands for the
+   * identity mapping.
+   *
+   * @p ndofs holds, per level and per target block, the number of
+   * degrees of freedom. If it is empty on entry it is filled here by
+   * MGTools::count_dofs_per_block(); otherwise it is used as given.
+   *
+   * Degrees of freedom must be sorted by component in order to obtain
+   * reasonable results from this function.
+   */
+  template <int dim, typename number, int spacedim>
+  void
+  reinit_vector_by_components (
+    const dealii::DoFHandler<dim,spacedim> &mg_dof,
+    MGLevelObject<BlockVector<number> > &v,
+    const std::vector<bool> &sel,
+    const std::vector<unsigned int> &target_comp,
+    std::vector<std::vector<types::global_dof_index> > &ndofs)
+  {
+    // Work on local copies: both arguments may have to be replaced by
+    // default values below.
+    std::vector<bool> selected=sel;
+    std::vector<unsigned int> target_component=target_comp;
+    const unsigned int ncomp = mg_dof.get_fe().n_components();
+
+    // An empty target_component means the identity mapping.
+    if (target_component.size() == 0)
+      {
+        target_component.resize(ncomp);
+        for (unsigned int i=0; i<ncomp; ++i)
+          target_component[i] = i;
+      }
+
+    // An empty selection means that every target block is selected.
+    // The flags are sized by target_component, so they are cleared
+    // over exactly that length (clearing ncomp entries, as was done
+    // before, could run past the end of the vector).
+    if (selected.size() == 0)
+      {
+        selected.assign(target_component.size(), false);
+        for (unsigned int i=0; i<target_component.size(); ++i)
+          selected[target_component[i]] = true;
+      }
+
+    Assert (selected.size() == target_component.size(),
+            ExcDimensionMismatch(selected.size(), target_component.size()));
+
+    // Compute the number of blocks needed
+    const unsigned int n_selected
+      = std::accumulate(selected.begin(),
+                        selected.end(),
+                        0U);
+
+    if (ndofs.size() == 0)
+      {
+        std::vector<std::vector<types::global_dof_index> >
+        new_dofs(mg_dof.get_triangulation().n_levels(),
+                 std::vector<types::global_dof_index>(target_component.size()));
+        std::swap(ndofs, new_dofs);
+        MGTools::count_dofs_per_block (mg_dof, ndofs, target_component);
+      }
+
+    for (unsigned int level=v.min_level();
+         level<=v.max_level(); ++level)
+      {
+        v[level].reinit(n_selected, 0);
+        unsigned int k=0;
+        for (unsigned int i=0; i<selected.size() && (k<v[level].n_blocks()); ++i)
+          if (selected[i])
+            v[level].block(k++).reinit(ndofs[level][i]);
+
+        // The block sizes only have to be gathered once per level,
+        // after all selected blocks have been resized.
+        v[level].collect_sizes();
+      }
+  }
+
+
+  /**
+   * Resize the plain vectors of @p v on every level between
+   * v.min_level() and v.max_level() to the size of one single block.
+   *
+   * The block is determined from @p component_mask and
+   * @p target_component: it is the target of the last component for
+   * which the mask is set, or block zero if the mask is set for no
+   * component at all.
+   *
+   * @p target_component allows to re-sort and group components as in
+   * DoFRenumbering::component_wise.
+   *
+   * @p ndofs holds, per level and per target block, the number of
+   * degrees of freedom. If it is empty on entry, it is filled here by
+   * MGTools::count_dofs_per_block().
+   *
+   * Degrees of freedom must be sorted by component in order to obtain
+   * reasonable results from this function.
+   */
+  template <int dim, typename number, int spacedim>
+  void
+  reinit_vector_by_components (
+    const dealii::DoFHandler<dim,spacedim> &mg_dof,
+    MGLevelObject<dealii::Vector<number> > &v,
+    const ComponentMask &component_mask,
+    const std::vector<unsigned int> &target_component,
+    std::vector<std::vector<types::global_dof_index> > &ndofs)
+  {
+    const unsigned int n_targets = target_component.size();
+
+    Assert (component_mask.represents_n_components(n_targets),
+            ExcMessage ("The component mask does not have the correct size."));
+
+    // Search from the back: the last masked component decides which
+    // block the level vectors correspond to.
+    unsigned int selected_block = 0;
+    for (unsigned int c=n_targets; c!=0; --c)
+      if (component_mask[c-1])
+        {
+          selected_block = target_component[c-1];
+          break;
+        }
+
+    // Count the dofs per level and block unless the caller already
+    // provided these numbers.
+    if (ndofs.size() == 0)
+      {
+        ndofs.assign (mg_dof.get_triangulation().n_levels(),
+                      std::vector<types::global_dof_index>(n_targets));
+        MGTools::count_dofs_per_block (mg_dof, ndofs, target_component);
+      }
+
+    const unsigned int last_level = v.max_level();
+    for (unsigned int l=v.min_level(); l<=last_level; ++l)
+      v[l].reinit(ndofs[l][selected_block]);
+  }
+}
+
+
+/**
+ * Transfer the selected part of the global vector @p src onto all
+ * levels of @p dst.
+ *
+ * The level vectors are zeroed and resized first. Then the levels are
+ * visited from the finest to the coarsest: on each level the entries
+ * listed in copy_to_and_from_indices are copied from @p src, and on
+ * all but the finest level the vector of the next finer level is
+ * restricted and added on top.
+ */
+template <typename number>
+template <int dim, class InVector, int spacedim>
+void
+MGTransferSelect<number>::do_copy_to_mg (
+  const DoFHandler<dim,spacedim>        &mg_dof_handler,
+  MGLevelObject<Vector<number> > &dst,
+  const InVector                 &src) const
+{
+  typedef std::vector<std::pair<types::global_dof_index, unsigned int> >::const_iterator
+  index_iterator;
+
+  dst=0;
+
+  const unsigned int n_levels = mg_dof_handler.get_triangulation().n_levels();
+
+  Assert(sizes.size()==n_levels,
+         ExcMatricesNotBuilt());
+
+  reinit_vector_by_components(mg_dof_handler, dst,
+                              mg_component_mask,
+                              mg_target_component, sizes);
+
+  // Walk the hierarchy top-down, i.e. starting with the most refined
+  // level. When a level is reached, the next finer one is already
+  // complete, so the part of this level that is refined further can
+  // be obtained by restriction from there.
+  for (unsigned int level=n_levels; level-- != 0;)
+    {
+      const index_iterator indices_end = copy_to_and_from_indices[level].end();
+      for (index_iterator entry = copy_to_and_from_indices[level].begin();
+           entry != indices_end; ++entry)
+        dst[level](entry->second) = src(entry->first);
+
+      // There is nothing to restrict from on the finest level.
+      if (level+1 != n_levels)
+        restrict_and_add (level+1, dst[level], dst[level+1]);
+    }
+}
+
+
+/**
+ * Build the block sizes, the start indices and the prolongation
+ * matrices between consecutive levels from @p mg_dof.
+ *
+ * The first (unnamed) DoFHandler argument is not used.
+ *
+ * Steps, in order:
+ *  - target_component and mg_target_component are replaced by default
+ *    values if they are empty (identity, respectively a copy of
+ *    target_component);
+ *  - sizes, mg_component_start and component_start are computed;
+ *  - copy_to_and_from_indices is resized to the number of levels;
+ *  - for each pair of levels the sparsity pattern and then the entries
+ *    of the prolongation matrix are built, restricted to components
+ *    selected by mg_component_mask;
+ *  - if boundary_indices is not empty, all prolongation matrix entries
+ *    whose column is a boundary index of the coarse level are set to
+ *    zero.
+ */
+template <int dim, int spacedim>
+void MGTransferComponentBase::build_matrices (
+  const DoFHandler<dim,spacedim> &,
+  const DoFHandler<dim,spacedim> &mg_dof)
+{
+  // Name both template arguments so that the reference type matches
+  // what a DoFHandler<dim,spacedim> hands out.
+  const FiniteElement<dim,spacedim> &fe = mg_dof.get_fe();
+
+  // Fill target component with
+  // standard values (identity) if it
+  // is empty
+  if (target_component.size() == 0)
+    {
+      target_component.resize(fe.n_components());
+      for (unsigned int i=0; i<target_component.size(); ++i)
+        target_component[i] = i;
+    }
+  else
+    {
+      // otherwise, check it for consistency. Only the length is
+      // checked; the individual entries are not validated here.
+      Assert (target_component.size() == fe.n_components(),
+              ExcDimensionMismatch(target_component.size(),
+                                   fe.n_components()));
+    }
+  // Do the same for the multilevel
+  // components. These may be
+  // different.
+  if (mg_target_component.size() == 0)
+    {
+      mg_target_component.resize(fe.n_components());
+      for (unsigned int i=0; i<mg_target_component.size(); ++i)
+        mg_target_component[i] = target_component[i];
+    }
+  else
+    {
+      Assert (mg_target_component.size() == fe.n_components(),
+              ExcDimensionMismatch(mg_target_component.size(),
+                                   fe.n_components()));
+    }
+
+  // Effective number of components
+  // is the maximum entry in
+  // mg_target_component. This
+  // assumes that the values in that
+  // vector don't have holes.
+  const unsigned int n_components  =
+    *std::max_element(mg_target_component.begin(), mg_target_component.end()) + 1;
+  const unsigned int dofs_per_cell = fe.dofs_per_cell;
+  const unsigned int n_levels      = mg_dof.get_triangulation().n_levels();
+
+  Assert (mg_component_mask.represents_n_components(fe.n_components()),
+          ExcMessage ("Component mask has wrong size."));
+
+  // Compute the lengths of all blocks
+  sizes.resize(n_levels);
+  for (unsigned int l=0; l<n_levels; ++l)
+    sizes[l].resize(n_components);
+
+  MGTools::count_dofs_per_block(mg_dof, sizes, mg_target_component);
+
+  // Fill some index vectors
+  // for later use.
+  mg_component_start = sizes;
+  // Compute start indices from sizes
+  // (exclusive prefix sum per level)
+  for (unsigned int l=0; l<mg_component_start.size(); ++l)
+    {
+      types::global_dof_index k=0;
+      for (unsigned int i=0; i<mg_component_start[l].size(); ++i)
+        {
+          const types::global_dof_index t=mg_component_start[l][i];
+          mg_component_start[l][i] = k;
+          k += t;
+        }
+    }
+
+  component_start.resize(*std::max_element (target_component.begin(),
+                                            target_component.end()) + 1);
+  DoFTools::
+  count_dofs_per_block (mg_dof, component_start, target_component);
+
+  // Same conversion from sizes to start indices for the global numbering
+  types::global_dof_index k=0;
+  for (unsigned int i=0; i<component_start.size(); ++i)
+    {
+      const types::global_dof_index t=component_start[i];
+      component_start[i] = k;
+      k += t;
+    }
+
+  // Build index vectors for
+  // copy_to_mg and
+  // copy_from_mg. These vectors must
+  // be prebuilt, since the
+  // get_dof_indices functions are
+  // too slow
+
+  copy_to_and_from_indices.resize(n_levels);
+
+// Building the prolongation matrices starts here!
+
+  // reset the size of the array of
+  // matrices. call resize(0) first,
+  // in order to delete all elements
+  // and clear their memory. then
+  // repopulate these arrays
+  //
+  // note that on resize(0), the
+  // shared_ptr class takes care of
+  // deleting the object it points to
+  // by itself
+  prolongation_matrices.resize (0);
+  prolongation_sparsities.resize (0);
+
+  // There is one matrix per pair of consecutive levels. The bound is
+  // written as level+1<n_levels so that it does not wrap around for
+  // an object without any level.
+  for (unsigned int i=0; i+1<n_levels; ++i)
+    {
+      prolongation_sparsities
+      .push_back (std_cxx11::shared_ptr<BlockSparsityPattern> (new BlockSparsityPattern));
+      prolongation_matrices
+      .push_back (std_cxx11::shared_ptr<BlockSparseMatrix<double> > (new BlockSparseMatrix<double>));
+    }
+
+  // two fields which will store the
+  // indices of the multigrid dofs
+  // for a cell and one of its children
+  std::vector<types::global_dof_index> dof_indices_parent (dofs_per_cell);
+  std::vector<types::global_dof_index> dof_indices_child (dofs_per_cell);
+
+  // for each level: first build the
+  // sparsity pattern of the matrices
+  // and then build the matrices
+  // themselves. note that we only
+  // need to take care of cells on
+  // the coarser level which have
+  // children
+  for (unsigned int level=0; level+1<n_levels; ++level)
+    {
+      // reset the dimension of the
+      // structure.  note that for
+      // the number of entries per
+      // row, the number of parent
+      // dofs coupling to a child dof
+      // is necessary. this, is the
+      // number of degrees of freedom
+      // per cell
+      prolongation_sparsities[level]->reinit (n_components, n_components);
+      for (unsigned int i=0; i<n_components; ++i)
+        for (unsigned int j=0; j<n_components; ++j)
+          if (i==j)
+            prolongation_sparsities[level]->block(i,j)
+            .reinit(sizes[level+1][i],
+                    sizes[level][j],
+                    dofs_per_cell+1);
+          else
+            prolongation_sparsities[level]->block(i,j)
+            .reinit(sizes[level+1][i],
+                    sizes[level][j],
+                    0);
+
+      prolongation_sparsities[level]->collect_sizes();
+
+      for (typename DoFHandler<dim,spacedim>::cell_iterator cell=mg_dof.begin(level);
+           cell != mg_dof.end(level); ++cell)
+        if (cell->has_children())
+          {
+            cell->get_mg_dof_indices (dof_indices_parent);
+
+            for (unsigned int child=0; child<cell->n_children(); ++child)
+              {
+                // set an alias to the
+                // prolongation matrix for
+                // this child
+                const FullMatrix<double> &prolongation
+                  = fe.get_prolongation_matrix (child, cell->refinement_case());
+
+                cell->child(child)->get_mg_dof_indices (dof_indices_child);
+
+                // now tag the entries in the
+                // matrix which will be used
+                // for this pair of parent/child
+                for (unsigned int i=0; i<dofs_per_cell; ++i)
+                  for (unsigned int j=0; j<dofs_per_cell; ++j)
+                    if (prolongation(i,j) != 0)
+                      {
+                        const unsigned int icomp
+                          = fe.system_to_component_index(i).first;
+                        const unsigned int jcomp
+                          = fe.system_to_component_index(j).first;
+                        if ((icomp==jcomp) && mg_component_mask[icomp])
+                          prolongation_sparsities[level]->add(dof_indices_child[i],
+                                                              dof_indices_parent[j]);
+                      }
+              }
+          }
+      prolongation_sparsities[level]->compress ();
+
+      prolongation_matrices[level]->reinit (*prolongation_sparsities[level]);
+      // now actually build the matrices
+      for (typename DoFHandler<dim,spacedim>::cell_iterator cell=mg_dof.begin(level);
+           cell != mg_dof.end(level); ++cell)
+        if (cell->has_children())
+          {
+            cell->get_mg_dof_indices (dof_indices_parent);
+
+            for (unsigned int child=0; child<cell->n_children(); ++child)
+              {
+                // set an alias to the
+                // prolongation matrix for
+                // this child
+                const FullMatrix<double> &prolongation
+                  = fe.get_prolongation_matrix (child, cell->refinement_case());
+
+                cell->child(child)->get_mg_dof_indices (dof_indices_child);
+
+                // now set the entries in the
+                // matrix
+                for (unsigned int i=0; i<dofs_per_cell; ++i)
+                  for (unsigned int j=0; j<dofs_per_cell; ++j)
+                    if (prolongation(i,j) != 0)
+                      {
+                        const unsigned int icomp = fe.system_to_component_index(i).first;
+                        const unsigned int jcomp = fe.system_to_component_index(j).first;
+                        if ((icomp==jcomp) && mg_component_mask[icomp])
+                          prolongation_matrices[level]->set(dof_indices_child[i],
+                                                            dof_indices_parent[j],
+                                                            prolongation(i,j));
+                      }
+              }
+          }
+    }
+  // impose boundary conditions
+  // but only in the column of
+  // the prolongation matrix
+  //TODO: this way is not very efficient
+
+  if (boundary_indices.size() != 0)
+    {
+      for (unsigned int level=0; level+1<n_levels; ++level)
+        {
+          if (boundary_indices[level].size() == 0)
+            continue;
+
+          // Converts an index within a block of the coarse level into
+          // the index in the numbering of the whole level. The block
+          // sizes were already counted into sizes above, so they are
+          // reused here, and the object is set up once per level
+          // rather than once per matrix entry.
+          const BlockIndices block_indices_coarse (sizes[level]);
+
+          // Only the diagonal blocks carry entries.
+          for (unsigned int iblock=0; iblock<n_components; ++iblock)
+            {
+              SparseMatrix<double> &diagonal_block
+                = prolongation_matrices[level]->block(iblock,iblock);
+
+              const types::global_dof_index n_dofs = diagonal_block.m();
+              for (types::global_dof_index i=0; i<n_dofs; ++i)
+                {
+                  SparseMatrix<double>::iterator entry   = diagonal_block.begin(i),
+                                                 row_end = diagonal_block.end(i);
+                  for (; entry != row_end; ++entry)
+                    {
+                      const types::global_dof_index column_number = entry->column();
+
+                      // convert the block-local column into a level index
+                      const types::global_dof_index global_j
+                        = block_indices_coarse.local_to_global(iblock, column_number);
+
+                      const bool is_boundary_index =
+                        (boundary_indices[level].find(global_j)
+                         != boundary_indices[level].end());
+
+                      if (is_boundary_index)
+                        diagonal_block.set(i,column_number,0);
+                    }
+                }
+            }
+        }
+    }
+}
+
+
+/**
+ * Set up the transfer for one selected block.
+ *
+ * @p select and @p mg_select name the selected target block in the
+ * global and in the level-wise numbering. @p t_component and
+ * @p mg_t_component map the components of the finite element to
+ * target blocks; an empty vector stands for the identity mapping
+ * (respectively for a copy of @p t_component), which is also how
+ * MGTransferComponentBase::build_matrices() fills them in.
+ * @p bdry_indices is stored as boundary_indices and used by the base
+ * class function.
+ *
+ * The function stores its arguments, derives component_mask and
+ * mg_component_mask, calls MGTransferComponentBase::build_matrices(),
+ * determines the interface dofs of each level and finally fills
+ * copy_to_and_from_indices with pairs (global index, level index),
+ * both relative to the start of the selected block, for all selected
+ * dofs on active cells that are not interface dofs.
+ */
+template <typename number>
+template <int dim, int spacedim>
+void MGTransferSelect<number>::build_matrices (
+  const DoFHandler<dim,spacedim> &dof,
+  const DoFHandler<dim,spacedim> &mg_dof,
+  unsigned int select,
+  unsigned int mg_select,
+  const std::vector<unsigned int> &t_component,
+  const std::vector<unsigned int> &mg_t_component,
+  const std::vector<std::set<types::global_dof_index> > &bdry_indices)
+{
+  const FiniteElement<dim,spacedim> &fe = mg_dof.get_fe();
+  const unsigned int ncomp = fe.n_components();
+
+  // Non-empty maps are indexed by component below.
+  Assert (t_component.empty() || t_component.size() == ncomp,
+          ExcDimensionMismatch(t_component.size(), ncomp));
+  Assert (mg_t_component.empty() || mg_t_component.size() == ncomp,
+          ExcDimensionMismatch(mg_t_component.size(), ncomp));
+
+  target_component = t_component;
+  mg_target_component = mg_t_component;
+  boundary_indices = bdry_indices;
+
+  selected_component = select;
+  mg_selected_component = mg_select;
+
+  // Mask of all components that map to the selected block. An empty
+  // map must not be indexed; it means the identity.
+  {
+    std::vector<bool> tmp(ncomp, false);
+    for (unsigned int c=0; c<ncomp; ++c)
+      {
+        const unsigned int target
+          = (t_component.empty() ? c : t_component[c]);
+        if (target == selected_component)
+          tmp[c] = true;
+      }
+    component_mask = ComponentMask(tmp);
+  }
+
+  // Same for the level-wise numbering. An empty level map falls back
+  // to the global map, and that one to the identity.
+  {
+    std::vector<bool> tmp(ncomp, false);
+    for (unsigned int c=0; c<ncomp; ++c)
+      {
+        const unsigned int mg_target
+          = (!mg_t_component.empty() ? mg_t_component[c] :
+             (!t_component.empty() ? t_component[c] : c));
+        if (mg_target == mg_selected_component)
+          tmp[c] = true;
+      }
+    mg_component_mask = ComponentMask(tmp);
+  }
+
+  // If components are renumbered,
+  // find the first original
+  // component corresponding to the
+  // target component.
+  for (unsigned int i=0; i<target_component.size(); ++i)
+    {
+      if (target_component[i] == select)
+        {
+          selected_component = i;
+          break;
+        }
+    }
+
+  for (unsigned int i=0; i<mg_target_component.size(); ++i)
+    {
+      if (mg_target_component[i] == mg_select)
+        {
+          mg_selected_component = i;
+          break;
+        }
+    }
+
+  MGTransferComponentBase::build_matrices (dof, mg_dof);
+
+  interface_dofs.resize(mg_dof.get_triangulation().n_levels());
+  for (unsigned int l=0; l<mg_dof.get_triangulation().n_levels(); ++l)
+    {
+      interface_dofs[l].clear();
+      interface_dofs[l].set_size(mg_dof.n_dofs(l));
+    }
+  MGTools::extract_inner_interface_dofs(mg_dof, interface_dofs);
+
+  // use a temporary vector to create the
+  // relation between global and level dofs
+  std::vector<types::global_dof_index> temp_copy_indices;
+  std::vector<types::global_dof_index> global_dof_indices (fe.dofs_per_cell);
+  std::vector<types::global_dof_index> level_dof_indices  (fe.dofs_per_cell);
+  for (int level=dof.get_triangulation().n_levels()-1; level>=0; --level)
+    {
+      copy_to_and_from_indices[level].clear();
+      typename DoFHandler<dim,spacedim>::active_cell_iterator
+      level_cell = mg_dof.begin_active(level);
+      const typename DoFHandler<dim,spacedim>::active_cell_iterator
+      level_end  = mg_dof.end_active(level);
+
+      // start from a vector in which every level dof is marked as
+      // not having a global counterpart
+      temp_copy_indices.resize (0);
+      temp_copy_indices.resize (mg_dof.n_dofs(level), numbers::invalid_dof_index);
+
+      // Record, for every selected dof on the active cells of this
+      // level, which global dof it corresponds to.
+      for (; level_cell!=level_end; ++level_cell)
+        {
+          // get the dof numbers of
+          // this cell for the global
+          // and the level-wise
+          // numbering
+          level_cell->get_dof_indices(global_dof_indices);
+          level_cell->get_mg_dof_indices (level_dof_indices);
+
+          for (unsigned int i=0; i<fe.dofs_per_cell; ++i)
+            {
+              const unsigned int component
+                = fe.system_to_component_index(i).first;
+              if (component_mask[component] &&
+                  !interface_dofs[level].is_element(level_dof_indices[i]))
+                {
+                  const types::global_dof_index level_start
+                    = mg_component_start[level][mg_target_component[component]];
+                  const types::global_dof_index global_start
+                    = component_start[target_component[component]];
+                  temp_copy_indices[level_dof_indices[i]-level_start] =
+                    global_dof_indices[i] - global_start;
+                }
+            }
+        }
+
+      // write indices from vector into the map from
+      // global to level dofs
+      const types::global_dof_index n_unset_dofs =
+        std::count (temp_copy_indices.begin(), temp_copy_indices.end(),
+                    numbers::invalid_dof_index);
+      const types::global_dof_index n_active_dofs =
+        temp_copy_indices.size() - n_unset_dofs;
+      copy_to_and_from_indices[level].resize (n_active_dofs);
+      types::global_dof_index counter = 0;
+      for (types::global_dof_index i=0; i<temp_copy_indices.size(); ++i)
+        if (temp_copy_indices[i] != numbers::invalid_dof_index)
+          copy_to_and_from_indices[level][counter++] =
+            std::pair<types::global_dof_index, unsigned int> (temp_copy_indices[i], i);
+      Assert (counter == n_active_dofs, ExcInternalError());
+    }
+}
+
+
+
+// explicit instantiations
+#include "mg_transfer_component.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/multigrid/mg_transfer_component.inst.in b/source/multigrid/mg_transfer_component.inst.in
new file mode 100644
index 0000000..e7036ed
--- /dev/null
+++ b/source/multigrid/mg_transfer_component.inst.in
@@ -0,0 +1,100 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+// Explicit instantiations of the member function templates of
+// MGTransferSelect (build_matrices, copy_to_mg, copy_from_mg and
+// copy_from_mg_add) for the level number types float and double. The
+// global-side vectors are always Vector<double> or
+// BlockVector<double>.
+//
+// The body below is written once in terms of deal_II_dimension and is
+// repeated for each entry of DIMENSIONS (presumably by the
+// instantiation preprocessor of the build system -- confirm there).
+for (deal_II_dimension : DIMENSIONS)
+  {
+template
+void MGTransferSelect<float>::build_matrices<deal_II_dimension>
+(const DoFHandler<deal_II_dimension> &d,
+ const DoFHandler<deal_II_dimension> &,
+ unsigned int, unsigned int,
+ const std::vector<unsigned int>&,
+ const std::vector<unsigned int>&,
+ const std::vector<std::set<types::global_dof_index> >&);
+
+template
+void MGTransferSelect<double>::build_matrices<deal_II_dimension>
+(const DoFHandler<deal_II_dimension> &d,
+ const DoFHandler<deal_II_dimension> &,
+ unsigned int, unsigned int,
+ const std::vector<unsigned int>&,
+ const std::vector<unsigned int>&,
+ const std::vector<std::set<types::global_dof_index> >&);
+
+template void
+MGTransferSelect<float>::copy_to_mg (
+  const DoFHandler<deal_II_dimension>&,
+  MGLevelObject<Vector<float> >&,
+  const Vector<double>&) const;
+template void
+MGTransferSelect<float>::copy_to_mg (
+  const DoFHandler<deal_II_dimension>&,
+  MGLevelObject<Vector<float> >&,
+  const BlockVector<double>&) const;
+template void
+MGTransferSelect<float>::copy_from_mg (
+  const DoFHandler<deal_II_dimension>&,
+  Vector<double>&,
+  const MGLevelObject<Vector<float> >&) const;
+template void
+MGTransferSelect<float>::copy_from_mg (
+  const DoFHandler<deal_II_dimension>&,
+  BlockVector<double>&,
+  const MGLevelObject<Vector<float> >&) const;
+template void
+MGTransferSelect<float>::copy_from_mg_add (
+  const DoFHandler<deal_II_dimension>&,
+  Vector<double>&,
+  const MGLevelObject<Vector<float> >&) const;
+template void
+MGTransferSelect<float>::copy_from_mg_add (
+  const DoFHandler<deal_II_dimension>&,
+  BlockVector<double>&,
+  const MGLevelObject<Vector<float> >&) const;
+
+template void
+MGTransferSelect<double>::copy_to_mg (
+  const DoFHandler<deal_II_dimension>&,
+  MGLevelObject<Vector<double> >&,
+  const Vector<double>&) const;
+template void
+MGTransferSelect<double>::copy_to_mg (
+  const DoFHandler<deal_II_dimension>&,
+  MGLevelObject<Vector<double> >&,
+  const BlockVector<double>&) const;
+template void
+MGTransferSelect<double>::copy_from_mg (
+  const DoFHandler<deal_II_dimension>&,
+  Vector<double>&,
+  const MGLevelObject<Vector<double> >&) const;
+template void
+MGTransferSelect<double>::copy_from_mg (
+  const DoFHandler<deal_II_dimension>&,
+  BlockVector<double>&,
+  const MGLevelObject<Vector<double> >&) const;
+template void
+MGTransferSelect<double>::copy_from_mg_add (
+  const DoFHandler<deal_II_dimension>&,
+  Vector<double>&,
+  const MGLevelObject<Vector<double> >&) const;
+template void
+MGTransferSelect<double>::copy_from_mg_add (
+  const DoFHandler<deal_II_dimension>&,
+  BlockVector<double>&,
+  const MGLevelObject<Vector<double> >&) const;
+  }
+
diff --git a/source/multigrid/mg_transfer_matrix_free.cc b/source/multigrid/mg_transfer_matrix_free.cc
new file mode 100644
index 0000000..da97e20
--- /dev/null
+++ b/source/multigrid/mg_transfer_matrix_free.cc
@@ -0,0 +1,947 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2016 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/function.h>
+
+#include <deal.II/lac/parallel_vector.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/fe_tools.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/multigrid/mg_tools.h>
+#include <deal.II/multigrid/mg_transfer_matrix_free.h>
+
+#include <deal.II/matrix_free/shape_info.h>
+#include <deal.II/matrix_free/fe_evaluation.h>
+
+#include <algorithm>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+// Default constructor: all element-related members start out as zero or
+// false. This constructor does not assign mg_constrained_dofs.
+template <int dim, typename Number>
+MGTransferMatrixFree<dim,Number>::MGTransferMatrixFree ()
+  : fe_degree(0), element_is_continuous(false),
+    n_components(0), n_child_cell_dofs(0)
+{
+}
+
+
+
+// Constructor taking the constraints object: same zero/false initialization
+// as the default constructor, plus a stored pointer to @p mg_c. Only the
+// address is kept, so presumably @p mg_c must outlive this object.
+template <int dim, typename Number>
+MGTransferMatrixFree<dim,Number>::MGTransferMatrixFree (const MGConstrainedDoFs &mg_c)
+  : fe_degree(0), element_is_continuous(false),
+    n_components(0), n_child_cell_dofs(0)
+{
+  this->mg_constrained_dofs = &mg_c;
+}
+
+
+
+template <int dim, typename Number>
+MGTransferMatrixFree<dim,Number>::~MGTransferMatrixFree ()
+{} // empty body: no explicit cleanup is done here
+
+
+
+// Store the address of @p mg_c as the constraints object used by this class.
+template <int dim, typename Number>
+void MGTransferMatrixFree<dim,Number>::initialize_constraints (const MGConstrainedDoFs &mg_c)
+{
+  this->mg_constrained_dofs = &mg_c;
+}
+
+
+
+// Release the data set up by build(): base class data, element description and per-level tables.
+template <int dim, typename Number>
+void MGTransferMatrixFree<dim,Number>::clear ()
+{
+  this->MGLevelGlobalTransfer<parallel::distributed::Vector<Number> >::clear();
+  fe_degree = n_components = n_child_cell_dofs = 0;
+  element_is_continuous = false;
+  level_dof_indices.clear();
+  parent_child_connect.clear();
+  dirichlet_indices.clear(); // build() appends to these lists, so stale entries must go
+  n_owned_level_cells.clear();
+  shape_info = internal::MatrixFreeFunctions::ShapeInfo<Number>();
+  evaluation_data.clear();
+  weights_on_refined.clear();
+}
+
+
+
+namespace
+{
+  // Returns the position, within the lexicographically ordered index array of
+  // all children of one parent, of the first dof of child number @p child.
+  // Bit d of @p child decides whether the start is moved by @p fe_shift_1d
+  // entries in direction d, where one entry in direction d spans
+  // (fe_degree+1+fe_shift_1d)^d positions of the array.
+  template <int dim>
+  unsigned int
+  compute_shift_within_children(const unsigned int child,
+                                const unsigned int fe_shift_1d,
+                                const unsigned int fe_degree)
+  {
+    // number of entries per direction in the array of all children
+    const unsigned int row_length = fe_degree + 1 + fe_shift_1d;
+
+    unsigned int offset    = 0;
+    unsigned int stride    = 1;
+    unsigned int remaining = child;
+    for (unsigned int d=0; d<dim; ++d)
+      {
+        // lowest remaining bit is the position of the child in direction d
+        offset += stride * fe_shift_1d * (remaining % 2);
+        remaining /= 2;
+        // one step in direction d+1 skips a full row of direction d
+        stride *= row_length;
+      }
+    return offset;
+  }
+
+
+
+  // Writes the dof indices of one child cell into the lexicographically
+  // ordered index array of all children of a parent: entry m of the child (read
+  // via @p lexicographic_numbering) lands at its (i,j,k) slot in @p target_indices.
+  template <int dim>
+  void add_child_indices(const unsigned int child,
+                         const unsigned int fe_shift_1d,
+                         const unsigned int fe_degree,
+                         const std::vector<unsigned int> &lexicographic_numbering,
+                         const std::vector<types::global_dof_index> &local_dof_indices,
+                         types::global_dof_index *target_indices)
+  {
+    const unsigned int n_dofs_1d  = fe_degree + 1;
+    const unsigned int row_length = n_dofs_1d + fe_shift_1d;
+    const unsigned int n_blocks   = local_dof_indices.size()/Utilities::fixed_power<dim>(n_dofs_1d);
+    const unsigned int block_size = Utilities::fixed_power<dim>(row_length);
+    types::global_dof_index *const indices =
+      target_indices + compute_shift_within_children<dim>(child, fe_shift_1d, fe_degree);
+    for (unsigned int c=0, m=0; c<n_blocks; ++c)
+      for (unsigned int k=0; k<(dim>2 ? n_dofs_1d : 1); ++k)
+        for (unsigned int j=0; j<(dim>1 ? n_dofs_1d : 1); ++j)
+          for (unsigned int i=0; i<n_dofs_1d; ++i, ++m)
+            {
+              // a slot that was filled before must already hold this very index
+              const unsigned int index = c*block_size + (k*row_length + j)*row_length + i;
+              Assert(indices[index] == numbers::invalid_dof_index ||
+                     indices[index] == local_dof_indices[lexicographic_numbering[m]],
+                     ExcInternalError());
+              indices[index] = local_dof_indices[lexicographic_numbering[m]];
+            }
+  }
+
+
+
+  // (re)initialize @p ghosted_level_vector with the owned dofs plus the given
+  // ghosts (merged with earlier ghosts; copy_indices_global_mine is renumbered)
+  template <typename Number>
+  void
+  reinit_ghosted_vector(const IndexSet &locally_owned,
+                        std::vector<types::global_dof_index> &ghosted_level_dofs,
+                        const MPI_Comm &communicator,
+                        parallel::distributed::Vector<Number> &ghosted_level_vector,
+                        std::vector<std::pair<unsigned int,unsigned int> > &copy_indices_global_mine)
+  {
+    std::sort(ghosted_level_dofs.begin(), ghosted_level_dofs.end()); // sorts the caller's vector in place
+    IndexSet ghosted_dofs(locally_owned.size());
+    ghosted_dofs.add_indices(ghosted_level_dofs.begin(),
+                             std::unique(ghosted_level_dofs.begin(),
+                                         ghosted_level_dofs.end()));
+    ghosted_dofs.compress();
+
+    // Add possible ghosts from the previous content in the vector (detected by a matching global size)
+    if (ghosted_level_vector.size() == locally_owned.size())
+      {
+        // shift the local number (second pair entry) of the copy indices
+        // according to the new partitioner that we are going to use for the vector
+        const std_cxx11::shared_ptr<const Utilities::MPI::Partitioner> part
+          = ghosted_level_vector.get_partitioner();
+        ghosted_dofs.add_indices(part->ghost_indices());
+        for (unsigned int i=0; i<copy_indices_global_mine.size(); ++i)
+          copy_indices_global_mine[i].second =
+            locally_owned.n_elements() +
+            ghosted_dofs.index_within_set(part->local_to_global(copy_indices_global_mine[i].second));
+      }
+    ghosted_level_vector.reinit(locally_owned, ghosted_dofs, communicator);
+  }
+
+  // Fills @p localized_indices with the entries of @p mine followed by those of
+  // @p remote, each translated by @p part from global to local numbering.
+  // Entries equal to numbers::invalid_dof_index are not translated.
+  void
+  copy_indices_to_mpi_local_numbers(const Utilities::MPI::Partitioner &part,
+                                    const std::vector<types::global_dof_index> &mine,
+                                    const std::vector<types::global_dof_index> &remote,
+                                    std::vector<unsigned int> &localized_indices)
+  {
+    localized_indices.resize(mine.size()+remote.size(), numbers::invalid_unsigned_int);
+    for (unsigned int i=0; i<localized_indices.size(); ++i)
+      {
+        const types::global_dof_index global = i<mine.size() ? mine[i] : remote[i-mine.size()];
+        if (global != numbers::invalid_dof_index)
+          localized_indices[i] = part.global_to_local(global);
+      }
+  }
+}
+
+
+
+template <int dim, typename Number>
+void MGTransferMatrixFree<dim,Number>::build
+(const DoFHandler<dim,dim>  &mg_dof)
+{
+  this->fill_and_communicate_copy_indices(mg_dof);
+
+  // Overview: we collect all child DoFs of a parent cell together and, for
+  // faster tensorized operations, align them lexicographically. FE_Q and
+  // FE_DGQ elements are told apart below through fe.dofs_per_vertex.
+
+  const Triangulation<dim> &tria = mg_dof.get_triangulation();
+
+  // ---------------------------- 1. Extract 1D info about the finite element
+  // step 1.1: create a 1D copy of the finite element from FETools where we
+  // substitute the dimension template argument in the element's name by 1
+  AssertDimension(mg_dof.get_fe().n_base_elements(), 1);
+  std::string fe_name = mg_dof.get_fe().base_element(0).get_name();
+  {
+    const std::size_t template_starts = fe_name.find_first_of('<');
+    Assert (fe_name[template_starts+1] == (dim==1?'1':(dim==2?'2':'3')),
+            ExcInternalError());
+    fe_name[template_starts+1] = '1';
+  }
+  std_cxx11::shared_ptr<FiniteElement<1> > fe_1d
+  (FETools::get_fe_from_name<1>(fe_name));
+  const FiniteElement<1> &fe = *fe_1d;
+  unsigned int n_child_dofs_1d = numbers::invalid_unsigned_int;
+
+  {
+    // currently, we have only FE_Q and FE_DGQ type elements implemented
+    n_components = mg_dof.get_fe().element_multiplicity(0);
+    AssertDimension(Utilities::fixed_power<dim>(fe.dofs_per_cell)*n_components,
+                    mg_dof.get_fe().dofs_per_cell);
+    AssertDimension(fe.degree, mg_dof.get_fe().degree);
+    fe_degree = fe.degree;
+    element_is_continuous = fe.dofs_per_vertex > 0;
+    Assert(fe.dofs_per_vertex < 2, ExcNotImplemented());
+
+    // step 1.2: get renumbering of 1D basis functions to lexicographic
+    // numbers. The distinction according to fe.dofs_per_vertex is to support
+    // both continuous and discontinuous bases.
+    std::vector<unsigned int> renumbering(fe.dofs_per_cell);
+    {
+      AssertIndexRange(fe.dofs_per_vertex, 2);
+      renumbering[0] = 0;
+      for (unsigned int i=0; i<fe.dofs_per_line; ++i)
+        renumbering[i+fe.dofs_per_vertex] =
+          GeometryInfo<1>::vertices_per_cell*fe.dofs_per_vertex + i;
+      if (fe.dofs_per_vertex > 0)
+        renumbering[fe.dofs_per_cell-fe.dofs_per_vertex] = fe.dofs_per_vertex;
+    }
+
+    // step 1.3: create a 1D quadrature formula from the finite element that
+    // collects the support points of the basis functions on the two children.
+    std::vector<Point<1> > basic_support_points = fe.get_unit_support_points();
+    Assert(fe.dofs_per_vertex == 0 || fe.dofs_per_vertex == 1,
+           ExcNotImplemented());
+    std::vector<Point<1> > points_refined(fe.dofs_per_vertex > 0 ?
+                                          (2 * fe.dofs_per_cell - 1) :
+                                          (2 * fe.dofs_per_cell));
+    const unsigned int shift = fe.dofs_per_cell - fe.dofs_per_vertex;
+    for (unsigned int c=0; c<GeometryInfo<1>::max_children_per_cell; ++c)
+      for (unsigned int j=0; j<basic_support_points.size(); ++j)
+        points_refined[shift*c+j][0] =
+          c*0.5 + 0.5 * basic_support_points[renumbering[j]][0];
+
+    n_child_dofs_1d = points_refined.size();
+    n_child_cell_dofs = n_components*Utilities::fixed_power<dim>(n_child_dofs_1d);
+
+    // step 1.4: evaluate the polynomials and store the data in ShapeInfo
+    const Quadrature<1> quadrature(points_refined);
+    shape_info.reinit(quadrature, mg_dof.get_fe(), 0);
+
+    for (unsigned int c=0; c<GeometryInfo<1>::max_children_per_cell; ++c) // debug check against the FE's prolongation matrices
+      for (unsigned int i=0; i<fe.dofs_per_cell; ++i)
+        for (unsigned int j=0; j<fe.dofs_per_cell; ++j)
+          Assert(std::abs(shape_info.shape_values[i*n_child_dofs_1d+j+c*shift][0] -
+                          fe.get_prolongation_matrix(c)(renumbering[j],renumbering[i]))
+                 < std::max(2.*(double)std::numeric_limits<Number>::epsilon(),1e-12),
+                 ExcInternalError());
+  }
+
+  // -------------- 2. Extract and match dof indices between child and parent
+  const unsigned int n_levels = tria.n_global_levels();
+  level_dof_indices.resize(n_levels);
+  parent_child_connect.resize(n_levels-1);
+  n_owned_level_cells.resize(n_levels-1);
+  std::vector<std::vector<unsigned int> > coarse_level_indices(n_levels-1);
+  for (unsigned int level=0; level<std::min(tria.n_levels(),n_levels-1); ++level)
+    coarse_level_indices[level].resize(tria.n_raw_cells(level),
+                                       numbers::invalid_unsigned_int);
+  std::vector<types::global_dof_index> local_dof_indices(mg_dof.get_fe().dofs_per_cell);
+  dirichlet_indices.resize(n_levels-1);
+
+  // We use the vectors stored in ghosted_level_vector in the base class for
+  // keeping ghosted transfer indices. To avoid keeping two very similar
+  // vectors, we merge them here.
+  if (this->ghosted_level_vector.max_level() != n_levels-1)
+    this->ghosted_level_vector.resize(0, n_levels-1);
+
+  for (unsigned int level=n_levels-1; level > 0; --level) // from the finest level downwards
+    {
+      unsigned int counter = 0;
+      std::vector<types::global_dof_index> global_level_dof_indices;
+      std::vector<types::global_dof_index> global_level_dof_indices_remote;
+      std::vector<types::global_dof_index> ghosted_level_dofs;
+      std::vector<types::global_dof_index> global_level_dof_indices_l0;
+      std::vector<types::global_dof_index> ghosted_level_dofs_l0;
+
+      // step 2.1: loop over the cells on the coarse side
+      for (typename DoFHandler<dim>::cell_iterator cell = mg_dof.begin(level-1);
+           cell != mg_dof.end(level-1); ++cell)
+        {
+          // need to look into a cell if it has children and it is locally owned
+          if (!cell->has_children())
+            continue;
+
+          bool consider_cell = false;
+          if (tria.locally_owned_subdomain()==numbers::invalid_subdomain_id
+              || cell->level_subdomain_id()==tria.locally_owned_subdomain()
+             )
+            consider_cell = true;
+
+          // due to the particular way we store DoF indices (via children), we
+          // also need to add the DoF indices for coarse cells where we own at
+          // least one child
+          bool cell_is_remote = !consider_cell;
+          for (unsigned int c=0; c<GeometryInfo<dim>::max_children_per_cell; ++c)
+            if (cell->child(c)->level_subdomain_id()==tria.locally_owned_subdomain())
+              {
+                consider_cell = true;
+                break;
+              }
+
+          if (!consider_cell)
+            continue;
+
+          // step 2.2: loop through children and append the dof indices to the
+          // appropriate list. We need separate lists for the owned coarse
+          // cell case (which will be part of restriction/prolongation between
+          // level-1 and level) and the remote case (which needs to store DoF
+          // indices for the operations between level and level+1).
+          AssertDimension(cell->n_children(),
+                          GeometryInfo<dim>::max_children_per_cell);
+          std::vector<types::global_dof_index> &next_indices =
+            cell_is_remote ? global_level_dof_indices_remote : global_level_dof_indices;
+          const std::size_t start_index = next_indices.size();
+          next_indices.resize(start_index + n_child_cell_dofs,
+                              numbers::invalid_dof_index);
+          for (unsigned int c=0; c<GeometryInfo<dim>::max_children_per_cell; ++c)
+            {
+              if (cell_is_remote && cell->child(c)->level_subdomain_id() !=
+                  tria.locally_owned_subdomain())
+                continue;
+              cell->child(c)->get_mg_dof_indices(local_dof_indices);
+
+              const IndexSet &owned_level_dofs = mg_dof.locally_owned_mg_dofs(level);
+              for (unsigned int i=0; i<local_dof_indices.size(); ++i)
+                if (!owned_level_dofs.is_element(local_dof_indices[i]))
+                  ghosted_level_dofs.push_back(local_dof_indices[i]);
+
+              add_child_indices<dim>(c, fe.dofs_per_cell - fe.dofs_per_vertex,
+                                     fe.degree, shape_info.lexicographic_numbering,
+                                     local_dof_indices,
+                                     &next_indices[start_index]);
+
+              // step 2.3: store the connectivity to the parent
+              if (cell->child(c)->has_children() &&
+                  (tria.locally_owned_subdomain()==numbers::invalid_subdomain_id
+                   || cell->child(c)->level_subdomain_id()==tria.locally_owned_subdomain()
+                  ))
+                {
+                  const unsigned int child_index = coarse_level_indices[level][cell->child(c)->index()];
+                  AssertIndexRange(child_index, parent_child_connect[level].size());
+                  unsigned int parent_index = counter;
+                  // remote cells, i.e., cells where we work on a further
+                  // level but are not treated on the current level, need to
+                  // be placed at the end of the list; however, we do not yet
+                  // know the exact position in the array, so shift their
+                  // parent index by the number of cells so we can set the
+                  // correct number after the end of this loop
+                  if (cell_is_remote)
+                    parent_index = start_index/n_child_cell_dofs + tria.n_cells(level);
+                  parent_child_connect[level][child_index] =
+                    std::make_pair(parent_index, c);
+                  AssertIndexRange(mg_dof.get_fe().dofs_per_cell,
+                                   static_cast<unsigned short>(-1)); // indices are stored as unsigned short below
+
+                  // set Dirichlet boundary conditions (as a list of
+                  // constrained DoFs) for the child
+                  if (this->mg_constrained_dofs != 0)
+                    for (unsigned int i=0; i<mg_dof.get_fe().dofs_per_cell; ++i)
+                      if (this->mg_constrained_dofs->is_boundary_index(level, local_dof_indices[shape_info.lexicographic_numbering[i]]))
+                        dirichlet_indices[level][child_index].push_back(i);
+                }
+            }
+          if (!cell_is_remote)
+            {
+              AssertIndexRange(static_cast<unsigned int>(cell->index()),
+                               coarse_level_indices[level-1].size());
+              coarse_level_indices[level-1][cell->index()] = counter++;
+            }
+
+          // step 2.4: include indices for the coarsest cells. we still insert
+          // the indices as if they were from a child in order to use the same
+          // code (the coarsest level does not matter much in terms of memory,
+          // so we gain in code simplicity)
+          if (level == 1 && !cell_is_remote)
+            {
+              cell->get_mg_dof_indices(local_dof_indices);
+
+              const IndexSet &owned_level_dofs_l0 = mg_dof.locally_owned_mg_dofs(0);
+              for (unsigned int i=0; i<local_dof_indices.size(); ++i)
+                if (!owned_level_dofs_l0.is_element(local_dof_indices[i]))
+                  ghosted_level_dofs_l0.push_back(local_dof_indices[i]);
+
+              const std::size_t start_index = global_level_dof_indices_l0.size();
+              global_level_dof_indices_l0.resize(start_index+n_child_cell_dofs,
+                                                 numbers::invalid_dof_index);
+              add_child_indices<dim>(0, fe.dofs_per_cell - fe.dofs_per_vertex,
+                                     fe.degree, shape_info.lexicographic_numbering,
+                                     local_dof_indices,
+                                     &global_level_dof_indices_l0[start_index]);
+
+              dirichlet_indices[0].push_back(std::vector<unsigned short>());
+              if (this->mg_constrained_dofs != 0)
+                for (unsigned int i=0; i<mg_dof.get_fe().dofs_per_cell; ++i)
+                  if (this->mg_constrained_dofs->is_boundary_index(0, local_dof_indices[shape_info.lexicographic_numbering[i]]))
+                    dirichlet_indices[0].back().push_back(i);
+            }
+        }
+
+      // step 2.5: store information about the current level and prepare the
+      // Dirichlet indices and parent-child relationship for the next coarser
+      // level
+      AssertDimension(counter*n_child_cell_dofs, global_level_dof_indices.size());
+      n_owned_level_cells[level-1] = counter;
+      dirichlet_indices[level-1].resize(counter);
+      parent_child_connect[level-1].
+      resize(counter, std::make_pair(numbers::invalid_unsigned_int,
+                                     numbers::invalid_unsigned_int));
+
+      // step 2.6: put the cells with remotely owned parent to the end of the
+      // list (these are needed for the transfer from level to level+1 but not
+      // for the transfer from level-1 to level).
+      if (level < n_levels-1)
+        for (std::vector<std::pair<unsigned int,unsigned int> >::iterator
+             i=parent_child_connect[level].begin(); i!=parent_child_connect[level].end(); ++i)
+          if (i->first >= tria.n_cells(level))
+            {
+              i->first -= tria.n_cells(level);
+              i->first += counter;
+            }
+
+      // step 2.7: Initialize the ghosted vector
+      const parallel::Triangulation<dim,dim> *ptria =
+        (dynamic_cast<const parallel::Triangulation<dim,dim>*> (&tria));
+      const MPI_Comm communicator =
+        ptria != 0 ? ptria->get_communicator() : MPI_COMM_SELF;
+
+      reinit_ghosted_vector(mg_dof.locally_owned_mg_dofs(level),
+                            ghosted_level_dofs, communicator,
+                            this->ghosted_level_vector[level],
+                            this->copy_indices_global_mine[level]);
+
+      copy_indices_to_mpi_local_numbers(*this->ghosted_level_vector[level].get_partitioner(),
+                                        global_level_dof_indices,
+                                        global_level_dof_indices_remote,
+                                        level_dof_indices[level]);
+
+      // step 2.8: Initialize the ghosted vector for level 0
+      if (level == 1)
+        {
+          for (unsigned int i = 0; i<parent_child_connect[0].size(); ++i)
+            parent_child_connect[0][i] = std::make_pair(i, 0U);
+
+          reinit_ghosted_vector(mg_dof.locally_owned_mg_dofs(0),
+                                ghosted_level_dofs_l0, communicator,
+                                this->ghosted_level_vector[0],
+                                this->copy_indices_global_mine[0]);
+
+          copy_indices_to_mpi_local_numbers(*this->ghosted_level_vector[0].get_partitioner(),
+                                            global_level_dof_indices_l0,
+                                            std::vector<types::global_dof_index>(),
+                                            level_dof_indices[0]);
+        }
+    }
+
+  // ------------------------ 3. compute weights to make restriction additive
+  //
+  // get the valence of the individual components and compute the weights as
+  // the inverse of the valence
+  weights_on_refined.resize(n_levels);
+  for (unsigned int level = 1; level<n_levels; ++level)
+    {
+      this->ghosted_level_vector[level] = 0;
+      for (unsigned int c=0; c<n_owned_level_cells[level-1]; ++c)
+        for (unsigned int j=0; j<n_child_cell_dofs; ++j)
+          this->ghosted_level_vector[level].local_element(level_dof_indices[level][n_child_cell_dofs*c+j]) += Number(1.);
+      this->ghosted_level_vector[level].compress(VectorOperation::add);
+      this->ghosted_level_vector[level].update_ghost_values();
+
+      const unsigned int vec_size = VectorizedArray<Number>::n_array_elements;
+      std::vector<unsigned int> degree_to_3 (n_child_dofs_1d);
+      degree_to_3[0] = 0;
+      for (unsigned int i=1; i<n_child_dofs_1d-1; ++i)
+        degree_to_3[i] = 1;
+      degree_to_3.back() = 2;
+
+      // we only store 3^dim weights because all dofs on a line have the same
+      // valence, and all dofs on a quad have the same valence.
+      weights_on_refined[level].resize(((n_owned_level_cells[level-1]+vec_size-1)/vec_size)*Utilities::fixed_power<dim>(3));
+      for (unsigned int c=0; c<n_owned_level_cells[level-1]; ++c)
+        {
+          const unsigned int comp = c/vec_size;
+          const unsigned int v = c%vec_size;
+
+          for (unsigned int k=0, m=0; k<(dim>2 ? n_child_dofs_1d : 1); ++k)
+            for (unsigned int j=0; j<(dim>1 ? n_child_dofs_1d : 1); ++j)
+              {
+                unsigned int shift = 9*degree_to_3[k] + 3*degree_to_3[j];
+                for (unsigned int i=0; i<n_child_dofs_1d; ++i, ++m)
+                  weights_on_refined[level][comp*Utilities::fixed_power<dim>(3)+shift+degree_to_3[i]][v] = Number(1.)/
+                      this->ghosted_level_vector[level].local_element(level_dof_indices[level][n_child_cell_dofs*c+m]);
+              }
+        }
+    }
+
+  evaluation_data.resize(3*n_child_cell_dofs); // three blocks of n_child_cell_dofs entries
+}
+
+
+
+// Prolongate @p src from level @p to_level-1 to level @p to_level and assign
+// the result to @p dst. Works on the internal ghosted level vectors.
+template <int dim, typename Number>
+void MGTransferMatrixFree<dim,Number>
+::prolongate (const unsigned int                           to_level,
+              parallel::distributed::Vector<Number>       &dst,
+              const parallel::distributed::Vector<Number> &src) const
+{
+  // level_dof_indices has one entry per level, so to_level == size() is out of range
+  Assert ((to_level >= 1) && (to_level<level_dof_indices.size()),
+          ExcIndexRange (to_level, 1, level_dof_indices.size()));
+
+  AssertDimension(this->ghosted_level_vector[to_level].local_size(), dst.local_size());
+  AssertDimension(this->ghosted_level_vector[to_level-1].local_size(), src.local_size());
+
+  this->ghosted_level_vector[to_level-1] = src;
+  this->ghosted_level_vector[to_level-1].update_ghost_values();
+  this->ghosted_level_vector[to_level] = 0.;
+
+  // do_prolongate_add is templated in the degree of the element (for
+  // efficiency reasons), so we need to select the matching kernel here...
+  if (fe_degree == 0)
+    do_prolongate_add<0>(to_level, this->ghosted_level_vector[to_level],
+                         this->ghosted_level_vector[to_level-1]);
+  else if (fe_degree == 1)
+    do_prolongate_add<1>(to_level, this->ghosted_level_vector[to_level],
+                         this->ghosted_level_vector[to_level-1]);
+  else if (fe_degree == 2)
+    do_prolongate_add<2>(to_level, this->ghosted_level_vector[to_level],
+                         this->ghosted_level_vector[to_level-1]);
+  else if (fe_degree == 3)
+    do_prolongate_add<3>(to_level, this->ghosted_level_vector[to_level],
+                         this->ghosted_level_vector[to_level-1]);
+  else if (fe_degree == 4)
+    do_prolongate_add<4>(to_level, this->ghosted_level_vector[to_level],
+                         this->ghosted_level_vector[to_level-1]);
+  else if (fe_degree == 5)
+    do_prolongate_add<5>(to_level, this->ghosted_level_vector[to_level],
+                         this->ghosted_level_vector[to_level-1]);
+  else if (fe_degree == 6)
+    do_prolongate_add<6>(to_level, this->ghosted_level_vector[to_level],
+                         this->ghosted_level_vector[to_level-1]);
+  else if (fe_degree == 7)
+    do_prolongate_add<7>(to_level, this->ghosted_level_vector[to_level],
+                         this->ghosted_level_vector[to_level-1]);
+  else if (fe_degree == 8)
+    do_prolongate_add<8>(to_level, this->ghosted_level_vector[to_level],
+                         this->ghosted_level_vector[to_level-1]);
+  else if (fe_degree == 9)
+    do_prolongate_add<9>(to_level, this->ghosted_level_vector[to_level],
+                         this->ghosted_level_vector[to_level-1]);
+  else if (fe_degree == 10)
+    do_prolongate_add<10>(to_level, this->ghosted_level_vector[to_level],
+                          this->ghosted_level_vector[to_level-1]);
+  else
+    AssertThrow(false, ExcNotImplemented("Only degrees 0 up to 10 implemented."));
+
+  this->ghosted_level_vector[to_level].compress(VectorOperation::add);
+  dst = this->ghosted_level_vector[to_level];
+}
+
+
+
+// Restrict @p src from level @p from_level to level @p from_level-1 and add the result to @p dst.
+template <int dim, typename Number>
+void MGTransferMatrixFree<dim,Number>
+::restrict_and_add (const unsigned int                           from_level,
+                    parallel::distributed::Vector<Number>       &dst,
+                    const parallel::distributed::Vector<Number> &src) const
+{
+  // level_dof_indices has one entry per level, so from_level == size() is out of range
+  Assert ((from_level >= 1) && (from_level<level_dof_indices.size()),
+          ExcIndexRange (from_level, 1, level_dof_indices.size()));
+
+  AssertDimension(this->ghosted_level_vector[from_level].local_size(), src.local_size());
+  AssertDimension(this->ghosted_level_vector[from_level-1].local_size(), dst.local_size());
+
+  this->ghosted_level_vector[from_level] = src;
+  this->ghosted_level_vector[from_level].update_ghost_values();
+  this->ghosted_level_vector[from_level-1] = 0.;
+
+  if (fe_degree == 0)
+    do_restrict_add<0>(from_level, this->ghosted_level_vector[from_level-1],
+                       this->ghosted_level_vector[from_level]);
+  else if (fe_degree == 1)
+    do_restrict_add<1>(from_level, this->ghosted_level_vector[from_level-1],
+                       this->ghosted_level_vector[from_level]);
+  else if (fe_degree == 2)
+    do_restrict_add<2>(from_level, this->ghosted_level_vector[from_level-1],
+                       this->ghosted_level_vector[from_level]);
+  else if (fe_degree == 3)
+    do_restrict_add<3>(from_level, this->ghosted_level_vector[from_level-1],
+                       this->ghosted_level_vector[from_level]);
+  else if (fe_degree == 4)
+    do_restrict_add<4>(from_level, this->ghosted_level_vector[from_level-1],
+                       this->ghosted_level_vector[from_level]);
+  else if (fe_degree == 5)
+    do_restrict_add<5>(from_level, this->ghosted_level_vector[from_level-1],
+                       this->ghosted_level_vector[from_level]);
+  else if (fe_degree == 6)
+    do_restrict_add<6>(from_level, this->ghosted_level_vector[from_level-1],
+                       this->ghosted_level_vector[from_level]);
+  else if (fe_degree == 7)
+    do_restrict_add<7>(from_level, this->ghosted_level_vector[from_level-1],
+                       this->ghosted_level_vector[from_level]);
+  else if (fe_degree == 8)
+    do_restrict_add<8>(from_level, this->ghosted_level_vector[from_level-1],
+                       this->ghosted_level_vector[from_level]);
+  else if (fe_degree == 9)
+    do_restrict_add<9>(from_level, this->ghosted_level_vector[from_level-1],
+                       this->ghosted_level_vector[from_level]);
+  else if (fe_degree == 10)
+    do_restrict_add<10>(from_level, this->ghosted_level_vector[from_level-1],
+                        this->ghosted_level_vector[from_level]);
+  else
+    AssertThrow(false, ExcNotImplemented("Only degrees 0 up to 10 implemented."));
+
+  this->ghosted_level_vector[from_level-1].compress(VectorOperation::add);
+  dst += this->ghosted_level_vector[from_level-1];
+}
+
+
+
+namespace
+{
+  template <int dim, typename Eval, typename Number, bool prolongate>
+  void
+  perform_tensorized_op(const Eval &evaluator,
+                        const unsigned int n_child_cell_dofs,
+                        const unsigned int n_components,
+                        AlignedVector<VectorizedArray<Number> > &evaluation_data)
+  {
+    AssertDimension(n_components * Eval::n_q_points, n_child_cell_dofs);
+    VectorizedArray<Number> *t0 = &evaluation_data[0]; // first block; advanced per component
+    VectorizedArray<Number> *t1 = &evaluation_data[n_child_cell_dofs]; // second block; never advanced
+    VectorizedArray<Number> *t2 = &evaluation_data[2*n_child_cell_dofs]; // third block; advanced per component
+
+    for (unsigned int c=0; c<n_components; ++c)
+      {
+        // one call per coordinate direction; every branch starts from t0 and hands t2 to its last call (presumably as output).
+        // For the prolongate case, we go from dofs (on the parent cell) to quads (on all children) in FEEvaluation terminology
+        if (dim == 1)
+          evaluator.template values<0,prolongate,false>(t0, t2);
+        else if (dim == 2)
+          {
+            evaluator.template values<0,prolongate,false>(t0, t1);
+            evaluator.template values<1,prolongate,false>(t1, t2);
+          }
+        else if (dim == 3)
+          {
+            evaluator.template values<0,prolongate,false>(t0, t2);
+            evaluator.template values<1,prolongate,false>(t2, t1);
+            evaluator.template values<2,prolongate,false>(t1, t2);
+          }
+        else
+          Assert(false, ExcNotImplemented());
+        if (prolongate)
+          {
+            t0 += Eval::dofs_per_cell;
+            t2 += Eval::n_q_points;
+          }
+        else
+          {
+            t0 += Eval::n_q_points;
+            t2 += Eval::dofs_per_cell;
+          }
+      }
+  }
+
+  template <int dim, int degree, typename Number>
+  void weight_dofs_on_child (const VectorizedArray<Number> *weights,
+                             const unsigned int n_components,
+                             VectorizedArray<Number> *data)
+  {
+    Assert(degree > 0, ExcNotImplemented());
+    // number of points per direction; kept as int so that the interior loop
+    // does not draw compiler warnings when it equals 1 (polynomial degree 0)
+    const int n_1d = 2*degree+1;
+    const int n_z = (dim>2 ? n_1d : 1);
+    const int n_y = (dim>1 ? n_1d : 1);
+    for (unsigned int c=0; c<n_components; ++c)
+      for (int k=0; k<n_z; ++k)
+        for (int j=0; j<n_y; ++j, data += n_1d)
+          {
+            // classify the position in each direction: 2 for the last point,
+            // 0 for the first one, 1 for everything in between
+            const unsigned int pos_k = (k==n_1d-1) ? 2 : (k==0 ? 0 : 1);
+            const unsigned int pos_j = (j==n_1d-1) ? 2 : (j==0 ? 0 : 1);
+            const VectorizedArray<Number> *w = weights + 9*pos_k + 3*pos_j;
+            data[0] *= w[0];
+            for (int i=1; i<n_1d-1; ++i)
+              data[i] *= w[1];
+            data[n_1d-1] *= w[2];
+          }
+  }
+}
+
+
+
+template <int dim, typename Number>
+template <int degree>
+void MGTransferMatrixFree<dim,Number>
+::do_prolongate_add (const unsigned int                           to_level,
+                     parallel::distributed::Vector<Number>       &dst,
+                     const parallel::distributed::Vector<Number> &src) const
+{
+  // Adds the prolongation of src (level to_level-1) into dst (level to_level),
+  // handling vec_size cells at a time, one cell per vectorization lane.
+  const unsigned int vec_size = VectorizedArray<Number>::n_array_elements;
+  const unsigned int n_child_dofs_1d = 2*(fe_degree+1) - element_is_continuous;
+  const unsigned int n_scalar_cell_dofs = Utilities::fixed_power<dim>(n_child_dofs_1d);
+  const unsigned int three_to_dim = Utilities::fixed_int_power<3,dim>::value;
+
+  for (unsigned int cell=0; cell < n_owned_level_cells[to_level-1];
+       cell += vec_size)
+    {
+      const unsigned int n_chunks = cell+vec_size > n_owned_level_cells[to_level-1] ?
+                                    n_owned_level_cells[to_level-1] - cell : vec_size;
+
+      // read from source vector
+      for (unsigned int v=0; v<n_chunks; ++v)
+        {
+          const unsigned int shift = compute_shift_within_children<dim>
+                                     (parent_child_connect[to_level-1][cell+v].second,
+                                      degree+1-element_is_continuous, degree);
+          // range check mirroring the one in do_restrict_add
+          AssertIndexRange(parent_child_connect[to_level-1][cell+v].first*
+                           n_child_cell_dofs+n_child_cell_dofs-1,
+                           level_dof_indices[to_level-1].size());
+          const unsigned int *indices = &level_dof_indices[to_level-1][parent_child_connect[to_level-1][cell+v].first*n_child_cell_dofs+shift];
+          for (unsigned int c=0, m=0; c<n_components; ++c)
+            for (unsigned int k=0; k<(dim>2 ? (degree+1) : 1); ++k)
+              for (unsigned int j=0; j<(dim>1 ? (degree+1) : 1); ++j)
+                for (unsigned int i=0; i<(degree+1); ++i, ++m)
+                  evaluation_data[m][v] =
+                    src.local_element(indices[c*n_scalar_cell_dofs +
+                                              k*n_child_dofs_1d*n_child_dofs_1d+
+                                              j*n_child_dofs_1d+i]);
+
+          // apply Dirichlet boundary conditions on parent cell, once after all
+          // components are read: the index list does not depend on the component
+          for (std::vector<unsigned short>::const_iterator i=dirichlet_indices[to_level-1][cell+v].begin(); i!=dirichlet_indices[to_level-1][cell+v].end(); ++i)
+            evaluation_data[*i][v] = 0.;
+        }
+
+      // perform tensorized operation
+      Assert(shape_info.element_type ==
+             internal::MatrixFreeFunctions::tensor_symmetric, ExcNotImplemented());
+      if (element_is_continuous)
+        {
+          AssertDimension(shape_info.shape_val_evenodd.size(),
+                          (degree+1)*(degree+1));
+          typedef internal::EvaluatorTensorProduct<internal::evaluate_evenodd,dim,degree,2*degree+1,VectorizedArray<Number> > Evaluator;
+          Evaluator evaluator(shape_info.shape_val_evenodd,
+                              shape_info.shape_val_evenodd,
+                              shape_info.shape_val_evenodd);
+          perform_tensorized_op<dim,Evaluator,Number,true>
+          (evaluator, n_child_cell_dofs, n_components, evaluation_data);
+          weight_dofs_on_child<dim,degree,Number>(&weights_on_refined[to_level][(cell/vec_size)*three_to_dim],
+                                                  n_components,
+                                                  &evaluation_data[2*n_child_cell_dofs]);
+        }
+      else
+        {
+          AssertDimension(shape_info.shape_val_evenodd.size(),
+                          (degree+1)*(degree+1));
+          typedef internal::EvaluatorTensorProduct<internal::evaluate_evenodd,dim,degree,2*degree+2,VectorizedArray<Number> > Evaluator;
+          Evaluator evaluator(shape_info.shape_val_evenodd,
+                              shape_info.shape_val_evenodd,
+                              shape_info.shape_val_evenodd);
+          perform_tensorized_op<dim,Evaluator,Number,true>
+          (evaluator, n_child_cell_dofs, n_components, evaluation_data);
+        }
+
+      // write into dst vector
+      const unsigned int *indices = &level_dof_indices[to_level][cell*n_child_cell_dofs];
+      for (unsigned int v=0; v<n_chunks; ++v)
+        {
+          for (unsigned int i=0; i<n_child_cell_dofs; ++i)
+            dst.local_element(indices[i]) += evaluation_data[2*n_child_cell_dofs+i][v];
+          indices += n_child_cell_dofs;
+        }
+    }
+}
+
+
+
+template <int dim, typename Number>
+template <int degree>
+void MGTransferMatrixFree<dim,Number>
+::do_restrict_add (const unsigned int                           from_level,
+                   parallel::distributed::Vector<Number>       &dst,
+                   const parallel::distributed::Vector<Number> &src) const
+{
+  // Adds the restriction of src (level from_level) into dst (level
+  // from_level-1), handling vec_size cells at a time, one cell per
+  // vectorization lane.
+  const unsigned int vec_size = VectorizedArray<Number>::n_array_elements;
+  const unsigned int n_child_dofs_1d = 2*(fe_degree+1) - element_is_continuous;
+  const unsigned int n_scalar_cell_dofs = Utilities::fixed_power<dim>(n_child_dofs_1d);
+  const unsigned int three_to_dim = Utilities::fixed_int_power<3,dim>::value;
+
+  for (unsigned int cell=0; cell < n_owned_level_cells[from_level-1];
+       cell += vec_size)
+    {
+      const unsigned int n_chunks = cell+vec_size > n_owned_level_cells[from_level-1] ?
+                                    n_owned_level_cells[from_level-1] - cell : vec_size;
+
+      // read from source vector
+      {
+        const unsigned int *indices = &level_dof_indices[from_level][cell*n_child_cell_dofs];
+        for (unsigned int v=0; v<n_chunks; ++v)
+          {
+            for (unsigned int i=0; i<n_child_cell_dofs; ++i)
+              evaluation_data[i][v] = src.local_element(indices[i]);
+            indices += n_child_cell_dofs;
+          }
+      }
+
+      // perform tensorized operation
+      Assert(shape_info.element_type ==
+             internal::MatrixFreeFunctions::tensor_symmetric, ExcNotImplemented());
+      if (element_is_continuous)
+        {
+          AssertDimension(shape_info.shape_val_evenodd.size(),
+                          (degree+1)*(degree+1));
+          typedef internal::EvaluatorTensorProduct<internal::evaluate_evenodd,dim,degree,2*degree+1,VectorizedArray<Number> > Evaluator;
+          Evaluator evaluator(shape_info.shape_val_evenodd,
+                              shape_info.shape_val_evenodd,
+                              shape_info.shape_val_evenodd);
+          weight_dofs_on_child<dim,degree,Number>(&weights_on_refined[from_level][(cell/vec_size)*three_to_dim],
+                                                  n_components,
+                                                  &evaluation_data[0]);
+          perform_tensorized_op<dim,Evaluator,Number,false>
+          (evaluator, n_child_cell_dofs, n_components, evaluation_data);
+        }
+      else
+        {
+          AssertDimension(shape_info.shape_val_evenodd.size(),
+                          (degree+1)*(degree+1));
+          typedef internal::EvaluatorTensorProduct<internal::evaluate_evenodd,dim,degree,2*degree+2,VectorizedArray<Number> > Evaluator;
+          Evaluator evaluator(shape_info.shape_val_evenodd,
+                              shape_info.shape_val_evenodd,
+                              shape_info.shape_val_evenodd);
+          perform_tensorized_op<dim,Evaluator,Number,false>
+          (evaluator, n_child_cell_dofs, n_components, evaluation_data);
+        }
+
+      // write into dst vector
+      for (unsigned int v=0; v<n_chunks; ++v)
+        {
+          const unsigned int shift = compute_shift_within_children<dim>
+                                     (parent_child_connect[from_level-1][cell+v].second,
+                                      degree+1-element_is_continuous, degree);
+          AssertIndexRange(parent_child_connect[from_level-1][cell+v].first*
+                           n_child_cell_dofs+n_child_cell_dofs-1,
+                           level_dof_indices[from_level-1].size());
+          const unsigned int *indices = &level_dof_indices[from_level-1][parent_child_connect[from_level-1][cell+v].first*n_child_cell_dofs+shift];
+          // apply Dirichlet boundary conditions on parent cell, once before
+          // any component is written: the index list does not depend on
+          // the component
+          for (std::vector<unsigned short>::const_iterator i=dirichlet_indices[from_level-1][cell+v].begin(); i!=dirichlet_indices[from_level-1][cell+v].end(); ++i)
+            evaluation_data[2*n_child_cell_dofs+(*i)][v] = 0.;
+
+          for (unsigned int c=0, m=0; c<n_components; ++c)
+            {
+              for (unsigned int k=0; k<(dim>2 ? (degree+1) : 1); ++k)
+                for (unsigned int j=0; j<(dim>1 ? (degree+1) : 1); ++j)
+                  for (unsigned int i=0; i<(degree+1); ++i, ++m)
+                    dst.local_element(indices[c*n_scalar_cell_dofs +
+                                              k*n_child_dofs_1d*n_child_dofs_1d+
+                                              j*n_child_dofs_1d+i])
+                    += evaluation_data[2*n_child_cell_dofs+m][v];
+            }
+        }
+    }
+}
+
+
+
+template <int dim, typename Number>
+std::size_t
+MGTransferMatrixFree<dim,Number>::memory_consumption() const
+{
+  // sum of the base class footprint and of the members listed below
+  return (MGLevelGlobalTransfer<parallel::distributed::Vector<Number> >::memory_consumption()
+          + MemoryConsumption::memory_consumption(level_dof_indices)
+          + MemoryConsumption::memory_consumption(parent_child_connect)
+          + MemoryConsumption::memory_consumption(n_owned_level_cells)
+          + shape_info.memory_consumption()
+          + MemoryConsumption::memory_consumption(evaluation_data)
+          + MemoryConsumption::memory_consumption(weights_on_refined)
+          + MemoryConsumption::memory_consumption(dirichlet_indices));
+}
+
+
+// explicit instantiation
+#include "mg_transfer_matrix_free.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/multigrid/mg_transfer_matrix_free.inst.in b/source/multigrid/mg_transfer_matrix_free.inst.in
new file mode 100644
index 0000000..184183d
--- /dev/null
+++ b/source/multigrid/mg_transfer_matrix_free.inst.in
@@ -0,0 +1,21 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+for (deal_II_dimension : DIMENSIONS; S1 : REAL_SCALARS)
+  {
+    template class MGTransferMatrixFree< deal_II_dimension, S1 >;
+  }
diff --git a/source/multigrid/mg_transfer_prebuilt.cc b/source/multigrid/mg_transfer_prebuilt.cc
new file mode 100644
index 0000000..9ea0757
--- /dev/null
+++ b/source/multigrid/mg_transfer_prebuilt.cc
@@ -0,0 +1,297 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2003 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/function.h>
+
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/parallel_vector.h>
+#include <deal.II/lac/parallel_block_vector.h>
+#include <deal.II/lac/petsc_vector.h>
+#include <deal.II/lac/petsc_block_vector.h>
+#include <deal.II/lac/trilinos_vector.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/block_sparse_matrix.h>
+#include <deal.II/lac/dynamic_sparsity_pattern.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/multigrid/mg_tools.h>
+#include <deal.II/multigrid/mg_transfer.h>
+
+#include <algorithm>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+template<typename VectorType>
+MGTransferPrebuilt<VectorType>::MGTransferPrebuilt ()
+{}
+
+
+
+template<typename VectorType>
+MGTransferPrebuilt<VectorType>::MGTransferPrebuilt (const ConstraintMatrix &c, const MGConstrainedDoFs &mg_c)
+  :
+  constraints(&c)
+{
+  this->mg_constrained_dofs = &mg_c;
+}
+
+
+
+template <typename VectorType>
+MGTransferPrebuilt<VectorType>::~MGTransferPrebuilt ()
+{}
+
+
+
+template <typename VectorType>
+void MGTransferPrebuilt<VectorType>::initialize_constraints
+(const ConstraintMatrix &c, const MGConstrainedDoFs &mg_c)
+{
+  constraints = &c;
+  this->mg_constrained_dofs = &mg_c;
+}
+
+
+
+template <typename VectorType>
+void MGTransferPrebuilt<VectorType>::clear ()
+{
+  MGLevelGlobalTransfer<VectorType>::clear();
+  prolongation_matrices.resize(0);
+  prolongation_sparsities.resize(0);
+  interface_dofs.resize(0);
+  constraints = 0;
+}
+
+
+
+template <typename VectorType>
+void MGTransferPrebuilt<VectorType>::prolongate (const unsigned int to_level,
+                                                 VectorType        &dst,
+                                                 const VectorType  &src) const
+{
+  // the transfer onto level l is kept at slot l-1 of the matrix array
+  Assert ((1 <= to_level) && (to_level-1 < prolongation_matrices.size()),
+          ExcIndexRange (to_level, 1, prolongation_matrices.size()+1));
+  (*prolongation_matrices[to_level-1]).vmult (dst, src);
+}
+
+
+
+template <typename VectorType>
+void MGTransferPrebuilt<VectorType>::restrict_and_add (const unsigned int from_level,
+                                                       VectorType        &dst,
+                                                       const VectorType  &src) const
+{
+  // uses the matrix stored at slot from_level-1, i.e. the same one that
+  // prolongate() applies for that level, via Tvmult_add() into dst
+  Assert ((1 <= from_level) && (from_level-1 < prolongation_matrices.size()),
+          ExcIndexRange (from_level, 1, prolongation_matrices.size()+1));
+  (*prolongation_matrices[from_level-1]).Tvmult_add (dst, src);
+}
+
+
+
+template <typename VectorType>
+template <int dim, int spacedim>
+void MGTransferPrebuilt<VectorType>::build_matrices
+(const DoFHandler<dim,spacedim>  &mg_dof)
+{
+  // Assembles one prolongation matrix per pair of consecutive levels from
+  // the finite element's cell-local prolongation matrices. Rows are indexed
+  // by the level dofs of the finer level, columns by those of the coarser
+  // level.
+  const unsigned int n_levels      = mg_dof.get_triangulation().n_global_levels();
+  const unsigned int dofs_per_cell = mg_dof.get_fe().dofs_per_cell;
+
+  this->sizes.resize(n_levels);
+  for (unsigned int l=0; l<n_levels; ++l)
+    this->sizes[l] = mg_dof.n_dofs(l);
+
+  // reset the size of the array of matrices. call resize(0) first, in order
+  // to delete all elements and clear their memory. then repopulate these
+  // arrays
+  //
+  // note that on resize(0), the shared_ptr class takes care of deleting the
+  // object it points to by itself
+  prolongation_matrices.resize (0);
+  prolongation_sparsities.resize (0);
+
+  // one matrix per pair of consecutive levels. the bound is written as
+  // i+1<n_levels because n_levels-1 would wrap around if n_levels were zero
+  for (unsigned int i=0; i+1<n_levels; ++i)
+    {
+      prolongation_sparsities.push_back
+      (std_cxx11::shared_ptr<typename internal::MatrixSelector<VectorType>::Sparsity> (new typename internal::MatrixSelector<VectorType>::Sparsity));
+      prolongation_matrices.push_back
+      (std_cxx11::shared_ptr<typename internal::MatrixSelector<VectorType>::Matrix> (new typename internal::MatrixSelector<VectorType>::Matrix));
+    }
+
+  // scratch arrays: multigrid dof indices of a cell and of one of its
+  // children, and the column indices collected for one matrix row
+  std::vector<types::global_dof_index> dof_indices_parent (dofs_per_cell);
+  std::vector<types::global_dof_index> dof_indices_child (dofs_per_cell);
+  std::vector<types::global_dof_index> entries (dofs_per_cell);
+
+  // for each level: first build the sparsity pattern of the matrices and
+  // then build the matrices themselves. note that we only need to take care
+  // of cells on the coarser level which have children
+  for (unsigned int level=0; level+1<n_levels; ++level)
+    {
+      // set up the sparsity pattern: one row per dof on level+1 and one
+      // column per dof on level. the pattern is dynamic, so no estimate of
+      // the number of entries per row is needed; an entry is added for every
+      // nonzero of the element's prolongation matrices below.
+      //
+      // the index set passed to the pattern is the set of locally relevant
+      // dofs on level+1
+      IndexSet level_p1_relevant_dofs;
+      DoFTools::extract_locally_relevant_level_dofs(mg_dof, level+1,
+                                                    level_p1_relevant_dofs);
+      DynamicSparsityPattern dsp (this->sizes[level+1],
+                                  this->sizes[level],
+                                  level_p1_relevant_dofs);
+      for (typename DoFHandler<dim,spacedim>::cell_iterator cell=mg_dof.begin(level);
+           cell != mg_dof.end(level); ++cell)
+        if (cell->has_children() &&
+            ( mg_dof.get_triangulation().locally_owned_subdomain()==numbers::invalid_subdomain_id
+              || cell->level_subdomain_id()==mg_dof.get_triangulation().locally_owned_subdomain()
+            ))
+          {
+            cell->get_mg_dof_indices (dof_indices_parent);
+
+            Assert(cell->n_children()==GeometryInfo<dim>::max_children_per_cell,
+                   ExcNotImplemented());
+            for (unsigned int child=0; child<cell->n_children(); ++child)
+              {
+                // set an alias to the prolongation matrix for this child
+                const FullMatrix<double> &prolongation
+                  = mg_dof.get_fe().get_prolongation_matrix (child,
+                                                             cell->refinement_case());
+
+                Assert (prolongation.n() != 0, ExcNoProlongation());
+
+                cell->child(child)->get_mg_dof_indices (dof_indices_child);
+
+                // now tag the entries in the
+                // matrix which will be used
+                // for this pair of parent/child
+                for (unsigned int i=0; i<dofs_per_cell; ++i)
+                  {
+                    entries.resize(0);
+                    for (unsigned int j=0; j<dofs_per_cell; ++j)
+                      if (prolongation(i,j) != 0)
+                        entries.push_back (dof_indices_parent[j]);
+                    dsp.add_entries (dof_indices_child[i],
+                                     entries.begin(), entries.end());
+                  }
+              }
+          }
+
+      internal::MatrixSelector<VectorType>::reinit(*prolongation_matrices[level],
+                                                   *prolongation_sparsities[level],
+                                                   level,
+                                                   dsp,
+                                                   mg_dof);
+      dsp.reinit(0,0);
+
+      FullMatrix<double> prolongation;
+
+      // now actually build the matrices
+      for (typename DoFHandler<dim,spacedim>::cell_iterator cell=mg_dof.begin(level);
+           cell != mg_dof.end(level); ++cell)
+        if (cell->has_children() &&
+            (mg_dof.get_triangulation().locally_owned_subdomain()==numbers::invalid_subdomain_id
+             || cell->level_subdomain_id()==mg_dof.get_triangulation().locally_owned_subdomain())
+           )
+          {
+            cell->get_mg_dof_indices (dof_indices_parent);
+
+            Assert(cell->n_children()==GeometryInfo<dim>::max_children_per_cell,
+                   ExcNotImplemented());
+            for (unsigned int child=0; child<cell->n_children(); ++child)
+              {
+                // take a copy (not a reference) of the prolongation matrix for
+                // this child, since columns of it may be zeroed below
+                prolongation
+                  = mg_dof.get_fe().get_prolongation_matrix (child,
+                                                             cell->refinement_case());
+
+                if (this->mg_constrained_dofs != 0 &&
+                    this->mg_constrained_dofs->have_boundary_indices())
+                  for (unsigned int j=0; j<dofs_per_cell; ++j)
+                    if (this->mg_constrained_dofs->is_boundary_index(level, dof_indices_parent[j]))
+                      for (unsigned int i=0; i<dofs_per_cell; ++i)
+                        prolongation(i,j) = 0.;
+
+                cell->child(child)->get_mg_dof_indices (dof_indices_child);
+
+                // now set the entries in the matrix
+                for (unsigned int i=0; i<dofs_per_cell; ++i)
+                  prolongation_matrices[level]->set (dof_indices_child[i],
+                                                     dofs_per_cell,
+                                                     &dof_indices_parent[0],
+                                                     &prolongation(i,0),
+                                                     true);
+              }
+          }
+      prolongation_matrices[level]->compress(VectorOperation::insert);
+    }
+
+  this->fill_and_communicate_copy_indices(mg_dof);
+}
+
+
+
+template <typename VectorType>
+void MGTransferPrebuilt<VectorType>::print_matrices (std::ostream &os) const
+{
+  // each stored matrix is preceded by a "Level <index>" header line
+  for (unsigned int l=0; l!=prolongation_matrices.size(); ++l)
+    {
+      os << "Level " << l << std::endl;
+      (*prolongation_matrices[l]).print(os);
+      os << std::endl;
+    }
+}
+
+
+
+template <typename VectorType>
+std::size_t
+MGTransferPrebuilt<VectorType>::memory_consumption () const
+{
+  // start from the base class and add each matrix with its sparsity pattern
+  std::size_t total = MGLevelGlobalTransfer<VectorType>::memory_consumption();
+  for (unsigned int l=0; l!=prolongation_matrices.size(); ++l)
+    total += (*prolongation_matrices[l]).memory_consumption()
+             + (*prolongation_sparsities[l]).memory_consumption();
+  return total;
+}
+
+
+// explicit instantiation
+#include "mg_transfer_prebuilt.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/multigrid/mg_transfer_prebuilt.inst.in b/source/multigrid/mg_transfer_prebuilt.inst.in
new file mode 100644
index 0000000..7b5da44
--- /dev/null
+++ b/source/multigrid/mg_transfer_prebuilt.inst.in
@@ -0,0 +1,28 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+for (V1 : VECTORS_WITH_MATRIX)
+  {
+    template class MGTransferPrebuilt< V1 >;
+  }
+
+for (deal_II_dimension : DIMENSIONS; V1 : VECTORS_WITH_MATRIX)
+  {
+    template
+      void MGTransferPrebuilt< V1 >::build_matrices<deal_II_dimension>(
+        const DoFHandler<deal_II_dimension> &mg_dof);
+  }
diff --git a/source/multigrid/multigrid.cc b/source/multigrid/multigrid.cc
new file mode 100644
index 0000000..7d83ea1
--- /dev/null
+++ b/source/multigrid/multigrid.cc
@@ -0,0 +1,296 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/parallel_vector.h>
+#include <deal.II/lac/parallel_block_vector.h>
+#include <deal.II/lac/petsc_vector.h>
+#include <deal.II/lac/petsc_block_vector.h>
+#include <deal.II/lac/trilinos_vector.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/block_sparse_matrix.h>
+#include <deal.II/multigrid/mg_transfer.h>
+#include <deal.II/multigrid/mg_transfer_block.h>
+#include <deal.II/multigrid/mg_transfer_component.h>
+#include <deal.II/multigrid/mg_smoother.h>
+#include <deal.II/multigrid/mg_transfer_block.templates.h>
+#include <deal.II/multigrid/mg_transfer_component.templates.h>
+#include <deal.II/multigrid/multigrid.templates.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+MGTransferBlockBase::MGTransferBlockBase ()
+{}
+
+
+MGTransferBlockBase::MGTransferBlockBase (
+  const ConstraintMatrix &c, const MGConstrainedDoFs &mg_c)
+  :
+  constraints(&c),
+  mg_constrained_dofs(&mg_c)
+{}
+
+
+template <typename number>
+MGTransferBlock<number>::MGTransferBlock ()
+  :
+  memory(0, typeid(*this).name())
+{}
+
+
+template <typename number>
+MGTransferBlock<number>::~MGTransferBlock ()
+{
+  if (memory != 0) memory = 0;
+}
+
+
+template <typename number>
+void
+MGTransferBlock<number>::initialize (const std::vector<number> &f,
+                                     VectorMemory<Vector<number> > &mem)
+{
+  factors = f;
+  memory = &mem;
+}
+
+
+template <typename number>
+void MGTransferBlock<number>::prolongate (
+  const unsigned int   to_level,
+  BlockVector<number>       &dst,
+  const BlockVector<number> &src) const
+{
+  Assert ((to_level >= 1) && (to_level<=prolongation_matrices.size()),
+          ExcIndexRange (to_level, 1, prolongation_matrices.size()+1));
+  Assert (src.n_blocks() == this->n_mg_blocks,
+          ExcDimensionMismatch(src.n_blocks(), this->n_mg_blocks));
+  Assert (dst.n_blocks() == this->n_mg_blocks,
+          ExcDimensionMismatch(dst.n_blocks(), this->n_mg_blocks));
+  // Apply the diagonal blocks of the prolongation matrix, skipping every
+  // block that has not been selected.
+  const unsigned int n_blocks = this->mg_block.size();
+  for (unsigned int b=0; b<n_blocks; ++b)
+    {
+      if (!this->selected[b])
+        continue;
+      const unsigned int target = this->mg_block[b];
+      prolongation_matrices[to_level-1]->block(b,b).vmult (dst.block(target), src.block(target));
+    }
+}
+
+
+template <typename number>
+void MGTransferBlock<number>::restrict_and_add (
+  const unsigned int   from_level,
+  BlockVector<number>       &dst,
+  const BlockVector<number> &src) const
+{
+  Assert ((from_level >= 1) && (from_level<=prolongation_matrices.size()),
+          ExcIndexRange (from_level, 1, prolongation_matrices.size()+1));
+  Assert (src.n_blocks() == this->n_mg_blocks,
+          ExcDimensionMismatch(src.n_blocks(), this->n_mg_blocks));
+  Assert (dst.n_blocks() == this->n_mg_blocks,
+          ExcDimensionMismatch(dst.n_blocks(), this->n_mg_blocks));
+  // Apply the transpose of the selected diagonal prolongation blocks and add
+  // the result to dst; unselected blocks are left untouched.
+  for (unsigned int b=0; b<this->mg_block.size(); ++b)
+    {
+      if (!this->selected[b])
+        continue;
+      if (factors.size() != 0)
+        {
+          // Scaled case: the product goes into a temporary from the vector
+          // memory first. factors is indexed by b, so it needs entry b.
+          Assert (memory != 0, ExcNotInitialized());
+          AssertIndexRange (b, factors.size());
+          Vector<number> *aux = memory->alloc();
+          aux->reinit(dst.block(this->mg_block[b]));
+          prolongation_matrices[from_level-1]->block(b,b).Tvmult (
+            *aux, src.block(this->mg_block[b]));
+          dst.block(this->mg_block[b]).add(factors[b], *aux);
+          memory->free(aux);
+        }
+      else
+        prolongation_matrices[from_level-1]->block(b,b).Tvmult_add (
+          dst.block(this->mg_block[b]), src.block(this->mg_block[b]));
+    }
+}
+
+
+
+std::size_t MGTransferComponentBase::memory_consumption () const
+{
+  // sizeof(*this) already contains every member object, hence the subtractions
+  std::size_t result = sizeof(*this);
+  result += MemoryConsumption::memory_consumption(component_mask)
+            - sizeof(ComponentMask);
+  result += MemoryConsumption::memory_consumption(target_component)
+            - sizeof(target_component);
+  result += MemoryConsumption::memory_consumption(sizes)
+            - sizeof(sizes);
+  result += MemoryConsumption::memory_consumption(component_start)
+            - sizeof(component_start);
+  result += MemoryConsumption::memory_consumption(mg_component_start)
+            - sizeof(mg_component_start);
+  result += MemoryConsumption::memory_consumption(prolongation_sparsities)
+            - sizeof(prolongation_sparsities);
+  result += MemoryConsumption::memory_consumption(prolongation_matrices)
+            - sizeof(prolongation_matrices);
+//TODO:[GK] Add this.
+//   result += MemoryConsumption::memory_consumption(copy_to_and_from_indices)
+//          - sizeof(copy_to_and_from_indices);
+  return result;
+}
+
+
+//TODO:[GK] Add all those little vectors.
+std::size_t MGTransferBlockBase::memory_consumption () const
+{
+  std::size_t result = sizeof(*this);
+  // count what sizes holds the same way as for the members below, not as one unsigned int per entry
+  result += MemoryConsumption::memory_consumption(sizes) - sizeof(sizes);
+  result += MemoryConsumption::memory_consumption(selected)
+            - sizeof(selected);
+  result += MemoryConsumption::memory_consumption(mg_block)
+            - sizeof(mg_block);
+  result += MemoryConsumption::memory_consumption(block_start)
+            - sizeof(block_start);
+  result += MemoryConsumption::memory_consumption(mg_block_start)
+            - sizeof(mg_block_start);
+  result += MemoryConsumption::memory_consumption(prolongation_sparsities)
+            - sizeof(prolongation_sparsities);
+  result += MemoryConsumption::memory_consumption(prolongation_matrices)
+            - sizeof(prolongation_matrices);
+//TODO:[GK] Add this.
+//   result += MemoryConsumption::memory_consumption(copy_indices)
+//          - sizeof(copy_indices);
+  return result;
+}
+
+
+//----------------------------------------------------------------------//
+
+template<typename number>
+MGTransferSelect<number>::MGTransferSelect ()
+{} // no initializer list and an empty body: nothing is set up explicitly here
+
+
+template<typename number>
+MGTransferSelect<number>::MGTransferSelect (const ConstraintMatrix &c)
+  :
+  constraints(&c) // initialized from the address of c; c itself is not copied here
+{}
+
+template <typename number>
+MGTransferSelect<number>::~MGTransferSelect ()
+{} // empty body: no explicit cleanup is performed here
+
+
+template <typename number>
+void MGTransferSelect<number>::prolongate (
+  const unsigned int   to_level,
+  Vector<number>       &dst,
+  const Vector<number> &src) const
+{
+  Assert ((to_level >= 1) && (to_level<=prolongation_matrices.size()),
+          ExcIndexRange (to_level, 1, prolongation_matrices.size()+1));
+  // Only the diagonal block that belongs to the selected component takes
+  // part: that block's vmult() is called with (dst, src).
+  const unsigned int block_index = mg_target_component[mg_selected_component];
+  prolongation_matrices[to_level-1]->block(block_index, block_index).vmult (dst, src);
+}
+
+
+template <typename number>
+void MGTransferSelect<number>::restrict_and_add (
+  const unsigned int   from_level,
+  Vector<number>       &dst,
+  const Vector<number> &src) const
+{
+  Assert ((from_level >= 1) && (from_level<=prolongation_matrices.size()),
+          ExcIndexRange (from_level, 1, prolongation_matrices.size()+1));
+  // Same diagonal block as in prolongate(), but here its Tvmult_add() is
+  // called with (dst, src).
+  const unsigned int block_index = mg_target_component[mg_selected_component];
+  prolongation_matrices[from_level-1]->block(block_index, block_index).Tvmult_add (dst, src);
+}
+
+
+//----------------------------------------------------------------------//
+
+template <typename number>
+MGTransferBlockSelect<number>::MGTransferBlockSelect ()
+{} // no initializer list and an empty body: nothing is set up explicitly here
+
+
+template <typename number>
+MGTransferBlockSelect<number>::MGTransferBlockSelect (
+  const ConstraintMatrix &c, const MGConstrainedDoFs &mg_c)
+  : MGTransferBlockBase(c, mg_c) // both arguments are handed to the base class unchanged
+{}
+
+template <typename number>
+MGTransferBlockSelect<number>::~MGTransferBlockSelect ()
+{} // empty body: no explicit cleanup is performed here
+
+
+template <typename number>
+void MGTransferBlockSelect<number>::prolongate (
+  const unsigned int   to_level,
+  Vector<number>       &dst,
+  const Vector<number> &src) const
+{
+  Assert ((to_level >= 1) && (to_level<=prolongation_matrices.size()),
+          ExcIndexRange (to_level, 1, prolongation_matrices.size()+1));
+  // The transfer acts on a single block only: take the (selected_block,
+  // selected_block) entry of the matrix stored for this level and call its
+  // vmult() with (dst, src).
+  prolongation_matrices[to_level-1]->block(selected_block, selected_block).vmult (dst, src);
+}
+
+
+template <typename number>
+void MGTransferBlockSelect<number>::restrict_and_add (
+  const unsigned int   from_level,
+  Vector<number>       &dst,
+  const Vector<number> &src) const
+{
+  Assert ((from_level >= 1) && (from_level<=prolongation_matrices.size()),
+          ExcIndexRange (from_level, 1, prolongation_matrices.size()+1));
+  // Uses the same (selected_block, selected_block) entry as prolongate(),
+  // taken from the matrix stored for from_level; here its Tvmult_add() is
+  // called with (dst, src).
+  prolongation_matrices[from_level-1]->block(selected_block, selected_block).Tvmult_add (dst, src);
+}
+
+
+
+// Explicit instantiations: the contents of multigrid.inst, then the transfer classes for float and double
+
+#include "multigrid.inst"
+
+template class MGTransferBlock<float>;
+template class MGTransferBlock<double>;
+template class MGTransferSelect<float>;
+template class MGTransferSelect<double>;
+template class MGTransferBlockSelect<float>;
+template class MGTransferBlockSelect<double>;
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/multigrid/multigrid.inst.in b/source/multigrid/multigrid.inst.in
new file mode 100644
index 0000000..49c2db0
--- /dev/null
+++ b/source/multigrid/multigrid.inst.in
@@ -0,0 +1,20 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2012 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+for (VEC : SERIAL_VECTORS)
+{
+  template class Multigrid< VEC >;
+}
diff --git a/source/numerics/CMakeLists.txt b/source/numerics/CMakeLists.txt
new file mode 100644
index 0000000..c8adfa3
--- /dev/null
+++ b/source/numerics/CMakeLists.txt
@@ -0,0 +1,87 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2012 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_BINARY_DIR})
+
+SET(_src
+  data_out.cc
+  data_out_dof_data.cc
+  data_out_faces.cc
+  data_out_rotation.cc
+  data_out_stack.cc
+  data_postprocessor.cc
+  derivative_approximation.cc
+  dof_output_operator.cc
+  error_estimator_1d.cc
+  error_estimator.cc
+  error_estimator_inst2.cc
+  fe_field_function.cc
+  histogram.cc
+  matrix_creator.cc
+  matrix_creator_inst2.cc
+  matrix_creator_inst3.cc
+  matrix_tools_once.cc
+  matrix_tools.cc
+  point_value_history.cc
+  solution_transfer.cc
+  solution_transfer_inst2.cc
+  solution_transfer_inst3.cc
+  solution_transfer_inst4.cc
+  time_dependent.cc
+  vector_tools_boundary.cc
+  vector_tools_constraints.cc
+  vector_tools_integrate_difference.cc
+  vector_tools_interpolate.cc
+  vector_tools_mean_value.cc
+  vector_tools_point_value.cc
+  vector_tools_point_gradient.cc
+  vector_tools_project.cc
+  vector_tools_rhs.cc
+  )
+
+SET(_inst
+  data_out_dof_data.inst.in
+  data_out_faces.inst.in
+  data_out.inst.in
+  data_out_rotation.inst.in
+  data_out_stack.inst.in
+  data_postprocessor.inst.in
+  derivative_approximation.inst.in
+  dof_output_operator.inst.in
+  error_estimator_1d.inst.in
+  error_estimator.inst.in
+  fe_field_function.inst.in
+  matrix_creator.inst.in
+  matrix_tools.inst.in
+  point_value_history.inst.in
+  solution_transfer.inst.in
+  time_dependent.inst.in
+  vector_tools_boundary.inst.in
+  vector_tools_constraints.inst.in
+  vector_tools_integrate_difference.inst.in
+  vector_tools_interpolate.inst.in
+  vector_tools_mean_value.inst.in
+  vector_tools_point_value.inst.in
+  vector_tools_point_gradient.inst.in
+  vector_tools_project.inst.in
+  vector_tools_rhs.inst.in
+  )
+
+FILE(GLOB _header
+  ${CMAKE_SOURCE_DIR}/include/deal.II/numerics/*.h
+  )
+
+DEAL_II_ADD_LIBRARY(obj_numerics OBJECT ${_src} ${_header} ${_inst})
+EXPAND_INSTANTIATIONS(obj_numerics "${_inst}")
diff --git a/source/numerics/data_out.cc b/source/numerics/data_out.cc
new file mode 100644
index 0000000..5920e81
--- /dev/null
+++ b/source/numerics/data_out.cc
@@ -0,0 +1,539 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/work_stream.h>
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/hp/dof_handler.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/fe_dgq.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/hp/fe_values.h>
+#include <deal.II/fe/mapping_q1.h>
+
+#include <sstream>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace internal
+{
+  namespace DataOut
+  {
+    template <int dim, int spacedim>
+    ParallelData<dim,spacedim>::
+    ParallelData (const unsigned int n_datasets,
+                  const unsigned int n_subdivisions,
+                  const std::vector<unsigned int> &n_postprocessor_outputs,
+                  const Mapping<dim,spacedim> &mapping,
+                  const std::vector<std_cxx11::shared_ptr<dealii::hp::FECollection<dim,spacedim> > > &finite_elements,
+                  const UpdateFlags update_flags,
+                  const std::vector<std::vector<unsigned int> > &cell_to_patch_index_map)
+      : // everything except the index map is forwarded to the base class
+      ParallelDataBase<dim,spacedim> (n_datasets,
+                                      n_subdivisions,
+                                      n_postprocessor_outputs,
+                                      mapping,
+                                      finite_elements,
+                                      update_flags,
+                                      false), // NOTE(review): presumably the base class' "use face values" switch -- confirm
+      cell_to_patch_index_map (&cell_to_patch_index_map) // stores the address of the caller's map; the map is not copied
+    {}
+  }
+}
+
+
+
+template <int dim, typename DoFHandlerType>
+void
+DataOut<dim,DoFHandlerType>::
+build_one_patch
+(const std::pair<cell_iterator, unsigned int>                                                *cell_and_index,
+ internal::DataOut::ParallelData<DoFHandlerType::dimension, DoFHandlerType::space_dimension> &scratch_data,
+ const unsigned int                                                                           n_subdivisions,
+ const CurvedCellRegion                                                                       curved_cell_region,
+ std::vector<DataOutBase::Patch<DoFHandlerType::dimension, DoFHandlerType::space_dimension> > &patches)
+{
+  // fill a local patch object for cell_and_index->first; it is swapped into patches at the very end
+  ::dealii::DataOutBase::Patch<DoFHandlerType::dimension, DoFHandlerType::space_dimension> patch;
+  patch.n_subdivisions = n_subdivisions;
+
+  // copy the vertices of the cell into the patch, using the cell's own vertex
+  // numbering. if the mapping does not preserve locations
+  // (e.g. MappingQEulerian), we need to map the unit cell vertex to real
+  // space for the graphical output. Otherwise, we can just use the vertex info.
+  for (unsigned int vertex=0; vertex<GeometryInfo<DoFHandlerType::dimension>::vertices_per_cell; ++vertex)
+    if (scratch_data.mapping_collection[0].preserves_vertex_locations())
+      patch.vertices[vertex] = cell_and_index->first->vertex(vertex);
+    else
+      patch.vertices[vertex] = scratch_data.mapping_collection[0].transform_unit_to_real_cell
+                               (cell_and_index->first,
+                                GeometryInfo<DoFHandlerType::dimension>::unit_cell_vertex (vertex));
+
+  if (scratch_data.n_datasets > 0)
+    {
+      // reinitialize the FEValues objects held by scratch_data for this cell
+      scratch_data.reinit_all_fe_values(this->dof_data, cell_and_index->first);
+
+      const FEValuesBase<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &fe_patch_values
+        = scratch_data.get_present_fe_values (0);
+
+      const unsigned int n_q_points = fe_patch_values.n_quadrature_points;
+
+      // depending on the requested output of curved cells, if necessary
+      // append the quadrature points to the last rows of the patch.data
+      // member. This is the case if we want to produce curved cells at the
+      // boundary and this cell actually is at the boundary, or else if we
+      // want to produce curved cells everywhere
+      //
+      // note: a cell is *always* at the boundary if dim<spacedim
+      if (curved_cell_region==curved_inner_cells
+          ||
+          (curved_cell_region==curved_boundary
+           &&
+           (cell_and_index->first->at_boundary()
+            ||
+            (DoFHandlerType::dimension != DoFHandlerType::space_dimension))))
+        {
+          Assert(patch.space_dim==DoFHandlerType::space_dimension, ExcInternalError());
+          const std::vector<Point<DoFHandlerType::space_dimension> > &q_points=fe_patch_values.get_quadrature_points();
+          // resize the patch.data member in order to have enough memory for
+          // the quadrature points as well
+          patch.data.reinit (scratch_data.n_datasets+DoFHandlerType::space_dimension, n_q_points);
+          // set the flag indicating that for this cell the points are
+          // explicitly given
+          patch.points_are_available=true;
+          // copy points to patch.data
+          for (unsigned int i=0; i<DoFHandlerType::space_dimension; ++i)
+            for (unsigned int q=0; q<n_q_points; ++q)
+              patch.data(patch.data.size(0)-DoFHandlerType::space_dimension+i,q)=q_points[q][i];
+        }
+      else
+        {
+          patch.data.reinit(scratch_data.n_datasets, n_q_points);
+          patch.points_are_available = false;
+        }
+
+
+      // index of the next row of patch.data to be written
+      unsigned int offset=0;
+
+      // first fill dof_data
+      for (unsigned int dataset=0; dataset<this->dof_data.size(); ++dataset)
+        {
+          const FEValuesBase<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &this_fe_patch_values
+            = scratch_data.get_present_fe_values (dataset);
+          const unsigned int n_components =
+            this_fe_patch_values.get_fe().n_components();
+
+          const DataPostprocessor<DoFHandlerType::space_dimension> *postprocessor=this->dof_data[dataset]->postprocessor;
+
+          if (postprocessor != 0)
+            {
+              // we have to postprocess the data, so determine, which fields
+              // have to be updated
+              const UpdateFlags update_flags=postprocessor->get_needed_update_flags();
+              if (n_components == 1)
+                {
+                  // at each point there is only one component of value,
+                  // gradient etc.
+                  if (update_flags & update_values)
+                    this->dof_data[dataset]->get_function_values (this_fe_patch_values,
+                                                                  scratch_data.patch_values);
+                  if (update_flags & update_gradients)
+                    this->dof_data[dataset]->get_function_gradients (this_fe_patch_values,
+                                                                     scratch_data.patch_gradients);
+                  if (update_flags & update_hessians)
+                    this->dof_data[dataset]->get_function_hessians (this_fe_patch_values,
+                                                                    scratch_data.patch_hessians);
+
+                  if (update_flags & update_quadrature_points)
+                    scratch_data.patch_evaluation_points = this_fe_patch_values.get_quadrature_points();
+
+
+                  std::vector<Point<DoFHandlerType::space_dimension> > dummy_normals;
+                  postprocessor->
+                  compute_derived_quantities_scalar(scratch_data.patch_values,
+                                                    scratch_data.patch_gradients,
+                                                    scratch_data.patch_hessians,
+                                                    dummy_normals,
+                                                    scratch_data.patch_evaluation_points,
+                                                    scratch_data.postprocessed_values[dataset]);
+                }
+              else
+                {
+                  scratch_data.resize_system_vectors (n_components);
+
+                  // at each point there is a vector valued function and its
+                  // derivative...
+                  if (update_flags & update_values)
+                    this->dof_data[dataset]->get_function_values (this_fe_patch_values,
+                                                                  scratch_data.patch_values_system);
+                  if (update_flags & update_gradients)
+                    this->dof_data[dataset]->get_function_gradients (this_fe_patch_values,
+                                                                     scratch_data.patch_gradients_system);
+                  if (update_flags & update_hessians)
+                    this->dof_data[dataset]->get_function_hessians (this_fe_patch_values,
+                                                                    scratch_data.patch_hessians_system);
+
+                  if (update_flags & update_quadrature_points)
+                    scratch_data.patch_evaluation_points = this_fe_patch_values.get_quadrature_points();
+
+                  std::vector<Point<DoFHandlerType::space_dimension> > dummy_normals;
+
+                  postprocessor->
+                  compute_derived_quantities_vector(scratch_data.patch_values_system,
+                                                    scratch_data.patch_gradients_system,
+                                                    scratch_data.patch_hessians_system,
+                                                    dummy_normals,
+                                                    scratch_data.patch_evaluation_points,
+                                                    scratch_data.postprocessed_values[dataset]);
+                }
+
+              for (unsigned int q=0; q<n_q_points; ++q)
+                for (unsigned int component=0;
+                     component<this->dof_data[dataset]->n_output_variables;
+                     ++component)
+                  patch.data(offset+component,q)
+                    = scratch_data.postprocessed_values[dataset][q](component);
+            }
+          else
+            // now we use the given data vector without modifications. again,
+            // we treat single component functions separately for efficiency
+            // reasons.
+            if (n_components == 1)
+              {
+                this->dof_data[dataset]->get_function_values (this_fe_patch_values,
+                                                              scratch_data.patch_values);
+                for (unsigned int q=0; q<n_q_points; ++q)
+                  patch.data(offset,q) = scratch_data.patch_values[q];
+              }
+            else
+              {
+                scratch_data.resize_system_vectors(n_components);
+                this->dof_data[dataset]->get_function_values (this_fe_patch_values,
+                                                              scratch_data.patch_values_system);
+                for (unsigned int component=0; component<n_components;
+                     ++component)
+                  for (unsigned int q=0; q<n_q_points; ++q)
+                    patch.data(offset+component,q) =
+                      scratch_data.patch_values_system[q](component);
+              }
+          // increment the counter for the actual data record
+          offset+=this->dof_data[dataset]->n_output_variables;
+        }
+
+      // then do the cell data, one row of patch.data per cell data set. the
+      // value is looked up with cell_and_index->second; the assertion below
+      // rejects cells that have children, i.e. only active cells are supported
+      if (this->cell_data.size() != 0)
+        {
+          Assert (!cell_and_index->first->has_children(), ExcNotImplemented());
+
+          for (unsigned int dataset=0; dataset<this->cell_data.size(); ++dataset)
+            {
+              const double value
+                = this->cell_data[dataset]->get_cell_data_value (cell_and_index->second);
+              for (unsigned int q=0; q<n_q_points; ++q)
+                patch.data(offset+dataset,q) = value;
+            }
+        }
+    }
+
+
+  for (unsigned int f=0; f<GeometryInfo<DoFHandlerType::dimension>::faces_per_cell; ++f)
+    {
+      // let's look up whether the neighbor behind that face is noted in the
+      // table of cells which we treat. this can only happen if the neighbor
+      // exists, and is on the same level as this cell, but it may also happen
+      // that the neighbor is not a member of the range of cells over which we
+      // loop, in which case the respective entry in the
+      // cell_to_patch_index_map will have the value no_neighbor. (note that
+      // since we allocated only as much space in this array as the maximum
+      // index of the cells we loop over, not every neighbor may have its
+      // space in it, so we have to assume that it is extended by values
+      // no_neighbor)
+      if (cell_and_index->first->at_boundary(f)
+          ||
+          (cell_and_index->first->neighbor(f)->level() != cell_and_index->first->level()))
+        {
+          patch.neighbors[f] = numbers::invalid_unsigned_int;
+          continue;
+        }
+
+      const cell_iterator neighbor = cell_and_index->first->neighbor(f);
+      Assert (static_cast<unsigned int>(neighbor->level()) <
+              scratch_data.cell_to_patch_index_map->size(),
+              ExcInternalError());
+      if ((static_cast<unsigned int>(neighbor->index()) >=
+           (*scratch_data.cell_to_patch_index_map)[neighbor->level()].size())
+          ||
+          ((*scratch_data.cell_to_patch_index_map)[neighbor->level()][neighbor->index()]
+           ==
+           dealii::DataOutBase::Patch<DoFHandlerType::dimension>::no_neighbor))
+        {
+          patch.neighbors[f] = numbers::invalid_unsigned_int;
+          continue;
+        }
+
+      // now, there is a neighbor, so get its patch number and set it for the
+      // neighbor index
+      patch.neighbors[f]
+        = (*scratch_data.cell_to_patch_index_map)[neighbor->level()][neighbor->index()];
+    }
+
+  const unsigned int patch_idx =
+    (*scratch_data.cell_to_patch_index_map)[cell_and_index->first->level()][cell_and_index->first->index()];
+  // did we mess up the indices?
+  Assert(patch_idx < patches.size(), ExcInternalError());
+  patch.patch_index = patch_idx;
+
+  // Put the patch into the patches vector. instead of copying the data,
+  // simply swap the contents to avoid the penalty of writing into another
+  // processor's memory
+  patches[patch_idx].swap (patch);
+}
+
+
+
+template <int dim, typename DoFHandlerType>
+void DataOut<dim,DoFHandlerType>::build_patches (const unsigned int n_subdivisions)
+{
+  build_patches (StaticMappingQ1<DoFHandlerType::dimension,DoFHandlerType::space_dimension>::mapping,
+                 n_subdivisions, no_curved_cells); // forwards to the overload taking a mapping, with StaticMappingQ1 and no curved cells
+}
+
+
+
+template <int dim, typename DoFHandlerType>
+void DataOut<dim,DoFHandlerType>::build_patches
+(const Mapping<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &mapping,
+ const unsigned int                                                        n_subdivisions_,
+ const CurvedCellRegion                                                    curved_region)
+{
+  // Creates one patch per cell of the first/next_locally_owned_cell() sequence and stores them in this->patches.
+  // Check consistency of redundant template parameter
+  Assert (dim==DoFHandlerType::dimension, ExcDimensionMismatch(dim, DoFHandlerType::dimension));
+
+  Assert (this->triangulation != 0,
+          Exceptions::DataOut::ExcNoTriangulationSelected());
+
+  const unsigned int n_subdivisions = (n_subdivisions_ != 0)
+                                      ? n_subdivisions_
+                                      : this->default_subdivisions;
+  Assert (n_subdivisions >= 1,
+          Exceptions::DataOut::ExcInvalidNumberOfSubdivisions(n_subdivisions));
+
+  // First count the cells we want to create patches of. Also fill the object
+  // that maps the cell indices to the patch numbers, as this will be needed
+  // for generation of neighborship information.
+  // Note, there is a confusing mess of different indices here at play:
+  // patch_index - the index of a patch in all_cells
+  // cell->index - only unique on each level, used in cell_to_patch_index_map
+  // active_index - index for a cell when counting from begin_active() using ++cell
+  // cell_index - unique index of a cell counted using next_locally_owned_cell()
+  //              starting from first_locally_owned_cell()
+  //
+  // It turns out that we create one patch for each selected cell, so patch_index==cell_index.
+  //
+  // will be cell_to_patch_index_map[cell->level][cell->index] = patch_index
+  std::vector<std::vector<unsigned int> > cell_to_patch_index_map;
+  cell_to_patch_index_map.resize (this->triangulation->n_levels());
+  {
+    // max_index[l] is the largest cell->index on level l (0 if no cell lies on that level).
+    // It is collected in a single sweep over the cells rather than one sweep per level.
+    std::vector<unsigned int> max_index (this->triangulation->n_levels());
+    for (cell_iterator cell=first_locally_owned_cell(); cell != this->triangulation->end();
+         cell = next_locally_owned_cell(cell))
+      max_index[cell->level()] = std::max (max_index[cell->level()],
+                                           static_cast<unsigned int>(cell->index()));
+
+    for (unsigned int l=0; l<this->triangulation->n_levels(); ++l)
+      cell_to_patch_index_map[l].resize (max_index[l]+1,
+                                         dealii::DataOutBase::Patch<DoFHandlerType::dimension,
+                                         DoFHandlerType::space_dimension>::no_neighbor);
+  }
+
+  // will be all_cells[patch_index] = pair(cell, active_index)
+  std::vector<std::pair<cell_iterator, unsigned int> > all_cells;
+  {
+    // important: we need to compute the active_index of the cell in the range
+    // 0..n_active_cells() because this is where we need to look up cell
+    // data from (cell data vectors do not have the length distance computed by
+    // first_locally_owned_cell/next_locally_owned_cell because this might skip
+    // some values (FilteredIterator).
+    active_cell_iterator active_cell = this->triangulation->begin_active();
+    unsigned int active_index = 0;
+    cell_iterator cell = first_locally_owned_cell();
+    for (; cell != this->triangulation->end();
+         cell = next_locally_owned_cell(cell))
+      {
+        // move forward until active_cell points at the cell (cell) we are looking
+        // at to compute the current active_index
+        while (active_cell!=this->triangulation->end()
+               && cell->active()
+               && active_cell_iterator(cell) != active_cell)
+          {
+            ++active_cell;
+            ++active_index;
+          }
+
+        Assert (static_cast<unsigned int>(cell->level()) <
+                cell_to_patch_index_map.size(),
+                ExcInternalError());
+        Assert (static_cast<unsigned int>(cell->index()) <
+                cell_to_patch_index_map[cell->level()].size(),
+                ExcInternalError());
+        Assert (active_index < this->triangulation->n_active_cells(),
+                ExcInternalError());
+        cell_to_patch_index_map[cell->level()][cell->index()] = all_cells.size();
+
+        all_cells.push_back (std::make_pair(cell, active_index));
+      }
+  }
+
+  this->patches.clear ();
+  this->patches.resize(all_cells.size());
+
+  // now create a default object for the WorkStream object to work with
+  unsigned int n_datasets=this->cell_data.size();
+  for (unsigned int i=0; i<this->dof_data.size(); ++i)
+    n_datasets += this->dof_data[i]->n_output_variables;
+
+  std::vector<unsigned int> n_postprocessor_outputs (this->dof_data.size());
+  for (unsigned int dataset=0; dataset<this->dof_data.size(); ++dataset)
+    if (this->dof_data[dataset]->postprocessor)
+      n_postprocessor_outputs[dataset] = this->dof_data[dataset]->n_output_variables;
+    else
+      n_postprocessor_outputs[dataset] = 0;
+
+  const CurvedCellRegion curved_cell_region
+    = (n_subdivisions<2 ? no_curved_cells : curved_region);
+
+  UpdateFlags update_flags = update_values;
+  if (curved_cell_region != no_curved_cells)
+    update_flags |= update_quadrature_points;
+
+  for (unsigned int i=0; i<this->dof_data.size(); ++i)
+    if (this->dof_data[i]->postprocessor)
+      update_flags |= this->dof_data[i]->postprocessor->get_needed_update_flags();
+  // perhaps update_normal_vectors is present, which would only be useful on
+  // faces, but we may not use it here.
+  Assert (!(update_flags & update_normal_vectors),
+          ExcMessage("The update of normal vectors may not be requested for evaluation of "
+                     "data on cells via DataPostprocessor."));
+
+  internal::DataOut::ParallelData<DoFHandlerType::dimension, DoFHandlerType::space_dimension>
+  thread_data (n_datasets, n_subdivisions,
+               n_postprocessor_outputs,
+               mapping,
+               this->get_finite_elements(),
+               update_flags,
+               cell_to_patch_index_map);
+
+  // now build the patches in parallel
+  if (all_cells.size() > 0)
+    WorkStream::run (&all_cells[0],
+                     &all_cells[0]+all_cells.size(),
+                     std_cxx11::bind(&DataOut<dim,DoFHandlerType>::build_one_patch,
+                                     this,
+                                     std_cxx11::_1,
+                                     std_cxx11::_2,
+                                     /* no std_cxx11::_3, since this function doesn't actually need a
+                                        copy data object -- it just writes everything right into the
+                                        output array */
+                                     n_subdivisions,
+                                     curved_cell_region,
+                                     std_cxx11::ref(this->patches)),
+                     // no copy-local-to-global function needed here
+                     std_cxx11::function<void (const int &)>(),
+                     thread_data,
+                     /* dummy CopyData object = */ 0,
+                     // experimenting shows that things run a bit faster with
+                     // more items in flight at any given time and more cells
+                     // worked on per item than WorkStream's defaults (about 5%
+                     // and 10% improvement, respectively, on the testcase
+                     // discussed in @ref workstream_paper, on 32 cores)
+                     /* queue_length = */ 8*MultithreadInfo::n_threads(),
+                     /* chunk_size = */ 64);
+}
+
+
+
+template <int dim, typename DoFHandlerType>
+typename DataOut<dim,DoFHandlerType>::cell_iterator
+DataOut<dim,DoFHandlerType>::first_cell ()
+{
+  return this->triangulation->begin_active (); // whatever begin_active() of the attached triangulation yields
+}
+
+
+
+template <int dim, typename DoFHandlerType>
+typename DataOut<dim,DoFHandlerType>::cell_iterator
+DataOut<dim,DoFHandlerType>::next_cell
+(const typename DataOut<dim,DoFHandlerType>::cell_iterator &cell)
+{
+  // turn cell into an active iterator first, so that incrementing it moves
+  // on to the next active cell; that successor is what gets returned
+  typedef typename Triangulation<DoFHandlerType::dimension,DoFHandlerType::space_dimension>::active_cell_iterator active_iterator;
+  active_iterator successor = cell;
+  ++successor;
+  return successor;
+}
+
+
+
+template <int dim, typename DoFHandlerType>
+typename DataOut<dim,DoFHandlerType>::cell_iterator
+DataOut<dim,DoFHandlerType>::first_locally_owned_cell ()
+{
+  // Walk the first_cell()/next_cell() sequence and stop at the first cell
+  // that either has children (i.e. is not active) or is locally owned. Active
+  // cells that are not locally owned (ghost or artificial ones) are stepped
+  // over. The past-the-end iterator is returned if no such cell exists.
+  typename DataOut<dim,DoFHandlerType>::cell_iterator candidate = first_cell();
+  for (; candidate != this->triangulation->end(); candidate = next_cell(candidate))
+    {
+      if (candidate->has_children() || candidate->is_locally_owned())
+        break;
+    }
+  return candidate;
+}
+
+
+
+template <int dim, typename DoFHandlerType>
+typename DataOut<dim,DoFHandlerType>::cell_iterator
+DataOut<dim,DoFHandlerType>::next_locally_owned_cell
+(const typename DataOut<dim,DoFHandlerType>::cell_iterator &old_cell)
+{
+  // Step past old_cell at least once, then keep stepping while the cell has
+  // no children and is not locally owned; may return the end iterator.
+  typename DataOut<dim,DoFHandlerType>::cell_iterator candidate = next_cell(old_cell);
+  for (; candidate != this->triangulation->end(); candidate = next_cell(candidate))
+    if (candidate->has_children() || candidate->is_locally_owned())
+      break;
+  return candidate;
+}
+
+
+// explicit instantiations
+#include "data_out.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/numerics/data_out.inst.in b/source/numerics/data_out.inst.in
new file mode 100644
index 0000000..2271d25
--- /dev/null
+++ b/source/numerics/data_out.inst.in
@@ -0,0 +1,40 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+// Instantiate internal::DataOut::ParallelData for every pair with dim <= spacedim.
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension : DIMENSIONS)
+{
+  namespace internal \{
+    namespace DataOut \{
+#if deal_II_dimension <= deal_II_space_dimension
+      template struct ParallelData<deal_II_dimension,deal_II_space_dimension>;
+#endif
+    \}
+  \}
+}
+
+// Per DH template: codim 0 always, codim 1 for dimension < 3, and <1,3> on the dimension-3 pass.
+for (DH : DOFHANDLER_TEMPLATES; deal_II_dimension : DIMENSIONS)
+{
+  template class DataOut<deal_II_dimension, DH<deal_II_dimension> >;
+  #if deal_II_dimension < 3
+  template class DataOut<deal_II_dimension, DH<deal_II_dimension,deal_II_dimension+1> >;
+  #endif
+
+  #if deal_II_dimension == 3
+     template class DataOut<1, DH<1,3> >;
+  #endif
+}
\ No newline at end of file
diff --git a/source/numerics/data_out_dof_data.cc b/source/numerics/data_out_dof_data.cc
new file mode 100644
index 0000000..92a859d
--- /dev/null
+++ b/source/numerics/data_out_dof_data.cc
@@ -0,0 +1,1369 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/work_stream.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/parallel_vector.h>
+#include <deal.II/lac/parallel_block_vector.h>
+#include <deal.II/lac/petsc_vector.h>
+#include <deal.II/lac/petsc_block_vector.h>
+#include <deal.II/lac/trilinos_vector.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/numerics/data_out_dof_data.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/fe_dgq.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/hp/dof_handler.h>
+#include <deal.II/hp/fe_values.h>
+#include <deal.II/fe/mapping_q1.h>
+
+#include <sstream>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace internal
+{
+  namespace DataOut
+  {
+    template <int dim, int spacedim>
+    ParallelDataBase<dim,spacedim>::
+    ParallelDataBase (const unsigned int n_datasets,
+                      const unsigned int n_subdivisions,
+                      const std::vector<unsigned int> &n_postprocessor_outputs,
+                      const Mapping<dim,spacedim> &mapping,
+                      const std::vector<std_cxx11::shared_ptr<dealii::hp::FECollection<dim,spacedim> > > &finite_elements,
+                      const UpdateFlags update_flags,
+                      const bool        use_face_values)
+      :
+      n_datasets (n_datasets),
+      n_subdivisions (n_subdivisions),
+      postprocessed_values (n_postprocessor_outputs.size()),
+      mapping_collection (mapping),
+      finite_elements (finite_elements),
+      update_flags (update_flags)
+    {
+      unsigned int n_q_points = 0;
+      if (use_face_values == false)
+        {
+          dealii::hp::QCollection<dim>
+          quadrature(QIterated<dim>(QTrapez<1>(), n_subdivisions));
+          n_q_points = quadrature[0].size();
+          x_fe_values.resize(this->finite_elements.size());
+          for (unsigned int i=0; i<this->finite_elements.size(); ++i)
+            {
+              // if an earlier entry points to the very same FECollection
+              // object, share its hp::FEValues object instead of creating one
+              for (unsigned int j=0; j<i; ++j)
+                if (this->finite_elements[i].get() ==
+                    this->finite_elements[j].get())
+                  {
+                    x_fe_values[i] = x_fe_values[j];
+                    break;
+                  }
+              if (x_fe_values[i].get() == 0)
+                x_fe_values[i].reset(new dealii::hp::FEValues<dim,spacedim>
+                                     (this->mapping_collection,
+                                      *this->finite_elements[i],
+                                      quadrature,
+                                      this->update_flags));
+            }
+        }
+      else
+        {
+          dealii::hp::QCollection<dim-1>
+          quadrature(QIterated<dim-1>(QTrapez<1>(), n_subdivisions));
+          n_q_points = quadrature[0].size();
+          x_fe_face_values.resize(this->finite_elements.size());
+          for (unsigned int i=0; i<this->finite_elements.size(); ++i)
+            {
+              // if an earlier entry points to the very same FECollection
+              // object, share its hp::FEFaceValues object instead of creating one
+              for (unsigned int j=0; j<i; ++j)
+                if (this->finite_elements[i].get() ==
+                    this->finite_elements[j].get())
+                  {
+                    x_fe_face_values[i] = x_fe_face_values[j];
+                    break;
+                  }
+              if (x_fe_face_values[i].get() == 0)
+                x_fe_face_values[i].reset(new dealii::hp::FEFaceValues<dim,spacedim>
+                                          (this->mapping_collection,
+                                           *this->finite_elements[i],
+                                           quadrature,
+                                           this->update_flags));
+            }
+        }
+      // one scratch entry per point of the quadrature formula chosen above
+      patch_values.resize (n_q_points);
+      patch_values_system.resize (n_q_points);
+      patch_gradients.resize (n_q_points);
+      patch_gradients_system.resize (n_q_points);
+      patch_hessians.resize (n_q_points);
+      patch_hessians_system.resize (n_q_points);
+      // postprocessor storage only for datasets that announce a nonzero number of outputs
+      for (unsigned int dataset=0; dataset<n_postprocessor_outputs.size(); ++dataset)
+        if (n_postprocessor_outputs[dataset] != 0)
+          postprocessed_values[dataset]
+          .resize(n_q_points,
+                  dealii::Vector<double>(n_postprocessor_outputs[dataset]));
+    }
+
+
+
+
+
+    // copy constructor: copies the scratch arrays but builds fresh FEValues /
+    // FEFaceValues objects, so that each copy (e.g. each thread) has its own
+    template <int dim, int spacedim>
+    ParallelDataBase<dim,spacedim>::
+    ParallelDataBase (const ParallelDataBase<dim,spacedim> &data)
+      :
+      n_datasets (data.n_datasets),
+      n_subdivisions (data.n_subdivisions),
+      patch_values (data.patch_values),
+      patch_values_system (data.patch_values_system),
+      patch_gradients (data.patch_gradients),
+      patch_gradients_system (data.patch_gradients_system),
+      patch_hessians (data.patch_hessians),
+      patch_hessians_system (data.patch_hessians_system),
+      postprocessed_values (data.postprocessed_values),
+      mapping_collection (data.mapping_collection),
+      finite_elements (data.finite_elements),
+      update_flags (data.update_flags)
+    {
+      if (data.x_fe_values.empty() == false)
+        {
+          Assert(data.x_fe_face_values.empty() == true, ExcInternalError());
+          dealii::hp::QCollection<dim>
+          quadrature(QIterated<dim>(QTrapez<1>(), n_subdivisions));
+          x_fe_values.resize(this->finite_elements.size());
+          for (unsigned int i=0; i<this->finite_elements.size(); ++i)
+            {
+              // if an earlier entry points to the very same FECollection
+              // object, share its hp::FEValues object instead of creating one
+              for (unsigned int j=0; j<i; ++j)
+                if (this->finite_elements[i].get() ==
+                    this->finite_elements[j].get())
+                  {
+                    x_fe_values[i] = x_fe_values[j];
+                    break;
+                  }
+              if (x_fe_values[i].get() == 0)
+                x_fe_values[i].reset(new dealii::hp::FEValues<dim,spacedim>
+                                     (this->mapping_collection,
+                                      *this->finite_elements[i],
+                                      quadrature,
+                                      this->update_flags));
+            }
+        }
+      else
+        {
+          dealii::hp::QCollection<dim-1>
+          quadrature(QIterated<dim-1>(QTrapez<1>(), n_subdivisions));
+          x_fe_face_values.resize(this->finite_elements.size());
+          for (unsigned int i=0; i<this->finite_elements.size(); ++i)
+            {
+              // if an earlier entry points to the very same FECollection
+              // object, share its hp::FEFaceValues object instead of creating one
+              for (unsigned int j=0; j<i; ++j)
+                if (this->finite_elements[i].get() ==
+                    this->finite_elements[j].get())
+                  {
+                    x_fe_face_values[i] = x_fe_face_values[j];
+                    break;
+                  }
+              if (x_fe_face_values[i].get() == 0)
+                x_fe_face_values[i].reset(new dealii::hp::FEFaceValues<dim,spacedim>
+                                          (this->mapping_collection,
+                                           *this->finite_elements[i],
+                                           quadrature,
+                                           this->update_flags));
+            }
+        }
+    }
+
+
+
+    template <int dim, int spacedim>
+    template <typename DoFHandlerType>
+    void
+    ParallelDataBase<dim,spacedim>::
+    reinit_all_fe_values(std::vector<std_cxx11::shared_ptr<DataEntryBase<DoFHandlerType> > > &dof_data,
+                         const typename dealii::Triangulation<dim,spacedim>::cell_iterator   &cell,
+                         const unsigned int                                                   face)
+    {
+      // an empty x_fe_values means the face evaluation objects are in use
+      const bool use_faces = x_fe_values.empty();
+      for (unsigned int d=0; d<dof_data.size(); ++d)
+        {
+          // an FECollection that already occurred at a smaller index was
+          // handled there, so nothing is reinitialized for the repeat
+          bool seen_before = false;
+          for (unsigned int j=0; j<d && !seen_before; ++j)
+            seen_before = (finite_elements[d].get() == finite_elements[j].get());
+          if (seen_before)
+            continue;
+          typename DoFHandlerType::active_cell_iterator dh_cell(&cell->get_triangulation(),
+                                                                cell->level(),
+                                                                cell->index(),
+                                                                dof_data[d]->dof_handler);
+          if (use_faces)
+            {
+              AssertIndexRange(face, GeometryInfo<dim>::faces_per_cell);
+              x_fe_face_values[d]->reinit(dh_cell, face);
+            }
+          else
+            x_fe_values[d]->reinit (dh_cell);
+        }
+
+      // without any DoF data, the evaluation object at index 0 is
+      // reinitialized with the plain triangulation cell instead
+      if (dof_data.empty() && use_faces)
+        {
+          AssertIndexRange(face, GeometryInfo<dim>::faces_per_cell);
+          x_fe_face_values[0]->reinit(cell, face);
+        }
+      else if (dof_data.empty())
+        x_fe_values[0]->reinit (cell);
+    }
+
+
+
+    template <int dim, int spacedim>
+    const FEValuesBase<dim,spacedim> &
+    ParallelDataBase<dim,spacedim>::
+    get_present_fe_values(const unsigned int dataset) const
+    {
+      AssertIndexRange(dataset, finite_elements.size());
+      // cell evaluation objects, when present, are the ones in use
+      if (!x_fe_values.empty())
+        return x_fe_values[dataset]->get_present_fe_values();
+      return x_fe_face_values[dataset]->get_present_fe_values();
+    }
+
+
+
+    template <int dim, int spacedim>
+    void
+    ParallelDataBase<dim,spacedim>::
+    resize_system_vectors(const unsigned int n_components)
+    {
+      Assert(patch_values_system.size() > 0, ExcInternalError());
+      AssertDimension(patch_values_system.size(),
+                      patch_gradients_system.size());
+      AssertDimension(patch_values_system.size(),
+                      patch_hessians_system.size());
+      // the first entry decides: if it already has n_components, change nothing
+      if (patch_values_system[0].size() != n_components)
+        for (unsigned int point=0; point<patch_values_system.size(); ++point)
+          {
+            patch_values_system[point].reinit(n_components);
+            patch_gradients_system[point].resize(n_components);
+            patch_hessians_system[point].resize(n_components);
+          }
+    }
+
+
+
+
+    /**
+     * Append a patch computed by a WorkStream worker to @p patches, storing its index.
+     */
+    template <int dim, int spacedim>
+    void
+    append_patch_to_list (const DataOutBase::Patch<dim,spacedim> &patch,
+                          std::vector<DataOutBase::Patch<dim,spacedim> > &patches)
+    {
+      const std::size_t position = patches.size();
+      patches.push_back (patch);
+      patches.back().patch_index = position;
+    }
+  }
+}
+
+namespace internal
+{
+  namespace DataOut
+  {
+    template <typename DoFHandlerType>
+    DataEntryBase<DoFHandlerType>::DataEntryBase
+    (const DoFHandlerType           *dofs,
+     const std::vector<std::string> &names_in,
+     const std::vector<DataComponentInterpretation::DataComponentInterpretation> &data_component_interpretation)
+      :
+      dof_handler (dofs, typeid(dealii::DataOut_DoFData<DoFHandlerType,DoFHandlerType::dimension,DoFHandlerType::space_dimension>).name()),
+      names(names_in),
+      data_component_interpretation (data_component_interpretation),
+      postprocessor(0, typeid(*this).name()),
+      n_output_variables(names.size())
+    {
+      Assert (names.size() == data_component_interpretation.size(),
+              ExcDimensionMismatch(data_component_interpretation.size(),
+                                   names.size()));
+      // every name may consist only of letters, digits and the characters
+      // _<>(); the exception carries the position of the first offender
+      for (unsigned int i=0; i<names.size(); ++i)
+        Assert (names[i].find_first_not_of("abcdefghijklmnopqrstuvwxyz"
+                                           "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                                           "0123456789_<>()") == std::string::npos,
+                Exceptions::DataOut::ExcInvalidCharacter (names[i],
+                                                          names[i].find_first_not_of("abcdefghijklmnopqrstuvwxyz"
+                                                              "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                                                              "0123456789_<>()")));
+    }
+
+
+    // Constructor taking names and component interpretation from the given postprocessor.
+    template <typename DoFHandlerType>
+    DataEntryBase<DoFHandlerType>::DataEntryBase
+    (const DoFHandlerType *dofs,
+     const DataPostprocessor<DoFHandlerType::space_dimension> *data_postprocessor)
+      :
+      dof_handler (dofs, typeid(dealii::DataOut_DoFData<DoFHandlerType,DoFHandlerType::dimension,DoFHandlerType::space_dimension>).name()),
+      names(data_postprocessor->get_names()),
+      data_component_interpretation (data_postprocessor->get_data_component_interpretation()),
+      postprocessor(data_postprocessor, typeid(*this).name()),
+      n_output_variables(names.size())
+    {
+      Assert (data_postprocessor->get_names().size()
+              ==
+              data_postprocessor->get_data_component_interpretation().size(),
+              ExcDimensionMismatch (data_postprocessor->get_names().size(),
+                                    data_postprocessor->get_data_component_interpretation().size()));
+      // every name may consist only of letters, digits and the characters
+      // _<>(); the exception carries the position of the first offender
+      for (unsigned int i=0; i<names.size(); ++i)
+        Assert (names[i].find_first_not_of("abcdefghijklmnopqrstuvwxyz"
+                                           "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                                           "0123456789_<>()") == std::string::npos,
+                Exceptions::DataOut::ExcInvalidCharacter (names[i],
+                                                          names[i].find_first_not_of("abcdefghijklmnopqrstuvwxyz"
+                                                              "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                                                              "0123456789_<>()")));
+    }
+
+
+    // Destructor. Intentionally empty: the body performs no work of its own.
+    template <typename DoFHandlerType>
+    DataEntryBase<DoFHandlerType>::~DataEntryBase ()
+    {}
+
+
+
+    /**
+     * DataEntryBase implementation for one concrete vector type: stores a
+     * pointer to a VectorType and provides the functions to extract data from it.
+     *
+     * @author Wolfgang Bangerth, 2004
+     */
+    template <typename DoFHandlerType, typename VectorType>
+    class DataEntry : public DataEntryBase<DoFHandlerType>
+    {
+    public:
+      /**
+       * Constructor. Give a list of names for the individual components of
+       * the vector and their interpretation as scalar or vector data. This
+       * constructor assumes that no postprocessor is going to be used.
+       */
+      DataEntry
+      (const DoFHandlerType           *dofs,
+       const VectorType               *data,
+       const std::vector<std::string> &names,
+       const std::vector<DataComponentInterpretation::DataComponentInterpretation> &data_component_interpretation);
+
+      /**
+       * Constructor when a data postprocessor is going to be used. In that
+       * case, the names and vector declarations are going to be acquired from
+       * the postprocessor.
+       */
+      DataEntry (const DoFHandlerType                                     *dofs,
+                 const VectorType                                         *data,
+                 const DataPostprocessor<DoFHandlerType::space_dimension> *data_postprocessor);
+
+      /**
+       * Assuming that the stored vector is a cell vector, extract the given
+       * element from it.
+       */
+      virtual
+      double
+      get_cell_data_value (const unsigned int cell_number) const;
+
+      /**
+       * Given a FEValuesBase object, extract the values on the present cell
+       * from the vector we actually store.
+       */
+      virtual
+      void
+      get_function_values
+      (const FEValuesBase<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &fe_patch_values,
+       std::vector<double> &patch_values) const;
+
+      /**
+       * Given a FEValuesBase object, extract the values on the present cell
+       * from the vector we actually store. This function does the same as the
+       * one above but for vector-valued finite elements.
+       */
+      virtual
+      void
+      get_function_values
+      (const FEValuesBase<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &fe_patch_values,
+       std::vector<dealii::Vector<double> > &patch_values_system) const;
+
+      /**
+       * Given a FEValuesBase object, extract the gradients on the present
+       * cell from the vector we actually store.
+       */
+      virtual
+      void
+      get_function_gradients
+      (const FEValuesBase<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &fe_patch_values,
+       std::vector<Tensor<1,DoFHandlerType::space_dimension> > &patch_gradients) const;
+
+      /**
+       * Given a FEValuesBase object, extract the gradients on the present
+       * cell from the vector we actually store. This function does the same
+       * as the one above but for vector-valued finite elements.
+       */
+      virtual
+      void
+      get_function_gradients
+      (const FEValuesBase<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &fe_patch_values,
+       std::vector<std::vector<Tensor<1,DoFHandlerType::space_dimension> > > &patch_gradients_system) const;
+
+      /**
+       * Given a FEValuesBase object, extract the second derivatives on the
+       * present cell from the vector we actually store.
+       */
+      virtual
+      void
+      get_function_hessians
+      (const FEValuesBase<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &fe_patch_values,
+       std::vector<Tensor<2,DoFHandlerType::space_dimension> > &patch_hessians) const;
+
+      /**
+       * Given a FEValuesBase object, extract the second derivatives on the
+       * present cell from the vector we actually store. This function does
+       * the same as the one above but for vector-valued finite elements.
+       */
+      virtual
+      void
+      get_function_hessians
+      (const FEValuesBase<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &fe_patch_values,
+       std::vector<std::vector< Tensor<2,DoFHandlerType::space_dimension> > > &patch_hessians_system) const;
+
+      /**
+       * Clear all references to the vectors.
+       */
+      virtual void clear ();
+
+      /**
+       * Determine an estimate for the memory consumption (in bytes) of this
+       * object.
+       */
+      virtual std::size_t memory_consumption () const;
+
+    private:
+      /**
+       * Non-owning pointer to the data vector: the vector it points to
+       * stays under the ownership of whoever created this object.
+       */
+      const VectorType *vector;
+    };
+
+
+    // Pass dofs, names and interpretation on to the base class; keep the vector pointer here.
+    template <typename DoFHandlerType, typename VectorType>
+    DataEntry<DoFHandlerType,VectorType>::
+    DataEntry (const DoFHandlerType           *dofs,
+               const VectorType               *data,
+               const std::vector<std::string> &names,
+               const std::vector<DataComponentInterpretation::DataComponentInterpretation> &data_component_interpretation)
+      :
+      DataEntryBase<DoFHandlerType> (dofs, names, data_component_interpretation),
+      vector (data)
+    {}
+
+
+    // Pass dofs and the postprocessor on to the base class; keep the vector pointer here.
+    template <typename DoFHandlerType, typename VectorType>
+    DataEntry<DoFHandlerType,VectorType>::
+    DataEntry (const DoFHandlerType                                     *dofs,
+               const VectorType                                         *data,
+               const DataPostprocessor<DoFHandlerType::space_dimension> *data_postprocessor)
+      :
+      DataEntryBase<DoFHandlerType> (dofs, data_postprocessor),
+      vector (data)
+    {}
+
+
+    namespace
+    {
+      // generic vectors: read the entry with the given index
+      template <typename VectorType>
+      double get_vector_element (const VectorType   &vector,
+                                 const unsigned int  cell_number)
+      {
+        return vector[cell_number];
+      }
+      // index sets carry no values, so membership is reported as 1 or 0
+      double get_vector_element (const IndexSet &is,
+                                 const unsigned int cell_number)
+      {
+        if (is.is_element(cell_number))
+          return 1;
+        return 0;
+      }
+    }
+
+
+    // Return entry cell_number of the stored object via the get_vector_element() overloads.
+    template <typename DoFHandlerType, typename VectorType>
+    double
+    DataEntry<DoFHandlerType,VectorType>::
+    get_cell_data_value (const unsigned int cell_number) const
+    {
+      return get_vector_element(*vector, cell_number);
+    }
+
+
+
+    template <typename DoFHandlerType, typename VectorType>
+    void
+    DataEntry<DoFHandlerType,VectorType>::get_function_values
+    (const FEValuesBase<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &fe_patch_values,
+     std::vector<dealii::Vector<double> >                  &patch_values_system) const
+    {
+      // FIXME: FEValuesBase returns data in the scalar type of the
+      // solution vector (VectorType::value_type), but this function
+      // has to hand the result back as 'double'. If value_type is not
+      // 'double', we therefore evaluate into a temporary of the
+      // native type and convert element by element afterwards (which
+      // may lose information).
+      //
+      // The cleaner fix would be to carry the native data type
+      // upstream, but that is complicated because the actual data
+      // type is hidden from upstream. At the very least, we should
+      // make sure we can deal with complex numbers.
+      if (typeid(typename VectorType::value_type) == typeid(double))
+        {
+          fe_patch_values.get_function_values (*vector,
+                                               // value_type is 'double' whenever this line runs,
+                                               // so the cast below is an identity cast at run
+                                               // time. It is still needed because this branch
+                                               // must compile for every VectorType, including
+                                               // those for which it is never executed.
+                                               reinterpret_cast<std::vector<dealii::Vector<typename VectorType::value_type> >&>
+                                               (patch_values_system));
+        }
+      else
+        {
+          std::vector<dealii::Vector<typename VectorType::value_type> > tmp(patch_values_system.size());
+          for (unsigned int i = 0; i < patch_values_system.size(); i++)
+            tmp[i].reinit(patch_values_system[i]);
+
+          fe_patch_values.get_function_values (*vector, tmp);
+
+          for (unsigned int i = 0; i < patch_values_system.size(); i++)
+            patch_values_system[i] = tmp[i];
+        }
+    }
+
+
+
+    template <typename DoFHandlerType, typename VectorType>
+    void
+    DataEntry<DoFHandlerType,VectorType>::get_function_values
+    (const FEValuesBase<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &fe_patch_values,
+     std::vector<double>                                   &patch_values) const
+    {
+      // FIXME: FEValuesBase returns data in the scalar type of the
+      // solution vector (VectorType::value_type), but this function
+      // has to hand the result back as 'double'. If value_type is not
+      // 'double', we therefore evaluate into a temporary of the
+      // native type and convert element by element afterwards (which
+      // may lose information).
+      //
+      // The cleaner fix would be to carry the native data type
+      // upstream, but that is complicated because the actual data
+      // type is hidden from upstream. At the very least, we should
+      // make sure we can deal with complex numbers.
+      if (typeid(typename VectorType::value_type) == typeid(double))
+        {
+          fe_patch_values.get_function_values (*vector,
+                                               // value_type is 'double' whenever this line runs,
+                                               // so the cast below is an identity cast at run
+                                               // time. It is still needed because this branch
+                                               // must compile for every VectorType, including
+                                               // those for which it is never executed.
+                                               reinterpret_cast<std::vector<typename VectorType::value_type>&>
+                                               (patch_values));
+        }
+      else
+        {
+          std::vector<typename VectorType::value_type> tmp (patch_values.size());
+
+          fe_patch_values.get_function_values (*vector, tmp);
+
+          for (unsigned int i = 0; i < tmp.size(); i++)
+            patch_values[i] = tmp[i];
+        }
+    }
+
+
+
+    template <typename DoFHandlerType, typename VectorType>
+    void
+    DataEntry<DoFHandlerType,VectorType>::get_function_gradients
+    (const FEValuesBase<DoFHandlerType::dimension,DoFHandlerType::space_dimension>     &fe_patch_values,
+     std::vector<std::vector<Tensor<1,DoFHandlerType::space_dimension> > > &patch_gradients_system) const
+    {
+      // FIXME: FEValuesBase returns data in the scalar type of the
+      // solution vector (VectorType::value_type), but this function
+      // has to hand the result back as 'double'. If value_type is not
+      // 'double', we therefore evaluate into a temporary of the
+      // native type and convert element by element afterwards (which
+      // may lose information).
+      //
+      // The cleaner fix would be to carry the native data type
+      // upstream, but that is complicated because the actual data
+      // type is hidden from upstream. At the very least, we should
+      // make sure we can deal with complex numbers.
+      if (typeid(typename VectorType::value_type) == typeid(double))
+        {
+          fe_patch_values.get_function_gradients (*vector,
+                                                  // value_type is 'double' whenever this line runs,
+                                                  // so the cast below is an identity cast at run
+                                                  // time. It is still needed because this branch
+                                                  // must compile for every VectorType, including
+                                                  // those for which it is never executed.
+                                                  reinterpret_cast<std::vector<std::vector<Tensor<1,DoFHandlerType::space_dimension,typename VectorType::value_type> > >&>
+                                                  (patch_gradients_system));
+        }
+      else
+        {
+          std::vector<std::vector<Tensor<1,DoFHandlerType::space_dimension,
+              typename VectorType::value_type> > >
+              tmp(patch_gradients_system.size());
+          for (unsigned int i = 0; i < tmp.size(); i++)
+            tmp[i].resize(patch_gradients_system[i].size());
+
+          fe_patch_values.get_function_gradients (*vector, tmp);
+
+          for (unsigned int i = 0; i < tmp.size(); i++)
+            for (unsigned int j = 0; j < tmp[i].size(); j++)
+              patch_gradients_system[i][j] = tmp[i][j];
+        }
+    }
+
+
+
+    template <typename DoFHandlerType, typename VectorType>
+    void
+    DataEntry<DoFHandlerType,VectorType>::get_function_gradients
+    (const FEValuesBase<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &fe_patch_values,
+     std::vector<Tensor<1,DoFHandlerType::space_dimension> >           &patch_gradients) const
+    {
+      // FIXME: FEValuesBase returns data in the scalar type of the
+      // solution vector (VectorType::value_type), but this function
+      // has to hand the result back as 'double'. If value_type is not
+      // 'double', we therefore evaluate into a temporary of the
+      // native type and convert element by element afterwards (which
+      // may lose information).
+      //
+      // The cleaner fix would be to carry the native data type
+      // upstream, but that is complicated because the actual data
+      // type is hidden from upstream. At the very least, we should
+      // make sure we can deal with complex numbers.
+      if (typeid(typename VectorType::value_type) == typeid(double))
+        {
+          fe_patch_values.get_function_gradients (*vector,
+                                                  // value_type is 'double' whenever this line runs,
+                                                  // so the cast below is an identity cast at run
+                                                  // time. It is still needed because this branch
+                                                  // must compile for every VectorType, including
+                                                  // those for which it is never executed.
+                                                  reinterpret_cast<std::vector<Tensor<1,DoFHandlerType::space_dimension,typename VectorType::value_type> >&>
+                                                  (patch_gradients));
+        }
+      else
+        {
+          std::vector<Tensor<1,DoFHandlerType::space_dimension,typename VectorType::value_type> >  tmp;
+          tmp.resize(patch_gradients.size());
+
+          fe_patch_values.get_function_gradients (*vector, tmp);
+
+          for (unsigned int i = 0; i < tmp.size(); i++)
+            patch_gradients[i] = tmp[i];
+        }
+    }
+
+
+
+    // System variant: ask 'fe_patch_values' for the Hessians of the
+    // stored vector and hand them back through the nested array
+    // 'patch_hessians_system', whose tensors use the default (double)
+    // number type.
+    template <typename DoFHandlerType, typename VectorType>
+    void
+    DataEntry<DoFHandlerType,VectorType>::get_function_hessians
+    (const FEValuesBase<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &fe_patch_values,
+     std::vector<std::vector<Tensor<2,DoFHandlerType::space_dimension> > > &patch_hessians_system) const
+    {
+      typedef typename VectorType::value_type number_type;
+      typedef std::vector<std::vector<Tensor<2,DoFHandlerType::space_dimension,number_type> > >
+      native_hessians_type;
+
+      // FIXME: FEValuesBase produces output in the number type of the
+      // solution vector, whereas this function has to return 'double'
+      // data. if the two differ we need a scratch array and an
+      // element-wise conversion (which may lose information). the
+      // proper solution would be to carry the real number type further
+      // upstream, but that type is hidden from the callers; at the very
+      // least complex numbers should be handled properly
+      if (typeid(number_type) != typeid(double))
+        {
+          // build a scratch array with the same outer and inner sizes
+          // as the output array
+          native_hessians_type scratch (patch_hessians_system.size());
+          for (unsigned int row = 0; row < scratch.size(); ++row)
+            scratch[row].resize (patch_hessians_system[row].size());
+
+          fe_patch_values.get_function_hessians (*vector, scratch);
+
+          // convert entry by entry into the output array
+          for (unsigned int row = 0; row < scratch.size(); ++row)
+            for (unsigned int col = 0; col < scratch[row].size(); ++col)
+              patch_hessians_system[row][col] = scratch[row][col];
+        }
+      else
+        {
+          // the number type is 'double', so the cast below is the
+          // identity whenever this branch runs; it only exists so that
+          // the branch also compiles for other number types
+          fe_patch_values.get_function_hessians
+          (*vector,
+           reinterpret_cast<native_hessians_type &>(patch_hessians_system));
+        }
+    }
+
+
+
+    // Scalar variant: ask 'fe_patch_values' for the Hessians of the
+    // stored vector and hand them back through 'patch_hessians', whose
+    // tensors use the default (double) number type.
+    template <typename DoFHandlerType, typename VectorType>
+    void
+    DataEntry<DoFHandlerType,VectorType>::get_function_hessians
+    (const FEValuesBase<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &fe_patch_values,
+     std::vector<Tensor<2,DoFHandlerType::space_dimension> >                       &patch_hessians) const
+    {
+      typedef typename VectorType::value_type number_type;
+      typedef std::vector<Tensor<2,DoFHandlerType::space_dimension,number_type> >
+      native_hessians_type;
+
+      // FIXME: FEValuesBase produces output in the number type of the
+      // solution vector, whereas this function has to return 'double'
+      // data. if the two differ we need a scratch array and an
+      // element-wise conversion (which may lose information). the
+      // proper solution would be to carry the real number type further
+      // upstream, but that type is hidden from the callers; at the very
+      // least complex numbers should be handled properly
+      if (typeid(number_type) != typeid(double))
+        {
+          // evaluate into a scratch array of the vector's number type...
+          native_hessians_type scratch (patch_hessians.size());
+          fe_patch_values.get_function_hessians (*vector, scratch);
+
+          // ...and convert entry by entry into the output array
+          for (unsigned int q = 0; q < scratch.size(); ++q)
+            patch_hessians[q] = scratch[q];
+        }
+      else
+        {
+          // the number type is 'double', so the cast below is the
+          // identity whenever this branch runs; it only exists so that
+          // the branch also compiles for other number types
+          fe_patch_values.get_function_hessians
+          (*vector,
+           reinterpret_cast<native_hessians_type &>(patch_hessians));
+        }
+    }
+
+
+
+    // Estimate of the memory used by this entry: the size of the
+    // 'vector' member itself plus whatever the list of names occupies.
+    template <typename DoFHandlerType, typename VectorType>
+    std::size_t
+    DataEntry<DoFHandlerType,VectorType>::memory_consumption () const
+    {
+      const std::size_t names_size
+        = MemoryConsumption::memory_consumption (this->names);
+
+      return sizeof (vector) + names_size;
+    }
+
+
+
+    // Reset both the DoF handler pointer and the data vector pointer
+    // held by this entry to zero.
+    template <typename DoFHandlerType, typename VectorType>
+    void
+    DataEntry<DoFHandlerType,VectorType>::clear ()
+    {
+      this->dof_handler = 0;
+      vector            = 0;
+    }
+  }
+}
+
+
+
+// Default constructor: both smart pointers start out as null pointers
+// and are tagged with the name of this class.
+template <typename DoFHandlerType, int patch_dim, int patch_space_dim>
+DataOut_DoFData<DoFHandlerType,patch_dim,patch_space_dim>::DataOut_DoFData ()
+  : triangulation (0, typeid(*this).name()),
+    dofs (0, typeid(*this).name())
+{}
+
+
+
+// Destructor: drop all stored data entries, patches and the DoF handler
+// pointer by delegating to clear().
+template <typename DoFHandlerType, int patch_dim, int patch_space_dim>
+DataOut_DoFData<DoFHandlerType,patch_dim,patch_space_dim>::~DataOut_DoFData ()
+{
+  this->clear ();
+}
+
+
+
+// Remember the DoF handler 'd' together with its triangulation. In debug
+// mode this asserts that no data vectors have been added yet.
+template <typename DoFHandlerType, int patch_dim, int patch_space_dim>
+void
+DataOut_DoFData<DoFHandlerType,patch_dim,patch_space_dim>::
+attach_dof_handler (const DoFHandlerType &d)
+{
+  typedef Triangulation<DoFHandlerType::dimension,
+          DoFHandlerType::space_dimension> triangulation_type;
+
+  Assert (dof_data.empty(),
+          Exceptions::DataOut::ExcOldDataStillPresent());
+  Assert (cell_data.empty(),
+          Exceptions::DataOut::ExcOldDataStillPresent());
+
+  // first the triangulation the handler lives on...
+  triangulation = SmartPointer<const triangulation_type>
+                  (&d.get_triangulation(), typeid(*this).name());
+
+  // ...then the handler itself
+  dofs = SmartPointer<const DoFHandlerType>(&d, typeid(*this).name());
+}
+
+
+
+// Remember the triangulation 'tria' without touching the DoF handler
+// pointer. In debug mode this asserts that no data vectors have been
+// added yet.
+template <typename DoFHandlerType, int patch_dim, int patch_space_dim>
+void
+DataOut_DoFData<DoFHandlerType,patch_dim,patch_space_dim>::
+attach_triangulation (const Triangulation<DoFHandlerType::dimension,DoFHandlerType::space_dimension> &tria)
+{
+  typedef Triangulation<DoFHandlerType::dimension,
+          DoFHandlerType::space_dimension> triangulation_type;
+
+  Assert (dof_data.empty(),
+          Exceptions::DataOut::ExcOldDataStillPresent());
+  Assert (cell_data.empty(),
+          Exceptions::DataOut::ExcOldDataStillPresent());
+
+  triangulation = SmartPointer<const triangulation_type>
+                  (&tria, typeid(*this).name());
+}
+
+
+
+
+// Convenience overload taking a single name: builds the list of
+// component names and forwards to the overload that takes a vector of
+// names. A single name is used if there is only one component or if the
+// vector has as many entries as there are active cells; otherwise the
+// names are "<name>_0", "<name>_1", ...
+template <typename DoFHandlerType,
+          int patch_dim, int patch_space_dim>
+template <typename VectorType>
+void
+DataOut_DoFData<DoFHandlerType,patch_dim,patch_space_dim>::
+add_data_vector (const VectorType                         &vec,
+                 const std::string                        &name,
+                 const DataVectorType                      type,
+                 const std::vector<DataComponentInterpretation::DataComponentInterpretation> &data_component_interpretation)
+{
+  Assert (triangulation != 0,
+          Exceptions::DataOut::ExcNoTriangulationSelected ());
+
+  // without a DoF handler we assume exactly one component
+  const unsigned int n_components =
+    dofs == 0 ? 1 : dofs->get_fe().n_components ();
+
+  const bool one_name_suffices =
+    (n_components == 1) ||
+    (vec.size() == triangulation->n_active_cells());
+
+  std::vector<std::string> names;
+  if (one_name_suffices)
+    names.push_back (name);
+  else
+    {
+      // one name per component, with "_<index>" appended
+      names.reserve (n_components);
+      for (unsigned int c=0; c<n_components; ++c)
+        {
+          std::ostringstream suffix;
+          suffix << '_' << c;
+          names.push_back (name + suffix.str());
+        }
+    }
+
+  add_data_vector (vec, names, type, data_component_interpretation);
+}
+
+
+
+// Register the vector 'vec' under the given component names, either as
+// cell data or as DoF data. With 'type_automatic' the kind is deduced by
+// comparing the vector size with the number of active cells. An empty
+// interpretation list is replaced by "all components are scalars".
+template <typename DoFHandlerType,
+          int patch_dim, int patch_space_dim>
+template <typename VectorType>
+void
+DataOut_DoFData<DoFHandlerType,patch_dim,patch_space_dim>::
+add_data_vector (const VectorType                         &vec,
+                 const std::vector<std::string>           &names,
+                 const DataVectorType                      type,
+                 const std::vector<DataComponentInterpretation::DataComponentInterpretation> &data_component_interpretation_)
+{
+  typedef DataComponentInterpretation::DataComponentInterpretation interpretation_type;
+  typedef internal::DataOut::DataEntryBase<DoFHandlerType>         entry_base_type;
+
+  Assert (triangulation != 0,
+          Exceptions::DataOut::ExcNoTriangulationSelected ());
+
+  // take the caller's interpretation, or fall back to one scalar entry
+  // per name if none was given
+  std::vector<interpretation_type> interpretation (data_component_interpretation_);
+  if (interpretation.empty())
+    interpretation.assign (names.size(),
+                           DataComponentInterpretation::component_is_scalar);
+
+  // figure out whether we deal with cell data or with dof data
+  DataVectorType resolved_type = type;
+  if (resolved_type == type_automatic)
+    {
+      // in the rare case that someone has a DGP(0) attached, we can not decide what she wants here:
+      Assert((dofs == 0) || (triangulation->n_active_cells() != dofs->n_dofs()),
+             ExcMessage("Unable to determine the type of vector automatically because the number of DoFs "
+                        "is equal to the number of cells. Please specify DataVectorType."));
+
+      resolved_type = (vec.size() == triangulation->n_active_cells()
+                       ?
+                       type_cell_data
+                       :
+                       type_dof_data);
+    }
+
+  // consistency checks: cell data comes with exactly one name, dof data
+  // with one name per finite element component
+  if (resolved_type == type_cell_data)
+    {
+      Assert (vec.size() == triangulation->n_active_cells(),
+              ExcDimensionMismatch (vec.size(),
+                                    triangulation->n_active_cells()));
+      Assert (names.size() == 1,
+              Exceptions::DataOut::ExcInvalidNumberOfNames (names.size(), 1));
+    }
+  else if (resolved_type == type_dof_data)
+    {
+      Assert (dofs != 0,
+              Exceptions::DataOut::ExcNoDoFHandlerSelected ());
+      Assert (vec.size() == dofs->n_dofs(),
+              Exceptions::DataOut::ExcInvalidVectorSize (vec.size(),
+                                                         dofs->n_dofs(),
+                                                         triangulation->n_active_cells()));
+      Assert (names.size() == dofs->get_fe().n_components(),
+              Exceptions::DataOut::ExcInvalidNumberOfNames (names.size(),
+                                                            dofs->get_fe().n_components()));
+    }
+  else if (resolved_type == type_automatic)
+    {
+      // this case should have been handled above...
+      Assert (false, ExcInternalError());
+    }
+
+  // create the entry and file it under the matching list
+  const std_cxx11::shared_ptr<entry_base_type>
+  entry (new internal::DataOut::DataEntry<DoFHandlerType,VectorType>(dofs, &vec, names,
+         interpretation));
+  if (resolved_type == type_dof_data)
+    dof_data.push_back (entry);
+  else
+    cell_data.push_back (entry);
+}
+
+
+
+// Register 'vec' together with a postprocessor, using the DoF handler
+// that was attached earlier. Data with a postprocessor is always dof
+// data, so the entry goes straight into 'dof_data'.
+template <typename DoFHandlerType,
+          int patch_dim, int patch_space_dim>
+template <typename VectorType>
+void
+DataOut_DoFData<DoFHandlerType,patch_dim,patch_space_dim>::
+add_data_vector (const VectorType                       &vec,
+                 const DataPostprocessor<DoFHandlerType::space_dimension> &data_postprocessor)
+{
+  typedef internal::DataOut::DataEntryBase<DoFHandlerType> entry_base_type;
+
+  // a DoF handler must have been attached, and the vector has to match it
+  Assert (dofs != 0,
+          Exceptions::DataOut::ExcNoDoFHandlerSelected ());
+
+  Assert (vec.size() == dofs->n_dofs(),
+          Exceptions::DataOut::ExcInvalidVectorSize (vec.size(),
+                                                     dofs->n_dofs(),
+                                                     dofs->get_triangulation().n_active_cells()));
+
+  // this uses the DataEntry constructor that takes a postprocessor
+  // instead of names and component interpretations
+  const std_cxx11::shared_ptr<entry_base_type>
+  entry (new internal::DataOut::DataEntry<DoFHandlerType,VectorType>(dofs, &vec, &data_postprocessor));
+  dof_data.push_back (entry);
+}
+
+
+
+// Register 'vec', defined on 'dof_handler', together with a
+// postprocessor. Data with a postprocessor is always dof data, so the
+// entry goes straight into 'dof_data'.
+//
+// If no triangulation has been attached yet, the one underlying
+// 'dof_handler' is adopted, in the same way as the overload of this
+// function that takes a DoF handler and a list of names does. A
+// triangulation that is already attached is left untouched.
+template <typename DoFHandlerType,
+          int patch_dim, int patch_space_dim>
+template <typename VectorType>
+void
+DataOut_DoFData<DoFHandlerType,patch_dim,patch_space_dim>::
+add_data_vector (const DoFHandlerType                   &dof_handler,
+                 const VectorType                       &vec,
+                 const DataPostprocessor<DoFHandlerType::space_dimension> &data_postprocessor)
+{
+  // this is a specialized version of the other function where we have a
+  // postprocessor. if we do, we know that we have type_dof_data, which makes
+  // things a bit simpler, we also don't need to deal with some of the other
+  // stuff and use a different constructor of DataEntry
+
+  // make sure a triangulation is known even if the user never called
+  // attach_dof_handler() or attach_triangulation()
+  if (triangulation == 0)
+    triangulation = SmartPointer<const Triangulation<DoFHandlerType::dimension,DoFHandlerType::space_dimension> >(&dof_handler.get_triangulation(), typeid(*this).name());
+
+  AssertDimension (vec.size(), dof_handler.n_dofs());
+
+  internal::DataOut::DataEntryBase<DoFHandlerType> *new_entry
+    = new internal::DataOut::DataEntry<DoFHandlerType,VectorType>(&dof_handler, &vec, &data_postprocessor);
+  dof_data.push_back (std_cxx11::shared_ptr<internal::DataOut::DataEntryBase<DoFHandlerType> >(new_entry));
+}
+
+
+
+// Convenience overload taking a DoF handler and a single name: builds
+// the list of component names and forwards to the overload that takes a
+// vector of names. For more than one component the names are
+// "<name>_0", "<name>_1", ...
+template <typename DoFHandlerType,
+          int patch_dim, int patch_space_dim>
+template <typename VectorType>
+void
+DataOut_DoFData<DoFHandlerType,patch_dim,patch_space_dim>::
+add_data_vector
+(const DoFHandlerType           &dof_handler,
+ const VectorType               &data,
+ const std::string              &name,
+ const std::vector<DataComponentInterpretation::DataComponentInterpretation> &data_component_interpretation)
+{
+  const unsigned int n_components = dof_handler.get_fe().n_components ();
+
+  std::vector<std::string> names;
+  if (n_components != 1)
+    {
+      // one name per component, with "_<index>" appended
+      names.reserve (n_components);
+      for (unsigned int c=0; c<n_components; ++c)
+        {
+          std::ostringstream suffix;
+          suffix << '_' << c;
+          names.push_back (name + suffix.str());
+        }
+    }
+  else
+    // a single component keeps the name as given
+    names.push_back (name);
+
+  add_data_vector (dof_handler, data, names, data_component_interpretation);
+}
+
+
+
+// Register 'data', defined on 'dof_handler', under the given component
+// names. Since the DoF handler is passed along, the data is known to be
+// dof data. If no triangulation has been attached yet, the one
+// underlying 'dof_handler' is adopted. An empty interpretation list is
+// replaced by "all components are scalars".
+template <typename DoFHandlerType,
+          int patch_dim, int patch_space_dim>
+template <typename VectorType>
+void
+DataOut_DoFData<DoFHandlerType,patch_dim,patch_space_dim>::
+add_data_vector
+(const DoFHandlerType           &dof_handler,
+ const VectorType               &data,
+ const std::vector<std::string> &names,
+ const std::vector<DataComponentInterpretation::DataComponentInterpretation> &data_component_interpretation_)
+{
+  typedef DataComponentInterpretation::DataComponentInterpretation interpretation_type;
+  typedef internal::DataOut::DataEntryBase<DoFHandlerType>         entry_base_type;
+  typedef Triangulation<DoFHandlerType::dimension,
+          DoFHandlerType::space_dimension>                         triangulation_type;
+
+  // adopt the handler's triangulation if none is known so far
+  if (triangulation == 0)
+    triangulation = SmartPointer<const triangulation_type>
+                    (&dof_handler.get_triangulation(), typeid(*this).name());
+
+  Assert (&dof_handler.get_triangulation() == triangulation,
+          ExcMessage("The triangulation attached to the DoFHandler does not "
+                     "match with the one set previously"));
+
+  Assert (data.size() == dof_handler.n_dofs(),
+          ExcDimensionMismatch (data.size(), dof_handler.n_dofs()));
+
+  // take the caller's interpretation, or fall back to one scalar entry
+  // per name if none was given
+  std::vector<interpretation_type> interpretation (data_component_interpretation_);
+  if (interpretation.empty())
+    interpretation.assign (names.size(),
+                           DataComponentInterpretation::component_is_scalar);
+
+  const std_cxx11::shared_ptr<entry_base_type>
+  entry (new internal::DataOut::DataEntry<DoFHandlerType,VectorType>(&dof_handler, &data, names,
+         interpretation));
+  dof_data.push_back (entry);
+}
+
+
+
+// Remove all registered dof and cell data entries and release the
+// storage of the patches. The attached DoF handler and triangulation
+// are not touched.
+template <typename DoFHandlerType,
+          int patch_dim, int patch_space_dim>
+void DataOut_DoFData<DoFHandlerType,patch_dim,patch_space_dim>::clear_data_vectors ()
+{
+  dof_data.clear ();
+  cell_data.clear ();
+
+  // swapping with an empty temporary also gives back the memory held
+  // by the patches
+  std::vector<Patch>().swap (patches);
+}
+
+
+
+// Call clear() on every registered dof and cell data entry and reset
+// the DoF handler pointer. The entries themselves stay in their lists.
+template <typename DoFHandlerType,
+          int patch_dim, int patch_space_dim>
+void
+DataOut_DoFData<DoFHandlerType,patch_dim,patch_space_dim>::
+clear_input_data_references ()
+{
+  typedef
+  typename std::vector<std_cxx11::shared_ptr<internal::DataOut::DataEntryBase<DoFHandlerType> > >::const_iterator
+  entry_iterator;
+
+  for (entry_iterator entry=dof_data.begin(); entry!=dof_data.end(); ++entry)
+    (*entry)->clear ();
+
+  for (entry_iterator entry=cell_data.begin(); entry!=cell_data.end(); ++entry)
+    (*entry)->clear ();
+
+  if (dofs != 0)
+    dofs = 0;
+}
+
+
+
+// Remove all registered dof and cell data entries, reset the DoF handler
+// pointer and release the storage of the patches.
+template <typename DoFHandlerType,
+          int patch_dim, int patch_space_dim>
+void
+DataOut_DoFData<DoFHandlerType,patch_dim,patch_space_dim>::clear ()
+{
+  dof_data.clear ();
+  cell_data.clear ();
+
+  if (dofs != 0)
+    dofs = 0;
+
+  // swapping with an empty temporary also gives back the memory held
+  // by the patches
+  std::vector<Patch>().swap (patches);
+}
+
+
+
+// Return the names of all data sets: first every name of every dof data
+// entry, in the order the entries were added, followed by the single
+// name of each cell data entry.
+template <typename DoFHandlerType,
+          int patch_dim, int patch_space_dim>
+std::vector<std::string>
+DataOut_DoFData<DoFHandlerType,patch_dim,patch_space_dim>::
+get_dataset_names () const
+{
+  std::vector<std::string> collected_names;
+
+  // dof data entries may carry several names each
+  for (unsigned int entry=0; entry<dof_data.size(); ++entry)
+    for (unsigned int k=0; k<dof_data[entry]->names.size(); ++k)
+      collected_names.push_back (dof_data[entry]->names[k]);
+
+  // cell data entries have exactly one name
+  for (unsigned int entry=0; entry<cell_data.size(); ++entry)
+    {
+      Assert (cell_data[entry]->names.size() == 1, ExcInternalError());
+      collected_names.push_back (cell_data[entry]->names[0]);
+    }
+
+  return collected_names;
+}
+
+
+
+// Collect the ranges of output components that together form a vector.
+//
+// The function walks over the output variables of all dof data entries
+// while counting output components across entries. Whenever a variable
+// is marked component_is_part_of_vector, it and the following
+// patch_space_dim-1 variables are reported as one range. Each returned
+// tuple holds the index of the first output component of the range, the
+// index of the last one, and a name: the common name of all components
+// of the range, or the empty string if their names differ.
+//
+// In debug mode, the function asserts that every such group is complete
+// and consistently marked, and that the component counter ends up equal
+// to the total number of output variables.
+template <typename DoFHandlerType,
+          int patch_dim, int patch_space_dim>
+std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> >
+DataOut_DoFData<DoFHandlerType,patch_dim,patch_space_dim>::get_vector_data_ranges () const
+{
+  std::vector<std_cxx11::tuple<unsigned int, unsigned int, std::string> >
+  ranges;
+
+  // collect the ranges of dof
+  // and cell data
+  typedef
+  typename std::vector<std_cxx11::shared_ptr<internal::DataOut::DataEntryBase<DoFHandlerType> > >::const_iterator
+  data_iterator;
+
+  // 'output_component' counts output variables over all entries, whereas
+  // 'i' counts them within the current entry only
+  unsigned int output_component = 0;
+  for (data_iterator  d=dof_data.begin();
+       d!=dof_data.end(); ++d)
+    for (unsigned int i=0; i<(*d)->n_output_variables;
+         ++i, ++output_component)
+      // see what kind of data we have
+      // here. note that for the purpose of
+      // the current function all we care
+      // about is vector data
+      if ((*d)->data_component_interpretation[i] ==
+          DataComponentInterpretation::component_is_part_of_vector)
+        {
+          // ensure that there is a
+          // continuous number of next
+          // space_dim components that all
+          // deal with vectors
+          Assert (i+patch_space_dim <=
+                  (*d)->n_output_variables,
+                  Exceptions::DataOut::ExcInvalidVectorDeclaration (i,
+                                                                    (*d)->names[i]));
+          for (unsigned int dd=1; dd<patch_space_dim; ++dd)
+            Assert ((*d)->data_component_interpretation[i+dd]
+                    ==
+                    DataComponentInterpretation::component_is_part_of_vector,
+                    Exceptions::DataOut::ExcInvalidVectorDeclaration (i,
+                                                                      (*d)->names[i]));
+
+          // all seems alright, so figure out
+          // whether there is a common name
+          // to these components. if not,
+          // leave the name empty and let the
+          // output format writer decide what
+          // to do here
+          std::string name = (*d)->names[i];
+          for (unsigned int dd=1; dd<patch_space_dim; ++dd)
+            if (name != (*d)->names[i+dd])
+              {
+                name = "";
+                break;
+              }
+
+          // finally add a corresponding
+          // range
+          std_cxx11::tuple<unsigned int, unsigned int, std::string>
+          range (output_component,
+                 output_component+patch_space_dim-1,
+                 name);
+
+          ranges.push_back (range);
+
+          // increase the 'component' counter
+          // by the appropriate amount, same
+          // for 'i', since we have already
+          // dealt with all these components
+          // (the loop header adds the remaining
+          // increment of one to both counters)
+          output_component += patch_space_dim-1;
+          i += patch_space_dim-1;
+        }
+
+  // note that we do not have to traverse the
+  // list of cell data here because cell data
+  // is one value per (logical) cell and
+  // therefore cannot be a vector
+
+  // as a final check, the 'component'
+  // counter should be at the total number of
+  // components added up now
+#ifdef DEBUG
+  unsigned int n_output_components = 0;
+  for (data_iterator  d=dof_data.begin();
+       d!=dof_data.end(); ++d)
+    n_output_components += (*d)->n_output_variables;
+  Assert (output_component == n_output_components,
+          ExcInternalError());
+#endif
+
+  return ranges;
+}
+
+
+
+// Read-only access to the list of patches stored in this object.
+template <typename DoFHandlerType, int patch_dim, int patch_space_dim>
+const std::vector< dealii::DataOutBase::Patch<patch_dim, patch_space_dim> > &
+DataOut_DoFData<DoFHandlerType,patch_dim,patch_space_dim>::get_patches () const
+{
+  return this->patches;
+}
+
+
+
+// Return one finite element collection per dof data entry. Entries that
+// share the same DoF handler (compared by pointer) also share the same
+// collection object. If there are no dof data entries at all, a single
+// collection holding an FE_DGQ element of degree 0 is returned.
+template <typename DoFHandlerType,
+          int patch_dim, int patch_space_dim>
+std::vector<std_cxx11::shared_ptr<dealii::hp::FECollection<DoFHandlerType::dimension,
+    DoFHandlerType::space_dimension> > >
+    DataOut_DoFData<DoFHandlerType,patch_dim,patch_space_dim>::get_finite_elements() const
+{
+  const unsigned int dhdim = DoFHandlerType::dimension;
+  const unsigned int dhspacedim = DoFHandlerType::space_dimension;
+
+  typedef dealii::hp::FECollection<dhdim,dhspacedim> collection_type;
+  typedef std::vector<std_cxx11::shared_ptr<collection_type> > result_type;
+
+  // no dof data at all: hand back a single piecewise constant element
+  if (this->dof_data.empty())
+    {
+      result_type fallback(1);
+      fallback[0].reset(new collection_type(FE_DGQ<dhdim,dhspacedim>(0)));
+      return fallback;
+    }
+
+  const unsigned int n_entries = this->dof_data.size();
+  result_type finite_elements(n_entries);
+  for (unsigned int current=0; current<n_entries; ++current)
+    {
+      Assert (dof_data[current]->dof_handler != 0,
+              Exceptions::DataOut::ExcNoDoFHandlerSelected ());
+
+      // avoid creating too many finite elements and doing a lot of work
+      // on initializing FEValues downstream: an entry whose DoFHandler
+      // (compared by pointer) already showed up earlier re-uses the
+      // shared_ptr of that earlier entry. comparing finite elements
+      // instead is not an option because different dof handlers need
+      // different FEValues objects.
+      bool seen_before = false;
+      unsigned int twin = 0;
+      for (unsigned int earlier=0; earlier<current; ++earlier)
+        if (dof_data[current]->dof_handler == dof_data[earlier]->dof_handler)
+          {
+            twin = earlier;
+            seen_before = true;
+          }
+
+      if (seen_before)
+        finite_elements[current] = finite_elements[twin];
+      else
+        finite_elements[current].reset(new collection_type
+                                       (this->dof_data[current]->dof_handler->get_fe()));
+    }
+  return finite_elements;
+}
+
+
+
+// Estimate of the memory used by this object: the base class
+// contribution plus the DoF handler pointer and the patches.
+template <typename DoFHandlerType, int patch_dim, int patch_space_dim>
+std::size_t
+DataOut_DoFData<DoFHandlerType,patch_dim,patch_space_dim>::memory_consumption () const
+{
+  std::size_t total
+    = DataOutInterface<patch_dim,patch_space_dim>::memory_consumption ();
+
+  total += MemoryConsumption::memory_consumption (dofs);
+  total += MemoryConsumption::memory_consumption (patches);
+
+  return total;
+}
+
+
+
+// explicit instantiations
+#include "data_out_dof_data.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/numerics/data_out_dof_data.inst.in b/source/numerics/data_out_dof_data.inst.in
new file mode 100644
index 0000000..8bddc89
--- /dev/null
+++ b/source/numerics/data_out_dof_data.inst.in
@@ -0,0 +1,430 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+for (VEC : SERIAL_VECTORS; DH : DOFHANDLER_TEMPLATES; deal_II_dimension : DIMENSIONS)
+{ // explicit instantiations of DataOut_DoFData::add_data_vector for each vector type in SERIAL_VECTORS
+// codim=0: DH<dim,dim> with patches of type <dim,dim>
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension>,deal_II_dimension,deal_II_dimension>::
+  add_data_vector<VEC> (const VEC            &,
+                       const std::string   &,
+                       const DataVectorType,
+                     const std::vector<DataComponentInterpretation::DataComponentInterpretation> &);
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension>,deal_II_dimension,deal_II_dimension>::
+  add_data_vector<VEC> (const VEC                       &,
+                     const std::vector<std::string> &,
+                     const DataVectorType,
+                     const std::vector<DataComponentInterpretation::DataComponentInterpretation> &);
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension>,deal_II_dimension,deal_II_dimension>::
+  add_data_vector<VEC> (const VEC                 &,
+                      const DataPostprocessor<DH<deal_II_dimension,deal_II_dimension>::space_dimension> &);
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension>,deal_II_dimension,deal_II_dimension>::
+  add_data_vector<VEC> (const DH<deal_II_dimension,deal_II_dimension> &,
+                        const VEC            &,
+                        const std::string   &,
+                        const std::vector<DataComponentInterpretation::DataComponentInterpretation> &);
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension>,deal_II_dimension,deal_II_dimension>::
+  add_data_vector<VEC> (const DH<deal_II_dimension,deal_II_dimension> &,
+                        const VEC                       &,
+                        const std::vector<std::string> &,
+                        const std::vector<DataComponentInterpretation::DataComponentInterpretation> &);
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension>,deal_II_dimension,deal_II_dimension>::
+  add_data_vector<VEC> (const DH<deal_II_dimension,deal_II_dimension> &,
+                        const VEC                 &,
+                        const DataPostprocessor<DH<deal_II_dimension,deal_II_dimension>::space_dimension> &);
+
+
+
+// stuff needed for face data: same DH<dim,dim>, but patches of type <dim-1,dim>
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension>,deal_II_dimension-1,deal_II_dimension>::
+  add_data_vector<VEC> (const VEC            &,
+                       const std::string   &,
+                       const DataVectorType,
+                     const std::vector<DataComponentInterpretation::DataComponentInterpretation> &);
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension>,deal_II_dimension-1,deal_II_dimension>::
+  add_data_vector<VEC> (const VEC                       &,
+                     const std::vector<std::string> &,
+                     const DataVectorType,
+                     const std::vector<DataComponentInterpretation::DataComponentInterpretation> &);
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension>,deal_II_dimension-1,deal_II_dimension>::
+  add_data_vector<VEC> (const VEC                 &,
+                      const DataPostprocessor<DH<deal_II_dimension,deal_II_dimension>::space_dimension> &);
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension>,deal_II_dimension-1,deal_II_dimension>::
+  add_data_vector<VEC> (const DH<deal_II_dimension,deal_II_dimension> &,
+                        const VEC            &,
+                        const std::string   &,
+                        const std::vector<DataComponentInterpretation::DataComponentInterpretation> &);
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension>,deal_II_dimension-1,deal_II_dimension>::
+  add_data_vector<VEC> (const DH<deal_II_dimension,deal_II_dimension> &,
+                        const VEC                       &,
+                        const std::vector<std::string> &,
+                        const std::vector<DataComponentInterpretation::DataComponentInterpretation> &);
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension>,deal_II_dimension-1,deal_II_dimension>::
+  add_data_vector<VEC> (const DH<deal_II_dimension,deal_II_dimension> &,
+                        const VEC                 &,
+                        const DataPostprocessor<DH<deal_II_dimension,deal_II_dimension>::space_dimension> &);
+
+
+
+// things for DataOutRotation: DH<dim,dim> with patches of type <dim+1,dim+1>
+
+#if deal_II_dimension < 3
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension>,deal_II_dimension+1,deal_II_dimension+1>::
+  add_data_vector<VEC> (const VEC            &,
+                       const std::string   &,
+                       const DataVectorType,
+                     const std::vector<DataComponentInterpretation::DataComponentInterpretation> &);
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension>,deal_II_dimension+1,deal_II_dimension+1>::
+  add_data_vector<VEC> (const VEC                       &,
+                     const std::vector<std::string> &,
+                     const DataVectorType,
+                     const std::vector<DataComponentInterpretation::DataComponentInterpretation> &);
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension>,deal_II_dimension+1,deal_II_dimension+1>::
+  add_data_vector<VEC> (const VEC                 &,
+                      const DataPostprocessor<DH<deal_II_dimension,deal_II_dimension>::space_dimension> &);
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension>,deal_II_dimension+1,deal_II_dimension+1>::
+  add_data_vector<VEC> (const DH<deal_II_dimension,deal_II_dimension> &,
+                        const VEC            &,
+                        const std::string   &,
+                        const std::vector<DataComponentInterpretation::DataComponentInterpretation> &);
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension>,deal_II_dimension+1,deal_II_dimension+1>::
+  add_data_vector<VEC> (const DH<deal_II_dimension,deal_II_dimension> &,
+                        const VEC                       &,
+                        const std::vector<std::string> &,
+                        const std::vector<DataComponentInterpretation::DataComponentInterpretation> &);
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension>,deal_II_dimension+1,deal_II_dimension+1>::
+  add_data_vector<VEC> (const DH<deal_II_dimension,deal_II_dimension> &,
+                        const VEC                 &,
+                        const DataPostprocessor<DH<deal_II_dimension,deal_II_dimension>::space_dimension> &);
+#endif // end of the DataOutRotation section
+
+
+// codim 1: DH<dim,dim+1> with patches of type <dim,dim+1>
+
+#if deal_II_dimension < 3
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension+1>,deal_II_dimension,deal_II_dimension+1>::
+  add_data_vector<VEC> (const VEC            &,
+                       const std::string   &,
+                       const DataVectorType,
+                     const std::vector<DataComponentInterpretation::DataComponentInterpretation> &);
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension+1>,deal_II_dimension,deal_II_dimension+1>::
+  add_data_vector<VEC> (const VEC                       &,
+                     const std::vector<std::string> &,
+                     const DataVectorType,
+                     const std::vector<DataComponentInterpretation::DataComponentInterpretation> &);
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension+1>,deal_II_dimension,deal_II_dimension+1>::
+  add_data_vector<VEC> (const VEC                 &,
+                      const DataPostprocessor<DH<deal_II_dimension,deal_II_dimension+1>::space_dimension> &);
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension+1>,deal_II_dimension,deal_II_dimension+1>::
+  add_data_vector<VEC> (const DH<deal_II_dimension,deal_II_dimension+1> &,
+                        const VEC            &,
+                        const std::string   &,
+                        const std::vector<DataComponentInterpretation::DataComponentInterpretation> &);
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension+1>,deal_II_dimension,deal_II_dimension+1>::
+  add_data_vector<VEC> (const DH<deal_II_dimension,deal_II_dimension+1> &,
+                        const VEC                       &,
+                        const std::vector<std::string> &,
+                        const std::vector<DataComponentInterpretation::DataComponentInterpretation> &);
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension+1>,deal_II_dimension,deal_II_dimension+1>::
+  add_data_vector<VEC> (const DH<deal_II_dimension,deal_II_dimension+1> &,
+                        const VEC                 &,
+                        const DataPostprocessor<DH<deal_II_dimension,deal_II_dimension+1>::space_dimension> &);
+#endif // end of the codim 1 section
+
+
+
+// codim 2: only DH<1,3> with patches of type <1,3>, emitted in the pass where the dimension is 3
+
+  #if deal_II_dimension == 3
+  template void
+  DataOut_DoFData<DH<1,3>,1,3>::
+  add_data_vector<VEC> (const VEC            &,
+                       const std::string   &,
+                       const DataVectorType,
+                     const std::vector<DataComponentInterpretation::DataComponentInterpretation> &);
+
+  template void
+  DataOut_DoFData<DH<1,3>,1,3>::
+  add_data_vector<VEC> (const VEC                       &,
+                     const std::vector<std::string> &,
+                     const DataVectorType,
+                     const std::vector<DataComponentInterpretation::DataComponentInterpretation> &);
+
+  template void
+  DataOut_DoFData<DH<1,3>,1,3>::
+  add_data_vector<VEC> (const VEC                 &,
+                      const DataPostprocessor<DH<1,3>::space_dimension> &);
+
+  template void
+  DataOut_DoFData<DH<1,3>,1,3>::
+  add_data_vector<VEC> (const DH<1,3> &,
+                        const VEC            &,
+                        const std::string   &,
+                        const std::vector<DataComponentInterpretation::DataComponentInterpretation> &);
+
+  template void
+  DataOut_DoFData<DH<1,3>,1,3>::
+  add_data_vector<VEC> (const DH<1,3> &,
+                        const VEC                       &,
+                        const std::vector<std::string> &,
+                        const std::vector<DataComponentInterpretation::DataComponentInterpretation> &);
+
+  template void
+  DataOut_DoFData<DH<1,3>,1,3>::
+  add_data_vector<VEC> (const DH<1,3> &,
+                        const VEC                 &,
+                        const DataPostprocessor<DH<1,3>::space_dimension> &);
+  #endif // end of the codim 2 section
+
+}
+
+
+
+for (DH : DOFHANDLER_TEMPLATES; deal_II_dimension : DIMENSIONS)
+{ // explicit instantiations of DataOut_DoFData::add_data_vector with IndexSet as the vector type
+// codim=0: DH<dim,dim> with patches of type <dim,dim>
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension>,deal_II_dimension,deal_II_dimension>::
+  add_data_vector<IndexSet> (const IndexSet            &,
+                       const std::string   &,
+                       const DataVectorType,
+                     const std::vector<DataComponentInterpretation::DataComponentInterpretation> &);
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension>,deal_II_dimension,deal_II_dimension>::
+  add_data_vector<IndexSet> (const IndexSet                       &,
+                     const std::vector<std::string> &,
+                     const DataVectorType,
+                     const std::vector<DataComponentInterpretation::DataComponentInterpretation> &);
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension>,deal_II_dimension,deal_II_dimension>::
+  add_data_vector<IndexSet> (const DH<deal_II_dimension,deal_II_dimension> &,
+                             const IndexSet            &,
+                             const std::string   &,
+                             const std::vector<DataComponentInterpretation::DataComponentInterpretation> &);
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension>,deal_II_dimension,deal_II_dimension>::
+  add_data_vector<IndexSet> (const DH<deal_II_dimension,deal_II_dimension> &,
+                             const IndexSet                       &,
+                             const std::vector<std::string> &,
+                             const std::vector<DataComponentInterpretation::DataComponentInterpretation> &);
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension>,deal_II_dimension,deal_II_dimension>::
+  add_data_vector<IndexSet> (const IndexSet                 &,
+                      const DataPostprocessor<DH<deal_II_dimension,deal_II_dimension>::space_dimension> &);
+
+
+
+// stuff needed for face data: same DH<dim,dim>, but patches of type <dim-1,dim>
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension>,deal_II_dimension-1,deal_II_dimension>::
+  add_data_vector<IndexSet> (const IndexSet            &,
+                       const std::string   &,
+                       const DataVectorType,
+                     const std::vector<DataComponentInterpretation::DataComponentInterpretation> &);
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension>,deal_II_dimension-1,deal_II_dimension>::
+  add_data_vector<IndexSet> (const IndexSet                       &,
+                     const std::vector<std::string> &,
+                     const DataVectorType,
+                     const std::vector<DataComponentInterpretation::DataComponentInterpretation> &);
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension>,deal_II_dimension-1,deal_II_dimension>::
+  add_data_vector<IndexSet> (const IndexSet                 &,
+                      const DataPostprocessor<DH<deal_II_dimension,deal_II_dimension>::space_dimension> &);
+
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension>,deal_II_dimension-1,deal_II_dimension>::
+  add_data_vector<IndexSet> (const DH<deal_II_dimension,deal_II_dimension> &,
+                             const IndexSet            &,
+                             const std::string   &,
+                             const std::vector<DataComponentInterpretation::DataComponentInterpretation> &);
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension>,deal_II_dimension-1,deal_II_dimension>::
+  add_data_vector<IndexSet> (const DH<deal_II_dimension,deal_II_dimension> &,
+                             const IndexSet                       &,
+                             const std::vector<std::string> &,
+                             const std::vector<DataComponentInterpretation::DataComponentInterpretation> &);
+
+// things for DataOutRotation: DH<dim,dim> with patches of type <dim+1,dim+1>
+
+#if deal_II_dimension < 3
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension>,deal_II_dimension+1,deal_II_dimension+1>::
+  add_data_vector<IndexSet> (const IndexSet            &,
+                       const std::string   &,
+                       const DataVectorType,
+                     const std::vector<DataComponentInterpretation::DataComponentInterpretation> &);
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension>,deal_II_dimension+1,deal_II_dimension+1>::
+  add_data_vector<IndexSet> (const IndexSet                       &,
+                     const std::vector<std::string> &,
+                     const DataVectorType,
+                     const std::vector<DataComponentInterpretation::DataComponentInterpretation> &);
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension>,deal_II_dimension+1,deal_II_dimension+1>::
+  add_data_vector<IndexSet> (const IndexSet                 &,
+                      const DataPostprocessor<DH<deal_II_dimension,deal_II_dimension>::space_dimension> &);
+
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension>,deal_II_dimension+1,deal_II_dimension+1>::
+  add_data_vector<IndexSet> (const DH<deal_II_dimension,deal_II_dimension> &,
+                             const IndexSet            &,
+                             const std::string   &,
+                             const std::vector<DataComponentInterpretation::DataComponentInterpretation> &);
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension>,deal_II_dimension+1,deal_II_dimension+1>::
+  add_data_vector<IndexSet> (const DH<deal_II_dimension,deal_II_dimension> &,
+                             const IndexSet                       &,
+                             const std::vector<std::string> &,
+                             const std::vector<DataComponentInterpretation::DataComponentInterpretation> &);
+#endif // end of the DataOutRotation section
+
+// codim 1: DH<dim,dim+1>; NOTE(review): only the overloads without a DH argument appear here - confirm intended
+
+#if deal_II_dimension < 3
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension+1>,deal_II_dimension,deal_II_dimension+1>::
+  add_data_vector<IndexSet> (const IndexSet            &,
+                       const std::string   &,
+                       const DataVectorType,
+                     const std::vector<DataComponentInterpretation::DataComponentInterpretation> &);
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension+1>,deal_II_dimension,deal_II_dimension+1>::
+  add_data_vector<IndexSet> (const IndexSet                       &,
+                     const std::vector<std::string> &,
+                     const DataVectorType,
+                     const std::vector<DataComponentInterpretation::DataComponentInterpretation> &);
+
+  template void
+  DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension+1>,deal_II_dimension,deal_II_dimension+1>::
+  add_data_vector<IndexSet> (const IndexSet                 &,
+                      const DataPostprocessor<DH<deal_II_dimension,deal_II_dimension+1>::space_dimension> &);
+#endif // end of the codim 1 section
+
+}
+
+
+
+for (DH : DOFHANDLER_TEMPLATES; deal_II_dimension : DIMENSIONS)
+{ // explicit instantiations of the DataOut_DoFData class template itself
+  template class DataOut_DoFData<DH<deal_II_dimension>,deal_II_dimension>; // patch dimension equals the DoF handler dimension
+
+#if deal_II_dimension < 3
+  template class DataOut_DoFData<DH<deal_II_dimension>,deal_II_dimension+1>; // patches one dimension higher
+  template class DataOut_DoFData<DH<deal_II_dimension>,deal_II_dimension,deal_II_dimension+1>; // patch space dimension one higher
+  template class DataOut_DoFData<DH<deal_II_dimension,deal_II_dimension+1>,deal_II_dimension,deal_II_dimension+1>; // codim 1 DoF handler
+#endif
+
+#if deal_II_dimension >= 2
+  template class DataOut_DoFData<DH<deal_II_dimension>,deal_II_dimension-1,deal_II_dimension>; // patches one dimension lower
+#endif
+
+#if deal_II_dimension == 3
+     template class DataOut_DoFData<DH<1>,1,3>; // hard-coded <1,3> patches, emitted only in the pass for dimension 3
+     template class DataOut_DoFData<DH<1,3>,1,3>; // same patches, with the codim 2 DoF handler
+#endif
+
+}
+
+
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension : DIMENSIONS)
+{ // explicit instantiation of internal::DataOut::ParallelDataBase for each dimension pair
+  namespace internal \{ // backslash-escaped braces: presumably emitted as literal braces by the instantiation script - confirm
+    namespace DataOut \{
+#if deal_II_dimension <= deal_II_space_dimension
+      template struct ParallelDataBase<deal_II_dimension,deal_II_space_dimension>; // only pairs with dim <= spacedim pass the guard
+#endif
+    \}
+  \}
+}
+
+
+for (DH : DOFHANDLER_TEMPLATES; deal_II_dimension : DIMENSIONS; deal_II_space_dimension : DIMENSIONS)
+{ // explicit instantiation of the member template ParallelDataBase::reinit_all_fe_values per DoF handler template
+  namespace internal \{
+    namespace DataOut \{
+#if deal_II_dimension <= deal_II_space_dimension
+      template
+      void
+      ParallelDataBase<deal_II_dimension,deal_II_space_dimension>::
+      reinit_all_fe_values<dealii::DH<deal_II_dimension,deal_II_space_dimension> > // template argument: the DoF handler type
+      (std::vector<std_cxx11::shared_ptr<DataEntryBase<dealii::DH<deal_II_dimension,deal_II_space_dimension> > > > &dof_data,
+       const dealii::Triangulation<deal_II_dimension,deal_II_space_dimension>::cell_iterator &cell,
+       const unsigned int face);
+#endif // only pairs with dim <= spacedim pass the guard
+    \}
+  \}
+}
diff --git a/source/numerics/data_out_faces.cc b/source/numerics/data_out_faces.cc
new file mode 100644
index 0000000..ca4562f
--- /dev/null
+++ b/source/numerics/data_out_faces.cc
@@ -0,0 +1,430 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/work_stream.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/numerics/data_out_faces.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/hp/fe_values.h>
+#include <deal.II/fe/mapping_q1.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace internal
+{
+  namespace DataOutFaces
+  {
+    template <int dim, int spacedim>
+    ParallelData<dim,spacedim>::
+    ParallelData (const unsigned int n_datasets,
+                  const unsigned int n_subdivisions,
+                  const std::vector<unsigned int> &n_postprocessor_outputs,
+                  const Mapping<dim,spacedim> &mapping,
+                  const std::vector<std_cxx11::shared_ptr<dealii::hp::FECollection<dim,spacedim> > > &finite_elements,
+                  const UpdateFlags update_flags)
+      : // every constructor argument is forwarded unchanged to the base class
+      internal::DataOut::
+      ParallelDataBase<dim,spacedim> (n_datasets,
+                                      n_subdivisions,
+                                      n_postprocessor_outputs,
+                                      mapping,
+                                      finite_elements,
+                                      update_flags,
+                                      true) // extra base-class argument fixed to true; presumably selects face values - TODO confirm
+    {}
+
+
+
+    /**
+     * In a WorkStream context, append a copy of the patch computed by the
+     * parallel stage to @p patches; the copy's patch_index becomes its position.
+     */
+    template <int dim, int spacedim>
+    void
+    append_patch_to_list (const DataOutBase::Patch<dim-1,spacedim> &patch,
+                          std::vector<DataOutBase::Patch<dim-1,spacedim> > &patches)
+    {
+      patches.push_back (patch);
+      patches.back().patch_index = patches.size()-1; // index of the element just appended
+    }
+  }
+}
+
+
+
+template <int dim, typename DoFHandlerType>
+DataOutFaces<dim,DoFHandlerType>::DataOutFaces(const bool so)
+  :
+  surface_only(so) // remember the flag passed by the caller
+{
+  Assert (dim == DoFHandlerType::dimension, // dim must agree with the DoF handler's dimension
+          ExcNotImplemented());
+}
+
+
+
+template <int dim, typename DoFHandlerType>
+void
+DataOutFaces<dim,DoFHandlerType>::
+build_one_patch (const FaceDescriptor *cell_and_face,
+                 internal::DataOutFaces::ParallelData<dimension, dimension> &data,
+                 DataOutBase::Patch<dimension-1,space_dimension>  &patch)
+{ // fills 'patch' for one face: vertices always, data rows only if data.n_datasets > 0
+  Assert (cell_and_face->first->is_locally_owned(), // cells that are not locally owned are not handled
+          ExcNotImplemented());
+
+  // we use the mapping to transform the vertices. However, the mapping works
+  // on cells, not faces, so transform the face vertex to a cell vertex, that
+  // to a unit cell vertex and then, finally, that to the mapped vertex. In
+  // most cases this complicated procedure will be the identity.
+  for (unsigned int vertex=0; vertex<GeometryInfo<dimension-1>::vertices_per_cell; ++vertex)
+    patch.vertices[vertex] = data.mapping_collection[0].transform_unit_to_real_cell
+                             (cell_and_face->first,
+                              GeometryInfo<dimension>::unit_cell_vertex
+                              (GeometryInfo<dim>::face_to_cell_vertices
+                               (cell_and_face->second,
+                                vertex,
+                                cell_and_face->first->face_orientation(cell_and_face->second),
+                                cell_and_face->first->face_flip(cell_and_face->second),
+                                cell_and_face->first->face_rotation(cell_and_face->second))));
+
+  if (data.n_datasets > 0)
+    {
+      data.reinit_all_fe_values(this->dof_data, cell_and_face->first,
+                                cell_and_face->second);
+      const FEValuesBase<dimension> &fe_patch_values
+        = data.get_present_fe_values (0); // entry 0 supplies the quadrature points stored below
+
+      const unsigned int n_q_points = fe_patch_values.n_quadrature_points;
+
+      // store the intermediate points
+      Assert(patch.space_dim==dimension, ExcInternalError());
+      const std::vector<Point<dimension> > &q_points=fe_patch_values.get_quadrature_points();
+      // resize the patch.data member in order to have enough memory for the
+      // quadrature points as well
+      patch.data.reinit(data.n_datasets+dimension,
+                        patch.data.size(1));
+      // set the flag indicating that for this cell the points are explicitly
+      // given
+      patch.points_are_available=true;
+      // copy points to patch.data
+      for (unsigned int i=0; i<dimension; ++i)
+        for (unsigned int q=0; q<n_q_points; ++q)
+          patch.data(patch.data.size(0)-dimension+i,q)=q_points[q][i]; // coordinates go into the last 'dimension' rows
+
+      // first row of patch.data that the current dataset writes to
+      unsigned int offset=0;
+
+      // first fill dof_data
+      for (unsigned int dataset=0; dataset<this->dof_data.size(); ++dataset)
+        {
+          const FEValuesBase<dimension> &this_fe_patch_values
+            = data.get_present_fe_values (dataset);
+          const unsigned int n_components
+            = this_fe_patch_values.get_fe().n_components();
+          const DataPostprocessor<dim> *postprocessor=this->dof_data[dataset]->postprocessor;
+          if (postprocessor != 0)
+            {
+              // we have to postprocess the data, so determine, which fields
+              // have to be updated
+              const UpdateFlags update_flags=postprocessor->get_needed_update_flags();
+
+              // get normals, if needed. this is a geometrical information and
+              // thus does not depend on the number of components of the data
+              // vector
+              if (update_flags & update_normal_vectors)
+                {
+//TODO: undo this copying when we can change the data type of
+//  data.patch_normals to Tensor<1,spacedim> as well
+                  for (unsigned int q=0; q<this_fe_patch_values.n_quadrature_points; ++q)
+                    data.patch_normals[q] = Point<dim>(this_fe_patch_values.get_all_normal_vectors()[q]);
+                }
+
+              if (n_components == 1)
+                {
+                  // at each point there is only one component of value,
+                  // gradient etc.
+                  if (update_flags & update_values)
+                    this->dof_data[dataset]->get_function_values (this_fe_patch_values,
+                                                                  data.patch_values);
+                  if (update_flags & update_gradients)
+                    this->dof_data[dataset]->get_function_gradients (this_fe_patch_values,
+                                                                     data.patch_gradients);
+                  if (update_flags & update_hessians)
+                    this->dof_data[dataset]->get_function_hessians (this_fe_patch_values,
+                                                                    data.patch_hessians);
+
+                  if (update_flags & update_quadrature_points)
+                    data.patch_evaluation_points = this_fe_patch_values.get_quadrature_points();
+
+                  postprocessor->
+                  compute_derived_quantities_scalar(data.patch_values,
+                                                    data.patch_gradients,
+                                                    data.patch_hessians,
+                                                    data.patch_normals,
+                                                    data.patch_evaluation_points,
+                                                    data.postprocessed_values[dataset]);
+                }
+              else
+                {
+                  // at each point there is a vector valued function and its
+                  // derivative...
+                  data.resize_system_vectors(n_components);
+                  if (update_flags & update_values)
+                    this->dof_data[dataset]->get_function_values (this_fe_patch_values,
+                                                                  data.patch_values_system);
+                  if (update_flags & update_gradients)
+                    this->dof_data[dataset]->get_function_gradients (this_fe_patch_values,
+                                                                     data.patch_gradients_system);
+                  if (update_flags & update_hessians)
+                    this->dof_data[dataset]->get_function_hessians (this_fe_patch_values,
+                                                                    data.patch_hessians_system);
+
+                  if (update_flags & update_quadrature_points)
+                    data.patch_evaluation_points = this_fe_patch_values.get_quadrature_points();
+
+                  postprocessor->
+                  compute_derived_quantities_vector(data.patch_values_system,
+                                                    data.patch_gradients_system,
+                                                    data.patch_hessians_system,
+                                                    data.patch_normals,
+                                                    data.patch_evaluation_points,
+                                                    data.postprocessed_values[dataset]);
+                }
+
+              for (unsigned int q=0; q<n_q_points; ++q) // copy the postprocessor output into the patch
+                for (unsigned int component=0;
+                     component<this->dof_data[dataset]->n_output_variables; ++component)
+                  patch.data(offset+component,q)
+                    = data.postprocessed_values[dataset][q](component);
+            }
+          else
+            // now we use the given data vector without modifications. again,
+            // we treat single component functions separately for efficiency
+            // reasons.
+            if (n_components == 1)
+              {
+                this->dof_data[dataset]->get_function_values (this_fe_patch_values,
+                                                              data.patch_values);
+                for (unsigned int q=0; q<n_q_points; ++q)
+                  patch.data(offset,q) = data.patch_values[q];
+              }
+            else
+              {
+                data.resize_system_vectors(n_components);
+                this->dof_data[dataset]->get_function_values (this_fe_patch_values,
+                                                              data.patch_values_system);
+                for (unsigned int component=0; component<n_components;
+                     ++component)
+                  for (unsigned int q=0; q<n_q_points; ++q)
+                    patch.data(offset+component,q) =
+                      data.patch_values_system[q](component);
+              }
+          // advance past the rows written for this dataset
+          offset+=this->dof_data[dataset]->n_output_variables;
+        }
+
+      // then do the cell data
+      for (unsigned int dataset=0; dataset<this->cell_data.size(); ++dataset)
+        {
+          // cell data is looked up by the number of the active cell this face
+          // belongs to. that number is not readily available, so count from
+          // begin_active() (rather inefficient; it does not depend on 'dataset'):
+          Assert (cell_and_face->first->active(),
+                  ExcMessage("The current function is trying to generate cell-data output "
+                             "for a face that does not belong to an active cell. This is "
+                             "not supported."));
+          const unsigned int cell_number
+            = std::distance (this->triangulation->begin_active(),
+                             typename Triangulation<dimension,space_dimension>::active_cell_iterator(cell_and_face->first));
+
+          const double value
+            = this->cell_data[dataset]->get_cell_data_value (cell_number);
+          for (unsigned int q=0; q<n_q_points; ++q)
+            patch.data(dataset+offset,q) = value; // same value at every point; rows follow the DoF data rows
+        }
+    }
+}
+
+
+
+// Convenience overload: build the patches with the static Q1 mapping object.
+template <int dim, typename DoFHandlerType>
+void DataOutFaces<dim,DoFHandlerType>::build_patches (const unsigned int n_subdivisions_)
+{
+  this->build_patches (StaticMappingQ1<dimension>::mapping, n_subdivisions_);
+}
+
+// Build one patch for each face delivered by first_face()/next_face(), using
+// the given mapping; n_subdivisions_ == 0 selects this->default_subdivisions.
+template <int dim, typename DoFHandlerType>
+void DataOutFaces<dim,DoFHandlerType>::build_patches (const Mapping<dimension> &mapping,
+                                                      const unsigned int n_subdivisions_)
+{
+  // Check consistency of redundant template parameter
+  Assert (dim==dimension, ExcDimensionMismatch(dim, dimension));
+
+  const unsigned int n_subdivisions = (n_subdivisions_ != 0)
+                                      ? n_subdivisions_
+                                      : this->default_subdivisions;
+
+  Assert (n_subdivisions >= 1,
+          Exceptions::DataOut::ExcInvalidNumberOfSubdivisions(n_subdivisions));
+
+  Assert (this->triangulation != 0,
+          Exceptions::DataOut::ExcNoTriangulationSelected());
+
+  unsigned int n_datasets     = this->cell_data.size();
+  for (unsigned int i=0; i<this->dof_data.size(); ++i)
+    n_datasets += this->dof_data[i]->n_output_variables;
+
+  // first collect the faces we want to create patches of, so that we can
+  // make sure there is enough memory for that
+  std::vector<FaceDescriptor> all_faces;
+  for (FaceDescriptor face=first_face();
+       face.first != this->triangulation->end();
+       face = next_face(face))
+    all_faces.push_back (face);
+
+  // clear the patches array and allocate the right number of elements
+  this->patches.clear ();
+  this->patches.reserve (all_faces.size());
+  Assert (this->patches.size() == 0, ExcInternalError());
+
+  std::vector<unsigned int> n_postprocessor_outputs (this->dof_data.size());
+  for (unsigned int dataset=0; dataset<this->dof_data.size(); ++dataset)
+    if (this->dof_data[dataset]->postprocessor)
+      n_postprocessor_outputs[dataset] = this->dof_data[dataset]->n_output_variables;
+    else
+      n_postprocessor_outputs[dataset] = 0;
+
+  UpdateFlags update_flags=update_values;
+  for (unsigned int i=0; i<this->dof_data.size(); ++i)
+    if (this->dof_data[i]->postprocessor)
+      update_flags |= this->dof_data[i]->postprocessor->get_needed_update_flags();
+  update_flags |= update_quadrature_points;
+
+  internal::DataOutFaces::ParallelData<dimension, space_dimension>
+  thread_data (n_datasets,
+               n_subdivisions,
+               n_postprocessor_outputs,
+               mapping,
+               this->get_finite_elements(),
+               update_flags);
+  DataOutBase::Patch<dimension-1,space_dimension> sample_patch;
+  sample_patch.n_subdivisions = n_subdivisions;
+  sample_patch.data.reinit (n_datasets,
+                            Utilities::fixed_power<dimension-1>(n_subdivisions+1));
+
+  // now build the patches in parallel; an empty face list must not be indexed
+  if (!all_faces.empty())
+    WorkStream::run (&all_faces[0],
+                     &all_faces[0]+all_faces.size(),
+                     std_cxx11::bind(&DataOutFaces<dim,DoFHandlerType>::build_one_patch,
+                                     this, std_cxx11::_1, std_cxx11::_2, std_cxx11::_3),
+                     std_cxx11::bind(&internal::DataOutFaces::
+                                     append_patch_to_list<dim,space_dimension>,
+                                     std_cxx11::_1, std_cxx11::ref(this->patches)),
+                     thread_data,
+                     sample_patch);
+}
+
+
+
+template <int dim, typename DoFHandlerType>
+typename DataOutFaces<dim,DoFHandlerType>::FaceDescriptor
+DataOutFaces<dim,DoFHandlerType>::first_face ()
+{
+  typedef typename Triangulation<dimension,space_dimension>::active_cell_iterator active_iterator;
+  // take the first admissible face (a boundary face if surface_only is set,
+  // any face otherwise) of the first locally owned active cell that has one
+  for (active_iterator c = this->triangulation->begin_active(); c != this->triangulation->end(); ++c)
+    if (c->is_locally_owned())
+      for (unsigned int face_no=0; face_no<GeometryInfo<dimension>::faces_per_cell; ++face_no)
+        if (!surface_only || c->face(face_no)->at_boundary())
+          return FaceDescriptor(c, face_no);
+
+  // in parallel, all such faces may belong to other processors; signal this
+  // by returning an invalid descriptor
+  return FaceDescriptor();
+}
+
+
+
+template <int dim, typename DoFHandlerType>
+typename DataOutFaces<dim,DoFHandlerType>::FaceDescriptor
+DataOutFaces<dim,DoFHandlerType>::next_face (const FaceDescriptor &old_face)
+{
+  typedef typename Triangulation<dimension,space_dimension>::active_cell_iterator active_iterator;
+
+  FaceDescriptor result = old_face;
+
+  // the cell of the face we start from must be locally owned, since that
+  // face is the one we started with
+  Assert (result.first->is_locally_owned(), ExcInternalError());
+
+  // step 1: try the remaining faces of the present cell. a face is
+  // admissible if it is at the boundary or if surface_only is not set
+  for (unsigned int face_no=result.second+1;
+       face_no<GeometryInfo<dimension>::faces_per_cell;
+       ++face_no)
+    {
+      if (surface_only && !result.first->face(face_no)->at_boundary())
+        continue;
+
+      result.second = face_no;
+      return result;
+    }
+
+  // step 2: none left, so visit the active cells following the present one
+  // (via an active iterator) and look for the first admissible face on a
+  // locally owned cell
+  active_iterator candidate = result.first;
+  for (++candidate; candidate != this->triangulation->end(); ++candidate)
+    {
+      // cells that are not locally owned are skipped altogether
+      if (!candidate->is_locally_owned())
+        continue;
+
+      for (unsigned int face_no=0; face_no<GeometryInfo<dimension>::faces_per_cell; ++face_no)
+        if (!surface_only || candidate->face(face_no)->at_boundary())
+          {
+            result.first  = candidate;
+            result.second = face_no;
+            return result;
+          }
+    }
+
+  // step 3: we fell off the end of the cell list, which is signaled by a
+  // past-the-end cell iterator
+  result.first  = this->triangulation->end();
+  result.second = 0;
+  return result;
+}
+
+
+
+// explicit instantiations
+#include "data_out_faces.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/numerics/data_out_faces.inst.in b/source/numerics/data_out_faces.inst.in
new file mode 100644
index 0000000..114a6b3
--- /dev/null
+++ b/source/numerics/data_out_faces.inst.in
@@ -0,0 +1,24 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+for (deal_II_dimension : DIMENSIONS)
+{
+  // don't instantiate anything in 1d
+#if deal_II_dimension >=2
+  template class DataOutFaces<deal_II_dimension, DoFHandler<deal_II_dimension> >;
+  template class DataOutFaces<deal_II_dimension, hp::DoFHandler<deal_II_dimension> >;
+#endif
+}
diff --git a/source/numerics/data_out_rotation.cc b/source/numerics/data_out_rotation.cc
new file mode 100644
index 0000000..9b29b06
--- /dev/null
+++ b/source/numerics/data_out_rotation.cc
@@ -0,0 +1,512 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/work_stream.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/hp/fe_values.h>
+#include <deal.II/fe/mapping_q1.h>
+#include <deal.II/numerics/data_out_rotation.h>
+
+#include <cmath>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+//TODO: Update documentation
+//TODO: Unify code for dimensions
+
+
+//TODO: build_some_patches isn't going to work if first_cell/next_cell
+//don't iterate over all cells and if cell data is requested. in that
+//case, we need to calculate cell_number as in the DataOut class
+
+// Not implemented for 3D
+
+
+namespace internal
+{
+  namespace DataOutRotation
+  {
+    // Forwards everything but n_patches_per_circle to the base class (with
+    // 'false' as its last argument) and stores n_patches_per_circle.
+    template <int dim, int spacedim>
+    ParallelData<dim,spacedim>::
+    ParallelData (const unsigned int n_datasets,
+                  const unsigned int n_subdivisions,
+                  const unsigned int n_patches_per_circle,
+                  const std::vector<unsigned int> &n_postprocessor_outputs,
+                  const Mapping<dim,spacedim> &mapping,
+                  const std::vector<std_cxx11::shared_ptr<dealii::hp::FECollection<dim,spacedim> > > &finite_elements,
+                  const UpdateFlags update_flags)
+      :
+      internal::DataOut::ParallelDataBase<dim,spacedim> (n_datasets, n_subdivisions,
+                                                         n_postprocessor_outputs,
+                                                         mapping, finite_elements,
+                                                         update_flags, false),
+      n_patches_per_circle (n_patches_per_circle)
+    {}
+
+
+
+    /**
+     * Copier for use in a WorkStream context: appends the patches computed
+     * by the parallel stage to the array of patches and sets the patch_index
+     * of each appended patch to the position it ends up at in that array.
+     */
+    template <int dim, int spacedim>
+    void
+    append_patch_to_list (const std::vector<DataOutBase::Patch<dim+1,spacedim+1> > &new_patches,
+                          std::vector<DataOutBase::Patch<dim+1,spacedim+1> > &patches)
+    {
+      for (unsigned int n=0; n<new_patches.size(); ++n)
+        {
+          const unsigned int position = patches.size();
+          patches.push_back (new_patches[n]);
+          patches.back().patch_index = position;
+        }
+    }
+  }
+}
+
+// Fill patches[0..n_patches_per_circle-1] for the cell *cell: patch vertices
+// plus DoF and cell data. Only dimension 1 and 2 are implemented.
+template <int dim, typename DoFHandlerType>
+void
+DataOutRotation<dim,DoFHandlerType>::
+build_one_patch (const cell_iterator *cell,
+                 internal::DataOutRotation::ParallelData<dimension, space_dimension> &data,
+                 std::vector<DataOutBase::Patch<dimension+1,space_dimension+1> > &patches)
+{
+  if (dim == 3)
+    {
+      // would this function make any sense after all? who would want to
+      // output/compute in four space dimensions?
+      Assert (false, ExcNotImplemented());
+      return;
+    }
+
+  Assert ((*cell)->is_locally_owned(),
+          ExcNotImplemented());
+
+  const unsigned int n_patches_per_circle = data.n_patches_per_circle;
+
+  // another abbreviation denoting the number of q_points in each direction
+  const unsigned int n_points = data.n_subdivisions+1;
+
+  // set up an array that holds the directions in the plane of rotation in
+  // which we will put points in the whole domain (not the rotationally
+  // reduced one in which the computation took place). for simplicity add
+  // the initial direction at the end again
+  std::vector<Point<dimension+1> > angle_directions (n_patches_per_circle+1);
+  for (unsigned int i=0; i<=n_patches_per_circle; ++i)
+    {
+      angle_directions[i][dimension-1] = std::cos(2*numbers::PI *
+                                                  i/n_patches_per_circle);
+      angle_directions[i][dimension] = std::sin(2*numbers::PI *
+                                                i/n_patches_per_circle);
+    }
+
+  for (unsigned int angle=0; angle<n_patches_per_circle; ++angle)
+    {
+      // first compute the vertices of the patch. note that they will have to
+      // be computed from the vertices of the cell, which has one dimension
+      // less, however.
+      switch (dimension)
+        {
+        case 1:
+        {
+          const double r1 = (*cell)->vertex(0)(0),
+                       r2 = (*cell)->vertex(1)(0);
+          Assert (r1 >= 0, ExcRadialVariableHasNegativeValues(r1));
+          Assert (r2 >= 0, ExcRadialVariableHasNegativeValues(r2));
+
+          patches[angle].vertices[0] = r1*angle_directions[angle];
+          patches[angle].vertices[1] = r2*angle_directions[angle];
+          patches[angle].vertices[2] = r1*angle_directions[angle+1];
+          patches[angle].vertices[3] = r2*angle_directions[angle+1];
+
+          break;
+        };
+
+        case 2:
+        {
+          for (unsigned int vertex=0;
+               vertex<GeometryInfo<dimension>::vertices_per_cell;
+               ++vertex)
+            {
+              const Point<dimension> v = (*cell)->vertex(vertex);
+
+              // make sure that the radial variable does not attain negative
+              // values
+              Assert (v(0) >= 0, ExcRadialVariableHasNegativeValues(v(0)));
+
+              // now set the vertices of the patch
+              patches[angle].vertices[vertex] = v(0) * angle_directions[angle];
+              patches[angle].vertices[vertex][0] = v(1);
+
+              patches[angle].vertices[vertex+GeometryInfo<dimension>::vertices_per_cell]
+                = v(0) * angle_directions[angle+1];
+              patches[angle].vertices[vertex+GeometryInfo<dimension>::vertices_per_cell][0]
+                = v(1);
+            };
+
+          break;
+        };
+
+        default:
+          Assert (false, ExcNotImplemented());
+        };
+
+      unsigned int offset=0;
+
+      // then fill in data
+      if (data.n_datasets > 0)
+        {
+          data.reinit_all_fe_values(this->dof_data, *cell);
+          // first fill dof_data
+          for (unsigned int dataset=0; dataset<this->dof_data.size(); ++dataset)
+            {
+              const FEValuesBase<dimension> &fe_patch_values
+                = data.get_present_fe_values(dataset);
+              const unsigned int n_components
+                = fe_patch_values.get_fe().n_components();
+              const DataPostprocessor<dim> *postprocessor=this->dof_data[dataset]->postprocessor;
+              if (postprocessor != 0)
+                {
+                  // we have to postprocess the
+                  // data, so determine, which
+                  // fields have to be updated
+                  const UpdateFlags update_flags=postprocessor->get_needed_update_flags();
+
+                  if (n_components == 1)
+                    {
+                      // at each point there is
+                      // only one component of
+                      // value, gradient etc.
+                      if (update_flags & update_values)
+                        this->dof_data[dataset]->get_function_values (fe_patch_values,
+                                                                      data.patch_values);
+                      if (update_flags & update_gradients)
+                        this->dof_data[dataset]->get_function_gradients (fe_patch_values,
+                                                                         data.patch_gradients);
+                      if (update_flags & update_hessians)
+                        this->dof_data[dataset]->get_function_hessians (fe_patch_values,
+                                                                        data.patch_hessians);
+
+                      if (update_flags & update_quadrature_points)
+                        data.patch_evaluation_points = fe_patch_values.get_quadrature_points();
+
+                      std::vector<Point<space_dimension> > dummy_normals;
+                      postprocessor->
+                      compute_derived_quantities_scalar(data.patch_values,
+                                                        data.patch_gradients,
+                                                        data.patch_hessians,
+                                                        dummy_normals,
+                                                        data.patch_evaluation_points,
+                                                        data.postprocessed_values[dataset]);
+                    }
+                  else
+                    {
+                      data.resize_system_vectors(n_components);
+
+                      // at each point there is a vector valued function and
+                      // its derivative...
+                      if (update_flags & update_values)
+                        this->dof_data[dataset]->get_function_values (fe_patch_values,
+                                                                      data.patch_values_system);
+                      if (update_flags & update_gradients)
+                        this->dof_data[dataset]->get_function_gradients (fe_patch_values,
+                                                                         data.patch_gradients_system);
+                      if (update_flags & update_hessians)
+                        this->dof_data[dataset]->get_function_hessians (fe_patch_values,
+                                                                        data.patch_hessians_system);
+
+                      if (update_flags & update_quadrature_points)
+                        data.patch_evaluation_points = fe_patch_values.get_quadrature_points();
+
+                      std::vector<Point<space_dimension> > dummy_normals;
+                      postprocessor->
+                      compute_derived_quantities_vector(data.patch_values_system,
+                                                        data.patch_gradients_system,
+                                                        data.patch_hessians_system,
+                                                        dummy_normals,
+                                                        data.patch_evaluation_points,
+                                                        data.postprocessed_values[dataset]);
+                    }
+
+                  for (unsigned int component=0;
+                       component<this->dof_data[dataset]->n_output_variables;
+                       ++component)
+                    {
+                      switch (dimension)
+                        {
+                        case 1:
+                          for (unsigned int x=0; x<n_points; ++x)
+                            for (unsigned int y=0; y<n_points; ++y)
+                              patches[angle].data(offset+component,
+                                                  x*n_points + y)
+                                = data.postprocessed_values[dataset][x](component);
+                          break;
+
+                        case 2:
+                          for (unsigned int x=0; x<n_points; ++x)
+                            for (unsigned int y=0; y<n_points; ++y)
+                              for (unsigned int z=0; z<n_points; ++z)
+                                patches[angle].data(offset+component,
+                                                    x*n_points*n_points +
+                                                    y*n_points +
+                                                    z)
+                                  = data.postprocessed_values[dataset][x*n_points+z](component);
+                          break;
+
+                        default:
+                          Assert (false, ExcNotImplemented());
+                        }
+                    }
+                }
+              else if (n_components == 1)
+                {
+                  this->dof_data[dataset]->get_function_values (fe_patch_values,
+                                                                data.patch_values);
+
+                  switch (dimension)
+                    {
+                    case 1:
+                      for (unsigned int x=0; x<n_points; ++x)
+                        for (unsigned int y=0; y<n_points; ++y)
+                          patches[angle].data(offset,
+                                              x*n_points + y)
+                            = data.patch_values[x];
+                      break;
+
+                    case 2:
+                      for (unsigned int x=0; x<n_points; ++x)
+                        for (unsigned int y=0; y<n_points; ++y)
+                          for (unsigned int z=0; z<n_points; ++z)
+                            patches[angle].data(offset,
+                                                x*n_points*n_points +
+                                                y + // NOTE(review): y/z strides differ from the other case-2 loops -- confirm
+                                                z*n_points)
+                              = data.patch_values[x*n_points+z];
+                      break;
+
+                    default:
+                      Assert (false, ExcNotImplemented());
+                    }
+                }
+              else
+                // system of components
+                {
+                  data.resize_system_vectors(n_components);
+                  this->dof_data[dataset]->get_function_values (fe_patch_values,
+                                                                data.patch_values_system);
+
+                  for (unsigned int component=0; component<n_components;
+                       ++component)
+                    {
+                      switch (dimension)
+                        {
+                        case 1:
+                          for (unsigned int x=0; x<n_points; ++x)
+                            for (unsigned int y=0; y<n_points; ++y)
+                              patches[angle].data(offset+component,
+                                                  x*n_points + y)
+                                = data.patch_values_system[x](component);
+                          break;
+
+                        case 2:
+                          for (unsigned int x=0; x<n_points; ++x)
+                            for (unsigned int y=0; y<n_points; ++y)
+                              for (unsigned int z=0; z<n_points; ++z)
+                                patches[angle].data(offset+component,
+                                                    x*n_points*n_points +
+                                                    y*n_points +
+                                                    z)
+                                  = data.patch_values_system[x*n_points+z](component);
+                          break;
+
+                        default:
+                          Assert (false, ExcNotImplemented());
+                        }
+                    }
+                }
+              offset+=this->dof_data[dataset]->n_output_variables;
+            }
+
+          // then do the cell data
+          for (unsigned int dataset=0; dataset<this->cell_data.size(); ++dataset)
+            {
+              // we need to get at the number of the present cell among the
+              // active cells in order to access the cell data. this is not
+              // readily available, so choose the following rather inefficient way:
+              Assert ((*cell)->active(),
+                      ExcMessage("Cell must be active for cell data"));
+              const unsigned int cell_number
+                = std::distance (this->triangulation->begin_active(),
+                                 typename Triangulation<dimension,space_dimension>::active_cell_iterator(*cell));
+              const double value
+                = this->cell_data[dataset]->get_cell_data_value (cell_number);
+              switch (dimension)
+                {
+                case 1:
+                  for (unsigned int x=0; x<n_points; ++x)
+                    for (unsigned int y=0; y<n_points; ++y)
+                      patches[angle].data(dataset+offset,
+                                          x*n_points +
+                                          y)
+                        = value;
+                  break;
+
+                case 2:
+                  for (unsigned int x=0; x<n_points; ++x)
+                    for (unsigned int y=0; y<n_points; ++y)
+                      for (unsigned int z=0; z<n_points; ++z)
+                        patches[angle].data(dataset+offset,
+                                            x*n_points*n_points +
+                                            y*n_points +
+                                            z)
+                          = value;
+                  break;
+
+                default:
+                  Assert (false, ExcNotImplemented());
+                }
+            }
+        }
+    }
+}
+
+
+
+// Build the patches for all cells between first_cell() and the end of the
+// triangulation; each cell yields n_patches_per_circle patches around the axis
+// of rotation. nnnn_subdivisions == 0 selects this->default_subdivisions.
+template <int dim, typename DoFHandlerType>
+void DataOutRotation<dim,DoFHandlerType>::build_patches (const unsigned int n_patches_per_circle,
+                                                         const unsigned int nnnn_subdivisions)
+{
+  // Check consistency of redundant template parameter
+  Assert (dim==dimension, ExcDimensionMismatch(dim, dimension));
+  Assert (this->triangulation != 0,
+          Exceptions::DataOut::ExcNoTriangulationSelected());
+
+  const unsigned int n_subdivisions = (nnnn_subdivisions != 0)
+                                      ? nnnn_subdivisions
+                                      : this->default_subdivisions;
+  Assert (n_subdivisions >= 1,
+          Exceptions::DataOut::ExcInvalidNumberOfSubdivisions(n_subdivisions));
+
+  unsigned int n_datasets=this->cell_data.size();
+  for (unsigned int i=0; i<this->dof_data.size(); ++i)
+    n_datasets+= this->dof_data[i]->n_output_variables;
+
+  UpdateFlags update_flags=update_values | update_quadrature_points;
+  for (unsigned int i=0; i<this->dof_data.size(); ++i)
+    if (this->dof_data[i]->postprocessor)
+      update_flags |= this->dof_data[i]->postprocessor->get_needed_update_flags();
+  // perhaps update_normal_vectors is present,
+  // which would only be useful on faces, but
+  // we may not use it here.
+  Assert (!(update_flags & update_normal_vectors),
+          ExcMessage("The update of normal vectors may not be requested for "
+                     "evaluation of data on cells via DataPostprocessor."));
+
+  // first collect the cells we want to
+  // create patches of, so that we can make
+  // sure there is enough memory for that
+  std::vector<cell_iterator> all_cells;
+  for (cell_iterator cell=first_cell(); cell != this->triangulation->end();
+       cell = next_cell(cell))
+    all_cells.push_back (cell);
+
+  // then also take into account that we want more than one patch to come
+  // out of every cell, as they are repeated around the axis of rotation
+  this->patches.clear();
+  this->patches.reserve (all_cells.size() * n_patches_per_circle);
+
+  std::vector<unsigned int> n_postprocessor_outputs (this->dof_data.size());
+  for (unsigned int dataset=0; dataset<this->dof_data.size(); ++dataset)
+    if (this->dof_data[dataset]->postprocessor)
+      n_postprocessor_outputs[dataset] = this->dof_data[dataset]->n_output_variables;
+    else
+      n_postprocessor_outputs[dataset] = 0;
+
+  internal::DataOutRotation::ParallelData<dimension, space_dimension>
+  thread_data (n_datasets,
+               n_subdivisions, n_patches_per_circle,
+               n_postprocessor_outputs,
+               StaticMappingQ1<dimension,space_dimension>::mapping,
+               this->get_finite_elements(),
+               update_flags);
+  std::vector<DataOutBase::Patch<dimension+1,space_dimension+1> >
+  new_patches (n_patches_per_circle);
+  for (unsigned int i=0; i<new_patches.size(); ++i)
+    {
+      new_patches[i].n_subdivisions = n_subdivisions;
+      new_patches[i].data.reinit (n_datasets,
+                                  Utilities::fixed_power<dimension+1>(n_subdivisions+1));
+    }
+
+  // now build the patches in parallel. an empty cell list must not be
+  // indexed through operator[], so skip the call in that case
+  if (!all_cells.empty())
+    WorkStream::run (&all_cells[0],
+                     &all_cells[0]+all_cells.size(),
+                     std_cxx11::bind(&DataOutRotation<dim,DoFHandlerType>::build_one_patch,
+                                     this, std_cxx11::_1, std_cxx11::_2, std_cxx11::_3),
+                     std_cxx11::bind(&internal::DataOutRotation
+                                     ::append_patch_to_list<dim,space_dimension>,
+                                     std_cxx11::_1, std_cxx11::ref(this->patches)),
+                     thread_data,
+                     new_patches);
+}
+
+// Return the first cell to be visited, which is the first active cell.
+template <int dim, typename DoFHandlerType>
+typename DataOutRotation<dim,DoFHandlerType>::cell_iterator
+DataOutRotation<dim,DoFHandlerType>::first_cell ()
+{
+  const cell_iterator first_active = this->triangulation->begin_active ();
+  return first_active;
+}
+
+
+template <int dim, typename DoFHandlerType>
+typename DataOutRotation<dim,DoFHandlerType>::cell_iterator
+DataOutRotation<dim,DoFHandlerType>::next_cell (const cell_iterator &cell)
+{
+  typedef typename Triangulation<dimension,space_dimension>::active_cell_iterator active_iterator;
+
+  // view the given cell through an active iterator, so that a single
+  // increment takes us to the next active cell
+  active_iterator successor = cell;
+  return ++successor;
+}
+
+
+
+// explicit instantiations
+#include "data_out_rotation.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/numerics/data_out_rotation.inst.in b/source/numerics/data_out_rotation.inst.in
new file mode 100644
index 0000000..ecc3a6e
--- /dev/null
+++ b/source/numerics/data_out_rotation.inst.in
@@ -0,0 +1,22 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+for (deal_II_dimension : DIMENSIONS)
+{
+#if deal_II_dimension < 3
+  template class DataOutRotation<deal_II_dimension, DoFHandler<deal_II_dimension> >;
+#endif
+}
diff --git a/source/numerics/data_out_stack.cc b/source/numerics/data_out_stack.cc
new file mode 100644
index 0000000..638c71c
--- /dev/null
+++ b/source/numerics/data_out_stack.cc
@@ -0,0 +1,478 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/numerics/data_out_stack.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/hp/fe_values.h>
+#include <deal.II/fe/mapping_q1.h>
+
+#include <sstream>
+
+DEAL_II_NAMESPACE_OPEN
+
+// Memory estimate of one data set: its values plus its component names.
+template <int dim, int spacedim, typename DoFHandlerType>
+std::size_t
+DataOutStack<dim,spacedim,DoFHandlerType>::DataVector::memory_consumption () const
+{
+  const std::size_t data_part = MemoryConsumption::memory_consumption (data);
+  return data_part + MemoryConsumption::memory_consumption (names);
+}
+
+
+// Destructor. The body is empty; no explicit cleanup is performed here.
+template <int dim, int spacedim, typename DoFHandlerType>
+DataOutStack<dim,spacedim,DoFHandlerType>::~DataOutStack ()
+{}
+
+
+template <int dim, int spacedim, typename DoFHandlerType>
+void DataOutStack<dim,spacedim,DoFHandlerType>::new_parameter_value (const double p,
+    const double dp)
+{
+  // Remember the new parameter value and the step leading up to it;
+  // build_patches() later spans the interval [p-dp, p] with its patches.
+  parameter      = p;
+  parameter_step = dp;
+
+  // Every declared vector must be empty again at this point, i.e. the user
+  // has to have called finish_parameter_value() at the end of the previous
+  // parameter step. Data left over would be a serious waste of memory, so
+  // complain about it.
+  for (unsigned int i=0; i<dof_data.size(); ++i)
+    Assert (dof_data[i].data.size() == 0,
+            ExcDataNotCleared ());
+
+  for (unsigned int i=0; i<cell_data.size(); ++i)
+    Assert (cell_data[i].data.size() == 0,
+            ExcDataNotCleared ());
+}
+
+
+template <int dim, int spacedim, typename DoFHandlerType>
+void DataOutStack<dim,spacedim,DoFHandlerType>::attach_dof_handler (const DoFHandlerType &dof)
+{
+  // 'dim' is redundant with the dimension of DoFHandlerType; the two
+  // template arguments have to be consistent
+  Assert (DoFHandlerType::dimension==dim,
+          ExcDimensionMismatch(dim, DoFHandlerType::dimension));
+  dof_handler = &dof;
+}
+
+
+template <int dim, int spacedim, typename DoFHandlerType>
+void DataOutStack<dim,spacedim,DoFHandlerType>::declare_data_vector (const std::string &name,
+    const VectorType   vector_type)
+{
+  // a single name is handled as a list of names with one element
+  const std::vector<std::string> single_name (1, name);
+  declare_data_vector (single_name, vector_type);
+}
+
+
+template <int dim, int spacedim, typename DoFHandlerType>
+void DataOutStack<dim,spacedim,DoFHandlerType>::declare_data_vector (const std::vector<std::string> &names,
+    const VectorType    vector_type)
+{
+  // declaring further vectors is no longer allowed once patches exist,
+  // i.e. this function must not be called after some parameter values
+  // have already been processed
+  Assert (patches.size() == 0, ExcDataAlreadyAdded());
+
+  // none of the new names may already be in use, neither by a dof data
+  // set nor by a cell data set
+  for (unsigned int n=0; n<names.size(); ++n)
+    {
+      for (unsigned int s=0; s<dof_data.size(); ++s)
+        for (unsigned int i=0; i<dof_data[s].names.size(); ++i)
+          Assert (names[n] != dof_data[s].names[i],
+                  ExcNameAlreadyUsed(names[n]));
+
+      for (unsigned int s=0; s<cell_data.size(); ++s)
+        for (unsigned int i=0; i<cell_data[s].names.size(); ++i)
+          Assert (names[n] != cell_data[s].names[i],
+                  ExcNameAlreadyUsed(names[n]));
+    }
+
+  // append a default-constructed data set to the list matching the kind
+  // of vector and hand it the given names
+  if (vector_type == dof_vector)
+    {
+      dof_data.push_back (DataVector());
+      dof_data.back().names = names;
+    }
+  else if (vector_type == cell_vector)
+    {
+      cell_data.push_back (DataVector());
+      cell_data.back().names = names;
+    }
+  // any other value of vector_type leaves both lists unchanged
+}
+
+
+template <int dim, int spacedim, typename DoFHandlerType>
+template <typename number>
+void DataOutStack<dim,spacedim,DoFHandlerType>::add_data_vector (const Vector<number> &vec,
+    const std::string    &name)
+{
+  // Convenience overload: derive the list of names from a single name and
+  // forward to the overload that takes a list of names.
+  // the DoFHandler is dereferenced right below, so one must be attached
+  Assert (dof_handler != 0,
+          Exceptions::DataOut::ExcNoDoFHandlerSelected ());
+
+  const unsigned int n_components = dof_handler->get_fe().n_components ();
+  std::vector<std::string> names;
+  // if only one component or vector is cell vector: one name is enough
+  if ((n_components == 1) ||
+      (vec.size() == dof_handler->get_triangulation().n_active_cells()))
+    names.resize (1, name);
+  else
+    // otherwise append _i to the given name for component i
+    {
+      names.resize (n_components);
+      for (unsigned int i=0; i<n_components; ++i)
+        {
+          std::ostringstream namebuf;
+          namebuf << '_' << i;
+          names[i] = name + namebuf.str();
+        }
+    }
+
+  add_data_vector (vec, names);
+}
+
+// Copy the values of vec into the data set that was declared under exactly the given names.
+template <int dim, int spacedim, typename DoFHandlerType>
+template <typename number>
+void DataOutStack<dim,spacedim,DoFHandlerType>::add_data_vector (const Vector<number> &vec,
+    const std::vector<std::string> &names)
+{
+  Assert (dof_handler != 0,
+          Exceptions::DataOut::ExcNoDoFHandlerSelected ());
+  // the vector is either cell data, which takes exactly one name,
+  // or dof data, which takes one name per component
+  Assert (((vec.size() == dof_handler->get_triangulation().n_active_cells()) &&
+           (names.size() == 1))
+          ||
+          ((vec.size() == dof_handler->n_dofs()) &&
+           (names.size() == dof_handler->get_fe().n_components())),
+          Exceptions::DataOut::ExcInvalidNumberOfNames (names.size(),
+                                                        dof_handler->get_fe().n_components()));
+  for (unsigned int i=0; i<names.size(); ++i)
+    Assert (names[i].find_first_not_of("abcdefghijklmnopqrstuvwxyz"
+                                       "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                                       "0123456789_<>()") == std::string::npos,
+            Exceptions::DataOut::ExcInvalidCharacter (names[i],
+                                                      names[i].find_first_not_of("abcdefghijklmnopqrstuvwxyz"
+                                                          "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                                                          "0123456789_<>()")));
+
+  if (vec.size() == dof_handler->n_dofs())
+    {
+      typename std::vector<DataVector>::iterator data_vector=dof_data.begin();
+      for (; data_vector!=dof_data.end(); ++data_vector)
+        if (data_vector->names == names)
+          {
+            data_vector->data.reinit (vec.size());
+            std::copy (vec.begin(), vec.end(),
+                       data_vector->data.begin());
+            return;
+          };
+
+      // ok. no dof data set has exactly this list of names. there is
+      // a slight chance that n_dofs==n_cells, in which case the
+      // vector may still be a cell vector and is looked up below;
+      // so only bomb out here if the next if statement will not
+      // be run
+      if (dof_handler->n_dofs() != dof_handler->get_triangulation().n_active_cells())
+        Assert (false, ExcVectorNotDeclared (names[0]));
+    }
+
+  // search cell data (also done if n_dofs==n_cells and no dof data set matched)
+  if ((vec.size() != dof_handler->n_dofs()) ||
+      (dof_handler->n_dofs() == dof_handler->get_triangulation().n_active_cells()))
+    {
+      typename std::vector<DataVector>::iterator data_vector=cell_data.begin();
+      for (; data_vector!=cell_data.end(); ++data_vector)
+        if (data_vector->names == names)
+          {
+            data_vector->data.reinit (vec.size());
+            std::copy (vec.begin(), vec.end(),
+                       data_vector->data.begin());
+            return;
+          };
+      Assert (false, ExcVectorNotDeclared (names[0]));
+    };
+
+  // every path above ends in either a
+  // return or an Assert statement, so
+  // we shouldn't get here!
+  Assert (false, ExcInternalError());
+}
+
+// Turn the data added for the current parameter value into one patch per active cell.
+template <int dim, int spacedim, typename DoFHandlerType>
+void DataOutStack<dim,spacedim,DoFHandlerType>::build_patches (const unsigned int nnnn_subdivisions)
+{
+  // this is mostly copied from the DataOut class; an argument
+  // of zero selects this->default_subdivisions instead
+  unsigned int n_subdivisions = (nnnn_subdivisions != 0)
+                                ? nnnn_subdivisions
+                                : this->default_subdivisions;
+
+  Assert (n_subdivisions >= 1,
+          Exceptions::DataOut::ExcInvalidNumberOfSubdivisions(n_subdivisions));
+  Assert (dof_handler != 0,
+          Exceptions::DataOut::ExcNoDoFHandlerSelected());
+
+  const unsigned int n_components   = dof_handler->get_fe().n_components();
+  const unsigned int n_datasets     = dof_data.size() * n_components +
+                                      cell_data.size();
+
+  // first count the cells we want to
+  // create patches of and make sure
+  // there is enough memory for that
+  unsigned int n_patches = 0;
+  for (typename DoFHandlerType::active_cell_iterator
+       cell=dof_handler->begin_active();
+       cell != dof_handler->end(); ++cell)
+    ++n_patches;
+
+
+  // before we start the loop:
+  // create a quadrature rule that
+  // actually has the points on this
+  // patch, and an object that
+  // extracts the data on each
+  // cell to these points
+  QTrapez<1>     q_trapez;
+  QIterated<dim> patch_points (q_trapez, n_subdivisions);
+
+  // create collection objects from
+  // single quadratures,
+  // and finite elements. if we have
+  // an hp DoFHandler,
+  // dof_handler.get_fe() returns a
+  // collection of which we do a
+  // shallow copy instead
+  const hp::QCollection<dim>       q_collection (patch_points);
+  const hp::FECollection<dim>      fe_collection(dof_handler->get_fe());
+
+  hp::FEValues<dim> x_fe_patch_values (fe_collection, q_collection,
+                                       update_values);
+
+  const unsigned int n_q_points = patch_points.size();
+  std::vector<double>          patch_values (n_q_points);
+  std::vector<Vector<double> > patch_values_system (n_q_points,
+                                                    Vector<double>(n_components));
+
+  // add the required number of
+  // patches. first initialize a template
+  // patch with n_q_points (in the plane
+  // of the cells) times n_subdivisions+1 (in
+  // the time direction) points
+  dealii::DataOutBase::Patch<dim+1,dim+1>  default_patch;
+  default_patch.n_subdivisions = n_subdivisions;
+  default_patch.data.reinit (n_datasets, n_q_points*(n_subdivisions+1));
+  patches.insert (patches.end(), n_patches, default_patch);
+
+  // now loop over all cells and actually create the patches,
+  // starting at the first of the n_patches entries just appended
+  typename std::vector< dealii::DataOutBase::Patch<dim+1,dim+1> >::iterator
+  patch = patches.begin() + (patches.size()-n_patches);
+  unsigned int cell_number = 0;
+  for (typename DoFHandlerType::active_cell_iterator cell=dof_handler->begin_active();
+       cell != dof_handler->end(); ++cell, ++patch, ++cell_number)
+    {
+      Assert (cell->is_locally_owned(),
+              ExcNotImplemented());
+
+      Assert (patch != patches.end(), ExcInternalError());
+
+      // first fill in the vertices of the patch
+
+      // Patches are organized such
+      // that the parameter direction
+      // is the last
+      // coordinate. Thus, vertices
+      // are two copies of the space
+      // patch, one at parameter-step
+      // and one at parameter.
+      switch (dim)
+        {
+        case 1:
+          patch->vertices[0] = Point<dim+1>(cell->vertex(0)(0),
+                                            parameter-parameter_step);
+          patch->vertices[1] = Point<dim+1>(cell->vertex(1)(0),
+                                            parameter-parameter_step);
+          patch->vertices[2] = Point<dim+1>(cell->vertex(0)(0),
+                                            parameter);
+          patch->vertices[3] = Point<dim+1>(cell->vertex(1)(0),
+                                            parameter);
+          break;
+
+        case 2:
+          patch->vertices[0] = Point<dim+1>(cell->vertex(0)(0),
+                                            cell->vertex(0)(1),
+                                            parameter-parameter_step);
+          patch->vertices[1] = Point<dim+1>(cell->vertex(1)(0),
+                                            cell->vertex(1)(1),
+                                            parameter-parameter_step);
+          patch->vertices[2] = Point<dim+1>(cell->vertex(2)(0),
+                                            cell->vertex(2)(1),
+                                            parameter-parameter_step);
+          patch->vertices[3] = Point<dim+1>(cell->vertex(3)(0),
+                                            cell->vertex(3)(1),
+                                            parameter-parameter_step);
+          patch->vertices[4] = Point<dim+1>(cell->vertex(0)(0),
+                                            cell->vertex(0)(1),
+                                            parameter);
+          patch->vertices[5] = Point<dim+1>(cell->vertex(1)(0),
+                                            cell->vertex(1)(1),
+                                            parameter);
+          patch->vertices[6] = Point<dim+1>(cell->vertex(2)(0),
+                                            cell->vertex(2)(1),
+                                            parameter);
+          patch->vertices[7] = Point<dim+1>(cell->vertex(3)(0),
+                                            cell->vertex(3)(1),
+                                            parameter);
+          break;
+
+        default:
+          Assert (false, ExcNotImplemented());
+        };
+
+
+      // now fill in the data values.
+      // note that the required order is
+      // with highest coordinate running
+      // fastest, we need to enter each
+      // value (n_subdivisions+1) times
+      // in succession
+      if (n_datasets > 0)
+        {
+          x_fe_patch_values.reinit (cell);
+          const FEValues<dim> &fe_patch_values
+            = x_fe_patch_values.get_present_fe_values ();
+
+          // first fill dof_data
+          for (unsigned int dataset=0; dataset<dof_data.size(); ++dataset)
+            {
+              if (n_components == 1)
+                {
+                  fe_patch_values.get_function_values (dof_data[dataset].data,
+                                                       patch_values);
+                  for (unsigned int i=0; i<n_subdivisions+1; ++i)
+                    for (unsigned int q=0; q<n_q_points; ++q)
+                      patch->data(dataset,q+n_q_points*i) = patch_values[q];
+                }
+              else
+                // system of components
+                {
+                  fe_patch_values.get_function_values (dof_data[dataset].data,
+                                                       patch_values_system);
+                  for (unsigned int component=0; component<n_components; ++component)
+                    for (unsigned int i=0; i<n_subdivisions+1; ++i)
+                      for (unsigned int q=0; q<n_q_points; ++q)
+                        patch->data(dataset*n_components+component,
+                                    q+n_q_points*i)
+                          = patch_values_system[q](component);
+                }
+            }
+
+          // then do the cell data: one constant value per cell and data set
+          for (unsigned int dataset=0; dataset<cell_data.size(); ++dataset)
+            {
+              const double value = cell_data[dataset].data(cell_number);
+              for (unsigned int q=0; q<n_q_points; ++q)
+                for (unsigned int i=0; i<n_subdivisions+1; ++i)
+                  patch->data(dataset+dof_data.size()*n_components,
+                              q*(n_subdivisions+1)+i) = value;
+            }
+        }
+    }
+}
+
+
+template <int dim, int spacedim, typename DoFHandlerType>
+void DataOutStack<dim,spacedim,DoFHandlerType>::finish_parameter_value ()
+{
+  // release lock on dof handler; add_data_vector() and build_patches()
+  // assert that a DoFHandler is attached, so a new one is needed next time
+  dof_handler = 0;
+
+  // empty all data vectors again; new_parameter_value() checks for this
+  for (unsigned int i=0; i<dof_data.size(); ++i)
+    dof_data[i].data.reinit (0);
+  for (unsigned int i=0; i<cell_data.size(); ++i)
+    cell_data[i].data.reinit (0);
+}
+
+// Memory estimate: the DataOutInterface part plus the members listed below.
+template <int dim, int spacedim, typename DoFHandlerType>
+std::size_t
+DataOutStack<dim,spacedim,DoFHandlerType>::memory_consumption () const
+{
+  std::size_t total = DataOutInterface<dim+1>::memory_consumption ();
+  total += MemoryConsumption::memory_consumption (parameter);
+  total += MemoryConsumption::memory_consumption (parameter_step);
+  total += MemoryConsumption::memory_consumption (dof_handler);
+  total += MemoryConsumption::memory_consumption (patches);
+  total += MemoryConsumption::memory_consumption (dof_data);
+  total += MemoryConsumption::memory_consumption (cell_data);
+  return total;
+}
+
+
+// Read-only access to the patches that build_patches() has produced so far.
+template <int dim, int spacedim, typename DoFHandlerType>
+const std::vector< dealii::DataOutBase::Patch<dim+1,dim+1> > &
+DataOutStack<dim,spacedim,DoFHandlerType>::get_patches () const
+{
+  return patches;
+}
+
+
+
+template <int dim, int spacedim, typename DoFHandlerType>
+std::vector<std::string> DataOutStack<dim,spacedim,DoFHandlerType>::get_dataset_names () const
+{
+  // names of all dof data sets come first, followed by those of the cell
+  // data sets; build_patches() stores the data rows in the same order
+  std::vector<std::string> all_names;
+  for (unsigned int s=0; s<dof_data.size(); ++s)
+    all_names.insert (all_names.end(), dof_data[s].names.begin(), dof_data[s].names.end());
+  for (unsigned int s=0; s<cell_data.size(); ++s)
+    all_names.insert (all_names.end(), cell_data[s].names.begin(), cell_data[s].names.end());
+
+  return all_names;
+}
+
+
+
+// explicit instantiations
+#include "data_out_stack.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/numerics/data_out_stack.inst.in b/source/numerics/data_out_stack.inst.in
new file mode 100644
index 0000000..b8e9f6d
--- /dev/null
+++ b/source/numerics/data_out_stack.inst.in
@@ -0,0 +1,53 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// Instantiate DataOutStack and add_data_vector (double, float) for DoFHandler and hp::DoFHandler, dimensions < 3.
+for (deal_II_dimension : DIMENSIONS)
+{
+#if deal_II_dimension < 3
+template class DataOutStack<deal_II_dimension,deal_II_dimension,DoFHandler<deal_II_dimension> >;
+
+template void DataOutStack<deal_II_dimension,deal_II_dimension,DoFHandler<deal_II_dimension> >::
+add_data_vector<double> (const Vector<double> &,
+                         const std::string    &);
+template void DataOutStack<deal_II_dimension,deal_II_dimension,DoFHandler<deal_II_dimension> >::
+add_data_vector<float> (const Vector<float>  &,
+                        const std::string    &);
+
+template void DataOutStack<deal_II_dimension,deal_II_dimension,DoFHandler<deal_II_dimension> >::
+add_data_vector<double> (const Vector<double> &,
+                         const std::vector<std::string>    &);
+template void DataOutStack<deal_II_dimension,deal_II_dimension,DoFHandler<deal_II_dimension> >::
+add_data_vector<float> (const Vector<float>  &,
+                        const std::vector<std::string>    &);
+
+
+template class DataOutStack<deal_II_dimension,deal_II_dimension,hp::DoFHandler<deal_II_dimension> >;
+
+template void DataOutStack<deal_II_dimension,deal_II_dimension,hp::DoFHandler<deal_II_dimension> >::
+add_data_vector<double> (const Vector<double> &,
+                         const std::string    &);
+template void DataOutStack<deal_II_dimension,deal_II_dimension,hp::DoFHandler<deal_II_dimension> >::
+add_data_vector<float> (const Vector<float>  &,
+                        const std::string    &);
+                        
+template void DataOutStack<deal_II_dimension,deal_II_dimension,hp::DoFHandler<deal_II_dimension> >::
+add_data_vector<double> (const Vector<double> &,
+                         const std::vector<std::string>    &);
+template void DataOutStack<deal_II_dimension,deal_II_dimension,hp::DoFHandler<deal_II_dimension> >::
+add_data_vector<float> (const Vector<float>  &,
+                        const std::vector<std::string>    &);
+#endif
+}
diff --git a/source/numerics/data_postprocessor.cc b/source/numerics/data_postprocessor.cc
new file mode 100644
index 0000000..41c7cda
--- /dev/null
+++ b/source/numerics/data_postprocessor.cc
@@ -0,0 +1,166 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2007 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/numerics/data_postprocessor.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+// -------------------------- DataPostprocessor ---------------------------
+// Destructor; its body is empty.
+template <int dim>
+DataPostprocessor<dim>::~DataPostprocessor()
+{}
+
+
+// Placeholder body: clears the output, then triggers AssertThrow with ExcPureFunctionCalled.
+template <int dim>
+void
+DataPostprocessor<dim>::
+compute_derived_quantities_scalar (const std::vector<double>         &/*uh*/,
+                                   const std::vector<Tensor<1,dim> > &/*duh*/,
+                                   const std::vector<Tensor<2,dim> > &/*dduh*/,
+                                   const std::vector<Point<dim> >    &/*normals*/,
+                                   const std::vector<Point<dim> >    &/*evaluation_points*/,
+                                   std::vector<Vector<double> >      &computed_quantities) const
+{
+  computed_quantities.clear();
+  AssertThrow(false,ExcPureFunctionCalled());
+}
+
+
+// Placeholder body: clears the output, then triggers AssertThrow with ExcPureFunctionCalled.
+template <int dim>
+void
+DataPostprocessor<dim>::
+compute_derived_quantities_vector (const std::vector<Vector<double> > &/*uh*/,
+                                   const std::vector<std::vector<Tensor<1,dim> > > &/*duh*/,
+                                   const std::vector<std::vector<Tensor<2,dim> > > &/*dduh*/,
+                                   const std::vector<Point<dim> >                  &/*normals*/,
+                                   const std::vector<Point<dim> >                  &/*evaluation_points*/,
+                                   std::vector<Vector<double> >                    &computed_quantities) const
+{
+  computed_quantities.clear();
+  AssertThrow(false,ExcPureFunctionCalled());
+}
+
+
+
+template <int dim>
+std::vector<DataComponentInterpretation::DataComponentInterpretation>
+DataPostprocessor<dim>::get_data_component_interpretation () const
+{
+  // by default each output announced by get_names() counts as an
+  // independent scalar component
+  const std::vector<std::string>::size_type n_outputs = get_names().size();
+
+  return std::vector<DataComponentInterpretation::DataComponentInterpretation>
+         (n_outputs, DataComponentInterpretation::component_is_scalar);
+}
+
+
+// -------------------------- DataPostprocessorScalar ---------------------------
+// Remember the output name and the update flags handed in.
+template <int dim>
+DataPostprocessorScalar<dim>::
+DataPostprocessorScalar (const std::string &name,
+                         const UpdateFlags  update_flags)
+  :
+  name (name),
+  update_flags (update_flags)
+{}
+
+
+// A scalar quantity has exactly one output, labelled with the stored name.
+template <int dim>
+std::vector<std::string>
+DataPostprocessorScalar<dim>::get_names () const
+{
+  std::vector<std::string> single_name (1, name);
+  return single_name;
+}
+
+
+
+// The single output is marked as an independent scalar.
+template <int dim>
+std::vector<DataComponentInterpretation::DataComponentInterpretation>
+DataPostprocessorScalar<dim>::get_data_component_interpretation () const
+{
+  std::vector<DataComponentInterpretation::DataComponentInterpretation>
+  interpretation (1, DataComponentInterpretation::component_is_scalar);
+  return interpretation;
+}
+
+// Return the update flags that were passed to the constructor.
+template <int dim>
+UpdateFlags
+DataPostprocessorScalar<dim>::
+get_needed_update_flags () const
+{
+  return update_flags;
+}
+
+
+
+// -------------------------- DataPostprocessorVector ---------------------------
+// Remember the output name and the update flags handed in.
+template <int dim>
+DataPostprocessorVector<dim>::
+DataPostprocessorVector (const std::string &name,
+                         const UpdateFlags  update_flags)
+  :
+  name (name),
+  update_flags (update_flags)
+{}
+
+
+// One entry per space dimension, each of them the same stored name.
+template <int dim>
+std::vector<std::string>
+DataPostprocessorVector<dim>::get_names () const
+{
+  std::vector<std::string> repeated_name (dim, name);
+  return repeated_name;
+}
+
+
+
+// All dim outputs are marked as parts of one vector.
+template <int dim>
+std::vector<DataComponentInterpretation::DataComponentInterpretation>
+DataPostprocessorVector<dim>::get_data_component_interpretation () const
+{
+  std::vector<DataComponentInterpretation::DataComponentInterpretation>
+  interpretation (dim, DataComponentInterpretation::component_is_part_of_vector);
+  return interpretation;
+}
+
+// Return the update flags that were passed to the constructor.
+template <int dim>
+UpdateFlags
+DataPostprocessorVector<dim>::
+get_needed_update_flags () const
+{
+  return update_flags;
+}
+
+
+// explicit instantiation
+#include "data_postprocessor.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/numerics/data_postprocessor.inst.in b/source/numerics/data_postprocessor.inst.in
new file mode 100644
index 0000000..d213865
--- /dev/null
+++ b/source/numerics/data_postprocessor.inst.in
@@ -0,0 +1,22 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// Instantiate the three postprocessor classes for every listed dimension.
+for (deal_II_dimension : DIMENSIONS)
+{
+  template class DataPostprocessor<deal_II_dimension>;
+  template class DataPostprocessorScalar<deal_II_dimension>;
+  template class DataPostprocessorVector<deal_II_dimension>;
+}
diff --git a/source/numerics/derivative_approximation.cc b/source/numerics/derivative_approximation.cc
new file mode 100644
index 0000000..055a71e
--- /dev/null
+++ b/source/numerics/derivative_approximation.cc
@@ -0,0 +1,1143 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2000 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/work_stream.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/parallel_vector.h>
+#include <deal.II/lac/parallel_block_vector.h>
+#include <deal.II/lac/petsc_vector.h>
+#include <deal.II/lac/petsc_block_vector.h>
+#include <deal.II/lac/trilinos_vector.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/grid_tools.h>
+#include <deal.II/grid/filtered_iterator.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/hp/fe_values.h>
+#include <deal.II/fe/mapping_q1.h>
+#include <deal.II/hp/q_collection.h>
+#include <deal.II/hp/fe_collection.h>
+#include <deal.II/hp/mapping_collection.h>
+#include <deal.II/numerics/derivative_approximation.h>
+
+#include <cmath>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+namespace
+{
+  /**
+   * Return the square of the given value.
+   */
+  template <typename T>
+  inline T sqr (const T value)
+  {
+    const T squared = value*value;
+    return squared;
+  }
+}
+
+// --------------- First the classes and functions that describe individual
+// --------------- derivatives
+
+namespace DerivativeApproximation
+{
+  namespace internal
+  {
+    /**
+     * The following class describes the data needed to compute the finite
+     * difference approximation to the gradient on a cell: the tensor types
+     * involved, the update flags required, and how to evaluate, measure and
+     * symmetrize the result. See the general documentation of this class for
+     * more information on implementation details.
+     *
+     * @author Wolfgang Bangerth, 2000
+     */
+    template <int dim>
+    class Gradient
+    {
+    public:
+      /**
+       * Declare which data fields have to be updated for the function @p
+       * get_projected_derivative to work.
+       */
+      static const UpdateFlags update_flags;
+
+      /**
+       * Declare the data type which holds the derivative described by this
+       * class.
+       */
+      typedef Tensor<1,dim> Derivative;
+
+      /**
+       * Likewise declare the data type that holds the derivative projected
+       * onto a certain direction.
+       */
+      typedef Tensor<0,dim> ProjectedDerivative;
+
+      /**
+       * Given an FEValues object initialized to a cell, and a solution vector,
+       * extract the desired derivative at the first quadrature point (which is
+       * the only one, as we only evaluate the finite element field at the
+       * center of each cell). For the gradient, the quantity extracted is the
+       * function value itself.
+       *
+       * @param fe_values FEValues object already initialized to the cell.
+       * @param solution  Vector from which the finite element field is
+       * evaluated.
+       * @param component Component of the finite element field to return;
+       * only used if the element has more than one component.
+       */
+      template <class InputVector, int spacedim>
+      static ProjectedDerivative
+      get_projected_derivative (const FEValues<dim,spacedim>  &fe_values,
+                                const InputVector    &solution,
+                                const unsigned int    component);
+
+      /**
+       * Return the norm of the derivative object. Here, for the gradient, we
+       * choose the Euclidean norm of the gradient vector.
+       */
+      static double derivative_norm (const Derivative &d);
+
+      /**
+       * If for the present derivative order, symmetrization of the derivative
+       * tensor is necessary, then do so on the argument.
+       *
+       * For the first derivatives, no such thing is necessary, so this function
+       * is a no-op.
+       */
+      static void symmetrize (Derivative &derivative_tensor);
+    };
+
+    // static variables: get_projected_derivative only asks for function
+    // values, so update_values is all that is needed here
+    template <int dim>
+    const UpdateFlags Gradient<dim>::update_flags = update_values;
+
+
+    template <int dim>
+    template <class InputVector, int spacedim>
+    inline
+    typename Gradient<dim>::ProjectedDerivative
+    Gradient<dim>::
+    get_projected_derivative (const FEValues<dim,spacedim>  &fe_values,
+                              const InputVector    &solution,
+                              const unsigned int    component)
+    {
+      typedef typename InputVector::value_type Number;
+
+      const unsigned int n_components = fe_values.get_fe().n_components();
+
+      // scalar element: the function value at the single evaluation
+      // point is already what we are looking for
+      if (n_components == 1)
+        {
+          std::vector<Number> scalar_values (1);
+          fe_values.get_function_values (solution, scalar_values);
+          return scalar_values[0];
+        }
+
+      // vector-valued element: fetch all components at the single
+      // evaluation point and pick out the requested one
+      std::vector<Vector<Number> > vector_values (1, Vector<Number>(n_components));
+      fe_values.get_function_values (solution, vector_values);
+      return vector_values[0](component);
+    }
+
+
+
+    template <int dim>
+    inline
+    double
+    Gradient<dim>::derivative_norm (const Derivative &d)
+    {
+      // Euclidean norm: accumulate the squares of all entries and
+      // take the root of the sum
+      double sum_of_squares = 0;
+      for (unsigned int c=0; c<dim; ++c)
+        {
+          sum_of_squares += d[c]*d[c];
+        }
+
+      return std::sqrt(sum_of_squares);
+    }
+
+
+
+    template <int dim>
+    inline
+    void
+    Gradient<dim>::symmetrize (Derivative & /*derivative_tensor*/)
+    {
+      // a rank-1 tensor has no pair of indices that could be
+      // symmetrized, so the argument is deliberately left untouched
+    }
+
+
+
+    /**
+     * The following class describes the data needed to compute the finite
+     * difference approximation to the second derivatives on a cell: the
+     * tensor types involved, the update flags required, and how to evaluate,
+     * measure and symmetrize the result. See the general documentation of
+     * this class for more information on implementation details.
+     *
+     * @author Wolfgang Bangerth, 2000
+     */
+    template <int dim>
+    class SecondDerivative
+    {
+    public:
+      /**
+       * Declare which data fields have to be updated for the function @p
+       * get_projected_derivative to work.
+       */
+      static const UpdateFlags update_flags;
+
+      /**
+       * Declare the data type which holds the derivative described by this
+       * class.
+       */
+      typedef Tensor<2,dim> Derivative;
+
+      /**
+       * Likewise declare the data type that holds the derivative projected
+       * onto a certain direction.
+       */
+      typedef Tensor<1,dim> ProjectedDerivative;
+
+      /**
+       * Given an FEValues object initialized to a cell, and a solution vector,
+       * extract the desired derivative at the first quadrature point (which is
+       * the only one, as we only evaluate the finite element field at the
+       * center of each cell). For the second derivatives, the quantity
+       * extracted is the gradient of the finite element field.
+       *
+       * @param fe_values FEValues object already initialized to the cell.
+       * @param solution  Vector from which the finite element field is
+       * evaluated.
+       * @param component Component of the finite element field to return;
+       * only used if the element has more than one component.
+       */
+      template <class InputVector, int spacedim>
+      static ProjectedDerivative
+      get_projected_derivative (const FEValues<dim,spacedim>  &fe_values,
+                                const InputVector    &solution,
+                                const unsigned int    component);
+
+      /**
+       * Return the norm of the derivative object. Here, for the (symmetric)
+       * tensor of second derivatives, we choose the absolute value of the
+       * largest eigenvalue, which is the matrix norm associated to the $l_2$
+       * norm of vectors. It is also the largest value of the curvature of the
+       * solution.
+       *
+       * This is implemented for dim equal to 1, 2 and 3 only; the general
+       * template triggers an ExcNotImplemented assertion.
+       */
+      static double derivative_norm (const Derivative &d);
+
+      /**
+       * If for the present derivative order, symmetrization of the derivative
+       * tensor is necessary, then do so on the argument.
+       *
+       * For the second derivatives, each entry of the tensor is set to the mean
+       * of its value and the value of the transpose element.
+       *
+       * Note that this function actually modifies its argument.
+       */
+      static void symmetrize (Derivative &derivative_tensor);
+    };
+
+    // get_projected_derivative asks for the gradients of the finite
+    // element field, hence update_gradients
+    template <int dim>
+    const UpdateFlags SecondDerivative<dim>::update_flags = update_gradients;
+
+
+    template <int dim>
+    template <class InputVector, int spacedim>
+    inline
+    typename SecondDerivative<dim>::ProjectedDerivative
+    SecondDerivative<dim>::
+    get_projected_derivative (const FEValues<dim,spacedim>  &fe_values,
+                              const InputVector    &solution,
+                              const unsigned int    component)
+    {
+      typedef Tensor<1,dim,typename InputVector::value_type> GradientType;
+
+      const unsigned int n_components = fe_values.get_fe().n_components();
+
+      // scalar element: there is only one gradient to ask for
+      if (n_components == 1)
+        {
+          std::vector<GradientType> scalar_gradients (1);
+          fe_values.get_function_gradients (solution, scalar_gradients);
+          return ProjectedDerivative(scalar_gradients[0]);
+        }
+
+      // vector-valued element: fetch the gradients of all components at
+      // the single evaluation point and pick out the requested one
+      std::vector<std::vector<GradientType> > component_gradients
+      (1, std::vector<GradientType>(n_components));
+      fe_values.get_function_gradients (solution, component_gradients);
+      return ProjectedDerivative(component_gradients[0][component]);
+    }
+
+
+
+    template <>
+    inline
+    double
+    SecondDerivative<1>::
+    derivative_norm (const Derivative &d)
+    {
+      // a 1x1 tensor has a single eigenvalue, namely its only entry
+      const double only_entry = d[0][0];
+      return std::fabs (only_entry);
+    }
+
+
+
+    template <>
+    inline
+    double
+    SecondDerivative<2>::
+    derivative_norm (const Derivative &d)
+    {
+      // d should be a symmetric 2x2 tensor. with a=d_11, b=d_22 and
+      // c=d_12=d_21, its two eigenvalues are
+      //
+      //   1/2(a+b\pm\sqrt((a-b)^2+4c^2))
+      const double diagonal_sum = d[0][0] + d[1][1];
+      const double root = std::sqrt (dealii::sqr(d[0][0] - d[1][1]) +
+                                     4*dealii::sqr(d[0][1]));
+
+      const double eigenvalue_plus  = 0.5*(diagonal_sum + root);
+      const double eigenvalue_minus = 0.5*(diagonal_sum - root);
+
+      // the norm is the eigenvalue of largest magnitude
+      return std::max (std::fabs (eigenvalue_plus),
+                       std::fabs (eigenvalue_minus));
+    }
+
+
+
+    // Specialization for dim==3: return the largest absolute value among
+    // the three eigenvalues of the tensor d, which are computed in closed
+    // form from the invariants of d.
+    template <>
+    inline
+    double
+    SecondDerivative<3>::
+    derivative_norm (const Derivative &d)
+    {
+      /*
+      compute the three eigenvalues of the tensor @p{d} and take the
+      largest. one could use the following maple script to generate C
+      code:
+
+      with(linalg);
+      readlib(C);
+      A:=matrix(3,3,[[a00,a01,a02],[a01,a11,a12],[a02,a12,a22]]);
+      E:=eigenvals(A);
+      EE:=vector(3,[E[1],E[2],E[3]]);
+      C(EE);
+
+      Unfortunately, with both optimized and non-optimized output, at some
+      places the code `sqrt(-1.0)' is emitted, and I don't know what
+      Maple intends to do with it. This happens both with Maple4 and
+      Maple5.
+
+      Fortunately, Roger Young provided the following Fortran code, which
+      is transcribed below to C. The code uses an algorithm that uses the
+      invariants of a symmetric matrix. (The translated algorithm is
+      augmented by a test for R>0, since R==0 indicates that all three
+      eigenvalues are equal.)
+
+
+          PROGRAM MAIN
+
+      C FIND EIGENVALUES OF REAL SYMMETRIC MATRIX
+      C (ROGER YOUNG, 2001)
+
+          IMPLICIT NONE
+
+          REAL*8 A11, A12, A13, A22, A23, A33
+          REAL*8 I1, J2, J3, AM
+          REAL*8 S11, S12, S13, S22, S23, S33
+          REAL*8 SS12, SS23, SS13
+          REAL*8 R,R3, XX,YY, THETA
+          REAL*8 A1,A2,A3
+          REAL*8 PI
+          PARAMETER (PI=3.141592653587932384D0)
+          REAL*8 A,B,C, TOL
+          PARAMETER (TOL=1.D-14)
+
+      C DEFINE A TEST MATRIX
+
+          A11 = -1.D0
+          A12 = 5.D0
+          A13 = 3.D0
+          A22 = -2.D0
+          A23 = 0.5D0
+          A33 = 4.D0
+
+
+          I1 = A11 + A22 + A33
+          AM = I1/3.D0
+
+          S11 = A11 - AM
+          S22 = A22 - AM
+          S33 = A33 - AM
+          S12 = A12
+          S13 = A13
+          S23 = A23
+
+          SS12 = S12*S12
+          SS23 = S23*S23
+          SS13 = S13*S13
+
+          J2 = S11*S11 + S22*S22 + S33*S33
+          J2 = J2 + 2.D0*(SS12 + SS23 + SS13)
+          J2 = J2/2.D0
+
+          J3 = S11**3 + S22**3 + S33**3
+          J3 = J3 + 3.D0*S11*(SS12 + SS13)
+          J3 = J3 + 3.D0*S22*(SS12 + SS23)
+          J3 = J3 + 3.D0*S33*(SS13 + SS23)
+          J3 = J3 + 6.D0*S12*S23*S13
+          J3 = J3/3.D0
+
+          R = SQRT(4.D0*J2/3.D0)
+          R3 = R*R*R
+          XX = 4.D0*J3/R3
+
+          YY = 1.D0 - DABS(XX)
+          IF(YY.LE.0.D0)THEN
+             IF(YY.GT.(-TOL))THEN
+                WRITE(6,*)'Equal roots: XX= ',XX
+                A = -(XX/DABS(XX))*SQRT(J2/3.D0)
+                B = AM + A
+                C = AM - 2.D0*A
+                WRITE(6,*)B,' (twice) ',C
+                STOP
+             ELSE
+                WRITE(6,*)'Error: XX= ',XX
+                STOP
+             ENDIF
+          ENDIF
+
+          THETA = (ACOS(XX))/3.D0
+
+          A1 = AM + R*COS(THETA)
+          A2 = AM + R*COS(THETA + 2.D0*PI/3.D0)
+          A3 = AM + R*COS(THETA + 4.D0*PI/3.D0)
+
+          WRITE(6,*)A1,A2,A3
+
+          STOP
+          END
+
+       */
+
+      // am is the mean of the diagonal entries (AM in the Fortran code)
+      const double am = trace(d) / 3.;
+
+      // s := d - (trace(d)/3) I, i.e. d with the mean of its diagonal
+      // subtracted from every diagonal entry, so that s is trace-free
+      Tensor<2,3> s = d;
+      for (unsigned int i=0; i<3; ++i)
+        s[i][i] -= am;
+
+      // squares of the off-diagonal entries of s, used in both J2 and J3
+      const double ss01 = s[0][1] * s[0][1],
+                   ss12 = s[1][2] * s[1][2],
+                   ss02 = s[0][2] * s[0][2];
+
+      // J2 and J3 as in the Fortran code above
+      const double J2 = (s[0][0]*s[0][0] + s[1][1]*s[1][1] + s[2][2]*s[2][2]
+                         + 2 * (ss01 + ss02 + ss12))  / 2.;
+      const double J3 = (std::pow(s[0][0],3) + std::pow(s[1][1],3) + std::pow(s[2][2],3)
+                         + 3. * s[0][0] * (ss01 + ss02)
+                         + 3. * s[1][1] * (ss01 + ss12)
+                         + 3. * s[2][2] * (ss02 + ss12)
+                         + 6. * s[0][1] * s[0][2] * s[1][2]) / 3.;
+
+      const double R  = std::sqrt (4. * J2 / 3.);
+
+      double EE[3] = { 0, 0, 0 };
+      // the eigenvalues are away from @p{am} in the order of R. thus, if
+      // R<<AM, then we have the degenerate case with three identical
+      // eigenvalues. check this first
+      if (R <= 1e-14*std::fabs(am))
+        EE[0] = EE[1] = EE[2] = am;
+      else
+        {
+          // at least two eigenvalues are distinct
+          const double R3 = R*R*R;
+          const double XX = 4. * J3 / R3;
+          const double YY = 1. - std::fabs(XX);
+
+          // |XX| may exceed one only by round-off
+          Assert (YY > -1e-14, ExcInternalError());
+
+          if (YY < 0)
+            {
+              // two roots are equal
+              const double a = (XX>0 ? -1. : 1.) * R / 2;
+              EE[0] = EE[1] = am + a;
+              EE[2] = am - 2.*a;
+            }
+          else
+            {
+              // general case: three eigenvalues from the trigonometric
+              // formula, as in the Fortran code above
+              const double theta = std::acos(XX) / 3.;
+              EE[0] = am + R*std::cos(theta);
+              EE[1] = am + R*std::cos(theta + 2./3.*numbers::PI);
+              EE[2] = am + R*std::cos(theta + 4./3.*numbers::PI);
+            };
+        };
+
+      // the norm is the eigenvalue of largest magnitude
+      return std::max (std::fabs (EE[0]),
+                       std::max (std::fabs (EE[1]),
+                                 std::fabs (EE[2])));
+    }
+
+
+
+    template <int dim>
+    inline
+    double
+    SecondDerivative<dim>::
+    derivative_norm (const Derivative & /*d*/)
+    {
+      // the spectral norm is not simple to compute in general. up to
+      // dim==3 it is feasible because the roots of the characteristic
+      // polynomial still have closed form expressions, which is what
+      // the specializations of this function use. for higher
+      // dimensions some other method needs to be employed -- maybe a
+      // few steps of the power method would suffice? until then, this
+      // general case is not implemented
+      Assert (false, ExcNotImplemented());
+      return 0;
+    }
+
+
+
+    template <int dim>
+    inline
+    void
+    SecondDerivative<dim>::symmetrize (Derivative &d)
+    {
+      // visit every pair of off-diagonal entries (row<col) once and
+      // overwrite both entries with their mean
+      for (unsigned int row=0; row<dim; ++row)
+        for (unsigned int col=row+1; col<dim; ++col)
+          {
+            const double mean = (d[row][col] + d[col][row]) / 2;
+            d[row][col] = mean;
+            d[col][row] = mean;
+          }
+    }
+
+
+
+    /**
+     * The following class describes the data needed to compute the finite
+     * difference approximation to the third derivatives on a cell: the tensor
+     * types involved, the update flags required, and how to evaluate, measure
+     * and symmetrize the result.
+     */
+    template <int dim>
+    class ThirdDerivative
+    {
+    public:
+      /**
+       * Declare which data fields have to be updated for the function @p
+       * get_projected_derivative to work.
+       */
+      static const UpdateFlags update_flags;
+
+      /**
+       * Declare the data type which holds the derivative described by this
+       * class.
+       */
+      typedef Tensor<3,dim> Derivative;
+
+      /**
+       * Likewise declare the data type that holds the derivative projected
+       * onto a certain direction.
+       */
+      typedef Tensor<2,dim> ProjectedDerivative;
+
+      /**
+       * Given an FEValues object initialized to a cell, and a solution vector,
+       * extract the desired derivative at the first quadrature point (which is
+       * the only one, as we only evaluate the finite element field at the
+       * center of each cell). For the third derivatives, the quantity
+       * extracted is the Hessian of the finite element field.
+       *
+       * @param fe_values FEValues object already initialized to the cell.
+       * @param solution  Vector from which the finite element field is
+       * evaluated.
+       * @param component Component of the finite element field to return;
+       * only used if the element has more than one component.
+       */
+      template <class InputVector, int spacedim>
+      static ProjectedDerivative
+      get_projected_derivative (const FEValues<dim,spacedim>  &fe_values,
+                                const InputVector    &solution,
+                                const unsigned int    component);
+
+      /**
+       * Return the norm of the derivative object. Here, for the tensor of
+       * third derivatives, we choose the norm returned by the tensor's own
+       * norm() function (the Frobenius norm); for dim equal to 1, this is the
+       * absolute value of the only entry.
+       */
+      static double derivative_norm (const Derivative &d);
+
+      /**
+       * If for the present derivative order, symmetrization of the derivative
+       * tensor is necessary, then do so on the argument.
+       *
+       * For the third derivatives, each entry whose three indices are not all
+       * equal is set to the mean over all entries whose indices are a
+       * permutation of its own.
+       *
+       * Note that this function actually modifies its argument.
+       */
+      static void symmetrize (Derivative &derivative_tensor);
+    };
+
+    // get_projected_derivative asks for the Hessians of the finite
+    // element field, hence update_hessians
+    template <int dim>
+    const UpdateFlags ThirdDerivative<dim>::update_flags = update_hessians;
+
+
+    template <int dim>
+    template <class InputVector, int spacedim>
+    inline
+    typename ThirdDerivative<dim>::ProjectedDerivative
+    ThirdDerivative<dim>::
+    get_projected_derivative (const FEValues<dim,spacedim>  &fe_values,
+                              const InputVector    &solution,
+                              const unsigned int    component)
+    {
+      typedef Tensor<2,dim,typename InputVector::value_type> HessianType;
+
+      const unsigned int n_components = fe_values.get_fe().n_components();
+
+      // scalar element: there is only one Hessian to ask for
+      if (n_components == 1)
+        {
+          std::vector<HessianType> scalar_hessians (1);
+          fe_values.get_function_hessians (solution, scalar_hessians);
+          return ProjectedDerivative(scalar_hessians[0]);
+        }
+
+      // vector-valued element: fetch the Hessians of all components at
+      // the single evaluation point and pick out the requested one
+      std::vector<std::vector<HessianType> > component_hessians
+      (1, std::vector<HessianType>(n_components));
+      fe_values.get_function_hessians (solution, component_hessians);
+      return ProjectedDerivative(component_hessians[0][component]);
+    }
+
+
+
+    template <>
+    inline
+    double
+    ThirdDerivative<1>::
+    derivative_norm (const Derivative &d)
+    {
+      // in 1d the tensor consists of a single entry
+      const double only_entry = d[0][0][0];
+      return std::fabs (only_entry);
+    }
+
+
+
+    template <int dim>
+    inline
+    double
+    ThirdDerivative<dim>::
+    derivative_norm (const Derivative &d)
+    {
+      // use the Frobenius norm, which Tensor<rank_,dim> provides as
+      // a member function
+      const double frobenius_norm = d.norm();
+      return frobenius_norm;
+    }
+
+
+    template <int dim>
+    inline
+    void
+    ThirdDerivative<dim>::symmetrize (Derivative &d)
+    {
+      // every entry whose indices are not all equal is replaced by the
+      // mean over all entries whose index triple is a permutation of
+      // its own
+
+      // part 1: i,j,k pairwise different (which can only happen in
+      // dim >= 3), so there are six permutations
+      for (unsigned int i=0; i<dim; ++i)
+        for (unsigned int j=i+1; j<dim; ++j)
+          for (unsigned int k=j+1; k<dim; ++k)
+            {
+              const double mean = (d[i][j][k] +
+                                   d[i][k][j] +
+                                   d[j][i][k] +
+                                   d[j][k][i] +
+                                   d[k][i][j] +
+                                   d[k][j][i]) / 6;
+              d[i][j][k] = mean;
+              d[i][k][j] = mean;
+              d[j][i][k] = mean;
+              d[j][k][i] = mean;
+              d[k][i][j] = mean;
+              d[k][j][i] = mean;
+            }
+
+      // part 2: exactly two indices are equal, so there are three
+      // permutations
+      for (unsigned int i=0; i<dim; ++i)
+        for (unsigned int j=i+1; j<dim; ++j)
+          {
+            // the lower index i appears twice
+            const double mean_iij = (d[i][i][j] +
+                                     d[i][j][i] +
+                                     d[j][i][i] ) / 3;
+            d[i][i][j] = mean_iij;
+            d[i][j][i] = mean_iij;
+            d[j][i][i] = mean_iij;
+
+            // the higher index j appears twice
+            const double mean_ijj = (d[i][j][j] +
+                                     d[j][i][j] +
+                                     d[j][j][i] ) / 3;
+            d[i][j][j] = mean_ijj;
+            d[j][i][j] = mean_ijj;
+            d[j][j][i] = mean_ijj;
+          }
+    }
+
+
+    /**
+     * Map a derivative order to the class that describes the derivative of
+     * that order. This general template is only a fallback; the partial
+     * specializations for the supported orders provide the real typedef.
+     */
+    template <int order, int dim>
+    class DerivativeSelector
+    {
+    public:
+      /**
+       * typedef to select the DerivativeDescription corresponding to the
+       * <tt>order</tt>th derivative. In this general template we set an
+       * invalid typedef to void; the real typedefs are provided by
+       * specializations.
+       */
+      typedef void DerivDescr;
+
+    };
+
+    /**
+     * Specialization for order 1: select the Gradient description.
+     */
+    template <int dim>
+    class DerivativeSelector<1,dim>
+    {
+    public:
+
+      typedef Gradient<dim> DerivDescr;
+    };
+
+    /**
+     * Specialization for order 2: select the SecondDerivative description.
+     */
+    template <int dim>
+    class DerivativeSelector<2,dim>
+    {
+    public:
+
+      typedef SecondDerivative<dim> DerivDescr;
+    };
+
+    /**
+     * Specialization for order 3: select the ThirdDerivative description.
+     */
+    template <int dim>
+    class DerivativeSelector<3,dim>
+    {
+    public:
+
+      typedef ThirdDerivative<dim> DerivDescr;
+    };
+  }
+}
+
+// Dummy scratch and copy-data structures needed to call WorkStream::run
+namespace DerivativeApproximation
+{
+  namespace internal
+  {
+    namespace Assembler
+    {
+      // Empty placeholder passed to WorkStream::run as sample scratch
+      // data; it holds no data.
+      struct Scratch
+      {
+        Scratch() {}
+      };
+
+      // Empty placeholder passed to WorkStream::run as sample copy
+      // data; it holds no data.
+      struct CopyData
+      {
+        CopyData() {}
+      };
+    }
+  }
+}
+
+// ------------------------------- now for the functions that do the
+// ------------------------------- actual work
+
+namespace DerivativeApproximation
+{
+  namespace internal
+  {
+    /**
+     * Compute the derivative approximation on one cell. This computes the
+     * full derivative tensor and stores it in @p derivative.
+     *
+     * The finite element field is evaluated at the midpoint of @p cell and
+     * of each of its active neighbors; the difference quotients along the
+     * normalized directions between the midpoints are accumulated and then
+     * multiplied by the inverse of the matrix Y built from these directions.
+     *
+     * Throws ExcInsufficientDirections if the determinant of Y is zero.
+     */
+    template <class DerivativeDescription, int dim, template <int, int> class DoFHandlerType,
+              class InputVector, int spacedim>
+    void
+    approximate_cell (const Mapping<dim,spacedim>        &mapping,
+                      const DoFHandlerType<dim,spacedim> &dof_handler,
+                      const InputVector                  &solution,
+                      const unsigned int                  component,
+                      const TriaActiveIterator<dealii::DoFCellAccessor<DoFHandlerType<dim, spacedim>,
+                      false> >  &cell,
+                      typename DerivativeDescription::Derivative    &derivative)
+    {
+      typedef TriaActiveIterator<dealii::DoFCellAccessor<DoFHandlerType<dim, spacedim>, false> >
+      ActiveCellIterator;
+
+      // create collection objects from single quadratures, mappings, and
+      // finite elements. if we have an hp DoFHandler,
+      // dof_handler.get_fe() returns a collection of which we do a
+      // shallow copy instead
+      QMidpoint<dim> midpoint_rule;
+      const hp::QCollection<dim>       q_collection (midpoint_rule);
+      const hp::FECollection<dim>      fe_collection(dof_handler.get_fe());
+      const hp::MappingCollection<dim> mapping_collection (mapping);
+
+      hp::FEValues<dim> x_fe_midpoint_value (mapping_collection, fe_collection,
+                                             q_collection,
+                                             DerivativeDescription::update_flags |
+                                             update_quadrature_points);
+
+      // evaluate on the present cell first: the value of the projected
+      // derivative at the midpoint, and the location of that midpoint
+      x_fe_midpoint_value.reinit (cell);
+      const FEValues<dim> &fe_midpoint_value
+        = x_fe_midpoint_value.get_present_fe_values();
+
+      const typename DerivativeDescription::ProjectedDerivative
+      this_midpoint_value
+        = DerivativeDescription::get_projected_derivative (fe_midpoint_value,
+                                                           solution,
+                                                           component);
+      const Point<dim> this_center = fe_midpoint_value.quadrature_point(0);
+
+      // collect all active neighbors of the cell in a vector first (things
+      // get a bit more complicated if a neighbor is more refined than the
+      // present cell, so leave that to GridTools). reserve the maximal
+      // number of active neighbors up front
+      std::vector<ActiveCellIterator> active_neighbors;
+      active_neighbors.reserve (GeometryInfo<dim>::faces_per_cell *
+                                GeometryInfo<dim>::max_children_per_face);
+      GridTools::get_active_neighbors<DoFHandlerType<dim,spacedim> >(cell, active_neighbors);
+
+      // matrix Y=sum_i y_i y_i^T
+      Tensor<2,dim> Y;
+
+      // vector g=sum_i y_i (f(x+y_i)-f(x))/|y_i| or related type for
+      // higher derivatives
+      typename DerivativeDescription::Derivative projected_derivative;
+
+      // now loop over all active neighbors and accumulate the difference
+      // quotients from them
+      for (typename std::vector<ActiveCellIterator>::const_iterator
+           neighbor_ptr = active_neighbors.begin();
+           neighbor_ptr != active_neighbors.end(); ++neighbor_ptr)
+        {
+          const ActiveCellIterator neighbor = *neighbor_ptr;
+
+          // value of the projected derivative at the neighbor's
+          // midpoint, and the location of that midpoint
+          x_fe_midpoint_value.reinit (neighbor);
+          const FEValues<dim> &neighbor_fe_midpoint_value
+            = x_fe_midpoint_value.get_present_fe_values();
+
+          const typename DerivativeDescription::ProjectedDerivative
+          neighbor_midpoint_value
+            = DerivativeDescription::get_projected_derivative (neighbor_fe_midpoint_value,
+                                                               solution, component);
+          const Point<dim>
+          neighbor_center = neighbor_fe_midpoint_value.quadrature_point(0);
+
+          // *** note that unlike in the docs, y denotes the normalized
+          // vector connecting the centers of the two cells, rather than
+          // the normal difference! ***
+          Tensor<1,dim> y        = neighbor_center - this_center;
+          const double  distance = y.norm();
+          y /= distance;
+
+          // add up the contribution of this neighbor to Y
+          for (unsigned int row=0; row<dim; ++row)
+            for (unsigned int col=0; col<dim; ++col)
+              Y[row][col] += y[row] * y[col];
+
+          // then update the sum of difference quotients
+          typename DerivativeDescription::ProjectedDerivative
+          projected_finite_difference
+            = (neighbor_midpoint_value -
+               this_midpoint_value);
+          projected_finite_difference /= distance;
+
+          projected_derivative += outer_product(y, projected_finite_difference);
+        }
+
+      // can we determine an approximation of the derivative for the
+      // present cell? if so, then we need to have passed over vectors y_i
+      // which span the whole space, otherwise we would not have all
+      // components of the derivative
+      AssertThrow (determinant(Y) != 0,
+                   ExcInsufficientDirections());
+
+      // compute Y^-1 g
+      derivative = invert(Y) * projected_derivative;
+
+      // finally symmetrize the derivative
+      DerivativeDescription::symmetrize (derivative);
+    }
+
+
+
+    /**
+     * Compute the derivative approximation on a given cell and write the
+     * norm of the computed derivative tensor into the vector entry that the
+     * second iterator of @p cell points to. For cells that are not locally
+     * owned, that entry is set to zero instead.
+     */
+    template <class DerivativeDescription, int dim,
+              template <int, int> class DoFHandlerType, class InputVector, int spacedim>
+    void
+    approximate
+    (SynchronousIterators<std_cxx11::tuple<TriaActiveIterator < dealii::DoFCellAccessor < DoFHandlerType < dim, spacedim >, false > >, Vector<float>::iterator> > const &cell,
+     const Mapping<dim,spacedim>        &mapping,
+     const DoFHandlerType<dim,spacedim> &dof_handler,
+     const InputVector                  &solution,
+     const unsigned int                  component)
+    {
+      // if the cell is not locally owned, then there is nothing to
+      // compute: record a zero and leave
+      if (!std_cxx11::get<0>(cell.iterators)->is_locally_owned())
+        {
+          *std_cxx11::get<1>(cell.iterators) = 0;
+          return;
+        }
+
+      // let approximate_cell do the actual work on this cell...
+      typename DerivativeDescription::Derivative derivative;
+      approximate_cell<DerivativeDescription,dim,DoFHandlerType,InputVector, spacedim>
+      (mapping,dof_handler,solution,component,std_cxx11::get<0>(cell.iterators),derivative);
+
+      // ...then evaluate the norm and store it in the output vector
+      *std_cxx11::get<1>(cell.iterators) = DerivativeDescription::derivative_norm (derivative);
+    }
+
+
+    /**
+     * Kind of the main function of this class. It is called by the public
+     * entry points to this class with the correct template first argument
+     * and then runs the @p approximate function on every active cell through
+     * WorkStream, after doing some administration that is independent of the
+     * actual derivative to be computed.
+     *
+     * The @p component argument denotes which component of the solution
+     * vector we are to work on. @p derivative_norm must have one entry per
+     * active cell; it receives the norms computed by @p approximate.
+     */
+    template <class DerivativeDescription, int dim,
+              template <int, int> class DoFHandlerType, class InputVector, int spacedim>
+    void
+    approximate_derivative (const Mapping<dim,spacedim>        &mapping,
+                            const DoFHandlerType<dim,spacedim> &dof_handler,
+                            const InputVector                  &solution,
+                            const unsigned int                  component,
+                            Vector<float>                      &derivative_norm)
+    {
+      Assert (derivative_norm.size() == dof_handler.get_triangulation().n_active_cells(),
+              ExcVectorLengthVsNActiveCells (derivative_norm.size(),
+                                             dof_handler.get_triangulation().n_active_cells()));
+      Assert (component < dof_handler.get_fe().n_components(),
+              ExcIndexRange (component, 0, dof_handler.get_fe().n_components()));
+
+      // walk over the active cells and the entries of the output vector
+      // in lockstep
+      typedef TriaActiveIterator<dealii::DoFCellAccessor<DoFHandlerType<dim, spacedim>, false> >
+      CellIterator;
+      typedef std_cxx11::tuple<CellIterator, Vector<float>::iterator> Iterators;
+      typedef SynchronousIterators<Iterators> CellAndNormIterator;
+
+      CellAndNormIterator begin (Iterators(dof_handler.begin_active(),
+                                           derivative_norm.begin()));
+      CellAndNormIterator end (Iterators(dof_handler.end(),
+                                         derivative_norm.end()));
+
+      // the worker forwards only the iterator pair to approximate(); the
+      // remaining arguments are bound here
+      const std_cxx11::function<void (CellAndNormIterator const &,
+                                      Assembler::Scratch const &, Assembler::CopyData &)>
+      worker (std_cxx11::bind(&approximate<DerivativeDescription,dim,DoFHandlerType,
+                              InputVector,spacedim>,
+                              std_cxx11::_1,
+                              std_cxx11::cref(mapping),
+                              std_cxx11::cref(dof_handler),
+                              std_cxx11::cref(solution),component));
+
+      // There is no need for a copier because there is no conflict between threads
+      // to write in derivative_norm. Scratch and CopyData are also useless.
+      WorkStream::run(begin,
+                      end,
+                      worker,
+                      std_cxx11::function<void (internal::Assembler::CopyData const &)> (),
+                      internal::Assembler::Scratch (),internal::Assembler::CopyData ());
+    }
+
+  } // namespace internal
+
+} // namespace DerivativeApproximation
+
+
+// ------------------------ finally for the public interface of this namespace
+
+namespace DerivativeApproximation
+{
+  template <int dim, template <int, int> class DoFHandlerType, class InputVector, int spacedim>
+  void
+  approximate_gradient (const Mapping<dim,spacedim>        &mapping,
+                        const DoFHandlerType<dim,spacedim> &dof_handler,
+                        const InputVector                  &solution,
+                        Vector<float>                      &derivative_norm,
+                        const unsigned int                  component)
+  {
+    // select the first-derivative description and let the generic driver work;
+    // note that it takes @p component before the output vector
+    typedef internal::Gradient<dim> GradientDescription;
+    internal::approximate_derivative<GradientDescription,dim>
+    (mapping, dof_handler, solution, component, derivative_norm);
+  }
+
+
+  template <int dim, template <int, int> class DoFHandlerType, class InputVector, int spacedim>
+  void
+  approximate_gradient (const DoFHandlerType<dim,spacedim> &dof_handler,
+                        const InputVector                  &solution,
+                        Vector<float>                      &derivative_norm,
+                        const unsigned int                  component)
+  {
+    // No mapping given: use StaticMappingQ1<dim,spacedim> rather than
+    // StaticMappingQ1<dim>, so that the mapping's space dimension agrees with
+    // that of the DoFHandler, as in approximate_derivative_tensor().
+    internal::approximate_derivative<internal::Gradient<dim>,dim>
+    (StaticMappingQ1<dim,spacedim>::mapping, dof_handler, solution, component, derivative_norm);
+  }
+
+
+  template <int dim, template <int, int> class DoFHandlerType, class InputVector, int spacedim>
+  void
+  approximate_second_derivative (const Mapping<dim,spacedim>        &mapping,
+                                 const DoFHandlerType<dim,spacedim> &dof_handler,
+                                 const InputVector                  &solution,
+                                 Vector<float>                      &derivative_norm,
+                                 const unsigned int                  component)
+  {
+    // select the second-derivative description and let the generic driver work;
+    // note that it takes @p component before the output vector
+    typedef internal::SecondDerivative<dim> SecondDerivativeDescription;
+    internal::approximate_derivative<SecondDerivativeDescription,dim>
+    (mapping, dof_handler, solution, component, derivative_norm);
+  }
+
+
+  template <int dim, template <int, int> class DoFHandlerType, class InputVector, int spacedim>
+  void
+  approximate_second_derivative (const DoFHandlerType<dim,spacedim> &dof_handler,
+                                 const InputVector                  &solution,
+                                 Vector<float>                      &derivative_norm,
+                                 const unsigned int                  component)
+  {
+    // No mapping given: use StaticMappingQ1<dim,spacedim> rather than
+    // StaticMappingQ1<dim>, so that the mapping's space dimension agrees with
+    // that of the DoFHandler, as in approximate_derivative_tensor().
+    internal::approximate_derivative<internal::SecondDerivative<dim>,dim>
+    (StaticMappingQ1<dim,spacedim>::mapping, dof_handler, solution, component, derivative_norm);
+  }
+
+
+  template <typename DoFHandlerType, int dim, int spacedim, class InputVector, int order>
+  void
+  approximate_derivative_tensor
+  (const Mapping<dim, spacedim>                        &mapping,
+   const DoFHandlerType                                &dof,
+   const InputVector                                   &solution,
+#ifndef _MSC_VER
+   const typename DoFHandlerType::active_cell_iterator &cell,
+#else
+   const TriaActiveIterator < dealii::DoFCellAccessor < DoFHandlerType, false > > &cell,
+#endif
+   Tensor<order, dim>                                  &derivative,
+   const unsigned int                                   component)
+  {
+    // Select the derivative description for @p order (with the dimension
+    // taken from the DoFHandler type) and hand everything on; the remaining
+    // template arguments of approximate_cell() are deduced.
+    typedef typename internal::DerivativeSelector<order,DoFHandlerType::dimension>::DerivDescr
+    DerivativeDescription;
+    internal::approximate_cell<DerivativeDescription>
+    (mapping, dof, solution, component, cell, derivative);
+  }
+
+
+
+  template <typename DoFHandlerType, int dim, int spacedim, class InputVector, int order>
+  void
+  approximate_derivative_tensor
+  (const DoFHandlerType                                &dof,
+   const InputVector                                   &solution,
+#ifndef _MSC_VER
+   const typename DoFHandlerType::active_cell_iterator &cell,
+#else
+   const TriaActiveIterator < dealii::DoFCellAccessor < DoFHandlerType, false > > &cell,
+#endif
+   Tensor<order, dim>                                  &derivative,
+   const unsigned int                                   component)
+  {
+    // No mapping was given: take the static Q1 mapping and defer to the
+    // overload with an explicit Mapping argument, passing on our template
+    // arguments unchanged.
+    const Mapping<dim, spacedim> &q1_mapping
+      = StaticMappingQ1<dim, spacedim>::mapping;
+    approximate_derivative_tensor<DoFHandlerType, dim, spacedim, InputVector, order>
+    (q1_mapping, dof, solution,
+     cell, derivative, component);
+  }
+
+
+
+  // Return the norm of @p derivative as defined by the description for its order.
+  template <int dim, int order>
+  double
+  derivative_norm (const Tensor<order,dim> &derivative)
+  {
+    typedef typename internal::DerivativeSelector<order,dim>::DerivDescr Description;
+    return Description::derivative_norm(derivative);
+  }
+
+}
+
+
+// --------------------------- explicit instantiations ---------------------
+#include "derivative_approximation.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/numerics/derivative_approximation.inst.in b/source/numerics/derivative_approximation.inst.in
new file mode 100644
index 0000000..044f75f
--- /dev/null
+++ b/source/numerics/derivative_approximation.inst.in
@@ -0,0 +1,133 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// Instantiate the public functions per dimension, serial vector and DoFHandler template (spacedim == dim).
+for (deal_II_dimension : DIMENSIONS ; VEC : SERIAL_VECTORS ; DH : DOFHANDLER_TEMPLATES)
+{
+  namespace DerivativeApproximation
+  \{
+template
+void
+approximate_gradient<deal_II_dimension>
+(const Mapping<deal_II_dimension> &mapping,
+ const DH<deal_II_dimension> &dof_handler,
+ const VEC             &solution,
+ Vector<float>         &derivative_norm,
+ const unsigned int     component);
+
+template
+void
+approximate_gradient<deal_II_dimension>
+(const DH<deal_II_dimension> &dof_handler,
+ const VEC             &solution,
+ Vector<float>         &derivative_norm,
+ const unsigned int     component);
+
+template
+void
+approximate_second_derivative<deal_II_dimension>
+(const Mapping<deal_II_dimension> &mapping,
+ const DH<deal_II_dimension> &dof_handler,
+ const VEC             &solution,
+ Vector<float>         &derivative_norm,
+ const unsigned int     component);
+
+template
+void
+approximate_second_derivative<deal_II_dimension>
+(const DH<deal_II_dimension> &dof_handler,
+ const VEC             &solution,
+ Vector<float>         &derivative_norm,
+ const unsigned int     component);
+
+template
+void
+approximate_derivative_tensor < DH <deal_II_dimension>, deal_II_dimension, deal_II_dimension, VEC, 1 >
+(const Mapping<deal_II_dimension> &mapping,
+ const DH <deal_II_dimension> &dof_handler,
+ const VEC &solution,
+ const DH <deal_II_dimension>::active_cell_iterator &cell,
+ Tensor<1,deal_II_dimension> &derivative,
+ const unsigned int     component);
+
+template
+void
+approximate_derivative_tensor < DH <deal_II_dimension>, deal_II_dimension, deal_II_dimension, VEC, 2 >
+(const Mapping <deal_II_dimension> &mapping,
+ const DH <deal_II_dimension> &dof_handler,
+ const VEC &solution,
+ const DH <deal_II_dimension>::active_cell_iterator &cell,
+ Tensor<2,deal_II_dimension> &derivative,
+ const unsigned int     component);
+
+template
+void
+approximate_derivative_tensor < DH <deal_II_dimension>, deal_II_dimension, deal_II_dimension, VEC, 3 >
+(const Mapping <deal_II_dimension> &mapping,
+ const DH <deal_II_dimension> &dof_handler,
+ const VEC &solution,
+ const DH <deal_II_dimension>::active_cell_iterator &cell,
+ Tensor<3,deal_II_dimension> &derivative,
+ const unsigned int     component);
+
+
+template
+void
+approximate_derivative_tensor < DH <deal_II_dimension>, deal_II_dimension, deal_II_dimension, VEC, 1 >
+(const DH <deal_II_dimension> &dof_handler,
+ const VEC &solution,
+ const DH <deal_II_dimension>::active_cell_iterator &cell,
+ Tensor<1,deal_II_dimension> &derivative,
+ const unsigned int     component);
+
+template
+void
+approximate_derivative_tensor < DH <deal_II_dimension>, deal_II_dimension, deal_II_dimension, VEC, 2 >
+(const DH <deal_II_dimension> &dof_handler,
+ const VEC &solution,
+ const DH <deal_II_dimension>::active_cell_iterator &cell,
+ Tensor<2,deal_II_dimension> &derivative,
+ const unsigned int     component);
+
+template
+void
+approximate_derivative_tensor < DH <deal_II_dimension>, deal_II_dimension, deal_II_dimension, VEC, 3 >
+(const DH <deal_II_dimension> &dof_handler,
+ const VEC &solution,
+ const DH <deal_II_dimension>::active_cell_iterator &cell,
+ Tensor<3,deal_II_dimension> &derivative,
+ const unsigned int     component);
+
+  \}
+}
+
+// Instantiate derivative_norm() for tensors of rank 1, 2 and 3 in each dimension.
+for (deal_II_dimension : DIMENSIONS)
+{
+  namespace DerivativeApproximation
+  \{
+template
+double
+derivative_norm(const Tensor<1,deal_II_dimension> &derivative);
+
+template
+double
+derivative_norm(const Tensor<2,deal_II_dimension> &derivative);
+
+template
+double
+derivative_norm(const Tensor<3,deal_II_dimension> &derivative);
+  \}
+}
diff --git a/source/numerics/dof_output_operator.cc b/source/numerics/dof_output_operator.cc
new file mode 100644
index 0000000..c91b21b
--- /dev/null
+++ b/source/numerics/dof_output_operator.cc
@@ -0,0 +1,40 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/base/parameter_handler.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/lac/vector_memory.h>
+
+#include <deal.II/numerics/dof_output_operator.templates.h>
+
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/parallel_vector.h>
+#include <deal.II/lac/parallel_block_vector.h>
+#include <deal.II/lac/petsc_vector.h>
+#include <deal.II/lac/petsc_block_vector.h>
+#include <deal.II/lac/trilinos_vector.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace Algorithms
+{
+#include "dof_output_operator.inst"
+}
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/numerics/dof_output_operator.inst.in b/source/numerics/dof_output_operator.inst.in
new file mode 100644
index 0000000..8b62589
--- /dev/null
+++ b/source/numerics/dof_output_operator.inst.in
@@ -0,0 +1,20 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// Instantiate DoFOutputOperator for each serial vector type and dimension (spacedim == dim).
+for (VEC : SERIAL_VECTORS ; deal_II_dimension : DIMENSIONS)
+{
+  template class DoFOutputOperator<VEC, deal_II_dimension, deal_II_dimension>;
+}
diff --git a/source/numerics/error_estimator.cc b/source/numerics/error_estimator.cc
new file mode 100644
index 0000000..b094468
--- /dev/null
+++ b/source/numerics/error_estimator.cc
@@ -0,0 +1,24 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/numerics/error_estimator.templates.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+#define SPLIT_INSTANTIATIONS_COUNT 2
+#define SPLIT_INSTANTIATIONS_INDEX 0
+#include "error_estimator.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/numerics/error_estimator.inst.in b/source/numerics/error_estimator.inst.in
new file mode 100644
index 0000000..bdf76ec
--- /dev/null
+++ b/source/numerics/error_estimator.inst.in
@@ -0,0 +1,157 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// The class itself, for all dim <= spacedim except dim == 1 (see error_estimator_1d.cc for that case).
+for (deal_II_dimension : DIMENSIONS ; deal_II_space_dimension : SPACE_DIMENSIONS)
+{
+#if deal_II_dimension != 1 && deal_II_dimension <= deal_II_space_dimension
+template class KellyErrorEstimator<deal_II_dimension, deal_II_space_dimension>;
+#endif
+}
+// estimate() overloads: with/without Mapping, Quadrature or hp::QCollection, one or several vectors.
+for (VEC : SERIAL_VECTORS ; deal_II_dimension : DIMENSIONS; deal_II_space_dimension : SPACE_DIMENSIONS; DH : DOFHANDLER_TEMPLATES )
+{
+#if deal_II_dimension != 1 && deal_II_dimension <= deal_II_space_dimension
+
+template
+void
+KellyErrorEstimator<deal_II_dimension, deal_II_space_dimension>::
+estimate<VEC,DH<deal_II_dimension,deal_II_space_dimension> > (const Mapping<deal_II_dimension, deal_II_space_dimension>      &,
+          const DH<deal_II_dimension,deal_II_space_dimension>   &,
+          const Quadrature<deal_II_dimension-1> &,
+          const FunctionMap<deal_II_space_dimension>::type &,
+          const VEC       &,
+          Vector<float>           &,
+          const ComponentMask &,
+          const Function<deal_II_space_dimension>     *,
+          const unsigned int       ,
+          const unsigned int       ,
+          const types::material_id,
+          const KellyErrorEstimator<deal_II_dimension, deal_II_space_dimension>::Strategy);
+
+template
+void
+KellyErrorEstimator<deal_II_dimension, deal_II_space_dimension>::
+estimate<VEC,DH<deal_II_dimension,deal_II_space_dimension> > (
+          const DH<deal_II_dimension,deal_II_space_dimension>   &,
+          const Quadrature<deal_II_dimension-1> &,
+          const FunctionMap<deal_II_space_dimension>::type &,
+          const VEC       &,
+          Vector<float>           &,
+          const ComponentMask &,
+          const Function<deal_II_space_dimension>     *,
+          const unsigned int       ,
+          const unsigned int       ,
+          const types::material_id,
+          const KellyErrorEstimator<deal_II_dimension, deal_II_space_dimension>::Strategy);
+
+template
+void
+KellyErrorEstimator<deal_II_dimension, deal_II_space_dimension>::
+estimate<VEC,DH<deal_II_dimension,deal_II_space_dimension> > (const Mapping<deal_II_dimension, deal_II_space_dimension>      &,
+          const DH<deal_II_dimension,deal_II_space_dimension>   &,
+          const Quadrature<deal_II_dimension-1> &,
+          const FunctionMap<deal_II_space_dimension>::type &,
+          const std::vector<const VEC *>       &,
+          std::vector<Vector<float> *>         &,
+          const ComponentMask &,
+          const Function<deal_II_space_dimension>     *,
+          const unsigned int       ,
+          const unsigned int       ,
+          const types::material_id,
+          const KellyErrorEstimator<deal_II_dimension, deal_II_space_dimension>::Strategy);
+
+template
+void
+KellyErrorEstimator<deal_II_dimension, deal_II_space_dimension>::
+estimate<VEC,DH<deal_II_dimension,deal_II_space_dimension> > (
+          const DH<deal_II_dimension,deal_II_space_dimension>   &,
+          const Quadrature<deal_II_dimension-1> &,
+          const FunctionMap<deal_II_space_dimension>::type &,
+          const std::vector<const VEC *>       &,
+          std::vector<Vector<float> *>         &,
+          const ComponentMask &,
+          const Function<deal_II_space_dimension>     *,
+          const unsigned int       ,
+          const unsigned int       ,
+          const types::material_id,
+          const KellyErrorEstimator<deal_II_dimension, deal_II_space_dimension>::Strategy);
+
+template
+void
+KellyErrorEstimator<deal_II_dimension, deal_II_space_dimension>::
+estimate<VEC,DH<deal_II_dimension,deal_II_space_dimension> > (const Mapping<deal_II_dimension, deal_II_space_dimension>      &,
+          const DH<deal_II_dimension,deal_II_space_dimension>   &,
+          const hp::QCollection<deal_II_dimension-1> &,
+          const FunctionMap<deal_II_space_dimension>::type &,
+          const VEC       &,
+          Vector<float>           &,
+          const ComponentMask &,
+          const Function<deal_II_space_dimension>     *,
+          const unsigned int       ,
+          const unsigned int       ,
+          const types::material_id,
+          const KellyErrorEstimator<deal_II_dimension, deal_II_space_dimension>::Strategy);
+
+template
+void
+KellyErrorEstimator<deal_II_dimension, deal_II_space_dimension>::
+estimate<VEC,DH<deal_II_dimension,deal_II_space_dimension> > (
+          const DH<deal_II_dimension,deal_II_space_dimension>   &,
+          const hp::QCollection<deal_II_dimension-1> &,
+          const FunctionMap<deal_II_space_dimension>::type &,
+          const VEC       &,
+          Vector<float>           &,
+          const ComponentMask &,
+          const Function<deal_II_space_dimension>     *,
+          const unsigned int       ,
+          const unsigned int       ,
+          const types::material_id,
+          const KellyErrorEstimator<deal_II_dimension, deal_II_space_dimension>::Strategy);
+
+template
+void
+KellyErrorEstimator<deal_II_dimension, deal_II_space_dimension>::
+estimate<VEC,DH<deal_II_dimension,deal_II_space_dimension> > (const Mapping<deal_II_dimension, deal_II_space_dimension>      &,
+          const DH<deal_II_dimension,deal_II_space_dimension>   &,
+          const hp::QCollection<deal_II_dimension-1> &,
+          const FunctionMap<deal_II_space_dimension>::type &,
+          const std::vector<const VEC *>       &,
+          std::vector<Vector<float> *>         &,
+          const ComponentMask &,
+          const Function<deal_II_space_dimension>     *,
+          const unsigned int       ,
+          const unsigned int       ,
+          const types::material_id,
+          const KellyErrorEstimator<deal_II_dimension, deal_II_space_dimension>::Strategy);
+
+template
+void
+KellyErrorEstimator<deal_II_dimension, deal_II_space_dimension>::
+estimate<VEC,DH<deal_II_dimension,deal_II_space_dimension> > (
+          const DH<deal_II_dimension,deal_II_space_dimension>   &,
+          const hp::QCollection<deal_II_dimension-1> &,
+          const FunctionMap<deal_II_space_dimension>::type &,
+          const std::vector<const VEC *>       &,
+          std::vector<Vector<float> *>         &,
+          const ComponentMask &,
+          const Function<deal_II_space_dimension>     *,
+          const unsigned int       ,
+          const unsigned int       ,
+          const types::material_id,
+          const KellyErrorEstimator<deal_II_dimension, deal_II_space_dimension>::Strategy);
+
+#endif
+}
diff --git a/source/numerics/error_estimator_1d.cc b/source/numerics/error_estimator_1d.cc
new file mode 100644
index 0000000..3420168
--- /dev/null
+++ b/source/numerics/error_estimator_1d.cc
@@ -0,0 +1,456 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/thread_management.h>
+#include <deal.II/base/quadrature.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/work_stream.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/parallel_vector.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/parallel_block_vector.h>
+#include <deal.II/lac/petsc_vector.h>
+#include <deal.II/lac/petsc_block_vector.h>
+#include <deal.II/lac/trilinos_vector.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/base/geometry_info.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/hp/fe_values.h>
+#include <deal.II/fe/fe_update_flags.h>
+#include <deal.II/fe/mapping_q1.h>
+#include <deal.II/hp/q_collection.h>
+#include <deal.II/hp/mapping_collection.h>
+#include <deal.II/numerics/error_estimator.h>
+#include <deal.II/distributed/tria.h>
+
+#include <deal.II/base/std_cxx11/bind.h>
+
+#include <numeric>
+#include <algorithm>
+#include <cmath>
+#include <vector>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+template <int spacedim>
+template <typename InputVector, typename DoFHandlerType>
+void
+KellyErrorEstimator<1,spacedim>::
+estimate (const Mapping<1,spacedim>                  &mapping,
+          const DoFHandlerType                       &dof_handler,
+          const Quadrature<0>                        &quadrature,
+          const typename FunctionMap<spacedim>::type &neumann_bc,
+          const InputVector                          &solution,
+          Vector<float>                              &error,
+          const ComponentMask                        &component_mask,
+          const Function<spacedim>                   *coefficients,
+          const unsigned int                          n_threads,
+          const types::subdomain_id                   subdomain_id,
+          const types::material_id                    material_id)
+{
+  // wrap the single vectors into one-element lists for the multi-vector overload
+  std::vector<Vector<float>*> error_list (1, &error);
+  const std::vector<const InputVector *> solution_list (1, &solution);
+  estimate (mapping, dof_handler, quadrature, neumann_bc, solution_list, error_list,
+            component_mask, coefficients, n_threads, subdomain_id, material_id);
+}
+
+
+
+template <int spacedim>
+template <typename InputVector, typename DoFHandlerType>
+void KellyErrorEstimator<1,spacedim>::
+estimate (const DoFHandlerType                       &dof_handler,
+          const Quadrature<0>                        &quadrature,
+          const typename FunctionMap<spacedim>::type &neumann_bc,
+          const InputVector                          &solution,
+          Vector<float>                              &error,
+          const ComponentMask                        &component_mask,
+          const Function<spacedim>                   *coefficients,
+          const unsigned int                          n_threads,
+          const types::subdomain_id                   subdomain_id,
+          const types::material_id                    material_id)
+{
+  // no mapping given: forward to the mapping overload with the static Q1 mapping
+  estimate (StaticMappingQ1<1,spacedim>::mapping, dof_handler, quadrature, neumann_bc,
+            solution, error, component_mask, coefficients, n_threads, subdomain_id, material_id);
+}
+
+
+
+template <int spacedim>
+template <typename InputVector, typename DoFHandlerType>
+void KellyErrorEstimator<1,spacedim>::
+estimate (const DoFHandlerType                       &dof_handler,
+          const Quadrature<0>                        &quadrature,
+          const typename FunctionMap<spacedim>::type &neumann_bc,
+          const std::vector<const InputVector *>     &solutions,
+          std::vector<Vector<float>*>                &errors,
+          const ComponentMask                        &component_mask,
+          const Function<spacedim>                   *coefficients,
+          const unsigned int                          n_threads,
+          const types::subdomain_id                   subdomain_id,
+          const types::material_id                    material_id)
+{
+  // no mapping given: forward to the mapping overload with the static Q1 mapping
+  estimate (StaticMappingQ1<1,spacedim>::mapping, dof_handler, quadrature, neumann_bc,
+            solutions, errors, component_mask, coefficients, n_threads, subdomain_id, material_id);
+}
+
+
+
+template <int spacedim>
+template <typename InputVector, typename DoFHandlerType>
+void
+KellyErrorEstimator<1,spacedim>::
+estimate (const Mapping<1,spacedim>                  &mapping,
+          const DoFHandlerType                       &dof_handler,
+          const hp::QCollection<0>                   &quadrature,
+          const typename FunctionMap<spacedim>::type &neumann_bc,
+          const InputVector                          &solution,
+          Vector<float>                              &error,
+          const ComponentMask                        &component_mask,
+          const Function<spacedim>                   *coefficients,
+          const unsigned int                          n_threads,
+          const types::subdomain_id                   subdomain_id,
+          const types::material_id                    material_id)
+{
+  // wrap the single vectors into one-element lists for the multi-vector overload
+  std::vector<Vector<float>*> error_list (1, &error);
+  const std::vector<const InputVector *> solution_list (1, &solution);
+  estimate (mapping, dof_handler, quadrature, neumann_bc, solution_list, error_list,
+            component_mask, coefficients, n_threads, subdomain_id, material_id);
+}
+
+
+template <int spacedim>
+template <typename InputVector, typename DoFHandlerType>
+void KellyErrorEstimator<1,spacedim>::
+estimate (const DoFHandlerType                       &dof_handler,
+          const hp::QCollection<0>                   &quadrature,
+          const typename FunctionMap<spacedim>::type &neumann_bc,
+          const InputVector                          &solution,
+          Vector<float>                              &error,
+          const ComponentMask                        &component_mask,
+          const Function<spacedim>                   *coefficients,
+          const unsigned int                          n_threads,
+          const types::subdomain_id                   subdomain_id,
+          const types::material_id                    material_id)
+{
+  // no mapping given: forward to the mapping overload with the static Q1 mapping
+  estimate (StaticMappingQ1<1,spacedim>::mapping, dof_handler, quadrature, neumann_bc,
+            solution, error, component_mask, coefficients, n_threads, subdomain_id, material_id);
+}
+
+
+
+template <int spacedim>
+template <typename InputVector, typename DoFHandlerType>
+void KellyErrorEstimator<1,spacedim>::
+estimate (const DoFHandlerType                       &dof_handler,
+          const hp::QCollection<0>                   &quadrature,
+          const typename FunctionMap<spacedim>::type &neumann_bc,
+          const std::vector<const InputVector *>     &solutions,
+          std::vector<Vector<float>*>                &errors,
+          const ComponentMask                        &component_mask,
+          const Function<spacedim>                   *coefficients,
+          const unsigned int                          n_threads,
+          const types::subdomain_id                   subdomain_id,
+          const types::material_id                    material_id)
+{
+  // no mapping given: forward to the mapping overload with the static Q1 mapping
+  estimate (StaticMappingQ1<1,spacedim>::mapping, dof_handler, quadrature, neumann_bc,
+            solutions, errors, component_mask, coefficients, n_threads, subdomain_id, material_id);
+}
+
+
+
+// Multi-vector overload for hp::QCollection<0>: the body does nothing but trigger an assertion.
+template <int spacedim>
+template <typename InputVector, typename DoFHandlerType>
+void KellyErrorEstimator<1,spacedim>::
+estimate (const Mapping<1,spacedim>                  & /*mapping*/,
+          const DoFHandlerType                       & /*dof_handler*/,
+          const hp::QCollection<0> & /*quadrature*/,
+          const typename FunctionMap<spacedim>::type & /*neumann_bc*/,
+          const std::vector<const InputVector *>     & /*solutions*/,
+          std::vector<Vector<float>*>                & /*errors*/,
+          const ComponentMask                        & /*component_mask_*/,
+          const Function<spacedim>                   * /*coefficient*/,
+          const unsigned int /*n_threads*/,
+          const types::subdomain_id                    /*subdomain_id*/,
+          const types::material_id                     /*material_id*/)
+{
+  Assert (false, ExcInternalError());
+}
+
+
+
+template <int spacedim>
+template <typename InputVector, typename DoFHandlerType>
+void KellyErrorEstimator<1,spacedim>::
+estimate (const Mapping<1,spacedim>                  &mapping,
+          const DoFHandlerType                       &dof_handler,
+          const Quadrature<0> &,
+          const typename FunctionMap<spacedim>::type &neumann_bc,
+          const std::vector<const InputVector *>     &solutions,
+          std::vector<Vector<float>*>                &errors,
+          const ComponentMask                        &component_mask,
+          const Function<spacedim>                   *coefficient,
+          const unsigned int,
+          const types::subdomain_id                   subdomain_id_,
+          const types::material_id                    material_id)
+{
+#ifdef DEAL_II_WITH_P4EST
+  if (dynamic_cast<const parallel::distributed::Triangulation<1,spacedim>*>
+      (&dof_handler.get_triangulation())
+      != 0)
+    Assert ((subdomain_id_ == numbers::invalid_subdomain_id)
+            ||
+            (subdomain_id_ ==
+             dynamic_cast<const parallel::distributed::Triangulation<1,spacedim>&>
+             (dof_handler.get_triangulation()).locally_owned_subdomain()),
+            ExcMessage ("For parallel distributed triangulations, the only "
+                        "valid subdomain_id that can be passed here is the "
+                        "one that corresponds to the locally owned subdomain id."));
+
+  const types::subdomain_id subdomain_id
+    = ((dynamic_cast<const parallel::distributed::Triangulation<1,spacedim>*>
+        (&dof_handler.get_triangulation())
+        != 0)
+       ?
+       dynamic_cast<const parallel::distributed::Triangulation<1,spacedim>&>
+       (dof_handler.get_triangulation()).locally_owned_subdomain()
+       :
+       subdomain_id_);
+#else
+  const types::subdomain_id subdomain_id
+    = subdomain_id_;
+#endif
+
+  const unsigned int n_components       = dof_handler.get_fe().n_components();
+  const unsigned int n_solution_vectors = solutions.size();
+
+  // sanity checks
+  Assert (neumann_bc.find(numbers::internal_face_boundary_id) == neumann_bc.end(),
+          ExcMessage("You are not allowed to list the special boundary "
+                     "indicator for internal boundaries in your boundary "
+                     "value map."));
+
+  for (typename FunctionMap<spacedim>::type::const_iterator i=neumann_bc.begin();
+       i!=neumann_bc.end(); ++i)
+    Assert (i->second->n_components == n_components,
+            ExcInvalidBoundaryFunction(i->first,
+                                       i->second->n_components,
+                                       n_components));
+
+  Assert (component_mask.represents_n_components(n_components),
+          ExcInvalidComponentMask());
+  Assert (component_mask.n_selected_components(n_components) > 0,
+          ExcInvalidComponentMask());
+
+  Assert ((coefficient == 0) ||
+          (coefficient->n_components == n_components) ||
+          (coefficient->n_components == 1),
+          ExcInvalidCoefficient());
+
+  Assert (solutions.size() > 0,
+          ExcNoSolutions());
+  Assert (solutions.size() == errors.size(),
+          ExcIncompatibleNumberOfElements(solutions.size(), errors.size()));
+  for (unsigned int n=0; n<solutions.size(); ++n)
+    Assert (solutions[n]->size() == dof_handler.n_dofs(),
+            ExcDimensionMismatch(solutions[n]->size(),
+                                 dof_handler.n_dofs()));
+
+  Assert ((coefficient == 0) ||
+          (coefficient->n_components == n_components) ||
+          (coefficient->n_components == 1),
+          ExcInvalidCoefficient());
+
+  for (typename FunctionMap<spacedim>::type::const_iterator i=neumann_bc.begin();
+       i!=neumann_bc.end(); ++i)
+    Assert (i->second->n_components == n_components,
+            ExcInvalidBoundaryFunction(i->first,
+                                       i->second->n_components,
+                                       n_components));
+
+  // reserve one slot for each cell and set it to zero
+  for (unsigned int n=0; n<n_solution_vectors; ++n)
+    (*errors[n]).reinit (dof_handler.get_triangulation().n_active_cells());
+
+  // fields to get the gradients on the present and the neighbor cell.
+  //
+  // for the neighbor gradient, we need several auxiliary fields, depending on
+  // the way we get it (see below)
+  std::vector<std::vector<std::vector<Tensor<1,spacedim,typename InputVector::value_type> > > >
+  gradients_here (n_solution_vectors,
+                  std::vector<std::vector<Tensor<1,spacedim,typename InputVector::value_type> > >(2, std::vector<Tensor<1,spacedim,typename InputVector::value_type> >(n_components)));
+  std::vector<std::vector<std::vector<Tensor<1,spacedim,typename InputVector::value_type> > > >
+  gradients_neighbor (gradients_here);
+  std::vector<Vector<typename InputVector::value_type> >
+  grad_neighbor (n_solution_vectors, Vector<typename InputVector::value_type>(n_components));
+
+  // reserve some space for coefficient values at one point.  if there is no
+  // coefficient, then we fill it by unity once and for all and don't set it
+  // any more
+  Vector<double> coefficient_values (n_components);
+  if (coefficient == 0)
+    for (unsigned int c=0; c<n_components; ++c)
+      coefficient_values(c) = 1;
+
+  const QTrapez<1> quadrature;
+  const hp::QCollection<1> q_collection(quadrature);
+  const QGauss<0> face_quadrature(1);
+  const hp::QCollection<0> q_face_collection(face_quadrature);
+
+  const hp::FECollection<1,spacedim> fe (dof_handler.get_fe());
+
+  hp::MappingCollection<1,spacedim> mapping_collection;
+  mapping_collection.push_back (mapping);
+
+  hp::FEValues<1,spacedim> fe_values (mapping_collection, fe, q_collection,
+                                      update_gradients);
+  hp::FEFaceValues<1,spacedim> fe_face_values (/*mapping_collection,*/ fe, q_face_collection,
+      update_normal_vectors);
+
+  // loop over all cells and do something on the cells which we're told to
+  // work on. note that the error indicator is only a sum over the two
+  // contributions from the two vertices of each cell.
+  for (typename DoFHandlerType::active_cell_iterator cell = dof_handler.begin_active();
+       cell != dof_handler.end();
+       ++cell)
+    if (((subdomain_id == numbers::invalid_subdomain_id)
+         ||
+         (cell->subdomain_id() == subdomain_id))
+        &&
+        ((material_id == numbers::invalid_material_id)
+         ||
+         (cell->material_id() == material_id)))
+      {
+        for (unsigned int n=0; n<n_solution_vectors; ++n)
+          (*errors[n])(cell->active_cell_index()) = 0;
+
+        fe_values.reinit (cell);
+        for (unsigned int s=0; s<n_solution_vectors; ++s)
+          fe_values.get_present_fe_values()
+          .get_function_gradients (*solutions[s], gradients_here[s]);
+
+        // loop over the two points bounding this line. n==0 is left point,
+        // n==1 is right point
+        for (unsigned int n=0; n<2; ++n)
+          {
+            // find left or right active neighbor
+            typename DoFHandlerType::cell_iterator neighbor = cell->neighbor(n);
+            if (neighbor.state() == IteratorState::valid)
+              while (neighbor->has_children())
+                neighbor = neighbor->child(n==0 ? 1 : 0);
+
+            fe_face_values.reinit (cell, n);
+            Tensor<1,spacedim> normal =
+              fe_face_values.get_present_fe_values().get_all_normal_vectors()[0];
+
+            if (neighbor.state() == IteratorState::valid)
+              {
+                fe_values.reinit (neighbor);
+
+                for (unsigned int s=0; s<n_solution_vectors; ++s)
+                  fe_values.get_present_fe_values()
+                  .get_function_gradients (*solutions[s],
+                                           gradients_neighbor[s]);
+
+                fe_face_values.reinit (neighbor, n==0 ? 1 : 0);
+                Tensor<1,spacedim> neighbor_normal =
+                  fe_face_values.get_present_fe_values().get_all_normal_vectors()[0];
+
+                // extract the gradient in normal direction of all the components.
+                for (unsigned int s=0; s<n_solution_vectors; ++s)
+                  for (unsigned int c=0; c<n_components; ++c)
+                    grad_neighbor[s](c)
+                      = - (gradients_neighbor[s][n==0 ? 1 : 0][c]*neighbor_normal);
+              }
+            else if (neumann_bc.find(n) != neumann_bc.end())
+              // if Neumann b.c., then fill the gradients field which will be
+              // used later on.
+              {
+                if (n_components==1)
+                  {
+                    const double
+                    v = neumann_bc.find(n)->second->value(cell->vertex(n));
+
+                    for (unsigned int s=0; s<n_solution_vectors; ++s)
+                      grad_neighbor[s](0) = v;
+                  }
+                else
+                  {
+                    Vector<double> v(n_components);
+                    neumann_bc.find(n)->second->vector_value(cell->vertex(n), v);
+
+                    for (unsigned int s=0; s<n_solution_vectors; ++s)
+                      grad_neighbor[s] = v;
+                  }
+              }
+            else
+              // fill with zeroes.
+              for (unsigned int s=0; s<n_solution_vectors; ++s)
+                grad_neighbor[s] = 0;
+
+            // if there is a coefficient, then evaluate it at the present
+            // position. if there is none, reuse the preset values.
+            if (coefficient != 0)
+              {
+                if (coefficient->n_components == 1)
+                  {
+                    const double c_value = coefficient->value (cell->vertex(n));
+                    for (unsigned int c=0; c<n_components; ++c)
+                      coefficient_values(c) = c_value;
+                  }
+                else
+                  coefficient->vector_value(cell->vertex(n),
+                                            coefficient_values);
+              }
+
+
+            for (unsigned int s=0; s<n_solution_vectors; ++s)
+              for (unsigned int component=0; component<n_components; ++component)
+                if (component_mask[component] == true)
+                  {
+                    // get gradient here
+                    const double grad_here = gradients_here[s][n][component]
+                                             * normal;
+
+                    const double jump = ((grad_here - grad_neighbor[s](component)) *
+                                         coefficient_values(component));
+                    (*errors[s])(cell->active_cell_index()) += jump*jump * cell->diameter();
+                  }
+          }
+
+        for (unsigned int s=0; s<n_solution_vectors; ++s)
+          (*errors[s])(cell->active_cell_index()) = std::sqrt((*errors[s])(cell->active_cell_index()));
+      }
+}
+
+
+// explicit instantiations
+#include "error_estimator_1d.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/numerics/error_estimator_1d.inst.in b/source/numerics/error_estimator_1d.inst.in
new file mode 100644
index 0000000..5a54f24
--- /dev/null
+++ b/source/numerics/error_estimator_1d.inst.in
@@ -0,0 +1,142 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+for (VEC : SERIAL_VECTORS ; deal_II_dimension : DIMENSIONS; deal_II_space_dimension : SPACE_DIMENSIONS; DH : DOFHANDLER_TEMPLATES )
+{
+#if deal_II_dimension == 1 && deal_II_dimension <= deal_II_space_dimension
+// instantiations for dimension 1 only -- this one takes a mapping, a Quadrature and a single solution and error vector
+template
+void
+KellyErrorEstimator<deal_II_dimension, deal_II_space_dimension>::
+estimate<VEC,DH<deal_II_dimension,deal_II_space_dimension> > (const Mapping<deal_II_dimension, deal_II_space_dimension>      &,
+          const DH<deal_II_dimension,deal_II_space_dimension>   &,
+          const Quadrature<deal_II_dimension-1> &,
+          const FunctionMap<deal_II_space_dimension>::type &,
+          const VEC       &,
+          Vector<float>           &,
+          const ComponentMask &,
+          const Function<deal_II_space_dimension>     *,
+          const unsigned int       ,
+          const unsigned int       ,
+          const types::material_id);
+// same as the previous one but without the mapping argument
+template
+void
+KellyErrorEstimator<deal_II_dimension, deal_II_space_dimension>::
+estimate<VEC,DH<deal_II_dimension,deal_II_space_dimension> > (
+          const DH<deal_II_dimension,deal_II_space_dimension>   &,
+          const Quadrature<deal_II_dimension-1> &,
+          const FunctionMap<deal_II_space_dimension>::type &,
+          const VEC       &,
+          Vector<float>           &,
+          const ComponentMask &,
+          const Function<deal_II_space_dimension>     *,
+          const unsigned int       ,
+          const unsigned int       ,
+          const types::material_id);
+// mapping, Quadrature, lists of solution and error vectors
+template
+void
+KellyErrorEstimator<deal_II_dimension, deal_II_space_dimension>::
+estimate<VEC,DH<deal_II_dimension,deal_II_space_dimension> > (const Mapping<deal_II_dimension, deal_II_space_dimension>      &,
+          const DH<deal_II_dimension,deal_II_space_dimension>   &,
+          const Quadrature<deal_II_dimension-1> &,
+          const FunctionMap<deal_II_space_dimension>::type &,
+          const std::vector<const VEC *>       &,
+          std::vector<Vector<float> *>         &,
+          const ComponentMask &,
+          const Function<deal_II_space_dimension>     *,
+          const unsigned int       ,
+          const unsigned int       ,
+          const types::material_id);
+// same as the previous one but without the mapping argument
+template
+void
+KellyErrorEstimator<deal_II_dimension, deal_II_space_dimension>::
+estimate<VEC,DH<deal_II_dimension,deal_II_space_dimension> > (
+          const DH<deal_II_dimension,deal_II_space_dimension>   &,
+          const Quadrature<deal_II_dimension-1> &,
+          const FunctionMap<deal_II_space_dimension>::type &,
+          const std::vector<const VEC *>       &,
+          std::vector<Vector<float> *>         &,
+          const ComponentMask &,
+          const Function<deal_II_space_dimension>     *,
+          const unsigned int       ,
+          const unsigned int       ,
+          const types::material_id);
+// mapping, hp QCollection, single solution and error vector
+template
+void
+KellyErrorEstimator<deal_II_dimension, deal_II_space_dimension>::
+estimate<VEC,DH<deal_II_dimension,deal_II_space_dimension> > (const Mapping<deal_II_dimension, deal_II_space_dimension>      &,
+          const DH<deal_II_dimension,deal_II_space_dimension>   &,
+          const hp::QCollection<deal_II_dimension-1> &,
+          const FunctionMap<deal_II_space_dimension>::type &,
+          const VEC       &,
+          Vector<float>           &,
+          const ComponentMask &,
+          const Function<deal_II_space_dimension>     *,
+          const unsigned int       ,
+          const unsigned int       ,
+          const types::material_id);
+// same as the previous one but without the mapping argument
+template
+void
+KellyErrorEstimator<deal_II_dimension, deal_II_space_dimension>::
+estimate<VEC,DH<deal_II_dimension,deal_II_space_dimension> > (
+          const DH<deal_II_dimension,deal_II_space_dimension>   &,
+          const hp::QCollection<deal_II_dimension-1> &,
+          const FunctionMap<deal_II_space_dimension>::type &,
+          const VEC       &,
+          Vector<float>           &,
+          const ComponentMask &,
+          const Function<deal_II_space_dimension>     *,
+          const unsigned int       ,
+          const unsigned int       ,
+          const types::material_id);
+// mapping, hp QCollection, lists of solution and error vectors
+template
+void
+KellyErrorEstimator<deal_II_dimension, deal_II_space_dimension>::
+estimate<VEC,DH<deal_II_dimension,deal_II_space_dimension> > (const Mapping<deal_II_dimension, deal_II_space_dimension>      &,
+          const DH<deal_II_dimension,deal_II_space_dimension>   &,
+          const hp::QCollection<deal_II_dimension-1> &,
+          const FunctionMap<deal_II_space_dimension>::type &,
+          const std::vector<const VEC *>       &,
+          std::vector<Vector<float> *>         &,
+          const ComponentMask &,
+          const Function<deal_II_space_dimension>     *,
+          const unsigned int       ,
+          const unsigned int       ,
+          const types::material_id);
+// same as the previous one but without the mapping argument
+template
+void
+KellyErrorEstimator<deal_II_dimension, deal_II_space_dimension>::
+estimate<VEC,DH<deal_II_dimension,deal_II_space_dimension> > (
+          const DH<deal_II_dimension,deal_II_space_dimension>   &,
+          const hp::QCollection<deal_II_dimension-1> &,
+          const FunctionMap<deal_II_space_dimension>::type &,
+          const std::vector<const VEC *>       &,
+          std::vector<Vector<float> *>         &,
+          const ComponentMask &,
+          const Function<deal_II_space_dimension>     *,
+          const unsigned int       ,
+          const unsigned int       ,
+          const types::material_id);
+
+#endif
+}
diff --git a/source/numerics/error_estimator_inst2.cc b/source/numerics/error_estimator_inst2.cc
new file mode 100644
index 0000000..8bf9900
--- /dev/null
+++ b/source/numerics/error_estimator_inst2.cc
@@ -0,0 +1,24 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/numerics/error_estimator.templates.h>
+
+DEAL_II_NAMESPACE_OPEN
+// NOTE(review): presumably the included .inst file is split into COUNT parts and this file compiles part INDEX -- confirm
+#define SPLIT_INSTANTIATIONS_COUNT 2
+#define SPLIT_INSTANTIATIONS_INDEX 1
+#include "error_estimator.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/numerics/fe_field_function.cc b/source/numerics/fe_field_function.cc
new file mode 100644
index 0000000..12e2eec
--- /dev/null
+++ b/source/numerics/fe_field_function.cc
@@ -0,0 +1,38 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2007 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/numerics/fe_field_function.templates.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/hp/dof_handler.h>
+
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/parallel_vector.h>
+#include <deal.II/lac/parallel_block_vector.h>
+#include <deal.II/lac/petsc_vector.h>
+#include <deal.II/lac/petsc_block_vector.h>
+#include <deal.II/lac/trilinos_vector.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace Functions  // the instantiations included below are made inside this namespace
+{
+#  include "fe_field_function.inst"
+}  // namespace Functions
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/numerics/fe_field_function.inst.in b/source/numerics/fe_field_function.inst.in
new file mode 100644
index 0000000..50660e2
--- /dev/null
+++ b/source/numerics/fe_field_function.inst.in
@@ -0,0 +1,27 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2007 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+for (VECTOR : SERIAL_VECTORS ; deal_II_dimension : DIMENSIONS)
+{
+  template class FEFieldFunction<deal_II_dimension,
+                                 DoFHandler<deal_II_dimension>,
+                                 VECTOR>;
+  // the same class instantiated for the hp variant of the DoF handler
+  template class FEFieldFunction<deal_II_dimension,
+                                 hp::DoFHandler<deal_II_dimension>,
+                                 VECTOR>;
+}
diff --git a/source/numerics/histogram.cc b/source/numerics/histogram.cc
new file mode 100644
index 0000000..b81c580
--- /dev/null
+++ b/source/numerics/histogram.cc
@@ -0,0 +1,348 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/numerics/histogram.h>
+#include <algorithm>
+#include <cmath>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+template <typename number>
+bool Histogram::logarithmic_less (const number n1,
+                                  const number n2)
+{
+  // ordering: positive values first (ascending), then non-positive ones (ascending)
+  return (n1 > 0) ? ((n1 < n2) || (n2 <= 0))
+                  : ((n1 < n2) && (n2 <= 0));
+}
+
+
+
+Histogram::Interval::Interval (const double left_point,
+                               const double right_point)
+  : left_point (left_point)
+  , right_point (right_point)
+  , content (0)   // a new interval starts out with nothing counted in it
+{}
+
+
+
+std::size_t
+Histogram::Interval::memory_consumption () const
+{
+  return sizeof(Interval);   // size of the object itself
+}
+
+
+
+// Set up the histogram for a list of data vectors: find the smallest and
+// largest entry (in the ordering that goes with interval_spacing), split that
+// range into n_intervals intervals and count, per vector, how many entries lie
+// in each. y_values_ (one value per data vector) is copied into y_values.
+template <typename number>
+void Histogram::evaluate (const std::vector<Vector<number> > &values,
+                          const std::vector<double>          &y_values_,
+                          const unsigned int                  n_intervals,
+                          const IntervalSpacing               interval_spacing)
+{
+  Assert (values.size() > 0,
+          ExcMessage("Your input data needs to contain at least one input vector."));
+  Assert (n_intervals > 0,
+          ExcMessage("The number of intervals needs to be at least one."));
+  for (unsigned int i=0; i<values.size(); ++i)
+    Assert (values[i].size() > 0, ExcEmptyData());
+  Assert (values.size() == y_values_.size(),
+          ExcIncompatibleArraySize(values.size(), y_values_.size()));
+
+  // store y_values
+  y_values = y_values_;
+
+  // first find minimum and maximum value in the indicators
+  number min_value=0, max_value=0;
+  switch (interval_spacing)
+    {
+    case linear:
+    {
+      min_value = *std::min_element(values[0].begin(),
+                                    values[0].end());
+      max_value = *std::max_element(values[0].begin(),
+                                    values[0].end());
+
+      for (unsigned int i=1; i<values.size(); ++i)
+        {
+          min_value = std::min (min_value,
+                                *std::min_element(values[i].begin(),
+                                                  values[i].end()));
+          max_value = std::max (max_value,
+                                *std::max_element(values[i].begin(),
+                                                  values[i].end()));
+        }
+
+      break;
+    }
+
+    case logarithmic:
+    {
+      typedef bool (*comparator) (const number, const number);
+      const comparator logarithmic_less_function
+        = &Histogram::template logarithmic_less<number>;
+
+      min_value = *std::min_element(values[0].begin(),
+                                    values[0].end(),
+                                    logarithmic_less_function);
+
+      max_value = *std::max_element(values[0].begin(),
+                                    values[0].end(),
+                                    logarithmic_less_function);
+
+      for (unsigned int i=1; i<values.size(); ++i)
+        {
+          min_value = std::min (min_value,
+                                *std::min_element(values[i].begin(),
+                                                  values[i].end(),
+                                                  logarithmic_less_function),
+                                logarithmic_less_function);
+
+          max_value = std::max (max_value,
+                                *std::max_element(values[i].begin(),
+                                                  values[i].end(),
+                                                  logarithmic_less_function),
+                                logarithmic_less_function);
+        }
+
+      break;
+    }
+
+    default:
+      Assert (false, ExcInternalError());
+    }
+
+  // move right bound arbitrarily if necessary. sometimes in logarithmic mode,
+  // max_value may be larger than min_value, but only up to rounding precision.
+  if (max_value <= min_value)
+    max_value = min_value+1;
+
+  // now set up the intervals based on the min and max values
+  intervals.clear ();
+  // set up one list of intervals for the first data vector. we will then
+  // produce all the other lists for the other data vectors by copying
+  intervals.push_back (std::vector<Interval>());
+
+  // the boundaries are passed to Interval as doubles, so compute them in double
+  // precision too; a float increment may lose digits or under-/overflow
+  const double min_bound = min_value;
+  const double max_bound = max_value;
+  switch (interval_spacing)
+    {
+    case linear:
+    {
+      const double delta = (max_bound-min_bound)/n_intervals;
+
+      for (unsigned int n=0; n<n_intervals; ++n)
+        intervals[0].push_back (Interval(min_bound+n*delta,
+                                         min_bound+(n+1)*delta));
+
+      break;
+    }
+
+    case logarithmic:
+    {
+      const double log_min = std::log(min_bound);
+      const double delta   = (std::log(max_bound)-log_min)/n_intervals;
+
+      for (unsigned int n=0; n<n_intervals; ++n)
+        intervals[0].push_back (Interval(std::exp(log_min+n*delta),
+                                         std::exp(log_min+(n+1)*delta)));
+
+      break;
+    }
+
+    default:
+      Assert (false, ExcInternalError());
+    }
+
+  // rounding may leave the last right boundary slightly below max_value so that
+  // the largest entries would not be counted; extend it then (NaN is kept)
+  if (!intervals[0].empty())
+    intervals[0].back().right_point = std::max<double> (intervals[0].back().right_point, max_bound);
+
+  // fill the other lists of intervals
+  for (unsigned int i=1; i<values.size(); ++i)
+    intervals.push_back (intervals[0]);
+
+  // finally fill the intervals
+  for (unsigned int i=0; i<values.size(); ++i)
+    for (typename Vector<number>::const_iterator p=values[i].begin();
+         p < values[i].end(); ++p)
+      {
+        // find the right place for *p in intervals[i]. use regular operator<= here instead
+        // of the logarithmic one to map negative or zero value to the leftmost interval always
+        for (unsigned int n=0; n<intervals[i].size(); ++n)
+          if (*p <= intervals[i][n].right_point)
+            {
+              ++intervals[i][n].content;
+              break;
+            }
+      }
+}
+
+
+
+// convenience overload: treat a single vector as a list of one data set with y-value zero
+template <typename number>
+void Histogram::evaluate (const Vector<number>    &values,
+                          const unsigned int       n_intervals,
+                          const IntervalSpacing    interval_spacing)
+{
+  const std::vector<Vector<number> > single_data_set (1, values);
+  evaluate (single_data_set, std::vector<double>(1,0.), n_intervals, interval_spacing);
+}
+
+
+
+// Write the histogram computed by evaluate() to out in a format that gnuplot
+// can read: for each interval, a line for its left and one for its right end
+// point, each followed by the interval's content. With several data sets,
+// every line also carries a y-value and each data set is written as a patch
+// of two such rows. AssertThrow checks the stream state with ExcIO before
+// and after writing.
+void Histogram::write_gnuplot (std::ostream &out) const
+{
+  AssertThrow (out, ExcIO());
+  Assert (!intervals.empty(),
+          ExcMessage("There is nothing to write into the output file. "
+                     "Did you forget to call the evaluate() function?"));
+
+  // lines are terminated by '\n' rather than std::endl so that the stream is
+  // not flushed after every single line; it is flushed once at the very end
+
+  // do a simple 2d plot, if only
+  // one data set is available
+  if (intervals.size()==1)
+    {
+      for (unsigned int n=0; n<intervals[0].size(); ++n)
+        out << intervals[0][n].left_point
+            << ' '
+            << intervals[0][n].content
+            << '\n'
+            << intervals[0][n].right_point
+            << ' '
+            << intervals[0][n].content
+            << '\n';
+    }
+  else
+    // otherwise create a whole 3d plot
+    // for the data. use the patch method
+    // of gnuplot for this
+    //
+    // run this loop backwards since otherwise
+    // gnuplot thinks the upper side is the
+    // lower side and draws the diagram in
+    // strange colors
+    for (int i=intervals.size()-1; i>=0; --i)
+      {
+        // y-coordinate of the far side of this patch: the y-value of the next
+        // data set or, for the last data set, this one's y-value extrapolated
+        // by the distance to the previous one
+        const double far_y_value
+          = (i<static_cast<int>(intervals.size())-1 ?
+             y_values[i+1] :
+             y_values[i] + (y_values[i]-y_values[i-1]));
+
+        // first row of the patch: all interval end points at the far y-value
+        for (unsigned int n=0; n<intervals[i].size(); ++n)
+          out << intervals[i][n].left_point  << ' ' << far_y_value << ' '
+              << intervals[i][n].content     << '\n'
+              << intervals[i][n].right_point << ' ' << far_y_value << ' '
+              << intervals[i][n].content     << '\n';
+        out << '\n';
+
+        // second row: the same points at this data set's own y-value
+        for (unsigned int n=0; n<intervals[i].size(); ++n)
+          out << intervals[i][n].left_point  << ' ' << y_values[i] << ' '
+              << intervals[i][n].content     << '\n'
+              << intervals[i][n].right_point << ' ' << y_values[i] << ' '
+              << intervals[i][n].content     << '\n';
+        out << '\n';
+      }
+
+  // flush once so that write errors are reflected in the stream state that is
+  // checked next
+  out << std::flush;
+  AssertThrow (out, ExcIO());
+}
+
+
+
+std::string Histogram::get_interval_spacing_names ()
+{
+  return std::string("linear|logarithmic");   // the names parse_interval_spacing() accepts, separated by '|'
+}
+
+
+
+Histogram::IntervalSpacing
+Histogram::parse_interval_spacing (const std::string &name)
+{
+  // the two names that have a matching enumerator
+  if (name=="linear")
+    return linear;
+  if (name=="logarithmic")
+    return logarithmic;
+
+  // any other name is reported through ExcInvalidName
+  AssertThrow (false, ExcInvalidName(name));
+  // only reached if the AssertThrow above returns; every path needs a value
+  return linear;
+}
+
+
+
+std::size_t
+Histogram::memory_consumption () const
+{
+  const std::size_t interval_bytes = MemoryConsumption::memory_consumption (intervals);
+  return interval_bytes + MemoryConsumption::memory_consumption (y_values);
+}
+
+
+
+// explicit instantiations of both evaluate() overloads for number=float
+template  // overload taking a list of data vectors plus their y-values
+void Histogram::evaluate<float> (const std::vector<Vector<float> > &values,
+                                 const std::vector<double>                  &y_values,
+                                 const unsigned int                          n_intervals,
+                                 const IntervalSpacing                       interval_spacing);
+template  // overload taking a single data vector
+void Histogram::evaluate<float> (const Vector<float>   &values,
+                                 const unsigned int     n_intervals,
+                                 const IntervalSpacing  interval_spacing);
+
+// NOTE(review): the templates are defined in this source file, so presumably only float and double data are usable elsewhere -- confirm
+// explicit instantiations of both evaluate() overloads for number=double
+template  // overload taking a list of data vectors plus their y-values
+void Histogram::evaluate<double> (const std::vector<Vector<double> >  &values,
+                                  const std::vector<double>                    &y_values,
+                                  const unsigned int                            n_intervals,
+                                  const IntervalSpacing                         interval_spacing);
+template  // overload taking a single data vector
+void Histogram::evaluate<double> (const Vector<double>   &values,
+                                  const unsigned int      n_intervals,
+                                  const IntervalSpacing   interval_spacing);
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/numerics/matrix_creator.cc b/source/numerics/matrix_creator.cc
new file mode 100644
index 0000000..1bb6205
--- /dev/null
+++ b/source/numerics/matrix_creator.cc
@@ -0,0 +1,28 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/numerics/matrix_creator.templates.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+// explicit instantiations (index 0 by default; the *_inst2.cc/_inst3.cc wrappers preset 1 and 2)
+#define SPLIT_INSTANTIATIONS_COUNT 3
+#ifndef SPLIT_INSTANTIATIONS_INDEX
+#define SPLIT_INSTANTIATIONS_INDEX 0
+#endif
+#include "matrix_creator.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/numerics/matrix_creator.inst.in b/source/numerics/matrix_creator.inst.in
new file mode 100644
index 0000000..a84cafc
--- /dev/null
+++ b/source/numerics/matrix_creator.inst.in
@@ -0,0 +1,315 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+#if deal_II_dimension <= deal_II_space_dimension
+
+// non-hp create_mass_matrix for SparseMatrix<double>: with/without Mapping, with/without rhs
+    template
+      void MatrixCreator::create_mass_matrix<deal_II_dimension,double,deal_II_space_dimension>
+      (const Mapping<deal_II_dimension,deal_II_space_dimension>       &mapping,
+       const DoFHandler<deal_II_dimension,deal_II_space_dimension>    &dof,
+       const Quadrature<deal_II_dimension>    &q,
+       SparseMatrix<double>     &matrix,
+       const Function<deal_II_space_dimension> * const coefficient,
+       const ConstraintMatrix   &constraints);
+    template
+      void MatrixCreator::create_mass_matrix<deal_II_dimension,double,deal_II_space_dimension>
+      (const DoFHandler<deal_II_dimension,deal_II_space_dimension>    &dof,
+       const Quadrature<deal_II_dimension>    &q,
+       SparseMatrix<double>     &matrix,
+       const Function<deal_II_space_dimension> * const coefficient,
+       const ConstraintMatrix   &constraints);
+    template
+      void MatrixCreator::create_mass_matrix<deal_II_dimension,double,deal_II_space_dimension>
+      (const Mapping<deal_II_dimension,deal_II_space_dimension>       &mapping,
+       const DoFHandler<deal_II_dimension,deal_II_space_dimension>    &dof,
+       const Quadrature<deal_II_dimension>    &q,
+       SparseMatrix<double>     &matrix,
+       const Function<deal_II_space_dimension>      &rhs,
+       Vector<double>           &rhs_vector,
+       const Function<deal_II_space_dimension> * const coefficient,
+       const ConstraintMatrix   &constraints);
+    template
+      void MatrixCreator::create_mass_matrix<deal_II_dimension,double,deal_II_space_dimension>
+      (const DoFHandler<deal_II_dimension,deal_II_space_dimension>    &dof,
+       const Quadrature<deal_II_dimension>    &q,
+       SparseMatrix<double>     &matrix,
+       const Function<deal_II_space_dimension>      &rhs,
+       Vector<double>           &rhs_vector,
+       const Function<deal_II_space_dimension> * const coefficient,
+       const ConstraintMatrix   &constraints);
+
+// create_boundary_mass_matrix for SparseMatrix<double>: non-hp first, then hp, each with/without mapping
+    template
+      void MatrixCreator::create_boundary_mass_matrix<deal_II_dimension,deal_II_space_dimension>
+      (const DoFHandler<deal_II_dimension,deal_II_space_dimension>     &dof,
+       const Quadrature<deal_II_dimension-1>   &q,
+       SparseMatrix<double>      &matrix,
+       const FunctionMap<deal_II_space_dimension>::type &rhs,
+       Vector<double>            &rhs_vector,
+       std::vector<types::global_dof_index> &dof_to_boundary_mapping,
+       const Function<deal_II_space_dimension> * const a,
+       std::vector<unsigned int>);
+
+    template
+      void MatrixCreator::create_boundary_mass_matrix<deal_II_dimension,deal_II_space_dimension>
+      (const Mapping<deal_II_dimension,deal_II_space_dimension> &,
+       const DoFHandler<deal_II_dimension,deal_II_space_dimension>     &dof,
+       const Quadrature<deal_II_dimension-1>   &q,
+       SparseMatrix<double>      &matrix,
+       const FunctionMap<deal_II_space_dimension>::type &rhs,
+       Vector<double>            &rhs_vector,
+       std::vector<types::global_dof_index> &dof_to_boundary_mapping,
+       const Function<deal_II_space_dimension> * const a,
+       std::vector<unsigned int>);
+
+    template
+      void
+      MatrixCreator::create_boundary_mass_matrix<deal_II_dimension,deal_II_space_dimension>
+      (const hp::MappingCollection<deal_II_dimension,deal_II_space_dimension>&,
+       const hp::DoFHandler<deal_II_dimension,deal_II_space_dimension>&,
+       const hp::QCollection<deal_II_dimension-1>&,
+       SparseMatrix<double>&,
+       const FunctionMap<deal_II_space_dimension>::type&,
+       Vector<double>&,
+       std::vector<types::global_dof_index>&,
+       const Function<deal_II_space_dimension> * const,
+       std::vector<unsigned int>);
+
+    template
+      void MatrixCreator::create_boundary_mass_matrix<deal_II_dimension,deal_II_space_dimension>
+      (const hp::DoFHandler<deal_II_dimension,deal_II_space_dimension>&,
+       const hp::QCollection<deal_II_dimension-1>&,
+       SparseMatrix<double>&,
+       const FunctionMap<deal_II_space_dimension>::type&,
+       Vector<double>&,
+       std::vector<types::global_dof_index>&,
+       const Function<deal_II_space_dimension> * const,
+       std::vector<unsigned int>);
+
+// the four non-hp create_mass_matrix variants again, now for float matrices and vectors
+    template
+      void MatrixCreator::create_mass_matrix<deal_II_dimension,float,deal_II_space_dimension>
+      (const Mapping<deal_II_dimension,deal_II_space_dimension>       &mapping,
+       const DoFHandler<deal_II_dimension,deal_II_space_dimension>    &dof,
+       const Quadrature<deal_II_dimension>    &q,
+       SparseMatrix<float>     &matrix,
+       const Function<deal_II_space_dimension> * const coefficient,
+       const ConstraintMatrix   &constraints);
+    template
+      void MatrixCreator::create_mass_matrix<deal_II_dimension,float,deal_II_space_dimension>
+      (const DoFHandler<deal_II_dimension,deal_II_space_dimension>    &dof,
+       const Quadrature<deal_II_dimension>    &q,
+       SparseMatrix<float>     &matrix,
+       const Function<deal_II_space_dimension> * const coefficient,
+       const ConstraintMatrix   &constraints);
+    template
+      void MatrixCreator::create_mass_matrix<deal_II_dimension,float,deal_II_space_dimension>
+      (const Mapping<deal_II_dimension,deal_II_space_dimension>       &mapping,
+       const DoFHandler<deal_II_dimension,deal_II_space_dimension>    &dof,
+       const Quadrature<deal_II_dimension>    &q,
+       SparseMatrix<float>     &matrix,
+       const Function<deal_II_space_dimension>      &rhs,
+       Vector<float>           &rhs_vector,
+       const Function<deal_II_space_dimension> * const coefficient,
+       const ConstraintMatrix   &constraints);
+    template
+      void MatrixCreator::create_mass_matrix<deal_II_dimension,float,deal_II_space_dimension>
+      (const DoFHandler<deal_II_dimension,deal_II_space_dimension>    &dof,
+       const Quadrature<deal_II_dimension>    &q,
+       SparseMatrix<float>     &matrix,
+       const Function<deal_II_space_dimension>      &rhs,
+       Vector<float>           &rhs_vector,
+       const Function<deal_II_space_dimension> * const coefficient,
+       const ConstraintMatrix   &constraints);
+
+#endif
+  }
+
+
+//TODO[SP]: replace <deal_II_dimension> by <deal_II_dimension, deal_II_space_dimension>
+// where applicable and move to codimension cases above also when applicable
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+// hp versions of create_mass_matrix for double, taking a mapping collection
+#if deal_II_dimension <= deal_II_space_dimension
+    template
+      void MatrixCreator::create_mass_matrix<deal_II_dimension,double,deal_II_space_dimension>
+      (const hp::MappingCollection<deal_II_dimension,deal_II_space_dimension>       &mapping,
+       const hp::DoFHandler<deal_II_dimension,deal_II_space_dimension>    &dof,
+       const hp::QCollection<deal_II_dimension>    &q,
+       SparseMatrix<double>     &matrix,
+       const Function<deal_II_space_dimension> * const coefficient,
+       const ConstraintMatrix   &constraints);
+
+    template
+      void MatrixCreator::create_mass_matrix<deal_II_dimension,double,deal_II_space_dimension>
+      (const hp::MappingCollection<deal_II_dimension,deal_II_space_dimension>       &mapping,
+       const hp::DoFHandler<deal_II_dimension,deal_II_space_dimension>    &dof,
+       const hp::QCollection<deal_II_dimension>    &q,
+       SparseMatrix<double>     &matrix,
+       const Function<deal_II_space_dimension>      &rhs,
+       Vector<double>           &rhs_vector,
+       const Function<deal_II_space_dimension> * const coefficient,
+       const ConstraintMatrix   &constraints);
+
+
+#endif
+
+#if deal_II_dimension == deal_II_space_dimension
+
+// hp create_mass_matrix for double without a mapping argument (dim == spacedim only)
+    template
+      void MatrixCreator::create_mass_matrix<deal_II_dimension>
+      (const hp::DoFHandler<deal_II_dimension,deal_II_space_dimension>    &dof,
+       const hp::QCollection<deal_II_dimension>    &q,
+       SparseMatrix<double>     &matrix,
+       const Function<deal_II_space_dimension> * const coefficient,
+       const ConstraintMatrix   &constraints);
+
+    template
+      void MatrixCreator::create_mass_matrix<deal_II_dimension>
+      (const hp::DoFHandler<deal_II_dimension,deal_II_space_dimension>    &dof,
+       const hp::QCollection<deal_II_dimension>    &q,
+       SparseMatrix<double>     &matrix,
+       const Function<deal_II_space_dimension>      &rhs,
+       Vector<double>           &rhs_vector,
+       const Function<deal_II_space_dimension> * const coefficient,
+       const ConstraintMatrix   &constraints);
+
+// non-hp versions of create_laplace_matrix (double): with/without Mapping, with/without rhs
+    template
+      void MatrixCreator::create_laplace_matrix<deal_II_dimension>
+      (const DoFHandler<deal_II_dimension>    &dof,
+       const Quadrature<deal_II_dimension>    &q,
+       SparseMatrix<double>     &matrix,
+       const Function<deal_II_dimension> * const coefficient,
+       const ConstraintMatrix   &constraints);
+    template
+      void MatrixCreator::create_laplace_matrix<deal_II_dimension>
+      (const Mapping<deal_II_dimension>       &mapping,
+       const DoFHandler<deal_II_dimension>    &dof,
+       const Quadrature<deal_II_dimension>    &q,
+       SparseMatrix<double>     &matrix,
+       const Function<deal_II_dimension> * const coefficient,
+       const ConstraintMatrix   &constraints);
+    template
+      void MatrixCreator::create_laplace_matrix<deal_II_dimension>
+      (const Mapping<deal_II_dimension>       &mapping,
+       const DoFHandler<deal_II_dimension>    &dof,
+       const Quadrature<deal_II_dimension>    &q,
+       SparseMatrix<double>     &matrix,
+       const Function<deal_II_dimension>      &rhs,
+       Vector<double>           &rhs_vector,
+       const Function<deal_II_dimension> * const coefficient,
+       const ConstraintMatrix   &constraints);
+    template
+      void MatrixCreator::create_laplace_matrix<deal_II_dimension>
+      (const DoFHandler<deal_II_dimension>    &dof,
+       const Quadrature<deal_II_dimension>    &q,
+       SparseMatrix<double>     &matrix,
+       const Function<deal_II_dimension>      &rhs,
+       Vector<double>           &rhs_vector,
+       const Function<deal_II_dimension> * const coefficient,
+       const ConstraintMatrix   &constraints);
+
+// hp versions of create_laplace_matrix (double): with/without mapping collection, with/without rhs
+    template
+      void MatrixCreator::create_laplace_matrix<deal_II_dimension>
+      (const hp::DoFHandler<deal_II_dimension>    &dof,
+       const hp::QCollection<deal_II_dimension>    &q,
+       SparseMatrix<double>     &matrix,
+       const Function<deal_II_dimension> * const coefficient,
+       const ConstraintMatrix   &constraints);
+    template
+      void MatrixCreator::create_laplace_matrix<deal_II_dimension>
+      (const hp::MappingCollection<deal_II_dimension>       &mapping,
+       const hp::DoFHandler<deal_II_dimension>    &dof,
+       const hp::QCollection<deal_II_dimension>    &q,
+       SparseMatrix<double>     &matrix,
+       const Function<deal_II_dimension> * const coefficient,
+       const ConstraintMatrix   &constraints);
+    template
+      void MatrixCreator::create_laplace_matrix<deal_II_dimension>
+      (const hp::MappingCollection<deal_II_dimension>       &mapping,
+       const hp::DoFHandler<deal_II_dimension>    &dof,
+       const hp::QCollection<deal_II_dimension>    &q,
+       SparseMatrix<double>     &matrix,
+       const Function<deal_II_dimension>      &rhs,
+       Vector<double>           &rhs_vector,
+       const Function<deal_II_dimension> * const coefficient,
+       const ConstraintMatrix   &constraints);
+    template
+      void MatrixCreator::create_laplace_matrix<deal_II_dimension>
+      (const hp::DoFHandler<deal_II_dimension>    &dof,
+       const hp::QCollection<deal_II_dimension>    &q,
+       SparseMatrix<double>     &matrix,
+       const Function<deal_II_dimension>      &rhs,
+       Vector<double>           &rhs_vector,
+       const Function<deal_II_dimension> * const coefficient,
+       const ConstraintMatrix   &constraints);
+
+#endif
+
+// hp create_mass_matrix once more, this time for float matrices and vectors
+#if deal_II_dimension <= deal_II_space_dimension
+
+    template
+      void MatrixCreator::create_mass_matrix<deal_II_dimension,float,deal_II_space_dimension>
+      (const hp::MappingCollection<deal_II_dimension,deal_II_space_dimension>       &mapping,
+       const hp::DoFHandler<deal_II_dimension,deal_II_space_dimension>    &dof,
+       const hp::QCollection<deal_II_dimension>    &q,
+       SparseMatrix<float>     &matrix,
+       const Function<deal_II_space_dimension> * const coefficient,
+       const ConstraintMatrix   &constraints);
+
+    template
+      void MatrixCreator::create_mass_matrix<deal_II_dimension,float,deal_II_space_dimension>
+      (const hp::MappingCollection<deal_II_dimension,deal_II_space_dimension>       &mapping,
+       const hp::DoFHandler<deal_II_dimension,deal_II_space_dimension>    &dof,
+       const hp::QCollection<deal_II_dimension>    &q,
+       SparseMatrix<float>     &matrix,
+       const Function<deal_II_space_dimension>      &rhs,
+       Vector<float>           &rhs_vector,
+       const Function<deal_II_space_dimension> * const coefficient,
+       const ConstraintMatrix   &constraints);
+
+#endif
+
+#if deal_II_dimension == deal_II_space_dimension
+
+// hp create_mass_matrix for float without a mapping argument (dim == spacedim only)
+    template
+      void MatrixCreator::create_mass_matrix<deal_II_dimension>
+      (const hp::DoFHandler<deal_II_dimension,deal_II_space_dimension>    &dof,
+       const hp::QCollection<deal_II_dimension>    &q,
+       SparseMatrix<float>     &matrix,
+       const Function<deal_II_space_dimension> * const coefficient,
+       const ConstraintMatrix   &constraints);
+
+    template
+      void MatrixCreator::create_mass_matrix<deal_II_dimension>
+      (const hp::DoFHandler<deal_II_dimension,deal_II_space_dimension>    &dof,
+       const hp::QCollection<deal_II_dimension>    &q,
+       SparseMatrix<float>     &matrix,
+       const Function<deal_II_space_dimension>      &rhs,
+       Vector<float>           &rhs_vector,
+       const Function<deal_II_space_dimension> * const coefficient,
+       const ConstraintMatrix   &constraints);
+
+#endif
+  }
diff --git a/source/numerics/matrix_creator_inst2.cc b/source/numerics/matrix_creator_inst2.cc
new file mode 100644
index 0000000..73c34c5
--- /dev/null
+++ b/source/numerics/matrix_creator_inst2.cc
@@ -0,0 +1,20 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// Includes matrix_creator.cc with SPLIT_INSTANTIATIONS_INDEX preset to 1, so that a part of its
+// instantiations is compiled here (smaller compilation unit, lower memory consumption)
+
+#define SPLIT_INSTANTIATIONS_INDEX 1
+#include "matrix_creator.cc"
diff --git a/source/numerics/matrix_creator_inst3.cc b/source/numerics/matrix_creator_inst3.cc
new file mode 100644
index 0000000..2cc0df2
--- /dev/null
+++ b/source/numerics/matrix_creator_inst3.cc
@@ -0,0 +1,20 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2016 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// Includes matrix_creator.cc with SPLIT_INSTANTIATIONS_INDEX preset to 2, so that a part of its
+// instantiations is compiled here (smaller compilation unit, lower memory consumption)
+
+#define SPLIT_INSTANTIATIONS_INDEX 2
+#include "matrix_creator.cc"
diff --git a/source/numerics/matrix_tools.cc b/source/numerics/matrix_tools.cc
new file mode 100644
index 0000000..28b4675
--- /dev/null
+++ b/source/numerics/matrix_tools.cc
@@ -0,0 +1,615 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/function.h>
+#include <deal.II/base/quadrature.h>
+#include <deal.II/base/work_stream.h>
+#include <deal.II/base/geometry_info.h>
+#include <deal.II/base/quadrature.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/mapping_q1.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/hp/fe_values.h>
+#include <deal.II/hp/mapping_collection.h>
+#include <deal.II/numerics/matrix_tools.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/block_sparse_matrix.h>
+
+#ifdef DEAL_II_WITH_PETSC
+#  include <deal.II/lac/petsc_parallel_sparse_matrix.h>
+#  include <deal.II/lac/petsc_sparse_matrix.h>
+#  include <deal.II/lac/petsc_parallel_vector.h>
+#  include <deal.II/lac/petsc_vector.h>
+#  include <deal.II/lac/petsc_parallel_block_sparse_matrix.h>
+#endif
+
+#ifdef DEAL_II_WITH_TRILINOS
+#  include <deal.II/lac/trilinos_sparse_matrix.h>
+#  include <deal.II/lac/trilinos_vector.h>
+#  include <deal.II/lac/trilinos_block_sparse_matrix.h>
+#  include <deal.II/lac/trilinos_block_vector.h>
+#endif
+
+#include <algorithm>
+
+
+#include <algorithm>
+#include <set>
+#include <cmath>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace MatrixTools
+{
+  namespace
+  {
+    // comparison helper: true when entry p lies in a column with index below 'column'
+    template <typename Iterator>
+    bool column_less_than(const typename Iterator::value_type p, const unsigned int column)
+    {
+      return p.column() < column;
+    }
+  }
+  // Impose prescribed values for selected dofs on a linear system (matrix, solution, rhs).
+//TODO:[WB] I don't think that the optimized storage of diagonals is needed (GK)
+  template <typename number>
+  void
+  apply_boundary_values (const std::map<types::global_dof_index,double> &boundary_values,
+                         SparseMatrix<number>  &matrix,
+                         Vector<number>   &solution,
+                         Vector<number>   &right_hand_side,
+                         const bool        eliminate_columns)
+  {
+    Assert (matrix.n() == right_hand_side.size(),
+            ExcDimensionMismatch(matrix.n(), right_hand_side.size()));
+    Assert (matrix.n() == solution.size(),
+            ExcDimensionMismatch(matrix.n(), solution.size()));
+    Assert (matrix.n() == matrix.m(),
+            ExcDimensionMismatch(matrix.n(), matrix.m()));
+    // (the checks above ask for a square matrix whose size matches both vectors)
+    // nothing to do if the map of boundary values is empty;
+    // matrix and vectors are left untouched in that case
+    if (boundary_values.size() == 0)
+      return;
+
+
+    const types::global_dof_index n_dofs = matrix.m();
+
+    // a constrained row whose diagonal entry is zero
+    // gets a substitute value on its diagonal further
+    // down. determine that substitute here: it is the
+    // first nonzero diagonal element found when going
+    // through the rows in order, and 1 if every
+    // diagonal entry is zero
+    number first_nonzero_diagonal_entry = 1;
+    for (unsigned int i=0; i<n_dofs; ++i)
+      if (matrix.diag_element(i) != 0)
+        {
+          first_nonzero_diagonal_entry = matrix.diag_element(i);
+          break;
+        }
+
+
+    std::map<types::global_dof_index,double>::const_iterator dof  = boundary_values.begin(),
+                                                             endd = boundary_values.end();
+    for (; dof != endd; ++dof)
+      {
+        Assert (dof->first < n_dofs, ExcInternalError());
+
+        const types::global_dof_index dof_number = dof->first;
+        // steps carried out for every boundary dof:
+
+        // 1. zero all off-diagonal entries stored in the row of this
+        // dof; the diagonal entry keeps its value
+        for (typename SparseMatrix<number>::iterator
+             p = matrix.begin(dof_number);
+             p != matrix.end(dof_number); ++p)
+          if (p->column() != dof_number)
+            p->value() = 0.;
+
+        // 2. choose the right hand side entry so that
+        // this row yields the prescribed value: with a
+        // nonzero diagonal entry, keep the entry and
+        // use (boundary value * diagonal entry) as rhs.
+        // with a zero diagonal entry, first write the
+        // substitute determined before the loop onto
+        // the diagonal and scale the rhs with that
+        // instead. Normally, however, the main
+        // diagonal entry should not be zero.
+        //
+        // new_rhs remembers the value written to the
+        // rhs; it is kept in a local to make the gauss
+        // step below more efficient
+        number new_rhs;
+        if (matrix.diag_element(dof_number) != 0.0)
+          {
+            new_rhs = dof->second * matrix.diag_element(dof_number);
+            right_hand_side(dof_number) = new_rhs;
+          }
+        else
+          {
+            matrix.set (dof_number, dof_number,
+                        first_nonzero_diagonal_entry);
+            new_rhs = dof->second * first_nonzero_diagonal_entry;
+            right_hand_side(dof_number) = new_rhs;
+          }
+
+
+        // 3. optional column elimination, requested
+        // through eliminate_columns to keep the matrix
+        // symmetric: do a Gauss elimination step with
+        // the present row, i.e. remove the entries in
+        // column dof_number from the other rows and
+        // move their contribution to the rhs. this
+        // relies on a symmetric sparsity pattern
+        if (eliminate_columns)
+          {
+            // after the zeroing above the diagonal is
+            // the only nonzero entry left in this row;
+            // it is used in the elimination step
+            const number diagonal_entry = matrix.diag_element(dof_number);
+
+            // the rows to visit are those with an entry in column
+            // dof_number. with a symmetric sparsity pattern these
+            // are exactly the column indices stored in row
+            // dof_number, so iterating over that row gives them
+            // cheaply. the iteration starts one past begin():
+            // the first stored entry of a row is its diagonal
+            // element, which would only lead back to the present
+            // row
+            for (typename SparseMatrix<number>::iterator
+                 q = matrix.begin(dof_number)+1;
+                 q != matrix.end(dof_number); ++q)
+              {
+                const types::global_dof_index row = q->column();
+
+                // locate entry (row,dof_number): search the
+                // off-diagonal entries of that row with
+                // Utilities::lower_bound, comparing columns
+                bool (*comp)(const typename SparseMatrix<number>::iterator::value_type p,
+                             const unsigned int column)
+                  = &column_less_than<typename SparseMatrix<number>::iterator>;
+                const typename SparseMatrix<number>::iterator
+                p = Utilities::lower_bound(matrix.begin(row)+1,
+                                           matrix.end(row),
+                                           dof_number,
+                                           comp);
+
+                // the search result is only usable if it is not
+                // end(row) and really sits in column dof_number;
+                // both are checked below. (NOTE(review): the
+                // earlier remark here mentioned a 'next_row'
+                // variable that does not appear in this function.)
+                //
+                // there should be such an entry! we know this because
+                // we have assumed that the sparsity pattern is
+                // symmetric and we only walk over those rows for
+                // which the current row has a column entry
+                Assert ((p != matrix.end(row))
+                        &&
+                        (p->column() == dof_number),
+                        ExcMessage("This function is trying to access an element of the "
+                                   "matrix that doesn't seem to exist. Are you using a "
+                                   "nonsymmetric sparsity pattern? If so, you are not "
+                                   "allowed to set the eliminate_column argument of this "
+                                   "function, see the documentation."));
+
+                // move the contribution of the eliminated entry to the rhs
+                right_hand_side(row) -= p->value() /
+                                        diagonal_entry * new_rhs;
+
+                // and then zero the entry itself in the matrix
+                p->value() = 0.;
+              }
+          }
+
+        // 4. store the prescribed value in the solution vector
+        solution(dof_number) = dof->second;
+      }
+  }
+
+
+
+  template <typename number>
+  void
+  apply_boundary_values (const std::map<types::global_dof_index,double> &boundary_values,
+                         BlockSparseMatrix<number>  &matrix,
+                         BlockVector<number>   &solution,
+                         BlockVector<number>   &right_hand_side,
+                         const bool             eliminate_columns)
+  {
+    const unsigned int blocks = matrix.n_block_rows();
+
+    Assert (matrix.n() == right_hand_side.size(),
+            ExcDimensionMismatch(matrix.n(), right_hand_side.size()));
+    Assert (matrix.n() == solution.size(),
+            ExcDimensionMismatch(matrix.n(), solution.size()));
+    Assert (matrix.n_block_rows() == matrix.n_block_cols(),
+            ExcNotQuadratic());
+    Assert (matrix.get_sparsity_pattern().get_row_indices() ==
+            matrix.get_sparsity_pattern().get_column_indices(),
+            ExcNotQuadratic());
+    Assert (matrix.get_sparsity_pattern().get_column_indices() ==
+            solution.get_block_indices (),
+            ExcBlocksDontMatch ());
+    Assert (matrix.get_sparsity_pattern().get_row_indices() ==
+            right_hand_side.get_block_indices (),
+            ExcBlocksDontMatch ());
+
+    // if no boundary values are to be applied
+    // simply return
+    if (boundary_values.size() == 0)
+      return;
+
+
+    const types::global_dof_index n_dofs = matrix.m();
+
+    // if a diagonal entry is zero
+    // later, then we use another
+    // number instead. take it to be
+    // the first nonzero diagonal
+    // element of the matrix, or 1 if
+    // there is no such thing
+    number first_nonzero_diagonal_entry = 0;
+    for (unsigned int diag_block=0; diag_block<blocks; ++diag_block)
+      {
+        for (unsigned int i=0; i<matrix.block(diag_block,diag_block).n(); ++i)
+          if (matrix.block(diag_block,diag_block).diag_element(i) != 0)
+            {
+              first_nonzero_diagonal_entry
+                = matrix.block(diag_block,diag_block).diag_element(i);
+              break;
+            }
+        // check whether we have found
+        // something in the present
+        // block
+        if (first_nonzero_diagonal_entry != 0)
+          break;
+      }
+    // nothing found on all diagonal
+    // blocks? if so, use 1.0 instead
+    if (first_nonzero_diagonal_entry == 0)
+      first_nonzero_diagonal_entry = 1;
+
+
+    std::map<types::global_dof_index,double>::const_iterator dof  = boundary_values.begin(),
+                                                             endd = boundary_values.end();
+    const BlockSparsityPattern &
+    sparsity_pattern = matrix.get_sparsity_pattern();
+
+    // pointer to the mapping between
+    // global and block indices. since
+    // the row and column mappings are
+    // equal, store a pointer on only
+    // one of them
+    const BlockIndices &
+    index_mapping = sparsity_pattern.get_column_indices();
+
+    // now loop over all boundary dofs
+    for (; dof != endd; ++dof)
+      {
+        Assert (dof->first < n_dofs, ExcInternalError());
+        (void)n_dofs;
+
+        // get global index and index
+        // in the block in which this
+        // dof is located
+        const types::global_dof_index dof_number = dof->first;
+        const std::pair<unsigned int,types::global_dof_index>
+        block_index = index_mapping.global_to_local (dof_number);
+
+        // for each boundary dof:
+
+        // set entries of this line
+        // to zero except for the diagonal
+        // entry. Note that the diagonal
+        // entry is always the first one
+        // in a row for square matrices
+        for (unsigned int block_col=0; block_col<blocks; ++block_col)
+          for (typename SparseMatrix<number>::iterator
+               p = (block_col == block_index.first ?
+                    matrix.block(block_index.first,block_col).begin(block_index.second) + 1 :
+                    matrix.block(block_index.first,block_col).begin(block_index.second));
+               p != matrix.block(block_index.first,block_col).end(block_index.second);
+               ++p)
+            p->value() = 0;
+
+        // set right hand side to
+        // wanted value: if main diagonal
+        // entry nonzero, don't touch it
+        // and scale rhs accordingly. If
+        // zero, take the first main
+        // diagonal entry we can find, or
+        // one if no nonzero main diagonal
+        // element exists. Normally, however,
+        // the main diagonal entry should
+        // not be zero.
+        //
+        // store the new rhs entry to make
+        // the gauss step more efficient
+        number new_rhs;
+        if (matrix.block(block_index.first, block_index.first)
+            .diag_element(block_index.second) != 0.0)
+          new_rhs = dof->second *
+                    matrix.block(block_index.first, block_index.first)
+                    .diag_element(block_index.second);
+        else
+          {
+            matrix.block(block_index.first, block_index.first)
+            .diag_element(block_index.second)
+              = first_nonzero_diagonal_entry;
+            new_rhs = dof->second * first_nonzero_diagonal_entry;
+          }
+        right_hand_side.block(block_index.first)(block_index.second)
+          = new_rhs;
+
+
+        // if the user wants to have
+        // the symmetry of the matrix
+        // preserved, and if the
+        // sparsity pattern is
+        // symmetric, then do a Gauss
+        // elimination step with the
+        // present row. this is a
+        // little more complicated for
+        // block matrices.
+        if (eliminate_columns)
+          {
+            // store the only nonzero entry
+            // of this line for the Gauss
+            // elimination step
+            const number diagonal_entry
+              = matrix.block(block_index.first,block_index.first)
+                .diag_element(block_index.second);
+
+            // we have to loop over all
+            // rows of the matrix which
+            // have a nonzero entry in
+            // the column which we work
+            // in presently. if the
+            // sparsity pattern is
+            // symmetric, then we can
+            // get the positions of
+            // these rows cheaply by
+            // looking at the nonzero
+            // column numbers of the
+            // present row.
+            //
+            // note that if we check
+            // whether row @p{row} in
+            // block (r,c) is non-zero,
+            // then we have to check
+            // for the existence of
+            // column @p{row} in block
+            // (c,r), i.e. of the
+            // transpose block
+            for (unsigned int block_row=0; block_row<blocks; ++block_row)
+              {
+                // get pointers to the sparsity patterns of this block and of
+                // the transpose one
+                const SparsityPattern &this_sparsity
+                  = sparsity_pattern.block (block_row, block_index.first);
+
+                SparseMatrix<number> &this_matrix
+                  = matrix.block(block_row, block_index.first);
+                SparseMatrix<number> &transpose_matrix
+                  = matrix.block(block_index.first, block_row);
+
+                // traverse the row of the transpose block to find the
+                // interesting rows in the present block.  don't use the
+                // diagonal element of the diagonal block
+                for (typename SparseMatrix<number>::iterator
+                     q = (block_index.first == block_row ?
+                          transpose_matrix.begin(block_index.second)+1 :
+                          transpose_matrix.begin(block_index.second));
+                     q != transpose_matrix.end(block_index.second);
+                     ++q)
+                  {
+                    // get the number of the column in this row in which a
+                    // nonzero entry is. this is also the row of the transpose
+                    // block which has an entry in the interesting row
+                    const types::global_dof_index row = q->column();
+
+                    // find the position of element (row,dof_number) in this
+                    // block (not in the transpose one). note that we have to
+                    // take care of special cases with square sub-matrices
+                    bool (*comp)(typename SparseMatrix<number>::iterator::value_type p,
+                                 const unsigned int column)
+                      = &column_less_than<typename SparseMatrix<number>::iterator>;
+
+                    typename SparseMatrix<number>::iterator p = this_matrix.end();
+
+                    if (this_sparsity.n_rows() == this_sparsity.n_cols())
+                      {
+                        if (this_matrix.begin(row)->column()
+                            ==
+                            block_index.second)
+                          p = this_matrix.begin(row);
+                        else
+                          p = Utilities::lower_bound(this_matrix.begin(row)+1,
+                                                     this_matrix.end(row),
+                                                     block_index.second,
+                                                     comp);
+                      }
+                    else
+                      p = Utilities::lower_bound(this_matrix.begin(row),
+                                                 this_matrix.end(row),
+                                                 block_index.second,
+                                                 comp);
+
+                    // check whether this line has an entry in the
+                    // regarding column (check for ==dof_number and !=
+                    // next_row, since if row==dof_number-1, *p is a
+                    // past-the-end pointer but points to dof_number
+                    // anyway...)
+                    //
+                    // there should be such an entry! we know this because
+                    // we have assumed that the sparsity pattern is
+                    // symmetric and we only walk over those rows for
+                    // which the current row has a column entry
+                    Assert ((p->column() == block_index.second) &&
+                            (p != this_matrix.end(row)),
+                            ExcInternalError());
+
+                    // correct right hand side
+                    right_hand_side.block(block_row)(row)
+                    -= p->value() /
+                       diagonal_entry * new_rhs;
+
+                    // set matrix entry to zero
+                    p->value() = 0.;
+                  }
+              }
+          }
+
+        // preset solution vector
+        solution.block(block_index.first)(block_index.second) = dof->second;
+      }
+  }
+
+
+
+
+
+
+  void
+  local_apply_boundary_values (const std::map<types::global_dof_index,double> &boundary_values,
+                               const std::vector<types::global_dof_index> &local_dof_indices,
+                               FullMatrix<double> &local_matrix,
+                               Vector<double>     &local_rhs,
+                               const bool          eliminate_columns)
+  {
+    // Imposes the values prescribed in boundary_values on one local
+    // system. Entry i of local_matrix and local_rhs belongs to the global
+    // index local_dof_indices[i]; every local index whose global index is
+    // a key of boundary_values is treated as constrained. For such an
+    // index the row is reduced to a positive diagonal entry, the rhs
+    // entry is set to that diagonal entry times the prescribed value
+    // and, if eliminate_columns is set, the column is moved to the right
+    // hand side as well. If boundary_values is empty, nothing is
+    // changed.
+    //
+    // All three local objects have to agree in size.
+    Assert (local_dof_indices.size() == local_matrix.m(),
+            ExcDimensionMismatch(local_dof_indices.size(),
+                                 local_matrix.m()));
+    Assert (local_dof_indices.size() == local_matrix.n(),
+            ExcDimensionMismatch(local_dof_indices.size(),
+                                 local_matrix.n()));
+    Assert (local_dof_indices.size() == local_rhs.size(),
+            ExcDimensionMismatch(local_dof_indices.size(),
+                                 local_rhs.size()));
+
+    // no constraints at all: leave the local system untouched
+    if (boundary_values.empty())
+      return;
+
+    // Value that replaces diagonal entries of constrained rows which are
+    // exactly zero. Simply writing a one there is a poor choice if the
+    // remaining matrix entries are very large: the residual contribution
+    // of such a row is then almost zero, so that iterative solvers would
+    // not correct the constrained unknowns to their proper values, a
+    // problem that is very hard to track down. Therefore the average
+    // absolute value of the nonzero diagonal entries is used instead.
+    //
+    // The value zero doubles as the marker "not computed yet": the
+    // average is only formed the first time it is needed, and one is
+    // used as a last resort if no nonzero average can be obtained.
+    double zero_diagonal_replacement = 0;
+    const unsigned int n_local_dofs = local_dof_indices.size();
+
+    // traverse the local dofs and treat the constrained ones
+    for (unsigned int i=0; i<n_local_dofs; ++i)
+      {
+        // local dofs whose global index has no entry in boundary_values
+        // are left alone
+        const std::map<types::global_dof_index, double>::const_iterator
+        constraint = boundary_values.find (local_dof_indices[i]);
+        if (constraint == boundary_values.end())
+          continue;
+
+        // wipe row i, keeping only its diagonal entry
+        for (unsigned int col=0; col<n_local_dofs; ++col)
+          if (col != i)
+            local_matrix(i,col) = 0;
+
+        // make the diagonal entry positive: a nonzero entry is replaced
+        // by its absolute value, a zero entry by the replacement value
+        // described above
+        if (local_matrix(i,i) != 0.)
+          local_matrix(i,i) = std::fabs(local_matrix(i,i));
+        else
+          {
+            if (zero_diagonal_replacement == 0.)
+              {
+                // first use: average the absolute values of all nonzero
+                // diagonal entries of the local matrix
+                unsigned int n_nonzero = 0;
+                for (unsigned int k=0; k<n_local_dofs; ++k)
+                  if (local_matrix(k,k) != 0.)
+                    {
+                      zero_diagonal_replacement += std::fabs(local_matrix(k,k));
+                      ++n_nonzero;
+                    }
+                if (n_nonzero != 0)
+                  zero_diagonal_replacement /= n_nonzero;
+
+                // last resort if this did not produce a nonzero value,
+                // which is the case if all diagonal entries are zero:
+                // choose one
+                if (zero_diagonal_replacement == 0.)
+                  zero_diagonal_replacement = 1.;
+              }
+
+            local_matrix(i,i) = zero_diagonal_replacement;
+          }
+
+        // row i now only consists of its diagonal entry; pick the rhs
+        // entry such that this row yields exactly the prescribed value
+        const double prescribed_value = constraint->second;
+        local_rhs(i) = local_matrix(i,i) * prescribed_value;
+
+        // finally, if requested, eliminate column i as well: in every
+        // other row, move the contribution of the now known unknown to
+        // the right hand side and zero the matrix entry, so that a
+        // symmetric local matrix stays symmetric
+        if (eliminate_columns)
+          {
+            for (unsigned int row=0; row<n_local_dofs; ++row)
+              if (row != i)
+                {
+                  local_rhs(row) -= local_matrix(row,i) * prescribed_value;
+                  local_matrix(row,i) = 0;
+                }
+          }
+      }
+  }
+}
+
+
+
+// explicit instantiations
+#include "matrix_tools.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/numerics/matrix_tools.inst.in b/source/numerics/matrix_tools.inst.in
new file mode 100644
index 0000000..3516f40
--- /dev/null
+++ b/source/numerics/matrix_tools.inst.in
@@ -0,0 +1,35 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2010 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+// Explicit instantiations of the two MatrixTools::apply_boundary_values
+// overloads named below, written once per type `number' in REAL_SCALARS.
+for (number: REAL_SCALARS)
+  {
+    template
+      void MatrixTools::apply_boundary_values
+      (const std::map<types::global_dof_index,double> &boundary_values,
+       SparseMatrix<number>  &matrix,
+       Vector<number>   &solution,
+       Vector<number>   &right_hand_side,
+       const bool        eliminate_columns);
+
+    template
+      void MatrixTools::apply_boundary_values 
+      (const std::map<types::global_dof_index,double> &boundary_values,
+       BlockSparseMatrix<number>  &matrix,
+       BlockVector<number>   &solution,
+       BlockVector<number>   &right_hand_side,
+       const bool        eliminate_columns);
+
+  }
diff --git a/source/numerics/matrix_tools_once.cc b/source/numerics/matrix_tools_once.cc
new file mode 100644
index 0000000..d22fffb
--- /dev/null
+++ b/source/numerics/matrix_tools_once.cc
@@ -0,0 +1,523 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/function.h>
+#include <deal.II/base/quadrature.h>
+#include <deal.II/base/work_stream.h>
+#include <deal.II/base/geometry_info.h>
+#include <deal.II/base/quadrature.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/mapping_q1.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/hp/fe_values.h>
+#include <deal.II/hp/mapping_collection.h>
+#include <deal.II/numerics/matrix_tools.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/block_sparse_matrix.h>
+
+#ifdef DEAL_II_WITH_PETSC
+#  include <deal.II/lac/petsc_parallel_sparse_matrix.h>
+#  include <deal.II/lac/petsc_sparse_matrix.h>
+#  include <deal.II/lac/petsc_parallel_vector.h>
+#  include <deal.II/lac/petsc_vector.h>
+#  include <deal.II/lac/petsc_parallel_block_sparse_matrix.h>
+#endif
+
+#ifdef DEAL_II_WITH_TRILINOS
+#  include <deal.II/lac/trilinos_sparse_matrix.h>
+#  include <deal.II/lac/trilinos_vector.h>
+#  include <deal.II/lac/trilinos_block_sparse_matrix.h>
+#  include <deal.II/lac/trilinos_block_vector.h>
+#endif
+
+#include <algorithm>
+#include <set>
+#include <cmath>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace MatrixTools
+{
+
+#ifdef DEAL_II_WITH_PETSC
+
+  namespace internal
+  {
+    namespace PETScWrappers
+    {
+      template <typename PETScMatrix, typename PETScVector>
+      void
+      apply_boundary_values (const std::map<types::global_dof_index,double> &boundary_values,
+                             PETScMatrix      &matrix,
+                             PETScVector      &solution,
+                             PETScVector      &right_hand_side,
+                             const bool        eliminate_columns)
+      {
+        // Common implementation behind the overloads for the individual
+        // PETSc matrix and vector types. Eliminating columns is not
+        // implemented here, so eliminate_columns has to be false.
+        (void)eliminate_columns;
+        Assert (eliminate_columns == false, ExcNotImplemented());
+
+        Assert (matrix.n() == right_hand_side.size(),
+                ExcDimensionMismatch(matrix.n(), right_hand_side.size()));
+        Assert (matrix.n() == solution.size(),
+                ExcDimensionMismatch(matrix.n(), solution.size()));
+
+        if (boundary_values.empty())
+          {
+            // there is nothing to constrain, but clear_rows() is a
+            // collective operation, so it still has to be called, here
+            // with an empty list of rows
+            std::vector<types::global_dof_index> no_rows;
+            matrix.clear_rows (no_rows, 1.);
+          }
+        else
+          {
+            // the matrix and both vectors have to report the same local
+            // range
+            const std::pair<types::global_dof_index, types::global_dof_index>
+            local_rows = matrix.local_range();
+            Assert (local_rows == right_hand_side.local_range(), ExcInternalError());
+            Assert (local_rows == solution.local_range(), ExcInternalError());
+
+            // value to be written onto the diagonal of all constrained
+            // rows: the absolute value of the first nonzero diagonal
+            // entry found within the local range, or one if there is no
+            // such entry
+            PetscScalar new_diagonal_value = 1;
+            for (types::global_dof_index row=local_rows.first; row<local_rows.second; ++row)
+              if (matrix.diag_element(row) != PetscScalar ())
+                {
+                  new_diagonal_value = std::abs(matrix.diag_element(row));
+                  break;
+                }
+
+            // collect the constrained rows that lie within the local
+            // range
+            std::vector<types::global_dof_index> constrained_rows;
+            for (std::map<types::global_dof_index,double>::const_iterator
+                 bv  = boundary_values.begin();
+                 bv != boundary_values.end();
+                 ++bv)
+              {
+                if ((bv->first < local_rows.first) ||
+                    (bv->first >= local_rows.second))
+                  continue;
+                constrained_rows.push_back (bv->first);
+              }
+
+            // clear these rows and put the value chosen above onto their
+            // diagonal. interleaving read and write operations is very
+            // expensive for PETSc matrices, which is why the diagonal
+            // entry is always overwritten instead of being preserved if
+            // it is nonzero; in this respect the function differs from
+            // the ones for deal.II's own sparse matrices.
+            matrix.clear_rows (constrained_rows, new_diagonal_value);
+
+            // write the prescribed values into the solution vector
+            std::vector<types::global_dof_index> indices;
+            std::vector<PetscScalar>  solution_values;
+            for (std::map<types::global_dof_index,double>::const_iterator
+                 bv  = boundary_values.begin();
+                 bv != boundary_values.end();
+                 ++bv)
+              {
+                if ((bv->first < local_rows.first) ||
+                    (bv->first >= local_rows.second))
+                  continue;
+                indices.push_back (bv->first);
+                solution_values.push_back (bv->second);
+              }
+            solution.set (indices, solution_values);
+
+            // the rhs entries are the prescribed values times the value
+            // that was written onto the diagonal
+            for (unsigned int k=0; k<solution_values.size(); ++k)
+              solution_values[k] *= new_diagonal_value;
+            right_hand_side.set (indices, solution_values);
+          }
+
+        // clean up
+        solution.compress (VectorOperation::insert);
+        right_hand_side.compress (VectorOperation::insert);
+      }
+    }
+  }
+
+
+
+
+
+  void
+  apply_boundary_values (const std::map<types::global_dof_index,double> &boundary_values,
+                         PETScWrappers::SparseMatrix &matrix,
+                         PETScWrappers::Vector       &solution,
+                         PETScWrappers::Vector       &right_hand_side,
+                         const bool                   eliminate_columns)
+  {
+    // overload for PETScWrappers::SparseMatrix and PETScWrappers::Vector;
+    // all work is done by the implementation shared by the PETSc overloads
+    internal::PETScWrappers::apply_boundary_values
+    (boundary_values, matrix, solution, right_hand_side, eliminate_columns);
+  }
+
+
+
+  void
+  apply_boundary_values (const std::map<types::global_dof_index,double> &boundary_values,
+                         PETScWrappers::MPI::SparseMatrix &matrix,
+                         PETScWrappers::MPI::Vector       &solution,
+                         PETScWrappers::MPI::Vector       &right_hand_side,
+                         const bool                        eliminate_columns)
+  {
+    // overload for the PETScWrappers::MPI matrix and vector types; all
+    // work is done by the implementation shared by the PETSc overloads
+    internal::PETScWrappers::apply_boundary_values
+    (boundary_values, matrix, solution, right_hand_side, eliminate_columns);
+  }
+
+
+  void
+  apply_boundary_values (const std::map<types::global_dof_index,double>  &boundary_values,
+                         PETScWrappers::MPI::BlockSparseMatrix &matrix,
+                         PETScWrappers::MPI::BlockVector        &solution,
+                         PETScWrappers::MPI::BlockVector        &right_hand_side,
+                         const bool                            eliminate_columns)
+  {
+    // Applies the boundary values block row by block row; the matrix has
+    // to have as many block rows as block columns.
+    Assert (matrix.n() == right_hand_side.size(),
+            ExcDimensionMismatch(matrix.n(), right_hand_side.size()));
+    Assert (matrix.n() == solution.size(),
+            ExcDimensionMismatch(matrix.n(), solution.size()));
+    Assert (matrix.n_block_rows() == matrix.n_block_cols(), ExcNotQuadratic());
+
+    const unsigned int n_blocks = matrix.n_block_rows();
+
+    // Split the boundary values by block row: entry b of the following
+    // vector holds those of block row b, keyed by the index within the
+    // block. Keys are visited in ascending order, so the block counter
+    // only moves forward; it is never moved past the last block row.
+    std::vector<std::map<dealii::types::global_dof_index,double> > block_boundary_values(n_blocks);
+    {
+      unsigned int block = 0;
+      dealii::types::global_dof_index offset = 0;
+      for (std::map<types::global_dof_index,double>::const_iterator
+           dof  = boundary_values.begin();
+           dof != boundary_values.end(); ++dof)
+        {
+          // move on to the block row that contains this index; a loop is
+          // needed since block rows without boundary values are skipped
+          while ((block+1 < n_blocks) &&
+                 (dof->first >= matrix.block(block,0).m() + offset))
+            {
+              offset += matrix.block(block,0).m();
+              ++block;
+            }
+          const types::global_dof_index index = dof->first - offset;
+          block_boundary_values[block].insert(std::pair<types::global_dof_index, double> (index,dof->second));
+        }
+    }
+
+    // Now call the non-block variant on the diagonal blocks and on the
+    // matching blocks of solution and rhs.
+    for (unsigned int block=0; block<n_blocks; ++block)
+      internal::PETScWrappers::apply_boundary_values(block_boundary_values[block],
+                                                     matrix.block(block,block),
+                                                     solution.block(block),
+                                                     right_hand_side.block(block),
+                                                     eliminate_columns);
+
+    // Finally, clear the constrained rows in the off-diagonal blocks,
+    // restricted to the rows within the local range of each block row.
+    for (unsigned int block_m=0; block_m<n_blocks; ++block_m)
+      {
+        const std::pair<types::global_dof_index, types::global_dof_index> local_range
+          = matrix.block(block_m,0).local_range();
+
+        std::vector<types::global_dof_index> constrained_rows;
+        for (std::map<types::global_dof_index,double>::const_iterator
+             dof  = block_boundary_values[block_m].begin();
+             dof != block_boundary_values[block_m].end();
+             ++dof)
+          if ((dof->first >= local_range.first) &&
+              (dof->first < local_range.second))
+            constrained_rows.push_back (dof->first);
+
+        for (unsigned int block_n=0; block_n<n_blocks; ++block_n)
+          if (block_m != block_n)
+            matrix.block(block_m,block_n).clear_rows(constrained_rows);
+      }
+  }
+
+#endif
+
+
+
+#ifdef DEAL_II_WITH_TRILINOS
+
+  namespace internal
+  {
+    namespace TrilinosWrappers
+    {
+      template <typename TrilinosMatrix, typename TrilinosVector>
+      void
+      apply_boundary_values (const std::map<types::global_dof_index,double> &boundary_values,
+                             TrilinosMatrix      &matrix,
+                             TrilinosVector      &solution,
+                             TrilinosVector      &right_hand_side,
+                             const bool           eliminate_columns)
+      {
+        // Common implementation behind the overloads for the individual
+        // Trilinos matrix and vector types. Eliminating columns is not
+        // implemented here, so eliminate_columns has to be false.
+        Assert (eliminate_columns == false, ExcNotImplemented());
+        (void)eliminate_columns;
+
+        Assert (matrix.n() == right_hand_side.size(),
+                ExcDimensionMismatch(matrix.n(), right_hand_side.size()));
+        Assert (matrix.n() == solution.size(),
+                ExcDimensionMismatch(matrix.n(), solution.size()));
+
+        // even if no boundary values are to be applied, clear_rows() and
+        // the compress() calls below still have to be performed because
+        // they are collective operations
+        if (boundary_values.size() > 0)
+          {
+            const std::pair<types::global_dof_index, types::global_dof_index> local_range
+              = matrix.local_range();
+            Assert (local_range == right_hand_side.local_range(),
+                    ExcInternalError());
+            Assert (local_range == solution.local_range(),
+                    ExcInternalError());
+
+            // determine the absolute value of the first nonzero diagonal
+            // entry from within the part of the matrix that we can see.
+            // if we can't find such an entry, take one
+            TrilinosScalar first_nonzero_diagonal_entry = 1;
+            for (types::global_dof_index i=local_range.first; i<local_range.second; ++i)
+              if (matrix.diag_element(i) != 0)
+                {
+                  first_nonzero_diagonal_entry = std::fabs(matrix.diag_element(i));
+                  break;
+                }
+
+            // figure out which rows of the matrix we
+            // have to eliminate on this processor
+            std::vector<types::global_dof_index> constrained_rows;
+            for (std::map<types::global_dof_index,double>::const_iterator
+                 dof  = boundary_values.begin();
+                 dof != boundary_values.end();
+                 ++dof)
+              if ((dof->first >= local_range.first) &&
+                  (dof->first < local_range.second))
+                constrained_rows.push_back (dof->first);
+
+            // then eliminate these rows and set their diagonal entry to
+            // what we have determined above. if the value already is
+            // nonzero, it will be preserved, in accordance with the basic
+            // matrix classes in deal.II, which is why the rhs below is
+            // scaled by the actual diagonal entries.
+            matrix.clear_rows (constrained_rows, first_nonzero_diagonal_entry);
+
+            std::vector<types::global_dof_index> indices;
+            std::vector<TrilinosScalar>  solution_values;
+            for (std::map<types::global_dof_index,double>::const_iterator
+                 dof  = boundary_values.begin();
+                 dof != boundary_values.end();
+                 ++dof)
+              if ((dof->first >= local_range.first) &&
+                  (dof->first < local_range.second))
+                {
+                  indices.push_back (dof->first);
+                  solution_values.push_back (dof->second);
+                }
+            solution.set (indices, solution_values);
+
+            // now also set appropriate values for the rhs: the prescribed
+            // value times the diagonal entry of the respective row
+            for (unsigned int i=0; i<solution_values.size(); ++i)
+              solution_values[i] *= matrix.diag_element(indices[i]);
+
+            right_hand_side.set (indices, solution_values);
+          }
+        else
+          {
+            // clear_rows() is a collective operation so we still have to call
+            // it:
+            std::vector<types::global_dof_index> constrained_rows;
+            matrix.clear_rows (constrained_rows, 1.);
+          }
+
+        // clean up
+        matrix.compress (VectorOperation::insert);
+        solution.compress (VectorOperation::insert);
+        right_hand_side.compress (VectorOperation::insert);
+      }
+
+
+
+      template <typename TrilinosMatrix, typename TrilinosBlockVector>
+      void
+      apply_block_boundary_values (const std::map<types::global_dof_index,double> &boundary_values,
+                                   TrilinosMatrix      &matrix,
+                                   TrilinosBlockVector &solution,
+                                   TrilinosBlockVector &right_hand_side,
+                                   const bool          eliminate_columns)
+      {
+        // Applies the boundary values block row by block row. Eliminating
+        // columns is not implemented; the block structure must be square.
+        Assert (eliminate_columns == false, ExcNotImplemented());
+
+        Assert (matrix.n() == right_hand_side.size(),
+                ExcDimensionMismatch(matrix.n(), right_hand_side.size()));
+        Assert (matrix.n() == solution.size(),
+                ExcDimensionMismatch(matrix.n(), solution.size()));
+        Assert (matrix.n_block_rows() == matrix.n_block_cols(), ExcNotQuadratic());
+
+        const unsigned int n_blocks = matrix.n_block_rows();
+
+        // Split the boundary values by block row: entry b of the following
+        // vector holds those of block row b, keyed by the index within the
+        // block. Keys are visited in ascending order, so the block counter
+        // only moves forward; it is never moved past the last block row.
+        std::vector<std::map<types::global_dof_index,double> > block_boundary_values(n_blocks);
+        {
+          unsigned int block=0;
+          types::global_dof_index offset = 0;
+          for (std::map<types::global_dof_index,double>::const_iterator
+               dof  = boundary_values.begin();
+               dof != boundary_values.end(); ++dof)
+            {
+              // move on to the block row that contains this index; a loop
+              // is needed since block rows without boundary values are skipped
+              while ((block+1 < n_blocks) &&
+                     (dof->first >= matrix.block(block,0).m() + offset))
+                {
+                  offset += matrix.block(block,0).m();
+                  ++block;
+                }
+              const types::global_dof_index index = dof->first - offset;
+              block_boundary_values[block].insert(
+                std::pair<types::global_dof_index, double> (index,dof->second));
+            }
+        }
+
+        // Now call the non-block variant on the diagonal blocks and on the
+        // matching blocks of solution and rhs.
+        for (unsigned int block=0; block<n_blocks; ++block)
+          TrilinosWrappers::apply_boundary_values(block_boundary_values[block],
+                                                  matrix.block(block,block),
+                                                  solution.block(block),
+                                                  right_hand_side.block(block),
+                                                  eliminate_columns);
+
+        // Finally, clear the constrained rows in the off-diagonal blocks,
+        // restricted to the rows within the local range of each block row.
+        for (unsigned int block_m=0; block_m<n_blocks; ++block_m)
+          {
+            const std::pair<types::global_dof_index, types::global_dof_index> local_range
+              = matrix.block(block_m,0).local_range();
+
+            std::vector<types::global_dof_index> constrained_rows;
+            for (std::map<types::global_dof_index,double>::const_iterator
+                 dof  = block_boundary_values[block_m].begin();
+                 dof != block_boundary_values[block_m].end();
+                 ++dof)
+              if ((dof->first >= local_range.first) &&
+                  (dof->first < local_range.second))
+                constrained_rows.push_back (dof->first);
+
+            for (unsigned int block_n=0; block_n<n_blocks; ++block_n)
+              if (block_m != block_n)
+                matrix.block(block_m,block_n).clear_rows(constrained_rows);
+          }
+      }
+    }
+  }
+
+
+
+
+  void
+  apply_boundary_values (const std::map<types::global_dof_index,double> &boundary_values,
+                         TrilinosWrappers::SparseMatrix &matrix,
+                         TrilinosWrappers::Vector       &solution,
+                         TrilinosWrappers::Vector       &right_hand_side,
+                         const bool                      eliminate_columns)
+  {
+    // overload for TrilinosWrappers::Vector; all work is done by the
+    // implementation shared by the non-block Trilinos overloads
+    internal::TrilinosWrappers::apply_boundary_values
+    (boundary_values, matrix, solution, right_hand_side, eliminate_columns);
+  }
+
+
+
+  void
+  apply_boundary_values (const std::map<types::global_dof_index,double> &boundary_values,
+                         TrilinosWrappers::SparseMatrix &matrix,
+                         TrilinosWrappers::MPI::Vector  &solution,
+                         TrilinosWrappers::MPI::Vector  &right_hand_side,
+                         const bool                      eliminate_columns)
+  {
+    // overload for TrilinosWrappers::MPI::Vector; all work is done by the
+    // implementation shared by the non-block Trilinos overloads
+    internal::TrilinosWrappers::apply_boundary_values
+    (boundary_values, matrix, solution, right_hand_side, eliminate_columns);
+  }
+
+
+
+  void
+  apply_boundary_values (const std::map<types::global_dof_index,double> &boundary_values,
+                         TrilinosWrappers::BlockSparseMatrix &matrix,
+                         TrilinosWrappers::BlockVector       &solution,
+                         TrilinosWrappers::BlockVector       &right_hand_side,
+                         const bool                           eliminate_columns)
+  {
+    // forward to the implementation shared by the block overloads
+    internal::TrilinosWrappers::apply_block_boundary_values
+    (boundary_values, matrix, solution, right_hand_side, eliminate_columns);
+  }
+
+
+
+  void
+  apply_boundary_values (const std::map<types::global_dof_index,double> &boundary_values,
+                         TrilinosWrappers::BlockSparseMatrix &matrix,
+                         TrilinosWrappers::MPI::BlockVector  &solution,
+                         TrilinosWrappers::MPI::BlockVector  &right_hand_side,
+                         const bool                           eliminate_columns)
+  {
+    // forward to the implementation shared by the block overloads
+    internal::TrilinosWrappers::apply_block_boundary_values
+    (boundary_values, matrix, solution, right_hand_side, eliminate_columns);
+  }
+
+#endif
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/numerics/point_value_history.cc b/source/numerics/point_value_history.cc
new file mode 100644
index 0000000..ee6a4bd
--- /dev/null
+++ b/source/numerics/point_value_history.cc
@@ -0,0 +1,1358 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/parallel_vector.h>
+#include <deal.II/lac/parallel_block_vector.h>
+#include <deal.II/lac/petsc_vector.h>
+#include <deal.II/lac/petsc_block_vector.h>
+#include <deal.II/lac/trilinos_vector.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+
+#include <deal.II/numerics/vector_tools.h>
+
+#include <deal.II/numerics/point_value_history.h>
+
+#include <algorithm>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace internal
+{
+  namespace PointValueHistory
+  {
+/// The constructor only stores copies of its three arguments in the members
+    template <int dim>
+    PointGeometryData<dim>
+    ::PointGeometryData (const Point <dim> &new_requested_location,
+                         const std::vector <Point <dim> > &new_locations,
+                         const std::vector <types::global_dof_index> &new_sol_indices)
+      :
+      requested_location (new_requested_location),
+      support_point_locations (new_locations),
+      solution_indices (new_sol_indices)
+    {}
+  }
+}
+
+
+
+template <int dim>
+PointValueHistory<dim>
+::PointValueHistory (const unsigned int n_independent_variables) :
+  n_indep (n_independent_variables)
+{
+  // this constructor is not given a DoFHandler
+  have_dof_handler = false;
+  triangulation_changed = false;
+  // the object starts out neither cleared nor closed
+  cleared = false;
+  closed = false;
+
+  // start with no keys and no names, and with one empty history per
+  // independent variable
+  dataset_key.clear ();
+  indep_names.clear ();
+  independent_values.assign (n_indep, std::vector <double> ());
+}
+
+
+
+template <int dim>
+PointValueHistory<dim>::PointValueHistory (const DoFHandler<dim> &dof_handler,
+                                           const unsigned int n_independent_variables) :
+  dof_handler (&dof_handler),
+  n_indep (n_independent_variables)
+{
+  // a DoFHandler is attached; the object starts out neither cleared nor closed
+  have_dof_handler = true;
+  triangulation_changed = false;
+  cleared = false;
+  closed = false;
+
+  // start with no keys and no names, and with one empty history per
+  // independent variable
+  dataset_key.clear ();
+  indep_names.clear ();
+  independent_values.assign (n_indep, std::vector <double> ());
+
+  // connect tria_change_listener() of this object to the any_change signal
+  tria_listener = dof_handler.get_triangulation().signals.any_change.connect
+                  (std_cxx11::bind (&PointValueHistory<dim>::tria_change_listener, std_cxx11::ref(*this)));
+}
+
+
+
+template <int dim>
+PointValueHistory<dim>::PointValueHistory (const PointValueHistory &point_value_history)
+{
+  n_indep = point_value_history.n_indep;
+  closed = point_value_history.closed;
+  cleared = point_value_history.cleared;
+  triangulation_changed = point_value_history.triangulation_changed;
+  have_dof_handler = point_value_history.have_dof_handler;
+
+  // the copy refers to the same DoFHandler as the original
+  dof_handler = point_value_history.dof_handler;
+
+  // recorded keys, independent values, their names, and the field data
+  dataset_key = point_value_history.dataset_key;
+  independent_values = point_value_history.independent_values;
+  indep_names = point_value_history.indep_names;
+  data_store = point_value_history.data_store;
+
+  // per-field masks and component names, and the per-point geometry data
+  component_mask = point_value_history.component_mask;
+  component_names_map = point_value_history.component_names_map;
+  point_geometry_data = point_value_history.point_geometry_data;
+
+  // tria_listener is not copied; the new object connects a slot of its own
+  if (have_dof_handler)
+    tria_listener = dof_handler->get_triangulation().signals.any_change.connect
+                    (std_cxx11::bind (&PointValueHistory<dim>::tria_change_listener, std_cxx11::ref(*this)));
+}
+
+
+
+template <int dim>
+PointValueHistory<dim> &
+PointValueHistory<dim>::operator= (const PointValueHistory &point_value_history)
+{
+  // Release the slot this object may already hold: overwriting the handle
+  // below would not disconnect it. This is a no-op if nothing is connected.
+  tria_listener.disconnect ();
+
+  dataset_key = point_value_history.dataset_key;
+  independent_values = point_value_history.independent_values;
+  indep_names = point_value_history.indep_names;
+  data_store = point_value_history.data_store;
+  component_mask = point_value_history.component_mask;
+  component_names_map = point_value_history.component_names_map;
+  point_geometry_data = point_value_history.point_geometry_data;
+  closed = point_value_history.closed;
+  cleared = point_value_history.cleared;
+  dof_handler = point_value_history.dof_handler;
+  triangulation_changed = point_value_history.triangulation_changed;
+  have_dof_handler = point_value_history.have_dof_handler;
+  n_indep = point_value_history.n_indep;
+
+  // Connect a slot for this object (not the source) to the new triangulation.
+  if (have_dof_handler)
+    {
+      tria_listener = dof_handler->get_triangulation().signals.any_change.connect (std_cxx11::bind (&PointValueHistory<dim>::tria_change_listener,
+                      std_cxx11::ref(*this)));
+    }
+
+  return * this;
+}
+
+
+
+template <int dim>
+PointValueHistory<dim>
+::~PointValueHistory ()
+{
+  // Disconnect unconditionally: clear() resets have_dof_handler without
+  // disconnecting, so testing that flag here could leave a slot bound to
+  // a destroyed object. Disconnecting an unconnected handle is a no-op.
+  tria_listener.disconnect ();
+}
+
+
+
+template <int dim>
+void PointValueHistory<dim>
+::add_point (const Point <dim> &location)
+{
+  // Register location as a new point. This is only allowed while the object
+  // is not closed or cleared, has a DoFHandler and no mesh change is flagged.
+  AssertThrow (!closed, ExcInvalidState ());
+  AssertThrow (!cleared, ExcInvalidState ());
+  AssertThrow (have_dof_handler, ExcDoFHandlerRequired ());
+  AssertThrow (!triangulation_changed, ExcDoFHandlerChanged ());
+
+  // Implementation assumes that support
+  // points locations are dofs locations
+  AssertThrow (dof_handler->get_fe ().has_support_points (), ExcNotImplemented ());
+
+  // Unit support points of the FE. Bind them by reference: they are only
+  // read here, so copying the whole vector is unnecessary.
+  const std::vector <Point <dim> > &
+  unit_support_points = dof_handler->get_fe ().get_unit_support_points ();
+
+  // While in general quadrature points seems
+  // to refer to Gauss quadrature points, in
+  // this case the quadrature points are
+  // forced to be the support points of the
+  // FE.
+  Quadrature<dim>
+  support_point_quadrature (unit_support_points);
+  FEValues<dim> fe_values (dof_handler->get_fe (),
+                           support_point_quadrature,
+                           update_quadrature_points);
+  const unsigned int n_support_points
+    = unit_support_points.size ();
+  const unsigned int n_components
+    = dof_handler->get_fe ().n_components ();
+
+  // set up a loop over all the cells in the
+  // DoFHandler
+  typename DoFHandler<dim>::active_cell_iterator
+  cell = dof_handler->begin_active ();
+  typename DoFHandler<dim>::active_cell_iterator
+  endc = dof_handler->end ();
+
+  // default values to be replaced as closer
+  // points are found however they need to be
+  // consistent in case they are actually
+  // chosen
+  typename DoFHandler<dim>::active_cell_iterator current_cell = cell;
+  std::vector <unsigned int> current_fe_index (n_components, 0); // need one index per component
+  fe_values.reinit (cell);
+  std::vector <Point <dim> > current_points (n_components, Point <dim > ());
+  for (unsigned int support_point = 0;
+       support_point < n_support_points; ++support_point)
+    {
+      // setup valid data in the empty
+      // vectors
+      const unsigned int component
+        = dof_handler->get_fe ().system_to_component_index (support_point).first;
+      current_points [component] = fe_values.quadrature_point (support_point);
+      current_fe_index [component] = support_point;
+    }
+
+  // check each cell to find a suitable
+  // support points
+  // GridTools::find_active_cell_around_point
+  // is an alternative. That method is not
+  // used here mostly because of the history
+  // of the class. The algorithm used in
+  // add_points below may be slightly more
+  // efficient than find_active_cell_around_point
+  // because it operates on a set of points.
+
+  for (; cell != endc; ++cell)
+    {
+      fe_values.reinit (cell);
+
+      for (unsigned int support_point = 0;
+           support_point < n_support_points; ++support_point)
+        {
+          const unsigned int component
+            = dof_handler->get_fe ().system_to_component_index (support_point).first;
+          const Point<dim> test_point
+            = fe_values.quadrature_point (support_point);
+
+          if (location.distance (test_point) <
+              location.distance (current_points [component]))
+            {
+              // save the data
+              current_points [component] = test_point;
+              current_cell = cell;
+              current_fe_index [component] = support_point;
+            }
+        }
+    }
+
+
+  std::vector<types::global_dof_index>
+  local_dof_indices (dof_handler->get_fe ().dofs_per_cell);
+  std::vector <types::global_dof_index> new_solution_indices;
+  current_cell->get_dof_indices (local_dof_indices);
+  // there is an implicit assumption here
+  // that all the closest support point to
+  // the requested point for all finite
+  // element components lie in the same cell.
+  // this could possibly be violated if
+  // components use different fe orders,
+  // requested points are on the edge or
+  // vertex of a cell and we are unlucky with
+  // floating point rounding. Worst case
+  // scenario however is that the point
+  // selected isn't the closest possible, it
+  // will still lie within one cell distance.
+  // calling
+  // GridTools::find_active_cell_around_point
+  // to obtain a cell to search is an
+  // option for these methods, but currently
+  // the GridTools function does not cater for
+  // a vector of points, and does not seem to
+  // be intrinsically faster than this method.
+  for (unsigned int component = 0;
+       component < n_components; ++component)
+    {
+      new_solution_indices
+      .push_back (local_dof_indices[current_fe_index [component]]);
+    }
+
+  internal::PointValueHistory::PointGeometryData<dim>
+  new_point_geometry_data (location, current_points, new_solution_indices);
+  point_geometry_data.push_back (new_point_geometry_data);
+
+  std::map <std::string, std::vector <std::vector <double> > >::iterator
+  data_store_begin = data_store.begin ();
+  for (; data_store_begin != data_store.end (); ++data_store_begin)
+    {
+      // add an extra row to each vector
+      // entry
+      const ComponentMask &current_mask = (component_mask.find (data_store_begin->first))->second;
+      const unsigned int n_stored = current_mask.n_selected_components();
+      for (unsigned int component = 0; component < n_stored; ++component)
+        {
+          data_store_begin->second.push_back (std::vector<double> (0));
+        }
+    }
+}
+
+
+
+template <int dim>
+void PointValueHistory<dim>
+::add_points (const std::vector <Point <dim> > &locations)
+{
+  // This algorithm adds points in the same
+  // order as they appear in the vector
+  // locations and users may depend on this
+  // so do not change order added!
+
+  // can't be closed to add additional points or vectors
+  AssertThrow (!closed, ExcInvalidState ());
+  AssertThrow (!cleared, ExcInvalidState ());
+  AssertThrow (have_dof_handler, ExcDoFHandlerRequired ());
+  AssertThrow (!triangulation_changed, ExcDoFHandlerChanged ());
+
+
+  // Implementation assumes that support
+  // points locations are dofs locations
+  AssertThrow (dof_handler->get_fe ().has_support_points (), ExcNotImplemented ());
+
+  // Unit support points of the FE. Bind them by reference: they are only
+  // read here, so copying the whole vector is unnecessary.
+  const std::vector <Point <dim> > &unit_support_points = dof_handler->get_fe ().get_unit_support_points ();
+
+  // While in general quadrature points seems
+  // to refer to Gauss quadrature points, in
+  // this case the quadrature points are
+  // forced to be the support points of the
+  // FE.
+  Quadrature<dim> support_point_quadrature (unit_support_points);
+  FEValues<dim> fe_values (dof_handler->get_fe (), support_point_quadrature, update_quadrature_points);
+  const unsigned int n_support_points = unit_support_points.size ();
+  const unsigned int n_components = dof_handler->get_fe ().n_components ();
+
+  // set up a loop over all the cells in the
+  // DoFHandler
+  typename DoFHandler<dim>::active_cell_iterator cell = dof_handler->begin_active ();
+  typename DoFHandler<dim>::active_cell_iterator endc = dof_handler->end ();
+
+  // default values to be replaced as closer
+  // points are found however they need to be
+  // consistent in case they are actually
+  // chosen vector <vector>s defined where
+  // previously single vectors were used
+
+  // need to store one value per point per component
+  std::vector <typename DoFHandler<dim>::active_cell_iterator > current_cell (locations.size (), cell);
+
+  fe_values.reinit (cell);
+  std::vector <Point <dim> > temp_points (n_components, Point <dim > ());
+  std::vector <unsigned int> temp_fe_index (n_components, 0);
+  for (unsigned int support_point = 0; support_point < n_support_points; ++support_point)
+    {
+      // setup valid data in the empty
+      // vectors
+      const unsigned int component = dof_handler->get_fe ().system_to_component_index (support_point).first;
+      temp_points [component] = fe_values.quadrature_point (support_point);
+      temp_fe_index [component] = support_point;
+    }
+  std::vector <std::vector <Point <dim> > > current_points (locations.size (), temp_points); // give a valid start point
+  std::vector <std::vector <unsigned int> > current_fe_index (locations.size (), temp_fe_index);
+
+  // check each cell to find suitable support
+  // points
+  // GridTools::find_active_cell_around_point
+  // is an alternative. That method is not
+  // used here mostly because of the history
+  // of the class. The algorithm used here
+  // may be slightly more
+  // efficient than find_active_cell_around_point
+  // because it operates on a set of points.
+  for (; cell != endc; ++cell)
+    {
+      fe_values.reinit (cell);
+      for (unsigned int support_point = 0; support_point < n_support_points; ++support_point)
+        {
+          const unsigned int component = dof_handler->get_fe ().system_to_component_index (support_point).first;
+          const Point<dim> test_point = fe_values.quadrature_point (support_point);
+
+          for (unsigned int point = 0; point < locations.size (); ++point)
+            {
+              if (locations[point].distance (test_point) < locations[point].distance (current_points[point][component]))
+                {
+                  // save the data
+                  current_points[point][component] = test_point;
+                  current_cell[point] = cell;
+                  current_fe_index[point][component] = support_point;
+                }
+            }
+        }
+    }
+
+  std::vector<types::global_dof_index> local_dof_indices (dof_handler->get_fe ().dofs_per_cell);
+  for (unsigned int point = 0; point < locations.size (); ++point)
+    {
+      current_cell[point]->get_dof_indices (local_dof_indices);
+      std::vector<types::global_dof_index> new_solution_indices;
+
+      for (unsigned int component = 0; component < n_components; ++component)
+        {
+          new_solution_indices.push_back (local_dof_indices[current_fe_index[point][component]]);
+        }
+
+      internal::PointValueHistory::PointGeometryData<dim> new_point_geometry_data (locations[point], current_points[point], new_solution_indices);
+
+      point_geometry_data.push_back (new_point_geometry_data);
+
+      std::map <std::string, std::vector <std::vector <double> > >::iterator
+      data_store_begin = data_store.begin ();
+      for (; data_store_begin != data_store.end (); ++data_store_begin)
+        {
+          // add an extra row to each vector entry; the mask is only read,
+          // so refer to the stored one instead of copying it
+          const ComponentMask &current_mask = (component_mask.find (data_store_begin->first))->second;
+          const unsigned int n_stored = current_mask.n_selected_components();
+          for (unsigned int component = 0; component < n_stored; ++component)
+            {
+              data_store_begin->second.push_back (std::vector<double> (0));
+            }
+        }
+    }
+}
+
+
+
+
+
+
+template <int dim>
+void PointValueHistory<dim>
+::add_field_name (const std::string &vector_name,
+                  const ComponentMask &mask)
+{
+  // Register a field called vector_name whose stored components are given
+  // by mask. Can't be closed to add additional points or vectors.
+  AssertThrow (!closed, ExcInvalidState ());
+  AssertThrow (!cleared, ExcInvalidState ());
+  AssertThrow (have_dof_handler, ExcDoFHandlerRequired ());
+  AssertThrow (!triangulation_changed, ExcDoFHandlerChanged ());
+
+  // insert a component mask that is always of the right size
+  if (mask.represents_the_all_selected_mask() == false)
+    component_mask.insert (std::make_pair (vector_name, mask));
+  else
+    component_mask.insert (std::make_pair (vector_name,
+                                           ComponentMask(std::vector<bool>(dof_handler->get_fe().n_components(), true))));
+
+  // insert an empty vector of strings
+  // to ensure each field has an entry
+  // in the map
+  std::pair <std::string, std::vector <std::string> >
+  empty_names (vector_name, std::vector <std::string> ());
+  component_names_map.insert (empty_names);
+
+  // make and add a new vector
+  // point_geometry_data.size() long
+  std::pair<std::string, std::vector <std::vector <double> > > pair_data;
+  pair_data.first = vector_name;
+  const unsigned int n_stored = (mask.represents_the_all_selected_mask() == false
+                                 ?
+                                 mask.n_selected_components()
+                                 :
+                                 dof_handler->get_fe().n_components());
+  // each point has n_stored sub parts; keep the count in the container's
+  // own unsigned size type instead of narrowing it to a signed int
+  const std::vector <std::vector <double> >::size_type n_datastreams = point_geometry_data.size () * n_stored;
+  std::vector < std::vector <double> > vector_size (n_datastreams,
+                                                    std::vector <double> (0));
+  pair_data.second = vector_size;
+  data_store.insert (pair_data);
+}
+
+
+template <int dim>
+void PointValueHistory<dim>
+::add_field_name(const std::string &vector_name, const unsigned int n_components)
+{
+  // build a mask with n_components entries, all true, and defer to the mask overload
+  add_field_name (vector_name, ComponentMask (std::vector <bool> (n_components, true)));
+}
+
+
+template <int dim>
+void PointValueHistory<dim>
+::add_component_names(const std::string &vector_name,
+                      const std::vector <std::string> &component_names)
+{
+  // vector_name must have an entry in both the names map and the mask map
+  typename std::map <std::string, std::vector <std::string> >::iterator names_entry = component_names_map.find(vector_name);
+  Assert (names_entry != component_names_map.end(), ExcMessage("vector_name not in class"));
+  typename std::map <std::string, ComponentMask>::iterator mask_entry = component_mask.find(vector_name);
+  Assert (mask_entry != component_mask.end(), ExcMessage("vector_name not in class"));
+  // one name per selected component; the (void) cast marks n_stored as used
+  const unsigned int n_stored = mask_entry->second.n_selected_components();
+  (void)n_stored;
+  Assert (component_names.size() == n_stored, ExcDimensionMismatch (component_names.size(), n_stored));
+  names_entry->second = component_names;
+}
+
+
+template <int dim>
+void PointValueHistory<dim>
+::add_independent_names(const std::vector <std::string> &independent_names)
+{
+  // the number of names has to match the number of independent variables
+  Assert (independent_names.size() == n_indep, ExcDimensionMismatch (independent_names.size(), n_indep));
+  this->indep_names = independent_names;
+}
+// Mark the object as closed. In this file add_point, add_points and
+// add_field_name require !closed, while evaluate_field asserts closed.
+template <int dim>
+void PointValueHistory<dim>
+::close ()
+{
+  closed = true;
+}
+
+// Flag the object as cleared and drop the DoFHandler pointer. Note that
+// tria_listener is not disconnected here.
+template <int dim>
+void PointValueHistory<dim>
+::clear ()
+{
+  cleared = true;
+  dof_handler = 0;
+  have_dof_handler = false;
+}
+
+// Need to test that the internal data has a full and complete dataset for
+// each key. That is that the data has not got 'out of sync'. Testing that
+// dataset_key is within 1 of independent_values is cheap and is done in all
+// three methods. Evaluate_field will check that its vector_name is within 1
+// of dataset_key. However this leaves the possibility that the user has
+// neglected to call evaluate_field on one vector_name consistently. To catch
+// this case start_new_dataset will call bool deep_check () which will test
+// all vector_names and return a bool. This can be called from an Assert
+// statement.
+
+
+
+template <int dim>
+template <typename VectorType>
+void PointValueHistory<dim>
+::evaluate_field (const std::string &vector_name, const VectorType &solution)
+{
+  // Append the value of solution at each stored point, one entry per
+  // component selected by the field's mask. Must be closed to add data.
+  Assert (closed, ExcInvalidState ());
+  Assert (!cleared, ExcInvalidState ());
+  AssertThrow (have_dof_handler, ExcDoFHandlerRequired ());
+  AssertThrow (!triangulation_changed, ExcDoFHandlerChanged ());
+
+  if (n_indep != 0) // hopefully this will get optimized, can't test independent_values[0] unless n_indep > 0
+    {
+      Assert (std::abs ((int) dataset_key.size () - (int) independent_values[0].size ()) < 2, ExcDataLostSync ());
+    }
+  // Look up the field name and get an
+  // iterator for the map. Doing this
+  // up front means that it only needs
+  // to be done once and also allows us
+  // to check vector_name is in the map.
+  typename std::map <std::string, std::vector <std::vector <double> > >::iterator data_store_field = data_store.find(vector_name);
+  Assert (data_store_field != data_store.end(), ExcMessage("vector_name not in class"));
+  // Repeat for component_mask
+  typename std::map <std::string, ComponentMask>::iterator mask = component_mask.find(vector_name);
+  Assert (mask != component_mask.end(), ExcMessage("vector_name not in class"));
+
+  const unsigned int n_stored = mask->second.n_selected_components(dof_handler->get_fe ().n_components ());
+
+  typename std::vector <internal::PointValueHistory::PointGeometryData <dim> >::iterator point = point_geometry_data.begin ();
+  for (unsigned int data_store_index = 0; point != point_geometry_data.end (); ++point, ++data_store_index)
+    {
+      // Look up the components to add
+      // in the component_mask, and
+      // access the data associated with
+      // those components
+
+      for (unsigned int store_index = 0, comp = 0; comp < dof_handler->get_fe ().n_components (); ++comp)
+        {
+          if (mask->second[comp])
+            {
+              // keep the full index width; unsigned int could truncate it
+              const types::global_dof_index solution_index = point->solution_indices[comp];
+              data_store_field->second[data_store_index * n_stored + store_index].push_back (solution (solution_index));
+              ++store_index;
+            }
+        }
+    }
+}
+
+
+
+// Evaluate data_postprocessor at every stored point and append the selected
+// output components to the history of each field named in vector_names.
+template <int dim>
+template <typename VectorType>
+void PointValueHistory<dim>
+::evaluate_field (const std::vector <std::string> &vector_names,
+                  const VectorType                &solution,
+                  const DataPostprocessor< dim>   &data_postprocessor,
+                  const Quadrature<dim>           &quadrature)
+{
+  // must be closed to add data to internal
+  // members.
+  Assert (closed, ExcInvalidState ());
+  Assert (!cleared, ExcInvalidState ());
+  AssertThrow (have_dof_handler, ExcDoFHandlerRequired ());
+  if (n_indep != 0) // hopefully this will get optimized, can't test independent_values[0] unless n_indep > 0
+    {
+      Assert (std::abs ((int) dataset_key.size () - (int) independent_values[0].size ()) < 2, ExcDataLostSync ());
+    }
+
+  // Make an FEValues object that updates what the postprocessor needs, plus quadrature points
+  const UpdateFlags update_flags = data_postprocessor.get_needed_update_flags() | update_quadrature_points;
+  Assert (!(update_flags & update_normal_vectors),
+          ExcMessage("The update of normal vectors may not be requested for evaluation of "
+                     "data on cells via DataPostprocessor."));
+  FEValues<dim> fe_values (dof_handler->get_fe (), quadrature, update_flags);
+  unsigned int n_components = dof_handler->get_fe ().n_components ();
+  unsigned int n_quadrature_points = quadrature.size();
+
+  unsigned int n_output_variables = data_postprocessor.get_names().size();
+
+  // Loop over points and find correct cell
+  typename std::vector <internal::PointValueHistory::PointGeometryData <dim> >::iterator point = point_geometry_data.begin ();
+  for (unsigned int data_store_index = 0; point != point_geometry_data.end (); point++, data_store_index++)
+    {
+      // we now have a point to query,
+      // need to know what cell it is in
+      Point <dim> requested_location = point->requested_location;
+      typename DoFHandler<dim>::active_cell_iterator cell = GridTools::find_active_cell_around_point (StaticMappingQ1<dim>::mapping, *dof_handler, requested_location).first;
+
+
+      fe_values.reinit (cell);
+      std::vector< Vector< double > > computed_quantities (1, Vector <double> (n_output_variables)); // just one point needed
+
+      // The case of a scalar FE
+      if (n_components == 1)
+        {
+          // Extract data for the
+          // PostProcessor object
+          std::vector< typename VectorType::value_type > uh (n_quadrature_points, 0.0);
+          std::vector< Tensor< 1, dim, typename VectorType::value_type > > duh (n_quadrature_points, Tensor <1, dim, typename VectorType::value_type> ());
+          std::vector< Tensor< 2, dim, typename VectorType::value_type > > dduh (n_quadrature_points, Tensor <2, dim, typename VectorType::value_type> ());
+          std::vector<Point<dim> > dummy_normals (1, Point<dim> ());
+          std::vector<Point<dim> > evaluation_points;
+          // at each point there is
+          // only one component of
+          // value, gradient etc.
+          if (update_flags & update_values)
+            fe_values.get_function_values (solution,
+                                           uh);
+          if (update_flags & update_gradients)
+            fe_values.get_function_gradients (solution,
+                                              duh);
+          if (update_flags & update_hessians)
+            fe_values.get_function_hessians (solution,
+                                             dduh);
+
+          // find the closest quadrature point; index 0 is kept if none is nearer than the cell diameter
+          evaluation_points = fe_values.get_quadrature_points();
+          double distance = cell->diameter ();
+          unsigned int selected_point = 0;
+          for (unsigned int q_point = 0; q_point < n_quadrature_points; q_point++)
+            {
+              if (requested_location.distance (evaluation_points[q_point]) < distance)
+                {
+                  selected_point = q_point;
+                  distance = requested_location.distance (evaluation_points[q_point]);
+                }
+            }
+
+          // Scalar FE, so call compute_derived_quantities_scalar with the
+          // data of the selected quadrature point only
+          // TODO this function should also operate with typename VectorType::value_type
+          data_postprocessor.
+          compute_derived_quantities_scalar(std::vector< double > (1, uh[selected_point]),
+                                            std::vector< Tensor< 1, dim > > (1, Tensor< 1, dim >(duh[selected_point]) ),
+                                            std::vector< Tensor< 2, dim > > (1, Tensor< 2, dim >(dduh[selected_point]) ),
+                                            dummy_normals,
+                                            std::vector<Point<dim> > (1, evaluation_points[selected_point]),
+                                            computed_quantities);
+
+        }
+      else     // The case of a vector FE
+        {
+          // Extract data for the PostProcessor object
+          std::vector< Vector< typename VectorType::value_type > > uh (n_quadrature_points, Vector <typename VectorType::value_type> (n_components));
+          std::vector< std::vector< Tensor< 1, dim, typename VectorType::value_type > > > duh (n_quadrature_points, std::vector< Tensor< 1, dim, typename VectorType::value_type > > (n_components,  Tensor< 1, dim, typename VectorType::value_type >()));
+          std::vector< std::vector< Tensor< 2, dim, typename VectorType::value_type > > > dduh (n_quadrature_points, std::vector< Tensor< 2, dim, typename VectorType::value_type > > (n_components,  Tensor< 2, dim, typename VectorType::value_type >()));
+          std::vector<Point<dim> > dummy_normals  (1, Point<dim> ());
+          std::vector<Point<dim> > evaluation_points;
+          // at each point there is
+          // a vector valued
+          // function and its
+          // derivative...
+          if (update_flags & update_values)
+            fe_values.get_function_values (solution,
+                                           uh);
+          if (update_flags & update_gradients)
+            fe_values.get_function_gradients (solution,
+                                              duh);
+          if (update_flags & update_hessians)
+            fe_values.get_function_hessians (solution,
+                                             dduh);
+
+          // find the closest quadrature point; index 0 is kept if none is nearer than the cell diameter
+          evaluation_points = fe_values.get_quadrature_points();
+          double distance = cell->diameter ();
+          unsigned int selected_point = 0;
+          for (unsigned int q_point = 0; q_point < n_quadrature_points; q_point++)
+            {
+              if (requested_location.distance (evaluation_points[q_point]) < distance)
+                {
+                  selected_point = q_point;
+                  distance = requested_location.distance (evaluation_points[q_point]);
+                }
+            }
+
+          // FIXME: We need tmp vectors below because the data
+          // postprocessors are not equipped to deal with anything but
+          // doubles (scalars and tensors).
+          const Vector< typename VectorType::value_type >                        &uh_s   = uh[selected_point];
+          const std::vector< Tensor< 1, dim, typename VectorType::value_type > > &duh_s  = duh[selected_point];
+          const std::vector< Tensor< 2, dim, typename VectorType::value_type > > &dduh_s = dduh[selected_point];
+          std::vector< Tensor< 1, dim > > tmp_d (duh_s.size());
+          for (unsigned int i = 0; i < duh_s.size(); i++)
+            tmp_d[i] = duh_s[i];
+
+          std::vector< Tensor< 2, dim > > tmp_dd (dduh_s.size());
+          for (unsigned int i = 0; i < dduh_s.size(); i++)
+            tmp_dd[i] = dduh_s[i];
+
+          Vector< double > tmp(uh_s.size());
+          for (unsigned int i = 0; i < uh_s.size(); i++)
+            tmp[i] = uh_s[i];
+          // Vector-valued FE, so call compute_derived_quantities_vector with
+          // the converted data of the selected quadrature point only
+          data_postprocessor.
+          compute_derived_quantities_vector(std::vector< Vector< double > > (1, tmp),
+                                            std::vector< std::vector< Tensor< 1, dim > > > (1, tmp_d),
+                                            std::vector< std::vector< Tensor< 2, dim > > > (1, tmp_dd),
+                                            dummy_normals,
+                                            std::vector<Point<dim> > (1, evaluation_points[selected_point]),
+                                            computed_quantities);
+        }
+
+
+      // we now have the data and need to save it
+      // loop over data names
+      typename std::vector<std::string>::const_iterator name = vector_names.begin();
+      for (; name != vector_names.end(); name++)
+        {
+          typename std::map <std::string, std::vector <std::vector <double> > >::iterator data_store_field = data_store.find(*name);
+          Assert (data_store_field != data_store.end(), ExcMessage("vector_name not in class"));
+          // Repeat for component_mask
+          typename std::map <std::string, ComponentMask>::iterator mask = component_mask.find(*name);
+          Assert (mask != component_mask.end(), ExcMessage("vector_name not in class"));
+
+          unsigned int n_stored = mask->second.n_selected_components(n_output_variables);
+
+          // Push back computed quantities according
+          // to the component_mask.
+          for (unsigned int store_index = 0, comp = 0; comp < n_output_variables; comp++)
+            {
+              if (mask->second[comp])
+                {
+                  data_store_field->second[data_store_index * n_stored + store_index].push_back (computed_quantities[0](comp));
+                  store_index++;
+                }
+            }
+        }
+    } // end of loop over points
+}
+
+
+template <int dim>
+template <typename VectorType>
+void PointValueHistory<dim>
+::evaluate_field (const std::string            &vector_name,
+                  const VectorType             &solution,
+                  const DataPostprocessor<dim> &data_postprocessor,
+                  const Quadrature<dim>        &quadrature)
+{
+  // Convenience overload: forward a one-element name list to the multi-name overload.
+  const std::vector <std::string> single_name (1, vector_name);
+  evaluate_field (single_name, solution, data_postprocessor, quadrature);
+}
+
+
+
+template <int dim>
+template <typename VectorType>
+void PointValueHistory<dim>
+::evaluate_field_at_requested_location (const std::string &vector_name,
+                                        const VectorType  &solution)
+{
+  // Evaluates solution with VectorTools::point_value at the requested
+  // location of every stored point and appends the mask-selected components
+  // to the history of vector_name. Requires the closed, non-cleared state.
+  Assert (closed, ExcInvalidState ());
+  Assert (!cleared, ExcInvalidState ());
+  AssertThrow (have_dof_handler, ExcDoFHandlerRequired ());
+
+  // independent_values[0] may only be inspected when n_indep > 0
+  if (n_indep != 0)
+    {
+      Assert (std::abs ((int) dataset_key.size () - (int) independent_values[0].size ()) < 2, ExcDataLostSync ());
+    }
+
+  // Resolve the storage and the mask for this field once, up front; this
+  // also verifies that vector_name is known to the class.
+  typedef typename std::map <std::string, std::vector <std::vector <double> > >::iterator StoreIterator;
+  typedef typename std::map <std::string, ComponentMask>::iterator MaskIterator;
+  const StoreIterator field_history = data_store.find(vector_name);
+  Assert (field_history != data_store.end(), ExcMessage("vector_name not in class"));
+  const MaskIterator field_mask = component_mask.find(vector_name);
+  Assert (field_mask != component_mask.end(), ExcMessage("vector_name not in class"));
+
+  const unsigned int n_fe_components = dof_handler->get_fe ().n_components ();
+  const unsigned int n_stored = field_mask->second.n_selected_components(n_fe_components);
+
+  // scratch vector with one entry per finite element component
+  Vector <double> value (n_fe_components);
+  typename std::vector <internal::PointValueHistory::PointGeometryData <dim> >::iterator point = point_geometry_data.begin ();
+  for (unsigned int data_store_index = 0; point != point_geometry_data.end (); ++point, ++data_store_index)
+    {
+      // fill value with the solution at this point's requested location
+      VectorTools::point_value (*dof_handler, solution, point->requested_location, value);
+
+      // selected components are stored contiguously per point, so the
+      // running slot index only advances on components the mask keeps
+      unsigned int store_index = 0;
+      for (unsigned int comp = 0; comp < field_mask->second.size(); ++comp)
+        if (field_mask->second[comp])
+          {
+            field_history->second[data_store_index * n_stored + store_index].push_back (value (comp));
+            ++store_index;
+          }
+    }
+}
+
+
+template <int dim>
+void PointValueHistory<dim>
+::start_new_dataset (double key)
+{
+  // Appends key to dataset_key. The object must be closed and not cleared,
+  // and the stored data must pass the non-strict deep_check().
+  Assert (closed, ExcInvalidState ());
+  Assert (!cleared, ExcInvalidState ());
+  Assert (deep_check (false), ExcDataLostSync ());
+
+  dataset_key.push_back (key);
+}
+
+
+
+template <int dim>
+void PointValueHistory<dim>
+::push_back_independent (const std::vector <double> &indep_values)
+{
+  // Appends indep_values[i] to the i-th independent series. Only valid in
+  // the closed, non-cleared state with exactly n_indep (> 0) values.
+  Assert (closed, ExcInvalidState ());
+  Assert (!cleared, ExcInvalidState ());
+  Assert (indep_values.size () == n_indep, ExcDimensionMismatch (indep_values.size (), n_indep));
+  Assert (n_indep != 0, ExcNoIndependent ());
+  Assert (std::abs ((int) dataset_key.size () - (int) independent_values[0].size ()) < 2, ExcDataLostSync ());
+
+  for (unsigned int series = 0; series != n_indep; ++series)
+    independent_values[series].push_back (indep_values[series]);
+}
+
+
+
+template <int dim>
+void PointValueHistory<dim>
+::write_gnuplot (const std::string &base_name, const std::vector <Point <dim> > postprocessor_locations)
+{
+  // Dumps the recorded history as text files for gnuplot. With n_indep != 0
+  // the independent values go to <base_name>_indep.gpl; with a DoFHandler
+  // each point gets <base_name>_NN.gpl. postprocessor_locations must be
+  // empty or hold one location per point (echoed in that point's header).
+  AssertThrow (closed, ExcInvalidState ());
+  AssertThrow (!cleared, ExcInvalidState ());
+  AssertThrow (deep_check (true), ExcDataLostSync ());
+
+  // write inputs to a file
+  if (n_indep != 0)
+    {
+      std::string filename = base_name + "_indep.gpl";
+      std::ofstream to_gnuplot (filename.c_str ());
+
+      to_gnuplot << "# Data independent of mesh location\n";
+
+      // write column headings
+      to_gnuplot << "# <Key> ";
+
+      if (indep_names.size() > 0)
+        {
+          for (unsigned int name = 0; name < indep_names.size(); name++)
+            {
+              to_gnuplot << "<" << indep_names [name] << "> ";
+            }
+          to_gnuplot << "\n";
+        }
+      else
+        {
+          for (unsigned int component = 0; component < n_indep; component++)
+            {
+              to_gnuplot << "<Indep_" << component << "> ";
+            }
+          to_gnuplot << "\n";
+        }
+      // write general data stored
+      for (unsigned int key = 0; key < dataset_key.size (); key++)
+        {
+          to_gnuplot << dataset_key[key];
+
+          for (unsigned int component = 0; component < n_indep; component++)
+            {
+              to_gnuplot << " " << independent_values[component][key];
+            }
+          to_gnuplot << "\n";
+        }
+
+      to_gnuplot.close ();
+    }
+
+  // write points to a file
+  if (have_dof_handler)
+    {
+      AssertThrow (postprocessor_locations.size() == 0 || postprocessor_locations.size() == point_geometry_data.size(), ExcDimensionMismatch (postprocessor_locations.size(), point_geometry_data.size()));
+      // We previously required the number of dofs to remain the same, as
+      // a test on the relevance of the stored support point indices. That
+      // is now relaxed so that adaptive refinement strategies can use the
+      // evaluate_field_at_requested_location method. Note that the support
+      // point information is not meaningful if the number of dofs has
+      // changed.
+      //AssertThrow (!triangulation_changed, ExcDoFHandlerChanged ());
+
+      typename std::vector <internal::PointValueHistory::PointGeometryData <dim> >::iterator point = point_geometry_data.begin ();
+      for (unsigned int data_store_index = 0; point != point_geometry_data.end (); point++, data_store_index++)
+        {
+          // for each point, open a file to
+          // be written to
+          std::string filename = base_name + "_" + Utilities::int_to_string (data_store_index, 2) + ".gpl"; // store by order pushed back
+          // due to
+          // Utilities::int_to_string(data_store_index,
+          // 2) call, can handle up to 100
+          // points
+          std::ofstream to_gnuplot (filename.c_str ());
+
+          // put helpful info about the
+          // support point into the file as
+          // comments
+          to_gnuplot << "# Requested location: " << point->requested_location << "\n";
+          to_gnuplot << "# DoF_index : Support location (for each component)\n";
+          for (unsigned int component = 0; component < dof_handler->get_fe ().n_components (); component++)
+            {
+              to_gnuplot << "# " << point->solution_indices[component] << " : " << point->support_point_locations [component] << "\n";
+            }
+          if (triangulation_changed)
+            to_gnuplot << "# (Original components and locations, may be invalidated by mesh change.)\n";
+
+          if (postprocessor_locations.size() != 0)
+            {
+              to_gnuplot << "# Postprocessor location: " << postprocessor_locations[data_store_index];
+              if (triangulation_changed)
+                to_gnuplot << " (may be approximate)";
+              // always terminate this comment line, so that the "#" below
+              // starts a line of its own even if the mesh did not change
+              to_gnuplot << "\n";
+            }
+          to_gnuplot << "#\n";
+
+
+          // write column headings
+          to_gnuplot << "# <Key> ";
+
+          if (indep_names.size() > 0)
+            {
+              for (unsigned int name = 0; name < indep_names.size(); name++)
+                {
+                  to_gnuplot << "<" << indep_names [name] << "> ";
+                }
+            }
+          else
+            {
+              for (unsigned int component = 0; component < n_indep; component++)
+                {
+                  to_gnuplot << "<Indep_" << component << "> ";
+                }
+            }
+
+          for (std::map <std::string, std::vector <std::vector <double> > >::iterator
+               data_store_begin = data_store.begin (); data_store_begin != data_store.end (); ++data_store_begin)
+            {
+              typename std::map <std::string, ComponentMask>::iterator mask = component_mask.find(data_store_begin->first);
+              unsigned int n_stored = mask->second.n_selected_components();
+              std::vector <std::string> names = (component_names_map.find (data_store_begin->first))->second;
+
+              if (names.size() > 0)
+                {
+                  AssertThrow (names.size() == n_stored, ExcDimensionMismatch (names.size(), n_stored));
+                  for (unsigned int component = 0; component < names.size(); component++)
+                    {
+                      to_gnuplot << "<" << names[component] << "> ";
+                    }
+                }
+              else
+                {
+                  for (unsigned int component = 0; component < n_stored; component++)
+                    {
+                      to_gnuplot << "<" << data_store_begin->first << "_" << component << "> ";
+                    }
+                }
+            }
+          to_gnuplot << "\n";
+
+          // write data stored for the point
+          for (unsigned int key = 0; key < dataset_key.size (); key++)
+            {
+              to_gnuplot << dataset_key[key];
+
+              for (unsigned int component = 0; component < n_indep; component++)
+                {
+                  to_gnuplot << " " << independent_values[component][key];
+                }
+
+              for (std::map <std::string, std::vector <std::vector <double> > >::iterator
+                   data_store_begin = data_store.begin ();
+                   data_store_begin != data_store.end (); ++data_store_begin)
+                {
+                  typename std::map <std::string, ComponentMask>::iterator mask = component_mask.find(data_store_begin->first);
+                  unsigned int n_stored = mask->second.n_selected_components();
+
+                  // the mask-selected components of point data_store_index
+                  // occupy n_stored consecutive entries of the field's storage
+                  for (unsigned int component = 0; component < n_stored; component++)
+                    {
+                      to_gnuplot << " " << (data_store_begin->second)[data_store_index * n_stored + component][key];
+                    }
+                }
+              to_gnuplot << "\n";
+            }
+
+          to_gnuplot.close ();
+        }
+    }
+}
+
+
+
+template <int dim>
+Vector<double> PointValueHistory<dim>
+::mark_support_locations ()
+{
+  // Returns a vector with one entry per dof of the DoFHandler in which the
+  // entries at every stored support point index are set to one.
+  AssertThrow (!cleared, ExcInvalidState ());
+  AssertThrow (have_dof_handler, ExcDoFHandlerRequired ());
+  AssertThrow (!triangulation_changed, ExcDoFHandlerChanged ());
+
+  const unsigned int n_components = dof_handler->get_fe ().n_components ();
+  Vector<double> dof_vector (dof_handler->n_dofs ());
+
+  typedef typename std::vector <internal::PointValueHistory::PointGeometryData <dim> >::iterator PointIterator;
+  for (PointIterator point = point_geometry_data.begin (); point != point_geometry_data.end (); ++point)
+    for (unsigned int component = 0; component < n_components; ++component)
+      {
+        // flag the dof that records this component at this point
+        dof_vector (point->solution_indices[component]) = 1;
+      }
+  return dof_vector;
+}
+
+
+template <int dim>
+void PointValueHistory<dim>
+::get_support_locations (std::vector <std::vector<Point <dim> > > &locations)
+{
+  // Overwrites locations with the support point locations of every stored
+  // point, in storage order (one inner vector per point).
+  AssertThrow (!cleared, ExcInvalidState ());
+  AssertThrow (have_dof_handler, ExcDoFHandlerRequired ());
+  AssertThrow (!triangulation_changed, ExcDoFHandlerChanged ());
+
+  typedef typename std::vector <internal::PointValueHistory::PointGeometryData <dim> >::iterator PointIterator;
+  std::vector <std::vector <Point <dim> > > collected;
+  for (PointIterator point = point_geometry_data.begin (); point != point_geometry_data.end (); ++point)
+    collected.push_back (point->support_point_locations);
+
+  locations.swap (collected);
+}
+
+
+template <int dim>
+void PointValueHistory<dim>
+::get_points (std::vector <std::vector<Point <dim> > > &locations)
+{
+  this->get_support_locations (locations); // plain alias: forwards unchanged
+}
+
+
+template <int dim>
+void PointValueHistory<dim>
+::get_postprocessor_locations (const Quadrature<dim> &quadrature, std::vector<Point <dim> > &locations)
+{
+  // For every stored point, reports the quadrature point (on the cell found
+  // by GridTools::find_active_cell_around_point) that is closest to the
+  // requested location. locations is overwritten, one entry per point.
+  Assert (!cleared, ExcInvalidState ());
+  AssertThrow (have_dof_handler, ExcDoFHandlerRequired ());
+
+  locations.clear ();
+  FEValues<dim> fe_values (dof_handler->get_fe (), quadrature, update_quadrature_points);
+  const unsigned int n_quadrature_points = quadrature.size();
+
+  typedef typename std::vector <internal::PointValueHistory::PointGeometryData <dim> >::iterator PointIterator;
+  for (PointIterator point = point_geometry_data.begin (); point != point_geometry_data.end (); ++point)
+    {
+      // we now have a point to query, need to know what cell it is in
+      const Point <dim> requested_location = point->requested_location;
+      typename DoFHandler<dim>::active_cell_iterator cell = GridTools::find_active_cell_around_point (StaticMappingQ1<dim>::mapping, *dof_handler, requested_location).first;
+      fe_values.reinit (cell);
+      const std::vector<Point<dim> > evaluation_points = fe_values.get_quadrature_points();
+
+      // start from the cell diameter; point 0 is kept if nothing is strictly closer
+      double distance = cell->diameter ();
+      unsigned int selected_point = 0;
+      for (unsigned int q_point = 0; q_point < n_quadrature_points; ++q_point)
+        {
+          const double candidate = requested_location.distance (evaluation_points[q_point]);
+          if (candidate < distance)
+            {
+              selected_point = q_point;
+              distance = candidate;
+            }
+        }
+
+      locations.push_back (evaluation_points[selected_point]);
+    }
+}
+
+
+template <int dim>
+void PointValueHistory<dim>
+::status (std::ostream &out)
+{
+  // Prints a human-readable summary of the object to out: the state
+  // flags, the stored points, the independent values and, per field
+  // mnemonic, the sizes of the stored data and of its component mask.
+  out << "***PointValueHistory status output***\n\n";
+  out << "Closed: " << closed << "\n";
+  out << "Cleared: " << cleared << "\n";
+  out << "Triangulation_changed: " << triangulation_changed << "\n";
+  out << "Have_dof_handler: " << have_dof_handler << "\n";
+  out << "Geometric Data" << "\n";
+
+  // geometric data: one record per stored point, unless cleared
+  if (point_geometry_data.empty ())
+    {
+      out << "No points stored currently\n";
+    }
+  else if (cleared)
+    {
+      out << "#Cannot access DoF_indices once cleared\n";
+    }
+  else
+    {
+      const unsigned int n_components = dof_handler->get_fe ().n_components ();
+      typedef typename std::vector <internal::PointValueHistory::PointGeometryData <dim> >::iterator PointIterator;
+      for (PointIterator point = point_geometry_data.begin (); point != point_geometry_data.end (); ++point)
+        {
+          out << "# Requested location: " << point->requested_location << "\n";
+          out << "# DoF_index : Support location (for each component)\n";
+          for (unsigned int component = 0; component < n_components; ++component)
+            {
+              out << point->solution_indices[component] << " : " << point->support_point_locations [component] << "\n";
+            }
+          out << "\n";
+        }
+    }
+  out << "\n";
+
+  // independent values: number of series and length of the first one
+  if (independent_values.size () == 0)
+    {
+      out << "No independent values stored\n";
+    }
+  else
+    {
+      out << "Independent value(s): " << independent_values.size () << " : " << independent_values[0].size () << "\n";
+      if (indep_names.size() > 0)
+        {
+          out << "Names: ";
+          for (unsigned int name = 0; name < indep_names.size(); ++name)
+            {
+              out << "<" << indep_names [name] << "> ";
+            }
+          out << "\n";
+        }
+    }
+
+  // field data: one summary line per mnemonic in data_store
+  typedef std::map <std::string, std::vector <std::vector <double> > >::iterator StoreIterator;
+  if (!data_store.empty ())
+    {
+      out << "Mnemonic: data set size (mask size, n true components) : n data sets\n";
+    }
+  for (StoreIterator field = data_store.begin (); field != data_store.end (); ++field)
+    {
+      // look up the mask and the component names registered for this mnemonic
+      const std::string &vector_name = field->first;
+      typename std::map <std::string, ComponentMask>::iterator mask = component_mask.find(vector_name);
+      Assert (mask != component_mask.end(), ExcMessage("vector_name not in class"));
+      typename std::map <std::string, std::vector <std::string> >::iterator component_names = component_names_map.find(vector_name);
+      Assert (component_names != component_names_map.end(), ExcMessage("vector_name not in class"));
+
+      // the summary prefix is the same whether or not data has been stored
+      out << vector_name << ": " << field->second.size () << " (";
+      out << mask->second.size() << ", " << mask->second.n_selected_components() << ") : ";
+      if (field->second.size () != 0)
+        out << (field->second)[0].size () << "\n";
+      else
+        out << "No points added" << "\n";
+
+      // add names, if available
+      const std::vector <std::string> &names = component_names->second;
+      if (names.size() > 0)
+        {
+          for (unsigned int name = 0; name < names.size(); ++name)
+            {
+              out << "<" << names[name] << "> ";
+            }
+          out << "\n";
+        }
+    }
+  out << "\n";
+  out << "***end of status output***\n\n";
+}
+
+
+
+template <int dim>
+bool PointValueHistory<dim>
+::deep_check (const bool strict)
+{
+  // Consistency test between dataset_key and the stored histories. In
+  // strict mode every checked history must have exactly as many entries
+  // as there are keys; otherwise the sizes may differ by at most one.
+  // Returns true unless one of the checks below fails.
+  typedef std::map <std::string, std::vector <std::vector <double> > >::iterator StoreIterator;
+  const int n_keys = (int) dataset_key.size ();
+
+  if (strict)
+    {
+      // strict mode: all sizes have to agree exactly
+      if (n_indep != 0 && dataset_key.size () != independent_values[0].size ())
+        {
+          return false;
+        }
+
+      if (have_dof_handler)
+        {
+          for (StoreIterator field = data_store.begin (); field != data_store.end (); ++field)
+            {
+              Assert (field->second.size() > 0,
+                      ExcInternalError());
+              // only the first history of each name is tested, i.e. this
+              // checks the user; it will not catch internal errors which
+              // do not update all fields for a name
+              if ((field->second)[0].size () != dataset_key.size ())
+                {
+                  return false;
+                }
+            }
+        }
+      return true;
+    }
+
+  // non-strict mode: the sizes may be out of step by one, which
+  // NOTE(review) presumably covers a data set that is still being filled
+  if (n_indep != 0)
+    {
+      if (std::abs (n_keys - (int) independent_values[0].size ()) >= 2)
+        {
+          return false;
+        }
+    }
+
+  if (have_dof_handler)
+    {
+      for (StoreIterator field = data_store.begin (); field != data_store.end (); ++field)
+        {
+          Assert (field->second.size() > 0,
+                  ExcInternalError());
+          // as above, only the first history of each name is tested
+          if (std::abs ((int) (field->second)[0].size () - n_keys) >= 2)
+            {
+              return false;
+            }
+        }
+    }
+
+  return true;
+}
+
+
+
+template <int dim>
+void PointValueHistory<dim>
+::tria_change_listener ()
+{
+  // This function is called by the Triangulation whenever something
+  // changes, by virtue of having been attached to the signal handler
+  // in the triangulation object.
+  //
+  // All it does is record that the mesh has changed, which must be
+  // taken into account the next time the solution is evaluated. The
+  // flag is also consulted elsewhere in this class: for example,
+  // mark_support_locations() and get_support_locations() refuse to
+  // run once it is set, and write_gnuplot() annotates its output.
+  //
+  // Nothing in this function resets the flag.
+  triangulation_changed = true;
+}
+
+
+// explicit instantiations
+#include "point_value_history.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/numerics/point_value_history.inst.in b/source/numerics/point_value_history.inst.in
new file mode 100644
index 0000000..717c1c7
--- /dev/null
+++ b/source/numerics/point_value_history.inst.in
@@ -0,0 +1,59 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2009 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+// Instantiate the whole class for every entry of DIMENSIONS.
+for (deal_II_dimension : DIMENSIONS)
+{
+  template class PointValueHistory<deal_II_dimension>;
+}
+
+// evaluate_field (name, solution) for every SERIAL_VECTORS x DIMENSIONS pair.
+for (VEC : SERIAL_VECTORS ; deal_II_dimension : DIMENSIONS)
+{  
+  template
+  void PointValueHistory<deal_II_dimension>::evaluate_field 
+  (const std::string &,
+   const VEC &);
+}
+
+// evaluate_field_at_requested_location (name, solution), same combinations.
+for (VEC : SERIAL_VECTORS ; deal_II_dimension : DIMENSIONS)
+{  
+  template
+  void PointValueHistory<deal_II_dimension>::evaluate_field_at_requested_location 
+  (const std::string &,
+   const VEC &);
+}
+// evaluate_field (list of names, solution, postprocessor, quadrature).
+for (VEC : SERIAL_VECTORS ; deal_II_dimension : DIMENSIONS)
+{  
+  template
+  void PointValueHistory<deal_II_dimension>::evaluate_field 
+  (const std::vector <std::string> &,
+   const VEC &,
+   const DataPostprocessor<deal_II_dimension> &,
+   const Quadrature<deal_II_dimension> &);
+}
+// evaluate_field (single name, solution, postprocessor, quadrature).
+for (VEC : SERIAL_VECTORS ; deal_II_dimension : DIMENSIONS)
+{  
+  template
+  void PointValueHistory<deal_II_dimension>::evaluate_field 
+  (const std::string &,
+   const VEC &,
+   const DataPostprocessor<deal_II_dimension> &,
+   const Quadrature<deal_II_dimension> &);
+}
diff --git a/source/numerics/solution_transfer.cc b/source/numerics/solution_transfer.cc
new file mode 100644
index 0000000..1419ce7
--- /dev/null
+++ b/source/numerics/solution_transfer.cc
@@ -0,0 +1,576 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/fe/fe.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/parallel_vector.h>
+#include <deal.II/lac/petsc_vector.h>
+#include <deal.II/lac/trilinos_vector.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/parallel_block_vector.h>
+#include <deal.II/lac/petsc_block_vector.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+#include <deal.II/numerics/solution_transfer.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+template<int dim, typename VectorType, typename DoFHandlerType>
+SolutionTransfer<dim, VectorType, DoFHandlerType>::SolutionTransfer(const DoFHandlerType &dof)
+  :
+  dof_handler(&dof, typeid(*this).name()), // address of dof plus this object's type name
+  n_dofs_old(0),                           // no dof count has been recorded yet
+  prepared_for(none)                       // stays none until a prepare_for_* function runs
+{}
+
+
+
+template<int dim, typename VectorType, typename DoFHandlerType>
+SolutionTransfer<dim, VectorType, DoFHandlerType>::~SolutionTransfer()
+{
+  clear (); // empties the per-cell storage and resets prepared_for to none
+}
+
+
+
+template<int dim, typename VectorType, typename DoFHandlerType>
+void SolutionTransfer<dim, VectorType, DoFHandlerType>::clear ()
+{
+  // return to the unprepared state and drop everything stored per cell
+  prepared_for=none;
+  cell_map.clear();
+  indices_on_cell.clear();
+  dof_values_on_cell.clear();
+}
+
+
+
+template<int dim, typename VectorType, typename DoFHandlerType>
+void SolutionTransfer<dim, VectorType, DoFHandlerType>::prepare_for_pure_refinement()
+{
+  // Records, for every active cell, the global dof indices on that cell
+  // and the cell's active_fe_index, keyed by (level, index) in cell_map.
+  Assert(prepared_for!=pure_refinement, ExcAlreadyPrepForRef());
+  Assert(prepared_for!=coarsening_and_refinement,
+         ExcAlreadyPrepForCoarseAndRef());
+
+  clear();
+
+  n_dofs_old=dof_handler->n_dofs();
+  const unsigned int n_active_cells = dof_handler->get_triangulation().n_active_cells();
+
+  // efficient reallocation: swap a freshly sized vector into indices_on_cell
+  std::vector<std::vector<types::global_dof_index> > (n_active_cells).swap(indices_on_cell);
+
+  unsigned int cell_no = 0;
+  for (typename DoFHandlerType::active_cell_iterator cell = dof_handler->begin_active();
+       cell != dof_handler->end(); ++cell, ++cell_no)
+    {
+      // on each cell store the indices of the dofs. after refining we get
+      // the values on the children by taking these indices, getting the
+      // respective values out of the data vectors and prolonging them to
+      // the children
+      std::vector<types::global_dof_index> &cell_indices = indices_on_cell[cell_no];
+      cell_indices.resize(cell->get_fe().dofs_per_cell);
+      cell->get_dof_indices(cell_indices);
+      cell_map[std::make_pair(cell->level(),cell->index())]
+        = Pointerstruct(&cell_indices, cell->active_fe_index());
+    }
+
+  prepared_for=pure_refinement;
+}
+
+
+
+template<int dim, typename VectorType, typename DoFHandlerType>
+void
+SolutionTransfer<dim, VectorType, DoFHandlerType>::refine_interpolate
+(const VectorType &in,
+ VectorType       &out) const
+{
+  // Transfers in (which must have n_dofs_old entries) to out (which must
+  // have as many entries as there are dofs now), cell by cell, for every
+  // cell that has an entry in cell_map.
+  Assert(prepared_for==pure_refinement, ExcNotPrepared());
+  Assert(in.size()==n_dofs_old, ExcDimensionMismatch(in.size(),n_dofs_old));
+  Assert(out.size()==dof_handler->n_dofs(),
+         ExcDimensionMismatch(out.size(),dof_handler->n_dofs()));
+  Assert(&in != &out,
+         ExcMessage ("Vectors cannot be used as input and output"
+                     " at the same time!"));
+
+  typedef typename std::map<std::pair<unsigned int, unsigned int>, Pointerstruct>::const_iterator MapIterator;
+  const MapIterator cell_map_end = cell_map.end();
+
+  Vector<typename VectorType::value_type> local_values(0);
+
+  for (typename DoFHandlerType::cell_iterator cell = dof_handler->begin();
+       cell != dof_handler->end(); ++cell)
+    {
+      const MapIterator stored = cell_map.find(std::make_pair(cell->level(),cell->index()));
+
+      // nothing was recorded for this (level, index) pair, so there is
+      // nothing to transfer from this cell
+      if (stored == cell_map_end)
+        continue;
+
+      // this cell was refined or not touched at all, so we can get the
+      // new values by just setting or interpolating to the children,
+      // which is both done by one function
+      const unsigned int this_fe_index = stored->second.active_fe_index;
+      const unsigned int dofs_per_cell=cell->get_dof_handler().get_fe()[this_fe_index].dofs_per_cell;
+      // NOTE(review): 'true' presumably skips zeroing; all entries are set below
+      local_values.reinit(dofs_per_cell, true);
+
+      // make sure that the size of the stored indices is the same as
+      // dofs_per_cell. since we store the desired fe_index, we know
+      // what this size should be
+      Assert(dofs_per_cell==(*stored->second.indices_ptr).size(),
+             ExcInternalError());
+      for (unsigned int i=0; i<dofs_per_cell; ++i)
+        local_values(i)=in((*stored->second.indices_ptr)[i]);
+      cell->set_dof_values_by_interpolation(local_values, out,
+                                            this_fe_index);
+    }
+}
+
+
+
+namespace internal
+{
+  /**
+   * Generate a table that contains interpolation matrices between each
+   * combination of finite elements used in a DoFHandler of some kind.
+   * Since not all elements can be interpolated onto each other, the
+   * table may contain empty matrices for those combinations of elements
+   * for which no such interpolation is implemented. This generic
+   * overload does nothing and leaves the table untouched.
+   */
+  template <typename DoFHandlerType>
+  void extract_interpolation_matrices (const DoFHandlerType &,
+                                       dealii::Table<2,FullMatrix<double> > &)
+  {}
+
+  // hp::DoFHandler overload: fills every off-diagonal entry (i,j) of the table
+  template <int dim, int spacedim>
+  void extract_interpolation_matrices (const dealii::hp::DoFHandler<dim,spacedim> &dof,
+                                       dealii::Table<2,FullMatrix<double> > &matrices)
+  {
+    const dealii::hp::FECollection<dim,spacedim> &fe = dof.get_fe();
+    const unsigned int n_elements = fe.size();
+    matrices.reinit (n_elements, n_elements);
+    for (unsigned int i=0; i<n_elements; ++i)
+      for (unsigned int j=0; j<n_elements; ++j)
+        if (i != j)
+          {
+            FullMatrix<double> &matrix = matrices(i,j);
+            matrix.reinit (fe[i].dofs_per_cell, fe[j].dofs_per_cell);
+
+            // see if we can get the interpolation matrix for this
+            // combination of elements. if not, reset the matrix size to zero
+            // to indicate that this particular combination isn't supported.
+            // this isn't an outright error right away since we may never
+            // need to actually interpolate between these two elements on
+            // actual cells; we simply have to trigger an error if someone
+            // actually tries
+            try
+              {
+                fe[i].get_interpolation_matrix (fe[j], matrix);
+              }
+            catch (const typename FiniteElement<dim,spacedim>::ExcInterpolationNotImplemented &)
+              {
+                matrix.reinit (0,0);
+              }
+          }
+  }
+
+  // single-element overload: nothing to do, the output is left untouched
+  template <int dim, int spacedim>
+  void restriction_additive (const FiniteElement<dim,spacedim> &,
+                             std::vector<std::vector<bool> > &)
+  {}
+
+  // collection overload: entry [f][i] is fe[f].restriction_is_additive(i)
+  template <int dim, int spacedim>
+  void restriction_additive (const dealii::hp::FECollection<dim,spacedim> &fe,
+                             std::vector<std::vector<bool> > &restriction_is_additive)
+  {
+    restriction_is_additive.resize (fe.size());
+    for (unsigned int f=0; f<fe.size(); ++f)
+      {
+        const unsigned int n_local_dofs = fe[f].dofs_per_cell;
+        restriction_is_additive[f].resize (n_local_dofs);
+        for (unsigned int i=0; i<n_local_dofs; ++i)
+          restriction_is_additive[f][i] = fe[f].restriction_is_additive(i);
+      }
+  }
+}
+
+
+
+// Record, for every cell that is affected by the upcoming mesh change, the
+// information needed to later carry the vectors in @p all_in over to the
+// new mesh:
+//  - for an active cell without coarsen flag, the global dof indices of the
+//    cell together with its active_fe_index;
+//  - for a non-active cell whose children carry the coarsen flag, the dof
+//    values of every input vector interpolated from the children onto the
+//    cell, together with the fe index these values refer to.
+// Both kinds of records are registered in cell_map under the key
+// (cell->level(), cell->index()). On exit, prepared_for is set to
+// coarsening_and_refinement.
+//
+// @param all_in  The vectors to be transferred; must be non-empty and each
+//                vector must have dof_handler->n_dofs() entries.
+template<int dim, typename VectorType, typename DoFHandlerType>
+void
+SolutionTransfer<dim, VectorType, DoFHandlerType>::
+prepare_for_coarsening_and_refinement(const std::vector<VectorType> &all_in)
+{
+  Assert (prepared_for!=pure_refinement, ExcAlreadyPrepForRef());
+  Assert (prepared_for!=coarsening_and_refinement,
+          ExcAlreadyPrepForCoarseAndRef());
+
+  const unsigned int in_size=all_in.size();
+  Assert(in_size!=0,
+         ExcMessage("The array of input vectors you pass to this "
+                    "function has no elements. This is not useful."));
+
+  clear();
+
+  // only used inside an Assert below; the cast silences the unused-variable
+  // warning when assertions are compiled out
+  const unsigned int n_active_cells = dof_handler->get_triangulation().n_active_cells();
+  (void)n_active_cells;
+  n_dofs_old = dof_handler->n_dofs();
+
+  for (unsigned int i=0; i<in_size; ++i)
+    {
+      Assert(all_in[i].size()==n_dofs_old,
+             ExcDimensionMismatch(all_in[i].size(),n_dofs_old));
+    }
+
+  // first count the number of active cells that will be coarsened and
+  // the number of those that will stay or be refined
+  unsigned int n_cells_to_coarsen=0;
+  unsigned int n_cells_to_stay_or_refine=0;
+  for (typename DoFHandlerType::active_cell_iterator act_cell = dof_handler->begin_active();
+       act_cell!=dof_handler->end(); ++act_cell)
+    {
+      if (act_cell->coarsen_flag_set())
+        ++n_cells_to_coarsen;
+      else
+        ++n_cells_to_stay_or_refine;
+    }
+  Assert((n_cells_to_coarsen+n_cells_to_stay_or_refine)==n_active_cells,
+         ExcInternalError());
+
+  // count the parent cells whose first child is flagged for coarsening
+  unsigned int n_coarsen_fathers=0;
+  for (typename DoFHandlerType::cell_iterator cell=dof_handler->begin();
+       cell!=dof_handler->end(); ++cell)
+    if (!cell->active() && cell->child(0)->coarsen_flag_set())
+      ++n_coarsen_fathers;
+  Assert(n_cells_to_coarsen>=2*n_coarsen_fathers, ExcInternalError());
+
+  // allocate the needed memory. initialize the following arrays by
+  // swapping with freshly sized temporaries, without copying much.
+  // the arrays must not be resized afterwards since cell_map stores
+  // pointers to their elements
+  std::vector<std::vector<types::global_dof_index> >(n_cells_to_stay_or_refine)
+  .swap(indices_on_cell);
+
+  std::vector<std::vector<Vector<typename VectorType::value_type> > >
+  (n_coarsen_fathers,
+   std::vector<Vector<typename VectorType::value_type> > (in_size))
+  .swap(dof_values_on_cell);
+
+  // note: no interpolation matrices between the elements of an hp
+  // collection are set up here. the only interpolation this function
+  // performs goes through cell->get_interpolated_dof_values() below
+
+  // we need counters for
+  // the 'to_stay_or_refine' cells 'n_sr' and
+  // the 'coarsen_fathers' cells 'n_cf',
+  unsigned int n_sr=0, n_cf=0;
+  for (typename DoFHandlerType::cell_iterator cell=dof_handler->begin();
+       cell!=dof_handler->end(); ++cell)
+    {
+      // CASE 1: active cell that remains as it is
+      if (cell->active() && !cell->coarsen_flag_set())
+        {
+          const unsigned int dofs_per_cell=cell->get_fe().dofs_per_cell;
+          indices_on_cell[n_sr].resize(dofs_per_cell);
+          // cell will not be coarsened, so we get away by storing the
+          // dof indices and later interpolating to the children
+          cell->get_dof_indices(indices_on_cell[n_sr]);
+          cell_map[std::make_pair(cell->level(), cell->index())]
+            = Pointerstruct(&indices_on_cell[n_sr], cell->active_fe_index());
+          ++n_sr;
+        }
+
+      // CASE 2: cell is inactive but will become active
+      else if (cell->has_children() && cell->child(0)->coarsen_flag_set())
+        {
+          // note that if one child has the coarsen flag, then all should
+          // have if Tria::prepare_* has worked correctly
+          for (unsigned int i=1; i<cell->n_children(); ++i)
+            Assert(cell->child(i)->coarsen_flag_set(),
+                   ExcMessage("It looks like you didn't call "
+                              "Triangulation::prepare_coarsening_and_refinement before "
+                              "calling the current function. This can't work."));
+
+          // we will need to interpolate from the children of this cell
+          // to the current one. in the hp context, this also means
+          // we need to figure out which finite element space to interpolate
+          // to since that is not implied by the global FE as in the non-hp
+          // case.
+          bool different_fe_on_children = false;
+          for (unsigned int child=1; child<cell->n_children(); ++child)
+            if (cell->child(child)->active_fe_index()
+                != cell->child(0)->active_fe_index())
+              {
+                different_fe_on_children = true;
+                break;
+              }
+
+          // take FE index from the child with most degrees of freedom
+          // locally; ties are resolved in favor of the child with the
+          // smallest number
+          unsigned int most_general_child = 0;
+          if (different_fe_on_children == true)
+            for (unsigned int child=1; child<cell->n_children(); ++child)
+              if (cell->child(child)->get_fe().dofs_per_cell >
+                  cell->child(most_general_child)->get_fe().dofs_per_cell)
+                most_general_child = child;
+          const unsigned int target_fe_index = cell->child(most_general_child)->active_fe_index();
+
+          const unsigned int dofs_per_cell=cell->get_dof_handler().get_fe()[target_fe_index].dofs_per_cell;
+
+          std::vector<Vector<typename VectorType::value_type> >(in_size,
+                                                                Vector<typename VectorType::value_type>(dofs_per_cell))
+          .swap(dof_values_on_cell[n_cf]);
+
+
+          // store the data of each of the input vectors. get this data
+          // as interpolated onto a finite element space that encompasses
+          // that of all the children. note that cell->get_interpolated_dof_values
+          // already does all of the interpolations between spaces
+          for (unsigned int j=0; j<in_size; ++j)
+            cell->get_interpolated_dof_values(all_in[j],
+                                              dof_values_on_cell[n_cf][j],
+                                              target_fe_index);
+          cell_map[std::make_pair(cell->level(), cell->index())]
+            = Pointerstruct(&dof_values_on_cell[n_cf], target_fe_index);
+          ++n_cf;
+        }
+    }
+  Assert(n_sr==n_cells_to_stay_or_refine, ExcInternalError());
+  Assert(n_cf==n_coarsen_fathers, ExcInternalError());
+
+  prepared_for=coarsening_and_refinement;
+}
+
+
+
+// Convenience overload for a single vector: wrap a copy of @p in into a
+// one-element array and defer to the multi-vector overload.
+template<int dim, typename VectorType, typename DoFHandlerType>
+void
+SolutionTransfer<dim, VectorType, DoFHandlerType>::prepare_for_coarsening_and_refinement
+(const VectorType &in)
+{
+  const std::vector<VectorType> single_input (1, in);
+  prepare_for_coarsening_and_refinement (single_input);
+}
+
+
+
+// Carry the vectors @p all_in (living on the mesh as it was when
+// prepare_for_coarsening_and_refinement() was called) over to @p all_out
+// (living on the current mesh), using the per-cell records in cell_map:
+//  - cells registered with stored dof indices get the old values gathered
+//    from the input vector and written by
+//    set_dof_values_by_interpolation();
+//  - cells registered with stored dof values get these values written
+//    to their dofs, after mapping them to the cell's current finite element
+//    if its active_fe_index differs from the stored one.
+//
+// @param all_in   Input vectors, each of size n_dofs_old.
+// @param all_out  Output vectors, as many as inputs, each of size
+//                 dof_handler->n_dofs(); must be distinct objects from
+//                 the input vectors.
+template<int dim, typename VectorType, typename DoFHandlerType>
+void SolutionTransfer<dim, VectorType, DoFHandlerType>::
+interpolate (const std::vector<VectorType> &all_in,
+             std::vector<VectorType>       &all_out) const
+{
+  Assert(prepared_for==coarsening_and_refinement, ExcNotPrepared());
+  const unsigned int size=all_in.size();
+  Assert(all_out.size()==size, ExcDimensionMismatch(all_out.size(), size));
+  for (unsigned int i=0; i<size; ++i)
+    Assert (all_in[i].size() == n_dofs_old,
+            ExcDimensionMismatch(all_in[i].size(), n_dofs_old));
+  for (unsigned int i=0; i<all_out.size(); ++i)
+    Assert (all_out[i].size() == dof_handler->n_dofs(),
+            ExcDimensionMismatch(all_out[i].size(), dof_handler->n_dofs()));
+  for (unsigned int i=0; i<size; ++i)
+    for (unsigned int j=0; j<size; ++j)
+      Assert(&all_in[i] != &all_out[j],
+             ExcMessage ("Vectors cannot be used as input and output"
+                         " at the same time!"));
+
+  std::vector<types::global_dof_index> dofs;
+
+  typename std::map<std::pair<unsigned int, unsigned int>, Pointerstruct>::const_iterator
+  pointerstruct,
+  cell_map_end=cell_map.end();
+
+  // interpolation matrices between the elements of the collection; entry
+  // (i,j) is applied below to map values stored for element j to element i
+  Table<2,FullMatrix<double> > interpolation_hp;
+  internal::extract_interpolation_matrices (*dof_handler, interpolation_hp);
+
+  // scratch vector for cell-local values
+  Vector<typename VectorType::value_type> tmp;
+
+  typename DoFHandlerType::cell_iterator cell = dof_handler->begin(),
+                                         endc = dof_handler->end();
+  for (; cell!=endc; ++cell)
+    {
+      pointerstruct=cell_map.find(std::make_pair(cell->level(),cell->index()));
+
+      // cells without a record in the map need no treatment here
+      if (pointerstruct!=cell_map_end)
+        {
+          const std::vector<types::global_dof_index> *const indexptr
+            =pointerstruct->second.indices_ptr;
+
+          const std::vector<Vector<typename VectorType::value_type> > *const valuesptr
+            =pointerstruct->second.dof_values_ptr;
+
+          // cell stayed as it was or was refined
+          if (indexptr)
+            {
+              Assert (valuesptr == 0,
+                      ExcInternalError());
+
+              const unsigned int old_fe_index =
+                pointerstruct->second.active_fe_index;
+
+              // get the values of each of the input data vectors on this
+              // cell and prolong it to its children
+              const unsigned int in_size = indexptr->size();
+              for (unsigned int j=0; j<size; ++j)
+                {
+                  tmp.reinit (in_size, true);
+                  for (unsigned int i=0; i<in_size; ++i)
+                    tmp(i) = all_in[j]((*indexptr)[i]);
+
+                  cell->set_dof_values_by_interpolation (tmp, all_out[j],
+                                                         old_fe_index);
+                }
+            }
+          else if (valuesptr)
+            // the children of this cell were deleted
+            {
+              Assert (!cell->has_children(), ExcInternalError());
+              Assert (indexptr == 0,
+                      ExcInternalError());
+
+              const unsigned int dofs_per_cell = cell->get_fe().dofs_per_cell;
+              dofs.resize(dofs_per_cell);
+              // get the local indices
+              cell->get_dof_indices(dofs);
+
+              // distribute the stored data to the new vectors
+              for (unsigned int j=0; j<size; ++j)
+                {
+                  // if the cell now uses a different finite element than
+                  // the one the values were stored for, map the stored
+                  // values to the current element with the corresponding
+                  // interpolation matrix; otherwise use the stored
+                  // values directly
+                  const Vector<typename VectorType::value_type> *data = 0;
+                  const unsigned int active_fe_index = cell->active_fe_index();
+                  if (active_fe_index != pointerstruct->second.active_fe_index)
+                    {
+                      const unsigned int old_index = pointerstruct->second.active_fe_index;
+                      tmp.reinit (dofs_per_cell, true);
+                      AssertDimension ((*valuesptr)[j].size(),
+                                       interpolation_hp(active_fe_index,old_index).n());
+                      AssertDimension (tmp.size(),
+                                       interpolation_hp(active_fe_index,old_index).m());
+                      interpolation_hp(active_fe_index,old_index).vmult (tmp, (*valuesptr)[j]);
+                      data = &tmp;
+                    }
+                  else
+                    data = &(*valuesptr)[j];
+
+
+                  for (unsigned int i=0; i<dofs_per_cell; ++i)
+                    all_out[j](dofs[i])=(*data)(i);
+                }
+            }
+          // undefined status
+          else
+            Assert(false, ExcInternalError());
+        }
+    }
+}
+
+
+
+// Convenience overload for a single vector pair: copy @p in and @p out into
+// one-element arrays, run the multi-vector overload on them, and copy the
+// result back into @p out.
+template<int dim, typename VectorType, typename DoFHandlerType>
+void SolutionTransfer<dim, VectorType, DoFHandlerType>::interpolate
+(const VectorType &in,
+ VectorType       &out) const
+{
+  Assert (in.size()==n_dofs_old,
+          ExcDimensionMismatch(in.size(), n_dofs_old));
+  Assert (out.size()==dof_handler->n_dofs(),
+          ExcDimensionMismatch(out.size(), dof_handler->n_dofs()));
+
+  // input side
+  std::vector<VectorType> source_vectors(1);
+  source_vectors[0] = in;
+
+  // output side; seeded with a copy of 'out'
+  std::vector<VectorType> target_vectors(1);
+  target_vectors[0] = out;
+
+  interpolate(source_vectors, target_vectors);
+
+  out=target_vectors[0];
+}
+
+
+
+// Return the sum of the memory estimates of the data members of this
+// object, with the exception of cell_map.
+template<int dim, typename VectorType, typename DoFHandlerType>
+std::size_t
+SolutionTransfer<dim, VectorType, DoFHandlerType>::memory_consumption () const
+{
+  // at the moment we do not include the memory consumption of the
+  // cell_map as we have no real idea about memory consumption of a
+  // std::map
+  std::size_t total = MemoryConsumption::memory_consumption (dof_handler);
+  total += MemoryConsumption::memory_consumption (n_dofs_old);
+  total += sizeof (prepared_for);
+  total += MemoryConsumption::memory_consumption (indices_on_cell);
+  total += MemoryConsumption::memory_consumption (dof_values_on_cell);
+
+  return total;
+}
+
+
+
+// Return the size of the Pointerstruct object itself.
+template<int dim, typename VectorType, typename DoFHandlerType>
+std::size_t
+SolutionTransfer<dim, VectorType, DoFHandlerType>::Pointerstruct::memory_consumption () const
+{
+  const std::size_t own_size = sizeof(Pointerstruct);
+  return own_size;
+}
+
+
+/*-------------- Explicit Instantiations -------------------------------*/
+#define SPLIT_INSTANTIATIONS_COUNT 4
+#ifndef SPLIT_INSTANTIATIONS_INDEX
+#define SPLIT_INSTANTIATIONS_INDEX 0
+#endif
+#include "solution_transfer.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/numerics/solution_transfer.inst.in b/source/numerics/solution_transfer.inst.in
new file mode 100644
index 0000000..da017a3
--- /dev/null
+++ b/source/numerics/solution_transfer.inst.in
@@ -0,0 +1,26 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+for (VEC : SERIAL_VECTORS; deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+{
+#if deal_II_dimension <= deal_II_space_dimension
+    template class SolutionTransfer<deal_II_dimension, VEC, DoFHandler<deal_II_dimension, deal_II_space_dimension> >;
+    template class SolutionTransfer<deal_II_dimension, VEC, hp::DoFHandler<deal_II_dimension, deal_II_space_dimension> >;
+#endif
+}
+
+
+
diff --git a/source/numerics/solution_transfer_inst2.cc b/source/numerics/solution_transfer_inst2.cc
new file mode 100644
index 0000000..4ae3565
--- /dev/null
+++ b/source/numerics/solution_transfer_inst2.cc
@@ -0,0 +1,20 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// This file compiles the second quarter of the instantiations from solution_transfer.cc
+// to reduce the compilation unit (and memory consumption)
+
+#define SPLIT_INSTANTIATIONS_INDEX 1
+#include "solution_transfer.cc"
diff --git a/source/numerics/solution_transfer_inst3.cc b/source/numerics/solution_transfer_inst3.cc
new file mode 100644
index 0000000..a0c437a
--- /dev/null
+++ b/source/numerics/solution_transfer_inst3.cc
@@ -0,0 +1,20 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// This file compiles the third quarter of the instantiations from solution_transfer.cc
+// to reduce the compilation unit (and memory consumption)
+
+#define SPLIT_INSTANTIATIONS_INDEX 2
+#include "solution_transfer.cc"
diff --git a/source/numerics/solution_transfer_inst4.cc b/source/numerics/solution_transfer_inst4.cc
new file mode 100644
index 0000000..2195bd6
--- /dev/null
+++ b/source/numerics/solution_transfer_inst4.cc
@@ -0,0 +1,20 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// This file compiles the last quarter of the instantiations from solution_transfer.cc
+// to reduce the compilation unit (and memory consumption)
+
+#define SPLIT_INSTANTIATIONS_INDEX 3
+#include "solution_transfer.cc"
diff --git a/source/numerics/time_dependent.cc b/source/numerics/time_dependent.cc
new file mode 100644
index 0000000..c385300
--- /dev/null
+++ b/source/numerics/time_dependent.cc
@@ -0,0 +1,1242 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1999 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+#include <deal.II/numerics/time_dependent.h>
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/base/thread_management.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/parallel.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/grid_refinement.h>
+#include <deal.II/lac/vector.h>
+
+#include <functional>
+#include <algorithm>
+#include <numeric>
+
+DEAL_II_NAMESPACE_OPEN
+
+// Store the given look-ahead and look-back values in the members of the
+// same name.
+TimeDependent::TimeSteppingData::TimeSteppingData (const unsigned int look_ahead,
+                                                   const unsigned int look_back)
+  : look_ahead (look_ahead),
+    look_back (look_back)
+{}
+
+
+// Store one set of time stepping data for each of the primal, dual and
+// postprocessing loops. The sweep number starts out as
+// numbers::invalid_unsigned_int.
+TimeDependent::TimeDependent (const TimeSteppingData &data_primal,
+                              const TimeSteppingData &data_dual,
+                              const TimeSteppingData &data_postprocess)
+  : sweep_no (numbers::invalid_unsigned_int),
+    timestepping_data_primal (data_primal),
+    timestepping_data_dual (data_dual),
+    timestepping_data_postprocess (data_postprocess)
+{}
+
+
+// Delete all time step objects still registered, always removing the
+// first one until the list is empty.
+TimeDependent::~TimeDependent ()
+{
+  while (!timesteps.empty())
+    delete_timestep (0);
+}
+
+
+// Insert @p new_timestep into the list of time steps and link it with its
+// neighbors through their previous/next pointers.
+//
+// @param position      The time step in front of which the new one is
+//                      inserted; a null pointer means "append at the end".
+//                      Any other value must be an element of the list,
+//                      which is checked by an assertion.
+// @param new_timestep  The time step object to insert.
+void
+TimeDependent::insert_timestep (const TimeStepBase *position,
+                                TimeStepBase       *new_timestep)
+{
+  // look up the place of insertion exactly once. a null position maps to
+  // the end of the array
+  const std::vector<SmartPointer<TimeStepBase,TimeDependent> >::iterator insert_position
+    = (position == 0 ?
+       timesteps.end() :
+       std::find(timesteps.begin(), timesteps.end(), position));
+
+  Assert ((insert_position != timesteps.end()) ||
+          (position == 0),
+          ExcInvalidPosition());
+
+  // first insert the new time step into the doubly linked list of
+  // timesteps
+  if (insert_position == timesteps.end())
+    {
+      // at the end. should the assertion above be compiled out and the
+      // position not be found, we also end up here instead of
+      // dereferencing an invalid iterator
+      new_timestep->set_next_timestep (0);
+      if (timesteps.size() > 0)
+        {
+          timesteps.back()->set_next_timestep (new_timestep);
+          new_timestep->set_previous_timestep (timesteps.back());
+        }
+      else
+        new_timestep->set_previous_timestep (0);
+    }
+  else if (insert_position == timesteps.begin())
+    {
+      // at the beginning; the list is known to be non-empty here since
+      // the iterator is not the end iterator
+      new_timestep->set_previous_timestep (0);
+      (*insert_position)->set_previous_timestep (new_timestep);
+      new_timestep->set_next_timestep (*insert_position);
+    }
+  else
+    {
+      // inner time step
+      (*(insert_position-1))->set_next_timestep (new_timestep);
+      new_timestep->set_previous_timestep (*(insert_position-1));
+      new_timestep->set_next_timestep (*insert_position);
+      (*insert_position)->set_previous_timestep (new_timestep);
+    }
+
+  // finally enter it into the array
+  timesteps.insert (insert_position, new_timestep);
+}
+
+
+// Append @p new_timestep at the end of the list by calling
+// insert_timestep() with a null position.
+void
+TimeDependent::add_timestep (TimeStepBase *new_timestep)
+{
+  const TimeStepBase *const at_end = 0;
+  insert_timestep (at_end, new_timestep);
+}
+
+
+// Delete the time step object stored at index @p position, remove its slot
+// from the array, and re-link the former neighbors with each other.
+void TimeDependent::delete_timestep (const unsigned int position)
+{
+  Assert (position<timesteps.size(),
+          ExcInvalidPosition());
+
+  // fetch the raw pointer and reset the SmartPointer first so that the
+  // object is no longer subscribed to when it is deleted
+  TimeStepBase *doomed = timesteps[position];
+  timesteps[position] = 0;
+  delete doomed;
+
+  timesteps.erase (timesteps.begin() + position);
+
+  // after the erase, index 'position' denotes the former successor, if
+  // there is one; if position==size now, we deleted the last time step
+  const bool has_predecessor = (position != 0);
+  const bool has_successor   = (position < timesteps.size());
+
+  // reset "next" pointer of previous time step if possible
+  if (has_predecessor)
+    {
+      if (has_successor)
+        timesteps[position-1]->set_next_timestep (timesteps[position]);
+      else
+        timesteps[position-1]->set_next_timestep (0);
+    }
+
+  // same for "previous" pointer of next time step
+  if (has_successor)
+    {
+      if (has_predecessor)
+        timesteps[position]->set_previous_timestep (timesteps[position-1]);
+      else
+        timesteps[position]->set_previous_timestep (0);
+    }
+}
+
+
+// Run do_loop() in forward direction with the primal time stepping data:
+// TimeStepBase::init_for_primal_problem is passed as the initialization
+// function and TimeStepBase::solve_primal_problem as the loop function.
+void
+TimeDependent::solve_primal_problem ()
+{
+  do_loop (
+    std::mem_fun(&TimeStepBase::init_for_primal_problem),
+    std::mem_fun(&TimeStepBase::solve_primal_problem),
+    timestepping_data_primal,
+    forward);
+}
+
+
+// Run do_loop() in backward direction with the dual time stepping data:
+// TimeStepBase::init_for_dual_problem is passed as the initialization
+// function and TimeStepBase::solve_dual_problem as the loop function.
+void
+TimeDependent::solve_dual_problem ()
+{
+  do_loop (
+    std::mem_fun(&TimeStepBase::init_for_dual_problem),
+    std::mem_fun(&TimeStepBase::solve_dual_problem),
+    timestepping_data_dual,
+    backward);
+}
+
+
+// Run do_loop() in forward direction with the postprocessing time stepping
+// data: TimeStepBase::init_for_postprocessing is passed as the
+// initialization function and TimeStepBase::postprocess_timestep as the
+// loop function.
+void
+TimeDependent::postprocess ()
+{
+  do_loop (
+    std::mem_fun(&TimeStepBase::init_for_postprocessing),
+    std::mem_fun(&TimeStepBase::postprocess_timestep),
+    timestepping_data_postprocess,
+    forward);
+}
+
+
+
+// Begin sweep number @p s: store the sweep number, renumber all time steps
+// and tell them the sweep number, then call start_sweep() on each of them.
+void TimeDependent::start_sweep (const unsigned int s)
+{
+  sweep_no = s;
+
+  // first pass: reset the number each time step has, since some time
+  // steps might have been added since the last time we visited them, and
+  // also set the sweep we will process in the sequel
+  for (unsigned int i=0; i<timesteps.size(); ++i)
+    {
+      timesteps[i]->set_timestep_no (i);
+      timesteps[i]->set_sweep_no (s);
+    }
+
+  // second pass: only now, with all numbers in place, notify the time
+  // steps
+  for (unsigned int i=0; i<timesteps.size(); ++i)
+    timesteps[i]->start_sweep ();
+}
+
+
+
+// Call end_sweep() on all time steps by handing the index range
+// [0, timesteps.size()) to parallel::apply_to_subranges(), which invokes the
+// two-argument overload of this function on subranges.
+void TimeDependent::end_sweep ()
+{
+  // name the two-argument overload explicitly so that it can be bound
+  void (TimeDependent::*range_worker) (const unsigned int, const unsigned int)
+    = &TimeDependent::end_sweep;
+
+  parallel::apply_to_subranges (0U, timesteps.size(),
+                                std_cxx11::bind (range_worker, this, std_cxx11::_1, std_cxx11::_2),
+                                1);
+}
+
+
+
+// Call end_sweep() on the time steps with indices in [begin, end).
+void TimeDependent::end_sweep (const unsigned int begin,
+                               const unsigned int end)
+{
+  unsigned int step = begin;
+  while (step < end)
+    {
+      timesteps[step]->end_sweep ();
+      ++step;
+    }
+}
+
+
+
+// Return the sum of the memory estimates of the members of this object
+// plus those of all time step objects pointed to.
+std::size_t
+TimeDependent::memory_consumption () const
+{
+  // the members of this object itself
+  std::size_t total = MemoryConsumption::memory_consumption (timesteps);
+  total += MemoryConsumption::memory_consumption (sweep_no);
+  total += sizeof(timestepping_data_primal);
+  total += sizeof(timestepping_data_dual);
+  total += sizeof(timestepping_data_postprocess);
+
+  // the time step objects the array points to
+  for (unsigned int step=0; step<timesteps.size(); ++step)
+    total += MemoryConsumption::memory_consumption (*timesteps[step]);
+
+  return total;
+}
+
+
+
+/* --------------------------------------------------------------------- */
+
+
+// Create a time step object for the given @p time. It starts out without
+// neighbors and with invalid sweep and time step numbers.
+//
+// NOTE(review): next_action does not appear in this initializer list; it
+// is assigned by the init_for_*() functions. Confirm that nothing reads it
+// before one of them has been called.
+TimeStepBase::TimeStepBase (const double time)
+  : previous_timestep (0),
+    next_timestep (0),
+    sweep_no (numbers::invalid_unsigned_int),
+    timestep_no (numbers::invalid_unsigned_int),
+    time (time)
+{}
+
+
+
+// Nothing to release here.
+TimeStepBase::~TimeStepBase ()
+{
+}
+
+
+
+// Base class implementation: does nothing and ignores its argument.
+void
+TimeStepBase::wake_up (const unsigned int)
+{
+}
+
+
+
+// Base class implementation: does nothing and ignores its argument.
+void
+TimeStepBase::sleep (const unsigned)
+{
+}
+
+
+
+// Base class implementation: does nothing.
+void
+TimeStepBase::start_sweep ()
+{
+}
+
+
+
+// Base class implementation: does nothing.
+void
+TimeStepBase::end_sweep ()
+{
+}
+
+
+
+// Mark the primal problem as the next action of this time step.
+void
+TimeStepBase::init_for_primal_problem ()
+{
+  this->next_action = primal_problem;
+}
+
+
+
+// Mark the dual problem as the next action of this time step.
+void
+TimeStepBase::init_for_dual_problem ()
+{
+  this->next_action = dual_problem;
+}
+
+
+
+// Mark postprocessing as the next action of this time step.
+void
+TimeStepBase::init_for_postprocessing ()
+{
+  this->next_action = postprocess;
+}
+
+
+
+// Base class implementation: not meant to be called; it unconditionally
+// raises the ExcPureFunctionCalled assertion.
+void
+TimeStepBase::solve_dual_problem ()
+{
+  const bool base_version_may_be_called = false;
+  Assert (base_version_may_be_called, ExcPureFunctionCalled());
+  (void)base_version_may_be_called;
+}
+
+
+
+// Base class implementation: not meant to be called; it unconditionally
+// raises the ExcPureFunctionCalled assertion.
+void
+TimeStepBase::postprocess_timestep ()
+{
+  const bool base_version_may_be_called = false;
+  Assert (base_version_may_be_called, ExcPureFunctionCalled());
+  (void)base_version_may_be_called;
+}
+
+
+
+// Return the time value stored for this time step.
+double
+TimeStepBase::get_time () const
+{
+  return this->time;
+}
+
+
+
+// Return the number stored for this time step.
+unsigned int
+TimeStepBase::get_timestep_no () const
+{
+  return this->timestep_no;
+}
+
+
+
+// Return the difference between the time of this time step and that of the
+// previous one. A previous time step must exist, which is checked by an
+// assertion.
+double
+TimeStepBase::get_backward_timestep () const
+{
+  Assert (previous_timestep != 0,
+          ExcMessage("The backward time step cannot be computed because "
+                     "there is no previous time step."));
+
+  const double previous_time = previous_timestep->time;
+  return time - previous_time;
+}
+
+
+
+// Return the difference between the time of the next time step and that of
+// this one. A next time step must exist, which is checked by an assertion.
+double
+TimeStepBase::get_forward_timestep () const
+{
+  Assert (next_timestep != 0,
+          ExcMessage("The forward time step cannot be computed because "
+                     "there is no next time step."));
+
+  const double next_time = next_timestep->time;
+  return next_time - time;
+}
+
+
+
+// Store @p previous as the time step preceding this one.
+void
+TimeStepBase::set_previous_timestep (const TimeStepBase *previous)
+{
+  this->previous_timestep = previous;
+}
+
+
+
+// Store @p next as the time step following this one.
+void
+TimeStepBase::set_next_timestep (const TimeStepBase *next)
+{
+  this->next_timestep = next;
+}
+
+
+
+// Store @p step_no as the number of this time step.
+void
+TimeStepBase::set_timestep_no (const unsigned int step_no)
+{
+  this->timestep_no = step_no;
+}
+
+
+
+// Store @p sweep as the number of the current sweep.
+void
+TimeStepBase::set_sweep_no (const unsigned int sweep)
+{
+  this->sweep_no = sweep;
+}
+
+
+
+// Return the size of a TimeStepBase object.
+std::size_t
+TimeStepBase::memory_consumption () const
+{
+  // only simple data types, so the size of the object itself is all
+  // there is to report
+  const std::size_t own_size = sizeof(TimeStepBase);
+  return own_size;
+}
+
+
+
+// Default constructor. It sets all members to null/default values but is
+// not meant to be called: the body unconditionally raises the
+// ExcPureFunctionCalled assertion.
+template <int dim>
+TimeStepBase_Tria<dim>::TimeStepBase_Tria()
+  : TimeStepBase (0),
+    tria (0, typeid(*this).name()),
+    coarse_grid (0, typeid(*this).name()),
+    flags (),
+    refinement_flags(0)
+{
+  Assert (false, ExcPureFunctionCalled());
+}
+
+
+
+// Create a time step object for the given @p time. A pointer to
+// @p coarse_grid and copies of @p flags and @p refinement_flags are
+// stored; the pointer to the triangulation of this time step starts out
+// null.
+template <int dim>
+TimeStepBase_Tria<dim>::TimeStepBase_Tria (const double              time,
+                                           const Triangulation<dim> &coarse_grid,
+                                           const Flags              &flags,
+                                           const RefinementFlags    &refinement_flags)
+  : TimeStepBase (time),
+    tria(0, typeid(*this).name()),
+    coarse_grid (&coarse_grid, typeid(*this).name()),
+    flags (flags),
+    refinement_flags (refinement_flags)
+{}
+
+
+
+// Release the triangulation if it has been kept alive (i.e. if
+// flags.delete_and_rebuild_tria is not set); otherwise it is expected to
+// have been deleted already. Finally reset the pointer to the coarse grid.
+template <int dim>
+TimeStepBase_Tria<dim>::~TimeStepBase_Tria ()
+{
+  if (flags.delete_and_rebuild_tria)
+    {
+      Assert (tria==0, ExcInternalError());
+    }
+  else
+    {
+      // reset the SmartPointer before deleting the object it pointed to
+      Triangulation<dim> *grid = tria;
+      tria = 0;
+      delete grid;
+    }
+
+  coarse_grid = 0;
+}
+
+
+
+// Call the base class function, then rebuild the grid if @p wakeup_level
+// equals flags.wakeup_level_to_build_grid and the grid either is rebuilt
+// each time (flags.delete_and_rebuild_tria) or does not exist yet.
+template <int dim>
+void
+TimeStepBase_Tria<dim>::wake_up (const unsigned int wakeup_level)
+{
+  TimeStepBase::wake_up (wakeup_level);
+
+  if ((wakeup_level == flags.wakeup_level_to_build_grid)
+      &&
+      (flags.delete_and_rebuild_tria || !tria))
+    restore_grid ();
+}
+
+
+
+// If @p sleep_level equals flags.sleep_level_to_delete_grid and
+// flags.delete_and_rebuild_tria is set, delete the triangulation.
+// Afterwards call the base class function.
+template <int dim>
+void
+TimeStepBase_Tria<dim>::sleep (const unsigned int sleep_level)
+{
+  const bool at_deletion_level
+    = (sleep_level == flags.sleep_level_to_delete_grid);
+
+  if (at_deletion_level)
+    {
+      Assert (tria!=0, ExcInternalError());
+
+      if (flags.delete_and_rebuild_tria)
+        {
+          // reset the SmartPointer before deleting the object it
+          // pointed to
+          Triangulation<dim> *grid = tria;
+          tria = 0;
+          delete grid;
+        }
+    }
+
+  TimeStepBase::sleep (sleep_level);
+}
+
+
+
+// Append one new entry to each of refine_flags and coarsen_flags and let
+// the triangulation store its current refinement and coarsening flags in
+// them.
+template <int dim>
+void TimeStepBase_Tria<dim>::save_refine_flags ()
+{
+  // make room for the flags of the present state of the grid
+  const std::vector<bool> no_flags_yet;
+  refine_flags.push_back (no_flags_yet);
+  coarsen_flags.push_back (no_flags_yet);
+
+  // and have the triangulation fill the new entries
+  tria->save_refine_flags (refine_flags.back());
+  tria->save_coarsen_flags (coarsen_flags.back());
+}
+
+
+
+// Rebuild the triangulation of this time step: start from a copy of the
+// coarse grid and replay, one after the other, all stored sets of refine
+// and coarsen flags. The triangulation must not exist when this function
+// is called.
+template <int dim>
+void TimeStepBase_Tria<dim>::restore_grid ()
+{
+  Assert (tria == 0, ExcGridNotDeleted());
+  Assert (refine_flags.size() == coarsen_flags.size(),
+          ExcInternalError());
+
+  // create a virgin triangulation and set it to a copy of the coarse grid
+  tria = new Triangulation<dim> ();
+  tria->copy_triangulation (*coarse_grid);
+
+  // replay each of the previous refinement sweeps
+  const unsigned int n_previous_sweeps = refine_flags.size();
+  for (unsigned int sweep=0; sweep<n_previous_sweeps; ++sweep)
+    {
+      // get flags
+      tria->load_refine_flags  (refine_flags[sweep]);
+      tria->load_coarsen_flags (coarsen_flags[sweep]);
+
+      // limit refinement depth if the user desired so. this step is
+      // currently disabled:
+//       if (flags.max_refinement_level != 0)
+//      {
+//        typename Triangulation<dim>::active_cell_iterator cell, endc;
+//        for (cell = tria->begin_active(),
+//             endc = tria->end();
+//             cell!=endc; ++cell)
+//          if (static_cast<unsigned int>(cell->level()) >=
+//              flags.max_refinement_level)
+//            cell->clear_refine_flag();
+//      };
+
+      tria->execute_coarsening_and_refinement ();
+    }
+}
+
+
+
+// have a few helper functions
+namespace
+{
+  // Mirror the refinement flags from the present time level (@p new_cell)
+  // to the previous one (@p old_cell), descending recursively through the
+  // children the two cells have in common.
+  //
+  // This is done if the dual problem was used for the refinement, since the
+  // error is computed on a time-space cell. The coarsening flags are not
+  // mirrored since we want stronger refinement; if this was the wrong
+  // decision, the error on the child cells of the previous time slab will
+  // indicate coarsening in the next iteration, so this is not so dangerous
+  // here.
+  template <int dim>
+  void
+  mirror_refinement_flags (const typename Triangulation<dim>::cell_iterator &new_cell,
+                           const typename Triangulation<dim>::cell_iterator &old_cell)
+  {
+    if (!new_cell->active())
+      {
+        // the present cell has children: continue with them, provided the
+        // previous cell has children as well
+        if (old_cell->has_children() && new_cell->has_children())
+          {
+            Assert(old_cell->n_children()==new_cell->n_children(), ExcNotImplemented());
+            const unsigned int n_children = new_cell->n_children();
+            for (unsigned int child=0; child<n_children; ++child)
+              dealii::mirror_refinement_flags<dim> (new_cell->child(child),
+                                                    old_cell->child(child));
+          }
+        return;
+      }
+
+    // the present cell is active. we only have to act if it is flagged
+    // for refinement and the previous one is active, too. if the previous
+    // cell already has children, then there is no problem at all; if it
+    // is on a lower level than the present one, then it will be refined
+    // below anyway.
+    if (new_cell->refine_flag_set() && old_cell->active())
+      {
+        if (old_cell->coarsen_flag_set())
+          old_cell->clear_coarsen_flag();
+
+        old_cell->set_refine_flag();
+      }
+  }
+
+
+
+  template <int dim>
+  bool
+  adapt_grid_cells (const typename Triangulation<dim>::cell_iterator &cell1,
+                    const typename Triangulation<dim>::cell_iterator &cell2)
+  {
+    // reconcile the flags of two corresponding cells; returns true if any flag was changed
+    if (cell2->has_children() && cell1->has_children())
+      {
+        bool grids_changed = false;
+
+        Assert(cell2->n_children()==cell1->n_children(), ExcNotImplemented());
+        for (unsigned int c=0; c<cell1->n_children(); ++c)
+          grids_changed |= dealii::adapt_grid_cells<dim> (cell1->child(c),
+                                                          cell2->child(c));
+        return grids_changed;
+      };
+
+
+    if (!cell1->has_children() && !cell2->has_children())
+      // neither of the two has children, so
+      // make sure that not one is flagged
+      // for refinement and the other for
+      // coarsening
+      {
+        if (cell1->refine_flag_set() && cell2->coarsen_flag_set())
+          {
+            cell2->clear_coarsen_flag();
+            return true;
+          }
+        else if (cell1->coarsen_flag_set() && cell2->refine_flag_set())
+          {
+            cell1->clear_coarsen_flag();
+            return true;
+          };
+
+        return false;
+      };
+
+
+    if (cell1->has_children() && !cell2->has_children())
+      // cell1 has children, cell2 has not
+      // -> cell2 needs to be refined if any
+      // of cell1's children is flagged
+      // for refinement. None of them should
+      // be refined further, since then in the
+      // last round something must have gone
+      // wrong
+      //
+      // if cell2 was flagged for coarsening,
+      // we need to clear that flag in any
+      // case. The only exception would be
+      // if all children of cell1 were
+      // flagged for coarsening, but rules
+      // for coarsening are so complicated
+      // that we will not attempt to cover
+      // them. Rather accept one cell which
+      // is not coarsened...
+      {
+        bool changed_grid = false;
+        if (cell2->coarsen_flag_set())
+          {
+            cell2->clear_coarsen_flag();
+            changed_grid = true;
+          };
+
+        if (!cell2->refine_flag_set())
+          for (unsigned int c=0; c<cell1->n_children(); ++c)
+            if (cell1->child(c)->refine_flag_set() ||
+                cell1->child(c)->has_children())
+              {
+                cell2->set_refine_flag();
+                changed_grid = true;
+                break;
+              };
+        return changed_grid;
+      };
+
+    if (!cell1->has_children() && cell2->has_children())
+      // same thing, with the roles of cell1 and cell2 exchanged
+      {
+        bool changed_grid = false;
+        if (cell1->coarsen_flag_set())
+          {
+            cell1->clear_coarsen_flag();
+            changed_grid = true;
+          };
+
+        if (!cell1->refine_flag_set())
+          for (unsigned int c=0; c<cell2->n_children(); ++c)
+            if (cell2->child(c)->refine_flag_set() ||
+                cell2->child(c)->has_children())
+              {
+                cell1->set_refine_flag();
+                changed_grid = true;
+                break;
+              };
+        return changed_grid;
+      };
+
+    Assert (false, ExcInternalError());
+    return false;
+  }
+
+
+
+  template <int dim>
+  bool
+  adapt_grids (Triangulation<dim> &tria1,
+               Triangulation<dim> &tria2)
+  {
+    bool grids_changed = false;
+    // walk both grids in lockstep, stopping at tria1.begin(1) (or tria1.end() if it has one level)
+    typename Triangulation<dim>::cell_iterator cell1 = tria1.begin(),
+                                               cell2 = tria2.begin();
+    typename Triangulation<dim>::cell_iterator endc;
+    endc = (tria1.n_levels() == 1 ?
+            typename Triangulation<dim>::cell_iterator(tria1.end()) :
+            tria1.begin(1));
+    for (; cell1!=endc; ++cell1, ++cell2)
+      grids_changed |= dealii::adapt_grid_cells<dim> (cell1, cell2);
+
+    return grids_changed;
+  }
+}
+
+// Flag the cells of this time level for refinement/coarsening, keeping the cell count near the previous level's.
+template <int dim>
+void TimeStepBase_Tria<dim>::refine_grid (const RefinementData refinement_data)
+{
+  Vector<float> criteria;
+  get_tria_refinement_criteria (criteria);
+
+  // copy the following two values since
+  // we may need modified values in the
+  // process of this function
+  double refinement_threshold = refinement_data.refinement_threshold,
+         coarsening_threshold = refinement_data.coarsening_threshold;
+
+  // prepare an array where the criteria
+  // are stored in a sorted fashion
+  // we need this if cell number correction
+  // is switched on.
+  // the criteria are sorted in ascending
+  // order
+  // only fill it when needed
+  Vector<float> sorted_criteria;
+  // two pointers into this array denoting
+  // the position where the two thresholds
+  // are assumed
+  Vector<float>::const_iterator p_refinement_threshold=0,
+                                p_coarsening_threshold=0;
+
+
+  // if we are to do some cell number
+  // correction steps, we have to find out
+  // which further cells (beyond
+  // refinement_threshold) to refine in case
+  // we need more cells, and which cells
+  // to not refine in case we need fewer cells
+  // (or even to coarsen, if necessary). to
+  // this end, we first define pointers into
+  // a sorted array of criteria pointing
+  // to the thresholds of refinement or
+  // coarsening; moving these pointers amounts
+  // to changing the threshold such that the
+  // number of cells flagged for refinement
+  // or coarsening would be changed by one
+  if ((timestep_no != 0) &&
+      (sweep_no>=refinement_flags.first_sweep_with_correction) &&
+      (refinement_flags.cell_number_correction_steps > 0))
+    {
+      sorted_criteria = criteria;
+      std::sort (sorted_criteria.begin(),
+                 sorted_criteria.end());
+      p_refinement_threshold = Utilities::lower_bound (sorted_criteria.begin(),
+                                                       sorted_criteria.end(),
+                                                       static_cast<float>(refinement_threshold));
+      p_coarsening_threshold = std::upper_bound (sorted_criteria.begin(),
+                                                 sorted_criteria.end(),
+                                                 static_cast<float>(coarsening_threshold));
+    };
+
+
+  // actually flag cells the first time
+  GridRefinement::refine (*tria, criteria, refinement_threshold);
+  GridRefinement::coarsen (*tria, criteria, coarsening_threshold);
+
+  // store this number for the following
+  // since its computation is rather
+  // expensive and since it doesn't change
+  const unsigned int n_active_cells = tria->n_active_cells ();
+
+  // if not on first time level: try to
+  // adjust the number of resulting
+  // cells to those on the previous
+  // time level. Only do the cell number
+  // correction for higher sweeps and if
+  // there are sufficiently many cells
+  // already to avoid "grid stall" i.e.
+  // that the grid's evolution is hindered
+  // by the correction (this usually
+  // happens if there are very few cells,
+  // since then the number of cells touched
+  // by the correction step may exceed the
+  // number of cells which are flagged for
+  // refinement; in this case it often
+  // happens that the number of cells
+  // does not grow between sweeps, which
+  // clearly is not the wanted behaviour)
+  //
+  // however, if we do not do anything, we
+  // can get into trouble at some later point.
+  // therefore, we also use the correction
+  // step for the first sweep or if the
+  // number of cells is between 100 and 300
+  // (unlike in the first version of the
+  // algorithm), but relax the conditions
+  // for the correction to allow deviations
+  // which are three times as high as
+  // allowed (sweep==1 || cell number<200)
+  // or twice as high (sweep==2 ||
+  // cell number<300). Also, since
+  // refinement never does any harm other
+  // than increased work, we allow for
+  // arbitrary growth of cell number if
+  // the estimated cell number is below
+  // 200.
+  //
+  // repeat this loop several times since
+  // the first estimate may not be totally
+  // correct
+  if ((timestep_no != 0) && (sweep_no>=refinement_flags.first_sweep_with_correction))
+    for (unsigned int loop=0;
+         loop<refinement_flags.cell_number_correction_steps; ++loop)
+      {
+        Triangulation<dim> *previous_tria
+          = dynamic_cast<const TimeStepBase_Tria<dim>*>(previous_timestep)->tria;
+
+        // do one adaption step if desired
+        // (there are more coming below then
+        // also)
+        if (refinement_flags.adapt_grids)
+          dealii::adapt_grids<dim> (*previous_tria, *tria);
+
+        // perform flagging of cells
+        // needed to regularize the
+        // triangulation
+        tria->prepare_coarsening_and_refinement ();
+        previous_tria->prepare_coarsening_and_refinement ();
+
+
+        // now count the number of elements
+        // which will result on the previous
+        // grid after it will be refined. The
+        // number which will really result
+        // should be approximately that that we
+        // compute here, since we already
+        // performed most of the prepare*
+        // steps for the previous grid
+        //
+        // use a double value since for each
+        // four cells (in 2D) flagged for
+        // coarsening we end up with one new.
+        // we loop over flagged cells, so we
+        // subtract 3/4 of a cell per flagged
+        // cell (in floating point arithmetic)
+        Assert(!tria->get_anisotropic_refinement_flag(), ExcNotImplemented());
+        Assert(!previous_tria->get_anisotropic_refinement_flag(), ExcNotImplemented());
+        double previous_cells = previous_tria->n_active_cells();
+        typename Triangulation<dim>::active_cell_iterator cell, endc;
+        cell = previous_tria->begin_active();
+        endc = previous_tria->end();
+        for (; cell!=endc; ++cell)
+          if (cell->refine_flag_set())
+            previous_cells += (GeometryInfo<dim>::max_children_per_cell-1);
+          else if (cell->coarsen_flag_set())
+            previous_cells -= static_cast<double>(GeometryInfo<dim>::max_children_per_cell-1) /
+                              GeometryInfo<dim>::max_children_per_cell;
+
+        // @p{previous_cells} now gives the
+        // number of cells which would result
+        // from the flags on the previous grid
+        // if we refined it now. However, some
+        // more flags will be set when we adapt
+        // the previous grid with this one
+        // after the flags have been set for
+        // this time level; on the other hand,
+        // we don't account for this, since the
+        // number of cells on this time level
+        // will be changed afterwards by the
+        // same way, when it is adapted to the
+        // next time level
+
+        // now estimate the number of cells which
+        // will result on this level
+        double estimated_cells = n_active_cells;
+        cell = tria->begin_active();
+        endc = tria->end();
+        for (; cell!=endc; ++cell)
+          if (cell->refine_flag_set())
+            estimated_cells += (GeometryInfo<dim>::max_children_per_cell-1);
+          else if (cell->coarsen_flag_set())
+            estimated_cells -= static_cast<double>(GeometryInfo<dim>::max_children_per_cell-1) /
+                               GeometryInfo<dim>::max_children_per_cell;
+
+        // calculate the allowed delta in
+        // cell numbers; be more lenient
+        // if there are few cells
+        double delta_up = refinement_flags.cell_number_corridor_top,
+               delta_down = refinement_flags.cell_number_corridor_bottom;
+
+        const std::vector<std::pair<unsigned int,double> > &relaxations
+          = (sweep_no >= refinement_flags.correction_relaxations.size() ?
+             refinement_flags.correction_relaxations.back() :
+             refinement_flags.correction_relaxations[sweep_no]);
+        for (unsigned int r=0; r!=relaxations.size(); ++r)
+          if (n_active_cells < relaxations[r].first)
+            {
+              delta_up   *= relaxations[r].second;
+              delta_down *= relaxations[r].second;
+              break;
+            };
+
+        // now, if the number of estimated
+        // cells exceeds the number of cells
+        // on the old time level by more than
+        // delta: cut the top threshold
+        //
+        // note that for each cell that
+        // we unflag we have to diminish the
+        // estimated number of cells by
+        // @p{children_per_cell}-1.
+        if (estimated_cells > previous_cells*(1.+delta_up))
+          {
+            // only limit the cell number
+            // if there will not be less
+            // than some number of cells
+            //
+            // also note that when using the
+            // dual estimator, the initial
+            // time level is not refined
+            // on its own, so we may not
+            // limit the number of the second
+            // time level on the basis of
+            // the initial one; since for
+            // the dual estimator, we
+            // mirror the refinement
+            // flags, the initial level
+            // will be passively refined
+            // later on.
+            if (estimated_cells>refinement_flags.min_cells_for_correction)
+              {
+                // number of cells by which the
+                // new grid is to be diminished
+                double delta_cells = estimated_cells -
+                                     previous_cells*(1.+delta_up);
+
+                // if we need to reduce the
+                // number of cells, we need
+                // to raise the thresholds,
+                // i.e. move ahead in the
+                // sorted array, since this
+                // is sorted in ascending
+                // order. do so by removing
+                // cells tagged for refinement
+
+                for (unsigned int i=0; i<delta_cells;
+                     i += GeometryInfo<dim>::max_children_per_cell-1)
+                  if (p_refinement_threshold != sorted_criteria.end())
+                    ++p_refinement_threshold;
+                  else
+                    break;
+              }
+            else
+              // too many cells, but we
+              // won't do anything about
+              // that
+              break;
+          }
+        else
+          // likewise: if the estimated number
+          // of cells is below (1-delta_down)
+          // times that of the previous time level:
+          // lower the threshold to refine
+          // additional cells. if we start to
+          // run into the area of cells
+          // which are to be coarsened, we
+          // lower the limit for these too
+          if (estimated_cells < previous_cells*(1.-delta_down))
+            {
+              // number of cells by which the
+              // new grid is to be enlarged
+              double delta_cells = previous_cells*(1.-delta_down)-estimated_cells;
+              // heuristics: usually, if we
+              // add @p{delta_cells} to the
+              // present state, we end up
+              // with much more than only
+              // (1-delta_down)*prev_cells
+              // because of the effect of
+              // regularization and because
+              // of adaption to the
+              // following grid. Therefore,
+              // if we are not in the last
+              // correction loop, we try not
+              // to add as many cells as seem
+              // necessary at first and hope
+              // to get closer to the limit
+              // this way. Only in the last
+              // loop do we have to take the
+              // full number to guarantee the
+              // wanted result.
+              //
+              // The value 0.9 is taken from
+              // practice, as the additional
+              // number of cells introduced
+              // by regularization is
+              // approximately 10 per cent
+              // of the flagged cells.
+              if (loop != refinement_flags.cell_number_correction_steps-1)
+                delta_cells *= 0.9;
+
+              // if more cells need to be
+              // refined, we need to lower
+              // the thresholds, i.e. move the
+              // iterators towards the beginning
+              // of sorted_criteria, which is
+              // sorted in ascending order
+              for (unsigned int i=0; i<delta_cells;
+                   i += (GeometryInfo<dim>::max_children_per_cell-1))
+                if (p_refinement_threshold == sorted_criteria.begin())
+                  break;
+                else if (p_refinement_threshold != p_coarsening_threshold)
+                  --p_refinement_threshold;
+                else
+                  --p_coarsening_threshold, --p_refinement_threshold;
+            }
+          else
+            // estimated cell number is ok,
+            // stop correction steps
+            break;
+
+        if (p_refinement_threshold == sorted_criteria.end())
+          {
+            Assert (p_coarsening_threshold != p_refinement_threshold,
+                    ExcInternalError());
+            --p_refinement_threshold;
+          };
+
+        coarsening_threshold = *p_coarsening_threshold;
+        refinement_threshold = *p_refinement_threshold;
+
+        if (coarsening_threshold>=refinement_threshold)
+          coarsening_threshold = 0.999*refinement_threshold;
+
+        // now that we have re-adjusted
+        // thresholds: clear all refine and
+        // coarsening flags and do it all
+        // over again
+        cell = tria->begin_active();
+        endc  = tria->end();
+        for (; cell!=endc; ++cell)
+          {
+            cell->clear_refine_flag ();
+            cell->clear_coarsen_flag ();
+          };
+
+
+        // flag cells finally
+        GridRefinement::refine (*tria, criteria, refinement_threshold);
+        GridRefinement::coarsen (*tria, criteria, coarsening_threshold);
+      };
+
+  // if step number is greater than
+  // one: adapt this and the previous
+  // grid to each other. Don't do so
+  // for the initial grid because
+  // it is always taken to be the first
+  // grid and needs therefore no
+  // treatment of its own.
+  if ((timestep_no >= 1) && (refinement_flags.adapt_grids))
+    {
+      Triangulation<dim> *previous_tria
+        = dynamic_cast<const TimeStepBase_Tria<dim>*>(previous_timestep)->tria;
+
+
+      // if we used the dual estimator, we
+      // computed the error information on
+      // a time slab, rather than on a level
+      // of its own. we then mirror the
+      // refinement flags we determined for
+      // the present level to the previous
+      // one
+      //
+      // do this mirroring only, if cell number
+      // adjustment is on, since otherwise
+      // strange things may happen
+      if (refinement_flags.mirror_flags_to_previous_grid)
+        {
+          dealii::adapt_grids<dim> (*previous_tria, *tria);
+
+          typename Triangulation<dim>::cell_iterator old_cell, new_cell, endc;
+          old_cell = previous_tria->begin(0);
+          new_cell = tria->begin(0);
+          endc     = tria->end(0);
+          for (; new_cell!=endc; ++new_cell, ++old_cell)
+            dealii::mirror_refinement_flags<dim> (new_cell, old_cell);
+        };
+
+      tria->prepare_coarsening_and_refinement ();
+      previous_tria->prepare_coarsening_and_refinement ();
+
+      // adapt present and previous grids
+      // to each other: flag additional
+      // cells to avoid the previous grid
+      // to have cells refined twice more
+      // than the present one and vice versa.
+      dealii::adapt_grids<dim> (*previous_tria, *tria);
+    };
+}
+
+
+
+template <int dim>
+void TimeStepBase_Tria<dim>::init_for_refinement ()
+{
+  next_action = grid_refinement;   // mark grid refinement as this object's next_action
+}
+
+
+
+template <int dim>
+std::size_t
+TimeStepBase_Tria<dim>::memory_consumption () const
+{
+  return (TimeStepBase::memory_consumption () +   // base class contribution
+          sizeof(tria) +                          // size of the tria member itself only
+          MemoryConsumption::memory_consumption (coarse_grid) +
+          sizeof(flags) + sizeof(refinement_flags) +
+          MemoryConsumption::memory_consumption (refine_flags) +    // stored flag histories
+          MemoryConsumption::memory_consumption (coarsen_flags));
+}
+
+
+
+template <int dim>
+TimeStepBase_Tria_Flags::Flags<dim>::Flags ()
+  :
+  delete_and_rebuild_tria (false),
+  wakeup_level_to_build_grid (0),
+  sleep_level_to_delete_grid (0)
+{
+  Assert (false, ExcInternalError());   // the assertion condition is always false
+}
+
+
+
+template <int dim>
+TimeStepBase_Tria_Flags::Flags<dim>::Flags (const bool delete_and_rebuild_tria,
+                                            const unsigned int wakeup_level_to_build_grid,
+                                            const unsigned int sleep_level_to_delete_grid):
+  delete_and_rebuild_tria (delete_and_rebuild_tria),
+  wakeup_level_to_build_grid (wakeup_level_to_build_grid),
+  sleep_level_to_delete_grid (sleep_level_to_delete_grid)
+{ // only stores the arguments; the parameter checks below are disabled (commented out)
+//   Assert (!delete_and_rebuild_tria || (wakeup_level_to_build_grid>=1),
+//        ExcInvalidParameter(wakeup_level_to_build_grid));
+//   Assert (!delete_and_rebuild_tria || (sleep_level_to_delete_grid>=1),
+//        ExcInvalidParameter(sleep_level_to_delete_grid));
+}
+
+
+template <int dim>
+typename TimeStepBase_Tria_Flags::RefinementFlags<dim>::CorrectionRelaxations
+TimeStepBase_Tria_Flags::RefinementFlags<dim>::default_correction_relaxations
+(1,    // one entry; refine_grid() uses the last entry for all sweeps beyond the list
+ std::vector<std::pair<unsigned int,double> >(1,    // one (cell-count bound, factor) pair:
+                                              // refine_grid() applies the factor only if
+                                              // n_active_cells < bound, so a bound of 0 never matches
+                                              std::make_pair (0U, 0.)));
+
+
+template <int dim>
+TimeStepBase_Tria_Flags::RefinementFlags<dim>::
+RefinementFlags (const unsigned int max_refinement_level,
+                 const unsigned int first_sweep_with_correction,
+                 const unsigned int min_cells_for_correction,
+                 const double       cell_number_corridor_top,
+                 const double       cell_number_corridor_bottom,
+                 const CorrectionRelaxations &correction_relaxations,
+                 const unsigned int cell_number_correction_steps,
+                 const bool         mirror_flags_to_previous_grid,
+                 const bool         adapt_grids) :
+  max_refinement_level(max_refinement_level),
+  first_sweep_with_correction (first_sweep_with_correction),
+  min_cells_for_correction(min_cells_for_correction),
+  cell_number_corridor_top(cell_number_corridor_top),
+  cell_number_corridor_bottom(cell_number_corridor_bottom),
+  correction_relaxations (correction_relaxations.size() != 0 ?
+                          correction_relaxations :
+                          default_correction_relaxations),   // an empty list selects the defaults
+  cell_number_correction_steps(cell_number_correction_steps),
+  mirror_flags_to_previous_grid(mirror_flags_to_previous_grid),
+  adapt_grids(adapt_grids)
+{
+  Assert (cell_number_corridor_top>=0, ExcInvalidValue (cell_number_corridor_top));   // top width must be non-negative
+  Assert (cell_number_corridor_bottom>=0, ExcInvalidValue (cell_number_corridor_bottom));   // bottom width must lie
+  Assert (cell_number_corridor_bottom<=1, ExcInvalidValue (cell_number_corridor_bottom));   // within [0,1]
+}
+
+
+template <int dim>
+TimeStepBase_Tria_Flags::RefinementData<dim>::
+RefinementData (const double         _refinement_threshold,
+                const double         _coarsening_threshold) :
+  refinement_threshold(_refinement_threshold),
+  // in some rare cases it may happen that
+  // both thresholds are the same (e.g. if
+  // there are many cells with the same
+  // error indicator). That would mean that
+  // all cells will be flagged for
+  // refinement or coarsening, but some will
+  // be flagged for both, namely those for
+  // which the indicator equals the
+  // thresholds. This is forbidden, however.
+  //
+  // In some rare cases with very few cells
+  // we also could get integer round off
+  // errors and get problems with
+  // the top and bottom fractions.
+  //
+  // To rule these cases out we always reduce
+  // the bottom threshold by one permille.
+  // Equal non-zero thresholds thus become
+  // strictly ordered, while two zero
+  // thresholds stay zero.
+  coarsening_threshold(0.999*_coarsening_threshold)
+{
+  Assert (refinement_threshold >= 0, ExcInvalidValue(refinement_threshold));
+  Assert (coarsening_threshold >= 0, ExcInvalidValue(coarsening_threshold));
+  // allow both thresholds to be zero,
+  // since this is needed in case all indicators
+  // are zero
+  Assert ((coarsening_threshold < refinement_threshold) ||
+          ((coarsening_threshold == 0) && (refinement_threshold == 0)),
+          ExcInvalidValue (coarsening_threshold));
+}
+
+
+
+/*-------------- Explicit Instantiations -------------------------------*/
+#include "time_dependent.inst"
+
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/numerics/time_dependent.inst.in b/source/numerics/time_dependent.inst.in
new file mode 100644
index 0000000..fd494b2
--- /dev/null
+++ b/source/numerics/time_dependent.inst.in
@@ -0,0 +1,26 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// explicit instantiations of the classes below, one set per entry of DIMENSIONS
+for (deal_II_dimension : DIMENSIONS)
+{
+   template class TimeStepBase_Tria<deal_II_dimension>;
+   namespace TimeStepBase_Tria_Flags
+   \{
+     template struct Flags<deal_II_dimension>;
+     template struct RefinementFlags<deal_II_dimension>;
+     template struct RefinementData<deal_II_dimension>;
+   \}
+}
diff --git a/source/numerics/vector_tools_boundary.cc b/source/numerics/vector_tools_boundary.cc
new file mode 100644
index 0000000..51c7824
--- /dev/null
+++ b/source/numerics/vector_tools_boundary.cc
@@ -0,0 +1,24 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/numerics/vector_tools.templates.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+// ---------------------------- explicit instantiations --------------------
+#include "vector_tools_boundary.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/numerics/vector_tools_boundary.inst.in b/source/numerics/vector_tools_boundary.inst.in
new file mode 100644
index 0000000..c23601a
--- /dev/null
+++ b/source/numerics/vector_tools_boundary.inst.in
@@ -0,0 +1,187 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension : SPACE_DIMENSIONS; DH : DOFHANDLER_TEMPLATES)
+  { // interpolate_boundary_values instantiations for every DH in DOFHANDLER_TEMPLATES
+#if deal_II_dimension <= deal_II_space_dimension
+    namespace VectorTools \{
+      template // overload: Mapping, FunctionMap, std::map
+        void interpolate_boundary_values
+        (const Mapping<deal_II_dimension,deal_II_space_dimension>    &,
+         const DH<deal_II_dimension,deal_II_space_dimension> &,
+         const FunctionMap<deal_II_space_dimension>::type &,
+         std::map<types::global_dof_index,double>       &,
+         const ComponentMask    &);
+
+      template // overload: Mapping, single boundary_id + Function, std::map
+        void interpolate_boundary_values
+        (const Mapping<deal_II_dimension,deal_II_space_dimension>    &,
+         const DH<deal_II_dimension,deal_II_space_dimension> &,
+         const types::boundary_id,
+         const Function<deal_II_space_dimension>   &,
+         std::map<types::global_dof_index,double>       &,
+         const ComponentMask    &);
+
+      template // overload: no Mapping, single boundary_id + Function, std::map
+        void interpolate_boundary_values (
+          const DH<deal_II_dimension,deal_II_space_dimension> &,
+          const types::boundary_id,
+          const Function<deal_II_space_dimension>   &,
+          std::map<types::global_dof_index,double>       &,
+          const ComponentMask    &);
+
+      template // overload: no Mapping, FunctionMap, std::map
+        void interpolate_boundary_values
+        (const DH<deal_II_dimension,deal_II_space_dimension> &,
+         const FunctionMap<deal_II_space_dimension>::type &,
+         std::map<types::global_dof_index,double>       &,
+         const ComponentMask    &);
+
+      template // overload: Mapping, FunctionMap, ConstraintMatrix
+        void interpolate_boundary_values (
+          const Mapping<deal_II_dimension,deal_II_space_dimension>    &,
+          const DH<deal_II_dimension,deal_II_space_dimension> &,
+          const FunctionMap<deal_II_space_dimension>::type   &,
+          ConstraintMatrix                    &,
+          const ComponentMask    &);
+
+      template // overload: Mapping, single boundary_id + Function, ConstraintMatrix
+        void interpolate_boundary_values
+        (const Mapping<deal_II_dimension,deal_II_space_dimension>    &,
+         const DH<deal_II_dimension,deal_II_space_dimension> &,
+         const types::boundary_id,
+         const Function<deal_II_space_dimension>   &,
+         ConstraintMatrix                    &,
+         const ComponentMask             &);
+
+      template // overload: no Mapping, single boundary_id + Function, ConstraintMatrix
+        void interpolate_boundary_values (
+          const DH<deal_II_dimension,deal_II_space_dimension> &,
+          const types::boundary_id,
+          const Function<deal_II_space_dimension>   &,
+          ConstraintMatrix                    &,
+          const ComponentMask    &);
+
+      template // overload: no Mapping, FunctionMap, ConstraintMatrix
+        void interpolate_boundary_values (
+          const DH<deal_II_dimension,deal_II_space_dimension> &,
+          const FunctionMap<deal_II_space_dimension>::type   &,
+          ConstraintMatrix                    &,
+          const ComponentMask    &);
+        \} // namespace VectorTools
+#endif // deal_II_dimension <= deal_II_space_dimension
+} // end of for loop
+
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension : SPACE_DIMENSIONS)
+  {
+    namespace VectorTools \{
+#if deal_II_dimension == deal_II_space_dimension
+      // project_boundary_values overloads; this section is guarded by dim == spacedim
+      template // overload: Mapping, DoFHandler, std::map
+        void project_boundary_values<deal_II_dimension>
+        (const Mapping<deal_II_dimension>     &,
+         const DoFHandler<deal_II_dimension>  &,
+         const FunctionMap<deal_II_dimension>::type &,
+         const Quadrature<deal_II_dimension-1>&,
+         std::map<types::global_dof_index,double>&, std::vector<unsigned int>);
+
+      template // overload: no Mapping, DoFHandler, std::map
+        void project_boundary_values<deal_II_dimension>
+        (const DoFHandler<deal_II_dimension>  &,
+         const FunctionMap<deal_II_dimension>::type &,
+         const Quadrature<deal_II_dimension-1>&,
+         std::map<types::global_dof_index,double>&, 
+         std::vector<unsigned int>);
+
+      template // overload: Mapping, DoFHandler, ConstraintMatrix
+        void project_boundary_values<deal_II_dimension>
+        (const Mapping<deal_II_dimension>     &,
+         const DoFHandler<deal_II_dimension>  &,
+         const FunctionMap<deal_II_dimension>::type &,
+         const Quadrature<deal_II_dimension-1>&,
+         ConstraintMatrix&, std::vector<unsigned int>);
+
+      template // overload: no Mapping, DoFHandler, ConstraintMatrix
+        void project_boundary_values<deal_II_dimension>
+        (const DoFHandler<deal_II_dimension>  &,
+         const FunctionMap<deal_II_dimension>::type &,
+         const Quadrature<deal_II_dimension-1>&,
+         ConstraintMatrix&, 
+         std::vector<unsigned int>);
+
+      template // overload: hp::DoFHandler, hp::QCollection, std::map
+        void project_boundary_values<deal_II_dimension,deal_II_space_dimension>
+        (const hp::DoFHandler<deal_II_dimension,deal_II_space_dimension> &,
+         const FunctionMap<deal_II_space_dimension>::type &,
+         const hp::QCollection<deal_II_dimension-1> &,
+         std::map<types::global_dof_index,double> &,
+         std::vector<unsigned int>);
+
+
+      // curl-/div-conforming projections follow; the guard below skips them for dim == 1
+#if deal_II_dimension != 1
+      template // DoFHandler + Mapping
+        void project_boundary_values_curl_conforming<deal_II_dimension>
+        (const DoFHandler<deal_II_dimension>&,
+         const unsigned int,
+         const Function<deal_II_dimension>&,
+         const types::boundary_id,
+         ConstraintMatrix&,
+         const Mapping<deal_II_dimension>&);
+      template // hp::DoFHandler + hp::MappingCollection
+        void project_boundary_values_curl_conforming<deal_II_dimension>
+        (const hp::DoFHandler<deal_II_dimension>&,
+         const unsigned int,
+         const Function<deal_II_dimension>&,
+         const types::boundary_id,
+         ConstraintMatrix&,
+         const hp::MappingCollection<deal_II_dimension>&);
+      template // DoFHandler + Mapping
+        void project_boundary_values_curl_conforming_l2<deal_II_dimension>
+        (const DoFHandler<deal_II_dimension>&,
+         const unsigned int,
+         const Function<deal_II_dimension>&,
+         const types::boundary_id,
+         ConstraintMatrix&,
+         const Mapping<deal_II_dimension>&);
+      template // hp::DoFHandler + hp::MappingCollection
+        void project_boundary_values_curl_conforming_l2<deal_II_dimension>
+        (const hp::DoFHandler<deal_II_dimension>&,
+         const unsigned int,
+         const Function<deal_II_dimension>&,
+         const types::boundary_id,
+         ConstraintMatrix&,
+         const hp::MappingCollection<deal_II_dimension>&);
+      template // DoFHandler + Mapping
+        void project_boundary_values_div_conforming<deal_II_dimension>
+        (const DoFHandler<deal_II_dimension>&,
+         const unsigned int,
+         const Function<deal_II_dimension>&,
+         const types::boundary_id,
+         ConstraintMatrix&,
+         const Mapping<deal_II_dimension>&);
+      template // hp::DoFHandler + hp::MappingCollection
+        void project_boundary_values_div_conforming<deal_II_dimension>
+        (const hp::DoFHandler<deal_II_dimension>&,
+         const unsigned int,
+         const Function<deal_II_dimension>&,
+         const types::boundary_id,
+         ConstraintMatrix&,
+         const hp::MappingCollection<deal_II_dimension>&);
+#endif // deal_II_dimension != 1
+#endif // deal_II_dimension == deal_II_space_dimension
+      \} // namespace VectorTools
+  } // end of for loop
diff --git a/source/numerics/vector_tools_constraints.cc b/source/numerics/vector_tools_constraints.cc
new file mode 100644
index 0000000..d215e88
--- /dev/null
+++ b/source/numerics/vector_tools_constraints.cc
@@ -0,0 +1,24 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/numerics/vector_tools.templates.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+// ---------------------------- explicit instantiations --------------------
+#include "vector_tools_constraints.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/numerics/vector_tools_constraints.inst.in b/source/numerics/vector_tools_constraints.inst.in
new file mode 100644
index 0000000..dfd4d27
--- /dev/null
+++ b/source/numerics/vector_tools_constraints.inst.in
@@ -0,0 +1,94 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+//TODO[SP]: replace <deal_II_dimension> by <deal_II_dimension, deal_II_space_dimension>
+// where applicable, and also instantiate for the codimension cases (dim < spacedim) when applicable
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+{
+  namespace VectorTools \{
+  // Flux-constraint functions, each for DoFHandler and hp::DoFHandler; guarded by dim == spacedim and dim != 1.
+#if deal_II_dimension == deal_II_space_dimension
+#if deal_II_dimension != 1
+  template // DoFHandler variant
+  void
+  compute_nonzero_normal_flux_constraints (const DoFHandler<deal_II_dimension> &dof_handler,
+                                           const unsigned int     first_vector_component,
+                                           const std::set<types::boundary_id> &boundary_ids,
+                                           FunctionMap<deal_II_dimension>::type &function_map,
+                                           ConstraintMatrix      &constraints,
+                                           const Mapping<deal_II_dimension>    &mapping);
+
+  template // hp::DoFHandler variant
+  void
+  compute_nonzero_normal_flux_constraints (const hp::DoFHandler<deal_II_dimension> &dof_handler,
+                                           const unsigned int     first_vector_component,
+                                           const std::set<types::boundary_id> &boundary_ids,
+                                           FunctionMap<deal_II_dimension>::type &function_map,
+                                           ConstraintMatrix      &constraints,
+                                           const Mapping<deal_II_dimension>    &mapping);
+
+  template // DoFHandler variant
+  void
+  compute_nonzero_tangential_flux_constraints (const DoFHandler<deal_II_dimension> &dof_handler,
+                                               const unsigned int     first_vector_component,
+                                               const std::set<types::boundary_id> &boundary_ids,
+                                               FunctionMap<deal_II_dimension>::type &function_map,
+                                               ConstraintMatrix      &constraints,
+                                               const Mapping<deal_II_dimension>    &mapping);
+  template // hp::DoFHandler variant
+  void
+  compute_nonzero_tangential_flux_constraints (const hp::DoFHandler<deal_II_dimension> &dof_handler,
+                                               const unsigned int     first_vector_component,
+                                               const std::set<types::boundary_id> &boundary_ids,
+                                               FunctionMap<deal_II_dimension>::type &function_map,
+                                               ConstraintMatrix      &constraints,
+                                               const Mapping<deal_II_dimension>    &mapping);
+    
+  template // DoFHandler variant (no function map argument)
+  void
+  compute_no_normal_flux_constraints (const DoFHandler<deal_II_dimension> &dof_handler,
+                                      const unsigned int     first_vector_component,
+                                      const std::set<types::boundary_id> &boundary_ids,
+                                      ConstraintMatrix      &constraints,
+                                      const Mapping<deal_II_dimension>    &mapping);
+
+  template // hp::DoFHandler variant (no function map argument)
+  void
+  compute_no_normal_flux_constraints (const hp::DoFHandler<deal_II_dimension> &dof_handler,
+                                      const unsigned int     first_vector_component,
+                                      const std::set<types::boundary_id> &boundary_ids,
+                                      ConstraintMatrix      &constraints,
+                                      const Mapping<deal_II_dimension>    &mapping);
+    
+  template // DoFHandler variant (no function map argument)
+  void
+  compute_normal_flux_constraints (const DoFHandler<deal_II_dimension> &dof_handler,
+                                   const unsigned int     first_vector_component,
+                                   const std::set<types::boundary_id> &boundary_ids,
+                                   ConstraintMatrix      &constraints,
+                                   const Mapping<deal_II_dimension>    &mapping);
+
+  template // hp::DoFHandler variant (no function map argument)
+  void
+  compute_normal_flux_constraints (const hp::DoFHandler<deal_II_dimension> &dof_handler,
+                                   const unsigned int     first_vector_component,
+                                   const std::set<types::boundary_id> &boundary_ids,
+                                   ConstraintMatrix      &constraints,
+                                   const Mapping<deal_II_dimension>    &mapping);
+#endif // deal_II_dimension != 1
+#endif // deal_II_dimension == deal_II_space_dimension
+  \} // namespace VectorTools
+} // end of for loop
\ No newline at end of file
diff --git a/source/numerics/vector_tools_integrate_difference.cc b/source/numerics/vector_tools_integrate_difference.cc
new file mode 100644
index 0000000..dde35d8
--- /dev/null
+++ b/source/numerics/vector_tools_integrate_difference.cc
@@ -0,0 +1,24 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/numerics/vector_tools.templates.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+// ---------------------------- explicit instantiations --------------------
+#include "vector_tools_integrate_difference.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/numerics/vector_tools_integrate_difference.inst.in b/source/numerics/vector_tools_integrate_difference.inst.in
new file mode 100644
index 0000000..690974f
--- /dev/null
+++ b/source/numerics/vector_tools_integrate_difference.inst.in
@@ -0,0 +1,116 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+for (VEC : SERIAL_VECTORS ; deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+#if deal_II_dimension <= deal_II_space_dimension
+    namespace VectorTools \{
+      // integrate_difference for each VEC, with Vector<float> and Vector<double> as the second vector type
+      template // Mapping + DoFHandler, Vector<float>
+        void integrate_difference<deal_II_dimension, VEC, Vector<float>, deal_II_space_dimension>
+        (const Mapping<deal_II_dimension, deal_II_space_dimension>&,
+         const DoFHandler<deal_II_dimension, deal_II_space_dimension>&,
+         const VEC&,
+         const Function<deal_II_space_dimension>&,
+         Vector<float>&,
+         const Quadrature<deal_II_dimension>&,
+         const NormType&,
+         const Function<deal_II_space_dimension>*,
+         const double);
+
+      template // DoFHandler only, Vector<float>
+        void integrate_difference<deal_II_dimension, VEC, Vector<float>, deal_II_space_dimension>
+        (const DoFHandler<deal_II_dimension, deal_II_space_dimension>&,
+         const VEC&,
+         const Function<deal_II_space_dimension>&,
+         Vector<float>&,
+         const Quadrature<deal_II_dimension>&,
+         const NormType&,
+         const Function<deal_II_space_dimension>*,
+         const double);
+
+      template // Mapping + DoFHandler, Vector<double>
+        void integrate_difference<deal_II_dimension, VEC, Vector<double>, deal_II_space_dimension >
+        (const Mapping<deal_II_dimension, deal_II_space_dimension>&,
+         const DoFHandler<deal_II_dimension, deal_II_space_dimension>&,
+         const VEC&,
+         const Function<deal_II_space_dimension>&,
+         Vector<double>&,
+         const Quadrature<deal_II_dimension>&,
+         const NormType&,
+         const Function<deal_II_space_dimension>*,
+         const double);
+
+      template // DoFHandler only, Vector<double>
+        void integrate_difference<deal_II_dimension, VEC, Vector<double>, deal_II_space_dimension >
+        (const DoFHandler<deal_II_dimension, deal_II_space_dimension>&,
+         const VEC&,
+         const Function<deal_II_space_dimension>&,
+         Vector<double>&,
+         const Quadrature<deal_II_dimension>&,
+         const NormType&,
+         const Function<deal_II_space_dimension>*,
+         const double);
+
+      template // hp::MappingCollection + hp::DoFHandler, Vector<double>
+        void integrate_difference<deal_II_dimension, VEC, Vector<double>, deal_II_space_dimension>
+        (const hp::MappingCollection<deal_II_dimension,deal_II_space_dimension>&,
+         const hp::DoFHandler<deal_II_dimension,deal_II_space_dimension>&,
+         const VEC&,
+         const Function<deal_II_space_dimension>&,
+         Vector<double>&,
+         const hp::QCollection<deal_II_dimension>&,
+         const NormType&,
+         const Function<deal_II_space_dimension>*,
+         const double);
+
+      template // hp::DoFHandler only, Vector<double>
+        void integrate_difference<deal_II_dimension, VEC, Vector<double>, deal_II_space_dimension>
+        (const hp::DoFHandler<deal_II_dimension,deal_II_space_dimension>&,
+         const VEC&,
+         const Function<deal_II_space_dimension>&,
+         Vector<double>&,
+         const hp::QCollection<deal_II_dimension>&,
+         const NormType&,
+         const Function<deal_II_space_dimension>*,
+         const double);
+
+      template // hp::MappingCollection + hp::DoFHandler, Vector<float>
+        void integrate_difference<deal_II_dimension, VEC, Vector<float>, deal_II_space_dimension>
+        (const hp::MappingCollection<deal_II_dimension,deal_II_space_dimension>&,
+         const hp::DoFHandler<deal_II_dimension,deal_II_space_dimension>&,
+         const VEC&,
+         const Function<deal_II_space_dimension>&,
+         Vector<float>&,
+         const hp::QCollection<deal_II_dimension>&,
+         const NormType&,
+         const Function<deal_II_space_dimension>*,
+         const double);
+
+      template // hp::DoFHandler only, Vector<float>
+        void integrate_difference<deal_II_dimension, VEC, Vector<float>, deal_II_space_dimension>
+        (const hp::DoFHandler<deal_II_dimension,deal_II_space_dimension>&,
+         const VEC&,
+         const Function<deal_II_space_dimension>&,
+         Vector<float>&,
+         const hp::QCollection<deal_II_dimension>&,
+         const NormType&,
+         const Function<deal_II_space_dimension>*,
+         const double);
+
+      \} // namespace VectorTools
+#endif // deal_II_dimension <= deal_II_space_dimension
+  } // end of for loop
diff --git a/source/numerics/vector_tools_interpolate.cc b/source/numerics/vector_tools_interpolate.cc
new file mode 100644
index 0000000..9b75253
--- /dev/null
+++ b/source/numerics/vector_tools_interpolate.cc
@@ -0,0 +1,24 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/numerics/vector_tools.templates.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+// ---------------------------- explicit instantiations --------------------
+#include "vector_tools_interpolate.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/numerics/vector_tools_interpolate.inst.in b/source/numerics/vector_tools_interpolate.inst.in
new file mode 100644
index 0000000..bd96acf
--- /dev/null
+++ b/source/numerics/vector_tools_interpolate.inst.in
@@ -0,0 +1,120 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+for (VEC : SERIAL_VECTORS ; deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+#if deal_II_dimension <= deal_II_space_dimension
+    namespace VectorTools \{
+      // interpolate and get_position_vector on DoFHandler<dim,spacedim>, for each VEC
+      template // overload: Mapping, DoFHandler, Function, VEC
+        void interpolate
+        (const Mapping<deal_II_dimension,deal_II_space_dimension> &,
+         const DoFHandler<deal_II_dimension,deal_II_space_dimension>&,
+         const Function<deal_II_space_dimension>&,
+         VEC&);
+
+      template // overload: DoFHandler, Function, VEC
+        void interpolate
+        (const DoFHandler<deal_II_dimension,deal_II_space_dimension>&,
+         const Function<deal_II_space_dimension>&,
+         VEC&);
+
+      template // overload: two DoFHandlers, FullMatrix<double>, two VECs
+        void interpolate
+        (const DoFHandler<deal_II_dimension,deal_II_space_dimension>&,
+         const DoFHandler<deal_II_dimension,deal_II_space_dimension>&,
+         const FullMatrix<double>&,
+         const VEC&,
+         VEC&);
+
+      template
+         void get_position_vector
+	 (const DoFHandler<deal_II_dimension,deal_II_space_dimension>&,
+	  VEC&,
+	  const ComponentMask&);
+
+      \} // namespace VectorTools
+#endif // deal_II_dimension <= deal_II_space_dimension
+  } // end of for loop
+
+
+
+//TODO[SP]: replace <deal_II_dimension> by <deal_II_dimension, deal_II_space_dimension>
+// where applicable, and move those instantiations to the codimension loop above
+for (VEC : SERIAL_VECTORS ; deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+#if deal_II_dimension == deal_II_space_dimension
+
+    namespace VectorTools \{
+      // hp::DoFHandler interpolate overloads and interpolate_based_on_material_id; dim == spacedim only
+      template // overload: Mapping, hp::DoFHandler, Function, VEC
+        void interpolate
+        (const Mapping<deal_II_dimension>&,
+         const hp::DoFHandler<deal_II_dimension>&,
+         const Function<deal_II_dimension>&,
+         VEC&);
+      template // overload: hp::DoFHandler, Function, VEC
+        void interpolate
+        (const hp::DoFHandler<deal_II_dimension>&,
+         const Function<deal_II_dimension>&,
+         VEC&);
+         
+     template // DoFHandler<dim,spacedim> variant
+      void interpolate_based_on_material_id(const Mapping<deal_II_dimension, deal_II_space_dimension>&,
+                                            const DoFHandler<deal_II_dimension, deal_II_space_dimension>&,
+                                            const std::map< types::material_id, const Function<deal_II_space_dimension>* >&,
+                                            VEC&,
+                                            const ComponentMask&);
+         
+     template // hp::DoFHandler<dim> variant
+      void interpolate_based_on_material_id(const Mapping<deal_II_dimension>&,
+                                            const hp::DoFHandler<deal_II_dimension>&,
+                                            const std::map< types::material_id, const Function<deal_II_dimension>* >&,
+                                            VEC&,
+                                            const ComponentMask&);
+         
+      \} // namespace VectorTools
+#endif // deal_II_dimension == deal_II_space_dimension
+  } // end of for loop
+
+
+for (VEC : SERIAL_VECTORS ; deal_II_dimension : DIMENSIONS)
+{
+  namespace VectorTools \{
+  // interpolate_to_different_mesh for each VEC and dimension (no separate space dimension here)
+  template // overload: two DoFHandlers, no constraints
+    void interpolate_to_different_mesh
+    (const DoFHandler<deal_II_dimension> &,
+     const VEC                           &,
+     const DoFHandler<deal_II_dimension> &,
+     VEC                                 &);
+
+  template // overload: two DoFHandlers plus ConstraintMatrix
+    void interpolate_to_different_mesh
+    (const DoFHandler<deal_II_dimension> &,
+     const VEC                           &,
+     const DoFHandler<deal_II_dimension> &,
+     const ConstraintMatrix              &,
+     VEC                                 &);
+
+  template // overload: InterGridMap plus ConstraintMatrix
+    void interpolate_to_different_mesh
+    (const InterGridMap<DoFHandler<deal_II_dimension> > &,
+     const VEC                                          &,
+     const ConstraintMatrix                             &,
+     VEC                                                &);
+  \} // namespace VectorTools
+} // end of for loop
diff --git a/source/numerics/vector_tools_mean_value.cc b/source/numerics/vector_tools_mean_value.cc
new file mode 100644
index 0000000..43b8e51
--- /dev/null
+++ b/source/numerics/vector_tools_mean_value.cc
@@ -0,0 +1,24 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/numerics/vector_tools.templates.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+// ---------------------------- explicit instantiations --------------------
+#include "vector_tools_mean_value.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/numerics/vector_tools_mean_value.inst.in b/source/numerics/vector_tools_mean_value.inst.in
new file mode 100644
index 0000000..6892ce0
--- /dev/null
+++ b/source/numerics/vector_tools_mean_value.inst.in
@@ -0,0 +1,49 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+for (VEC : SERIAL_VECTORS ; deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+#if deal_II_dimension <= deal_II_space_dimension
+    namespace VectorTools \{
+      // compute_mean_value for each VEC, with and without an explicit Mapping
+      template // overload: Mapping, DoFHandler, Quadrature, VEC, unsigned int
+        double compute_mean_value<deal_II_dimension>
+        (const Mapping<deal_II_dimension,deal_II_space_dimension>&,
+         const DoFHandler<deal_II_dimension,deal_II_space_dimension>&,
+         const Quadrature<deal_II_dimension>&,
+         const VEC&,
+         const unsigned int);
+
+      template // overload: DoFHandler, Quadrature, VEC, unsigned int
+        double compute_mean_value<deal_II_dimension>
+        (const DoFHandler<deal_II_dimension,deal_II_space_dimension>&,
+         const Quadrature<deal_II_dimension>&,
+         const VEC&,
+         const unsigned int);
+
+      \} // namespace VectorTools
+#endif // deal_II_dimension <= deal_II_space_dimension
+  } // end of for loop
+
+
+
+for (VEC : SERIAL_VECTORS)
+{ // dimension-independent: the loop runs over vector types only
+    namespace VectorTools \{
+      template
+        void subtract_mean_value(VEC &, const std::vector<bool> &);
+    \} // namespace VectorTools
+} // end of loop over SERIAL_VECTORS
diff --git a/source/numerics/vector_tools_point_gradient.cc b/source/numerics/vector_tools_point_gradient.cc
new file mode 100644
index 0000000..7cc9ed6
--- /dev/null
+++ b/source/numerics/vector_tools_point_gradient.cc
@@ -0,0 +1,24 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/numerics/vector_tools.templates.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+// ---------------------------- explicit instantiations --------------------
+#include "vector_tools_point_gradient.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/numerics/vector_tools_point_gradient.inst.in b/source/numerics/vector_tools_point_gradient.inst.in
new file mode 100644
index 0000000..ad02adc
--- /dev/null
+++ b/source/numerics/vector_tools_point_gradient.inst.in
@@ -0,0 +1,80 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+for (VEC : SERIAL_VECTORS ; deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+#if deal_II_dimension == deal_II_space_dimension
+
+    namespace VectorTools \{
+  // point_gradient overloads come in pairs: one takes a std::vector of Tensors, one returns a single Tensor
+  template // hp::DoFHandler, std::vector argument
+    void point_gradient (
+      const hp::DoFHandler<deal_II_dimension>&,
+      const VEC&,
+      const Point<deal_II_dimension>&,
+      std::vector<Tensor<1,deal_II_space_dimension,VEC::value_type> >&);
+
+  template // hp::DoFHandler, returns Tensor
+    Tensor<1,deal_II_space_dimension,VEC::value_type> point_gradient (
+      const hp::DoFHandler<deal_II_dimension>&,
+      const VEC&,
+      const Point<deal_II_dimension>&);
+
+  template // hp::MappingCollection + hp::DoFHandler, std::vector argument
+     void point_gradient (
+      const hp::MappingCollection<deal_II_dimension>&,
+      const hp::DoFHandler<deal_II_dimension>&,
+      const VEC&,
+      const Point<deal_II_dimension>&,
+      std::vector<Tensor<1,deal_II_space_dimension,VEC::value_type> >&);
+
+  template // hp::MappingCollection + hp::DoFHandler, returns Tensor
+    Tensor<1,deal_II_space_dimension,VEC::value_type> point_gradient (
+     const hp::MappingCollection<deal_II_dimension>&,
+      const hp::DoFHandler<deal_II_dimension>&,
+      const VEC&,
+      const Point<deal_II_dimension>&);
+
+  template // DoFHandler, std::vector argument
+    void point_gradient (
+      const DoFHandler<deal_II_dimension>&,
+      const VEC&,
+      const Point<deal_II_dimension>&,
+      std::vector<Tensor<1,deal_II_space_dimension,VEC::value_type> >&);
+
+  template // DoFHandler, returns Tensor
+    Tensor<1,deal_II_space_dimension,VEC::value_type> point_gradient (
+      const DoFHandler<deal_II_dimension>&,
+      const VEC&,
+      const Point<deal_II_dimension>&);
+
+  template // Mapping + DoFHandler, std::vector argument
+    void point_gradient (
+      const Mapping<deal_II_dimension>&,
+      const DoFHandler<deal_II_dimension>&,
+      const VEC&,
+      const Point<deal_II_dimension>&,
+      std::vector<Tensor<1,deal_II_space_dimension,VEC::value_type> >&);
+
+  template // Mapping + DoFHandler, returns Tensor
+    Tensor<1,deal_II_space_dimension,VEC::value_type> point_gradient (
+      const Mapping<deal_II_dimension>&,
+      const DoFHandler<deal_II_dimension>&,
+      const VEC&,
+      const Point<deal_II_dimension>&);
+
+      \} // namespace VectorTools
+#endif // deal_II_dimension == deal_II_space_dimension
+  } // end of for loop
diff --git a/source/numerics/vector_tools_point_value.cc b/source/numerics/vector_tools_point_value.cc
new file mode 100644
index 0000000..b4f3eb2
--- /dev/null
+++ b/source/numerics/vector_tools_point_value.cc
@@ -0,0 +1,24 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/numerics/vector_tools.templates.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+// ---------------------------- explicit instantiations --------------------
+#include "vector_tools_point_value.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/numerics/vector_tools_point_value.inst.in b/source/numerics/vector_tools_point_value.inst.in
new file mode 100644
index 0000000..c5ee68d
--- /dev/null
+++ b/source/numerics/vector_tools_point_value.inst.in
@@ -0,0 +1,164 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+//TODO[SP]: replace <deal_II_dimension> by <deal_II_dimension, deal_II_space_dimension>
+// where applicable and move to codimension cases above also when applicable
+for (VEC : SERIAL_VECTORS ; deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+#if deal_II_dimension == deal_II_space_dimension // only instantiate when dim equals spacedim
+
+    namespace VectorTools \{
+  // point_value() on an hp::DoFHandler, first without and then with an hp::MappingCollection
+  template
+    void point_value<deal_II_dimension> (
+      const hp::DoFHandler<deal_II_dimension>&,
+      const VEC&,
+      const Point<deal_II_dimension>&,
+      Vector<double>&);
+
+  template
+    double point_value<deal_II_dimension> (
+      const hp::DoFHandler<deal_II_dimension>&,
+      const VEC&,
+      const Point<deal_II_dimension>&);
+
+  template
+    void point_value<deal_II_dimension> (
+      const hp::MappingCollection<deal_II_dimension>&,
+      const hp::DoFHandler<deal_II_dimension>&,
+      const VEC&,
+      const Point<deal_II_dimension>&,
+      Vector<double>&);
+
+  template
+    double point_value<deal_II_dimension> (
+      const hp::MappingCollection<deal_II_dimension>&,
+      const hp::DoFHandler<deal_II_dimension>&,
+      const VEC&,
+      const Point<deal_II_dimension>&);
+      // point_difference() on a DoFHandler, first without and then with a Mapping
+      template
+        void point_difference<deal_II_dimension> (
+          const DoFHandler<deal_II_dimension>&,
+          const VEC&,
+          const Function<deal_II_dimension>&,
+          Vector<double>&,
+          const Point<deal_II_dimension>&);
+
+      template
+        void point_difference<deal_II_dimension> (
+          const Mapping<deal_II_dimension>&,
+          const DoFHandler<deal_II_dimension>&,
+          const VEC&,
+          const Function<deal_II_dimension>&,
+          Vector<double>&,
+          const Point<deal_II_dimension>&);
+      // point_value() on a DoFHandler, first without and then with a Mapping
+      template
+        void point_value<deal_II_dimension> (
+          const DoFHandler<deal_II_dimension>&,
+          const VEC&,
+          const Point<deal_II_dimension>&,
+          Vector<double>&);
+
+      template
+        double point_value<deal_II_dimension> (
+          const DoFHandler<deal_II_dimension>&,
+          const VEC&,
+          const Point<deal_II_dimension>&);
+
+      template
+        void point_value<deal_II_dimension> (
+          const Mapping<deal_II_dimension>&,
+          const DoFHandler<deal_II_dimension>&,
+          const VEC&,
+          const Point<deal_II_dimension>&,
+          Vector<double>&);
+
+      template
+        double point_value<deal_II_dimension> (
+          const Mapping<deal_II_dimension>&,
+          const DoFHandler<deal_II_dimension>&,
+          const VEC&,
+          const Point<deal_II_dimension>&);
+
+      \}
+#endif // deal_II_dimension == deal_II_space_dimension
+  }
+
+
+
+//TODO[SP]: replace <deal_II_dimension> by <deal_II_dimension, deal_II_space_dimension>
+// where applicable and move to codimension cases above also when applicable
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+    namespace VectorTools \{
+
+#if deal_II_dimension == deal_II_space_dimension // only instantiate when dim equals spacedim
+      // The first four instantiations take one Point argument, the last four take two.
+      template
+        void create_point_source_vector<deal_II_dimension>
+        (const Mapping<deal_II_dimension>    &,
+         const DoFHandler<deal_II_dimension> &,
+         const Point<deal_II_dimension>      &,
+         Vector<double>                      &);
+      template
+        void create_point_source_vector<deal_II_dimension>
+        (const DoFHandler<deal_II_dimension> &,
+         const Point<deal_II_dimension>      &,
+         Vector<double>                      &);
+
+      template
+        void create_point_source_vector<deal_II_dimension>
+        (const hp::MappingCollection<deal_II_dimension>    &,
+         const hp::DoFHandler<deal_II_dimension> &,
+         const Point<deal_II_dimension>      &,
+         Vector<double>                      &);
+      template
+        void create_point_source_vector<deal_II_dimension>
+        (const hp::DoFHandler<deal_II_dimension> &,
+         const Point<deal_II_dimension>      &,
+         Vector<double>                      &);
+      template
+        void create_point_source_vector<deal_II_dimension>
+        (const Mapping<deal_II_dimension>    &,
+         const DoFHandler<deal_II_dimension> &,
+         const Point<deal_II_dimension>      &,
+         const Point<deal_II_dimension>      &,
+         Vector<double>                      &);
+      template
+        void create_point_source_vector<deal_II_dimension>
+        (const DoFHandler<deal_II_dimension> &,
+         const Point<deal_II_dimension>      &,
+         const Point<deal_II_dimension>      &,
+         Vector<double>                      &);
+
+      template
+        void create_point_source_vector<deal_II_dimension>
+        (const hp::MappingCollection<deal_II_dimension>    &,
+         const hp::DoFHandler<deal_II_dimension> &,
+         const Point<deal_II_dimension>      &,
+         const Point<deal_II_dimension>      &,
+         Vector<double>                      &);
+      template
+        void create_point_source_vector<deal_II_dimension>
+        (const hp::DoFHandler<deal_II_dimension> &,
+         const Point<deal_II_dimension>      &,
+         const Point<deal_II_dimension>      &,
+         Vector<double>                      &);
+#endif // deal_II_dimension == deal_II_space_dimension
+      \}
+  }
diff --git a/source/numerics/vector_tools_project.cc b/source/numerics/vector_tools_project.cc
new file mode 100644
index 0000000..3fd0d4b
--- /dev/null
+++ b/source/numerics/vector_tools_project.cc
@@ -0,0 +1,24 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/numerics/vector_tools.templates.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+// ---------------------------- explicit instantiations --------------------
+#include "vector_tools_project.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/numerics/vector_tools_project.inst.in b/source/numerics/vector_tools_project.inst.in
new file mode 100644
index 0000000..2afeedb
--- /dev/null
+++ b/source/numerics/vector_tools_project.inst.in
@@ -0,0 +1,69 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+for (VEC : SERIAL_VECTORS ; deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+#if deal_II_dimension <= deal_II_space_dimension // skip combinations with dim greater than spacedim
+    namespace VectorTools \{
+      // project() on a DoFHandler, first with and then without an explicit Mapping
+      template
+        void project
+        (const Mapping<deal_II_dimension,deal_II_space_dimension>      &,
+         const DoFHandler<deal_II_dimension,deal_II_space_dimension>   &,
+         const ConstraintMatrix                &,
+         const Quadrature<deal_II_dimension>   &,
+         const Function<deal_II_space_dimension>     &,
+         VEC                                   &,
+         const bool,
+         const Quadrature<deal_II_dimension-1> &,
+         const bool);
+
+      template
+        void project
+        (const DoFHandler<deal_II_dimension,deal_II_space_dimension>   &,
+         const ConstraintMatrix                &,
+         const Quadrature<deal_II_dimension>   &,
+         const Function<deal_II_space_dimension>     &,
+         VEC                                   &,
+         const bool,
+         const Quadrature<deal_II_dimension-1> &,
+         const bool);
+      // project() on an hp::DoFHandler, first with and then without an hp::MappingCollection
+      template
+        void project
+        (const hp::MappingCollection<deal_II_dimension,deal_II_space_dimension>      &,
+         const hp::DoFHandler<deal_II_dimension,deal_II_space_dimension>   &,
+         const ConstraintMatrix                &,
+         const hp::QCollection<deal_II_dimension>   &,
+         const Function<deal_II_space_dimension>     &,
+         VEC                                   &,
+         const bool,
+         const hp::QCollection<deal_II_dimension-1> &,
+         const bool);
+
+      template
+        void project
+        (const hp::DoFHandler<deal_II_dimension,deal_II_space_dimension>   &,
+         const ConstraintMatrix                &,
+         const hp::QCollection<deal_II_dimension>   &,
+         const Function<deal_II_space_dimension>     &,
+         VEC                                   &,
+         const bool,
+         const hp::QCollection<deal_II_dimension-1> &,
+         const bool);
+      \}
+#endif // deal_II_dimension <= deal_II_space_dimension
+  }
diff --git a/source/numerics/vector_tools_rhs.cc b/source/numerics/vector_tools_rhs.cc
new file mode 100644
index 0000000..5355480
--- /dev/null
+++ b/source/numerics/vector_tools_rhs.cc
@@ -0,0 +1,85 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#include <deal.II/numerics/vector_tools.templates.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace VectorTools
+{
+// Explicit specializations for dim = 1. Without them we would get linker
+// errors, because (hp::)FEFaceValues<1> is not compiled. Each one ignores
+// its arguments and only raises ExcImpossibleInDim(1) through Assert.
+  template <>
+  void
+  create_boundary_right_hand_side (const Mapping<1,1> &,
+                                   const DoFHandler<1,1> &,
+                                   const Quadrature<0> &,
+                                   const Function<1> &,
+                                   Vector<double> &,
+                                   const std::set<types::boundary_id> &)
+  {
+    Assert (false, ExcImpossibleInDim(1));
+  }
+
+
+  // Mapping/DoFHandler version for dim = 1, spacedim = 2.
+  template <>
+  void
+  create_boundary_right_hand_side (const Mapping<1,2> &,
+                                   const DoFHandler<1,2> &,
+                                   const Quadrature<0> &,
+                                   const Function<2> &,
+                                   Vector<double> &,
+                                   const std::set<types::boundary_id> &)
+  {
+    Assert (false, ExcImpossibleInDim(1));
+  }
+
+
+  // hp version for dim = 1, spacedim = 1.
+  template <>
+  void
+  create_boundary_right_hand_side (const hp::MappingCollection<1,1> &,
+                                   const hp::DoFHandler<1,1> &,
+                                   const hp::QCollection<0> &,
+                                   const Function<1> &,
+                                   Vector<double> &,
+                                   const std::set<types::boundary_id> &)
+  {
+    Assert (false, ExcImpossibleInDim(1));
+  }
+
+
+  // hp version for dim = 1, spacedim = 2.
+  template <>
+  void
+  create_boundary_right_hand_side (const hp::MappingCollection<1,2> &,
+                                   const hp::DoFHandler<1,2> &,
+                                   const hp::QCollection<0> &,
+                                   const Function<2> &,
+                                   Vector<double> &,
+                                   const std::set<types::boundary_id> &)
+  {
+    Assert (false, ExcImpossibleInDim(1));
+  }
+}
+
+// ---------------------------- explicit instantiations --------------------
+#include "vector_tools_rhs.inst"
+
+DEAL_II_NAMESPACE_CLOSE
diff --git a/source/numerics/vector_tools_rhs.inst.in b/source/numerics/vector_tools_rhs.inst.in
new file mode 100644
index 0000000..b6fd287
--- /dev/null
+++ b/source/numerics/vector_tools_rhs.inst.in
@@ -0,0 +1,107 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+#if deal_II_dimension <= deal_II_space_dimension // skip combinations with dim greater than spacedim
+    namespace VectorTools \{
+      template
+        void create_right_hand_side<deal_II_dimension,deal_II_space_dimension>
+        (const Mapping<deal_II_dimension,deal_II_space_dimension>    &,
+         const DoFHandler<deal_II_dimension,deal_II_space_dimension> &,
+         const Quadrature<deal_II_dimension> &,
+         const Function<deal_II_space_dimension>   &,
+         Vector<double>                      &);
+      // Same function without the explicit Mapping argument.
+      template
+        void create_right_hand_side<deal_II_dimension,deal_II_space_dimension>
+        (const DoFHandler<deal_II_dimension,deal_II_space_dimension> &,
+         const Quadrature<deal_II_dimension> &,
+         const Function<deal_II_space_dimension>   &,
+         Vector<double>  &);
+
+      \}
+#endif // deal_II_dimension <= deal_II_space_dimension
+  }
+
+//TODO[SP]: replace <deal_II_dimension> by <deal_II_dimension, deal_II_space_dimension>
+// where applicable and move to codimension cases above also when applicable
+for (deal_II_dimension : DIMENSIONS; deal_II_space_dimension :  SPACE_DIMENSIONS)
+  {
+    namespace VectorTools \{
+
+#if deal_II_dimension == deal_II_space_dimension // only instantiate when dim equals spacedim
+      template
+        void create_right_hand_side<deal_II_dimension>
+        (const hp::MappingCollection<deal_II_dimension>    &,
+         const hp::DoFHandler<deal_II_dimension> &,
+         const hp::QCollection<deal_II_dimension> &,
+         const Function<deal_II_dimension>   &,
+         Vector<double>                      &);
+
+      template
+        void create_right_hand_side<deal_II_dimension>
+        (const hp::DoFHandler<deal_II_dimension> &,
+         const hp::QCollection<deal_II_dimension> &,
+         const Function<deal_II_dimension>   &,
+         Vector<double>                      &);
+
+#if deal_II_dimension > 1 // for dim 1 this overload is explicitly specialized in vector_tools_rhs.cc
+      template
+        void
+        create_boundary_right_hand_side<deal_II_dimension>
+        (const Mapping<deal_II_dimension>    &,
+         const DoFHandler<deal_II_dimension> &,
+         const Quadrature<deal_II_dimension-1> &,
+         const Function<deal_II_dimension>   &,
+         Vector<double>                      &,
+         const std::set<types::boundary_id> &);
+#endif // deal_II_dimension > 1
+
+      template
+        void
+        create_boundary_right_hand_side<deal_II_dimension>
+        (const DoFHandler<deal_II_dimension> &,
+         const Quadrature<deal_II_dimension-1> &,
+         const Function<deal_II_dimension>   &,
+         Vector<double>                      &,
+         const std::set<types::boundary_id> &);
+
+#if deal_II_dimension > 1 // for dim 1 this overload is explicitly specialized in vector_tools_rhs.cc
+      template
+        void
+        create_boundary_right_hand_side<deal_II_dimension>
+        (const hp::MappingCollection<deal_II_dimension>    &,
+         const hp::DoFHandler<deal_II_dimension> &,
+         const hp::QCollection<deal_II_dimension-1> &,
+         const Function<deal_II_dimension>   &,
+         Vector<double>                      &,
+         const std::set<types::boundary_id> &);
+#endif // deal_II_dimension > 1
+
+      template
+        void
+        create_boundary_right_hand_side<deal_II_dimension>
+        (const hp::DoFHandler<deal_II_dimension> &,
+         const hp::QCollection<deal_II_dimension-1> &,
+         const Function<deal_II_dimension>   &,
+         Vector<double>                      &,
+         const std::set<types::boundary_id> &);
+
+
+#endif // deal_II_dimension == deal_II_space_dimension
+      \}
+  }
diff --git a/source/opencascade/CMakeLists.txt b/source/opencascade/CMakeLists.txt
new file mode 100644
index 0000000..4c26a5d
--- /dev/null
+++ b/source/opencascade/CMakeLists.txt
@@ -0,0 +1,33 @@
+## ---------------------------------------------------------------------
+## $Id$
+##
+## Copyright (C) 2012 - 2014 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_BINARY_DIR})
+# Source files compiled into the obj_opencascade object library.
+SET(_src
+  utilities.cc
+  boundary_lib.cc
+  )
+# Instantiation templates, handed to EXPAND_INSTANTIATIONS below.
+SET(_inst
+  boundary_lib.inst.in
+  )
+# Headers of the opencascade module, listed with the library sources.
+FILE(GLOB _header
+  ${CMAKE_SOURCE_DIR}/include/deal.II/opencascade/*.h
+  )
+# Declare the object library and expand its instantiation templates.
+DEAL_II_ADD_LIBRARY(obj_opencascade OBJECT ${_src} ${_header} ${_inst})
+EXPAND_INSTANTIATIONS(obj_opencascade "${_inst}")
diff --git a/source/opencascade/boundary_lib.cc b/source/opencascade/boundary_lib.cc
new file mode 100644
index 0000000..cb077f1
--- /dev/null
+++ b/source/opencascade/boundary_lib.cc
@@ -0,0 +1,263 @@
+#include <deal.II/opencascade/boundary_lib.h>
+
+#ifdef DEAL_II_WITH_OPENCASCADE
+
+#include <GCPnts_AbscissaPoint.hxx>
+#include <BRepAdaptor_Curve.hxx>
+#include <BRepAdaptor_CompCurve.hxx>
+#include <BRepAdaptor_HCurve.hxx>
+#include <BRepAdaptor_HCompCurve.hxx>
+#include <GCPnts_AbscissaPoint.hxx>
+#include <ShapeAnalysis_Curve.hxx>
+#include <BRep_Tool.hxx>
+#include <TopoDS.hxx>
+#include <Adaptor3d_HCurve.hxx>
+#include <Handle_Adaptor3d_HCurve.hxx>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace OpenCASCADE
+{
+
+
+  namespace
+  {
+    /**
+     * Wrap @p shape, which must be a wire or an edge, in the matching
+     * OpenCASCADE curve adaptor; other shape types hit the assertions.
+     */
+    Handle_Adaptor3d_HCurve curve_adaptor(const TopoDS_Shape &shape)
+    {
+      Assert( (shape.ShapeType() == TopAbs_WIRE) ||
+              (shape.ShapeType() == TopAbs_EDGE),
+              ExcUnsupportedShape());
+      switch (shape.ShapeType())
+        {
+        case TopAbs_WIRE: return (Handle(BRepAdaptor_HCompCurve(new BRepAdaptor_HCompCurve(TopoDS::Wire(shape)))));
+        case TopAbs_EDGE: return (Handle(BRepAdaptor_HCurve(new BRepAdaptor_HCurve(TopoDS::Edge(shape)))));
+        default: break;
+        }
+      Assert(false, ExcInternalError());
+      return Handle(BRepAdaptor_HCurve(new BRepAdaptor_HCurve()));
+    }
+
+
+
+// Internal helper: length of the curve underlying @p sh. The shape is
+// wrapped by curve_adaptor(), so it has to be a wire or an edge.
+    double shape_length(const TopoDS_Shape &sh)
+    {
+      return GCPnts_AbscissaPoint::Length(curve_adaptor(sh)->GetCurve());
+    }
+  }
+
+  /*============================== NormalProjectionBoundary ==============================*/
+  template <int dim, int spacedim>
+  NormalProjectionBoundary<dim,spacedim>::NormalProjectionBoundary(const TopoDS_Shape &sh,
+      const double tolerance) :
+    sh(sh), // shape that points are projected onto
+    tolerance(tolerance) // forwarded to closest_point() by project_to_manifold()
+  {
+    Assert(spacedim == 3, ExcNotImplemented()); // only spacedim == 3 is implemented
+  }
+
+
+  template <int dim, int spacedim>
+  Point<spacedim>  NormalProjectionBoundary<dim,spacedim>::
+  project_to_manifold (const std::vector<Point<spacedim> > &surrounding_points,
+                       const Point<spacedim> &candidate) const
+  {
+    (void)surrounding_points; // only read by the DEBUG-only check below
+#ifdef DEBUG
+    for (unsigned int i=0; i<surrounding_points.size(); ++i) // each point must lie within max(tolerance*norm, tolerance) of its closest point on sh
+      Assert(closest_point(sh, surrounding_points[i], tolerance)
+             .distance(surrounding_points[i]) <
+             std::max(tolerance*surrounding_points[i].norm(), tolerance),
+             ExcPointNotOnManifold(surrounding_points[i]));
+#endif // DEBUG
+    return closest_point(sh, candidate,tolerance); // the result is whatever closest_point() reports for the candidate
+  }
+
+
+  /*============================== DirectionalProjectionBoundary ==============================*/
+  template <int dim, int spacedim>
+  DirectionalProjectionBoundary<dim,spacedim>::DirectionalProjectionBoundary(const TopoDS_Shape &sh,
+      const Tensor<1,spacedim> &direction,
+      const double tolerance) :
+    sh(sh), // shape that points are projected onto
+    direction(direction), // direction handed to line_intersection() by project_to_manifold()
+    tolerance(tolerance)
+  {
+    Assert(spacedim == 3, ExcNotImplemented()); // only spacedim == 3 is implemented
+  }
+
+
+  template <int dim, int spacedim>
+  Point<spacedim>  DirectionalProjectionBoundary<dim,spacedim>::
+  project_to_manifold (const std::vector<Point<spacedim> > &surrounding_points,
+                       const Point<spacedim> &candidate) const
+  {
+    (void)surrounding_points; // only read by the DEBUG-only check below
+#ifdef DEBUG
+    for (unsigned int i=0; i<surrounding_points.size(); ++i) // each point must lie within max(tolerance*norm, tolerance) of its closest point on sh
+      Assert(closest_point(sh, surrounding_points[i],tolerance)
+             .distance(surrounding_points[i]) <
+             std::max(tolerance*surrounding_points[i].norm(), tolerance),
+             ExcPointNotOnManifold(surrounding_points[i]));
+#endif // DEBUG
+    return line_intersection(sh, candidate, direction, tolerance); // uses the fixed direction stored by the constructor
+  }
+
+
+
+  /*============================== NormalToMeshProjectionBoundary ==============================*/
+  template <int dim, int spacedim>
+  NormalToMeshProjectionBoundary<dim,spacedim>::NormalToMeshProjectionBoundary(const TopoDS_Shape &sh,
+      const double tolerance) :
+    sh(sh), // shape that points are projected onto
+    tolerance(tolerance)
+  {
+    Assert(spacedim == 3, ExcNotImplemented()); // only spacedim == 3 is implemented
+    Assert(std_cxx11::get<0>(count_elements(sh)) > 0, // entry 0 of count_elements() is treated as the face count, per the message
+           ExcMessage("NormalToMeshProjectionBoundary needs a shape containing faces to operate."));
+  }
+
+
+  namespace
+  {
+    // Unit vector along the cross product of u and v; used below to build
+    // the normals in the eight-point case of project_to_manifold().
+    Tensor<1,3> unit_cross_product(const Tensor<1,3> &u, const Tensor<1,3> &v)
+    {
+      const double coords[3] = {u[1] *v[2]-u[2] *v[1],u[2] *v[0]-u[0] *v[2],u[0] *v[1]-u[1] *v[0]};
+      const Tensor<1,3> n(coords);
+      return n/n.norm();
+    }
+  }
+
+  // Intersect the shape with the line through @p candidate whose direction
+  // is estimated from @p surrounding_points: for two points, the average of
+  // the surface normals at both points with its component along the
+  // connecting segment removed; for eight points, the average of four unit
+  // normals built from point differences. Other counts are not implemented.
+  template <int dim, int spacedim>
+  Point<spacedim>  NormalToMeshProjectionBoundary<dim,spacedim>::
+  project_to_manifold (const std::vector<Point<spacedim> > &surrounding_points,
+                       const Point<spacedim> &candidate) const
+  {
+    Tensor<1,3> average_normal;
+#ifdef DEBUG
+    for (unsigned int i=0; i<surrounding_points.size(); ++i)
+      {
+        Assert(closest_point(sh, surrounding_points[i], tolerance)
+               .distance(surrounding_points[i]) <
+               std::max(tolerance*surrounding_points[i].norm(), tolerance),
+               ExcPointNotOnManifold(surrounding_points[i]));
+      }
+#endif
+
+    switch (surrounding_points.size())
+      {
+      case 2:
+      {
+        for (unsigned int i=0; i<surrounding_points.size(); ++i)
+          {
+            std_cxx11::tuple<Point<3>, Point<3>, double>
+            p_and_diff_forms =
+              closest_point_and_differential_forms(sh,
+                                                   surrounding_points[i],
+                                                   tolerance);
+            average_normal += std_cxx11::get<1>(p_and_diff_forms);
+          }
+
+        average_normal/=2.0;
+
+        Assert(average_normal.norm() > 1e-4,
+               ExcMessage("Failed to refine cell: the average of the surface normals at the surrounding edge turns out to be a null vector, making the projection direction undetermined."));
+
+        Tensor<1,3> T = surrounding_points[0]-surrounding_points[1];
+        T /= T.norm();
+        average_normal = average_normal-(average_normal*T)*T;
+        average_normal /= average_normal.norm();
+        break;
+      }
+      case 8:
+      {
+        const Tensor<1,3> n1 = unit_cross_product(surrounding_points[1]-surrounding_points[0],
+                                                  surrounding_points[2]-surrounding_points[0]);
+        const Tensor<1,3> n2 = unit_cross_product(surrounding_points[2]-surrounding_points[3],
+                                                  surrounding_points[1]-surrounding_points[3]);
+        const Tensor<1,3> n3 = unit_cross_product(surrounding_points[4]-surrounding_points[7],
+                                                  surrounding_points[6]-surrounding_points[7]);
+        const Tensor<1,3> n4 = unit_cross_product(surrounding_points[6]-surrounding_points[7],
+                                                  surrounding_points[5]-surrounding_points[7]);
+
+        average_normal = (n1+n2+n3+n4)/4.0;
+
+        Assert(average_normal.norm() > tolerance,
+               ExcMessage("Failed to refine cell: the normal estimated via the surrounding points turns out to be a null vector, making the projection direction undetermined."));
+
+        average_normal /= average_normal.norm();
+        break;
+      }
+      default:
+      {
+        AssertThrow(false, ExcNotImplemented());
+        break;
+      }
+      }
+
+    return line_intersection(sh, candidate, average_normal, tolerance);
+  }
+
+
+  /*============================== ArclengthProjectionLineManifold ==============================*/
+  template <int dim, int spacedim>
+  ArclengthProjectionLineManifold<dim,spacedim>::ArclengthProjectionLineManifold(const TopoDS_Shape &sh,
+      const double tolerance):
+    // A closed shape passes its length to ChartManifold (presumably as the chart periodicity -- confirm); otherwise a default Point<1>.
+    ChartManifold<dim,spacedim,1>(sh.Closed() ?
+                                  Point<1>(shape_length(sh)) :
+                                  Point<1>()),
+    curve(curve_adaptor(sh)),
+    tolerance(tolerance),
+    length(shape_length(sh)) // shape_length() is evaluated a second time here
+  {
+    Assert(spacedim == 3, ExcNotImplemented()); // only spacedim == 3 is implemented
+  }
+
+
+  template <int dim, int spacedim>
+  Point<1>
+  ArclengthProjectionLineManifold<dim,spacedim>::pull_back(const Point<spacedim> &space_point) const
+  {
+    // Project onto the curve; Project() fills in the projected point and its curve parameter.
+    gp_Pnt projected_point;
+    double curve_parameter = 0.0;
+    const double dist = ShapeAnalysis_Curve().Project(curve->GetCurve(), point(space_point), tolerance, projected_point, curve_parameter, true);
+    Assert(dist < tolerance*length, ExcPointNotOnManifold(space_point));
+    (void)dist; // The assertion is the only reader of dist.
+    return Point<1>(GCPnts_AbscissaPoint::Length(curve->GetCurve(), curve->GetCurve().FirstParameter(), curve_parameter));
+  }
+
+
+
+  template <int dim, int spacedim>
+  Point<spacedim>
+  ArclengthProjectionLineManifold<dim,spacedim>::push_forward(const Point<1> &chart_point) const
+  {
+    // Find the curve parameter at abscissa chart_point[0] measured from the first parameter.
+    GCPnts_AbscissaPoint abscissa(curve->GetCurve(), chart_point[0], curve->GetCurve().FirstParameter());
+    return point(curve->GetCurve().Value(abscissa.Parameter()));
+  }
+
+
+// Explicit instantiations
+#include "boundary_lib.inst"
+
+} // end namespace OpenCASCADE
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/source/opencascade/boundary_lib.inst.in b/source/opencascade/boundary_lib.inst.in
new file mode 100644
index 0000000..7d4db99
--- /dev/null
+++ b/source/opencascade/boundary_lib.inst.in
@@ -0,0 +1,28 @@
+// ---------------------------------------------------------------------
+// $Id$
+//
+// Copyright (C) 1998 - 2014 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+for (deal_II_dimension : DIMENSIONS) // the space dimension is fixed to 3 for every class below
+  {
+	template class NormalProjectionBoundary<deal_II_dimension, 3>;
+	template class DirectionalProjectionBoundary<deal_II_dimension, 3>;
+        template class NormalToMeshProjectionBoundary<deal_II_dimension, 3>;
+	template class ArclengthProjectionLineManifold<deal_II_dimension, 3>; 
+  }
+
+
+
diff --git a/source/opencascade/utilities.cc b/source/opencascade/utilities.cc
new file mode 100644
index 0000000..c170b97
--- /dev/null
+++ b/source/opencascade/utilities.cc
@@ -0,0 +1,638 @@
+#include <deal.II/opencascade/utilities.h>
+
+#ifdef DEAL_II_WITH_OPENCASCADE
+
+#include <deal.II/base/point.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/exceptions.h>
+
+#include <boost/bind.hpp>
+
+#include <cstdio>
+#include <iostream>
+#include <set>
+
+DEAL_II_DISABLE_EXTRA_DIAGNOSTICS
+
+#include <IGESControl_Controller.hxx>
+#include <IGESControl_Reader.hxx>
+#include <IGESControl_Writer.hxx>
+
+#include <STEPControl_Controller.hxx>
+#include <STEPControl_Reader.hxx>
+#include <STEPControl_Writer.hxx>
+
+#include <TopoDS.hxx>
+#include <TopoDS_Shape.hxx>
+#include <TopoDS_Face.hxx>
+#include <TopoDS_Edge.hxx>
+#include <TopExp_Explorer.hxx>
+
+#include <Handle_Standard_Transient.hxx>
+
+#include <TColStd_SequenceOfTransient.hxx>
+#include <TColStd_HSequenceOfTransient.hxx>
+#include <TColgp_HArray1OfPnt.hxx>
+
+#include <gp_Pnt.hxx>
+#include <gp_Lin.hxx>
+#include <gp_Vec.hxx>
+#include <GeomAPI_ProjectPointOnSurf.hxx>
+#include <GeomAPI_ProjectPointOnCurve.hxx>
+#include <IntCurvesFace_ShapeIntersector.hxx>
+
+#include <BRepTools.hxx>
+#include <BRepAdaptor_Curve.hxx>
+#include <BRepAdaptor_HCurve.hxx>
+#include <BRepAdaptor_HCompCurve.hxx>
+#include <BRepAdaptor_Surface.hxx>
+#include <BRep_Builder.hxx>
+#include <BRepBuilderAPI_Transform.hxx>
+#include <BRepBuilderAPI_MakeEdge.hxx>
+#include <BRepAlgo_Section.hxx>
+
+#include <Geom_Plane.hxx>
+#include <Geom_BoundedCurve.hxx>
+#include <GeomAPI_Interpolate.hxx>
+#include <GeomConvert_CompCurveToBSplineCurve.hxx>
+#include <GeomLProp_SLProps.hxx>
+
+#include <GCPnts_AbscissaPoint.hxx>
+#include <ShapeAnalysis_Surface.hxx>
+
+DEAL_II_ENABLE_EXTRA_DIAGNOSTICS
+
+#include <vector>
+#include <algorithm>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace OpenCASCADE
+{
+  // Count the faces, edges and vertices reachable from @p shape and return
+  // the three counts as a (faces, edges, vertices) tuple.
+  std_cxx11::tuple<unsigned int, unsigned int, unsigned int>
+  count_elements(const TopoDS_Shape &shape)
+  {
+    unsigned int face_count = 0;
+    for (TopExp_Explorer explorer(shape, TopAbs_FACE); explorer.More(); explorer.Next())
+      ++face_count;
+
+    unsigned int edge_count = 0;
+    for (TopExp_Explorer explorer(shape, TopAbs_EDGE); explorer.More(); explorer.Next())
+      ++edge_count;
+
+    unsigned int vertex_count = 0;
+    for (TopExp_Explorer explorer(shape, TopAbs_VERTEX); explorer.More(); explorer.Next())
+      ++vertex_count;
+
+    return std_cxx11::tuple<unsigned int, unsigned int, unsigned int>(face_count,
+           edge_count,
+           vertex_count);
+  }
+
+  // Replace the contents of @p faces, @p edges and @p vertices with the
+  // faces, edges and vertices found while exploring @p shape. Previous
+  // contents of the three output vectors are discarded.
+  void extract_geometrical_shapes(const TopoDS_Shape &shape,
+                                  std::vector<TopoDS_Face> &faces,
+                                  std::vector<TopoDS_Edge> &edges,
+                                  std::vector<TopoDS_Vertex> &vertices)
+  {
+    faces.clear();
+    edges.clear();
+    vertices.clear();
+
+    for (TopExp_Explorer explorer(shape, TopAbs_FACE); explorer.More(); explorer.Next())
+      faces.push_back(TopoDS::Face(explorer.Current()));
+
+    for (TopExp_Explorer explorer(shape, TopAbs_EDGE); explorer.More(); explorer.Next())
+      edges.push_back(TopoDS::Edge(explorer.Current()));
+
+    for (TopExp_Explorer explorer(shape, TopAbs_VERTEX); explorer.More(); explorer.Next())
+      vertices.push_back(TopoDS::Vertex(explorer.Current()));
+  }
+
+
+  // Replace the contents of the five output vectors with the compounds,
+  // compsolids, solids, shells and wires found while exploring @p shape.
+  // Previous contents of the output vectors are discarded.
+  void extract_compound_shapes(const TopoDS_Shape &shape,
+                               std::vector<TopoDS_Compound> &compounds,
+                               std::vector<TopoDS_CompSolid> &compsolids,
+                               std::vector<TopoDS_Solid> &solids,
+                               std::vector<TopoDS_Shell> &shells,
+                               std::vector<TopoDS_Wire> &wires)
+  {
+    compounds.clear();
+    compsolids.clear();
+    solids.clear();
+    shells.clear();
+    wires.clear();
+
+    for (TopExp_Explorer explorer(shape, TopAbs_COMPOUND); explorer.More(); explorer.Next())
+      compounds.push_back(TopoDS::Compound(explorer.Current()));
+
+    for (TopExp_Explorer explorer(shape, TopAbs_COMPSOLID); explorer.More(); explorer.Next())
+      compsolids.push_back(TopoDS::CompSolid(explorer.Current()));
+
+    for (TopExp_Explorer explorer(shape, TopAbs_SOLID); explorer.More(); explorer.Next())
+      solids.push_back(TopoDS::Solid(explorer.Current()));
+
+    for (TopExp_Explorer explorer(shape, TopAbs_SHELL); explorer.More(); explorer.Next())
+      shells.push_back(TopoDS::Shell(explorer.Current()));
+
+    for (TopExp_Explorer explorer(shape, TopAbs_WIRE); explorer.More(); explorer.Next())
+      wires.push_back(TopoDS::Wire(explorer.Current()));
+  }
+
+  // Convert a deal.II point into an OpenCASCADE gp_Pnt with the same
+  // three coordinates.
+  gp_Pnt point(const Point<3> &p)
+  {
+    const double x = p[0];
+    const double y = p[1];
+    const double z = p[2];
+    return gp_Pnt(x, y, z);
+  }
+
+
+  // Convert an OpenCASCADE gp_Pnt into a deal.II point with the same
+  // three coordinates.
+  Point<3> point(const gp_Pnt &p)
+  {
+    const double x = p.X();
+    const double y = p.Y();
+    const double z = p.Z();
+    return Point<3>(x, y, z);
+  }
+
+  // Ordering predicate for points, up to a tolerance. If @p direction has
+  // non-zero norm, the points are ordered by their scalar product with
+  // @p direction. Otherwise the coordinates are compared starting from the
+  // last one (z, then y, then x). Returns true only if @p p1 precedes @p p2
+  // by more than the tolerance.
+  bool point_compare(const Point<3> &p1, const Point<3> &p2,
+                     const Tensor<1,3> &direction,
+                     const double tolerance)
+  {
+    // The tolerance is scaled by the larger of the two norms, but is never
+    // smaller than the tolerance given by the caller.
+    const double largest_norm = std::max(p1.norm(), p2.norm());
+    const double rel_tol = std::max(tolerance, largest_norm*tolerance);
+
+    if (direction.norm() > 0.0)
+      return (p1*direction < p2*direction-rel_tol);
+
+    for (int d=2; d>=0; --d)
+      {
+        if (p1[d] < p2[d]-rel_tol)
+          return true;
+        if (p2[d] < p1[d]-rel_tol)
+          return false;
+      }
+
+    // No coordinate differs by more than the tolerance: the points are
+    // considered equal, so neither precedes the other.
+    return false;
+  }
+
+
+  // Read the IGES file @p filename, transfer its root entities into a
+  // single shape and return that shape scaled by @p scale_factor. Throws
+  // if the file cannot be read or if no root is transferred.
+  TopoDS_Shape read_IGES(const std::string &filename,
+                         const double scale_factor)
+  {
+    IGESControl_Reader reader;
+    const IFSelect_ReturnStatus read_status = reader.ReadFile(filename.c_str());
+    AssertThrow(read_status == IFSelect_RetDone,
+                ExcMessage("Error in reading file!"));
+
+    // Print the load check results, item by item, not restricted to
+    // failures.
+    reader.PrintCheckLoad (Standard_False, IFSelect_ItemsByEntity);
+
+    // Transfer the root entities of the file and make sure at least one
+    // was transferred.
+    const Standard_Integer n_roots = reader.TransferRoots();
+    AssertThrow(n_roots > 0,
+                ExcMessage("Read nothing from file."));
+
+    // Scaling transformation centred on a default-constructed point.
+    gp_Pnt scaling_center;
+    gp_Trsf scaling;
+    scaling.SetScale (scaling_center, scale_factor);
+
+    // Gather everything that was transferred into one shape and apply the
+    // scaling to it.
+    TopoDS_Shape unscaled_shape = reader.OneShape();
+    BRepBuilderAPI_Transform transformer(unscaled_shape, scaling);
+
+    return transformer.Shape();
+  }
+
+  // Write @p shape to the IGES file @p filename. Throws if the shape
+  // cannot be added to the writer or if writing the file fails.
+  void write_IGES(const TopoDS_Shape &shape,
+                  const std::string &filename)
+  {
+    IGESControl_Controller::Init();
+    IGESControl_Writer writer ("MM", 0);
+
+    const Standard_Boolean shape_added = writer.AddShape (shape);
+    AssertThrow(shape_added, ExcMessage("Failed to add shape to IGES controller."));
+
+    writer.ComputeModel();
+
+    const Standard_Boolean file_written = writer.Write (filename.c_str());
+    AssertThrow(file_written, ExcMessage("Failed to write IGES file."));
+  }
+
+  // Read the STEP file @p filename, transfer its root entities into a
+  // single shape and return that shape scaled by @p scale_factor. Throws
+  // if the file cannot be read or if no root is transferred.
+  TopoDS_Shape read_STEP(const std::string &filename,
+                         const double scale_factor)
+  {
+    STEPControl_Reader reader;
+    const IFSelect_ReturnStatus read_status = reader.ReadFile(filename.c_str());
+    AssertThrow(read_status == IFSelect_RetDone,
+                ExcMessage("Error in reading file!"));
+
+    // Print the load check results, item by item, not restricted to
+    // failures.
+    reader.PrintCheckLoad (Standard_False, IFSelect_ItemsByEntity);
+
+    // Transfer the root entities of the STEP file and make sure at least
+    // one was transferred.
+    const Standard_Integer n_roots = reader.TransferRoots();
+    AssertThrow(n_roots > 0,
+                ExcMessage("Read nothing from file."));
+
+    // Scaling transformation centred on a default-constructed point.
+    gp_Pnt scaling_center;
+    gp_Trsf scaling;
+    scaling.SetScale (scaling_center, scale_factor);
+
+    // Gather everything that was transferred into one shape and apply the
+    // scaling to it.
+    TopoDS_Shape unscaled_shape = reader.OneShape();
+    BRepBuilderAPI_Transform transformer(unscaled_shape, scaling);
+
+    return transformer.Shape();
+  }
+
+  // Write @p shape to the STEP file @p filename. Throws if the shape
+  // cannot be transferred to the writer or if writing the file fails.
+  void write_STEP(const TopoDS_Shape &shape,
+                  const std::string &filename)
+  {
+    STEPControl_Controller::Init();
+    STEPControl_Writer writer;
+
+    const IFSelect_ReturnStatus transfer_status = writer.Transfer(shape, STEPControl_AsIs);
+    AssertThrow(transfer_status == IFSelect_RetDone, ExcMessage("Failed to add shape to STEP controller."));
+
+    const IFSelect_ReturnStatus write_status = writer.Write(filename.c_str());
+    AssertThrow(write_status == IFSelect_RetDone, ExcMessage("Failed to write translated shape to STEP file."));
+  }
+
+  // Return the largest tolerance stored by BRep_Tool for any vertex, edge
+  // or face of @p shape. Returns 0 if the shape has none of them.
+  double get_shape_tolerance(const TopoDS_Shape &shape)
+  {
+    std::vector<TopoDS_Face> faces;
+    std::vector<TopoDS_Edge> edges;
+    std::vector<TopoDS_Vertex> vertices;
+
+    extract_geometrical_shapes(shape,
+                               faces,
+                               edges,
+                               vertices);
+
+    // std::max comes from <algorithm>, which this file includes. The
+    // previous unqualified fmax relied on <cmath> being pulled in
+    // indirectly by some other header.
+    double tolerance = 0.0;
+
+    for (unsigned int i=0; i<vertices.size(); ++i)
+      tolerance = std::max<double>(tolerance, BRep_Tool::Tolerance(vertices[i]));
+
+    for (unsigned int i=0; i<edges.size(); ++i)
+      tolerance = std::max<double>(tolerance, BRep_Tool::Tolerance(edges[i]));
+
+    for (unsigned int i=0; i<faces.size(); ++i)
+      tolerance = std::max<double>(tolerance, BRep_Tool::Tolerance(faces[i]));
+
+    return tolerance;
+  }
+
+  // Intersect @p in_shape with the plane built from the four coefficients
+  // c_x, c_y, c_z and c, and return the resulting section shape. The
+  // tolerance argument is currently unused.
+  TopoDS_Shape intersect_plane(const TopoDS_Shape &in_shape,
+                               const double c_x,
+                               const double c_y,
+                               const double c_z,
+                               const double c,
+                               const double /*tolerance*/)
+  {
+    Handle(Geom_Plane) cutting_plane = new Geom_Plane(c_x,c_y,c_z,c);
+    BRepAlgo_Section section(in_shape, cutting_plane);
+    return section.Shape();
+  }
+
+  // Join all edges of @p in_shape into a single edge backed by one
+  // B-spline curve. Curves are appended one at a time, with tolerance
+  // @p tolerance, to the B-spline started from the first edge; a curve that
+  // cannot be appended as is gets reversed and tried again. In debug mode
+  // an assertion is raised if the shape has no edges, if an edge is not
+  // defined on a bounded curve, or if some curve could not be joined.
+  TopoDS_Edge join_edges(const TopoDS_Shape &in_shape,
+                         const double tolerance)
+  {
+    // Gather the geometric curve underlying each edge of the input shape.
+    std::vector<Handle_Geom_BoundedCurve> intersections;
+    TopLoc_Location L;
+    Standard_Real First;
+    Standard_Real Last;
+    for (TopExp_Explorer edgeExplorer(in_shape, TopAbs_EDGE);
+         edgeExplorer.More(); edgeExplorer.Next())
+      {
+        const TopoDS_Edge edge = TopoDS::Edge(edgeExplorer.Current());
+        Handle(Geom_Curve) curve = BRep_Tool::Curve(edge,L,First,Last);
+        intersections.push_back(Handle(Geom_BoundedCurve)::DownCast(curve));
+
+        // DownCast returns a null handle when the curve is not a bounded
+        // curve; such a handle cannot be used by the converter below.
+        Assert(!intersections.back().IsNull(),
+               ExcMessage("Only edges defined on bounded curves can be joined."));
+      }
+
+    // Now we build a single bspline out of all the geometrical curves,
+    // in lexicographical order
+    const unsigned int numIntersEdges = intersections.size();
+    Assert(numIntersEdges>0, ExcMessage("No curves to process!"));
+
+    GeomConvert_CompCurveToBSplineCurve convert_bspline(intersections[0]);
+
+    // Sweep over the curves not yet joined until a full sweep adds none.
+    // one_failed records whether the last sweep left a curve unjoined.
+    bool one_added = true, one_failed = true;
+    std::vector<bool> added(numIntersEdges, false);
+    added[0] = true;
+    while (one_added == true)
+      {
+        one_added = false;
+        one_failed = false;
+        for (unsigned int i=1; i<numIntersEdges; ++i)
+          if (added[i] == false)
+            {
+              Handle(Geom_BoundedCurve) bcurve = intersections[i];
+              bool check = convert_bspline.Add(bcurve,tolerance,0,1,0);
+              if (check == false)  // If we failed, try again with the reversed curve
+                {
+                  bcurve->Reverse();
+                  check = convert_bspline.Add(bcurve,tolerance,0,1,0);
+                }
+              one_failed = one_failed || (check == false);
+              one_added = one_added || (check == true);
+              added[i] = check;
+            }
+      }
+
+    Assert(one_failed == false,
+           ExcMessage("Joining some of the Edges failed."));
+
+    Handle(Geom_Curve) bspline = convert_bspline.BSplineCurve();
+
+    TopoDS_Edge out_shape = BRepBuilderAPI_MakeEdge(bspline);
+    return out_shape;
+  }
+
+
+  // Intersect the line through @p origin with direction @p direction with
+  // @p in_shape and return the intersection point closest to @p origin.
+  // @p tolerance is passed to the intersector when the shape is loaded. In
+  // debug mode an assertion is raised if the intersection could not be
+  // computed or if the line does not meet the shape at all.
+  Point<3> line_intersection(const TopoDS_Shape &in_shape,
+                             const Point<3> &origin,
+                             const Tensor<1,3> &direction,
+                             const double tolerance)
+  {
+    // translate the origin and the direction to an OpenCASCADE line
+    const gp_Pnt P0 = point(origin);
+    const gp_Ax1 gpaxis(P0, gp_Dir(direction[0], direction[1], direction[2]));
+    const gp_Lin line(gpaxis);
+
+    // load the whole shape into the intersector
+    IntCurvesFace_ShapeIntersector Inters;
+    Inters.Load(in_shape,tolerance);
+
+    // Keep in mind: PerformNearest sounds pretty but DOESN'T WORK!!!
+    // The closest point must be found by hand
+    Inters.Perform(line,-RealLast(),+RealLast());
+    Assert(Inters.IsDone(), ExcMessage("Could not project point."));
+    Assert(Inters.NbPnt() > 0,
+           ExcMessage("The line does not intersect the shape."));
+
+    // Start from the largest representable distance so that every
+    // intersection is a candidate, however far it is from the origin.
+    double minDistance = RealLast();
+    Point<3> result;
+    for (int i=1; i<=Inters.NbPnt(); ++i)
+      {
+        // the intersection points are indexed starting from 1
+        const double distance = P0.Distance(Inters.Pnt(i));
+        if (distance < minDistance)
+          {
+            minDistance = distance;
+            result = point(Inters.Pnt(i));
+          }
+      }
+
+    return result;
+  }
+
+  // Build an edge from the B-spline interpolating @p curve_points. If
+  // @p direction is non-zero, the points are first sorted in place with
+  // point_compare() along that direction. @p closed and @p tolerance are
+  // forwarded to the OpenCASCADE interpolation.
+  TopoDS_Edge interpolation_curve(std::vector<Point<3> > &curve_points,
+                                  const Tensor<1,3> &direction,
+                                  const bool closed,
+                                  const double tolerance)
+  {
+    const bool sort_points = (direction*direction > 0);
+    if (sort_points)
+      std::sort(curve_points.begin(), curve_points.end(),
+                boost::bind(&OpenCASCADE::point_compare, _1, _2, direction, tolerance));
+
+    // copy the points into an OpenCASCADE array, which is indexed from 1
+    const unsigned int n_points = curve_points.size();
+    Handle(TColgp_HArray1OfPnt) occ_points = new TColgp_HArray1OfPnt(1,n_points);
+    for (unsigned int i=1; i<=n_points; ++i)
+      occ_points->SetValue(i,point(curve_points[i-1]));
+
+    GeomAPI_Interpolate bspline_generator(occ_points, closed, tolerance);
+    bspline_generator.Perform();
+    Assert( (bspline_generator.IsDone()), ExcMessage("Interpolated bspline generation failed"));
+
+    Handle(Geom_BSplineCurve) bspline = bspline_generator.Curve();
+    TopoDS_Edge interpolated_edge = BRepBuilderAPI_MakeEdge(bspline);
+    return interpolated_edge;
+  }
+
+  // Project @p origin onto @p in_shape and return a tuple with the
+  // projected point, the face (or edge) it lies on, and the parametric
+  // coordinates u and v of the projection on it. Faces take precedence:
+  // edges are searched only if the shape has no faces, and in that case v
+  // is left at zero. @p tolerance is forwarded to the surface projection.
+  // In debug mode an assertion is raised if no projection is found.
+  std_cxx11::tuple<Point<3>, TopoDS_Shape, double, double>
+  project_point_and_pull_back(const TopoDS_Shape &in_shape,
+                              const Point<3> &origin,
+                              const double tolerance)
+  {
+    TopExp_Explorer exp;
+    const gp_Pnt occ_origin = point(origin);
+    gp_Pnt Pproj = occ_origin;
+
+    // Start from the largest representable distance so that the closest
+    // candidate is found however far the shape is from the origin.
+    double minDistance = RealLast();
+    gp_Pnt tmp_proj(0.0,0.0,0.0);
+
+    unsigned int counter = 0;
+    unsigned int face_counter = 0;
+
+    TopoDS_Shape out_shape;
+    double u=0;
+    double v=0;
+
+    for (exp.Init(in_shape, TopAbs_FACE); exp.More(); exp.Next())
+      {
+        TopoDS_Face face = TopoDS::Face(exp.Current());
+
+        // the projection function needs a surface, so we obtain the
+        // surface upon which the face is defined
+        Handle(Geom_Surface) SurfToProj = BRep_Tool::Surface(face);
+
+        ShapeAnalysis_Surface projector(SurfToProj);
+        gp_Pnt2d proj_params = projector.ValueOfUV(occ_origin, tolerance);
+
+        SurfToProj->D0(proj_params.X(),proj_params.Y(),tmp_proj);
+
+        double distance = point(tmp_proj).distance(origin);
+        if (distance < minDistance)
+          {
+            minDistance = distance;
+            Pproj = tmp_proj;
+            out_shape = face;
+            u=proj_params.X();
+            v=proj_params.Y();
+            ++counter;
+          }
+        ++face_counter;
+      }
+
+    // face counter tells us if the shape contained faces: if it does, there is no need
+    // to loop on edges. Even if the closest point lies on the boundary of a parametric surface,
+    // we need in fact to retain the face and both u and v, if we want to use this method to
+    // retrieve the surface normal
+    if (face_counter==0)
+      for (exp.Init(in_shape, TopAbs_EDGE); exp.More(); exp.Next())
+        {
+          TopoDS_Edge edge = TopoDS::Edge(exp.Current());
+          if (!BRep_Tool::Degenerated(edge))
+            {
+              TopLoc_Location L;
+              Standard_Real First;
+              Standard_Real Last;
+
+              // the projection function needs a Curve, so we obtain the
+              // curve upon which the edge is defined
+              Handle(Geom_Curve) CurveToProj = BRep_Tool::Curve(edge,L,First,Last);
+
+              GeomAPI_ProjectPointOnCurve Proj(occ_origin,CurveToProj);
+              unsigned int num_proj_points = Proj.NbPoints();
+              if ((num_proj_points > 0) && (Proj.LowerDistance() < minDistance))
+                {
+                  minDistance = Proj.LowerDistance();
+                  Pproj = Proj.NearestPoint();
+                  out_shape = edge;
+                  u=Proj.LowerDistanceParameter();
+                  ++counter;
+                }
+            }
+        }
+
+    Assert(counter > 0, ExcMessage("Could not find projection points."));
+    return std_cxx11::tuple<Point<3>, TopoDS_Shape, double, double>
+           (point(Pproj),out_shape, u, v);
+  }
+
+
+  // Return the point of @p in_shape closest to @p origin, i.e. the first
+  // entry of the tuple computed by project_point_and_pull_back().
+  Point<3> closest_point(const TopoDS_Shape &in_shape,
+                         const Point<3> &origin,
+                         const double tolerance)
+  {
+    const std_cxx11::tuple<Point<3>, TopoDS_Shape, double, double>
+    projection = project_point_and_pull_back(in_shape, origin, tolerance);
+
+    return std_cxx11::get<0>(projection);
+  }
+
+  // Project @p origin onto @p in_shape and evaluate
+  // push_forward_and_differential_forms() on the face holding the
+  // projection, at the parametric coordinates of the projection. In debug
+  // mode an assertion is raised unless the shape holding the projection
+  // contains exactly one face.
+  std_cxx11::tuple<Point<3>, Point<3>, double>
+  closest_point_and_differential_forms(const TopoDS_Shape &in_shape,
+                                       const Point<3> &origin,
+                                       const double tolerance)
+
+  {
+    const std_cxx11::tuple<Point<3>, TopoDS_Shape, double, double>
+    projection = project_point_and_pull_back(in_shape,
+                                             origin,
+                                             tolerance);
+
+    const TopoDS_Shape &closest_shape = std_cxx11::get<1>(projection);
+    const double u = std_cxx11::get<2>(projection);
+    const double v = std_cxx11::get<3>(projection);
+
+    // sanity check: the shape holding the closest point must contain
+    // exactly one face
+    const unsigned int n_faces = std_cxx11::get<0>(count_elements(closest_shape));
+    (void)n_faces;
+
+    Assert(n_faces > 0,
+           ExcMessage("Could not find normal: the shape containing the closest point has 0 faces."));
+    Assert(n_faces < 2,
+           ExcMessage("Could not find normal: the shape containing the closest point has more than 1 face."));
+
+    TopExp_Explorer face_explorer(closest_shape, TopAbs_FACE);
+    const TopoDS_Face face = TopoDS::Face(face_explorer.Current());
+    return push_forward_and_differential_forms(face, u, v, tolerance);
+  }
+
+  // Evaluate @p in_shape at the given parametric coordinates: a face is
+  // evaluated at (u,v), an edge at u only. For any other shape type an
+  // ExcUnsupportedShape assertion is raised in debug mode and a
+  // default-constructed point is returned.
+  Point<3> push_forward(const TopoDS_Shape &in_shape,
+                        const double u,
+                        const double v)
+  {
+    if (in_shape.ShapeType() == TopAbs_FACE)
+      {
+        BRepAdaptor_Surface surface(TopoDS::Face(in_shape));
+        return point(surface.Value(u,v));
+      }
+
+    if (in_shape.ShapeType() == TopAbs_EDGE)
+      {
+        BRepAdaptor_Curve edge_curve(TopoDS::Edge(in_shape));
+        return point(edge_curve.Value(u));
+      }
+
+    Assert(false, ExcUnsupportedShape());
+    return Point<3>();
+  }
+
+  // Evaluate the surface underlying @p face at (u,v) and return a tuple
+  // with the point, the surface normal and the mean curvature there. In
+  // debug mode an assertion is raised if the normal or the curvature is
+  // not defined. The tolerance argument is currently unused.
+  std_cxx11::tuple<Point<3>, Point<3>, double >
+  push_forward_and_differential_forms(const TopoDS_Face &face,
+                                      const double u,
+                                      const double v,
+                                      const double /*tolerance*/)
+  {
+    Handle(Geom_Surface) surface = BRep_Tool::Surface(face);
+    GeomLProp_SLProps local_props(surface, u, v, 1, 1e-7);
+
+    const gp_Pnt position = local_props.Value();
+
+    Assert(local_props.IsNormalDefined(), ExcMessage("Normal is not well defined!"));
+    const gp_Dir occ_normal = local_props.Normal();
+
+    Assert(local_props.IsCurvatureDefined(), ExcMessage("Curvature is not well defined!"));
+    const Standard_Real mean_curvature = local_props.MeanCurvature();
+
+    const Point<3> normal(occ_normal.X(),occ_normal.Y(),occ_normal.Z());
+    return std_cxx11::tuple<Point<3>, Point<3>, double>(point(position), normal, mean_curvature);
+  }
+
+
+
+  // Fill @p tria with a single cell whose four vertices are the images of
+  // the corners of the (u,v) parameter range of @p face.
+  void create_triangulation(const TopoDS_Face &face,
+                            Triangulation<2,3> &tria)
+  {
+    BRepAdaptor_Surface surface(face);
+    const double u_first = surface.FirstUParameter();
+    const double u_last  = surface.LastUParameter();
+    const double v_first = surface.FirstVParameter();
+    const double v_last  = surface.LastVParameter();
+
+    // corners of the parameter range, with u running fastest
+    std::vector<Point<3> > vertices;
+    vertices.push_back(point(surface.Value(u_first,v_first)));
+    vertices.push_back(point(surface.Value(u_last,v_first)));
+    vertices.push_back(point(surface.Value(u_first,v_last)));
+    vertices.push_back(point(surface.Value(u_last,v_last)));
+
+    // the only cell uses the four vertices in the order created above
+    CellData<2> cell;
+    for (unsigned int i=0; i<4; ++i)
+      cell.vertices[i] = i;
+    std::vector<CellData<2> > cells(1, cell);
+
+    SubCellData subcell_data;
+    tria.create_triangulation(vertices, cells, subcell_data);
+  }
+
+} // end namespace
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
new file mode 100644
index 0000000..f1206b5
--- /dev/null
+++ b/tests/CMakeLists.txt
@@ -0,0 +1,145 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2013 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# Set up the testsuite.
+#
+# We define toplevel targets:
+#    setup_tests    - set up testsuite subprojects
+#    prune_tests    - remove all testsuite subprojects
+#
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
+
+#
+# SET_IF_EMPTY(<variable> <value>...)
+#
+# Assign the remaining arguments to <variable>, but only if <variable>
+# currently expands to the empty string. A non-empty value is left alone.
+#
+MACRO(SET_IF_EMPTY _variable)
+  IF("${${_variable}}" STREQUAL "")
+    SET(${_variable} ${ARGN})
+  ENDIF()
+ENDMACRO()
+
+# Fall back to the MAKEOPTS environment variable if MAKEOPTS is not set:
+SET_IF_EMPTY(MAKEOPTS $ENV{MAKEOPTS})
+
+IF(DEFINED DEAL_II_HAVE_TESTS_DIRECTORY)
+
+  IF(POLICY CMP0037)
+    # allow to override "test" target for quick tests
+    CMAKE_POLICY(SET CMP0037 OLD)
+  ENDIF()
+
+  #
+  # If this CMakeLists.txt file is called from within the deal.II build
+  # system, set up quick tests as well:
+  #
+  ADD_SUBDIRECTORY(quick_tests)
+
+  MESSAGE(STATUS "Setting up testsuite")
+
+  #
+  # Write minimalistic CTestTestfile.cmake files to CMAKE_BINARY_DIR and
+  # CMAKE_BINARY_DIR/tests:
+  #
+  FILE(WRITE ${CMAKE_BINARY_DIR}/CTestTestfile.cmake "SUBDIRS(tests)")
+
+  # The testsuite subprojects are pointed at the current build directory:
+  SET(_options "-DDEAL_II_DIR=${CMAKE_BINARY_DIR}")
+
+ELSE()
+
+  #
+  # Otherwise this file is configured on its own and deal.II has to be
+  # found first:
+  #
+  MESSAGE(STATUS "This is CMake ${CMAKE_VERSION}")
+  MESSAGE(STATUS "")
+
+  IF("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}")
+    MESSAGE(FATAL_ERROR "The testsuite cannot be configured in-source. "
+      "Please create a separate build directory!"
+      )
+  ENDIF()
+
+  FIND_PACKAGE(deal.II 8.4.0 REQUIRED HINTS ${DEAL_II_DIR} $ENV{DEAL_II_DIR})
+  PROJECT(testsuite NONE)
+  # The testsuite subprojects are pointed at DEAL_II_PATH:
+  SET(_options "-DDEAL_II_DIR=${DEAL_II_PATH}")
+ENDIF()
+
+# Start from an empty CTestTestfile.cmake in the current binary directory:
+FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/CTestTestfile.cmake "")
+
+#
+# Always undefine the following variables in the setup_tests target.
+# A variable that currently holds a non-empty value is defined again
+# with that value right after having been undefined:
+#
+FOREACH(_var DIFF_DIR NUMDIFF_DIR TEST_PICKUP_REGEX TEST_TIME_LIMIT)
+  LIST(APPEND _options "-U${_var}")
+  IF(NOT "${${_var}}" STREQUAL "")
+    LIST(APPEND _options "-D${_var}=${${_var}}")
+  ENDIF()
+ENDFOREACH()
+
+#
+# Find all testsuite subprojects, i.e., every directory that contains a
+# CMakeLists.txt file (with the exception of "quick_tests").
+#
+SET(_categories)
+FILE(GLOB _dirs RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
+  ${CMAKE_CURRENT_SOURCE_DIR}/*
+  )
+FOREACH(_dir ${_dirs})
+  IF( EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${_dir}/CMakeLists.txt AND
+      NOT ${_dir} MATCHES quick_tests)
+    LIST(APPEND _categories ${_dir})
+  ENDIF()
+ENDFOREACH()
+
+#
+# Custom targets for the testsuite:
+#
+
+# Setup tests:
+ADD_CUSTOM_TARGET(setup_tests)
+
+# Remove all tests:
+ADD_CUSTOM_TARGET(prune_tests)
+
+#
+# For every category create a setup_tests_<category> and a
+# prune_tests_<category> target, hook them into the two toplevel targets,
+# and register the category in CTestTestfile.cmake:
+#
+FOREACH(_category ${_categories})
+  SET(_category_dir ${CMAKE_CURRENT_SOURCE_DIR}/${_category})
+
+  FILE(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${_category})
+
+  # The configure output is redirected to /dev/null, except with MSVC:
+  IF(DEAL_II_MSVC)
+    SET(_command ${CMAKE_COMMAND} -G${CMAKE_GENERATOR} ${_options} ${_category_dir})
+  ELSE()
+    SET(_command ${CMAKE_COMMAND} -G${CMAKE_GENERATOR} ${_options} ${_category_dir} > /dev/null)
+  ENDIF()
+
+  # Configure the category subproject in its own binary directory:
+  ADD_CUSTOM_TARGET(setup_tests_${_category}
+    COMMAND ${_command}
+    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${_category}
+    COMMENT "Processing tests/${_category}"
+    )
+  ADD_DEPENDENCIES(setup_tests setup_tests_${_category})
+
+  # Pruning removes the category binary directory and recreates it empty:
+  ADD_CUSTOM_TARGET(prune_tests_${_category}
+    COMMAND ${CMAKE_COMMAND} -E remove_directory
+      ${CMAKE_CURRENT_BINARY_DIR}/${_category}
+    COMMAND ${CMAKE_COMMAND} -E make_directory
+      ${CMAKE_CURRENT_BINARY_DIR}/${_category}
+    COMMENT "Processing tests/${_category}"
+    )
+  ADD_DEPENDENCIES(prune_tests prune_tests_${_category})
+
+  FILE(APPEND ${CMAKE_CURRENT_BINARY_DIR}/CTestTestfile.cmake
+    "SUBDIRS(${_category})\n"
+    )
+ENDFOREACH()
+
+
+# The closing status message is only printed inside the deal.II build system:
+IF(DEFINED DEAL_II_HAVE_TESTS_DIRECTORY)
+  MESSAGE(STATUS "Setting up testsuite - Done")
+ENDIF()
diff --git a/tests/quick_tests/CMakeLists.txt b/tests/quick_tests/CMakeLists.txt
new file mode 100644
index 0000000..8ed1d64
--- /dev/null
+++ b/tests/quick_tests/CMakeLists.txt
@@ -0,0 +1,121 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2013 - 2015 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+#
+# A minimalistic set of tests:
+#
+ENABLE_TESTING()
+
+# Header search path for the quick tests: generated and source include
+# directories first, then the bundled and external include directories.
+INCLUDE_DIRECTORIES(
+  ${CMAKE_BINARY_DIR}/include/
+  ${CMAKE_SOURCE_DIR}/include/
+  ${DEAL_II_BUNDLED_INCLUDE_DIRS}
+  ${DEAL_II_INCLUDE_DIRS}
+  )
+
+# Use the first available build type (this prefers debug mode if available):
+LIST(GET DEAL_II_BUILD_TYPES 0 _mybuild)
+MESSAGE(STATUS "Setting up quick_tests in ${_mybuild} mode")
+
+# Names of all quick test targets; filled by make_quicktest():
+SET(ALL_TESTS) # clean variable
+
+# define a macro to set up a quick test:
+#
+# make_quicktest(<test_basename> <build_name> <mpi_run>)
+#
+# Set up the quick test <test_basename>.cc for the build type <build_name>:
+#   - an executable target <test_basename>.<build_name in lowercase>,
+#     excluded from the default build,
+#   - a <target>.run target that runs the executable and stores its output
+#     in <target>-OK; on failure the output is printed and the file removed,
+#   - a CTest test that drives the run target via run_test.cmake.
+# If <mpi_run> is non-empty the executable is started through MPIEXEC with
+# <mpi_run> as the number of processes. The target name is appended to
+# ALL_TESTS.
+#
+MACRO(make_quicktest test_basename build_name mpi_run)
+  STRING(TOLOWER ${build_name} _build_lowercase)
+  SET(_target ${test_basename}.${_build_lowercase})
+  LIST(APPEND ALL_TESTS "${_target}")
+  ADD_EXECUTABLE(${_target} EXCLUDE_FROM_ALL ${test_basename}.cc)
+  DEAL_II_INSOURCE_SETUP_TARGET(${_target} ${build_name})
+
+  IF("${mpi_run}" STREQUAL "")
+    SET(_command ./${_target})
+  ELSE()
+    SET(_command ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} ${mpi_run} ${MPIEXEC_PREFLAGS} ./${_target})
+  ENDIF()
+  ADD_CUSTOM_TARGET(${_target}.run
+    DEPENDS ${_target}
+    COMMAND
+      ${_command} > ${_target}-OK 2>&1
+      ||(echo "${_target}: RUN failed. Output:"
+         && cat ${_target}-OK
+         && rm ${_target}-OK
+         && exit 1)
+    COMMAND echo "${_target}: PASSED."
+    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+    )
+
+  # this is a hack to make sure the -OK file is deleted
+  # even if compilation fails.
+  ADD_CUSTOM_TARGET(kill-${_target}-OK
+    COMMAND rm -f ${_target}-OK
+    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+    )
+  ADD_DEPENDENCIES(${_target} kill-${_target}-OK)
+
+  ADD_TEST(NAME ${_target}
+    COMMAND ${CMAKE_COMMAND} -DTRGT=${_target}.run -DTEST=${_target}
+      -DBINARY_DIR=${CMAKE_BINARY_DIR}
+      -P ${CMAKE_SOURCE_DIR}/cmake/scripts/run_test.cmake
+    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+    )
+  # The CTest property is called LABELS; a property named LABEL is not
+  # evaluated by ctest.
+  SET_TESTS_PROPERTIES(${_target} PROPERTIES LABELS "sanity checks")
+ENDMACRO()
+
+
+# Simple assembly/solver test, set up once for every entry of
+# DEAL_II_BUILD_TYPES. This makes sure we can compile and link correctly
+# in debug and release.
+FOREACH(_build ${DEAL_II_BUILD_TYPES})
+  make_quicktest("step" ${_build} "")
+ENDFOREACH()
+
+# Thread affinity check (only with thread support):
+IF(DEAL_II_WITH_THREADS)
+  make_quicktest("affinity" ${_mybuild} "")
+ENDIF()
+
+# MPI configuration check, run on 2 processes:
+IF(DEAL_II_WITH_MPI)
+  make_quicktest("mpi" ${_mybuild} 2)
+ENDIF()
+
+# TBB check (only with thread support):
+IF(DEAL_II_WITH_THREADS)
+  make_quicktest("tbb" ${_mybuild} "")
+ENDIF()
+
+# p4est check, run on 10 processes. This test exposes a bug in
+# OpenMPI 1.3 and 1.4; update to OpenMPI 1.5 or newer.
+IF(DEAL_II_WITH_P4EST)
+  make_quicktest("p4est" ${_mybuild} 10)
+ENDIF()
+
+# PETSc check:
+IF(DEAL_II_WITH_PETSC)
+  make_quicktest("step-petsc" ${_mybuild} "")
+ENDIF()
+
+# SLEPc check (requires PETSc as well):
+IF(DEAL_II_WITH_PETSC AND DEAL_II_WITH_SLEPC)
+  make_quicktest("step-slepc" ${_mybuild} "")
+ENDIF()
+
+# Custom "test" target that hands all registered quick tests to run.cmake:
+ADD_CUSTOM_TARGET(test
+  COMMAND ${CMAKE_COMMAND} -D ALL_TESTS="${ALL_TESTS}" -P ${CMAKE_CURRENT_SOURCE_DIR}/run.cmake
+  WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+  COMMENT "Running quicktests..."
+  )
+
+MESSAGE(STATUS "Setting up quick_tests in ${_mybuild} mode - Done")
diff --git a/tests/quick_tests/affinity.cc b/tests/quick_tests/affinity.cc
new file mode 100644
index 0000000..35dd8db
--- /dev/null
+++ b/tests/quick_tests/affinity.cc
@@ -0,0 +1,114 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+/*
+  Test that OpenMP is not messing with thread affinity, which will stop TBB
+  from creating threads.
+ */
+
+
+#include <deal.II/grid/tria.h>
+#include <deal.II/base/multithread_info.h>
+#include <deal.II/base/utilities.h>
+#include <stdio.h>
+
+#if defined(__linux__)
+#include <sched.h>
+#include <sys/sysinfo.h>
+#endif
+
+/*
+  Query the CPU affinity of the calling process.
+
+  On Linux, bits_set receives the number of CPUs in the affinity set and
+  mask receives one bit per CPU for the first CPUs of the set (bit i is set
+  if CPU i belongs to the set), as many as fit into an unsigned int.
+  On other platforms both values are left at zero.
+
+  Returns false if sched_getaffinity() fails, true otherwise.
+ */
+bool getaffinity(unsigned int &bits_set,unsigned int &mask)
+{
+  bits_set = 0;
+  mask = 0x00;
+
+#if defined(__linux__)
+  cpu_set_t my_set;
+  CPU_ZERO(&my_set);
+
+  const int ret = sched_getaffinity(0, sizeof(my_set), &my_set);
+
+  if (ret!=0)
+    {
+      printf("sched_getaffinity() failed, return value: %d\n", ret);
+      return false;
+    }
+
+  // Build the mask bit by bit from CPU_ISSET instead of reinterpreting the
+  // storage of cpu_set_t as an int, which depends on the byte order and on
+  // the internal layout of the set.
+  const int mask_bits = static_cast<int>(8*sizeof(mask));
+  for (int i=0; i<CPU_SETSIZE; ++i)
+    if (CPU_ISSET(i,&my_set))
+      {
+        ++bits_set;
+        if (i < mask_bits)
+          mask |= (1u << i);
+      }
+#else
+  // sadly we don't have an implementation
+  // for mac/windows
+#endif
+  return true;
+}
+
+/*
+  Return the value of the environment variable DEAL_II_NUM_THREADS as an
+  integer, or -1 if the variable is not set or converting it throws.
+ */
+int get_num_thread_env()
+{
+  const char *penv = getenv ("DEAL_II_NUM_THREADS");
+  if (penv==NULL)
+    return -1;
+
+  try
+    {
+      return dealii::Utilities::string_to_int(std::string(penv));
+    }
+  catch (...)
+    {
+      return -1;
+    }
+}
+
+
+/*
+  Print the affinity and thread information and compare the numbers.
+
+  Exit codes:
+    0  everything is consistent, or the thread count was limited on
+       purpose through DEAL_II_NUM_THREADS
+    1  the affinity could not be queried
+    2  the affinity set holds fewer CPUs than there are cores
+    3  the number of TBB threads differs from the number of cores
+ */
+int main ()
+{
+  // we need this, otherwise gcc will not link against deal.II
+  dealii::Triangulation<2> test;
+
+  unsigned int bits_set, mask;
+  if (!getaffinity(bits_set, mask))
+    return 1;
+
+  unsigned int nprocs = dealii::MultithreadInfo::n_cores();
+  unsigned int tbbprocs = dealii::MultithreadInfo::n_threads();
+  int env = get_num_thread_env();
+  // unsigned values are printed with %u, only env is a signed int
+  printf("aff_ncpus=%u, mask=%08X, nprocs=%u, tbb_threads=%u, DEAL_II_NUM_THREADS=%d\n",
+         bits_set, mask, nprocs, tbbprocs, env );
+
+  // bits_set==0 means that no affinity information is available
+  if (bits_set !=0  && bits_set!=nprocs)
+    {
+      printf("Warning: sched_getaffinity() returns that we can only use %u out of %u CPUs.\n",bits_set, nprocs);
+      return 2;
+    }
+  if (env != -1 && nprocs != tbbprocs)
+    {
+      printf("Warning: number of threads is set to %d in environment using DEAL_II_NUM_THREADS.\n", env);
+      return 0; // do not return an error!
+    }
+  if (nprocs != tbbprocs)
+    {
+      printf("Warning: for some reason TBB only wants to use %u out of %u CPUs.\n",
+             tbbprocs, nprocs);
+      return 3;
+    }
+
+  return 0;
+}
diff --git a/tests/quick_tests/mpi.cc b/tests/quick_tests/mpi.cc
new file mode 100644
index 0000000..4cd4259
--- /dev/null
+++ b/tests/quick_tests/mpi.cc
@@ -0,0 +1,71 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// test that MPI is working correctly. Note that this test expects to
+// be executed with exactly two MPI processes.
+
+#include <deal.II/grid/tria.h>
+#include <stdio.h>
+#include <sched.h>
+#include <mpi.h>
+#include <iostream>
+
+// Smoke test for the MPI installation; it has to be started on exactly two
+// ranks. Rank 1 sends its rank number to rank 0 with MPI_Send/MPI_Recv,
+// then all ranks sum the value 1 with MPI_Allreduce and compare the result
+// with the communicator size.
+//
+// Returns 0 on success and -1 (after a message on std::cerr) as soon as one
+// of the checks fails.
+int main(int argc, char *argv[] )
+{
+  MPI_Init( &argc, &argv );
+
+  int myrank, nproc;
+  MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
+  MPI_Comm_size(MPI_COMM_WORLD, &nproc);
+
+  std::cout << " Hi from " << myrank << "/" << nproc << std::endl;
+
+  if (nproc != 2)
+    {
+      std::cerr << "ERROR: process does not see nproc=2!" << std::endl;
+      return -1;
+    }
+
+  MPI_Barrier(MPI_COMM_WORLD);
+
+  // start from MPI_SUCCESS so that the check below is well defined on
+  // every rank; the return code used to be stored but never looked at
+  int err = MPI_SUCCESS;
+  int value = myrank;
+
+  if (myrank==1)
+    err = MPI_Send(&value, 1, MPI_INT, 0, 1, MPI_COMM_WORLD);
+  else if (myrank==0)
+    err = MPI_Recv(&value, 1, MPI_INT, 1, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+  // rank 0 must now hold the value sent by rank 1
+  if (err != MPI_SUCCESS || (myrank==0 && value!=1))
+    {
+      std::cerr << "ERROR: MPI_Send/Recv did not work!" << std::endl;
+      return -1;
+    }
+
+  value = 1;
+  int output = 0;
+
+  // every rank contributes 1, so the sum has to equal the number of ranks
+  MPI_Allreduce(&value, &output, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+  if (output != nproc)
+    {
+      std::cerr << "ERROR: MPI_Allreduce doesn't seem to work!" << std::endl;
+      return -1;
+    }
+
+  // we need this, otherwise gcc will not link against deal.II
+  dealii::Triangulation<2> test;
+
+  MPI_Finalize();
+  return 0;
+}
diff --git a/tests/quick_tests/p4est.cc b/tests/quick_tests/p4est.cc
new file mode 100644
index 0000000..79913e6
--- /dev/null
+++ b/tests/quick_tests/p4est.cc
@@ -0,0 +1,83 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2008 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+// Test DoFTools::count_dofs_per_component
+
+
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/tensor.h>
+#include <deal.II/distributed/tria.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/intergrid_map.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/fe/fe_system.h>
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_dgq.h>
+
+#include <fstream>
+#include <numeric>
+#include <cstdlib>
+
+using namespace dealii;
+
+// Build a twice globally refined hyper cube on a parallel::distributed
+// triangulation, distribute an FESystem made of two FE_Q(3) components and
+// one FE_DGQ(1) component, and check that the per-component counts returned
+// by DoFTools::count_dofs_per_component add up to dof_handler.n_dofs().
+// The process with MPI rank 0 writes the counts to deallog.
+template<int dim>
+void test()
+{
+  parallel::distributed::Triangulation<dim>
+  triangulation (MPI_COMM_WORLD,
+                 Triangulation<dim>::limit_level_difference_at_vertices);
+
+  FESystem<dim> fe (FE_Q<dim>(3),2,
+                    FE_DGQ<dim>(1),1);
+
+  DoFHandler<dim> dof_handler (triangulation);
+
+  GridGenerator::hyper_cube(triangulation);
+  triangulation.refine_global (2);
+  dof_handler.distribute_dofs (fe);
+
+  std::vector<types::global_dof_index> dofs_per_component (fe.n_components());
+  DoFTools::count_dofs_per_component (dof_handler, dofs_per_component);
+
+  // std::accumulate sums in the type of its initial value. Seeding it with
+  // 0U would add up in unsigned int and could truncate the total whenever
+  // types::global_dof_index is a wider type, so seed with that type.
+  Assert (std::accumulate (dofs_per_component.begin(), dofs_per_component.end(),
+                           types::global_dof_index(0))
+          == dof_handler.n_dofs(),
+          ExcInternalError());
+
+  unsigned int myid = Utilities::MPI::this_mpi_process (MPI_COMM_WORLD);
+  if (myid == 0)
+    {
+      deallog << "Total number of dofs: " << dof_handler.n_dofs() << std::endl;
+      for (unsigned int i=0; i<dofs_per_component.size(); ++i)
+        deallog << "Component " << i << " has " << dofs_per_component[i] << " global dofs"
+                << std::endl;
+    }
+}
+
+
+// Create the MPI_InitFinalize object, which stays alive until main()
+// returns, and run the test in 2d and 3d.
+int main(int argc, char *argv[])
+{
+  // NOTE(review): the trailing 1 is presumably the thread limit -- confirm
+  // against the MPI_InitFinalize documentation
+  Utilities::MPI::MPI_InitFinalize mpi_guard (argc, argv, 1);
+
+  test<2>();
+  test<3>();
+
+  return 0;
+}
diff --git a/tests/quick_tests/run.cmake b/tests/quick_tests/run.cmake
new file mode 100644
index 0000000..bededca
--- /dev/null
+++ b/tests/quick_tests/run.cmake
@@ -0,0 +1,64 @@
+## ---------------------------------------------------------------------
+##
+## Copyright (C) 2013 by the deal.II authors
+##
+## This file is part of the deal.II library.
+##
+## The deal.II library is free software; you can use it, redistribute
+## it, and/or modify it under the terms of the GNU Lesser General
+## Public License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+## The full text of the license can be found in the file LICENSE at
+## the top level of the deal.II distribution.
+##
+## ---------------------------------------------------------------------
+
+# This file is run when "make test" is executed by the user and is 
+# responsible for running the tests and printing some helpful
+# error messages.
+
+# ALL_TESTS arrives as one space-separated string; turn it into a list.
+separate_arguments(ALL_TESTS)
+
+# Run ctest in a fresh process. The ctest output is also written to
+# quicktests.log and the exit status is stored in res_var.
+execute_process(
+  COMMAND ${CMAKE_CTEST_COMMAND} --force-new-ctest-process --output-on-failure -O quicktests.log
+  RESULT_VARIABLE res_var
+  )
+
+# Anything but a zero exit status: print a general warning followed by
+# hints for the tests with well-known failure causes.
+if(NOT "${res_var}" STREQUAL "0")
+  message( "
+
+*******************************     WARNING     *******************************
+
+Some of the tests failed!
+
+Please scroll up or check the file tests/quick_tests/quicktests.log for the
+error messages. If you are unable to fix the problems, see the FAQ or write
+to the mailing list linked at http://www.dealii.org\n"
+    )
+
+  # NOTE(review): a missing "<test>-OK" file is taken to mean that the test
+  # did not pass -- confirm where that file gets written.
+  foreach(test ${ALL_TESTS})
+    if(${test} MATCHES "^affinity" AND NOT EXISTS ${test}-OK)
+      message("
+The affinity test can fail when you are linking in a library like BLAS
+which uses OpenMP. Even without calling any BLAS functions, OpenMP messes
+with the thread affinity which causes TBB to run single-threaded only. You
+can fix this by exporting OMP_NUM_THREADS=1. Also see GOMP_CPU_AFFINITY 
+and OMP_PROC_BIND.\n"
+        )
+    endif()
+
+    if(${test} MATCHES "^step-petsc" AND NOT EXISTS ${test}-OK)
+      message("
+Additional information about PETSc issues is available
+at:\nhttp://www.dealii.org/developer/external-libs/petsc.html\n"
+        )
+    endif()
+
+    if(${test} MATCHES "^p4est" AND NOT EXISTS ${test}-OK)
+      message("
+The p4est test can fail if you are running an OpenMPI version before 1.5.
+This is a known problem and the only work around is to update to a more
+recent version or use a different MPI library like MPICH.\n"
+        )
+    endif()
+
+  endforeach()
+
+endif()
diff --git a/tests/quick_tests/step-petsc.cc b/tests/quick_tests/step-petsc.cc
new file mode 100644
index 0000000..6147033
--- /dev/null
+++ b/tests/quick_tests/step-petsc.cc
@@ -0,0 +1,218 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2013 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+ */
+
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/table_handler.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/matrix_tools.h>
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/petsc_sparse_matrix.h>
+#include <deal.II/lac/petsc_vector.h>
+#include <deal.II/lac/petsc_solver.h>
+#include <deal.II/lac/petsc_precondition.h>
+
+#include <fstream>
+#include <iostream>
+
+using namespace dealii;
+
+// Test that deal.II is working with PETSc by solving Laplace's
+// problem in 2d.
+class LaplaceProblem
+{
+public:
+  LaplaceProblem ();
+  // refine the mesh five times, solving on each level, then print the table
+  void run ();
+
+private:
+  // distribute DoFs, rebuild the constraints and resize A, b and x
+  void setup_system ();
+  // assemble the matrix A and the right hand side b
+  void assemble_system ();
+  // solve A x = b with the PETSc CG solver
+  void solve ();
+
+  Triangulation<2> triangulation;
+  FE_Q<2>          fe;
+  // constructed from the triangulation member declared above
+  DoFHandler<2>    dof_handler;
+
+  // linear system A x = b held in PETSc data structures
+  PETScWrappers::SparseMatrix A;
+  PETScWrappers::Vector       b, x;
+  // zero boundary constraints, rebuilt in setup_system()
+  ConstraintMatrix            constraints;
+
+  // receives the cell and DoF counts of every refinement cycle
+  TableHandler output_table;
+};
+
+// Use FE_Q elements of degree 1 and attach the DoF handler to the
+// triangulation member.
+LaplaceProblem::LaplaceProblem ()
+  : fe (1), dof_handler (triangulation)
+{
+}
+
+// Prepare the linear system for the current mesh: distribute the degrees of
+// freedom, rebuild the zero boundary constraints, resize the matrix and the
+// vectors, and record the mesh size in the output table.
+void LaplaceProblem::setup_system ()
+{
+  dof_handler.distribute_dofs (fe);
+  const types::global_dof_index n_dofs = dof_handler.n_dofs();
+
+  constraints.clear ();
+  DoFTools::make_zero_boundary_constraints (dof_handler, constraints);
+  constraints.close ();
+
+  // square matrix, row length bounded by max_couplings_between_dofs()
+  A.reinit (n_dofs, n_dofs, dof_handler.max_couplings_between_dofs());
+  b.reinit (n_dofs);
+  x.reinit (n_dofs);
+
+  // one table row per refinement cycle
+  output_table.add_value ("cells", triangulation.n_active_cells());
+  output_table.add_value ("dofs",  n_dofs);
+}
+
+// Assemble the global system cell by cell. On every active cell the local
+// matrix collects grad(phi_i) * grad(phi_j) * JxW and the local right hand
+// side collects phi_i * JxW over all quadrature points; both are then added
+// to A and b through the constraints object.
+void LaplaceProblem::assemble_system ()
+{
+  QGauss<2> quadrature (2);
+
+  FEValues<2> fe_values (fe, quadrature,
+                         update_values | update_gradients |
+                         update_quadrature_points | update_JxW_values);
+
+  const unsigned int n_cell_dofs = fe.dofs_per_cell;
+  const unsigned int n_quad      = quadrature.size();
+
+  FullMatrix<double> local_matrix (n_cell_dofs, n_cell_dofs);
+  Vector<double>     local_rhs (n_cell_dofs);
+
+  std::vector<types::global_dof_index> dof_indices (n_cell_dofs);
+
+  const DoFHandler<2>::active_cell_iterator endc = dof_handler.end ();
+  for (DoFHandler<2>::active_cell_iterator cell = dof_handler.begin_active ();
+       cell != endc; ++cell)
+    {
+      fe_values.reinit (cell);
+      local_matrix = 0;
+      local_rhs    = 0;
+
+      for (unsigned int q=0; q<n_quad; ++q)
+        {
+          // quadrature weight times Jacobian determinant at this point
+          const double JxW = fe_values.JxW (q);
+
+          for (unsigned int i=0; i<n_cell_dofs; ++i)
+            {
+              for (unsigned int j=0; j<n_cell_dofs; ++j)
+                local_matrix (i, j) += fe_values.shape_grad (i, q) *
+                                       fe_values.shape_grad (j, q) *
+                                       JxW;
+
+              local_rhs (i) += fe_values.shape_value (i, q) * JxW;
+            }
+        }
+
+      cell->get_dof_indices (dof_indices);
+
+      constraints.distribute_local_to_global (local_matrix, dof_indices, A);
+      constraints.distribute_local_to_global (local_rhs, dof_indices, b);
+    }
+
+  // finish the pending additions to the PETSc objects
+  A.compress (VectorOperation::add);
+  b.compress (VectorOperation::add);
+}
+
+// Solve A x = b with the PETSc CG solver and a block Jacobi preconditioner.
+// The solver control is created with the arguments (1e03, 1e-03). This
+// function does not add anything to the output table.
+void LaplaceProblem::solve ()
+{
+  SolverControl control (1e03, 1e-03);
+  PETScWrappers::SolverCG solver (control);
+  PETScWrappers::PreconditionBlockJacobi block_jacobi (A);
+
+  solver.solve (A, x, b, block_jacobi);
+}
+
+// Create the hyper cube (-1,1)^2, then refine and solve five times in a
+// row. Afterwards the collected table is printed to std::cout.
+void LaplaceProblem::run ()
+{
+  GridGenerator::hyper_cube (triangulation, -1, 1);
+
+  const unsigned int n_cycles = 5;
+  for (unsigned int cycle=0; cycle!=n_cycles; ++cycle)
+    {
+      // the mesh is refined before every solve, including the first one
+      triangulation.refine_global (1);
+      setup_system ();
+      assemble_system ();
+      solve ();
+    }
+
+  // finalize output
+  output_table.write_text (std::cout);
+  deallog << std::endl;
+}
+
+
+// Run the PETSc test problem while an MPI_InitFinalize object is alive.
+// Writes "OK" to deallog and returns 0 when run() completes; returns 1
+// after a report on std::cerr when an exception is caught.
+int main (int argc, char **argv)
+{
+  try
+    {
+      Utilities::MPI::MPI_InitFinalize mpi_guard (argc, argv, 1);
+
+      // inner scope: the problem object is destroyed before mpi_guard
+      {
+        LaplaceProblem laplace_problem;
+        laplace_problem.run ();
+        deallog << "OK" << std::endl;
+      }
+    }
+  catch (std::exception &exc)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl
+                << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    }
+  catch (...)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl
+                << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    }
+
+  return 0;
+}
diff --git a/tests/quick_tests/step-slepc.cc b/tests/quick_tests/step-slepc.cc
new file mode 100644
index 0000000..42f9d79
--- /dev/null
+++ b/tests/quick_tests/step-slepc.cc
@@ -0,0 +1,237 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2013 - 2015 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE at
+ * the top level of the deal.II distribution.
+ *
+ * ---------------------------------------------------------------------
+ */
+
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/table_handler.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/matrix_tools.h>
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/petsc_sparse_matrix.h>
+#include <deal.II/lac/petsc_vector.h>
+#include <deal.II/lac/slepc_solver.h>
+
+#include <fstream>
+#include <iostream>
+
+using namespace dealii;
+
+// Test that deal.II is working with SLEPc by solving the Laplace
+// eigenvalue problem in 2d.
+class LaplaceEigenspectrumProblem
+{
+public:
+  LaplaceEigenspectrumProblem ();
+  // run five refinement cycles and print the resulting table
+  void run ();
+
+private:
+  // distribute DoFs, rebuild the constraints, resize A, B, x and lambda
+  void setup_system ();
+  // assemble A (gradient products) and B (value products)
+  void assemble_system ();
+  // compute an eigenpair of (A, B) with the SLEPc Arnoldi solver
+  void solve ();
+
+  Triangulation<2> triangulation;
+  FE_Q<2>          fe;
+  // constructed from the triangulation member declared above
+  DoFHandler<2>    dof_handler;
+
+  // the two matrices handed to the eigensolver in solve()
+  PETScWrappers::SparseMatrix        A, B;
+  // eigenvectors and eigenvalues; setup_system() resizes both to one entry
+  std::vector<PETScWrappers::Vector> x;
+  std::vector<double>                lambda;
+  // zero boundary constraints, rebuilt in setup_system()
+  ConstraintMatrix                   constraints;
+
+  // one row per refinement cycle plus a final row with the analytic value
+  TableHandler output_table;
+};
+
+// Use FE_Q elements of degree 1 and attach the DoF handler to the
+// triangulation member.
+LaplaceEigenspectrumProblem::LaplaceEigenspectrumProblem ()
+  : fe (1), dof_handler (triangulation)
+{
+}
+
+// Prepare the eigenproblem for the current mesh: distribute the degrees of
+// freedom, rebuild the zero boundary constraints, resize both matrices and
+// the eigenpair storage, and record the mesh size in the output table.
+void LaplaceEigenspectrumProblem::setup_system ()
+{
+  dof_handler.distribute_dofs (fe);
+  const types::global_dof_index n_dofs = dof_handler.n_dofs();
+
+  constraints.clear ();
+  DoFTools::make_zero_boundary_constraints (dof_handler, constraints);
+  constraints.close ();
+
+  // both matrices get the same dimensions and row length bound
+  A.reinit (n_dofs, n_dofs, dof_handler.max_couplings_between_dofs());
+  B.reinit (n_dofs, n_dofs, dof_handler.max_couplings_between_dofs());
+
+  // storage for exactly one eigenvector and one eigenvalue
+  x.resize (1);
+  x[0].reinit (n_dofs);
+  lambda.resize (1);
+  lambda[0] = 0.;
+
+  // one table row per refinement cycle
+  output_table.add_value ("cells", triangulation.n_active_cells());
+  output_table.add_value ("dofs",  n_dofs);
+}
+
+// Assemble both matrices of the eigenproblem cell by cell. On every active
+// cell cell_A collects grad(phi_i) * grad(phi_j) * JxW and cell_B collects
+// phi_i * phi_j * JxW over all quadrature points; both are then added to
+// the global matrices A and B through the constraints object.
+void LaplaceEigenspectrumProblem::assemble_system ()
+{
+  QGauss<2> quadrature_formula(2);
+
+  FEValues<2> fe_values (fe, quadrature_formula,
+                         update_values            |
+                         update_gradients         |
+                         update_quadrature_points |
+                         update_JxW_values);
+
+  const unsigned int dofs_per_cell = fe.dofs_per_cell;
+  const unsigned int n_q_points    = quadrature_formula.size();
+
+  FullMatrix<double> cell_A (dofs_per_cell, dofs_per_cell);
+  FullMatrix<double> cell_B (dofs_per_cell, dofs_per_cell);
+
+  std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);
+
+  // DoFHandler<2> is not a dependent type here, so no 'typename' keyword:
+  // it is only permitted outside of templates from C++11 on
+  DoFHandler<2>::active_cell_iterator
+  cell = dof_handler.begin_active (),
+  endc = dof_handler.end ();
+
+  for (; cell!=endc; ++cell)
+    {
+      fe_values.reinit (cell);
+      cell_A = 0;
+      cell_B = 0;
+
+      for (unsigned int q_point=0; q_point<n_q_points; ++q_point)
+        for (unsigned int i=0; i<dofs_per_cell; ++i)
+          for (unsigned int j=0; j<dofs_per_cell; ++j)
+            {
+              cell_A (i, j)
+              +=
+                fe_values.shape_grad (i, q_point) *
+                fe_values.shape_grad (j, q_point)
+                *
+                fe_values.JxW (q_point);
+
+              cell_B (i, j)
+              +=
+                fe_values.shape_value (i, q_point) *
+                fe_values.shape_value (j, q_point)
+                *
+                fe_values.JxW (q_point);
+            }
+
+      cell->get_dof_indices (local_dof_indices);
+
+      constraints.distribute_local_to_global (cell_A, local_dof_indices, A);
+      constraints.distribute_local_to_global (cell_B, local_dof_indices, B);
+    }
+
+  // finish the pending additions to the PETSc matrices
+  A.compress (VectorOperation::add);
+  B.compress (VectorOperation::add);
+}
+
+// Ask the SLEPc Arnoldi solver for x.size() eigenpairs of (A, B), selected
+// with EPS_SMALLEST_REAL, and add the first eigenvalue and its distance to
+// the reference value 2 to the output table.
+void LaplaceEigenspectrumProblem::solve ()
+{
+  SolverControl control (1000, 1e-03);
+  SLEPcWrappers::SolverArnoldi arnoldi (control);
+  arnoldi.set_which_eigenpairs (EPS_SMALLEST_REAL);
+  arnoldi.solve (A, B, lambda, x, x.size());
+
+  const double computed = lambda[0];
+  output_table.add_value ("lambda", computed);
+  output_table.add_value ("error", std::fabs(2.-computed));
+}
+
+// Solve the eigenproblem on the hyper cube (-PI/2, PI/2)^2 for five
+// successively refined meshes. Throws via AssertThrow if an eigenvalue is
+// not smaller than the one from the previous cycle. Finally a row with the
+// analytic value 2 is appended and the table is printed to std::cout.
+void LaplaceEigenspectrumProblem::run ()
+{
+  const double half_width = dealii::numbers::PI/2.;
+  GridGenerator::hyper_cube (triangulation, -half_width, half_width);
+
+  // deliberately large start value, so that the first cycle always passes
+  double previous_lambda = 1000;
+
+  const unsigned int n_cycles = 5;
+  for (unsigned int cycle=0; cycle!=n_cycles; ++cycle)
+    {
+      triangulation.refine_global (1);
+      setup_system ();
+      assemble_system ();
+      solve ();
+
+      // the eigenvalue has to decrease from one cycle to the next
+      AssertThrow (lambda[0]<previous_lambda, ExcMessage("solution is not converging"));
+      previous_lambda = lambda[0];
+    }
+
+  // last table row: the analytic result
+  output_table.add_value ("cells", "inf");
+  output_table.add_value ("dofs",  "inf");
+  output_table.add_value ("lambda", 2.);
+  output_table.add_value ("error", "-");
+
+  // finalize output
+  output_table.write_text (std::cout);
+  deallog << std::endl;
+}
+
+
+// Run the SLEPc test problem while an MPI_InitFinalize object is alive.
+// Returns 0 when run() completes and 1 after a report on std::cerr when an
+// exception is caught.
+int main (int argc, char **argv)
+{
+  try
+    {
+      Utilities::MPI::MPI_InitFinalize mpi_guard (argc, argv, 1);
+
+      // inner scope: the problem object is destroyed before mpi_guard
+      {
+        LaplaceEigenspectrumProblem eigen_problem;
+        eigen_problem.run ();
+      }
+    }
+  catch (std::exception &exc)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl
+                << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    }
+  catch (...)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl
+                << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      return 1;
+    }
+
+  return 0;
+}
diff --git a/tests/quick_tests/step.cc b/tests/quick_tests/step.cc
new file mode 100644
index 0000000..b7e1c19
--- /dev/null
+++ b/tests/quick_tests/step.cc
@@ -0,0 +1,252 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2005 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+
+
+// copied from bits/step-11 with slight modifications to make it run faster
+
+#include <deal.II/base/logstream.h>
+
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/table_handler.h>
+#include <deal.II/lac/vector.h>
+#include <deal.II/lac/sparse_matrix.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/precondition.h>
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/tria_boundary_lib.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/mapping_q.h>
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/matrix_tools.h>
+
+#include <deal.II/lac/compressed_sparsity_pattern.h>
+
+#include <algorithm>
+#include <iomanip>
+#include <iomanip>
+#include <cmath>
+
+using namespace dealii;
+
+
+template <int dim>
+class LaplaceProblem
+{
+public:
+  // mapping_degree is handed to the MappingQ member
+  LaplaceProblem (const unsigned int mapping_degree);
+  // solve on a sequence of refined meshes and print the resulting table
+  void run ();
+
+private:
+  // distribute DoFs, build the constraints and the sparsity pattern
+  void setup_system ();
+  // assemble matrix and right hand side, solve, and record the error
+  void assemble_and_solve ();
+  // run the CG solver on the assembled system
+  void solve ();
+
+  Triangulation<dim>   triangulation;
+  FE_Q<dim>            fe;
+  // constructed from the triangulation member declared above
+  DoFHandler<dim>      dof_handler;
+  MappingQ<dim>        mapping;
+
+  // system_matrix is (re)initialized from sparsity_pattern in setup_system()
+  SparsityPattern      sparsity_pattern;
+  SparseMatrix<double> system_matrix;
+  // couples the first boundary DoF to all other boundary DoFs
+  ConstraintMatrix     mean_value_constraints;
+
+  Vector<double>       solution;
+  Vector<double>       system_rhs;
+
+  // one row (cells, |u|_1, error) per refinement cycle
+  TableHandler         output_table;
+
+  // error of the most recent assemble_and_solve() call; checked in run()
+  double last_error;
+};
+
+
+
+// Use FE_Q elements of degree 1, attach the DoF handler to the
+// triangulation member, create the mapping with the requested degree and
+// write a short banner to deallog.
+template <int dim>
+LaplaceProblem<dim>::LaplaceProblem (const unsigned int mapping_degree)
+  :
+  fe (1),
+  dof_handler (triangulation),
+  mapping (mapping_degree)
+{
+  deallog << "Using mapping with degree " << mapping_degree << ":" << std::endl
+          << "============================" << std::endl;
+}
+
+
+
+// Prepare the linear system for the current mesh: distribute the degrees of
+// freedom, resize the vectors, build a constraint that ties the first
+// boundary DoF to all later boundary DoFs with weight -1, and create the
+// condensed sparsity pattern for the system matrix.
+template <int dim>
+void LaplaceProblem<dim>::setup_system ()
+{
+  dof_handler.distribute_dofs (fe);
+  const types::global_dof_index n_dofs = dof_handler.n_dofs();
+
+  solution.reinit (n_dofs);
+  system_rhs.reinit (n_dofs);
+
+  // flag every DoF that sits on the boundary
+  std::vector<bool> boundary_dofs (n_dofs, false);
+  DoFTools::extract_boundary_dofs (dof_handler, std::vector<bool>(1,true),
+                                   boundary_dofs);
+
+  // index of the first flagged entry
+  const unsigned int first_boundary_dof
+    = std::distance (boundary_dofs.begin(),
+                     std::find (boundary_dofs.begin(), boundary_dofs.end(), true));
+
+  mean_value_constraints.clear ();
+  mean_value_constraints.add_line (first_boundary_dof);
+  for (unsigned int dof=first_boundary_dof+1; dof<n_dofs; ++dof)
+    {
+      if (boundary_dofs[dof])
+        mean_value_constraints.add_entry (first_boundary_dof, dof, -1);
+    }
+  mean_value_constraints.close ();
+
+  // build the pattern in compressed form, condense it, then copy it over
+  CompressedSparsityPattern compressed_pattern (n_dofs, n_dofs);
+  DoFTools::make_sparsity_pattern (dof_handler, compressed_pattern);
+  mean_value_constraints.condense (compressed_pattern);
+
+  sparsity_pattern.copy_from (compressed_pattern);
+  system_matrix.reinit (sparsity_pattern);
+}
+
+
+
+// Assemble the Laplace matrix and the right hand side (a constant volume
+// term of -2 plus a constant boundary term of 1), condense the
+// constraints, solve, and measure the H1 seminorm of the solution. The
+// norm and its distance to sqrt(3.14159265358/2) go into the output table;
+// the distance is also stored in last_error.
+template <int dim>
+void LaplaceProblem<dim>::assemble_and_solve ()
+{
+  // quadrature order derived from the mapping degree, but at least 2
+  const unsigned int gauss_degree
+    = std::max (static_cast<unsigned int>(std::ceil(1.*(mapping.get_degree()+1)/2)),
+                2U);
+  const QGauss<dim>   cell_quadrature (gauss_degree);
+  const QGauss<dim-1> face_quadrature (gauss_degree);
+
+  MatrixTools::create_laplace_matrix (mapping, dof_handler,
+                                      cell_quadrature,
+                                      system_matrix);
+  VectorTools::create_right_hand_side (mapping, dof_handler,
+                                       cell_quadrature,
+                                       ConstantFunction<dim>(-2),
+                                       system_rhs);
+
+  // the boundary contribution is assembled separately and then added
+  Vector<double> boundary_rhs (system_rhs.size());
+  VectorTools::create_boundary_right_hand_side (mapping, dof_handler,
+                                                face_quadrature,
+                                                ConstantFunction<dim>(1),
+                                                boundary_rhs);
+  system_rhs += boundary_rhs;
+
+  mean_value_constraints.condense (system_matrix);
+  mean_value_constraints.condense (system_rhs);
+
+  solve ();
+  mean_value_constraints.distribute (solution);
+
+  // cell-wise H1 seminorm of the solution, taken against the zero function
+  Vector<float> norm_per_cell (triangulation.n_active_cells());
+  VectorTools::integrate_difference (mapping, dof_handler,
+                                     solution,
+                                     ZeroFunction<dim>(),
+                                     norm_per_cell,
+                                     QGauss<dim>(gauss_degree+1),
+                                     VectorTools::H1_seminorm);
+  const double norm  = norm_per_cell.l2_norm();
+  const double error = std::fabs(norm-std::sqrt(3.14159265358/2));
+
+  output_table.add_value ("cells", triangulation.n_active_cells());
+  output_table.add_value ("|u|_1", norm);
+  output_table.add_value ("error", error);
+
+  last_error = error;
+}
+
+
+
+// Solve the assembled system with CG and an SSOR preconditioner that is
+// initialized with the parameter 1.2. The solver control is created with
+// the arguments (1000, 1e-12).
+template <int dim>
+void LaplaceProblem<dim>::solve ()
+{
+  SolverControl control (1000, 1e-12);
+  SolverCG<>    cg_solver (control);
+
+  PreconditionSSOR<> ssor;
+  ssor.initialize(system_matrix, 1.2);
+
+  cg_solver.solve (system_matrix, solution, system_rhs, ssor);
+}
+
+
+
+// Create a hyper ball with a HyperBallBoundary on boundary id 0, solve the
+// problem six times with one global refinement after each solve, and throw
+// via AssertThrow unless the last error is below 1e-3. Finally the table is
+// printed to std::cout.
+template <int dim>
+void LaplaceProblem<dim>::run ()
+{
+  GridGenerator::hyper_ball (triangulation);
+  static const HyperBallBoundary<dim> boundary;
+  triangulation.set_boundary (0, boundary);
+
+  const unsigned int n_cycles = 6;
+  for (unsigned int cycle=0; cycle!=n_cycles; ++cycle)
+    {
+      setup_system ();
+      assemble_and_solve ();
+
+      // refine after every cycle, the last one included
+      triangulation.refine_global (1);
+    }
+
+  AssertThrow (last_error<1e-3, ExcMessage("solution is not converging"));
+
+  output_table.set_precision("|u|_1", 6);
+  output_table.set_precision("error", 6);
+  output_table.write_text (std::cout);
+  deallog << std::endl;
+}
+
+
+
+// Run the 2d problem with a mapping of degree 1. Returns 0 when run()
+// completes and -1 after a report on deallog when an exception is caught.
+int main ()
+{
+  try
+    {
+      LaplaceProblem<2> laplace_problem (1);
+      laplace_problem.run ();
+    }
+  catch (std::exception &exc)
+    {
+      deallog << std::endl << std::endl
+              << "----------------------------------------------------"
+              << std::endl
+              << "Exception on processing: " << std::endl
+              << exc.what() << std::endl
+              << "Aborting!" << std::endl
+              << "----------------------------------------------------"
+              << std::endl;
+      return -1;
+    }
+  catch (...)
+    {
+      deallog << std::endl << std::endl
+              << "----------------------------------------------------"
+              << std::endl
+              << "Unknown exception!" << std::endl
+              << "Aborting!" << std::endl
+              << "----------------------------------------------------"
+              << std::endl;
+      return -1;
+    }
+
+  return 0;
+}
diff --git a/tests/quick_tests/tbb.cc b/tests/quick_tests/tbb.cc
new file mode 100644
index 0000000..7eacc44
--- /dev/null
+++ b/tests/quick_tests/tbb.cc
@@ -0,0 +1,87 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (C) 2013 - 2015 by the deal.II authors
+//
+// This file is part of the deal.II library.
+//
+// The deal.II library is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the deal.II distribution.
+//
+// ---------------------------------------------------------------------
+
+// test Threads::new_task and WorkStream::run
+
+#include <deal.II/base/thread_management.h>
+#include <deal.II/base/work_stream.h>
+#include <tbb/task_scheduler_init.h>
+#include <iostream>
+
+using namespace dealii;
+
+// Increment the referenced counter by one.
+void add_one(unsigned int &var)
+{
+  ++var;
+}
+
+// Run add_one() on a task created with Threads::new_task, wait for it, and
+// terminate the program with exit code 1 unless the counter went from 1
+// to 2.
+void test1()
+{
+  unsigned int counter = 1;
+  Threads::Task<> task = Threads::new_task (&add_one,counter);
+  task.join();
+
+  const bool incremented = (counter == 2);
+  if (!incremented)
+    exit(1);
+}
+
+// Empty scratch object; test2() passes one to WorkStream::run and
+// assemble() receives it without using it.
+struct scratch_data
+{
+};
+
+// Carries one result from assemble() to copy().
+struct copy_data
+{
+  // written by assemble(), added to the running total by copy()
+  int value;
+};
+
+
+// Worker handed to WorkStream::run: store the element that 'it' points to
+// in data.value. The scratch object is not used.
+void assemble(const std::vector<int>::iterator &it,
+              scratch_data &scratch,
+              copy_data &data)
+{
+  const int current = *it;
+  data.value = current;
+}
+
+// Copier handed to WorkStream::run: add data.value to the running total.
+void copy(int &value, const copy_data &data)
+{
+  value = value + data.value;
+}
+
+// Sum the numbers 1..10000 with WorkStream::run, using assemble() as the
+// worker and copy() as the copier, print the result, and terminate the
+// program with exit code 2 unless it equals 10000*10001/2.
+void test2()
+{
+  const int n_entries = 10000;
+
+  // fill with 1, 2, ..., n_entries
+  std::vector<int> values(n_entries);
+  for (unsigned int k=0; k!=values.size(); ++k)
+    values[k] = k+1;
+
+  int sum = 0;
+  WorkStream::run(values.begin(), values.end(),
+                  &assemble,
+                  std_cxx11::bind(&copy, std_cxx11::ref(sum), std_cxx11::_1),
+                  scratch_data(), copy_data());
+  std::cout << "result: " << sum << std::endl;
+
+  const int expected_sum = n_entries*(n_entries+1)/2;
+  if (sum != expected_sum)
+    exit(2);
+}
+
+// Report the default thread count of TBB, then run both tests. Each test
+// calls exit() with a non-zero code when its check fails.
+int main ()
+{
+  const int n_tbb_threads = tbb::task_scheduler_init::default_num_threads();
+  std::cout << "TBB will use " << n_tbb_threads << " threads." << std::endl;
+
+  test1();
+  test2();
+}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/deal.ii.git